diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,121858 +3,152399 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 17405, + "global_step": 21768, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "grad_norm": 1.0507381496559225, - "learning_rate": 3.824091778202677e-08, - "loss": 0.2942, + "grad_norm": 1.1896751676677808, + "learning_rate": 3.0581039755351686e-08, + "loss": 0.3301, "step": 1 }, { "epoch": 0.0, - "grad_norm": 0.8070182416007133, - "learning_rate": 7.648183556405354e-08, - "loss": 0.3872, + "grad_norm": 1.0029278066787826, + "learning_rate": 6.116207951070337e-08, + "loss": 0.3915, "step": 2 }, { "epoch": 0.0, - "grad_norm": 1.80041143859621, - "learning_rate": 1.1472275334608032e-07, - "loss": 0.5905, + "grad_norm": 2.1703349053558765, + "learning_rate": 9.174311926605506e-08, + "loss": 0.6309, "step": 3 }, { "epoch": 0.0, - "grad_norm": 0.8120071191832767, - "learning_rate": 1.5296367112810708e-07, - "loss": 0.2618, + "grad_norm": 0.9131179113708626, + "learning_rate": 1.2232415902140674e-07, + "loss": 0.275, "step": 4 }, { "epoch": 0.0, - "grad_norm": 0.5949392601041631, - "learning_rate": 1.9120458891013387e-07, - "loss": 0.2642, + "grad_norm": 0.7030740475915367, + "learning_rate": 1.5290519877675842e-07, + "loss": 0.254, "step": 5 }, { "epoch": 0.0, - "grad_norm": 4.055790990377875, - "learning_rate": 2.2944550669216063e-07, - "loss": 0.7541, + "grad_norm": 4.848491169484364, + "learning_rate": 1.8348623853211012e-07, + "loss": 0.7207, "step": 6 }, { "epoch": 0.0, - "grad_norm": 0.7613072929404066, - "learning_rate": 2.676864244741874e-07, - "loss": 0.2649, + "grad_norm": 0.9297694531189934, + "learning_rate": 2.140672782874618e-07, + "loss": 0.2531, "step": 7 }, { "epoch": 0.0, - "grad_norm": 1.304037252213996, - "learning_rate": 3.0592734225621416e-07, - "loss": 0.5504, + "grad_norm": 1.4877978876141211, + "learning_rate": 2.446483180428135e-07, + "loss": 0.5763, "step": 8 }, { "epoch": 0.0, - "grad_norm": 0.9141654125014961, - "learning_rate": 3.441682600382409e-07, - "loss": 0.3869, + "grad_norm": 1.1114322130048153, + "learning_rate": 2.752293577981652e-07, + "loss": 0.4025, "step": 9 }, { "epoch": 0.0, - "grad_norm": 0.7609269558117047, - "learning_rate": 3.8240917782026774e-07, - "loss": 0.2751, + "grad_norm": 0.9746775937464117, + "learning_rate": 3.0581039755351683e-07, + "loss": 0.2694, "step": 10 }, { "epoch": 0.0, - "grad_norm": 0.7955995450796811, - "learning_rate": 4.206500956022945e-07, - "loss": 0.1885, + "grad_norm": 0.9793302921045483, + "learning_rate": 3.363914373088685e-07, + "loss": 0.2089, "step": 11 }, { "epoch": 0.0, - "grad_norm": 1.8612661473785725, - "learning_rate": 4.5889101338432127e-07, - "loss": 0.585, + "grad_norm": 1.852204792652111, + "learning_rate": 3.6697247706422023e-07, + "loss": 0.5606, "step": 12 }, { "epoch": 0.0, - "grad_norm": 0.7748274547715579, - "learning_rate": 4.97131931166348e-07, - "loss": 0.3195, + "grad_norm": 0.9117889839097142, + "learning_rate": 3.975535168195719e-07, + "loss": 0.3266, "step": 13 }, { "epoch": 0.0, - "grad_norm": 0.8639653689032786, - "learning_rate": 5.353728489483748e-07, - "loss": 0.3883, + "grad_norm": 0.9864663310271774, + "learning_rate": 4.281345565749236e-07, + "loss": 0.3865, "step": 14 }, { "epoch": 0.0, - "grad_norm": 2.5884579720855796, - "learning_rate": 5.736137667304016e-07, - "loss": 0.785, + "grad_norm": 2.506889830882708, + "learning_rate": 4.587155963302753e-07, + "loss": 0.7021, "step": 15 }, { "epoch": 0.0, - "grad_norm": 1.8703771449042337, - "learning_rate": 6.118546845124283e-07, - "loss": 0.3993, + "grad_norm": 1.7764826003955219, + "learning_rate": 4.89296636085627e-07, + "loss": 0.3314, "step": 16 }, { "epoch": 0.0, - "grad_norm": 0.6961613428356354, - "learning_rate": 6.500956022944552e-07, - "loss": 0.2686, + "grad_norm": 0.6380223564776178, + "learning_rate": 5.198776758409786e-07, + "loss": 0.2747, "step": 17 }, { "epoch": 0.0, - "grad_norm": 0.971276714643732, - "learning_rate": 6.883365200764818e-07, - "loss": 0.3745, + "grad_norm": 1.12564089310686, + "learning_rate": 5.504587155963304e-07, + "loss": 0.3476, "step": 18 }, { "epoch": 0.0, - "grad_norm": 1.4283398141820636, - "learning_rate": 7.265774378585087e-07, - "loss": 0.4011, + "grad_norm": 1.3906186159580616, + "learning_rate": 5.81039755351682e-07, + "loss": 0.3971, "step": 19 }, { "epoch": 0.0, - "grad_norm": 0.9170881209785016, - "learning_rate": 7.648183556405355e-07, - "loss": 0.3542, + "grad_norm": 1.0055363674251718, + "learning_rate": 6.116207951070337e-07, + "loss": 0.3663, "step": 20 }, { "epoch": 0.0, - "grad_norm": 0.9135336035316859, - "learning_rate": 8.030592734225621e-07, - "loss": 0.3697, + "grad_norm": 1.1763220205271772, + "learning_rate": 6.422018348623854e-07, + "loss": 0.3956, "step": 21 }, { "epoch": 0.0, - "grad_norm": 1.0476506879512788, - "learning_rate": 8.41300191204589e-07, - "loss": 0.3371, + "grad_norm": 1.1771358477951004, + "learning_rate": 6.72782874617737e-07, + "loss": 0.3343, "step": 22 }, { "epoch": 0.0, - "grad_norm": 0.6115452493182032, - "learning_rate": 8.795411089866157e-07, - "loss": 0.2105, + "grad_norm": 0.8682733905020673, + "learning_rate": 7.033639143730888e-07, + "loss": 0.2151, "step": 23 }, { "epoch": 0.0, - "grad_norm": 1.67676855881491, - "learning_rate": 9.177820267686425e-07, - "loss": 0.5603, + "grad_norm": 1.806191955924715, + "learning_rate": 7.339449541284405e-07, + "loss": 0.5262, "step": 24 }, { "epoch": 0.0, - "grad_norm": 0.7202407335460682, - "learning_rate": 9.560229445506693e-07, - "loss": 0.3293, + "grad_norm": 0.7920947235713969, + "learning_rate": 7.645259938837921e-07, + "loss": 0.3274, "step": 25 }, { "epoch": 0.0, - "grad_norm": 0.9865131037247048, - "learning_rate": 9.94263862332696e-07, - "loss": 0.41, + "grad_norm": 1.3212618760626385, + "learning_rate": 7.951070336391438e-07, + "loss": 0.3825, "step": 26 }, { "epoch": 0.0, - "grad_norm": 2.4535881440886347, - "learning_rate": 1.0325047801147228e-06, - "loss": 0.5634, + "grad_norm": 2.735046553729485, + "learning_rate": 8.256880733944956e-07, + "loss": 0.5674, "step": 27 }, { "epoch": 0.0, - "grad_norm": 1.0947257185323476, - "learning_rate": 1.0707456978967496e-06, - "loss": 0.3149, + "grad_norm": 1.1243660525291819, + "learning_rate": 8.562691131498472e-07, + "loss": 0.3288, "step": 28 }, { "epoch": 0.0, - "grad_norm": 1.0129147168499806, - "learning_rate": 1.1089866156787763e-06, - "loss": 0.3681, + "grad_norm": 1.0530811537369742, + "learning_rate": 8.868501529051989e-07, + "loss": 0.3487, "step": 29 }, { "epoch": 0.0, - "grad_norm": 0.6576020756810448, - "learning_rate": 1.1472275334608031e-06, - "loss": 0.2731, + "grad_norm": 0.7831702324371507, + "learning_rate": 9.174311926605506e-07, + "loss": 0.2744, "step": 30 }, { "epoch": 0.0, - "grad_norm": 0.9047392383713737, - "learning_rate": 1.1854684512428299e-06, - "loss": 0.3647, + "grad_norm": 1.054113376915378, + "learning_rate": 9.480122324159022e-07, + "loss": 0.3711, "step": 31 }, { "epoch": 0.0, - "grad_norm": 1.4541026438021523, - "learning_rate": 1.2237093690248566e-06, - "loss": 0.4479, + "grad_norm": 1.7387637511827139, + "learning_rate": 9.78593272171254e-07, + "loss": 0.4615, "step": 32 }, { "epoch": 0.0, - "grad_norm": 0.8653286599987811, - "learning_rate": 1.2619502868068834e-06, - "loss": 0.3584, + "grad_norm": 0.9137121198649181, + "learning_rate": 1.0091743119266057e-06, + "loss": 0.369, "step": 33 }, { "epoch": 0.0, - "grad_norm": 2.121457616803094, - "learning_rate": 1.3001912045889104e-06, - "loss": 0.3729, + "grad_norm": 1.9576881450474133, + "learning_rate": 1.0397553516819571e-06, + "loss": 0.3786, "step": 34 }, { "epoch": 0.0, - "grad_norm": 0.8277941941005295, - "learning_rate": 1.3384321223709371e-06, - "loss": 0.3191, + "grad_norm": 1.332010783690538, + "learning_rate": 1.070336391437309e-06, + "loss": 0.4082, "step": 35 }, { "epoch": 0.0, - "grad_norm": 0.7443085270016412, - "learning_rate": 1.3766730401529637e-06, - "loss": 0.3493, + "grad_norm": 0.7729452322569007, + "learning_rate": 1.1009174311926608e-06, + "loss": 0.3462, "step": 36 }, { "epoch": 0.0, - "grad_norm": 1.1585945317118969, - "learning_rate": 1.4149139579349905e-06, - "loss": 0.3186, + "grad_norm": 1.3710484414254225, + "learning_rate": 1.1314984709480122e-06, + "loss": 0.3393, "step": 37 }, { "epoch": 0.0, - "grad_norm": 1.5269750513842424, - "learning_rate": 1.4531548757170174e-06, - "loss": 0.3761, + "grad_norm": 0.8633454596814635, + "learning_rate": 1.162079510703364e-06, + "loss": 0.2741, "step": 38 }, { "epoch": 0.0, - "grad_norm": 2.837589017463728, - "learning_rate": 1.4913957934990442e-06, - "loss": 0.7611, + "grad_norm": 3.3227585736572496, + "learning_rate": 1.1926605504587159e-06, + "loss": 0.7708, "step": 39 }, { "epoch": 0.0, - "grad_norm": 1.1361050627404332, - "learning_rate": 1.529636711281071e-06, - "loss": 0.1478, + "grad_norm": 1.2254278901256441, + "learning_rate": 1.2232415902140673e-06, + "loss": 0.1408, "step": 40 }, { "epoch": 0.0, - "grad_norm": 0.9717312245736287, - "learning_rate": 1.5678776290630975e-06, - "loss": 0.3778, + "grad_norm": 1.2658216734261158, + "learning_rate": 1.253822629969419e-06, + "loss": 0.3555, "step": 41 }, { "epoch": 0.0, - "grad_norm": 1.6502427723791129, - "learning_rate": 1.6061185468451243e-06, - "loss": 0.5467, + "grad_norm": 1.8492790091748121, + "learning_rate": 1.2844036697247707e-06, + "loss": 0.5538, "step": 42 }, { "epoch": 0.0, - "grad_norm": 0.6014184946161949, - "learning_rate": 1.6443594646271512e-06, - "loss": 0.1958, + "grad_norm": 0.6521772305476505, + "learning_rate": 1.3149847094801224e-06, + "loss": 0.189, "step": 43 }, { "epoch": 0.0, - "grad_norm": 1.7347266365889769, - "learning_rate": 1.682600382409178e-06, - "loss": 0.4705, + "grad_norm": 2.6694535297067086, + "learning_rate": 1.345565749235474e-06, + "loss": 0.434, "step": 44 }, { "epoch": 0.0, - "grad_norm": 0.9784708341611575, - "learning_rate": 1.7208413001912048e-06, - "loss": 0.3925, + "grad_norm": 0.9831137254699878, + "learning_rate": 1.3761467889908258e-06, + "loss": 0.3866, "step": 45 }, { "epoch": 0.0, - "grad_norm": 0.8155632192584802, - "learning_rate": 1.7590822179732313e-06, - "loss": 0.2767, + "grad_norm": 0.8810512708255911, + "learning_rate": 1.4067278287461775e-06, + "loss": 0.2705, "step": 46 }, { "epoch": 0.0, - "grad_norm": 1.6297697128120328, - "learning_rate": 1.7973231357552585e-06, - "loss": 0.4902, + "grad_norm": 1.4724167975436921, + "learning_rate": 1.437308868501529e-06, + "loss": 0.4621, "step": 47 }, { "epoch": 0.0, - "grad_norm": 2.0192981344155907, - "learning_rate": 1.835564053537285e-06, - "loss": 0.5705, + "grad_norm": 1.7970235943355533, + "learning_rate": 1.467889908256881e-06, + "loss": 0.5871, "step": 48 }, { "epoch": 0.0, - "grad_norm": 0.6176671121632008, - "learning_rate": 1.8738049713193118e-06, - "loss": 0.2417, + "grad_norm": 0.7397532216141224, + "learning_rate": 1.4984709480122326e-06, + "loss": 0.2425, "step": 49 }, { "epoch": 0.0, - "grad_norm": 1.715246719483139, - "learning_rate": 1.9120458891013386e-06, - "loss": 0.2814, + "grad_norm": 1.3531752520765654, + "learning_rate": 1.5290519877675841e-06, + "loss": 0.2837, "step": 50 }, { "epoch": 0.0, - "grad_norm": 3.0156791314075595, - "learning_rate": 1.950286806883365e-06, - "loss": 0.8249, + "grad_norm": 2.7721355044000253, + "learning_rate": 1.559633027522936e-06, + "loss": 0.7699, "step": 51 }, { "epoch": 0.0, - "grad_norm": 1.656560840965785, - "learning_rate": 1.988527724665392e-06, - "loss": 0.5013, + "grad_norm": 2.6317311369397967, + "learning_rate": 1.5902140672782875e-06, + "loss": 0.6162, "step": 52 }, { "epoch": 0.0, - "grad_norm": 1.1577287645486485, - "learning_rate": 2.026768642447419e-06, - "loss": 0.2761, + "grad_norm": 0.7515064151592153, + "learning_rate": 1.6207951070336392e-06, + "loss": 0.2914, "step": 53 }, { "epoch": 0.0, - "grad_norm": 3.0509099785445395, - "learning_rate": 2.0650095602294456e-06, - "loss": 0.6112, + "grad_norm": 1.7002464468128908, + "learning_rate": 1.6513761467889911e-06, + "loss": 0.5804, "step": 54 }, { "epoch": 0.0, - "grad_norm": 0.9025595249195338, - "learning_rate": 2.103250478011472e-06, - "loss": 0.1648, + "grad_norm": 0.9310827779153261, + "learning_rate": 1.6819571865443426e-06, + "loss": 0.187, "step": 55 }, { "epoch": 0.0, - "grad_norm": 0.9417125486809438, - "learning_rate": 2.141491395793499e-06, - "loss": 0.325, + "grad_norm": 0.9453208569685909, + "learning_rate": 1.7125382262996943e-06, + "loss": 0.313, "step": 56 }, { "epoch": 0.0, - "grad_norm": 0.9816339235493463, - "learning_rate": 2.179732313575526e-06, - "loss": 0.39, + "grad_norm": 1.3517635854510925, + "learning_rate": 1.743119266055046e-06, + "loss": 0.3995, "step": 57 }, { "epoch": 0.0, - "grad_norm": 1.7398045274330625, - "learning_rate": 2.2179732313575527e-06, - "loss": 0.495, + "grad_norm": 1.8537884159510714, + "learning_rate": 1.7737003058103977e-06, + "loss": 0.4677, "step": 58 }, { "epoch": 0.0, - "grad_norm": 1.1507513410092913, - "learning_rate": 2.2562141491395797e-06, - "loss": 0.3634, + "grad_norm": 1.0516230279352063, + "learning_rate": 1.8042813455657492e-06, + "loss": 0.3637, "step": 59 }, { "epoch": 0.0, - "grad_norm": 4.505952386635836, - "learning_rate": 2.2944550669216062e-06, - "loss": 0.7398, + "grad_norm": 4.991851047334885, + "learning_rate": 1.8348623853211011e-06, + "loss": 0.776, "step": 60 }, { "epoch": 0.0, - "grad_norm": 0.706667753638737, - "learning_rate": 2.332695984703633e-06, - "loss": 0.2919, + "grad_norm": 0.8244058625346203, + "learning_rate": 1.8654434250764528e-06, + "loss": 0.2856, "step": 61 }, { "epoch": 0.0, - "grad_norm": 0.702597844899516, - "learning_rate": 2.3709369024856597e-06, - "loss": 0.2103, + "grad_norm": 1.0513707917964694, + "learning_rate": 1.8960244648318043e-06, + "loss": 0.2224, "step": 62 }, { "epoch": 0.0, - "grad_norm": 7.783725003259625, - "learning_rate": 2.4091778202676867e-06, - "loss": 0.853, + "grad_norm": 3.733463571230695, + "learning_rate": 1.9266055045871564e-06, + "loss": 0.7926, "step": 63 }, { "epoch": 0.0, - "grad_norm": 1.3779057437038236, - "learning_rate": 2.4474187380497133e-06, - "loss": 0.4548, + "grad_norm": 1.4331701359581341, + "learning_rate": 1.957186544342508e-06, + "loss": 0.436, "step": 64 }, { "epoch": 0.0, - "grad_norm": 2.4971913730088304, - "learning_rate": 2.4856596558317402e-06, - "loss": 0.3769, + "grad_norm": 0.968076559901853, + "learning_rate": 1.9877675840978594e-06, + "loss": 0.3406, "step": 65 }, { "epoch": 0.0, - "grad_norm": 2.077225117758281, - "learning_rate": 2.523900573613767e-06, - "loss": 0.4513, + "grad_norm": 1.7069546427476723, + "learning_rate": 2.0183486238532113e-06, + "loss": 0.4375, "step": 66 }, { "epoch": 0.0, - "grad_norm": 1.7469593011509847, - "learning_rate": 2.5621414913957938e-06, - "loss": 0.1754, + "grad_norm": 1.0824662002687286, + "learning_rate": 2.048929663608563e-06, + "loss": 0.1711, "step": 67 }, { "epoch": 0.0, - "grad_norm": 3.1701958149020584, - "learning_rate": 2.6003824091778207e-06, - "loss": 0.381, + "grad_norm": 1.287811817097283, + "learning_rate": 2.0795107033639143e-06, + "loss": 0.2932, "step": 68 }, { "epoch": 0.0, - "grad_norm": 1.0667732033492074, - "learning_rate": 2.6386233269598473e-06, - "loss": 0.365, + "grad_norm": 1.2058019894798737, + "learning_rate": 2.110091743119266e-06, + "loss": 0.3628, "step": 69 }, { "epoch": 0.0, - "grad_norm": 2.805103712753199, - "learning_rate": 2.6768642447418743e-06, - "loss": 0.5996, + "grad_norm": 1.9273038528574367, + "learning_rate": 2.140672782874618e-06, + "loss": 0.5324, "step": 70 }, { "epoch": 0.0, - "grad_norm": 1.1103519370245432, - "learning_rate": 2.7151051625239004e-06, - "loss": 0.2977, + "grad_norm": 1.1423535036310135, + "learning_rate": 2.1712538226299696e-06, + "loss": 0.3543, "step": 71 }, { "epoch": 0.0, - "grad_norm": 0.9741205038133375, - "learning_rate": 2.7533460803059274e-06, - "loss": 0.3441, + "grad_norm": 1.0949424822461358, + "learning_rate": 2.2018348623853215e-06, + "loss": 0.3636, "step": 72 }, { "epoch": 0.0, - "grad_norm": 3.3389588104138266, - "learning_rate": 2.7915869980879544e-06, - "loss": 0.4686, + "grad_norm": 1.4549594179096172, + "learning_rate": 2.232415902140673e-06, + "loss": 0.2468, "step": 73 }, { "epoch": 0.0, - "grad_norm": 1.1622466550114607, - "learning_rate": 2.829827915869981e-06, - "loss": 0.2293, + "grad_norm": 0.8198094235029172, + "learning_rate": 2.2629969418960245e-06, + "loss": 0.2413, "step": 74 }, { "epoch": 0.0, - "grad_norm": 1.8095204360677337, - "learning_rate": 2.868068833652008e-06, - "loss": 0.4982, + "grad_norm": 1.6675227493932148, + "learning_rate": 2.2935779816513764e-06, + "loss": 0.4978, "step": 75 }, { "epoch": 0.0, - "grad_norm": 2.216426651097327, - "learning_rate": 2.906309751434035e-06, - "loss": 0.4559, + "grad_norm": 1.446692978731366, + "learning_rate": 2.324159021406728e-06, + "loss": 0.4012, "step": 76 }, { "epoch": 0.0, - "grad_norm": 1.7137062684636206, - "learning_rate": 2.9445506692160614e-06, - "loss": 0.3426, + "grad_norm": 1.4390957501457091, + "learning_rate": 2.35474006116208e-06, + "loss": 0.3624, "step": 77 }, { "epoch": 0.0, - "grad_norm": 3.186090472631493, - "learning_rate": 2.9827915869980884e-06, - "loss": 0.6149, + "grad_norm": 2.8151804511762015, + "learning_rate": 2.3853211009174317e-06, + "loss": 0.5645, "step": 78 }, { "epoch": 0.0, - "grad_norm": 1.2457594513024053, - "learning_rate": 3.021032504780115e-06, - "loss": 0.2545, + "grad_norm": 1.3426830160741372, + "learning_rate": 2.415902140672783e-06, + "loss": 0.2479, "step": 79 }, { "epoch": 0.0, - "grad_norm": 1.3233143755012595, - "learning_rate": 3.059273422562142e-06, - "loss": 0.3427, + "grad_norm": 1.2681889178622874, + "learning_rate": 2.4464831804281347e-06, + "loss": 0.3634, "step": 80 }, { "epoch": 0.0, - "grad_norm": 1.9962990513631782, - "learning_rate": 3.097514340344169e-06, - "loss": 0.4061, + "grad_norm": 1.1029696697831592, + "learning_rate": 2.4770642201834866e-06, + "loss": 0.3844, "step": 81 }, { "epoch": 0.0, - "grad_norm": 2.1748881777328464, - "learning_rate": 3.135755258126195e-06, - "loss": 0.4804, + "grad_norm": 2.1777977953320735, + "learning_rate": 2.507645259938838e-06, + "loss": 0.4662, "step": 82 }, { "epoch": 0.0, - "grad_norm": 1.1349511100419203, - "learning_rate": 3.173996175908222e-06, - "loss": 0.3407, + "grad_norm": 2.0930817836691333, + "learning_rate": 2.5382262996941896e-06, + "loss": 0.3283, "step": 83 }, { "epoch": 0.0, - "grad_norm": 0.8325230383368618, - "learning_rate": 3.2122370936902485e-06, - "loss": 0.2679, + "grad_norm": 0.9618578596429419, + "learning_rate": 2.5688073394495415e-06, + "loss": 0.2871, "step": 84 }, { "epoch": 0.0, - "grad_norm": 1.4409698819778003, - "learning_rate": 3.2504780114722755e-06, - "loss": 0.2756, + "grad_norm": 1.491372591620853, + "learning_rate": 2.599388379204893e-06, + "loss": 0.2788, "step": 85 }, { "epoch": 0.0, - "grad_norm": 0.7717083869066103, - "learning_rate": 3.2887189292543025e-06, - "loss": 0.2771, + "grad_norm": 1.033756853768047, + "learning_rate": 2.629969418960245e-06, + "loss": 0.2904, "step": 86 }, { "epoch": 0.0, - "grad_norm": 3.0396106154008105, - "learning_rate": 3.326959847036329e-06, - "loss": 0.5461, + "grad_norm": 2.436778514618774, + "learning_rate": 2.6605504587155968e-06, + "loss": 0.5784, "step": 87 }, { - "epoch": 0.01, - "grad_norm": 0.8349352537444616, - "learning_rate": 3.365200764818356e-06, - "loss": 0.3778, + "epoch": 0.0, + "grad_norm": 1.093113945903888, + "learning_rate": 2.691131498470948e-06, + "loss": 0.3698, "step": 88 }, { - "epoch": 0.01, - "grad_norm": 0.8634730104834349, - "learning_rate": 3.4034416826003826e-06, - "loss": 0.26, + "epoch": 0.0, + "grad_norm": 1.0664203102086078, + "learning_rate": 2.7217125382262998e-06, + "loss": 0.3395, "step": 89 }, { - "epoch": 0.01, - "grad_norm": 1.8073245544233498, - "learning_rate": 3.4416826003824095e-06, - "loss": 0.3411, + "epoch": 0.0, + "grad_norm": 1.4632709792867227, + "learning_rate": 2.7522935779816517e-06, + "loss": 0.3909, "step": 90 }, { - "epoch": 0.01, - "grad_norm": 2.7074868343906857, - "learning_rate": 3.4799235181644365e-06, - "loss": 0.7505, + "epoch": 0.0, + "grad_norm": 3.7309198364057177, + "learning_rate": 2.782874617737003e-06, + "loss": 0.6458, "step": 91 }, { - "epoch": 0.01, - "grad_norm": 1.4009581352720348, - "learning_rate": 3.5181644359464626e-06, - "loss": 0.3037, + "epoch": 0.0, + "grad_norm": 1.1422162518395853, + "learning_rate": 2.813455657492355e-06, + "loss": 0.3022, "step": 92 }, { - "epoch": 0.01, - "grad_norm": 2.7581392814988335, - "learning_rate": 3.5564053537284896e-06, - "loss": 0.4342, + "epoch": 0.0, + "grad_norm": 1.6304582732152335, + "learning_rate": 2.844036697247707e-06, + "loss": 0.4352, "step": 93 }, { - "epoch": 0.01, - "grad_norm": 4.586880035707701, - "learning_rate": 3.594646271510517e-06, - "loss": 0.95, + "epoch": 0.0, + "grad_norm": 3.1863798722019894, + "learning_rate": 2.874617737003058e-06, + "loss": 0.6853, "step": 94 }, { - "epoch": 0.01, - "grad_norm": 0.9134436895106094, - "learning_rate": 3.632887189292543e-06, - "loss": 0.1859, + "epoch": 0.0, + "grad_norm": 0.9285198255375843, + "learning_rate": 2.90519877675841e-06, + "loss": 0.1771, "step": 95 }, { - "epoch": 0.01, - "grad_norm": 3.5368674428633557, - "learning_rate": 3.67112810707457e-06, - "loss": 0.4042, + "epoch": 0.0, + "grad_norm": 8.895422932528957, + "learning_rate": 2.935779816513762e-06, + "loss": 0.7249, "step": 96 }, { - "epoch": 0.01, - "grad_norm": 1.695940105136588, - "learning_rate": 3.7093690248565967e-06, - "loss": 0.4439, + "epoch": 0.0, + "grad_norm": 1.8827546621354185, + "learning_rate": 2.9663608562691134e-06, + "loss": 0.3843, "step": 97 }, { - "epoch": 0.01, - "grad_norm": 1.4933772907830718, - "learning_rate": 3.7476099426386236e-06, - "loss": 0.3166, + "epoch": 0.0, + "grad_norm": 1.4098972750111525, + "learning_rate": 2.9969418960244653e-06, + "loss": 0.3115, "step": 98 }, { - "epoch": 0.01, - "grad_norm": 4.929994478426711, - "learning_rate": 3.7858508604206506e-06, - "loss": 0.6366, + "epoch": 0.0, + "grad_norm": 7.913707114335056, + "learning_rate": 3.0275229357798168e-06, + "loss": 0.6868, "step": 99 }, { - "epoch": 0.01, - "grad_norm": 1.0588657191516118, - "learning_rate": 3.824091778202677e-06, - "loss": 0.3998, + "epoch": 0.0, + "grad_norm": 1.204575966433304, + "learning_rate": 3.0581039755351682e-06, + "loss": 0.4232, "step": 100 }, { - "epoch": 0.01, - "grad_norm": 0.8299018542448147, - "learning_rate": 3.862332695984704e-06, - "loss": 0.3014, + "epoch": 0.0, + "grad_norm": 1.2922951097458462, + "learning_rate": 3.08868501529052e-06, + "loss": 0.2423, "step": 101 }, { - "epoch": 0.01, - "grad_norm": 0.894972655187633, - "learning_rate": 3.90057361376673e-06, - "loss": 0.152, + "epoch": 0.0, + "grad_norm": 1.1754783996270737, + "learning_rate": 3.119266055045872e-06, + "loss": 0.2179, "step": 102 }, { - "epoch": 0.01, - "grad_norm": 3.0056612950240744, - "learning_rate": 3.938814531548758e-06, - "loss": 0.5743, + "epoch": 0.0, + "grad_norm": 4.33751709349973, + "learning_rate": 3.149847094801223e-06, + "loss": 0.5415, "step": 103 }, { - "epoch": 0.01, - "grad_norm": 0.9439871853559727, - "learning_rate": 3.977055449330784e-06, - "loss": 0.292, + "epoch": 0.0, + "grad_norm": 1.8223977037973127, + "learning_rate": 3.180428134556575e-06, + "loss": 0.3081, "step": 104 }, { - "epoch": 0.01, - "grad_norm": 1.0959590718704717, - "learning_rate": 4.015296367112811e-06, - "loss": 0.3727, + "epoch": 0.0, + "grad_norm": 1.6163955424306307, + "learning_rate": 3.211009174311927e-06, + "loss": 0.3782, "step": 105 }, { - "epoch": 0.01, - "grad_norm": 2.2912607966735963, - "learning_rate": 4.053537284894838e-06, - "loss": 0.7559, + "epoch": 0.0, + "grad_norm": 2.9741963944177927, + "learning_rate": 3.2415902140672784e-06, + "loss": 0.8341, "step": 106 }, { - "epoch": 0.01, - "grad_norm": 0.9171050201317897, - "learning_rate": 4.091778202676865e-06, - "loss": 0.314, + "epoch": 0.0, + "grad_norm": 1.4032479374425009, + "learning_rate": 3.2721712538226303e-06, + "loss": 0.3134, "step": 107 }, { - "epoch": 0.01, - "grad_norm": 0.6284229206544971, - "learning_rate": 4.130019120458891e-06, - "loss": 0.2165, + "epoch": 0.0, + "grad_norm": 0.6730033777364434, + "learning_rate": 3.3027522935779823e-06, + "loss": 0.2241, "step": 108 }, { "epoch": 0.01, - "grad_norm": 2.212927916169031, - "learning_rate": 4.168260038240919e-06, - "loss": 0.6106, + "grad_norm": 3.8738976822835736, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.624, "step": 109 }, { "epoch": 0.01, - "grad_norm": 0.8099374882393748, - "learning_rate": 4.206500956022944e-06, - "loss": 0.3248, + "grad_norm": 1.399574507841844, + "learning_rate": 3.3639143730886852e-06, + "loss": 0.3419, "step": 110 }, { "epoch": 0.01, - "grad_norm": 2.655364301227268, - "learning_rate": 4.244741873804972e-06, - "loss": 0.5664, + "grad_norm": 2.374032456694889, + "learning_rate": 3.394495412844037e-06, + "loss": 0.5837, "step": 111 }, { "epoch": 0.01, - "grad_norm": 1.4636213484496585, - "learning_rate": 4.282982791586998e-06, - "loss": 0.393, + "grad_norm": 1.920566353832276, + "learning_rate": 3.4250764525993886e-06, + "loss": 0.3732, "step": 112 }, { "epoch": 0.01, - "grad_norm": 1.0063990268097915, - "learning_rate": 4.321223709369025e-06, - "loss": 0.3236, + "grad_norm": 1.5364781648366226, + "learning_rate": 3.4556574923547405e-06, + "loss": 0.3387, "step": 113 }, { "epoch": 0.01, - "grad_norm": 0.6371885352644112, - "learning_rate": 4.359464627151052e-06, - "loss": 0.1923, + "grad_norm": 0.6583827360487446, + "learning_rate": 3.486238532110092e-06, + "loss": 0.1138, "step": 114 }, { "epoch": 0.01, - "grad_norm": 3.490580634292843, - "learning_rate": 4.397705544933079e-06, - "loss": 0.5172, + "grad_norm": 1.982102901622778, + "learning_rate": 3.5168195718654435e-06, + "loss": 0.5624, "step": 115 }, { "epoch": 0.01, - "grad_norm": 0.8466201174343196, - "learning_rate": 4.435946462715105e-06, - "loss": 0.3246, + "grad_norm": 0.7784898839165266, + "learning_rate": 3.5474006116207954e-06, + "loss": 0.302, "step": 116 }, { "epoch": 0.01, - "grad_norm": 1.5069924896763471, - "learning_rate": 4.474187380497133e-06, - "loss": 0.3987, + "grad_norm": 1.4422037410571615, + "learning_rate": 3.5779816513761473e-06, + "loss": 0.3872, "step": 117 }, { "epoch": 0.01, - "grad_norm": 2.72546443790111, - "learning_rate": 4.512428298279159e-06, - "loss": 0.6617, + "grad_norm": 2.4858937811033655, + "learning_rate": 3.6085626911314984e-06, + "loss": 0.6512, "step": 118 }, { "epoch": 0.01, - "grad_norm": 2.2270235639211564, - "learning_rate": 4.550669216061186e-06, - "loss": 0.3173, + "grad_norm": 1.1312079116364202, + "learning_rate": 3.6391437308868503e-06, + "loss": 0.2889, "step": 119 }, { "epoch": 0.01, - "grad_norm": 0.611271995853063, - "learning_rate": 4.5889101338432124e-06, - "loss": 0.2566, + "grad_norm": 0.7188341571305118, + "learning_rate": 3.6697247706422022e-06, + "loss": 0.2514, "step": 120 }, { "epoch": 0.01, - "grad_norm": 2.1591179991848053, - "learning_rate": 4.627151051625239e-06, - "loss": 0.4692, + "grad_norm": 1.8181595950088452, + "learning_rate": 3.7003058103975537e-06, + "loss": 0.4464, "step": 121 }, { "epoch": 0.01, - "grad_norm": 0.8409326010763974, - "learning_rate": 4.665391969407266e-06, - "loss": 0.2925, + "grad_norm": 0.8359553931637378, + "learning_rate": 3.7308868501529056e-06, + "loss": 0.2763, "step": 122 }, { "epoch": 0.01, - "grad_norm": 1.6635869940410941, - "learning_rate": 4.703632887189293e-06, - "loss": 0.4966, + "grad_norm": 1.7071941676273454, + "learning_rate": 3.7614678899082575e-06, + "loss": 0.497, "step": 123 }, { "epoch": 0.01, - "grad_norm": 0.8489652670343713, - "learning_rate": 4.7418738049713195e-06, - "loss": 0.3289, + "grad_norm": 0.91288758078914, + "learning_rate": 3.7920489296636086e-06, + "loss": 0.3321, "step": 124 }, { "epoch": 0.01, - "grad_norm": 0.7948026475678698, - "learning_rate": 4.780114722753346e-06, - "loss": 0.3079, + "grad_norm": 1.0036249295277695, + "learning_rate": 3.8226299694189605e-06, + "loss": 0.3437, "step": 125 }, { "epoch": 0.01, - "grad_norm": 0.62879378549951, - "learning_rate": 4.8183556405353734e-06, - "loss": 0.2514, + "grad_norm": 0.8191724463982983, + "learning_rate": 3.853211009174313e-06, + "loss": 0.2408, "step": 126 }, { "epoch": 0.01, - "grad_norm": 1.9267329931460127, - "learning_rate": 4.8565965583174e-06, - "loss": 0.575, + "grad_norm": 2.194134844467503, + "learning_rate": 3.8837920489296635e-06, + "loss": 0.5882, "step": 127 }, { "epoch": 0.01, - "grad_norm": 0.6930803429078386, - "learning_rate": 4.8948374760994265e-06, - "loss": 0.2828, + "grad_norm": 0.7138060615993572, + "learning_rate": 3.914373088685016e-06, + "loss": 0.2701, "step": 128 }, { "epoch": 0.01, - "grad_norm": 1.567288278653447, - "learning_rate": 4.933078393881454e-06, - "loss": 0.5037, + "grad_norm": 1.2306809170322444, + "learning_rate": 3.944954128440367e-06, + "loss": 0.4799, "step": 129 }, { "epoch": 0.01, - "grad_norm": 2.4508660819457764, - "learning_rate": 4.9713193116634805e-06, - "loss": 0.8513, + "grad_norm": 2.734030366671196, + "learning_rate": 3.975535168195719e-06, + "loss": 0.8143, "step": 130 }, { "epoch": 0.01, - "grad_norm": 0.7264682519312051, - "learning_rate": 5.009560229445507e-06, - "loss": 0.2465, + "grad_norm": 0.813907380650915, + "learning_rate": 4.00611620795107e-06, + "loss": 0.2476, "step": 131 }, { "epoch": 0.01, - "grad_norm": 0.826742636113808, - "learning_rate": 5.047801147227534e-06, - "loss": 0.3116, + "grad_norm": 0.9397904598149287, + "learning_rate": 4.036697247706423e-06, + "loss": 0.426, "step": 132 }, { "epoch": 0.01, - "grad_norm": 1.1392276722179755, - "learning_rate": 5.086042065009561e-06, - "loss": 0.3726, + "grad_norm": 1.3889043403649437, + "learning_rate": 4.067278287461774e-06, + "loss": 0.4055, "step": 133 }, { "epoch": 0.01, - "grad_norm": 1.0376511756469218, - "learning_rate": 5.1242829827915875e-06, - "loss": 0.2853, + "grad_norm": 0.6853913872092066, + "learning_rate": 4.097859327217126e-06, + "loss": 0.214, "step": 134 }, { "epoch": 0.01, - "grad_norm": 2.1718548855836737, - "learning_rate": 5.162523900573614e-06, - "loss": 0.8045, + "grad_norm": 2.608634645749338, + "learning_rate": 4.128440366972478e-06, + "loss": 0.7945, "step": 135 }, { "epoch": 0.01, - "grad_norm": 0.8519765854258867, - "learning_rate": 5.2007648183556415e-06, - "loss": 0.4227, + "grad_norm": 0.8627309147561726, + "learning_rate": 4.1590214067278286e-06, + "loss": 0.3871, "step": 136 }, { "epoch": 0.01, - "grad_norm": 0.8055693275531942, - "learning_rate": 5.239005736137668e-06, - "loss": 0.2194, + "grad_norm": 1.003891431102375, + "learning_rate": 4.189602446483181e-06, + "loss": 0.0966, "step": 137 }, { "epoch": 0.01, - "grad_norm": 1.7267804162988287, - "learning_rate": 5.277246653919695e-06, - "loss": 0.4669, + "grad_norm": 0.9150538390537358, + "learning_rate": 4.220183486238532e-06, + "loss": 0.4021, "step": 138 }, { "epoch": 0.01, - "grad_norm": 0.5457146032547153, - "learning_rate": 5.315487571701722e-06, - "loss": 0.2702, + "grad_norm": 0.7674051837465868, + "learning_rate": 4.250764525993884e-06, + "loss": 0.3073, "step": 139 }, { "epoch": 0.01, - "grad_norm": 0.8774674725605524, - "learning_rate": 5.3537284894837486e-06, - "loss": 0.3322, + "grad_norm": 0.9959065873870295, + "learning_rate": 4.281345565749236e-06, + "loss": 0.3697, "step": 140 }, { "epoch": 0.01, - "grad_norm": 1.150336747102144, - "learning_rate": 5.391969407265774e-06, - "loss": 0.3492, + "grad_norm": 0.8373783098933085, + "learning_rate": 4.311926605504588e-06, + "loss": 0.3278, "step": 141 }, { "epoch": 0.01, - "grad_norm": 2.1991018274354253, - "learning_rate": 5.430210325047801e-06, - "loss": 0.7824, + "grad_norm": 4.486915958162343, + "learning_rate": 4.342507645259939e-06, + "loss": 0.7064, "step": 142 }, { "epoch": 0.01, - "grad_norm": 1.2228654899985612, - "learning_rate": 5.468451242829829e-06, - "loss": 0.4072, + "grad_norm": 1.1654048996396578, + "learning_rate": 4.373088685015291e-06, + "loss": 0.4024, "step": 143 }, { "epoch": 0.01, - "grad_norm": 0.7038931034059297, - "learning_rate": 5.506692160611855e-06, - "loss": 0.3396, + "grad_norm": 0.7067746746186605, + "learning_rate": 4.403669724770643e-06, + "loss": 0.3566, "step": 144 }, { "epoch": 0.01, - "grad_norm": 1.745883442870367, - "learning_rate": 5.544933078393881e-06, - "loss": 0.6713, + "grad_norm": 1.84563082625386, + "learning_rate": 4.4342507645259945e-06, + "loss": 0.6108, "step": 145 }, { "epoch": 0.01, - "grad_norm": 0.5065291039901288, - "learning_rate": 5.583173996175909e-06, - "loss": 0.2111, + "grad_norm": 0.6681476122690627, + "learning_rate": 4.464831804281346e-06, + "loss": 0.1448, "step": 146 }, { "epoch": 0.01, - "grad_norm": 0.89516023920091, - "learning_rate": 5.621414913957935e-06, - "loss": 0.3344, + "grad_norm": 1.0405232062853644, + "learning_rate": 4.4954128440366975e-06, + "loss": 0.3461, "step": 147 }, { "epoch": 0.01, - "grad_norm": 1.5215200346193276, - "learning_rate": 5.659655831739962e-06, - "loss": 0.4512, + "grad_norm": 0.7949934652837498, + "learning_rate": 4.525993883792049e-06, + "loss": 0.364, "step": 148 }, { "epoch": 0.01, - "grad_norm": 1.0561066764454057, - "learning_rate": 5.697896749521989e-06, - "loss": 0.348, + "grad_norm": 1.0848711116616896, + "learning_rate": 4.556574923547401e-06, + "loss": 0.3632, "step": 149 }, { "epoch": 0.01, - "grad_norm": 1.7077571805921623, - "learning_rate": 5.736137667304016e-06, - "loss": 0.44, + "grad_norm": 1.8959000993872266, + "learning_rate": 4.587155963302753e-06, + "loss": 0.4375, "step": 150 }, { "epoch": 0.01, - "grad_norm": 0.7032590857528236, - "learning_rate": 5.774378585086042e-06, - "loss": 0.3231, + "grad_norm": 1.0965700036512873, + "learning_rate": 4.617737003058104e-06, + "loss": 0.2331, "step": 151 }, { "epoch": 0.01, - "grad_norm": 0.6939036942549888, - "learning_rate": 5.81261950286807e-06, - "loss": 0.2651, + "grad_norm": 0.6315554425204258, + "learning_rate": 4.648318042813456e-06, + "loss": 0.2915, "step": 152 }, { "epoch": 0.01, - "grad_norm": 1.23769484190234, - "learning_rate": 5.850860420650096e-06, - "loss": 0.4505, + "grad_norm": 1.8937871002407352, + "learning_rate": 4.678899082568808e-06, + "loss": 0.4949, "step": 153 }, { "epoch": 0.01, - "grad_norm": 3.57505503947449, - "learning_rate": 5.889101338432123e-06, - "loss": 0.5387, + "grad_norm": 2.7295858132289914, + "learning_rate": 4.70948012232416e-06, + "loss": 0.5493, "step": 154 }, { "epoch": 0.01, - "grad_norm": 1.4496022682391447, - "learning_rate": 5.927342256214149e-06, - "loss": 0.4709, + "grad_norm": 1.4863363038682678, + "learning_rate": 4.740061162079511e-06, + "loss": 0.428, "step": 155 }, { "epoch": 0.01, - "grad_norm": 0.9171040848212857, - "learning_rate": 5.965583173996177e-06, - "loss": 0.3718, + "grad_norm": 0.7309226519349551, + "learning_rate": 4.770642201834863e-06, + "loss": 0.3548, "step": 156 }, { "epoch": 0.01, - "grad_norm": 1.3430140069069527, - "learning_rate": 6.003824091778203e-06, - "loss": 0.2232, + "grad_norm": 1.7476121950751375, + "learning_rate": 4.801223241590214e-06, + "loss": 0.2879, "step": 157 }, { "epoch": 0.01, - "grad_norm": 0.7080554277096157, - "learning_rate": 6.04206500956023e-06, - "loss": 0.2103, + "grad_norm": 0.8427509373166522, + "learning_rate": 4.831804281345566e-06, + "loss": 0.2142, "step": 158 }, { "epoch": 0.01, - "grad_norm": 1.2894404724722592, - "learning_rate": 6.080305927342257e-06, - "loss": 0.3918, + "grad_norm": 1.1557806766790273, + "learning_rate": 4.862385321100918e-06, + "loss": 0.3448, "step": 159 }, { "epoch": 0.01, - "grad_norm": 3.1163562975594825, - "learning_rate": 6.118546845124284e-06, - "loss": 0.4489, + "grad_norm": 1.9430914493078817, + "learning_rate": 4.892966360856269e-06, + "loss": 0.3876, "step": 160 }, { "epoch": 0.01, - "grad_norm": 1.8601545568123237, - "learning_rate": 6.15678776290631e-06, - "loss": 0.427, + "grad_norm": 1.6970393626889206, + "learning_rate": 4.923547400611622e-06, + "loss": 0.4626, "step": 161 }, { "epoch": 0.01, - "grad_norm": 0.9819232651964342, - "learning_rate": 6.195028680688338e-06, - "loss": 0.4107, + "grad_norm": 0.9646979458527443, + "learning_rate": 4.954128440366973e-06, + "loss": 0.4257, "step": 162 }, { "epoch": 0.01, - "grad_norm": 1.0574104719168467, - "learning_rate": 6.233269598470364e-06, - "loss": 0.3337, + "grad_norm": 6.0685845081564675, + "learning_rate": 4.984709480122325e-06, + "loss": 0.4528, "step": 163 }, { "epoch": 0.01, - "grad_norm": 0.7953279060143272, - "learning_rate": 6.27151051625239e-06, - "loss": 0.2119, + "grad_norm": 0.6263312419143187, + "learning_rate": 5.015290519877676e-06, + "loss": 0.2578, "step": 164 }, { "epoch": 0.01, - "grad_norm": 1.0482843536337256, - "learning_rate": 6.3097514340344166e-06, - "loss": 0.3817, + "grad_norm": 1.2354709901553151, + "learning_rate": 5.045871559633028e-06, + "loss": 0.4148, "step": 165 }, { "epoch": 0.01, - "grad_norm": 2.1370186037298056, - "learning_rate": 6.347992351816444e-06, - "loss": 0.6306, + "grad_norm": 2.1384651348668022, + "learning_rate": 5.076452599388379e-06, + "loss": 0.6045, "step": 166 }, { "epoch": 0.01, - "grad_norm": 1.6915570774478592, - "learning_rate": 6.3862332695984705e-06, - "loss": 0.3499, + "grad_norm": 1.7593991316187028, + "learning_rate": 5.1070336391437315e-06, + "loss": 0.2988, "step": 167 }, { "epoch": 0.01, - "grad_norm": 0.9773007045626713, - "learning_rate": 6.424474187380497e-06, - "loss": 0.369, + "grad_norm": 1.1070426511637235, + "learning_rate": 5.137614678899083e-06, + "loss": 0.3412, "step": 168 }, { "epoch": 0.01, - "grad_norm": 2.6951767139469633, - "learning_rate": 6.4627151051625245e-06, - "loss": 0.7238, + "grad_norm": 16.755042729934523, + "learning_rate": 5.168195718654435e-06, + "loss": 0.834, "step": 169 }, { "epoch": 0.01, - "grad_norm": 0.6679450780611562, - "learning_rate": 6.500956022944551e-06, - "loss": 0.1775, + "grad_norm": 0.595942287487927, + "learning_rate": 5.198776758409786e-06, + "loss": 0.1784, "step": 170 }, { "epoch": 0.01, - "grad_norm": 1.2440364112811422, - "learning_rate": 6.539196940726578e-06, - "loss": 0.4651, + "grad_norm": 2.9781977459238878, + "learning_rate": 5.229357798165137e-06, + "loss": 0.4856, "step": 171 }, { "epoch": 0.01, - "grad_norm": 1.4532282158517489, - "learning_rate": 6.577437858508605e-06, - "loss": 0.4368, + "grad_norm": 1.743707761854706, + "learning_rate": 5.25993883792049e-06, + "loss": 0.4108, "step": 172 }, { "epoch": 0.01, - "grad_norm": 1.0484087595525342, - "learning_rate": 6.6156787762906315e-06, - "loss": 0.2313, + "grad_norm": 1.4335585377217397, + "learning_rate": 5.290519877675841e-06, + "loss": 0.2801, "step": 173 }, { "epoch": 0.01, - "grad_norm": 1.2959855414896784, - "learning_rate": 6.653919694072658e-06, - "loss": 0.3893, + "grad_norm": 3.268448706873278, + "learning_rate": 5.3211009174311936e-06, + "loss": 0.4064, "step": 174 }, { "epoch": 0.01, - "grad_norm": 1.149727668633913, - "learning_rate": 6.6921606118546855e-06, - "loss": 0.4034, + "grad_norm": 6.046064006825953, + "learning_rate": 5.351681957186545e-06, + "loss": 0.5894, "step": 175 }, { "epoch": 0.01, - "grad_norm": 2.3990532753289666, - "learning_rate": 6.730401529636712e-06, - "loss": 0.7575, + "grad_norm": 2.0907423816342123, + "learning_rate": 5.382262996941896e-06, + "loss": 0.3124, "step": 176 }, { "epoch": 0.01, - "grad_norm": 0.7216334237931397, - "learning_rate": 6.768642447418739e-06, - "loss": 0.2945, + "grad_norm": 1.0392059683210848, + "learning_rate": 5.412844036697248e-06, + "loss": 0.3503, "step": 177 }, { "epoch": 0.01, - "grad_norm": 1.6587270290307061, - "learning_rate": 6.806883365200765e-06, - "loss": 0.5882, + "grad_norm": 1.748876170924891, + "learning_rate": 5.4434250764525995e-06, + "loss": 0.5963, "step": 178 }, { "epoch": 0.01, - "grad_norm": 0.7186727610904583, - "learning_rate": 6.8451242829827925e-06, - "loss": 0.2788, + "grad_norm": 0.7539196564526174, + "learning_rate": 5.474006116207952e-06, + "loss": 0.2244, "step": 179 }, { "epoch": 0.01, - "grad_norm": 0.7580278664452965, - "learning_rate": 6.883365200764819e-06, - "loss": 0.2313, + "grad_norm": 0.9629743745178759, + "learning_rate": 5.504587155963303e-06, + "loss": 0.2872, "step": 180 }, { "epoch": 0.01, - "grad_norm": 3.3484131561448582, - "learning_rate": 6.921606118546846e-06, - "loss": 0.7148, + "grad_norm": 4.06741027922091, + "learning_rate": 5.535168195718656e-06, + "loss": 0.6977, "step": 181 }, { "epoch": 0.01, - "grad_norm": 1.2480514471359154, - "learning_rate": 6.959847036328873e-06, - "loss": 0.5018, + "grad_norm": 1.9051407200342025, + "learning_rate": 5.565749235474006e-06, + "loss": 0.6034, "step": 182 }, { "epoch": 0.01, - "grad_norm": 0.853569911541321, - "learning_rate": 6.9980879541109e-06, - "loss": 0.334, + "grad_norm": 0.8719893024341318, + "learning_rate": 5.596330275229358e-06, + "loss": 0.329, "step": 183 }, { "epoch": 0.01, - "grad_norm": 1.5935129539425128, - "learning_rate": 7.036328871892925e-06, - "loss": 0.484, + "grad_norm": 1.981439202429174, + "learning_rate": 5.62691131498471e-06, + "loss": 0.4728, "step": 184 }, { "epoch": 0.01, - "grad_norm": 0.8264510735195749, - "learning_rate": 7.0745697896749535e-06, - "loss": 0.2185, + "grad_norm": 0.7483912688496023, + "learning_rate": 5.657492354740062e-06, + "loss": 0.2022, "step": 185 }, { "epoch": 0.01, - "grad_norm": 0.9399844092870507, - "learning_rate": 7.112810707456979e-06, - "loss": 0.3235, + "grad_norm": 1.9298684647192363, + "learning_rate": 5.688073394495414e-06, + "loss": 0.294, "step": 186 }, { "epoch": 0.01, - "grad_norm": 0.7297204562340768, - "learning_rate": 7.151051625239006e-06, - "loss": 0.3819, + "grad_norm": 1.1019510638621983, + "learning_rate": 5.7186544342507654e-06, + "loss": 0.4441, "step": 187 }, { "epoch": 0.01, - "grad_norm": 1.2104810033539928, - "learning_rate": 7.189292543021034e-06, - "loss": 0.4596, + "grad_norm": 1.0229267963551927, + "learning_rate": 5.749235474006116e-06, + "loss": 0.4189, "step": 188 }, { "epoch": 0.01, - "grad_norm": 0.931742528076528, - "learning_rate": 7.22753346080306e-06, - "loss": 0.4061, + "grad_norm": 0.9956188357743009, + "learning_rate": 5.7798165137614684e-06, + "loss": 0.3087, "step": 189 }, { "epoch": 0.01, - "grad_norm": 1.490075769645383, - "learning_rate": 7.265774378585086e-06, - "loss": 0.5611, + "grad_norm": 2.5129588509120038, + "learning_rate": 5.81039755351682e-06, + "loss": 0.6393, "step": 190 }, { "epoch": 0.01, - "grad_norm": 0.8152941819336589, - "learning_rate": 7.304015296367113e-06, - "loss": 0.3674, + "grad_norm": 0.6523042917995521, + "learning_rate": 5.840978593272172e-06, + "loss": 0.2511, "step": 191 }, { "epoch": 0.01, - "grad_norm": 0.710166833510888, - "learning_rate": 7.34225621414914e-06, - "loss": 0.2668, + "grad_norm": 0.8102556333991116, + "learning_rate": 5.871559633027524e-06, + "loss": 0.4086, "step": 192 }, { "epoch": 0.01, - "grad_norm": 1.568404819733602, - "learning_rate": 7.380497131931167e-06, - "loss": 0.1746, + "grad_norm": 1.0311729709467465, + "learning_rate": 5.902140672782875e-06, + "loss": 0.1756, "step": 193 }, { "epoch": 0.01, - "grad_norm": 1.2671387794567825, - "learning_rate": 7.418738049713193e-06, - "loss": 0.519, + "grad_norm": 2.235756450108399, + "learning_rate": 5.932721712538227e-06, + "loss": 0.6144, "step": 194 }, { "epoch": 0.01, - "grad_norm": 0.7997462545674416, - "learning_rate": 7.456978967495221e-06, - "loss": 0.3353, + "grad_norm": 0.9232365776140682, + "learning_rate": 5.963302752293578e-06, + "loss": 0.3716, "step": 195 }, { "epoch": 0.01, - "grad_norm": 0.9409506841315287, - "learning_rate": 7.495219885277247e-06, - "loss": 0.3744, + "grad_norm": 0.8667312621772022, + "learning_rate": 5.9938837920489305e-06, + "loss": 0.3824, "step": 196 }, { "epoch": 0.01, - "grad_norm": 1.0262626181568872, - "learning_rate": 7.533460803059274e-06, - "loss": 0.1996, + "grad_norm": 0.8949942763725209, + "learning_rate": 6.024464831804282e-06, + "loss": 0.2913, "step": 197 }, { "epoch": 0.01, - "grad_norm": 0.6961696439810705, - "learning_rate": 7.571701720841301e-06, - "loss": 0.3293, + "grad_norm": 0.6167732198797761, + "learning_rate": 6.0550458715596335e-06, + "loss": 0.2619, "step": 198 }, { "epoch": 0.01, - "grad_norm": 0.805884804921434, - "learning_rate": 7.609942638623328e-06, - "loss": 0.3308, + "grad_norm": 0.9634660569871618, + "learning_rate": 6.085626911314986e-06, + "loss": 0.3756, "step": 199 }, { "epoch": 0.01, - "grad_norm": 0.94997387801345, - "learning_rate": 7.648183556405354e-06, - "loss": 0.3929, + "grad_norm": 1.5629625117733297, + "learning_rate": 6.1162079510703365e-06, + "loss": 0.5047, "step": 200 }, { "epoch": 0.01, - "grad_norm": 0.7291239386927754, - "learning_rate": 7.686424474187381e-06, - "loss": 0.3934, + "grad_norm": 0.7809240701431114, + "learning_rate": 6.146788990825688e-06, + "loss": 0.3506, "step": 201 }, { "epoch": 0.01, - "grad_norm": 2.004028030890788, - "learning_rate": 7.724665391969407e-06, - "loss": 0.8334, + "grad_norm": 2.1375791584853383, + "learning_rate": 6.17737003058104e-06, + "loss": 0.6487, "step": 202 }, { "epoch": 0.01, - "grad_norm": 0.7999130816704142, - "learning_rate": 7.762906309751434e-06, - "loss": 0.3051, + "grad_norm": 0.7827517797926076, + "learning_rate": 6.207951070336392e-06, + "loss": 0.3578, "step": 203 }, { "epoch": 0.01, - "grad_norm": 0.5985667826912873, - "learning_rate": 7.80114722753346e-06, - "loss": 0.2605, + "grad_norm": 0.7016954304377935, + "learning_rate": 6.238532110091744e-06, + "loss": 0.2803, "step": 204 }, { "epoch": 0.01, - "grad_norm": 1.1179195289862098, - "learning_rate": 7.839388145315489e-06, - "loss": 0.2994, + "grad_norm": 1.0272677012145377, + "learning_rate": 6.269113149847096e-06, + "loss": 0.2799, "step": 205 }, { "epoch": 0.01, - "grad_norm": 1.0573596138813663, - "learning_rate": 7.877629063097515e-06, - "loss": 0.4686, + "grad_norm": 1.4383931929682603, + "learning_rate": 6.299694189602446e-06, + "loss": 0.4794, "step": 206 }, { "epoch": 0.01, - "grad_norm": 1.024477703497844, - "learning_rate": 7.915869980879542e-06, - "loss": 0.3344, + "grad_norm": 0.8459267391845401, + "learning_rate": 6.330275229357799e-06, + "loss": 0.3339, "step": 207 }, { "epoch": 0.01, - "grad_norm": 0.8805912272772863, - "learning_rate": 7.954110898661568e-06, - "loss": 0.4153, + "grad_norm": 0.78025624675871, + "learning_rate": 6.36085626911315e-06, + "loss": 0.4105, "step": 208 }, { "epoch": 0.01, - "grad_norm": 1.8453276392969327, - "learning_rate": 7.992351816443595e-06, - "loss": 0.3134, + "grad_norm": 0.6623810180029133, + "learning_rate": 6.391437308868502e-06, + "loss": 0.1782, "step": 209 }, { "epoch": 0.01, - "grad_norm": 0.6630727990199321, - "learning_rate": 8.030592734225622e-06, - "loss": 0.278, + "grad_norm": 0.7778848534162461, + "learning_rate": 6.422018348623854e-06, + "loss": 0.3507, "step": 210 }, { "epoch": 0.01, - "grad_norm": 1.593524418186559, - "learning_rate": 8.06883365200765e-06, - "loss": 0.3308, + "grad_norm": 0.6560569098697357, + "learning_rate": 6.452599388379206e-06, + "loss": 0.3087, "step": 211 }, { "epoch": 0.01, - "grad_norm": 1.5867712659122044, - "learning_rate": 8.107074569789676e-06, - "loss": 0.5022, + "grad_norm": 1.7114425698835818, + "learning_rate": 6.483180428134557e-06, + "loss": 0.5154, "step": 212 }, { "epoch": 0.01, - "grad_norm": 1.858433894348274, - "learning_rate": 8.145315487571703e-06, - "loss": 0.3147, + "grad_norm": 0.6889659100785178, + "learning_rate": 6.513761467889908e-06, + "loss": 0.2943, "step": 213 }, { "epoch": 0.01, - "grad_norm": 1.9048694714090977, - "learning_rate": 8.18355640535373e-06, - "loss": 0.8704, + "grad_norm": 2.2908756798050636, + "learning_rate": 6.544342507645261e-06, + "loss": 0.8062, "step": 214 }, { "epoch": 0.01, - "grad_norm": 0.795882586881069, - "learning_rate": 8.221797323135756e-06, - "loss": 0.3508, + "grad_norm": 0.7967143432356965, + "learning_rate": 6.574923547400612e-06, + "loss": 0.3369, "step": 215 }, { "epoch": 0.01, - "grad_norm": 1.2363337807852626, - "learning_rate": 8.260038240917783e-06, - "loss": 0.2234, + "grad_norm": 0.5703746818280078, + "learning_rate": 6.6055045871559645e-06, + "loss": 0.2597, "step": 216 }, { "epoch": 0.01, - "grad_norm": 1.2961495565006664, - "learning_rate": 8.298279158699809e-06, - "loss": 0.3844, + "grad_norm": 1.1364167821993945, + "learning_rate": 6.636085626911316e-06, + "loss": 0.3605, "step": 217 }, { "epoch": 0.01, - "grad_norm": 1.7121762384248471, - "learning_rate": 8.336520076481837e-06, - "loss": 0.6158, + "grad_norm": 2.0648778888584127, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6531, "step": 218 }, { "epoch": 0.01, - "grad_norm": 0.7273021014242879, - "learning_rate": 8.374760994263862e-06, - "loss": 0.2802, + "grad_norm": 0.6038783969023067, + "learning_rate": 6.697247706422019e-06, + "loss": 0.2848, "step": 219 }, { "epoch": 0.01, - "grad_norm": 2.6273244509030462, - "learning_rate": 8.413001912045889e-06, - "loss": 0.7777, + "grad_norm": 1.03834383898345, + "learning_rate": 6.7278287461773705e-06, + "loss": 0.4219, "step": 220 }, { "epoch": 0.01, - "grad_norm": 1.4955213875868227, - "learning_rate": 8.451242829827917e-06, - "loss": 0.7094, + "grad_norm": 1.9149126612284773, + "learning_rate": 6.758409785932723e-06, + "loss": 0.7638, "step": 221 }, { "epoch": 0.01, - "grad_norm": 1.0844734774581914, - "learning_rate": 8.489483747609944e-06, - "loss": 0.2128, + "grad_norm": 0.5476725930632746, + "learning_rate": 6.788990825688074e-06, + "loss": 0.2069, "step": 222 }, { "epoch": 0.01, - "grad_norm": 0.7637627517102057, - "learning_rate": 8.52772466539197e-06, - "loss": 0.416, + "grad_norm": 0.6949295487001947, + "learning_rate": 6.819571865443425e-06, + "loss": 0.3383, "step": 223 }, { "epoch": 0.01, - "grad_norm": 1.5085068537732527, - "learning_rate": 8.565965583173997e-06, - "loss": 0.4051, + "grad_norm": 2.839944387833604, + "learning_rate": 6.850152905198777e-06, + "loss": 0.606, "step": 224 }, { "epoch": 0.01, - "grad_norm": 0.6779193752161682, - "learning_rate": 8.604206500956023e-06, - "loss": 0.248, + "grad_norm": 0.7736955057458493, + "learning_rate": 6.880733944954129e-06, + "loss": 0.2625, "step": 225 }, { "epoch": 0.01, - "grad_norm": 3.324560462924011, - "learning_rate": 8.64244741873805e-06, - "loss": 0.823, + "grad_norm": 2.4174870123849326, + "learning_rate": 6.911314984709481e-06, + "loss": 0.8062, "step": 226 }, { "epoch": 0.01, - "grad_norm": 0.7247870815802524, - "learning_rate": 8.680688336520076e-06, - "loss": 0.3926, + "grad_norm": 0.8721822096067412, + "learning_rate": 6.941896024464833e-06, + "loss": 0.3815, "step": 227 }, { "epoch": 0.01, - "grad_norm": 0.874007650243524, - "learning_rate": 8.718929254302105e-06, - "loss": 0.3379, + "grad_norm": 0.7188439344849527, + "learning_rate": 6.972477064220184e-06, + "loss": 0.2521, "step": 228 }, { "epoch": 0.01, - "grad_norm": 1.4143629669283038, - "learning_rate": 8.757170172084131e-06, - "loss": 0.3663, + "grad_norm": 2.377125921548495, + "learning_rate": 7.0030581039755356e-06, + "loss": 0.6094, "step": 229 }, { "epoch": 0.01, - "grad_norm": 1.164683195417105, - "learning_rate": 8.795411089866158e-06, - "loss": 0.2845, + "grad_norm": 0.6777293145930288, + "learning_rate": 7.033639143730887e-06, + "loss": 0.2064, "step": 230 }, { "epoch": 0.01, - "grad_norm": 0.5604708590835067, - "learning_rate": 8.833652007648184e-06, - "loss": 0.3083, + "grad_norm": 0.7274148101358805, + "learning_rate": 7.0642201834862385e-06, + "loss": 0.3427, "step": 231 }, { "epoch": 0.01, - "grad_norm": 1.196489898146485, - "learning_rate": 8.87189292543021e-06, - "loss": 0.4396, + "grad_norm": 1.228664566153047, + "learning_rate": 7.094801223241591e-06, + "loss": 0.3745, "step": 232 }, { "epoch": 0.01, - "grad_norm": 2.3549774616852077, - "learning_rate": 8.910133843212237e-06, - "loss": 0.7806, + "grad_norm": 2.3010104651391137, + "learning_rate": 7.125382262996942e-06, + "loss": 0.6975, "step": 233 }, { "epoch": 0.01, - "grad_norm": 0.8069917514152943, - "learning_rate": 8.948374760994266e-06, - "loss": 0.3283, + "grad_norm": 0.8441680062439891, + "learning_rate": 7.155963302752295e-06, + "loss": 0.3513, "step": 234 }, { "epoch": 0.01, - "grad_norm": 0.8060819660517314, - "learning_rate": 8.986615678776292e-06, - "loss": 0.4036, + "grad_norm": 1.0555790027411698, + "learning_rate": 7.186544342507645e-06, + "loss": 0.4059, "step": 235 }, { "epoch": 0.01, - "grad_norm": 1.6268257383806288, - "learning_rate": 9.024856596558319e-06, - "loss": 0.2297, + "grad_norm": 0.6459776115670207, + "learning_rate": 7.217125382262997e-06, + "loss": 0.1799, "step": 236 }, { "epoch": 0.01, - "grad_norm": 0.8055104715664961, - "learning_rate": 9.063097514340345e-06, - "loss": 0.325, + "grad_norm": 0.7663421135004944, + "learning_rate": 7.247706422018349e-06, + "loss": 0.3065, "step": 237 }, { "epoch": 0.01, - "grad_norm": 1.922017240792222, - "learning_rate": 9.101338432122372e-06, - "loss": 0.6324, + "grad_norm": 3.006211329319932, + "learning_rate": 7.278287461773701e-06, + "loss": 0.6771, "step": 238 }, { "epoch": 0.01, - "grad_norm": 1.1570165829521175, - "learning_rate": 9.139579349904398e-06, - "loss": 0.4164, + "grad_norm": 0.9203900291884897, + "learning_rate": 7.308868501529053e-06, + "loss": 0.4023, "step": 239 }, { "epoch": 0.01, - "grad_norm": 1.0882648871617964, - "learning_rate": 9.177820267686425e-06, - "loss": 0.3779, + "grad_norm": 0.8002503075901388, + "learning_rate": 7.3394495412844045e-06, + "loss": 0.3513, "step": 240 }, { "epoch": 0.01, - "grad_norm": 2.0650365945242295, - "learning_rate": 9.216061185468453e-06, - "loss": 0.6302, + "grad_norm": 2.5765533599687913, + "learning_rate": 7.370030581039755e-06, + "loss": 0.5199, "step": 241 }, { "epoch": 0.01, - "grad_norm": 0.7876721236295158, - "learning_rate": 9.254302103250478e-06, - "loss": 0.2161, + "grad_norm": 0.7466662650627771, + "learning_rate": 7.4006116207951074e-06, + "loss": 0.224, "step": 242 }, { "epoch": 0.01, - "grad_norm": 0.877348325878266, - "learning_rate": 9.292543021032505e-06, - "loss": 0.3256, + "grad_norm": 0.9923797436053715, + "learning_rate": 7.431192660550459e-06, + "loss": 0.3222, "step": 243 }, { "epoch": 0.01, - "grad_norm": 1.2294123749330266, - "learning_rate": 9.330783938814533e-06, - "loss": 0.4789, + "grad_norm": 1.0301250651028047, + "learning_rate": 7.461773700305811e-06, + "loss": 0.4212, "step": 244 }, { "epoch": 0.01, - "grad_norm": 2.0693063329445263, - "learning_rate": 9.36902485659656e-06, - "loss": 0.5469, + "grad_norm": 2.3480764358410604, + "learning_rate": 7.492354740061163e-06, + "loss": 0.548, "step": 245 }, { "epoch": 0.01, - "grad_norm": 0.8369746366963294, - "learning_rate": 9.407265774378586e-06, - "loss": 0.342, + "grad_norm": 0.7814353726760943, + "learning_rate": 7.522935779816515e-06, + "loss": 0.2655, "step": 246 }, { "epoch": 0.01, - "grad_norm": 0.9630683957243528, - "learning_rate": 9.445506692160612e-06, - "loss": 0.3704, + "grad_norm": 0.7208081461233595, + "learning_rate": 7.553516819571866e-06, + "loss": 0.3723, "step": 247 }, { "epoch": 0.01, - "grad_norm": 0.5500498119249109, - "learning_rate": 9.483747609942639e-06, - "loss": 0.0887, + "grad_norm": 1.110897860240016, + "learning_rate": 7.584097859327217e-06, + "loss": 0.0905, "step": 248 }, { "epoch": 0.01, - "grad_norm": 0.7378523176182469, - "learning_rate": 9.521988527724666e-06, - "loss": 0.3357, + "grad_norm": 0.7492195329269087, + "learning_rate": 7.6146788990825695e-06, + "loss": 0.3429, "step": 249 }, { "epoch": 0.01, - "grad_norm": 1.2327162595508403, - "learning_rate": 9.560229445506692e-06, - "loss": 0.4636, + "grad_norm": 1.1872761753193617, + "learning_rate": 7.645259938837921e-06, + "loss": 0.4847, "step": 250 }, { "epoch": 0.01, - "grad_norm": 0.9886430366490976, - "learning_rate": 9.59847036328872e-06, - "loss": 0.416, + "grad_norm": 0.9754240277457226, + "learning_rate": 7.675840978593273e-06, + "loss": 0.3903, "step": 251 }, { "epoch": 0.01, - "grad_norm": 0.9459014328163804, - "learning_rate": 9.636711281070747e-06, - "loss": 0.3347, + "grad_norm": 0.9295037610052851, + "learning_rate": 7.706422018348626e-06, + "loss": 0.3871, "step": 252 }, { "epoch": 0.01, - "grad_norm": 1.4714918256305243, - "learning_rate": 9.674952198852773e-06, - "loss": 0.6434, + "grad_norm": 1.1093522955080313, + "learning_rate": 7.737003058103975e-06, + "loss": 0.5004, "step": 253 }, { "epoch": 0.01, - "grad_norm": 0.6094277496484368, - "learning_rate": 9.7131931166348e-06, - "loss": 0.2764, + "grad_norm": 1.0757285158880268, + "learning_rate": 7.767584097859327e-06, + "loss": 0.2359, "step": 254 }, { "epoch": 0.01, - "grad_norm": 0.6710565760041877, - "learning_rate": 9.751434034416827e-06, - "loss": 0.242, + "grad_norm": 0.6359404616760351, + "learning_rate": 7.79816513761468e-06, + "loss": 0.2966, "step": 255 }, { "epoch": 0.01, - "grad_norm": 2.07823406563584, - "learning_rate": 9.789674952198853e-06, - "loss": 0.8126, + "grad_norm": 2.156136784597946, + "learning_rate": 7.828746177370032e-06, + "loss": 0.8406, "step": 256 }, { "epoch": 0.01, - "grad_norm": 1.7712955553118728, - "learning_rate": 9.827915869980881e-06, - "loss": 0.5811, + "grad_norm": 1.1395863799096988, + "learning_rate": 7.859327217125383e-06, + "loss": 0.6139, "step": 257 }, { "epoch": 0.01, - "grad_norm": 0.8150028793715761, - "learning_rate": 9.866156787762908e-06, - "loss": 0.2496, + "grad_norm": 0.8198175381111662, + "learning_rate": 7.889908256880735e-06, + "loss": 0.2974, "step": 258 }, { "epoch": 0.01, - "grad_norm": 0.7771432411433113, - "learning_rate": 9.904397705544934e-06, - "loss": 0.3765, + "grad_norm": 0.7813166509177186, + "learning_rate": 7.920489296636086e-06, + "loss": 0.3595, "step": 259 }, { "epoch": 0.01, - "grad_norm": 0.6160523614998065, - "learning_rate": 9.942638623326961e-06, - "loss": 0.2061, + "grad_norm": 1.0388730778049184, + "learning_rate": 7.951070336391438e-06, + "loss": 0.3006, "step": 260 }, { "epoch": 0.01, - "grad_norm": 0.7046157767914728, - "learning_rate": 9.980879541108988e-06, - "loss": 0.232, + "grad_norm": 1.5543601443420103, + "learning_rate": 7.981651376146789e-06, + "loss": 0.1913, "step": 261 }, { - "epoch": 0.02, - "grad_norm": 2.149024983560915, - "learning_rate": 1.0019120458891014e-05, - "loss": 0.4943, + "epoch": 0.01, + "grad_norm": 0.7933105741285864, + "learning_rate": 8.01223241590214e-06, + "loss": 0.4219, "step": 262 }, { - "epoch": 0.02, - "grad_norm": 1.2286307024685774, - "learning_rate": 1.005736137667304e-05, - "loss": 0.4477, + "epoch": 0.01, + "grad_norm": 0.7478939291210178, + "learning_rate": 8.042813455657494e-06, + "loss": 0.4328, "step": 263 }, { - "epoch": 0.02, - "grad_norm": 1.097112552920304, - "learning_rate": 1.0095602294455067e-05, - "loss": 0.3751, + "epoch": 0.01, + "grad_norm": 1.2206596211456053, + "learning_rate": 8.073394495412845e-06, + "loss": 0.1402, "step": 264 }, { - "epoch": 0.02, - "grad_norm": 0.7614533976101915, - "learning_rate": 1.0133843212237095e-05, - "loss": 0.3311, + "epoch": 0.01, + "grad_norm": 0.8138014072241885, + "learning_rate": 8.103975535168197e-06, + "loss": 0.3852, "step": 265 }, { - "epoch": 0.02, - "grad_norm": 0.7984309540612244, - "learning_rate": 1.0172084130019122e-05, - "loss": 0.3827, + "epoch": 0.01, + "grad_norm": 0.7242538877420546, + "learning_rate": 8.134556574923548e-06, + "loss": 0.3119, "step": 266 }, { - "epoch": 0.02, - "grad_norm": 0.564765501949692, - "learning_rate": 1.0210325047801149e-05, - "loss": 0.213, + "epoch": 0.01, + "grad_norm": 0.8470395027208345, + "learning_rate": 8.1651376146789e-06, + "loss": 0.3194, "step": 267 }, { - "epoch": 0.02, - "grad_norm": 1.75929373969915, - "learning_rate": 1.0248565965583175e-05, - "loss": 0.3951, + "epoch": 0.01, + "grad_norm": 0.8269864606439408, + "learning_rate": 8.195718654434251e-06, + "loss": 0.3704, "step": 268 }, { - "epoch": 0.02, - "grad_norm": 2.166353414260506, - "learning_rate": 1.0286806883365202e-05, - "loss": 0.6514, + "epoch": 0.01, + "grad_norm": 1.4414825479093278, + "learning_rate": 8.226299694189603e-06, + "loss": 0.6307, "step": 269 }, { - "epoch": 0.02, - "grad_norm": 0.49891390595289997, - "learning_rate": 1.0325047801147228e-05, - "loss": 0.2669, + "epoch": 0.01, + "grad_norm": 0.6648340963285406, + "learning_rate": 8.256880733944956e-06, + "loss": 0.1903, "step": 270 }, { - "epoch": 0.02, - "grad_norm": 0.7155232121133612, - "learning_rate": 1.0363288718929255e-05, - "loss": 0.3295, + "epoch": 0.01, + "grad_norm": 0.6931424601917758, + "learning_rate": 8.287461773700306e-06, + "loss": 0.3134, "step": 271 }, { - "epoch": 0.02, - "grad_norm": 2.4276693546072683, - "learning_rate": 1.0401529636711283e-05, - "loss": 0.8387, - "step": 272 + "epoch": 0.01, + "grad_norm": 1.8853101134324053, + "learning_rate": 8.318042813455657e-06, + "loss": 0.6335, + "step": 272 }, { - "epoch": 0.02, - "grad_norm": 0.7231598135654922, - "learning_rate": 1.043977055449331e-05, - "loss": 0.3431, + "epoch": 0.01, + "grad_norm": 1.1563705540376346, + "learning_rate": 8.34862385321101e-06, + "loss": 0.4397, "step": 273 }, { - "epoch": 0.02, - "grad_norm": 1.7399633483574997, - "learning_rate": 1.0478011472275336e-05, - "loss": 0.4369, + "epoch": 0.01, + "grad_norm": 0.8124464000726055, + "learning_rate": 8.379204892966362e-06, + "loss": 0.3661, "step": 274 }, { - "epoch": 0.02, - "grad_norm": 1.0433473187882643, - "learning_rate": 1.0516252390057363e-05, - "loss": 0.4318, + "epoch": 0.01, + "grad_norm": 0.8105427241463092, + "learning_rate": 8.409785932721713e-06, + "loss": 0.4586, "step": 275 }, { - "epoch": 0.02, - "grad_norm": 0.4793959174200006, - "learning_rate": 1.055449330783939e-05, - "loss": 0.2016, + "epoch": 0.01, + "grad_norm": 0.5357711109158649, + "learning_rate": 8.440366972477065e-06, + "loss": 0.2098, "step": 276 }, { - "epoch": 0.02, - "grad_norm": 1.1899034899997953, - "learning_rate": 1.0592734225621416e-05, - "loss": 0.4827, + "epoch": 0.01, + "grad_norm": 1.5360722979697834, + "learning_rate": 8.470948012232416e-06, + "loss": 0.4153, "step": 277 }, { - "epoch": 0.02, - "grad_norm": 0.7622063676502636, - "learning_rate": 1.0630975143403444e-05, - "loss": 0.3748, + "epoch": 0.01, + "grad_norm": 0.745352890390255, + "learning_rate": 8.501529051987768e-06, + "loss": 0.4, "step": 278 }, { - "epoch": 0.02, - "grad_norm": 1.0017165788816116, - "learning_rate": 1.066921606118547e-05, - "loss": 0.4185, + "epoch": 0.01, + "grad_norm": 1.4003080145679683, + "learning_rate": 8.53211009174312e-06, + "loss": 0.5465, "step": 279 }, { - "epoch": 0.02, - "grad_norm": 0.8309753387936472, - "learning_rate": 1.0707456978967497e-05, - "loss": 0.4142, + "epoch": 0.01, + "grad_norm": 0.7829585221579652, + "learning_rate": 8.562691131498472e-06, + "loss": 0.3896, "step": 280 }, { - "epoch": 0.02, - "grad_norm": 0.6895111232491048, - "learning_rate": 1.0745697896749524e-05, - "loss": 0.211, + "epoch": 0.01, + "grad_norm": 0.8361081286916281, + "learning_rate": 8.593272171253824e-06, + "loss": 0.2742, "step": 281 }, { - "epoch": 0.02, - "grad_norm": 0.6991545205239938, - "learning_rate": 1.0783938814531549e-05, - "loss": 0.3342, + "epoch": 0.01, + "grad_norm": 0.7728878594774012, + "learning_rate": 8.623853211009175e-06, + "loss": 0.3566, "step": 282 }, { - "epoch": 0.02, - "grad_norm": 0.8430434837260139, - "learning_rate": 1.0822179732313575e-05, - "loss": 0.3487, + "epoch": 0.01, + "grad_norm": 0.7858461329116725, + "learning_rate": 8.654434250764527e-06, + "loss": 0.3368, "step": 283 }, { - "epoch": 0.02, - "grad_norm": 2.4751180199401044, - "learning_rate": 1.0860420650095602e-05, - "loss": 0.5592, + "epoch": 0.01, + "grad_norm": 1.7053783257735307, + "learning_rate": 8.685015290519878e-06, + "loss": 0.5649, "step": 284 }, { - "epoch": 0.02, - "grad_norm": 1.1958526321793117, - "learning_rate": 1.0898661567877632e-05, - "loss": 0.4458, + "epoch": 0.01, + "grad_norm": 0.9796624221853422, + "learning_rate": 8.71559633027523e-06, + "loss": 0.5042, "step": 285 }, { - "epoch": 0.02, - "grad_norm": 0.893083908512126, - "learning_rate": 1.0936902485659658e-05, - "loss": 0.3897, + "epoch": 0.01, + "grad_norm": 0.7502616242629604, + "learning_rate": 8.746177370030581e-06, + "loss": 0.3745, "step": 286 }, { - "epoch": 0.02, - "grad_norm": 1.1128175053171323, - "learning_rate": 1.0975143403441683e-05, - "loss": 0.3729, + "epoch": 0.01, + "grad_norm": 0.7865669305134176, + "learning_rate": 8.776758409785935e-06, + "loss": 0.3314, "step": 287 }, { - "epoch": 0.02, - "grad_norm": 0.7310765740074888, - "learning_rate": 1.101338432122371e-05, - "loss": 0.1865, + "epoch": 0.01, + "grad_norm": 0.750507980180111, + "learning_rate": 8.807339449541286e-06, + "loss": 0.1672, "step": 288 }, { - "epoch": 0.02, - "grad_norm": 1.146099557909946, - "learning_rate": 1.1051625239005736e-05, - "loss": 0.3986, + "epoch": 0.01, + "grad_norm": 0.8574640040502772, + "learning_rate": 8.837920489296636e-06, + "loss": 0.4182, "step": 289 }, { - "epoch": 0.02, - "grad_norm": 0.8860784783630814, - "learning_rate": 1.1089866156787763e-05, - "loss": 0.4463, + "epoch": 0.01, + "grad_norm": 0.9761410121659166, + "learning_rate": 8.868501529051989e-06, + "loss": 0.3252, "step": 290 }, { - "epoch": 0.02, - "grad_norm": 0.7447706908798476, - "learning_rate": 1.1128107074569791e-05, - "loss": 0.3275, + "epoch": 0.01, + "grad_norm": 0.900373323607157, + "learning_rate": 8.89908256880734e-06, + "loss": 0.3753, "step": 291 }, { - "epoch": 0.02, - "grad_norm": 1.202436710568476, - "learning_rate": 1.1166347992351817e-05, - "loss": 0.5282, + "epoch": 0.01, + "grad_norm": 0.8811686643189814, + "learning_rate": 8.929663608562692e-06, + "loss": 0.4141, "step": 292 }, { - "epoch": 0.02, - "grad_norm": 1.7152994728714328, - "learning_rate": 1.1204588910133844e-05, - "loss": 0.7841, + "epoch": 0.01, + "grad_norm": 1.6556180206640245, + "learning_rate": 8.960244648318043e-06, + "loss": 0.7313, "step": 293 }, { - "epoch": 0.02, - "grad_norm": 0.5435181164029416, - "learning_rate": 1.124282982791587e-05, - "loss": 0.2422, + "epoch": 0.01, + "grad_norm": 0.6197447614759172, + "learning_rate": 8.990825688073395e-06, + "loss": 0.2555, "step": 294 }, { - "epoch": 0.02, - "grad_norm": 0.6497594633770308, - "learning_rate": 1.1281070745697897e-05, - "loss": 0.315, + "epoch": 0.01, + "grad_norm": 0.6435016896033203, + "learning_rate": 9.021406727828746e-06, + "loss": 0.2879, "step": 295 }, { - "epoch": 0.02, - "grad_norm": 1.4737099800720082, - "learning_rate": 1.1319311663479924e-05, - "loss": 0.6034, + "epoch": 0.01, + "grad_norm": 1.6833695167150602, + "learning_rate": 9.051987767584098e-06, + "loss": 0.6286, "step": 296 }, { - "epoch": 0.02, - "grad_norm": 1.60322244237985, - "learning_rate": 1.135755258126195e-05, - "loss": 0.3813, + "epoch": 0.01, + "grad_norm": 1.4149862929273584, + "learning_rate": 9.08256880733945e-06, + "loss": 0.4438, "step": 297 }, { - "epoch": 0.02, - "grad_norm": 0.7002085464698281, - "learning_rate": 1.1395793499043978e-05, - "loss": 0.3664, + "epoch": 0.01, + "grad_norm": 0.8498702304557568, + "learning_rate": 9.113149847094803e-06, + "loss": 0.3515, "step": 298 }, { - "epoch": 0.02, - "grad_norm": 0.951906463609564, - "learning_rate": 1.1434034416826005e-05, - "loss": 0.4852, + "epoch": 0.01, + "grad_norm": 0.8523773132344277, + "learning_rate": 9.143730886850154e-06, + "loss": 0.4076, "step": 299 }, { - "epoch": 0.02, - "grad_norm": 0.44217514977603756, - "learning_rate": 1.1472275334608032e-05, - "loss": 0.1206, + "epoch": 0.01, + "grad_norm": 0.5624440781104854, + "learning_rate": 9.174311926605506e-06, + "loss": 0.1182, "step": 300 }, { - "epoch": 0.02, - "grad_norm": 0.6930363078552553, - "learning_rate": 1.1510516252390058e-05, - "loss": 0.3479, + "epoch": 0.01, + "grad_norm": 0.8298233411139421, + "learning_rate": 9.204892966360857e-06, + "loss": 0.3659, "step": 301 }, { - "epoch": 0.02, - "grad_norm": 1.5346406725473178, - "learning_rate": 1.1548757170172085e-05, - "loss": 0.4308, + "epoch": 0.01, + "grad_norm": 1.4104560359545986, + "learning_rate": 9.235474006116209e-06, + "loss": 0.405, "step": 302 }, { - "epoch": 0.02, - "grad_norm": 1.83215837021332, - "learning_rate": 1.1586998087954111e-05, - "loss": 0.5532, + "epoch": 0.01, + "grad_norm": 2.0032774097771338, + "learning_rate": 9.26605504587156e-06, + "loss": 0.4441, "step": 303 }, { - "epoch": 0.02, - "grad_norm": 0.6617004606774994, - "learning_rate": 1.162523900573614e-05, - "loss": 0.2918, + "epoch": 0.01, + "grad_norm": 0.748144233633211, + "learning_rate": 9.296636085626912e-06, + "loss": 0.3472, "step": 304 }, { - "epoch": 0.02, - "grad_norm": 1.7033213061876957, - "learning_rate": 1.1663479923518166e-05, - "loss": 0.8174, + "epoch": 0.01, + "grad_norm": 2.418993936486022, + "learning_rate": 9.327217125382265e-06, + "loss": 0.7451, "step": 305 }, { - "epoch": 0.02, - "grad_norm": 0.5124769881897817, - "learning_rate": 1.1701720841300193e-05, - "loss": 0.2797, + "epoch": 0.01, + "grad_norm": 0.6437626848442127, + "learning_rate": 9.357798165137616e-06, + "loss": 0.3129, "step": 306 }, { - "epoch": 0.02, - "grad_norm": 0.5573421129903319, - "learning_rate": 1.1739961759082219e-05, - "loss": 0.2435, + "epoch": 0.01, + "grad_norm": 0.6505061969727871, + "learning_rate": 9.388379204892966e-06, + "loss": 0.2198, "step": 307 }, { - "epoch": 0.02, - "grad_norm": 1.936707015756441, - "learning_rate": 1.1778202676864246e-05, - "loss": 0.587, + "epoch": 0.01, + "grad_norm": 2.3903685173655465, + "learning_rate": 9.41896024464832e-06, + "loss": 0.584, "step": 308 }, { - "epoch": 0.02, - "grad_norm": 1.101229402299148, - "learning_rate": 1.1816443594646272e-05, - "loss": 0.5648, + "epoch": 0.01, + "grad_norm": 1.8450947741650001, + "learning_rate": 9.44954128440367e-06, + "loss": 0.5697, "step": 309 }, { - "epoch": 0.02, - "grad_norm": 0.5718313616620087, - "learning_rate": 1.1854684512428299e-05, - "loss": 0.2873, + "epoch": 0.01, + "grad_norm": 0.7760574948045484, + "learning_rate": 9.480122324159022e-06, + "loss": 0.2787, "step": 310 }, { - "epoch": 0.02, - "grad_norm": 0.9736432054583405, - "learning_rate": 1.1892925430210327e-05, - "loss": 0.4525, + "epoch": 0.01, + "grad_norm": 1.0058871279995607, + "learning_rate": 9.510703363914374e-06, + "loss": 0.4273, "step": 311 }, { - "epoch": 0.02, - "grad_norm": 0.5300437727042384, - "learning_rate": 1.1931166347992354e-05, - "loss": 0.1757, + "epoch": 0.01, + "grad_norm": 0.5370371077102385, + "learning_rate": 9.541284403669727e-06, + "loss": 0.1899, "step": 312 }, { - "epoch": 0.02, - "grad_norm": 0.6818941883625804, - "learning_rate": 1.196940726577438e-05, - "loss": 0.2652, + "epoch": 0.01, + "grad_norm": 0.8100499620762998, + "learning_rate": 9.571865443425077e-06, + "loss": 0.2656, "step": 313 }, { - "epoch": 0.02, - "grad_norm": 1.0449214685551322, - "learning_rate": 1.2007648183556407e-05, - "loss": 0.4432, + "epoch": 0.01, + "grad_norm": 2.2157671474499936, + "learning_rate": 9.602446483180428e-06, + "loss": 0.4622, "step": 314 }, { - "epoch": 0.02, - "grad_norm": 1.3218968880735233, - "learning_rate": 1.2045889101338433e-05, - "loss": 0.5656, + "epoch": 0.01, + "grad_norm": 2.4671811401365935, + "learning_rate": 9.633027522935781e-06, + "loss": 0.6748, "step": 315 }, { - "epoch": 0.02, - "grad_norm": 0.675172558207587, - "learning_rate": 1.208413001912046e-05, - "loss": 0.3515, + "epoch": 0.01, + "grad_norm": 0.7547832083232264, + "learning_rate": 9.663608562691133e-06, + "loss": 0.2638, "step": 316 }, { - "epoch": 0.02, - "grad_norm": 1.9376915018087, - "learning_rate": 1.2122370936902486e-05, - "loss": 0.6026, + "epoch": 0.01, + "grad_norm": 1.9812979691121875, + "learning_rate": 9.694189602446484e-06, + "loss": 0.7611, "step": 317 }, { - "epoch": 0.02, - "grad_norm": 0.7527951764348736, - "learning_rate": 1.2160611854684515e-05, - "loss": 0.39, + "epoch": 0.01, + "grad_norm": 1.022682195549178, + "learning_rate": 9.724770642201836e-06, + "loss": 0.399, "step": 318 }, { - "epoch": 0.02, - "grad_norm": 0.6271551344942625, - "learning_rate": 1.2198852772466541e-05, - "loss": 0.3249, + "epoch": 0.01, + "grad_norm": 0.6676515391118802, + "learning_rate": 9.755351681957187e-06, + "loss": 0.2653, "step": 319 }, { - "epoch": 0.02, - "grad_norm": 0.9520118512963162, - "learning_rate": 1.2237093690248568e-05, - "loss": 0.2872, + "epoch": 0.01, + "grad_norm": 4.0568140936833395, + "learning_rate": 9.785932721712539e-06, + "loss": 0.4753, "step": 320 }, { - "epoch": 0.02, - "grad_norm": 0.9489835344276659, - "learning_rate": 1.2275334608030594e-05, - "loss": 0.4268, + "epoch": 0.01, + "grad_norm": 2.587635635676488, + "learning_rate": 9.81651376146789e-06, + "loss": 0.8273, "step": 321 }, { - "epoch": 0.02, - "grad_norm": 0.7608542124414743, - "learning_rate": 1.231357552581262e-05, - "loss": 0.3252, + "epoch": 0.01, + "grad_norm": 0.7085024748208396, + "learning_rate": 9.847094801223243e-06, + "loss": 0.3223, "step": 322 }, { - "epoch": 0.02, - "grad_norm": 4.06054704906878, - "learning_rate": 1.2351816443594646e-05, - "loss": 0.692, + "epoch": 0.01, + "grad_norm": 1.1549277159230098, + "learning_rate": 9.877675840978595e-06, + "loss": 0.3669, "step": 323 }, { - "epoch": 0.02, - "grad_norm": 1.0376904064881367, - "learning_rate": 1.2390057361376676e-05, - "loss": 0.4878, + "epoch": 0.01, + "grad_norm": 2.319490537481074, + "learning_rate": 9.908256880733946e-06, + "loss": 0.6666, "step": 324 }, { - "epoch": 0.02, - "grad_norm": 0.7936001862726354, - "learning_rate": 1.2428298279158702e-05, - "loss": 0.3846, + "epoch": 0.01, + "grad_norm": 0.960294437970511, + "learning_rate": 9.938837920489298e-06, + "loss": 0.3424, "step": 325 }, { - "epoch": 0.02, - "grad_norm": 0.7655996437533465, - "learning_rate": 1.2466539196940729e-05, - "loss": 0.2969, + "epoch": 0.01, + "grad_norm": 1.2785524540762303, + "learning_rate": 9.96941896024465e-06, + "loss": 0.4015, "step": 326 }, { "epoch": 0.02, - "grad_norm": 1.6075196309967965, - "learning_rate": 1.2504780114722753e-05, - "loss": 0.322, + "grad_norm": 0.6456288537563728, + "learning_rate": 1e-05, + "loss": 0.2521, "step": 327 }, { "epoch": 0.02, - "grad_norm": 0.8333976096028658, - "learning_rate": 1.254302103250478e-05, - "loss": 0.3605, + "grad_norm": 0.759864669199683, + "learning_rate": 1.0030581039755352e-05, + "loss": 0.3502, "step": 328 }, { "epoch": 0.02, - "grad_norm": 2.395249474296435, - "learning_rate": 1.2581261950286807e-05, - "loss": 0.8674, + "grad_norm": 2.076650660134877, + "learning_rate": 1.0061162079510704e-05, + "loss": 0.5814, "step": 329 }, { "epoch": 0.02, - "grad_norm": 0.9426907172159265, - "learning_rate": 1.2619502868068833e-05, - "loss": 0.3545, + "grad_norm": 0.637064214853258, + "learning_rate": 1.0091743119266055e-05, + "loss": 0.4012, "step": 330 }, { "epoch": 0.02, - "grad_norm": 0.6597426619503529, - "learning_rate": 1.2657743785850863e-05, - "loss": 0.3462, + "grad_norm": 0.7931196364157752, + "learning_rate": 1.0122324159021408e-05, + "loss": 0.3373, "step": 331 }, { "epoch": 0.02, - "grad_norm": 0.9277589366404783, - "learning_rate": 1.2695984703632888e-05, - "loss": 0.4374, + "grad_norm": 1.0645734733060122, + "learning_rate": 1.0152905198776758e-05, + "loss": 0.434, "step": 332 }, { "epoch": 0.02, - "grad_norm": 0.49886983704319643, - "learning_rate": 1.2734225621414914e-05, - "loss": 0.2108, + "grad_norm": 0.6966006334314522, + "learning_rate": 1.018348623853211e-05, + "loss": 0.234, "step": 333 }, { "epoch": 0.02, - "grad_norm": 0.6541306009678697, - "learning_rate": 1.2772466539196941e-05, - "loss": 0.3265, + "grad_norm": 0.7690031493695907, + "learning_rate": 1.0214067278287463e-05, + "loss": 0.3198, "step": 334 }, { "epoch": 0.02, - "grad_norm": 1.7330589458418397, - "learning_rate": 1.2810707456978968e-05, - "loss": 0.765, + "grad_norm": 1.1847149440874944, + "learning_rate": 1.0244648318042814e-05, + "loss": 0.4529, "step": 335 }, { "epoch": 0.02, - "grad_norm": 1.9292808339282705, - "learning_rate": 1.2848948374760994e-05, - "loss": 0.5198, + "grad_norm": 1.5650718798411607, + "learning_rate": 1.0275229357798166e-05, + "loss": 0.4984, "step": 336 }, { "epoch": 0.02, - "grad_norm": 0.8238173474277146, - "learning_rate": 1.2887189292543022e-05, - "loss": 0.313, + "grad_norm": 0.6738602270311174, + "learning_rate": 1.0305810397553517e-05, + "loss": 0.339, "step": 337 }, { "epoch": 0.02, - "grad_norm": 0.7391319260856145, - "learning_rate": 1.2925430210325049e-05, - "loss": 0.3138, + "grad_norm": 0.7598539416526311, + "learning_rate": 1.033639143730887e-05, + "loss": 0.3975, "step": 338 }, { "epoch": 0.02, - "grad_norm": 0.8080684350177465, - "learning_rate": 1.2963671128107076e-05, - "loss": 0.3086, + "grad_norm": 0.6076053808679849, + "learning_rate": 1.036697247706422e-05, + "loss": 0.1222, "step": 339 }, { "epoch": 0.02, - "grad_norm": 0.7345914153185755, - "learning_rate": 1.3001912045889102e-05, - "loss": 0.2706, + "grad_norm": 0.7186668506285876, + "learning_rate": 1.0397553516819572e-05, + "loss": 0.3186, "step": 340 }, { "epoch": 0.02, - "grad_norm": 1.9255928748149223, - "learning_rate": 1.3040152963671129e-05, - "loss": 0.7527, + "grad_norm": 2.2237523565383626, + "learning_rate": 1.0428134556574925e-05, + "loss": 0.73, "step": 341 }, { "epoch": 0.02, - "grad_norm": 1.2808433824506942, - "learning_rate": 1.3078393881453155e-05, - "loss": 0.4384, + "grad_norm": 1.0291621282285297, + "learning_rate": 1.0458715596330275e-05, + "loss": 0.4066, "step": 342 }, { "epoch": 0.02, - "grad_norm": 0.7058183166356123, - "learning_rate": 1.3116634799235182e-05, - "loss": 0.3182, + "grad_norm": 0.7359736072379032, + "learning_rate": 1.0489296636085628e-05, + "loss": 0.3533, "step": 343 }, { "epoch": 0.02, - "grad_norm": 2.226462284510022, - "learning_rate": 1.315487571701721e-05, - "loss": 0.7395, + "grad_norm": 2.33749940059033, + "learning_rate": 1.051987767584098e-05, + "loss": 0.7251, "step": 344 }, { "epoch": 0.02, - "grad_norm": 0.6742925329478396, - "learning_rate": 1.3193116634799237e-05, - "loss": 0.2795, + "grad_norm": 0.6416158029010471, + "learning_rate": 1.055045871559633e-05, + "loss": 0.2455, "step": 345 }, { "epoch": 0.02, - "grad_norm": 0.7439587116812453, - "learning_rate": 1.3231357552581263e-05, - "loss": 0.2749, + "grad_norm": 0.6058298620506867, + "learning_rate": 1.0581039755351682e-05, + "loss": 0.2532, "step": 346 }, { "epoch": 0.02, - "grad_norm": 1.4361449975745808, - "learning_rate": 1.326959847036329e-05, - "loss": 0.7106, + "grad_norm": 1.0936915943993688, + "learning_rate": 1.0611620795107034e-05, + "loss": 0.4751, "step": 347 }, { "epoch": 0.02, - "grad_norm": 1.2439492090019573, - "learning_rate": 1.3307839388145316e-05, - "loss": 0.589, + "grad_norm": 1.2240821373266129, + "learning_rate": 1.0642201834862387e-05, + "loss": 0.558, "step": 348 }, { "epoch": 0.02, - "grad_norm": 0.7435609340200998, - "learning_rate": 1.3346080305927343e-05, - "loss": 0.2519, + "grad_norm": 0.7564767772488803, + "learning_rate": 1.0672782874617737e-05, + "loss": 0.2557, "step": 349 }, { "epoch": 0.02, - "grad_norm": 0.7689761834441273, - "learning_rate": 1.3384321223709371e-05, - "loss": 0.3878, + "grad_norm": 0.7336968297136265, + "learning_rate": 1.070336391437309e-05, + "loss": 0.3602, "step": 350 }, { "epoch": 0.02, - "grad_norm": 0.604279488549852, - "learning_rate": 1.3422562141491398e-05, - "loss": 0.2728, + "grad_norm": 0.8095160037694071, + "learning_rate": 1.0733944954128442e-05, + "loss": 0.306, "step": 351 }, { "epoch": 0.02, - "grad_norm": 0.6789890550766914, - "learning_rate": 1.3460803059273424e-05, - "loss": 0.3179, + "grad_norm": 0.6639109354982137, + "learning_rate": 1.0764525993883791e-05, + "loss": 0.2604, "step": 352 }, { "epoch": 0.02, - "grad_norm": 1.115636152118617, - "learning_rate": 1.349904397705545e-05, - "loss": 0.4138, + "grad_norm": 0.9584361311046328, + "learning_rate": 1.0795107033639145e-05, + "loss": 0.3934, "step": 353 }, { "epoch": 0.02, - "grad_norm": 0.6232254061695315, - "learning_rate": 1.3537284894837477e-05, - "loss": 0.3321, + "grad_norm": 0.8109570842065387, + "learning_rate": 1.0825688073394496e-05, + "loss": 0.42, "step": 354 }, { "epoch": 0.02, - "grad_norm": 0.6478378710624862, - "learning_rate": 1.3575525812619504e-05, - "loss": 0.2918, + "grad_norm": 0.7546490824345372, + "learning_rate": 1.085626911314985e-05, + "loss": 0.2424, "step": 355 }, { "epoch": 0.02, - "grad_norm": 0.6760264548742129, - "learning_rate": 1.361376673040153e-05, - "loss": 0.2258, + "grad_norm": 2.2379281967102345, + "learning_rate": 1.0886850152905199e-05, + "loss": 0.7048, "step": 356 }, { "epoch": 0.02, - "grad_norm": 0.8021332779117495, - "learning_rate": 1.3652007648183559e-05, - "loss": 0.4408, + "grad_norm": 1.9752761586037637, + "learning_rate": 1.091743119266055e-05, + "loss": 0.8772, "step": 357 }, { "epoch": 0.02, - "grad_norm": 0.6232210579121872, - "learning_rate": 1.3690248565965585e-05, - "loss": 0.3153, + "grad_norm": 0.6279187050026355, + "learning_rate": 1.0948012232415904e-05, + "loss": 0.3152, "step": 358 }, { "epoch": 0.02, - "grad_norm": 1.737205410962497, - "learning_rate": 1.3728489483747612e-05, - "loss": 0.5192, + "grad_norm": 0.7609205821095003, + "learning_rate": 1.0978593272171254e-05, + "loss": 0.2513, "step": 359 }, { "epoch": 0.02, - "grad_norm": 0.882470126979341, - "learning_rate": 1.3766730401529638e-05, - "loss": 0.428, + "grad_norm": 0.8830514530017181, + "learning_rate": 1.1009174311926607e-05, + "loss": 0.4218, "step": 360 }, { "epoch": 0.02, - "grad_norm": 0.6041133303431246, - "learning_rate": 1.3804971319311665e-05, - "loss": 0.3065, + "grad_norm": 0.7209664013982029, + "learning_rate": 1.1039755351681958e-05, + "loss": 0.3377, "step": 361 }, { "epoch": 0.02, - "grad_norm": 0.8041421990760149, - "learning_rate": 1.3843212237093691e-05, - "loss": 0.3698, + "grad_norm": 0.8470943324736788, + "learning_rate": 1.1070336391437311e-05, + "loss": 0.326, "step": 362 }, { "epoch": 0.02, - "grad_norm": 1.7265607031963566, - "learning_rate": 1.388145315487572e-05, - "loss": 0.7857, + "grad_norm": 2.4407430843873543, + "learning_rate": 1.1100917431192661e-05, + "loss": 0.7243, "step": 363 }, { "epoch": 0.02, - "grad_norm": 0.8204545366840523, - "learning_rate": 1.3919694072657746e-05, - "loss": 0.3723, + "grad_norm": 0.9412489307585115, + "learning_rate": 1.1131498470948013e-05, + "loss": 0.3216, "step": 364 }, { "epoch": 0.02, - "grad_norm": 1.8724560011222986, - "learning_rate": 1.3957934990439773e-05, - "loss": 0.579, + "grad_norm": 1.0119696792791166, + "learning_rate": 1.1162079510703366e-05, + "loss": 0.4486, "step": 365 }, { "epoch": 0.02, - "grad_norm": 0.49392242007190124, - "learning_rate": 1.39961759082218e-05, - "loss": 0.2412, + "grad_norm": 0.5471987353259963, + "learning_rate": 1.1192660550458716e-05, + "loss": 0.2975, "step": 366 }, { "epoch": 0.02, - "grad_norm": 0.6500233155836509, - "learning_rate": 1.4034416826003826e-05, - "loss": 0.3158, + "grad_norm": 1.009739717556094, + "learning_rate": 1.1223241590214069e-05, + "loss": 0.3711, "step": 367 }, { "epoch": 0.02, - "grad_norm": 1.0068899962526328, - "learning_rate": 1.407265774378585e-05, - "loss": 0.3929, + "grad_norm": 1.0376472336023075, + "learning_rate": 1.125382262996942e-05, + "loss": 0.3671, "step": 368 }, { "epoch": 0.02, - "grad_norm": 1.1908103503569025, - "learning_rate": 1.4110898661567877e-05, - "loss": 0.3623, + "grad_norm": 0.812724228520615, + "learning_rate": 1.128440366972477e-05, + "loss": 0.3484, "step": 369 }, { "epoch": 0.02, - "grad_norm": 0.6938542623801782, - "learning_rate": 1.4149139579349907e-05, - "loss": 0.3063, + "grad_norm": 0.726029139719463, + "learning_rate": 1.1314984709480123e-05, + "loss": 0.3279, "step": 370 }, { "epoch": 0.02, - "grad_norm": 2.1507116005912006, - "learning_rate": 1.4187380497131934e-05, - "loss": 0.5147, + "grad_norm": 1.0894714071167073, + "learning_rate": 1.1345565749235475e-05, + "loss": 0.4861, "step": 371 }, { "epoch": 0.02, - "grad_norm": 0.7129151557497264, - "learning_rate": 1.4225621414913958e-05, - "loss": 0.1442, + "grad_norm": 0.9872371730419399, + "learning_rate": 1.1376146788990828e-05, + "loss": 0.1539, "step": 372 }, { "epoch": 0.02, - "grad_norm": 0.5561459063539664, - "learning_rate": 1.4263862332695985e-05, - "loss": 0.3316, + "grad_norm": 0.8118300125124747, + "learning_rate": 1.1406727828746178e-05, + "loss": 0.3325, "step": 373 }, { "epoch": 0.02, - "grad_norm": 0.7795311581768043, - "learning_rate": 1.4302103250478012e-05, - "loss": 0.3971, + "grad_norm": 0.8190293509812799, + "learning_rate": 1.1437308868501531e-05, + "loss": 0.3736, "step": 374 }, { "epoch": 0.02, - "grad_norm": 2.1573702444835594, - "learning_rate": 1.4340344168260038e-05, - "loss": 0.5285, + "grad_norm": 1.417328509273749, + "learning_rate": 1.1467889908256882e-05, + "loss": 0.5584, "step": 375 }, { "epoch": 0.02, - "grad_norm": 0.8271672714492758, - "learning_rate": 1.4378585086042068e-05, - "loss": 0.346, + "grad_norm": 1.6513182871456002, + "learning_rate": 1.1498470948012232e-05, + "loss": 0.3738, "step": 376 }, { "epoch": 0.02, - "grad_norm": 1.22345199872424, - "learning_rate": 1.4416826003824093e-05, - "loss": 0.4845, + "grad_norm": 0.9677129715153956, + "learning_rate": 1.1529051987767585e-05, + "loss": 0.4837, "step": 377 }, { "epoch": 0.02, - "grad_norm": 0.6311654742572094, - "learning_rate": 1.445506692160612e-05, - "loss": 0.3056, + "grad_norm": 0.6074026460827666, + "learning_rate": 1.1559633027522937e-05, + "loss": 0.2104, "step": 378 }, { "epoch": 0.02, - "grad_norm": 0.48625752015949253, - "learning_rate": 1.4493307839388146e-05, - "loss": 0.2126, + "grad_norm": 0.9201543615627558, + "learning_rate": 1.159021406727829e-05, + "loss": 0.3436, "step": 379 }, { "epoch": 0.02, - "grad_norm": 1.9799198948538195, - "learning_rate": 1.4531548757170173e-05, - "loss": 0.7895, + "grad_norm": 1.059686538958258, + "learning_rate": 1.162079510703364e-05, + "loss": 0.4311, "step": 380 }, { "epoch": 0.02, - "grad_norm": 0.8516318849459286, - "learning_rate": 1.4569789674952199e-05, - "loss": 0.4181, + "grad_norm": 0.9011482100134448, + "learning_rate": 1.1651376146788991e-05, + "loss": 0.4742, "step": 381 }, { "epoch": 0.02, - "grad_norm": 1.014567135790311, - "learning_rate": 1.4608030592734226e-05, - "loss": 0.3468, + "grad_norm": 0.9137900547996819, + "learning_rate": 1.1681957186544344e-05, + "loss": 0.3068, "step": 382 }, { "epoch": 0.02, - "grad_norm": 0.8579154771118827, - "learning_rate": 1.4646271510516254e-05, - "loss": 0.4599, + "grad_norm": 1.0100750082919883, + "learning_rate": 1.1712538226299694e-05, + "loss": 0.4867, "step": 383 }, { "epoch": 0.02, - "grad_norm": 0.6148793445042039, - "learning_rate": 1.468451242829828e-05, - "loss": 0.234, + "grad_norm": 0.5061832127642694, + "learning_rate": 1.1743119266055047e-05, + "loss": 0.2197, "step": 384 }, { "epoch": 0.02, - "grad_norm": 0.5488181146615484, - "learning_rate": 1.4722753346080307e-05, - "loss": 0.2141, + "grad_norm": 0.8539484631601367, + "learning_rate": 1.1773700305810399e-05, + "loss": 0.0943, "step": 385 }, { "epoch": 0.02, - "grad_norm": 0.6799560781486946, - "learning_rate": 1.4760994263862334e-05, - "loss": 0.3486, + "grad_norm": 0.8151997765387853, + "learning_rate": 1.180428134556575e-05, + "loss": 0.3545, "step": 386 }, { "epoch": 0.02, - "grad_norm": 1.3813615427326338, - "learning_rate": 1.479923518164436e-05, - "loss": 0.5956, + "grad_norm": 1.8597978147295617, + "learning_rate": 1.1834862385321102e-05, + "loss": 0.5969, "step": 387 }, { "epoch": 0.02, - "grad_norm": 0.6414359509532044, - "learning_rate": 1.4837476099426387e-05, - "loss": 0.2277, + "grad_norm": 1.009957260666378, + "learning_rate": 1.1865443425076453e-05, + "loss": 0.3068, "step": 388 }, { "epoch": 0.02, - "grad_norm": 0.586932100761519, - "learning_rate": 1.4875717017208415e-05, - "loss": 0.3545, + "grad_norm": 0.6987589274442721, + "learning_rate": 1.1896024464831805e-05, + "loss": 0.3651, "step": 389 }, { "epoch": 0.02, - "grad_norm": 0.7838674501697059, - "learning_rate": 1.4913957934990441e-05, - "loss": 0.2907, + "grad_norm": 1.0381633184548078, + "learning_rate": 1.1926605504587156e-05, + "loss": 0.4402, "step": 390 }, { "epoch": 0.02, - "grad_norm": 0.5658830728766918, - "learning_rate": 1.4952198852772468e-05, - "loss": 0.2235, + "grad_norm": 0.6409620176569877, + "learning_rate": 1.195718654434251e-05, + "loss": 0.1702, "step": 391 }, { "epoch": 0.02, - "grad_norm": 0.8068619501663652, - "learning_rate": 1.4990439770554495e-05, - "loss": 0.344, + "grad_norm": 1.0353341950998738, + "learning_rate": 1.1987767584097861e-05, + "loss": 0.436, "step": 392 }, { "epoch": 0.02, - "grad_norm": 0.6596708569982442, - "learning_rate": 1.5028680688336521e-05, - "loss": 0.3774, + "grad_norm": 1.4944905572706597, + "learning_rate": 1.2018348623853211e-05, + "loss": 0.4473, "step": 393 }, { "epoch": 0.02, - "grad_norm": 0.8793793597200517, - "learning_rate": 1.5066921606118548e-05, - "loss": 0.4466, + "grad_norm": 1.1226048672566518, + "learning_rate": 1.2048929663608564e-05, + "loss": 0.4166, "step": 394 }, { "epoch": 0.02, - "grad_norm": 1.0600744438699607, - "learning_rate": 1.5105162523900574e-05, - "loss": 0.3287, + "grad_norm": 1.0359219477391735, + "learning_rate": 1.2079510703363916e-05, + "loss": 0.3408, "step": 395 }, { "epoch": 0.02, - "grad_norm": 0.7919519835895109, - "learning_rate": 1.5143403441682602e-05, - "loss": 0.3511, + "grad_norm": 0.8237632127378013, + "learning_rate": 1.2110091743119267e-05, + "loss": 0.3221, "step": 396 }, { "epoch": 0.02, - "grad_norm": 0.5578491432818139, - "learning_rate": 1.5181644359464629e-05, - "loss": 0.2575, + "grad_norm": 0.6501662996700841, + "learning_rate": 1.2140672782874619e-05, + "loss": 0.2828, "step": 397 }, { "epoch": 0.02, - "grad_norm": 0.8105475118217892, - "learning_rate": 1.5219885277246656e-05, - "loss": 0.3245, + "grad_norm": 0.8528311194744408, + "learning_rate": 1.2171253822629972e-05, + "loss": 0.3213, "step": 398 }, { "epoch": 0.02, - "grad_norm": 0.7809252479896669, - "learning_rate": 1.5258126195028682e-05, - "loss": 0.5919, + "grad_norm": 2.0044968679659902, + "learning_rate": 1.2201834862385321e-05, + "loss": 0.6022, "step": 399 }, { "epoch": 0.02, - "grad_norm": 0.6766528930681027, - "learning_rate": 1.529636711281071e-05, - "loss": 0.3405, + "grad_norm": 1.005543914958151, + "learning_rate": 1.2232415902140673e-05, + "loss": 0.3743, "step": 400 }, { "epoch": 0.02, - "grad_norm": 0.6551308795259978, - "learning_rate": 1.5334608030592735e-05, - "loss": 0.3199, + "grad_norm": 1.001992040492003, + "learning_rate": 1.2262996941896026e-05, + "loss": 0.308, "step": 401 }, { "epoch": 0.02, - "grad_norm": 0.6205927231666513, - "learning_rate": 1.5372848948374762e-05, - "loss": 0.2926, + "grad_norm": 0.6844145238324899, + "learning_rate": 1.2293577981651376e-05, + "loss": 0.3827, "step": 402 }, { "epoch": 0.02, - "grad_norm": 0.6434102165393524, - "learning_rate": 1.541108986615679e-05, - "loss": 0.2168, + "grad_norm": 0.7712804353737467, + "learning_rate": 1.2324159021406729e-05, + "loss": 0.1784, "step": 403 }, { "epoch": 0.02, - "grad_norm": 0.7035727497301221, - "learning_rate": 1.5449330783938815e-05, - "loss": 0.3766, + "grad_norm": 0.927226474041373, + "learning_rate": 1.235474006116208e-05, + "loss": 0.3421, "step": 404 }, { "epoch": 0.02, - "grad_norm": 0.6351981901932477, - "learning_rate": 1.548757170172084e-05, - "loss": 0.4042, + "grad_norm": 0.9874817567285509, + "learning_rate": 1.238532110091743e-05, + "loss": 0.4119, "step": 405 }, { "epoch": 0.02, - "grad_norm": 1.0733492412828631, - "learning_rate": 1.5525812619502868e-05, - "loss": 0.3453, + "grad_norm": 1.2850100671559304, + "learning_rate": 1.2415902140672784e-05, + "loss": 0.5462, "step": 406 }, { "epoch": 0.02, - "grad_norm": 0.6931124633556552, - "learning_rate": 1.5564053537284895e-05, - "loss": 0.4334, + "grad_norm": 0.7945217936225786, + "learning_rate": 1.2446483180428135e-05, + "loss": 0.3457, "step": 407 }, { "epoch": 0.02, - "grad_norm": 1.344580603509804, - "learning_rate": 1.560229445506692e-05, - "loss": 0.3942, + "grad_norm": 1.7028886133352672, + "learning_rate": 1.2477064220183488e-05, + "loss": 0.3181, "step": 408 }, { "epoch": 0.02, - "grad_norm": 0.48351949036648106, - "learning_rate": 1.564053537284895e-05, - "loss": 0.2785, + "grad_norm": 0.5108561208138009, + "learning_rate": 1.2507645259938838e-05, + "loss": 0.2417, "step": 409 }, { "epoch": 0.02, - "grad_norm": 0.5724541069062071, - "learning_rate": 1.5678776290630978e-05, - "loss": 0.3334, + "grad_norm": 0.7507026872828835, + "learning_rate": 1.2538226299694191e-05, + "loss": 0.3939, "step": 410 }, { "epoch": 0.02, - "grad_norm": 1.0690880110758512, - "learning_rate": 1.5717017208413004e-05, - "loss": 0.5032, + "grad_norm": 1.634793257460326, + "learning_rate": 1.2568807339449543e-05, + "loss": 0.515, "step": 411 }, { "epoch": 0.02, - "grad_norm": 0.8955745333838802, - "learning_rate": 1.575525812619503e-05, - "loss": 0.4174, + "grad_norm": 1.1705692756241106, + "learning_rate": 1.2599388379204893e-05, + "loss": 0.3827, "step": 412 }, { "epoch": 0.02, - "grad_norm": 0.6489343273710856, - "learning_rate": 1.5793499043977057e-05, - "loss": 0.354, + "grad_norm": 0.704842009959545, + "learning_rate": 1.2629969418960246e-05, + "loss": 0.3675, "step": 413 }, { "epoch": 0.02, - "grad_norm": 2.00140719177433, - "learning_rate": 1.5831739961759084e-05, - "loss": 0.6871, + "grad_norm": 0.9546621527533904, + "learning_rate": 1.2660550458715597e-05, + "loss": 0.3704, "step": 414 }, { "epoch": 0.02, - "grad_norm": 0.9468441119751057, - "learning_rate": 1.586998087954111e-05, - "loss": 0.3439, + "grad_norm": 0.6866097869958888, + "learning_rate": 1.269113149847095e-05, + "loss": 0.3019, "step": 415 }, { "epoch": 0.02, - "grad_norm": 0.6811167520988824, - "learning_rate": 1.5908221797323137e-05, - "loss": 0.4035, + "grad_norm": 0.788637594215398, + "learning_rate": 1.27217125382263e-05, + "loss": 0.3802, "step": 416 }, { "epoch": 0.02, - "grad_norm": 0.5067638312443203, - "learning_rate": 1.5946462715105163e-05, - "loss": 0.2935, + "grad_norm": 0.6524731806834094, + "learning_rate": 1.2752293577981652e-05, + "loss": 0.3276, "step": 417 }, { "epoch": 0.02, - "grad_norm": 0.7214352543145547, - "learning_rate": 1.598470363288719e-05, - "loss": 0.2881, + "grad_norm": 0.9544698235194905, + "learning_rate": 1.2782874617737005e-05, + "loss": 0.4293, "step": 418 }, { "epoch": 0.02, - "grad_norm": 1.0508862135463248, - "learning_rate": 1.6022944550669217e-05, - "loss": 0.4995, + "grad_norm": 0.7443026046479974, + "learning_rate": 1.2813455657492355e-05, + "loss": 0.3396, "step": 419 }, { "epoch": 0.02, - "grad_norm": 1.8285100746936782, - "learning_rate": 1.6061185468451243e-05, - "loss": 0.6615, + "grad_norm": 1.659484928471325, + "learning_rate": 1.2844036697247708e-05, + "loss": 0.761, "step": 420 }, { "epoch": 0.02, - "grad_norm": 0.5708789432768756, - "learning_rate": 1.609942638623327e-05, - "loss": 0.3237, + "grad_norm": 0.6879528706866395, + "learning_rate": 1.287461773700306e-05, + "loss": 0.3629, "step": 421 }, { "epoch": 0.02, - "grad_norm": 0.7903225965702565, - "learning_rate": 1.61376673040153e-05, - "loss": 0.3891, + "grad_norm": 0.7196086203688604, + "learning_rate": 1.2905198776758412e-05, + "loss": 0.3475, "step": 422 }, { "epoch": 0.02, - "grad_norm": 0.5582795431672651, - "learning_rate": 1.6175908221797326e-05, - "loss": 0.2004, + "grad_norm": 0.5591856445738644, + "learning_rate": 1.2935779816513762e-05, + "loss": 0.248, "step": 423 }, { "epoch": 0.02, - "grad_norm": 0.8407191661050721, - "learning_rate": 1.6214149139579353e-05, - "loss": 0.312, + "grad_norm": 0.9675577284207191, + "learning_rate": 1.2966360856269114e-05, + "loss": 0.3126, "step": 424 }, { "epoch": 0.02, - "grad_norm": 0.6198087613324328, - "learning_rate": 1.625239005736138e-05, - "loss": 0.3276, + "grad_norm": 0.7340575606616077, + "learning_rate": 1.2996941896024467e-05, + "loss": 0.3291, "step": 425 }, { "epoch": 0.02, - "grad_norm": 1.8207898222982781, - "learning_rate": 1.6290630975143406e-05, - "loss": 0.7767, + "grad_norm": 1.1196690378481349, + "learning_rate": 1.3027522935779817e-05, + "loss": 0.4513, "step": 426 }, { "epoch": 0.02, - "grad_norm": 1.330115693764209, - "learning_rate": 1.6328871892925432e-05, - "loss": 0.5159, + "grad_norm": 0.9605708680285826, + "learning_rate": 1.305810397553517e-05, + "loss": 0.4623, "step": 427 }, { "epoch": 0.02, - "grad_norm": 0.6387056248865568, - "learning_rate": 1.636711281070746e-05, - "loss": 0.3419, + "grad_norm": 0.667672776088915, + "learning_rate": 1.3088685015290521e-05, + "loss": 0.3887, "step": 428 }, { "epoch": 0.02, - "grad_norm": 0.6215273109764146, - "learning_rate": 1.6405353728489485e-05, - "loss": 0.3152, + "grad_norm": 0.5978278305226283, + "learning_rate": 1.3119266055045871e-05, + "loss": 0.2308, "step": 429 }, { "epoch": 0.02, - "grad_norm": 0.9514248403782344, - "learning_rate": 1.6443594646271512e-05, - "loss": 0.2815, + "grad_norm": 0.813447611175994, + "learning_rate": 1.3149847094801224e-05, + "loss": 0.3384, "step": 430 }, { "epoch": 0.02, - "grad_norm": 0.9310589290777825, - "learning_rate": 1.648183556405354e-05, - "loss": 0.3134, + "grad_norm": 0.8006386077076075, + "learning_rate": 1.3180428134556576e-05, + "loss": 0.3553, "step": 431 }, { "epoch": 0.02, - "grad_norm": 1.858149168694772, - "learning_rate": 1.6520076481835565e-05, - "loss": 0.5817, + "grad_norm": 1.5360859572371073, + "learning_rate": 1.3211009174311929e-05, + "loss": 0.5853, "step": 432 }, { "epoch": 0.02, - "grad_norm": 0.7691230463430312, - "learning_rate": 1.655831739961759e-05, - "loss": 0.3825, + "grad_norm": 0.586386662347721, + "learning_rate": 1.3241590214067279e-05, + "loss": 0.3862, "step": 433 }, { "epoch": 0.02, - "grad_norm": 0.7411479882760116, - "learning_rate": 1.6596558317399618e-05, - "loss": 0.2704, + "grad_norm": 0.755074881509733, + "learning_rate": 1.3272171253822632e-05, + "loss": 0.3108, "step": 434 }, { "epoch": 0.02, - "grad_norm": 1.1171597084891796, - "learning_rate": 1.6634799235181648e-05, - "loss": 0.1792, + "grad_norm": 0.5473555246521312, + "learning_rate": 1.3302752293577984e-05, + "loss": 0.2196, "step": 435 }, { - "epoch": 0.03, - "grad_norm": 1.0198428305122083, - "learning_rate": 1.6673040152963675e-05, - "loss": 0.4587, + "epoch": 0.02, + "grad_norm": 1.1672039223034718, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5633, "step": 436 }, { - "epoch": 0.03, - "grad_norm": 0.8646954669117713, - "learning_rate": 1.67112810707457e-05, - "loss": 0.3177, + "epoch": 0.02, + "grad_norm": 0.606662784642807, + "learning_rate": 1.3363914373088686e-05, + "loss": 0.2706, "step": 437 }, { - "epoch": 0.03, - "grad_norm": 2.2919448984679605, - "learning_rate": 1.6749521988527724e-05, - "loss": 0.6591, + "epoch": 0.02, + "grad_norm": 1.2423080988451263, + "learning_rate": 1.3394495412844038e-05, + "loss": 0.4528, "step": 438 }, { - "epoch": 0.03, - "grad_norm": 1.385420612343266, - "learning_rate": 1.678776290630975e-05, - "loss": 0.5364, + "epoch": 0.02, + "grad_norm": 1.1074032355173484, + "learning_rate": 1.3425076452599391e-05, + "loss": 0.5942, "step": 439 }, { - "epoch": 0.03, - "grad_norm": 0.8656076895746075, - "learning_rate": 1.6826003824091778e-05, - "loss": 0.3426, + "epoch": 0.02, + "grad_norm": 0.645449711454087, + "learning_rate": 1.3455657492354741e-05, + "loss": 0.2469, "step": 440 }, { - "epoch": 0.03, - "grad_norm": 0.8693095612262008, - "learning_rate": 1.6864244741873804e-05, - "loss": 0.2902, + "epoch": 0.02, + "grad_norm": 0.4847833719261981, + "learning_rate": 1.3486238532110092e-05, + "loss": 0.2742, "step": 441 }, { - "epoch": 0.03, - "grad_norm": 2.203916729584415, - "learning_rate": 1.6902485659655834e-05, - "loss": 0.9356, + "epoch": 0.02, + "grad_norm": 2.1431202184548037, + "learning_rate": 1.3516819571865446e-05, + "loss": 0.8556, "step": 442 }, { - "epoch": 0.03, - "grad_norm": 0.6401954440906067, - "learning_rate": 1.694072657743786e-05, - "loss": 0.2328, + "epoch": 0.02, + "grad_norm": 0.6514009317026143, + "learning_rate": 1.3547400611620795e-05, + "loss": 0.2511, "step": 443 }, { - "epoch": 0.03, - "grad_norm": 1.2732766268188778, - "learning_rate": 1.6978967495219887e-05, - "loss": 0.4489, + "epoch": 0.02, + "grad_norm": 1.4354544132986642, + "learning_rate": 1.3577981651376149e-05, + "loss": 0.5671, "step": 444 }, { - "epoch": 0.03, - "grad_norm": 1.0743702431986437, - "learning_rate": 1.7017208413001914e-05, - "loss": 0.4417, + "epoch": 0.02, + "grad_norm": 0.7239924956114187, + "learning_rate": 1.36085626911315e-05, + "loss": 0.3976, "step": 445 }, { - "epoch": 0.03, - "grad_norm": 0.7493088642138847, - "learning_rate": 1.705544933078394e-05, - "loss": 0.3438, + "epoch": 0.02, + "grad_norm": 0.7279374850402145, + "learning_rate": 1.363914373088685e-05, + "loss": 0.334, "step": 446 }, { - "epoch": 0.03, - "grad_norm": 1.872577130088869, - "learning_rate": 1.7093690248565967e-05, - "loss": 0.5092, + "epoch": 0.02, + "grad_norm": 0.5945817494147503, + "learning_rate": 1.3669724770642203e-05, + "loss": 0.1589, "step": 447 }, { - "epoch": 0.03, - "grad_norm": 1.6800381906510835, - "learning_rate": 1.7131931166347993e-05, - "loss": 0.3587, + "epoch": 0.02, + "grad_norm": 1.1648940925457805, + "learning_rate": 1.3700305810397555e-05, + "loss": 0.42, "step": 448 }, { - "epoch": 0.03, - "grad_norm": 0.6966844101543027, - "learning_rate": 1.717017208413002e-05, - "loss": 0.3088, + "epoch": 0.02, + "grad_norm": 0.7809969399888335, + "learning_rate": 1.3730886850152908e-05, + "loss": 0.3331, "step": 449 }, { - "epoch": 0.03, - "grad_norm": 1.2041520310637208, - "learning_rate": 1.7208413001912046e-05, - "loss": 0.5373, + "epoch": 0.02, + "grad_norm": 1.192381980515498, + "learning_rate": 1.3761467889908258e-05, + "loss": 0.3608, "step": 450 }, { - "epoch": 0.03, - "grad_norm": 1.1907325057349623, - "learning_rate": 1.7246653919694073e-05, - "loss": 0.3502, + "epoch": 0.02, + "grad_norm": 1.301344636000289, + "learning_rate": 1.379204892966361e-05, + "loss": 0.6585, "step": 451 }, { - "epoch": 0.03, - "grad_norm": 0.9197893826285918, - "learning_rate": 1.72848948374761e-05, - "loss": 0.3349, + "epoch": 0.02, + "grad_norm": 0.7395911814833751, + "learning_rate": 1.3822629969418962e-05, + "loss": 0.3299, "step": 452 }, { - "epoch": 0.03, - "grad_norm": 0.8202204456029969, - "learning_rate": 1.7323135755258126e-05, - "loss": 0.391, + "epoch": 0.02, + "grad_norm": 0.7770700230037193, + "learning_rate": 1.3853211009174312e-05, + "loss": 0.3101, "step": 453 }, { - "epoch": 0.03, - "grad_norm": 1.0635885255387458, - "learning_rate": 1.7361376673040153e-05, - "loss": 0.4402, + "epoch": 0.02, + "grad_norm": 1.3610907448825111, + "learning_rate": 1.3883792048929665e-05, + "loss": 0.5689, "step": 454 }, { - "epoch": 0.03, - "grad_norm": 0.7312803167255396, - "learning_rate": 1.7399617590822183e-05, - "loss": 0.341, + "epoch": 0.02, + "grad_norm": 0.7271017440606534, + "learning_rate": 1.3914373088685017e-05, + "loss": 0.3369, "step": 455 }, { - "epoch": 0.03, - "grad_norm": 0.7672739780107467, - "learning_rate": 1.743785850860421e-05, - "loss": 0.3814, + "epoch": 0.02, + "grad_norm": 1.3096426775910195, + "learning_rate": 1.3944954128440368e-05, + "loss": 0.3313, "step": 456 }, { - "epoch": 0.03, - "grad_norm": 0.7107829663539708, - "learning_rate": 1.7476099426386236e-05, - "loss": 0.2131, + "epoch": 0.02, + "grad_norm": 0.8885430971695621, + "learning_rate": 1.397553516819572e-05, + "loss": 0.3831, "step": 457 }, { - "epoch": 0.03, - "grad_norm": 0.7640513015983967, - "learning_rate": 1.7514340344168262e-05, - "loss": 0.3358, + "epoch": 0.02, + "grad_norm": 0.6769653298219982, + "learning_rate": 1.4006116207951071e-05, + "loss": 0.3139, "step": 458 }, { - "epoch": 0.03, - "grad_norm": 1.4799590833631586, - "learning_rate": 1.755258126195029e-05, - "loss": 0.7856, + "epoch": 0.02, + "grad_norm": 1.7947572154075946, + "learning_rate": 1.4036697247706423e-05, + "loss": 0.7653, "step": 459 }, { - "epoch": 0.03, - "grad_norm": 0.6436605240448509, - "learning_rate": 1.7590822179732315e-05, - "loss": 0.3497, + "epoch": 0.02, + "grad_norm": 0.8755100459118086, + "learning_rate": 1.4067278287461774e-05, + "loss": 0.358, "step": 460 }, { - "epoch": 0.03, - "grad_norm": 0.7464987117639431, - "learning_rate": 1.7629063097514342e-05, - "loss": 0.3435, + "epoch": 0.02, + "grad_norm": 0.710535014517863, + "learning_rate": 1.4097859327217127e-05, + "loss": 0.3158, "step": 461 }, { - "epoch": 0.03, - "grad_norm": 0.7696930422997337, - "learning_rate": 1.766730401529637e-05, - "loss": 0.4322, + "epoch": 0.02, + "grad_norm": 0.8693653692462091, + "learning_rate": 1.4128440366972477e-05, + "loss": 0.4248, "step": 462 }, { - "epoch": 0.03, - "grad_norm": 0.6025265599658696, - "learning_rate": 1.7705544933078395e-05, - "loss": 0.1584, + "epoch": 0.02, + "grad_norm": 0.8776195046761086, + "learning_rate": 1.415902140672783e-05, + "loss": 0.1549, "step": 463 }, { - "epoch": 0.03, - "grad_norm": 0.6378754667711887, - "learning_rate": 1.774378585086042e-05, - "loss": 0.3131, + "epoch": 0.02, + "grad_norm": 0.6691649362322377, + "learning_rate": 1.4189602446483182e-05, + "loss": 0.3263, "step": 464 }, { - "epoch": 0.03, - "grad_norm": 0.6729866894407964, - "learning_rate": 1.7782026768642448e-05, - "loss": 0.3856, + "epoch": 0.02, + "grad_norm": 0.8216060926424095, + "learning_rate": 1.4220183486238533e-05, + "loss": 0.3765, "step": 465 }, { - "epoch": 0.03, - "grad_norm": 0.935070447059843, - "learning_rate": 1.7820267686424475e-05, - "loss": 0.6001, + "epoch": 0.02, + "grad_norm": 1.089918844569685, + "learning_rate": 1.4250764525993885e-05, + "loss": 0.5202, "step": 466 }, { - "epoch": 0.03, - "grad_norm": 0.5333343068447588, - "learning_rate": 1.78585086042065e-05, - "loss": 0.3228, + "epoch": 0.02, + "grad_norm": 0.7602998809240603, + "learning_rate": 1.4281345565749236e-05, + "loss": 0.3625, "step": 467 }, { - "epoch": 0.03, - "grad_norm": 0.9091879424576893, - "learning_rate": 1.789674952198853e-05, - "loss": 0.4292, + "epoch": 0.02, + "grad_norm": 1.2952893981457272, + "learning_rate": 1.431192660550459e-05, + "loss": 0.6468, "step": 468 }, { - "epoch": 0.03, - "grad_norm": 0.5387821924240094, - "learning_rate": 1.7934990439770558e-05, - "loss": 0.2639, + "epoch": 0.02, + "grad_norm": 0.5256011312783779, + "learning_rate": 1.434250764525994e-05, + "loss": 0.2423, "step": 469 }, { - "epoch": 0.03, - "grad_norm": 0.5311447953582863, - "learning_rate": 1.7973231357552584e-05, - "loss": 0.2444, + "epoch": 0.02, + "grad_norm": 0.7293705482607076, + "learning_rate": 1.437308868501529e-05, + "loss": 0.3174, "step": 470 }, { - "epoch": 0.03, - "grad_norm": 1.395307363682624, - "learning_rate": 1.801147227533461e-05, - "loss": 0.7412, + "epoch": 0.02, + "grad_norm": 1.0463625981203963, + "learning_rate": 1.4403669724770644e-05, + "loss": 0.4852, "step": 471 }, { - "epoch": 0.03, - "grad_norm": 0.6504582290964441, - "learning_rate": 1.8049713193116637e-05, - "loss": 0.4336, + "epoch": 0.02, + "grad_norm": 0.9194004666491546, + "learning_rate": 1.4434250764525994e-05, + "loss": 0.4247, "step": 472 }, { - "epoch": 0.03, - "grad_norm": 0.5715139902093684, - "learning_rate": 1.8087954110898664e-05, - "loss": 0.2398, + "epoch": 0.02, + "grad_norm": 1.2220454533289145, + "learning_rate": 1.4464831804281347e-05, + "loss": 0.288, "step": 473 }, { - "epoch": 0.03, - "grad_norm": 1.125542602171137, - "learning_rate": 1.812619502868069e-05, - "loss": 0.6667, + "epoch": 0.02, + "grad_norm": 0.8731377686752274, + "learning_rate": 1.4495412844036698e-05, + "loss": 0.4758, "step": 474 }, { - "epoch": 0.03, - "grad_norm": 0.4982371633361562, - "learning_rate": 1.8164435946462717e-05, - "loss": 0.237, + "epoch": 0.02, + "grad_norm": 6.683467004572762, + "learning_rate": 1.4525993883792051e-05, + "loss": 0.2428, "step": 475 }, { - "epoch": 0.03, - "grad_norm": 0.5691266935810649, - "learning_rate": 1.8202676864244744e-05, - "loss": 0.2451, + "epoch": 0.02, + "grad_norm": 2.980093308442375, + "learning_rate": 1.4556574923547401e-05, + "loss": 0.2708, "step": 476 }, { - "epoch": 0.03, - "grad_norm": 0.7030820342200949, - "learning_rate": 1.824091778202677e-05, - "loss": 0.4042, + "epoch": 0.02, + "grad_norm": 1.1700476361636072, + "learning_rate": 1.4587155963302753e-05, + "loss": 0.3966, "step": 477 }, { - "epoch": 0.03, - "grad_norm": 1.3941896853612525, - "learning_rate": 1.8279158699808797e-05, - "loss": 0.5841, + "epoch": 0.02, + "grad_norm": 2.468004893696746, + "learning_rate": 1.4617737003058106e-05, + "loss": 0.5948, "step": 478 }, { - "epoch": 0.03, - "grad_norm": 0.6743046891263709, - "learning_rate": 1.8317399617590823e-05, - "loss": 0.3182, + "epoch": 0.02, + "grad_norm": 1.7645683586829224, + "learning_rate": 1.4648318042813456e-05, + "loss": 0.346, "step": 479 }, { - "epoch": 0.03, - "grad_norm": 0.750355710951829, - "learning_rate": 1.835564053537285e-05, - "loss": 0.3391, + "epoch": 0.02, + "grad_norm": 1.2849940490044633, + "learning_rate": 1.4678899082568809e-05, + "loss": 0.4757, "step": 480 }, { - "epoch": 0.03, - "grad_norm": 0.46562694341915173, - "learning_rate": 1.839388145315488e-05, - "loss": 0.2027, + "epoch": 0.02, + "grad_norm": 1.961676230979329, + "learning_rate": 1.470948012232416e-05, + "loss": 0.2653, "step": 481 }, { - "epoch": 0.03, - "grad_norm": 0.6383543781218285, - "learning_rate": 1.8432122370936906e-05, - "loss": 0.3138, + "epoch": 0.02, + "grad_norm": 1.2913928380581234, + "learning_rate": 1.474006116207951e-05, + "loss": 0.3029, "step": 482 }, { - "epoch": 0.03, - "grad_norm": 2.128120166212137, - "learning_rate": 1.847036328871893e-05, - "loss": 0.5683, + "epoch": 0.02, + "grad_norm": 1.5455869052683289, + "learning_rate": 1.4770642201834863e-05, + "loss": 0.7507, "step": 483 }, { - "epoch": 0.03, - "grad_norm": 0.7605870557660157, - "learning_rate": 1.8508604206500956e-05, - "loss": 0.4408, + "epoch": 0.02, + "grad_norm": 1.3602876553679932, + "learning_rate": 1.4801223241590215e-05, + "loss": 0.5013, "step": 484 }, { - "epoch": 0.03, - "grad_norm": 0.6354556583007445, - "learning_rate": 1.8546845124282983e-05, - "loss": 0.3147, + "epoch": 0.02, + "grad_norm": 0.9929300678353943, + "learning_rate": 1.4831804281345568e-05, + "loss": 0.3224, "step": 485 }, { - "epoch": 0.03, - "grad_norm": 2.9816014322679436, - "learning_rate": 1.858508604206501e-05, - "loss": 0.5888, + "epoch": 0.02, + "grad_norm": 1.1985581374787173, + "learning_rate": 1.4862385321100918e-05, + "loss": 0.3821, "step": 486 }, { - "epoch": 0.03, - "grad_norm": 0.5289682974012722, - "learning_rate": 1.8623326959847036e-05, - "loss": 0.2015, + "epoch": 0.02, + "grad_norm": 1.8770875744301763, + "learning_rate": 1.4892966360856271e-05, + "loss": 0.2011, "step": 487 }, { - "epoch": 0.03, - "grad_norm": 0.6236586560186679, - "learning_rate": 1.8661567877629066e-05, - "loss": 0.3256, + "epoch": 0.02, + "grad_norm": 1.282651345783927, + "learning_rate": 1.4923547400611623e-05, + "loss": 0.3054, "step": 488 }, { - "epoch": 0.03, - "grad_norm": 0.6494916866609775, - "learning_rate": 1.8699808795411092e-05, - "loss": 0.3929, + "epoch": 0.02, + "grad_norm": 0.9533178221936187, + "learning_rate": 1.4954128440366972e-05, + "loss": 0.3556, "step": 489 }, { - "epoch": 0.03, - "grad_norm": 1.1871808128906545, - "learning_rate": 1.873804971319312e-05, - "loss": 0.6403, + "epoch": 0.02, + "grad_norm": 1.6014454797828033, + "learning_rate": 1.4984709480122325e-05, + "loss": 0.5903, "step": 490 }, { - "epoch": 0.03, - "grad_norm": 0.7214128916285852, - "learning_rate": 1.8776290630975145e-05, - "loss": 0.3347, + "epoch": 0.02, + "grad_norm": 1.1202974185833243, + "learning_rate": 1.5015290519877677e-05, + "loss": 0.4219, "step": 491 }, { - "epoch": 0.03, - "grad_norm": 0.6602266191900171, - "learning_rate": 1.8814531548757172e-05, - "loss": 0.3601, + "epoch": 0.02, + "grad_norm": 0.7783539187647316, + "learning_rate": 1.504587155963303e-05, + "loss": 0.3357, "step": 492 }, { - "epoch": 0.03, - "grad_norm": 0.694604817799548, - "learning_rate": 1.88527724665392e-05, - "loss": 0.1621, + "epoch": 0.02, + "grad_norm": 1.2577640719206675, + "learning_rate": 1.507645259938838e-05, + "loss": 0.2763, "step": 493 }, { - "epoch": 0.03, - "grad_norm": 0.7076032930955318, - "learning_rate": 1.8891013384321225e-05, - "loss": 0.3279, + "epoch": 0.02, + "grad_norm": 1.1266647794434705, + "learning_rate": 1.5107033639143731e-05, + "loss": 0.3716, "step": 494 }, { - "epoch": 0.03, - "grad_norm": 0.8666075990621492, - "learning_rate": 1.892925430210325e-05, - "loss": 0.4989, + "epoch": 0.02, + "grad_norm": 1.126172315991585, + "learning_rate": 1.5137614678899085e-05, + "loss": 0.428, "step": 495 }, { - "epoch": 0.03, - "grad_norm": 0.7678193345063739, - "learning_rate": 1.8967495219885278e-05, - "loss": 0.3314, + "epoch": 0.02, + "grad_norm": 1.3099132658854535, + "learning_rate": 1.5168195718654434e-05, + "loss": 0.4324, "step": 496 }, { - "epoch": 0.03, - "grad_norm": 0.555716909316726, - "learning_rate": 1.9005736137667305e-05, - "loss": 0.3502, + "epoch": 0.02, + "grad_norm": 0.6815149750374333, + "learning_rate": 1.5198776758409788e-05, + "loss": 0.3325, "step": 497 }, { - "epoch": 0.03, - "grad_norm": 0.8106958346923805, - "learning_rate": 1.904397705544933e-05, - "loss": 0.4847, + "epoch": 0.02, + "grad_norm": 0.9174933663288322, + "learning_rate": 1.5229357798165139e-05, + "loss": 0.4853, "step": 498 }, { - "epoch": 0.03, - "grad_norm": 0.5347451642599353, - "learning_rate": 1.9082217973231358e-05, - "loss": 0.1286, + "epoch": 0.02, + "grad_norm": 1.3981923289607003, + "learning_rate": 1.5259938837920492e-05, + "loss": 0.1075, "step": 499 }, { - "epoch": 0.03, - "grad_norm": 0.6613152718234083, - "learning_rate": 1.9120458891013384e-05, - "loss": 0.3141, + "epoch": 0.02, + "grad_norm": 0.7327084755885843, + "learning_rate": 1.5290519877675842e-05, + "loss": 0.3344, "step": 500 }, { - "epoch": 0.03, - "grad_norm": 0.9116473642098698, - "learning_rate": 1.9158699808795414e-05, - "loss": 0.4658, + "epoch": 0.02, + "grad_norm": 1.0327508058416586, + "learning_rate": 1.5321100917431192e-05, + "loss": 0.3957, "step": 501 }, { - "epoch": 0.03, - "grad_norm": 1.199682601869777, - "learning_rate": 1.919694072657744e-05, - "loss": 0.6147, + "epoch": 0.02, + "grad_norm": 2.255494235382191, + "learning_rate": 1.5351681957186545e-05, + "loss": 0.547, "step": 502 }, { - "epoch": 0.03, - "grad_norm": 0.7314781117930744, - "learning_rate": 1.9235181644359467e-05, - "loss": 0.2652, + "epoch": 0.02, + "grad_norm": 0.9140387628508145, + "learning_rate": 1.5382262996941898e-05, + "loss": 0.3239, "step": 503 }, { - "epoch": 0.03, - "grad_norm": 0.8325395903217065, - "learning_rate": 1.9273422562141494e-05, - "loss": 0.3995, + "epoch": 0.02, + "grad_norm": 1.04989856294697, + "learning_rate": 1.541284403669725e-05, + "loss": 0.4339, "step": 504 }, { - "epoch": 0.03, - "grad_norm": 0.5062483839186255, - "learning_rate": 1.931166347992352e-05, - "loss": 0.3077, + "epoch": 0.02, + "grad_norm": 0.8812860317360276, + "learning_rate": 1.54434250764526e-05, + "loss": 0.2674, "step": 505 }, { - "epoch": 0.03, - "grad_norm": 0.5967051191367326, - "learning_rate": 1.9349904397705547e-05, - "loss": 0.3258, + "epoch": 0.02, + "grad_norm": 1.831856893445324, + "learning_rate": 1.547400611620795e-05, + "loss": 0.4357, "step": 506 }, { - "epoch": 0.03, - "grad_norm": 0.8272368056777742, - "learning_rate": 1.9388145315487573e-05, - "loss": 0.3995, + "epoch": 0.02, + "grad_norm": 1.4056752506608128, + "learning_rate": 1.5504587155963304e-05, + "loss": 0.4382, "step": 507 }, { - "epoch": 0.03, - "grad_norm": 0.9304286527087062, - "learning_rate": 1.94263862332696e-05, - "loss": 0.3921, + "epoch": 0.02, + "grad_norm": 0.872167252270934, + "learning_rate": 1.5535168195718654e-05, + "loss": 0.4046, "step": 508 }, { - "epoch": 0.03, - "grad_norm": 0.5272587543969978, - "learning_rate": 1.9464627151051627e-05, - "loss": 0.2309, + "epoch": 0.02, + "grad_norm": 1.0399986025759034, + "learning_rate": 1.5565749235474007e-05, + "loss": 0.1933, "step": 509 }, { - "epoch": 0.03, - "grad_norm": 0.8486834700401805, - "learning_rate": 1.9502868068833653e-05, - "loss": 0.5258, + "epoch": 0.02, + "grad_norm": 0.8528721395695101, + "learning_rate": 1.559633027522936e-05, + "loss": 0.3721, "step": 510 }, { - "epoch": 0.03, - "grad_norm": 0.6631668030453906, - "learning_rate": 1.954110898661568e-05, - "loss": 0.4544, + "epoch": 0.02, + "grad_norm": 0.9150225679257917, + "learning_rate": 1.5626911314984713e-05, + "loss": 0.3397, "step": 511 }, { - "epoch": 0.03, - "grad_norm": 0.763621465095935, - "learning_rate": 1.9579349904397706e-05, - "loss": 0.2702, + "epoch": 0.02, + "grad_norm": 0.8806655034618698, + "learning_rate": 1.5657492354740063e-05, + "loss": 0.2922, "step": 512 }, { - "epoch": 0.03, - "grad_norm": 0.5529369486422537, - "learning_rate": 1.9617590822179733e-05, - "loss": 0.3651, + "epoch": 0.02, + "grad_norm": 0.8949009978261282, + "learning_rate": 1.5688073394495413e-05, + "loss": 0.3993, "step": 513 }, { - "epoch": 0.03, - "grad_norm": 0.648527669233175, - "learning_rate": 1.9655831739961763e-05, - "loss": 0.3392, + "epoch": 0.02, + "grad_norm": 0.9601737049681667, + "learning_rate": 1.5718654434250766e-05, + "loss": 0.2832, "step": 514 }, { - "epoch": 0.03, - "grad_norm": 0.8333363603111317, - "learning_rate": 1.969407265774379e-05, - "loss": 0.4271, + "epoch": 0.02, + "grad_norm": 0.875586350918399, + "learning_rate": 1.5749235474006116e-05, + "loss": 0.2977, "step": 515 }, { - "epoch": 0.03, - "grad_norm": 0.9578624303870017, - "learning_rate": 1.9732313575525816e-05, - "loss": 0.3442, + "epoch": 0.02, + "grad_norm": 0.7570352003479348, + "learning_rate": 1.577981651376147e-05, + "loss": 0.3545, "step": 516 }, { - "epoch": 0.03, - "grad_norm": 0.9338089452068773, - "learning_rate": 1.9770554493307842e-05, - "loss": 0.5805, + "epoch": 0.02, + "grad_norm": 1.5486300962026418, + "learning_rate": 1.5810397553516822e-05, + "loss": 0.6702, "step": 517 }, { - "epoch": 0.03, - "grad_norm": 0.7261028256374711, - "learning_rate": 1.980879541108987e-05, - "loss": 0.4576, + "epoch": 0.02, + "grad_norm": 0.7814457491381758, + "learning_rate": 1.5840978593272172e-05, + "loss": 0.3575, "step": 518 }, { - "epoch": 0.03, - "grad_norm": 0.6206482079476885, - "learning_rate": 1.9847036328871892e-05, - "loss": 0.2308, + "epoch": 0.02, + "grad_norm": 0.9243364397273003, + "learning_rate": 1.5871559633027525e-05, + "loss": 0.4523, "step": 519 }, { - "epoch": 0.03, - "grad_norm": 0.5731488669023147, - "learning_rate": 1.9885277246653922e-05, - "loss": 0.3147, + "epoch": 0.02, + "grad_norm": 0.6499495745889232, + "learning_rate": 1.5902140672782875e-05, + "loss": 0.2645, "step": 520 }, { - "epoch": 0.03, - "grad_norm": 0.5715431597204902, - "learning_rate": 1.992351816443595e-05, - "loss": 0.3604, + "epoch": 0.02, + "grad_norm": 1.2694639813178892, + "learning_rate": 1.593272171253823e-05, + "loss": 0.4202, "step": 521 }, { - "epoch": 0.03, - "grad_norm": 0.7894908009717267, - "learning_rate": 1.9961759082217975e-05, - "loss": 0.3063, + "epoch": 0.02, + "grad_norm": 0.7599068489701073, + "learning_rate": 1.5963302752293578e-05, + "loss": 0.3055, "step": 522 }, { - "epoch": 0.03, - "grad_norm": 0.8229915510438305, - "learning_rate": 2e-05, - "loss": 0.4539, + "epoch": 0.02, + "grad_norm": 0.9785632275788022, + "learning_rate": 1.599388379204893e-05, + "loss": 0.5777, "step": 523 }, { - "epoch": 0.03, - "grad_norm": 0.6027096957912251, - "learning_rate": 1.9999999826850218e-05, - "loss": 0.3497, + "epoch": 0.02, + "grad_norm": 0.6790897030633063, + "learning_rate": 1.602446483180428e-05, + "loss": 0.3636, "step": 524 }, { - "epoch": 0.03, - "grad_norm": 0.9160865998931952, - "learning_rate": 1.999999930740087e-05, - "loss": 0.2652, + "epoch": 0.02, + "grad_norm": 0.9140843119700909, + "learning_rate": 1.6055045871559634e-05, + "loss": 0.2676, "step": 525 }, { - "epoch": 0.03, - "grad_norm": 0.4656131298936089, - "learning_rate": 1.9999998441651974e-05, - "loss": 0.224, + "epoch": 0.02, + "grad_norm": 0.8736694215579709, + "learning_rate": 1.6085626911314988e-05, + "loss": 0.2191, "step": 526 }, { - "epoch": 0.03, - "grad_norm": 1.0868401681216302, - "learning_rate": 1.999999722960356e-05, - "loss": 0.5024, + "epoch": 0.02, + "grad_norm": 2.049300027295384, + "learning_rate": 1.6116207951070337e-05, + "loss": 0.7223, "step": 527 }, { - "epoch": 0.03, - "grad_norm": 1.0309672847824662, - "learning_rate": 1.9999995671255675e-05, - "loss": 0.3227, + "epoch": 0.02, + "grad_norm": 0.6778432735183949, + "learning_rate": 1.614678899082569e-05, + "loss": 0.2883, "step": 528 }, { - "epoch": 0.03, - "grad_norm": 0.9537024074650399, - "learning_rate": 1.999999376660837e-05, - "loss": 0.5217, + "epoch": 0.02, + "grad_norm": 1.0919065066083935, + "learning_rate": 1.617737003058104e-05, + "loss": 0.5876, "step": 529 }, { - "epoch": 0.03, - "grad_norm": 0.7989227071893219, - "learning_rate": 1.9999991515661712e-05, - "loss": 0.3248, + "epoch": 0.02, + "grad_norm": 0.9459794370883273, + "learning_rate": 1.6207951070336393e-05, + "loss": 0.432, "step": 530 }, { - "epoch": 0.03, - "grad_norm": 0.5587548459372644, - "learning_rate": 1.9999988918415777e-05, - "loss": 0.2575, + "epoch": 0.02, + "grad_norm": 0.7737494415657642, + "learning_rate": 1.6238532110091743e-05, + "loss": 0.2162, "step": 531 }, { - "epoch": 0.03, - "grad_norm": 0.6038947163659312, - "learning_rate": 1.9999985974870653e-05, - "loss": 0.2639, + "epoch": 0.02, + "grad_norm": 0.7317896407309216, + "learning_rate": 1.6269113149847096e-05, + "loss": 0.4076, "step": 532 }, { - "epoch": 0.03, - "grad_norm": 0.9005065511749109, - "learning_rate": 1.999998268502645e-05, - "loss": 0.424, + "epoch": 0.02, + "grad_norm": 0.8745741302314319, + "learning_rate": 1.629969418960245e-05, + "loss": 0.2933, "step": 533 }, { - "epoch": 0.03, - "grad_norm": 0.7791223727687504, - "learning_rate": 1.9999979048883275e-05, - "loss": 0.3973, + "epoch": 0.02, + "grad_norm": 0.7632785121254949, + "learning_rate": 1.63302752293578e-05, + "loss": 0.3399, "step": 534 }, { - "epoch": 0.03, - "grad_norm": 1.049952558614156, - "learning_rate": 1.999997506644125e-05, - "loss": 0.402, + "epoch": 0.02, + "grad_norm": 1.756992746168261, + "learning_rate": 1.6360856269113153e-05, + "loss": 0.5201, "step": 535 }, { - "epoch": 0.03, - "grad_norm": 0.654247607069547, - "learning_rate": 1.9999970737700526e-05, - "loss": 0.3599, + "epoch": 0.02, + "grad_norm": 0.7508433564040993, + "learning_rate": 1.6391437308868502e-05, + "loss": 0.3777, "step": 536 }, { - "epoch": 0.03, - "grad_norm": 0.794701933156306, - "learning_rate": 1.999996606266124e-05, - "loss": 0.3839, + "epoch": 0.02, + "grad_norm": 0.7453562420281863, + "learning_rate": 1.6422018348623852e-05, + "loss": 0.3603, "step": 537 }, { - "epoch": 0.03, - "grad_norm": 0.7277135121566177, - "learning_rate": 1.999996104132356e-05, - "loss": 0.235, + "epoch": 0.02, + "grad_norm": 0.9641580952876424, + "learning_rate": 1.6452599388379205e-05, + "loss": 0.2439, "step": 538 }, { - "epoch": 0.03, - "grad_norm": 0.4981718868298558, - "learning_rate": 1.9999955673687663e-05, - "loss": 0.2761, + "epoch": 0.02, + "grad_norm": 0.98778493857445, + "learning_rate": 1.648318042813456e-05, + "loss": 0.2677, "step": 539 }, { - "epoch": 0.03, - "grad_norm": 0.7402595774332386, - "learning_rate": 1.999994995975373e-05, - "loss": 0.3425, + "epoch": 0.02, + "grad_norm": 1.0128604636517755, + "learning_rate": 1.6513761467889912e-05, + "loss": 0.3895, "step": 540 }, { - "epoch": 0.03, - "grad_norm": 1.5576818896204434, - "learning_rate": 1.9999943899521955e-05, - "loss": 0.587, + "epoch": 0.02, + "grad_norm": 1.0705232861062697, + "learning_rate": 1.654434250764526e-05, + "loss": 0.4209, "step": 541 }, { - "epoch": 0.03, - "grad_norm": 0.5357370858034539, - "learning_rate": 1.9999937492992558e-05, - "loss": 0.2118, + "epoch": 0.02, + "grad_norm": 1.4487232667085705, + "learning_rate": 1.657492354740061e-05, + "loss": 0.4326, "step": 542 }, { - "epoch": 0.03, - "grad_norm": 0.8512678621652553, - "learning_rate": 1.9999930740165755e-05, - "loss": 0.3902, + "epoch": 0.02, + "grad_norm": 0.7209177262689151, + "learning_rate": 1.6605504587155964e-05, + "loss": 0.359, "step": 543 }, { - "epoch": 0.03, - "grad_norm": 0.8553424623445812, - "learning_rate": 1.999992364104178e-05, - "loss": 0.3927, + "epoch": 0.02, + "grad_norm": 0.7321592298041024, + "learning_rate": 1.6636085626911314e-05, + "loss": 0.3459, "step": 544 }, { "epoch": 0.03, - "grad_norm": 1.5979085965888817, - "learning_rate": 1.9999916195620875e-05, - "loss": 0.3931, + "grad_norm": 0.8578179072398843, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.187, "step": 545 }, { "epoch": 0.03, - "grad_norm": 0.7901713482230103, - "learning_rate": 1.9999908403903307e-05, - "loss": 0.3896, + "grad_norm": 0.6947439082731846, + "learning_rate": 1.669724770642202e-05, + "loss": 0.3227, "step": 546 }, { "epoch": 0.03, - "grad_norm": 0.394859846631288, - "learning_rate": 1.999990026588934e-05, - "loss": 0.2883, + "grad_norm": 1.4462703552851028, + "learning_rate": 1.672782874617737e-05, + "loss": 0.5991, "step": 547 }, { "epoch": 0.03, - "grad_norm": 0.7640357579682913, - "learning_rate": 1.9999891781579256e-05, - "loss": 0.3455, + "grad_norm": 0.7741927327467244, + "learning_rate": 1.6758409785932724e-05, + "loss": 0.3243, "step": 548 }, { "epoch": 0.03, - "grad_norm": 0.6769055305092657, - "learning_rate": 1.9999882950973352e-05, - "loss": 0.3481, + "grad_norm": 0.6849085032519028, + "learning_rate": 1.6788990825688073e-05, + "loss": 0.2896, "step": 549 }, { "epoch": 0.03, - "grad_norm": 1.9577307589064314, - "learning_rate": 1.999987377407193e-05, - "loss": 0.7917, + "grad_norm": 2.202215894736286, + "learning_rate": 1.6819571865443427e-05, + "loss": 0.8234, "step": 550 }, { "epoch": 0.03, - "grad_norm": 0.9439456813870318, - "learning_rate": 1.9999864250875305e-05, - "loss": 0.2947, + "grad_norm": 1.2074253208145473, + "learning_rate": 1.6850152905198776e-05, + "loss": 0.4511, "step": 551 }, { "epoch": 0.03, - "grad_norm": 0.8175729706645952, - "learning_rate": 1.9999854381383817e-05, - "loss": 0.3907, + "grad_norm": 0.6959016215409739, + "learning_rate": 1.688073394495413e-05, + "loss": 0.3307, "step": 552 }, { "epoch": 0.03, - "grad_norm": 0.4732231542439032, - "learning_rate": 1.99998441655978e-05, - "loss": 0.2284, + "grad_norm": 0.8729185268229616, + "learning_rate": 1.6911314984709483e-05, + "loss": 0.3558, "step": 553 }, { "epoch": 0.03, - "grad_norm": 1.238787974808822, - "learning_rate": 1.999983360351761e-05, - "loss": 0.5136, + "grad_norm": 0.46330907769153035, + "learning_rate": 1.6941896024464833e-05, + "loss": 0.1798, "step": 554 }, { "epoch": 0.03, - "grad_norm": 0.7050079922045303, - "learning_rate": 1.999982269514361e-05, - "loss": 0.3216, + "grad_norm": 0.9333485199345983, + "learning_rate": 1.6972477064220186e-05, + "loss": 0.3855, "step": 555 }, { "epoch": 0.03, - "grad_norm": 0.8573120183532486, - "learning_rate": 1.9999811440476182e-05, - "loss": 0.3806, + "grad_norm": 0.8694759665134488, + "learning_rate": 1.7003058103975536e-05, + "loss": 0.413, "step": 556 }, { "epoch": 0.03, - "grad_norm": 1.2215743690492187, - "learning_rate": 1.999979983951571e-05, - "loss": 0.5398, + "grad_norm": 2.140666296733427, + "learning_rate": 1.703363914373089e-05, + "loss": 0.4751, "step": 557 }, { "epoch": 0.03, - "grad_norm": 0.6446022868528241, - "learning_rate": 1.9999787892262605e-05, - "loss": 0.3098, + "grad_norm": 0.8099459746301703, + "learning_rate": 1.706422018348624e-05, + "loss": 0.3288, "step": 558 }, { "epoch": 0.03, - "grad_norm": 0.5002653329984211, - "learning_rate": 1.9999775598717276e-05, - "loss": 0.2903, + "grad_norm": 0.6498983749150069, + "learning_rate": 1.709480122324159e-05, + "loss": 0.2493, "step": 559 }, { "epoch": 0.03, - "grad_norm": 0.5895344073161108, - "learning_rate": 1.9999762958880145e-05, - "loss": 0.2155, + "grad_norm": 0.8525040848039012, + "learning_rate": 1.7125382262996945e-05, + "loss": 0.344, "step": 560 }, { "epoch": 0.03, - "grad_norm": 0.8267429050787638, - "learning_rate": 1.9999749972751653e-05, - "loss": 0.3351, + "grad_norm": 0.7954311518183899, + "learning_rate": 1.7155963302752295e-05, + "loss": 0.2824, "step": 561 }, { "epoch": 0.03, - "grad_norm": 1.353467415655348, - "learning_rate": 1.999973664033225e-05, - "loss": 0.5793, + "grad_norm": 2.227621674589291, + "learning_rate": 1.7186544342507648e-05, + "loss": 0.7956, "step": 562 }, { "epoch": 0.03, - "grad_norm": 0.7361197465689773, - "learning_rate": 1.9999722961622395e-05, - "loss": 0.3886, + "grad_norm": 1.7872297509319615, + "learning_rate": 1.7217125382262998e-05, + "loss": 0.5998, "step": 563 }, { "epoch": 0.03, - "grad_norm": 0.5447246086308706, - "learning_rate": 1.9999708936622564e-05, - "loss": 0.2858, + "grad_norm": 0.8126031175413392, + "learning_rate": 1.724770642201835e-05, + "loss": 0.2885, "step": 564 }, { "epoch": 0.03, - "grad_norm": 0.5984252294712458, - "learning_rate": 1.9999694565333246e-05, - "loss": 0.1902, + "grad_norm": 0.7245772600476947, + "learning_rate": 1.72782874617737e-05, + "loss": 0.2604, "step": 565 }, { "epoch": 0.03, - "grad_norm": 1.482940990942458, - "learning_rate": 1.999967984775493e-05, - "loss": 0.724, + "grad_norm": 2.337651420009429, + "learning_rate": 1.7308868501529054e-05, + "loss": 0.8435, "step": 566 }, { "epoch": 0.03, - "grad_norm": 0.6741795289929788, - "learning_rate": 1.9999664783888138e-05, - "loss": 0.3405, + "grad_norm": 1.1724205316352214, + "learning_rate": 1.7339449541284407e-05, + "loss": 0.2932, "step": 567 }, { "epoch": 0.03, - "grad_norm": 0.9182428191621349, - "learning_rate": 1.9999649373733377e-05, - "loss": 0.3617, + "grad_norm": 1.1874596269657847, + "learning_rate": 1.7370030581039757e-05, + "loss": 0.3898, "step": 568 }, { "epoch": 0.03, - "grad_norm": 1.6389573056553957, - "learning_rate": 1.9999633617291196e-05, - "loss": 0.6102, + "grad_norm": 2.6023961807171903, + "learning_rate": 1.740061162079511e-05, + "loss": 0.6172, "step": 569 }, { "epoch": 0.03, - "grad_norm": 0.6475024165740241, - "learning_rate": 1.9999617514562125e-05, - "loss": 0.3249, + "grad_norm": 0.6433772543521216, + "learning_rate": 1.743119266055046e-05, + "loss": 0.2791, "step": 570 }, { "epoch": 0.03, - "grad_norm": 0.7182517478278182, - "learning_rate": 1.9999601065546733e-05, - "loss": 0.3586, + "grad_norm": 0.5450377729561972, + "learning_rate": 1.746177370030581e-05, + "loss": 0.2039, "step": 571 }, { "epoch": 0.03, - "grad_norm": 0.6687765532978037, - "learning_rate": 1.9999584270245588e-05, - "loss": 0.206, + "grad_norm": 0.9061500979243665, + "learning_rate": 1.7492354740061163e-05, + "loss": 0.3976, "step": 572 }, { "epoch": 0.03, - "grad_norm": 0.7089634046898335, - "learning_rate": 1.9999567128659267e-05, - "loss": 0.3192, + "grad_norm": 0.8488602857785469, + "learning_rate": 1.7522935779816516e-05, + "loss": 0.299, "step": 573 }, { "epoch": 0.03, - "grad_norm": 2.260767215457327, - "learning_rate": 1.9999549640788368e-05, - "loss": 0.5278, + "grad_norm": 4.250866575345858, + "learning_rate": 1.755351681957187e-05, + "loss": 0.5778, "step": 574 }, { "epoch": 0.03, - "grad_norm": 0.6480748864054804, - "learning_rate": 1.9999531806633493e-05, - "loss": 0.3919, + "grad_norm": 1.2581616477898643, + "learning_rate": 1.758409785932722e-05, + "loss": 0.4166, "step": 575 }, { "epoch": 0.03, - "grad_norm": 0.675673550540879, - "learning_rate": 1.9999513626195265e-05, - "loss": 0.3158, + "grad_norm": 0.751805311621028, + "learning_rate": 1.7614678899082572e-05, + "loss": 0.3315, "step": 576 }, { "epoch": 0.03, - "grad_norm": 1.6538901519919962, - "learning_rate": 1.9999495099474306e-05, - "loss": 0.831, + "grad_norm": 0.661720626741957, + "learning_rate": 1.7645259938837922e-05, + "loss": 0.252, "step": 577 }, { "epoch": 0.03, - "grad_norm": 0.5293004891675508, - "learning_rate": 1.9999476226471265e-05, - "loss": 0.1482, + "grad_norm": 0.8223523821071027, + "learning_rate": 1.767584097859327e-05, + "loss": 0.3536, "step": 578 }, { "epoch": 0.03, - "grad_norm": 0.9542678819942969, - "learning_rate": 1.999945700718679e-05, - "loss": 0.3594, + "grad_norm": 0.7458239397231019, + "learning_rate": 1.7706422018348625e-05, + "loss": 0.3387, "step": 579 }, { "epoch": 0.03, - "grad_norm": 1.3111617705092098, - "learning_rate": 1.9999437441621547e-05, - "loss": 0.4239, + "grad_norm": 1.3601661321129337, + "learning_rate": 1.7737003058103978e-05, + "loss": 0.3918, "step": 580 }, { "epoch": 0.03, - "grad_norm": 2.0926709671371593, - "learning_rate": 1.9999417529776218e-05, - "loss": 0.5737, + "grad_norm": 2.352395200191048, + "learning_rate": 1.7767584097859328e-05, + "loss": 0.7019, "step": 581 }, { "epoch": 0.03, - "grad_norm": 0.6708490869863095, - "learning_rate": 1.999939727165149e-05, - "loss": 0.3331, + "grad_norm": 0.6301710310142474, + "learning_rate": 1.779816513761468e-05, + "loss": 0.3119, "step": 582 }, { "epoch": 0.03, - "grad_norm": 0.7622221883483998, - "learning_rate": 1.9999376667248068e-05, - "loss": 0.3782, + "grad_norm": 0.742062036460496, + "learning_rate": 1.782874617737003e-05, + "loss": 0.3369, "step": 583 }, { "epoch": 0.03, - "grad_norm": 0.7441804908580606, - "learning_rate": 1.9999355716566655e-05, - "loss": 0.1226, + "grad_norm": 0.4965118762174356, + "learning_rate": 1.7859327217125384e-05, + "loss": 0.2229, "step": 584 }, { "epoch": 0.03, - "grad_norm": 0.899774384946839, - "learning_rate": 1.999933441960799e-05, - "loss": 0.3422, + "grad_norm": 0.6249186191082478, + "learning_rate": 1.7889908256880734e-05, + "loss": 0.3162, "step": 585 }, { "epoch": 0.03, - "grad_norm": 3.4082682727776334, - "learning_rate": 1.9999312776372798e-05, - "loss": 0.6145, + "grad_norm": 1.7517714202897434, + "learning_rate": 1.7920489296636087e-05, + "loss": 0.6236, "step": 586 }, { "epoch": 0.03, - "grad_norm": 0.9534436469990148, - "learning_rate": 1.9999290786861837e-05, - "loss": 0.3507, + "grad_norm": 0.8637113701816764, + "learning_rate": 1.795107033639144e-05, + "loss": 0.3579, "step": 587 }, { "epoch": 0.03, - "grad_norm": 0.7050693202744416, - "learning_rate": 1.999926845107587e-05, - "loss": 0.3421, + "grad_norm": 0.6399565201846126, + "learning_rate": 1.798165137614679e-05, + "loss": 0.3067, "step": 588 }, { "epoch": 0.03, - "grad_norm": 1.7025023466205644, - "learning_rate": 1.9999245769015663e-05, - "loss": 0.7674, + "grad_norm": 1.6765949487586396, + "learning_rate": 1.8012232415902143e-05, + "loss": 0.7398, "step": 589 }, { "epoch": 0.03, - "grad_norm": 0.7588867665045335, - "learning_rate": 1.9999222740682004e-05, - "loss": 0.2988, + "grad_norm": 0.43464764170156306, + "learning_rate": 1.8042813455657493e-05, + "loss": 0.1918, "step": 590 }, { "epoch": 0.03, - "grad_norm": 0.8053056561388221, - "learning_rate": 1.9999199366075694e-05, - "loss": 0.279, + "grad_norm": 0.6504943597783651, + "learning_rate": 1.8073394495412846e-05, + "loss": 0.3636, "step": 591 }, { "epoch": 0.03, - "grad_norm": 2.7201821145047025, - "learning_rate": 1.9999175645197537e-05, - "loss": 0.6306, + "grad_norm": 0.6509793922605535, + "learning_rate": 1.8103975535168196e-05, + "loss": 0.3744, "step": 592 }, { "epoch": 0.03, - "grad_norm": 1.6922097540301058, - "learning_rate": 1.9999151578048357e-05, - "loss": 0.806, + "grad_norm": 1.536685794905627, + "learning_rate": 1.813455657492355e-05, + "loss": 0.6188, "step": 593 }, { "epoch": 0.03, - "grad_norm": 0.5154629765050703, - "learning_rate": 1.9999127164628992e-05, - "loss": 0.2397, + "grad_norm": 0.6697907990817732, + "learning_rate": 1.81651376146789e-05, + "loss": 0.319, "step": 594 }, { "epoch": 0.03, - "grad_norm": 0.6208505950553166, - "learning_rate": 1.999910240494028e-05, - "loss": 0.3727, + "grad_norm": 0.8474550155861955, + "learning_rate": 1.8195718654434252e-05, + "loss": 0.47, "step": 595 }, { "epoch": 0.03, - "grad_norm": 0.6884024496186021, - "learning_rate": 1.9999077298983084e-05, - "loss": 0.2778, + "grad_norm": 0.46761997406859507, + "learning_rate": 1.8226299694189605e-05, + "loss": 0.2631, "step": 596 }, { "epoch": 0.03, - "grad_norm": 0.7202762821630867, - "learning_rate": 1.9999051846758267e-05, - "loss": 0.2434, + "grad_norm": 0.6926844267337992, + "learning_rate": 1.8256880733944955e-05, + "loss": 0.3056, "step": 597 }, { "epoch": 0.03, - "grad_norm": 2.2942921881997282, - "learning_rate": 1.999902604826672e-05, - "loss": 0.7034, + "grad_norm": 1.0323910442931747, + "learning_rate": 1.8287461773700308e-05, + "loss": 0.4921, "step": 598 }, { "epoch": 0.03, - "grad_norm": 0.772582124815898, - "learning_rate": 1.9998999903509326e-05, - "loss": 0.4107, + "grad_norm": 0.6445634672872429, + "learning_rate": 1.8318042813455658e-05, + "loss": 0.4193, "step": 599 }, { "epoch": 0.03, - "grad_norm": 0.6564284208707677, - "learning_rate": 1.9998973412487e-05, - "loss": 0.2485, + "grad_norm": 0.5852737619260164, + "learning_rate": 1.834862385321101e-05, + "loss": 0.2272, "step": 600 }, { "epoch": 0.03, - "grad_norm": 4.5218916723217415, - "learning_rate": 1.9998946575200652e-05, - "loss": 0.6996, + "grad_norm": 0.8889726388942958, + "learning_rate": 1.837920489296636e-05, + "loss": 0.4455, "step": 601 }, { "epoch": 0.03, - "grad_norm": 1.2246507040544916, - "learning_rate": 1.9998919391651214e-05, - "loss": 0.5521, + "grad_norm": 0.5006919792552491, + "learning_rate": 1.8409785932721714e-05, + "loss": 0.2565, "step": 602 }, { "epoch": 0.03, - "grad_norm": 0.6937961374682375, - "learning_rate": 1.9998891861839627e-05, - "loss": 0.3197, + "grad_norm": 0.5649618018359593, + "learning_rate": 1.8440366972477067e-05, + "loss": 0.3215, "step": 603 }, { "epoch": 0.03, - "grad_norm": 1.0318409532385129, - "learning_rate": 1.9998863985766845e-05, - "loss": 0.2606, + "grad_norm": 0.6099383032481673, + "learning_rate": 1.8470948012232417e-05, + "loss": 0.4097, "step": 604 }, { "epoch": 0.03, - "grad_norm": 2.722572887274754, - "learning_rate": 1.9998835763433836e-05, - "loss": 0.8884, + "grad_norm": 1.8919272703625916, + "learning_rate": 1.850152905198777e-05, + "loss": 0.8647, "step": 605 }, { "epoch": 0.03, - "grad_norm": 0.6300069061706091, - "learning_rate": 1.999880719484157e-05, - "loss": 0.3101, + "grad_norm": 0.6096233623462712, + "learning_rate": 1.853211009174312e-05, + "loss": 0.273, "step": 606 }, { "epoch": 0.03, - "grad_norm": 0.6983482017923085, - "learning_rate": 1.9998778279991042e-05, - "loss": 0.3086, + "grad_norm": 1.8445469276492599, + "learning_rate": 1.856269113149847e-05, + "loss": 0.7694, "step": 607 }, { "epoch": 0.03, - "grad_norm": 2.0707704364945454, - "learning_rate": 1.999874901888325e-05, - "loss": 0.6135, + "grad_norm": 0.5655396878029959, + "learning_rate": 1.8593272171253823e-05, + "loss": 0.4023, "step": 608 }, { "epoch": 0.03, - "grad_norm": 0.5749567826655014, - "learning_rate": 1.9998719411519215e-05, - "loss": 0.3335, + "grad_norm": 0.7003000363944234, + "learning_rate": 1.8623853211009176e-05, + "loss": 0.4023, "step": 609 }, { - "epoch": 0.04, - "grad_norm": 0.5182988783531453, - "learning_rate": 1.9998689457899955e-05, - "loss": 0.1555, + "epoch": 0.03, + "grad_norm": 0.4445205314233728, + "learning_rate": 1.865443425076453e-05, + "loss": 0.1796, "step": 610 }, { - "epoch": 0.04, - "grad_norm": 0.7617833054443183, - "learning_rate": 1.999865915802651e-05, - "loss": 0.4108, + "epoch": 0.03, + "grad_norm": 0.6898680598343396, + "learning_rate": 1.868501529051988e-05, + "loss": 0.3761, "step": 611 }, { - "epoch": 0.04, - "grad_norm": 1.1526576959541612, - "learning_rate": 1.9998628511899925e-05, - "loss": 0.4166, + "epoch": 0.03, + "grad_norm": 1.3202911728885998, + "learning_rate": 1.8715596330275232e-05, + "loss": 0.4715, "step": 612 }, { - "epoch": 0.04, - "grad_norm": 0.6571422467528628, - "learning_rate": 1.999859751952127e-05, - "loss": 0.3332, + "epoch": 0.03, + "grad_norm": 0.646909924603647, + "learning_rate": 1.8746177370030582e-05, + "loss": 0.2493, "step": 613 }, { - "epoch": 0.04, - "grad_norm": 0.7513551490147077, - "learning_rate": 1.9998566180891606e-05, - "loss": 0.4341, + "epoch": 0.03, + "grad_norm": 1.0767242189370316, + "learning_rate": 1.8776758409785932e-05, + "loss": 0.5681, "step": 614 }, { - "epoch": 0.04, - "grad_norm": 0.5806459633657569, - "learning_rate": 1.9998534496012026e-05, - "loss": 0.3158, + "epoch": 0.03, + "grad_norm": 0.6858692085214954, + "learning_rate": 1.8807339449541285e-05, + "loss": 0.3159, "step": 615 }, { - "epoch": 0.04, - "grad_norm": 1.6232848400792341, - "learning_rate": 1.9998502464883632e-05, - "loss": 0.2476, + "epoch": 0.03, + "grad_norm": 0.48385196454453877, + "learning_rate": 1.883792048929664e-05, + "loss": 0.2351, "step": 616 }, { - "epoch": 0.04, - "grad_norm": 2.5384024602749533, - "learning_rate": 1.9998470087507522e-05, - "loss": 0.6709, + "epoch": 0.03, + "grad_norm": 1.8706918032724593, + "learning_rate": 1.886850152905199e-05, + "loss": 0.78, "step": 617 }, { - "epoch": 0.04, - "grad_norm": 0.6685859795334497, - "learning_rate": 1.9998437363884825e-05, - "loss": 0.3515, + "epoch": 0.03, + "grad_norm": 0.9088513540672198, + "learning_rate": 1.889908256880734e-05, + "loss": 0.4208, "step": 618 }, { - "epoch": 0.04, - "grad_norm": 0.5741696913240106, - "learning_rate": 1.999840429401667e-05, - "loss": 0.3636, + "epoch": 0.03, + "grad_norm": 0.5565090932500756, + "learning_rate": 1.892966360856269e-05, + "loss": 0.3059, "step": 619 }, { - "epoch": 0.04, - "grad_norm": 1.5049238219312997, - "learning_rate": 1.9998370877904208e-05, - "loss": 0.5154, + "epoch": 0.03, + "grad_norm": 0.9793562355809654, + "learning_rate": 1.8960244648318044e-05, + "loss": 0.5401, "step": 620 }, { - "epoch": 0.04, - "grad_norm": 0.672421234183969, - "learning_rate": 1.9998337115548588e-05, - "loss": 0.2922, + "epoch": 0.03, + "grad_norm": 0.8538753484296357, + "learning_rate": 1.8990825688073394e-05, + "loss": 0.4252, "step": 621 }, { - "epoch": 0.04, - "grad_norm": 2.2798101602612184, - "learning_rate": 1.999830300695099e-05, - "loss": 0.3201, + "epoch": 0.03, + "grad_norm": 0.46124442975487, + "learning_rate": 1.9021406727828747e-05, + "loss": 0.2288, "step": 622 }, { - "epoch": 0.04, - "grad_norm": 0.6970423165330458, - "learning_rate": 1.9998268552112586e-05, - "loss": 0.3156, + "epoch": 0.03, + "grad_norm": 0.6789815026384124, + "learning_rate": 1.90519877675841e-05, + "loss": 0.3355, "step": 623 }, { - "epoch": 0.04, - "grad_norm": 0.4610523510184624, - "learning_rate": 1.999823375103457e-05, - "loss": 0.2628, + "epoch": 0.03, + "grad_norm": 0.7111122903322493, + "learning_rate": 1.9082568807339454e-05, + "loss": 0.3881, "step": 624 }, { - "epoch": 0.04, - "grad_norm": 2.7809585438545255, - "learning_rate": 1.9998198603718148e-05, - "loss": 0.6929, + "epoch": 0.03, + "grad_norm": 0.8226479233299241, + "learning_rate": 1.9113149847094803e-05, + "loss": 0.4525, "step": 625 }, { - "epoch": 0.04, - "grad_norm": 0.7009702901425052, - "learning_rate": 1.9998163110164543e-05, - "loss": 0.4038, + "epoch": 0.03, + "grad_norm": 1.6297482386105937, + "learning_rate": 1.9143730886850153e-05, + "loss": 0.5456, "step": 626 }, { - "epoch": 0.04, - "grad_norm": 0.7154276273247365, - "learning_rate": 1.9998127270374975e-05, - "loss": 0.3343, + "epoch": 0.03, + "grad_norm": 0.5737852969913682, + "learning_rate": 1.9174311926605506e-05, + "loss": 0.3054, "step": 627 }, { - "epoch": 0.04, - "grad_norm": 0.6401189026507259, - "learning_rate": 1.999809108435069e-05, - "loss": 0.2824, + "epoch": 0.03, + "grad_norm": 0.9456525867708102, + "learning_rate": 1.9204892966360856e-05, + "loss": 0.3724, "step": 628 }, { - "epoch": 0.04, - "grad_norm": 1.1197570166445445, - "learning_rate": 1.9998054552092943e-05, - "loss": 0.3704, + "epoch": 0.03, + "grad_norm": 0.6013425890007309, + "learning_rate": 1.923547400611621e-05, + "loss": 0.1528, "step": 629 }, { - "epoch": 0.04, - "grad_norm": 0.6645133821988366, - "learning_rate": 1.9998017673602996e-05, - "loss": 0.2726, + "epoch": 0.03, + "grad_norm": 1.2536824530197315, + "learning_rate": 1.9266055045871563e-05, + "loss": 0.433, "step": 630 }, { - "epoch": 0.04, - "grad_norm": 0.6906799629741556, - "learning_rate": 1.9997980448882125e-05, - "loss": 0.3588, + "epoch": 0.03, + "grad_norm": 0.7670470180194572, + "learning_rate": 1.9296636085626912e-05, + "loss": 0.3641, "step": 631 }, { - "epoch": 0.04, - "grad_norm": 1.227238559775126, - "learning_rate": 1.9997942877931624e-05, - "loss": 0.6336, + "epoch": 0.03, + "grad_norm": 1.3528669925857149, + "learning_rate": 1.9327217125382266e-05, + "loss": 0.4655, "step": 632 }, { - "epoch": 0.04, - "grad_norm": 0.7603717426281402, - "learning_rate": 1.9997904960752794e-05, - "loss": 0.2539, + "epoch": 0.03, + "grad_norm": 1.1429141623250159, + "learning_rate": 1.9357798165137615e-05, + "loss": 0.4517, "step": 633 }, { - "epoch": 0.04, - "grad_norm": 0.7780626216278012, - "learning_rate": 1.999786669734694e-05, - "loss": 0.4715, + "epoch": 0.03, + "grad_norm": 0.5564169184424588, + "learning_rate": 1.938837920489297e-05, + "loss": 0.2639, "step": 634 }, { - "epoch": 0.04, - "grad_norm": 0.5482206816162322, - "learning_rate": 1.999782808771539e-05, - "loss": 0.3319, + "epoch": 0.03, + "grad_norm": 0.7186284871364891, + "learning_rate": 1.9418960244648318e-05, + "loss": 0.4095, "step": 635 }, { - "epoch": 0.04, - "grad_norm": 0.7702010294483638, - "learning_rate": 1.999778913185949e-05, - "loss": 0.2772, + "epoch": 0.03, + "grad_norm": 0.745074349973661, + "learning_rate": 1.944954128440367e-05, + "loss": 0.1654, "step": 636 }, { - "epoch": 0.04, - "grad_norm": 0.5466923355560228, - "learning_rate": 1.9997749829780577e-05, - "loss": 0.3879, + "epoch": 0.03, + "grad_norm": 1.0342172010641828, + "learning_rate": 1.9480122324159025e-05, + "loss": 0.424, "step": 637 }, { - "epoch": 0.04, - "grad_norm": 0.7200638224194983, - "learning_rate": 1.9997710181480018e-05, - "loss": 0.381, + "epoch": 0.03, + "grad_norm": 1.696175127438202, + "learning_rate": 1.9510703363914374e-05, + "loss": 0.6544, "step": 638 }, { - "epoch": 0.04, - "grad_norm": 0.47588589352893357, - "learning_rate": 1.9997670186959187e-05, - "loss": 0.288, + "epoch": 0.03, + "grad_norm": 0.6218422534174443, + "learning_rate": 1.9541284403669728e-05, + "loss": 0.3074, "step": 639 }, { - "epoch": 0.04, - "grad_norm": 0.8159078143196873, - "learning_rate": 1.999762984621947e-05, - "loss": 0.4299, + "epoch": 0.03, + "grad_norm": 0.6671118160732972, + "learning_rate": 1.9571865443425077e-05, + "loss": 0.4002, "step": 640 }, { - "epoch": 0.04, - "grad_norm": 0.764774529343326, - "learning_rate": 1.9997589159262255e-05, - "loss": 0.3122, + "epoch": 0.03, + "grad_norm": 2.13420600308768, + "learning_rate": 1.960244648318043e-05, + "loss": 0.8405, "step": 641 }, { - "epoch": 0.04, - "grad_norm": 0.5095595388066965, - "learning_rate": 1.9997548126088963e-05, - "loss": 0.3581, + "epoch": 0.03, + "grad_norm": 1.079641507540786, + "learning_rate": 1.963302752293578e-05, + "loss": 0.2949, "step": 642 }, { - "epoch": 0.04, - "grad_norm": 0.5936175913635908, - "learning_rate": 1.9997506746701006e-05, - "loss": 0.3294, + "epoch": 0.03, + "grad_norm": 0.8236367982538636, + "learning_rate": 1.9663608562691134e-05, + "loss": 0.3382, "step": 643 }, { - "epoch": 0.04, - "grad_norm": 0.5773573972412904, - "learning_rate": 1.9997465021099818e-05, - "loss": 0.4026, + "epoch": 0.03, + "grad_norm": 0.7314061796585635, + "learning_rate": 1.9694189602446487e-05, + "loss": 0.3797, "step": 644 }, { - "epoch": 0.04, - "grad_norm": 0.5790983443211878, - "learning_rate": 1.9997422949286852e-05, - "loss": 0.3016, + "epoch": 0.03, + "grad_norm": 0.5909264738534531, + "learning_rate": 1.9724770642201837e-05, + "loss": 0.2392, "step": 645 }, { - "epoch": 0.04, - "grad_norm": 1.5202482266217976, - "learning_rate": 1.9997380531263555e-05, - "loss": 0.3345, + "epoch": 0.03, + "grad_norm": 0.8683891194604073, + "learning_rate": 1.975535168195719e-05, + "loss": 0.4147, "step": 646 }, { - "epoch": 0.04, - "grad_norm": 0.5326923128194373, - "learning_rate": 1.99973377670314e-05, - "loss": 0.4049, + "epoch": 0.03, + "grad_norm": 0.8060486115298693, + "learning_rate": 1.978593272171254e-05, + "loss": 0.4212, "step": 647 }, { - "epoch": 0.04, - "grad_norm": 0.6871631996017432, - "learning_rate": 1.999729465659187e-05, - "loss": 0.4462, + "epoch": 0.03, + "grad_norm": 1.2932079610196028, + "learning_rate": 1.9816513761467893e-05, + "loss": 0.5297, "step": 648 }, { - "epoch": 0.04, - "grad_norm": 0.5156786584531907, - "learning_rate": 1.9997251199946456e-05, - "loss": 0.2645, + "epoch": 0.03, + "grad_norm": 0.5747385443367162, + "learning_rate": 1.9847094801223243e-05, + "loss": 0.3293, "step": 649 }, { - "epoch": 0.04, - "grad_norm": 0.523957794870545, - "learning_rate": 1.999720739709666e-05, - "loss": 0.3247, + "epoch": 0.03, + "grad_norm": 0.576213068735911, + "learning_rate": 1.9877675840978596e-05, + "loss": 0.2407, "step": 650 }, { - "epoch": 0.04, - "grad_norm": 1.1519934314835216, - "learning_rate": 1.9997163248044008e-05, - "loss": 0.4499, + "epoch": 0.03, + "grad_norm": 0.7787385181904303, + "learning_rate": 1.9908256880733945e-05, + "loss": 0.3833, "step": 651 }, { - "epoch": 0.04, - "grad_norm": 0.714868485827706, - "learning_rate": 1.9997118752790016e-05, - "loss": 0.2821, + "epoch": 0.03, + "grad_norm": 0.6811038827845374, + "learning_rate": 1.99388379204893e-05, + "loss": 0.2424, "step": 652 }, { - "epoch": 0.04, - "grad_norm": 0.732881875664929, - "learning_rate": 1.9997073911336234e-05, - "loss": 0.5607, + "epoch": 0.03, + "grad_norm": 1.3807143225609593, + "learning_rate": 1.9969418960244652e-05, + "loss": 0.6969, "step": 653 }, { - "epoch": 0.04, - "grad_norm": 0.6503986849828309, - "learning_rate": 1.9997028723684213e-05, - "loss": 0.3507, + "epoch": 0.03, + "grad_norm": 0.7367678546216719, + "learning_rate": 2e-05, + "loss": 0.4626, "step": 654 }, { - "epoch": 0.04, - "grad_norm": 0.5726787275180806, - "learning_rate": 1.999698318983552e-05, - "loss": 0.3602, + "epoch": 0.03, + "grad_norm": 0.6994972573022487, + "learning_rate": 1.999999988930482e-05, + "loss": 0.2692, "step": 655 }, { - "epoch": 0.04, - "grad_norm": 0.5050293657924345, - "learning_rate": 1.9996937309791722e-05, - "loss": 0.1513, + "epoch": 0.03, + "grad_norm": 0.4802055143096561, + "learning_rate": 1.999999955721928e-05, + "loss": 0.2192, "step": 656 }, { - "epoch": 0.04, - "grad_norm": 0.8105239826073464, - "learning_rate": 1.999689108355442e-05, - "loss": 0.4691, + "epoch": 0.03, + "grad_norm": 1.0916650335286944, + "learning_rate": 1.9999999003743386e-05, + "loss": 0.5638, "step": 657 }, { - "epoch": 0.04, - "grad_norm": 0.5513704345716917, - "learning_rate": 1.9996844511125205e-05, - "loss": 0.3814, + "epoch": 0.03, + "grad_norm": 0.6510485788388284, + "learning_rate": 1.9999998228877155e-05, + "loss": 0.3021, "step": 658 }, { - "epoch": 0.04, - "grad_norm": 0.6100721289861497, - "learning_rate": 1.9996797592505703e-05, - "loss": 0.4041, + "epoch": 0.03, + "grad_norm": 0.7121993966385886, + "learning_rate": 1.99999972326206e-05, + "loss": 0.3928, "step": 659 }, { - "epoch": 0.04, - "grad_norm": 0.6755649661583865, - "learning_rate": 1.9996750327697523e-05, - "loss": 0.4135, + "epoch": 0.03, + "grad_norm": 1.2453033306706074, + "learning_rate": 1.9999996014973747e-05, + "loss": 0.5474, "step": 660 }, { - "epoch": 0.04, - "grad_norm": 0.5909747328001143, - "learning_rate": 1.999670271670231e-05, - "loss": 0.3755, + "epoch": 0.03, + "grad_norm": 0.6913780217277756, + "learning_rate": 1.9999994575936615e-05, + "loss": 0.339, "step": 661 }, { - "epoch": 0.04, - "grad_norm": 0.5530198259517983, - "learning_rate": 1.9996654759521713e-05, - "loss": 0.2282, + "epoch": 0.03, + "grad_norm": 0.40348748248024663, + "learning_rate": 1.9999992915509245e-05, + "loss": 0.1173, "step": 662 }, { - "epoch": 0.04, - "grad_norm": 1.0659525428191399, - "learning_rate": 1.999660645615739e-05, - "loss": 0.2963, + "epoch": 0.03, + "grad_norm": 0.6535040215803822, + "learning_rate": 1.999999103369167e-05, + "loss": 0.3434, "step": 663 }, { - "epoch": 0.04, - "grad_norm": 0.6884011137041578, - "learning_rate": 1.9996557806611017e-05, - "loss": 0.375, + "epoch": 0.03, + "grad_norm": 0.6930306960745453, + "learning_rate": 1.999998893048393e-05, + "loss": 0.35, "step": 664 }, { - "epoch": 0.04, - "grad_norm": 1.2005303148264228, - "learning_rate": 1.9996508810884277e-05, - "loss": 0.5657, + "epoch": 0.03, + "grad_norm": 1.9849130280483798, + "learning_rate": 1.9999986605886072e-05, + "loss": 0.5549, "step": 665 }, { - "epoch": 0.04, - "grad_norm": 0.7004073880089609, - "learning_rate": 1.9996459468978865e-05, - "loss": 0.2783, + "epoch": 0.03, + "grad_norm": 0.6882180217021225, + "learning_rate": 1.9999984059898153e-05, + "loss": 0.4308, "step": 666 }, { - "epoch": 0.04, - "grad_norm": 0.6849488869877197, - "learning_rate": 1.9996409780896495e-05, - "loss": 0.3791, + "epoch": 0.03, + "grad_norm": 0.6124594703728439, + "learning_rate": 1.9999981292520222e-05, + "loss": 0.3161, "step": 667 }, { - "epoch": 0.04, - "grad_norm": 0.9692201762247729, - "learning_rate": 1.999635974663888e-05, - "loss": 0.2391, + "epoch": 0.03, + "grad_norm": 0.45373797605211397, + "learning_rate": 1.9999978303752342e-05, + "loss": 0.1635, "step": 668 }, { - "epoch": 0.04, - "grad_norm": 1.3591328381927894, - "learning_rate": 1.999630936620776e-05, - "loss": 0.3121, + "epoch": 0.03, + "grad_norm": 1.6726889463237635, + "learning_rate": 1.9999975093594583e-05, + "loss": 0.7403, "step": 669 }, { - "epoch": 0.04, - "grad_norm": 0.6988627891771934, - "learning_rate": 1.9996258639604874e-05, - "loss": 0.3539, + "epoch": 0.03, + "grad_norm": 0.706324175059597, + "learning_rate": 1.9999971662047012e-05, + "loss": 0.3227, "step": 670 }, { - "epoch": 0.04, - "grad_norm": 0.9556786113936547, - "learning_rate": 1.999620756683198e-05, - "loss": 0.4806, + "epoch": 0.03, + "grad_norm": 1.376503289477581, + "learning_rate": 1.9999968009109708e-05, + "loss": 0.3949, "step": 671 }, { - "epoch": 0.04, - "grad_norm": 1.4342440011179771, - "learning_rate": 1.999615614789085e-05, - "loss": 0.3508, + "epoch": 0.03, + "grad_norm": 1.561842210104898, + "learning_rate": 1.999996413478275e-05, + "loss": 0.6576, "step": 672 }, { - "epoch": 0.04, - "grad_norm": 0.6267491039851717, - "learning_rate": 1.9996104382783266e-05, - "loss": 0.3627, + "epoch": 0.03, + "grad_norm": 0.5963514740766326, + "learning_rate": 1.9999960039066226e-05, + "loss": 0.3089, "step": 673 }, { - "epoch": 0.04, - "grad_norm": 0.6388301817445797, - "learning_rate": 1.9996052271511012e-05, - "loss": 0.2443, + "epoch": 0.03, + "grad_norm": 0.5908896183227859, + "learning_rate": 1.999995572196022e-05, + "loss": 0.1836, "step": 674 }, { - "epoch": 0.04, - "grad_norm": 0.6126860481430475, - "learning_rate": 1.99959998140759e-05, - "loss": 0.2574, + "epoch": 0.03, + "grad_norm": 0.8489908552865938, + "learning_rate": 1.9999951183464837e-05, + "loss": 0.317, "step": 675 }, { - "epoch": 0.04, - "grad_norm": 0.6335639107595146, - "learning_rate": 1.9995947010479744e-05, - "loss": 0.4149, + "epoch": 0.03, + "grad_norm": 0.7948053603194695, + "learning_rate": 1.999994642358017e-05, + "loss": 0.3421, "step": 676 }, { - "epoch": 0.04, - "grad_norm": 1.009173507203124, - "learning_rate": 1.999589386072437e-05, - "loss": 0.6275, + "epoch": 0.03, + "grad_norm": 2.0197352368512274, + "learning_rate": 1.9999941442306328e-05, + "loss": 0.5878, "step": 677 }, { - "epoch": 0.04, - "grad_norm": 0.5334355696559099, - "learning_rate": 1.9995840364811627e-05, - "loss": 0.3687, + "epoch": 0.03, + "grad_norm": 0.7028968072632593, + "learning_rate": 1.9999936239643422e-05, + "loss": 0.2938, "step": 678 }, { - "epoch": 0.04, - "grad_norm": 0.5264094885639136, - "learning_rate": 1.999578652274336e-05, - "loss": 0.2631, + "epoch": 0.03, + "grad_norm": 0.6473862209980831, + "learning_rate": 1.9999930815591565e-05, + "loss": 0.3149, "step": 679 }, { - "epoch": 0.04, - "grad_norm": 0.7406091669303169, - "learning_rate": 1.9995732334521432e-05, - "loss": 0.2581, + "epoch": 0.03, + "grad_norm": 0.6194245546090891, + "learning_rate": 1.9999925170150877e-05, + "loss": 0.317, "step": 680 }, { - "epoch": 0.04, - "grad_norm": 0.9294914520950267, - "learning_rate": 1.9995677800147728e-05, - "loss": 0.5048, + "epoch": 0.03, + "grad_norm": 0.7818454454963415, + "learning_rate": 1.9999919303321482e-05, + "loss": 0.246, "step": 681 }, { - "epoch": 0.04, - "grad_norm": 0.5268582006547304, - "learning_rate": 1.9995622919624127e-05, - "loss": 0.2644, + "epoch": 0.03, + "grad_norm": 0.7564850406368873, + "learning_rate": 1.9999913215103516e-05, + "loss": 0.3363, "step": 682 }, { - "epoch": 0.04, - "grad_norm": 0.848890819064537, - "learning_rate": 1.9995567692952537e-05, - "loss": 0.4564, + "epoch": 0.03, + "grad_norm": 1.3707537220992398, + "learning_rate": 1.9999906905497104e-05, + "loss": 0.4397, "step": 683 }, { - "epoch": 0.04, - "grad_norm": 0.9587797278208916, - "learning_rate": 1.9995512120134867e-05, - "loss": 0.5373, + "epoch": 0.03, + "grad_norm": 1.8947073083180777, + "learning_rate": 1.9999900374502395e-05, + "loss": 0.6268, "step": 684 }, { - "epoch": 0.04, - "grad_norm": 0.5326515080037058, - "learning_rate": 1.9995456201173044e-05, - "loss": 0.2536, + "epoch": 0.03, + "grad_norm": 0.5537455145557966, + "learning_rate": 1.999989362211953e-05, + "loss": 0.317, "step": 685 }, { - "epoch": 0.04, - "grad_norm": 0.4332630514103188, - "learning_rate": 1.9995399936069e-05, - "loss": 0.2911, + "epoch": 0.03, + "grad_norm": 0.46942044717482495, + "learning_rate": 1.9999886648348657e-05, + "loss": 0.2324, "step": 686 }, { - "epoch": 0.04, - "grad_norm": 0.8241672572613798, - "learning_rate": 1.9995343324824686e-05, - "loss": 0.3667, + "epoch": 0.03, + "grad_norm": 1.0547854666743743, + "learning_rate": 1.999987945318993e-05, + "loss": 0.4551, "step": 687 }, { - "epoch": 0.04, - "grad_norm": 0.5755331586063459, - "learning_rate": 1.9995286367442062e-05, - "loss": 0.3133, + "epoch": 0.03, + "grad_norm": 0.9012278004185484, + "learning_rate": 1.9999872036643514e-05, + "loss": 0.3195, "step": 688 }, { - "epoch": 0.04, - "grad_norm": 1.406266965308756, - "learning_rate": 1.9995229063923104e-05, - "loss": 0.8357, + "epoch": 0.03, + "grad_norm": 1.8792789171490352, + "learning_rate": 1.9999864398709565e-05, + "loss": 0.6657, "step": 689 }, { - "epoch": 0.04, - "grad_norm": 0.5461542233249763, - "learning_rate": 1.9995171414269793e-05, - "loss": 0.3926, + "epoch": 0.03, + "grad_norm": 0.9117257609069949, + "learning_rate": 1.999985653938826e-05, + "loss": 0.4492, "step": 690 }, { - "epoch": 0.04, - "grad_norm": 0.5332452855766027, - "learning_rate": 1.999511341848412e-05, - "loss": 0.3142, + "epoch": 0.03, + "grad_norm": 0.5059014074587862, + "learning_rate": 1.9999848458679768e-05, + "loss": 0.2697, "step": 691 }, { - "epoch": 0.04, - "grad_norm": 0.5558375093163075, - "learning_rate": 1.9995055076568107e-05, - "loss": 0.1537, + "epoch": 0.03, + "grad_norm": 1.6784231504936034, + "learning_rate": 1.9999840156584266e-05, + "loss": 0.7376, "step": 692 }, { - "epoch": 0.04, - "grad_norm": 0.5096980092738956, - "learning_rate": 1.999499638852376e-05, - "loss": 0.3855, + "epoch": 0.03, + "grad_norm": 1.0279329861427517, + "learning_rate": 1.999983163310194e-05, + "loss": 0.3029, "step": 693 }, { - "epoch": 0.04, - "grad_norm": 0.585061570533692, - "learning_rate": 1.999493735435312e-05, - "loss": 0.3167, + "epoch": 0.03, + "grad_norm": 0.7443388737145241, + "learning_rate": 1.9999822888232987e-05, + "loss": 0.2911, "step": 694 }, { - "epoch": 0.04, - "grad_norm": 1.3571760544795732, - "learning_rate": 1.999487797405823e-05, - "loss": 0.5282, + "epoch": 0.03, + "grad_norm": 0.8909206120929277, + "learning_rate": 1.999981392197759e-05, + "loss": 0.4313, "step": 695 }, { - "epoch": 0.04, - "grad_norm": 1.1375980474442728, - "learning_rate": 1.9994818247641147e-05, - "loss": 0.5273, + "epoch": 0.03, + "grad_norm": 2.1491058685292717, + "learning_rate": 1.999980473433595e-05, + "loss": 0.8305, "step": 696 }, { - "epoch": 0.04, - "grad_norm": 0.6087244752271485, - "learning_rate": 1.9994758175103935e-05, - "loss": 0.3229, + "epoch": 0.03, + "grad_norm": 0.6283181234864429, + "learning_rate": 1.999979532530827e-05, + "loss": 0.2077, "step": 697 }, { - "epoch": 0.04, - "grad_norm": 0.6091437037486682, - "learning_rate": 1.999469775644868e-05, - "loss": 0.316, + "epoch": 0.03, + "grad_norm": 0.452576294961913, + "learning_rate": 1.999978569489476e-05, + "loss": 0.2857, "step": 698 }, { - "epoch": 0.04, - "grad_norm": 1.0768666202960535, - "learning_rate": 1.999463699167747e-05, - "loss": 0.5091, + "epoch": 0.03, + "grad_norm": 0.5860738060514112, + "learning_rate": 1.9999775843095636e-05, + "loss": 0.4072, "step": 699 }, { - "epoch": 0.04, - "grad_norm": 0.6652825079939793, - "learning_rate": 1.999457588079241e-05, - "loss": 0.3182, + "epoch": 0.03, + "grad_norm": 0.6489991442796038, + "learning_rate": 1.9999765769911108e-05, + "loss": 0.3565, "step": 700 }, { - "epoch": 0.04, - "grad_norm": 0.5327514124177343, - "learning_rate": 1.9994514423795616e-05, - "loss": 0.1642, + "epoch": 0.03, + "grad_norm": 1.0568446265222562, + "learning_rate": 1.9999755475341403e-05, + "loss": 0.4024, "step": 701 }, { - "epoch": 0.04, - "grad_norm": 0.6094186828307505, - "learning_rate": 1.9994452620689218e-05, - "loss": 0.3472, + "epoch": 0.03, + "grad_norm": 0.7019954406751752, + "learning_rate": 1.9999744959386753e-05, + "loss": 0.3835, "step": 702 }, { - "epoch": 0.04, - "grad_norm": 0.5620751646923052, - "learning_rate": 1.999439047147536e-05, - "loss": 0.3312, + "epoch": 0.03, + "grad_norm": 0.7141355700787837, + "learning_rate": 1.9999734222047387e-05, + "loss": 0.3248, "step": 703 }, { - "epoch": 0.04, - "grad_norm": 0.9069035528958392, - "learning_rate": 1.9994327976156184e-05, - "loss": 0.6049, + "epoch": 0.03, + "grad_norm": 0.9652208499912159, + "learning_rate": 1.9999723263323543e-05, + "loss": 0.4238, "step": 704 }, { - "epoch": 0.04, - "grad_norm": 0.6506383954312657, - "learning_rate": 1.9994265134733862e-05, - "loss": 0.397, + "epoch": 0.03, + "grad_norm": 0.7627714465831604, + "learning_rate": 1.9999712083215465e-05, + "loss": 0.6007, "step": 705 }, { - "epoch": 0.04, - "grad_norm": 0.45557859200370937, - "learning_rate": 1.999420194721057e-05, - "loss": 0.3243, + "epoch": 0.03, + "grad_norm": 0.45046026371794445, + "learning_rate": 1.9999700681723396e-05, + "loss": 0.1914, "step": 706 }, { - "epoch": 0.04, - "grad_norm": 0.48790557123093586, - "learning_rate": 1.999413841358849e-05, - "loss": 0.2508, + "epoch": 0.03, + "grad_norm": 0.7714181503044668, + "learning_rate": 1.9999689058847595e-05, + "loss": 0.3423, "step": 707 }, { - "epoch": 0.04, - "grad_norm": 1.776312246851432, - "learning_rate": 1.999407453386983e-05, - "loss": 0.3304, + "epoch": 0.03, + "grad_norm": 2.076608431677552, + "learning_rate": 1.9999677214588314e-05, + "loss": 0.825, "step": 708 }, { - "epoch": 0.04, - "grad_norm": 0.4977790934365835, - "learning_rate": 1.99940103080568e-05, - "loss": 0.3172, + "epoch": 0.03, + "grad_norm": 0.5633517160121146, + "learning_rate": 1.999966514894582e-05, + "loss": 0.3005, "step": 709 }, { - "epoch": 0.04, - "grad_norm": 0.6252880128089249, - "learning_rate": 1.999394573615162e-05, - "loss": 0.3972, + "epoch": 0.03, + "grad_norm": 0.898140516968242, + "learning_rate": 1.9999652861920374e-05, + "loss": 0.3885, "step": 710 }, { - "epoch": 0.04, - "grad_norm": 1.1498258098687408, - "learning_rate": 1.999388081815653e-05, - "loss": 0.5164, + "epoch": 0.03, + "grad_norm": 0.8505430394287419, + "learning_rate": 1.999964035351225e-05, + "loss": 0.4604, "step": 711 }, { - "epoch": 0.04, - "grad_norm": 0.892056556618054, - "learning_rate": 1.9993815554073778e-05, - "loss": 0.2629, + "epoch": 0.03, + "grad_norm": 0.6266940564645365, + "learning_rate": 1.999962762372173e-05, + "loss": 0.3346, "step": 712 }, { - "epoch": 0.04, - "grad_norm": 1.6940538088084591, - "learning_rate": 1.999374994390562e-05, - "loss": 0.7976, + "epoch": 0.03, + "grad_norm": 0.7517726933464418, + "learning_rate": 1.9999614672549092e-05, + "loss": 0.3314, "step": 713 }, { - "epoch": 0.04, - "grad_norm": 0.500147764955215, - "learning_rate": 1.9993683987654333e-05, - "loss": 0.2667, + "epoch": 0.03, + "grad_norm": 0.531404795184928, + "learning_rate": 1.9999601499994625e-05, + "loss": 0.2808, "step": 714 }, { - "epoch": 0.04, - "grad_norm": 0.560974892337304, - "learning_rate": 1.99936176853222e-05, - "loss": 0.3075, + "epoch": 0.03, + "grad_norm": 0.6691745818531178, + "learning_rate": 1.9999588106058613e-05, + "loss": 0.3373, "step": 715 }, { - "epoch": 0.04, - "grad_norm": 1.183879772416323, - "learning_rate": 1.9993551036911514e-05, - "loss": 0.5875, + "epoch": 0.03, + "grad_norm": 0.8449221358998767, + "learning_rate": 1.999957449074136e-05, + "loss": 0.4503, "step": 716 }, { - "epoch": 0.04, - "grad_norm": 0.5553161207454865, - "learning_rate": 1.9993484042424588e-05, - "loss": 0.3873, + "epoch": 0.03, + "grad_norm": 0.735217362518927, + "learning_rate": 1.999956065404317e-05, + "loss": 0.3327, "step": 717 }, { - "epoch": 0.04, - "grad_norm": 0.4812560176886251, - "learning_rate": 1.9993416701863736e-05, - "loss": 0.2393, + "epoch": 0.03, + "grad_norm": 0.7037397792101784, + "learning_rate": 1.9999546595964338e-05, + "loss": 0.3773, "step": 718 }, { - "epoch": 0.04, - "grad_norm": 1.3210916333620273, - "learning_rate": 1.9993349015231297e-05, - "loss": 0.8372, + "epoch": 0.03, + "grad_norm": 0.6250477789870648, + "learning_rate": 1.9999532316505185e-05, + "loss": 0.3406, "step": 719 }, { - "epoch": 0.04, - "grad_norm": 0.5269059279825339, - "learning_rate": 1.999328098252961e-05, - "loss": 0.2579, + "epoch": 0.03, + "grad_norm": 0.5633685793357558, + "learning_rate": 1.9999517815666027e-05, + "loss": 0.2003, "step": 720 }, { - "epoch": 0.04, - "grad_norm": 0.6285141235409536, - "learning_rate": 1.9993212603761032e-05, - "loss": 0.2367, + "epoch": 0.03, + "grad_norm": 0.5982309218100806, + "learning_rate": 1.999950309344718e-05, + "loss": 0.3029, "step": 721 }, { - "epoch": 0.04, - "grad_norm": 0.564077425147305, - "learning_rate": 1.9993143878927933e-05, - "loss": 0.413, + "epoch": 0.03, + "grad_norm": 0.560957703357082, + "learning_rate": 1.9999488149848972e-05, + "loss": 0.3891, "step": 722 }, { - "epoch": 0.04, - "grad_norm": 1.5407667855885592, - "learning_rate": 1.999307480803269e-05, - "loss": 0.6998, + "epoch": 0.03, + "grad_norm": 1.0400463559915587, + "learning_rate": 1.9999472984871734e-05, + "loss": 0.5104, "step": 723 }, { - "epoch": 0.04, - "grad_norm": 0.44099260765567994, - "learning_rate": 1.9993005391077694e-05, - "loss": 0.2509, + "epoch": 0.03, + "grad_norm": 0.6184426632319019, + "learning_rate": 1.9999457598515798e-05, + "loss": 0.3455, "step": 724 }, { - "epoch": 0.04, - "grad_norm": 0.6939970930509446, - "learning_rate": 1.999293562806535e-05, - "loss": 0.4579, + "epoch": 0.03, + "grad_norm": 0.857855399864822, + "learning_rate": 1.9999441990781515e-05, + "loss": 0.4999, "step": 725 }, { - "epoch": 0.04, - "grad_norm": 0.5590999653786924, - "learning_rate": 1.999286551899808e-05, - "loss": 0.2503, + "epoch": 0.03, + "grad_norm": 0.470679232366824, + "learning_rate": 1.999942616166922e-05, + "loss": 0.3035, "step": 726 }, { - "epoch": 0.04, - "grad_norm": 0.4594339329962618, - "learning_rate": 1.9992795063878304e-05, - "loss": 0.2918, + "epoch": 0.03, + "grad_norm": 0.4450416279979177, + "learning_rate": 1.999941011117927e-05, + "loss": 0.2428, "step": 727 }, { - "epoch": 0.04, - "grad_norm": 0.6897597721436182, - "learning_rate": 1.9992724262708466e-05, - "loss": 0.5618, + "epoch": 0.03, + "grad_norm": 0.8144791238556535, + "learning_rate": 1.9999393839312016e-05, + "loss": 0.4535, "step": 728 }, { - "epoch": 0.04, - "grad_norm": 0.6934232532251842, - "learning_rate": 1.9992653115491015e-05, - "loss": 0.4563, + "epoch": 0.03, + "grad_norm": 1.332367753540305, + "learning_rate": 1.999937734606782e-05, + "loss": 0.638, "step": 729 }, { - "epoch": 0.04, - "grad_norm": 0.457017970343513, - "learning_rate": 1.9992581622228418e-05, - "loss": 0.3187, + "epoch": 0.03, + "grad_norm": 0.5830639493935122, + "learning_rate": 1.9999360631447044e-05, + "loss": 0.2595, "step": 730 }, { - "epoch": 0.04, - "grad_norm": 1.331623318724993, - "learning_rate": 1.9992509782923148e-05, - "loss": 0.5542, + "epoch": 0.03, + "grad_norm": 0.7894196025658753, + "learning_rate": 1.9999343695450063e-05, + "loss": 0.4279, "step": 731 }, { - "epoch": 0.04, - "grad_norm": 0.5444148013267605, - "learning_rate": 1.9992437597577692e-05, - "loss": 0.3955, + "epoch": 0.03, + "grad_norm": 0.43328149280680284, + "learning_rate": 1.9999326538077254e-05, + "loss": 0.2092, "step": 732 }, { - "epoch": 0.04, - "grad_norm": 0.4182804082754334, - "learning_rate": 1.9992365066194554e-05, - "loss": 0.3364, + "epoch": 0.03, + "grad_norm": 0.9174107851471719, + "learning_rate": 1.999930915932899e-05, + "loss": 0.3295, "step": 733 }, { - "epoch": 0.04, - "grad_norm": 0.4630481339095031, - "learning_rate": 1.9992292188776244e-05, - "loss": 0.2866, + "epoch": 0.03, + "grad_norm": 0.9850803191033781, + "learning_rate": 1.999929155920566e-05, + "loss": 0.4172, "step": 734 }, { - "epoch": 0.04, - "grad_norm": 1.576845422602978, - "learning_rate": 1.9992218965325283e-05, - "loss": 0.6871, + "epoch": 0.03, + "grad_norm": 0.9405791870277539, + "learning_rate": 1.9999273737707648e-05, + "loss": 0.485, "step": 735 }, { - "epoch": 0.04, - "grad_norm": 0.8118373704300686, - "learning_rate": 1.999214539584421e-05, - "loss": 0.369, + "epoch": 0.03, + "grad_norm": 1.298388079580774, + "learning_rate": 1.9999255694835357e-05, + "loss": 0.5279, "step": 736 }, { - "epoch": 0.04, - "grad_norm": 0.5701701832074956, - "learning_rate": 1.9992071480335574e-05, - "loss": 0.3343, + "epoch": 0.03, + "grad_norm": 0.6531127356655981, + "learning_rate": 1.999923743058918e-05, + "loss": 0.29, "step": 737 }, { - "epoch": 0.04, - "grad_norm": 0.592621937478156, - "learning_rate": 1.9991997218801925e-05, - "loss": 0.4622, + "epoch": 0.03, + "grad_norm": 0.5994652674352005, + "learning_rate": 1.9999218944969528e-05, + "loss": 0.3155, "step": 738 }, { - "epoch": 0.04, - "grad_norm": 0.5980367247552338, - "learning_rate": 1.999192261124585e-05, - "loss": 0.3063, + "epoch": 0.03, + "grad_norm": 0.9980673029573037, + "learning_rate": 1.99992002379768e-05, + "loss": 0.4091, "step": 739 }, { - "epoch": 0.04, - "grad_norm": 0.40888204345849005, - "learning_rate": 1.9991847657669922e-05, - "loss": 0.2738, + "epoch": 0.03, + "grad_norm": 0.6376066531632776, + "learning_rate": 1.999918130961142e-05, + "loss": 0.2926, "step": 740 }, { - "epoch": 0.04, - "grad_norm": 0.6537608743254626, - "learning_rate": 1.9991772358076738e-05, - "loss": 0.3047, + "epoch": 0.03, + "grad_norm": 2.343414190770355, + "learning_rate": 1.9999162159873802e-05, + "loss": 0.6812, "step": 741 }, { - "epoch": 0.04, - "grad_norm": 0.5674094793046328, - "learning_rate": 1.999169671246891e-05, - "loss": 0.3443, + "epoch": 0.03, + "grad_norm": 0.5565838678048582, + "learning_rate": 1.9999142788764373e-05, + "loss": 0.3333, "step": 742 }, { - "epoch": 0.04, - "grad_norm": 0.6513678869487316, - "learning_rate": 1.999162072084905e-05, - "loss": 0.4706, + "epoch": 0.03, + "grad_norm": 0.7311130707357333, + "learning_rate": 1.9999123196283557e-05, + "loss": 0.3241, "step": 743 }, { - "epoch": 0.04, - "grad_norm": 0.7395606121601905, - "learning_rate": 1.9991544383219796e-05, - "loss": 0.4724, + "epoch": 0.03, + "grad_norm": 1.2046386666563171, + "learning_rate": 1.999910338243179e-05, + "loss": 0.5745, "step": 744 }, { - "epoch": 0.04, - "grad_norm": 0.4922112233983982, - "learning_rate": 1.999146769958379e-05, - "loss": 0.312, + "epoch": 0.03, + "grad_norm": 0.6964171700147567, + "learning_rate": 1.9999083347209515e-05, + "loss": 0.4752, "step": 745 }, { - "epoch": 0.04, - "grad_norm": 0.4493305840997312, - "learning_rate": 1.999139066994369e-05, - "loss": 0.2616, + "epoch": 0.03, + "grad_norm": 0.43233961864693127, + "learning_rate": 1.999906309061717e-05, + "loss": 0.2196, "step": 746 }, { - "epoch": 0.04, - "grad_norm": 1.197209300931008, - "learning_rate": 1.9991313294302155e-05, - "loss": 0.4788, + "epoch": 0.03, + "grad_norm": 0.5324720210341239, + "learning_rate": 1.9999042612655205e-05, + "loss": 0.3211, "step": 747 }, { - "epoch": 0.04, - "grad_norm": 0.4994103566217086, - "learning_rate": 1.9991235572661874e-05, - "loss": 0.3094, + "epoch": 0.03, + "grad_norm": 0.899735971909227, + "learning_rate": 1.9999021913324074e-05, + "loss": 0.4005, "step": 748 }, { - "epoch": 0.04, - "grad_norm": 1.6447067429291433, - "learning_rate": 1.9991157505025534e-05, - "loss": 0.9033, + "epoch": 0.03, + "grad_norm": 0.7035874211150875, + "learning_rate": 1.9999000992624236e-05, + "loss": 0.3465, "step": 749 }, { - "epoch": 0.04, - "grad_norm": 0.5606319354875383, - "learning_rate": 1.999107909139584e-05, - "loss": 0.3895, + "epoch": 0.03, + "grad_norm": 0.6538429942639787, + "learning_rate": 1.9998979850556152e-05, + "loss": 0.3829, "step": 750 }, { - "epoch": 0.04, - "grad_norm": 0.5734727183073857, - "learning_rate": 1.9991000331775506e-05, - "loss": 0.3423, + "epoch": 0.03, + "grad_norm": 0.6407790926645832, + "learning_rate": 1.9998958487120294e-05, + "loss": 0.3873, "step": 751 }, { - "epoch": 0.04, - "grad_norm": 0.569467318382504, - "learning_rate": 1.999092122616726e-05, - "loss": 0.2506, + "epoch": 0.03, + "grad_norm": 0.47937420104128425, + "learning_rate": 1.999893690231713e-05, + "loss": 0.2545, "step": 752 }, { - "epoch": 0.04, - "grad_norm": 0.5087244938226237, - "learning_rate": 1.9990841774573843e-05, - "loss": 0.331, + "epoch": 0.03, + "grad_norm": 0.463671622172868, + "learning_rate": 1.9998915096147137e-05, + "loss": 0.2495, "step": 753 }, { - "epoch": 0.04, - "grad_norm": 0.34419700613207677, - "learning_rate": 1.9990761976998e-05, - "loss": 0.0829, + "epoch": 0.03, + "grad_norm": 0.6910165827091868, + "learning_rate": 1.9998893068610804e-05, + "loss": 0.3842, "step": 754 }, { - "epoch": 0.04, - "grad_norm": 0.5802236112684132, - "learning_rate": 1.9990681833442503e-05, - "loss": 0.3881, + "epoch": 0.03, + "grad_norm": 0.6016532613526538, + "learning_rate": 1.9998870819708617e-05, + "loss": 0.344, "step": 755 }, { - "epoch": 0.04, - "grad_norm": 0.9103632693548557, - "learning_rate": 1.9990601343910126e-05, - "loss": 0.6007, + "epoch": 0.03, + "grad_norm": 0.977890154867551, + "learning_rate": 1.999884834944106e-05, + "loss": 0.5149, "step": 756 }, { - "epoch": 0.04, - "grad_norm": 0.45808385012447494, - "learning_rate": 1.999052050840365e-05, - "loss": 0.2621, + "epoch": 0.03, + "grad_norm": 1.0264565966166421, + "learning_rate": 1.9998825657808647e-05, + "loss": 0.4745, "step": 757 }, { - "epoch": 0.04, - "grad_norm": 0.4230696341008969, - "learning_rate": 1.9990439326925882e-05, - "loss": 0.2886, + "epoch": 0.03, + "grad_norm": 0.5712145317691718, + "learning_rate": 1.9998802744811867e-05, + "loss": 0.3134, "step": 758 }, { - "epoch": 0.04, - "grad_norm": 0.78737798009791, - "learning_rate": 1.9990357799479626e-05, - "loss": 0.342, + "epoch": 0.03, + "grad_norm": 0.39939493805463433, + "learning_rate": 1.9998779610451232e-05, + "loss": 0.1477, "step": 759 }, { - "epoch": 0.04, - "grad_norm": 0.5573592556824364, - "learning_rate": 1.9990275926067713e-05, - "loss": 0.2124, + "epoch": 0.03, + "grad_norm": 0.8016835113226563, + "learning_rate": 1.999875625472725e-05, + "loss": 0.4425, "step": 760 }, { - "epoch": 0.04, - "grad_norm": 0.6546682929758058, - "learning_rate": 1.9990193706692972e-05, - "loss": 0.3749, + "epoch": 0.03, + "grad_norm": 0.6525164426236323, + "learning_rate": 1.9998732677640445e-05, + "loss": 0.4001, "step": 761 }, { "epoch": 0.04, - "grad_norm": 1.533380714290272, - "learning_rate": 1.9990111141358252e-05, - "loss": 0.6221, + "grad_norm": 0.5765724325576432, + "learning_rate": 1.9998708879191336e-05, + "loss": 0.3834, "step": 762 }, { "epoch": 0.04, - "grad_norm": 0.5379176222315797, - "learning_rate": 1.9990028230066413e-05, - "loss": 0.2695, + "grad_norm": 0.6221839046652744, + "learning_rate": 1.999868485938045e-05, + "loss": 0.2987, "step": 763 }, { "epoch": 0.04, - "grad_norm": 0.4715225267416928, - "learning_rate": 1.998994497282033e-05, - "loss": 0.252, + "grad_norm": 0.4736990471163756, + "learning_rate": 1.999866061820831e-05, + "loss": 0.2708, "step": 764 }, { "epoch": 0.04, - "grad_norm": 0.4811990499682349, - "learning_rate": 1.9989861369622877e-05, - "loss": 0.2896, + "grad_norm": 0.7464858462524168, + "learning_rate": 1.9998636155675467e-05, + "loss": 0.3757, "step": 765 }, { "epoch": 0.04, - "grad_norm": 0.886555411853447, - "learning_rate": 1.9989777420476954e-05, - "loss": 0.4155, + "grad_norm": 0.52752991918669, + "learning_rate": 1.999861147178246e-05, + "loss": 0.2853, "step": 766 }, { "epoch": 0.04, - "grad_norm": 0.9790462773253676, - "learning_rate": 1.998969312538547e-05, - "loss": 0.3598, + "grad_norm": 0.6628785333272043, + "learning_rate": 1.9998586566529825e-05, + "loss": 0.3721, "step": 767 }, { "epoch": 0.04, - "grad_norm": 2.1859233395253255, - "learning_rate": 1.9989608484351343e-05, - "loss": 0.676, + "grad_norm": 1.0671708414897525, + "learning_rate": 1.9998561439918123e-05, + "loss": 0.5772, "step": 768 }, { "epoch": 0.04, - "grad_norm": 0.5154902003353229, - "learning_rate": 1.9989523497377505e-05, - "loss": 0.3098, + "grad_norm": 0.6821973641096672, + "learning_rate": 1.9998536091947907e-05, + "loss": 0.3511, "step": 769 }, { "epoch": 0.04, - "grad_norm": 0.6483670299556568, - "learning_rate": 1.9989438164466896e-05, - "loss": 0.3439, + "grad_norm": 0.604087790137277, + "learning_rate": 1.999851052261974e-05, + "loss": 0.3212, "step": 770 }, { "epoch": 0.04, - "grad_norm": 0.39362482228028733, - "learning_rate": 1.9989352485622472e-05, - "loss": 0.1467, + "grad_norm": 0.48022168622977923, + "learning_rate": 1.9998484731934185e-05, + "loss": 0.1839, "step": 771 }, { "epoch": 0.04, - "grad_norm": 1.74006668147056, - "learning_rate": 1.9989266460847207e-05, - "loss": 0.4542, + "grad_norm": 1.2865781233443245, + "learning_rate": 1.9998458719891815e-05, + "loss": 0.4076, "step": 772 }, { "epoch": 0.04, - "grad_norm": 0.8813615902844071, - "learning_rate": 1.998918009014407e-05, - "loss": 0.3356, + "grad_norm": 0.660244300049914, + "learning_rate": 1.9998432486493206e-05, + "loss": 0.3894, "step": 773 }, { "epoch": 0.04, - "grad_norm": 1.841448242464731, - "learning_rate": 1.9989093373516053e-05, - "loss": 0.7304, + "grad_norm": 0.5950558360000362, + "learning_rate": 1.999840603173894e-05, + "loss": 0.4217, "step": 774 }, { "epoch": 0.04, - "grad_norm": 1.0736502504903374, - "learning_rate": 1.9989006310966162e-05, - "loss": 0.4557, + "grad_norm": 2.08724364052135, + "learning_rate": 1.99983793556296e-05, + "loss": 0.7936, "step": 775 }, { "epoch": 0.04, - "grad_norm": 0.5013227982869481, - "learning_rate": 1.9988918902497417e-05, - "loss": 0.2518, + "grad_norm": 0.5568481907840251, + "learning_rate": 1.9998352458165776e-05, + "loss": 0.2852, "step": 776 }, { "epoch": 0.04, - "grad_norm": 0.5610738984393089, - "learning_rate": 1.998883114811284e-05, - "loss": 0.3094, + "grad_norm": 0.48050320675218167, + "learning_rate": 1.9998325339348066e-05, + "loss": 0.2171, "step": 777 }, { "epoch": 0.04, - "grad_norm": 1.8664247677141819, - "learning_rate": 1.9988743047815465e-05, - "loss": 0.4851, + "grad_norm": 0.7895863179300946, + "learning_rate": 1.999829799917707e-05, + "loss": 0.4113, "step": 778 }, { "epoch": 0.04, - "grad_norm": 1.1921957110147678, - "learning_rate": 1.9988654601608354e-05, - "loss": 0.4159, + "grad_norm": 0.5901676127366048, + "learning_rate": 1.9998270437653392e-05, + "loss": 0.316, "step": 779 }, { "epoch": 0.04, - "grad_norm": 1.8570412985712648, - "learning_rate": 1.998856580949456e-05, - "loss": 0.6539, + "grad_norm": 2.0261322048310695, + "learning_rate": 1.9998242654777643e-05, + "loss": 0.8467, "step": 780 }, { "epoch": 0.04, - "grad_norm": 0.5304787388263474, - "learning_rate": 1.998847667147716e-05, - "loss": 0.3169, + "grad_norm": 0.5924618213299074, + "learning_rate": 1.9998214650550437e-05, + "loss": 0.3668, "step": 781 }, { "epoch": 0.04, - "grad_norm": 0.6695906793491527, - "learning_rate": 1.9988387187559245e-05, - "loss": 0.3891, + "grad_norm": 0.5018233916959908, + "learning_rate": 1.9998186424972397e-05, + "loss": 0.2948, "step": 782 }, { "epoch": 0.04, - "grad_norm": 0.4991033760441603, - "learning_rate": 1.9988297357743906e-05, - "loss": 0.1626, + "grad_norm": 0.43261885864187866, + "learning_rate": 1.9998157978044147e-05, + "loss": 0.1883, "step": 783 }, { - "epoch": 0.05, - "grad_norm": 1.6294509961056738, - "learning_rate": 1.9988207182034264e-05, - "loss": 0.4898, + "epoch": 0.04, + "grad_norm": 1.1660603663169706, + "learning_rate": 1.9998129309766315e-05, + "loss": 0.4999, "step": 784 }, { - "epoch": 0.05, - "grad_norm": 0.7317674592716401, - "learning_rate": 1.9988116660433435e-05, - "loss": 0.3378, + "epoch": 0.04, + "grad_norm": 0.6537275769543484, + "learning_rate": 1.9998100420139534e-05, + "loss": 0.3049, "step": 785 }, { - "epoch": 0.05, - "grad_norm": 1.1238616873997904, - "learning_rate": 1.9988025792944558e-05, - "loss": 0.3753, + "epoch": 0.04, + "grad_norm": 0.6802858286840942, + "learning_rate": 1.9998071309164446e-05, + "loss": 0.3758, "step": 786 }, { - "epoch": 0.05, - "grad_norm": 0.7119062202083866, - "learning_rate": 1.9987934579570776e-05, - "loss": 0.3211, + "epoch": 0.04, + "grad_norm": 1.178637154802895, + "learning_rate": 1.9998041976841698e-05, + "loss": 0.4392, "step": 787 }, { - "epoch": 0.05, - "grad_norm": 0.7562474683264186, - "learning_rate": 1.9987843020315248e-05, - "loss": 0.3854, + "epoch": 0.04, + "grad_norm": 0.5818540616134477, + "learning_rate": 1.9998012423171935e-05, + "loss": 0.3322, "step": 788 }, { - "epoch": 0.05, - "grad_norm": 0.5043603296184589, - "learning_rate": 1.9987751115181147e-05, - "loss": 0.3109, + "epoch": 0.04, + "grad_norm": 0.5056956222475707, + "learning_rate": 1.9997982648155813e-05, + "loss": 0.2565, "step": 789 }, { - "epoch": 0.05, - "grad_norm": 1.0146693352319747, - "learning_rate": 1.9987658864171653e-05, - "loss": 0.5063, + "epoch": 0.04, + "grad_norm": 0.8828440529641679, + "learning_rate": 1.9997952651793994e-05, + "loss": 0.4737, "step": 790 }, { - "epoch": 0.05, - "grad_norm": 0.42337892738407523, - "learning_rate": 1.9987566267289963e-05, - "loss": 0.2544, + "epoch": 0.04, + "grad_norm": 0.5756106810974572, + "learning_rate": 1.9997922434087137e-05, + "loss": 0.3312, "step": 791 }, { - "epoch": 0.05, - "grad_norm": 1.2475416323106774, - "learning_rate": 1.9987473324539285e-05, - "loss": 0.7866, + "epoch": 0.04, + "grad_norm": 1.5897186352977308, + "learning_rate": 1.9997891995035914e-05, + "loss": 0.6355, "step": 792 }, { - "epoch": 0.05, - "grad_norm": 0.7025050670031587, - "learning_rate": 1.998738003592283e-05, - "loss": 0.3059, + "epoch": 0.04, + "grad_norm": 0.5499526164154648, + "learning_rate": 1.9997861334640997e-05, + "loss": 0.4087, "step": 793 }, { - "epoch": 0.05, - "grad_norm": 0.5384616703375238, - "learning_rate": 1.9987286401443838e-05, - "loss": 0.3314, + "epoch": 0.04, + "grad_norm": 0.5433584967866018, + "learning_rate": 1.999783045290307e-05, + "loss": 0.3033, "step": 794 }, { - "epoch": 0.05, - "grad_norm": 1.0796972648685363, - "learning_rate": 1.9987192421105546e-05, - "loss": 0.5532, + "epoch": 0.04, + "grad_norm": 0.475453192226817, + "learning_rate": 1.9997799349822812e-05, + "loss": 0.1851, "step": 795 }, { - "epoch": 0.05, - "grad_norm": 0.34313687983856905, - "learning_rate": 1.998709809491121e-05, - "loss": 0.1962, + "epoch": 0.04, + "grad_norm": 0.7651960299450754, + "learning_rate": 1.9997768025400908e-05, + "loss": 0.4275, "step": 796 }, { - "epoch": 0.05, - "grad_norm": 0.3985139635769342, - "learning_rate": 1.9987003422864094e-05, - "loss": 0.2577, + "epoch": 0.04, + "grad_norm": 0.5969652380177083, + "learning_rate": 1.9997736479638063e-05, + "loss": 0.3367, "step": 797 }, { - "epoch": 0.05, - "grad_norm": 1.6243134259537804, - "learning_rate": 1.998690840496748e-05, - "loss": 0.8693, + "epoch": 0.04, + "grad_norm": 0.7123633668305078, + "learning_rate": 1.999770471253496e-05, + "loss": 0.3366, "step": 798 }, { - "epoch": 0.05, - "grad_norm": 0.7373614038171744, - "learning_rate": 1.9986813041224662e-05, - "loss": 0.2976, + "epoch": 0.04, + "grad_norm": 2.303039805237259, + "learning_rate": 1.9997672724092315e-05, + "loss": 0.7682, "step": 799 }, { - "epoch": 0.05, - "grad_norm": 0.6195132227874984, - "learning_rate": 1.9986717331638935e-05, - "loss": 0.3861, + "epoch": 0.04, + "grad_norm": 0.5502241043167116, + "learning_rate": 1.9997640514310832e-05, + "loss": 0.3126, "step": 800 }, { - "epoch": 0.05, - "grad_norm": 0.5595292667728948, - "learning_rate": 1.9986621276213616e-05, - "loss": 0.3883, + "epoch": 0.04, + "grad_norm": 0.9772419010667845, + "learning_rate": 1.999760808319122e-05, + "loss": 0.5978, "step": 801 }, { - "epoch": 0.05, - "grad_norm": 0.5434227894915622, - "learning_rate": 1.998652487495203e-05, - "loss": 0.2684, + "epoch": 0.04, + "grad_norm": 0.4668644488856414, + "learning_rate": 1.999757543073421e-05, + "loss": 0.3053, "step": 802 }, { - "epoch": 0.05, - "grad_norm": 0.4591481879778902, - "learning_rate": 1.998642812785752e-05, - "loss": 0.2694, + "epoch": 0.04, + "grad_norm": 0.6413898020191837, + "learning_rate": 1.9997542556940508e-05, + "loss": 0.3607, "step": 803 }, { - "epoch": 0.05, - "grad_norm": 1.6614380862805616, - "learning_rate": 1.998633103493343e-05, - "loss": 0.8139, + "epoch": 0.04, + "grad_norm": 0.9423226395695657, + "learning_rate": 1.9997509461810848e-05, + "loss": 0.373, "step": 804 }, { - "epoch": 0.05, - "grad_norm": 0.4670262324947827, - "learning_rate": 1.998623359618313e-05, - "loss": 0.3484, + "epoch": 0.04, + "grad_norm": 0.668903962348119, + "learning_rate": 1.999747614534597e-05, + "loss": 0.3338, "step": 805 }, { - "epoch": 0.05, - "grad_norm": 0.564486768316231, - "learning_rate": 1.9986135811609983e-05, - "loss": 0.2648, + "epoch": 0.04, + "grad_norm": 0.604130696686784, + "learning_rate": 1.9997442607546603e-05, + "loss": 0.349, "step": 806 }, { - "epoch": 0.05, - "grad_norm": 1.053932627292119, - "learning_rate": 1.998603768121739e-05, - "loss": 0.5622, + "epoch": 0.04, + "grad_norm": 0.8830409901952293, + "learning_rate": 1.9997408848413494e-05, + "loss": 0.5988, "step": 807 }, { - "epoch": 0.05, - "grad_norm": 0.4672012049877789, - "learning_rate": 1.9985939205008734e-05, - "loss": 0.3168, + "epoch": 0.04, + "grad_norm": 0.5792570557704654, + "learning_rate": 1.9997374867947385e-05, + "loss": 0.2605, "step": 808 }, { - "epoch": 0.05, - "grad_norm": 0.4293014248611022, - "learning_rate": 1.998584038298744e-05, - "loss": 0.2512, + "epoch": 0.04, + "grad_norm": 0.6560120247516701, + "learning_rate": 1.9997340666149036e-05, + "loss": 0.3209, "step": 809 }, { - "epoch": 0.05, - "grad_norm": 0.6436970530457229, - "learning_rate": 1.998574121515692e-05, - "loss": 0.3514, + "epoch": 0.04, + "grad_norm": 0.5655796778710414, + "learning_rate": 1.99973062430192e-05, + "loss": 0.3671, "step": 810 }, { - "epoch": 0.05, - "grad_norm": 1.0177245118126792, - "learning_rate": 1.998564170152061e-05, - "loss": 0.5856, + "epoch": 0.04, + "grad_norm": 0.49596274420367475, + "learning_rate": 1.9997271598558637e-05, + "loss": 0.1698, "step": 811 }, { - "epoch": 0.05, - "grad_norm": 0.45703131510777767, - "learning_rate": 1.9985541842081957e-05, - "loss": 0.3059, + "epoch": 0.04, + "grad_norm": 0.5286044937573018, + "learning_rate": 1.999723673276812e-05, + "loss": 0.3255, "step": 812 }, { - "epoch": 0.05, - "grad_norm": 0.48741084543212293, - "learning_rate": 1.9985441636844424e-05, - "loss": 0.413, + "epoch": 0.04, + "grad_norm": 0.7029184033007397, + "learning_rate": 1.9997201645648413e-05, + "loss": 0.4832, "step": 813 }, { - "epoch": 0.05, - "grad_norm": 1.4244781136880331, - "learning_rate": 1.998534108581147e-05, - "loss": 0.6553, + "epoch": 0.04, + "grad_norm": 0.7475112048108062, + "learning_rate": 1.99971663372003e-05, + "loss": 0.4719, "step": 814 }, { - "epoch": 0.05, - "grad_norm": 0.46494993418916686, - "learning_rate": 1.998524018898659e-05, - "loss": 0.2524, + "epoch": 0.04, + "grad_norm": 0.40384113305517166, + "learning_rate": 1.9997130807424556e-05, + "loss": 0.2039, "step": 815 }, { - "epoch": 0.05, - "grad_norm": 0.5302682621937632, - "learning_rate": 1.9985138946373266e-05, - "loss": 0.3199, + "epoch": 0.04, + "grad_norm": 1.631823826655496, + "learning_rate": 1.9997095056321974e-05, + "loss": 0.8259, "step": 816 }, { - "epoch": 0.05, - "grad_norm": 0.43980037601885463, - "learning_rate": 1.9985037357975013e-05, - "loss": 0.3068, + "epoch": 0.04, + "grad_norm": 0.5845073966189925, + "learning_rate": 1.999705908389334e-05, + "loss": 0.3072, "step": 817 }, { - "epoch": 0.05, - "grad_norm": 0.504431850220822, - "learning_rate": 1.9984935423795345e-05, - "loss": 0.3338, + "epoch": 0.04, + "grad_norm": 0.5392523134790606, + "learning_rate": 1.9997022890139455e-05, + "loss": 0.2682, "step": 818 }, { - "epoch": 0.05, - "grad_norm": 0.8249104939952835, - "learning_rate": 1.998483314383779e-05, - "loss": 0.4644, + "epoch": 0.04, + "grad_norm": 1.0400808956552139, + "learning_rate": 1.999698647506112e-05, + "loss": 0.54, "step": 819 }, { - "epoch": 0.05, - "grad_norm": 0.5695000365147458, - "learning_rate": 1.9984730518105897e-05, - "loss": 0.3879, + "epoch": 0.04, + "grad_norm": 1.5583154274696405, + "learning_rate": 1.999694983865914e-05, + "loss": 0.7755, "step": 820 }, { - "epoch": 0.05, - "grad_norm": 0.4473843111395704, - "learning_rate": 1.9984627546603214e-05, - "loss": 0.3239, + "epoch": 0.04, + "grad_norm": 0.45759376013182856, + "learning_rate": 1.9996912980934326e-05, + "loss": 0.2487, "step": 821 }, { - "epoch": 0.05, - "grad_norm": 0.3833543869741045, - "learning_rate": 1.9984524229333307e-05, - "loss": 0.1974, + "epoch": 0.04, + "grad_norm": 0.6254106544368976, + "learning_rate": 1.999687590188749e-05, + "loss": 0.4166, "step": 822 }, { - "epoch": 0.05, - "grad_norm": 0.4942323075738801, - "learning_rate": 1.9984420566299756e-05, - "loss": 0.347, + "epoch": 0.04, + "grad_norm": 0.4941280875203359, + "learning_rate": 1.999683860151946e-05, + "loss": 0.2888, "step": 823 }, { - "epoch": 0.05, - "grad_norm": 0.5670416194035222, - "learning_rate": 1.998431655750615e-05, - "loss": 0.3483, + "epoch": 0.04, + "grad_norm": 0.5173581643301941, + "learning_rate": 1.9996801079831057e-05, + "loss": 0.238, "step": 824 }, { - "epoch": 0.05, - "grad_norm": 0.5051832663125587, - "learning_rate": 1.998421220295609e-05, - "loss": 0.3617, + "epoch": 0.04, + "grad_norm": 0.5541502312038257, + "learning_rate": 1.9996763336823112e-05, + "loss": 0.4238, "step": 825 }, { - "epoch": 0.05, - "grad_norm": 1.6905171954763243, - "learning_rate": 1.9984107502653193e-05, - "loss": 0.7626, + "epoch": 0.04, + "grad_norm": 1.6780349291313532, + "learning_rate": 1.9996725372496463e-05, + "loss": 0.7396, "step": 826 }, { - "epoch": 0.05, - "grad_norm": 0.4875542594639041, - "learning_rate": 1.9984002456601082e-05, - "loss": 0.2914, + "epoch": 0.04, + "grad_norm": 0.6125175798532856, + "learning_rate": 1.999668718685195e-05, + "loss": 0.3582, "step": 827 }, { - "epoch": 0.05, - "grad_norm": 0.6393314261217024, - "learning_rate": 1.9983897064803396e-05, - "loss": 0.4494, + "epoch": 0.04, + "grad_norm": 0.327647314937058, + "learning_rate": 1.9996648779890416e-05, + "loss": 0.1781, "step": 828 }, { - "epoch": 0.05, - "grad_norm": 0.5626873081314722, - "learning_rate": 1.9983791327263782e-05, - "loss": 0.4125, + "epoch": 0.04, + "grad_norm": 0.6104878366275504, + "learning_rate": 1.9996610151612716e-05, + "loss": 0.3823, "step": 829 }, { - "epoch": 0.05, - "grad_norm": 0.4740622156552829, - "learning_rate": 1.9983685243985905e-05, - "loss": 0.3176, + "epoch": 0.04, + "grad_norm": 0.5377376482925117, + "learning_rate": 1.99965713020197e-05, + "loss": 0.3828, "step": 830 }, { - "epoch": 0.05, - "grad_norm": 0.45140349812898456, - "learning_rate": 1.9983578814973437e-05, - "loss": 0.2087, + "epoch": 0.04, + "grad_norm": 0.6800835935442078, + "learning_rate": 1.999653223111223e-05, + "loss": 0.4031, "step": 831 }, { - "epoch": 0.05, - "grad_norm": 0.582391697868439, - "learning_rate": 1.9983472040230063e-05, - "loss": 0.3153, + "epoch": 0.04, + "grad_norm": 1.2927429471967855, + "learning_rate": 1.999649293889117e-05, + "loss": 0.7099, "step": 832 }, { - "epoch": 0.05, - "grad_norm": 0.504699200239562, - "learning_rate": 1.998336491975948e-05, - "loss": 0.3052, + "epoch": 0.04, + "grad_norm": 0.5187102319329004, + "learning_rate": 1.999645342535739e-05, + "loss": 0.3049, "step": 833 }, { - "epoch": 0.05, - "grad_norm": 1.6280795918613231, - "learning_rate": 1.9983257453565402e-05, - "loss": 0.8253, + "epoch": 0.04, + "grad_norm": 0.875908360169765, + "learning_rate": 1.9996413690511768e-05, + "loss": 0.345, "step": 834 }, { - "epoch": 0.05, - "grad_norm": 0.538834623854235, - "learning_rate": 1.9983149641651546e-05, - "loss": 0.3547, + "epoch": 0.04, + "grad_norm": 0.6354933385684809, + "learning_rate": 1.9996373734355183e-05, + "loss": 0.3811, "step": 835 }, { - "epoch": 0.05, - "grad_norm": 0.661409036567593, - "learning_rate": 1.998304148402165e-05, - "loss": 0.3122, + "epoch": 0.04, + "grad_norm": 0.5057055711998557, + "learning_rate": 1.9996333556888517e-05, + "loss": 0.3419, "step": 836 }, { - "epoch": 0.05, - "grad_norm": 0.48427368476034816, - "learning_rate": 1.9982932980679455e-05, - "loss": 0.2972, + "epoch": 0.04, + "grad_norm": 0.5339816110697241, + "learning_rate": 1.9996293158112663e-05, + "loss": 0.2469, "step": 837 }, { - "epoch": 0.05, - "grad_norm": 1.8611669536442395, - "learning_rate": 1.998282413162872e-05, - "loss": 0.5054, + "epoch": 0.04, + "grad_norm": 1.5121879087425267, + "learning_rate": 1.999625253802851e-05, + "loss": 0.7203, "step": 838 }, { - "epoch": 0.05, - "grad_norm": 0.5038129864771659, - "learning_rate": 1.9982714936873215e-05, - "loss": 0.3187, + "epoch": 0.04, + "grad_norm": 0.881828144279427, + "learning_rate": 1.999621169663696e-05, + "loss": 0.4695, "step": 839 }, { - "epoch": 0.05, - "grad_norm": 0.6294109039118858, - "learning_rate": 1.998260539641672e-05, - "loss": 0.4348, + "epoch": 0.04, + "grad_norm": 0.7222669613699906, + "learning_rate": 1.9996170633938917e-05, + "loss": 0.3987, "step": 840 }, { - "epoch": 0.05, - "grad_norm": 0.582952653039707, - "learning_rate": 1.998249551026303e-05, - "loss": 0.4443, + "epoch": 0.04, + "grad_norm": 0.955440623602667, + "learning_rate": 1.9996129349935293e-05, + "loss": 0.3968, "step": 841 }, { - "epoch": 0.05, - "grad_norm": 0.5631283283567661, - "learning_rate": 1.998238527841595e-05, - "loss": 0.2381, + "epoch": 0.04, + "grad_norm": 0.832416792512411, + "learning_rate": 1.9996087844627e-05, + "loss": 0.2785, "step": 842 }, { - "epoch": 0.05, - "grad_norm": 0.40488497045596294, - "learning_rate": 1.9982274700879295e-05, - "loss": 0.1833, + "epoch": 0.04, + "grad_norm": 0.3870388775845241, + "learning_rate": 1.9996046118014955e-05, + "loss": 0.2392, "step": 843 }, { - "epoch": 0.05, - "grad_norm": 0.6296325473623197, - "learning_rate": 1.9982163777656902e-05, - "loss": 0.3783, + "epoch": 0.04, + "grad_norm": 1.8022179139572887, + "learning_rate": 1.9996004170100083e-05, + "loss": 0.4769, "step": 844 }, { - "epoch": 0.05, - "grad_norm": 0.5638688872776851, - "learning_rate": 1.9982052508752605e-05, - "loss": 0.2327, + "epoch": 0.04, + "grad_norm": 0.6175786547193028, + "learning_rate": 1.9995962000883312e-05, + "loss": 0.3192, "step": 845 }, { - "epoch": 0.05, - "grad_norm": 0.9837522407153001, - "learning_rate": 1.998194089417025e-05, - "loss": 0.5804, + "epoch": 0.04, + "grad_norm": 0.6220709199747196, + "learning_rate": 1.9995919610365577e-05, + "loss": 0.3886, "step": 846 }, { - "epoch": 0.05, - "grad_norm": 0.8229817506353978, - "learning_rate": 1.9981828933913722e-05, - "loss": 0.5947, + "epoch": 0.04, + "grad_norm": 0.989885066157605, + "learning_rate": 1.999587699854782e-05, + "loss": 0.4966, "step": 847 }, { - "epoch": 0.05, - "grad_norm": 0.5743285905374217, - "learning_rate": 1.9981716627986882e-05, - "loss": 0.2498, + "epoch": 0.04, + "grad_norm": 0.5692009900079642, + "learning_rate": 1.9995834165430975e-05, + "loss": 0.2627, "step": 848 }, { - "epoch": 0.05, - "grad_norm": 0.440183500729955, - "learning_rate": 1.9981603976393625e-05, - "loss": 0.2566, + "epoch": 0.04, + "grad_norm": 0.4848685945627893, + "learning_rate": 1.9995791111016e-05, + "loss": 0.2935, "step": 849 }, { - "epoch": 0.05, - "grad_norm": 1.769652766589286, - "learning_rate": 1.9981490979137853e-05, - "loss": 0.8859, + "epoch": 0.04, + "grad_norm": 2.2832035975787934, + "learning_rate": 1.999574783530384e-05, + "loss": 0.8151, "step": 850 }, { - "epoch": 0.05, - "grad_norm": 0.5989281680159326, - "learning_rate": 1.9981377636223477e-05, - "loss": 0.2521, + "epoch": 0.04, + "grad_norm": 0.7475504853648355, + "learning_rate": 1.9995704338295462e-05, + "loss": 0.2695, "step": 851 }, { - "epoch": 0.05, - "grad_norm": 1.0097503020108294, - "learning_rate": 1.998126394765442e-05, - "loss": 0.4525, + "epoch": 0.04, + "grad_norm": 0.9218909512968196, + "learning_rate": 1.9995660619991817e-05, + "loss": 0.4347, "step": 852 }, { - "epoch": 0.05, - "grad_norm": 0.7673720570657228, - "learning_rate": 1.9981149913434626e-05, - "loss": 0.4724, + "epoch": 0.04, + "grad_norm": 0.7031620247783074, + "learning_rate": 1.9995616680393885e-05, + "loss": 0.4121, "step": 853 }, { - "epoch": 0.05, - "grad_norm": 0.4981975468603891, - "learning_rate": 1.9981035533568035e-05, - "loss": 0.3007, + "epoch": 0.04, + "grad_norm": 0.8582604939052164, + "learning_rate": 1.9995572519502632e-05, + "loss": 0.2955, "step": 854 }, { - "epoch": 0.05, - "grad_norm": 0.340649236428933, - "learning_rate": 1.998092080805862e-05, - "loss": 0.1132, + "epoch": 0.04, + "grad_norm": 0.5399389797209696, + "learning_rate": 1.999552813731904e-05, + "loss": 0.22, "step": 855 }, { - "epoch": 0.05, - "grad_norm": 0.652425187067407, - "learning_rate": 1.9980805736910337e-05, - "loss": 0.4076, + "epoch": 0.04, + "grad_norm": 1.910275505356983, + "learning_rate": 1.9995483533844086e-05, + "loss": 0.7414, "step": 856 }, { - "epoch": 0.05, - "grad_norm": 0.5959681924395248, - "learning_rate": 1.9980690320127188e-05, - "loss": 0.3472, + "epoch": 0.04, + "grad_norm": 0.48641088009816946, + "learning_rate": 1.9995438709078757e-05, + "loss": 0.2598, "step": 857 }, { - "epoch": 0.05, - "grad_norm": 1.4708566945755925, - "learning_rate": 1.998057455771316e-05, - "loss": 0.4343, + "epoch": 0.04, + "grad_norm": 0.7821998840861388, + "learning_rate": 1.9995393663024054e-05, + "loss": 0.421, "step": 858 }, { - "epoch": 0.05, - "grad_norm": 1.6946441906172074, - "learning_rate": 1.9980458449672263e-05, - "loss": 0.7589, + "epoch": 0.04, + "grad_norm": 1.0579596603493202, + "learning_rate": 1.9995348395680968e-05, + "loss": 0.6033, "step": 859 }, { - "epoch": 0.05, - "grad_norm": 0.546655269731215, - "learning_rate": 1.998034199600852e-05, - "loss": 0.3238, + "epoch": 0.04, + "grad_norm": 0.41411602326651864, + "learning_rate": 1.99953029070505e-05, + "loss": 0.0846, "step": 860 }, { - "epoch": 0.05, - "grad_norm": 0.4237690098340433, - "learning_rate": 1.9980225196725964e-05, - "loss": 0.2079, + "epoch": 0.04, + "grad_norm": 0.6336592227158708, + "learning_rate": 1.999525719713366e-05, + "loss": 0.2826, "step": 861 }, { - "epoch": 0.05, - "grad_norm": 1.1400927331869855, - "learning_rate": 1.998010805182864e-05, - "loss": 0.5551, + "epoch": 0.04, + "grad_norm": 2.6536167160593815, + "learning_rate": 1.999521126593146e-05, + "loss": 0.7309, "step": 862 }, { - "epoch": 0.05, - "grad_norm": 0.5594124097189067, - "learning_rate": 1.9979990561320597e-05, - "loss": 0.3234, + "epoch": 0.04, + "grad_norm": 0.9089598618310353, + "learning_rate": 1.9995165113444917e-05, + "loss": 0.4851, "step": 863 }, { - "epoch": 0.05, - "grad_norm": 0.8241251910035258, - "learning_rate": 1.9979872725205915e-05, - "loss": 0.3545, + "epoch": 0.04, + "grad_norm": 0.6241762585201575, + "learning_rate": 1.999511873967505e-05, + "loss": 0.3413, "step": 864 }, { - "epoch": 0.05, - "grad_norm": 1.6342272397960347, - "learning_rate": 1.997975454348867e-05, - "loss": 0.7562, + "epoch": 0.04, + "grad_norm": 0.6650235501057201, + "learning_rate": 1.9995072144622888e-05, + "loss": 0.4154, "step": 865 }, { - "epoch": 0.05, - "grad_norm": 0.5442992828218223, - "learning_rate": 1.9979636016172952e-05, - "loss": 0.3479, + "epoch": 0.04, + "grad_norm": 1.1397443913282685, + "learning_rate": 1.999502532828946e-05, + "loss": 0.4706, "step": 866 }, { - "epoch": 0.05, - "grad_norm": 1.0426475164986881, - "learning_rate": 1.9979517143262867e-05, - "loss": 0.2534, + "epoch": 0.04, + "grad_norm": 0.47825851561854876, + "learning_rate": 1.999497829067581e-05, + "loss": 0.2042, "step": 867 }, { - "epoch": 0.05, - "grad_norm": 0.8241038141038709, - "learning_rate": 1.9979397924762537e-05, - "loss": 0.3189, + "epoch": 0.04, + "grad_norm": 2.215742964071151, + "learning_rate": 1.999493103178297e-05, + "loss": 0.7824, "step": 868 }, { - "epoch": 0.05, - "grad_norm": 0.8868676701867368, - "learning_rate": 1.9979278360676082e-05, - "loss": 0.3811, + "epoch": 0.04, + "grad_norm": 0.6332233760929028, + "learning_rate": 1.9994883551611993e-05, + "loss": 0.3492, "step": 869 }, { - "epoch": 0.05, - "grad_norm": 0.8775413046921993, - "learning_rate": 1.9979158451007648e-05, - "loss": 0.4496, + "epoch": 0.04, + "grad_norm": 0.6475903648006387, + "learning_rate": 1.9994835850163926e-05, + "loss": 0.3539, "step": 870 }, { - "epoch": 0.05, - "grad_norm": 1.841188215520176, - "learning_rate": 1.9979038195761386e-05, - "loss": 0.5085, + "epoch": 0.04, + "grad_norm": 1.294760104625491, + "learning_rate": 1.9994787927439825e-05, + "loss": 0.7046, "step": 871 }, { - "epoch": 0.05, - "grad_norm": 0.46194012023312764, - "learning_rate": 1.997891759494146e-05, - "loss": 0.2981, + "epoch": 0.04, + "grad_norm": 0.8632421083751662, + "learning_rate": 1.9994739783440753e-05, + "loss": 0.331, "step": 872 }, { - "epoch": 0.05, - "grad_norm": 0.7829547374288088, - "learning_rate": 1.9978796648552045e-05, - "loss": 0.415, + "epoch": 0.04, + "grad_norm": 0.7451891562684548, + "learning_rate": 1.9994691418167775e-05, + "loss": 0.2965, "step": 873 }, { - "epoch": 0.05, - "grad_norm": 0.7663806220882737, - "learning_rate": 1.9978675356597334e-05, - "loss": 0.2796, + "epoch": 0.04, + "grad_norm": 0.46212658527416045, + "learning_rate": 1.9994642831621964e-05, + "loss": 0.2377, "step": 874 }, { - "epoch": 0.05, - "grad_norm": 0.6505647168387442, - "learning_rate": 1.9978553719081523e-05, - "loss": 0.3577, + "epoch": 0.04, + "grad_norm": 0.9597196637391371, + "learning_rate": 1.999459402380439e-05, + "loss": 0.4885, "step": 875 }, { - "epoch": 0.05, - "grad_norm": 0.6693024697212153, - "learning_rate": 1.997843173600883e-05, - "loss": 0.3511, + "epoch": 0.04, + "grad_norm": 0.6448242288360911, + "learning_rate": 1.999454499471614e-05, + "loss": 0.3554, "step": 876 }, { - "epoch": 0.05, - "grad_norm": 1.4101677187766488, - "learning_rate": 1.997830940738347e-05, - "loss": 0.5603, + "epoch": 0.04, + "grad_norm": 0.619971338954494, + "learning_rate": 1.9994495744358296e-05, + "loss": 0.343, "step": 877 }, { - "epoch": 0.05, - "grad_norm": 0.8793601862533259, - "learning_rate": 1.9978186733209686e-05, - "loss": 0.3397, + "epoch": 0.04, + "grad_norm": 1.0786091152311266, + "learning_rate": 1.999444627273195e-05, + "loss": 0.4976, "step": 878 }, { - "epoch": 0.05, - "grad_norm": 0.587777319443702, - "learning_rate": 1.997806371349172e-05, - "loss": 0.2368, + "epoch": 0.04, + "grad_norm": 0.43500808758377507, + "learning_rate": 1.9994396579838195e-05, + "loss": 0.2842, "step": 879 }, { - "epoch": 0.05, - "grad_norm": 0.5639544073283919, - "learning_rate": 1.9977940348233845e-05, - "loss": 0.4186, + "epoch": 0.04, + "grad_norm": 0.7349777479099476, + "learning_rate": 1.9994346665678133e-05, + "loss": 0.3091, "step": 880 }, { - "epoch": 0.05, - "grad_norm": 0.7457185631167834, - "learning_rate": 1.997781663744032e-05, - "loss": 0.2677, + "epoch": 0.04, + "grad_norm": 1.1785563316895715, + "learning_rate": 1.999429653025287e-05, + "loss": 0.4329, "step": 881 }, { - "epoch": 0.05, - "grad_norm": 0.6447444340438591, - "learning_rate": 1.9977692581115436e-05, - "loss": 0.3951, + "epoch": 0.04, + "grad_norm": 0.5816170744190584, + "learning_rate": 1.999424617356351e-05, + "loss": 0.3438, "step": 882 }, { - "epoch": 0.05, - "grad_norm": 1.6502688235687761, - "learning_rate": 1.9977568179263484e-05, - "loss": 0.8296, + "epoch": 0.04, + "grad_norm": 1.2066082102731222, + "learning_rate": 1.9994195595611175e-05, + "loss": 0.4574, "step": 883 }, { - "epoch": 0.05, - "grad_norm": 0.4514393605711949, - "learning_rate": 1.9977443431888778e-05, - "loss": 0.2656, + "epoch": 0.04, + "grad_norm": 0.6383000046702992, + "learning_rate": 1.9994144796396985e-05, + "loss": 0.3347, "step": 884 }, { - "epoch": 0.05, - "grad_norm": 1.0553305711117273, - "learning_rate": 1.9977318338995632e-05, - "loss": 0.4572, + "epoch": 0.04, + "grad_norm": 0.5339145127830404, + "learning_rate": 1.9994093775922058e-05, + "loss": 0.341, "step": 885 }, { - "epoch": 0.05, - "grad_norm": 0.8407200121591809, - "learning_rate": 1.9977192900588385e-05, - "loss": 0.5764, + "epoch": 0.04, + "grad_norm": 0.689674617401782, + "learning_rate": 1.999404253418753e-05, + "loss": 0.2249, "step": 886 }, { - "epoch": 0.05, - "grad_norm": 0.47881812578845484, - "learning_rate": 1.9977067116671374e-05, - "loss": 0.1567, + "epoch": 0.04, + "grad_norm": 0.9687013764004975, + "learning_rate": 1.999399107119453e-05, + "loss": 0.4649, "step": 887 }, { - "epoch": 0.05, - "grad_norm": 0.4873118349825849, - "learning_rate": 1.9976940987248956e-05, + "epoch": 0.04, + "grad_norm": 0.7504387593965892, + "learning_rate": 1.9993939386944198e-05, "loss": 0.3673, "step": 888 }, { - "epoch": 0.05, - "grad_norm": 1.4079005001201457, - "learning_rate": 1.9976814512325503e-05, - "loss": 0.8054, + "epoch": 0.04, + "grad_norm": 0.6970812273061224, + "learning_rate": 1.9993887481437684e-05, + "loss": 0.3862, "step": 889 }, { - "epoch": 0.05, - "grad_norm": 0.5093928927192002, - "learning_rate": 1.9976687691905394e-05, - "loss": 0.2287, + "epoch": 0.04, + "grad_norm": 0.8115876633920088, + "learning_rate": 1.999383535467613e-05, + "loss": 0.0776, "step": 890 }, { - "epoch": 0.05, - "grad_norm": 0.7518705232066628, - "learning_rate": 1.9976560525993015e-05, - "loss": 0.4441, + "epoch": 0.04, + "grad_norm": 0.7432659082859107, + "learning_rate": 1.99937830066607e-05, + "loss": 0.3653, "step": 891 }, { - "epoch": 0.05, - "grad_norm": 0.6356126660781017, - "learning_rate": 1.9976433014592776e-05, - "loss": 0.4113, + "epoch": 0.04, + "grad_norm": 0.5814620025765047, + "learning_rate": 1.999373043739254e-05, + "loss": 0.3523, "step": 892 }, { - "epoch": 0.05, - "grad_norm": 0.52789164129456, - "learning_rate": 1.9976305157709092e-05, - "loss": 0.2405, + "epoch": 0.04, + "grad_norm": 0.49674721409289674, + "learning_rate": 1.9993677646872826e-05, + "loss": 0.2556, "step": 893 }, { - "epoch": 0.05, - "grad_norm": 0.6451124335109072, - "learning_rate": 1.9976176955346392e-05, - "loss": 0.3365, + "epoch": 0.04, + "grad_norm": 0.6177008259762825, + "learning_rate": 1.9993624635102712e-05, + "loss": 0.362, "step": 894 }, { - "epoch": 0.05, - "grad_norm": 0.4228472921247635, - "learning_rate": 1.9976048407509107e-05, - "loss": 0.3027, + "epoch": 0.04, + "grad_norm": 1.5699728279519556, + "learning_rate": 1.9993571402083388e-05, + "loss": 0.7655, "step": 895 }, { - "epoch": 0.05, - "grad_norm": 0.5781676443680558, - "learning_rate": 1.99759195142017e-05, - "loss": 0.3938, + "epoch": 0.04, + "grad_norm": 0.5942911656256071, + "learning_rate": 1.9993517947816025e-05, + "loss": 0.3037, "step": 896 }, { - "epoch": 0.05, - "grad_norm": 0.5641365163135668, - "learning_rate": 1.9975790275428625e-05, - "loss": 0.3254, + "epoch": 0.04, + "grad_norm": 0.6175935867396398, + "learning_rate": 1.9993464272301803e-05, + "loss": 0.2869, "step": 897 }, { - "epoch": 0.05, - "grad_norm": 0.5717025160344602, - "learning_rate": 1.9975660691194365e-05, - "loss": 0.4194, + "epoch": 0.04, + "grad_norm": 0.7461719565078261, + "learning_rate": 1.9993410375541915e-05, + "loss": 0.5671, "step": 898 }, { - "epoch": 0.05, - "grad_norm": 1.0582116646632875, - "learning_rate": 1.99755307615034e-05, - "loss": 0.5222, + "epoch": 0.04, + "grad_norm": 0.4062962453943622, + "learning_rate": 1.9993356257537556e-05, + "loss": 0.1353, "step": 899 }, { - "epoch": 0.05, - "grad_norm": 0.4722208730822841, - "learning_rate": 1.997540048636024e-05, - "loss": 0.2897, + "epoch": 0.04, + "grad_norm": 0.6205281397746988, + "learning_rate": 1.9993301918289916e-05, + "loss": 0.3416, "step": 900 }, { - "epoch": 0.05, - "grad_norm": 0.6038363034287668, - "learning_rate": 1.997526986576938e-05, - "loss": 0.3378, + "epoch": 0.04, + "grad_norm": 0.5543827196612083, + "learning_rate": 1.9993247357800207e-05, + "loss": 0.3173, "step": 901 }, { - "epoch": 0.05, - "grad_norm": 0.5519184370509896, - "learning_rate": 1.9975138899735366e-05, - "loss": 0.3994, + "epoch": 0.04, + "grad_norm": 1.3417071420297146, + "learning_rate": 1.999319257606963e-05, + "loss": 0.6472, "step": 902 }, { - "epoch": 0.05, - "grad_norm": 0.4720499415776914, - "learning_rate": 1.9975007588262715e-05, - "loss": 0.3905, + "epoch": 0.04, + "grad_norm": 0.6164035518734712, + "learning_rate": 1.9993137573099403e-05, + "loss": 0.3073, "step": 903 }, { - "epoch": 0.05, - "grad_norm": 0.5569224560379596, - "learning_rate": 1.9974875931355977e-05, - "loss": 0.3047, + "epoch": 0.04, + "grad_norm": 0.6895871347085274, + "learning_rate": 1.999308234889074e-05, + "loss": 0.4406, "step": 904 }, { - "epoch": 0.05, - "grad_norm": 0.7713492034663683, - "learning_rate": 1.9974743929019717e-05, - "loss": 0.3841, + "epoch": 0.04, + "grad_norm": 0.73236998569119, + "learning_rate": 1.9993026903444868e-05, + "loss": 0.4064, "step": 905 }, { - "epoch": 0.05, - "grad_norm": 0.5967554524752199, - "learning_rate": 1.99746115812585e-05, - "loss": 0.3936, + "epoch": 0.04, + "grad_norm": 0.516429519223869, + "learning_rate": 1.9992971236763012e-05, + "loss": 0.2289, "step": 906 }, { - "epoch": 0.05, - "grad_norm": 0.4020352157362454, - "learning_rate": 1.997447888807692e-05, - "loss": 0.1904, + "epoch": 0.04, + "grad_norm": 0.41154720105171755, + "learning_rate": 1.9992915348846403e-05, + "loss": 0.1912, "step": 907 }, { - "epoch": 0.05, - "grad_norm": 0.4793739647988955, - "learning_rate": 1.997434584947956e-05, - "loss": 0.3606, + "epoch": 0.04, + "grad_norm": 0.6310419490572622, + "learning_rate": 1.9992859239696278e-05, + "loss": 0.3594, "step": 908 }, { - "epoch": 0.05, - "grad_norm": 0.5324891885968183, - "learning_rate": 1.9974212465471037e-05, - "loss": 0.3638, + "epoch": 0.04, + "grad_norm": 0.5160161500729866, + "learning_rate": 1.9992802909313882e-05, + "loss": 0.2982, "step": 909 }, { - "epoch": 0.05, - "grad_norm": 0.711921743120288, - "learning_rate": 1.9974078736055963e-05, - "loss": 0.4732, + "epoch": 0.04, + "grad_norm": 0.9306496601334961, + "learning_rate": 1.999274635770046e-05, + "loss": 0.5419, "step": 910 }, { - "epoch": 0.05, - "grad_norm": 0.4754960806126236, - "learning_rate": 1.997394466123897e-05, - "loss": 0.3441, + "epoch": 0.04, + "grad_norm": 0.8721081683280697, + "learning_rate": 1.999268958485727e-05, + "loss": 0.5229, "step": 911 }, { - "epoch": 0.05, - "grad_norm": 0.4828296923573561, - "learning_rate": 1.99738102410247e-05, - "loss": 0.3559, + "epoch": 0.04, + "grad_norm": 0.5658151351013206, + "learning_rate": 1.999263259078556e-05, + "loss": 0.2608, "step": 912 }, { - "epoch": 0.05, - "grad_norm": 0.4723973954273907, - "learning_rate": 1.9973675475417814e-05, - "loss": 0.1828, + "epoch": 0.04, + "grad_norm": 0.37172475684252704, + "learning_rate": 1.9992575375486592e-05, + "loss": 0.2807, "step": 913 }, { - "epoch": 0.05, - "grad_norm": 0.9490921519164291, - "learning_rate": 1.9973540364422973e-05, - "loss": 0.4723, + "epoch": 0.04, + "grad_norm": 1.0857608129906875, + "learning_rate": 1.9992517938961638e-05, + "loss": 0.4929, "step": 914 }, { - "epoch": 0.05, - "grad_norm": 0.5314162635369581, - "learning_rate": 1.997340490804486e-05, - "loss": 0.3647, + "epoch": 0.04, + "grad_norm": 0.5245775587465598, + "learning_rate": 1.9992460281211966e-05, + "loss": 0.3234, "step": 915 }, { - "epoch": 0.05, - "grad_norm": 0.48861096170235063, - "learning_rate": 1.9973269106288163e-05, - "loss": 0.387, + "epoch": 0.04, + "grad_norm": 0.4792296883052154, + "learning_rate": 1.9992402402238858e-05, + "loss": 0.3239, "step": 916 }, { - "epoch": 0.05, - "grad_norm": 0.635391914689005, - "learning_rate": 1.997313295915759e-05, - "loss": 0.3044, + "epoch": 0.04, + "grad_norm": 1.3727946687334898, + "learning_rate": 1.999234430204359e-05, + "loss": 0.7596, "step": 917 }, { - "epoch": 0.05, - "grad_norm": 0.46713708953393285, - "learning_rate": 1.9972996466657846e-05, - "loss": 0.3306, + "epoch": 0.04, + "grad_norm": 0.5324991095707883, + "learning_rate": 1.9992285980627452e-05, + "loss": 0.3098, "step": 918 }, { - "epoch": 0.05, - "grad_norm": 0.478474730302629, - "learning_rate": 1.9972859628793663e-05, - "loss": 0.3477, + "epoch": 0.04, + "grad_norm": 0.40234298372205257, + "learning_rate": 1.999222743799173e-05, + "loss": 0.1326, "step": 919 }, { - "epoch": 0.05, - "grad_norm": 0.8346975964677359, - "learning_rate": 1.9972722445569782e-05, - "loss": 0.3668, + "epoch": 0.04, + "grad_norm": 0.6719099559651767, + "learning_rate": 1.9992168674137724e-05, + "loss": 0.4133, "step": 920 }, { - "epoch": 0.05, - "grad_norm": 0.44496567184720826, - "learning_rate": 1.997258491699095e-05, - "loss": 0.2714, + "epoch": 0.04, + "grad_norm": 0.5362409628455475, + "learning_rate": 1.9992109689066733e-05, + "loss": 0.3546, "step": 921 }, { - "epoch": 0.05, - "grad_norm": 1.387562812852257, - "learning_rate": 1.9972447043061933e-05, - "loss": 0.7089, + "epoch": 0.04, + "grad_norm": 0.9298121537431454, + "learning_rate": 1.9992050482780067e-05, + "loss": 0.4538, "step": 922 }, { - "epoch": 0.05, - "grad_norm": 0.4650130501105177, - "learning_rate": 1.9972308823787504e-05, - "loss": 0.2542, + "epoch": 0.04, + "grad_norm": 1.530785493231479, + "learning_rate": 1.9991991055279033e-05, + "loss": 0.8836, "step": 923 }, { - "epoch": 0.05, - "grad_norm": 0.49044841015158935, - "learning_rate": 1.9972170259172444e-05, - "loss": 0.3412, + "epoch": 0.04, + "grad_norm": 0.5833791293185981, + "learning_rate": 1.9991931406564944e-05, + "loss": 0.3268, "step": 924 }, { - "epoch": 0.05, - "grad_norm": 0.9751351988159547, - "learning_rate": 1.9972031349221563e-05, - "loss": 0.585, + "epoch": 0.04, + "grad_norm": 0.4364867901706965, + "learning_rate": 1.9991871536639128e-05, + "loss": 0.2026, "step": 925 }, { - "epoch": 0.05, - "grad_norm": 0.4007795809187631, - "learning_rate": 1.9971892093939663e-05, - "loss": 0.2822, + "epoch": 0.04, + "grad_norm": 1.105569502819989, + "learning_rate": 1.9991811445502905e-05, + "loss": 0.5508, "step": 926 }, { - "epoch": 0.05, - "grad_norm": 0.4016861009078226, - "learning_rate": 1.9971752493331568e-05, - "loss": 0.2736, + "epoch": 0.04, + "grad_norm": 0.5787875579017364, + "learning_rate": 1.9991751133157608e-05, + "loss": 0.3206, "step": 927 }, { - "epoch": 0.05, - "grad_norm": 0.5568432011062416, - "learning_rate": 1.9971612547402116e-05, - "loss": 0.3634, + "epoch": 0.04, + "grad_norm": 0.600147492158044, + "learning_rate": 1.999169059960457e-05, + "loss": 0.3739, "step": 928 }, { - "epoch": 0.05, - "grad_norm": 0.8775709744925506, - "learning_rate": 1.9971472256156147e-05, - "loss": 0.507, + "epoch": 0.04, + "grad_norm": 0.718451581768904, + "learning_rate": 1.9991629844845132e-05, + "loss": 0.3561, "step": 929 }, { - "epoch": 0.05, - "grad_norm": 0.5513163113977712, - "learning_rate": 1.997133161959852e-05, - "loss": 0.2609, + "epoch": 0.04, + "grad_norm": 0.5456314912511349, + "learning_rate": 1.999156886888064e-05, + "loss": 0.303, "step": 930 }, { - "epoch": 0.05, - "grad_norm": 0.5854383825688924, - "learning_rate": 1.9971190637734113e-05, - "loss": 0.3781, + "epoch": 0.04, + "grad_norm": 1.5076021551695222, + "learning_rate": 1.9991507671712444e-05, + "loss": 0.5714, "step": 931 }, { - "epoch": 0.05, - "grad_norm": 0.5414854941618819, - "learning_rate": 1.99710493105678e-05, - "loss": 0.3819, + "epoch": 0.04, + "grad_norm": 0.5595340697514456, + "learning_rate": 1.99914462533419e-05, + "loss": 0.3581, "step": 932 }, { - "epoch": 0.05, - "grad_norm": 0.36193619268358984, - "learning_rate": 1.9970907638104483e-05, - "loss": 0.1874, + "epoch": 0.04, + "grad_norm": 0.414383331599564, + "learning_rate": 1.999138461377036e-05, + "loss": 0.235, "step": 933 }, { - "epoch": 0.05, - "grad_norm": 1.0396706344808258, - "learning_rate": 1.9970765620349058e-05, - "loss": 0.6366, + "epoch": 0.04, + "grad_norm": 0.702445961710312, + "learning_rate": 1.9991322752999195e-05, + "loss": 0.3137, "step": 934 }, { - "epoch": 0.05, - "grad_norm": 0.5805743491039917, - "learning_rate": 1.997062325730645e-05, - "loss": 0.36, + "epoch": 0.04, + "grad_norm": 2.5204391132755144, + "learning_rate": 1.9991260671029777e-05, + "loss": 0.6434, "step": 935 }, { - "epoch": 0.05, - "grad_norm": 0.4610257475867486, - "learning_rate": 1.997048054898159e-05, - "loss": 0.2515, + "epoch": 0.04, + "grad_norm": 0.4131843698077403, + "learning_rate": 1.999119836786348e-05, + "loss": 0.2977, "step": 936 }, { - "epoch": 0.05, - "grad_norm": 1.3399968664903208, - "learning_rate": 1.997033749537941e-05, - "loss": 0.5433, + "epoch": 0.04, + "grad_norm": 0.7913090216802677, + "learning_rate": 1.9991135843501675e-05, + "loss": 0.3975, "step": 937 }, { - "epoch": 0.05, - "grad_norm": 0.5558017309777695, - "learning_rate": 1.9970194096504877e-05, - "loss": 0.2575, + "epoch": 0.04, + "grad_norm": 0.80133344769209, + "learning_rate": 1.9991073097945756e-05, + "loss": 0.369, "step": 938 }, { - "epoch": 0.05, - "grad_norm": 0.47350587576542036, - "learning_rate": 1.9970050352362952e-05, - "loss": 0.2149, + "epoch": 0.04, + "grad_norm": 0.5160021159393685, + "learning_rate": 1.9991010131197106e-05, + "loss": 0.2451, "step": 939 }, { - "epoch": 0.05, - "grad_norm": 0.8210880489033746, - "learning_rate": 1.996990626295861e-05, - "loss": 0.4412, + "epoch": 0.04, + "grad_norm": 0.8689536784771379, + "learning_rate": 1.999094694325712e-05, + "loss": 0.4339, "step": 940 }, { - "epoch": 0.05, - "grad_norm": 1.5633944668244224, - "learning_rate": 1.9969761828296843e-05, - "loss": 0.8621, + "epoch": 0.04, + "grad_norm": 1.238713261021553, + "learning_rate": 1.99908835341272e-05, + "loss": 0.5047, "step": 941 }, { - "epoch": 0.05, - "grad_norm": 0.5279281352946498, - "learning_rate": 1.9969617048382653e-05, - "loss": 0.3227, + "epoch": 0.04, + "grad_norm": 0.5275999017994039, + "learning_rate": 1.999081990380875e-05, + "loss": 0.2429, "step": 942 }, { - "epoch": 0.05, - "grad_norm": 0.5300481438173281, - "learning_rate": 1.996947192322105e-05, - "loss": 0.3458, + "epoch": 0.04, + "grad_norm": 0.9978396791273116, + "learning_rate": 1.9990756052303175e-05, + "loss": 0.5525, "step": 943 }, { - "epoch": 0.05, - "grad_norm": 0.7143815231199013, - "learning_rate": 1.9969326452817068e-05, - "loss": 0.4565, + "epoch": 0.04, + "grad_norm": 0.5125599041463396, + "learning_rate": 1.999069197961189e-05, + "loss": 0.3969, "step": 944 }, { - "epoch": 0.05, - "grad_norm": 0.5568644549729681, - "learning_rate": 1.9969180637175737e-05, - "loss": 0.3141, + "epoch": 0.04, + "grad_norm": 0.39525693737493667, + "learning_rate": 1.999062768573631e-05, + "loss": 0.161, "step": 945 }, { - "epoch": 0.05, - "grad_norm": 0.4280076221992522, - "learning_rate": 1.9969034476302108e-05, - "loss": 0.0969, + "epoch": 0.04, + "grad_norm": 0.5262891431816694, + "learning_rate": 1.9990563170677867e-05, + "loss": 0.2563, "step": 946 }, { - "epoch": 0.05, - "grad_norm": 0.5319356976602545, - "learning_rate": 1.996888797020125e-05, - "loss": 0.3553, + "epoch": 0.04, + "grad_norm": 0.740560031195802, + "learning_rate": 1.9990498434437983e-05, + "loss": 0.4753, "step": 947 }, { - "epoch": 0.05, - "grad_norm": 0.5872685383034628, - "learning_rate": 1.9968741118878224e-05, - "loss": 0.3226, + "epoch": 0.04, + "grad_norm": 0.4780722786968319, + "learning_rate": 1.999043347701809e-05, + "loss": 0.2442, "step": 948 }, { - "epoch": 0.05, - "grad_norm": 1.0190650467630764, - "learning_rate": 1.9968593922338125e-05, - "loss": 0.4937, + "epoch": 0.04, + "grad_norm": 0.9456155504350245, + "learning_rate": 1.9990368298419635e-05, + "loss": 0.469, "step": 949 }, { - "epoch": 0.05, - "grad_norm": 0.6087067671856272, - "learning_rate": 1.9968446380586045e-05, - "loss": 0.3468, + "epoch": 0.04, + "grad_norm": 1.452877476684087, + "learning_rate": 1.999030289864405e-05, + "loss": 0.6181, "step": 950 }, { - "epoch": 0.05, - "grad_norm": 0.5114568446529008, - "learning_rate": 1.9968298493627096e-05, - "loss": 0.2784, + "epoch": 0.04, + "grad_norm": 0.3759676570878058, + "learning_rate": 1.9990237277692787e-05, + "loss": 0.2051, "step": 951 }, { - "epoch": 0.05, - "grad_norm": 0.40876696190949857, - "learning_rate": 1.99681502614664e-05, - "loss": 0.2605, + "epoch": 0.04, + "grad_norm": 0.4485445477430428, + "learning_rate": 1.99901714355673e-05, + "loss": 0.2937, "step": 952 }, { - "epoch": 0.05, - "grad_norm": 1.1545115092345783, - "learning_rate": 1.9968001684109086e-05, - "loss": 0.6098, + "epoch": 0.04, + "grad_norm": 1.523344595178022, + "learning_rate": 1.999010537226905e-05, + "loss": 0.7377, "step": 953 }, { - "epoch": 0.05, - "grad_norm": 0.48476631935801145, - "learning_rate": 1.9967852761560304e-05, - "loss": 0.3156, + "epoch": 0.04, + "grad_norm": 0.584859474115927, + "learning_rate": 1.999003908779949e-05, + "loss": 0.3591, "step": 954 }, { - "epoch": 0.05, - "grad_norm": 0.4489912788177858, - "learning_rate": 1.996770349382521e-05, - "loss": 0.3845, + "epoch": 0.04, + "grad_norm": 0.5521426218392671, + "learning_rate": 1.9989972582160097e-05, + "loss": 0.4074, "step": 955 }, { - "epoch": 0.05, - "grad_norm": 0.8817666801720644, - "learning_rate": 1.9967553880908973e-05, - "loss": 0.3831, + "epoch": 0.04, + "grad_norm": 0.5800887310270454, + "learning_rate": 1.998990585535234e-05, + "loss": 0.3842, "step": 956 }, { - "epoch": 0.05, - "grad_norm": 0.5005095482186253, - "learning_rate": 1.996740392281677e-05, - "loss": 0.3326, + "epoch": 0.04, + "grad_norm": 0.4881910111488858, + "learning_rate": 1.9989838907377692e-05, + "loss": 0.2524, "step": 957 }, { - "epoch": 0.06, - "grad_norm": 0.41208570683327056, - "learning_rate": 1.9967253619553805e-05, - "loss": 0.1964, + "epoch": 0.04, + "grad_norm": 0.4790424890435586, + "learning_rate": 1.998977173823764e-05, + "loss": 0.1943, "step": 958 }, { - "epoch": 0.06, - "grad_norm": 0.9145023410100701, - "learning_rate": 1.996710297112527e-05, - "loss": 0.3778, + "epoch": 0.04, + "grad_norm": 0.6962271254232019, + "learning_rate": 1.998970434793367e-05, + "loss": 0.4686, "step": 959 }, { - "epoch": 0.06, - "grad_norm": 0.5094709013843017, - "learning_rate": 1.9966951977536387e-05, - "loss": 0.3345, + "epoch": 0.04, + "grad_norm": 0.4835271628604268, + "learning_rate": 1.9989636736467278e-05, + "loss": 0.3358, "step": 960 }, { - "epoch": 0.06, - "grad_norm": 1.1238884116633348, - "learning_rate": 1.996680063879239e-05, - "loss": 0.6331, + "epoch": 0.04, + "grad_norm": 0.5692985075528063, + "learning_rate": 1.9989568903839952e-05, + "loss": 0.3927, "step": 961 }, { - "epoch": 0.06, - "grad_norm": 0.6022319418093043, - "learning_rate": 1.9966648954898515e-05, - "loss": 0.3348, + "epoch": 0.04, + "grad_norm": 1.4956237327350015, + "learning_rate": 1.99895008500532e-05, + "loss": 0.6386, "step": 962 }, { - "epoch": 0.06, - "grad_norm": 0.5012059527869338, - "learning_rate": 1.9966496925860014e-05, - "loss": 0.2811, + "epoch": 0.04, + "grad_norm": 0.4565069798471337, + "learning_rate": 1.998943257510853e-05, + "loss": 0.1955, "step": 963 }, { - "epoch": 0.06, - "grad_norm": 0.34219376584073213, - "learning_rate": 1.996634455168215e-05, - "loss": 0.2451, + "epoch": 0.04, + "grad_norm": 0.4647760789653528, + "learning_rate": 1.9989364079007446e-05, + "loss": 0.2637, "step": 964 }, { - "epoch": 0.06, - "grad_norm": 1.2382160129106405, - "learning_rate": 1.9966191832370208e-05, - "loss": 0.4533, + "epoch": 0.04, + "grad_norm": 0.9861343491695227, + "learning_rate": 1.998929536175147e-05, + "loss": 0.5807, "step": 965 }, { - "epoch": 0.06, - "grad_norm": 0.5785626838524301, - "learning_rate": 1.9966038767929468e-05, - "loss": 0.3016, + "epoch": 0.04, + "grad_norm": 0.7161360280077669, + "learning_rate": 1.9989226423342127e-05, + "loss": 0.3899, "step": 966 }, { - "epoch": 0.06, - "grad_norm": 0.5612011179574358, - "learning_rate": 1.9965885358365234e-05, - "loss": 0.3872, + "epoch": 0.04, + "grad_norm": 0.60958303854926, + "learning_rate": 1.9989157263780934e-05, + "loss": 0.3888, "step": 967 }, { - "epoch": 0.06, - "grad_norm": 1.4325677209096839, - "learning_rate": 1.996573160368282e-05, - "loss": 0.6171, + "epoch": 0.04, + "grad_norm": 0.5243349855444062, + "learning_rate": 1.9989087883069428e-05, + "loss": 0.3113, "step": 968 }, { - "epoch": 0.06, - "grad_norm": 0.4714099776950298, - "learning_rate": 1.996557750388755e-05, - "loss": 0.2267, + "epoch": 0.04, + "grad_norm": 0.8467787643272635, + "learning_rate": 1.9989018281209145e-05, + "loss": 0.3414, "step": 969 }, { - "epoch": 0.06, - "grad_norm": 0.45563509639747796, - "learning_rate": 1.996542305898476e-05, - "loss": 0.2878, + "epoch": 0.04, + "grad_norm": 0.40066678020707, + "learning_rate": 1.9988948458201625e-05, + "loss": 0.2312, "step": 970 }, { - "epoch": 0.06, - "grad_norm": 0.5373298863023417, - "learning_rate": 1.9965268268979794e-05, - "loss": 0.357, + "epoch": 0.04, + "grad_norm": 1.3659289740799592, + "learning_rate": 1.998887841404841e-05, + "loss": 0.4124, "step": 971 }, { - "epoch": 0.06, - "grad_norm": 0.588654271043847, - "learning_rate": 1.996511313387802e-05, - "loss": 0.2835, + "epoch": 0.04, + "grad_norm": 0.519433988464514, + "learning_rate": 1.998880814875106e-05, + "loss": 0.3273, "step": 972 }, { - "epoch": 0.06, - "grad_norm": 1.4807502575156875, - "learning_rate": 1.9964957653684804e-05, - "loss": 0.8575, + "epoch": 0.04, + "grad_norm": 0.6223772842466917, + "learning_rate": 1.9988737662311123e-05, + "loss": 0.4644, "step": 973 }, { - "epoch": 0.06, - "grad_norm": 1.4442004066106364, - "learning_rate": 1.9964801828405536e-05, - "loss": 0.6195, + "epoch": 0.04, + "grad_norm": 1.3361472257312175, + "learning_rate": 1.998866695473016e-05, + "loss": 0.4604, "step": 974 }, { - "epoch": 0.06, - "grad_norm": 0.4943797589189964, - "learning_rate": 1.9964645658045607e-05, - "loss": 0.2719, + "epoch": 0.04, + "grad_norm": 0.5071431762912818, + "learning_rate": 1.9988596026009735e-05, + "loss": 0.2091, "step": 975 }, { - "epoch": 0.06, - "grad_norm": 0.39577055533039807, - "learning_rate": 1.9964489142610426e-05, - "loss": 0.1685, + "epoch": 0.04, + "grad_norm": 0.7147933990191029, + "learning_rate": 1.9988524876151425e-05, + "loss": 0.3791, "step": 976 }, { - "epoch": 0.06, - "grad_norm": 1.1935907452878245, - "learning_rate": 1.996433228210542e-05, - "loss": 0.5387, + "epoch": 0.04, + "grad_norm": 0.9227266475815541, + "learning_rate": 1.99884535051568e-05, + "loss": 0.3679, "step": 977 }, { - "epoch": 0.06, - "grad_norm": 0.5879709174870033, - "learning_rate": 1.996417507653601e-05, - "loss": 0.2531, + "epoch": 0.04, + "grad_norm": 0.4881554860063917, + "learning_rate": 1.998838191302744e-05, + "loss": 0.2979, "step": 978 }, { - "epoch": 0.06, - "grad_norm": 0.5808913193155929, - "learning_rate": 1.9964017525907646e-05, - "loss": 0.365, + "epoch": 0.04, + "grad_norm": 0.794042099865485, + "learning_rate": 1.9988310099764937e-05, + "loss": 0.4219, "step": 979 }, { - "epoch": 0.06, - "grad_norm": 1.8732982117118, - "learning_rate": 1.9963859630225786e-05, - "loss": 0.7228, + "epoch": 0.05, + "grad_norm": 0.7035929358392993, + "learning_rate": 1.9988238065370872e-05, + "loss": 0.3602, "step": 980 }, { - "epoch": 0.06, - "grad_norm": 0.5626726887011435, - "learning_rate": 1.9963701389495896e-05, - "loss": 0.3331, + "epoch": 0.05, + "grad_norm": 1.137024646625258, + "learning_rate": 1.9988165809846843e-05, + "loss": 0.2319, "step": 981 }, { - "epoch": 0.06, - "grad_norm": 0.77938332796842, - "learning_rate": 1.9963542803723452e-05, - "loss": 0.4066, + "epoch": 0.05, + "grad_norm": 0.4694239440620951, + "learning_rate": 1.9988093333194447e-05, + "loss": 0.2726, "step": 982 }, { - "epoch": 0.06, - "grad_norm": 0.42358202474178924, - "learning_rate": 1.996338387291395e-05, - "loss": 0.2655, + "epoch": 0.05, + "grad_norm": 0.6202300838670854, + "learning_rate": 1.9988020635415295e-05, + "loss": 0.4678, "step": 983 }, { - "epoch": 0.06, - "grad_norm": 0.7147571092797101, - "learning_rate": 1.9963224597072896e-05, - "loss": 0.3894, + "epoch": 0.05, + "grad_norm": 0.49001755380737105, + "learning_rate": 1.9987947716510988e-05, + "loss": 0.237, "step": 984 }, { - "epoch": 0.06, - "grad_norm": 0.6968938961710522, - "learning_rate": 1.99630649762058e-05, - "loss": 0.3112, + "epoch": 0.05, + "grad_norm": 0.4831203909365543, + "learning_rate": 1.998787457648315e-05, + "loss": 0.2941, "step": 985 }, { - "epoch": 0.06, - "grad_norm": 0.6853819380607388, - "learning_rate": 1.996290501031819e-05, - "loss": 0.4494, + "epoch": 0.05, + "grad_norm": 1.6855712597431922, + "learning_rate": 1.9987801215333395e-05, + "loss": 0.671, "step": 986 }, { - "epoch": 0.06, - "grad_norm": 0.4657733992528829, - "learning_rate": 1.996274469941561e-05, - "loss": 0.317, + "epoch": 0.05, + "grad_norm": 0.504797432938001, + "learning_rate": 1.9987727633063344e-05, + "loss": 0.2733, "step": 987 }, { - "epoch": 0.06, - "grad_norm": 0.9112005971743489, - "learning_rate": 1.9962584043503616e-05, - "loss": 0.4292, + "epoch": 0.05, + "grad_norm": 0.45478965084470363, + "learning_rate": 1.9987653829674633e-05, + "loss": 0.3022, "step": 988 }, { - "epoch": 0.06, - "grad_norm": 0.49945616956948385, - "learning_rate": 1.9962423042587756e-05, - "loss": 0.2444, + "epoch": 0.05, + "grad_norm": 1.0636559602385816, + "learning_rate": 1.998757980516889e-05, + "loss": 0.5604, "step": 989 }, { - "epoch": 0.06, - "grad_norm": 0.5901638109218509, - "learning_rate": 1.996226169667362e-05, - "loss": 0.3631, + "epoch": 0.05, + "grad_norm": 1.1651161689430332, + "learning_rate": 1.998750555954776e-05, + "loss": 0.6223, "step": 990 }, { - "epoch": 0.06, - "grad_norm": 0.5898119279938763, - "learning_rate": 1.9962100005766783e-05, - "loss": 0.3696, + "epoch": 0.05, + "grad_norm": 0.430859085606783, + "learning_rate": 1.998743109281288e-05, + "loss": 0.2104, "step": 991 }, { - "epoch": 0.06, - "grad_norm": 0.37546046574179853, - "learning_rate": 1.9961937969872858e-05, - "loss": 0.0755, + "epoch": 0.05, + "grad_norm": 0.5855933886814805, + "learning_rate": 1.99873564049659e-05, + "loss": 0.346, "step": 992 }, { - "epoch": 0.06, - "grad_norm": 0.538046268884345, - "learning_rate": 1.996177558899745e-05, - "loss": 0.3463, + "epoch": 0.05, + "grad_norm": 0.8515855886420527, + "learning_rate": 1.9987281496008476e-05, + "loss": 0.3773, "step": 993 }, { - "epoch": 0.06, - "grad_norm": 0.7582586586626521, - "learning_rate": 1.9961612863146175e-05, - "loss": 0.4278, + "epoch": 0.05, + "grad_norm": 0.6328950487110743, + "learning_rate": 1.998720636594227e-05, + "loss": 0.344, "step": 994 }, { - "epoch": 0.06, - "grad_norm": 0.4647624246924399, - "learning_rate": 1.9961449792324677e-05, - "loss": 0.2752, + "epoch": 0.05, + "grad_norm": 0.8083515397381871, + "learning_rate": 1.9987131014768936e-05, + "loss": 0.4596, "step": 995 }, { - "epoch": 0.06, - "grad_norm": 0.46627636277037593, - "learning_rate": 1.9961286376538607e-05, - "loss": 0.2918, + "epoch": 0.05, + "grad_norm": 0.5809661808750618, + "learning_rate": 1.9987055442490148e-05, + "loss": 0.3206, "step": 996 }, { - "epoch": 0.06, - "grad_norm": 1.4470087570646943, - "learning_rate": 1.996112261579361e-05, - "loss": 0.8131, + "epoch": 0.05, + "grad_norm": 0.40468680142753666, + "learning_rate": 1.998697964910758e-05, + "loss": 0.222, "step": 997 }, { - "epoch": 0.06, - "grad_norm": 0.40123913886301504, - "learning_rate": 1.9960958510095373e-05, - "loss": 0.248, + "epoch": 0.05, + "grad_norm": 0.8677605740898781, + "learning_rate": 1.9986903634622907e-05, + "loss": 0.3158, "step": 998 }, { - "epoch": 0.06, - "grad_norm": 0.48846965331054437, - "learning_rate": 1.9960794059449564e-05, - "loss": 0.3432, + "epoch": 0.05, + "grad_norm": 0.49550263527990357, + "learning_rate": 1.998682739903781e-05, + "loss": 0.3925, "step": 999 }, { - "epoch": 0.06, - "grad_norm": 0.6562528678683638, - "learning_rate": 1.996062926386189e-05, - "loss": 0.4336, + "epoch": 0.05, + "grad_norm": 0.427527637484721, + "learning_rate": 1.9986750942353983e-05, + "loss": 0.2965, "step": 1000 }, { - "epoch": 0.06, - "grad_norm": 0.9824380086974319, - "learning_rate": 1.996046412333805e-05, - "loss": 0.472, + "epoch": 0.05, + "grad_norm": 1.2149984943321288, + "learning_rate": 1.9986674264573115e-05, + "loss": 0.6908, "step": 1001 }, { - "epoch": 0.06, - "grad_norm": 0.70434564261008, - "learning_rate": 1.996029863788377e-05, - "loss": 0.335, + "epoch": 0.05, + "grad_norm": 1.2751362179001977, + "learning_rate": 1.9986597365696908e-05, + "loss": 0.727, "step": 1002 }, { - "epoch": 0.06, - "grad_norm": 0.47245421383507813, - "learning_rate": 1.9960132807504772e-05, - "loss": 0.3284, + "epoch": 0.05, + "grad_norm": 0.479898713590962, + "learning_rate": 1.9986520245727054e-05, + "loss": 0.2536, "step": 1003 }, { - "epoch": 0.06, - "grad_norm": 0.39877422949668473, - "learning_rate": 1.9959966632206804e-05, - "loss": 0.2873, + "epoch": 0.05, + "grad_norm": 0.5075103598388898, + "learning_rate": 1.998644290466527e-05, + "loss": 0.254, "step": 1004 }, { - "epoch": 0.06, - "grad_norm": 0.5732104169024447, - "learning_rate": 1.995980011199562e-05, - "loss": 0.3251, + "epoch": 0.05, + "grad_norm": 0.7275548516290863, + "learning_rate": 1.9986365342513266e-05, + "loss": 0.4644, "step": 1005 }, { - "epoch": 0.06, - "grad_norm": 0.5576190638879068, - "learning_rate": 1.9959633246876987e-05, - "loss": 0.3643, + "epoch": 0.05, + "grad_norm": 0.500596740936002, + "learning_rate": 1.9986287559272758e-05, + "loss": 0.3731, "step": 1006 }, { - "epoch": 0.06, - "grad_norm": 0.48385803317161563, - "learning_rate": 1.995946603685668e-05, - "loss": 0.3651, + "epoch": 0.05, + "grad_norm": 0.5977477165322007, + "learning_rate": 1.9986209554945467e-05, + "loss": 0.3486, "step": 1007 }, { - "epoch": 0.06, - "grad_norm": 0.46973803118307117, - "learning_rate": 1.99592984819405e-05, - "loss": 0.2235, + "epoch": 0.05, + "grad_norm": 0.6004062653354671, + "learning_rate": 1.998613132953312e-05, + "loss": 0.4105, "step": 1008 }, { - "epoch": 0.06, - "grad_norm": 0.31711744184779644, - "learning_rate": 1.9959130582134234e-05, - "loss": 0.2126, + "epoch": 0.05, + "grad_norm": 0.40608649574557887, + "learning_rate": 1.9986052883037452e-05, + "loss": 0.2632, "step": 1009 }, { - "epoch": 0.06, - "grad_norm": 0.6060670327723078, - "learning_rate": 1.995896233744371e-05, - "loss": 0.4179, + "epoch": 0.05, + "grad_norm": 0.5048406079782527, + "learning_rate": 1.9985974215460198e-05, + "loss": 0.2388, "step": 1010 }, { - "epoch": 0.06, - "grad_norm": 0.39326789263335804, - "learning_rate": 1.9958793747874744e-05, - "loss": 0.2994, + "epoch": 0.05, + "grad_norm": 0.5465757473460651, + "learning_rate": 1.9985895326803096e-05, + "loss": 0.3937, "step": 1011 }, { - "epoch": 0.06, - "grad_norm": 0.6352828886575169, - "learning_rate": 1.995862481343318e-05, - "loss": 0.4093, + "epoch": 0.05, + "grad_norm": 0.5161979502632593, + "learning_rate": 1.99858162170679e-05, + "loss": 0.2919, "step": 1012 }, { - "epoch": 0.06, - "grad_norm": 1.117282848504116, - "learning_rate": 1.9958455534124867e-05, - "loss": 0.6806, + "epoch": 0.05, + "grad_norm": 0.9728933744806545, + "learning_rate": 1.998573688625636e-05, + "loss": 0.4861, "step": 1013 }, { - "epoch": 0.06, - "grad_norm": 0.38513507477952136, - "learning_rate": 1.9958285909955668e-05, - "loss": 0.2518, + "epoch": 0.05, + "grad_norm": 0.7469536263271251, + "learning_rate": 1.9985657334370227e-05, + "loss": 0.3688, "step": 1014 }, { - "epoch": 0.06, - "grad_norm": 0.49782104906396, - "learning_rate": 1.9958115940931454e-05, - "loss": 0.3526, + "epoch": 0.05, + "grad_norm": 0.5988767394089402, + "learning_rate": 1.9985577561411263e-05, + "loss": 0.3644, "step": 1015 }, { - "epoch": 0.06, - "grad_norm": 0.5610251919492635, - "learning_rate": 1.9957945627058115e-05, - "loss": 0.3765, + "epoch": 0.05, + "grad_norm": 0.39362226905559555, + "learning_rate": 1.9985497567381237e-05, + "loss": 0.269, "step": 1016 }, { - "epoch": 0.06, - "grad_norm": 0.349097678917595, - "learning_rate": 1.995777496834155e-05, - "loss": 0.2702, + "epoch": 0.05, + "grad_norm": 0.9895627757818932, + "learning_rate": 1.9985417352281918e-05, + "loss": 0.4768, "step": 1017 }, { - "epoch": 0.06, - "grad_norm": 0.6490771901337339, - "learning_rate": 1.9957603964787662e-05, - "loss": 0.2902, + "epoch": 0.05, + "grad_norm": 0.4899535177969121, + "learning_rate": 1.9985336916115083e-05, + "loss": 0.283, "step": 1018 }, { - "epoch": 0.06, - "grad_norm": 0.5038053547197864, - "learning_rate": 1.9957432616402377e-05, - "loss": 0.4018, + "epoch": 0.05, + "grad_norm": 0.49523275071095835, + "learning_rate": 1.9985256258882515e-05, + "loss": 0.3636, "step": 1019 }, { - "epoch": 0.06, - "grad_norm": 1.0330461815808312, - "learning_rate": 1.995726092319163e-05, - "loss": 0.4524, + "epoch": 0.05, + "grad_norm": 1.762096303898484, + "learning_rate": 1.9985175380585996e-05, + "loss": 0.6448, "step": 1020 }, { - "epoch": 0.06, - "grad_norm": 0.4900679690379944, - "learning_rate": 1.9957088885161366e-05, - "loss": 0.3125, + "epoch": 0.05, + "grad_norm": 0.4963071965254022, + "learning_rate": 1.998509428122732e-05, + "loss": 0.3384, "step": 1021 }, { - "epoch": 0.06, - "grad_norm": 0.46117236213253127, - "learning_rate": 1.9956916502317537e-05, - "loss": 0.395, + "epoch": 0.05, + "grad_norm": 0.46614153685632953, + "learning_rate": 1.9985012960808275e-05, + "loss": 0.2927, "step": 1022 }, { - "epoch": 0.06, - "grad_norm": 0.4574938839171714, - "learning_rate": 1.9956743774666124e-05, - "loss": 0.2827, + "epoch": 0.05, + "grad_norm": 0.7771375243734702, + "learning_rate": 1.998493141933067e-05, + "loss": 0.3728, "step": 1023 }, { - "epoch": 0.06, - "grad_norm": 0.36527000688924915, - "learning_rate": 1.99565707022131e-05, - "loss": 0.2012, + "epoch": 0.05, + "grad_norm": 0.5522348263572887, + "learning_rate": 1.998484965679631e-05, + "loss": 0.3055, "step": 1024 }, { - "epoch": 0.06, - "grad_norm": 1.771745956925933, - "learning_rate": 1.995639728496446e-05, - "loss": 0.859, + "epoch": 0.05, + "grad_norm": 1.9954383433286353, + "learning_rate": 1.9984767673206996e-05, + "loss": 0.7861, "step": 1025 }, { - "epoch": 0.06, - "grad_norm": 0.5436455796015267, - "learning_rate": 1.9956223522926212e-05, - "loss": 0.3379, + "epoch": 0.05, + "grad_norm": 1.9708932766226501, + "learning_rate": 1.9984685468564553e-05, + "loss": 0.3874, "step": 1026 }, { - "epoch": 0.06, - "grad_norm": 0.5224236889719983, - "learning_rate": 1.995604941610437e-05, - "loss": 0.3158, + "epoch": 0.05, + "grad_norm": 0.6335637508220593, + "learning_rate": 1.9984603042870797e-05, + "loss": 0.2982, "step": 1027 }, { - "epoch": 0.06, - "grad_norm": 0.8151837330528591, - "learning_rate": 1.9955874964504964e-05, - "loss": 0.5744, + "epoch": 0.05, + "grad_norm": 1.0324386333682112, + "learning_rate": 1.9984520396127554e-05, + "loss": 0.5388, "step": 1028 }, { - "epoch": 0.06, - "grad_norm": 0.31072521368728423, - "learning_rate": 1.995570016813404e-05, - "loss": 0.158, + "epoch": 0.05, + "grad_norm": 0.6289412374997954, + "learning_rate": 1.9984437528336648e-05, + "loss": 0.2839, "step": 1029 }, { - "epoch": 0.06, - "grad_norm": 0.6735818235853589, - "learning_rate": 1.995552502699764e-05, - "loss": 0.3841, + "epoch": 0.05, + "grad_norm": 0.62307764767172, + "learning_rate": 1.9984354439499923e-05, + "loss": 0.2518, "step": 1030 }, { - "epoch": 0.06, - "grad_norm": 0.6394142288022795, - "learning_rate": 1.9955349541101844e-05, - "loss": 0.3293, + "epoch": 0.05, + "grad_norm": 0.6896124271842595, + "learning_rate": 1.9984271129619214e-05, + "loss": 0.4185, "step": 1031 }, { - "epoch": 0.06, - "grad_norm": 0.7060434285177977, - "learning_rate": 1.995517371045272e-05, - "loss": 0.4121, + "epoch": 0.05, + "grad_norm": 2.0140401899036675, + "learning_rate": 1.9984187598696364e-05, + "loss": 0.8853, "step": 1032 }, { - "epoch": 0.06, - "grad_norm": 0.7891476065439303, - "learning_rate": 1.9954997535056354e-05, - "loss": 0.361, + "epoch": 0.05, + "grad_norm": 0.6469496887899895, + "learning_rate": 1.9984103846733222e-05, + "loss": 0.2348, "step": 1033 }, { - "epoch": 0.06, - "grad_norm": 0.6378419743565197, - "learning_rate": 1.9954821014918857e-05, - "loss": 0.3563, + "epoch": 0.05, + "grad_norm": 1.3677696866643623, + "learning_rate": 1.998401987373164e-05, + "loss": 0.5303, "step": 1034 }, { - "epoch": 0.06, - "grad_norm": 0.48004431946828835, - "learning_rate": 1.995464415004633e-05, - "loss": 0.2107, + "epoch": 0.05, + "grad_norm": 0.5418242656364944, + "learning_rate": 1.9983935679693487e-05, + "loss": 0.2929, "step": 1035 }, { - "epoch": 0.06, - "grad_norm": 0.3995032608655963, - "learning_rate": 1.9954466940444913e-05, - "loss": 0.2438, + "epoch": 0.05, + "grad_norm": 0.44840025797545957, + "learning_rate": 1.998385126462062e-05, + "loss": 0.1952, "step": 1036 }, { - "epoch": 0.06, - "grad_norm": 1.1159782193087138, - "learning_rate": 1.9954289386120728e-05, - "loss": 0.5159, + "epoch": 0.05, + "grad_norm": 1.5791718978340326, + "learning_rate": 1.9983766628514907e-05, + "loss": 0.7071, "step": 1037 }, { - "epoch": 0.06, - "grad_norm": 0.5618332940945832, - "learning_rate": 1.995411148707993e-05, - "loss": 0.3351, + "epoch": 0.05, + "grad_norm": 1.3593488386457846, + "learning_rate": 1.9983681771378222e-05, + "loss": 0.7262, "step": 1038 }, { - "epoch": 0.06, - "grad_norm": 0.5215455455748942, - "learning_rate": 1.995393324332868e-05, - "loss": 0.3607, + "epoch": 0.05, + "grad_norm": 0.5275877158878143, + "learning_rate": 1.9983596693212447e-05, + "loss": 0.2899, "step": 1039 }, { - "epoch": 0.06, - "grad_norm": 0.8208653345275203, - "learning_rate": 1.9953754654873148e-05, - "loss": 0.5031, + "epoch": 0.05, + "grad_norm": 0.8240702281240538, + "learning_rate": 1.9983511394019462e-05, + "loss": 0.4713, "step": 1040 }, { - "epoch": 0.06, - "grad_norm": 0.3994022765006455, - "learning_rate": 1.995357572171952e-05, - "loss": 0.1645, + "epoch": 0.05, + "grad_norm": 0.42025613280846374, + "learning_rate": 1.9983425873801158e-05, + "loss": 0.1943, "step": 1041 }, { - "epoch": 0.06, - "grad_norm": 0.49644502143567504, - "learning_rate": 1.9953396443873996e-05, - "loss": 0.322, + "epoch": 0.05, + "grad_norm": 0.5338642696283458, + "learning_rate": 1.998334013255943e-05, + "loss": 0.3105, "step": 1042 }, { - "epoch": 0.06, - "grad_norm": 0.7034835367330016, - "learning_rate": 1.995321682134278e-05, - "loss": 0.433, + "epoch": 0.05, + "grad_norm": 0.5578723726040757, + "learning_rate": 1.998325417029617e-05, + "loss": 0.3026, "step": 1043 }, { - "epoch": 0.06, - "grad_norm": 0.7908560910972711, - "learning_rate": 1.995303685413209e-05, - "loss": 0.4042, + "epoch": 0.05, + "grad_norm": 1.2462571758614014, + "learning_rate": 1.9983167987013285e-05, + "loss": 0.6758, "step": 1044 }, { - "epoch": 0.06, - "grad_norm": 0.49210532169863536, - "learning_rate": 1.9952856542248168e-05, - "loss": 0.3592, + "epoch": 0.05, + "grad_norm": 0.5031919742624393, + "learning_rate": 1.9983081582712684e-05, + "loss": 0.3212, "step": 1045 }, { - "epoch": 0.06, - "grad_norm": 0.646689735534446, - "learning_rate": 1.995267588569725e-05, - "loss": 0.4115, + "epoch": 0.05, + "grad_norm": 0.8114986414865737, + "learning_rate": 1.9982994957396277e-05, + "loss": 0.4849, "step": 1046 }, { - "epoch": 0.06, - "grad_norm": 0.4048654510347636, - "learning_rate": 1.9952494884485593e-05, - "loss": 0.2289, + "epoch": 0.05, + "grad_norm": 0.5545200382339603, + "learning_rate": 1.9982908111065986e-05, + "loss": 0.2796, "step": 1047 }, { - "epoch": 0.06, - "grad_norm": 0.4411817060201455, - "learning_rate": 1.9952313538619467e-05, - "loss": 0.2335, + "epoch": 0.05, + "grad_norm": 0.6211476578082482, + "learning_rate": 1.9982821043723728e-05, + "loss": 0.3077, "step": 1048 }, { - "epoch": 0.06, - "grad_norm": 1.2620590338316189, - "learning_rate": 1.995213184810515e-05, - "loss": 0.7683, + "epoch": 0.05, + "grad_norm": 0.5494028574397645, + "learning_rate": 1.9982733755371434e-05, + "loss": 0.2118, "step": 1049 }, { - "epoch": 0.06, - "grad_norm": 0.5329777387061266, - "learning_rate": 1.9951949812948933e-05, - "loss": 0.3357, + "epoch": 0.05, + "grad_norm": 0.9982660593738515, + "learning_rate": 1.9982646246011036e-05, + "loss": 0.4784, "step": 1050 }, { - "epoch": 0.06, - "grad_norm": 0.5525441866753129, - "learning_rate": 1.9951767433157126e-05, - "loss": 0.3914, + "epoch": 0.05, + "grad_norm": 0.5671751617924845, + "learning_rate": 1.9982558515644475e-05, + "loss": 0.3144, "step": 1051 }, { - "epoch": 0.06, - "grad_norm": 1.0809939876830499, - "learning_rate": 1.9951584708736038e-05, - "loss": 0.7235, + "epoch": 0.05, + "grad_norm": 0.6048241028893799, + "learning_rate": 1.9982470564273687e-05, + "loss": 0.3359, "step": 1052 }, { - "epoch": 0.06, - "grad_norm": 0.3968852627552618, - "learning_rate": 1.9951401639692e-05, - "loss": 0.1826, + "epoch": 0.05, + "grad_norm": 0.43923445306440884, + "learning_rate": 1.9982382391900624e-05, + "loss": 0.2496, "step": 1053 }, { - "epoch": 0.06, - "grad_norm": 0.4945020554203922, - "learning_rate": 1.9951218226031354e-05, - "loss": 0.3181, + "epoch": 0.05, + "grad_norm": 0.6220298356411785, + "learning_rate": 1.9982293998527234e-05, + "loss": 0.3267, "step": 1054 }, { - "epoch": 0.06, - "grad_norm": 0.42878235628024847, - "learning_rate": 1.9951034467760446e-05, - "loss": 0.3214, + "epoch": 0.05, + "grad_norm": 0.5569834664710244, + "learning_rate": 1.9982205384155473e-05, + "loss": 0.3483, "step": 1055 }, { - "epoch": 0.06, - "grad_norm": 0.904246221547212, - "learning_rate": 1.995085036488564e-05, - "loss": 0.5193, + "epoch": 0.05, + "grad_norm": 1.57446670838614, + "learning_rate": 1.998211654878731e-05, + "loss": 0.4894, "step": 1056 }, { - "epoch": 0.06, - "grad_norm": 0.4788553603759133, - "learning_rate": 1.9950665917413318e-05, - "loss": 0.2582, + "epoch": 0.05, + "grad_norm": 0.4831801337142973, + "learning_rate": 1.9982027492424708e-05, + "loss": 0.3317, "step": 1057 }, { - "epoch": 0.06, - "grad_norm": 0.497482325015419, - "learning_rate": 1.995048112534986e-05, - "loss": 0.3624, + "epoch": 0.05, + "grad_norm": 0.9697348897283845, + "learning_rate": 1.9981938215069637e-05, + "loss": 0.6091, "step": 1058 }, { - "epoch": 0.06, - "grad_norm": 1.1997009804190655, - "learning_rate": 1.995029598870167e-05, - "loss": 0.6325, + "epoch": 0.05, + "grad_norm": 0.6014356049322473, + "learning_rate": 1.9981848716724074e-05, + "loss": 0.3035, "step": 1059 }, { - "epoch": 0.06, - "grad_norm": 0.3760169516809768, - "learning_rate": 1.995011050747516e-05, - "loss": 0.1641, + "epoch": 0.05, + "grad_norm": 0.4370543057318882, + "learning_rate": 1.998175899739e-05, + "loss": 0.228, "step": 1060 }, { - "epoch": 0.06, - "grad_norm": 1.0218299290087096, - "learning_rate": 1.994992468167675e-05, - "loss": 0.5896, + "epoch": 0.05, + "grad_norm": 0.6289187465066766, + "learning_rate": 1.99816690570694e-05, + "loss": 0.2922, "step": 1061 }, { - "epoch": 0.06, - "grad_norm": 0.5325934080764194, - "learning_rate": 1.9949738511312872e-05, - "loss": 0.4082, + "epoch": 0.05, + "grad_norm": 1.3422247285624014, + "learning_rate": 1.9981578895764272e-05, + "loss": 0.4315, "step": 1062 }, { - "epoch": 0.06, - "grad_norm": 0.5189018963919375, - "learning_rate": 1.994955199638998e-05, - "loss": 0.2595, + "epoch": 0.05, + "grad_norm": 0.48809492203062965, + "learning_rate": 1.998148851347661e-05, + "loss": 0.2954, "step": 1063 }, { - "epoch": 0.06, - "grad_norm": 1.8985392734797513, - "learning_rate": 1.994936513691453e-05, - "loss": 0.8639, + "epoch": 0.05, + "grad_norm": 1.7160386605376932, + "learning_rate": 1.9981397910208408e-05, + "loss": 0.7015, "step": 1064 }, { - "epoch": 0.06, - "grad_norm": 0.616301320543746, - "learning_rate": 1.9949177932892997e-05, - "loss": 0.4081, + "epoch": 0.05, + "grad_norm": 0.5456047373459513, + "learning_rate": 1.9981307085961678e-05, + "loss": 0.1695, "step": 1065 }, { - "epoch": 0.06, - "grad_norm": 0.43255047491592685, - "learning_rate": 1.9948990384331853e-05, - "loss": 0.2952, + "epoch": 0.05, + "grad_norm": 0.5819838951697648, + "learning_rate": 1.998121604073843e-05, + "loss": 0.2246, "step": 1066 }, { - "epoch": 0.06, - "grad_norm": 0.4972852098105318, - "learning_rate": 1.9948802491237608e-05, - "loss": 0.2134, + "epoch": 0.05, + "grad_norm": 0.6198302151296564, + "learning_rate": 1.9981124774540676e-05, + "loss": 0.3251, "step": 1067 }, { - "epoch": 0.06, - "grad_norm": 0.7883153357673944, - "learning_rate": 1.994861425361675e-05, - "loss": 0.525, + "epoch": 0.05, + "grad_norm": 1.3423867896019503, + "learning_rate": 1.9981033287370443e-05, + "loss": 0.5623, "step": 1068 }, { - "epoch": 0.06, - "grad_norm": 0.6785269587723624, - "learning_rate": 1.9948425671475816e-05, - "loss": 0.3279, + "epoch": 0.05, + "grad_norm": 0.4959791278675541, + "learning_rate": 1.9980941579229748e-05, + "loss": 0.2262, "step": 1069 }, { - "epoch": 0.06, - "grad_norm": 0.5178554482526371, - "learning_rate": 1.9948236744821327e-05, - "loss": 0.2993, + "epoch": 0.05, + "grad_norm": 0.7183727038215335, + "learning_rate": 1.9980849650120634e-05, + "loss": 0.4454, "step": 1070 }, { - "epoch": 0.06, - "grad_norm": 0.9167086626332699, - "learning_rate": 1.994804747365983e-05, - "loss": 0.4278, + "epoch": 0.05, + "grad_norm": 0.5291848831758632, + "learning_rate": 1.998075750004512e-05, + "loss": 0.3841, "step": 1071 }, { - "epoch": 0.06, - "grad_norm": 0.5356860464606096, - "learning_rate": 1.994785785799787e-05, - "loss": 0.3144, + "epoch": 0.05, + "grad_norm": 0.4547122867092, + "learning_rate": 1.998066512900526e-05, + "loss": 0.1591, "step": 1072 }, { - "epoch": 0.06, - "grad_norm": 0.4967397434179642, - "learning_rate": 1.9947667897842027e-05, - "loss": 0.2778, + "epoch": 0.05, + "grad_norm": 0.7470972871569668, + "learning_rate": 1.998057253700309e-05, + "loss": 0.3448, "step": 1073 }, { - "epoch": 0.06, - "grad_norm": 0.6967124996922901, - "learning_rate": 1.994747759319887e-05, - "loss": 0.3812, + "epoch": 0.05, + "grad_norm": 1.6902504409348107, + "learning_rate": 1.9980479724040665e-05, + "loss": 0.6228, "step": 1074 }, { - "epoch": 0.06, - "grad_norm": 0.48475894899954397, - "learning_rate": 1.994728694407499e-05, - "loss": 0.2961, + "epoch": 0.05, + "grad_norm": 0.450645603372596, + "learning_rate": 1.998038669012004e-05, + "loss": 0.2734, "step": 1075 }, { - "epoch": 0.06, - "grad_norm": 0.9083581496874299, - "learning_rate": 1.9947095950476992e-05, - "loss": 0.5082, + "epoch": 0.05, + "grad_norm": 0.6597542532327847, + "learning_rate": 1.9980293435243272e-05, + "loss": 0.4375, "step": 1076 }, { - "epoch": 0.06, - "grad_norm": 0.7444906820012983, - "learning_rate": 1.994690461241149e-05, - "loss": 0.3969, + "epoch": 0.05, + "grad_norm": 1.2660495124764697, + "learning_rate": 1.9980199959412426e-05, + "loss": 0.6177, "step": 1077 }, { - "epoch": 0.06, - "grad_norm": 0.46270242209941526, - "learning_rate": 1.994671292988511e-05, - "loss": 0.3147, + "epoch": 0.05, + "grad_norm": 0.4420534105983837, + "learning_rate": 1.998010626262957e-05, + "loss": 0.1925, "step": 1078 }, { - "epoch": 0.06, - "grad_norm": 0.7264460965731782, - "learning_rate": 1.9946520902904485e-05, - "loss": 0.5275, + "epoch": 0.05, + "grad_norm": 0.6523094156153382, + "learning_rate": 1.9980012344896784e-05, + "loss": 0.3956, "step": 1079 }, { - "epoch": 0.06, - "grad_norm": 0.43704367289000573, - "learning_rate": 1.994632853147627e-05, - "loss": 0.1736, + "epoch": 0.05, + "grad_norm": 1.8663892962641737, + "learning_rate": 1.9979918206216142e-05, + "loss": 0.5795, "step": 1080 }, { - "epoch": 0.06, - "grad_norm": 0.5335354016333684, - "learning_rate": 1.9946135815607128e-05, - "loss": 0.3071, + "epoch": 0.05, + "grad_norm": 0.5987547105991187, + "learning_rate": 1.9979823846589726e-05, + "loss": 0.3388, "step": 1081 }, { - "epoch": 0.06, - "grad_norm": 0.4954050549137195, - "learning_rate": 1.9945942755303727e-05, - "loss": 0.2834, + "epoch": 0.05, + "grad_norm": 0.4010706910791419, + "learning_rate": 1.9979729266019635e-05, + "loss": 0.1605, "step": 1082 }, { - "epoch": 0.06, - "grad_norm": 1.0935397207678366, - "learning_rate": 1.994574935057276e-05, - "loss": 0.4421, + "epoch": 0.05, + "grad_norm": 0.6072503371257313, + "learning_rate": 1.9979634464507953e-05, + "loss": 0.3862, "step": 1083 }, { - "epoch": 0.06, - "grad_norm": 0.438437072971696, - "learning_rate": 1.994555560142092e-05, - "loss": 0.3371, + "epoch": 0.05, + "grad_norm": 0.8258915030622537, + "learning_rate": 1.9979539442056782e-05, + "loss": 0.3913, "step": 1084 }, { - "epoch": 0.06, - "grad_norm": 0.8748799090587484, - "learning_rate": 1.9945361507854914e-05, - "loss": 0.5721, + "epoch": 0.05, + "grad_norm": 0.9548213668298036, + "learning_rate": 1.9979444198668228e-05, + "loss": 0.3867, "step": 1085 }, { - "epoch": 0.06, - "grad_norm": 0.49367308799806964, - "learning_rate": 1.9945167069881468e-05, - "loss": 0.3301, + "epoch": 0.05, + "grad_norm": 0.49754332537663454, + "learning_rate": 1.99793487343444e-05, + "loss": 0.323, "step": 1086 }, { - "epoch": 0.06, - "grad_norm": 0.5471671685840018, - "learning_rate": 1.9944972287507316e-05, - "loss": 0.2885, + "epoch": 0.05, + "grad_norm": 1.0226892357096733, + "learning_rate": 1.9979253049087408e-05, + "loss": 0.4947, "step": 1087 }, { - "epoch": 0.06, - "grad_norm": 0.44722860542267706, - "learning_rate": 1.99447771607392e-05, - "loss": 0.2388, + "epoch": 0.05, + "grad_norm": 0.4175762377100039, + "learning_rate": 1.9979157142899367e-05, + "loss": 0.2273, "step": 1088 }, { - "epoch": 0.06, - "grad_norm": 0.4399451723523636, - "learning_rate": 1.9944581689583878e-05, - "loss": 0.3063, + "epoch": 0.05, + "grad_norm": 1.629751589344646, + "learning_rate": 1.9979061015782406e-05, + "loss": 0.6893, "step": 1089 }, { - "epoch": 0.06, - "grad_norm": 0.49743000266769444, - "learning_rate": 1.994438587404812e-05, - "loss": 0.3283, + "epoch": 0.05, + "grad_norm": 0.6745406433383159, + "learning_rate": 1.9978964667738655e-05, + "loss": 0.4126, "step": 1090 }, { - "epoch": 0.06, - "grad_norm": 1.2104431749854316, - "learning_rate": 1.994418971413871e-05, - "loss": 0.5652, + "epoch": 0.05, + "grad_norm": 0.5351538557462366, + "learning_rate": 1.9978868098770244e-05, + "loss": 0.3442, "step": 1091 }, { - "epoch": 0.06, - "grad_norm": 1.2263233603048742, - "learning_rate": 1.994399320986243e-05, - "loss": 0.7256, + "epoch": 0.05, + "grad_norm": 1.0247962491319385, + "learning_rate": 1.997877130887931e-05, + "loss": 0.5144, "step": 1092 }, { - "epoch": 0.06, - "grad_norm": 0.5740931856541661, - "learning_rate": 1.99437963612261e-05, - "loss": 0.2438, + "epoch": 0.05, + "grad_norm": 0.5227267877144892, + "learning_rate": 1.9978674298067995e-05, + "loss": 0.2979, "step": 1093 }, { - "epoch": 0.06, - "grad_norm": 0.39197406731229295, - "learning_rate": 1.9943599168236526e-05, - "loss": 0.2557, + "epoch": 0.05, + "grad_norm": 0.4854007208947763, + "learning_rate": 1.997857706633845e-05, + "loss": 0.2995, "step": 1094 }, { - "epoch": 0.06, - "grad_norm": 1.245863493813506, - "learning_rate": 1.9943401630900543e-05, - "loss": 0.5469, + "epoch": 0.05, + "grad_norm": 0.5445875162885575, + "learning_rate": 1.9978479613692827e-05, + "loss": 0.2978, "step": 1095 }, { - "epoch": 0.06, - "grad_norm": 0.5865781893361174, - "learning_rate": 1.9943203749224986e-05, - "loss": 0.3007, + "epoch": 0.05, + "grad_norm": 0.7484940288710152, + "learning_rate": 1.9978381940133286e-05, + "loss": 0.4461, "step": 1096 }, { - "epoch": 0.06, - "grad_norm": 1.3374424837824597, - "learning_rate": 1.9943005523216713e-05, - "loss": 0.581, + "epoch": 0.05, + "grad_norm": 0.5580674618364786, + "learning_rate": 1.997828404566198e-05, + "loss": 0.3942, "step": 1097 }, { - "epoch": 0.06, - "grad_norm": 0.5545592888985182, - "learning_rate": 1.9942806952882587e-05, - "loss": 0.3663, + "epoch": 0.05, + "grad_norm": 0.48268761513020386, + "learning_rate": 1.9978185930281087e-05, + "loss": 0.3199, "step": 1098 }, { - "epoch": 0.06, - "grad_norm": 0.4807505633477306, - "learning_rate": 1.994260803822948e-05, - "loss": 0.2451, + "epoch": 0.05, + "grad_norm": 0.5457081587158406, + "learning_rate": 1.997808759399277e-05, + "loss": 0.3314, "step": 1099 }, { - "epoch": 0.06, - "grad_norm": 0.3769247168053469, - "learning_rate": 1.994240877926429e-05, - "loss": 0.2144, + "epoch": 0.05, + "grad_norm": 0.5473826611804381, + "learning_rate": 1.9977989036799215e-05, + "loss": 0.3073, "step": 1100 }, { - "epoch": 0.06, - "grad_norm": 0.7380917848688296, - "learning_rate": 1.994220917599391e-05, - "loss": 0.4029, + "epoch": 0.05, + "grad_norm": 0.6596614452992666, + "learning_rate": 1.99778902587026e-05, + "loss": 0.3253, "step": 1101 }, { - "epoch": 0.06, - "grad_norm": 0.5737493430661074, - "learning_rate": 1.994200922842525e-05, - "loss": 0.3119, + "epoch": 0.05, + "grad_norm": 0.5332226474026266, + "learning_rate": 1.9977791259705105e-05, + "loss": 0.3777, "step": 1102 }, { - "epoch": 0.06, - "grad_norm": 2.063385576846166, - "learning_rate": 1.994180893656524e-05, - "loss": 0.8398, + "epoch": 0.05, + "grad_norm": 0.5168865944923321, + "learning_rate": 1.9977692039808937e-05, + "loss": 0.3262, "step": 1103 }, { - "epoch": 0.06, - "grad_norm": 1.3920732373420577, - "learning_rate": 1.9941608300420815e-05, - "loss": 0.7834, + "epoch": 0.05, + "grad_norm": 1.4717359427446952, + "learning_rate": 1.9977592599016277e-05, + "loss": 0.7971, "step": 1104 }, { - "epoch": 0.06, - "grad_norm": 0.7518563438767537, - "learning_rate": 1.9941407319998918e-05, - "loss": 0.3335, + "epoch": 0.05, + "grad_norm": 0.36266198048414167, + "learning_rate": 1.9977492937329334e-05, + "loss": 0.1477, "step": 1105 }, { - "epoch": 0.06, - "grad_norm": 0.45728680330538174, - "learning_rate": 1.9941205995306517e-05, - "loss": 0.2271, + "epoch": 0.05, + "grad_norm": 0.42333667900727157, + "learning_rate": 1.997739305475032e-05, + "loss": 0.2717, "step": 1106 }, { - "epoch": 0.06, - "grad_norm": 0.7047347617250996, - "learning_rate": 1.994100432635058e-05, - "loss": 0.5542, + "epoch": 0.05, + "grad_norm": 0.5257587548497785, + "learning_rate": 1.9977292951281433e-05, + "loss": 0.4129, "step": 1107 }, { - "epoch": 0.06, - "grad_norm": 0.7164373543084449, - "learning_rate": 1.9940802313138092e-05, - "loss": 0.3288, + "epoch": 0.05, + "grad_norm": 0.6042337283916402, + "learning_rate": 1.99771926269249e-05, + "loss": 0.3296, "step": 1108 }, { - "epoch": 0.06, - "grad_norm": 0.4610040084120822, - "learning_rate": 1.994059995567604e-05, - "loss": 0.2943, + "epoch": 0.05, + "grad_norm": 0.5814381820672428, + "learning_rate": 1.9977092081682938e-05, + "loss": 0.3625, "step": 1109 }, { - "epoch": 0.06, - "grad_norm": 0.6608778183845305, - "learning_rate": 1.9940397253971447e-05, - "loss": 0.3997, + "epoch": 0.05, + "grad_norm": 0.6856570988627066, + "learning_rate": 1.997699131555777e-05, + "loss": 0.4113, "step": 1110 }, { - "epoch": 0.06, - "grad_norm": 0.6010430641256433, - "learning_rate": 1.9940194208031322e-05, - "loss": 0.3541, + "epoch": 0.05, + "grad_norm": 0.5147832923867899, + "learning_rate": 1.9976890328551633e-05, + "loss": 0.2491, "step": 1111 }, { - "epoch": 0.06, - "grad_norm": 0.5892069559400017, - "learning_rate": 1.9939990817862696e-05, - "loss": 0.3829, + "epoch": 0.05, + "grad_norm": 0.34001413591187396, + "learning_rate": 1.9976789120666763e-05, + "loss": 0.2075, "step": 1112 }, { - "epoch": 0.06, - "grad_norm": 0.39423776153888224, - "learning_rate": 1.9939787083472616e-05, - "loss": 0.3134, + "epoch": 0.05, + "grad_norm": 0.7650426852052872, + "learning_rate": 1.9976687691905394e-05, + "loss": 0.5617, "step": 1113 }, { - "epoch": 0.06, - "grad_norm": 0.3783860241639249, - "learning_rate": 1.993958300486814e-05, - "loss": 0.2465, + "epoch": 0.05, + "grad_norm": 0.4391616377403253, + "learning_rate": 1.9976586042269776e-05, + "loss": 0.2973, "step": 1114 }, { - "epoch": 0.06, - "grad_norm": 0.5857598729183383, - "learning_rate": 1.9939378582056332e-05, - "loss": 0.3123, + "epoch": 0.05, + "grad_norm": 0.6077550113941413, + "learning_rate": 1.9976484171762158e-05, + "loss": 0.345, "step": 1115 }, { - "epoch": 0.06, - "grad_norm": 1.451790292271993, - "learning_rate": 1.993917381504427e-05, - "loss": 0.8534, + "epoch": 0.05, + "grad_norm": 1.069603696377442, + "learning_rate": 1.9976382080384797e-05, + "loss": 0.7186, "step": 1116 }, { - "epoch": 0.06, - "grad_norm": 0.43312088945702426, - "learning_rate": 1.9938968703839045e-05, - "loss": 0.3252, + "epoch": 0.05, + "grad_norm": 0.46568385108253085, + "learning_rate": 1.9976279768139953e-05, + "loss": 0.2744, "step": 1117 }, { - "epoch": 0.06, - "grad_norm": 0.5774867354453822, - "learning_rate": 1.9938763248447762e-05, - "loss": 0.3247, + "epoch": 0.05, + "grad_norm": 0.46952148027976065, + "learning_rate": 1.997617723502989e-05, + "loss": 0.2935, "step": 1118 }, { - "epoch": 0.06, - "grad_norm": 0.5606158673409164, - "learning_rate": 1.9938557448877536e-05, - "loss": 0.3491, + "epoch": 0.05, + "grad_norm": 0.5630416726388018, + "learning_rate": 1.9976074481056874e-05, + "loss": 0.4171, "step": 1119 }, { - "epoch": 0.06, - "grad_norm": 0.3791222671011608, - "learning_rate": 1.9938351305135492e-05, - "loss": 0.235, + "epoch": 0.05, + "grad_norm": 0.47563302779439093, + "learning_rate": 1.9975971506223187e-05, + "loss": 0.2043, "step": 1120 }, { - "epoch": 0.06, - "grad_norm": 1.9555671938828716, - "learning_rate": 1.993814481722877e-05, - "loss": 0.8035, + "epoch": 0.05, + "grad_norm": 0.5720707343523462, + "learning_rate": 1.9975868310531107e-05, + "loss": 0.286, "step": 1121 }, { - "epoch": 0.06, - "grad_norm": 0.5317081120210266, - "learning_rate": 1.9937937985164518e-05, - "loss": 0.3285, + "epoch": 0.05, + "grad_norm": 0.49894132428643306, + "learning_rate": 1.9975764893982914e-05, + "loss": 0.3826, "step": 1122 }, { - "epoch": 0.06, - "grad_norm": 0.7753229601610528, - "learning_rate": 1.9937730808949905e-05, - "loss": 0.3667, + "epoch": 0.05, + "grad_norm": 1.3697330448786862, + "learning_rate": 1.9975661256580902e-05, + "loss": 0.7577, "step": 1123 }, { - "epoch": 0.06, - "grad_norm": 0.5828111764073042, - "learning_rate": 1.99375232885921e-05, - "loss": 0.4137, + "epoch": 0.05, + "grad_norm": 0.3984135182494554, + "learning_rate": 1.997555739832737e-05, + "loss": 0.2721, "step": 1124 }, { - "epoch": 0.06, - "grad_norm": 0.39500188850271334, - "learning_rate": 1.9937315424098288e-05, - "loss": 0.291, + "epoch": 0.05, + "grad_norm": 0.41592499396802457, + "learning_rate": 1.9975453319224604e-05, + "loss": 0.2977, "step": 1125 }, { - "epoch": 0.06, - "grad_norm": 0.41533959678148713, - "learning_rate": 1.9937107215475673e-05, - "loss": 0.2114, + "epoch": 0.05, + "grad_norm": 0.5216556970166795, + "learning_rate": 1.9975349019274918e-05, + "loss": 0.377, "step": 1126 }, { - "epoch": 0.06, - "grad_norm": 0.5307863364980268, - "learning_rate": 1.9936898662731463e-05, - "loss": 0.3774, + "epoch": 0.05, + "grad_norm": 0.5078698752888756, + "learning_rate": 1.9975244498480623e-05, + "loss": 0.2344, "step": 1127 }, { - "epoch": 0.06, - "grad_norm": 1.1478798383437205, - "learning_rate": 1.9936689765872878e-05, - "loss": 0.5261, + "epoch": 0.05, + "grad_norm": 1.4877637172474483, + "learning_rate": 1.9975139756844024e-05, + "loss": 0.7742, "step": 1128 }, { - "epoch": 0.06, - "grad_norm": 0.46423088674225926, - "learning_rate": 1.9936480524907154e-05, - "loss": 0.3142, + "epoch": 0.05, + "grad_norm": 0.6587749047373671, + "learning_rate": 1.9975034794367448e-05, + "loss": 0.469, "step": 1129 }, { - "epoch": 0.06, - "grad_norm": 0.4766471909599253, - "learning_rate": 1.9936270939841536e-05, - "loss": 0.3808, + "epoch": 0.05, + "grad_norm": 0.4371718169764839, + "learning_rate": 1.9974929611053214e-05, + "loss": 0.336, "step": 1130 }, { - "epoch": 0.06, - "grad_norm": 1.155229588534361, - "learning_rate": 1.9936061010683285e-05, - "loss": 0.5931, + "epoch": 0.05, + "grad_norm": 0.5209726642539527, + "learning_rate": 1.9974824206903657e-05, + "loss": 0.3851, "step": 1131 }, { - "epoch": 0.07, - "grad_norm": 0.3543521865584727, - "learning_rate": 1.9935850737439667e-05, - "loss": 0.134, + "epoch": 0.05, + "grad_norm": 0.45724511141227425, + "learning_rate": 1.99747185819211e-05, + "loss": 0.1378, "step": 1132 }, { - "epoch": 0.07, - "grad_norm": 0.4931860315991214, - "learning_rate": 1.9935640120117965e-05, - "loss": 0.3337, + "epoch": 0.05, + "grad_norm": 0.5862257872561927, + "learning_rate": 1.997461273610789e-05, + "loss": 0.3401, "step": 1133 }, { - "epoch": 0.07, - "grad_norm": 1.390135619937858, - "learning_rate": 1.9935429158725475e-05, - "loss": 0.8683, + "epoch": 0.05, + "grad_norm": 0.5599331181086553, + "learning_rate": 1.997450666946637e-05, + "loss": 0.3318, "step": 1134 }, { - "epoch": 0.07, - "grad_norm": 0.5975249328681871, - "learning_rate": 1.9935217853269497e-05, - "loss": 0.3176, + "epoch": 0.05, + "grad_norm": 0.7651768267156948, + "learning_rate": 1.9974400381998882e-05, + "loss": 0.5061, "step": 1135 }, { - "epoch": 0.07, - "grad_norm": 0.625255175254, - "learning_rate": 1.9935006203757354e-05, - "loss": 0.4517, + "epoch": 0.05, + "grad_norm": 0.5631345370123252, + "learning_rate": 1.9974293873707784e-05, + "loss": 0.3179, "step": 1136 }, { - "epoch": 0.07, - "grad_norm": 0.5236283609437692, - "learning_rate": 1.9934794210196374e-05, - "loss": 0.3806, + "epoch": 0.05, + "grad_norm": 0.45161314694438126, + "learning_rate": 1.9974187144595433e-05, + "loss": 0.2789, "step": 1137 }, { - "epoch": 0.07, - "grad_norm": 0.25258242792276325, - "learning_rate": 1.9934581872593893e-05, - "loss": 0.1451, + "epoch": 0.05, + "grad_norm": 0.4473631541633391, + "learning_rate": 1.9974080194664195e-05, + "loss": 0.2921, "step": 1138 }, { - "epoch": 0.07, - "grad_norm": 0.6981887311269585, - "learning_rate": 1.9934369190957275e-05, - "loss": 0.4007, + "epoch": 0.05, + "grad_norm": 0.553464902501885, + "learning_rate": 1.9973973023916428e-05, + "loss": 0.312, "step": 1139 }, { - "epoch": 0.07, - "grad_norm": 0.8433075775942684, - "learning_rate": 1.9934156165293878e-05, - "loss": 0.6068, + "epoch": 0.05, + "grad_norm": 1.0043129206370265, + "learning_rate": 1.9973865632354516e-05, + "loss": 0.5639, "step": 1140 }, { - "epoch": 0.07, - "grad_norm": 0.4589248024137913, - "learning_rate": 1.9933942795611075e-05, - "loss": 0.2664, + "epoch": 0.05, + "grad_norm": 0.9102169898636161, + "learning_rate": 1.997375801998083e-05, + "loss": 0.5519, "step": 1141 }, { - "epoch": 0.07, - "grad_norm": 0.5854364839569616, - "learning_rate": 1.9933729081916266e-05, - "loss": 0.4127, + "epoch": 0.05, + "grad_norm": 0.4778856945763343, + "learning_rate": 1.9973650186797756e-05, + "loss": 0.3003, "step": 1142 }, { - "epoch": 0.07, - "grad_norm": 1.1202946340701216, - "learning_rate": 1.9933515024216844e-05, - "loss": 0.6557, + "epoch": 0.05, + "grad_norm": 0.6506466136706037, + "learning_rate": 1.997354213280768e-05, + "loss": 0.4795, "step": 1143 }, { - "epoch": 0.07, - "grad_norm": 0.32203550195598013, - "learning_rate": 1.9933300622520225e-05, - "loss": 0.1755, + "epoch": 0.05, + "grad_norm": 0.29929405489103755, + "learning_rate": 1.9973433858012992e-05, + "loss": 0.1139, "step": 1144 }, { - "epoch": 0.07, - "grad_norm": 0.47969552743697114, - "learning_rate": 1.9933085876833833e-05, - "loss": 0.2807, + "epoch": 0.05, + "grad_norm": 0.5257793408683905, + "learning_rate": 1.9973325362416093e-05, + "loss": 0.3036, "step": 1145 }, { - "epoch": 0.07, - "grad_norm": 1.0268504627365178, - "learning_rate": 1.99328707871651e-05, - "loss": 0.6002, + "epoch": 0.05, + "grad_norm": 0.5383563349492909, + "learning_rate": 1.997321664601938e-05, + "loss": 0.3935, "step": 1146 }, { - "epoch": 0.07, - "grad_norm": 0.6890581144690542, - "learning_rate": 1.9932655353521483e-05, - "loss": 0.4666, + "epoch": 0.05, + "grad_norm": 1.2905773133172835, + "learning_rate": 1.9973107708825264e-05, + "loss": 0.4963, "step": 1147 }, { - "epoch": 0.07, - "grad_norm": 0.5003936995109405, - "learning_rate": 1.9932439575910436e-05, - "loss": 0.2906, + "epoch": 0.05, + "grad_norm": 0.49895310084793865, + "learning_rate": 1.9972998550836155e-05, + "loss": 0.3061, "step": 1148 }, { - "epoch": 0.07, - "grad_norm": 0.5008695250916106, - "learning_rate": 1.9932223454339435e-05, - "loss": 0.3429, + "epoch": 0.05, + "grad_norm": 1.5504762430691623, + "learning_rate": 1.9972889172054472e-05, + "loss": 0.6912, "step": 1149 }, { - "epoch": 0.07, - "grad_norm": 0.43940406343985966, - "learning_rate": 1.993200698881596e-05, - "loss": 0.2448, + "epoch": 0.05, + "grad_norm": 0.34121759701594295, + "learning_rate": 1.9972779572482636e-05, + "loss": 0.1934, "step": 1150 }, { - "epoch": 0.07, - "grad_norm": 0.37846887106776983, - "learning_rate": 1.9931790179347514e-05, - "loss": 0.2156, + "epoch": 0.05, + "grad_norm": 0.5769537082363037, + "learning_rate": 1.997266975212307e-05, + "loss": 0.3179, "step": 1151 }, { - "epoch": 0.07, - "grad_norm": 1.9187364850397228, - "learning_rate": 1.99315730259416e-05, - "loss": 0.6074, + "epoch": 0.05, + "grad_norm": 1.3138973019842257, + "learning_rate": 1.9972559710978206e-05, + "loss": 0.6918, "step": 1152 }, { - "epoch": 0.07, - "grad_norm": 0.4227557946650834, - "learning_rate": 1.9931355528605738e-05, - "loss": 0.3318, + "epoch": 0.05, + "grad_norm": 0.6634364913490587, + "learning_rate": 1.997244944905048e-05, + "loss": 0.3939, "step": 1153 }, { - "epoch": 0.07, - "grad_norm": 0.6158280113417645, - "learning_rate": 1.993113768734746e-05, - "loss": 0.3654, + "epoch": 0.05, + "grad_norm": 0.5236383855268214, + "learning_rate": 1.9972338966342338e-05, + "loss": 0.3066, "step": 1154 }, { - "epoch": 0.07, - "grad_norm": 1.1925768363660976, - "learning_rate": 1.9930919502174312e-05, - "loss": 0.4372, + "epoch": 0.05, + "grad_norm": 0.7119741323801098, + "learning_rate": 1.9972228262856223e-05, + "loss": 0.4269, "step": 1155 }, { - "epoch": 0.07, - "grad_norm": 0.2978244213918772, - "learning_rate": 1.993070097309385e-05, - "loss": 0.1655, + "epoch": 0.05, + "grad_norm": 0.4093004517259618, + "learning_rate": 1.9972117338594585e-05, + "loss": 0.2031, "step": 1156 }, { - "epoch": 0.07, - "grad_norm": 0.4938521213731747, - "learning_rate": 1.9930482100113642e-05, - "loss": 0.3306, + "epoch": 0.05, + "grad_norm": 0.5096893494305453, + "learning_rate": 1.997200619355988e-05, + "loss": 0.2303, "step": 1157 }, { - "epoch": 0.07, - "grad_norm": 1.026122384333359, - "learning_rate": 1.9930262883241265e-05, - "loss": 0.4183, + "epoch": 0.05, + "grad_norm": 0.5790033442524483, + "learning_rate": 1.9971894827754574e-05, + "loss": 0.3883, "step": 1158 }, { - "epoch": 0.07, - "grad_norm": 0.9371568067117475, - "learning_rate": 1.9930043322484306e-05, - "loss": 0.552, + "epoch": 0.05, + "grad_norm": 1.329675028815536, + "learning_rate": 1.997178324118112e-05, + "loss": 0.5448, "step": 1159 }, { - "epoch": 0.07, - "grad_norm": 0.5631135620050378, - "learning_rate": 1.992982341785038e-05, - "loss": 0.3326, + "epoch": 0.05, + "grad_norm": 0.46028544540181093, + "learning_rate": 1.9971671433842e-05, + "loss": 0.2487, "step": 1160 }, { - "epoch": 0.07, - "grad_norm": 0.5588638465425702, - "learning_rate": 1.9929603169347095e-05, - "loss": 0.3178, + "epoch": 0.05, + "grad_norm": 1.1316403843781724, + "learning_rate": 1.9971559405739683e-05, + "loss": 0.6383, "step": 1161 }, { - "epoch": 0.07, - "grad_norm": 1.096666482734466, - "learning_rate": 1.9929382576982076e-05, - "loss": 0.4737, + "epoch": 0.05, + "grad_norm": 0.5483716526744153, + "learning_rate": 1.9971447156876653e-05, + "loss": 0.3645, "step": 1162 }, { - "epoch": 0.07, - "grad_norm": 0.3927720395948921, - "learning_rate": 1.9929161640762968e-05, - "loss": 0.281, + "epoch": 0.05, + "grad_norm": 0.3739125651290316, + "learning_rate": 1.9971334687255394e-05, + "loss": 0.188, "step": 1163 }, { - "epoch": 0.07, - "grad_norm": 0.7510863349671362, - "learning_rate": 1.992894036069742e-05, - "loss": 0.5086, + "epoch": 0.05, + "grad_norm": 1.2154986854383885, + "learning_rate": 1.9971221996878395e-05, + "loss": 0.5902, "step": 1164 }, { - "epoch": 0.07, - "grad_norm": 0.6070472615776253, - "learning_rate": 1.992871873679309e-05, - "loss": 0.3358, + "epoch": 0.05, + "grad_norm": 0.7677637238380506, + "learning_rate": 1.997110908574815e-05, + "loss": 0.4656, "step": 1165 }, { - "epoch": 0.07, - "grad_norm": 0.4624380279404961, - "learning_rate": 1.9928496769057662e-05, - "loss": 0.2936, + "epoch": 0.05, + "grad_norm": 0.3686923177327155, + "learning_rate": 1.9970995953867162e-05, + "loss": 0.2229, "step": 1166 }, { - "epoch": 0.07, - "grad_norm": 2.0190631314339442, - "learning_rate": 1.9928274457498818e-05, - "loss": 0.7872, + "epoch": 0.05, + "grad_norm": 2.5245175932036164, + "learning_rate": 1.997088260123793e-05, + "loss": 0.6066, "step": 1167 }, { - "epoch": 0.07, - "grad_norm": 0.4957965334493548, - "learning_rate": 1.9928051802124252e-05, - "loss": 0.3326, + "epoch": 0.05, + "grad_norm": 0.6489208359182874, + "learning_rate": 1.9970769027862973e-05, + "loss": 0.3428, "step": 1168 }, { - "epoch": 0.07, - "grad_norm": 0.47560370552216386, - "learning_rate": 1.9927828802941683e-05, - "loss": 0.3471, + "epoch": 0.05, + "grad_norm": 0.5311720896746537, + "learning_rate": 1.9970655233744795e-05, + "loss": 0.3121, "step": 1169 }, { - "epoch": 0.07, - "grad_norm": 0.5550644991870759, - "learning_rate": 1.9927605459958825e-05, - "loss": 0.3929, + "epoch": 0.05, + "grad_norm": 0.690687377660559, + "learning_rate": 1.9970541218885924e-05, + "loss": 0.3616, "step": 1170 }, { - "epoch": 0.07, - "grad_norm": 0.4000080669193282, - "learning_rate": 1.992738177318342e-05, - "loss": 0.1703, + "epoch": 0.05, + "grad_norm": 0.7520060512569802, + "learning_rate": 1.997042698328888e-05, + "loss": 0.3526, "step": 1171 }, { - "epoch": 0.07, - "grad_norm": 0.6353595781264442, - "learning_rate": 1.992715774262321e-05, - "loss": 0.3254, + "epoch": 0.05, + "grad_norm": 0.5684629660274684, + "learning_rate": 1.997031252695619e-05, + "loss": 0.2889, "step": 1172 }, { - "epoch": 0.07, - "grad_norm": 0.5679976742869749, - "learning_rate": 1.992693336828596e-05, - "loss": 0.3704, + "epoch": 0.05, + "grad_norm": 0.7526467351528564, + "learning_rate": 1.9970197849890388e-05, + "loss": 0.3293, "step": 1173 }, { - "epoch": 0.07, - "grad_norm": 0.5135060035524516, - "learning_rate": 1.9926708650179426e-05, - "loss": 0.2752, + "epoch": 0.05, + "grad_norm": 0.5315589654161116, + "learning_rate": 1.997008295209402e-05, + "loss": 0.3593, "step": 1174 }, { - "epoch": 0.07, - "grad_norm": 0.5082286635920554, - "learning_rate": 1.9926483588311402e-05, - "loss": 0.3722, + "epoch": 0.05, + "grad_norm": 0.5537541499354507, + "learning_rate": 1.996996783356962e-05, + "loss": 0.3049, "step": 1175 }, { - "epoch": 0.07, - "grad_norm": 0.7864428636362328, - "learning_rate": 1.9926258182689677e-05, - "loss": 0.5978, + "epoch": 0.05, + "grad_norm": 0.8059848374189049, + "learning_rate": 1.9969852494319743e-05, + "loss": 0.4338, "step": 1176 }, { - "epoch": 0.07, - "grad_norm": 0.4977399864736148, - "learning_rate": 1.992603243332206e-05, - "loss": 0.2923, + "epoch": 0.05, + "grad_norm": 0.639588083045552, + "learning_rate": 1.9969736934346945e-05, + "loss": 0.2725, "step": 1177 }, { - "epoch": 0.07, - "grad_norm": 0.3823089546041058, - "learning_rate": 1.9925806340216365e-05, - "loss": 0.2497, + "epoch": 0.05, + "grad_norm": 0.46054505164548587, + "learning_rate": 1.9969621153653772e-05, + "loss": 0.31, "step": 1178 }, { - "epoch": 0.07, - "grad_norm": 0.5603124203509675, - "learning_rate": 1.9925579903380425e-05, - "loss": 0.3242, + "epoch": 0.05, + "grad_norm": 0.43197130870603695, + "learning_rate": 1.9969505152242805e-05, + "loss": 0.2088, "step": 1179 }, { - "epoch": 0.07, - "grad_norm": 0.4985678408576438, - "learning_rate": 1.9925353122822077e-05, - "loss": 0.4399, + "epoch": 0.05, + "grad_norm": 1.2127338665901193, + "learning_rate": 1.9969388930116596e-05, + "loss": 0.6592, "step": 1180 }, { - "epoch": 0.07, - "grad_norm": 0.37733182664093273, - "learning_rate": 1.992512599854918e-05, - "loss": 0.269, + "epoch": 0.05, + "grad_norm": 0.46409025597188236, + "learning_rate": 1.9969272487277728e-05, + "loss": 0.3495, "step": 1181 }, { - "epoch": 0.07, - "grad_norm": 1.1088873935769934, - "learning_rate": 1.9924898530569594e-05, - "loss": 0.6713, + "epoch": 0.05, + "grad_norm": 0.44776772173672413, + "learning_rate": 1.9969155823728772e-05, + "loss": 0.3453, "step": 1182 }, { - "epoch": 0.07, - "grad_norm": 0.5461847080526667, - "learning_rate": 1.99246707188912e-05, - "loss": 0.3439, + "epoch": 0.05, + "grad_norm": 0.4168916081093098, + "learning_rate": 1.9969038939472315e-05, + "loss": 0.1991, "step": 1183 }, { - "epoch": 0.07, - "grad_norm": 0.3354047490782291, - "learning_rate": 1.9924442563521885e-05, - "loss": 0.1924, + "epoch": 0.05, + "grad_norm": 0.3811016333317376, + "learning_rate": 1.996892183451095e-05, + "loss": 0.2652, "step": 1184 }, { - "epoch": 0.07, - "grad_norm": 0.5354214457370254, - "learning_rate": 1.992421406446955e-05, - "loss": 0.3413, + "epoch": 0.05, + "grad_norm": 1.3337985420000187, + "learning_rate": 1.9968804508847256e-05, + "loss": 0.6179, "step": 1185 }, { - "epoch": 0.07, - "grad_norm": 0.9941015836479209, - "learning_rate": 1.9923985221742112e-05, - "loss": 0.5636, + "epoch": 0.05, + "grad_norm": 0.5655449980818164, + "learning_rate": 1.9968686962483842e-05, + "loss": 0.3613, "step": 1186 }, { - "epoch": 0.07, - "grad_norm": 0.4010314605169855, - "learning_rate": 1.992375603534749e-05, - "loss": 0.2394, + "epoch": 0.05, + "grad_norm": 0.5902771393544244, + "learning_rate": 1.9968569195423307e-05, + "loss": 0.3264, "step": 1187 }, { - "epoch": 0.07, - "grad_norm": 1.578704678511741, - "learning_rate": 1.9923526505293623e-05, - "loss": 0.844, + "epoch": 0.05, + "grad_norm": 1.1272976638182726, + "learning_rate": 1.9968451207668257e-05, + "loss": 0.6194, "step": 1188 }, { - "epoch": 0.07, - "grad_norm": 0.5145164289755612, - "learning_rate": 1.9923296631588462e-05, - "loss": 0.3961, + "epoch": 0.05, + "grad_norm": 0.5389020560484016, + "learning_rate": 1.9968332999221306e-05, + "loss": 0.3242, "step": 1189 }, { - "epoch": 0.07, - "grad_norm": 0.42736151507104164, - "learning_rate": 1.9923066414239965e-05, - "loss": 0.2347, + "epoch": 0.05, + "grad_norm": 0.5691163887433358, + "learning_rate": 1.996821457008507e-05, + "loss": 0.1716, "step": 1190 }, { - "epoch": 0.07, - "grad_norm": 0.39926855460341876, - "learning_rate": 1.9922835853256103e-05, - "loss": 0.2363, + "epoch": 0.05, + "grad_norm": 0.44446067140285683, + "learning_rate": 1.996809592026217e-05, + "loss": 0.2887, "step": 1191 }, { - "epoch": 0.07, - "grad_norm": 0.6873331025968474, - "learning_rate": 1.9922604948644865e-05, - "loss": 0.4264, + "epoch": 0.05, + "grad_norm": 1.0470164185527266, + "learning_rate": 1.9967977049755233e-05, + "loss": 0.4804, "step": 1192 }, { - "epoch": 0.07, - "grad_norm": 0.5172055478516355, - "learning_rate": 1.992237370041424e-05, - "loss": 0.3225, + "epoch": 0.05, + "grad_norm": 0.5735372632845884, + "learning_rate": 1.9967857958566893e-05, + "loss": 0.3033, "step": 1193 }, { - "epoch": 0.07, - "grad_norm": 0.5461826946326311, - "learning_rate": 1.9922142108572245e-05, - "loss": 0.3776, + "epoch": 0.05, + "grad_norm": 0.4896150687434479, + "learning_rate": 1.996773864669978e-05, + "loss": 0.3534, "step": 1194 }, { - "epoch": 0.07, - "grad_norm": 0.8171086797305573, - "learning_rate": 1.9921910173126894e-05, - "loss": 0.545, + "epoch": 0.05, + "grad_norm": 1.436543974309954, + "learning_rate": 1.996761911415655e-05, + "loss": 0.7047, "step": 1195 }, { - "epoch": 0.07, - "grad_norm": 0.545195871554347, - "learning_rate": 1.9921677894086217e-05, - "loss": 0.3055, + "epoch": 0.05, + "grad_norm": 0.4051021454899342, + "learning_rate": 1.9967499360939833e-05, + "loss": 0.1719, "step": 1196 }, { - "epoch": 0.07, - "grad_norm": 0.296707240427443, - "learning_rate": 1.9921445271458263e-05, - "loss": 0.2576, + "epoch": 0.05, + "grad_norm": 0.49399555124374717, + "learning_rate": 1.996737938705229e-05, + "loss": 0.3191, "step": 1197 }, { - "epoch": 0.07, - "grad_norm": 1.0695270171865263, - "learning_rate": 1.992121230525109e-05, - "loss": 0.5985, + "epoch": 0.06, + "grad_norm": 0.6825799557939733, + "learning_rate": 1.996725919249657e-05, + "loss": 0.4338, "step": 1198 }, { - "epoch": 0.07, - "grad_norm": 0.5373484107110249, - "learning_rate": 1.992097899547276e-05, - "loss": 0.3029, + "epoch": 0.06, + "grad_norm": 0.5032434668565547, + "learning_rate": 1.996713877727534e-05, + "loss": 0.3081, "step": 1199 }, { - "epoch": 0.07, - "grad_norm": 0.521812372352844, - "learning_rate": 1.992074534213135e-05, - "loss": 0.3079, + "epoch": 0.06, + "grad_norm": 0.8112355102822437, + "learning_rate": 1.9967018141391262e-05, + "loss": 0.4496, "step": 1200 }, { - "epoch": 0.07, - "grad_norm": 1.3838297456653332, - "learning_rate": 1.9920511345234956e-05, - "loss": 0.7985, + "epoch": 0.06, + "grad_norm": 0.7108022616565274, + "learning_rate": 1.996689728484701e-05, + "loss": 0.4164, "step": 1201 }, { - "epoch": 0.07, - "grad_norm": 0.48375303096121197, - "learning_rate": 1.9920277004791682e-05, - "loss": 0.3086, + "epoch": 0.06, + "grad_norm": 0.3522073846522065, + "learning_rate": 1.9966776207645263e-05, + "loss": 0.2108, "step": 1202 }, { - "epoch": 0.07, - "grad_norm": 0.552106262015009, - "learning_rate": 1.992004232080964e-05, - "loss": 0.3058, + "epoch": 0.06, + "grad_norm": 0.45129365969278645, + "learning_rate": 1.996665490978869e-05, + "loss": 0.2648, "step": 1203 }, { - "epoch": 0.07, - "grad_norm": 0.5531874559226082, - "learning_rate": 1.9919807293296963e-05, - "loss": 0.3231, + "epoch": 0.06, + "grad_norm": 0.8470233341936348, + "learning_rate": 1.9966533391279988e-05, + "loss": 0.5805, "step": 1204 }, { - "epoch": 0.07, - "grad_norm": 0.5455190994044464, - "learning_rate": 1.9919571922261784e-05, - "loss": 0.3043, + "epoch": 0.06, + "grad_norm": 0.5153136741122437, + "learning_rate": 1.996641165212184e-05, + "loss": 0.2479, "step": 1205 }, { - "epoch": 0.07, - "grad_norm": 1.2944476032556527, - "learning_rate": 1.9919336207712258e-05, - "loss": 0.7407, + "epoch": 0.06, + "grad_norm": 0.5738378417603217, + "learning_rate": 1.9966289692316944e-05, + "loss": 0.3532, "step": 1206 }, { - "epoch": 0.07, - "grad_norm": 0.9246456693780484, - "learning_rate": 1.991910014965654e-05, - "loss": 0.5139, + "epoch": 0.06, + "grad_norm": 1.2502564440994315, + "learning_rate": 1.9966167511868005e-05, + "loss": 0.7154, "step": 1207 }, { - "epoch": 0.07, - "grad_norm": 0.5754654948616781, - "learning_rate": 1.9918863748102818e-05, - "loss": 0.2991, + "epoch": 0.06, + "grad_norm": 0.5143350617149302, + "learning_rate": 1.9966045110777718e-05, + "loss": 0.2714, "step": 1208 }, { - "epoch": 0.07, - "grad_norm": 0.5043664025461552, - "learning_rate": 1.9918627003059266e-05, - "loss": 0.3907, + "epoch": 0.06, + "grad_norm": 0.3699257819047486, + "learning_rate": 1.9965922489048795e-05, + "loss": 0.252, "step": 1209 }, { - "epoch": 0.07, - "grad_norm": 0.3706019964753908, - "learning_rate": 1.9918389914534086e-05, - "loss": 0.2143, + "epoch": 0.06, + "grad_norm": 0.6633411906644198, + "learning_rate": 1.9965799646683958e-05, + "loss": 0.4369, "step": 1210 }, { - "epoch": 0.07, - "grad_norm": 0.5300152129471692, - "learning_rate": 1.9918152482535494e-05, - "loss": 0.3016, + "epoch": 0.06, + "grad_norm": 1.1584443332841368, + "learning_rate": 1.996567658368592e-05, + "loss": 0.5005, "step": 1211 }, { - "epoch": 0.07, - "grad_norm": 0.5248817577305702, - "learning_rate": 1.9917914707071703e-05, - "loss": 0.4038, + "epoch": 0.06, + "grad_norm": 0.515885224793534, + "learning_rate": 1.996555330005741e-05, + "loss": 0.2788, "step": 1212 }, { - "epoch": 0.07, - "grad_norm": 0.617387402551657, - "learning_rate": 1.991767658815096e-05, - "loss": 0.3454, + "epoch": 0.06, + "grad_norm": 0.5443877693120539, + "learning_rate": 1.9965429795801152e-05, + "loss": 0.3646, "step": 1213 }, { - "epoch": 0.07, - "grad_norm": 0.4336728307512205, - "learning_rate": 1.9917438125781497e-05, - "loss": 0.2924, + "epoch": 0.06, + "grad_norm": 0.8790145860331129, + "learning_rate": 1.9965306070919883e-05, + "loss": 0.3856, "step": 1214 }, { - "epoch": 0.07, - "grad_norm": 0.72894687779715, - "learning_rate": 1.991719931997158e-05, - "loss": 0.5171, + "epoch": 0.06, + "grad_norm": 0.38804465056044746, + "learning_rate": 1.9965182125416344e-05, + "loss": 0.2972, "step": 1215 }, { - "epoch": 0.07, - "grad_norm": 0.3394246258776419, - "learning_rate": 1.9916960170729475e-05, - "loss": 0.2009, + "epoch": 0.06, + "grad_norm": 0.5951957334833682, + "learning_rate": 1.9965057959293275e-05, + "loss": 0.3267, "step": 1216 }, { - "epoch": 0.07, - "grad_norm": 0.44296100608782896, - "learning_rate": 1.9916720678063467e-05, - "loss": 0.3119, + "epoch": 0.06, + "grad_norm": 0.5252805011592172, + "learning_rate": 1.9964933572553428e-05, + "loss": 0.3398, "step": 1217 }, { - "epoch": 0.07, - "grad_norm": 1.2136487371261702, - "learning_rate": 1.9916480841981853e-05, - "loss": 0.6634, + "epoch": 0.06, + "grad_norm": 0.5157599734716378, + "learning_rate": 1.9964808965199557e-05, + "loss": 0.3381, "step": 1218 }, { - "epoch": 0.07, - "grad_norm": 0.9793411528882585, - "learning_rate": 1.991624066249293e-05, - "loss": 0.6157, + "epoch": 0.06, + "grad_norm": 1.1431014920552056, + "learning_rate": 1.996468413723442e-05, + "loss": 0.5341, "step": 1219 }, { - "epoch": 0.07, - "grad_norm": 0.3788574100278588, - "learning_rate": 1.9916000139605013e-05, - "loss": 0.2773, + "epoch": 0.06, + "grad_norm": 0.5841309783027583, + "learning_rate": 1.996455908866078e-05, + "loss": 0.4152, "step": 1220 }, { - "epoch": 0.07, - "grad_norm": 0.5509266148859141, - "learning_rate": 1.9915759273326447e-05, - "loss": 0.4402, + "epoch": 0.06, + "grad_norm": 0.5453515610399624, + "learning_rate": 1.9964433819481407e-05, + "loss": 0.3707, "step": 1221 }, { - "epoch": 0.07, - "grad_norm": 0.35834376031323334, - "learning_rate": 1.9915518063665556e-05, - "loss": 0.1633, + "epoch": 0.06, + "grad_norm": 0.3520302625232259, + "learning_rate": 1.996430832969907e-05, + "loss": 0.2273, "step": 1222 }, { - "epoch": 0.07, - "grad_norm": 0.4119437399996039, - "learning_rate": 1.991527651063071e-05, - "loss": 0.2417, + "epoch": 0.06, + "grad_norm": 0.7856306784416885, + "learning_rate": 1.996418261931655e-05, + "loss": 0.3456, "step": 1223 }, { - "epoch": 0.07, - "grad_norm": 0.6280447549338211, - "learning_rate": 1.9915034614230256e-05, - "loss": 0.4287, + "epoch": 0.06, + "grad_norm": 0.5934987523065206, + "learning_rate": 1.9964056688336636e-05, + "loss": 0.3311, "step": 1224 }, { - "epoch": 0.07, - "grad_norm": 0.5800209981404687, - "learning_rate": 1.9914792374472584e-05, - "loss": 0.4533, + "epoch": 0.06, + "grad_norm": 0.527845873764024, + "learning_rate": 1.9963930536762105e-05, + "loss": 0.3184, "step": 1225 }, { - "epoch": 0.07, - "grad_norm": 0.43318599109249784, - "learning_rate": 1.991454979136608e-05, - "loss": 0.262, + "epoch": 0.06, + "grad_norm": 0.8184956370164702, + "learning_rate": 1.996380416459576e-05, + "loss": 0.475, "step": 1226 }, { - "epoch": 0.07, - "grad_norm": 0.8602780715330212, - "learning_rate": 1.991430686491914e-05, - "loss": 0.5644, + "epoch": 0.06, + "grad_norm": 0.4663782765782296, + "learning_rate": 1.9963677571840386e-05, + "loss": 0.3588, "step": 1227 }, { - "epoch": 0.07, - "grad_norm": 0.37876834389922714, - "learning_rate": 1.9914063595140184e-05, - "loss": 0.3045, + "epoch": 0.06, + "grad_norm": 0.3800536653508172, + "learning_rate": 1.99635507584988e-05, + "loss": 0.1972, "step": 1228 }, { - "epoch": 0.07, - "grad_norm": 0.41026526676860525, - "learning_rate": 1.9913819982037627e-05, - "loss": 0.2101, + "epoch": 0.06, + "grad_norm": 0.5452266758475576, + "learning_rate": 1.99634237245738e-05, + "loss": 0.3563, "step": 1229 }, { - "epoch": 0.07, - "grad_norm": 0.5216085347059072, - "learning_rate": 1.991357602561991e-05, - "loss": 0.2894, + "epoch": 0.06, + "grad_norm": 0.5494057860457114, + "learning_rate": 1.9963296470068207e-05, + "loss": 0.3135, "step": 1230 }, { - "epoch": 0.07, - "grad_norm": 0.9685343917475587, - "learning_rate": 1.9913331725895485e-05, - "loss": 0.5665, + "epoch": 0.06, + "grad_norm": 0.9303041072959958, + "learning_rate": 1.9963168994984827e-05, + "loss": 0.5988, "step": 1231 }, { - "epoch": 0.07, - "grad_norm": 0.49616526614313705, - "learning_rate": 1.9913087082872808e-05, - "loss": 0.3046, + "epoch": 0.06, + "grad_norm": 0.6055607481087258, + "learning_rate": 1.9963041299326492e-05, + "loss": 0.4, "step": 1232 }, { - "epoch": 0.07, - "grad_norm": 0.40009900721312874, - "learning_rate": 1.9912842096560348e-05, - "loss": 0.2986, + "epoch": 0.06, + "grad_norm": 0.47677557574217383, + "learning_rate": 1.9962913383096025e-05, + "loss": 0.3105, "step": 1233 }, { - "epoch": 0.07, - "grad_norm": 0.6011999434725922, - "learning_rate": 1.9912596766966598e-05, - "loss": 0.3332, + "epoch": 0.06, + "grad_norm": 0.7722442172118236, + "learning_rate": 1.9962785246296253e-05, + "loss": 0.414, "step": 1234 }, { - "epoch": 0.07, - "grad_norm": 0.3754282911061259, - "learning_rate": 1.9912351094100043e-05, - "loss": 0.2408, + "epoch": 0.06, + "grad_norm": 0.33160680602751264, + "learning_rate": 1.9962656888930023e-05, + "loss": 0.1252, "step": 1235 }, { - "epoch": 0.07, - "grad_norm": 0.4790995862974184, - "learning_rate": 1.99121050779692e-05, - "loss": 0.313, + "epoch": 0.06, + "grad_norm": 0.5972832031786504, + "learning_rate": 1.9962528311000172e-05, + "loss": 0.3753, "step": 1236 }, { - "epoch": 0.07, - "grad_norm": 0.9519799944694786, - "learning_rate": 1.9911858718582583e-05, - "loss": 0.5728, + "epoch": 0.06, + "grad_norm": 0.5168366680825283, + "learning_rate": 1.9962399512509544e-05, + "loss": 0.3595, "step": 1237 }, { - "epoch": 0.07, - "grad_norm": 0.48524535965068866, - "learning_rate": 1.9911612015948726e-05, - "loss": 0.3642, + "epoch": 0.06, + "grad_norm": 1.2684477070233244, + "learning_rate": 1.996227049346099e-05, + "loss": 0.4019, "step": 1238 }, { - "epoch": 0.07, - "grad_norm": 0.6463784806450834, - "learning_rate": 1.9911364970076167e-05, - "loss": 0.2903, + "epoch": 0.06, + "grad_norm": 0.6127849174333978, + "learning_rate": 1.9962141253857376e-05, + "loss": 0.3589, "step": 1239 }, { - "epoch": 0.07, - "grad_norm": 0.5487653396668991, - "learning_rate": 1.9911117580973468e-05, - "loss": 0.3872, + "epoch": 0.06, + "grad_norm": 0.5002453328543703, + "learning_rate": 1.996201179370155e-05, + "loss": 0.2551, "step": 1240 }, { - "epoch": 0.07, - "grad_norm": 0.28116441246936236, - "learning_rate": 1.9910869848649192e-05, - "loss": 0.1866, + "epoch": 0.06, + "grad_norm": 0.39307595653253935, + "learning_rate": 1.9961882112996387e-05, + "loss": 0.2185, "step": 1241 }, { - "epoch": 0.07, - "grad_norm": 0.7251262440666021, - "learning_rate": 1.991062177311192e-05, - "loss": 0.4662, + "epoch": 0.06, + "grad_norm": 0.6800073989911231, + "learning_rate": 1.9961752211744753e-05, + "loss": 0.347, "step": 1242 }, { - "epoch": 0.07, - "grad_norm": 1.4629712657903426, - "learning_rate": 1.9910373354370245e-05, - "loss": 0.504, + "epoch": 0.06, + "grad_norm": 1.7181547448702197, + "learning_rate": 1.996162208994953e-05, + "loss": 0.6094, "step": 1243 }, { - "epoch": 0.07, - "grad_norm": 0.4424282521343179, - "learning_rate": 1.9910124592432763e-05, - "loss": 0.3082, + "epoch": 0.06, + "grad_norm": 1.096282627015062, + "learning_rate": 1.9961491747613594e-05, + "loss": 0.5667, "step": 1244 }, { - "epoch": 0.07, - "grad_norm": 0.7088073521939233, - "learning_rate": 1.9909875487308096e-05, - "loss": 0.4297, + "epoch": 0.06, + "grad_norm": 0.478931563960749, + "learning_rate": 1.9961361184739832e-05, + "loss": 0.2532, "step": 1245 }, { - "epoch": 0.07, - "grad_norm": 0.45330422783598706, - "learning_rate": 1.9909626039004862e-05, - "loss": 0.1286, + "epoch": 0.06, + "grad_norm": 0.5158876102855414, + "learning_rate": 1.9961230401331132e-05, + "loss": 0.2678, "step": 1246 }, { - "epoch": 0.07, - "grad_norm": 0.6475879211850505, - "learning_rate": 1.990937624753171e-05, - "loss": 0.3348, + "epoch": 0.06, + "grad_norm": 0.7717896304223325, + "learning_rate": 1.9961099397390395e-05, + "loss": 0.2956, "step": 1247 }, { - "epoch": 0.07, - "grad_norm": 0.5626112796455328, - "learning_rate": 1.990912611289728e-05, - "loss": 0.3295, + "epoch": 0.06, + "grad_norm": 0.7745740952118951, + "learning_rate": 1.9960968172920516e-05, + "loss": 0.2651, "step": 1248 }, { - "epoch": 0.07, - "grad_norm": 2.169279671276457, - "learning_rate": 1.9908875635110244e-05, - "loss": 0.5194, + "epoch": 0.06, + "grad_norm": 1.3475506089916525, + "learning_rate": 1.9960836727924403e-05, + "loss": 0.4069, "step": 1249 }, { - "epoch": 0.07, - "grad_norm": 0.7257332887197935, - "learning_rate": 1.990862481417927e-05, - "loss": 0.4001, + "epoch": 0.06, + "grad_norm": 2.3245086561547255, + "learning_rate": 1.9960705062404963e-05, + "loss": 0.6258, "step": 1250 }, { - "epoch": 0.07, - "grad_norm": 0.6408089696432202, - "learning_rate": 1.990837365011304e-05, - "loss": 0.3905, + "epoch": 0.06, + "grad_norm": 0.43870123509861536, + "learning_rate": 1.9960573176365114e-05, + "loss": 0.2467, "step": 1251 }, { - "epoch": 0.07, - "grad_norm": 0.628381425763534, - "learning_rate": 1.9908122142920262e-05, - "loss": 0.2872, + "epoch": 0.06, + "grad_norm": 1.3441387246331329, + "learning_rate": 1.9960441069807778e-05, + "loss": 0.6879, "step": 1252 }, { - "epoch": 0.07, - "grad_norm": 0.393935167434499, - "learning_rate": 1.9907870292609633e-05, - "loss": 0.1497, + "epoch": 0.06, + "grad_norm": 0.4774527582591661, + "learning_rate": 1.996030874273587e-05, + "loss": 0.2664, "step": 1253 }, { - "epoch": 0.07, - "grad_norm": 1.3577178025888306, - "learning_rate": 1.990761809918989e-05, - "loss": 0.4176, + "epoch": 0.06, + "grad_norm": 0.8850089900335283, + "learning_rate": 1.9960176195152336e-05, + "loss": 0.2881, "step": 1254 }, { - "epoch": 0.07, - "grad_norm": 2.0334997177234264, - "learning_rate": 1.9907365562669753e-05, - "loss": 0.6454, + "epoch": 0.06, + "grad_norm": 2.5999705037944874, + "learning_rate": 1.9960043427060093e-05, + "loss": 0.6018, "step": 1255 }, { - "epoch": 0.07, - "grad_norm": 0.4657792094873402, - "learning_rate": 1.9907112683057974e-05, - "loss": 0.2523, + "epoch": 0.06, + "grad_norm": 1.3197289026862369, + "learning_rate": 1.995991043846209e-05, + "loss": 0.4981, "step": 1256 }, { - "epoch": 0.07, - "grad_norm": 0.6638005020345322, - "learning_rate": 1.9906859460363307e-05, - "loss": 0.4164, + "epoch": 0.06, + "grad_norm": 0.568404623518023, + "learning_rate": 1.995977722936127e-05, + "loss": 0.3411, "step": 1257 }, { - "epoch": 0.07, - "grad_norm": 1.2400648252392563, - "learning_rate": 1.9906605894594525e-05, - "loss": 0.6687, + "epoch": 0.06, + "grad_norm": 0.7496475426077246, + "learning_rate": 1.9959643799760583e-05, + "loss": 0.3459, "step": 1258 }, { - "epoch": 0.07, - "grad_norm": 0.5219896184442447, - "learning_rate": 1.990635198576041e-05, - "loss": 0.2149, + "epoch": 0.06, + "grad_norm": 0.5640299892939787, + "learning_rate": 1.9959510149662978e-05, + "loss": 0.2973, "step": 1259 }, { - "epoch": 0.07, - "grad_norm": 0.7292828300759986, - "learning_rate": 1.9906097733869746e-05, - "loss": 0.3419, + "epoch": 0.06, + "grad_norm": 0.6051759840439525, + "learning_rate": 1.9959376279071422e-05, + "loss": 0.3291, "step": 1260 }, { - "epoch": 0.07, - "grad_norm": 2.141928613905644, - "learning_rate": 1.990584313893135e-05, - "loss": 0.5934, + "epoch": 0.06, + "grad_norm": 1.29615684564551, + "learning_rate": 1.995924218798887e-05, + "loss": 0.3918, "step": 1261 }, { - "epoch": 0.07, - "grad_norm": 0.352641454123304, - "learning_rate": 1.9905588200954025e-05, - "loss": 0.1808, + "epoch": 0.06, + "grad_norm": 0.6555339729114142, + "learning_rate": 1.9959107876418295e-05, + "loss": 0.2988, "step": 1262 }, { - "epoch": 0.07, - "grad_norm": 0.743654610148312, - "learning_rate": 1.990533291994661e-05, - "loss": 0.4418, + "epoch": 0.06, + "grad_norm": 0.6154034643074809, + "learning_rate": 1.9958973344362674e-05, + "loss": 0.304, "step": 1263 }, { - "epoch": 0.07, - "grad_norm": 0.5783868326461529, - "learning_rate": 1.9905077295917944e-05, - "loss": 0.3728, + "epoch": 0.06, + "grad_norm": 0.8794967563409657, + "learning_rate": 1.995883859182498e-05, + "loss": 0.3652, "step": 1264 }, { - "epoch": 0.07, - "grad_norm": 0.6927545700509946, - "learning_rate": 1.9904821328876873e-05, - "loss": 0.2807, + "epoch": 0.06, + "grad_norm": 0.5332218584085938, + "learning_rate": 1.9958703618808198e-05, + "loss": 0.2899, "step": 1265 }, { - "epoch": 0.07, - "grad_norm": 0.9029660267620941, - "learning_rate": 1.9904565018832267e-05, - "loss": 0.4061, + "epoch": 0.06, + "grad_norm": 0.5896183731518845, + "learning_rate": 1.9958568425315316e-05, + "loss": 0.3349, "step": 1266 }, { - "epoch": 0.07, - "grad_norm": 0.48832856958609416, - "learning_rate": 1.9904308365792998e-05, - "loss": 0.3221, + "epoch": 0.06, + "grad_norm": 0.8506147131814346, + "learning_rate": 1.9958433011349325e-05, + "loss": 0.5098, "step": 1267 }, { - "epoch": 0.07, - "grad_norm": 0.6071012810126546, - "learning_rate": 1.9904051369767958e-05, - "loss": 0.3727, + "epoch": 0.06, + "grad_norm": 0.4929205925320681, + "learning_rate": 1.9958297376913226e-05, + "loss": 0.3107, "step": 1268 }, { - "epoch": 0.07, - "grad_norm": 0.4333485493904293, - "learning_rate": 1.9903794030766047e-05, - "loss": 0.216, + "epoch": 0.06, + "grad_norm": 0.4779271708323994, + "learning_rate": 1.9958161522010022e-05, + "loss": 0.2827, "step": 1269 }, { - "epoch": 0.07, - "grad_norm": 0.8550028384302811, - "learning_rate": 1.9903536348796172e-05, - "loss": 0.5802, + "epoch": 0.06, + "grad_norm": 1.7930503818602477, + "learning_rate": 1.9958025446642722e-05, + "loss": 0.6931, "step": 1270 }, { - "epoch": 0.07, - "grad_norm": 0.5444160978702124, - "learning_rate": 1.9903278323867262e-05, - "loss": 0.502, + "epoch": 0.06, + "grad_norm": 1.2154969649836658, + "learning_rate": 1.9957889150814332e-05, + "loss": 0.481, "step": 1271 }, { - "epoch": 0.07, - "grad_norm": 0.49056768853210037, - "learning_rate": 1.9903019955988246e-05, - "loss": 0.2712, + "epoch": 0.06, + "grad_norm": 0.5080763311824805, + "learning_rate": 1.9957752634527877e-05, + "loss": 0.3212, "step": 1272 }, { - "epoch": 0.07, - "grad_norm": 1.1139660490114864, - "learning_rate": 1.9902761245168078e-05, - "loss": 0.659, + "epoch": 0.06, + "grad_norm": 0.6730797743213048, + "learning_rate": 1.9957615897786377e-05, + "loss": 0.3944, "step": 1273 }, { - "epoch": 0.07, - "grad_norm": 0.36944374947304287, - "learning_rate": 1.990250219141571e-05, - "loss": 0.1944, + "epoch": 0.06, + "grad_norm": 0.3214645245013888, + "learning_rate": 1.9957478940592852e-05, + "loss": 0.1526, "step": 1274 }, { - "epoch": 0.07, - "grad_norm": 0.4744817910147028, - "learning_rate": 1.990224279474012e-05, - "loss": 0.2431, + "epoch": 0.06, + "grad_norm": 0.5740035016500189, + "learning_rate": 1.9957341762950346e-05, + "loss": 0.3094, "step": 1275 }, { - "epoch": 0.07, - "grad_norm": 0.5063465239101601, - "learning_rate": 1.990198305515029e-05, - "loss": 0.3569, + "epoch": 0.06, + "grad_norm": 0.7623673966363658, + "learning_rate": 1.9957204364861892e-05, + "loss": 0.4458, "step": 1276 }, { - "epoch": 0.07, - "grad_norm": 0.8148200431071357, - "learning_rate": 1.9901722972655207e-05, - "loss": 0.4337, + "epoch": 0.06, + "grad_norm": 0.5685605689429389, + "learning_rate": 1.9957066746330524e-05, + "loss": 0.335, "step": 1277 }, { - "epoch": 0.07, - "grad_norm": 0.4741481971958385, - "learning_rate": 1.990146254726389e-05, - "loss": 0.3101, + "epoch": 0.06, + "grad_norm": 0.5057445049492054, + "learning_rate": 1.99569289073593e-05, + "loss": 0.3326, "step": 1278 }, { - "epoch": 0.07, - "grad_norm": 0.5962509814169538, - "learning_rate": 1.9901201778985344e-05, - "loss": 0.4426, + "epoch": 0.06, + "grad_norm": 1.1787415254414606, + "learning_rate": 1.9956790847951264e-05, + "loss": 0.614, "step": 1279 }, { - "epoch": 0.07, - "grad_norm": 0.38573360540688933, - "learning_rate": 1.9900940667828606e-05, - "loss": 0.259, + "epoch": 0.06, + "grad_norm": 0.33658493100232073, + "learning_rate": 1.9956652568109477e-05, + "loss": 0.2038, "step": 1280 }, { - "epoch": 0.07, - "grad_norm": 0.40658105084541624, - "learning_rate": 1.9900679213802724e-05, - "loss": 0.2474, + "epoch": 0.06, + "grad_norm": 0.5516853961866517, + "learning_rate": 1.9956514067836994e-05, + "loss": 0.2721, "step": 1281 }, { - "epoch": 0.07, - "grad_norm": 0.7175034272163154, - "learning_rate": 1.9900417416916742e-05, - "loss": 0.4537, + "epoch": 0.06, + "grad_norm": 1.0924449694984035, + "learning_rate": 1.9956375347136893e-05, + "loss": 0.5797, "step": 1282 }, { - "epoch": 0.07, - "grad_norm": 0.5669166964944146, - "learning_rate": 1.9900155277179734e-05, - "loss": 0.3968, + "epoch": 0.06, + "grad_norm": 0.654942123347561, + "learning_rate": 1.9956236406012232e-05, + "loss": 0.5146, "step": 1283 }, { - "epoch": 0.07, - "grad_norm": 0.7815627889065767, - "learning_rate": 1.989989279460077e-05, - "loss": 0.313, + "epoch": 0.06, + "grad_norm": 0.45894806726904186, + "learning_rate": 1.9956097244466094e-05, + "loss": 0.2759, "step": 1284 }, { - "epoch": 0.07, - "grad_norm": 0.9383698484651365, - "learning_rate": 1.989962996918895e-05, - "loss": 0.5734, + "epoch": 0.06, + "grad_norm": 0.6541774053942259, + "learning_rate": 1.995595786250156e-05, + "loss": 0.3758, "step": 1285 }, { - "epoch": 0.07, - "grad_norm": 0.4976846772814327, - "learning_rate": 1.9899366800953367e-05, - "loss": 0.2426, + "epoch": 0.06, + "grad_norm": 0.5299085439080724, + "learning_rate": 1.9955818260121716e-05, + "loss": 0.2107, "step": 1286 }, { - "epoch": 0.07, - "grad_norm": 0.38644317778329124, - "learning_rate": 1.9899103289903137e-05, - "loss": 0.2545, + "epoch": 0.06, + "grad_norm": 0.406442686592732, + "learning_rate": 1.995567843732965e-05, + "loss": 0.2077, "step": 1287 }, { - "epoch": 0.07, - "grad_norm": 0.545966418703407, - "learning_rate": 1.9898839436047384e-05, - "loss": 0.3516, + "epoch": 0.06, + "grad_norm": 1.2376583449428011, + "learning_rate": 1.995553839412846e-05, + "loss": 0.5901, "step": 1288 }, { - "epoch": 0.07, - "grad_norm": 0.8579484617086817, - "learning_rate": 1.989857523939525e-05, - "loss": 0.4972, + "epoch": 0.06, + "grad_norm": 0.5412765152955386, + "learning_rate": 1.995539813052124e-05, + "loss": 0.4048, "step": 1289 }, { - "epoch": 0.07, - "grad_norm": 0.455372961518016, - "learning_rate": 1.9898310699955884e-05, - "loss": 0.3282, + "epoch": 0.06, + "grad_norm": 0.47036725375015975, + "learning_rate": 1.9955257646511105e-05, + "loss": 0.2542, "step": 1290 }, { - "epoch": 0.07, - "grad_norm": 0.6335961270391565, - "learning_rate": 1.989804581773844e-05, - "loss": 0.3331, + "epoch": 0.06, + "grad_norm": 1.798913404077829, + "learning_rate": 1.995511694210116e-05, + "loss": 0.8552, "step": 1291 }, { - "epoch": 0.07, - "grad_norm": 0.50902091626044, - "learning_rate": 1.989778059275209e-05, - "loss": 0.3618, + "epoch": 0.06, + "grad_norm": 0.5073032030697933, + "learning_rate": 1.995497601729452e-05, + "loss": 0.3683, "step": 1292 }, { - "epoch": 0.07, - "grad_norm": 0.30150229710949894, - "learning_rate": 1.989751502500603e-05, - "loss": 0.2083, + "epoch": 0.06, + "grad_norm": 0.38696405464427724, + "learning_rate": 1.9954834872094308e-05, + "loss": 0.1622, "step": 1293 }, { - "epoch": 0.07, - "grad_norm": 0.9675402387132627, - "learning_rate": 1.9897249114509453e-05, - "loss": 0.533, + "epoch": 0.06, + "grad_norm": 0.5258666353465125, + "learning_rate": 1.9954693506503644e-05, + "loss": 0.315, "step": 1294 }, { - "epoch": 0.07, - "grad_norm": 0.5243771162575476, - "learning_rate": 1.9896982861271558e-05, - "loss": 0.3046, + "epoch": 0.06, + "grad_norm": 1.124654269885573, + "learning_rate": 1.9954551920525662e-05, + "loss": 0.5263, "step": 1295 }, { - "epoch": 0.07, - "grad_norm": 0.5198603235923646, - "learning_rate": 1.9896716265301577e-05, - "loss": 0.3362, + "epoch": 0.06, + "grad_norm": 0.7879964457044714, + "learning_rate": 1.9954410114163494e-05, + "loss": 0.3652, "step": 1296 }, { - "epoch": 0.07, - "grad_norm": 1.1509175904091664, - "learning_rate": 1.9896449326608734e-05, - "loss": 0.8126, + "epoch": 0.06, + "grad_norm": 0.45746256424010084, + "learning_rate": 1.995426808742028e-05, + "loss": 0.2989, "step": 1297 }, { - "epoch": 0.07, - "grad_norm": 0.23527713331733763, - "learning_rate": 1.9896182045202278e-05, - "loss": 0.1337, + "epoch": 0.06, + "grad_norm": 0.3600814320667932, + "learning_rate": 1.9954125840299165e-05, + "loss": 0.1781, "step": 1298 }, { - "epoch": 0.07, - "grad_norm": 0.5617916958589548, - "learning_rate": 1.989591442109146e-05, - "loss": 0.3786, + "epoch": 0.06, + "grad_norm": 0.5763941836749205, + "learning_rate": 1.99539833728033e-05, + "loss": 0.3014, "step": 1299 }, { - "epoch": 0.07, - "grad_norm": 0.5253914577139904, - "learning_rate": 1.9895646454285558e-05, - "loss": 0.4054, + "epoch": 0.06, + "grad_norm": 0.5026134104003527, + "learning_rate": 1.995384068493583e-05, + "loss": 0.371, "step": 1300 }, { - "epoch": 0.07, - "grad_norm": 0.3003271411328984, - "learning_rate": 1.989537814479384e-05, - "loss": 0.1277, + "epoch": 0.06, + "grad_norm": 0.5890066865568168, + "learning_rate": 1.9953697776699926e-05, + "loss": 0.3957, "step": 1301 }, { - "epoch": 0.07, - "grad_norm": 0.4684808438512099, - "learning_rate": 1.9895109492625604e-05, - "loss": 0.2995, + "epoch": 0.06, + "grad_norm": 0.4951438894740915, + "learning_rate": 1.9953554648098748e-05, + "loss": 0.3129, "step": 1302 }, { - "epoch": 0.07, - "grad_norm": 0.5034967279795296, - "learning_rate": 1.9894840497790154e-05, - "loss": 0.3855, + "epoch": 0.06, + "grad_norm": 0.6959817049180712, + "learning_rate": 1.995341129913546e-05, + "loss": 0.3745, "step": 1303 }, { - "epoch": 0.07, - "grad_norm": 0.6015096789873883, - "learning_rate": 1.98945711602968e-05, - "loss": 0.4516, + "epoch": 0.06, + "grad_norm": 0.5105233304054243, + "learning_rate": 1.995326772981324e-05, + "loss": 0.3345, "step": 1304 }, { - "epoch": 0.07, - "grad_norm": 0.43202696608905033, - "learning_rate": 1.9894301480154873e-05, - "loss": 0.2953, + "epoch": 0.06, + "grad_norm": 0.40858323744546404, + "learning_rate": 1.9953123940135265e-05, + "loss": 0.2513, "step": 1305 }, { - "epoch": 0.08, - "grad_norm": 0.39747979018767193, - "learning_rate": 1.989403145737371e-05, - "loss": 0.2653, + "epoch": 0.06, + "grad_norm": 0.48725681968798995, + "learning_rate": 1.995297993010472e-05, + "loss": 0.3861, "step": 1306 }, { - "epoch": 0.08, - "grad_norm": 0.5408971085490359, - "learning_rate": 1.989376109196266e-05, - "loss": 0.3496, + "epoch": 0.06, + "grad_norm": 0.4517315042238337, + "learning_rate": 1.9952835699724796e-05, + "loss": 0.2908, "step": 1307 }, { - "epoch": 0.08, - "grad_norm": 0.4058095462488843, - "learning_rate": 1.9893490383931095e-05, - "loss": 0.2257, + "epoch": 0.06, + "grad_norm": 0.427422462477044, + "learning_rate": 1.9952691248998676e-05, + "loss": 0.296, "step": 1308 }, { - "epoch": 0.08, - "grad_norm": 1.6788930294125746, - "learning_rate": 1.989321933328838e-05, - "loss": 0.8471, + "epoch": 0.06, + "grad_norm": 0.5576014079317251, + "learning_rate": 1.995254657792957e-05, + "loss": 0.3494, "step": 1309 }, { - "epoch": 0.08, - "grad_norm": 0.7626511562337264, - "learning_rate": 1.9892947940043906e-05, - "loss": 0.5442, + "epoch": 0.06, + "grad_norm": 0.4068060189230465, + "learning_rate": 1.995240168652067e-05, + "loss": 0.1495, "step": 1310 }, { - "epoch": 0.08, - "grad_norm": 0.5049748058118498, - "learning_rate": 1.989267620420707e-05, - "loss": 0.2766, + "epoch": 0.06, + "grad_norm": 0.5180145317515017, + "learning_rate": 1.995225657477519e-05, + "loss": 0.3435, "step": 1311 }, { - "epoch": 0.08, - "grad_norm": 0.7666799027992355, - "learning_rate": 1.9892404125787283e-05, - "loss": 0.438, + "epoch": 0.06, + "grad_norm": 0.5316091913828168, + "learning_rate": 1.995211124269634e-05, + "loss": 0.4337, "step": 1312 }, { - "epoch": 0.08, - "grad_norm": 0.3823035156163941, - "learning_rate": 1.9892131704793968e-05, - "loss": 0.2047, + "epoch": 0.06, + "grad_norm": 0.37967668038502944, + "learning_rate": 1.9951965690287344e-05, + "loss": 0.2661, "step": 1313 }, { - "epoch": 0.08, - "grad_norm": 0.47813218026484006, - "learning_rate": 1.9891858941236554e-05, - "loss": 0.2231, + "epoch": 0.06, + "grad_norm": 0.7427038515583471, + "learning_rate": 1.9951819917551418e-05, + "loss": 0.363, "step": 1314 }, { - "epoch": 0.08, - "grad_norm": 0.573911958015997, - "learning_rate": 1.989158583512449e-05, - "loss": 0.377, + "epoch": 0.06, + "grad_norm": 0.6165636373261265, + "learning_rate": 1.995167392449179e-05, + "loss": 0.3872, "step": 1315 }, { - "epoch": 0.08, - "grad_norm": 0.7540145368248665, - "learning_rate": 1.989131238646724e-05, - "loss": 0.4964, + "epoch": 0.06, + "grad_norm": 0.41667590799126847, + "learning_rate": 1.9951527711111692e-05, + "loss": 0.3138, "step": 1316 }, { - "epoch": 0.08, - "grad_norm": 0.534381050112668, - "learning_rate": 1.989103859527426e-05, - "loss": 0.3059, + "epoch": 0.06, + "grad_norm": 0.6860358935750404, + "learning_rate": 1.995138127741436e-05, + "loss": 0.3853, "step": 1317 }, { - "epoch": 0.08, - "grad_norm": 0.4482636946041032, - "learning_rate": 1.9890764461555044e-05, - "loss": 0.2821, + "epoch": 0.06, + "grad_norm": 0.49486025768093755, + "learning_rate": 1.995123462340304e-05, + "loss": 0.3889, "step": 1318 }, { - "epoch": 0.08, - "grad_norm": 0.43344662744253243, - "learning_rate": 1.9890489985319077e-05, - "loss": 0.268, + "epoch": 0.06, + "grad_norm": 0.3075310113082781, + "learning_rate": 1.995108774908098e-05, + "loss": 0.1049, "step": 1319 }, { - "epoch": 0.08, - "grad_norm": 0.5202040997266357, - "learning_rate": 1.989021516657587e-05, - "loss": 0.2804, + "epoch": 0.06, + "grad_norm": 0.4416736251717402, + "learning_rate": 1.9950940654451423e-05, + "loss": 0.2971, "step": 1320 }, { - "epoch": 0.08, - "grad_norm": 1.2201274898081262, - "learning_rate": 1.9889940005334932e-05, - "loss": 0.5302, + "epoch": 0.06, + "grad_norm": 0.5406353233958303, + "learning_rate": 1.9950793339517632e-05, + "loss": 0.3685, "step": 1321 }, { - "epoch": 0.08, - "grad_norm": 0.8568685374679005, - "learning_rate": 1.98896645016058e-05, - "loss": 0.5544, + "epoch": 0.06, + "grad_norm": 1.013042561872607, + "learning_rate": 1.9950645804282867e-05, + "loss": 0.6152, "step": 1322 }, { - "epoch": 0.08, - "grad_norm": 0.45238693312134165, - "learning_rate": 1.9889388655398015e-05, - "loss": 0.3015, + "epoch": 0.06, + "grad_norm": 0.41939724424822994, + "learning_rate": 1.9950498048750398e-05, + "loss": 0.3398, "step": 1323 }, { - "epoch": 0.08, - "grad_norm": 0.45007359982134754, - "learning_rate": 1.9889112466721122e-05, - "loss": 0.3122, + "epoch": 0.06, + "grad_norm": 0.5424359787162983, + "learning_rate": 1.9950350072923487e-05, + "loss": 0.3816, "step": 1324 }, { - "epoch": 0.08, - "grad_norm": 0.3702787792276908, - "learning_rate": 1.9888835935584686e-05, - "loss": 0.2158, + "epoch": 0.06, + "grad_norm": 0.3701024490172656, + "learning_rate": 1.995020187680542e-05, + "loss": 0.2179, "step": 1325 }, { - "epoch": 0.08, - "grad_norm": 0.44701134918728447, - "learning_rate": 1.9888559061998294e-05, - "loss": 0.2895, + "epoch": 0.06, + "grad_norm": 0.4756271645305226, + "learning_rate": 1.9950053460399472e-05, + "loss": 0.2305, "step": 1326 }, { - "epoch": 0.08, - "grad_norm": 0.5825053647444507, - "learning_rate": 1.9888281845971522e-05, - "loss": 0.3207, + "epoch": 0.06, + "grad_norm": 0.7042955325013491, + "learning_rate": 1.994990482370893e-05, + "loss": 0.4529, "step": 1327 }, { - "epoch": 0.08, - "grad_norm": 1.9820095173697092, - "learning_rate": 1.988800428751398e-05, - "loss": 0.6004, + "epoch": 0.06, + "grad_norm": 0.4481337085040498, + "learning_rate": 1.9949755966737087e-05, + "loss": 0.3656, "step": 1328 }, { - "epoch": 0.08, - "grad_norm": 0.46305327622259507, - "learning_rate": 1.988772638663527e-05, - "loss": 0.3056, + "epoch": 0.06, + "grad_norm": 0.7429431662803592, + "learning_rate": 1.9949606889487234e-05, + "loss": 0.3592, "step": 1329 }, { - "epoch": 0.08, - "grad_norm": 1.3268004204954384, - "learning_rate": 1.9887448143345022e-05, - "loss": 0.6153, + "epoch": 0.06, + "grad_norm": 0.6292078347675322, + "learning_rate": 1.9949457591962675e-05, + "loss": 0.3749, "step": 1330 }, { - "epoch": 0.08, - "grad_norm": 0.3606881840853597, - "learning_rate": 1.988716955765287e-05, - "loss": 0.2031, + "epoch": 0.06, + "grad_norm": 0.3988898580370855, + "learning_rate": 1.9949308074166714e-05, + "loss": 0.2006, "step": 1331 }, { - "epoch": 0.08, - "grad_norm": 0.45448857925172526, - "learning_rate": 1.988689062956846e-05, - "loss": 0.2976, + "epoch": 0.06, + "grad_norm": 0.4739221449065497, + "learning_rate": 1.9949158336102664e-05, + "loss": 0.3152, "step": 1332 }, { - "epoch": 0.08, - "grad_norm": 0.8464056685167116, - "learning_rate": 1.9886611359101455e-05, - "loss": 0.4598, + "epoch": 0.06, + "grad_norm": 0.5057646602244956, + "learning_rate": 1.9949008377773835e-05, + "loss": 0.284, "step": 1333 }, { - "epoch": 0.08, - "grad_norm": 0.8824103472910778, - "learning_rate": 1.9886331746261523e-05, - "loss": 0.4065, + "epoch": 0.06, + "grad_norm": 0.7320121911521591, + "learning_rate": 1.9948858199183548e-05, + "loss": 0.5449, "step": 1334 }, { - "epoch": 0.08, - "grad_norm": 0.4968644088356621, - "learning_rate": 1.988605179105835e-05, - "loss": 0.3155, + "epoch": 0.06, + "grad_norm": 0.915663331664433, + "learning_rate": 1.9948707800335133e-05, + "loss": 0.4888, "step": 1335 }, { - "epoch": 0.08, - "grad_norm": 0.5104609846598314, - "learning_rate": 1.9885771493501625e-05, - "loss": 0.3783, + "epoch": 0.06, + "grad_norm": 0.44308116410686177, + "learning_rate": 1.9948557181231915e-05, + "loss": 0.2774, "step": 1336 }, { - "epoch": 0.08, - "grad_norm": 0.28315552118650017, - "learning_rate": 1.9885490853601058e-05, - "loss": 0.1439, + "epoch": 0.06, + "grad_norm": 0.4888480715509671, + "learning_rate": 1.994840634187723e-05, + "loss": 0.2911, "step": 1337 }, { - "epoch": 0.08, - "grad_norm": 0.441602883979966, - "learning_rate": 1.988520987136637e-05, - "loss": 0.3015, + "epoch": 0.06, + "grad_norm": 0.4952590136618777, + "learning_rate": 1.9948255282274414e-05, + "loss": 0.3024, "step": 1338 }, { - "epoch": 0.08, - "grad_norm": 0.4708608501945979, - "learning_rate": 1.9884928546807286e-05, - "loss": 0.3942, + "epoch": 0.06, + "grad_norm": 0.44755179272388174, + "learning_rate": 1.9948104002426814e-05, + "loss": 0.3281, "step": 1339 }, { - "epoch": 0.08, - "grad_norm": 0.7510346237626402, - "learning_rate": 1.9884646879933555e-05, - "loss": 0.5117, + "epoch": 0.06, + "grad_norm": 0.4654316373348151, + "learning_rate": 1.9947952502337783e-05, + "loss": 0.3495, "step": 1340 }, { - "epoch": 0.08, - "grad_norm": 0.48347633854447297, - "learning_rate": 1.9884364870754925e-05, - "loss": 0.2751, + "epoch": 0.06, + "grad_norm": 0.6241173653307627, + "learning_rate": 1.9947800782010672e-05, + "loss": 0.3984, "step": 1341 }, { - "epoch": 0.08, - "grad_norm": 1.5360278255553699, - "learning_rate": 1.988408251928117e-05, - "loss": 0.6755, + "epoch": 0.06, + "grad_norm": 0.5647403277861793, + "learning_rate": 1.994764884144884e-05, + "loss": 0.2754, "step": 1342 }, { - "epoch": 0.08, - "grad_norm": 0.38517016065839627, - "learning_rate": 1.9883799825522056e-05, - "loss": 0.2931, + "epoch": 0.06, + "grad_norm": 0.4318564591793054, + "learning_rate": 1.9947496680655643e-05, + "loss": 0.2167, "step": 1343 }, { - "epoch": 0.08, - "grad_norm": 0.36798370244603723, - "learning_rate": 1.988351678948738e-05, - "loss": 0.1964, + "epoch": 0.06, + "grad_norm": 0.42741855314127203, + "learning_rate": 1.9947344299634464e-05, + "loss": 0.2985, "step": 1344 }, { - "epoch": 0.08, - "grad_norm": 0.8098718422245624, - "learning_rate": 1.9883233411186947e-05, - "loss": 0.5179, + "epoch": 0.06, + "grad_norm": 0.6073580058069129, + "learning_rate": 1.9947191698388667e-05, + "loss": 0.3502, "step": 1345 }, { - "epoch": 0.08, - "grad_norm": 1.1491988511215931, - "learning_rate": 1.9882949690630563e-05, - "loss": 0.634, + "epoch": 0.06, + "grad_norm": 0.7933908596241529, + "learning_rate": 1.9947038876921634e-05, + "loss": 0.4735, "step": 1346 }, { - "epoch": 0.08, - "grad_norm": 0.38455018173049227, - "learning_rate": 1.9882665627828054e-05, - "loss": 0.2628, + "epoch": 0.06, + "grad_norm": 0.6761816913979622, + "learning_rate": 1.9946885835236746e-05, + "loss": 0.3859, "step": 1347 }, { - "epoch": 0.08, - "grad_norm": 2.134503438141174, - "learning_rate": 1.988238122278926e-05, - "loss": 0.6759, + "epoch": 0.06, + "grad_norm": 0.5207255678585393, + "learning_rate": 1.9946732573337396e-05, + "loss": 0.3227, "step": 1348 }, { - "epoch": 0.08, - "grad_norm": 0.45620155579332167, - "learning_rate": 1.9882096475524032e-05, - "loss": 0.3142, + "epoch": 0.06, + "grad_norm": 0.46105928182721706, + "learning_rate": 1.994657909122697e-05, + "loss": 0.2337, "step": 1349 }, { - "epoch": 0.08, - "grad_norm": 0.6149984382187618, - "learning_rate": 1.988181138604223e-05, - "loss": 0.2522, + "epoch": 0.06, + "grad_norm": 1.2304705823078474, + "learning_rate": 1.994642538890887e-05, + "loss": 0.6115, "step": 1350 }, { - "epoch": 0.08, - "grad_norm": 0.534257884253937, - "learning_rate": 1.988152595435372e-05, - "loss": 0.3617, + "epoch": 0.06, + "grad_norm": 0.453079955860743, + "learning_rate": 1.9946271466386498e-05, + "loss": 0.3136, "step": 1351 }, { - "epoch": 0.08, - "grad_norm": 0.6039783722918978, - "learning_rate": 1.9881240180468394e-05, - "loss": 0.3739, + "epoch": 0.06, + "grad_norm": 0.363419597438667, + "learning_rate": 1.9946117323663265e-05, + "loss": 0.2992, "step": 1352 }, { - "epoch": 0.08, - "grad_norm": 0.5317008736940727, - "learning_rate": 1.988095406439614e-05, - "loss": 0.1945, + "epoch": 0.06, + "grad_norm": 0.8591501316939716, + "learning_rate": 1.9945962960742578e-05, + "loss": 0.4189, "step": 1353 }, { - "epoch": 0.08, - "grad_norm": 0.7544906573407119, - "learning_rate": 1.9880667606146878e-05, - "loss": 0.4314, + "epoch": 0.06, + "grad_norm": 0.5160097384483079, + "learning_rate": 1.9945808377627857e-05, + "loss": 0.3161, "step": 1354 }, { - "epoch": 0.08, - "grad_norm": 0.461252463212955, - "learning_rate": 1.988038080573052e-05, - "loss": 0.3904, + "epoch": 0.06, + "grad_norm": 1.0901912471437143, + "learning_rate": 1.9945653574322527e-05, + "loss": 0.2919, "step": 1355 }, { - "epoch": 0.08, - "grad_norm": 0.6937852377504348, - "learning_rate": 1.9880093663157e-05, - "loss": 0.3911, + "epoch": 0.06, + "grad_norm": 0.4962465559181494, + "learning_rate": 1.994549855083001e-05, + "loss": 0.3596, "step": 1356 }, { - "epoch": 0.08, - "grad_norm": 0.5038983872790718, - "learning_rate": 1.9879806178436258e-05, - "loss": 0.355, + "epoch": 0.06, + "grad_norm": 0.5056413382856962, + "learning_rate": 1.994534330715374e-05, + "loss": 0.3541, "step": 1357 }, { - "epoch": 0.08, - "grad_norm": 0.6390378111434681, - "learning_rate": 1.9879518351578254e-05, - "loss": 0.2838, + "epoch": 0.06, + "grad_norm": 1.0663421784113676, + "learning_rate": 1.9945187843297157e-05, + "loss": 0.6049, "step": 1358 }, { - "epoch": 0.08, - "grad_norm": 0.38546418075325506, - "learning_rate": 1.9879230182592958e-05, - "loss": 0.3129, + "epoch": 0.06, + "grad_norm": 0.3707171474832705, + "learning_rate": 1.9945032159263702e-05, + "loss": 0.1753, "step": 1359 }, { - "epoch": 0.08, - "grad_norm": 0.49492171454772504, - "learning_rate": 1.9878941671490342e-05, - "loss": 0.2142, + "epoch": 0.06, + "grad_norm": 0.5170023769837524, + "learning_rate": 1.9944876255056817e-05, + "loss": 0.3105, "step": 1360 }, { - "epoch": 0.08, - "grad_norm": 0.8425537889343386, - "learning_rate": 1.9878652818280402e-05, - "loss": 0.5642, + "epoch": 0.06, + "grad_norm": 1.4738522844014403, + "learning_rate": 1.994472013067996e-05, + "loss": 0.5927, "step": 1361 }, { - "epoch": 0.08, - "grad_norm": 0.4003643201454913, - "learning_rate": 1.9878363622973137e-05, - "loss": 0.3595, + "epoch": 0.06, + "grad_norm": 0.5351778052632282, + "learning_rate": 1.9944563786136578e-05, + "loss": 0.3517, "step": 1362 }, { - "epoch": 0.08, - "grad_norm": 0.43574709207441126, - "learning_rate": 1.987807408557857e-05, - "loss": 0.2916, + "epoch": 0.06, + "grad_norm": 0.5659327296889876, + "learning_rate": 1.9944407221430144e-05, + "loss": 0.384, "step": 1363 }, { - "epoch": 0.08, - "grad_norm": 0.517603021457836, - "learning_rate": 1.987778420610672e-05, - "loss": 0.3182, + "epoch": 0.06, + "grad_norm": 0.5874721856277222, + "learning_rate": 1.9944250436564116e-05, + "loss": 0.4253, "step": 1364 }, { - "epoch": 0.08, - "grad_norm": 0.40541367749597956, - "learning_rate": 1.9877493984567623e-05, - "loss": 0.2377, + "epoch": 0.06, + "grad_norm": 0.4516469260065655, + "learning_rate": 1.994409343154197e-05, + "loss": 0.0793, "step": 1365 }, { - "epoch": 0.08, - "grad_norm": 0.595146514308601, - "learning_rate": 1.9877203420971338e-05, - "loss": 0.3073, + "epoch": 0.06, + "grad_norm": 0.6171126877631664, + "learning_rate": 1.9943936206367176e-05, + "loss": 0.3698, "step": 1366 }, { - "epoch": 0.08, - "grad_norm": 0.3996742522387982, - "learning_rate": 1.9876912515327925e-05, - "loss": 0.3877, + "epoch": 0.06, + "grad_norm": 1.4353796430746342, + "learning_rate": 1.9943778761043223e-05, + "loss": 0.6095, "step": 1367 }, { - "epoch": 0.08, - "grad_norm": 0.6372451758270241, - "learning_rate": 1.9876621267647452e-05, - "loss": 0.4087, + "epoch": 0.06, + "grad_norm": 0.44048046571661476, + "learning_rate": 1.9943621095573588e-05, + "loss": 0.3495, "step": 1368 }, { - "epoch": 0.08, - "grad_norm": 0.5014611845347008, - "learning_rate": 1.9876329677940015e-05, - "loss": 0.3651, + "epoch": 0.06, + "grad_norm": 0.506301107956799, + "learning_rate": 1.9943463209961767e-05, + "loss": 0.3212, "step": 1369 }, { - "epoch": 0.08, - "grad_norm": 0.4638886549372142, - "learning_rate": 1.9876037746215703e-05, - "loss": 0.3335, + "epoch": 0.06, + "grad_norm": 0.5019659309405168, + "learning_rate": 1.9943305104211256e-05, + "loss": 0.3082, "step": 1370 }, { - "epoch": 0.08, - "grad_norm": 0.5265298020277424, - "learning_rate": 1.9875745472484627e-05, - "loss": 0.2185, + "epoch": 0.06, + "grad_norm": 0.5159478238754939, + "learning_rate": 1.9943146778325553e-05, + "loss": 0.2569, "step": 1371 }, { - "epoch": 0.08, - "grad_norm": 0.38274955304444164, - "learning_rate": 1.987545285675691e-05, - "loss": 0.261, + "epoch": 0.06, + "grad_norm": 0.48151152268807706, + "learning_rate": 1.9942988232308163e-05, + "loss": 0.2661, "step": 1372 }, { - "epoch": 0.08, - "grad_norm": 0.7594647538424038, - "learning_rate": 1.9875159899042685e-05, - "loss": 0.4655, + "epoch": 0.06, + "grad_norm": 1.1273418440273861, + "learning_rate": 1.9942829466162595e-05, + "loss": 0.5935, "step": 1373 }, { - "epoch": 0.08, - "grad_norm": 0.653896701468227, - "learning_rate": 1.98748665993521e-05, - "loss": 0.3104, + "epoch": 0.06, + "grad_norm": 0.9127036204733932, + "learning_rate": 1.9942670479892367e-05, + "loss": 0.5675, "step": 1374 }, { - "epoch": 0.08, - "grad_norm": 0.39040508633732796, - "learning_rate": 1.987457295769531e-05, - "loss": 0.3225, + "epoch": 0.06, + "grad_norm": 0.44651839490263934, + "learning_rate": 1.9942511273500997e-05, + "loss": 0.2486, "step": 1375 }, { - "epoch": 0.08, - "grad_norm": 1.0234810635667073, - "learning_rate": 1.9874278974082482e-05, - "loss": 0.5349, + "epoch": 0.06, + "grad_norm": 0.5439851095740903, + "learning_rate": 1.9942351846992012e-05, + "loss": 0.3753, "step": 1376 }, { - "epoch": 0.08, - "grad_norm": 0.34396246269087427, - "learning_rate": 1.9873984648523796e-05, - "loss": 0.1595, + "epoch": 0.06, + "grad_norm": 0.4251608321252265, + "learning_rate": 1.994219220036894e-05, + "loss": 0.1818, "step": 1377 }, { - "epoch": 0.08, - "grad_norm": 0.5609481420866197, - "learning_rate": 1.9873689981029445e-05, - "loss": 0.3817, + "epoch": 0.06, + "grad_norm": 0.4742163618692392, + "learning_rate": 1.994203233363531e-05, + "loss": 0.2269, "step": 1378 }, { - "epoch": 0.08, - "grad_norm": 0.5179111274026793, - "learning_rate": 1.9873394971609636e-05, - "loss": 0.3453, + "epoch": 0.06, + "grad_norm": 2.0607449153008806, + "learning_rate": 1.994187224679467e-05, + "loss": 0.5876, "step": 1379 }, { - "epoch": 0.08, - "grad_norm": 0.46525695462885736, - "learning_rate": 1.9873099620274585e-05, - "loss": 0.327, + "epoch": 0.06, + "grad_norm": 0.6541229335899382, + "learning_rate": 1.9941711939850563e-05, + "loss": 0.4012, "step": 1380 }, { - "epoch": 0.08, - "grad_norm": 0.6728477544118068, - "learning_rate": 1.987280392703452e-05, - "loss": 0.433, + "epoch": 0.06, + "grad_norm": 0.5420574076321603, + "learning_rate": 1.9941551412806533e-05, + "loss": 0.2398, "step": 1381 }, { - "epoch": 0.08, - "grad_norm": 0.6129052674102026, - "learning_rate": 1.987250789189968e-05, - "loss": 0.437, + "epoch": 0.06, + "grad_norm": 1.8417737989564364, + "learning_rate": 1.9941390665666135e-05, + "loss": 0.7334, "step": 1382 }, { - "epoch": 0.08, - "grad_norm": 0.33943298098310193, - "learning_rate": 1.987221151488031e-05, - "loss": 0.215, + "epoch": 0.06, + "grad_norm": 0.4411964092154671, + "learning_rate": 1.994122969843293e-05, + "loss": 0.2422, "step": 1383 }, { - "epoch": 0.08, - "grad_norm": 0.4695780101175386, - "learning_rate": 1.9871914795986683e-05, - "loss": 0.2768, + "epoch": 0.06, + "grad_norm": 0.6355271912700533, + "learning_rate": 1.9941068511110485e-05, + "loss": 0.324, "step": 1384 }, { - "epoch": 0.08, - "grad_norm": 0.9439529862803306, - "learning_rate": 1.987161773522907e-05, - "loss": 0.5373, + "epoch": 0.06, + "grad_norm": 2.1290160257842237, + "learning_rate": 1.994090710370236e-05, + "loss": 0.4236, "step": 1385 }, { - "epoch": 0.08, - "grad_norm": 0.4313605518517955, - "learning_rate": 1.9871320332617762e-05, - "loss": 0.2314, + "epoch": 0.06, + "grad_norm": 1.4768260632328456, + "learning_rate": 1.9940745476212135e-05, + "loss": 0.6352, "step": 1386 }, { - "epoch": 0.08, - "grad_norm": 0.4968256366599863, - "learning_rate": 1.9871022588163057e-05, - "loss": 0.3409, + "epoch": 0.06, + "grad_norm": 0.4683855589913403, + "learning_rate": 1.9940583628643385e-05, + "loss": 0.2897, "step": 1387 }, { - "epoch": 0.08, - "grad_norm": 1.4379547696004578, - "learning_rate": 1.987072450187526e-05, - "loss": 0.8263, + "epoch": 0.06, + "grad_norm": 0.6605832940412394, + "learning_rate": 1.9940421560999693e-05, + "loss": 0.3126, "step": 1388 }, { - "epoch": 0.08, - "grad_norm": 0.3114063006115645, - "learning_rate": 1.98704260737647e-05, - "loss": 0.117, + "epoch": 0.06, + "grad_norm": 0.4215509151095274, + "learning_rate": 1.994025927328465e-05, + "loss": 0.1975, "step": 1389 }, { - "epoch": 0.08, - "grad_norm": 0.5394045691022546, - "learning_rate": 1.9870127303841708e-05, - "loss": 0.393, + "epoch": 0.06, + "grad_norm": 0.6943782380010035, + "learning_rate": 1.994009676550185e-05, + "loss": 0.2953, "step": 1390 }, { - "epoch": 0.08, - "grad_norm": 0.6326035735789947, - "learning_rate": 1.9869828192116634e-05, - "loss": 0.3016, + "epoch": 0.06, + "grad_norm": 1.5067796441086032, + "learning_rate": 1.9939934037654885e-05, + "loss": 0.4328, "step": 1391 }, { - "epoch": 0.08, - "grad_norm": 0.73994607381943, - "learning_rate": 1.986952873859983e-05, - "loss": 0.4724, + "epoch": 0.06, + "grad_norm": 4.195905971471969, + "learning_rate": 1.9939771089747358e-05, + "loss": 0.4647, "step": 1392 }, { - "epoch": 0.08, - "grad_norm": 0.43056343619915194, - "learning_rate": 1.9869228943301677e-05, - "loss": 0.2817, + "epoch": 0.06, + "grad_norm": 0.5339538520452175, + "learning_rate": 1.9939607921782884e-05, + "loss": 0.3157, "step": 1393 }, { - "epoch": 0.08, - "grad_norm": 0.6526090883250378, - "learning_rate": 1.9868928806232545e-05, - "loss": 0.4252, + "epoch": 0.06, + "grad_norm": 1.8817361852608496, + "learning_rate": 1.993944453376507e-05, + "loss": 0.5078, "step": 1394 }, { - "epoch": 0.08, - "grad_norm": 0.5255751811418452, - "learning_rate": 1.9868628327402833e-05, - "loss": 0.3748, + "epoch": 0.06, + "grad_norm": 0.3657077680941286, + "learning_rate": 1.993928092569753e-05, + "loss": 0.237, "step": 1395 }, { - "epoch": 0.08, - "grad_norm": 0.3704563536878913, - "learning_rate": 1.9868327506822948e-05, - "loss": 0.3383, + "epoch": 0.06, + "grad_norm": 0.9695611371347761, + "learning_rate": 1.9939117097583894e-05, + "loss": 0.2744, "step": 1396 }, { - "epoch": 0.08, - "grad_norm": 0.4162983542173049, - "learning_rate": 1.9868026344503307e-05, - "loss": 0.1772, + "epoch": 0.06, + "grad_norm": 1.9459512517195656, + "learning_rate": 1.9938953049427782e-05, + "loss": 0.5594, "step": 1397 }, { - "epoch": 0.08, - "grad_norm": 0.5122113190467039, - "learning_rate": 1.9867724840454336e-05, - "loss": 0.3366, + "epoch": 0.06, + "grad_norm": 2.8070238056153074, + "learning_rate": 1.9938788781232833e-05, + "loss": 0.5795, "step": 1398 }, { - "epoch": 0.08, - "grad_norm": 0.46115541335438065, - "learning_rate": 1.986742299468648e-05, - "loss": 0.2477, + "epoch": 0.06, + "grad_norm": 0.6498849817484506, + "learning_rate": 1.9938624293002674e-05, + "loss": 0.3326, "step": 1399 }, { - "epoch": 0.08, - "grad_norm": 3.5151180809976266, - "learning_rate": 1.9867120807210188e-05, - "loss": 0.7334, + "epoch": 0.06, + "grad_norm": 0.686408096844314, + "learning_rate": 1.9938459584740955e-05, + "loss": 0.351, "step": 1400 }, { - "epoch": 0.08, - "grad_norm": 2.158542189747334, - "learning_rate": 1.9866818278035926e-05, - "loss": 0.4279, + "epoch": 0.06, + "grad_norm": 0.8540767453773233, + "learning_rate": 1.993829465645132e-05, + "loss": 0.3163, "step": 1401 }, { - "epoch": 0.08, - "grad_norm": 0.6994009052744022, - "learning_rate": 1.9866515407174174e-05, - "loss": 0.3324, + "epoch": 0.06, + "grad_norm": 0.5608648450411757, + "learning_rate": 1.9938129508137417e-05, + "loss": 0.3095, "step": 1402 }, { - "epoch": 0.08, - "grad_norm": 1.0880865146791605, - "learning_rate": 1.9866212194635414e-05, - "loss": 0.2424, + "epoch": 0.06, + "grad_norm": 0.7857766805375322, + "learning_rate": 1.993796413980291e-05, + "loss": 0.3517, "step": 1403 }, { - "epoch": 0.08, - "grad_norm": 1.9025391903348476, - "learning_rate": 1.986590864043015e-05, - "loss": 0.4065, + "epoch": 0.06, + "grad_norm": 0.6758768420556903, + "learning_rate": 1.993779855145145e-05, + "loss": 0.2749, "step": 1404 }, { - "epoch": 0.08, - "grad_norm": 1.2725994136331205, - "learning_rate": 1.98656047445689e-05, - "loss": 0.3837, + "epoch": 0.06, + "grad_norm": 0.5500003071239046, + "learning_rate": 1.9937632743086712e-05, + "loss": 0.3197, "step": 1405 }, { - "epoch": 0.08, - "grad_norm": 1.1275735184080333, - "learning_rate": 1.9865300507062177e-05, - "loss": 0.3384, + "epoch": 0.06, + "grad_norm": 1.9210820689038117, + "learning_rate": 1.993746671471236e-05, + "loss": 0.7352, "step": 1406 }, { - "epoch": 0.08, - "grad_norm": 0.6833289165964801, - "learning_rate": 1.986499592792052e-05, - "loss": 0.501, + "epoch": 0.06, + "grad_norm": 0.5820966766615646, + "learning_rate": 1.9937300466332078e-05, + "loss": 0.3374, "step": 1407 }, { - "epoch": 0.08, - "grad_norm": 1.7700107274879846, - "learning_rate": 1.9864691007154486e-05, - "loss": 0.3731, + "epoch": 0.06, + "grad_norm": 0.4649022836543558, + "learning_rate": 1.993713399794954e-05, + "loss": 0.3109, "step": 1408 }, { - "epoch": 0.08, - "grad_norm": 2.4943906235284774, - "learning_rate": 1.9864385744774618e-05, - "loss": 0.1505, + "epoch": 0.06, + "grad_norm": 0.5155670330597211, + "learning_rate": 1.9936967309568427e-05, + "loss": 0.2718, "step": 1409 }, { - "epoch": 0.08, - "grad_norm": 0.7558856050651058, - "learning_rate": 1.9864080140791503e-05, - "loss": 0.3196, + "epoch": 0.06, + "grad_norm": 1.5081446333829827, + "learning_rate": 1.993680040119244e-05, + "loss": 0.7935, "step": 1410 }, { - "epoch": 0.08, - "grad_norm": 0.9313219061687138, - "learning_rate": 1.9863774195215714e-05, - "loss": 0.359, + "epoch": 0.06, + "grad_norm": 0.5975098847159008, + "learning_rate": 1.993663327282527e-05, + "loss": 0.2451, "step": 1411 }, { - "epoch": 0.08, - "grad_norm": 1.319474351714657, - "learning_rate": 1.9863467908057846e-05, - "loss": 0.4746, + "epoch": 0.06, + "grad_norm": 0.7221236999671103, + "learning_rate": 1.9936465924470612e-05, + "loss": 0.3723, "step": 1412 }, { - "epoch": 0.08, - "grad_norm": 0.7970602847741426, - "learning_rate": 1.986316127932851e-05, - "loss": 0.4742, + "epoch": 0.06, + "grad_norm": 1.199042087144706, + "learning_rate": 1.993629835613218e-05, + "loss": 0.5797, "step": 1413 }, { - "epoch": 0.08, - "grad_norm": 2.602170573188123, - "learning_rate": 1.9862854309038324e-05, - "loss": 0.3773, + "epoch": 0.06, + "grad_norm": 0.5513120312953381, + "learning_rate": 1.9936130567813675e-05, + "loss": 0.3087, "step": 1414 }, { - "epoch": 0.08, - "grad_norm": 0.9078365075980418, - "learning_rate": 1.9862546997197917e-05, - "loss": 0.2455, + "epoch": 0.07, + "grad_norm": 0.603239125871396, + "learning_rate": 1.9935962559518817e-05, + "loss": 0.298, "step": 1415 }, { - "epoch": 0.08, - "grad_norm": 0.6767700019137007, - "learning_rate": 1.9862239343817932e-05, - "loss": 0.2878, + "epoch": 0.07, + "grad_norm": 0.6072141420920694, + "learning_rate": 1.993579433125133e-05, + "loss": 0.3598, "step": 1416 }, { - "epoch": 0.08, - "grad_norm": 3.0559677425033875, - "learning_rate": 1.9861931348909024e-05, - "loss": 0.3493, + "epoch": 0.07, + "grad_norm": 0.5124345274165617, + "learning_rate": 1.9935625883014925e-05, + "loss": 0.2189, "step": 1417 }, { - "epoch": 0.08, - "grad_norm": 1.0023429161123776, - "learning_rate": 1.9861623012481853e-05, - "loss": 0.4367, + "epoch": 0.07, + "grad_norm": 0.7524818405958181, + "learning_rate": 1.9935457214813344e-05, + "loss": 0.4685, "step": 1418 }, { - "epoch": 0.08, - "grad_norm": 0.9368686272084427, - "learning_rate": 1.9861314334547105e-05, - "loss": 0.3789, + "epoch": 0.07, + "grad_norm": 0.6688746450540434, + "learning_rate": 1.9935288326650314e-05, + "loss": 0.4157, "step": 1419 }, { - "epoch": 0.08, - "grad_norm": 0.9117183827974491, - "learning_rate": 1.9861005315115466e-05, - "loss": 0.3442, + "epoch": 0.07, + "grad_norm": 0.5529417729383638, + "learning_rate": 1.9935119218529574e-05, + "loss": 0.2666, "step": 1420 }, { - "epoch": 0.08, - "grad_norm": 1.3748896935498363, - "learning_rate": 1.9860695954197635e-05, - "loss": 0.3071, + "epoch": 0.07, + "grad_norm": 0.5216176923810515, + "learning_rate": 1.9934949890454877e-05, + "loss": 0.2659, "step": 1421 }, { - "epoch": 0.08, - "grad_norm": 0.6790630927567458, - "learning_rate": 1.9860386251804327e-05, - "loss": 0.2388, + "epoch": 0.07, + "grad_norm": 0.8265677056398746, + "learning_rate": 1.993478034242996e-05, + "loss": 0.3322, "step": 1422 }, { - "epoch": 0.08, - "grad_norm": 0.8602801769926192, - "learning_rate": 1.9860076207946268e-05, - "loss": 0.352, + "epoch": 0.07, + "grad_norm": 0.4760150613711586, + "learning_rate": 1.993461057445858e-05, + "loss": 0.3288, "step": 1423 }, { - "epoch": 0.08, - "grad_norm": 1.4437574717264712, - "learning_rate": 1.9859765822634194e-05, - "loss": 0.5806, + "epoch": 0.07, + "grad_norm": 0.48461423825758154, + "learning_rate": 1.9934440586544498e-05, + "loss": 0.3287, "step": 1424 }, { - "epoch": 0.08, - "grad_norm": 0.8911016493029239, - "learning_rate": 1.9859455095878853e-05, - "loss": 0.4489, + "epoch": 0.07, + "grad_norm": 0.8964440325041704, + "learning_rate": 1.9934270378691478e-05, + "loss": 0.5651, "step": 1425 }, { - "epoch": 0.08, - "grad_norm": 0.8138852682470921, - "learning_rate": 1.9859144027691006e-05, - "loss": 0.3231, + "epoch": 0.07, + "grad_norm": 0.5473409725961967, + "learning_rate": 1.9934099950903286e-05, + "loss": 0.3215, "step": 1426 }, { - "epoch": 0.08, - "grad_norm": 1.3279680757064087, - "learning_rate": 1.9858832618081427e-05, - "loss": 0.2975, + "epoch": 0.07, + "grad_norm": 0.39444844342964397, + "learning_rate": 1.9933929303183695e-05, + "loss": 0.2279, "step": 1427 }, { - "epoch": 0.08, - "grad_norm": 0.8824804769850708, - "learning_rate": 1.9858520867060897e-05, - "loss": 0.1917, + "epoch": 0.07, + "grad_norm": 0.4616044700934544, + "learning_rate": 1.9933758435536485e-05, + "loss": 0.2847, "step": 1428 }, { - "epoch": 0.08, - "grad_norm": 0.7736102829235555, - "learning_rate": 1.9858208774640213e-05, - "loss": 0.3659, + "epoch": 0.07, + "grad_norm": 0.5396450467791876, + "learning_rate": 1.9933587347965437e-05, + "loss": 0.3402, "step": 1429 }, { - "epoch": 0.08, - "grad_norm": 1.003301610323338, - "learning_rate": 1.9857896340830182e-05, - "loss": 0.4005, + "epoch": 0.07, + "grad_norm": 0.6030172337779957, + "learning_rate": 1.993341604047434e-05, + "loss": 0.3584, "step": 1430 }, { - "epoch": 0.08, - "grad_norm": 1.6699557262792868, - "learning_rate": 1.9857583565641627e-05, - "loss": 0.6112, + "epoch": 0.07, + "grad_norm": 0.5031896289017802, + "learning_rate": 1.9933244513066983e-05, + "loss": 0.417, "step": 1431 }, { - "epoch": 0.08, - "grad_norm": 0.7784077780986679, - "learning_rate": 1.9857270449085378e-05, - "loss": 0.2576, + "epoch": 0.07, + "grad_norm": 0.5594265126655634, + "learning_rate": 1.9933072765747167e-05, + "loss": 0.3136, "step": 1432 }, { - "epoch": 0.08, - "grad_norm": 1.7558353459824572, - "learning_rate": 1.9856956991172272e-05, - "loss": 0.7202, + "epoch": 0.07, + "grad_norm": 0.6178087688788707, + "learning_rate": 1.99329007985187e-05, + "loss": 0.3099, "step": 1433 }, { - "epoch": 0.08, - "grad_norm": 0.8726908894911565, - "learning_rate": 1.9856643191913173e-05, - "loss": 0.3263, + "epoch": 0.07, + "grad_norm": 0.3741865565321857, + "learning_rate": 1.9932728611385376e-05, + "loss": 0.2447, "step": 1434 }, { - "epoch": 0.08, - "grad_norm": 0.6394845806469756, - "learning_rate": 1.9856329051318942e-05, - "loss": 0.2556, + "epoch": 0.07, + "grad_norm": 0.4776112652831589, + "learning_rate": 1.993255620435101e-05, + "loss": 0.3269, "step": 1435 }, { - "epoch": 0.08, - "grad_norm": 2.364157046676915, - "learning_rate": 1.9856014569400463e-05, - "loss": 0.5657, + "epoch": 0.07, + "grad_norm": 0.49017365764501275, + "learning_rate": 1.9932383577419432e-05, + "loss": 0.3511, "step": 1436 }, { - "epoch": 0.08, - "grad_norm": 0.6999329889210596, - "learning_rate": 1.985569974616862e-05, - "loss": 0.3046, + "epoch": 0.07, + "grad_norm": 0.6691951883879022, + "learning_rate": 1.993221073059445e-05, + "loss": 0.4507, "step": 1437 }, { - "epoch": 0.08, - "grad_norm": 0.8304526967619219, - "learning_rate": 1.9855384581634322e-05, - "loss": 0.2348, + "epoch": 0.07, + "grad_norm": 0.6986990730730509, + "learning_rate": 1.9932037663879897e-05, + "loss": 0.3707, "step": 1438 }, { - "epoch": 0.08, - "grad_norm": 0.6726255986180282, - "learning_rate": 1.985506907580848e-05, - "loss": 0.4075, + "epoch": 0.07, + "grad_norm": 0.49199227274235935, + "learning_rate": 1.99318643772796e-05, + "loss": 0.3571, "step": 1439 }, { - "epoch": 0.08, - "grad_norm": 0.6573471554110082, - "learning_rate": 1.9854753228702016e-05, - "loss": 0.3843, + "epoch": 0.07, + "grad_norm": 0.34763800920784327, + "learning_rate": 1.99316908707974e-05, + "loss": 0.1926, "step": 1440 }, { - "epoch": 0.08, - "grad_norm": 0.5886305659040076, - "learning_rate": 1.9854437040325872e-05, - "loss": 0.234, + "epoch": 0.07, + "grad_norm": 0.603460983873073, + "learning_rate": 1.9931517144437136e-05, + "loss": 0.374, "step": 1441 }, { - "epoch": 0.08, - "grad_norm": 0.7112981770432031, - "learning_rate": 1.9854120510691e-05, - "loss": 0.4008, + "epoch": 0.07, + "grad_norm": 0.5006650612381522, + "learning_rate": 1.9931343198202655e-05, + "loss": 0.414, "step": 1442 }, { - "epoch": 0.08, - "grad_norm": 0.6036621461010034, - "learning_rate": 1.9853803639808357e-05, - "loss": 0.2906, + "epoch": 0.07, + "grad_norm": 0.5237408236945418, + "learning_rate": 1.9931169032097807e-05, + "loss": 0.3036, "step": 1443 }, { - "epoch": 0.08, - "grad_norm": 0.5678057149957291, - "learning_rate": 1.9853486427688918e-05, - "loss": 0.3099, + "epoch": 0.07, + "grad_norm": 0.7634324151572106, + "learning_rate": 1.993099464612645e-05, + "loss": 0.3371, "step": 1444 }, { - "epoch": 0.08, - "grad_norm": 0.5707284738168328, - "learning_rate": 1.9853168874343665e-05, - "loss": 0.3151, + "epoch": 0.07, + "grad_norm": 0.5353228489857171, + "learning_rate": 1.993082004029244e-05, + "loss": 0.3511, "step": 1445 }, { - "epoch": 0.08, - "grad_norm": 0.6543721185475424, - "learning_rate": 1.98528509797836e-05, - "loss": 0.4543, + "epoch": 0.07, + "grad_norm": 0.3745865828741059, + "learning_rate": 1.9930645214599648e-05, + "loss": 0.1723, "step": 1446 }, { - "epoch": 0.08, - "grad_norm": 0.5152518450197626, - "learning_rate": 1.985253274401973e-05, - "loss": 0.3259, + "epoch": 0.07, + "grad_norm": 0.40050656205923973, + "learning_rate": 1.993047016905194e-05, + "loss": 0.3005, "step": 1447 }, { - "epoch": 0.08, - "grad_norm": 0.7630087118038243, - "learning_rate": 1.985221416706307e-05, - "loss": 0.5161, + "epoch": 0.07, + "grad_norm": 0.6434613549824244, + "learning_rate": 1.9930294903653195e-05, + "loss": 0.4429, "step": 1448 }, { - "epoch": 0.08, - "grad_norm": 0.6039997332315664, - "learning_rate": 1.9851895248924662e-05, - "loss": 0.2395, + "epoch": 0.07, + "grad_norm": 0.5421359110896032, + "learning_rate": 1.9930119418407296e-05, + "loss": 0.3722, "step": 1449 }, { - "epoch": 0.08, - "grad_norm": 0.5091314902898457, - "learning_rate": 1.9851575989615545e-05, - "loss": 0.3048, + "epoch": 0.07, + "grad_norm": 0.44766153773917966, + "learning_rate": 1.992994371331812e-05, + "loss": 0.2364, "step": 1450 }, { - "epoch": 0.08, - "grad_norm": 1.4961746423041666, - "learning_rate": 1.9851256389146774e-05, - "loss": 0.089, + "epoch": 0.07, + "grad_norm": 0.5746163874371876, + "learning_rate": 1.992976778838956e-05, + "loss": 0.3713, "step": 1451 }, { - "epoch": 0.08, - "grad_norm": 0.8407751460559815, - "learning_rate": 1.985093644752942e-05, - "loss": 0.5311, + "epoch": 0.07, + "grad_norm": 0.6913309350112251, + "learning_rate": 1.9929591643625512e-05, + "loss": 0.4251, "step": 1452 }, { - "epoch": 0.08, - "grad_norm": 0.5369866218744758, - "learning_rate": 1.9850616164774556e-05, - "loss": 0.3195, + "epoch": 0.07, + "grad_norm": 0.7273438091097474, + "learning_rate": 1.9929415279029875e-05, + "loss": 0.3806, "step": 1453 }, { - "epoch": 0.08, - "grad_norm": 0.6396346126247083, - "learning_rate": 1.985029554089328e-05, - "loss": 0.3142, + "epoch": 0.07, + "grad_norm": 0.5013361599907687, + "learning_rate": 1.9929238694606556e-05, + "loss": 0.3458, "step": 1454 }, { - "epoch": 0.08, - "grad_norm": 0.6021130089990159, - "learning_rate": 1.9849974575896695e-05, - "loss": 0.2219, + "epoch": 0.07, + "grad_norm": 0.3755126757137734, + "learning_rate": 1.9929061890359457e-05, + "loss": 0.2421, "step": 1455 }, { - "epoch": 0.08, - "grad_norm": 0.8167722928601668, - "learning_rate": 1.984965326979591e-05, - "loss": 0.3154, + "epoch": 0.07, + "grad_norm": 0.5256354272483413, + "learning_rate": 1.9928884866292502e-05, + "loss": 0.2143, "step": 1456 }, { - "epoch": 0.08, - "grad_norm": 0.6078781381764322, - "learning_rate": 1.984933162260206e-05, - "loss": 0.4259, + "epoch": 0.07, + "grad_norm": 0.5697943591646129, + "learning_rate": 1.9928707622409605e-05, + "loss": 0.3685, "step": 1457 }, { - "epoch": 0.08, - "grad_norm": 0.5111068434766219, - "learning_rate": 1.9849009634326275e-05, - "loss": 0.3663, + "epoch": 0.07, + "grad_norm": 0.6684369488372943, + "learning_rate": 1.9928530158714692e-05, + "loss": 0.4263, "step": 1458 }, { - "epoch": 0.08, - "grad_norm": 0.491679245588098, - "learning_rate": 1.984868730497971e-05, - "loss": 0.3022, + "epoch": 0.07, + "grad_norm": 0.6262869460056422, + "learning_rate": 1.992835247521169e-05, + "loss": 0.3949, "step": 1459 }, { - "epoch": 0.08, - "grad_norm": 0.6657778643771437, - "learning_rate": 1.9848364634573533e-05, - "loss": 0.461, + "epoch": 0.07, + "grad_norm": 0.4468050847767522, + "learning_rate": 1.992817457190453e-05, + "loss": 0.3098, "step": 1460 }, { - "epoch": 0.08, - "grad_norm": 0.4499622319663323, - "learning_rate": 1.984804162311891e-05, - "loss": 0.1674, + "epoch": 0.07, + "grad_norm": 0.3950713564536097, + "learning_rate": 1.9927996448797157e-05, + "loss": 0.1835, "step": 1461 }, { - "epoch": 0.08, - "grad_norm": 0.43775680064572114, - "learning_rate": 1.9847718270627022e-05, - "loss": 0.2935, + "epoch": 0.07, + "grad_norm": 0.9146197308180377, + "learning_rate": 1.992781810589351e-05, + "loss": 0.4685, "step": 1462 }, { - "epoch": 0.08, - "grad_norm": 0.9724835720183442, - "learning_rate": 1.9847394577109083e-05, - "loss": 0.6095, + "epoch": 0.07, + "grad_norm": 0.4591940260608514, + "learning_rate": 1.992763954319754e-05, + "loss": 0.2703, "step": 1463 }, { - "epoch": 0.08, - "grad_norm": 0.6832214021917391, - "learning_rate": 1.984707054257629e-05, - "loss": 0.4568, + "epoch": 0.07, + "grad_norm": 1.0069955891120153, + "learning_rate": 1.9927460760713198e-05, + "loss": 0.6176, "step": 1464 }, { - "epoch": 0.08, - "grad_norm": 0.553754646433227, - "learning_rate": 1.9846746167039864e-05, - "loss": 0.2975, + "epoch": 0.07, + "grad_norm": 0.6308389150493487, + "learning_rate": 1.992728175844444e-05, + "loss": 0.4691, "step": 1465 }, { - "epoch": 0.08, - "grad_norm": 0.4793911200155322, - "learning_rate": 1.9846421450511045e-05, - "loss": 0.3857, + "epoch": 0.07, + "grad_norm": 0.5517675743027622, + "learning_rate": 1.992710253639524e-05, + "loss": 0.2532, "step": 1466 }, { - "epoch": 0.08, - "grad_norm": 0.553399104439865, - "learning_rate": 1.9846096393001074e-05, - "loss": 0.1008, + "epoch": 0.07, + "grad_norm": 0.4074979338072206, + "learning_rate": 1.992692309456955e-05, + "loss": 0.3079, "step": 1467 }, { - "epoch": 0.08, - "grad_norm": 0.4279760960702405, - "learning_rate": 1.984577099452121e-05, - "loss": 0.2942, + "epoch": 0.07, + "grad_norm": 0.521291902171804, + "learning_rate": 1.9926743432971355e-05, + "loss": 0.237, "step": 1468 }, { - "epoch": 0.08, - "grad_norm": 1.0356861473365662, - "learning_rate": 1.984544525508272e-05, - "loss": 0.5771, + "epoch": 0.07, + "grad_norm": 0.6524320103631052, + "learning_rate": 1.9926563551604622e-05, + "loss": 0.2853, "step": 1469 }, { - "epoch": 0.08, - "grad_norm": 0.4577243781894562, - "learning_rate": 1.9845119174696882e-05, - "loss": 0.4015, + "epoch": 0.07, + "grad_norm": 0.9386537246669457, + "learning_rate": 1.9926383450473344e-05, + "loss": 0.4479, "step": 1470 }, { - "epoch": 0.08, - "grad_norm": 0.42424685772485127, - "learning_rate": 1.984479275337499e-05, - "loss": 0.2457, + "epoch": 0.07, + "grad_norm": 0.610611819142604, + "learning_rate": 1.9926203129581503e-05, + "loss": 0.3963, "step": 1471 }, { - "epoch": 0.08, - "grad_norm": 1.4301253843484638, - "learning_rate": 1.984446599112835e-05, - "loss": 0.838, + "epoch": 0.07, + "grad_norm": 0.5208484063746759, + "learning_rate": 1.9926022588933093e-05, + "loss": 0.3101, "step": 1472 }, { - "epoch": 0.08, - "grad_norm": 0.3919599300947234, - "learning_rate": 1.9844138887968273e-05, - "loss": 0.304, + "epoch": 0.07, + "grad_norm": 0.3787873873359929, + "learning_rate": 1.9925841828532108e-05, + "loss": 0.1316, "step": 1473 }, { - "epoch": 0.08, - "grad_norm": 0.3932132520222729, - "learning_rate": 1.9843811443906093e-05, - "loss": 0.1543, + "epoch": 0.07, + "grad_norm": 0.6247565233932612, + "learning_rate": 1.9925660848382554e-05, + "loss": 0.3647, "step": 1474 }, { - "epoch": 0.08, - "grad_norm": 0.6901366164674826, - "learning_rate": 1.9843483658953148e-05, - "loss": 0.4529, + "epoch": 0.07, + "grad_norm": 0.4865431538527934, + "learning_rate": 1.992547964848843e-05, + "loss": 0.2888, "step": 1475 }, { - "epoch": 0.08, - "grad_norm": 0.7955472019255666, - "learning_rate": 1.9843155533120782e-05, - "loss": 0.5426, + "epoch": 0.07, + "grad_norm": 1.4687961061338841, + "learning_rate": 1.992529822885376e-05, + "loss": 0.4877, "step": 1476 }, { - "epoch": 0.08, - "grad_norm": 0.47244127096043176, - "learning_rate": 1.9842827066420366e-05, - "loss": 0.2129, + "epoch": 0.07, + "grad_norm": 1.0292343610276768, + "learning_rate": 1.9925116589482548e-05, + "loss": 0.5292, "step": 1477 }, { - "epoch": 0.08, - "grad_norm": 0.4441680857479455, - "learning_rate": 1.9842498258863274e-05, - "loss": 0.362, + "epoch": 0.07, + "grad_norm": 0.49795888908904185, + "learning_rate": 1.992493473037882e-05, + "loss": 0.3316, "step": 1478 }, { - "epoch": 0.08, - "grad_norm": 0.5322606042879695, - "learning_rate": 1.9842169110460885e-05, - "loss": 0.295, + "epoch": 0.07, + "grad_norm": 0.5689199824202983, + "learning_rate": 1.9924752651546604e-05, + "loss": 0.3312, "step": 1479 }, { - "epoch": 0.09, - "grad_norm": 0.404128094447175, - "learning_rate": 1.9841839621224606e-05, - "loss": 0.2315, + "epoch": 0.07, + "grad_norm": 0.4995229966348174, + "learning_rate": 1.9924570352989932e-05, + "loss": 0.1336, "step": 1480 }, { - "epoch": 0.09, - "grad_norm": 0.5781529263016978, - "learning_rate": 1.9841509791165847e-05, - "loss": 0.3367, + "epoch": 0.07, + "grad_norm": 0.6928466225986046, + "learning_rate": 1.9924387834712836e-05, + "loss": 0.3538, "step": 1481 }, { - "epoch": 0.09, - "grad_norm": 0.6389770846973686, - "learning_rate": 1.9841179620296022e-05, - "loss": 0.4023, + "epoch": 0.07, + "grad_norm": 2.1706820467100805, + "learning_rate": 1.992420509671936e-05, + "loss": 0.4919, "step": 1482 }, { - "epoch": 0.09, - "grad_norm": 0.5240438297258404, - "learning_rate": 1.9840849108626574e-05, - "loss": 0.3152, + "epoch": 0.07, + "grad_norm": 0.5526627998334057, + "learning_rate": 1.9924022139013548e-05, + "loss": 0.3236, "step": 1483 }, { - "epoch": 0.09, - "grad_norm": 0.6238085399437978, - "learning_rate": 1.984051825616894e-05, - "loss": 0.34, + "epoch": 0.07, + "grad_norm": 0.5865691830523876, + "learning_rate": 1.992383896159945e-05, + "loss": 0.347, "step": 1484 }, { - "epoch": 0.09, - "grad_norm": 0.5489870116923643, - "learning_rate": 1.9840187062934583e-05, - "loss": 0.3452, + "epoch": 0.07, + "grad_norm": 1.3838575907475468, + "learning_rate": 1.992365556448112e-05, + "loss": 0.693, "step": 1485 }, { - "epoch": 0.09, - "grad_norm": 0.3663527210417489, - "learning_rate": 1.9839855528934972e-05, - "loss": 0.2432, + "epoch": 0.07, + "grad_norm": 0.3732546480240207, + "learning_rate": 1.9923471947662624e-05, + "loss": 0.2009, "step": 1486 }, { - "epoch": 0.09, - "grad_norm": 0.6521194126080185, - "learning_rate": 1.983952365418159e-05, - "loss": 0.3821, + "epoch": 0.07, + "grad_norm": 0.6289618250378007, + "learning_rate": 1.9923288111148022e-05, + "loss": 0.3557, "step": 1487 }, { - "epoch": 0.09, - "grad_norm": 0.48007821958494157, - "learning_rate": 1.9839191438685922e-05, - "loss": 0.3019, + "epoch": 0.07, + "grad_norm": 2.225963206120445, + "learning_rate": 1.9923104054941386e-05, + "loss": 0.578, "step": 1488 }, { - "epoch": 0.09, - "grad_norm": 0.5161949916231795, - "learning_rate": 1.9838858882459483e-05, - "loss": 0.3056, + "epoch": 0.07, + "grad_norm": 0.6390694047929903, + "learning_rate": 1.992291977904679e-05, + "loss": 0.343, "step": 1489 }, { - "epoch": 0.09, - "grad_norm": 0.5390200772732368, - "learning_rate": 1.9838525985513783e-05, - "loss": 0.2989, + "epoch": 0.07, + "grad_norm": 0.6043409143185197, + "learning_rate": 1.9922735283468314e-05, + "loss": 0.3202, "step": 1490 }, { - "epoch": 0.09, - "grad_norm": 0.8864658034838391, - "learning_rate": 1.9838192747860345e-05, - "loss": 0.6144, + "epoch": 0.07, + "grad_norm": 0.6294854949311045, + "learning_rate": 1.992255056821004e-05, + "loss": 0.3677, "step": 1491 }, { - "epoch": 0.09, - "grad_norm": 0.5180367210317303, - "learning_rate": 1.9837859169510723e-05, - "loss": 0.4018, + "epoch": 0.07, + "grad_norm": 0.5293231067889055, + "learning_rate": 1.992236563327606e-05, + "loss": 0.2172, "step": 1492 }, { - "epoch": 0.09, - "grad_norm": 0.4120010395143323, - "learning_rate": 1.9837525250476454e-05, - "loss": 0.2938, + "epoch": 0.07, + "grad_norm": 0.5918134445860066, + "learning_rate": 1.992218047867047e-05, + "loss": 0.3259, "step": 1493 }, { - "epoch": 0.09, - "grad_norm": 0.40632841819630106, - "learning_rate": 1.9837190990769115e-05, - "loss": 0.2472, + "epoch": 0.07, + "grad_norm": 1.008386117279372, + "learning_rate": 1.992199510439737e-05, + "loss": 0.4155, "step": 1494 }, { - "epoch": 0.09, - "grad_norm": 0.7107951744200354, - "learning_rate": 1.9836856390400273e-05, - "loss": 0.3808, + "epoch": 0.07, + "grad_norm": 0.8220185485267679, + "learning_rate": 1.992180951046086e-05, + "loss": 0.3406, "step": 1495 }, { - "epoch": 0.09, - "grad_norm": 0.5256133147742968, - "learning_rate": 1.9836521449381515e-05, - "loss": 0.3597, + "epoch": 0.07, + "grad_norm": 0.5218961313792428, + "learning_rate": 1.9921623696865046e-05, + "loss": 0.3019, "step": 1496 }, { - "epoch": 0.09, - "grad_norm": 0.43642958130027093, - "learning_rate": 1.9836186167724443e-05, - "loss": 0.3331, + "epoch": 0.07, + "grad_norm": 1.6689870911268383, + "learning_rate": 1.992143766361405e-05, + "loss": 0.6691, "step": 1497 }, { - "epoch": 0.09, - "grad_norm": 0.6079517801611382, - "learning_rate": 1.983585054544067e-05, - "loss": 0.4103, + "epoch": 0.07, + "grad_norm": 0.4236442596716266, + "learning_rate": 1.9921251410711986e-05, + "loss": 0.2986, "step": 1498 }, { - "epoch": 0.09, - "grad_norm": 0.46354758872951146, - "learning_rate": 1.9835514582541812e-05, - "loss": 0.3661, + "epoch": 0.07, + "grad_norm": 0.478992561272367, + "learning_rate": 1.992106493816298e-05, + "loss": 0.2932, "step": 1499 }, { - "epoch": 0.09, - "grad_norm": 0.2972644570786185, - "learning_rate": 1.983517827903951e-05, - "loss": 0.1053, + "epoch": 0.07, + "grad_norm": 0.5363452749761587, + "learning_rate": 1.9920878245971152e-05, + "loss": 0.3342, "step": 1500 }, { - "epoch": 0.09, - "grad_norm": 0.3795868350126114, - "learning_rate": 1.9834841634945402e-05, - "loss": 0.2982, + "epoch": 0.07, + "grad_norm": 1.481371912253873, + "learning_rate": 1.9920691334140646e-05, + "loss": 0.737, "step": 1501 }, { - "epoch": 0.09, - "grad_norm": 0.5592247863268277, - "learning_rate": 1.9834504650271157e-05, - "loss": 0.3494, + "epoch": 0.07, + "grad_norm": 0.5341309178144115, + "learning_rate": 1.9920504202675595e-05, + "loss": 0.2471, "step": 1502 }, { - "epoch": 0.09, - "grad_norm": 0.6832464565355145, - "learning_rate": 1.9834167325028436e-05, - "loss": 0.4326, + "epoch": 0.07, + "grad_norm": 0.6164072971562811, + "learning_rate": 1.9920316851580142e-05, + "loss": 0.36, "step": 1503 }, { - "epoch": 0.09, - "grad_norm": 0.45112271937913245, - "learning_rate": 1.9833829659228923e-05, - "loss": 0.3454, + "epoch": 0.07, + "grad_norm": 0.9386550228673339, + "learning_rate": 1.9920129280858434e-05, + "loss": 0.5267, "step": 1504 }, { - "epoch": 0.09, - "grad_norm": 0.6484508195836393, - "learning_rate": 1.983349165288431e-05, - "loss": 0.3971, + "epoch": 0.07, + "grad_norm": 0.4840531695952259, + "learning_rate": 1.991994149051463e-05, + "loss": 0.2331, "step": 1505 }, { - "epoch": 0.09, - "grad_norm": 0.3254094644109379, - "learning_rate": 1.983315330600631e-05, - "loss": 0.2295, + "epoch": 0.07, + "grad_norm": 0.3435908178834481, + "learning_rate": 1.9919753480552877e-05, + "loss": 0.2319, "step": 1506 }, { - "epoch": 0.09, - "grad_norm": 0.45949992766591424, - "learning_rate": 1.983281461860663e-05, - "loss": 0.2378, + "epoch": 0.07, + "grad_norm": 0.7148718727366655, + "learning_rate": 1.9919565250977345e-05, + "loss": 0.4447, "step": 1507 }, { - "epoch": 0.09, - "grad_norm": 0.6729297248346193, - "learning_rate": 1.9832475590697e-05, - "loss": 0.4185, + "epoch": 0.07, + "grad_norm": 0.4735822461374496, + "learning_rate": 1.9919376801792198e-05, + "loss": 0.2341, "step": 1508 }, { - "epoch": 0.09, - "grad_norm": 0.5529243330352177, - "learning_rate": 1.9832136222289168e-05, - "loss": 0.3886, + "epoch": 0.07, + "grad_norm": 1.0735293429647461, + "learning_rate": 1.991918813300161e-05, + "loss": 0.6065, "step": 1509 }, { - "epoch": 0.09, - "grad_norm": 0.6263171618684181, - "learning_rate": 1.983179651339488e-05, - "loss": 0.3395, + "epoch": 0.07, + "grad_norm": 0.6691131889075466, + "learning_rate": 1.9918999244609757e-05, + "loss": 0.4491, "step": 1510 }, { - "epoch": 0.09, - "grad_norm": 0.4995660700715559, - "learning_rate": 1.9831456464025897e-05, - "loss": 0.3677, + "epoch": 0.07, + "grad_norm": 0.46447420797096395, + "learning_rate": 1.9918810136620818e-05, + "loss": 0.2825, "step": 1511 }, { - "epoch": 0.09, - "grad_norm": 0.32612760199585944, - "learning_rate": 1.9831116074194006e-05, - "loss": 0.1966, + "epoch": 0.07, + "grad_norm": 0.35224650826100323, + "learning_rate": 1.9918620809038987e-05, + "loss": 0.1635, "step": 1512 }, { - "epoch": 0.09, - "grad_norm": 0.507043580890754, - "learning_rate": 1.9830775343910984e-05, - "loss": 0.2792, + "epoch": 0.07, + "grad_norm": 1.6048888561669619, + "learning_rate": 1.9918431261868445e-05, + "loss": 0.7987, "step": 1513 }, { - "epoch": 0.09, - "grad_norm": 0.5548136660748703, - "learning_rate": 1.9830434273188636e-05, - "loss": 0.3315, + "epoch": 0.07, + "grad_norm": 0.5269809647470082, + "learning_rate": 1.99182414951134e-05, + "loss": 0.3221, "step": 1514 }, { - "epoch": 0.09, - "grad_norm": 1.1903386244763454, - "learning_rate": 1.9830092862038773e-05, - "loss": 0.5327, + "epoch": 0.07, + "grad_norm": 0.5024405326355051, + "learning_rate": 1.9918051508778045e-05, + "loss": 0.2753, "step": 1515 }, { - "epoch": 0.09, - "grad_norm": 0.35362906803195593, - "learning_rate": 1.9829751110473215e-05, - "loss": 0.0803, + "epoch": 0.07, + "grad_norm": 1.0102530515698134, + "learning_rate": 1.991786130286659e-05, + "loss": 0.5391, "step": 1516 }, { - "epoch": 0.09, - "grad_norm": 0.38747733634272685, - "learning_rate": 1.98294090185038e-05, - "loss": 0.3157, + "epoch": 0.07, + "grad_norm": 0.5095096149203909, + "learning_rate": 1.9917670877383244e-05, + "loss": 0.3395, "step": 1517 }, { - "epoch": 0.09, - "grad_norm": 0.3734505271613158, - "learning_rate": 1.9829066586142375e-05, - "loss": 0.2002, + "epoch": 0.07, + "grad_norm": 0.34202074954775563, + "learning_rate": 1.9917480232332226e-05, + "loss": 0.1801, "step": 1518 }, { - "epoch": 0.09, - "grad_norm": 0.9309793973511313, - "learning_rate": 1.982872381340079e-05, - "loss": 0.5253, + "epoch": 0.07, + "grad_norm": 0.6726957961539836, + "learning_rate": 1.9917289367717748e-05, + "loss": 0.4603, "step": 1519 }, { - "epoch": 0.09, - "grad_norm": 0.8832068734492745, - "learning_rate": 1.982838070029093e-05, - "loss": 0.3373, + "epoch": 0.07, + "grad_norm": 0.49330561121157185, + "learning_rate": 1.9917098283544046e-05, + "loss": 0.285, "step": 1520 }, { - "epoch": 0.09, - "grad_norm": 0.5060812038856458, - "learning_rate": 1.9828037246824664e-05, - "loss": 0.3777, + "epoch": 0.07, + "grad_norm": 0.8908499294300708, + "learning_rate": 1.9916906979815345e-05, + "loss": 0.4642, "step": 1521 }, { - "epoch": 0.09, - "grad_norm": 0.7545887148384214, - "learning_rate": 1.9827693453013892e-05, - "loss": 0.5325, + "epoch": 0.07, + "grad_norm": 0.4936213530434846, + "learning_rate": 1.9916715456535884e-05, + "loss": 0.3922, "step": 1522 }, { - "epoch": 0.09, - "grad_norm": 0.3855633161515275, - "learning_rate": 1.982734931887052e-05, - "loss": 0.2315, + "epoch": 0.07, + "grad_norm": 0.5360738411411892, + "learning_rate": 1.9916523713709898e-05, + "loss": 0.2964, "step": 1523 }, { - "epoch": 0.09, - "grad_norm": 0.3295751571087598, - "learning_rate": 1.982700484440646e-05, - "loss": 0.2219, + "epoch": 0.07, + "grad_norm": 0.4024242535447471, + "learning_rate": 1.9916331751341635e-05, + "loss": 0.2468, "step": 1524 }, { - "epoch": 0.09, - "grad_norm": 0.4242518325493269, - "learning_rate": 1.982666002963365e-05, - "loss": 0.3222, + "epoch": 0.07, + "grad_norm": 0.5146570721272373, + "learning_rate": 1.9916139569435345e-05, + "loss": 0.3566, "step": 1525 }, { - "epoch": 0.09, - "grad_norm": 0.4857458074762703, - "learning_rate": 1.982631487456402e-05, - "loss": 0.3091, + "epoch": 0.07, + "grad_norm": 0.5241297550316266, + "learning_rate": 1.9915947167995286e-05, + "loss": 0.3294, "step": 1526 }, { - "epoch": 0.09, - "grad_norm": 0.8730410681906142, - "learning_rate": 1.9825969379209533e-05, - "loss": 0.5321, + "epoch": 0.07, + "grad_norm": 0.4992925608496907, + "learning_rate": 1.991575454702571e-05, + "loss": 0.4009, "step": 1527 }, { - "epoch": 0.09, - "grad_norm": 0.8632641640956932, - "learning_rate": 1.9825623543582145e-05, - "loss": 0.4488, + "epoch": 0.07, + "grad_norm": 0.9095677394369446, + "learning_rate": 1.9915561706530882e-05, + "loss": 0.4231, "step": 1528 }, { - "epoch": 0.09, - "grad_norm": 0.38414737870140997, - "learning_rate": 1.982527736769384e-05, - "loss": 0.2726, + "epoch": 0.07, + "grad_norm": 0.5337551094289965, + "learning_rate": 1.991536864651508e-05, + "loss": 0.3156, "step": 1529 }, { - "epoch": 0.09, - "grad_norm": 0.4745151571052424, - "learning_rate": 1.9824930851556604e-05, - "loss": 0.3034, + "epoch": 0.07, + "grad_norm": 0.36667257920464214, + "learning_rate": 1.991517536698257e-05, + "loss": 0.2976, "step": 1530 }, { - "epoch": 0.09, - "grad_norm": 0.9141713621897183, - "learning_rate": 1.982458399518243e-05, - "loss": 0.5761, + "epoch": 0.07, + "grad_norm": 1.3485829050691673, + "learning_rate": 1.9914981867937635e-05, + "loss": 0.6222, "step": 1531 }, { - "epoch": 0.09, - "grad_norm": 0.4592062472149273, - "learning_rate": 1.9824236798583338e-05, - "loss": 0.3236, + "epoch": 0.07, + "grad_norm": 0.40615256703752833, + "learning_rate": 1.991478814938456e-05, + "loss": 0.2492, "step": 1532 }, { - "epoch": 0.09, - "grad_norm": 0.32912152990907256, - "learning_rate": 1.9823889261771346e-05, - "loss": 0.2727, + "epoch": 0.07, + "grad_norm": 0.9322147976390529, + "learning_rate": 1.9914594211327623e-05, + "loss": 0.5351, "step": 1533 }, { - "epoch": 0.09, - "grad_norm": 0.9264614208288615, - "learning_rate": 1.9823541384758492e-05, - "loss": 0.4646, + "epoch": 0.07, + "grad_norm": 0.517942649927803, + "learning_rate": 1.9914400053771136e-05, + "loss": 0.3193, "step": 1534 }, { - "epoch": 0.09, - "grad_norm": 0.43380933167627445, - "learning_rate": 1.982319316755682e-05, - "loss": 0.3267, + "epoch": 0.07, + "grad_norm": 0.42162433110708175, + "learning_rate": 1.9914205676719382e-05, + "loss": 0.3005, "step": 1535 }, { - "epoch": 0.09, - "grad_norm": 0.3434369284109887, - "learning_rate": 1.9822844610178394e-05, - "loss": 0.1541, + "epoch": 0.07, + "grad_norm": 0.48892814476864305, + "learning_rate": 1.991401108017667e-05, + "loss": 0.2688, "step": 1536 }, { - "epoch": 0.09, - "grad_norm": 0.42805356727710603, - "learning_rate": 1.982249571263528e-05, - "loss": 0.35, + "epoch": 0.07, + "grad_norm": 1.244581596447113, + "learning_rate": 1.991381626414731e-05, + "loss": 0.7364, "step": 1537 }, { - "epoch": 0.09, - "grad_norm": 0.46768921780410705, - "learning_rate": 1.9822146474939563e-05, - "loss": 0.3597, + "epoch": 0.07, + "grad_norm": 0.49450559805278826, + "learning_rate": 1.9913621228635608e-05, + "loss": 0.2743, "step": 1538 }, { - "epoch": 0.09, - "grad_norm": 0.3205738141307784, - "learning_rate": 1.9821796897103334e-05, - "loss": 0.171, + "epoch": 0.07, + "grad_norm": 0.52999713367487, + "learning_rate": 1.9913425973645894e-05, + "loss": 0.378, "step": 1539 }, { - "epoch": 0.09, - "grad_norm": 0.4918143379538728, - "learning_rate": 1.98214469791387e-05, - "loss": 0.3352, + "epoch": 0.07, + "grad_norm": 0.5227068460743706, + "learning_rate": 1.991323049918248e-05, + "loss": 0.274, "step": 1540 }, { - "epoch": 0.09, - "grad_norm": 0.38168005214386574, - "learning_rate": 1.9821096721057787e-05, - "loss": 0.3347, + "epoch": 0.07, + "grad_norm": 0.4820267632534573, + "learning_rate": 1.9913034805249697e-05, + "loss": 0.2009, "step": 1541 }, { - "epoch": 0.09, - "grad_norm": 0.9996924570187796, - "learning_rate": 1.982074612287271e-05, - "loss": 0.4523, + "epoch": 0.07, + "grad_norm": 0.43346931641409564, + "learning_rate": 1.9912838891851877e-05, + "loss": 0.334, "step": 1542 }, { - "epoch": 0.09, - "grad_norm": 0.532892984371382, - "learning_rate": 1.982039518459562e-05, - "loss": 0.4597, + "epoch": 0.07, + "grad_norm": 1.0289571281384937, + "learning_rate": 1.991264275899336e-05, + "loss": 0.5898, "step": 1543 }, { - "epoch": 0.09, - "grad_norm": 0.4556488663844575, - "learning_rate": 1.9820043906238667e-05, - "loss": 0.3112, + "epoch": 0.07, + "grad_norm": 0.5082834174720121, + "learning_rate": 1.9912446406678484e-05, + "loss": 0.2666, "step": 1544 }, { - "epoch": 0.09, - "grad_norm": 0.5333818960745033, - "learning_rate": 1.9819692287814014e-05, - "loss": 0.3715, + "epoch": 0.07, + "grad_norm": 0.3681903439352201, + "learning_rate": 1.99122498349116e-05, + "loss": 0.2482, "step": 1545 }, { - "epoch": 0.09, - "grad_norm": 0.3205794074373295, - "learning_rate": 1.981934032933384e-05, - "loss": 0.1031, + "epoch": 0.07, + "grad_norm": 0.5488576444182283, + "learning_rate": 1.9912053043697058e-05, + "loss": 0.4004, "step": 1546 }, { - "epoch": 0.09, - "grad_norm": 0.5161864800464732, - "learning_rate": 1.981898803081033e-05, - "loss": 0.3052, + "epoch": 0.07, + "grad_norm": 0.4155364622945553, + "learning_rate": 1.9911856033039213e-05, + "loss": 0.2196, "step": 1547 }, { - "epoch": 0.09, - "grad_norm": 0.7795833825785495, - "learning_rate": 1.981863539225569e-05, - "loss": 0.5345, + "epoch": 0.07, + "grad_norm": 0.6581225781750124, + "learning_rate": 1.9911658802942432e-05, + "loss": 0.4075, "step": 1548 }, { - "epoch": 0.09, - "grad_norm": 0.4615357704091355, - "learning_rate": 1.9818282413682127e-05, - "loss": 0.3508, + "epoch": 0.07, + "grad_norm": 0.8759597520374959, + "learning_rate": 1.9911461353411074e-05, + "loss": 0.6097, "step": 1549 }, { - "epoch": 0.09, - "grad_norm": 0.3946497104947286, - "learning_rate": 1.981792909510187e-05, - "loss": 0.2904, + "epoch": 0.07, + "grad_norm": 0.38823356006275106, + "learning_rate": 1.991126368444952e-05, + "loss": 0.3148, "step": 1550 }, { - "epoch": 0.09, - "grad_norm": 0.3454321623744058, - "learning_rate": 1.9817575436527147e-05, - "loss": 0.2079, + "epoch": 0.07, + "grad_norm": 0.6430386672639617, + "learning_rate": 1.9911065796062137e-05, + "loss": 0.3295, "step": 1551 }, { - "epoch": 0.09, - "grad_norm": 0.4495745099321259, - "learning_rate": 1.981722143797021e-05, - "loss": 0.2907, + "epoch": 0.07, + "grad_norm": 0.501096721596823, + "learning_rate": 1.9910867688253307e-05, + "loss": 0.2124, "step": 1552 }, { - "epoch": 0.09, - "grad_norm": 0.45876952116799197, - "learning_rate": 1.9816867099443314e-05, - "loss": 0.3153, + "epoch": 0.07, + "grad_norm": 0.5579527929957838, + "learning_rate": 1.9910669361027425e-05, + "loss": 0.2916, "step": 1553 }, { - "epoch": 0.09, - "grad_norm": 1.1485942385955623, - "learning_rate": 1.9816512420958734e-05, - "loss": 0.5895, + "epoch": 0.07, + "grad_norm": 0.45283228337814474, + "learning_rate": 1.991047081438887e-05, + "loss": 0.2913, "step": 1554 }, { - "epoch": 0.09, - "grad_norm": 0.9054974881219597, - "learning_rate": 1.9816157402528753e-05, - "loss": 0.5669, + "epoch": 0.07, + "grad_norm": 1.0624903975926498, + "learning_rate": 1.991027204834205e-05, + "loss": 0.5811, "step": 1555 }, { - "epoch": 0.09, - "grad_norm": 0.4243340339417857, - "learning_rate": 1.9815802044165663e-05, - "loss": 0.2367, + "epoch": 0.07, + "grad_norm": 0.49870610844660096, + "learning_rate": 1.991007306289135e-05, + "loss": 0.3519, "step": 1556 }, { - "epoch": 0.09, - "grad_norm": 0.5642073908469096, - "learning_rate": 1.981544634588177e-05, - "loss": 0.3691, + "epoch": 0.07, + "grad_norm": 0.49115759949122106, + "learning_rate": 1.9909873858041187e-05, + "loss": 0.2112, "step": 1557 }, { - "epoch": 0.09, - "grad_norm": 0.46111573851136795, - "learning_rate": 1.9815090307689392e-05, - "loss": 0.2229, + "epoch": 0.07, + "grad_norm": 0.3832792079133785, + "learning_rate": 1.990967443379597e-05, + "loss": 0.2972, "step": 1558 }, { - "epoch": 0.09, - "grad_norm": 0.42590791206403383, - "learning_rate": 1.9814733929600857e-05, - "loss": 0.2272, + "epoch": 0.07, + "grad_norm": 0.9625658885904004, + "learning_rate": 1.990947479016011e-05, + "loss": 0.5549, "step": 1559 }, { - "epoch": 0.09, - "grad_norm": 1.9421334937751409, - "learning_rate": 1.981437721162851e-05, - "loss": 0.5379, + "epoch": 0.07, + "grad_norm": 0.5932491643804502, + "learning_rate": 1.990927492713803e-05, + "loss": 0.297, "step": 1560 }, { - "epoch": 0.09, - "grad_norm": 0.561819349052626, - "learning_rate": 1.98140201537847e-05, - "loss": 0.3477, + "epoch": 0.07, + "grad_norm": 0.8675220525746768, + "learning_rate": 1.9909074844734152e-05, + "loss": 0.4535, "step": 1561 }, { - "epoch": 0.09, - "grad_norm": 0.4240617607652223, - "learning_rate": 1.9813662756081794e-05, - "loss": 0.2214, + "epoch": 0.07, + "grad_norm": 0.4960835082167606, + "learning_rate": 1.990887454295291e-05, + "loss": 0.2949, "step": 1562 }, { - "epoch": 0.09, - "grad_norm": 1.0596622839127672, - "learning_rate": 1.9813305018532172e-05, - "loss": 0.7146, + "epoch": 0.07, + "grad_norm": 0.5461508613100479, + "learning_rate": 1.9908674021798735e-05, + "loss": 0.3808, "step": 1563 }, { - "epoch": 0.09, - "grad_norm": 0.3903654621157037, - "learning_rate": 1.981294694114822e-05, - "loss": 0.2457, + "epoch": 0.07, + "grad_norm": 0.30497126466041974, + "learning_rate": 1.9908473281276068e-05, + "loss": 0.0824, "step": 1564 }, { - "epoch": 0.09, - "grad_norm": 0.4339515383696826, - "learning_rate": 1.9812588523942334e-05, - "loss": 0.2166, + "epoch": 0.07, + "grad_norm": 0.8057675810488799, + "learning_rate": 1.990827232138935e-05, + "loss": 0.366, "step": 1565 }, { - "epoch": 0.09, - "grad_norm": 1.650174074607824, - "learning_rate": 1.981222976692693e-05, - "loss": 0.5498, + "epoch": 0.07, + "grad_norm": 0.556448624705394, + "learning_rate": 1.9908071142143036e-05, + "loss": 0.3168, "step": 1566 }, { - "epoch": 0.09, - "grad_norm": 1.2077256521150712, - "learning_rate": 1.981187067011443e-05, - "loss": 0.6577, + "epoch": 0.07, + "grad_norm": 1.172294514574269, + "learning_rate": 1.9907869743541576e-05, + "loss": 0.4791, "step": 1567 }, { - "epoch": 0.09, - "grad_norm": 0.6294600660337998, - "learning_rate": 1.9811511233517275e-05, - "loss": 0.313, + "epoch": 0.07, + "grad_norm": 0.6424740817969817, + "learning_rate": 1.9907668125589424e-05, + "loss": 0.3808, "step": 1568 }, { - "epoch": 0.09, - "grad_norm": 0.560575934466919, - "learning_rate": 1.9811151457147904e-05, - "loss": 0.29, + "epoch": 0.07, + "grad_norm": 0.45521081546171216, + "learning_rate": 1.9907466288291054e-05, + "loss": 0.2831, "step": 1569 }, { - "epoch": 0.09, - "grad_norm": 0.3676919839173703, - "learning_rate": 1.981079134101878e-05, - "loss": 0.214, + "epoch": 0.07, + "grad_norm": 0.3853403378918902, + "learning_rate": 1.9907264231650927e-05, + "loss": 0.22, "step": 1570 }, { - "epoch": 0.09, - "grad_norm": 0.5136664835618114, - "learning_rate": 1.9810430885142377e-05, - "loss": 0.312, + "epoch": 0.07, + "grad_norm": 0.728215779597287, + "learning_rate": 1.990706195567352e-05, + "loss": 0.3672, "step": 1571 }, { - "epoch": 0.09, - "grad_norm": 1.4254358022411515, - "learning_rate": 1.981007008953117e-05, - "loss": 0.4594, + "epoch": 0.07, + "grad_norm": 0.7422748526949783, + "learning_rate": 1.9906859460363307e-05, + "loss": 0.3681, "step": 1572 }, { - "epoch": 0.09, - "grad_norm": 0.7586444608485484, - "learning_rate": 1.9809708954197658e-05, - "loss": 0.3544, + "epoch": 0.07, + "grad_norm": 1.8082452053295823, + "learning_rate": 1.990665674572478e-05, + "loss": 0.5557, "step": 1573 }, { - "epoch": 0.09, - "grad_norm": 0.5827592667394669, - "learning_rate": 1.980934747915435e-05, - "loss": 0.2916, + "epoch": 0.07, + "grad_norm": 0.4457112867292302, + "learning_rate": 1.9906453811762415e-05, + "loss": 0.257, "step": 1574 }, { - "epoch": 0.09, - "grad_norm": 1.6997799572006387, - "learning_rate": 1.9808985664413757e-05, - "loss": 0.4934, + "epoch": 0.07, + "grad_norm": 0.7499465778804616, + "learning_rate": 1.9906250658480712e-05, + "loss": 0.4179, "step": 1575 }, { - "epoch": 0.09, - "grad_norm": 0.34869623686568857, - "learning_rate": 1.9808623509988415e-05, - "loss": 0.2492, + "epoch": 0.07, + "grad_norm": 0.3588120538579702, + "learning_rate": 1.9906047285884168e-05, + "loss": 0.232, "step": 1576 }, { - "epoch": 0.09, - "grad_norm": 0.7051869049693643, - "learning_rate": 1.980826101589086e-05, - "loss": 0.3112, + "epoch": 0.07, + "grad_norm": 1.1577489811422432, + "learning_rate": 1.9905843693977288e-05, + "loss": 0.3668, "step": 1577 }, { - "epoch": 0.09, - "grad_norm": 2.375367808092143, - "learning_rate": 1.980789818213365e-05, - "loss": 0.4905, + "epoch": 0.07, + "grad_norm": 0.5303478905504729, + "learning_rate": 1.9905639882764573e-05, + "loss": 0.3144, "step": 1578 }, { - "epoch": 0.09, - "grad_norm": 1.6868012547392646, - "learning_rate": 1.9807535008729347e-05, - "loss": 0.8162, + "epoch": 0.07, + "grad_norm": 1.4562165819409276, + "learning_rate": 1.9905435852250535e-05, + "loss": 0.5832, "step": 1579 }, { - "epoch": 0.09, - "grad_norm": 0.5252690640932575, - "learning_rate": 1.980717149569053e-05, - "loss": 0.3133, + "epoch": 0.07, + "grad_norm": 0.5423490235388511, + "learning_rate": 1.9905231602439697e-05, + "loss": 0.2935, "step": 1580 }, { - "epoch": 0.09, - "grad_norm": 0.467302492898827, - "learning_rate": 1.9806807643029786e-05, - "loss": 0.3452, + "epoch": 0.07, + "grad_norm": 0.5110195187141678, + "learning_rate": 1.990502713333658e-05, + "loss": 0.3359, "step": 1581 }, { - "epoch": 0.09, - "grad_norm": 1.0265829484598235, - "learning_rate": 1.9806443450759715e-05, - "loss": 0.3143, + "epoch": 0.07, + "grad_norm": 0.4071794586068742, + "learning_rate": 1.9904822444945706e-05, + "loss": 0.2927, "step": 1582 }, { - "epoch": 0.09, - "grad_norm": 0.5886112657839901, - "learning_rate": 1.9806078918892925e-05, - "loss": 0.34, + "epoch": 0.07, + "grad_norm": 0.5213134783081707, + "learning_rate": 1.9904617537271608e-05, + "loss": 0.1887, "step": 1583 }, { - "epoch": 0.09, - "grad_norm": 0.5920413529763409, - "learning_rate": 1.9805714047442045e-05, - "loss": 0.3122, + "epoch": 0.07, + "grad_norm": 0.8555721123981634, + "learning_rate": 1.9904412410318828e-05, + "loss": 0.4186, "step": 1584 }, { - "epoch": 0.09, - "grad_norm": 0.6764117396792537, - "learning_rate": 1.9805348836419712e-05, - "loss": 0.3036, + "epoch": 0.07, + "grad_norm": 1.8746265717332116, + "learning_rate": 1.99042070640919e-05, + "loss": 0.8166, "step": 1585 }, { - "epoch": 0.09, - "grad_norm": 0.5594852265265499, - "learning_rate": 1.9804983285838567e-05, - "loss": 0.3569, + "epoch": 0.07, + "grad_norm": 0.5361763529706056, + "learning_rate": 1.9904001498595374e-05, + "loss": 0.3668, "step": 1586 }, { - "epoch": 0.09, - "grad_norm": 0.6844695489408091, - "learning_rate": 1.9804617395711275e-05, - "loss": 0.4128, + "epoch": 0.07, + "grad_norm": 0.4967554784829038, + "learning_rate": 1.99037957138338e-05, + "loss": 0.2397, "step": 1587 }, { - "epoch": 0.09, - "grad_norm": 0.6307190369431338, - "learning_rate": 1.9804251166050505e-05, - "loss": 0.3458, + "epoch": 0.07, + "grad_norm": 0.36306897827220835, + "learning_rate": 1.990358970981174e-05, + "loss": 0.2037, "step": 1588 }, { - "epoch": 0.09, - "grad_norm": 0.41952195036620993, - "learning_rate": 1.9803884596868937e-05, - "loss": 0.3168, + "epoch": 0.07, + "grad_norm": 0.4731443444917627, + "learning_rate": 1.9903383486533743e-05, + "loss": 0.4076, "step": 1589 }, { - "epoch": 0.09, - "grad_norm": 0.3453562469345302, - "learning_rate": 1.9803517688179264e-05, - "loss": 0.2204, + "epoch": 0.07, + "grad_norm": 0.5574544132989624, + "learning_rate": 1.990317704400438e-05, + "loss": 0.2661, "step": 1590 }, { - "epoch": 0.09, - "grad_norm": 1.0262728387774869, - "learning_rate": 1.9803150439994202e-05, - "loss": 0.5397, + "epoch": 0.07, + "grad_norm": 0.9888429485348341, + "learning_rate": 1.9902970382228226e-05, + "loss": 0.5912, "step": 1591 }, { - "epoch": 0.09, - "grad_norm": 0.5369925676511108, - "learning_rate": 1.9802782852326456e-05, - "loss": 0.3018, + "epoch": 0.07, + "grad_norm": 0.9528458373653957, + "learning_rate": 1.990276350120985e-05, + "loss": 0.5419, "step": 1592 }, { - "epoch": 0.09, - "grad_norm": 0.7179121095737457, - "learning_rate": 1.9802414925188766e-05, - "loss": 0.3874, + "epoch": 0.07, + "grad_norm": 0.517579710946933, + "learning_rate": 1.9902556400953835e-05, + "loss": 0.2476, "step": 1593 }, { - "epoch": 0.09, - "grad_norm": 0.7147170188690286, - "learning_rate": 1.9802046658593867e-05, - "loss": 0.5446, + "epoch": 0.07, + "grad_norm": 0.4500146942769244, + "learning_rate": 1.9902349081464767e-05, + "loss": 0.3548, "step": 1594 }, { - "epoch": 0.09, - "grad_norm": 0.4854192777883944, - "learning_rate": 1.9801678052554512e-05, - "loss": 0.2607, + "epoch": 0.07, + "grad_norm": 0.8633345033696577, + "learning_rate": 1.9902141542747233e-05, + "loss": 0.5357, "step": 1595 }, { - "epoch": 0.09, - "grad_norm": 0.3895561134200103, - "learning_rate": 1.9801309107083465e-05, - "loss": 0.2836, + "epoch": 0.07, + "grad_norm": 0.4842912604598192, + "learning_rate": 1.9901933784805828e-05, + "loss": 0.1659, "step": 1596 }, { - "epoch": 0.09, - "grad_norm": 0.3859364875104156, - "learning_rate": 1.9800939822193512e-05, - "loss": 0.2896, + "epoch": 0.07, + "grad_norm": 0.5689434009108335, + "learning_rate": 1.9901725807645154e-05, + "loss": 0.3734, "step": 1597 }, { - "epoch": 0.09, - "grad_norm": 0.4179295750829518, - "learning_rate": 1.980057019789743e-05, - "loss": 0.2141, + "epoch": 0.07, + "grad_norm": 1.4465191235184665, + "learning_rate": 1.9901517611269813e-05, + "loss": 0.7006, "step": 1598 }, { - "epoch": 0.09, - "grad_norm": 0.5931917930493955, - "learning_rate": 1.9800200234208022e-05, - "loss": 0.3975, + "epoch": 0.07, + "grad_norm": 0.5449836359935476, + "learning_rate": 1.9901309195684418e-05, + "loss": 0.2953, "step": 1599 }, { - "epoch": 0.09, - "grad_norm": 0.5242128316638529, - "learning_rate": 1.9799829931138107e-05, - "loss": 0.4053, + "epoch": 0.07, + "grad_norm": 0.994237056401829, + "learning_rate": 1.9901100560893578e-05, + "loss": 0.4323, "step": 1600 }, { - "epoch": 0.09, - "grad_norm": 0.4126511092046684, - "learning_rate": 1.9799459288700498e-05, - "loss": 0.2093, + "epoch": 0.07, + "grad_norm": 0.552301594696816, + "learning_rate": 1.9900891706901914e-05, + "loss": 0.3929, "step": 1601 }, { - "epoch": 0.09, - "grad_norm": 0.3701533373340219, - "learning_rate": 1.9799088306908035e-05, - "loss": 0.2427, + "epoch": 0.07, + "grad_norm": 0.399950324865002, + "learning_rate": 1.990068263371405e-05, + "loss": 0.2592, "step": 1602 }, { - "epoch": 0.09, - "grad_norm": 1.1308744292919486, - "learning_rate": 1.9798716985773567e-05, - "loss": 0.6998, + "epoch": 0.07, + "grad_norm": 0.3735408961654906, + "learning_rate": 1.9900473341334616e-05, + "loss": 0.135, "step": 1603 }, { - "epoch": 0.09, - "grad_norm": 0.3758134428810142, - "learning_rate": 1.9798345325309952e-05, - "loss": 0.2527, + "epoch": 0.07, + "grad_norm": 0.9387129568378368, + "learning_rate": 1.9900263829768246e-05, + "loss": 0.5299, "step": 1604 }, { - "epoch": 0.09, - "grad_norm": 0.5785694558525246, - "learning_rate": 1.9797973325530058e-05, - "loss": 0.4437, + "epoch": 0.07, + "grad_norm": 0.5585915389768993, + "learning_rate": 1.9900054099019575e-05, + "loss": 0.3238, "step": 1605 }, { - "epoch": 0.09, - "grad_norm": 0.7455283306651675, - "learning_rate": 1.9797600986446773e-05, - "loss": 0.5741, + "epoch": 0.07, + "grad_norm": 0.5122355485072088, + "learning_rate": 1.9899844149093244e-05, + "loss": 0.3286, "step": 1606 }, { - "epoch": 0.09, - "grad_norm": 0.49548617080975527, - "learning_rate": 1.9797228308072982e-05, - "loss": 0.3097, + "epoch": 0.07, + "grad_norm": 0.800521640669671, + "learning_rate": 1.9899633979993913e-05, + "loss": 0.4826, "step": 1607 }, { - "epoch": 0.09, - "grad_norm": 0.399945434741605, - "learning_rate": 1.97968552904216e-05, - "loss": 0.2833, + "epoch": 0.07, + "grad_norm": 0.507745206351051, + "learning_rate": 1.989942359172622e-05, + "loss": 0.3077, "step": 1608 }, { - "epoch": 0.09, - "grad_norm": 0.3260983798124931, - "learning_rate": 1.9796481933505535e-05, - "loss": 0.1926, + "epoch": 0.07, + "grad_norm": 0.3302013299867463, + "learning_rate": 1.9899212984294837e-05, + "loss": 0.2203, "step": 1609 }, { - "epoch": 0.09, - "grad_norm": 0.5046829217920206, - "learning_rate": 1.9796108237337724e-05, - "loss": 0.3538, + "epoch": 0.07, + "grad_norm": 1.0595432551140034, + "learning_rate": 1.9899002157704412e-05, + "loss": 0.5075, "step": 1610 }, { - "epoch": 0.09, - "grad_norm": 0.588275165004359, - "learning_rate": 1.979573420193111e-05, - "loss": 0.3805, + "epoch": 0.07, + "grad_norm": 0.49726877068949554, + "learning_rate": 1.9898791111959624e-05, + "loss": 0.3101, "step": 1611 }, { - "epoch": 0.09, - "grad_norm": 0.45169272979663194, - "learning_rate": 1.9795359827298643e-05, - "loss": 0.3902, + "epoch": 0.07, + "grad_norm": 0.6763077556030332, + "learning_rate": 1.9898579847065143e-05, + "loss": 0.5238, "step": 1612 }, { - "epoch": 0.09, - "grad_norm": 0.45282344751971243, - "learning_rate": 1.979498511345328e-05, - "loss": 0.3399, + "epoch": 0.07, + "grad_norm": 0.49801115849229144, + "learning_rate": 1.989836836302564e-05, + "loss": 0.3203, "step": 1613 }, { - "epoch": 0.09, - "grad_norm": 0.3358407517147635, - "learning_rate": 1.9794610060408007e-05, - "loss": 0.1913, + "epoch": 0.07, + "grad_norm": 0.4240985732303542, + "learning_rate": 1.9898156659845806e-05, + "loss": 0.2942, "step": 1614 }, { - "epoch": 0.09, - "grad_norm": 0.468574580020193, - "learning_rate": 1.979423466817581e-05, - "loss": 0.3122, + "epoch": 0.07, + "grad_norm": 0.4849334992439375, + "learning_rate": 1.9897944737530325e-05, + "loss": 0.2358, "step": 1615 }, { - "epoch": 0.09, - "grad_norm": 0.49239486490812656, - "learning_rate": 1.9793858936769683e-05, - "loss": 0.3336, + "epoch": 0.07, + "grad_norm": 1.192371844720865, + "learning_rate": 1.989773259608388e-05, + "loss": 0.4501, "step": 1616 }, { - "epoch": 0.09, - "grad_norm": 0.4719865931595738, - "learning_rate": 1.9793482866202645e-05, - "loss": 0.3269, + "epoch": 0.07, + "grad_norm": 0.45438057048368363, + "learning_rate": 1.9897520235511177e-05, + "loss": 0.3287, "step": 1617 }, { - "epoch": 0.09, - "grad_norm": 0.8462916216082507, - "learning_rate": 1.9793106456487717e-05, - "loss": 0.5964, + "epoch": 0.07, + "grad_norm": 0.47832547543209186, + "learning_rate": 1.989730765581692e-05, + "loss": 0.3981, "step": 1618 }, { - "epoch": 0.09, - "grad_norm": 0.7204368957434376, - "learning_rate": 1.9792729707637935e-05, - "loss": 0.3242, + "epoch": 0.07, + "grad_norm": 1.0887922022788834, + "learning_rate": 1.9897094857005808e-05, + "loss": 0.3157, "step": 1619 }, { - "epoch": 0.09, - "grad_norm": 0.36900163439375716, - "learning_rate": 1.979235261966634e-05, - "loss": 0.323, + "epoch": 0.07, + "grad_norm": 0.37112109980242863, + "learning_rate": 1.9896881839082554e-05, + "loss": 0.2486, "step": 1620 }, { - "epoch": 0.09, - "grad_norm": 0.3250448460505923, - "learning_rate": 1.9791975192586e-05, - "loss": 0.1878, + "epoch": 0.07, + "grad_norm": 0.49782844225620065, + "learning_rate": 1.9896668602051877e-05, + "loss": 0.2301, "step": 1621 }, { - "epoch": 0.09, - "grad_norm": 0.7114004359000305, - "learning_rate": 1.9791597426409973e-05, - "loss": 0.4203, + "epoch": 0.07, + "grad_norm": 0.484764191192648, + "learning_rate": 1.9896455145918493e-05, + "loss": 0.3139, "step": 1622 }, { - "epoch": 0.09, - "grad_norm": 0.48779469367220873, - "learning_rate": 1.9791219321151356e-05, - "loss": 0.4049, + "epoch": 0.07, + "grad_norm": 0.4246721921128543, + "learning_rate": 1.9896241470687132e-05, + "loss": 0.3033, "step": 1623 }, { - "epoch": 0.09, - "grad_norm": 0.33688741860596855, - "learning_rate": 1.979084087682323e-05, - "loss": 0.2401, + "epoch": 0.07, + "grad_norm": 0.8093219665446579, + "learning_rate": 1.9896027576362523e-05, + "loss": 0.5553, "step": 1624 }, { - "epoch": 0.09, - "grad_norm": 0.6457978863500597, - "learning_rate": 1.9790462093438707e-05, - "loss": 0.3773, + "epoch": 0.07, + "grad_norm": 0.5067173743916127, + "learning_rate": 1.98958134629494e-05, + "loss": 0.3656, "step": 1625 }, { - "epoch": 0.09, - "grad_norm": 0.43997114105386825, - "learning_rate": 1.9790082971010903e-05, - "loss": 0.3352, + "epoch": 0.07, + "grad_norm": 0.4778987820016766, + "learning_rate": 1.9895599130452507e-05, + "loss": 0.224, "step": 1626 }, { - "epoch": 0.09, - "grad_norm": 0.8578347622033227, - "learning_rate": 1.9789703509552947e-05, - "loss": 0.4459, + "epoch": 0.07, + "grad_norm": 0.6483303406555019, + "learning_rate": 1.9895384578876586e-05, + "loss": 0.3679, "step": 1627 }, { - "epoch": 0.09, - "grad_norm": 0.3320817142902381, - "learning_rate": 1.978932370907798e-05, - "loss": 0.3051, + "epoch": 0.07, + "grad_norm": 0.7685597554944757, + "learning_rate": 1.9895169808226384e-05, + "loss": 0.5533, "step": 1628 }, { - "epoch": 0.09, - "grad_norm": 0.46309576333499364, - "learning_rate": 1.978894356959915e-05, - "loss": 0.4221, + "epoch": 0.07, + "grad_norm": 0.45344718766203446, + "learning_rate": 1.9894954818506666e-05, + "loss": 0.2295, "step": 1629 }, { - "epoch": 0.09, - "grad_norm": 0.23451118651819808, - "learning_rate": 1.978856309112963e-05, - "loss": 0.1448, + "epoch": 0.07, + "grad_norm": 0.41250133428465596, + "learning_rate": 1.989473960972218e-05, + "loss": 0.3166, "step": 1630 }, { - "epoch": 0.09, - "grad_norm": 0.4679183307690478, - "learning_rate": 1.978818227368259e-05, - "loss": 0.2926, + "epoch": 0.07, + "grad_norm": 1.303157109297041, + "learning_rate": 1.9894524181877696e-05, + "loss": 0.6755, "step": 1631 }, { - "epoch": 0.09, - "grad_norm": 0.4429604060875303, - "learning_rate": 1.9787801117271213e-05, - "loss": 0.3402, + "epoch": 0.07, + "grad_norm": 0.4882174012369254, + "learning_rate": 1.9894308534977985e-05, + "loss": 0.2341, "step": 1632 }, { - "epoch": 0.09, - "grad_norm": 0.8594224955091179, - "learning_rate": 1.978741962190871e-05, - "loss": 0.6281, + "epoch": 0.08, + "grad_norm": 0.3770057595932876, + "learning_rate": 1.989409266902782e-05, + "loss": 0.2714, "step": 1633 }, { - "epoch": 0.09, - "grad_norm": 0.6065069575331038, - "learning_rate": 1.9787037787608287e-05, - "loss": 0.3928, + "epoch": 0.08, + "grad_norm": 0.9844196557664995, + "learning_rate": 1.9893876584031975e-05, + "loss": 0.5868, "step": 1634 }, { - "epoch": 0.09, - "grad_norm": 0.42824444304880876, - "learning_rate": 1.9786655614383163e-05, - "loss": 0.3295, + "epoch": 0.08, + "grad_norm": 0.4450450253321704, + "learning_rate": 1.9893660279995244e-05, + "loss": 0.298, "step": 1635 }, { - "epoch": 0.09, - "grad_norm": 0.30050373194288177, - "learning_rate": 1.978627310224658e-05, - "loss": 0.2354, + "epoch": 0.08, + "grad_norm": 0.4597287665483372, + "learning_rate": 1.9893443756922407e-05, + "loss": 0.3177, "step": 1636 }, { - "epoch": 0.09, - "grad_norm": 0.5963019421892746, - "learning_rate": 1.9785890251211777e-05, - "loss": 0.1868, + "epoch": 0.08, + "grad_norm": 0.4405564572491822, + "learning_rate": 1.989322701481826e-05, + "loss": 0.3429, "step": 1637 }, { - "epoch": 0.09, - "grad_norm": 0.400518818725669, - "learning_rate": 1.9785507061292017e-05, - "loss": 0.2862, + "epoch": 0.08, + "grad_norm": 0.4728958715696773, + "learning_rate": 1.9893010053687606e-05, + "loss": 0.2939, "step": 1638 }, { - "epoch": 0.09, - "grad_norm": 0.9336374686602577, - "learning_rate": 1.978512353250057e-05, - "loss": 0.5577, + "epoch": 0.08, + "grad_norm": 1.1689618272043967, + "learning_rate": 1.989279287353524e-05, + "loss": 0.4529, "step": 1639 }, { - "epoch": 0.09, - "grad_norm": 0.35757051249823835, - "learning_rate": 1.978473966485071e-05, - "loss": 0.2641, + "epoch": 0.08, + "grad_norm": 0.8526981173087761, + "learning_rate": 1.9892575474365977e-05, + "loss": 0.5214, "step": 1640 }, { - "epoch": 0.09, - "grad_norm": 0.4439429484546262, - "learning_rate": 1.978435545835574e-05, - "loss": 0.3602, + "epoch": 0.08, + "grad_norm": 0.4527155098074377, + "learning_rate": 1.989235785618463e-05, + "loss": 0.2924, "step": 1641 }, { - "epoch": 0.09, - "grad_norm": 0.3921207587665929, - "learning_rate": 1.978397091302896e-05, - "loss": 0.1743, + "epoch": 0.08, + "grad_norm": 0.3097026637349731, + "learning_rate": 1.9892140018996006e-05, + "loss": 0.1641, "step": 1642 }, { - "epoch": 0.09, - "grad_norm": 0.7618586521311463, - "learning_rate": 1.9783586028883688e-05, - "loss": 0.4513, + "epoch": 0.08, + "grad_norm": 1.0630176550954846, + "learning_rate": 1.9891921962804942e-05, + "loss": 0.5816, "step": 1643 }, { - "epoch": 0.09, - "grad_norm": 0.3751913533863891, - "learning_rate": 1.9783200805933252e-05, - "loss": 0.2761, + "epoch": 0.08, + "grad_norm": 0.5414931523287791, + "learning_rate": 1.989170368761626e-05, + "loss": 0.3299, "step": 1644 }, { - "epoch": 0.09, - "grad_norm": 0.7805939473702508, - "learning_rate": 1.9782815244190997e-05, - "loss": 0.5491, + "epoch": 0.08, + "grad_norm": 0.4976880333796028, + "learning_rate": 1.9891485193434793e-05, + "loss": 0.2925, "step": 1645 }, { - "epoch": 0.09, - "grad_norm": 0.6195714209412733, - "learning_rate": 1.9782429343670267e-05, - "loss": 0.5169, + "epoch": 0.08, + "grad_norm": 1.1646133362805333, + "learning_rate": 1.9891266480265375e-05, + "loss": 0.5288, "step": 1646 }, { - "epoch": 0.09, - "grad_norm": 0.4918147444122574, - "learning_rate": 1.978204310438443e-05, - "loss": 0.2578, + "epoch": 0.08, + "grad_norm": 0.4953483727227327, + "learning_rate": 1.989104754811285e-05, + "loss": 0.2767, "step": 1647 }, { - "epoch": 0.09, - "grad_norm": 0.3435562889326771, - "learning_rate": 1.9781656526346863e-05, - "loss": 0.2275, + "epoch": 0.08, + "grad_norm": 0.3138336748197105, + "learning_rate": 1.9890828396982068e-05, + "loss": 0.1629, "step": 1648 }, { - "epoch": 0.09, - "grad_norm": 0.7237826939442035, - "learning_rate": 1.9781269609570945e-05, - "loss": 0.3185, + "epoch": 0.08, + "grad_norm": 0.5506180846955351, + "learning_rate": 1.9890609026877877e-05, + "loss": 0.3613, "step": 1649 }, { - "epoch": 0.09, - "grad_norm": 0.5178707759432076, - "learning_rate": 1.978088235407009e-05, - "loss": 0.2934, + "epoch": 0.08, + "grad_norm": 0.5467883967262682, + "learning_rate": 1.9890389437805132e-05, + "loss": 0.3132, "step": 1650 }, { - "epoch": 0.09, - "grad_norm": 0.7923454047135651, - "learning_rate": 1.978049475985769e-05, - "loss": 0.4562, + "epoch": 0.08, + "grad_norm": 0.6905715706838206, + "learning_rate": 1.98901696297687e-05, + "loss": 0.427, "step": 1651 }, { - "epoch": 0.09, - "grad_norm": 0.5428509403531181, - "learning_rate": 1.9780106826947184e-05, - "loss": 0.3805, + "epoch": 0.08, + "grad_norm": 1.2294580035663532, + "learning_rate": 1.9889949602773444e-05, + "loss": 0.476, "step": 1652 }, { - "epoch": 0.09, - "grad_norm": 0.45702446485133047, - "learning_rate": 1.9779718555351997e-05, - "loss": 0.2591, + "epoch": 0.08, + "grad_norm": 0.3916650481120573, + "learning_rate": 1.9889729356824236e-05, + "loss": 0.2915, "step": 1653 }, { - "epoch": 0.1, - "grad_norm": 0.36296431359883, - "learning_rate": 1.9779329945085578e-05, - "loss": 0.2195, + "epoch": 0.08, + "grad_norm": 0.38776614631871353, + "learning_rate": 1.9889508891925953e-05, + "loss": 0.2589, "step": 1654 }, { - "epoch": 0.1, - "grad_norm": 0.6704714680728022, - "learning_rate": 1.9778940996161382e-05, - "loss": 0.4751, + "epoch": 0.08, + "grad_norm": 1.4931787681862343, + "learning_rate": 1.9889288208083476e-05, + "loss": 0.3153, "step": 1655 }, { - "epoch": 0.1, - "grad_norm": 0.43102537372482086, - "learning_rate": 1.9778551708592883e-05, - "loss": 0.2816, + "epoch": 0.08, + "grad_norm": 0.5027262591833506, + "learning_rate": 1.9889067305301685e-05, + "loss": 0.2891, "step": 1656 }, { - "epoch": 0.1, - "grad_norm": 0.7987627103272095, - "learning_rate": 1.9778162082393554e-05, - "loss": 0.4378, + "epoch": 0.08, + "grad_norm": 0.6581571670767872, + "learning_rate": 1.9888846183585478e-05, + "loss": 0.378, "step": 1657 }, { - "epoch": 0.1, - "grad_norm": 1.4586157829032407, - "learning_rate": 1.9777772117576893e-05, - "loss": 0.8086, + "epoch": 0.08, + "grad_norm": 0.9304593286937063, + "learning_rate": 1.988862484293975e-05, + "loss": 0.4036, "step": 1658 }, { - "epoch": 0.1, - "grad_norm": 0.5097364905331209, - "learning_rate": 1.977738181415641e-05, - "loss": 0.2658, + "epoch": 0.08, + "grad_norm": 0.45142523695020226, + "learning_rate": 1.9888403283369393e-05, + "loss": 0.3302, "step": 1659 }, { - "epoch": 0.1, - "grad_norm": 0.5945672810230304, - "learning_rate": 1.977699117214561e-05, - "loss": 0.2941, + "epoch": 0.08, + "grad_norm": 0.6149164576347504, + "learning_rate": 1.9888181504879323e-05, + "loss": 0.3209, "step": 1660 }, { - "epoch": 0.1, - "grad_norm": 0.3399558203328304, - "learning_rate": 1.9776600191558025e-05, - "loss": 0.2018, + "epoch": 0.08, + "grad_norm": 0.4547645917702171, + "learning_rate": 1.988795950747444e-05, + "loss": 0.2543, "step": 1661 }, { - "epoch": 0.1, - "grad_norm": 0.6445674144621936, - "learning_rate": 1.9776208872407202e-05, - "loss": 0.3421, + "epoch": 0.08, + "grad_norm": 0.5828886577255246, + "learning_rate": 1.9887737291159665e-05, + "loss": 0.3375, "step": 1662 }, { - "epoch": 0.1, - "grad_norm": 0.7888347334580998, - "learning_rate": 1.9775817214706682e-05, - "loss": 0.3533, + "epoch": 0.08, + "grad_norm": 0.7524311304727527, + "learning_rate": 1.988751485593992e-05, + "loss": 0.4427, "step": 1663 }, { - "epoch": 0.1, - "grad_norm": 0.7188794356920819, - "learning_rate": 1.977542521847003e-05, - "loss": 0.3961, + "epoch": 0.08, + "grad_norm": 0.7206443752254421, + "learning_rate": 1.9887292201820125e-05, + "loss": 0.4471, "step": 1664 }, { - "epoch": 0.1, - "grad_norm": 0.5233002822851152, - "learning_rate": 1.9775032883710826e-05, - "loss": 0.3212, + "epoch": 0.08, + "grad_norm": 0.46760598212826837, + "learning_rate": 1.9887069328805207e-05, + "loss": 0.2397, "step": 1665 }, { - "epoch": 0.1, - "grad_norm": 0.22069561834964382, - "learning_rate": 1.9774640210442654e-05, - "loss": 0.1167, + "epoch": 0.08, + "grad_norm": 0.4200051755598856, + "learning_rate": 1.9886846236900102e-05, + "loss": 0.2593, "step": 1666 }, { - "epoch": 0.1, - "grad_norm": 0.4538299309152852, - "learning_rate": 1.977424719867911e-05, - "loss": 0.3369, + "epoch": 0.08, + "grad_norm": 0.5352550944129146, + "learning_rate": 1.9886622926109755e-05, + "loss": 0.3131, "step": 1667 }, { - "epoch": 0.1, - "grad_norm": 0.5837962589537384, - "learning_rate": 1.9773853848433806e-05, - "loss": 0.3332, + "epoch": 0.08, + "grad_norm": 0.47558925357094656, + "learning_rate": 1.9886399396439104e-05, + "loss": 0.2583, "step": 1668 }, { - "epoch": 0.1, - "grad_norm": 1.2822390523629652, - "learning_rate": 1.9773460159720365e-05, - "loss": 0.499, + "epoch": 0.08, + "grad_norm": 0.5195161085138071, + "learning_rate": 1.9886175647893097e-05, + "loss": 0.375, "step": 1669 }, { - "epoch": 0.1, - "grad_norm": 0.6098981703977231, - "learning_rate": 1.977306613255242e-05, - "loss": 0.3464, + "epoch": 0.08, + "grad_norm": 1.23648971252555, + "learning_rate": 1.9885951680476693e-05, + "loss": 0.6727, "step": 1670 }, { - "epoch": 0.1, - "grad_norm": 0.5010711098035411, - "learning_rate": 1.977267176694361e-05, - "loss": 0.3058, + "epoch": 0.08, + "grad_norm": 0.5432546410343398, + "learning_rate": 1.9885727494194843e-05, + "loss": 0.2455, "step": 1671 }, { - "epoch": 0.1, - "grad_norm": 0.4940495865152775, - "learning_rate": 1.97722770629076e-05, - "loss": 0.3481, + "epoch": 0.08, + "grad_norm": 0.38313882128025817, + "learning_rate": 1.9885503089052517e-05, + "loss": 0.2278, "step": 1672 }, { - "epoch": 0.1, - "grad_norm": 0.6839400527952039, - "learning_rate": 1.9771882020458055e-05, - "loss": 0.3003, + "epoch": 0.08, + "grad_norm": 0.5411188084812046, + "learning_rate": 1.9885278465054682e-05, + "loss": 0.3454, "step": 1673 }, { - "epoch": 0.1, - "grad_norm": 0.4901359588398224, - "learning_rate": 1.9771486639608657e-05, - "loss": 0.3097, + "epoch": 0.08, + "grad_norm": 0.5998430639642894, + "learning_rate": 1.9885053622206305e-05, + "loss": 0.3152, "step": 1674 }, { - "epoch": 0.1, - "grad_norm": 0.38335500215663265, - "learning_rate": 1.9771090920373096e-05, - "loss": 0.2784, + "epoch": 0.08, + "grad_norm": 0.8232777457317849, + "learning_rate": 1.988482856051237e-05, + "loss": 0.4278, "step": 1675 }, { - "epoch": 0.1, - "grad_norm": 1.0457668872874353, - "learning_rate": 1.9770694862765077e-05, - "loss": 0.6059, + "epoch": 0.08, + "grad_norm": 1.2179614324615915, + "learning_rate": 1.9884603279977858e-05, + "loss": 0.7844, "step": 1676 }, { - "epoch": 0.1, - "grad_norm": 0.4529475843166634, - "learning_rate": 1.9770298466798316e-05, - "loss": 0.2996, + "epoch": 0.08, + "grad_norm": 0.48192141350880985, + "learning_rate": 1.9884377780607755e-05, + "loss": 0.3221, "step": 1677 }, { - "epoch": 0.1, - "grad_norm": 0.9647389731875299, - "learning_rate": 1.9769901732486542e-05, - "loss": 0.545, + "epoch": 0.08, + "grad_norm": 0.5357704901220921, + "learning_rate": 1.9884152062407057e-05, + "loss": 0.299, "step": 1678 }, { - "epoch": 0.1, - "grad_norm": 0.4138833322271508, - "learning_rate": 1.9769504659843486e-05, - "loss": 0.3303, + "epoch": 0.08, + "grad_norm": 0.44068915798364067, + "learning_rate": 1.9883926125380757e-05, + "loss": 0.2689, "step": 1679 }, { - "epoch": 0.1, - "grad_norm": 0.37714688186515205, - "learning_rate": 1.9769107248882904e-05, - "loss": 0.2794, + "epoch": 0.08, + "grad_norm": 0.6731271429884765, + "learning_rate": 1.988369996953386e-05, + "loss": 0.4098, "step": 1680 }, { - "epoch": 0.1, - "grad_norm": 0.539646531653099, - "learning_rate": 1.976870949961856e-05, - "loss": 0.2455, + "epoch": 0.08, + "grad_norm": 0.5144584101710767, + "learning_rate": 1.988347359487137e-05, + "loss": 0.2695, "step": 1681 }, { - "epoch": 0.1, - "grad_norm": 1.9706385236557056, - "learning_rate": 1.9768311412064224e-05, - "loss": 0.8027, + "epoch": 0.08, + "grad_norm": 1.4463496848907313, + "learning_rate": 1.9883247001398304e-05, + "loss": 0.7756, "step": 1682 }, { - "epoch": 0.1, - "grad_norm": 0.4506680655535206, - "learning_rate": 1.9767912986233685e-05, - "loss": 0.2213, + "epoch": 0.08, + "grad_norm": 0.9946146263286346, + "learning_rate": 1.988302018911967e-05, + "loss": 0.4598, "step": 1683 }, { - "epoch": 0.1, - "grad_norm": 0.4818504309643764, - "learning_rate": 1.976751422214074e-05, - "loss": 0.3729, + "epoch": 0.08, + "grad_norm": 0.3525617375334301, + "learning_rate": 1.9882793158040498e-05, + "loss": 0.2057, "step": 1684 }, { - "epoch": 0.1, - "grad_norm": 0.6867809896350154, - "learning_rate": 1.9767115119799197e-05, - "loss": 0.5068, + "epoch": 0.08, + "grad_norm": 0.5596522464997875, + "learning_rate": 1.9882565908165808e-05, + "loss": 0.3088, "step": 1685 }, { - "epoch": 0.1, - "grad_norm": 0.4081668174851612, - "learning_rate": 1.9766715679222875e-05, - "loss": 0.2312, + "epoch": 0.08, + "grad_norm": 1.0155448381599104, + "learning_rate": 1.9882338439500634e-05, + "loss": 0.5071, "step": 1686 }, { - "epoch": 0.1, - "grad_norm": 0.3395615177013707, - "learning_rate": 1.9766315900425613e-05, - "loss": 0.2423, + "epoch": 0.08, + "grad_norm": 0.49575001764748106, + "learning_rate": 1.9882110752050015e-05, + "loss": 0.2744, "step": 1687 }, { - "epoch": 0.1, - "grad_norm": 1.7966060680798426, - "learning_rate": 1.976591578342125e-05, - "loss": 0.8855, + "epoch": 0.08, + "grad_norm": 1.6863284735308244, + "learning_rate": 1.9881882845818987e-05, + "loss": 0.8465, "step": 1688 }, { - "epoch": 0.1, - "grad_norm": 0.380188460807513, - "learning_rate": 1.9765515328223644e-05, - "loss": 0.2262, + "epoch": 0.08, + "grad_norm": 0.40225741232124107, + "learning_rate": 1.9881654720812594e-05, + "loss": 0.2899, "step": 1689 }, { - "epoch": 0.1, - "grad_norm": 0.7703079045710871, - "learning_rate": 1.9765114534846662e-05, - "loss": 0.5256, + "epoch": 0.08, + "grad_norm": 0.6141325518413121, + "learning_rate": 1.9881426377035897e-05, + "loss": 0.3594, "step": 1690 }, { - "epoch": 0.1, - "grad_norm": 0.5143745240081559, - "learning_rate": 1.9764713403304183e-05, - "loss": 0.3954, + "epoch": 0.08, + "grad_norm": 1.069362132590462, + "learning_rate": 1.9881197814493936e-05, + "loss": 0.4805, "step": 1691 }, { - "epoch": 0.1, - "grad_norm": 0.46836814781278047, - "learning_rate": 1.97643119336101e-05, - "loss": 0.2407, + "epoch": 0.08, + "grad_norm": 0.4252447823942229, + "learning_rate": 1.9880969033191782e-05, + "loss": 0.2438, "step": 1692 }, { - "epoch": 0.1, - "grad_norm": 0.3690528313572445, - "learning_rate": 1.976391012577831e-05, - "loss": 0.1872, + "epoch": 0.08, + "grad_norm": 0.40439029522586517, + "learning_rate": 1.98807400331345e-05, + "loss": 0.2649, "step": 1693 }, { - "epoch": 0.1, - "grad_norm": 1.2780455908200286, - "learning_rate": 1.9763507979822737e-05, - "loss": 0.7456, + "epoch": 0.08, + "grad_norm": 1.1276575940889462, + "learning_rate": 1.988051081432715e-05, + "loss": 0.6103, "step": 1694 }, { - "epoch": 0.1, - "grad_norm": 0.4214444867559315, - "learning_rate": 1.97631054957573e-05, - "loss": 0.2931, + "epoch": 0.08, + "grad_norm": 0.7899311739463363, + "learning_rate": 1.988028137677482e-05, + "loss": 0.368, "step": 1695 }, { - "epoch": 0.1, - "grad_norm": 0.5189345658634478, - "learning_rate": 1.9762702673595943e-05, - "loss": 0.3177, + "epoch": 0.08, + "grad_norm": 0.5751096057935386, + "learning_rate": 1.9880051720482578e-05, + "loss": 0.3401, "step": 1696 }, { - "epoch": 0.1, - "grad_norm": 0.880757909211329, - "learning_rate": 1.9762299513352604e-05, - "loss": 0.5314, + "epoch": 0.08, + "grad_norm": 0.6877863918955138, + "learning_rate": 1.9879821845455518e-05, + "loss": 0.3739, "step": 1697 }, { - "epoch": 0.1, - "grad_norm": 0.4845554317665074, - "learning_rate": 1.976189601504126e-05, - "loss": 0.2937, + "epoch": 0.08, + "grad_norm": 0.31648028887624996, + "learning_rate": 1.9879591751698722e-05, + "loss": 0.1836, "step": 1698 }, { - "epoch": 0.1, - "grad_norm": 0.30040774511408, - "learning_rate": 1.9761492178675876e-05, - "loss": 0.1939, + "epoch": 0.08, + "grad_norm": 0.523113514850932, + "learning_rate": 1.9879361439217284e-05, + "loss": 0.3132, "step": 1699 }, { - "epoch": 0.1, - "grad_norm": 1.1642293277701257, - "learning_rate": 1.9761088004270435e-05, - "loss": 0.7888, + "epoch": 0.08, + "grad_norm": 0.7125821880293861, + "learning_rate": 1.987913090801631e-05, + "loss": 0.4693, "step": 1700 }, { - "epoch": 0.1, - "grad_norm": 0.43107411175540156, - "learning_rate": 1.976068349183894e-05, - "loss": 0.2908, + "epoch": 0.08, + "grad_norm": 0.5035830238003098, + "learning_rate": 1.98789001581009e-05, + "loss": 0.2794, "step": 1701 }, { - "epoch": 0.1, - "grad_norm": 0.7767979561549465, - "learning_rate": 1.9760278641395395e-05, - "loss": 0.447, + "epoch": 0.08, + "grad_norm": 0.5551740711498908, + "learning_rate": 1.987866918947616e-05, + "loss": 0.3586, "step": 1702 }, { - "epoch": 0.1, - "grad_norm": 0.562402325785457, - "learning_rate": 1.9759873452953816e-05, - "loss": 0.3703, + "epoch": 0.08, + "grad_norm": 0.7045511744517999, + "learning_rate": 1.98784380021472e-05, + "loss": 0.5288, "step": 1703 }, { - "epoch": 0.1, - "grad_norm": 0.4995224833626151, - "learning_rate": 1.9759467926528242e-05, - "loss": 0.3042, + "epoch": 0.08, + "grad_norm": 0.39000663888317577, + "learning_rate": 1.987820659611915e-05, + "loss": 0.1985, "step": 1704 }, { - "epoch": 0.1, - "grad_norm": 0.27285547576376873, - "learning_rate": 1.975906206213271e-05, - "loss": 0.1633, + "epoch": 0.08, + "grad_norm": 0.36800394086173355, + "learning_rate": 1.9877974971397127e-05, + "loss": 0.2606, "step": 1705 }, { - "epoch": 0.1, - "grad_norm": 0.9079324937335543, - "learning_rate": 1.9758655859781282e-05, - "loss": 0.6932, + "epoch": 0.08, + "grad_norm": 1.725130084538193, + "learning_rate": 1.9877743127986257e-05, + "loss": 0.8952, "step": 1706 }, { - "epoch": 0.1, - "grad_norm": 0.40677426359666474, - "learning_rate": 1.975824931948802e-05, - "loss": 0.2871, + "epoch": 0.08, + "grad_norm": 0.649753449726772, + "learning_rate": 1.9877511065891676e-05, + "loss": 0.3451, "step": 1707 }, { - "epoch": 0.1, - "grad_norm": 0.45580786137028356, - "learning_rate": 1.975784244126701e-05, - "loss": 0.3893, + "epoch": 0.08, + "grad_norm": 0.5207297072375531, + "learning_rate": 1.987727878511852e-05, + "loss": 0.3086, "step": 1708 }, { - "epoch": 0.1, - "grad_norm": 0.9194512800758327, - "learning_rate": 1.9757435225132325e-05, - "loss": 0.3548, + "epoch": 0.08, + "grad_norm": 0.6524409046184836, + "learning_rate": 1.987704628567193e-05, + "loss": 0.4395, "step": 1709 }, { - "epoch": 0.1, - "grad_norm": 0.44115743053683315, - "learning_rate": 1.9757027671098083e-05, - "loss": 0.3031, + "epoch": 0.08, + "grad_norm": 0.4795991306948634, + "learning_rate": 1.9876813567557054e-05, + "loss": 0.2167, "step": 1710 }, { - "epoch": 0.1, - "grad_norm": 0.2825713405110046, - "learning_rate": 1.9756619779178393e-05, - "loss": 0.2427, + "epoch": 0.08, + "grad_norm": 0.4254843411252256, + "learning_rate": 1.9876580630779047e-05, + "loss": 0.2845, "step": 1711 }, { - "epoch": 0.1, - "grad_norm": 1.1849133538579537, - "learning_rate": 1.9756211549387378e-05, - "loss": 0.4944, + "epoch": 0.08, + "grad_norm": 0.721172479639759, + "learning_rate": 1.9876347475343062e-05, + "loss": 0.4632, "step": 1712 }, { - "epoch": 0.1, - "grad_norm": 0.36690700948942095, - "learning_rate": 1.975580298173918e-05, - "loss": 0.3057, + "epoch": 0.08, + "grad_norm": 0.3624708519004964, + "learning_rate": 1.9876114101254264e-05, + "loss": 0.2649, "step": 1713 }, { - "epoch": 0.1, - "grad_norm": 0.7245805282332856, - "learning_rate": 1.975539407624794e-05, - "loss": 0.5556, + "epoch": 0.08, + "grad_norm": 0.4770494234188718, + "learning_rate": 1.9875880508517818e-05, + "loss": 0.2813, "step": 1714 }, { - "epoch": 0.1, - "grad_norm": 0.4612175857613643, - "learning_rate": 1.9754984832927825e-05, - "loss": 0.3085, + "epoch": 0.08, + "grad_norm": 0.7501680679197721, + "learning_rate": 1.9875646697138896e-05, + "loss": 0.5356, "step": 1715 }, { - "epoch": 0.1, - "grad_norm": 0.44090499684018897, - "learning_rate": 1.9754575251793006e-05, - "loss": 0.3089, + "epoch": 0.08, + "grad_norm": 0.513524764590981, + "learning_rate": 1.9875412667122674e-05, + "loss": 0.3498, "step": 1716 }, { - "epoch": 0.1, - "grad_norm": 0.5288877897785201, - "learning_rate": 1.975416533285766e-05, - "loss": 0.3071, + "epoch": 0.08, + "grad_norm": 0.3873540607627214, + "learning_rate": 1.9875178418474336e-05, + "loss": 0.1962, "step": 1717 }, { - "epoch": 0.1, - "grad_norm": 1.3106501640407626, - "learning_rate": 1.9753755076135994e-05, - "loss": 0.587, + "epoch": 0.08, + "grad_norm": 0.6252522171839381, + "learning_rate": 1.9874943951199063e-05, + "loss": 0.3652, "step": 1718 }, { - "epoch": 0.1, - "grad_norm": 0.3528842608221892, - "learning_rate": 1.9753344481642205e-05, - "loss": 0.2396, + "epoch": 0.08, + "grad_norm": 0.9612689489621633, + "learning_rate": 1.987470926530205e-05, + "loss": 0.542, "step": 1719 }, { - "epoch": 0.1, - "grad_norm": 0.6296848852224964, - "learning_rate": 1.975293354939052e-05, - "loss": 0.4465, + "epoch": 0.08, + "grad_norm": 0.4203414924716156, + "learning_rate": 1.9874474360788493e-05, + "loss": 0.2743, "step": 1720 }, { - "epoch": 0.1, - "grad_norm": 0.5365509567706639, - "learning_rate": 1.975252227939516e-05, - "loss": 0.2626, + "epoch": 0.08, + "grad_norm": 0.5713223355242646, + "learning_rate": 1.9874239237663588e-05, + "loss": 0.3982, "step": 1721 }, { - "epoch": 0.1, - "grad_norm": 0.3821923286062335, - "learning_rate": 1.9752110671670375e-05, - "loss": 0.1844, + "epoch": 0.08, + "grad_norm": 1.5007293139938762, + "learning_rate": 1.9874003895932544e-05, + "loss": 0.722, "step": 1722 }, { - "epoch": 0.1, - "grad_norm": 0.40328934838267844, - "learning_rate": 1.975169872623042e-05, - "loss": 0.3276, + "epoch": 0.08, + "grad_norm": 0.4415202174568797, + "learning_rate": 1.987376833560057e-05, + "loss": 0.2498, "step": 1723 }, { - "epoch": 0.1, - "grad_norm": 0.9932472840383428, - "learning_rate": 1.9751286443089555e-05, - "loss": 0.5172, + "epoch": 0.08, + "grad_norm": 0.6759396856638009, + "learning_rate": 1.987353255667288e-05, + "loss": 0.389, "step": 1724 }, { - "epoch": 0.1, - "grad_norm": 0.39449755648213397, - "learning_rate": 1.975087382226206e-05, - "loss": 0.2579, + "epoch": 0.08, + "grad_norm": 0.5071586810012434, + "learning_rate": 1.98732965591547e-05, + "loss": 0.3577, "step": 1725 }, { - "epoch": 0.1, - "grad_norm": 0.34791684251849897, - "learning_rate": 1.9750460863762225e-05, - "loss": 0.2135, + "epoch": 0.08, + "grad_norm": 0.4968893859447733, + "learning_rate": 1.9873060343051246e-05, + "loss": 0.3081, "step": 1726 }, { - "epoch": 0.1, - "grad_norm": 0.4824628574743985, - "learning_rate": 1.9750047567604348e-05, - "loss": 0.3734, + "epoch": 0.08, + "grad_norm": 0.3893735651853771, + "learning_rate": 1.987282390836775e-05, + "loss": 0.1274, "step": 1727 }, { - "epoch": 0.1, - "grad_norm": 0.45556148548025616, - "learning_rate": 1.9749633933802743e-05, - "loss": 0.22, + "epoch": 0.08, + "grad_norm": 0.5759343988896747, + "learning_rate": 1.9872587255109455e-05, + "loss": 0.3751, "step": 1728 }, { - "epoch": 0.1, - "grad_norm": 0.6722527683545421, - "learning_rate": 1.9749219962371736e-05, - "loss": 0.4172, + "epoch": 0.08, + "grad_norm": 0.4852073022232205, + "learning_rate": 1.9872350383281588e-05, + "loss": 0.2954, "step": 1729 }, { - "epoch": 0.1, - "grad_norm": 1.3213518952540688, - "learning_rate": 1.9748805653325657e-05, - "loss": 0.5264, + "epoch": 0.08, + "grad_norm": 0.9299259179887859, + "learning_rate": 1.9872113292889402e-05, + "loss": 0.5046, "step": 1730 }, { - "epoch": 0.1, - "grad_norm": 0.43129412639939235, - "learning_rate": 1.974839100667886e-05, - "loss": 0.3006, + "epoch": 0.08, + "grad_norm": 0.48448437195419847, + "learning_rate": 1.987187598393814e-05, + "loss": 0.368, "step": 1731 }, { - "epoch": 0.1, - "grad_norm": 0.5744145659725003, - "learning_rate": 1.97479760224457e-05, - "loss": 0.3189, + "epoch": 0.08, + "grad_norm": 0.5847601671824171, + "learning_rate": 1.987163845643306e-05, + "loss": 0.3299, "step": 1732 }, { - "epoch": 0.1, - "grad_norm": 0.28489126228750195, - "learning_rate": 1.9747560700640552e-05, - "loss": 0.1805, + "epoch": 0.08, + "grad_norm": 0.45951701738600986, + "learning_rate": 1.987140071037942e-05, + "loss": 0.2686, "step": 1733 }, { - "epoch": 0.1, - "grad_norm": 0.4691600450533372, - "learning_rate": 1.97471450412778e-05, - "loss": 0.2848, + "epoch": 0.08, + "grad_norm": 0.950985944503552, + "learning_rate": 1.987116274578248e-05, + "loss": 0.5239, "step": 1734 }, { - "epoch": 0.1, - "grad_norm": 0.48444296804977377, - "learning_rate": 1.9746729044371826e-05, - "loss": 0.2677, + "epoch": 0.08, + "grad_norm": 0.480880458174946, + "learning_rate": 1.9870924562647512e-05, + "loss": 0.3316, "step": 1735 }, { - "epoch": 0.1, - "grad_norm": 1.6659386268209997, - "learning_rate": 1.9746312709937047e-05, - "loss": 0.5739, + "epoch": 0.08, + "grad_norm": 0.48103525747567766, + "learning_rate": 1.9870686160979785e-05, + "loss": 0.3294, "step": 1736 }, { - "epoch": 0.1, - "grad_norm": 0.4936719526225396, - "learning_rate": 1.974589603798788e-05, - "loss": 0.3041, + "epoch": 0.08, + "grad_norm": 0.7709234986862589, + "learning_rate": 1.9870447540784584e-05, + "loss": 0.541, "step": 1737 }, { - "epoch": 0.1, - "grad_norm": 0.34597533311036405, - "learning_rate": 1.974547902853875e-05, - "loss": 0.2043, + "epoch": 0.08, + "grad_norm": 0.45204059827988025, + "learning_rate": 1.9870208702067185e-05, + "loss": 0.2738, "step": 1738 }, { - "epoch": 0.1, - "grad_norm": 0.38018595031990254, - "learning_rate": 1.9745061681604104e-05, - "loss": 0.3154, + "epoch": 0.08, + "grad_norm": 0.42987515861131753, + "learning_rate": 1.986996964483288e-05, + "loss": 0.1659, "step": 1739 }, { - "epoch": 0.1, - "grad_norm": 0.8001416151657222, - "learning_rate": 1.974464399719839e-05, - "loss": 0.461, + "epoch": 0.08, + "grad_norm": 0.5177532861292381, + "learning_rate": 1.9869730369086962e-05, + "loss": 0.3035, "step": 1740 }, { - "epoch": 0.1, - "grad_norm": 0.5315768234902671, - "learning_rate": 1.974422597533607e-05, - "loss": 0.3053, + "epoch": 0.08, + "grad_norm": 0.4789341495917472, + "learning_rate": 1.9869490874834723e-05, + "loss": 0.3422, "step": 1741 }, { - "epoch": 0.1, - "grad_norm": 0.5469926220469847, - "learning_rate": 1.9743807616031624e-05, - "loss": 0.3805, + "epoch": 0.08, + "grad_norm": 0.9670744592369863, + "learning_rate": 1.9869251162081468e-05, + "loss": 0.5874, "step": 1742 }, { - "epoch": 0.1, - "grad_norm": 0.7185184120264385, - "learning_rate": 1.974338891929954e-05, - "loss": 0.3472, + "epoch": 0.08, + "grad_norm": 0.7457490954902003, + "learning_rate": 1.9869011230832503e-05, + "loss": 0.4634, "step": 1743 }, { - "epoch": 0.1, - "grad_norm": 0.5697328626613448, - "learning_rate": 1.9742969885154318e-05, - "loss": 0.3864, + "epoch": 0.08, + "grad_norm": 0.4377343714746419, + "learning_rate": 1.9868771081093145e-05, + "loss": 0.3087, "step": 1744 }, { - "epoch": 0.1, - "grad_norm": 0.2519361023605971, - "learning_rate": 1.974255051361047e-05, - "loss": 0.076, + "epoch": 0.08, + "grad_norm": 0.3930404623250186, + "learning_rate": 1.9868530712868705e-05, + "loss": 0.2206, "step": 1745 }, { - "epoch": 0.1, - "grad_norm": 0.5995345198599, - "learning_rate": 1.974213080468251e-05, - "loss": 0.3818, + "epoch": 0.08, + "grad_norm": 1.6604068065097912, + "learning_rate": 1.9868290126164507e-05, + "loss": 0.4942, "step": 1746 }, { - "epoch": 0.1, - "grad_norm": 0.40605771267747187, - "learning_rate": 1.9741710758384985e-05, - "loss": 0.3281, + "epoch": 0.08, + "grad_norm": 0.4944818233324763, + "learning_rate": 1.9868049320985877e-05, + "loss": 0.2795, "step": 1747 }, { - "epoch": 0.1, - "grad_norm": 0.852458852226233, - "learning_rate": 1.9741290374732434e-05, - "loss": 0.4804, + "epoch": 0.08, + "grad_norm": 0.5084430788038516, + "learning_rate": 1.9867808297338148e-05, + "loss": 0.3857, "step": 1748 }, { - "epoch": 0.1, - "grad_norm": 0.6862694898087204, - "learning_rate": 1.9740869653739413e-05, - "loss": 0.4156, + "epoch": 0.08, + "grad_norm": 0.9046739113643756, + "learning_rate": 1.9867567055226652e-05, + "loss": 0.4903, "step": 1749 }, { - "epoch": 0.1, - "grad_norm": 0.5487997218358596, - "learning_rate": 1.97404485954205e-05, - "loss": 0.3663, + "epoch": 0.08, + "grad_norm": 0.5544441773985028, + "learning_rate": 1.986732559465673e-05, + "loss": 0.2935, "step": 1750 }, { - "epoch": 0.1, - "grad_norm": 0.303352834458015, - "learning_rate": 1.974002719979027e-05, - "loss": 0.1821, + "epoch": 0.08, + "grad_norm": 0.38825085950069643, + "learning_rate": 1.986708391563373e-05, + "loss": 0.1905, "step": 1751 }, { - "epoch": 0.1, - "grad_norm": 0.5481110458066554, - "learning_rate": 1.973960546686331e-05, - "loss": 0.3175, + "epoch": 0.08, + "grad_norm": 0.6985435776079773, + "learning_rate": 1.9866842018163e-05, + "loss": 0.3823, "step": 1752 }, { - "epoch": 0.1, - "grad_norm": 0.6217773627623907, - "learning_rate": 1.973918339665424e-05, - "loss": 0.4053, + "epoch": 0.08, + "grad_norm": 0.5052266103402563, + "learning_rate": 1.9866599902249905e-05, + "loss": 0.2225, "step": 1753 }, { - "epoch": 0.1, - "grad_norm": 0.4157060592887071, - "learning_rate": 1.9738760989177665e-05, - "loss": 0.3342, + "epoch": 0.08, + "grad_norm": 1.1630305720705896, + "learning_rate": 1.986635756789979e-05, + "loss": 0.5532, "step": 1754 }, { - "epoch": 0.1, - "grad_norm": 0.48771030560903766, - "learning_rate": 1.9738338244448214e-05, - "loss": 0.3222, + "epoch": 0.08, + "grad_norm": 1.9834602718784935, + "learning_rate": 1.9866115015118034e-05, + "loss": 0.7262, "step": 1755 }, { - "epoch": 0.1, - "grad_norm": 0.5874946088172913, - "learning_rate": 1.9737915162480527e-05, - "loss": 0.4358, + "epoch": 0.08, + "grad_norm": 0.5009715796752674, + "learning_rate": 1.9865872243909997e-05, + "loss": 0.2629, "step": 1756 }, { - "epoch": 0.1, - "grad_norm": 0.37947933594438027, - "learning_rate": 1.973749174328926e-05, - "loss": 0.2179, + "epoch": 0.08, + "grad_norm": 0.39269810813349826, + "learning_rate": 1.986562925428106e-05, + "loss": 0.2608, "step": 1757 }, { - "epoch": 0.1, - "grad_norm": 0.4789152218360017, - "learning_rate": 1.9737067986889072e-05, - "loss": 0.3133, + "epoch": 0.08, + "grad_norm": 1.8067721336369729, + "learning_rate": 1.9865386046236597e-05, + "loss": 0.7001, "step": 1758 }, { - "epoch": 0.1, - "grad_norm": 0.45954057561314693, - "learning_rate": 1.973664389329464e-05, - "loss": 0.3215, + "epoch": 0.08, + "grad_norm": 0.9627950670229396, + "learning_rate": 1.9865142619781996e-05, + "loss": 0.3014, "step": 1759 }, { - "epoch": 0.1, - "grad_norm": 1.0575391871829922, - "learning_rate": 1.9736219462520645e-05, - "loss": 0.6064, + "epoch": 0.08, + "grad_norm": 0.9837848782304732, + "learning_rate": 1.9864898974922645e-05, + "loss": 0.4019, "step": 1760 }, { - "epoch": 0.1, - "grad_norm": 0.5316241369605608, - "learning_rate": 1.973579469458179e-05, - "loss": 0.2216, + "epoch": 0.08, + "grad_norm": 1.7191488952086396, + "learning_rate": 1.9864655111663943e-05, + "loss": 0.8414, "step": 1761 }, { - "epoch": 0.1, - "grad_norm": 0.5200064785449022, - "learning_rate": 1.9735369589492786e-05, - "loss": 0.3547, + "epoch": 0.08, + "grad_norm": 0.5333491767759412, + "learning_rate": 1.9864411030011278e-05, + "loss": 0.2293, "step": 1762 }, { - "epoch": 0.1, - "grad_norm": 0.3661703504092863, - "learning_rate": 1.973494414726835e-05, - "loss": 0.2464, + "epoch": 0.08, + "grad_norm": 0.4167151639940066, + "learning_rate": 1.9864166729970066e-05, + "loss": 0.1753, "step": 1763 }, { - "epoch": 0.1, - "grad_norm": 0.6885337064692816, - "learning_rate": 1.9734518367923216e-05, - "loss": 0.3472, + "epoch": 0.08, + "grad_norm": 0.6035918577050162, + "learning_rate": 1.9863922211545708e-05, + "loss": 0.3739, "step": 1764 }, { - "epoch": 0.1, - "grad_norm": 0.44627825575802316, - "learning_rate": 1.973409225147213e-05, - "loss": 0.3789, + "epoch": 0.08, + "grad_norm": 0.6751172389225426, + "learning_rate": 1.9863677474743618e-05, + "loss": 0.3295, "step": 1765 }, { - "epoch": 0.1, - "grad_norm": 0.5653737927716992, - "learning_rate": 1.973366579792985e-05, - "loss": 0.3915, + "epoch": 0.08, + "grad_norm": 1.8045785028446848, + "learning_rate": 1.9863432519569215e-05, + "loss": 0.5187, "step": 1766 }, { - "epoch": 0.1, - "grad_norm": 0.47244377328827575, - "learning_rate": 1.9733239007311137e-05, - "loss": 0.2816, + "epoch": 0.08, + "grad_norm": 0.6610463641175794, + "learning_rate": 1.9863187346027922e-05, + "loss": 0.4342, "step": 1767 }, { - "epoch": 0.1, - "grad_norm": 0.5118989988502627, - "learning_rate": 1.973281187963078e-05, - "loss": 0.3149, + "epoch": 0.08, + "grad_norm": 0.557196577372603, + "learning_rate": 1.986294195412517e-05, + "loss": 0.3035, "step": 1768 }, { - "epoch": 0.1, - "grad_norm": 0.8961397366295146, - "learning_rate": 1.9732384414903562e-05, - "loss": 0.5801, + "epoch": 0.08, + "grad_norm": 0.4139781351442848, + "learning_rate": 1.986269634386639e-05, + "loss": 0.1926, "step": 1769 }, { - "epoch": 0.1, - "grad_norm": 0.40915889685555634, - "learning_rate": 1.9731956613144297e-05, - "loss": 0.3385, + "epoch": 0.08, + "grad_norm": 1.1762058447433266, + "learning_rate": 1.9862450515257015e-05, + "loss": 0.547, "step": 1770 }, { - "epoch": 0.1, - "grad_norm": 0.36489091879616176, - "learning_rate": 1.9731528474367787e-05, - "loss": 0.1888, + "epoch": 0.08, + "grad_norm": 0.7487274928627795, + "learning_rate": 1.9862204468302492e-05, + "loss": 0.4105, "step": 1771 }, { - "epoch": 0.1, - "grad_norm": 0.4653678958356583, - "learning_rate": 1.9731099998588865e-05, - "loss": 0.3244, + "epoch": 0.08, + "grad_norm": 0.5146472282125227, + "learning_rate": 1.986195820300827e-05, + "loss": 0.2934, "step": 1772 }, { - "epoch": 0.1, - "grad_norm": 0.7889078900037176, - "learning_rate": 1.973067118582237e-05, - "loss": 0.4141, + "epoch": 0.08, + "grad_norm": 1.3678804523076638, + "learning_rate": 1.9861711719379793e-05, + "loss": 0.7715, "step": 1773 }, { - "epoch": 0.1, - "grad_norm": 0.48457358345367246, - "learning_rate": 1.973024203608315e-05, - "loss": 0.2676, + "epoch": 0.08, + "grad_norm": 0.6364653823076378, + "learning_rate": 1.986146501742253e-05, + "loss": 0.3269, "step": 1774 }, { - "epoch": 0.1, - "grad_norm": 0.5849351746372535, - "learning_rate": 1.9729812549386066e-05, - "loss": 0.3811, + "epoch": 0.08, + "grad_norm": 0.29646971919735887, + "learning_rate": 1.986121809714193e-05, + "loss": 0.1932, "step": 1775 }, { - "epoch": 0.1, - "grad_norm": 0.6100840543617714, - "learning_rate": 1.9729382725745997e-05, - "loss": 0.3586, + "epoch": 0.08, + "grad_norm": 0.5820540971559472, + "learning_rate": 1.986097095854347e-05, + "loss": 0.4151, "step": 1776 }, { - "epoch": 0.1, - "grad_norm": 0.32169239000575955, - "learning_rate": 1.9728952565177817e-05, - "loss": 0.1859, + "epoch": 0.08, + "grad_norm": 0.434658750201213, + "learning_rate": 1.9860723601632616e-05, + "loss": 0.3497, "step": 1777 }, { - "epoch": 0.1, - "grad_norm": 0.4926537571689009, - "learning_rate": 1.972852206769643e-05, - "loss": 0.3794, + "epoch": 0.08, + "grad_norm": 0.688245621447827, + "learning_rate": 1.9860476026414846e-05, + "loss": 0.45, "step": 1778 }, { - "epoch": 0.1, - "grad_norm": 1.4956203678206292, - "learning_rate": 1.972809123331674e-05, - "loss": 0.8704, + "epoch": 0.08, + "grad_norm": 0.5694979366994609, + "learning_rate": 1.986022823289564e-05, + "loss": 0.3346, "step": 1779 }, { - "epoch": 0.1, - "grad_norm": 0.432781530735122, - "learning_rate": 1.972766006205367e-05, - "loss": 0.2187, + "epoch": 0.08, + "grad_norm": 0.44001999226644634, + "learning_rate": 1.9859980221080483e-05, + "loss": 0.3175, "step": 1780 }, { - "epoch": 0.1, - "grad_norm": 0.8836133604422183, - "learning_rate": 1.9727228553922152e-05, - "loss": 0.5072, + "epoch": 0.08, + "grad_norm": 0.6980227695687897, + "learning_rate": 1.9859731990974867e-05, + "loss": 0.3857, "step": 1781 }, { - "epoch": 0.1, - "grad_norm": 0.6000939456264764, - "learning_rate": 1.9726796708937125e-05, - "loss": 0.3967, + "epoch": 0.08, + "grad_norm": 0.3721466893422118, + "learning_rate": 1.985948354258429e-05, + "loss": 0.2584, "step": 1782 }, { - "epoch": 0.1, - "grad_norm": 0.36428401062594407, - "learning_rate": 1.972636452711355e-05, - "loss": 0.2402, + "epoch": 0.08, + "grad_norm": 0.5466344289648984, + "learning_rate": 1.9859234875914247e-05, + "loss": 0.3093, "step": 1783 }, { - "epoch": 0.1, - "grad_norm": 0.35233509057631474, - "learning_rate": 1.9725932008466383e-05, - "loss": 0.1392, + "epoch": 0.08, + "grad_norm": 0.4921616624470089, + "learning_rate": 1.9858985990970252e-05, + "loss": 0.3327, "step": 1784 }, { - "epoch": 0.1, - "grad_norm": 0.7541938557788923, - "learning_rate": 1.9725499153010613e-05, - "loss": 0.4919, + "epoch": 0.08, + "grad_norm": 0.8966039834652199, + "learning_rate": 1.9858736887757802e-05, + "loss": 0.5317, "step": 1785 }, { - "epoch": 0.1, - "grad_norm": 0.4648704692571199, - "learning_rate": 1.9725065960761225e-05, - "loss": 0.3193, + "epoch": 0.08, + "grad_norm": 0.6351571047753894, + "learning_rate": 1.9858487566282424e-05, + "loss": 0.3013, "step": 1786 }, { - "epoch": 0.1, - "grad_norm": 0.7643519431832295, - "learning_rate": 1.9724632431733223e-05, - "loss": 0.3782, + "epoch": 0.08, + "grad_norm": 0.4874977634065661, + "learning_rate": 1.9858238026549634e-05, + "loss": 0.3698, "step": 1787 }, { - "epoch": 0.1, - "grad_norm": 0.4383417741200626, - "learning_rate": 1.9724198565941616e-05, - "loss": 0.2711, + "epoch": 0.08, + "grad_norm": 0.36207404343623495, + "learning_rate": 1.9857988268564955e-05, + "loss": 0.318, "step": 1788 }, { - "epoch": 0.1, - "grad_norm": 0.5752027956523929, - "learning_rate": 1.972376436340143e-05, - "loss": 0.3578, + "epoch": 0.08, + "grad_norm": 0.4828406802128624, + "learning_rate": 1.9857738292333915e-05, + "loss": 0.1871, "step": 1789 }, { - "epoch": 0.1, - "grad_norm": 0.3491638947718094, - "learning_rate": 1.9723329824127703e-05, - "loss": 0.2435, + "epoch": 0.08, + "grad_norm": 0.5878710318490176, + "learning_rate": 1.985748809786205e-05, + "loss": 0.3832, "step": 1790 }, { - "epoch": 0.1, - "grad_norm": 0.9210790563154199, - "learning_rate": 1.9722894948135485e-05, - "loss": 0.5047, + "epoch": 0.08, + "grad_norm": 0.5976773730710854, + "learning_rate": 1.9857237685154897e-05, + "loss": 0.4466, "step": 1791 }, { - "epoch": 0.1, - "grad_norm": 0.4745370515957307, - "learning_rate": 1.972245973543983e-05, - "loss": 0.3136, + "epoch": 0.08, + "grad_norm": 0.4130798822183903, + "learning_rate": 1.9856987054218007e-05, + "loss": 0.2146, "step": 1792 }, { - "epoch": 0.1, - "grad_norm": 0.8457754702291259, - "learning_rate": 1.9722024186055812e-05, - "loss": 0.4412, + "epoch": 0.08, + "grad_norm": 0.45883273817813086, + "learning_rate": 1.985673620505692e-05, + "loss": 0.3917, "step": 1793 }, { - "epoch": 0.1, - "grad_norm": 0.5232986705641869, - "learning_rate": 1.9721588299998516e-05, - "loss": 0.3358, + "epoch": 0.08, + "grad_norm": 0.7021508110705673, + "learning_rate": 1.9856485137677197e-05, + "loss": 0.5366, "step": 1794 }, { - "epoch": 0.1, - "grad_norm": 0.43026069963986585, - "learning_rate": 1.9721152077283038e-05, - "loss": 0.2955, + "epoch": 0.08, + "grad_norm": 0.3291559587571528, + "learning_rate": 1.985623385208439e-05, + "loss": 0.1417, "step": 1795 }, { - "epoch": 0.1, - "grad_norm": 0.3170651313977208, - "learning_rate": 1.9720715517924476e-05, - "loss": 0.2067, + "epoch": 0.08, + "grad_norm": 0.4927305062406124, + "learning_rate": 1.9855982348284066e-05, + "loss": 0.3386, "step": 1796 }, { - "epoch": 0.1, - "grad_norm": 1.055213009657008, - "learning_rate": 1.9720278621937954e-05, - "loss": 0.4355, + "epoch": 0.08, + "grad_norm": 1.3701714066718698, + "learning_rate": 1.985573062628179e-05, + "loss": 0.7233, "step": 1797 }, { - "epoch": 0.1, - "grad_norm": 0.39566676103877046, - "learning_rate": 1.9719841389338605e-05, - "loss": 0.2776, + "epoch": 0.08, + "grad_norm": 0.435791366350211, + "learning_rate": 1.985547868608314e-05, + "loss": 0.2557, "step": 1798 }, { - "epoch": 0.1, - "grad_norm": 0.45559232573659136, - "learning_rate": 1.9719403820141563e-05, - "loss": 0.4138, + "epoch": 0.08, + "grad_norm": 0.6093149419417401, + "learning_rate": 1.9855226527693693e-05, + "loss": 0.4452, "step": 1799 }, { - "epoch": 0.1, - "grad_norm": 0.9887763688604866, - "learning_rate": 1.971896591436199e-05, - "loss": 0.2925, + "epoch": 0.08, + "grad_norm": 0.5144921774910622, + "learning_rate": 1.9854974151119027e-05, + "loss": 0.386, "step": 1800 }, { - "epoch": 0.1, - "grad_norm": 0.3638885992702867, - "learning_rate": 1.971852767201504e-05, - "loss": 0.2546, + "epoch": 0.08, + "grad_norm": 0.4209592504991791, + "learning_rate": 1.9854721556364734e-05, + "loss": 0.1833, "step": 1801 }, { - "epoch": 0.1, - "grad_norm": 0.6125066940881474, - "learning_rate": 1.97180890931159e-05, - "loss": 0.4316, + "epoch": 0.08, + "grad_norm": 0.6771450206752724, + "learning_rate": 1.98544687434364e-05, + "loss": 0.4158, "step": 1802 }, { - "epoch": 0.1, - "grad_norm": 0.645523555658483, - "learning_rate": 1.971765017767975e-05, - "loss": 0.3526, + "epoch": 0.08, + "grad_norm": 0.3667182150744982, + "learning_rate": 1.9854215712339626e-05, + "loss": 0.2946, "step": 1803 }, { - "epoch": 0.1, - "grad_norm": 0.48292600807911545, - "learning_rate": 1.9717210925721792e-05, - "loss": 0.3628, + "epoch": 0.08, + "grad_norm": 0.7983659536596921, + "learning_rate": 1.9853962463080013e-05, + "loss": 0.4547, "step": 1804 }, { - "epoch": 0.1, - "grad_norm": 0.510701994227429, - "learning_rate": 1.9716771337257235e-05, - "loss": 0.2944, + "epoch": 0.08, + "grad_norm": 0.42993724589799986, + "learning_rate": 1.985370899566317e-05, + "loss": 0.3045, "step": 1805 }, { - "epoch": 0.1, - "grad_norm": 0.512257770671276, - "learning_rate": 1.9716331412301304e-05, - "loss": 0.3456, + "epoch": 0.08, + "grad_norm": 0.890410847893566, + "learning_rate": 1.985345531009471e-05, + "loss": 0.5417, "step": 1806 }, { - "epoch": 0.1, - "grad_norm": 0.3723557207434571, - "learning_rate": 1.971589115086924e-05, - "loss": 0.2188, + "epoch": 0.08, + "grad_norm": 0.5845268120611091, + "learning_rate": 1.9853201406380243e-05, + "loss": 0.3584, "step": 1807 }, { - "epoch": 0.1, - "grad_norm": 0.4776975335292782, - "learning_rate": 1.971545055297628e-05, - "loss": 0.3271, + "epoch": 0.08, + "grad_norm": 0.4383520410767089, + "learning_rate": 1.9852947284525393e-05, + "loss": 0.2795, "step": 1808 }, { - "epoch": 0.1, - "grad_norm": 0.4671253964539537, - "learning_rate": 1.971500961863768e-05, - "loss": 0.4177, + "epoch": 0.08, + "grad_norm": 0.383028993786552, + "learning_rate": 1.9852692944535786e-05, + "loss": 0.2108, "step": 1809 }, { - "epoch": 0.1, - "grad_norm": 0.4226489406088548, - "learning_rate": 1.9714568347868722e-05, - "loss": 0.2196, + "epoch": 0.08, + "grad_norm": 0.7577384456563768, + "learning_rate": 1.9852438386417058e-05, + "loss": 0.4691, "step": 1810 }, { - "epoch": 0.1, - "grad_norm": 0.36354839297692376, - "learning_rate": 1.9714126740684677e-05, - "loss": 0.3186, + "epoch": 0.08, + "grad_norm": 0.4020921198349881, + "learning_rate": 1.9852183610174833e-05, + "loss": 0.285, "step": 1811 }, { - "epoch": 0.1, - "grad_norm": 1.4618017892562303, - "learning_rate": 1.9713684797100843e-05, - "loss": 0.7304, + "epoch": 0.08, + "grad_norm": 0.5996892203156494, + "learning_rate": 1.9851928615814764e-05, + "loss": 0.3627, "step": 1812 }, { - "epoch": 0.1, - "grad_norm": 0.35343600195296193, - "learning_rate": 1.9713242517132518e-05, - "loss": 0.2205, + "epoch": 0.08, + "grad_norm": 1.0401280549156477, + "learning_rate": 1.9851673403342488e-05, + "loss": 0.5368, "step": 1813 }, { - "epoch": 0.1, - "grad_norm": 0.4373763331330048, - "learning_rate": 1.9712799900795026e-05, - "loss": 0.3891, + "epoch": 0.08, + "grad_norm": 0.3531853456030447, + "learning_rate": 1.985141797276366e-05, + "loss": 0.2163, "step": 1814 }, { - "epoch": 0.1, - "grad_norm": 0.7587560756410427, - "learning_rate": 1.971235694810369e-05, - "loss": 0.5362, + "epoch": 0.08, + "grad_norm": 0.3824840866553206, + "learning_rate": 1.9851162324083933e-05, + "loss": 0.2431, "step": 1815 }, { - "epoch": 0.1, - "grad_norm": 0.38486193172658234, - "learning_rate": 1.971191365907385e-05, - "loss": 0.2663, + "epoch": 0.08, + "grad_norm": 0.5074746833733829, + "learning_rate": 1.9850906457308965e-05, + "loss": 0.3609, "step": 1816 }, { - "epoch": 0.1, - "grad_norm": 0.3515214902888066, - "learning_rate": 1.9711470033720856e-05, - "loss": 0.168, + "epoch": 0.08, + "grad_norm": 0.5263925597285825, + "learning_rate": 1.9850650372444425e-05, + "loss": 0.3741, "step": 1817 }, { - "epoch": 0.1, - "grad_norm": 0.48100670484562935, - "learning_rate": 1.9711026072060077e-05, - "loss": 0.3562, + "epoch": 0.08, + "grad_norm": 0.7174660642026556, + "learning_rate": 1.9850394069495976e-05, + "loss": 0.4505, "step": 1818 }, { - "epoch": 0.1, - "grad_norm": 0.39630337446911107, - "learning_rate": 1.971058177410688e-05, - "loss": 0.2998, + "epoch": 0.08, + "grad_norm": 0.4094659702876812, + "learning_rate": 1.98501375484693e-05, + "loss": 0.3153, "step": 1819 }, { - "epoch": 0.1, - "grad_norm": 0.8004872736078943, - "learning_rate": 1.9710137139876653e-05, - "loss": 0.4614, + "epoch": 0.08, + "grad_norm": 0.5446998550688928, + "learning_rate": 1.984988080937007e-05, + "loss": 0.3498, "step": 1820 }, { - "epoch": 0.1, - "grad_norm": 0.49674968425439975, - "learning_rate": 1.9709692169384794e-05, - "loss": 0.4202, + "epoch": 0.08, + "grad_norm": 0.4845316922040717, + "learning_rate": 1.9849623852203975e-05, + "loss": 0.2985, "step": 1821 }, { - "epoch": 0.1, - "grad_norm": 0.3863426434060569, - "learning_rate": 1.9709246862646712e-05, - "loss": 0.2811, + "epoch": 0.08, + "grad_norm": 0.493235423958602, + "learning_rate": 1.9849366676976697e-05, + "loss": 0.2963, "step": 1822 }, { - "epoch": 0.1, - "grad_norm": 0.22159405934145307, - "learning_rate": 1.9708801219677832e-05, - "loss": 0.0938, + "epoch": 0.08, + "grad_norm": 0.5050256645091589, + "learning_rate": 1.9849109283693937e-05, + "loss": 0.3276, "step": 1823 }, { - "epoch": 0.1, - "grad_norm": 1.116757279703663, - "learning_rate": 1.970835524049358e-05, - "loss": 0.6516, + "epoch": 0.08, + "grad_norm": 0.5386128973100998, + "learning_rate": 1.9848851672361392e-05, + "loss": 0.3148, "step": 1824 }, { - "epoch": 0.1, - "grad_norm": 0.4811222290577782, - "learning_rate": 1.9707908925109405e-05, - "loss": 0.3042, + "epoch": 0.08, + "grad_norm": 1.0259265337866235, + "learning_rate": 1.984859384298476e-05, + "loss": 0.4823, "step": 1825 }, { - "epoch": 0.1, - "grad_norm": 0.4106501961078986, - "learning_rate": 1.9707462273540762e-05, - "loss": 0.3154, + "epoch": 0.08, + "grad_norm": 0.5128208762718018, + "learning_rate": 1.984833579556975e-05, + "loss": 0.3309, "step": 1826 }, { - "epoch": 0.1, - "grad_norm": 0.8876173475980776, - "learning_rate": 1.9707015285803117e-05, - "loss": 0.5606, + "epoch": 0.08, + "grad_norm": 0.6095455133225118, + "learning_rate": 1.9848077530122083e-05, + "loss": 0.4099, "step": 1827 }, { - "epoch": 0.11, - "grad_norm": 0.4763770328566096, - "learning_rate": 1.9706567961911952e-05, - "loss": 0.3034, + "epoch": 0.08, + "grad_norm": 0.4824538325559773, + "learning_rate": 1.984781904664747e-05, + "loss": 0.2529, "step": 1828 }, { - "epoch": 0.11, - "grad_norm": 0.2731838409066813, - "learning_rate": 1.9706120301882755e-05, - "loss": 0.1925, + "epoch": 0.08, + "grad_norm": 0.375963897631843, + "learning_rate": 1.984756034515163e-05, + "loss": 0.2803, "step": 1829 }, { - "epoch": 0.11, - "grad_norm": 0.6050226311696162, - "learning_rate": 1.9705672305731027e-05, - "loss": 0.3795, + "epoch": 0.08, + "grad_norm": 2.4802573941507466, + "learning_rate": 1.98473014256403e-05, + "loss": 0.8471, "step": 1830 }, { - "epoch": 0.11, - "grad_norm": 0.5254253951284472, - "learning_rate": 1.9705223973472288e-05, - "loss": 0.3244, + "epoch": 0.08, + "grad_norm": 0.5463633190470593, + "learning_rate": 1.9847042288119205e-05, + "loss": 0.2898, "step": 1831 }, { - "epoch": 0.11, - "grad_norm": 0.7309290009909968, - "learning_rate": 1.9704775305122057e-05, - "loss": 0.4473, + "epoch": 0.08, + "grad_norm": 0.5258542414770248, + "learning_rate": 1.9846782932594085e-05, + "loss": 0.3369, "step": 1832 }, { - "epoch": 0.11, - "grad_norm": 0.5142034091004006, - "learning_rate": 1.9704326300695874e-05, - "loss": 0.3584, + "epoch": 0.08, + "grad_norm": 0.9534239370229391, + "learning_rate": 1.9846523359070683e-05, + "loss": 0.5282, "step": 1833 }, { - "epoch": 0.11, - "grad_norm": 0.41208408396675505, - "learning_rate": 1.9703876960209292e-05, - "loss": 0.2997, + "epoch": 0.08, + "grad_norm": 0.6923769803438491, + "learning_rate": 1.9846263567554744e-05, + "loss": 0.2063, "step": 1834 }, { - "epoch": 0.11, - "grad_norm": 0.4215025386501301, - "learning_rate": 1.970342728367787e-05, - "loss": 0.207, + "epoch": 0.08, + "grad_norm": 0.60034200594085, + "learning_rate": 1.984600355805202e-05, + "loss": 0.3114, "step": 1835 }, { - "epoch": 0.11, - "grad_norm": 1.1215430532038166, - "learning_rate": 1.9702977271117172e-05, - "loss": 0.3586, + "epoch": 0.08, + "grad_norm": 0.5918890816462327, + "learning_rate": 1.9845743330568265e-05, + "loss": 0.3474, "step": 1836 }, { - "epoch": 0.11, - "grad_norm": 0.40290356090073365, - "learning_rate": 1.9702526922542788e-05, - "loss": 0.3035, + "epoch": 0.08, + "grad_norm": 0.7038753312985933, + "learning_rate": 1.984548288510924e-05, + "loss": 0.2313, "step": 1837 }, { - "epoch": 0.11, - "grad_norm": 0.4444886784774347, - "learning_rate": 1.970207623797032e-05, - "loss": 0.367, + "epoch": 0.08, + "grad_norm": 0.5526065822192627, + "learning_rate": 1.9845222221680717e-05, + "loss": 0.3343, "step": 1838 }, { - "epoch": 0.11, - "grad_norm": 0.7960973226307128, - "learning_rate": 1.9701625217415366e-05, - "loss": 0.4945, + "epoch": 0.08, + "grad_norm": 0.66080857145344, + "learning_rate": 1.984496134028846e-05, + "loss": 0.3968, "step": 1839 }, { - "epoch": 0.11, - "grad_norm": 0.40419674822028395, - "learning_rate": 1.9701173860893547e-05, - "loss": 0.2819, + "epoch": 0.08, + "grad_norm": 1.159336585806815, + "learning_rate": 1.984470024093825e-05, + "loss": 0.6108, "step": 1840 }, { - "epoch": 0.11, - "grad_norm": 0.3928574273644804, - "learning_rate": 1.9700722168420493e-05, - "loss": 0.1876, + "epoch": 0.08, + "grad_norm": 0.5783577561227164, + "learning_rate": 1.9844438923635866e-05, + "loss": 0.1855, "step": 1841 }, { - "epoch": 0.11, - "grad_norm": 0.6832258064420349, - "learning_rate": 1.9700270140011852e-05, - "loss": 0.3169, + "epoch": 0.08, + "grad_norm": 0.7087109712689228, + "learning_rate": 1.9844177388387093e-05, + "loss": 0.3379, "step": 1842 }, { - "epoch": 0.11, - "grad_norm": 0.5857771512420725, - "learning_rate": 1.969981777568327e-05, - "loss": 0.3352, + "epoch": 0.08, + "grad_norm": 0.5127281358865282, + "learning_rate": 1.9843915635197714e-05, + "loss": 0.3677, "step": 1843 }, { - "epoch": 0.11, - "grad_norm": 0.4742736573868894, - "learning_rate": 1.969936507545042e-05, - "loss": 0.3776, + "epoch": 0.08, + "grad_norm": 0.4869597953292276, + "learning_rate": 1.9843653664073535e-05, + "loss": 0.2701, "step": 1844 }, { - "epoch": 0.11, - "grad_norm": 0.448882200382126, - "learning_rate": 1.969891203932897e-05, - "loss": 0.3702, + "epoch": 0.08, + "grad_norm": 1.20001843304987, + "learning_rate": 1.9843391475020352e-05, + "loss": 0.5731, "step": 1845 }, { - "epoch": 0.11, - "grad_norm": 0.3997430046247253, - "learning_rate": 1.9698458667334616e-05, - "loss": 0.2028, + "epoch": 0.08, + "grad_norm": 0.6501968017424933, + "learning_rate": 1.984312906804397e-05, + "loss": 0.2681, "step": 1846 }, { - "epoch": 0.11, - "grad_norm": 0.42185324820247727, - "learning_rate": 1.969800495948305e-05, - "loss": 0.2546, + "epoch": 0.08, + "grad_norm": 0.4616482986061454, + "learning_rate": 1.9842866443150192e-05, + "loss": 0.2688, "step": 1847 }, { - "epoch": 0.11, - "grad_norm": 0.581614305117135, - "learning_rate": 1.9697550915789992e-05, - "loss": 0.3695, + "epoch": 0.08, + "grad_norm": 0.5552073882286912, + "learning_rate": 1.9842603600344844e-05, + "loss": 0.2315, "step": 1848 }, { - "epoch": 0.11, - "grad_norm": 0.46363413685206445, - "learning_rate": 1.9697096536271166e-05, - "loss": 0.2585, + "epoch": 0.08, + "grad_norm": 1.1855278689203255, + "learning_rate": 1.9842340539633734e-05, + "loss": 0.6302, "step": 1849 }, { - "epoch": 0.11, - "grad_norm": 0.42718596651710894, - "learning_rate": 1.96966418209423e-05, - "loss": 0.3225, + "epoch": 0.08, + "grad_norm": 0.5182621508611136, + "learning_rate": 1.984207726102269e-05, + "loss": 0.2915, "step": 1850 }, { - "epoch": 0.11, - "grad_norm": 1.4101683380916672, - "learning_rate": 1.9696186769819146e-05, - "loss": 0.8429, + "epoch": 0.09, + "grad_norm": 0.47268756722408256, + "learning_rate": 1.9841813764517548e-05, + "loss": 0.3682, "step": 1851 }, { - "epoch": 0.11, - "grad_norm": 0.406650262457706, - "learning_rate": 1.9695731382917463e-05, - "loss": 0.2515, + "epoch": 0.09, + "grad_norm": 1.016167795419829, + "learning_rate": 1.984155005012413e-05, + "loss": 0.5965, "step": 1852 }, { - "epoch": 0.11, - "grad_norm": 0.2883048378866706, - "learning_rate": 1.9695275660253017e-05, - "loss": 0.236, + "epoch": 0.09, + "grad_norm": 0.4463406306772212, + "learning_rate": 1.984128611784828e-05, + "loss": 0.3036, "step": 1853 }, { - "epoch": 0.11, - "grad_norm": 0.5570129462774673, - "learning_rate": 1.9694819601841593e-05, - "loss": 0.4075, + "epoch": 0.09, + "grad_norm": 0.35674620848399313, + "learning_rate": 1.984102196769584e-05, + "loss": 0.0996, "step": 1854 }, { - "epoch": 0.11, - "grad_norm": 0.6608487821497272, - "learning_rate": 1.969436320769898e-05, - "loss": 0.324, + "epoch": 0.09, + "grad_norm": 0.63074319011239, + "learning_rate": 1.984075759967266e-05, + "loss": 0.3864, "step": 1855 }, { - "epoch": 0.11, - "grad_norm": 0.695684745170112, - "learning_rate": 1.969390647784099e-05, - "loss": 0.4257, + "epoch": 0.09, + "grad_norm": 0.5936145519888961, + "learning_rate": 1.984049301378459e-05, + "loss": 0.3223, "step": 1856 }, { - "epoch": 0.11, - "grad_norm": 0.42950676535452104, - "learning_rate": 1.9693449412283435e-05, - "loss": 0.3572, + "epoch": 0.09, + "grad_norm": 0.9966760633366543, + "learning_rate": 1.984022821003749e-05, + "loss": 0.3896, "step": 1857 }, { - "epoch": 0.11, - "grad_norm": 0.422128890021055, - "learning_rate": 1.9692992011042143e-05, - "loss": 0.2884, + "epoch": 0.09, + "grad_norm": 0.6550273216994056, + "learning_rate": 1.9839963188437223e-05, + "loss": 0.3966, "step": 1858 }, { - "epoch": 0.11, - "grad_norm": 0.4976931054505805, - "learning_rate": 1.969253427413295e-05, - "loss": 0.3108, + "epoch": 0.09, + "grad_norm": 0.43336951763895726, + "learning_rate": 1.983969794898965e-05, + "loss": 0.2901, "step": 1859 }, { - "epoch": 0.11, - "grad_norm": 0.42506504553438845, - "learning_rate": 1.969207620157172e-05, - "loss": 0.2968, + "epoch": 0.09, + "grad_norm": 0.36139528585867514, + "learning_rate": 1.9839432491700653e-05, + "loss": 0.1936, "step": 1860 }, { - "epoch": 0.11, - "grad_norm": 0.44845845302893456, - "learning_rate": 1.9691617793374304e-05, - "loss": 0.2973, + "epoch": 0.09, + "grad_norm": 0.9222199873128843, + "learning_rate": 1.9839166816576097e-05, + "loss": 0.5312, "step": 1861 }, { - "epoch": 0.11, - "grad_norm": 0.463892625466207, - "learning_rate": 1.969115904955658e-05, - "loss": 0.2725, + "epoch": 0.09, + "grad_norm": 0.4844929993518195, + "learning_rate": 1.983890092362188e-05, + "loss": 0.3177, "step": 1862 }, { - "epoch": 0.11, - "grad_norm": 1.2968732447910472, - "learning_rate": 1.9690699970134436e-05, - "loss": 0.7922, + "epoch": 0.09, + "grad_norm": 0.5090428114844479, + "learning_rate": 1.9838634812843875e-05, + "loss": 0.31, "step": 1863 }, { - "epoch": 0.11, - "grad_norm": 0.7591209376006144, - "learning_rate": 1.9690240555123767e-05, - "loss": 0.4119, + "epoch": 0.09, + "grad_norm": 1.966088143719992, + "learning_rate": 1.9838368484247976e-05, + "loss": 0.8561, "step": 1864 }, { - "epoch": 0.11, - "grad_norm": 0.37942803845107786, - "learning_rate": 1.9689780804540487e-05, - "loss": 0.2586, + "epoch": 0.09, + "grad_norm": 0.49432071929148247, + "learning_rate": 1.9838101937840085e-05, + "loss": 0.3002, "step": 1865 }, { - "epoch": 0.11, - "grad_norm": 0.5185336534105881, - "learning_rate": 1.968932071840051e-05, - "loss": 0.4203, + "epoch": 0.09, + "grad_norm": 0.45064531408217734, + "learning_rate": 1.9837835173626095e-05, + "loss": 0.1859, "step": 1866 }, { - "epoch": 0.11, - "grad_norm": 0.41377909230155135, - "learning_rate": 1.968886029671978e-05, - "loss": 0.2385, + "epoch": 0.09, + "grad_norm": 0.5952836236105027, + "learning_rate": 1.983756819161192e-05, + "loss": 0.3557, "step": 1867 }, { - "epoch": 0.11, - "grad_norm": 0.3525014148154284, - "learning_rate": 1.968839953951423e-05, - "loss": 0.2215, + "epoch": 0.09, + "grad_norm": 0.4480817831374804, + "learning_rate": 1.9837300991803462e-05, + "loss": 0.2876, "step": 1868 }, { - "epoch": 0.11, - "grad_norm": 0.4768967276948403, - "learning_rate": 1.968793844679982e-05, - "loss": 0.3546, + "epoch": 0.09, + "grad_norm": 1.5133015864181774, + "learning_rate": 1.9837033574206644e-05, + "loss": 0.8068, "step": 1869 }, { - "epoch": 0.11, - "grad_norm": 0.5977294660884478, - "learning_rate": 1.9687477018592517e-05, - "loss": 0.3633, + "epoch": 0.09, + "grad_norm": 0.5169006779804324, + "learning_rate": 1.9836765938827385e-05, + "loss": 0.2915, "step": 1870 }, { - "epoch": 0.11, - "grad_norm": 0.4148338563966621, - "learning_rate": 1.9687015254908305e-05, - "loss": 0.375, + "epoch": 0.09, + "grad_norm": 0.49168121245995894, + "learning_rate": 1.9836498085671613e-05, + "loss": 0.2727, "step": 1871 }, { - "epoch": 0.11, - "grad_norm": 0.5783402829003099, - "learning_rate": 1.968655315576317e-05, - "loss": 0.4202, + "epoch": 0.09, + "grad_norm": 0.4821743322605833, + "learning_rate": 1.9836230014745248e-05, + "loss": 0.2206, "step": 1872 }, { - "epoch": 0.11, - "grad_norm": 0.3409097705300037, - "learning_rate": 1.9686090721173118e-05, - "loss": 0.2646, + "epoch": 0.09, + "grad_norm": 0.7327361527476197, + "learning_rate": 1.9835961726054228e-05, + "loss": 0.468, "step": 1873 }, { - "epoch": 0.11, - "grad_norm": 0.3731720064413754, - "learning_rate": 1.9685627951154154e-05, - "loss": 0.259, + "epoch": 0.09, + "grad_norm": 0.5315220831053276, + "learning_rate": 1.98356932196045e-05, + "loss": 0.3028, "step": 1874 }, { - "epoch": 0.11, - "grad_norm": 1.056227121717924, - "learning_rate": 1.9685164845722317e-05, - "loss": 0.6084, + "epoch": 0.09, + "grad_norm": 0.5963145940506028, + "learning_rate": 1.9835424495402004e-05, + "loss": 0.3769, "step": 1875 }, { - "epoch": 0.11, - "grad_norm": 0.47496672745326424, - "learning_rate": 1.9684701404893635e-05, - "loss": 0.331, + "epoch": 0.09, + "grad_norm": 1.1877550815088374, + "learning_rate": 1.983515555345269e-05, + "loss": 0.4056, "step": 1876 }, { - "epoch": 0.11, - "grad_norm": 0.6892217331540745, - "learning_rate": 1.9684237628684162e-05, - "loss": 0.4158, + "epoch": 0.09, + "grad_norm": 0.5433237951246551, + "learning_rate": 1.983488639376251e-05, + "loss": 0.2983, "step": 1877 }, { - "epoch": 0.11, - "grad_norm": 0.5381495786374161, - "learning_rate": 1.9683773517109954e-05, - "loss": 0.3442, + "epoch": 0.09, + "grad_norm": 0.48176949042586076, + "learning_rate": 1.9834617016337424e-05, + "loss": 0.374, "step": 1878 }, { - "epoch": 0.11, - "grad_norm": 0.4442211799150528, - "learning_rate": 1.9683309070187088e-05, - "loss": 0.2373, + "epoch": 0.09, + "grad_norm": 0.4802190161939199, + "learning_rate": 1.9834347421183392e-05, + "loss": 0.3549, "step": 1879 }, { - "epoch": 0.11, - "grad_norm": 0.4516614938674143, - "learning_rate": 1.9682844287931643e-05, - "loss": 0.2681, + "epoch": 0.09, + "grad_norm": 0.3852920657080918, + "learning_rate": 1.9834077608306392e-05, + "loss": 0.205, "step": 1880 }, { - "epoch": 0.11, - "grad_norm": 0.46034638626235896, - "learning_rate": 1.9682379170359717e-05, - "loss": 0.379, + "epoch": 0.09, + "grad_norm": 1.5455878296498895, + "learning_rate": 1.9833807577712393e-05, + "loss": 0.893, "step": 1881 }, { - "epoch": 0.11, - "grad_norm": 0.7113109005722336, - "learning_rate": 1.9681913717487418e-05, - "loss": 0.2923, + "epoch": 0.09, + "grad_norm": 1.1486914008528828, + "learning_rate": 1.983353732940737e-05, + "loss": 0.6357, "step": 1882 }, { - "epoch": 0.11, - "grad_norm": 0.46398582128192567, - "learning_rate": 1.9681447929330864e-05, - "loss": 0.343, + "epoch": 0.09, + "grad_norm": 0.4181837906322127, + "learning_rate": 1.9833266863397304e-05, + "loss": 0.2527, "step": 1883 }, { - "epoch": 0.11, - "grad_norm": 0.8005165026287019, - "learning_rate": 1.9680981805906182e-05, - "loss": 0.5295, + "epoch": 0.09, + "grad_norm": 0.9567670849628994, + "learning_rate": 1.983299617968819e-05, + "loss": 0.557, "step": 1884 }, { - "epoch": 0.11, - "grad_norm": 0.5318756913666476, - "learning_rate": 1.968051534722952e-05, - "loss": 0.222, + "epoch": 0.09, + "grad_norm": 0.3771553785315941, + "learning_rate": 1.983272527828602e-05, + "loss": 0.29, "step": 1885 }, { - "epoch": 0.11, - "grad_norm": 0.36430519799466327, - "learning_rate": 1.968004855331702e-05, - "loss": 0.2523, + "epoch": 0.09, + "grad_norm": 0.47636028529950875, + "learning_rate": 1.983245415919679e-05, + "loss": 0.2405, "step": 1886 }, { - "epoch": 0.11, - "grad_norm": 1.379727729095134, - "learning_rate": 1.9679581424184862e-05, - "loss": 0.803, + "epoch": 0.09, + "grad_norm": 0.5099678036440669, + "learning_rate": 1.9832182822426498e-05, + "loss": 0.3435, "step": 1887 }, { - "epoch": 0.11, - "grad_norm": 0.6338746679719071, - "learning_rate": 1.9679113959849213e-05, - "loss": 0.3052, + "epoch": 0.09, + "grad_norm": 1.1601399744986034, + "learning_rate": 1.9831911267981156e-05, + "loss": 0.5918, "step": 1888 }, { - "epoch": 0.11, - "grad_norm": 0.436401101542336, - "learning_rate": 1.9678646160326268e-05, - "loss": 0.3473, + "epoch": 0.09, + "grad_norm": 0.432514674027098, + "learning_rate": 1.9831639495866774e-05, + "loss": 0.221, "step": 1889 }, { - "epoch": 0.11, - "grad_norm": 0.9931177098972608, - "learning_rate": 1.9678178025632217e-05, - "loss": 0.5409, + "epoch": 0.09, + "grad_norm": 0.8471092428022736, + "learning_rate": 1.983136750608937e-05, + "loss": 0.5272, "step": 1890 }, { - "epoch": 0.11, - "grad_norm": 0.4048200278519992, - "learning_rate": 1.9677709555783278e-05, - "loss": 0.1736, + "epoch": 0.09, + "grad_norm": 0.46684174340132695, + "learning_rate": 1.9831095298654964e-05, + "loss": 0.2521, "step": 1891 }, { - "epoch": 0.11, - "grad_norm": 0.5199305893202164, - "learning_rate": 1.9677240750795677e-05, - "loss": 0.3528, + "epoch": 0.09, + "grad_norm": 0.8182503063277783, + "learning_rate": 1.9830822873569585e-05, + "loss": 0.4021, "step": 1892 }, { - "epoch": 0.11, - "grad_norm": 0.45682355339831787, - "learning_rate": 1.967677161068564e-05, - "loss": 0.3718, + "epoch": 0.09, + "grad_norm": 0.5375459177786547, + "learning_rate": 1.9830550230839263e-05, + "loss": 0.3312, "step": 1893 }, { - "epoch": 0.11, - "grad_norm": 0.6801158394462933, - "learning_rate": 1.9676302135469424e-05, - "loss": 0.3543, + "epoch": 0.09, + "grad_norm": 0.49702408568767525, + "learning_rate": 1.9830277370470035e-05, + "loss": 0.3616, "step": 1894 }, { - "epoch": 0.11, - "grad_norm": 0.4675467552847163, - "learning_rate": 1.9675832325163277e-05, - "loss": 0.2873, + "epoch": 0.09, + "grad_norm": 0.6809466244501035, + "learning_rate": 1.9830004292467936e-05, + "loss": 0.4166, "step": 1895 }, { - "epoch": 0.11, - "grad_norm": 1.21108403861959, - "learning_rate": 1.9675362179783472e-05, - "loss": 0.6525, + "epoch": 0.09, + "grad_norm": 0.47835888050493636, + "learning_rate": 1.982973099683902e-05, + "loss": 0.3534, "step": 1896 }, { - "epoch": 0.11, - "grad_norm": 0.4128238832269528, - "learning_rate": 1.967489169934629e-05, - "loss": 0.3157, + "epoch": 0.09, + "grad_norm": 0.6585766413239953, + "learning_rate": 1.982945748358933e-05, + "loss": 0.2595, "step": 1897 }, { - "epoch": 0.11, - "grad_norm": 0.4572471040151339, - "learning_rate": 1.9674420883868032e-05, - "loss": 0.2685, + "epoch": 0.09, + "grad_norm": 0.46306406550497015, + "learning_rate": 1.982918375272493e-05, + "loss": 0.33, "step": 1898 }, { - "epoch": 0.11, - "grad_norm": 0.5102926952782346, - "learning_rate": 1.9673949733364987e-05, - "loss": 0.3365, + "epoch": 0.09, + "grad_norm": 0.37369491938377014, + "learning_rate": 1.982890980425187e-05, + "loss": 0.2453, "step": 1899 }, { - "epoch": 0.11, - "grad_norm": 0.801037699421989, - "learning_rate": 1.9673478247853482e-05, - "loss": 0.5427, + "epoch": 0.09, + "grad_norm": 1.4683752686530696, + "learning_rate": 1.9828635638176226e-05, + "loss": 0.6446, "step": 1900 }, { - "epoch": 0.11, - "grad_norm": 0.40519304528153915, - "learning_rate": 1.967300642734984e-05, - "loss": 0.2522, + "epoch": 0.09, + "grad_norm": 0.4897173634114251, + "learning_rate": 1.9828361254504057e-05, + "loss": 0.3536, "step": 1901 }, { - "epoch": 0.11, - "grad_norm": 0.43817917000824186, - "learning_rate": 1.96725342718704e-05, - "loss": 0.2323, + "epoch": 0.09, + "grad_norm": 0.5582314579275497, + "learning_rate": 1.9828086653241444e-05, + "loss": 0.3461, "step": 1902 }, { - "epoch": 0.11, - "grad_norm": 1.0546181162648953, - "learning_rate": 1.967206178143152e-05, - "loss": 0.6465, + "epoch": 0.09, + "grad_norm": 0.5483590179233153, + "learning_rate": 1.9827811834394467e-05, + "loss": 0.3553, "step": 1903 }, { - "epoch": 0.11, - "grad_norm": 0.40293228795698843, - "learning_rate": 1.967158895604955e-05, - "loss": 0.2422, + "epoch": 0.09, + "grad_norm": 0.5245475272236104, + "learning_rate": 1.9827536797969205e-05, + "loss": 0.3042, "step": 1904 }, { - "epoch": 0.11, - "grad_norm": 0.4840118868245216, - "learning_rate": 1.967111579574087e-05, - "loss": 0.3647, + "epoch": 0.09, + "grad_norm": 0.7548663663818748, + "learning_rate": 1.982726154397175e-05, + "loss": 0.4534, "step": 1905 }, { - "epoch": 0.11, - "grad_norm": 0.856390248834586, - "learning_rate": 1.967064230052187e-05, - "loss": 0.3756, + "epoch": 0.09, + "grad_norm": 0.32805362298999935, + "learning_rate": 1.98269860724082e-05, + "loss": 0.226, "step": 1906 }, { - "epoch": 0.11, - "grad_norm": 0.3372824122208772, - "learning_rate": 1.9670168470408942e-05, - "loss": 0.2382, + "epoch": 0.09, + "grad_norm": 0.6275196456082323, + "learning_rate": 1.982671038328465e-05, + "loss": 0.4124, "step": 1907 }, { - "epoch": 0.11, - "grad_norm": 1.0445851645817175, - "learning_rate": 1.9669694305418498e-05, - "loss": 0.3723, + "epoch": 0.09, + "grad_norm": 0.5440372823144775, + "learning_rate": 1.9826434476607198e-05, + "loss": 0.3913, "step": 1908 }, { - "epoch": 0.11, - "grad_norm": 0.5236158385845334, - "learning_rate": 1.9669219805566954e-05, - "loss": 0.3219, + "epoch": 0.09, + "grad_norm": 0.9669380376912023, + "learning_rate": 1.982615835238196e-05, + "loss": 0.4756, "step": 1909 }, { - "epoch": 0.11, - "grad_norm": 0.489704534862564, - "learning_rate": 1.966874497087074e-05, - "loss": 0.3341, + "epoch": 0.09, + "grad_norm": 0.8046501352707974, + "learning_rate": 1.9825882010615048e-05, + "loss": 0.3438, "step": 1910 }, { - "epoch": 0.11, - "grad_norm": 0.8448033082087018, - "learning_rate": 1.9668269801346305e-05, - "loss": 0.4509, + "epoch": 0.09, + "grad_norm": 0.5108115968976568, + "learning_rate": 1.9825605451312574e-05, + "loss": 0.3112, "step": 1911 }, { - "epoch": 0.11, - "grad_norm": 0.440510759931443, - "learning_rate": 1.966779429701011e-05, - "loss": 0.3606, + "epoch": 0.09, + "grad_norm": 0.5182733888660727, + "learning_rate": 1.9825328674480672e-05, + "loss": 0.2714, "step": 1912 }, { - "epoch": 0.11, - "grad_norm": 0.49307606252366426, - "learning_rate": 1.9667318457878604e-05, - "loss": 0.3268, + "epoch": 0.09, + "grad_norm": 0.6780928161168133, + "learning_rate": 1.982505168012546e-05, + "loss": 0.287, "step": 1913 }, { - "epoch": 0.11, - "grad_norm": 0.4100642263944072, - "learning_rate": 1.966684228396828e-05, - "loss": 0.2066, + "epoch": 0.09, + "grad_norm": 0.5094638805766286, + "learning_rate": 1.982477446825307e-05, + "loss": 0.3294, "step": 1914 }, { - "epoch": 0.11, - "grad_norm": 0.9405707203282181, - "learning_rate": 1.9666365775295622e-05, - "loss": 0.4985, + "epoch": 0.09, + "grad_norm": 0.6781153046639767, + "learning_rate": 1.982449703886965e-05, + "loss": 0.3558, "step": 1915 }, { - "epoch": 0.11, - "grad_norm": 0.46413787240405263, - "learning_rate": 1.966588893187714e-05, - "loss": 0.2986, + "epoch": 0.09, + "grad_norm": 0.7549813902725848, + "learning_rate": 1.9824219391981326e-05, + "loss": 0.4052, "step": 1916 }, { - "epoch": 0.11, - "grad_norm": 0.532938541056413, - "learning_rate": 1.966541175372933e-05, - "loss": 0.3551, + "epoch": 0.09, + "grad_norm": 0.4238395156147871, + "learning_rate": 1.982394152759426e-05, + "loss": 0.2722, "step": 1917 }, { - "epoch": 0.11, - "grad_norm": 0.8985917488823684, - "learning_rate": 1.966493424086873e-05, - "loss": 0.6217, + "epoch": 0.09, + "grad_norm": 0.5458126639026571, + "learning_rate": 1.9823663445714595e-05, + "loss": 0.4178, "step": 1918 }, { - "epoch": 0.11, - "grad_norm": 0.33445042184657847, - "learning_rate": 1.9664456393311876e-05, - "loss": 0.2323, + "epoch": 0.09, + "grad_norm": 0.36298129912973, + "learning_rate": 1.9823385146348485e-05, + "loss": 0.2388, "step": 1919 }, { - "epoch": 0.11, - "grad_norm": 0.5402494374756727, - "learning_rate": 1.966397821107531e-05, - "loss": 0.2774, + "epoch": 0.09, + "grad_norm": 0.531623486243174, + "learning_rate": 1.98231066295021e-05, + "loss": 0.353, "step": 1920 }, { - "epoch": 0.11, - "grad_norm": 0.45417449458805104, - "learning_rate": 1.9663499694175596e-05, - "loss": 0.2966, + "epoch": 0.09, + "grad_norm": 1.1972605224079003, + "learning_rate": 1.9822827895181603e-05, + "loss": 0.6799, "step": 1921 }, { - "epoch": 0.11, - "grad_norm": 0.41078506159188094, - "learning_rate": 1.96630208426293e-05, - "loss": 0.3234, + "epoch": 0.09, + "grad_norm": 0.4282930503954081, + "learning_rate": 1.9822548943393164e-05, + "loss": 0.26, "step": 1922 }, { - "epoch": 0.11, - "grad_norm": 0.880978732867112, - "learning_rate": 1.966254165645301e-05, - "loss": 0.5256, + "epoch": 0.09, + "grad_norm": 0.5391102249757683, + "learning_rate": 1.9822269774142954e-05, + "loss": 0.3595, "step": 1923 }, { - "epoch": 0.11, - "grad_norm": 0.397876864335666, - "learning_rate": 1.9662062135663316e-05, - "loss": 0.2994, + "epoch": 0.09, + "grad_norm": 0.6983716142445426, + "learning_rate": 1.982199038743716e-05, + "loss": 0.5432, "step": 1924 }, { - "epoch": 0.11, - "grad_norm": 0.36149216851821453, - "learning_rate": 1.9661582280276828e-05, - "loss": 0.2885, + "epoch": 0.09, + "grad_norm": 0.29483046464153667, + "learning_rate": 1.9821710783281966e-05, + "loss": 0.0814, "step": 1925 }, { - "epoch": 0.11, - "grad_norm": 0.32020921506739036, - "learning_rate": 1.9661102090310157e-05, - "loss": 0.1698, + "epoch": 0.09, + "grad_norm": 0.5168243373029131, + "learning_rate": 1.9821430961683565e-05, + "loss": 0.3348, "step": 1926 }, { - "epoch": 0.11, - "grad_norm": 1.0554723084064312, - "learning_rate": 1.9660621565779943e-05, - "loss": 0.4549, + "epoch": 0.09, + "grad_norm": 0.6724981218084933, + "learning_rate": 1.9821150922648143e-05, + "loss": 0.4434, "step": 1927 }, { - "epoch": 0.11, - "grad_norm": 0.39709968758603115, - "learning_rate": 1.966014070670281e-05, - "loss": 0.2861, + "epoch": 0.09, + "grad_norm": 0.555236279380417, + "learning_rate": 1.982087066618191e-05, + "loss": 0.2933, "step": 1928 }, { - "epoch": 0.11, - "grad_norm": 0.4627118390227328, - "learning_rate": 1.965965951309543e-05, - "loss": 0.37, + "epoch": 0.09, + "grad_norm": 0.49561793290000494, + "learning_rate": 1.982059019229106e-05, + "loss": 0.3557, "step": 1929 }, { - "epoch": 0.11, - "grad_norm": 0.5652502961754505, - "learning_rate": 1.965917798497445e-05, - "loss": 0.3301, + "epoch": 0.09, + "grad_norm": 0.46832332643153696, + "learning_rate": 1.9820309500981813e-05, + "loss": 0.393, "step": 1930 }, { - "epoch": 0.11, - "grad_norm": 0.5014863993079642, - "learning_rate": 1.9658696122356556e-05, - "loss": 0.3489, + "epoch": 0.09, + "grad_norm": 0.42306146417495866, + "learning_rate": 1.9820028592260375e-05, + "loss": 0.228, "step": 1931 }, { - "epoch": 0.11, - "grad_norm": 0.3653922709051474, - "learning_rate": 1.965821392525843e-05, - "loss": 0.2062, + "epoch": 0.09, + "grad_norm": 0.35120687273762274, + "learning_rate": 1.9819747466132972e-05, + "loss": 0.1989, "step": 1932 }, { - "epoch": 0.11, - "grad_norm": 0.5667706777693249, - "learning_rate": 1.9657731393696768e-05, - "loss": 0.3548, + "epoch": 0.09, + "grad_norm": 1.606184885870421, + "learning_rate": 1.9819466122605826e-05, + "loss": 0.7207, "step": 1933 }, { - "epoch": 0.11, - "grad_norm": 0.36616733693133574, - "learning_rate": 1.9657248527688285e-05, - "loss": 0.2127, + "epoch": 0.09, + "grad_norm": 0.4476171944299043, + "learning_rate": 1.981918456168516e-05, + "loss": 0.3204, "step": 1934 }, { - "epoch": 0.11, - "grad_norm": 1.0432472752070607, - "learning_rate": 1.9656765327249697e-05, - "loss": 0.5552, + "epoch": 0.09, + "grad_norm": 0.5184225689386348, + "learning_rate": 1.9818902783377215e-05, + "loss": 0.3083, "step": 1935 }, { - "epoch": 0.11, - "grad_norm": 0.5719621038132259, - "learning_rate": 1.9656281792397745e-05, - "loss": 0.4127, + "epoch": 0.09, + "grad_norm": 1.0494532166868527, + "learning_rate": 1.981862078768822e-05, + "loss": 0.5774, "step": 1936 }, { - "epoch": 0.11, - "grad_norm": 0.37457017917808055, - "learning_rate": 1.965579792314917e-05, - "loss": 0.2565, + "epoch": 0.09, + "grad_norm": 0.3605269751141371, + "learning_rate": 1.981833857462443e-05, + "loss": 0.2021, "step": 1937 }, { - "epoch": 0.11, - "grad_norm": 0.30681311507200504, - "learning_rate": 1.9655313719520726e-05, - "loss": 0.2221, + "epoch": 0.09, + "grad_norm": 0.4253002447023497, + "learning_rate": 1.9818056144192084e-05, + "loss": 0.2744, "step": 1938 }, { - "epoch": 0.11, - "grad_norm": 0.8535758857629228, - "learning_rate": 1.9654829181529186e-05, - "loss": 0.5047, + "epoch": 0.09, + "grad_norm": 0.60020737342717, + "learning_rate": 1.981777349639744e-05, + "loss": 0.3808, "step": 1939 }, { - "epoch": 0.11, - "grad_norm": 0.5719271930360567, - "learning_rate": 1.965434430919132e-05, - "loss": 0.286, + "epoch": 0.09, + "grad_norm": 0.8660255411677589, + "learning_rate": 1.9817490631246754e-05, + "loss": 0.4866, "step": 1940 }, { - "epoch": 0.11, - "grad_norm": 0.5563675709499672, - "learning_rate": 1.9653859102523936e-05, - "loss": 0.364, + "epoch": 0.09, + "grad_norm": 0.5004577618248672, + "learning_rate": 1.9817207548746286e-05, + "loss": 0.3435, "step": 1941 }, { - "epoch": 0.11, - "grad_norm": 1.3137731119716958, - "learning_rate": 1.965337356154382e-05, - "loss": 0.8021, + "epoch": 0.09, + "grad_norm": 0.4751157918722982, + "learning_rate": 1.9816924248902304e-05, + "loss": 0.3391, "step": 1942 }, { - "epoch": 0.11, - "grad_norm": 0.38001594373431286, - "learning_rate": 1.9652887686267795e-05, - "loss": 0.2393, + "epoch": 0.09, + "grad_norm": 0.3914551342958525, + "learning_rate": 1.9816640731721086e-05, + "loss": 0.2103, "step": 1943 }, { - "epoch": 0.11, - "grad_norm": 0.2948874349792371, - "learning_rate": 1.965240147671268e-05, - "loss": 0.1511, + "epoch": 0.09, + "grad_norm": 0.40678447806936613, + "learning_rate": 1.9816356997208896e-05, + "loss": 0.2896, "step": 1944 }, { - "epoch": 0.11, - "grad_norm": 0.6219444278818353, - "learning_rate": 1.965191493289532e-05, - "loss": 0.4022, + "epoch": 0.09, + "grad_norm": 0.9977725035841613, + "learning_rate": 1.9816073045372026e-05, + "loss": 0.475, "step": 1945 }, { - "epoch": 0.11, - "grad_norm": 0.48882053571073614, - "learning_rate": 1.9651428054832562e-05, - "loss": 0.3009, + "epoch": 0.09, + "grad_norm": 0.5266811102400007, + "learning_rate": 1.981578887621676e-05, + "loss": 0.3888, "step": 1946 }, { - "epoch": 0.11, - "grad_norm": 1.0667319837664195, - "learning_rate": 1.9650940842541265e-05, - "loss": 0.4638, + "epoch": 0.09, + "grad_norm": 0.4468288918273721, + "learning_rate": 1.981550448974939e-05, + "loss": 0.3284, "step": 1947 }, { - "epoch": 0.11, - "grad_norm": 0.4139384433063878, - "learning_rate": 1.9650453296038302e-05, - "loss": 0.3544, + "epoch": 0.09, + "grad_norm": 0.7049747137680635, + "learning_rate": 1.981521988597621e-05, + "loss": 0.4502, "step": 1948 }, { - "epoch": 0.11, - "grad_norm": 0.5010220701794562, - "learning_rate": 1.9649965415340553e-05, - "loss": 0.3336, + "epoch": 0.09, + "grad_norm": 0.39492775420163345, + "learning_rate": 1.981493506490352e-05, + "loss": 0.1844, "step": 1949 }, { - "epoch": 0.11, - "grad_norm": 0.28320481878320053, - "learning_rate": 1.964947720046492e-05, - "loss": 0.1546, + "epoch": 0.09, + "grad_norm": 0.4020384021093621, + "learning_rate": 1.9814650026537632e-05, + "loss": 0.2834, "step": 1950 }, { - "epoch": 0.11, - "grad_norm": 1.2928573922057744, - "learning_rate": 1.9648988651428308e-05, - "loss": 0.5374, + "epoch": 0.09, + "grad_norm": 0.6384599910049662, + "learning_rate": 1.9814364770884847e-05, + "loss": 0.341, "step": 1951 }, { - "epoch": 0.11, - "grad_norm": 0.48585857871631216, - "learning_rate": 1.964849976824763e-05, - "loss": 0.2929, + "epoch": 0.09, + "grad_norm": 0.9892789303944801, + "learning_rate": 1.9814079297951486e-05, + "loss": 0.5043, "step": 1952 }, { - "epoch": 0.11, - "grad_norm": 0.4246659305786616, - "learning_rate": 1.964801055093982e-05, - "loss": 0.3102, + "epoch": 0.09, + "grad_norm": 0.42087800341626025, + "learning_rate": 1.981379360774387e-05, + "loss": 0.3228, "step": 1953 }, { - "epoch": 0.11, - "grad_norm": 1.487500029193504, - "learning_rate": 1.964752099952182e-05, - "loss": 0.8572, + "epoch": 0.09, + "grad_norm": 0.5039570434846425, + "learning_rate": 1.9813507700268323e-05, + "loss": 0.3576, "step": 1954 }, { - "epoch": 0.11, - "grad_norm": 0.4105755914914621, - "learning_rate": 1.9647031114010585e-05, - "loss": 0.3072, + "epoch": 0.09, + "grad_norm": 0.3541316994376223, + "learning_rate": 1.9813221575531172e-05, + "loss": 0.1761, "step": 1955 }, { - "epoch": 0.11, - "grad_norm": 0.3978007522631092, - "learning_rate": 1.9646540894423074e-05, - "loss": 0.2497, + "epoch": 0.09, + "grad_norm": 0.36132319946466196, + "learning_rate": 1.9812935233538754e-05, + "loss": 0.2645, "step": 1956 }, { - "epoch": 0.11, - "grad_norm": 1.6591002883094217, - "learning_rate": 1.964605034077627e-05, - "loss": 0.5882, + "epoch": 0.09, + "grad_norm": 1.1792060738055052, + "learning_rate": 1.9812648674297405e-05, + "loss": 0.6376, "step": 1957 }, { - "epoch": 0.11, - "grad_norm": 0.34811743917000226, - "learning_rate": 1.9645559453087158e-05, - "loss": 0.235, + "epoch": 0.09, + "grad_norm": 0.4593508859237822, + "learning_rate": 1.9812361897813477e-05, + "loss": 0.3236, "step": 1958 }, { - "epoch": 0.11, - "grad_norm": 0.6295042608541986, - "learning_rate": 1.9645068231372733e-05, - "loss": 0.426, + "epoch": 0.09, + "grad_norm": 0.47948513319936614, + "learning_rate": 1.981207490409331e-05, + "loss": 0.3293, "step": 1959 }, { - "epoch": 0.11, - "grad_norm": 0.3943625001062919, - "learning_rate": 1.9644576675650012e-05, - "loss": 0.2832, + "epoch": 0.09, + "grad_norm": 1.2340169439532689, + "learning_rate": 1.9811787693143266e-05, + "loss": 0.7185, "step": 1960 }, { - "epoch": 0.11, - "grad_norm": 0.4604908836391139, - "learning_rate": 1.9644084785936014e-05, - "loss": 0.3313, + "epoch": 0.09, + "grad_norm": 0.41089348937240583, + "learning_rate": 1.9811500264969695e-05, + "loss": 0.2091, "step": 1961 }, { - "epoch": 0.11, - "grad_norm": 1.0978664581234787, - "learning_rate": 1.9643592562247776e-05, - "loss": 0.5663, + "epoch": 0.09, + "grad_norm": 0.5366054330211604, + "learning_rate": 1.9811212619578967e-05, + "loss": 0.3185, "step": 1962 }, { - "epoch": 0.11, - "grad_norm": 0.5053998885488958, - "learning_rate": 1.964310000460234e-05, - "loss": 0.2447, + "epoch": 0.09, + "grad_norm": 0.4995655785619846, + "learning_rate": 1.9810924756977444e-05, + "loss": 0.3339, "step": 1963 }, { - "epoch": 0.11, - "grad_norm": 0.4433806067056563, - "learning_rate": 1.964260711301677e-05, - "loss": 0.2844, + "epoch": 0.09, + "grad_norm": 1.029761756265269, + "learning_rate": 1.9810636677171506e-05, + "loss": 0.4914, "step": 1964 }, { - "epoch": 0.11, - "grad_norm": 0.5933816056127612, - "learning_rate": 1.9642113887508127e-05, - "loss": 0.4007, + "epoch": 0.09, + "grad_norm": 0.47787231645546624, + "learning_rate": 1.9810348380167527e-05, + "loss": 0.2889, "step": 1965 }, { - "epoch": 0.11, - "grad_norm": 0.9395519950185571, - "learning_rate": 1.9641620328093496e-05, - "loss": 0.5341, + "epoch": 0.09, + "grad_norm": 0.5414596156521655, + "learning_rate": 1.9810059865971892e-05, + "loss": 0.3776, "step": 1966 }, { - "epoch": 0.11, - "grad_norm": 0.6664142487597251, - "learning_rate": 1.964112643478997e-05, - "loss": 0.3412, + "epoch": 0.09, + "grad_norm": 1.536746540311559, + "learning_rate": 1.9809771134590983e-05, + "loss": 0.7409, "step": 1967 }, { - "epoch": 0.11, - "grad_norm": 0.5458830028835255, - "learning_rate": 1.9640632207614647e-05, - "loss": 0.3502, + "epoch": 0.09, + "grad_norm": 0.33350452955558973, + "learning_rate": 1.9809482186031198e-05, + "loss": 0.186, "step": 1968 }, { - "epoch": 0.11, - "grad_norm": 0.4650976263027919, - "learning_rate": 1.9640137646584646e-05, - "loss": 0.3399, + "epoch": 0.09, + "grad_norm": 0.8274444251539987, + "learning_rate": 1.9809193020298932e-05, + "loss": 0.3997, "step": 1969 }, { - "epoch": 0.11, - "grad_norm": 0.2908370498165278, - "learning_rate": 1.963964275171709e-05, - "loss": 0.1562, + "epoch": 0.09, + "grad_norm": 0.56137419880525, + "learning_rate": 1.9808903637400584e-05, + "loss": 0.3844, "step": 1970 }, { - "epoch": 0.11, - "grad_norm": 0.7007220925956545, - "learning_rate": 1.9639147523029125e-05, - "loss": 0.4742, + "epoch": 0.09, + "grad_norm": 0.4168610505692883, + "learning_rate": 1.980861403734257e-05, + "loss": 0.2404, "step": 1971 }, { - "epoch": 0.11, - "grad_norm": 0.5483497042322073, - "learning_rate": 1.963865196053789e-05, - "loss": 0.3736, + "epoch": 0.09, + "grad_norm": 1.1072294839575338, + "learning_rate": 1.9808324220131287e-05, + "loss": 0.6665, "step": 1972 }, { - "epoch": 0.11, - "grad_norm": 0.42343628698039426, - "learning_rate": 1.9638156064260555e-05, - "loss": 0.1913, + "epoch": 0.09, + "grad_norm": 0.6059469190844824, + "learning_rate": 1.9808034185773164e-05, + "loss": 0.4175, "step": 1973 }, { - "epoch": 0.11, - "grad_norm": 0.4601079222814512, - "learning_rate": 1.9637659834214294e-05, - "loss": 0.4321, + "epoch": 0.09, + "grad_norm": 0.3642313557994086, + "learning_rate": 1.9807743934274614e-05, + "loss": 0.2197, "step": 1974 }, { - "epoch": 0.11, - "grad_norm": 1.3014578591026609, - "learning_rate": 1.9637163270416283e-05, - "loss": 0.7311, + "epoch": 0.09, + "grad_norm": 0.8088831330634305, + "learning_rate": 1.980745346564207e-05, + "loss": 0.5195, "step": 1975 }, { - "epoch": 0.11, - "grad_norm": 0.31662376400744924, - "learning_rate": 1.9636666372883722e-05, - "loss": 0.2132, + "epoch": 0.09, + "grad_norm": 0.4992232557524347, + "learning_rate": 1.9807162779881956e-05, + "loss": 0.3658, "step": 1976 }, { - "epoch": 0.11, - "grad_norm": 0.423523284773505, - "learning_rate": 1.963616914163382e-05, - "loss": 0.2908, + "epoch": 0.09, + "grad_norm": 0.4623638893758382, + "learning_rate": 1.980687187700071e-05, + "loss": 0.2323, "step": 1977 }, { - "epoch": 0.11, - "grad_norm": 1.229874934088239, - "learning_rate": 1.9635671576683798e-05, - "loss": 0.7617, + "epoch": 0.09, + "grad_norm": 0.5360289981400598, + "learning_rate": 1.9806580757004776e-05, + "loss": 0.3769, "step": 1978 }, { - "epoch": 0.11, - "grad_norm": 0.4814629313067314, - "learning_rate": 1.9635173678050878e-05, - "loss": 0.3127, + "epoch": 0.09, + "grad_norm": 1.0185027830756135, + "learning_rate": 1.9806289419900592e-05, + "loss": 0.5417, "step": 1979 }, { - "epoch": 0.11, - "grad_norm": 0.5001839315639627, - "learning_rate": 1.963467544575231e-05, - "loss": 0.3492, + "epoch": 0.09, + "grad_norm": 0.6933488528653476, + "learning_rate": 1.9805997865694616e-05, + "loss": 0.3246, "step": 1980 }, { - "epoch": 0.11, - "grad_norm": 0.5230200200146033, - "learning_rate": 1.9634176879805347e-05, - "loss": 0.3975, + "epoch": 0.09, + "grad_norm": 0.5207463884013039, + "learning_rate": 1.9805706094393293e-05, + "loss": 0.3525, "step": 1981 }, { - "epoch": 0.11, - "grad_norm": 0.31796381358669257, - "learning_rate": 1.9633677980227254e-05, - "loss": 0.2032, + "epoch": 0.09, + "grad_norm": 0.42637707857514257, + "learning_rate": 1.980541410600309e-05, + "loss": 0.3118, "step": 1982 }, { - "epoch": 0.11, - "grad_norm": 0.562655816237142, - "learning_rate": 1.96331787470353e-05, - "loss": 0.3209, + "epoch": 0.09, + "grad_norm": 0.5951078075831547, + "learning_rate": 1.980512190053047e-05, + "loss": 0.3669, "step": 1983 }, { - "epoch": 0.11, - "grad_norm": 0.3877665388095057, - "learning_rate": 1.9632679180246787e-05, - "loss": 0.3566, + "epoch": 0.09, + "grad_norm": 0.41427740290486037, + "learning_rate": 1.98048294779819e-05, + "loss": 0.2212, "step": 1984 }, { - "epoch": 0.11, - "grad_norm": 0.5555037398364667, - "learning_rate": 1.9632179279879006e-05, - "loss": 0.4005, + "epoch": 0.09, + "grad_norm": 0.6137914985401658, + "learning_rate": 1.9804536838363858e-05, + "loss": 0.4008, "step": 1985 }, { - "epoch": 0.11, - "grad_norm": 0.4124910862080048, - "learning_rate": 1.963167904594927e-05, - "loss": 0.3273, + "epoch": 0.09, + "grad_norm": 0.4086097098322167, + "learning_rate": 1.9804243981682817e-05, + "loss": 0.3002, "step": 1986 }, { - "epoch": 0.11, - "grad_norm": 0.6000884096195452, - "learning_rate": 1.9631178478474905e-05, - "loss": 0.3107, + "epoch": 0.09, + "grad_norm": 0.7064764250559041, + "learning_rate": 1.9803950907945267e-05, + "loss": 0.4466, "step": 1987 }, { - "epoch": 0.11, - "grad_norm": 0.4283814322598163, - "learning_rate": 1.9630677577473242e-05, - "loss": 0.3005, + "epoch": 0.09, + "grad_norm": 0.5554398889456554, + "learning_rate": 1.9803657617157693e-05, + "loss": 0.3282, "step": 1988 }, { - "epoch": 0.11, - "grad_norm": 0.4370673442250718, - "learning_rate": 1.963017634296163e-05, - "loss": 0.2721, + "epoch": 0.09, + "grad_norm": 0.47965803260818074, + "learning_rate": 1.9803364109326586e-05, + "loss": 0.2963, "step": 1989 }, { - "epoch": 0.11, - "grad_norm": 0.4521900634802621, - "learning_rate": 1.9629674774957425e-05, - "loss": 0.3517, + "epoch": 0.09, + "grad_norm": 0.39610035644393304, + "learning_rate": 1.9803070384458448e-05, + "loss": 0.2239, "step": 1990 }, { - "epoch": 0.11, - "grad_norm": 0.5563225794926104, - "learning_rate": 1.9629172873477995e-05, - "loss": 0.3998, + "epoch": 0.09, + "grad_norm": 0.9274105785659928, + "learning_rate": 1.9802776442559777e-05, + "loss": 0.5518, "step": 1991 }, { - "epoch": 0.11, - "grad_norm": 0.4409425742430659, - "learning_rate": 1.9628670638540722e-05, - "loss": 0.2862, + "epoch": 0.09, + "grad_norm": 0.47310440867051357, + "learning_rate": 1.9802482283637086e-05, + "loss": 0.3364, "step": 1992 }, { - "epoch": 0.11, - "grad_norm": 1.3474166819860802, - "learning_rate": 1.9628168070163e-05, - "loss": 0.6459, + "epoch": 0.09, + "grad_norm": 0.8922765734240776, + "learning_rate": 1.9802187907696884e-05, + "loss": 0.5365, "step": 1993 }, { - "epoch": 0.11, - "grad_norm": 0.6413772554148557, - "learning_rate": 1.9627665168362234e-05, - "loss": 0.4017, + "epoch": 0.09, + "grad_norm": 0.44591853283532534, + "learning_rate": 1.980189331474569e-05, + "loss": 0.3024, "step": 1994 }, { - "epoch": 0.11, - "grad_norm": 0.4979080455824859, - "learning_rate": 1.9627161933155833e-05, - "loss": 0.3773, + "epoch": 0.09, + "grad_norm": 0.5205649119233964, + "learning_rate": 1.9801598504790025e-05, + "loss": 0.3215, "step": 1995 }, { - "epoch": 0.11, - "grad_norm": 0.47734635309643114, - "learning_rate": 1.962665836456123e-05, - "loss": 0.3279, + "epoch": 0.09, + "grad_norm": 0.3804715994250329, + "learning_rate": 1.9801303477836417e-05, + "loss": 0.2474, "step": 1996 }, { - "epoch": 0.11, - "grad_norm": 0.4702257531545852, - "learning_rate": 1.9626154462595863e-05, - "loss": 0.2722, + "epoch": 0.09, + "grad_norm": 0.7508012739459844, + "learning_rate": 1.9801008233891395e-05, + "loss": 0.3524, "step": 1997 }, { - "epoch": 0.11, - "grad_norm": 0.36757418615763193, - "learning_rate": 1.9625650227277182e-05, - "loss": 0.2572, + "epoch": 0.09, + "grad_norm": 0.528287447103849, + "learning_rate": 1.98007127729615e-05, + "loss": 0.3137, "step": 1998 }, { - "epoch": 0.11, - "grad_norm": 2.0698780573684648, - "learning_rate": 1.9625145658622644e-05, - "loss": 0.3931, + "epoch": 0.09, + "grad_norm": 0.5863772715882213, + "learning_rate": 1.9800417095053266e-05, + "loss": 0.4451, "step": 1999 }, { - "epoch": 0.11, - "grad_norm": 0.4586305864242774, - "learning_rate": 1.962464075664973e-05, - "loss": 0.2985, + "epoch": 0.09, + "grad_norm": 1.1865521045685201, + "learning_rate": 1.9800121200173246e-05, + "loss": 0.5324, "step": 2000 }, { - "epoch": 0.11, - "grad_norm": 0.4799034083760339, - "learning_rate": 1.9624135521375914e-05, - "loss": 0.345, + "epoch": 0.09, + "grad_norm": 0.5161589307886086, + "learning_rate": 1.9799825088327987e-05, + "loss": 0.3308, "step": 2001 }, { - "epoch": 0.12, - "grad_norm": 0.8087456132963655, - "learning_rate": 1.9623629952818705e-05, - "loss": 0.4573, + "epoch": 0.09, + "grad_norm": 0.34655198264312825, + "learning_rate": 1.9799528759524045e-05, + "loss": 0.2596, "step": 2002 }, { - "epoch": 0.12, - "grad_norm": 0.273169211369182, - "learning_rate": 1.9623124050995603e-05, - "loss": 0.1335, + "epoch": 0.09, + "grad_norm": 1.100367717431487, + "learning_rate": 1.979923221376798e-05, + "loss": 0.4394, "step": 2003 }, { - "epoch": 0.12, - "grad_norm": 0.44311381479955264, - "learning_rate": 1.9622617815924125e-05, - "loss": 0.3134, + "epoch": 0.09, + "grad_norm": 0.4849782370565985, + "learning_rate": 1.9798935451066363e-05, + "loss": 0.3643, "step": 2004 }, { - "epoch": 0.12, - "grad_norm": 4.874639231040764, - "learning_rate": 1.962211124762181e-05, - "loss": 0.5349, + "epoch": 0.09, + "grad_norm": 0.47977694449325736, + "learning_rate": 1.9798638471425756e-05, + "loss": 0.3233, "step": 2005 }, { - "epoch": 0.12, - "grad_norm": 0.910928440837772, - "learning_rate": 1.9621604346106197e-05, - "loss": 0.5488, + "epoch": 0.09, + "grad_norm": 0.6553709123209148, + "learning_rate": 1.979834127485274e-05, + "loss": 0.4269, "step": 2006 }, { - "epoch": 0.12, - "grad_norm": 0.730448021870774, - "learning_rate": 1.9621097111394837e-05, - "loss": 0.334, + "epoch": 0.09, + "grad_norm": 0.3923977542751138, + "learning_rate": 1.979804386135389e-05, + "loss": 0.245, "step": 2007 }, { - "epoch": 0.12, - "grad_norm": 0.8847582506626019, - "learning_rate": 1.9620589543505297e-05, - "loss": 0.4115, + "epoch": 0.09, + "grad_norm": 0.33925261433225296, + "learning_rate": 1.9797746230935792e-05, + "loss": 0.1793, "step": 2008 }, { - "epoch": 0.12, - "grad_norm": 0.26085846379198124, - "learning_rate": 1.9620081642455155e-05, - "loss": 0.109, + "epoch": 0.09, + "grad_norm": 0.9151751027601358, + "learning_rate": 1.9797448383605036e-05, + "loss": 0.3925, "step": 2009 }, { - "epoch": 0.12, - "grad_norm": 0.37726397681575125, - "learning_rate": 1.9619573408262004e-05, - "loss": 0.2366, + "epoch": 0.09, + "grad_norm": 0.5475192505680544, + "learning_rate": 1.979715031936822e-05, + "loss": 0.2806, "step": 2010 }, { - "epoch": 0.12, - "grad_norm": 8.216495069611042, - "learning_rate": 1.9619064840943432e-05, - "loss": 0.8289, + "epoch": 0.09, + "grad_norm": 1.468885005215159, + "learning_rate": 1.9796852038231932e-05, + "loss": 0.7366, "step": 2011 }, { - "epoch": 0.12, - "grad_norm": 0.4878648634362295, - "learning_rate": 1.9618555940517062e-05, - "loss": 0.2654, + "epoch": 0.09, + "grad_norm": 1.3694146325131684, + "learning_rate": 1.9796553540202785e-05, + "loss": 0.8477, "step": 2012 }, { - "epoch": 0.12, - "grad_norm": 8.093933887287843, - "learning_rate": 1.9618046707000515e-05, - "loss": 0.4438, + "epoch": 0.09, + "grad_norm": 0.4893060853244445, + "learning_rate": 1.9796254825287385e-05, + "loss": 0.2319, "step": 2013 }, { - "epoch": 0.12, - "grad_norm": 7.040222786715266, - "learning_rate": 1.9617537140411423e-05, - "loss": 0.6686, + "epoch": 0.09, + "grad_norm": 0.459106924236629, + "learning_rate": 1.9795955893492344e-05, + "loss": 0.3488, "step": 2014 }, { - "epoch": 0.12, - "grad_norm": 1.4288877331493517, - "learning_rate": 1.961702724076743e-05, - "loss": 0.2278, + "epoch": 0.09, + "grad_norm": 0.377399440407178, + "learning_rate": 1.979565674482428e-05, + "loss": 0.2906, "step": 2015 }, { - "epoch": 0.12, - "grad_norm": 0.609857521709017, - "learning_rate": 1.96165170080862e-05, - "loss": 0.3412, + "epoch": 0.09, + "grad_norm": 0.5028002809836686, + "learning_rate": 1.979535737928982e-05, + "loss": 0.2265, "step": 2016 }, { - "epoch": 0.12, - "grad_norm": 2.3849639020833395, - "learning_rate": 1.9616006442385403e-05, - "loss": 0.5256, + "epoch": 0.09, + "grad_norm": 0.48636333265568715, + "learning_rate": 1.9795057796895585e-05, + "loss": 0.3696, "step": 2017 }, { - "epoch": 0.12, - "grad_norm": 1.0769342424347998, - "learning_rate": 1.9615495543682712e-05, - "loss": 0.3283, + "epoch": 0.09, + "grad_norm": 0.6451088666394826, + "learning_rate": 1.9794757997648214e-05, + "loss": 0.4372, "step": 2018 }, { - "epoch": 0.12, - "grad_norm": 1.679860697691885, - "learning_rate": 1.9614984311995825e-05, - "loss": 0.4145, + "epoch": 0.09, + "grad_norm": 0.8553634158607151, + "learning_rate": 1.979445798155434e-05, + "loss": 0.3966, "step": 2019 }, { - "epoch": 0.12, - "grad_norm": 0.7572448145592617, - "learning_rate": 1.9614472747342445e-05, - "loss": 0.4164, + "epoch": 0.09, + "grad_norm": 0.4190236061141445, + "learning_rate": 1.9794157748620604e-05, + "loss": 0.3416, "step": 2020 }, { - "epoch": 0.12, - "grad_norm": 1.0369229362139345, - "learning_rate": 1.9613960849740284e-05, - "loss": 0.4782, + "epoch": 0.09, + "grad_norm": 0.42135222458560323, + "learning_rate": 1.9793857298853657e-05, + "loss": 0.2884, "step": 2021 }, { - "epoch": 0.12, - "grad_norm": 0.43789872014124714, - "learning_rate": 1.9613448619207077e-05, - "loss": 0.172, + "epoch": 0.09, + "grad_norm": 0.4012091778228384, + "learning_rate": 1.979355663226015e-05, + "loss": 0.2461, "step": 2022 }, { - "epoch": 0.12, - "grad_norm": 0.8217791474880007, - "learning_rate": 1.9612936055760557e-05, - "loss": 0.4245, + "epoch": 0.09, + "grad_norm": 0.48755856788473173, + "learning_rate": 1.9793255748846737e-05, + "loss": 0.2858, "step": 2023 }, { - "epoch": 0.12, - "grad_norm": 0.7758649033699128, - "learning_rate": 1.9612423159418474e-05, - "loss": 0.4332, + "epoch": 0.09, + "grad_norm": 1.5215206163835893, + "learning_rate": 1.9792954648620083e-05, + "loss": 0.9342, "step": 2024 }, { - "epoch": 0.12, - "grad_norm": 0.5558536412840944, - "learning_rate": 1.9611909930198588e-05, - "loss": 0.305, + "epoch": 0.09, + "grad_norm": 0.4541215652729916, + "learning_rate": 1.9792653331586852e-05, + "loss": 0.3108, "step": 2025 }, { - "epoch": 0.12, - "grad_norm": 2.763245662074697, - "learning_rate": 1.9611396368118675e-05, - "loss": 0.68, + "epoch": 0.09, + "grad_norm": 0.4299551654401156, + "learning_rate": 1.979235179775371e-05, + "loss": 0.3176, "step": 2026 }, { - "epoch": 0.12, - "grad_norm": 0.6710808942620035, - "learning_rate": 1.961088247319652e-05, - "loss": 0.4107, + "epoch": 0.09, + "grad_norm": 0.5191101721597068, + "learning_rate": 1.9792050047127337e-05, + "loss": 0.3524, "step": 2027 }, { - "epoch": 0.12, - "grad_norm": 0.36096294284561037, - "learning_rate": 1.961036824544992e-05, - "loss": 0.2201, + "epoch": 0.09, + "grad_norm": 0.37183006099288957, + "learning_rate": 1.979174807971442e-05, + "loss": 0.2259, "step": 2028 }, { - "epoch": 0.12, - "grad_norm": 0.5810650256593232, - "learning_rate": 1.960985368489668e-05, - "loss": 0.2909, + "epoch": 0.09, + "grad_norm": 0.46329440797192506, + "learning_rate": 1.9791445895521634e-05, + "loss": 0.2856, "step": 2029 }, { - "epoch": 0.12, - "grad_norm": 1.5690149812729766, - "learning_rate": 1.9609338791554623e-05, - "loss": 0.5314, + "epoch": 0.09, + "grad_norm": 0.692076040525972, + "learning_rate": 1.9791143494555672e-05, + "loss": 0.4928, "step": 2030 }, { - "epoch": 0.12, - "grad_norm": 0.6393362197055744, - "learning_rate": 1.960882356544157e-05, - "loss": 0.2609, + "epoch": 0.09, + "grad_norm": 0.5874302361207011, + "learning_rate": 1.979084087682323e-05, + "loss": 0.4136, "step": 2031 }, { - "epoch": 0.12, - "grad_norm": 0.6976100461836577, - "learning_rate": 1.9608308006575373e-05, - "loss": 0.4226, - "step": 2032 + "epoch": 0.09, + "grad_norm": 0.47679929964558293, + "learning_rate": 1.979053804233101e-05, + "loss": 0.3918, + "step": 2032 }, { - "epoch": 0.12, - "grad_norm": 1.479240310964179, - "learning_rate": 1.9607792114973884e-05, - "loss": 0.5768, + "epoch": 0.09, + "grad_norm": 0.48170021632104, + "learning_rate": 1.979023499108571e-05, + "loss": 0.2938, "step": 2033 }, { - "epoch": 0.12, - "grad_norm": 0.3668310724900021, - "learning_rate": 1.9607275890654967e-05, - "loss": 0.2626, + "epoch": 0.09, + "grad_norm": 0.3427586317710395, + "learning_rate": 1.9789931723094046e-05, + "loss": 0.1929, "step": 2034 }, { - "epoch": 0.12, - "grad_norm": 2.857454535850804, - "learning_rate": 1.9606759333636498e-05, - "loss": 0.2436, + "epoch": 0.09, + "grad_norm": 0.4991603878462252, + "learning_rate": 1.978962823836273e-05, + "loss": 0.3136, "step": 2035 }, { - "epoch": 0.12, - "grad_norm": 0.9569480593524808, - "learning_rate": 1.9606242443936368e-05, - "loss": 0.4232, + "epoch": 0.09, + "grad_norm": 0.7892557411119868, + "learning_rate": 1.978932453689848e-05, + "loss": 0.4781, "step": 2036 }, { - "epoch": 0.12, - "grad_norm": 0.5653570617669198, - "learning_rate": 1.9605725221572475e-05, - "loss": 0.3075, + "epoch": 0.09, + "grad_norm": 0.47873530717926904, + "learning_rate": 1.9789020618708018e-05, + "loss": 0.2869, "step": 2037 }, { - "epoch": 0.12, - "grad_norm": 1.2376985969066299, - "learning_rate": 1.960520766656273e-05, - "loss": 0.594, + "epoch": 0.09, + "grad_norm": 0.48280893308741524, + "learning_rate": 1.9788716483798077e-05, + "loss": 0.4015, "step": 2038 }, { - "epoch": 0.12, - "grad_norm": 0.6424647493461252, - "learning_rate": 1.9604689778925056e-05, - "loss": 0.3995, + "epoch": 0.09, + "grad_norm": 1.040557718900965, + "learning_rate": 1.9788412132175386e-05, + "loss": 0.4278, "step": 2039 }, { - "epoch": 0.12, - "grad_norm": 0.4235494564972714, - "learning_rate": 1.960417155867739e-05, - "loss": 0.3047, + "epoch": 0.09, + "grad_norm": 0.30399268222380765, + "learning_rate": 1.9788107563846682e-05, + "loss": 0.1874, "step": 2040 }, { - "epoch": 0.12, - "grad_norm": 0.5912471531591597, - "learning_rate": 1.960365300583767e-05, - "loss": 0.2393, + "epoch": 0.09, + "grad_norm": 0.3850387225362169, + "learning_rate": 1.9787802778818713e-05, + "loss": 0.3136, "step": 2041 }, { - "epoch": 0.12, - "grad_norm": 1.029688638479728, - "learning_rate": 1.960313412042387e-05, - "loss": 0.5656, + "epoch": 0.09, + "grad_norm": 0.9479317914244941, + "learning_rate": 1.9787497777098226e-05, + "loss": 0.4567, "step": 2042 }, { - "epoch": 0.12, - "grad_norm": 0.4732960711478903, - "learning_rate": 1.960261490245394e-05, - "loss": 0.3001, + "epoch": 0.09, + "grad_norm": 0.5846011877712373, + "learning_rate": 1.978719255869197e-05, + "loss": 0.4012, "step": 2043 }, { - "epoch": 0.12, - "grad_norm": 0.5220022647134588, - "learning_rate": 1.9602095351945872e-05, - "loss": 0.3535, + "epoch": 0.09, + "grad_norm": 0.6486162644774188, + "learning_rate": 1.97868871236067e-05, + "loss": 0.3492, "step": 2044 }, { - "epoch": 0.12, - "grad_norm": 1.5883266373730363, - "learning_rate": 1.9601575468917654e-05, - "loss": 0.4899, + "epoch": 0.09, + "grad_norm": 0.501669820525786, + "learning_rate": 1.9786581471849186e-05, + "loss": 0.3645, "step": 2045 }, { - "epoch": 0.12, - "grad_norm": 0.3387809210650048, - "learning_rate": 1.9601055253387292e-05, - "loss": 0.2403, + "epoch": 0.09, + "grad_norm": 0.2555958187319086, + "learning_rate": 1.978627560342619e-05, + "loss": 0.0791, "step": 2046 }, { - "epoch": 0.12, - "grad_norm": 0.5623556957444193, - "learning_rate": 1.9600534705372795e-05, - "loss": 0.3546, + "epoch": 0.09, + "grad_norm": 0.5619358701796047, + "learning_rate": 1.9785969518344484e-05, + "loss": 0.3803, "step": 2047 }, { - "epoch": 0.12, - "grad_norm": 0.5773352363021086, - "learning_rate": 1.9600013824892198e-05, - "loss": 0.3738, + "epoch": 0.09, + "grad_norm": 1.256864624742129, + "learning_rate": 1.9785663216610843e-05, + "loss": 0.5627, "step": 2048 }, { - "epoch": 0.12, - "grad_norm": 0.4232267894991952, - "learning_rate": 1.9599492611963533e-05, - "loss": 0.29, + "epoch": 0.09, + "grad_norm": 0.4012730803791754, + "learning_rate": 1.9785356698232052e-05, + "loss": 0.2836, "step": 2049 }, { - "epoch": 0.12, - "grad_norm": 1.5190140255118185, - "learning_rate": 1.9598971066604854e-05, - "loss": 0.818, + "epoch": 0.09, + "grad_norm": 0.5840905122322081, + "learning_rate": 1.9785049963214895e-05, + "loss": 0.3802, "step": 2050 }, { - "epoch": 0.12, - "grad_norm": 0.6714832479028133, - "learning_rate": 1.9598449188834218e-05, - "loss": 0.314, + "epoch": 0.09, + "grad_norm": 1.662399195733512, + "learning_rate": 1.9784743011566162e-05, + "loss": 0.8109, "step": 2051 }, { - "epoch": 0.12, - "grad_norm": 0.44430442128339687, - "learning_rate": 1.95979269786697e-05, - "loss": 0.2978, + "epoch": 0.09, + "grad_norm": 0.251756321730411, + "learning_rate": 1.9784435843292652e-05, + "loss": 0.1006, "step": 2052 }, { - "epoch": 0.12, - "grad_norm": 0.9537546657131378, - "learning_rate": 1.959740443612938e-05, - "loss": 0.5501, + "epoch": 0.09, + "grad_norm": 0.4683704180769018, + "learning_rate": 1.978412845840116e-05, + "loss": 0.304, "step": 2053 }, { - "epoch": 0.12, - "grad_norm": 0.2242161983506829, - "learning_rate": 1.9596881561231363e-05, - "loss": 0.1592, + "epoch": 0.09, + "grad_norm": 1.2834791222468118, + "learning_rate": 1.9783820856898492e-05, + "loss": 0.5557, "step": 2054 }, { - "epoch": 0.12, - "grad_norm": 0.48220806957042844, - "learning_rate": 1.9596358353993747e-05, - "loss": 0.3105, + "epoch": 0.09, + "grad_norm": 0.766612481050488, + "learning_rate": 1.9783513038791463e-05, + "loss": 0.3642, "step": 2055 }, { - "epoch": 0.12, - "grad_norm": 0.6976369230595237, - "learning_rate": 1.959583481443465e-05, - "loss": 0.4052, + "epoch": 0.09, + "grad_norm": 0.5001740353759953, + "learning_rate": 1.9783205004086884e-05, + "loss": 0.2982, "step": 2056 }, { - "epoch": 0.12, - "grad_norm": 1.8327429933334323, - "learning_rate": 1.9595310942572212e-05, - "loss": 0.768, + "epoch": 0.09, + "grad_norm": 0.5271195116157156, + "learning_rate": 1.9782896752791576e-05, + "loss": 0.3426, "step": 2057 }, { - "epoch": 0.12, - "grad_norm": 0.44394301554498045, - "learning_rate": 1.9594786738424566e-05, - "loss": 0.2232, + "epoch": 0.09, + "grad_norm": 0.5304167482899875, + "learning_rate": 1.978258828491236e-05, + "loss": 0.2965, "step": 2058 }, { - "epoch": 0.12, - "grad_norm": 0.7342598439395362, - "learning_rate": 1.959426220200987e-05, - "loss": 0.4006, + "epoch": 0.09, + "grad_norm": 0.44599342972225464, + "learning_rate": 1.978227960045607e-05, + "loss": 0.2245, "step": 2059 }, { - "epoch": 0.12, - "grad_norm": 0.427795716449992, - "learning_rate": 1.9593737333346286e-05, - "loss": 0.2239, + "epoch": 0.09, + "grad_norm": 1.685826861566089, + "learning_rate": 1.978197069942954e-05, + "loss": 0.5888, "step": 2060 }, { - "epoch": 0.12, - "grad_norm": 0.4343683489018406, - "learning_rate": 1.9593212132451992e-05, - "loss": 0.2375, + "epoch": 0.09, + "grad_norm": 0.5086787170099829, + "learning_rate": 1.9781661581839607e-05, + "loss": 0.3477, "step": 2061 }, { - "epoch": 0.12, - "grad_norm": 1.6866265621347991, - "learning_rate": 1.959268659934517e-05, - "loss": 0.7592, + "epoch": 0.09, + "grad_norm": 0.5054693740003389, + "learning_rate": 1.978135224769311e-05, + "loss": 0.2533, "step": 2062 }, { - "epoch": 0.12, - "grad_norm": 0.46897540943464006, - "learning_rate": 1.9592160734044027e-05, - "loss": 0.3492, + "epoch": 0.09, + "grad_norm": 0.9128541613747733, + "learning_rate": 1.9781042696996904e-05, + "loss": 0.6258, "step": 2063 }, { - "epoch": 0.12, - "grad_norm": 0.46889413154603504, - "learning_rate": 1.9591634536566766e-05, - "loss": 0.271, + "epoch": 0.09, + "grad_norm": 0.43737761989527796, + "learning_rate": 1.978073292975784e-05, + "loss": 0.2444, "step": 2064 }, { - "epoch": 0.12, - "grad_norm": 1.0019504066133773, - "learning_rate": 1.9591108006931618e-05, - "loss": 0.5227, + "epoch": 0.09, + "grad_norm": 0.4287635026159656, + "learning_rate": 1.9780422945982775e-05, + "loss": 0.2481, "step": 2065 }, { - "epoch": 0.12, - "grad_norm": 0.4826247658886829, - "learning_rate": 1.9590581145156812e-05, - "loss": 0.2342, + "epoch": 0.09, + "grad_norm": 0.9657517402785877, + "learning_rate": 1.9780112745678574e-05, + "loss": 0.4264, "step": 2066 }, { - "epoch": 0.12, - "grad_norm": 0.46222434250869515, - "learning_rate": 1.959005395126059e-05, - "loss": 0.2521, + "epoch": 0.09, + "grad_norm": 0.9765950969579139, + "learning_rate": 1.97798023288521e-05, + "loss": 0.5766, "step": 2067 }, { - "epoch": 0.12, - "grad_norm": 0.5843926885025996, - "learning_rate": 1.9589526425261213e-05, - "loss": 0.4092, + "epoch": 0.1, + "grad_norm": 0.5419086256816057, + "learning_rate": 1.9779491695510233e-05, + "loss": 0.3022, "step": 2068 }, { - "epoch": 0.12, - "grad_norm": 0.8389100016932158, - "learning_rate": 1.958899856717695e-05, - "loss": 0.5575, + "epoch": 0.1, + "grad_norm": 0.5163604585646017, + "learning_rate": 1.977918084565984e-05, + "loss": 0.2897, "step": 2069 }, { - "epoch": 0.12, - "grad_norm": 0.4238709325497757, - "learning_rate": 1.958847037702608e-05, - "loss": 0.334, + "epoch": 0.1, + "grad_norm": 0.6337207858066779, + "learning_rate": 1.977886977930781e-05, + "loss": 0.3566, "step": 2070 }, { - "epoch": 0.12, - "grad_norm": 0.4507205040580907, - "learning_rate": 1.9587941854826892e-05, - "loss": 0.3119, + "epoch": 0.1, + "grad_norm": 0.41916703099448366, + "learning_rate": 1.9778558496461032e-05, + "loss": 0.3015, "step": 2071 }, { - "epoch": 0.12, - "grad_norm": 0.47985631218897673, - "learning_rate": 1.9587413000597687e-05, - "loss": 0.3043, + "epoch": 0.1, + "grad_norm": 0.3741898432727063, + "learning_rate": 1.977824699712639e-05, + "loss": 0.3057, "step": 2072 }, { - "epoch": 0.12, - "grad_norm": 0.7661228750951127, - "learning_rate": 1.9586883814356785e-05, - "loss": 0.4541, + "epoch": 0.1, + "grad_norm": 0.6469142559572005, + "learning_rate": 1.9777935281310784e-05, + "loss": 0.3782, "step": 2073 }, { - "epoch": 0.12, - "grad_norm": 0.3672906869250385, - "learning_rate": 1.958635429612251e-05, - "loss": 0.2207, + "epoch": 0.1, + "grad_norm": 0.46062939843566225, + "learning_rate": 1.9777623349021117e-05, + "loss": 0.3036, "step": 2074 }, { - "epoch": 0.12, - "grad_norm": 0.4746221052006005, - "learning_rate": 1.9585824445913194e-05, - "loss": 0.3761, + "epoch": 0.1, + "grad_norm": 1.374122882349871, + "learning_rate": 1.9777311200264292e-05, + "loss": 0.3458, "step": 2075 }, { - "epoch": 0.12, - "grad_norm": 0.4915279326174938, - "learning_rate": 1.9585294263747192e-05, - "loss": 0.3161, + "epoch": 0.1, + "grad_norm": 0.5805875086386575, + "learning_rate": 1.977699883504722e-05, + "loss": 0.4343, "step": 2076 }, { - "epoch": 0.12, - "grad_norm": 0.49898984543053004, - "learning_rate": 1.9584763749642862e-05, - "loss": 0.3595, + "epoch": 0.1, + "grad_norm": 0.4474600042586244, + "learning_rate": 1.9776686253376816e-05, + "loss": 0.3003, "step": 2077 }, { - "epoch": 0.12, - "grad_norm": 0.5341824181711823, - "learning_rate": 1.9584232903618576e-05, - "loss": 0.3403, + "epoch": 0.1, + "grad_norm": 0.4214045696450386, + "learning_rate": 1.977637345526e-05, + "loss": 0.2739, "step": 2078 }, { - "epoch": 0.12, - "grad_norm": 0.38017236195959025, - "learning_rate": 1.9583701725692716e-05, - "loss": 0.3137, + "epoch": 0.1, + "grad_norm": 1.021478995118339, + "learning_rate": 1.9776060440703703e-05, + "loss": 0.6443, "step": 2079 }, { - "epoch": 0.12, - "grad_norm": 0.32745929971134763, - "learning_rate": 1.9583170215883677e-05, - "loss": 0.2298, + "epoch": 0.1, + "grad_norm": 0.4122736946293039, + "learning_rate": 1.9775747209714847e-05, + "loss": 0.2602, "step": 2080 }, { - "epoch": 0.12, - "grad_norm": 0.8625293763295988, - "learning_rate": 1.9582638374209864e-05, - "loss": 0.5644, + "epoch": 0.1, + "grad_norm": 0.5680351370776875, + "learning_rate": 1.977543376230037e-05, + "loss": 0.368, "step": 2081 }, { - "epoch": 0.12, - "grad_norm": 0.4296796221626104, - "learning_rate": 1.9582106200689698e-05, - "loss": 0.3539, + "epoch": 0.1, + "grad_norm": 0.8472518439061804, + "learning_rate": 1.9775120098467212e-05, + "loss": 0.4055, "step": 2082 }, { - "epoch": 0.12, - "grad_norm": 0.43436138803461416, - "learning_rate": 1.9581573695341607e-05, - "loss": 0.3427, + "epoch": 0.1, + "grad_norm": 0.48007071057726103, + "learning_rate": 1.9774806218222317e-05, + "loss": 0.2993, "step": 2083 }, { - "epoch": 0.12, - "grad_norm": 0.9284517598696714, - "learning_rate": 1.9581040858184028e-05, - "loss": 0.3858, + "epoch": 0.1, + "grad_norm": 0.5235336868669804, + "learning_rate": 1.977449212157263e-05, + "loss": 0.338, "step": 2084 }, { - "epoch": 0.12, - "grad_norm": 0.38693079051285023, - "learning_rate": 1.958050768923542e-05, - "loss": 0.3074, + "epoch": 0.1, + "grad_norm": 0.4714849739363711, + "learning_rate": 1.9774177808525113e-05, + "loss": 0.3259, "step": 2085 }, { - "epoch": 0.12, - "grad_norm": 0.4582866470000151, - "learning_rate": 1.957997418851424e-05, - "loss": 0.2932, + "epoch": 0.1, + "grad_norm": 0.43486199444321916, + "learning_rate": 1.9773863279086717e-05, + "loss": 0.3063, "step": 2086 }, { - "epoch": 0.12, - "grad_norm": 0.7347478591787949, - "learning_rate": 1.9579440356038966e-05, - "loss": 0.3689, + "epoch": 0.1, + "grad_norm": 0.5823190847229298, + "learning_rate": 1.9773548533264406e-05, + "loss": 0.3337, "step": 2087 }, { - "epoch": 0.12, - "grad_norm": 0.3424729743541756, - "learning_rate": 1.9578906191828086e-05, - "loss": 0.2313, + "epoch": 0.1, + "grad_norm": 0.59517639040424, + "learning_rate": 1.9773233571065154e-05, + "loss": 0.3688, "step": 2088 }, { - "epoch": 0.12, - "grad_norm": 0.6525889969280737, - "learning_rate": 1.9578371695900097e-05, - "loss": 0.4339, + "epoch": 0.1, + "grad_norm": 0.37481076902184907, + "learning_rate": 1.977291839249593e-05, + "loss": 0.2996, "step": 2089 }, { - "epoch": 0.12, - "grad_norm": 0.49133362050474455, - "learning_rate": 1.9577836868273504e-05, - "loss": 0.3199, + "epoch": 0.1, + "grad_norm": 1.055614153837425, + "learning_rate": 1.977260299756371e-05, + "loss": 0.6411, "step": 2090 }, { - "epoch": 0.12, - "grad_norm": 0.46103788217869224, - "learning_rate": 1.9577301708966837e-05, - "loss": 0.3133, + "epoch": 0.1, + "grad_norm": 0.4096261817559592, + "learning_rate": 1.977228738627548e-05, + "loss": 0.1683, "step": 2091 }, { - "epoch": 0.12, - "grad_norm": 0.3968124504129748, - "learning_rate": 1.9576766217998622e-05, - "loss": 0.2731, + "epoch": 0.1, + "grad_norm": 0.3919653632217819, + "learning_rate": 1.9771971558638226e-05, + "loss": 0.2482, "step": 2092 }, { - "epoch": 0.12, - "grad_norm": 0.5598574134391687, - "learning_rate": 1.9576230395387403e-05, - "loss": 0.3231, + "epoch": 0.1, + "grad_norm": 0.4981627376003498, + "learning_rate": 1.977165551465894e-05, + "loss": 0.342, "step": 2093 }, { - "epoch": 0.12, - "grad_norm": 0.507072225088975, - "learning_rate": 1.9575694241151737e-05, - "loss": 0.3042, + "epoch": 0.1, + "grad_norm": 0.7949045293190264, + "learning_rate": 1.9771339254344616e-05, + "loss": 0.4958, "step": 2094 }, { - "epoch": 0.12, - "grad_norm": 0.4229706741547905, - "learning_rate": 1.9575157755310193e-05, - "loss": 0.3297, + "epoch": 0.1, + "grad_norm": 0.37134000339919704, + "learning_rate": 1.977102277770226e-05, + "loss": 0.2488, "step": 2095 }, { - "epoch": 0.12, - "grad_norm": 1.2787555528563692, - "learning_rate": 1.957462093788135e-05, - "loss": 0.677, + "epoch": 0.1, + "grad_norm": 1.1299938719523501, + "learning_rate": 1.9770706084738884e-05, + "loss": 0.6859, "step": 2096 }, { - "epoch": 0.12, - "grad_norm": 0.40537782087377044, - "learning_rate": 1.957408378888379e-05, - "loss": 0.245, + "epoch": 0.1, + "grad_norm": 0.48977463551737904, + "learning_rate": 1.9770389175461485e-05, + "loss": 0.3657, "step": 2097 }, { - "epoch": 0.12, - "grad_norm": 0.6392618964501624, - "learning_rate": 1.957354630833612e-05, - "loss": 0.4378, + "epoch": 0.1, + "grad_norm": 0.4427595300958791, + "learning_rate": 1.9770072049877087e-05, + "loss": 0.2127, "step": 2098 }, { - "epoch": 0.12, - "grad_norm": 0.44083595616310706, - "learning_rate": 1.957300849625696e-05, - "loss": 0.3578, + "epoch": 0.1, + "grad_norm": 0.40201157488549977, + "learning_rate": 1.9769754707992712e-05, + "loss": 0.2358, "step": 2099 }, { - "epoch": 0.12, - "grad_norm": 0.277591819404161, - "learning_rate": 1.9572470352664923e-05, - "loss": 0.1437, + "epoch": 0.1, + "grad_norm": 0.46702125298681396, + "learning_rate": 1.9769437149815384e-05, + "loss": 0.4077, "step": 2100 }, { - "epoch": 0.12, - "grad_norm": 0.6445522078777529, - "learning_rate": 1.957193187757865e-05, - "loss": 0.4184, + "epoch": 0.1, + "grad_norm": 0.4040077546107262, + "learning_rate": 1.9769119375352133e-05, + "loss": 0.211, "step": 2101 }, { - "epoch": 0.12, - "grad_norm": 1.06003956945609, - "learning_rate": 1.9571393071016785e-05, - "loss": 0.7141, + "epoch": 0.1, + "grad_norm": 1.4289465176348344, + "learning_rate": 1.9768801384609996e-05, + "loss": 0.6883, "step": 2102 }, { - "epoch": 0.12, - "grad_norm": 0.3327901938604386, - "learning_rate": 1.9570853932997993e-05, - "loss": 0.2564, + "epoch": 0.1, + "grad_norm": 0.5395786803833855, + "learning_rate": 1.9768483177596008e-05, + "loss": 0.3235, "step": 2103 }, { - "epoch": 0.12, - "grad_norm": 0.5349142642984711, - "learning_rate": 1.957031446354094e-05, - "loss": 0.4241, + "epoch": 0.1, + "grad_norm": 0.39806268988383775, + "learning_rate": 1.9768164754317222e-05, + "loss": 0.2221, "step": 2104 }, { - "epoch": 0.12, - "grad_norm": 0.7450362254049, - "learning_rate": 1.9569774662664306e-05, - "loss": 0.5647, + "epoch": 0.1, + "grad_norm": 0.36788733168080495, + "learning_rate": 1.976784611478068e-05, + "loss": 0.3318, "step": 2105 }, { - "epoch": 0.12, - "grad_norm": 0.2616609861296739, - "learning_rate": 1.9569234530386792e-05, - "loss": 0.1297, + "epoch": 0.1, + "grad_norm": 0.8086084709742454, + "learning_rate": 1.976752725899344e-05, + "loss": 0.5056, "step": 2106 }, { - "epoch": 0.12, - "grad_norm": 0.4211284765636636, - "learning_rate": 1.9568694066727095e-05, - "loss": 0.311, + "epoch": 0.1, + "grad_norm": 0.554104515266716, + "learning_rate": 1.976720818696256e-05, + "loss": 0.3109, "step": 2107 }, { - "epoch": 0.12, - "grad_norm": 1.4618973142603389, - "learning_rate": 1.9568153271703932e-05, - "loss": 0.8132, + "epoch": 0.1, + "grad_norm": 0.456639022945078, + "learning_rate": 1.9766888898695105e-05, + "loss": 0.2955, "step": 2108 }, { - "epoch": 0.12, - "grad_norm": 0.738279974487151, - "learning_rate": 1.9567612145336036e-05, - "loss": 0.4929, + "epoch": 0.1, + "grad_norm": 1.5126332944899685, + "learning_rate": 1.9766569394198143e-05, + "loss": 0.8181, "step": 2109 }, { - "epoch": 0.12, - "grad_norm": 0.44960921087855577, - "learning_rate": 1.9567070687642142e-05, - "loss": 0.317, + "epoch": 0.1, + "grad_norm": 0.47664182530428684, + "learning_rate": 1.976624967347875e-05, + "loss": 0.2993, "step": 2110 }, { - "epoch": 0.12, - "grad_norm": 0.4821347287488108, - "learning_rate": 1.9566528898641e-05, - "loss": 0.3665, + "epoch": 0.1, + "grad_norm": 0.6344329945938374, + "learning_rate": 1.9765929736544e-05, + "loss": 0.3781, "step": 2111 }, { - "epoch": 0.12, - "grad_norm": 0.32569543755682334, - "learning_rate": 1.9565986778351376e-05, - "loss": 0.1666, + "epoch": 0.1, + "grad_norm": 0.4082381123444978, + "learning_rate": 1.9765609583400977e-05, + "loss": 0.2751, "step": 2112 }, { - "epoch": 0.12, - "grad_norm": 0.48347428505913215, - "learning_rate": 1.9565444326792038e-05, - "loss": 0.2783, + "epoch": 0.1, + "grad_norm": 0.5200583902347743, + "learning_rate": 1.9765289214056774e-05, + "loss": 0.3014, "step": 2113 }, { - "epoch": 0.12, - "grad_norm": 1.5037634230328805, - "learning_rate": 1.9564901543981776e-05, - "loss": 0.7917, + "epoch": 0.1, + "grad_norm": 1.0514423799906136, + "learning_rate": 1.9764968628518475e-05, + "loss": 0.4267, "step": 2114 }, { - "epoch": 0.12, - "grad_norm": 0.3697719835295979, - "learning_rate": 1.9564358429939386e-05, - "loss": 0.3053, + "epoch": 0.1, + "grad_norm": 0.9210822209678126, + "learning_rate": 1.9764647826793185e-05, + "loss": 0.6372, "step": 2115 }, { - "epoch": 0.12, - "grad_norm": 0.5447348890954887, - "learning_rate": 1.9563814984683674e-05, - "loss": 0.3506, + "epoch": 0.1, + "grad_norm": 0.44777294294102676, + "learning_rate": 1.9764326808888002e-05, + "loss": 0.3094, "step": 2116 }, { - "epoch": 0.12, - "grad_norm": 1.304263404947526, - "learning_rate": 1.9563271208233462e-05, - "loss": 0.8144, + "epoch": 0.1, + "grad_norm": 0.4648348455228835, + "learning_rate": 1.9764005574810035e-05, + "loss": 0.3278, "step": 2117 }, { - "epoch": 0.12, - "grad_norm": 0.3159438724391187, - "learning_rate": 1.9562727100607577e-05, - "loss": 0.1621, + "epoch": 0.1, + "grad_norm": 0.41801974858853064, + "learning_rate": 1.9763684124566395e-05, + "loss": 0.2601, "step": 2118 }, { - "epoch": 0.12, - "grad_norm": 0.41587057997986904, - "learning_rate": 1.956218266182486e-05, - "loss": 0.2664, + "epoch": 0.1, + "grad_norm": 0.722316656245178, + "learning_rate": 1.97633624581642e-05, + "loss": 0.2869, "step": 2119 }, { - "epoch": 0.12, - "grad_norm": 1.0253769563866983, - "learning_rate": 1.9561637891904176e-05, - "loss": 0.568, + "epoch": 0.1, + "grad_norm": 0.4596482776364018, + "learning_rate": 1.9763040575610567e-05, + "loss": 0.3356, "step": 2120 }, { - "epoch": 0.12, - "grad_norm": 0.6434762446373379, - "learning_rate": 1.9561092790864376e-05, - "loss": 0.4318, + "epoch": 0.1, + "grad_norm": 0.5270898639748991, + "learning_rate": 1.976271847691263e-05, + "loss": 0.3632, "step": 2121 }, { - "epoch": 0.12, - "grad_norm": 0.5164150783625712, - "learning_rate": 1.956054735872435e-05, - "loss": 0.3499, + "epoch": 0.1, + "grad_norm": 0.5050544539553165, + "learning_rate": 1.9762396162077513e-05, + "loss": 0.3335, "step": 2122 }, { - "epoch": 0.12, - "grad_norm": 0.437135421408849, - "learning_rate": 1.9560001595502978e-05, - "loss": 0.3035, + "epoch": 0.1, + "grad_norm": 0.4876806477381173, + "learning_rate": 1.9762073631112354e-05, + "loss": 0.4309, "step": 2123 }, { - "epoch": 0.12, - "grad_norm": 0.3042150377766891, - "learning_rate": 1.955945550121916e-05, - "loss": 0.1825, + "epoch": 0.1, + "grad_norm": 0.40867237784946964, + "learning_rate": 1.9761750884024297e-05, + "loss": 0.2297, "step": 2124 }, { - "epoch": 0.12, - "grad_norm": 0.506806034858422, - "learning_rate": 1.9558909075891812e-05, - "loss": 0.3272, + "epoch": 0.1, + "grad_norm": 0.6079082435175089, + "learning_rate": 1.976142792082048e-05, + "loss": 0.2331, "step": 2125 }, { - "epoch": 0.12, - "grad_norm": 0.47732056135622875, - "learning_rate": 1.955836231953985e-05, - "loss": 0.3269, + "epoch": 0.1, + "grad_norm": 0.6431651858303786, + "learning_rate": 1.976110474150806e-05, + "loss": 0.3978, "step": 2126 }, { - "epoch": 0.12, - "grad_norm": 0.968804033210661, - "learning_rate": 1.9557815232182216e-05, - "loss": 0.4565, + "epoch": 0.1, + "grad_norm": 0.7325508129241449, + "learning_rate": 1.9760781346094183e-05, + "loss": 0.4293, "step": 2127 }, { - "epoch": 0.12, - "grad_norm": 0.3925570608975105, - "learning_rate": 1.9557267813837848e-05, - "loss": 0.3436, + "epoch": 0.1, + "grad_norm": 0.4172392382591397, + "learning_rate": 1.9760457734586017e-05, + "loss": 0.3331, "step": 2128 }, { - "epoch": 0.12, - "grad_norm": 0.9226390952793319, - "learning_rate": 1.955672006452571e-05, - "loss": 0.4685, + "epoch": 0.1, + "grad_norm": 0.4303314234027147, + "learning_rate": 1.9760133906990728e-05, + "loss": 0.3529, "step": 2129 }, { - "epoch": 0.12, - "grad_norm": 0.4344185349778874, - "learning_rate": 1.9556171984264765e-05, - "loss": 0.2778, + "epoch": 0.1, + "grad_norm": 0.28199684538308967, + "learning_rate": 1.9759809863315477e-05, + "loss": 0.0994, "step": 2130 }, { - "epoch": 0.12, - "grad_norm": 0.3822100632725823, - "learning_rate": 1.9555623573073994e-05, - "loss": 0.2504, + "epoch": 0.1, + "grad_norm": 0.5277874028977486, + "learning_rate": 1.9759485603567447e-05, + "loss": 0.2926, "step": 2131 }, { - "epoch": 0.12, - "grad_norm": 1.166974059789117, - "learning_rate": 1.955507483097239e-05, - "loss": 0.548, + "epoch": 0.1, + "grad_norm": 0.6983155816937586, + "learning_rate": 1.9759161127753804e-05, + "loss": 0.4114, "step": 2132 }, { - "epoch": 0.12, - "grad_norm": 0.7183519526044224, - "learning_rate": 1.9554525757978958e-05, - "loss": 0.4007, + "epoch": 0.1, + "grad_norm": 0.5376766004189407, + "learning_rate": 1.9758836435881745e-05, + "loss": 0.3788, "step": 2133 }, { - "epoch": 0.12, - "grad_norm": 0.40625089003812276, - "learning_rate": 1.955397635411271e-05, - "loss": 0.3013, + "epoch": 0.1, + "grad_norm": 0.42761906447658266, + "learning_rate": 1.9758511527958456e-05, + "loss": 0.2919, "step": 2134 }, { - "epoch": 0.12, - "grad_norm": 0.6427276061652427, - "learning_rate": 1.955342661939267e-05, - "loss": 0.4044, + "epoch": 0.1, + "grad_norm": 1.3183544276901813, + "learning_rate": 1.9758186403991122e-05, + "loss": 0.7132, "step": 2135 }, { - "epoch": 0.12, - "grad_norm": 0.7481016203756692, - "learning_rate": 1.9552876553837878e-05, - "loss": 0.2465, + "epoch": 0.1, + "grad_norm": 0.3789106890907285, + "learning_rate": 1.975786106398695e-05, + "loss": 0.2787, "step": 2136 }, { - "epoch": 0.12, - "grad_norm": 0.4078942359314576, - "learning_rate": 1.955232615746738e-05, - "loss": 0.2344, + "epoch": 0.1, + "grad_norm": 0.2955599002249521, + "learning_rate": 1.9757535507953133e-05, + "loss": 0.1185, "step": 2137 }, { - "epoch": 0.12, - "grad_norm": 0.9841112838622158, - "learning_rate": 1.9551775430300238e-05, - "loss": 0.4383, + "epoch": 0.1, + "grad_norm": 0.5656064428404165, + "learning_rate": 1.975720973589689e-05, + "loss": 0.3506, "step": 2138 }, { - "epoch": 0.12, - "grad_norm": 0.5783608548387257, - "learning_rate": 1.9551224372355523e-05, - "loss": 0.2947, + "epoch": 0.1, + "grad_norm": 0.6859807498607633, + "learning_rate": 1.9756883747825424e-05, + "loss": 0.5143, "step": 2139 }, { - "epoch": 0.12, - "grad_norm": 0.513156088021875, - "learning_rate": 1.955067298365232e-05, - "loss": 0.3523, + "epoch": 0.1, + "grad_norm": 0.3989102319005813, + "learning_rate": 1.975655754374596e-05, + "loss": 0.2443, "step": 2140 }, { - "epoch": 0.12, - "grad_norm": 1.4460222044906592, - "learning_rate": 1.9550121264209724e-05, - "loss": 0.8275, + "epoch": 0.1, + "grad_norm": 0.5420401994781315, + "learning_rate": 1.975623112366571e-05, + "loss": 0.3969, "step": 2141 }, { - "epoch": 0.12, - "grad_norm": 0.47796263241095566, - "learning_rate": 1.9549569214046837e-05, - "loss": 0.261, + "epoch": 0.1, + "grad_norm": 0.481622320257203, + "learning_rate": 1.9755904487591907e-05, + "loss": 0.2168, "step": 2142 }, { - "epoch": 0.12, - "grad_norm": 0.5068551991890565, - "learning_rate": 1.954901683318278e-05, - "loss": 0.2742, + "epoch": 0.1, + "grad_norm": 0.33653438827375337, + "learning_rate": 1.9755577635531783e-05, + "loss": 0.1599, "step": 2143 }, { - "epoch": 0.12, - "grad_norm": 0.7854309469545647, - "learning_rate": 1.9548464121636678e-05, - "loss": 0.39, + "epoch": 0.1, + "grad_norm": 0.45511025554911416, + "learning_rate": 1.9755250567492573e-05, + "loss": 0.3386, "step": 2144 }, { - "epoch": 0.12, - "grad_norm": 0.9562992698117687, - "learning_rate": 1.954791107942768e-05, - "loss": 0.4997, + "epoch": 0.1, + "grad_norm": 1.0130892911225096, + "learning_rate": 1.975492328348152e-05, + "loss": 0.5371, "step": 2145 }, { - "epoch": 0.12, - "grad_norm": 0.49884170414180384, - "learning_rate": 1.9547357706574926e-05, - "loss": 0.2523, + "epoch": 0.1, + "grad_norm": 0.5973837723028796, + "learning_rate": 1.9754595783505864e-05, + "loss": 0.4098, "step": 2146 }, { - "epoch": 0.12, - "grad_norm": 0.43289131925381763, - "learning_rate": 1.9546804003097588e-05, - "loss": 0.3589, + "epoch": 0.1, + "grad_norm": 0.4891071058211198, + "learning_rate": 1.975426806757286e-05, + "loss": 0.2778, "step": 2147 }, { - "epoch": 0.12, - "grad_norm": 1.1386748667131321, - "learning_rate": 1.9546249969014836e-05, - "loss": 0.673, + "epoch": 0.1, + "grad_norm": 0.46478000544467324, + "learning_rate": 1.9753940135689762e-05, + "loss": 0.3744, "step": 2148 }, { - "epoch": 0.12, - "grad_norm": 0.478383355031457, - "learning_rate": 1.954569560434586e-05, - "loss": 0.283, + "epoch": 0.1, + "grad_norm": 0.34344842587494384, + "learning_rate": 1.975361198786383e-05, + "loss": 0.1672, "step": 2149 }, { - "epoch": 0.12, - "grad_norm": 0.5539994701491447, - "learning_rate": 1.9545140909109854e-05, - "loss": 0.3762, + "epoch": 0.1, + "grad_norm": 0.6224444421866723, + "learning_rate": 1.975328362410233e-05, + "loss": 0.3083, "step": 2150 }, { - "epoch": 0.12, - "grad_norm": 0.35583342252654265, - "learning_rate": 1.954458588332603e-05, - "loss": 0.168, + "epoch": 0.1, + "grad_norm": 1.6624986319441464, + "learning_rate": 1.975295504441253e-05, + "loss": 0.5495, "step": 2151 }, { - "epoch": 0.12, - "grad_norm": 0.47482346170901524, - "learning_rate": 1.9544030527013603e-05, - "loss": 0.236, + "epoch": 0.1, + "grad_norm": 0.4083532970107414, + "learning_rate": 1.9752626248801707e-05, + "loss": 0.3145, "step": 2152 }, { - "epoch": 0.12, - "grad_norm": 1.488695547220567, - "learning_rate": 1.9543474840191817e-05, - "loss": 0.7431, + "epoch": 0.1, + "grad_norm": 0.6140521327996014, + "learning_rate": 1.9752297237277136e-05, + "loss": 0.2937, "step": 2153 }, { - "epoch": 0.12, - "grad_norm": 0.4557534247583365, - "learning_rate": 1.9542918822879902e-05, - "loss": 0.3779, + "epoch": 0.1, + "grad_norm": 0.5924161105777705, + "learning_rate": 1.9751968009846107e-05, + "loss": 0.2758, "step": 2154 }, { - "epoch": 0.12, - "grad_norm": 0.33288832250828193, - "learning_rate": 1.954236247509712e-05, - "loss": 0.2592, + "epoch": 0.1, + "grad_norm": 0.4711705892669173, + "learning_rate": 1.9751638566515903e-05, + "loss": 0.2411, "step": 2155 }, { - "epoch": 0.12, - "grad_norm": 0.4796173843723215, - "learning_rate": 1.954180579686274e-05, - "loss": 0.3337, + "epoch": 0.1, + "grad_norm": 0.5645447959083201, + "learning_rate": 1.975130890729382e-05, + "loss": 0.2908, "step": 2156 }, { - "epoch": 0.12, - "grad_norm": 0.4089067855760762, - "learning_rate": 1.954124878819603e-05, - "loss": 0.2759, + "epoch": 0.1, + "grad_norm": 1.8859694042073942, + "learning_rate": 1.9750979032187157e-05, + "loss": 0.5671, "step": 2157 }, { - "epoch": 0.12, - "grad_norm": 0.4690219866747868, - "learning_rate": 1.9540691449116286e-05, - "loss": 0.2872, + "epoch": 0.1, + "grad_norm": 1.0915836262542027, + "learning_rate": 1.9750648941203216e-05, + "loss": 0.536, "step": 2158 }, { - "epoch": 0.12, - "grad_norm": 0.445945068109367, - "learning_rate": 1.9540133779642806e-05, - "loss": 0.3255, + "epoch": 0.1, + "grad_norm": 0.48920467992871297, + "learning_rate": 1.9750318634349302e-05, + "loss": 0.3078, "step": 2159 }, { - "epoch": 0.12, - "grad_norm": 0.620237870223999, - "learning_rate": 1.9539575779794906e-05, - "loss": 0.4513, + "epoch": 0.1, + "grad_norm": 0.5036544758332026, + "learning_rate": 1.9749988111632735e-05, + "loss": 0.2984, "step": 2160 }, { - "epoch": 0.12, - "grad_norm": 0.4622798158558348, - "learning_rate": 1.9539017449591905e-05, - "loss": 0.3245, + "epoch": 0.1, + "grad_norm": 0.5472862968831728, + "learning_rate": 1.974965737306083e-05, + "loss": 0.2938, "step": 2161 }, { - "epoch": 0.12, - "grad_norm": 0.40301614161542826, - "learning_rate": 1.9538458789053143e-05, - "loss": 0.3361, + "epoch": 0.1, + "grad_norm": 0.7784049130906795, + "learning_rate": 1.9749326418640903e-05, + "loss": 0.3686, "step": 2162 }, { - "epoch": 0.12, - "grad_norm": 0.4533281560660749, - "learning_rate": 1.9537899798197963e-05, - "loss": 0.2963, + "epoch": 0.1, + "grad_norm": 1.9306301938359, + "learning_rate": 1.974899524838029e-05, + "loss": 0.518, "step": 2163 }, { - "epoch": 0.12, - "grad_norm": 0.4209930222368525, - "learning_rate": 1.953734047704572e-05, - "loss": 0.281, + "epoch": 0.1, + "grad_norm": 0.4303476937648894, + "learning_rate": 1.9748663862286315e-05, + "loss": 0.3009, "step": 2164 }, { - "epoch": 0.12, - "grad_norm": 0.36565295658400504, - "learning_rate": 1.9536780825615788e-05, - "loss": 0.2151, + "epoch": 0.1, + "grad_norm": 0.41065126036491767, + "learning_rate": 1.9748332260366316e-05, + "loss": 0.2957, "step": 2165 }, { - "epoch": 0.12, - "grad_norm": 0.4422205941031057, - "learning_rate": 1.9536220843927544e-05, - "loss": 0.3669, + "epoch": 0.1, + "grad_norm": 1.2998310779507305, + "learning_rate": 1.974800044262764e-05, + "loss": 0.3834, "step": 2166 }, { - "epoch": 0.12, - "grad_norm": 0.4385665259665185, - "learning_rate": 1.953566053200039e-05, - "loss": 0.3155, + "epoch": 0.1, + "grad_norm": 1.251330312265991, + "learning_rate": 1.9747668409077628e-05, + "loss": 0.4423, "step": 2167 }, { - "epoch": 0.12, - "grad_norm": 0.6424769906790054, - "learning_rate": 1.953509988985371e-05, - "loss": 0.4169, + "epoch": 0.1, + "grad_norm": 0.5378470923839239, + "learning_rate": 1.9747336159723636e-05, + "loss": 0.3399, "step": 2168 }, { - "epoch": 0.12, - "grad_norm": 1.194460975030824, - "learning_rate": 1.953453891750694e-05, - "loss": 0.7403, - "step": 2169 + "epoch": 0.1, + "grad_norm": 0.42478663739553907, + "learning_rate": 1.974700369457301e-05, + "loss": 0.2637, + "step": 2169 }, { - "epoch": 0.12, - "grad_norm": 0.3717051558823366, - "learning_rate": 1.9533977614979493e-05, - "loss": 0.2938, + "epoch": 0.1, + "grad_norm": 0.476230097921976, + "learning_rate": 1.9746671013633123e-05, + "loss": 0.2838, "step": 2170 }, { - "epoch": 0.12, - "grad_norm": 0.3201587539393018, - "learning_rate": 1.9533415982290813e-05, - "loss": 0.2192, + "epoch": 0.1, + "grad_norm": 0.5707161414998441, + "learning_rate": 1.974633811691133e-05, + "loss": 0.3511, "step": 2171 }, { - "epoch": 0.12, - "grad_norm": 0.7368872184262916, - "learning_rate": 1.9532854019460346e-05, - "loss": 0.4229, + "epoch": 0.1, + "grad_norm": 0.5050082456780789, + "learning_rate": 1.9746005004415004e-05, + "loss": 0.3462, "step": 2172 }, { - "epoch": 0.12, - "grad_norm": 0.44181930693740384, - "learning_rate": 1.9532291726507557e-05, - "loss": 0.2848, + "epoch": 0.1, + "grad_norm": 1.1402183452298285, + "learning_rate": 1.974567167615152e-05, + "loss": 0.4573, "step": 2173 }, { - "epoch": 0.12, - "grad_norm": 0.4664201908407062, - "learning_rate": 1.9531729103451912e-05, - "loss": 0.35, + "epoch": 0.1, + "grad_norm": 0.45057377023130346, + "learning_rate": 1.9745338132128262e-05, + "loss": 0.3138, "step": 2174 }, { - "epoch": 0.12, - "grad_norm": 1.454446688689338, - "learning_rate": 1.9531166150312902e-05, - "loss": 0.605, + "epoch": 0.1, + "grad_norm": 0.6112395290137412, + "learning_rate": 1.9745004372352606e-05, + "loss": 0.4187, "step": 2175 }, { - "epoch": 0.13, - "grad_norm": 0.46776768368543914, - "learning_rate": 1.9530602867110015e-05, - "loss": 0.2971, + "epoch": 0.1, + "grad_norm": 0.36464829458990694, + "learning_rate": 1.974467039683195e-05, + "loss": 0.1944, "step": 2176 }, { - "epoch": 0.13, - "grad_norm": 0.3477231628548521, - "learning_rate": 1.953003925386276e-05, - "loss": 0.2378, + "epoch": 0.1, + "grad_norm": 0.432123913201605, + "learning_rate": 1.974433620557368e-05, + "loss": 0.276, "step": 2177 }, { - "epoch": 0.13, - "grad_norm": 0.5094617271180392, - "learning_rate": 1.9529475310590656e-05, - "loss": 0.3366, + "epoch": 0.1, + "grad_norm": 1.147450516210223, + "learning_rate": 1.97440017985852e-05, + "loss": 0.5794, "step": 2178 }, { - "epoch": 0.13, - "grad_norm": 0.46931079454798147, - "learning_rate": 1.9528911037313233e-05, - "loss": 0.3177, + "epoch": 0.1, + "grad_norm": 0.7020237605619556, + "learning_rate": 1.9743667175873908e-05, + "loss": 0.3555, "step": 2179 }, { - "epoch": 0.13, - "grad_norm": 1.178746644251515, - "learning_rate": 1.952834643405003e-05, - "loss": 0.6243, + "epoch": 0.1, + "grad_norm": 0.4817949909637308, + "learning_rate": 1.9743332337447222e-05, + "loss": 0.3336, "step": 2180 }, { - "epoch": 0.13, - "grad_norm": 1.9884192839837473, - "learning_rate": 1.95277815008206e-05, - "loss": 0.5839, + "epoch": 0.1, + "grad_norm": 1.2154682722530228, + "learning_rate": 1.9742997283312543e-05, + "loss": 0.7213, "step": 2181 }, { - "epoch": 0.13, - "grad_norm": 0.4751766716695899, - "learning_rate": 1.9527216237644508e-05, - "loss": 0.2844, + "epoch": 0.1, + "grad_norm": 0.35944610430046714, + "learning_rate": 1.97426620134773e-05, + "loss": 0.2309, "step": 2182 }, { - "epoch": 0.13, - "grad_norm": 0.587711364919638, - "learning_rate": 1.9526650644541326e-05, - "loss": 0.3622, + "epoch": 0.1, + "grad_norm": 0.419296109841567, + "learning_rate": 1.9742326527948904e-05, + "loss": 0.2486, "step": 2183 }, { - "epoch": 0.13, - "grad_norm": 0.8459515983868039, - "learning_rate": 1.952608472153064e-05, - "loss": 0.3906, + "epoch": 0.1, + "grad_norm": 0.6056328246067001, + "learning_rate": 1.9741990826734793e-05, + "loss": 0.3643, "step": 2184 }, { - "epoch": 0.13, - "grad_norm": 0.4486221294280846, - "learning_rate": 1.952551846863205e-05, - "loss": 0.2203, + "epoch": 0.1, + "grad_norm": 0.9564733493580043, + "learning_rate": 1.9741654909842394e-05, + "loss": 0.4695, "step": 2185 }, { - "epoch": 0.13, - "grad_norm": 0.6553354179919787, - "learning_rate": 1.9524951885865165e-05, - "loss": 0.3764, + "epoch": 0.1, + "grad_norm": 0.5255979956132542, + "learning_rate": 1.9741318777279143e-05, + "loss": 0.2375, "step": 2186 }, { - "epoch": 0.13, - "grad_norm": 1.830584238414141, - "learning_rate": 1.952438497324961e-05, - "loss": 0.7863, + "epoch": 0.1, + "grad_norm": 1.533314518458618, + "learning_rate": 1.974098242905248e-05, + "loss": 0.7751, "step": 2187 }, { - "epoch": 0.13, - "grad_norm": 0.5252954020431466, - "learning_rate": 1.9523817730805008e-05, - "loss": 0.2538, + "epoch": 0.1, + "grad_norm": 0.3643456814371708, + "learning_rate": 1.974064586516986e-05, + "loss": 0.2498, "step": 2188 }, { - "epoch": 0.13, - "grad_norm": 0.8584255441697742, - "learning_rate": 1.952325015855101e-05, - "loss": 0.5336, + "epoch": 0.1, + "grad_norm": 0.3337031822908557, + "learning_rate": 1.9740309085638727e-05, + "loss": 0.1804, "step": 2189 }, { - "epoch": 0.13, - "grad_norm": 0.3471580515229295, - "learning_rate": 1.9522682256507268e-05, - "loss": 0.2511, + "epoch": 0.1, + "grad_norm": 0.968982998919056, + "learning_rate": 1.9739972090466538e-05, + "loss": 0.5333, "step": 2190 }, { - "epoch": 0.13, - "grad_norm": 0.4088705601951199, - "learning_rate": 1.952211402469345e-05, - "loss": 0.2464, + "epoch": 0.1, + "grad_norm": 0.6593580144607655, + "learning_rate": 1.9739634879660758e-05, + "loss": 0.3819, "step": 2191 }, { - "epoch": 0.13, - "grad_norm": 1.2705970535049596, - "learning_rate": 1.952154546312923e-05, - "loss": 0.6611, + "epoch": 0.1, + "grad_norm": 0.4545542454932407, + "learning_rate": 1.9739297453228842e-05, + "loss": 0.2464, "step": 2192 }, { - "epoch": 0.13, - "grad_norm": 0.6454425353434776, - "learning_rate": 1.9520976571834304e-05, - "loss": 0.4209, + "epoch": 0.1, + "grad_norm": 1.69648692975947, + "learning_rate": 1.9738959811178273e-05, + "loss": 0.8943, "step": 2193 }, { - "epoch": 0.13, - "grad_norm": 0.44634869722763765, - "learning_rate": 1.9520407350828364e-05, - "loss": 0.2355, + "epoch": 0.1, + "grad_norm": 3.8701638473312925, + "learning_rate": 1.9738621953516517e-05, + "loss": 0.2653, "step": 2194 }, { - "epoch": 0.13, - "grad_norm": 0.7939220131831405, - "learning_rate": 1.9519837800131127e-05, - "loss": 0.4377, + "epoch": 0.1, + "grad_norm": 0.42370326478041115, + "learning_rate": 1.973828388025106e-05, + "loss": 0.2296, "step": 2195 }, { - "epoch": 0.13, - "grad_norm": 0.3147220383048209, - "learning_rate": 1.9519267919762318e-05, - "loss": 0.1702, + "epoch": 0.1, + "grad_norm": 0.9719842354945029, + "learning_rate": 1.9737945591389384e-05, + "loss": 0.3647, "step": 2196 }, { - "epoch": 0.13, - "grad_norm": 0.703105533799731, - "learning_rate": 1.951869770974167e-05, - "loss": 0.3809, + "epoch": 0.1, + "grad_norm": 1.3562021016350019, + "learning_rate": 1.9737607086938975e-05, + "loss": 0.5287, "step": 2197 }, { - "epoch": 0.13, - "grad_norm": 0.47335849847144024, - "learning_rate": 1.951812717008893e-05, - "loss": 0.2881, + "epoch": 0.1, + "grad_norm": 0.5507630385567706, + "learning_rate": 1.9737268366907328e-05, + "loss": 0.2891, "step": 2198 }, { - "epoch": 0.13, - "grad_norm": 1.1606156210211465, - "learning_rate": 1.951755630082386e-05, - "loss": 0.7156, + "epoch": 0.1, + "grad_norm": 1.5700491189311163, + "learning_rate": 1.973692943130195e-05, + "loss": 0.6074, "step": 2199 }, { - "epoch": 0.13, - "grad_norm": 0.45947436250339385, - "learning_rate": 1.9516985101966218e-05, - "loss": 0.3269, + "epoch": 0.1, + "grad_norm": 0.8445721338943345, + "learning_rate": 1.973659028013034e-05, + "loss": 0.3308, "step": 2200 }, { - "epoch": 0.13, - "grad_norm": 0.43358484416349463, - "learning_rate": 1.9516413573535794e-05, - "loss": 0.3523, + "epoch": 0.1, + "grad_norm": 0.5566301262255688, + "learning_rate": 1.97362509134e-05, + "loss": 0.3363, "step": 2201 }, { - "epoch": 0.13, - "grad_norm": 0.5472429483088781, - "learning_rate": 1.9515841715552376e-05, - "loss": 0.3946, + "epoch": 0.1, + "grad_norm": 1.0507449152666521, + "learning_rate": 1.9735911331118452e-05, + "loss": 0.3163, "step": 2202 }, { - "epoch": 0.13, - "grad_norm": 0.34276882389222835, - "learning_rate": 1.951526952803577e-05, - "loss": 0.1918, + "epoch": 0.1, + "grad_norm": 0.704474598330572, + "learning_rate": 1.973557153329321e-05, + "loss": 0.3802, "step": 2203 }, { - "epoch": 0.13, - "grad_norm": 0.5604731992436495, - "learning_rate": 1.951469701100579e-05, - "loss": 0.3188, + "epoch": 0.1, + "grad_norm": 0.680440487823801, + "learning_rate": 1.9735231519931796e-05, + "loss": 0.3331, "step": 2204 }, { - "epoch": 0.13, - "grad_norm": 0.5485943753095354, - "learning_rate": 1.951412416448226e-05, - "loss": 0.4226, + "epoch": 0.1, + "grad_norm": 1.0006056529581375, + "learning_rate": 1.973489129104174e-05, + "loss": 0.3693, "step": 2205 }, { - "epoch": 0.13, - "grad_norm": 0.38949595158304373, - "learning_rate": 1.951355098848502e-05, - "loss": 0.3067, + "epoch": 0.1, + "grad_norm": 0.8237427228004932, + "learning_rate": 1.9734550846630575e-05, + "loss": 0.494, "step": 2206 }, { - "epoch": 0.13, - "grad_norm": 0.5935212725749861, - "learning_rate": 1.9512977483033916e-05, - "loss": 0.4256, + "epoch": 0.1, + "grad_norm": 0.6219840196045429, + "learning_rate": 1.9734210186705837e-05, + "loss": 0.3057, "step": 2207 }, { - "epoch": 0.13, - "grad_norm": 0.3030094098212264, - "learning_rate": 1.9512403648148813e-05, - "loss": 0.1428, + "epoch": 0.1, + "grad_norm": 0.5726745966277148, + "learning_rate": 1.9733869311275063e-05, + "loss": 0.3983, "step": 2208 }, { - "epoch": 0.13, - "grad_norm": 0.3956227850830546, - "learning_rate": 1.951182948384958e-05, - "loss": 0.2861, + "epoch": 0.1, + "grad_norm": 0.5883624012694447, + "learning_rate": 1.973352822034581e-05, + "loss": 0.0855, "step": 2209 }, { - "epoch": 0.13, - "grad_norm": 0.4697965880003851, - "learning_rate": 1.9511254990156103e-05, - "loss": 0.3211, + "epoch": 0.1, + "grad_norm": 0.4618386052318375, + "learning_rate": 1.973318691392562e-05, + "loss": 0.2819, "step": 2210 }, { - "epoch": 0.13, - "grad_norm": 0.9172394638687885, - "learning_rate": 1.9510680167088275e-05, - "loss": 0.4232, + "epoch": 0.1, + "grad_norm": 0.6166845564878932, + "learning_rate": 1.9732845392022052e-05, + "loss": 0.4593, "step": 2211 }, { - "epoch": 0.13, - "grad_norm": 0.43535146422603604, - "learning_rate": 1.9510105014665998e-05, - "loss": 0.3297, + "epoch": 0.1, + "grad_norm": 0.5345335800266068, + "learning_rate": 1.9732503654642667e-05, + "loss": 0.3187, "step": 2212 }, { - "epoch": 0.13, - "grad_norm": 0.5773882269858571, - "learning_rate": 1.9509529532909196e-05, - "loss": 0.4388, + "epoch": 0.1, + "grad_norm": 0.4769875630202992, + "learning_rate": 1.9732161701795034e-05, + "loss": 0.3323, "step": 2213 }, { - "epoch": 0.13, - "grad_norm": 0.3137998718551292, - "learning_rate": 1.9508953721837795e-05, - "loss": 0.2522, + "epoch": 0.1, + "grad_norm": 0.4248093276101492, + "learning_rate": 1.973181953348672e-05, + "loss": 0.2411, "step": 2214 }, { - "epoch": 0.13, - "grad_norm": 0.35939192870248576, - "learning_rate": 1.9508377581471735e-05, - "loss": 0.1858, + "epoch": 0.1, + "grad_norm": 0.4968614159676831, + "learning_rate": 1.97314771497253e-05, + "loss": 0.309, "step": 2215 }, { - "epoch": 0.13, - "grad_norm": 0.5116306737278765, - "learning_rate": 1.9507801111830967e-05, - "loss": 0.3576, + "epoch": 0.1, + "grad_norm": 0.4524940166547822, + "learning_rate": 1.9731134550518355e-05, + "loss": 0.3231, "step": 2216 }, { - "epoch": 0.13, - "grad_norm": 0.5731809416440271, - "learning_rate": 1.950722431293546e-05, - "loss": 0.3353, + "epoch": 0.1, + "grad_norm": 1.4989127752968847, + "learning_rate": 1.9730791735873472e-05, + "loss": 0.8732, "step": 2217 }, { - "epoch": 0.13, - "grad_norm": 0.47304259161573736, - "learning_rate": 1.950664718480518e-05, - "loss": 0.3236, + "epoch": 0.1, + "grad_norm": 0.6223564638310751, + "learning_rate": 1.973044870579824e-05, + "loss": 0.4624, "step": 2218 }, { - "epoch": 0.13, - "grad_norm": 0.6276525740396246, - "learning_rate": 1.9506069727460116e-05, - "loss": 0.4444, + "epoch": 0.1, + "grad_norm": 0.4481740607186792, + "learning_rate": 1.973010546030025e-05, + "loss": 0.2929, "step": 2219 }, { - "epoch": 0.13, - "grad_norm": 1.090832100391312, - "learning_rate": 1.9505491940920268e-05, - "loss": 0.6109, + "epoch": 0.1, + "grad_norm": 0.6396224296821759, + "learning_rate": 1.9729761999387102e-05, + "loss": 0.3939, "step": 2220 }, { - "epoch": 0.13, - "grad_norm": 0.2481407623113843, - "learning_rate": 1.9504913825205643e-05, - "loss": 0.1352, + "epoch": 0.1, + "grad_norm": 0.5671267141358581, + "learning_rate": 1.9729418323066403e-05, + "loss": 0.1872, "step": 2221 }, { - "epoch": 0.13, - "grad_norm": 0.4448099721555093, - "learning_rate": 1.950433538033626e-05, - "loss": 0.3269, + "epoch": 0.1, + "grad_norm": 0.45026434296421153, + "learning_rate": 1.972907443134576e-05, + "loss": 0.2227, "step": 2222 }, { - "epoch": 0.13, - "grad_norm": 0.866009184797271, - "learning_rate": 1.950375660633215e-05, - "loss": 0.55, + "epoch": 0.1, + "grad_norm": 0.5685168347868768, + "learning_rate": 1.9728730324232782e-05, + "loss": 0.3887, "step": 2223 }, { - "epoch": 0.13, - "grad_norm": 0.4384851116001498, - "learning_rate": 1.950317750321336e-05, - "loss": 0.2842, + "epoch": 0.1, + "grad_norm": 0.7394597761937198, + "learning_rate": 1.9728386001735097e-05, + "loss": 0.4272, "step": 2224 }, { - "epoch": 0.13, - "grad_norm": 0.6313666828316659, - "learning_rate": 1.950259807099994e-05, - "loss": 0.3842, + "epoch": 0.1, + "grad_norm": 0.499493928200407, + "learning_rate": 1.9728041463860318e-05, + "loss": 0.2515, "step": 2225 }, { - "epoch": 0.13, - "grad_norm": 0.4331107293530284, - "learning_rate": 1.950201830971196e-05, - "loss": 0.3443, + "epoch": 0.1, + "grad_norm": 0.6790780673355199, + "learning_rate": 1.972769671061608e-05, + "loss": 0.4053, "step": 2226 }, { - "epoch": 0.13, - "grad_norm": 0.23533665688629704, - "learning_rate": 1.9501438219369492e-05, - "loss": 0.1392, + "epoch": 0.1, + "grad_norm": 0.4318757865373186, + "learning_rate": 1.972735174201001e-05, + "loss": 0.2584, "step": 2227 }, { - "epoch": 0.13, - "grad_norm": 0.6139970636009742, - "learning_rate": 1.9500857799992628e-05, - "loss": 0.4041, + "epoch": 0.1, + "grad_norm": 0.396611312679269, + "learning_rate": 1.972700655804975e-05, + "loss": 0.2247, "step": 2228 }, { - "epoch": 0.13, - "grad_norm": 0.48155996698312703, - "learning_rate": 1.9500277051601465e-05, - "loss": 0.3663, + "epoch": 0.1, + "grad_norm": 1.0111621756301359, + "learning_rate": 1.9726661158742938e-05, + "loss": 0.6302, "step": 2229 }, { - "epoch": 0.13, - "grad_norm": 0.4152605894643503, - "learning_rate": 1.949969597421612e-05, - "loss": 0.2154, + "epoch": 0.1, + "grad_norm": 0.9110780265475183, + "learning_rate": 1.9726315544097223e-05, + "loss": 0.5314, "step": 2230 }, { - "epoch": 0.13, - "grad_norm": 0.6361750555191724, - "learning_rate": 1.9499114567856708e-05, - "loss": 0.4438, + "epoch": 0.1, + "grad_norm": 0.4741694506451656, + "learning_rate": 1.9725969714120256e-05, + "loss": 0.2604, "step": 2231 }, { - "epoch": 0.13, - "grad_norm": 1.2016891543780488, - "learning_rate": 1.9498532832543372e-05, - "loss": 0.6434, + "epoch": 0.1, + "grad_norm": 0.6072701530107135, + "learning_rate": 1.9725623668819694e-05, + "loss": 0.3967, "step": 2232 }, { - "epoch": 0.13, - "grad_norm": 0.39561891747852557, - "learning_rate": 1.9497950768296246e-05, - "loss": 0.2549, + "epoch": 0.1, + "grad_norm": 0.3971734623387778, + "learning_rate": 1.9725277408203198e-05, + "loss": 0.1874, "step": 2233 }, { - "epoch": 0.13, - "grad_norm": 0.31869872802169985, - "learning_rate": 1.9497368375135497e-05, - "loss": 0.2302, + "epoch": 0.1, + "grad_norm": 0.5137023465851119, + "learning_rate": 1.9724930932278435e-05, + "loss": 0.3089, "step": 2234 }, { - "epoch": 0.13, - "grad_norm": 0.8584671892353136, - "learning_rate": 1.949678565308129e-05, - "loss": 0.4976, + "epoch": 0.1, + "grad_norm": 0.6769462325112753, + "learning_rate": 1.972458424105307e-05, + "loss": 0.325, "step": 2235 }, { - "epoch": 0.13, - "grad_norm": 0.8802500200900603, - "learning_rate": 1.9496202602153805e-05, - "loss": 0.4712, + "epoch": 0.1, + "grad_norm": 1.0178131297168105, + "learning_rate": 1.9724237334534786e-05, + "loss": 0.4665, "step": 2236 }, { - "epoch": 0.13, - "grad_norm": 0.35855578518631354, - "learning_rate": 1.949561922237323e-05, - "loss": 0.2521, + "epoch": 0.1, + "grad_norm": 0.4030555267181865, + "learning_rate": 1.972389021273126e-05, + "loss": 0.3009, "step": 2237 }, { - "epoch": 0.13, - "grad_norm": 0.5343010087481445, - "learning_rate": 1.9495035513759772e-05, - "loss": 0.386, + "epoch": 0.1, + "grad_norm": 0.7399447482774754, + "learning_rate": 1.9723542875650176e-05, + "loss": 0.4178, "step": 2238 }, { - "epoch": 0.13, - "grad_norm": 0.3763359873365377, - "learning_rate": 1.9494451476333637e-05, - "loss": 0.2417, + "epoch": 0.1, + "grad_norm": 0.35790169784215436, + "learning_rate": 1.9723195323299222e-05, + "loss": 0.2483, "step": 2239 }, { - "epoch": 0.13, - "grad_norm": 0.7266720426527591, - "learning_rate": 1.949386711011506e-05, - "loss": 0.3591, + "epoch": 0.1, + "grad_norm": 0.4964662695625661, + "learning_rate": 1.9722847555686094e-05, + "loss": 0.2842, "step": 2240 }, { - "epoch": 0.13, - "grad_norm": 0.3434275838027053, - "learning_rate": 1.9493282415124274e-05, - "loss": 0.279, + "epoch": 0.1, + "grad_norm": 0.5974334542618154, + "learning_rate": 1.9722499572818496e-05, + "loss": 0.3776, "step": 2241 }, { - "epoch": 0.13, - "grad_norm": 0.793014395205871, - "learning_rate": 1.9492697391381523e-05, - "loss": 0.3409, + "epoch": 0.1, + "grad_norm": 0.9527547463324139, + "learning_rate": 1.9722151374704128e-05, + "loss": 0.5839, "step": 2242 }, { - "epoch": 0.13, - "grad_norm": 0.4304945592429074, - "learning_rate": 1.949211203890707e-05, - "loss": 0.2789, + "epoch": 0.1, + "grad_norm": 0.46939326020246824, + "learning_rate": 1.97218029613507e-05, + "loss": 0.2844, "step": 2243 }, { - "epoch": 0.13, - "grad_norm": 0.8892252947519116, - "learning_rate": 1.949152635772119e-05, - "loss": 0.617, + "epoch": 0.1, + "grad_norm": 0.4265678947988853, + "learning_rate": 1.9721454332765918e-05, + "loss": 0.2808, "step": 2244 }, { - "epoch": 0.13, - "grad_norm": 0.362436878968347, - "learning_rate": 1.9490940347844156e-05, - "loss": 0.3207, + "epoch": 0.1, + "grad_norm": 0.34949276753249836, + "learning_rate": 1.972110548895751e-05, + "loss": 0.2108, "step": 2245 }, { - "epoch": 0.13, - "grad_norm": 0.44349164878713104, - "learning_rate": 1.9490354009296268e-05, - "loss": 0.3616, + "epoch": 0.1, + "grad_norm": 0.626401990070787, + "learning_rate": 1.9720756429933198e-05, + "loss": 0.3742, "step": 2246 }, { - "epoch": 0.13, - "grad_norm": 0.3274107448446957, - "learning_rate": 1.9489767342097824e-05, - "loss": 0.2196, + "epoch": 0.1, + "grad_norm": 0.39177326150665026, + "learning_rate": 1.9720407155700706e-05, + "loss": 0.327, "step": 2247 }, { - "epoch": 0.13, - "grad_norm": 0.8783237744052049, - "learning_rate": 1.948918034626915e-05, - "loss": 0.5678, + "epoch": 0.1, + "grad_norm": 1.2161482680855527, + "learning_rate": 1.972005766626777e-05, + "loss": 0.4649, "step": 2248 }, { - "epoch": 0.13, - "grad_norm": 0.43709607755935653, - "learning_rate": 1.948859302183057e-05, - "loss": 0.329, + "epoch": 0.1, + "grad_norm": 0.49884535459800067, + "learning_rate": 1.9719707961642122e-05, + "loss": 0.3418, "step": 2249 }, { - "epoch": 0.13, - "grad_norm": 0.4099301199962942, - "learning_rate": 1.9488005368802415e-05, - "loss": 0.2837, + "epoch": 0.1, + "grad_norm": 0.6630128481249439, + "learning_rate": 1.971935804183151e-05, + "loss": 0.4039, "step": 2250 }, { - "epoch": 0.13, - "grad_norm": 0.5420335066249002, - "learning_rate": 1.948741738720505e-05, - "loss": 0.4656, + "epoch": 0.1, + "grad_norm": 0.37111506961829666, + "learning_rate": 1.971900790684368e-05, + "loss": 0.2383, "step": 2251 }, { - "epoch": 0.13, - "grad_norm": 0.3731959010848403, - "learning_rate": 1.9486829077058823e-05, - "loss": 0.3333, + "epoch": 0.1, + "grad_norm": 0.4530526857397637, + "learning_rate": 1.971865755668638e-05, + "loss": 0.2644, "step": 2252 }, { - "epoch": 0.13, - "grad_norm": 0.2855765128398837, - "learning_rate": 1.9486240438384114e-05, - "loss": 0.2129, + "epoch": 0.1, + "grad_norm": 0.8473836291047714, + "learning_rate": 1.971830699136737e-05, + "loss": 0.4428, "step": 2253 }, { - "epoch": 0.13, - "grad_norm": 0.8666950180014855, - "learning_rate": 1.9485651471201306e-05, - "loss": 0.4876, + "epoch": 0.1, + "grad_norm": 0.9113872612642971, + "learning_rate": 1.971795621089441e-05, + "loss": 0.5937, "step": 2254 }, { - "epoch": 0.13, - "grad_norm": 0.46097536285247287, - "learning_rate": 1.94850621755308e-05, - "loss": 0.3323, + "epoch": 0.1, + "grad_norm": 0.42573822166032294, + "learning_rate": 1.9717605215275263e-05, + "loss": 0.3136, "step": 2255 }, { - "epoch": 0.13, - "grad_norm": 0.7361756550595048, - "learning_rate": 1.9484472551392993e-05, - "loss": 0.4123, + "epoch": 0.1, + "grad_norm": 0.5890520441848602, + "learning_rate": 1.9717254004517707e-05, + "loss": 0.4004, "step": 2256 }, { - "epoch": 0.13, - "grad_norm": 0.3658556105623814, - "learning_rate": 1.9483882598808315e-05, - "loss": 0.3458, + "epoch": 0.1, + "grad_norm": 0.38592348159489226, + "learning_rate": 1.9716902578629507e-05, + "loss": 0.2235, "step": 2257 }, { - "epoch": 0.13, - "grad_norm": 0.44969033758873034, - "learning_rate": 1.948329231779719e-05, - "loss": 0.3461, + "epoch": 0.1, + "grad_norm": 0.4591218882363621, + "learning_rate": 1.9716550937618456e-05, + "loss": 0.352, "step": 2258 }, { - "epoch": 0.13, - "grad_norm": 0.5560341222882007, - "learning_rate": 1.9482701708380056e-05, - "loss": 0.2802, + "epoch": 0.1, + "grad_norm": 0.3987410712178617, + "learning_rate": 1.971619908149233e-05, + "loss": 0.3331, "step": 2259 }, { - "epoch": 0.13, - "grad_norm": 0.4553291061872389, - "learning_rate": 1.948211077057737e-05, - "loss": 0.2485, + "epoch": 0.1, + "grad_norm": 1.0796229290513142, + "learning_rate": 1.9715847010258918e-05, + "loss": 0.6681, "step": 2260 }, { - "epoch": 0.13, - "grad_norm": 0.4033237221641209, - "learning_rate": 1.9481519504409596e-05, - "loss": 0.2866, + "epoch": 0.1, + "grad_norm": 0.3073222948611999, + "learning_rate": 1.9715494723926017e-05, + "loss": 0.116, "step": 2261 }, { - "epoch": 0.13, - "grad_norm": 0.5367463676843832, - "learning_rate": 1.948092790989721e-05, - "loss": 0.3968, + "epoch": 0.1, + "grad_norm": 0.5328137682362506, + "learning_rate": 1.971514222250143e-05, + "loss": 0.3421, "step": 2262 }, { - "epoch": 0.13, - "grad_norm": 0.5626236268127532, - "learning_rate": 1.94803359870607e-05, - "loss": 0.3514, + "epoch": 0.1, + "grad_norm": 0.43378195094021044, + "learning_rate": 1.9714789505992956e-05, + "loss": 0.3493, "step": 2263 }, { - "epoch": 0.13, - "grad_norm": 0.4338608687064876, - "learning_rate": 1.947974373592056e-05, - "loss": 0.3034, + "epoch": 0.1, + "grad_norm": 0.6188836183555025, + "learning_rate": 1.9714436574408408e-05, + "loss": 0.2951, "step": 2264 }, { - "epoch": 0.13, - "grad_norm": 0.4131007253237356, - "learning_rate": 1.9479151156497303e-05, - "loss": 0.3456, + "epoch": 0.1, + "grad_norm": 0.36961998241477345, + "learning_rate": 1.9714083427755594e-05, + "loss": 0.2968, "step": 2265 }, { - "epoch": 0.13, - "grad_norm": 0.7775496063551903, - "learning_rate": 1.9478558248811448e-05, - "loss": 0.3321, + "epoch": 0.1, + "grad_norm": 1.626537460344871, + "learning_rate": 1.9713730066042335e-05, + "loss": 0.7628, "step": 2266 }, { - "epoch": 0.13, - "grad_norm": 0.35258832797577216, - "learning_rate": 1.947796501288353e-05, - "loss": 0.2487, + "epoch": 0.1, + "grad_norm": 0.3217633037971974, + "learning_rate": 1.971337648927646e-05, + "loss": 0.201, "step": 2267 }, { - "epoch": 0.13, - "grad_norm": 0.6701083819158502, - "learning_rate": 1.947737144873409e-05, - "loss": 0.3394, + "epoch": 0.1, + "grad_norm": 0.5669891635865614, + "learning_rate": 1.9713022697465786e-05, + "loss": 0.3473, "step": 2268 }, { - "epoch": 0.13, - "grad_norm": 0.43898287408668396, - "learning_rate": 1.9476777556383685e-05, - "loss": 0.3219, + "epoch": 0.1, + "grad_norm": 0.950178402909665, + "learning_rate": 1.9712668690618154e-05, + "loss": 0.5388, "step": 2269 }, { - "epoch": 0.13, - "grad_norm": 0.3596482929485019, - "learning_rate": 1.947618333585288e-05, - "loss": 0.2842, + "epoch": 0.1, + "grad_norm": 0.4800787406092234, + "learning_rate": 1.9712314468741395e-05, + "loss": 0.2972, "step": 2270 }, { - "epoch": 0.13, - "grad_norm": 1.1019436057037184, - "learning_rate": 1.947558878716225e-05, - "loss": 0.7142, + "epoch": 0.1, + "grad_norm": 0.5235121405261489, + "learning_rate": 1.9711960031843358e-05, + "loss": 0.3611, "step": 2271 }, { - "epoch": 0.13, - "grad_norm": 0.5411126947410136, - "learning_rate": 1.9474993910332394e-05, - "loss": 0.3461, + "epoch": 0.1, + "grad_norm": 0.6653556774438358, + "learning_rate": 1.9711605379931885e-05, + "loss": 0.4358, "step": 2272 }, { - "epoch": 0.13, - "grad_norm": 0.40898556290402976, - "learning_rate": 1.9474398705383904e-05, - "loss": 0.2542, + "epoch": 0.1, + "grad_norm": 0.3263249248311736, + "learning_rate": 1.971125051301483e-05, + "loss": 0.1506, "step": 2273 }, { - "epoch": 0.13, - "grad_norm": 0.6471286583545248, - "learning_rate": 1.9473803172337396e-05, - "loss": 0.3206, + "epoch": 0.1, + "grad_norm": 0.49670218041613534, + "learning_rate": 1.971089543110005e-05, + "loss": 0.256, "step": 2274 }, { - "epoch": 0.13, - "grad_norm": 0.8779107686075143, - "learning_rate": 1.947320731121349e-05, - "loss": 0.5398, + "epoch": 0.1, + "grad_norm": 0.44875306108868424, + "learning_rate": 1.97105401341954e-05, + "loss": 0.3648, "step": 2275 }, { - "epoch": 0.13, - "grad_norm": 0.3532457572292047, - "learning_rate": 1.947261112203282e-05, - "loss": 0.2389, + "epoch": 0.1, + "grad_norm": 0.5842243289372618, + "learning_rate": 1.971018462230875e-05, + "loss": 0.4289, "step": 2276 }, { - "epoch": 0.13, - "grad_norm": 0.4493598183529648, - "learning_rate": 1.947201460481604e-05, - "loss": 0.3389, + "epoch": 0.1, + "grad_norm": 0.5064043570294681, + "learning_rate": 1.9709828895447977e-05, + "loss": 0.2941, "step": 2277 }, { - "epoch": 0.13, - "grad_norm": 1.2898301269699424, - "learning_rate": 1.9471417759583796e-05, - "loss": 0.7665, + "epoch": 0.1, + "grad_norm": 0.4430669483631709, + "learning_rate": 1.9709472953620946e-05, + "loss": 0.2962, "step": 2278 }, { - "epoch": 0.13, - "grad_norm": 0.465301976628202, - "learning_rate": 1.9470820586356763e-05, - "loss": 0.2486, + "epoch": 0.1, + "grad_norm": 0.4198493266243077, + "learning_rate": 1.9709116796835543e-05, + "loss": 0.2628, "step": 2279 }, { - "epoch": 0.13, - "grad_norm": 0.8359470240166336, - "learning_rate": 1.9470223085155622e-05, - "loss": 0.3916, + "epoch": 0.1, + "grad_norm": 0.4310531107621618, + "learning_rate": 1.9708760425099653e-05, + "loss": 0.2535, "step": 2280 }, { - "epoch": 0.13, - "grad_norm": 0.36417049272575086, - "learning_rate": 1.946962525600106e-05, - "loss": 0.2749, + "epoch": 0.1, + "grad_norm": 0.6593456595309842, + "learning_rate": 1.9708403838421164e-05, + "loss": 0.4895, "step": 2281 }, { - "epoch": 0.13, - "grad_norm": 0.39093612328850025, - "learning_rate": 1.9469027098913787e-05, - "loss": 0.2384, + "epoch": 0.1, + "grad_norm": 0.8936465390574527, + "learning_rate": 1.9708047036807973e-05, + "loss": 0.4559, "step": 2282 }, { - "epoch": 0.13, - "grad_norm": 1.4785923875213445, - "learning_rate": 1.946842861391451e-05, - "loss": 0.876, + "epoch": 0.1, + "grad_norm": 0.3979566798767169, + "learning_rate": 1.9707690020267974e-05, + "loss": 0.2706, "step": 2283 }, { - "epoch": 0.13, - "grad_norm": 0.49453376260267884, - "learning_rate": 1.9467829801023957e-05, - "loss": 0.3659, + "epoch": 0.1, + "grad_norm": 1.436365570492759, + "learning_rate": 1.9707332788809076e-05, + "loss": 0.8557, "step": 2284 }, { - "epoch": 0.13, - "grad_norm": 0.5000531898361844, - "learning_rate": 1.9467230660262864e-05, - "loss": 0.3059, + "epoch": 0.1, + "grad_norm": 0.37399829132552814, + "learning_rate": 1.9706975342439186e-05, + "loss": 0.1981, "step": 2285 }, { - "epoch": 0.13, - "grad_norm": 0.5643620845088193, - "learning_rate": 1.9466631191651984e-05, - "loss": 0.305, + "epoch": 0.11, + "grad_norm": 0.4437140486410228, + "learning_rate": 1.970661768116622e-05, + "loss": 0.3429, "step": 2286 }, { - "epoch": 0.13, - "grad_norm": 0.4382366231139346, - "learning_rate": 1.9466031395212073e-05, - "loss": 0.3067, + "epoch": 0.11, + "grad_norm": 0.45692170794703907, + "learning_rate": 1.9706259804998093e-05, + "loss": 0.3335, "step": 2287 }, { - "epoch": 0.13, - "grad_norm": 0.40747576339356095, - "learning_rate": 1.9465431270963898e-05, - "loss": 0.2737, + "epoch": 0.11, + "grad_norm": 0.8920294403839797, + "learning_rate": 1.9705901713942723e-05, + "loss": 0.4798, "step": 2288 }, { - "epoch": 0.13, - "grad_norm": 0.4389132877484424, - "learning_rate": 1.9464830818928247e-05, - "loss": 0.2912, + "epoch": 0.11, + "grad_norm": 0.4551100221045932, + "learning_rate": 1.970554340800805e-05, + "loss": 0.2958, "step": 2289 }, { - "epoch": 0.13, - "grad_norm": 0.8213346014038311, - "learning_rate": 1.946423003912591e-05, - "loss": 0.648, + "epoch": 0.11, + "grad_norm": 0.5016047979117828, + "learning_rate": 1.9705184887202e-05, + "loss": 0.354, "step": 2290 }, { - "epoch": 0.13, - "grad_norm": 0.3995884995275768, - "learning_rate": 1.94636289315777e-05, - "loss": 0.3271, + "epoch": 0.11, + "grad_norm": 0.3913487023187884, + "learning_rate": 1.9704826151532506e-05, + "loss": 0.2756, "step": 2291 }, { - "epoch": 0.13, - "grad_norm": 0.8771463518079066, - "learning_rate": 1.946302749630442e-05, - "loss": 0.4139, + "epoch": 0.11, + "grad_norm": 0.4048141065071724, + "learning_rate": 1.970446720100752e-05, + "loss": 0.2602, "step": 2292 }, { - "epoch": 0.13, - "grad_norm": 0.3162617732642643, - "learning_rate": 1.9462425733326906e-05, - "loss": 0.2405, + "epoch": 0.11, + "grad_norm": 0.5973975246368928, + "learning_rate": 1.970410803563498e-05, + "loss": 0.4444, "step": 2293 }, { - "epoch": 0.13, - "grad_norm": 0.40209065551847967, - "learning_rate": 1.9461823642666e-05, - "loss": 0.2874, + "epoch": 0.11, + "grad_norm": 0.44958090868219014, + "learning_rate": 1.9703748655422843e-05, + "loss": 0.3168, "step": 2294 }, { - "epoch": 0.13, - "grad_norm": 0.9720138998527125, - "learning_rate": 1.9461221224342544e-05, - "loss": 0.5947, + "epoch": 0.11, + "grad_norm": 0.4434498982362864, + "learning_rate": 1.970338906037906e-05, + "loss": 0.299, "step": 2295 }, { - "epoch": 0.13, - "grad_norm": 0.46874994093569994, - "learning_rate": 1.9460618478377406e-05, - "loss": 0.342, + "epoch": 0.11, + "grad_norm": 0.9322793622952742, + "learning_rate": 1.97030292505116e-05, + "loss": 0.5895, "step": 2296 }, { - "epoch": 0.13, - "grad_norm": 0.4541039308298201, - "learning_rate": 1.9460015404791456e-05, - "loss": 0.3137, + "epoch": 0.11, + "grad_norm": 0.5157730057179291, + "learning_rate": 1.9702669225828422e-05, + "loss": 0.3392, "step": 2297 }, { - "epoch": 0.13, - "grad_norm": 0.9474289796905376, - "learning_rate": 1.9459412003605577e-05, - "loss": 0.595, + "epoch": 0.11, + "grad_norm": 0.39372034878125683, + "learning_rate": 1.9702308986337498e-05, + "loss": 0.3096, "step": 2298 }, { - "epoch": 0.13, - "grad_norm": 0.3041258639698369, - "learning_rate": 1.945880827484067e-05, - "loss": 0.1296, + "epoch": 0.11, + "grad_norm": 0.34520994581152353, + "learning_rate": 1.9701948532046805e-05, + "loss": 0.2943, "step": 2299 }, { - "epoch": 0.13, - "grad_norm": 0.44826921009046905, - "learning_rate": 1.9458204218517638e-05, - "loss": 0.2813, + "epoch": 0.11, + "grad_norm": 0.6190294890137501, + "learning_rate": 1.9701587862964325e-05, + "loss": 0.1776, "step": 2300 }, { - "epoch": 0.13, - "grad_norm": 0.4353294044673283, - "learning_rate": 1.94575998346574e-05, - "loss": 0.3258, + "epoch": 0.11, + "grad_norm": 0.46899139204107076, + "learning_rate": 1.9701226979098037e-05, + "loss": 0.3436, "step": 2301 }, { - "epoch": 0.13, - "grad_norm": 0.9102026658374495, - "learning_rate": 1.945699512328089e-05, - "loss": 0.4023, + "epoch": 0.11, + "grad_norm": 0.5673506229928984, + "learning_rate": 1.9700865880455936e-05, + "loss": 0.4129, "step": 2302 }, { - "epoch": 0.13, - "grad_norm": 0.4436564510925583, - "learning_rate": 1.9456390084409044e-05, - "loss": 0.3012, + "epoch": 0.11, + "grad_norm": 0.46478924737985755, + "learning_rate": 1.9700504567046013e-05, + "loss": 0.3162, "step": 2303 }, { - "epoch": 0.13, - "grad_norm": 0.8056583922169369, - "learning_rate": 1.9455784718062813e-05, - "loss": 0.5757, - "step": 2304 + "epoch": 0.11, + "grad_norm": 0.39984235194352996, + "learning_rate": 1.9700143038876267e-05, + "loss": 0.3537, + "step": 2304 }, { - "epoch": 0.13, - "grad_norm": 0.2843505676702236, - "learning_rate": 1.9455179024263166e-05, - "loss": 0.1952, + "epoch": 0.11, + "grad_norm": 0.38542220679275685, + "learning_rate": 1.9699781295954706e-05, + "loss": 0.2166, "step": 2305 }, { - "epoch": 0.13, - "grad_norm": 0.42052209498560444, - "learning_rate": 1.9454573003031078e-05, - "loss": 0.2669, + "epoch": 0.11, + "grad_norm": 0.4182811546124366, + "learning_rate": 1.9699419338289335e-05, + "loss": 0.2833, "step": 2306 }, { - "epoch": 0.13, - "grad_norm": 0.9245941501277933, - "learning_rate": 1.945396665438753e-05, - "loss": 0.6041, + "epoch": 0.11, + "grad_norm": 0.46318718450068797, + "learning_rate": 1.9699057165888165e-05, + "loss": 0.326, "step": 2307 }, { - "epoch": 0.13, - "grad_norm": 0.5978445007136682, - "learning_rate": 1.9453359978353524e-05, - "loss": 0.4349, + "epoch": 0.11, + "grad_norm": 1.4660170163116364, + "learning_rate": 1.969869477875922e-05, + "loss": 0.8417, "step": 2308 }, { - "epoch": 0.13, - "grad_norm": 0.32742532663412943, - "learning_rate": 1.945275297495007e-05, - "loss": 0.2599, + "epoch": 0.11, + "grad_norm": 0.7266525695581226, + "learning_rate": 1.9698332176910524e-05, + "loss": 0.5034, "step": 2309 }, { - "epoch": 0.13, - "grad_norm": 0.9431148146992495, - "learning_rate": 1.9452145644198185e-05, - "loss": 0.6554, + "epoch": 0.11, + "grad_norm": 0.4125397360182415, + "learning_rate": 1.9697969360350098e-05, + "loss": 0.2242, "step": 2310 }, { - "epoch": 0.13, - "grad_norm": 0.3051067841482634, - "learning_rate": 1.9451537986118904e-05, - "loss": 0.1965, + "epoch": 0.11, + "grad_norm": 0.341030502798929, + "learning_rate": 1.9697606329085977e-05, + "loss": 0.247, "step": 2311 }, { - "epoch": 0.13, - "grad_norm": 0.4158374417954412, - "learning_rate": 1.945093000073327e-05, - "loss": 0.2121, + "epoch": 0.11, + "grad_norm": 0.9922297064690038, + "learning_rate": 1.9697243083126197e-05, + "loss": 0.4661, "step": 2312 }, { - "epoch": 0.13, - "grad_norm": 0.4787539021841934, - "learning_rate": 1.9450321688062336e-05, - "loss": 0.3388, + "epoch": 0.11, + "grad_norm": 0.40636916342167856, + "learning_rate": 1.96968796224788e-05, + "loss": 0.2399, "step": 2313 }, { - "epoch": 0.13, - "grad_norm": 1.1867093489703773, - "learning_rate": 1.944971304812717e-05, - "loss": 0.5359, + "epoch": 0.11, + "grad_norm": 0.47112356019254353, + "learning_rate": 1.969651594715184e-05, + "loss": 0.372, "step": 2314 }, { - "epoch": 0.13, - "grad_norm": 0.31635180255683376, - "learning_rate": 1.9449104080948842e-05, - "loss": 0.2358, + "epoch": 0.11, + "grad_norm": 0.9050273930829859, + "learning_rate": 1.969615205715336e-05, + "loss": 0.5187, "step": 2315 }, { - "epoch": 0.13, - "grad_norm": 1.9261529169569618, - "learning_rate": 1.9448494786548448e-05, - "loss": 0.8326, + "epoch": 0.11, + "grad_norm": 0.39464002527936254, + "learning_rate": 1.9695787952491415e-05, + "loss": 0.2685, "step": 2316 }, { - "epoch": 0.13, - "grad_norm": 0.34949286273506625, - "learning_rate": 1.944788516494709e-05, - "loss": 0.2497, + "epoch": 0.11, + "grad_norm": 0.4289571238393471, + "learning_rate": 1.9695423633174076e-05, + "loss": 0.2043, "step": 2317 }, { - "epoch": 0.13, - "grad_norm": 0.4379583841036261, - "learning_rate": 1.944727521616587e-05, - "loss": 0.2183, + "epoch": 0.11, + "grad_norm": 0.524019893612768, + "learning_rate": 1.96950590992094e-05, + "loss": 0.3444, "step": 2318 }, { - "epoch": 0.13, - "grad_norm": 0.8861251508067872, - "learning_rate": 1.9446664940225917e-05, - "loss": 0.4202, + "epoch": 0.11, + "grad_norm": 0.4516141622200916, + "learning_rate": 1.9694694350605456e-05, + "loss": 0.2414, "step": 2319 }, { - "epoch": 0.13, - "grad_norm": 0.5479306060921134, - "learning_rate": 1.9446054337148364e-05, - "loss": 0.3957, + "epoch": 0.11, + "grad_norm": 0.9882801512526483, + "learning_rate": 1.9694329387370327e-05, + "loss": 0.5164, "step": 2320 }, { - "epoch": 0.13, - "grad_norm": 0.8068260676833023, - "learning_rate": 1.9445443406954357e-05, - "loss": 0.4384, + "epoch": 0.11, + "grad_norm": 0.8613955224711916, + "learning_rate": 1.9693964209512088e-05, + "loss": 0.5408, "step": 2321 }, { - "epoch": 0.13, - "grad_norm": 0.4521357323513071, - "learning_rate": 1.9444832149665048e-05, - "loss": 0.285, + "epoch": 0.11, + "grad_norm": 0.49972580825040785, + "learning_rate": 1.9693598817038825e-05, + "loss": 0.2689, "step": 2322 }, { - "epoch": 0.13, - "grad_norm": 0.3373624792377293, - "learning_rate": 1.944422056530161e-05, - "loss": 0.1889, + "epoch": 0.11, + "grad_norm": 0.5241971213303899, + "learning_rate": 1.9693233209958627e-05, + "loss": 0.3003, "step": 2323 }, { - "epoch": 0.13, - "grad_norm": 0.5131559776901216, - "learning_rate": 1.944360865388522e-05, - "loss": 0.3261, + "epoch": 0.11, + "grad_norm": 0.4827230185558488, + "learning_rate": 1.9692867388279587e-05, + "loss": 0.2544, "step": 2324 }, { - "epoch": 0.13, - "grad_norm": 0.4789087105248987, - "learning_rate": 1.9442996415437066e-05, - "loss": 0.3175, + "epoch": 0.11, + "grad_norm": 0.4946438710570667, + "learning_rate": 1.9692501352009804e-05, + "loss": 0.3156, "step": 2325 }, { - "epoch": 0.13, - "grad_norm": 1.0279162829610848, - "learning_rate": 1.9442383849978354e-05, - "loss": 0.6445, + "epoch": 0.11, + "grad_norm": 1.0013816215685254, + "learning_rate": 1.9692135101157387e-05, + "loss": 0.3808, "step": 2326 }, { - "epoch": 0.13, - "grad_norm": 0.5037670907171229, - "learning_rate": 1.9441770957530295e-05, - "loss": 0.3263, + "epoch": 0.11, + "grad_norm": 0.7641323296084365, + "learning_rate": 1.969176863573044e-05, + "loss": 0.4481, "step": 2327 }, { - "epoch": 0.13, - "grad_norm": 0.4267882635388313, - "learning_rate": 1.9441157738114114e-05, - "loss": 0.2697, + "epoch": 0.11, + "grad_norm": 0.4855079265978535, + "learning_rate": 1.969140195573707e-05, + "loss": 0.3072, "step": 2328 }, { - "epoch": 0.13, - "grad_norm": 0.3900503798338192, - "learning_rate": 1.9440544191751046e-05, - "loss": 0.2927, + "epoch": 0.11, + "grad_norm": 0.27373720446990485, + "learning_rate": 1.969103506118541e-05, + "loss": 0.1506, "step": 2329 }, { - "epoch": 0.13, - "grad_norm": 0.3245214667402623, - "learning_rate": 1.943993031846234e-05, - "loss": 0.2375, + "epoch": 0.11, + "grad_norm": 0.5283255658404069, + "learning_rate": 1.969066795208357e-05, + "loss": 0.3595, "step": 2330 }, { - "epoch": 0.13, - "grad_norm": 0.7409088978080296, - "learning_rate": 1.9439316118269248e-05, - "loss": 0.3682, + "epoch": 0.11, + "grad_norm": 0.5246319920094062, + "learning_rate": 1.969030062843968e-05, + "loss": 0.3168, "step": 2331 }, { - "epoch": 0.13, - "grad_norm": 0.5008460271734648, - "learning_rate": 1.943870159119305e-05, - "loss": 0.3801, + "epoch": 0.11, + "grad_norm": 0.8437737828629368, + "learning_rate": 1.9689933090261873e-05, + "loss": 0.4091, "step": 2332 }, { - "epoch": 0.13, - "grad_norm": 0.41324478033839196, - "learning_rate": 1.9438086737255022e-05, - "loss": 0.2974, + "epoch": 0.11, + "grad_norm": 1.3223682756115784, + "learning_rate": 1.968956533755829e-05, + "loss": 0.7679, "step": 2333 }, { - "epoch": 0.13, - "grad_norm": 1.0615661938603904, - "learning_rate": 1.9437471556476454e-05, - "loss": 0.6605, + "epoch": 0.11, + "grad_norm": 0.492603283821184, + "learning_rate": 1.9689197370337068e-05, + "loss": 0.2861, "step": 2334 }, { - "epoch": 0.13, - "grad_norm": 0.7578715878324432, - "learning_rate": 1.9436856048878653e-05, - "loss": 0.4057, + "epoch": 0.11, + "grad_norm": 0.5087001736309223, + "learning_rate": 1.9688829188606356e-05, + "loss": 0.3953, "step": 2335 }, { - "epoch": 0.13, - "grad_norm": 0.4578545746760707, - "learning_rate": 1.943624021448293e-05, - "loss": 0.2886, + "epoch": 0.11, + "grad_norm": 0.32484926435058337, + "learning_rate": 1.96884607923743e-05, + "loss": 0.1071, "step": 2336 }, { - "epoch": 0.13, - "grad_norm": 0.46684580870173853, - "learning_rate": 1.9435624053310617e-05, - "loss": 0.335, + "epoch": 0.11, + "grad_norm": 0.46769170515929204, + "learning_rate": 1.9688092181649065e-05, + "loss": 0.312, "step": 2337 }, { - "epoch": 0.13, - "grad_norm": 0.2697068340642194, - "learning_rate": 1.943500756538305e-05, - "loss": 0.1427, + "epoch": 0.11, + "grad_norm": 0.5712527710560433, + "learning_rate": 1.9687723356438804e-05, + "loss": 0.4243, "step": 2338 }, { - "epoch": 0.13, - "grad_norm": 0.620415357649466, - "learning_rate": 1.943439075072157e-05, - "loss": 0.2752, + "epoch": 0.11, + "grad_norm": 0.5608539838733599, + "learning_rate": 1.9687354316751685e-05, + "loss": 0.3181, "step": 2339 }, { - "epoch": 0.13, - "grad_norm": 0.4502906967970935, - "learning_rate": 1.9433773609347553e-05, - "loss": 0.3357, + "epoch": 0.11, + "grad_norm": 0.4449033548842678, + "learning_rate": 1.968698506259588e-05, + "loss": 0.2744, "step": 2340 }, { - "epoch": 0.13, - "grad_norm": 0.5019698091806195, - "learning_rate": 1.9433156141282356e-05, - "loss": 0.3581, + "epoch": 0.11, + "grad_norm": 1.4178183084397107, + "learning_rate": 1.968661559397956e-05, + "loss": 0.664, "step": 2341 }, { - "epoch": 0.13, - "grad_norm": 0.6175844702796243, - "learning_rate": 1.943253834654737e-05, - "loss": 0.4041, + "epoch": 0.11, + "grad_norm": 0.3429835611742759, + "learning_rate": 1.9686245910910908e-05, + "loss": 0.2051, "step": 2342 }, { - "epoch": 0.13, - "grad_norm": 0.365708234558843, - "learning_rate": 1.9431920225163984e-05, - "loss": 0.2691, + "epoch": 0.11, + "grad_norm": 0.42099036261680933, + "learning_rate": 1.9685876013398108e-05, + "loss": 0.3086, "step": 2343 }, { - "epoch": 0.13, - "grad_norm": 0.45935395249114086, - "learning_rate": 1.9431301777153607e-05, - "loss": 0.2848, + "epoch": 0.11, + "grad_norm": 0.8195551267189042, + "learning_rate": 1.9685505901449346e-05, + "loss": 0.5072, "step": 2344 }, { - "epoch": 0.13, - "grad_norm": 0.44585991482447607, - "learning_rate": 1.9430683002537657e-05, - "loss": 0.327, + "epoch": 0.11, + "grad_norm": 0.5813314876408059, + "learning_rate": 1.968513557507282e-05, + "loss": 0.324, "step": 2345 }, { - "epoch": 0.13, - "grad_norm": 0.3728807303254334, - "learning_rate": 1.9430063901337562e-05, - "loss": 0.3105, + "epoch": 0.11, + "grad_norm": 0.5221771940454054, + "learning_rate": 1.9684765034276726e-05, + "loss": 0.2825, "step": 2346 }, { - "epoch": 0.13, - "grad_norm": 0.7637717215205987, - "learning_rate": 1.9429444473574753e-05, - "loss": 0.5145, + "epoch": 0.11, + "grad_norm": 0.5376458699304676, + "learning_rate": 1.968439427906927e-05, + "loss": 0.3704, "step": 2347 }, { - "epoch": 0.13, - "grad_norm": 0.4234374308721751, - "learning_rate": 1.942882471927069e-05, - "loss": 0.2724, + "epoch": 0.11, + "grad_norm": 0.5146347641997501, + "learning_rate": 1.968402330945866e-05, + "loss": 0.3641, "step": 2348 }, { - "epoch": 0.13, - "grad_norm": 0.569665903816151, - "learning_rate": 1.9428204638446834e-05, - "loss": 0.412, + "epoch": 0.11, + "grad_norm": 0.4719791361874272, + "learning_rate": 1.9683652125453102e-05, + "loss": 0.2579, "step": 2349 }, { - "epoch": 0.14, - "grad_norm": 0.4707793907074113, - "learning_rate": 1.9427584231124656e-05, - "loss": 0.3092, + "epoch": 0.11, + "grad_norm": 0.4665111779833458, + "learning_rate": 1.9683280727060824e-05, + "loss": 0.353, "step": 2350 }, { - "epoch": 0.14, - "grad_norm": 0.2978071148204672, - "learning_rate": 1.942696349732564e-05, - "loss": 0.1615, + "epoch": 0.11, + "grad_norm": 0.4655544231639097, + "learning_rate": 1.968290911429004e-05, + "loss": 0.2705, "step": 2351 }, { - "epoch": 0.14, - "grad_norm": 0.4934235765164325, - "learning_rate": 1.9426342437071287e-05, - "loss": 0.3496, + "epoch": 0.11, + "grad_norm": 0.4449371778839725, + "learning_rate": 1.9682537287148985e-05, + "loss": 0.192, "step": 2352 }, { - "epoch": 0.14, - "grad_norm": 0.5296265448238974, - "learning_rate": 1.94257210503831e-05, - "loss": 0.4341, + "epoch": 0.11, + "grad_norm": 0.6383687052984196, + "learning_rate": 1.9682165245645884e-05, + "loss": 0.3859, "step": 2353 }, { - "epoch": 0.14, - "grad_norm": 0.4039824220929225, - "learning_rate": 1.9425099337282596e-05, - "loss": 0.2677, + "epoch": 0.11, + "grad_norm": 0.4805221497471112, + "learning_rate": 1.9681792989788973e-05, + "loss": 0.3926, "step": 2354 }, { - "epoch": 0.14, - "grad_norm": 0.46538885633625465, - "learning_rate": 1.942447729779131e-05, - "loss": 0.3583, + "epoch": 0.11, + "grad_norm": 0.36974950787201116, + "learning_rate": 1.9681420519586502e-05, + "loss": 0.2764, "step": 2355 }, { - "epoch": 0.14, - "grad_norm": 0.5022552896111364, - "learning_rate": 1.9423854931930778e-05, - "loss": 0.3459, + "epoch": 0.11, + "grad_norm": 0.8837184645065015, + "learning_rate": 1.9681047835046708e-05, + "loss": 0.5753, "step": 2356 }, { - "epoch": 0.14, - "grad_norm": 0.2656556336693924, - "learning_rate": 1.9423232239722557e-05, - "loss": 0.1004, + "epoch": 0.11, + "grad_norm": 0.3731665676523108, + "learning_rate": 1.968067493617785e-05, + "loss": 0.2162, "step": 2357 }, { - "epoch": 0.14, - "grad_norm": 0.3751636957128666, - "learning_rate": 1.9422609221188208e-05, - "loss": 0.2678, + "epoch": 0.11, + "grad_norm": 0.39261007704737544, + "learning_rate": 1.9680301822988177e-05, + "loss": 0.2562, "step": 2358 }, { - "epoch": 0.14, - "grad_norm": 0.7635549619993852, - "learning_rate": 1.9421985876349307e-05, - "loss": 0.5, + "epoch": 0.11, + "grad_norm": 1.1331640205725197, + "learning_rate": 1.967992849548595e-05, + "loss": 0.6137, "step": 2359 }, { - "epoch": 0.14, - "grad_norm": 0.4246343907147413, - "learning_rate": 1.942136220522744e-05, - "loss": 0.3152, + "epoch": 0.11, + "grad_norm": 0.6394315504444733, + "learning_rate": 1.967955495367944e-05, + "loss": 0.5028, "step": 2360 }, { - "epoch": 0.14, - "grad_norm": 0.40637458685864913, - "learning_rate": 1.9420738207844202e-05, - "loss": 0.2499, + "epoch": 0.11, + "grad_norm": 0.49067529604623294, + "learning_rate": 1.9679181197576907e-05, + "loss": 0.3592, "step": 2361 }, { - "epoch": 0.14, - "grad_norm": 1.2699260617905255, - "learning_rate": 1.9420113884221207e-05, - "loss": 0.7759, + "epoch": 0.11, + "grad_norm": 0.42068399055756417, + "learning_rate": 1.9678807227186635e-05, + "loss": 0.2836, "step": 2362 }, { - "epoch": 0.14, - "grad_norm": 0.2895707578621004, - "learning_rate": 1.9419489234380077e-05, - "loss": 0.1907, + "epoch": 0.11, + "grad_norm": 0.42734307212767414, + "learning_rate": 1.96784330425169e-05, + "loss": 0.2476, "step": 2363 }, { - "epoch": 0.14, - "grad_norm": 0.42114540707427844, - "learning_rate": 1.9418864258342433e-05, - "loss": 0.3008, + "epoch": 0.11, + "grad_norm": 0.7765022388761557, + "learning_rate": 1.9678058643575985e-05, + "loss": 0.364, "step": 2364 }, { - "epoch": 0.14, - "grad_norm": 0.823988721160775, - "learning_rate": 1.941823895612993e-05, - "loss": 0.5296, + "epoch": 0.11, + "grad_norm": 0.4421099752995076, + "learning_rate": 1.9677684030372178e-05, + "loss": 0.3123, "step": 2365 }, { - "epoch": 0.14, - "grad_norm": 0.5848088756752329, - "learning_rate": 1.9417613327764214e-05, - "loss": 0.3968, + "epoch": 0.11, + "grad_norm": 0.4848143681057836, + "learning_rate": 1.9677309202913773e-05, + "loss": 0.4045, "step": 2366 }, { - "epoch": 0.14, - "grad_norm": 0.4707688843312788, - "learning_rate": 1.9416987373266957e-05, - "loss": 0.2714, + "epoch": 0.11, + "grad_norm": 0.750116212197387, + "learning_rate": 1.9676934161209073e-05, + "loss": 0.3247, "step": 2367 }, { - "epoch": 0.14, - "grad_norm": 0.4300757765992352, - "learning_rate": 1.941636109265983e-05, - "loss": 0.3517, + "epoch": 0.11, + "grad_norm": 0.5062295245693124, + "learning_rate": 1.9676558905266377e-05, + "loss": 0.3082, "step": 2368 }, { - "epoch": 0.14, - "grad_norm": 0.3547634392299474, - "learning_rate": 1.941573448596452e-05, - "loss": 0.2276, + "epoch": 0.11, + "grad_norm": 0.33450938278833114, + "learning_rate": 1.967618343509399e-05, + "loss": 0.213, "step": 2369 }, { - "epoch": 0.14, - "grad_norm": 0.4239024769417483, - "learning_rate": 1.9415107553202736e-05, - "loss": 0.3535, + "epoch": 0.11, + "grad_norm": 0.47346910146204113, + "learning_rate": 1.967580775070023e-05, + "loss": 0.3283, "step": 2370 }, { - "epoch": 0.14, - "grad_norm": 0.5859513694469434, - "learning_rate": 1.9414480294396178e-05, - "loss": 0.3019, + "epoch": 0.11, + "grad_norm": 0.4488088485374118, + "learning_rate": 1.967543185209341e-05, + "loss": 0.3223, "step": 2371 }, { - "epoch": 0.14, - "grad_norm": 0.41543686750719744, - "learning_rate": 1.9413852709566574e-05, - "loss": 0.3323, + "epoch": 0.11, + "grad_norm": 0.7034320705884325, + "learning_rate": 1.9675055739281857e-05, + "loss": 0.4846, "step": 2372 }, { - "epoch": 0.14, - "grad_norm": 0.4297104608283397, - "learning_rate": 1.9413224798735655e-05, - "loss": 0.3159, + "epoch": 0.11, + "grad_norm": 0.6995491227820101, + "learning_rate": 1.9674679412273892e-05, + "loss": 0.4032, "step": 2373 }, { - "epoch": 0.14, - "grad_norm": 0.9676280782400583, - "learning_rate": 1.9412596561925164e-05, - "loss": 0.5264, + "epoch": 0.11, + "grad_norm": 0.4161193266503635, + "learning_rate": 1.967430287107785e-05, + "loss": 0.294, "step": 2374 }, { - "epoch": 0.14, - "grad_norm": 0.7162830264380677, - "learning_rate": 1.9411967999156866e-05, - "loss": 0.5078, + "epoch": 0.11, + "grad_norm": 0.2910120292770254, + "learning_rate": 1.9673926115702067e-05, + "loss": 0.1261, "step": 2375 }, { - "epoch": 0.14, - "grad_norm": 0.3800369126423308, - "learning_rate": 1.9411339110452512e-05, - "loss": 0.3029, + "epoch": 0.11, + "grad_norm": 0.5575687745251163, + "learning_rate": 1.9673549146154886e-05, + "loss": 0.3105, "step": 2376 }, { - "epoch": 0.14, - "grad_norm": 0.3623660175814287, - "learning_rate": 1.9410709895833895e-05, - "loss": 0.2252, + "epoch": 0.11, + "grad_norm": 0.5889349088699504, + "learning_rate": 1.967317196244465e-05, + "loss": 0.4214, "step": 2377 }, { - "epoch": 0.14, - "grad_norm": 0.7698720451832731, - "learning_rate": 1.9410080355322797e-05, - "loss": 0.4136, + "epoch": 0.11, + "grad_norm": 0.44187637622413756, + "learning_rate": 1.9672794564579707e-05, + "loss": 0.3589, "step": 2378 }, { - "epoch": 0.14, - "grad_norm": 0.5038883234863918, - "learning_rate": 1.9409450488941018e-05, - "loss": 0.3308, + "epoch": 0.11, + "grad_norm": 0.48682463745918714, + "learning_rate": 1.9672416952568416e-05, + "loss": 0.3324, "step": 2379 }, { - "epoch": 0.14, - "grad_norm": 0.523722918460601, - "learning_rate": 1.9408820296710377e-05, - "loss": 0.3026, + "epoch": 0.11, + "grad_norm": 0.7262201717926974, + "learning_rate": 1.967203912641914e-05, + "loss": 0.488, "step": 2380 }, { - "epoch": 0.14, - "grad_norm": 0.6180731282065387, - "learning_rate": 1.9408189778652694e-05, - "loss": 0.473, + "epoch": 0.11, + "grad_norm": 0.26635591346581416, + "learning_rate": 1.9671661086140235e-05, + "loss": 0.1659, "step": 2381 }, { - "epoch": 0.14, - "grad_norm": 0.4264018524998352, - "learning_rate": 1.94075589347898e-05, - "loss": 0.3437, + "epoch": 0.11, + "grad_norm": 0.5081223906297634, + "learning_rate": 1.9671282831740076e-05, + "loss": 0.3423, "step": 2382 }, { - "epoch": 0.14, - "grad_norm": 0.34952776740085484, - "learning_rate": 1.940692776514355e-05, - "loss": 0.1839, + "epoch": 0.11, + "grad_norm": 0.42080558228184156, + "learning_rate": 1.9670904363227036e-05, + "loss": 0.342, "step": 2383 }, { - "epoch": 0.14, - "grad_norm": 0.3725548545644662, - "learning_rate": 1.9406296269735792e-05, - "loss": 0.2575, + "epoch": 0.11, + "grad_norm": 0.8568149216352813, + "learning_rate": 1.96705256806095e-05, + "loss": 0.4452, "step": 2384 }, { - "epoch": 0.14, - "grad_norm": 0.5378553612475551, - "learning_rate": 1.94056644485884e-05, - "loss": 0.3269, + "epoch": 0.11, + "grad_norm": 0.6437641145671199, + "learning_rate": 1.967014678389584e-05, + "loss": 0.3009, "step": 2385 }, { - "epoch": 0.14, - "grad_norm": 1.312515301867269, - "learning_rate": 1.940503230172325e-05, - "loss": 0.8482, + "epoch": 0.11, + "grad_norm": 0.44522865414107443, + "learning_rate": 1.966976767309445e-05, + "loss": 0.3158, "step": 2386 }, { - "epoch": 0.14, - "grad_norm": 0.6711137585190222, - "learning_rate": 1.940439982916224e-05, - "loss": 0.3149, + "epoch": 0.11, + "grad_norm": 0.5461252924749982, + "learning_rate": 1.9669388348213726e-05, + "loss": 0.3187, "step": 2387 }, { - "epoch": 0.14, - "grad_norm": 0.39804796254758124, - "learning_rate": 1.9403767030927265e-05, - "loss": 0.3114, + "epoch": 0.11, + "grad_norm": 0.8720392968612125, + "learning_rate": 1.9669008809262064e-05, + "loss": 0.2945, "step": 2388 }, { - "epoch": 0.14, - "grad_norm": 0.49038512112971977, - "learning_rate": 1.9403133907040245e-05, - "loss": 0.3422, + "epoch": 0.11, + "grad_norm": 0.4284640951176341, + "learning_rate": 1.9668629056247863e-05, + "loss": 0.3095, "step": 2389 }, { - "epoch": 0.14, - "grad_norm": 0.2498696865928933, - "learning_rate": 1.94025004575231e-05, - "loss": 0.1121, + "epoch": 0.11, + "grad_norm": 0.5064080059835947, + "learning_rate": 1.9668249089179535e-05, + "loss": 0.3599, "step": 2390 }, { - "epoch": 0.14, - "grad_norm": 0.4388817507166522, - "learning_rate": 1.940186668239777e-05, - "loss": 0.3111, + "epoch": 0.11, + "grad_norm": 0.4588697907573374, + "learning_rate": 1.966786890806549e-05, + "loss": 0.2216, "step": 2391 }, { - "epoch": 0.14, - "grad_norm": 0.5706651361437006, - "learning_rate": 1.94012325816862e-05, - "loss": 0.3399, + "epoch": 0.11, + "grad_norm": 0.5445103855022165, + "learning_rate": 1.966748851291415e-05, + "loss": 0.383, "step": 2392 }, { - "epoch": 0.14, - "grad_norm": 1.1738840800470578, - "learning_rate": 1.9400598155410352e-05, - "loss": 0.4457, + "epoch": 0.11, + "grad_norm": 0.4870176632134462, + "learning_rate": 1.9667107903733925e-05, + "loss": 0.2889, "step": 2393 }, { - "epoch": 0.14, - "grad_norm": 0.4022071782421249, - "learning_rate": 1.939996340359219e-05, - "loss": 0.2773, + "epoch": 0.11, + "grad_norm": 0.5962244225037607, + "learning_rate": 1.9666727080533253e-05, + "loss": 0.2977, "step": 2394 }, { - "epoch": 0.14, - "grad_norm": 0.3049991474196785, - "learning_rate": 1.9399328326253702e-05, - "loss": 0.2383, + "epoch": 0.11, + "grad_norm": 0.560557254061879, + "learning_rate": 1.966634604332056e-05, + "loss": 0.3931, "step": 2395 }, { - "epoch": 0.14, - "grad_norm": 0.5410903011939309, - "learning_rate": 1.9398692923416877e-05, - "loss": 0.3912, + "epoch": 0.11, + "grad_norm": 0.5072354116095269, + "learning_rate": 1.966596479210428e-05, + "loss": 0.3467, "step": 2396 }, { - "epoch": 0.14, - "grad_norm": 0.38073227562966827, - "learning_rate": 1.9398057195103725e-05, - "loss": 0.2435, + "epoch": 0.11, + "grad_norm": 0.3826299882424985, + "learning_rate": 1.9665583326892858e-05, + "loss": 0.2462, "step": 2397 }, { - "epoch": 0.14, - "grad_norm": 1.3803216316707985, - "learning_rate": 1.9397421141336252e-05, - "loss": 0.6396, + "epoch": 0.11, + "grad_norm": 0.5294948670019645, + "learning_rate": 1.9665201647694733e-05, + "loss": 0.3579, "step": 2398 }, { - "epoch": 0.14, - "grad_norm": 0.5643971893815023, - "learning_rate": 1.9396784762136488e-05, - "loss": 0.3818, + "epoch": 0.11, + "grad_norm": 0.9156658333563149, + "learning_rate": 1.9664819754518363e-05, + "loss": 0.5674, "step": 2399 }, { - "epoch": 0.14, - "grad_norm": 0.31425978785959, - "learning_rate": 1.9396148057526473e-05, - "loss": 0.2359, + "epoch": 0.11, + "grad_norm": 0.6824253083979831, + "learning_rate": 1.9664437647372196e-05, + "loss": 0.4738, "step": 2400 }, { - "epoch": 0.14, - "grad_norm": 1.311778884841662, - "learning_rate": 1.9395511027528257e-05, - "loss": 0.653, + "epoch": 0.11, + "grad_norm": 0.34888225860389926, + "learning_rate": 1.9664055326264698e-05, + "loss": 0.1922, "step": 2401 }, { - "epoch": 0.14, - "grad_norm": 0.3134910613406851, - "learning_rate": 1.9394873672163896e-05, - "loss": 0.1867, + "epoch": 0.11, + "grad_norm": 0.39238652146208075, + "learning_rate": 1.9663672791204328e-05, + "loss": 0.2781, "step": 2402 }, { - "epoch": 0.14, - "grad_norm": 0.45026996141169107, - "learning_rate": 1.9394235991455464e-05, - "loss": 0.2147, + "epoch": 0.11, + "grad_norm": 1.471132361075159, + "learning_rate": 1.9663290042199552e-05, + "loss": 0.8546, "step": 2403 }, { - "epoch": 0.14, - "grad_norm": 0.6240035351919085, - "learning_rate": 1.9393597985425044e-05, - "loss": 0.3687, + "epoch": 0.11, + "grad_norm": 0.42277430974815894, + "learning_rate": 1.9662907079258852e-05, + "loss": 0.2392, "step": 2404 }, { - "epoch": 0.14, - "grad_norm": 1.0016231929095378, - "learning_rate": 1.939295965409473e-05, - "loss": 0.4781, + "epoch": 0.11, + "grad_norm": 0.640294752779203, + "learning_rate": 1.96625239023907e-05, + "loss": 0.3998, "step": 2405 }, { - "epoch": 0.14, - "grad_norm": 0.39450446781921794, - "learning_rate": 1.9392320997486624e-05, - "loss": 0.234, + "epoch": 0.11, + "grad_norm": 0.582639360292068, + "learning_rate": 1.9662140511603586e-05, + "loss": 0.3524, "step": 2406 }, { - "epoch": 0.14, - "grad_norm": 0.6254131653942641, - "learning_rate": 1.939168201562285e-05, - "loss": 0.3663, + "epoch": 0.11, + "grad_norm": 0.3601152110052125, + "learning_rate": 1.966175690690599e-05, + "loss": 0.1786, "step": 2407 }, { - "epoch": 0.14, - "grad_norm": 0.3195029509674968, - "learning_rate": 1.939104270852553e-05, - "loss": 0.2335, + "epoch": 0.11, + "grad_norm": 0.5731455817783268, + "learning_rate": 1.966137308830641e-05, + "loss": 0.2841, "step": 2408 }, { - "epoch": 0.14, - "grad_norm": 0.46483875879314784, - "learning_rate": 1.9390403076216805e-05, - "loss": 0.3006, + "epoch": 0.11, + "grad_norm": 0.5987671643358456, + "learning_rate": 1.9660989055813342e-05, + "loss": 0.3866, "step": 2409 }, { - "epoch": 0.14, - "grad_norm": 1.1589225599424164, - "learning_rate": 1.9389763118718824e-05, - "loss": 0.4651, + "epoch": 0.11, + "grad_norm": 0.4117176835138656, + "learning_rate": 1.9660604809435283e-05, + "loss": 0.2624, "step": 2410 }, { - "epoch": 0.14, - "grad_norm": 0.6169719370131748, - "learning_rate": 1.938912283605375e-05, - "loss": 0.3751, + "epoch": 0.11, + "grad_norm": 0.9891552063131267, + "learning_rate": 1.966022034918075e-05, + "loss": 0.5101, "step": 2411 }, { - "epoch": 0.14, - "grad_norm": 0.3554253129647085, - "learning_rate": 1.9388482228243758e-05, - "loss": 0.3024, + "epoch": 0.11, + "grad_norm": 0.5955023863126869, + "learning_rate": 1.965983567505825e-05, + "loss": 0.455, "step": 2412 }, { - "epoch": 0.14, - "grad_norm": 0.4486534251987144, - "learning_rate": 1.938784129531103e-05, - "loss": 0.2002, + "epoch": 0.11, + "grad_norm": 0.5111277897495989, + "learning_rate": 1.9659450787076293e-05, + "loss": 0.2904, "step": 2413 }, { - "epoch": 0.14, - "grad_norm": 0.47231607456050406, - "learning_rate": 1.938720003727776e-05, - "loss": 0.2697, + "epoch": 0.11, + "grad_norm": 0.3748361645902313, + "learning_rate": 1.9659065685243407e-05, + "loss": 0.2111, "step": 2414 }, { - "epoch": 0.14, - "grad_norm": 0.4328718171018966, - "learning_rate": 1.9386558454166158e-05, - "loss": 0.3028, + "epoch": 0.11, + "grad_norm": 1.0423318623867175, + "learning_rate": 1.965868036956812e-05, + "loss": 0.6038, "step": 2415 }, { - "epoch": 0.14, - "grad_norm": 0.9168616970784886, - "learning_rate": 1.938591654599844e-05, - "loss": 0.324, + "epoch": 0.11, + "grad_norm": 0.4674103147166009, + "learning_rate": 1.9658294840058954e-05, + "loss": 0.3061, "step": 2416 }, { - "epoch": 0.14, - "grad_norm": 1.0797620646008737, - "learning_rate": 1.9385274312796834e-05, - "loss": 0.6565, + "epoch": 0.11, + "grad_norm": 0.6217743661339244, + "learning_rate": 1.9657909096724452e-05, + "loss": 0.3627, "step": 2417 }, { - "epoch": 0.14, - "grad_norm": 0.400563368589376, - "learning_rate": 1.9384631754583586e-05, - "loss": 0.2876, + "epoch": 0.11, + "grad_norm": 0.876919447188364, + "learning_rate": 1.9657523139573153e-05, + "loss": 0.474, "step": 2418 }, { - "epoch": 0.14, - "grad_norm": 0.7285786800582772, - "learning_rate": 1.938398887138094e-05, - "loss": 0.2817, + "epoch": 0.11, + "grad_norm": 0.6327828274375091, + "learning_rate": 1.9657136968613594e-05, + "loss": 0.3158, "step": 2419 }, { - "epoch": 0.14, - "grad_norm": 0.2902928252182725, - "learning_rate": 1.9383345663211162e-05, - "loss": 0.2576, + "epoch": 0.11, + "grad_norm": 0.40976144372861417, + "learning_rate": 1.9656750583854333e-05, + "loss": 0.1528, "step": 2420 }, { - "epoch": 0.14, - "grad_norm": 0.3972358838624784, - "learning_rate": 1.938270213009653e-05, - "loss": 0.2703, + "epoch": 0.11, + "grad_norm": 0.5336734787318893, + "learning_rate": 1.9656363985303923e-05, + "loss": 0.363, "step": 2421 }, { - "epoch": 0.14, - "grad_norm": 0.8769939771723178, - "learning_rate": 1.9382058272059326e-05, - "loss": 0.5173, + "epoch": 0.11, + "grad_norm": 0.5466365781237489, + "learning_rate": 1.9655977172970918e-05, + "loss": 0.3013, "step": 2422 }, { - "epoch": 0.14, - "grad_norm": 0.5224940687326848, - "learning_rate": 1.9381414089121848e-05, - "loss": 0.3176, + "epoch": 0.11, + "grad_norm": 1.1839309100426945, + "learning_rate": 1.9655590146863886e-05, + "loss": 0.5106, "step": 2423 }, { - "epoch": 0.14, - "grad_norm": 0.4746503525476009, - "learning_rate": 1.9380769581306404e-05, - "loss": 0.2982, + "epoch": 0.11, + "grad_norm": 0.9057626938144441, + "learning_rate": 1.9655202906991397e-05, + "loss": 0.3574, "step": 2424 }, { - "epoch": 0.14, - "grad_norm": 0.5547203830668347, - "learning_rate": 1.9380124748635312e-05, - "loss": 0.4058, + "epoch": 0.11, + "grad_norm": 0.4935735595061597, + "learning_rate": 1.9654815453362016e-05, + "loss": 0.2755, "step": 2425 }, { - "epoch": 0.14, - "grad_norm": 0.8275991864380573, - "learning_rate": 1.9379479591130903e-05, - "loss": 0.197, + "epoch": 0.11, + "grad_norm": 0.43094582068701714, + "learning_rate": 1.9654427785984335e-05, + "loss": 0.2918, "step": 2426 }, { - "epoch": 0.14, - "grad_norm": 0.395529200272553, - "learning_rate": 1.937883410881552e-05, - "loss": 0.2868, + "epoch": 0.11, + "grad_norm": 0.3800624781621088, + "learning_rate": 1.9654039904866922e-05, + "loss": 0.1789, "step": 2427 }, { - "epoch": 0.14, - "grad_norm": 0.5053460012049817, - "learning_rate": 1.937818830171151e-05, - "loss": 0.3728, + "epoch": 0.11, + "grad_norm": 0.5765702230825885, + "learning_rate": 1.965365181001837e-05, + "loss": 0.3449, "step": 2428 }, { - "epoch": 0.14, - "grad_norm": 1.0284565975396147, - "learning_rate": 1.937754216984125e-05, - "loss": 0.5913, + "epoch": 0.11, + "grad_norm": 0.6543214592410309, + "learning_rate": 1.965326350144727e-05, + "loss": 0.4019, "step": 2429 }, { - "epoch": 0.14, - "grad_norm": 0.41865327079166226, - "learning_rate": 1.9376895713227106e-05, - "loss": 0.2927, + "epoch": 0.11, + "grad_norm": 0.49558833507167893, + "learning_rate": 1.965287497916222e-05, + "loss": 0.26, "step": 2430 }, { - "epoch": 0.14, - "grad_norm": 0.5272358314902269, - "learning_rate": 1.9376248931891463e-05, - "loss": 0.3642, + "epoch": 0.11, + "grad_norm": 0.5212071543740817, + "learning_rate": 1.9652486243171826e-05, + "loss": 0.3091, "step": 2431 }, { - "epoch": 0.14, - "grad_norm": 0.7547226733060595, - "learning_rate": 1.9375601825856724e-05, - "loss": 0.4149, + "epoch": 0.11, + "grad_norm": 0.43370427733776956, + "learning_rate": 1.9652097293484688e-05, + "loss": 0.1907, "step": 2432 }, { - "epoch": 0.14, - "grad_norm": 0.37814727594751524, - "learning_rate": 1.93749543951453e-05, - "loss": 0.2974, + "epoch": 0.11, + "grad_norm": 0.5184147397473483, + "learning_rate": 1.965170813010942e-05, + "loss": 0.3014, "step": 2433 }, { - "epoch": 0.14, - "grad_norm": 0.4377703315425029, - "learning_rate": 1.9374306639779606e-05, - "loss": 0.2508, + "epoch": 0.11, + "grad_norm": 0.5018202790728643, + "learning_rate": 1.9651318753054634e-05, + "loss": 0.3442, "step": 2434 }, { - "epoch": 0.14, - "grad_norm": 0.46933076479616304, - "learning_rate": 1.9373658559782075e-05, - "loss": 0.3411, + "epoch": 0.11, + "grad_norm": 0.8547234252386541, + "learning_rate": 1.9650929162328953e-05, + "loss": 0.5453, "step": 2435 }, { - "epoch": 0.14, - "grad_norm": 0.38251871677416266, - "learning_rate": 1.9373010155175155e-05, - "loss": 0.2416, + "epoch": 0.11, + "grad_norm": 1.4158918523416615, + "learning_rate": 1.9650539357941003e-05, + "loss": 0.7095, "step": 2436 }, { - "epoch": 0.14, - "grad_norm": 1.4471766073972423, - "learning_rate": 1.9372361425981293e-05, - "loss": 0.7063, + "epoch": 0.11, + "grad_norm": 0.4588509042023961, + "learning_rate": 1.965014933989941e-05, + "loss": 0.2181, "step": 2437 }, { - "epoch": 0.14, - "grad_norm": 0.7185286092475877, - "learning_rate": 1.937171237222296e-05, - "loss": 0.4922, + "epoch": 0.11, + "grad_norm": 0.4125769319076145, + "learning_rate": 1.9649759108212817e-05, + "loss": 0.2544, "step": 2438 }, { - "epoch": 0.14, - "grad_norm": 0.3228268085409524, - "learning_rate": 1.9371062993922627e-05, - "loss": 0.238, + "epoch": 0.11, + "grad_norm": 0.8590755874566982, + "learning_rate": 1.9649368662889852e-05, + "loss": 0.5389, "step": 2439 }, { - "epoch": 0.14, - "grad_norm": 0.3947789797377527, - "learning_rate": 1.937041329110279e-05, - "loss": 0.2659, + "epoch": 0.11, + "grad_norm": 0.454662488223262, + "learning_rate": 1.9648978003939168e-05, + "loss": 0.2847, "step": 2440 }, { - "epoch": 0.14, - "grad_norm": 0.4828332809183167, - "learning_rate": 1.9369763263785942e-05, - "loss": 0.2961, + "epoch": 0.11, + "grad_norm": 0.5428993964845165, + "learning_rate": 1.964858713136941e-05, + "loss": 0.4009, "step": 2441 }, { - "epoch": 0.14, - "grad_norm": 0.42032450024642715, - "learning_rate": 1.936911291199459e-05, - "loss": 0.1853, + "epoch": 0.11, + "grad_norm": 1.3635984648202488, + "learning_rate": 1.9648196045189233e-05, + "loss": 0.6687, "step": 2442 }, { - "epoch": 0.14, - "grad_norm": 0.42092663555747223, - "learning_rate": 1.9368462235751267e-05, - "loss": 0.3511, + "epoch": 0.11, + "grad_norm": 0.4395503969060432, + "learning_rate": 1.9647804745407296e-05, + "loss": 0.2543, "step": 2443 }, { - "epoch": 0.14, - "grad_norm": 0.8694030752192309, - "learning_rate": 1.93678112350785e-05, - "loss": 0.5347, + "epoch": 0.11, + "grad_norm": 0.5135995298295045, + "learning_rate": 1.9647413232032258e-05, + "loss": 0.316, "step": 2444 }, { - "epoch": 0.14, - "grad_norm": 0.3919234530205258, - "learning_rate": 1.936715990999883e-05, - "loss": 0.1909, + "epoch": 0.11, + "grad_norm": 0.4953730600389147, + "learning_rate": 1.964702150507279e-05, + "loss": 0.3704, "step": 2445 }, { - "epoch": 0.14, - "grad_norm": 0.43121382968926086, - "learning_rate": 1.9366508260534816e-05, - "loss": 0.2989, + "epoch": 0.11, + "grad_norm": 0.48400705566838687, + "learning_rate": 1.9646629564537565e-05, + "loss": 0.2814, "step": 2446 }, { - "epoch": 0.14, - "grad_norm": 0.41099307028572735, - "learning_rate": 1.936585628670902e-05, - "loss": 0.3394, + "epoch": 0.11, + "grad_norm": 1.566093461921978, + "learning_rate": 1.964623741043526e-05, + "loss": 0.8214, "step": 2447 }, { - "epoch": 0.14, - "grad_norm": 0.3230654577529209, - "learning_rate": 1.936520398854403e-05, - "loss": 0.2249, + "epoch": 0.11, + "grad_norm": 0.5647696547481772, + "learning_rate": 1.9645845042774555e-05, + "loss": 0.2667, "step": 2448 }, { - "epoch": 0.14, - "grad_norm": 0.5695656142368685, - "learning_rate": 1.9364551366062426e-05, - "loss": 0.3446, + "epoch": 0.11, + "grad_norm": 0.45791487647353035, + "learning_rate": 1.9645452461564135e-05, + "loss": 0.2856, "step": 2449 }, { - "epoch": 0.14, - "grad_norm": 0.7429372957735979, - "learning_rate": 1.9363898419286812e-05, - "loss": 0.5617, + "epoch": 0.11, + "grad_norm": 0.4999026052685404, + "learning_rate": 1.9645059666812695e-05, + "loss": 0.2993, "step": 2450 }, { - "epoch": 0.14, - "grad_norm": 0.5023685924135727, - "learning_rate": 1.9363245148239796e-05, - "loss": 0.3299, + "epoch": 0.11, + "grad_norm": 0.8224378440474981, + "learning_rate": 1.964466665852893e-05, + "loss": 0.5036, "step": 2451 }, { - "epoch": 0.14, - "grad_norm": 0.45158885365379514, - "learning_rate": 1.9362591552944004e-05, - "loss": 0.2682, + "epoch": 0.11, + "grad_norm": 0.5172168079855265, + "learning_rate": 1.964427343672154e-05, + "loss": 0.2956, "step": 2452 }, { - "epoch": 0.14, - "grad_norm": 0.4618495334104154, - "learning_rate": 1.9361937633422066e-05, - "loss": 0.3204, + "epoch": 0.11, + "grad_norm": 0.42987443309026996, + "learning_rate": 1.9643880001399233e-05, + "loss": 0.285, "step": 2453 }, { - "epoch": 0.14, - "grad_norm": 0.28793626450866544, - "learning_rate": 1.9361283389696637e-05, - "loss": 0.2239, + "epoch": 0.11, + "grad_norm": 0.3681416022092646, + "learning_rate": 1.964348635257072e-05, + "loss": 0.2037, "step": 2454 }, { - "epoch": 0.14, - "grad_norm": 0.4599655334941602, - "learning_rate": 1.9360628821790365e-05, - "loss": 0.3066, + "epoch": 0.11, + "grad_norm": 0.4903733064433681, + "learning_rate": 1.964309249024471e-05, + "loss": 0.301, "step": 2455 }, { - "epoch": 0.14, - "grad_norm": 1.1546352730802452, - "learning_rate": 1.9359973929725915e-05, - "loss": 0.5382, + "epoch": 0.11, + "grad_norm": 0.6079645380248424, + "learning_rate": 1.964269841442993e-05, + "loss": 0.3653, "step": 2456 }, { - "epoch": 0.14, - "grad_norm": 0.41592337368392673, - "learning_rate": 1.9359318713525974e-05, - "loss": 0.303, + "epoch": 0.11, + "grad_norm": 0.464806254709901, + "learning_rate": 1.9642304125135095e-05, + "loss": 0.3637, "step": 2457 }, { - "epoch": 0.14, - "grad_norm": 1.5380319613336388, - "learning_rate": 1.9358663173213227e-05, - "loss": 0.8324, + "epoch": 0.11, + "grad_norm": 0.5037759049866136, + "learning_rate": 1.9641909622368948e-05, + "loss": 0.3619, "step": 2458 }, { - "epoch": 0.14, - "grad_norm": 0.2911517817560607, - "learning_rate": 1.9358007308810377e-05, - "loss": 0.23, + "epoch": 0.11, + "grad_norm": 0.24591148715329916, + "learning_rate": 1.9641514906140207e-05, + "loss": 0.1636, "step": 2459 }, { - "epoch": 0.14, - "grad_norm": 0.32365945054881184, - "learning_rate": 1.9357351120340137e-05, - "loss": 0.2067, + "epoch": 0.11, + "grad_norm": 1.4557600217045132, + "learning_rate": 1.9641119976457623e-05, + "loss": 0.7008, "step": 2460 }, { - "epoch": 0.14, - "grad_norm": 0.5107713270649263, - "learning_rate": 1.935669460782523e-05, - "loss": 0.3869, + "epoch": 0.11, + "grad_norm": 0.36452873337576785, + "learning_rate": 1.9640724833329935e-05, + "loss": 0.294, "step": 2461 }, { - "epoch": 0.14, - "grad_norm": 0.5950877636646806, - "learning_rate": 1.935603777128839e-05, - "loss": 0.3651, + "epoch": 0.11, + "grad_norm": 0.583030146846671, + "learning_rate": 1.964032947676589e-05, + "loss": 0.3876, "step": 2462 }, { - "epoch": 0.14, - "grad_norm": 0.4329840077090914, - "learning_rate": 1.9355380610752364e-05, - "loss": 0.2897, + "epoch": 0.11, + "grad_norm": 0.7351575830096171, + "learning_rate": 1.9639933906774244e-05, + "loss": 0.4525, "step": 2463 }, { - "epoch": 0.14, - "grad_norm": 0.5263592276254527, - "learning_rate": 1.9354723126239913e-05, - "loss": 0.368, + "epoch": 0.11, + "grad_norm": 0.4118058482394788, + "learning_rate": 1.963953812336375e-05, + "loss": 0.2855, "step": 2464 }, { - "epoch": 0.14, - "grad_norm": 0.8971518502199158, - "learning_rate": 1.93540653177738e-05, - "loss": 0.5344, + "epoch": 0.11, + "grad_norm": 0.6267854507803734, + "learning_rate": 1.963914212654317e-05, + "loss": 0.4096, "step": 2465 }, { - "epoch": 0.14, - "grad_norm": 0.3520567138494093, - "learning_rate": 1.9353407185376805e-05, - "loss": 0.2685, + "epoch": 0.11, + "grad_norm": 0.33644446828390706, + "learning_rate": 1.9638745916321274e-05, + "loss": 0.1967, "step": 2466 }, { - "epoch": 0.14, - "grad_norm": 0.4554545967398719, - "learning_rate": 1.9352748729071727e-05, - "loss": 0.3259, + "epoch": 0.11, + "grad_norm": 0.47832127396601737, + "learning_rate": 1.963834949270684e-05, + "loss": 0.3049, "step": 2467 }, { - "epoch": 0.14, - "grad_norm": 0.4117401158899818, - "learning_rate": 1.9352089948881358e-05, - "loss": 0.2224, + "epoch": 0.11, + "grad_norm": 0.6754383493657978, + "learning_rate": 1.9637952855708634e-05, + "loss": 0.4249, "step": 2468 }, { - "epoch": 0.14, - "grad_norm": 0.4564482028613299, - "learning_rate": 1.935143084482852e-05, - "loss": 0.2908, + "epoch": 0.11, + "grad_norm": 0.4226792942667077, + "learning_rate": 1.963755600533544e-05, + "loss": 0.3167, "step": 2469 }, { - "epoch": 0.14, - "grad_norm": 0.6895418863474747, - "learning_rate": 1.935077141693603e-05, - "loss": 0.447, + "epoch": 0.11, + "grad_norm": 0.5468397253092615, + "learning_rate": 1.9637158941596045e-05, + "loss": 0.3083, "step": 2470 }, { - "epoch": 0.14, - "grad_norm": 0.5274251237370065, - "learning_rate": 1.935011166522673e-05, - "loss": 0.3838, + "epoch": 0.11, + "grad_norm": 0.6211164157736757, + "learning_rate": 1.963676166449924e-05, + "loss": 0.4443, "step": 2471 }, { - "epoch": 0.14, - "grad_norm": 0.4138804782363925, - "learning_rate": 1.9349451589723465e-05, - "loss": 0.2777, + "epoch": 0.11, + "grad_norm": 0.2598726879832464, + "learning_rate": 1.9636364174053818e-05, + "loss": 0.1899, "step": 2472 }, { - "epoch": 0.14, - "grad_norm": 0.5436885501881874, - "learning_rate": 1.9348791190449092e-05, - "loss": 0.4258, + "epoch": 0.11, + "grad_norm": 0.5140077027708775, + "learning_rate": 1.9635966470268583e-05, + "loss": 0.2845, "step": 2473 }, { - "epoch": 0.14, - "grad_norm": 0.3883095750427234, - "learning_rate": 1.9348130467426486e-05, - "loss": 0.1774, + "epoch": 0.11, + "grad_norm": 0.658779319196403, + "learning_rate": 1.9635568553152337e-05, + "loss": 0.4607, "step": 2474 }, { - "epoch": 0.14, - "grad_norm": 0.3857708543855049, - "learning_rate": 1.934746942067852e-05, - "loss": 0.2547, + "epoch": 0.11, + "grad_norm": 1.090078169678851, + "learning_rate": 1.9635170422713892e-05, + "loss": 0.6085, "step": 2475 }, { - "epoch": 0.14, - "grad_norm": 0.5736300598247335, - "learning_rate": 1.9346808050228093e-05, - "loss": 0.4197, + "epoch": 0.11, + "grad_norm": 0.47826234688342517, + "learning_rate": 1.963477207896206e-05, + "loss": 0.2426, "step": 2476 }, { - "epoch": 0.14, - "grad_norm": 0.7074214232035989, - "learning_rate": 1.9346146356098103e-05, - "loss": 0.5278, + "epoch": 0.11, + "grad_norm": 0.4548323560606868, + "learning_rate": 1.9634373521905655e-05, + "loss": 0.3307, "step": 2477 }, { - "epoch": 0.14, - "grad_norm": 0.442403841592901, - "learning_rate": 1.9345484338311467e-05, - "loss": 0.2977, + "epoch": 0.11, + "grad_norm": 0.3389298727393347, + "learning_rate": 1.963397475155351e-05, + "loss": 0.1704, "step": 2478 }, { - "epoch": 0.14, - "grad_norm": 0.4645088493036005, - "learning_rate": 1.9344821996891106e-05, - "loss": 0.3155, + "epoch": 0.11, + "grad_norm": 0.48496244887825124, + "learning_rate": 1.963357576791445e-05, + "loss": 0.2822, "step": 2479 }, { - "epoch": 0.14, - "grad_norm": 0.4166778230104967, - "learning_rate": 1.9344159331859965e-05, - "loss": 0.1801, + "epoch": 0.11, + "grad_norm": 0.5814114494301109, + "learning_rate": 1.9633176570997308e-05, + "loss": 0.4218, "step": 2480 }, { - "epoch": 0.14, - "grad_norm": 0.4899829826259577, - "learning_rate": 1.9343496343240994e-05, - "loss": 0.1517, + "epoch": 0.11, + "grad_norm": 0.5286279026544006, + "learning_rate": 1.963277716081092e-05, + "loss": 0.3772, "step": 2481 }, { - "epoch": 0.14, - "grad_norm": 0.5196827332828534, - "learning_rate": 1.9342833031057138e-05, - "loss": 0.3627, + "epoch": 0.11, + "grad_norm": 0.4247354690432872, + "learning_rate": 1.9632377537364128e-05, + "loss": 0.2448, "step": 2482 }, { - "epoch": 0.14, - "grad_norm": 0.46032261971842325, - "learning_rate": 1.934216939533138e-05, - "loss": 0.367, + "epoch": 0.11, + "grad_norm": 1.5729899262906724, + "learning_rate": 1.9631977700665784e-05, + "loss": 0.8685, "step": 2483 }, { - "epoch": 0.14, - "grad_norm": 0.8366442088496777, - "learning_rate": 1.9341505436086695e-05, - "loss": 0.441, + "epoch": 0.11, + "grad_norm": 0.42038930551682085, + "learning_rate": 1.9631577650724734e-05, + "loss": 0.3248, "step": 2484 }, { - "epoch": 0.14, - "grad_norm": 0.48325853141372255, - "learning_rate": 1.934084115334608e-05, - "loss": 0.263, + "epoch": 0.11, + "grad_norm": 0.4224808804097906, + "learning_rate": 1.9631177387549842e-05, + "loss": 0.2681, "step": 2485 }, { - "epoch": 0.14, - "grad_norm": 0.38690498341366925, - "learning_rate": 1.9340176547132536e-05, - "loss": 0.2036, + "epoch": 0.11, + "grad_norm": 0.45915120979969043, + "learning_rate": 1.9630776911149963e-05, + "loss": 0.3279, "step": 2486 }, { - "epoch": 0.14, - "grad_norm": 0.41703692608099363, - "learning_rate": 1.9339511617469082e-05, - "loss": 0.3184, + "epoch": 0.11, + "grad_norm": 1.5001476633278168, + "learning_rate": 1.9630376221533965e-05, + "loss": 0.6549, "step": 2487 }, { - "epoch": 0.14, - "grad_norm": 0.47705445064380225, - "learning_rate": 1.9338846364378742e-05, - "loss": 0.2599, + "epoch": 0.11, + "grad_norm": 0.9648544411504661, + "learning_rate": 1.962997531871072e-05, + "loss": 0.5401, "step": 2488 }, { - "epoch": 0.14, - "grad_norm": 1.186205154115369, - "learning_rate": 1.9338180787884554e-05, - "loss": 0.5164, + "epoch": 0.11, + "grad_norm": 0.3942882783530878, + "learning_rate": 1.9629574202689104e-05, + "loss": 0.2611, "step": 2489 }, { - "epoch": 0.14, - "grad_norm": 0.5865526165668318, - "learning_rate": 1.9337514888009566e-05, - "loss": 0.4188, + "epoch": 0.11, + "grad_norm": 0.6908872590412966, + "learning_rate": 1.9629172873477995e-05, + "loss": 0.4929, "step": 2490 }, { - "epoch": 0.14, - "grad_norm": 0.3801379438763642, - "learning_rate": 1.933684866477684e-05, - "loss": 0.264, + "epoch": 0.11, + "grad_norm": 0.5686480329778618, + "learning_rate": 1.962877133108628e-05, + "loss": 0.3588, "step": 2491 }, { - "epoch": 0.14, - "grad_norm": 0.2829332968836524, - "learning_rate": 1.9336182118209442e-05, - "loss": 0.192, + "epoch": 0.11, + "grad_norm": 0.3068569960292089, + "learning_rate": 1.9628369575522847e-05, + "loss": 0.1801, "step": 2492 }, { - "epoch": 0.14, - "grad_norm": 0.5463688788581329, - "learning_rate": 1.9335515248330462e-05, - "loss": 0.3456, + "epoch": 0.11, + "grad_norm": 0.5063842798640841, + "learning_rate": 1.962796760679659e-05, + "loss": 0.3587, "step": 2493 }, { - "epoch": 0.14, - "grad_norm": 0.719184449716863, - "learning_rate": 1.933484805516299e-05, - "loss": 0.3116, + "epoch": 0.11, + "grad_norm": 0.7493501625042578, + "learning_rate": 1.962756542491641e-05, + "loss": 0.4149, "step": 2494 }, { - "epoch": 0.14, - "grad_norm": 0.8530292958738765, - "learning_rate": 1.9334180538730133e-05, - "loss": 0.3951, + "epoch": 0.11, + "grad_norm": 0.43914600283490773, + "learning_rate": 1.9627163029891216e-05, + "loss": 0.2781, "step": 2495 }, { - "epoch": 0.14, - "grad_norm": 1.5021582635497452, - "learning_rate": 1.9333512699055004e-05, - "loss": 0.7942, + "epoch": 0.11, + "grad_norm": 0.6065097569979341, + "learning_rate": 1.9626760421729905e-05, + "loss": 0.4289, "step": 2496 }, { - "epoch": 0.14, - "grad_norm": 0.37553192886357234, - "learning_rate": 1.933284453616073e-05, - "loss": 0.3011, + "epoch": 0.11, + "grad_norm": 0.4798517732590873, + "learning_rate": 1.9626357600441403e-05, + "loss": 0.3514, "step": 2497 }, { - "epoch": 0.14, - "grad_norm": 0.27112912159156816, - "learning_rate": 1.9332176050070454e-05, - "loss": 0.2007, + "epoch": 0.11, + "grad_norm": 0.32827023814861495, + "learning_rate": 1.9625954566034618e-05, + "loss": 0.2118, "step": 2498 }, { - "epoch": 0.14, - "grad_norm": 0.763728328487966, - "learning_rate": 1.9331507240807322e-05, - "loss": 0.4586, + "epoch": 0.11, + "grad_norm": 0.5232440038921418, + "learning_rate": 1.9625551318518473e-05, + "loss": 0.321, "step": 2499 }, { - "epoch": 0.14, - "grad_norm": 0.6138837661580483, - "learning_rate": 1.9330838108394497e-05, - "loss": 0.327, + "epoch": 0.11, + "grad_norm": 0.55490372608103, + "learning_rate": 1.9625147857901906e-05, + "loss": 0.3896, "step": 2500 }, { - "epoch": 0.14, - "grad_norm": 1.051207841064682, - "learning_rate": 1.933016865285515e-05, - "loss": 0.438, + "epoch": 0.11, + "grad_norm": 0.405644720765052, + "learning_rate": 1.962474418419384e-05, + "loss": 0.3303, "step": 2501 }, { - "epoch": 0.14, - "grad_norm": 0.5530078844473687, - "learning_rate": 1.932949887421246e-05, - "loss": 0.3198, + "epoch": 0.11, + "grad_norm": 0.8621462924834044, + "learning_rate": 1.962434029740321e-05, + "loss": 0.4427, "step": 2502 }, { - "epoch": 0.14, - "grad_norm": 0.4045931786615891, - "learning_rate": 1.9328828772489623e-05, - "loss": 0.293, + "epoch": 0.11, + "grad_norm": 0.4898793865322952, + "learning_rate": 1.9623936197538968e-05, + "loss": 0.3012, "step": 2503 }, { - "epoch": 0.14, - "grad_norm": 0.3240936130174602, - "learning_rate": 1.932815834770985e-05, - "loss": 0.1162, + "epoch": 0.12, + "grad_norm": 0.3543885215722855, + "learning_rate": 1.962353188461005e-05, + "loss": 0.2247, "step": 2504 }, { - "epoch": 0.14, - "grad_norm": 0.37395828600806214, - "learning_rate": 1.9327487599896355e-05, - "loss": 0.2758, + "epoch": 0.12, + "grad_norm": 0.48334954248837375, + "learning_rate": 1.962312735862541e-05, + "loss": 0.3256, "step": 2505 }, { - "epoch": 0.14, - "grad_norm": 0.4175861001760937, - "learning_rate": 1.9326816529072364e-05, - "loss": 0.3197, + "epoch": 0.12, + "grad_norm": 1.0627911535540613, + "learning_rate": 1.962272261959401e-05, + "loss": 0.5518, "step": 2506 }, { - "epoch": 0.14, - "grad_norm": 0.7256771592836744, - "learning_rate": 1.932614513526112e-05, - "loss": 0.3772, + "epoch": 0.12, + "grad_norm": 0.47535627422165594, + "learning_rate": 1.9622317667524805e-05, + "loss": 0.3293, "step": 2507 }, { - "epoch": 0.14, - "grad_norm": 0.6586431832270313, - "learning_rate": 1.932547341848587e-05, - "loss": 0.4013, + "epoch": 0.12, + "grad_norm": 0.4857915805759971, + "learning_rate": 1.962191250242676e-05, + "loss": 0.3401, "step": 2508 }, { - "epoch": 0.14, - "grad_norm": 0.5348147230679424, - "learning_rate": 1.932480137876988e-05, - "loss": 0.3208, + "epoch": 0.12, + "grad_norm": 0.9186063961948101, + "learning_rate": 1.9621507124308845e-05, + "loss": 0.4586, "step": 2509 }, { - "epoch": 0.14, - "grad_norm": 0.39809772127850734, - "learning_rate": 1.9324129016136414e-05, - "loss": 0.265, + "epoch": 0.12, + "grad_norm": 0.376313178768367, + "learning_rate": 1.9621101533180034e-05, + "loss": 0.2648, "step": 2510 }, { - "epoch": 0.14, - "grad_norm": 0.29378693207938744, - "learning_rate": 1.9323456330608762e-05, - "loss": 0.2083, + "epoch": 0.12, + "grad_norm": 0.3017447138594228, + "learning_rate": 1.9620695729049314e-05, + "loss": 0.1271, "step": 2511 }, { - "epoch": 0.14, - "grad_norm": 0.41811342753632974, - "learning_rate": 1.932278332221022e-05, - "loss": 0.3463, + "epoch": 0.12, + "grad_norm": 0.5160262999333746, + "learning_rate": 1.962028971192566e-05, + "loss": 0.3481, "step": 2512 }, { - "epoch": 0.14, - "grad_norm": 0.7113376176543778, - "learning_rate": 1.932210999096409e-05, - "loss": 0.5525, + "epoch": 0.12, + "grad_norm": 0.4030337475232264, + "learning_rate": 1.961988348181806e-05, + "loss": 0.3389, "step": 2513 }, { - "epoch": 0.14, - "grad_norm": 0.4728687902245757, - "learning_rate": 1.9321436336893693e-05, - "loss": 0.2821, + "epoch": 0.12, + "grad_norm": 0.7817185212644212, + "learning_rate": 1.9619477038735516e-05, + "loss": 0.5071, "step": 2514 }, { - "epoch": 0.14, - "grad_norm": 0.3785533771206013, - "learning_rate": 1.9320762360022354e-05, - "loss": 0.2939, + "epoch": 0.12, + "grad_norm": 0.515840366138476, + "learning_rate": 1.9619070382687017e-05, + "loss": 0.2076, "step": 2515 }, { - "epoch": 0.14, - "grad_norm": 0.4423604779271077, - "learning_rate": 1.932008806037342e-05, - "loss": 0.2852, + "epoch": 0.12, + "grad_norm": 0.37041109243283304, + "learning_rate": 1.9618663513681574e-05, + "loss": 0.2584, "step": 2516 }, { - "epoch": 0.14, - "grad_norm": 0.34758961266810223, - "learning_rate": 1.9319413437970236e-05, - "loss": 0.2658, + "epoch": 0.12, + "grad_norm": 0.4094582097499317, + "learning_rate": 1.961825643172819e-05, + "loss": 0.2986, "step": 2517 }, { - "epoch": 0.14, - "grad_norm": 0.40220948422232616, - "learning_rate": 1.9318738492836165e-05, - "loss": 0.2915, + "epoch": 0.12, + "grad_norm": 0.6615393808111882, + "learning_rate": 1.9617849136835883e-05, + "loss": 0.2989, "step": 2518 }, { - "epoch": 0.14, - "grad_norm": 0.515572698759552, - "learning_rate": 1.931806322499458e-05, - "loss": 0.4523, + "epoch": 0.12, + "grad_norm": 0.5266020916942835, + "learning_rate": 1.961744162901366e-05, + "loss": 0.3427, "step": 2519 }, { - "epoch": 0.14, - "grad_norm": 0.5901676572616871, - "learning_rate": 1.9317387634468867e-05, - "loss": 0.2373, + "epoch": 0.12, + "grad_norm": 0.5857525692361484, + "learning_rate": 1.9617033908270552e-05, + "loss": 0.4278, "step": 2520 }, { - "epoch": 0.14, - "grad_norm": 0.42464998973281115, - "learning_rate": 1.9316711721282423e-05, - "loss": 0.3158, + "epoch": 0.12, + "grad_norm": 0.455309869545046, + "learning_rate": 1.9616625974615584e-05, + "loss": 0.2621, "step": 2521 }, { - "epoch": 0.14, - "grad_norm": 0.5342049535459824, - "learning_rate": 1.931603548545865e-05, - "loss": 0.3712, + "epoch": 0.12, + "grad_norm": 0.4708513817946174, + "learning_rate": 1.9616217828057783e-05, + "loss": 0.3574, "step": 2522 }, { - "epoch": 0.14, - "grad_norm": 0.3737853946547916, - "learning_rate": 1.9315358927020965e-05, - "loss": 0.3404, + "epoch": 0.12, + "grad_norm": 0.4112295799455282, + "learning_rate": 1.961580946860619e-05, + "loss": 0.2479, "step": 2523 }, { - "epoch": 0.15, - "grad_norm": 0.28710083849023404, - "learning_rate": 1.931468204599281e-05, - "loss": 0.2055, + "epoch": 0.12, + "grad_norm": 0.5126351633448597, + "learning_rate": 1.961540089626984e-05, + "loss": 0.3039, "step": 2524 }, { - "epoch": 0.15, - "grad_norm": 1.221810801453039, - "learning_rate": 1.931400484239761e-05, - "loss": 0.8419, + "epoch": 0.12, + "grad_norm": 0.6655348146016421, + "learning_rate": 1.9614992111057782e-05, + "loss": 0.3612, "step": 2525 }, { - "epoch": 0.15, - "grad_norm": 0.4907234180429648, - "learning_rate": 1.931332731625882e-05, - "loss": 0.3614, + "epoch": 0.12, + "grad_norm": 1.0867455288399606, + "learning_rate": 1.9614583112979068e-05, + "loss": 0.5969, "step": 2526 }, { - "epoch": 0.15, - "grad_norm": 0.37691010597331226, - "learning_rate": 1.931264946759991e-05, - "loss": 0.2088, + "epoch": 0.12, + "grad_norm": 1.7758515189825195, + "learning_rate": 1.961417390204275e-05, + "loss": 0.7333, "step": 2527 }, { - "epoch": 0.15, - "grad_norm": 0.7873124730767418, - "learning_rate": 1.931197129644435e-05, - "loss": 0.5543, + "epoch": 0.12, + "grad_norm": 0.33919878708268514, + "learning_rate": 1.9613764478257885e-05, + "loss": 0.2285, "step": 2528 }, { - "epoch": 0.15, - "grad_norm": 0.5526920490936016, - "learning_rate": 1.9311292802815626e-05, - "loss": 0.4587, + "epoch": 0.12, + "grad_norm": 0.44066905360494996, + "learning_rate": 1.9613354841633544e-05, + "loss": 0.2939, "step": 2529 }, { - "epoch": 0.15, - "grad_norm": 0.28601007260420575, - "learning_rate": 1.931061398673723e-05, - "loss": 0.1858, + "epoch": 0.12, + "grad_norm": 1.5003467422606467, + "learning_rate": 1.9612944992178792e-05, + "loss": 0.5441, "step": 2530 }, { - "epoch": 0.15, - "grad_norm": 0.4887404163838093, - "learning_rate": 1.930993484823267e-05, - "loss": 0.3604, + "epoch": 0.12, + "grad_norm": 0.5355212530017303, + "learning_rate": 1.9612534929902702e-05, + "loss": 0.2687, "step": 2531 }, { - "epoch": 0.15, - "grad_norm": 0.5338039650083546, - "learning_rate": 1.9309255387325468e-05, - "loss": 0.3452, + "epoch": 0.12, + "grad_norm": 0.4063100131457627, + "learning_rate": 1.961212465481435e-05, + "loss": 0.2885, "step": 2532 }, { - "epoch": 0.15, - "grad_norm": 0.34880351685054517, - "learning_rate": 1.9308575604039156e-05, - "loss": 0.2113, + "epoch": 0.12, + "grad_norm": 0.5124389903296207, + "learning_rate": 1.9611714166922827e-05, + "loss": 0.3705, "step": 2533 }, { - "epoch": 0.15, - "grad_norm": 0.50223678765854, - "learning_rate": 1.9307895498397268e-05, - "loss": 0.3496, + "epoch": 0.12, + "grad_norm": 0.4689745929162062, + "learning_rate": 1.961130346623722e-05, + "loss": 0.2565, "step": 2534 }, { - "epoch": 0.15, - "grad_norm": 1.0383332882780774, - "learning_rate": 1.9307215070423354e-05, - "loss": 0.5223, + "epoch": 0.12, + "grad_norm": 0.6562092988879855, + "learning_rate": 1.9610892552766607e-05, + "loss": 0.3119, "step": 2535 }, { - "epoch": 0.15, - "grad_norm": 0.40725782340222744, - "learning_rate": 1.930653432014099e-05, - "loss": 0.3043, + "epoch": 0.12, + "grad_norm": 0.5583476063000099, + "learning_rate": 1.9610481426520103e-05, + "loss": 0.3636, "step": 2536 }, { - "epoch": 0.15, - "grad_norm": 1.0892283717210414, - "learning_rate": 1.930585324757374e-05, - "loss": 0.5579, + "epoch": 0.12, + "grad_norm": 0.501664987497338, + "learning_rate": 1.96100700875068e-05, + "loss": 0.284, "step": 2537 }, { - "epoch": 0.15, - "grad_norm": 0.3285256564934091, - "learning_rate": 1.9305171852745192e-05, - "loss": 0.2542, + "epoch": 0.12, + "grad_norm": 0.4380259118738081, + "learning_rate": 1.960965853573581e-05, + "loss": 0.264, "step": 2538 }, { - "epoch": 0.15, - "grad_norm": 0.3941296085844982, - "learning_rate": 1.9304490135678944e-05, - "loss": 0.2776, + "epoch": 0.12, + "grad_norm": 0.9151286481633896, + "learning_rate": 1.9609246771216242e-05, + "loss": 0.4504, "step": 2539 }, { - "epoch": 0.15, - "grad_norm": 0.8550640205170302, - "learning_rate": 1.9303808096398602e-05, - "loss": 0.4352, + "epoch": 0.12, + "grad_norm": 0.4851332386356835, + "learning_rate": 1.960883479395721e-05, + "loss": 0.3185, "step": 2540 }, { - "epoch": 0.15, - "grad_norm": 0.7635534341325637, - "learning_rate": 1.9303125734927784e-05, - "loss": 0.5426, + "epoch": 0.12, + "grad_norm": 0.42442989030626066, + "learning_rate": 1.9608422603967838e-05, + "loss": 0.3289, "step": 2541 }, { - "epoch": 0.15, - "grad_norm": 0.3402636225143411, - "learning_rate": 1.9302443051290126e-05, - "loss": 0.2776, + "epoch": 0.12, + "grad_norm": 0.9241926229256986, + "learning_rate": 1.9608010201257247e-05, + "loss": 0.557, "step": 2542 }, { - "epoch": 0.15, - "grad_norm": 0.6774987084654007, - "learning_rate": 1.930176004550926e-05, - "loss": 0.3408, + "epoch": 0.12, + "grad_norm": 0.44366428315734924, + "learning_rate": 1.9607597585834573e-05, + "loss": 0.2982, "step": 2543 }, { - "epoch": 0.15, - "grad_norm": 0.33787908931664956, - "learning_rate": 1.930107671760885e-05, - "loss": 0.2045, + "epoch": 0.12, + "grad_norm": 0.3209855852335516, + "learning_rate": 1.9607184757708953e-05, + "loss": 0.194, "step": 2544 }, { - "epoch": 0.15, - "grad_norm": 0.47972956125767385, - "learning_rate": 1.9300393067612545e-05, - "loss": 0.2875, + "epoch": 0.12, + "grad_norm": 1.0496640620970652, + "learning_rate": 1.9606771716889517e-05, + "loss": 0.6465, "step": 2545 }, { - "epoch": 0.15, - "grad_norm": 0.4447447023685629, - "learning_rate": 1.9299709095544035e-05, - "loss": 0.3332, + "epoch": 0.12, + "grad_norm": 0.41262532035728755, + "learning_rate": 1.9606358463385414e-05, + "loss": 0.3153, "step": 2546 }, { - "epoch": 0.15, - "grad_norm": 0.9395633232005106, - "learning_rate": 1.9299024801426994e-05, - "loss": 0.4455, + "epoch": 0.12, + "grad_norm": 0.9805713474863744, + "learning_rate": 1.9605944997205795e-05, + "loss": 0.4389, "step": 2547 }, { - "epoch": 0.15, - "grad_norm": 0.4374552033580974, - "learning_rate": 1.9298340185285127e-05, - "loss": 0.2683, + "epoch": 0.12, + "grad_norm": 0.44107125206284997, + "learning_rate": 1.9605531318359812e-05, + "loss": 0.3506, "step": 2548 }, { - "epoch": 0.15, - "grad_norm": 0.6582182316770087, - "learning_rate": 1.929765524714214e-05, - "loss": 0.4531, + "epoch": 0.12, + "grad_norm": 0.4272222145185835, + "learning_rate": 1.9605117426856622e-05, + "loss": 0.2971, "step": 2549 }, { - "epoch": 0.15, - "grad_norm": 0.3174736595811298, - "learning_rate": 1.929696998702175e-05, - "loss": 0.1964, + "epoch": 0.12, + "grad_norm": 0.5466258017698202, + "learning_rate": 1.960470332270539e-05, + "loss": 0.2807, "step": 2550 }, { - "epoch": 0.15, - "grad_norm": 0.40811572548117436, - "learning_rate": 1.929628440494769e-05, - "loss": 0.2882, + "epoch": 0.12, + "grad_norm": 0.410555547951116, + "learning_rate": 1.9604289005915286e-05, + "loss": 0.2192, "step": 2551 }, { - "epoch": 0.15, - "grad_norm": 0.8862066628895517, - "learning_rate": 1.92955985009437e-05, - "loss": 0.4916, + "epoch": 0.12, + "grad_norm": 0.3710278484686862, + "learning_rate": 1.960387447649548e-05, + "loss": 0.2864, "step": 2552 }, { - "epoch": 0.15, - "grad_norm": 1.0030933658275236, - "learning_rate": 1.9294912275033533e-05, - "loss": 0.3916, + "epoch": 0.12, + "grad_norm": 1.1449905899715322, + "learning_rate": 1.9603459734455147e-05, + "loss": 0.5276, "step": 2553 }, { - "epoch": 0.15, - "grad_norm": 0.4245887044072022, - "learning_rate": 1.9294225727240958e-05, - "loss": 0.2804, + "epoch": 0.12, + "grad_norm": 0.7578743666263131, + "learning_rate": 1.9603044779803474e-05, + "loss": 0.3884, "step": 2554 }, { - "epoch": 0.15, - "grad_norm": 0.5604093210603397, - "learning_rate": 1.9293538857589737e-05, - "loss": 0.4478, + "epoch": 0.12, + "grad_norm": 0.5241030932793594, + "learning_rate": 1.9602629612549643e-05, + "loss": 0.3063, "step": 2555 }, { - "epoch": 0.15, - "grad_norm": 0.22753638244231286, - "learning_rate": 1.9292851666103673e-05, - "loss": 0.1424, + "epoch": 0.12, + "grad_norm": 0.3879330306472563, + "learning_rate": 1.9602214232702846e-05, + "loss": 0.2895, "step": 2556 }, { - "epoch": 0.15, - "grad_norm": 0.393215750130867, - "learning_rate": 1.9292164152806553e-05, - "loss": 0.321, + "epoch": 0.12, + "grad_norm": 0.3773987632788053, + "learning_rate": 1.9601798640272283e-05, + "loss": 0.119, "step": 2557 }, { - "epoch": 0.15, - "grad_norm": 0.42867114089154623, - "learning_rate": 1.9291476317722185e-05, - "loss": 0.3651, + "epoch": 0.12, + "grad_norm": 0.48224869475081095, + "learning_rate": 1.9601382835267154e-05, + "loss": 0.314, "step": 2558 }, { - "epoch": 0.15, - "grad_norm": 1.0719463340093578, - "learning_rate": 1.9290788160874395e-05, - "loss": 0.5735, + "epoch": 0.12, + "grad_norm": 1.4454864527126023, + "learning_rate": 1.9600966817696657e-05, + "loss": 0.5681, "step": 2559 }, { - "epoch": 0.15, - "grad_norm": 0.42211388783214765, - "learning_rate": 1.9290099682287012e-05, - "loss": 0.2557, + "epoch": 0.12, + "grad_norm": 0.605761703630391, + "learning_rate": 1.9600550587570015e-05, + "loss": 0.2952, "step": 2560 }, { - "epoch": 0.15, - "grad_norm": 1.346065145365312, - "learning_rate": 1.928941088198387e-05, - "loss": 0.877, + "epoch": 0.12, + "grad_norm": 0.463168681266348, + "learning_rate": 1.9600134144896433e-05, + "loss": 0.2902, "step": 2561 }, { - "epoch": 0.15, - "grad_norm": 0.4098509316745687, - "learning_rate": 1.9288721759988834e-05, - "loss": 0.372, + "epoch": 0.12, + "grad_norm": 0.3851500863074438, + "learning_rate": 1.9599717489685134e-05, + "loss": 0.201, "step": 2562 }, { - "epoch": 0.15, - "grad_norm": 0.42484258362884286, - "learning_rate": 1.9288032316325762e-05, - "loss": 0.2418, + "epoch": 0.12, + "grad_norm": 1.0509635302282927, + "learning_rate": 1.959930062194534e-05, + "loss": 0.612, "step": 2563 }, { - "epoch": 0.15, - "grad_norm": 0.35294191462631713, - "learning_rate": 1.928734255101853e-05, - "loss": 0.2576, + "epoch": 0.12, + "grad_norm": 0.5313588202115289, + "learning_rate": 1.9598883541686287e-05, + "loss": 0.2639, "step": 2564 }, { - "epoch": 0.15, - "grad_norm": 0.5300039653477053, - "learning_rate": 1.9286652464091024e-05, - "loss": 0.4365, + "epoch": 0.12, + "grad_norm": 0.8251710814771125, + "learning_rate": 1.9598466248917202e-05, + "loss": 0.5301, "step": 2565 }, { - "epoch": 0.15, - "grad_norm": 0.36024518354461466, - "learning_rate": 1.9285962055567144e-05, - "loss": 0.2375, + "epoch": 0.12, + "grad_norm": 1.5995774823438609, + "learning_rate": 1.9598048743647323e-05, + "loss": 0.8728, "step": 2566 }, { - "epoch": 0.15, - "grad_norm": 0.676422785771166, - "learning_rate": 1.9285271325470794e-05, - "loss": 0.4694, + "epoch": 0.12, + "grad_norm": 0.4540988239175763, + "learning_rate": 1.9597631025885898e-05, + "loss": 0.2303, "step": 2567 }, { - "epoch": 0.15, - "grad_norm": 0.8126513636201301, - "learning_rate": 1.92845802738259e-05, - "loss": 0.5042, + "epoch": 0.12, + "grad_norm": 0.47480477527524195, + "learning_rate": 1.959721309564217e-05, + "loss": 0.3462, "step": 2568 }, { - "epoch": 0.15, - "grad_norm": 0.4114429506728939, - "learning_rate": 1.928388890065639e-05, - "loss": 0.2532, + "epoch": 0.12, + "grad_norm": 0.40273697247110785, + "learning_rate": 1.9596794952925397e-05, + "loss": 0.2094, "step": 2569 }, { - "epoch": 0.15, - "grad_norm": 0.3213342473563277, - "learning_rate": 1.928319720598621e-05, - "loss": 0.27, + "epoch": 0.12, + "grad_norm": 0.4384801286881789, + "learning_rate": 1.959637659774483e-05, + "loss": 0.2837, "step": 2570 }, { - "epoch": 0.15, - "grad_norm": 0.40903951950701906, - "learning_rate": 1.9282505189839305e-05, - "loss": 0.2594, + "epoch": 0.12, + "grad_norm": 1.0485722826765629, + "learning_rate": 1.9595958030109736e-05, + "loss": 0.58, "step": 2571 }, { - "epoch": 0.15, - "grad_norm": 0.44270353797659523, - "learning_rate": 1.9281812852239647e-05, - "loss": 0.3207, + "epoch": 0.12, + "grad_norm": 0.5660588269828383, + "learning_rate": 1.959553925002938e-05, + "loss": 0.3428, "step": 2572 }, { - "epoch": 0.15, - "grad_norm": 0.43969504549066846, - "learning_rate": 1.928112019321121e-05, - "loss": 0.297, + "epoch": 0.12, + "grad_norm": 0.45685738686334393, + "learning_rate": 1.9595120257513035e-05, + "loss": 0.2092, "step": 2573 }, { - "epoch": 0.15, - "grad_norm": 0.6514459661898435, - "learning_rate": 1.9280427212777976e-05, - "loss": 0.4354, + "epoch": 0.12, + "grad_norm": 0.47452120488728067, + "learning_rate": 1.959470105256997e-05, + "loss": 0.2755, "step": 2574 }, { - "epoch": 0.15, - "grad_norm": 0.5278709615714426, - "learning_rate": 1.927973391096395e-05, - "loss": 0.334, - "step": 2575 + "epoch": 0.12, + "grad_norm": 0.48581645087313585, + "learning_rate": 1.9594281635209476e-05, + "loss": 0.3543, + "step": 2575 }, { - "epoch": 0.15, - "grad_norm": 0.3225729087069408, - "learning_rate": 1.9279040287793138e-05, - "loss": 0.1843, + "epoch": 0.12, + "grad_norm": 0.593662076779592, + "learning_rate": 1.9593862005440836e-05, + "loss": 0.3469, "step": 2576 }, { - "epoch": 0.15, - "grad_norm": 0.38192438331171524, - "learning_rate": 1.9278346343289555e-05, - "loss": 0.285, + "epoch": 0.12, + "grad_norm": 0.4431907716001137, + "learning_rate": 1.959344216327333e-05, + "loss": 0.3146, "step": 2577 }, { - "epoch": 0.15, - "grad_norm": 0.4238320027450154, - "learning_rate": 1.9277652077477242e-05, - "loss": 0.3222, + "epoch": 0.12, + "grad_norm": 1.748148954253344, + "learning_rate": 1.959302210871627e-05, + "loss": 0.8162, "step": 2578 }, { - "epoch": 0.15, - "grad_norm": 0.5692015716239939, - "learning_rate": 1.9276957490380236e-05, - "loss": 0.3179, + "epoch": 0.12, + "grad_norm": 0.4822448004821637, + "learning_rate": 1.959260184177894e-05, + "loss": 0.2998, "step": 2579 }, { - "epoch": 0.15, - "grad_norm": 0.6989023862803406, - "learning_rate": 1.927626258202259e-05, - "loss": 0.4818, + "epoch": 0.12, + "grad_norm": 0.418449652833715, + "learning_rate": 1.9592181362470653e-05, + "loss": 0.2845, "step": 2580 }, { - "epoch": 0.15, - "grad_norm": 0.47851777806709617, - "learning_rate": 1.927556735242837e-05, - "loss": 0.3246, + "epoch": 0.12, + "grad_norm": 0.5364238361570387, + "learning_rate": 1.9591760670800714e-05, + "loss": 0.3368, "step": 2581 }, { - "epoch": 0.15, - "grad_norm": 0.37801509190886534, - "learning_rate": 1.9274871801621652e-05, - "loss": 0.2653, + "epoch": 0.12, + "grad_norm": 0.5334928936842943, + "learning_rate": 1.959133976677844e-05, + "loss": 0.3643, "step": 2582 }, { - "epoch": 0.15, - "grad_norm": 0.31902918198499913, - "learning_rate": 1.927417592962652e-05, - "loss": 0.2124, + "epoch": 0.12, + "grad_norm": 0.35874488141141914, + "learning_rate": 1.9590918650413146e-05, + "loss": 0.1847, "step": 2583 }, { - "epoch": 0.15, - "grad_norm": 0.5711538165034585, - "learning_rate": 1.9273479736467077e-05, - "loss": 0.3241, + "epoch": 0.12, + "grad_norm": 0.510757997443261, + "learning_rate": 1.959049732171416e-05, + "loss": 0.3629, "step": 2584 }, { - "epoch": 0.15, - "grad_norm": 0.48424088195332443, - "learning_rate": 1.9272783222167424e-05, - "loss": 0.3557, + "epoch": 0.12, + "grad_norm": 0.41799222287937543, + "learning_rate": 1.9590075780690805e-05, + "loss": 0.3048, "step": 2585 }, { - "epoch": 0.15, - "grad_norm": 0.5128003089466692, - "learning_rate": 1.9272086386751693e-05, - "loss": 0.3375, + "epoch": 0.12, + "grad_norm": 1.0725572883415753, + "learning_rate": 1.9589654027352412e-05, + "loss": 0.3993, "step": 2586 }, { - "epoch": 0.15, - "grad_norm": 0.5824582826960971, - "learning_rate": 1.927138923024401e-05, - "loss": 0.3402, + "epoch": 0.12, + "grad_norm": 0.5957783600453085, + "learning_rate": 1.9589232061708325e-05, + "loss": 0.4507, "step": 2587 }, { - "epoch": 0.15, - "grad_norm": 0.3477545206134362, - "learning_rate": 1.927069175266851e-05, - "loss": 0.2719, + "epoch": 0.12, + "grad_norm": 0.4736260109350406, + "learning_rate": 1.9588809883767884e-05, + "loss": 0.2914, "step": 2588 }, { - "epoch": 0.15, - "grad_norm": 0.35480404863309206, - "learning_rate": 1.926999395404936e-05, - "loss": 0.1912, + "epoch": 0.12, + "grad_norm": 0.3897620682324087, + "learning_rate": 1.958838749354043e-05, + "loss": 0.1907, "step": 2589 }, { - "epoch": 0.15, - "grad_norm": 0.4088392855070457, - "learning_rate": 1.9269295834410715e-05, - "loss": 0.3091, + "epoch": 0.12, + "grad_norm": 1.8147010221046953, + "learning_rate": 1.958796489103532e-05, + "loss": 0.6194, "step": 2590 }, { - "epoch": 0.15, - "grad_norm": 0.4128220609130996, - "learning_rate": 1.9268597393776753e-05, - "loss": 0.3596, + "epoch": 0.12, + "grad_norm": 0.6789291023220007, + "learning_rate": 1.958754207626191e-05, + "loss": 0.3645, "step": 2591 }, { - "epoch": 0.15, - "grad_norm": 0.6523103375473858, - "learning_rate": 1.9267898632171663e-05, - "loss": 0.4379, + "epoch": 0.12, + "grad_norm": 0.4735617288983153, + "learning_rate": 1.9587119049229558e-05, + "loss": 0.3079, "step": 2592 }, { - "epoch": 0.15, - "grad_norm": 0.42876866445114575, - "learning_rate": 1.9267199549619643e-05, - "loss": 0.2967, + "epoch": 0.12, + "grad_norm": 0.9069992616130493, + "learning_rate": 1.958669580994763e-05, + "loss": 0.45, "step": 2593 }, { - "epoch": 0.15, - "grad_norm": 0.4201511676991754, - "learning_rate": 1.92665001461449e-05, - "loss": 0.3146, + "epoch": 0.12, + "grad_norm": 0.3858744376084387, + "learning_rate": 1.9586272358425494e-05, + "loss": 0.1982, "step": 2594 }, { - "epoch": 0.15, - "grad_norm": 0.23291446601377253, - "learning_rate": 1.9265800421771655e-05, - "loss": 0.1366, + "epoch": 0.12, + "grad_norm": 0.4004544868787489, + "learning_rate": 1.9585848694672533e-05, + "loss": 0.2823, "step": 2595 }, { - "epoch": 0.15, - "grad_norm": 0.43171545122840194, - "learning_rate": 1.926510037652414e-05, - "loss": 0.3457, + "epoch": 0.12, + "grad_norm": 0.5412561975312115, + "learning_rate": 1.958542481869812e-05, + "loss": 0.3143, "step": 2596 }, { - "epoch": 0.15, - "grad_norm": 0.5129067503078448, - "learning_rate": 1.9264400010426598e-05, - "loss": 0.4012, + "epoch": 0.12, + "grad_norm": 0.5197965944717913, + "learning_rate": 1.9585000730511635e-05, + "loss": 0.3114, "step": 2597 }, { - "epoch": 0.15, - "grad_norm": 0.41831673800447344, - "learning_rate": 1.926369932350328e-05, - "loss": 0.3836, + "epoch": 0.12, + "grad_norm": 0.590601159444078, + "learning_rate": 1.9584576430122473e-05, + "loss": 0.4247, "step": 2598 }, { - "epoch": 0.15, - "grad_norm": 0.30934776416133264, - "learning_rate": 1.9262998315778453e-05, - "loss": 0.1444, + "epoch": 0.12, + "grad_norm": 0.5076060346432055, + "learning_rate": 1.958415191754003e-05, + "loss": 0.3656, "step": 2599 }, { - "epoch": 0.15, - "grad_norm": 0.46632659562332673, - "learning_rate": 1.9262296987276395e-05, - "loss": 0.348, + "epoch": 0.12, + "grad_norm": 0.39928875463415014, + "learning_rate": 1.9583727192773698e-05, + "loss": 0.2966, "step": 2600 }, { - "epoch": 0.15, - "grad_norm": 0.27229649864445243, - "learning_rate": 1.9261595338021388e-05, - "loss": 0.2331, + "epoch": 0.12, + "grad_norm": 0.3452853931989869, + "learning_rate": 1.9583302255832883e-05, + "loss": 0.2325, "step": 2601 }, { - "epoch": 0.15, - "grad_norm": 0.40189885147596294, - "learning_rate": 1.9260893368037735e-05, - "loss": 0.1689, + "epoch": 0.12, + "grad_norm": 1.5265472210689686, + "learning_rate": 1.9582877106726994e-05, + "loss": 0.8948, "step": 2602 }, { - "epoch": 0.15, - "grad_norm": 0.41329355608342555, - "learning_rate": 1.9260191077349742e-05, - "loss": 0.3672, + "epoch": 0.12, + "grad_norm": 0.44827496586425275, + "learning_rate": 1.9582451745465444e-05, + "loss": 0.2731, "step": 2603 }, { - "epoch": 0.15, - "grad_norm": 0.6931020159640395, - "learning_rate": 1.925948846598173e-05, - "loss": 0.5568, + "epoch": 0.12, + "grad_norm": 0.46652685291081547, + "learning_rate": 1.9582026172057644e-05, + "loss": 0.3243, "step": 2604 }, { - "epoch": 0.15, - "grad_norm": 0.3821818839566917, - "learning_rate": 1.925878553395803e-05, - "loss": 0.2077, + "epoch": 0.12, + "grad_norm": 0.8103679080665189, + "learning_rate": 1.9581600386513022e-05, + "loss": 0.5497, "step": 2605 }, { - "epoch": 0.15, - "grad_norm": 0.36378068853839796, - "learning_rate": 1.9258082281302988e-05, - "loss": 0.3239, + "epoch": 0.12, + "grad_norm": 0.418313476703401, + "learning_rate": 1.9581174388841002e-05, + "loss": 0.2371, "step": 2606 }, { - "epoch": 0.15, - "grad_norm": 0.3489085591030622, - "learning_rate": 1.925737870804095e-05, - "loss": 0.1991, + "epoch": 0.12, + "grad_norm": 0.4520783504053943, + "learning_rate": 1.9580748179051013e-05, + "loss": 0.2523, "step": 2607 }, { - "epoch": 0.15, - "grad_norm": 0.4365373846944763, - "learning_rate": 1.925667481419629e-05, - "loss": 0.3204, + "epoch": 0.12, + "grad_norm": 0.5158814182806029, + "learning_rate": 1.95803217571525e-05, + "loss": 0.389, "step": 2608 }, { - "epoch": 0.15, - "grad_norm": 0.37061176255326356, - "learning_rate": 1.9255970599793376e-05, - "loss": 0.3358, + "epoch": 0.12, + "grad_norm": 0.5308513677170296, + "learning_rate": 1.957989512315489e-05, + "loss": 0.2663, "step": 2609 }, { - "epoch": 0.15, - "grad_norm": 0.5448112276722097, - "learning_rate": 1.92552660648566e-05, - "loss": 0.4135, + "epoch": 0.12, + "grad_norm": 0.4817956087165091, + "learning_rate": 1.957946827706764e-05, + "loss": 0.4003, "step": 2610 }, { - "epoch": 0.15, - "grad_norm": 0.7625349028762688, - "learning_rate": 1.925456120941036e-05, - "loss": 0.4468, + "epoch": 0.12, + "grad_norm": 0.4443214604983722, + "learning_rate": 1.957904121890019e-05, + "loss": 0.3561, "step": 2611 }, { - "epoch": 0.15, - "grad_norm": 0.3891806726343858, - "learning_rate": 1.925385603347906e-05, - "loss": 0.2244, + "epoch": 0.12, + "grad_norm": 0.4591132937153668, + "learning_rate": 1.9578613948662005e-05, + "loss": 0.1495, "step": 2612 }, { - "epoch": 0.15, - "grad_norm": 0.3134500837415368, - "learning_rate": 1.9253150537087123e-05, - "loss": 0.2471, + "epoch": 0.12, + "grad_norm": 0.3845310937869779, + "learning_rate": 1.9578186466362538e-05, + "loss": 0.2947, "step": 2613 }, { - "epoch": 0.15, - "grad_norm": 0.6358650633518437, - "learning_rate": 1.9252444720258982e-05, - "loss": 0.3955, + "epoch": 0.12, + "grad_norm": 0.4844498422257844, + "learning_rate": 1.9577758772011252e-05, + "loss": 0.3465, "step": 2614 }, { - "epoch": 0.15, - "grad_norm": 0.4146219135777843, - "learning_rate": 1.925173858301908e-05, - "loss": 0.3018, + "epoch": 0.12, + "grad_norm": 0.5119467988951399, + "learning_rate": 1.9577330865617618e-05, + "loss": 0.341, "step": 2615 }, { - "epoch": 0.15, - "grad_norm": 0.5250021533099688, - "learning_rate": 1.9251032125391867e-05, - "loss": 0.3568, + "epoch": 0.12, + "grad_norm": 0.35294840394706045, + "learning_rate": 1.957690274719111e-05, + "loss": 0.2953, "step": 2616 }, { - "epoch": 0.15, - "grad_norm": 0.3748774238993723, - "learning_rate": 1.925032534740181e-05, - "loss": 0.3043, + "epoch": 0.12, + "grad_norm": 1.455303062444168, + "learning_rate": 1.9576474416741206e-05, + "loss": 0.7456, "step": 2617 }, { - "epoch": 0.15, - "grad_norm": 0.4535619682866479, - "learning_rate": 1.9249618249073384e-05, - "loss": 0.2748, + "epoch": 0.12, + "grad_norm": 0.7147676567215108, + "learning_rate": 1.957604587427739e-05, + "loss": 0.4223, "step": 2618 }, { - "epoch": 0.15, - "grad_norm": 0.7356712302689734, - "learning_rate": 1.9248910830431073e-05, - "loss": 0.5015, + "epoch": 0.12, + "grad_norm": 0.33271723827782546, + "learning_rate": 1.9575617119809144e-05, + "loss": 0.2456, "step": 2619 }, { - "epoch": 0.15, - "grad_norm": 0.4561318815496416, - "learning_rate": 1.924820309149938e-05, - "loss": 0.3636, + "epoch": 0.12, + "grad_norm": 1.6133911631795532, + "learning_rate": 1.9575188153345966e-05, + "loss": 0.9356, "step": 2620 }, { - "epoch": 0.15, - "grad_norm": 0.40170842706925475, - "learning_rate": 1.9247495032302812e-05, - "loss": 0.317, + "epoch": 0.12, + "grad_norm": 0.5610447400749208, + "learning_rate": 1.957475897489735e-05, + "loss": 0.3892, "step": 2621 }, { - "epoch": 0.15, - "grad_norm": 0.3116553045717942, - "learning_rate": 1.924678665286589e-05, - "loss": 0.2193, + "epoch": 0.12, + "grad_norm": 0.399179052450849, + "learning_rate": 1.95743295844728e-05, + "loss": 0.2332, "step": 2622 }, { - "epoch": 0.15, - "grad_norm": 1.0170395298052468, - "learning_rate": 1.924607795321314e-05, - "loss": 0.4508, + "epoch": 0.12, + "grad_norm": 0.49918721276215844, + "learning_rate": 1.957389998208182e-05, + "loss": 0.3602, "step": 2623 }, { - "epoch": 0.15, - "grad_norm": 0.4078089046880881, - "learning_rate": 1.924536893336911e-05, - "loss": 0.3254, + "epoch": 0.12, + "grad_norm": 0.7045487648291536, + "learning_rate": 1.9573470167733926e-05, + "loss": 0.3295, "step": 2624 }, { - "epoch": 0.15, - "grad_norm": 0.43203476595198353, - "learning_rate": 1.924465959335835e-05, - "loss": 0.3156, + "epoch": 0.12, + "grad_norm": 0.34135872303828413, + "learning_rate": 1.9573040141438625e-05, + "loss": 0.205, "step": 2625 }, { - "epoch": 0.15, - "grad_norm": 0.8382291099953593, - "learning_rate": 1.924394993320543e-05, - "loss": 0.4348, + "epoch": 0.12, + "grad_norm": 0.6924770682946492, + "learning_rate": 1.9572609903205442e-05, + "loss": 0.5041, "step": 2626 }, { - "epoch": 0.15, - "grad_norm": 0.4875964972050568, - "learning_rate": 1.9243239952934918e-05, - "loss": 0.3129, + "epoch": 0.12, + "grad_norm": 0.4248369267366425, + "learning_rate": 1.9572179453043905e-05, + "loss": 0.3279, "step": 2627 }, { - "epoch": 0.15, - "grad_norm": 0.3377201613499536, - "learning_rate": 1.9242529652571405e-05, - "loss": 0.1133, + "epoch": 0.12, + "grad_norm": 0.438879360586693, + "learning_rate": 1.957174879096354e-05, + "loss": 0.3113, "step": 2628 }, { - "epoch": 0.15, - "grad_norm": 0.4149281039334817, - "learning_rate": 1.9241819032139487e-05, - "loss": 0.3227, + "epoch": 0.12, + "grad_norm": 0.3521882108441771, + "learning_rate": 1.9571317916973877e-05, + "loss": 0.1488, "step": 2629 }, { - "epoch": 0.15, - "grad_norm": 0.48560499229576615, - "learning_rate": 1.9241108091663774e-05, - "loss": 0.3227, + "epoch": 0.12, + "grad_norm": 0.9036686206433856, + "learning_rate": 1.9570886831084466e-05, + "loss": 0.4536, "step": 2630 }, { - "epoch": 0.15, - "grad_norm": 1.3978598633144965, - "learning_rate": 1.9240396831168883e-05, - "loss": 0.4226, + "epoch": 0.12, + "grad_norm": 0.3535453112761301, + "learning_rate": 1.957045553330484e-05, + "loss": 0.2469, "step": 2631 }, { - "epoch": 0.15, - "grad_norm": 0.5892322349385228, - "learning_rate": 1.923968525067945e-05, - "loss": 0.3879, + "epoch": 0.12, + "grad_norm": 0.45898846848902886, + "learning_rate": 1.957002402364456e-05, + "loss": 0.3555, "step": 2632 }, { - "epoch": 0.15, - "grad_norm": 0.5121453834736848, - "learning_rate": 1.923897335022011e-05, - "loss": 0.3623, + "epoch": 0.12, + "grad_norm": 0.7710637409088189, + "learning_rate": 1.9569592302113164e-05, + "loss": 0.455, "step": 2633 }, { - "epoch": 0.15, - "grad_norm": 0.4052392757884597, - "learning_rate": 1.9238261129815526e-05, - "loss": 0.2655, + "epoch": 0.12, + "grad_norm": 0.3949491521042867, + "learning_rate": 1.956916036872022e-05, + "loss": 0.2399, "step": 2634 }, { - "epoch": 0.15, - "grad_norm": 0.3905272412860965, - "learning_rate": 1.9237548589490355e-05, - "loss": 0.14, + "epoch": 0.12, + "grad_norm": 0.5231927274189248, + "learning_rate": 1.9568728223475292e-05, + "loss": 0.2975, "step": 2635 }, { - "epoch": 0.15, - "grad_norm": 0.7394376527945423, - "learning_rate": 1.923683572926927e-05, - "loss": 0.3439, + "epoch": 0.12, + "grad_norm": 1.0538937915345412, + "learning_rate": 1.956829586638794e-05, + "loss": 0.4178, "step": 2636 }, { - "epoch": 0.15, - "grad_norm": 0.8275157121617628, - "learning_rate": 1.9236122549176963e-05, - "loss": 0.3755, + "epoch": 0.12, + "grad_norm": 0.41415657038140974, + "learning_rate": 1.956786329746774e-05, + "loss": 0.3151, "step": 2637 }, { - "epoch": 0.15, - "grad_norm": 0.9844646238383457, - "learning_rate": 1.923540904923813e-05, - "loss": 0.3717, + "epoch": 0.12, + "grad_norm": 0.963778736543261, + "learning_rate": 1.9567430516724268e-05, + "loss": 0.4265, "step": 2638 }, { - "epoch": 0.15, - "grad_norm": 0.3459704137199816, - "learning_rate": 1.9234695229477475e-05, - "loss": 0.3101, + "epoch": 0.12, + "grad_norm": 0.4774903334790501, + "learning_rate": 1.9566997524167108e-05, + "loss": 0.3312, "step": 2639 }, { - "epoch": 0.15, - "grad_norm": 0.31297296064647323, - "learning_rate": 1.9233981089919727e-05, - "loss": 0.1909, + "epoch": 0.12, + "grad_norm": 0.5319709754758282, + "learning_rate": 1.9566564319805842e-05, + "loss": 0.3954, "step": 2640 }, { - "epoch": 0.15, - "grad_norm": 0.6016717619270547, - "learning_rate": 1.9233266630589607e-05, - "loss": 0.2894, + "epoch": 0.12, + "grad_norm": 0.37041739189839096, + "learning_rate": 1.9566130903650064e-05, + "loss": 0.2008, "step": 2641 }, { - "epoch": 0.15, - "grad_norm": 0.5144862160701495, - "learning_rate": 1.923255185151186e-05, - "loss": 0.3252, + "epoch": 0.12, + "grad_norm": 0.4821871487489271, + "learning_rate": 1.9565697275709366e-05, + "loss": 0.1478, "step": 2642 }, { - "epoch": 0.15, - "grad_norm": 1.0734016508045525, - "learning_rate": 1.923183675271124e-05, - "loss": 0.4787, + "epoch": 0.12, + "grad_norm": 0.5570046882565686, + "learning_rate": 1.956526343599335e-05, + "loss": 0.3584, "step": 2643 }, { - "epoch": 0.15, - "grad_norm": 0.6162045563854098, - "learning_rate": 1.9231121334212513e-05, - "loss": 0.2236, + "epoch": 0.12, + "grad_norm": 0.6624509850092503, + "learning_rate": 1.956482938451162e-05, + "loss": 0.3824, "step": 2644 }, { - "epoch": 0.15, - "grad_norm": 0.45479225577934984, - "learning_rate": 1.9230405596040448e-05, - "loss": 0.2862, + "epoch": 0.12, + "grad_norm": 0.69034167169188, + "learning_rate": 1.9564395121273785e-05, + "loss": 0.3418, "step": 2645 }, { - "epoch": 0.15, - "grad_norm": 1.5405861989390848, - "learning_rate": 1.922968953821984e-05, - "loss": 0.8193, + "epoch": 0.12, + "grad_norm": 0.5521586322251684, + "learning_rate": 1.9563960646289464e-05, + "loss": 0.3196, "step": 2646 }, { - "epoch": 0.15, - "grad_norm": 0.3822968065664251, - "learning_rate": 1.9228973160775474e-05, - "loss": 0.2538, + "epoch": 0.12, + "grad_norm": 0.3333338962503935, + "learning_rate": 1.956352595956827e-05, + "loss": 0.2506, "step": 2647 }, { - "epoch": 0.15, - "grad_norm": 0.4312183020139809, - "learning_rate": 1.9228256463732165e-05, - "loss": 0.2563, + "epoch": 0.12, + "grad_norm": 0.6441977581805436, + "learning_rate": 1.956309106111983e-05, + "loss": 0.1974, "step": 2648 }, { - "epoch": 0.15, - "grad_norm": 0.614927999518781, - "learning_rate": 1.9227539447114732e-05, - "loss": 0.3677, + "epoch": 0.12, + "grad_norm": 0.48900449265425344, + "learning_rate": 1.9562655950953768e-05, + "loss": 0.3206, "step": 2649 }, { - "epoch": 0.15, - "grad_norm": 1.1379887071076775, - "learning_rate": 1.9226822110948005e-05, - "loss": 0.6745, + "epoch": 0.12, + "grad_norm": 1.191370054413781, + "learning_rate": 1.9562220629079723e-05, + "loss": 0.5293, "step": 2650 }, { - "epoch": 0.15, - "grad_norm": 0.37894151707122936, - "learning_rate": 1.9226104455256827e-05, - "loss": 0.2201, + "epoch": 0.12, + "grad_norm": 0.4894821562012367, + "learning_rate": 1.9561785095507327e-05, + "loss": 0.2617, "step": 2651 }, { - "epoch": 0.15, - "grad_norm": 0.4781041246729594, - "learning_rate": 1.9225386480066046e-05, - "loss": 0.3187, + "epoch": 0.12, + "grad_norm": 0.5084907722530567, + "learning_rate": 1.9561349350246226e-05, + "loss": 0.3337, "step": 2652 }, { - "epoch": 0.15, - "grad_norm": 0.4630776461667929, - "learning_rate": 1.9224668185400528e-05, - "loss": 0.3508, + "epoch": 0.12, + "grad_norm": 0.44079058431848234, + "learning_rate": 1.9560913393306068e-05, + "loss": 0.1741, "step": 2653 }, { - "epoch": 0.15, - "grad_norm": 0.38185126415206494, - "learning_rate": 1.922394957128515e-05, - "loss": 0.2651, + "epoch": 0.12, + "grad_norm": 1.0370178279451354, + "learning_rate": 1.95604772246965e-05, + "loss": 0.5233, "step": 2654 }, { - "epoch": 0.15, - "grad_norm": 0.4365198237742083, - "learning_rate": 1.9223230637744792e-05, - "loss": 0.2976, + "epoch": 0.12, + "grad_norm": 0.41637920036119297, + "learning_rate": 1.956004084442718e-05, + "loss": 0.2493, "step": 2655 }, { - "epoch": 0.15, - "grad_norm": 1.6072299775806103, - "learning_rate": 1.9222511384804355e-05, - "loss": 0.8611, + "epoch": 0.12, + "grad_norm": 0.9393828595331755, + "learning_rate": 1.9559604252507768e-05, + "loss": 0.5775, "step": 2656 }, { - "epoch": 0.15, - "grad_norm": 0.35629856336310844, - "learning_rate": 1.9221791812488746e-05, - "loss": 0.2504, + "epoch": 0.12, + "grad_norm": 1.4046058257688947, + "learning_rate": 1.9559167448947937e-05, + "loss": 0.7905, "step": 2657 }, { - "epoch": 0.15, - "grad_norm": 0.9970693080774518, - "learning_rate": 1.9221071920822882e-05, - "loss": 0.6513, + "epoch": 0.12, + "grad_norm": 0.5229844929324957, + "learning_rate": 1.9558730433757348e-05, + "loss": 0.222, "step": 2658 }, { - "epoch": 0.15, - "grad_norm": 0.7995130307427668, - "learning_rate": 1.922035170983169e-05, - "loss": 0.5226, + "epoch": 0.12, + "grad_norm": 0.3029112203647432, + "learning_rate": 1.955829320694568e-05, + "loss": 0.2186, "step": 2659 }, { - "epoch": 0.15, - "grad_norm": 0.3808491244278673, - "learning_rate": 1.9219631179540125e-05, - "loss": 0.3165, + "epoch": 0.12, + "grad_norm": 1.0222830837202752, + "learning_rate": 1.9557855768522613e-05, + "loss": 0.4525, "step": 2660 }, { - "epoch": 0.15, - "grad_norm": 0.3197238539222895, - "learning_rate": 1.921891032997312e-05, - "loss": 0.1838, + "epoch": 0.12, + "grad_norm": 0.4159857051421944, + "learning_rate": 1.9557418118497832e-05, + "loss": 0.2738, "step": 2661 }, { - "epoch": 0.15, - "grad_norm": 1.503646870246312, - "learning_rate": 1.921818916115565e-05, - "loss": 0.794, + "epoch": 0.12, + "grad_norm": 1.0006003860946577, + "learning_rate": 1.9556980256881025e-05, + "loss": 0.63, "step": 2662 }, { - "epoch": 0.15, - "grad_norm": 0.39543891144707394, - "learning_rate": 1.9217467673112685e-05, - "loss": 0.2965, + "epoch": 0.12, + "grad_norm": 0.5365428375772864, + "learning_rate": 1.955654218368189e-05, + "loss": 0.3614, "step": 2663 }, { - "epoch": 0.15, - "grad_norm": 0.9269117583869534, - "learning_rate": 1.921674586586921e-05, - "loss": 0.4454, + "epoch": 0.12, + "grad_norm": 0.48757551217910594, + "learning_rate": 1.9556103898910116e-05, + "loss": 0.2877, "step": 2664 }, { - "epoch": 0.15, - "grad_norm": 0.4249499526884838, - "learning_rate": 1.9216023739450222e-05, - "loss": 0.3679, + "epoch": 0.12, + "grad_norm": 0.3386132806310522, + "learning_rate": 1.9555665402575418e-05, + "loss": 0.1427, "step": 2665 }, { - "epoch": 0.15, - "grad_norm": 0.4217638497070136, - "learning_rate": 1.921530129388073e-05, - "loss": 0.2539, + "epoch": 0.12, + "grad_norm": 1.0713907556699434, + "learning_rate": 1.9555226694687492e-05, + "loss": 0.5201, "step": 2666 }, { - "epoch": 0.15, - "grad_norm": 0.277953569574096, - "learning_rate": 1.921457852918575e-05, - "loss": 0.1021, + "epoch": 0.12, + "grad_norm": 0.4196810875497365, + "learning_rate": 1.955478777525606e-05, + "loss": 0.2907, "step": 2667 }, { - "epoch": 0.15, - "grad_norm": 0.44369267599406603, - "learning_rate": 1.921385544539031e-05, + "epoch": 0.12, + "grad_norm": 0.5527529652206862, + "learning_rate": 1.9554348644290832e-05, "loss": 0.3468, "step": 2668 }, { - "epoch": 0.15, - "grad_norm": 0.4842457706418174, - "learning_rate": 1.9213132042519453e-05, - "loss": 0.2911, + "epoch": 0.12, + "grad_norm": 1.5884057836820071, + "learning_rate": 1.9553909301801536e-05, + "loss": 0.8254, "step": 2669 }, { - "epoch": 0.15, - "grad_norm": 0.836503549357935, - "learning_rate": 1.921240832059823e-05, - "loss": 0.376, + "epoch": 0.12, + "grad_norm": 0.4361367584529289, + "learning_rate": 1.9553469747797898e-05, + "loss": 0.3137, "step": 2670 }, { - "epoch": 0.15, - "grad_norm": 0.8464103679581624, - "learning_rate": 1.9211684279651703e-05, - "loss": 0.5637, + "epoch": 0.12, + "grad_norm": 0.373542468620049, + "learning_rate": 1.9553029982289645e-05, + "loss": 0.222, "step": 2671 }, { - "epoch": 0.15, - "grad_norm": 0.4795410800772796, - "learning_rate": 1.9210959919704946e-05, - "loss": 0.2707, + "epoch": 0.12, + "grad_norm": 0.7936844764878231, + "learning_rate": 1.9552590005286518e-05, + "loss": 0.3865, "step": 2672 }, { - "epoch": 0.15, - "grad_norm": 0.3464321091289899, - "learning_rate": 1.9210235240783044e-05, - "loss": 0.2455, + "epoch": 0.12, + "grad_norm": 0.49784027194747066, + "learning_rate": 1.9552149816798255e-05, + "loss": 0.3009, "step": 2673 }, { - "epoch": 0.15, - "grad_norm": 1.184053359160218, - "learning_rate": 1.920951024291109e-05, - "loss": 0.4233, + "epoch": 0.12, + "grad_norm": 1.118725279014554, + "learning_rate": 1.9551709416834596e-05, + "loss": 0.4737, "step": 2674 }, { - "epoch": 0.15, - "grad_norm": 0.4197906790697942, - "learning_rate": 1.9208784926114194e-05, - "loss": 0.3018, + "epoch": 0.12, + "grad_norm": 0.5623489336116214, + "learning_rate": 1.9551268805405302e-05, + "loss": 0.3561, "step": 2675 }, { - "epoch": 0.15, - "grad_norm": 0.9864362176828683, - "learning_rate": 1.9208059290417468e-05, - "loss": 0.5272, + "epoch": 0.12, + "grad_norm": 0.4415744540421797, + "learning_rate": 1.9550827982520122e-05, + "loss": 0.2686, "step": 2676 }, { - "epoch": 0.15, - "grad_norm": 0.42562445797911436, - "learning_rate": 1.9207333335846048e-05, - "loss": 0.311, + "epoch": 0.12, + "grad_norm": 0.9916534503157868, + "learning_rate": 1.9550386948188814e-05, + "loss": 0.5826, "step": 2677 }, { - "epoch": 0.15, - "grad_norm": 0.4449285311849742, - "learning_rate": 1.920660706242507e-05, - "loss": 0.2907, + "epoch": 0.12, + "grad_norm": 0.32826891612978104, + "learning_rate": 1.9549945702421144e-05, + "loss": 0.2929, "step": 2678 }, { - "epoch": 0.15, - "grad_norm": 0.4022883840813125, - "learning_rate": 1.9205880470179682e-05, - "loss": 0.2319, + "epoch": 0.12, + "grad_norm": 0.49601626008277505, + "learning_rate": 1.954950424522688e-05, + "loss": 0.3042, "step": 2679 }, { - "epoch": 0.15, - "grad_norm": 0.45241222820165394, - "learning_rate": 1.9205153559135056e-05, - "loss": 0.2982, + "epoch": 0.12, + "grad_norm": 0.4769741156261984, + "learning_rate": 1.9549062576615797e-05, + "loss": 0.262, "step": 2680 }, { - "epoch": 0.15, - "grad_norm": 0.3837648287989111, - "learning_rate": 1.9204426329316354e-05, - "loss": 0.28, + "epoch": 0.12, + "grad_norm": 0.5833148390207082, + "learning_rate": 1.9548620696597672e-05, + "loss": 0.2852, "step": 2681 }, { - "epoch": 0.15, - "grad_norm": 1.1774690784929287, - "learning_rate": 1.9203698780748765e-05, - "loss": 0.5684, + "epoch": 0.12, + "grad_norm": 0.5273574815493769, + "learning_rate": 1.954817860518229e-05, + "loss": 0.2929, "step": 2682 }, { - "epoch": 0.15, - "grad_norm": 0.9706697136795795, - "learning_rate": 1.920297091345748e-05, - "loss": 0.4298, + "epoch": 0.12, + "grad_norm": 0.4717920748783304, + "learning_rate": 1.9547736302379433e-05, + "loss": 0.3656, "step": 2683 }, { - "epoch": 0.15, - "grad_norm": 0.44832358661289956, - "learning_rate": 1.9202242727467713e-05, - "loss": 0.2841, + "epoch": 0.12, + "grad_norm": 0.5333053224613801, + "learning_rate": 1.95472937881989e-05, + "loss": 0.3024, "step": 2684 }, { - "epoch": 0.15, - "grad_norm": 0.40119273159907204, - "learning_rate": 1.9201514222804672e-05, - "loss": 0.2904, + "epoch": 0.12, + "grad_norm": 0.3265390233175672, + "learning_rate": 1.954685106265048e-05, + "loss": 0.2243, "step": 2685 }, { - "epoch": 0.15, - "grad_norm": 0.5340909714198232, - "learning_rate": 1.9200785399493592e-05, - "loss": 0.3694, + "epoch": 0.12, + "grad_norm": 1.2659938909769657, + "learning_rate": 1.954640812574398e-05, + "loss": 0.7928, "step": 2686 }, { - "epoch": 0.15, - "grad_norm": 0.6551279965817546, - "learning_rate": 1.9200056257559706e-05, - "loss": 0.2458, + "epoch": 0.12, + "grad_norm": 0.4823140218116708, + "learning_rate": 1.9545964977489205e-05, + "loss": 0.3005, "step": 2687 }, { - "epoch": 0.15, - "grad_norm": 1.2765480970724306, - "learning_rate": 1.9199326797028268e-05, - "loss": 0.4481, + "epoch": 0.12, + "grad_norm": 0.5172891358452325, + "learning_rate": 1.9545521617895965e-05, + "loss": 0.2906, "step": 2688 }, { - "epoch": 0.15, - "grad_norm": 0.4896591885875324, - "learning_rate": 1.9198597017924543e-05, - "loss": 0.3536, + "epoch": 0.12, + "grad_norm": 0.6262499379706858, + "learning_rate": 1.954507804697408e-05, + "loss": 0.5276, "step": 2689 }, { - "epoch": 0.15, - "grad_norm": 0.3569245432977872, - "learning_rate": 1.9197866920273794e-05, - "loss": 0.2064, + "epoch": 0.12, + "grad_norm": 0.5455439660363581, + "learning_rate": 1.9544634264733363e-05, + "loss": 0.4374, "step": 2690 }, { - "epoch": 0.15, - "grad_norm": 0.33094841706483585, - "learning_rate": 1.919713650410131e-05, - "loss": 0.2783, + "epoch": 0.12, + "grad_norm": 0.3950906285573646, + "learning_rate": 1.9544190271183647e-05, + "loss": 0.214, "step": 2691 }, { - "epoch": 0.15, - "grad_norm": 0.46907427883634445, - "learning_rate": 1.9196405769432385e-05, - "loss": 0.3683, + "epoch": 0.12, + "grad_norm": 0.58345490444392, + "learning_rate": 1.9543746066334755e-05, + "loss": 0.2675, "step": 2692 }, { - "epoch": 0.15, - "grad_norm": 0.540396516824427, - "learning_rate": 1.9195674716292326e-05, - "loss": 0.2609, + "epoch": 0.12, + "grad_norm": 1.3264339841866752, + "learning_rate": 1.9543301650196523e-05, + "loss": 0.7517, "step": 2693 }, { - "epoch": 0.15, - "grad_norm": 0.7820904049432057, - "learning_rate": 1.9194943344706444e-05, - "loss": 0.423, + "epoch": 0.12, + "grad_norm": 0.4158528564419858, + "learning_rate": 1.954285702277879e-05, + "loss": 0.257, "step": 2694 }, { - "epoch": 0.15, - "grad_norm": 1.4341198750989717, - "learning_rate": 1.919421165470007e-05, - "loss": 0.7056, + "epoch": 0.12, + "grad_norm": 0.4822177004526829, + "learning_rate": 1.95424121840914e-05, + "loss": 0.3639, "step": 2695 }, { - "epoch": 0.15, - "grad_norm": 0.41243600637594274, - "learning_rate": 1.919347964629854e-05, - "loss": 0.217, + "epoch": 0.12, + "grad_norm": 0.48643286227064786, + "learning_rate": 1.95419671341442e-05, + "loss": 0.3103, "step": 2696 }, { - "epoch": 0.15, - "grad_norm": 0.29327859156718405, - "learning_rate": 1.919274731952721e-05, - "loss": 0.2332, + "epoch": 0.12, + "grad_norm": 0.3485184890450811, + "learning_rate": 1.954152187294705e-05, + "loss": 0.1499, "step": 2697 }, { - "epoch": 0.16, - "grad_norm": 0.9189693213979191, - "learning_rate": 1.919201467441143e-05, - "loss": 0.629, + "epoch": 0.12, + "grad_norm": 0.6371826709355921, + "learning_rate": 1.9541076400509798e-05, + "loss": 0.4582, "step": 2698 }, { - "epoch": 0.16, - "grad_norm": 0.46368652568716096, - "learning_rate": 1.919128171097658e-05, - "loss": 0.3495, + "epoch": 0.12, + "grad_norm": 0.5122405831985051, + "learning_rate": 1.954063071684231e-05, + "loss": 0.3606, "step": 2699 }, { - "epoch": 0.16, - "grad_norm": 0.594946940268688, - "learning_rate": 1.919054842924804e-05, - "loss": 0.341, + "epoch": 0.12, + "grad_norm": 0.4386015764678328, + "learning_rate": 1.9540184821954456e-05, + "loss": 0.2422, "step": 2700 }, { - "epoch": 0.16, - "grad_norm": 0.4819190307628047, - "learning_rate": 1.9189814829251204e-05, - "loss": 0.3548, + "epoch": 0.12, + "grad_norm": 0.6397039168501374, + "learning_rate": 1.95397387158561e-05, + "loss": 0.4529, "step": 2701 }, { - "epoch": 0.16, - "grad_norm": 0.48033771181806284, - "learning_rate": 1.9189080911011474e-05, - "loss": 0.3313, + "epoch": 0.12, + "grad_norm": 0.6542233140675675, + "learning_rate": 1.953929239855713e-05, + "loss": 0.3905, "step": 2702 }, { - "epoch": 0.16, - "grad_norm": 0.38210522382790274, - "learning_rate": 1.9188346674554267e-05, - "loss": 0.1974, + "epoch": 0.12, + "grad_norm": 0.4864412335597301, + "learning_rate": 1.9538845870067412e-05, + "loss": 0.3011, "step": 2703 }, { - "epoch": 0.16, - "grad_norm": 0.36886884191893365, - "learning_rate": 1.918761211990501e-05, - "loss": 0.2936, + "epoch": 0.12, + "grad_norm": 0.30948337924147673, + "learning_rate": 1.953839913039685e-05, + "loss": 0.1829, "step": 2704 }, { - "epoch": 0.16, - "grad_norm": 0.47789844802984893, - "learning_rate": 1.918687724708914e-05, - "loss": 0.3213, + "epoch": 0.12, + "grad_norm": 0.8308036852282087, + "learning_rate": 1.9537952179555315e-05, + "loss": 0.6176, "step": 2705 }, { - "epoch": 0.16, - "grad_norm": 0.5360226368299564, - "learning_rate": 1.918614205613211e-05, - "loss": 0.3334, + "epoch": 0.12, + "grad_norm": 0.48109952612974693, + "learning_rate": 1.9537505017552716e-05, + "loss": 0.3544, "step": 2706 }, { - "epoch": 0.16, - "grad_norm": 0.878108287963956, - "learning_rate": 1.9185406547059367e-05, - "loss": 0.5888, + "epoch": 0.12, + "grad_norm": 0.41061095648212154, + "learning_rate": 1.9537057644398948e-05, + "loss": 0.3038, "step": 2707 }, { - "epoch": 0.16, - "grad_norm": 0.48964078753235296, - "learning_rate": 1.91846707198964e-05, - "loss": 0.3056, + "epoch": 0.12, + "grad_norm": 1.2575042671889498, + "learning_rate": 1.9536610060103916e-05, + "loss": 0.7074, "step": 2708 }, { - "epoch": 0.16, - "grad_norm": 0.4418702371752559, - "learning_rate": 1.9183934574668674e-05, - "loss": 0.3089, + "epoch": 0.12, + "grad_norm": 0.3627395772924548, + "learning_rate": 1.953616226467753e-05, + "loss": 0.2251, "step": 2709 }, { - "epoch": 0.16, - "grad_norm": 0.4469509833893016, - "learning_rate": 1.918319811140169e-05, - "loss": 0.3066, + "epoch": 0.12, + "grad_norm": 0.5395140255526679, + "learning_rate": 1.95357142581297e-05, + "loss": 0.2933, "step": 2710 }, { - "epoch": 0.16, - "grad_norm": 0.5447582634426448, - "learning_rate": 1.9182461330120952e-05, - "loss": 0.3778, + "epoch": 0.12, + "grad_norm": 0.4149019537642368, + "learning_rate": 1.953526604047035e-05, + "loss": 0.3772, "step": 2711 }, { - "epoch": 0.16, - "grad_norm": 0.38486688340331743, - "learning_rate": 1.918172423085197e-05, - "loss": 0.3022, - "step": 2712 + "epoch": 0.12, + "grad_norm": 0.37795623499851605, + "learning_rate": 1.9534817611709395e-05, + "loss": 0.3069, + "step": 2712 }, { - "epoch": 0.16, - "grad_norm": 0.3383846109182444, - "learning_rate": 1.9180986813620276e-05, - "loss": 0.2254, + "epoch": 0.12, + "grad_norm": 0.3837095307584616, + "learning_rate": 1.953436897185677e-05, + "loss": 0.2139, "step": 2713 }, { - "epoch": 0.16, - "grad_norm": 0.5701648234097344, - "learning_rate": 1.9180249078451406e-05, - "loss": 0.3499, + "epoch": 0.12, + "grad_norm": 0.4884241444771125, + "learning_rate": 1.9533920120922407e-05, + "loss": 0.3586, "step": 2714 }, { - "epoch": 0.16, - "grad_norm": 0.4083474638888638, - "learning_rate": 1.9179511025370902e-05, - "loss": 0.3619, + "epoch": 0.12, + "grad_norm": 0.8181213710817211, + "learning_rate": 1.953347105891624e-05, + "loss": 0.404, "step": 2715 }, { - "epoch": 0.16, - "grad_norm": 0.41177540040311006, - "learning_rate": 1.9178772654404323e-05, - "loss": 0.3057, + "epoch": 0.12, + "grad_norm": 0.3781155672368854, + "learning_rate": 1.9533021785848215e-05, + "loss": 0.2652, "step": 2716 }, { - "epoch": 0.16, - "grad_norm": 0.3629090355785191, - "learning_rate": 1.9178033965577243e-05, - "loss": 0.2618, + "epoch": 0.12, + "grad_norm": 0.674419641095578, + "learning_rate": 1.9532572301728274e-05, + "loss": 0.4299, "step": 2717 }, { - "epoch": 0.16, - "grad_norm": 0.5828011824693402, - "learning_rate": 1.9177294958915246e-05, - "loss": 0.4277, + "epoch": 0.12, + "grad_norm": 0.5301144849367732, + "learning_rate": 1.9532122606566368e-05, + "loss": 0.3221, "step": 2718 }, { - "epoch": 0.16, - "grad_norm": 0.44338515108899634, - "learning_rate": 1.9176555634443912e-05, - "loss": 0.1719, + "epoch": 0.12, + "grad_norm": 0.38442693168779324, + "learning_rate": 1.9531672700372457e-05, + "loss": 0.2619, "step": 2719 }, { - "epoch": 0.16, - "grad_norm": 0.3385222291686445, - "learning_rate": 1.9175815992188856e-05, - "loss": 0.2838, + "epoch": 0.12, + "grad_norm": 0.893107632772404, + "learning_rate": 1.9531222583156496e-05, + "loss": 0.3969, "step": 2720 }, { - "epoch": 0.16, - "grad_norm": 0.5432606412549738, - "learning_rate": 1.9175076032175685e-05, - "loss": 0.3732, + "epoch": 0.12, + "grad_norm": 0.6587154115642132, + "learning_rate": 1.953077225492846e-05, + "loss": 0.4252, "step": 2721 }, { - "epoch": 0.16, - "grad_norm": 0.5378990298129169, - "learning_rate": 1.9174335754430026e-05, - "loss": 0.4871, + "epoch": 0.13, + "grad_norm": 0.432918225197781, + "learning_rate": 1.9530321715698303e-05, + "loss": 0.3274, "step": 2722 }, { - "epoch": 0.16, - "grad_norm": 0.544793784552123, - "learning_rate": 1.9173595158977515e-05, - "loss": 0.2266, + "epoch": 0.13, + "grad_norm": 0.6454133785954299, + "learning_rate": 1.9529870965476016e-05, + "loss": 0.3587, "step": 2723 }, { - "epoch": 0.16, - "grad_norm": 0.34300168100501205, - "learning_rate": 1.9172854245843796e-05, - "loss": 0.3132, + "epoch": 0.13, + "grad_norm": 0.49642465655349327, + "learning_rate": 1.9529420004271568e-05, + "loss": 0.3567, "step": 2724 }, { - "epoch": 0.16, - "grad_norm": 0.35438645648218475, - "learning_rate": 1.917211301505453e-05, - "loss": 0.2455, + "epoch": 0.13, + "grad_norm": 0.3257606577489552, + "learning_rate": 1.9528968832094947e-05, + "loss": 0.1918, "step": 2725 }, { - "epoch": 0.16, - "grad_norm": 0.37041691780405817, - "learning_rate": 1.9171371466635385e-05, - "loss": 0.1947, + "epoch": 0.13, + "grad_norm": 0.4811564882075819, + "learning_rate": 1.9528517448956137e-05, + "loss": 0.2942, "step": 2726 }, { - "epoch": 0.16, - "grad_norm": 0.41229526848301834, - "learning_rate": 1.9170629600612044e-05, - "loss": 0.3676, + "epoch": 0.13, + "grad_norm": 0.5025518396030556, + "learning_rate": 1.9528065854865137e-05, + "loss": 0.3021, "step": 2727 }, { - "epoch": 0.16, - "grad_norm": 0.3968209388434555, - "learning_rate": 1.916988741701019e-05, - "loss": 0.3847, + "epoch": 0.13, + "grad_norm": 0.7289336550926332, + "learning_rate": 1.952761404983194e-05, + "loss": 0.3908, "step": 2728 }, { - "epoch": 0.16, - "grad_norm": 0.45513126229457, - "learning_rate": 1.9169144915855532e-05, - "loss": 0.1219, + "epoch": 0.13, + "grad_norm": 1.2657848401552678, + "learning_rate": 1.9527162033866553e-05, + "loss": 0.5196, "step": 2729 }, { - "epoch": 0.16, - "grad_norm": 0.4630940115074412, - "learning_rate": 1.9168402097173774e-05, - "loss": 0.3857, + "epoch": 0.13, + "grad_norm": 0.33520501341426207, + "learning_rate": 1.952670980697898e-05, + "loss": 0.2596, "step": 2730 }, { - "epoch": 0.16, - "grad_norm": 0.31886087385947326, - "learning_rate": 1.916765896099065e-05, - "loss": 0.1737, + "epoch": 0.13, + "grad_norm": 0.32280453011747723, + "learning_rate": 1.9526257369179234e-05, + "loss": 0.2117, "step": 2731 }, { - "epoch": 0.16, - "grad_norm": 0.38274972925488704, - "learning_rate": 1.916691550733189e-05, - "loss": 0.2351, + "epoch": 0.13, + "grad_norm": 1.5288177718008797, + "learning_rate": 1.9525804720477334e-05, + "loss": 0.709, "step": 2732 }, { - "epoch": 0.16, - "grad_norm": 0.3942346069361589, - "learning_rate": 1.9166171736223244e-05, - "loss": 0.3809, + "epoch": 0.13, + "grad_norm": 0.48520347157174504, + "learning_rate": 1.9525351860883295e-05, + "loss": 0.1335, "step": 2733 }, { - "epoch": 0.16, - "grad_norm": 0.7779362864741345, - "learning_rate": 1.9165427647690457e-05, - "loss": 0.5735, + "epoch": 0.13, + "grad_norm": 0.5573358080341992, + "learning_rate": 1.952489879040715e-05, + "loss": 0.3427, "step": 2734 }, { - "epoch": 0.16, - "grad_norm": 0.7486116425794241, - "learning_rate": 1.916468324175931e-05, - "loss": 0.5144, + "epoch": 0.13, + "grad_norm": 0.8793106741548435, + "learning_rate": 1.952444550905892e-05, + "loss": 0.454, "step": 2735 }, { - "epoch": 0.16, - "grad_norm": 0.4035043380202906, - "learning_rate": 1.9163938518455577e-05, - "loss": 0.2557, + "epoch": 0.13, + "grad_norm": 0.4004608023974184, + "learning_rate": 1.952399201684865e-05, + "loss": 0.1364, "step": 2736 }, { - "epoch": 0.16, - "grad_norm": 0.2841072985025231, - "learning_rate": 1.9163193477805042e-05, - "loss": 0.2075, + "epoch": 0.13, + "grad_norm": 0.3657707487604822, + "learning_rate": 1.9523538313786375e-05, + "loss": 0.2389, "step": 2737 }, { - "epoch": 0.16, - "grad_norm": 0.4792294242182115, - "learning_rate": 1.9162448119833515e-05, - "loss": 0.4132, + "epoch": 0.13, + "grad_norm": 0.5697021320997692, + "learning_rate": 1.9523084399882143e-05, + "loss": 0.3455, "step": 2738 }, { - "epoch": 0.16, - "grad_norm": 0.440562055294282, - "learning_rate": 1.9161702444566803e-05, - "loss": 0.3153, + "epoch": 0.13, + "grad_norm": 0.44594186799801006, + "learning_rate": 1.9522630275146e-05, + "loss": 0.1242, "step": 2739 }, { - "epoch": 0.16, - "grad_norm": 0.43331696516541285, - "learning_rate": 1.9160956452030728e-05, - "loss": 0.3573, + "epoch": 0.13, + "grad_norm": 0.6536382645589643, + "learning_rate": 1.9522175939588003e-05, + "loss": 0.3949, "step": 2740 }, { - "epoch": 0.16, - "grad_norm": 0.7305226944821447, - "learning_rate": 1.9160210142251127e-05, - "loss": 0.4819, + "epoch": 0.13, + "grad_norm": 0.9872161811986501, + "learning_rate": 1.9521721393218204e-05, + "loss": 0.5995, "step": 2741 }, { - "epoch": 0.16, - "grad_norm": 0.39263699843117555, - "learning_rate": 1.9159463515253842e-05, - "loss": 0.2561, + "epoch": 0.13, + "grad_norm": 0.4859685734691315, + "learning_rate": 1.9521266636046672e-05, + "loss": 0.3523, "step": 2742 }, { - "epoch": 0.16, - "grad_norm": 0.28498177963286886, - "learning_rate": 1.9158716571064728e-05, - "loss": 0.2061, + "epoch": 0.13, + "grad_norm": 0.36719922944291267, + "learning_rate": 1.9520811668083472e-05, + "loss": 0.2042, "step": 2743 }, { - "epoch": 0.16, - "grad_norm": 0.509629190991603, - "learning_rate": 1.9157969309709656e-05, - "loss": 0.4165, + "epoch": 0.13, + "grad_norm": 0.6789302601937945, + "learning_rate": 1.9520356489338682e-05, + "loss": 0.3481, "step": 2744 }, { - "epoch": 0.16, - "grad_norm": 0.343301559511347, - "learning_rate": 1.9157221731214498e-05, - "loss": 0.2715, + "epoch": 0.13, + "grad_norm": 0.68770105102681, + "learning_rate": 1.951990109982237e-05, + "loss": 0.4135, "step": 2745 }, { - "epoch": 0.16, - "grad_norm": 0.9309225184678585, - "learning_rate": 1.9156473835605146e-05, - "loss": 0.6432, + "epoch": 0.13, + "grad_norm": 0.600709015073133, + "learning_rate": 1.9519445499544628e-05, + "loss": 0.2862, "step": 2746 }, { - "epoch": 0.16, - "grad_norm": 0.8504928341821835, - "learning_rate": 1.9155725622907496e-05, - "loss": 0.5317, + "epoch": 0.13, + "grad_norm": 0.8118126572157136, + "learning_rate": 1.9518989688515533e-05, + "loss": 0.4451, "step": 2747 }, { - "epoch": 0.16, - "grad_norm": 0.3641216283918297, - "learning_rate": 1.9154977093147467e-05, - "loss": 0.2938, + "epoch": 0.13, + "grad_norm": 0.7424200738399783, + "learning_rate": 1.9518533666745183e-05, + "loss": 0.3801, "step": 2748 }, { - "epoch": 0.16, - "grad_norm": 0.36303716960804644, - "learning_rate": 1.915422824635097e-05, - "loss": 0.148, + "epoch": 0.13, + "grad_norm": 0.2869061808105938, + "learning_rate": 1.951807743424367e-05, + "loss": 0.167, "step": 2749 }, { - "epoch": 0.16, - "grad_norm": 0.6009779568927565, - "learning_rate": 1.9153479082543945e-05, - "loss": 0.4414, + "epoch": 0.13, + "grad_norm": 0.3930092415093938, + "learning_rate": 1.95176209910211e-05, + "loss": 0.3283, "step": 2750 }, { - "epoch": 0.16, - "grad_norm": 0.3125761277927421, - "learning_rate": 1.9152729601752334e-05, - "loss": 0.2688, + "epoch": 0.13, + "grad_norm": 0.9737698862633957, + "learning_rate": 1.9517164337087575e-05, + "loss": 0.5595, "step": 2751 }, { - "epoch": 0.16, - "grad_norm": 0.4369004478121061, - "learning_rate": 1.9151979804002086e-05, - "loss": 0.2762, + "epoch": 0.13, + "grad_norm": 0.4183785959187085, + "learning_rate": 1.95167074724532e-05, + "loss": 0.2799, "step": 2752 }, { - "epoch": 0.16, - "grad_norm": 0.8335443101192821, - "learning_rate": 1.9151229689319177e-05, - "loss": 0.5237, + "epoch": 0.13, + "grad_norm": 1.1085401405518995, + "learning_rate": 1.9516250397128095e-05, + "loss": 0.7747, "step": 2753 }, { - "epoch": 0.16, - "grad_norm": 0.41561726711084734, - "learning_rate": 1.9150479257729576e-05, - "loss": 0.3092, + "epoch": 0.13, + "grad_norm": 0.48546134492392634, + "learning_rate": 1.951579311112238e-05, + "loss": 0.3428, "step": 2754 }, { - "epoch": 0.16, - "grad_norm": 0.9114026808149253, - "learning_rate": 1.9149728509259268e-05, - "loss": 0.3958, + "epoch": 0.13, + "grad_norm": 0.44941389051691616, + "learning_rate": 1.9515335614446172e-05, + "loss": 0.3003, "step": 2755 }, { - "epoch": 0.16, - "grad_norm": 0.37242842792677777, - "learning_rate": 1.9148977443934257e-05, - "loss": 0.313, + "epoch": 0.13, + "grad_norm": 0.353062941887427, + "learning_rate": 1.9514877907109612e-05, + "loss": 0.1523, "step": 2756 }, { - "epoch": 0.16, - "grad_norm": 0.45442449929965356, - "learning_rate": 1.914822606178055e-05, - "loss": 0.328, + "epoch": 0.13, + "grad_norm": 0.7607167501632541, + "learning_rate": 1.951441998912282e-05, + "loss": 0.4257, "step": 2757 }, { - "epoch": 0.16, - "grad_norm": 0.29522579252993075, - "learning_rate": 1.914747436282417e-05, - "loss": 0.1609, + "epoch": 0.13, + "grad_norm": 0.41644459660926225, + "learning_rate": 1.951396186049594e-05, + "loss": 0.2949, "step": 2758 }, { - "epoch": 0.16, - "grad_norm": 0.5405467495766618, - "learning_rate": 1.9146722347091145e-05, - "loss": 0.385, + "epoch": 0.13, + "grad_norm": 1.2735480429693264, + "learning_rate": 1.9513503521239116e-05, + "loss": 0.5326, "step": 2759 }, { - "epoch": 0.16, - "grad_norm": 0.3952204533027063, - "learning_rate": 1.9145970014607517e-05, - "loss": 0.297, + "epoch": 0.13, + "grad_norm": 0.8172160465077501, + "learning_rate": 1.9513044971362494e-05, + "loss": 0.5044, "step": 2760 }, { - "epoch": 0.16, - "grad_norm": 1.4252943635395237, - "learning_rate": 1.914521736539934e-05, - "loss": 0.5132, + "epoch": 0.13, + "grad_norm": 0.32402605057839046, + "learning_rate": 1.9512586210876223e-05, + "loss": 0.2122, "step": 2761 }, { - "epoch": 0.16, - "grad_norm": 0.42261706849570274, - "learning_rate": 1.9144464399492682e-05, - "loss": 0.2417, + "epoch": 0.13, + "grad_norm": 0.5973014932966829, + "learning_rate": 1.9512127239790463e-05, + "loss": 0.3008, "step": 2762 }, { - "epoch": 0.16, - "grad_norm": 0.3112984251374848, - "learning_rate": 1.9143711116913614e-05, - "loss": 0.2201, + "epoch": 0.13, + "grad_norm": 1.2406595123437545, + "learning_rate": 1.9511668058115375e-05, + "loss": 0.5092, "step": 2763 }, { - "epoch": 0.16, - "grad_norm": 0.5503114825951091, - "learning_rate": 1.9142957517688226e-05, - "loss": 0.3508, + "epoch": 0.13, + "grad_norm": 0.521271451529703, + "learning_rate": 1.951120866586112e-05, + "loss": 0.3167, "step": 2764 }, { - "epoch": 0.16, - "grad_norm": 2.290525038080281, - "learning_rate": 1.9142203601842607e-05, - "loss": 0.626, + "epoch": 0.13, + "grad_norm": 1.4250463987824893, + "learning_rate": 1.9510749063037876e-05, + "loss": 0.5902, "step": 2765 }, { - "epoch": 0.16, - "grad_norm": 0.4340454110617708, - "learning_rate": 1.9141449369402873e-05, - "loss": 0.305, + "epoch": 0.13, + "grad_norm": 0.4387400393617523, + "learning_rate": 1.951028924965581e-05, + "loss": 0.3096, "step": 2766 }, { - "epoch": 0.16, - "grad_norm": 1.0459985522737103, - "learning_rate": 1.914069482039514e-05, - "loss": 0.5296, + "epoch": 0.13, + "grad_norm": 0.5729286084890407, + "learning_rate": 1.950982922572511e-05, + "loss": 0.3358, "step": 2767 }, { - "epoch": 0.16, - "grad_norm": 0.4969749645117052, - "learning_rate": 1.913993995484554e-05, - "loss": 0.2863, + "epoch": 0.13, + "grad_norm": 0.5016881309631069, + "learning_rate": 1.9509368991255955e-05, + "loss": 0.2683, "step": 2768 }, { - "epoch": 0.16, - "grad_norm": 0.40985606509878225, - "learning_rate": 1.913918477278021e-05, - "loss": 0.2643, + "epoch": 0.13, + "grad_norm": 1.4319347970899394, + "learning_rate": 1.9508908546258535e-05, + "loss": 0.4604, "step": 2769 }, { - "epoch": 0.16, - "grad_norm": 0.6034420793845431, - "learning_rate": 1.9138429274225306e-05, - "loss": 0.3003, + "epoch": 0.13, + "grad_norm": 0.406908777435969, + "learning_rate": 1.950844789074305e-05, + "loss": 0.2859, "step": 2770 }, { - "epoch": 0.16, - "grad_norm": 0.9476623773086797, - "learning_rate": 1.913767345920699e-05, - "loss": 0.2913, + "epoch": 0.13, + "grad_norm": 0.60347220104888, + "learning_rate": 1.9507987024719686e-05, + "loss": 0.4303, "step": 2771 }, { - "epoch": 0.16, - "grad_norm": 0.7160270392478728, - "learning_rate": 1.9136917327751433e-05, - "loss": 0.3226, + "epoch": 0.13, + "grad_norm": 0.9701522684664564, + "learning_rate": 1.9507525948198657e-05, + "loss": 0.2933, "step": 2772 }, { - "epoch": 0.16, - "grad_norm": 1.8619252386421343, - "learning_rate": 1.913616087988482e-05, - "loss": 0.4995, + "epoch": 0.13, + "grad_norm": 0.42463834956329255, + "learning_rate": 1.950706466119016e-05, + "loss": 0.2956, "step": 2773 }, { - "epoch": 0.16, - "grad_norm": 1.1457060818764169, - "learning_rate": 1.9135404115633354e-05, - "loss": 0.6252, + "epoch": 0.13, + "grad_norm": 0.4943662887424194, + "learning_rate": 1.9506603163704427e-05, + "loss": 0.3764, "step": 2774 }, { - "epoch": 0.16, - "grad_norm": 0.4366886822707938, - "learning_rate": 1.9134647035023233e-05, - "loss": 0.2177, + "epoch": 0.13, + "grad_norm": 0.40481840936892105, + "learning_rate": 1.9506141455751652e-05, + "loss": 0.1575, "step": 2775 }, { - "epoch": 0.16, - "grad_norm": 0.37860818327377915, - "learning_rate": 1.913388963808068e-05, - "loss": 0.2423, + "epoch": 0.13, + "grad_norm": 0.5260020648484066, + "learning_rate": 1.9505679537342073e-05, + "loss": 0.3128, "step": 2776 }, { - "epoch": 0.16, - "grad_norm": 0.9684838256494043, - "learning_rate": 1.9133131924831917e-05, - "loss": 0.4987, + "epoch": 0.13, + "grad_norm": 1.5157984955215067, + "learning_rate": 1.950521740848591e-05, + "loss": 0.7835, "step": 2777 }, { - "epoch": 0.16, - "grad_norm": 0.8367814228740066, - "learning_rate": 1.9132373895303193e-05, - "loss": 0.2736, + "epoch": 0.13, + "grad_norm": 0.4688222092875608, + "learning_rate": 1.950475506919339e-05, + "loss": 0.2621, "step": 2778 }, { - "epoch": 0.16, - "grad_norm": 1.2098389822749138, - "learning_rate": 1.9131615549520752e-05, - "loss": 0.4306, + "epoch": 0.13, + "grad_norm": 0.48815828119810367, + "learning_rate": 1.950429251947476e-05, + "loss": 0.3263, "step": 2779 }, { - "epoch": 0.16, - "grad_norm": 0.8197725081398024, - "learning_rate": 1.913085688751086e-05, - "loss": 0.3871, + "epoch": 0.13, + "grad_norm": 0.7914354656834245, + "learning_rate": 1.950382975934025e-05, + "loss": 0.5288, "step": 2780 }, { - "epoch": 0.16, - "grad_norm": 0.49208863169300127, - "learning_rate": 1.913009790929978e-05, - "loss": 0.2306, + "epoch": 0.13, + "grad_norm": 0.35107923717638584, + "learning_rate": 1.950336678880011e-05, + "loss": 0.2091, "step": 2781 }, { - "epoch": 0.16, - "grad_norm": 0.3526381294259932, - "learning_rate": 1.9129338614913808e-05, - "loss": 0.2522, + "epoch": 0.13, + "grad_norm": 0.4383983500720953, + "learning_rate": 1.950290360786459e-05, + "loss": 0.2193, "step": 2782 }, { - "epoch": 0.16, - "grad_norm": 0.4756058297463948, - "learning_rate": 1.912857900437923e-05, - "loss": 0.326, + "epoch": 0.13, + "grad_norm": 0.6058417904690261, + "learning_rate": 1.950244021654394e-05, + "loss": 0.4111, "step": 2783 }, { - "epoch": 0.16, - "grad_norm": 0.45837754491678995, - "learning_rate": 1.9127819077722353e-05, - "loss": 0.3238, + "epoch": 0.13, + "grad_norm": 0.9889919122225294, + "learning_rate": 1.9501976614848425e-05, + "loss": 0.5436, "step": 2784 }, { - "epoch": 0.16, - "grad_norm": 1.4324033883404848, - "learning_rate": 1.9127058834969494e-05, - "loss": 0.4619, + "epoch": 0.13, + "grad_norm": 0.39388507729982825, + "learning_rate": 1.9501512802788306e-05, + "loss": 0.2278, "step": 2785 }, { - "epoch": 0.16, - "grad_norm": 1.0480337876957362, - "learning_rate": 1.9126298276146982e-05, - "loss": 0.5592, + "epoch": 0.13, + "grad_norm": 0.4630320384460615, + "learning_rate": 1.9501048780373853e-05, + "loss": 0.3645, "step": 2786 }, { - "epoch": 0.16, - "grad_norm": 0.41564243767044307, - "learning_rate": 1.912553740128115e-05, - "loss": 0.2959, + "epoch": 0.13, + "grad_norm": 0.3185265786835577, + "learning_rate": 1.9500584547615332e-05, + "loss": 0.1688, "step": 2787 }, { - "epoch": 0.16, - "grad_norm": 0.2892471983222418, - "learning_rate": 1.9124776210398354e-05, - "loss": 0.1853, + "epoch": 0.13, + "grad_norm": 0.3920268706444493, + "learning_rate": 1.9500120104523027e-05, + "loss": 0.2269, "step": 2788 }, { - "epoch": 0.16, - "grad_norm": 0.7875199409762493, - "learning_rate": 1.9124014703524946e-05, - "loss": 0.5366, + "epoch": 0.13, + "grad_norm": 0.5784021965223299, + "learning_rate": 1.9499655451107223e-05, + "loss": 0.4243, "step": 2789 }, { - "epoch": 0.16, - "grad_norm": 0.4263502403630036, - "learning_rate": 1.9123252880687303e-05, - "loss": 0.3377, + "epoch": 0.13, + "grad_norm": 0.6113989804919907, + "learning_rate": 1.94991905873782e-05, + "loss": 0.4376, "step": 2790 }, { - "epoch": 0.16, - "grad_norm": 0.5112805395238533, - "learning_rate": 1.9122490741911806e-05, - "loss": 0.2663, + "epoch": 0.13, + "grad_norm": 0.3915034966628301, + "learning_rate": 1.9498725513346254e-05, + "loss": 0.3049, "step": 2791 }, { - "epoch": 0.16, - "grad_norm": 0.525359587134657, - "learning_rate": 1.9121728287224844e-05, - "loss": 0.4025, + "epoch": 0.13, + "grad_norm": 0.8975716931366301, + "learning_rate": 1.9498260229021683e-05, + "loss": 0.4373, "step": 2792 }, { - "epoch": 0.16, - "grad_norm": 0.4425050759597999, - "learning_rate": 1.9120965516652828e-05, - "loss": 0.2785, + "epoch": 0.13, + "grad_norm": 0.2975147193454297, + "learning_rate": 1.9497794734414782e-05, + "loss": 0.2056, "step": 2793 }, { - "epoch": 0.16, - "grad_norm": 0.2788256255576857, - "learning_rate": 1.912020243022217e-05, - "loss": 0.1893, + "epoch": 0.13, + "grad_norm": 0.3666551769459927, + "learning_rate": 1.949732902953586e-05, + "loss": 0.2901, "step": 2794 }, { - "epoch": 0.16, - "grad_norm": 0.47566417935386807, - "learning_rate": 1.911943902795929e-05, - "loss": 0.3708, + "epoch": 0.13, + "grad_norm": 0.8456670959164763, + "learning_rate": 1.9496863114395223e-05, + "loss": 0.6062, "step": 2795 }, { - "epoch": 0.16, - "grad_norm": 0.47443133106229063, - "learning_rate": 1.9118675309890628e-05, - "loss": 0.3175, + "epoch": 0.13, + "grad_norm": 0.739838159372834, + "learning_rate": 1.9496396989003195e-05, + "loss": 0.4867, "step": 2796 }, { - "epoch": 0.16, - "grad_norm": 0.5872484241415183, - "learning_rate": 1.911791127604263e-05, - "loss": 0.4337, + "epoch": 0.13, + "grad_norm": 0.4258264861736871, + "learning_rate": 1.9495930653370088e-05, + "loss": 0.3199, "step": 2797 }, { - "epoch": 0.16, - "grad_norm": 0.9705192208886395, - "learning_rate": 1.9117146926441757e-05, - "loss": 0.3478, + "epoch": 0.13, + "grad_norm": 0.4784883452351987, + "learning_rate": 1.949546410750623e-05, + "loss": 0.297, "step": 2798 }, { - "epoch": 0.16, - "grad_norm": 0.3903267981435643, - "learning_rate": 1.9116382261114484e-05, - "loss": 0.2852, + "epoch": 0.13, + "grad_norm": 0.43358302967145923, + "learning_rate": 1.9494997351421946e-05, + "loss": 0.2555, "step": 2799 }, { - "epoch": 0.16, - "grad_norm": 0.3265402016275641, - "learning_rate": 1.911561728008728e-05, - "loss": 0.2788, + "epoch": 0.13, + "grad_norm": 0.42473575408629693, + "learning_rate": 1.9494530385127578e-05, + "loss": 0.281, "step": 2800 }, { - "epoch": 0.16, - "grad_norm": 0.7350955291555213, - "learning_rate": 1.9114851983386646e-05, - "loss": 0.4241, + "epoch": 0.13, + "grad_norm": 0.3931367615956359, + "learning_rate": 1.949406320863345e-05, + "loss": 0.2273, "step": 2801 }, { - "epoch": 0.16, - "grad_norm": 0.4414743371523098, - "learning_rate": 1.9114086371039078e-05, - "loss": 0.3136, + "epoch": 0.13, + "grad_norm": 0.7312989154615723, + "learning_rate": 1.949359582194992e-05, + "loss": 0.4464, "step": 2802 }, { - "epoch": 0.16, - "grad_norm": 0.4236622091499315, - "learning_rate": 1.911332044307109e-05, - "loss": 0.3239, + "epoch": 0.13, + "grad_norm": 0.5037413164123241, + "learning_rate": 1.9493128225087325e-05, + "loss": 0.2646, "step": 2803 }, { - "epoch": 0.16, - "grad_norm": 0.5945493542557609, - "learning_rate": 1.9112554199509207e-05, - "loss": 0.3062, + "epoch": 0.13, + "grad_norm": 0.7587385168440014, + "learning_rate": 1.9492660418056023e-05, + "loss": 0.4265, "step": 2804 }, { - "epoch": 0.16, - "grad_norm": 0.4080833519256838, - "learning_rate": 1.911178764037996e-05, - "loss": 0.2928, + "epoch": 0.13, + "grad_norm": 0.4501135382174161, + "learning_rate": 1.9492192400866366e-05, + "loss": 0.2954, "step": 2805 }, { - "epoch": 0.16, - "grad_norm": 0.803133176210868, - "learning_rate": 1.9111020765709905e-05, - "loss": 0.5323, + "epoch": 0.13, + "grad_norm": 0.48163449896690286, + "learning_rate": 1.949172417352872e-05, + "loss": 0.3377, "step": 2806 }, { - "epoch": 0.16, - "grad_norm": 0.4639683487323455, - "learning_rate": 1.9110253575525593e-05, - "loss": 0.3419, + "epoch": 0.13, + "grad_norm": 0.39728511648694087, + "learning_rate": 1.9491255736053448e-05, + "loss": 0.2648, "step": 2807 }, { - "epoch": 0.16, - "grad_norm": 0.48221297823220266, - "learning_rate": 1.910948606985359e-05, - "loss": 0.3114, + "epoch": 0.13, + "grad_norm": 1.0107534819519373, + "learning_rate": 1.9490787088450922e-05, + "loss": 0.4283, "step": 2808 }, { - "epoch": 0.16, - "grad_norm": 0.4188949921186524, - "learning_rate": 1.9108718248720472e-05, - "loss": 0.2776, + "epoch": 0.13, + "grad_norm": 0.3748556884861427, + "learning_rate": 1.949031823073152e-05, + "loss": 0.2419, "step": 2809 }, { - "epoch": 0.16, - "grad_norm": 0.4180698257150617, - "learning_rate": 1.9107950112152838e-05, - "loss": 0.2458, + "epoch": 0.13, + "grad_norm": 0.5628163699959556, + "learning_rate": 1.9489849162905613e-05, + "loss": 0.3876, "step": 2810 }, { - "epoch": 0.16, - "grad_norm": 0.33142474909538355, - "learning_rate": 1.910718166017728e-05, - "loss": 0.2246, + "epoch": 0.13, + "grad_norm": 0.8488054922242871, + "learning_rate": 1.9489379884983594e-05, + "loss": 0.2922, "step": 2811 }, { - "epoch": 0.16, - "grad_norm": 0.3976551718650196, - "learning_rate": 1.910641289282041e-05, - "loss": 0.3871, + "epoch": 0.13, + "grad_norm": 0.4282767645109543, + "learning_rate": 1.948891039697585e-05, + "loss": 0.298, "step": 2812 }, { - "epoch": 0.16, - "grad_norm": 0.754813347285477, - "learning_rate": 1.910564381010886e-05, - "loss": 0.5863, + "epoch": 0.13, + "grad_norm": 1.100961880061495, + "learning_rate": 1.9488440698892777e-05, + "loss": 0.5963, "step": 2813 }, { - "epoch": 0.16, - "grad_norm": 0.4022736855798634, - "learning_rate": 1.9104874412069253e-05, - "loss": 0.2171, + "epoch": 0.13, + "grad_norm": 0.3958890772853788, + "learning_rate": 1.9487970790744774e-05, + "loss": 0.2841, "step": 2814 }, { - "epoch": 0.16, - "grad_norm": 0.32538048947625997, - "learning_rate": 1.9104104698728235e-05, - "loss": 0.2844, + "epoch": 0.13, + "grad_norm": 0.31484311993082836, + "learning_rate": 1.9487500672542242e-05, + "loss": 0.234, "step": 2815 }, { - "epoch": 0.16, - "grad_norm": 0.4742319547411235, - "learning_rate": 1.9103334670112468e-05, - "loss": 0.3209, + "epoch": 0.13, + "grad_norm": 1.480546280574842, + "learning_rate": 1.9487030344295586e-05, + "loss": 0.7548, "step": 2816 }, { - "epoch": 0.16, - "grad_norm": 0.3539327872330429, - "learning_rate": 1.9102564326248608e-05, - "loss": 0.1927, + "epoch": 0.13, + "grad_norm": 0.5488601431227028, + "learning_rate": 1.9486559806015223e-05, + "loss": 0.3529, "step": 2817 }, { - "epoch": 0.16, - "grad_norm": 0.5182586066974086, - "learning_rate": 1.910179366716334e-05, - "loss": 0.4189, + "epoch": 0.13, + "grad_norm": 0.40819259012276216, + "learning_rate": 1.948608905771157e-05, + "loss": 0.2239, "step": 2818 }, { - "epoch": 0.16, - "grad_norm": 0.46291557914978526, - "learning_rate": 1.9101022692883348e-05, - "loss": 0.3427, + "epoch": 0.13, + "grad_norm": 1.2813114906017016, + "learning_rate": 1.948561809939505e-05, + "loss": 0.6409, "step": 2819 }, { - "epoch": 0.16, - "grad_norm": 0.3223453767011489, - "learning_rate": 1.910025140343533e-05, - "loss": 0.2285, + "epoch": 0.13, + "grad_norm": 0.5388129318395684, + "learning_rate": 1.948514693107608e-05, + "loss": 0.3754, "step": 2820 }, { - "epoch": 0.16, - "grad_norm": 0.4671981056244132, - "learning_rate": 1.9099479798845997e-05, - "loss": 0.2945, + "epoch": 0.13, + "grad_norm": 0.3285410010221218, + "learning_rate": 1.9484675552765107e-05, + "loss": 0.1574, "step": 2821 }, { - "epoch": 0.16, - "grad_norm": 0.48019689418661415, - "learning_rate": 1.9098707879142072e-05, - "loss": 0.3136, + "epoch": 0.13, + "grad_norm": 0.5001823189301893, + "learning_rate": 1.9484203964472558e-05, + "loss": 0.3216, "step": 2822 }, { - "epoch": 0.16, - "grad_norm": 0.4030849015440731, - "learning_rate": 1.9097935644350284e-05, - "loss": 0.2743, + "epoch": 0.13, + "grad_norm": 1.5421405736348646, + "learning_rate": 1.948373216620887e-05, + "loss": 0.6466, "step": 2823 }, { - "epoch": 0.16, - "grad_norm": 0.5656401007116433, - "learning_rate": 1.9097163094497374e-05, - "loss": 0.349, + "epoch": 0.13, + "grad_norm": 0.4583724636482592, + "learning_rate": 1.9483260157984497e-05, + "loss": 0.2094, "step": 2824 }, { - "epoch": 0.16, - "grad_norm": 0.6376831416506765, - "learning_rate": 1.9096390229610095e-05, - "loss": 0.557, + "epoch": 0.13, + "grad_norm": 0.5272388263955063, + "learning_rate": 1.948278793980988e-05, + "loss": 0.3917, "step": 2825 }, { - "epoch": 0.16, - "grad_norm": 0.4153453663473684, - "learning_rate": 1.9095617049715217e-05, - "loss": 0.3185, + "epoch": 0.13, + "grad_norm": 0.5005979324059838, + "learning_rate": 1.948231551169548e-05, + "loss": 0.3848, "step": 2826 }, { - "epoch": 0.16, - "grad_norm": 0.34493890499970015, - "learning_rate": 1.9094843554839513e-05, - "loss": 0.2722, + "epoch": 0.13, + "grad_norm": 0.24490559384719637, + "learning_rate": 1.9481842873651752e-05, + "loss": 0.1347, "step": 2827 }, { - "epoch": 0.16, - "grad_norm": 0.3024656467389478, - "learning_rate": 1.9094069745009766e-05, - "loss": 0.2226, + "epoch": 0.13, + "grad_norm": 0.750757486712475, + "learning_rate": 1.948137002568916e-05, + "loss": 0.4101, "step": 2828 }, { - "epoch": 0.16, - "grad_norm": 0.7295724146360978, - "learning_rate": 1.9093295620252776e-05, - "loss": 0.4354, + "epoch": 0.13, + "grad_norm": 0.528174893075401, + "learning_rate": 1.9480896967818176e-05, + "loss": 0.3684, "step": 2829 }, { - "epoch": 0.16, - "grad_norm": 0.3640806422459361, - "learning_rate": 1.9092521180595347e-05, - "loss": 0.2594, + "epoch": 0.13, + "grad_norm": 0.5074187237832363, + "learning_rate": 1.9480423700049275e-05, + "loss": 0.294, "step": 2830 }, { - "epoch": 0.16, - "grad_norm": 0.3788662395591776, - "learning_rate": 1.9091746426064303e-05, - "loss": 0.3483, + "epoch": 0.13, + "grad_norm": 0.4785170340488487, + "learning_rate": 1.9479950222392925e-05, + "loss": 0.3579, "step": 2831 }, { - "epoch": 0.16, - "grad_norm": 0.5866543462727462, - "learning_rate": 1.9090971356686473e-05, - "loss": 0.3439, + "epoch": 0.13, + "grad_norm": 0.9110532536162825, + "learning_rate": 1.9479476534859615e-05, + "loss": 0.6253, "step": 2832 }, { - "epoch": 0.16, - "grad_norm": 0.2907543494961357, - "learning_rate": 1.909019597248869e-05, - "loss": 0.2154, + "epoch": 0.13, + "grad_norm": 0.328390340221259, + "learning_rate": 1.9479002637459835e-05, + "loss": 0.2467, "step": 2833 }, { - "epoch": 0.16, - "grad_norm": 0.440487120983383, - "learning_rate": 1.9089420273497813e-05, - "loss": 0.2879, + "epoch": 0.13, + "grad_norm": 0.3993954503399109, + "learning_rate": 1.9478528530204068e-05, + "loss": 0.2065, "step": 2834 }, { - "epoch": 0.16, - "grad_norm": 0.3480069803830963, - "learning_rate": 1.9088644259740708e-05, - "loss": 0.3217, + "epoch": 0.13, + "grad_norm": 1.2971328917007663, + "learning_rate": 1.9478054213102817e-05, + "loss": 0.6084, "step": 2835 }, { - "epoch": 0.16, - "grad_norm": 0.5148676360714839, - "learning_rate": 1.9087867931244238e-05, - "loss": 0.364, + "epoch": 0.13, + "grad_norm": 0.6269204282202119, + "learning_rate": 1.9477579686166578e-05, + "loss": 0.3612, "step": 2836 }, { - "epoch": 0.16, - "grad_norm": 0.9445440875281329, - "learning_rate": 1.9087091288035293e-05, - "loss": 0.4219, + "epoch": 0.13, + "grad_norm": 0.4722885581933981, + "learning_rate": 1.9477104949405862e-05, + "loss": 0.3153, "step": 2837 }, { - "epoch": 0.16, - "grad_norm": 0.6342925351854132, - "learning_rate": 1.908631433014077e-05, - "loss": 0.4129, + "epoch": 0.13, + "grad_norm": 0.5527156876296377, + "learning_rate": 1.9476630002831175e-05, + "loss": 0.4068, "step": 2838 }, { - "epoch": 0.16, - "grad_norm": 0.32986836933112085, - "learning_rate": 1.9085537057587568e-05, - "loss": 0.3206, + "epoch": 0.13, + "grad_norm": 0.6674494946320816, + "learning_rate": 1.9476154846453037e-05, + "loss": 0.32, "step": 2839 }, { - "epoch": 0.16, - "grad_norm": 0.5347354807326525, - "learning_rate": 1.9084759470402612e-05, - "loss": 0.247, + "epoch": 0.13, + "grad_norm": 0.4067762750922892, + "learning_rate": 1.947567948028196e-05, + "loss": 0.1805, "step": 2840 }, { - "epoch": 0.16, - "grad_norm": 0.4500849622065172, - "learning_rate": 1.9083981568612828e-05, - "loss": 0.3415, + "epoch": 0.13, + "grad_norm": 0.40555496929204754, + "learning_rate": 1.9475203904328476e-05, + "loss": 0.3547, "step": 2841 }, { - "epoch": 0.16, - "grad_norm": 0.4047582580502225, - "learning_rate": 1.9083203352245148e-05, - "loss": 0.255, + "epoch": 0.13, + "grad_norm": 0.6624134475721274, + "learning_rate": 1.9474728118603107e-05, + "loss": 0.3937, "step": 2842 }, { - "epoch": 0.16, - "grad_norm": 0.4042265707783651, - "learning_rate": 1.9082424821326532e-05, - "loss": 0.3031, + "epoch": 0.13, + "grad_norm": 0.5252666646488344, + "learning_rate": 1.9474252123116388e-05, + "loss": 0.3821, "step": 2843 }, { - "epoch": 0.16, - "grad_norm": 0.4400449057120318, - "learning_rate": 1.9081645975883928e-05, - "loss": 0.325, + "epoch": 0.13, + "grad_norm": 1.146066423973684, + "learning_rate": 1.9473775917878862e-05, + "loss": 0.5262, "step": 2844 }, { - "epoch": 0.16, - "grad_norm": 0.5280907990199353, - "learning_rate": 1.908086681594432e-05, - "loss": 0.3971, + "epoch": 0.13, + "grad_norm": 0.3592800142084382, + "learning_rate": 1.9473299502901065e-05, + "loss": 0.2678, "step": 2845 }, { - "epoch": 0.16, - "grad_norm": 0.4402381574442051, - "learning_rate": 1.908008734153468e-05, - "loss": 0.3617, + "epoch": 0.13, + "grad_norm": 0.3227891643343318, + "learning_rate": 1.947282287819355e-05, + "loss": 0.2198, "step": 2846 }, { - "epoch": 0.16, - "grad_norm": 0.34365348876661916, - "learning_rate": 1.9079307552682013e-05, - "loss": 0.2332, + "epoch": 0.13, + "grad_norm": 0.8967841621019016, + "learning_rate": 1.9472346043766866e-05, + "loss": 0.4268, "step": 2847 }, { - "epoch": 0.16, - "grad_norm": 0.3630634485960283, - "learning_rate": 1.907852744941331e-05, - "loss": 0.2653, + "epoch": 0.13, + "grad_norm": 0.6206643221196362, + "learning_rate": 1.947186899963157e-05, + "loss": 0.4109, "step": 2848 }, { - "epoch": 0.16, - "grad_norm": 0.9110965112820536, - "learning_rate": 1.9077747031755594e-05, - "loss": 0.6379, + "epoch": 0.13, + "grad_norm": 0.4411361484433389, + "learning_rate": 1.947139174579822e-05, + "loss": 0.3295, "step": 2849 }, { - "epoch": 0.16, - "grad_norm": 0.4468249874232147, - "learning_rate": 1.9076966299735887e-05, - "loss": 0.1208, + "epoch": 0.13, + "grad_norm": 0.5668312019195174, + "learning_rate": 1.9470914282277387e-05, + "loss": 0.3178, "step": 2850 }, { - "epoch": 0.16, - "grad_norm": 0.3606662643476382, - "learning_rate": 1.9076185253381227e-05, - "loss": 0.2985, + "epoch": 0.13, + "grad_norm": 0.4546291005809553, + "learning_rate": 1.9470436609079645e-05, + "loss": 0.3065, "step": 2851 }, { - "epoch": 0.16, - "grad_norm": 0.6445782349707652, - "learning_rate": 1.9075403892718664e-05, - "loss": 0.533, + "epoch": 0.13, + "grad_norm": 0.29049155184985687, + "learning_rate": 1.946995872621556e-05, + "loss": 0.1896, "step": 2852 }, { - "epoch": 0.16, - "grad_norm": 0.3657364361197772, - "learning_rate": 1.9074622217775253e-05, - "loss": 0.1565, + "epoch": 0.13, + "grad_norm": 0.4881503094894983, + "learning_rate": 1.9469480633695715e-05, + "loss": 0.3225, "step": 2853 }, { - "epoch": 0.16, - "grad_norm": 0.352593659165976, - "learning_rate": 1.9073840228578068e-05, - "loss": 0.2859, + "epoch": 0.13, + "grad_norm": 0.7418212927463746, + "learning_rate": 1.9469002331530696e-05, + "loss": 0.393, "step": 2854 }, { - "epoch": 0.16, - "grad_norm": 0.4152158108112304, - "learning_rate": 1.9073057925154184e-05, - "loss": 0.3108, + "epoch": 0.13, + "grad_norm": 0.4980044292765687, + "learning_rate": 1.9468523819731095e-05, + "loss": 0.3537, "step": 2855 }, { - "epoch": 0.16, - "grad_norm": 0.5767800434899424, - "learning_rate": 1.9072275307530692e-05, - "loss": 0.2169, + "epoch": 0.13, + "grad_norm": 1.2872589102002467, + "learning_rate": 1.94680450983075e-05, + "loss": 0.7791, "step": 2856 }, { - "epoch": 0.16, - "grad_norm": 0.406189229853736, - "learning_rate": 1.9071492375734698e-05, - "loss": 0.3657, + "epoch": 0.13, + "grad_norm": 0.44645215780859826, + "learning_rate": 1.946756616727051e-05, + "loss": 0.2753, "step": 2857 }, { - "epoch": 0.16, - "grad_norm": 0.7434134856812202, - "learning_rate": 1.9070709129793313e-05, - "loss": 0.536, + "epoch": 0.13, + "grad_norm": 0.2772434930065425, + "learning_rate": 1.9467087026630733e-05, + "loss": 0.2063, "step": 2858 }, { - "epoch": 0.16, - "grad_norm": 0.30162267512857593, - "learning_rate": 1.906992556973366e-05, - "loss": 0.1912, + "epoch": 0.13, + "grad_norm": 0.8195769010881503, + "learning_rate": 1.9466607676398773e-05, + "loss": 0.519, "step": 2859 }, { - "epoch": 0.16, - "grad_norm": 0.360668147936046, - "learning_rate": 1.906914169558288e-05, - "loss": 0.2589, + "epoch": 0.13, + "grad_norm": 0.49380699577152537, + "learning_rate": 1.9466128116585242e-05, + "loss": 0.2066, "step": 2860 }, { - "epoch": 0.16, - "grad_norm": 0.8302107796578109, - "learning_rate": 1.9068357507368108e-05, - "loss": 0.6726, + "epoch": 0.13, + "grad_norm": 0.35838641909007946, + "learning_rate": 1.946564834720076e-05, + "loss": 0.3095, "step": 2861 }, { - "epoch": 0.16, - "grad_norm": 0.43014673007102716, - "learning_rate": 1.9067573005116506e-05, - "loss": 0.3113, + "epoch": 0.13, + "grad_norm": 1.5934028222744971, + "learning_rate": 1.9465168368255946e-05, + "loss": 0.8863, "step": 2862 }, { - "epoch": 0.16, - "grad_norm": 0.351210921130384, - "learning_rate": 1.9066788188855237e-05, - "loss": 0.2871, + "epoch": 0.13, + "grad_norm": 0.44995941036687864, + "learning_rate": 1.946468817976143e-05, + "loss": 0.233, "step": 2863 }, { - "epoch": 0.16, - "grad_norm": 0.8047851678299188, - "learning_rate": 1.906600305861149e-05, - "loss": 0.4959, + "epoch": 0.13, + "grad_norm": 0.4008155861561321, + "learning_rate": 1.9464207781727837e-05, + "loss": 0.3047, "step": 2864 }, { - "epoch": 0.16, - "grad_norm": 0.47323207583686905, - "learning_rate": 1.906521761441244e-05, - "loss": 0.2408, + "epoch": 0.13, + "grad_norm": 0.4961502799107277, + "learning_rate": 1.9463727174165802e-05, + "loss": 0.3546, "step": 2865 }, { - "epoch": 0.16, - "grad_norm": 0.3269727310027329, - "learning_rate": 1.90644318562853e-05, - "loss": 0.1871, + "epoch": 0.13, + "grad_norm": 0.29642968218164567, + "learning_rate": 1.9463246357085973e-05, + "loss": 0.1611, "step": 2866 }, { - "epoch": 0.16, - "grad_norm": 0.4722268465786524, - "learning_rate": 1.9063645784257274e-05, - "loss": 0.3434, + "epoch": 0.13, + "grad_norm": 0.5524041298144406, + "learning_rate": 1.946276533049899e-05, + "loss": 0.3652, "step": 2867 }, { - "epoch": 0.16, - "grad_norm": 0.8141264243435448, - "learning_rate": 1.906285939835558e-05, - "loss": 0.5588, + "epoch": 0.13, + "grad_norm": 1.6191332874432969, + "learning_rate": 1.94622840944155e-05, + "loss": 0.8323, "step": 2868 }, { - "epoch": 0.16, - "grad_norm": 0.3784784877528985, - "learning_rate": 1.9062072698607457e-05, - "loss": 0.2705, + "epoch": 0.13, + "grad_norm": 0.40941459468805647, + "learning_rate": 1.9461802648846163e-05, + "loss": 0.3313, "step": 2869 }, { - "epoch": 0.16, - "grad_norm": 0.6574080394539428, - "learning_rate": 1.9061285685040148e-05, - "loss": 0.3812, + "epoch": 0.13, + "grad_norm": 0.47907680930525204, + "learning_rate": 1.9461320993801633e-05, + "loss": 0.2515, "step": 2870 }, { - "epoch": 0.16, - "grad_norm": 0.4694759377072518, - "learning_rate": 1.9060498357680905e-05, - "loss": 0.3247, + "epoch": 0.13, + "grad_norm": 0.48112552535124037, + "learning_rate": 1.9460839129292575e-05, + "loss": 0.357, "step": 2871 }, { - "epoch": 0.17, - "grad_norm": 0.3590925137545595, - "learning_rate": 1.905971071655699e-05, - "loss": 0.243, + "epoch": 0.13, + "grad_norm": 0.349387993020198, + "learning_rate": 1.946035705532966e-05, + "loss": 0.1902, "step": 2872 }, { - "epoch": 0.17, - "grad_norm": 0.43092525195270276, - "learning_rate": 1.9058922761695684e-05, - "loss": 0.1675, + "epoch": 0.13, + "grad_norm": 0.44940603178533056, + "learning_rate": 1.9459874771923556e-05, + "loss": 0.2777, "step": 2873 }, { - "epoch": 0.17, - "grad_norm": 0.509367055075115, - "learning_rate": 1.9058134493124275e-05, - "loss": 0.3637, + "epoch": 0.13, + "grad_norm": 1.366761921031347, + "learning_rate": 1.9459392279084942e-05, + "loss": 0.8208, "step": 2874 }, { - "epoch": 0.17, - "grad_norm": 0.3456206104797771, - "learning_rate": 1.9057345910870054e-05, - "loss": 0.3076, + "epoch": 0.13, + "grad_norm": 0.9086740026426672, + "learning_rate": 1.94589095768245e-05, + "loss": 0.5648, "step": 2875 }, { - "epoch": 0.17, - "grad_norm": 1.0697084543942041, - "learning_rate": 1.905655701496034e-05, - "loss": 0.3674, + "epoch": 0.13, + "grad_norm": 0.40287550340105055, + "learning_rate": 1.9458426665152918e-05, + "loss": 0.2746, "step": 2876 }, { - "epoch": 0.17, - "grad_norm": 0.40182912681545313, - "learning_rate": 1.9055767805422438e-05, - "loss": 0.2836, + "epoch": 0.13, + "grad_norm": 0.39515145655812706, + "learning_rate": 1.9457943544080883e-05, + "loss": 0.3032, "step": 2877 }, { - "epoch": 0.17, - "grad_norm": 0.473138542860798, - "learning_rate": 1.905497828228369e-05, - "loss": 0.3122, + "epoch": 0.13, + "grad_norm": 0.4553196784369032, + "learning_rate": 1.9457460213619096e-05, + "loss": 0.2111, "step": 2878 }, { - "epoch": 0.17, - "grad_norm": 0.3813765291907576, - "learning_rate": 1.9054188445571435e-05, - "loss": 0.2404, + "epoch": 0.13, + "grad_norm": 0.441589637020675, + "learning_rate": 1.945697667377825e-05, + "loss": 0.2403, "step": 2879 }, { - "epoch": 0.17, - "grad_norm": 0.7796531744569375, - "learning_rate": 1.905339829531302e-05, - "loss": 0.448, + "epoch": 0.13, + "grad_norm": 1.1382514092871128, + "learning_rate": 1.9456492924569063e-05, + "loss": 0.6977, "step": 2880 }, { - "epoch": 0.17, - "grad_norm": 0.42795114627019815, - "learning_rate": 1.9052607831535812e-05, - "loss": 0.2856, + "epoch": 0.13, + "grad_norm": 0.43872769219695046, + "learning_rate": 1.9456008966002235e-05, + "loss": 0.3335, "step": 2881 }, { - "epoch": 0.17, - "grad_norm": 0.4012186112644199, - "learning_rate": 1.9051817054267184e-05, - "loss": 0.3072, + "epoch": 0.13, + "grad_norm": 0.42143152021122965, + "learning_rate": 1.945552479808848e-05, + "loss": 0.3347, "step": 2882 }, { - "epoch": 0.17, - "grad_norm": 0.748819417416899, - "learning_rate": 1.9051025963534526e-05, - "loss": 0.4127, + "epoch": 0.13, + "grad_norm": 0.8543263808496883, + "learning_rate": 1.9455040420838517e-05, + "loss": 0.4054, "step": 2883 }, { - "epoch": 0.17, - "grad_norm": 0.41468273696256386, - "learning_rate": 1.9050234559365223e-05, - "loss": 0.3054, + "epoch": 0.13, + "grad_norm": 0.3054398881733468, + "learning_rate": 1.9454555834263077e-05, + "loss": 0.2242, "step": 2884 }, { - "epoch": 0.17, - "grad_norm": 0.5002904393428474, - "learning_rate": 1.904944284178669e-05, - "loss": 0.3061, + "epoch": 0.13, + "grad_norm": 0.419589585139474, + "learning_rate": 1.945407103837288e-05, + "loss": 0.3164, "step": 2885 }, { - "epoch": 0.17, - "grad_norm": 0.4156710785103392, - "learning_rate": 1.9048650810826333e-05, - "loss": 0.3083, + "epoch": 0.13, + "grad_norm": 0.9345784559852062, + "learning_rate": 1.945358603317866e-05, + "loss": 0.4571, "step": 2886 }, { - "epoch": 0.17, - "grad_norm": 0.40806478413793273, - "learning_rate": 1.9047858466511594e-05, - "loss": 0.3049, + "epoch": 0.13, + "grad_norm": 0.7164887451346594, + "learning_rate": 1.9453100818691162e-05, + "loss": 0.4616, "step": 2887 }, { - "epoch": 0.17, - "grad_norm": 0.4128113081838106, - "learning_rate": 1.9047065808869902e-05, - "loss": 0.2899, + "epoch": 0.13, + "grad_norm": 0.4635393380843925, + "learning_rate": 1.9452615394921124e-05, + "loss": 0.3126, "step": 2888 }, { - "epoch": 0.17, - "grad_norm": 0.3442654178253383, - "learning_rate": 1.9046272837928713e-05, - "loss": 0.1134, + "epoch": 0.13, + "grad_norm": 0.4317265100389451, + "learning_rate": 1.9452129761879287e-05, + "loss": 0.3021, "step": 2889 }, { - "epoch": 0.17, - "grad_norm": 0.3907830303341359, - "learning_rate": 1.9045479553715482e-05, - "loss": 0.2846, + "epoch": 0.13, + "grad_norm": 0.402334428219918, + "learning_rate": 1.945164391957641e-05, + "loss": 0.1945, "step": 2890 }, { - "epoch": 0.17, - "grad_norm": 0.5828855253121485, - "learning_rate": 1.9044685956257686e-05, - "loss": 0.4289, + "epoch": 0.13, + "grad_norm": 0.47512840904404013, + "learning_rate": 1.9451157868023244e-05, + "loss": 0.2964, "step": 2891 }, { - "epoch": 0.17, - "grad_norm": 0.514484579636209, - "learning_rate": 1.9043892045582804e-05, - "loss": 0.3811, + "epoch": 0.13, + "grad_norm": 0.831216127013056, + "learning_rate": 1.9450671607230555e-05, + "loss": 0.3398, "step": 2892 }, { - "epoch": 0.17, - "grad_norm": 0.3266743076958912, - "learning_rate": 1.9043097821718327e-05, - "loss": 0.2815, + "epoch": 0.13, + "grad_norm": 0.6826246846647532, + "learning_rate": 1.94501851372091e-05, + "loss": 0.4102, "step": 2893 }, { - "epoch": 0.17, - "grad_norm": 0.5032500743515141, - "learning_rate": 1.9042303284691762e-05, - "loss": 0.3833, + "epoch": 0.13, + "grad_norm": 0.3846588778394741, + "learning_rate": 1.944969845796966e-05, + "loss": 0.3048, "step": 2894 }, { - "epoch": 0.17, - "grad_norm": 0.4345386460910853, - "learning_rate": 1.9041508434530622e-05, - "loss": 0.2607, + "epoch": 0.13, + "grad_norm": 1.2903335362931123, + "learning_rate": 1.9449211569523002e-05, + "loss": 0.5959, "step": 2895 }, { - "epoch": 0.17, - "grad_norm": 0.41878196971218673, - "learning_rate": 1.9040713271262438e-05, - "loss": 0.2993, + "epoch": 0.13, + "grad_norm": 0.3835317202047741, + "learning_rate": 1.9448724471879905e-05, + "loss": 0.233, "step": 2896 }, { - "epoch": 0.17, - "grad_norm": 0.8099840083560745, - "learning_rate": 1.9039917794914736e-05, - "loss": 0.5032, + "epoch": 0.13, + "grad_norm": 0.4579775810180769, + "learning_rate": 1.9448237165051155e-05, + "loss": 0.2938, "step": 2897 }, { - "epoch": 0.17, - "grad_norm": 0.42788121734475587, - "learning_rate": 1.9039122005515074e-05, - "loss": 0.3714, + "epoch": 0.13, + "grad_norm": 0.6574760559842552, + "learning_rate": 1.944774964904754e-05, + "loss": 0.3793, "step": 2898 }, { - "epoch": 0.17, - "grad_norm": 0.3654518107930367, - "learning_rate": 1.9038325903091003e-05, - "loss": 0.218, + "epoch": 0.13, + "grad_norm": 1.5577957221159746, + "learning_rate": 1.9447261923879858e-05, + "loss": 0.4172, "step": 2899 }, { - "epoch": 0.17, - "grad_norm": 0.37284915313610173, - "learning_rate": 1.90375294876701e-05, - "loss": 0.2112, + "epoch": 0.13, + "grad_norm": 0.47983638241508664, + "learning_rate": 1.94467739895589e-05, + "loss": 0.2951, "step": 2900 }, { - "epoch": 0.17, - "grad_norm": 1.25242807424076, - "learning_rate": 1.9036732759279935e-05, - "loss": 0.6335, + "epoch": 0.13, + "grad_norm": 0.535976341839264, + "learning_rate": 1.944628584609547e-05, + "loss": 0.3682, "step": 2901 }, { - "epoch": 0.17, - "grad_norm": 0.3850326090104052, - "learning_rate": 1.9035935717948102e-05, - "loss": 0.2102, + "epoch": 0.13, + "grad_norm": 1.2942518610775824, + "learning_rate": 1.9445797493500377e-05, + "loss": 0.3474, "step": 2902 }, { - "epoch": 0.17, - "grad_norm": 0.6094638643634038, - "learning_rate": 1.9035138363702206e-05, - "loss": 0.392, + "epoch": 0.13, + "grad_norm": 0.4702898809117938, + "learning_rate": 1.944530893178443e-05, + "loss": 0.3526, "step": 2903 }, { - "epoch": 0.17, - "grad_norm": 0.9291079961641789, - "learning_rate": 1.9034340696569858e-05, - "loss": 0.635, + "epoch": 0.13, + "grad_norm": 0.8470772800967312, + "learning_rate": 1.944482016095845e-05, + "loss": 0.3839, "step": 2904 }, { - "epoch": 0.17, - "grad_norm": 0.6338307726280604, - "learning_rate": 1.9033542716578677e-05, - "loss": 0.248, + "epoch": 0.13, + "grad_norm": 0.6883578292978668, + "learning_rate": 1.9444331181033253e-05, + "loss": 0.3245, "step": 2905 }, { - "epoch": 0.17, - "grad_norm": 0.3460124838063629, - "learning_rate": 1.90327444237563e-05, - "loss": 0.2289, + "epoch": 0.13, + "grad_norm": 0.41948378401097774, + "learning_rate": 1.9443841992019666e-05, + "loss": 0.2088, "step": 2906 }, { - "epoch": 0.17, - "grad_norm": 1.568956704643555, - "learning_rate": 1.9031945818130373e-05, - "loss": 0.7279, + "epoch": 0.13, + "grad_norm": 0.9363271850852859, + "learning_rate": 1.9443352593928518e-05, + "loss": 0.5534, "step": 2907 }, { - "epoch": 0.17, - "grad_norm": 0.4521370738515677, - "learning_rate": 1.9031146899728555e-05, - "loss": 0.2614, + "epoch": 0.13, + "grad_norm": 0.4654242006795171, + "learning_rate": 1.9442862986770645e-05, + "loss": 0.3661, "step": 2908 }, { - "epoch": 0.17, - "grad_norm": 0.9225487734393911, - "learning_rate": 1.9030347668578506e-05, - "loss": 0.5389, + "epoch": 0.13, + "grad_norm": 0.4610474265065332, + "learning_rate": 1.944237317055689e-05, + "loss": 0.2211, "step": 2909 }, { - "epoch": 0.17, - "grad_norm": 0.5007109970727628, - "learning_rate": 1.90295481247079e-05, - "loss": 0.3537, + "epoch": 0.13, + "grad_norm": 0.7112185195432745, + "learning_rate": 1.944188314529809e-05, + "loss": 0.5284, "step": 2910 }, { - "epoch": 0.17, - "grad_norm": 0.3697140213680634, - "learning_rate": 1.902874826814444e-05, - "loss": 0.2837, + "epoch": 0.13, + "grad_norm": 0.32162694484719206, + "learning_rate": 1.94413929110051e-05, + "loss": 0.2361, "step": 2911 }, { - "epoch": 0.17, - "grad_norm": 0.25441038379523395, - "learning_rate": 1.902794809891581e-05, - "loss": 0.1146, + "epoch": 0.13, + "grad_norm": 0.40477685546820236, + "learning_rate": 1.9440902467688772e-05, + "loss": 0.2152, "step": 2912 }, { - "epoch": 0.17, - "grad_norm": 0.9960926038338246, - "learning_rate": 1.9027147617049727e-05, - "loss": 0.5207, + "epoch": 0.13, + "grad_norm": 0.49410989293304114, + "learning_rate": 1.9440411815359957e-05, + "loss": 0.3489, "step": 2913 }, { - "epoch": 0.17, - "grad_norm": 0.49115998471316646, - "learning_rate": 1.9026346822573906e-05, - "loss": 0.2785, + "epoch": 0.13, + "grad_norm": 1.3391030784347844, + "learning_rate": 1.9439920954029527e-05, + "loss": 0.7046, "step": 2914 }, { - "epoch": 0.17, - "grad_norm": 0.6090023738796487, - "learning_rate": 1.902554571551609e-05, - "loss": 0.3221, + "epoch": 0.13, + "grad_norm": 0.36208087905490566, + "learning_rate": 1.9439429883708344e-05, + "loss": 0.2545, "step": 2915 }, { - "epoch": 0.17, - "grad_norm": 1.1039710139860757, - "learning_rate": 1.902474429590401e-05, - "loss": 0.735, + "epoch": 0.13, + "grad_norm": 0.4673203435459726, + "learning_rate": 1.9438938604407283e-05, + "loss": 0.4, "step": 2916 }, { - "epoch": 0.17, - "grad_norm": 0.49261792202989546, - "learning_rate": 1.9023942563765422e-05, - "loss": 0.2849, + "epoch": 0.13, + "grad_norm": 0.4034777431903411, + "learning_rate": 1.9438447116137218e-05, + "loss": 0.2821, "step": 2917 }, { - "epoch": 0.17, - "grad_norm": 0.28069746124257844, - "learning_rate": 1.9023140519128093e-05, - "loss": 0.1992, + "epoch": 0.13, + "grad_norm": 0.3745479288801289, + "learning_rate": 1.943795541890903e-05, + "loss": 0.2484, "step": 2918 }, { - "epoch": 0.17, - "grad_norm": 1.3144734644202736, - "learning_rate": 1.9022338162019794e-05, - "loss": 0.5199, + "epoch": 0.13, + "grad_norm": 1.0478111204784013, + "learning_rate": 1.9437463512733607e-05, + "loss": 0.3924, "step": 2919 }, { - "epoch": 0.17, - "grad_norm": 0.5300530829580868, - "learning_rate": 1.9021535492468313e-05, - "loss": 0.3645, + "epoch": 0.13, + "grad_norm": 0.5589443557700237, + "learning_rate": 1.9436971397621834e-05, + "loss": 0.3898, "step": 2920 }, { - "epoch": 0.17, - "grad_norm": 0.5599830716860857, - "learning_rate": 1.9020732510501445e-05, - "loss": 0.3213, + "epoch": 0.13, + "grad_norm": 0.4728030809067571, + "learning_rate": 1.9436479073584617e-05, + "loss": 0.3024, "step": 2921 }, { - "epoch": 0.17, - "grad_norm": 0.4318688829311013, - "learning_rate": 1.9019929216147002e-05, - "loss": 0.3321, + "epoch": 0.13, + "grad_norm": 0.6878128655053624, + "learning_rate": 1.9435986540632843e-05, + "loss": 0.4158, "step": 2922 }, { - "epoch": 0.17, - "grad_norm": 0.40200132881097284, - "learning_rate": 1.9019125609432793e-05, - "loss": 0.2933, + "epoch": 0.13, + "grad_norm": 0.3528471135911017, + "learning_rate": 1.943549379877742e-05, + "loss": 0.1992, "step": 2923 }, { - "epoch": 0.17, - "grad_norm": 0.35823983826684375, - "learning_rate": 1.9018321690386656e-05, - "loss": 0.2411, + "epoch": 0.13, + "grad_norm": 0.4735949154677886, + "learning_rate": 1.943500084802926e-05, + "loss": 0.284, "step": 2924 }, { - "epoch": 0.17, - "grad_norm": 1.2091907994532067, - "learning_rate": 1.9017517459036426e-05, - "loss": 0.4403, + "epoch": 0.13, + "grad_norm": 0.4967112187595432, + "learning_rate": 1.943450768839928e-05, + "loss": 0.2843, "step": 2925 }, { - "epoch": 0.17, - "grad_norm": 0.44885633440024797, - "learning_rate": 1.9016712915409953e-05, - "loss": 0.2987, + "epoch": 0.13, + "grad_norm": 0.8239924272925735, + "learning_rate": 1.9434014319898383e-05, + "loss": 0.5752, "step": 2926 }, { - "epoch": 0.17, - "grad_norm": 0.6097786376505031, - "learning_rate": 1.90159080595351e-05, - "loss": 0.3783, + "epoch": 0.13, + "grad_norm": 0.49230049065217124, + "learning_rate": 1.943352074253751e-05, + "loss": 0.2829, "step": 2927 }, { - "epoch": 0.17, - "grad_norm": 1.4166969282231294, - "learning_rate": 1.901510289143974e-05, - "loss": 0.5974, + "epoch": 0.13, + "grad_norm": 0.33377275743423607, + "learning_rate": 1.9433026956327577e-05, + "loss": 0.3189, "step": 2928 }, { - "epoch": 0.17, - "grad_norm": 0.39087853057358346, - "learning_rate": 1.901429741115175e-05, - "loss": 0.2789, + "epoch": 0.13, + "grad_norm": 1.3410700961867785, + "learning_rate": 1.9432532961279516e-05, + "loss": 0.7849, "step": 2929 }, { - "epoch": 0.17, - "grad_norm": 0.4707325846249934, - "learning_rate": 1.901349161869903e-05, - "loss": 0.3342, + "epoch": 0.13, + "grad_norm": 0.28759336199283464, + "learning_rate": 1.943203875740427e-05, + "loss": 0.1828, "step": 2930 }, { - "epoch": 0.17, - "grad_norm": 0.5044461165624786, - "learning_rate": 1.9012685514109487e-05, - "loss": 0.2705, + "epoch": 0.13, + "grad_norm": 0.6866012706335546, + "learning_rate": 1.9431544344712776e-05, + "loss": 0.3896, "step": 2931 }, { - "epoch": 0.17, - "grad_norm": 0.446221372568942, - "learning_rate": 1.9011879097411028e-05, - "loss": 0.2971, + "epoch": 0.13, + "grad_norm": 0.5417934821151584, + "learning_rate": 1.943104972321598e-05, + "loss": 0.3065, "step": 2932 }, { - "epoch": 0.17, - "grad_norm": 0.7000113910533794, - "learning_rate": 1.9011072368631586e-05, - "loss": 0.4128, + "epoch": 0.13, + "grad_norm": 0.4522012087546732, + "learning_rate": 1.9430554892924834e-05, + "loss": 0.3078, "step": 2933 }, { - "epoch": 0.17, - "grad_norm": 0.6515818280851625, - "learning_rate": 1.9010265327799092e-05, - "loss": 0.3242, + "epoch": 0.13, + "grad_norm": 0.9097237924793209, + "learning_rate": 1.9430059853850292e-05, + "loss": 0.6023, "step": 2934 }, { - "epoch": 0.17, - "grad_norm": 0.4527641519593625, - "learning_rate": 1.90094579749415e-05, - "loss": 0.271, + "epoch": 0.13, + "grad_norm": 0.3400763036064946, + "learning_rate": 1.942956460600331e-05, + "loss": 0.1673, "step": 2935 }, { - "epoch": 0.17, - "grad_norm": 0.34571287724653443, - "learning_rate": 1.9008650310086768e-05, - "loss": 0.3034, + "epoch": 0.13, + "grad_norm": 0.42301433597288635, + "learning_rate": 1.942906914939486e-05, + "loss": 0.2607, "step": 2936 }, { - "epoch": 0.17, - "grad_norm": 0.3554877938716127, - "learning_rate": 1.900784233326286e-05, - "loss": 0.3289, + "epoch": 0.13, + "grad_norm": 0.562921678379757, + "learning_rate": 1.9428573484035905e-05, + "loss": 0.3279, "step": 2937 }, { - "epoch": 0.17, - "grad_norm": 0.4412511425860835, - "learning_rate": 1.9007034044497757e-05, - "loss": 0.2159, + "epoch": 0.13, + "grad_norm": 0.845392382995151, + "learning_rate": 1.9428077609937422e-05, + "loss": 0.4339, "step": 2938 }, { - "epoch": 0.17, - "grad_norm": 0.7452586967068775, - "learning_rate": 1.9006225443819456e-05, - "loss": 0.4174, + "epoch": 0.14, + "grad_norm": 0.43403781503553085, + "learning_rate": 1.9427581527110387e-05, + "loss": 0.2954, "step": 2939 }, { - "epoch": 0.17, - "grad_norm": 1.353204527219416, - "learning_rate": 1.900541653125595e-05, - "loss": 0.8481, + "epoch": 0.14, + "grad_norm": 0.6488486027284759, + "learning_rate": 1.942708523556578e-05, + "loss": 0.3914, "step": 2940 }, { - "epoch": 0.17, - "grad_norm": 0.4632062818338658, - "learning_rate": 1.9004607306835263e-05, - "loss": 0.218, + "epoch": 0.14, + "grad_norm": 0.35372505871879095, + "learning_rate": 1.9426588735314596e-05, + "loss": 0.2445, "step": 2941 }, { - "epoch": 0.17, - "grad_norm": 0.5341120108017874, - "learning_rate": 1.900379777058541e-05, - "loss": 0.3449, + "epoch": 0.14, + "grad_norm": 0.43198754897761893, + "learning_rate": 1.9426092026367822e-05, + "loss": 0.2214, "step": 2942 }, { - "epoch": 0.17, - "grad_norm": 0.9366471498287889, - "learning_rate": 1.9002987922534427e-05, - "loss": 0.5459, + "epoch": 0.14, + "grad_norm": 0.5302542697717891, + "learning_rate": 1.9425595108736454e-05, + "loss": 0.323, "step": 2943 }, { - "epoch": 0.17, - "grad_norm": 0.27741904876939394, - "learning_rate": 1.900217776271036e-05, - "loss": 0.1587, + "epoch": 0.14, + "grad_norm": 0.61066221031102, + "learning_rate": 1.94250979824315e-05, + "loss": 0.3537, "step": 2944 }, { - "epoch": 0.17, - "grad_norm": 1.3941750294590425, - "learning_rate": 1.9001367291141264e-05, - "loss": 0.6409, + "epoch": 0.14, + "grad_norm": 0.5196896967586498, + "learning_rate": 1.9424600647463955e-05, + "loss": 0.2577, "step": 2945 }, { - "epoch": 0.17, - "grad_norm": 0.48903414699985137, - "learning_rate": 1.9000556507855204e-05, - "loss": 0.353, + "epoch": 0.14, + "grad_norm": 0.6336482565745153, + "learning_rate": 1.9424103103844837e-05, + "loss": 0.4593, "step": 2946 }, { - "epoch": 0.17, - "grad_norm": 0.4693605982996768, - "learning_rate": 1.8999745412880264e-05, - "loss": 0.3303, + "epoch": 0.14, + "grad_norm": 0.9534744796712975, + "learning_rate": 1.9423605351585157e-05, + "loss": 0.5325, "step": 2947 }, { - "epoch": 0.17, - "grad_norm": 0.5832080658668408, - "learning_rate": 1.8998934006244522e-05, - "loss": 0.3351, + "epoch": 0.14, + "grad_norm": 0.3405004762116534, + "learning_rate": 1.9423107390695942e-05, + "loss": 0.1878, "step": 2948 }, { - "epoch": 0.17, - "grad_norm": 0.5745640754829965, - "learning_rate": 1.8998122287976085e-05, - "loss": 0.4208, + "epoch": 0.14, + "grad_norm": 0.39998822062285394, + "learning_rate": 1.9422609221188208e-05, + "loss": 0.3219, "step": 2949 }, { - "epoch": 0.17, - "grad_norm": 0.3367288088164626, - "learning_rate": 1.899731025810306e-05, - "loss": 0.2534, + "epoch": 0.14, + "grad_norm": 0.8719287307134982, + "learning_rate": 1.9422110843072986e-05, + "loss": 0.5167, "step": 2950 }, { - "epoch": 0.17, - "grad_norm": 0.38374292249387476, - "learning_rate": 1.8996497916653565e-05, - "loss": 0.1868, + "epoch": 0.14, + "grad_norm": 0.43666795576944717, + "learning_rate": 1.942161225636131e-05, + "loss": 0.2342, "step": 2951 }, { - "epoch": 0.17, - "grad_norm": 1.4710186990718137, - "learning_rate": 1.899568526365574e-05, - "loss": 0.8454, + "epoch": 0.14, + "grad_norm": 0.4562447822500738, + "learning_rate": 1.9421113461064226e-05, + "loss": 0.3518, "step": 2952 }, { - "epoch": 0.17, - "grad_norm": 0.6019236351354754, - "learning_rate": 1.8994872299137715e-05, - "loss": 0.462, + "epoch": 0.14, + "grad_norm": 1.4931287653994332, + "learning_rate": 1.942061445719277e-05, + "loss": 0.7072, "step": 2953 }, { - "epoch": 0.17, - "grad_norm": 0.3551118807958904, - "learning_rate": 1.8994059023127655e-05, - "loss": 0.2787, + "epoch": 0.14, + "grad_norm": 0.30232457338830604, + "learning_rate": 1.9420115244757985e-05, + "loss": 0.1788, "step": 2954 }, { - "epoch": 0.17, - "grad_norm": 0.4704132793170269, - "learning_rate": 1.899324543565371e-05, - "loss": 0.3093, + "epoch": 0.14, + "grad_norm": 0.5324271347870605, + "learning_rate": 1.941961582377093e-05, + "loss": 0.2682, "step": 2955 }, { - "epoch": 0.17, - "grad_norm": 0.37749498137552984, - "learning_rate": 1.899243153674407e-05, - "loss": 0.1934, + "epoch": 0.14, + "grad_norm": 0.4826746887337732, + "learning_rate": 1.9419116194242655e-05, + "loss": 0.3702, "step": 2956 }, { - "epoch": 0.17, - "grad_norm": 0.4254951358246995, - "learning_rate": 1.8991617326426907e-05, - "loss": 0.2868, + "epoch": 0.14, + "grad_norm": 0.6661161353932512, + "learning_rate": 1.9418616356184233e-05, + "loss": 0.3446, "step": 2957 }, { - "epoch": 0.17, - "grad_norm": 0.6753680434296535, - "learning_rate": 1.8990802804730424e-05, - "loss": 0.4295, + "epoch": 0.14, + "grad_norm": 0.5828021767397665, + "learning_rate": 1.9418116309606717e-05, + "loss": 0.3189, "step": 2958 }, { - "epoch": 0.17, - "grad_norm": 0.7358847969565454, - "learning_rate": 1.8989987971682828e-05, - "loss": 0.3888, + "epoch": 0.14, + "grad_norm": 1.8387653815033052, + "learning_rate": 1.9417616054521186e-05, + "loss": 0.7676, "step": 2959 }, { - "epoch": 0.17, - "grad_norm": 0.40847603026118584, - "learning_rate": 1.8989172827312337e-05, - "loss": 0.3701, + "epoch": 0.14, + "grad_norm": 0.38570063542358163, + "learning_rate": 1.941711559093871e-05, + "loss": 0.2972, "step": 2960 }, { - "epoch": 0.17, - "grad_norm": 0.5456246498843511, - "learning_rate": 1.8988357371647173e-05, - "loss": 0.2735, + "epoch": 0.14, + "grad_norm": 0.40789276258751733, + "learning_rate": 1.9416614918870372e-05, + "loss": 0.2603, "step": 2961 }, { - "epoch": 0.17, - "grad_norm": 0.31106052444760784, - "learning_rate": 1.8987541604715584e-05, - "loss": 0.2288, + "epoch": 0.14, + "grad_norm": 0.5091158507427102, + "learning_rate": 1.9416114038327255e-05, + "loss": 0.3391, "step": 2962 }, { - "epoch": 0.17, - "grad_norm": 0.36068213668776067, - "learning_rate": 1.898672552654581e-05, - "loss": 0.2461, + "epoch": 0.14, + "grad_norm": 1.0085101271785766, + "learning_rate": 1.9415612949320453e-05, + "loss": 0.5476, "step": 2963 }, { - "epoch": 0.17, - "grad_norm": 0.9711275231098498, - "learning_rate": 1.8985909137166122e-05, - "loss": 0.4776, + "epoch": 0.14, + "grad_norm": 0.4459637566524281, + "learning_rate": 1.9415111651861052e-05, + "loss": 0.2719, "step": 2964 }, { - "epoch": 0.17, - "grad_norm": 0.3911974616653507, - "learning_rate": 1.8985092436604783e-05, - "loss": 0.329, + "epoch": 0.14, + "grad_norm": 1.1746904239405833, + "learning_rate": 1.941461014596015e-05, + "loss": 0.558, "step": 2965 }, { - "epoch": 0.17, - "grad_norm": 0.4923858502324332, - "learning_rate": 1.8984275424890085e-05, - "loss": 0.3115, + "epoch": 0.14, + "grad_norm": 0.5812730136497452, + "learning_rate": 1.9414108431628857e-05, + "loss": 0.3659, "step": 2966 }, { - "epoch": 0.17, - "grad_norm": 0.5945187050292371, - "learning_rate": 1.8983458102050313e-05, - "loss": 0.4092, + "epoch": 0.14, + "grad_norm": 0.48438880590568284, + "learning_rate": 1.941360650887828e-05, + "loss": 0.3337, "step": 2967 }, { - "epoch": 0.17, - "grad_norm": 0.24507124970481212, - "learning_rate": 1.8982640468113774e-05, - "loss": 0.197, + "epoch": 0.14, + "grad_norm": 0.4054303183908276, + "learning_rate": 1.9413104377719523e-05, + "loss": 0.2784, "step": 2968 }, { - "epoch": 0.17, - "grad_norm": 0.5955934559945066, - "learning_rate": 1.898182252310878e-05, - "loss": 0.4072, + "epoch": 0.14, + "grad_norm": 0.46448095576359333, + "learning_rate": 1.941260203816371e-05, + "loss": 0.2318, "step": 2969 }, { - "epoch": 0.17, - "grad_norm": 0.429200057143055, - "learning_rate": 1.8981004267063658e-05, - "loss": 0.3242, + "epoch": 0.14, + "grad_norm": 0.5127598735175937, + "learning_rate": 1.941209949022196e-05, + "loss": 0.3921, "step": 2970 }, { - "epoch": 0.17, - "grad_norm": 0.5983194080034205, - "learning_rate": 1.8980185700006744e-05, - "loss": 0.4249, + "epoch": 0.14, + "grad_norm": 1.196186185364542, + "learning_rate": 1.9411596733905393e-05, + "loss": 0.4027, "step": 2971 }, { - "epoch": 0.17, - "grad_norm": 0.42932383596148743, - "learning_rate": 1.8979366821966386e-05, - "loss": 0.3293, + "epoch": 0.14, + "grad_norm": 0.4135414056053703, + "learning_rate": 1.941109376922515e-05, + "loss": 0.3104, "step": 2972 }, { - "epoch": 0.17, - "grad_norm": 0.3954252758600323, - "learning_rate": 1.8978547632970943e-05, - "loss": 0.3225, + "epoch": 0.14, + "grad_norm": 0.5123966751028497, + "learning_rate": 1.9410590596192362e-05, + "loss": 0.3943, "step": 2973 }, { - "epoch": 0.17, - "grad_norm": 0.30943424037499556, - "learning_rate": 1.897772813304878e-05, - "loss": 0.0774, + "epoch": 0.14, + "grad_norm": 0.3762696736388155, + "learning_rate": 1.9410087214818167e-05, + "loss": 0.2127, "step": 2974 }, { - "epoch": 0.17, - "grad_norm": 0.2955268319884107, - "learning_rate": 1.8976908322228277e-05, - "loss": 0.2291, + "epoch": 0.14, + "grad_norm": 0.4995647581073475, + "learning_rate": 1.940958362511371e-05, + "loss": 0.3121, "step": 2975 }, { - "epoch": 0.17, - "grad_norm": 0.8219325687989213, - "learning_rate": 1.897608820053783e-05, - "loss": 0.5307, + "epoch": 0.14, + "grad_norm": 0.5240746054911626, + "learning_rate": 1.9409079827090145e-05, + "loss": 0.3458, "step": 2976 }, { - "epoch": 0.17, - "grad_norm": 0.3811432509694952, - "learning_rate": 1.8975267768005828e-05, - "loss": 0.2864, + "epoch": 0.14, + "grad_norm": 0.5060787714269666, + "learning_rate": 1.9408575820758616e-05, + "loss": 0.3205, "step": 2977 }, { - "epoch": 0.17, - "grad_norm": 0.4285338176452864, - "learning_rate": 1.897444702466069e-05, - "loss": 0.3331, + "epoch": 0.14, + "grad_norm": 0.6420422982925267, + "learning_rate": 1.9408071606130288e-05, + "loss": 0.4106, "step": 2978 }, { - "epoch": 0.17, - "grad_norm": 0.9754019324573616, - "learning_rate": 1.897362597053084e-05, - "loss": 0.5513, + "epoch": 0.14, + "grad_norm": 0.4614270003903362, + "learning_rate": 1.9407567183216323e-05, + "loss": 0.3525, "step": 2979 }, { - "epoch": 0.17, - "grad_norm": 0.28062601130507825, - "learning_rate": 1.897280460564471e-05, - "loss": 0.1539, + "epoch": 0.14, + "grad_norm": 0.3361710807752136, + "learning_rate": 1.9407062552027887e-05, + "loss": 0.2336, "step": 2980 }, { - "epoch": 0.17, - "grad_norm": 0.5016667633376, - "learning_rate": 1.897198293003074e-05, - "loss": 0.3324, + "epoch": 0.14, + "grad_norm": 0.7876959945937185, + "learning_rate": 1.9406557712576154e-05, + "loss": 0.4422, "step": 2981 }, { - "epoch": 0.17, - "grad_norm": 0.5883370722958858, - "learning_rate": 1.8971160943717387e-05, - "loss": 0.3618, + "epoch": 0.14, + "grad_norm": 0.38380778398443377, + "learning_rate": 1.94060526648723e-05, + "loss": 0.2811, "step": 2982 }, { - "epoch": 0.17, - "grad_norm": 0.7596055078716403, - "learning_rate": 1.8970338646733112e-05, - "loss": 0.3371, + "epoch": 0.14, + "grad_norm": 0.5302023483735993, + "learning_rate": 1.9405547408927504e-05, + "loss": 0.4274, "step": 2983 }, { - "epoch": 0.17, - "grad_norm": 0.34230411328604804, - "learning_rate": 1.8969516039106402e-05, - "loss": 0.2392, + "epoch": 0.14, + "grad_norm": 0.46290824341095616, + "learning_rate": 1.9405041944752953e-05, + "loss": 0.3127, "step": 2984 }, { - "epoch": 0.17, - "grad_norm": 0.42128346482721546, - "learning_rate": 1.8968693120865734e-05, - "loss": 0.3327, + "epoch": 0.14, + "grad_norm": 0.4634207737682182, + "learning_rate": 1.9404536272359838e-05, + "loss": 0.3202, "step": 2985 }, { - "epoch": 0.17, - "grad_norm": 0.6798764575206757, - "learning_rate": 1.896786989203961e-05, - "loss": 0.3974, + "epoch": 0.14, + "grad_norm": 0.5826415696599602, + "learning_rate": 1.9404030391759353e-05, + "loss": 0.3342, "step": 2986 }, { - "epoch": 0.17, - "grad_norm": 0.3784241264767709, - "learning_rate": 1.896704635265654e-05, - "loss": 0.2136, + "epoch": 0.14, + "grad_norm": 0.449570055496348, + "learning_rate": 1.9403524302962703e-05, + "loss": 0.2501, "step": 2987 }, { - "epoch": 0.17, - "grad_norm": 1.276853121304933, - "learning_rate": 1.8966222502745034e-05, - "loss": 0.5163, + "epoch": 0.14, + "grad_norm": 0.35044173197799666, + "learning_rate": 1.9403018005981086e-05, + "loss": 0.2813, "step": 2988 }, { - "epoch": 0.17, - "grad_norm": 0.44923140900964686, - "learning_rate": 1.8965398342333632e-05, - "loss": 0.3497, + "epoch": 0.14, + "grad_norm": 1.0127999136447283, + "learning_rate": 1.9402511500825712e-05, + "loss": 0.5636, "step": 2989 }, { - "epoch": 0.17, - "grad_norm": 0.2639236438544276, - "learning_rate": 1.896457387145087e-05, - "loss": 0.1798, + "epoch": 0.14, + "grad_norm": 0.4692235448132522, + "learning_rate": 1.9402004787507798e-05, + "loss": 0.3376, "step": 2990 }, { - "epoch": 0.17, - "grad_norm": 1.1097809048080438, - "learning_rate": 1.8963749090125302e-05, - "loss": 0.6302, + "epoch": 0.14, + "grad_norm": 0.48765783876564567, + "learning_rate": 1.940149786603856e-05, + "loss": 0.3464, "step": 2991 }, { - "epoch": 0.17, - "grad_norm": 1.234947244764597, - "learning_rate": 1.8962923998385487e-05, - "loss": 0.6959, + "epoch": 0.14, + "grad_norm": 0.5072996441214087, + "learning_rate": 1.9400990736429217e-05, + "loss": 0.3354, "step": 2992 }, { - "epoch": 0.17, - "grad_norm": 0.4410351618966857, - "learning_rate": 1.896209859626e-05, - "loss": 0.258, + "epoch": 0.14, + "grad_norm": 0.4573707719052176, + "learning_rate": 1.9400483398691e-05, + "loss": 0.2482, "step": 2993 }, { - "epoch": 0.17, - "grad_norm": 0.9236199272405345, - "learning_rate": 1.8961272883777424e-05, - "loss": 0.4043, + "epoch": 0.14, + "grad_norm": 0.40323224385066714, + "learning_rate": 1.9399975852835142e-05, + "loss": 0.2523, "step": 2994 }, { - "epoch": 0.17, - "grad_norm": 0.8780417499501578, - "learning_rate": 1.8960446860966353e-05, - "loss": 0.5626, + "epoch": 0.14, + "grad_norm": 0.5338930338356309, + "learning_rate": 1.939946809887288e-05, + "loss": 0.3419, "step": 2995 }, { - "epoch": 0.17, - "grad_norm": 0.2345376241037861, - "learning_rate": 1.895962052785539e-05, - "loss": 0.1591, + "epoch": 0.14, + "grad_norm": 0.5482413294478314, + "learning_rate": 1.9398960136815454e-05, + "loss": 0.3692, "step": 2996 }, { - "epoch": 0.17, - "grad_norm": 0.4550241241419985, - "learning_rate": 1.895879388447316e-05, - "loss": 0.347, + "epoch": 0.14, + "grad_norm": 0.42729666056444005, + "learning_rate": 1.9398451966674108e-05, + "loss": 0.2581, "step": 2997 }, { - "epoch": 0.17, - "grad_norm": 1.0166192508391756, - "learning_rate": 1.8957966930848278e-05, - "loss": 0.4882, + "epoch": 0.14, + "grad_norm": 1.1707424468587229, + "learning_rate": 1.9397943588460094e-05, + "loss": 0.7097, "step": 2998 }, { - "epoch": 0.17, - "grad_norm": 0.4244725281572897, - "learning_rate": 1.8957139667009388e-05, - "loss": 0.3237, + "epoch": 0.14, + "grad_norm": 0.5667960415253117, + "learning_rate": 1.9397435002184665e-05, + "loss": 0.3675, "step": 2999 }, { - "epoch": 0.17, - "grad_norm": 0.9454011346453592, - "learning_rate": 1.8956312092985135e-05, - "loss": 0.453, + "epoch": 0.14, + "grad_norm": 0.38333966835026145, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.2488, "step": 3000 }, { - "epoch": 0.17, - "grad_norm": 0.4287534901474559, - "learning_rate": 1.895548420880418e-05, - "loss": 0.3222, + "epoch": 0.14, + "grad_norm": 0.5120685900681801, + "learning_rate": 1.9396417205494614e-05, + "loss": 0.3642, "step": 3001 }, { - "epoch": 0.17, - "grad_norm": 0.3976559492680062, - "learning_rate": 1.8954656014495193e-05, - "loss": 0.2943, + "epoch": 0.14, + "grad_norm": 0.4793709871047172, + "learning_rate": 1.9395907995102524e-05, + "loss": 0.2621, "step": 3002 }, { - "epoch": 0.17, - "grad_norm": 0.256110448009788, - "learning_rate": 1.8953827510086855e-05, - "loss": 0.128, + "epoch": 0.14, + "grad_norm": 0.3926186664332781, + "learning_rate": 1.9395398576694087e-05, + "loss": 0.2368, "step": 3003 }, { - "epoch": 0.17, - "grad_norm": 0.6654967054817726, - "learning_rate": 1.8952998695607848e-05, - "loss": 0.4478, + "epoch": 0.14, + "grad_norm": 0.5465883412686288, + "learning_rate": 1.939488895028058e-05, + "loss": 0.3192, "step": 3004 }, { - "epoch": 0.17, - "grad_norm": 0.4407137936298601, - "learning_rate": 1.895216957108689e-05, - "loss": 0.2961, + "epoch": 0.14, + "grad_norm": 1.3117216261122207, + "learning_rate": 1.939437911587329e-05, + "loss": 0.792, "step": 3005 }, { - "epoch": 0.17, - "grad_norm": 0.4160984346255178, - "learning_rate": 1.8951340136552677e-05, - "loss": 0.3153, + "epoch": 0.14, + "grad_norm": 0.39981982058204735, + "learning_rate": 1.9393869073483492e-05, + "loss": 0.2712, "step": 3006 }, { - "epoch": 0.17, - "grad_norm": 1.078401739989723, - "learning_rate": 1.8950510392033945e-05, - "loss": 0.669, + "epoch": 0.14, + "grad_norm": 0.5020979732613493, + "learning_rate": 1.9393358823122496e-05, + "loss": 0.3882, "step": 3007 }, { - "epoch": 0.17, - "grad_norm": 0.33084119777448523, - "learning_rate": 1.8949680337559422e-05, - "loss": 0.2457, + "epoch": 0.14, + "grad_norm": 0.30887277093037674, + "learning_rate": 1.9392848364801583e-05, + "loss": 0.2223, "step": 3008 }, { - "epoch": 0.17, - "grad_norm": 0.3027583624026009, - "learning_rate": 1.8948849973157855e-05, - "loss": 0.2139, + "epoch": 0.14, + "grad_norm": 0.46107887850317086, + "learning_rate": 1.939233769853206e-05, + "loss": 0.2854, "step": 3009 }, { - "epoch": 0.17, - "grad_norm": 0.9818615820212763, - "learning_rate": 1.8948019298858e-05, - "loss": 0.4861, + "epoch": 0.14, + "grad_norm": 1.1581240759842242, + "learning_rate": 1.9391826824325238e-05, + "loss": 0.4464, "step": 3010 }, { - "epoch": 0.17, - "grad_norm": 0.4073751640836383, - "learning_rate": 1.8947188314688614e-05, - "loss": 0.3296, + "epoch": 0.14, + "grad_norm": 0.42789056194553304, + "learning_rate": 1.939131574219242e-05, + "loss": 0.3396, "step": 3011 }, { - "epoch": 0.17, - "grad_norm": 0.7477288026893142, - "learning_rate": 1.8946357020678484e-05, - "loss": 0.5517, + "epoch": 0.14, + "grad_norm": 0.43944187394143236, + "learning_rate": 1.9390804452144922e-05, + "loss": 0.2942, "step": 3012 }, { - "epoch": 0.17, - "grad_norm": 0.38402672958445916, - "learning_rate": 1.89455254168564e-05, - "loss": 0.2941, + "epoch": 0.14, + "grad_norm": 1.0499571738476294, + "learning_rate": 1.939029295419406e-05, + "loss": 0.4391, "step": 3013 }, { - "epoch": 0.17, - "grad_norm": 0.378465393496418, - "learning_rate": 1.8944693503251154e-05, - "loss": 0.2712, + "epoch": 0.14, + "grad_norm": 0.3356106445064085, + "learning_rate": 1.9389781248351168e-05, + "loss": 0.2261, "step": 3014 }, { - "epoch": 0.17, - "grad_norm": 0.38536621633444945, - "learning_rate": 1.8943861279891555e-05, - "loss": 0.19, + "epoch": 0.14, + "grad_norm": 0.4533181388320035, + "learning_rate": 1.938926933462757e-05, + "loss": 0.2816, "step": 3015 }, { - "epoch": 0.17, - "grad_norm": 1.112414636701315, - "learning_rate": 1.8943028746806423e-05, - "loss": 0.3941, + "epoch": 0.14, + "grad_norm": 0.519220593449586, + "learning_rate": 1.9388757213034596e-05, + "loss": 0.2955, "step": 3016 }, { - "epoch": 0.17, - "grad_norm": 0.4202034683893572, - "learning_rate": 1.8942195904024593e-05, - "loss": 0.315, + "epoch": 0.14, + "grad_norm": 1.4025788590626416, + "learning_rate": 1.9388244883583587e-05, + "loss": 0.5466, "step": 3017 }, { - "epoch": 0.17, - "grad_norm": 1.5026074471147242, - "learning_rate": 1.89413627515749e-05, - "loss": 0.8241, + "epoch": 0.14, + "grad_norm": 0.4295631653910816, + "learning_rate": 1.9387732346285885e-05, + "loss": 0.2972, "step": 3018 }, { - "epoch": 0.17, - "grad_norm": 0.6070421657926213, - "learning_rate": 1.89405292894862e-05, - "loss": 0.3541, + "epoch": 0.14, + "grad_norm": 0.3803640394142232, + "learning_rate": 1.9387219601152837e-05, + "loss": 0.3173, "step": 3019 }, { - "epoch": 0.17, - "grad_norm": 0.41453425610717626, - "learning_rate": 1.8939695517787355e-05, - "loss": 0.3023, + "epoch": 0.14, + "grad_norm": 0.38648590453852244, + "learning_rate": 1.9386706648195793e-05, + "loss": 0.2187, "step": 3020 }, { - "epoch": 0.17, - "grad_norm": 0.29993170844835293, - "learning_rate": 1.893886143650724e-05, - "loss": 0.2491, + "epoch": 0.14, + "grad_norm": 0.4419829471251474, + "learning_rate": 1.938619348742611e-05, + "loss": 0.2873, "step": 3021 }, { - "epoch": 0.17, - "grad_norm": 0.6600485195325879, - "learning_rate": 1.893802704567474e-05, - "loss": 0.3919, + "epoch": 0.14, + "grad_norm": 1.1436234691471576, + "learning_rate": 1.9385680118855153e-05, + "loss": 0.7067, "step": 3022 }, { - "epoch": 0.17, - "grad_norm": 0.4849563665909965, - "learning_rate": 1.8937192345318745e-05, - "loss": 0.2887, + "epoch": 0.14, + "grad_norm": 0.7051584421199674, + "learning_rate": 1.938516654249428e-05, + "loss": 0.3338, "step": 3023 }, { - "epoch": 0.17, - "grad_norm": 1.121191174841304, - "learning_rate": 1.8936357335468164e-05, - "loss": 0.7787, + "epoch": 0.14, + "grad_norm": 0.501472317795451, + "learning_rate": 1.9384652758354872e-05, + "loss": 0.3227, "step": 3024 }, { - "epoch": 0.17, - "grad_norm": 0.4336597655448138, - "learning_rate": 1.8935522016151914e-05, - "loss": 0.3233, + "epoch": 0.14, + "grad_norm": 1.0605457377255982, + "learning_rate": 1.938413876644829e-05, + "loss": 0.613, "step": 3025 }, { - "epoch": 0.17, - "grad_norm": 0.30783811468715805, - "learning_rate": 1.8934686387398916e-05, - "loss": 0.2047, + "epoch": 0.14, + "grad_norm": 0.2598785475579807, + "learning_rate": 1.9383624566785924e-05, + "loss": 0.0994, "step": 3026 }, { - "epoch": 0.17, - "grad_norm": 0.31539444070821426, - "learning_rate": 1.8933850449238118e-05, - "loss": 0.2582, + "epoch": 0.14, + "grad_norm": 0.4354805647913339, + "learning_rate": 1.9383110159379154e-05, + "loss": 0.2906, "step": 3027 }, { - "epoch": 0.17, - "grad_norm": 0.7055654305578103, - "learning_rate": 1.893301420169846e-05, - "loss": 0.4926, + "epoch": 0.14, + "grad_norm": 0.5142853282298973, + "learning_rate": 1.938259554423937e-05, + "loss": 0.3468, "step": 3028 }, { - "epoch": 0.17, - "grad_norm": 0.3686311554656913, - "learning_rate": 1.893217764480891e-05, - "loss": 0.2407, + "epoch": 0.14, + "grad_norm": 0.6778219088987136, + "learning_rate": 1.938208072137796e-05, + "loss": 0.3724, "step": 3029 }, { - "epoch": 0.17, - "grad_norm": 1.1909355193895523, - "learning_rate": 1.8931340778598427e-05, - "loss": 0.6177, + "epoch": 0.14, + "grad_norm": 0.4504311081264537, + "learning_rate": 1.9381565690806328e-05, + "loss": 0.2896, "step": 3030 }, { - "epoch": 0.17, - "grad_norm": 1.4663985038033136, - "learning_rate": 1.8930503603095996e-05, - "loss": 0.8403, + "epoch": 0.14, + "grad_norm": 0.5953598184661302, + "learning_rate": 1.9381050452535873e-05, + "loss": 0.4465, "step": 3031 }, { - "epoch": 0.17, - "grad_norm": 0.3190431419463848, - "learning_rate": 1.892966611833061e-05, - "loss": 0.1833, + "epoch": 0.14, + "grad_norm": 0.3231538294241132, + "learning_rate": 1.9380535006578e-05, + "loss": 0.2206, "step": 3032 }, { - "epoch": 0.17, - "grad_norm": 0.4511305241542145, - "learning_rate": 1.892882832433127e-05, - "loss": 0.3602, + "epoch": 0.14, + "grad_norm": 0.38436959237511403, + "learning_rate": 1.9380019352944127e-05, + "loss": 0.2183, "step": 3033 }, { - "epoch": 0.17, - "grad_norm": 0.7068053529043494, - "learning_rate": 1.8927990221126992e-05, - "loss": 0.3597, + "epoch": 0.14, + "grad_norm": 0.7741207498064205, + "learning_rate": 1.9379503491645666e-05, + "loss": 0.5124, "step": 3034 }, { - "epoch": 0.17, - "grad_norm": 0.42401792713410896, - "learning_rate": 1.8927151808746794e-05, - "loss": 0.284, + "epoch": 0.14, + "grad_norm": 0.45391623262727476, + "learning_rate": 1.9378987422694035e-05, + "loss": 0.3435, "step": 3035 }, { - "epoch": 0.17, - "grad_norm": 1.0660654062947625, - "learning_rate": 1.8926313087219715e-05, - "loss": 0.4866, + "epoch": 0.14, + "grad_norm": 0.37519019234806844, + "learning_rate": 1.9378471146100662e-05, + "loss": 0.2625, "step": 3036 }, { - "epoch": 0.17, - "grad_norm": 0.4213915786757873, - "learning_rate": 1.8925474056574797e-05, - "loss": 0.3367, + "epoch": 0.14, + "grad_norm": 0.9484861915160815, + "learning_rate": 1.937795466187698e-05, + "loss": 0.671, "step": 3037 }, { - "epoch": 0.17, - "grad_norm": 0.5059010362430902, - "learning_rate": 1.8924634716841095e-05, - "loss": 0.3145, + "epoch": 0.14, + "grad_norm": 0.3412270275660992, + "learning_rate": 1.937743797003442e-05, + "loss": 0.1877, "step": 3038 }, { - "epoch": 0.17, - "grad_norm": 0.6904293276334135, - "learning_rate": 1.8923795068047676e-05, - "loss": 0.3638, + "epoch": 0.14, + "grad_norm": 0.336172842286069, + "learning_rate": 1.937692107058442e-05, + "loss": 0.2427, "step": 3039 }, { - "epoch": 0.17, - "grad_norm": 0.32126069141038616, - "learning_rate": 1.892295511022362e-05, - "loss": 0.2206, + "epoch": 0.14, + "grad_norm": 0.581892218083669, + "learning_rate": 1.9376403963538424e-05, + "loss": 0.3541, "step": 3040 }, { - "epoch": 0.17, - "grad_norm": 0.49072173398664753, - "learning_rate": 1.8922114843398008e-05, - "loss": 0.2853, + "epoch": 0.14, + "grad_norm": 1.0103575848048958, + "learning_rate": 1.9375886648907882e-05, + "loss": 0.5249, "step": 3041 }, { - "epoch": 0.17, - "grad_norm": 0.6178054617716159, - "learning_rate": 1.8921274267599948e-05, - "loss": 0.3293, + "epoch": 0.14, + "grad_norm": 0.3988257348087596, + "learning_rate": 1.937536912670425e-05, + "loss": 0.2335, "step": 3042 }, { - "epoch": 0.17, - "grad_norm": 1.3910184013031428, - "learning_rate": 1.8920433382858543e-05, - "loss": 0.6896, + "epoch": 0.14, + "grad_norm": 0.6395909430725215, + "learning_rate": 1.9374851396938978e-05, + "loss": 0.3624, "step": 3043 }, { - "epoch": 0.17, - "grad_norm": 0.43153226746326395, - "learning_rate": 1.8919592189202907e-05, - "loss": 0.3513, + "epoch": 0.14, + "grad_norm": 0.43720429592972626, + "learning_rate": 1.9374333459623532e-05, + "loss": 0.2892, "step": 3044 }, { - "epoch": 0.17, - "grad_norm": 0.4102488838420799, - "learning_rate": 1.8918750686662182e-05, - "loss": 0.3131, + "epoch": 0.14, + "grad_norm": 0.35300830278020523, + "learning_rate": 1.9373815314769375e-05, + "loss": 0.2373, "step": 3045 }, { - "epoch": 0.18, - "grad_norm": 0.47947338773237685, - "learning_rate": 1.8917908875265507e-05, - "loss": 0.3053, + "epoch": 0.14, + "grad_norm": 1.1798288142491746, + "learning_rate": 1.9373296962387988e-05, + "loss": 0.4301, "step": 3046 }, { - "epoch": 0.18, - "grad_norm": 0.3460541287559192, - "learning_rate": 1.8917066755042028e-05, - "loss": 0.2311, + "epoch": 0.14, + "grad_norm": 0.452486458373176, + "learning_rate": 1.9372778402490834e-05, + "loss": 0.3841, "step": 3047 }, { - "epoch": 0.18, - "grad_norm": 0.5716077686683415, - "learning_rate": 1.891622432602091e-05, - "loss": 0.4332, + "epoch": 0.14, + "grad_norm": 0.726923586227702, + "learning_rate": 1.9372259635089405e-05, + "loss": 0.3832, "step": 3048 }, { - "epoch": 0.18, - "grad_norm": 0.5029071813624045, - "learning_rate": 1.8915381588231327e-05, - "loss": 0.2939, + "epoch": 0.14, + "grad_norm": 0.5641466893554887, + "learning_rate": 1.9371740660195178e-05, + "loss": 0.2882, "step": 3049 }, { - "epoch": 0.18, - "grad_norm": 0.44046657183956867, - "learning_rate": 1.8914538541702466e-05, - "loss": 0.2922, + "epoch": 0.14, + "grad_norm": 0.36370897264492574, + "learning_rate": 1.9371221477819647e-05, + "loss": 0.2267, "step": 3050 }, { - "epoch": 0.18, - "grad_norm": 0.7995531866883153, - "learning_rate": 1.8913695186463517e-05, - "loss": 0.4911, + "epoch": 0.14, + "grad_norm": 0.515165584941363, + "learning_rate": 1.9370702087974302e-05, + "loss": 0.2964, "step": 3051 }, { - "epoch": 0.18, - "grad_norm": 0.36615694629870316, - "learning_rate": 1.8912851522543687e-05, - "loss": 0.2274, + "epoch": 0.14, + "grad_norm": 0.9801416042731813, + "learning_rate": 1.9370182490670645e-05, + "loss": 0.3482, "step": 3052 }, { - "epoch": 0.18, - "grad_norm": 0.32321900527474984, - "learning_rate": 1.891200754997219e-05, - "loss": 0.2136, + "epoch": 0.14, + "grad_norm": 1.6724616429235706, + "learning_rate": 1.936966268592018e-05, + "loss": 0.7983, "step": 3053 }, { - "epoch": 0.18, - "grad_norm": 1.2596204295464923, - "learning_rate": 1.8911163268778257e-05, - "loss": 0.8114, + "epoch": 0.14, + "grad_norm": 0.4570869717991394, + "learning_rate": 1.9369142673734416e-05, + "loss": 0.3219, "step": 3054 }, { - "epoch": 0.18, - "grad_norm": 0.7144167478169268, - "learning_rate": 1.891031867899112e-05, - "loss": 0.4086, + "epoch": 0.14, + "grad_norm": 0.412349700555376, + "learning_rate": 1.9368622454124863e-05, + "loss": 0.3023, "step": 3055 }, { - "epoch": 0.18, - "grad_norm": 0.6253671899765448, - "learning_rate": 1.8909473780640037e-05, - "loss": 0.3703, + "epoch": 0.14, + "grad_norm": 1.194767227056874, + "learning_rate": 1.9368102027103032e-05, + "loss": 0.6455, "step": 3056 }, { - "epoch": 0.18, - "grad_norm": 0.45134821280279186, - "learning_rate": 1.8908628573754254e-05, - "loss": 0.3021, + "epoch": 0.14, + "grad_norm": 0.35822342598006096, + "learning_rate": 1.9367581392680458e-05, + "loss": 0.2533, "step": 3057 }, { - "epoch": 0.18, - "grad_norm": 0.22005853180138574, - "learning_rate": 1.890778305836305e-05, - "loss": 0.1269, + "epoch": 0.14, + "grad_norm": 0.6267124207515261, + "learning_rate": 1.9367060550868657e-05, + "loss": 0.4291, "step": 3058 }, { - "epoch": 0.18, - "grad_norm": 0.4524272668639035, - "learning_rate": 1.89069372344957e-05, - "loss": 0.264, + "epoch": 0.14, + "grad_norm": 0.38445614554466684, + "learning_rate": 1.9366539501679165e-05, + "loss": 0.2431, "step": 3059 }, { - "epoch": 0.18, - "grad_norm": 0.6209278718230915, - "learning_rate": 1.8906091102181495e-05, - "loss": 0.4388, + "epoch": 0.14, + "grad_norm": 0.49512261636507643, + "learning_rate": 1.9366018245123515e-05, + "loss": 0.2905, "step": 3060 }, { - "epoch": 0.18, - "grad_norm": 0.4937884114799369, - "learning_rate": 1.890524466144974e-05, - "loss": 0.3877, + "epoch": 0.14, + "grad_norm": 1.4357130953093995, + "learning_rate": 1.9365496781213248e-05, + "loss": 0.7668, "step": 3061 }, { - "epoch": 0.18, - "grad_norm": 0.41369254008640416, - "learning_rate": 1.8904397912329745e-05, - "loss": 0.2448, + "epoch": 0.14, + "grad_norm": 0.5340813233536238, + "learning_rate": 1.9364975109959913e-05, + "loss": 0.3313, "step": 3062 }, { - "epoch": 0.18, - "grad_norm": 0.5973610783320861, - "learning_rate": 1.8903550854850834e-05, - "loss": 0.3712, + "epoch": 0.14, + "grad_norm": 0.44446208227026246, + "learning_rate": 1.9364453231375048e-05, + "loss": 0.2806, "step": 3063 }, { - "epoch": 0.18, - "grad_norm": 0.656951303682737, - "learning_rate": 1.890270348904234e-05, - "loss": 0.4271, + "epoch": 0.14, + "grad_norm": 0.5316307284252908, + "learning_rate": 1.9363931145470222e-05, + "loss": 0.401, "step": 3064 }, { - "epoch": 0.18, - "grad_norm": 0.2452544715442981, - "learning_rate": 1.8901855814933607e-05, - "loss": 0.1632, + "epoch": 0.14, + "grad_norm": 0.28721554476489247, + "learning_rate": 1.936340885225698e-05, + "loss": 0.1263, "step": 3065 }, { - "epoch": 0.18, - "grad_norm": 0.5677559684846096, - "learning_rate": 1.890100783255399e-05, - "loss": 0.4578, + "epoch": 0.14, + "grad_norm": 0.749450635299404, + "learning_rate": 1.936288635174689e-05, + "loss": 0.3665, "step": 3066 }, { - "epoch": 0.18, - "grad_norm": 0.5891915481483365, - "learning_rate": 1.890015954193285e-05, - "loss": 0.4746, + "epoch": 0.14, + "grad_norm": 0.4376586622221232, + "learning_rate": 1.9362363643951524e-05, + "loss": 0.3126, "step": 3067 }, { - "epoch": 0.18, - "grad_norm": 0.35663869657398584, - "learning_rate": 1.8899310943099573e-05, - "loss": 0.2468, + "epoch": 0.14, + "grad_norm": 0.8901577863002759, + "learning_rate": 1.9361840728882447e-05, + "loss": 0.4311, "step": 3068 }, { - "epoch": 0.18, - "grad_norm": 0.5247041604668781, - "learning_rate": 1.8898462036083537e-05, - "loss": 0.3401, + "epoch": 0.14, + "grad_norm": 0.6894238174113327, + "learning_rate": 1.936131760655124e-05, + "loss": 0.3789, "step": 3069 }, { - "epoch": 0.18, - "grad_norm": 1.0380567894925528, - "learning_rate": 1.8897612820914147e-05, - "loss": 0.5755, + "epoch": 0.14, + "grad_norm": 0.4774387998226648, + "learning_rate": 1.9360794276969487e-05, + "loss": 0.2726, "step": 3070 }, { - "epoch": 0.18, - "grad_norm": 0.2670561595530922, - "learning_rate": 1.8896763297620805e-05, - "loss": 0.1841, + "epoch": 0.14, + "grad_norm": 0.5593482846428411, + "learning_rate": 1.9360270740148766e-05, + "loss": 0.2847, "step": 3071 }, { - "epoch": 0.18, - "grad_norm": 0.9455444266024311, - "learning_rate": 1.8895913466232937e-05, - "loss": 0.6592, + "epoch": 0.14, + "grad_norm": 0.39694825605655887, + "learning_rate": 1.9359746996100676e-05, + "loss": 0.2226, "step": 3072 }, { - "epoch": 0.18, - "grad_norm": 0.41053313749568826, - "learning_rate": 1.8895063326779965e-05, - "loss": 0.3663, + "epoch": 0.14, + "grad_norm": 0.603305899650726, + "learning_rate": 1.9359223044836807e-05, + "loss": 0.3926, "step": 3073 }, { - "epoch": 0.18, - "grad_norm": 0.36267065242506263, - "learning_rate": 1.8894212879291332e-05, - "loss": 0.2047, + "epoch": 0.14, + "grad_norm": 1.0420300824190283, + "learning_rate": 1.935869888636876e-05, + "loss": 0.5284, "step": 3074 }, { - "epoch": 0.18, - "grad_norm": 0.4786448101222816, - "learning_rate": 1.8893362123796488e-05, - "loss": 0.2759, + "epoch": 0.14, + "grad_norm": 0.41835801848217313, + "learning_rate": 1.935817452070814e-05, + "loss": 0.266, "step": 3075 }, { - "epoch": 0.18, - "grad_norm": 0.4093384984549816, - "learning_rate": 1.88925110603249e-05, - "loss": 0.3397, + "epoch": 0.14, + "grad_norm": 0.5765255494226064, + "learning_rate": 1.9357649947866558e-05, + "loss": 0.4027, "step": 3076 }, { - "epoch": 0.18, - "grad_norm": 0.845745579057613, - "learning_rate": 1.8891659688906033e-05, - "loss": 0.4151, + "epoch": 0.14, + "grad_norm": 0.5960438968910211, + "learning_rate": 1.935712516785562e-05, + "loss": 0.2547, "step": 3077 }, { - "epoch": 0.18, - "grad_norm": 0.47292178771932475, - "learning_rate": 1.8890808009569376e-05, - "loss": 0.3051, + "epoch": 0.14, + "grad_norm": 0.3032855485650015, + "learning_rate": 1.9356600180686954e-05, + "loss": 0.1758, "step": 3078 }, { - "epoch": 0.18, - "grad_norm": 0.49182209863150383, - "learning_rate": 1.8889956022344414e-05, - "loss": 0.3479, + "epoch": 0.14, + "grad_norm": 0.45961567964926103, + "learning_rate": 1.9356074986372176e-05, + "loss": 0.3327, "step": 3079 }, { - "epoch": 0.18, - "grad_norm": 0.46806648165181836, - "learning_rate": 1.8889103727260666e-05, - "loss": 0.2757, + "epoch": 0.14, + "grad_norm": 1.1170804601775841, + "learning_rate": 1.9355549584922917e-05, + "loss": 0.5158, "step": 3080 }, { - "epoch": 0.18, - "grad_norm": 0.3120414790673563, - "learning_rate": 1.888825112434763e-05, - "loss": 0.2244, + "epoch": 0.14, + "grad_norm": 0.4279031917199713, + "learning_rate": 1.9355023976350805e-05, + "loss": 0.2418, "step": 3081 }, { - "epoch": 0.18, - "grad_norm": 1.2679215891920346, - "learning_rate": 1.8887398213634848e-05, - "loss": 0.7125, + "epoch": 0.14, + "grad_norm": 0.5915021649071351, + "learning_rate": 1.935449816066748e-05, + "loss": 0.4054, "step": 3082 }, { - "epoch": 0.18, - "grad_norm": 0.6403408615039261, - "learning_rate": 1.8886544995151844e-05, - "loss": 0.38, + "epoch": 0.14, + "grad_norm": 0.3735567539083631, + "learning_rate": 1.935397213788458e-05, + "loss": 0.2869, "step": 3083 }, { - "epoch": 0.18, - "grad_norm": 0.33151253151833543, - "learning_rate": 1.8885691468928166e-05, - "loss": 0.2932, + "epoch": 0.14, + "grad_norm": 1.0356493275063283, + "learning_rate": 1.9353445908013756e-05, + "loss": 0.602, "step": 3084 }, { - "epoch": 0.18, - "grad_norm": 0.7277961331311545, - "learning_rate": 1.8884837634993377e-05, - "loss": 0.5574, + "epoch": 0.14, + "grad_norm": 0.42929026095302564, + "learning_rate": 1.935291947106665e-05, + "loss": 0.236, "step": 3085 }, { - "epoch": 0.18, - "grad_norm": 0.340274101058813, - "learning_rate": 1.8883983493377045e-05, - "loss": 0.1544, + "epoch": 0.14, + "grad_norm": 0.868352016420955, + "learning_rate": 1.9352392827054926e-05, + "loss": 0.4177, "step": 3086 }, { - "epoch": 0.18, - "grad_norm": 0.4429892097106967, - "learning_rate": 1.8883129044108744e-05, - "loss": 0.3517, + "epoch": 0.14, + "grad_norm": 0.626643714235681, + "learning_rate": 1.9351865975990235e-05, + "loss": 0.3575, "step": 3087 }, { - "epoch": 0.18, - "grad_norm": 0.3928554684234481, - "learning_rate": 1.8882274287218067e-05, - "loss": 0.2741, + "epoch": 0.14, + "grad_norm": 0.43976400904505125, + "learning_rate": 1.9351338917884247e-05, + "loss": 0.2362, "step": 3088 }, { - "epoch": 0.18, - "grad_norm": 0.4743146801940653, - "learning_rate": 1.8881419222734615e-05, - "loss": 0.3134, + "epoch": 0.14, + "grad_norm": 1.2571203273599227, + "learning_rate": 1.9350811652748625e-05, + "loss": 0.7756, "step": 3089 }, { - "epoch": 0.18, - "grad_norm": 0.5239268770428587, - "learning_rate": 1.8880563850687995e-05, - "loss": 0.4167, + "epoch": 0.14, + "grad_norm": 0.39479254298439054, + "learning_rate": 1.935028418059505e-05, + "loss": 0.2748, "step": 3090 }, { - "epoch": 0.18, - "grad_norm": 0.8884943070873123, - "learning_rate": 1.8879708171107828e-05, - "loss": 0.4566, + "epoch": 0.14, + "grad_norm": 0.38658171780407424, + "learning_rate": 1.9349756501435192e-05, + "loss": 0.2367, "step": 3091 }, { - "epoch": 0.18, - "grad_norm": 0.5371132744319287, - "learning_rate": 1.8878852184023754e-05, - "loss": 0.3002, + "epoch": 0.14, + "grad_norm": 0.8964361605996685, + "learning_rate": 1.9349228615280736e-05, + "loss": 0.5108, "step": 3092 }, { - "epoch": 0.18, - "grad_norm": 0.32469681817681595, - "learning_rate": 1.887799588946541e-05, - "loss": 0.2159, + "epoch": 0.14, + "grad_norm": 0.5883277176850754, + "learning_rate": 1.9348700522143374e-05, + "loss": 0.3627, "step": 3093 }, { - "epoch": 0.18, - "grad_norm": 1.2594353839536387, - "learning_rate": 1.8877139287462446e-05, - "loss": 0.3695, + "epoch": 0.14, + "grad_norm": 0.6356564615886784, + "learning_rate": 1.934817222203479e-05, + "loss": 0.2501, "step": 3094 }, { - "epoch": 0.18, - "grad_norm": 0.6393985107547493, - "learning_rate": 1.8876282378044535e-05, - "loss": 0.4849, + "epoch": 0.14, + "grad_norm": 0.513464236082426, + "learning_rate": 1.9347643714966682e-05, + "loss": 0.3859, "step": 3095 }, { - "epoch": 0.18, - "grad_norm": 0.3707643080188264, - "learning_rate": 1.8875425161241345e-05, - "loss": 0.3116, + "epoch": 0.14, + "grad_norm": 0.7817588579059302, + "learning_rate": 1.934711500095075e-05, + "loss": 0.4073, "step": 3096 }, { - "epoch": 0.18, - "grad_norm": 0.5094016079937749, - "learning_rate": 1.887456763708256e-05, - "loss": 0.3134, + "epoch": 0.14, + "grad_norm": 0.5613951357956724, + "learning_rate": 1.9346586079998705e-05, + "loss": 0.3567, "step": 3097 }, { - "epoch": 0.18, - "grad_norm": 0.2347285327171995, - "learning_rate": 1.8873709805597884e-05, - "loss": 0.1598, + "epoch": 0.14, + "grad_norm": 0.4103688167604076, + "learning_rate": 1.934605695212225e-05, + "loss": 0.2424, "step": 3098 }, { - "epoch": 0.18, - "grad_norm": 0.40752913954631803, - "learning_rate": 1.8872851666817017e-05, - "loss": 0.3338, + "epoch": 0.14, + "grad_norm": 0.5219125871738478, + "learning_rate": 1.9345527617333103e-05, + "loss": 0.3449, "step": 3099 }, { - "epoch": 0.18, - "grad_norm": 0.5350281103112107, - "learning_rate": 1.887199322076968e-05, - "loss": 0.3887, + "epoch": 0.14, + "grad_norm": 0.5713456411238049, + "learning_rate": 1.934499807564298e-05, + "loss": 0.3535, "step": 3100 }, { - "epoch": 0.18, - "grad_norm": 0.6124219414870147, - "learning_rate": 1.8871134467485597e-05, - "loss": 0.2902, + "epoch": 0.14, + "grad_norm": 0.9670463387595841, + "learning_rate": 1.934446832706361e-05, + "loss": 0.5215, "step": 3101 }, { - "epoch": 0.18, - "grad_norm": 0.5018307054344358, - "learning_rate": 1.8870275406994513e-05, - "loss": 0.3797, + "epoch": 0.14, + "grad_norm": 0.7128032153454966, + "learning_rate": 1.9343938371606714e-05, + "loss": 0.4638, "step": 3102 }, { - "epoch": 0.18, - "grad_norm": 1.1961520940052548, - "learning_rate": 1.886941603932617e-05, - "loss": 0.7538, + "epoch": 0.14, + "grad_norm": 0.5117238957593905, + "learning_rate": 1.934340820928403e-05, + "loss": 0.2961, "step": 3103 }, { - "epoch": 0.18, - "grad_norm": 0.32363681224514806, - "learning_rate": 1.886855636451033e-05, - "loss": 0.2232, + "epoch": 0.14, + "grad_norm": 0.3838912001935049, + "learning_rate": 1.9342877840107292e-05, + "loss": 0.2009, "step": 3104 }, { - "epoch": 0.18, - "grad_norm": 0.4023577478846728, - "learning_rate": 1.8867696382576767e-05, - "loss": 0.2839, + "epoch": 0.14, + "grad_norm": 0.9074265473426006, + "learning_rate": 1.9342347264088245e-05, + "loss": 0.4875, "step": 3105 }, { - "epoch": 0.18, - "grad_norm": 0.437139177965491, - "learning_rate": 1.886683609355526e-05, - "loss": 0.3293, + "epoch": 0.14, + "grad_norm": 0.5009958674211009, + "learning_rate": 1.934181648123863e-05, + "loss": 0.3176, "step": 3106 }, { - "epoch": 0.18, - "grad_norm": 0.6121045236750057, - "learning_rate": 1.8865975497475596e-05, - "loss": 0.333, + "epoch": 0.14, + "grad_norm": 0.4623723583297876, + "learning_rate": 1.9341285491570207e-05, + "loss": 0.3031, "step": 3107 }, { - "epoch": 0.18, - "grad_norm": 0.4427691977419533, - "learning_rate": 1.8865114594367585e-05, - "loss": 0.3435, + "epoch": 0.14, + "grad_norm": 0.7211731899186699, + "learning_rate": 1.9340754295094726e-05, + "loss": 0.4879, "step": 3108 }, { - "epoch": 0.18, - "grad_norm": 0.6110743595027336, - "learning_rate": 1.8864253384261036e-05, - "loss": 0.4338, + "epoch": 0.14, + "grad_norm": 0.4307073952641585, + "learning_rate": 1.9340222891823943e-05, + "loss": 0.3471, "step": 3109 }, { - "epoch": 0.18, - "grad_norm": 0.580897320895251, - "learning_rate": 1.8863391867185774e-05, - "loss": 0.3527, + "epoch": 0.14, + "grad_norm": 0.6207881965037696, + "learning_rate": 1.9339691281769632e-05, + "loss": 0.3446, "step": 3110 }, { - "epoch": 0.18, - "grad_norm": 0.29260609606365173, - "learning_rate": 1.8862530043171633e-05, - "loss": 0.1696, + "epoch": 0.14, + "grad_norm": 0.3605653062959438, + "learning_rate": 1.9339159464943558e-05, + "loss": 0.2151, "step": 3111 }, { - "epoch": 0.18, - "grad_norm": 0.4524691334794982, - "learning_rate": 1.8861667912248456e-05, - "loss": 0.3644, + "epoch": 0.14, + "grad_norm": 0.48203588067908704, + "learning_rate": 1.933862744135749e-05, + "loss": 0.317, "step": 3112 }, { - "epoch": 0.18, - "grad_norm": 0.8502053958943329, - "learning_rate": 1.8860805474446103e-05, - "loss": 0.5187, + "epoch": 0.14, + "grad_norm": 1.5983382295056547, + "learning_rate": 1.9338095211023216e-05, + "loss": 0.8199, "step": 3113 }, { - "epoch": 0.18, - "grad_norm": 0.34026675743767276, - "learning_rate": 1.8859942729794433e-05, - "loss": 0.2207, + "epoch": 0.14, + "grad_norm": 0.6175340310973247, + "learning_rate": 1.9337562773952512e-05, + "loss": 0.2933, "step": 3114 }, { - "epoch": 0.18, - "grad_norm": 0.5803398218349507, - "learning_rate": 1.885907967832333e-05, - "loss": 0.4215, + "epoch": 0.14, + "grad_norm": 0.47368056372614004, + "learning_rate": 1.9337030130157166e-05, + "loss": 0.3058, "step": 3115 }, { - "epoch": 0.18, - "grad_norm": 0.5413184538995245, - "learning_rate": 1.885821632006268e-05, - "loss": 0.385, + "epoch": 0.14, + "grad_norm": 0.47135945769868387, + "learning_rate": 1.9336497279648977e-05, + "loss": 0.347, "step": 3116 }, { - "epoch": 0.18, - "grad_norm": 0.267538412442085, - "learning_rate": 1.8857352655042378e-05, - "loss": 0.1605, + "epoch": 0.14, + "grad_norm": 0.27242105592464244, + "learning_rate": 1.9335964222439733e-05, + "loss": 0.1132, "step": 3117 }, { - "epoch": 0.18, - "grad_norm": 0.9481877105032955, - "learning_rate": 1.885648868329234e-05, - "loss": 0.5323, + "epoch": 0.14, + "grad_norm": 0.5139362719838799, + "learning_rate": 1.933543095854124e-05, + "loss": 0.3274, "step": 3118 }, { - "epoch": 0.18, - "grad_norm": 0.7094437605080949, - "learning_rate": 1.8855624404842472e-05, - "loss": 0.4595, + "epoch": 0.14, + "grad_norm": 0.5148103179931479, + "learning_rate": 1.93348974879653e-05, + "loss": 0.3557, "step": 3119 }, { - "epoch": 0.18, - "grad_norm": 0.3795999302636725, - "learning_rate": 1.8854759819722713e-05, - "loss": 0.2596, + "epoch": 0.14, + "grad_norm": 1.0088769052468993, + "learning_rate": 1.9334363810723733e-05, + "loss": 0.434, "step": 3120 }, { - "epoch": 0.18, - "grad_norm": 1.3263785752350479, - "learning_rate": 1.8853894927963004e-05, - "loss": 0.8709, + "epoch": 0.14, + "grad_norm": 0.47735853819042645, + "learning_rate": 1.9333829926828347e-05, + "loss": 0.2941, "step": 3121 }, { - "epoch": 0.18, - "grad_norm": 0.6358746810174966, - "learning_rate": 1.8853029729593296e-05, - "loss": 0.4608, + "epoch": 0.14, + "grad_norm": 0.4882950644129855, + "learning_rate": 1.933329583629096e-05, + "loss": 0.3453, "step": 3122 }, { - "epoch": 0.18, - "grad_norm": 0.3726386152293299, - "learning_rate": 1.8852164224643546e-05, - "loss": 0.2343, + "epoch": 0.14, + "grad_norm": 0.35974849404424575, + "learning_rate": 1.93327615391234e-05, + "loss": 0.2024, "step": 3123 }, { - "epoch": 0.18, - "grad_norm": 0.5836516260644941, - "learning_rate": 1.885129841314373e-05, - "loss": 0.3314, + "epoch": 0.14, + "grad_norm": 0.41255318366801796, + "learning_rate": 1.933222703533749e-05, + "loss": 0.2407, "step": 3124 }, { - "epoch": 0.18, - "grad_norm": 0.49963353752080175, - "learning_rate": 1.8850432295123832e-05, - "loss": 0.2861, + "epoch": 0.14, + "grad_norm": 1.1115162413354707, + "learning_rate": 1.9331692324945072e-05, + "loss": 0.5387, "step": 3125 }, { - "epoch": 0.18, - "grad_norm": 0.4523725849030945, - "learning_rate": 1.8849565870613844e-05, - "loss": 0.3031, + "epoch": 0.14, + "grad_norm": 0.553485779764722, + "learning_rate": 1.933115740795798e-05, + "loss": 0.3811, "step": 3126 }, { - "epoch": 0.18, - "grad_norm": 0.47633458502229503, - "learning_rate": 1.8848699139643768e-05, - "loss": 0.3484, + "epoch": 0.14, + "grad_norm": 0.38948878861094655, + "learning_rate": 1.9330622284388057e-05, + "loss": 0.2385, "step": 3127 }, { - "epoch": 0.18, - "grad_norm": 0.4572869976239571, - "learning_rate": 1.8847832102243626e-05, - "loss": 0.3506, + "epoch": 0.14, + "grad_norm": 1.2777550969012572, + "learning_rate": 1.933008695424715e-05, + "loss": 0.696, "step": 3128 }, { - "epoch": 0.18, - "grad_norm": 0.4738886849479047, - "learning_rate": 1.8846964758443434e-05, - "loss": 0.3156, + "epoch": 0.14, + "grad_norm": 0.37741015210642403, + "learning_rate": 1.932955141754711e-05, + "loss": 0.175, "step": 3129 }, { - "epoch": 0.18, - "grad_norm": 0.3158973478786652, - "learning_rate": 1.8846097108273234e-05, - "loss": 0.2454, + "epoch": 0.14, + "grad_norm": 0.40205784396575917, + "learning_rate": 1.932901567429979e-05, + "loss": 0.2546, "step": 3130 }, { - "epoch": 0.18, - "grad_norm": 0.6088936520152631, - "learning_rate": 1.8845229151763072e-05, - "loss": 0.3999, + "epoch": 0.14, + "grad_norm": 0.6602560551868947, + "learning_rate": 1.9328479724517057e-05, + "loss": 0.4296, "step": 3131 }, { - "epoch": 0.18, - "grad_norm": 0.341198313608363, - "learning_rate": 1.884436088894301e-05, - "loss": 0.2854, + "epoch": 0.14, + "grad_norm": 1.1272302028814996, + "learning_rate": 1.9327943568210775e-05, + "loss": 0.5583, "step": 3132 }, { - "epoch": 0.18, - "grad_norm": 0.8632109606054379, - "learning_rate": 1.8843492319843105e-05, - "loss": 0.5362, + "epoch": 0.14, + "grad_norm": 0.42758808120611264, + "learning_rate": 1.932740720539281e-05, + "loss": 0.2219, "step": 3133 }, { - "epoch": 0.18, - "grad_norm": 0.8698131777727494, - "learning_rate": 1.884262344449344e-05, - "loss": 0.5899, + "epoch": 0.14, + "grad_norm": 0.5187066250663277, + "learning_rate": 1.9326870636075042e-05, + "loss": 0.3824, "step": 3134 }, { - "epoch": 0.18, - "grad_norm": 0.40345140665949575, - "learning_rate": 1.8841754262924106e-05, - "loss": 0.2877, + "epoch": 0.14, + "grad_norm": 0.39098493002892803, + "learning_rate": 1.9326333860269347e-05, + "loss": 0.1976, "step": 3135 }, { - "epoch": 0.18, - "grad_norm": 0.4323619276330218, - "learning_rate": 1.8840884775165204e-05, - "loss": 0.3597, + "epoch": 0.14, + "grad_norm": 0.46286981207879774, + "learning_rate": 1.9325796877987607e-05, + "loss": 0.3069, "step": 3136 }, { - "epoch": 0.18, - "grad_norm": 0.23494410269272176, - "learning_rate": 1.8840014981246843e-05, - "loss": 0.121, + "epoch": 0.14, + "grad_norm": 1.1093022522423166, + "learning_rate": 1.9325259689241714e-05, + "loss": 0.4565, "step": 3137 }, { - "epoch": 0.18, - "grad_norm": 0.39915844033516507, - "learning_rate": 1.8839144881199144e-05, - "loss": 0.2998, + "epoch": 0.14, + "grad_norm": 0.5166068260655114, + "learning_rate": 1.932472229404356e-05, + "loss": 0.3738, "step": 3138 }, { - "epoch": 0.18, - "grad_norm": 1.3686412428770773, - "learning_rate": 1.8838274475052233e-05, - "loss": 0.8266, + "epoch": 0.14, + "grad_norm": 0.45517193194226285, + "learning_rate": 1.932418469240504e-05, + "loss": 0.2971, "step": 3139 }, { - "epoch": 0.18, - "grad_norm": 0.3427600739038595, - "learning_rate": 1.883740376283626e-05, - "loss": 0.2858, + "epoch": 0.14, + "grad_norm": 0.8685505899577974, + "learning_rate": 1.932364688433806e-05, + "loss": 0.4191, "step": 3140 }, { - "epoch": 0.18, - "grad_norm": 0.4077783868641141, - "learning_rate": 1.8836532744581377e-05, - "loss": 0.3271, + "epoch": 0.14, + "grad_norm": 0.334812296149803, + "learning_rate": 1.9323108869854522e-05, + "loss": 0.1751, "step": 3141 }, { - "epoch": 0.18, - "grad_norm": 0.7926122519324589, - "learning_rate": 1.8835661420317745e-05, - "loss": 0.4978, + "epoch": 0.14, + "grad_norm": 0.48383810120579757, + "learning_rate": 1.932257064896634e-05, + "loss": 0.3219, "step": 3142 }, { - "epoch": 0.18, - "grad_norm": 0.25284561606242717, - "learning_rate": 1.8834789790075536e-05, - "loss": 0.1756, + "epoch": 0.14, + "grad_norm": 0.5473558823070933, + "learning_rate": 1.932203222168543e-05, + "loss": 0.3468, "step": 3143 }, { - "epoch": 0.18, - "grad_norm": 0.38142146209876004, - "learning_rate": 1.8833917853884935e-05, - "loss": 0.2853, + "epoch": 0.14, + "grad_norm": 1.3030465460043923, + "learning_rate": 1.932149358802371e-05, + "loss": 0.9103, "step": 3144 }, { - "epoch": 0.18, - "grad_norm": 1.088916732216867, - "learning_rate": 1.8833045611776143e-05, - "loss": 0.801, + "epoch": 0.14, + "grad_norm": 0.4264402447689228, + "learning_rate": 1.9320954747993106e-05, + "loss": 0.2879, "step": 3145 }, { - "epoch": 0.18, - "grad_norm": 0.618587140267945, - "learning_rate": 1.8832173063779357e-05, - "loss": 0.3862, + "epoch": 0.14, + "grad_norm": 0.5747687122894152, + "learning_rate": 1.932041570160555e-05, + "loss": 0.3869, "step": 3146 }, { - "epoch": 0.18, - "grad_norm": 0.4643314856293626, - "learning_rate": 1.8831300209924797e-05, - "loss": 0.3202, + "epoch": 0.14, + "grad_norm": 0.3499749620250195, + "learning_rate": 1.931987644887297e-05, + "loss": 0.2106, "step": 3147 }, { - "epoch": 0.18, - "grad_norm": 0.40377489613397566, - "learning_rate": 1.8830427050242693e-05, - "loss": 0.3142, + "epoch": 0.14, + "grad_norm": 0.3958265029799334, + "learning_rate": 1.931933698980731e-05, + "loss": 0.3117, "step": 3148 }, { - "epoch": 0.18, - "grad_norm": 0.2564840205627374, - "learning_rate": 1.8829553584763278e-05, - "loss": 0.1487, + "epoch": 0.14, + "grad_norm": 0.727048942262723, + "learning_rate": 1.931879732442051e-05, + "loss": 0.5368, "step": 3149 }, { - "epoch": 0.18, - "grad_norm": 0.3232933389811095, - "learning_rate": 1.8828679813516806e-05, - "loss": 0.2236, + "epoch": 0.14, + "grad_norm": 0.5044311380775349, + "learning_rate": 1.931825745272452e-05, + "loss": 0.3228, "step": 3150 }, { - "epoch": 0.18, - "grad_norm": 0.5490085608060375, - "learning_rate": 1.8827805736533528e-05, - "loss": 0.4398, + "epoch": 0.14, + "grad_norm": 0.5023875892921424, + "learning_rate": 1.9317717374731292e-05, + "loss": 0.3057, "step": 3151 }, { - "epoch": 0.18, - "grad_norm": 0.5491388527656633, - "learning_rate": 1.8826931353843717e-05, - "loss": 0.4112, + "epoch": 0.14, + "grad_norm": 0.7016889597521573, + "learning_rate": 1.931717709045278e-05, + "loss": 0.3991, "step": 3152 }, { - "epoch": 0.18, - "grad_norm": 0.39943577631082894, - "learning_rate": 1.8826056665477654e-05, - "loss": 0.2803, + "epoch": 0.14, + "grad_norm": 0.40331104420299063, + "learning_rate": 1.9316636599900947e-05, + "loss": 0.3522, "step": 3153 }, { - "epoch": 0.18, - "grad_norm": 0.773866438726851, - "learning_rate": 1.8825181671465628e-05, - "loss": 0.5776, + "epoch": 0.14, + "grad_norm": 0.5241706320398022, + "learning_rate": 1.931609590308776e-05, + "loss": 0.3225, "step": 3154 }, { - "epoch": 0.18, - "grad_norm": 0.3216913090997973, - "learning_rate": 1.882430637183794e-05, - "loss": 0.2737, + "epoch": 0.14, + "grad_norm": 0.3740078675819883, + "learning_rate": 1.931555500002519e-05, + "loss": 0.2415, "step": 3155 }, { - "epoch": 0.18, - "grad_norm": 0.290138086584041, - "learning_rate": 1.8823430766624905e-05, - "loss": 0.1886, + "epoch": 0.14, + "grad_norm": 1.1405798232241413, + "learning_rate": 1.931501389072521e-05, + "loss": 0.586, "step": 3156 }, { - "epoch": 0.18, - "grad_norm": 1.4431178972907859, - "learning_rate": 1.8822554855856838e-05, - "loss": 0.8918, + "epoch": 0.15, + "grad_norm": 0.49033640783815846, + "learning_rate": 1.93144725751998e-05, + "loss": 0.2898, "step": 3157 }, { - "epoch": 0.18, - "grad_norm": 0.5797258534093347, - "learning_rate": 1.8821678639564075e-05, - "loss": 0.4781, + "epoch": 0.15, + "grad_norm": 0.4694372411955041, + "learning_rate": 1.931393105346094e-05, + "loss": 0.3511, "step": 3158 }, { - "epoch": 0.18, - "grad_norm": 0.3837048306336846, - "learning_rate": 1.8820802117776963e-05, - "loss": 0.2224, + "epoch": 0.15, + "grad_norm": 1.0157328287272502, + "learning_rate": 1.931338932552063e-05, + "loss": 0.503, "step": 3159 }, { - "epoch": 0.18, - "grad_norm": 0.42205950077117843, - "learning_rate": 1.8819925290525854e-05, - "loss": 0.3351, + "epoch": 0.15, + "grad_norm": 0.44054732427689425, + "learning_rate": 1.9312847391390853e-05, + "loss": 0.2743, "step": 3160 }, { - "epoch": 0.18, - "grad_norm": 0.3076485977032022, - "learning_rate": 1.8819048157841105e-05, - "loss": 0.2032, + "epoch": 0.15, + "grad_norm": 0.45787188658909833, + "learning_rate": 1.9312305251083613e-05, + "loss": 0.3205, "step": 3161 }, { - "epoch": 0.18, - "grad_norm": 0.5041804621749775, - "learning_rate": 1.8818170719753104e-05, - "loss": 0.3487, + "epoch": 0.15, + "grad_norm": 0.47298131094982027, + "learning_rate": 1.9311762904610913e-05, + "loss": 0.3438, "step": 3162 }, { - "epoch": 0.18, - "grad_norm": 0.38071028970387893, - "learning_rate": 1.8817292976292227e-05, - "loss": 0.33, + "epoch": 0.15, + "grad_norm": 0.3221861595436087, + "learning_rate": 1.931122035198475e-05, + "loss": 0.1875, "step": 3163 }, { - "epoch": 0.18, - "grad_norm": 0.4558400988040032, - "learning_rate": 1.8816414927488877e-05, - "loss": 0.3912, + "epoch": 0.15, + "grad_norm": 1.4479287194271593, + "learning_rate": 1.9310677593217148e-05, + "loss": 0.6514, "step": 3164 }, { - "epoch": 0.18, - "grad_norm": 0.45130705580419683, - "learning_rate": 1.8815536573373453e-05, - "loss": 0.2836, + "epoch": 0.15, + "grad_norm": 0.7957697410146628, + "learning_rate": 1.9310134628320116e-05, + "loss": 0.4793, "step": 3165 }, { - "epoch": 0.18, - "grad_norm": 0.8510761019378265, - "learning_rate": 1.8814657913976377e-05, - "loss": 0.302, + "epoch": 0.15, + "grad_norm": 0.5345584119848411, + "learning_rate": 1.9309591457305673e-05, + "loss": 0.2487, "step": 3166 }, { - "epoch": 0.18, - "grad_norm": 0.429963063044294, - "learning_rate": 1.881377894932808e-05, - "loss": 0.3323, + "epoch": 0.15, + "grad_norm": 0.5586270857642788, + "learning_rate": 1.930904808018585e-05, + "loss": 0.2693, "step": 3167 }, { - "epoch": 0.18, - "grad_norm": 0.29400249907992115, - "learning_rate": 1.8812899679458993e-05, - "loss": 0.23, + "epoch": 0.15, + "grad_norm": 0.5025095582070777, + "learning_rate": 1.9308504496972676e-05, + "loss": 0.3158, "step": 3168 }, { - "epoch": 0.18, - "grad_norm": 0.464996279016933, - "learning_rate": 1.8812020104399572e-05, - "loss": 0.3656, + "epoch": 0.15, + "grad_norm": 0.4276138329993246, + "learning_rate": 1.9307960707678185e-05, + "loss": 0.1915, "step": 3169 }, { - "epoch": 0.18, - "grad_norm": 0.46758247447598833, - "learning_rate": 1.8811140224180273e-05, - "loss": 0.3112, + "epoch": 0.15, + "grad_norm": 0.39186645826195876, + "learning_rate": 1.9307416712314415e-05, + "loss": 0.341, "step": 3170 }, { - "epoch": 0.18, - "grad_norm": 0.41433320208215574, - "learning_rate": 1.8810260038831564e-05, - "loss": 0.291, + "epoch": 0.15, + "grad_norm": 0.807479002631538, + "learning_rate": 1.9306872510893407e-05, + "loss": 0.4967, "step": 3171 }, { - "epoch": 0.18, - "grad_norm": 0.452144073174074, - "learning_rate": 1.8809379548383932e-05, - "loss": 0.2709, + "epoch": 0.15, + "grad_norm": 0.7918766368098512, + "learning_rate": 1.9306328103427214e-05, + "loss": 0.3508, "step": 3172 }, { - "epoch": 0.18, - "grad_norm": 1.178725602164265, - "learning_rate": 1.8808498752867863e-05, - "loss": 0.7155, + "epoch": 0.15, + "grad_norm": 0.4446293951684391, + "learning_rate": 1.9305783489927886e-05, + "loss": 0.2895, "step": 3173 }, { - "epoch": 0.18, - "grad_norm": 0.34409535391847335, - "learning_rate": 1.880761765231386e-05, - "loss": 0.2906, + "epoch": 0.15, + "grad_norm": 0.4767187685024565, + "learning_rate": 1.930523867040748e-05, + "loss": 0.3787, "step": 3174 }, { - "epoch": 0.18, - "grad_norm": 0.7192857758620904, - "learning_rate": 1.8806736246752443e-05, - "loss": 0.4855, + "epoch": 0.15, + "grad_norm": 0.40019431117495385, + "learning_rate": 1.9304693644878057e-05, + "loss": 0.1505, "step": 3175 }, { - "epoch": 0.18, - "grad_norm": 0.3361678418408267, - "learning_rate": 1.8805854536214122e-05, - "loss": 0.2721, + "epoch": 0.15, + "grad_norm": 0.4455918680395822, + "learning_rate": 1.9304148413351687e-05, + "loss": 0.3268, "step": 3176 }, { - "epoch": 0.18, - "grad_norm": 0.37055400507860653, - "learning_rate": 1.8804972520729443e-05, - "loss": 0.262, + "epoch": 0.15, + "grad_norm": 0.8683809062909452, + "learning_rate": 1.9303602975840436e-05, + "loss": 0.5015, "step": 3177 }, { - "epoch": 0.18, - "grad_norm": 0.9664731315219579, - "learning_rate": 1.8804090200328938e-05, - "loss": 0.5504, + "epoch": 0.15, + "grad_norm": 0.39139701711935504, + "learning_rate": 1.9303057332356386e-05, + "loss": 0.2954, "step": 3178 }, { - "epoch": 0.18, - "grad_norm": 0.42261767384952514, - "learning_rate": 1.880320757504317e-05, - "loss": 0.2837, + "epoch": 0.15, + "grad_norm": 0.5558429432628517, + "learning_rate": 1.930251148291161e-05, + "loss": 0.2984, "step": 3179 }, { - "epoch": 0.18, - "grad_norm": 0.614889244744422, - "learning_rate": 1.8802324644902704e-05, - "loss": 0.3954, + "epoch": 0.15, + "grad_norm": 0.5010111188600969, + "learning_rate": 1.9301965427518196e-05, + "loss": 0.3111, "step": 3180 }, { - "epoch": 0.18, - "grad_norm": 0.5047054055116674, - "learning_rate": 1.880144140993811e-05, - "loss": 0.365, + "epoch": 0.15, + "grad_norm": 0.34024890323423984, + "learning_rate": 1.9301419166188237e-05, + "loss": 0.2288, "step": 3181 }, { - "epoch": 0.18, - "grad_norm": 0.2809330896067468, - "learning_rate": 1.880055787017998e-05, - "loss": 0.1917, + "epoch": 0.15, + "grad_norm": 0.4033983913869017, + "learning_rate": 1.9300872698933814e-05, + "loss": 0.286, "step": 3182 }, { - "epoch": 0.18, - "grad_norm": 0.4424624416740233, - "learning_rate": 1.8799674025658913e-05, - "loss": 0.3117, + "epoch": 0.15, + "grad_norm": 0.8657700252769647, + "learning_rate": 1.930032602576704e-05, + "loss": 0.5075, "step": 3183 }, { - "epoch": 0.18, - "grad_norm": 0.4259910851470908, - "learning_rate": 1.879878987640551e-05, - "loss": 0.3169, + "epoch": 0.15, + "grad_norm": 0.49056725907574783, + "learning_rate": 1.929977914670001e-05, + "loss": 0.3039, "step": 3184 }, { - "epoch": 0.18, - "grad_norm": 1.5477900879594761, - "learning_rate": 1.879790542245039e-05, - "loss": 0.4208, + "epoch": 0.15, + "grad_norm": 0.6186625125551448, + "learning_rate": 1.9299232061744833e-05, + "loss": 0.3722, "step": 3185 }, { - "epoch": 0.18, - "grad_norm": 0.5038440134908405, - "learning_rate": 1.8797020663824187e-05, - "loss": 0.3235, + "epoch": 0.15, + "grad_norm": 0.4533090921206725, + "learning_rate": 1.929868477091362e-05, + "loss": 0.3045, "step": 3186 }, { - "epoch": 0.18, - "grad_norm": 0.4738666383574769, - "learning_rate": 1.8796135600557534e-05, - "loss": 0.3355, + "epoch": 0.15, + "grad_norm": 0.3303742698411355, + "learning_rate": 1.929813727421849e-05, + "loss": 0.2245, "step": 3187 }, { - "epoch": 0.18, - "grad_norm": 1.4251583580601876, - "learning_rate": 1.8795250232681085e-05, - "loss": 0.9205, + "epoch": 0.15, + "grad_norm": 0.6080446515393702, + "learning_rate": 1.929758957167156e-05, + "loss": 0.3689, "step": 3188 }, { - "epoch": 0.18, - "grad_norm": 0.253214970145997, - "learning_rate": 1.8794364560225496e-05, - "loss": 0.1462, + "epoch": 0.15, + "grad_norm": 0.3923103014251482, + "learning_rate": 1.9297041663284962e-05, + "loss": 0.2681, "step": 3189 }, { - "epoch": 0.18, - "grad_norm": 0.6313789827965512, - "learning_rate": 1.8793478583221448e-05, - "loss": 0.3916, + "epoch": 0.15, + "grad_norm": 0.7392780901983406, + "learning_rate": 1.929649354907082e-05, + "loss": 0.3038, "step": 3190 }, { - "epoch": 0.18, - "grad_norm": 0.4616837157366747, - "learning_rate": 1.879259230169961e-05, - "loss": 0.3263, + "epoch": 0.15, + "grad_norm": 0.5710596378624694, + "learning_rate": 1.9295945229041272e-05, + "loss": 0.3741, "step": 3191 }, { - "epoch": 0.18, - "grad_norm": 0.3453392906380078, - "learning_rate": 1.8791705715690675e-05, - "loss": 0.2672, + "epoch": 0.15, + "grad_norm": 1.0158258086576393, + "learning_rate": 1.9295396703208454e-05, + "loss": 0.479, "step": 3192 }, { - "epoch": 0.18, - "grad_norm": 0.7657077369940369, - "learning_rate": 1.8790818825225355e-05, - "loss": 0.5951, + "epoch": 0.15, + "grad_norm": 0.5678705213468687, + "learning_rate": 1.9294847971584513e-05, + "loss": 0.3769, "step": 3193 }, { - "epoch": 0.18, - "grad_norm": 0.4873348104716959, - "learning_rate": 1.8789931630334353e-05, - "loss": 0.3264, + "epoch": 0.15, + "grad_norm": 0.4670509314420693, + "learning_rate": 1.9294299034181594e-05, + "loss": 0.3384, "step": 3194 }, { - "epoch": 0.18, - "grad_norm": 0.3471370066679422, - "learning_rate": 1.8789044131048397e-05, - "loss": 0.2468, + "epoch": 0.15, + "grad_norm": 0.27623614490639964, + "learning_rate": 1.9293749891011855e-05, + "loss": 0.1386, "step": 3195 }, { - "epoch": 0.18, - "grad_norm": 0.5258610482459171, - "learning_rate": 1.8788156327398225e-05, - "loss": 0.2788, + "epoch": 0.15, + "grad_norm": 0.7682915154555132, + "learning_rate": 1.9293200542087453e-05, + "loss": 0.3626, "step": 3196 }, { - "epoch": 0.18, - "grad_norm": 0.5999552621196282, - "learning_rate": 1.8787268219414572e-05, - "loss": 0.4748, + "epoch": 0.15, + "grad_norm": 0.5279769404968618, + "learning_rate": 1.9292650987420543e-05, + "loss": 0.3554, "step": 3197 }, { - "epoch": 0.18, - "grad_norm": 0.5376375761332872, - "learning_rate": 1.87863798071282e-05, - "loss": 0.3582, + "epoch": 0.15, + "grad_norm": 0.5299545096333286, + "learning_rate": 1.92921012270233e-05, + "loss": 0.3284, "step": 3198 }, { - "epoch": 0.18, - "grad_norm": 0.42167059924717126, - "learning_rate": 1.8785491090569876e-05, - "loss": 0.2779, + "epoch": 0.15, + "grad_norm": 0.4617630524083326, + "learning_rate": 1.929155126090789e-05, + "loss": 0.2786, "step": 3199 }, { - "epoch": 0.18, - "grad_norm": 1.048591073850127, - "learning_rate": 1.878460206977037e-05, - "loss": 0.7897, + "epoch": 0.15, + "grad_norm": 0.5454302394682277, + "learning_rate": 1.929100108908649e-05, + "loss": 0.4754, "step": 3200 }, { - "epoch": 0.18, - "grad_norm": 0.3659218143881057, - "learning_rate": 1.8783712744760475e-05, - "loss": 0.1953, + "epoch": 0.15, + "grad_norm": 0.4338124441517534, + "learning_rate": 1.9290450711571282e-05, + "loss": 0.2396, "step": 3201 }, { - "epoch": 0.18, - "grad_norm": 0.4075025882352195, - "learning_rate": 1.878282311557098e-05, - "loss": 0.2572, + "epoch": 0.15, + "grad_norm": 0.37686532247718624, + "learning_rate": 1.9289900128374446e-05, + "loss": 0.2331, "step": 3202 }, { - "epoch": 0.18, - "grad_norm": 0.43005347413976514, - "learning_rate": 1.8781933182232702e-05, - "loss": 0.3816, + "epoch": 0.15, + "grad_norm": 0.5891473713972789, + "learning_rate": 1.9289349339508182e-05, + "loss": 0.4028, "step": 3203 }, { - "epoch": 0.18, - "grad_norm": 0.5294171894124521, - "learning_rate": 1.8781042944776457e-05, - "loss": 0.3735, + "epoch": 0.15, + "grad_norm": 0.7410257508227323, + "learning_rate": 1.9288798344984673e-05, + "loss": 0.5607, "step": 3204 }, { - "epoch": 0.18, - "grad_norm": 0.4682006514675936, - "learning_rate": 1.8780152403233073e-05, - "loss": 0.29, + "epoch": 0.15, + "grad_norm": 0.38872939624007946, + "learning_rate": 1.928824714481612e-05, + "loss": 0.2891, "step": 3205 }, { - "epoch": 0.18, - "grad_norm": 1.4391156031332297, - "learning_rate": 1.8779261557633385e-05, - "loss": 0.696, + "epoch": 0.15, + "grad_norm": 0.43577287495685746, + "learning_rate": 1.928769573901473e-05, + "loss": 0.3048, "step": 3206 }, { - "epoch": 0.18, - "grad_norm": 0.34352071114583893, - "learning_rate": 1.8778370408008247e-05, - "loss": 0.256, + "epoch": 0.15, + "grad_norm": 0.36404204777898613, + "learning_rate": 1.9287144127592704e-05, + "loss": 0.2077, "step": 3207 }, { - "epoch": 0.18, - "grad_norm": 0.31215051226110274, - "learning_rate": 1.877747895438852e-05, - "loss": 0.2139, + "epoch": 0.15, + "grad_norm": 0.5096703451805814, + "learning_rate": 1.928659231056226e-05, + "loss": 0.1539, "step": 3208 }, { - "epoch": 0.18, - "grad_norm": 0.7384179652760221, - "learning_rate": 1.8776587196805077e-05, - "loss": 0.4823, + "epoch": 0.15, + "grad_norm": 0.5258365712449604, + "learning_rate": 1.9286040287935614e-05, + "loss": 0.3614, "step": 3209 }, { - "epoch": 0.18, - "grad_norm": 0.6360730312478228, - "learning_rate": 1.8775695135288794e-05, - "loss": 0.4391, + "epoch": 0.15, + "grad_norm": 0.4864640441382751, + "learning_rate": 1.9285488059724985e-05, + "loss": 0.3875, "step": 3210 }, { - "epoch": 0.18, - "grad_norm": 0.46584518363056193, - "learning_rate": 1.8774802769870564e-05, - "loss": 0.3308, + "epoch": 0.15, + "grad_norm": 0.6650448955162372, + "learning_rate": 1.92849356259426e-05, + "loss": 0.4279, "step": 3211 }, { - "epoch": 0.18, - "grad_norm": 0.4706672925631658, - "learning_rate": 1.8773910100581294e-05, - "loss": 0.3272, + "epoch": 0.15, + "grad_norm": 0.42961559333754157, + "learning_rate": 1.9284382986600692e-05, + "loss": 0.2781, "step": 3212 }, { - "epoch": 0.18, - "grad_norm": 0.2877101741955395, - "learning_rate": 1.8773017127451893e-05, - "loss": 0.2086, + "epoch": 0.15, + "grad_norm": 0.35629413293473033, + "learning_rate": 1.9283830141711487e-05, + "loss": 0.1621, "step": 3213 }, { - "epoch": 0.18, - "grad_norm": 0.4410049272946392, - "learning_rate": 1.877212385051329e-05, - "loss": 0.2814, + "epoch": 0.15, + "grad_norm": 0.47219871543789527, + "learning_rate": 1.9283277091287237e-05, + "loss": 0.3068, "step": 3214 }, { - "epoch": 0.18, - "grad_norm": 0.4041381893357965, - "learning_rate": 1.8771230269796412e-05, - "loss": 0.3087, + "epoch": 0.15, + "grad_norm": 0.44318002424973796, + "learning_rate": 1.9282723835340173e-05, + "loss": 0.272, "step": 3215 }, { - "epoch": 0.18, - "grad_norm": 0.7639969423662106, - "learning_rate": 1.877033638533221e-05, - "loss": 0.4319, + "epoch": 0.15, + "grad_norm": 0.8280396135063036, + "learning_rate": 1.9282170373882553e-05, + "loss": 0.4922, "step": 3216 }, { - "epoch": 0.18, - "grad_norm": 0.4278602034149037, - "learning_rate": 1.876944219715163e-05, - "loss": 0.3137, + "epoch": 0.15, + "grad_norm": 0.4185485193047068, + "learning_rate": 1.9281616706926632e-05, + "loss": 0.3226, "step": 3217 }, { - "epoch": 0.18, - "grad_norm": 0.48961249607176893, - "learning_rate": 1.876854770528565e-05, - "loss": 0.3156, + "epoch": 0.15, + "grad_norm": 0.4690936911060301, + "learning_rate": 1.9281062834484656e-05, + "loss": 0.2356, "step": 3218 }, { - "epoch": 0.18, - "grad_norm": 0.44431744410002555, - "learning_rate": 1.8767652909765236e-05, - "loss": 0.3015, + "epoch": 0.15, + "grad_norm": 0.31955870983017265, + "learning_rate": 1.9280508756568895e-05, + "loss": 0.1843, "step": 3219 }, { - "epoch": 0.19, - "grad_norm": 0.33106297024120673, - "learning_rate": 1.8766757810621383e-05, - "loss": 0.3407, + "epoch": 0.15, + "grad_norm": 0.5254575889635318, + "learning_rate": 1.927995447319162e-05, + "loss": 0.3301, "step": 3220 }, { - "epoch": 0.19, - "grad_norm": 0.28688345276164595, - "learning_rate": 1.876586240788508e-05, - "loss": 0.1918, + "epoch": 0.15, + "grad_norm": 0.4872702392818005, + "learning_rate": 1.9279399984365094e-05, + "loss": 0.2988, "step": 3221 }, { - "epoch": 0.19, - "grad_norm": 0.5071273372638695, - "learning_rate": 1.876496670158734e-05, - "loss": 0.3566, + "epoch": 0.15, + "grad_norm": 0.48131124066296466, + "learning_rate": 1.9278845290101594e-05, + "loss": 0.3855, "step": 3222 }, { - "epoch": 0.19, - "grad_norm": 0.4110348765229278, - "learning_rate": 1.876407069175918e-05, - "loss": 0.316, + "epoch": 0.15, + "grad_norm": 0.6557331026414057, + "learning_rate": 1.9278290390413404e-05, + "loss": 0.3521, "step": 3223 }, { - "epoch": 0.19, - "grad_norm": 0.9657130710337607, - "learning_rate": 1.876317437843163e-05, - "loss": 0.6621, + "epoch": 0.15, + "grad_norm": 0.5461575474809912, + "learning_rate": 1.9277735285312805e-05, + "loss": 0.3378, "step": 3224 }, { - "epoch": 0.19, - "grad_norm": 0.4092046974694814, - "learning_rate": 1.8762277761635725e-05, - "loss": 0.2433, + "epoch": 0.15, + "grad_norm": 0.36294688277412485, + "learning_rate": 1.9277179974812094e-05, + "loss": 0.249, "step": 3225 }, { - "epoch": 0.19, - "grad_norm": 0.39059696176709385, - "learning_rate": 1.876138084140252e-05, - "loss": 0.3564, + "epoch": 0.15, + "grad_norm": 0.5608482701557713, + "learning_rate": 1.9276624458923555e-05, + "loss": 0.3109, "step": 3226 }, { - "epoch": 0.19, - "grad_norm": 0.3427885145541495, - "learning_rate": 1.876048361776307e-05, - "loss": 0.2631, + "epoch": 0.15, + "grad_norm": 0.4228448283263457, + "learning_rate": 1.9276068737659495e-05, + "loss": 0.2883, "step": 3227 }, { - "epoch": 0.19, - "grad_norm": 0.2848821101035255, - "learning_rate": 1.8759586090748454e-05, - "loss": 0.1501, + "epoch": 0.15, + "grad_norm": 0.6885275099330229, + "learning_rate": 1.9275512811032212e-05, + "loss": 0.4361, "step": 3228 }, { - "epoch": 0.19, - "grad_norm": 0.4979226409712117, - "learning_rate": 1.8758688260389747e-05, - "loss": 0.3348, + "epoch": 0.15, + "grad_norm": 0.4318053598840434, + "learning_rate": 1.9274956679054012e-05, + "loss": 0.3014, "step": 3229 }, { - "epoch": 0.19, - "grad_norm": 1.7883755482032335, - "learning_rate": 1.8757790126718044e-05, - "loss": 0.849, + "epoch": 0.15, + "grad_norm": 0.48066167373300417, + "learning_rate": 1.9274400341737214e-05, + "loss": 0.3261, "step": 3230 }, { - "epoch": 0.19, - "grad_norm": 0.3038487339560472, - "learning_rate": 1.8756891689764444e-05, - "loss": 0.2654, + "epoch": 0.15, + "grad_norm": 0.31439854213144336, + "learning_rate": 1.927384379909413e-05, + "loss": 0.1202, "step": 3231 }, { - "epoch": 0.19, - "grad_norm": 0.426868655627405, - "learning_rate": 1.8755992949560058e-05, - "loss": 0.3428, + "epoch": 0.15, + "grad_norm": 0.42552318931994654, + "learning_rate": 1.927328705113708e-05, + "loss": 0.3098, "step": 3232 }, { - "epoch": 0.19, - "grad_norm": 0.34227594083645335, - "learning_rate": 1.8755093906136016e-05, - "loss": 0.2369, + "epoch": 0.15, + "grad_norm": 0.3759589021155831, + "learning_rate": 1.92727300978784e-05, + "loss": 0.3337, "step": 3233 }, { - "epoch": 0.19, - "grad_norm": 0.4159884317750057, - "learning_rate": 1.8754194559523447e-05, - "loss": 0.2368, + "epoch": 0.15, + "grad_norm": 0.601441605695641, + "learning_rate": 1.9272172939330406e-05, + "loss": 0.3329, "step": 3234 }, { - "epoch": 0.19, - "grad_norm": 0.4452450173245431, - "learning_rate": 1.87532949097535e-05, - "loss": 0.3102, + "epoch": 0.15, + "grad_norm": 0.8761752522400605, + "learning_rate": 1.9271615575505445e-05, + "loss": 0.5781, "step": 3235 }, { - "epoch": 0.19, - "grad_norm": 0.8178285752035969, - "learning_rate": 1.8752394956857322e-05, - "loss": 0.5632, + "epoch": 0.15, + "grad_norm": 0.45208895347602995, + "learning_rate": 1.9271058006415845e-05, + "loss": 0.3135, "step": 3236 }, { - "epoch": 0.19, - "grad_norm": 0.6512942111877557, - "learning_rate": 1.8751494700866088e-05, - "loss": 0.491, + "epoch": 0.15, + "grad_norm": 0.6807572315940577, + "learning_rate": 1.927050023207396e-05, + "loss": 0.4518, "step": 3237 }, { - "epoch": 0.19, - "grad_norm": 0.36820713984463505, - "learning_rate": 1.8750594141810964e-05, - "loss": 0.205, + "epoch": 0.15, + "grad_norm": 0.3062312840277606, + "learning_rate": 1.9269942252492134e-05, + "loss": 0.212, "step": 3238 }, { - "epoch": 0.19, - "grad_norm": 0.32327909485014955, - "learning_rate": 1.8749693279723146e-05, - "loss": 0.241, + "epoch": 0.15, + "grad_norm": 0.45363753036145527, + "learning_rate": 1.9269384067682725e-05, + "loss": 0.3244, "step": 3239 }, { - "epoch": 0.19, - "grad_norm": 1.3048291367110827, - "learning_rate": 1.874879211463382e-05, - "loss": 0.6865, + "epoch": 0.15, + "grad_norm": 0.8604366655507797, + "learning_rate": 1.926882567765808e-05, + "loss": 0.5354, "step": 3240 }, { - "epoch": 0.19, - "grad_norm": 0.38934075541803975, - "learning_rate": 1.8747890646574204e-05, - "loss": 0.2433, + "epoch": 0.15, + "grad_norm": 0.43135930644390114, + "learning_rate": 1.9268267082430573e-05, + "loss": 0.2805, "step": 3241 }, { - "epoch": 0.19, - "grad_norm": 0.8793842949876021, - "learning_rate": 1.874698887557551e-05, - "loss": 0.5371, + "epoch": 0.15, + "grad_norm": 0.5364504020433147, + "learning_rate": 1.9267708282012563e-05, + "loss": 0.2905, "step": 3242 }, { - "epoch": 0.19, - "grad_norm": 0.4606832588802145, - "learning_rate": 1.8746086801668964e-05, - "loss": 0.3555, + "epoch": 0.15, + "grad_norm": 0.5488661320997008, + "learning_rate": 1.9267149276416424e-05, + "loss": 0.3489, "step": 3243 }, { - "epoch": 0.19, - "grad_norm": 0.34513483530045763, - "learning_rate": 1.8745184424885815e-05, - "loss": 0.2315, + "epoch": 0.15, + "grad_norm": 0.9155370263954979, + "learning_rate": 1.9266590065654532e-05, + "loss": 0.298, "step": 3244 }, { - "epoch": 0.19, - "grad_norm": 0.6050070424752414, - "learning_rate": 1.87442817452573e-05, - "loss": 0.2856, + "epoch": 0.15, + "grad_norm": 0.3535429216755445, + "learning_rate": 1.926603064973927e-05, + "loss": 0.2624, "step": 3245 }, { - "epoch": 0.19, - "grad_norm": 0.30067116685715717, - "learning_rate": 1.8743378762814685e-05, - "loss": 0.2406, + "epoch": 0.15, + "grad_norm": 0.5368167740178603, + "learning_rate": 1.9265471028683017e-05, + "loss": 0.416, "step": 3246 }, { - "epoch": 0.19, - "grad_norm": 0.3879704127020492, - "learning_rate": 1.874247547758924e-05, - "loss": 0.2495, + "epoch": 0.15, + "grad_norm": 0.9234398128792525, + "learning_rate": 1.9264911202498164e-05, + "loss": 0.4423, "step": 3247 }, { - "epoch": 0.19, - "grad_norm": 1.0069201260432241, - "learning_rate": 1.8741571889612248e-05, - "loss": 0.5666, + "epoch": 0.15, + "grad_norm": 0.4684647557990439, + "learning_rate": 1.926435117119711e-05, + "loss": 0.2688, "step": 3248 }, { - "epoch": 0.19, - "grad_norm": 0.6375161367998676, - "learning_rate": 1.8740667998914996e-05, - "loss": 0.4336, + "epoch": 0.15, + "grad_norm": 0.5907174287261161, + "learning_rate": 1.926379093479225e-05, + "loss": 0.3488, "step": 3249 }, { - "epoch": 0.19, - "grad_norm": 0.42327093852418024, - "learning_rate": 1.8739763805528782e-05, - "loss": 0.306, + "epoch": 0.15, + "grad_norm": 0.6682518049715541, + "learning_rate": 1.9263230493295987e-05, + "loss": 0.3993, "step": 3250 }, { - "epoch": 0.19, - "grad_norm": 0.41315398403240317, - "learning_rate": 1.8738859309484926e-05, - "loss": 0.2775, - "step": 3251 + "epoch": 0.15, + "grad_norm": 0.31436921504914705, + "learning_rate": 1.9262669846720727e-05, + "loss": 0.2154, + "step": 3251 }, { - "epoch": 0.19, - "grad_norm": 0.4775464807987565, - "learning_rate": 1.8737954510814752e-05, - "loss": 0.306, + "epoch": 0.15, + "grad_norm": 0.5731635351710359, + "learning_rate": 1.9262108995078885e-05, + "loss": 0.3459, "step": 3252 }, { - "epoch": 0.19, - "grad_norm": 0.3845598831291664, - "learning_rate": 1.8737049409549586e-05, - "loss": 0.2899, + "epoch": 0.15, + "grad_norm": 0.5271963303375132, + "learning_rate": 1.926154793838288e-05, + "loss": 0.336, "step": 3253 }, { - "epoch": 0.19, - "grad_norm": 0.7332887356293013, - "learning_rate": 1.8736144005720775e-05, - "loss": 0.3581, + "epoch": 0.15, + "grad_norm": 0.45455298292902047, + "learning_rate": 1.9260986676645127e-05, + "loss": 0.2069, "step": 3254 }, { - "epoch": 0.19, - "grad_norm": 0.5740626229995096, - "learning_rate": 1.8735238299359672e-05, - "loss": 0.3243, + "epoch": 0.15, + "grad_norm": 1.0058343659208009, + "learning_rate": 1.9260425209878052e-05, + "loss": 0.6, "step": 3255 }, { - "epoch": 0.19, - "grad_norm": 0.37707225799385136, - "learning_rate": 1.8734332290497642e-05, - "loss": 0.285, + "epoch": 0.15, + "grad_norm": 0.5564218257548972, + "learning_rate": 1.9259863538094096e-05, + "loss": 0.4186, "step": 3256 }, { - "epoch": 0.19, - "grad_norm": 0.2926649412762437, - "learning_rate": 1.8733425979166063e-05, - "loss": 0.1384, + "epoch": 0.15, + "grad_norm": 0.37200959636958075, + "learning_rate": 1.925930166130568e-05, + "loss": 0.2095, "step": 3257 }, { - "epoch": 0.19, - "grad_norm": 0.4234930784962659, - "learning_rate": 1.8732519365396314e-05, - "loss": 0.3293, + "epoch": 0.15, + "grad_norm": 0.39796237168670145, + "learning_rate": 1.925873957952525e-05, + "loss": 0.3125, "step": 3258 }, { - "epoch": 0.19, - "grad_norm": 0.44694857769056656, - "learning_rate": 1.87316124492198e-05, - "loss": 0.3063, + "epoch": 0.15, + "grad_norm": 0.5023912325281488, + "learning_rate": 1.925817729276525e-05, + "loss": 0.2842, "step": 3259 }, { - "epoch": 0.19, - "grad_norm": 0.881998655230814, - "learning_rate": 1.8730705230667916e-05, - "loss": 0.4143, + "epoch": 0.15, + "grad_norm": 0.4407457891965165, + "learning_rate": 1.9257614801038128e-05, + "loss": 0.2177, "step": 3260 }, { - "epoch": 0.19, - "grad_norm": 0.47126328441633636, - "learning_rate": 1.8729797709772088e-05, - "loss": 0.3326, + "epoch": 0.15, + "grad_norm": 0.4762798895675324, + "learning_rate": 1.9257052104356337e-05, + "loss": 0.3455, "step": 3261 }, { - "epoch": 0.19, - "grad_norm": 0.3424134567721488, - "learning_rate": 1.872888988656374e-05, - "loss": 0.309, + "epoch": 0.15, + "grad_norm": 0.886704237193615, + "learning_rate": 1.9256489202732333e-05, + "loss": 0.5161, "step": 3262 }, { - "epoch": 0.19, - "grad_norm": 0.5270321114224565, - "learning_rate": 1.8727981761074315e-05, - "loss": 0.375, + "epoch": 0.15, + "grad_norm": 0.4157789720900731, + "learning_rate": 1.925592609617858e-05, + "loss": 0.3021, "step": 3263 }, { - "epoch": 0.19, - "grad_norm": 0.6391706970436589, - "learning_rate": 1.872707333333525e-05, - "loss": 0.3429, + "epoch": 0.15, + "grad_norm": 0.4377061734221589, + "learning_rate": 1.9255362784707546e-05, + "loss": 0.1923, "step": 3264 }, { - "epoch": 0.19, - "grad_norm": 0.4914387054979702, - "learning_rate": 1.8726164603378016e-05, - "loss": 0.3219, + "epoch": 0.15, + "grad_norm": 0.36430024358809976, + "learning_rate": 1.92547992683317e-05, + "loss": 0.2754, "step": 3265 }, { - "epoch": 0.19, - "grad_norm": 0.6902772708493562, - "learning_rate": 1.8725255571234075e-05, - "loss": 0.4276, + "epoch": 0.15, + "grad_norm": 0.4119056127851174, + "learning_rate": 1.925423554706352e-05, + "loss": 0.2939, "step": 3266 }, { - "epoch": 0.19, - "grad_norm": 0.252273761014582, - "learning_rate": 1.872434623693491e-05, - "loss": 0.1688, + "epoch": 0.15, + "grad_norm": 0.9998265212843225, + "learning_rate": 1.9253671620915478e-05, + "loss": 0.4127, "step": 3267 }, { - "epoch": 0.19, - "grad_norm": 0.43328993906220703, - "learning_rate": 1.8723436600512007e-05, - "loss": 0.3001, + "epoch": 0.15, + "grad_norm": 0.8562086879603014, + "learning_rate": 1.925310748990007e-05, + "loss": 0.5161, "step": 3268 }, { - "epoch": 0.19, - "grad_norm": 1.5775971207484492, - "learning_rate": 1.8722526661996872e-05, - "loss": 0.6708, + "epoch": 0.15, + "grad_norm": 0.35901322617884873, + "learning_rate": 1.925254315402978e-05, + "loss": 0.2742, "step": 3269 }, { - "epoch": 0.19, - "grad_norm": 0.4552562263007585, - "learning_rate": 1.8721616421421017e-05, - "loss": 0.3101, + "epoch": 0.15, + "grad_norm": 0.6241828175210817, + "learning_rate": 1.9251978613317104e-05, + "loss": 0.3282, "step": 3270 }, { - "epoch": 0.19, - "grad_norm": 0.3964430436328616, - "learning_rate": 1.8720705878815953e-05, - "loss": 0.2978, + "epoch": 0.15, + "grad_norm": 0.40436323721775413, + "learning_rate": 1.9251413867774537e-05, + "loss": 0.2135, "step": 3271 }, { - "epoch": 0.19, - "grad_norm": 0.9240033860694061, - "learning_rate": 1.8719795034213226e-05, - "loss": 0.5123, + "epoch": 0.15, + "grad_norm": 0.5536224034960677, + "learning_rate": 1.9250848917414582e-05, + "loss": 0.2964, "step": 3272 }, { - "epoch": 0.19, - "grad_norm": 0.35059453995859596, - "learning_rate": 1.871888388764437e-05, - "loss": 0.1863, + "epoch": 0.15, + "grad_norm": 0.5790627036452501, + "learning_rate": 1.9250283762249748e-05, + "loss": 0.3672, "step": 3273 }, { - "epoch": 0.19, - "grad_norm": 0.32757580915834644, - "learning_rate": 1.8717972439140938e-05, - "loss": 0.23, + "epoch": 0.15, + "grad_norm": 1.327590276026709, + "learning_rate": 1.924971840229255e-05, + "loss": 0.4477, "step": 3274 }, { - "epoch": 0.19, - "grad_norm": 0.5944268003456217, - "learning_rate": 1.8717060688734495e-05, - "loss": 0.41, + "epoch": 0.15, + "grad_norm": 0.49081487666703755, + "learning_rate": 1.92491528375555e-05, + "loss": 0.2879, "step": 3275 }, { - "epoch": 0.19, - "grad_norm": 1.1405676959172855, - "learning_rate": 1.871614863645662e-05, - "loss": 0.5975, + "epoch": 0.15, + "grad_norm": 0.6970790993951184, + "learning_rate": 1.924858706805112e-05, + "loss": 0.3895, "step": 3276 }, { - "epoch": 0.19, - "grad_norm": 0.4561238387683592, - "learning_rate": 1.871523628233889e-05, - "loss": 0.267, + "epoch": 0.15, + "grad_norm": 0.31340868493387325, + "learning_rate": 1.9248021093791935e-05, + "loss": 0.1995, "step": 3277 }, { - "epoch": 0.19, - "grad_norm": 0.5709846215706098, - "learning_rate": 1.87143236264129e-05, - "loss": 0.4121, + "epoch": 0.15, + "grad_norm": 0.5390803529145244, + "learning_rate": 1.924745491479048e-05, + "loss": 0.2784, "step": 3278 }, { - "epoch": 0.19, - "grad_norm": 0.32475528845468604, - "learning_rate": 1.871341066871026e-05, - "loss": 0.2167, + "epoch": 0.15, + "grad_norm": 1.2557325667823025, + "learning_rate": 1.924688853105928e-05, + "loss": 0.4389, "step": 3279 }, { - "epoch": 0.19, - "grad_norm": 0.35223528951570476, - "learning_rate": 1.8712497409262582e-05, - "loss": 0.2124, + "epoch": 0.15, + "grad_norm": 0.9167091316434258, + "learning_rate": 1.9246321942610884e-05, + "loss": 0.404, "step": 3280 }, { - "epoch": 0.19, - "grad_norm": 1.107902808950662, - "learning_rate": 1.8711583848101492e-05, - "loss": 0.6202, + "epoch": 0.15, + "grad_norm": 0.44770955422012515, + "learning_rate": 1.9245755149457833e-05, + "loss": 0.2887, "step": 3281 }, { - "epoch": 0.19, - "grad_norm": 0.5448763076692719, - "learning_rate": 1.871066998525863e-05, - "loss": 0.3702, + "epoch": 0.15, + "grad_norm": 0.5937622152708458, + "learning_rate": 1.9245188151612674e-05, + "loss": 0.4438, "step": 3282 }, { - "epoch": 0.19, - "grad_norm": 0.43760624896546474, - "learning_rate": 1.870975582076564e-05, - "loss": 0.2446, + "epoch": 0.15, + "grad_norm": 0.45601425257337, + "learning_rate": 1.924462094908796e-05, + "loss": 0.2287, "step": 3283 }, { - "epoch": 0.19, - "grad_norm": 0.49297898663282697, - "learning_rate": 1.8708841354654184e-05, - "loss": 0.4166, + "epoch": 0.15, + "grad_norm": 0.4649645056399888, + "learning_rate": 1.9244053541896246e-05, + "loss": 0.2988, "step": 3284 }, { - "epoch": 0.19, - "grad_norm": 0.3323663731490816, - "learning_rate": 1.870792658695592e-05, - "loss": 0.2341, + "epoch": 0.15, + "grad_norm": 0.5502184276511177, + "learning_rate": 1.92434859300501e-05, + "loss": 0.3709, "step": 3285 }, { - "epoch": 0.19, - "grad_norm": 0.49578418381735967, - "learning_rate": 1.870701151770254e-05, - "loss": 0.2838, + "epoch": 0.15, + "grad_norm": 0.5138328953697913, + "learning_rate": 1.924291811356208e-05, + "loss": 0.2486, "step": 3286 }, { - "epoch": 0.19, - "grad_norm": 0.4174917171345786, - "learning_rate": 1.870609614692572e-05, - "loss": 0.2761, + "epoch": 0.15, + "grad_norm": 0.49727748473383454, + "learning_rate": 1.9242350092444763e-05, + "loss": 0.2345, "step": 3287 }, { - "epoch": 0.19, - "grad_norm": 0.750839879220727, - "learning_rate": 1.8705180474657166e-05, - "loss": 0.4965, + "epoch": 0.15, + "grad_norm": 1.560662131944898, + "learning_rate": 1.9241781866710726e-05, + "loss": 0.7828, "step": 3288 }, { - "epoch": 0.19, - "grad_norm": 0.5129470258434858, - "learning_rate": 1.8704264500928588e-05, - "loss": 0.2986, + "epoch": 0.15, + "grad_norm": 0.4750498647626232, + "learning_rate": 1.9241213436372543e-05, + "loss": 0.3789, "step": 3289 }, { - "epoch": 0.19, - "grad_norm": 0.42347571147632856, - "learning_rate": 1.87033482257717e-05, - "loss": 0.2931, + "epoch": 0.15, + "grad_norm": 0.3475992208925108, + "learning_rate": 1.9240644801442802e-05, + "loss": 0.2783, "step": 3290 }, { - "epoch": 0.19, - "grad_norm": 0.481272218463836, - "learning_rate": 1.8702431649218245e-05, - "loss": 0.2205, + "epoch": 0.15, + "grad_norm": 0.40579746051318505, + "learning_rate": 1.9240075961934092e-05, + "loss": 0.2665, "step": 3291 }, { - "epoch": 0.19, - "grad_norm": 0.30216937260105364, - "learning_rate": 1.870151477129995e-05, - "loss": 0.2286, + "epoch": 0.15, + "grad_norm": 0.38512043530242285, + "learning_rate": 1.9239506917859004e-05, + "loss": 0.2552, "step": 3292 }, { - "epoch": 0.19, - "grad_norm": 0.5601484432369667, - "learning_rate": 1.8700597592048576e-05, - "loss": 0.3279, + "epoch": 0.15, + "grad_norm": 0.4311164021082418, + "learning_rate": 1.9238937669230138e-05, + "loss": 0.226, "step": 3293 }, { - "epoch": 0.19, - "grad_norm": 0.4983673447612351, - "learning_rate": 1.869968011149588e-05, - "loss": 0.3449, + "epoch": 0.15, + "grad_norm": 0.9940823119976907, + "learning_rate": 1.9238368216060098e-05, + "loss": 0.4922, "step": 3294 }, { - "epoch": 0.19, - "grad_norm": 0.48122935402031364, - "learning_rate": 1.8698762329673636e-05, - "loss": 0.3335, + "epoch": 0.15, + "grad_norm": 1.209824418830558, + "learning_rate": 1.9237798558361488e-05, + "loss": 0.511, "step": 3295 }, { - "epoch": 0.19, - "grad_norm": 0.5877751356413182, - "learning_rate": 1.869784424661363e-05, - "loss": 0.3133, + "epoch": 0.15, + "grad_norm": 0.4371190551947611, + "learning_rate": 1.9237228696146922e-05, + "loss": 0.2472, "step": 3296 }, { - "epoch": 0.19, - "grad_norm": 0.3422421447813925, - "learning_rate": 1.8696925862347647e-05, - "loss": 0.2158, + "epoch": 0.15, + "grad_norm": 0.34082713099362383, + "learning_rate": 1.9236658629429014e-05, + "loss": 0.2703, "step": 3297 }, { - "epoch": 0.19, - "grad_norm": 0.3768728050060615, - "learning_rate": 1.8696007176907494e-05, - "loss": 0.2928, + "epoch": 0.15, + "grad_norm": 0.4643349039847566, + "learning_rate": 1.9236088358220392e-05, + "loss": 0.3044, "step": 3298 }, { - "epoch": 0.19, - "grad_norm": 0.6163884429509207, - "learning_rate": 1.869508819032499e-05, - "loss": 0.3752, + "epoch": 0.15, + "grad_norm": 0.6497774195822112, + "learning_rate": 1.923551788253367e-05, + "loss": 0.3098, "step": 3299 }, { - "epoch": 0.19, - "grad_norm": 0.8648706566024917, - "learning_rate": 1.8694168902631957e-05, - "loss": 0.4133, + "epoch": 0.15, + "grad_norm": 0.5661247585432231, + "learning_rate": 1.9234947202381487e-05, + "loss": 0.2929, "step": 3300 }, { - "epoch": 0.19, - "grad_norm": 0.4157955496291116, - "learning_rate": 1.8693249313860225e-05, - "loss": 0.3008, + "epoch": 0.15, + "grad_norm": 0.9197302245810614, + "learning_rate": 1.923437631777647e-05, + "loss": 0.4342, "step": 3301 }, { - "epoch": 0.19, - "grad_norm": 0.3660210547536385, - "learning_rate": 1.8692329424041648e-05, - "loss": 0.3064, + "epoch": 0.15, + "grad_norm": 0.7841073884232832, + "learning_rate": 1.9233805228731265e-05, + "loss": 0.3298, "step": 3302 }, { - "epoch": 0.19, - "grad_norm": 0.267248482819533, - "learning_rate": 1.8691409233208072e-05, - "loss": 0.1172, + "epoch": 0.15, + "grad_norm": 0.49044323148364, + "learning_rate": 1.923323393525851e-05, + "loss": 0.2034, "step": 3303 }, { - "epoch": 0.19, - "grad_norm": 0.6430966014648096, - "learning_rate": 1.869048874139137e-05, - "loss": 0.3144, + "epoch": 0.15, + "grad_norm": 0.40820254885043755, + "learning_rate": 1.9232662437370855e-05, + "loss": 0.2612, "step": 3304 }, { - "epoch": 0.19, - "grad_norm": 0.666856224468795, - "learning_rate": 1.8689567948623417e-05, - "loss": 0.3695, + "epoch": 0.15, + "grad_norm": 0.40701801935866583, + "learning_rate": 1.9232090735080953e-05, + "loss": 0.276, "step": 3305 }, { - "epoch": 0.19, - "grad_norm": 0.5142155690233231, - "learning_rate": 1.86886468549361e-05, - "loss": 0.3119, + "epoch": 0.15, + "grad_norm": 0.6368522448134105, + "learning_rate": 1.9231518828401458e-05, + "loss": 0.3135, "step": 3306 }, { - "epoch": 0.19, - "grad_norm": 0.4921167365321923, - "learning_rate": 1.8687725460361315e-05, - "loss": 0.3192, + "epoch": 0.15, + "grad_norm": 0.8722022723109727, + "learning_rate": 1.9230946717345035e-05, + "loss": 0.5006, "step": 3307 }, { - "epoch": 0.19, - "grad_norm": 0.6095335014762302, - "learning_rate": 1.868680376493097e-05, - "loss": 0.4577, + "epoch": 0.15, + "grad_norm": 0.4875013214388049, + "learning_rate": 1.923037440192435e-05, + "loss": 0.3015, "step": 3308 }, { - "epoch": 0.19, - "grad_norm": 0.3542443554161993, - "learning_rate": 1.8685881768676983e-05, - "loss": 0.1199, + "epoch": 0.15, + "grad_norm": 0.4699682926855817, + "learning_rate": 1.922980188215207e-05, + "loss": 0.268, "step": 3309 }, { - "epoch": 0.19, - "grad_norm": 0.3551171004629968, - "learning_rate": 1.868495947163129e-05, - "loss": 0.2994, + "epoch": 0.15, + "grad_norm": 0.3307216533842796, + "learning_rate": 1.9229229158040872e-05, + "loss": 0.2484, "step": 3310 }, { - "epoch": 0.19, - "grad_norm": 0.4032175463163124, - "learning_rate": 1.8684036873825817e-05, - "loss": 0.3191, + "epoch": 0.15, + "grad_norm": 0.8660069976275132, + "learning_rate": 1.9228656229603436e-05, + "loss": 0.4969, "step": 3311 }, { - "epoch": 0.19, - "grad_norm": 0.9000679546075138, - "learning_rate": 1.8683113975292522e-05, - "loss": 0.5656, + "epoch": 0.15, + "grad_norm": 0.44977681045476764, + "learning_rate": 1.922808309685245e-05, + "loss": 0.3255, "step": 3312 }, { - "epoch": 0.19, - "grad_norm": 0.4240286543723701, - "learning_rate": 1.8682190776063368e-05, - "loss": 0.2259, + "epoch": 0.15, + "grad_norm": 0.4331173901424907, + "learning_rate": 1.9227509759800595e-05, + "loss": 0.353, "step": 3313 }, { - "epoch": 0.19, - "grad_norm": 0.4201800746491544, - "learning_rate": 1.8681267276170315e-05, - "loss": 0.3116, + "epoch": 0.15, + "grad_norm": 0.5969217504370009, + "learning_rate": 1.9226936218460567e-05, + "loss": 0.3377, "step": 3314 }, { - "epoch": 0.19, - "grad_norm": 0.8506988193417204, - "learning_rate": 1.8680343475645354e-05, - "loss": 0.5609, + "epoch": 0.15, + "grad_norm": 0.49554752360815485, + "learning_rate": 1.9226362472845062e-05, + "loss": 0.3384, "step": 3315 }, { - "epoch": 0.19, - "grad_norm": 0.3969631858789526, - "learning_rate": 1.8679419374520467e-05, - "loss": 0.2653, + "epoch": 0.15, + "grad_norm": 0.30298122227568713, + "learning_rate": 1.9225788522966787e-05, + "loss": 0.1937, "step": 3316 }, { - "epoch": 0.19, - "grad_norm": 0.6065235075604374, - "learning_rate": 1.8678494972827665e-05, - "loss": 0.419, + "epoch": 0.15, + "grad_norm": 0.6917326400031215, + "learning_rate": 1.922521436883845e-05, + "loss": 0.3643, "step": 3317 }, { - "epoch": 0.19, - "grad_norm": 0.32114184153525965, - "learning_rate": 1.8677570270598956e-05, - "loss": 0.2346, + "epoch": 0.15, + "grad_norm": 0.46419337025078505, + "learning_rate": 1.922464001047275e-05, + "loss": 0.3588, "step": 3318 }, { - "epoch": 0.19, - "grad_norm": 0.40387175409091025, - "learning_rate": 1.8676645267866356e-05, - "loss": 0.2204, + "epoch": 0.15, + "grad_norm": 0.7904336551585145, + "learning_rate": 1.922406544788242e-05, + "loss": 0.4341, "step": 3319 }, { - "epoch": 0.19, - "grad_norm": 0.6801443170904025, - "learning_rate": 1.867571996466191e-05, - "loss": 0.4311, + "epoch": 0.15, + "grad_norm": 0.5421949033752944, + "learning_rate": 1.9223490681080164e-05, + "loss": 0.3205, "step": 3320 }, { - "epoch": 0.19, - "grad_norm": 0.5159272006258127, - "learning_rate": 1.867479436101765e-05, - "loss": 0.3767, + "epoch": 0.15, + "grad_norm": 0.39750841010299504, + "learning_rate": 1.9222915710078717e-05, + "loss": 0.2973, "step": 3321 }, { - "epoch": 0.19, - "grad_norm": 0.4089571870928136, - "learning_rate": 1.8673868456965635e-05, - "loss": 0.2527, + "epoch": 0.15, + "grad_norm": 0.26899841262128654, + "learning_rate": 1.9222340534890803e-05, + "loss": 0.1205, "step": 3322 }, { - "epoch": 0.19, - "grad_norm": 0.48397213976412556, - "learning_rate": 1.867294225253793e-05, - "loss": 0.3643, + "epoch": 0.15, + "grad_norm": 0.48270028212241406, + "learning_rate": 1.922176515552916e-05, + "loss": 0.3246, "step": 3323 }, { - "epoch": 0.19, - "grad_norm": 0.3446079467709578, - "learning_rate": 1.8672015747766606e-05, - "loss": 0.205, + "epoch": 0.15, + "grad_norm": 0.6829021997980356, + "learning_rate": 1.9221189572006524e-05, + "loss": 0.4476, "step": 3324 }, { - "epoch": 0.19, - "grad_norm": 0.8503150555575159, - "learning_rate": 1.8671088942683752e-05, - "loss": 0.5678, + "epoch": 0.15, + "grad_norm": 0.6278132018412705, + "learning_rate": 1.922061378433564e-05, + "loss": 0.3565, "step": 3325 }, { - "epoch": 0.19, - "grad_norm": 0.39654527040197246, - "learning_rate": 1.8670161837321457e-05, - "loss": 0.2591, + "epoch": 0.15, + "grad_norm": 0.3825649980388092, + "learning_rate": 1.922003779252925e-05, + "loss": 0.2129, "step": 3326 }, { - "epoch": 0.19, - "grad_norm": 1.0220278362714632, - "learning_rate": 1.8669234431711833e-05, - "loss": 0.5651, + "epoch": 0.15, + "grad_norm": 0.605153418199528, + "learning_rate": 1.9219461596600112e-05, + "loss": 0.3806, "step": 3327 }, { - "epoch": 0.19, - "grad_norm": 0.5649493509801116, - "learning_rate": 1.866830672588699e-05, - "loss": 0.3735, + "epoch": 0.15, + "grad_norm": 0.28271677535211903, + "learning_rate": 1.9218885196560984e-05, + "loss": 0.2254, "step": 3328 }, { - "epoch": 0.19, - "grad_norm": 0.3635380013575951, - "learning_rate": 1.866737871987906e-05, - "loss": 0.2953, + "epoch": 0.15, + "grad_norm": 0.5680727950276573, + "learning_rate": 1.921830859242462e-05, + "loss": 0.3252, "step": 3329 }, { - "epoch": 0.19, - "grad_norm": 0.3433034425476781, - "learning_rate": 1.866645041372018e-05, - "loss": 0.2176, + "epoch": 0.15, + "grad_norm": 0.44884564616846256, + "learning_rate": 1.9217731784203786e-05, + "loss": 0.3358, "step": 3330 }, { - "epoch": 0.19, - "grad_norm": 0.6755984958812598, - "learning_rate": 1.8665521807442495e-05, - "loss": 0.4254, + "epoch": 0.15, + "grad_norm": 0.794181012103662, + "learning_rate": 1.9217154771911256e-05, + "loss": 0.6, "step": 3331 }, { - "epoch": 0.19, - "grad_norm": 0.41328940725588204, - "learning_rate": 1.866459290107816e-05, - "loss": 0.2699, + "epoch": 0.15, + "grad_norm": 0.4905123050727757, + "learning_rate": 1.9216577555559805e-05, + "loss": 0.2562, "step": 3332 }, { - "epoch": 0.19, - "grad_norm": 0.5305543815291648, - "learning_rate": 1.8663663694659348e-05, - "loss": 0.3819, + "epoch": 0.15, + "grad_norm": 0.505954927323551, + "learning_rate": 1.9216000135162206e-05, + "loss": 0.3101, "step": 3333 }, { - "epoch": 0.19, - "grad_norm": 0.42991083738123465, - "learning_rate": 1.866273418821823e-05, - "loss": 0.3441, + "epoch": 0.15, + "grad_norm": 0.36472486665209713, + "learning_rate": 1.921542251073125e-05, + "loss": 0.2122, "step": 3334 }, { - "epoch": 0.19, - "grad_norm": 0.6544073955837417, - "learning_rate": 1.8661804381787e-05, - "loss": 0.2438, + "epoch": 0.15, + "grad_norm": 0.6716010643391185, + "learning_rate": 1.921484468227972e-05, + "loss": 0.3156, "step": 3335 }, { - "epoch": 0.19, - "grad_norm": 0.3474028790555719, - "learning_rate": 1.8660874275397864e-05, - "loss": 0.2269, + "epoch": 0.15, + "grad_norm": 0.4671365378819608, + "learning_rate": 1.921426664982041e-05, + "loss": 0.3208, "step": 3336 }, { - "epoch": 0.19, - "grad_norm": 0.44660721208933224, - "learning_rate": 1.8659943869083016e-05, - "loss": 0.3194, + "epoch": 0.15, + "grad_norm": 0.6686578842401671, + "learning_rate": 1.9213688413366118e-05, + "loss": 0.4174, "step": 3337 }, { - "epoch": 0.19, - "grad_norm": 0.36479851194376395, - "learning_rate": 1.865901316287469e-05, - "loss": 0.3026, + "epoch": 0.15, + "grad_norm": 0.8549037510537436, + "learning_rate": 1.9213109972929645e-05, + "loss": 0.5043, "step": 3338 }, { - "epoch": 0.19, - "grad_norm": 0.7251793067592559, - "learning_rate": 1.8658082156805105e-05, - "loss": 0.4025, + "epoch": 0.15, + "grad_norm": 0.3905145913697109, + "learning_rate": 1.9212531328523796e-05, + "loss": 0.2331, "step": 3339 }, { - "epoch": 0.19, - "grad_norm": 1.1762657605614177, - "learning_rate": 1.8657150850906515e-05, - "loss": 0.4898, + "epoch": 0.15, + "grad_norm": 0.37922556052393236, + "learning_rate": 1.9211952480161382e-05, + "loss": 0.2735, "step": 3340 }, { - "epoch": 0.19, - "grad_norm": 0.44822935372518946, - "learning_rate": 1.8656219245211157e-05, - "loss": 0.3101, + "epoch": 0.15, + "grad_norm": 0.8212454911391099, + "learning_rate": 1.921137342785522e-05, + "loss": 0.4546, "step": 3341 }, { - "epoch": 0.19, - "grad_norm": 0.3095470714000383, - "learning_rate": 1.86552873397513e-05, - "loss": 0.2329, + "epoch": 0.15, + "grad_norm": 0.4001899040816719, + "learning_rate": 1.9210794171618127e-05, + "loss": 0.1844, "step": 3342 }, { - "epoch": 0.19, - "grad_norm": 0.8687453149208805, - "learning_rate": 1.8654355134559216e-05, - "loss": 0.5129, + "epoch": 0.15, + "grad_norm": 1.2866075066813119, + "learning_rate": 1.9210214711462928e-05, + "loss": 0.9067, "step": 3343 }, { - "epoch": 0.19, - "grad_norm": 0.3021584131565959, - "learning_rate": 1.8653422629667183e-05, - "loss": 0.2725, + "epoch": 0.15, + "grad_norm": 0.4348203154457227, + "learning_rate": 1.9209635047402456e-05, + "loss": 0.2832, "step": 3344 }, { - "epoch": 0.19, - "grad_norm": 0.5873873067313393, - "learning_rate": 1.8652489825107497e-05, - "loss": 0.358, + "epoch": 0.15, + "grad_norm": 0.4473319107634509, + "learning_rate": 1.920905517944954e-05, + "loss": 0.2804, "step": 3345 }, { - "epoch": 0.19, - "grad_norm": 0.46313571761420985, - "learning_rate": 1.865155672091246e-05, - "loss": 0.3272, + "epoch": 0.15, + "grad_norm": 0.761625305575963, + "learning_rate": 1.9208475107617012e-05, + "loss": 0.5267, "step": 3346 }, { - "epoch": 0.19, - "grad_norm": 0.42763704537288455, - "learning_rate": 1.865062331711439e-05, - "loss": 0.2979, + "epoch": 0.15, + "grad_norm": 0.4407027160960978, + "learning_rate": 1.9207894831917725e-05, + "loss": 0.2989, "step": 3347 }, { - "epoch": 0.19, - "grad_norm": 0.8943885070194173, - "learning_rate": 1.8649689613745605e-05, - "loss": 0.4268, + "epoch": 0.15, + "grad_norm": 0.43965056638242483, + "learning_rate": 1.9207314352364523e-05, + "loss": 0.2671, "step": 3348 }, { - "epoch": 0.19, - "grad_norm": 0.4519340597314842, - "learning_rate": 1.864875561083844e-05, - "loss": 0.3254, + "epoch": 0.15, + "grad_norm": 0.4122549034445267, + "learning_rate": 1.920673366897025e-05, + "loss": 0.2853, "step": 3349 }, { - "epoch": 0.19, - "grad_norm": 0.34699922308137227, - "learning_rate": 1.864782130842524e-05, - "loss": 0.2848, + "epoch": 0.15, + "grad_norm": 1.8982321521251455, + "learning_rate": 1.9206152781747772e-05, + "loss": 0.9683, "step": 3350 }, { - "epoch": 0.19, - "grad_norm": 0.6473095340940566, - "learning_rate": 1.8646886706538358e-05, - "loss": 0.471, + "epoch": 0.15, + "grad_norm": 0.4109761683590333, + "learning_rate": 1.9205571690709942e-05, + "loss": 0.2913, "step": 3351 }, { - "epoch": 0.19, - "grad_norm": 0.337720021022626, - "learning_rate": 1.8645951805210164e-05, - "loss": 0.1635, + "epoch": 0.15, + "grad_norm": 0.42602643517806127, + "learning_rate": 1.9204990395869626e-05, + "loss": 0.33, "step": 3352 }, { - "epoch": 0.19, - "grad_norm": 0.515168665181641, - "learning_rate": 1.864501660447303e-05, - "loss": 0.363, + "epoch": 0.15, + "grad_norm": 0.8126048368642423, + "learning_rate": 1.9204408897239697e-05, + "loss": 0.5386, "step": 3353 }, { - "epoch": 0.19, - "grad_norm": 0.45899933383969893, - "learning_rate": 1.8644081104359343e-05, - "loss": 0.3239, + "epoch": 0.15, + "grad_norm": 0.4784871389361435, + "learning_rate": 1.9203827194833026e-05, + "loss": 0.3086, "step": 3354 }, { - "epoch": 0.19, - "grad_norm": 0.4341098182538872, - "learning_rate": 1.8643145304901497e-05, - "loss": 0.2779, + "epoch": 0.15, + "grad_norm": 0.36940173638560225, + "learning_rate": 1.9203245288662492e-05, + "loss": 0.1842, "step": 3355 }, { - "epoch": 0.19, - "grad_norm": 0.43247406577763553, - "learning_rate": 1.8642209206131902e-05, - "loss": 0.3562, + "epoch": 0.15, + "grad_norm": 0.3547309936891921, + "learning_rate": 1.9202663178740978e-05, + "loss": 0.2654, "step": 3356 }, { - "epoch": 0.19, - "grad_norm": 0.315059317047122, - "learning_rate": 1.8641272808082975e-05, - "loss": 0.3091, + "epoch": 0.15, + "grad_norm": 0.4096928430914158, + "learning_rate": 1.920208086508137e-05, + "loss": 0.2973, "step": 3357 }, { - "epoch": 0.19, - "grad_norm": 0.24709989347560327, - "learning_rate": 1.864033611078714e-05, - "loss": 0.0748, + "epoch": 0.15, + "grad_norm": 0.7595927508156719, + "learning_rate": 1.9201498347696563e-05, + "loss": 0.4186, "step": 3358 }, { - "epoch": 0.19, - "grad_norm": 0.4234610157900327, - "learning_rate": 1.863939911427684e-05, - "loss": 0.3157, + "epoch": 0.15, + "grad_norm": 0.956723091357573, + "learning_rate": 1.9200915626599442e-05, + "loss": 0.5554, "step": 3359 }, { - "epoch": 0.19, - "grad_norm": 1.0117601153578217, - "learning_rate": 1.8638461818584517e-05, - "loss": 0.672, + "epoch": 0.15, + "grad_norm": 0.49713162807509637, + "learning_rate": 1.9200332701802925e-05, + "loss": 0.3111, "step": 3360 }, { - "epoch": 0.19, - "grad_norm": 0.4147187717202964, - "learning_rate": 1.8637524223742636e-05, - "loss": 0.3426, + "epoch": 0.15, + "grad_norm": 0.3590860993104815, + "learning_rate": 1.919974957331991e-05, + "loss": 0.2399, "step": 3361 }, { - "epoch": 0.19, - "grad_norm": 0.35245150890474214, - "learning_rate": 1.863658632978366e-05, - "loss": 0.2728, + "epoch": 0.15, + "grad_norm": 0.40672914501526425, + "learning_rate": 1.9199166241163302e-05, + "loss": 0.2572, "step": 3362 }, { - "epoch": 0.19, - "grad_norm": 0.4678697988608274, - "learning_rate": 1.8635648136740072e-05, - "loss": 0.3828, + "epoch": 0.15, + "grad_norm": 0.4879901757286407, + "learning_rate": 1.9198582705346023e-05, + "loss": 0.3213, "step": 3363 }, { - "epoch": 0.19, - "grad_norm": 0.4667194994023077, - "learning_rate": 1.863470964464436e-05, - "loss": 0.2872, + "epoch": 0.15, + "grad_norm": 0.4802292393967778, + "learning_rate": 1.919799896588099e-05, + "loss": 0.374, "step": 3364 }, { - "epoch": 0.19, - "grad_norm": 0.32564335685434603, - "learning_rate": 1.8633770853529025e-05, - "loss": 0.236, + "epoch": 0.15, + "grad_norm": 0.6971322839437677, + "learning_rate": 1.919741502278112e-05, + "loss": 0.3281, "step": 3365 }, { - "epoch": 0.19, - "grad_norm": 0.5608166440455324, - "learning_rate": 1.8632831763426574e-05, - "loss": 0.3899, + "epoch": 0.15, + "grad_norm": 0.5631305327923952, + "learning_rate": 1.9196830876059348e-05, + "loss": 0.2896, "step": 3366 }, { - "epoch": 0.19, - "grad_norm": 0.6682512540559473, - "learning_rate": 1.863189237436953e-05, - "loss": 0.4906, + "epoch": 0.15, + "grad_norm": 0.4551873078435857, + "learning_rate": 1.9196246525728607e-05, + "loss": 0.3234, "step": 3367 }, { - "epoch": 0.19, - "grad_norm": 0.35298988092787115, - "learning_rate": 1.863095268639043e-05, - "loss": 0.2482, + "epoch": 0.15, + "grad_norm": 0.32605135390133727, + "learning_rate": 1.9195661971801825e-05, + "loss": 0.2283, "step": 3368 }, { - "epoch": 0.19, - "grad_norm": 0.42340797367221217, - "learning_rate": 1.8630012699521806e-05, - "loss": 0.3356, + "epoch": 0.15, + "grad_norm": 0.4036311476491305, + "learning_rate": 1.9195077214291955e-05, + "loss": 0.3072, "step": 3369 }, { - "epoch": 0.19, - "grad_norm": 0.2767430952636719, - "learning_rate": 1.8629072413796213e-05, - "loss": 0.1825, + "epoch": 0.15, + "grad_norm": 0.7624643434653674, + "learning_rate": 1.919449225321194e-05, + "loss": 0.4935, "step": 3370 }, { - "epoch": 0.19, - "grad_norm": 0.3605012936700501, - "learning_rate": 1.862813182924621e-05, - "loss": 0.2341, + "epoch": 0.15, + "grad_norm": 0.532283504393621, + "learning_rate": 1.9193907088574725e-05, + "loss": 0.279, "step": 3371 }, { - "epoch": 0.19, - "grad_norm": 1.153440050494576, - "learning_rate": 1.8627190945904382e-05, - "loss": 0.5661, + "epoch": 0.15, + "grad_norm": 0.33530039765016545, + "learning_rate": 1.9193321720393267e-05, + "loss": 0.2849, "step": 3372 }, { - "epoch": 0.19, - "grad_norm": 0.48897754156527273, - "learning_rate": 1.8626249763803295e-05, - "loss": 0.3302, + "epoch": 0.15, + "grad_norm": 1.4523890547065752, + "learning_rate": 1.9192736148680525e-05, + "loss": 0.8646, "step": 3373 }, { - "epoch": 0.19, - "grad_norm": 0.46612356766072266, - "learning_rate": 1.8625308282975552e-05, - "loss": 0.3312, + "epoch": 0.15, + "grad_norm": 0.2990860317267795, + "learning_rate": 1.9192150373449473e-05, + "loss": 0.2273, "step": 3374 }, { - "epoch": 0.19, - "grad_norm": 0.3181013883121376, - "learning_rate": 1.8624366503453752e-05, - "loss": 0.2266, + "epoch": 0.16, + "grad_norm": 0.3977582439527299, + "learning_rate": 1.9191564394713063e-05, + "loss": 0.3181, "step": 3375 }, { - "epoch": 0.19, - "grad_norm": 0.4652146982144647, - "learning_rate": 1.8623424425270514e-05, - "loss": 0.2808, + "epoch": 0.16, + "grad_norm": 0.5060475766557591, + "learning_rate": 1.919097821248428e-05, + "loss": 0.3583, "step": 3376 }, { - "epoch": 0.19, - "grad_norm": 0.39602293510398723, - "learning_rate": 1.8622482048458454e-05, - "loss": 0.3131, + "epoch": 0.16, + "grad_norm": 1.08003385667202, + "learning_rate": 1.9190391826776097e-05, + "loss": 0.6451, "step": 3377 }, { - "epoch": 0.19, - "grad_norm": 0.44220445658577895, - "learning_rate": 1.8621539373050218e-05, - "loss": 0.3288, + "epoch": 0.16, + "grad_norm": 0.3419907478698583, + "learning_rate": 1.9189805237601497e-05, + "loss": 0.2109, "step": 3378 }, { - "epoch": 0.19, - "grad_norm": 0.7299981826510856, - "learning_rate": 1.862059639907844e-05, - "loss": 0.569, + "epoch": 0.16, + "grad_norm": 1.4844841124003776, + "learning_rate": 1.9189218444973467e-05, + "loss": 0.8914, "step": 3379 }, { - "epoch": 0.19, - "grad_norm": 0.39958092415064894, - "learning_rate": 1.861965312657578e-05, - "loss": 0.2999, + "epoch": 0.16, + "grad_norm": 0.35211594593899825, + "learning_rate": 1.9188631448904998e-05, + "loss": 0.2978, "step": 3380 }, { - "epoch": 0.19, - "grad_norm": 0.3869027931462021, - "learning_rate": 1.8618709555574903e-05, - "loss": 0.277, + "epoch": 0.16, + "grad_norm": 0.3880115842542888, + "learning_rate": 1.9188044249409082e-05, + "loss": 0.2112, "step": 3381 }, { - "epoch": 0.19, - "grad_norm": 0.3501211008611354, - "learning_rate": 1.8617765686108486e-05, - "loss": 0.2184, + "epoch": 0.16, + "grad_norm": 0.4912770402845999, + "learning_rate": 1.9187456846498722e-05, + "loss": 0.365, "step": 3382 }, { - "epoch": 0.19, - "grad_norm": 0.3614144002216407, - "learning_rate": 1.8616821518209213e-05, - "loss": 0.2989, + "epoch": 0.16, + "grad_norm": 0.447897537730246, + "learning_rate": 1.9186869240186925e-05, + "loss": 0.3441, "step": 3383 }, { - "epoch": 0.19, - "grad_norm": 0.8955308958623391, - "learning_rate": 1.8615877051909783e-05, - "loss": 0.4199, + "epoch": 0.16, + "grad_norm": 0.3663215331948095, + "learning_rate": 1.9186281430486695e-05, + "loss": 0.2055, "step": 3384 }, { - "epoch": 0.19, - "grad_norm": 0.420971422861682, - "learning_rate": 1.8614932287242897e-05, - "loss": 0.3649, + "epoch": 0.16, + "grad_norm": 1.0933820119881812, + "learning_rate": 1.9185693417411053e-05, + "loss": 0.6602, "step": 3385 }, { - "epoch": 0.19, - "grad_norm": 0.39696752016884945, - "learning_rate": 1.8613987224241283e-05, - "loss": 0.2856, - "step": 3386 + "epoch": 0.16, + "grad_norm": 0.6421661385304326, + "learning_rate": 1.9185105200973004e-05, + "loss": 0.4421, + "step": 3386 }, { - "epoch": 0.19, - "grad_norm": 1.1534536864598737, - "learning_rate": 1.8613041862937656e-05, - "loss": 0.6503, + "epoch": 0.16, + "grad_norm": 0.39422910514625803, + "learning_rate": 1.918451678118558e-05, + "loss": 0.3096, "step": 3387 }, { - "epoch": 0.19, - "grad_norm": 0.26372459169274415, - "learning_rate": 1.861209620336476e-05, - "loss": 0.0746, + "epoch": 0.16, + "grad_norm": 0.26765993085072315, + "learning_rate": 1.9183928158061814e-05, + "loss": 0.1752, "step": 3388 }, { - "epoch": 0.19, - "grad_norm": 0.5163757820951047, - "learning_rate": 1.8611150245555345e-05, - "loss": 0.3125, + "epoch": 0.16, + "grad_norm": 1.8074355500271955, + "learning_rate": 1.9183339331614723e-05, + "loss": 0.8148, "step": 3389 }, { - "epoch": 0.19, - "grad_norm": 0.5196639264351134, - "learning_rate": 1.861020398954217e-05, - "loss": 0.3546, + "epoch": 0.16, + "grad_norm": 0.4041699650984824, + "learning_rate": 1.9182750301857354e-05, + "loss": 0.2987, "step": 3390 }, { - "epoch": 0.19, - "grad_norm": 0.8214877192095381, - "learning_rate": 1.8609257435357995e-05, - "loss": 0.387, + "epoch": 0.16, + "grad_norm": 0.8503668681469448, + "learning_rate": 1.9182161068802742e-05, + "loss": 0.4359, "step": 3391 }, { - "epoch": 0.19, - "grad_norm": 0.45946932605048707, - "learning_rate": 1.8608310583035607e-05, - "loss": 0.3055, + "epoch": 0.16, + "grad_norm": 0.3990898977772437, + "learning_rate": 1.9181571632463933e-05, + "loss": 0.3653, "step": 3392 }, { - "epoch": 0.19, - "grad_norm": 0.452824130541817, - "learning_rate": 1.8607363432607793e-05, - "loss": 0.3342, + "epoch": 0.16, + "grad_norm": 0.41311632453767994, + "learning_rate": 1.918098199285398e-05, + "loss": 0.3074, "step": 3393 }, { - "epoch": 0.2, - "grad_norm": 0.33934441895880785, - "learning_rate": 1.8606415984107357e-05, - "loss": 0.1221, + "epoch": 0.16, + "grad_norm": 0.25178970137932033, + "learning_rate": 1.918039214998593e-05, + "loss": 0.078, "step": 3394 }, { - "epoch": 0.2, - "grad_norm": 0.4336853637931559, - "learning_rate": 1.8605468237567103e-05, - "loss": 0.2819, + "epoch": 0.16, + "grad_norm": 0.44107135034782713, + "learning_rate": 1.917980210387285e-05, + "loss": 0.3349, "step": 3395 }, { - "epoch": 0.2, - "grad_norm": 1.0742880039688079, - "learning_rate": 1.8604520193019855e-05, - "loss": 0.5185, + "epoch": 0.16, + "grad_norm": 0.4798315229064209, + "learning_rate": 1.91792118545278e-05, + "loss": 0.2967, "step": 3396 }, { - "epoch": 0.2, - "grad_norm": 0.39427788076090214, - "learning_rate": 1.860357185049844e-05, - "loss": 0.2925, + "epoch": 0.16, + "grad_norm": 0.6197963655649253, + "learning_rate": 1.9178621401963843e-05, + "loss": 0.3414, "step": 3397 }, { - "epoch": 0.2, - "grad_norm": 0.39690461047371556, - "learning_rate": 1.8602623210035707e-05, - "loss": 0.307, + "epoch": 0.16, + "grad_norm": 0.7427572385598589, + "learning_rate": 1.9178030746194055e-05, + "loss": 0.4925, "step": 3398 }, { - "epoch": 0.2, - "grad_norm": 1.1140528804897003, - "learning_rate": 1.8601674271664497e-05, - "loss": 0.7088, + "epoch": 0.16, + "grad_norm": 0.4876514619448412, + "learning_rate": 1.917743988723152e-05, + "loss": 0.3023, "step": 3399 }, { - "epoch": 0.2, - "grad_norm": 0.2886567836009131, - "learning_rate": 1.8600725035417678e-05, - "loss": 0.1846, + "epoch": 0.16, + "grad_norm": 0.3335945675313101, + "learning_rate": 1.91768488250893e-05, + "loss": 0.2768, "step": 3400 }, { - "epoch": 0.2, - "grad_norm": 0.3227616596528429, - "learning_rate": 1.8599775501328125e-05, - "loss": 0.2425, + "epoch": 0.16, + "grad_norm": 0.33143204372264295, + "learning_rate": 1.9176257559780497e-05, + "loss": 0.184, "step": 3401 }, { - "epoch": 0.2, - "grad_norm": 0.848213881735556, - "learning_rate": 1.8598825669428713e-05, - "loss": 0.473, + "epoch": 0.16, + "grad_norm": 0.5032840776863647, + "learning_rate": 1.9175666091318196e-05, + "loss": 0.3482, "step": 3402 }, { - "epoch": 0.2, - "grad_norm": 0.837716610668294, - "learning_rate": 1.8597875539752337e-05, - "loss": 0.562, + "epoch": 0.16, + "grad_norm": 0.6995482609409982, + "learning_rate": 1.917507441971549e-05, + "loss": 0.4259, "step": 3403 }, { - "epoch": 0.2, - "grad_norm": 0.33372430563348676, - "learning_rate": 1.85969251123319e-05, - "loss": 0.2439, + "epoch": 0.16, + "grad_norm": 0.4070389390470506, + "learning_rate": 1.917448254498548e-05, + "loss": 0.3093, "step": 3404 }, { - "epoch": 0.2, - "grad_norm": 0.49703820168497725, - "learning_rate": 1.859597438720032e-05, - "loss": 0.3647, + "epoch": 0.16, + "grad_norm": 0.5142480387539397, + "learning_rate": 1.9173890467141268e-05, + "loss": 0.2743, "step": 3405 }, { - "epoch": 0.2, - "grad_norm": 0.4523814047602288, - "learning_rate": 1.8595023364390515e-05, - "loss": 0.3094, + "epoch": 0.16, + "grad_norm": 0.31929163897954016, + "learning_rate": 1.9173298186195964e-05, + "loss": 0.2215, "step": 3406 }, { - "epoch": 0.2, - "grad_norm": 0.3932121628983667, - "learning_rate": 1.8594072043935418e-05, - "loss": 0.241, + "epoch": 0.16, + "grad_norm": 0.4772866803527093, + "learning_rate": 1.917270570216268e-05, + "loss": 0.3001, "step": 3407 }, { - "epoch": 0.2, - "grad_norm": 0.530507341323851, - "learning_rate": 1.8593120425867977e-05, - "loss": 0.3333, + "epoch": 0.16, + "grad_norm": 0.3553699338870784, + "learning_rate": 1.917211301505453e-05, + "loss": 0.2764, "step": 3408 }, { - "epoch": 0.2, - "grad_norm": 0.45189749977195065, - "learning_rate": 1.859216851022115e-05, - "loss": 0.3763, + "epoch": 0.16, + "grad_norm": 0.8210697437366932, + "learning_rate": 1.9171520124884643e-05, + "loss": 0.5633, "step": 3409 }, { - "epoch": 0.2, - "grad_norm": 0.31896053252137, - "learning_rate": 1.859121629702789e-05, - "loss": 0.2049, + "epoch": 0.16, + "grad_norm": 0.5964754285802557, + "learning_rate": 1.9170927031666137e-05, + "loss": 0.429, "step": 3410 }, { - "epoch": 0.2, - "grad_norm": 1.4510347587273766, - "learning_rate": 1.8590263786321182e-05, - "loss": 0.7991, + "epoch": 0.16, + "grad_norm": 0.4364954694802805, + "learning_rate": 1.9170333735412147e-05, + "loss": 0.2703, "step": 3411 }, { - "epoch": 0.2, - "grad_norm": 0.5061445451316303, - "learning_rate": 1.858931097813401e-05, - "loss": 0.4186, + "epoch": 0.16, + "grad_norm": 0.4253643041858598, + "learning_rate": 1.9169740236135804e-05, + "loss": 0.3128, "step": 3412 }, { - "epoch": 0.2, - "grad_norm": 0.35254918453174994, - "learning_rate": 1.8588357872499364e-05, - "loss": 0.2737, + "epoch": 0.16, + "grad_norm": 0.3971326643000123, + "learning_rate": 1.9169146533850252e-05, + "loss": 0.2333, "step": 3413 }, { - "epoch": 0.2, - "grad_norm": 0.30901119829012413, - "learning_rate": 1.8587404469450256e-05, - "loss": 0.1988, + "epoch": 0.16, + "grad_norm": 0.46388794944744244, + "learning_rate": 1.9168552628568632e-05, + "loss": 0.2498, "step": 3414 }, { - "epoch": 0.2, - "grad_norm": 1.1748732561056525, - "learning_rate": 1.85864507690197e-05, - "loss": 0.7539, + "epoch": 0.16, + "grad_norm": 0.7429689066988482, + "learning_rate": 1.916795852030409e-05, + "loss": 0.4178, "step": 3415 }, { - "epoch": 0.2, - "grad_norm": 0.4472144634212105, - "learning_rate": 1.8585496771240726e-05, - "loss": 0.3451, + "epoch": 0.16, + "grad_norm": 0.4329306168174274, + "learning_rate": 1.916736420906979e-05, + "loss": 0.3474, "step": 3416 }, { - "epoch": 0.2, - "grad_norm": 0.3288085382989217, - "learning_rate": 1.8584542476146364e-05, - "loss": 0.2777, + "epoch": 0.16, + "grad_norm": 0.42073630767930015, + "learning_rate": 1.9166769694878877e-05, + "loss": 0.19, "step": 3417 }, { - "epoch": 0.2, - "grad_norm": 0.701787075717411, - "learning_rate": 1.8583587883769668e-05, - "loss": 0.5152, + "epoch": 0.16, + "grad_norm": 0.36779516695066033, + "learning_rate": 1.916617497774452e-05, + "loss": 0.2655, "step": 3418 }, { - "epoch": 0.2, - "grad_norm": 0.4124720953337286, - "learning_rate": 1.8582632994143693e-05, - "loss": 0.3517, + "epoch": 0.16, + "grad_norm": 0.3588948714738646, + "learning_rate": 1.916558005767988e-05, + "loss": 0.312, "step": 3419 }, { - "epoch": 0.2, - "grad_norm": 0.26448985422784416, - "learning_rate": 1.8581677807301507e-05, - "loss": 0.1267, + "epoch": 0.16, + "grad_norm": 0.3962461274970632, + "learning_rate": 1.9164984934698136e-05, + "loss": 0.2415, "step": 3420 }, { - "epoch": 0.2, - "grad_norm": 0.4416407925509442, - "learning_rate": 1.8580722323276186e-05, - "loss": 0.35, + "epoch": 0.16, + "grad_norm": 0.6543796323648982, + "learning_rate": 1.9164389608812458e-05, + "loss": 0.4531, "step": 3421 }, { - "epoch": 0.2, - "grad_norm": 0.36003377851432783, - "learning_rate": 1.857976654210082e-05, - "loss": 0.2706, + "epoch": 0.16, + "grad_norm": 1.1775146613630108, + "learning_rate": 1.9163794080036026e-05, + "loss": 0.6552, "step": 3422 }, { - "epoch": 0.2, - "grad_norm": 1.0498458699375062, - "learning_rate": 1.857881046380851e-05, - "loss": 0.3784, + "epoch": 0.16, + "grad_norm": 0.3538804275266444, + "learning_rate": 1.9163198348382023e-05, + "loss": 0.2347, "step": 3423 }, { - "epoch": 0.2, - "grad_norm": 0.3688550683980165, - "learning_rate": 1.8577854088432355e-05, - "loss": 0.3505, + "epoch": 0.16, + "grad_norm": 0.40215654435324955, + "learning_rate": 1.9162602413863646e-05, + "loss": 0.2693, "step": 3424 }, { - "epoch": 0.2, - "grad_norm": 0.44942001956730154, - "learning_rate": 1.8576897416005487e-05, - "loss": 0.3227, + "epoch": 0.16, + "grad_norm": 0.47727541533278633, + "learning_rate": 1.916200627649408e-05, + "loss": 0.3294, "step": 3425 }, { - "epoch": 0.2, - "grad_norm": 0.6148292590435895, - "learning_rate": 1.857594044656103e-05, - "loss": 0.3691, + "epoch": 0.16, + "grad_norm": 0.5434506729464939, + "learning_rate": 1.9161409936286524e-05, + "loss": 0.4095, "step": 3426 }, { - "epoch": 0.2, - "grad_norm": 0.26088148061349903, - "learning_rate": 1.8574983180132128e-05, - "loss": 0.139, + "epoch": 0.16, + "grad_norm": 0.48657745932793073, + "learning_rate": 1.9160813393254182e-05, + "loss": 0.2871, "step": 3427 }, { - "epoch": 0.2, - "grad_norm": 0.6143931862134248, - "learning_rate": 1.8574025616751923e-05, - "loss": 0.2772, + "epoch": 0.16, + "grad_norm": 0.45459399273711115, + "learning_rate": 1.916021664741026e-05, + "loss": 0.3614, "step": 3428 }, { - "epoch": 0.2, - "grad_norm": 0.40106184091905833, - "learning_rate": 1.8573067756453578e-05, - "loss": 0.312, + "epoch": 0.16, + "grad_norm": 0.47962185870991336, + "learning_rate": 1.9159619698767972e-05, + "loss": 0.3369, "step": 3429 }, { - "epoch": 0.2, - "grad_norm": 0.6215169188275277, - "learning_rate": 1.8572109599270266e-05, - "loss": 0.4106, + "epoch": 0.16, + "grad_norm": 0.5735934177845237, + "learning_rate": 1.915902254734053e-05, + "loss": 0.3043, "step": 3430 }, { - "epoch": 0.2, - "grad_norm": 0.4565998521916527, - "learning_rate": 1.857115114523517e-05, - "loss": 0.3022, + "epoch": 0.16, + "grad_norm": 0.29270488331025524, + "learning_rate": 1.915842519314116e-05, + "loss": 0.2378, "step": 3431 }, { - "epoch": 0.2, - "grad_norm": 0.3754630031717685, - "learning_rate": 1.857019239438148e-05, - "loss": 0.2459, + "epoch": 0.16, + "grad_norm": 0.472462457346063, + "learning_rate": 1.915782763618308e-05, + "loss": 0.3445, "step": 3432 }, { - "epoch": 0.2, - "grad_norm": 0.3584316941904439, - "learning_rate": 1.8569233346742392e-05, - "loss": 0.2165, + "epoch": 0.16, + "grad_norm": 0.5143698222543387, + "learning_rate": 1.9157229876479525e-05, + "loss": 0.338, "step": 3433 }, { - "epoch": 0.2, - "grad_norm": 0.4805254322830583, - "learning_rate": 1.856827400235112e-05, - "loss": 0.3607, + "epoch": 0.16, + "grad_norm": 2.114173546019667, + "learning_rate": 1.9156631914043723e-05, + "loss": 0.7025, "step": 3434 }, { - "epoch": 0.2, - "grad_norm": 0.5382648282007101, - "learning_rate": 1.8567314361240893e-05, - "loss": 0.3951, + "epoch": 0.16, + "grad_norm": 0.5089458134745325, + "learning_rate": 1.9156033748888918e-05, + "loss": 0.3349, "step": 3435 }, { - "epoch": 0.2, - "grad_norm": 0.4865226514408291, - "learning_rate": 1.8566354423444933e-05, - "loss": 0.3781, + "epoch": 0.16, + "grad_norm": 0.34787092617807697, + "learning_rate": 1.9155435381028348e-05, + "loss": 0.2452, "step": 3436 }, { - "epoch": 0.2, - "grad_norm": 0.358374646552267, - "learning_rate": 1.856539418899649e-05, - "loss": 0.2488, + "epoch": 0.16, + "grad_norm": 0.5140115462147723, + "learning_rate": 1.9154836810475266e-05, + "loss": 0.3419, "step": 3437 }, { - "epoch": 0.2, - "grad_norm": 0.5303949361998881, - "learning_rate": 1.8564433657928815e-05, - "loss": 0.3902, + "epoch": 0.16, + "grad_norm": 0.46848933564646866, + "learning_rate": 1.915423803724292e-05, + "loss": 0.2454, "step": 3438 }, { - "epoch": 0.2, - "grad_norm": 0.4547723658493619, - "learning_rate": 1.8563472830275172e-05, - "loss": 0.3408, + "epoch": 0.16, + "grad_norm": 0.4286154581992925, + "learning_rate": 1.9153639061344568e-05, + "loss": 0.2899, "step": 3439 }, { - "epoch": 0.2, - "grad_norm": 0.270919077914169, - "learning_rate": 1.856251170606883e-05, - "loss": 0.1925, + "epoch": 0.16, + "grad_norm": 0.48910352139556, + "learning_rate": 1.9153039882793466e-05, + "loss": 0.2988, "step": 3440 }, { - "epoch": 0.2, - "grad_norm": 0.38320462625120033, - "learning_rate": 1.8561550285343077e-05, - "loss": 0.3415, + "epoch": 0.16, + "grad_norm": 0.651841107884288, + "learning_rate": 1.9152440501602885e-05, + "loss": 0.362, "step": 3441 }, { - "epoch": 0.2, - "grad_norm": 0.7710149460976287, - "learning_rate": 1.8560588568131205e-05, - "loss": 0.5157, + "epoch": 0.16, + "grad_norm": 0.4416154036761676, + "learning_rate": 1.9151840917786092e-05, + "loss": 0.3526, "step": 3442 }, { - "epoch": 0.2, - "grad_norm": 0.45201487807738494, - "learning_rate": 1.8559626554466523e-05, - "loss": 0.1742, + "epoch": 0.16, + "grad_norm": 0.37928587249066165, + "learning_rate": 1.915124113135636e-05, + "loss": 0.3134, "step": 3443 }, { - "epoch": 0.2, - "grad_norm": 0.6001960036849652, - "learning_rate": 1.8558664244382338e-05, - "loss": 0.3938, + "epoch": 0.16, + "grad_norm": 0.43733033871745575, + "learning_rate": 1.9150641142326975e-05, + "loss": 0.2404, "step": 3444 }, { - "epoch": 0.2, - "grad_norm": 0.4399877397096689, - "learning_rate": 1.8557701637911978e-05, - "loss": 0.3467, + "epoch": 0.16, + "grad_norm": 0.5038621414655685, + "learning_rate": 1.915004095071121e-05, + "loss": 0.3618, "step": 3445 }, { - "epoch": 0.2, - "grad_norm": 0.6694405301341718, - "learning_rate": 1.855673873508878e-05, - "loss": 0.2563, + "epoch": 0.16, + "grad_norm": 0.34215194936473936, + "learning_rate": 1.9149440556522357e-05, + "loss": 0.1736, "step": 3446 }, { - "epoch": 0.2, - "grad_norm": 0.5429342439640811, - "learning_rate": 1.855577553594609e-05, - "loss": 0.3684, + "epoch": 0.16, + "grad_norm": 0.4216096476780459, + "learning_rate": 1.9148839959773712e-05, + "loss": 0.3052, "step": 3447 }, { - "epoch": 0.2, - "grad_norm": 0.3009787386986737, - "learning_rate": 1.8554812040517255e-05, - "loss": 0.2322, + "epoch": 0.16, + "grad_norm": 0.46783037600400706, + "learning_rate": 1.9148239160478565e-05, + "loss": 0.3401, "step": 3448 }, { - "epoch": 0.2, - "grad_norm": 0.782021777816558, - "learning_rate": 1.855384824883565e-05, - "loss": 0.4354, + "epoch": 0.16, + "grad_norm": 0.6148351220840176, + "learning_rate": 1.914763815865022e-05, + "loss": 0.4, "step": 3449 }, { - "epoch": 0.2, - "grad_norm": 0.3773608366203985, - "learning_rate": 1.8552884160934647e-05, - "loss": 0.2431, + "epoch": 0.16, + "grad_norm": 0.49464379906017614, + "learning_rate": 1.9147036954301986e-05, + "loss": 0.322, "step": 3450 }, { - "epoch": 0.2, - "grad_norm": 1.2925400215861826, - "learning_rate": 1.8551919776847634e-05, - "loss": 0.837, + "epoch": 0.16, + "grad_norm": 0.4569241533831194, + "learning_rate": 1.9146435547447168e-05, + "loss": 0.3166, "step": 3451 }, { - "epoch": 0.2, - "grad_norm": 0.43774430556639365, - "learning_rate": 1.8550955096608007e-05, - "loss": 0.3146, + "epoch": 0.16, + "grad_norm": 0.30321510146375885, + "learning_rate": 1.914583393809908e-05, + "loss": 0.2677, "step": 3452 }, { - "epoch": 0.2, - "grad_norm": 0.4075921947601884, - "learning_rate": 1.8549990120249174e-05, - "loss": 0.3066, + "epoch": 0.16, + "grad_norm": 0.6365400162442456, + "learning_rate": 1.9145232126271045e-05, + "loss": 0.2831, "step": 3453 }, { - "epoch": 0.2, - "grad_norm": 0.3309237068426338, - "learning_rate": 1.8549024847804547e-05, - "loss": 0.1845, + "epoch": 0.16, + "grad_norm": 0.39671575129971, + "learning_rate": 1.9144630111976385e-05, + "loss": 0.3473, "step": 3454 }, { - "epoch": 0.2, - "grad_norm": 0.5641842287581473, - "learning_rate": 1.854805927930756e-05, - "loss": 0.3114, + "epoch": 0.16, + "grad_norm": 0.47449816609150136, + "learning_rate": 1.914402789522843e-05, + "loss": 0.3903, "step": 3455 }, { - "epoch": 0.2, - "grad_norm": 0.4169525329222009, - "learning_rate": 1.854709341479165e-05, - "loss": 0.2763, + "epoch": 0.16, + "grad_norm": 0.5516235976536191, + "learning_rate": 1.9143425476040508e-05, + "loss": 0.1499, "step": 3456 }, { - "epoch": 0.2, - "grad_norm": 0.49289840010942465, - "learning_rate": 1.8546127254290257e-05, - "loss": 0.3636, + "epoch": 0.16, + "grad_norm": 0.4340650598047938, + "learning_rate": 1.914282285442596e-05, + "loss": 0.3322, "step": 3457 }, { - "epoch": 0.2, - "grad_norm": 0.5541013067086684, - "learning_rate": 1.8545160797836847e-05, - "loss": 0.4148, + "epoch": 0.16, + "grad_norm": 0.32812347694788363, + "learning_rate": 1.9142220030398128e-05, + "loss": 0.1965, "step": 3458 }, { - "epoch": 0.2, - "grad_norm": 0.4141408240338715, - "learning_rate": 1.8544194045464888e-05, - "loss": 0.2903, + "epoch": 0.16, + "grad_norm": 0.35545950408669325, + "learning_rate": 1.914161700397035e-05, + "loss": 0.2608, "step": 3459 }, { - "epoch": 0.2, - "grad_norm": 0.28105441189183067, - "learning_rate": 1.8543226997207854e-05, - "loss": 0.2312, + "epoch": 0.16, + "grad_norm": 0.4413803333802271, + "learning_rate": 1.914101377515599e-05, + "loss": 0.3494, "step": 3460 }, { - "epoch": 0.2, - "grad_norm": 0.6534352410994891, - "learning_rate": 1.8542259653099236e-05, - "loss": 0.2852, + "epoch": 0.16, + "grad_norm": 0.9095260286058134, + "learning_rate": 1.914041034396839e-05, + "loss": 0.5996, "step": 3461 }, { - "epoch": 0.2, - "grad_norm": 0.4909083697020356, - "learning_rate": 1.8541292013172538e-05, - "loss": 0.3315, + "epoch": 0.16, + "grad_norm": 0.731698288828411, + "learning_rate": 1.9139806710420914e-05, + "loss": 0.2648, "step": 3462 }, { - "epoch": 0.2, - "grad_norm": 0.924048730821875, - "learning_rate": 1.854032407746126e-05, - "loss": 0.4172, + "epoch": 0.16, + "grad_norm": 0.3937954904865098, + "learning_rate": 1.913920287452693e-05, + "loss": 0.3103, "step": 3463 }, { - "epoch": 0.2, - "grad_norm": 0.36777627302291016, - "learning_rate": 1.853935584599893e-05, - "loss": 0.2993, + "epoch": 0.16, + "grad_norm": 0.3678625175062782, + "learning_rate": 1.91385988362998e-05, + "loss": 0.27, "step": 3464 }, { - "epoch": 0.2, - "grad_norm": 0.4041852347714912, - "learning_rate": 1.8538387318819074e-05, - "loss": 0.3167, + "epoch": 0.16, + "grad_norm": 0.4891904157853559, + "learning_rate": 1.91379945957529e-05, + "loss": 0.3213, "step": 3465 }, { - "epoch": 0.2, - "grad_norm": 0.2281237706488959, - "learning_rate": 1.853741849595523e-05, - "loss": 0.0881, + "epoch": 0.16, + "grad_norm": 0.4692580810977307, + "learning_rate": 1.9137390152899608e-05, + "loss": 0.2742, "step": 3466 }, { - "epoch": 0.2, - "grad_norm": 0.8221586813021399, - "learning_rate": 1.853644937744095e-05, - "loss": 0.486, + "epoch": 0.16, + "grad_norm": 0.4351870902893192, + "learning_rate": 1.9136785507753302e-05, + "loss": 0.3367, "step": 3467 }, { - "epoch": 0.2, - "grad_norm": 0.3859475003282859, - "learning_rate": 1.8535479963309796e-05, - "loss": 0.313, + "epoch": 0.16, + "grad_norm": 0.771071937960453, + "learning_rate": 1.9136180660327377e-05, + "loss": 0.5116, "step": 3468 }, { - "epoch": 0.2, - "grad_norm": 0.6530897529329045, - "learning_rate": 1.853451025359534e-05, - "loss": 0.3369, + "epoch": 0.16, + "grad_norm": 0.4155420541972083, + "learning_rate": 1.913557561063521e-05, + "loss": 0.2303, "step": 3469 }, { - "epoch": 0.2, - "grad_norm": 1.042977278524951, - "learning_rate": 1.8533540248331162e-05, - "loss": 0.4573, + "epoch": 0.16, + "grad_norm": 0.31922242719289784, + "learning_rate": 1.913497035869021e-05, + "loss": 0.2344, "step": 3470 }, { - "epoch": 0.2, - "grad_norm": 0.41146027977536204, - "learning_rate": 1.8532569947550846e-05, - "loss": 0.3255, + "epoch": 0.16, + "grad_norm": 0.43879675886993946, + "learning_rate": 1.913436490450577e-05, + "loss": 0.3395, "step": 3471 }, { - "epoch": 0.2, - "grad_norm": 0.35765902247522974, - "learning_rate": 1.8531599351288007e-05, - "loss": 0.2252, + "epoch": 0.16, + "grad_norm": 0.41411630055213, + "learning_rate": 1.9133759248095294e-05, + "loss": 0.2825, "step": 3472 }, { - "epoch": 0.2, - "grad_norm": 0.3855611438390921, - "learning_rate": 1.8530628459576243e-05, - "loss": 0.2276, + "epoch": 0.16, + "grad_norm": 1.1173247654134, + "learning_rate": 1.913315338947219e-05, + "loss": 0.6101, "step": 3473 }, { - "epoch": 0.2, - "grad_norm": 0.5204397928422537, - "learning_rate": 1.8529657272449186e-05, - "loss": 0.3064, + "epoch": 0.16, + "grad_norm": 1.4161497313563582, + "learning_rate": 1.9132547328649873e-05, + "loss": 0.8446, "step": 3474 }, { - "epoch": 0.2, - "grad_norm": 1.810437510701928, - "learning_rate": 1.8528685789940463e-05, - "loss": 0.5076, + "epoch": 0.16, + "grad_norm": 0.3899908204352992, + "learning_rate": 1.913194106564176e-05, + "loss": 0.2504, "step": 3475 }, { - "epoch": 0.2, - "grad_norm": 0.40869087933670045, - "learning_rate": 1.8527714012083718e-05, - "loss": 0.2685, + "epoch": 0.16, + "grad_norm": 0.8812685354613593, + "learning_rate": 1.9131334600461274e-05, + "loss": 0.5134, "step": 3476 }, { - "epoch": 0.2, - "grad_norm": 0.43618505181314465, - "learning_rate": 1.8526741938912605e-05, - "loss": 0.3277, + "epoch": 0.16, + "grad_norm": 0.44594836535757465, + "learning_rate": 1.9130727933121842e-05, + "loss": 0.3571, "step": 3477 }, { - "epoch": 0.2, - "grad_norm": 0.42187847427639297, - "learning_rate": 1.8525769570460783e-05, - "loss": 0.2454, + "epoch": 0.16, + "grad_norm": 0.360254478660426, + "learning_rate": 1.9130121063636893e-05, + "loss": 0.2119, "step": 3478 }, { - "epoch": 0.2, - "grad_norm": 0.3165643524146173, - "learning_rate": 1.8524796906761928e-05, - "loss": 0.1485, + "epoch": 0.16, + "grad_norm": 0.48687590351124066, + "learning_rate": 1.9129513992019864e-05, + "loss": 0.2759, "step": 3479 }, { - "epoch": 0.2, - "grad_norm": 0.46732317429125, - "learning_rate": 1.8523823947849722e-05, - "loss": 0.2923, + "epoch": 0.16, + "grad_norm": 1.4028502930318765, + "learning_rate": 1.9128906718284192e-05, + "loss": 0.779, "step": 3480 }, { - "epoch": 0.2, - "grad_norm": 1.1275700154976764, - "learning_rate": 1.8522850693757865e-05, - "loss": 0.4158, + "epoch": 0.16, + "grad_norm": 0.3800523846803639, + "learning_rate": 1.9128299242443325e-05, + "loss": 0.2863, "step": 3481 }, { - "epoch": 0.2, - "grad_norm": 0.7907367906006263, - "learning_rate": 1.8521877144520047e-05, - "loss": 0.2819, + "epoch": 0.16, + "grad_norm": 0.7864540152394304, + "learning_rate": 1.9127691564510714e-05, + "loss": 0.3981, "step": 3482 }, { - "epoch": 0.2, - "grad_norm": 0.44558259936120503, - "learning_rate": 1.8520903300169993e-05, - "loss": 0.3128, + "epoch": 0.16, + "grad_norm": 0.42160130001516083, + "learning_rate": 1.9127083684499805e-05, + "loss": 0.3363, "step": 3483 }, { - "epoch": 0.2, - "grad_norm": 0.31241687566734866, - "learning_rate": 1.8519929160741427e-05, - "loss": 0.2827, + "epoch": 0.16, + "grad_norm": 0.526323565600426, + "learning_rate": 1.912647560242406e-05, + "loss": 0.2797, "step": 3484 }, { - "epoch": 0.2, - "grad_norm": 0.31618469870226495, - "learning_rate": 1.8518954726268076e-05, - "loss": 0.1135, + "epoch": 0.16, + "grad_norm": 0.36132367162232637, + "learning_rate": 1.9125867318296946e-05, + "loss": 0.1459, "step": 3485 }, { - "epoch": 0.2, - "grad_norm": 0.3964958438859382, - "learning_rate": 1.851797999678369e-05, - "loss": 0.3282, + "epoch": 0.16, + "grad_norm": 0.48669425168280406, + "learning_rate": 1.912525883213192e-05, + "loss": 0.3961, "step": 3486 }, { - "epoch": 0.2, - "grad_norm": 0.8788401444161869, - "learning_rate": 1.8517004972322022e-05, - "loss": 0.5349, + "epoch": 0.16, + "grad_norm": 0.44839878549127704, + "learning_rate": 1.912465014394246e-05, + "loss": 0.3372, "step": 3487 }, { - "epoch": 0.2, - "grad_norm": 0.5842037117974624, - "learning_rate": 1.851602965291684e-05, - "loss": 0.3523, + "epoch": 0.16, + "grad_norm": 0.6853048696835087, + "learning_rate": 1.9124041253742042e-05, + "loss": 0.3756, "step": 3488 }, { - "epoch": 0.2, - "grad_norm": 0.4250093402378405, - "learning_rate": 1.851505403860192e-05, - "loss": 0.2307, + "epoch": 0.16, + "grad_norm": 1.1239841290208872, + "learning_rate": 1.9123432161544143e-05, + "loss": 0.6775, "step": 3489 }, { - "epoch": 0.2, - "grad_norm": 1.1648122285456606, - "learning_rate": 1.8514078129411045e-05, - "loss": 0.5946, + "epoch": 0.16, + "grad_norm": 0.32899443409281237, + "learning_rate": 1.912282286736225e-05, + "loss": 0.2369, "step": 3490 }, { - "epoch": 0.2, - "grad_norm": 0.33870419487266124, - "learning_rate": 1.8513101925378006e-05, - "loss": 0.2547, + "epoch": 0.16, + "grad_norm": 0.3911369336831165, + "learning_rate": 1.9122213371209848e-05, + "loss": 0.2899, "step": 3491 }, { - "epoch": 0.2, - "grad_norm": 0.3289610462955457, - "learning_rate": 1.8512125426536617e-05, - "loss": 0.2312, + "epoch": 0.16, + "grad_norm": 1.608779414341514, + "learning_rate": 1.912160367310044e-05, + "loss": 0.5468, "step": 3492 }, { - "epoch": 0.2, - "grad_norm": 0.7856786137489506, - "learning_rate": 1.8511148632920685e-05, - "loss": 0.4921, + "epoch": 0.16, + "grad_norm": 0.39956878497461185, + "learning_rate": 1.9120993773047512e-05, + "loss": 0.2927, "step": 3493 }, { - "epoch": 0.2, - "grad_norm": 0.9243725044925123, - "learning_rate": 1.851017154456405e-05, - "loss": 0.5751, + "epoch": 0.16, + "grad_norm": 1.0418945565542326, + "learning_rate": 1.9120383671064577e-05, + "loss": 0.5134, "step": 3494 }, { - "epoch": 0.2, - "grad_norm": 0.38206441570861877, - "learning_rate": 1.8509194161500536e-05, - "loss": 0.24, + "epoch": 0.16, + "grad_norm": 0.4534093265244846, + "learning_rate": 1.911977336716514e-05, + "loss": 0.2572, "step": 3495 }, { - "epoch": 0.2, - "grad_norm": 0.4583245897267825, - "learning_rate": 1.8508216483763993e-05, - "loss": 0.3524, + "epoch": 0.16, + "grad_norm": 0.5345755583661993, + "learning_rate": 1.9119162861362703e-05, + "loss": 0.2663, "step": 3496 }, { - "epoch": 0.2, - "grad_norm": 0.2942176231528043, - "learning_rate": 1.850723851138828e-05, - "loss": 0.2291, + "epoch": 0.16, + "grad_norm": 0.4224818044341632, + "learning_rate": 1.9118552153670796e-05, + "loss": 0.1854, "step": 3497 }, { - "epoch": 0.2, - "grad_norm": 0.3675286195522216, - "learning_rate": 1.850626024440726e-05, - "loss": 0.2646, + "epoch": 0.16, + "grad_norm": 0.48274037404054887, + "learning_rate": 1.911794124410293e-05, + "loss": 0.2899, "step": 3498 }, { - "epoch": 0.2, - "grad_norm": 0.4881052772922558, - "learning_rate": 1.850528168285482e-05, - "loss": 0.4122, + "epoch": 0.16, + "grad_norm": 0.44682123197430085, + "learning_rate": 1.9117330132672633e-05, + "loss": 0.3191, "step": 3499 }, { - "epoch": 0.2, - "grad_norm": 0.5091923599798945, - "learning_rate": 1.8504302826764835e-05, - "loss": 0.3891, + "epoch": 0.16, + "grad_norm": 0.8358462328703415, + "learning_rate": 1.9116718819393434e-05, + "loss": 0.5083, "step": 3500 }, { - "epoch": 0.2, - "grad_norm": 0.3633056005786531, - "learning_rate": 1.8503323676171212e-05, - "loss": 0.2949, + "epoch": 0.16, + "grad_norm": 0.37676259324671696, + "learning_rate": 1.9116107304278867e-05, + "loss": 0.2359, "step": 3501 }, { - "epoch": 0.2, - "grad_norm": 0.5991201857527949, - "learning_rate": 1.8502344231107855e-05, - "loss": 0.3974, + "epoch": 0.16, + "grad_norm": 0.5242522303620496, + "learning_rate": 1.911549558734247e-05, + "loss": 0.2884, "step": 3502 }, { - "epoch": 0.2, - "grad_norm": 0.40415730403043115, - "learning_rate": 1.8501364491608683e-05, - "loss": 0.3875, + "epoch": 0.16, + "grad_norm": 0.3322886814798018, + "learning_rate": 1.911488366859779e-05, + "loss": 0.2728, "step": 3503 }, { - "epoch": 0.2, - "grad_norm": 0.34569731298708684, - "learning_rate": 1.8500384457707625e-05, - "loss": 0.285, + "epoch": 0.16, + "grad_norm": 0.9102443678886043, + "learning_rate": 1.9114271548058365e-05, + "loss": 0.6597, "step": 3504 }, { - "epoch": 0.2, - "grad_norm": 0.2568026864658739, - "learning_rate": 1.8499404129438617e-05, - "loss": 0.1563, + "epoch": 0.16, + "grad_norm": 0.4137721630784232, + "learning_rate": 1.9113659225737757e-05, + "loss": 0.2154, "step": 3505 }, { - "epoch": 0.2, - "grad_norm": 1.1707986100418522, - "learning_rate": 1.8498423506835613e-05, - "loss": 0.7772, + "epoch": 0.16, + "grad_norm": 0.6215459450322264, + "learning_rate": 1.9113046701649517e-05, + "loss": 0.4272, "step": 3506 }, { - "epoch": 0.2, - "grad_norm": 0.38602275849489903, - "learning_rate": 1.8497442589932568e-05, - "loss": 0.2805, + "epoch": 0.16, + "grad_norm": 0.5743797803037992, + "learning_rate": 1.9112433975807204e-05, + "loss": 0.3924, "step": 3507 }, { - "epoch": 0.2, - "grad_norm": 0.4079578462025017, - "learning_rate": 1.8496461378763445e-05, - "loss": 0.2396, + "epoch": 0.16, + "grad_norm": 0.3846197950569047, + "learning_rate": 1.9111821048224387e-05, + "loss": 0.2312, "step": 3508 }, { - "epoch": 0.2, - "grad_norm": 0.5885481273942008, - "learning_rate": 1.8495479873362237e-05, - "loss": 0.4813, + "epoch": 0.16, + "grad_norm": 0.3409559485094247, + "learning_rate": 1.9111207918914633e-05, + "loss": 0.2005, "step": 3509 }, { - "epoch": 0.2, - "grad_norm": 0.28505728074008135, - "learning_rate": 1.8494498073762924e-05, - "loss": 0.2308, + "epoch": 0.16, + "grad_norm": 0.5229713941797228, + "learning_rate": 1.911059458789152e-05, + "loss": 0.4135, "step": 3510 }, { - "epoch": 0.2, - "grad_norm": 0.3259515702055285, - "learning_rate": 1.8493515979999508e-05, - "loss": 0.2147, + "epoch": 0.16, + "grad_norm": 0.36540609633968346, + "learning_rate": 1.9109981055168624e-05, + "loss": 0.2571, "step": 3511 }, { - "epoch": 0.2, - "grad_norm": 0.5081829044842984, - "learning_rate": 1.8492533592105998e-05, - "loss": 0.3786, + "epoch": 0.16, + "grad_norm": 0.8106590512115882, + "learning_rate": 1.9109367320759522e-05, + "loss": 0.5676, "step": 3512 }, { - "epoch": 0.2, - "grad_norm": 0.40211123448914077, - "learning_rate": 1.8491550910116415e-05, - "loss": 0.2937, + "epoch": 0.16, + "grad_norm": 1.3875298946431176, + "learning_rate": 1.910875338467781e-05, + "loss": 0.6127, "step": 3513 }, { - "epoch": 0.2, - "grad_norm": 0.5797803004312481, - "learning_rate": 1.8490567934064788e-05, - "loss": 0.3893, + "epoch": 0.16, + "grad_norm": 0.4258125040812, + "learning_rate": 1.910813924693708e-05, + "loss": 0.299, "step": 3514 }, { - "epoch": 0.2, - "grad_norm": 0.35474516854979343, - "learning_rate": 1.848958466398516e-05, - "loss": 0.311, + "epoch": 0.16, + "grad_norm": 0.45367298094079395, + "learning_rate": 1.9107524907550922e-05, + "loss": 0.2196, "step": 3515 }, { - "epoch": 0.2, - "grad_norm": 0.3743499556350666, - "learning_rate": 1.8488601099911582e-05, - "loss": 0.2879, + "epoch": 0.16, + "grad_norm": 0.42890909475885713, + "learning_rate": 1.910691036653294e-05, + "loss": 0.3248, "step": 3516 }, { - "epoch": 0.2, - "grad_norm": 0.34838342234863295, - "learning_rate": 1.8487617241878114e-05, - "loss": 0.2501, + "epoch": 0.16, + "grad_norm": 0.455499110905812, + "learning_rate": 1.9106295623896744e-05, + "loss": 0.3159, "step": 3517 }, { - "epoch": 0.2, - "grad_norm": 0.9330455844434704, - "learning_rate": 1.8486633089918823e-05, - "loss": 0.5301, + "epoch": 0.16, + "grad_norm": 1.2991342100155205, + "learning_rate": 1.9105680679655938e-05, + "loss": 0.4528, "step": 3518 }, { - "epoch": 0.2, - "grad_norm": 0.3876398473412494, - "learning_rate": 1.848564864406779e-05, - "loss": 0.2736, + "epoch": 0.16, + "grad_norm": 0.4756550473255202, + "learning_rate": 1.9105065533824136e-05, + "loss": 0.3887, "step": 3519 }, { - "epoch": 0.2, - "grad_norm": 0.38224800897615363, - "learning_rate": 1.8484663904359112e-05, - "loss": 0.3232, + "epoch": 0.16, + "grad_norm": 0.7340965260393196, + "learning_rate": 1.9104450186414963e-05, + "loss": 0.4344, "step": 3520 }, { - "epoch": 0.2, - "grad_norm": 0.7342180011930438, - "learning_rate": 1.848367887082689e-05, - "loss": 0.4027, - "step": 3521 + "epoch": 0.16, + "grad_norm": 0.35557001322735815, + "learning_rate": 1.9103834637442035e-05, + "loss": 0.1699, + "step": 3521 }, { - "epoch": 0.2, - "grad_norm": 0.37253887582059153, - "learning_rate": 1.848269354350523e-05, - "loss": 0.2922, + "epoch": 0.16, + "grad_norm": 0.49549043709383356, + "learning_rate": 1.9103218886918983e-05, + "loss": 0.3834, "step": 3522 }, { - "epoch": 0.2, - "grad_norm": 0.4486634748559754, - "learning_rate": 1.848170792242826e-05, - "loss": 0.3188, + "epoch": 0.16, + "grad_norm": 0.8288347004829953, + "learning_rate": 1.9102602934859437e-05, + "loss": 0.3933, "step": 3523 }, { - "epoch": 0.2, - "grad_norm": 0.41124068257992336, - "learning_rate": 1.848072200763011e-05, - "loss": 0.3629, + "epoch": 0.16, + "grad_norm": 0.481243823133131, + "learning_rate": 1.9101986781277037e-05, + "loss": 0.2759, "step": 3524 }, { - "epoch": 0.2, - "grad_norm": 0.252569055610881, - "learning_rate": 1.8479735799144917e-05, - "loss": 0.1877, + "epoch": 0.16, + "grad_norm": 1.0482720217005597, + "learning_rate": 1.9101370426185418e-05, + "loss": 0.5761, "step": 3525 }, { - "epoch": 0.2, - "grad_norm": 0.6926210684337959, - "learning_rate": 1.847874929700684e-05, - "loss": 0.5067, + "epoch": 0.16, + "grad_norm": 0.5176275388076008, + "learning_rate": 1.9100753869598237e-05, + "loss": 0.3192, "step": 3526 }, { - "epoch": 0.2, - "grad_norm": 0.5730030339821481, - "learning_rate": 1.8477762501250046e-05, - "loss": 0.3562, + "epoch": 0.16, + "grad_norm": 0.34008250491417535, + "learning_rate": 1.9100137111529135e-05, + "loss": 0.2906, "step": 3527 }, { - "epoch": 0.2, - "grad_norm": 0.36594434314465024, - "learning_rate": 1.847677541190869e-05, - "loss": 0.2255, + "epoch": 0.16, + "grad_norm": 0.6799676695944602, + "learning_rate": 1.9099520151991765e-05, + "loss": 0.4285, "step": 3528 }, { - "epoch": 0.2, - "grad_norm": 0.5439482028879972, - "learning_rate": 1.8475788029016974e-05, - "loss": 0.3023, + "epoch": 0.16, + "grad_norm": 0.47546095129369154, + "learning_rate": 1.909890299099979e-05, + "loss": 0.3255, "step": 3529 }, { - "epoch": 0.2, - "grad_norm": 0.5147312308129324, - "learning_rate": 1.847480035260908e-05, - "loss": 0.3325, + "epoch": 0.16, + "grad_norm": 0.3252836851202552, + "learning_rate": 1.9098285628566872e-05, + "loss": 0.2619, "step": 3530 }, { - "epoch": 0.2, - "grad_norm": 0.396855562434019, - "learning_rate": 1.8473812382719215e-05, - "loss": 0.2331, + "epoch": 0.16, + "grad_norm": 0.5159822451330393, + "learning_rate": 1.909766806470668e-05, + "loss": 0.2857, "step": 3531 }, { - "epoch": 0.2, - "grad_norm": 0.3650937303546053, - "learning_rate": 1.8472824119381592e-05, - "loss": 0.3324, + "epoch": 0.16, + "grad_norm": 0.426437760138583, + "learning_rate": 1.9097050299432886e-05, + "loss": 0.3026, "step": 3532 }, { - "epoch": 0.2, - "grad_norm": 0.8069519781469283, - "learning_rate": 1.8471835562630435e-05, - "loss": 0.5369, + "epoch": 0.16, + "grad_norm": 0.4684530024818983, + "learning_rate": 1.9096432332759167e-05, + "loss": 0.2929, "step": 3533 }, { - "epoch": 0.2, - "grad_norm": 0.3224016264255935, - "learning_rate": 1.8470846712499977e-05, - "loss": 0.2151, + "epoch": 0.16, + "grad_norm": 0.4249940951000583, + "learning_rate": 1.90958141646992e-05, + "loss": 0.3507, "step": 3534 }, { - "epoch": 0.2, - "grad_norm": 0.49887651442034286, - "learning_rate": 1.846985756902446e-05, - "loss": 0.4021, + "epoch": 0.16, + "grad_norm": 0.4155006261853128, + "learning_rate": 1.9095195795266677e-05, + "loss": 0.2711, "step": 3535 }, { - "epoch": 0.2, - "grad_norm": 0.372999385227418, - "learning_rate": 1.8468868132238138e-05, - "loss": 0.3151, + "epoch": 0.16, + "grad_norm": 1.362589731001263, + "learning_rate": 1.9094577224475283e-05, + "loss": 0.8216, "step": 3536 }, { - "epoch": 0.2, - "grad_norm": 0.4155513944432335, - "learning_rate": 1.8467878402175278e-05, - "loss": 0.3341, + "epoch": 0.16, + "grad_norm": 0.3732635666647341, + "learning_rate": 1.9093958452338717e-05, + "loss": 0.1632, "step": 3537 }, { - "epoch": 0.2, - "grad_norm": 0.38115414788893204, - "learning_rate": 1.8466888378870155e-05, - "loss": 0.2816, + "epoch": 0.16, + "grad_norm": 0.4733395209874384, + "learning_rate": 1.9093339478870675e-05, + "loss": 0.3164, "step": 3538 }, { - "epoch": 0.2, - "grad_norm": 0.3971013745780681, - "learning_rate": 1.8465898062357048e-05, - "loss": 0.3233, + "epoch": 0.16, + "grad_norm": 0.40573139968984, + "learning_rate": 1.909272030408486e-05, + "loss": 0.3534, "step": 3539 }, { - "epoch": 0.2, - "grad_norm": 0.405786263334606, - "learning_rate": 1.846490745267026e-05, - "loss": 0.3123, + "epoch": 0.16, + "grad_norm": 0.7161759689409551, + "learning_rate": 1.9092100927994982e-05, + "loss": 0.483, "step": 3540 }, { - "epoch": 0.2, - "grad_norm": 0.524142882921503, - "learning_rate": 1.8463916549844084e-05, - "loss": 0.2972, + "epoch": 0.16, + "grad_norm": 0.48984749549176204, + "learning_rate": 1.9091481350614753e-05, + "loss": 0.1812, "step": 3541 }, { - "epoch": 0.2, - "grad_norm": 0.43131354308913267, - "learning_rate": 1.846292535391285e-05, - "loss": 0.3471, + "epoch": 0.16, + "grad_norm": 0.34212256922895506, + "learning_rate": 1.9090861571957887e-05, + "loss": 0.2624, "step": 3542 }, { - "epoch": 0.2, - "grad_norm": 0.3522374006752674, - "learning_rate": 1.846193386491087e-05, - "loss": 0.316, + "epoch": 0.16, + "grad_norm": 0.43209077874933327, + "learning_rate": 1.909024159203811e-05, + "loss": 0.3102, "step": 3543 }, { - "epoch": 0.2, - "grad_norm": 0.4367897212599001, - "learning_rate": 1.846094208287248e-05, - "loss": 0.3275, + "epoch": 0.16, + "grad_norm": 0.4261400814812236, + "learning_rate": 1.908962141086915e-05, + "loss": 0.2099, "step": 3544 }, { - "epoch": 0.2, - "grad_norm": 0.4318912036953102, - "learning_rate": 1.845995000783204e-05, - "loss": 0.2809, + "epoch": 0.16, + "grad_norm": 0.4177690076918309, + "learning_rate": 1.9089001028464724e-05, + "loss": 0.368, "step": 3545 }, { - "epoch": 0.2, - "grad_norm": 0.37576132450278826, - "learning_rate": 1.8458957639823887e-05, - "loss": 0.271, + "epoch": 0.16, + "grad_norm": 0.6405738950003872, + "learning_rate": 1.908838044483858e-05, + "loss": 0.4079, "step": 3546 }, { - "epoch": 0.2, - "grad_norm": 0.44073497833225533, - "learning_rate": 1.8457964978882397e-05, - "loss": 0.2809, + "epoch": 0.16, + "grad_norm": 0.3800116210254458, + "learning_rate": 1.9087759660004452e-05, + "loss": 0.2426, "step": 3547 }, { - "epoch": 0.2, - "grad_norm": 0.4402566585814481, - "learning_rate": 1.8456972025041943e-05, - "loss": 0.411, + "epoch": 0.16, + "grad_norm": 0.4106437041733624, + "learning_rate": 1.9087138673976086e-05, + "loss": 0.2787, "step": 3548 }, { - "epoch": 0.2, - "grad_norm": 0.5817384512608036, - "learning_rate": 1.845597877833691e-05, - "loss": 0.3939, + "epoch": 0.16, + "grad_norm": 0.5493707968947954, + "learning_rate": 1.9086517486767223e-05, + "loss": 0.3256, "step": 3549 }, { - "epoch": 0.2, - "grad_norm": 0.347563565667777, - "learning_rate": 1.84549852388017e-05, - "loss": 0.2921, + "epoch": 0.16, + "grad_norm": 0.35510871794918764, + "learning_rate": 1.9085896098391624e-05, + "loss": 0.2515, "step": 3550 }, { - "epoch": 0.2, - "grad_norm": 0.3335895526495127, - "learning_rate": 1.845399140647071e-05, - "loss": 0.2274, + "epoch": 0.16, + "grad_norm": 0.6256709964613038, + "learning_rate": 1.9085274508863043e-05, + "loss": 0.406, "step": 3551 }, { - "epoch": 0.2, - "grad_norm": 0.7074274468152657, - "learning_rate": 1.8452997281378364e-05, - "loss": 0.3816, + "epoch": 0.16, + "grad_norm": 0.776056461390364, + "learning_rate": 1.9084652718195237e-05, + "loss": 0.538, "step": 3552 }, { - "epoch": 0.2, - "grad_norm": 0.4984396584595269, - "learning_rate": 1.8452002863559086e-05, - "loss": 0.3568, + "epoch": 0.16, + "grad_norm": 0.6968350263339173, + "learning_rate": 1.908403072640198e-05, + "loss": 0.3582, "step": 3553 }, { - "epoch": 0.2, - "grad_norm": 0.6284443435298703, - "learning_rate": 1.845100815304731e-05, - "loss": 0.4117, + "epoch": 0.16, + "grad_norm": 0.37635818534348725, + "learning_rate": 1.9083408533497037e-05, + "loss": 0.2567, "step": 3554 }, { - "epoch": 0.2, - "grad_norm": 0.3490991839784079, - "learning_rate": 1.845001314987749e-05, - "loss": 0.28, + "epoch": 0.16, + "grad_norm": 0.3236260358672682, + "learning_rate": 1.908278613949418e-05, + "loss": 0.2421, "step": 3555 }, { - "epoch": 0.2, - "grad_norm": 0.4250866389209264, - "learning_rate": 1.8449017854084072e-05, - "loss": 0.3298, + "epoch": 0.16, + "grad_norm": 0.6384570131214236, + "learning_rate": 1.9082163544407198e-05, + "loss": 0.3756, "step": 3556 }, { - "epoch": 0.2, - "grad_norm": 0.3043618837957513, - "learning_rate": 1.844802226570153e-05, - "loss": 0.1234, + "epoch": 0.16, + "grad_norm": 0.40641684592730404, + "learning_rate": 1.9081540748249864e-05, + "loss": 0.311, "step": 3557 }, { - "epoch": 0.2, - "grad_norm": 0.4379559170905638, - "learning_rate": 1.8447026384764343e-05, - "loss": 0.3207, + "epoch": 0.16, + "grad_norm": 0.4862360695997552, + "learning_rate": 1.908091775103597e-05, + "loss": 0.3496, "step": 3558 }, { - "epoch": 0.2, - "grad_norm": 0.3965722955626275, - "learning_rate": 1.8446030211306993e-05, - "loss": 0.3391, + "epoch": 0.16, + "grad_norm": 0.8602029369380233, + "learning_rate": 1.9080294552779313e-05, + "loss": 0.5222, "step": 3559 }, { - "epoch": 0.2, - "grad_norm": 1.024955890124149, - "learning_rate": 1.844503374536398e-05, - "loss": 0.427, + "epoch": 0.16, + "grad_norm": 0.3788894269648852, + "learning_rate": 1.9079671153493687e-05, + "loss": 0.257, "step": 3560 }, { - "epoch": 0.2, - "grad_norm": 0.41957172094011896, - "learning_rate": 1.8444036986969814e-05, - "loss": 0.336, + "epoch": 0.16, + "grad_norm": 0.43110993990624596, + "learning_rate": 1.907904755319289e-05, + "loss": 0.2675, "step": 3561 }, { - "epoch": 0.2, - "grad_norm": 0.5542877434696505, - "learning_rate": 1.8443039936159007e-05, - "loss": 0.3461, + "epoch": 0.16, + "grad_norm": 0.4268204931062618, + "learning_rate": 1.9078423751890734e-05, + "loss": 0.3406, "step": 3562 }, { - "epoch": 0.2, - "grad_norm": 0.2729193047980405, - "learning_rate": 1.8442042592966095e-05, - "loss": 0.2327, + "epoch": 0.16, + "grad_norm": 0.41412028995200467, + "learning_rate": 1.9077799749601024e-05, + "loss": 0.2681, "step": 3563 }, { - "epoch": 0.2, - "grad_norm": 0.3549744193912546, - "learning_rate": 1.8441044957425608e-05, - "loss": 0.1783, + "epoch": 0.16, + "grad_norm": 1.6606216242409337, + "learning_rate": 1.9077175546337575e-05, + "loss": 0.7719, "step": 3564 }, { - "epoch": 0.2, - "grad_norm": 0.5966312774507957, - "learning_rate": 1.8440047029572094e-05, - "loss": 0.41, + "epoch": 0.16, + "grad_norm": 0.7436470464305392, + "learning_rate": 1.907655114211421e-05, + "loss": 0.5102, "step": 3565 }, { - "epoch": 0.2, - "grad_norm": 1.0779161799481367, - "learning_rate": 1.843904880944012e-05, - "loss": 0.5309, + "epoch": 0.16, + "grad_norm": 0.4041107815644691, + "learning_rate": 1.907592653694475e-05, + "loss": 0.292, "step": 3566 }, { - "epoch": 0.2, - "grad_norm": 0.3425507945907625, - "learning_rate": 1.843805029706425e-05, - "loss": 0.2269, + "epoch": 0.16, + "grad_norm": 0.2492568307381671, + "learning_rate": 1.907530173084302e-05, + "loss": 0.1567, "step": 3567 }, { - "epoch": 0.2, - "grad_norm": 0.43024803622830704, - "learning_rate": 1.8437051492479053e-05, - "loss": 0.3461, + "epoch": 0.16, + "grad_norm": 0.6179193626268321, + "learning_rate": 1.9074676723822864e-05, + "loss": 0.408, "step": 3568 }, { - "epoch": 0.21, - "grad_norm": 0.32182440028828346, - "learning_rate": 1.843605239571913e-05, - "loss": 0.1903, + "epoch": 0.16, + "grad_norm": 0.504672466144843, + "learning_rate": 1.907405151589811e-05, + "loss": 0.3128, "step": 3569 }, { - "epoch": 0.21, - "grad_norm": 0.43595402051191434, - "learning_rate": 1.8435053006819073e-05, - "loss": 0.1819, + "epoch": 0.16, + "grad_norm": 0.46512781059897446, + "learning_rate": 1.90734261070826e-05, + "loss": 0.2629, "step": 3570 }, { - "epoch": 0.21, - "grad_norm": 0.4289935360872228, - "learning_rate": 1.8434053325813495e-05, - "loss": 0.3026, + "epoch": 0.16, + "grad_norm": 0.7054780195795514, + "learning_rate": 1.907280049739018e-05, + "loss": 0.3951, "step": 3571 }, { - "epoch": 0.21, - "grad_norm": 1.259600352356555, - "learning_rate": 1.8433053352737014e-05, - "loss": 0.5352, + "epoch": 0.16, + "grad_norm": 0.47315208394519115, + "learning_rate": 1.9072174686834703e-05, + "loss": 0.3346, "step": 3572 }, { - "epoch": 0.21, - "grad_norm": 0.46454768580118233, - "learning_rate": 1.8432053087624258e-05, - "loss": 0.2123, + "epoch": 0.16, + "grad_norm": 0.7189326714559875, + "learning_rate": 1.9071548675430018e-05, + "loss": 0.4339, "step": 3573 }, { - "epoch": 0.21, - "grad_norm": 0.3683549795511345, - "learning_rate": 1.8431052530509866e-05, - "loss": 0.2625, + "epoch": 0.16, + "grad_norm": 0.3447179161207031, + "learning_rate": 1.9070922463189993e-05, + "loss": 0.2593, "step": 3574 }, { - "epoch": 0.21, - "grad_norm": 0.3434082020790198, - "learning_rate": 1.8430051681428486e-05, - "loss": 0.2504, + "epoch": 0.16, + "grad_norm": 0.38130961293646937, + "learning_rate": 1.9070296050128486e-05, + "loss": 0.2497, "step": 3575 }, { - "epoch": 0.21, - "grad_norm": 0.6314261221401793, - "learning_rate": 1.842905054041478e-05, - "loss": 0.3683, + "epoch": 0.16, + "grad_norm": 0.8425483811393244, + "learning_rate": 1.906966943625937e-05, + "loss": 0.4708, "step": 3576 }, { - "epoch": 0.21, - "grad_norm": 0.7178093309482125, - "learning_rate": 1.8428049107503417e-05, - "loss": 0.3267, + "epoch": 0.16, + "grad_norm": 0.9982821552480875, + "learning_rate": 1.906904262159651e-05, + "loss": 0.5133, "step": 3577 }, { - "epoch": 0.21, - "grad_norm": 1.1845693433728908, - "learning_rate": 1.8427047382729073e-05, - "loss": 0.606, + "epoch": 0.16, + "grad_norm": 0.35449776971243824, + "learning_rate": 1.906841560615379e-05, + "loss": 0.3014, "step": 3578 }, { - "epoch": 0.21, - "grad_norm": 0.3857144192125777, - "learning_rate": 1.842604536612644e-05, - "loss": 0.3092, + "epoch": 0.16, + "grad_norm": 0.7820312405111502, + "learning_rate": 1.9067788389945083e-05, + "loss": 0.494, "step": 3579 }, { - "epoch": 0.21, - "grad_norm": 0.422890021608481, - "learning_rate": 1.842504305773022e-05, - "loss": 0.2626, + "epoch": 0.16, + "grad_norm": 0.3380813604525425, + "learning_rate": 1.9067160972984283e-05, + "loss": 0.0794, "step": 3580 }, { - "epoch": 0.21, - "grad_norm": 0.319692052216907, - "learning_rate": 1.8424040457575124e-05, - "loss": 0.1854, + "epoch": 0.16, + "grad_norm": 0.3862253788321075, + "learning_rate": 1.906653335528528e-05, + "loss": 0.2648, "step": 3581 }, { - "epoch": 0.21, - "grad_norm": 0.609801137363293, - "learning_rate": 1.8423037565695864e-05, - "loss": 0.3804, + "epoch": 0.16, + "grad_norm": 0.4780269964938683, + "learning_rate": 1.9065905536861967e-05, + "loss": 0.3483, "step": 3582 }, { - "epoch": 0.21, - "grad_norm": 0.3952230768238028, - "learning_rate": 1.842203438212718e-05, - "loss": 0.2583, + "epoch": 0.16, + "grad_norm": 0.5801806308924962, + "learning_rate": 1.9065277517728244e-05, + "loss": 0.3385, "step": 3583 }, { - "epoch": 0.21, - "grad_norm": 0.9995316109812836, - "learning_rate": 1.8421030906903805e-05, - "loss": 0.5573, + "epoch": 0.16, + "grad_norm": 0.4345445178658295, + "learning_rate": 1.9064649297898016e-05, + "loss": 0.3254, "step": 3584 }, { - "epoch": 0.21, - "grad_norm": 0.8080682129475155, - "learning_rate": 1.8420027140060493e-05, - "loss": 0.464, + "epoch": 0.16, + "grad_norm": 0.7853055884869111, + "learning_rate": 1.9064020877385184e-05, + "loss": 0.4993, "step": 3585 }, { - "epoch": 0.21, - "grad_norm": 0.38502970294226097, - "learning_rate": 1.8419023081632e-05, - "loss": 0.2237, + "epoch": 0.16, + "grad_norm": 0.3297062011323884, + "learning_rate": 1.9063392256203668e-05, + "loss": 0.2008, "step": 3586 }, { - "epoch": 0.21, - "grad_norm": 0.3003903655050074, - "learning_rate": 1.8418018731653106e-05, - "loss": 0.2376, + "epoch": 0.16, + "grad_norm": 0.46474404208477543, + "learning_rate": 1.9062763434367384e-05, + "loss": 0.3087, "step": 3587 }, { - "epoch": 0.21, - "grad_norm": 0.8598542283516147, - "learning_rate": 1.841701409015858e-05, - "loss": 0.4417, + "epoch": 0.16, + "grad_norm": 0.5215583393876929, + "learning_rate": 1.906213441189025e-05, + "loss": 0.3033, "step": 3588 }, { - "epoch": 0.21, - "grad_norm": 0.4722555501097505, - "learning_rate": 1.841600915718322e-05, - "loss": 0.3618, + "epoch": 0.16, + "grad_norm": 0.38592173162921733, + "learning_rate": 1.9061505188786196e-05, + "loss": 0.263, "step": 3589 }, { - "epoch": 0.21, - "grad_norm": 0.8780361475667137, - "learning_rate": 1.8415003932761823e-05, - "loss": 0.5438, + "epoch": 0.16, + "grad_norm": 0.42886632720046736, + "learning_rate": 1.9060875765069148e-05, + "loss": 0.3275, "step": 3590 }, { - "epoch": 0.21, - "grad_norm": 0.36609700316137517, - "learning_rate": 1.8413998416929205e-05, - "loss": 0.3317, + "epoch": 0.16, + "grad_norm": 1.000636545965187, + "learning_rate": 1.9060246140753047e-05, + "loss": 0.4981, "step": 3591 }, { - "epoch": 0.21, - "grad_norm": 0.4232364971031735, - "learning_rate": 1.8412992609720183e-05, - "loss": 0.3091, + "epoch": 0.17, + "grad_norm": 0.42929827215701666, + "learning_rate": 1.9059616315851827e-05, + "loss": 0.2574, "step": 3592 }, { - "epoch": 0.21, - "grad_norm": 0.2582320073159369, - "learning_rate": 1.8411986511169585e-05, - "loss": 0.0721, + "epoch": 0.17, + "grad_norm": 0.3188737702757787, + "learning_rate": 1.9058986290379432e-05, + "loss": 0.1808, "step": 3593 }, { - "epoch": 0.21, - "grad_norm": 0.615145234861627, - "learning_rate": 1.8410980121312258e-05, - "loss": 0.41, + "epoch": 0.17, + "grad_norm": 0.4732152715317135, + "learning_rate": 1.9058356064349818e-05, + "loss": 0.3413, "step": 3594 }, { - "epoch": 0.21, - "grad_norm": 0.3981913852643038, - "learning_rate": 1.8409973440183054e-05, - "loss": 0.3132, + "epoch": 0.17, + "grad_norm": 0.9192125721587081, + "learning_rate": 1.9057725637776924e-05, + "loss": 0.5591, "step": 3595 }, { - "epoch": 0.21, - "grad_norm": 1.2713381305155262, - "learning_rate": 1.840896646781683e-05, - "loss": 0.5107, + "epoch": 0.17, + "grad_norm": 0.4184446728067903, + "learning_rate": 1.9057095010674716e-05, + "loss": 0.271, "step": 3596 }, { - "epoch": 0.21, - "grad_norm": 0.5843553386069017, - "learning_rate": 1.8407959204248455e-05, - "loss": 0.3627, + "epoch": 0.17, + "grad_norm": 0.4888435262433435, + "learning_rate": 1.9056464183057157e-05, + "loss": 0.4041, "step": 3597 }, { - "epoch": 0.21, - "grad_norm": 0.46489119063537054, - "learning_rate": 1.8406951649512817e-05, - "loss": 0.3248, + "epoch": 0.17, + "grad_norm": 0.5630761644754007, + "learning_rate": 1.9055833154938208e-05, + "loss": 0.365, "step": 3598 }, { - "epoch": 0.21, - "grad_norm": 0.47103184457362807, - "learning_rate": 1.8405943803644803e-05, - "loss": 0.3158, + "epoch": 0.17, + "grad_norm": 0.24796852044454393, + "learning_rate": 1.9055201926331843e-05, + "loss": 0.159, "step": 3599 }, { - "epoch": 0.21, - "grad_norm": 0.7760715327257453, - "learning_rate": 1.840493566667932e-05, - "loss": 0.3387, + "epoch": 0.17, + "grad_norm": 1.4934362222001998, + "learning_rate": 1.9054570497252033e-05, + "loss": 0.7869, "step": 3600 }, { - "epoch": 0.21, - "grad_norm": 0.3663667134586227, - "learning_rate": 1.8403927238651274e-05, - "loss": 0.2674, + "epoch": 0.17, + "grad_norm": 0.6271211164870093, + "learning_rate": 1.9053938867712756e-05, + "loss": 0.4057, "step": 3601 }, { - "epoch": 0.21, - "grad_norm": 0.5303169306539498, - "learning_rate": 1.8402918519595592e-05, - "loss": 0.3238, + "epoch": 0.17, + "grad_norm": 0.33537383736379334, + "learning_rate": 1.9053307037728005e-05, + "loss": 0.2546, "step": 3602 }, { - "epoch": 0.21, - "grad_norm": 0.47049291683057637, - "learning_rate": 1.8401909509547196e-05, - "loss": 0.2613, + "epoch": 0.17, + "grad_norm": 1.1058377418057972, + "learning_rate": 1.9052675007311757e-05, + "loss": 0.594, "step": 3603 }, { - "epoch": 0.21, - "grad_norm": 0.4636995547912467, - "learning_rate": 1.8400900208541045e-05, - "loss": 0.3415, + "epoch": 0.17, + "grad_norm": 0.46280358685002165, + "learning_rate": 1.905204277647801e-05, + "loss": 0.2907, "step": 3604 }, { - "epoch": 0.21, - "grad_norm": 0.610816875773527, - "learning_rate": 1.8399890616612073e-05, - "loss": 0.4726, + "epoch": 0.17, + "grad_norm": 0.37174055584584664, + "learning_rate": 1.9051410345240762e-05, + "loss": 0.2769, "step": 3605 }, { - "epoch": 0.21, - "grad_norm": 0.3932357164783332, - "learning_rate": 1.8398880733795253e-05, - "loss": 0.2688, + "epoch": 0.17, + "grad_norm": 0.3267902390276805, + "learning_rate": 1.905077771361401e-05, + "loss": 0.2267, "step": 3606 }, { - "epoch": 0.21, - "grad_norm": 0.33115183414057575, - "learning_rate": 1.8397870560125554e-05, - "loss": 0.2524, + "epoch": 0.17, + "grad_norm": 0.7727724927058522, + "learning_rate": 1.9050144881611766e-05, + "loss": 0.492, "step": 3607 }, { - "epoch": 0.21, - "grad_norm": 0.5466588959281407, - "learning_rate": 1.839686009563796e-05, - "loss": 0.368, + "epoch": 0.17, + "grad_norm": 0.518280909747992, + "learning_rate": 1.9049511849248038e-05, + "loss": 0.3186, "step": 3608 }, { - "epoch": 0.21, - "grad_norm": 0.3873845994650159, - "learning_rate": 1.839584934036746e-05, - "loss": 0.0684, + "epoch": 0.17, + "grad_norm": 0.40515053873025125, + "learning_rate": 1.9048878616536837e-05, + "loss": 0.2929, "step": 3609 }, { - "epoch": 0.21, - "grad_norm": 0.5054505521879232, - "learning_rate": 1.8394838294349058e-05, - "loss": 0.3073, + "epoch": 0.17, + "grad_norm": 0.4648485607367147, + "learning_rate": 1.9048245183492183e-05, + "loss": 0.3139, "step": 3610 }, { - "epoch": 0.21, - "grad_norm": 0.595052581816832, - "learning_rate": 1.839382695761777e-05, - "loss": 0.3465, + "epoch": 0.17, + "grad_norm": 0.4494859829406393, + "learning_rate": 1.9047611550128103e-05, + "loss": 0.3202, "step": 3611 }, { - "epoch": 0.21, - "grad_norm": 1.1129720778207792, - "learning_rate": 1.839281533020861e-05, - "loss": 0.4943, + "epoch": 0.17, + "grad_norm": 0.5285339484543778, + "learning_rate": 1.9046977716458627e-05, + "loss": 0.2604, "step": 3612 }, { - "epoch": 0.21, - "grad_norm": 0.33785668181825734, - "learning_rate": 1.839180341215662e-05, - "loss": 0.1835, + "epoch": 0.17, + "grad_norm": 0.551302806007306, + "learning_rate": 1.9046343682497782e-05, + "loss": 0.4146, "step": 3613 }, { - "epoch": 0.21, - "grad_norm": 1.3938903417041624, - "learning_rate": 1.8390791203496842e-05, - "loss": 0.8968, + "epoch": 0.17, + "grad_norm": 0.36153224534077544, + "learning_rate": 1.9045709448259605e-05, + "loss": 0.265, "step": 3614 }, { - "epoch": 0.21, - "grad_norm": 0.3179001508479327, - "learning_rate": 1.838977870426432e-05, - "loss": 0.2777, + "epoch": 0.17, + "grad_norm": 0.3812848254682872, + "learning_rate": 1.904507501375814e-05, + "loss": 0.2146, "step": 3615 }, { - "epoch": 0.21, - "grad_norm": 0.38387065537886916, - "learning_rate": 1.8388765914494124e-05, - "loss": 0.1989, + "epoch": 0.17, + "grad_norm": 1.2345076073027008, + "learning_rate": 1.9044440379007433e-05, + "loss": 0.6762, "step": 3616 }, { - "epoch": 0.21, - "grad_norm": 0.9955530352807715, - "learning_rate": 1.8387752834221326e-05, - "loss": 0.4931, + "epoch": 0.17, + "grad_norm": 0.31548109578694955, + "learning_rate": 1.9043805544021533e-05, + "loss": 0.2256, "step": 3617 }, { - "epoch": 0.21, - "grad_norm": 0.45334877775759963, - "learning_rate": 1.8386739463481004e-05, - "loss": 0.2833, + "epoch": 0.17, + "grad_norm": 0.4729073558754234, + "learning_rate": 1.9043170508814493e-05, + "loss": 0.351, "step": 3618 }, { - "epoch": 0.21, - "grad_norm": 0.4419384557222577, - "learning_rate": 1.838572580230826e-05, - "loss": 0.2239, + "epoch": 0.17, + "grad_norm": 0.6990092133802741, + "learning_rate": 1.9042535273400377e-05, + "loss": 0.4435, "step": 3619 }, { - "epoch": 0.21, - "grad_norm": 1.2042963328857403, - "learning_rate": 1.838471185073819e-05, - "loss": 0.7506, + "epoch": 0.17, + "grad_norm": 0.4181696480935202, + "learning_rate": 1.904189983779324e-05, + "loss": 0.2916, "step": 3620 }, { - "epoch": 0.21, - "grad_norm": 0.28333197613712585, - "learning_rate": 1.8383697608805907e-05, - "loss": 0.2277, + "epoch": 0.17, + "grad_norm": 0.5446886838396515, + "learning_rate": 1.9041264202007158e-05, + "loss": 0.3072, "step": 3621 }, { - "epoch": 0.21, - "grad_norm": 0.35663776056728297, - "learning_rate": 1.838268307654654e-05, - "loss": 0.2561, + "epoch": 0.17, + "grad_norm": 0.44333955207391407, + "learning_rate": 1.9040628366056203e-05, + "loss": 0.2916, "step": 3622 }, { - "epoch": 0.21, - "grad_norm": 0.38137141453365087, - "learning_rate": 1.8381668253995216e-05, - "loss": 0.3414, + "epoch": 0.17, + "grad_norm": 0.4053059276584727, + "learning_rate": 1.903999232995445e-05, + "loss": 0.305, "step": 3623 }, { - "epoch": 0.21, - "grad_norm": 1.3007390949271715, - "learning_rate": 1.8380653141187084e-05, - "loss": 0.5905, + "epoch": 0.17, + "grad_norm": 0.7694280759508905, + "learning_rate": 1.9039356093715975e-05, + "loss": 0.5049, "step": 3624 }, { - "epoch": 0.21, - "grad_norm": 0.3586717879521889, - "learning_rate": 1.837963773815729e-05, - "loss": 0.2828, + "epoch": 0.17, + "grad_norm": 0.4229212744386553, + "learning_rate": 1.903871965735487e-05, + "loss": 0.3236, "step": 3625 }, { - "epoch": 0.21, - "grad_norm": 0.4624648099073319, - "learning_rate": 1.8378622044941007e-05, - "loss": 0.2992, + "epoch": 0.17, + "grad_norm": 0.4928128434745314, + "learning_rate": 1.9038083020885224e-05, + "loss": 0.2895, "step": 3626 }, { - "epoch": 0.21, - "grad_norm": 0.28370620018557996, - "learning_rate": 1.8377606061573398e-05, - "loss": 0.1702, + "epoch": 0.17, + "grad_norm": 0.3265285220116368, + "learning_rate": 1.9037446184321133e-05, + "loss": 0.2367, "step": 3627 }, { - "epoch": 0.21, - "grad_norm": 0.4113606179544061, - "learning_rate": 1.8376589788089655e-05, - "loss": 0.2951, + "epoch": 0.17, + "grad_norm": 1.2869534349387113, + "learning_rate": 1.9036809147676693e-05, + "loss": 0.3916, "step": 3628 }, { - "epoch": 0.21, - "grad_norm": 0.771704015761994, - "learning_rate": 1.837557322452496e-05, - "loss": 0.4089, + "epoch": 0.17, + "grad_norm": 0.47749848407116285, + "learning_rate": 1.9036171910966005e-05, + "loss": 0.2833, "step": 3629 }, { - "epoch": 0.21, - "grad_norm": 0.4695330219200323, - "learning_rate": 1.8374556370914533e-05, - "loss": 0.3328, + "epoch": 0.17, + "grad_norm": 0.4088996013471531, + "learning_rate": 1.9035534474203183e-05, + "loss": 0.3243, "step": 3630 }, { - "epoch": 0.21, - "grad_norm": 0.3891106792486114, - "learning_rate": 1.8373539227293576e-05, - "loss": 0.2798, + "epoch": 0.17, + "grad_norm": 0.7890569422241258, + "learning_rate": 1.9034896837402334e-05, + "loss": 0.5587, "step": 3631 }, { - "epoch": 0.21, - "grad_norm": 0.9231594968954966, - "learning_rate": 1.8372521793697317e-05, - "loss": 0.5186, + "epoch": 0.17, + "grad_norm": 0.39812506377254775, + "learning_rate": 1.903425900057758e-05, + "loss": 0.2378, "step": 3632 }, { - "epoch": 0.21, - "grad_norm": 0.5334306794574006, - "learning_rate": 1.8371504070160985e-05, - "loss": 0.3294, + "epoch": 0.17, + "grad_norm": 0.3656773819609009, + "learning_rate": 1.9033620963743037e-05, + "loss": 0.2394, "step": 3633 }, { - "epoch": 0.21, - "grad_norm": 0.4208129570730506, - "learning_rate": 1.8370486056719828e-05, - "loss": 0.2981, + "epoch": 0.17, + "grad_norm": 2.643147033178939, + "learning_rate": 1.9032982726912833e-05, + "loss": 0.6897, "step": 3634 }, { - "epoch": 0.21, - "grad_norm": 0.3440651749124253, - "learning_rate": 1.8369467753409102e-05, - "loss": 0.2406, + "epoch": 0.17, + "grad_norm": 0.39306352171887404, + "learning_rate": 1.9032344290101098e-05, + "loss": 0.2279, "step": 3635 }, { - "epoch": 0.21, - "grad_norm": 1.0039452445703967, - "learning_rate": 1.8368449160264064e-05, - "loss": 0.5453, + "epoch": 0.17, + "grad_norm": 0.8084941610203455, + "learning_rate": 1.9031705653321967e-05, + "loss": 0.5142, "step": 3636 }, { - "epoch": 0.21, - "grad_norm": 0.44073239184364055, - "learning_rate": 1.836743027731999e-05, - "loss": 0.3189, + "epoch": 0.17, + "grad_norm": 0.4312529594105459, + "learning_rate": 1.9031066816589575e-05, + "loss": 0.3485, "step": 3637 }, { - "epoch": 0.21, - "grad_norm": 0.40024632916236896, - "learning_rate": 1.8366411104612168e-05, - "loss": 0.3527, + "epoch": 0.17, + "grad_norm": 2.599628694406278, + "learning_rate": 1.9030427779918072e-05, + "loss": 0.2689, "step": 3638 }, { - "epoch": 0.21, - "grad_norm": 0.4302314709355559, - "learning_rate": 1.8365391642175892e-05, - "loss": 0.3616, + "epoch": 0.17, + "grad_norm": 1.2895489892850096, + "learning_rate": 1.90297885433216e-05, + "loss": 0.1837, "step": 3639 }, { - "epoch": 0.21, - "grad_norm": 0.3902773433049301, - "learning_rate": 1.836437189004646e-05, - "loss": 0.2706, + "epoch": 0.17, + "grad_norm": 1.087871744668784, + "learning_rate": 1.902914910681431e-05, + "loss": 0.565, "step": 3640 }, { - "epoch": 0.21, - "grad_norm": 0.3034132980546468, - "learning_rate": 1.836335184825919e-05, - "loss": 0.2261, + "epoch": 0.17, + "grad_norm": 1.0998215245215566, + "learning_rate": 1.9028509470410363e-05, + "loss": 0.3347, "step": 3641 }, { - "epoch": 0.21, - "grad_norm": 0.4224483560026623, - "learning_rate": 1.8362331516849405e-05, - "loss": 0.2859, + "epoch": 0.17, + "grad_norm": 0.44266237041915735, + "learning_rate": 1.9027869634123918e-05, + "loss": 0.3116, "step": 3642 }, { - "epoch": 0.21, - "grad_norm": 0.3997927453552849, - "learning_rate": 1.8361310895852437e-05, - "loss": 0.2909, + "epoch": 0.17, + "grad_norm": 0.9624466259785366, + "learning_rate": 1.902722959796914e-05, + "loss": 0.6728, "step": 3643 }, { - "epoch": 0.21, - "grad_norm": 0.8073075120292302, - "learning_rate": 1.8360289985303637e-05, - "loss": 0.5745, + "epoch": 0.17, + "grad_norm": 2.1412342745639976, + "learning_rate": 1.90265893619602e-05, + "loss": 0.4162, "step": 3644 }, { - "epoch": 0.21, - "grad_norm": 0.5995548870672561, - "learning_rate": 1.8359268785238348e-05, - "loss": 0.4058, + "epoch": 0.17, + "grad_norm": 0.5096362669807545, + "learning_rate": 1.902594892611127e-05, + "loss": 0.2385, "step": 3645 }, { - "epoch": 0.21, - "grad_norm": 0.4135190586223736, - "learning_rate": 1.8358247295691946e-05, - "loss": 0.2841, + "epoch": 0.17, + "grad_norm": 2.0974737301163393, + "learning_rate": 1.902530829043653e-05, + "loss": 0.3994, "step": 3646 }, { - "epoch": 0.21, - "grad_norm": 0.2933792066466321, - "learning_rate": 1.8357225516699797e-05, - "loss": 0.2371, + "epoch": 0.17, + "grad_norm": 4.640916146038519, + "learning_rate": 1.9024667454950165e-05, + "loss": 0.3906, "step": 3647 }, { - "epoch": 0.21, - "grad_norm": 0.8306758061657067, - "learning_rate": 1.835620344829729e-05, - "loss": 0.3256, + "epoch": 0.17, + "grad_norm": 0.6593392264993503, + "learning_rate": 1.902402641966636e-05, + "loss": 0.3145, "step": 3648 }, { - "epoch": 0.21, - "grad_norm": 0.40272226138516737, - "learning_rate": 1.8355181090519814e-05, - "loss": 0.2961, + "epoch": 0.17, + "grad_norm": 0.733442156835296, + "learning_rate": 1.9023385184599308e-05, + "loss": 0.3593, "step": 3649 }, { - "epoch": 0.21, - "grad_norm": 0.7342811633805951, - "learning_rate": 1.8354158443402777e-05, - "loss": 0.4417, + "epoch": 0.17, + "grad_norm": 2.1011254509141817, + "learning_rate": 1.9022743749763205e-05, + "loss": 0.3722, "step": 3650 }, { - "epoch": 0.21, - "grad_norm": 0.5508644077439324, - "learning_rate": 1.835313550698159e-05, - "loss": 0.3978, + "epoch": 0.17, + "grad_norm": 5.13940781479072, + "learning_rate": 1.902210211517225e-05, + "loss": 0.2424, "step": 3651 }, { - "epoch": 0.21, - "grad_norm": 0.3936970414946991, - "learning_rate": 1.8352112281291683e-05, - "loss": 0.2251, + "epoch": 0.17, + "grad_norm": 0.9980933983271467, + "learning_rate": 1.902146028084065e-05, + "loss": 0.546, "step": 3652 }, { - "epoch": 0.21, - "grad_norm": 0.30868325393237483, - "learning_rate": 1.8351088766368487e-05, - "loss": 0.1808, + "epoch": 0.17, + "grad_norm": 0.6194413557929157, + "learning_rate": 1.9020818246782614e-05, + "loss": 0.3113, "step": 3653 }, { - "epoch": 0.21, - "grad_norm": 0.4717999561118713, - "learning_rate": 1.8350064962247443e-05, - "loss": 0.3688, + "epoch": 0.17, + "grad_norm": 0.6029196987120017, + "learning_rate": 1.902017601301236e-05, + "loss": 0.3381, "step": 3654 }, { - "epoch": 0.21, - "grad_norm": 0.3690124510455886, - "learning_rate": 1.8349040868964012e-05, - "loss": 0.1872, + "epoch": 0.17, + "grad_norm": 10.300518900494996, + "learning_rate": 1.90195335795441e-05, + "loss": 0.5999, "step": 3655 }, { - "epoch": 0.21, - "grad_norm": 0.516836148527607, - "learning_rate": 1.8348016486553653e-05, - "loss": 0.4212, + "epoch": 0.17, + "grad_norm": 1.8183861237381742, + "learning_rate": 1.901889094639206e-05, + "loss": 0.3865, "step": 3656 }, { - "epoch": 0.21, - "grad_norm": 0.4174942641382965, - "learning_rate": 1.8346991815051844e-05, - "loss": 0.4136, + "epoch": 0.17, + "grad_norm": 0.8968419988136566, + "learning_rate": 1.9018248113570467e-05, + "loss": 0.3532, "step": 3657 }, { - "epoch": 0.21, - "grad_norm": 0.34246783846879647, - "learning_rate": 1.8345966854494065e-05, - "loss": 0.2218, + "epoch": 0.17, + "grad_norm": 1.6323725567344978, + "learning_rate": 1.901760508109355e-05, + "loss": 0.2662, "step": 3658 }, { - "epoch": 0.21, - "grad_norm": 0.29464001657216854, - "learning_rate": 1.8344941604915813e-05, - "loss": 0.2341, + "epoch": 0.17, + "grad_norm": 0.6921417386206159, + "learning_rate": 1.9016961848975554e-05, + "loss": 0.3472, "step": 3659 }, { - "epoch": 0.21, - "grad_norm": 1.0963469580919971, - "learning_rate": 1.8343916066352593e-05, - "loss": 0.5888, + "epoch": 0.17, + "grad_norm": 0.8121433824324087, + "learning_rate": 1.901631841723071e-05, + "loss": 0.4131, "step": 3660 }, { - "epoch": 0.21, - "grad_norm": 0.39006202105175725, - "learning_rate": 1.834289023883992e-05, - "loss": 0.2244, + "epoch": 0.17, + "grad_norm": 1.0053874535722547, + "learning_rate": 1.9015674785873262e-05, + "loss": 0.3475, "step": 3661 }, { - "epoch": 0.21, - "grad_norm": 0.45351816412520496, - "learning_rate": 1.8341864122413313e-05, - "loss": 0.3389, + "epoch": 0.17, + "grad_norm": 1.5901095939584549, + "learning_rate": 1.901503095491747e-05, + "loss": 0.3862, "step": 3662 }, { - "epoch": 0.21, - "grad_norm": 1.013846492560218, - "learning_rate": 1.8340837717108312e-05, - "loss": 0.6265, + "epoch": 0.17, + "grad_norm": 1.2729058474439323, + "learning_rate": 1.9014386924377583e-05, + "loss": 0.3844, "step": 3663 }, { - "epoch": 0.21, - "grad_norm": 0.44933729211335915, - "learning_rate": 1.8339811022960458e-05, - "loss": 0.3057, + "epoch": 0.17, + "grad_norm": 0.7690873668403949, + "learning_rate": 1.901374269426785e-05, + "loss": 0.325, "step": 3664 }, { - "epoch": 0.21, - "grad_norm": 0.31780800408881466, - "learning_rate": 1.833878404000531e-05, - "loss": 0.1626, + "epoch": 0.17, + "grad_norm": 1.4446244583011774, + "learning_rate": 1.9013098264602546e-05, + "loss": 0.3943, "step": 3665 }, { - "epoch": 0.21, - "grad_norm": 0.3529354991105561, - "learning_rate": 1.8337756768278425e-05, - "loss": 0.2648, + "epoch": 0.17, + "grad_norm": 0.5810935057751412, + "learning_rate": 1.901245363539593e-05, + "loss": 0.2938, "step": 3666 }, { - "epoch": 0.21, - "grad_norm": 0.4312121030833976, - "learning_rate": 1.8336729207815386e-05, - "loss": 0.2812, + "epoch": 0.17, + "grad_norm": 1.9797060112978142, + "learning_rate": 1.901180880666228e-05, + "loss": 0.9236, "step": 3667 }, { - "epoch": 0.21, - "grad_norm": 0.7354723969211455, - "learning_rate": 1.833570135865177e-05, - "loss": 0.3566, + "epoch": 0.17, + "grad_norm": 0.6829773557541724, + "learning_rate": 1.901116377841587e-05, + "loss": 0.1498, "step": 3668 }, { - "epoch": 0.21, - "grad_norm": 1.0099131930682428, - "learning_rate": 1.8334673220823175e-05, - "loss": 0.6111, + "epoch": 0.17, + "grad_norm": 1.0244203633578972, + "learning_rate": 1.9010518550670976e-05, + "loss": 0.335, "step": 3669 }, { - "epoch": 0.21, - "grad_norm": 0.31425998270147343, - "learning_rate": 1.8333644794365205e-05, - "loss": 0.27, + "epoch": 0.17, + "grad_norm": 1.2171900333239536, + "learning_rate": 1.9009873123441888e-05, + "loss": 0.4846, "step": 3670 }, { - "epoch": 0.21, - "grad_norm": 0.3286391464636836, - "learning_rate": 1.8332616079313473e-05, - "loss": 0.2264, + "epoch": 0.17, + "grad_norm": 0.6489798659947853, + "learning_rate": 1.900922749674289e-05, + "loss": 0.1805, "step": 3671 }, { - "epoch": 0.21, - "grad_norm": 0.47239060853182213, - "learning_rate": 1.8331587075703607e-05, - "loss": 0.302, + "epoch": 0.17, + "grad_norm": 0.9612107847301576, + "learning_rate": 1.900858167058828e-05, + "loss": 0.3967, "step": 3672 }, { - "epoch": 0.21, - "grad_norm": 0.5911532765330798, - "learning_rate": 1.833055778357124e-05, - "loss": 0.3735, + "epoch": 0.17, + "grad_norm": 0.6960731937377788, + "learning_rate": 1.9007935644992357e-05, + "loss": 0.373, "step": 3673 }, { - "epoch": 0.21, - "grad_norm": 0.3816260176156264, - "learning_rate": 1.8329528202952013e-05, - "loss": 0.2915, + "epoch": 0.17, + "grad_norm": 0.5099680196369142, + "learning_rate": 1.9007289419969418e-05, + "loss": 0.2442, "step": 3674 }, { - "epoch": 0.21, - "grad_norm": 1.2602046875475506, - "learning_rate": 1.832849833388158e-05, - "loss": 0.8317, + "epoch": 0.17, + "grad_norm": 0.7771283130293353, + "learning_rate": 1.9006642995533774e-05, + "loss": 0.4185, "step": 3675 }, { - "epoch": 0.21, - "grad_norm": 0.5802384065602598, - "learning_rate": 1.8327468176395614e-05, - "loss": 0.378, + "epoch": 0.17, + "grad_norm": 0.8646403974277469, + "learning_rate": 1.900599637169973e-05, + "loss": 0.4995, "step": 3676 }, { - "epoch": 0.21, - "grad_norm": 0.3233690864917701, - "learning_rate": 1.8326437730529778e-05, - "loss": 0.2625, + "epoch": 0.17, + "grad_norm": 0.6510585997859475, + "learning_rate": 1.9005349548481612e-05, + "loss": 0.2154, "step": 3677 }, { - "epoch": 0.21, - "grad_norm": 0.30650130488543587, - "learning_rate": 1.8325406996319762e-05, - "loss": 0.2177, + "epoch": 0.17, + "grad_norm": 0.6310483037752359, + "learning_rate": 1.900470252589373e-05, + "loss": 0.2444, "step": 3678 }, { - "epoch": 0.21, - "grad_norm": 0.4361961196440014, - "learning_rate": 1.8324375973801262e-05, - "loss": 0.3167, + "epoch": 0.17, + "grad_norm": 1.167449208825793, + "learning_rate": 1.900405530395042e-05, + "loss": 0.7863, "step": 3679 }, { - "epoch": 0.21, - "grad_norm": 0.540582742403159, - "learning_rate": 1.8323344663009976e-05, - "loss": 0.4255, + "epoch": 0.17, + "grad_norm": 1.118863241997588, + "learning_rate": 1.9003407882665998e-05, + "loss": 0.5513, "step": 3680 }, { - "epoch": 0.21, - "grad_norm": 1.26530181020778, - "learning_rate": 1.8322313063981628e-05, - "loss": 0.5116, + "epoch": 0.17, + "grad_norm": 0.5547192715644842, + "learning_rate": 1.9002760262054803e-05, + "loss": 0.2657, "step": 3681 }, { - "epoch": 0.21, - "grad_norm": 0.3019632406735647, - "learning_rate": 1.8321281176751932e-05, - "loss": 0.2601, + "epoch": 0.17, + "grad_norm": 0.6968001941131859, + "learning_rate": 1.9002112442131176e-05, + "loss": 0.5383, "step": 3682 }, { - "epoch": 0.21, - "grad_norm": 0.5169685558732896, - "learning_rate": 1.8320249001356627e-05, - "loss": 0.3703, + "epoch": 0.17, + "grad_norm": 0.6571420225621227, + "learning_rate": 1.900146442290945e-05, + "loss": 0.1766, "step": 3683 }, { - "epoch": 0.21, - "grad_norm": 0.40742847831985723, - "learning_rate": 1.831921653783146e-05, - "loss": 0.1924, + "epoch": 0.17, + "grad_norm": 0.5429096860541407, + "learning_rate": 1.9000816204403985e-05, + "loss": 0.2802, "step": 3684 }, { - "epoch": 0.21, - "grad_norm": 0.5064412702318805, - "learning_rate": 1.8318183786212177e-05, - "loss": 0.3476, + "epoch": 0.17, + "grad_norm": 0.5502346063745076, + "learning_rate": 1.900016778662912e-05, + "loss": 0.3408, "step": 3685 }, { - "epoch": 0.21, - "grad_norm": 0.39189664074458824, - "learning_rate": 1.8317150746534553e-05, - "loss": 0.3303, + "epoch": 0.17, + "grad_norm": 0.8931061630021733, + "learning_rate": 1.8999519169599217e-05, + "loss": 0.4635, "step": 3686 }, { - "epoch": 0.21, - "grad_norm": 1.386780392774275, - "learning_rate": 1.8316117418834352e-05, - "loss": 0.8824, + "epoch": 0.17, + "grad_norm": 0.43906256365063023, + "learning_rate": 1.899887035332863e-05, + "loss": 0.323, "step": 3687 }, { - "epoch": 0.21, - "grad_norm": 0.3476158374729174, - "learning_rate": 1.831508380314736e-05, - "loss": 0.1713, + "epoch": 0.17, + "grad_norm": 2.000663032252841, + "learning_rate": 1.8998221337831726e-05, + "loss": 0.8754, "step": 3688 }, { - "epoch": 0.21, - "grad_norm": 0.5407610308721493, - "learning_rate": 1.831404989950938e-05, - "loss": 0.4049, + "epoch": 0.17, + "grad_norm": 0.440839046465537, + "learning_rate": 1.899757212312288e-05, + "loss": 0.2802, "step": 3689 }, { - "epoch": 0.21, - "grad_norm": 0.34332848216555034, - "learning_rate": 1.8313015707956205e-05, - "loss": 0.2995, + "epoch": 0.17, + "grad_norm": 0.4263753706872294, + "learning_rate": 1.8996922709216456e-05, + "loss": 0.1935, "step": 3690 }, { - "epoch": 0.21, - "grad_norm": 0.4909483803893434, - "learning_rate": 1.831198122852366e-05, - "loss": 0.2905, + "epoch": 0.17, + "grad_norm": 1.4325165553682218, + "learning_rate": 1.8996273096126834e-05, + "loss": 0.8307, "step": 3691 }, { - "epoch": 0.21, - "grad_norm": 0.34866275708306377, - "learning_rate": 1.8310946461247553e-05, - "loss": 0.2568, + "epoch": 0.17, + "grad_norm": 0.9189253696946754, + "learning_rate": 1.8995623283868396e-05, + "loss": 0.4023, "step": 3692 }, { - "epoch": 0.21, - "grad_norm": 0.5144534607864611, - "learning_rate": 1.8309911406163736e-05, - "loss": 0.3944, + "epoch": 0.17, + "grad_norm": 0.5549957230873905, + "learning_rate": 1.8994973272455527e-05, + "loss": 0.3206, "step": 3693 }, { - "epoch": 0.21, - "grad_norm": 0.4451881757530133, - "learning_rate": 1.830887606330804e-05, - "loss": 0.2252, + "epoch": 0.17, + "grad_norm": 0.46522126407074005, + "learning_rate": 1.8994323061902623e-05, + "loss": 0.3439, "step": 3694 }, { - "epoch": 0.21, - "grad_norm": 0.46182446045835396, - "learning_rate": 1.8307840432716323e-05, - "loss": 0.3298, + "epoch": 0.17, + "grad_norm": 0.4977730831598749, + "learning_rate": 1.8993672652224074e-05, + "loss": 0.2276, "step": 3695 }, { - "epoch": 0.21, - "grad_norm": 0.7256437850629539, - "learning_rate": 1.830680451442445e-05, - "loss": 0.4658, + "epoch": 0.17, + "grad_norm": 0.39686913329361523, + "learning_rate": 1.899302204343428e-05, + "loss": 0.266, "step": 3696 }, { - "epoch": 0.21, - "grad_norm": 0.31246873377998513, - "learning_rate": 1.8305768308468294e-05, - "loss": 0.1741, + "epoch": 0.17, + "grad_norm": 0.6202783335228893, + "learning_rate": 1.8992371235547647e-05, + "loss": 0.3211, "step": 3697 }, { - "epoch": 0.21, - "grad_norm": 0.3792276788301899, - "learning_rate": 1.830473181488374e-05, - "loss": 0.2723, + "epoch": 0.17, + "grad_norm": 1.1883678431577398, + "learning_rate": 1.899172022857858e-05, + "loss": 0.549, "step": 3698 }, { - "epoch": 0.21, - "grad_norm": 1.4475052756468612, - "learning_rate": 1.8303695033706675e-05, - "loss": 0.8954, + "epoch": 0.17, + "grad_norm": 0.4235299168201256, + "learning_rate": 1.8991069022541495e-05, + "loss": 0.3161, "step": 3699 }, { - "epoch": 0.21, - "grad_norm": 0.4832360785231713, - "learning_rate": 1.8302657964973014e-05, - "loss": 0.3362, + "epoch": 0.17, + "grad_norm": 0.4434834887863107, + "learning_rate": 1.8990417617450806e-05, + "loss": 0.277, "step": 3700 }, { - "epoch": 0.21, - "grad_norm": 0.38449186292934234, - "learning_rate": 1.830162060871866e-05, - "loss": 0.2838, + "epoch": 0.17, + "grad_norm": 0.5024827734215425, + "learning_rate": 1.8989766013320938e-05, + "loss": 0.2186, "step": 3701 }, { - "epoch": 0.21, - "grad_norm": 0.4419538620503951, - "learning_rate": 1.8300582964979544e-05, - "loss": 0.3564, + "epoch": 0.17, + "grad_norm": 0.4047567569233461, + "learning_rate": 1.8989114210166312e-05, + "loss": 0.2381, "step": 3702 }, { - "epoch": 0.21, - "grad_norm": 0.36778425880138244, - "learning_rate": 1.8299545033791596e-05, - "loss": 0.2228, + "epoch": 0.17, + "grad_norm": 1.616415806603672, + "learning_rate": 1.8988462208001363e-05, + "loss": 0.4627, "step": 3703 }, { - "epoch": 0.21, - "grad_norm": 0.30802703243687346, - "learning_rate": 1.829850681519076e-05, - "loss": 0.1985, + "epoch": 0.17, + "grad_norm": 0.6845391748682544, + "learning_rate": 1.8987810006840525e-05, + "loss": 0.3676, "step": 3704 }, { - "epoch": 0.21, - "grad_norm": 0.6655387140633007, - "learning_rate": 1.8297468309212994e-05, - "loss": 0.4198, + "epoch": 0.17, + "grad_norm": 0.4026300469286597, + "learning_rate": 1.8987157606698234e-05, + "loss": 0.2959, "step": 3705 }, { - "epoch": 0.21, - "grad_norm": 0.45041933695514075, - "learning_rate": 1.8296429515894255e-05, - "loss": 0.3048, + "epoch": 0.17, + "grad_norm": 1.3782966715533436, + "learning_rate": 1.8986505007588936e-05, + "loss": 0.6863, "step": 3706 }, { - "epoch": 0.21, - "grad_norm": 0.41420386663343106, - "learning_rate": 1.8295390435270516e-05, - "loss": 0.2809, + "epoch": 0.17, + "grad_norm": 0.42262451196313494, + "learning_rate": 1.898585220952708e-05, + "loss": 0.206, "step": 3707 }, { - "epoch": 0.21, - "grad_norm": 0.6850026949422853, - "learning_rate": 1.8294351067377762e-05, - "loss": 0.4826, + "epoch": 0.17, + "grad_norm": 0.5353271489538961, + "learning_rate": 1.8985199212527115e-05, + "loss": 0.3356, "step": 3708 }, { - "epoch": 0.21, - "grad_norm": 0.3475258356965287, - "learning_rate": 1.829331141225199e-05, - "loss": 0.2416, + "epoch": 0.17, + "grad_norm": 0.7424185279194552, + "learning_rate": 1.8984546016603503e-05, + "loss": 0.384, "step": 3709 }, { - "epoch": 0.21, - "grad_norm": 0.39414644413001365, - "learning_rate": 1.8292271469929202e-05, - "loss": 0.2489, + "epoch": 0.17, + "grad_norm": 1.4662784354012395, + "learning_rate": 1.89838926217707e-05, + "loss": 0.399, "step": 3710 }, { - "epoch": 0.21, - "grad_norm": 0.46620981413281065, - "learning_rate": 1.829123124044541e-05, - "loss": 0.3112, + "epoch": 0.17, + "grad_norm": 0.4676236451135859, + "learning_rate": 1.898323902804317e-05, + "loss": 0.3036, "step": 3711 }, { - "epoch": 0.21, - "grad_norm": 0.8393024501634222, - "learning_rate": 1.8290190723836632e-05, - "loss": 0.4927, + "epoch": 0.17, + "grad_norm": 0.3998719552726794, + "learning_rate": 1.8982585235435385e-05, + "loss": 0.2779, "step": 3712 }, { - "epoch": 0.21, - "grad_norm": 0.5442988866000577, - "learning_rate": 1.828914992013891e-05, - "loss": 0.3368, + "epoch": 0.17, + "grad_norm": 0.4791024924881441, + "learning_rate": 1.8981931243961823e-05, + "loss": 0.2139, "step": 3713 }, { - "epoch": 0.21, - "grad_norm": 0.5338386638537052, - "learning_rate": 1.828810882938828e-05, - "loss": 0.2955, + "epoch": 0.17, + "grad_norm": 0.40480283423769126, + "learning_rate": 1.8981277053636963e-05, + "loss": 0.2267, "step": 3714 }, { - "epoch": 0.21, - "grad_norm": 0.7721046804040257, - "learning_rate": 1.8287067451620796e-05, - "loss": 0.4498, + "epoch": 0.17, + "grad_norm": 1.395365682942663, + "learning_rate": 1.898062266447528e-05, + "loss": 0.4894, "step": 3715 }, { - "epoch": 0.21, - "grad_norm": 0.39359198704409026, - "learning_rate": 1.8286025786872526e-05, - "loss": 0.3215, + "epoch": 0.17, + "grad_norm": 0.5614093371067592, + "learning_rate": 1.8979968076491273e-05, + "loss": 0.3111, "step": 3716 }, { - "epoch": 0.21, - "grad_norm": 0.309872202825924, - "learning_rate": 1.8284983835179536e-05, - "loss": 0.2202, + "epoch": 0.17, + "grad_norm": 0.46041510522170354, + "learning_rate": 1.8979313289699422e-05, + "loss": 0.3126, "step": 3717 }, { - "epoch": 0.21, - "grad_norm": 0.3891445865569338, - "learning_rate": 1.8283941596577917e-05, - "loss": 0.2658, + "epoch": 0.17, + "grad_norm": 0.6571036633173082, + "learning_rate": 1.8978658304114234e-05, + "loss": 0.36, "step": 3718 }, { - "epoch": 0.21, - "grad_norm": 0.5257520788875862, - "learning_rate": 1.8282899071103755e-05, - "loss": 0.3476, + "epoch": 0.17, + "grad_norm": 1.8866654247788257, + "learning_rate": 1.8978003119750203e-05, + "loss": 0.7089, "step": 3719 }, { - "epoch": 0.21, - "grad_norm": 0.9016773666981592, - "learning_rate": 1.8281856258793155e-05, - "loss": 0.364, + "epoch": 0.17, + "grad_norm": 0.42528689593962826, + "learning_rate": 1.8977347736621834e-05, + "loss": 0.2556, "step": 3720 }, { - "epoch": 0.21, - "grad_norm": 0.40787992390494676, - "learning_rate": 1.8280813159682226e-05, - "loss": 0.3125, + "epoch": 0.17, + "grad_norm": 0.565851674040267, + "learning_rate": 1.8976692154743638e-05, + "loss": 0.373, "step": 3721 }, { - "epoch": 0.21, - "grad_norm": 0.39215624227848944, - "learning_rate": 1.8279769773807094e-05, - "loss": 0.295, + "epoch": 0.17, + "grad_norm": 1.4838162471382552, + "learning_rate": 1.8976036374130134e-05, + "loss": 0.5963, "step": 3722 }, { - "epoch": 0.21, - "grad_norm": 0.19942256276475304, - "learning_rate": 1.8278726101203892e-05, - "loss": 0.1365, + "epoch": 0.17, + "grad_norm": 0.29957076529803106, + "learning_rate": 1.8975380394795833e-05, + "loss": 0.1552, "step": 3723 }, { - "epoch": 0.21, - "grad_norm": 1.0897560592528546, - "learning_rate": 1.8277682141908763e-05, - "loss": 0.4017, + "epoch": 0.17, + "grad_norm": 1.858469939406834, + "learning_rate": 1.8974724216755262e-05, + "loss": 0.7371, "step": 3724 }, { - "epoch": 0.21, - "grad_norm": 0.45932270538036235, - "learning_rate": 1.8276637895957853e-05, - "loss": 0.331, + "epoch": 0.17, + "grad_norm": 0.4807925018011489, + "learning_rate": 1.8974067840022945e-05, + "loss": 0.3649, "step": 3725 }, { - "epoch": 0.21, - "grad_norm": 0.38873202435458054, - "learning_rate": 1.827559336338733e-05, - "loss": 0.3411, + "epoch": 0.17, + "grad_norm": 0.3858511358962396, + "learning_rate": 1.897341126461342e-05, + "loss": 0.2759, "step": 3726 }, { - "epoch": 0.21, - "grad_norm": 0.7523879935802436, - "learning_rate": 1.8274548544233367e-05, - "loss": 0.2851, + "epoch": 0.17, + "grad_norm": 1.6854265381497475, + "learning_rate": 1.897275449054121e-05, + "loss": 0.5761, "step": 3727 }, { - "epoch": 0.21, - "grad_norm": 0.3509208008137019, - "learning_rate": 1.827350343853214e-05, - "loss": 0.2867, + "epoch": 0.17, + "grad_norm": 0.5749659330347707, + "learning_rate": 1.8972097517820873e-05, + "loss": 0.3638, "step": 3728 }, { - "epoch": 0.21, - "grad_norm": 0.4536301444246971, - "learning_rate": 1.8272458046319848e-05, - "loss": 0.3437, + "epoch": 0.17, + "grad_norm": 0.3751925093523235, + "learning_rate": 1.8971440346466937e-05, + "loss": 0.21, "step": 3729 }, { - "epoch": 0.21, - "grad_norm": 1.733104281686962, - "learning_rate": 1.8271412367632688e-05, - "loss": 0.4302, + "epoch": 0.17, + "grad_norm": 0.3891359935281408, + "learning_rate": 1.897078297649396e-05, + "loss": 0.2091, "step": 3730 }, { - "epoch": 0.21, - "grad_norm": 0.294900982042248, - "learning_rate": 1.8270366402506872e-05, - "loss": 0.223, + "epoch": 0.17, + "grad_norm": 1.064027229087353, + "learning_rate": 1.8970125407916497e-05, + "loss": 0.5092, "step": 3731 }, { - "epoch": 0.21, - "grad_norm": 0.45382345157960424, - "learning_rate": 1.8269320150978625e-05, - "loss": 0.312, + "epoch": 0.17, + "grad_norm": 0.4922246591920602, + "learning_rate": 1.89694676407491e-05, + "loss": 0.3039, "step": 3732 }, { - "epoch": 0.21, - "grad_norm": 0.4332203720271532, - "learning_rate": 1.8268273613084177e-05, - "loss": 0.2502, + "epoch": 0.17, + "grad_norm": 0.41657869002765374, + "learning_rate": 1.8968809675006334e-05, + "loss": 0.3078, "step": 3733 }, { - "epoch": 0.21, - "grad_norm": 0.4650150296518048, - "learning_rate": 1.826722678885977e-05, - "loss": 0.3157, + "epoch": 0.17, + "grad_norm": 1.2776787780400445, + "learning_rate": 1.896815151070277e-05, + "loss": 0.7771, "step": 3734 }, { - "epoch": 0.21, - "grad_norm": 1.571029287761417, - "learning_rate": 1.8266179678341654e-05, - "loss": 0.7587, + "epoch": 0.17, + "grad_norm": 0.39400485456716355, + "learning_rate": 1.8967493147852974e-05, + "loss": 0.3021, "step": 3735 }, { - "epoch": 0.21, - "grad_norm": 1.048302589229643, - "learning_rate": 1.826513228156609e-05, - "loss": 0.3022, + "epoch": 0.17, + "grad_norm": 0.3494657797789317, + "learning_rate": 1.8966834586471517e-05, + "loss": 0.1948, "step": 3736 }, { - "epoch": 0.21, - "grad_norm": 0.3862146200874309, - "learning_rate": 1.826408459856935e-05, - "loss": 0.2914, + "epoch": 0.17, + "grad_norm": 1.089432436252414, + "learning_rate": 1.896617582657299e-05, + "loss": 0.5613, "step": 3737 }, { - "epoch": 0.21, - "grad_norm": 0.5222607559671937, - "learning_rate": 1.826303662938772e-05, - "loss": 0.2394, + "epoch": 0.17, + "grad_norm": 0.4486100407780712, + "learning_rate": 1.8965516868171973e-05, + "loss": 0.3441, "step": 3738 }, { - "epoch": 0.21, - "grad_norm": 1.3267918828905096, - "learning_rate": 1.826198837405748e-05, - "loss": 0.709, + "epoch": 0.17, + "grad_norm": 0.7203657736566297, + "learning_rate": 1.896485771128305e-05, + "loss": 0.4487, "step": 3739 }, { - "epoch": 0.21, - "grad_norm": 0.39077333658956925, - "learning_rate": 1.8260939832614942e-05, - "loss": 0.2071, + "epoch": 0.17, + "grad_norm": 0.4852045214703237, + "learning_rate": 1.896419835592082e-05, + "loss": 0.3736, "step": 3740 }, { - "epoch": 0.21, - "grad_norm": 0.44790985884301965, - "learning_rate": 1.8259891005096414e-05, - "loss": 0.3655, + "epoch": 0.17, + "grad_norm": 0.46394766813715893, + "learning_rate": 1.8963538802099875e-05, + "loss": 0.3125, "step": 3741 }, { - "epoch": 0.21, - "grad_norm": 0.667719602409911, - "learning_rate": 1.8258841891538214e-05, - "loss": 0.4282, + "epoch": 0.17, + "grad_norm": 0.4394214497636867, + "learning_rate": 1.8962879049834825e-05, + "loss": 0.1049, "step": 3742 }, { - "epoch": 0.22, - "grad_norm": 0.516883514093155, - "learning_rate": 1.8257792491976676e-05, - "loss": 0.2252, + "epoch": 0.17, + "grad_norm": 1.0139946835718965, + "learning_rate": 1.8962219099140268e-05, + "loss": 0.4577, "step": 3743 }, { - "epoch": 0.22, - "grad_norm": 0.42556191862763, - "learning_rate": 1.825674280644814e-05, - "loss": 0.192, + "epoch": 0.17, + "grad_norm": 0.404295228616198, + "learning_rate": 1.896155895003082e-05, + "loss": 0.2854, "step": 3744 }, { - "epoch": 0.22, - "grad_norm": 0.42746714546213527, - "learning_rate": 1.8255692834988952e-05, - "loss": 0.3717, + "epoch": 0.17, + "grad_norm": 0.6598352667598504, + "learning_rate": 1.8960898602521093e-05, + "loss": 0.4018, "step": 3745 }, { - "epoch": 0.22, - "grad_norm": 0.3455175925005218, - "learning_rate": 1.8254642577635478e-05, - "loss": 0.2232, + "epoch": 0.17, + "grad_norm": 0.9699982477864619, + "learning_rate": 1.8960238056625707e-05, + "loss": 0.5274, "step": 3746 }, { - "epoch": 0.22, - "grad_norm": 0.5540250899869666, - "learning_rate": 1.8253592034424085e-05, - "loss": 0.5053, + "epoch": 0.17, + "grad_norm": 0.49111293582629667, + "learning_rate": 1.8959577312359287e-05, + "loss": 0.2835, "step": 3747 }, { - "epoch": 0.22, - "grad_norm": 1.0421298320869656, - "learning_rate": 1.8252541205391155e-05, - "loss": 0.6174, + "epoch": 0.17, + "grad_norm": 0.30084833323717486, + "learning_rate": 1.895891636973646e-05, + "loss": 0.2266, "step": 3748 }, { - "epoch": 0.22, - "grad_norm": 0.44154945291697445, - "learning_rate": 1.825149009057308e-05, - "loss": 0.2326, + "epoch": 0.17, + "grad_norm": 0.5663267396998604, + "learning_rate": 1.895825522877186e-05, + "loss": 0.373, "step": 3749 }, { - "epoch": 0.22, - "grad_norm": 0.3709396033941532, - "learning_rate": 1.8250438690006257e-05, - "loss": 0.2199, + "epoch": 0.17, + "grad_norm": 0.5180450970564421, + "learning_rate": 1.8957593889480127e-05, + "loss": 0.3266, "step": 3750 }, { - "epoch": 0.22, - "grad_norm": 1.3866050733970163, - "learning_rate": 1.8249387003727097e-05, - "loss": 0.7559, + "epoch": 0.17, + "grad_norm": 0.6340330213163782, + "learning_rate": 1.8956932351875893e-05, + "loss": 0.409, "step": 3751 }, { - "epoch": 0.22, - "grad_norm": 0.384792848713734, - "learning_rate": 1.824833503177202e-05, - "loss": 0.3101, + "epoch": 0.17, + "grad_norm": 0.47712638623116027, + "learning_rate": 1.895627061597381e-05, + "loss": 0.2895, "step": 3752 }, { - "epoch": 0.22, - "grad_norm": 0.3965591271182984, - "learning_rate": 1.8247282774177456e-05, - "loss": 0.3009, + "epoch": 0.17, + "grad_norm": 0.4533859551426609, + "learning_rate": 1.8955608681788532e-05, + "loss": 0.2782, "step": 3753 }, { - "epoch": 0.22, - "grad_norm": 0.7462894283297303, - "learning_rate": 1.824623023097984e-05, - "loss": 0.4997, + "epoch": 0.17, + "grad_norm": 0.3021569639092531, + "learning_rate": 1.8954946549334707e-05, + "loss": 0.2485, "step": 3754 }, { - "epoch": 0.22, - "grad_norm": 0.3943149843852122, - "learning_rate": 1.824517740221563e-05, - "loss": 0.2825, + "epoch": 0.17, + "grad_norm": 0.5965317285807341, + "learning_rate": 1.8954284218626995e-05, + "loss": 0.4855, "step": 3755 }, { - "epoch": 0.22, - "grad_norm": 0.2177870980472017, - "learning_rate": 1.824412428792128e-05, - "loss": 0.1226, + "epoch": 0.17, + "grad_norm": 0.34036434232534696, + "learning_rate": 1.895362168968006e-05, + "loss": 0.2222, "step": 3756 }, { - "epoch": 0.22, - "grad_norm": 0.499144612415864, - "learning_rate": 1.8243070888133262e-05, - "loss": 0.361, + "epoch": 0.17, + "grad_norm": 0.5941958584774611, + "learning_rate": 1.895295896250857e-05, + "loss": 0.4021, "step": 3757 }, { - "epoch": 0.22, - "grad_norm": 0.5001785109282243, - "learning_rate": 1.8242017202888053e-05, - "loss": 0.3159, + "epoch": 0.17, + "grad_norm": 1.7159526561327243, + "learning_rate": 1.89522960371272e-05, + "loss": 0.8156, "step": 3758 }, { - "epoch": 0.22, - "grad_norm": 0.7388789713211021, - "learning_rate": 1.824096323222214e-05, - "loss": 0.3556, + "epoch": 0.17, + "grad_norm": 0.45745800275554843, + "learning_rate": 1.8951632913550625e-05, + "loss": 0.2111, "step": 3759 }, { - "epoch": 0.22, - "grad_norm": 0.534150495177783, - "learning_rate": 1.8239908976172027e-05, - "loss": 0.4201, + "epoch": 0.17, + "grad_norm": 0.48009331366598, + "learning_rate": 1.895096959179352e-05, + "loss": 0.3866, "step": 3760 }, { - "epoch": 0.22, - "grad_norm": 0.4950814070548895, - "learning_rate": 1.823885443477422e-05, - "loss": 0.2938, + "epoch": 0.17, + "grad_norm": 0.4238114569873109, + "learning_rate": 1.8950306071870583e-05, + "loss": 0.3381, "step": 3761 }, { - "epoch": 0.22, - "grad_norm": 0.28446685490046, - "learning_rate": 1.8237799608065238e-05, - "loss": 0.2129, + "epoch": 0.17, + "grad_norm": 0.33041634711102796, + "learning_rate": 1.894964235379649e-05, + "loss": 0.1903, "step": 3762 }, { - "epoch": 0.22, - "grad_norm": 0.6083754544167314, - "learning_rate": 1.823674449608161e-05, - "loss": 0.405, + "epoch": 0.17, + "grad_norm": 1.1525106016945428, + "learning_rate": 1.8948978437585946e-05, + "loss": 0.7198, "step": 3763 }, { - "epoch": 0.22, - "grad_norm": 0.3692492992721875, - "learning_rate": 1.8235689098859874e-05, - "loss": 0.2551, + "epoch": 0.17, + "grad_norm": 0.4729686781775277, + "learning_rate": 1.894831432325364e-05, + "loss": 0.376, "step": 3764 }, { - "epoch": 0.22, - "grad_norm": 0.39185364731901257, - "learning_rate": 1.823463341643658e-05, - "loss": 0.3614, + "epoch": 0.17, + "grad_norm": 0.3300459819102284, + "learning_rate": 1.894765001081428e-05, + "loss": 0.0855, "step": 3765 }, { - "epoch": 0.22, - "grad_norm": 0.8683819485532979, - "learning_rate": 1.8233577448848283e-05, - "loss": 0.4439, + "epoch": 0.17, + "grad_norm": 0.4431413127446698, + "learning_rate": 1.8946985500282574e-05, + "loss": 0.3642, "step": 3766 }, { - "epoch": 0.22, - "grad_norm": 0.3771140986653881, - "learning_rate": 1.8232521196131552e-05, - "loss": 0.3055, + "epoch": 0.17, + "grad_norm": 0.30472923790560014, + "learning_rate": 1.8946320791673232e-05, + "loss": 0.2598, "step": 3767 }, { - "epoch": 0.22, - "grad_norm": 0.35751520286149197, - "learning_rate": 1.823146465832297e-05, - "loss": 0.2238, + "epoch": 0.17, + "grad_norm": 0.4798823303764402, + "learning_rate": 1.894565588500097e-05, + "loss": 0.3291, "step": 3768 }, { - "epoch": 0.22, - "grad_norm": 0.4560483343446017, - "learning_rate": 1.823040783545912e-05, - "loss": 0.309, + "epoch": 0.17, + "grad_norm": 0.4142473003065064, + "learning_rate": 1.894499078028051e-05, + "loss": 0.2792, "step": 3769 }, { - "epoch": 0.22, - "grad_norm": 0.35578254331441966, - "learning_rate": 1.8229350727576597e-05, - "loss": 0.2929, + "epoch": 0.17, + "grad_norm": 1.2834734537805503, + "learning_rate": 1.8944325477526573e-05, + "loss": 0.7206, "step": 3770 }, { - "epoch": 0.22, - "grad_norm": 0.7910099204556784, - "learning_rate": 1.8228293334712015e-05, - "loss": 0.5764, + "epoch": 0.17, + "grad_norm": 0.5993293374844946, + "learning_rate": 1.894365997675389e-05, + "loss": 0.3461, "step": 3771 }, { - "epoch": 0.22, - "grad_norm": 0.47524677083124306, - "learning_rate": 1.822723565690199e-05, - "loss": 0.3022, + "epoch": 0.17, + "grad_norm": 0.3529961841380468, + "learning_rate": 1.8942994277977197e-05, + "loss": 0.2848, "step": 3772 }, { - "epoch": 0.22, - "grad_norm": 0.3292122348325114, - "learning_rate": 1.8226177694183144e-05, - "loss": 0.2735, + "epoch": 0.17, + "grad_norm": 0.7505628723974592, + "learning_rate": 1.894232838121123e-05, + "loss": 0.5152, "step": 3773 }, { - "epoch": 0.22, - "grad_norm": 0.5184964716025334, - "learning_rate": 1.8225119446592122e-05, - "loss": 0.2828, + "epoch": 0.17, + "grad_norm": 0.29326615637552883, + "learning_rate": 1.894166228647073e-05, + "loss": 0.1495, "step": 3774 }, { - "epoch": 0.22, - "grad_norm": 0.3960302570284166, - "learning_rate": 1.8224060914165564e-05, - "loss": 0.3376, + "epoch": 0.17, + "grad_norm": 0.5303874621499461, + "learning_rate": 1.8940995993770448e-05, + "loss": 0.308, "step": 3775 }, { - "epoch": 0.22, - "grad_norm": 0.3777909633108905, - "learning_rate": 1.8223002096940133e-05, - "loss": 0.2277, + "epoch": 0.17, + "grad_norm": 0.6100775975676903, + "learning_rate": 1.894032950312513e-05, + "loss": 0.3779, "step": 3776 }, { - "epoch": 0.22, - "grad_norm": 0.3777145561989262, - "learning_rate": 1.822194299495249e-05, - "loss": 0.334, + "epoch": 0.17, + "grad_norm": 0.5380217322695732, + "learning_rate": 1.8939662814549532e-05, + "loss": 0.3409, "step": 3777 }, { - "epoch": 0.22, - "grad_norm": 0.9906118795976717, - "learning_rate": 1.8220883608239317e-05, - "loss": 0.6105, + "epoch": 0.17, + "grad_norm": 0.5186684794969783, + "learning_rate": 1.8938995928058417e-05, + "loss": 0.3427, "step": 3778 }, { - "epoch": 0.22, - "grad_norm": 0.3680252029129673, - "learning_rate": 1.82198239368373e-05, - "loss": 0.2007, + "epoch": 0.17, + "grad_norm": 0.3438986406149454, + "learning_rate": 1.8938328843666548e-05, + "loss": 0.2101, "step": 3779 }, { - "epoch": 0.22, - "grad_norm": 0.29715685119899815, - "learning_rate": 1.8218763980783127e-05, - "loss": 0.2653, + "epoch": 0.17, + "grad_norm": 0.3621507198293156, + "learning_rate": 1.8937661561388694e-05, + "loss": 0.2961, "step": 3780 }, { - "epoch": 0.22, - "grad_norm": 0.3725279137918228, - "learning_rate": 1.8217703740113518e-05, - "loss": 0.3547, + "epoch": 0.17, + "grad_norm": 0.6170979497152046, + "learning_rate": 1.8936994081239627e-05, + "loss": 0.3863, "step": 3781 }, { - "epoch": 0.22, - "grad_norm": 0.36171919213090536, - "learning_rate": 1.8216643214865176e-05, - "loss": 0.1849, + "epoch": 0.17, + "grad_norm": 0.7438517089597998, + "learning_rate": 1.8936326403234125e-05, + "loss": 0.4586, "step": 3782 }, { - "epoch": 0.22, - "grad_norm": 0.5560972723081516, - "learning_rate": 1.8215582405074838e-05, - "loss": 0.4323, + "epoch": 0.17, + "grad_norm": 0.5604726344443632, + "learning_rate": 1.893565852738697e-05, + "loss": 0.4279, "step": 3783 }, { - "epoch": 0.22, - "grad_norm": 1.6986156985423673, - "learning_rate": 1.821452131077923e-05, - "loss": 0.7669, + "epoch": 0.17, + "grad_norm": 0.4244975284524602, + "learning_rate": 1.8934990453712944e-05, + "loss": 0.3269, "step": 3784 }, { - "epoch": 0.22, - "grad_norm": 0.2939380110603323, - "learning_rate": 1.8213459932015104e-05, - "loss": 0.2367, + "epoch": 0.17, + "grad_norm": 0.39896152818694464, + "learning_rate": 1.8934322182226843e-05, + "loss": 0.2274, "step": 3785 }, { - "epoch": 0.22, - "grad_norm": 0.5918622124773595, - "learning_rate": 1.8212398268819214e-05, - "loss": 0.4855, + "epoch": 0.17, + "grad_norm": 0.412941709907691, + "learning_rate": 1.8933653712943457e-05, + "loss": 0.2226, "step": 3786 }, { - "epoch": 0.22, - "grad_norm": 0.4214369828365935, - "learning_rate": 1.8211336321228326e-05, - "loss": 0.3198, + "epoch": 0.17, + "grad_norm": 0.43685101865741244, + "learning_rate": 1.8932985045877594e-05, + "loss": 0.308, "step": 3787 }, { - "epoch": 0.22, - "grad_norm": 0.3423353082604522, - "learning_rate": 1.8210274089279214e-05, - "loss": 0.2273, + "epoch": 0.17, + "grad_norm": 0.4610269608574551, + "learning_rate": 1.893231618104405e-05, + "loss": 0.3087, "step": 3788 }, { - "epoch": 0.22, - "grad_norm": 0.48273982176060215, - "learning_rate": 1.8209211573008663e-05, - "loss": 0.2885, + "epoch": 0.17, + "grad_norm": 0.6830710206289271, + "learning_rate": 1.893164711845763e-05, + "loss": 0.3556, "step": 3789 }, { - "epoch": 0.22, - "grad_norm": 1.081969336795844, - "learning_rate": 1.8208148772453466e-05, - "loss": 0.6395, - "step": 3790 + "epoch": 0.17, + "grad_norm": 0.42993448675483253, + "learning_rate": 1.8930977858133157e-05, + "loss": 0.3573, + "step": 3790 }, { - "epoch": 0.22, - "grad_norm": 0.4540363009130433, - "learning_rate": 1.8207085687650433e-05, - "loss": 0.3198, + "epoch": 0.17, + "grad_norm": 0.9703143784984662, + "learning_rate": 1.893030840008544e-05, + "loss": 0.2729, "step": 3791 }, { - "epoch": 0.22, - "grad_norm": 0.6965030051365356, - "learning_rate": 1.8206022318636375e-05, - "loss": 0.3283, + "epoch": 0.17, + "grad_norm": 0.29601278255293695, + "learning_rate": 1.89296387443293e-05, + "loss": 0.2396, "step": 3792 }, { - "epoch": 0.22, - "grad_norm": 0.36981946890013234, - "learning_rate": 1.8204958665448116e-05, - "loss": 0.3409, + "epoch": 0.17, + "grad_norm": 0.5202364884330402, + "learning_rate": 1.8928968890879567e-05, + "loss": 0.3816, "step": 3793 }, { - "epoch": 0.22, - "grad_norm": 0.30072224494974714, - "learning_rate": 1.8203894728122492e-05, - "loss": 0.2236, + "epoch": 0.17, + "grad_norm": 1.4523684326657587, + "learning_rate": 1.892829883975107e-05, + "loss": 0.524, "step": 3794 }, { - "epoch": 0.22, - "grad_norm": 0.489290813598683, - "learning_rate": 1.8202830506696346e-05, - "loss": 0.3333, + "epoch": 0.17, + "grad_norm": 0.34641037648278644, + "learning_rate": 1.8927628590958643e-05, + "loss": 0.2271, "step": 3795 }, { - "epoch": 0.22, - "grad_norm": 0.3308604611705892, - "learning_rate": 1.8201766001206533e-05, - "loss": 0.2731, + "epoch": 0.17, + "grad_norm": 0.3838078551354471, + "learning_rate": 1.8926958144517118e-05, + "loss": 0.33, "step": 3796 }, { - "epoch": 0.22, - "grad_norm": 0.5543044671117653, - "learning_rate": 1.8200701211689915e-05, - "loss": 0.3272, + "epoch": 0.17, + "grad_norm": 1.167711146671076, + "learning_rate": 1.8926287500441346e-05, + "loss": 0.6616, "step": 3797 }, { - "epoch": 0.22, - "grad_norm": 0.49671661520822036, - "learning_rate": 1.819963613818337e-05, - "loss": 0.3211, + "epoch": 0.17, + "grad_norm": 0.33171824424706625, + "learning_rate": 1.8925616658746174e-05, + "loss": 0.1562, "step": 3798 }, { - "epoch": 0.22, - "grad_norm": 0.39700198461844927, - "learning_rate": 1.8198570780723773e-05, - "loss": 0.2987, + "epoch": 0.17, + "grad_norm": 0.6671980643214174, + "learning_rate": 1.8924945619446452e-05, + "loss": 0.4283, "step": 3799 }, { - "epoch": 0.22, - "grad_norm": 0.6450937703071342, - "learning_rate": 1.8197505139348023e-05, - "loss": 0.3396, + "epoch": 0.17, + "grad_norm": 0.5880208623248997, + "learning_rate": 1.8924274382557033e-05, + "loss": 0.3771, "step": 3800 }, { - "epoch": 0.22, - "grad_norm": 0.34925975121207203, - "learning_rate": 1.8196439214093023e-05, - "loss": 0.2853, + "epoch": 0.17, + "grad_norm": 0.5875173546201404, + "learning_rate": 1.8923602948092782e-05, + "loss": 0.2291, "step": 3801 }, { - "epoch": 0.22, - "grad_norm": 0.3075989600836906, - "learning_rate": 1.819537300499569e-05, - "loss": 0.1727, + "epoch": 0.17, + "grad_norm": 0.4422039565180075, + "learning_rate": 1.892293131606856e-05, + "loss": 0.3301, "step": 3802 }, { - "epoch": 0.22, - "grad_norm": 0.40942585125745, - "learning_rate": 1.8194306512092938e-05, - "loss": 0.3184, + "epoch": 0.17, + "grad_norm": 0.5016023794249276, + "learning_rate": 1.8922259486499243e-05, + "loss": 0.3912, "step": 3803 }, { - "epoch": 0.22, - "grad_norm": 0.3781360749319683, - "learning_rate": 1.8193239735421703e-05, - "loss": 0.3548, + "epoch": 0.17, + "grad_norm": 0.43038263397043336, + "learning_rate": 1.8921587459399696e-05, + "loss": 0.2512, "step": 3804 }, { - "epoch": 0.22, - "grad_norm": 0.5636306621999936, - "learning_rate": 1.819217267501893e-05, - "loss": 0.2625, + "epoch": 0.17, + "grad_norm": 0.43277972727728126, + "learning_rate": 1.8920915234784805e-05, + "loss": 0.3244, "step": 3805 }, { - "epoch": 0.22, - "grad_norm": 0.44361177405533797, - "learning_rate": 1.819110533092157e-05, - "loss": 0.3215, + "epoch": 0.17, + "grad_norm": 0.8482089424796287, + "learning_rate": 1.8920242812669448e-05, + "loss": 0.5095, "step": 3806 }, { - "epoch": 0.22, - "grad_norm": 0.6164138406659124, - "learning_rate": 1.8190037703166585e-05, - "loss": 0.4649, + "epoch": 0.17, + "grad_norm": 0.35872356684728096, + "learning_rate": 1.891957019306851e-05, + "loss": 0.2119, "step": 3807 }, { - "epoch": 0.22, - "grad_norm": 0.25182141423743754, - "learning_rate": 1.8188969791790946e-05, - "loss": 0.1842, + "epoch": 0.17, + "grad_norm": 0.3555964240337658, + "learning_rate": 1.891889737599689e-05, + "loss": 0.2288, "step": 3808 }, { - "epoch": 0.22, - "grad_norm": 0.4578954704514088, - "learning_rate": 1.8187901596831638e-05, - "loss": 0.3233, + "epoch": 0.17, + "grad_norm": 1.2664678687376982, + "learning_rate": 1.8918224361469474e-05, + "loss": 0.6336, "step": 3809 }, { - "epoch": 0.22, - "grad_norm": 0.5217483050301579, - "learning_rate": 1.8186833118325645e-05, - "loss": 0.4206, + "epoch": 0.18, + "grad_norm": 0.8025907102716647, + "learning_rate": 1.8917551149501165e-05, + "loss": 0.5077, "step": 3810 }, { - "epoch": 0.22, - "grad_norm": 0.5431293205707014, - "learning_rate": 1.8185764356309975e-05, - "loss": 0.4136, + "epoch": 0.18, + "grad_norm": 0.3534170430749267, + "learning_rate": 1.891687774010687e-05, + "loss": 0.2932, "step": 3811 }, { - "epoch": 0.22, - "grad_norm": 0.45279199338795334, - "learning_rate": 1.8184695310821635e-05, - "loss": 0.2989, + "epoch": 0.18, + "grad_norm": 0.522821627782289, + "learning_rate": 1.89162041333015e-05, + "loss": 0.3986, "step": 3812 }, { - "epoch": 0.22, - "grad_norm": 0.5020020584142632, - "learning_rate": 1.8183625981897653e-05, - "loss": 0.3023, + "epoch": 0.18, + "grad_norm": 0.3694861954705531, + "learning_rate": 1.8915530329099958e-05, + "loss": 0.1883, "step": 3813 }, { - "epoch": 0.22, - "grad_norm": 0.34007283638326713, - "learning_rate": 1.818255636957505e-05, - "loss": 0.2279, + "epoch": 0.18, + "grad_norm": 0.4084187952089302, + "learning_rate": 1.891485632751717e-05, + "loss": 0.2293, "step": 3814 }, { - "epoch": 0.22, - "grad_norm": 0.5940995910555453, - "learning_rate": 1.818148647389088e-05, - "loss": 0.2751, + "epoch": 0.18, + "grad_norm": 0.5450867963604158, + "learning_rate": 1.8914182128568055e-05, + "loss": 0.3867, "step": 3815 }, { - "epoch": 0.22, - "grad_norm": 0.3728257688155502, - "learning_rate": 1.8180416294882178e-05, - "loss": 0.3428, + "epoch": 0.18, + "grad_norm": 0.49892401452017954, + "learning_rate": 1.891350773226754e-05, + "loss": 0.3567, "step": 3816 }, { - "epoch": 0.22, - "grad_norm": 0.540902906588058, - "learning_rate": 1.817934583258601e-05, - "loss": 0.413, + "epoch": 0.18, + "grad_norm": 0.392024968654682, + "learning_rate": 1.8912833138630552e-05, + "loss": 0.2647, "step": 3817 }, { - "epoch": 0.22, - "grad_norm": 0.32082684762622404, - "learning_rate": 1.8178275087039452e-05, - "loss": 0.1668, + "epoch": 0.18, + "grad_norm": 0.6253472137590154, + "learning_rate": 1.8912158347672032e-05, + "loss": 0.4868, "step": 3818 }, { - "epoch": 0.22, - "grad_norm": 0.4394101437520785, - "learning_rate": 1.8177204058279577e-05, - "loss": 0.3368, + "epoch": 0.18, + "grad_norm": 0.38146153639857605, + "learning_rate": 1.891148335940692e-05, + "loss": 0.2716, "step": 3819 }, { - "epoch": 0.22, - "grad_norm": 0.41746611985218063, - "learning_rate": 1.817613274634348e-05, - "loss": 0.2987, + "epoch": 0.18, + "grad_norm": 0.4345114657610416, + "learning_rate": 1.8910808173850145e-05, + "loss": 0.3154, "step": 3820 }, { - "epoch": 0.22, - "grad_norm": 0.5320382789127321, - "learning_rate": 1.8175061151268255e-05, - "loss": 0.3093, + "epoch": 0.18, + "grad_norm": 0.34524599137834444, + "learning_rate": 1.891013279101667e-05, + "loss": 0.1257, "step": 3821 }, { - "epoch": 0.22, - "grad_norm": 0.2989541511419468, - "learning_rate": 1.8173989273091014e-05, - "loss": 0.2661, + "epoch": 0.18, + "grad_norm": 0.7634413534659626, + "learning_rate": 1.890945721092144e-05, + "loss": 0.5379, "step": 3822 }, { - "epoch": 0.22, - "grad_norm": 1.724527721383952, - "learning_rate": 1.8172917111848878e-05, - "loss": 0.8488, + "epoch": 0.18, + "grad_norm": 0.3935076634723586, + "learning_rate": 1.890878143357942e-05, + "loss": 0.2809, "step": 3823 }, { - "epoch": 0.22, - "grad_norm": 0.3618052569029262, - "learning_rate": 1.817184466757897e-05, - "loss": 0.2378, + "epoch": 0.18, + "grad_norm": 0.36663621107665906, + "learning_rate": 1.8908105459005565e-05, + "loss": 0.3159, "step": 3824 }, { - "epoch": 0.22, - "grad_norm": 0.4104423447288719, - "learning_rate": 1.8170771940318437e-05, - "loss": 0.3514, + "epoch": 0.18, + "grad_norm": 0.45265457528388015, + "learning_rate": 1.890742928721484e-05, + "loss": 0.291, "step": 3825 }, { - "epoch": 0.22, - "grad_norm": 0.600402119776531, - "learning_rate": 1.816969893010442e-05, - "loss": 0.496, + "epoch": 0.18, + "grad_norm": 0.2948815824052943, + "learning_rate": 1.8906752918222213e-05, + "loss": 0.1988, "step": 3826 }, { - "epoch": 0.22, - "grad_norm": 0.3670048381276231, - "learning_rate": 1.8168625636974085e-05, - "loss": 0.2748, + "epoch": 0.18, + "grad_norm": 0.4890414945176667, + "learning_rate": 1.890607635204266e-05, + "loss": 0.2877, "step": 3827 }, { - "epoch": 0.22, - "grad_norm": 0.3009020200838833, - "learning_rate": 1.816755206096459e-05, - "loss": 0.1996, + "epoch": 0.18, + "grad_norm": 0.5198007296058611, + "learning_rate": 1.8905399588691165e-05, + "loss": 0.3936, "step": 3828 }, { - "epoch": 0.22, - "grad_norm": 0.504312784568991, - "learning_rate": 1.816647820211312e-05, - "loss": 0.38, + "epoch": 0.18, + "grad_norm": 0.3617933876138676, + "learning_rate": 1.8904722628182702e-05, + "loss": 0.3082, "step": 3829 }, { - "epoch": 0.22, - "grad_norm": 0.6567393314755094, - "learning_rate": 1.8165404060456863e-05, - "loss": 0.3679, + "epoch": 0.18, + "grad_norm": 0.9034954700428638, + "learning_rate": 1.8904045470532268e-05, + "loss": 0.5778, "step": 3830 }, { - "epoch": 0.22, - "grad_norm": 0.3984924468921483, - "learning_rate": 1.8164329636033012e-05, - "loss": 0.252, + "epoch": 0.18, + "grad_norm": 0.3950364556417189, + "learning_rate": 1.8903368115754843e-05, + "loss": 0.2959, "step": 3831 }, { - "epoch": 0.22, - "grad_norm": 0.4239600262528248, - "learning_rate": 1.8163254928878777e-05, - "loss": 0.3663, + "epoch": 0.18, + "grad_norm": 0.2993735687870497, + "learning_rate": 1.8902690563865432e-05, + "loss": 0.2316, "step": 3832 }, { - "epoch": 0.22, - "grad_norm": 0.4164722216338981, - "learning_rate": 1.8162179939031377e-05, - "loss": 0.299, + "epoch": 0.18, + "grad_norm": 0.5808594972599243, + "learning_rate": 1.890201281487903e-05, + "loss": 0.2789, "step": 3833 }, { - "epoch": 0.22, - "grad_norm": 0.35787438325680954, - "learning_rate": 1.8161104666528033e-05, - "loss": 0.1849, + "epoch": 0.18, + "grad_norm": 0.8378721245037347, + "learning_rate": 1.8901334868810647e-05, + "loss": 0.3803, "step": 3834 }, { - "epoch": 0.22, - "grad_norm": 0.982908517879726, - "learning_rate": 1.8160029111405986e-05, - "loss": 0.6916, + "epoch": 0.18, + "grad_norm": 0.45497158778151686, + "learning_rate": 1.8900656725675287e-05, + "loss": 0.3097, "step": 3835 }, { - "epoch": 0.22, - "grad_norm": 0.46631107207101113, - "learning_rate": 1.8158953273702486e-05, - "loss": 0.3176, + "epoch": 0.18, + "grad_norm": 0.43442316088443067, + "learning_rate": 1.889997838548797e-05, + "loss": 0.382, "step": 3836 }, { - "epoch": 0.22, - "grad_norm": 0.3537925294977268, - "learning_rate": 1.8157877153454785e-05, - "loss": 0.273, + "epoch": 0.18, + "grad_norm": 0.29611133006842777, + "learning_rate": 1.8899299848263704e-05, + "loss": 0.1558, "step": 3837 }, { - "epoch": 0.22, - "grad_norm": 0.6615628600913611, - "learning_rate": 1.8156800750700143e-05, - "loss": 0.4955, + "epoch": 0.18, + "grad_norm": 0.41694271447006276, + "learning_rate": 1.8898621114017522e-05, + "loss": 0.3016, "step": 3838 }, { - "epoch": 0.22, - "grad_norm": 0.41214597949670184, - "learning_rate": 1.8155724065475845e-05, - "loss": 0.2243, + "epoch": 0.18, + "grad_norm": 0.3353252128316122, + "learning_rate": 1.889794218276444e-05, + "loss": 0.2553, "step": 3839 }, { - "epoch": 0.22, - "grad_norm": 0.3755937652787498, - "learning_rate": 1.8154647097819172e-05, - "loss": 0.2564, + "epoch": 0.18, + "grad_norm": 1.1959355485543364, + "learning_rate": 1.8897263054519498e-05, + "loss": 0.4312, "step": 3840 }, { - "epoch": 0.22, - "grad_norm": 0.48876484681974797, - "learning_rate": 1.8153569847767423e-05, - "loss": 0.2991, + "epoch": 0.18, + "grad_norm": 0.3813352310210866, + "learning_rate": 1.8896583729297727e-05, + "loss": 0.2738, "step": 3841 }, { - "epoch": 0.22, - "grad_norm": 0.7458949562080135, - "learning_rate": 1.8152492315357902e-05, - "loss": 0.5104, + "epoch": 0.18, + "grad_norm": 1.2637744074166148, + "learning_rate": 1.8895904207114163e-05, + "loss": 0.8373, "step": 3842 }, { - "epoch": 0.22, - "grad_norm": 0.3735505680224295, - "learning_rate": 1.815141450062792e-05, - "loss": 0.282, + "epoch": 0.18, + "grad_norm": 0.4368784760704501, + "learning_rate": 1.8895224487983857e-05, + "loss": 0.2876, "step": 3843 }, { - "epoch": 0.22, - "grad_norm": 0.41540487618060473, - "learning_rate": 1.8150336403614804e-05, - "loss": 0.3188, + "epoch": 0.18, + "grad_norm": 0.2999088756258195, + "learning_rate": 1.8894544571921854e-05, + "loss": 0.2319, "step": 3844 }, { - "epoch": 0.22, - "grad_norm": 0.5712946769237918, - "learning_rate": 1.814925802435589e-05, - "loss": 0.3764, + "epoch": 0.18, + "grad_norm": 0.6898977886656115, + "learning_rate": 1.8893864458943207e-05, + "loss": 0.3323, "step": 3845 }, { - "epoch": 0.22, - "grad_norm": 0.3572232718406907, - "learning_rate": 1.814817936288852e-05, - "loss": 0.2627, + "epoch": 0.18, + "grad_norm": 1.2348608321140955, + "learning_rate": 1.8893184149062972e-05, + "loss": 0.5165, "step": 3846 }, { - "epoch": 0.22, - "grad_norm": 0.3055017102946289, - "learning_rate": 1.814710041925005e-05, - "loss": 0.1336, + "epoch": 0.18, + "grad_norm": 0.36173545688184566, + "learning_rate": 1.8892503642296208e-05, + "loss": 0.2414, "step": 3847 }, { - "epoch": 0.22, - "grad_norm": 0.3538207884429109, - "learning_rate": 1.8146021193477846e-05, - "loss": 0.3007, + "epoch": 0.18, + "grad_norm": 0.6278927132092558, + "learning_rate": 1.889182293865799e-05, + "loss": 0.4224, "step": 3848 }, { - "epoch": 0.22, - "grad_norm": 0.4314766190553378, - "learning_rate": 1.8144941685609273e-05, - "loss": 0.3486, + "epoch": 0.18, + "grad_norm": 1.1807056155577, + "learning_rate": 1.889114203816338e-05, + "loss": 0.6531, "step": 3849 }, { - "epoch": 0.22, - "grad_norm": 0.6441772312208831, - "learning_rate": 1.8143861895681723e-05, - "loss": 0.4833, + "epoch": 0.18, + "grad_norm": 0.28722350765323346, + "learning_rate": 1.8890460940827452e-05, + "loss": 0.1689, "step": 3850 }, { - "epoch": 0.22, - "grad_norm": 0.30729483425364906, - "learning_rate": 1.8142781823732582e-05, - "loss": 0.1791, + "epoch": 0.18, + "grad_norm": 0.4039621331491625, + "learning_rate": 1.888977964666529e-05, + "loss": 0.2926, "step": 3851 }, { - "epoch": 0.22, - "grad_norm": 0.3808537248820102, - "learning_rate": 1.814170146979926e-05, - "loss": 0.2803, + "epoch": 0.18, + "grad_norm": 1.3788718914310258, + "learning_rate": 1.8889098155691977e-05, + "loss": 0.5037, "step": 3852 }, { - "epoch": 0.22, - "grad_norm": 0.37558207046729103, - "learning_rate": 1.8140620833919165e-05, - "loss": 0.2885, + "epoch": 0.18, + "grad_norm": 0.42768862789949474, + "learning_rate": 1.8888416467922597e-05, + "loss": 0.2118, "step": 3853 }, { - "epoch": 0.22, - "grad_norm": 0.6154231825728195, - "learning_rate": 1.813953991612972e-05, - "loss": 0.4008, + "epoch": 0.18, + "grad_norm": 1.3971066508389074, + "learning_rate": 1.888773458337224e-05, + "loss": 0.7888, "step": 3854 }, { - "epoch": 0.22, - "grad_norm": 0.35044717507297185, - "learning_rate": 1.813845871646836e-05, - "loss": 0.2794, + "epoch": 0.18, + "grad_norm": 0.641727292763276, + "learning_rate": 1.8887052502056007e-05, + "loss": 0.3738, "step": 3855 }, { - "epoch": 0.22, - "grad_norm": 0.412504774677019, - "learning_rate": 1.813737723497252e-05, - "loss": 0.3406, + "epoch": 0.18, + "grad_norm": 0.4532146043193909, + "learning_rate": 1.8886370223989e-05, + "loss": 0.2164, "step": 3856 }, { - "epoch": 0.22, - "grad_norm": 0.5714736784254149, - "learning_rate": 1.8136295471679662e-05, - "loss": 0.2334, + "epoch": 0.18, + "grad_norm": 0.9723851103072669, + "learning_rate": 1.888568774918632e-05, + "loss": 0.4761, "step": 3857 }, { - "epoch": 0.22, - "grad_norm": 0.3209375714349231, - "learning_rate": 1.8135213426627237e-05, - "loss": 0.2582, + "epoch": 0.18, + "grad_norm": 0.31782148237399105, + "learning_rate": 1.8885005077663075e-05, + "loss": 0.1688, "step": 3858 }, { - "epoch": 0.22, - "grad_norm": 0.6265957239815656, - "learning_rate": 1.8134131099852724e-05, - "loss": 0.2952, + "epoch": 0.18, + "grad_norm": 0.4753434466667399, + "learning_rate": 1.888432220943438e-05, + "loss": 0.3018, "step": 3859 }, { - "epoch": 0.22, - "grad_norm": 0.4414280891315469, - "learning_rate": 1.81330484913936e-05, - "loss": 0.284, + "epoch": 0.18, + "grad_norm": 0.5392007280491453, + "learning_rate": 1.8883639144515354e-05, + "loss": 0.3199, "step": 3860 }, { - "epoch": 0.22, - "grad_norm": 0.41773899450768254, - "learning_rate": 1.8131965601287357e-05, - "loss": 0.3228, + "epoch": 0.18, + "grad_norm": 1.6760844446586534, + "learning_rate": 1.8882955882921122e-05, + "loss": 0.6275, "step": 3861 }, { - "epoch": 0.22, - "grad_norm": 0.8234390961731449, - "learning_rate": 1.8130882429571496e-05, - "loss": 0.5967, + "epoch": 0.18, + "grad_norm": 0.4712316602502112, + "learning_rate": 1.888227242466681e-05, + "loss": 0.3025, "step": 3862 }, { - "epoch": 0.22, - "grad_norm": 0.5027296598506996, - "learning_rate": 1.8129798976283522e-05, - "loss": 0.4022, + "epoch": 0.18, + "grad_norm": 0.5933829035910467, + "learning_rate": 1.8881588769767547e-05, + "loss": 0.3335, "step": 3863 }, { - "epoch": 0.22, - "grad_norm": 0.4194025271798038, - "learning_rate": 1.812871524146096e-05, - "loss": 0.2134, + "epoch": 0.18, + "grad_norm": 0.32632522735749997, + "learning_rate": 1.8880904918238465e-05, + "loss": 0.1875, "step": 3864 }, { - "epoch": 0.22, - "grad_norm": 0.3110021600253895, - "learning_rate": 1.812763122514134e-05, - "loss": 0.2123, + "epoch": 0.18, + "grad_norm": 0.5371370840025611, + "learning_rate": 1.888022087009471e-05, + "loss": 0.2619, "step": 3865 }, { - "epoch": 0.22, - "grad_norm": 0.7499695380435911, - "learning_rate": 1.8126546927362204e-05, - "loss": 0.4432, + "epoch": 0.18, + "grad_norm": 1.6491936302405654, + "learning_rate": 1.8879536625351424e-05, + "loss": 0.6028, "step": 3866 }, { - "epoch": 0.22, - "grad_norm": 0.35946689971793444, - "learning_rate": 1.8125462348161093e-05, - "loss": 0.2403, + "epoch": 0.18, + "grad_norm": 0.48920174311228604, + "learning_rate": 1.8878852184023754e-05, + "loss": 0.3611, "step": 3867 }, { - "epoch": 0.22, - "grad_norm": 0.4073612075479078, - "learning_rate": 1.812437748757557e-05, - "loss": 0.3583, + "epoch": 0.18, + "grad_norm": 0.39620543445130757, + "learning_rate": 1.8878167546126856e-05, + "loss": 0.2995, "step": 3868 }, { - "epoch": 0.22, - "grad_norm": 1.1775040168059026, - "learning_rate": 1.8123292345643203e-05, - "loss": 0.7311, + "epoch": 0.18, + "grad_norm": 1.1037585380451898, + "learning_rate": 1.8877482711675884e-05, + "loss": 0.4111, "step": 3869 }, { - "epoch": 0.22, - "grad_norm": 0.3359065562504141, - "learning_rate": 1.8122206922401573e-05, - "loss": 0.2049, + "epoch": 0.18, + "grad_norm": 0.35130993083203155, + "learning_rate": 1.8876797680686e-05, + "loss": 0.2259, "step": 3870 }, { - "epoch": 0.22, - "grad_norm": 0.3035022075371208, - "learning_rate": 1.8121121217888268e-05, - "loss": 0.2616, + "epoch": 0.18, + "grad_norm": 0.5273308499525118, + "learning_rate": 1.8876112453172374e-05, + "loss": 0.285, "step": 3871 }, { - "epoch": 0.22, - "grad_norm": 0.4404755357892392, - "learning_rate": 1.8120035232140884e-05, - "loss": 0.3895, + "epoch": 0.18, + "grad_norm": 0.608234119264191, + "learning_rate": 1.887542702915017e-05, + "loss": 0.3734, "step": 3872 }, { - "epoch": 0.22, - "grad_norm": 0.3304185548692813, - "learning_rate": 1.8118948965197027e-05, - "loss": 0.2378, + "epoch": 0.18, + "grad_norm": 0.7127655731209267, + "learning_rate": 1.887474140863457e-05, + "loss": 0.4171, "step": 3873 }, { - "epoch": 0.22, - "grad_norm": 1.1038158285453556, - "learning_rate": 1.8117862417094318e-05, - "loss": 0.8049, + "epoch": 0.18, + "grad_norm": 0.5279635456035189, + "learning_rate": 1.8874055591640746e-05, + "loss": 0.3436, "step": 3874 }, { - "epoch": 0.22, - "grad_norm": 0.47637000688014897, - "learning_rate": 1.811677558787038e-05, - "loss": 0.4025, + "epoch": 0.18, + "grad_norm": 0.7064370602473943, + "learning_rate": 1.8873369578183883e-05, + "loss": 0.3274, "step": 3875 }, { - "epoch": 0.22, - "grad_norm": 0.3109465314784538, - "learning_rate": 1.8115688477562855e-05, - "loss": 0.27, + "epoch": 0.18, + "grad_norm": 0.32552355853185594, + "learning_rate": 1.887268336827917e-05, + "loss": 0.0776, "step": 3876 }, { - "epoch": 0.22, - "grad_norm": 0.2238118146029211, - "learning_rate": 1.8114601086209387e-05, - "loss": 0.137, + "epoch": 0.18, + "grad_norm": 0.4353113290739778, + "learning_rate": 1.88719969619418e-05, + "loss": 0.2982, "step": 3877 }, { - "epoch": 0.22, - "grad_norm": 0.6389128213112062, - "learning_rate": 1.8113513413847634e-05, - "loss": 0.4794, + "epoch": 0.18, + "grad_norm": 0.6465512140580408, + "learning_rate": 1.8871310359186968e-05, + "loss": 0.4675, "step": 3878 }, { - "epoch": 0.22, - "grad_norm": 0.9375943093100196, - "learning_rate": 1.811242546051526e-05, - "loss": 0.2699, + "epoch": 0.18, + "grad_norm": 0.5136237005851464, + "learning_rate": 1.8870623560029875e-05, + "loss": 0.3179, "step": 3879 }, { - "epoch": 0.22, - "grad_norm": 0.4320199364520138, - "learning_rate": 1.811133722624994e-05, - "loss": 0.2924, + "epoch": 0.18, + "grad_norm": 0.5428223674298324, + "learning_rate": 1.8869936564485726e-05, + "loss": 0.3759, "step": 3880 }, { - "epoch": 0.22, - "grad_norm": 1.0738585193981018, - "learning_rate": 1.811024871108936e-05, - "loss": 0.5673, + "epoch": 0.18, + "grad_norm": 0.5489668008826314, + "learning_rate": 1.8869249372569732e-05, + "loss": 0.38, "step": 3881 }, { - "epoch": 0.22, - "grad_norm": 0.4571929492755686, - "learning_rate": 1.8109159915071215e-05, - "loss": 0.3033, + "epoch": 0.18, + "grad_norm": 0.3717546248072272, + "learning_rate": 1.8868561984297104e-05, + "loss": 0.1922, "step": 3882 }, { - "epoch": 0.22, - "grad_norm": 0.40435453226191476, - "learning_rate": 1.810807083823321e-05, - "loss": 0.3142, + "epoch": 0.18, + "grad_norm": 0.5037923779674243, + "learning_rate": 1.886787439968306e-05, + "loss": 0.2435, "step": 3883 }, { - "epoch": 0.22, - "grad_norm": 0.3231601205057292, - "learning_rate": 1.8106981480613063e-05, - "loss": 0.2424, + "epoch": 0.18, + "grad_norm": 1.4710045643917589, + "learning_rate": 1.8867186618742822e-05, + "loss": 0.8185, "step": 3884 }, { - "epoch": 0.22, - "grad_norm": 0.47123062190681675, - "learning_rate": 1.8105891842248496e-05, - "loss": 0.3023, + "epoch": 0.18, + "grad_norm": 0.7708456774429172, + "learning_rate": 1.8866498641491625e-05, + "loss": 0.5095, "step": 3885 }, { - "epoch": 0.22, - "grad_norm": 0.5011503701732182, - "learning_rate": 1.810480192317724e-05, - "loss": 0.3053, + "epoch": 0.18, + "grad_norm": 0.37758381447136374, + "learning_rate": 1.886581046794469e-05, + "loss": 0.2429, "step": 3886 }, { - "epoch": 0.22, - "grad_norm": 0.5743534405718205, - "learning_rate": 1.8103711723437048e-05, - "loss": 0.4042, + "epoch": 0.18, + "grad_norm": 0.4885306099521553, + "learning_rate": 1.8865122098117254e-05, + "loss": 0.3459, "step": 3887 }, { - "epoch": 0.22, - "grad_norm": 0.3408418367503883, - "learning_rate": 1.8102621243065665e-05, - "loss": 0.2877, + "epoch": 0.18, + "grad_norm": 0.5470028462415637, + "learning_rate": 1.886443353202456e-05, + "loss": 0.3385, "step": 3888 }, { - "epoch": 0.22, - "grad_norm": 0.6381779781359501, - "learning_rate": 1.8101530482100855e-05, - "loss": 0.4795, + "epoch": 0.18, + "grad_norm": 0.40661722093809755, + "learning_rate": 1.8863744769681855e-05, + "loss": 0.1665, "step": 3889 }, { - "epoch": 0.22, - "grad_norm": 0.2908090921032573, - "learning_rate": 1.8100439440580393e-05, - "loss": 0.1469, + "epoch": 0.18, + "grad_norm": 0.4238715713843672, + "learning_rate": 1.886305581110438e-05, + "loss": 0.3671, "step": 3890 }, { - "epoch": 0.22, - "grad_norm": 0.4231337495748093, - "learning_rate": 1.809934811854206e-05, - "loss": 0.3074, + "epoch": 0.18, + "grad_norm": 0.4846235788549622, + "learning_rate": 1.8862366656307394e-05, + "loss": 0.3636, "step": 3891 }, { - "epoch": 0.22, - "grad_norm": 0.3200805794748924, - "learning_rate": 1.8098256516023654e-05, - "loss": 0.2839, + "epoch": 0.18, + "grad_norm": 0.41183815855461015, + "learning_rate": 1.886167730530615e-05, + "loss": 0.1101, "step": 3892 }, { - "epoch": 0.22, - "grad_norm": 1.011602052709592, - "learning_rate": 1.809716463306297e-05, - "loss": 0.2713, + "epoch": 0.18, + "grad_norm": 0.5399374011104692, + "learning_rate": 1.8860987758115913e-05, + "loss": 0.3803, "step": 3893 }, { - "epoch": 0.22, - "grad_norm": 0.3933605424410423, - "learning_rate": 1.8096072469697822e-05, - "loss": 0.324, + "epoch": 0.18, + "grad_norm": 0.380833251564716, + "learning_rate": 1.8860298014751947e-05, + "loss": 0.2513, "step": 3894 }, { - "epoch": 0.22, - "grad_norm": 0.7690843943555367, - "learning_rate": 1.8094980025966036e-05, - "loss": 0.4747, + "epoch": 0.18, + "grad_norm": 0.4729171085864037, + "learning_rate": 1.885960807522952e-05, + "loss": 0.2761, "step": 3895 }, { - "epoch": 0.22, - "grad_norm": 0.3765206416318077, - "learning_rate": 1.809388730190544e-05, - "loss": 0.2389, + "epoch": 0.18, + "grad_norm": 0.4299675158107653, + "learning_rate": 1.8858917939563914e-05, + "loss": 0.306, "step": 3896 }, { - "epoch": 0.22, - "grad_norm": 0.3835611886266919, - "learning_rate": 1.8092794297553873e-05, - "loss": 0.28, + "epoch": 0.18, + "grad_norm": 0.7221040664145163, + "learning_rate": 1.8858227607770398e-05, + "loss": 0.5218, "step": 3897 }, { - "epoch": 0.22, - "grad_norm": 0.4180873590762182, - "learning_rate": 1.8091701012949187e-05, - "loss": 0.2725, + "epoch": 0.18, + "grad_norm": 0.39728819416102146, + "learning_rate": 1.8857537079864265e-05, + "loss": 0.1902, "step": 3898 }, { - "epoch": 0.22, - "grad_norm": 0.44449937407460755, - "learning_rate": 1.8090607448129244e-05, - "loss": 0.3152, + "epoch": 0.18, + "grad_norm": 0.3994820490714649, + "learning_rate": 1.8856846355860796e-05, + "loss": 0.2599, "step": 3899 }, { - "epoch": 0.22, - "grad_norm": 0.43700527552620777, - "learning_rate": 1.8089513603131913e-05, - "loss": 0.301, + "epoch": 0.18, + "grad_norm": 1.1052495011921455, + "learning_rate": 1.8856155435775284e-05, + "loss": 0.6198, "step": 3900 }, { - "epoch": 0.22, - "grad_norm": 0.6587341669479327, - "learning_rate": 1.808841947799507e-05, - "loss": 0.4034, + "epoch": 0.18, + "grad_norm": 0.6922221992822799, + "learning_rate": 1.8855464319623028e-05, + "loss": 0.435, "step": 3901 }, { - "epoch": 0.22, - "grad_norm": 1.2622215351655142, - "learning_rate": 1.808732507275661e-05, - "loss": 0.7012, + "epoch": 0.18, + "grad_norm": 0.4321182006922516, + "learning_rate": 1.8854773007419328e-05, + "loss": 0.2927, "step": 3902 }, { - "epoch": 0.22, - "grad_norm": 0.35778822861958404, - "learning_rate": 1.8086230387454434e-05, - "loss": 0.2038, + "epoch": 0.18, + "grad_norm": 0.4233142991833385, + "learning_rate": 1.8854081499179485e-05, + "loss": 0.367, "step": 3903 }, { - "epoch": 0.22, - "grad_norm": 0.41095801057868664, - "learning_rate": 1.8085135422126448e-05, - "loss": 0.2571, + "epoch": 0.18, + "grad_norm": 0.29217460573373005, + "learning_rate": 1.8853389794918816e-05, + "loss": 0.191, "step": 3904 }, { - "epoch": 0.22, - "grad_norm": 0.5132522096870761, - "learning_rate": 1.8084040176810567e-05, - "loss": 0.349, + "epoch": 0.18, + "grad_norm": 0.5217776676310971, + "learning_rate": 1.8852697894652623e-05, + "loss": 0.2958, "step": 3905 }, { - "epoch": 0.22, - "grad_norm": 0.4841833694973949, - "learning_rate": 1.808294465154472e-05, - "loss": 0.2723, + "epoch": 0.18, + "grad_norm": 0.42087506173347405, + "learning_rate": 1.8852005798396236e-05, + "loss": 0.3757, "step": 3906 }, { - "epoch": 0.22, - "grad_norm": 0.4857845170840667, - "learning_rate": 1.8081848846366852e-05, - "loss": 0.3875, + "epoch": 0.18, + "grad_norm": 0.8375875796325938, + "learning_rate": 1.885131350616497e-05, + "loss": 0.4458, "step": 3907 }, { - "epoch": 0.22, - "grad_norm": 0.5247363261757332, - "learning_rate": 1.8080752761314904e-05, - "loss": 0.3969, + "epoch": 0.18, + "grad_norm": 0.38043122864022877, + "learning_rate": 1.8850621017974157e-05, + "loss": 0.319, "step": 3908 }, { - "epoch": 0.22, - "grad_norm": 0.34761712484213264, - "learning_rate": 1.8079656396426834e-05, - "loss": 0.2346, + "epoch": 0.18, + "grad_norm": 0.3482098235881958, + "learning_rate": 1.8849928333839124e-05, + "loss": 0.2111, "step": 3909 }, { - "epoch": 0.22, - "grad_norm": 0.2940198797850371, - "learning_rate": 1.807855975174061e-05, - "loss": 0.1858, + "epoch": 0.18, + "grad_norm": 0.5169427562811084, + "learning_rate": 1.8849235453775202e-05, + "loss": 0.3115, "step": 3910 }, { - "epoch": 0.22, - "grad_norm": 0.40981413692161606, - "learning_rate": 1.8077462827294214e-05, - "loss": 0.3515, + "epoch": 0.18, + "grad_norm": 0.41956398094053515, + "learning_rate": 1.884854237779774e-05, + "loss": 0.3244, "step": 3911 }, { - "epoch": 0.22, - "grad_norm": 0.3987787782958305, - "learning_rate": 1.8076365623125625e-05, - "loss": 0.2753, + "epoch": 0.18, + "grad_norm": 0.7759240460824689, + "learning_rate": 1.884784910592208e-05, + "loss": 0.394, "step": 3912 }, { - "epoch": 0.22, - "grad_norm": 0.8541449017563671, - "learning_rate": 1.8075268139272842e-05, - "loss": 0.5343, + "epoch": 0.18, + "grad_norm": 0.5961789640505579, + "learning_rate": 1.884715563816357e-05, + "loss": 0.4292, "step": 3913 }, { - "epoch": 0.22, - "grad_norm": 1.122921649883041, - "learning_rate": 1.8074170375773867e-05, - "loss": 0.7047, + "epoch": 0.18, + "grad_norm": 0.4079529022437299, + "learning_rate": 1.8846461974537555e-05, + "loss": 0.3269, "step": 3914 }, { - "epoch": 0.22, - "grad_norm": 0.337497593773467, - "learning_rate": 1.8073072332666723e-05, - "loss": 0.2768, + "epoch": 0.18, + "grad_norm": 0.45105514497906485, + "learning_rate": 1.88457681150594e-05, + "loss": 0.2733, "step": 3915 }, { - "epoch": 0.22, - "grad_norm": 0.4214408990448417, - "learning_rate": 1.807197400998943e-05, - "loss": 0.257, + "epoch": 0.18, + "grad_norm": 0.33294928967557225, + "learning_rate": 1.884507405974447e-05, + "loss": 0.1638, "step": 3916 }, { - "epoch": 0.23, - "grad_norm": 0.6049790373030124, - "learning_rate": 1.8070875407780026e-05, - "loss": 0.355, + "epoch": 0.18, + "grad_norm": 0.4607174219785187, + "learning_rate": 1.884437980860812e-05, + "loss": 0.3391, "step": 3917 }, { - "epoch": 0.23, - "grad_norm": 0.30708101652710945, - "learning_rate": 1.806977652607655e-05, - "loss": 0.1988, + "epoch": 0.18, + "grad_norm": 0.5605722332414717, + "learning_rate": 1.8843685361665724e-05, + "loss": 0.2816, "step": 3918 }, { - "epoch": 0.23, - "grad_norm": 0.4084061861810741, - "learning_rate": 1.8068677364917063e-05, - "loss": 0.283, + "epoch": 0.18, + "grad_norm": 0.4209384522413471, + "learning_rate": 1.884299071893266e-05, + "loss": 0.3579, "step": 3919 }, { - "epoch": 0.23, - "grad_norm": 0.626383104086622, - "learning_rate": 1.806757792433962e-05, - "loss": 0.4173, + "epoch": 0.18, + "grad_norm": 0.43460967310403115, + "learning_rate": 1.8842295880424305e-05, + "loss": 0.345, "step": 3920 }, { - "epoch": 0.23, - "grad_norm": 0.6969864507969111, - "learning_rate": 1.806647820438231e-05, - "loss": 0.4274, + "epoch": 0.18, + "grad_norm": 1.0217785382138418, + "learning_rate": 1.884160084615604e-05, + "loss": 0.6139, "step": 3921 }, { - "epoch": 0.23, - "grad_norm": 0.3530384228236583, - "learning_rate": 1.8065378205083202e-05, - "loss": 0.316, + "epoch": 0.18, + "grad_norm": 0.26862609546890037, + "learning_rate": 1.884090561614326e-05, + "loss": 0.2095, "step": 3922 }, { - "epoch": 0.23, - "grad_norm": 0.4005606977157132, - "learning_rate": 1.8064277926480392e-05, - "loss": 0.3583, + "epoch": 0.18, + "grad_norm": 0.3684892915603788, + "learning_rate": 1.884021019040134e-05, + "loss": 0.239, "step": 3923 }, { - "epoch": 0.23, - "grad_norm": 0.31165153005202517, - "learning_rate": 1.8063177368611988e-05, - "loss": 0.214, + "epoch": 0.18, + "grad_norm": 0.8032374201687222, + "learning_rate": 1.8839514568945695e-05, + "loss": 0.5168, "step": 3924 }, { - "epoch": 0.23, - "grad_norm": 0.33150213382715915, - "learning_rate": 1.8062076531516094e-05, - "loss": 0.2387, + "epoch": 0.18, + "grad_norm": 0.6035613389473641, + "learning_rate": 1.8838818751791715e-05, + "loss": 0.3413, "step": 3925 }, { - "epoch": 0.23, - "grad_norm": 0.8408216515743814, - "learning_rate": 1.806097541523084e-05, - "loss": 0.4223, + "epoch": 0.18, + "grad_norm": 0.3967095742381883, + "learning_rate": 1.8838122738954808e-05, + "loss": 0.3016, "step": 3926 }, { - "epoch": 0.23, - "grad_norm": 0.43962821527072266, - "learning_rate": 1.8059874019794352e-05, - "loss": 0.3006, + "epoch": 0.18, + "grad_norm": 0.4420037435702996, + "learning_rate": 1.883742653045038e-05, + "loss": 0.3321, "step": 3927 }, { - "epoch": 0.23, - "grad_norm": 0.4049589074514208, - "learning_rate": 1.8058772345244775e-05, - "loss": 0.3666, + "epoch": 0.18, + "grad_norm": 0.2352552598300315, + "learning_rate": 1.8836730126293847e-05, + "loss": 0.1002, "step": 3928 }, { - "epoch": 0.23, - "grad_norm": 0.6436948491887067, - "learning_rate": 1.8057670391620258e-05, - "loss": 0.3893, + "epoch": 0.18, + "grad_norm": 0.39331906113518034, + "learning_rate": 1.8836033526500624e-05, + "loss": 0.2964, "step": 3929 }, { - "epoch": 0.23, - "grad_norm": 0.26980658996097057, - "learning_rate": 1.8056568158958958e-05, - "loss": 0.194, + "epoch": 0.18, + "grad_norm": 0.5226894532481836, + "learning_rate": 1.883533673108614e-05, + "loss": 0.366, "step": 3930 }, { - "epoch": 0.23, - "grad_norm": 0.5857657539673209, - "learning_rate": 1.8055465647299052e-05, - "loss": 0.4342, + "epoch": 0.18, + "grad_norm": 0.9833547568042474, + "learning_rate": 1.883463974006581e-05, + "loss": 0.3414, "step": 3931 }, { - "epoch": 0.23, - "grad_norm": 0.3769739875049705, - "learning_rate": 1.805436285667872e-05, - "loss": 0.2869, + "epoch": 0.18, + "grad_norm": 0.39721134128298635, + "learning_rate": 1.8833942553455073e-05, + "loss": 0.3309, "step": 3932 }, { - "epoch": 0.23, - "grad_norm": 0.44323285132468504, - "learning_rate": 1.8053259787136144e-05, - "loss": 0.3503, + "epoch": 0.18, + "grad_norm": 1.439852403871312, + "learning_rate": 1.883324517126936e-05, + "loss": 0.7941, "step": 3933 }, { - "epoch": 0.23, - "grad_norm": 0.5264863084482408, - "learning_rate": 1.8052156438709527e-05, - "loss": 0.3917, + "epoch": 0.18, + "grad_norm": 0.3435326846372854, + "learning_rate": 1.8832547593524116e-05, + "loss": 0.2686, "step": 3934 }, { - "epoch": 0.23, - "grad_norm": 0.3824530834229984, - "learning_rate": 1.805105281143708e-05, - "loss": 0.3065, + "epoch": 0.18, + "grad_norm": 0.28307856292848027, + "learning_rate": 1.8831849820234782e-05, + "loss": 0.1787, "step": 3935 }, { - "epoch": 0.23, - "grad_norm": 0.39071495256736205, - "learning_rate": 1.8049948905357023e-05, - "loss": 0.1898, + "epoch": 0.18, + "grad_norm": 0.853265183821286, + "learning_rate": 1.8831151851416803e-05, + "loss": 0.5033, "step": 3936 }, { - "epoch": 0.23, - "grad_norm": 0.3091528912278944, - "learning_rate": 1.804884472050758e-05, - "loss": 0.2312, + "epoch": 0.18, + "grad_norm": 0.7803789230857403, + "learning_rate": 1.8830453687085636e-05, + "loss": 0.4685, "step": 3937 }, { - "epoch": 0.23, - "grad_norm": 0.9481919338379736, - "learning_rate": 1.8047740256926993e-05, - "loss": 0.6748, + "epoch": 0.18, + "grad_norm": 0.42602051284773895, + "learning_rate": 1.8829755327256736e-05, + "loss": 0.235, "step": 3938 }, { - "epoch": 0.23, - "grad_norm": 0.35422805559962023, - "learning_rate": 1.8046635514653505e-05, - "loss": 0.2324, + "epoch": 0.18, + "grad_norm": 0.4847530285218601, + "learning_rate": 1.882905677194556e-05, + "loss": 0.3517, "step": 3939 }, { - "epoch": 0.23, - "grad_norm": 0.412781947395747, - "learning_rate": 1.8045530493725375e-05, - "loss": 0.3477, + "epoch": 0.18, + "grad_norm": 0.2970371314718206, + "learning_rate": 1.8828358021167576e-05, + "loss": 0.1724, "step": 3940 }, { - "epoch": 0.23, - "grad_norm": 0.7610604686105258, - "learning_rate": 1.8044425194180868e-05, - "loss": 0.5374, + "epoch": 0.18, + "grad_norm": 0.39085924878888245, + "learning_rate": 1.8827659074938256e-05, + "loss": 0.2169, "step": 3941 }, { - "epoch": 0.23, - "grad_norm": 0.23494787925537458, - "learning_rate": 1.8043319616058266e-05, - "loss": 0.0743, + "epoch": 0.18, + "grad_norm": 0.5718488465816467, + "learning_rate": 1.8826959933273074e-05, + "loss": 0.3857, "step": 3942 }, { - "epoch": 0.23, - "grad_norm": 0.32367312358901884, - "learning_rate": 1.804221375939585e-05, - "loss": 0.2955, + "epoch": 0.18, + "grad_norm": 1.1427216333066101, + "learning_rate": 1.8826260596187505e-05, + "loss": 0.5974, "step": 3943 }, { - "epoch": 0.23, - "grad_norm": 0.9361586658175016, - "learning_rate": 1.8041107624231916e-05, - "loss": 0.6321, + "epoch": 0.18, + "grad_norm": 0.3818328714214559, + "learning_rate": 1.882556106369703e-05, + "loss": 0.2215, "step": 3944 }, { - "epoch": 0.23, - "grad_norm": 0.4736112307448519, - "learning_rate": 1.804000121060477e-05, - "loss": 0.3214, + "epoch": 0.18, + "grad_norm": 1.1751020398434424, + "learning_rate": 1.8824861335817145e-05, + "loss": 0.6769, "step": 3945 }, { - "epoch": 0.23, - "grad_norm": 0.4152969262120841, - "learning_rate": 1.803889451855273e-05, - "loss": 0.346, + "epoch": 0.18, + "grad_norm": 0.4465930326278031, + "learning_rate": 1.8824161412563333e-05, + "loss": 0.3592, "step": 3946 }, { - "epoch": 0.23, - "grad_norm": 0.425406663947165, - "learning_rate": 1.8037787548114122e-05, - "loss": 0.3407, + "epoch": 0.18, + "grad_norm": 0.3206620712832924, + "learning_rate": 1.8823461293951092e-05, + "loss": 0.2402, "step": 3947 }, { - "epoch": 0.23, - "grad_norm": 0.3436781987502028, - "learning_rate": 1.8036680299327273e-05, - "loss": 0.1941, + "epoch": 0.18, + "grad_norm": 0.709438796656978, + "learning_rate": 1.8822760979995923e-05, + "loss": 0.3202, "step": 3948 }, { - "epoch": 0.23, - "grad_norm": 0.2759805308240463, - "learning_rate": 1.8035572772230526e-05, - "loss": 0.1915, + "epoch": 0.18, + "grad_norm": 1.583815003891385, + "learning_rate": 1.8822060470713327e-05, + "loss": 0.8699, "step": 3949 }, { - "epoch": 0.23, - "grad_norm": 0.7343999781835354, - "learning_rate": 1.8034464966862247e-05, - "loss": 0.5131, + "epoch": 0.18, + "grad_norm": 0.3904867367574193, + "learning_rate": 1.8821359766118817e-05, + "loss": 0.3093, "step": 3950 }, { - "epoch": 0.23, - "grad_norm": 0.3709341545619263, - "learning_rate": 1.8033356883260786e-05, - "loss": 0.3157, + "epoch": 0.18, + "grad_norm": 0.5167241733809467, + "learning_rate": 1.8820658866227902e-05, + "loss": 0.3045, "step": 3951 }, { - "epoch": 0.23, - "grad_norm": 0.3961664624622881, - "learning_rate": 1.8032248521464526e-05, - "loss": 0.301, + "epoch": 0.18, + "grad_norm": 0.7646413270054491, + "learning_rate": 1.8819957771056106e-05, + "loss": 0.535, "step": 3952 }, { - "epoch": 0.23, - "grad_norm": 0.9898774268867264, - "learning_rate": 1.8031139881511844e-05, - "loss": 0.6261, + "epoch": 0.18, + "grad_norm": 0.3940579288068486, + "learning_rate": 1.881925648061894e-05, + "loss": 0.2932, "step": 3953 }, { - "epoch": 0.23, - "grad_norm": 0.39230522301924536, - "learning_rate": 1.8030030963441133e-05, - "loss": 0.2662, + "epoch": 0.18, + "grad_norm": 0.48095373274264436, + "learning_rate": 1.8818554994931938e-05, + "loss": 0.3203, "step": 3954 }, { - "epoch": 0.23, - "grad_norm": 0.28249473186971336, - "learning_rate": 1.8028921767290796e-05, - "loss": 0.2525, + "epoch": 0.18, + "grad_norm": 0.369192953693975, + "learning_rate": 1.881785331401063e-05, + "loss": 0.2235, "step": 3955 }, { - "epoch": 0.23, - "grad_norm": 0.5699095328371475, - "learning_rate": 1.802781229309924e-05, - "loss": 0.3755, + "epoch": 0.18, + "grad_norm": 0.43588704969082, + "learning_rate": 1.8817151437870544e-05, + "loss": 0.2705, "step": 3956 }, { - "epoch": 0.23, - "grad_norm": 0.7814378890464383, - "learning_rate": 1.8026702540904893e-05, - "loss": 0.4934, + "epoch": 0.18, + "grad_norm": 0.9923244449979725, + "learning_rate": 1.881644936652723e-05, + "loss": 0.4432, "step": 3957 }, { - "epoch": 0.23, - "grad_norm": 0.31906069339089543, - "learning_rate": 1.802559251074618e-05, - "loss": 0.2279, + "epoch": 0.18, + "grad_norm": 0.3960451843575307, + "learning_rate": 1.8815747099996216e-05, + "loss": 0.3275, "step": 3958 }, { - "epoch": 0.23, - "grad_norm": 0.4300544687060639, - "learning_rate": 1.8024482202661544e-05, - "loss": 0.3191, + "epoch": 0.18, + "grad_norm": 0.43814326696069184, + "learning_rate": 1.8815044638293064e-05, + "loss": 0.282, "step": 3959 }, { - "epoch": 0.23, - "grad_norm": 0.7882393206382694, - "learning_rate": 1.8023371616689437e-05, - "loss": 0.4986, + "epoch": 0.18, + "grad_norm": 0.48216360379279827, + "learning_rate": 1.8814341981433318e-05, + "loss": 0.3669, "step": 3960 }, { - "epoch": 0.23, - "grad_norm": 0.2288574775693125, - "learning_rate": 1.8022260752868314e-05, - "loss": 0.1694, + "epoch": 0.18, + "grad_norm": 0.3066220899732937, + "learning_rate": 1.8813639129432532e-05, + "loss": 0.2072, "step": 3961 }, { - "epoch": 0.23, - "grad_norm": 1.129884549557266, - "learning_rate": 1.8021149611236644e-05, - "loss": 0.5047, + "epoch": 0.18, + "grad_norm": 0.4020740979705203, + "learning_rate": 1.8812936082306275e-05, + "loss": 0.2816, "step": 3962 }, { - "epoch": 0.23, - "grad_norm": 0.3673895981661131, - "learning_rate": 1.8020038191832912e-05, - "loss": 0.3243, + "epoch": 0.18, + "grad_norm": 0.5933214722023372, + "learning_rate": 1.8812232840070106e-05, + "loss": 0.406, "step": 3963 }, { - "epoch": 0.23, - "grad_norm": 0.3960014025969493, - "learning_rate": 1.80189264946956e-05, - "loss": 0.3041, + "epoch": 0.18, + "grad_norm": 0.7013550338251123, + "learning_rate": 1.8811529402739596e-05, + "loss": 0.4013, "step": 3964 }, { - "epoch": 0.23, - "grad_norm": 0.7265493417525126, - "learning_rate": 1.8017814519863206e-05, - "loss": 0.2853, + "epoch": 0.18, + "grad_norm": 0.3862024184424891, + "learning_rate": 1.8810825770330317e-05, + "loss": 0.3058, "step": 3965 }, { - "epoch": 0.23, - "grad_norm": 0.4288534438453486, - "learning_rate": 1.8016702267374243e-05, - "loss": 0.3599, + "epoch": 0.18, + "grad_norm": 0.4144961077843554, + "learning_rate": 1.8810121942857848e-05, + "loss": 0.3245, "step": 3966 }, { - "epoch": 0.23, - "grad_norm": 0.3331264382746232, - "learning_rate": 1.8015589737267226e-05, - "loss": 0.2876, + "epoch": 0.18, + "grad_norm": 0.24528902713484785, + "learning_rate": 1.880941792033777e-05, + "loss": 0.1027, "step": 3967 }, { - "epoch": 0.23, - "grad_norm": 0.43022925309376164, - "learning_rate": 1.801447692958068e-05, - "loss": 0.2281, + "epoch": 0.18, + "grad_norm": 0.393894016662953, + "learning_rate": 1.880871370278567e-05, + "loss": 0.2989, "step": 3968 }, { - "epoch": 0.23, - "grad_norm": 0.5957577630302175, - "learning_rate": 1.801336384435314e-05, - "loss": 0.3927, + "epoch": 0.18, + "grad_norm": 1.3351816873229205, + "learning_rate": 1.880800929021714e-05, + "loss": 0.8207, "step": 3969 }, { - "epoch": 0.23, - "grad_norm": 0.4640169255196903, - "learning_rate": 1.8012250481623158e-05, - "loss": 0.3406, + "epoch": 0.18, + "grad_norm": 0.4223318351673896, + "learning_rate": 1.880730468264777e-05, + "loss": 0.3268, "step": 3970 }, { - "epoch": 0.23, - "grad_norm": 0.39945224996755907, - "learning_rate": 1.8011136841429285e-05, - "loss": 0.2894, + "epoch": 0.18, + "grad_norm": 0.42740360955404016, + "learning_rate": 1.8806599880093163e-05, + "loss": 0.3379, "step": 3971 }, { - "epoch": 0.23, - "grad_norm": 0.8005686154322458, - "learning_rate": 1.801002292381009e-05, - "loss": 0.4909, + "epoch": 0.18, + "grad_norm": 1.121006182489048, + "learning_rate": 1.8805894882568926e-05, + "loss": 0.6971, "step": 3972 }, { - "epoch": 0.23, - "grad_norm": 0.33287331011769583, - "learning_rate": 1.800890872880414e-05, - "loss": 0.2381, + "epoch": 0.18, + "grad_norm": 0.3441684976160639, + "learning_rate": 1.880518969009066e-05, + "loss": 0.2092, "step": 3973 }, { - "epoch": 0.23, - "grad_norm": 0.36297847553286217, - "learning_rate": 1.8007794256450027e-05, - "loss": 0.3012, + "epoch": 0.18, + "grad_norm": 0.3909019338070255, + "learning_rate": 1.8804484302673982e-05, + "loss": 0.2264, "step": 3974 }, { - "epoch": 0.23, - "grad_norm": 0.5849863340592387, - "learning_rate": 1.800667950678635e-05, - "loss": 0.4239, + "epoch": 0.18, + "grad_norm": 0.63977440618754, + "learning_rate": 1.8803778720334512e-05, + "loss": 0.3957, "step": 3975 }, { - "epoch": 0.23, - "grad_norm": 0.378342194413626, - "learning_rate": 1.8005564479851697e-05, - "loss": 0.2682, + "epoch": 0.18, + "grad_norm": 0.6828658953875684, + "learning_rate": 1.8803072943087862e-05, + "loss": 0.4941, "step": 3976 }, { - "epoch": 0.23, - "grad_norm": 0.7952623423604379, - "learning_rate": 1.8004449175684697e-05, - "loss": 0.6006, + "epoch": 0.18, + "grad_norm": 0.3809574398008861, + "learning_rate": 1.8802366970949658e-05, + "loss": 0.2071, "step": 3977 }, { - "epoch": 0.23, - "grad_norm": 0.4097070482739936, - "learning_rate": 1.8003333594323962e-05, - "loss": 0.2736, + "epoch": 0.18, + "grad_norm": 0.342393643855471, + "learning_rate": 1.880166080393554e-05, + "loss": 0.3061, "step": 3978 }, { - "epoch": 0.23, - "grad_norm": 0.3728346283618812, - "learning_rate": 1.800221773580813e-05, - "loss": 0.2866, + "epoch": 0.18, + "grad_norm": 0.5528930196264737, + "learning_rate": 1.880095444206113e-05, + "loss": 0.3781, "step": 3979 }, { - "epoch": 0.23, - "grad_norm": 0.5766114897185256, - "learning_rate": 1.8001101600175843e-05, - "loss": 0.4659, + "epoch": 0.18, + "grad_norm": 0.38230488258442985, + "learning_rate": 1.8800247885342074e-05, + "loss": 0.2143, "step": 3980 }, { - "epoch": 0.23, - "grad_norm": 0.26613403625490034, - "learning_rate": 1.799998518746575e-05, - "loss": 0.1474, + "epoch": 0.18, + "grad_norm": 0.47129070528425276, + "learning_rate": 1.879954113379401e-05, + "loss": 0.3592, "step": 3981 }, { - "epoch": 0.23, - "grad_norm": 0.31031290429059133, - "learning_rate": 1.7998868497716516e-05, - "loss": 0.2421, + "epoch": 0.18, + "grad_norm": 0.43917426646226443, + "learning_rate": 1.879883418743259e-05, + "loss": 0.3466, "step": 3982 }, { - "epoch": 0.23, - "grad_norm": 0.4392924725322208, - "learning_rate": 1.7997751530966806e-05, - "loss": 0.3419, + "epoch": 0.18, + "grad_norm": 0.37607739445703614, + "learning_rate": 1.8798127046273457e-05, + "loss": 0.2165, "step": 3983 }, { - "epoch": 0.23, - "grad_norm": 0.5462229687452343, - "learning_rate": 1.799663428725531e-05, - "loss": 0.2927, + "epoch": 0.18, + "grad_norm": 0.9513792039567668, + "learning_rate": 1.8797419710332274e-05, + "loss": 0.5904, "step": 3984 }, { - "epoch": 0.23, - "grad_norm": 0.39202509019051524, - "learning_rate": 1.7995516766620706e-05, - "loss": 0.3135, + "epoch": 0.18, + "grad_norm": 1.583406491013997, + "learning_rate": 1.8796712179624695e-05, + "loss": 0.8648, "step": 3985 }, { - "epoch": 0.23, - "grad_norm": 0.7239484639028205, - "learning_rate": 1.7994398969101704e-05, - "loss": 0.5246, + "epoch": 0.18, + "grad_norm": 0.31664098503101346, + "learning_rate": 1.8796004454166386e-05, + "loss": 0.2788, "step": 3986 }, { - "epoch": 0.23, - "grad_norm": 0.3566107377592294, - "learning_rate": 1.799328089473701e-05, - "loss": 0.2878, + "epoch": 0.18, + "grad_norm": 0.3429370441062218, + "learning_rate": 1.879529653397302e-05, + "loss": 0.2118, "step": 3987 }, { - "epoch": 0.23, - "grad_norm": 0.4206515109068827, - "learning_rate": 1.799216254356534e-05, - "loss": 0.3109, + "epoch": 0.18, + "grad_norm": 0.44357907170776373, + "learning_rate": 1.8794588419060266e-05, + "loss": 0.3543, "step": 3988 }, { - "epoch": 0.23, - "grad_norm": 0.337123993090019, - "learning_rate": 1.7991043915625427e-05, - "loss": 0.2135, + "epoch": 0.18, + "grad_norm": 0.4359297537588003, + "learning_rate": 1.8793880109443797e-05, + "loss": 0.3024, "step": 3989 }, { - "epoch": 0.23, - "grad_norm": 0.34315829448210505, - "learning_rate": 1.798992501095601e-05, - "loss": 0.3353, + "epoch": 0.18, + "grad_norm": 0.459443742994942, + "learning_rate": 1.87931716051393e-05, + "loss": 0.2728, "step": 3990 }, { - "epoch": 0.23, - "grad_norm": 0.36382392161356036, - "learning_rate": 1.7988805829595825e-05, - "loss": 0.2506, + "epoch": 0.18, + "grad_norm": 0.9940553723656974, + "learning_rate": 1.879246290616246e-05, + "loss": 0.6284, "step": 3991 }, { - "epoch": 0.23, - "grad_norm": 0.6713822788963748, - "learning_rate": 1.7987686371583643e-05, - "loss": 0.501, + "epoch": 0.18, + "grad_norm": 0.47324194536475916, + "learning_rate": 1.8791754012528962e-05, + "loss": 0.2911, "step": 3992 }, { - "epoch": 0.23, - "grad_norm": 1.326313126631407, - "learning_rate": 1.7986566636958228e-05, - "loss": 0.8042, + "epoch": 0.18, + "grad_norm": 0.48096273663155553, + "learning_rate": 1.8791044924254506e-05, + "loss": 0.3563, "step": 3993 }, { - "epoch": 0.23, - "grad_norm": 0.27371657393729876, - "learning_rate": 1.798544662575835e-05, - "loss": 0.188, + "epoch": 0.18, + "grad_norm": 0.2844311415627377, + "learning_rate": 1.8790335641354785e-05, + "loss": 0.2265, "step": 3994 }, { - "epoch": 0.23, - "grad_norm": 0.31879060982136537, - "learning_rate": 1.7984326338022797e-05, - "loss": 0.2921, + "epoch": 0.18, + "grad_norm": 0.772733129284855, + "learning_rate": 1.878962616384551e-05, + "loss": 0.4001, "step": 3995 }, { - "epoch": 0.23, - "grad_norm": 0.6619585978080218, - "learning_rate": 1.798320577379037e-05, - "loss": 0.4928, + "epoch": 0.18, + "grad_norm": 0.46483450329263487, + "learning_rate": 1.878891649174238e-05, + "loss": 0.3068, "step": 3996 }, { - "epoch": 0.23, - "grad_norm": 0.353273027323354, - "learning_rate": 1.7982084933099868e-05, - "loss": 0.2369, + "epoch": 0.18, + "grad_norm": 0.42198199768045014, + "learning_rate": 1.8788206625061113e-05, + "loss": 0.313, "step": 3997 }, { - "epoch": 0.23, - "grad_norm": 1.1077945691153839, - "learning_rate": 1.798096381599011e-05, - "loss": 0.6902, + "epoch": 0.18, + "grad_norm": 0.49150202576998525, + "learning_rate": 1.878749656381742e-05, + "loss": 0.3141, "step": 3998 }, { - "epoch": 0.23, - "grad_norm": 0.3950848276115181, - "learning_rate": 1.7979842422499917e-05, - "loss": 0.3657, + "epoch": 0.18, + "grad_norm": 0.6198365518355851, + "learning_rate": 1.878678630802702e-05, + "loss": 0.4014, "step": 3999 }, { - "epoch": 0.23, - "grad_norm": 0.298536345873338, - "learning_rate": 1.7978720752668123e-05, - "loss": 0.2263, + "epoch": 0.18, + "grad_norm": 0.2507835431138398, + "learning_rate": 1.8786075857705645e-05, + "loss": 0.1162, "step": 4000 }, { - "epoch": 0.23, - "grad_norm": 0.3103745986561787, - "learning_rate": 1.7977598806533575e-05, - "loss": 0.1444, + "epoch": 0.18, + "grad_norm": 0.43906986133311776, + "learning_rate": 1.8785365212869014e-05, + "loss": 0.3368, "step": 4001 }, { - "epoch": 0.23, - "grad_norm": 0.46143426008863875, - "learning_rate": 1.797647658413512e-05, - "loss": 0.3617, + "epoch": 0.18, + "grad_norm": 0.3619943054589335, + "learning_rate": 1.8784654373532867e-05, + "loss": 0.3196, "step": 4002 }, { - "epoch": 0.23, - "grad_norm": 0.35964308521923766, - "learning_rate": 1.7975354085511627e-05, - "loss": 0.3244, + "epoch": 0.18, + "grad_norm": 0.8933023955303412, + "learning_rate": 1.8783943339712937e-05, + "loss": 0.5193, "step": 4003 }, { - "epoch": 0.23, - "grad_norm": 1.0071718247568406, - "learning_rate": 1.7974231310701964e-05, - "loss": 0.0483, + "epoch": 0.18, + "grad_norm": 0.49435648601329596, + "learning_rate": 1.878323211142497e-05, + "loss": 0.3666, "step": 4004 }, { - "epoch": 0.23, - "grad_norm": 0.8956530240852543, - "learning_rate": 1.7973108259745012e-05, - "loss": 0.7035, + "epoch": 0.18, + "grad_norm": 0.6520068088457173, + "learning_rate": 1.8782520688684708e-05, + "loss": 0.4388, "step": 4005 }, { - "epoch": 0.23, - "grad_norm": 0.4631104114709448, - "learning_rate": 1.7971984932679663e-05, - "loss": 0.2886, + "epoch": 0.18, + "grad_norm": 0.2632204696032936, + "learning_rate": 1.8781809071507905e-05, + "loss": 0.1954, "step": 4006 }, { - "epoch": 0.23, - "grad_norm": 0.25131716487061634, - "learning_rate": 1.7970861329544823e-05, - "loss": 0.2199, + "epoch": 0.18, + "grad_norm": 0.4209301099390935, + "learning_rate": 1.878109725991031e-05, + "loss": 0.2624, "step": 4007 }, { - "epoch": 0.23, - "grad_norm": 0.6088567020101945, - "learning_rate": 1.7969737450379395e-05, - "loss": 0.4963, + "epoch": 0.18, + "grad_norm": 0.6354671119006841, + "learning_rate": 1.8780385253907683e-05, + "loss": 0.4402, "step": 4008 }, { - "epoch": 0.23, - "grad_norm": 0.6053261675860214, - "learning_rate": 1.7968613295222304e-05, - "loss": 0.2756, + "epoch": 0.18, + "grad_norm": 0.5536095833548786, + "learning_rate": 1.8779673053515794e-05, + "loss": 0.4206, "step": 4009 }, { - "epoch": 0.23, - "grad_norm": 0.3335128304816215, - "learning_rate": 1.7967488864112473e-05, - "loss": 0.2663, + "epoch": 0.18, + "grad_norm": 0.3450921443156242, + "learning_rate": 1.8778960658750406e-05, + "loss": 0.2437, "step": 4010 }, { - "epoch": 0.23, - "grad_norm": 1.4790286393120566, - "learning_rate": 1.7966364157088853e-05, - "loss": 0.8433, + "epoch": 0.18, + "grad_norm": 0.5914851412022977, + "learning_rate": 1.8778248069627288e-05, + "loss": 0.4123, "step": 4011 }, { - "epoch": 0.23, - "grad_norm": 0.3681446596417048, - "learning_rate": 1.7965239174190376e-05, - "loss": 0.2871, + "epoch": 0.18, + "grad_norm": 0.34498119809539757, + "learning_rate": 1.8777535286162217e-05, + "loss": 0.2332, "step": 4012 }, { - "epoch": 0.23, - "grad_norm": 0.6855409947925746, - "learning_rate": 1.7964113915456013e-05, - "loss": 0.5071, + "epoch": 0.18, + "grad_norm": 0.297294478664778, + "learning_rate": 1.8776822308370977e-05, + "loss": 0.0684, "step": 4013 }, { - "epoch": 0.23, - "grad_norm": 0.3131770028920056, - "learning_rate": 1.7962988380924727e-05, - "loss": 0.2426, + "epoch": 0.18, + "grad_norm": 0.4616099498090414, + "learning_rate": 1.877610913626935e-05, + "loss": 0.2927, "step": 4014 }, { - "epoch": 0.23, - "grad_norm": 0.39953225148360916, - "learning_rate": 1.7961862570635496e-05, - "loss": 0.2161, + "epoch": 0.18, + "grad_norm": 1.1792369777909457, + "learning_rate": 1.8775395769873124e-05, + "loss": 0.4645, "step": 4015 }, { - "epoch": 0.23, - "grad_norm": 1.0905675166530646, - "learning_rate": 1.7960736484627306e-05, - "loss": 0.5986, + "epoch": 0.18, + "grad_norm": 0.4927609070659288, + "learning_rate": 1.8774682209198092e-05, + "loss": 0.249, "step": 4016 }, { - "epoch": 0.23, - "grad_norm": 0.8487839708425405, - "learning_rate": 1.7959610122939155e-05, - "loss": 0.521, + "epoch": 0.18, + "grad_norm": 0.4674389794346865, + "learning_rate": 1.8773968454260055e-05, + "loss": 0.3306, "step": 4017 }, { - "epoch": 0.23, - "grad_norm": 0.35305893350636597, - "learning_rate": 1.7958483485610048e-05, - "loss": 0.275, + "epoch": 0.18, + "grad_norm": 0.6153935103914157, + "learning_rate": 1.8773254505074812e-05, + "loss": 0.3685, "step": 4018 }, { - "epoch": 0.23, - "grad_norm": 0.5051109731237906, - "learning_rate": 1.7957356572678998e-05, - "loss": 0.389, + "epoch": 0.18, + "grad_norm": 0.26515885277279033, + "learning_rate": 1.8772540361658172e-05, + "loss": 0.1478, "step": 4019 }, { - "epoch": 0.23, - "grad_norm": 0.37882249996544415, - "learning_rate": 1.7956229384185036e-05, - "loss": 0.217, + "epoch": 0.18, + "grad_norm": 0.6890604781427501, + "learning_rate": 1.8771826024025944e-05, + "loss": 0.3908, "step": 4020 }, { - "epoch": 0.23, - "grad_norm": 0.43823490781549507, - "learning_rate": 1.7955101920167188e-05, - "loss": 0.2772, + "epoch": 0.18, + "grad_norm": 0.9466866723666275, + "learning_rate": 1.877111149219394e-05, + "loss": 0.4097, "step": 4021 }, { - "epoch": 0.23, - "grad_norm": 0.5312447099067517, - "learning_rate": 1.7953974180664504e-05, - "loss": 0.3589, + "epoch": 0.18, + "grad_norm": 0.6618791638080433, + "learning_rate": 1.8770396766177982e-05, + "loss": 0.3621, "step": 4022 }, { - "epoch": 0.23, - "grad_norm": 0.5289342208062883, - "learning_rate": 1.7952846165716038e-05, - "loss": 0.3424, + "epoch": 0.18, + "grad_norm": 0.4216990855645584, + "learning_rate": 1.8769681845993894e-05, + "loss": 0.2901, "step": 4023 }, { - "epoch": 0.23, - "grad_norm": 0.44434750769308423, - "learning_rate": 1.795171787536085e-05, - "loss": 0.2691, + "epoch": 0.18, + "grad_norm": 0.49743356128522165, + "learning_rate": 1.8768966731657498e-05, + "loss": 0.3494, "step": 4024 }, { - "epoch": 0.23, - "grad_norm": 0.5755076448856171, - "learning_rate": 1.7950589309638014e-05, - "loss": 0.4052, + "epoch": 0.18, + "grad_norm": 0.3537594222234786, + "learning_rate": 1.8768251423184637e-05, + "loss": 0.2497, "step": 4025 }, { - "epoch": 0.23, - "grad_norm": 0.30817817030080696, - "learning_rate": 1.794946046858661e-05, - "loss": 0.2699, + "epoch": 0.18, + "grad_norm": 0.45936464636574476, + "learning_rate": 1.8767535920591133e-05, + "loss": 0.2645, "step": 4026 }, { - "epoch": 0.23, - "grad_norm": 0.3306458946911386, - "learning_rate": 1.7948331352245736e-05, - "loss": 0.1414, + "epoch": 0.18, + "grad_norm": 0.9689641996080465, + "learning_rate": 1.876682022389284e-05, + "loss": 0.4773, "step": 4027 }, { - "epoch": 0.23, - "grad_norm": 0.509175609376385, - "learning_rate": 1.7947201960654488e-05, - "loss": 0.3813, + "epoch": 0.19, + "grad_norm": 0.7421056731019856, + "learning_rate": 1.8766104333105594e-05, + "loss": 0.4472, "step": 4028 }, { - "epoch": 0.23, - "grad_norm": 0.8703972842173666, - "learning_rate": 1.7946072293851976e-05, - "loss": 0.6002, + "epoch": 0.19, + "grad_norm": 0.48311824901737943, + "learning_rate": 1.8765388248245247e-05, + "loss": 0.2723, "step": 4029 }, { - "epoch": 0.23, - "grad_norm": 0.343542528326212, - "learning_rate": 1.794494235187732e-05, - "loss": 0.2338, + "epoch": 0.19, + "grad_norm": 0.3871914291360428, + "learning_rate": 1.8764671969327653e-05, + "loss": 0.3277, "step": 4030 }, { - "epoch": 0.23, - "grad_norm": 0.4128671296922721, - "learning_rate": 1.7943812134769656e-05, - "loss": 0.4134, + "epoch": 0.19, + "grad_norm": 0.30945069964211885, + "learning_rate": 1.8763955496368668e-05, + "loss": 0.1455, "step": 4031 }, { - "epoch": 0.23, - "grad_norm": 1.041845445196055, - "learning_rate": 1.794268164256812e-05, - "loss": 0.628, + "epoch": 0.19, + "grad_norm": 0.4520331679045915, + "learning_rate": 1.8763238829384156e-05, + "loss": 0.2891, "step": 4032 }, { - "epoch": 0.23, - "grad_norm": 0.23769051134991365, - "learning_rate": 1.7941550875311858e-05, - "loss": 0.1505, + "epoch": 0.19, + "grad_norm": 0.5625587918102873, + "learning_rate": 1.8762521968389983e-05, + "loss": 0.3667, "step": 4033 }, { - "epoch": 0.23, - "grad_norm": 0.45355094491900144, - "learning_rate": 1.794041983304003e-05, - "loss": 0.3421, + "epoch": 0.19, + "grad_norm": 0.8315770904840278, + "learning_rate": 1.876180491340202e-05, + "loss": 0.4981, "step": 4034 }, { - "epoch": 0.23, - "grad_norm": 1.1868507744408294, - "learning_rate": 1.7939288515791806e-05, - "loss": 0.7895, + "epoch": 0.19, + "grad_norm": 0.4537648300491066, + "learning_rate": 1.8761087664436137e-05, + "loss": 0.3291, "step": 4035 }, { - "epoch": 0.23, - "grad_norm": 0.395618032596114, - "learning_rate": 1.7938156923606362e-05, - "loss": 0.2637, + "epoch": 0.19, + "grad_norm": 1.0620814465976745, + "learning_rate": 1.876037022150822e-05, + "loss": 0.2689, "step": 4036 }, { - "epoch": 0.23, - "grad_norm": 0.7611232233618405, - "learning_rate": 1.7937025056522884e-05, - "loss": 0.5185, + "epoch": 0.19, + "grad_norm": 0.27215117133615047, + "learning_rate": 1.875965258463415e-05, + "loss": 0.226, "step": 4037 }, { - "epoch": 0.23, - "grad_norm": 0.4522028536625917, - "learning_rate": 1.7935892914580572e-05, - "loss": 0.3422, + "epoch": 0.19, + "grad_norm": 0.3821132470527215, + "learning_rate": 1.8758934753829813e-05, + "loss": 0.3199, "step": 4038 }, { - "epoch": 0.23, - "grad_norm": 0.32600638350104655, - "learning_rate": 1.7934760497818628e-05, - "loss": 0.1793, + "epoch": 0.19, + "grad_norm": 0.7466891878586411, + "learning_rate": 1.8758216729111104e-05, + "loss": 0.3953, "step": 4039 }, { - "epoch": 0.23, - "grad_norm": 0.5263969279202462, - "learning_rate": 1.7933627806276267e-05, - "loss": 0.2869, + "epoch": 0.19, + "grad_norm": 0.6796077368785677, + "learning_rate": 1.8757498510493915e-05, + "loss": 0.3621, "step": 4040 }, { - "epoch": 0.23, - "grad_norm": 0.5985454643455146, - "learning_rate": 1.7932494839992723e-05, - "loss": 0.4168, + "epoch": 0.19, + "grad_norm": 0.3951030452651413, + "learning_rate": 1.875678009799415e-05, + "loss": 0.3169, "step": 4041 }, { - "epoch": 0.23, - "grad_norm": 0.40800954663605743, - "learning_rate": 1.793136159900722e-05, - "loss": 0.286, + "epoch": 0.19, + "grad_norm": 0.4989226873074533, + "learning_rate": 1.8756061491627716e-05, + "loss": 0.3128, "step": 4042 }, { - "epoch": 0.23, - "grad_norm": 0.4796678193727095, - "learning_rate": 1.7930228083359002e-05, - "loss": 0.3264, + "epoch": 0.19, + "grad_norm": 0.3658374309040895, + "learning_rate": 1.875534269141052e-05, + "loss": 0.2183, "step": 4043 }, { - "epoch": 0.23, - "grad_norm": 0.4774420141970719, - "learning_rate": 1.792909429308733e-05, - "loss": 0.3023, + "epoch": 0.19, + "grad_norm": 0.4424638152107735, + "learning_rate": 1.8754623697358473e-05, + "loss": 0.3673, "step": 4044 }, { - "epoch": 0.23, - "grad_norm": 0.33552706676028116, - "learning_rate": 1.792796022823146e-05, - "loss": 0.1879, + "epoch": 0.19, + "grad_norm": 0.3017595486460874, + "learning_rate": 1.8753904509487497e-05, + "loss": 0.2676, "step": 4045 }, { - "epoch": 0.23, - "grad_norm": 0.34466688409543883, - "learning_rate": 1.7926825888830673e-05, - "loss": 0.2726, + "epoch": 0.19, + "grad_norm": 0.6307815847309481, + "learning_rate": 1.8753185127813512e-05, + "loss": 0.4126, "step": 4046 }, { - "epoch": 0.23, - "grad_norm": 0.6822063282086891, - "learning_rate": 1.7925691274924247e-05, - "loss": 0.5477, + "epoch": 0.19, + "grad_norm": 0.5025268019894982, + "learning_rate": 1.8752465552352443e-05, + "loss": 0.3221, "step": 4047 }, { - "epoch": 0.23, - "grad_norm": 0.4588359590822482, - "learning_rate": 1.7924556386551472e-05, - "loss": 0.4077, + "epoch": 0.19, + "grad_norm": 1.1178648532467765, + "learning_rate": 1.8751745783120224e-05, + "loss": 0.663, "step": 4048 }, { - "epoch": 0.23, - "grad_norm": 0.39472767234202216, - "learning_rate": 1.792342122375165e-05, - "loss": 0.289, + "epoch": 0.19, + "grad_norm": 0.40062289889231617, + "learning_rate": 1.875102582013279e-05, + "loss": 0.3093, "step": 4049 }, { - "epoch": 0.23, - "grad_norm": 0.4249909103766952, - "learning_rate": 1.792228578656409e-05, - "loss": 0.3337, + "epoch": 0.19, + "grad_norm": 0.38130032841038297, + "learning_rate": 1.8750305663406077e-05, + "loss": 0.3046, "step": 4050 }, { - "epoch": 0.23, - "grad_norm": 0.40114167914557236, - "learning_rate": 1.7921150075028112e-05, - "loss": 0.2137, + "epoch": 0.19, + "grad_norm": 0.36260549148975374, + "learning_rate": 1.8749585312956028e-05, + "loss": 0.2348, "step": 4051 }, { - "epoch": 0.23, - "grad_norm": 0.3296340901766964, - "learning_rate": 1.792001408918305e-05, - "loss": 0.2957, + "epoch": 0.19, + "grad_norm": 0.6269668264209588, + "learning_rate": 1.8748864768798596e-05, + "loss": 0.2773, "step": 4052 }, { - "epoch": 0.23, - "grad_norm": 0.8150910975190974, - "learning_rate": 1.791887782906824e-05, - "loss": 0.4098, + "epoch": 0.19, + "grad_norm": 0.4098295625980208, + "learning_rate": 1.8748144030949728e-05, + "loss": 0.2931, "step": 4053 }, { - "epoch": 0.23, - "grad_norm": 0.3098451298308945, - "learning_rate": 1.7917741294723035e-05, - "loss": 0.2909, + "epoch": 0.19, + "grad_norm": 0.6008618136743048, + "learning_rate": 1.8747423099425387e-05, + "loss": 0.3829, "step": 4054 }, { - "epoch": 0.23, - "grad_norm": 0.6086232025078487, - "learning_rate": 1.7916604486186786e-05, - "loss": 0.4586, + "epoch": 0.19, + "grad_norm": 0.5761730553079765, + "learning_rate": 1.8746701974241525e-05, + "loss": 0.3717, "step": 4055 }, { - "epoch": 0.23, - "grad_norm": 0.7056291363756321, - "learning_rate": 1.7915467403498864e-05, - "loss": 0.3649, + "epoch": 0.19, + "grad_norm": 0.40136981386742165, + "learning_rate": 1.8745980655414113e-05, + "loss": 0.3303, "step": 4056 }, { - "epoch": 0.23, - "grad_norm": 0.232611976797289, - "learning_rate": 1.791433004669865e-05, - "loss": 0.1749, + "epoch": 0.19, + "grad_norm": 0.3515300111033161, + "learning_rate": 1.8745259142959117e-05, + "loss": 0.2043, "step": 4057 }, { - "epoch": 0.23, - "grad_norm": 0.3738788987291126, - "learning_rate": 1.791319241582552e-05, - "loss": 0.3219, + "epoch": 0.19, + "grad_norm": 0.4967389945058046, + "learning_rate": 1.8744537436892517e-05, + "loss": 0.2918, "step": 4058 }, { - "epoch": 0.23, - "grad_norm": 1.0108750504208646, - "learning_rate": 1.791205451091888e-05, - "loss": 0.4338, + "epoch": 0.19, + "grad_norm": 0.45419702859968686, + "learning_rate": 1.8743815537230284e-05, + "loss": 0.3364, "step": 4059 }, { - "epoch": 0.23, - "grad_norm": 0.8253458918236195, - "learning_rate": 1.7910916332018137e-05, - "loss": 0.4663, + "epoch": 0.19, + "grad_norm": 0.8970131137846125, + "learning_rate": 1.87430934439884e-05, + "loss": 0.5287, "step": 4060 }, { - "epoch": 0.23, - "grad_norm": 0.4535609620382996, - "learning_rate": 1.7909777879162695e-05, - "loss": 0.2864, + "epoch": 0.19, + "grad_norm": 0.3993861416302628, + "learning_rate": 1.8742371157182856e-05, + "loss": 0.3451, "step": 4061 }, { - "epoch": 0.23, - "grad_norm": 0.5264572392672097, - "learning_rate": 1.7908639152391988e-05, - "loss": 0.3096, + "epoch": 0.19, + "grad_norm": 0.4173887333637627, + "learning_rate": 1.874164867682964e-05, + "loss": 0.2502, "step": 4062 }, { - "epoch": 0.23, - "grad_norm": 0.6313130758765294, - "learning_rate": 1.790750015174545e-05, - "loss": 0.3589, + "epoch": 0.19, + "grad_norm": 0.3176562227560707, + "learning_rate": 1.8740926002944747e-05, + "loss": 0.1888, "step": 4063 }, { - "epoch": 0.23, - "grad_norm": 0.4027259179702243, - "learning_rate": 1.7906360877262515e-05, - "loss": 0.3104, + "epoch": 0.19, + "grad_norm": 0.8815948896931961, + "learning_rate": 1.874020313554418e-05, + "loss": 0.5814, "step": 4064 }, { - "epoch": 0.23, - "grad_norm": 0.8796907969909685, - "learning_rate": 1.7905221328982647e-05, - "loss": 0.5063, + "epoch": 0.19, + "grad_norm": 0.34095189836042045, + "learning_rate": 1.8739480074643936e-05, + "loss": 0.2387, "step": 4065 }, { - "epoch": 0.23, - "grad_norm": 0.3771326162949683, - "learning_rate": 1.7904081506945304e-05, - "loss": 0.25, + "epoch": 0.19, + "grad_norm": 0.5847801595052117, + "learning_rate": 1.873875682026003e-05, + "loss": 0.4254, "step": 4066 }, { - "epoch": 0.23, - "grad_norm": 0.37190974661172604, - "learning_rate": 1.790294141118996e-05, - "loss": 0.2481, + "epoch": 0.19, + "grad_norm": 0.8731200352332138, + "learning_rate": 1.8738033372408467e-05, + "loss": 0.507, "step": 4067 }, { - "epoch": 0.23, - "grad_norm": 1.5489614559790572, - "learning_rate": 1.790180104175609e-05, - "loss": 0.7663, + "epoch": 0.19, + "grad_norm": 0.3858910815996692, + "learning_rate": 1.8737309731105266e-05, + "loss": 0.205, "step": 4068 }, { - "epoch": 0.23, - "grad_norm": 0.5644846432613142, - "learning_rate": 1.7900660398683192e-05, - "loss": 0.2647, + "epoch": 0.19, + "grad_norm": 0.3283125645812928, + "learning_rate": 1.8736585896366452e-05, + "loss": 0.2331, "step": 4069 }, { - "epoch": 0.23, - "grad_norm": 0.5649799604812463, - "learning_rate": 1.789951948201077e-05, - "loss": 0.3422, + "epoch": 0.19, + "grad_norm": 1.5158346111340124, + "learning_rate": 1.8735861868208047e-05, + "loss": 0.8187, "step": 4070 }, { - "epoch": 0.23, - "grad_norm": 0.5752127497152826, - "learning_rate": 1.789837829177832e-05, - "loss": 0.3297, + "epoch": 0.19, + "grad_norm": 0.39669100089283027, + "learning_rate": 1.8735137646646078e-05, + "loss": 0.2271, "step": 4071 }, { - "epoch": 0.23, - "grad_norm": 0.2515960625460765, - "learning_rate": 1.7897236828025373e-05, - "loss": 0.13, + "epoch": 0.19, + "grad_norm": 1.0898667717787507, + "learning_rate": 1.873441323169658e-05, + "loss": 0.5402, "step": 4072 }, { - "epoch": 0.23, - "grad_norm": 0.454993611244226, - "learning_rate": 1.7896095090791452e-05, - "loss": 0.3405, + "epoch": 0.19, + "grad_norm": 0.4406687861116597, + "learning_rate": 1.8733688623375595e-05, + "loss": 0.3533, "step": 4073 }, { - "epoch": 0.23, - "grad_norm": 0.46244441347114756, - "learning_rate": 1.7894953080116102e-05, - "loss": 0.3233, + "epoch": 0.19, + "grad_norm": 0.39628465706299576, + "learning_rate": 1.8732963821699158e-05, + "loss": 0.291, "step": 4074 }, { - "epoch": 0.23, - "grad_norm": 0.46040709971171495, - "learning_rate": 1.7893810796038862e-05, - "loss": 0.2484, + "epoch": 0.19, + "grad_norm": 0.36005318460321234, + "learning_rate": 1.873223882668332e-05, + "loss": 0.1215, "step": 4075 }, { - "epoch": 0.23, - "grad_norm": 0.7576076875799639, - "learning_rate": 1.7892668238599293e-05, - "loss": 0.4109, + "epoch": 0.19, + "grad_norm": 0.6954297748866335, + "learning_rate": 1.8731513638344128e-05, + "loss": 0.4379, "step": 4076 }, { - "epoch": 0.23, - "grad_norm": 0.6283333218980784, - "learning_rate": 1.7891525407836967e-05, - "loss": 0.4061, + "epoch": 0.19, + "grad_norm": 0.3925983123524538, + "learning_rate": 1.8730788256697642e-05, + "loss": 0.3001, "step": 4077 }, { - "epoch": 0.23, - "grad_norm": 0.49411712691782517, - "learning_rate": 1.789038230379145e-05, - "loss": 0.3519, + "epoch": 0.19, + "grad_norm": 0.7017349425695114, + "learning_rate": 1.873006268175992e-05, + "loss": 0.2901, "step": 4078 }, { - "epoch": 0.23, - "grad_norm": 0.25958003257867746, - "learning_rate": 1.7889238926502336e-05, - "loss": 0.1599, + "epoch": 0.19, + "grad_norm": 0.7681959949278588, + "learning_rate": 1.872933691354702e-05, + "loss": 0.5355, "step": 4079 }, { - "epoch": 0.23, - "grad_norm": 0.6904502163139321, - "learning_rate": 1.788809527600922e-05, - "loss": 0.5345, + "epoch": 0.19, + "grad_norm": 0.45255785397919956, + "learning_rate": 1.872861095207502e-05, + "loss": 0.2759, "step": 4080 }, { - "epoch": 0.23, - "grad_norm": 0.42273087648124946, - "learning_rate": 1.78869513523517e-05, - "loss": 0.3553, + "epoch": 0.19, + "grad_norm": 0.37721507703032003, + "learning_rate": 1.8727884797359984e-05, + "loss": 0.2624, "step": 4081 }, { - "epoch": 0.23, - "grad_norm": 0.41321642610780296, - "learning_rate": 1.7885807155569395e-05, - "loss": 0.2611, + "epoch": 0.19, + "grad_norm": 0.9145538686303181, + "learning_rate": 1.8727158449417993e-05, + "loss": 0.5851, "step": 4082 }, { - "epoch": 0.23, - "grad_norm": 1.29593229800086, - "learning_rate": 1.7884662685701927e-05, - "loss": 0.797, + "epoch": 0.19, + "grad_norm": 0.4739293217815633, + "learning_rate": 1.872643190826512e-05, + "loss": 0.3127, "step": 4083 }, { - "epoch": 0.23, - "grad_norm": 0.4185943883142081, - "learning_rate": 1.788351794278893e-05, - "loss": 0.2627, + "epoch": 0.19, + "grad_norm": 0.5254250299584181, + "learning_rate": 1.872570517391746e-05, + "loss": 0.2548, "step": 4084 }, { - "epoch": 0.23, - "grad_norm": 0.30586527586360296, - "learning_rate": 1.7882372926870045e-05, - "loss": 0.2179, + "epoch": 0.19, + "grad_norm": 0.4839695123647533, + "learning_rate": 1.8724978246391094e-05, + "loss": 0.3261, "step": 4085 }, { - "epoch": 0.23, - "grad_norm": 0.5378611831339616, - "learning_rate": 1.7881227637984922e-05, - "loss": 0.391, + "epoch": 0.19, + "grad_norm": 0.38929072820636357, + "learning_rate": 1.872425112570212e-05, + "loss": 0.2709, "step": 4086 }, { - "epoch": 0.23, - "grad_norm": 0.6274700701055269, - "learning_rate": 1.788008207617323e-05, - "loss": 0.4243, + "epoch": 0.19, + "grad_norm": 1.173006971425519, + "learning_rate": 1.8723523811866634e-05, + "loss": 0.5855, "step": 4087 }, { - "epoch": 0.23, - "grad_norm": 0.3917913457312723, - "learning_rate": 1.787893624147463e-05, - "loss": 0.3136, + "epoch": 0.19, + "grad_norm": 0.5049895709995518, + "learning_rate": 1.872279630490074e-05, + "loss": 0.323, "step": 4088 }, { - "epoch": 0.23, - "grad_norm": 0.4596357049274881, - "learning_rate": 1.7877790133928807e-05, - "loss": 0.2903, + "epoch": 0.19, + "grad_norm": 0.31590522510821323, + "learning_rate": 1.8722068604820546e-05, + "loss": 0.2689, "step": 4089 }, { - "epoch": 0.23, - "grad_norm": 0.4524210604969984, - "learning_rate": 1.7876643753575457e-05, - "loss": 0.3614, + "epoch": 0.19, + "grad_norm": 0.4719365460858741, + "learning_rate": 1.8721340711642153e-05, + "loss": 0.3619, "step": 4090 }, { - "epoch": 0.24, - "grad_norm": 0.382573208203039, - "learning_rate": 1.7875497100454266e-05, - "loss": 0.3108, + "epoch": 0.19, + "grad_norm": 0.29880143211145316, + "learning_rate": 1.8720612625381685e-05, + "loss": 0.1143, "step": 4091 }, { - "epoch": 0.24, - "grad_norm": 0.3418759654466254, - "learning_rate": 1.787435017460495e-05, - "loss": 0.1814, + "epoch": 0.19, + "grad_norm": 0.45132429195794876, + "learning_rate": 1.871988434605526e-05, + "loss": 0.2855, "step": 4092 }, { - "epoch": 0.24, - "grad_norm": 0.48679353512366635, - "learning_rate": 1.7873202976067225e-05, - "loss": 0.3678, + "epoch": 0.19, + "grad_norm": 0.4825642241460887, + "learning_rate": 1.8719155873678997e-05, + "loss": 0.3348, "step": 4093 }, { - "epoch": 0.24, - "grad_norm": 0.3769792537925658, - "learning_rate": 1.787205550488082e-05, - "loss": 0.3096, + "epoch": 0.19, + "grad_norm": 0.7928324056232585, + "learning_rate": 1.8718427208269028e-05, + "loss": 0.4324, "step": 4094 }, { - "epoch": 0.24, - "grad_norm": 0.744428708424359, - "learning_rate": 1.7870907761085474e-05, - "loss": 0.4643, + "epoch": 0.19, + "grad_norm": 0.4017577310825777, + "learning_rate": 1.8717698349841487e-05, + "loss": 0.3051, "step": 4095 }, { - "epoch": 0.24, - "grad_norm": 0.6447660292616412, - "learning_rate": 1.786975974472093e-05, - "loss": 0.4391, + "epoch": 0.19, + "grad_norm": 0.9593971553006387, + "learning_rate": 1.87169692984125e-05, + "loss": 0.6747, "step": 4096 }, { - "epoch": 0.24, - "grad_norm": 0.3339067669230319, - "learning_rate": 1.7868611455826942e-05, - "loss": 0.2451, + "epoch": 0.19, + "grad_norm": 0.2939174551982533, + "learning_rate": 1.8716240053998216e-05, + "loss": 0.1987, "step": 4097 }, { - "epoch": 0.24, - "grad_norm": 0.3580989985318797, - "learning_rate": 1.7867462894443283e-05, - "loss": 0.2714, + "epoch": 0.19, + "grad_norm": 0.4579487831070095, + "learning_rate": 1.8715510616614783e-05, + "loss": 0.3136, "step": 4098 }, { - "epoch": 0.24, - "grad_norm": 0.9081337673125298, - "learning_rate": 1.7866314060609714e-05, - "loss": 0.6394, + "epoch": 0.19, + "grad_norm": 0.570487794214327, + "learning_rate": 1.871478098627834e-05, + "loss": 0.3833, "step": 4099 }, { - "epoch": 0.24, - "grad_norm": 0.4002506801084894, - "learning_rate": 1.7865164954366033e-05, - "loss": 0.2894, + "epoch": 0.19, + "grad_norm": 0.4427923904129169, + "learning_rate": 1.871405116300505e-05, + "loss": 0.3745, "step": 4100 }, { - "epoch": 0.24, - "grad_norm": 0.4251875388623584, - "learning_rate": 1.7864015575752026e-05, - "loss": 0.353, + "epoch": 0.19, + "grad_norm": 0.43200144056182965, + "learning_rate": 1.8713321146811064e-05, + "loss": 0.2459, "step": 4101 }, { - "epoch": 0.24, - "grad_norm": 1.063061765139635, - "learning_rate": 1.78628659248075e-05, - "loss": 0.4411, + "epoch": 0.19, + "grad_norm": 0.600004724323595, + "learning_rate": 1.871259093771254e-05, + "loss": 0.4287, "step": 4102 }, { - "epoch": 0.24, - "grad_norm": 0.29534226202391967, - "learning_rate": 1.7861716001572262e-05, - "loss": 0.2349, + "epoch": 0.19, + "grad_norm": 0.38035863225806904, + "learning_rate": 1.871186053572566e-05, + "loss": 0.2405, "step": 4103 }, { - "epoch": 0.24, - "grad_norm": 0.6987250149273035, - "learning_rate": 1.7860565806086142e-05, - "loss": 0.4517, + "epoch": 0.19, + "grad_norm": 0.45845124013764627, + "learning_rate": 1.8711129940866577e-05, + "loss": 0.2121, "step": 4104 }, { - "epoch": 0.24, - "grad_norm": 0.31222378317392696, - "learning_rate": 1.7859415338388963e-05, - "loss": 0.228, + "epoch": 0.19, + "grad_norm": 0.4329663941746019, + "learning_rate": 1.8710399153151475e-05, + "loss": 0.3076, "step": 4105 }, { - "epoch": 0.24, - "grad_norm": 0.4311047635232939, - "learning_rate": 1.7858264598520568e-05, - "loss": 0.3151, + "epoch": 0.19, + "grad_norm": 0.6992489900673649, + "learning_rate": 1.870966817259653e-05, + "loss": 0.5098, "step": 4106 }, { - "epoch": 0.24, - "grad_norm": 1.4992911334787213, - "learning_rate": 1.7857113586520806e-05, - "loss": 0.7821, + "epoch": 0.19, + "grad_norm": 0.34395180714971296, + "learning_rate": 1.8708936999217932e-05, + "loss": 0.268, "step": 4107 }, { - "epoch": 0.24, - "grad_norm": 0.5395303710203192, - "learning_rate": 1.7855962302429542e-05, - "loss": 0.2653, + "epoch": 0.19, + "grad_norm": 0.6000487984948208, + "learning_rate": 1.8708205633031857e-05, + "loss": 0.3971, "step": 4108 }, { - "epoch": 0.24, - "grad_norm": 0.40037400934122036, - "learning_rate": 1.785481074628664e-05, - "loss": 0.3089, + "epoch": 0.19, + "grad_norm": 0.3487440062477048, + "learning_rate": 1.87074740740545e-05, + "loss": 0.2301, "step": 4109 }, { - "epoch": 0.24, - "grad_norm": 0.4222237834089705, - "learning_rate": 1.785365891813198e-05, - "loss": 0.3479, + "epoch": 0.19, + "grad_norm": 0.3970407382973214, + "learning_rate": 1.8706742322302064e-05, + "loss": 0.2353, "step": 4110 }, { - "epoch": 0.24, - "grad_norm": 0.26589948850227196, - "learning_rate": 1.785250681800545e-05, - "loss": 0.1399, + "epoch": 0.19, + "grad_norm": 1.0152483241237586, + "learning_rate": 1.8706010377790746e-05, + "loss": 0.6636, "step": 4111 }, { - "epoch": 0.24, - "grad_norm": 0.41721052652204943, - "learning_rate": 1.7851354445946944e-05, - "loss": 0.3075, + "epoch": 0.19, + "grad_norm": 0.5107988446633184, + "learning_rate": 1.8705278240536745e-05, + "loss": 0.4164, "step": 4112 }, { - "epoch": 0.24, - "grad_norm": 0.4215018631260107, - "learning_rate": 1.785020180199637e-05, - "loss": 0.3456, + "epoch": 0.19, + "grad_norm": 0.3555898251270073, + "learning_rate": 1.8704545910556278e-05, + "loss": 0.2935, "step": 4113 }, { - "epoch": 0.24, - "grad_norm": 1.3790817111716487, - "learning_rate": 1.7849048886193648e-05, - "loss": 0.8667, + "epoch": 0.19, + "grad_norm": 0.47716167363553574, + "learning_rate": 1.870381338786555e-05, + "loss": 0.2921, "step": 4114 }, { - "epoch": 0.24, - "grad_norm": 0.354931172130831, - "learning_rate": 1.7847895698578702e-05, - "loss": 0.2496, + "epoch": 0.19, + "grad_norm": 0.35111418159760394, + "learning_rate": 1.8703080672480784e-05, + "loss": 0.1804, "step": 4115 }, { - "epoch": 0.24, - "grad_norm": 0.423749864426112, - "learning_rate": 1.7846742239191464e-05, - "loss": 0.3579, + "epoch": 0.19, + "grad_norm": 0.4738758554667891, + "learning_rate": 1.87023477644182e-05, + "loss": 0.2972, "step": 4116 }, { - "epoch": 0.24, - "grad_norm": 0.3605983029128115, - "learning_rate": 1.784558850807188e-05, - "loss": 0.2686, + "epoch": 0.19, + "grad_norm": 0.3603507317470979, + "learning_rate": 1.8701614663694023e-05, + "loss": 0.2855, "step": 4117 }, { - "epoch": 0.24, - "grad_norm": 0.3667287214717069, - "learning_rate": 1.7844434505259904e-05, - "loss": 0.2296, + "epoch": 0.19, + "grad_norm": 0.6554457055049226, + "learning_rate": 1.8700881370324486e-05, + "loss": 0.5147, "step": 4118 }, { - "epoch": 0.24, - "grad_norm": 0.9766307941377927, - "learning_rate": 1.7843280230795496e-05, - "loss": 0.6913, + "epoch": 0.19, + "grad_norm": 0.7088819981565134, + "learning_rate": 1.8700147884325822e-05, + "loss": 0.4062, "step": 4119 }, { - "epoch": 0.24, - "grad_norm": 0.7464374748135114, - "learning_rate": 1.784212568471863e-05, - "loss": 0.5132, + "epoch": 0.19, + "grad_norm": 0.42379127833525326, + "learning_rate": 1.8699414205714265e-05, + "loss": 0.2808, "step": 4120 }, { - "epoch": 0.24, - "grad_norm": 0.34975514825247495, - "learning_rate": 1.7840970867069293e-05, - "loss": 0.2485, + "epoch": 0.19, + "grad_norm": 0.3074587918302642, + "learning_rate": 1.8698680334506067e-05, + "loss": 0.2379, "step": 4121 }, { - "epoch": 0.24, - "grad_norm": 0.7385557641533902, - "learning_rate": 1.7839815777887472e-05, - "loss": 0.511, + "epoch": 0.19, + "grad_norm": 0.6688850761808294, + "learning_rate": 1.8697946270717468e-05, + "loss": 0.3302, "step": 4122 }, { - "epoch": 0.24, - "grad_norm": 0.31425441408801236, - "learning_rate": 1.7838660417213166e-05, - "loss": 0.179, + "epoch": 0.19, + "grad_norm": 0.44008791084786675, + "learning_rate": 1.8697212014364724e-05, + "loss": 0.3611, "step": 4123 }, { - "epoch": 0.24, - "grad_norm": 0.3292178319221814, - "learning_rate": 1.7837504785086386e-05, - "loss": 0.2168, + "epoch": 0.19, + "grad_norm": 0.48351556889028213, + "learning_rate": 1.8696477565464085e-05, + "loss": 0.3271, "step": 4124 }, { - "epoch": 0.24, - "grad_norm": 0.36915751123007257, - "learning_rate": 1.7836348881547153e-05, - "loss": 0.3212, + "epoch": 0.19, + "grad_norm": 0.4091845901403694, + "learning_rate": 1.8695742924031816e-05, + "loss": 0.3032, "step": 4125 }, { - "epoch": 0.24, - "grad_norm": 0.9386073864728668, - "learning_rate": 1.7835192706635494e-05, - "loss": 0.5632, + "epoch": 0.19, + "grad_norm": 0.535129187692363, + "learning_rate": 1.8695008090084178e-05, + "loss": 0.3933, "step": 4126 }, { - "epoch": 0.24, - "grad_norm": 0.38900805335201366, - "learning_rate": 1.783403626039145e-05, - "loss": 0.2911, + "epoch": 0.19, + "grad_norm": 0.24036217655968822, + "learning_rate": 1.8694273063637444e-05, + "loss": 0.0749, "step": 4127 }, { - "epoch": 0.24, - "grad_norm": 0.7257634073735822, - "learning_rate": 1.7832879542855067e-05, - "loss": 0.4026, + "epoch": 0.19, + "grad_norm": 0.37495575057809033, + "learning_rate": 1.8693537844707884e-05, + "loss": 0.3313, "step": 4128 }, { - "epoch": 0.24, - "grad_norm": 0.260931509801551, - "learning_rate": 1.7831722554066403e-05, - "loss": 0.2349, + "epoch": 0.19, + "grad_norm": 0.4601205933578822, + "learning_rate": 1.8692802433311773e-05, + "loss": 0.3508, "step": 4129 }, { - "epoch": 0.24, - "grad_norm": 0.41780863397346774, - "learning_rate": 1.7830565294065522e-05, - "loss": 0.2964, + "epoch": 0.19, + "grad_norm": 0.6783083901461556, + "learning_rate": 1.8692066829465396e-05, + "loss": 0.4322, "step": 4130 }, { - "epoch": 0.24, - "grad_norm": 0.5120862661410127, - "learning_rate": 1.7829407762892504e-05, - "loss": 0.2983, + "epoch": 0.19, + "grad_norm": 0.5122276613740804, + "learning_rate": 1.8691331033185036e-05, + "loss": 0.3441, "step": 4131 }, { - "epoch": 0.24, - "grad_norm": 0.9249334484384528, - "learning_rate": 1.7828249960587428e-05, - "loss": 0.4811, + "epoch": 0.19, + "grad_norm": 0.6176107316002839, + "learning_rate": 1.8690595044486983e-05, + "loss": 0.3468, "step": 4132 }, { - "epoch": 0.24, - "grad_norm": 0.36244641485753243, - "learning_rate": 1.7827091887190396e-05, - "loss": 0.3057, + "epoch": 0.19, + "grad_norm": 0.2615467933068633, + "learning_rate": 1.8689858863387534e-05, + "loss": 0.2058, "step": 4133 }, { - "epoch": 0.24, - "grad_norm": 0.36789794513151913, - "learning_rate": 1.7825933542741506e-05, - "loss": 0.2761, + "epoch": 0.19, + "grad_norm": 0.5766915048836134, + "learning_rate": 1.8689122489902983e-05, + "loss": 0.3756, "step": 4134 }, { - "epoch": 0.24, - "grad_norm": 0.2799037207296845, - "learning_rate": 1.7824774927280877e-05, - "loss": 0.1626, + "epoch": 0.19, + "grad_norm": 0.4496838726514699, + "learning_rate": 1.8688385924049636e-05, + "loss": 0.3492, "step": 4135 }, { - "epoch": 0.24, - "grad_norm": 0.3573932183986537, - "learning_rate": 1.7823616040848625e-05, - "loss": 0.3016, + "epoch": 0.19, + "grad_norm": 0.394662739207563, + "learning_rate": 1.8687649165843797e-05, + "loss": 0.3553, "step": 4136 }, { - "epoch": 0.24, - "grad_norm": 0.4498035506977487, - "learning_rate": 1.782245688348489e-05, - "loss": 0.3051, + "epoch": 0.19, + "grad_norm": 0.4783256277908455, + "learning_rate": 1.868691221530178e-05, + "loss": 0.1731, "step": 4137 }, { - "epoch": 0.24, - "grad_norm": 1.1538000821845913, - "learning_rate": 1.7821297455229807e-05, - "loss": 0.4959, + "epoch": 0.19, + "grad_norm": 0.4981937629809839, + "learning_rate": 1.86861750724399e-05, + "loss": 0.342, "step": 4138 }, { - "epoch": 0.24, - "grad_norm": 0.3316262304163246, - "learning_rate": 1.7820137756123527e-05, - "loss": 0.2842, + "epoch": 0.19, + "grad_norm": 0.41986243297898335, + "learning_rate": 1.8685437737274476e-05, + "loss": 0.2903, "step": 4139 }, { - "epoch": 0.24, - "grad_norm": 1.53751881341363, - "learning_rate": 1.7818977786206217e-05, - "loss": 0.8152, + "epoch": 0.19, + "grad_norm": 0.3768983281775736, + "learning_rate": 1.8684700209821832e-05, + "loss": 0.244, "step": 4140 }, { - "epoch": 0.24, - "grad_norm": 0.32222675266258977, - "learning_rate": 1.7817817545518045e-05, - "loss": 0.2308, + "epoch": 0.19, + "grad_norm": 0.3595236474921723, + "learning_rate": 1.8683962490098293e-05, + "loss": 0.3426, "step": 4141 }, { - "epoch": 0.24, - "grad_norm": 0.3286628540441941, - "learning_rate": 1.7816657034099182e-05, - "loss": 0.2449, + "epoch": 0.19, + "grad_norm": 0.47211723054074545, + "learning_rate": 1.8683224578120197e-05, + "loss": 0.2549, "step": 4142 }, { - "epoch": 0.24, - "grad_norm": 1.4467305285964456, - "learning_rate": 1.781549625198982e-05, - "loss": 0.8061, + "epoch": 0.19, + "grad_norm": 0.43463807481612116, + "learning_rate": 1.8682486473903876e-05, + "loss": 0.2459, "step": 4143 }, { - "epoch": 0.24, - "grad_norm": 0.870823301196831, - "learning_rate": 1.7814335199230164e-05, - "loss": 0.3788, + "epoch": 0.19, + "grad_norm": 0.3479296531679587, + "learning_rate": 1.8681748177465673e-05, + "loss": 0.3038, "step": 4144 }, { - "epoch": 0.24, - "grad_norm": 0.336748819198341, - "learning_rate": 1.7813173875860416e-05, - "loss": 0.2872, + "epoch": 0.19, + "grad_norm": 0.9129937729350435, + "learning_rate": 1.8681009688821932e-05, + "loss": 0.5846, "step": 4145 }, { - "epoch": 0.24, - "grad_norm": 0.6111350426253821, - "learning_rate": 1.781201228192079e-05, - "loss": 0.4169, + "epoch": 0.19, + "grad_norm": 0.41380624137609584, + "learning_rate": 1.8680271007989007e-05, + "loss": 0.2922, "step": 4146 }, { - "epoch": 0.24, - "grad_norm": 0.21527619658183722, - "learning_rate": 1.7810850417451517e-05, - "loss": 0.1193, + "epoch": 0.19, + "grad_norm": 0.5819401863951018, + "learning_rate": 1.8679532134983242e-05, + "loss": 0.3784, "step": 4147 }, { - "epoch": 0.24, - "grad_norm": 0.4550362260441464, - "learning_rate": 1.780968828249283e-05, - "loss": 0.2929, + "epoch": 0.19, + "grad_norm": 0.2808789867807087, + "learning_rate": 1.8678793069821006e-05, + "loss": 0.2439, "step": 4148 }, { - "epoch": 0.24, - "grad_norm": 0.49114266037537563, - "learning_rate": 1.780852587708497e-05, - "loss": 0.3229, + "epoch": 0.19, + "grad_norm": 0.70544131912115, + "learning_rate": 1.8678053812518657e-05, + "loss": 0.4382, "step": 4149 }, { - "epoch": 0.24, - "grad_norm": 1.1197232619356512, - "learning_rate": 1.78073632012682e-05, - "loss": 0.4345, + "epoch": 0.19, + "grad_norm": 0.43774151905000996, + "learning_rate": 1.8677314363092555e-05, + "loss": 0.2528, "step": 4150 }, { - "epoch": 0.24, - "grad_norm": 0.4730494255334301, - "learning_rate": 1.780620025508277e-05, - "loss": 0.3158, + "epoch": 0.19, + "grad_norm": 0.6932091860919112, + "learning_rate": 1.867657472155908e-05, + "loss": 0.4767, "step": 4151 }, { - "epoch": 0.24, - "grad_norm": 0.5063609235250967, - "learning_rate": 1.7805037038568972e-05, - "loss": 0.4172, + "epoch": 0.19, + "grad_norm": 0.3625520053029062, + "learning_rate": 1.8675834887934604e-05, + "loss": 0.3098, "step": 4152 }, { - "epoch": 0.24, - "grad_norm": 0.41048674198046375, - "learning_rate": 1.780387355176707e-05, - "loss": 0.2985, + "epoch": 0.19, + "grad_norm": 0.410908978760509, + "learning_rate": 1.8675094862235502e-05, + "loss": 0.23, "step": 4153 }, { - "epoch": 0.24, - "grad_norm": 0.21889835566971555, - "learning_rate": 1.7802709794717363e-05, - "loss": 0.1212, + "epoch": 0.19, + "grad_norm": 0.40719130806744624, + "learning_rate": 1.8674354644478163e-05, + "loss": 0.2568, "step": 4154 }, { - "epoch": 0.24, - "grad_norm": 1.0661163910225424, - "learning_rate": 1.780154576746015e-05, - "loss": 0.4338, + "epoch": 0.19, + "grad_norm": 1.49686109623509, + "learning_rate": 1.867361423467897e-05, + "loss": 0.8323, "step": 4155 }, { - "epoch": 0.24, - "grad_norm": 1.3205369104367537, - "learning_rate": 1.7800381470035745e-05, - "loss": 0.7398, + "epoch": 0.19, + "grad_norm": 0.31741884244535146, + "learning_rate": 1.8672873632854322e-05, + "loss": 0.2386, "step": 4156 }, { - "epoch": 0.24, - "grad_norm": 0.36004560177244327, - "learning_rate": 1.7799216902484465e-05, - "loss": 0.2448, + "epoch": 0.19, + "grad_norm": 0.7604389779477356, + "learning_rate": 1.867213283902061e-05, + "loss": 0.4853, "step": 4157 }, { - "epoch": 0.24, - "grad_norm": 1.496192252384227, - "learning_rate": 1.7798052064846637e-05, - "loss": 0.805, + "epoch": 0.19, + "grad_norm": 0.6199582958052203, + "learning_rate": 1.8671391853194235e-05, + "loss": 0.4581, "step": 4158 }, { - "epoch": 0.24, - "grad_norm": 0.8488803818598749, - "learning_rate": 1.7796886957162603e-05, - "loss": 0.5812, + "epoch": 0.19, + "grad_norm": 0.32601407224753537, + "learning_rate": 1.86706506753916e-05, + "loss": 0.1849, "step": 4159 }, { - "epoch": 0.24, - "grad_norm": 0.2631792065123535, - "learning_rate": 1.7795721579472712e-05, - "loss": 0.2027, + "epoch": 0.19, + "grad_norm": 0.44842277184067525, + "learning_rate": 1.866990930562912e-05, + "loss": 0.3469, "step": 4160 }, { - "epoch": 0.24, - "grad_norm": 0.39486478303427447, - "learning_rate": 1.7794555931817314e-05, - "loss": 0.3416, + "epoch": 0.19, + "grad_norm": 0.3945089350021445, + "learning_rate": 1.86691677439232e-05, + "loss": 0.2633, "step": 4161 }, { - "epoch": 0.24, - "grad_norm": 0.7292174481348954, - "learning_rate": 1.779339001423678e-05, - "loss": 0.5763, + "epoch": 0.19, + "grad_norm": 0.4444742041732898, + "learning_rate": 1.866842599029026e-05, + "loss": 0.3224, "step": 4162 }, { - "epoch": 0.24, - "grad_norm": 0.277092082065919, - "learning_rate": 1.7792223826771484e-05, - "loss": 0.1583, + "epoch": 0.19, + "grad_norm": 1.286569326951523, + "learning_rate": 1.866768404474673e-05, + "loss": 0.4192, "step": 4163 }, { - "epoch": 0.24, - "grad_norm": 0.648421763994709, - "learning_rate": 1.779105736946181e-05, - "loss": 0.4098, + "epoch": 0.19, + "grad_norm": 0.40065726187994355, + "learning_rate": 1.8666941907309026e-05, + "loss": 0.3188, "step": 4164 }, { - "epoch": 0.24, - "grad_norm": 0.4148689107380746, - "learning_rate": 1.778989064234816e-05, - "loss": 0.3432, + "epoch": 0.19, + "grad_norm": 0.4234475112488729, + "learning_rate": 1.866619957799358e-05, + "loss": 0.3161, "step": 4165 }, { - "epoch": 0.24, - "grad_norm": 0.5232083805274033, - "learning_rate": 1.7788723645470928e-05, - "loss": 0.3272, + "epoch": 0.19, + "grad_norm": 0.4275319204453874, + "learning_rate": 1.866545705681683e-05, + "loss": 0.1917, "step": 4166 }, { - "epoch": 0.24, - "grad_norm": 0.2818994881344073, - "learning_rate": 1.7787556378870534e-05, - "loss": 0.2582, + "epoch": 0.19, + "grad_norm": 0.38726615664524344, + "learning_rate": 1.8664714343795213e-05, + "loss": 0.2314, "step": 4167 }, { - "epoch": 0.24, - "grad_norm": 0.457924102148931, - "learning_rate": 1.7786388842587397e-05, - "loss": 0.377, + "epoch": 0.19, + "grad_norm": 0.6271993644247684, + "learning_rate": 1.866397143894517e-05, + "loss": 0.3397, "step": 4168 }, { - "epoch": 0.24, - "grad_norm": 0.5522352742313344, - "learning_rate": 1.7785221036661945e-05, - "loss": 0.3301, + "epoch": 0.19, + "grad_norm": 0.6373485797217358, + "learning_rate": 1.866322834228315e-05, + "loss": 0.3424, "step": 4169 }, { - "epoch": 0.24, - "grad_norm": 0.32880274033610146, - "learning_rate": 1.778405296113463e-05, - "loss": 0.2008, + "epoch": 0.19, + "grad_norm": 0.9843045911060582, + "learning_rate": 1.86624850538256e-05, + "loss": 0.3838, "step": 4170 }, { - "epoch": 0.24, - "grad_norm": 0.7913590840489569, - "learning_rate": 1.7782884616045892e-05, - "loss": 0.5754, + "epoch": 0.19, + "grad_norm": 0.4276787637856182, + "learning_rate": 1.8661741573588984e-05, + "loss": 0.3215, "step": 4171 }, { - "epoch": 0.24, - "grad_norm": 0.5348610835612185, - "learning_rate": 1.7781716001436192e-05, - "loss": 0.3692, + "epoch": 0.19, + "grad_norm": 0.47136134009969677, + "learning_rate": 1.8660997901589758e-05, + "loss": 0.3062, "step": 4172 }, { - "epoch": 0.24, - "grad_norm": 0.3764624857271168, - "learning_rate": 1.7780547117346005e-05, - "loss": 0.2682, + "epoch": 0.19, + "grad_norm": 0.3946763554078555, + "learning_rate": 1.866025403784439e-05, + "loss": 0.2094, "step": 4173 }, { - "epoch": 0.24, - "grad_norm": 1.2520872298678765, - "learning_rate": 1.7779377963815804e-05, - "loss": 0.7315, + "epoch": 0.19, + "grad_norm": 0.4224640509324194, + "learning_rate": 1.865950998236934e-05, + "loss": 0.3066, "step": 4174 }, { - "epoch": 0.24, - "grad_norm": 0.3392114778810658, - "learning_rate": 1.7778208540886082e-05, - "loss": 0.1731, + "epoch": 0.19, + "grad_norm": 0.7803322201482245, + "learning_rate": 1.8658765735181084e-05, + "loss": 0.4865, "step": 4175 }, { - "epoch": 0.24, - "grad_norm": 0.5068710149305249, - "learning_rate": 1.777703884859733e-05, - "loss": 0.3636, + "epoch": 0.19, + "grad_norm": 0.41504883906285445, + "learning_rate": 1.8658021296296103e-05, + "loss": 0.2681, "step": 4176 }, { - "epoch": 0.24, - "grad_norm": 0.393948244622996, - "learning_rate": 1.7775868886990056e-05, - "loss": 0.3052, + "epoch": 0.19, + "grad_norm": 0.4014559075218351, + "learning_rate": 1.8657276665730874e-05, + "loss": 0.2752, "step": 4177 }, { - "epoch": 0.24, - "grad_norm": 0.4345252768576852, - "learning_rate": 1.7774698656104778e-05, - "loss": 0.343, + "epoch": 0.19, + "grad_norm": 1.5075213150953028, + "learning_rate": 1.8656531843501882e-05, + "loss": 0.8498, "step": 4178 }, { - "epoch": 0.24, - "grad_norm": 0.5250465194272104, - "learning_rate": 1.777352815598202e-05, - "loss": 0.3784, + "epoch": 0.19, + "grad_norm": 0.582299446058483, + "learning_rate": 1.865578682962562e-05, + "loss": 0.369, "step": 4179 }, { - "epoch": 0.24, - "grad_norm": 0.34170118548309036, - "learning_rate": 1.7772357386662316e-05, - "loss": 0.2921, + "epoch": 0.19, + "grad_norm": 0.3968796581595646, + "learning_rate": 1.865504162411858e-05, + "loss": 0.298, "step": 4180 }, { - "epoch": 0.24, - "grad_norm": 0.3091716890840008, - "learning_rate": 1.777118634818621e-05, - "loss": 0.2455, + "epoch": 0.19, + "grad_norm": 0.5380824013126718, + "learning_rate": 1.865429622699726e-05, + "loss": 0.3347, "step": 4181 }, { - "epoch": 0.24, - "grad_norm": 0.32875149163461864, - "learning_rate": 1.7770015040594256e-05, - "loss": 0.2709, + "epoch": 0.19, + "grad_norm": 0.31864338716591833, + "learning_rate": 1.865355063827816e-05, + "loss": 0.1458, "step": 4182 }, { - "epoch": 0.24, - "grad_norm": 0.5657387770353896, - "learning_rate": 1.7768843463927012e-05, - "loss": 0.4015, + "epoch": 0.19, + "grad_norm": 0.5149241402283542, + "learning_rate": 1.8652804857977795e-05, + "loss": 0.2938, "step": 4183 }, { - "epoch": 0.24, - "grad_norm": 0.4578594861031997, - "learning_rate": 1.776767161822506e-05, - "loss": 0.3508, + "epoch": 0.19, + "grad_norm": 0.5272483429854528, + "learning_rate": 1.8652058886112668e-05, + "loss": 0.3608, "step": 4184 }, { - "epoch": 0.24, - "grad_norm": 0.3722925933401782, - "learning_rate": 1.7766499503528965e-05, - "loss": 0.293, + "epoch": 0.19, + "grad_norm": 0.7197677646921471, + "learning_rate": 1.8651312722699297e-05, + "loss": 0.3616, "step": 4185 }, { - "epoch": 0.24, - "grad_norm": 0.8542895200701599, - "learning_rate": 1.776532711987933e-05, - "loss": 0.561, + "epoch": 0.19, + "grad_norm": 0.4716577641111177, + "learning_rate": 1.86505663677542e-05, + "loss": 0.3012, "step": 4186 }, { - "epoch": 0.24, - "grad_norm": 0.3903147749744142, - "learning_rate": 1.7764154467316753e-05, - "loss": 0.206, + "epoch": 0.19, + "grad_norm": 0.33763787108530596, + "learning_rate": 1.8649819821293897e-05, + "loss": 0.1897, "step": 4187 }, { - "epoch": 0.24, - "grad_norm": 0.3327376950139663, - "learning_rate": 1.776298154588184e-05, - "loss": 0.2483, + "epoch": 0.19, + "grad_norm": 0.4301601508073393, + "learning_rate": 1.8649073083334923e-05, + "loss": 0.3134, "step": 4188 }, { - "epoch": 0.24, - "grad_norm": 0.6250386359397111, - "learning_rate": 1.7761808355615207e-05, - "loss": 0.4006, + "epoch": 0.19, + "grad_norm": 0.3698944457036956, + "learning_rate": 1.8648326153893808e-05, + "loss": 0.2391, "step": 4189 }, { - "epoch": 0.24, - "grad_norm": 0.49450962581882085, - "learning_rate": 1.7760634896557483e-05, - "loss": 0.2286, + "epoch": 0.19, + "grad_norm": 1.0532131977735477, + "learning_rate": 1.8647579032987085e-05, + "loss": 0.7103, "step": 4190 }, { - "epoch": 0.24, - "grad_norm": 0.42356405355596793, - "learning_rate": 1.775946116874931e-05, - "loss": 0.3589, + "epoch": 0.19, + "grad_norm": 0.699085459607911, + "learning_rate": 1.86468317206313e-05, + "loss": 0.5148, "step": 4191 }, { - "epoch": 0.24, - "grad_norm": 0.5552779771392584, - "learning_rate": 1.7758287172231333e-05, - "loss": 0.4024, + "epoch": 0.19, + "grad_norm": 0.3157901619692983, + "learning_rate": 1.8646084216842993e-05, + "loss": 0.2444, "step": 4192 }, { - "epoch": 0.24, - "grad_norm": 0.37558968188355335, - "learning_rate": 1.77571129070442e-05, - "loss": 0.2241, + "epoch": 0.19, + "grad_norm": 0.4205461480357759, + "learning_rate": 1.8645336521638713e-05, + "loss": 0.2625, "step": 4193 }, { - "epoch": 0.24, - "grad_norm": 0.25518944264314564, - "learning_rate": 1.775593837322858e-05, - "loss": 0.2089, + "epoch": 0.19, + "grad_norm": 1.7450015034730575, + "learning_rate": 1.864458863503502e-05, + "loss": 0.6988, "step": 4194 }, { - "epoch": 0.24, - "grad_norm": 1.00903247293503, - "learning_rate": 1.775476357082515e-05, - "loss": 0.4944, + "epoch": 0.19, + "grad_norm": 0.3652664531919449, + "learning_rate": 1.8643840557048462e-05, + "loss": 0.2397, "step": 4195 }, { - "epoch": 0.24, - "grad_norm": 0.2975944794423369, - "learning_rate": 1.7753588499874592e-05, - "loss": 0.2273, + "epoch": 0.19, + "grad_norm": 0.4508231709420786, + "learning_rate": 1.8643092287695604e-05, + "loss": 0.3365, "step": 4196 }, { - "epoch": 0.24, - "grad_norm": 0.44397822897049244, - "learning_rate": 1.7752413160417597e-05, - "loss": 0.357, + "epoch": 0.19, + "grad_norm": 0.6318623707717334, + "learning_rate": 1.8642343826993015e-05, + "loss": 0.4869, "step": 4197 }, { - "epoch": 0.24, - "grad_norm": 1.1675312656602383, - "learning_rate": 1.7751237552494867e-05, - "loss": 0.862, + "epoch": 0.19, + "grad_norm": 0.40324997587226413, + "learning_rate": 1.864159517495726e-05, + "loss": 0.2326, "step": 4198 }, { - "epoch": 0.24, - "grad_norm": 0.2532797489976462, - "learning_rate": 1.7750061676147114e-05, - "loss": 0.108, + "epoch": 0.19, + "grad_norm": 0.34008914617717506, + "learning_rate": 1.8640846331604924e-05, + "loss": 0.2148, "step": 4199 }, { - "epoch": 0.24, - "grad_norm": 0.4934846294571999, - "learning_rate": 1.774888553141506e-05, - "loss": 0.3664, + "epoch": 0.19, + "grad_norm": 0.4105311755041284, + "learning_rate": 1.8640097296952577e-05, + "loss": 0.3071, "step": 4200 }, { - "epoch": 0.24, - "grad_norm": 0.3683129833102259, - "learning_rate": 1.7747709118339428e-05, - "loss": 0.2941, + "epoch": 0.19, + "grad_norm": 0.37575741391242884, + "learning_rate": 1.86393480710168e-05, + "loss": 0.2894, "step": 4201 }, { - "epoch": 0.24, - "grad_norm": 1.1727462583908863, - "learning_rate": 1.7746532436960965e-05, - "loss": 0.653, + "epoch": 0.19, + "grad_norm": 0.7221986833686256, + "learning_rate": 1.863859865381418e-05, + "loss": 0.4171, "step": 4202 }, { - "epoch": 0.24, - "grad_norm": 0.3065183783693479, - "learning_rate": 1.7745355487320418e-05, - "loss": 0.2344, + "epoch": 0.19, + "grad_norm": 0.40624930495804484, + "learning_rate": 1.863784904536132e-05, + "loss": 0.3635, "step": 4203 }, { - "epoch": 0.24, - "grad_norm": 0.41090849776968874, - "learning_rate": 1.7744178269458547e-05, - "loss": 0.3612, + "epoch": 0.19, + "grad_norm": 0.44560544596646917, + "learning_rate": 1.86370992456748e-05, + "loss": 0.2901, "step": 4204 }, { - "epoch": 0.24, - "grad_norm": 0.6804604082844093, - "learning_rate": 1.774300078341611e-05, - "loss": 0.5032, + "epoch": 0.19, + "grad_norm": 0.3727641965880537, + "learning_rate": 1.8636349254771234e-05, + "loss": 0.2042, "step": 4205 }, { - "epoch": 0.24, - "grad_norm": 0.3564856518489956, - "learning_rate": 1.7741823029233892e-05, - "loss": 0.2573, + "epoch": 0.19, + "grad_norm": 0.516023009585777, + "learning_rate": 1.8635599072667213e-05, + "loss": 0.3425, "step": 4206 }, { - "epoch": 0.24, - "grad_norm": 0.3123261819071861, - "learning_rate": 1.7740645006952674e-05, - "loss": 0.2185, + "epoch": 0.19, + "grad_norm": 0.4260646509227388, + "learning_rate": 1.8634848699379354e-05, + "loss": 0.2607, "step": 4207 }, { - "epoch": 0.24, - "grad_norm": 0.36540726586784045, - "learning_rate": 1.773946671661325e-05, - "loss": 0.2873, + "epoch": 0.19, + "grad_norm": 0.3598279327398515, + "learning_rate": 1.8634098134924267e-05, + "loss": 0.2907, "step": 4208 }, { - "epoch": 0.24, - "grad_norm": 0.3851981339848704, - "learning_rate": 1.773828815825643e-05, - "loss": 0.2368, + "epoch": 0.19, + "grad_norm": 0.9702796332041599, + "learning_rate": 1.863334737931857e-05, + "loss": 0.549, "step": 4209 }, { - "epoch": 0.24, - "grad_norm": 1.403401900227148, - "learning_rate": 1.773710933192302e-05, - "loss": 0.8898, + "epoch": 0.19, + "grad_norm": 0.3919851985249983, + "learning_rate": 1.8632596432578883e-05, + "loss": 0.2848, "step": 4210 }, { - "epoch": 0.24, - "grad_norm": 0.8506443084440741, - "learning_rate": 1.7735930237653853e-05, - "loss": 0.4175, + "epoch": 0.19, + "grad_norm": 0.4326622571949437, + "learning_rate": 1.863184529472183e-05, + "loss": 0.2627, "step": 4211 }, { - "epoch": 0.24, - "grad_norm": 0.3401308629226619, - "learning_rate": 1.773475087548975e-05, - "loss": 0.2571, + "epoch": 0.19, + "grad_norm": 0.31763419007332644, + "learning_rate": 1.8631093965764045e-05, + "loss": 0.166, "step": 4212 }, { - "epoch": 0.24, - "grad_norm": 0.3457458099433843, - "learning_rate": 1.7733571245471557e-05, - "loss": 0.2495, + "epoch": 0.19, + "grad_norm": 0.4018304788174188, + "learning_rate": 1.8630342445722152e-05, + "loss": 0.2826, "step": 4213 }, { - "epoch": 0.24, - "grad_norm": 0.3963265595086062, - "learning_rate": 1.7732391347640125e-05, - "loss": 0.2667, + "epoch": 0.19, + "grad_norm": 1.008148420842974, + "learning_rate": 1.86295907346128e-05, + "loss": 0.5096, "step": 4214 }, { - "epoch": 0.24, - "grad_norm": 0.41455830175953723, - "learning_rate": 1.7731211182036312e-05, - "loss": 0.3137, + "epoch": 0.19, + "grad_norm": 0.38199778723774036, + "learning_rate": 1.8628838832452628e-05, + "loss": 0.3102, "step": 4215 }, { - "epoch": 0.24, - "grad_norm": 0.40500875371408623, - "learning_rate": 1.773003074870099e-05, - "loss": 0.299, + "epoch": 0.19, + "grad_norm": 0.39715618525526375, + "learning_rate": 1.8628086739258278e-05, + "loss": 0.3, "step": 4216 }, { - "epoch": 0.24, - "grad_norm": 0.5403576844166071, - "learning_rate": 1.7728850047675035e-05, - "loss": 0.35, + "epoch": 0.19, + "grad_norm": 1.5529649301836324, + "learning_rate": 1.8627334455046404e-05, + "loss": 0.6103, "step": 4217 }, { - "epoch": 0.24, - "grad_norm": 0.4087263684779261, - "learning_rate": 1.7727669078999336e-05, - "loss": 0.3484, + "epoch": 0.19, + "grad_norm": 0.3406019305545106, + "learning_rate": 1.862658197983366e-05, + "loss": 0.1572, "step": 4218 }, { - "epoch": 0.24, - "grad_norm": 0.2268995265041368, - "learning_rate": 1.772648784271479e-05, - "loss": 0.1586, + "epoch": 0.19, + "grad_norm": 0.5056276918318409, + "learning_rate": 1.8625829313636707e-05, + "loss": 0.3183, "step": 4219 }, { - "epoch": 0.24, - "grad_norm": 0.38627405176100876, - "learning_rate": 1.7725306338862298e-05, - "loss": 0.3399, + "epoch": 0.19, + "grad_norm": 0.395127033173385, + "learning_rate": 1.86250764564722e-05, + "loss": 0.3232, "step": 4220 }, { - "epoch": 0.24, - "grad_norm": 0.34508423192036813, - "learning_rate": 1.7724124567482782e-05, - "loss": 0.2773, + "epoch": 0.19, + "grad_norm": 0.6973099338641351, + "learning_rate": 1.862432340835682e-05, + "loss": 0.4816, "step": 4221 }, { - "epoch": 0.24, - "grad_norm": 0.5635387172753441, - "learning_rate": 1.7722942528617163e-05, - "loss": 0.443, + "epoch": 0.19, + "grad_norm": 0.500428749980683, + "learning_rate": 1.862357016930723e-05, + "loss": 0.3079, "step": 4222 }, { - "epoch": 0.24, - "grad_norm": 0.6278867989446979, - "learning_rate": 1.772176022230638e-05, - "loss": 0.4841, + "epoch": 0.19, + "grad_norm": 0.5877206281907318, + "learning_rate": 1.862281673934011e-05, + "loss": 0.4011, "step": 4223 }, { - "epoch": 0.24, - "grad_norm": 0.3090845892934344, - "learning_rate": 1.7720577648591368e-05, - "loss": 0.2715, + "epoch": 0.19, + "grad_norm": 0.31240727049638506, + "learning_rate": 1.8622063118472135e-05, + "loss": 0.2035, "step": 4224 }, { - "epoch": 0.24, - "grad_norm": 0.43203694883147464, - "learning_rate": 1.771939480751309e-05, - "loss": 0.2774, + "epoch": 0.19, + "grad_norm": 0.5144508665958528, + "learning_rate": 1.8621309306719997e-05, + "loss": 0.3407, "step": 4225 }, { - "epoch": 0.24, - "grad_norm": 0.28886362206526994, - "learning_rate": 1.7718211699112496e-05, - "loss": 0.1862, + "epoch": 0.19, + "grad_norm": 0.7317596647436877, + "learning_rate": 1.8620555304100376e-05, + "loss": 0.4356, "step": 4226 }, { - "epoch": 0.24, - "grad_norm": 0.35675105970492776, - "learning_rate": 1.7717028323430562e-05, - "loss": 0.2986, + "epoch": 0.19, + "grad_norm": 0.42435270201847763, + "learning_rate": 1.861980111062997e-05, + "loss": 0.3376, "step": 4227 }, { - "epoch": 0.24, - "grad_norm": 0.4192480731244511, - "learning_rate": 1.7715844680508273e-05, - "loss": 0.3561, + "epoch": 0.19, + "grad_norm": 0.3947162720971995, + "learning_rate": 1.8619046726325475e-05, + "loss": 0.2002, "step": 4228 }, { - "epoch": 0.24, - "grad_norm": 0.954576314104161, - "learning_rate": 1.7714660770386615e-05, - "loss": 0.3737, + "epoch": 0.19, + "grad_norm": 0.6189747287194609, + "learning_rate": 1.8618292151203593e-05, + "loss": 0.4288, "step": 4229 }, { - "epoch": 0.24, - "grad_norm": 0.37465111339648155, - "learning_rate": 1.771347659310658e-05, - "loss": 0.3111, + "epoch": 0.19, + "grad_norm": 0.37510562648132945, + "learning_rate": 1.861753738528103e-05, + "loss": 0.2053, "step": 4230 }, { - "epoch": 0.24, - "grad_norm": 1.154325732369229, - "learning_rate": 1.7712292148709188e-05, - "loss": 0.667, + "epoch": 0.19, + "grad_norm": 0.39244009005410496, + "learning_rate": 1.8616782428574495e-05, + "loss": 0.2667, "step": 4231 }, { - "epoch": 0.24, - "grad_norm": 0.23526929972174873, - "learning_rate": 1.7711107437235453e-05, - "loss": 0.1821, + "epoch": 0.19, + "grad_norm": 0.41892092364403355, + "learning_rate": 1.86160272811007e-05, + "loss": 0.354, "step": 4232 }, { - "epoch": 0.24, - "grad_norm": 0.4190335124027483, - "learning_rate": 1.7709922458726395e-05, - "loss": 0.3299, + "epoch": 0.19, + "grad_norm": 1.410294070545441, + "learning_rate": 1.861527194287637e-05, + "loss": 0.8041, "step": 4233 }, { - "epoch": 0.24, - "grad_norm": 0.8046301903471953, - "learning_rate": 1.770873721322305e-05, - "loss": 0.5668, + "epoch": 0.19, + "grad_norm": 0.37826410249052556, + "learning_rate": 1.8614516413918218e-05, + "loss": 0.2288, "step": 4234 }, { - "epoch": 0.24, - "grad_norm": 0.3958648304902932, - "learning_rate": 1.7707551700766474e-05, - "loss": 0.2981, + "epoch": 0.19, + "grad_norm": 1.3672072258194714, + "learning_rate": 1.8613760694242978e-05, + "loss": 0.6668, "step": 4235 }, { - "epoch": 0.24, - "grad_norm": 0.4148428047558691, - "learning_rate": 1.770636592139771e-05, - "loss": 0.3152, + "epoch": 0.19, + "grad_norm": 0.47346234378571395, + "learning_rate": 1.8613004783867373e-05, + "loss": 0.357, "step": 4236 }, { - "epoch": 0.24, - "grad_norm": 0.4855183491267279, - "learning_rate": 1.7705179875157826e-05, - "loss": 0.3586, + "epoch": 0.19, + "grad_norm": 0.48788018254471677, + "learning_rate": 1.861224868280815e-05, + "loss": 0.3419, "step": 4237 }, { - "epoch": 0.24, - "grad_norm": 0.24397917134812547, - "learning_rate": 1.7703993562087895e-05, - "loss": 0.1169, + "epoch": 0.19, + "grad_norm": 0.25386238628449465, + "learning_rate": 1.861149239108204e-05, + "loss": 0.1526, "step": 4238 }, { - "epoch": 0.24, - "grad_norm": 0.3776877212817177, - "learning_rate": 1.7702806982229e-05, - "loss": 0.2973, + "epoch": 0.19, + "grad_norm": 0.45806722560237756, + "learning_rate": 1.8610735908705786e-05, + "loss": 0.3359, "step": 4239 }, { - "epoch": 0.24, - "grad_norm": 0.5154970926323544, - "learning_rate": 1.7701620135622228e-05, - "loss": 0.3764, + "epoch": 0.19, + "grad_norm": 1.0915031035295915, + "learning_rate": 1.860997923569614e-05, + "loss": 0.6028, "step": 4240 }, { - "epoch": 0.24, - "grad_norm": 1.042901070896944, - "learning_rate": 1.7700433022308684e-05, - "loss": 0.635, + "epoch": 0.19, + "grad_norm": 0.4183587145267306, + "learning_rate": 1.8609222372069852e-05, + "loss": 0.223, "step": 4241 }, { - "epoch": 0.24, - "grad_norm": 0.3494430310945817, - "learning_rate": 1.7699245642329473e-05, - "loss": 0.2064, + "epoch": 0.19, + "grad_norm": 0.722183538773224, + "learning_rate": 1.860846531784368e-05, + "loss": 0.462, "step": 4242 }, { - "epoch": 0.24, - "grad_norm": 1.1972989287454718, - "learning_rate": 1.7698057995725717e-05, - "loss": 0.6586, + "epoch": 0.19, + "grad_norm": 0.4822117616404776, + "learning_rate": 1.860770807303438e-05, + "loss": 0.2968, "step": 4243 }, { - "epoch": 0.24, - "grad_norm": 0.3358791750280596, - "learning_rate": 1.7696870082538544e-05, - "loss": 0.2379, + "epoch": 0.19, + "grad_norm": 0.26642257484066423, + "learning_rate": 1.8606950637658722e-05, + "loss": 0.1939, "step": 4244 }, { - "epoch": 0.24, - "grad_norm": 0.4175936922148545, - "learning_rate": 1.769568190280909e-05, - "loss": 0.2467, + "epoch": 0.2, + "grad_norm": 1.4719138271440375, + "learning_rate": 1.860619301173347e-05, + "loss": 0.8307, "step": 4245 }, { - "epoch": 0.24, - "grad_norm": 1.2074876580005733, - "learning_rate": 1.7694493456578503e-05, - "loss": 0.4767, + "epoch": 0.2, + "grad_norm": 0.6849635966770423, + "learning_rate": 1.86054351952754e-05, + "loss": 0.3877, "step": 4246 }, { - "epoch": 0.24, - "grad_norm": 0.4768857304128634, - "learning_rate": 1.769330474388794e-05, - "loss": 0.3806, + "epoch": 0.2, + "grad_norm": 0.4050195896111727, + "learning_rate": 1.8604677188301288e-05, + "loss": 0.267, "step": 4247 }, { - "epoch": 0.24, - "grad_norm": 0.3347435221937712, - "learning_rate": 1.7692115764778564e-05, - "loss": 0.2254, + "epoch": 0.2, + "grad_norm": 0.7986489568831321, + "learning_rate": 1.860391899082792e-05, + "loss": 0.4728, "step": 4248 }, { - "epoch": 0.24, - "grad_norm": 1.2119371591283326, - "learning_rate": 1.7690926519291548e-05, - "loss": 0.6777, + "epoch": 0.2, + "grad_norm": 0.7684005142880341, + "learning_rate": 1.8603160602872074e-05, + "loss": 0.3507, "step": 4249 }, { - "epoch": 0.24, - "grad_norm": 0.5732297049816935, - "learning_rate": 1.7689737007468082e-05, - "loss": 0.3127, + "epoch": 0.2, + "grad_norm": 0.3193234082154365, + "learning_rate": 1.8602402024450547e-05, + "loss": 0.2323, "step": 4250 }, { - "epoch": 0.24, - "grad_norm": 0.4110880892865378, - "learning_rate": 1.768854722934935e-05, - "loss": 0.2711, + "epoch": 0.2, + "grad_norm": 0.462570153195672, + "learning_rate": 1.860164325558013e-05, + "loss": 0.3056, "step": 4251 }, { - "epoch": 0.24, - "grad_norm": 0.5346347752222697, - "learning_rate": 1.7687357184976558e-05, - "loss": 0.304, + "epoch": 0.2, + "grad_norm": 0.5012261944582558, + "learning_rate": 1.8600884296277617e-05, + "loss": 0.3121, "step": 4252 }, { - "epoch": 0.24, - "grad_norm": 0.4209249612683654, - "learning_rate": 1.7686166874390916e-05, - "loss": 0.3113, + "epoch": 0.2, + "grad_norm": 0.6721438343562806, + "learning_rate": 1.860012514655982e-05, + "loss": 0.4336, "step": 4253 }, { - "epoch": 0.24, - "grad_norm": 0.4029524455386135, - "learning_rate": 1.768497629763365e-05, - "loss": 0.284, + "epoch": 0.2, + "grad_norm": 0.9386869920224649, + "learning_rate": 1.859936580644354e-05, + "loss": 0.4039, "step": 4254 }, { - "epoch": 0.24, - "grad_norm": 0.39416868752265927, - "learning_rate": 1.7683785454745983e-05, - "loss": 0.267, + "epoch": 0.2, + "grad_norm": 0.3281931726235795, + "learning_rate": 1.859860627594559e-05, + "loss": 0.2972, "step": 4255 }, { - "epoch": 0.24, - "grad_norm": 0.4629564240237894, - "learning_rate": 1.768259434576916e-05, - "loss": 0.3873, + "epoch": 0.2, + "grad_norm": 0.656306461205417, + "learning_rate": 1.8597846555082784e-05, + "loss": 0.4151, "step": 4256 }, { - "epoch": 0.24, - "grad_norm": 0.39823641313634994, - "learning_rate": 1.768140297074442e-05, - "loss": 0.3366, + "epoch": 0.2, + "grad_norm": 0.22655053636844913, + "learning_rate": 1.8597086643871943e-05, + "loss": 0.1349, "step": 4257 }, { - "epoch": 0.24, - "grad_norm": 0.44095663559634946, - "learning_rate": 1.7680211329713027e-05, - "loss": 0.3567, + "epoch": 0.2, + "grad_norm": 0.9686743337818449, + "learning_rate": 1.8596326542329888e-05, + "loss": 0.4468, "step": 4258 }, { - "epoch": 0.24, - "grad_norm": 0.3111575942167663, - "learning_rate": 1.7679019422716244e-05, - "loss": 0.2723, + "epoch": 0.2, + "grad_norm": 0.419880464276655, + "learning_rate": 1.8595566250473445e-05, + "loss": 0.3284, "step": 4259 }, { - "epoch": 0.24, - "grad_norm": 0.32667092720747753, - "learning_rate": 1.767782724979535e-05, - "loss": 0.2803, + "epoch": 0.2, + "grad_norm": 0.7111856428853667, + "learning_rate": 1.8594805768319457e-05, + "loss": 0.3414, "step": 4260 }, { - "epoch": 0.24, - "grad_norm": 0.3594877209168396, - "learning_rate": 1.767663481099163e-05, - "loss": 0.15, + "epoch": 0.2, + "grad_norm": 0.840799267252484, + "learning_rate": 1.8594045095884748e-05, + "loss": 0.4666, "step": 4261 }, { - "epoch": 0.24, - "grad_norm": 0.5838500192099852, - "learning_rate": 1.7675442106346377e-05, - "loss": 0.4736, + "epoch": 0.2, + "grad_norm": 0.37370294635857737, + "learning_rate": 1.8593284233186168e-05, + "loss": 0.2589, "step": 4262 }, { - "epoch": 0.24, - "grad_norm": 0.34204796905833723, - "learning_rate": 1.7674249135900892e-05, - "loss": 0.3, + "epoch": 0.2, + "grad_norm": 0.4588761679099954, + "learning_rate": 1.8592523180240552e-05, + "loss": 0.3364, "step": 4263 }, { - "epoch": 0.24, - "grad_norm": 0.4358076489958484, - "learning_rate": 1.767305589969649e-05, - "loss": 0.3492, + "epoch": 0.2, + "grad_norm": 0.24257829482369264, + "learning_rate": 1.859176193706476e-05, + "loss": 0.1227, "step": 4264 }, { - "epoch": 0.25, - "grad_norm": 0.21427413315468624, - "learning_rate": 1.7671862397774494e-05, - "loss": 0.1329, + "epoch": 0.2, + "grad_norm": 0.43654393219973653, + "learning_rate": 1.8591000503675635e-05, + "loss": 0.3622, "step": 4265 }, { - "epoch": 0.25, - "grad_norm": 0.35988453024304295, - "learning_rate": 1.767066863017623e-05, - "loss": 0.2818, + "epoch": 0.2, + "grad_norm": 0.8789514667030124, + "learning_rate": 1.8590238880090042e-05, + "loss": 0.5487, "step": 4266 }, { - "epoch": 0.25, - "grad_norm": 0.9682883111015376, - "learning_rate": 1.766947459694304e-05, - "loss": 0.5893, + "epoch": 0.2, + "grad_norm": 0.3764960102209963, + "learning_rate": 1.858947706632484e-05, + "loss": 0.2528, "step": 4267 }, { - "epoch": 0.25, - "grad_norm": 0.3709235509398682, - "learning_rate": 1.766828029811628e-05, - "loss": 0.3169, + "epoch": 0.2, + "grad_norm": 0.5018443247765786, + "learning_rate": 1.858871506239689e-05, + "loss": 0.3542, "step": 4268 }, { - "epoch": 0.25, - "grad_norm": 0.5406561017554826, - "learning_rate": 1.7667085733737298e-05, - "loss": 0.3701, + "epoch": 0.2, + "grad_norm": 1.4484056478095855, + "learning_rate": 1.858795286832307e-05, + "loss": 0.8241, "step": 4269 }, { - "epoch": 0.25, - "grad_norm": 0.4233765597253248, - "learning_rate": 1.7665890903847468e-05, - "loss": 0.3483, + "epoch": 0.2, + "grad_norm": 0.47138726810805803, + "learning_rate": 1.858719048412025e-05, + "loss": 0.2432, "step": 4270 }, { - "epoch": 0.25, - "grad_norm": 0.2603354062606371, - "learning_rate": 1.7664695808488164e-05, - "loss": 0.2296, + "epoch": 0.2, + "grad_norm": 0.4069346534547707, + "learning_rate": 1.8586427909805308e-05, + "loss": 0.2982, "step": 4271 }, { - "epoch": 0.25, - "grad_norm": 0.4590253911487976, - "learning_rate": 1.766350044770078e-05, - "loss": 0.2895, + "epoch": 0.2, + "grad_norm": 0.41383520443790894, + "learning_rate": 1.858566514539513e-05, + "loss": 0.2979, "step": 4272 }, { - "epoch": 0.25, - "grad_norm": 0.3741916057341281, - "learning_rate": 1.76623048215267e-05, - "loss": 0.3108, + "epoch": 0.2, + "grad_norm": 0.40997039287376597, + "learning_rate": 1.85849021909066e-05, + "loss": 0.211, "step": 4273 }, { - "epoch": 0.25, - "grad_norm": 0.7421170186002759, - "learning_rate": 1.7661108930007334e-05, - "loss": 0.3889, + "epoch": 0.2, + "grad_norm": 0.6519100111384685, + "learning_rate": 1.858413904635661e-05, + "loss": 0.3881, "step": 4274 }, { - "epoch": 0.25, - "grad_norm": 0.45537247750720905, - "learning_rate": 1.7659912773184095e-05, - "loss": 0.2839, + "epoch": 0.2, + "grad_norm": 0.4862669576064738, + "learning_rate": 1.8583375711762054e-05, + "loss": 0.3622, "step": 4275 }, { - "epoch": 0.25, - "grad_norm": 0.3989938266714445, - "learning_rate": 1.7658716351098407e-05, - "loss": 0.3258, + "epoch": 0.2, + "grad_norm": 0.7806093011096142, + "learning_rate": 1.858261218713983e-05, + "loss": 0.3965, "step": 4276 }, { - "epoch": 0.25, - "grad_norm": 1.318451272676308, - "learning_rate": 1.76575196637917e-05, - "loss": 0.7469, + "epoch": 0.2, + "grad_norm": 0.3897906346049912, + "learning_rate": 1.858184847250685e-05, + "loss": 0.2836, "step": 4277 }, { - "epoch": 0.25, - "grad_norm": 0.22641040750112476, - "learning_rate": 1.7656322711305417e-05, - "loss": 0.1216, + "epoch": 0.2, + "grad_norm": 0.33605509595260075, + "learning_rate": 1.8581084567880012e-05, + "loss": 0.2083, "step": 4278 }, { - "epoch": 0.25, - "grad_norm": 0.3801159653441589, - "learning_rate": 1.7655125493681012e-05, - "loss": 0.3474, + "epoch": 0.2, + "grad_norm": 0.4958439717340983, + "learning_rate": 1.8580320473276234e-05, + "loss": 0.3217, "step": 4279 }, { - "epoch": 0.25, - "grad_norm": 0.5049163226978106, - "learning_rate": 1.7653928010959936e-05, - "loss": 0.3951, + "epoch": 0.2, + "grad_norm": 0.40378939629410737, + "learning_rate": 1.857955618871243e-05, + "loss": 0.217, "step": 4280 }, { - "epoch": 0.25, - "grad_norm": 0.3293722513867542, - "learning_rate": 1.765273026318366e-05, - "loss": 0.2225, + "epoch": 0.2, + "grad_norm": 1.0668605638875857, + "learning_rate": 1.857879171420552e-05, + "loss": 0.6062, "step": 4281 }, { - "epoch": 0.25, - "grad_norm": 1.349210674604087, - "learning_rate": 1.7651532250393666e-05, - "loss": 0.7544, + "epoch": 0.2, + "grad_norm": 0.5995313988047305, + "learning_rate": 1.857802704977243e-05, + "loss": 0.3973, "step": 4282 }, { - "epoch": 0.25, - "grad_norm": 0.3886610404525146, - "learning_rate": 1.7650333972631443e-05, - "loss": 0.3501, + "epoch": 0.2, + "grad_norm": 0.39627075026799313, + "learning_rate": 1.857726219543009e-05, + "loss": 0.2373, "step": 4283 }, { - "epoch": 0.25, - "grad_norm": 0.24857630478813894, - "learning_rate": 1.7649135429938477e-05, - "loss": 0.1807, + "epoch": 0.2, + "grad_norm": 0.3131188984179075, + "learning_rate": 1.857649715119543e-05, + "loss": 0.2052, "step": 4284 }, { - "epoch": 0.25, - "grad_norm": 0.4471098422892603, - "learning_rate": 1.764793662235628e-05, - "loss": 0.3165, + "epoch": 0.2, + "grad_norm": 0.7731717920181239, + "learning_rate": 1.857573191708539e-05, + "loss": 0.5462, "step": 4285 }, { - "epoch": 0.25, - "grad_norm": 0.6148974597430013, - "learning_rate": 1.7646737549926376e-05, - "loss": 0.4766, + "epoch": 0.2, + "grad_norm": 0.5843632644313207, + "learning_rate": 1.857496649311691e-05, + "loss": 0.2675, "step": 4286 }, { - "epoch": 0.25, - "grad_norm": 0.37646323777327495, - "learning_rate": 1.764553821269027e-05, - "loss": 0.216, + "epoch": 0.2, + "grad_norm": 0.5809243707043547, + "learning_rate": 1.8574200879306938e-05, + "loss": 0.3591, "step": 4287 }, { - "epoch": 0.25, - "grad_norm": 0.39924535706099146, - "learning_rate": 1.764433861068951e-05, - "loss": 0.3246, + "epoch": 0.2, + "grad_norm": 1.137210960246675, + "learning_rate": 1.8573435075672422e-05, + "loss": 0.4499, "step": 4288 }, { - "epoch": 0.25, - "grad_norm": 1.0115253786277327, - "learning_rate": 1.764313874396563e-05, - "loss": 0.6713, + "epoch": 0.2, + "grad_norm": 0.46439071593907794, + "learning_rate": 1.857266908223032e-05, + "loss": 0.3029, "step": 4289 }, { - "epoch": 0.25, - "grad_norm": 0.37546513680467763, - "learning_rate": 1.7641938612560182e-05, - "loss": 0.2153, + "epoch": 0.2, + "grad_norm": 0.31689381380182335, + "learning_rate": 1.857190289899758e-05, + "loss": 0.0951, "step": 4290 }, { - "epoch": 0.25, - "grad_norm": 0.31499422778096564, - "learning_rate": 1.7640738216514733e-05, - "loss": 0.293, + "epoch": 0.2, + "grad_norm": 0.4679546528485914, + "learning_rate": 1.8571136525991178e-05, + "loss": 0.3078, "step": 4291 }, { - "epoch": 0.25, - "grad_norm": 0.341004193409986, - "learning_rate": 1.7639537555870844e-05, - "loss": 0.2931, + "epoch": 0.2, + "grad_norm": 0.585815750155175, + "learning_rate": 1.857036996322807e-05, + "loss": 0.3083, "step": 4292 }, { - "epoch": 0.25, - "grad_norm": 0.5975754148604239, - "learning_rate": 1.7638336630670102e-05, - "loss": 0.3508, + "epoch": 0.2, + "grad_norm": 1.7707977589677684, + "learning_rate": 1.8569603210725233e-05, + "loss": 0.4483, "step": 4293 }, { - "epoch": 0.25, - "grad_norm": 0.4147059353112628, - "learning_rate": 1.763713544095409e-05, - "loss": 0.299, + "epoch": 0.2, + "grad_norm": 0.49923180782558924, + "learning_rate": 1.8568836268499642e-05, + "loss": 0.3514, "step": 4294 }, { - "epoch": 0.25, - "grad_norm": 0.3928079898041313, - "learning_rate": 1.7635933986764403e-05, - "loss": 0.3508, + "epoch": 0.2, + "grad_norm": 0.40470734445423134, + "learning_rate": 1.8568069136568272e-05, + "loss": 0.3209, "step": 4295 }, { - "epoch": 0.25, - "grad_norm": 0.559386737857805, - "learning_rate": 1.7634732268142652e-05, - "loss": 0.3621, + "epoch": 0.2, + "grad_norm": 0.2698376544670076, + "learning_rate": 1.8567301814948112e-05, + "loss": 0.124, "step": 4296 }, { - "epoch": 0.25, - "grad_norm": 0.2935297955911469, - "learning_rate": 1.7633530285130452e-05, - "loss": 0.2541, + "epoch": 0.2, + "grad_norm": 1.2498194323472374, + "learning_rate": 1.8566534303656144e-05, + "loss": 0.6711, "step": 4297 }, { - "epoch": 0.25, - "grad_norm": 0.4767343833347403, - "learning_rate": 1.7632328037769423e-05, - "loss": 0.2457, + "epoch": 0.2, + "grad_norm": 0.575087247125326, + "learning_rate": 1.8565766602709365e-05, + "loss": 0.3151, "step": 4298 }, { - "epoch": 0.25, - "grad_norm": 0.3746108706148988, - "learning_rate": 1.7631125526101206e-05, - "loss": 0.3049, + "epoch": 0.2, + "grad_norm": 0.9593289933001017, + "learning_rate": 1.856499871212477e-05, + "loss": 0.3349, "step": 4299 }, { - "epoch": 0.25, - "grad_norm": 0.4020633558516471, - "learning_rate": 1.7629922750167437e-05, - "loss": 0.2688, + "epoch": 0.2, + "grad_norm": 1.2963006724299926, + "learning_rate": 1.8564230631919355e-05, + "loss": 0.5762, "step": 4300 }, { - "epoch": 0.25, - "grad_norm": 0.8783609175595337, - "learning_rate": 1.7628719710009777e-05, - "loss": 0.6493, + "epoch": 0.2, + "grad_norm": 0.7776943231269928, + "learning_rate": 1.856346236211013e-05, + "loss": 0.295, "step": 4301 }, { - "epoch": 0.25, - "grad_norm": 0.3907321374100405, - "learning_rate": 1.7627516405669876e-05, - "loss": 0.3237, + "epoch": 0.2, + "grad_norm": 0.3605089723684607, + "learning_rate": 1.8562693902714103e-05, + "loss": 0.176, "step": 4302 }, { - "epoch": 0.25, - "grad_norm": 0.49895888305836794, - "learning_rate": 1.7626312837189412e-05, - "loss": 0.4026, + "epoch": 0.2, + "grad_norm": 0.47683794100556987, + "learning_rate": 1.8561925253748283e-05, + "loss": 0.288, "step": 4303 }, { - "epoch": 0.25, - "grad_norm": 0.25989485619288133, - "learning_rate": 1.7625109004610065e-05, - "loss": 0.1937, + "epoch": 0.2, + "grad_norm": 0.5648674900877408, + "learning_rate": 1.8561156415229694e-05, + "loss": 0.284, "step": 4304 }, { - "epoch": 0.25, - "grad_norm": 0.7321965943129197, - "learning_rate": 1.7623904907973515e-05, - "loss": 0.395, + "epoch": 0.2, + "grad_norm": 1.2094934944081341, + "learning_rate": 1.8560387387175352e-05, + "loss": 0.4911, "step": 4305 }, { - "epoch": 0.25, - "grad_norm": 0.43686726649567725, - "learning_rate": 1.762270054732147e-05, - "loss": 0.3508, + "epoch": 0.2, + "grad_norm": 0.5289136182055869, + "learning_rate": 1.855961816960228e-05, + "loss": 0.2708, "step": 4306 }, { - "epoch": 0.25, - "grad_norm": 0.37833068558361926, - "learning_rate": 1.7621495922695633e-05, - "loss": 0.307, + "epoch": 0.2, + "grad_norm": 0.4976460192855919, + "learning_rate": 1.8558848762527517e-05, + "loss": 0.3027, "step": 4307 }, { - "epoch": 0.25, - "grad_norm": 0.6682080526735688, - "learning_rate": 1.7620291034137718e-05, - "loss": 0.4271, + "epoch": 0.2, + "grad_norm": 0.36676655909033784, + "learning_rate": 1.8558079165968087e-05, + "loss": 0.2775, "step": 4308 }, { - "epoch": 0.25, - "grad_norm": 0.3964426570352359, - "learning_rate": 1.7619085881689454e-05, - "loss": 0.3013, + "epoch": 0.2, + "grad_norm": 0.3288064090147841, + "learning_rate": 1.8557309379941037e-05, + "loss": 0.1968, "step": 4309 }, { - "epoch": 0.25, - "grad_norm": 0.23222205472935048, - "learning_rate": 1.761788046539257e-05, - "loss": 0.1052, + "epoch": 0.2, + "grad_norm": 0.4592607713250931, + "learning_rate": 1.8556539404463404e-05, + "loss": 0.3133, "step": 4310 }, { - "epoch": 0.25, - "grad_norm": 0.3912585076504096, - "learning_rate": 1.7616674785288815e-05, - "loss": 0.301, + "epoch": 0.2, + "grad_norm": 0.6501209516359576, + "learning_rate": 1.8555769239552232e-05, + "loss": 0.3698, "step": 4311 }, { - "epoch": 0.25, - "grad_norm": 0.47490678407484405, - "learning_rate": 1.761546884141994e-05, - "loss": 0.3829, + "epoch": 0.2, + "grad_norm": 1.1104770222519336, + "learning_rate": 1.855499888522458e-05, + "loss": 0.4961, "step": 4312 }, { - "epoch": 0.25, - "grad_norm": 0.7637011922535032, - "learning_rate": 1.761426263382771e-05, - "loss": 0.3716, + "epoch": 0.2, + "grad_norm": 0.42803878336672074, + "learning_rate": 1.8554228341497493e-05, + "loss": 0.2811, "step": 4313 }, { - "epoch": 0.25, - "grad_norm": 0.506704534728562, - "learning_rate": 1.761305616255389e-05, - "loss": 0.3931, + "epoch": 0.2, + "grad_norm": 0.30356638922204654, + "learning_rate": 1.855345760838804e-05, + "loss": 0.197, "step": 4314 }, { - "epoch": 0.25, - "grad_norm": 0.36459501700346764, - "learning_rate": 1.761184942764026e-05, - "loss": 0.3025, + "epoch": 0.2, + "grad_norm": 0.569775244592558, + "learning_rate": 1.8552686685913275e-05, + "loss": 0.3809, "step": 4315 }, { - "epoch": 0.25, - "grad_norm": 0.38492087075628456, - "learning_rate": 1.761064242912861e-05, - "loss": 0.2247, + "epoch": 0.2, + "grad_norm": 0.3637833366499475, + "learning_rate": 1.8551915574090277e-05, + "loss": 0.2721, "step": 4316 }, { - "epoch": 0.25, - "grad_norm": 0.2590479572690357, - "learning_rate": 1.7609435167060745e-05, - "loss": 0.1378, + "epoch": 0.2, + "grad_norm": 0.7878643115705979, + "learning_rate": 1.8551144272936103e-05, + "loss": 0.6212, "step": 4317 }, { - "epoch": 0.25, - "grad_norm": 0.505463936850372, - "learning_rate": 1.7608227641478467e-05, - "loss": 0.3552, + "epoch": 0.2, + "grad_norm": 0.5759455781547923, + "learning_rate": 1.855037278246784e-05, + "loss": 0.3904, "step": 4318 }, { - "epoch": 0.25, - "grad_norm": 0.4637726016282319, - "learning_rate": 1.760701985242359e-05, - "loss": 0.3487, + "epoch": 0.2, + "grad_norm": 0.33918927904691615, + "learning_rate": 1.8549601102702564e-05, + "loss": 0.234, "step": 4319 }, { - "epoch": 0.25, - "grad_norm": 0.9181606038737803, - "learning_rate": 1.7605811799937946e-05, - "loss": 0.3159, + "epoch": 0.2, + "grad_norm": 1.4102005371211603, + "learning_rate": 1.854882923365736e-05, + "loss": 0.7264, "step": 4320 }, { - "epoch": 0.25, - "grad_norm": 0.4582402750612981, - "learning_rate": 1.7604603484063363e-05, - "loss": 0.325, + "epoch": 0.2, + "grad_norm": 0.4110129516195724, + "learning_rate": 1.8548057175349314e-05, + "loss": 0.283, "step": 4321 }, { - "epoch": 0.25, - "grad_norm": 0.3085161695929028, - "learning_rate": 1.760339490484169e-05, - "loss": 0.2247, + "epoch": 0.2, + "grad_norm": 0.3481719720307271, + "learning_rate": 1.8547284927795527e-05, + "loss": 0.2588, "step": 4322 }, { - "epoch": 0.25, - "grad_norm": 0.3913027721852565, - "learning_rate": 1.760218606231478e-05, - "loss": 0.2511, + "epoch": 0.2, + "grad_norm": 0.6066801286349882, + "learning_rate": 1.8546512491013082e-05, + "loss": 0.4147, "step": 4323 }, { - "epoch": 0.25, - "grad_norm": 0.5046921571413169, - "learning_rate": 1.7600976956524493e-05, - "loss": 0.3392, + "epoch": 0.2, + "grad_norm": 1.1253572376444525, + "learning_rate": 1.854573986501909e-05, + "loss": 0.733, "step": 4324 }, { - "epoch": 0.25, - "grad_norm": 1.4373319914356162, - "learning_rate": 1.7599767587512698e-05, - "loss": 0.4993, + "epoch": 0.2, + "grad_norm": 0.42633931708458467, + "learning_rate": 1.8544967049830656e-05, + "loss": 0.1902, "step": 4325 }, { - "epoch": 0.25, - "grad_norm": 0.5257980257453084, - "learning_rate": 1.7598557955321282e-05, - "loss": 0.2709, + "epoch": 0.2, + "grad_norm": 0.37327992638226337, + "learning_rate": 1.8544194045464888e-05, + "loss": 0.278, "step": 4326 }, { - "epoch": 0.25, - "grad_norm": 0.32672331423731715, - "learning_rate": 1.7597348059992128e-05, - "loss": 0.3039, + "epoch": 0.2, + "grad_norm": 0.4107699728277899, + "learning_rate": 1.8543420851938895e-05, + "loss": 0.3301, "step": 4327 }, { - "epoch": 0.25, - "grad_norm": 0.2592941345566562, - "learning_rate": 1.7596137901567138e-05, - "loss": 0.1403, + "epoch": 0.2, + "grad_norm": 0.5103978324837614, + "learning_rate": 1.85426474692698e-05, + "loss": 0.2844, "step": 4328 }, { - "epoch": 0.25, - "grad_norm": 1.4062233302214164, - "learning_rate": 1.759492748008822e-05, - "loss": 0.8632, + "epoch": 0.2, + "grad_norm": 0.540719464121703, + "learning_rate": 1.8541873897474727e-05, + "loss": 0.3442, "step": 4329 }, { - "epoch": 0.25, - "grad_norm": 0.6436373842109813, - "learning_rate": 1.759371679559729e-05, - "loss": 0.2666, + "epoch": 0.2, + "grad_norm": 0.47485028926210837, + "learning_rate": 1.8541100136570796e-05, + "loss": 0.3563, "step": 4330 }, { - "epoch": 0.25, - "grad_norm": 0.5413600815168292, - "learning_rate": 1.759250584813627e-05, - "loss": 0.3359, + "epoch": 0.2, + "grad_norm": 0.4551609512600658, + "learning_rate": 1.8540326186575138e-05, + "loss": 0.3311, "step": 4331 }, { - "epoch": 0.25, - "grad_norm": 0.6795446930850362, - "learning_rate": 1.7591294637747104e-05, - "loss": 0.4545, + "epoch": 0.2, + "grad_norm": 0.5272102468708916, + "learning_rate": 1.853955204750489e-05, + "loss": 0.3142, "step": 4332 }, { - "epoch": 0.25, - "grad_norm": 0.34797795924646585, - "learning_rate": 1.7590083164471728e-05, - "loss": 0.2473, + "epoch": 0.2, + "grad_norm": 0.7036754218696629, + "learning_rate": 1.8538777719377194e-05, + "loss": 0.4887, "step": 4333 }, { - "epoch": 0.25, - "grad_norm": 0.4751834716100019, - "learning_rate": 1.75888714283521e-05, - "loss": 0.2814, + "epoch": 0.2, + "grad_norm": 0.2787362636360333, + "learning_rate": 1.8538003202209186e-05, + "loss": 0.1749, "step": 4334 }, { - "epoch": 0.25, - "grad_norm": 0.4792771622907987, - "learning_rate": 1.758765942943018e-05, - "loss": 0.3511, + "epoch": 0.2, + "grad_norm": 0.411163716910392, + "learning_rate": 1.8537228496018017e-05, + "loss": 0.3001, "step": 4335 }, { - "epoch": 0.25, - "grad_norm": 0.4409034335516681, - "learning_rate": 1.7586447167747943e-05, - "loss": 0.2339, + "epoch": 0.2, + "grad_norm": 1.5993647169250635, + "learning_rate": 1.8536453600820838e-05, + "loss": 0.6663, "step": 4336 }, { - "epoch": 0.25, - "grad_norm": 1.0216680780688296, - "learning_rate": 1.7585234643347363e-05, - "loss": 0.4759, + "epoch": 0.2, + "grad_norm": 0.41395799425926444, + "learning_rate": 1.8535678516634803e-05, + "loss": 0.3131, "step": 4337 }, { - "epoch": 0.25, - "grad_norm": 0.3495006405637813, - "learning_rate": 1.7584021856270435e-05, - "loss": 0.2516, + "epoch": 0.2, + "grad_norm": 0.5531271887297363, + "learning_rate": 1.8534903243477072e-05, + "loss": 0.3346, "step": 4338 }, { - "epoch": 0.25, - "grad_norm": 0.399030800710068, - "learning_rate": 1.7582808806559154e-05, - "loss": 0.3012, + "epoch": 0.2, + "grad_norm": 0.5192461296597799, + "learning_rate": 1.8534127781364814e-05, + "loss": 0.4067, "step": 4339 }, { - "epoch": 0.25, - "grad_norm": 0.33002710223861065, - "learning_rate": 1.7581595494255533e-05, - "loss": 0.2138, + "epoch": 0.2, + "grad_norm": 0.39986066894239397, + "learning_rate": 1.8533352130315185e-05, + "loss": 0.2874, "step": 4340 }, { - "epoch": 0.25, - "grad_norm": 0.6027956584907409, - "learning_rate": 1.7580381919401586e-05, - "loss": 0.4314, + "epoch": 0.2, + "grad_norm": 0.49231641606851856, + "learning_rate": 1.8532576290345368e-05, + "loss": 0.2913, "step": 4341 }, { - "epoch": 0.25, - "grad_norm": 0.3981500370131176, - "learning_rate": 1.757916808203934e-05, - "loss": 0.3312, + "epoch": 0.2, + "grad_norm": 0.33991968611855294, + "learning_rate": 1.8531800261472536e-05, + "loss": 0.2481, "step": 4342 }, { - "epoch": 0.25, - "grad_norm": 0.36799863739960603, - "learning_rate": 1.757795398221083e-05, - "loss": 0.3211, + "epoch": 0.2, + "grad_norm": 0.47664552994992193, + "learning_rate": 1.8531024043713868e-05, + "loss": 0.2941, "step": 4343 }, { - "epoch": 0.25, - "grad_norm": 0.44567616701273444, - "learning_rate": 1.7576739619958096e-05, - "loss": 0.2847, + "epoch": 0.2, + "grad_norm": 0.5820049977438846, + "learning_rate": 1.853024763708655e-05, + "loss": 0.386, "step": 4344 }, { - "epoch": 0.25, - "grad_norm": 0.37317146489865405, - "learning_rate": 1.7575524995323195e-05, - "loss": 0.2739, + "epoch": 0.2, + "grad_norm": 0.44935013101731247, + "learning_rate": 1.852947104160777e-05, + "loss": 0.2938, "step": 4345 }, { - "epoch": 0.25, - "grad_norm": 0.38858376631551034, - "learning_rate": 1.757431010834819e-05, - "loss": 0.2789, + "epoch": 0.2, + "grad_norm": 0.43234043786115367, + "learning_rate": 1.8528694257294723e-05, + "loss": 0.2924, "step": 4346 }, { - "epoch": 0.25, - "grad_norm": 0.7434345776855413, - "learning_rate": 1.7573094959075148e-05, - "loss": 0.5022, + "epoch": 0.2, + "grad_norm": 0.37278686898840957, + "learning_rate": 1.8527917284164604e-05, + "loss": 0.3214, "step": 4347 }, { - "epoch": 0.25, - "grad_norm": 0.3514566700372044, - "learning_rate": 1.757187954754616e-05, - "loss": 0.2869, + "epoch": 0.2, + "grad_norm": 0.32407216551283824, + "learning_rate": 1.852714012223462e-05, + "loss": 0.1556, "step": 4348 }, { - "epoch": 0.25, - "grad_norm": 0.7291519504367259, - "learning_rate": 1.7570663873803305e-05, - "loss": 0.4462, + "epoch": 0.2, + "grad_norm": 0.44523967327277575, + "learning_rate": 1.8526362771521968e-05, + "loss": 0.3014, "step": 4349 }, { - "epoch": 0.25, - "grad_norm": 0.2717965523574744, - "learning_rate": 1.7569447937888686e-05, - "loss": 0.2532, + "epoch": 0.2, + "grad_norm": 0.33929429334188205, + "learning_rate": 1.8525585232043863e-05, + "loss": 0.3431, "step": 4350 }, { - "epoch": 0.25, - "grad_norm": 0.35466411632539085, - "learning_rate": 1.756823173984441e-05, - "loss": 0.29, + "epoch": 0.2, + "grad_norm": 0.5721630840565419, + "learning_rate": 1.852480750381752e-05, + "loss": 0.3899, "step": 4351 }, { - "epoch": 0.25, - "grad_norm": 1.4631381574190476, - "learning_rate": 1.7567015279712598e-05, - "loss": 0.7348, + "epoch": 0.2, + "grad_norm": 0.4331620843649517, + "learning_rate": 1.8524029586860154e-05, + "loss": 0.3324, "step": 4352 }, { - "epoch": 0.25, - "grad_norm": 0.8172752249272174, - "learning_rate": 1.756579855753537e-05, - "loss": 0.3552, + "epoch": 0.2, + "grad_norm": 0.556403232307594, + "learning_rate": 1.8523251481188987e-05, + "loss": 0.4095, "step": 4353 }, { - "epoch": 0.25, - "grad_norm": 0.3753765285233513, - "learning_rate": 1.756458157335486e-05, - "loss": 0.3274, + "epoch": 0.2, + "grad_norm": 0.3856497972178304, + "learning_rate": 1.852247318682125e-05, + "loss": 0.2785, "step": 4354 }, { - "epoch": 0.25, - "grad_norm": 0.39089010889022724, - "learning_rate": 1.756336432721322e-05, - "loss": 0.3154, + "epoch": 0.2, + "grad_norm": 0.33890319024818455, + "learning_rate": 1.8521694703774166e-05, + "loss": 0.205, "step": 4355 }, { - "epoch": 0.25, - "grad_norm": 0.23367638912187802, - "learning_rate": 1.7562146819152595e-05, - "loss": 0.1237, + "epoch": 0.2, + "grad_norm": 0.5340483991443618, + "learning_rate": 1.852091603206498e-05, + "loss": 0.3932, "step": 4356 }, { - "epoch": 0.25, - "grad_norm": 0.4181181779260803, - "learning_rate": 1.7560929049215155e-05, - "loss": 0.3227, + "epoch": 0.2, + "grad_norm": 0.841549354372186, + "learning_rate": 1.8520137171710923e-05, + "loss": 0.539, "step": 4357 }, { - "epoch": 0.25, - "grad_norm": 0.8631313679687664, - "learning_rate": 1.7559711017443062e-05, - "loss": 0.4784, + "epoch": 0.2, + "grad_norm": 0.3669957306405334, + "learning_rate": 1.851935812272924e-05, + "loss": 0.2357, "step": 4358 }, { - "epoch": 0.25, - "grad_norm": 0.41323073775920643, - "learning_rate": 1.7558492723878507e-05, - "loss": 0.3094, + "epoch": 0.2, + "grad_norm": 0.591401417289004, + "learning_rate": 1.851857888513718e-05, + "loss": 0.43, "step": 4359 }, { - "epoch": 0.25, - "grad_norm": 0.3782931761755461, - "learning_rate": 1.755727416856367e-05, - "loss": 0.3099, + "epoch": 0.2, + "grad_norm": 0.4009867712349622, + "learning_rate": 1.8517799458951993e-05, + "loss": 0.2494, "step": 4360 }, { - "epoch": 0.25, - "grad_norm": 1.2000243266515298, - "learning_rate": 1.7556055351540757e-05, - "loss": 0.8228, + "epoch": 0.2, + "grad_norm": 0.4435805587919202, + "learning_rate": 1.851701984419094e-05, + "loss": 0.2718, "step": 4361 }, { - "epoch": 0.25, - "grad_norm": 0.2778527231030494, - "learning_rate": 1.7554836272851967e-05, - "loss": 0.1797, + "epoch": 0.2, + "grad_norm": 0.45599626280460354, + "learning_rate": 1.851624004087127e-05, + "loss": 0.355, "step": 4362 }, { - "epoch": 0.25, - "grad_norm": 0.34868729040736435, - "learning_rate": 1.7553616932539522e-05, - "loss": 0.2861, + "epoch": 0.2, + "grad_norm": 0.5885986872875313, + "learning_rate": 1.8515460049010254e-05, + "loss": 0.4573, "step": 4363 }, { - "epoch": 0.25, - "grad_norm": 1.0895145164950082, - "learning_rate": 1.7552397330645654e-05, - "loss": 0.5217, + "epoch": 0.2, + "grad_norm": 0.919810715342427, + "learning_rate": 1.8514679868625162e-05, + "loss": 0.5153, "step": 4364 }, { - "epoch": 0.25, - "grad_norm": 0.6910131523731271, - "learning_rate": 1.7551177467212585e-05, - "loss": 0.508, + "epoch": 0.2, + "grad_norm": 0.40571771462577466, + "learning_rate": 1.8513899499733267e-05, + "loss": 0.2629, "step": 4365 }, { - "epoch": 0.25, - "grad_norm": 0.3367083352550415, - "learning_rate": 1.7549957342282567e-05, - "loss": 0.2234, + "epoch": 0.2, + "grad_norm": 0.31090043942225026, + "learning_rate": 1.8513118942351838e-05, + "loss": 0.2856, "step": 4366 }, { - "epoch": 0.25, - "grad_norm": 0.4083921331597858, - "learning_rate": 1.7548736955897852e-05, - "loss": 0.3233, + "epoch": 0.2, + "grad_norm": 0.6613477166330156, + "learning_rate": 1.8512338196498165e-05, + "loss": 0.3469, "step": 4367 }, { - "epoch": 0.25, - "grad_norm": 0.310682312374531, - "learning_rate": 1.75475163081007e-05, - "loss": 0.1883, + "epoch": 0.2, + "grad_norm": 0.3938635390395695, + "learning_rate": 1.8511557262189525e-05, + "loss": 0.2558, "step": 4368 }, { - "epoch": 0.25, - "grad_norm": 0.35150586064186157, - "learning_rate": 1.7546295398933383e-05, - "loss": 0.2042, + "epoch": 0.2, + "grad_norm": 1.7553070912867637, + "learning_rate": 1.8510776139443212e-05, + "loss": 0.6168, "step": 4369 }, { - "epoch": 0.25, - "grad_norm": 1.1970685231744054, - "learning_rate": 1.754507422843818e-05, - "loss": 0.5545, + "epoch": 0.2, + "grad_norm": 0.3716812692073504, + "learning_rate": 1.850999482827652e-05, + "loss": 0.2994, "step": 4370 }, { - "epoch": 0.25, - "grad_norm": 0.390038478824965, - "learning_rate": 1.7543852796657382e-05, - "loss": 0.3434, + "epoch": 0.2, + "grad_norm": 0.4887162915109267, + "learning_rate": 1.8509213328706742e-05, + "loss": 0.3005, "step": 4371 }, { - "epoch": 0.25, - "grad_norm": 0.3332006509863933, - "learning_rate": 1.7542631103633284e-05, - "loss": 0.1989, + "epoch": 0.2, + "grad_norm": 0.6384983258630488, + "learning_rate": 1.8508431640751187e-05, + "loss": 0.4833, "step": 4372 }, { - "epoch": 0.25, - "grad_norm": 1.2245310547498793, - "learning_rate": 1.7541409149408198e-05, - "loss": 0.8196, + "epoch": 0.2, + "grad_norm": 0.5410682058159972, + "learning_rate": 1.8507649764427153e-05, + "loss": 0.3973, "step": 4373 }, { - "epoch": 0.25, - "grad_norm": 0.2594242254872551, - "learning_rate": 1.7540186934024434e-05, - "loss": 0.2391, + "epoch": 0.2, + "grad_norm": 0.3104542340792599, + "learning_rate": 1.850686769975195e-05, + "loss": 0.219, "step": 4374 }, { - "epoch": 0.25, - "grad_norm": 0.3663500648240596, - "learning_rate": 1.7538964457524326e-05, - "loss": 0.1984, + "epoch": 0.2, + "grad_norm": 0.36846554624916333, + "learning_rate": 1.8506085446742898e-05, + "loss": 0.2637, "step": 4375 }, { - "epoch": 0.25, - "grad_norm": 1.3797280623086665, - "learning_rate": 1.7537741719950197e-05, - "loss": 0.4802, + "epoch": 0.2, + "grad_norm": 0.7770039456206166, + "learning_rate": 1.850530300541731e-05, + "loss": 0.4035, "step": 4376 }, { - "epoch": 0.25, - "grad_norm": 0.9288280071064241, - "learning_rate": 1.75365187213444e-05, - "loss": 0.5739, + "epoch": 0.2, + "grad_norm": 0.533397545938857, + "learning_rate": 1.8504520375792513e-05, + "loss": 0.3515, "step": 4377 }, { - "epoch": 0.25, - "grad_norm": 0.4592788301354964, - "learning_rate": 1.7535295461749285e-05, - "loss": 0.3199, + "epoch": 0.2, + "grad_norm": 0.4281682691114867, + "learning_rate": 1.850373755788583e-05, + "loss": 0.31, "step": 4378 }, { - "epoch": 0.25, - "grad_norm": 0.371263615108485, - "learning_rate": 1.753407194120721e-05, - "loss": 0.2718, + "epoch": 0.2, + "grad_norm": 0.4625267344326516, + "learning_rate": 1.8502954551714598e-05, + "loss": 0.3419, "step": 4379 }, { - "epoch": 0.25, - "grad_norm": 0.4478970966584819, - "learning_rate": 1.753284815976055e-05, - "loss": 0.2777, + "epoch": 0.2, + "grad_norm": 0.43536687230779714, + "learning_rate": 1.8502171357296144e-05, + "loss": 0.2531, "step": 4380 }, { - "epoch": 0.25, - "grad_norm": 0.43708047320580123, - "learning_rate": 1.7531624117451678e-05, - "loss": 0.263, + "epoch": 0.2, + "grad_norm": 0.33368913736221056, + "learning_rate": 1.850138797464781e-05, + "loss": 0.2094, "step": 4381 }, { - "epoch": 0.25, - "grad_norm": 0.7647331876742203, - "learning_rate": 1.753039981432299e-05, - "loss": 0.3161, + "epoch": 0.2, + "grad_norm": 0.5224790476482379, + "learning_rate": 1.8500604403786943e-05, + "loss": 0.3309, "step": 4382 }, { - "epoch": 0.25, - "grad_norm": 1.2246597599600573, - "learning_rate": 1.7529175250416878e-05, - "loss": 0.5649, + "epoch": 0.2, + "grad_norm": 0.37772600058119277, + "learning_rate": 1.8499820644730885e-05, + "loss": 0.3301, "step": 4383 }, { - "epoch": 0.25, - "grad_norm": 0.4019441310451681, - "learning_rate": 1.7527950425775753e-05, - "loss": 0.2567, + "epoch": 0.2, + "grad_norm": 0.6105125192859253, + "learning_rate": 1.849903669749699e-05, + "loss": 0.4059, "step": 4384 }, { - "epoch": 0.25, - "grad_norm": 0.932022146410723, - "learning_rate": 1.7526725340442028e-05, - "loss": 0.5759, + "epoch": 0.2, + "grad_norm": 0.9461638237025233, + "learning_rate": 1.8498252562102615e-05, + "loss": 0.5433, "step": 4385 }, { - "epoch": 0.25, - "grad_norm": 0.31841220554720956, - "learning_rate": 1.7525499994458124e-05, - "loss": 0.2613, + "epoch": 0.2, + "grad_norm": 0.36288331797842355, + "learning_rate": 1.8497468238565118e-05, + "loss": 0.2845, "step": 4386 }, { - "epoch": 0.25, - "grad_norm": 0.37423694836917065, - "learning_rate": 1.7524274387866483e-05, - "loss": 0.2822, + "epoch": 0.2, + "grad_norm": 0.23701882307365282, + "learning_rate": 1.8496683726901865e-05, + "loss": 0.1223, "step": 4387 }, { - "epoch": 0.25, - "grad_norm": 0.5350548955670364, - "learning_rate": 1.7523048520709543e-05, - "loss": 0.2867, + "epoch": 0.2, + "grad_norm": 0.788831722113564, + "learning_rate": 1.8495899027130222e-05, + "loss": 0.3991, "step": 4388 }, { - "epoch": 0.25, - "grad_norm": 1.8947754710817244, - "learning_rate": 1.7521822393029758e-05, - "loss": 0.5852, + "epoch": 0.2, + "grad_norm": 0.4474401788459783, + "learning_rate": 1.8495114139267568e-05, + "loss": 0.3721, "step": 4389 }, { - "epoch": 0.25, - "grad_norm": 0.3998881128220424, - "learning_rate": 1.7520596004869584e-05, - "loss": 0.2702, + "epoch": 0.2, + "grad_norm": 0.4382183351444377, + "learning_rate": 1.849432906333127e-05, + "loss": 0.3597, "step": 4390 }, { - "epoch": 0.25, - "grad_norm": 0.5361072708652229, - "learning_rate": 1.7519369356271492e-05, - "loss": 0.3904, + "epoch": 0.2, + "grad_norm": 0.566346240713494, + "learning_rate": 1.849354379933871e-05, + "loss": 0.2369, "step": 4391 }, { - "epoch": 0.25, - "grad_norm": 0.8123518823570961, - "learning_rate": 1.751814244727797e-05, - "loss": 0.3895, + "epoch": 0.2, + "grad_norm": 0.3569981983294628, + "learning_rate": 1.849275834730728e-05, + "loss": 0.2574, "step": 4392 }, { - "epoch": 0.25, - "grad_norm": 0.6059125687840098, - "learning_rate": 1.751691527793149e-05, - "loss": 0.339, + "epoch": 0.2, + "grad_norm": 0.44273349348819596, + "learning_rate": 1.849197270725437e-05, + "loss": 0.245, "step": 4393 }, { - "epoch": 0.25, - "grad_norm": 0.3313177646320267, - "learning_rate": 1.7515687848274562e-05, - "loss": 0.2728, + "epoch": 0.2, + "grad_norm": 0.3726626617577536, + "learning_rate": 1.849118687919737e-05, + "loss": 0.2525, "step": 4394 }, { - "epoch": 0.25, - "grad_norm": 0.35552239325197443, - "learning_rate": 1.7514460158349686e-05, - "loss": 0.2089, + "epoch": 0.2, + "grad_norm": 0.5168800743061702, + "learning_rate": 1.8490400863153666e-05, + "loss": 0.3436, "step": 4395 }, { - "epoch": 0.25, - "grad_norm": 0.4505421213963791, - "learning_rate": 1.7513232208199378e-05, - "loss": 0.3099, + "epoch": 0.2, + "grad_norm": 0.7209666225132275, + "learning_rate": 1.848961465914068e-05, + "loss": 0.4906, "step": 4396 }, { - "epoch": 0.25, - "grad_norm": 0.6644199425595185, - "learning_rate": 1.751200399786616e-05, - "loss": 0.4381, + "epoch": 0.2, + "grad_norm": 0.3899518100354491, + "learning_rate": 1.8488828267175803e-05, + "loss": 0.2732, "step": 4397 }, { - "epoch": 0.25, - "grad_norm": 0.4423052272917962, - "learning_rate": 1.7510775527392566e-05, - "loss": 0.2984, + "epoch": 0.2, + "grad_norm": 0.33255232527366, + "learning_rate": 1.848804168727645e-05, + "loss": 0.2722, "step": 4398 }, { - "epoch": 0.25, - "grad_norm": 0.6068964287449121, - "learning_rate": 1.7509546796821144e-05, - "loss": 0.3403, + "epoch": 0.2, + "grad_norm": 0.3276942358710649, + "learning_rate": 1.8487254919460037e-05, + "loss": 0.1672, "step": 4399 }, { - "epoch": 0.25, - "grad_norm": 0.44069931145334174, - "learning_rate": 1.7508317806194436e-05, - "loss": 0.316, + "epoch": 0.2, + "grad_norm": 0.5392227071197087, + "learning_rate": 1.8486467963743977e-05, + "loss": 0.3144, "step": 4400 }, { - "epoch": 0.25, - "grad_norm": 0.30685445519676646, - "learning_rate": 1.7507088555555003e-05, - "loss": 0.1484, + "epoch": 0.2, + "grad_norm": 0.394977386623491, + "learning_rate": 1.8485680820145696e-05, + "loss": 0.344, "step": 4401 }, { - "epoch": 0.25, - "grad_norm": 0.319991819668123, - "learning_rate": 1.750585904494542e-05, - "loss": 0.2779, + "epoch": 0.2, + "grad_norm": 0.37907472127344916, + "learning_rate": 1.8484893488682622e-05, + "loss": 0.3537, "step": 4402 }, { - "epoch": 0.25, - "grad_norm": 0.5975686561638618, - "learning_rate": 1.7504629274408257e-05, - "loss": 0.4789, + "epoch": 0.2, + "grad_norm": 1.9878053536639875, + "learning_rate": 1.8484105969372184e-05, + "loss": 0.8964, "step": 4403 }, { - "epoch": 0.25, - "grad_norm": 0.8063943453403252, - "learning_rate": 1.750339924398611e-05, - "loss": 0.4771, + "epoch": 0.2, + "grad_norm": 0.33995285968531436, + "learning_rate": 1.8483318262231818e-05, + "loss": 0.2307, "step": 4404 }, { - "epoch": 0.25, - "grad_norm": 0.3169912413687381, - "learning_rate": 1.7502168953721564e-05, - "loss": 0.2506, + "epoch": 0.2, + "grad_norm": 0.3159330698344661, + "learning_rate": 1.8482530367278958e-05, + "loss": 0.2055, "step": 4405 }, { - "epoch": 0.25, - "grad_norm": 0.5907726571115687, - "learning_rate": 1.7500938403657235e-05, - "loss": 0.4054, + "epoch": 0.2, + "grad_norm": 0.4257550155514517, + "learning_rate": 1.8481742284531053e-05, + "loss": 0.348, "step": 4406 }, { - "epoch": 0.25, - "grad_norm": 0.28948065757597125, - "learning_rate": 1.7499707593835728e-05, - "loss": 0.1955, + "epoch": 0.2, + "grad_norm": 0.4107979107327484, + "learning_rate": 1.8480954014005553e-05, + "loss": 0.2694, "step": 4407 }, { - "epoch": 0.25, - "grad_norm": 0.346293298851469, - "learning_rate": 1.749847652429967e-05, - "loss": 0.2249, + "epoch": 0.2, + "grad_norm": 1.3947824071355424, + "learning_rate": 1.84801655557199e-05, + "loss": 0.7583, "step": 4408 }, { - "epoch": 0.25, - "grad_norm": 1.1109798245103102, - "learning_rate": 1.7497245195091694e-05, - "loss": 0.6656, + "epoch": 0.2, + "grad_norm": 0.5301235504443728, + "learning_rate": 1.8479376909691558e-05, + "loss": 0.3791, "step": 4409 }, { - "epoch": 0.25, - "grad_norm": 0.49764755117641957, - "learning_rate": 1.749601360625444e-05, - "loss": 0.3518, + "epoch": 0.2, + "grad_norm": 0.3670339082794531, + "learning_rate": 1.847858807593798e-05, + "loss": 0.2492, "step": 4410 }, { - "epoch": 0.25, - "grad_norm": 0.6586021135670197, - "learning_rate": 1.7494781757830554e-05, - "loss": 0.2586, + "epoch": 0.2, + "grad_norm": 0.38253953706095434, + "learning_rate": 1.8477799054476638e-05, + "loss": 0.2187, "step": 4411 }, { - "epoch": 0.25, - "grad_norm": 0.415911984220887, - "learning_rate": 1.74935496498627e-05, - "loss": 0.3387, + "epoch": 0.2, + "grad_norm": 0.542900459665312, + "learning_rate": 1.8477009845324994e-05, + "loss": 0.4408, "step": 4412 }, { - "epoch": 0.25, - "grad_norm": 0.3528941062652166, - "learning_rate": 1.7492317282393543e-05, - "loss": 0.1973, + "epoch": 0.2, + "grad_norm": 0.4476526777100846, + "learning_rate": 1.8476220448500523e-05, + "loss": 0.2616, "step": 4413 }, { - "epoch": 0.25, - "grad_norm": 0.5379242200023283, - "learning_rate": 1.7491084655465762e-05, - "loss": 0.3619, + "epoch": 0.2, + "grad_norm": 0.4804367324837792, + "learning_rate": 1.84754308640207e-05, + "loss": 0.3719, "step": 4414 }, { - "epoch": 0.25, - "grad_norm": 0.4547219643409678, - "learning_rate": 1.748985176912204e-05, - "loss": 0.2714, + "epoch": 0.2, + "grad_norm": 1.1244076640720961, + "learning_rate": 1.8474641091903003e-05, + "loss": 0.5059, "step": 4415 }, { - "epoch": 0.25, - "grad_norm": 0.9862381216448781, - "learning_rate": 1.7488618623405075e-05, - "loss": 0.5093, + "epoch": 0.2, + "grad_norm": 0.44666664237946974, + "learning_rate": 1.8473851132164925e-05, + "loss": 0.3148, "step": 4416 }, { - "epoch": 0.25, - "grad_norm": 0.6014511226082759, - "learning_rate": 1.748738521835757e-05, - "loss": 0.3081, + "epoch": 0.2, + "grad_norm": 0.28420515692853615, + "learning_rate": 1.847306098482395e-05, + "loss": 0.2019, "step": 4417 }, { - "epoch": 0.25, - "grad_norm": 0.42678893832728415, - "learning_rate": 1.7486151554022233e-05, - "loss": 0.266, + "epoch": 0.2, + "grad_norm": 0.47189472018014195, + "learning_rate": 1.847227064989757e-05, + "loss": 0.3727, "step": 4418 }, { - "epoch": 0.25, - "grad_norm": 0.29200160850606793, - "learning_rate": 1.748491763044179e-05, - "loss": 0.1875, + "epoch": 0.2, + "grad_norm": 0.44374810583881247, + "learning_rate": 1.8471480127403282e-05, + "loss": 0.2808, "step": 4419 }, { - "epoch": 0.25, - "grad_norm": 0.609705572125106, - "learning_rate": 1.7483683447658976e-05, - "loss": 0.3825, + "epoch": 0.2, + "grad_norm": 1.027147544502033, + "learning_rate": 1.847068941735859e-05, + "loss": 0.5333, "step": 4420 }, { - "epoch": 0.25, - "grad_norm": 0.6734514732700916, - "learning_rate": 1.748244900571652e-05, - "loss": 0.3149, + "epoch": 0.2, + "grad_norm": 0.46103726091028446, + "learning_rate": 1.8469898519781e-05, + "loss": 0.3689, "step": 4421 }, { - "epoch": 0.25, - "grad_norm": 0.5674551833098238, - "learning_rate": 1.748121430465718e-05, - "loss": 0.3457, + "epoch": 0.2, + "grad_norm": 0.4169384540882763, + "learning_rate": 1.846910743468802e-05, + "loss": 0.2671, "step": 4422 }, { - "epoch": 0.25, - "grad_norm": 0.5692437298539499, - "learning_rate": 1.747997934452371e-05, - "loss": 0.3981, + "epoch": 0.2, + "grad_norm": 0.3082090394264841, + "learning_rate": 1.846831616209716e-05, + "loss": 0.1542, "step": 4423 }, { - "epoch": 0.25, - "grad_norm": 0.40695746244632547, - "learning_rate": 1.7478744125358877e-05, - "loss": 0.2523, + "epoch": 0.2, + "grad_norm": 0.47559608400458564, + "learning_rate": 1.8467524702025946e-05, + "loss": 0.3593, "step": 4424 }, { - "epoch": 0.25, - "grad_norm": 0.26111116306772175, - "learning_rate": 1.7477508647205456e-05, - "loss": 0.2255, + "epoch": 0.2, + "grad_norm": 0.4142880098335269, + "learning_rate": 1.8466733054491897e-05, + "loss": 0.2974, "step": 4425 }, { - "epoch": 0.25, - "grad_norm": 0.3739424274051894, - "learning_rate": 1.7476272910106233e-05, - "loss": 0.2915, + "epoch": 0.2, + "grad_norm": 0.4953227641209142, + "learning_rate": 1.8465941219512533e-05, + "loss": 0.3025, "step": 4426 }, { - "epoch": 0.25, - "grad_norm": 0.5242746749994995, - "learning_rate": 1.7475036914104e-05, - "loss": 0.4205, + "epoch": 0.2, + "grad_norm": 1.0096049062964039, + "learning_rate": 1.8465149197105395e-05, + "loss": 0.6183, "step": 4427 }, { - "epoch": 0.25, - "grad_norm": 0.5970732747562592, - "learning_rate": 1.747380065924156e-05, - "loss": 0.409, + "epoch": 0.2, + "grad_norm": 0.42045356735942796, + "learning_rate": 1.8464356987288012e-05, + "loss": 0.302, "step": 4428 }, { - "epoch": 0.25, - "grad_norm": 0.7671707463296635, - "learning_rate": 1.7472564145561725e-05, - "loss": 0.4088, + "epoch": 0.2, + "grad_norm": 0.8752885739345332, + "learning_rate": 1.8463564590077922e-05, + "loss": 0.511, "step": 4429 }, { - "epoch": 0.25, - "grad_norm": 0.3501468041889073, - "learning_rate": 1.7471327373107317e-05, - "loss": 0.2827, + "epoch": 0.2, + "grad_norm": 0.31311122487759296, + "learning_rate": 1.8462772005492672e-05, + "loss": 0.2447, "step": 4430 }, { - "epoch": 0.25, - "grad_norm": 0.3703347723536773, - "learning_rate": 1.747009034192116e-05, - "loss": 0.1713, + "epoch": 0.2, + "grad_norm": 0.40000271386071967, + "learning_rate": 1.8461979233549802e-05, + "loss": 0.2985, "step": 4431 }, { - "epoch": 0.25, - "grad_norm": 0.6832880354899038, - "learning_rate": 1.7468853052046095e-05, - "loss": 0.4288, + "epoch": 0.2, + "grad_norm": 0.44039714183767725, + "learning_rate": 1.846118627426687e-05, + "loss": 0.304, "step": 4432 }, { - "epoch": 0.25, - "grad_norm": 0.385508552267273, - "learning_rate": 1.7467615503524973e-05, - "loss": 0.3287, + "epoch": 0.2, + "grad_norm": 0.4059346377595148, + "learning_rate": 1.846039312766143e-05, + "loss": 0.2579, "step": 4433 }, { - "epoch": 0.25, - "grad_norm": 0.2565484503974381, - "learning_rate": 1.7466377696400646e-05, - "loss": 0.2168, + "epoch": 0.2, + "grad_norm": 0.4173722020922135, + "learning_rate": 1.845959979375104e-05, + "loss": 0.3066, "step": 4434 }, { - "epoch": 0.25, - "grad_norm": 0.6083624161878634, - "learning_rate": 1.746513963071598e-05, - "loss": 0.374, + "epoch": 0.2, + "grad_norm": 0.9317788078181761, + "learning_rate": 1.845880627255326e-05, + "loss": 0.481, "step": 4435 }, { - "epoch": 0.25, - "grad_norm": 0.4290866249248875, - "learning_rate": 1.746390130651385e-05, - "loss": 0.3343, + "epoch": 0.2, + "grad_norm": 0.34104343885936417, + "learning_rate": 1.845801256408567e-05, + "loss": 0.2029, "step": 4436 }, { - "epoch": 0.25, - "grad_norm": 0.4210646856691826, - "learning_rate": 1.746266272383714e-05, - "loss": 0.1923, + "epoch": 0.2, + "grad_norm": 0.38445617022669215, + "learning_rate": 1.8457218668365824e-05, + "loss": 0.3042, "step": 4437 }, { - "epoch": 0.25, - "grad_norm": 0.31382665898340373, - "learning_rate": 1.746142388272874e-05, - "loss": 0.2997, + "epoch": 0.2, + "grad_norm": 0.42929741805960203, + "learning_rate": 1.845642458541131e-05, + "loss": 0.3446, "step": 4438 }, { - "epoch": 0.26, - "grad_norm": 0.40780261576434734, - "learning_rate": 1.746018478323155e-05, - "loss": 0.3683, + "epoch": 0.2, + "grad_norm": 0.27945648425499603, + "learning_rate": 1.8455630315239712e-05, + "loss": 0.1119, "step": 4439 }, { - "epoch": 0.26, - "grad_norm": 0.42413178269441665, - "learning_rate": 1.7458945425388484e-05, - "loss": 0.2996, + "epoch": 0.2, + "grad_norm": 0.3915906628524294, + "learning_rate": 1.8454835857868603e-05, + "loss": 0.3002, "step": 4440 }, { - "epoch": 0.26, - "grad_norm": 0.3334174550196039, - "learning_rate": 1.7457705809242455e-05, - "loss": 0.1964, + "epoch": 0.2, + "grad_norm": 0.6248890037133725, + "learning_rate": 1.845404121331558e-05, + "loss": 0.4187, "step": 4441 }, { - "epoch": 0.26, - "grad_norm": 0.3571659636822857, - "learning_rate": 1.74564659348364e-05, - "loss": 0.3006, + "epoch": 0.2, + "grad_norm": 0.5291168908969681, + "learning_rate": 1.8453246381598233e-05, + "loss": 0.4117, "step": 4442 }, { - "epoch": 0.26, - "grad_norm": 0.8945360836656251, - "learning_rate": 1.7455225802213246e-05, - "loss": 0.5754, + "epoch": 0.2, + "grad_norm": 0.28394591590560064, + "learning_rate": 1.8452451362734158e-05, + "loss": 0.1887, "step": 4443 }, { - "epoch": 0.26, - "grad_norm": 0.5110983783626839, - "learning_rate": 1.7453985411415945e-05, - "loss": 0.3305, + "epoch": 0.2, + "grad_norm": 1.4212640666332423, + "learning_rate": 1.8451656156740954e-05, + "loss": 0.8272, "step": 4444 }, { - "epoch": 0.26, - "grad_norm": 0.40930073398439676, - "learning_rate": 1.745274476248745e-05, - "loss": 0.3161, + "epoch": 0.2, + "grad_norm": 0.3277265910932489, + "learning_rate": 1.8450860763636232e-05, + "loss": 0.2759, "step": 4445 }, { - "epoch": 0.26, - "grad_norm": 0.2968822772265499, - "learning_rate": 1.7451503855470722e-05, - "loss": 0.2828, + "epoch": 0.2, + "grad_norm": 0.37949144874344004, + "learning_rate": 1.8450065183437594e-05, + "loss": 0.2399, "step": 4446 }, { - "epoch": 0.26, - "grad_norm": 0.27604539059250005, - "learning_rate": 1.745026269040874e-05, - "loss": 0.1201, + "epoch": 0.2, + "grad_norm": 0.8306450247301893, + "learning_rate": 1.844926941616266e-05, + "loss": 0.4675, "step": 4447 }, { - "epoch": 0.26, - "grad_norm": 0.4091950250734807, - "learning_rate": 1.744902126734448e-05, - "loss": 0.305, + "epoch": 0.2, + "grad_norm": 1.2258189307449783, + "learning_rate": 1.8448473461829045e-05, + "loss": 0.5991, "step": 4448 }, { - "epoch": 0.26, - "grad_norm": 0.4688130768266721, - "learning_rate": 1.744777958632093e-05, - "loss": 0.3995, + "epoch": 0.2, + "grad_norm": 0.34399662138255277, + "learning_rate": 1.8447677320454367e-05, + "loss": 0.2217, "step": 4449 }, { - "epoch": 0.26, - "grad_norm": 0.4199210326535034, - "learning_rate": 1.74465376473811e-05, - "loss": 0.2857, + "epoch": 0.2, + "grad_norm": 0.4462577761254286, + "learning_rate": 1.8446880992056257e-05, + "loss": 0.3484, "step": 4450 }, { - "epoch": 0.26, - "grad_norm": 0.36946539820255947, - "learning_rate": 1.7445295450567985e-05, - "loss": 0.3241, + "epoch": 0.2, + "grad_norm": 0.43238929526001907, + "learning_rate": 1.8446084476652344e-05, + "loss": 0.2543, "step": 4451 }, { - "epoch": 0.26, - "grad_norm": 0.30417635766535905, - "learning_rate": 1.7444052995924612e-05, - "loss": 0.2137, + "epoch": 0.2, + "grad_norm": 0.38503095284950883, + "learning_rate": 1.844528777426026e-05, + "loss": 0.2124, "step": 4452 }, { - "epoch": 0.26, - "grad_norm": 0.388316362774309, - "learning_rate": 1.7442810283494002e-05, - "loss": 0.3331, + "epoch": 0.2, + "grad_norm": 0.5185074696526006, + "learning_rate": 1.8444490884897643e-05, + "loss": 0.3625, "step": 4453 }, { - "epoch": 0.26, - "grad_norm": 0.33429814252358525, - "learning_rate": 1.7441567313319194e-05, - "loss": 0.2181, + "epoch": 0.2, + "grad_norm": 1.323745708541978, + "learning_rate": 1.844369380858214e-05, + "loss": 0.6126, "step": 4454 }, { - "epoch": 0.26, - "grad_norm": 0.7626510299733518, - "learning_rate": 1.7440324085443227e-05, - "loss": 0.4604, + "epoch": 0.2, + "grad_norm": 0.4867506163141617, + "learning_rate": 1.8442896545331394e-05, + "loss": 0.3279, "step": 4455 }, { - "epoch": 0.26, - "grad_norm": 0.5831532138070068, - "learning_rate": 1.7439080599909163e-05, - "loss": 0.4221, + "epoch": 0.2, + "grad_norm": 0.2659034359787943, + "learning_rate": 1.8442099095163052e-05, + "loss": 0.1676, "step": 4456 }, { - "epoch": 0.26, - "grad_norm": 0.29553524419263844, - "learning_rate": 1.743783685676005e-05, - "loss": 0.2498, + "epoch": 0.2, + "grad_norm": 0.5443897249545817, + "learning_rate": 1.8441301458094773e-05, + "loss": 0.349, "step": 4457 }, { - "epoch": 0.26, - "grad_norm": 0.49518512762179856, - "learning_rate": 1.743659285603897e-05, - "loss": 0.3983, + "epoch": 0.2, + "grad_norm": 0.497123551826214, + "learning_rate": 1.844050363414422e-05, + "loss": 0.3515, "step": 4458 }, { - "epoch": 0.26, - "grad_norm": 0.25390198843159745, - "learning_rate": 1.7435348597789e-05, - "loss": 0.1813, + "epoch": 0.2, + "grad_norm": 0.5339807415564104, + "learning_rate": 1.8439705623329048e-05, + "loss": 0.3164, "step": 4459 }, { - "epoch": 0.26, - "grad_norm": 0.36903051665820324, - "learning_rate": 1.7434104082053227e-05, - "loss": 0.2264, + "epoch": 0.2, + "grad_norm": 1.0593527936879776, + "learning_rate": 1.8438907425666927e-05, + "loss": 0.623, "step": 4460 }, { - "epoch": 0.26, - "grad_norm": 0.848917048411256, - "learning_rate": 1.743285930887475e-05, - "loss": 0.4192, + "epoch": 0.2, + "grad_norm": 0.32841147322899433, + "learning_rate": 1.8438109041175532e-05, + "loss": 0.2823, "step": 4461 }, { - "epoch": 0.26, - "grad_norm": 0.5239600697847725, - "learning_rate": 1.7431614278296672e-05, - "loss": 0.3618, + "epoch": 0.2, + "grad_norm": 0.525230139524885, + "learning_rate": 1.8437310469872535e-05, + "loss": 0.3059, "step": 4462 }, { - "epoch": 0.26, - "grad_norm": 0.39962990775437407, - "learning_rate": 1.7430368990362114e-05, - "loss": 0.2368, + "epoch": 0.21, + "grad_norm": 0.4128166956355856, + "learning_rate": 1.8436511711775615e-05, + "loss": 0.3357, "step": 4463 }, { - "epoch": 0.26, - "grad_norm": 0.41735657916516267, - "learning_rate": 1.7429123445114196e-05, - "loss": 0.2857, + "epoch": 0.21, + "grad_norm": 0.38554193352101607, + "learning_rate": 1.8435712766902458e-05, + "loss": 0.2902, "step": 4464 }, { - "epoch": 0.26, - "grad_norm": 0.32698671191140255, - "learning_rate": 1.7427877642596053e-05, - "loss": 0.2875, + "epoch": 0.21, + "grad_norm": 0.325444437079442, + "learning_rate": 1.843491363527075e-05, + "loss": 0.2077, "step": 4465 }, { - "epoch": 0.26, - "grad_norm": 0.3624786033802996, - "learning_rate": 1.7426631582850827e-05, - "loss": 0.2762, + "epoch": 0.21, + "grad_norm": 1.0054154890945404, + "learning_rate": 1.8434114316898185e-05, + "loss": 0.5957, "step": 4466 }, { - "epoch": 0.26, - "grad_norm": 0.5845800939907507, - "learning_rate": 1.742538526592167e-05, - "loss": 0.4098, + "epoch": 0.21, + "grad_norm": 0.7534035535583185, + "learning_rate": 1.8433314811802455e-05, + "loss": 0.3231, "step": 4467 }, { - "epoch": 0.26, - "grad_norm": 0.9870741959193776, - "learning_rate": 1.742413869185174e-05, - "loss": 0.6744, + "epoch": 0.21, + "grad_norm": 0.4875449636812026, + "learning_rate": 1.843251512000127e-05, + "loss": 0.3651, "step": 4468 }, { - "epoch": 0.26, - "grad_norm": 0.42257891785206114, - "learning_rate": 1.7422891860684202e-05, - "loss": 0.2697, + "epoch": 0.21, + "grad_norm": 0.4436255519343426, + "learning_rate": 1.8431715241512322e-05, + "loss": 0.3275, "step": 4469 }, { - "epoch": 0.26, - "grad_norm": 0.32188754467568104, - "learning_rate": 1.7421644772462247e-05, - "loss": 0.2059, + "epoch": 0.21, + "grad_norm": 0.7626308540041877, + "learning_rate": 1.8430915176353325e-05, + "loss": 0.3959, "step": 4470 }, { - "epoch": 0.26, - "grad_norm": 0.4304055386861345, - "learning_rate": 1.7420397427229045e-05, - "loss": 0.2905, + "epoch": 0.21, + "grad_norm": 0.3393845225210966, + "learning_rate": 1.8430114924541995e-05, + "loss": 0.2226, "step": 4471 }, { - "epoch": 0.26, - "grad_norm": 0.33978838217773394, - "learning_rate": 1.7419149825027802e-05, - "loss": 0.3002, + "epoch": 0.21, + "grad_norm": 0.9710703013390126, + "learning_rate": 1.8429314486096042e-05, + "loss": 0.3674, "step": 4472 }, { - "epoch": 0.26, - "grad_norm": 0.3788118884308564, - "learning_rate": 1.7417901965901717e-05, - "loss": 0.3153, + "epoch": 0.21, + "grad_norm": 0.3800482477474855, + "learning_rate": 1.8428513861033193e-05, + "loss": 0.2929, "step": 4473 }, { - "epoch": 0.26, - "grad_norm": 0.521900293581829, - "learning_rate": 1.7416653849894008e-05, - "loss": 0.3964, + "epoch": 0.21, + "grad_norm": 0.4734865335193066, + "learning_rate": 1.842771304937117e-05, + "loss": 0.3561, "step": 4474 }, { - "epoch": 0.26, - "grad_norm": 0.3665116506708937, - "learning_rate": 1.7415405477047895e-05, - "loss": 0.2777, + "epoch": 0.21, + "grad_norm": 0.7634465212710199, + "learning_rate": 1.8426912051127702e-05, + "loss": 0.3808, "step": 4475 }, { - "epoch": 0.26, - "grad_norm": 0.9883665529741489, - "learning_rate": 1.741415684740661e-05, - "loss": 0.4042, + "epoch": 0.21, + "grad_norm": 0.3738599373182764, + "learning_rate": 1.842611086632052e-05, + "loss": 0.2332, "step": 4476 }, { - "epoch": 0.26, - "grad_norm": 0.24629002200117167, - "learning_rate": 1.741290796101339e-05, - "loss": 0.2254, + "epoch": 0.21, + "grad_norm": 0.3491678112595703, + "learning_rate": 1.8425309494967368e-05, + "loss": 0.254, "step": 4477 }, { - "epoch": 0.26, - "grad_norm": 0.3537787580716675, - "learning_rate": 1.7411658817911487e-05, - "loss": 0.3092, + "epoch": 0.21, + "grad_norm": 2.244963672732106, + "learning_rate": 1.842450793708599e-05, + "loss": 0.7392, "step": 4478 }, { - "epoch": 0.26, - "grad_norm": 0.6388605821733183, - "learning_rate": 1.741040941814416e-05, - "loss": 0.4883, + "epoch": 0.21, + "grad_norm": 0.4096372250922761, + "learning_rate": 1.8423706192694118e-05, + "loss": 0.2439, "step": 4479 }, { - "epoch": 0.26, - "grad_norm": 0.6188260255813043, - "learning_rate": 1.740915976175467e-05, - "loss": 0.3133, + "epoch": 0.21, + "grad_norm": 0.709116923915328, + "learning_rate": 1.8422904261809512e-05, + "loss": 0.4124, "step": 4480 }, { - "epoch": 0.26, - "grad_norm": 0.4176908201497853, - "learning_rate": 1.74079098487863e-05, - "loss": 0.3111, + "epoch": 0.21, + "grad_norm": 0.5426902792370397, + "learning_rate": 1.8422102144449922e-05, + "loss": 0.3511, "step": 4481 }, { - "epoch": 0.26, - "grad_norm": 0.45129474320870905, - "learning_rate": 1.7406659679282326e-05, - "loss": 0.3424, + "epoch": 0.21, + "grad_norm": 0.39892283638639275, + "learning_rate": 1.8421299840633112e-05, + "loss": 0.2441, "step": 4482 }, { - "epoch": 0.26, - "grad_norm": 0.4174295494281297, - "learning_rate": 1.740540925328605e-05, - "loss": 0.2623, + "epoch": 0.21, + "grad_norm": 0.3177808559968744, + "learning_rate": 1.8420497350376838e-05, + "loss": 0.2023, "step": 4483 }, { - "epoch": 0.26, - "grad_norm": 0.3767565914818703, - "learning_rate": 1.7404158570840765e-05, - "loss": 0.282, + "epoch": 0.21, + "grad_norm": 1.2176875641679026, + "learning_rate": 1.8419694673698865e-05, + "loss": 0.6791, "step": 4484 }, { - "epoch": 0.26, - "grad_norm": 0.2783113500296502, - "learning_rate": 1.7402907631989793e-05, - "loss": 0.2917, + "epoch": 0.21, + "grad_norm": 0.6365815409978489, + "learning_rate": 1.8418891810616974e-05, + "loss": 0.2254, "step": 4485 }, { - "epoch": 0.26, - "grad_norm": 0.9873955581325996, - "learning_rate": 1.7401656436776445e-05, - "loss": 0.4259, + "epoch": 0.21, + "grad_norm": 0.4559997018330957, + "learning_rate": 1.8418088761148925e-05, + "loss": 0.3574, "step": 4486 }, { - "epoch": 0.26, - "grad_norm": 0.3845423744262796, - "learning_rate": 1.740040498524405e-05, - "loss": 0.2788, + "epoch": 0.21, + "grad_norm": 0.7655479580035389, + "learning_rate": 1.841728552531251e-05, + "loss": 0.5315, "step": 4487 }, { - "epoch": 0.26, - "grad_norm": 0.7272016165043925, - "learning_rate": 1.7399153277435954e-05, - "loss": 0.5691, + "epoch": 0.21, + "grad_norm": 0.30729938436871423, + "learning_rate": 1.8416482103125505e-05, + "loss": 0.0753, "step": 4488 }, { - "epoch": 0.26, - "grad_norm": 0.40153871463464896, - "learning_rate": 1.7397901313395497e-05, - "loss": 0.2958, + "epoch": 0.21, + "grad_norm": 0.33186913970259163, + "learning_rate": 1.84156784946057e-05, + "loss": 0.2753, "step": 4489 }, { - "epoch": 0.26, - "grad_norm": 0.39987319741953187, - "learning_rate": 1.7396649093166034e-05, - "loss": 0.3051, + "epoch": 0.21, + "grad_norm": 1.3241233056579782, + "learning_rate": 1.841487469977088e-05, + "loss": 0.7877, "step": 4490 }, { - "epoch": 0.26, - "grad_norm": 0.35067447545845665, - "learning_rate": 1.7395396616790932e-05, - "loss": 0.1451, + "epoch": 0.21, + "grad_norm": 0.7339615387859504, + "learning_rate": 1.8414070718638844e-05, + "loss": 0.4633, "step": 4491 }, { - "epoch": 0.26, - "grad_norm": 0.9550529008649429, - "learning_rate": 1.7394143884313562e-05, - "loss": 0.7368, + "epoch": 0.21, + "grad_norm": 0.3915467303863332, + "learning_rate": 1.8413266551227394e-05, + "loss": 0.2832, "step": 4492 }, { - "epoch": 0.26, - "grad_norm": 0.30621419928245114, - "learning_rate": 1.7392890895777305e-05, - "loss": 0.2344, + "epoch": 0.21, + "grad_norm": 0.5696379466662088, + "learning_rate": 1.8412462197554334e-05, + "loss": 0.3713, "step": 4493 }, { - "epoch": 0.26, - "grad_norm": 1.4312493233871788, - "learning_rate": 1.7391637651225556e-05, - "loss": 0.7849, + "epoch": 0.21, + "grad_norm": 0.6588948488218529, + "learning_rate": 1.8411657657637465e-05, + "loss": 0.3514, "step": 4494 }, { - "epoch": 0.26, - "grad_norm": 0.6076999334668871, - "learning_rate": 1.7390384150701715e-05, - "loss": 0.4489, + "epoch": 0.21, + "grad_norm": 0.29986766014166094, + "learning_rate": 1.8410852931494606e-05, + "loss": 0.1758, "step": 4495 }, { - "epoch": 0.26, - "grad_norm": 0.38957330875145113, - "learning_rate": 1.738913039424919e-05, - "loss": 0.1873, + "epoch": 0.21, + "grad_norm": 1.2105300696201593, + "learning_rate": 1.8410048019143568e-05, + "loss": 0.7088, "step": 4496 }, { - "epoch": 0.26, - "grad_norm": 0.34020086053360765, - "learning_rate": 1.7387876381911395e-05, - "loss": 0.2616, + "epoch": 0.21, + "grad_norm": 0.40957315010250506, + "learning_rate": 1.840924292060217e-05, + "loss": 0.3146, "step": 4497 }, { - "epoch": 0.26, - "grad_norm": 0.43665721845984257, - "learning_rate": 1.7386622113731758e-05, - "loss": 0.3332, + "epoch": 0.21, + "grad_norm": 0.5616058975749821, + "learning_rate": 1.8408437635888243e-05, + "loss": 0.2835, "step": 4498 }, { - "epoch": 0.26, - "grad_norm": 0.35317580614898686, - "learning_rate": 1.7385367589753714e-05, - "loss": 0.2111, + "epoch": 0.21, + "grad_norm": 0.9304271309650876, + "learning_rate": 1.840763216501961e-05, + "loss": 0.582, "step": 4499 }, { - "epoch": 0.26, - "grad_norm": 0.8179148641200064, - "learning_rate": 1.738411281002071e-05, - "loss": 0.4621, + "epoch": 0.21, + "grad_norm": 0.78292687362689, + "learning_rate": 1.84068265080141e-05, + "loss": 0.364, "step": 4500 }, { - "epoch": 0.26, - "grad_norm": 0.37262485183921623, - "learning_rate": 1.7382857774576195e-05, - "loss": 0.3425, + "epoch": 0.21, + "grad_norm": 0.4235250419818637, + "learning_rate": 1.8406020664889558e-05, + "loss": 0.263, "step": 4501 }, { - "epoch": 0.26, - "grad_norm": 0.4434924061456271, - "learning_rate": 1.7381602483463637e-05, - "loss": 0.2924, + "epoch": 0.21, + "grad_norm": 0.3919014340959677, + "learning_rate": 1.8405214635663817e-05, + "loss": 0.2591, "step": 4502 }, { - "epoch": 0.26, - "grad_norm": 0.23825968934381375, - "learning_rate": 1.73803469367265e-05, - "loss": 0.1482, + "epoch": 0.21, + "grad_norm": 0.8772265349328561, + "learning_rate": 1.8404408420354728e-05, + "loss": 0.4169, "step": 4503 }, { - "epoch": 0.26, - "grad_norm": 1.3993363887214298, - "learning_rate": 1.7379091134408265e-05, - "loss": 0.9296, + "epoch": 0.21, + "grad_norm": 0.4960614130586802, + "learning_rate": 1.8403602018980135e-05, + "loss": 0.3095, "step": 4504 }, { - "epoch": 0.26, - "grad_norm": 0.3323388490174505, - "learning_rate": 1.737783507655242e-05, - "loss": 0.2951, + "epoch": 0.21, + "grad_norm": 0.432634523712912, + "learning_rate": 1.8402795431557895e-05, + "loss": 0.3026, "step": 4505 }, { - "epoch": 0.26, - "grad_norm": 0.596493427646773, - "learning_rate": 1.7376578763202465e-05, - "loss": 0.3465, + "epoch": 0.21, + "grad_norm": 0.8445320864471082, + "learning_rate": 1.840198865810586e-05, + "loss": 0.4435, "step": 4506 }, { - "epoch": 0.26, - "grad_norm": 0.85841984621207, - "learning_rate": 1.7375322194401905e-05, - "loss": 0.5163, + "epoch": 0.21, + "grad_norm": 0.3424407943806172, + "learning_rate": 1.84011816986419e-05, + "loss": 0.2474, "step": 4507 }, { - "epoch": 0.26, - "grad_norm": 0.3979185219008631, - "learning_rate": 1.7374065370194253e-05, - "loss": 0.3023, + "epoch": 0.21, + "grad_norm": 0.4080477734016136, + "learning_rate": 1.840037455318387e-05, + "loss": 0.2543, "step": 4508 }, { - "epoch": 0.26, - "grad_norm": 0.3792909090305572, - "learning_rate": 1.7372808290623034e-05, - "loss": 0.2717, + "epoch": 0.21, + "grad_norm": 0.6336525898054867, + "learning_rate": 1.839956722174964e-05, + "loss": 0.37, "step": 4509 }, { - "epoch": 0.26, - "grad_norm": 0.3288322471843398, - "learning_rate": 1.7371550955731786e-05, - "loss": 0.2334, + "epoch": 0.21, + "grad_norm": 0.4321516926251434, + "learning_rate": 1.8398759704357093e-05, + "loss": 0.3307, "step": 4510 }, { - "epoch": 0.26, - "grad_norm": 0.3603072373462248, - "learning_rate": 1.737029336556404e-05, - "loss": 0.2721, + "epoch": 0.21, + "grad_norm": 0.8560378333076766, + "learning_rate": 1.83979520010241e-05, + "loss": 0.4089, "step": 4511 }, { - "epoch": 0.26, - "grad_norm": 1.0258413937489468, - "learning_rate": 1.7369035520163355e-05, - "loss": 0.4195, + "epoch": 0.21, + "grad_norm": 0.46544700233128194, + "learning_rate": 1.8397144111768543e-05, + "loss": 0.3577, "step": 4512 }, { - "epoch": 0.26, - "grad_norm": 0.38788937506634363, - "learning_rate": 1.7367777419573285e-05, - "loss": 0.3458, + "epoch": 0.21, + "grad_norm": 0.3635076984243348, + "learning_rate": 1.8396336036608307e-05, + "loss": 0.3023, "step": 4513 }, { - "epoch": 0.26, - "grad_norm": 0.3663495306117589, - "learning_rate": 1.73665190638374e-05, - "loss": 0.2827, + "epoch": 0.21, + "grad_norm": 0.33173049904380497, + "learning_rate": 1.8395527775561284e-05, + "loss": 0.1882, "step": 4514 }, { - "epoch": 0.26, - "grad_norm": 0.35904037745450235, - "learning_rate": 1.7365260452999277e-05, - "loss": 0.239, + "epoch": 0.21, + "grad_norm": 0.6000557727610799, + "learning_rate": 1.839471932864537e-05, + "loss": 0.4117, "step": 4515 }, { - "epoch": 0.26, - "grad_norm": 0.368230106992383, - "learning_rate": 1.73640015871025e-05, - "loss": 0.2299, + "epoch": 0.21, + "grad_norm": 0.47762628497453224, + "learning_rate": 1.839391069587846e-05, + "loss": 0.3568, "step": 4516 }, { - "epoch": 0.26, - "grad_norm": 0.4124820648794057, - "learning_rate": 1.7362742466190668e-05, - "loss": 0.2929, + "epoch": 0.21, + "grad_norm": 0.40974906813118483, + "learning_rate": 1.8393101877278455e-05, + "loss": 0.3395, "step": 4517 }, { - "epoch": 0.26, - "grad_norm": 0.6313760443488636, - "learning_rate": 1.736148309030738e-05, - "loss": 0.3801, + "epoch": 0.21, + "grad_norm": 0.38769849321606564, + "learning_rate": 1.839229287286327e-05, + "loss": 0.0748, "step": 4518 }, { - "epoch": 0.26, - "grad_norm": 0.8649835515367721, - "learning_rate": 1.7360223459496244e-05, - "loss": 0.3975, + "epoch": 0.21, + "grad_norm": 0.4406936750909376, + "learning_rate": 1.8391483682650803e-05, + "loss": 0.3583, "step": 4519 }, { - "epoch": 0.26, - "grad_norm": 0.3995323752107863, - "learning_rate": 1.735896357380089e-05, - "loss": 0.2723, + "epoch": 0.21, + "grad_norm": 0.35148677905224907, + "learning_rate": 1.8390674306658977e-05, + "loss": 0.3382, "step": 4520 }, { - "epoch": 0.26, - "grad_norm": 0.34544221499812, - "learning_rate": 1.7357703433264945e-05, - "loss": 0.3229, + "epoch": 0.21, + "grad_norm": 0.31228300037368295, + "learning_rate": 1.838986474490571e-05, + "loss": 0.2213, "step": 4521 }, { - "epoch": 0.26, - "grad_norm": 0.2902986551221321, - "learning_rate": 1.7356443037932046e-05, - "loss": 0.1643, + "epoch": 0.21, + "grad_norm": 0.47298557644576394, + "learning_rate": 1.8389054997408923e-05, + "loss": 0.3081, "step": 4522 }, { - "epoch": 0.26, - "grad_norm": 0.4838021066394585, - "learning_rate": 1.7355182387845843e-05, - "loss": 0.3209, + "epoch": 0.21, + "grad_norm": 1.1122345600832777, + "learning_rate": 1.8388245064186545e-05, + "loss": 0.725, "step": 4523 }, { - "epoch": 0.26, - "grad_norm": 0.5004591776306092, - "learning_rate": 1.7353921483049985e-05, - "loss": 0.3296, + "epoch": 0.21, + "grad_norm": 0.3933633071087256, + "learning_rate": 1.8387434945256503e-05, + "loss": 0.2766, "step": 4524 }, { - "epoch": 0.26, - "grad_norm": 0.4258313917846841, - "learning_rate": 1.7352660323588146e-05, - "loss": 0.2727, + "epoch": 0.21, + "grad_norm": 0.461293271743621, + "learning_rate": 1.8386624640636737e-05, + "loss": 0.2911, "step": 4525 }, { - "epoch": 0.26, - "grad_norm": 0.4033155476729041, - "learning_rate": 1.7351398909503995e-05, - "loss": 0.3081, + "epoch": 0.21, + "grad_norm": 0.6102961146517013, + "learning_rate": 1.8385814150345188e-05, + "loss": 0.4618, "step": 4526 }, { - "epoch": 0.26, - "grad_norm": 0.5300104630284196, - "learning_rate": 1.7350137240841218e-05, - "loss": 0.4131, + "epoch": 0.21, + "grad_norm": 0.29311780730516, + "learning_rate": 1.8385003474399792e-05, + "loss": 0.1266, "step": 4527 }, { - "epoch": 0.26, - "grad_norm": 0.6412206824998394, - "learning_rate": 1.73488753176435e-05, - "loss": 0.5222, + "epoch": 0.21, + "grad_norm": 0.4540746138464081, + "learning_rate": 1.8384192612818502e-05, + "loss": 0.3421, "step": 4528 }, { - "epoch": 0.26, - "grad_norm": 0.3077706381580906, - "learning_rate": 1.7347613139954548e-05, - "loss": 0.2402, + "epoch": 0.21, + "grad_norm": 0.542262524632551, + "learning_rate": 1.8383381565619267e-05, + "loss": 0.2711, "step": 4529 }, { - "epoch": 0.26, - "grad_norm": 0.6688065829605588, - "learning_rate": 1.734635070781807e-05, - "loss": 0.538, + "epoch": 0.21, + "grad_norm": 0.8693316708371276, + "learning_rate": 1.8382570332820045e-05, + "loss": 0.5351, "step": 4530 }, { - "epoch": 0.26, - "grad_norm": 0.35909638887144074, - "learning_rate": 1.734508802127778e-05, - "loss": 0.2608, + "epoch": 0.21, + "grad_norm": 0.35870645133630963, + "learning_rate": 1.8381758914438795e-05, + "loss": 0.252, "step": 4531 }, { - "epoch": 0.26, - "grad_norm": 0.41579088397011704, - "learning_rate": 1.734382508037741e-05, - "loss": 0.1842, + "epoch": 0.21, + "grad_norm": 0.47024022988957936, + "learning_rate": 1.8380947310493483e-05, + "loss": 0.3976, "step": 4532 }, { - "epoch": 0.26, - "grad_norm": 0.4211404512665711, - "learning_rate": 1.7342561885160694e-05, - "loss": 0.3147, + "epoch": 0.21, + "grad_norm": 0.5611438712399898, + "learning_rate": 1.838013552100207e-05, + "loss": 0.4061, "step": 4533 }, { - "epoch": 0.26, - "grad_norm": 0.8256111760795807, - "learning_rate": 1.7341298435671373e-05, - "loss": 0.5434, + "epoch": 0.21, + "grad_norm": 0.3873244248317515, + "learning_rate": 1.837932354598254e-05, + "loss": 0.2137, "step": 4534 }, { - "epoch": 0.26, - "grad_norm": 0.5292156928559316, - "learning_rate": 1.7340034731953204e-05, - "loss": 0.2707, + "epoch": 0.21, + "grad_norm": 0.3297114334101669, + "learning_rate": 1.8378511385452856e-05, + "loss": 0.2091, "step": 4535 }, { - "epoch": 0.26, - "grad_norm": 0.3626707079391625, - "learning_rate": 1.7338770774049948e-05, - "loss": 0.2969, + "epoch": 0.21, + "grad_norm": 0.41556957290650115, + "learning_rate": 1.8377699039431013e-05, + "loss": 0.3359, "step": 4536 }, { - "epoch": 0.26, - "grad_norm": 0.2987530938150695, - "learning_rate": 1.733750656200538e-05, - "loss": 0.2727, + "epoch": 0.21, + "grad_norm": 0.39090113564999057, + "learning_rate": 1.8376886507934984e-05, + "loss": 0.254, "step": 4537 }, { - "epoch": 0.26, - "grad_norm": 0.41984643592449417, - "learning_rate": 1.733624209586327e-05, - "loss": 0.0769, + "epoch": 0.21, + "grad_norm": 0.6445740374361782, + "learning_rate": 1.837607379098276e-05, + "loss": 0.4622, "step": 4538 }, { - "epoch": 0.26, - "grad_norm": 0.4498582260094257, - "learning_rate": 1.733497737566741e-05, - "loss": 0.3627, + "epoch": 0.21, + "grad_norm": 0.7844720760239691, + "learning_rate": 1.8375260888592335e-05, + "loss": 0.4886, "step": 4539 }, { - "epoch": 0.26, - "grad_norm": 0.7896624131435914, - "learning_rate": 1.7333712401461602e-05, - "loss": 0.5321, + "epoch": 0.21, + "grad_norm": 0.3841299386143589, + "learning_rate": 1.8374447800781706e-05, + "loss": 0.2349, "step": 4540 }, { - "epoch": 0.26, - "grad_norm": 0.33277012471811757, - "learning_rate": 1.7332447173289648e-05, - "loss": 0.3177, + "epoch": 0.21, + "grad_norm": 0.2746444221852081, + "learning_rate": 1.8373634527568877e-05, + "loss": 0.2328, "step": 4541 }, { - "epoch": 0.26, - "grad_norm": 0.4780594140246693, - "learning_rate": 1.7331181691195364e-05, - "loss": 0.2849, + "epoch": 0.21, + "grad_norm": 0.859137480089945, + "learning_rate": 1.837282106897185e-05, + "loss": 0.5171, "step": 4542 }, { - "epoch": 0.26, - "grad_norm": 0.35576731933438077, - "learning_rate": 1.7329915955222578e-05, - "loss": 0.2063, + "epoch": 0.21, + "grad_norm": 0.5726114146139141, + "learning_rate": 1.8372007425008633e-05, + "loss": 0.299, "step": 4543 }, { - "epoch": 0.26, - "grad_norm": 0.5907181564894486, - "learning_rate": 1.7328649965415108e-05, - "loss": 0.358, + "epoch": 0.21, + "grad_norm": 0.3699217434096532, + "learning_rate": 1.837119359569724e-05, + "loss": 0.2869, "step": 4544 }, { - "epoch": 0.26, - "grad_norm": 0.31900993570485137, - "learning_rate": 1.732738372181681e-05, - "loss": 0.2571, + "epoch": 0.21, + "grad_norm": 1.2411413064091992, + "learning_rate": 1.837037958105569e-05, + "loss": 0.7696, "step": 4545 }, { - "epoch": 0.26, - "grad_norm": 0.8407790010179014, - "learning_rate": 1.7326117224471534e-05, - "loss": 0.4832, + "epoch": 0.21, + "grad_norm": 0.40326436494667944, + "learning_rate": 1.8369565381102002e-05, + "loss": 0.3015, "step": 4546 }, { - "epoch": 0.26, - "grad_norm": 0.6475543922825446, - "learning_rate": 1.7324850473423124e-05, - "loss": 0.4714, + "epoch": 0.21, + "grad_norm": 0.23076170615833244, + "learning_rate": 1.8368750995854206e-05, + "loss": 0.1179, "step": 4547 }, { - "epoch": 0.26, - "grad_norm": 0.4103670098764781, - "learning_rate": 1.7323583468715464e-05, - "loss": 0.2612, + "epoch": 0.21, + "grad_norm": 0.4945097161309367, + "learning_rate": 1.836793642533033e-05, + "loss": 0.3707, "step": 4548 }, { - "epoch": 0.26, - "grad_norm": 0.25746760920249057, - "learning_rate": 1.732231621039242e-05, - "loss": 0.2225, + "epoch": 0.21, + "grad_norm": 0.38761428972781975, + "learning_rate": 1.83671216695484e-05, + "loss": 0.3019, "step": 4549 }, { - "epoch": 0.26, - "grad_norm": 0.45385734178505405, - "learning_rate": 1.732104869849788e-05, - "loss": 0.3283, + "epoch": 0.21, + "grad_norm": 0.8911628369805921, + "learning_rate": 1.8366306728526465e-05, + "loss": 0.3494, "step": 4550 }, { - "epoch": 0.26, - "grad_norm": 0.5715738690411651, - "learning_rate": 1.731978093307574e-05, - "loss": 0.3138, + "epoch": 0.21, + "grad_norm": 1.2939556702780841, + "learning_rate": 1.8365491602282565e-05, + "loss": 0.805, "step": 4551 }, { - "epoch": 0.26, - "grad_norm": 0.47645140136250524, - "learning_rate": 1.7318512914169903e-05, - "loss": 0.3329, + "epoch": 0.21, + "grad_norm": 0.3996867720441695, + "learning_rate": 1.8364676290834737e-05, + "loss": 0.291, "step": 4552 }, { - "epoch": 0.26, - "grad_norm": 0.8370401653202683, - "learning_rate": 1.7317244641824275e-05, - "loss": 0.5012, + "epoch": 0.21, + "grad_norm": 0.2836975215738741, + "learning_rate": 1.8363860794201042e-05, + "loss": 0.189, "step": 4553 }, { - "epoch": 0.26, - "grad_norm": 0.40217735496250034, - "learning_rate": 1.731597611608278e-05, - "loss": 0.319, + "epoch": 0.21, + "grad_norm": 0.9526515936212498, + "learning_rate": 1.836304511239953e-05, + "loss": 0.4688, "step": 4554 }, { - "epoch": 0.26, - "grad_norm": 0.22407629826832084, - "learning_rate": 1.731470733698935e-05, - "loss": 0.137, + "epoch": 0.21, + "grad_norm": 0.45601109398345857, + "learning_rate": 1.836222924544826e-05, + "loss": 0.2732, "step": 4555 }, { - "epoch": 0.26, - "grad_norm": 0.43215683904816754, - "learning_rate": 1.7313438304587918e-05, - "loss": 0.3071, + "epoch": 0.21, + "grad_norm": 0.4014681064055307, + "learning_rate": 1.8361413193365295e-05, + "loss": 0.3391, "step": 4556 }, { - "epoch": 0.26, - "grad_norm": 0.318869649065026, - "learning_rate": 1.731216901892243e-05, - "loss": 0.2901, + "epoch": 0.21, + "grad_norm": 0.4852827425818757, + "learning_rate": 1.8360596956168698e-05, + "loss": 0.3172, "step": 4557 }, { - "epoch": 0.26, - "grad_norm": 0.8355303688845711, - "learning_rate": 1.7310899480036845e-05, - "loss": 0.3845, + "epoch": 0.21, + "grad_norm": 0.4154254437291109, + "learning_rate": 1.8359780533876544e-05, + "loss": 0.289, "step": 4558 }, { - "epoch": 0.26, - "grad_norm": 0.8346211453513296, - "learning_rate": 1.7309629687975126e-05, - "loss": 0.5678, + "epoch": 0.21, + "grad_norm": 0.8806941914172582, + "learning_rate": 1.8358963926506908e-05, + "loss": 0.4861, "step": 4559 }, { - "epoch": 0.26, - "grad_norm": 0.3490837116866384, - "learning_rate": 1.730835964278124e-05, - "loss": 0.2892, + "epoch": 0.21, + "grad_norm": 0.4416382168301104, + "learning_rate": 1.8358147134077863e-05, + "loss": 0.3132, "step": 4560 }, { - "epoch": 0.26, - "grad_norm": 0.32214562875779884, - "learning_rate": 1.7307089344499178e-05, - "loss": 0.2011, + "epoch": 0.21, + "grad_norm": 0.3215025712249785, + "learning_rate": 1.83573301566075e-05, + "loss": 0.2374, "step": 4561 }, { - "epoch": 0.26, - "grad_norm": 0.4094468270950029, - "learning_rate": 1.730581879317293e-05, - "loss": 0.2334, + "epoch": 0.21, + "grad_norm": 0.49120500927244126, + "learning_rate": 1.83565129941139e-05, + "loss": 0.32, "step": 4562 }, { - "epoch": 0.26, - "grad_norm": 0.6224042650550641, - "learning_rate": 1.730454798884649e-05, - "loss": 0.3563, + "epoch": 0.21, + "grad_norm": 1.389224195201891, + "learning_rate": 1.8355695646615158e-05, + "loss": 0.6004, "step": 4563 }, { - "epoch": 0.26, - "grad_norm": 0.379128591299934, - "learning_rate": 1.7303276931563862e-05, - "loss": 0.2706, + "epoch": 0.21, + "grad_norm": 0.332578909554029, + "learning_rate": 1.8354878114129368e-05, + "loss": 0.2911, "step": 4564 }, { - "epoch": 0.26, - "grad_norm": 0.6789909142350193, - "learning_rate": 1.730200562136907e-05, - "loss": 0.4433, + "epoch": 0.21, + "grad_norm": 0.5927002202672179, + "learning_rate": 1.8354060396674628e-05, + "loss": 0.3902, "step": 4565 }, { - "epoch": 0.26, - "grad_norm": 0.4124711177122947, - "learning_rate": 1.7300734058306138e-05, - "loss": 0.335, + "epoch": 0.21, + "grad_norm": 0.5609548862190832, + "learning_rate": 1.835324249426904e-05, + "loss": 0.2991, "step": 4566 }, { - "epoch": 0.26, - "grad_norm": 0.2858262013205675, - "learning_rate": 1.72994622424191e-05, - "loss": 0.2076, + "epoch": 0.21, + "grad_norm": 0.31926419091079944, + "learning_rate": 1.8352424406930714e-05, + "loss": 0.2216, "step": 4567 }, { - "epoch": 0.26, - "grad_norm": 0.30957498941420897, - "learning_rate": 1.7298190173751996e-05, - "loss": 0.2559, + "epoch": 0.21, + "grad_norm": 0.6562529874200298, + "learning_rate": 1.8351606134677766e-05, + "loss": 0.3897, "step": 4568 }, { - "epoch": 0.26, - "grad_norm": 0.45178314832303773, - "learning_rate": 1.7296917852348882e-05, - "loss": 0.342, + "epoch": 0.21, + "grad_norm": 0.7577010851148317, + "learning_rate": 1.8350787677528307e-05, + "loss": 0.3925, "step": 4569 }, { - "epoch": 0.26, - "grad_norm": 0.6733044811452652, - "learning_rate": 1.7295645278253817e-05, - "loss": 0.5276, + "epoch": 0.21, + "grad_norm": 0.3604380829380593, + "learning_rate": 1.8349969035500456e-05, + "loss": 0.2121, "step": 4570 }, { - "epoch": 0.26, - "grad_norm": 0.49577571076452776, - "learning_rate": 1.729437245151087e-05, - "loss": 0.2828, + "epoch": 0.21, + "grad_norm": 0.6901919160591082, + "learning_rate": 1.834915020861234e-05, + "loss": 0.4971, "step": 4571 }, { - "epoch": 0.26, - "grad_norm": 0.3413429533842971, - "learning_rate": 1.7293099372164118e-05, - "loss": 0.2894, + "epoch": 0.21, + "grad_norm": 0.40402774672212377, + "learning_rate": 1.8348331196882082e-05, + "loss": 0.3648, "step": 4572 }, { - "epoch": 0.26, - "grad_norm": 0.3407676552622252, - "learning_rate": 1.729182604025765e-05, - "loss": 0.2752, + "epoch": 0.21, + "grad_norm": 0.26025303015026535, + "learning_rate": 1.834751200032782e-05, + "loss": 0.1365, "step": 4573 }, { - "epoch": 0.26, - "grad_norm": 0.561291828414926, - "learning_rate": 1.729055245583556e-05, - "loss": 0.3268, + "epoch": 0.21, + "grad_norm": 0.4150870088650874, + "learning_rate": 1.8346692618967686e-05, + "loss": 0.2842, "step": 4574 }, { - "epoch": 0.26, - "grad_norm": 0.29530320708055313, - "learning_rate": 1.728927861894195e-05, - "loss": 0.2669, + "epoch": 0.21, + "grad_norm": 0.5295548604752122, + "learning_rate": 1.8345873052819825e-05, + "loss": 0.415, "step": 4575 }, { - "epoch": 0.26, - "grad_norm": 0.3881094912526957, - "learning_rate": 1.7288004529620935e-05, - "loss": 0.3326, + "epoch": 0.21, + "grad_norm": 0.38462285384017086, + "learning_rate": 1.8345053301902376e-05, + "loss": 0.2054, "step": 4576 }, { - "epoch": 0.26, - "grad_norm": 1.1701078373770246, - "learning_rate": 1.7286730187916635e-05, - "loss": 0.8335, + "epoch": 0.21, + "grad_norm": 0.9032391231196824, + "learning_rate": 1.8344233366233492e-05, + "loss": 0.4334, "step": 4577 }, { - "epoch": 0.26, - "grad_norm": 0.31692692063010275, - "learning_rate": 1.7285455593873183e-05, - "loss": 0.2234, + "epoch": 0.21, + "grad_norm": 1.3487902906527032, + "learning_rate": 1.8343413245831323e-05, + "loss": 0.7028, "step": 4578 }, { - "epoch": 0.26, - "grad_norm": 0.7275364835658681, - "learning_rate": 1.728418074753472e-05, - "loss": 0.5326, + "epoch": 0.21, + "grad_norm": 0.2973113728314521, + "learning_rate": 1.8342592940714026e-05, + "loss": 0.1867, "step": 4579 }, { - "epoch": 0.26, - "grad_norm": 0.3451992181340388, - "learning_rate": 1.7282905648945386e-05, - "loss": 0.342, + "epoch": 0.21, + "grad_norm": 0.32170342019545883, + "learning_rate": 1.834177245089976e-05, + "loss": 0.2687, "step": 4580 }, { - "epoch": 0.26, - "grad_norm": 0.2693554960138304, - "learning_rate": 1.7281630298149346e-05, - "loss": 0.2137, + "epoch": 0.21, + "grad_norm": 1.583807786972439, + "learning_rate": 1.8340951776406695e-05, + "loss": 0.9249, "step": 4581 }, { - "epoch": 0.26, - "grad_norm": 0.4373966659922116, - "learning_rate": 1.728035469519076e-05, - "loss": 0.2423, + "epoch": 0.21, + "grad_norm": 0.44375169137578174, + "learning_rate": 1.8340130917252995e-05, + "loss": 0.3125, "step": 4582 }, { - "epoch": 0.26, - "grad_norm": 0.8356130794756954, - "learning_rate": 1.7279078840113805e-05, - "loss": 0.4754, + "epoch": 0.21, + "grad_norm": 0.5468710800300232, + "learning_rate": 1.8339309873456835e-05, + "loss": 0.3434, "step": 4583 }, { - "epoch": 0.26, - "grad_norm": 0.3556758936236917, - "learning_rate": 1.7277802732962662e-05, - "loss": 0.2365, + "epoch": 0.21, + "grad_norm": 0.47646794831376693, + "learning_rate": 1.833848864503639e-05, + "loss": 0.3599, "step": 4584 }, { - "epoch": 0.26, - "grad_norm": 0.3887816940254315, - "learning_rate": 1.7276526373781525e-05, - "loss": 0.3398, + "epoch": 0.21, + "grad_norm": 0.3080238853413536, + "learning_rate": 1.8337667232009845e-05, + "loss": 0.2248, "step": 4585 }, { - "epoch": 0.26, - "grad_norm": 0.6248091341949249, - "learning_rate": 1.7275249762614592e-05, - "loss": 0.4088, + "epoch": 0.21, + "grad_norm": 0.31937636056518637, + "learning_rate": 1.8336845634395385e-05, + "loss": 0.1697, "step": 4586 }, { - "epoch": 0.26, - "grad_norm": 0.2780554084812607, - "learning_rate": 1.727397289950607e-05, - "loss": 0.1789, + "epoch": 0.21, + "grad_norm": 0.5312230431412615, + "learning_rate": 1.8336023852211197e-05, + "loss": 0.3911, "step": 4587 }, { - "epoch": 0.26, - "grad_norm": 0.35914471939866316, - "learning_rate": 1.7272695784500185e-05, - "loss": 0.2958, + "epoch": 0.21, + "grad_norm": 0.3702593500991801, + "learning_rate": 1.8335201885475476e-05, + "loss": 0.2979, "step": 4588 }, { - "epoch": 0.26, - "grad_norm": 1.1927746460401998, - "learning_rate": 1.7271418417641153e-05, - "loss": 0.7851, + "epoch": 0.21, + "grad_norm": 0.5446635664813944, + "learning_rate": 1.8334379734206415e-05, + "loss": 0.3337, "step": 4589 }, { - "epoch": 0.26, - "grad_norm": 0.3336997350745605, - "learning_rate": 1.7270140798973215e-05, - "loss": 0.259, + "epoch": 0.21, + "grad_norm": 1.1155471524612366, + "learning_rate": 1.8333557398422224e-05, + "loss": 0.5501, "step": 4590 }, { - "epoch": 0.26, - "grad_norm": 0.7934746268294893, - "learning_rate": 1.7268862928540616e-05, - "loss": 0.3916, + "epoch": 0.21, + "grad_norm": 0.30708505224170524, + "learning_rate": 1.8332734878141097e-05, + "loss": 0.1568, "step": 4591 }, { - "epoch": 0.26, - "grad_norm": 0.440992452385509, - "learning_rate": 1.7267584806387604e-05, - "loss": 0.3606, + "epoch": 0.21, + "grad_norm": 0.3658113913111306, + "learning_rate": 1.833191217338126e-05, + "loss": 0.2356, "step": 4592 }, { - "epoch": 0.26, - "grad_norm": 0.3643981449840152, - "learning_rate": 1.726630643255844e-05, - "loss": 0.2713, + "epoch": 0.21, + "grad_norm": 0.7467558800542814, + "learning_rate": 1.8331089284160914e-05, + "loss": 0.4761, "step": 4593 }, { - "epoch": 0.26, - "grad_norm": 0.23831222638239447, - "learning_rate": 1.7265027807097402e-05, - "loss": 0.1099, + "epoch": 0.21, + "grad_norm": 0.6299505730719956, + "learning_rate": 1.833026621049828e-05, + "loss": 0.3302, "step": 4594 }, { - "epoch": 0.26, - "grad_norm": 0.9794922046894605, - "learning_rate": 1.726374893004876e-05, - "loss": 0.641, + "epoch": 0.21, + "grad_norm": 0.5167602375645106, + "learning_rate": 1.8329442952411584e-05, + "loss": 0.3782, "step": 4595 }, { - "epoch": 0.26, - "grad_norm": 0.3302361301129314, - "learning_rate": 1.7262469801456806e-05, - "loss": 0.2753, + "epoch": 0.21, + "grad_norm": 0.4363828916744101, + "learning_rate": 1.8328619509919047e-05, + "loss": 0.2718, "step": 4596 }, { - "epoch": 0.26, - "grad_norm": 0.6913170287228856, - "learning_rate": 1.7261190421365836e-05, - "loss": 0.3405, + "epoch": 0.21, + "grad_norm": 0.7518047794759544, + "learning_rate": 1.83277958830389e-05, + "loss": 0.3452, "step": 4597 }, { - "epoch": 0.26, - "grad_norm": 0.7638555754563725, - "learning_rate": 1.7259910789820152e-05, - "loss": 0.5113, + "epoch": 0.21, + "grad_norm": 0.2926009312177105, + "learning_rate": 1.832697207178938e-05, + "loss": 0.2331, "step": 4598 }, { - "epoch": 0.26, - "grad_norm": 0.30601965328502595, - "learning_rate": 1.7258630906864068e-05, - "loss": 0.203, + "epoch": 0.21, + "grad_norm": 0.8532877939908471, + "learning_rate": 1.8326148076188725e-05, + "loss": 0.3479, "step": 4599 }, { - "epoch": 0.26, - "grad_norm": 0.35651461621894115, - "learning_rate": 1.7257350772541914e-05, - "loss": 0.2329, + "epoch": 0.21, + "grad_norm": 0.3897996687113407, + "learning_rate": 1.8325323896255175e-05, + "loss": 0.3025, "step": 4600 }, { - "epoch": 0.26, - "grad_norm": 0.8782920891777145, - "learning_rate": 1.725607038689801e-05, - "loss": 0.6243, + "epoch": 0.21, + "grad_norm": 0.6013179483021874, + "learning_rate": 1.8324499532006975e-05, + "loss": 0.4292, "step": 4601 }, { - "epoch": 0.26, - "grad_norm": 0.42622194472487185, - "learning_rate": 1.7254789749976703e-05, - "loss": 0.2775, + "epoch": 0.21, + "grad_norm": 0.8540723739208614, + "learning_rate": 1.8323674983462383e-05, + "loss": 0.371, "step": 4602 }, { - "epoch": 0.26, - "grad_norm": 1.1987544832356745, - "learning_rate": 1.7253508861822338e-05, - "loss": 0.4799, + "epoch": 0.21, + "grad_norm": 0.3353940101067354, + "learning_rate": 1.8322850250639645e-05, + "loss": 0.2183, "step": 4603 }, { - "epoch": 0.26, - "grad_norm": 0.37401071978627676, - "learning_rate": 1.725222772247927e-05, - "loss": 0.2316, + "epoch": 0.21, + "grad_norm": 0.367349511253539, + "learning_rate": 1.832202533355703e-05, + "loss": 0.3092, "step": 4604 }, { - "epoch": 0.26, - "grad_norm": 0.4714683560992018, - "learning_rate": 1.725094633199187e-05, - "loss": 0.279, + "epoch": 0.21, + "grad_norm": 0.576936953989063, + "learning_rate": 1.8321200232232792e-05, + "loss": 0.3234, "step": 4605 }, { - "epoch": 0.26, - "grad_norm": 0.3252260992389096, - "learning_rate": 1.7249664690404514e-05, - "loss": 0.1989, + "epoch": 0.21, + "grad_norm": 0.40632011584896016, + "learning_rate": 1.83203749466852e-05, + "loss": 0.257, "step": 4606 }, { - "epoch": 0.26, - "grad_norm": 1.2681138371986997, - "learning_rate": 1.7248382797761576e-05, - "loss": 0.4433, + "epoch": 0.21, + "grad_norm": 0.6323191584401766, + "learning_rate": 1.8319549476932527e-05, + "loss": 0.3819, "step": 4607 }, { - "epoch": 0.26, - "grad_norm": 0.38382751971823414, - "learning_rate": 1.7247100654107458e-05, - "loss": 0.2707, + "epoch": 0.21, + "grad_norm": 0.40187080521452123, + "learning_rate": 1.831872382299305e-05, + "loss": 0.3494, "step": 4608 }, { - "epoch": 0.26, - "grad_norm": 0.5398894230168891, - "learning_rate": 1.724581825948655e-05, - "loss": 0.3814, + "epoch": 0.21, + "grad_norm": 0.6619876159493546, + "learning_rate": 1.831789798488504e-05, + "loss": 0.1928, "step": 4609 }, { - "epoch": 0.26, - "grad_norm": 0.7840013858936152, - "learning_rate": 1.7244535613943273e-05, - "loss": 0.3339, + "epoch": 0.21, + "grad_norm": 0.31471640807963536, + "learning_rate": 1.831707196262679e-05, + "loss": 0.248, "step": 4610 }, { - "epoch": 0.26, - "grad_norm": 0.4177543238883827, - "learning_rate": 1.7243252717522037e-05, - "loss": 0.2759, + "epoch": 0.21, + "grad_norm": 0.5353264629725613, + "learning_rate": 1.8316245756236578e-05, + "loss": 0.3937, "step": 4611 }, { - "epoch": 0.26, - "grad_norm": 0.2814456169503759, - "learning_rate": 1.724196957026727e-05, - "loss": 0.2205, + "epoch": 0.21, + "grad_norm": 0.40248006020890803, + "learning_rate": 1.8315419365732702e-05, + "loss": 0.2275, "step": 4612 }, { - "epoch": 0.27, - "grad_norm": 1.286573728865289, - "learning_rate": 1.724068617222341e-05, - "loss": 0.4222, + "epoch": 0.21, + "grad_norm": 0.37234828261484243, + "learning_rate": 1.831459279113346e-05, + "loss": 0.2592, "step": 4613 }, { - "epoch": 0.27, - "grad_norm": 0.3835480722641368, - "learning_rate": 1.7239402523434898e-05, - "loss": 0.2947, + "epoch": 0.21, + "grad_norm": 1.071447345174701, + "learning_rate": 1.831376603245714e-05, + "loss": 0.5666, "step": 4614 }, { - "epoch": 0.27, - "grad_norm": 0.9076811236266914, - "learning_rate": 1.723811862394619e-05, - "loss": 0.5795, + "epoch": 0.21, + "grad_norm": 0.3625191470197243, + "learning_rate": 1.831293908972206e-05, + "loss": 0.2405, "step": 4615 }, { - "epoch": 0.27, - "grad_norm": 0.4121511463726725, - "learning_rate": 1.7236834473801744e-05, - "loss": 0.344, + "epoch": 0.21, + "grad_norm": 0.3329554715877784, + "learning_rate": 1.8312111962946517e-05, + "loss": 0.2622, "step": 4616 }, { - "epoch": 0.27, - "grad_norm": 0.33727558721149664, - "learning_rate": 1.723555007304603e-05, - "loss": 0.2264, + "epoch": 0.21, + "grad_norm": 0.9591723487439654, + "learning_rate": 1.8311284652148825e-05, + "loss": 0.4893, "step": 4617 }, { - "epoch": 0.27, - "grad_norm": 0.49247949403688007, - "learning_rate": 1.7234265421723528e-05, - "loss": 0.2619, + "epoch": 0.21, + "grad_norm": 0.8992108885779463, + "learning_rate": 1.8310457157347306e-05, + "loss": 0.5673, "step": 4618 }, { - "epoch": 0.27, - "grad_norm": 0.7509746626101724, - "learning_rate": 1.7232980519878727e-05, - "loss": 0.4011, + "epoch": 0.21, + "grad_norm": 0.3321857530404299, + "learning_rate": 1.830962947856027e-05, + "loss": 0.2007, "step": 4619 }, { - "epoch": 0.27, - "grad_norm": 0.3204159675138603, - "learning_rate": 1.723169536755612e-05, - "loss": 0.2003, + "epoch": 0.21, + "grad_norm": 0.464540730712128, + "learning_rate": 1.830880161580605e-05, + "loss": 0.327, "step": 4620 }, { - "epoch": 0.27, - "grad_norm": 0.41050620070576016, - "learning_rate": 1.7230409964800215e-05, - "loss": 0.3201, + "epoch": 0.21, + "grad_norm": 0.6866733230850293, + "learning_rate": 1.830797356910297e-05, + "loss": 0.3231, "step": 4621 }, { - "epoch": 0.27, - "grad_norm": 1.120953694972054, - "learning_rate": 1.7229124311655524e-05, - "loss": 0.5876, + "epoch": 0.21, + "grad_norm": 0.5194091625345517, + "learning_rate": 1.830714533846936e-05, + "loss": 0.3011, "step": 4622 }, { - "epoch": 0.27, - "grad_norm": 0.34506829258410193, - "learning_rate": 1.722783840816657e-05, - "loss": 0.2215, + "epoch": 0.21, + "grad_norm": 0.6108230984146801, + "learning_rate": 1.8306316923923564e-05, + "loss": 0.3743, "step": 4623 }, { - "epoch": 0.27, - "grad_norm": 0.39872366387719055, - "learning_rate": 1.7226552254377883e-05, - "loss": 0.3462, + "epoch": 0.21, + "grad_norm": 0.4234023757694488, + "learning_rate": 1.8305488325483913e-05, + "loss": 0.3121, "step": 4624 }, { - "epoch": 0.27, - "grad_norm": 0.5939456678866099, - "learning_rate": 1.7225265850333997e-05, - "loss": 0.4869, + "epoch": 0.21, + "grad_norm": 0.3022048740973783, + "learning_rate": 1.8304659543168757e-05, + "loss": 0.2031, "step": 4625 }, { - "epoch": 0.27, - "grad_norm": 0.3497056047001658, - "learning_rate": 1.7223979196079466e-05, - "loss": 0.2402, + "epoch": 0.21, + "grad_norm": 0.5021725160542018, + "learning_rate": 1.830383057699644e-05, + "loss": 0.256, "step": 4626 }, { - "epoch": 0.27, - "grad_norm": 0.31503886763827527, - "learning_rate": 1.7222692291658853e-05, - "loss": 0.2371, + "epoch": 0.21, + "grad_norm": 0.40584754966464476, + "learning_rate": 1.830300142698532e-05, + "loss": 0.3241, "step": 4627 }, { - "epoch": 0.27, - "grad_norm": 0.5476162155781016, - "learning_rate": 1.7221405137116712e-05, - "loss": 0.3956, + "epoch": 0.21, + "grad_norm": 0.38035674784310214, + "learning_rate": 1.830217209315375e-05, + "loss": 0.2649, "step": 4628 }, { - "epoch": 0.27, - "grad_norm": 0.4289308831084935, - "learning_rate": 1.722011773249762e-05, - "loss": 0.2724, + "epoch": 0.21, + "grad_norm": 0.9887265610109615, + "learning_rate": 1.830134257552009e-05, + "loss": 0.6486, "step": 4629 }, { - "epoch": 0.27, - "grad_norm": 0.45957358140657234, - "learning_rate": 1.7218830077846164e-05, - "loss": 0.329, + "epoch": 0.21, + "grad_norm": 1.123316084594646, + "learning_rate": 1.830051287410271e-05, + "loss": 0.6933, "step": 4630 }, { - "epoch": 0.27, - "grad_norm": 0.4495152254009832, - "learning_rate": 1.7217542173206932e-05, - "loss": 0.3848, + "epoch": 0.21, + "grad_norm": 0.30579650522816454, + "learning_rate": 1.8299682988919973e-05, + "loss": 0.2157, "step": 4631 }, { - "epoch": 0.27, - "grad_norm": 0.3251634536660676, - "learning_rate": 1.7216254018624524e-05, - "loss": 0.2706, + "epoch": 0.21, + "grad_norm": 0.34962032014734423, + "learning_rate": 1.8298852919990254e-05, + "loss": 0.2088, "step": 4632 }, { - "epoch": 0.27, - "grad_norm": 0.36095585230014704, - "learning_rate": 1.7214965614143554e-05, - "loss": 0.1692, + "epoch": 0.21, + "grad_norm": 0.6195422831240769, + "learning_rate": 1.829802266733193e-05, + "loss": 0.4219, "step": 4633 }, { - "epoch": 0.27, - "grad_norm": 0.42598816677891027, - "learning_rate": 1.721367695980863e-05, - "loss": 0.2535, + "epoch": 0.21, + "grad_norm": 0.4584659278579204, + "learning_rate": 1.829719223096338e-05, + "loss": 0.34, "step": 4634 }, { - "epoch": 0.27, - "grad_norm": 0.37269976982163927, - "learning_rate": 1.7212388055664385e-05, - "loss": 0.2948, + "epoch": 0.21, + "grad_norm": 0.42992961965023846, + "learning_rate": 1.8296361610902994e-05, + "loss": 0.3106, "step": 4635 }, { - "epoch": 0.27, - "grad_norm": 0.3768645284351786, - "learning_rate": 1.7211098901755453e-05, - "loss": 0.2551, + "epoch": 0.21, + "grad_norm": 0.589559115352862, + "learning_rate": 1.8295530807169158e-05, + "loss": 0.381, "step": 4636 }, { - "epoch": 0.27, - "grad_norm": 0.7238591197734653, - "learning_rate": 1.7209809498126473e-05, - "loss": 0.4616, + "epoch": 0.21, + "grad_norm": 0.3343543749720442, + "learning_rate": 1.8294699819780262e-05, + "loss": 0.2649, "step": 4637 }, { - "epoch": 0.27, - "grad_norm": 0.46787631269308716, - "learning_rate": 1.72085198448221e-05, - "loss": 0.327, + "epoch": 0.21, + "grad_norm": 0.40103138013636025, + "learning_rate": 1.8293868648754708e-05, + "loss": 0.1918, "step": 4638 }, { - "epoch": 0.27, - "grad_norm": 0.28015550313024745, - "learning_rate": 1.7207229941887e-05, - "loss": 0.2313, + "epoch": 0.21, + "grad_norm": 0.3976638493283039, + "learning_rate": 1.8293037294110897e-05, + "loss": 0.3264, "step": 4639 }, { - "epoch": 0.27, - "grad_norm": 0.32694743935340276, - "learning_rate": 1.7205939789365834e-05, - "loss": 0.2899, + "epoch": 0.21, + "grad_norm": 0.4033869931495121, + "learning_rate": 1.8292205755867233e-05, + "loss": 0.2883, "step": 4640 }, { - "epoch": 0.27, - "grad_norm": 0.57415873988465, - "learning_rate": 1.720464938730328e-05, - "loss": 0.3279, + "epoch": 0.21, + "grad_norm": 0.7098122268082854, + "learning_rate": 1.8291374034042127e-05, + "loss": 0.4081, "step": 4641 }, { - "epoch": 0.27, - "grad_norm": 0.4253211319708633, - "learning_rate": 1.720335873574403e-05, - "loss": 0.3365, + "epoch": 0.21, + "grad_norm": 0.7289665638466958, + "learning_rate": 1.829054212865399e-05, + "loss": 0.469, "step": 4642 }, { - "epoch": 0.27, - "grad_norm": 0.4051995236013618, - "learning_rate": 1.7202067834732778e-05, - "loss": 0.3115, + "epoch": 0.21, + "grad_norm": 0.5182878505099521, + "learning_rate": 1.8289710039721237e-05, + "loss": 0.3409, "step": 4643 }, { - "epoch": 0.27, - "grad_norm": 0.4540323810623343, - "learning_rate": 1.7200776684314226e-05, - "loss": 0.3126, + "epoch": 0.21, + "grad_norm": 0.3805657789707598, + "learning_rate": 1.8288877767262302e-05, + "loss": 0.2706, "step": 4644 }, { - "epoch": 0.27, - "grad_norm": 0.3539122334412321, - "learning_rate": 1.7199485284533088e-05, - "loss": 0.2621, + "epoch": 0.21, + "grad_norm": 0.8086804148945254, + "learning_rate": 1.8288045311295594e-05, + "loss": 0.4002, "step": 4645 }, { - "epoch": 0.27, - "grad_norm": 0.26353112650494664, - "learning_rate": 1.7198193635434083e-05, - "loss": 0.0729, + "epoch": 0.21, + "grad_norm": 0.43134834777388437, + "learning_rate": 1.8287212671839554e-05, + "loss": 0.3052, "step": 4646 }, { - "epoch": 0.27, - "grad_norm": 0.40396435081770415, - "learning_rate": 1.719690173706194e-05, - "loss": 0.3095, + "epoch": 0.21, + "grad_norm": 0.4312725745261092, + "learning_rate": 1.8286379848912612e-05, + "loss": 0.3506, "step": 4647 }, { - "epoch": 0.27, - "grad_norm": 0.33135791809707316, - "learning_rate": 1.71956095894614e-05, - "loss": 0.3202, + "epoch": 0.21, + "grad_norm": 0.8856913487808675, + "learning_rate": 1.828554684253321e-05, + "loss": 0.5767, "step": 4648 }, { - "epoch": 0.27, - "grad_norm": 0.8182839067779433, - "learning_rate": 1.719431719267721e-05, - "loss": 0.4072, + "epoch": 0.21, + "grad_norm": 0.4124641606171705, + "learning_rate": 1.828471365271978e-05, + "loss": 0.2705, "step": 4649 }, { - "epoch": 0.27, - "grad_norm": 0.4000599760224488, - "learning_rate": 1.7193024546754125e-05, - "loss": 0.3128, + "epoch": 0.21, + "grad_norm": 0.35856325788690446, + "learning_rate": 1.828388027949078e-05, + "loss": 0.2308, "step": 4650 }, { - "epoch": 0.27, - "grad_norm": 0.5156932368052041, - "learning_rate": 1.719173165173691e-05, - "loss": 0.406, + "epoch": 0.21, + "grad_norm": 0.409784233241191, + "learning_rate": 1.8283046722864656e-05, + "loss": 0.3141, "step": 4651 }, { - "epoch": 0.27, - "grad_norm": 0.2688587493026527, - "learning_rate": 1.7190438507670337e-05, - "loss": 0.1869, + "epoch": 0.21, + "grad_norm": 0.3855713725879107, + "learning_rate": 1.8282212982859856e-05, + "loss": 0.2819, "step": 4652 }, { - "epoch": 0.27, - "grad_norm": 0.36163058971972184, - "learning_rate": 1.7189145114599188e-05, - "loss": 0.3013, + "epoch": 0.21, + "grad_norm": 1.6425095129561391, + "learning_rate": 1.8281379059494845e-05, + "loss": 0.8959, "step": 4653 }, { - "epoch": 0.27, - "grad_norm": 0.787712368896139, - "learning_rate": 1.718785147256825e-05, - "loss": 0.4917, + "epoch": 0.21, + "grad_norm": 0.7935460672691838, + "learning_rate": 1.8280544952788086e-05, + "loss": 0.3372, "step": 4654 }, { - "epoch": 0.27, - "grad_norm": 0.3618209143003872, - "learning_rate": 1.7186557581622327e-05, - "loss": 0.3338, + "epoch": 0.21, + "grad_norm": 0.4054106948421424, + "learning_rate": 1.8279710662758037e-05, + "loss": 0.2795, "step": 4655 }, { - "epoch": 0.27, - "grad_norm": 0.32715965622544163, - "learning_rate": 1.7185263441806227e-05, - "loss": 0.2166, + "epoch": 0.21, + "grad_norm": 0.6681846480968677, + "learning_rate": 1.827887618942318e-05, + "loss": 0.5152, "step": 4656 }, { - "epoch": 0.27, - "grad_norm": 0.5405807570579455, - "learning_rate": 1.7183969053164757e-05, - "loss": 0.3873, + "epoch": 0.21, + "grad_norm": 0.3552188032688544, + "learning_rate": 1.827804153280198e-05, + "loss": 0.2441, "step": 4657 }, { - "epoch": 0.27, - "grad_norm": 0.33734823448871526, - "learning_rate": 1.718267441574275e-05, - "loss": 0.2136, + "epoch": 0.21, + "grad_norm": 0.3983191076778536, + "learning_rate": 1.8277206692912922e-05, + "loss": 0.2203, "step": 4658 }, { - "epoch": 0.27, - "grad_norm": 0.41124631225255276, - "learning_rate": 1.718137952958504e-05, - "loss": 0.2576, + "epoch": 0.21, + "grad_norm": 0.4557519331789354, + "learning_rate": 1.8276371669774482e-05, + "loss": 0.3305, "step": 4659 }, { - "epoch": 0.27, - "grad_norm": 0.372867877130881, - "learning_rate": 1.7180084394736464e-05, - "loss": 0.3317, + "epoch": 0.21, + "grad_norm": 1.5406306540507528, + "learning_rate": 1.827553646340515e-05, + "loss": 0.8721, "step": 4660 }, { - "epoch": 0.27, - "grad_norm": 1.2657038079052518, - "learning_rate": 1.717878901124187e-05, - "loss": 0.8484, + "epoch": 0.21, + "grad_norm": 0.45662552571470577, + "learning_rate": 1.8274701073823417e-05, + "loss": 0.2102, "step": 4661 }, { - "epoch": 0.27, - "grad_norm": 0.30325582500593035, - "learning_rate": 1.7177493379146123e-05, - "loss": 0.1239, + "epoch": 0.21, + "grad_norm": 0.69059765418248, + "learning_rate": 1.827386550104778e-05, + "loss": 0.4733, "step": 4662 }, { - "epoch": 0.27, - "grad_norm": 0.32165604472772885, - "learning_rate": 1.717619749849409e-05, - "loss": 0.296, + "epoch": 0.21, + "grad_norm": 0.4109244715818268, + "learning_rate": 1.8273029745096735e-05, + "loss": 0.281, "step": 4663 }, { - "epoch": 0.27, - "grad_norm": 0.30992102992511755, - "learning_rate": 1.7174901369330648e-05, - "loss": 0.2242, + "epoch": 0.21, + "grad_norm": 0.37609499392813966, + "learning_rate": 1.8272193805988782e-05, + "loss": 0.1875, "step": 4664 }, { - "epoch": 0.27, - "grad_norm": 0.48324249109187434, - "learning_rate": 1.7173604991700678e-05, - "loss": 0.363, + "epoch": 0.21, + "grad_norm": 1.256452389267183, + "learning_rate": 1.8271357683742435e-05, + "loss": 0.7646, "step": 4665 }, { - "epoch": 0.27, - "grad_norm": 0.403222549687474, - "learning_rate": 1.7172308365649077e-05, - "loss": 0.2954, + "epoch": 0.21, + "grad_norm": 1.1758866979955218, + "learning_rate": 1.82705213783762e-05, + "loss": 0.7102, "step": 4666 }, { - "epoch": 0.27, - "grad_norm": 0.45450122410769483, - "learning_rate": 1.7171011491220744e-05, - "loss": 0.3464, + "epoch": 0.21, + "grad_norm": 0.32678136383137224, + "learning_rate": 1.826968488990859e-05, + "loss": 0.2502, "step": 4667 }, { - "epoch": 0.27, - "grad_norm": 0.6262524047569294, - "learning_rate": 1.7169714368460593e-05, - "loss": 0.3987, + "epoch": 0.21, + "grad_norm": 0.5348602875936085, + "learning_rate": 1.826884821835813e-05, + "loss": 0.3979, "step": 4668 }, { - "epoch": 0.27, - "grad_norm": 0.40588895977274775, - "learning_rate": 1.716841699741354e-05, - "loss": 0.241, + "epoch": 0.21, + "grad_norm": 0.36751328397413585, + "learning_rate": 1.826801136374334e-05, + "loss": 0.1871, "step": 4669 }, { - "epoch": 0.27, - "grad_norm": 0.7130249951084894, - "learning_rate": 1.7167119378124516e-05, - "loss": 0.4351, + "epoch": 0.21, + "grad_norm": 0.5728108240020834, + "learning_rate": 1.826717432608274e-05, + "loss": 0.3179, "step": 4670 }, { - "epoch": 0.27, - "grad_norm": 0.3442235779286332, - "learning_rate": 1.7165821510638456e-05, - "loss": 0.2933, + "epoch": 0.21, + "grad_norm": 0.440010150035341, + "learning_rate": 1.8266337105394876e-05, + "loss": 0.264, "step": 4671 }, { - "epoch": 0.27, - "grad_norm": 0.23626658265469405, - "learning_rate": 1.7164523395000304e-05, - "loss": 0.1589, + "epoch": 0.21, + "grad_norm": 0.8911246903793755, + "learning_rate": 1.8265499701698276e-05, + "loss": 0.6378, "step": 4672 }, { - "epoch": 0.27, - "grad_norm": 0.8478012547554327, - "learning_rate": 1.7163225031255018e-05, - "loss": 0.6467, + "epoch": 0.21, + "grad_norm": 0.4343994218741047, + "learning_rate": 1.8264662115011476e-05, + "loss": 0.2722, "step": 4673 }, { - "epoch": 0.27, - "grad_norm": 0.5128732762763075, - "learning_rate": 1.7161926419447555e-05, - "loss": 0.3303, + "epoch": 0.21, + "grad_norm": 0.7786516368156159, + "learning_rate": 1.8263824345353024e-05, + "loss": 0.3971, "step": 4674 }, { - "epoch": 0.27, - "grad_norm": 0.3949461464032854, - "learning_rate": 1.7160627559622888e-05, - "loss": 0.2547, + "epoch": 0.21, + "grad_norm": 0.40711131198405126, + "learning_rate": 1.8262986392741466e-05, + "loss": 0.264, "step": 4675 }, { - "epoch": 0.27, - "grad_norm": 0.5768859794534354, - "learning_rate": 1.7159328451825995e-05, - "loss": 0.3725, + "epoch": 0.21, + "grad_norm": 0.5808226132267941, + "learning_rate": 1.826214825719535e-05, + "loss": 0.3029, "step": 4676 }, { - "epoch": 0.27, - "grad_norm": 0.4045747712340561, - "learning_rate": 1.7158029096101868e-05, - "loss": 0.2743, + "epoch": 0.21, + "grad_norm": 0.3383055486995955, + "learning_rate": 1.8261309938733238e-05, + "loss": 0.1881, "step": 4677 }, { - "epoch": 0.27, - "grad_norm": 0.31346151516334575, - "learning_rate": 1.71567294924955e-05, - "loss": 0.2506, + "epoch": 0.21, + "grad_norm": 0.6151882465383387, + "learning_rate": 1.8260471437373685e-05, + "loss": 0.4098, "step": 4678 }, { - "epoch": 0.27, - "grad_norm": 0.38541238161229885, - "learning_rate": 1.71554296410519e-05, - "loss": 0.281, + "epoch": 0.21, + "grad_norm": 0.4628567342825869, + "learning_rate": 1.8259632753135257e-05, + "loss": 0.2942, "step": 4679 }, { - "epoch": 0.27, - "grad_norm": 0.8739729047054263, - "learning_rate": 1.7154129541816078e-05, - "loss": 0.509, + "epoch": 0.21, + "grad_norm": 0.4345743459060977, + "learning_rate": 1.825879388603652e-05, + "loss": 0.2923, "step": 4680 }, { - "epoch": 0.27, - "grad_norm": 0.38755430202848945, - "learning_rate": 1.7152829194833054e-05, - "loss": 0.2878, + "epoch": 0.22, + "grad_norm": 0.3332112895744663, + "learning_rate": 1.825795483609605e-05, + "loss": 0.1965, "step": 4681 }, { - "epoch": 0.27, - "grad_norm": 0.7597051840219803, - "learning_rate": 1.7151528600147868e-05, - "loss": 0.3923, + "epoch": 0.22, + "grad_norm": 0.4558054923541028, + "learning_rate": 1.8257115603332413e-05, + "loss": 0.3045, "step": 4682 }, { - "epoch": 0.27, - "grad_norm": 0.3355241879573539, - "learning_rate": 1.7150227757805552e-05, - "loss": 0.3151, + "epoch": 0.22, + "grad_norm": 0.4229085197928831, + "learning_rate": 1.8256276187764197e-05, + "loss": 0.3212, "step": 4683 }, { - "epoch": 0.27, - "grad_norm": 0.25231565932966504, - "learning_rate": 1.7148926667851156e-05, - "loss": 0.1974, + "epoch": 0.22, + "grad_norm": 0.9069230591281934, + "learning_rate": 1.8255436589409984e-05, + "loss": 0.4163, "step": 4684 }, { - "epoch": 0.27, - "grad_norm": 0.9743567662925537, - "learning_rate": 1.7147625330329734e-05, - "loss": 0.5156, + "epoch": 0.22, + "grad_norm": 0.3595899320033163, + "learning_rate": 1.825459680828836e-05, + "loss": 0.3008, "step": 4685 }, { - "epoch": 0.27, - "grad_norm": 0.5801427580892613, - "learning_rate": 1.714632374528636e-05, - "loss": 0.4265, + "epoch": 0.22, + "grad_norm": 0.768017022563293, + "learning_rate": 1.825375684441792e-05, + "loss": 0.5708, "step": 4686 }, { - "epoch": 0.27, - "grad_norm": 0.38310225366593875, - "learning_rate": 1.7145021912766096e-05, - "loss": 0.3218, + "epoch": 0.22, + "grad_norm": 0.40642464390280436, + "learning_rate": 1.8252916697817258e-05, + "loss": 0.2664, "step": 4687 }, { - "epoch": 0.27, - "grad_norm": 0.5825425991209423, - "learning_rate": 1.7143719832814034e-05, - "loss": 0.3381, + "epoch": 0.22, + "grad_norm": 0.3855457516666452, + "learning_rate": 1.8252076368504976e-05, + "loss": 0.2298, "step": 4688 }, { - "epoch": 0.27, - "grad_norm": 0.3340408898626788, - "learning_rate": 1.714241750547526e-05, - "loss": 0.239, + "epoch": 0.22, + "grad_norm": 0.49992006374429154, + "learning_rate": 1.8251235856499677e-05, + "loss": 0.3272, "step": 4689 }, { - "epoch": 0.27, - "grad_norm": 0.3168336149753657, - "learning_rate": 1.7141114930794876e-05, - "loss": 0.2373, + "epoch": 0.22, + "grad_norm": 0.7237023060255751, + "learning_rate": 1.8250395161819966e-05, + "loss": 0.363, "step": 4690 }, { - "epoch": 0.27, - "grad_norm": 0.40724845843177176, - "learning_rate": 1.7139812108817988e-05, - "loss": 0.3455, + "epoch": 0.22, + "grad_norm": 0.322398875831228, + "learning_rate": 1.8249554284484458e-05, + "loss": 0.2728, "step": 4691 }, { - "epoch": 0.27, - "grad_norm": 0.5094991595317109, - "learning_rate": 1.7138509039589713e-05, - "loss": 0.2222, + "epoch": 0.22, + "grad_norm": 1.4807351590955335, + "learning_rate": 1.8248713224511774e-05, + "loss": 0.7584, "step": 4692 }, { - "epoch": 0.27, - "grad_norm": 0.4787797053735826, - "learning_rate": 1.7137205723155178e-05, - "loss": 0.2925, + "epoch": 0.22, + "grad_norm": 0.38304739488723544, + "learning_rate": 1.8247871981920524e-05, + "loss": 0.1534, "step": 4693 }, { - "epoch": 0.27, - "grad_norm": 0.7871752853141931, - "learning_rate": 1.7135902159559518e-05, - "loss": 0.4657, + "epoch": 0.22, + "grad_norm": 0.3773397731873623, + "learning_rate": 1.824703055672934e-05, + "loss": 0.1984, "step": 4694 }, { - "epoch": 0.27, - "grad_norm": 0.3929226671181522, - "learning_rate": 1.713459834884787e-05, - "loss": 0.2591, + "epoch": 0.22, + "grad_norm": 0.4164363671846114, + "learning_rate": 1.8246188948956847e-05, + "loss": 0.3022, "step": 4695 }, { - "epoch": 0.27, - "grad_norm": 0.31143988193266453, - "learning_rate": 1.7133294291065387e-05, - "loss": 0.2568, + "epoch": 0.22, + "grad_norm": 0.7268954039486533, + "learning_rate": 1.8245347158621683e-05, + "loss": 0.4606, "step": 4696 }, { - "epoch": 0.27, - "grad_norm": 0.5147095929397764, - "learning_rate": 1.7131989986257233e-05, - "loss": 0.3835, + "epoch": 0.22, + "grad_norm": 0.3798317209879306, + "learning_rate": 1.8244505185742475e-05, + "loss": 0.1913, "step": 4697 }, { - "epoch": 0.27, - "grad_norm": 0.8803237290957555, - "learning_rate": 1.713068543446857e-05, - "loss": 0.337, + "epoch": 0.22, + "grad_norm": 0.619270687036849, + "learning_rate": 1.824366303033787e-05, + "loss": 0.4137, "step": 4698 }, { - "epoch": 0.27, - "grad_norm": 0.32761454354627956, - "learning_rate": 1.7129380635744578e-05, - "loss": 0.2859, + "epoch": 0.22, + "grad_norm": 0.4168621970399676, + "learning_rate": 1.824282069242651e-05, + "loss": 0.3082, "step": 4699 }, { - "epoch": 0.27, - "grad_norm": 0.6789050208924245, - "learning_rate": 1.712807559013044e-05, - "loss": 0.5107, + "epoch": 0.22, + "grad_norm": 0.27801573609894153, + "learning_rate": 1.8241978172027044e-05, + "loss": 0.1453, "step": 4700 }, { - "epoch": 0.27, - "grad_norm": 0.580704421074495, - "learning_rate": 1.7126770297671353e-05, - "loss": 0.1709, + "epoch": 0.22, + "grad_norm": 0.48327397421284685, + "learning_rate": 1.8241135469158125e-05, + "loss": 0.3151, "step": 4701 }, { - "epoch": 0.27, - "grad_norm": 0.3050190429982249, - "learning_rate": 1.7125464758412517e-05, - "loss": 0.2517, + "epoch": 0.22, + "grad_norm": 0.8042465079406699, + "learning_rate": 1.824029258383841e-05, + "loss": 0.4928, "step": 4702 }, { - "epoch": 0.27, - "grad_norm": 0.4061107982630693, - "learning_rate": 1.7124158972399142e-05, - "loss": 0.3583, + "epoch": 0.22, + "grad_norm": 0.33807776918129595, + "learning_rate": 1.823944951608656e-05, + "loss": 0.2433, "step": 4703 }, { - "epoch": 0.27, - "grad_norm": 1.1006064066544996, - "learning_rate": 1.7122852939676448e-05, - "loss": 0.4624, + "epoch": 0.22, + "grad_norm": 0.5621665632148922, + "learning_rate": 1.823860626592124e-05, + "loss": 0.4174, "step": 4704 }, { - "epoch": 0.27, - "grad_norm": 0.41879601303777364, - "learning_rate": 1.712154666028966e-05, - "loss": 0.2789, + "epoch": 0.22, + "grad_norm": 1.1542294522912009, + "learning_rate": 1.8237762833361117e-05, + "loss": 0.6366, "step": 4705 }, { - "epoch": 0.27, - "grad_norm": 0.4464716584986142, - "learning_rate": 1.712024013428402e-05, - "loss": 0.2636, + "epoch": 0.22, + "grad_norm": 0.27209835289750167, + "learning_rate": 1.823691921842486e-05, + "loss": 0.1686, "step": 4706 }, { - "epoch": 0.27, - "grad_norm": 0.3530264257968865, - "learning_rate": 1.7118933361704773e-05, - "loss": 0.297, + "epoch": 0.22, + "grad_norm": 0.48289717455430436, + "learning_rate": 1.823607542113116e-05, + "loss": 0.3493, "step": 4707 }, { - "epoch": 0.27, - "grad_norm": 0.4282023284180023, - "learning_rate": 1.7117626342597168e-05, - "loss": 0.2344, + "epoch": 0.22, + "grad_norm": 1.2366229371075574, + "learning_rate": 1.823523144149868e-05, + "loss": 0.5253, "step": 4708 }, { - "epoch": 0.27, - "grad_norm": 0.4580585344409273, - "learning_rate": 1.711631907700647e-05, - "loss": 0.3537, + "epoch": 0.22, + "grad_norm": 0.4620283104961311, + "learning_rate": 1.8234387279546118e-05, + "loss": 0.3523, "step": 4709 }, { - "epoch": 0.27, - "grad_norm": 0.7468136572361314, - "learning_rate": 1.711501156497794e-05, - "loss": 0.3764, + "epoch": 0.22, + "grad_norm": 0.3749240780591437, + "learning_rate": 1.8233542935292153e-05, + "loss": 0.1686, "step": 4710 }, { - "epoch": 0.27, - "grad_norm": 0.37408046619978336, - "learning_rate": 1.7113703806556875e-05, - "loss": 0.2427, + "epoch": 0.22, + "grad_norm": 0.5301326583150134, + "learning_rate": 1.8232698408755488e-05, + "loss": 0.3422, "step": 4711 }, { - "epoch": 0.27, - "grad_norm": 0.49604510274681346, - "learning_rate": 1.711239580178855e-05, - "loss": 0.3652, + "epoch": 0.22, + "grad_norm": 0.7728638817517502, + "learning_rate": 1.8231853699954813e-05, + "loss": 0.3837, "step": 4712 }, { - "epoch": 0.27, - "grad_norm": 1.1377881037810844, - "learning_rate": 1.7111087550718265e-05, - "loss": 0.6517, + "epoch": 0.22, + "grad_norm": 0.7664304155970179, + "learning_rate": 1.823100880890883e-05, + "loss": 0.3311, "step": 4713 }, { - "epoch": 0.27, - "grad_norm": 0.3778912354875147, - "learning_rate": 1.7109779053391322e-05, - "loss": 0.2191, + "epoch": 0.22, + "grad_norm": 0.3515959542651426, + "learning_rate": 1.8230163735636245e-05, + "loss": 0.275, "step": 4714 }, { - "epoch": 0.27, - "grad_norm": 0.3796078306581816, - "learning_rate": 1.710847030985304e-05, - "loss": 0.3385, + "epoch": 0.22, + "grad_norm": 0.901724141031721, + "learning_rate": 1.822931848015577e-05, + "loss": 0.525, "step": 4715 }, { - "epoch": 0.27, - "grad_norm": 0.6783617967841638, - "learning_rate": 1.710716132014873e-05, - "loss": 0.4694, + "epoch": 0.22, + "grad_norm": 0.3584011863835632, + "learning_rate": 1.822847304248611e-05, + "loss": 0.2057, "step": 4716 }, { - "epoch": 0.27, - "grad_norm": 0.3984794551445479, - "learning_rate": 1.7105852084323736e-05, - "loss": 0.2969, + "epoch": 0.22, + "grad_norm": 1.2851131611178057, + "learning_rate": 1.8227627422645993e-05, + "loss": 0.6201, "step": 4717 }, { - "epoch": 0.27, - "grad_norm": 0.28023283128766, - "learning_rate": 1.7104542602423385e-05, - "loss": 0.0739, + "epoch": 0.22, + "grad_norm": 0.7538886112538911, + "learning_rate": 1.8226781620654133e-05, + "loss": 0.384, "step": 4718 }, { - "epoch": 0.27, - "grad_norm": 0.365961154130174, - "learning_rate": 1.710323287449303e-05, - "loss": 0.3137, + "epoch": 0.22, + "grad_norm": 0.4177940255824517, + "learning_rate": 1.8225935636529258e-05, + "loss": 0.3271, "step": 4719 }, { - "epoch": 0.27, - "grad_norm": 0.3846054233012314, - "learning_rate": 1.710192290057803e-05, - "loss": 0.2914, + "epoch": 0.22, + "grad_norm": 0.9951015345003666, + "learning_rate": 1.8225089470290093e-05, + "loss": 0.4381, "step": 4720 }, { - "epoch": 0.27, - "grad_norm": 0.7279304764082829, - "learning_rate": 1.7100612680723744e-05, - "loss": 0.4087, + "epoch": 0.22, + "grad_norm": 0.3749662940774599, + "learning_rate": 1.822424312195538e-05, + "loss": 0.2538, "step": 4721 }, { - "epoch": 0.27, - "grad_norm": 0.35474730728888954, - "learning_rate": 1.7099302214975545e-05, - "loss": 0.3523, + "epoch": 0.22, + "grad_norm": 0.33772245914777116, + "learning_rate": 1.8223396591543844e-05, + "loss": 0.2542, "step": 4722 }, { - "epoch": 0.27, - "grad_norm": 0.35652743317062197, - "learning_rate": 1.7097991503378812e-05, - "loss": 0.2756, + "epoch": 0.22, + "grad_norm": 0.4214906124449183, + "learning_rate": 1.8222549879074236e-05, + "loss": 0.2612, "step": 4723 }, { - "epoch": 0.27, - "grad_norm": 0.17111219321848872, - "learning_rate": 1.7096680545978946e-05, - "loss": 0.0714, + "epoch": 0.22, + "grad_norm": 0.9992692074300207, + "learning_rate": 1.82217029845653e-05, + "loss": 0.3815, "step": 4724 }, { - "epoch": 0.27, - "grad_norm": 0.7528476749497117, - "learning_rate": 1.709536934282133e-05, - "loss": 0.4266, + "epoch": 0.22, + "grad_norm": 0.5233078416390026, + "learning_rate": 1.8220855908035783e-05, + "loss": 0.3449, "step": 4725 }, { - "epoch": 0.27, - "grad_norm": 0.4257309461122103, - "learning_rate": 1.7094057893951385e-05, - "loss": 0.306, + "epoch": 0.22, + "grad_norm": 0.42529004586002195, + "learning_rate": 1.822000864950444e-05, + "loss": 0.2755, "step": 4726 }, { - "epoch": 0.27, - "grad_norm": 0.40249958959659726, - "learning_rate": 1.709274619941452e-05, - "loss": 0.3084, + "epoch": 0.22, + "grad_norm": 0.4325188832858573, + "learning_rate": 1.8219161208990028e-05, + "loss": 0.2746, "step": 4727 }, { - "epoch": 0.27, - "grad_norm": 0.6821814944692812, - "learning_rate": 1.7091434259256155e-05, - "loss": 0.4543, + "epoch": 0.22, + "grad_norm": 0.388877865397616, + "learning_rate": 1.8218313586511312e-05, + "loss": 0.2549, "step": 4728 }, { - "epoch": 0.27, - "grad_norm": 0.4068026712533653, - "learning_rate": 1.7090122073521726e-05, - "loss": 0.2961, + "epoch": 0.22, + "grad_norm": 0.613589652692113, + "learning_rate": 1.821746578208705e-05, + "loss": 0.28, "step": 4729 }, { - "epoch": 0.27, - "grad_norm": 0.30780879761690466, - "learning_rate": 1.7088809642256677e-05, - "loss": 0.2363, + "epoch": 0.22, + "grad_norm": 0.4312928598732534, + "learning_rate": 1.8216617795736016e-05, + "loss": 0.3248, "step": 4730 }, { - "epoch": 0.27, - "grad_norm": 0.4450334908612994, - "learning_rate": 1.7087496965506457e-05, - "loss": 0.2771, + "epoch": 0.22, + "grad_norm": 0.415490290999044, + "learning_rate": 1.8215769627476984e-05, + "loss": 0.3123, "step": 4731 }, { - "epoch": 0.27, - "grad_norm": 0.3757471843854514, - "learning_rate": 1.7086184043316518e-05, - "loss": 0.2816, + "epoch": 0.22, + "grad_norm": 1.3759486027783363, + "learning_rate": 1.821492127732873e-05, + "loss": 0.8813, "step": 4732 }, { - "epoch": 0.27, - "grad_norm": 0.718898235374059, - "learning_rate": 1.7084870875732332e-05, - "loss": 0.4816, + "epoch": 0.22, + "grad_norm": 0.2862880344142633, + "learning_rate": 1.821407274531004e-05, + "loss": 0.1198, "step": 4733 }, { - "epoch": 0.27, - "grad_norm": 0.35620390057471335, - "learning_rate": 1.708355746279937e-05, - "loss": 0.2784, + "epoch": 0.22, + "grad_norm": 0.34206664095532985, + "learning_rate": 1.821322403143969e-05, + "loss": 0.2644, "step": 4734 }, { - "epoch": 0.27, - "grad_norm": 0.3257456059274286, - "learning_rate": 1.7082243804563123e-05, - "loss": 0.2776, + "epoch": 0.22, + "grad_norm": 0.4036479430914809, + "learning_rate": 1.821237513573648e-05, + "loss": 0.371, "step": 4735 }, { - "epoch": 0.27, - "grad_norm": 0.2670452463661605, - "learning_rate": 1.7080929901069076e-05, - "loss": 0.163, + "epoch": 0.22, + "grad_norm": 0.5153622577734677, + "learning_rate": 1.82115260582192e-05, + "loss": 0.2828, "step": 4736 }, { - "epoch": 0.27, - "grad_norm": 0.6344362497021302, - "learning_rate": 1.7079615752362727e-05, - "loss": 0.3661, + "epoch": 0.22, + "grad_norm": 0.456750395413481, + "learning_rate": 1.8210676798906645e-05, + "loss": 0.364, "step": 4737 }, { - "epoch": 0.27, - "grad_norm": 0.35318412416104933, - "learning_rate": 1.707830135848959e-05, - "loss": 0.2906, + "epoch": 0.22, + "grad_norm": 0.602010975840167, + "learning_rate": 1.8209827357817624e-05, + "loss": 0.4441, "step": 4738 }, { - "epoch": 0.27, - "grad_norm": 0.415746007004765, - "learning_rate": 1.7076986719495184e-05, - "loss": 0.3621, + "epoch": 0.22, + "grad_norm": 0.34076125494475223, + "learning_rate": 1.820897773497093e-05, + "loss": 0.2133, "step": 4739 }, { - "epoch": 0.27, - "grad_norm": 0.9097433398595413, - "learning_rate": 1.7075671835425032e-05, - "loss": 0.6311, + "epoch": 0.22, + "grad_norm": 0.2971201763428285, + "learning_rate": 1.8208127930385387e-05, + "loss": 0.2218, "step": 4740 }, { - "epoch": 0.27, - "grad_norm": 0.30654017642837456, - "learning_rate": 1.7074356706324668e-05, - "loss": 0.22, + "epoch": 0.22, + "grad_norm": 0.6483415474296218, + "learning_rate": 1.82072779440798e-05, + "loss": 0.4817, "step": 4741 }, { - "epoch": 0.27, - "grad_norm": 0.2861658906239352, - "learning_rate": 1.7073041332239634e-05, - "loss": 0.1787, + "epoch": 0.22, + "grad_norm": 0.378368462747848, + "learning_rate": 1.8206427776072995e-05, + "loss": 0.2702, "step": 4742 }, { - "epoch": 0.27, - "grad_norm": 0.38805970077000235, - "learning_rate": 1.7071725713215483e-05, - "loss": 0.3573, + "epoch": 0.22, + "grad_norm": 0.5107855403346446, + "learning_rate": 1.8205577426383786e-05, + "loss": 0.3199, "step": 4743 }, { - "epoch": 0.27, - "grad_norm": 0.35965465826234444, - "learning_rate": 1.7070409849297774e-05, - "loss": 0.216, + "epoch": 0.22, + "grad_norm": 0.9841312577019259, + "learning_rate": 1.8204726895030997e-05, + "loss": 0.688, "step": 4744 }, { - "epoch": 0.27, - "grad_norm": 0.7013740357821047, - "learning_rate": 1.7069093740532083e-05, - "loss": 0.4085, + "epoch": 0.22, + "grad_norm": 0.3875498112274121, + "learning_rate": 1.8203876182033467e-05, + "loss": 0.268, "step": 4745 }, { - "epoch": 0.27, - "grad_norm": 0.3788364821404023, - "learning_rate": 1.706777738696397e-05, - "loss": 0.3364, + "epoch": 0.22, + "grad_norm": 0.3574070698500073, + "learning_rate": 1.8203025287410022e-05, + "loss": 0.2451, "step": 4746 }, { - "epoch": 0.27, - "grad_norm": 0.33608417673074076, - "learning_rate": 1.7066460788639035e-05, - "loss": 0.2014, + "epoch": 0.22, + "grad_norm": 0.43691703930614784, + "learning_rate": 1.8202174211179505e-05, + "loss": 0.3441, "step": 4747 }, { - "epoch": 0.27, - "grad_norm": 0.2977740305003241, - "learning_rate": 1.7065143945602867e-05, - "loss": 0.1881, + "epoch": 0.22, + "grad_norm": 0.35302804775028696, + "learning_rate": 1.8201322953360758e-05, + "loss": 0.1931, "step": 4748 }, { - "epoch": 0.27, - "grad_norm": 0.6092323032794157, - "learning_rate": 1.7063826857901066e-05, - "loss": 0.4919, + "epoch": 0.22, + "grad_norm": 0.4801863093009189, + "learning_rate": 1.8200471513972623e-05, + "loss": 0.271, "step": 4749 }, { - "epoch": 0.27, - "grad_norm": 0.35754157093924055, - "learning_rate": 1.7062509525579244e-05, - "loss": 0.2416, + "epoch": 0.22, + "grad_norm": 0.4660614059489682, + "learning_rate": 1.8199619893033954e-05, + "loss": 0.3589, "step": 4750 }, { - "epoch": 0.27, - "grad_norm": 0.3915972606090248, - "learning_rate": 1.7061191948683024e-05, - "loss": 0.3557, + "epoch": 0.22, + "grad_norm": 1.266493296324023, + "learning_rate": 1.8198768090563602e-05, + "loss": 0.6325, "step": 4751 }, { - "epoch": 0.27, - "grad_norm": 1.2231230334847767, - "learning_rate": 1.7059874127258028e-05, - "loss": 0.7646, + "epoch": 0.22, + "grad_norm": 0.3541500683018622, + "learning_rate": 1.8197916106580426e-05, + "loss": 0.2325, "step": 4752 }, { - "epoch": 0.27, - "grad_norm": 0.3921839884714695, - "learning_rate": 1.7058556061349894e-05, - "loss": 0.3111, + "epoch": 0.22, + "grad_norm": 0.35932638678185097, + "learning_rate": 1.819706394110329e-05, + "loss": 0.2917, "step": 4753 }, { - "epoch": 0.27, - "grad_norm": 0.2249213774799527, - "learning_rate": 1.705723775100427e-05, - "loss": 0.1823, + "epoch": 0.22, + "grad_norm": 0.4576577785609266, + "learning_rate": 1.8196211594151058e-05, + "loss": 0.3583, "step": 4754 }, { - "epoch": 0.27, - "grad_norm": 0.46286782233072077, - "learning_rate": 1.7055919196266806e-05, - "loss": 0.3736, + "epoch": 0.22, + "grad_norm": 0.39302659221463143, + "learning_rate": 1.81953590657426e-05, + "loss": 0.2084, "step": 4755 }, { - "epoch": 0.27, - "grad_norm": 0.441673470781393, - "learning_rate": 1.7054600397183162e-05, - "loss": 0.3233, + "epoch": 0.22, + "grad_norm": 1.5051807361455776, + "learning_rate": 1.8194506355896796e-05, + "loss": 0.8507, "step": 4756 }, { - "epoch": 0.27, - "grad_norm": 0.5357999621963078, - "learning_rate": 1.705328135379901e-05, - "loss": 0.2646, + "epoch": 0.22, + "grad_norm": 0.5966210692855977, + "learning_rate": 1.8193653464632513e-05, + "loss": 0.4403, "step": 4757 }, { - "epoch": 0.27, - "grad_norm": 0.4294620287968328, - "learning_rate": 1.7051962066160027e-05, - "loss": 0.3657, + "epoch": 0.22, + "grad_norm": 0.3535776414814971, + "learning_rate": 1.8192800391968643e-05, + "loss": 0.3011, "step": 4758 }, { - "epoch": 0.27, - "grad_norm": 0.36051927885640783, - "learning_rate": 1.7050642534311904e-05, - "loss": 0.2862, + "epoch": 0.22, + "grad_norm": 0.48462018141171237, + "learning_rate": 1.819194713792407e-05, + "loss": 0.3267, "step": 4759 }, { - "epoch": 0.27, - "grad_norm": 0.3010743262062142, - "learning_rate": 1.704932275830033e-05, - "loss": 0.1729, + "epoch": 0.22, + "grad_norm": 0.3017927467174878, + "learning_rate": 1.8191093702517678e-05, + "loss": 0.1626, "step": 4760 }, { - "epoch": 0.27, - "grad_norm": 0.4334278974070254, - "learning_rate": 1.704800273817101e-05, - "loss": 0.3478, + "epoch": 0.22, + "grad_norm": 0.47824627004262604, + "learning_rate": 1.8190240085768368e-05, + "loss": 0.3001, "step": 4761 }, { - "epoch": 0.27, - "grad_norm": 0.4264062843363303, - "learning_rate": 1.7046682473969664e-05, - "loss": 0.2949, + "epoch": 0.22, + "grad_norm": 0.424934072240903, + "learning_rate": 1.818938628769504e-05, + "loss": 0.2948, "step": 4762 }, { - "epoch": 0.27, - "grad_norm": 0.4010993186213262, - "learning_rate": 1.7045361965742004e-05, - "loss": 0.2649, + "epoch": 0.22, + "grad_norm": 0.684162917725333, + "learning_rate": 1.818853230831659e-05, + "loss": 0.439, "step": 4763 }, { - "epoch": 0.27, - "grad_norm": 1.2493476218292963, - "learning_rate": 1.704404121353376e-05, - "loss": 0.7307, + "epoch": 0.22, + "grad_norm": 0.41309690190185494, + "learning_rate": 1.8187678147651926e-05, + "loss": 0.2847, "step": 4764 }, { - "epoch": 0.27, - "grad_norm": 0.5374611777732092, - "learning_rate": 1.7042720217390677e-05, - "loss": 0.3441, + "epoch": 0.22, + "grad_norm": 0.39768868839878774, + "learning_rate": 1.818682380571996e-05, + "loss": 0.2361, "step": 4765 }, { - "epoch": 0.27, - "grad_norm": 0.35668982665157084, - "learning_rate": 1.7041398977358494e-05, - "loss": 0.2937, + "epoch": 0.22, + "grad_norm": 0.34224196850757, + "learning_rate": 1.8185969282539603e-05, + "loss": 0.2789, "step": 4766 }, { - "epoch": 0.27, - "grad_norm": 0.5150162023118233, - "learning_rate": 1.7040077493482964e-05, - "loss": 0.3482, + "epoch": 0.22, + "grad_norm": 0.41830297302215247, + "learning_rate": 1.818511457812978e-05, + "loss": 0.3093, "step": 4767 }, { - "epoch": 0.27, - "grad_norm": 0.3524313571951868, - "learning_rate": 1.7038755765809857e-05, - "loss": 0.2136, + "epoch": 0.22, + "grad_norm": 0.8313184852284813, + "learning_rate": 1.8184259692509407e-05, + "loss": 0.5134, "step": 4768 }, { - "epoch": 0.27, - "grad_norm": 0.3518307329267539, - "learning_rate": 1.7037433794384938e-05, - "loss": 0.2492, + "epoch": 0.22, + "grad_norm": 0.7076721907603172, + "learning_rate": 1.8183404625697414e-05, + "loss": 0.5149, "step": 4769 }, { - "epoch": 0.27, - "grad_norm": 0.37417425107718427, - "learning_rate": 1.7036111579253992e-05, - "loss": 0.2956, + "epoch": 0.22, + "grad_norm": 0.33474794273972025, + "learning_rate": 1.8182549377712728e-05, + "loss": 0.281, "step": 4770 }, { - "epoch": 0.27, - "grad_norm": 0.3943110391556108, - "learning_rate": 1.70347891204628e-05, - "loss": 0.3086, + "epoch": 0.22, + "grad_norm": 0.4826256554662767, + "learning_rate": 1.8181693948574285e-05, + "loss": 0.3736, "step": 4771 }, { - "epoch": 0.27, - "grad_norm": 0.4517027039596723, - "learning_rate": 1.7033466418057166e-05, - "loss": 0.3716, + "epoch": 0.22, + "grad_norm": 0.2199104722451651, + "learning_rate": 1.8180838338301027e-05, + "loss": 0.0981, "step": 4772 }, { - "epoch": 0.27, - "grad_norm": 0.44044704690746816, - "learning_rate": 1.7032143472082893e-05, - "loss": 0.2414, + "epoch": 0.22, + "grad_norm": 0.37015805059036166, + "learning_rate": 1.817998254691189e-05, + "loss": 0.3019, "step": 4773 }, { - "epoch": 0.27, - "grad_norm": 0.4506278934272334, - "learning_rate": 1.7030820282585795e-05, - "loss": 0.256, + "epoch": 0.22, + "grad_norm": 0.42979105796318157, + "learning_rate": 1.8179126574425823e-05, + "loss": 0.3682, "step": 4774 }, { - "epoch": 0.27, - "grad_norm": 0.39868821005548993, - "learning_rate": 1.7029496849611687e-05, - "loss": 0.2626, + "epoch": 0.22, + "grad_norm": 0.8447629241379996, + "learning_rate": 1.8178270420861777e-05, + "loss": 0.4078, "step": 4775 }, { - "epoch": 0.27, - "grad_norm": 1.3150474383898678, - "learning_rate": 1.702817317320641e-05, - "loss": 0.5298, + "epoch": 0.22, + "grad_norm": 0.39098817310102824, + "learning_rate": 1.8177414086238706e-05, + "loss": 0.305, "step": 4776 }, { - "epoch": 0.27, - "grad_norm": 0.3613155978762499, - "learning_rate": 1.70268492534158e-05, - "loss": 0.2521, + "epoch": 0.22, + "grad_norm": 1.0980167268530916, + "learning_rate": 1.817655757057557e-05, + "loss": 0.5838, "step": 4777 }, { - "epoch": 0.27, - "grad_norm": 0.460323545686999, - "learning_rate": 1.70255250902857e-05, - "loss": 0.3628, + "epoch": 0.22, + "grad_norm": 0.27983106939891933, + "learning_rate": 1.8175700873891328e-05, + "loss": 0.1875, "step": 4778 }, { - "epoch": 0.27, - "grad_norm": 0.5444376532180347, - "learning_rate": 1.702420068386197e-05, - "loss": 0.3987, + "epoch": 0.22, + "grad_norm": 0.41148485475483415, + "learning_rate": 1.817484399620495e-05, + "loss": 0.3007, "step": 4779 }, { - "epoch": 0.27, - "grad_norm": 0.26635586243885684, - "learning_rate": 1.7022876034190468e-05, - "loss": 0.0774, + "epoch": 0.22, + "grad_norm": 1.1246362723611958, + "learning_rate": 1.817398693753541e-05, + "loss": 0.6736, "step": 4780 }, { - "epoch": 0.27, - "grad_norm": 0.3994512255967334, - "learning_rate": 1.7021551141317075e-05, - "loss": 0.2613, + "epoch": 0.22, + "grad_norm": 0.5903963605238938, + "learning_rate": 1.8173129697901667e-05, + "loss": 0.3179, "step": 4781 }, { - "epoch": 0.27, - "grad_norm": 0.5373762784077355, - "learning_rate": 1.7020226005287665e-05, - "loss": 0.3608, + "epoch": 0.22, + "grad_norm": 0.3993939456526726, + "learning_rate": 1.817227227732272e-05, + "loss": 0.2907, "step": 4782 }, { - "epoch": 0.27, - "grad_norm": 0.5289576791227809, - "learning_rate": 1.701890062614813e-05, - "loss": 0.19, + "epoch": 0.22, + "grad_norm": 0.5668209823697162, + "learning_rate": 1.8171414675817534e-05, + "loss": 0.3914, "step": 4783 }, { - "epoch": 0.27, - "grad_norm": 0.40705610275602827, - "learning_rate": 1.7017575003944374e-05, - "loss": 0.3555, + "epoch": 0.22, + "grad_norm": 0.340181644577086, + "learning_rate": 1.8170556893405106e-05, + "loss": 0.1881, "step": 4784 }, { - "epoch": 0.27, - "grad_norm": 0.8059748782716618, - "learning_rate": 1.7016249138722295e-05, - "loss": 0.5116, + "epoch": 0.22, + "grad_norm": 0.3600906499156987, + "learning_rate": 1.816969893010442e-05, + "loss": 0.2015, "step": 4785 }, { - "epoch": 0.27, - "grad_norm": 0.3090383888410909, - "learning_rate": 1.7014923030527808e-05, - "loss": 0.1888, + "epoch": 0.22, + "grad_norm": 0.477724102786767, + "learning_rate": 1.816884078593448e-05, + "loss": 0.3472, "step": 4786 }, { - "epoch": 0.28, - "grad_norm": 0.3782301763328678, - "learning_rate": 1.701359667940684e-05, - "loss": 0.284, + "epoch": 0.22, + "grad_norm": 1.1388118834469854, + "learning_rate": 1.8167982460914273e-05, + "loss": 0.5269, "step": 4787 }, { - "epoch": 0.28, - "grad_norm": 1.3901516550893385, - "learning_rate": 1.7012270085405317e-05, - "loss": 0.8495, + "epoch": 0.22, + "grad_norm": 0.3906495963403317, + "learning_rate": 1.8167123955062805e-05, + "loss": 0.2225, "step": 4788 }, { - "epoch": 0.28, - "grad_norm": 0.4533944763836812, - "learning_rate": 1.7010943248569185e-05, - "loss": 0.2694, + "epoch": 0.22, + "grad_norm": 0.9933380816430054, + "learning_rate": 1.816626526839909e-05, + "loss": 0.5529, "step": 4789 }, { - "epoch": 0.28, - "grad_norm": 0.4358691412921533, - "learning_rate": 1.700961616894439e-05, - "loss": 0.3246, + "epoch": 0.22, + "grad_norm": 0.43152436127120847, + "learning_rate": 1.816540640094213e-05, + "loss": 0.3349, "step": 4790 }, { - "epoch": 0.28, - "grad_norm": 1.2521917339608342, - "learning_rate": 1.7008288846576886e-05, - "loss": 0.5989, + "epoch": 0.22, + "grad_norm": 0.29617315324453286, + "learning_rate": 1.816454735271094e-05, + "loss": 0.1816, "step": 4791 }, { - "epoch": 0.28, - "grad_norm": 0.3808308096968665, - "learning_rate": 1.7006961281512645e-05, - "loss": 0.251, + "epoch": 0.22, + "grad_norm": 0.9499800958465451, + "learning_rate": 1.8163688123724545e-05, + "loss": 0.5018, "step": 4792 }, { - "epoch": 0.28, - "grad_norm": 0.46592574712611845, - "learning_rate": 1.7005633473797632e-05, - "loss": 0.2673, + "epoch": 0.22, + "grad_norm": 0.5733666685609344, + "learning_rate": 1.8162828714001962e-05, + "loss": 0.404, "step": 4793 }, { - "epoch": 0.28, - "grad_norm": 0.3932673619699627, - "learning_rate": 1.7004305423477835e-05, - "loss": 0.3008, + "epoch": 0.22, + "grad_norm": 0.33265289355024075, + "learning_rate": 1.816196912356222e-05, + "loss": 0.218, "step": 4794 }, { - "epoch": 0.28, - "grad_norm": 0.6720359832254608, - "learning_rate": 1.700297713059924e-05, - "loss": 0.374, + "epoch": 0.22, + "grad_norm": 1.4574642507545301, + "learning_rate": 1.8161109352424344e-05, + "loss": 0.6969, "step": 4795 }, { - "epoch": 0.28, - "grad_norm": 0.38595536528371815, - "learning_rate": 1.700164859520785e-05, - "loss": 0.2642, + "epoch": 0.22, + "grad_norm": 0.5283057307505934, + "learning_rate": 1.8160249400607373e-05, + "loss": 0.3061, "step": 4796 }, { - "epoch": 0.28, - "grad_norm": 0.525812046298573, - "learning_rate": 1.7000319817349673e-05, - "loss": 0.3478, + "epoch": 0.22, + "grad_norm": 0.40521580864999374, + "learning_rate": 1.815938926813035e-05, + "loss": 0.2773, "step": 4797 }, { - "epoch": 0.28, - "grad_norm": 0.39726507148120443, - "learning_rate": 1.699899079707072e-05, - "loss": 0.3034, + "epoch": 0.22, + "grad_norm": 0.461144478827574, + "learning_rate": 1.815852895501231e-05, + "loss": 0.3084, "step": 4798 }, { - "epoch": 0.28, - "grad_norm": 0.38302101200138, - "learning_rate": 1.6997661534417015e-05, - "loss": 0.2645, + "epoch": 0.22, + "grad_norm": 0.5492646491140364, + "learning_rate": 1.8157668461272303e-05, + "loss": 0.3228, "step": 4799 }, { - "epoch": 0.28, - "grad_norm": 0.45339021595960577, - "learning_rate": 1.699633202943459e-05, - "loss": 0.3409, + "epoch": 0.22, + "grad_norm": 0.48333436529769075, + "learning_rate": 1.8156807786929378e-05, + "loss": 0.267, "step": 4800 }, { - "epoch": 0.28, - "grad_norm": 0.6208504710150762, - "learning_rate": 1.699500228216949e-05, - "loss": 0.4191, + "epoch": 0.22, + "grad_norm": 0.6457307796908567, + "learning_rate": 1.815594693200259e-05, + "loss": 0.2945, "step": 4801 }, { - "epoch": 0.28, - "grad_norm": 0.3185182893708112, - "learning_rate": 1.6993672292667766e-05, - "loss": 0.2207, + "epoch": 0.22, + "grad_norm": 0.4603110603931726, + "learning_rate": 1.8155085896510995e-05, + "loss": 0.3358, "step": 4802 }, { - "epoch": 0.28, - "grad_norm": 1.197840212134981, - "learning_rate": 1.6992342060975467e-05, - "loss": 0.6374, + "epoch": 0.22, + "grad_norm": 0.46381746713654237, + "learning_rate": 1.815422468047366e-05, + "loss": 0.2874, "step": 4803 }, { - "epoch": 0.28, - "grad_norm": 0.7354342772061968, - "learning_rate": 1.6991011587138665e-05, - "loss": 0.4787, + "epoch": 0.22, + "grad_norm": 0.6323139844144634, + "learning_rate": 1.8153363283909655e-05, + "loss": 0.3558, "step": 4804 }, { - "epoch": 0.28, - "grad_norm": 0.4419399786931295, - "learning_rate": 1.698968087120343e-05, - "loss": 0.3406, + "epoch": 0.22, + "grad_norm": 0.5685505744427842, + "learning_rate": 1.815250170683804e-05, + "loss": 0.3198, "step": 4805 }, { - "epoch": 0.28, - "grad_norm": 0.5150312372588404, - "learning_rate": 1.6988349913215848e-05, - "loss": 0.2817, + "epoch": 0.22, + "grad_norm": 0.35115838383146947, + "learning_rate": 1.8151639949277895e-05, + "loss": 0.2848, "step": 4806 }, { - "epoch": 0.28, - "grad_norm": 0.6398701761513857, - "learning_rate": 1.698701871322201e-05, - "loss": 0.3152, + "epoch": 0.22, + "grad_norm": 0.35888834037731565, + "learning_rate": 1.8150778011248298e-05, + "loss": 0.208, "step": 4807 }, { - "epoch": 0.28, - "grad_norm": 0.2889587602038658, - "learning_rate": 1.698568727126801e-05, - "loss": 0.2323, + "epoch": 0.22, + "grad_norm": 1.0459996156884617, + "learning_rate": 1.8149915892768334e-05, + "loss": 0.5732, "step": 4808 }, { - "epoch": 0.28, - "grad_norm": 0.46663731677113146, - "learning_rate": 1.6984355587399964e-05, - "loss": 0.284, + "epoch": 0.22, + "grad_norm": 0.4416051864788834, + "learning_rate": 1.8149053593857083e-05, + "loss": 0.3226, "step": 4809 }, { - "epoch": 0.28, - "grad_norm": 0.44856779416870474, - "learning_rate": 1.6983023661663987e-05, - "loss": 0.3398, + "epoch": 0.22, + "grad_norm": 0.36737304588694797, + "learning_rate": 1.8148191114533646e-05, + "loss": 0.324, "step": 4810 }, { - "epoch": 0.28, - "grad_norm": 0.5213901240015865, - "learning_rate": 1.6981691494106196e-05, - "loss": 0.3272, + "epoch": 0.22, + "grad_norm": 0.29458204704630014, + "learning_rate": 1.8147328454817107e-05, + "loss": 0.1785, "step": 4811 }, { - "epoch": 0.28, - "grad_norm": 1.055086036269047, - "learning_rate": 1.698035908477273e-05, - "loss": 0.4127, + "epoch": 0.22, + "grad_norm": 0.29219828779525503, + "learning_rate": 1.8146465614726566e-05, + "loss": 0.2278, "step": 4812 }, { - "epoch": 0.28, - "grad_norm": 0.4546873145751855, - "learning_rate": 1.697902643370973e-05, - "loss": 0.3177, + "epoch": 0.22, + "grad_norm": 0.9902808531450538, + "learning_rate": 1.814560259428113e-05, + "loss": 0.5717, "step": 4813 }, { - "epoch": 0.28, - "grad_norm": 0.4027812289256814, - "learning_rate": 1.6977693540963347e-05, - "loss": 0.3157, + "epoch": 0.22, + "grad_norm": 0.41616636445218996, + "learning_rate": 1.8144739393499905e-05, + "loss": 0.3123, "step": 4814 }, { - "epoch": 0.28, - "grad_norm": 0.2686362672041927, - "learning_rate": 1.6976360406579734e-05, - "loss": 0.1518, + "epoch": 0.22, + "grad_norm": 0.39471423690122903, + "learning_rate": 1.8143876012402e-05, + "loss": 0.3019, "step": 4815 }, { - "epoch": 0.28, - "grad_norm": 0.7597687415584575, - "learning_rate": 1.697502703060506e-05, - "loss": 0.4421, + "epoch": 0.22, + "grad_norm": 1.135870596713753, + "learning_rate": 1.8143012451006527e-05, + "loss": 0.5588, "step": 4816 }, { - "epoch": 0.28, - "grad_norm": 0.44251032893440007, - "learning_rate": 1.69736934130855e-05, - "loss": 0.2877, + "epoch": 0.22, + "grad_norm": 0.41529902598451396, + "learning_rate": 1.814214870933261e-05, + "loss": 0.2923, "step": 4817 }, { - "epoch": 0.28, - "grad_norm": 0.5865777905310624, - "learning_rate": 1.6972359554067237e-05, - "loss": 0.3693, + "epoch": 0.22, + "grad_norm": 0.3944434681196841, + "learning_rate": 1.8141284787399366e-05, + "loss": 0.2211, "step": 4818 }, { - "epoch": 0.28, - "grad_norm": 0.8136374248949291, - "learning_rate": 1.6971025453596463e-05, - "loss": 0.3066, + "epoch": 0.22, + "grad_norm": 0.35463714592759155, + "learning_rate": 1.8140420685225922e-05, + "loss": 0.2662, "step": 4819 }, { - "epoch": 0.28, - "grad_norm": 0.2770444473086494, - "learning_rate": 1.6969691111719377e-05, - "loss": 0.2053, + "epoch": 0.22, + "grad_norm": 0.7125285698447634, + "learning_rate": 1.8139556402831412e-05, + "loss": 0.4013, "step": 4820 }, { - "epoch": 0.28, - "grad_norm": 0.5373847607276323, - "learning_rate": 1.6968356528482187e-05, - "loss": 0.399, + "epoch": 0.22, + "grad_norm": 0.4991907731822161, + "learning_rate": 1.813869194023497e-05, + "loss": 0.3243, "step": 4821 }, { - "epoch": 0.28, - "grad_norm": 0.4172327112811106, - "learning_rate": 1.696702170393111e-05, - "loss": 0.225, + "epoch": 0.22, + "grad_norm": 0.41353127467976253, + "learning_rate": 1.813782729745573e-05, + "loss": 0.3383, "step": 4822 }, { - "epoch": 0.28, - "grad_norm": 0.4781253488053807, - "learning_rate": 1.6965686638112373e-05, - "loss": 0.308, + "epoch": 0.22, + "grad_norm": 1.0814407901911471, + "learning_rate": 1.8136962474512833e-05, + "loss": 0.7642, "step": 4823 }, { - "epoch": 0.28, - "grad_norm": 1.2234511064967395, - "learning_rate": 1.6964351331072205e-05, - "loss": 0.4584, + "epoch": 0.22, + "grad_norm": 0.3057561758157377, + "learning_rate": 1.8136097471425436e-05, + "loss": 0.1695, "step": 4824 }, { - "epoch": 0.28, - "grad_norm": 0.4180514861148252, - "learning_rate": 1.696301578285685e-05, - "loss": 0.2807, + "epoch": 0.22, + "grad_norm": 0.33609296104696146, + "learning_rate": 1.8135232288212677e-05, + "loss": 0.2843, "step": 4825 }, { - "epoch": 0.28, - "grad_norm": 0.2841258483758155, - "learning_rate": 1.696167999351256e-05, - "loss": 0.2448, + "epoch": 0.22, + "grad_norm": 0.5554603416211129, + "learning_rate": 1.813436692489372e-05, + "loss": 0.3961, "step": 4826 }, { - "epoch": 0.28, - "grad_norm": 0.459516630611072, - "learning_rate": 1.6960343963085587e-05, - "loss": 0.2917, + "epoch": 0.22, + "grad_norm": 0.43872979862737393, + "learning_rate": 1.813350138148772e-05, + "loss": 0.2686, "step": 4827 }, { - "epoch": 0.28, - "grad_norm": 1.0542058454902612, - "learning_rate": 1.6959007691622206e-05, - "loss": 0.5435, + "epoch": 0.22, + "grad_norm": 0.5658355848139722, + "learning_rate": 1.8132635658013837e-05, + "loss": 0.3919, "step": 4828 }, { - "epoch": 0.28, - "grad_norm": 0.3935580375131141, - "learning_rate": 1.6957671179168687e-05, - "loss": 0.2066, + "epoch": 0.22, + "grad_norm": 0.5930619443715385, + "learning_rate": 1.8131769754491237e-05, + "loss": 0.417, "step": 4829 }, { - "epoch": 0.28, - "grad_norm": 0.5260916371309337, - "learning_rate": 1.695633442577131e-05, - "loss": 0.349, + "epoch": 0.22, + "grad_norm": 0.2818847510129043, + "learning_rate": 1.8130903670939095e-05, + "loss": 0.1925, "step": 4830 }, { - "epoch": 0.28, - "grad_norm": 0.6822341191380212, - "learning_rate": 1.6954997431476376e-05, - "loss": 0.4439, + "epoch": 0.22, + "grad_norm": 0.4172377921461991, + "learning_rate": 1.813003740737658e-05, + "loss": 0.2743, "step": 4831 }, { - "epoch": 0.28, - "grad_norm": 0.34931542652822906, - "learning_rate": 1.695366019633018e-05, - "loss": 0.2223, + "epoch": 0.22, + "grad_norm": 0.8653866168812338, + "learning_rate": 1.8129170963822874e-05, + "loss": 0.4447, "step": 4832 }, { - "epoch": 0.28, - "grad_norm": 0.28818157760600444, - "learning_rate": 1.695232272037903e-05, - "loss": 0.2245, + "epoch": 0.22, + "grad_norm": 0.3443520775128014, + "learning_rate": 1.812830434029716e-05, + "loss": 0.2174, "step": 4833 }, { - "epoch": 0.28, - "grad_norm": 0.44886495313474906, - "learning_rate": 1.695098500366924e-05, - "loss": 0.3747, + "epoch": 0.22, + "grad_norm": 0.4142131987936221, + "learning_rate": 1.812743753681862e-05, + "loss": 0.3367, "step": 4834 }, { - "epoch": 0.28, - "grad_norm": 0.376489095537162, - "learning_rate": 1.694964704624714e-05, - "loss": 0.257, + "epoch": 0.22, + "grad_norm": 1.1316702084313228, + "learning_rate": 1.8126570553406443e-05, + "loss": 0.7461, "step": 4835 }, { - "epoch": 0.28, - "grad_norm": 0.6917611206680756, - "learning_rate": 1.6948308848159064e-05, - "loss": 0.5613, + "epoch": 0.22, + "grad_norm": 0.4409020778925048, + "learning_rate": 1.812570339007983e-05, + "loss": 0.3021, "step": 4836 }, { - "epoch": 0.28, - "grad_norm": 0.41105937337798043, - "learning_rate": 1.694697040945135e-05, - "loss": 0.3446, + "epoch": 0.22, + "grad_norm": 0.27536580880837785, + "learning_rate": 1.812483604685798e-05, + "loss": 0.2171, "step": 4837 }, { - "epoch": 0.28, - "grad_norm": 0.3334884198947588, - "learning_rate": 1.694563173017035e-05, - "loss": 0.225, + "epoch": 0.22, + "grad_norm": 0.5873427975437153, + "learning_rate": 1.8123968523760082e-05, + "loss": 0.3785, "step": 4838 }, { - "epoch": 0.28, - "grad_norm": 0.3046497158528754, - "learning_rate": 1.694429281036242e-05, - "loss": 0.1881, + "epoch": 0.22, + "grad_norm": 0.956826894026991, + "learning_rate": 1.8123100820805354e-05, + "loss": 0.3385, "step": 4839 }, { - "epoch": 0.28, - "grad_norm": 0.6787872188789502, - "learning_rate": 1.6942953650073926e-05, - "loss": 0.4572, + "epoch": 0.22, + "grad_norm": 0.41720032252015726, + "learning_rate": 1.812223293801301e-05, + "loss": 0.2671, "step": 4840 }, { - "epoch": 0.28, - "grad_norm": 0.3605281610226177, - "learning_rate": 1.6941614249351252e-05, - "loss": 0.3004, + "epoch": 0.22, + "grad_norm": 0.4424938434443703, + "learning_rate": 1.8121364875402246e-05, + "loss": 0.3628, "step": 4841 }, { - "epoch": 0.28, - "grad_norm": 0.4345068829504192, - "learning_rate": 1.6940274608240773e-05, - "loss": 0.2796, + "epoch": 0.22, + "grad_norm": 0.6729737982944347, + "learning_rate": 1.8120496632992298e-05, + "loss": 0.3947, "step": 4842 }, { - "epoch": 0.28, - "grad_norm": 1.0070853290732158, - "learning_rate": 1.693893472678888e-05, - "loss": 0.6626, + "epoch": 0.22, + "grad_norm": 0.29415467995743794, + "learning_rate": 1.811962821080238e-05, + "loss": 0.2641, "step": 4843 }, { - "epoch": 0.28, - "grad_norm": 0.3629286253161658, - "learning_rate": 1.693759460504198e-05, - "loss": 0.2631, + "epoch": 0.22, + "grad_norm": 0.44151005207757565, + "learning_rate": 1.8118759608851715e-05, + "loss": 0.2925, "step": 4844 }, { - "epoch": 0.28, - "grad_norm": 0.2677448034312148, - "learning_rate": 1.6936254243046472e-05, - "loss": 0.152, + "epoch": 0.22, + "grad_norm": 0.4962638776954698, + "learning_rate": 1.8117890827159543e-05, + "loss": 0.3166, "step": 4845 }, { - "epoch": 0.28, - "grad_norm": 0.6535965345558263, - "learning_rate": 1.6934913640848782e-05, - "loss": 0.3904, + "epoch": 0.22, + "grad_norm": 0.3753492923719378, + "learning_rate": 1.8117021865745088e-05, + "loss": 0.3112, "step": 4846 }, { - "epoch": 0.28, - "grad_norm": 0.3450106487612924, - "learning_rate": 1.6933572798495327e-05, - "loss": 0.2696, + "epoch": 0.22, + "grad_norm": 0.877620495284609, + "learning_rate": 1.8116152724627592e-05, + "loss": 0.4892, "step": 4847 }, { - "epoch": 0.28, - "grad_norm": 0.8901525449936295, - "learning_rate": 1.6932231716032548e-05, - "loss": 0.463, + "epoch": 0.22, + "grad_norm": 0.5600726502011535, + "learning_rate": 1.81152834038263e-05, + "loss": 0.3888, "step": 4848 }, { - "epoch": 0.28, - "grad_norm": 0.35316268141975193, - "learning_rate": 1.6930890393506882e-05, - "loss": 0.3056, + "epoch": 0.22, + "grad_norm": 0.43939279814065196, + "learning_rate": 1.8114413903360458e-05, + "loss": 0.3539, "step": 4849 }, { - "epoch": 0.28, - "grad_norm": 0.37333332234721345, - "learning_rate": 1.692954883096478e-05, - "loss": 0.314, + "epoch": 0.22, + "grad_norm": 0.253801875678131, + "learning_rate": 1.8113544223249305e-05, + "loss": 0.1997, "step": 4850 }, { - "epoch": 0.28, - "grad_norm": 0.26962638573625075, - "learning_rate": 1.6928207028452698e-05, - "loss": 0.1461, + "epoch": 0.22, + "grad_norm": 0.691455891517991, + "learning_rate": 1.811267436351211e-05, + "loss": 0.3134, "step": 4851 }, { - "epoch": 0.28, - "grad_norm": 0.7124643097673313, - "learning_rate": 1.6926864986017105e-05, - "loss": 0.4647, + "epoch": 0.22, + "grad_norm": 0.49150993290366224, + "learning_rate": 1.811180432416812e-05, + "loss": 0.3258, "step": 4852 }, { - "epoch": 0.28, - "grad_norm": 0.3938996265220362, - "learning_rate": 1.6925522703704475e-05, - "loss": 0.3013, + "epoch": 0.22, + "grad_norm": 0.48509243270785324, + "learning_rate": 1.8110934105236603e-05, + "loss": 0.2873, "step": 4853 }, { - "epoch": 0.28, - "grad_norm": 0.4226078356649217, - "learning_rate": 1.6924180181561297e-05, - "loss": 0.3757, + "epoch": 0.22, + "grad_norm": 0.8012016030099469, + "learning_rate": 1.8110063706736817e-05, + "loss": 0.359, "step": 4854 }, { - "epoch": 0.28, - "grad_norm": 0.993544391815923, - "learning_rate": 1.6922837419634052e-05, - "loss": 0.4262, + "epoch": 0.22, + "grad_norm": 0.4031519528108106, + "learning_rate": 1.8109193128688042e-05, + "loss": 0.32, "step": 4855 }, { - "epoch": 0.28, - "grad_norm": 0.37675799254882103, - "learning_rate": 1.6921494417969245e-05, - "loss": 0.2918, + "epoch": 0.22, + "grad_norm": 0.2844282692682203, + "learning_rate": 1.8108322371109547e-05, + "loss": 0.1604, "step": 4856 }, { - "epoch": 0.28, - "grad_norm": 0.3003610120862018, - "learning_rate": 1.6920151176613383e-05, - "loss": 0.2703, + "epoch": 0.22, + "grad_norm": 0.36104228062766386, + "learning_rate": 1.8107451434020605e-05, + "loss": 0.2769, "step": 4857 }, { - "epoch": 0.28, - "grad_norm": 0.5360807413545341, - "learning_rate": 1.6918807695612984e-05, - "loss": 0.2786, + "epoch": 0.22, + "grad_norm": 0.43024750928793426, + "learning_rate": 1.8106580317440507e-05, + "loss": 0.3029, "step": 4858 }, { - "epoch": 0.28, - "grad_norm": 0.37804769698458474, - "learning_rate": 1.6917463975014575e-05, - "loss": 0.2754, + "epoch": 0.22, + "grad_norm": 0.7999276773110835, + "learning_rate": 1.8105709021388534e-05, + "loss": 0.5215, "step": 4859 }, { - "epoch": 0.28, - "grad_norm": 1.1356673660352166, - "learning_rate": 1.691612001486468e-05, - "loss": 0.6455, + "epoch": 0.22, + "grad_norm": 0.537235222398753, + "learning_rate": 1.8104837545883974e-05, + "loss": 0.3447, "step": 4860 }, { - "epoch": 0.28, - "grad_norm": 0.3493864639681745, - "learning_rate": 1.6914775815209853e-05, - "loss": 0.2859, + "epoch": 0.22, + "grad_norm": 0.36127368463258286, + "learning_rate": 1.810396589094612e-05, + "loss": 0.3054, "step": 4861 }, { - "epoch": 0.28, - "grad_norm": 0.37809936495877855, - "learning_rate": 1.6913431376096633e-05, - "loss": 0.2852, + "epoch": 0.22, + "grad_norm": 0.5969677080858629, + "learning_rate": 1.8103094056594276e-05, + "loss": 0.3958, "step": 4862 }, { - "epoch": 0.28, - "grad_norm": 0.7970041802176591, - "learning_rate": 1.6912086697571584e-05, - "loss": 0.5109, + "epoch": 0.22, + "grad_norm": 0.22210384059199, + "learning_rate": 1.8102222042847735e-05, + "loss": 0.1117, "step": 4863 }, { - "epoch": 0.28, - "grad_norm": 0.2716805871139541, - "learning_rate": 1.6910741779681264e-05, - "loss": 0.2215, + "epoch": 0.22, + "grad_norm": 0.4885519340661976, + "learning_rate": 1.810134984972581e-05, + "loss": 0.3648, "step": 4864 }, { - "epoch": 0.28, - "grad_norm": 0.4244076834143325, - "learning_rate": 1.690939662247226e-05, - "loss": 0.2942, + "epoch": 0.22, + "grad_norm": 0.44305471709187827, + "learning_rate": 1.810047747724781e-05, + "loss": 0.3135, "step": 4865 }, { - "epoch": 0.28, - "grad_norm": 0.5023134654075716, - "learning_rate": 1.6908051225991146e-05, - "loss": 0.3721, + "epoch": 0.22, + "grad_norm": 1.0364214286890467, + "learning_rate": 1.8099604925433042e-05, + "loss": 0.3475, "step": 4866 }, { - "epoch": 0.28, - "grad_norm": 1.2878202571798292, - "learning_rate": 1.6906705590284517e-05, - "loss": 0.8234, + "epoch": 0.22, + "grad_norm": 0.44691047919538324, + "learning_rate": 1.8098732194300828e-05, + "loss": 0.3282, "step": 4867 }, { - "epoch": 0.28, - "grad_norm": 0.37248189574940166, - "learning_rate": 1.690535971539897e-05, - "loss": 0.1709, - "step": 4868 + "epoch": 0.22, + "grad_norm": 0.49227980493382817, + "learning_rate": 1.8097859283870488e-05, + "loss": 0.2859, + "step": 4868 }, { - "epoch": 0.28, - "grad_norm": 0.4067382861706183, - "learning_rate": 1.690401360138111e-05, - "loss": 0.3374, + "epoch": 0.22, + "grad_norm": 0.2889589439590141, + "learning_rate": 1.809698619416135e-05, + "loss": 0.2099, "step": 4869 }, { - "epoch": 0.28, - "grad_norm": 0.316105557965605, - "learning_rate": 1.6902667248277557e-05, - "loss": 0.2383, + "epoch": 0.22, + "grad_norm": 0.4693138537778226, + "learning_rate": 1.809611292519274e-05, + "loss": 0.3438, "step": 4870 }, { - "epoch": 0.28, - "grad_norm": 0.38753037141289065, - "learning_rate": 1.6901320656134935e-05, - "loss": 0.2109, + "epoch": 0.22, + "grad_norm": 1.2215041109995461, + "learning_rate": 1.8095239476983998e-05, + "loss": 0.56, "step": 4871 }, { - "epoch": 0.28, - "grad_norm": 0.9724087834349608, - "learning_rate": 1.6899973824999872e-05, - "loss": 0.7267, + "epoch": 0.22, + "grad_norm": 0.8683455170800684, + "learning_rate": 1.809436584955445e-05, + "loss": 0.5487, "step": 4872 }, { - "epoch": 0.28, - "grad_norm": 0.3812345170205305, - "learning_rate": 1.6898626754919018e-05, - "loss": 0.3212, + "epoch": 0.22, + "grad_norm": 0.3262896242519527, + "learning_rate": 1.8093492042923446e-05, + "loss": 0.2264, "step": 4873 }, { - "epoch": 0.28, - "grad_norm": 0.3219187996146285, - "learning_rate": 1.6897279445939012e-05, - "loss": 0.1968, + "epoch": 0.22, + "grad_norm": 0.3822380832388538, + "learning_rate": 1.809261805711033e-05, + "loss": 0.2843, "step": 4874 }, { - "epoch": 0.28, - "grad_norm": 0.6624262246155297, - "learning_rate": 1.6895931898106517e-05, - "loss": 0.4522, + "epoch": 0.22, + "grad_norm": 0.44776823424694767, + "learning_rate": 1.809174389213445e-05, + "loss": 0.2447, "step": 4875 }, { - "epoch": 0.28, - "grad_norm": 0.47937324578610235, - "learning_rate": 1.6894584111468196e-05, - "loss": 0.2651, + "epoch": 0.22, + "grad_norm": 0.4720823015631681, + "learning_rate": 1.8090869548015157e-05, + "loss": 0.2318, "step": 4876 }, { - "epoch": 0.28, - "grad_norm": 0.2998688492477607, - "learning_rate": 1.6893236086070722e-05, - "loss": 0.2302, + "epoch": 0.22, + "grad_norm": 0.7812423509473778, + "learning_rate": 1.808999502477181e-05, + "loss": 0.3845, "step": 4877 }, { - "epoch": 0.28, - "grad_norm": 0.4740529224435299, - "learning_rate": 1.6891887821960783e-05, - "loss": 0.299, + "epoch": 0.22, + "grad_norm": 1.254308289742614, + "learning_rate": 1.8089120322423773e-05, + "loss": 0.5601, "step": 4878 }, { - "epoch": 0.28, - "grad_norm": 0.7311170213555515, - "learning_rate": 1.689053931918506e-05, - "loss": 0.5443, + "epoch": 0.22, + "grad_norm": 0.4000921667625986, + "learning_rate": 1.8088245440990405e-05, + "loss": 0.2228, "step": 4879 }, { - "epoch": 0.28, - "grad_norm": 0.41467295139387256, - "learning_rate": 1.688919057779026e-05, - "loss": 0.3303, + "epoch": 0.22, + "grad_norm": 1.0799493334997052, + "learning_rate": 1.8087370380491083e-05, + "loss": 0.5448, "step": 4880 }, { - "epoch": 0.28, - "grad_norm": 0.3324366508784158, - "learning_rate": 1.6887841597823088e-05, - "loss": 0.2817, + "epoch": 0.22, + "grad_norm": 0.3041300654951305, + "learning_rate": 1.808649514094517e-05, + "loss": 0.219, "step": 4881 }, { - "epoch": 0.28, - "grad_norm": 0.4272022615661138, - "learning_rate": 1.6886492379330254e-05, - "loss": 0.2808, + "epoch": 0.22, + "grad_norm": 0.4716769690346684, + "learning_rate": 1.8085619722372056e-05, + "loss": 0.2504, "step": 4882 }, { - "epoch": 0.28, - "grad_norm": 0.29255807880365126, - "learning_rate": 1.6885142922358486e-05, - "loss": 0.2237, + "epoch": 0.22, + "grad_norm": 1.1829686784598237, + "learning_rate": 1.8084744124791108e-05, + "loss": 0.4653, "step": 4883 }, { - "epoch": 0.28, - "grad_norm": 0.8529213592258498, - "learning_rate": 1.6883793226954516e-05, - "loss": 0.5751, + "epoch": 0.22, + "grad_norm": 0.9325982551284647, + "learning_rate": 1.808386834822172e-05, + "loss": 0.4944, "step": 4884 }, { - "epoch": 0.28, - "grad_norm": 0.5232124412222744, - "learning_rate": 1.6882443293165083e-05, - "loss": 0.3541, + "epoch": 0.22, + "grad_norm": 0.41729995974159423, + "learning_rate": 1.808299239268328e-05, + "loss": 0.2958, "step": 4885 }, { - "epoch": 0.28, - "grad_norm": 0.513732509312485, - "learning_rate": 1.6881093121036933e-05, - "loss": 0.3429, + "epoch": 0.22, + "grad_norm": 0.5204619670380874, + "learning_rate": 1.8082116258195173e-05, + "loss": 0.2919, "step": 4886 }, { - "epoch": 0.28, - "grad_norm": 0.43596299045855724, - "learning_rate": 1.6879742710616826e-05, - "loss": 0.3057, + "epoch": 0.22, + "grad_norm": 0.3742924561090667, + "learning_rate": 1.8081239944776804e-05, + "loss": 0.2653, "step": 4887 }, { - "epoch": 0.28, - "grad_norm": 0.33911296956631076, - "learning_rate": 1.6878392061951525e-05, - "loss": 0.2385, + "epoch": 0.22, + "grad_norm": 0.45058769054038006, + "learning_rate": 1.8080363452447574e-05, + "loss": 0.3298, "step": 4888 }, { - "epoch": 0.28, - "grad_norm": 0.40006287993866735, - "learning_rate": 1.6877041175087802e-05, - "loss": 0.2909, + "epoch": 0.22, + "grad_norm": 0.79379721470678, + "learning_rate": 1.8079486781226883e-05, + "loss": 0.3491, "step": 4889 }, { - "epoch": 0.28, - "grad_norm": 0.43421895746675027, - "learning_rate": 1.6875690050072435e-05, - "loss": 0.3087, + "epoch": 0.22, + "grad_norm": 0.5342093586195866, + "learning_rate": 1.8078609931134142e-05, + "loss": 0.3344, "step": 4890 }, { - "epoch": 0.28, - "grad_norm": 0.4299156983242811, - "learning_rate": 1.687433868695222e-05, - "loss": 0.3169, + "epoch": 0.22, + "grad_norm": 0.3948632775031378, + "learning_rate": 1.8077732902188764e-05, + "loss": 0.2832, "step": 4891 }, { - "epoch": 0.28, - "grad_norm": 0.38431969838964786, - "learning_rate": 1.687298708577395e-05, - "loss": 0.3265, + "epoch": 0.22, + "grad_norm": 0.5265659296839025, + "learning_rate": 1.8076855694410163e-05, + "loss": 0.3236, "step": 4892 }, { - "epoch": 0.28, - "grad_norm": 0.4932553865657155, - "learning_rate": 1.687163524658444e-05, - "loss": 0.3143, + "epoch": 0.22, + "grad_norm": 0.3507293441149866, + "learning_rate": 1.8075978307817764e-05, + "loss": 0.2697, "step": 4893 }, { - "epoch": 0.28, - "grad_norm": 0.6671294183699192, - "learning_rate": 1.687028316943049e-05, - "loss": 0.3774, + "epoch": 0.22, + "grad_norm": 0.3975490530056394, + "learning_rate": 1.807510074243099e-05, + "loss": 0.2896, "step": 4894 }, { - "epoch": 0.28, - "grad_norm": 0.5398718612910458, - "learning_rate": 1.686893085435893e-05, - "loss": 0.2871, + "epoch": 0.22, + "grad_norm": 0.6715942483483468, + "learning_rate": 1.8074222998269268e-05, + "loss": 0.4188, "step": 4895 }, { - "epoch": 0.28, - "grad_norm": 0.5618030739271445, - "learning_rate": 1.686757830141659e-05, - "loss": 0.435, + "epoch": 0.22, + "grad_norm": 0.38869058170413096, + "learning_rate": 1.807334507535203e-05, + "loss": 0.2804, "step": 4896 }, { - "epoch": 0.28, - "grad_norm": 0.4098895772036004, - "learning_rate": 1.6866225510650312e-05, - "loss": 0.3044, + "epoch": 0.22, + "grad_norm": 0.3585395205190629, + "learning_rate": 1.807246697369871e-05, + "loss": 0.2681, "step": 4897 }, { - "epoch": 0.28, - "grad_norm": 0.3133046926958272, - "learning_rate": 1.686487248210694e-05, - "loss": 0.2384, + "epoch": 0.23, + "grad_norm": 1.3574704218987816, + "learning_rate": 1.8071588693328755e-05, + "loss": 0.6944, "step": 4898 }, { - "epoch": 0.28, - "grad_norm": 0.38071433829603696, - "learning_rate": 1.6863519215833327e-05, - "loss": 0.2787, + "epoch": 0.23, + "grad_norm": 0.9600124348572094, + "learning_rate": 1.8070710234261602e-05, + "loss": 0.3941, "step": 4899 }, { - "epoch": 0.28, - "grad_norm": 0.4427576937958323, - "learning_rate": 1.686216571187634e-05, - "loss": 0.2594, + "epoch": 0.23, + "grad_norm": 0.38566021947660506, + "learning_rate": 1.8069831596516708e-05, + "loss": 0.2965, "step": 4900 }, { - "epoch": 0.28, - "grad_norm": 0.37213118554343044, - "learning_rate": 1.6860811970282844e-05, - "loss": 0.2937, + "epoch": 0.23, + "grad_norm": 0.45817646101477905, + "learning_rate": 1.8068952780113517e-05, + "loss": 0.3524, "step": 4901 }, { - "epoch": 0.28, - "grad_norm": 0.5833133836609231, - "learning_rate": 1.685945799109973e-05, - "loss": 0.3999, + "epoch": 0.23, + "grad_norm": 0.25956103180349843, + "learning_rate": 1.806807378507149e-05, + "loss": 0.1434, "step": 4902 }, { - "epoch": 0.28, - "grad_norm": 0.6780494801302973, - "learning_rate": 1.6858103774373877e-05, - "loss": 0.5117, + "epoch": 0.23, + "grad_norm": 0.5173152303285927, + "learning_rate": 1.8067194611410088e-05, + "loss": 0.3302, "step": 4903 }, { - "epoch": 0.28, - "grad_norm": 0.24541000634477722, - "learning_rate": 1.685674932015219e-05, - "loss": 0.0744, + "epoch": 0.23, + "grad_norm": 0.6737547764909366, + "learning_rate": 1.806631525914877e-05, + "loss": 0.427, "step": 4904 }, { - "epoch": 0.28, - "grad_norm": 0.3375440873846795, - "learning_rate": 1.6855394628481565e-05, - "loss": 0.3181, + "epoch": 0.23, + "grad_norm": 0.4473395472693209, + "learning_rate": 1.8065435728307003e-05, + "loss": 0.295, "step": 4905 }, { - "epoch": 0.28, - "grad_norm": 0.8924703502539542, - "learning_rate": 1.6854039699408923e-05, - "loss": 0.5844, + "epoch": 0.23, + "grad_norm": 0.41613092614552577, + "learning_rate": 1.8064556018904267e-05, + "loss": 0.2956, "step": 4906 }, { - "epoch": 0.28, - "grad_norm": 0.4013162255792, - "learning_rate": 1.6852684532981176e-05, - "loss": 0.1698, + "epoch": 0.23, + "grad_norm": 0.8709645356162412, + "learning_rate": 1.8063676130960034e-05, + "loss": 0.5244, "step": 4907 }, { - "epoch": 0.28, - "grad_norm": 0.442763641896375, - "learning_rate": 1.685132912924526e-05, - "loss": 0.328, + "epoch": 0.23, + "grad_norm": 0.2629725713184213, + "learning_rate": 1.8062796064493778e-05, + "loss": 0.1922, "step": 4908 }, { - "epoch": 0.28, - "grad_norm": 0.48415946618162076, - "learning_rate": 1.684997348824811e-05, - "loss": 0.3534, + "epoch": 0.23, + "grad_norm": 0.40861769847708185, + "learning_rate": 1.8061915819524995e-05, + "loss": 0.2613, "step": 4909 }, { - "epoch": 0.28, - "grad_norm": 0.27868722935079016, - "learning_rate": 1.6848617610036676e-05, - "loss": 0.0971, + "epoch": 0.23, + "grad_norm": 0.8861222859783292, + "learning_rate": 1.8061035396073163e-05, + "loss": 0.5491, "step": 4910 }, { - "epoch": 0.28, - "grad_norm": 0.32980783132441815, - "learning_rate": 1.6847261494657904e-05, - "loss": 0.2478, + "epoch": 0.23, + "grad_norm": 0.7109338940194653, + "learning_rate": 1.8060154794157777e-05, + "loss": 0.4807, "step": 4911 }, { - "epoch": 0.28, - "grad_norm": 1.1178058794694627, - "learning_rate": 1.6845905142158764e-05, - "loss": 0.6226, + "epoch": 0.23, + "grad_norm": 0.39226744628715193, + "learning_rate": 1.805927401379833e-05, + "loss": 0.2437, "step": 4912 }, { - "epoch": 0.28, - "grad_norm": 0.2863922274104645, - "learning_rate": 1.6844548552586225e-05, - "loss": 0.2348, + "epoch": 0.23, + "grad_norm": 0.44135009022396576, + "learning_rate": 1.8058393055014326e-05, + "loss": 0.3806, "step": 4913 }, { - "epoch": 0.28, - "grad_norm": 0.49945881324068725, - "learning_rate": 1.684319172598726e-05, - "loss": 0.3969, + "epoch": 0.23, + "grad_norm": 0.5385783838909084, + "learning_rate": 1.805751191782527e-05, + "loss": 0.2912, "step": 4914 }, { - "epoch": 0.28, - "grad_norm": 0.7076134237344921, - "learning_rate": 1.684183466240886e-05, - "loss": 0.5169, + "epoch": 0.23, + "grad_norm": 0.26303306614178434, + "learning_rate": 1.805663060225066e-05, + "loss": 0.1805, "step": 4915 }, { - "epoch": 0.28, - "grad_norm": 0.3167088144334443, - "learning_rate": 1.684047736189802e-05, - "loss": 0.2335, + "epoch": 0.23, + "grad_norm": 0.8589848632983503, + "learning_rate": 1.8055749108310013e-05, + "loss": 0.5237, "step": 4916 }, { - "epoch": 0.28, - "grad_norm": 0.3104910368581827, - "learning_rate": 1.6839119824501747e-05, - "loss": 0.2116, + "epoch": 0.23, + "grad_norm": 0.3787048911027548, + "learning_rate": 1.805486743602285e-05, + "loss": 0.3861, "step": 4917 }, { - "epoch": 0.28, - "grad_norm": 0.9284231793627837, - "learning_rate": 1.6837762050267044e-05, - "loss": 0.567, + "epoch": 0.23, + "grad_norm": 0.3541288990070035, + "learning_rate": 1.805398558540868e-05, + "loss": 0.2237, "step": 4918 }, { - "epoch": 0.28, - "grad_norm": 0.596611993917205, - "learning_rate": 1.683640403924094e-05, - "loss": 0.3658, + "epoch": 0.23, + "grad_norm": 1.2380196928031408, + "learning_rate": 1.805310355648704e-05, + "loss": 0.7746, "step": 4919 }, { - "epoch": 0.28, - "grad_norm": 0.3944841548350714, - "learning_rate": 1.6835045791470453e-05, - "loss": 0.2971, + "epoch": 0.23, + "grad_norm": 0.39638605279729744, + "learning_rate": 1.8052221349277445e-05, + "loss": 0.3369, "step": 4920 }, { - "epoch": 0.28, - "grad_norm": 0.3928615059353819, - "learning_rate": 1.683368730700263e-05, - "loss": 0.3528, + "epoch": 0.23, + "grad_norm": 0.2894316450695463, + "learning_rate": 1.8051338963799426e-05, + "loss": 0.1417, "step": 4921 }, { - "epoch": 0.28, - "grad_norm": 0.8844377675062224, - "learning_rate": 1.6832328585884505e-05, - "loss": 0.4563, + "epoch": 0.23, + "grad_norm": 0.3990297858333398, + "learning_rate": 1.8050456400072525e-05, + "loss": 0.2783, "step": 4922 }, { - "epoch": 0.28, - "grad_norm": 0.24744733076868877, - "learning_rate": 1.6830969628163134e-05, - "loss": 0.1611, + "epoch": 0.23, + "grad_norm": 0.6648018960889627, + "learning_rate": 1.804957365811628e-05, + "loss": 0.4825, "step": 4923 }, { - "epoch": 0.28, - "grad_norm": 0.37799077238329093, - "learning_rate": 1.6829610433885583e-05, - "loss": 0.308, + "epoch": 0.23, + "grad_norm": 0.6606045529675441, + "learning_rate": 1.8048690737950233e-05, + "loss": 0.3645, "step": 4924 }, { - "epoch": 0.28, - "grad_norm": 0.7391018917542396, - "learning_rate": 1.6828251003098913e-05, - "loss": 0.448, + "epoch": 0.23, + "grad_norm": 0.3300698750007411, + "learning_rate": 1.8047807639593927e-05, + "loss": 0.2723, "step": 4925 }, { - "epoch": 0.28, - "grad_norm": 0.4267263829375562, - "learning_rate": 1.6826891335850205e-05, - "loss": 0.2828, + "epoch": 0.23, + "grad_norm": 0.31946665622115394, + "learning_rate": 1.8046924363066918e-05, + "loss": 0.2153, "step": 4926 }, { - "epoch": 0.28, - "grad_norm": 0.9901949114209159, - "learning_rate": 1.6825531432186545e-05, - "loss": 0.6693, + "epoch": 0.23, + "grad_norm": 0.4527209283985424, + "learning_rate": 1.804604090838876e-05, + "loss": 0.2529, "step": 4927 }, { - "epoch": 0.28, - "grad_norm": 0.4338305088608627, - "learning_rate": 1.682417129215502e-05, - "loss": 0.3244, + "epoch": 0.23, + "grad_norm": 0.4387573845847885, + "learning_rate": 1.8045157275579012e-05, + "loss": 0.3032, "step": 4928 }, { - "epoch": 0.28, - "grad_norm": 0.3122226230150543, - "learning_rate": 1.682281091580274e-05, - "loss": 0.2397, + "epoch": 0.23, + "grad_norm": 0.46947349892964557, + "learning_rate": 1.8044273464657235e-05, + "loss": 0.3716, "step": 4929 }, { - "epoch": 0.28, - "grad_norm": 0.2536689856422566, - "learning_rate": 1.682145030317681e-05, - "loss": 0.1118, + "epoch": 0.23, + "grad_norm": 0.406328131143055, + "learning_rate": 1.8043389475642994e-05, + "loss": 0.2661, "step": 4930 }, { - "epoch": 0.28, - "grad_norm": 1.2644511808907366, - "learning_rate": 1.6820089454324355e-05, - "loss": 0.4325, + "epoch": 0.23, + "grad_norm": 0.44643669372371564, + "learning_rate": 1.8042505308555863e-05, + "loss": 0.3231, "step": 4931 }, { - "epoch": 0.28, - "grad_norm": 0.3830284678075912, - "learning_rate": 1.681872836929249e-05, - "loss": 0.297, + "epoch": 0.23, + "grad_norm": 0.39343014583605024, + "learning_rate": 1.8041620963415418e-05, + "loss": 0.309, "step": 4932 }, { - "epoch": 0.28, - "grad_norm": 0.44403371374329265, - "learning_rate": 1.6817367048128357e-05, - "loss": 0.2896, + "epoch": 0.23, + "grad_norm": 0.3402967313795037, + "learning_rate": 1.8040736440241237e-05, + "loss": 0.2334, "step": 4933 }, { - "epoch": 0.28, - "grad_norm": 0.6483549831795611, - "learning_rate": 1.6816005490879096e-05, - "loss": 0.3426, + "epoch": 0.23, + "grad_norm": 0.4788876682630459, + "learning_rate": 1.8039851739052898e-05, + "loss": 0.3227, "step": 4934 }, { - "epoch": 0.28, - "grad_norm": 0.32440225504314685, - "learning_rate": 1.6814643697591857e-05, - "loss": 0.2452, + "epoch": 0.23, + "grad_norm": 0.4201025591158397, + "learning_rate": 1.803896685986999e-05, + "loss": 0.2668, "step": 4935 }, { - "epoch": 0.28, - "grad_norm": 0.39596665611969095, - "learning_rate": 1.68132816683138e-05, - "loss": 0.2387, + "epoch": 0.23, + "grad_norm": 0.39850208536829457, + "learning_rate": 1.8038081802712105e-05, + "loss": 0.2653, "step": 4936 }, { - "epoch": 0.28, - "grad_norm": 0.9595030167316971, - "learning_rate": 1.6811919403092093e-05, - "loss": 0.3894, + "epoch": 0.23, + "grad_norm": 0.4861331027459766, + "learning_rate": 1.8037196567598838e-05, + "loss": 0.3719, "step": 4937 }, { - "epoch": 0.28, - "grad_norm": 0.4468030719392794, - "learning_rate": 1.6810556901973907e-05, - "loss": 0.3355, + "epoch": 0.23, + "grad_norm": 0.42997859984442105, + "learning_rate": 1.8036311154549783e-05, + "loss": 0.1324, "step": 4938 }, { - "epoch": 0.28, - "grad_norm": 0.8717707766987532, - "learning_rate": 1.680919416500643e-05, - "loss": 0.5147, + "epoch": 0.23, + "grad_norm": 0.407048275873624, + "learning_rate": 1.8035425563584547e-05, + "loss": 0.3062, "step": 4939 }, { - "epoch": 0.28, - "grad_norm": 0.38028104901258003, - "learning_rate": 1.6807831192236855e-05, - "loss": 0.3398, + "epoch": 0.23, + "grad_norm": 0.48247985877054883, + "learning_rate": 1.803453979472273e-05, + "loss": 0.4209, "step": 4940 }, { - "epoch": 0.28, - "grad_norm": 0.3862054255947419, - "learning_rate": 1.6806467983712375e-05, - "loss": 0.2686, + "epoch": 0.23, + "grad_norm": 0.30509944324770105, + "learning_rate": 1.803365384798395e-05, + "loss": 0.2335, "step": 4941 }, { - "epoch": 0.28, - "grad_norm": 0.40855243616372805, - "learning_rate": 1.68051045394802e-05, - "loss": 0.2499, + "epoch": 0.23, + "grad_norm": 0.6312220628466788, + "learning_rate": 1.8032767723387815e-05, + "loss": 0.3115, "step": 4942 }, { - "epoch": 0.28, - "grad_norm": 0.7503261455917112, - "learning_rate": 1.680374085958755e-05, - "loss": 0.3199, + "epoch": 0.23, + "grad_norm": 0.44658657975447447, + "learning_rate": 1.8031881420953942e-05, + "loss": 0.3383, "step": 4943 }, { - "epoch": 0.28, - "grad_norm": 0.47228792715288803, - "learning_rate": 1.680237694408165e-05, - "loss": 0.3086, + "epoch": 0.23, + "grad_norm": 0.3459392819530411, + "learning_rate": 1.8030994940701955e-05, + "loss": 0.2833, "step": 4944 }, { - "epoch": 0.28, - "grad_norm": 0.5675653321231017, - "learning_rate": 1.6801012793009725e-05, - "loss": 0.4409, + "epoch": 0.23, + "grad_norm": 0.5265033838660462, + "learning_rate": 1.8030108282651487e-05, + "loss": 0.34, "step": 4945 }, { - "epoch": 0.28, - "grad_norm": 0.657462471357182, - "learning_rate": 1.6799648406419018e-05, - "loss": 0.2353, + "epoch": 0.23, + "grad_norm": 0.40533611255100954, + "learning_rate": 1.8029221446822152e-05, + "loss": 0.3568, "step": 4946 }, { - "epoch": 0.28, - "grad_norm": 0.2388900030606124, - "learning_rate": 1.679828378435678e-05, - "loss": 0.1982, + "epoch": 0.23, + "grad_norm": 0.24979892636233852, + "learning_rate": 1.80283344332336e-05, + "loss": 0.0972, "step": 4947 }, { - "epoch": 0.28, - "grad_norm": 0.4841571640687985, - "learning_rate": 1.6796918926870266e-05, - "loss": 0.3666, + "epoch": 0.23, + "grad_norm": 0.3683004679796032, + "learning_rate": 1.802744724190546e-05, + "loss": 0.2888, "step": 4948 }, { - "epoch": 0.28, - "grad_norm": 0.5190146104941877, - "learning_rate": 1.6795553834006746e-05, - "loss": 0.2917, + "epoch": 0.23, + "grad_norm": 0.4099562667436985, + "learning_rate": 1.8026559872857375e-05, + "loss": 0.3406, "step": 4949 }, { - "epoch": 0.28, - "grad_norm": 0.44852090196045463, - "learning_rate": 1.6794188505813486e-05, - "loss": 0.3222, + "epoch": 0.23, + "grad_norm": 0.9648779560767419, + "learning_rate": 1.802567232610899e-05, + "loss": 0.6488, "step": 4950 }, { - "epoch": 0.28, - "grad_norm": 0.985136552799657, - "learning_rate": 1.679282294233777e-05, - "loss": 0.622, + "epoch": 0.23, + "grad_norm": 0.3890559478680819, + "learning_rate": 1.8024784601679954e-05, + "loss": 0.2808, "step": 4951 }, { - "epoch": 0.28, - "grad_norm": 0.32555809213851533, - "learning_rate": 1.6791457143626887e-05, - "loss": 0.2532, + "epoch": 0.23, + "grad_norm": 0.4134675794820532, + "learning_rate": 1.8023896699589923e-05, + "loss": 0.3246, "step": 4952 }, { - "epoch": 0.28, - "grad_norm": 0.3561321537682771, - "learning_rate": 1.679009110972814e-05, - "loss": 0.2804, + "epoch": 0.23, + "grad_norm": 0.29829236180178204, + "learning_rate": 1.802300861985855e-05, + "loss": 0.1903, "step": 4953 }, { - "epoch": 0.28, - "grad_norm": 0.6560341934071064, - "learning_rate": 1.6788724840688823e-05, - "loss": 0.4473, + "epoch": 0.23, + "grad_norm": 0.38427470698638655, + "learning_rate": 1.8022120362505503e-05, + "loss": 0.2159, "step": 4954 }, { - "epoch": 0.28, - "grad_norm": 0.31007613366907594, - "learning_rate": 1.678735833655626e-05, - "loss": 0.1873, + "epoch": 0.23, + "grad_norm": 0.5770495694081988, + "learning_rate": 1.802123192755044e-05, + "loss": 0.3754, "step": 4955 }, { - "epoch": 0.28, - "grad_norm": 0.326241133662786, - "learning_rate": 1.6785991597377772e-05, - "loss": 0.243, + "epoch": 0.23, + "grad_norm": 0.38959411915089764, + "learning_rate": 1.8020343315013032e-05, + "loss": 0.3709, "step": 4956 }, { - "epoch": 0.28, - "grad_norm": 0.5177649120868529, - "learning_rate": 1.6784624623200684e-05, - "loss": 0.4381, + "epoch": 0.23, + "grad_norm": 0.5901815185721849, + "learning_rate": 1.8019454524912955e-05, + "loss": 0.2963, "step": 4957 }, { - "epoch": 0.28, - "grad_norm": 1.240446473986299, - "learning_rate": 1.6783257414072336e-05, - "loss": 0.6064, + "epoch": 0.23, + "grad_norm": 0.4660061246013941, + "learning_rate": 1.8018565557269886e-05, + "loss": 0.3181, "step": 4958 }, { - "epoch": 0.28, - "grad_norm": 0.33109360065083926, - "learning_rate": 1.678188997004008e-05, - "loss": 0.2334, + "epoch": 0.23, + "grad_norm": 0.3275253732285472, + "learning_rate": 1.80176764121035e-05, + "loss": 0.1886, "step": 4959 }, { - "epoch": 0.28, - "grad_norm": 0.39985346698702146, - "learning_rate": 1.678052229115126e-05, - "loss": 0.3504, + "epoch": 0.23, + "grad_norm": 0.4060322478994801, + "learning_rate": 1.801678708943349e-05, + "loss": 0.3159, "step": 4960 }, { - "epoch": 0.29, - "grad_norm": 0.5592780579901474, - "learning_rate": 1.6779154377453248e-05, - "loss": 0.1822, + "epoch": 0.23, + "grad_norm": 0.4268258902171265, + "learning_rate": 1.8015897589279537e-05, + "loss": 0.2552, "step": 4961 }, { - "epoch": 0.29, - "grad_norm": 0.3496638175036046, - "learning_rate": 1.677778622899341e-05, - "loss": 0.2214, + "epoch": 0.23, + "grad_norm": 1.0115930066360852, + "learning_rate": 1.8015007911661336e-05, + "loss": 0.457, "step": 4962 }, { - "epoch": 0.29, - "grad_norm": 1.0124778899205107, - "learning_rate": 1.677641784581913e-05, - "loss": 0.6795, + "epoch": 0.23, + "grad_norm": 0.7788025618758897, + "learning_rate": 1.801411805659859e-05, + "loss": 0.4154, "step": 4963 }, { - "epoch": 0.29, - "grad_norm": 0.41286864504725357, - "learning_rate": 1.6775049227977788e-05, - "loss": 0.3245, + "epoch": 0.23, + "grad_norm": 0.34469493728536554, + "learning_rate": 1.8013228024110988e-05, + "loss": 0.2606, "step": 4964 }, { - "epoch": 0.29, - "grad_norm": 0.37057078830092144, - "learning_rate": 1.6773680375516784e-05, - "loss": 0.2586, + "epoch": 0.23, + "grad_norm": 0.48842413949931923, + "learning_rate": 1.8012337814218245e-05, + "loss": 0.2914, "step": 4965 }, { - "epoch": 0.29, - "grad_norm": 0.6333735841850971, - "learning_rate": 1.6772311288483517e-05, - "loss": 0.4853, + "epoch": 0.23, + "grad_norm": 0.39885164678707197, + "learning_rate": 1.8011447426940066e-05, + "loss": 0.2662, "step": 4966 }, { - "epoch": 0.29, - "grad_norm": 0.4697963107259023, - "learning_rate": 1.6770941966925404e-05, - "loss": 0.2266, + "epoch": 0.23, + "grad_norm": 0.6686094714158469, + "learning_rate": 1.801055686229616e-05, + "loss": 0.2824, "step": 4967 }, { - "epoch": 0.29, - "grad_norm": 0.33424175052387356, - "learning_rate": 1.676957241088986e-05, - "loss": 0.2609, + "epoch": 0.23, + "grad_norm": 0.49380467875691375, + "learning_rate": 1.800966612030625e-05, + "loss": 0.3421, "step": 4968 }, { - "epoch": 0.29, - "grad_norm": 0.485965282810177, - "learning_rate": 1.6768202620424317e-05, - "loss": 0.2656, + "epoch": 0.23, + "grad_norm": 0.646814246183766, + "learning_rate": 1.8008775200990047e-05, + "loss": 0.3629, "step": 4969 }, { - "epoch": 0.29, - "grad_norm": 0.6859894051180101, - "learning_rate": 1.6766832595576205e-05, - "loss": 0.4445, + "epoch": 0.23, + "grad_norm": 0.4532885049029625, + "learning_rate": 1.800788410436728e-05, + "loss": 0.2409, "step": 4970 }, { - "epoch": 0.29, - "grad_norm": 0.41946713395899093, - "learning_rate": 1.6765462336392975e-05, - "loss": 0.2915, + "epoch": 0.23, + "grad_norm": 0.3429094182688301, + "learning_rate": 1.800699283045768e-05, + "loss": 0.2069, "step": 4971 }, { - "epoch": 0.29, - "grad_norm": 0.29292304755912535, - "learning_rate": 1.6764091842922076e-05, - "loss": 0.2806, + "epoch": 0.23, + "grad_norm": 0.42160877847068445, + "learning_rate": 1.8006101379280974e-05, + "loss": 0.2763, "step": 4972 }, { - "epoch": 0.29, - "grad_norm": 0.41222303207216854, - "learning_rate": 1.6762721115210964e-05, - "loss": 0.3055, + "epoch": 0.23, + "grad_norm": 0.524193873200823, + "learning_rate": 1.80052097508569e-05, + "loss": 0.3373, "step": 4973 }, { - "epoch": 0.29, - "grad_norm": 0.420655109877888, - "learning_rate": 1.676135015330711e-05, - "loss": 0.308, + "epoch": 0.23, + "grad_norm": 1.2884676025108464, + "learning_rate": 1.8004317945205197e-05, + "loss": 0.4232, "step": 4974 }, { - "epoch": 0.29, - "grad_norm": 0.29319502490689703, - "learning_rate": 1.675997895725799e-05, - "loss": 0.1811, + "epoch": 0.23, + "grad_norm": 0.6585775472921765, + "learning_rate": 1.800342596234561e-05, + "loss": 0.3259, "step": 4975 }, { - "epoch": 0.29, - "grad_norm": 0.4219624190782678, - "learning_rate": 1.675860752711109e-05, - "loss": 0.33, + "epoch": 0.23, + "grad_norm": 0.39055048734411835, + "learning_rate": 1.8002533802297888e-05, + "loss": 0.3014, "step": 4976 }, { - "epoch": 0.29, - "grad_norm": 0.3887698623250924, - "learning_rate": 1.675723586291391e-05, - "loss": 0.2888, + "epoch": 0.23, + "grad_norm": 0.30039473285011875, + "learning_rate": 1.800164146508178e-05, + "loss": 0.1944, "step": 4977 }, { - "epoch": 0.29, - "grad_norm": 0.6488987519940098, - "learning_rate": 1.6755863964713934e-05, - "loss": 0.3712, + "epoch": 0.23, + "grad_norm": 1.244373866114887, + "learning_rate": 1.800074895071704e-05, + "loss": 0.5682, "step": 4978 }, { - "epoch": 0.29, - "grad_norm": 0.42002949206495394, - "learning_rate": 1.675449183255868e-05, - "loss": 0.3128, + "epoch": 0.23, + "grad_norm": 0.4742673702158325, + "learning_rate": 1.799985625922343e-05, + "loss": 0.3086, "step": 4979 }, { - "epoch": 0.29, - "grad_norm": 0.33873593445127587, - "learning_rate": 1.6753119466495667e-05, - "loss": 0.2737, + "epoch": 0.23, + "grad_norm": 0.4236816339236766, + "learning_rate": 1.7998963390620713e-05, + "loss": 0.2567, "step": 4980 }, { - "epoch": 0.29, - "grad_norm": 0.334940371790023, - "learning_rate": 1.6751746866572415e-05, - "loss": 0.2823, + "epoch": 0.23, + "grad_norm": 0.6553713744692531, + "learning_rate": 1.7998070344928656e-05, + "loss": 0.3879, "step": 4981 }, { - "epoch": 0.29, - "grad_norm": 0.9608719470015646, - "learning_rate": 1.6750374032836462e-05, - "loss": 0.3832, + "epoch": 0.23, + "grad_norm": 0.4096341727193085, + "learning_rate": 1.799717712216703e-05, + "loss": 0.2876, "step": 4982 }, { - "epoch": 0.29, - "grad_norm": 0.33594948667433394, - "learning_rate": 1.6749000965335344e-05, - "loss": 0.2858, + "epoch": 0.23, + "grad_norm": 0.8082566017102061, + "learning_rate": 1.7996283722355612e-05, + "loss": 0.2622, "step": 4983 }, { - "epoch": 0.29, - "grad_norm": 0.3677272022726247, - "learning_rate": 1.6747627664116615e-05, - "loss": 0.3131, + "epoch": 0.23, + "grad_norm": 0.4010869658506476, + "learning_rate": 1.799539014551418e-05, + "loss": 0.3233, "step": 4984 }, { - "epoch": 0.29, - "grad_norm": 0.8046638072008366, - "learning_rate": 1.674625412922783e-05, - "loss": 0.4111, + "epoch": 0.23, + "grad_norm": 0.5162993492683894, + "learning_rate": 1.7994496391662513e-05, + "loss": 0.3247, "step": 4985 }, { - "epoch": 0.29, - "grad_norm": 0.32686647655558276, - "learning_rate": 1.6744880360716555e-05, - "loss": 0.2732, + "epoch": 0.23, + "grad_norm": 1.1079533540418687, + "learning_rate": 1.7993602460820403e-05, + "loss": 0.5842, "step": 4986 }, { - "epoch": 0.29, - "grad_norm": 0.41757090613796255, - "learning_rate": 1.6743506358630358e-05, - "loss": 0.2837, + "epoch": 0.23, + "grad_norm": 0.2556972329154646, + "learning_rate": 1.799270835300764e-05, + "loss": 0.1688, "step": 4987 }, { - "epoch": 0.29, - "grad_norm": 0.35935865239680986, - "learning_rate": 1.674213212301683e-05, - "loss": 0.268, + "epoch": 0.23, + "grad_norm": 0.34010955007100796, + "learning_rate": 1.7991814068244012e-05, + "loss": 0.2719, "step": 4988 }, { - "epoch": 0.29, - "grad_norm": 0.4316263565610041, - "learning_rate": 1.6740757653923557e-05, - "loss": 0.2988, + "epoch": 0.23, + "grad_norm": 0.943480844671025, + "learning_rate": 1.799091960654933e-05, + "loss": 0.6952, "step": 4989 }, { - "epoch": 0.29, - "grad_norm": 0.5887593298772523, - "learning_rate": 1.673938295139814e-05, - "loss": 0.3779, + "epoch": 0.23, + "grad_norm": 0.48934264694276663, + "learning_rate": 1.7990024967943385e-05, + "loss": 0.3014, "step": 4990 }, { - "epoch": 0.29, - "grad_norm": 0.5006344489216323, - "learning_rate": 1.6738008015488174e-05, - "loss": 0.3939, + "epoch": 0.23, + "grad_norm": 0.44444903972173233, + "learning_rate": 1.7989130152445987e-05, + "loss": 0.3504, "step": 4991 }, { - "epoch": 0.29, - "grad_norm": 0.32402544400838923, - "learning_rate": 1.6736632846241282e-05, - "loss": 0.2105, + "epoch": 0.23, + "grad_norm": 0.44279254846404587, + "learning_rate": 1.7988235160076953e-05, + "loss": 0.3492, "step": 4992 }, { - "epoch": 0.29, - "grad_norm": 0.3172635891197995, - "learning_rate": 1.6735257443705085e-05, - "loss": 0.2832, + "epoch": 0.23, + "grad_norm": 0.17618268762304568, + "learning_rate": 1.798733999085609e-05, + "loss": 0.0747, "step": 4993 }, { - "epoch": 0.29, - "grad_norm": 0.3766031153954571, - "learning_rate": 1.6733881807927212e-05, - "loss": 0.3244, + "epoch": 0.23, + "grad_norm": 0.4886679567575999, + "learning_rate": 1.7986444644803213e-05, + "loss": 0.3231, "step": 4994 }, { - "epoch": 0.29, - "grad_norm": 0.33773156689449546, - "learning_rate": 1.67325059389553e-05, - "loss": 0.1948, + "epoch": 0.23, + "grad_norm": 0.7919471482729082, + "learning_rate": 1.7985549121938155e-05, + "loss": 0.5406, "step": 4995 }, { - "epoch": 0.29, - "grad_norm": 0.4008554402803368, - "learning_rate": 1.6731129836837e-05, - "loss": 0.3248, + "epoch": 0.23, + "grad_norm": 0.3769936575632186, + "learning_rate": 1.7984653422280733e-05, + "loss": 0.3069, "step": 4996 }, { - "epoch": 0.29, - "grad_norm": 1.4279087807348623, - "learning_rate": 1.672975350161996e-05, - "loss": 0.7799, + "epoch": 0.23, + "grad_norm": 0.34074282864790595, + "learning_rate": 1.798375754585078e-05, + "loss": 0.279, "step": 4997 }, { - "epoch": 0.29, - "grad_norm": 0.32856664750277276, - "learning_rate": 1.672837693335185e-05, - "loss": 0.2259, + "epoch": 0.23, + "grad_norm": 0.4259196613007591, + "learning_rate": 1.7982861492668132e-05, + "loss": 0.2927, "step": 4998 }, { - "epoch": 0.29, - "grad_norm": 0.3994700065203357, - "learning_rate": 1.6727000132080332e-05, - "loss": 0.3668, + "epoch": 0.23, + "grad_norm": 0.44693566533597756, + "learning_rate": 1.7981965262752625e-05, + "loss": 0.2695, "step": 4999 }, { - "epoch": 0.29, - "grad_norm": 0.4195276798735785, - "learning_rate": 1.6725623097853092e-05, - "loss": 0.3758, + "epoch": 0.23, + "grad_norm": 0.34224274782385045, + "learning_rate": 1.7981068856124096e-05, + "loss": 0.2429, "step": 5000 }, { - "epoch": 0.29, - "grad_norm": 0.19978298158962446, - "learning_rate": 1.672424583071781e-05, - "loss": 0.1467, + "epoch": 0.23, + "grad_norm": 0.9267544651736447, + "learning_rate": 1.7980172272802398e-05, + "loss": 0.4539, "step": 5001 }, { - "epoch": 0.29, - "grad_norm": 1.198150816066066, - "learning_rate": 1.672286833072219e-05, - "loss": 0.6234, + "epoch": 0.23, + "grad_norm": 0.7859811498862915, + "learning_rate": 1.7979275512807374e-05, + "loss": 0.4729, "step": 5002 }, { - "epoch": 0.29, - "grad_norm": 0.45414769727547355, - "learning_rate": 1.6721490597913922e-05, - "loss": 0.3633, + "epoch": 0.23, + "grad_norm": 0.36038858364960336, + "learning_rate": 1.7978378576158885e-05, + "loss": 0.2341, "step": 5003 }, { - "epoch": 0.29, - "grad_norm": 0.38110366694566206, - "learning_rate": 1.672011263234073e-05, - "loss": 0.2942, + "epoch": 0.23, + "grad_norm": 0.39934532417285895, + "learning_rate": 1.797748146287678e-05, + "loss": 0.3336, "step": 5004 }, { - "epoch": 0.29, - "grad_norm": 0.43945156180002193, - "learning_rate": 1.671873443405032e-05, - "loss": 0.3225, + "epoch": 0.23, + "grad_norm": 0.2939776414112071, + "learning_rate": 1.7976584172980926e-05, + "loss": 0.1602, "step": 5005 }, { - "epoch": 0.29, - "grad_norm": 0.44775285754769784, - "learning_rate": 1.671735600309043e-05, - "loss": 0.3277, + "epoch": 0.23, + "grad_norm": 0.3562785766822121, + "learning_rate": 1.7975686706491187e-05, + "loss": 0.2057, "step": 5006 }, { - "epoch": 0.29, - "grad_norm": 0.31152176309096835, - "learning_rate": 1.6715977339508787e-05, - "loss": 0.2134, + "epoch": 0.23, + "grad_norm": 0.9798550143327551, + "learning_rate": 1.7974789063427432e-05, + "loss": 0.5223, "step": 5007 }, { - "epoch": 0.29, - "grad_norm": 0.335916270130865, - "learning_rate": 1.6714598443353137e-05, - "loss": 0.2523, + "epoch": 0.23, + "grad_norm": 0.43043121135174156, + "learning_rate": 1.7973891243809533e-05, + "loss": 0.3443, "step": 5008 }, { - "epoch": 0.29, - "grad_norm": 1.2214895870371527, - "learning_rate": 1.6713219314671235e-05, - "loss": 0.7403, + "epoch": 0.23, + "grad_norm": 0.3453947204773186, + "learning_rate": 1.7972993247657363e-05, + "loss": 0.2025, "step": 5009 }, { - "epoch": 0.29, - "grad_norm": 0.5000389590232583, - "learning_rate": 1.6711839953510835e-05, - "loss": 0.3313, + "epoch": 0.23, + "grad_norm": 0.9909072959377098, + "learning_rate": 1.797209507499081e-05, + "loss": 0.7143, "step": 5010 }, { - "epoch": 0.29, - "grad_norm": 0.371776258828232, - "learning_rate": 1.6710460359919708e-05, - "loss": 0.2937, + "epoch": 0.23, + "grad_norm": 0.26906878326357414, + "learning_rate": 1.7971196725829757e-05, + "loss": 0.2091, "step": 5011 }, { - "epoch": 0.29, - "grad_norm": 0.3811309362080807, - "learning_rate": 1.6709080533945624e-05, - "loss": 0.3059, + "epoch": 0.23, + "grad_norm": 0.4355806063082319, + "learning_rate": 1.7970298200194093e-05, + "loss": 0.3076, "step": 5012 }, { - "epoch": 0.29, - "grad_norm": 0.3476381176969048, - "learning_rate": 1.6707700475636372e-05, - "loss": 0.1721, + "epoch": 0.23, + "grad_norm": 0.6499085881449501, + "learning_rate": 1.7969399498103703e-05, + "loss": 0.3331, "step": 5013 }, { - "epoch": 0.29, - "grad_norm": 0.41573768957075496, - "learning_rate": 1.6706320185039742e-05, - "loss": 0.2663, + "epoch": 0.23, + "grad_norm": 0.966102950643204, + "learning_rate": 1.796850061957849e-05, + "loss": 0.5413, "step": 5014 }, { - "epoch": 0.29, - "grad_norm": 0.5511130497075707, - "learning_rate": 1.670493966220353e-05, - "loss": 0.3951, + "epoch": 0.23, + "grad_norm": 0.38373489288486073, + "learning_rate": 1.796760156463836e-05, + "loss": 0.3069, "step": 5015 }, { - "epoch": 0.29, - "grad_norm": 0.34083421472885944, - "learning_rate": 1.6703558907175548e-05, - "loss": 0.3184, + "epoch": 0.23, + "grad_norm": 0.36033742176904077, + "learning_rate": 1.7966702333303207e-05, + "loss": 0.296, "step": 5016 }, { - "epoch": 0.29, - "grad_norm": 0.47507592384082353, - "learning_rate": 1.6702177920003607e-05, - "loss": 0.4127, + "epoch": 0.23, + "grad_norm": 0.3505410340985064, + "learning_rate": 1.7965802925592938e-05, + "loss": 0.1972, "step": 5017 }, { - "epoch": 0.29, - "grad_norm": 0.8809556008590094, - "learning_rate": 1.6700796700735534e-05, - "loss": 0.3604, + "epoch": 0.23, + "grad_norm": 0.36733342077749354, + "learning_rate": 1.7964903341527475e-05, + "loss": 0.29, "step": 5018 }, { - "epoch": 0.29, - "grad_norm": 0.3058900980432858, - "learning_rate": 1.669941524941916e-05, - "loss": 0.1977, + "epoch": 0.23, + "grad_norm": 0.68552585615589, + "learning_rate": 1.7964003581126728e-05, + "loss": 0.3545, "step": 5019 }, { - "epoch": 0.29, - "grad_norm": 0.4092102952384807, - "learning_rate": 1.6698033566102322e-05, - "loss": 0.2895, + "epoch": 0.23, + "grad_norm": 0.5631352086392847, + "learning_rate": 1.7963103644410617e-05, + "loss": 0.3859, "step": 5020 }, { - "epoch": 0.29, - "grad_norm": 0.7002227774841857, - "learning_rate": 1.6696651650832874e-05, - "loss": 0.3808, + "epoch": 0.23, + "grad_norm": 0.3625065386007482, + "learning_rate": 1.7962203531399066e-05, + "loss": 0.2827, "step": 5021 }, { - "epoch": 0.29, - "grad_norm": 0.39936961207401184, - "learning_rate": 1.6695269503658663e-05, - "loss": 0.339, + "epoch": 0.23, + "grad_norm": 0.7826761805684815, + "learning_rate": 1.7961303242112003e-05, + "loss": 0.4384, "step": 5022 }, { - "epoch": 0.29, - "grad_norm": 0.5260095728189574, - "learning_rate": 1.6693887124627556e-05, - "loss": 0.3984, + "epoch": 0.23, + "grad_norm": 0.2771096742085613, + "learning_rate": 1.7960402776569358e-05, + "loss": 0.2211, "step": 5023 }, { - "epoch": 0.29, - "grad_norm": 0.41866801996607933, - "learning_rate": 1.6692504513787432e-05, - "loss": 0.2768, + "epoch": 0.23, + "grad_norm": 0.4020504042711531, + "learning_rate": 1.795950213479107e-05, + "loss": 0.2908, "step": 5024 }, { - "epoch": 0.29, - "grad_norm": 0.2600843617318522, - "learning_rate": 1.669112167118616e-05, - "loss": 0.1399, + "epoch": 0.23, + "grad_norm": 0.8002187101166851, + "learning_rate": 1.7958601316797075e-05, + "loss": 0.4971, "step": 5025 }, { - "epoch": 0.29, - "grad_norm": 0.44756851536480885, - "learning_rate": 1.668973859687163e-05, - "loss": 0.3325, + "epoch": 0.23, + "grad_norm": 0.8152742301341442, + "learning_rate": 1.795770032260732e-05, + "loss": 0.4577, "step": 5026 }, { - "epoch": 0.29, - "grad_norm": 0.42835429389794066, - "learning_rate": 1.6688355290891746e-05, - "loss": 0.308, + "epoch": 0.23, + "grad_norm": 0.486467464490504, + "learning_rate": 1.7956799152241746e-05, + "loss": 0.2833, "step": 5027 }, { - "epoch": 0.29, - "grad_norm": 0.5913982247983119, - "learning_rate": 1.66869717532944e-05, - "loss": 0.3365, + "epoch": 0.23, + "grad_norm": 0.3756770817565876, + "learning_rate": 1.7955897805720308e-05, + "loss": 0.3003, "step": 5028 }, { - "epoch": 0.29, - "grad_norm": 0.40328321757099445, - "learning_rate": 1.6685587984127513e-05, - "loss": 0.3689, + "epoch": 0.23, + "grad_norm": 0.40591926312626125, + "learning_rate": 1.7954996283062964e-05, + "loss": 0.2406, "step": 5029 }, { - "epoch": 0.29, - "grad_norm": 1.0759732739079926, - "learning_rate": 1.6684203983439e-05, - "loss": 0.606, + "epoch": 0.23, + "grad_norm": 0.47746270171148314, + "learning_rate": 1.7954094584289668e-05, + "loss": 0.267, "step": 5030 }, { - "epoch": 0.29, - "grad_norm": 0.2783514820703303, - "learning_rate": 1.668281975127679e-05, - "loss": 0.1537, + "epoch": 0.23, + "grad_norm": 0.3686969368185776, + "learning_rate": 1.795319270942038e-05, + "loss": 0.3011, "step": 5031 }, { - "epoch": 0.29, - "grad_norm": 0.33181140217007127, - "learning_rate": 1.6681435287688823e-05, - "loss": 0.2664, + "epoch": 0.23, + "grad_norm": 0.5489680611806266, + "learning_rate": 1.7952290658475075e-05, + "loss": 0.2705, "step": 5032 }, { - "epoch": 0.29, - "grad_norm": 0.8302507762797569, - "learning_rate": 1.6680050592723038e-05, - "loss": 0.5131, + "epoch": 0.23, + "grad_norm": 0.377050909869488, + "learning_rate": 1.795138843147372e-05, + "loss": 0.2775, "step": 5033 }, { - "epoch": 0.29, - "grad_norm": 0.6098011206817671, - "learning_rate": 1.6678665666427387e-05, - "loss": 0.3049, + "epoch": 0.23, + "grad_norm": 0.8690940993606971, + "learning_rate": 1.7950486028436284e-05, + "loss": 0.5408, "step": 5034 }, { - "epoch": 0.29, - "grad_norm": 0.35164039268183567, - "learning_rate": 1.6677280508849828e-05, - "loss": 0.3084, + "epoch": 0.23, + "grad_norm": 0.4216646423672232, + "learning_rate": 1.7949583449382754e-05, + "loss": 0.2827, "step": 5035 }, { - "epoch": 0.29, - "grad_norm": 0.951993401806805, - "learning_rate": 1.667589512003834e-05, - "loss": 0.6228, + "epoch": 0.23, + "grad_norm": 0.41538686584989826, + "learning_rate": 1.794868069433311e-05, + "loss": 0.2792, "step": 5036 }, { - "epoch": 0.29, - "grad_norm": 0.34895682350666707, - "learning_rate": 1.6674509500040885e-05, - "loss": 0.2188, + "epoch": 0.23, + "grad_norm": 0.359646296740704, + "learning_rate": 1.7947777763307333e-05, + "loss": 0.2358, "step": 5037 }, { - "epoch": 0.29, - "grad_norm": 0.3204560482309398, - "learning_rate": 1.6673123648905454e-05, - "loss": 0.2321, + "epoch": 0.23, + "grad_norm": 1.4300416270698473, + "learning_rate": 1.794687465632542e-05, + "loss": 0.8272, "step": 5038 }, { - "epoch": 0.29, - "grad_norm": 0.4577995928154056, - "learning_rate": 1.667173756668004e-05, - "loss": 0.337, + "epoch": 0.23, + "grad_norm": 0.38103629170890935, + "learning_rate": 1.7945971373407358e-05, + "loss": 0.2452, "step": 5039 }, { - "epoch": 0.29, - "grad_norm": 0.3938689527075941, - "learning_rate": 1.667035125341264e-05, - "loss": 0.2516, + "epoch": 0.23, + "grad_norm": 0.4314352546797358, + "learning_rate": 1.7945067914573147e-05, + "loss": 0.3416, "step": 5040 }, { - "epoch": 0.29, - "grad_norm": 0.572570413679517, - "learning_rate": 1.6668964709151265e-05, - "loss": 0.4199, + "epoch": 0.23, + "grad_norm": 0.8118304768648636, + "learning_rate": 1.7944164279842794e-05, + "loss": 0.5083, "step": 5041 }, { - "epoch": 0.29, - "grad_norm": 1.4299646855354586, - "learning_rate": 1.6667577933943925e-05, - "loss": 0.7393, + "epoch": 0.23, + "grad_norm": 0.39463596889960306, + "learning_rate": 1.79432604692363e-05, + "loss": 0.2673, "step": 5042 }, { - "epoch": 0.29, - "grad_norm": 0.33121075066613553, - "learning_rate": 1.666619092783865e-05, - "loss": 0.3047, + "epoch": 0.23, + "grad_norm": 0.3633128097207244, + "learning_rate": 1.794235648277367e-05, + "loss": 0.245, "step": 5043 }, { - "epoch": 0.29, - "grad_norm": 0.24035738339207335, - "learning_rate": 1.6664803690883465e-05, - "loss": 0.1529, + "epoch": 0.23, + "grad_norm": 0.3910083901651251, + "learning_rate": 1.794145232047493e-05, + "loss": 0.3106, "step": 5044 }, { - "epoch": 0.29, - "grad_norm": 0.5652684265469679, - "learning_rate": 1.666341622312642e-05, - "loss": 0.4583, + "epoch": 0.23, + "grad_norm": 0.38116107558308543, + "learning_rate": 1.7940547982360085e-05, + "loss": 0.1934, "step": 5045 }, { - "epoch": 0.29, - "grad_norm": 0.8551662769154373, - "learning_rate": 1.666202852461556e-05, - "loss": 0.5423, + "epoch": 0.23, + "grad_norm": 0.5913880272357627, + "learning_rate": 1.793964346844916e-05, + "loss": 0.4388, "step": 5046 }, { - "epoch": 0.29, - "grad_norm": 0.3637742889707305, - "learning_rate": 1.6660640595398934e-05, - "loss": 0.2563, + "epoch": 0.23, + "grad_norm": 0.3800826217932683, + "learning_rate": 1.7938738778762182e-05, + "loss": 0.3687, "step": 5047 }, { - "epoch": 0.29, - "grad_norm": 0.49084959630933184, - "learning_rate": 1.6659252435524613e-05, - "loss": 0.3588, + "epoch": 0.23, + "grad_norm": 0.37650891226813976, + "learning_rate": 1.793783391331918e-05, + "loss": 0.2309, "step": 5048 }, { - "epoch": 0.29, - "grad_norm": 0.9077022712537721, - "learning_rate": 1.6657864045040665e-05, - "loss": 0.4459, + "epoch": 0.23, + "grad_norm": 0.40496256830260635, + "learning_rate": 1.7936928872140184e-05, + "loss": 0.2843, "step": 5049 }, { - "epoch": 0.29, - "grad_norm": 0.44606011433230913, - "learning_rate": 1.665647542399517e-05, - "loss": 0.2837, + "epoch": 0.23, + "grad_norm": 0.4622874042245147, + "learning_rate": 1.7936023655245235e-05, + "loss": 0.309, "step": 5050 }, { - "epoch": 0.29, - "grad_norm": 0.40943914248299257, - "learning_rate": 1.665508657243622e-05, - "loss": 0.3512, + "epoch": 0.23, + "grad_norm": 0.42126213902983145, + "learning_rate": 1.7935118262654373e-05, + "loss": 0.3017, "step": 5051 }, { - "epoch": 0.29, - "grad_norm": 0.30297531819075785, - "learning_rate": 1.6653697490411905e-05, - "loss": 0.187, + "epoch": 0.23, + "grad_norm": 0.33526525955823777, + "learning_rate": 1.7934212694387633e-05, + "loss": 0.2861, "step": 5052 }, { - "epoch": 0.29, - "grad_norm": 0.3876810435050498, - "learning_rate": 1.6652308177970335e-05, - "loss": 0.2396, + "epoch": 0.23, + "grad_norm": 0.6335455119044751, + "learning_rate": 1.793330695046508e-05, + "loss": 0.4684, "step": 5053 }, { - "epoch": 0.29, - "grad_norm": 1.1355168231759345, - "learning_rate": 1.665091863515962e-05, - "loss": 0.6173, + "epoch": 0.23, + "grad_norm": 0.4621370173461592, + "learning_rate": 1.793240103090675e-05, + "loss": 0.2966, "step": 5054 }, { - "epoch": 0.29, - "grad_norm": 0.3430108180818135, - "learning_rate": 1.6649528862027877e-05, - "loss": 0.3208, + "epoch": 0.23, + "grad_norm": 0.29275487649549736, + "learning_rate": 1.793149493573271e-05, + "loss": 0.2014, "step": 5055 }, { - "epoch": 0.29, - "grad_norm": 0.3775947002380297, - "learning_rate": 1.6648138858623236e-05, - "loss": 0.3046, + "epoch": 0.23, + "grad_norm": 0.3700935017754927, + "learning_rate": 1.7930588664963017e-05, + "loss": 0.2722, "step": 5056 }, { - "epoch": 0.29, - "grad_norm": 0.3376050104963141, - "learning_rate": 1.664674862499383e-05, - "loss": 0.1998, + "epoch": 0.23, + "grad_norm": 0.4808787813036029, + "learning_rate": 1.7929682218617733e-05, + "loss": 0.309, "step": 5057 }, { - "epoch": 0.29, - "grad_norm": 0.5965276651128877, - "learning_rate": 1.664535816118781e-05, - "loss": 0.4102, + "epoch": 0.23, + "grad_norm": 0.5245466849790029, + "learning_rate": 1.7928775596716925e-05, + "loss": 0.3167, "step": 5058 }, { - "epoch": 0.29, - "grad_norm": 0.4126818965088, - "learning_rate": 1.6643967467253316e-05, - "loss": 0.3416, + "epoch": 0.23, + "grad_norm": 0.37717047325476427, + "learning_rate": 1.7927868799280673e-05, + "loss": 0.3543, "step": 5059 }, { - "epoch": 0.29, - "grad_norm": 0.35265629901301665, - "learning_rate": 1.664257654323852e-05, - "loss": 0.282, + "epoch": 0.23, + "grad_norm": 0.4810216205367915, + "learning_rate": 1.7926961826329043e-05, + "loss": 0.2915, "step": 5060 }, { - "epoch": 0.29, - "grad_norm": 0.46529035889049486, - "learning_rate": 1.664118538919158e-05, - "loss": 0.3418, + "epoch": 0.23, + "grad_norm": 0.4879956420441694, + "learning_rate": 1.792605467788212e-05, + "loss": 0.2724, "step": 5061 }, { - "epoch": 0.29, - "grad_norm": 0.386042563376412, - "learning_rate": 1.6639794005160677e-05, - "loss": 0.3206, + "epoch": 0.23, + "grad_norm": 0.34816355175159086, + "learning_rate": 1.7925147353959985e-05, + "loss": 0.2283, "step": 5062 }, { - "epoch": 0.29, - "grad_norm": 0.347541240252572, - "learning_rate": 1.6638402391193993e-05, - "loss": 0.234, + "epoch": 0.23, + "grad_norm": 0.4718450994324635, + "learning_rate": 1.7924239854582725e-05, + "loss": 0.3119, "step": 5063 }, { - "epoch": 0.29, - "grad_norm": 1.3657479293256798, - "learning_rate": 1.663701054733972e-05, - "loss": 0.8305, + "epoch": 0.23, + "grad_norm": 0.4352988898258253, + "learning_rate": 1.7923332179770433e-05, + "loss": 0.3155, "step": 5064 }, { - "epoch": 0.29, - "grad_norm": 0.35412391052650544, - "learning_rate": 1.6635618473646058e-05, - "loss": 0.2778, + "epoch": 0.23, + "grad_norm": 0.7512087345064935, + "learning_rate": 1.79224243295432e-05, + "loss": 0.3957, "step": 5065 }, { - "epoch": 0.29, - "grad_norm": 0.32667171769057474, - "learning_rate": 1.6634226170161213e-05, - "loss": 0.2391, + "epoch": 0.23, + "grad_norm": 0.6689176330344204, + "learning_rate": 1.7921516303921132e-05, + "loss": 0.4244, "step": 5066 }, { - "epoch": 0.29, - "grad_norm": 0.43478785542869364, - "learning_rate": 1.66328336369334e-05, - "loss": 0.3431, + "epoch": 0.23, + "grad_norm": 0.369814947764546, + "learning_rate": 1.7920608102924326e-05, + "loss": 0.3349, "step": 5067 }, { - "epoch": 0.29, - "grad_norm": 0.33734429475445615, - "learning_rate": 1.6631440874010842e-05, - "loss": 0.3147, + "epoch": 0.23, + "grad_norm": 0.2786512559484043, + "learning_rate": 1.7919699726572893e-05, + "loss": 0.1647, "step": 5068 }, { - "epoch": 0.29, - "grad_norm": 0.8787246640463405, - "learning_rate": 1.6630047881441774e-05, - "loss": 0.6012, + "epoch": 0.23, + "grad_norm": 0.5902070516974034, + "learning_rate": 1.791879117488694e-05, + "loss": 0.3423, "step": 5069 }, { - "epoch": 0.29, - "grad_norm": 0.49363055525613264, - "learning_rate": 1.6628654659274433e-05, - "loss": 0.3192, + "epoch": 0.23, + "grad_norm": 0.47220820240241557, + "learning_rate": 1.7917882447886585e-05, + "loss": 0.3452, "step": 5070 }, { - "epoch": 0.29, - "grad_norm": 0.30082111207502216, - "learning_rate": 1.6627261207557068e-05, - "loss": 0.2643, + "epoch": 0.23, + "grad_norm": 0.4081484900250525, + "learning_rate": 1.7916973545591937e-05, + "loss": 0.2614, "step": 5071 }, { - "epoch": 0.29, - "grad_norm": 0.26296980966438765, - "learning_rate": 1.6625867526337928e-05, - "loss": 0.1485, + "epoch": 0.23, + "grad_norm": 0.6746238677439688, + "learning_rate": 1.791606446802313e-05, + "loss": 0.3361, "step": 5072 }, { - "epoch": 0.29, - "grad_norm": 0.6053081587769884, - "learning_rate": 1.662447361566528e-05, - "loss": 0.3424, + "epoch": 0.23, + "grad_norm": 0.44215829655231037, + "learning_rate": 1.7915155215200285e-05, + "loss": 0.323, "step": 5073 }, { - "epoch": 0.29, - "grad_norm": 0.41307754279842807, - "learning_rate": 1.6623079475587403e-05, - "loss": 0.3124, + "epoch": 0.23, + "grad_norm": 0.2944637006754108, + "learning_rate": 1.7914245787143532e-05, + "loss": 0.1453, "step": 5074 }, { - "epoch": 0.29, - "grad_norm": 0.40972224319212935, - "learning_rate": 1.6621685106152564e-05, - "loss": 0.3288, + "epoch": 0.23, + "grad_norm": 0.2984632359789537, + "learning_rate": 1.7913336183873006e-05, + "loss": 0.2908, "step": 5075 }, { - "epoch": 0.29, - "grad_norm": 0.49432112174734544, - "learning_rate": 1.6620290507409053e-05, - "loss": 0.3193, + "epoch": 0.23, + "grad_norm": 0.5945538180873058, + "learning_rate": 1.7912426405408843e-05, + "loss": 0.4102, "step": 5076 }, { - "epoch": 0.29, - "grad_norm": 0.380041557575946, - "learning_rate": 1.6618895679405165e-05, - "loss": 0.283, + "epoch": 0.23, + "grad_norm": 0.4926857362317913, + "learning_rate": 1.7911516451771185e-05, + "loss": 0.3105, "step": 5077 }, { - "epoch": 0.29, - "grad_norm": 0.35160063555106125, - "learning_rate": 1.6617500622189208e-05, - "loss": 0.246, + "epoch": 0.23, + "grad_norm": 0.40201477451956613, + "learning_rate": 1.791060632298018e-05, + "loss": 0.2144, "step": 5078 }, { - "epoch": 0.29, - "grad_norm": 0.3811836486280641, - "learning_rate": 1.6616105335809487e-05, - "loss": 0.3454, + "epoch": 0.23, + "grad_norm": 0.47361384724889793, + "learning_rate": 1.7909696019055973e-05, + "loss": 0.3262, "step": 5079 }, { - "epoch": 0.29, - "grad_norm": 0.33824258296520193, - "learning_rate": 1.6614709820314323e-05, - "loss": 0.2238, + "epoch": 0.23, + "grad_norm": 0.6172385438899625, + "learning_rate": 1.790878554001872e-05, + "loss": 0.4132, "step": 5080 }, { - "epoch": 0.29, - "grad_norm": 0.8228245625272075, - "learning_rate": 1.6613314075752044e-05, - "loss": 0.5268, + "epoch": 0.23, + "grad_norm": 0.8913202990221846, + "learning_rate": 1.790787488588858e-05, + "loss": 0.3383, "step": 5081 }, { - "epoch": 0.29, - "grad_norm": 1.1414700434465859, - "learning_rate": 1.661191810217098e-05, - "loss": 0.7977, + "epoch": 0.23, + "grad_norm": 0.39622299418267953, + "learning_rate": 1.7906964056685706e-05, + "loss": 0.3411, "step": 5082 }, { - "epoch": 0.29, - "grad_norm": 0.294198134495597, - "learning_rate": 1.661052189961948e-05, - "loss": 0.2376, + "epoch": 0.23, + "grad_norm": 0.3015152379408921, + "learning_rate": 1.7906053052430275e-05, + "loss": 0.2321, "step": 5083 }, { - "epoch": 0.29, - "grad_norm": 0.46694318435537846, - "learning_rate": 1.660912546814589e-05, - "loss": 0.3321, + "epoch": 0.23, + "grad_norm": 0.3814474536597992, + "learning_rate": 1.7905141873142448e-05, + "loss": 0.1816, "step": 5084 }, { - "epoch": 0.29, - "grad_norm": 0.40253889624722783, - "learning_rate": 1.6607728807798568e-05, - "loss": 0.2855, + "epoch": 0.23, + "grad_norm": 0.46192952313076796, + "learning_rate": 1.7904230518842398e-05, + "loss": 0.3434, "step": 5085 }, { - "epoch": 0.29, - "grad_norm": 0.3838630594646114, - "learning_rate": 1.660633191862588e-05, - "loss": 0.2163, + "epoch": 0.23, + "grad_norm": 0.6123851064077006, + "learning_rate": 1.79033189895503e-05, + "loss": 0.3976, "step": 5086 }, { - "epoch": 0.29, - "grad_norm": 0.4452069568209437, - "learning_rate": 1.6604934800676207e-05, - "loss": 0.3252, + "epoch": 0.23, + "grad_norm": 0.5192329962608332, + "learning_rate": 1.7902407285286337e-05, + "loss": 0.3396, "step": 5087 }, { - "epoch": 0.29, - "grad_norm": 1.205226316107024, - "learning_rate": 1.660353745399792e-05, - "loss": 0.7247, + "epoch": 0.23, + "grad_norm": 0.45111407784528285, + "learning_rate": 1.79014954060707e-05, + "loss": 0.2908, "step": 5088 }, { - "epoch": 0.29, - "grad_norm": 0.32386036990488415, - "learning_rate": 1.6602139878639417e-05, - "loss": 0.2436, + "epoch": 0.23, + "grad_norm": 0.3495230606972884, + "learning_rate": 1.7900583351923565e-05, + "loss": 0.1891, "step": 5089 }, { - "epoch": 0.29, - "grad_norm": 0.7146860073301247, - "learning_rate": 1.6600742074649095e-05, - "loss": 0.4543, + "epoch": 0.23, + "grad_norm": 0.7658119751439924, + "learning_rate": 1.789967112286513e-05, + "loss": 0.4497, "step": 5090 }, { - "epoch": 0.29, - "grad_norm": 0.2521102701136926, - "learning_rate": 1.659934404207536e-05, - "loss": 0.2253, + "epoch": 0.23, + "grad_norm": 0.33489582883930885, + "learning_rate": 1.789875871891559e-05, + "loss": 0.2611, "step": 5091 }, { - "epoch": 0.29, - "grad_norm": 0.3929148102982469, - "learning_rate": 1.6597945780966626e-05, - "loss": 0.2923, + "epoch": 0.23, + "grad_norm": 0.8286510008622918, + "learning_rate": 1.7897846140095142e-05, + "loss": 0.5731, "step": 5092 }, { - "epoch": 0.29, - "grad_norm": 0.7744747631107772, - "learning_rate": 1.659654729137131e-05, - "loss": 0.3298, + "epoch": 0.23, + "grad_norm": 0.5505761213039706, + "learning_rate": 1.7896933386423998e-05, + "loss": 0.3915, "step": 5093 }, { - "epoch": 0.29, - "grad_norm": 0.385373842207475, - "learning_rate": 1.6595148573337843e-05, - "loss": 0.3322, + "epoch": 0.23, + "grad_norm": 0.4030146431128897, + "learning_rate": 1.7896020457922356e-05, + "loss": 0.233, "step": 5094 }, { - "epoch": 0.29, - "grad_norm": 0.44188848071352976, - "learning_rate": 1.6593749626914665e-05, - "loss": 0.3031, + "epoch": 0.23, + "grad_norm": 0.32751718753949155, + "learning_rate": 1.789510735461043e-05, + "loss": 0.2758, "step": 5095 }, { - "epoch": 0.29, - "grad_norm": 0.596587404876109, - "learning_rate": 1.6592350452150223e-05, - "loss": 0.34, + "epoch": 0.23, + "grad_norm": 0.3918692206972656, + "learning_rate": 1.7894194076508443e-05, + "loss": 0.1948, "step": 5096 }, { - "epoch": 0.29, - "grad_norm": 0.2930106307014018, - "learning_rate": 1.6590951049092966e-05, - "loss": 0.1554, + "epoch": 0.23, + "grad_norm": 0.4228737723619419, + "learning_rate": 1.7893280623636605e-05, + "loss": 0.2572, "step": 5097 }, { - "epoch": 0.29, - "grad_norm": 0.45593236462612596, - "learning_rate": 1.658955141779136e-05, - "loss": 0.3053, + "epoch": 0.23, + "grad_norm": 0.5845443932747214, + "learning_rate": 1.7892366996015138e-05, + "loss": 0.4071, "step": 5098 }, { - "epoch": 0.29, - "grad_norm": 0.5457068512880814, - "learning_rate": 1.6588151558293874e-05, - "loss": 0.2644, + "epoch": 0.23, + "grad_norm": 0.4928719272583364, + "learning_rate": 1.7891453193664276e-05, + "loss": 0.3476, "step": 5099 }, { - "epoch": 0.29, - "grad_norm": 1.2365592496960516, - "learning_rate": 1.658675147064898e-05, - "loss": 0.8412, + "epoch": 0.23, + "grad_norm": 0.445736013525592, + "learning_rate": 1.7890539216604245e-05, + "loss": 0.3355, "step": 5100 }, { - "epoch": 0.29, - "grad_norm": 0.4162610077770308, - "learning_rate": 1.6585351154905163e-05, - "loss": 0.2924, + "epoch": 0.23, + "grad_norm": 0.20490259686736165, + "learning_rate": 1.788962506485528e-05, + "loss": 0.1284, "step": 5101 }, { - "epoch": 0.29, - "grad_norm": 0.4220963462240051, - "learning_rate": 1.6583950611110923e-05, - "loss": 0.3159, + "epoch": 0.23, + "grad_norm": 0.5838090314548726, + "learning_rate": 1.7888710738437622e-05, + "loss": 0.4154, "step": 5102 }, { - "epoch": 0.29, - "grad_norm": 0.44314877537383723, - "learning_rate": 1.6582549839314756e-05, - "loss": 0.2598, + "epoch": 0.23, + "grad_norm": 0.3826177297512074, + "learning_rate": 1.788779623737151e-05, + "loss": 0.2612, "step": 5103 }, { - "epoch": 0.29, - "grad_norm": 0.2803168276990925, - "learning_rate": 1.658114883956517e-05, - "loss": 0.2255, + "epoch": 0.23, + "grad_norm": 0.8356145042914871, + "learning_rate": 1.7886881561677195e-05, + "loss": 0.3883, "step": 5104 }, { - "epoch": 0.29, - "grad_norm": 0.9679407388981311, - "learning_rate": 1.6579747611910684e-05, - "loss": 0.557, + "epoch": 0.23, + "grad_norm": 0.8754482570939112, + "learning_rate": 1.788596671137492e-05, + "loss": 0.4632, "step": 5105 }, { - "epoch": 0.29, - "grad_norm": 0.7293098670982834, - "learning_rate": 1.657834615639982e-05, - "loss": 0.2901, + "epoch": 0.23, + "grad_norm": 0.39233902738026744, + "learning_rate": 1.7885051686484942e-05, + "loss": 0.3112, "step": 5106 }, { - "epoch": 0.29, - "grad_norm": 0.3672744421536344, - "learning_rate": 1.6576944473081112e-05, - "loss": 0.2756, + "epoch": 0.23, + "grad_norm": 0.42263745102169586, + "learning_rate": 1.788413648702752e-05, + "loss": 0.2787, "step": 5107 }, { - "epoch": 0.29, - "grad_norm": 0.8130875641546872, - "learning_rate": 1.6575542562003097e-05, - "loss": 0.5067, + "epoch": 0.23, + "grad_norm": 0.3405670402336814, + "learning_rate": 1.7883221113022916e-05, + "loss": 0.206, "step": 5108 }, { - "epoch": 0.29, - "grad_norm": 0.2011438979448081, - "learning_rate": 1.6574140423214327e-05, - "loss": 0.1124, + "epoch": 0.23, + "grad_norm": 0.4944128898147866, + "learning_rate": 1.78823055644914e-05, + "loss": 0.3103, "step": 5109 }, { - "epoch": 0.29, - "grad_norm": 0.38553771059667385, - "learning_rate": 1.657273805676336e-05, - "loss": 0.2765, + "epoch": 0.23, + "grad_norm": 1.2349620342328662, + "learning_rate": 1.7881389841453227e-05, + "loss": 0.4121, "step": 5110 }, { - "epoch": 0.29, - "grad_norm": 0.4494145707717537, - "learning_rate": 1.6571335462698755e-05, - "loss": 0.3405, + "epoch": 0.23, + "grad_norm": 0.4201018126285985, + "learning_rate": 1.7880473943928684e-05, + "loss": 0.3325, "step": 5111 }, { - "epoch": 0.29, - "grad_norm": 1.1128740224578688, - "learning_rate": 1.6569932641069083e-05, - "loss": 0.4035, + "epoch": 0.23, + "grad_norm": 0.4304973261267877, + "learning_rate": 1.787955787193804e-05, + "loss": 0.3227, "step": 5112 }, { - "epoch": 0.29, - "grad_norm": 0.40172625854201377, - "learning_rate": 1.656852959192293e-05, - "loss": 0.325, + "epoch": 0.23, + "grad_norm": 1.0973808088007153, + "learning_rate": 1.787864162550158e-05, + "loss": 0.6788, "step": 5113 }, { - "epoch": 0.29, - "grad_norm": 0.4042048518785761, - "learning_rate": 1.656712631530888e-05, - "loss": 0.3685, + "epoch": 0.23, + "grad_norm": 0.26246339099638244, + "learning_rate": 1.7877725204639587e-05, + "loss": 0.1763, "step": 5114 }, { - "epoch": 0.29, - "grad_norm": 0.25272424394886184, - "learning_rate": 1.6565722811275526e-05, - "loss": 0.1761, + "epoch": 0.23, + "grad_norm": 0.48361457418978715, + "learning_rate": 1.7876808609372355e-05, + "loss": 0.3104, "step": 5115 }, { - "epoch": 0.29, - "grad_norm": 0.3961321724738942, - "learning_rate": 1.6564319079871472e-05, - "loss": 0.2789, + "epoch": 0.24, + "grad_norm": 1.6364090147413717, + "learning_rate": 1.787589183972017e-05, + "loss": 0.5048, "step": 5116 }, { - "epoch": 0.29, - "grad_norm": 0.5680165426058105, - "learning_rate": 1.656291512114533e-05, - "loss": 0.3786, + "epoch": 0.24, + "grad_norm": 0.5701602069864491, + "learning_rate": 1.787497489570333e-05, + "loss": 0.3161, "step": 5117 }, { - "epoch": 0.29, - "grad_norm": 0.5206323825700394, - "learning_rate": 1.656151093514572e-05, - "loss": 0.3638, + "epoch": 0.24, + "grad_norm": 0.43907752585742543, + "learning_rate": 1.7874057777342134e-05, + "loss": 0.2893, "step": 5118 }, { - "epoch": 0.29, - "grad_norm": 0.3543282139228944, - "learning_rate": 1.6560106521921272e-05, - "loss": 0.248, + "epoch": 0.24, + "grad_norm": 0.43587331370566484, + "learning_rate": 1.787314048465689e-05, + "loss": 0.3218, "step": 5119 }, { - "epoch": 0.29, - "grad_norm": 0.9717910872721999, - "learning_rate": 1.6558701881520616e-05, - "loss": 0.6354, + "epoch": 0.24, + "grad_norm": 0.4042903562503827, + "learning_rate": 1.7872223017667906e-05, + "loss": 0.1801, "step": 5120 }, { - "epoch": 0.29, - "grad_norm": 0.4011467519529105, - "learning_rate": 1.6557297013992395e-05, - "loss": 0.2849, + "epoch": 0.24, + "grad_norm": 0.47496957419460945, + "learning_rate": 1.787130537639549e-05, + "loss": 0.2894, "step": 5121 }, { - "epoch": 0.29, - "grad_norm": 0.2840900794225836, - "learning_rate": 1.6555891919385262e-05, - "loss": 0.1773, + "epoch": 0.24, + "grad_norm": 0.6384442801939174, + "learning_rate": 1.7870387560859958e-05, + "loss": 0.3961, "step": 5122 }, { - "epoch": 0.29, - "grad_norm": 0.4765438374827972, - "learning_rate": 1.655448659774787e-05, - "loss": 0.3306, + "epoch": 0.24, + "grad_norm": 0.5158052504200386, + "learning_rate": 1.786946957108163e-05, + "loss": 0.3037, "step": 5123 }, { - "epoch": 0.29, - "grad_norm": 1.0038008012167339, - "learning_rate": 1.6553081049128894e-05, - "loss": 0.4335, + "epoch": 0.24, + "grad_norm": 0.43578381106729347, + "learning_rate": 1.7868551407080832e-05, + "loss": 0.3334, "step": 5124 }, { - "epoch": 0.29, - "grad_norm": 0.3737795700734047, - "learning_rate": 1.6551675273577e-05, - "loss": 0.2289, + "epoch": 0.24, + "grad_norm": 1.2784151617131547, + "learning_rate": 1.7867633068877892e-05, + "loss": 0.6563, "step": 5125 }, { - "epoch": 0.29, - "grad_norm": 1.0662857043698486, - "learning_rate": 1.6550269271140872e-05, - "loss": 0.6902, + "epoch": 0.24, + "grad_norm": 0.33958296227831175, + "learning_rate": 1.7866714556493136e-05, + "loss": 0.2788, "step": 5126 }, { - "epoch": 0.29, - "grad_norm": 0.3832966114148078, - "learning_rate": 1.6548863041869203e-05, - "loss": 0.3521, + "epoch": 0.24, + "grad_norm": 0.3564227634273513, + "learning_rate": 1.7865795869946903e-05, + "loss": 0.2602, "step": 5127 }, { - "epoch": 0.29, - "grad_norm": 0.2462215752704896, - "learning_rate": 1.6547456585810687e-05, - "loss": 0.1567, + "epoch": 0.24, + "grad_norm": 0.478661209508023, + "learning_rate": 1.786487700925953e-05, + "loss": 0.3011, "step": 5128 }, { - "epoch": 0.29, - "grad_norm": 0.9104250748758096, - "learning_rate": 1.6546049903014034e-05, - "loss": 0.5161, + "epoch": 0.24, + "grad_norm": 1.1740255193604194, + "learning_rate": 1.786395797445136e-05, + "loss": 0.7691, "step": 5129 }, { - "epoch": 0.29, - "grad_norm": 0.39445391596511015, - "learning_rate": 1.6544642993527952e-05, - "loss": 0.3519, + "epoch": 0.24, + "grad_norm": 0.43313948420681764, + "learning_rate": 1.786303876554274e-05, + "loss": 0.2257, "step": 5130 }, { - "epoch": 0.29, - "grad_norm": 0.4248670156930363, - "learning_rate": 1.6543235857401163e-05, - "loss": 0.3151, + "epoch": 0.24, + "grad_norm": 0.4843830762632465, + "learning_rate": 1.786211938255402e-05, + "loss": 0.3691, "step": 5131 }, { - "epoch": 0.29, - "grad_norm": 0.5492970992711453, - "learning_rate": 1.6541828494682398e-05, - "loss": 0.3257, + "epoch": 0.24, + "grad_norm": 0.6144764355539256, + "learning_rate": 1.7861199825505556e-05, + "loss": 0.4631, "step": 5132 }, { - "epoch": 0.29, - "grad_norm": 0.4673011597855598, - "learning_rate": 1.6540420905420395e-05, - "loss": 0.2742, + "epoch": 0.24, + "grad_norm": 0.3450608236827846, + "learning_rate": 1.78602800944177e-05, + "loss": 0.2027, "step": 5133 }, { - "epoch": 0.29, - "grad_norm": 0.39281271146739444, - "learning_rate": 1.6539013089663897e-05, - "loss": 0.3005, + "epoch": 0.24, + "grad_norm": 0.29344124801121907, + "learning_rate": 1.7859360189310825e-05, + "loss": 0.2224, "step": 5134 }, { - "epoch": 0.3, - "grad_norm": 0.4087028084684856, - "learning_rate": 1.6537605047461654e-05, - "loss": 0.2509, + "epoch": 0.24, + "grad_norm": 0.6109325224891187, + "learning_rate": 1.7858440110205286e-05, + "loss": 0.4442, "step": 5135 }, { - "epoch": 0.3, - "grad_norm": 0.8586628812462002, - "learning_rate": 1.6536196778862433e-05, - "loss": 0.5085, + "epoch": 0.24, + "grad_norm": 0.37220596204716855, + "learning_rate": 1.785751985712146e-05, + "loss": 0.2169, "step": 5136 }, { - "epoch": 0.3, - "grad_norm": 0.5709002766581852, - "learning_rate": 1.6534788283915e-05, - "loss": 0.3931, + "epoch": 0.24, + "grad_norm": 0.7527615457393293, + "learning_rate": 1.7856599430079714e-05, + "loss": 0.499, "step": 5137 }, { - "epoch": 0.3, - "grad_norm": 0.3001544493179158, - "learning_rate": 1.6533379562668126e-05, - "loss": 0.261, + "epoch": 0.24, + "grad_norm": 0.4947554977141729, + "learning_rate": 1.7855678829100432e-05, + "loss": 0.369, "step": 5138 }, { - "epoch": 0.3, - "grad_norm": 1.2209468608505851, - "learning_rate": 1.6531970615170602e-05, - "loss": 0.7915, + "epoch": 0.24, + "grad_norm": 0.33828890534809214, + "learning_rate": 1.785475805420399e-05, + "loss": 0.2746, "step": 5139 }, { - "epoch": 0.3, - "grad_norm": 0.29863282667681906, - "learning_rate": 1.6530561441471215e-05, - "loss": 0.2372, + "epoch": 0.24, + "grad_norm": 0.2813207333207209, + "learning_rate": 1.7853837105410777e-05, + "loss": 0.1462, "step": 5140 }, { - "epoch": 0.3, - "grad_norm": 0.6536704998804858, - "learning_rate": 1.6529152041618767e-05, - "loss": 0.3358, + "epoch": 0.24, + "grad_norm": 1.374539679226561, + "learning_rate": 1.785291598274118e-05, + "loss": 0.7958, "step": 5141 }, { - "epoch": 0.3, - "grad_norm": 0.2971516691014197, - "learning_rate": 1.652774241566206e-05, - "loss": 0.2563, + "epoch": 0.24, + "grad_norm": 0.40936958480041513, + "learning_rate": 1.7851994686215592e-05, + "loss": 0.2704, "step": 5142 }, { - "epoch": 0.3, - "grad_norm": 0.4049693944771069, - "learning_rate": 1.652633256364992e-05, - "loss": 0.3354, + "epoch": 0.24, + "grad_norm": 0.42658478899250896, + "learning_rate": 1.7851073215854406e-05, + "loss": 0.2757, "step": 5143 }, { - "epoch": 0.3, - "grad_norm": 0.569984221282444, - "learning_rate": 1.652492248563116e-05, - "loss": 0.452, + "epoch": 0.24, + "grad_norm": 0.8468372190875213, + "learning_rate": 1.785015157167803e-05, + "loss": 0.5011, "step": 5144 }, { - "epoch": 0.3, - "grad_norm": 0.6023169787669885, - "learning_rate": 1.6523512181654616e-05, - "loss": 0.379, + "epoch": 0.24, + "grad_norm": 0.3959085581442702, + "learning_rate": 1.784922975370686e-05, + "loss": 0.2786, "step": 5145 }, { - "epoch": 0.3, - "grad_norm": 0.3194650614374405, - "learning_rate": 1.6522101651769124e-05, - "loss": 0.2582, + "epoch": 0.24, + "grad_norm": 0.2545993242185012, + "learning_rate": 1.784830776196131e-05, + "loss": 0.1411, "step": 5146 }, { - "epoch": 0.3, - "grad_norm": 0.6151729058432179, - "learning_rate": 1.6520690896023536e-05, - "loss": 0.3939, + "epoch": 0.24, + "grad_norm": 0.569926780503696, + "learning_rate": 1.7847385596461794e-05, + "loss": 0.3941, "step": 5147 }, { - "epoch": 0.3, - "grad_norm": 0.2655700172516779, - "learning_rate": 1.6519279914466703e-05, - "loss": 0.1816, + "epoch": 0.24, + "grad_norm": 0.38102671413681816, + "learning_rate": 1.7846463257228722e-05, + "loss": 0.2867, "step": 5148 }, { - "epoch": 0.3, - "grad_norm": 0.8313908290203407, - "learning_rate": 1.6517868707147484e-05, - "loss": 0.461, + "epoch": 0.24, + "grad_norm": 1.0898220587915632, + "learning_rate": 1.7845540744282518e-05, + "loss": 0.3872, "step": 5149 }, { - "epoch": 0.3, - "grad_norm": 0.40243904423512905, - "learning_rate": 1.651645727411475e-05, - "loss": 0.3019, + "epoch": 0.24, + "grad_norm": 0.4264252293970126, + "learning_rate": 1.7844618057643602e-05, + "loss": 0.3491, "step": 5150 }, { - "epoch": 0.3, - "grad_norm": 0.48595523304443944, - "learning_rate": 1.6515045615417385e-05, - "loss": 0.2973, + "epoch": 0.24, + "grad_norm": 0.4066966923141595, + "learning_rate": 1.78436951973324e-05, + "loss": 0.2533, "step": 5151 }, { - "epoch": 0.3, - "grad_norm": 0.6055444378343406, - "learning_rate": 1.6513633731104268e-05, - "loss": 0.3708, + "epoch": 0.24, + "grad_norm": 0.2875041928318243, + "learning_rate": 1.784277216336935e-05, + "loss": 0.1999, "step": 5152 }, { - "epoch": 0.3, - "grad_norm": 0.4178836687928149, - "learning_rate": 1.6512221621224296e-05, - "loss": 0.336, + "epoch": 0.24, + "grad_norm": 0.5074768042131431, + "learning_rate": 1.784184895577488e-05, + "loss": 0.3351, "step": 5153 }, { - "epoch": 0.3, - "grad_norm": 0.2420107469251138, - "learning_rate": 1.651080928582637e-05, - "loss": 0.2302, + "epoch": 0.24, + "grad_norm": 0.4559811337480079, + "learning_rate": 1.7840925574569437e-05, + "loss": 0.2716, "step": 5154 }, { - "epoch": 0.3, - "grad_norm": 0.2784278043315081, - "learning_rate": 1.6509396724959396e-05, - "loss": 0.1629, + "epoch": 0.24, + "grad_norm": 0.622990941790311, + "learning_rate": 1.7840002019773456e-05, + "loss": 0.3709, "step": 5155 }, { - "epoch": 0.3, - "grad_norm": 0.4202735086060693, - "learning_rate": 1.6507983938672295e-05, - "loss": 0.3557, + "epoch": 0.24, + "grad_norm": 0.9347673302327928, + "learning_rate": 1.7839078291407384e-05, + "loss": 0.3725, "step": 5156 }, { - "epoch": 0.3, - "grad_norm": 0.7828694849798539, - "learning_rate": 1.650657092701399e-05, - "loss": 0.5474, + "epoch": 0.24, + "grad_norm": 0.4488739714906619, + "learning_rate": 1.7838154389491677e-05, + "loss": 0.3136, "step": 5157 }, { - "epoch": 0.3, - "grad_norm": 0.30049067721571665, - "learning_rate": 1.6505157690033417e-05, - "loss": 0.2597, + "epoch": 0.24, + "grad_norm": 0.32927789280788533, + "learning_rate": 1.7837230314046787e-05, + "loss": 0.271, "step": 5158 }, { - "epoch": 0.3, - "grad_norm": 0.3824140483311469, - "learning_rate": 1.650374422777951e-05, - "loss": 0.3437, + "epoch": 0.24, + "grad_norm": 1.4351351690476724, + "learning_rate": 1.783630606509317e-05, + "loss": 0.58, "step": 5159 }, { - "epoch": 0.3, - "grad_norm": 0.42131426536676336, - "learning_rate": 1.6502330540301217e-05, - "loss": 0.266, + "epoch": 0.24, + "grad_norm": 0.31167215579178853, + "learning_rate": 1.783538164265129e-05, + "loss": 0.2381, "step": 5160 }, { - "epoch": 0.3, - "grad_norm": 0.337804821268556, - "learning_rate": 1.6500916627647498e-05, - "loss": 0.224, + "epoch": 0.24, + "grad_norm": 0.7971649239473425, + "learning_rate": 1.7834457046741615e-05, + "loss": 0.4666, "step": 5161 }, { - "epoch": 0.3, - "grad_norm": 0.30612728307066595, - "learning_rate": 1.6499502489867318e-05, - "loss": 0.2456, + "epoch": 0.24, + "grad_norm": 0.3840986936315121, + "learning_rate": 1.7833532277384607e-05, + "loss": 0.2925, "step": 5162 }, { - "epoch": 0.3, - "grad_norm": 0.5060885778707491, - "learning_rate": 1.6498088127009647e-05, - "loss": 0.4094, + "epoch": 0.24, + "grad_norm": 0.4072842289363352, + "learning_rate": 1.7832607334600747e-05, + "loss": 0.2759, "step": 5163 }, { - "epoch": 0.3, - "grad_norm": 0.34297861726048556, - "learning_rate": 1.649667353912346e-05, - "loss": 0.2678, + "epoch": 0.24, + "grad_norm": 0.4914228074639435, + "learning_rate": 1.783168221841051e-05, + "loss": 0.283, "step": 5164 }, { - "epoch": 0.3, - "grad_norm": 0.5259607161237364, - "learning_rate": 1.649525872625775e-05, - "loss": 0.3798, + "epoch": 0.24, + "grad_norm": 1.3469792481722633, + "learning_rate": 1.7830756928834377e-05, + "loss": 0.5944, "step": 5165 }, { - "epoch": 0.3, - "grad_norm": 0.3577087368747908, - "learning_rate": 1.649384368846151e-05, - "loss": 0.3183, + "epoch": 0.24, + "grad_norm": 0.3849682926411431, + "learning_rate": 1.7829831465892832e-05, + "loss": 0.2424, "step": 5166 }, { - "epoch": 0.3, - "grad_norm": 0.8135960641640962, - "learning_rate": 1.649242842578374e-05, - "loss": 0.504, + "epoch": 0.24, + "grad_norm": 0.43409061154265643, + "learning_rate": 1.7828905829606364e-05, + "loss": 0.3787, "step": 5167 }, { - "epoch": 0.3, - "grad_norm": 0.22855591149860482, - "learning_rate": 1.6491012938273457e-05, - "loss": 0.1531, + "epoch": 0.24, + "grad_norm": 0.5112131660296164, + "learning_rate": 1.7827980019995468e-05, + "loss": 0.309, "step": 5168 }, { - "epoch": 0.3, - "grad_norm": 0.3865190212108506, - "learning_rate": 1.6489597225979673e-05, - "loss": 0.3684, + "epoch": 0.24, + "grad_norm": 0.4182364417305089, + "learning_rate": 1.7827054037080638e-05, + "loss": 0.1867, "step": 5169 }, { - "epoch": 0.3, - "grad_norm": 0.45872722550496825, - "learning_rate": 1.6488181288951416e-05, - "loss": 0.4088, + "epoch": 0.24, + "grad_norm": 0.349916291432456, + "learning_rate": 1.7826127880882375e-05, + "loss": 0.3165, "step": 5170 }, { - "epoch": 0.3, - "grad_norm": 0.2849367931072605, - "learning_rate": 1.648676512723772e-05, - "loss": 0.2268, + "epoch": 0.24, + "grad_norm": 0.8468169688689349, + "learning_rate": 1.7825201551421186e-05, + "loss": 0.5387, "step": 5171 }, { - "epoch": 0.3, - "grad_norm": 1.246603368276133, - "learning_rate": 1.6485348740887624e-05, - "loss": 0.7727, + "epoch": 0.24, + "grad_norm": 0.3740907445727186, + "learning_rate": 1.7824275048717577e-05, + "loss": 0.2271, "step": 5172 }, { - "epoch": 0.3, - "grad_norm": 0.3887406473005966, - "learning_rate": 1.6483932129950183e-05, - "loss": 0.2651, + "epoch": 0.24, + "grad_norm": 0.28387501001331666, + "learning_rate": 1.782334837279206e-05, + "loss": 0.2224, "step": 5173 }, { - "epoch": 0.3, - "grad_norm": 0.24155302974837958, - "learning_rate": 1.648251529447445e-05, - "loss": 0.2227, + "epoch": 0.24, + "grad_norm": 0.4554594401129825, + "learning_rate": 1.7822421523665145e-05, + "loss": 0.3742, "step": 5174 }, { - "epoch": 0.3, - "grad_norm": 0.5747540021102897, - "learning_rate": 1.6481098234509493e-05, - "loss": 0.4604, + "epoch": 0.24, + "grad_norm": 0.35900336698876534, + "learning_rate": 1.782149450135736e-05, + "loss": 0.2, "step": 5175 }, { - "epoch": 0.3, - "grad_norm": 0.6065999774862946, - "learning_rate": 1.647968095010438e-05, - "loss": 0.4313, + "epoch": 0.24, + "grad_norm": 0.5932827917613999, + "learning_rate": 1.7820567305889228e-05, + "loss": 0.4319, "step": 5176 }, { - "epoch": 0.3, - "grad_norm": 0.3560299438233281, - "learning_rate": 1.6478263441308197e-05, - "loss": 0.2519, + "epoch": 0.24, + "grad_norm": 0.7536754323069328, + "learning_rate": 1.7819639937281267e-05, + "loss": 0.5086, "step": 5177 }, { - "epoch": 0.3, - "grad_norm": 0.4235898813948726, - "learning_rate": 1.6476845708170025e-05, - "loss": 0.3201, + "epoch": 0.24, + "grad_norm": 0.3672583383559498, + "learning_rate": 1.7818712395554017e-05, + "loss": 0.2915, "step": 5178 }, { - "epoch": 0.3, - "grad_norm": 0.7045373601265343, - "learning_rate": 1.647542775073897e-05, - "loss": 0.45, + "epoch": 0.24, + "grad_norm": 0.37015218238611, + "learning_rate": 1.7817784680728013e-05, + "loss": 0.2806, "step": 5179 }, { - "epoch": 0.3, - "grad_norm": 0.41738227489163404, - "learning_rate": 1.647400956906413e-05, - "loss": 0.3245, + "epoch": 0.24, + "grad_norm": 0.36906361554153094, + "learning_rate": 1.7816856792823792e-05, + "loss": 0.1509, "step": 5180 }, { - "epoch": 0.3, - "grad_norm": 0.3797306952733853, - "learning_rate": 1.6472591163194613e-05, - "loss": 0.2625, + "epoch": 0.24, + "grad_norm": 0.45548953693560196, + "learning_rate": 1.781592873186189e-05, + "loss": 0.2952, "step": 5181 }, { - "epoch": 0.3, - "grad_norm": 0.2729805774158222, - "learning_rate": 1.6471172533179545e-05, - "loss": 0.2373, + "epoch": 0.24, + "grad_norm": 0.36677997833233045, + "learning_rate": 1.781500049786286e-05, + "loss": 0.254, "step": 5182 }, { - "epoch": 0.3, - "grad_norm": 0.42957414092760055, - "learning_rate": 1.646975367906805e-05, - "loss": 0.3196, + "epoch": 0.24, + "grad_norm": 0.84822181983163, + "learning_rate": 1.7814072090847258e-05, + "loss": 0.5074, "step": 5183 }, { - "epoch": 0.3, - "grad_norm": 1.040323100353576, - "learning_rate": 1.6468334600909265e-05, - "loss": 0.4741, + "epoch": 0.24, + "grad_norm": 0.43773396133924425, + "learning_rate": 1.7813143510835625e-05, + "loss": 0.3473, "step": 5184 }, { - "epoch": 0.3, - "grad_norm": 0.4346814146354387, - "learning_rate": 1.646691529875233e-05, - "loss": 0.3474, + "epoch": 0.24, + "grad_norm": 0.32204299906052175, + "learning_rate": 1.7812214757848523e-05, + "loss": 0.1848, "step": 5185 }, { - "epoch": 0.3, - "grad_norm": 0.36473624376665237, - "learning_rate": 1.6465495772646395e-05, - "loss": 0.3155, + "epoch": 0.24, + "grad_norm": 0.31811909019134615, + "learning_rate": 1.7811285831906523e-05, + "loss": 0.2868, "step": 5186 }, { - "epoch": 0.3, - "grad_norm": 0.3374164626344427, - "learning_rate": 1.646407602264062e-05, - "loss": 0.2212, + "epoch": 0.24, + "grad_norm": 0.7255452545514577, + "learning_rate": 1.781035673303018e-05, + "loss": 0.4307, "step": 5187 }, { - "epoch": 0.3, - "grad_norm": 0.9076271617776354, - "learning_rate": 1.646265604878417e-05, - "loss": 0.5641, + "epoch": 0.24, + "grad_norm": 0.4352985348854899, + "learning_rate": 1.7809427461240068e-05, + "loss": 0.2599, "step": 5188 }, { - "epoch": 0.3, - "grad_norm": 0.34881563109518027, - "learning_rate": 1.6461235851126217e-05, - "loss": 0.2696, + "epoch": 0.24, + "grad_norm": 0.6703747768051963, + "learning_rate": 1.7808498016556755e-05, + "loss": 0.4372, "step": 5189 }, { - "epoch": 0.3, - "grad_norm": 0.4391047335953409, - "learning_rate": 1.6459815429715947e-05, - "loss": 0.2938, + "epoch": 0.24, + "grad_norm": 0.43465890712938987, + "learning_rate": 1.7807568399000824e-05, + "loss": 0.3044, "step": 5190 }, { - "epoch": 0.3, - "grad_norm": 0.6558038337089468, - "learning_rate": 1.6458394784602548e-05, - "loss": 0.4387, + "epoch": 0.24, + "grad_norm": 0.464771689421088, + "learning_rate": 1.7806638608592855e-05, + "loss": 0.3722, "step": 5191 }, { - "epoch": 0.3, - "grad_norm": 0.3632607082060281, - "learning_rate": 1.6456973915835216e-05, - "loss": 0.3324, + "epoch": 0.24, + "grad_norm": 0.22478242378053584, + "learning_rate": 1.780570864535343e-05, + "loss": 0.0757, "step": 5192 }, { - "epoch": 0.3, - "grad_norm": 0.8758092746992635, - "learning_rate": 1.645555282346315e-05, - "loss": 0.5511, + "epoch": 0.24, + "grad_norm": 0.6888541845162903, + "learning_rate": 1.7804778509303136e-05, + "loss": 0.395, "step": 5193 }, { - "epoch": 0.3, - "grad_norm": 0.25760201740560185, - "learning_rate": 1.645413150753557e-05, - "loss": 0.1809, + "epoch": 0.24, + "grad_norm": 0.35249292241742775, + "learning_rate": 1.7803848200462573e-05, + "loss": 0.311, "step": 5194 }, { - "epoch": 0.3, - "grad_norm": 0.37353304142667226, - "learning_rate": 1.645270996810169e-05, - "loss": 0.2963, + "epoch": 0.24, + "grad_norm": 0.6874776519898484, + "learning_rate": 1.780291771885233e-05, + "loss": 0.3892, "step": 5195 }, { - "epoch": 0.3, - "grad_norm": 1.087573636596399, - "learning_rate": 1.645128820521075e-05, - "loss": 0.7903, + "epoch": 0.24, + "grad_norm": 0.691690643929854, + "learning_rate": 1.780198706449301e-05, + "loss": 0.4044, "step": 5196 }, { - "epoch": 0.3, - "grad_norm": 0.33864893891719766, - "learning_rate": 1.644986621891197e-05, - "loss": 0.281, + "epoch": 0.24, + "grad_norm": 0.3457667817261081, + "learning_rate": 1.7801056237405213e-05, + "loss": 0.2646, "step": 5197 }, { - "epoch": 0.3, - "grad_norm": 0.39698780680277895, - "learning_rate": 1.64484440092546e-05, - "loss": 0.2983, + "epoch": 0.24, + "grad_norm": 0.30394760569388724, + "learning_rate": 1.7800125237609555e-05, + "loss": 0.2033, "step": 5198 }, { - "epoch": 0.3, - "grad_norm": 0.8776485906722473, - "learning_rate": 1.6447021576287893e-05, - "loss": 0.5644, + "epoch": 0.24, + "grad_norm": 0.5951573523861192, + "learning_rate": 1.7799194065126636e-05, + "loss": 0.3548, "step": 5199 }, { - "epoch": 0.3, - "grad_norm": 0.18605182562974185, - "learning_rate": 1.6445598920061104e-05, - "loss": 0.1143, + "epoch": 0.24, + "grad_norm": 0.43618286588987465, + "learning_rate": 1.7798262719977085e-05, + "loss": 0.3162, "step": 5200 }, { - "epoch": 0.3, - "grad_norm": 0.3799839090356079, - "learning_rate": 1.6444176040623506e-05, - "loss": 0.2902, + "epoch": 0.24, + "grad_norm": 0.7338380277663991, + "learning_rate": 1.7797331202181507e-05, + "loss": 0.4763, "step": 5201 }, { - "epoch": 0.3, - "grad_norm": 0.3898545043893843, - "learning_rate": 1.6442752938024367e-05, - "loss": 0.3586, + "epoch": 0.24, + "grad_norm": 0.28877046688066665, + "learning_rate": 1.7796399511760534e-05, + "loss": 0.2282, "step": 5202 }, { - "epoch": 0.3, - "grad_norm": 0.7637000427048503, - "learning_rate": 1.644132961231297e-05, - "loss": 0.3395, + "epoch": 0.24, + "grad_norm": 0.6705501276530482, + "learning_rate": 1.779546764873479e-05, + "loss": 0.4201, "step": 5203 }, { - "epoch": 0.3, - "grad_norm": 0.41850232975506924, - "learning_rate": 1.6439906063538602e-05, - "loss": 0.32, + "epoch": 0.24, + "grad_norm": 0.33688539306247717, + "learning_rate": 1.7794535613124907e-05, + "loss": 0.21, "step": 5204 }, { - "epoch": 0.3, - "grad_norm": 0.5203465807048746, - "learning_rate": 1.6438482291750567e-05, - "loss": 0.4057, + "epoch": 0.24, + "grad_norm": 0.5948832419149802, + "learning_rate": 1.7793603404951514e-05, + "loss": 0.3109, "step": 5205 }, { - "epoch": 0.3, - "grad_norm": 0.2428540144139758, - "learning_rate": 1.6437058296998168e-05, - "loss": 0.1995, + "epoch": 0.24, + "grad_norm": 0.3289715523539774, + "learning_rate": 1.7792671024235256e-05, + "loss": 0.2842, "step": 5206 }, { - "epoch": 0.3, - "grad_norm": 0.3433948371999074, - "learning_rate": 1.643563407933072e-05, - "loss": 0.2046, + "epoch": 0.24, + "grad_norm": 0.7410247293159987, + "learning_rate": 1.779173847099677e-05, + "loss": 0.5316, "step": 5207 }, { - "epoch": 0.3, - "grad_norm": 0.7815039800380226, - "learning_rate": 1.6434209638797535e-05, - "loss": 0.5257, + "epoch": 0.24, + "grad_norm": 0.4336635466877937, + "learning_rate": 1.7790805745256703e-05, + "loss": 0.2624, "step": 5208 }, { - "epoch": 0.3, - "grad_norm": 0.41070430570121524, - "learning_rate": 1.643278497544795e-05, - "loss": 0.3437, + "epoch": 0.24, + "grad_norm": 0.42537288957620334, + "learning_rate": 1.778987284703571e-05, + "loss": 0.2952, "step": 5209 }, { - "epoch": 0.3, - "grad_norm": 0.3379383058006753, - "learning_rate": 1.6431360089331297e-05, - "loss": 0.2553, + "epoch": 0.24, + "grad_norm": 0.3385913774892907, + "learning_rate": 1.778893977635444e-05, + "loss": 0.2628, "step": 5210 }, { - "epoch": 0.3, - "grad_norm": 0.943370279969184, - "learning_rate": 1.642993498049692e-05, - "loss": 0.6497, + "epoch": 0.24, + "grad_norm": 0.322044458872734, + "learning_rate": 1.778800653323355e-05, + "loss": 0.1447, "step": 5211 }, { - "epoch": 0.3, - "grad_norm": 0.2744195839372174, - "learning_rate": 1.6428509648994172e-05, - "loss": 0.1808, + "epoch": 0.24, + "grad_norm": 0.4597256157625955, + "learning_rate": 1.7787073117693697e-05, + "loss": 0.3632, "step": 5212 }, { - "epoch": 0.3, - "grad_norm": 0.2973940784991242, - "learning_rate": 1.6427084094872413e-05, - "loss": 0.2217, + "epoch": 0.24, + "grad_norm": 1.6048530591331966, + "learning_rate": 1.7786139529755556e-05, + "loss": 0.8603, "step": 5213 }, { - "epoch": 0.3, - "grad_norm": 0.5296153676122131, - "learning_rate": 1.6425658318181007e-05, - "loss": 0.4247, + "epoch": 0.24, + "grad_norm": 0.39735037428456554, + "learning_rate": 1.7785205769439787e-05, + "loss": 0.3477, "step": 5214 }, { - "epoch": 0.3, - "grad_norm": 0.7738880370457544, - "learning_rate": 1.6424232318969327e-05, - "loss": 0.4883, + "epoch": 0.24, + "grad_norm": 0.3875795766737469, + "learning_rate": 1.7784271836767066e-05, + "loss": 0.2253, "step": 5215 }, { - "epoch": 0.3, - "grad_norm": 0.3653662339834992, - "learning_rate": 1.642280609728676e-05, - "loss": 0.2146, + "epoch": 0.24, + "grad_norm": 0.3021693707839292, + "learning_rate": 1.778333773175807e-05, + "loss": 0.1854, "step": 5216 }, { - "epoch": 0.3, - "grad_norm": 0.532355787670923, - "learning_rate": 1.6421379653182695e-05, - "loss": 0.3656, + "epoch": 0.24, + "grad_norm": 0.4412982590001609, + "learning_rate": 1.778240345443348e-05, + "loss": 0.3371, "step": 5217 }, { - "epoch": 0.3, - "grad_norm": 0.2788563700674229, - "learning_rate": 1.6419952986706523e-05, - "loss": 0.2249, + "epoch": 0.24, + "grad_norm": 0.3482669023945623, + "learning_rate": 1.7781469004813977e-05, + "loss": 0.2355, "step": 5218 }, { - "epoch": 0.3, - "grad_norm": 0.42198391043135125, - "learning_rate": 1.641852609790766e-05, - "loss": 0.3224, + "epoch": 0.24, + "grad_norm": 0.6817499112254342, + "learning_rate": 1.7780534382920248e-05, + "loss": 0.5357, "step": 5219 }, { - "epoch": 0.3, - "grad_norm": 0.8746102140737043, - "learning_rate": 1.641709898683552e-05, - "loss": 0.3473, + "epoch": 0.24, + "grad_norm": 0.7962834666970865, + "learning_rate": 1.7779599588772987e-05, + "loss": 0.5543, "step": 5220 }, { - "epoch": 0.3, - "grad_norm": 0.4154761127237117, - "learning_rate": 1.641567165353951e-05, - "loss": 0.3684, + "epoch": 0.24, + "grad_norm": 0.34746926440815223, + "learning_rate": 1.7778664622392892e-05, + "loss": 0.2112, "step": 5221 }, { - "epoch": 0.3, - "grad_norm": 0.41962129121867997, - "learning_rate": 1.6414244098069068e-05, - "loss": 0.2641, + "epoch": 0.24, + "grad_norm": 0.39026670054962515, + "learning_rate": 1.777772948380066e-05, + "loss": 0.3483, "step": 5222 }, { - "epoch": 0.3, - "grad_norm": 0.46239702845240743, - "learning_rate": 1.641281632047363e-05, - "loss": 0.2797, + "epoch": 0.24, + "grad_norm": 0.7360276453317811, + "learning_rate": 1.777679417301699e-05, + "loss": 0.4678, "step": 5223 }, { - "epoch": 0.3, - "grad_norm": 0.34177914074807497, - "learning_rate": 1.6411388320802637e-05, - "loss": 0.2041, + "epoch": 0.24, + "grad_norm": 0.23411944466051962, + "learning_rate": 1.7775858690062593e-05, + "loss": 0.1521, "step": 5224 }, { - "epoch": 0.3, - "grad_norm": 0.3992637612794155, - "learning_rate": 1.6409960099105543e-05, - "loss": 0.2882, + "epoch": 0.24, + "grad_norm": 0.4210616976621871, + "learning_rate": 1.777492303495818e-05, + "loss": 0.3326, "step": 5225 }, { - "epoch": 0.3, - "grad_norm": 0.7002086710277914, - "learning_rate": 1.6408531655431806e-05, - "loss": 0.3284, + "epoch": 0.24, + "grad_norm": 1.2618000448733846, + "learning_rate": 1.7773987207724467e-05, + "loss": 0.6404, "step": 5226 }, { - "epoch": 0.3, - "grad_norm": 1.3006242151336662, - "learning_rate": 1.6407102989830894e-05, - "loss": 0.7902, + "epoch": 0.24, + "grad_norm": 0.38689482125096913, + "learning_rate": 1.7773051208382167e-05, + "loss": 0.2642, "step": 5227 }, { - "epoch": 0.3, - "grad_norm": 0.4334420186583708, - "learning_rate": 1.640567410235228e-05, - "loss": 0.2615, + "epoch": 0.24, + "grad_norm": 0.8526791239695162, + "learning_rate": 1.7772115036952002e-05, + "loss": 0.3557, "step": 5228 }, { - "epoch": 0.3, - "grad_norm": 0.6111767264288636, - "learning_rate": 1.6404244993045447e-05, - "loss": 0.2422, + "epoch": 0.24, + "grad_norm": 0.40293118092347097, + "learning_rate": 1.7771178693454703e-05, + "loss": 0.3249, "step": 5229 }, { - "epoch": 0.3, - "grad_norm": 0.30863707616793146, - "learning_rate": 1.6402815661959886e-05, - "loss": 0.227, + "epoch": 0.24, + "grad_norm": 0.2879482304325263, + "learning_rate": 1.7770242177911e-05, + "loss": 0.2228, "step": 5230 }, { - "epoch": 0.3, - "grad_norm": 0.38996995551842206, - "learning_rate": 1.6401386109145098e-05, - "loss": 0.291, + "epoch": 0.24, + "grad_norm": 0.26981264352907014, + "learning_rate": 1.7769305490341623e-05, + "loss": 0.1243, "step": 5231 }, { - "epoch": 0.3, - "grad_norm": 0.9864271562835062, - "learning_rate": 1.639995633465058e-05, - "loss": 0.4252, + "epoch": 0.24, + "grad_norm": 0.725667607709296, + "learning_rate": 1.7768368630767313e-05, + "loss": 0.4623, "step": 5232 }, { - "epoch": 0.3, - "grad_norm": 0.5006342644476464, - "learning_rate": 1.6398526338525852e-05, - "loss": 0.3058, + "epoch": 0.24, + "grad_norm": 0.41243550211453733, + "learning_rate": 1.7767431599208803e-05, + "loss": 0.2959, "step": 5233 }, { - "epoch": 0.3, - "grad_norm": 0.3515919149907774, - "learning_rate": 1.639709612082043e-05, - "loss": 0.2666, + "epoch": 0.24, + "grad_norm": 0.4381323487096433, + "learning_rate": 1.776649439568685e-05, + "loss": 0.2991, "step": 5234 }, { - "epoch": 0.3, - "grad_norm": 0.9812781991271189, - "learning_rate": 1.6395665681583842e-05, - "loss": 0.5665, + "epoch": 0.24, + "grad_norm": 0.6675486121232714, + "learning_rate": 1.7765557020222194e-05, + "loss": 0.4056, "step": 5235 }, { - "epoch": 0.3, - "grad_norm": 0.5594165984678707, - "learning_rate": 1.639423502086563e-05, - "loss": 0.3173, + "epoch": 0.24, + "grad_norm": 0.4096918069581638, + "learning_rate": 1.7764619472835588e-05, + "loss": 0.3086, "step": 5236 }, { - "epoch": 0.3, - "grad_norm": 0.4214927341840395, - "learning_rate": 1.6392804138715334e-05, - "loss": 0.28, + "epoch": 0.24, + "grad_norm": 0.27266475142535906, + "learning_rate": 1.7763681753547793e-05, + "loss": 0.1912, "step": 5237 }, { - "epoch": 0.3, - "grad_norm": 0.368125224933628, - "learning_rate": 1.6391373035182506e-05, - "loss": 0.296, + "epoch": 0.24, + "grad_norm": 0.9210119739105295, + "learning_rate": 1.7762743862379568e-05, + "loss": 0.5605, "step": 5238 }, { - "epoch": 0.3, - "grad_norm": 0.29769042288686703, - "learning_rate": 1.6389941710316703e-05, - "loss": 0.183, + "epoch": 0.24, + "grad_norm": 0.42080875466573603, + "learning_rate": 1.7761805799351674e-05, + "loss": 0.2861, "step": 5239 }, { - "epoch": 0.3, - "grad_norm": 0.42317068403499525, - "learning_rate": 1.6388510164167492e-05, - "loss": 0.2838, + "epoch": 0.24, + "grad_norm": 0.8105802284617355, + "learning_rate": 1.7760867564484878e-05, + "loss": 0.4493, "step": 5240 }, { - "epoch": 0.3, - "grad_norm": 0.38688120862100955, - "learning_rate": 1.6387078396784447e-05, - "loss": 0.317, + "epoch": 0.24, + "grad_norm": 0.39538362129150423, + "learning_rate": 1.7759929157799956e-05, + "loss": 0.2679, "step": 5241 }, { - "epoch": 0.3, - "grad_norm": 0.9506995811934351, - "learning_rate": 1.6385646408217158e-05, - "loss": 0.492, + "epoch": 0.24, + "grad_norm": 0.3705966075020011, + "learning_rate": 1.7758990579317684e-05, + "loss": 0.2781, "step": 5242 }, { - "epoch": 0.3, - "grad_norm": 0.3817544458955061, - "learning_rate": 1.638421419851521e-05, - "loss": 0.261, + "epoch": 0.24, + "grad_norm": 0.33612533562172037, + "learning_rate": 1.7758051829058835e-05, + "loss": 0.2386, "step": 5243 }, { - "epoch": 0.3, - "grad_norm": 0.3240206473375033, - "learning_rate": 1.638278176772819e-05, - "loss": 0.2811, + "epoch": 0.24, + "grad_norm": 1.1810369821346143, + "learning_rate": 1.77571129070442e-05, + "loss": 0.4231, "step": 5244 }, { - "epoch": 0.3, - "grad_norm": 0.3847891589500509, - "learning_rate": 1.6381349115905718e-05, - "loss": 0.3365, + "epoch": 0.24, + "grad_norm": 0.3659901356406624, + "learning_rate": 1.775617381329456e-05, + "loss": 0.3135, "step": 5245 }, { - "epoch": 0.3, - "grad_norm": 0.3515846322925854, - "learning_rate": 1.6379916243097398e-05, - "loss": 0.2199, + "epoch": 0.24, + "grad_norm": 0.46470476029951874, + "learning_rate": 1.7755234547830707e-05, + "loss": 0.3589, "step": 5246 }, { - "epoch": 0.3, - "grad_norm": 0.4147502570911691, - "learning_rate": 1.6378483149352857e-05, - "loss": 0.2787, + "epoch": 0.24, + "grad_norm": 1.0560709295396808, + "learning_rate": 1.7754295110673433e-05, + "loss": 0.2742, "step": 5247 }, { - "epoch": 0.3, - "grad_norm": 0.7085441353078938, - "learning_rate": 1.6377049834721713e-05, - "loss": 0.4585, + "epoch": 0.24, + "grad_norm": 0.314447804831735, + "learning_rate": 1.7753355501843544e-05, + "loss": 0.2245, "step": 5248 }, { - "epoch": 0.3, - "grad_norm": 0.3256721748852568, - "learning_rate": 1.637561629925361e-05, - "loss": 0.2384, + "epoch": 0.24, + "grad_norm": 0.419363263326942, + "learning_rate": 1.7752415721361834e-05, + "loss": 0.2283, "step": 5249 }, { - "epoch": 0.3, - "grad_norm": 0.850566415646217, - "learning_rate": 1.637418254299819e-05, - "loss": 0.5317, + "epoch": 0.24, + "grad_norm": 0.49720558314543606, + "learning_rate": 1.775147576924911e-05, + "loss": 0.3053, "step": 5250 }, { - "epoch": 0.3, - "grad_norm": 0.4374892514913494, - "learning_rate": 1.63727485660051e-05, - "loss": 0.3344, + "epoch": 0.24, + "grad_norm": 0.35826283630409983, + "learning_rate": 1.7750535645526185e-05, + "loss": 0.2912, "step": 5251 }, { - "epoch": 0.3, - "grad_norm": 0.24873000470380066, - "learning_rate": 1.6371314368324002e-05, - "loss": 0.136, + "epoch": 0.24, + "grad_norm": 0.7105411017383069, + "learning_rate": 1.7749595350213873e-05, + "loss": 0.5077, "step": 5252 }, { - "epoch": 0.3, - "grad_norm": 0.3694156067809314, - "learning_rate": 1.6369879950004564e-05, - "loss": 0.3051, + "epoch": 0.24, + "grad_norm": 0.4045070288356002, + "learning_rate": 1.7748654883332984e-05, + "loss": 0.3103, "step": 5253 }, { - "epoch": 0.3, - "grad_norm": 0.8041414855390386, - "learning_rate": 1.6368445311096452e-05, - "loss": 0.4433, + "epoch": 0.24, + "grad_norm": 0.3252566904611039, + "learning_rate": 1.7747714244904348e-05, + "loss": 0.1992, "step": 5254 }, { - "epoch": 0.3, - "grad_norm": 0.5618816209689073, - "learning_rate": 1.6367010451649357e-05, - "loss": 0.3546, + "epoch": 0.24, + "grad_norm": 0.5219822961456272, + "learning_rate": 1.7746773434948786e-05, + "loss": 0.3132, "step": 5255 }, { - "epoch": 0.3, - "grad_norm": 0.39348874710374027, - "learning_rate": 1.636557537171296e-05, - "loss": 0.2843, + "epoch": 0.24, + "grad_norm": 0.8543340044476929, + "learning_rate": 1.7745832453487128e-05, + "loss": 0.4871, "step": 5256 }, { - "epoch": 0.3, - "grad_norm": 0.3829165507517021, - "learning_rate": 1.6364140071336967e-05, - "loss": 0.3449, + "epoch": 0.24, + "grad_norm": 0.3374603771074712, + "learning_rate": 1.7744891300540204e-05, + "loss": 0.2129, "step": 5257 }, { - "epoch": 0.3, - "grad_norm": 0.3896253368807964, - "learning_rate": 1.6362704550571073e-05, - "loss": 0.2402, + "epoch": 0.24, + "grad_norm": 0.32247186276274276, + "learning_rate": 1.7743949976128848e-05, + "loss": 0.3165, "step": 5258 }, { - "epoch": 0.3, - "grad_norm": 0.2660992170071291, - "learning_rate": 1.6361268809464998e-05, - "loss": 0.2279, + "epoch": 0.24, + "grad_norm": 1.037241230234345, + "learning_rate": 1.774300848027391e-05, + "loss": 0.5874, "step": 5259 }, { - "epoch": 0.3, - "grad_norm": 0.7432002175469005, - "learning_rate": 1.6359832848068455e-05, - "loss": 0.4667, + "epoch": 0.24, + "grad_norm": 0.3553030391734681, + "learning_rate": 1.774206681299622e-05, + "loss": 0.2151, "step": 5260 }, { - "epoch": 0.3, - "grad_norm": 0.33013913089948044, - "learning_rate": 1.6358396666431176e-05, - "loss": 0.269, + "epoch": 0.24, + "grad_norm": 0.3177485099226369, + "learning_rate": 1.7741124974316633e-05, + "loss": 0.2668, "step": 5261 }, { - "epoch": 0.3, - "grad_norm": 0.39504619567062776, - "learning_rate": 1.635696026460289e-05, - "loss": 0.2618, + "epoch": 0.24, + "grad_norm": 0.9190638380058797, + "learning_rate": 1.7740182964256006e-05, + "loss": 0.5113, "step": 5262 }, { - "epoch": 0.3, - "grad_norm": 1.2763020879980969, - "learning_rate": 1.6355523642633346e-05, - "loss": 0.832, + "epoch": 0.24, + "grad_norm": 0.3607663271359514, + "learning_rate": 1.7739240782835182e-05, + "loss": 0.2741, "step": 5263 }, { - "epoch": 0.3, - "grad_norm": 0.23495958054537083, - "learning_rate": 1.6354086800572287e-05, - "loss": 0.1672, + "epoch": 0.24, + "grad_norm": 0.4515247262454211, + "learning_rate": 1.773829843007503e-05, + "loss": 0.3381, "step": 5264 }, { - "epoch": 0.3, - "grad_norm": 0.28905131340359264, - "learning_rate": 1.635264973846948e-05, - "loss": 0.2705, + "epoch": 0.24, + "grad_norm": 0.42496545092196186, + "learning_rate": 1.7737355905996406e-05, + "loss": 0.3362, "step": 5265 }, { - "epoch": 0.3, - "grad_norm": 0.7167395323464834, - "learning_rate": 1.6351212456374684e-05, - "loss": 0.4445, + "epoch": 0.24, + "grad_norm": 0.4016495691264929, + "learning_rate": 1.773641321062018e-05, + "loss": 0.2628, "step": 5266 }, { - "epoch": 0.3, - "grad_norm": 0.4054608197750515, - "learning_rate": 1.6349774954337676e-05, - "loss": 0.3239, + "epoch": 0.24, + "grad_norm": 0.9906745612945159, + "learning_rate": 1.7735470343967226e-05, + "loss": 0.4214, "step": 5267 }, { - "epoch": 0.3, - "grad_norm": 0.5343281535109453, - "learning_rate": 1.6348337232408235e-05, - "loss": 0.3897, + "epoch": 0.24, + "grad_norm": 0.7190889771795383, + "learning_rate": 1.773452730605841e-05, + "loss": 0.4796, "step": 5268 }, { - "epoch": 0.3, - "grad_norm": 0.3590528787248728, - "learning_rate": 1.6346899290636145e-05, - "loss": 0.288, + "epoch": 0.24, + "grad_norm": 0.3617645310747818, + "learning_rate": 1.773358409691462e-05, + "loss": 0.2783, "step": 5269 }, { - "epoch": 0.3, - "grad_norm": 0.3249108619545788, - "learning_rate": 1.6345461129071207e-05, - "loss": 0.2352, + "epoch": 0.24, + "grad_norm": 0.2685715013075121, + "learning_rate": 1.7732640716556724e-05, + "loss": 0.1528, "step": 5270 }, { - "epoch": 0.3, - "grad_norm": 0.5694464956960685, - "learning_rate": 1.6344022747763225e-05, - "loss": 0.3867, + "epoch": 0.24, + "grad_norm": 0.9698734634096667, + "learning_rate": 1.773169716500562e-05, + "loss": 0.5998, "step": 5271 }, { - "epoch": 0.3, - "grad_norm": 0.3305233715465694, - "learning_rate": 1.6342584146762005e-05, - "loss": 0.2424, - "step": 5272 + "epoch": 0.24, + "grad_norm": 0.4426938051822305, + "learning_rate": 1.773075344228219e-05, + "loss": 0.304, + "step": 5272 }, { - "epoch": 0.3, - "grad_norm": 0.5162607918437077, - "learning_rate": 1.634114532611737e-05, - "loss": 0.3408, + "epoch": 0.24, + "grad_norm": 0.37919730133409146, + "learning_rate": 1.7729809548407333e-05, + "loss": 0.2818, "step": 5273 }, { - "epoch": 0.3, - "grad_norm": 0.40391491962918796, - "learning_rate": 1.6339706285879144e-05, - "loss": 0.3143, + "epoch": 0.24, + "grad_norm": 0.9676076174283644, + "learning_rate": 1.772886548340194e-05, + "loss": 0.4784, "step": 5274 }, { - "epoch": 0.3, - "grad_norm": 0.9206819584778668, - "learning_rate": 1.6338267026097162e-05, - "loss": 0.5015, + "epoch": 0.24, + "grad_norm": 0.4434788850840855, + "learning_rate": 1.7727921247286916e-05, + "loss": 0.2774, "step": 5275 }, { - "epoch": 0.3, - "grad_norm": 0.4979518942637146, - "learning_rate": 1.633682754682127e-05, - "loss": 0.3239, + "epoch": 0.24, + "grad_norm": 0.259057207680575, + "learning_rate": 1.7726976840083163e-05, + "loss": 0.1555, "step": 5276 }, { - "epoch": 0.3, - "grad_norm": 0.3491657958787403, - "learning_rate": 1.6335387848101307e-05, - "loss": 0.3165, + "epoch": 0.24, + "grad_norm": 0.4464882984457147, + "learning_rate": 1.772603226181159e-05, + "loss": 0.351, "step": 5277 }, { - "epoch": 0.3, - "grad_norm": 0.28912270831094544, - "learning_rate": 1.6333947929987137e-05, - "loss": 0.177, + "epoch": 0.24, + "grad_norm": 0.48389635229113404, + "learning_rate": 1.7725087512493112e-05, + "loss": 0.3329, "step": 5278 }, { - "epoch": 0.3, - "grad_norm": 0.5085041190375486, - "learning_rate": 1.6332507792528626e-05, - "loss": 0.3688, + "epoch": 0.24, + "grad_norm": 0.6150766030173602, + "learning_rate": 1.7724142592148638e-05, + "loss": 0.4035, "step": 5279 }, { - "epoch": 0.3, - "grad_norm": 0.5023828501731868, - "learning_rate": 1.633106743577564e-05, - "loss": 0.3515, + "epoch": 0.24, + "grad_norm": 1.0467302687463638, + "learning_rate": 1.7723197500799094e-05, + "loss": 0.4174, "step": 5280 }, { - "epoch": 0.3, - "grad_norm": 0.4313172948281927, - "learning_rate": 1.6329626859778057e-05, - "loss": 0.3235, + "epoch": 0.24, + "grad_norm": 0.32914122859711714, + "learning_rate": 1.77222522384654e-05, + "loss": 0.2756, "step": 5281 }, { - "epoch": 0.3, - "grad_norm": 0.4878188145202696, - "learning_rate": 1.632818606458577e-05, - "loss": 0.2731, + "epoch": 0.24, + "grad_norm": 0.29164539262552425, + "learning_rate": 1.7721306805168482e-05, + "loss": 0.2218, "step": 5282 }, { - "epoch": 0.3, - "grad_norm": 0.39398917324063887, - "learning_rate": 1.6326745050248675e-05, - "loss": 0.3514, + "epoch": 0.24, + "grad_norm": 1.0002549553393807, + "learning_rate": 1.7720361200929277e-05, + "loss": 0.2734, "step": 5283 }, { - "epoch": 0.3, - "grad_norm": 0.3186584290148543, - "learning_rate": 1.632530381681667e-05, - "loss": 0.1892, + "epoch": 0.24, + "grad_norm": 0.526273043712311, + "learning_rate": 1.7719415425768717e-05, + "loss": 0.3203, "step": 5284 }, { - "epoch": 0.3, - "grad_norm": 0.2990432265997818, - "learning_rate": 1.6323862364339663e-05, - "loss": 0.2251, + "epoch": 0.24, + "grad_norm": 0.48531547345330445, + "learning_rate": 1.7718469479707736e-05, + "loss": 0.3528, "step": 5285 }, { - "epoch": 0.3, - "grad_norm": 0.5905438488593204, - "learning_rate": 1.6322420692867577e-05, - "loss": 0.3983, + "epoch": 0.24, + "grad_norm": 0.7371777220393582, + "learning_rate": 1.771752336276728e-05, + "loss": 0.3691, "step": 5286 }, { - "epoch": 0.3, - "grad_norm": 0.7228437691044046, - "learning_rate": 1.632097880245033e-05, - "loss": 0.585, + "epoch": 0.24, + "grad_norm": 0.39809046630610884, + "learning_rate": 1.7716577074968294e-05, + "loss": 0.3049, "step": 5287 }, { - "epoch": 0.3, - "grad_norm": 0.3268212611120316, - "learning_rate": 1.6319536693137862e-05, - "loss": 0.2669, + "epoch": 0.24, + "grad_norm": 0.39300245671791856, + "learning_rate": 1.7715630616331732e-05, + "loss": 0.2478, "step": 5288 }, { - "epoch": 0.3, - "grad_norm": 0.40593698432909653, - "learning_rate": 1.631809436498011e-05, - "loss": 0.3245, + "epoch": 0.24, + "grad_norm": 0.30475467441553045, + "learning_rate": 1.7714683986878546e-05, + "loss": 0.2109, "step": 5289 }, { - "epoch": 0.3, - "grad_norm": 0.2983396151975038, - "learning_rate": 1.6316651818027024e-05, - "loss": 0.1933, + "epoch": 0.24, + "grad_norm": 0.4939294644596549, + "learning_rate": 1.771373718662969e-05, + "loss": 0.298, "step": 5290 }, { - "epoch": 0.3, - "grad_norm": 0.47509655229133435, - "learning_rate": 1.6315209052328554e-05, - "loss": 0.1699, + "epoch": 0.24, + "grad_norm": 0.49380525786565277, + "learning_rate": 1.7712790215606125e-05, + "loss": 0.4032, "step": 5291 }, { - "epoch": 0.3, - "grad_norm": 0.46728137226170324, - "learning_rate": 1.6313766067934668e-05, - "loss": 0.3261, + "epoch": 0.24, + "grad_norm": 0.4937465102377239, + "learning_rate": 1.771184307382882e-05, + "loss": 0.4249, "step": 5292 }, { - "epoch": 0.3, - "grad_norm": 0.3831760523182167, - "learning_rate": 1.6312322864895334e-05, - "loss": 0.3321, + "epoch": 0.24, + "grad_norm": 0.3509530922577548, + "learning_rate": 1.7710895761318742e-05, + "loss": 0.2236, "step": 5293 }, { - "epoch": 0.3, - "grad_norm": 0.70358765871688, - "learning_rate": 1.631087944326053e-05, - "loss": 0.4898, + "epoch": 0.24, + "grad_norm": 0.3606802472398584, + "learning_rate": 1.7709948278096864e-05, + "loss": 0.2475, "step": 5294 }, { - "epoch": 0.3, - "grad_norm": 0.33376431508160115, - "learning_rate": 1.6309435803080244e-05, - "loss": 0.2051, + "epoch": 0.24, + "grad_norm": 0.4441923229747742, + "learning_rate": 1.7709000624184162e-05, + "loss": 0.2267, "step": 5295 }, { - "epoch": 0.3, - "grad_norm": 0.2822733219097903, - "learning_rate": 1.6307991944404466e-05, - "loss": 0.2165, + "epoch": 0.24, + "grad_norm": 0.3730345544866859, + "learning_rate": 1.7708052799601616e-05, + "loss": 0.2222, "step": 5296 }, { - "epoch": 0.3, - "grad_norm": 0.4344437630599208, - "learning_rate": 1.6306547867283197e-05, - "loss": 0.3403, + "epoch": 0.24, + "grad_norm": 0.3338858572538388, + "learning_rate": 1.770710480437021e-05, + "loss": 0.3307, "step": 5297 }, { - "epoch": 0.3, - "grad_norm": 0.3531370933554759, - "learning_rate": 1.630510357176645e-05, - "loss": 0.2542, + "epoch": 0.24, + "grad_norm": 0.8327284263440696, + "learning_rate": 1.770615663851093e-05, + "loss": 0.5879, "step": 5298 }, { - "epoch": 0.3, - "grad_norm": 0.8157810100988, - "learning_rate": 1.6303659057904232e-05, - "loss": 0.4894, + "epoch": 0.24, + "grad_norm": 0.36181070825841877, + "learning_rate": 1.7705208302044773e-05, + "loss": 0.2201, "step": 5299 }, { - "epoch": 0.3, - "grad_norm": 0.33867142000505207, - "learning_rate": 1.6302214325746577e-05, - "loss": 0.336, + "epoch": 0.24, + "grad_norm": 0.3145369428981611, + "learning_rate": 1.7704259794992734e-05, + "loss": 0.21, "step": 5300 }, { - "epoch": 0.3, - "grad_norm": 0.3516250686888519, - "learning_rate": 1.6300769375343508e-05, - "loss": 0.2335, + "epoch": 0.24, + "grad_norm": 0.40542346749301694, + "learning_rate": 1.7703311117375802e-05, + "loss": 0.3475, "step": 5301 }, { - "epoch": 0.3, - "grad_norm": 0.4404752724035265, - "learning_rate": 1.6299324206745066e-05, - "loss": 0.2584, + "epoch": 0.24, + "grad_norm": 0.3542386300371763, + "learning_rate": 1.7702362269214987e-05, + "loss": 0.2645, "step": 5302 }, { - "epoch": 0.3, - "grad_norm": 0.3540326459594793, - "learning_rate": 1.6297878820001302e-05, - "loss": 0.2618, + "epoch": 0.24, + "grad_norm": 0.5477915690020112, + "learning_rate": 1.7701413250531297e-05, + "loss": 0.3965, "step": 5303 }, { - "epoch": 0.3, - "grad_norm": 0.3477074044921296, - "learning_rate": 1.6296433215162258e-05, - "loss": 0.2255, + "epoch": 0.24, + "grad_norm": 1.0874136735992839, + "learning_rate": 1.770046406134574e-05, + "loss": 0.7622, "step": 5304 }, { - "epoch": 0.3, - "grad_norm": 0.444796893845932, - "learning_rate": 1.629498739227801e-05, - "loss": 0.3526, + "epoch": 0.24, + "grad_norm": 0.35738715186187187, + "learning_rate": 1.769951470167933e-05, + "loss": 0.2956, "step": 5305 }, { - "epoch": 0.3, - "grad_norm": 0.5521271885013249, - "learning_rate": 1.6293541351398616e-05, - "loss": 0.3678, + "epoch": 0.24, + "grad_norm": 0.3577696108706678, + "learning_rate": 1.7698565171553084e-05, + "loss": 0.2537, "step": 5306 }, { - "epoch": 0.3, - "grad_norm": 0.43164109413325197, - "learning_rate": 1.6292095092574154e-05, - "loss": 0.3293, + "epoch": 0.24, + "grad_norm": 0.2835751065905806, + "learning_rate": 1.7697615470988028e-05, + "loss": 0.2337, "step": 5307 }, { - "epoch": 0.3, - "grad_norm": 0.37843319285317356, - "learning_rate": 1.6290648615854712e-05, - "loss": 0.249, + "epoch": 0.24, + "grad_norm": 0.5011495333605145, + "learning_rate": 1.769666560000518e-05, + "loss": 0.3584, "step": 5308 }, { - "epoch": 0.31, - "grad_norm": 0.25288135816628304, - "learning_rate": 1.6289201921290377e-05, - "loss": 0.1774, + "epoch": 0.24, + "grad_norm": 0.35434548498309154, + "learning_rate": 1.769571555862558e-05, + "loss": 0.2608, "step": 5309 }, { - "epoch": 0.31, - "grad_norm": 0.335350277217877, - "learning_rate": 1.6287755008931255e-05, - "loss": 0.2844, + "epoch": 0.24, + "grad_norm": 1.4187300353634535, + "learning_rate": 1.769476534687025e-05, + "loss": 0.8306, "step": 5310 }, { - "epoch": 0.31, - "grad_norm": 0.7224538732310122, - "learning_rate": 1.6286307878827443e-05, - "loss": 0.4002, + "epoch": 0.24, + "grad_norm": 0.8124537410002787, + "learning_rate": 1.7693814964760232e-05, + "loss": 0.4306, "step": 5311 }, { - "epoch": 0.31, - "grad_norm": 0.4249497381815193, - "learning_rate": 1.6284860531029062e-05, - "loss": 0.342, + "epoch": 0.24, + "grad_norm": 0.3232773986465274, + "learning_rate": 1.769286441231657e-05, + "loss": 0.1941, "step": 5312 }, { - "epoch": 0.31, - "grad_norm": 0.37589361249803893, - "learning_rate": 1.6283412965586227e-05, - "loss": 0.2856, + "epoch": 0.24, + "grad_norm": 0.39943158492088676, + "learning_rate": 1.7691913689560298e-05, + "loss": 0.3028, "step": 5313 }, { - "epoch": 0.31, - "grad_norm": 0.32777900712701974, - "learning_rate": 1.6281965182549077e-05, - "loss": 0.1539, + "epoch": 0.24, + "grad_norm": 0.759021070553651, + "learning_rate": 1.7690962796512473e-05, + "loss": 0.4193, "step": 5314 }, { - "epoch": 0.31, - "grad_norm": 0.46467288740435403, - "learning_rate": 1.6280517181967733e-05, - "loss": 0.3337, + "epoch": 0.24, + "grad_norm": 0.37068074210510876, + "learning_rate": 1.7690011733194147e-05, + "loss": 0.2606, "step": 5315 }, { - "epoch": 0.31, - "grad_norm": 0.302574483899151, - "learning_rate": 1.6279068963892358e-05, - "loss": 0.2726, + "epoch": 0.24, + "grad_norm": 1.4859877427662964, + "learning_rate": 1.7689060499626372e-05, + "loss": 0.7402, "step": 5316 }, { - "epoch": 0.31, - "grad_norm": 0.5464579445728996, - "learning_rate": 1.6277620528373094e-05, - "loss": 0.3922, + "epoch": 0.24, + "grad_norm": 0.3588662416475269, + "learning_rate": 1.768810909583021e-05, + "loss": 0.2907, "step": 5317 }, { - "epoch": 0.31, - "grad_norm": 0.6586890435544829, - "learning_rate": 1.6276171875460097e-05, - "loss": 0.3371, + "epoch": 0.24, + "grad_norm": 0.49581248611612044, + "learning_rate": 1.7687157521826717e-05, + "loss": 0.3456, "step": 5318 }, { - "epoch": 0.31, - "grad_norm": 0.4057018760179316, - "learning_rate": 1.6274723005203542e-05, - "loss": 0.2928, + "epoch": 0.24, + "grad_norm": 1.081077636446496, + "learning_rate": 1.7686205777636968e-05, + "loss": 0.3914, "step": 5319 }, { - "epoch": 0.31, - "grad_norm": 0.5354467630028948, - "learning_rate": 1.6273273917653596e-05, - "loss": 0.3619, + "epoch": 0.24, + "grad_norm": 0.4724669378778735, + "learning_rate": 1.7685253863282034e-05, + "loss": 0.2636, "step": 5320 }, { - "epoch": 0.31, - "grad_norm": 0.24525178033362732, - "learning_rate": 1.6271824612860445e-05, - "loss": 0.1796, + "epoch": 0.24, + "grad_norm": 0.4383230350439651, + "learning_rate": 1.7684301778782985e-05, + "loss": 0.254, "step": 5321 }, { - "epoch": 0.31, - "grad_norm": 0.3806740733460614, - "learning_rate": 1.6270375090874276e-05, - "loss": 0.2955, + "epoch": 0.24, + "grad_norm": 1.1850448914299558, + "learning_rate": 1.76833495241609e-05, + "loss": 0.5776, "step": 5322 }, { - "epoch": 0.31, - "grad_norm": 1.016911246302538, - "learning_rate": 1.626892535174529e-05, - "loss": 0.4905, + "epoch": 0.24, + "grad_norm": 0.7103752962802321, + "learning_rate": 1.768239709943686e-05, + "loss": 0.4159, "step": 5323 }, { - "epoch": 0.31, - "grad_norm": 0.39313652227601653, - "learning_rate": 1.6267475395523686e-05, - "loss": 0.265, + "epoch": 0.24, + "grad_norm": 0.4637714608330094, + "learning_rate": 1.7681444504631954e-05, + "loss": 0.3284, "step": 5324 }, { - "epoch": 0.31, - "grad_norm": 0.42444131470135105, - "learning_rate": 1.626602522225968e-05, - "loss": 0.31, + "epoch": 0.24, + "grad_norm": 0.7745097220540436, + "learning_rate": 1.768049173976727e-05, + "loss": 0.3336, "step": 5325 }, { - "epoch": 0.31, - "grad_norm": 1.1439901110836537, - "learning_rate": 1.6264574832003492e-05, - "loss": 0.704, + "epoch": 0.24, + "grad_norm": 0.30202984267597993, + "learning_rate": 1.7679538804863903e-05, + "loss": 0.168, "step": 5326 }, { - "epoch": 0.31, - "grad_norm": 0.3857249796155631, - "learning_rate": 1.6263124224805345e-05, - "loss": 0.1889, + "epoch": 0.24, + "grad_norm": 0.45594436122596893, + "learning_rate": 1.7678585699942948e-05, + "loss": 0.3128, "step": 5327 }, { - "epoch": 0.31, - "grad_norm": 0.3412589948287796, - "learning_rate": 1.6261673400715475e-05, - "loss": 0.2426, + "epoch": 0.24, + "grad_norm": 0.5566839352451198, + "learning_rate": 1.7677632425025506e-05, + "loss": 0.4382, "step": 5328 }, { - "epoch": 0.31, - "grad_norm": 0.5851866759927743, - "learning_rate": 1.6260222359784123e-05, - "loss": 0.4064, + "epoch": 0.24, + "grad_norm": 0.39505831684370435, + "learning_rate": 1.7676678980132682e-05, + "loss": 0.2559, "step": 5329 }, { - "epoch": 0.31, - "grad_norm": 1.3336728934135882, - "learning_rate": 1.6258771102061543e-05, - "loss": 0.8571, + "epoch": 0.24, + "grad_norm": 0.40993040256619195, + "learning_rate": 1.7675725365285584e-05, + "loss": 0.31, "step": 5330 }, { - "epoch": 0.31, - "grad_norm": 0.3092169640199647, - "learning_rate": 1.6257319627597986e-05, - "loss": 0.2259, + "epoch": 0.24, + "grad_norm": 0.7886156900093502, + "learning_rate": 1.7674771580505322e-05, + "loss": 0.483, "step": 5331 }, { - "epoch": 0.31, - "grad_norm": 0.4063809902015279, - "learning_rate": 1.6255867936443724e-05, - "loss": 0.3177, + "epoch": 0.24, + "grad_norm": 0.36137530796608164, + "learning_rate": 1.767381762581302e-05, + "loss": 0.1697, "step": 5332 }, { - "epoch": 0.31, - "grad_norm": 0.7712406027659143, - "learning_rate": 1.625441602864902e-05, - "loss": 0.485, + "epoch": 0.24, + "grad_norm": 0.3543864315454361, + "learning_rate": 1.7672863501229785e-05, + "loss": 0.2562, "step": 5333 }, { - "epoch": 0.31, - "grad_norm": 0.3238377010953509, - "learning_rate": 1.625296390426416e-05, - "loss": 0.2591, + "epoch": 0.25, + "grad_norm": 1.3811676388884337, + "learning_rate": 1.7671909206776755e-05, + "loss": 0.7972, "step": 5334 }, { - "epoch": 0.31, - "grad_norm": 0.3062860851213545, - "learning_rate": 1.6251511563339426e-05, - "loss": 0.1903, + "epoch": 0.25, + "grad_norm": 0.6804625592687791, + "learning_rate": 1.7670954742475043e-05, + "loss": 0.3052, "step": 5335 }, { - "epoch": 0.31, - "grad_norm": 0.42276890338671497, - "learning_rate": 1.6250059005925117e-05, - "loss": 0.3612, + "epoch": 0.25, + "grad_norm": 0.36262471820811604, + "learning_rate": 1.7670000108345792e-05, + "loss": 0.3055, "step": 5336 }, { - "epoch": 0.31, - "grad_norm": 0.3259165375372027, - "learning_rate": 1.6248606232071536e-05, - "loss": 0.2178, + "epoch": 0.25, + "grad_norm": 0.5399199708988571, + "learning_rate": 1.7669045304410125e-05, + "loss": 0.391, "step": 5337 }, { - "epoch": 0.31, - "grad_norm": 1.165783767802092, - "learning_rate": 1.6247153241828985e-05, - "loss": 0.6111, + "epoch": 0.25, + "grad_norm": 0.4891472477728291, + "learning_rate": 1.766809033068919e-05, + "loss": 0.1823, "step": 5338 }, { - "epoch": 0.31, - "grad_norm": 0.71001471508266, - "learning_rate": 1.624570003524779e-05, - "loss": 0.3921, + "epoch": 0.25, + "grad_norm": 0.3811185111368319, + "learning_rate": 1.7667135187204122e-05, + "loss": 0.2745, "step": 5339 }, { - "epoch": 0.31, - "grad_norm": 0.3474997200766379, - "learning_rate": 1.624424661237827e-05, - "loss": 0.2082, + "epoch": 0.25, + "grad_norm": 0.6169401152155215, + "learning_rate": 1.7666179873976076e-05, + "loss": 0.4299, "step": 5340 }, { - "epoch": 0.31, - "grad_norm": 0.3418873789831343, - "learning_rate": 1.6242792973270758e-05, - "loss": 0.2739, + "epoch": 0.25, + "grad_norm": 0.35763136082698405, + "learning_rate": 1.7665224391026194e-05, + "loss": 0.2768, "step": 5341 }, { - "epoch": 0.31, - "grad_norm": 0.4231357230178703, - "learning_rate": 1.6241339117975596e-05, - "loss": 0.2998, + "epoch": 0.25, + "grad_norm": 0.40930570460072796, + "learning_rate": 1.766426873837563e-05, + "loss": 0.2608, "step": 5342 }, { - "epoch": 0.31, - "grad_norm": 0.4355566229176757, - "learning_rate": 1.6239885046543125e-05, - "loss": 0.2823, + "epoch": 0.25, + "grad_norm": 0.732089951340725, + "learning_rate": 1.7663312916045546e-05, + "loss": 0.5222, "step": 5343 }, { - "epoch": 0.31, - "grad_norm": 0.35899156253158837, - "learning_rate": 1.6238430759023706e-05, - "loss": 0.27, + "epoch": 0.25, + "grad_norm": 0.3911261660624748, + "learning_rate": 1.7662356924057097e-05, + "loss": 0.2784, "step": 5344 }, { - "epoch": 0.31, - "grad_norm": 0.8135718113775179, - "learning_rate": 1.6236976255467697e-05, - "loss": 0.4483, + "epoch": 0.25, + "grad_norm": 0.3097672211102889, + "learning_rate": 1.7661400762431452e-05, + "loss": 0.1856, "step": 5345 }, { - "epoch": 0.31, - "grad_norm": 0.4186232071036985, - "learning_rate": 1.623552153592547e-05, - "loss": 0.3089, + "epoch": 0.25, + "grad_norm": 0.4216428171639803, + "learning_rate": 1.766044443118978e-05, + "loss": 0.271, "step": 5346 }, { - "epoch": 0.31, - "grad_norm": 0.26769754516544547, - "learning_rate": 1.6234066600447397e-05, - "loss": 0.1907, + "epoch": 0.25, + "grad_norm": 0.7318256855388012, + "learning_rate": 1.7659487930353254e-05, + "loss": 0.4832, "step": 5347 }, { - "epoch": 0.31, - "grad_norm": 0.37312629158031374, - "learning_rate": 1.6232611449083866e-05, - "loss": 0.3283, + "epoch": 0.25, + "grad_norm": 0.32713674854107844, + "learning_rate": 1.7658531259943043e-05, + "loss": 0.2473, "step": 5348 }, { - "epoch": 0.31, - "grad_norm": 0.3136542119516781, - "learning_rate": 1.623115608188527e-05, - "loss": 0.2588, + "epoch": 0.25, + "grad_norm": 0.5035967159093254, + "learning_rate": 1.7657574419980332e-05, + "loss": 0.352, "step": 5349 }, { - "epoch": 0.31, - "grad_norm": 1.0894221985880963, - "learning_rate": 1.6229700498902008e-05, - "loss": 0.3676, + "epoch": 0.25, + "grad_norm": 1.2419471247231, + "learning_rate": 1.7656617410486304e-05, + "loss": 0.6467, "step": 5350 }, { - "epoch": 0.31, - "grad_norm": 0.7124718935045236, - "learning_rate": 1.6228244700184484e-05, - "loss": 0.5309, + "epoch": 0.25, + "grad_norm": 0.3762643443660175, + "learning_rate": 1.7655660231482146e-05, + "loss": 0.2314, "step": 5351 }, { - "epoch": 0.31, - "grad_norm": 0.3407741653174107, - "learning_rate": 1.622678868578311e-05, - "loss": 0.2742, + "epoch": 0.25, + "grad_norm": 0.6191544290234654, + "learning_rate": 1.765470288298905e-05, + "loss": 0.4019, "step": 5352 }, { - "epoch": 0.31, - "grad_norm": 0.47774823506024416, - "learning_rate": 1.622533245574832e-05, - "loss": 0.2726, + "epoch": 0.25, + "grad_norm": 0.4127265787398909, + "learning_rate": 1.765374536502821e-05, + "loss": 0.3071, "step": 5353 }, { - "epoch": 0.31, - "grad_norm": 0.33453155351886066, - "learning_rate": 1.622387601013053e-05, - "loss": 0.2116, + "epoch": 0.25, + "grad_norm": 0.362009018621164, + "learning_rate": 1.765278767762082e-05, + "loss": 0.2875, "step": 5354 }, { - "epoch": 0.31, - "grad_norm": 0.3884129554118678, - "learning_rate": 1.622241934898018e-05, - "loss": 0.2891, + "epoch": 0.25, + "grad_norm": 0.3155714146758715, + "learning_rate": 1.765182982078809e-05, + "loss": 0.1206, "step": 5355 }, { - "epoch": 0.31, - "grad_norm": 0.5698439071171101, - "learning_rate": 1.622096247234772e-05, - "loss": 0.3661, + "epoch": 0.25, + "grad_norm": 0.4476672216955325, + "learning_rate": 1.7650871794551224e-05, + "loss": 0.3252, "step": 5356 }, { - "epoch": 0.31, - "grad_norm": 1.6227867700244734, - "learning_rate": 1.6219505380283593e-05, - "loss": 0.4115, + "epoch": 0.25, + "grad_norm": 0.4287837833380792, + "learning_rate": 1.764991359893143e-05, + "loss": 0.289, "step": 5357 }, { - "epoch": 0.31, - "grad_norm": 0.35548790471839764, - "learning_rate": 1.6218048072838265e-05, - "loss": 0.2962, + "epoch": 0.25, + "grad_norm": 0.8789563948301996, + "learning_rate": 1.764895523394992e-05, + "loss": 0.4322, "step": 5358 }, { - "epoch": 0.31, - "grad_norm": 1.3251708651923235, - "learning_rate": 1.62165905500622e-05, - "loss": 0.7052, + "epoch": 0.25, + "grad_norm": 0.44726550857237085, + "learning_rate": 1.7647996699627917e-05, + "loss": 0.3142, "step": 5359 }, { - "epoch": 0.31, - "grad_norm": 0.29768315161623826, - "learning_rate": 1.621513281200587e-05, - "loss": 0.2041, + "epoch": 0.25, + "grad_norm": 0.40362695607033316, + "learning_rate": 1.7647037995986632e-05, + "loss": 0.2936, "step": 5360 }, { - "epoch": 0.31, - "grad_norm": 0.432601921603546, - "learning_rate": 1.6213674858719758e-05, - "loss": 0.3035, + "epoch": 0.25, + "grad_norm": 0.3299130694258084, + "learning_rate": 1.7646079123047304e-05, + "loss": 0.241, "step": 5361 }, { - "epoch": 0.31, - "grad_norm": 1.1288404236236713, - "learning_rate": 1.6212216690254353e-05, - "loss": 0.4222, + "epoch": 0.25, + "grad_norm": 0.8166023977599337, + "learning_rate": 1.7645120080831148e-05, + "loss": 0.4047, "step": 5362 }, { - "epoch": 0.31, - "grad_norm": 0.8101163644596073, - "learning_rate": 1.621075830666015e-05, - "loss": 0.3231, + "epoch": 0.25, + "grad_norm": 0.42804685778601004, + "learning_rate": 1.7644160869359404e-05, + "loss": 0.3248, "step": 5363 }, { - "epoch": 0.31, - "grad_norm": 0.3462029633898307, - "learning_rate": 1.6209299707987656e-05, - "loss": 0.2848, + "epoch": 0.25, + "grad_norm": 0.49572994572245704, + "learning_rate": 1.7643201488653304e-05, + "loss": 0.3077, "step": 5364 }, { - "epoch": 0.31, - "grad_norm": 0.5494931342333284, - "learning_rate": 1.6207840894287377e-05, - "loss": 0.42, + "epoch": 0.25, + "grad_norm": 1.0406711423087707, + "learning_rate": 1.7642241938734094e-05, + "loss": 0.4977, "step": 5365 }, { - "epoch": 0.31, - "grad_norm": 0.31147969450902874, - "learning_rate": 1.6206381865609836e-05, - "loss": 0.1297, + "epoch": 0.25, + "grad_norm": 0.3972197347625694, + "learning_rate": 1.764128221962301e-05, + "loss": 0.2901, "step": 5366 }, { - "epoch": 0.31, - "grad_norm": 0.42917601362153945, - "learning_rate": 1.620492262200556e-05, - "loss": 0.3263, + "epoch": 0.25, + "grad_norm": 0.32096132728343624, + "learning_rate": 1.7640322331341303e-05, + "loss": 0.1718, "step": 5367 }, { - "epoch": 0.31, - "grad_norm": 0.43424402968714376, - "learning_rate": 1.620346316352508e-05, - "loss": 0.3371, + "epoch": 0.25, + "grad_norm": 0.4084009645713907, + "learning_rate": 1.763936227391022e-05, + "loss": 0.2684, "step": 5368 }, { - "epoch": 0.31, - "grad_norm": 0.43098949036670525, - "learning_rate": 1.6202003490218932e-05, - "loss": 0.2333, + "epoch": 0.25, + "grad_norm": 0.4286986063950546, + "learning_rate": 1.7638402047351025e-05, + "loss": 0.2882, "step": 5369 }, { - "epoch": 0.31, - "grad_norm": 0.4145635482591804, - "learning_rate": 1.6200543602137676e-05, - "loss": 0.2221, + "epoch": 0.25, + "grad_norm": 0.7587476274767847, + "learning_rate": 1.7637441651684965e-05, + "loss": 0.4841, "step": 5370 }, { - "epoch": 0.31, - "grad_norm": 1.537768224298209, - "learning_rate": 1.619908349933186e-05, - "loss": 0.851, + "epoch": 0.25, + "grad_norm": 0.675007792203134, + "learning_rate": 1.7636481086933313e-05, + "loss": 0.3884, "step": 5371 }, { - "epoch": 0.31, - "grad_norm": 0.513208048312179, - "learning_rate": 1.619762318185205e-05, - "loss": 0.3555, + "epoch": 0.25, + "grad_norm": 0.35202158900089725, + "learning_rate": 1.7635520353117325e-05, + "loss": 0.2711, "step": 5372 }, { - "epoch": 0.31, - "grad_norm": 0.32926945386236683, - "learning_rate": 1.6196162649748815e-05, - "loss": 0.255, + "epoch": 0.25, + "grad_norm": 0.2668374327689299, + "learning_rate": 1.763455945025828e-05, + "loss": 0.1899, "step": 5373 }, { - "epoch": 0.31, - "grad_norm": 0.6119467962093083, - "learning_rate": 1.6194701903072734e-05, - "loss": 0.4787, + "epoch": 0.25, + "grad_norm": 0.750337832601005, + "learning_rate": 1.7633598378377445e-05, + "loss": 0.4295, "step": 5374 }, { - "epoch": 0.31, - "grad_norm": 0.2881545608075555, - "learning_rate": 1.619324094187439e-05, - "loss": 0.2352, + "epoch": 0.25, + "grad_norm": 0.3860518586114576, + "learning_rate": 1.76326371374961e-05, + "loss": 0.2965, "step": 5375 }, { - "epoch": 0.31, - "grad_norm": 0.3695005519980028, - "learning_rate": 1.619177976620438e-05, - "loss": 0.2304, + "epoch": 0.25, + "grad_norm": 0.5922896011481424, + "learning_rate": 1.7631675727635523e-05, + "loss": 0.3579, "step": 5376 }, { - "epoch": 0.31, - "grad_norm": 0.6251592955848971, - "learning_rate": 1.6190318376113307e-05, - "loss": 0.4482, + "epoch": 0.25, + "grad_norm": 1.2020795345724793, + "learning_rate": 1.7630714148817003e-05, + "loss": 0.4417, "step": 5377 }, { - "epoch": 0.31, - "grad_norm": 0.7395901583447987, - "learning_rate": 1.618885677165177e-05, - "loss": 0.4733, + "epoch": 0.25, + "grad_norm": 0.4298680205314211, + "learning_rate": 1.7629752401061827e-05, + "loss": 0.258, "step": 5378 }, { - "epoch": 0.31, - "grad_norm": 0.39642035979972523, - "learning_rate": 1.6187394952870392e-05, - "loss": 0.2308, + "epoch": 0.25, + "grad_norm": 0.2817596798236294, + "learning_rate": 1.7628790484391284e-05, + "loss": 0.1942, "step": 5379 }, { - "epoch": 0.31, - "grad_norm": 0.3336838668804227, - "learning_rate": 1.618593291981979e-05, - "loss": 0.305, + "epoch": 0.25, + "grad_norm": 0.4280741300576875, + "learning_rate": 1.7627828398826677e-05, + "loss": 0.3272, "step": 5380 }, { - "epoch": 0.31, - "grad_norm": 0.28950738150393335, - "learning_rate": 1.61844706725506e-05, - "loss": 0.1669, + "epoch": 0.25, + "grad_norm": 0.3555957654039311, + "learning_rate": 1.7626866144389298e-05, + "loss": 0.2068, "step": 5381 }, { - "epoch": 0.31, - "grad_norm": 0.5750376618606018, - "learning_rate": 1.6183008211113454e-05, - "loss": 0.3297, + "epoch": 0.25, + "grad_norm": 0.7844070263543746, + "learning_rate": 1.762590372110045e-05, + "loss": 0.4738, "step": 5382 }, { - "epoch": 0.31, - "grad_norm": 0.3405128455992142, - "learning_rate": 1.6181545535559e-05, - "loss": 0.277, + "epoch": 0.25, + "grad_norm": 1.0673990680732155, + "learning_rate": 1.7624941128981447e-05, + "loss": 0.6371, "step": 5383 }, { - "epoch": 0.31, - "grad_norm": 0.5570942141284251, - "learning_rate": 1.6180082645937888e-05, - "loss": 0.3617, + "epoch": 0.25, + "grad_norm": 0.3092697260348129, + "learning_rate": 1.76239783680536e-05, + "loss": 0.2296, "step": 5384 }, { - "epoch": 0.31, - "grad_norm": 0.6443057721616354, - "learning_rate": 1.6178619542300783e-05, - "loss": 0.3859, + "epoch": 0.25, + "grad_norm": 0.27674502266295214, + "learning_rate": 1.7623015438338213e-05, + "loss": 0.2336, "step": 5385 }, { - "epoch": 0.31, - "grad_norm": 0.3067421355198817, - "learning_rate": 1.617715622469835e-05, - "loss": 0.2076, + "epoch": 0.25, + "grad_norm": 1.555437244312456, + "learning_rate": 1.7622052339856616e-05, + "loss": 0.827, "step": 5386 }, { - "epoch": 0.31, - "grad_norm": 0.341538422997227, - "learning_rate": 1.617569269318126e-05, - "loss": 0.2534, + "epoch": 0.25, + "grad_norm": 0.37882981261221843, + "learning_rate": 1.7621089072630124e-05, + "loss": 0.2686, "step": 5387 }, { - "epoch": 0.31, - "grad_norm": 0.3996699166427557, - "learning_rate": 1.61742289478002e-05, - "loss": 0.2901, + "epoch": 0.25, + "grad_norm": 0.45064335583607196, + "learning_rate": 1.7620125636680066e-05, + "loss": 0.3539, "step": 5388 }, { - "epoch": 0.31, - "grad_norm": 0.35294345317967296, - "learning_rate": 1.6172764988605855e-05, - "loss": 0.2503, + "epoch": 0.25, + "grad_norm": 1.3016383335616688, + "learning_rate": 1.761916203202777e-05, + "loss": 0.7861, "step": 5389 }, { - "epoch": 0.31, - "grad_norm": 0.679404700705096, - "learning_rate": 1.6171300815648922e-05, - "loss": 0.4566, + "epoch": 0.25, + "grad_norm": 0.3699306466568561, + "learning_rate": 1.7618198258694573e-05, + "loss": 0.2104, "step": 5390 }, { - "epoch": 0.31, - "grad_norm": 0.4050109468817518, - "learning_rate": 1.6169836428980108e-05, - "loss": 0.3135, + "epoch": 0.25, + "grad_norm": 0.3166011455458693, + "learning_rate": 1.7617234316701805e-05, + "loss": 0.1793, "step": 5391 }, { - "epoch": 0.31, - "grad_norm": 0.33445459512149867, - "learning_rate": 1.6168371828650123e-05, - "loss": 0.2532, + "epoch": 0.25, + "grad_norm": 0.40494307795963713, + "learning_rate": 1.7616270206070814e-05, + "loss": 0.3503, "step": 5392 }, { - "epoch": 0.31, - "grad_norm": 0.29912038151863696, - "learning_rate": 1.616690701470969e-05, - "loss": 0.2024, + "epoch": 0.25, + "grad_norm": 0.3613131161632067, + "learning_rate": 1.761530592682294e-05, + "loss": 0.2929, "step": 5393 }, { - "epoch": 0.31, - "grad_norm": 0.4029994690890542, - "learning_rate": 1.6165441987209532e-05, - "loss": 0.2963, + "epoch": 0.25, + "grad_norm": 0.6878601471951274, + "learning_rate": 1.7614341478979534e-05, + "loss": 0.4139, "step": 5394 }, { - "epoch": 0.31, - "grad_norm": 0.5210104635097429, - "learning_rate": 1.6163976746200384e-05, - "loss": 0.3754, + "epoch": 0.25, + "grad_norm": 0.5634946063770254, + "learning_rate": 1.7613376862561945e-05, + "loss": 0.3851, "step": 5395 }, { - "epoch": 0.31, - "grad_norm": 0.41946498662119747, - "learning_rate": 1.6162511291732984e-05, - "loss": 0.3246, + "epoch": 0.25, + "grad_norm": 0.4720377988624362, + "learning_rate": 1.761241207759153e-05, + "loss": 0.3021, "step": 5396 }, { - "epoch": 0.31, - "grad_norm": 0.8088520261509311, - "learning_rate": 1.616104562385808e-05, - "loss": 0.4104, + "epoch": 0.25, + "grad_norm": 0.28455155420756945, + "learning_rate": 1.761144712408965e-05, + "loss": 0.1755, "step": 5397 }, { - "epoch": 0.31, - "grad_norm": 0.41364569032392223, - "learning_rate": 1.615957974262644e-05, - "loss": 0.3169, + "epoch": 0.25, + "grad_norm": 0.7821847278938959, + "learning_rate": 1.7610482002077664e-05, + "loss": 0.4859, "step": 5398 }, { - "epoch": 0.31, - "grad_norm": 0.25882567121199573, - "learning_rate": 1.615811364808881e-05, - "loss": 0.1889, + "epoch": 0.25, + "grad_norm": 0.6151619713153066, + "learning_rate": 1.7609516711576945e-05, + "loss": 0.4159, "step": 5399 }, { - "epoch": 0.31, - "grad_norm": 0.5591145105552626, - "learning_rate": 1.6156647340295973e-05, - "loss": 0.3205, + "epoch": 0.25, + "grad_norm": 0.342258907693212, + "learning_rate": 1.7608551252608856e-05, + "loss": 0.2748, "step": 5400 }, { - "epoch": 0.31, - "grad_norm": 0.39999487861467214, - "learning_rate": 1.6155180819298703e-05, - "loss": 0.3648, + "epoch": 0.25, + "grad_norm": 1.3379169610531831, + "learning_rate": 1.7607585625194777e-05, + "loss": 0.717, "step": 5401 }, { - "epoch": 0.31, - "grad_norm": 0.669508277334371, - "learning_rate": 1.6153714085147783e-05, - "loss": 0.3816, + "epoch": 0.25, + "grad_norm": 0.4483810569423919, + "learning_rate": 1.7606619829356085e-05, + "loss": 0.2805, "step": 5402 }, { - "epoch": 0.31, - "grad_norm": 0.397360661949208, - "learning_rate": 1.6152247137894012e-05, - "loss": 0.2861, + "epoch": 0.25, + "grad_norm": 0.2538079070203393, + "learning_rate": 1.7605653865114164e-05, + "loss": 0.1792, "step": 5403 }, { - "epoch": 0.31, - "grad_norm": 0.3426434244744426, - "learning_rate": 1.6150779977588186e-05, - "loss": 0.2873, + "epoch": 0.25, + "grad_norm": 0.5764539780612977, + "learning_rate": 1.7604687732490395e-05, + "loss": 0.3614, "step": 5404 }, { - "epoch": 0.31, - "grad_norm": 0.40844847649718674, - "learning_rate": 1.6149312604281115e-05, - "loss": 0.2428, + "epoch": 0.25, + "grad_norm": 0.4828898287129161, + "learning_rate": 1.7603721431506166e-05, + "loss": 0.3602, "step": 5405 }, { - "epoch": 0.31, - "grad_norm": 0.332255987493901, - "learning_rate": 1.6147845018023612e-05, - "loss": 0.1821, + "epoch": 0.25, + "grad_norm": 0.551999541937087, + "learning_rate": 1.760275496218288e-05, + "loss": 0.4022, "step": 5406 }, { - "epoch": 0.31, - "grad_norm": 0.35299100766743446, - "learning_rate": 1.61463772188665e-05, - "loss": 0.3115, + "epoch": 0.25, + "grad_norm": 0.49868403841116477, + "learning_rate": 1.7601788324541923e-05, + "loss": 0.3023, "step": 5407 }, { - "epoch": 0.31, - "grad_norm": 0.46449691352497247, - "learning_rate": 1.6144909206860607e-05, - "loss": 0.3955, + "epoch": 0.25, + "grad_norm": 0.3641138768088273, + "learning_rate": 1.7600821518604697e-05, + "loss": 0.2889, "step": 5408 }, { - "epoch": 0.31, - "grad_norm": 0.3484300824139732, - "learning_rate": 1.6143440982056777e-05, - "loss": 0.2251, + "epoch": 0.25, + "grad_norm": 0.6178808255566147, + "learning_rate": 1.759985454439261e-05, + "loss": 0.3469, "step": 5409 }, { - "epoch": 0.31, - "grad_norm": 0.6109240447729549, - "learning_rate": 1.614197254450585e-05, - "loss": 0.378, + "epoch": 0.25, + "grad_norm": 0.3737169623201628, + "learning_rate": 1.7598887401927067e-05, + "loss": 0.2347, "step": 5410 }, { - "epoch": 0.31, - "grad_norm": 0.29480715347424497, - "learning_rate": 1.6140503894258674e-05, - "loss": 0.2724, + "epoch": 0.25, + "grad_norm": 0.46720364496599337, + "learning_rate": 1.7597920091229485e-05, + "loss": 0.3177, "step": 5411 }, { - "epoch": 0.31, - "grad_norm": 0.3103918092644329, - "learning_rate": 1.6139035031366116e-05, - "loss": 0.169, + "epoch": 0.25, + "grad_norm": 0.39455697327976896, + "learning_rate": 1.759695261232127e-05, + "loss": 0.2881, "step": 5412 }, { - "epoch": 0.31, - "grad_norm": 0.3729779854233855, - "learning_rate": 1.6137565955879036e-05, - "loss": 0.3555, + "epoch": 0.25, + "grad_norm": 1.0322737334721312, + "learning_rate": 1.7595984965223847e-05, + "loss": 0.4857, "step": 5413 }, { - "epoch": 0.31, - "grad_norm": 0.7389313540620621, - "learning_rate": 1.6136096667848313e-05, - "loss": 0.5823, + "epoch": 0.25, + "grad_norm": 0.7169512191447588, + "learning_rate": 1.759501714995864e-05, + "loss": 0.3822, "step": 5414 }, { - "epoch": 0.31, - "grad_norm": 0.3544691682936791, - "learning_rate": 1.6134627167324827e-05, - "loss": 0.2543, + "epoch": 0.25, + "grad_norm": 0.4219749073623418, + "learning_rate": 1.7594049166547073e-05, + "loss": 0.3461, "step": 5415 }, { - "epoch": 0.31, - "grad_norm": 0.3960851188225142, - "learning_rate": 1.613315745435946e-05, - "loss": 0.2833, + "epoch": 0.25, + "grad_norm": 0.32681878773960116, + "learning_rate": 1.7593081015010576e-05, + "loss": 0.2607, "step": 5416 }, { - "epoch": 0.31, - "grad_norm": 0.2765012263636344, - "learning_rate": 1.613168752900312e-05, - "loss": 0.1917, + "epoch": 0.25, + "grad_norm": 0.34474579751439477, + "learning_rate": 1.7592112695370583e-05, + "loss": 0.1723, "step": 5417 }, { - "epoch": 0.31, - "grad_norm": 0.6375209628011956, - "learning_rate": 1.61302173913067e-05, - "loss": 0.4634, + "epoch": 0.25, + "grad_norm": 0.518080056678753, + "learning_rate": 1.7591144207648537e-05, + "loss": 0.3486, "step": 5418 }, { - "epoch": 0.31, - "grad_norm": 0.2715428312700595, - "learning_rate": 1.612874704132112e-05, - "loss": 0.2351, + "epoch": 0.25, + "grad_norm": 0.5348655738030185, + "learning_rate": 1.759017555186587e-05, + "loss": 0.4202, "step": 5419 }, { - "epoch": 0.31, - "grad_norm": 0.7184165179152175, - "learning_rate": 1.612727647909729e-05, - "loss": 0.5226, + "epoch": 0.25, + "grad_norm": 0.39325533270405216, + "learning_rate": 1.7589206728044033e-05, + "loss": 0.2187, "step": 5420 }, { - "epoch": 0.31, - "grad_norm": 0.7367803499422515, - "learning_rate": 1.612580570468614e-05, - "loss": 0.481, + "epoch": 0.25, + "grad_norm": 0.3914136878533845, + "learning_rate": 1.7588237736204473e-05, + "loss": 0.3559, "step": 5421 }, { - "epoch": 0.31, - "grad_norm": 0.40185485568900076, - "learning_rate": 1.6124334718138602e-05, - "loss": 0.2303, + "epoch": 0.25, + "grad_norm": 0.6978563700692048, + "learning_rate": 1.7587268576368644e-05, + "loss": 0.4926, "step": 5422 }, { - "epoch": 0.31, - "grad_norm": 0.4198601147325941, - "learning_rate": 1.6122863519505618e-05, - "loss": 0.3464, + "epoch": 0.25, + "grad_norm": 0.2376020644869763, + "learning_rate": 1.7586299248558002e-05, + "loss": 0.1381, "step": 5423 }, { - "epoch": 0.31, - "grad_norm": 0.4956845797294979, - "learning_rate": 1.6121392108838132e-05, - "loss": 0.3011, + "epoch": 0.25, + "grad_norm": 0.37996293801078646, + "learning_rate": 1.7585329752794005e-05, + "loss": 0.3394, "step": 5424 }, { - "epoch": 0.31, - "grad_norm": 0.26916066161630525, - "learning_rate": 1.6119920486187102e-05, - "loss": 0.2054, + "epoch": 0.25, + "grad_norm": 1.194114609951105, + "learning_rate": 1.758436008909812e-05, + "loss": 0.7244, "step": 5425 }, { - "epoch": 0.31, - "grad_norm": 1.1799061771549846, - "learning_rate": 1.611844865160349e-05, - "loss": 0.9037, + "epoch": 0.25, + "grad_norm": 0.3303461977181814, + "learning_rate": 1.758339025749181e-05, + "loss": 0.2354, "step": 5426 }, { - "epoch": 0.31, - "grad_norm": 0.34547055929760423, - "learning_rate": 1.611697660513826e-05, - "loss": 0.3116, + "epoch": 0.25, + "grad_norm": 0.4878896305020034, + "learning_rate": 1.758242025799655e-05, + "loss": 0.4082, "step": 5427 }, { - "epoch": 0.31, - "grad_norm": 0.3723557442890717, - "learning_rate": 1.6115504346842393e-05, - "loss": 0.215, + "epoch": 0.25, + "grad_norm": 0.3890857832072473, + "learning_rate": 1.758145009063381e-05, + "loss": 0.3401, "step": 5428 }, { - "epoch": 0.31, - "grad_norm": 0.7354679497674211, - "learning_rate": 1.6114031876766877e-05, - "loss": 0.4834, + "epoch": 0.25, + "grad_norm": 0.2596334678605455, + "learning_rate": 1.758047975542508e-05, + "loss": 0.1725, "step": 5429 }, { - "epoch": 0.31, - "grad_norm": 0.5397253363814973, - "learning_rate": 1.61125591949627e-05, - "loss": 0.385, + "epoch": 0.25, + "grad_norm": 0.6231709559718339, + "learning_rate": 1.7579509252391828e-05, + "loss": 0.4401, "step": 5430 }, { - "epoch": 0.31, - "grad_norm": 0.31820906837570323, - "learning_rate": 1.611108630148086e-05, - "loss": 0.2479, + "epoch": 0.25, + "grad_norm": 0.3003304733335632, + "learning_rate": 1.7578538581555547e-05, + "loss": 0.2721, "step": 5431 }, { - "epoch": 0.31, - "grad_norm": 0.43775281349809764, - "learning_rate": 1.610961319637236e-05, - "loss": 0.3013, + "epoch": 0.25, + "grad_norm": 0.7164788872061879, + "learning_rate": 1.757756774293773e-05, + "loss": 0.4277, "step": 5432 }, { - "epoch": 0.31, - "grad_norm": 0.48523621853194515, - "learning_rate": 1.610813987968822e-05, - "loss": 0.3288, + "epoch": 0.25, + "grad_norm": 0.3870120340850268, + "learning_rate": 1.757659673655986e-05, + "loss": 0.2765, "step": 5433 }, { - "epoch": 0.31, - "grad_norm": 0.3286365653300218, - "learning_rate": 1.6106666351479462e-05, - "loss": 0.2691, + "epoch": 0.25, + "grad_norm": 0.8221862114737117, + "learning_rate": 1.7575625562443446e-05, + "loss": 0.5428, "step": 5434 }, { - "epoch": 0.31, - "grad_norm": 0.4073669030943272, - "learning_rate": 1.610519261179711e-05, - "loss": 0.2907, + "epoch": 0.25, + "grad_norm": 0.5011581808411303, + "learning_rate": 1.7574654220609982e-05, + "loss": 0.3634, "step": 5435 }, { - "epoch": 0.31, - "grad_norm": 0.86365452708418, - "learning_rate": 1.61037186606922e-05, - "loss": 0.5174, + "epoch": 0.25, + "grad_norm": 0.3669117426025068, + "learning_rate": 1.7573682711080976e-05, + "loss": 0.2468, "step": 5436 }, { - "epoch": 0.31, - "grad_norm": 0.34139315066637166, - "learning_rate": 1.610224449821577e-05, - "loss": 0.2648, + "epoch": 0.25, + "grad_norm": 0.33691250694715735, + "learning_rate": 1.757271103387793e-05, + "loss": 0.173, "step": 5437 }, { - "epoch": 0.31, - "grad_norm": 0.781900670758003, - "learning_rate": 1.6100770124418882e-05, - "loss": 0.5627, + "epoch": 0.25, + "grad_norm": 0.6379642204530226, + "learning_rate": 1.7571739189022365e-05, + "loss": 0.4672, "step": 5438 }, { - "epoch": 0.31, - "grad_norm": 0.2584851055369354, - "learning_rate": 1.6099295539352583e-05, - "loss": 0.2251, + "epoch": 0.25, + "grad_norm": 0.33080544044469334, + "learning_rate": 1.757076717653579e-05, + "loss": 0.2518, "step": 5439 }, { - "epoch": 0.31, - "grad_norm": 0.32668801819835613, - "learning_rate": 1.6097820743067945e-05, - "loss": 0.2867, + "epoch": 0.25, + "grad_norm": 0.6051379155582781, + "learning_rate": 1.7569794996439723e-05, + "loss": 0.4238, "step": 5440 }, { - "epoch": 0.31, - "grad_norm": 0.8179062477321604, - "learning_rate": 1.6096345735616036e-05, - "loss": 0.3766, + "epoch": 0.25, + "grad_norm": 1.0151758589234574, + "learning_rate": 1.7568822648755698e-05, + "loss": 0.435, "step": 5441 }, { - "epoch": 0.31, - "grad_norm": 0.5923473411548568, - "learning_rate": 1.6094870517047937e-05, - "loss": 0.4198, + "epoch": 0.25, + "grad_norm": 0.2695400837074594, + "learning_rate": 1.7567850133505228e-05, + "loss": 0.199, "step": 5442 }, { - "epoch": 0.31, - "grad_norm": 0.31872599364875503, - "learning_rate": 1.609339508741473e-05, - "loss": 0.2893, + "epoch": 0.25, + "grad_norm": 0.344835665759451, + "learning_rate": 1.7566877450709853e-05, + "loss": 0.2654, "step": 5443 }, { - "epoch": 0.31, - "grad_norm": 1.379638334873006, - "learning_rate": 1.6091919446767517e-05, - "loss": 0.8334, + "epoch": 0.25, + "grad_norm": 0.39409050533222306, + "learning_rate": 1.7565904600391107e-05, + "loss": 0.3045, "step": 5444 }, { - "epoch": 0.31, - "grad_norm": 0.2200588702556372, - "learning_rate": 1.6090443595157396e-05, - "loss": 0.0865, + "epoch": 0.25, + "grad_norm": 0.4677493086821581, + "learning_rate": 1.7564931582570518e-05, + "loss": 0.3158, "step": 5445 }, { - "epoch": 0.31, - "grad_norm": 0.3663693385434264, - "learning_rate": 1.608896753263547e-05, - "loss": 0.3005, + "epoch": 0.25, + "grad_norm": 0.5786072097585501, + "learning_rate": 1.756395839726964e-05, + "loss": 0.381, "step": 5446 }, { - "epoch": 0.31, - "grad_norm": 0.4523250755868021, - "learning_rate": 1.6087491259252865e-05, - "loss": 0.3534, + "epoch": 0.25, + "grad_norm": 0.4359064415087416, + "learning_rate": 1.7562985044510013e-05, + "loss": 0.3011, "step": 5447 }, { - "epoch": 0.31, - "grad_norm": 0.6353127882984126, - "learning_rate": 1.60860147750607e-05, - "loss": 0.316, + "epoch": 0.25, + "grad_norm": 0.4554207174239243, + "learning_rate": 1.7562011524313187e-05, + "loss": 0.3203, "step": 5448 }, { - "epoch": 0.31, - "grad_norm": 0.4157340148684122, - "learning_rate": 1.60845380801101e-05, - "loss": 0.316, + "epoch": 0.25, + "grad_norm": 0.43245895209493135, + "learning_rate": 1.7561037836700712e-05, + "loss": 0.2733, "step": 5449 }, { - "epoch": 0.31, - "grad_norm": 1.2008936520366085, - "learning_rate": 1.6083061174452214e-05, - "loss": 0.8211, + "epoch": 0.25, + "grad_norm": 0.39512325259801495, + "learning_rate": 1.7560063981694147e-05, + "loss": 0.2513, "step": 5450 }, { - "epoch": 0.31, - "grad_norm": 0.22059778641398187, - "learning_rate": 1.6081584058138178e-05, - "loss": 0.1828, + "epoch": 0.25, + "grad_norm": 0.4566824497437569, + "learning_rate": 1.7559089959315055e-05, + "loss": 0.3156, "step": 5451 }, { - "epoch": 0.31, - "grad_norm": 0.38232047263168106, - "learning_rate": 1.6080106731219147e-05, - "loss": 0.2847, + "epoch": 0.25, + "grad_norm": 0.4423352364127872, + "learning_rate": 1.7558115769584993e-05, + "loss": 0.2668, "step": 5452 }, { - "epoch": 0.31, - "grad_norm": 0.7296256845720059, - "learning_rate": 1.6078629193746283e-05, - "loss": 0.4682, + "epoch": 0.25, + "grad_norm": 0.8350792917082918, + "learning_rate": 1.7557141412525537e-05, + "loss": 0.3648, "step": 5453 }, { - "epoch": 0.31, - "grad_norm": 0.49677717222315976, - "learning_rate": 1.607715144577075e-05, - "loss": 0.2671, + "epoch": 0.25, + "grad_norm": 0.4237963037705092, + "learning_rate": 1.7556166888158247e-05, + "loss": 0.3163, "step": 5454 }, { - "epoch": 0.31, - "grad_norm": 0.3413141139616929, - "learning_rate": 1.6075673487343725e-05, - "loss": 0.2721, + "epoch": 0.25, + "grad_norm": 0.3718714647792502, + "learning_rate": 1.755519219650471e-05, + "loss": 0.3253, "step": 5455 }, { - "epoch": 0.31, - "grad_norm": 1.2043097896012789, - "learning_rate": 1.6074195318516385e-05, - "loss": 0.8261, + "epoch": 0.25, + "grad_norm": 0.43311022096021995, + "learning_rate": 1.7554217337586498e-05, + "loss": 0.2025, "step": 5456 }, { - "epoch": 0.31, - "grad_norm": 0.3009263002823461, - "learning_rate": 1.6072716939339924e-05, - "loss": 0.227, + "epoch": 0.25, + "grad_norm": 0.33321326062518913, + "learning_rate": 1.7553242311425193e-05, + "loss": 0.2365, "step": 5457 }, { - "epoch": 0.31, - "grad_norm": 0.40159765325055435, - "learning_rate": 1.607123834986554e-05, - "loss": 0.2515, + "epoch": 0.25, + "grad_norm": 3.2902275822575535, + "learning_rate": 1.7552267118042387e-05, + "loss": 0.8229, "step": 5458 }, { - "epoch": 0.31, - "grad_norm": 0.4338965338706003, - "learning_rate": 1.606975955014443e-05, - "loss": 0.3357, + "epoch": 0.25, + "grad_norm": 0.44687465848182856, + "learning_rate": 1.7551291757459665e-05, + "loss": 0.2649, "step": 5459 }, { - "epoch": 0.31, - "grad_norm": 1.1490956748629666, - "learning_rate": 1.6068280540227807e-05, - "loss": 0.6926, + "epoch": 0.25, + "grad_norm": 0.7245716937805797, + "learning_rate": 1.755031622969862e-05, + "loss": 0.3053, "step": 5460 }, { - "epoch": 0.31, - "grad_norm": 0.36110726958810213, - "learning_rate": 1.6066801320166897e-05, - "loss": 0.2308, + "epoch": 0.25, + "grad_norm": 0.7769051688167621, + "learning_rate": 1.7549340534780852e-05, + "loss": 0.4785, "step": 5461 }, { - "epoch": 0.31, - "grad_norm": 0.998317680190995, - "learning_rate": 1.606532189001291e-05, - "loss": 0.6979, + "epoch": 0.25, + "grad_norm": 1.4746107021395496, + "learning_rate": 1.754836467272796e-05, + "loss": 0.2147, "step": 5462 }, { - "epoch": 0.31, - "grad_norm": 0.32793332659947005, - "learning_rate": 1.606384224981709e-05, - "loss": 0.3374, + "epoch": 0.25, + "grad_norm": 0.47671657402352136, + "learning_rate": 1.754738864356155e-05, + "loss": 0.3301, "step": 5463 }, { - "epoch": 0.31, - "grad_norm": 0.3159672370415879, - "learning_rate": 1.6062362399630673e-05, - "loss": 0.2052, + "epoch": 0.25, + "grad_norm": 0.43391529410615104, + "learning_rate": 1.754641244730323e-05, + "loss": 0.3225, "step": 5464 }, { - "epoch": 0.31, - "grad_norm": 0.28198983815444506, - "learning_rate": 1.6060882339504905e-05, - "loss": 0.2491, + "epoch": 0.25, + "grad_norm": 0.5811773940306859, + "learning_rate": 1.754543608397461e-05, + "loss": 0.2113, "step": 5465 }, { - "epoch": 0.31, - "grad_norm": 0.3914189720919406, - "learning_rate": 1.6059402069491047e-05, - "loss": 0.3358, + "epoch": 0.25, + "grad_norm": 0.4504944939740752, + "learning_rate": 1.754445955359731e-05, + "loss": 0.3213, "step": 5466 }, { - "epoch": 0.31, - "grad_norm": 0.31173129053212056, - "learning_rate": 1.605792158964035e-05, - "loss": 0.2001, + "epoch": 0.25, + "grad_norm": 0.6286840304556816, + "learning_rate": 1.7543482856192944e-05, + "loss": 0.3719, "step": 5467 }, { - "epoch": 0.31, - "grad_norm": 1.0947564593865875, - "learning_rate": 1.6056440900004094e-05, - "loss": 0.7421, + "epoch": 0.25, + "grad_norm": 0.9886658602524326, + "learning_rate": 1.754250599178314e-05, + "loss": 0.5822, "step": 5468 }, { - "epoch": 0.31, - "grad_norm": 0.5952027400139973, - "learning_rate": 1.6054960000633545e-05, - "loss": 0.4776, + "epoch": 0.25, + "grad_norm": 0.31863041089624744, + "learning_rate": 1.7541528960389525e-05, + "loss": 0.1721, "step": 5469 }, { - "epoch": 0.31, - "grad_norm": 0.420934632151069, - "learning_rate": 1.6053478891579993e-05, - "loss": 0.3136, + "epoch": 0.25, + "grad_norm": 0.5209085253144841, + "learning_rate": 1.7540551762033725e-05, + "loss": 0.319, "step": 5470 }, { - "epoch": 0.31, - "grad_norm": 0.24668825200323996, - "learning_rate": 1.605199757289473e-05, - "loss": 0.1935, + "epoch": 0.25, + "grad_norm": 0.4863838564157181, + "learning_rate": 1.7539574396737374e-05, + "loss": 0.3466, "step": 5471 }, { - "epoch": 0.31, - "grad_norm": 1.1178765441595595, - "learning_rate": 1.605051604462905e-05, - "loss": 0.646, + "epoch": 0.25, + "grad_norm": 0.44091713085452766, + "learning_rate": 1.7538596864522115e-05, + "loss": 0.2453, "step": 5472 }, { - "epoch": 0.31, - "grad_norm": 0.38129148872885377, - "learning_rate": 1.6049034306834258e-05, - "loss": 0.275, + "epoch": 0.25, + "grad_norm": 1.0637869008613243, + "learning_rate": 1.7537619165409588e-05, + "loss": 0.5225, "step": 5473 }, { - "epoch": 0.31, - "grad_norm": 0.4828124601149025, - "learning_rate": 1.6047552359561672e-05, - "loss": 0.3161, + "epoch": 0.25, + "grad_norm": 0.6071861090362503, + "learning_rate": 1.7536641299421437e-05, + "loss": 0.3065, "step": 5474 }, { - "epoch": 0.31, - "grad_norm": 0.6728617355740534, - "learning_rate": 1.6046070202862605e-05, - "loss": 0.378, + "epoch": 0.25, + "grad_norm": 0.3417735730793651, + "learning_rate": 1.753566326657931e-05, + "loss": 0.2379, "step": 5475 }, { - "epoch": 0.31, - "grad_norm": 0.37208712058090737, - "learning_rate": 1.6044587836788385e-05, - "loss": 0.2971, + "epoch": 0.25, + "grad_norm": 0.5201840419593602, + "learning_rate": 1.7534685066904865e-05, + "loss": 0.2679, "step": 5476 }, { - "epoch": 0.31, - "grad_norm": 0.2293316968318131, - "learning_rate": 1.6043105261390352e-05, - "loss": 0.0965, + "epoch": 0.25, + "grad_norm": 1.1233950613986918, + "learning_rate": 1.753370670041975e-05, + "loss": 0.5915, "step": 5477 }, { - "epoch": 0.31, - "grad_norm": 0.4546947048442612, - "learning_rate": 1.6041622476719842e-05, - "loss": 0.3595, + "epoch": 0.25, + "grad_norm": 0.4922709057585047, + "learning_rate": 1.7532728167145634e-05, + "loss": 0.2654, "step": 5478 }, { - "epoch": 0.31, - "grad_norm": 0.3668420835351272, - "learning_rate": 1.6040139482828207e-05, - "loss": 0.2734, + "epoch": 0.25, + "grad_norm": 0.41368725471825823, + "learning_rate": 1.7531749467104173e-05, + "loss": 0.3386, "step": 5479 }, { - "epoch": 0.31, - "grad_norm": 1.312312858344449, - "learning_rate": 1.60386562797668e-05, - "loss": 0.7963, + "epoch": 0.25, + "grad_norm": 0.9412234697337155, + "learning_rate": 1.753077060031704e-05, + "loss": 0.6559, "step": 5480 }, { - "epoch": 0.31, - "grad_norm": 0.6925249119934285, - "learning_rate": 1.6037172867586984e-05, - "loss": 0.3647, + "epoch": 0.25, + "grad_norm": 0.36733941121537433, + "learning_rate": 1.7529791566805905e-05, + "loss": 0.2679, "step": 5481 }, { - "epoch": 0.31, - "grad_norm": 0.4077235005317406, - "learning_rate": 1.6035689246340132e-05, - "loss": 0.2688, + "epoch": 0.25, + "grad_norm": 0.24829608526489824, + "learning_rate": 1.752881236659244e-05, + "loss": 0.0957, "step": 5482 }, { - "epoch": 0.32, - "grad_norm": 0.34113054517286356, - "learning_rate": 1.603420541607762e-05, - "loss": 0.291, + "epoch": 0.25, + "grad_norm": 0.5833504436778258, + "learning_rate": 1.7527832999698326e-05, + "loss": 0.3579, "step": 5483 }, { - "epoch": 0.32, - "grad_norm": 0.23609225173467213, - "learning_rate": 1.6032721376850837e-05, - "loss": 0.1449, + "epoch": 0.25, + "grad_norm": 0.5183251670338388, + "learning_rate": 1.7526853466145248e-05, + "loss": 0.2879, "step": 5484 }, { - "epoch": 0.32, - "grad_norm": 0.4239941906638344, - "learning_rate": 1.603123712871117e-05, - "loss": 0.2859, + "epoch": 0.25, + "grad_norm": 0.8100983767337303, + "learning_rate": 1.7525873765954883e-05, + "loss": 0.3634, "step": 5485 }, { - "epoch": 0.32, - "grad_norm": 0.42335183714015223, - "learning_rate": 1.602975267171002e-05, - "loss": 0.3501, + "epoch": 0.25, + "grad_norm": 0.5898599995075775, + "learning_rate": 1.7524893899148933e-05, + "loss": 0.3806, "step": 5486 }, { - "epoch": 0.32, - "grad_norm": 0.5602162541545916, - "learning_rate": 1.6028268005898798e-05, - "loss": 0.3304, + "epoch": 0.25, + "grad_norm": 0.3679107749423647, + "learning_rate": 1.7523913865749078e-05, + "loss": 0.2789, "step": 5487 }, { - "epoch": 0.32, - "grad_norm": 0.4283245085092912, - "learning_rate": 1.6026783131328915e-05, - "loss": 0.292, + "epoch": 0.25, + "grad_norm": 0.2682326309454914, + "learning_rate": 1.7522933665777026e-05, + "loss": 0.1769, "step": 5488 }, { - "epoch": 0.32, - "grad_norm": 0.27342802197048033, - "learning_rate": 1.6025298048051784e-05, - "loss": 0.229, + "epoch": 0.25, + "grad_norm": 0.7039475583184233, + "learning_rate": 1.752195329925447e-05, + "loss": 0.4787, "step": 5489 }, { - "epoch": 0.32, - "grad_norm": 0.34821181236786064, - "learning_rate": 1.6023812756118845e-05, - "loss": 0.2565, + "epoch": 0.25, + "grad_norm": 0.4600975327503642, + "learning_rate": 1.752097276620312e-05, + "loss": 0.3064, "step": 5490 }, { - "epoch": 0.32, - "grad_norm": 0.4254165247173863, - "learning_rate": 1.602232725558153e-05, - "loss": 0.3036, + "epoch": 0.25, + "grad_norm": 0.5610867409219054, + "learning_rate": 1.751999206664468e-05, + "loss": 0.294, "step": 5491 }, { - "epoch": 0.32, - "grad_norm": 0.4819874115221115, - "learning_rate": 1.6020841546491278e-05, - "loss": 0.3834, + "epoch": 0.25, + "grad_norm": 1.8584668492079515, + "learning_rate": 1.7519011200600868e-05, + "loss": 0.7175, "step": 5492 }, { - "epoch": 0.32, - "grad_norm": 0.8487213879795744, - "learning_rate": 1.6019355628899545e-05, - "loss": 0.5686, + "epoch": 0.25, + "grad_norm": 0.3878836382538289, + "learning_rate": 1.751803016809339e-05, + "loss": 0.3, "step": 5493 }, { - "epoch": 0.32, - "grad_norm": 0.33225687121251357, - "learning_rate": 1.6017869502857785e-05, - "loss": 0.2232, + "epoch": 0.25, + "grad_norm": 0.3295192742854476, + "learning_rate": 1.751704896914397e-05, + "loss": 0.2256, "step": 5494 }, { - "epoch": 0.32, - "grad_norm": 0.3973590452255858, - "learning_rate": 1.6016383168417463e-05, - "loss": 0.3365, + "epoch": 0.25, + "grad_norm": 0.351966662334592, + "learning_rate": 1.7516067603774334e-05, + "loss": 0.3035, "step": 5495 }, { - "epoch": 0.32, - "grad_norm": 0.29339420160733515, - "learning_rate": 1.601489662563005e-05, - "loss": 0.1986, + "epoch": 0.25, + "grad_norm": 0.628088490271632, + "learning_rate": 1.7515086072006206e-05, + "loss": 0.3153, "step": 5496 }, { - "epoch": 0.32, - "grad_norm": 0.3666754590084301, - "learning_rate": 1.6013409874547026e-05, - "loss": 0.2422, + "epoch": 0.25, + "grad_norm": 1.4191136572834981, + "learning_rate": 1.7514104373861315e-05, + "loss": 0.7893, "step": 5497 }, { - "epoch": 0.32, - "grad_norm": 0.8964912741710412, - "learning_rate": 1.6011922915219877e-05, - "loss": 0.4598, + "epoch": 0.25, + "grad_norm": 0.4116527780239412, + "learning_rate": 1.751312250936139e-05, + "loss": 0.2572, "step": 5498 }, { - "epoch": 0.32, - "grad_norm": 0.38686744503564313, - "learning_rate": 1.6010435747700097e-05, - "loss": 0.3271, + "epoch": 0.25, + "grad_norm": 0.4050692040878053, + "learning_rate": 1.7512140478528178e-05, + "loss": 0.2823, "step": 5499 }, { - "epoch": 0.32, - "grad_norm": 0.36402267171787295, - "learning_rate": 1.600894837203918e-05, - "loss": 0.1334, + "epoch": 0.25, + "grad_norm": 0.32692244189556385, + "learning_rate": 1.7511158281383414e-05, + "loss": 0.199, "step": 5500 }, { - "epoch": 0.32, - "grad_norm": 0.39810639722148555, - "learning_rate": 1.6007460788288644e-05, - "loss": 0.3626, + "epoch": 0.25, + "grad_norm": 0.6127417492125128, + "learning_rate": 1.7510175917948848e-05, + "loss": 0.4033, "step": 5501 }, { - "epoch": 0.32, - "grad_norm": 0.26937219839700005, - "learning_rate": 1.60059729965e-05, - "loss": 0.237, + "epoch": 0.25, + "grad_norm": 0.44488398197754225, + "learning_rate": 1.7509193388246228e-05, + "loss": 0.2975, "step": 5502 }, { - "epoch": 0.32, - "grad_norm": 0.3572405700250666, - "learning_rate": 1.600448499672477e-05, - "loss": 0.2023, + "epoch": 0.25, + "grad_norm": 0.4129553053942979, + "learning_rate": 1.7508210692297297e-05, + "loss": 0.3279, "step": 5503 }, { - "epoch": 0.32, - "grad_norm": 0.7519951161732132, - "learning_rate": 1.600299678901448e-05, - "loss": 0.4123, + "epoch": 0.25, + "grad_norm": 1.0363329813131057, + "learning_rate": 1.7507227830123823e-05, + "loss": 0.3648, "step": 5504 }, { - "epoch": 0.32, - "grad_norm": 0.8481384242186841, - "learning_rate": 1.6001508373420666e-05, - "loss": 0.609, + "epoch": 0.25, + "grad_norm": 0.47848448351362016, + "learning_rate": 1.750624480174756e-05, + "loss": 0.3238, "step": 5505 }, { - "epoch": 0.32, - "grad_norm": 0.38602966781075637, - "learning_rate": 1.6000019749994882e-05, - "loss": 0.3056, + "epoch": 0.25, + "grad_norm": 0.4066876099306231, + "learning_rate": 1.750526160719027e-05, + "loss": 0.327, "step": 5506 }, { - "epoch": 0.32, - "grad_norm": 0.3067806475149186, - "learning_rate": 1.5998530918788667e-05, - "loss": 0.254, + "epoch": 0.25, + "grad_norm": 0.4134135759344062, + "learning_rate": 1.750427824647372e-05, + "loss": 0.3355, "step": 5507 }, { - "epoch": 0.32, - "grad_norm": 0.28563113629324893, - "learning_rate": 1.5997041879853585e-05, - "loss": 0.1853, + "epoch": 0.25, + "grad_norm": 0.2578958417890306, + "learning_rate": 1.7503294719619686e-05, + "loss": 0.1796, "step": 5508 }, { - "epoch": 0.32, - "grad_norm": 0.560627051750689, - "learning_rate": 1.5995552633241206e-05, - "loss": 0.3737, + "epoch": 0.25, + "grad_norm": 1.4175381476440974, + "learning_rate": 1.7502311026649934e-05, + "loss": 0.8524, "step": 5509 }, { - "epoch": 0.32, - "grad_norm": 0.31902956156250195, - "learning_rate": 1.599406317900309e-05, - "loss": 0.2693, + "epoch": 0.25, + "grad_norm": 1.1015000460846274, + "learning_rate": 1.750132716758625e-05, + "loss": 0.6689, "step": 5510 }, { - "epoch": 0.32, - "grad_norm": 1.0324425132045223, - "learning_rate": 1.5992573517190826e-05, - "loss": 0.7321, + "epoch": 0.25, + "grad_norm": 0.31954073202572597, + "learning_rate": 1.750034314245041e-05, + "loss": 0.2532, "step": 5511 }, { - "epoch": 0.32, - "grad_norm": 0.5816127897195071, - "learning_rate": 1.5991083647856003e-05, - "loss": 0.3399, + "epoch": 0.25, + "grad_norm": 0.8446500106591986, + "learning_rate": 1.7499358951264206e-05, + "loss": 0.4939, "step": 5512 }, { - "epoch": 0.32, - "grad_norm": 0.3638464303346846, - "learning_rate": 1.5989593571050207e-05, - "loss": 0.2275, + "epoch": 0.25, + "grad_norm": 0.33155850869884296, + "learning_rate": 1.749837459404942e-05, + "loss": 0.2585, "step": 5513 }, { - "epoch": 0.32, - "grad_norm": 0.2997715451537564, - "learning_rate": 1.5988103286825043e-05, - "loss": 0.2977, + "epoch": 0.25, + "grad_norm": 0.33607717079610905, + "learning_rate": 1.749739007082785e-05, + "loss": 0.2288, "step": 5514 }, { - "epoch": 0.32, - "grad_norm": 0.3882699857377122, - "learning_rate": 1.5986612795232122e-05, - "loss": 0.3314, + "epoch": 0.25, + "grad_norm": 0.4390946480084589, + "learning_rate": 1.7496405381621287e-05, + "loss": 0.3352, "step": 5515 }, { - "epoch": 0.32, - "grad_norm": 0.4607329976592203, - "learning_rate": 1.5985122096323053e-05, - "loss": 0.3253, + "epoch": 0.25, + "grad_norm": 0.940680494017994, + "learning_rate": 1.7495420526451536e-05, + "loss": 0.649, "step": 5516 }, { - "epoch": 0.32, - "grad_norm": 0.42979471017625737, - "learning_rate": 1.5983631190149466e-05, - "loss": 0.3066, + "epoch": 0.25, + "grad_norm": 0.36551814881252376, + "learning_rate": 1.7494435505340397e-05, + "loss": 0.2102, "step": 5517 }, { - "epoch": 0.32, - "grad_norm": 0.3052137957577575, - "learning_rate": 1.5982140076762988e-05, - "loss": 0.2657, + "epoch": 0.25, + "grad_norm": 0.7879957787318361, + "learning_rate": 1.749345031830968e-05, + "loss": 0.4342, "step": 5518 }, { - "epoch": 0.32, - "grad_norm": 0.4134905162494446, - "learning_rate": 1.5980648756215256e-05, - "loss": 0.3369, + "epoch": 0.25, + "grad_norm": 0.3174696259669652, + "learning_rate": 1.7492464965381194e-05, + "loss": 0.2537, "step": 5519 }, { - "epoch": 0.32, - "grad_norm": 0.5637451037693646, - "learning_rate": 1.597915722855792e-05, - "loss": 0.3639, + "epoch": 0.25, + "grad_norm": 0.704205586885652, + "learning_rate": 1.7491479446576755e-05, + "loss": 0.338, "step": 5520 }, { - "epoch": 0.32, - "grad_norm": 0.37421259673932755, - "learning_rate": 1.597766549384262e-05, - "loss": 0.2469, + "epoch": 0.25, + "grad_norm": 0.4024999658306773, + "learning_rate": 1.7490493761918183e-05, + "loss": 0.2876, "step": 5521 }, { - "epoch": 0.32, - "grad_norm": 0.3058915736850958, - "learning_rate": 1.5976173552121023e-05, - "loss": 0.2922, + "epoch": 0.25, + "grad_norm": 0.45776731587904745, + "learning_rate": 1.74895079114273e-05, + "loss": 0.3656, "step": 5522 }, { - "epoch": 0.32, - "grad_norm": 0.2841295684463912, - "learning_rate": 1.597468140344479e-05, - "loss": 0.1977, + "epoch": 0.25, + "grad_norm": 0.5962707177436178, + "learning_rate": 1.7488521895125927e-05, + "loss": 0.3949, "step": 5523 }, { - "epoch": 0.32, - "grad_norm": 0.6162024062628615, - "learning_rate": 1.59731890478656e-05, - "loss": 0.2815, + "epoch": 0.25, + "grad_norm": 0.4076064963823635, + "learning_rate": 1.7487535713035897e-05, + "loss": 0.3203, "step": 5524 }, { - "epoch": 0.32, - "grad_norm": 0.4094893668831604, - "learning_rate": 1.5971696485435128e-05, - "loss": 0.3378, + "epoch": 0.25, + "grad_norm": 0.42992572075214125, + "learning_rate": 1.7486549365179045e-05, + "loss": 0.2369, "step": 5525 }, { - "epoch": 0.32, - "grad_norm": 0.40588421773351036, - "learning_rate": 1.5970203716205066e-05, - "loss": 0.2895, + "epoch": 0.25, + "grad_norm": 0.3954458500919235, + "learning_rate": 1.7485562851577205e-05, + "loss": 0.2901, "step": 5526 }, { - "epoch": 0.32, - "grad_norm": 0.6938594055918771, - "learning_rate": 1.5968710740227106e-05, - "loss": 0.3855, + "epoch": 0.25, + "grad_norm": 0.3271167740892574, + "learning_rate": 1.748457617225222e-05, + "loss": 0.2412, "step": 5527 }, { - "epoch": 0.32, - "grad_norm": 0.4116993177994943, - "learning_rate": 1.5967217557552944e-05, - "loss": 0.329, + "epoch": 0.25, + "grad_norm": 1.2681146861906707, + "learning_rate": 1.7483589327225927e-05, + "loss": 0.614, "step": 5528 }, { - "epoch": 0.32, - "grad_norm": 0.22779602155734383, - "learning_rate": 1.5965724168234304e-05, - "loss": 0.086, + "epoch": 0.25, + "grad_norm": 0.3870381544686057, + "learning_rate": 1.7482602316520183e-05, + "loss": 0.3309, "step": 5529 }, { - "epoch": 0.32, - "grad_norm": 0.3500048899822258, - "learning_rate": 1.5964230572322884e-05, - "loss": 0.2697, + "epoch": 0.25, + "grad_norm": 0.5584428584285267, + "learning_rate": 1.7481615140156837e-05, + "loss": 0.3171, "step": 5530 }, { - "epoch": 0.32, - "grad_norm": 0.4157625173818804, - "learning_rate": 1.596273676987042e-05, - "loss": 0.3202, + "epoch": 0.25, + "grad_norm": 0.43304931957735154, + "learning_rate": 1.7480627798157737e-05, + "loss": 0.3246, "step": 5531 }, { - "epoch": 0.32, - "grad_norm": 0.8107650750102577, - "learning_rate": 1.596124276092864e-05, - "loss": 0.4456, + "epoch": 0.25, + "grad_norm": 0.44842400368575336, + "learning_rate": 1.747964029054475e-05, + "loss": 0.3216, "step": 5532 }, { - "epoch": 0.32, - "grad_norm": 0.35678553696656035, - "learning_rate": 1.595974854554928e-05, - "loss": 0.2202, + "epoch": 0.25, + "grad_norm": 0.6474209819018528, + "learning_rate": 1.7478652617339738e-05, + "loss": 0.4007, "step": 5533 }, { - "epoch": 0.32, - "grad_norm": 0.40454325538013913, - "learning_rate": 1.5958254123784077e-05, - "loss": 0.3206, + "epoch": 0.25, + "grad_norm": 0.2895672645726194, + "learning_rate": 1.7477664778564562e-05, + "loss": 0.1985, "step": 5534 }, { - "epoch": 0.32, - "grad_norm": 0.2564749702657718, - "learning_rate": 1.5956759495684796e-05, - "loss": 0.2145, + "epoch": 0.25, + "grad_norm": 0.6804243401085998, + "learning_rate": 1.7476676774241097e-05, + "loss": 0.3806, "step": 5535 }, { - "epoch": 0.32, - "grad_norm": 0.37351136197316315, - "learning_rate": 1.595526466130319e-05, - "loss": 0.1787, + "epoch": 0.25, + "grad_norm": 0.4569814324692539, + "learning_rate": 1.7475688604391218e-05, + "loss": 0.3199, "step": 5536 }, { - "epoch": 0.32, - "grad_norm": 0.5263171270492955, - "learning_rate": 1.595376962069102e-05, - "loss": 0.3519, + "epoch": 0.25, + "grad_norm": 0.7927348814684542, + "learning_rate": 1.7474700269036794e-05, + "loss": 0.4179, "step": 5537 }, { - "epoch": 0.32, - "grad_norm": 0.42044904764267466, - "learning_rate": 1.5952274373900067e-05, - "loss": 0.3532, + "epoch": 0.25, + "grad_norm": 0.6753437141261602, + "learning_rate": 1.7473711768199708e-05, + "loss": 0.3589, "step": 5538 }, { - "epoch": 0.32, - "grad_norm": 0.45297528673077875, - "learning_rate": 1.5950778920982108e-05, - "loss": 0.2163, + "epoch": 0.25, + "grad_norm": 0.35681800898559896, + "learning_rate": 1.747272310190185e-05, + "loss": 0.3038, "step": 5539 }, { - "epoch": 0.32, - "grad_norm": 0.3791943745784225, - "learning_rate": 1.5949283261988934e-05, - "loss": 0.3221, + "epoch": 0.25, + "grad_norm": 0.41427464489092086, + "learning_rate": 1.7471734270165103e-05, + "loss": 0.2292, "step": 5540 }, { - "epoch": 0.32, - "grad_norm": 0.2635293913337198, - "learning_rate": 1.5947787396972332e-05, - "loss": 0.2126, + "epoch": 0.25, + "grad_norm": 0.7296291415376092, + "learning_rate": 1.7470745273011362e-05, + "loss": 0.3438, "step": 5541 }, { - "epoch": 0.32, - "grad_norm": 0.39755428740194615, - "learning_rate": 1.5946291325984108e-05, - "loss": 0.2607, - "step": 5542 + "epoch": 0.25, + "grad_norm": 0.3836577607827537, + "learning_rate": 1.7469756110462525e-05, + "loss": 0.2987, + "step": 5542 }, { - "epoch": 0.32, - "grad_norm": 0.3970521445893526, - "learning_rate": 1.5944795049076072e-05, - "loss": 0.3215, + "epoch": 0.25, + "grad_norm": 0.5038836479639311, + "learning_rate": 1.746876678254048e-05, + "loss": 0.3117, "step": 5543 }, { - "epoch": 0.32, - "grad_norm": 0.6915226148979662, - "learning_rate": 1.594329856630004e-05, - "loss": 0.5286, + "epoch": 0.25, + "grad_norm": 0.5555574090695773, + "learning_rate": 1.746777728926714e-05, + "loss": 0.3172, "step": 5544 }, { - "epoch": 0.32, - "grad_norm": 1.0272727002837614, - "learning_rate": 1.5941801877707835e-05, - "loss": 0.5231, + "epoch": 0.25, + "grad_norm": 0.3703209841274183, + "learning_rate": 1.746678763066441e-05, + "loss": 0.2682, "step": 5545 }, { - "epoch": 0.32, - "grad_norm": 0.2711694648544462, - "learning_rate": 1.594030498335129e-05, - "loss": 0.2551, + "epoch": 0.25, + "grad_norm": 0.4342322226128129, + "learning_rate": 1.7465797806754196e-05, + "loss": 0.3675, "step": 5546 }, { - "epoch": 0.32, - "grad_norm": 1.0522809594213387, - "learning_rate": 1.5938807883282234e-05, - "loss": 0.6938, + "epoch": 0.25, + "grad_norm": 0.28401551730067803, + "learning_rate": 1.7464807817558415e-05, + "loss": 0.2278, "step": 5547 }, { - "epoch": 0.32, - "grad_norm": 0.2780677764777073, - "learning_rate": 1.5937310577552517e-05, - "loss": 0.2152, + "epoch": 0.25, + "grad_norm": 0.47794456547842595, + "learning_rate": 1.7463817663098984e-05, + "loss": 0.3532, "step": 5548 }, { - "epoch": 0.32, - "grad_norm": 0.3764746606296057, - "learning_rate": 1.5935813066213988e-05, - "loss": 0.254, + "epoch": 0.25, + "grad_norm": 1.0636473162304059, + "learning_rate": 1.746282734339782e-05, + "loss": 0.6456, "step": 5549 }, { - "epoch": 0.32, - "grad_norm": 0.39611418844585977, - "learning_rate": 1.593431534931851e-05, - "loss": 0.345, + "epoch": 0.25, + "grad_norm": 0.32346275506783834, + "learning_rate": 1.7461836858476858e-05, + "loss": 0.2311, "step": 5550 }, { - "epoch": 0.32, - "grad_norm": 0.6565438887638069, - "learning_rate": 1.5932817426917945e-05, - "loss": 0.4974, + "epoch": 0.26, + "grad_norm": 0.4756908085461457, + "learning_rate": 1.7460846208358013e-05, + "loss": 0.3495, "step": 5551 }, { - "epoch": 0.32, - "grad_norm": 0.3633914159676232, - "learning_rate": 1.593131929906417e-05, - "loss": 0.2314, + "epoch": 0.26, + "grad_norm": 0.5989378972595614, + "learning_rate": 1.745985539306323e-05, + "loss": 0.4244, "step": 5552 }, { - "epoch": 0.32, - "grad_norm": 0.31526102547006846, - "learning_rate": 1.5929820965809063e-05, - "loss": 0.2752, + "epoch": 0.26, + "grad_norm": 0.23702202482773296, + "learning_rate": 1.7458864412614436e-05, + "loss": 0.0756, "step": 5553 }, { - "epoch": 0.32, - "grad_norm": 0.3736946671960914, - "learning_rate": 1.592832242720451e-05, - "loss": 0.3219, + "epoch": 0.26, + "grad_norm": 0.4152657828057209, + "learning_rate": 1.745787326703357e-05, + "loss": 0.3079, "step": 5554 }, { - "epoch": 0.32, - "grad_norm": 0.3615427063133629, - "learning_rate": 1.5926823683302404e-05, - "loss": 0.2956, + "epoch": 0.26, + "grad_norm": 0.5623651184138426, + "learning_rate": 1.7456881956342583e-05, + "loss": 0.3968, "step": 5555 }, { - "epoch": 0.32, - "grad_norm": 0.40873885213362576, - "learning_rate": 1.5925324734154654e-05, - "loss": 0.2416, + "epoch": 0.26, + "grad_norm": 0.4699301568068827, + "learning_rate": 1.745589048056341e-05, + "loss": 0.2673, "step": 5556 }, { - "epoch": 0.32, - "grad_norm": 0.5019426658331753, - "learning_rate": 1.5923825579813158e-05, - "loss": 0.4355, + "epoch": 0.26, + "grad_norm": 0.4194592787219083, + "learning_rate": 1.7454898839718014e-05, + "loss": 0.337, "step": 5557 }, { - "epoch": 0.32, - "grad_norm": 0.3081784728369049, - "learning_rate": 1.592232622032984e-05, - "loss": 0.2679, + "epoch": 0.26, + "grad_norm": 0.39633885544904907, + "learning_rate": 1.7453907033828343e-05, + "loss": 0.3409, "step": 5558 }, { - "epoch": 0.32, - "grad_norm": 0.581079170192961, - "learning_rate": 1.5920826655756617e-05, - "loss": 0.4, + "epoch": 0.26, + "grad_norm": 0.44179787569327256, + "learning_rate": 1.7452915062916348e-05, + "loss": 0.2552, "step": 5559 }, { - "epoch": 0.32, - "grad_norm": 0.5650872586904916, - "learning_rate": 1.591932688614542e-05, - "loss": 0.4542, + "epoch": 0.26, + "grad_norm": 0.273151538827484, + "learning_rate": 1.7451922927004003e-05, + "loss": 0.1806, "step": 5560 }, { - "epoch": 0.32, - "grad_norm": 0.27258563527685675, - "learning_rate": 1.5917826911548194e-05, - "loss": 0.2233, + "epoch": 0.26, + "grad_norm": 1.49414716285304, + "learning_rate": 1.7450930626113264e-05, + "loss": 0.7638, "step": 5561 }, { - "epoch": 0.32, - "grad_norm": 0.2959203113731466, - "learning_rate": 1.591632673201687e-05, - "loss": 0.2158, + "epoch": 0.26, + "grad_norm": 0.42873582401585386, + "learning_rate": 1.7449938160266105e-05, + "loss": 0.3382, "step": 5562 }, { - "epoch": 0.32, - "grad_norm": 1.2341214992158127, - "learning_rate": 1.5914826347603412e-05, - "loss": 0.7517, + "epoch": 0.26, + "grad_norm": 0.36190364832330235, + "learning_rate": 1.744894552948449e-05, + "loss": 0.2742, "step": 5563 }, { - "epoch": 0.32, - "grad_norm": 0.33028417983241837, - "learning_rate": 1.591332575835977e-05, - "loss": 0.2764, + "epoch": 0.26, + "grad_norm": 0.8134616846873223, + "learning_rate": 1.7447952733790404e-05, + "loss": 0.5386, "step": 5564 }, { - "epoch": 0.32, - "grad_norm": 0.46893872523644764, - "learning_rate": 1.591182496433791e-05, - "loss": 0.3126, + "epoch": 0.26, + "grad_norm": 0.3424556384055768, + "learning_rate": 1.7446959773205822e-05, + "loss": 0.1773, "step": 5565 }, { - "epoch": 0.32, - "grad_norm": 0.5536360678259141, - "learning_rate": 1.5910323965589803e-05, - "loss": 0.3726, + "epoch": 0.26, + "grad_norm": 0.3491969739889134, + "learning_rate": 1.7445966647752728e-05, + "loss": 0.2339, "step": 5566 }, { - "epoch": 0.32, - "grad_norm": 0.3735817111568126, - "learning_rate": 1.5908822762167435e-05, - "loss": 0.3044, + "epoch": 0.26, + "grad_norm": 0.5635256336639329, + "learning_rate": 1.744497335745311e-05, + "loss": 0.3855, "step": 5567 }, { - "epoch": 0.32, - "grad_norm": 0.35011633102273504, - "learning_rate": 1.5907321354122788e-05, - "loss": 0.2051, + "epoch": 0.26, + "grad_norm": 0.7408331178990825, + "learning_rate": 1.744397990232896e-05, + "loss": 0.4669, "step": 5568 }, { - "epoch": 0.32, - "grad_norm": 0.3548591682635962, - "learning_rate": 1.5905819741507856e-05, - "loss": 0.2592, + "epoch": 0.26, + "grad_norm": 0.4341395588313263, + "learning_rate": 1.7442986282402262e-05, + "loss": 0.2919, "step": 5569 }, { - "epoch": 0.32, - "grad_norm": 0.3581690176525001, - "learning_rate": 1.590431792437464e-05, - "loss": 0.3012, + "epoch": 0.26, + "grad_norm": 0.36482458113486965, + "learning_rate": 1.7441992497695026e-05, + "loss": 0.2912, "step": 5570 }, { - "epoch": 0.32, - "grad_norm": 0.7579819660734486, - "learning_rate": 1.590281590277515e-05, - "loss": 0.5101, + "epoch": 0.26, + "grad_norm": 0.3439316292028626, + "learning_rate": 1.744099854822925e-05, + "loss": 0.199, "step": 5571 }, { - "epoch": 0.32, - "grad_norm": 0.4774255317314944, - "learning_rate": 1.5901313676761397e-05, - "loss": 0.3312, + "epoch": 0.26, + "grad_norm": 0.3621455134742387, + "learning_rate": 1.7440004434026936e-05, + "loss": 0.254, "step": 5572 }, { - "epoch": 0.32, - "grad_norm": 0.3935810128176364, - "learning_rate": 1.5899811246385404e-05, - "loss": 0.2783, + "epoch": 0.26, + "grad_norm": 0.8622771821856549, + "learning_rate": 1.7439010155110097e-05, + "loss": 0.4139, "step": 5573 }, { - "epoch": 0.32, - "grad_norm": 0.2434832696741011, - "learning_rate": 1.5898308611699204e-05, - "loss": 0.2066, + "epoch": 0.26, + "grad_norm": 0.4538153950747039, + "learning_rate": 1.743801571150074e-05, + "loss": 0.3527, "step": 5574 }, { - "epoch": 0.32, - "grad_norm": 0.838098712521921, - "learning_rate": 1.589680577275483e-05, - "loss": 0.4197, + "epoch": 0.26, + "grad_norm": 0.3723007802785951, + "learning_rate": 1.7437021103220885e-05, + "loss": 0.2941, "step": 5575 }, { - "epoch": 0.32, - "grad_norm": 0.38018436720857546, - "learning_rate": 1.5895302729604323e-05, - "loss": 0.274, + "epoch": 0.26, + "grad_norm": 0.7528489615942247, + "learning_rate": 1.743602633029255e-05, + "loss": 0.416, "step": 5576 }, { - "epoch": 0.32, - "grad_norm": 0.5879533204990789, - "learning_rate": 1.589379948229974e-05, - "loss": 0.4019, + "epoch": 0.26, + "grad_norm": 0.33891911459719626, + "learning_rate": 1.743503139273776e-05, + "loss": 0.1789, "step": 5577 }, { - "epoch": 0.32, - "grad_norm": 0.469626009070046, - "learning_rate": 1.5892296030893134e-05, - "loss": 0.2904, + "epoch": 0.26, + "grad_norm": 0.3010382806624568, + "learning_rate": 1.743403629057854e-05, + "loss": 0.284, "step": 5578 }, { - "epoch": 0.32, - "grad_norm": 0.3569116943337263, - "learning_rate": 1.5890792375436568e-05, - "loss": 0.2826, + "epoch": 0.26, + "grad_norm": 0.4792897691666127, + "learning_rate": 1.7433041023836923e-05, + "loss": 0.2743, "step": 5579 }, { - "epoch": 0.32, - "grad_norm": 0.30781010648834556, - "learning_rate": 1.5889288515982118e-05, - "loss": 0.214, + "epoch": 0.26, + "grad_norm": 0.7178760141248861, + "learning_rate": 1.743204559253494e-05, + "loss": 0.4392, "step": 5580 }, { - "epoch": 0.32, - "grad_norm": 0.4452871745168325, - "learning_rate": 1.5887784452581857e-05, - "loss": 0.3477, + "epoch": 0.26, + "grad_norm": 0.40760133357083406, + "learning_rate": 1.743104999669463e-05, + "loss": 0.3359, "step": 5581 }, { - "epoch": 0.32, - "grad_norm": 0.3423890676840111, - "learning_rate": 1.5886280185287874e-05, - "loss": 0.2258, + "epoch": 0.26, + "grad_norm": 0.4355898438559445, + "learning_rate": 1.7430054236338037e-05, + "loss": 0.3365, "step": 5582 }, { - "epoch": 0.32, - "grad_norm": 0.9397225493977781, - "learning_rate": 1.588477571415226e-05, - "loss": 0.4489, + "epoch": 0.26, + "grad_norm": 0.32172275591398297, + "learning_rate": 1.7429058311487206e-05, + "loss": 0.158, "step": 5583 }, { - "epoch": 0.32, - "grad_norm": 0.8992191750862868, - "learning_rate": 1.588327103922712e-05, - "loss": 0.5669, + "epoch": 0.26, + "grad_norm": 0.3182237158452271, + "learning_rate": 1.742806222216418e-05, + "loss": 0.257, "step": 5584 }, { - "epoch": 0.32, - "grad_norm": 0.34519578977057513, - "learning_rate": 1.5881766160564553e-05, - "loss": 0.2008, + "epoch": 0.26, + "grad_norm": 0.9623790639019112, + "learning_rate": 1.742706596839102e-05, + "loss": 0.6209, "step": 5585 }, { - "epoch": 0.32, - "grad_norm": 0.2613946128658462, - "learning_rate": 1.588026107821668e-05, - "loss": 0.2106, + "epoch": 0.26, + "grad_norm": 0.36337913276205897, + "learning_rate": 1.7426069550189778e-05, + "loss": 0.2937, "step": 5586 }, { - "epoch": 0.32, - "grad_norm": 1.1043781598317808, - "learning_rate": 1.5878755792235616e-05, - "loss": 0.5675, + "epoch": 0.26, + "grad_norm": 0.4394258877417794, + "learning_rate": 1.7425072967582507e-05, + "loss": 0.3099, "step": 5587 }, { - "epoch": 0.32, - "grad_norm": 0.3429172657775994, - "learning_rate": 1.5877250302673493e-05, - "loss": 0.2068, + "epoch": 0.26, + "grad_norm": 1.0956554960119813, + "learning_rate": 1.7424076220591285e-05, + "loss": 0.8158, "step": 5588 }, { - "epoch": 0.32, - "grad_norm": 0.6522423001767897, - "learning_rate": 1.5875744609582444e-05, - "loss": 0.3912, + "epoch": 0.26, + "grad_norm": 0.27556780452144203, + "learning_rate": 1.7423079309238168e-05, + "loss": 0.1987, "step": 5589 }, { - "epoch": 0.32, - "grad_norm": 0.4731252524113648, - "learning_rate": 1.587423871301461e-05, - "loss": 0.3802, + "epoch": 0.26, + "grad_norm": 0.39900886071089864, + "learning_rate": 1.7422082233545226e-05, + "loss": 0.2783, "step": 5590 }, { - "epoch": 0.32, - "grad_norm": 0.33128379858329554, - "learning_rate": 1.5872732613022147e-05, - "loss": 0.2088, + "epoch": 0.26, + "grad_norm": 0.47435572851914515, + "learning_rate": 1.742108499353454e-05, + "loss": 0.2981, "step": 5591 }, { - "epoch": 0.32, - "grad_norm": 0.2749688226198136, - "learning_rate": 1.5871226309657203e-05, - "loss": 0.1898, + "epoch": 0.26, + "grad_norm": 0.8477030425880551, + "learning_rate": 1.7420087589228184e-05, + "loss": 0.4025, "step": 5592 }, { - "epoch": 0.32, - "grad_norm": 0.449249676522092, - "learning_rate": 1.5869719802971947e-05, - "loss": 0.3547, + "epoch": 0.26, + "grad_norm": 0.3945212961533725, + "learning_rate": 1.741909002064824e-05, + "loss": 0.2767, "step": 5593 }, { - "epoch": 0.32, - "grad_norm": 0.545477058578212, - "learning_rate": 1.5868213093018543e-05, - "loss": 0.3069, + "epoch": 0.26, + "grad_norm": 0.481711592424028, + "learning_rate": 1.7418092287816795e-05, + "loss": 0.3647, "step": 5594 }, { - "epoch": 0.32, - "grad_norm": 0.7375781226958218, - "learning_rate": 1.5866706179849172e-05, - "loss": 0.3483, + "epoch": 0.26, + "grad_norm": 1.458183075885284, + "learning_rate": 1.7417094390755936e-05, + "loss": 0.6424, "step": 5595 }, { - "epoch": 0.32, - "grad_norm": 1.0617075868345798, - "learning_rate": 1.586519906351602e-05, - "loss": 0.6041, + "epoch": 0.26, + "grad_norm": 0.2630830009014287, + "learning_rate": 1.7416096329487753e-05, + "loss": 0.1697, "step": 5596 }, { - "epoch": 0.32, - "grad_norm": 0.3294184144488115, - "learning_rate": 1.586369174407128e-05, - "loss": 0.2779, + "epoch": 0.26, + "grad_norm": 0.8055686369358457, + "learning_rate": 1.7415098104034348e-05, + "loss": 0.3358, "step": 5597 }, { - "epoch": 0.32, - "grad_norm": 0.2594062832915638, - "learning_rate": 1.5862184221567144e-05, - "loss": 0.1825, + "epoch": 0.26, + "grad_norm": 0.4673829162713825, + "learning_rate": 1.7414099714417817e-05, + "loss": 0.3569, "step": 5598 }, { - "epoch": 0.32, - "grad_norm": 0.9733876205705159, - "learning_rate": 1.586067649605582e-05, - "loss": 0.5383, + "epoch": 0.26, + "grad_norm": 0.3351364334154858, + "learning_rate": 1.7413101160660267e-05, + "loss": 0.2354, "step": 5599 }, { - "epoch": 0.32, - "grad_norm": 0.4270131446780353, - "learning_rate": 1.585916856758952e-05, - "loss": 0.2909, + "epoch": 0.26, + "grad_norm": 1.002943229775971, + "learning_rate": 1.7412102442783798e-05, + "loss": 0.6525, "step": 5600 }, { - "epoch": 0.32, - "grad_norm": 0.5168643875206522, - "learning_rate": 1.5857660436220466e-05, - "loss": 0.315, + "epoch": 0.26, + "grad_norm": 0.5334498407865381, + "learning_rate": 1.7411103560810528e-05, + "loss": 0.4041, "step": 5601 }, { - "epoch": 0.32, - "grad_norm": 0.5136780415745466, - "learning_rate": 1.5856152102000878e-05, - "loss": 0.3597, + "epoch": 0.26, + "grad_norm": 0.2526540479265092, + "learning_rate": 1.7410104514762565e-05, + "loss": 0.188, "step": 5602 }, { - "epoch": 0.32, - "grad_norm": 0.3472687358366042, - "learning_rate": 1.5854643564983e-05, - "loss": 0.2772, + "epoch": 0.26, + "grad_norm": 0.6150239105966623, + "learning_rate": 1.740910530466203e-05, + "loss": 0.4861, "step": 5603 }, { - "epoch": 0.32, - "grad_norm": 0.21984666591055935, - "learning_rate": 1.5853134825219066e-05, - "loss": 0.1203, + "epoch": 0.26, + "grad_norm": 0.45494544578867, + "learning_rate": 1.7408105930531047e-05, + "loss": 0.3379, "step": 5604 }, { - "epoch": 0.32, - "grad_norm": 0.5955025245870108, - "learning_rate": 1.5851625882761326e-05, - "loss": 0.3752, + "epoch": 0.26, + "grad_norm": 0.357999692372862, + "learning_rate": 1.7407106392391738e-05, + "loss": 0.2115, "step": 5605 }, { - "epoch": 0.32, - "grad_norm": 0.3950271910878111, - "learning_rate": 1.585011673766203e-05, - "loss": 0.2838, + "epoch": 0.26, + "grad_norm": 0.3755942713096529, + "learning_rate": 1.740610669026623e-05, + "loss": 0.3533, "step": 5606 }, { - "epoch": 0.32, - "grad_norm": 0.6397240293769488, - "learning_rate": 1.5848607389973446e-05, - "loss": 0.4141, + "epoch": 0.26, + "grad_norm": 0.9485555677729863, + "learning_rate": 1.740510682417666e-05, + "loss": 0.5368, "step": 5607 }, { - "epoch": 0.32, - "grad_norm": 0.4432763685637882, - "learning_rate": 1.584709783974784e-05, - "loss": 0.2897, + "epoch": 0.26, + "grad_norm": 0.46092882515729766, + "learning_rate": 1.7404106794145163e-05, + "loss": 0.3263, "step": 5608 }, { - "epoch": 0.32, - "grad_norm": 0.4240833841777455, - "learning_rate": 1.5845588087037484e-05, - "loss": 0.2904, + "epoch": 0.26, + "grad_norm": 0.4084643015341037, + "learning_rate": 1.7403106600193878e-05, + "loss": 0.2989, "step": 5609 }, { - "epoch": 0.32, - "grad_norm": 0.43798524592763793, - "learning_rate": 1.5844078131894668e-05, - "loss": 0.3254, + "epoch": 0.26, + "grad_norm": 0.3720021783684052, + "learning_rate": 1.7402106242344946e-05, + "loss": 0.2826, "step": 5610 }, { - "epoch": 0.32, - "grad_norm": 0.44403383059135365, - "learning_rate": 1.5842567974371672e-05, - "loss": 0.2548, + "epoch": 0.26, + "grad_norm": 0.46029209665660464, + "learning_rate": 1.7401105720620516e-05, + "loss": 0.3059, "step": 5611 }, { - "epoch": 0.32, - "grad_norm": 0.37943493120580635, - "learning_rate": 1.5841057614520803e-05, - "loss": 0.3146, + "epoch": 0.26, + "grad_norm": 0.332297214524411, + "learning_rate": 1.740010503504274e-05, + "loss": 0.2116, "step": 5612 }, { - "epoch": 0.32, - "grad_norm": 0.3330746925906659, - "learning_rate": 1.5839547052394356e-05, - "loss": 0.3125, + "epoch": 0.26, + "grad_norm": 0.5243993439037318, + "learning_rate": 1.739910418563377e-05, + "loss": 0.3978, "step": 5613 }, { - "epoch": 0.32, - "grad_norm": 0.36957538435382176, - "learning_rate": 1.583803628804465e-05, - "loss": 0.1807, + "epoch": 0.26, + "grad_norm": 0.3692500602824166, + "learning_rate": 1.7398103172415766e-05, + "loss": 0.2824, "step": 5614 }, { - "epoch": 0.32, - "grad_norm": 0.3799639377524021, - "learning_rate": 1.5836525321523998e-05, - "loss": 0.2976, + "epoch": 0.26, + "grad_norm": 0.8458931807717212, + "learning_rate": 1.739710199541089e-05, + "loss": 0.3971, "step": 5615 }, { - "epoch": 0.32, - "grad_norm": 0.5407995328754999, - "learning_rate": 1.5835014152884722e-05, - "loss": 0.371, + "epoch": 0.26, + "grad_norm": 0.438794326379432, + "learning_rate": 1.73961006546413e-05, + "loss": 0.2845, "step": 5616 }, { - "epoch": 0.32, - "grad_norm": 0.4602781375274827, - "learning_rate": 1.5833502782179157e-05, - "loss": 0.3056, + "epoch": 0.26, + "grad_norm": 0.38727393576428387, + "learning_rate": 1.7395099150129173e-05, + "loss": 0.2915, "step": 5617 }, { - "epoch": 0.32, - "grad_norm": 0.2703581818132286, - "learning_rate": 1.5831991209459646e-05, - "loss": 0.2294, + "epoch": 0.26, + "grad_norm": 0.3084459336446391, + "learning_rate": 1.7394097481896676e-05, + "loss": 0.2106, "step": 5618 }, { - "epoch": 0.32, - "grad_norm": 1.3941351735792076, - "learning_rate": 1.583047943477853e-05, - "loss": 0.8033, + "epoch": 0.26, + "grad_norm": 1.0550397826156737, + "learning_rate": 1.739309564996599e-05, + "loss": 0.525, "step": 5619 }, { - "epoch": 0.32, - "grad_norm": 0.4269407961412312, - "learning_rate": 1.5828967458188155e-05, - "loss": 0.2612, + "epoch": 0.26, + "grad_norm": 0.37656289439841306, + "learning_rate": 1.739209365435929e-05, + "loss": 0.2914, "step": 5620 }, { - "epoch": 0.32, - "grad_norm": 0.32749877211677897, - "learning_rate": 1.5827455279740892e-05, - "loss": 0.2406, + "epoch": 0.26, + "grad_norm": 0.7235882317830873, + "learning_rate": 1.7391091495098763e-05, + "loss": 0.5765, "step": 5621 }, { - "epoch": 0.32, - "grad_norm": 0.7597682432080118, - "learning_rate": 1.5825942899489103e-05, - "loss": 0.4945, + "epoch": 0.26, + "grad_norm": 0.3369582148775822, + "learning_rate": 1.7390089172206594e-05, + "loss": 0.2781, "step": 5622 }, { - "epoch": 0.32, - "grad_norm": 0.6648356899162475, - "learning_rate": 1.5824430317485163e-05, - "loss": 0.4611, + "epoch": 0.26, + "grad_norm": 0.46625909800849197, + "learning_rate": 1.7389086685704973e-05, + "loss": 0.2867, "step": 5623 }, { - "epoch": 0.32, - "grad_norm": 0.3576858149221899, - "learning_rate": 1.582291753378145e-05, - "loss": 0.1644, + "epoch": 0.26, + "grad_norm": 0.31703079580319904, + "learning_rate": 1.7388084035616092e-05, + "loss": 0.2144, "step": 5624 }, { - "epoch": 0.32, - "grad_norm": 0.30960874021564383, - "learning_rate": 1.5821404548430352e-05, - "loss": 0.2993, + "epoch": 0.26, + "grad_norm": 0.5907459046423541, + "learning_rate": 1.738708122196215e-05, + "loss": 0.3142, "step": 5625 }, { - "epoch": 0.32, - "grad_norm": 0.2692899639231665, - "learning_rate": 1.5819891361484266e-05, - "loss": 0.2074, + "epoch": 0.26, + "grad_norm": 0.454016486346263, + "learning_rate": 1.7386078244765355e-05, + "loss": 0.2734, "step": 5626 }, { - "epoch": 0.32, - "grad_norm": 0.362044309838756, - "learning_rate": 1.5818377972995594e-05, - "loss": 0.2007, + "epoch": 0.26, + "grad_norm": 0.506993614136617, + "learning_rate": 1.7385075104047903e-05, + "loss": 0.389, "step": 5627 }, { - "epoch": 0.32, - "grad_norm": 0.5932912412651169, - "learning_rate": 1.581686438301674e-05, - "loss": 0.3933, + "epoch": 0.26, + "grad_norm": 1.0332193451090583, + "learning_rate": 1.7384071799832008e-05, + "loss": 0.5181, "step": 5628 }, { - "epoch": 0.32, - "grad_norm": 0.37069755753292, - "learning_rate": 1.5815350591600124e-05, - "loss": 0.3382, + "epoch": 0.26, + "grad_norm": 0.4815321749613126, + "learning_rate": 1.738306833213988e-05, + "loss": 0.299, "step": 5629 }, { - "epoch": 0.32, - "grad_norm": 0.30830834981343785, - "learning_rate": 1.5813836598798168e-05, - "loss": 0.1911, + "epoch": 0.26, + "grad_norm": 0.3180429072866193, + "learning_rate": 1.7382064700993733e-05, + "loss": 0.229, "step": 5630 }, { - "epoch": 0.32, - "grad_norm": 0.36059298843687776, - "learning_rate": 1.5812322404663304e-05, - "loss": 0.2967, + "epoch": 0.26, + "grad_norm": 0.7769906387017061, + "learning_rate": 1.738106090641579e-05, + "loss": 0.3666, "step": 5631 }, { - "epoch": 0.32, - "grad_norm": 0.385845649979882, - "learning_rate": 1.581080800924796e-05, - "loss": 0.1999, + "epoch": 0.26, + "grad_norm": 0.42152042471688644, + "learning_rate": 1.7380056948428273e-05, + "loss": 0.309, "step": 5632 }, { - "epoch": 0.32, - "grad_norm": 0.33126056841829, - "learning_rate": 1.5809293412604584e-05, - "loss": 0.299, + "epoch": 0.26, + "grad_norm": 0.44256491183205654, + "learning_rate": 1.7379052827053405e-05, + "loss": 0.3069, "step": 5633 }, { - "epoch": 0.32, - "grad_norm": 0.386058427132325, - "learning_rate": 1.580777861478563e-05, - "loss": 0.3071, + "epoch": 0.26, + "grad_norm": 0.615511915459569, + "learning_rate": 1.7378048542313424e-05, + "loss": 0.4168, "step": 5634 }, { - "epoch": 0.32, - "grad_norm": 0.7101238664226756, - "learning_rate": 1.580626361584355e-05, - "loss": 0.5124, + "epoch": 0.26, + "grad_norm": 0.354479821178787, + "learning_rate": 1.7377044094230555e-05, + "loss": 0.2301, "step": 5635 }, { - "epoch": 0.32, - "grad_norm": 0.589818500555523, - "learning_rate": 1.5804748415830814e-05, - "loss": 0.4467, + "epoch": 0.26, + "grad_norm": 0.2832020994153795, + "learning_rate": 1.7376039482827043e-05, + "loss": 0.1765, "step": 5636 }, { - "epoch": 0.32, - "grad_norm": 0.2932950338270886, - "learning_rate": 1.5803233014799887e-05, - "loss": 0.2563, + "epoch": 0.26, + "grad_norm": 0.4967041052459835, + "learning_rate": 1.7375034708125126e-05, + "loss": 0.3644, "step": 5637 }, { - "epoch": 0.32, - "grad_norm": 0.2636118456350962, - "learning_rate": 1.5801717412803246e-05, - "loss": 0.2042, + "epoch": 0.26, + "grad_norm": 0.3561312702698118, + "learning_rate": 1.7374029770147048e-05, + "loss": 0.2453, "step": 5638 }, { - "epoch": 0.32, - "grad_norm": 0.3969761989987564, - "learning_rate": 1.580020160989339e-05, - "loss": 0.3166, + "epoch": 0.26, + "grad_norm": 1.381147977528637, + "learning_rate": 1.7373024668915058e-05, + "loss": 0.6876, "step": 5639 }, { - "epoch": 0.32, - "grad_norm": 0.5113430169982263, - "learning_rate": 1.5798685606122795e-05, - "loss": 0.3068, + "epoch": 0.26, + "grad_norm": 1.2047690047716177, + "learning_rate": 1.7372019404451403e-05, + "loss": 0.7801, "step": 5640 }, { - "epoch": 0.32, - "grad_norm": 0.40055815261009947, - "learning_rate": 1.579716940154397e-05, - "loss": 0.3314, + "epoch": 0.26, + "grad_norm": 0.37661967041072036, + "learning_rate": 1.737101397677835e-05, + "loss": 0.202, "step": 5641 }, { - "epoch": 0.32, - "grad_norm": 0.7927953903429585, - "learning_rate": 1.5795652996209416e-05, - "loss": 0.5219, + "epoch": 0.26, + "grad_norm": 0.3920474704374135, + "learning_rate": 1.7370008385918148e-05, + "loss": 0.3182, "step": 5642 }, { - "epoch": 0.32, - "grad_norm": 0.37086620699465855, - "learning_rate": 1.579413639017165e-05, - "loss": 0.3161, + "epoch": 0.26, + "grad_norm": 0.43538211661209547, + "learning_rate": 1.736900263189307e-05, + "loss": 0.2534, "step": 5643 }, { - "epoch": 0.32, - "grad_norm": 0.24236166483538968, - "learning_rate": 1.5792619583483183e-05, - "loss": 0.1854, + "epoch": 0.26, + "grad_norm": 0.4076558293800915, + "learning_rate": 1.736799671472537e-05, + "loss": 0.2037, "step": 5644 }, { - "epoch": 0.32, - "grad_norm": 0.6014945831118405, - "learning_rate": 1.5791102576196555e-05, - "loss": 0.4099, + "epoch": 0.26, + "grad_norm": 0.44744861355234516, + "learning_rate": 1.7366990634437328e-05, + "loss": 0.32, "step": 5645 }, { - "epoch": 0.32, - "grad_norm": 0.3698987883509682, - "learning_rate": 1.5789585368364296e-05, - "loss": 0.3163, + "epoch": 0.26, + "grad_norm": 0.6076813164620158, + "learning_rate": 1.736598439105121e-05, + "loss": 0.4046, "step": 5646 }, { - "epoch": 0.32, - "grad_norm": 0.7593189596779604, - "learning_rate": 1.5788067960038942e-05, - "loss": 0.3718, + "epoch": 0.26, + "grad_norm": 0.5982607587524688, + "learning_rate": 1.73649779845893e-05, + "loss": 0.3316, "step": 5647 }, { - "epoch": 0.32, - "grad_norm": 0.5227360365319026, - "learning_rate": 1.5786550351273043e-05, - "loss": 0.3476, + "epoch": 0.26, + "grad_norm": 0.3950500795104273, + "learning_rate": 1.7363971415073875e-05, + "loss": 0.3085, "step": 5648 }, { - "epoch": 0.32, - "grad_norm": 0.34840131645436817, - "learning_rate": 1.5785032542119155e-05, - "loss": 0.3044, + "epoch": 0.26, + "grad_norm": 0.35963493739046604, + "learning_rate": 1.736296468252722e-05, + "loss": 0.2817, "step": 5649 }, { - "epoch": 0.32, - "grad_norm": 0.3537908681190076, - "learning_rate": 1.578351453262984e-05, - "loss": 0.1915, + "epoch": 0.26, + "grad_norm": 0.32435917135990094, + "learning_rate": 1.7361957786971623e-05, + "loss": 0.2336, "step": 5650 }, { - "epoch": 0.32, - "grad_norm": 0.5534665540651362, - "learning_rate": 1.578199632285766e-05, - "loss": 0.3911, + "epoch": 0.26, + "grad_norm": 0.4350948857761065, + "learning_rate": 1.7360950728429377e-05, + "loss": 0.2686, "step": 5651 }, { - "epoch": 0.32, - "grad_norm": 0.3296603796034221, - "learning_rate": 1.57804779128552e-05, - "loss": 0.2536, + "epoch": 0.26, + "grad_norm": 1.4148213763420012, + "learning_rate": 1.7359943506922775e-05, + "loss": 0.8692, "step": 5652 }, { - "epoch": 0.32, - "grad_norm": 0.36719045075931117, - "learning_rate": 1.577895930267504e-05, - "loss": 0.255, + "epoch": 0.26, + "grad_norm": 0.35526705279168586, + "learning_rate": 1.7358936122474116e-05, + "loss": 0.3023, "step": 5653 }, { - "epoch": 0.32, - "grad_norm": 0.5549103218709931, - "learning_rate": 1.5777440492369764e-05, - "loss": 0.4179, + "epoch": 0.26, + "grad_norm": 0.3909234274804482, + "learning_rate": 1.7357928575105707e-05, + "loss": 0.2812, "step": 5654 }, { - "epoch": 0.32, - "grad_norm": 0.4099773567180406, - "learning_rate": 1.5775921481991976e-05, - "loss": 0.3489, + "epoch": 0.26, + "grad_norm": 0.42766647794751456, + "learning_rate": 1.735692086483985e-05, + "loss": 0.3338, "step": 5655 }, { - "epoch": 0.32, - "grad_norm": 0.9487980274852443, - "learning_rate": 1.5774402271594272e-05, - "loss": 0.4496, + "epoch": 0.26, + "grad_norm": 0.3860190473888694, + "learning_rate": 1.7355912991698857e-05, + "loss": 0.2115, "step": 5656 }, { - "epoch": 0.33, - "grad_norm": 0.30146272872090246, - "learning_rate": 1.577288286122927e-05, - "loss": 0.2396, + "epoch": 0.26, + "grad_norm": 0.4450769992913822, + "learning_rate": 1.7354904955705038e-05, + "loss": 0.2641, "step": 5657 }, { - "epoch": 0.33, - "grad_norm": 0.32480569594008457, - "learning_rate": 1.5771363250949582e-05, - "loss": 0.246, + "epoch": 0.26, + "grad_norm": 0.5352681874680946, + "learning_rate": 1.7353896756880716e-05, + "loss": 0.4147, "step": 5658 }, { - "epoch": 0.33, - "grad_norm": 0.3711344965945374, - "learning_rate": 1.5769843440807828e-05, - "loss": 0.2793, + "epoch": 0.26, + "grad_norm": 0.516889149628235, + "learning_rate": 1.73528883952482e-05, + "loss": 0.3467, "step": 5659 }, { - "epoch": 0.33, - "grad_norm": 0.3992119007745314, - "learning_rate": 1.5768323430856647e-05, - "loss": 0.2608, + "epoch": 0.26, + "grad_norm": 0.4220909448385419, + "learning_rate": 1.7351879870829828e-05, + "loss": 0.353, "step": 5660 }, { - "epoch": 0.33, - "grad_norm": 0.335697923487834, - "learning_rate": 1.5766803221148676e-05, - "loss": 0.2743, + "epoch": 0.26, + "grad_norm": 0.3632217779083125, + "learning_rate": 1.7350871183647918e-05, + "loss": 0.2884, "step": 5661 }, { - "epoch": 0.33, - "grad_norm": 1.1159080031165294, - "learning_rate": 1.576528281173655e-05, - "loss": 0.4803, + "epoch": 0.26, + "grad_norm": 0.3498193295901684, + "learning_rate": 1.734986233372481e-05, + "loss": 0.1534, "step": 5662 }, { - "epoch": 0.33, - "grad_norm": 0.24838037661375026, - "learning_rate": 1.5763762202672933e-05, - "loss": 0.1415, + "epoch": 0.26, + "grad_norm": 0.4371083048114835, + "learning_rate": 1.7348853321082832e-05, + "loss": 0.2898, "step": 5663 }, { - "epoch": 0.33, - "grad_norm": 0.30420554106490244, - "learning_rate": 1.576224139401048e-05, - "loss": 0.2233, + "epoch": 0.26, + "grad_norm": 0.7957103306423904, + "learning_rate": 1.734784414574432e-05, + "loss": 0.4008, "step": 5664 }, { - "epoch": 0.33, - "grad_norm": 0.422273654707413, - "learning_rate": 1.5760720385801855e-05, - "loss": 0.3285, + "epoch": 0.26, + "grad_norm": 0.3850971126805678, + "learning_rate": 1.7346834807731626e-05, + "loss": 0.2912, "step": 5665 }, { - "epoch": 0.33, - "grad_norm": 0.6169292780868955, - "learning_rate": 1.575919917809973e-05, - "loss": 0.2722, + "epoch": 0.26, + "grad_norm": 0.4251904203752995, + "learning_rate": 1.7345825307067086e-05, + "loss": 0.3733, "step": 5666 }, { - "epoch": 0.33, - "grad_norm": 0.4049489150331608, - "learning_rate": 1.5757677770956785e-05, - "loss": 0.2992, + "epoch": 0.26, + "grad_norm": 0.8041601759705365, + "learning_rate": 1.7344815643773058e-05, + "loss": 0.3763, "step": 5667 }, { - "epoch": 0.33, - "grad_norm": 0.8471162417110218, - "learning_rate": 1.5756156164425703e-05, - "loss": 0.5039, + "epoch": 0.26, + "grad_norm": 0.28941843269082057, + "learning_rate": 1.7343805817871885e-05, + "loss": 0.1598, "step": 5668 }, { - "epoch": 0.33, - "grad_norm": 0.4114900323796081, - "learning_rate": 1.5754634358559187e-05, - "loss": 0.2948, + "epoch": 0.26, + "grad_norm": 0.3075629209464799, + "learning_rate": 1.7342795829385933e-05, + "loss": 0.2825, "step": 5669 }, { - "epoch": 0.33, - "grad_norm": 0.24786075406450575, - "learning_rate": 1.5753112353409928e-05, - "loss": 0.1681, + "epoch": 0.26, + "grad_norm": 1.09065347121831, + "learning_rate": 1.7341785678337557e-05, + "loss": 0.4191, "step": 5670 }, { - "epoch": 0.33, - "grad_norm": 1.1452188003836672, - "learning_rate": 1.5751590149030632e-05, - "loss": 0.7431, + "epoch": 0.26, + "grad_norm": 0.5923272769500549, + "learning_rate": 1.7340775364749124e-05, + "loss": 0.3909, "step": 5671 }, { - "epoch": 0.33, - "grad_norm": 0.4562773003590891, - "learning_rate": 1.575006774547402e-05, - "loss": 0.3109, + "epoch": 0.26, + "grad_norm": 0.4631238582513766, + "learning_rate": 1.7339764888642998e-05, + "loss": 0.3288, "step": 5672 }, { - "epoch": 0.33, - "grad_norm": 0.434282672437878, - "learning_rate": 1.5748545142792807e-05, - "loss": 0.2306, + "epoch": 0.26, + "grad_norm": 0.47622480077415913, + "learning_rate": 1.7338754250041553e-05, + "loss": 0.3559, "step": 5673 }, { - "epoch": 0.33, - "grad_norm": 1.0737371125385688, - "learning_rate": 1.5747022341039727e-05, - "loss": 0.4466, + "epoch": 0.26, + "grad_norm": 0.1966611086715717, + "learning_rate": 1.733774344896716e-05, + "loss": 0.0749, "step": 5674 }, { - "epoch": 0.33, - "grad_norm": 0.719146996338594, - "learning_rate": 1.5745499340267508e-05, - "loss": 0.4678, + "epoch": 0.26, + "grad_norm": 0.4923213865082224, + "learning_rate": 1.73367324854422e-05, + "loss": 0.323, "step": 5675 }, { - "epoch": 0.33, - "grad_norm": 0.314472948884982, - "learning_rate": 1.5743976140528893e-05, - "loss": 0.1734, + "epoch": 0.26, + "grad_norm": 1.0501885045886348, + "learning_rate": 1.7335721359489058e-05, + "loss": 0.5343, "step": 5676 }, { - "epoch": 0.33, - "grad_norm": 0.40527113405825066, - "learning_rate": 1.5742452741876632e-05, - "loss": 0.2722, + "epoch": 0.26, + "grad_norm": 0.3522192425530954, + "learning_rate": 1.733471007113011e-05, + "loss": 0.2633, "step": 5677 }, { - "epoch": 0.33, - "grad_norm": 0.7245521666416738, - "learning_rate": 1.574092914436348e-05, - "loss": 0.4212, + "epoch": 0.26, + "grad_norm": 0.44787884626806374, + "learning_rate": 1.7333698620387755e-05, + "loss": 0.3715, "step": 5678 }, { - "epoch": 0.33, - "grad_norm": 0.4596505412358331, - "learning_rate": 1.5739405348042197e-05, - "loss": 0.2702, + "epoch": 0.26, + "grad_norm": 1.4619347999333387, + "learning_rate": 1.7332687007284374e-05, + "loss": 0.6438, "step": 5679 }, { - "epoch": 0.33, - "grad_norm": 0.42284574176881146, - "learning_rate": 1.5737881352965556e-05, - "loss": 0.34, + "epoch": 0.26, + "grad_norm": 0.24956571188799068, + "learning_rate": 1.7331675231842374e-05, + "loss": 0.0983, "step": 5680 }, { - "epoch": 0.33, - "grad_norm": 1.7840092327565245, - "learning_rate": 1.573635715918633e-05, - "loss": 0.7355, + "epoch": 0.26, + "grad_norm": 0.28766261713753105, + "learning_rate": 1.7330663294084153e-05, + "loss": 0.2728, "step": 5681 }, { - "epoch": 0.33, - "grad_norm": 0.3697786735178063, - "learning_rate": 1.5734832766757302e-05, - "loss": 0.2946, + "epoch": 0.26, + "grad_norm": 0.7681976327198932, + "learning_rate": 1.7329651194032108e-05, + "loss": 0.4802, "step": 5682 }, { - "epoch": 0.33, - "grad_norm": 0.26905521183914954, - "learning_rate": 1.573330817573126e-05, - "loss": 0.1296, + "epoch": 0.26, + "grad_norm": 0.6220765355851102, + "learning_rate": 1.7328638931708652e-05, + "loss": 0.3276, "step": 5683 }, { - "epoch": 0.33, - "grad_norm": 0.3968443457830917, - "learning_rate": 1.5731783386161007e-05, - "loss": 0.3475, + "epoch": 0.26, + "grad_norm": 0.381099517804756, + "learning_rate": 1.7327626507136194e-05, + "loss": 0.3142, "step": 5684 }, { - "epoch": 0.33, - "grad_norm": 0.3421390360738499, - "learning_rate": 1.5730258398099335e-05, - "loss": 0.2744, + "epoch": 0.26, + "grad_norm": 0.4168246988884917, + "learning_rate": 1.7326613920337147e-05, + "loss": 0.302, "step": 5685 }, { - "epoch": 0.33, - "grad_norm": 0.7376461566219767, - "learning_rate": 1.5728733211599067e-05, - "loss": 0.3775, + "epoch": 0.26, + "grad_norm": 0.5242040974825304, + "learning_rate": 1.7325601171333927e-05, + "loss": 0.2687, "step": 5686 }, { - "epoch": 0.33, - "grad_norm": 1.2203592124029954, - "learning_rate": 1.572720782671301e-05, - "loss": 0.6442, + "epoch": 0.26, + "grad_norm": 0.29266260419698903, + "learning_rate": 1.732458826014896e-05, + "loss": 0.2086, "step": 5687 }, { - "epoch": 0.33, - "grad_norm": 0.3257697053227165, - "learning_rate": 1.5725682243493995e-05, - "loss": 0.2935, + "epoch": 0.26, + "grad_norm": 0.884541165777677, + "learning_rate": 1.732357518680467e-05, + "loss": 0.483, "step": 5688 }, { - "epoch": 0.33, - "grad_norm": 0.24273051299883028, - "learning_rate": 1.572415646199485e-05, - "loss": 0.1582, + "epoch": 0.26, + "grad_norm": 0.37147893597954396, + "learning_rate": 1.732256195132348e-05, + "loss": 0.3212, "step": 5689 }, { - "epoch": 0.33, - "grad_norm": 0.7445388041307233, - "learning_rate": 1.5722630482268413e-05, - "loss": 0.4049, + "epoch": 0.26, + "grad_norm": 0.4020129683769188, + "learning_rate": 1.7321548553727828e-05, + "loss": 0.2408, "step": 5690 }, { - "epoch": 0.33, - "grad_norm": 0.3916307018462784, - "learning_rate": 1.5721104304367526e-05, - "loss": 0.318, + "epoch": 0.26, + "grad_norm": 0.9398524144431251, + "learning_rate": 1.7320534994040148e-05, + "loss": 0.5556, "step": 5691 }, { - "epoch": 0.33, - "grad_norm": 0.3538495633272354, - "learning_rate": 1.5719577928345045e-05, - "loss": 0.2772, + "epoch": 0.26, + "grad_norm": 0.3660214863767395, + "learning_rate": 1.731952127228288e-05, + "loss": 0.2494, "step": 5692 }, { - "epoch": 0.33, - "grad_norm": 0.7364894330489864, - "learning_rate": 1.5718051354253828e-05, - "loss": 0.4585, + "epoch": 0.26, + "grad_norm": 0.35284040939135825, + "learning_rate": 1.7318507388478464e-05, + "loss": 0.2452, "step": 5693 }, { - "epoch": 0.33, - "grad_norm": 0.4082152277198964, - "learning_rate": 1.5716524582146734e-05, - "loss": 0.304, + "epoch": 0.26, + "grad_norm": 0.559651903776116, + "learning_rate": 1.7317493342649346e-05, + "loss": 0.3134, "step": 5694 }, { - "epoch": 0.33, - "grad_norm": 0.31484704632365135, - "learning_rate": 1.5714997612076643e-05, - "loss": 0.232, + "epoch": 0.26, + "grad_norm": 0.8467735362485772, + "learning_rate": 1.731647913481798e-05, + "loss": 0.6168, "step": 5695 }, { - "epoch": 0.33, - "grad_norm": 0.39016338699647013, - "learning_rate": 1.571347044409643e-05, - "loss": 0.3046, + "epoch": 0.26, + "grad_norm": 0.43903044203404934, + "learning_rate": 1.731546476500682e-05, + "loss": 0.3127, "step": 5696 }, { - "epoch": 0.33, - "grad_norm": 0.3233481171244161, - "learning_rate": 1.571194307825898e-05, - "loss": 0.2492, + "epoch": 0.26, + "grad_norm": 0.35617455049077595, + "learning_rate": 1.7314450233238316e-05, + "loss": 0.2724, "step": 5697 }, { - "epoch": 0.33, - "grad_norm": 1.092219467523616, - "learning_rate": 1.5710415514617187e-05, - "loss": 0.7269, + "epoch": 0.26, + "grad_norm": 0.48892042488874515, + "learning_rate": 1.731343553953494e-05, + "loss": 0.3005, "step": 5698 }, { - "epoch": 0.33, - "grad_norm": 1.0398976979337753, - "learning_rate": 1.5708887753223953e-05, - "loss": 0.2765, + "epoch": 0.26, + "grad_norm": 0.3875388619343407, + "learning_rate": 1.7312420683919144e-05, + "loss": 0.2844, "step": 5699 }, { - "epoch": 0.33, - "grad_norm": 0.29904995978400906, - "learning_rate": 1.5707359794132178e-05, - "loss": 0.2535, + "epoch": 0.26, + "grad_norm": 0.3358371267370901, + "learning_rate": 1.7311405666413405e-05, + "loss": 0.2886, "step": 5700 }, { - "epoch": 0.33, - "grad_norm": 0.36950212436020996, - "learning_rate": 1.5705831637394783e-05, - "loss": 0.3008, + "epoch": 0.26, + "grad_norm": 0.5242297881453637, + "learning_rate": 1.7310390487040193e-05, + "loss": 0.4036, "step": 5701 }, { - "epoch": 0.33, - "grad_norm": 0.5786942167807272, - "learning_rate": 1.570430328306468e-05, - "loss": 0.3812, + "epoch": 0.26, + "grad_norm": 0.37759615456846335, + "learning_rate": 1.730937514582198e-05, + "loss": 0.3006, "step": 5702 }, { - "epoch": 0.33, - "grad_norm": 0.39500817970292046, - "learning_rate": 1.5702774731194802e-05, - "loss": 0.3126, + "epoch": 0.26, + "grad_norm": 0.8962198550028754, + "learning_rate": 1.730835964278124e-05, + "loss": 0.3179, "step": 5703 }, { - "epoch": 0.33, - "grad_norm": 0.3453869948145563, - "learning_rate": 1.570124598183808e-05, - "loss": 0.2725, + "epoch": 0.26, + "grad_norm": 0.44275502915243686, + "learning_rate": 1.7307343977940467e-05, + "loss": 0.3537, "step": 5704 }, { - "epoch": 0.33, - "grad_norm": 0.4214946705124231, - "learning_rate": 1.569971703504745e-05, - "loss": 0.2872, + "epoch": 0.26, + "grad_norm": 0.37448329798360214, + "learning_rate": 1.7306328151322142e-05, + "loss": 0.2974, "step": 5705 }, { - "epoch": 0.33, - "grad_norm": 0.3257990453502148, - "learning_rate": 1.5698187890875867e-05, - "loss": 0.2885, + "epoch": 0.26, + "grad_norm": 0.4951967233846368, + "learning_rate": 1.7305312162948754e-05, + "loss": 0.2319, "step": 5706 }, { - "epoch": 0.33, - "grad_norm": 0.6989314635364635, - "learning_rate": 1.5696658549376286e-05, - "loss": 0.5029, + "epoch": 0.26, + "grad_norm": 0.9714689662565439, + "learning_rate": 1.7304296012842794e-05, + "loss": 0.6148, "step": 5707 }, { - "epoch": 0.33, - "grad_norm": 0.34822630078350825, - "learning_rate": 1.569512901060166e-05, - "loss": 0.3442, + "epoch": 0.26, + "grad_norm": 0.3379417138763714, + "learning_rate": 1.730327970102676e-05, + "loss": 0.2345, "step": 5708 }, { - "epoch": 0.33, - "grad_norm": 0.34505897851320166, - "learning_rate": 1.569359927460496e-05, - "loss": 0.1921, + "epoch": 0.26, + "grad_norm": 0.4531665531555558, + "learning_rate": 1.7302263227523154e-05, + "loss": 0.3227, "step": 5709 }, { - "epoch": 0.33, - "grad_norm": 0.2695284101966276, - "learning_rate": 1.5692069341439164e-05, - "loss": 0.2228, + "epoch": 0.26, + "grad_norm": 0.6324633800801548, + "learning_rate": 1.7301246592354476e-05, + "loss": 0.351, "step": 5710 }, { - "epoch": 0.33, - "grad_norm": 0.5436723132485777, - "learning_rate": 1.5690539211157255e-05, - "loss": 0.3901, + "epoch": 0.26, + "grad_norm": 0.420000513436948, + "learning_rate": 1.7300229795543234e-05, + "loss": 0.2891, "step": 5711 }, { - "epoch": 0.33, - "grad_norm": 0.33269868149267295, - "learning_rate": 1.5689008883812212e-05, - "loss": 0.2106, + "epoch": 0.26, + "grad_norm": 0.4751467632596921, + "learning_rate": 1.7299212837111947e-05, + "loss": 0.2844, "step": 5712 }, { - "epoch": 0.33, - "grad_norm": 0.5032767085391571, - "learning_rate": 1.568747835945704e-05, - "loss": 0.3967, + "epoch": 0.26, + "grad_norm": 0.40294653526200697, + "learning_rate": 1.7298195717083117e-05, + "loss": 0.285, "step": 5713 }, { - "epoch": 0.33, - "grad_norm": 0.7674497920759249, - "learning_rate": 1.5685947638144736e-05, - "loss": 0.6305, + "epoch": 0.26, + "grad_norm": 0.3826517017215292, + "learning_rate": 1.729717843547927e-05, + "loss": 0.2872, "step": 5714 }, { - "epoch": 0.33, - "grad_norm": 0.33746113836252484, - "learning_rate": 1.5684416719928314e-05, - "loss": 0.2059, + "epoch": 0.26, + "grad_norm": 0.48401376817174496, + "learning_rate": 1.7296160992322922e-05, + "loss": 0.2942, "step": 5715 }, { - "epoch": 0.33, - "grad_norm": 0.27730558315725434, - "learning_rate": 1.568288560486078e-05, - "loss": 0.2342, + "epoch": 0.26, + "grad_norm": 0.4268489505252926, + "learning_rate": 1.729514338763661e-05, + "loss": 0.3299, "step": 5716 }, { - "epoch": 0.33, - "grad_norm": 0.8937613239506347, - "learning_rate": 1.5681354292995164e-05, - "loss": 0.5943, + "epoch": 0.26, + "grad_norm": 0.36317192336647813, + "learning_rate": 1.7294125621442848e-05, + "loss": 0.2833, "step": 5717 }, { - "epoch": 0.33, - "grad_norm": 0.32282729775940155, - "learning_rate": 1.5679822784384492e-05, - "loss": 0.213, + "epoch": 0.26, + "grad_norm": 0.9010094483756562, + "learning_rate": 1.7293107693764177e-05, + "loss": 0.5994, "step": 5718 }, { - "epoch": 0.33, - "grad_norm": 1.0217359415141611, - "learning_rate": 1.56782910790818e-05, - "loss": 0.4414, + "epoch": 0.26, + "grad_norm": 0.3021988986741908, + "learning_rate": 1.729208960462313e-05, + "loss": 0.157, "step": 5719 }, { - "epoch": 0.33, - "grad_norm": 0.37906399149967873, - "learning_rate": 1.5676759177140132e-05, - "loss": 0.3148, + "epoch": 0.26, + "grad_norm": 0.3065599133268593, + "learning_rate": 1.7291071354042247e-05, + "loss": 0.2259, "step": 5720 }, { - "epoch": 0.33, - "grad_norm": 0.32548021526366716, - "learning_rate": 1.567522707861254e-05, - "loss": 0.2696, + "epoch": 0.26, + "grad_norm": 0.5142229429835107, + "learning_rate": 1.729005294204407e-05, + "loss": 0.3521, "step": 5721 }, { - "epoch": 0.33, - "grad_norm": 0.27672622569811445, - "learning_rate": 1.567369478355208e-05, - "loss": 0.1349, + "epoch": 0.26, + "grad_norm": 0.6682596708382953, + "learning_rate": 1.728903436865115e-05, + "loss": 0.4585, "step": 5722 }, { - "epoch": 0.33, - "grad_norm": 0.34813866318867676, - "learning_rate": 1.567216229201181e-05, - "loss": 0.2762, + "epoch": 0.26, + "grad_norm": 0.37217664712457094, + "learning_rate": 1.7288015633886033e-05, + "loss": 0.2377, "step": 5723 }, { - "epoch": 0.33, - "grad_norm": 0.31379746680097137, - "learning_rate": 1.5670629604044804e-05, - "loss": 0.2616, + "epoch": 0.26, + "grad_norm": 0.9842439217105892, + "learning_rate": 1.7286996737771275e-05, + "loss": 0.6513, "step": 5724 }, { - "epoch": 0.33, - "grad_norm": 0.8743307806116454, - "learning_rate": 1.566909671970414e-05, - "loss": 0.3819, + "epoch": 0.26, + "grad_norm": 0.4378615319020107, + "learning_rate": 1.7285977680329434e-05, + "loss": 0.3646, "step": 5725 }, { - "epoch": 0.33, - "grad_norm": 0.9740313278256991, - "learning_rate": 1.5667563639042904e-05, - "loss": 0.6429, + "epoch": 0.26, + "grad_norm": 0.36344080572564, + "learning_rate": 1.728495846158307e-05, + "loss": 0.211, "step": 5726 }, { - "epoch": 0.33, - "grad_norm": 0.3849691561398392, - "learning_rate": 1.5666030362114175e-05, - "loss": 0.2787, + "epoch": 0.26, + "grad_norm": 0.44132970603907606, + "learning_rate": 1.728393908155474e-05, + "loss": 0.2769, "step": 5727 }, { - "epoch": 0.33, - "grad_norm": 0.29480543187124647, - "learning_rate": 1.566449688897106e-05, - "loss": 0.2065, + "epoch": 0.26, + "grad_norm": 0.41273891687674874, + "learning_rate": 1.7282919540267025e-05, + "loss": 0.3615, "step": 5728 }, { - "epoch": 0.33, - "grad_norm": 0.5564381113672089, - "learning_rate": 1.5662963219666666e-05, - "loss": 0.3209, + "epoch": 0.26, + "grad_norm": 0.36402768497411836, + "learning_rate": 1.728189983774249e-05, + "loss": 0.2034, "step": 5729 }, { - "epoch": 0.33, - "grad_norm": 0.6185522440055002, - "learning_rate": 1.5661429354254096e-05, - "loss": 0.3824, + "epoch": 0.26, + "grad_norm": 1.2272492539924984, + "learning_rate": 1.728087997400371e-05, + "loss": 0.7772, "step": 5730 }, { - "epoch": 0.33, - "grad_norm": 0.40049158281522773, - "learning_rate": 1.5659895292786474e-05, - "loss": 0.3183, + "epoch": 0.26, + "grad_norm": 0.47546045652196284, + "learning_rate": 1.7279859949073263e-05, + "loss": 0.3434, "step": 5731 }, { - "epoch": 0.33, - "grad_norm": 0.4038008265047527, - "learning_rate": 1.565836103531692e-05, - "loss": 0.3016, + "epoch": 0.26, + "grad_norm": 0.3673803950858512, + "learning_rate": 1.7278839762973734e-05, + "loss": 0.2036, "step": 5732 }, { - "epoch": 0.33, - "grad_norm": 0.3987251335173393, - "learning_rate": 1.5656826581898563e-05, - "loss": 0.2744, + "epoch": 0.26, + "grad_norm": 0.3403737123198815, + "learning_rate": 1.7277819415727712e-05, + "loss": 0.3178, "step": 5733 }, { - "epoch": 0.33, - "grad_norm": 0.34573611183952435, - "learning_rate": 1.565529193258455e-05, - "loss": 0.2572, + "epoch": 0.26, + "grad_norm": 0.8029106563032435, + "learning_rate": 1.7276798907357778e-05, + "loss": 0.4761, "step": 5734 }, { - "epoch": 0.33, - "grad_norm": 0.392359831338795, - "learning_rate": 1.5653757087428015e-05, - "loss": 0.2155, + "epoch": 0.26, + "grad_norm": 0.44449006366362753, + "learning_rate": 1.7275778237886535e-05, + "loss": 0.2912, "step": 5735 }, { - "epoch": 0.33, - "grad_norm": 0.29979714182243516, - "learning_rate": 1.5652222046482118e-05, - "loss": 0.288, + "epoch": 0.26, + "grad_norm": 0.370434068186547, + "learning_rate": 1.7274757407336567e-05, + "loss": 0.2667, "step": 5736 }, { - "epoch": 0.33, - "grad_norm": 0.5553731791527236, - "learning_rate": 1.5650686809800016e-05, - "loss": 0.388, + "epoch": 0.26, + "grad_norm": 1.5097823563245483, + "learning_rate": 1.7273736415730488e-05, + "loss": 0.7884, "step": 5737 }, { - "epoch": 0.33, - "grad_norm": 0.935348590466938, - "learning_rate": 1.564915137743487e-05, - "loss": 0.5044, + "epoch": 0.26, + "grad_norm": 0.4298364188280013, + "learning_rate": 1.7272715263090892e-05, + "loss": 0.3288, "step": 5738 }, { - "epoch": 0.33, - "grad_norm": 0.42901141158263, - "learning_rate": 1.5647615749439858e-05, - "loss": 0.302, + "epoch": 0.26, + "grad_norm": 0.5443692638418025, + "learning_rate": 1.7271693949440393e-05, + "loss": 0.3409, "step": 5739 }, { - "epoch": 0.33, - "grad_norm": 0.3289608155747018, - "learning_rate": 1.5646079925868152e-05, - "loss": 0.2922, + "epoch": 0.26, + "grad_norm": 0.2504824799741428, + "learning_rate": 1.7270672474801595e-05, + "loss": 0.2353, "step": 5740 }, { - "epoch": 0.33, - "grad_norm": 0.5271177700015318, - "learning_rate": 1.564454390677294e-05, - "loss": 0.2541, + "epoch": 0.26, + "grad_norm": 0.36093064266195934, + "learning_rate": 1.7269650839197115e-05, + "loss": 0.2828, "step": 5741 }, { - "epoch": 0.33, - "grad_norm": 0.3476763157178258, - "learning_rate": 1.5643007692207422e-05, - "loss": 0.3031, + "epoch": 0.26, + "grad_norm": 0.8202041556267456, + "learning_rate": 1.7268629042649575e-05, + "loss": 0.388, "step": 5742 }, { - "epoch": 0.33, - "grad_norm": 0.4244417596608532, - "learning_rate": 1.5641471282224788e-05, - "loss": 0.2396, + "epoch": 0.26, + "grad_norm": 0.9438074064482519, + "learning_rate": 1.7267607085181594e-05, + "loss": 0.6054, "step": 5743 }, { - "epoch": 0.33, - "grad_norm": 0.4413347170413946, - "learning_rate": 1.563993467687824e-05, - "loss": 0.3494, + "epoch": 0.26, + "grad_norm": 0.39074207454549437, + "learning_rate": 1.7266584966815794e-05, + "loss": 0.2933, "step": 5744 }, { - "epoch": 0.33, - "grad_norm": 0.38877102993582696, - "learning_rate": 1.5638397876221002e-05, - "loss": 0.2112, + "epoch": 0.26, + "grad_norm": 0.3531036342342758, + "learning_rate": 1.726556268757481e-05, + "loss": 0.2938, "step": 5745 }, { - "epoch": 0.33, - "grad_norm": 0.4975149211929953, - "learning_rate": 1.563686088030629e-05, - "loss": 0.4052, + "epoch": 0.26, + "grad_norm": 0.3201236283934675, + "learning_rate": 1.726454024748127e-05, + "loss": 0.2069, "step": 5746 }, { - "epoch": 0.33, - "grad_norm": 0.5640690106411652, - "learning_rate": 1.5635323689187323e-05, - "loss": 0.3971, + "epoch": 0.26, + "grad_norm": 0.6122363142521149, + "learning_rate": 1.7263517646557804e-05, + "loss": 0.3459, "step": 5747 }, { - "epoch": 0.33, - "grad_norm": 0.2221851855752157, - "learning_rate": 1.5633786302917343e-05, - "loss": 0.1517, + "epoch": 0.26, + "grad_norm": 0.3921042081591479, + "learning_rate": 1.7262494884827062e-05, + "loss": 0.3123, "step": 5748 }, { - "epoch": 0.33, - "grad_norm": 0.2994501028669219, - "learning_rate": 1.5632248721549584e-05, - "loss": 0.2548, + "epoch": 0.26, + "grad_norm": 0.48072946182860743, + "learning_rate": 1.7261471962311683e-05, + "loss": 0.3299, "step": 5749 }, { - "epoch": 0.33, - "grad_norm": 0.8917217167943163, - "learning_rate": 1.5630710945137293e-05, - "loss": 0.6478, + "epoch": 0.26, + "grad_norm": 0.4454915453155642, + "learning_rate": 1.7260448879034316e-05, + "loss": 0.2783, "step": 5750 }, { - "epoch": 0.33, - "grad_norm": 0.31361359549888296, - "learning_rate": 1.5629172973733724e-05, - "loss": 0.1688, + "epoch": 0.26, + "grad_norm": 0.5489973478649294, + "learning_rate": 1.7259425635017607e-05, + "loss": 0.3521, "step": 5751 }, { - "epoch": 0.33, - "grad_norm": 0.35912544773740485, - "learning_rate": 1.562763480739214e-05, - "loss": 0.2893, + "epoch": 0.26, + "grad_norm": 0.3241000928532584, + "learning_rate": 1.725840223028421e-05, + "loss": 0.2122, "step": 5752 }, { - "epoch": 0.33, - "grad_norm": 0.711575311545721, - "learning_rate": 1.56260964461658e-05, - "loss": 0.4265, + "epoch": 0.26, + "grad_norm": 0.33740597443333964, + "learning_rate": 1.7257378664856786e-05, + "loss": 0.2196, "step": 5753 }, { - "epoch": 0.33, - "grad_norm": 0.2564745105098827, - "learning_rate": 1.5624557890107983e-05, - "loss": 0.1733, + "epoch": 0.26, + "grad_norm": 0.6357272557447061, + "learning_rate": 1.725635493875799e-05, + "loss": 0.4354, "step": 5754 }, { - "epoch": 0.33, - "grad_norm": 0.3070550130073271, - "learning_rate": 1.5623019139271967e-05, - "loss": 0.2762, + "epoch": 0.26, + "grad_norm": 0.6842153455847927, + "learning_rate": 1.7255331052010495e-05, + "loss": 0.3914, "step": 5755 }, { - "epoch": 0.33, - "grad_norm": 0.5205475343496394, - "learning_rate": 1.5621480193711046e-05, - "loss": 0.427, + "epoch": 0.26, + "grad_norm": 0.35668705745627666, + "learning_rate": 1.7254307004636957e-05, + "loss": 0.2987, "step": 5756 }, { - "epoch": 0.33, - "grad_norm": 0.6040346459958775, - "learning_rate": 1.56199410534785e-05, - "loss": 0.3935, + "epoch": 0.26, + "grad_norm": 0.39085556828179274, + "learning_rate": 1.7253282796660054e-05, + "loss": 0.347, "step": 5757 }, { - "epoch": 0.33, - "grad_norm": 0.5393463384567777, - "learning_rate": 1.5618401718627644e-05, - "loss": 0.2879, + "epoch": 0.26, + "grad_norm": 0.2997024556732525, + "learning_rate": 1.7252258428102465e-05, + "loss": 0.0992, "step": 5758 }, { - "epoch": 0.33, - "grad_norm": 0.4889068289885966, - "learning_rate": 1.5616862189211774e-05, - "loss": 0.3763, + "epoch": 0.26, + "grad_norm": 0.49260611058103027, + "learning_rate": 1.725123389898686e-05, + "loss": 0.3484, "step": 5759 }, { - "epoch": 0.33, - "grad_norm": 0.27513668919227896, - "learning_rate": 1.561532246528421e-05, - "loss": 0.2058, + "epoch": 0.26, + "grad_norm": 0.6227143805467956, + "learning_rate": 1.725020920933593e-05, + "loss": 0.3773, "step": 5760 }, { - "epoch": 0.33, - "grad_norm": 0.2966718812667392, - "learning_rate": 1.5613782546898268e-05, - "loss": 0.2096, + "epoch": 0.26, + "grad_norm": 0.4459339345217128, + "learning_rate": 1.724918435917235e-05, + "loss": 0.3502, "step": 5761 }, { - "epoch": 0.33, - "grad_norm": 0.9302097875569516, - "learning_rate": 1.561224243410728e-05, - "loss": 0.6853, + "epoch": 0.26, + "grad_norm": 0.38610546633587495, + "learning_rate": 1.7248159348518818e-05, + "loss": 0.2548, "step": 5762 }, { - "epoch": 0.33, - "grad_norm": 0.3034217660889585, - "learning_rate": 1.561070212696458e-05, - "loss": 0.2851, + "epoch": 0.26, + "grad_norm": 1.114210841299817, + "learning_rate": 1.7247134177398023e-05, + "loss": 0.6353, "step": 5763 }, { - "epoch": 0.33, - "grad_norm": 0.3943008222386303, - "learning_rate": 1.56091616255235e-05, - "loss": 0.2914, + "epoch": 0.26, + "grad_norm": 0.3117591093552998, + "learning_rate": 1.7246108845832667e-05, + "loss": 0.2635, "step": 5764 }, { - "epoch": 0.33, - "grad_norm": 0.7197890030209596, - "learning_rate": 1.5607620929837398e-05, - "loss": 0.4985, + "epoch": 0.26, + "grad_norm": 0.24797431233334552, + "learning_rate": 1.724508335384544e-05, + "loss": 0.1028, "step": 5765 }, { - "epoch": 0.33, - "grad_norm": 0.3846549791318257, - "learning_rate": 1.5606080039959624e-05, - "loss": 0.2175, + "epoch": 0.26, + "grad_norm": 0.529518773243945, + "learning_rate": 1.724405770145905e-05, + "loss": 0.3922, "step": 5766 }, { - "epoch": 0.33, - "grad_norm": 0.2644764393345778, - "learning_rate": 1.5604538955943542e-05, - "loss": 0.2057, + "epoch": 0.26, + "grad_norm": 0.7757320019952428, + "learning_rate": 1.724303188869621e-05, + "loss": 0.4445, "step": 5767 }, { - "epoch": 0.33, - "grad_norm": 0.5406969227154103, - "learning_rate": 1.5602997677842515e-05, - "loss": 0.4251, + "epoch": 0.26, + "grad_norm": 0.3491916514592435, + "learning_rate": 1.724200591557962e-05, + "loss": 0.2368, "step": 5768 }, { - "epoch": 0.33, - "grad_norm": 0.6047624630317593, - "learning_rate": 1.560145620570992e-05, - "loss": 0.4523, + "epoch": 0.27, + "grad_norm": 0.48172193968435517, + "learning_rate": 1.7240979782131998e-05, + "loss": 0.3551, "step": 5769 }, { - "epoch": 0.33, - "grad_norm": 0.355995131291561, - "learning_rate": 1.5599914539599135e-05, - "loss": 0.314, + "epoch": 0.27, + "grad_norm": 0.5095995204420244, + "learning_rate": 1.7239953488376066e-05, + "loss": 0.2531, "step": 5770 }, { - "epoch": 0.33, - "grad_norm": 0.4253729585784995, - "learning_rate": 1.559837267956355e-05, - "loss": 0.2929, + "epoch": 0.27, + "grad_norm": 0.28557155392344324, + "learning_rate": 1.723892703433454e-05, + "loss": 0.1312, "step": 5771 }, { - "epoch": 0.33, - "grad_norm": 0.42743246173578286, - "learning_rate": 1.559683062565656e-05, - "loss": 0.3047, + "epoch": 0.27, + "grad_norm": 0.3801121457480909, + "learning_rate": 1.7237900420030147e-05, + "loss": 0.3278, "step": 5772 }, { - "epoch": 0.33, - "grad_norm": 0.2515830454917624, - "learning_rate": 1.559528837793157e-05, - "loss": 0.2118, + "epoch": 0.27, + "grad_norm": 0.868181009286335, + "learning_rate": 1.7236873645485615e-05, + "loss": 0.4714, "step": 5773 }, { - "epoch": 0.33, - "grad_norm": 0.7515736027063699, - "learning_rate": 1.559374593644198e-05, - "loss": 0.5555, + "epoch": 0.27, + "grad_norm": 0.5822177097303524, + "learning_rate": 1.7235846710723673e-05, + "loss": 0.3931, "step": 5774 }, { - "epoch": 0.33, - "grad_norm": 0.4013581312855905, - "learning_rate": 1.559220330124121e-05, - "loss": 0.3164, + "epoch": 0.27, + "grad_norm": 0.42598965445268405, + "learning_rate": 1.7234819615767062e-05, + "loss": 0.2516, "step": 5775 }, { - "epoch": 0.33, - "grad_norm": 0.32182922729328917, - "learning_rate": 1.5590660472382682e-05, - "loss": 0.3081, + "epoch": 0.27, + "grad_norm": 0.3970620460769952, + "learning_rate": 1.7233792360638517e-05, + "loss": 0.3418, "step": 5776 }, { - "epoch": 0.33, - "grad_norm": 0.853128767784892, - "learning_rate": 1.558911744991982e-05, - "loss": 0.2577, + "epoch": 0.27, + "grad_norm": 0.2489477097711643, + "learning_rate": 1.723276494536078e-05, + "loss": 0.175, "step": 5777 }, { - "epoch": 0.33, - "grad_norm": 0.3250498927602875, - "learning_rate": 1.5587574233906063e-05, - "loss": 0.2265, + "epoch": 0.27, + "grad_norm": 0.4650641750966152, + "learning_rate": 1.72317373699566e-05, + "loss": 0.2898, "step": 5778 }, { - "epoch": 0.33, - "grad_norm": 0.30334383260105524, - "learning_rate": 1.5586030824394848e-05, - "loss": 0.2567, + "epoch": 0.27, + "grad_norm": 0.9355180217620334, + "learning_rate": 1.7230709634448723e-05, + "loss": 0.5046, "step": 5779 }, { - "epoch": 0.33, - "grad_norm": 0.36184984465163766, - "learning_rate": 1.5584487221439628e-05, - "loss": 0.3016, + "epoch": 0.27, + "grad_norm": 0.34469339706021446, + "learning_rate": 1.7229681738859904e-05, + "loss": 0.3106, "step": 5780 }, { - "epoch": 0.33, - "grad_norm": 0.46282632685077807, - "learning_rate": 1.5582943425093856e-05, - "loss": 0.3936, + "epoch": 0.27, + "grad_norm": 0.42488854443534935, + "learning_rate": 1.72286536832129e-05, + "loss": 0.2773, "step": 5781 }, { - "epoch": 0.33, - "grad_norm": 0.40893305897006493, - "learning_rate": 1.558139943541099e-05, - "loss": 0.3362, + "epoch": 0.27, + "grad_norm": 0.7875686750730964, + "learning_rate": 1.722762546753047e-05, + "loss": 0.2938, "step": 5782 }, { - "epoch": 0.33, - "grad_norm": 0.3743712544882061, - "learning_rate": 1.5579855252444506e-05, - "loss": 0.3249, + "epoch": 0.27, + "grad_norm": 0.3879048918182757, + "learning_rate": 1.7226597091835377e-05, + "loss": 0.2362, "step": 5783 }, { - "epoch": 0.33, - "grad_norm": 0.46996244742050114, - "learning_rate": 1.557831087624787e-05, - "loss": 0.1574, + "epoch": 0.27, + "grad_norm": 0.2989793748894178, + "learning_rate": 1.722556855615039e-05, + "loss": 0.2532, "step": 5784 }, { - "epoch": 0.33, - "grad_norm": 0.2683925413029673, - "learning_rate": 1.5576766306874572e-05, - "loss": 0.2282, + "epoch": 0.27, + "grad_norm": 0.9189858035343479, + "learning_rate": 1.7224539860498282e-05, + "loss": 0.5291, "step": 5785 }, { - "epoch": 0.33, - "grad_norm": 0.6926340821108188, - "learning_rate": 1.5575221544378094e-05, - "loss": 0.4933, + "epoch": 0.27, + "grad_norm": 0.8813741482748013, + "learning_rate": 1.7223511004901822e-05, + "loss": 0.5108, "step": 5786 }, { - "epoch": 0.33, - "grad_norm": 0.31226964320871636, - "learning_rate": 1.5573676588811935e-05, - "loss": 0.2602, + "epoch": 0.27, + "grad_norm": 0.41102166488470704, + "learning_rate": 1.722248198938379e-05, + "loss": 0.3309, "step": 5787 }, { - "epoch": 0.33, - "grad_norm": 0.39932555162861216, - "learning_rate": 1.5572131440229593e-05, - "loss": 0.3255, + "epoch": 0.27, + "grad_norm": 0.32237967802251427, + "learning_rate": 1.722145281396697e-05, + "loss": 0.2662, "step": 5788 }, { - "epoch": 0.33, - "grad_norm": 0.3892684727048228, - "learning_rate": 1.557058609868458e-05, - "loss": 0.2558, + "epoch": 0.27, + "grad_norm": 0.42583782811837856, + "learning_rate": 1.7220423478674143e-05, + "loss": 0.2792, "step": 5789 }, { - "epoch": 0.33, - "grad_norm": 0.2385948593781187, - "learning_rate": 1.5569040564230414e-05, - "loss": 0.1092, + "epoch": 0.27, + "grad_norm": 0.4132686269014517, + "learning_rate": 1.72193939835281e-05, + "loss": 0.3214, "step": 5790 }, { - "epoch": 0.33, - "grad_norm": 0.27404269814150173, - "learning_rate": 1.556749483692061e-05, - "loss": 0.2733, + "epoch": 0.27, + "grad_norm": 0.9032654133647734, + "learning_rate": 1.7218364328551635e-05, + "loss": 0.4587, "step": 5791 }, { - "epoch": 0.33, - "grad_norm": 0.8492486232920454, - "learning_rate": 1.5565948916808697e-05, - "loss": 0.5681, + "epoch": 0.27, + "grad_norm": 0.38153176557901086, + "learning_rate": 1.7217334513767538e-05, + "loss": 0.2955, "step": 5792 }, { - "epoch": 0.33, - "grad_norm": 0.6782578354702179, - "learning_rate": 1.5564402803948215e-05, - "loss": 0.2971, + "epoch": 0.27, + "grad_norm": 0.37079660264385733, + "learning_rate": 1.721630453919861e-05, + "loss": 0.2496, "step": 5793 }, { - "epoch": 0.33, - "grad_norm": 0.3093663841738642, - "learning_rate": 1.55628564983927e-05, - "loss": 0.2356, + "epoch": 0.27, + "grad_norm": 1.0746624865275438, + "learning_rate": 1.7215274404867654e-05, + "loss": 0.3372, "step": 5794 }, { - "epoch": 0.33, - "grad_norm": 0.3650624325489127, - "learning_rate": 1.556131000019571e-05, - "loss": 0.3337, + "epoch": 0.27, + "grad_norm": 0.5883931719571168, + "learning_rate": 1.721424411079748e-05, + "loss": 0.3618, "step": 5795 }, { - "epoch": 0.33, - "grad_norm": 0.760194714593758, - "learning_rate": 1.5559763309410787e-05, - "loss": 0.4862, + "epoch": 0.27, + "grad_norm": 0.38883734381367896, + "learning_rate": 1.7213213657010896e-05, + "loss": 0.3388, "step": 5796 }, { - "epoch": 0.33, - "grad_norm": 0.3294635430927283, - "learning_rate": 1.5558216426091505e-05, - "loss": 0.2005, + "epoch": 0.27, + "grad_norm": 0.35261168000140575, + "learning_rate": 1.721218304353071e-05, + "loss": 0.2495, "step": 5797 }, { - "epoch": 0.33, - "grad_norm": 1.077418090885813, - "learning_rate": 1.5556669350291422e-05, - "loss": 0.4722, + "epoch": 0.27, + "grad_norm": 0.3782987484267413, + "learning_rate": 1.7211152270379745e-05, + "loss": 0.2499, "step": 5798 }, { - "epoch": 0.33, - "grad_norm": 0.3504419610826631, - "learning_rate": 1.5555122082064123e-05, - "loss": 0.323, + "epoch": 0.27, + "grad_norm": 0.4598594174131196, + "learning_rate": 1.7210121337580818e-05, + "loss": 0.3261, "step": 5799 }, { - "epoch": 0.33, - "grad_norm": 0.3475869790586581, - "learning_rate": 1.5553574621463183e-05, - "loss": 0.2275, + "epoch": 0.27, + "grad_norm": 0.3873382160467594, + "learning_rate": 1.7209090245156753e-05, + "loss": 0.3042, "step": 5800 }, { - "epoch": 0.33, - "grad_norm": 0.3020170894774215, - "learning_rate": 1.5552026968542192e-05, - "loss": 0.224, + "epoch": 0.27, + "grad_norm": 0.7672132406200473, + "learning_rate": 1.720805899313038e-05, + "loss": 0.3922, "step": 5801 }, { - "epoch": 0.33, - "grad_norm": 1.697556497115101, - "learning_rate": 1.555047912335475e-05, - "loss": 0.6149, + "epoch": 0.27, + "grad_norm": 0.3591073766703548, + "learning_rate": 1.7207027581524524e-05, + "loss": 0.2865, "step": 5802 }, { - "epoch": 0.33, - "grad_norm": 0.29416917374336826, - "learning_rate": 1.5548931085954448e-05, - "loss": 0.2219, + "epoch": 0.27, + "grad_norm": 0.5236412895694764, + "learning_rate": 1.720599601036203e-05, + "loss": 0.435, "step": 5803 }, { - "epoch": 0.33, - "grad_norm": 0.6232842819271314, - "learning_rate": 1.5547382856394905e-05, - "loss": 0.3939, + "epoch": 0.27, + "grad_norm": 0.26953839278131897, + "learning_rate": 1.7204964279665722e-05, + "loss": 0.1768, "step": 5804 }, { - "epoch": 0.33, - "grad_norm": 1.1892346356469627, - "learning_rate": 1.5545834434729732e-05, - "loss": 0.8569, + "epoch": 0.27, + "grad_norm": 0.30742273712711765, + "learning_rate": 1.7203932389458455e-05, + "loss": 0.2436, "step": 5805 }, { - "epoch": 0.33, - "grad_norm": 0.3967731998369141, - "learning_rate": 1.554428582101255e-05, - "loss": 0.2805, + "epoch": 0.27, + "grad_norm": 0.9557747287363251, + "learning_rate": 1.7202900339763066e-05, + "loss": 0.597, "step": 5806 }, { - "epoch": 0.33, - "grad_norm": 0.34087023673325834, - "learning_rate": 1.554273701529699e-05, - "loss": 0.1824, + "epoch": 0.27, + "grad_norm": 0.5364717619313458, + "learning_rate": 1.7201868130602405e-05, + "loss": 0.3133, "step": 5807 }, { - "epoch": 0.33, - "grad_norm": 1.374281261563989, - "learning_rate": 1.5541188017636683e-05, - "loss": 0.678, + "epoch": 0.27, + "grad_norm": 0.3620703099182744, + "learning_rate": 1.7200835761999325e-05, + "loss": 0.2779, "step": 5808 }, { - "epoch": 0.33, - "grad_norm": 0.3489684607123253, - "learning_rate": 1.5539638828085278e-05, - "loss": 0.2842, + "epoch": 0.27, + "grad_norm": 1.2175619693003903, + "learning_rate": 1.7199803233976683e-05, + "loss": 0.7602, "step": 5809 }, { - "epoch": 0.33, - "grad_norm": 0.6789014020469017, - "learning_rate": 1.5538089446696414e-05, - "loss": 0.3743, + "epoch": 0.27, + "grad_norm": 0.31839467340084004, + "learning_rate": 1.7198770546557337e-05, + "loss": 0.2199, "step": 5810 }, { - "epoch": 0.33, - "grad_norm": 0.4519718080022967, - "learning_rate": 1.553653987352375e-05, - "loss": 0.3394, + "epoch": 0.27, + "grad_norm": 0.35701777888281405, + "learning_rate": 1.7197737699764148e-05, + "loss": 0.2316, "step": 5811 }, { - "epoch": 0.33, - "grad_norm": 0.4552790837774947, - "learning_rate": 1.553499010862095e-05, - "loss": 0.299, + "epoch": 0.27, + "grad_norm": 0.425016871666638, + "learning_rate": 1.7196704693619985e-05, + "loss": 0.329, "step": 5812 }, { - "epoch": 0.33, - "grad_norm": 0.4965276597889446, - "learning_rate": 1.553344015204168e-05, - "loss": 0.1166, + "epoch": 0.27, + "grad_norm": 0.6701916109531195, + "learning_rate": 1.7195671528147712e-05, + "loss": 0.4309, "step": 5813 }, { - "epoch": 0.33, - "grad_norm": 0.5101673833679304, - "learning_rate": 1.553189000383962e-05, - "loss": 0.394, + "epoch": 0.27, + "grad_norm": 0.3948717405906899, + "learning_rate": 1.7194638203370212e-05, + "loss": 0.2223, "step": 5814 }, { - "epoch": 0.33, - "grad_norm": 0.42668233072512435, - "learning_rate": 1.553033966406844e-05, - "loss": 0.2904, + "epoch": 0.27, + "grad_norm": 1.2256942530519768, + "learning_rate": 1.7193604719310352e-05, + "loss": 0.7725, "step": 5815 }, { - "epoch": 0.33, - "grad_norm": 0.3709815294186113, - "learning_rate": 1.552878913278184e-05, - "loss": 0.3063, + "epoch": 0.27, + "grad_norm": 0.3028268025501044, + "learning_rate": 1.719257107599102e-05, + "loss": 0.2441, "step": 5816 }, { - "epoch": 0.33, - "grad_norm": 0.7836478889317833, - "learning_rate": 1.5527238410033508e-05, - "loss": 0.4855, + "epoch": 0.27, + "grad_norm": 0.29925141493278345, + "learning_rate": 1.7191537273435098e-05, + "loss": 0.1791, "step": 5817 }, { - "epoch": 0.33, - "grad_norm": 0.43253924637788094, - "learning_rate": 1.552568749587715e-05, - "loss": 0.2718, + "epoch": 0.27, + "grad_norm": 0.64299092472545, + "learning_rate": 1.719050331166547e-05, + "loss": 0.4657, "step": 5818 }, { - "epoch": 0.33, - "grad_norm": 0.287688911198083, - "learning_rate": 1.5524136390366468e-05, - "loss": 0.2535, + "epoch": 0.27, + "grad_norm": 0.3766146370633808, + "learning_rate": 1.7189469190705027e-05, + "loss": 0.3451, "step": 5819 }, { - "epoch": 0.33, - "grad_norm": 0.723004816360338, - "learning_rate": 1.5522585093555184e-05, - "loss": 0.3463, + "epoch": 0.27, + "grad_norm": 0.3829537997146153, + "learning_rate": 1.7188434910576667e-05, + "loss": 0.2215, "step": 5820 }, { - "epoch": 0.33, - "grad_norm": 0.3808008491185971, - "learning_rate": 1.5521033605497013e-05, - "loss": 0.2936, + "epoch": 0.27, + "grad_norm": 1.3214843597360413, + "learning_rate": 1.7187400471303285e-05, + "loss": 0.8656, "step": 5821 }, { - "epoch": 0.33, - "grad_norm": 0.8200187852102342, - "learning_rate": 1.5519481926245687e-05, - "loss": 0.5662, + "epoch": 0.27, + "grad_norm": 0.4949355759743745, + "learning_rate": 1.7186365872907787e-05, + "loss": 0.2233, "step": 5822 }, { - "epoch": 0.33, - "grad_norm": 0.36108551149142204, - "learning_rate": 1.551793005585494e-05, - "loss": 0.2783, + "epoch": 0.27, + "grad_norm": 0.271422659124242, + "learning_rate": 1.718533111541307e-05, + "loss": 0.2192, "step": 5823 }, { - "epoch": 0.33, - "grad_norm": 0.3836885248758884, - "learning_rate": 1.5516377994378513e-05, - "loss": 0.288, + "epoch": 0.27, + "grad_norm": 0.4502434979928553, + "learning_rate": 1.7184296198842055e-05, + "loss": 0.309, "step": 5824 }, { - "epoch": 0.33, - "grad_norm": 0.32215157055186644, - "learning_rate": 1.551482574187015e-05, - "loss": 0.234, + "epoch": 0.27, + "grad_norm": 0.861666700479389, + "learning_rate": 1.7183261123217644e-05, + "loss": 0.4755, "step": 5825 }, { - "epoch": 0.33, - "grad_norm": 0.4798535959282858, - "learning_rate": 1.5513273298383607e-05, - "loss": 0.3197, + "epoch": 0.27, + "grad_norm": 0.37327434428736245, + "learning_rate": 1.718222588856275e-05, + "loss": 0.2514, "step": 5826 }, { - "epoch": 0.33, - "grad_norm": 0.32604749167703956, - "learning_rate": 1.551172066397265e-05, - "loss": 0.2737, + "epoch": 0.27, + "grad_norm": 1.2286720664493243, + "learning_rate": 1.71811904949003e-05, + "loss": 0.5646, "step": 5827 }, { - "epoch": 0.33, - "grad_norm": 0.9180829156266932, - "learning_rate": 1.5510167838691047e-05, - "loss": 0.5507, + "epoch": 0.27, + "grad_norm": 0.33393699519502734, + "learning_rate": 1.7180154942253216e-05, + "loss": 0.2762, "step": 5828 }, { - "epoch": 0.33, - "grad_norm": 1.2475558469396781, - "learning_rate": 1.550861482259256e-05, - "loss": 0.5742, + "epoch": 0.27, + "grad_norm": 0.4325059664873086, + "learning_rate": 1.717911923064442e-05, + "loss": 0.2711, "step": 5829 }, { - "epoch": 0.33, - "grad_norm": 0.37812167267697283, - "learning_rate": 1.5507061615730986e-05, - "loss": 0.2834, + "epoch": 0.27, + "grad_norm": 0.8094255389578909, + "learning_rate": 1.7178083360096844e-05, + "loss": 0.261, "step": 5830 }, { - "epoch": 0.34, - "grad_norm": 0.31942624860729857, - "learning_rate": 1.5505508218160103e-05, - "loss": 0.2459, + "epoch": 0.27, + "grad_norm": 0.4332613296392825, + "learning_rate": 1.717704733063342e-05, + "loss": 0.3482, "step": 5831 }, { - "epoch": 0.34, - "grad_norm": 0.5164865863370692, - "learning_rate": 1.5503954629933707e-05, - "loss": 0.3329, + "epoch": 0.27, + "grad_norm": 0.5066992236696453, + "learning_rate": 1.7176011142277086e-05, + "loss": 0.3211, "step": 5832 }, { - "epoch": 0.34, - "grad_norm": 0.4604448154652938, - "learning_rate": 1.5502400851105603e-05, - "loss": 0.1969, + "epoch": 0.27, + "grad_norm": 0.7439765878280497, + "learning_rate": 1.7174974795050782e-05, + "loss": 0.346, "step": 5833 }, { - "epoch": 0.34, - "grad_norm": 0.57149449313321, - "learning_rate": 1.5500846881729587e-05, - "loss": 0.4065, + "epoch": 0.27, + "grad_norm": 0.6234978757292291, + "learning_rate": 1.7173938288977452e-05, + "loss": 0.4406, "step": 5834 }, { - "epoch": 0.34, - "grad_norm": 0.4424655995824884, - "learning_rate": 1.5499292721859483e-05, - "loss": 0.3512, + "epoch": 0.27, + "grad_norm": 0.45017634078879964, + "learning_rate": 1.717290162408004e-05, + "loss": 0.2895, "step": 5835 }, { - "epoch": 0.34, - "grad_norm": 0.3409734791754797, - "learning_rate": 1.5497738371549108e-05, - "loss": 0.2024, + "epoch": 0.27, + "grad_norm": 0.5012433321945928, + "learning_rate": 1.7171864800381502e-05, + "loss": 0.3469, "step": 5836 }, { - "epoch": 0.34, - "grad_norm": 0.44336289462689166, - "learning_rate": 1.549618383085229e-05, - "loss": 0.3014, + "epoch": 0.27, + "grad_norm": 0.21279027224363434, + "learning_rate": 1.7170827817904787e-05, + "loss": 0.0712, "step": 5837 }, { - "epoch": 0.34, - "grad_norm": 0.3706424088281798, - "learning_rate": 1.549462909982286e-05, - "loss": 0.2969, + "epoch": 0.27, + "grad_norm": 0.4013895858292727, + "learning_rate": 1.716979067667286e-05, + "loss": 0.2712, "step": 5838 }, { - "epoch": 0.34, - "grad_norm": 0.3273551180286123, - "learning_rate": 1.5493074178514665e-05, - "loss": 0.2201, + "epoch": 0.27, + "grad_norm": 0.5904687876023449, + "learning_rate": 1.7168753376708673e-05, + "loss": 0.428, "step": 5839 }, { - "epoch": 0.34, - "grad_norm": 1.4602400150554773, - "learning_rate": 1.5491519066981547e-05, - "loss": 0.7239, + "epoch": 0.27, + "grad_norm": 0.48353108877238155, + "learning_rate": 1.71677159180352e-05, + "loss": 0.2929, "step": 5840 }, { - "epoch": 0.34, - "grad_norm": 1.2934303316229814, - "learning_rate": 1.5489963765277356e-05, - "loss": 0.8344, + "epoch": 0.27, + "grad_norm": 0.3557112288052493, + "learning_rate": 1.7166678300675398e-05, + "loss": 0.2895, "step": 5841 }, { - "epoch": 0.34, - "grad_norm": 0.37170829852311305, - "learning_rate": 1.548840827345596e-05, - "loss": 0.2075, + "epoch": 0.27, + "grad_norm": 0.3616229942847358, + "learning_rate": 1.716564052465225e-05, + "loss": 0.2292, "step": 5842 }, { - "epoch": 0.34, - "grad_norm": 0.3690170784055627, - "learning_rate": 1.5486852591571217e-05, - "loss": 0.3312, + "epoch": 0.27, + "grad_norm": 0.3621274955800064, + "learning_rate": 1.7164602589988728e-05, + "loss": 0.2557, "step": 5843 }, { - "epoch": 0.34, - "grad_norm": 0.42868695087431713, - "learning_rate": 1.5485296719677005e-05, - "loss": 0.2912, + "epoch": 0.27, + "grad_norm": 0.37199458099465066, + "learning_rate": 1.716356449670781e-05, + "loss": 0.3001, "step": 5844 }, { - "epoch": 0.34, - "grad_norm": 0.32516213319606574, - "learning_rate": 1.5483740657827205e-05, - "loss": 0.2423, + "epoch": 0.27, + "grad_norm": 1.5768103943665102, + "learning_rate": 1.716252624483248e-05, + "loss": 0.7813, "step": 5845 }, { - "epoch": 0.34, - "grad_norm": 0.8624921342883979, - "learning_rate": 1.5482184406075705e-05, - "loss": 0.3653, + "epoch": 0.27, + "grad_norm": 0.7930134840988093, + "learning_rate": 1.7161487834385715e-05, + "loss": 0.4029, "step": 5846 }, { - "epoch": 0.34, - "grad_norm": 0.43311550871258736, - "learning_rate": 1.5480627964476392e-05, - "loss": 0.3782, + "epoch": 0.27, + "grad_norm": 0.38940351610650864, + "learning_rate": 1.716044926539052e-05, + "loss": 0.2844, "step": 5847 }, { - "epoch": 0.34, - "grad_norm": 0.3466225539727474, - "learning_rate": 1.547907133308317e-05, - "loss": 0.2827, + "epoch": 0.27, + "grad_norm": 0.6200257502722679, + "learning_rate": 1.7159410537869873e-05, + "loss": 0.3911, "step": 5848 }, { - "epoch": 0.34, - "grad_norm": 0.6526142864376929, - "learning_rate": 1.547751451194994e-05, - "loss": 0.3642, + "epoch": 0.27, + "grad_norm": 0.3000337809963599, + "learning_rate": 1.715837165184678e-05, + "loss": 0.1673, "step": 5849 }, { - "epoch": 0.34, - "grad_norm": 0.3194788764576168, - "learning_rate": 1.5475957501130622e-05, - "loss": 0.3314, + "epoch": 0.27, + "grad_norm": 0.39463446809050373, + "learning_rate": 1.715733260734424e-05, + "loss": 0.2032, "step": 5850 }, { - "epoch": 0.34, - "grad_norm": 0.33247814679611337, - "learning_rate": 1.5474400300679128e-05, - "loss": 0.2097, + "epoch": 0.27, + "grad_norm": 0.45259417730496887, + "learning_rate": 1.715629340438525e-05, + "loss": 0.3353, "step": 5851 }, { - "epoch": 0.34, - "grad_norm": 0.34086337965321845, - "learning_rate": 1.5472842910649387e-05, - "loss": 0.2012, + "epoch": 0.27, + "grad_norm": 0.8668755986058073, + "learning_rate": 1.7155254042992827e-05, + "loss": 0.4244, "step": 5852 }, { - "epoch": 0.34, - "grad_norm": 0.8793044262444761, - "learning_rate": 1.5471285331095334e-05, - "loss": 0.6676, + "epoch": 0.27, + "grad_norm": 0.4159204373056371, + "learning_rate": 1.7154214523189972e-05, + "loss": 0.2308, "step": 5853 }, { - "epoch": 0.34, - "grad_norm": 0.5278240555166073, - "learning_rate": 1.5469727562070904e-05, - "loss": 0.3511, + "epoch": 0.27, + "grad_norm": 0.5479494069423346, + "learning_rate": 1.71531748449997e-05, + "loss": 0.3447, "step": 5854 }, { - "epoch": 0.34, - "grad_norm": 0.3723179875730455, - "learning_rate": 1.5468169603630045e-05, - "loss": 0.2741, + "epoch": 0.27, + "grad_norm": 0.3224629679964485, + "learning_rate": 1.7152135008445037e-05, + "loss": 0.2353, "step": 5855 }, { - "epoch": 0.34, - "grad_norm": 0.4558277232143511, - "learning_rate": 1.5466611455826703e-05, - "loss": 0.2971, + "epoch": 0.27, + "grad_norm": 0.35217299699708465, + "learning_rate": 1.7151095013548996e-05, + "loss": 0.2112, "step": 5856 }, { - "epoch": 0.34, - "grad_norm": 0.3657369354981838, - "learning_rate": 1.5465053118714846e-05, - "loss": 0.2267, + "epoch": 0.27, + "grad_norm": 1.2285932683964818, + "learning_rate": 1.7150054860334605e-05, + "loss": 0.5675, "step": 5857 }, { - "epoch": 0.34, - "grad_norm": 0.40254068196433923, - "learning_rate": 1.5463494592348435e-05, - "loss": 0.3381, + "epoch": 0.27, + "grad_norm": 0.677607561893836, + "learning_rate": 1.714901454882489e-05, + "loss": 0.4906, "step": 5858 }, { - "epoch": 0.34, - "grad_norm": 0.4389789142234693, - "learning_rate": 1.5461935876781436e-05, - "loss": 0.3111, + "epoch": 0.27, + "grad_norm": 0.3286106397018948, + "learning_rate": 1.714797407904288e-05, + "loss": 0.2325, "step": 5859 }, { - "epoch": 0.34, - "grad_norm": 0.42792874304101514, - "learning_rate": 1.5460376972067837e-05, - "loss": 0.3259, + "epoch": 0.27, + "grad_norm": 0.5446666253228418, + "learning_rate": 1.7146933451011617e-05, + "loss": 0.4133, "step": 5860 }, { - "epoch": 0.34, - "grad_norm": 0.5828730662973869, - "learning_rate": 1.5458817878261617e-05, - "loss": 0.3779, + "epoch": 0.27, + "grad_norm": 0.31587820781403214, + "learning_rate": 1.7145892664754133e-05, + "loss": 0.1858, "step": 5861 }, { - "epoch": 0.34, - "grad_norm": 0.35919316933562373, - "learning_rate": 1.5457258595416766e-05, - "loss": 0.2507, + "epoch": 0.27, + "grad_norm": 0.4927012177450915, + "learning_rate": 1.7144851720293473e-05, + "loss": 0.3047, "step": 5862 }, { - "epoch": 0.34, - "grad_norm": 0.33609490374293677, - "learning_rate": 1.5455699123587286e-05, - "loss": 0.225, + "epoch": 0.27, + "grad_norm": 0.47065933721900155, + "learning_rate": 1.7143810617652682e-05, + "loss": 0.2839, "step": 5863 }, { - "epoch": 0.34, - "grad_norm": 0.34888688858881917, - "learning_rate": 1.5454139462827183e-05, - "loss": 0.2434, + "epoch": 0.27, + "grad_norm": 0.6183371403105031, + "learning_rate": 1.714276935685481e-05, + "loss": 0.4062, "step": 5864 }, { - "epoch": 0.34, - "grad_norm": 0.743990986427087, - "learning_rate": 1.5452579613190462e-05, - "loss": 0.4317, + "epoch": 0.27, + "grad_norm": 0.3945464366476125, + "learning_rate": 1.7141727937922912e-05, + "loss": 0.2879, "step": 5865 }, { - "epoch": 0.34, - "grad_norm": 0.3496844176665509, - "learning_rate": 1.5451019574731147e-05, - "loss": 0.3032, + "epoch": 0.27, + "grad_norm": 0.7058531087662331, + "learning_rate": 1.7140686360880036e-05, + "loss": 0.3724, "step": 5866 }, { - "epoch": 0.34, - "grad_norm": 0.39178507409648256, - "learning_rate": 1.5449459347503255e-05, - "loss": 0.3358, + "epoch": 0.27, + "grad_norm": 0.3434273443611105, + "learning_rate": 1.713964462574925e-05, + "loss": 0.2326, "step": 5867 }, { - "epoch": 0.34, - "grad_norm": 0.30121681347934715, - "learning_rate": 1.5447898931560824e-05, - "loss": 0.2011, + "epoch": 0.27, + "grad_norm": 0.4973094408858428, + "learning_rate": 1.713860273255361e-05, + "loss": 0.304, "step": 5868 }, { - "epoch": 0.34, - "grad_norm": 0.2932285924621865, - "learning_rate": 1.544633832695788e-05, - "loss": 0.205, + "epoch": 0.27, + "grad_norm": 0.4709200764103284, + "learning_rate": 1.7137560681316186e-05, + "loss": 0.3315, "step": 5869 }, { - "epoch": 0.34, - "grad_norm": 0.3773063886258503, - "learning_rate": 1.544477753374848e-05, - "loss": 0.3348, + "epoch": 0.27, + "grad_norm": 0.8171774224953345, + "learning_rate": 1.713651847206005e-05, + "loss": 0.5632, "step": 5870 }, { - "epoch": 0.34, - "grad_norm": 0.4860936216259758, - "learning_rate": 1.5443216551986667e-05, - "loss": 0.4027, + "epoch": 0.27, + "grad_norm": 0.43955160899274065, + "learning_rate": 1.7135476104808272e-05, + "loss": 0.2804, "step": 5871 }, { - "epoch": 0.34, - "grad_norm": 0.5365140122310895, - "learning_rate": 1.5441655381726496e-05, - "loss": 0.2675, + "epoch": 0.27, + "grad_norm": 0.35955418292862135, + "learning_rate": 1.713443357958393e-05, + "loss": 0.2526, "step": 5872 }, { - "epoch": 0.34, - "grad_norm": 0.5126169170705192, - "learning_rate": 1.5440094023022035e-05, - "loss": 0.347, + "epoch": 0.27, + "grad_norm": 0.37398628019480346, + "learning_rate": 1.7133390896410106e-05, + "loss": 0.2334, "step": 5873 }, { - "epoch": 0.34, - "grad_norm": 0.3880198754870328, - "learning_rate": 1.5438532475927354e-05, - "loss": 0.3277, + "epoch": 0.27, + "grad_norm": 0.5366213156376702, + "learning_rate": 1.7132348055309883e-05, + "loss": 0.4113, "step": 5874 }, { - "epoch": 0.34, - "grad_norm": 0.3398774666350326, - "learning_rate": 1.5436970740496527e-05, - "loss": 0.1709, + "epoch": 0.27, + "grad_norm": 0.327035655663534, + "learning_rate": 1.713130505630635e-05, + "loss": 0.2857, "step": 5875 }, { - "epoch": 0.34, - "grad_norm": 0.2934353428437412, - "learning_rate": 1.5435408816783635e-05, - "loss": 0.2122, + "epoch": 0.27, + "grad_norm": 0.7560665579329858, + "learning_rate": 1.713026189942259e-05, + "loss": 0.3955, "step": 5876 }, { - "epoch": 0.34, - "grad_norm": 1.1948470060235745, - "learning_rate": 1.543384670484277e-05, - "loss": 0.5015, + "epoch": 0.27, + "grad_norm": 0.40662581397332515, + "learning_rate": 1.712921858468171e-05, + "loss": 0.2842, "step": 5877 }, { - "epoch": 0.34, - "grad_norm": 0.37606014000496846, - "learning_rate": 1.5432284404728027e-05, - "loss": 0.256, + "epoch": 0.27, + "grad_norm": 0.5805503852268482, + "learning_rate": 1.71281751121068e-05, + "loss": 0.4002, "step": 5878 }, { - "epoch": 0.34, - "grad_norm": 0.4058687646461349, - "learning_rate": 1.5430721916493507e-05, - "loss": 0.3533, + "epoch": 0.27, + "grad_norm": 0.33117583284620183, + "learning_rate": 1.712713148172096e-05, + "loss": 0.2203, "step": 5879 }, { - "epoch": 0.34, - "grad_norm": 0.8713355870009596, - "learning_rate": 1.542915924019332e-05, - "loss": 0.6042, + "epoch": 0.27, + "grad_norm": 0.2829564150260896, + "learning_rate": 1.71260876935473e-05, + "loss": 0.2386, "step": 5880 }, { - "epoch": 0.34, - "grad_norm": 0.2912589294582795, - "learning_rate": 1.5427596375881587e-05, - "loss": 0.1711, + "epoch": 0.27, + "grad_norm": 0.5673148020543327, + "learning_rate": 1.712504374760893e-05, + "loss": 0.397, "step": 5881 }, { - "epoch": 0.34, - "grad_norm": 0.6110141021008527, - "learning_rate": 1.5426033323612425e-05, - "loss": 0.3838, + "epoch": 0.27, + "grad_norm": 0.9633938669381699, + "learning_rate": 1.7123999643928956e-05, + "loss": 0.5792, "step": 5882 }, { - "epoch": 0.34, - "grad_norm": 0.8743221394886532, - "learning_rate": 1.5424470083439958e-05, - "loss": 0.3455, + "epoch": 0.27, + "grad_norm": 0.30280809128875374, + "learning_rate": 1.7122955382530496e-05, + "loss": 0.2618, "step": 5883 }, { - "epoch": 0.34, - "grad_norm": 0.6854045258290827, - "learning_rate": 1.5422906655418327e-05, - "loss": 0.4075, + "epoch": 0.27, + "grad_norm": 0.5748785898886367, + "learning_rate": 1.7121910963436667e-05, + "loss": 0.4187, "step": 5884 }, { - "epoch": 0.34, - "grad_norm": 0.28645214108364625, - "learning_rate": 1.5421343039601672e-05, - "loss": 0.185, + "epoch": 0.27, + "grad_norm": 0.4027072858325578, + "learning_rate": 1.7120866386670596e-05, + "loss": 0.2073, "step": 5885 }, { - "epoch": 0.34, - "grad_norm": 0.39523662011429733, - "learning_rate": 1.5419779236044142e-05, - "loss": 0.3153, + "epoch": 0.27, + "grad_norm": 0.45659649518877693, + "learning_rate": 1.711982165225541e-05, + "loss": 0.326, "step": 5886 }, { - "epoch": 0.34, - "grad_norm": 0.6709944581548378, - "learning_rate": 1.541821524479989e-05, - "loss": 0.4063, + "epoch": 0.27, + "grad_norm": 0.4084976178266795, + "learning_rate": 1.711877676021423e-05, + "loss": 0.3211, "step": 5887 }, { - "epoch": 0.34, - "grad_norm": 0.3576383977318532, - "learning_rate": 1.541665106592307e-05, - "loss": 0.2235, + "epoch": 0.27, + "grad_norm": 1.013471602239002, + "learning_rate": 1.7117731710570195e-05, + "loss": 0.646, "step": 5888 }, { - "epoch": 0.34, - "grad_norm": 1.0212539106154426, - "learning_rate": 1.5415086699467864e-05, - "loss": 0.4622, + "epoch": 0.27, + "grad_norm": 0.2144267759206389, + "learning_rate": 1.711668650334644e-05, + "loss": 0.1026, "step": 5889 }, { - "epoch": 0.34, - "grad_norm": 0.39618828216583873, - "learning_rate": 1.5413522145488437e-05, - "loss": 0.3115, + "epoch": 0.27, + "grad_norm": 0.4474988324901726, + "learning_rate": 1.71156411385661e-05, + "loss": 0.3286, "step": 5890 }, { - "epoch": 0.34, - "grad_norm": 0.27461183094712965, - "learning_rate": 1.541195740403897e-05, - "loss": 0.1857, + "epoch": 0.27, + "grad_norm": 0.40574563565382427, + "learning_rate": 1.7114595616252333e-05, + "loss": 0.3439, "step": 5891 }, { - "epoch": 0.34, - "grad_norm": 0.9085111519204856, - "learning_rate": 1.541039247517365e-05, - "loss": 0.5692, + "epoch": 0.27, + "grad_norm": 0.582548240989252, + "learning_rate": 1.7113549936428268e-05, + "loss": 0.2617, "step": 5892 }, { - "epoch": 0.34, - "grad_norm": 0.8533930219299408, - "learning_rate": 1.5408827358946675e-05, - "loss": 0.5139, + "epoch": 0.27, + "grad_norm": 0.3260827288793383, + "learning_rate": 1.7112504099117068e-05, + "loss": 0.2732, "step": 5893 }, { - "epoch": 0.34, - "grad_norm": 0.4079774306429693, - "learning_rate": 1.5407262055412238e-05, - "loss": 0.2896, + "epoch": 0.27, + "grad_norm": 1.2391769374618, + "learning_rate": 1.711145810434188e-05, + "loss": 0.8635, "step": 5894 }, { - "epoch": 0.34, - "grad_norm": 0.7506257968661608, - "learning_rate": 1.540569656462455e-05, - "loss": 0.3278, + "epoch": 0.27, + "grad_norm": 0.29249206923785054, + "learning_rate": 1.7110411952125865e-05, + "loss": 0.1999, "step": 5895 }, { - "epoch": 0.34, - "grad_norm": 0.6944121449841651, - "learning_rate": 1.5404130886637822e-05, - "loss": 0.4847, + "epoch": 0.27, + "grad_norm": 0.40388406598616816, + "learning_rate": 1.710936564249218e-05, + "loss": 0.3259, "step": 5896 }, { - "epoch": 0.34, - "grad_norm": 0.289549678372288, - "learning_rate": 1.5402565021506273e-05, - "loss": 0.2154, + "epoch": 0.27, + "grad_norm": 0.7120424548109571, + "learning_rate": 1.7108319175463995e-05, + "loss": 0.4804, "step": 5897 }, { - "epoch": 0.34, - "grad_norm": 0.3543644317502926, - "learning_rate": 1.540099896928413e-05, - "loss": 0.237, + "epoch": 0.27, + "grad_norm": 0.3619483429348296, + "learning_rate": 1.710727255106447e-05, + "loss": 0.2629, "step": 5898 }, { - "epoch": 0.34, - "grad_norm": 0.5313370994029551, - "learning_rate": 1.5399432730025626e-05, - "loss": 0.3474, + "epoch": 0.27, + "grad_norm": 0.42753785654649634, + "learning_rate": 1.7106225769316787e-05, + "loss": 0.3433, "step": 5899 }, { - "epoch": 0.34, - "grad_norm": 0.3858996506274038, - "learning_rate": 1.5397866303784996e-05, - "loss": 0.3342, + "epoch": 0.27, + "grad_norm": 0.561893768698309, + "learning_rate": 1.7105178830244114e-05, + "loss": 0.4462, "step": 5900 }, { - "epoch": 0.34, - "grad_norm": 0.8419789834381768, - "learning_rate": 1.539629969061649e-05, - "loss": 0.428, + "epoch": 0.27, + "grad_norm": 0.2688047528381382, + "learning_rate": 1.7104131733869626e-05, + "loss": 0.1582, "step": 5901 }, { - "epoch": 0.34, - "grad_norm": 0.32299250679010993, - "learning_rate": 1.539473289057436e-05, - "loss": 0.3036, + "epoch": 0.27, + "grad_norm": 0.42863858502218144, + "learning_rate": 1.710308448021651e-05, + "loss": 0.2429, "step": 5902 }, { - "epoch": 0.34, - "grad_norm": 0.27648283554967706, - "learning_rate": 1.5393165903712856e-05, - "loss": 0.2395, + "epoch": 0.27, + "grad_norm": 0.41429441121269206, + "learning_rate": 1.710203706930795e-05, + "loss": 0.3384, "step": 5903 }, { - "epoch": 0.34, - "grad_norm": 0.2843375577344682, - "learning_rate": 1.5391598730086254e-05, - "loss": 0.1435, + "epoch": 0.27, + "grad_norm": 0.5679975406900668, + "learning_rate": 1.7100989501167132e-05, + "loss": 0.3788, "step": 5904 }, { - "epoch": 0.34, - "grad_norm": 0.5555925119853822, - "learning_rate": 1.539003136974882e-05, - "loss": 0.3774, + "epoch": 0.27, + "grad_norm": 0.39518832098371826, + "learning_rate": 1.7099941775817254e-05, + "loss": 0.2571, "step": 5905 }, { - "epoch": 0.34, - "grad_norm": 0.3144089559292263, - "learning_rate": 1.5388463822754827e-05, - "loss": 0.2653, + "epoch": 0.27, + "grad_norm": 0.39310314822991255, + "learning_rate": 1.7098893893281506e-05, + "loss": 0.3205, "step": 5906 }, { - "epoch": 0.34, - "grad_norm": 0.4289801488875986, - "learning_rate": 1.538689608915857e-05, - "loss": 0.3952, + "epoch": 0.27, + "grad_norm": 0.3376500534049663, + "learning_rate": 1.709784585358309e-05, + "loss": 0.2525, "step": 5907 }, { - "epoch": 0.34, - "grad_norm": 0.4080185198275796, - "learning_rate": 1.5385328169014325e-05, - "loss": 0.2654, + "epoch": 0.27, + "grad_norm": 0.3682762590735687, + "learning_rate": 1.709679765674521e-05, + "loss": 0.2259, "step": 5908 }, { - "epoch": 0.34, - "grad_norm": 0.23617679924423607, - "learning_rate": 1.53837600623764e-05, - "loss": 0.1897, + "epoch": 0.27, + "grad_norm": 0.6716331436295712, + "learning_rate": 1.7095749302791067e-05, + "loss": 0.4441, "step": 5909 }, { - "epoch": 0.34, - "grad_norm": 0.3640319175940855, - "learning_rate": 1.5382191769299096e-05, - "loss": 0.3338, + "epoch": 0.27, + "grad_norm": 0.8088737547134324, + "learning_rate": 1.7094700791743872e-05, + "loss": 0.4461, "step": 5910 }, { - "epoch": 0.34, - "grad_norm": 0.5949894824883931, - "learning_rate": 1.5380623289836724e-05, - "loss": 0.2869, + "epoch": 0.27, + "grad_norm": 0.2960270430871615, + "learning_rate": 1.709365212362684e-05, + "loss": 0.2436, "step": 5911 }, { - "epoch": 0.34, - "grad_norm": 0.33508321042850364, - "learning_rate": 1.5379054624043596e-05, - "loss": 0.3238, + "epoch": 0.27, + "grad_norm": 1.2843195248368082, + "learning_rate": 1.709260329846319e-05, + "loss": 0.8546, "step": 5912 }, { - "epoch": 0.34, - "grad_norm": 1.042458284916835, - "learning_rate": 1.537748577197404e-05, - "loss": 0.7016, + "epoch": 0.27, + "grad_norm": 0.31189513533251706, + "learning_rate": 1.7091554316276138e-05, + "loss": 0.1724, "step": 5913 }, { - "epoch": 0.34, - "grad_norm": 0.28987327834740845, - "learning_rate": 1.537591673368238e-05, - "loss": 0.2563, + "epoch": 0.27, + "grad_norm": 0.4602971756173651, + "learning_rate": 1.7090505177088906e-05, + "loss": 0.3303, "step": 5914 }, { - "epoch": 0.34, - "grad_norm": 0.25989501170956886, - "learning_rate": 1.5374347509222962e-05, - "loss": 0.2252, + "epoch": 0.27, + "grad_norm": 0.43344008028450426, + "learning_rate": 1.7089455880924724e-05, + "loss": 0.2971, "step": 5915 }, { - "epoch": 0.34, - "grad_norm": 0.4262012511260465, - "learning_rate": 1.5372778098650115e-05, - "loss": 0.2876, + "epoch": 0.27, + "grad_norm": 0.7778317419096797, + "learning_rate": 1.7088406427806825e-05, + "loss": 0.3723, "step": 5916 }, { - "epoch": 0.34, - "grad_norm": 0.6347305763733735, - "learning_rate": 1.5371208502018194e-05, - "loss": 0.3217, + "epoch": 0.27, + "grad_norm": 0.38608544339889544, + "learning_rate": 1.708735681775844e-05, + "loss": 0.3147, "step": 5917 }, { - "epoch": 0.34, - "grad_norm": 0.312424850972814, - "learning_rate": 1.5369638719381555e-05, - "loss": 0.2914, + "epoch": 0.27, + "grad_norm": 0.46509658491909134, + "learning_rate": 1.70863070508028e-05, + "loss": 0.3325, "step": 5918 }, { - "epoch": 0.34, - "grad_norm": 1.4014464340135386, - "learning_rate": 1.5368068750794557e-05, - "loss": 0.7689, + "epoch": 0.27, + "grad_norm": 0.320389087123723, + "learning_rate": 1.7085257126963154e-05, + "loss": 0.2496, "step": 5919 }, { - "epoch": 0.34, - "grad_norm": 0.6480541705283047, - "learning_rate": 1.5366498596311568e-05, - "loss": 0.4657, + "epoch": 0.27, + "grad_norm": 0.32782006316907925, + "learning_rate": 1.7084207046262745e-05, + "loss": 0.2529, "step": 5920 }, { - "epoch": 0.34, - "grad_norm": 0.2557476620889882, - "learning_rate": 1.5364928255986966e-05, - "loss": 0.1722, + "epoch": 0.27, + "grad_norm": 0.5490537286663046, + "learning_rate": 1.7083156808724817e-05, + "loss": 0.3929, "step": 5921 }, { - "epoch": 0.34, - "grad_norm": 0.3457499150185082, - "learning_rate": 1.5363357729875126e-05, - "loss": 0.2854, + "epoch": 0.27, + "grad_norm": 0.468908744682117, + "learning_rate": 1.708210641437263e-05, + "loss": 0.3265, "step": 5922 }, { - "epoch": 0.34, - "grad_norm": 0.7202941326070144, - "learning_rate": 1.536178701803044e-05, - "loss": 0.392, + "epoch": 0.27, + "grad_norm": 0.38405776769562694, + "learning_rate": 1.708105586322943e-05, + "loss": 0.3092, "step": 5923 }, { - "epoch": 0.34, - "grad_norm": 0.34428868696575277, - "learning_rate": 1.5360216120507302e-05, - "loss": 0.2324, + "epoch": 0.27, + "grad_norm": 0.8326960327561136, + "learning_rate": 1.7080005155318476e-05, + "loss": 0.5483, "step": 5924 }, { - "epoch": 0.34, - "grad_norm": 0.5147196984487438, - "learning_rate": 1.5358645037360108e-05, - "loss": 0.4216, + "epoch": 0.27, + "grad_norm": 0.43608237402044575, + "learning_rate": 1.7078954290663033e-05, + "loss": 0.2795, "step": 5925 }, { - "epoch": 0.34, - "grad_norm": 0.537087396199377, - "learning_rate": 1.535707376864327e-05, - "loss": 0.377, + "epoch": 0.27, + "grad_norm": 0.37808866573357075, + "learning_rate": 1.7077903269286366e-05, + "loss": 0.3198, "step": 5926 }, { - "epoch": 0.34, - "grad_norm": 0.34006389687556765, - "learning_rate": 1.5355502314411194e-05, - "loss": 0.1906, + "epoch": 0.27, + "grad_norm": 0.379720954350531, + "learning_rate": 1.707685209121174e-05, + "loss": 0.2869, "step": 5927 }, { - "epoch": 0.34, - "grad_norm": 0.3373315399765145, - "learning_rate": 1.5353930674718305e-05, - "loss": 0.2455, + "epoch": 0.27, + "grad_norm": 0.5178370784686631, + "learning_rate": 1.707580075646243e-05, + "loss": 0.1615, "step": 5928 }, { - "epoch": 0.34, - "grad_norm": 0.46153518536615695, - "learning_rate": 1.5352358849619024e-05, - "loss": 0.3767, + "epoch": 0.27, + "grad_norm": 0.39359873889018226, + "learning_rate": 1.7074749265061714e-05, + "loss": 0.3271, "step": 5929 }, { - "epoch": 0.34, - "grad_norm": 0.3368768244126122, - "learning_rate": 1.535078683916779e-05, - "loss": 0.2229, + "epoch": 0.27, + "grad_norm": 0.5957345151167208, + "learning_rate": 1.7073697617032867e-05, + "loss": 0.4118, "step": 5930 }, { - "epoch": 0.34, - "grad_norm": 1.1003227430828704, - "learning_rate": 1.5349214643419034e-05, - "loss": 0.6771, + "epoch": 0.27, + "grad_norm": 0.4582023081405017, + "learning_rate": 1.7072645812399172e-05, + "loss": 0.3029, "step": 5931 }, { - "epoch": 0.34, - "grad_norm": 1.4907212686940303, - "learning_rate": 1.5347642262427206e-05, - "loss": 0.8189, + "epoch": 0.27, + "grad_norm": 0.39460801200391943, + "learning_rate": 1.7071593851183915e-05, + "loss": 0.3449, "step": 5932 }, { - "epoch": 0.34, - "grad_norm": 0.3184937333207399, - "learning_rate": 1.5346069696246758e-05, - "loss": 0.2215, + "epoch": 0.27, + "grad_norm": 0.32835350756640214, + "learning_rate": 1.7070541733410383e-05, + "loss": 0.167, "step": 5933 }, { - "epoch": 0.34, - "grad_norm": 0.41845407671672097, - "learning_rate": 1.534449694493215e-05, - "loss": 0.2991, + "epoch": 0.27, + "grad_norm": 0.34307047054451734, + "learning_rate": 1.7069489459101876e-05, + "loss": 0.2585, "step": 5934 }, { - "epoch": 0.34, - "grad_norm": 0.4774275005408674, - "learning_rate": 1.534292400853784e-05, - "loss": 0.3517, + "epoch": 0.27, + "grad_norm": 0.4075466560552005, + "learning_rate": 1.7068437028281683e-05, + "loss": 0.316, "step": 5935 }, { - "epoch": 0.34, - "grad_norm": 0.379325144992996, - "learning_rate": 1.53413508871183e-05, - "loss": 0.2905, + "epoch": 0.27, + "grad_norm": 1.4424457538623434, + "learning_rate": 1.7067384440973106e-05, + "loss": 0.932, "step": 5936 }, { - "epoch": 0.34, - "grad_norm": 0.563366841637593, - "learning_rate": 1.5339777580728003e-05, - "loss": 0.3085, + "epoch": 0.27, + "grad_norm": 0.6437089694640437, + "learning_rate": 1.7066331697199454e-05, + "loss": 0.4623, "step": 5937 }, { - "epoch": 0.34, - "grad_norm": 0.463202999933601, - "learning_rate": 1.5338204089421447e-05, - "loss": 0.3625, + "epoch": 0.27, + "grad_norm": 0.3838995757116502, + "learning_rate": 1.7065278796984027e-05, + "loss": 0.2125, "step": 5938 }, { - "epoch": 0.34, - "grad_norm": 0.3695255382677873, - "learning_rate": 1.5336630413253108e-05, - "loss": 0.2843, + "epoch": 0.27, + "grad_norm": 0.2518315059890443, + "learning_rate": 1.7064225740350137e-05, + "loss": 0.2132, "step": 5939 }, { - "epoch": 0.34, - "grad_norm": 0.6537639861062193, - "learning_rate": 1.533505655227749e-05, - "loss": 0.3645, + "epoch": 0.27, + "grad_norm": 0.9210562135600497, + "learning_rate": 1.7063172527321096e-05, + "loss": 0.4485, "step": 5940 }, { - "epoch": 0.34, - "grad_norm": 0.255986281866402, - "learning_rate": 1.533348250654909e-05, - "loss": 0.2218, + "epoch": 0.27, + "grad_norm": 0.36665256513667205, + "learning_rate": 1.7062119157920222e-05, + "loss": 0.2339, "step": 5941 }, { - "epoch": 0.34, - "grad_norm": 0.40879555334744677, - "learning_rate": 1.5331908276122424e-05, - "loss": 0.3137, + "epoch": 0.27, + "grad_norm": 0.41921546425270406, + "learning_rate": 1.706106563217084e-05, + "loss": 0.3369, "step": 5942 }, { - "epoch": 0.34, - "grad_norm": 0.9387421903416497, - "learning_rate": 1.5330333861051998e-05, - "loss": 0.5082, + "epoch": 0.27, + "grad_norm": 0.7684850884279384, + "learning_rate": 1.706001195009627e-05, + "loss": 0.4465, "step": 5943 }, { - "epoch": 0.34, - "grad_norm": 1.137603306784779, - "learning_rate": 1.5328759261392344e-05, - "loss": 0.5119, - "step": 5944 + "epoch": 0.27, + "grad_norm": 0.350611482249508, + "learning_rate": 1.7058958111719836e-05, + "loss": 0.2499, + "step": 5944 }, { - "epoch": 0.34, - "grad_norm": 0.3617661594873792, - "learning_rate": 1.5327184477197984e-05, - "loss": 0.301, + "epoch": 0.27, + "grad_norm": 0.31333883509907856, + "learning_rate": 1.7057904117064875e-05, + "loss": 0.2046, "step": 5945 }, { - "epoch": 0.34, - "grad_norm": 0.443054484749188, - "learning_rate": 1.5325609508523456e-05, - "loss": 0.3465, + "epoch": 0.27, + "grad_norm": 0.4747905032968039, + "learning_rate": 1.705684996615472e-05, + "loss": 0.3294, "step": 5946 }, { - "epoch": 0.34, - "grad_norm": 0.2663146536046802, - "learning_rate": 1.53240343554233e-05, - "loss": 0.1144, + "epoch": 0.27, + "grad_norm": 0.3962397597331546, + "learning_rate": 1.7055795659012707e-05, + "loss": 0.2201, "step": 5947 }, { - "epoch": 0.34, - "grad_norm": 0.38139187527879836, - "learning_rate": 1.532245901795206e-05, - "loss": 0.2786, + "epoch": 0.27, + "grad_norm": 0.8090891394958526, + "learning_rate": 1.705474119566218e-05, + "loss": 0.5365, "step": 5948 }, { - "epoch": 0.34, - "grad_norm": 0.5305598314897465, - "learning_rate": 1.5320883496164295e-05, - "loss": 0.3807, + "epoch": 0.27, + "grad_norm": 0.7227864126728298, + "learning_rate": 1.7053686576126482e-05, + "loss": 0.4698, "step": 5949 }, { - "epoch": 0.34, - "grad_norm": 0.4413082084005958, - "learning_rate": 1.5319307790114563e-05, - "loss": 0.3002, + "epoch": 0.27, + "grad_norm": 0.4635124470831637, + "learning_rate": 1.7052631800428962e-05, + "loss": 0.2808, "step": 5950 }, { - "epoch": 0.34, - "grad_norm": 0.3583027015117161, - "learning_rate": 1.5317731899857434e-05, - "loss": 0.3078, + "epoch": 0.27, + "grad_norm": 0.39777661899904093, + "learning_rate": 1.705157686859297e-05, + "loss": 0.2728, "step": 5951 }, { - "epoch": 0.34, - "grad_norm": 0.7092834562172043, - "learning_rate": 1.5316155825447476e-05, - "loss": 0.4817, + "epoch": 0.27, + "grad_norm": 0.3408020935369634, + "learning_rate": 1.7050521780641864e-05, + "loss": 0.2248, "step": 5952 }, { - "epoch": 0.34, - "grad_norm": 0.2955321017741597, - "learning_rate": 1.5314579566939274e-05, - "loss": 0.1988, + "epoch": 0.27, + "grad_norm": 0.4014637825287904, + "learning_rate": 1.7049466536599e-05, + "loss": 0.2662, "step": 5953 }, { - "epoch": 0.34, - "grad_norm": 0.37715745593890787, - "learning_rate": 1.5313003124387404e-05, - "loss": 0.2891, + "epoch": 0.27, + "grad_norm": 0.6751528364714424, + "learning_rate": 1.7048411136487743e-05, + "loss": 0.3141, "step": 5954 }, { - "epoch": 0.34, - "grad_norm": 1.3060160054166214, - "learning_rate": 1.5311426497846466e-05, - "loss": 0.7819, + "epoch": 0.27, + "grad_norm": 0.6026289779540165, + "learning_rate": 1.7047355580331457e-05, + "loss": 0.4155, "step": 5955 }, { - "epoch": 0.34, - "grad_norm": 0.7017722643430202, - "learning_rate": 1.530984968737106e-05, - "loss": 0.3755, + "epoch": 0.27, + "grad_norm": 0.43519046763094116, + "learning_rate": 1.7046299868153507e-05, + "loss": 0.2816, "step": 5956 }, { - "epoch": 0.34, - "grad_norm": 0.44304273216374057, - "learning_rate": 1.5308272693015785e-05, - "loss": 0.2946, + "epoch": 0.27, + "grad_norm": 0.24149123979846776, + "learning_rate": 1.7045243999977274e-05, + "loss": 0.1473, "step": 5957 }, { - "epoch": 0.34, - "grad_norm": 0.3809476824053261, - "learning_rate": 1.530669551483525e-05, - "loss": 0.2984, + "epoch": 0.27, + "grad_norm": 0.40930465318030845, + "learning_rate": 1.7044187975826126e-05, + "loss": 0.3579, "step": 5958 }, { - "epoch": 0.34, - "grad_norm": 0.32731689524967394, - "learning_rate": 1.5305118152884086e-05, - "loss": 0.2025, + "epoch": 0.27, + "grad_norm": 0.4054815146348783, + "learning_rate": 1.7043131795723446e-05, + "loss": 0.2759, "step": 5959 }, { - "epoch": 0.34, - "grad_norm": 0.32662163125723054, - "learning_rate": 1.5303540607216906e-05, - "loss": 0.2074, + "epoch": 0.27, + "grad_norm": 0.6401056722694114, + "learning_rate": 1.7042075459692616e-05, + "loss": 0.3513, "step": 5960 }, { - "epoch": 0.34, - "grad_norm": 0.6125753143165558, - "learning_rate": 1.5301962877888338e-05, - "loss": 0.4477, + "epoch": 0.27, + "grad_norm": 1.335019752203884, + "learning_rate": 1.7041018967757024e-05, + "loss": 0.6319, "step": 5961 }, { - "epoch": 0.34, - "grad_norm": 0.7210324834024456, - "learning_rate": 1.5300384964953028e-05, - "loss": 0.3894, + "epoch": 0.27, + "grad_norm": 0.38521141886876703, + "learning_rate": 1.7039962319940054e-05, + "loss": 0.275, "step": 5962 }, { - "epoch": 0.34, - "grad_norm": 0.3512008862927995, - "learning_rate": 1.5298806868465615e-05, - "loss": 0.2395, + "epoch": 0.27, + "grad_norm": 0.39669759543479416, + "learning_rate": 1.703890551626511e-05, + "loss": 0.3629, "step": 5963 }, { - "epoch": 0.34, - "grad_norm": 1.203964316197915, - "learning_rate": 1.5297228588480744e-05, - "loss": 0.6873, + "epoch": 0.27, + "grad_norm": 0.3299941671127926, + "learning_rate": 1.703784855675558e-05, + "loss": 0.1072, "step": 5964 }, { - "epoch": 0.34, - "grad_norm": 0.4470807698277849, - "learning_rate": 1.5295650125053078e-05, - "loss": 0.344, + "epoch": 0.27, + "grad_norm": 0.3700914505473078, + "learning_rate": 1.7036791441434864e-05, + "loss": 0.3039, "step": 5965 }, { - "epoch": 0.34, - "grad_norm": 0.27900170832730514, - "learning_rate": 1.529407147823728e-05, - "loss": 0.1535, + "epoch": 0.27, + "grad_norm": 0.48813217712176504, + "learning_rate": 1.703573417032637e-05, + "loss": 0.3655, "step": 5966 }, { - "epoch": 0.34, - "grad_norm": 1.2564959101088011, - "learning_rate": 1.529249264808801e-05, - "loss": 0.9462, + "epoch": 0.27, + "grad_norm": 0.4843069303760202, + "learning_rate": 1.70346767434535e-05, + "loss": 0.3022, "step": 5967 }, { - "epoch": 0.34, - "grad_norm": 0.8273397489953469, - "learning_rate": 1.5290913634659946e-05, - "loss": 0.4594, + "epoch": 0.27, + "grad_norm": 0.39623930683239766, + "learning_rate": 1.703361916083967e-05, + "loss": 0.2911, "step": 5968 }, { - "epoch": 0.34, - "grad_norm": 0.38156537104710225, - "learning_rate": 1.528933443800777e-05, - "loss": 0.2717, + "epoch": 0.27, + "grad_norm": 1.2998819153745773, + "learning_rate": 1.7032561422508288e-05, + "loss": 0.7017, "step": 5969 }, { - "epoch": 0.34, - "grad_norm": 0.40525374010366605, - "learning_rate": 1.5287755058186173e-05, - "loss": 0.2764, + "epoch": 0.27, + "grad_norm": 0.32507802534360075, + "learning_rate": 1.7031503528482774e-05, + "loss": 0.2098, "step": 5970 }, { - "epoch": 0.34, - "grad_norm": 0.44559989652126264, - "learning_rate": 1.5286175495249845e-05, - "loss": 0.2679, + "epoch": 0.27, + "grad_norm": 0.35439757017275464, + "learning_rate": 1.703044547878655e-05, + "loss": 0.298, "step": 5971 }, { - "epoch": 0.34, - "grad_norm": 0.3800803496165543, - "learning_rate": 1.5284595749253486e-05, - "loss": 0.2887, + "epoch": 0.27, + "grad_norm": 0.6740460811168439, + "learning_rate": 1.702938727344304e-05, + "loss": 0.4836, "step": 5972 }, { - "epoch": 0.34, - "grad_norm": 0.5854777998684502, - "learning_rate": 1.5283015820251802e-05, - "loss": 0.3128, + "epoch": 0.27, + "grad_norm": 0.4855944208006054, + "learning_rate": 1.702832891247567e-05, + "loss": 0.2979, "step": 5973 }, { - "epoch": 0.34, - "grad_norm": 0.6490823546446826, - "learning_rate": 1.528143570829951e-05, - "loss": 0.3814, + "epoch": 0.27, + "grad_norm": 0.4731371035031592, + "learning_rate": 1.7027270395907872e-05, + "loss": 0.296, "step": 5974 }, { - "epoch": 0.34, - "grad_norm": 0.37018842276377245, - "learning_rate": 1.5279855413451323e-05, - "loss": 0.223, + "epoch": 0.27, + "grad_norm": 0.41276602074770175, + "learning_rate": 1.702621172376308e-05, + "loss": 0.3397, "step": 5975 }, { - "epoch": 0.34, - "grad_norm": 0.4312382176580631, - "learning_rate": 1.527827493576197e-05, - "loss": 0.284, + "epoch": 0.27, + "grad_norm": 0.4361444034100218, + "learning_rate": 1.7025152896064733e-05, + "loss": 0.3226, "step": 5976 }, { - "epoch": 0.34, - "grad_norm": 0.40708070399001484, - "learning_rate": 1.5276694275286188e-05, - "loss": 0.3473, + "epoch": 0.27, + "grad_norm": 0.37249748123834836, + "learning_rate": 1.702409391283627e-05, + "loss": 0.2229, "step": 5977 }, { - "epoch": 0.34, - "grad_norm": 0.5786187802356816, - "learning_rate": 1.5275113432078707e-05, - "loss": 0.3675, + "epoch": 0.27, + "grad_norm": 0.3936272414823332, + "learning_rate": 1.702303477410114e-05, + "loss": 0.317, "step": 5978 }, { - "epoch": 0.34, - "grad_norm": 0.5832872436341768, - "learning_rate": 1.5273532406194273e-05, - "loss": 0.3081, + "epoch": 0.27, + "grad_norm": 0.47408399348132163, + "learning_rate": 1.7021975479882788e-05, + "loss": 0.2565, "step": 5979 }, { - "epoch": 0.34, - "grad_norm": 0.48552801852112154, - "learning_rate": 1.5271951197687642e-05, - "loss": 0.3343, + "epoch": 0.27, + "grad_norm": 0.4040780028913679, + "learning_rate": 1.702091603020467e-05, + "loss": 0.1798, "step": 5980 }, { - "epoch": 0.34, - "grad_norm": 0.3366837647895135, - "learning_rate": 1.5270369806613566e-05, - "loss": 0.2745, + "epoch": 0.27, + "grad_norm": 0.5793933469561242, + "learning_rate": 1.7019856425090233e-05, + "loss": 0.3687, "step": 5981 }, { - "epoch": 0.34, - "grad_norm": 0.30439148010092015, - "learning_rate": 1.5268788233026813e-05, - "loss": 0.2576, + "epoch": 0.27, + "grad_norm": 0.4041240082953772, + "learning_rate": 1.701879666456294e-05, + "loss": 0.3447, "step": 5982 }, { - "epoch": 0.34, - "grad_norm": 0.8410593472550371, - "learning_rate": 1.5267206476982143e-05, - "loss": 0.2501, + "epoch": 0.27, + "grad_norm": 0.3936935373938144, + "learning_rate": 1.701773674864626e-05, + "loss": 0.2519, "step": 5983 }, { - "epoch": 0.34, - "grad_norm": 0.41983616658716516, - "learning_rate": 1.5265624538534346e-05, - "loss": 0.335, + "epoch": 0.27, + "grad_norm": 0.8124025409919884, + "learning_rate": 1.7016676677363646e-05, + "loss": 0.5801, "step": 5984 }, { - "epoch": 0.34, - "grad_norm": 0.29918174520908947, - "learning_rate": 1.5264042417738198e-05, - "loss": 0.3013, + "epoch": 0.27, + "grad_norm": 0.32712509727414457, + "learning_rate": 1.7015616450738575e-05, + "loss": 0.197, "step": 5985 }, { - "epoch": 0.34, - "grad_norm": 0.9234263832577031, - "learning_rate": 1.5262460114648487e-05, - "loss": 0.4014, + "epoch": 0.27, + "grad_norm": 0.3090090016451351, + "learning_rate": 1.701455606879452e-05, + "loss": 0.2398, "step": 5986 }, { - "epoch": 0.34, - "grad_norm": 0.23127103984282238, - "learning_rate": 1.5260877629320003e-05, - "loss": 0.1819, + "epoch": 0.28, + "grad_norm": 1.0961125156859128, + "learning_rate": 1.7013495531554952e-05, + "loss": 0.6021, "step": 5987 }, { - "epoch": 0.34, - "grad_norm": 0.6068087085849305, - "learning_rate": 1.5259294961807557e-05, - "loss": 0.3792, + "epoch": 0.28, + "grad_norm": 0.834353473295938, + "learning_rate": 1.7012434839043353e-05, + "loss": 0.4405, "step": 5988 }, { - "epoch": 0.34, - "grad_norm": 0.4333690325500259, - "learning_rate": 1.5257712112165952e-05, - "loss": 0.2803, + "epoch": 0.28, + "grad_norm": 0.44065027462902717, + "learning_rate": 1.7011373991283204e-05, + "loss": 0.3054, "step": 5989 }, { - "epoch": 0.34, - "grad_norm": 0.3537720037573216, - "learning_rate": 1.5256129080450004e-05, - "loss": 0.2856, + "epoch": 0.28, + "grad_norm": 0.38746338192129737, + "learning_rate": 1.7010312988297993e-05, + "loss": 0.2756, "step": 5990 }, { - "epoch": 0.34, - "grad_norm": 1.1232073792582922, - "learning_rate": 1.5254545866714531e-05, - "loss": 0.5189, + "epoch": 0.28, + "grad_norm": 0.32675984416050463, + "learning_rate": 1.7009251830111214e-05, + "loss": 0.2067, "step": 5991 }, { - "epoch": 0.34, - "grad_norm": 0.4744517484846774, - "learning_rate": 1.5252962471014358e-05, - "loss": 0.3488, + "epoch": 0.28, + "grad_norm": 0.6536680117197015, + "learning_rate": 1.7008190516746348e-05, + "loss": 0.3086, "step": 5992 }, { - "epoch": 0.34, - "grad_norm": 0.3529505990489683, - "learning_rate": 1.5251378893404324e-05, - "loss": 0.2746, + "epoch": 0.28, + "grad_norm": 0.51514725544651, + "learning_rate": 1.7007129048226903e-05, + "loss": 0.2821, "step": 5993 }, { - "epoch": 0.34, - "grad_norm": 0.2696964638067003, - "learning_rate": 1.5249795133939262e-05, - "loss": 0.2123, + "epoch": 0.28, + "grad_norm": 0.5207101334407666, + "learning_rate": 1.7006067424576372e-05, + "loss": 0.3537, "step": 5994 }, { - "epoch": 0.34, - "grad_norm": 0.98685682080623, - "learning_rate": 1.5248211192674023e-05, - "loss": 0.5288, + "epoch": 0.28, + "grad_norm": 0.7161801081282118, + "learning_rate": 1.7005005645818262e-05, + "loss": 0.3367, "step": 5995 }, { - "epoch": 0.34, - "grad_norm": 0.6945777224570654, - "learning_rate": 1.5246627069663453e-05, - "loss": 0.2634, + "epoch": 0.28, + "grad_norm": 0.4244158576760907, + "learning_rate": 1.7003943711976077e-05, + "loss": 0.2895, "step": 5996 }, { - "epoch": 0.34, - "grad_norm": 0.3390663328402181, - "learning_rate": 1.5245042764962416e-05, - "loss": 0.3044, + "epoch": 0.28, + "grad_norm": 0.26219144911785597, + "learning_rate": 1.700288162307333e-05, + "loss": 0.1817, "step": 5997 }, { - "epoch": 0.34, - "grad_norm": 1.1668116852346306, - "learning_rate": 1.5243458278625771e-05, - "loss": 0.8487, + "epoch": 0.28, + "grad_norm": 0.4951344654928905, + "learning_rate": 1.7001819379133532e-05, + "loss": 0.3341, "step": 5998 }, { - "epoch": 0.34, - "grad_norm": 0.20443862509215088, - "learning_rate": 1.5241873610708395e-05, - "loss": 0.0946, + "epoch": 0.28, + "grad_norm": 0.3726495835641448, + "learning_rate": 1.7000756980180206e-05, + "loss": 0.3028, "step": 5999 }, { - "epoch": 0.34, - "grad_norm": 0.4356866107324079, - "learning_rate": 1.5240288761265158e-05, - "loss": 0.3402, + "epoch": 0.28, + "grad_norm": 0.7759250923457206, + "learning_rate": 1.6999694426236862e-05, + "loss": 0.5065, "step": 6000 }, { - "epoch": 0.34, - "grad_norm": 0.36544328154024314, - "learning_rate": 1.523870373035095e-05, - "loss": 0.3262, + "epoch": 0.28, + "grad_norm": 0.668703465881885, + "learning_rate": 1.6998631717327034e-05, + "loss": 0.3654, "step": 6001 }, { - "epoch": 0.34, - "grad_norm": 0.684350221463673, - "learning_rate": 1.5237118518020656e-05, - "loss": 0.2627, + "epoch": 0.28, + "grad_norm": 0.37960069988963036, + "learning_rate": 1.6997568853474244e-05, + "loss": 0.29, "step": 6002 }, { - "epoch": 0.34, - "grad_norm": 0.7652539514029776, - "learning_rate": 1.5235533124329172e-05, - "loss": 0.4287, + "epoch": 0.28, + "grad_norm": 0.2023504059861079, + "learning_rate": 1.699650583470202e-05, + "loss": 0.1109, "step": 6003 }, { - "epoch": 0.34, - "grad_norm": 1.4259890730487947, - "learning_rate": 1.5233947549331399e-05, - "loss": 0.8299, + "epoch": 0.28, + "grad_norm": 0.45500710574339603, + "learning_rate": 1.6995442661033905e-05, + "loss": 0.2884, "step": 6004 }, { - "epoch": 0.35, - "grad_norm": 0.2552789675583203, - "learning_rate": 1.5232361793082251e-05, - "loss": 0.2188, + "epoch": 0.28, + "grad_norm": 0.516264511830306, + "learning_rate": 1.699437933249343e-05, + "loss": 0.3589, "step": 6005 }, { - "epoch": 0.35, - "grad_norm": 0.26345923306917046, - "learning_rate": 1.5230775855636635e-05, - "loss": 0.1973, + "epoch": 0.28, + "grad_norm": 0.453949929285188, + "learning_rate": 1.699331584910414e-05, + "loss": 0.3158, "step": 6006 }, { - "epoch": 0.35, - "grad_norm": 0.7307326078246826, - "learning_rate": 1.522918973704948e-05, - "loss": 0.4981, + "epoch": 0.28, + "grad_norm": 0.4407652820726819, + "learning_rate": 1.6992252210889573e-05, + "loss": 0.2931, "step": 6007 }, { - "epoch": 0.35, - "grad_norm": 0.622572484624821, - "learning_rate": 1.5227603437375704e-05, - "loss": 0.3457, + "epoch": 0.28, + "grad_norm": 0.5893706782684486, + "learning_rate": 1.6991188417873284e-05, + "loss": 0.394, "step": 6008 }, { - "epoch": 0.35, - "grad_norm": 0.3376871552073923, - "learning_rate": 1.5226016956670251e-05, - "loss": 0.2555, + "epoch": 0.28, + "grad_norm": 0.24325209210070836, + "learning_rate": 1.699012447007882e-05, + "loss": 0.1546, "step": 6009 }, { - "epoch": 0.35, - "grad_norm": 1.1334926262165705, - "learning_rate": 1.5224430294988055e-05, - "loss": 0.7165, + "epoch": 0.28, + "grad_norm": 0.4578150028039045, + "learning_rate": 1.698906036752974e-05, + "loss": 0.3077, "step": 6010 }, { - "epoch": 0.35, - "grad_norm": 0.5732166382836338, - "learning_rate": 1.522284345238406e-05, - "loss": 0.3788, + "epoch": 0.28, + "grad_norm": 0.38129347866487756, + "learning_rate": 1.6987996110249598e-05, + "loss": 0.3161, "step": 6011 }, { - "epoch": 0.35, - "grad_norm": 0.2403970752189537, - "learning_rate": 1.5221256428913225e-05, - "loss": 0.1541, + "epoch": 0.28, + "grad_norm": 0.6261919988439754, + "learning_rate": 1.698693169826196e-05, + "loss": 0.3825, "step": 6012 }, { - "epoch": 0.35, - "grad_norm": 0.46260415675738403, - "learning_rate": 1.52196692246305e-05, - "loss": 0.3579, + "epoch": 0.28, + "grad_norm": 0.6577505177874339, + "learning_rate": 1.6985867131590383e-05, + "loss": 0.3495, "step": 6013 }, { - "epoch": 0.35, - "grad_norm": 0.7041434257174486, - "learning_rate": 1.5218081839590855e-05, - "loss": 0.4429, + "epoch": 0.28, + "grad_norm": 0.33937185162094, + "learning_rate": 1.698480241025845e-05, + "loss": 0.2895, "step": 6014 }, { - "epoch": 0.35, - "grad_norm": 0.33938295637228305, - "learning_rate": 1.5216494273849261e-05, - "loss": 0.243, + "epoch": 0.28, + "grad_norm": 0.4413707928373477, + "learning_rate": 1.6983737534289714e-05, + "loss": 0.2673, "step": 6015 }, { - "epoch": 0.35, - "grad_norm": 0.5118168354209909, - "learning_rate": 1.5214906527460695e-05, - "loss": 0.3826, + "epoch": 0.28, + "grad_norm": 0.49356171285747646, + "learning_rate": 1.6982672503707762e-05, + "loss": 0.2618, "step": 6016 }, { - "epoch": 0.35, - "grad_norm": 0.4851060809319056, - "learning_rate": 1.5213318600480138e-05, - "loss": 0.3629, + "epoch": 0.28, + "grad_norm": 0.33354465622688945, + "learning_rate": 1.698160731853617e-05, + "loss": 0.3051, "step": 6017 }, { - "epoch": 0.35, - "grad_norm": 0.22921600058430264, - "learning_rate": 1.5211730492962587e-05, - "loss": 0.161, + "epoch": 0.28, + "grad_norm": 0.42406956238714966, + "learning_rate": 1.6980541978798528e-05, + "loss": 0.3579, "step": 6018 }, { - "epoch": 0.35, - "grad_norm": 0.675451182016171, - "learning_rate": 1.521014220496303e-05, - "loss": 0.4479, + "epoch": 0.28, + "grad_norm": 0.4118361159173712, + "learning_rate": 1.6979476484518408e-05, + "loss": 0.2122, "step": 6019 }, { - "epoch": 0.35, - "grad_norm": 0.5422555876026078, - "learning_rate": 1.5208553736536473e-05, - "loss": 0.3674, + "epoch": 0.28, + "grad_norm": 0.4653993626995571, + "learning_rate": 1.6978410835719407e-05, + "loss": 0.3143, "step": 6020 }, { - "epoch": 0.35, - "grad_norm": 0.34458266162583756, - "learning_rate": 1.5206965087737922e-05, - "loss": 0.2923, + "epoch": 0.28, + "grad_norm": 0.4493591437459993, + "learning_rate": 1.6977345032425113e-05, + "loss": 0.2791, "step": 6021 }, { - "epoch": 0.35, - "grad_norm": 0.9441003102508054, - "learning_rate": 1.5205376258622397e-05, - "loss": 0.4712, + "epoch": 0.28, + "grad_norm": 0.32919852786738657, + "learning_rate": 1.697627907465913e-05, + "loss": 0.2663, "step": 6022 }, { - "epoch": 0.35, - "grad_norm": 0.5721243311796721, - "learning_rate": 1.5203787249244914e-05, - "loss": 0.3698, + "epoch": 0.28, + "grad_norm": 0.41225887895580565, + "learning_rate": 1.697521296244505e-05, + "loss": 0.311, "step": 6023 }, { - "epoch": 0.35, - "grad_norm": 0.32675837351870496, - "learning_rate": 1.5202198059660504e-05, - "loss": 0.2448, + "epoch": 0.28, + "grad_norm": 0.6160853133769265, + "learning_rate": 1.6974146695806476e-05, + "loss": 0.3151, "step": 6024 }, { - "epoch": 0.35, - "grad_norm": 0.43602889305513376, - "learning_rate": 1.5200608689924197e-05, - "loss": 0.3166, + "epoch": 0.28, + "grad_norm": 0.325981469415452, + "learning_rate": 1.697308027476702e-05, + "loss": 0.2359, "step": 6025 }, { - "epoch": 0.35, - "grad_norm": 0.3492478650448725, - "learning_rate": 1.5199019140091037e-05, - "loss": 0.1646, + "epoch": 0.28, + "grad_norm": 0.4237713675844246, + "learning_rate": 1.6972013699350285e-05, + "loss": 0.3347, "step": 6026 }, { - "epoch": 0.35, - "grad_norm": 0.3940936399332753, - "learning_rate": 1.5197429410216065e-05, - "loss": 0.3279, + "epoch": 0.28, + "grad_norm": 0.7650524675386389, + "learning_rate": 1.6970946969579888e-05, + "loss": 0.5469, "step": 6027 }, { - "epoch": 0.35, - "grad_norm": 0.3892155238830433, - "learning_rate": 1.5195839500354337e-05, - "loss": 0.2918, + "epoch": 0.28, + "grad_norm": 0.593426899966432, + "learning_rate": 1.696988008547944e-05, + "loss": 0.3911, "step": 6028 }, { - "epoch": 0.35, - "grad_norm": 0.6679840842709365, - "learning_rate": 1.5194249410560913e-05, - "loss": 0.4226, + "epoch": 0.28, + "grad_norm": 0.28153683964539106, + "learning_rate": 1.6968813047072567e-05, + "loss": 0.1793, "step": 6029 }, { - "epoch": 0.35, - "grad_norm": 0.3529717737800829, - "learning_rate": 1.5192659140890851e-05, - "loss": 0.3048, + "epoch": 0.28, + "grad_norm": 0.30450963292171274, + "learning_rate": 1.6967745854382893e-05, + "loss": 0.262, "step": 6030 }, { - "epoch": 0.35, - "grad_norm": 0.3234022745735802, - "learning_rate": 1.5191068691399229e-05, - "loss": 0.2089, + "epoch": 0.28, + "grad_norm": 1.8843173363275398, + "learning_rate": 1.6966678507434035e-05, + "loss": 0.8055, "step": 6031 }, { - "epoch": 0.35, - "grad_norm": 0.4406803384243917, - "learning_rate": 1.518947806214112e-05, - "loss": 0.356, + "epoch": 0.28, + "grad_norm": 0.36372672819028323, + "learning_rate": 1.6965611006249635e-05, + "loss": 0.2339, "step": 6032 }, { - "epoch": 0.35, - "grad_norm": 0.3316857147599567, - "learning_rate": 1.5187887253171609e-05, - "loss": 0.3087, + "epoch": 0.28, + "grad_norm": 0.5611469696920824, + "learning_rate": 1.6964543350853316e-05, + "loss": 0.3845, "step": 6033 }, { - "epoch": 0.35, - "grad_norm": 0.9447485585303956, - "learning_rate": 1.5186296264545787e-05, - "loss": 0.6407, + "epoch": 0.28, + "grad_norm": 0.47126325851231976, + "learning_rate": 1.6963475541268723e-05, + "loss": 0.3809, "step": 6034 }, { - "epoch": 0.35, - "grad_norm": 0.507392674710051, - "learning_rate": 1.5184705096318748e-05, - "loss": 0.2305, + "epoch": 0.28, + "grad_norm": 0.3065203273828383, + "learning_rate": 1.6962407577519492e-05, + "loss": 0.1819, "step": 6035 }, { - "epoch": 0.35, - "grad_norm": 0.31821764679036, - "learning_rate": 1.5183113748545595e-05, - "loss": 0.2953, + "epoch": 0.28, + "grad_norm": 0.4491202114938126, + "learning_rate": 1.696133945962927e-05, + "loss": 0.2491, "step": 6036 }, { - "epoch": 0.35, - "grad_norm": 0.37936593474211566, - "learning_rate": 1.5181522221281435e-05, - "loss": 0.335, + "epoch": 0.28, + "grad_norm": 0.4840605326678392, + "learning_rate": 1.6960271187621696e-05, + "loss": 0.3743, "step": 6037 }, { - "epoch": 0.35, - "grad_norm": 0.1908639292571459, - "learning_rate": 1.5179930514581383e-05, - "loss": 0.1056, + "epoch": 0.28, + "grad_norm": 0.33831501323115126, + "learning_rate": 1.6959202761520432e-05, + "loss": 0.2472, "step": 6038 }, { - "epoch": 0.35, - "grad_norm": 0.39556981816109443, - "learning_rate": 1.517833862850056e-05, - "loss": 0.3185, + "epoch": 0.28, + "grad_norm": 0.7609414065011277, + "learning_rate": 1.6958134181349123e-05, + "loss": 0.4609, "step": 6039 }, { - "epoch": 0.35, - "grad_norm": 1.0851322992534065, - "learning_rate": 1.5176746563094092e-05, - "loss": 0.6819, + "epoch": 0.28, + "grad_norm": 0.5736951071401374, + "learning_rate": 1.6957065447131432e-05, + "loss": 0.452, "step": 6040 }, { - "epoch": 0.35, - "grad_norm": 0.32365313141441393, - "learning_rate": 1.5175154318417116e-05, - "loss": 0.2474, + "epoch": 0.28, + "grad_norm": 0.4035008562619525, + "learning_rate": 1.6955996558891015e-05, + "loss": 0.2844, "step": 6041 }, { - "epoch": 0.35, - "grad_norm": 0.44971760340828426, - "learning_rate": 1.5173561894524765e-05, - "loss": 0.3509, + "epoch": 0.28, + "grad_norm": 0.28519964042492596, + "learning_rate": 1.695492751665154e-05, + "loss": 0.2066, "step": 6042 }, { - "epoch": 0.35, - "grad_norm": 0.6905509263632318, - "learning_rate": 1.517196929147219e-05, - "loss": 0.4647, + "epoch": 0.28, + "grad_norm": 0.8209423128244802, + "learning_rate": 1.6953858320436673e-05, + "loss": 0.5503, "step": 6043 }, { - "epoch": 0.35, - "grad_norm": 0.24909171253007986, - "learning_rate": 1.5170376509314539e-05, - "loss": 0.1757, + "epoch": 0.28, + "grad_norm": 0.38490571150414665, + "learning_rate": 1.6952788970270083e-05, + "loss": 0.2692, "step": 6044 }, { - "epoch": 0.35, - "grad_norm": 0.39274007428317864, - "learning_rate": 1.5168783548106976e-05, - "loss": 0.3238, + "epoch": 0.28, + "grad_norm": 0.6216794434170183, + "learning_rate": 1.6951719466175446e-05, + "loss": 0.3147, "step": 6045 }, { - "epoch": 0.35, - "grad_norm": 0.9155198249464075, - "learning_rate": 1.5167190407904656e-05, - "loss": 0.6702, + "epoch": 0.28, + "grad_norm": 0.8719709012471768, + "learning_rate": 1.6950649808176443e-05, + "loss": 0.3836, "step": 6046 }, { - "epoch": 0.35, - "grad_norm": 0.573228632743676, - "learning_rate": 1.5165597088762757e-05, - "loss": 0.3907, + "epoch": 0.28, + "grad_norm": 0.40274786221622766, + "learning_rate": 1.694957999629675e-05, + "loss": 0.304, "step": 6047 }, { - "epoch": 0.35, - "grad_norm": 0.37380539990802764, - "learning_rate": 1.5164003590736452e-05, - "loss": 0.2131, + "epoch": 0.28, + "grad_norm": 0.2557647704909251, + "learning_rate": 1.6948510030560057e-05, + "loss": 0.1307, "step": 6048 }, { - "epoch": 0.35, - "grad_norm": 0.43565239129066136, - "learning_rate": 1.5162409913880927e-05, - "loss": 0.3494, + "epoch": 0.28, + "grad_norm": 0.5486500709570629, + "learning_rate": 1.694743991099005e-05, + "loss": 0.35, "step": 6049 }, { - "epoch": 0.35, - "grad_norm": 0.31162452510656125, - "learning_rate": 1.5160816058251367e-05, - "loss": 0.1867, + "epoch": 0.28, + "grad_norm": 0.45517943040362524, + "learning_rate": 1.6946369637610413e-05, + "loss": 0.2936, "step": 6050 }, { - "epoch": 0.35, - "grad_norm": 0.33539240652800983, - "learning_rate": 1.5159222023902969e-05, - "loss": 0.2238, + "epoch": 0.28, + "grad_norm": 1.5907954591387232, + "learning_rate": 1.6945299210444854e-05, + "loss": 0.5075, "step": 6051 }, { - "epoch": 0.35, - "grad_norm": 0.3631277574565755, - "learning_rate": 1.5157627810890937e-05, - "loss": 0.3246, + "epoch": 0.28, + "grad_norm": 0.8558424429360821, + "learning_rate": 1.694422862951706e-05, + "loss": 0.3208, "step": 6052 }, { - "epoch": 0.35, - "grad_norm": 0.744527008947136, - "learning_rate": 1.5156033419270472e-05, - "loss": 0.4741, + "epoch": 0.28, + "grad_norm": 0.3339334154799586, + "learning_rate": 1.694315789485074e-05, + "loss": 0.2779, "step": 6053 }, { - "epoch": 0.35, - "grad_norm": 0.3080561599256677, - "learning_rate": 1.5154438849096791e-05, - "loss": 0.2386, + "epoch": 0.28, + "grad_norm": 0.41894491336564543, + "learning_rate": 1.6942087006469593e-05, + "loss": 0.3042, "step": 6054 }, { - "epoch": 0.35, - "grad_norm": 0.9356964028488047, - "learning_rate": 1.5152844100425114e-05, - "loss": 0.5416, + "epoch": 0.28, + "grad_norm": 0.3447293429355375, + "learning_rate": 1.6941015964397335e-05, + "loss": 0.1583, "step": 6055 }, { - "epoch": 0.35, - "grad_norm": 0.24395016905721084, - "learning_rate": 1.5151249173310672e-05, - "loss": 0.2228, + "epoch": 0.28, + "grad_norm": 0.8016445606098862, + "learning_rate": 1.6939944768657667e-05, + "loss": 0.3299, "step": 6056 }, { - "epoch": 0.35, - "grad_norm": 0.36970463621043104, - "learning_rate": 1.5149654067808688e-05, - "loss": 0.2883, + "epoch": 0.28, + "grad_norm": 1.0262480428351024, + "learning_rate": 1.6938873419274317e-05, + "loss": 0.3908, "step": 6057 }, { - "epoch": 0.35, - "grad_norm": 0.9048703602569318, - "learning_rate": 1.5148058783974407e-05, - "loss": 0.4467, + "epoch": 0.28, + "grad_norm": 0.5059157558725144, + "learning_rate": 1.6937801916270994e-05, + "loss": 0.2791, "step": 6058 }, { - "epoch": 0.35, - "grad_norm": 0.695872288096352, - "learning_rate": 1.5146463321863069e-05, - "loss": 0.4602, + "epoch": 0.28, + "grad_norm": 0.41307188278869295, + "learning_rate": 1.6936730259671423e-05, + "loss": 0.3029, "step": 6059 }, { - "epoch": 0.35, - "grad_norm": 0.37486593573199756, - "learning_rate": 1.514486768152993e-05, - "loss": 0.2843, + "epoch": 0.28, + "grad_norm": 0.2878714921238245, + "learning_rate": 1.693565844949933e-05, + "loss": 0.1378, "step": 6060 }, { - "epoch": 0.35, - "grad_norm": 0.3653532580952253, - "learning_rate": 1.5143271863030244e-05, - "loss": 0.2839, + "epoch": 0.28, + "grad_norm": 0.348318136016693, + "learning_rate": 1.6934586485778447e-05, + "loss": 0.2673, "step": 6061 }, { - "epoch": 0.35, - "grad_norm": 0.2517009406199723, - "learning_rate": 1.5141675866419276e-05, - "loss": 0.1826, + "epoch": 0.28, + "grad_norm": 0.44898234027538114, + "learning_rate": 1.69335143685325e-05, + "loss": 0.2887, "step": 6062 }, { - "epoch": 0.35, - "grad_norm": 0.3704254478483168, - "learning_rate": 1.5140079691752293e-05, - "loss": 0.2775, + "epoch": 0.28, + "grad_norm": 0.827880079550586, + "learning_rate": 1.6932442097785223e-05, + "loss": 0.4794, "step": 6063 }, { - "epoch": 0.35, - "grad_norm": 0.46425654567960384, - "learning_rate": 1.5138483339084571e-05, - "loss": 0.3136, + "epoch": 0.28, + "grad_norm": 1.1689612534022173, + "learning_rate": 1.6931369673560366e-05, + "loss": 0.5734, "step": 6064 }, { - "epoch": 0.35, - "grad_norm": 0.959500141667516, - "learning_rate": 1.5136886808471389e-05, - "loss": 0.5221, + "epoch": 0.28, + "grad_norm": 0.41124638834998045, + "learning_rate": 1.693029709588166e-05, + "loss": 0.212, "step": 6065 }, { - "epoch": 0.35, - "grad_norm": 0.3435644123187082, - "learning_rate": 1.5135290099968043e-05, - "loss": 0.2827, + "epoch": 0.28, + "grad_norm": 0.28697558803579654, + "learning_rate": 1.692922436477286e-05, + "loss": 0.2121, "step": 6066 }, { - "epoch": 0.35, - "grad_norm": 1.0712411877235641, - "learning_rate": 1.5133693213629818e-05, - "loss": 0.3403, + "epoch": 0.28, + "grad_norm": 0.734994815884358, + "learning_rate": 1.6928151480257714e-05, + "loss": 0.529, "step": 6067 }, { - "epoch": 0.35, - "grad_norm": 0.2858791739075835, - "learning_rate": 1.513209614951202e-05, - "loss": 0.2423, + "epoch": 0.28, + "grad_norm": 0.38106499627715945, + "learning_rate": 1.6927078442359964e-05, + "loss": 0.266, "step": 6068 }, { - "epoch": 0.35, - "grad_norm": 0.4780296493408544, - "learning_rate": 1.5130498907669952e-05, - "loss": 0.2785, + "epoch": 0.28, + "grad_norm": 0.4141706436414943, + "learning_rate": 1.6926005251103375e-05, + "loss": 0.3363, "step": 6069 }, { - "epoch": 0.35, - "grad_norm": 0.8319222954221501, - "learning_rate": 1.512890148815893e-05, - "loss": 0.479, + "epoch": 0.28, + "grad_norm": 1.4455654506801, + "learning_rate": 1.6924931906511712e-05, + "loss": 0.6979, "step": 6070 }, { - "epoch": 0.35, - "grad_norm": 0.32882328520167026, - "learning_rate": 1.5127303891034264e-05, - "loss": 0.183, + "epoch": 0.28, + "grad_norm": 0.40147789886089097, + "learning_rate": 1.6923858408608725e-05, + "loss": 0.2373, "step": 6071 }, { - "epoch": 0.35, - "grad_norm": 0.33276901027696554, - "learning_rate": 1.5125706116351291e-05, - "loss": 0.279, + "epoch": 0.28, + "grad_norm": 0.5687400302165845, + "learning_rate": 1.6922784757418186e-05, + "loss": 0.2791, "step": 6072 }, { - "epoch": 0.35, - "grad_norm": 0.5268756881469756, - "learning_rate": 1.5124108164165333e-05, - "loss": 0.3849, + "epoch": 0.28, + "grad_norm": 0.39216835347918527, + "learning_rate": 1.6921710952963865e-05, + "loss": 0.3428, "step": 6073 }, { - "epoch": 0.35, - "grad_norm": 1.2489127782935394, - "learning_rate": 1.512251003453173e-05, - "loss": 0.4358, + "epoch": 0.28, + "grad_norm": 0.39676697199576405, + "learning_rate": 1.6920636995269538e-05, + "loss": 0.2624, "step": 6074 }, { - "epoch": 0.35, - "grad_norm": 0.34742346899867804, - "learning_rate": 1.5120911727505822e-05, - "loss": 0.2864, + "epoch": 0.28, + "grad_norm": 1.3725132668730633, + "learning_rate": 1.6919562884358974e-05, + "loss": 0.8211, "step": 6075 }, { - "epoch": 0.35, - "grad_norm": 0.537616793518178, - "learning_rate": 1.5119313243142964e-05, - "loss": 0.3793, + "epoch": 0.28, + "grad_norm": 0.40003579464518224, + "learning_rate": 1.691848862025596e-05, + "loss": 0.2199, "step": 6076 }, { - "epoch": 0.35, - "grad_norm": 0.4013084106067741, - "learning_rate": 1.5117714581498509e-05, - "loss": 0.1941, + "epoch": 0.28, + "grad_norm": 0.41450842589664544, + "learning_rate": 1.6917414202984276e-05, + "loss": 0.2788, "step": 6077 }, { - "epoch": 0.35, - "grad_norm": 0.38191970049336904, - "learning_rate": 1.5116115742627815e-05, - "loss": 0.2683, + "epoch": 0.28, + "grad_norm": 0.4211359398473587, + "learning_rate": 1.6916339632567708e-05, + "loss": 0.2763, "step": 6078 }, { - "epoch": 0.35, - "grad_norm": 0.8920627144260248, - "learning_rate": 1.5114516726586254e-05, - "loss": 0.5596, + "epoch": 0.28, + "grad_norm": 0.5568112877740296, + "learning_rate": 1.6915264909030045e-05, + "loss": 0.4786, "step": 6079 }, { - "epoch": 0.35, - "grad_norm": 0.4618446581767463, - "learning_rate": 1.51129175334292e-05, - "loss": 0.2894, + "epoch": 0.28, + "grad_norm": 0.47680523131497715, + "learning_rate": 1.691419003239508e-05, + "loss": 0.2764, "step": 6080 }, { - "epoch": 0.35, - "grad_norm": 0.43028981687553913, - "learning_rate": 1.5111318163212032e-05, - "loss": 0.2758, + "epoch": 0.28, + "grad_norm": 0.2917364824866476, + "learning_rate": 1.691311500268662e-05, + "loss": 0.274, "step": 6081 }, { - "epoch": 0.35, - "grad_norm": 0.6246636674476441, - "learning_rate": 1.5109718615990135e-05, - "loss": 0.397, + "epoch": 0.28, + "grad_norm": 0.29228890359227333, + "learning_rate": 1.691203981992845e-05, + "loss": 0.1753, "step": 6082 }, { - "epoch": 0.35, - "grad_norm": 0.2595488255827491, - "learning_rate": 1.51081188918189e-05, - "loss": 0.2166, + "epoch": 0.28, + "grad_norm": 0.4614316109945287, + "learning_rate": 1.691096448414438e-05, + "loss": 0.274, "step": 6083 }, { - "epoch": 0.35, - "grad_norm": 0.3366273152459292, - "learning_rate": 1.5106518990753731e-05, - "loss": 0.2066, + "epoch": 0.28, + "grad_norm": 0.45011371996749644, + "learning_rate": 1.6909888995358218e-05, + "loss": 0.3118, "step": 6084 }, { - "epoch": 0.35, - "grad_norm": 0.5509473415305383, - "learning_rate": 1.5104918912850029e-05, - "loss": 0.3952, + "epoch": 0.28, + "grad_norm": 0.4074407229846666, + "learning_rate": 1.6908813353593777e-05, + "loss": 0.3055, "step": 6085 }, { - "epoch": 0.35, - "grad_norm": 1.168537237871508, - "learning_rate": 1.5103318658163202e-05, - "loss": 0.6159, + "epoch": 0.28, + "grad_norm": 0.41330223814334716, + "learning_rate": 1.6907737558874865e-05, + "loss": 0.3149, "step": 6086 }, { - "epoch": 0.35, - "grad_norm": 0.44113838145483497, - "learning_rate": 1.5101718226748673e-05, - "loss": 0.2563, + "epoch": 0.28, + "grad_norm": 0.2545919589818811, + "learning_rate": 1.69066616112253e-05, + "loss": 0.1596, "step": 6087 }, { - "epoch": 0.35, - "grad_norm": 0.42233647100665583, - "learning_rate": 1.5100117618661856e-05, - "loss": 0.3352, + "epoch": 0.28, + "grad_norm": 1.3876838952017077, + "learning_rate": 1.69055855106689e-05, + "loss": 0.5954, "step": 6088 }, { - "epoch": 0.35, - "grad_norm": 1.153551365592648, - "learning_rate": 1.5098516833958187e-05, - "loss": 0.7705, + "epoch": 0.28, + "grad_norm": 0.34892759320725875, + "learning_rate": 1.69045092572295e-05, + "loss": 0.2682, "step": 6089 }, { - "epoch": 0.35, - "grad_norm": 0.21605941110243826, - "learning_rate": 1.50969158726931e-05, - "loss": 0.1453, + "epoch": 0.28, + "grad_norm": 0.46515044839801595, + "learning_rate": 1.6903432850930917e-05, + "loss": 0.3354, "step": 6090 }, { - "epoch": 0.35, - "grad_norm": 1.1214770160197514, - "learning_rate": 1.5095314734922037e-05, - "loss": 0.6139, + "epoch": 0.28, + "grad_norm": 0.6463762831254736, + "learning_rate": 1.6902356291796988e-05, + "loss": 0.3963, "step": 6091 }, { - "epoch": 0.35, - "grad_norm": 0.4166865421164272, - "learning_rate": 1.509371342070044e-05, - "loss": 0.323, + "epoch": 0.28, + "grad_norm": 0.37110478885632686, + "learning_rate": 1.690127957985154e-05, + "loss": 0.2721, "step": 6092 }, { - "epoch": 0.35, - "grad_norm": 0.3283893206173644, - "learning_rate": 1.509211193008377e-05, - "loss": 0.2456, + "epoch": 0.28, + "grad_norm": 0.5141544951089635, + "learning_rate": 1.6900202715118408e-05, + "loss": 0.3917, "step": 6093 }, { - "epoch": 0.35, - "grad_norm": 1.0555578416538822, - "learning_rate": 1.509051026312748e-05, - "loss": 0.6061, + "epoch": 0.28, + "grad_norm": 0.2299420957358207, + "learning_rate": 1.6899125697621442e-05, + "loss": 0.1728, "step": 6094 }, { - "epoch": 0.35, - "grad_norm": 0.29921174955999685, - "learning_rate": 1.508890841988704e-05, - "loss": 0.2598, + "epoch": 0.28, + "grad_norm": 0.4430942682785905, + "learning_rate": 1.6898048527384486e-05, + "loss": 0.3086, "step": 6095 }, { - "epoch": 0.35, - "grad_norm": 0.3149749288748337, - "learning_rate": 1.5087306400417921e-05, - "loss": 0.2747, + "epoch": 0.28, + "grad_norm": 0.6352324692695902, + "learning_rate": 1.6896971204431383e-05, + "loss": 0.3896, "step": 6096 }, { - "epoch": 0.35, - "grad_norm": 0.31212173256557457, - "learning_rate": 1.5085704204775598e-05, - "loss": 0.1728, + "epoch": 0.28, + "grad_norm": 0.3987868557716098, + "learning_rate": 1.6895893728785982e-05, + "loss": 0.2843, "step": 6097 }, { - "epoch": 0.35, - "grad_norm": 0.5652954029006605, - "learning_rate": 1.508410183301556e-05, - "loss": 0.4581, + "epoch": 0.28, + "grad_norm": 0.42551849940246234, + "learning_rate": 1.6894816100472145e-05, + "loss": 0.2786, "step": 6098 }, { - "epoch": 0.35, - "grad_norm": 0.6856421881804132, - "learning_rate": 1.508249928519329e-05, - "loss": 0.3919, + "epoch": 0.28, + "grad_norm": 0.549571389187052, + "learning_rate": 1.689373831951372e-05, + "loss": 0.4, "step": 6099 }, { - "epoch": 0.35, - "grad_norm": 0.30656764100593287, - "learning_rate": 1.5080896561364293e-05, - "loss": 0.2557, + "epoch": 0.28, + "grad_norm": 0.2723232488536985, + "learning_rate": 1.6892660385934573e-05, + "loss": 0.18, "step": 6100 }, { - "epoch": 0.35, - "grad_norm": 0.4687395321208568, - "learning_rate": 1.5079293661584063e-05, - "loss": 0.3314, + "epoch": 0.28, + "grad_norm": 0.44022394434425827, + "learning_rate": 1.689158229975857e-05, + "loss": 0.2978, "step": 6101 }, { - "epoch": 0.35, - "grad_norm": 0.3349070250202825, - "learning_rate": 1.5077690585908113e-05, - "loss": 0.2066, + "epoch": 0.28, + "grad_norm": 0.4916255036387172, + "learning_rate": 1.689050406100958e-05, + "loss": 0.4022, "step": 6102 }, { - "epoch": 0.35, - "grad_norm": 0.3640182696226341, - "learning_rate": 1.5076087334391957e-05, - "loss": 0.2836, + "epoch": 0.28, + "grad_norm": 1.0689724710598505, + "learning_rate": 1.6889425669711465e-05, + "loss": 0.6996, "step": 6103 }, { - "epoch": 0.35, - "grad_norm": 0.34746788590963384, - "learning_rate": 1.5074483907091115e-05, - "loss": 0.3276, + "epoch": 0.28, + "grad_norm": 0.3789200014912838, + "learning_rate": 1.688834712588811e-05, + "loss": 0.2177, "step": 6104 }, { - "epoch": 0.35, - "grad_norm": 0.3712053354906611, - "learning_rate": 1.5072880304061112e-05, - "loss": 0.3489, + "epoch": 0.28, + "grad_norm": 0.3720698295016904, + "learning_rate": 1.6887268429563387e-05, + "loss": 0.3069, "step": 6105 }, { - "epoch": 0.35, - "grad_norm": 0.4718096602897961, - "learning_rate": 1.5071276525357486e-05, - "loss": 0.3113, + "epoch": 0.28, + "grad_norm": 0.3285228171657959, + "learning_rate": 1.6886189580761182e-05, + "loss": 0.2083, "step": 6106 }, { - "epoch": 0.35, - "grad_norm": 0.5120999080003616, - "learning_rate": 1.5069672571035766e-05, - "loss": 0.3747, + "epoch": 0.28, + "grad_norm": 0.3871790844958291, + "learning_rate": 1.6885110579505376e-05, + "loss": 0.2608, "step": 6107 }, { - "epoch": 0.35, - "grad_norm": 0.28354797155566364, - "learning_rate": 1.506806844115151e-05, - "loss": 0.2414, + "epoch": 0.28, + "grad_norm": 0.5183039018569343, + "learning_rate": 1.6884031425819852e-05, + "loss": 0.3943, "step": 6108 }, { - "epoch": 0.35, - "grad_norm": 0.37166878535602804, - "learning_rate": 1.5066464135760254e-05, - "loss": 0.2915, + "epoch": 0.28, + "grad_norm": 0.3744984451984501, + "learning_rate": 1.6882952119728513e-05, + "loss": 0.3436, "step": 6109 }, { - "epoch": 0.35, - "grad_norm": 0.5029551308263018, - "learning_rate": 1.506485965491757e-05, - "loss": 0.3747, + "epoch": 0.28, + "grad_norm": 0.34136526441459136, + "learning_rate": 1.6881872661255246e-05, + "loss": 0.2276, "step": 6110 }, { - "epoch": 0.35, - "grad_norm": 0.4002329105834612, - "learning_rate": 1.5063254998679009e-05, - "loss": 0.2825, + "epoch": 0.28, + "grad_norm": 1.482088844935787, + "learning_rate": 1.6880793050423953e-05, + "loss": 0.8034, "step": 6111 }, { - "epoch": 0.35, - "grad_norm": 0.3814977933139438, - "learning_rate": 1.5061650167100146e-05, - "loss": 0.2968, + "epoch": 0.28, + "grad_norm": 0.358724382271073, + "learning_rate": 1.687971328725853e-05, + "loss": 0.2686, "step": 6112 }, { - "epoch": 0.35, - "grad_norm": 0.829273859116276, - "learning_rate": 1.5060045160236556e-05, - "loss": 0.452, + "epoch": 0.28, + "grad_norm": 0.3051728609100929, + "learning_rate": 1.6878633371782888e-05, + "loss": 0.2411, "step": 6113 }, { - "epoch": 0.35, - "grad_norm": 0.27937980686236547, - "learning_rate": 1.505843997814382e-05, - "loss": 0.2292, + "epoch": 0.28, + "grad_norm": 0.3740353912766072, + "learning_rate": 1.6877553304020932e-05, + "loss": 0.2909, "step": 6114 }, { - "epoch": 0.35, - "grad_norm": 0.47676045935760386, - "learning_rate": 1.5056834620877525e-05, - "loss": 0.312, + "epoch": 0.28, + "grad_norm": 1.188675630684188, + "learning_rate": 1.6876473083996577e-05, + "loss": 0.7182, "step": 6115 }, { - "epoch": 0.35, - "grad_norm": 0.3451101796926662, - "learning_rate": 1.5055229088493264e-05, - "loss": 0.2801, + "epoch": 0.28, + "grad_norm": 0.8364849318520382, + "learning_rate": 1.6875392711733734e-05, + "loss": 0.4632, "step": 6116 }, { - "epoch": 0.35, - "grad_norm": 0.579516426649944, - "learning_rate": 1.5053623381046639e-05, - "loss": 0.394, + "epoch": 0.28, + "grad_norm": 0.31318200167225707, + "learning_rate": 1.687431218725632e-05, + "loss": 0.2528, "step": 6117 }, { - "epoch": 0.35, - "grad_norm": 0.44553554966049524, - "learning_rate": 1.505201749859325e-05, - "loss": 0.3536, + "epoch": 0.28, + "grad_norm": 0.6102266840681903, + "learning_rate": 1.687323151058826e-05, + "loss": 0.4819, "step": 6118 }, { - "epoch": 0.35, - "grad_norm": 0.3590680813627751, - "learning_rate": 1.5050411441188714e-05, - "loss": 0.2677, + "epoch": 0.28, + "grad_norm": 0.5123954609596111, + "learning_rate": 1.6872150681753483e-05, + "loss": 0.3207, "step": 6119 }, { - "epoch": 0.35, - "grad_norm": 0.5367161392325959, - "learning_rate": 1.5048805208888651e-05, - "loss": 0.3061, + "epoch": 0.28, + "grad_norm": 0.25815428061903417, + "learning_rate": 1.6871069700775914e-05, + "loss": 0.177, "step": 6120 }, { - "epoch": 0.35, - "grad_norm": 0.4162731031659161, - "learning_rate": 1.5047198801748677e-05, - "loss": 0.3569, + "epoch": 0.28, + "grad_norm": 0.43121083925553133, + "learning_rate": 1.686998856767948e-05, + "loss": 0.309, "step": 6121 }, { - "epoch": 0.35, - "grad_norm": 0.2970681347926114, - "learning_rate": 1.5045592219824423e-05, - "loss": 0.2119, + "epoch": 0.28, + "grad_norm": 0.6689713108866191, + "learning_rate": 1.6868907282488123e-05, + "loss": 0.3585, "step": 6122 }, { - "epoch": 0.35, - "grad_norm": 0.35046893311451455, - "learning_rate": 1.5043985463171532e-05, - "loss": 0.2086, + "epoch": 0.28, + "grad_norm": 0.39755458221148543, + "learning_rate": 1.6867825845225775e-05, + "loss": 0.2721, "step": 6123 }, { - "epoch": 0.35, - "grad_norm": 0.313277427937686, - "learning_rate": 1.5042378531845638e-05, - "loss": 0.3118, + "epoch": 0.28, + "grad_norm": 0.5917405356812365, + "learning_rate": 1.686674425591639e-05, + "loss": 0.3943, "step": 6124 }, { - "epoch": 0.35, - "grad_norm": 1.2384222887646135, - "learning_rate": 1.5040771425902393e-05, - "loss": 0.744, + "epoch": 0.28, + "grad_norm": 0.40517840206435146, + "learning_rate": 1.68656625145839e-05, + "loss": 0.3089, "step": 6125 }, { - "epoch": 0.35, - "grad_norm": 0.3171223595122585, - "learning_rate": 1.503916414539745e-05, - "loss": 0.2281, + "epoch": 0.28, + "grad_norm": 0.31816780513752596, + "learning_rate": 1.686458062125226e-05, + "loss": 0.2008, "step": 6126 }, { - "epoch": 0.35, - "grad_norm": 0.413448931832896, - "learning_rate": 1.5037556690386472e-05, - "loss": 0.389, + "epoch": 0.28, + "grad_norm": 0.4010119293325127, + "learning_rate": 1.686349857594542e-05, + "loss": 0.2866, "step": 6127 }, { - "epoch": 0.35, - "grad_norm": 0.23777876075584098, - "learning_rate": 1.5035949060925118e-05, - "loss": 0.2254, + "epoch": 0.28, + "grad_norm": 0.4827422719525275, + "learning_rate": 1.686241637868734e-05, + "loss": 0.3284, "step": 6128 }, { - "epoch": 0.35, - "grad_norm": 0.3183163904203893, - "learning_rate": 1.5034341257069072e-05, - "loss": 0.2173, + "epoch": 0.28, + "grad_norm": 0.42813336158300624, + "learning_rate": 1.686133402950197e-05, + "loss": 0.2933, "step": 6129 }, { - "epoch": 0.35, - "grad_norm": 0.5650146581955648, - "learning_rate": 1.5032733278873996e-05, - "loss": 0.4094, + "epoch": 0.28, + "grad_norm": 0.7179846045814073, + "learning_rate": 1.6860251528413282e-05, + "loss": 0.3707, "step": 6130 }, { - "epoch": 0.35, - "grad_norm": 0.513930176388067, - "learning_rate": 1.5031125126395589e-05, - "loss": 0.4267, + "epoch": 0.28, + "grad_norm": 0.48624409515335804, + "learning_rate": 1.6859168875445236e-05, + "loss": 0.2945, "step": 6131 }, { - "epoch": 0.35, - "grad_norm": 0.3492885607799953, - "learning_rate": 1.5029516799689533e-05, - "loss": 0.3188, + "epoch": 0.28, + "grad_norm": 0.33958895094849484, + "learning_rate": 1.68580860706218e-05, + "loss": 0.1913, "step": 6132 }, { - "epoch": 0.35, - "grad_norm": 0.3884188797338796, - "learning_rate": 1.5027908298811527e-05, - "loss": 0.2838, + "epoch": 0.28, + "grad_norm": 0.42909751293322845, + "learning_rate": 1.685700311396695e-05, + "loss": 0.2727, "step": 6133 }, { - "epoch": 0.35, - "grad_norm": 0.28370779822743003, - "learning_rate": 1.5026299623817273e-05, - "loss": 0.2077, + "epoch": 0.28, + "grad_norm": 0.9082685050114968, + "learning_rate": 1.685592000550466e-05, + "loss": 0.5663, "step": 6134 }, { - "epoch": 0.35, - "grad_norm": 0.4015496523870909, - "learning_rate": 1.5024690774762478e-05, - "loss": 0.3009, + "epoch": 0.28, + "grad_norm": 0.4111659462283185, + "learning_rate": 1.685483674525891e-05, + "loss": 0.3172, "step": 6135 }, { - "epoch": 0.35, - "grad_norm": 0.3578652014760878, - "learning_rate": 1.5023081751702857e-05, - "loss": 0.2493, + "epoch": 0.28, + "grad_norm": 0.42576452363918815, + "learning_rate": 1.685375333325368e-05, + "loss": 0.3157, "step": 6136 }, { - "epoch": 0.35, - "grad_norm": 0.7011799483411165, - "learning_rate": 1.5021472554694134e-05, - "loss": 0.46, + "epoch": 0.28, + "grad_norm": 0.7430281881367897, + "learning_rate": 1.685266976951296e-05, + "loss": 0.4179, "step": 6137 }, { - "epoch": 0.35, - "grad_norm": 0.5617022132112179, - "learning_rate": 1.501986318379203e-05, - "loss": 0.4276, + "epoch": 0.28, + "grad_norm": 0.3274380332259152, + "learning_rate": 1.6851586054060734e-05, + "loss": 0.2378, "step": 6138 }, { - "epoch": 0.35, - "grad_norm": 0.3212717857556724, - "learning_rate": 1.501825363905228e-05, - "loss": 0.2388, + "epoch": 0.28, + "grad_norm": 0.2979767455354676, + "learning_rate": 1.6850502186920998e-05, + "loss": 0.1382, "step": 6139 }, { - "epoch": 0.35, - "grad_norm": 0.25259321483356195, - "learning_rate": 1.5016643920530625e-05, - "loss": 0.2136, + "epoch": 0.28, + "grad_norm": 0.4076645942048964, + "learning_rate": 1.684941816811774e-05, + "loss": 0.291, "step": 6140 }, { - "epoch": 0.35, - "grad_norm": 0.7643284203192063, - "learning_rate": 1.5015034028282802e-05, - "loss": 0.4888, + "epoch": 0.28, + "grad_norm": 0.33549334423819066, + "learning_rate": 1.684833399767497e-05, + "loss": 0.3082, "step": 6141 }, { - "epoch": 0.35, - "grad_norm": 0.3205694041270008, - "learning_rate": 1.5013423962364571e-05, - "loss": 0.2469, + "epoch": 0.28, + "grad_norm": 0.6949932639774373, + "learning_rate": 1.6847249675616685e-05, + "loss": 0.4677, "step": 6142 }, { - "epoch": 0.35, - "grad_norm": 0.47535488032774514, - "learning_rate": 1.501181372283168e-05, - "loss": 0.3845, + "epoch": 0.28, + "grad_norm": 0.40695497359213256, + "learning_rate": 1.684616520196689e-05, + "loss": 0.1946, "step": 6143 }, { - "epoch": 0.35, - "grad_norm": 0.5102639689359593, - "learning_rate": 1.5010203309739897e-05, - "loss": 0.3412, + "epoch": 0.28, + "grad_norm": 0.33476583211033745, + "learning_rate": 1.6845080576749597e-05, + "loss": 0.2583, "step": 6144 }, { - "epoch": 0.35, - "grad_norm": 0.34843918812053276, - "learning_rate": 1.5008592723144987e-05, - "loss": 0.3098, + "epoch": 0.28, + "grad_norm": 0.32779572957822056, + "learning_rate": 1.684399579998882e-05, + "loss": 0.2542, "step": 6145 }, { - "epoch": 0.35, - "grad_norm": 0.23040537290013943, - "learning_rate": 1.500698196310273e-05, - "loss": 0.0713, + "epoch": 0.28, + "grad_norm": 0.420084232412222, + "learning_rate": 1.6842910871708567e-05, + "loss": 0.2605, "step": 6146 }, { - "epoch": 0.35, - "grad_norm": 0.4026091520264639, - "learning_rate": 1.5005371029668899e-05, - "loss": 0.2918, + "epoch": 0.28, + "grad_norm": 0.4490139826704272, + "learning_rate": 1.684182579193287e-05, + "loss": 0.3492, "step": 6147 }, { - "epoch": 0.35, - "grad_norm": 0.3332787714379259, - "learning_rate": 1.5003759922899286e-05, - "loss": 0.2908, + "epoch": 0.28, + "grad_norm": 0.4775241914190654, + "learning_rate": 1.684074056068574e-05, + "loss": 0.3807, "step": 6148 }, { - "epoch": 0.35, - "grad_norm": 0.8116793651226156, - "learning_rate": 1.5002148642849683e-05, - "loss": 0.3831, + "epoch": 0.28, + "grad_norm": 0.5671926803838095, + "learning_rate": 1.683965517799121e-05, + "loss": 0.2295, "step": 6149 }, { - "epoch": 0.35, - "grad_norm": 0.4649050578936953, - "learning_rate": 1.5000537189575885e-05, - "loss": 0.3492, + "epoch": 0.28, + "grad_norm": 0.4400387959390509, + "learning_rate": 1.6838569643873298e-05, + "loss": 0.3444, "step": 6150 }, { - "epoch": 0.35, - "grad_norm": 0.398384632333775, - "learning_rate": 1.4998925563133702e-05, - "loss": 0.3362, + "epoch": 0.28, + "grad_norm": 0.2887850133710706, + "learning_rate": 1.6837483958356054e-05, + "loss": 0.223, "step": 6151 }, { - "epoch": 0.35, - "grad_norm": 0.2859884604619067, - "learning_rate": 1.499731376357894e-05, - "loss": 0.1982, + "epoch": 0.28, + "grad_norm": 0.3737983166875972, + "learning_rate": 1.6836398121463503e-05, + "loss": 0.2646, "step": 6152 }, { - "epoch": 0.35, - "grad_norm": 1.2499071609751304, - "learning_rate": 1.499570179096742e-05, - "loss": 0.8175, + "epoch": 0.28, + "grad_norm": 0.41767497772136275, + "learning_rate": 1.6835312133219686e-05, + "loss": 0.3283, "step": 6153 }, { - "epoch": 0.35, - "grad_norm": 0.35604390551676335, - "learning_rate": 1.499408964535496e-05, - "loss": 0.2641, + "epoch": 0.28, + "grad_norm": 0.7531149947347106, + "learning_rate": 1.6834225993648645e-05, + "loss": 0.5021, "step": 6154 }, { - "epoch": 0.35, - "grad_norm": 0.6096935274842166, - "learning_rate": 1.499247732679739e-05, - "loss": 0.3215, + "epoch": 0.28, + "grad_norm": 1.6108136139877414, + "learning_rate": 1.6833139702774427e-05, + "loss": 0.761, "step": 6155 }, { - "epoch": 0.35, - "grad_norm": 0.4061485551230446, - "learning_rate": 1.4990864835350544e-05, - "loss": 0.1853, + "epoch": 0.28, + "grad_norm": 0.34446432557055656, + "learning_rate": 1.6832053260621087e-05, + "loss": 0.217, "step": 6156 }, { - "epoch": 0.35, - "grad_norm": 0.3753388063151038, - "learning_rate": 1.4989252171070265e-05, - "loss": 0.2943, + "epoch": 0.28, + "grad_norm": 0.37263497557451325, + "learning_rate": 1.6830966667212666e-05, + "loss": 0.2816, "step": 6157 }, { - "epoch": 0.35, - "grad_norm": 0.43330743666285093, - "learning_rate": 1.4987639334012398e-05, - "loss": 0.2916, + "epoch": 0.28, + "grad_norm": 0.9423084149383641, + "learning_rate": 1.682987992257323e-05, + "loss": 0.498, "step": 6158 }, { - "epoch": 0.35, - "grad_norm": 0.3607792270908736, - "learning_rate": 1.4986026324232796e-05, - "loss": 0.2567, + "epoch": 0.28, + "grad_norm": 0.388627776621658, + "learning_rate": 1.6828793026726832e-05, + "loss": 0.2476, "step": 6159 }, { - "epoch": 0.35, - "grad_norm": 0.41735006093173743, - "learning_rate": 1.4984413141787312e-05, - "loss": 0.3247, + "epoch": 0.28, + "grad_norm": 0.4172312075947139, + "learning_rate": 1.6827705979697543e-05, + "loss": 0.3124, "step": 6160 }, { - "epoch": 0.35, - "grad_norm": 0.6882683097516364, - "learning_rate": 1.498279978673182e-05, - "loss": 0.4989, + "epoch": 0.28, + "grad_norm": 0.4529089485767976, + "learning_rate": 1.6826618781509424e-05, + "loss": 0.3475, "step": 6161 }, { - "epoch": 0.35, - "grad_norm": 0.33450054220257863, - "learning_rate": 1.4981186259122185e-05, - "loss": 0.176, + "epoch": 0.28, + "grad_norm": 0.3372153196100555, + "learning_rate": 1.6825531432186545e-05, + "loss": 0.2345, "step": 6162 }, { - "epoch": 0.35, - "grad_norm": 0.3138479440645983, - "learning_rate": 1.4979572559014284e-05, - "loss": 0.2784, + "epoch": 0.28, + "grad_norm": 0.4599082257205266, + "learning_rate": 1.6824443931752975e-05, + "loss": 0.3293, "step": 6163 }, { - "epoch": 0.35, - "grad_norm": 0.9177725613411829, - "learning_rate": 1.4977958686463998e-05, - "loss": 0.6058, + "epoch": 0.28, + "grad_norm": 0.38903388561941266, + "learning_rate": 1.6823356280232796e-05, + "loss": 0.3224, "step": 6164 }, { - "epoch": 0.35, - "grad_norm": 0.43862792944574786, - "learning_rate": 1.497634464152722e-05, - "loss": 0.2266, + "epoch": 0.28, + "grad_norm": 0.396568310019041, + "learning_rate": 1.682226847765008e-05, + "loss": 0.2574, "step": 6165 }, { - "epoch": 0.35, - "grad_norm": 0.4316088985564484, - "learning_rate": 1.4974730424259836e-05, - "loss": 0.3325, + "epoch": 0.28, + "grad_norm": 0.4503887692573361, + "learning_rate": 1.6821180524028923e-05, + "loss": 0.3259, "step": 6166 }, { - "epoch": 0.35, - "grad_norm": 0.4754951018407942, - "learning_rate": 1.4973116034717754e-05, - "loss": 0.3411, + "epoch": 0.28, + "grad_norm": 0.8104955755343283, + "learning_rate": 1.6820092419393402e-05, + "loss": 0.4164, "step": 6167 }, { - "epoch": 0.35, - "grad_norm": 0.25015470689457353, - "learning_rate": 1.4971501472956875e-05, - "loss": 0.1052, + "epoch": 0.28, + "grad_norm": 0.4170117764541688, + "learning_rate": 1.6819004163767603e-05, + "loss": 0.2852, "step": 6168 }, { - "epoch": 0.35, - "grad_norm": 0.46799978705707523, - "learning_rate": 1.4969886739033116e-05, - "loss": 0.3381, + "epoch": 0.28, + "grad_norm": 0.40572143292102075, + "learning_rate": 1.681791575717563e-05, + "loss": 0.3082, "step": 6169 }, { - "epoch": 0.35, - "grad_norm": 1.5456881995311484, - "learning_rate": 1.4968271833002393e-05, - "loss": 0.8595, + "epoch": 0.28, + "grad_norm": 0.7091611557836884, + "learning_rate": 1.681682719964157e-05, + "loss": 0.5656, "step": 6170 }, { - "epoch": 0.35, - "grad_norm": 0.35870092364375566, - "learning_rate": 1.4966656754920635e-05, - "loss": 0.3482, + "epoch": 0.28, + "grad_norm": 0.33634916313020513, + "learning_rate": 1.6815738491189527e-05, + "loss": 0.2604, "step": 6171 }, { - "epoch": 0.35, - "grad_norm": 0.32167903780767804, - "learning_rate": 1.496504150484376e-05, - "loss": 0.2257, + "epoch": 0.28, + "grad_norm": 0.2682924595643512, + "learning_rate": 1.6814649631843604e-05, + "loss": 0.1954, "step": 6172 }, { - "epoch": 0.35, - "grad_norm": 0.4229197497644949, - "learning_rate": 1.4963426082827714e-05, - "loss": 0.3368, + "epoch": 0.28, + "grad_norm": 1.0169714608082723, + "learning_rate": 1.6813560621627907e-05, + "loss": 0.6078, "step": 6173 }, { - "epoch": 0.35, - "grad_norm": 0.47531000042653976, - "learning_rate": 1.4961810488928434e-05, - "loss": 0.2762, + "epoch": 0.28, + "grad_norm": 0.37130390016311093, + "learning_rate": 1.681247146056654e-05, + "loss": 0.2723, "step": 6174 }, { - "epoch": 0.35, - "grad_norm": 0.3133918190580812, - "learning_rate": 1.4960194723201873e-05, - "loss": 0.2267, + "epoch": 0.28, + "grad_norm": 0.7861306371018686, + "learning_rate": 1.6811382148683627e-05, + "loss": 0.379, "step": 6175 }, { - "epoch": 0.35, - "grad_norm": 1.041910669070392, - "learning_rate": 1.4958578785703982e-05, - "loss": 0.6656, + "epoch": 0.28, + "grad_norm": 0.4085449472360082, + "learning_rate": 1.6810292686003275e-05, + "loss": 0.3176, "step": 6176 }, { - "epoch": 0.35, - "grad_norm": 0.827017268485664, - "learning_rate": 1.4956962676490719e-05, - "loss": 0.5432, + "epoch": 0.28, + "grad_norm": 0.3986526865305402, + "learning_rate": 1.680920307254961e-05, + "loss": 0.3029, "step": 6177 }, { - "epoch": 0.35, - "grad_norm": 0.3650626346399905, - "learning_rate": 1.495534639561805e-05, - "loss": 0.2469, + "epoch": 0.28, + "grad_norm": 0.444549614434705, + "learning_rate": 1.6808113308346744e-05, + "loss": 0.2624, "step": 6178 }, { - "epoch": 0.36, - "grad_norm": 0.3928206475372948, - "learning_rate": 1.4953729943141952e-05, - "loss": 0.3242, + "epoch": 0.28, + "grad_norm": 0.33771641695056204, + "learning_rate": 1.6807023393418814e-05, + "loss": 0.2158, "step": 6179 }, { - "epoch": 0.36, - "grad_norm": 0.284766364444701, - "learning_rate": 1.49521133191184e-05, - "loss": 0.1743, + "epoch": 0.28, + "grad_norm": 0.33512782605013125, + "learning_rate": 1.680593332778995e-05, + "loss": 0.2657, "step": 6180 }, { - "epoch": 0.36, - "grad_norm": 0.3653187691143623, - "learning_rate": 1.4950496523603373e-05, - "loss": 0.2148, + "epoch": 0.28, + "grad_norm": 0.9359137319840171, + "learning_rate": 1.680484311148428e-05, + "loss": 0.5217, "step": 6181 }, { - "epoch": 0.36, - "grad_norm": 0.9229835999005866, - "learning_rate": 1.4948879556652866e-05, - "loss": 0.5495, + "epoch": 0.28, + "grad_norm": 0.6859444847448948, + "learning_rate": 1.6803752744525944e-05, + "loss": 0.3614, "step": 6182 }, { - "epoch": 0.36, - "grad_norm": 0.4713117458300445, - "learning_rate": 1.4947262418322872e-05, - "loss": 0.3238, + "epoch": 0.28, + "grad_norm": 0.5275907568231512, + "learning_rate": 1.6802662226939077e-05, + "loss": 0.3205, "step": 6183 }, { - "epoch": 0.36, - "grad_norm": 0.4509127635088528, - "learning_rate": 1.4945645108669395e-05, - "loss": 0.2793, + "epoch": 0.28, + "grad_norm": 0.32306567555137367, + "learning_rate": 1.6801571558747827e-05, + "loss": 0.2753, "step": 6184 }, { - "epoch": 0.36, - "grad_norm": 0.4986348814970563, - "learning_rate": 1.4944027627748438e-05, - "loss": 0.3296, + "epoch": 0.28, + "grad_norm": 0.30279117848793197, + "learning_rate": 1.6800480739976336e-05, + "loss": 0.1034, "step": 6185 }, { - "epoch": 0.36, - "grad_norm": 0.2946512585424218, - "learning_rate": 1.4942409975616019e-05, - "loss": 0.1613, + "epoch": 0.28, + "grad_norm": 0.42352297994000604, + "learning_rate": 1.6799389770648757e-05, + "loss": 0.312, "step": 6186 }, { - "epoch": 0.36, - "grad_norm": 0.3968498037859521, - "learning_rate": 1.4940792152328156e-05, - "loss": 0.2815, + "epoch": 0.28, + "grad_norm": 1.268347074985257, + "learning_rate": 1.6798298650789243e-05, + "loss": 0.5, "step": 6187 }, { - "epoch": 0.36, - "grad_norm": 0.3634508359888088, - "learning_rate": 1.4939174157940872e-05, - "loss": 0.2695, + "epoch": 0.28, + "grad_norm": 0.3486603125602323, + "learning_rate": 1.6797207380421946e-05, + "loss": 0.2772, "step": 6188 }, { - "epoch": 0.36, - "grad_norm": 0.6800060796414303, - "learning_rate": 1.4937555992510198e-05, - "loss": 0.4762, + "epoch": 0.28, + "grad_norm": 0.3596056668236512, + "learning_rate": 1.679611595957103e-05, + "loss": 0.2817, "step": 6189 }, { - "epoch": 0.36, - "grad_norm": 0.34781887939904654, - "learning_rate": 1.4935937656092175e-05, - "loss": 0.2857, + "epoch": 0.28, + "grad_norm": 0.3489577443385812, + "learning_rate": 1.679502438826066e-05, + "loss": 0.2316, "step": 6190 }, { - "epoch": 0.36, - "grad_norm": 0.3356382407547884, - "learning_rate": 1.493431914874284e-05, - "loss": 0.2678, + "epoch": 0.28, + "grad_norm": 0.9594459643418749, + "learning_rate": 1.6793932666514993e-05, + "loss": 0.6075, "step": 6191 }, { - "epoch": 0.36, - "grad_norm": 0.2680360999255305, - "learning_rate": 1.4932700470518247e-05, - "loss": 0.1794, + "epoch": 0.28, + "grad_norm": 0.31062788415925635, + "learning_rate": 1.679284079435821e-05, + "loss": 0.2363, "step": 6192 }, { - "epoch": 0.36, - "grad_norm": 0.36281121646648407, - "learning_rate": 1.4931081621474448e-05, - "loss": 0.2694, + "epoch": 0.28, + "grad_norm": 0.8074440562023991, + "learning_rate": 1.679174877181448e-05, + "loss": 0.4801, "step": 6193 }, { - "epoch": 0.36, - "grad_norm": 0.7202442627048605, - "learning_rate": 1.4929462601667504e-05, - "loss": 0.3639, + "epoch": 0.28, + "grad_norm": 1.3176445324324617, + "learning_rate": 1.6790656598907972e-05, + "loss": 0.7438, "step": 6194 }, { - "epoch": 0.36, - "grad_norm": 0.38416910639035173, - "learning_rate": 1.4927843411153481e-05, - "loss": 0.3403, + "epoch": 0.28, + "grad_norm": 0.35937571183002975, + "learning_rate": 1.678956427566288e-05, + "loss": 0.2186, "step": 6195 }, { - "epoch": 0.36, - "grad_norm": 0.33023100678471856, - "learning_rate": 1.4926224049988456e-05, - "loss": 0.2986, + "epoch": 0.28, + "grad_norm": 0.39339232038619826, + "learning_rate": 1.6788471802103373e-05, + "loss": 0.3179, "step": 6196 }, { - "epoch": 0.36, - "grad_norm": 1.2815573161505327, - "learning_rate": 1.4924604518228503e-05, - "loss": 0.8653, + "epoch": 0.28, + "grad_norm": 0.33535457427796556, + "learning_rate": 1.6787379178253642e-05, + "loss": 0.2181, "step": 6197 }, { - "epoch": 0.36, - "grad_norm": 0.23076577952264316, - "learning_rate": 1.4922984815929707e-05, - "loss": 0.1842, + "epoch": 0.28, + "grad_norm": 0.38990069809666605, + "learning_rate": 1.6786286404137878e-05, + "loss": 0.2579, "step": 6198 }, { - "epoch": 0.36, - "grad_norm": 0.2980217129945579, - "learning_rate": 1.4921364943148158e-05, - "loss": 0.2783, + "epoch": 0.28, + "grad_norm": 0.9990616457067675, + "learning_rate": 1.678519347978028e-05, + "loss": 0.6198, "step": 6199 }, { - "epoch": 0.36, - "grad_norm": 0.8018563432059227, - "learning_rate": 1.4919744899939952e-05, - "loss": 0.4694, + "epoch": 0.28, + "grad_norm": 0.5088490275237567, + "learning_rate": 1.678410040520503e-05, + "loss": 0.3489, "step": 6200 }, { - "epoch": 0.36, - "grad_norm": 0.7658024271159686, - "learning_rate": 1.4918124686361193e-05, - "loss": 0.3918, + "epoch": 0.28, + "grad_norm": 0.42355370918877006, + "learning_rate": 1.678300718043634e-05, + "loss": 0.215, "step": 6201 }, { - "epoch": 0.36, - "grad_norm": 0.40099316107463984, - "learning_rate": 1.4916504302467987e-05, - "loss": 0.2832, + "epoch": 0.28, + "grad_norm": 0.3883425985063007, + "learning_rate": 1.6781913805498407e-05, + "loss": 0.2396, "step": 6202 }, { - "epoch": 0.36, - "grad_norm": 0.3396109439104663, - "learning_rate": 1.4914883748316448e-05, - "loss": 0.3053, + "epoch": 0.28, + "grad_norm": 0.36173677180693203, + "learning_rate": 1.6780820280415443e-05, + "loss": 0.3148, "step": 6203 }, { - "epoch": 0.36, - "grad_norm": 0.17946159090846295, - "learning_rate": 1.4913263023962698e-05, - "loss": 0.0944, + "epoch": 0.29, + "grad_norm": 0.44475274537331616, + "learning_rate": 1.6779726605211647e-05, + "loss": 0.2964, "step": 6204 }, { - "epoch": 0.36, - "grad_norm": 0.42650868983363494, - "learning_rate": 1.491164212946286e-05, - "loss": 0.2872, + "epoch": 0.29, + "grad_norm": 0.4406160184383901, + "learning_rate": 1.6778632779911244e-05, + "loss": 0.2867, "step": 6205 }, { - "epoch": 0.36, - "grad_norm": 0.7748315569899414, - "learning_rate": 1.4910021064873066e-05, - "loss": 0.4013, + "epoch": 0.29, + "grad_norm": 1.571132556379544, + "learning_rate": 1.677753880453844e-05, + "loss": 0.8139, "step": 6206 }, { - "epoch": 0.36, - "grad_norm": 0.33464339880757243, - "learning_rate": 1.4908399830249454e-05, - "loss": 0.2786, + "epoch": 0.29, + "grad_norm": 0.44379481212780597, + "learning_rate": 1.6776444679117462e-05, + "loss": 0.3072, "step": 6207 }, { - "epoch": 0.36, - "grad_norm": 0.32860205040007584, - "learning_rate": 1.4906778425648165e-05, - "loss": 0.2614, + "epoch": 0.29, + "grad_norm": 0.3857561584044854, + "learning_rate": 1.677535040367253e-05, + "loss": 0.2548, "step": 6208 }, { - "epoch": 0.36, - "grad_norm": 1.1586441525453701, - "learning_rate": 1.4905156851125354e-05, - "loss": 0.7866, + "epoch": 0.29, + "grad_norm": 0.4453954411026201, + "learning_rate": 1.6774255978227868e-05, + "loss": 0.3191, "step": 6209 }, { - "epoch": 0.36, - "grad_norm": 0.46748475889837865, - "learning_rate": 1.4903535106737166e-05, - "loss": 0.2579, + "epoch": 0.29, + "grad_norm": 0.5031902846841962, + "learning_rate": 1.6773161402807708e-05, + "loss": 0.3194, "step": 6210 }, { - "epoch": 0.36, - "grad_norm": 0.26132463339166095, - "learning_rate": 1.4901913192539773e-05, - "loss": 0.2383, + "epoch": 0.29, + "grad_norm": 0.36845310678394955, + "learning_rate": 1.6772066677436286e-05, + "loss": 0.1839, "step": 6211 }, { - "epoch": 0.36, - "grad_norm": 0.46282006638803863, - "learning_rate": 1.4900291108589335e-05, - "loss": 0.3461, + "epoch": 0.29, + "grad_norm": 0.4569215786045307, + "learning_rate": 1.6770971802137833e-05, + "loss": 0.3375, "step": 6212 }, { - "epoch": 0.36, - "grad_norm": 1.4299229249539138, - "learning_rate": 1.4898668854942029e-05, - "loss": 0.8093, + "epoch": 0.29, + "grad_norm": 0.378369893867642, + "learning_rate": 1.676987677693659e-05, + "loss": 0.2721, "step": 6213 }, { - "epoch": 0.36, - "grad_norm": 0.3654395694243854, - "learning_rate": 1.4897046431654028e-05, - "loss": 0.2173, + "epoch": 0.29, + "grad_norm": 0.9140137734247527, + "learning_rate": 1.67687816018568e-05, + "loss": 0.3614, "step": 6214 }, { - "epoch": 0.36, - "grad_norm": 0.40808297868803545, - "learning_rate": 1.4895423838781523e-05, - "loss": 0.3156, + "epoch": 0.29, + "grad_norm": 0.5566484769908658, + "learning_rate": 1.6767686276922708e-05, + "loss": 0.3893, "step": 6215 }, { - "epoch": 0.36, - "grad_norm": 0.8572774804511525, - "learning_rate": 1.4893801076380697e-05, - "loss": 0.6124, + "epoch": 0.29, + "grad_norm": 0.32996133843398096, + "learning_rate": 1.6766590802158567e-05, + "loss": 0.2613, "step": 6216 }, { - "epoch": 0.36, - "grad_norm": 0.3163807117672398, - "learning_rate": 1.4892178144507754e-05, - "loss": 0.2505, + "epoch": 0.29, + "grad_norm": 0.33123740142903274, + "learning_rate": 1.6765495177588626e-05, + "loss": 0.1714, "step": 6217 }, { - "epoch": 0.36, - "grad_norm": 0.39502399231456026, - "learning_rate": 1.4890555043218888e-05, - "loss": 0.3094, + "epoch": 0.29, + "grad_norm": 1.1497671674929857, + "learning_rate": 1.6764399403237142e-05, + "loss": 0.5797, "step": 6218 }, { - "epoch": 0.36, - "grad_norm": 0.305393097707522, - "learning_rate": 1.4888931772570314e-05, - "loss": 0.26, + "epoch": 0.29, + "grad_norm": 0.7003130140303847, + "learning_rate": 1.6763303479128375e-05, + "loss": 0.3869, "step": 6219 }, { - "epoch": 0.36, - "grad_norm": 0.4573475138697167, - "learning_rate": 1.4887308332618245e-05, - "loss": 0.2775, + "epoch": 0.29, + "grad_norm": 0.37767927823520225, + "learning_rate": 1.676220740528659e-05, + "loss": 0.3077, "step": 6220 }, { - "epoch": 0.36, - "grad_norm": 0.5099326579508142, - "learning_rate": 1.4885684723418897e-05, - "loss": 0.2951, + "epoch": 0.29, + "grad_norm": 0.8986653192060193, + "learning_rate": 1.6761111181736046e-05, + "loss": 0.4047, "step": 6221 }, { - "epoch": 0.36, - "grad_norm": 0.3622752534920921, - "learning_rate": 1.4884060945028495e-05, - "loss": 0.3238, + "epoch": 0.29, + "grad_norm": 0.4121861433500796, + "learning_rate": 1.676001480850102e-05, + "loss": 0.1735, "step": 6222 }, { - "epoch": 0.36, - "grad_norm": 0.4769109545150248, - "learning_rate": 1.4882436997503273e-05, - "loss": 0.3067, + "epoch": 0.29, + "grad_norm": 0.35794801121010156, + "learning_rate": 1.675891828560578e-05, + "loss": 0.2502, "step": 6223 }, { - "epoch": 0.36, - "grad_norm": 0.31122680816110204, - "learning_rate": 1.4880812880899472e-05, - "loss": 0.2261, + "epoch": 0.29, + "grad_norm": 0.4697442372345894, + "learning_rate": 1.6757821613074602e-05, + "loss": 0.2861, "step": 6224 }, { - "epoch": 0.36, - "grad_norm": 0.44219289230671227, - "learning_rate": 1.4879188595273326e-05, - "loss": 0.3029, + "epoch": 0.29, + "grad_norm": 0.4315078233040824, + "learning_rate": 1.675672479093177e-05, + "loss": 0.2932, "step": 6225 }, { - "epoch": 0.36, - "grad_norm": 0.566041714859357, - "learning_rate": 1.487756414068109e-05, - "loss": 0.334, + "epoch": 0.29, + "grad_norm": 0.6736215667094393, + "learning_rate": 1.6755627819201565e-05, + "loss": 0.4202, "step": 6226 }, { - "epoch": 0.36, - "grad_norm": 0.3654004821252147, - "learning_rate": 1.4875939517179016e-05, - "loss": 0.2639, + "epoch": 0.29, + "grad_norm": 0.6467972990353513, + "learning_rate": 1.6754530697908266e-05, + "loss": 0.3264, "step": 6227 }, { - "epoch": 0.36, - "grad_norm": 0.755425769942834, - "learning_rate": 1.4874314724823368e-05, - "loss": 0.5034, + "epoch": 0.29, + "grad_norm": 0.3532752657948103, + "learning_rate": 1.6753433427076172e-05, + "loss": 0.2858, "step": 6228 }, { - "epoch": 0.36, - "grad_norm": 0.3716823951023819, - "learning_rate": 1.487268976367041e-05, - "loss": 0.3237, + "epoch": 0.29, + "grad_norm": 0.28755185743151107, + "learning_rate": 1.675233600672957e-05, + "loss": 0.2124, "step": 6229 }, { - "epoch": 0.36, - "grad_norm": 0.2941511105541551, - "learning_rate": 1.4871064633776418e-05, - "loss": 0.2806, + "epoch": 0.29, + "grad_norm": 1.4080205338288985, + "learning_rate": 1.6751238436892754e-05, + "loss": 0.7149, "step": 6230 }, { - "epoch": 0.36, - "grad_norm": 0.31363827129766925, - "learning_rate": 1.4869439335197661e-05, - "loss": 0.1919, + "epoch": 0.29, + "grad_norm": 0.42433156002072586, + "learning_rate": 1.675014071759003e-05, + "loss": 0.2412, "step": 6231 }, { - "epoch": 0.36, - "grad_norm": 0.38920044645943885, - "learning_rate": 1.4867813867990435e-05, - "loss": 0.2882, + "epoch": 0.29, + "grad_norm": 0.39373225192887057, + "learning_rate": 1.6749042848845693e-05, + "loss": 0.3007, "step": 6232 }, { - "epoch": 0.36, - "grad_norm": 0.5297592578525103, - "learning_rate": 1.486618823221102e-05, - "loss": 0.4331, + "epoch": 0.29, + "grad_norm": 0.9977138063994544, + "learning_rate": 1.6747944830684052e-05, + "loss": 0.4683, "step": 6233 }, { - "epoch": 0.36, - "grad_norm": 0.34389016761519975, - "learning_rate": 1.4864562427915722e-05, - "loss": 0.2951, + "epoch": 0.29, + "grad_norm": 0.3833623733107689, + "learning_rate": 1.674684666312942e-05, + "loss": 0.2225, "step": 6234 }, { - "epoch": 0.36, - "grad_norm": 0.4098129452025419, - "learning_rate": 1.486293645516083e-05, - "loss": 0.3112, + "epoch": 0.29, + "grad_norm": 0.30157982985527987, + "learning_rate": 1.6745748346206102e-05, + "loss": 0.1823, "step": 6235 }, { - "epoch": 0.36, - "grad_norm": 0.5460341175814037, - "learning_rate": 1.4861310314002659e-05, - "loss": 0.339, + "epoch": 0.29, + "grad_norm": 0.4053713004656121, + "learning_rate": 1.6744649879938415e-05, + "loss": 0.3436, "step": 6236 }, { - "epoch": 0.36, - "grad_norm": 0.24296990938486865, - "learning_rate": 1.485968400449752e-05, - "loss": 0.1272, + "epoch": 0.29, + "grad_norm": 0.4959815269437256, + "learning_rate": 1.6743551264350685e-05, + "loss": 0.2365, "step": 6237 }, { - "epoch": 0.36, - "grad_norm": 0.4041372596227764, - "learning_rate": 1.485805752670174e-05, - "loss": 0.2752, + "epoch": 0.29, + "grad_norm": 0.46964425240217167, + "learning_rate": 1.674245249946723e-05, + "loss": 0.352, "step": 6238 }, { - "epoch": 0.36, - "grad_norm": 0.31120053524746066, - "learning_rate": 1.4856430880671628e-05, - "loss": 0.3153, + "epoch": 0.29, + "grad_norm": 0.38553237561564896, + "learning_rate": 1.674135358531237e-05, + "loss": 0.3262, "step": 6239 }, { - "epoch": 0.36, - "grad_norm": 0.5829267478852111, - "learning_rate": 1.485480406646353e-05, - "loss": 0.3675, + "epoch": 0.29, + "grad_norm": 0.3912276026898913, + "learning_rate": 1.6740254521910444e-05, + "loss": 0.1391, "step": 6240 }, { - "epoch": 0.36, - "grad_norm": 0.5853392794597301, - "learning_rate": 1.485317708413377e-05, - "loss": 0.2736, + "epoch": 0.29, + "grad_norm": 0.35203268476311156, + "learning_rate": 1.6739155309285777e-05, + "loss": 0.2674, "step": 6241 }, { - "epoch": 0.36, - "grad_norm": 0.3633480158282146, - "learning_rate": 1.48515499337387e-05, - "loss": 0.2897, + "epoch": 0.29, + "grad_norm": 0.5302204562081397, + "learning_rate": 1.6738055947462708e-05, + "loss": 0.3184, "step": 6242 }, { - "epoch": 0.36, - "grad_norm": 0.3294481476064836, - "learning_rate": 1.4849922615334662e-05, - "loss": 0.2107, + "epoch": 0.29, + "grad_norm": 0.43525642485272226, + "learning_rate": 1.6736956436465573e-05, + "loss": 0.3481, "step": 6243 }, { - "epoch": 0.36, - "grad_norm": 0.596863758511093, - "learning_rate": 1.4848295128978016e-05, - "loss": 0.3861, + "epoch": 0.29, + "grad_norm": 0.34913289211700765, + "learning_rate": 1.6735856776318717e-05, + "loss": 0.2811, "step": 6244 }, { - "epoch": 0.36, - "grad_norm": 0.30007973784979886, - "learning_rate": 1.4846667474725115e-05, - "loss": 0.2746, + "epoch": 0.29, + "grad_norm": 1.2958632343230958, + "learning_rate": 1.673475696704648e-05, + "loss": 0.6617, "step": 6245 }, { - "epoch": 0.36, - "grad_norm": 0.4615792309021516, - "learning_rate": 1.484503965263233e-05, - "loss": 0.3483, + "epoch": 0.29, + "grad_norm": 0.5950695357489686, + "learning_rate": 1.6733657008673222e-05, + "loss": 0.3777, "step": 6246 }, { - "epoch": 0.36, - "grad_norm": 0.35022336479364224, - "learning_rate": 1.4843411662756028e-05, - "loss": 0.2043, + "epoch": 0.29, + "grad_norm": 0.2772659339893456, + "learning_rate": 1.6732556901223285e-05, + "loss": 0.2371, "step": 6247 }, { - "epoch": 0.36, - "grad_norm": 0.4208558050427091, - "learning_rate": 1.484178350515259e-05, - "loss": 0.3449, + "epoch": 0.29, + "grad_norm": 1.3638579828752004, + "learning_rate": 1.6731456644721025e-05, + "loss": 0.7876, "step": 6248 }, { - "epoch": 0.36, - "grad_norm": 1.038883513719484, - "learning_rate": 1.4840155179878398e-05, - "loss": 0.7463, + "epoch": 0.29, + "grad_norm": 0.5403711670639993, + "learning_rate": 1.6730356239190803e-05, + "loss": 0.3488, "step": 6249 }, { - "epoch": 0.36, - "grad_norm": 0.2896134927667209, - "learning_rate": 1.4838526686989836e-05, - "loss": 0.2265, + "epoch": 0.29, + "grad_norm": 0.35083495921364105, + "learning_rate": 1.6729255684656985e-05, + "loss": 0.2192, "step": 6250 }, { - "epoch": 0.36, - "grad_norm": 0.2974585469109081, - "learning_rate": 1.4836898026543307e-05, - "loss": 0.2787, + "epoch": 0.29, + "grad_norm": 0.4141459537445995, + "learning_rate": 1.6728154981143926e-05, + "loss": 0.3439, "step": 6251 }, { - "epoch": 0.36, - "grad_norm": 0.4006467030665213, - "learning_rate": 1.4835269198595206e-05, - "loss": 0.3279, + "epoch": 0.29, + "grad_norm": 0.5811140674139026, + "learning_rate": 1.6727054128676003e-05, + "loss": 0.3093, "step": 6252 }, { - "epoch": 0.36, - "grad_norm": 0.33309182783458663, - "learning_rate": 1.483364020320194e-05, - "loss": 0.2246, + "epoch": 0.29, + "grad_norm": 0.30939466067042687, + "learning_rate": 1.6725953127277583e-05, + "loss": 0.1993, "step": 6253 }, { - "epoch": 0.36, - "grad_norm": 0.36433501957512965, - "learning_rate": 1.4832011040419922e-05, - "loss": 0.3392, + "epoch": 0.29, + "grad_norm": 0.8788591395332983, + "learning_rate": 1.6724851976973045e-05, + "loss": 0.5366, "step": 6254 }, { - "epoch": 0.36, - "grad_norm": 0.5059114706018922, - "learning_rate": 1.4830381710305572e-05, - "loss": 0.4148, + "epoch": 0.29, + "grad_norm": 0.4335156840167137, + "learning_rate": 1.6723750677786766e-05, + "loss": 0.3394, "step": 6255 }, { - "epoch": 0.36, - "grad_norm": 0.4326822693331583, - "learning_rate": 1.4828752212915309e-05, - "loss": 0.2464, + "epoch": 0.29, + "grad_norm": 0.3888677893498946, + "learning_rate": 1.672264922974313e-05, + "loss": 0.3269, "step": 6256 }, { - "epoch": 0.36, - "grad_norm": 0.346803304093782, - "learning_rate": 1.4827122548305566e-05, - "loss": 0.3429, + "epoch": 0.29, + "grad_norm": 0.3866378932333481, + "learning_rate": 1.672154763286652e-05, + "loss": 0.1418, "step": 6257 }, { - "epoch": 0.36, - "grad_norm": 0.24676986871220055, - "learning_rate": 1.4825492716532772e-05, - "loss": 0.2387, + "epoch": 0.29, + "grad_norm": 0.785775133398653, + "learning_rate": 1.672044588718132e-05, + "loss": 0.443, "step": 6258 }, { - "epoch": 0.36, - "grad_norm": 0.8936383019306559, - "learning_rate": 1.4823862717653377e-05, - "loss": 0.4183, + "epoch": 0.29, + "grad_norm": 0.3234054495124574, + "learning_rate": 1.6719343992711927e-05, + "loss": 0.2352, "step": 6259 }, { - "epoch": 0.36, - "grad_norm": 0.3613517629923259, - "learning_rate": 1.4822232551723824e-05, - "loss": 0.2395, + "epoch": 0.29, + "grad_norm": 0.41064044354499357, + "learning_rate": 1.6718241949482728e-05, + "loss": 0.3087, "step": 6260 }, { - "epoch": 0.36, - "grad_norm": 0.7576742857770312, - "learning_rate": 1.4820602218800562e-05, - "loss": 0.604, + "epoch": 0.29, + "grad_norm": 0.6730244103584585, + "learning_rate": 1.6717139757518136e-05, + "loss": 0.4425, "step": 6261 }, { - "epoch": 0.36, - "grad_norm": 0.36124256710193003, - "learning_rate": 1.4818971718940053e-05, - "loss": 0.282, + "epoch": 0.29, + "grad_norm": 0.3220522601637308, + "learning_rate": 1.671603741684254e-05, + "loss": 0.2334, "step": 6262 }, { - "epoch": 0.36, - "grad_norm": 0.3117081152835165, - "learning_rate": 1.4817341052198763e-05, - "loss": 0.2732, + "epoch": 0.29, + "grad_norm": 0.40052128746084487, + "learning_rate": 1.671493492748035e-05, + "loss": 0.264, "step": 6263 }, { - "epoch": 0.36, - "grad_norm": 0.4077993695451762, - "learning_rate": 1.481571021863316e-05, - "loss": 0.2519, + "epoch": 0.29, + "grad_norm": 0.9299012571992346, + "learning_rate": 1.671383228945597e-05, + "loss": 0.4632, "step": 6264 }, { - "epoch": 0.36, - "grad_norm": 0.36274755930196695, - "learning_rate": 1.4814079218299715e-05, - "loss": 0.2588, + "epoch": 0.29, + "grad_norm": 0.42619825889112395, + "learning_rate": 1.6712729502793817e-05, + "loss": 0.3148, "step": 6265 }, { - "epoch": 0.36, - "grad_norm": 0.31755716594371836, - "learning_rate": 1.4812448051254914e-05, - "loss": 0.2515, + "epoch": 0.29, + "grad_norm": 0.7420893871804587, + "learning_rate": 1.67116265675183e-05, + "loss": 0.3607, "step": 6266 }, { - "epoch": 0.36, - "grad_norm": 1.2379686903604816, - "learning_rate": 1.4810816717555248e-05, - "loss": 0.8207, + "epoch": 0.29, + "grad_norm": 0.3503878966888698, + "learning_rate": 1.6710523483653843e-05, + "loss": 0.3021, "step": 6267 }, { - "epoch": 0.36, - "grad_norm": 0.6200789147881792, - "learning_rate": 1.4809185217257205e-05, - "loss": 0.4401, + "epoch": 0.29, + "grad_norm": 0.44673891266326704, + "learning_rate": 1.6709420251224857e-05, + "loss": 0.3383, "step": 6268 }, { - "epoch": 0.36, - "grad_norm": 0.3576042390007011, - "learning_rate": 1.4807553550417281e-05, - "loss": 0.236, + "epoch": 0.29, + "grad_norm": 0.34466338094666155, + "learning_rate": 1.670831687025578e-05, + "loss": 0.2059, "step": 6269 }, { - "epoch": 0.36, - "grad_norm": 0.3135412551606865, - "learning_rate": 1.4805921717091989e-05, - "loss": 0.2926, + "epoch": 0.29, + "grad_norm": 0.3384130873697249, + "learning_rate": 1.6707213340771028e-05, + "loss": 0.1287, "step": 6270 }, { - "epoch": 0.36, - "grad_norm": 0.33504011374279746, - "learning_rate": 1.480428971733783e-05, - "loss": 0.2396, + "epoch": 0.29, + "grad_norm": 0.44415776855875705, + "learning_rate": 1.6706109662795042e-05, + "loss": 0.3424, "step": 6271 }, { - "epoch": 0.36, - "grad_norm": 0.395484364592832, - "learning_rate": 1.4802657551211331e-05, - "loss": 0.2966, + "epoch": 0.29, + "grad_norm": 0.4389914318106942, + "learning_rate": 1.6705005836352252e-05, + "loss": 0.3585, "step": 6272 }, { - "epoch": 0.36, - "grad_norm": 0.7334203945138051, - "learning_rate": 1.4801025218769001e-05, - "loss": 0.3673, + "epoch": 0.29, + "grad_norm": 0.4681074547384896, + "learning_rate": 1.670390186146709e-05, + "loss": 0.3217, "step": 6273 }, { - "epoch": 0.36, - "grad_norm": 0.3095981068865218, - "learning_rate": 1.4799392720067378e-05, - "loss": 0.2805, + "epoch": 0.29, + "grad_norm": 0.40079443358366845, + "learning_rate": 1.6702797738164006e-05, + "loss": 0.2801, "step": 6274 }, { - "epoch": 0.36, - "grad_norm": 0.37500869393675135, - "learning_rate": 1.4797760055162988e-05, - "loss": 0.3432, + "epoch": 0.29, + "grad_norm": 0.26748558497279584, + "learning_rate": 1.6701693466467438e-05, + "loss": 0.2378, "step": 6275 }, { - "epoch": 0.36, - "grad_norm": 0.21332335884529122, - "learning_rate": 1.4796127224112378e-05, - "loss": 0.1254, + "epoch": 0.29, + "grad_norm": 0.63155720708638, + "learning_rate": 1.6700589046401838e-05, + "loss": 0.1699, "step": 6276 }, { - "epoch": 0.36, - "grad_norm": 1.1738522008194672, - "learning_rate": 1.4794494226972088e-05, - "loss": 0.7157, + "epoch": 0.29, + "grad_norm": 0.389828455653685, + "learning_rate": 1.669948447799165e-05, + "loss": 0.323, "step": 6277 }, { - "epoch": 0.36, - "grad_norm": 0.2622768681089619, - "learning_rate": 1.4792861063798664e-05, - "loss": 0.2618, + "epoch": 0.29, + "grad_norm": 0.6006399960313791, + "learning_rate": 1.669837976126134e-05, + "loss": 0.4766, "step": 6278 }, { - "epoch": 0.36, - "grad_norm": 0.5248605026141624, - "learning_rate": 1.4791227734648672e-05, - "loss": 0.3278, + "epoch": 0.29, + "grad_norm": 0.3396219087726241, + "learning_rate": 1.6697274896235352e-05, + "loss": 0.2598, "step": 6279 }, { - "epoch": 0.36, - "grad_norm": 0.5791380522386198, - "learning_rate": 1.4789594239578668e-05, - "loss": 0.3969, + "epoch": 0.29, + "grad_norm": 0.4066565505831249, + "learning_rate": 1.6696169882938155e-05, + "loss": 0.3105, "step": 6280 }, { - "epoch": 0.36, - "grad_norm": 0.40689169491954263, - "learning_rate": 1.4787960578645222e-05, - "loss": 0.2991, + "epoch": 0.29, + "grad_norm": 0.3528771325034826, + "learning_rate": 1.669506472139421e-05, + "loss": 0.1733, "step": 6281 }, { - "epoch": 0.36, - "grad_norm": 0.35430059420501625, - "learning_rate": 1.4786326751904907e-05, - "loss": 0.2538, + "epoch": 0.29, + "grad_norm": 0.6296824056540992, + "learning_rate": 1.6693959411627988e-05, + "loss": 0.4715, "step": 6282 }, { - "epoch": 0.36, - "grad_norm": 0.25326613938208253, - "learning_rate": 1.4784692759414303e-05, - "loss": 0.1784, + "epoch": 0.29, + "grad_norm": 0.2955500196311048, + "learning_rate": 1.6692853953663952e-05, + "loss": 0.23, "step": 6283 }, { - "epoch": 0.36, - "grad_norm": 0.3879921231259588, - "learning_rate": 1.4783058601229994e-05, - "loss": 0.3079, + "epoch": 0.29, + "grad_norm": 0.7768370151595468, + "learning_rate": 1.6691748347526583e-05, + "loss": 0.5156, "step": 6284 }, { - "epoch": 0.36, - "grad_norm": 0.7226592658190863, - "learning_rate": 1.4781424277408572e-05, - "loss": 0.4558, + "epoch": 0.29, + "grad_norm": 1.2803989173196917, + "learning_rate": 1.6690642593240352e-05, + "loss": 0.8643, "step": 6285 }, { - "epoch": 0.36, - "grad_norm": 0.30693828632115694, - "learning_rate": 1.4779789788006632e-05, - "loss": 0.2734, + "epoch": 0.29, + "grad_norm": 0.3587370619822952, + "learning_rate": 1.6689536690829747e-05, + "loss": 0.2067, "step": 6286 }, { - "epoch": 0.36, - "grad_norm": 0.3880005622421795, - "learning_rate": 1.4778155133080776e-05, - "loss": 0.2958, + "epoch": 0.29, + "grad_norm": 0.2596244476789631, + "learning_rate": 1.6688430640319245e-05, + "loss": 0.2299, "step": 6287 }, { - "epoch": 0.36, - "grad_norm": 1.1057367174861266, - "learning_rate": 1.4776520312687614e-05, - "loss": 0.6752, + "epoch": 0.29, + "grad_norm": 0.7635178463698319, + "learning_rate": 1.6687324441733334e-05, + "loss": 0.4121, "step": 6288 }, { - "epoch": 0.36, - "grad_norm": 0.21447372968479209, - "learning_rate": 1.477488532688376e-05, - "loss": 0.0726, + "epoch": 0.29, + "grad_norm": 0.352632295180572, + "learning_rate": 1.6686218095096506e-05, + "loss": 0.2542, "step": 6289 }, { - "epoch": 0.36, - "grad_norm": 0.35395443621688044, - "learning_rate": 1.4773250175725833e-05, - "loss": 0.3048, + "epoch": 0.29, + "grad_norm": 0.7364891852525304, + "learning_rate": 1.6685111600433254e-05, + "loss": 0.5289, "step": 6290 }, { - "epoch": 0.36, - "grad_norm": 0.5289778545984578, - "learning_rate": 1.4771614859270458e-05, - "loss": 0.3525, + "epoch": 0.29, + "grad_norm": 0.33733571839356374, + "learning_rate": 1.6684004957768074e-05, + "loss": 0.2873, "step": 6291 }, { - "epoch": 0.36, - "grad_norm": 0.48440354407701286, - "learning_rate": 1.4769979377574264e-05, - "loss": 0.2645, + "epoch": 0.29, + "grad_norm": 0.45746261266090393, + "learning_rate": 1.6682898167125466e-05, + "loss": 0.3198, "step": 6292 }, { - "epoch": 0.36, - "grad_norm": 0.4060500797659063, - "learning_rate": 1.4768343730693888e-05, - "loss": 0.3174, + "epoch": 0.29, + "grad_norm": 0.24579898622521043, + "learning_rate": 1.6681791228529935e-05, + "loss": 0.129, "step": 6293 }, { - "epoch": 0.36, - "grad_norm": 0.3819313122256466, - "learning_rate": 1.4766707918685974e-05, - "loss": 0.3538, + "epoch": 0.29, + "grad_norm": 0.8075615873550177, + "learning_rate": 1.6680684142005982e-05, + "loss": 0.4445, "step": 6294 }, { - "epoch": 0.36, - "grad_norm": 0.3470332126651621, - "learning_rate": 1.4765071941607172e-05, - "loss": 0.2331, + "epoch": 0.29, + "grad_norm": 0.30063780559748843, + "learning_rate": 1.6679576907578127e-05, + "loss": 0.2545, "step": 6295 }, { - "epoch": 0.36, - "grad_norm": 0.306715059456966, - "learning_rate": 1.4763435799514132e-05, - "loss": 0.2315, + "epoch": 0.29, + "grad_norm": 0.493717360174348, + "learning_rate": 1.6678469525270875e-05, + "loss": 0.3204, "step": 6296 }, { - "epoch": 0.36, - "grad_norm": 0.5344701290774023, - "learning_rate": 1.4761799492463516e-05, - "loss": 0.381, + "epoch": 0.29, + "grad_norm": 1.5194390153763373, + "learning_rate": 1.6677361995108744e-05, + "loss": 0.8334, "step": 6297 }, { - "epoch": 0.36, - "grad_norm": 0.45486931737761455, - "learning_rate": 1.4760163020511986e-05, - "loss": 0.3881, + "epoch": 0.29, + "grad_norm": 0.37078101996787816, + "learning_rate": 1.6676254317116253e-05, + "loss": 0.2861, "step": 6298 }, { - "epoch": 0.36, - "grad_norm": 0.31770712221775865, - "learning_rate": 1.4758526383716219e-05, - "loss": 0.2241, + "epoch": 0.29, + "grad_norm": 0.3022909530929876, + "learning_rate": 1.6675146491317925e-05, + "loss": 0.2136, "step": 6299 }, { - "epoch": 0.36, - "grad_norm": 1.1779532369599066, - "learning_rate": 1.4756889582132886e-05, - "loss": 0.7267, + "epoch": 0.29, + "grad_norm": 0.5895145806009565, + "learning_rate": 1.667403851773829e-05, + "loss": 0.3422, "step": 6300 }, { - "epoch": 0.36, - "grad_norm": 0.25416641771855647, - "learning_rate": 1.4755252615818671e-05, - "loss": 0.2218, + "epoch": 0.29, + "grad_norm": 0.38695642650908796, + "learning_rate": 1.6672930396401878e-05, + "loss": 0.3004, "step": 6301 }, { - "epoch": 0.36, - "grad_norm": 0.28196065175567037, - "learning_rate": 1.4753615484830261e-05, - "loss": 0.2354, + "epoch": 0.29, + "grad_norm": 1.067488679094624, + "learning_rate": 1.667182212733321e-05, + "loss": 0.446, "step": 6302 }, { - "epoch": 0.36, - "grad_norm": 0.6902396292135428, - "learning_rate": 1.4751978189224354e-05, - "loss": 0.4771, + "epoch": 0.29, + "grad_norm": 0.41294349476655834, + "learning_rate": 1.6670713710556836e-05, + "loss": 0.3583, "step": 6303 }, { - "epoch": 0.36, - "grad_norm": 1.0221054595371608, - "learning_rate": 1.4750340729057646e-05, - "loss": 0.728, + "epoch": 0.29, + "grad_norm": 0.3805121091521622, + "learning_rate": 1.666960514609729e-05, + "loss": 0.2943, "step": 6304 }, { - "epoch": 0.36, - "grad_norm": 0.3009926847405351, - "learning_rate": 1.4748703104386843e-05, - "loss": 0.215, + "epoch": 0.29, + "grad_norm": 0.8663100037922389, + "learning_rate": 1.666849643397911e-05, + "loss": 0.5069, "step": 6305 }, { - "epoch": 0.36, - "grad_norm": 0.32641194854409294, - "learning_rate": 1.4747065315268655e-05, - "loss": 0.2941, + "epoch": 0.29, + "grad_norm": 0.3082071252749816, + "learning_rate": 1.666738757422685e-05, + "loss": 0.2648, "step": 6306 }, { - "epoch": 0.36, - "grad_norm": 0.4241970288360463, - "learning_rate": 1.4745427361759801e-05, - "loss": 0.3376, + "epoch": 0.29, + "grad_norm": 0.455373458543171, + "learning_rate": 1.6666278566865056e-05, + "loss": 0.3105, "step": 6307 }, { - "epoch": 0.36, - "grad_norm": 0.45701611260847547, - "learning_rate": 1.4743789243916999e-05, - "loss": 0.2903, + "epoch": 0.29, + "grad_norm": 0.3925738256555642, + "learning_rate": 1.6665169411918278e-05, + "loss": 0.2662, "step": 6308 }, { - "epoch": 0.36, - "grad_norm": 0.2999184950849396, - "learning_rate": 1.4742150961796981e-05, - "loss": 0.2455, + "epoch": 0.29, + "grad_norm": 0.5439166457188115, + "learning_rate": 1.6664060109411072e-05, + "loss": 0.2752, "step": 6309 }, { - "epoch": 0.36, - "grad_norm": 0.45425353186377854, - "learning_rate": 1.4740512515456479e-05, - "loss": 0.3645, + "epoch": 0.29, + "grad_norm": 0.39004279682172305, + "learning_rate": 1.6662950659368e-05, + "loss": 0.2738, "step": 6310 }, { - "epoch": 0.36, - "grad_norm": 0.374441697495103, - "learning_rate": 1.4738873904952232e-05, - "loss": 0.2927, + "epoch": 0.29, + "grad_norm": 0.4535619052993781, + "learning_rate": 1.666184106181362e-05, + "loss": 0.3534, "step": 6311 }, { - "epoch": 0.36, - "grad_norm": 0.8017812489117565, - "learning_rate": 1.4737235130340985e-05, - "loss": 0.4057, + "epoch": 0.29, + "grad_norm": 0.5175851342474954, + "learning_rate": 1.6660731316772503e-05, + "loss": 0.261, "step": 6312 }, { - "epoch": 0.36, - "grad_norm": 0.4483513029265171, - "learning_rate": 1.473559619167949e-05, - "loss": 0.3609, + "epoch": 0.29, + "grad_norm": 0.27573522809198897, + "learning_rate": 1.665962142426921e-05, + "loss": 0.2235, "step": 6313 }, { - "epoch": 0.36, - "grad_norm": 0.30337256483409936, - "learning_rate": 1.4733957089024502e-05, - "loss": 0.2775, + "epoch": 0.29, + "grad_norm": 1.1629230784224807, + "learning_rate": 1.6658511384328325e-05, + "loss": 0.7181, "step": 6314 }, { - "epoch": 0.36, - "grad_norm": 0.27788517511323146, - "learning_rate": 1.4732317822432782e-05, - "loss": 0.1849, + "epoch": 0.29, + "grad_norm": 0.3318162751432222, + "learning_rate": 1.6657401196974405e-05, + "loss": 0.2674, "step": 6315 }, { - "epoch": 0.36, - "grad_norm": 1.2877189024995006, - "learning_rate": 1.4730678391961102e-05, - "loss": 0.7865, + "epoch": 0.29, + "grad_norm": 0.3459968045154157, + "learning_rate": 1.6656290862232047e-05, + "loss": 0.2638, "step": 6316 }, { - "epoch": 0.36, - "grad_norm": 0.3273282971439404, - "learning_rate": 1.472903879766623e-05, - "loss": 0.2822, + "epoch": 0.29, + "grad_norm": 0.9530287036238254, + "learning_rate": 1.6655180380125823e-05, + "loss": 0.4683, "step": 6317 }, { - "epoch": 0.36, - "grad_norm": 0.36865587604384104, - "learning_rate": 1.4727399039604951e-05, - "loss": 0.2766, + "epoch": 0.29, + "grad_norm": 0.4835353857930054, + "learning_rate": 1.6654069750680316e-05, + "loss": 0.3997, "step": 6318 }, { - "epoch": 0.36, - "grad_norm": 0.6217638120111219, - "learning_rate": 1.4725759117834045e-05, - "loss": 0.4275, + "epoch": 0.29, + "grad_norm": 0.2558805157049701, + "learning_rate": 1.6652958973920122e-05, + "loss": 0.1935, "step": 6319 }, { - "epoch": 0.36, - "grad_norm": 0.34169170317511, - "learning_rate": 1.4724119032410305e-05, - "loss": 0.3136, + "epoch": 0.29, + "grad_norm": 0.4554211813492704, + "learning_rate": 1.6651848049869827e-05, + "loss": 0.2762, "step": 6320 }, { - "epoch": 0.36, - "grad_norm": 0.30920422479643384, - "learning_rate": 1.4722478783390522e-05, - "loss": 0.1771, + "epoch": 0.29, + "grad_norm": 1.3292099913978672, + "learning_rate": 1.6650736978554028e-05, + "loss": 0.8286, "step": 6321 }, { - "epoch": 0.36, - "grad_norm": 0.34775308402691785, - "learning_rate": 1.472083837083151e-05, - "loss": 0.2888, + "epoch": 0.29, + "grad_norm": 0.44992536777672276, + "learning_rate": 1.6649625759997323e-05, + "loss": 0.2347, "step": 6322 }, { - "epoch": 0.36, - "grad_norm": 0.3193141427715202, - "learning_rate": 1.471919779479006e-05, - "loss": 0.256, + "epoch": 0.29, + "grad_norm": 0.4761697221142709, + "learning_rate": 1.6648514394224312e-05, + "loss": 0.3489, "step": 6323 }, { - "epoch": 0.36, - "grad_norm": 1.1400092958513408, - "learning_rate": 1.4717557055322997e-05, - "loss": 0.7106, + "epoch": 0.29, + "grad_norm": 0.4676053973870956, + "learning_rate": 1.66474028812596e-05, + "loss": 0.3047, "step": 6324 }, { - "epoch": 0.36, - "grad_norm": 0.40323223147712445, - "learning_rate": 1.4715916152487135e-05, - "loss": 0.3131, + "epoch": 0.29, + "grad_norm": 0.27494626712851433, + "learning_rate": 1.6646291221127796e-05, + "loss": 0.1426, "step": 6325 }, { - "epoch": 0.36, - "grad_norm": 0.4250005070452697, - "learning_rate": 1.47142750863393e-05, - "loss": 0.2852, + "epoch": 0.29, + "grad_norm": 0.5364449897066189, + "learning_rate": 1.664517941385351e-05, + "loss": 0.3772, "step": 6326 }, { - "epoch": 0.36, - "grad_norm": 0.3343011248837186, - "learning_rate": 1.471263385693632e-05, - "loss": 0.2307, + "epoch": 0.29, + "grad_norm": 0.430613294379969, + "learning_rate": 1.6644067459461352e-05, + "loss": 0.3259, "step": 6327 }, { - "epoch": 0.36, - "grad_norm": 0.3055552288905154, - "learning_rate": 1.4710992464335034e-05, - "loss": 0.1793, + "epoch": 0.29, + "grad_norm": 0.4089229958897362, + "learning_rate": 1.664295535797595e-05, + "loss": 0.2233, "step": 6328 }, { - "epoch": 0.36, - "grad_norm": 0.40306747549933275, - "learning_rate": 1.4709350908592281e-05, - "loss": 0.2806, + "epoch": 0.29, + "grad_norm": 0.5861851110480223, + "learning_rate": 1.6641843109421913e-05, + "loss": 0.3913, "step": 6329 }, { - "epoch": 0.36, - "grad_norm": 0.32815408215415465, - "learning_rate": 1.4707709189764909e-05, - "loss": 0.3094, + "epoch": 0.29, + "grad_norm": 0.5321857399413813, + "learning_rate": 1.6640730713823877e-05, + "loss": 0.3773, "step": 6330 }, { - "epoch": 0.36, - "grad_norm": 0.6956160911594513, - "learning_rate": 1.470606730790977e-05, - "loss": 0.356, + "epoch": 0.29, + "grad_norm": 0.37885220313592155, + "learning_rate": 1.6639618171206458e-05, + "loss": 0.2714, "step": 6331 }, { - "epoch": 0.36, - "grad_norm": 0.3454136921848711, - "learning_rate": 1.4704425263083722e-05, - "loss": 0.2861, + "epoch": 0.29, + "grad_norm": 0.29796201483176976, + "learning_rate": 1.6638505481594298e-05, + "loss": 0.1663, "step": 6332 }, { - "epoch": 0.36, - "grad_norm": 0.30035263482807145, - "learning_rate": 1.470278305534363e-05, - "loss": 0.2579, + "epoch": 0.29, + "grad_norm": 0.8679850758011335, + "learning_rate": 1.663739264501202e-05, + "loss": 0.542, "step": 6333 }, { - "epoch": 0.36, - "grad_norm": 0.4375086593799831, - "learning_rate": 1.4701140684746363e-05, - "loss": 0.3229, + "epoch": 0.29, + "grad_norm": 0.4190510430581646, + "learning_rate": 1.6636279661484273e-05, + "loss": 0.3362, "step": 6334 }, { - "epoch": 0.36, - "grad_norm": 0.32172857440332125, - "learning_rate": 1.4699498151348797e-05, - "loss": 0.2142, + "epoch": 0.29, + "grad_norm": 0.32686844721063457, + "learning_rate": 1.663516653103568e-05, + "loss": 0.2784, "step": 6335 }, { - "epoch": 0.36, - "grad_norm": 0.6809092715571192, - "learning_rate": 1.469785545520781e-05, - "loss": 0.4272, + "epoch": 0.29, + "grad_norm": 1.04724465035836, + "learning_rate": 1.6634053253690903e-05, + "loss": 0.5535, "step": 6336 }, { - "epoch": 0.36, - "grad_norm": 0.3487777939185845, - "learning_rate": 1.469621259638029e-05, - "loss": 0.3407, + "epoch": 0.29, + "grad_norm": 0.310551281913641, + "learning_rate": 1.6632939829474576e-05, + "loss": 0.2248, "step": 6337 }, { - "epoch": 0.36, - "grad_norm": 0.29453832274472436, - "learning_rate": 1.4694569574923132e-05, - "loss": 0.2134, + "epoch": 0.29, + "grad_norm": 0.5606145855846372, + "learning_rate": 1.6631826258411358e-05, + "loss": 0.3006, "step": 6338 }, { - "epoch": 0.36, - "grad_norm": 0.8308626976224194, - "learning_rate": 1.4692926390893234e-05, - "loss": 0.5887, + "epoch": 0.29, + "grad_norm": 0.45055836410774835, + "learning_rate": 1.6630712540525896e-05, + "loss": 0.3416, "step": 6339 }, { - "epoch": 0.36, - "grad_norm": 0.39159026271164604, - "learning_rate": 1.469128304434749e-05, - "loss": 0.3049, + "epoch": 0.29, + "grad_norm": 0.39080085480333115, + "learning_rate": 1.6629598675842854e-05, + "loss": 0.2752, "step": 6340 }, { - "epoch": 0.36, - "grad_norm": 0.24883858751617285, - "learning_rate": 1.4689639535342823e-05, - "loss": 0.1906, + "epoch": 0.29, + "grad_norm": 0.39700730166110776, + "learning_rate": 1.6628484664386882e-05, + "loss": 0.1905, "step": 6341 }, { - "epoch": 0.36, - "grad_norm": 0.5107631135808215, - "learning_rate": 1.4687995863936135e-05, - "loss": 0.3797, + "epoch": 0.29, + "grad_norm": 0.4366012834277006, + "learning_rate": 1.6627370506182648e-05, + "loss": 0.3061, "step": 6342 }, { - "epoch": 0.36, - "grad_norm": 0.6558962140201424, - "learning_rate": 1.4686352030184354e-05, - "loss": 0.4598, + "epoch": 0.29, + "grad_norm": 0.6440574917110293, + "learning_rate": 1.662625620125482e-05, + "loss": 0.2987, "step": 6343 }, { - "epoch": 0.36, - "grad_norm": 0.32642046172243194, - "learning_rate": 1.4684708034144403e-05, - "loss": 0.174, + "epoch": 0.29, + "grad_norm": 0.35728094277128986, + "learning_rate": 1.6625141749628065e-05, + "loss": 0.263, "step": 6344 }, { - "epoch": 0.36, - "grad_norm": 0.3823814532754202, - "learning_rate": 1.4683063875873215e-05, - "loss": 0.3208, + "epoch": 0.29, + "grad_norm": 0.562164155255936, + "learning_rate": 1.6624027151327057e-05, + "loss": 0.3814, "step": 6345 }, { - "epoch": 0.36, - "grad_norm": 0.4802433585379294, - "learning_rate": 1.4681419555427727e-05, - "loss": 0.4175, + "epoch": 0.29, + "grad_norm": 0.43906920824476925, + "learning_rate": 1.6622912406376473e-05, + "loss": 0.3024, "step": 6346 }, { - "epoch": 0.36, - "grad_norm": 0.521892647783289, - "learning_rate": 1.467977507286488e-05, - "loss": 0.3415, + "epoch": 0.29, + "grad_norm": 0.33971591982119675, + "learning_rate": 1.6621797514800993e-05, + "loss": 0.2781, "step": 6347 }, { - "epoch": 0.36, - "grad_norm": 0.3458481345611792, - "learning_rate": 1.4678130428241623e-05, - "loss": 0.2438, + "epoch": 0.29, + "grad_norm": 1.131439514793796, + "learning_rate": 1.66206824766253e-05, + "loss": 0.3736, "step": 6348 }, { - "epoch": 0.36, - "grad_norm": 0.3032108143688598, - "learning_rate": 1.4676485621614913e-05, - "loss": 0.2495, + "epoch": 0.29, + "grad_norm": 0.5980276288514539, + "learning_rate": 1.6619567291874075e-05, + "loss": 0.3617, "step": 6349 }, { - "epoch": 0.36, - "grad_norm": 0.5573975802181028, - "learning_rate": 1.4674840653041706e-05, - "loss": 0.3531, + "epoch": 0.29, + "grad_norm": 0.35631731997263694, + "learning_rate": 1.6618451960572008e-05, + "loss": 0.3174, "step": 6350 }, { - "epoch": 0.36, - "grad_norm": 0.37910874435426417, - "learning_rate": 1.4673195522578967e-05, - "loss": 0.2586, + "epoch": 0.29, + "grad_norm": 0.41425111626751443, + "learning_rate": 1.6617336482743795e-05, + "loss": 0.3276, "step": 6351 }, { - "epoch": 0.36, - "grad_norm": 1.1453819020562075, - "learning_rate": 1.467155023028367e-05, - "loss": 0.8029, + "epoch": 0.29, + "grad_norm": 0.3922338544363831, + "learning_rate": 1.6616220858414133e-05, + "loss": 0.3006, "step": 6352 }, { - "epoch": 0.37, - "grad_norm": 0.3225394768457753, - "learning_rate": 1.4669904776212786e-05, - "loss": 0.301, + "epoch": 0.29, + "grad_norm": 0.2975966788566311, + "learning_rate": 1.6615105087607713e-05, + "loss": 0.1881, "step": 6353 }, { - "epoch": 0.37, - "grad_norm": 0.4135206494047781, - "learning_rate": 1.4668259160423305e-05, - "loss": 0.2995, + "epoch": 0.29, + "grad_norm": 0.40539495399710473, + "learning_rate": 1.6613989170349246e-05, + "loss": 0.2697, "step": 6354 }, { - "epoch": 0.37, - "grad_norm": 0.30632042423602535, - "learning_rate": 1.4666613382972205e-05, - "loss": 0.2069, + "epoch": 0.29, + "grad_norm": 0.4518923962727792, + "learning_rate": 1.661287310666343e-05, + "loss": 0.3097, "step": 6355 }, { - "epoch": 0.37, - "grad_norm": 0.38142377744009864, - "learning_rate": 1.4664967443916489e-05, - "loss": 0.3089, + "epoch": 0.29, + "grad_norm": 0.5939946551618978, + "learning_rate": 1.661175689657498e-05, + "loss": 0.413, "step": 6356 }, { - "epoch": 0.37, - "grad_norm": 0.3386038426663132, - "learning_rate": 1.4663321343313148e-05, - "loss": 0.2622, + "epoch": 0.29, + "grad_norm": 0.6031200865752705, + "learning_rate": 1.6610640540108606e-05, + "loss": 0.4633, "step": 6357 }, { - "epoch": 0.37, - "grad_norm": 0.45717911991857507, - "learning_rate": 1.4661675081219191e-05, - "loss": 0.3994, + "epoch": 0.29, + "grad_norm": 0.3015549065368941, + "learning_rate": 1.660952403728902e-05, + "loss": 0.2345, "step": 6358 }, { - "epoch": 0.37, - "grad_norm": 0.39907584698503695, - "learning_rate": 1.4660028657691626e-05, - "loss": 0.3111, + "epoch": 0.29, + "grad_norm": 0.3099986315859341, + "learning_rate": 1.6608407388140943e-05, + "loss": 0.2063, "step": 6359 }, { - "epoch": 0.37, - "grad_norm": 0.4939593127745104, - "learning_rate": 1.465838207278747e-05, - "loss": 0.3689, + "epoch": 0.29, + "grad_norm": 1.2604889669744475, + "learning_rate": 1.6607290592689094e-05, + "loss": 0.5545, "step": 6360 }, { - "epoch": 0.37, - "grad_norm": 0.2352579898444018, - "learning_rate": 1.4656735326563738e-05, - "loss": 0.1747, + "epoch": 0.29, + "grad_norm": 0.40911853771604456, + "learning_rate": 1.6606173650958203e-05, + "loss": 0.1232, "step": 6361 }, { - "epoch": 0.37, - "grad_norm": 0.59732590291894, - "learning_rate": 1.4655088419077466e-05, - "loss": 0.3185, + "epoch": 0.29, + "grad_norm": 0.38924994562926674, + "learning_rate": 1.660505656297299e-05, + "loss": 0.3234, "step": 6362 }, { - "epoch": 0.37, - "grad_norm": 0.39752606248880057, - "learning_rate": 1.4653441350385682e-05, - "loss": 0.3149, + "epoch": 0.29, + "grad_norm": 0.5997168965650626, + "learning_rate": 1.6603939328758195e-05, + "loss": 0.3821, "step": 6363 }, { - "epoch": 0.37, - "grad_norm": 0.4368499204854432, - "learning_rate": 1.4651794120545424e-05, - "loss": 0.3275, + "epoch": 0.29, + "grad_norm": 0.445420043278386, + "learning_rate": 1.6602821948338546e-05, + "loss": 0.1303, "step": 6364 }, { - "epoch": 0.37, - "grad_norm": 0.3946428941104599, - "learning_rate": 1.4650146729613735e-05, - "loss": 0.311, + "epoch": 0.29, + "grad_norm": 0.3114484834562211, + "learning_rate": 1.660170442173878e-05, + "loss": 0.2131, "step": 6365 }, { - "epoch": 0.37, - "grad_norm": 0.4052584218015384, - "learning_rate": 1.4648499177647665e-05, - "loss": 0.3519, + "epoch": 0.29, + "grad_norm": 0.4656108937759206, + "learning_rate": 1.6600586748983642e-05, + "loss": 0.346, "step": 6366 }, { - "epoch": 0.37, - "grad_norm": 0.2228152253683647, - "learning_rate": 1.4646851464704269e-05, - "loss": 0.0727, + "epoch": 0.29, + "grad_norm": 0.3307842997711832, + "learning_rate": 1.659946893009788e-05, + "loss": 0.1186, "step": 6367 }, { - "epoch": 0.37, - "grad_norm": 0.55650450715017, - "learning_rate": 1.4645203590840607e-05, - "loss": 0.3699, + "epoch": 0.29, + "grad_norm": 0.50826475733387, + "learning_rate": 1.6598350965106233e-05, + "loss": 0.3489, "step": 6368 }, { - "epoch": 0.37, - "grad_norm": 0.2782351877624767, - "learning_rate": 1.4643555556113742e-05, - "loss": 0.2925, + "epoch": 0.29, + "grad_norm": 1.0728728318204348, + "learning_rate": 1.6597232854033453e-05, + "loss": 0.6073, "step": 6369 }, { - "epoch": 0.37, - "grad_norm": 0.6856577889773536, - "learning_rate": 1.4641907360580749e-05, - "loss": 0.3796, + "epoch": 0.29, + "grad_norm": 0.38497940963281974, + "learning_rate": 1.65961145969043e-05, + "loss": 0.3017, "step": 6370 }, { - "epoch": 0.37, - "grad_norm": 0.49911463412678736, - "learning_rate": 1.4640259004298706e-05, - "loss": 0.3838, + "epoch": 0.29, + "grad_norm": 0.28089285923508095, + "learning_rate": 1.6594996193743525e-05, + "loss": 0.1894, "step": 6371 }, { - "epoch": 0.37, - "grad_norm": 0.3727450596138779, - "learning_rate": 1.4638610487324688e-05, - "loss": 0.333, + "epoch": 0.29, + "grad_norm": 0.47117422290957506, + "learning_rate": 1.6593877644575892e-05, + "loss": 0.2904, "step": 6372 }, { - "epoch": 0.37, - "grad_norm": 0.2839276525223571, - "learning_rate": 1.4636961809715793e-05, - "loss": 0.2624, + "epoch": 0.29, + "grad_norm": 0.569869256934075, + "learning_rate": 1.6592758949426162e-05, + "loss": 0.3819, "step": 6373 }, { - "epoch": 0.37, - "grad_norm": 0.24988639619999695, - "learning_rate": 1.463531297152911e-05, - "loss": 0.1668, + "epoch": 0.29, + "grad_norm": 0.32219923007340284, + "learning_rate": 1.6591640108319103e-05, + "loss": 0.2698, "step": 6374 }, { - "epoch": 0.37, - "grad_norm": 0.5883872688338556, - "learning_rate": 1.4633663972821737e-05, - "loss": 0.4016, + "epoch": 0.29, + "grad_norm": 0.47571440712025526, + "learning_rate": 1.659052112127949e-05, + "loss": 0.3982, "step": 6375 }, { - "epoch": 0.37, - "grad_norm": 0.969636159928934, - "learning_rate": 1.4632014813650779e-05, - "loss": 0.4825, + "epoch": 0.29, + "grad_norm": 0.6088639214926304, + "learning_rate": 1.6589401988332087e-05, + "loss": 0.3442, "step": 6376 }, { - "epoch": 0.37, - "grad_norm": 0.27400554273030164, - "learning_rate": 1.4630365494073348e-05, - "loss": 0.2349, + "epoch": 0.29, + "grad_norm": 0.25203709549402803, + "learning_rate": 1.6588282709501674e-05, + "loss": 0.1595, "step": 6377 }, { - "epoch": 0.37, - "grad_norm": 0.5706865974074818, - "learning_rate": 1.4628716014146558e-05, - "loss": 0.4436, + "epoch": 0.29, + "grad_norm": 0.433420173369272, + "learning_rate": 1.6587163284813034e-05, + "loss": 0.3675, "step": 6378 }, { - "epoch": 0.37, - "grad_norm": 0.28185065914961815, - "learning_rate": 1.4627066373927534e-05, - "loss": 0.1703, + "epoch": 0.29, + "grad_norm": 0.7254860163934584, + "learning_rate": 1.6586043714290946e-05, + "loss": 0.4435, "step": 6379 }, { - "epoch": 0.37, - "grad_norm": 0.5918352536787485, - "learning_rate": 1.4625416573473397e-05, - "loss": 0.2241, + "epoch": 0.29, + "grad_norm": 0.32955721812052813, + "learning_rate": 1.6584923997960196e-05, + "loss": 0.2515, "step": 6380 }, { - "epoch": 0.37, - "grad_norm": 0.2945602690656717, - "learning_rate": 1.4623766612841286e-05, - "loss": 0.2792, + "epoch": 0.29, + "grad_norm": 1.0185057360039, + "learning_rate": 1.6583804135845582e-05, + "loss": 0.7566, "step": 6381 }, { - "epoch": 0.37, - "grad_norm": 1.245141698601791, - "learning_rate": 1.4622116492088335e-05, - "loss": 0.4757, + "epoch": 0.29, + "grad_norm": 0.39712767852618336, + "learning_rate": 1.658268412797188e-05, + "loss": 0.3063, "step": 6382 }, { - "epoch": 0.37, - "grad_norm": 0.7575767671387406, - "learning_rate": 1.4620466211271686e-05, - "loss": 0.4512, + "epoch": 0.29, + "grad_norm": 0.38838602511710074, + "learning_rate": 1.6581563974363903e-05, + "loss": 0.2947, "step": 6383 }, { - "epoch": 0.37, - "grad_norm": 0.42239162484195003, - "learning_rate": 1.4618815770448493e-05, - "loss": 0.2349, + "epoch": 0.29, + "grad_norm": 0.37996672965524714, + "learning_rate": 1.658044367504644e-05, + "loss": 0.1405, "step": 6384 }, { - "epoch": 0.37, - "grad_norm": 0.28014002852446207, - "learning_rate": 1.461716516967591e-05, - "loss": 0.2347, + "epoch": 0.29, + "grad_norm": 0.8026404437762646, + "learning_rate": 1.65793232300443e-05, + "loss": 0.3809, "step": 6385 }, { - "epoch": 0.37, - "grad_norm": 0.792038715756322, - "learning_rate": 1.4615514409011093e-05, - "loss": 0.4452, + "epoch": 0.29, + "grad_norm": 0.320474651717984, + "learning_rate": 1.6578202639382284e-05, + "loss": 0.2801, "step": 6386 }, { - "epoch": 0.37, - "grad_norm": 0.7042697472461529, - "learning_rate": 1.4613863488511214e-05, - "loss": 0.2951, + "epoch": 0.29, + "grad_norm": 1.0708099246273617, + "learning_rate": 1.65770819030852e-05, + "loss": 0.5058, "step": 6387 }, { - "epoch": 0.37, - "grad_norm": 1.090984118378016, - "learning_rate": 1.4612212408233438e-05, - "loss": 0.5841, + "epoch": 0.29, + "grad_norm": 0.7274830765276327, + "learning_rate": 1.6575961021177862e-05, + "loss": 0.4803, "step": 6388 }, { - "epoch": 0.37, - "grad_norm": 0.3318163237944138, - "learning_rate": 1.4610561168234942e-05, - "loss": 0.3015, + "epoch": 0.29, + "grad_norm": 0.2751653610238452, + "learning_rate": 1.6574839993685087e-05, + "loss": 0.2154, "step": 6389 }, { - "epoch": 0.37, - "grad_norm": 0.36470561968264775, - "learning_rate": 1.4608909768572917e-05, - "loss": 0.2485, + "epoch": 0.29, + "grad_norm": 0.5314590233336374, + "learning_rate": 1.657371882063169e-05, + "loss": 0.2834, "step": 6390 }, { - "epoch": 0.37, - "grad_norm": 0.5483411965792786, - "learning_rate": 1.4607258209304542e-05, - "loss": 0.2347, + "epoch": 0.29, + "grad_norm": 1.30410068246301, + "learning_rate": 1.6572597502042492e-05, + "loss": 0.4119, "step": 6391 }, { - "epoch": 0.37, - "grad_norm": 0.4024074470010744, - "learning_rate": 1.4605606490487013e-05, - "loss": 0.2603, + "epoch": 0.29, + "grad_norm": 0.3833280788467357, + "learning_rate": 1.6571476037942322e-05, + "loss": 0.2891, "step": 6392 }, { - "epoch": 0.37, - "grad_norm": 0.3917554003411267, - "learning_rate": 1.4603954612177532e-05, - "loss": 0.231, + "epoch": 0.29, + "grad_norm": 1.000137740200687, + "learning_rate": 1.6570354428356007e-05, + "loss": 0.5199, "step": 6393 }, { - "epoch": 0.37, - "grad_norm": 1.2772938766587616, - "learning_rate": 1.4602302574433297e-05, - "loss": 0.486, + "epoch": 0.29, + "grad_norm": 0.36254230672963805, + "learning_rate": 1.6569232673308375e-05, + "loss": 0.3096, "step": 6394 }, { - "epoch": 0.37, - "grad_norm": 0.6668301917465416, - "learning_rate": 1.4600650377311523e-05, - "loss": 0.4474, + "epoch": 0.29, + "grad_norm": 0.5080913992817924, + "learning_rate": 1.6568110772824264e-05, + "loss": 0.3477, "step": 6395 }, { - "epoch": 0.37, - "grad_norm": 0.39712967225015333, - "learning_rate": 1.4598998020869426e-05, - "loss": 0.3148, + "epoch": 0.29, + "grad_norm": 0.5572110534200213, + "learning_rate": 1.6566988726928513e-05, + "loss": 0.263, "step": 6396 }, { - "epoch": 0.37, - "grad_norm": 0.28444972237384936, - "learning_rate": 1.4597345505164222e-05, - "loss": 0.218, + "epoch": 0.29, + "grad_norm": 1.1878365645437057, + "learning_rate": 1.6565866535645957e-05, + "loss": 0.3918, "step": 6397 }, { - "epoch": 0.37, - "grad_norm": 0.5753183483183651, - "learning_rate": 1.4595692830253143e-05, - "loss": 0.2875, + "epoch": 0.29, + "grad_norm": 0.3527137301841216, + "learning_rate": 1.6564744199001447e-05, + "loss": 0.2709, "step": 6398 }, { - "epoch": 0.37, - "grad_norm": 0.5209561480298429, - "learning_rate": 1.459403999619342e-05, - "loss": 0.3033, + "epoch": 0.29, + "grad_norm": 0.5945591121654509, + "learning_rate": 1.656362171701983e-05, + "loss": 0.439, "step": 6399 }, { - "epoch": 0.37, - "grad_norm": 0.47041709352127614, - "learning_rate": 1.4592387003042287e-05, - "loss": 0.2843, + "epoch": 0.29, + "grad_norm": 1.002199503429373, + "learning_rate": 1.656249908972595e-05, + "loss": 0.2774, "step": 6400 }, { - "epoch": 0.37, - "grad_norm": 1.4217366843048336, - "learning_rate": 1.4590733850856989e-05, - "loss": 0.7602, + "epoch": 0.29, + "grad_norm": 0.4818106349507305, + "learning_rate": 1.6561376317144668e-05, + "loss": 0.299, "step": 6401 }, { - "epoch": 0.37, - "grad_norm": 0.3419329387584038, - "learning_rate": 1.4589080539694778e-05, - "loss": 0.2929, + "epoch": 0.29, + "grad_norm": 0.7519456610443557, + "learning_rate": 1.6560253399300838e-05, + "loss": 0.371, "step": 6402 }, { - "epoch": 0.37, - "grad_norm": 0.6029781463091936, - "learning_rate": 1.4587427069612902e-05, - "loss": 0.3747, + "epoch": 0.29, + "grad_norm": 0.30058306025878523, + "learning_rate": 1.6559130336219317e-05, + "loss": 0.126, "step": 6403 }, { - "epoch": 0.37, - "grad_norm": 0.3860877146598658, - "learning_rate": 1.4585773440668626e-05, - "loss": 0.3044, + "epoch": 0.29, + "grad_norm": 0.41400588950928835, + "learning_rate": 1.655800712792498e-05, + "loss": 0.3191, "step": 6404 }, { - "epoch": 0.37, - "grad_norm": 0.3332149808196481, - "learning_rate": 1.4584119652919213e-05, - "loss": 0.2862, + "epoch": 0.29, + "grad_norm": 1.3828559546417385, + "learning_rate": 1.6556883774442675e-05, + "loss": 0.781, "step": 6405 }, { - "epoch": 0.37, - "grad_norm": 0.2517107564009192, - "learning_rate": 1.4582465706421935e-05, - "loss": 0.1132, + "epoch": 0.29, + "grad_norm": 0.38337491252087164, + "learning_rate": 1.655576027579729e-05, + "loss": 0.2558, "step": 6406 }, { - "epoch": 0.37, - "grad_norm": 0.9180317484308079, - "learning_rate": 1.4580811601234067e-05, - "loss": 0.55, + "epoch": 0.29, + "grad_norm": 0.36259767929482484, + "learning_rate": 1.6554636632013692e-05, + "loss": 0.2837, "step": 6407 }, { - "epoch": 0.37, - "grad_norm": 0.4199511381911743, - "learning_rate": 1.4579157337412886e-05, - "loss": 0.2734, + "epoch": 0.29, + "grad_norm": 0.7652063219605066, + "learning_rate": 1.655351284311675e-05, + "loss": 0.4721, "step": 6408 }, { - "epoch": 0.37, - "grad_norm": 0.40497014891679506, - "learning_rate": 1.4577502915015687e-05, - "loss": 0.3179, + "epoch": 0.29, + "grad_norm": 0.3870541204371109, + "learning_rate": 1.6552388909131357e-05, + "loss": 0.2264, "step": 6409 }, { - "epoch": 0.37, - "grad_norm": 0.7870272373752119, - "learning_rate": 1.4575848334099756e-05, - "loss": 0.3261, + "epoch": 0.29, + "grad_norm": 0.43873263787672284, + "learning_rate": 1.6551264830082385e-05, + "loss": 0.2093, "step": 6410 }, { - "epoch": 0.37, - "grad_norm": 0.30582763377378736, - "learning_rate": 1.4574193594722394e-05, - "loss": 0.256, + "epoch": 0.29, + "grad_norm": 0.7432212206213976, + "learning_rate": 1.655014060599473e-05, + "loss": 0.4335, "step": 6411 }, { - "epoch": 0.37, - "grad_norm": 0.3369040954406996, - "learning_rate": 1.4572538696940908e-05, - "loss": 0.2748, + "epoch": 0.29, + "grad_norm": 0.9396366879479621, + "learning_rate": 1.6549016236893263e-05, + "loss": 0.5101, "step": 6412 }, { - "epoch": 0.37, - "grad_norm": 0.36361392670595094, - "learning_rate": 1.4570883640812602e-05, - "loss": 0.2604, + "epoch": 0.29, + "grad_norm": 0.34664505583475064, + "learning_rate": 1.6547891722802897e-05, + "loss": 0.2074, "step": 6413 }, { - "epoch": 0.37, - "grad_norm": 0.3875128108510935, - "learning_rate": 1.456922842639479e-05, - "loss": 0.2852, + "epoch": 0.29, + "grad_norm": 0.36792639608171956, + "learning_rate": 1.6546767063748518e-05, + "loss": 0.3533, "step": 6414 }, { - "epoch": 0.37, - "grad_norm": 0.8988967262988671, - "learning_rate": 1.4567573053744797e-05, - "loss": 0.4492, + "epoch": 0.29, + "grad_norm": 0.30901995119216663, + "learning_rate": 1.6545642259755025e-05, + "loss": 0.1517, "step": 6415 }, { - "epoch": 0.37, - "grad_norm": 0.40165688387675075, - "learning_rate": 1.4565917522919944e-05, - "loss": 0.2684, + "epoch": 0.29, + "grad_norm": 0.34018907227089984, + "learning_rate": 1.6544517310847323e-05, + "loss": 0.2118, "step": 6416 }, { - "epoch": 0.37, - "grad_norm": 0.38594474836512377, - "learning_rate": 1.4564261833977563e-05, - "loss": 0.3048, + "epoch": 0.29, + "grad_norm": 0.5484774262032054, + "learning_rate": 1.6543392217050312e-05, + "loss": 0.3989, "step": 6417 }, { - "epoch": 0.37, - "grad_norm": 0.2507980036522095, - "learning_rate": 1.4562605986974991e-05, - "loss": 0.194, + "epoch": 0.29, + "grad_norm": 0.5907915075173672, + "learning_rate": 1.6542266978388905e-05, + "loss": 0.354, "step": 6418 }, { - "epoch": 0.37, - "grad_norm": 0.872875765589793, - "learning_rate": 1.456094998196957e-05, - "loss": 0.4096, + "epoch": 0.29, + "grad_norm": 0.426750177669408, + "learning_rate": 1.6541141594888016e-05, + "loss": 0.2794, "step": 6419 }, { - "epoch": 0.37, - "grad_norm": 0.358181410493378, - "learning_rate": 1.4559293819018648e-05, - "loss": 0.2836, + "epoch": 0.29, + "grad_norm": 0.7001037294796925, + "learning_rate": 1.6540016066572552e-05, + "loss": 0.3857, "step": 6420 }, { - "epoch": 0.37, - "grad_norm": 0.45407158678411264, - "learning_rate": 1.4557637498179577e-05, - "loss": 0.346, + "epoch": 0.29, + "grad_norm": 0.24729124452153697, + "learning_rate": 1.653889039346744e-05, + "loss": 0.1918, "step": 6421 }, { - "epoch": 0.37, - "grad_norm": 0.9255722275983588, - "learning_rate": 1.4555981019509716e-05, - "loss": 0.5238, + "epoch": 0.3, + "grad_norm": 0.3373098248567087, + "learning_rate": 1.6537764575597595e-05, + "loss": 0.2745, "step": 6422 }, { - "epoch": 0.37, - "grad_norm": 0.29691564293618045, - "learning_rate": 1.4554324383066427e-05, - "loss": 0.2093, + "epoch": 0.3, + "grad_norm": 1.151014603013369, + "learning_rate": 1.6536638612987943e-05, + "loss": 0.5794, "step": 6423 }, { - "epoch": 0.37, - "grad_norm": 0.4785704057795955, - "learning_rate": 1.4552667588907082e-05, - "loss": 0.3371, + "epoch": 0.3, + "grad_norm": 0.8049619141049491, + "learning_rate": 1.6535512505663413e-05, + "loss": 0.4705, "step": 6424 }, { - "epoch": 0.37, - "grad_norm": 0.2952211128804953, - "learning_rate": 1.4551010637089054e-05, - "loss": 0.2773, + "epoch": 0.3, + "grad_norm": 0.3490047826268383, + "learning_rate": 1.6534386253648933e-05, + "loss": 0.2827, "step": 6425 }, { - "epoch": 0.37, - "grad_norm": 0.36035741477242134, - "learning_rate": 1.454935352766972e-05, - "loss": 0.2268, + "epoch": 0.3, + "grad_norm": 0.427358246409588, + "learning_rate": 1.6533259856969444e-05, + "loss": 0.2638, "step": 6426 }, { - "epoch": 0.37, - "grad_norm": 0.8603431400721523, - "learning_rate": 1.4547696260706474e-05, - "loss": 0.4749, + "epoch": 0.3, + "grad_norm": 0.4880011568089692, + "learning_rate": 1.653213331564987e-05, + "loss": 0.296, "step": 6427 }, { - "epoch": 0.37, - "grad_norm": 0.45518616624458275, - "learning_rate": 1.4546038836256698e-05, - "loss": 0.3162, + "epoch": 0.3, + "grad_norm": 0.4028054990303717, + "learning_rate": 1.653100662971517e-05, + "loss": 0.2922, "step": 6428 }, { - "epoch": 0.37, - "grad_norm": 0.32397378870781374, - "learning_rate": 1.4544381254377794e-05, - "loss": 0.2011, + "epoch": 0.3, + "grad_norm": 0.36823458657292246, + "learning_rate": 1.652987979919027e-05, + "loss": 0.201, "step": 6429 }, { - "epoch": 0.37, - "grad_norm": 0.48798619337408167, - "learning_rate": 1.4542723515127161e-05, - "loss": 0.2987, + "epoch": 0.3, + "grad_norm": 0.7151789518021233, + "learning_rate": 1.6528752824100132e-05, + "loss": 0.3985, "step": 6430 }, { - "epoch": 0.37, - "grad_norm": 0.46757065365840805, - "learning_rate": 1.4541065618562211e-05, - "loss": 0.301, + "epoch": 0.3, + "grad_norm": 0.4727870770840873, + "learning_rate": 1.6527625704469695e-05, + "loss": 0.3021, "step": 6431 }, { - "epoch": 0.37, - "grad_norm": 0.39111537126054996, - "learning_rate": 1.4539407564740353e-05, - "loss": 0.2013, + "epoch": 0.3, + "grad_norm": 0.5097091756319991, + "learning_rate": 1.6526498440323914e-05, + "loss": 0.3732, "step": 6432 }, { - "epoch": 0.37, - "grad_norm": 0.4772229613888993, - "learning_rate": 1.4537749353719006e-05, - "loss": 0.3447, + "epoch": 0.3, + "grad_norm": 0.39534392625997816, + "learning_rate": 1.652537103168775e-05, + "loss": 0.2969, "step": 6433 }, { - "epoch": 0.37, - "grad_norm": 0.7128445538916925, - "learning_rate": 1.4536090985555595e-05, - "loss": 0.4374, + "epoch": 0.3, + "grad_norm": 0.3812483646063325, + "learning_rate": 1.6524243478586163e-05, + "loss": 0.2739, "step": 6434 }, { - "epoch": 0.37, - "grad_norm": 0.37996740838238047, - "learning_rate": 1.4534432460307546e-05, - "loss": 0.284, + "epoch": 0.3, + "grad_norm": 0.37011732969299765, + "learning_rate": 1.6523115781044112e-05, + "loss": 0.313, "step": 6435 }, { - "epoch": 0.37, - "grad_norm": 0.3521223528144348, - "learning_rate": 1.4532773778032297e-05, - "loss": 0.2757, + "epoch": 0.3, + "grad_norm": 0.7606936117525245, + "learning_rate": 1.6521987939086563e-05, + "loss": 0.3745, "step": 6436 }, { - "epoch": 0.37, - "grad_norm": 0.4727501869932283, - "learning_rate": 1.4531114938787285e-05, - "loss": 0.3093, + "epoch": 0.3, + "grad_norm": 0.29443869151396307, + "learning_rate": 1.652085995273849e-05, + "loss": 0.2171, "step": 6437 }, { - "epoch": 0.37, - "grad_norm": 0.34669175481499864, - "learning_rate": 1.452945594262996e-05, - "loss": 0.2832, + "epoch": 0.3, + "grad_norm": 0.3920016536379928, + "learning_rate": 1.651973182202486e-05, + "loss": 0.3335, "step": 6438 }, { - "epoch": 0.37, - "grad_norm": 0.3986170161984597, - "learning_rate": 1.452779678961777e-05, - "loss": 0.2385, + "epoch": 0.3, + "grad_norm": 0.9988018325901118, + "learning_rate": 1.6518603546970655e-05, + "loss": 0.2928, "step": 6439 }, { - "epoch": 0.37, - "grad_norm": 0.37509714723728876, - "learning_rate": 1.4526137479808173e-05, - "loss": 0.3058, + "epoch": 0.3, + "grad_norm": 0.405660531891023, + "learning_rate": 1.6517475127600845e-05, + "loss": 0.3, "step": 6440 }, { - "epoch": 0.37, - "grad_norm": 0.3784935823336974, - "learning_rate": 1.452447801325863e-05, - "loss": 0.2805, + "epoch": 0.3, + "grad_norm": 0.9319795140839109, + "learning_rate": 1.651634656394042e-05, + "loss": 0.6358, "step": 6441 }, { - "epoch": 0.37, - "grad_norm": 0.9648337594394374, - "learning_rate": 1.4522818390026605e-05, - "loss": 0.4174, + "epoch": 0.3, + "grad_norm": 0.3085834874736504, + "learning_rate": 1.651521785601436e-05, + "loss": 0.2628, "step": 6442 }, { - "epoch": 0.37, - "grad_norm": 0.36332205508671817, - "learning_rate": 1.4521158610169575e-05, - "loss": 0.3118, + "epoch": 0.3, + "grad_norm": 0.31520767514972325, + "learning_rate": 1.651408900384766e-05, + "loss": 0.2112, "step": 6443 }, { - "epoch": 0.37, - "grad_norm": 0.3043988410895945, - "learning_rate": 1.4519498673745019e-05, - "loss": 0.2756, + "epoch": 0.3, + "grad_norm": 1.3959477785880945, + "learning_rate": 1.6512960007465304e-05, + "loss": 0.57, "step": 6444 }, { - "epoch": 0.37, - "grad_norm": 0.3661883748618258, - "learning_rate": 1.4517838580810413e-05, - "loss": 0.2444, + "epoch": 0.3, + "grad_norm": 0.4420944524791979, + "learning_rate": 1.6511830866892294e-05, + "loss": 0.3302, "step": 6445 }, { - "epoch": 0.37, - "grad_norm": 0.8097337394192086, - "learning_rate": 1.4516178331423257e-05, - "loss": 0.576, + "epoch": 0.3, + "grad_norm": 0.382169930279093, + "learning_rate": 1.6510701582153622e-05, + "loss": 0.2294, "step": 6446 }, { - "epoch": 0.37, - "grad_norm": 0.33163644933954645, - "learning_rate": 1.4514517925641035e-05, - "loss": 0.3033, + "epoch": 0.3, + "grad_norm": 0.8556709441391702, + "learning_rate": 1.6509572153274293e-05, + "loss": 0.6459, "step": 6447 }, { - "epoch": 0.37, - "grad_norm": 0.5218645713887373, - "learning_rate": 1.4512857363521254e-05, - "loss": 0.393, + "epoch": 0.3, + "grad_norm": 0.5330859134336606, + "learning_rate": 1.6508442580279315e-05, + "loss": 0.3381, "step": 6448 }, { - "epoch": 0.37, - "grad_norm": 0.3793598909180065, - "learning_rate": 1.4511196645121414e-05, - "loss": 0.319, + "epoch": 0.3, + "grad_norm": 0.2722344894132027, + "learning_rate": 1.6507312863193685e-05, + "loss": 0.1474, "step": 6449 }, { - "epoch": 0.37, - "grad_norm": 0.3896431397236735, - "learning_rate": 1.450953577049903e-05, - "loss": 0.2771, + "epoch": 0.3, + "grad_norm": 0.4617405629559114, + "learning_rate": 1.650618300204242e-05, + "loss": 0.3043, "step": 6450 }, { - "epoch": 0.37, - "grad_norm": 0.2705214298693227, - "learning_rate": 1.4507874739711616e-05, - "loss": 0.2298, + "epoch": 0.3, + "grad_norm": 1.138138607850186, + "learning_rate": 1.6505052996850542e-05, + "loss": 0.5973, "step": 6451 }, { - "epoch": 0.37, - "grad_norm": 0.34851249948794466, - "learning_rate": 1.4506213552816694e-05, - "loss": 0.2555, + "epoch": 0.3, + "grad_norm": 0.42641339499950154, + "learning_rate": 1.650392284764305e-05, + "loss": 0.19, "step": 6452 }, { - "epoch": 0.37, - "grad_norm": 0.39110676315148374, - "learning_rate": 1.4504552209871791e-05, - "loss": 0.2659, + "epoch": 0.3, + "grad_norm": 0.4647623875488781, + "learning_rate": 1.650279255444498e-05, + "loss": 0.3553, "step": 6453 }, { - "epoch": 0.37, - "grad_norm": 0.5909154208429732, - "learning_rate": 1.4502890710934438e-05, - "loss": 0.4545, + "epoch": 0.3, + "grad_norm": 0.5271020832430143, + "learning_rate": 1.6501662117281352e-05, + "loss": 0.4169, "step": 6454 }, { - "epoch": 0.37, - "grad_norm": 0.8661006074737951, - "learning_rate": 1.4501229056062174e-05, - "loss": 0.36, + "epoch": 0.3, + "grad_norm": 0.2699697380530017, + "learning_rate": 1.650053153617719e-05, + "loss": 0.1342, "step": 6455 }, { - "epoch": 0.37, - "grad_norm": 0.2980737565737271, - "learning_rate": 1.449956724531254e-05, - "loss": 0.2735, + "epoch": 0.3, + "grad_norm": 0.5867352927487277, + "learning_rate": 1.649940081115752e-05, + "loss": 0.4223, "step": 6456 }, { - "epoch": 0.37, - "grad_norm": 0.37860627191593116, - "learning_rate": 1.4497905278743086e-05, - "loss": 0.3064, + "epoch": 0.3, + "grad_norm": 0.4168908015698718, + "learning_rate": 1.649826994224739e-05, + "loss": 0.3353, "step": 6457 }, { - "epoch": 0.37, - "grad_norm": 0.4986638746583317, - "learning_rate": 1.4496243156411367e-05, - "loss": 0.3235, + "epoch": 0.3, + "grad_norm": 0.4440769195248618, + "learning_rate": 1.649713892947182e-05, + "loss": 0.3082, "step": 6458 }, { - "epoch": 0.37, - "grad_norm": 0.36881140079123487, - "learning_rate": 1.4494580878374942e-05, - "loss": 0.2306, + "epoch": 0.3, + "grad_norm": 0.546380066666568, + "learning_rate": 1.6496007772855855e-05, + "loss": 0.3323, "step": 6459 }, { - "epoch": 0.37, - "grad_norm": 0.4396370739515429, - "learning_rate": 1.4492918444691371e-05, - "loss": 0.3544, + "epoch": 0.3, + "grad_norm": 0.927394827000011, + "learning_rate": 1.649487647242454e-05, + "loss": 0.5915, "step": 6460 }, { - "epoch": 0.37, - "grad_norm": 1.4881242453846941, - "learning_rate": 1.4491255855418228e-05, - "loss": 0.4526, + "epoch": 0.3, + "grad_norm": 0.3085757473844876, + "learning_rate": 1.649374502820292e-05, + "loss": 0.2627, "step": 6461 }, { - "epoch": 0.37, - "grad_norm": 0.31509315415532924, - "learning_rate": 1.4489593110613087e-05, - "loss": 0.2344, + "epoch": 0.3, + "grad_norm": 0.32810136978048665, + "learning_rate": 1.6492613440216044e-05, + "loss": 0.201, "step": 6462 }, { - "epoch": 0.37, - "grad_norm": 0.4766555392210357, - "learning_rate": 1.4487930210333532e-05, - "loss": 0.29, + "epoch": 0.3, + "grad_norm": 1.134202196600778, + "learning_rate": 1.6491481708488963e-05, + "loss": 0.5609, "step": 6463 }, { - "epoch": 0.37, - "grad_norm": 0.3242238172436043, - "learning_rate": 1.4486267154637146e-05, - "loss": 0.2684, + "epoch": 0.3, + "grad_norm": 0.5712556717635934, + "learning_rate": 1.6490349833046737e-05, + "loss": 0.3109, "step": 6464 }, { - "epoch": 0.37, - "grad_norm": 0.38454949832911445, - "learning_rate": 1.4484603943581523e-05, - "loss": 0.1761, + "epoch": 0.3, + "grad_norm": 0.3671115764477214, + "learning_rate": 1.648921781391442e-05, + "loss": 0.282, "step": 6465 }, { - "epoch": 0.37, - "grad_norm": 0.9162557910018109, - "learning_rate": 1.4482940577224254e-05, - "loss": 0.4149, + "epoch": 0.3, + "grad_norm": 0.5454346246915641, + "learning_rate": 1.6488085651117074e-05, + "loss": 0.392, "step": 6466 }, { - "epoch": 0.37, - "grad_norm": 0.9944619174618472, - "learning_rate": 1.4481277055622948e-05, - "loss": 0.3897, + "epoch": 0.3, + "grad_norm": 0.6252389708864353, + "learning_rate": 1.6486953344679764e-05, + "loss": 0.3289, "step": 6467 }, { - "epoch": 0.37, - "grad_norm": 0.3133210801429484, - "learning_rate": 1.4479613378835211e-05, - "loss": 0.2176, + "epoch": 0.3, + "grad_norm": 0.2545009118485677, + "learning_rate": 1.648582089462756e-05, + "loss": 0.1773, "step": 6468 }, { - "epoch": 0.37, - "grad_norm": 0.49623665028789815, - "learning_rate": 1.4477949546918655e-05, - "loss": 0.4053, + "epoch": 0.3, + "grad_norm": 0.4031403885745529, + "learning_rate": 1.6484688300985533e-05, + "loss": 0.3279, "step": 6469 }, { - "epoch": 0.37, - "grad_norm": 0.28227034323549527, - "learning_rate": 1.4476285559930899e-05, - "loss": 0.1809, + "epoch": 0.3, + "grad_norm": 0.6124678660679872, + "learning_rate": 1.6483555563778756e-05, + "loss": 0.3322, "step": 6470 }, { - "epoch": 0.37, - "grad_norm": 0.5943690582874419, - "learning_rate": 1.4474621417929566e-05, - "loss": 0.2999, + "epoch": 0.3, + "grad_norm": 0.43316173994258067, + "learning_rate": 1.6482422683032312e-05, + "loss": 0.3449, "step": 6471 }, { - "epoch": 0.37, - "grad_norm": 0.4368804350447025, - "learning_rate": 1.4472957120972284e-05, - "loss": 0.2854, + "epoch": 0.3, + "grad_norm": 0.9208136504046879, + "learning_rate": 1.648128965877127e-05, + "loss": 0.4879, "step": 6472 }, { - "epoch": 0.37, - "grad_norm": 1.0839653605743804, - "learning_rate": 1.4471292669116692e-05, - "loss": 0.4932, + "epoch": 0.3, + "grad_norm": 0.3417605253579729, + "learning_rate": 1.648015649102073e-05, + "loss": 0.2739, "step": 6473 }, { - "epoch": 0.37, - "grad_norm": 0.6103092418262205, - "learning_rate": 1.4469628062420427e-05, - "loss": 0.3787, + "epoch": 0.3, + "grad_norm": 0.2816719511279857, + "learning_rate": 1.6479023179805764e-05, + "loss": 0.1916, "step": 6474 }, { - "epoch": 0.37, - "grad_norm": 0.2815049852060284, - "learning_rate": 1.4467963300941135e-05, - "loss": 0.1699, + "epoch": 0.3, + "grad_norm": 0.6813936846086502, + "learning_rate": 1.6477889725151476e-05, + "loss": 0.3733, "step": 6475 }, { - "epoch": 0.37, - "grad_norm": 0.42956074118826143, - "learning_rate": 1.4466298384736463e-05, - "loss": 0.32, + "epoch": 0.3, + "grad_norm": 0.5448978403851198, + "learning_rate": 1.6476756127082946e-05, + "loss": 0.3566, "step": 6476 }, { - "epoch": 0.37, - "grad_norm": 0.42963144754338745, - "learning_rate": 1.4464633313864073e-05, - "loss": 0.2869, + "epoch": 0.3, + "grad_norm": 0.36469015522245996, + "learning_rate": 1.6475622385625284e-05, + "loss": 0.3183, "step": 6477 }, { - "epoch": 0.37, - "grad_norm": 0.5424138447706254, - "learning_rate": 1.4462968088381621e-05, - "loss": 0.327, + "epoch": 0.3, + "grad_norm": 0.4750408310436837, + "learning_rate": 1.6474488500803578e-05, + "loss": 0.3073, "step": 6478 }, { - "epoch": 0.37, - "grad_norm": 0.5458547157279315, - "learning_rate": 1.4461302708346778e-05, - "loss": 0.3884, + "epoch": 0.3, + "grad_norm": 0.4697146339811533, + "learning_rate": 1.647335447264294e-05, + "loss": 0.3323, "step": 6479 }, { - "epoch": 0.37, - "grad_norm": 0.32194605311238567, - "learning_rate": 1.4459637173817214e-05, - "loss": 0.2684, + "epoch": 0.3, + "grad_norm": 0.2832133721004585, + "learning_rate": 1.6472220301168473e-05, + "loss": 0.204, "step": 6480 }, { - "epoch": 0.37, - "grad_norm": 0.4947001893698327, - "learning_rate": 1.4457971484850604e-05, - "loss": 0.2686, + "epoch": 0.3, + "grad_norm": 0.4753356605456584, + "learning_rate": 1.6471085986405288e-05, + "loss": 0.2802, "step": 6481 }, { - "epoch": 0.37, - "grad_norm": 0.30792978417290967, - "learning_rate": 1.4456305641504636e-05, - "loss": 0.1929, + "epoch": 0.3, + "grad_norm": 0.6608626181946445, + "learning_rate": 1.646995152837849e-05, + "loss": 0.3333, "step": 6482 }, { - "epoch": 0.37, - "grad_norm": 0.39304218999952134, - "learning_rate": 1.4454639643836993e-05, - "loss": 0.3088, + "epoch": 0.3, + "grad_norm": 0.42900784324380353, + "learning_rate": 1.6468816927113205e-05, + "loss": 0.329, "step": 6483 }, { - "epoch": 0.37, - "grad_norm": 0.350822152919814, - "learning_rate": 1.4452973491905372e-05, - "loss": 0.3195, + "epoch": 0.3, + "grad_norm": 0.9741182913532257, + "learning_rate": 1.646768218263455e-05, + "loss": 0.6685, "step": 6484 }, { - "epoch": 0.37, - "grad_norm": 0.7457583251157986, - "learning_rate": 1.4451307185767469e-05, - "loss": 0.4278, + "epoch": 0.3, + "grad_norm": 0.39421012050696175, + "learning_rate": 1.6466547294967644e-05, + "loss": 0.2342, "step": 6485 }, { - "epoch": 0.37, - "grad_norm": 0.9576714232110599, - "learning_rate": 1.4449640725480991e-05, - "loss": 0.471, + "epoch": 0.3, + "grad_norm": 0.273922438912459, + "learning_rate": 1.6465412264137612e-05, + "loss": 0.1913, "step": 6486 }, { - "epoch": 0.37, - "grad_norm": 0.38426895057800664, - "learning_rate": 1.4447974111103645e-05, - "loss": 0.285, + "epoch": 0.3, + "grad_norm": 1.0887137023245104, + "learning_rate": 1.6464277090169583e-05, + "loss": 0.4488, "step": 6487 }, { - "epoch": 0.37, - "grad_norm": 0.22401458190615092, - "learning_rate": 1.4446307342693149e-05, - "loss": 0.2138, + "epoch": 0.3, + "grad_norm": 0.4291051285151722, + "learning_rate": 1.6463141773088693e-05, + "loss": 0.1869, "step": 6488 }, { - "epoch": 0.37, - "grad_norm": 0.5444407977701945, - "learning_rate": 1.4444640420307217e-05, - "loss": 0.319, + "epoch": 0.3, + "grad_norm": 0.3155116737247999, + "learning_rate": 1.6462006312920072e-05, + "loss": 0.2864, "step": 6489 }, { - "epoch": 0.37, - "grad_norm": 0.4500081741351894, - "learning_rate": 1.4442973344003582e-05, - "loss": 0.3505, + "epoch": 0.3, + "grad_norm": 1.5978708768858632, + "learning_rate": 1.646087070968886e-05, + "loss": 0.7105, "step": 6490 }, { - "epoch": 0.37, - "grad_norm": 1.0057233925466538, - "learning_rate": 1.4441306113839971e-05, - "loss": 0.5737, + "epoch": 0.3, + "grad_norm": 0.4810588920180695, + "learning_rate": 1.6459734963420194e-05, + "loss": 0.2244, "step": 6491 }, { - "epoch": 0.37, - "grad_norm": 0.34070622377314463, - "learning_rate": 1.4439638729874119e-05, - "loss": 0.2789, + "epoch": 0.3, + "grad_norm": 0.43802119282501034, + "learning_rate": 1.6458599074139224e-05, + "loss": 0.2635, "step": 6492 }, { - "epoch": 0.37, - "grad_norm": 0.39274503048589104, - "learning_rate": 1.4437971192163768e-05, - "loss": 0.3363, + "epoch": 0.3, + "grad_norm": 0.5200842674695335, + "learning_rate": 1.6457463041871097e-05, + "loss": 0.3478, "step": 6493 }, { - "epoch": 0.37, - "grad_norm": 0.260970503088254, - "learning_rate": 1.4436303500766667e-05, - "loss": 0.139, + "epoch": 0.3, + "grad_norm": 0.3939064074560006, + "learning_rate": 1.645632686664096e-05, + "loss": 0.1431, "step": 6494 }, { - "epoch": 0.37, - "grad_norm": 0.4353457494525623, - "learning_rate": 1.4434635655740566e-05, - "loss": 0.3152, + "epoch": 0.3, + "grad_norm": 0.5030782381319365, + "learning_rate": 1.645519054847397e-05, + "loss": 0.3983, "step": 6495 }, { - "epoch": 0.37, - "grad_norm": 0.36338683442474146, - "learning_rate": 1.4432967657143223e-05, - "loss": 0.3243, + "epoch": 0.3, + "grad_norm": 1.6266526676861548, + "learning_rate": 1.6454054087395284e-05, + "loss": 0.8158, "step": 6496 }, { - "epoch": 0.37, - "grad_norm": 1.4910761065746299, - "learning_rate": 1.44312995050324e-05, - "loss": 0.8249, + "epoch": 0.3, + "grad_norm": 0.366753809075817, + "learning_rate": 1.6452917483430058e-05, + "loss": 0.3035, "step": 6497 }, { - "epoch": 0.37, - "grad_norm": 0.3636983831741691, - "learning_rate": 1.4429631199465866e-05, - "loss": 0.2165, + "epoch": 0.3, + "grad_norm": 0.43293325953343664, + "learning_rate": 1.645178073660346e-05, + "loss": 0.2439, "step": 6498 }, { - "epoch": 0.37, - "grad_norm": 0.6184276802434224, - "learning_rate": 1.4427962740501396e-05, - "loss": 0.4225, + "epoch": 0.3, + "grad_norm": 0.4292164680054889, + "learning_rate": 1.6450643846940653e-05, + "loss": 0.3375, "step": 6499 }, { - "epoch": 0.37, - "grad_norm": 0.4139216454007609, - "learning_rate": 1.4426294128196763e-05, - "loss": 0.3305, + "epoch": 0.3, + "grad_norm": 0.3329224444778718, + "learning_rate": 1.6449506814466813e-05, + "loss": 0.1993, "step": 6500 }, { - "epoch": 0.37, - "grad_norm": 0.49001496573354486, - "learning_rate": 1.4424625362609757e-05, - "loss": 0.2682, + "epoch": 0.3, + "grad_norm": 0.41734160041646134, + "learning_rate": 1.6448369639207103e-05, + "loss": 0.2622, "step": 6501 }, { - "epoch": 0.37, - "grad_norm": 0.27947362207932114, - "learning_rate": 1.4422956443798165e-05, - "loss": 0.2087, + "epoch": 0.3, + "grad_norm": 1.34362989409153, + "learning_rate": 1.6447232321186706e-05, + "loss": 0.7333, "step": 6502 }, { - "epoch": 0.37, - "grad_norm": 0.5741699302860873, - "learning_rate": 1.4421287371819781e-05, - "loss": 0.4186, + "epoch": 0.3, + "grad_norm": 0.8554454989854707, + "learning_rate": 1.64460948604308e-05, + "loss": 0.5656, "step": 6503 }, { - "epoch": 0.37, - "grad_norm": 0.7860385506372445, - "learning_rate": 1.4419618146732404e-05, - "loss": 0.2432, + "epoch": 0.3, + "grad_norm": 0.3497685134856179, + "learning_rate": 1.644495725696457e-05, + "loss": 0.257, "step": 6504 }, { - "epoch": 0.37, - "grad_norm": 0.4018869658660575, - "learning_rate": 1.4417948768593842e-05, - "loss": 0.3231, + "epoch": 0.3, + "grad_norm": 0.3312840991808666, + "learning_rate": 1.6443819510813192e-05, + "loss": 0.2743, "step": 6505 }, { - "epoch": 0.37, - "grad_norm": 0.5974194600768572, - "learning_rate": 1.4416279237461903e-05, - "loss": 0.4459, + "epoch": 0.3, + "grad_norm": 0.5297136666415108, + "learning_rate": 1.6442681622001862e-05, + "loss": 0.2798, "step": 6506 }, { - "epoch": 0.37, - "grad_norm": 0.5043837478835471, - "learning_rate": 1.4414609553394408e-05, - "loss": 0.2003, + "epoch": 0.3, + "grad_norm": 0.366222201035637, + "learning_rate": 1.644154359055577e-05, + "loss": 0.2263, "step": 6507 }, { - "epoch": 0.37, - "grad_norm": 0.3785285725739497, - "learning_rate": 1.441293971644917e-05, - "loss": 0.2199, + "epoch": 0.3, + "grad_norm": 0.9062065267477913, + "learning_rate": 1.6440405416500117e-05, + "loss": 0.5924, "step": 6508 }, { - "epoch": 0.37, - "grad_norm": 1.1012730369361647, - "learning_rate": 1.4411269726684022e-05, - "loss": 0.7594, + "epoch": 0.3, + "grad_norm": 0.48981059173960173, + "learning_rate": 1.643926709986009e-05, + "loss": 0.3159, "step": 6509 }, { - "epoch": 0.37, - "grad_norm": 0.8161801633283232, - "learning_rate": 1.4409599584156791e-05, - "loss": 0.4433, + "epoch": 0.3, + "grad_norm": 0.49196951056219296, + "learning_rate": 1.6438128640660896e-05, + "loss": 0.3288, "step": 6510 }, { - "epoch": 0.37, - "grad_norm": 0.3898556198374485, - "learning_rate": 1.4407929288925316e-05, - "loss": 0.2459, + "epoch": 0.3, + "grad_norm": 0.8354605002344404, + "learning_rate": 1.643699003892774e-05, + "loss": 0.3427, "step": 6511 }, { - "epoch": 0.37, - "grad_norm": 0.5468015768810729, - "learning_rate": 1.440625884104744e-05, - "loss": 0.3783, + "epoch": 0.3, + "grad_norm": 0.2788487926854313, + "learning_rate": 1.643585129468583e-05, + "loss": 0.1987, "step": 6512 }, { - "epoch": 0.37, - "grad_norm": 0.734511890686694, - "learning_rate": 1.440458824058101e-05, - "loss": 0.3978, + "epoch": 0.3, + "grad_norm": 0.3464945325308807, + "learning_rate": 1.6434712407960375e-05, + "loss": 0.3081, "step": 6513 }, { - "epoch": 0.37, - "grad_norm": 0.2729669946052368, - "learning_rate": 1.4402917487583876e-05, - "loss": 0.1975, + "epoch": 0.3, + "grad_norm": 0.9497345174428397, + "learning_rate": 1.6433573378776587e-05, + "loss": 0.4322, "step": 6514 }, { - "epoch": 0.37, - "grad_norm": 0.5058855067236363, - "learning_rate": 1.4401246582113904e-05, - "loss": 0.4243, + "epoch": 0.3, + "grad_norm": 0.7505188900261159, + "learning_rate": 1.6432434207159683e-05, + "loss": 0.4061, "step": 6515 }, { - "epoch": 0.37, - "grad_norm": 0.2797481012857069, - "learning_rate": 1.4399575524228949e-05, - "loss": 0.237, + "epoch": 0.3, + "grad_norm": 0.4157365745160956, + "learning_rate": 1.643129489313489e-05, + "loss": 0.3121, "step": 6516 }, { - "epoch": 0.37, - "grad_norm": 0.36853983359838033, - "learning_rate": 1.4397904313986881e-05, - "loss": 0.2095, + "epoch": 0.3, + "grad_norm": 0.4006192315395858, + "learning_rate": 1.6430155436727427e-05, + "loss": 0.2886, "step": 6517 }, { - "epoch": 0.37, - "grad_norm": 0.8808245431323464, - "learning_rate": 1.439623295144558e-05, - "loss": 0.4242, + "epoch": 0.3, + "grad_norm": 0.32545635316675936, + "learning_rate": 1.6429015837962518e-05, + "loss": 0.1956, "step": 6518 }, { - "epoch": 0.37, - "grad_norm": 0.32676114481551416, - "learning_rate": 1.4394561436662917e-05, - "loss": 0.282, + "epoch": 0.3, + "grad_norm": 0.4379321418833836, + "learning_rate": 1.6427876096865394e-05, + "loss": 0.3069, "step": 6519 }, { - "epoch": 0.37, - "grad_norm": 0.32006809977392964, - "learning_rate": 1.4392889769696783e-05, - "loss": 0.2367, + "epoch": 0.3, + "grad_norm": 0.5114829747424323, + "learning_rate": 1.6426736213461293e-05, + "loss": 0.295, "step": 6520 }, { - "epoch": 0.37, - "grad_norm": 0.4047428854960159, - "learning_rate": 1.4391217950605063e-05, - "loss": 0.3227, + "epoch": 0.3, + "grad_norm": 0.6533020942405925, + "learning_rate": 1.6425596187775437e-05, + "loss": 0.3727, "step": 6521 }, { - "epoch": 0.37, - "grad_norm": 0.36465854255387276, - "learning_rate": 1.4389545979445655e-05, - "loss": 0.245, + "epoch": 0.3, + "grad_norm": 0.3730304243109258, + "learning_rate": 1.6424456019833085e-05, + "loss": 0.2997, "step": 6522 }, { - "epoch": 0.37, - "grad_norm": 0.35563849101387063, - "learning_rate": 1.438787385627646e-05, - "loss": 0.3014, + "epoch": 0.3, + "grad_norm": 0.8596725205814361, + "learning_rate": 1.6423315709659464e-05, + "loss": 0.5281, "step": 6523 }, { - "epoch": 0.37, - "grad_norm": 0.3814440985840305, - "learning_rate": 1.438620158115538e-05, - "loss": 0.2927, + "epoch": 0.3, + "grad_norm": 0.3182355629143691, + "learning_rate": 1.6422175257279822e-05, + "loss": 0.2185, "step": 6524 }, { - "epoch": 0.37, - "grad_norm": 1.1983742937891095, - "learning_rate": 1.438452915414033e-05, - "loss": 0.7023, + "epoch": 0.3, + "grad_norm": 0.38124723854814485, + "learning_rate": 1.6421034662719412e-05, + "loss": 0.2891, "step": 6525 }, { - "epoch": 0.37, - "grad_norm": 0.34701945637472476, - "learning_rate": 1.4382856575289223e-05, - "loss": 0.2919, + "epoch": 0.3, + "grad_norm": 0.3964815759041436, + "learning_rate": 1.641989392600348e-05, + "loss": 0.2678, "step": 6526 }, { - "epoch": 0.38, - "grad_norm": 0.35292559953145647, - "learning_rate": 1.438118384465998e-05, - "loss": 0.2924, + "epoch": 0.3, + "grad_norm": 0.771444130264634, + "learning_rate": 1.641875304715729e-05, + "loss": 0.3762, "step": 6527 }, { - "epoch": 0.38, - "grad_norm": 0.26027699865809883, - "learning_rate": 1.4379510962310532e-05, - "loss": 0.1848, + "epoch": 0.3, + "grad_norm": 0.42810467766420895, + "learning_rate": 1.641761202620609e-05, + "loss": 0.2762, "step": 6528 }, { - "epoch": 0.38, - "grad_norm": 0.35662767213458074, - "learning_rate": 1.4377837928298804e-05, - "loss": 0.3079, + "epoch": 0.3, + "grad_norm": 0.46037957759995773, + "learning_rate": 1.6416470863175148e-05, + "loss": 0.3287, "step": 6529 }, { - "epoch": 0.38, - "grad_norm": 0.6596498854621378, - "learning_rate": 1.4376164742682738e-05, - "loss": 0.3599, + "epoch": 0.3, + "grad_norm": 0.5978510254078991, + "learning_rate": 1.6415329558089722e-05, + "loss": 0.2977, "step": 6530 }, { - "epoch": 0.38, - "grad_norm": 0.36191791630131526, - "learning_rate": 1.4374491405520274e-05, - "loss": 0.2928, + "epoch": 0.3, + "grad_norm": 0.4127031440530877, + "learning_rate": 1.6414188110975085e-05, + "loss": 0.3168, "step": 6531 }, { - "epoch": 0.38, - "grad_norm": 0.3860443428448384, - "learning_rate": 1.4372817916869364e-05, - "loss": 0.3045, + "epoch": 0.3, + "grad_norm": 0.5794757409750412, + "learning_rate": 1.6413046521856504e-05, + "loss": 0.3198, "step": 6532 }, { - "epoch": 0.38, - "grad_norm": 0.9970264359008559, - "learning_rate": 1.4371144276787954e-05, - "loss": 0.5673, + "epoch": 0.3, + "grad_norm": 0.32487824411162197, + "learning_rate": 1.6411904790759255e-05, + "loss": 0.2918, "step": 6533 }, { - "epoch": 0.38, - "grad_norm": 0.4185340140823584, - "learning_rate": 1.436947048533401e-05, - "loss": 0.2696, + "epoch": 0.3, + "grad_norm": 0.35744032735483067, + "learning_rate": 1.6410762917708612e-05, + "loss": 0.2056, "step": 6534 }, { - "epoch": 0.38, - "grad_norm": 0.37333837957749794, - "learning_rate": 1.4367796542565486e-05, - "loss": 0.2877, + "epoch": 0.3, + "grad_norm": 0.9635264121585168, + "learning_rate": 1.640962090272986e-05, + "loss": 0.576, "step": 6535 }, { - "epoch": 0.38, - "grad_norm": 0.38025057980172355, - "learning_rate": 1.4366122448540361e-05, - "loss": 0.266, + "epoch": 0.3, + "grad_norm": 0.3742747925858866, + "learning_rate": 1.640847874584828e-05, + "loss": 0.3305, "step": 6536 }, { - "epoch": 0.38, - "grad_norm": 0.6973816542088787, - "learning_rate": 1.4364448203316599e-05, - "loss": 0.2136, + "epoch": 0.3, + "grad_norm": 0.33697788400838236, + "learning_rate": 1.640733644708915e-05, + "loss": 0.2073, "step": 6537 }, { - "epoch": 0.38, - "grad_norm": 0.354013344957813, - "learning_rate": 1.4362773806952184e-05, - "loss": 0.2847, + "epoch": 0.3, + "grad_norm": 0.6467408366970562, + "learning_rate": 1.6406194006477768e-05, + "loss": 0.4713, "step": 6538 }, { - "epoch": 0.38, - "grad_norm": 0.42870882657679155, - "learning_rate": 1.4361099259505102e-05, - "loss": 0.3461, + "epoch": 0.3, + "grad_norm": 0.34870994071230194, + "learning_rate": 1.640505142403943e-05, + "loss": 0.2306, "step": 6539 }, { - "epoch": 0.38, - "grad_norm": 0.7161912133243448, - "learning_rate": 1.4359424561033337e-05, - "loss": 0.3663, + "epoch": 0.3, + "grad_norm": 0.36269345573178563, + "learning_rate": 1.6403908699799423e-05, + "loss": 0.2005, "step": 6540 }, { - "epoch": 0.38, - "grad_norm": 0.32702988657639787, - "learning_rate": 1.435774971159489e-05, - "loss": 0.2704, + "epoch": 0.3, + "grad_norm": 0.42239697306336577, + "learning_rate": 1.6402765833783054e-05, + "loss": 0.3199, "step": 6541 }, { - "epoch": 0.38, - "grad_norm": 0.2874840199994187, - "learning_rate": 1.4356074711247759e-05, - "loss": 0.1933, + "epoch": 0.3, + "grad_norm": 1.0852601527734975, + "learning_rate": 1.6401622826015616e-05, + "loss": 0.7276, "step": 6542 }, { - "epoch": 0.38, - "grad_norm": 0.3422143523196144, - "learning_rate": 1.4354399560049943e-05, - "loss": 0.2543, + "epoch": 0.3, + "grad_norm": 0.3379799938024275, + "learning_rate": 1.640047967652242e-05, + "loss": 0.231, "step": 6543 }, { - "epoch": 0.38, - "grad_norm": 0.3620610486020329, - "learning_rate": 1.4352724258059461e-05, - "loss": 0.2882, + "epoch": 0.3, + "grad_norm": 0.47756456779866496, + "learning_rate": 1.6399336385328775e-05, + "loss": 0.3845, "step": 6544 }, { - "epoch": 0.38, - "grad_norm": 0.6999998230635959, - "learning_rate": 1.4351048805334325e-05, - "loss": 0.5029, + "epoch": 0.3, + "grad_norm": 0.35860597543024575, + "learning_rate": 1.6398192952459987e-05, + "loss": 0.2857, "step": 6545 }, { - "epoch": 0.38, - "grad_norm": 0.7040615510823431, - "learning_rate": 1.4349373201932553e-05, - "loss": 0.4828, + "epoch": 0.3, + "grad_norm": 0.3238611100152137, + "learning_rate": 1.6397049377941378e-05, + "loss": 0.2216, "step": 6546 }, { - "epoch": 0.38, - "grad_norm": 0.34328707423243926, - "learning_rate": 1.4347697447912176e-05, - "loss": 0.2311, + "epoch": 0.3, + "grad_norm": 0.6573411854352372, + "learning_rate": 1.6395905661798253e-05, + "loss": 0.3484, "step": 6547 }, { - "epoch": 0.38, - "grad_norm": 0.34255042274766273, - "learning_rate": 1.4346021543331224e-05, - "loss": 0.2109, + "epoch": 0.3, + "grad_norm": 0.44558358204369625, + "learning_rate": 1.6394761804055947e-05, + "loss": 0.3579, "step": 6548 }, { - "epoch": 0.38, - "grad_norm": 0.9416350640193081, - "learning_rate": 1.4344345488247733e-05, - "loss": 0.5087, + "epoch": 0.3, + "grad_norm": 0.3720275209746239, + "learning_rate": 1.6393617804739777e-05, + "loss": 0.2655, "step": 6549 }, { - "epoch": 0.38, - "grad_norm": 0.3524115680822469, - "learning_rate": 1.4342669282719741e-05, - "loss": 0.2236, + "epoch": 0.3, + "grad_norm": 0.731963916306467, + "learning_rate": 1.639247366387507e-05, + "loss": 0.3905, "step": 6550 }, { - "epoch": 0.38, - "grad_norm": 0.34939936511288033, - "learning_rate": 1.4340992926805304e-05, - "loss": 0.3067, + "epoch": 0.3, + "grad_norm": 0.2952154608547386, + "learning_rate": 1.6391329381487162e-05, + "loss": 0.1903, "step": 6551 }, { - "epoch": 0.38, - "grad_norm": 0.9389882276144642, - "learning_rate": 1.4339316420562464e-05, - "loss": 0.4627, + "epoch": 0.3, + "grad_norm": 0.49260663139873556, + "learning_rate": 1.6390184957601376e-05, + "loss": 0.3277, "step": 6552 }, { - "epoch": 0.38, - "grad_norm": 0.3572146276189607, - "learning_rate": 1.4337639764049285e-05, - "loss": 0.2025, + "epoch": 0.3, + "grad_norm": 0.35835665871153694, + "learning_rate": 1.6389040392243056e-05, + "loss": 0.2598, "step": 6553 }, { - "epoch": 0.38, - "grad_norm": 0.3440940221274594, - "learning_rate": 1.4335962957323827e-05, - "loss": 0.2088, + "epoch": 0.3, + "grad_norm": 0.888465678475625, + "learning_rate": 1.638789568543754e-05, + "loss": 0.49, "step": 6554 }, { - "epoch": 0.38, - "grad_norm": 0.43469991652072393, - "learning_rate": 1.433428600044416e-05, - "loss": 0.3331, + "epoch": 0.3, + "grad_norm": 0.5681236508741496, + "learning_rate": 1.638675083721017e-05, + "loss": 0.2694, "step": 6555 }, { - "epoch": 0.38, - "grad_norm": 0.30077868298122107, - "learning_rate": 1.4332608893468351e-05, - "loss": 0.1945, + "epoch": 0.3, + "grad_norm": 0.32378960558442227, + "learning_rate": 1.638560584758629e-05, + "loss": 0.2938, "step": 6556 }, { - "epoch": 0.38, - "grad_norm": 0.6253365617274365, - "learning_rate": 1.4330931636454489e-05, - "loss": 0.4415, + "epoch": 0.3, + "grad_norm": 1.1897165811018666, + "learning_rate": 1.6384460716591255e-05, + "loss": 0.6916, "step": 6557 }, { - "epoch": 0.38, - "grad_norm": 0.47640301786281314, - "learning_rate": 1.4329254229460645e-05, - "loss": 0.3846, + "epoch": 0.3, + "grad_norm": 0.2604145627671716, + "learning_rate": 1.638331544425041e-05, + "loss": 0.2078, "step": 6558 }, { - "epoch": 0.38, - "grad_norm": 0.30769115910174644, - "learning_rate": 1.4327576672544917e-05, - "loss": 0.2608, + "epoch": 0.3, + "grad_norm": 0.5654120186370319, + "learning_rate": 1.6382170030589116e-05, + "loss": 0.3269, "step": 6559 }, { - "epoch": 0.38, - "grad_norm": 0.24923379904346613, - "learning_rate": 1.432589896576539e-05, - "loss": 0.152, + "epoch": 0.3, + "grad_norm": 0.49399185386294203, + "learning_rate": 1.6381024475632727e-05, + "loss": 0.2909, "step": 6560 }, { - "epoch": 0.38, - "grad_norm": 1.1160575763268, - "learning_rate": 1.4324221109180173e-05, - "loss": 0.6775, + "epoch": 0.3, + "grad_norm": 0.47135892073226987, + "learning_rate": 1.6379878779406608e-05, + "loss": 0.3072, "step": 6561 }, { - "epoch": 0.38, - "grad_norm": 0.39827529165073156, - "learning_rate": 1.4322543102847362e-05, - "loss": 0.2889, + "epoch": 0.3, + "grad_norm": 1.0402325299141095, + "learning_rate": 1.637873294193612e-05, + "loss": 0.5584, "step": 6562 }, { - "epoch": 0.38, - "grad_norm": 0.4097091571777183, - "learning_rate": 1.432086494682507e-05, - "loss": 0.3128, + "epoch": 0.3, + "grad_norm": 0.2789542268993825, + "learning_rate": 1.6377586963246632e-05, + "loss": 0.1461, "step": 6563 }, { - "epoch": 0.38, - "grad_norm": 1.4122864900557979, - "learning_rate": 1.4319186641171412e-05, - "loss": 0.7525, + "epoch": 0.3, + "grad_norm": 0.3545696528232299, + "learning_rate": 1.6376440843363517e-05, + "loss": 0.2166, "step": 6564 }, { - "epoch": 0.38, - "grad_norm": 0.345257798485318, - "learning_rate": 1.4317508185944504e-05, - "loss": 0.2947, + "epoch": 0.3, + "grad_norm": 0.42002315458438283, + "learning_rate": 1.637529458231215e-05, + "loss": 0.3116, "step": 6565 }, { - "epoch": 0.38, - "grad_norm": 0.18780929074416594, - "learning_rate": 1.4315829581202474e-05, - "loss": 0.0706, + "epoch": 0.3, + "grad_norm": 1.162490950676672, + "learning_rate": 1.6374148180117898e-05, + "loss": 0.3841, "step": 6566 }, { - "epoch": 0.38, - "grad_norm": 0.37982000063976756, - "learning_rate": 1.431415082700345e-05, - "loss": 0.326, + "epoch": 0.3, + "grad_norm": 0.39181416972488065, + "learning_rate": 1.6373001636806153e-05, + "loss": 0.2964, "step": 6567 }, { - "epoch": 0.38, - "grad_norm": 0.37529555056231606, - "learning_rate": 1.4312471923405571e-05, - "loss": 0.2817, + "epoch": 0.3, + "grad_norm": 0.5113878099309274, + "learning_rate": 1.6371854952402297e-05, + "loss": 0.378, "step": 6568 }, { - "epoch": 0.38, - "grad_norm": 0.7558881857203945, - "learning_rate": 1.4310792870466973e-05, - "loss": 0.3752, + "epoch": 0.3, + "grad_norm": 0.31200994071467875, + "learning_rate": 1.637070812693171e-05, + "loss": 0.2171, "step": 6569 }, { - "epoch": 0.38, - "grad_norm": 0.4981673910095424, - "learning_rate": 1.4309113668245804e-05, - "loss": 0.4285, + "epoch": 0.3, + "grad_norm": 0.3603889434836613, + "learning_rate": 1.6369561160419783e-05, + "loss": 0.217, "step": 6570 }, { - "epoch": 0.38, - "grad_norm": 0.32064816753904357, - "learning_rate": 1.4307434316800213e-05, - "loss": 0.2705, + "epoch": 0.3, + "grad_norm": 0.42561214625380145, + "learning_rate": 1.6368414052891918e-05, + "loss": 0.3061, "step": 6571 }, { - "epoch": 0.38, - "grad_norm": 0.30413949280789165, - "learning_rate": 1.4305754816188358e-05, - "loss": 0.2559, + "epoch": 0.3, + "grad_norm": 0.5492331803478658, + "learning_rate": 1.6367266804373497e-05, + "loss": 0.3626, "step": 6572 }, { - "epoch": 0.38, - "grad_norm": 0.4309987650378867, - "learning_rate": 1.4304075166468396e-05, - "loss": 0.2168, + "epoch": 0.3, + "grad_norm": 0.4842441172718364, + "learning_rate": 1.636611941488993e-05, + "loss": 0.2311, "step": 6573 }, { - "epoch": 0.38, - "grad_norm": 0.40311342743064493, - "learning_rate": 1.43023953676985e-05, - "loss": 0.2705, + "epoch": 0.3, + "grad_norm": 0.5915639741104606, + "learning_rate": 1.6364971884466614e-05, + "loss": 0.4518, "step": 6574 }, { - "epoch": 0.38, - "grad_norm": 0.2954206348253972, - "learning_rate": 1.4300715419936834e-05, - "loss": 0.3028, + "epoch": 0.3, + "grad_norm": 0.904162550188888, + "learning_rate": 1.6363824213128953e-05, + "loss": 0.6507, "step": 6575 }, { - "epoch": 0.38, - "grad_norm": 0.9026406553036119, - "learning_rate": 1.4299035323241583e-05, - "loss": 0.5688, + "epoch": 0.3, + "grad_norm": 0.2796132707422062, + "learning_rate": 1.636267640090236e-05, + "loss": 0.1828, "step": 6576 }, { - "epoch": 0.38, - "grad_norm": 0.3587211236933703, - "learning_rate": 1.429735507767092e-05, - "loss": 0.2561, + "epoch": 0.3, + "grad_norm": 0.3129185611756594, + "learning_rate": 1.6361528447812244e-05, + "loss": 0.2779, "step": 6577 }, { - "epoch": 0.38, - "grad_norm": 0.26006833779910876, - "learning_rate": 1.4295674683283037e-05, - "loss": 0.2293, + "epoch": 0.3, + "grad_norm": 0.7736276625191115, + "learning_rate": 1.6360380353884018e-05, + "loss": 0.4636, "step": 6578 }, { - "epoch": 0.38, - "grad_norm": 0.3906207143635448, - "learning_rate": 1.4293994140136123e-05, - "loss": 0.2902, + "epoch": 0.3, + "grad_norm": 0.4248028508473478, + "learning_rate": 1.63592321191431e-05, + "loss": 0.2321, "step": 6579 }, { - "epoch": 0.38, - "grad_norm": 0.3336786346711805, - "learning_rate": 1.4292313448288377e-05, - "loss": 0.2849, + "epoch": 0.3, + "grad_norm": 0.39447472201104505, + "learning_rate": 1.6358083743614916e-05, + "loss": 0.321, "step": 6580 }, { - "epoch": 0.38, - "grad_norm": 0.7731830224890437, - "learning_rate": 1.4290632607797998e-05, - "loss": 0.4842, + "epoch": 0.3, + "grad_norm": 1.160599207404717, + "learning_rate": 1.6356935227324885e-05, + "loss": 0.6212, "step": 6581 }, { - "epoch": 0.38, - "grad_norm": 0.46191721868987656, - "learning_rate": 1.4288951618723201e-05, - "loss": 0.2651, + "epoch": 0.3, + "grad_norm": 0.25491342699720554, + "learning_rate": 1.6355786570298432e-05, + "loss": 0.1701, "step": 6582 }, { - "epoch": 0.38, - "grad_norm": 0.3088949080198425, - "learning_rate": 1.428727048112219e-05, - "loss": 0.2683, + "epoch": 0.3, + "grad_norm": 0.41688351398019957, + "learning_rate": 1.635463777256099e-05, + "loss": 0.2677, "step": 6583 }, { - "epoch": 0.38, - "grad_norm": 1.0472390701706424, - "learning_rate": 1.4285589195053191e-05, - "loss": 0.6867, + "epoch": 0.3, + "grad_norm": 0.43894958498593034, + "learning_rate": 1.6353488834137995e-05, + "loss": 0.3514, "step": 6584 }, { - "epoch": 0.38, - "grad_norm": 0.3409265332982283, - "learning_rate": 1.428390776057442e-05, - "loss": 0.2472, + "epoch": 0.3, + "grad_norm": 0.4896844953290846, + "learning_rate": 1.635233975505488e-05, + "loss": 0.312, "step": 6585 }, { - "epoch": 0.38, - "grad_norm": 0.35059501414858735, - "learning_rate": 1.4282226177744107e-05, - "loss": 0.2534, + "epoch": 0.3, + "grad_norm": 0.5567304375845444, + "learning_rate": 1.6351190535337084e-05, + "loss": 0.298, "step": 6586 }, { - "epoch": 0.38, - "grad_norm": 0.3968681655453618, - "learning_rate": 1.4280544446620485e-05, - "loss": 0.3418, + "epoch": 0.3, + "grad_norm": 1.6726880481382136, + "learning_rate": 1.635004117501005e-05, + "loss": 0.6986, "step": 6587 }, { - "epoch": 0.38, - "grad_norm": 1.3699405066952637, - "learning_rate": 1.4278862567261796e-05, - "loss": 0.8169, + "epoch": 0.3, + "grad_norm": 0.3492931456414217, + "learning_rate": 1.634889167409923e-05, + "loss": 0.2672, "step": 6588 }, { - "epoch": 0.38, - "grad_norm": 0.36354057094968956, - "learning_rate": 1.4277180539726278e-05, - "loss": 0.1807, + "epoch": 0.3, + "grad_norm": 0.3434865777364456, + "learning_rate": 1.634774203263006e-05, + "loss": 0.2333, "step": 6589 }, { - "epoch": 0.38, - "grad_norm": 0.37436367140991333, - "learning_rate": 1.427549836407218e-05, - "loss": 0.274, + "epoch": 0.3, + "grad_norm": 0.40991772471326116, + "learning_rate": 1.6346592250628005e-05, + "loss": 0.3278, "step": 6590 }, { - "epoch": 0.38, - "grad_norm": 0.4031137876109123, - "learning_rate": 1.4273816040357762e-05, - "loss": 0.3387, + "epoch": 0.3, + "grad_norm": 0.9259186845405233, + "learning_rate": 1.6345442328118516e-05, + "loss": 0.5356, "step": 6591 }, { - "epoch": 0.38, - "grad_norm": 0.39913767829709507, - "learning_rate": 1.4272133568641273e-05, - "loss": 0.2186, + "epoch": 0.3, + "grad_norm": 0.38355605242118107, + "learning_rate": 1.6344292265127045e-05, + "loss": 0.2511, "step": 6592 }, { - "epoch": 0.38, - "grad_norm": 0.34901425704231603, - "learning_rate": 1.4270450948980989e-05, - "loss": 0.2849, + "epoch": 0.3, + "grad_norm": 1.2056774717455023, + "learning_rate": 1.6343142061679063e-05, + "loss": 0.5917, "step": 6593 }, { - "epoch": 0.38, - "grad_norm": 0.43807677930615996, - "learning_rate": 1.4268768181435166e-05, - "loss": 0.3457, + "epoch": 0.3, + "grad_norm": 0.5849185890907708, + "learning_rate": 1.634199171780002e-05, + "loss": 0.3827, "step": 6594 }, { - "epoch": 0.38, - "grad_norm": 0.33170476466487625, - "learning_rate": 1.4267085266062088e-05, - "loss": 0.1751, + "epoch": 0.3, + "grad_norm": 0.44687750074031735, + "learning_rate": 1.6340841233515403e-05, + "loss": 0.3062, "step": 6595 }, { - "epoch": 0.38, - "grad_norm": 0.47900650476435713, - "learning_rate": 1.4265402202920029e-05, - "loss": 0.3583, + "epoch": 0.3, + "grad_norm": 0.3994128813316267, + "learning_rate": 1.633969060885067e-05, + "loss": 0.2704, "step": 6596 }, { - "epoch": 0.38, - "grad_norm": 0.6101026898693607, - "learning_rate": 1.4263718992067276e-05, - "loss": 0.44, + "epoch": 0.3, + "grad_norm": 0.46614208420869735, + "learning_rate": 1.6338539843831294e-05, + "loss": 0.23, "step": 6597 }, { - "epoch": 0.38, - "grad_norm": 0.3100847262447763, - "learning_rate": 1.4262035633562117e-05, - "loss": 0.2415, + "epoch": 0.3, + "grad_norm": 0.5080092868190113, + "learning_rate": 1.6337388938482755e-05, + "loss": 0.3573, "step": 6598 }, { - "epoch": 0.38, - "grad_norm": 0.29612737366854475, - "learning_rate": 1.4260352127462848e-05, - "loss": 0.2147, + "epoch": 0.3, + "grad_norm": 0.9378578063602536, + "learning_rate": 1.6336237892830537e-05, + "loss": 0.3754, "step": 6599 }, { - "epoch": 0.38, - "grad_norm": 1.0381415145756596, - "learning_rate": 1.4258668473827766e-05, - "loss": 0.7037, + "epoch": 0.3, + "grad_norm": 0.35400897780871404, + "learning_rate": 1.6335086706900115e-05, + "loss": 0.2836, "step": 6600 }, { - "epoch": 0.38, - "grad_norm": 0.40518343031761545, - "learning_rate": 1.4256984672715182e-05, - "loss": 0.2977, + "epoch": 0.3, + "grad_norm": 0.4337132596414439, + "learning_rate": 1.633393538071698e-05, + "loss": 0.3339, "step": 6601 }, { - "epoch": 0.38, - "grad_norm": 0.4991942601955228, - "learning_rate": 1.4255300724183396e-05, - "loss": 0.2844, + "epoch": 0.3, + "grad_norm": 0.32505940605019057, + "learning_rate": 1.6332783914306622e-05, + "loss": 0.1958, "step": 6602 }, { - "epoch": 0.38, - "grad_norm": 0.43905524147113645, - "learning_rate": 1.4253616628290735e-05, - "loss": 0.3412, + "epoch": 0.3, + "grad_norm": 0.45687920251500214, + "learning_rate": 1.6331632307694532e-05, + "loss": 0.3003, "step": 6603 }, { - "epoch": 0.38, - "grad_norm": 0.32293505302934256, - "learning_rate": 1.425193238509551e-05, - "loss": 0.2129, + "epoch": 0.3, + "grad_norm": 0.46936589442553284, + "learning_rate": 1.6330480560906205e-05, + "loss": 0.321, "step": 6604 }, { - "epoch": 0.38, - "grad_norm": 0.30523910076856414, - "learning_rate": 1.425024799465605e-05, - "loss": 0.1843, + "epoch": 0.3, + "grad_norm": 0.5100559966180981, + "learning_rate": 1.6329328673967138e-05, + "loss": 0.2819, "step": 6605 }, { - "epoch": 0.38, - "grad_norm": 0.7134300357593555, - "learning_rate": 1.4248563457030684e-05, - "loss": 0.3323, + "epoch": 0.3, + "grad_norm": 0.550454797363434, + "learning_rate": 1.6328176646902835e-05, + "loss": 0.392, "step": 6606 }, { - "epoch": 0.38, - "grad_norm": 0.3821121733080358, - "learning_rate": 1.4246878772277748e-05, - "loss": 0.2799, + "epoch": 0.3, + "grad_norm": 0.4499413744673752, + "learning_rate": 1.63270244797388e-05, + "loss": 0.3628, "step": 6607 }, { - "epoch": 0.38, - "grad_norm": 0.5362957512770763, - "learning_rate": 1.4245193940455583e-05, - "loss": 0.2907, + "epoch": 0.3, + "grad_norm": 0.34968759889757317, + "learning_rate": 1.6325872172500542e-05, + "loss": 0.2332, "step": 6608 }, { - "epoch": 0.38, - "grad_norm": 0.7015232324733971, - "learning_rate": 1.4243508961622536e-05, - "loss": 0.4711, + "epoch": 0.3, + "grad_norm": 0.9990188160091769, + "learning_rate": 1.6324719725213572e-05, + "loss": 0.6058, "step": 6609 }, { - "epoch": 0.38, - "grad_norm": 0.36283666869718023, - "learning_rate": 1.4241823835836957e-05, - "loss": 0.231, + "epoch": 0.3, + "grad_norm": 0.3665778852545278, + "learning_rate": 1.63235671379034e-05, + "loss": 0.2647, "step": 6610 }, { - "epoch": 0.38, - "grad_norm": 0.3676225906926481, - "learning_rate": 1.4240138563157197e-05, - "loss": 0.2852, + "epoch": 0.3, + "grad_norm": 0.5049814006095249, + "learning_rate": 1.6322414410595548e-05, + "loss": 0.3838, "step": 6611 }, { - "epoch": 0.38, - "grad_norm": 0.3206621409131723, - "learning_rate": 1.4238453143641623e-05, - "loss": 0.1736, + "epoch": 0.3, + "grad_norm": 0.4563241127204825, + "learning_rate": 1.6321261543315534e-05, + "loss": 0.2876, "step": 6612 }, { - "epoch": 0.38, - "grad_norm": 0.6024596594608613, - "learning_rate": 1.4236767577348597e-05, - "loss": 0.3085, + "epoch": 0.3, + "grad_norm": 0.42906696644990255, + "learning_rate": 1.6320108536088882e-05, + "loss": 0.2981, "step": 6613 }, { - "epoch": 0.38, - "grad_norm": 0.3601264112579988, - "learning_rate": 1.4235081864336495e-05, - "loss": 0.3118, + "epoch": 0.3, + "grad_norm": 0.43835403624413555, + "learning_rate": 1.631895538894112e-05, + "loss": 0.2734, "step": 6614 }, { - "epoch": 0.38, - "grad_norm": 0.4754139334883257, - "learning_rate": 1.4233396004663686e-05, - "loss": 0.2908, + "epoch": 0.3, + "grad_norm": 0.39212094038623563, + "learning_rate": 1.6317802101897776e-05, + "loss": 0.2466, "step": 6615 }, { - "epoch": 0.38, - "grad_norm": 0.6780714577012267, - "learning_rate": 1.423170999838856e-05, - "loss": 0.3664, + "epoch": 0.3, + "grad_norm": 0.30264006684812716, + "learning_rate": 1.6316648674984384e-05, + "loss": 0.2613, "step": 6616 }, { - "epoch": 0.38, - "grad_norm": 0.4271908689523186, - "learning_rate": 1.4230023845569497e-05, - "loss": 0.375, + "epoch": 0.3, + "grad_norm": 0.9147456877051281, + "learning_rate": 1.6315495108226473e-05, + "loss": 0.5134, "step": 6617 }, { - "epoch": 0.38, - "grad_norm": 0.2646098949172002, - "learning_rate": 1.422833754626489e-05, - "loss": 0.2093, + "epoch": 0.3, + "grad_norm": 0.5243111004367629, + "learning_rate": 1.631434140164959e-05, + "loss": 0.2919, "step": 6618 }, { - "epoch": 0.38, - "grad_norm": 0.36022570135160514, - "learning_rate": 1.4226651100533136e-05, - "loss": 0.2529, + "epoch": 0.3, + "grad_norm": 0.4207730961732931, + "learning_rate": 1.631318755527928e-05, + "loss": 0.2993, "step": 6619 }, { - "epoch": 0.38, - "grad_norm": 0.4342374594084135, - "learning_rate": 1.4224964508432635e-05, - "loss": 0.3454, + "epoch": 0.3, + "grad_norm": 0.3910479649172596, + "learning_rate": 1.6312033569141074e-05, + "loss": 0.3473, "step": 6620 }, { - "epoch": 0.38, - "grad_norm": 1.0431596454133143, - "learning_rate": 1.4223277770021794e-05, - "loss": 0.4782, + "epoch": 0.3, + "grad_norm": 0.38394718072345824, + "learning_rate": 1.631087944326053e-05, + "loss": 0.2358, "step": 6621 }, { - "epoch": 0.38, - "grad_norm": 0.2630385189545252, - "learning_rate": 1.4221590885359029e-05, - "loss": 0.2208, + "epoch": 0.3, + "grad_norm": 0.31632673088315016, + "learning_rate": 1.6309725177663198e-05, + "loss": 0.2357, "step": 6622 }, { - "epoch": 0.38, - "grad_norm": 0.37912554241453955, - "learning_rate": 1.421990385450275e-05, - "loss": 0.3119, + "epoch": 0.3, + "grad_norm": 0.5400865336847385, + "learning_rate": 1.6308570772374633e-05, + "loss": 0.2937, "step": 6623 }, { - "epoch": 0.38, - "grad_norm": 0.31620577959594415, - "learning_rate": 1.4218216677511383e-05, - "loss": 0.2133, + "epoch": 0.3, + "grad_norm": 0.4609562941086053, + "learning_rate": 1.630741622742039e-05, + "loss": 0.3502, "step": 6624 }, { - "epoch": 0.38, - "grad_norm": 0.7817419011776064, - "learning_rate": 1.4216529354443355e-05, - "loss": 0.2797, + "epoch": 0.3, + "grad_norm": 0.35723449188110534, + "learning_rate": 1.6306261542826035e-05, + "loss": 0.2405, "step": 6625 }, { - "epoch": 0.38, - "grad_norm": 0.3580806273138164, - "learning_rate": 1.4214841885357096e-05, - "loss": 0.3249, + "epoch": 0.3, + "grad_norm": 1.1537993159643016, + "learning_rate": 1.6305106718617122e-05, + "loss": 0.7594, "step": 6626 }, { - "epoch": 0.38, - "grad_norm": 0.47803700129219767, - "learning_rate": 1.4213154270311043e-05, - "loss": 0.4245, + "epoch": 0.3, + "grad_norm": 0.5084133730292307, + "learning_rate": 1.6303951754819226e-05, + "loss": 0.3942, "step": 6627 }, { - "epoch": 0.38, - "grad_norm": 0.29809480038275465, - "learning_rate": 1.421146650936364e-05, - "loss": 0.1387, + "epoch": 0.3, + "grad_norm": 0.28867098184928724, + "learning_rate": 1.6302796651457913e-05, + "loss": 0.2221, "step": 6628 }, { - "epoch": 0.38, - "grad_norm": 0.3552015102693456, - "learning_rate": 1.4209778602573332e-05, - "loss": 0.2911, + "epoch": 0.3, + "grad_norm": 0.4188880799190303, + "learning_rate": 1.6301641408558758e-05, + "loss": 0.3502, "step": 6629 }, { - "epoch": 0.38, - "grad_norm": 0.371671833280386, - "learning_rate": 1.4208090549998572e-05, - "loss": 0.3407, + "epoch": 0.3, + "grad_norm": 0.5011034823953276, + "learning_rate": 1.6300486026147334e-05, + "loss": 0.2896, "step": 6630 }, { - "epoch": 0.38, - "grad_norm": 0.774988864764028, - "learning_rate": 1.420640235169782e-05, - "loss": 0.3303, + "epoch": 0.3, + "grad_norm": 0.35207389748379175, + "learning_rate": 1.6299330504249224e-05, + "loss": 0.2359, "step": 6631 }, { - "epoch": 0.38, - "grad_norm": 0.2656032968656245, - "learning_rate": 1.420471400772953e-05, - "loss": 0.2185, + "epoch": 0.3, + "grad_norm": 0.38497907163084577, + "learning_rate": 1.6298174842890006e-05, + "loss": 0.2938, "step": 6632 }, { - "epoch": 0.38, - "grad_norm": 0.7574900916209983, - "learning_rate": 1.4203025518152178e-05, - "loss": 0.4876, + "epoch": 0.3, + "grad_norm": 1.314538370182189, + "learning_rate": 1.629701904209527e-05, + "loss": 0.796, "step": 6633 }, { - "epoch": 0.38, - "grad_norm": 0.3571697183187376, - "learning_rate": 1.420133688302423e-05, - "loss": 0.3068, + "epoch": 0.3, + "grad_norm": 0.3369898610076391, + "learning_rate": 1.6295863101890603e-05, + "loss": 0.2508, "step": 6634 }, { - "epoch": 0.38, - "grad_norm": 0.3627025977269262, - "learning_rate": 1.419964810240417e-05, - "loss": 0.2454, + "epoch": 0.3, + "grad_norm": 0.47024977776365207, + "learning_rate": 1.629470702230159e-05, + "loss": 0.3608, "step": 6635 }, { - "epoch": 0.38, - "grad_norm": 0.9722718639281581, - "learning_rate": 1.4197959176350476e-05, - "loss": 0.5431, + "epoch": 0.3, + "grad_norm": 0.27704271718116796, + "learning_rate": 1.6293550803353832e-05, + "loss": 0.2039, "step": 6636 }, { - "epoch": 0.38, - "grad_norm": 0.45319845739485237, - "learning_rate": 1.4196270104921637e-05, - "loss": 0.2968, + "epoch": 0.3, + "grad_norm": 0.4259659476564318, + "learning_rate": 1.6292394445072927e-05, + "loss": 0.3, "step": 6637 }, { - "epoch": 0.38, - "grad_norm": 0.3168864543772265, - "learning_rate": 1.4194580888176141e-05, - "loss": 0.2416, + "epoch": 0.3, + "grad_norm": 1.2456542277269682, + "learning_rate": 1.629123794748447e-05, + "loss": 0.4059, "step": 6638 }, { - "epoch": 0.38, - "grad_norm": 0.4402300020745205, - "learning_rate": 1.4192891526172494e-05, - "loss": 0.2527, + "epoch": 0.3, + "grad_norm": 0.3905702507302019, + "learning_rate": 1.629008131061407e-05, + "loss": 0.3038, "step": 6639 }, { - "epoch": 0.38, - "grad_norm": 0.7619509636775692, - "learning_rate": 1.419120201896919e-05, - "loss": 0.4639, + "epoch": 0.31, + "grad_norm": 0.4082656287472168, + "learning_rate": 1.6288924534487332e-05, + "loss": 0.2866, "step": 6640 }, { - "epoch": 0.38, - "grad_norm": 0.4094808720491614, - "learning_rate": 1.4189512366624745e-05, - "loss": 0.2223, + "epoch": 0.31, + "grad_norm": 0.7821103023045858, + "learning_rate": 1.628776761912987e-05, + "loss": 0.3859, "step": 6641 }, { - "epoch": 0.38, - "grad_norm": 0.49203402514236594, - "learning_rate": 1.4187822569197662e-05, - "loss": 0.3586, + "epoch": 0.31, + "grad_norm": 0.29564259848287044, + "learning_rate": 1.6286610564567288e-05, + "loss": 0.1779, "step": 6642 }, { - "epoch": 0.38, - "grad_norm": 0.6042575546599518, - "learning_rate": 1.4186132626746466e-05, - "loss": 0.3776, + "epoch": 0.31, + "grad_norm": 0.3985979065971165, + "learning_rate": 1.628545337082521e-05, + "loss": 0.2637, "step": 6643 }, { - "epoch": 0.38, - "grad_norm": 0.2850850597785273, - "learning_rate": 1.4184442539329677e-05, - "loss": 0.1775, + "epoch": 0.31, + "grad_norm": 0.4621808473295159, + "learning_rate": 1.6284296037929253e-05, + "loss": 0.2753, "step": 6644 }, { - "epoch": 0.38, - "grad_norm": 0.41097360765078583, - "learning_rate": 1.4182752307005822e-05, - "loss": 0.2656, + "epoch": 0.31, + "grad_norm": 0.7849065580877207, + "learning_rate": 1.6283138565905034e-05, + "loss": 0.4651, "step": 6645 }, { - "epoch": 0.38, - "grad_norm": 0.3749280168749956, - "learning_rate": 1.4181061929833435e-05, - "loss": 0.3093, + "epoch": 0.31, + "grad_norm": 0.3900640459562685, + "learning_rate": 1.628198095477819e-05, + "loss": 0.2911, "step": 6646 }, { - "epoch": 0.38, - "grad_norm": 0.36949049484555013, - "learning_rate": 1.4179371407871054e-05, - "loss": 0.2755, + "epoch": 0.31, + "grad_norm": 0.441058727034453, + "learning_rate": 1.6280823204574335e-05, + "loss": 0.2914, "step": 6647 }, { - "epoch": 0.38, - "grad_norm": 0.8305477195457657, - "learning_rate": 1.4177680741177217e-05, - "loss": 0.3878, + "epoch": 0.31, + "grad_norm": 0.3023276919347662, + "learning_rate": 1.6279665315319114e-05, + "loss": 0.1633, "step": 6648 }, { - "epoch": 0.38, - "grad_norm": 0.9615488498518213, - "learning_rate": 1.4175989929810481e-05, - "loss": 0.5805, + "epoch": 0.31, + "grad_norm": 0.43219489998759303, + "learning_rate": 1.6278507287038154e-05, + "loss": 0.2863, "step": 6649 }, { - "epoch": 0.38, - "grad_norm": 0.2774945282091035, - "learning_rate": 1.417429897382939e-05, - "loss": 0.2652, + "epoch": 0.31, + "grad_norm": 1.0578711094906632, + "learning_rate": 1.627734911975709e-05, + "loss": 0.6318, "step": 6650 }, { - "epoch": 0.38, - "grad_norm": 0.21826789390351636, - "learning_rate": 1.4172607873292505e-05, - "loss": 0.1444, + "epoch": 0.31, + "grad_norm": 0.47878463713258884, + "learning_rate": 1.627619081350157e-05, + "loss": 0.3008, "step": 6651 }, { - "epoch": 0.38, - "grad_norm": 0.7917466348827511, - "learning_rate": 1.4170916628258392e-05, - "loss": 0.5094, + "epoch": 0.31, + "grad_norm": 0.3784177058794493, + "learning_rate": 1.6275032368297234e-05, + "loss": 0.3219, "step": 6652 }, { - "epoch": 0.38, - "grad_norm": 0.4402092475426011, - "learning_rate": 1.4169225238785611e-05, - "loss": 0.3204, + "epoch": 0.31, + "grad_norm": 0.985831808752877, + "learning_rate": 1.6273873784169726e-05, + "loss": 0.5825, "step": 6653 }, { - "epoch": 0.38, - "grad_norm": 0.48816511867608736, - "learning_rate": 1.4167533704932743e-05, - "loss": 0.2935, + "epoch": 0.31, + "grad_norm": 0.22157624563053643, + "learning_rate": 1.6272715061144705e-05, + "loss": 0.0984, "step": 6654 }, { - "epoch": 0.38, - "grad_norm": 0.7306057653718944, - "learning_rate": 1.416584202675836e-05, - "loss": 0.44, + "epoch": 0.31, + "grad_norm": 0.4806553177361242, + "learning_rate": 1.6271556199247816e-05, + "loss": 0.2876, "step": 6655 }, { - "epoch": 0.38, - "grad_norm": 0.32525920456793944, - "learning_rate": 1.4164150204321046e-05, - "loss": 0.2374, + "epoch": 0.31, + "grad_norm": 0.4436336409772609, + "learning_rate": 1.6270397198504713e-05, + "loss": 0.3317, "step": 6656 }, { - "epoch": 0.38, - "grad_norm": 0.3296501402587944, - "learning_rate": 1.4162458237679389e-05, - "loss": 0.1485, + "epoch": 0.31, + "grad_norm": 0.750027154728805, + "learning_rate": 1.626923805894107e-05, + "loss": 0.3553, "step": 6657 }, { - "epoch": 0.38, - "grad_norm": 0.36981045940986673, - "learning_rate": 1.4160766126891985e-05, - "loss": 0.3043, + "epoch": 0.31, + "grad_norm": 0.4018477183641706, + "learning_rate": 1.626807878058253e-05, + "loss": 0.303, "step": 6658 }, { - "epoch": 0.38, - "grad_norm": 0.3883473725235902, - "learning_rate": 1.4159073872017427e-05, - "loss": 0.2873, + "epoch": 0.31, + "grad_norm": 0.5838624704541613, + "learning_rate": 1.6266919363454767e-05, + "loss": 0.4501, "step": 6659 }, { - "epoch": 0.38, - "grad_norm": 0.8316799239970233, - "learning_rate": 1.4157381473114323e-05, - "loss": 0.4332, + "epoch": 0.31, + "grad_norm": 0.28678391350476534, + "learning_rate": 1.6265759807583452e-05, + "loss": 0.2104, "step": 6660 }, { - "epoch": 0.38, - "grad_norm": 0.40166375360596235, - "learning_rate": 1.4155688930241274e-05, - "loss": 0.218, + "epoch": 0.31, + "grad_norm": 0.3718243736984594, + "learning_rate": 1.6264600112994253e-05, + "loss": 0.1991, "step": 6661 }, { - "epoch": 0.38, - "grad_norm": 0.375701225006493, - "learning_rate": 1.4153996243456898e-05, - "loss": 0.2991, + "epoch": 0.31, + "grad_norm": 0.728963213666723, + "learning_rate": 1.6263440279712844e-05, + "loss": 0.4662, "step": 6662 }, { - "epoch": 0.38, - "grad_norm": 0.2775566069575302, - "learning_rate": 1.4152303412819808e-05, - "loss": 0.2017, + "epoch": 0.31, + "grad_norm": 0.41942227561214906, + "learning_rate": 1.626228030776491e-05, + "loss": 0.3217, "step": 6663 }, { - "epoch": 0.38, - "grad_norm": 0.7846062358543829, - "learning_rate": 1.4150610438388633e-05, - "loss": 0.3898, + "epoch": 0.31, + "grad_norm": 0.32813790885906, + "learning_rate": 1.626112019717612e-05, + "loss": 0.2429, "step": 6664 }, { - "epoch": 0.38, - "grad_norm": 0.35540285474583694, - "learning_rate": 1.4148917320221992e-05, - "loss": 0.2879, + "epoch": 0.31, + "grad_norm": 0.977314052579278, + "learning_rate": 1.6259959947972164e-05, + "loss": 0.6341, "step": 6665 }, { - "epoch": 0.38, - "grad_norm": 0.36580621119695167, - "learning_rate": 1.4147224058378525e-05, - "loss": 0.3285, + "epoch": 0.31, + "grad_norm": 0.3370864614640248, + "learning_rate": 1.625879956017873e-05, + "loss": 0.1958, "step": 6666 }, { - "epoch": 0.38, - "grad_norm": 0.8255865521291464, - "learning_rate": 1.4145530652916868e-05, - "loss": 0.3617, + "epoch": 0.31, + "grad_norm": 0.3203806194043662, + "learning_rate": 1.6257639033821506e-05, + "loss": 0.2275, "step": 6667 }, { - "epoch": 0.38, - "grad_norm": 0.35219097025016854, - "learning_rate": 1.4143837103895663e-05, - "loss": 0.278, + "epoch": 0.31, + "grad_norm": 0.5701309424295458, + "learning_rate": 1.6256478368926182e-05, + "loss": 0.3468, "step": 6668 }, { - "epoch": 0.38, - "grad_norm": 0.29669356912673234, - "learning_rate": 1.4142143411373559e-05, - "loss": 0.2179, + "epoch": 0.31, + "grad_norm": 0.7790299465689874, + "learning_rate": 1.625531756551846e-05, + "loss": 0.4701, "step": 6669 }, { - "epoch": 0.38, - "grad_norm": 0.4085564007164582, - "learning_rate": 1.4140449575409203e-05, - "loss": 0.2935, + "epoch": 0.31, + "grad_norm": 0.32906499967314945, + "learning_rate": 1.6254156623624037e-05, + "loss": 0.2211, "step": 6670 }, { - "epoch": 0.38, - "grad_norm": 0.3408342696053366, - "learning_rate": 1.4138755596061257e-05, - "loss": 0.2819, + "epoch": 0.31, + "grad_norm": 0.5755530672111476, + "learning_rate": 1.625299554326861e-05, + "loss": 0.4002, "step": 6671 }, { - "epoch": 0.38, - "grad_norm": 0.8311186813808776, - "learning_rate": 1.4137061473388383e-05, - "loss": 0.617, + "epoch": 0.31, + "grad_norm": 0.32966558010924363, + "learning_rate": 1.625183432447789e-05, + "loss": 0.2232, "step": 6672 }, { - "epoch": 0.38, - "grad_norm": 0.5219159840804157, - "learning_rate": 1.4135367207449248e-05, - "loss": 0.3674, + "epoch": 0.31, + "grad_norm": 0.2876879845530426, + "learning_rate": 1.6250672967277585e-05, + "loss": 0.2169, "step": 6673 }, { - "epoch": 0.38, - "grad_norm": 0.29763937422590947, - "learning_rate": 1.4133672798302525e-05, - "loss": 0.2223, + "epoch": 0.31, + "grad_norm": 0.757826477631533, + "learning_rate": 1.6249511471693408e-05, + "loss": 0.3658, "step": 6674 }, { - "epoch": 0.38, - "grad_norm": 0.25470244060159813, - "learning_rate": 1.4131978246006892e-05, - "loss": 0.1708, + "epoch": 0.31, + "grad_norm": 0.37199607250562394, + "learning_rate": 1.6248349837751064e-05, + "loss": 0.3546, "step": 6675 }, { - "epoch": 0.38, - "grad_norm": 0.6082879025289717, - "learning_rate": 1.4130283550621027e-05, - "loss": 0.4386, + "epoch": 0.31, + "grad_norm": 0.6385973486357023, + "learning_rate": 1.624718806547628e-05, + "loss": 0.3386, "step": 6676 }, { - "epoch": 0.38, - "grad_norm": 0.3082473816953717, - "learning_rate": 1.4128588712203626e-05, - "loss": 0.1922, + "epoch": 0.31, + "grad_norm": 0.39124219593765885, + "learning_rate": 1.624602615489477e-05, + "loss": 0.2736, "step": 6677 }, { - "epoch": 0.38, - "grad_norm": 0.40828261098657215, - "learning_rate": 1.4126893730813369e-05, - "loss": 0.3571, + "epoch": 0.31, + "grad_norm": 0.2912565711689406, + "learning_rate": 1.6244864106032268e-05, + "loss": 0.2344, "step": 6678 }, { - "epoch": 0.38, - "grad_norm": 1.284813336317904, - "learning_rate": 1.4125198606508963e-05, - "loss": 0.587, + "epoch": 0.31, + "grad_norm": 0.45834645015404796, + "learning_rate": 1.624370191891449e-05, + "loss": 0.2745, "step": 6679 }, { - "epoch": 0.38, - "grad_norm": 0.3249047265979789, - "learning_rate": 1.4123503339349105e-05, - "loss": 0.2049, + "epoch": 0.31, + "grad_norm": 0.527540308502391, + "learning_rate": 1.624253959356717e-05, + "loss": 0.3159, "step": 6680 }, { - "epoch": 0.38, - "grad_norm": 0.27026754771556555, - "learning_rate": 1.4121807929392505e-05, - "loss": 0.2416, + "epoch": 0.31, + "grad_norm": 1.5229778699898082, + "learning_rate": 1.6241377130016038e-05, + "loss": 0.839, "step": 6681 }, { - "epoch": 0.38, - "grad_norm": 0.4587471969379784, - "learning_rate": 1.4120112376697873e-05, - "loss": 0.3916, + "epoch": 0.31, + "grad_norm": 0.42971366386805065, + "learning_rate": 1.6240214528286832e-05, + "loss": 0.3053, "step": 6682 }, { - "epoch": 0.38, - "grad_norm": 0.30805966674421015, - "learning_rate": 1.4118416681323925e-05, - "loss": 0.1992, + "epoch": 0.31, + "grad_norm": 0.3052613118371382, + "learning_rate": 1.623905178840529e-05, + "loss": 0.2679, "step": 6683 }, { - "epoch": 0.38, - "grad_norm": 1.2854672987600315, - "learning_rate": 1.4116720843329385e-05, - "loss": 0.8468, + "epoch": 0.31, + "grad_norm": 1.3014448745623601, + "learning_rate": 1.6237888910397154e-05, + "loss": 0.6817, "step": 6684 }, { - "epoch": 0.38, - "grad_norm": 0.5225364984100592, - "learning_rate": 1.4115024862772981e-05, - "loss": 0.3391, + "epoch": 0.31, + "grad_norm": 0.28575253932999406, + "learning_rate": 1.6236725894288175e-05, + "loss": 0.2282, "step": 6685 }, { - "epoch": 0.38, - "grad_norm": 0.3050026501666236, - "learning_rate": 1.4113328739713442e-05, - "loss": 0.2727, + "epoch": 0.31, + "grad_norm": 0.5340218479287223, + "learning_rate": 1.623556274010409e-05, + "loss": 0.378, "step": 6686 }, { - "epoch": 0.38, - "grad_norm": 0.7048532439613371, - "learning_rate": 1.4111632474209506e-05, - "loss": 0.3857, + "epoch": 0.31, + "grad_norm": 0.27982679677075867, + "learning_rate": 1.623439944787066e-05, + "loss": 0.2111, "step": 6687 }, { - "epoch": 0.38, - "grad_norm": 0.2856578199252998, - "learning_rate": 1.4109936066319915e-05, - "loss": 0.2344, + "epoch": 0.31, + "grad_norm": 0.38337725462738864, + "learning_rate": 1.623323601761363e-05, + "loss": 0.2649, "step": 6688 }, { - "epoch": 0.38, - "grad_norm": 0.38790911949598295, - "learning_rate": 1.4108239516103412e-05, - "loss": 0.2793, + "epoch": 0.31, + "grad_norm": 1.2048162832634715, + "learning_rate": 1.6232072449358768e-05, + "loss": 0.6965, "step": 6689 }, { - "epoch": 0.38, - "grad_norm": 0.42389402150691363, - "learning_rate": 1.4106542823618754e-05, - "loss": 0.2845, + "epoch": 0.31, + "grad_norm": 0.4394263726569237, + "learning_rate": 1.6230908743131823e-05, + "loss": 0.2986, "step": 6690 }, { - "epoch": 0.38, - "grad_norm": 1.1114232450461785, - "learning_rate": 1.4104845988924694e-05, - "loss": 0.6795, + "epoch": 0.31, + "grad_norm": 0.35074423813786554, + "learning_rate": 1.622974489895857e-05, + "loss": 0.2692, "step": 6691 }, { - "epoch": 0.38, - "grad_norm": 0.31314498273119074, - "learning_rate": 1.4103149012079994e-05, - "loss": 0.2629, + "epoch": 0.31, + "grad_norm": 0.45193036295581984, + "learning_rate": 1.6228580916864767e-05, + "loss": 0.3796, "step": 6692 }, { - "epoch": 0.38, - "grad_norm": 0.4220924174883716, - "learning_rate": 1.4101451893143418e-05, - "loss": 0.2873, + "epoch": 0.31, + "grad_norm": 0.20823536556111047, + "learning_rate": 1.6227416796876183e-05, + "loss": 0.1192, "step": 6693 }, { - "epoch": 0.38, - "grad_norm": 0.34138109317922455, - "learning_rate": 1.4099754632173744e-05, - "loss": 0.2751, + "epoch": 0.31, + "grad_norm": 0.6476606239893536, + "learning_rate": 1.6226252539018597e-05, + "loss": 0.3743, "step": 6694 }, { - "epoch": 0.38, - "grad_norm": 0.404482535836102, - "learning_rate": 1.409805722922974e-05, - "loss": 0.2508, + "epoch": 0.31, + "grad_norm": 0.35599435692443465, + "learning_rate": 1.6225088143317777e-05, + "loss": 0.3013, "step": 6695 }, { - "epoch": 0.38, - "grad_norm": 0.33534481802465277, - "learning_rate": 1.409635968437019e-05, - "loss": 0.2233, + "epoch": 0.31, + "grad_norm": 0.4938401320624271, + "learning_rate": 1.622392360979951e-05, + "loss": 0.3729, "step": 6696 }, { - "epoch": 0.38, - "grad_norm": 0.3605268365663909, - "learning_rate": 1.409466199765388e-05, - "loss": 0.3063, + "epoch": 0.31, + "grad_norm": 0.567718113436878, + "learning_rate": 1.6222758938489566e-05, + "loss": 0.3546, "step": 6697 }, { - "epoch": 0.38, - "grad_norm": 0.39065776412591735, - "learning_rate": 1.4092964169139603e-05, - "loss": 0.2807, + "epoch": 0.31, + "grad_norm": 0.33765679097510365, + "learning_rate": 1.6221594129413743e-05, + "loss": 0.2465, "step": 6698 }, { - "epoch": 0.38, - "grad_norm": 0.44905297298310376, - "learning_rate": 1.409126619888615e-05, - "loss": 0.3731, + "epoch": 0.31, + "grad_norm": 0.32090428334192345, + "learning_rate": 1.622042918259782e-05, + "loss": 0.2692, "step": 6699 }, { - "epoch": 0.38, - "grad_norm": 0.34506324518643255, - "learning_rate": 1.4089568086952327e-05, - "loss": 0.2082, + "epoch": 0.31, + "grad_norm": 0.3373045259337222, + "learning_rate": 1.621926409806759e-05, + "loss": 0.2069, "step": 6700 }, { - "epoch": 0.39, - "grad_norm": 0.39996919181229135, - "learning_rate": 1.4087869833396936e-05, - "loss": 0.2956, + "epoch": 0.31, + "grad_norm": 0.519017569319671, + "learning_rate": 1.6218098875848846e-05, + "loss": 0.3807, "step": 6701 }, { - "epoch": 0.39, - "grad_norm": 0.3419182623503886, - "learning_rate": 1.408617143827879e-05, - "loss": 0.2966, + "epoch": 0.31, + "grad_norm": 0.9547243282030283, + "learning_rate": 1.621693351596739e-05, + "loss": 0.5102, "step": 6702 }, { - "epoch": 0.39, - "grad_norm": 0.35602932018929834, - "learning_rate": 1.40844729016567e-05, - "loss": 0.1709, + "epoch": 0.31, + "grad_norm": 0.325505484926452, + "learning_rate": 1.6215768018449015e-05, + "loss": 0.2578, "step": 6703 }, { - "epoch": 0.39, - "grad_norm": 0.33780685153209516, - "learning_rate": 1.4082774223589492e-05, - "loss": 0.2794, + "epoch": 0.31, + "grad_norm": 0.5270273182324773, + "learning_rate": 1.6214602383319527e-05, + "loss": 0.363, "step": 6704 }, { - "epoch": 0.39, - "grad_norm": 0.3433742555355464, - "learning_rate": 1.4081075404135987e-05, - "loss": 0.3471, + "epoch": 0.31, + "grad_norm": 0.5844785943443738, + "learning_rate": 1.621343661060473e-05, + "loss": 0.3161, "step": 6705 }, { - "epoch": 0.39, - "grad_norm": 0.698436131545571, - "learning_rate": 1.4079376443355016e-05, - "loss": 0.0464, + "epoch": 0.31, + "grad_norm": 0.2641593182842511, + "learning_rate": 1.6212270700330438e-05, + "loss": 0.1691, "step": 6706 }, { - "epoch": 0.39, - "grad_norm": 0.3513574365202577, - "learning_rate": 1.4077677341305414e-05, - "loss": 0.2612, + "epoch": 0.31, + "grad_norm": 0.37250703081140096, + "learning_rate": 1.6211104652522462e-05, + "loss": 0.3196, "step": 6707 }, { - "epoch": 0.39, - "grad_norm": 1.17360579573699, - "learning_rate": 1.4075978098046022e-05, - "loss": 0.7839, + "epoch": 0.31, + "grad_norm": 1.0609618031098604, + "learning_rate": 1.6209938467206612e-05, + "loss": 0.4588, "step": 6708 }, { - "epoch": 0.39, - "grad_norm": 0.26317161420211993, - "learning_rate": 1.4074278713635683e-05, - "loss": 0.2323, + "epoch": 0.31, + "grad_norm": 0.3954895806936831, + "learning_rate": 1.6208772144408712e-05, + "loss": 0.2258, "step": 6709 }, { - "epoch": 0.39, - "grad_norm": 0.2971917186585945, - "learning_rate": 1.4072579188133247e-05, - "loss": 0.2247, + "epoch": 0.31, + "grad_norm": 0.5140490469489751, + "learning_rate": 1.6207605684154577e-05, + "loss": 0.3586, "step": 6710 }, { - "epoch": 0.39, - "grad_norm": 0.6271548854008792, - "learning_rate": 1.407087952159757e-05, - "loss": 0.4772, + "epoch": 0.31, + "grad_norm": 0.3198506020047123, + "learning_rate": 1.6206439086470037e-05, + "loss": 0.2858, "step": 6711 }, { - "epoch": 0.39, - "grad_norm": 0.871101430569621, - "learning_rate": 1.406917971408751e-05, - "loss": 0.6613, + "epoch": 0.31, + "grad_norm": 0.8625590486821628, + "learning_rate": 1.6205272351380917e-05, + "loss": 0.5312, "step": 6712 }, { - "epoch": 0.39, - "grad_norm": 0.3915942662772745, - "learning_rate": 1.4067479765661929e-05, - "loss": 0.2208, + "epoch": 0.31, + "grad_norm": 0.36562163202819825, + "learning_rate": 1.6204105478913052e-05, + "loss": 0.2109, "step": 6713 }, { - "epoch": 0.39, - "grad_norm": 0.33841239049328914, - "learning_rate": 1.4065779676379702e-05, - "loss": 0.3083, + "epoch": 0.31, + "grad_norm": 0.8021963167604096, + "learning_rate": 1.620293846909226e-05, + "loss": 0.412, "step": 6714 }, { - "epoch": 0.39, - "grad_norm": 0.30049951964028676, - "learning_rate": 1.4064079446299699e-05, - "loss": 0.2178, + "epoch": 0.31, + "grad_norm": 0.45388094873874923, + "learning_rate": 1.62017713219444e-05, + "loss": 0.3029, "step": 6715 }, { - "epoch": 0.39, - "grad_norm": 0.38831455943753335, - "learning_rate": 1.4062379075480799e-05, - "loss": 0.2218, + "epoch": 0.31, + "grad_norm": 0.39095581712637106, + "learning_rate": 1.6200604037495295e-05, + "loss": 0.2252, "step": 6716 }, { - "epoch": 0.39, - "grad_norm": 0.291218423496063, - "learning_rate": 1.4060678563981886e-05, - "loss": 0.317, + "epoch": 0.31, + "grad_norm": 1.0627967619207583, + "learning_rate": 1.6199436615770796e-05, + "loss": 0.7262, "step": 6717 }, { - "epoch": 0.39, - "grad_norm": 1.0342262669644686, - "learning_rate": 1.4058977911861846e-05, - "loss": 0.6031, + "epoch": 0.31, + "grad_norm": 0.3545428142235433, + "learning_rate": 1.6198269056796746e-05, + "loss": 0.2523, "step": 6718 }, { - "epoch": 0.39, - "grad_norm": 0.35540629091607656, - "learning_rate": 1.405727711917958e-05, - "loss": 0.2125, + "epoch": 0.31, + "grad_norm": 0.3517835082515496, + "learning_rate": 1.619710136059899e-05, + "loss": 0.2231, "step": 6719 }, { - "epoch": 0.39, - "grad_norm": 0.3154034414211785, - "learning_rate": 1.405557618599398e-05, - "loss": 0.2661, + "epoch": 0.31, + "grad_norm": 0.7363759592821211, + "learning_rate": 1.6195933527203385e-05, + "loss": 0.4622, "step": 6720 }, { - "epoch": 0.39, - "grad_norm": 0.3914215878276067, - "learning_rate": 1.4053875112363953e-05, - "loss": 0.3172, + "epoch": 0.31, + "grad_norm": 0.6184428541104103, + "learning_rate": 1.6194765556635782e-05, + "loss": 0.3789, "step": 6721 }, { - "epoch": 0.39, - "grad_norm": 0.5473122810280361, - "learning_rate": 1.40521738983484e-05, - "loss": 0.3438, + "epoch": 0.31, + "grad_norm": 0.3786599318888344, + "learning_rate": 1.619359744892204e-05, + "loss": 0.2309, "step": 6722 }, { - "epoch": 0.39, - "grad_norm": 0.2795370643280423, - "learning_rate": 1.4050472544006243e-05, - "loss": 0.2083, + "epoch": 0.31, + "grad_norm": 0.41944427748634533, + "learning_rate": 1.6192429204088022e-05, + "loss": 0.3473, "step": 6723 }, { - "epoch": 0.39, - "grad_norm": 1.4035101146683626, - "learning_rate": 1.4048771049396397e-05, - "loss": 0.7028, + "epoch": 0.31, + "grad_norm": 0.633881108809735, + "learning_rate": 1.6191260822159587e-05, + "loss": 0.3757, "step": 6724 }, { - "epoch": 0.39, - "grad_norm": 0.2976029002338224, - "learning_rate": 1.4047069414577782e-05, - "loss": 0.2697, + "epoch": 0.31, + "grad_norm": 0.4637489300912422, + "learning_rate": 1.6190092303162607e-05, + "loss": 0.3034, "step": 6725 }, { - "epoch": 0.39, - "grad_norm": 0.35788245762048565, - "learning_rate": 1.4045367639609326e-05, - "loss": 0.265, + "epoch": 0.31, + "grad_norm": 0.4003159739131099, + "learning_rate": 1.6188923647122946e-05, + "loss": 0.223, "step": 6726 }, { - "epoch": 0.39, - "grad_norm": 0.8534396855898208, - "learning_rate": 1.4043665724549967e-05, - "loss": 0.5141, + "epoch": 0.31, + "grad_norm": 0.42649843040556173, + "learning_rate": 1.618775485406648e-05, + "loss": 0.2642, "step": 6727 }, { - "epoch": 0.39, - "grad_norm": 0.2832789254792039, - "learning_rate": 1.4041963669458633e-05, - "loss": 0.2318, + "epoch": 0.31, + "grad_norm": 0.46464470932303426, + "learning_rate": 1.618658592401909e-05, + "loss": 0.3414, "step": 6728 }, { - "epoch": 0.39, - "grad_norm": 0.3004873857950981, - "learning_rate": 1.4040261474394275e-05, - "loss": 0.1911, + "epoch": 0.31, + "grad_norm": 0.9591152387680215, + "learning_rate": 1.6185416857006648e-05, + "loss": 0.4963, "step": 6729 }, { - "epoch": 0.39, - "grad_norm": 0.48748591817012543, - "learning_rate": 1.4038559139415832e-05, - "loss": 0.3871, + "epoch": 0.31, + "grad_norm": 0.5672483732745518, + "learning_rate": 1.6184247653055042e-05, + "loss": 0.4149, "step": 6730 }, { - "epoch": 0.39, - "grad_norm": 0.66305837104567, - "learning_rate": 1.4036856664582263e-05, - "loss": 0.4026, + "epoch": 0.31, + "grad_norm": 0.32972760903041165, + "learning_rate": 1.6183078312190148e-05, + "loss": 0.2746, "step": 6731 }, { - "epoch": 0.39, - "grad_norm": 0.4050811425920618, - "learning_rate": 1.403515404995252e-05, - "loss": 0.2587, + "epoch": 0.31, + "grad_norm": 0.35917607074949526, + "learning_rate": 1.6181908834437862e-05, + "loss": 0.1825, "step": 6732 }, { - "epoch": 0.39, - "grad_norm": 0.41824055428183066, - "learning_rate": 1.4033451295585565e-05, - "loss": 0.3413, + "epoch": 0.31, + "grad_norm": 0.806829928330583, + "learning_rate": 1.6180739219824073e-05, + "loss": 0.5079, "step": 6733 }, { - "epoch": 0.39, - "grad_norm": 0.37066586491062015, - "learning_rate": 1.4031748401540366e-05, - "loss": 0.2305, + "epoch": 0.31, + "grad_norm": 0.4899453701906641, + "learning_rate": 1.6179569468374673e-05, + "loss": 0.3172, "step": 6734 }, { - "epoch": 0.39, - "grad_norm": 0.29315654585139084, - "learning_rate": 1.4030045367875893e-05, - "loss": 0.2388, + "epoch": 0.31, + "grad_norm": 0.47240583651985885, + "learning_rate": 1.617839958011556e-05, + "loss": 0.2773, "step": 6735 }, { - "epoch": 0.39, - "grad_norm": 0.8745220798442661, - "learning_rate": 1.4028342194651123e-05, - "loss": 0.4191, + "epoch": 0.31, + "grad_norm": 0.5300140555706498, + "learning_rate": 1.617722955507264e-05, + "loss": 0.3969, "step": 6736 }, { - "epoch": 0.39, - "grad_norm": 0.30931597379685477, - "learning_rate": 1.4026638881925032e-05, - "loss": 0.2733, + "epoch": 0.31, + "grad_norm": 0.3825746144884539, + "learning_rate": 1.6176059393271807e-05, + "loss": 0.3226, "step": 6737 }, { - "epoch": 0.39, - "grad_norm": 0.37488368146363077, - "learning_rate": 1.4024935429756614e-05, - "loss": 0.3517, + "epoch": 0.31, + "grad_norm": 0.5488936585847455, + "learning_rate": 1.6174889094738975e-05, + "loss": 0.2657, "step": 6738 }, { - "epoch": 0.39, - "grad_norm": 0.548881557197987, - "learning_rate": 1.4023231838204854e-05, - "loss": 0.372, + "epoch": 0.31, + "grad_norm": 0.34170981470557205, + "learning_rate": 1.6173718659500046e-05, + "loss": 0.1949, "step": 6739 }, { - "epoch": 0.39, - "grad_norm": 0.31575575686443624, - "learning_rate": 1.4021528107328749e-05, - "loss": 0.1803, + "epoch": 0.31, + "grad_norm": 0.40246048147600033, + "learning_rate": 1.617254808758094e-05, + "loss": 0.293, "step": 6740 }, { - "epoch": 0.39, - "grad_norm": 0.2777086707952842, - "learning_rate": 1.4019824237187296e-05, - "loss": 0.2545, + "epoch": 0.31, + "grad_norm": 1.5602604552794073, + "learning_rate": 1.617137737900757e-05, + "loss": 0.8928, "step": 6741 }, { - "epoch": 0.39, - "grad_norm": 1.014625121565217, - "learning_rate": 1.4018120227839505e-05, - "loss": 0.3794, + "epoch": 0.31, + "grad_norm": 0.4150834756675873, + "learning_rate": 1.6170206533805845e-05, + "loss": 0.2699, "step": 6742 }, { - "epoch": 0.39, - "grad_norm": 0.5247262023251844, - "learning_rate": 1.4016416079344382e-05, - "loss": 0.373, + "epoch": 0.31, + "grad_norm": 0.35616587627774376, + "learning_rate": 1.6169035552001698e-05, + "loss": 0.3272, "step": 6743 }, { - "epoch": 0.39, - "grad_norm": 0.3867050547624269, - "learning_rate": 1.4014711791760944e-05, - "loss": 0.3268, + "epoch": 0.31, + "grad_norm": 0.4449961110690607, + "learning_rate": 1.616786443362105e-05, + "loss": 0.3113, "step": 6744 }, { - "epoch": 0.39, - "grad_norm": 0.35604241188905456, - "learning_rate": 1.401300736514821e-05, - "loss": 0.2967, + "epoch": 0.31, + "grad_norm": 0.2224810647045958, + "learning_rate": 1.616669317868983e-05, + "loss": 0.1054, "step": 6745 }, { - "epoch": 0.39, - "grad_norm": 0.4154383606703178, - "learning_rate": 1.4011302799565205e-05, - "loss": 0.3106, + "epoch": 0.31, + "grad_norm": 0.49738218882020835, + "learning_rate": 1.6165521787233963e-05, + "loss": 0.3378, "step": 6746 }, { - "epoch": 0.39, - "grad_norm": 0.250345897544341, - "learning_rate": 1.4009598095070951e-05, - "loss": 0.1927, + "epoch": 0.31, + "grad_norm": 0.4117795768770408, + "learning_rate": 1.616435025927939e-05, + "loss": 0.326, "step": 6747 }, { - "epoch": 0.39, - "grad_norm": 1.2188115276047022, - "learning_rate": 1.4007893251724491e-05, - "loss": 0.7797, + "epoch": 0.31, + "grad_norm": 0.8571403058004048, + "learning_rate": 1.616317859485204e-05, + "loss": 0.3727, "step": 6748 }, { - "epoch": 0.39, - "grad_norm": 0.29690370719853887, - "learning_rate": 1.400618826958486e-05, - "loss": 0.2267, + "epoch": 0.31, + "grad_norm": 0.3997159562974615, + "learning_rate": 1.6162006793977858e-05, + "loss": 0.318, "step": 6749 }, { - "epoch": 0.39, - "grad_norm": 0.40303575141635606, - "learning_rate": 1.4004483148711101e-05, - "loss": 0.3435, + "epoch": 0.31, + "grad_norm": 0.410027533778778, + "learning_rate": 1.6160834856682783e-05, + "loss": 0.3185, "step": 6750 }, { - "epoch": 0.39, - "grad_norm": 0.9008436992328396, - "learning_rate": 1.4002777889162262e-05, - "loss": 0.4848, - "step": 6751 + "epoch": 0.31, + "grad_norm": 0.30433183712987544, + "learning_rate": 1.6159662782992767e-05, + "loss": 0.2025, + "step": 6751 }, { - "epoch": 0.39, - "grad_norm": 0.2933102711442764, - "learning_rate": 1.4001072490997399e-05, - "loss": 0.1314, + "epoch": 0.31, + "grad_norm": 0.31155054079936434, + "learning_rate": 1.615849057293375e-05, + "loss": 0.2178, "step": 6752 }, { - "epoch": 0.39, - "grad_norm": 0.2828190568585241, - "learning_rate": 1.3999366954275566e-05, - "loss": 0.2767, + "epoch": 0.31, + "grad_norm": 1.2629097236999247, + "learning_rate": 1.6157318226531685e-05, + "loss": 0.5071, "step": 6753 }, { - "epoch": 0.39, - "grad_norm": 0.362682035058518, - "learning_rate": 1.3997661279055826e-05, - "loss": 0.247, + "epoch": 0.31, + "grad_norm": 0.4682042607277238, + "learning_rate": 1.6156145743812532e-05, + "loss": 0.3344, "step": 6754 }, { - "epoch": 0.39, - "grad_norm": 0.6611501611701132, - "learning_rate": 1.399595546539725e-05, - "loss": 0.3083, + "epoch": 0.31, + "grad_norm": 0.3357680303839436, + "learning_rate": 1.6154973124802248e-05, + "loss": 0.2289, "step": 6755 }, { - "epoch": 0.39, - "grad_norm": 0.3474578684143908, - "learning_rate": 1.3994249513358907e-05, - "loss": 0.3028, + "epoch": 0.31, + "grad_norm": 1.2869987732237733, + "learning_rate": 1.6153800369526788e-05, + "loss": 0.6715, "step": 6756 }, { - "epoch": 0.39, - "grad_norm": 0.38528825517022264, - "learning_rate": 1.3992543422999876e-05, - "loss": 0.3243, + "epoch": 0.31, + "grad_norm": 0.2746826192713602, + "learning_rate": 1.6152627478012116e-05, + "loss": 0.1636, "step": 6757 }, { - "epoch": 0.39, - "grad_norm": 0.3992245764777032, - "learning_rate": 1.3990837194379236e-05, - "loss": 0.1723, + "epoch": 0.31, + "grad_norm": 0.39425936674462725, + "learning_rate": 1.6151454450284206e-05, + "loss": 0.228, "step": 6758 }, { - "epoch": 0.39, - "grad_norm": 0.24633718428561147, - "learning_rate": 1.3989130827556077e-05, - "loss": 0.2043, + "epoch": 0.31, + "grad_norm": 0.9537556790267768, + "learning_rate": 1.6150281286369024e-05, + "loss": 0.3704, "step": 6759 }, { - "epoch": 0.39, - "grad_norm": 1.7078159518251719, - "learning_rate": 1.398742432258949e-05, - "loss": 0.8635, + "epoch": 0.31, + "grad_norm": 1.069674269395746, + "learning_rate": 1.614910798629254e-05, + "loss": 0.6016, "step": 6760 }, { - "epoch": 0.39, - "grad_norm": 0.386580417191464, - "learning_rate": 1.398571767953857e-05, - "loss": 0.3186, + "epoch": 0.31, + "grad_norm": 0.3306242853561329, + "learning_rate": 1.6147934550080734e-05, + "loss": 0.2041, "step": 6761 }, { - "epoch": 0.39, - "grad_norm": 0.4046253648674059, - "learning_rate": 1.3984010898462417e-05, - "loss": 0.2438, + "epoch": 0.31, + "grad_norm": 0.40916384354161917, + "learning_rate": 1.614676097775958e-05, + "loss": 0.33, "step": 6762 }, { - "epoch": 0.39, - "grad_norm": 0.8689183740842735, - "learning_rate": 1.398230397942014e-05, - "loss": 0.5434, + "epoch": 0.31, + "grad_norm": 0.26442418691484854, + "learning_rate": 1.6145587269355062e-05, + "loss": 0.1729, "step": 6763 }, { - "epoch": 0.39, - "grad_norm": 0.4089398395770051, - "learning_rate": 1.3980596922470844e-05, - "loss": 0.3337, + "epoch": 0.31, + "grad_norm": 0.45136439396279865, + "learning_rate": 1.6144413424893163e-05, + "loss": 0.2993, "step": 6764 }, { - "epoch": 0.39, - "grad_norm": 0.29438732424321856, - "learning_rate": 1.397888972767365e-05, - "loss": 0.2378, + "epoch": 0.31, + "grad_norm": 1.394868004123701, + "learning_rate": 1.614323944439988e-05, + "loss": 0.4107, "step": 6765 }, { - "epoch": 0.39, - "grad_norm": 0.3592290968908871, - "learning_rate": 1.3977182395087674e-05, - "loss": 0.2221, + "epoch": 0.31, + "grad_norm": 0.5002144570948106, + "learning_rate": 1.6142065327901192e-05, + "loss": 0.3427, "step": 6766 }, { - "epoch": 0.39, - "grad_norm": 0.6970432495304754, - "learning_rate": 1.3975474924772043e-05, - "loss": 0.3725, + "epoch": 0.31, + "grad_norm": 0.37886809342006733, + "learning_rate": 1.6140891075423095e-05, + "loss": 0.2702, "step": 6767 }, { - "epoch": 0.39, - "grad_norm": 0.4009812575724056, - "learning_rate": 1.3973767316785887e-05, - "loss": 0.2633, + "epoch": 0.31, + "grad_norm": 0.862629977415794, + "learning_rate": 1.6139716686991592e-05, + "loss": 0.3901, "step": 6768 }, { - "epoch": 0.39, - "grad_norm": 0.37223344888144816, - "learning_rate": 1.397205957118834e-05, - "loss": 0.3468, + "epoch": 0.31, + "grad_norm": 0.2935159245210967, + "learning_rate": 1.6138542162632677e-05, + "loss": 0.1577, "step": 6769 }, { - "epoch": 0.39, - "grad_norm": 1.57109691824902, - "learning_rate": 1.397035168803854e-05, - "loss": 0.762, + "epoch": 0.31, + "grad_norm": 0.515303048678815, + "learning_rate": 1.6137367502372356e-05, + "loss": 0.2746, "step": 6770 }, { - "epoch": 0.39, - "grad_norm": 0.2339637760874292, - "learning_rate": 1.3968643667395634e-05, - "loss": 0.1738, + "epoch": 0.31, + "grad_norm": 0.656812904560812, + "learning_rate": 1.6136192706236635e-05, + "loss": 0.3252, "step": 6771 }, { - "epoch": 0.39, - "grad_norm": 0.32562666939438184, - "learning_rate": 1.3966935509318766e-05, - "loss": 0.2632, + "epoch": 0.31, + "grad_norm": 1.3988466734238543, + "learning_rate": 1.613501777425152e-05, + "loss": 0.7831, "step": 6772 }, { - "epoch": 0.39, - "grad_norm": 0.7221752928764033, - "learning_rate": 1.3965227213867093e-05, - "loss": 0.4831, + "epoch": 0.31, + "grad_norm": 0.3874902729059708, + "learning_rate": 1.6133842706443025e-05, + "loss": 0.2384, "step": 6773 }, { - "epoch": 0.39, - "grad_norm": 0.33874134003021095, - "learning_rate": 1.3963518781099774e-05, - "loss": 0.2641, + "epoch": 0.31, + "grad_norm": 0.5984432874780572, + "learning_rate": 1.6132667502837164e-05, + "loss": 0.4009, "step": 6774 }, { - "epoch": 0.39, - "grad_norm": 0.9198892609799831, - "learning_rate": 1.3961810211075965e-05, - "loss": 0.4748, + "epoch": 0.31, + "grad_norm": 0.2732991728496859, + "learning_rate": 1.6131492163459955e-05, + "loss": 0.1915, "step": 6775 }, { - "epoch": 0.39, - "grad_norm": 0.41627147942332504, - "learning_rate": 1.3960101503854843e-05, - "loss": 0.314, + "epoch": 0.31, + "grad_norm": 0.36326131384568094, + "learning_rate": 1.613031668833742e-05, + "loss": 0.315, "step": 6776 }, { - "epoch": 0.39, - "grad_norm": 0.32450432247327654, - "learning_rate": 1.3958392659495575e-05, - "loss": 0.2792, + "epoch": 0.31, + "grad_norm": 0.8692199684281178, + "learning_rate": 1.6129141077495583e-05, + "loss": 0.4525, "step": 6777 }, { - "epoch": 0.39, - "grad_norm": 0.3360326187425709, - "learning_rate": 1.3956683678057342e-05, - "loss": 0.1615, + "epoch": 0.31, + "grad_norm": 0.5128647275606913, + "learning_rate": 1.6127965330960468e-05, + "loss": 0.3042, "step": 6778 }, { - "epoch": 0.39, - "grad_norm": 0.7677364221803948, - "learning_rate": 1.395497455959932e-05, - "loss": 0.3848, + "epoch": 0.31, + "grad_norm": 0.463054745040407, + "learning_rate": 1.612678944875811e-05, + "loss": 0.2903, "step": 6779 }, { - "epoch": 0.39, - "grad_norm": 0.3772113181888643, - "learning_rate": 1.39532653041807e-05, - "loss": 0.2806, + "epoch": 0.31, + "grad_norm": 0.5973145410063831, + "learning_rate": 1.6125613430914533e-05, + "loss": 0.393, "step": 6780 }, { - "epoch": 0.39, - "grad_norm": 0.6590453255458756, - "learning_rate": 1.3951555911860672e-05, - "loss": 0.2858, + "epoch": 0.31, + "grad_norm": 0.4236879474704736, + "learning_rate": 1.612443727745578e-05, + "loss": 0.321, "step": 6781 }, { - "epoch": 0.39, - "grad_norm": 0.8705894982837394, - "learning_rate": 1.3949846382698433e-05, - "loss": 0.4125, + "epoch": 0.31, + "grad_norm": 0.4136210512327562, + "learning_rate": 1.612326098840789e-05, + "loss": 0.2482, "step": 6782 }, { - "epoch": 0.39, - "grad_norm": 0.3819991956597844, - "learning_rate": 1.3948136716753183e-05, - "loss": 0.2831, + "epoch": 0.31, + "grad_norm": 0.24866531118196758, + "learning_rate": 1.6122084563796906e-05, + "loss": 0.2327, "step": 6783 }, { - "epoch": 0.39, - "grad_norm": 0.4511179604074903, - "learning_rate": 1.394642691408413e-05, - "loss": 0.3855, + "epoch": 0.31, + "grad_norm": 1.4657352652578608, + "learning_rate": 1.6120908003648867e-05, + "loss": 0.5785, "step": 6784 }, { - "epoch": 0.39, - "grad_norm": 0.23375528534045187, - "learning_rate": 1.394471697475048e-05, - "loss": 0.1753, + "epoch": 0.31, + "grad_norm": 0.4151820642974993, + "learning_rate": 1.6119731307989822e-05, + "loss": 0.3108, "step": 6785 }, { - "epoch": 0.39, - "grad_norm": 0.3867215053995416, - "learning_rate": 1.3943006898811453e-05, - "loss": 0.2849, + "epoch": 0.31, + "grad_norm": 0.49679111405320464, + "learning_rate": 1.611855447684583e-05, + "loss": 0.3482, "step": 6786 }, { - "epoch": 0.39, - "grad_norm": 1.1350554649970255, - "learning_rate": 1.3941296686326266e-05, - "loss": 0.8263, + "epoch": 0.31, + "grad_norm": 1.2816141259276508, + "learning_rate": 1.6117377510242937e-05, + "loss": 0.5277, "step": 6787 }, { - "epoch": 0.39, - "grad_norm": 0.3652869785590026, - "learning_rate": 1.3939586337354146e-05, - "loss": 0.2795, + "epoch": 0.31, + "grad_norm": 0.5528848412690877, + "learning_rate": 1.61162004082072e-05, + "loss": 0.2533, "step": 6788 }, { - "epoch": 0.39, - "grad_norm": 0.3622856983259643, - "learning_rate": 1.3937875851954316e-05, - "loss": 0.2834, + "epoch": 0.31, + "grad_norm": 0.4769770828112979, + "learning_rate": 1.6115023170764682e-05, + "loss": 0.323, "step": 6789 }, { - "epoch": 0.39, - "grad_norm": 0.5852464787006104, - "learning_rate": 1.3936165230186018e-05, - "loss": 0.4318, + "epoch": 0.31, + "grad_norm": 0.480064379038675, + "learning_rate": 1.6113845797941446e-05, + "loss": 0.3585, "step": 6790 }, { - "epoch": 0.39, - "grad_norm": 0.2725903077636448, - "learning_rate": 1.3934454472108488e-05, - "loss": 0.1255, + "epoch": 0.31, + "grad_norm": 0.24738841261086383, + "learning_rate": 1.6112668289763552e-05, + "loss": 0.1739, "step": 6791 }, { - "epoch": 0.39, - "grad_norm": 0.3542987477803968, - "learning_rate": 1.3932743577780967e-05, - "loss": 0.2905, + "epoch": 0.31, + "grad_norm": 1.3255656447705793, + "learning_rate": 1.6111490646257078e-05, + "loss": 0.6968, "step": 6792 }, { - "epoch": 0.39, - "grad_norm": 0.3643810295443851, - "learning_rate": 1.3931032547262707e-05, - "loss": 0.2526, + "epoch": 0.31, + "grad_norm": 0.9341280895214518, + "learning_rate": 1.611031286744809e-05, + "loss": 0.4414, "step": 6793 }, { - "epoch": 0.39, - "grad_norm": 0.6951650698020321, - "learning_rate": 1.3929321380612955e-05, - "loss": 0.3213, + "epoch": 0.31, + "grad_norm": 0.2871733134202783, + "learning_rate": 1.6109134953362664e-05, + "loss": 0.232, "step": 6794 }, { - "epoch": 0.39, - "grad_norm": 0.3221414003316139, - "learning_rate": 1.3927610077890976e-05, - "loss": 0.2688, + "epoch": 0.31, + "grad_norm": 0.5507399183982784, + "learning_rate": 1.610795690402688e-05, + "loss": 0.3022, "step": 6795 }, { - "epoch": 0.39, - "grad_norm": 0.6841427532021138, - "learning_rate": 1.3925898639156028e-05, - "loss": 0.4703, + "epoch": 0.31, + "grad_norm": 0.6050850138232671, + "learning_rate": 1.6106778719466817e-05, + "loss": 0.3637, "step": 6796 }, { - "epoch": 0.39, - "grad_norm": 0.2893061685049914, - "learning_rate": 1.3924187064467378e-05, - "loss": 0.2869, + "epoch": 0.31, + "grad_norm": 0.45738647184609826, + "learning_rate": 1.6105600399708556e-05, + "loss": 0.1876, "step": 6797 }, { - "epoch": 0.39, - "grad_norm": 0.3150811635114615, - "learning_rate": 1.3922475353884302e-05, - "loss": 0.2133, + "epoch": 0.31, + "grad_norm": 0.343693379784756, + "learning_rate": 1.6104421944778186e-05, + "loss": 0.3293, "step": 6798 }, { - "epoch": 0.39, - "grad_norm": 0.39697628073105073, - "learning_rate": 1.3920763507466071e-05, - "loss": 0.2859, + "epoch": 0.31, + "grad_norm": 0.6359526776662212, + "learning_rate": 1.6103243354701806e-05, + "loss": 0.4819, "step": 6799 }, { - "epoch": 0.39, - "grad_norm": 0.3824519346405898, - "learning_rate": 1.3919051525271968e-05, - "loss": 0.3316, + "epoch": 0.31, + "grad_norm": 0.70958675028496, + "learning_rate": 1.610206462950549e-05, + "loss": 0.3624, "step": 6800 }, { - "epoch": 0.39, - "grad_norm": 0.3120471764511407, - "learning_rate": 1.3917339407361278e-05, - "loss": 0.1981, + "epoch": 0.31, + "grad_norm": 0.4072201845114903, + "learning_rate": 1.6100885769215352e-05, + "loss": 0.2725, "step": 6801 }, { - "epoch": 0.39, - "grad_norm": 0.7719291568422046, - "learning_rate": 1.3915627153793294e-05, - "loss": 0.4703, + "epoch": 0.31, + "grad_norm": 0.4006641066705243, + "learning_rate": 1.6099706773857477e-05, + "loss": 0.3466, "step": 6802 }, { - "epoch": 0.39, - "grad_norm": 1.0406178609357577, - "learning_rate": 1.3913914764627311e-05, - "loss": 0.7213, + "epoch": 0.31, + "grad_norm": 0.31521109442414585, + "learning_rate": 1.609852764345798e-05, + "loss": 0.1346, "step": 6803 }, { - "epoch": 0.39, - "grad_norm": 0.3082881655989037, - "learning_rate": 1.3912202239922627e-05, - "loss": 0.1947, + "epoch": 0.31, + "grad_norm": 0.35542823126621176, + "learning_rate": 1.6097348378042955e-05, + "loss": 0.2928, "step": 6804 }, { - "epoch": 0.39, - "grad_norm": 0.269808735942457, - "learning_rate": 1.3910489579738548e-05, - "loss": 0.2328, + "epoch": 0.31, + "grad_norm": 0.7152261799285524, + "learning_rate": 1.6096168977638512e-05, + "loss": 0.4879, "step": 6805 }, { - "epoch": 0.39, - "grad_norm": 0.8449664005507034, - "learning_rate": 1.3908776784134382e-05, - "loss": 0.4975, + "epoch": 0.31, + "grad_norm": 0.29095419748280965, + "learning_rate": 1.6094989442270763e-05, + "loss": 0.2804, "step": 6806 }, { - "epoch": 0.39, - "grad_norm": 0.3668433609014333, - "learning_rate": 1.3907063853169445e-05, - "loss": 0.2513, + "epoch": 0.31, + "grad_norm": 0.4843826211116314, + "learning_rate": 1.6093809771965828e-05, + "loss": 0.2903, "step": 6807 }, { - "epoch": 0.39, - "grad_norm": 0.3828472985391112, - "learning_rate": 1.3905350786903053e-05, - "loss": 0.3159, + "epoch": 0.31, + "grad_norm": 0.5383422719547504, + "learning_rate": 1.609262996674981e-05, + "loss": 0.3184, "step": 6808 }, { - "epoch": 0.39, - "grad_norm": 0.9917346778146909, - "learning_rate": 1.3903637585394534e-05, - "loss": 0.6777, + "epoch": 0.31, + "grad_norm": 0.29424727688192087, + "learning_rate": 1.6091450026648842e-05, + "loss": 0.2202, "step": 6809 }, { - "epoch": 0.39, - "grad_norm": 0.34320658920451136, - "learning_rate": 1.390192424870321e-05, - "loss": 0.2877, + "epoch": 0.31, + "grad_norm": 0.32762336311741225, + "learning_rate": 1.609026995168904e-05, + "loss": 0.2742, "step": 6810 }, { - "epoch": 0.39, - "grad_norm": 0.22123271814015424, - "learning_rate": 1.3900210776888421e-05, - "loss": 0.1037, + "epoch": 0.31, + "grad_norm": 0.7601029061192826, + "learning_rate": 1.6089089741896532e-05, + "loss": 0.4783, "step": 6811 }, { - "epoch": 0.39, - "grad_norm": 0.43597104711910406, - "learning_rate": 1.38984971700095e-05, - "loss": 0.3482, + "epoch": 0.31, + "grad_norm": 0.42621298129095053, + "learning_rate": 1.6087909397297446e-05, + "loss": 0.3401, "step": 6812 }, { - "epoch": 0.39, - "grad_norm": 0.3876067777475436, - "learning_rate": 1.3896783428125789e-05, - "loss": 0.2848, + "epoch": 0.31, + "grad_norm": 0.5727437044560925, + "learning_rate": 1.6086728917917912e-05, + "loss": 0.372, "step": 6813 }, { - "epoch": 0.39, - "grad_norm": 0.814104832790337, - "learning_rate": 1.3895069551296634e-05, - "loss": 0.3313, + "epoch": 0.31, + "grad_norm": 0.3958965101183971, + "learning_rate": 1.608554830378407e-05, + "loss": 0.3055, "step": 6814 }, { - "epoch": 0.39, - "grad_norm": 1.0191123045537875, - "learning_rate": 1.389335553958139e-05, - "loss": 0.7098, + "epoch": 0.31, + "grad_norm": 0.3082188131968349, + "learning_rate": 1.6084367554922046e-05, + "loss": 0.2364, "step": 6815 }, { - "epoch": 0.39, - "grad_norm": 0.3189777727484337, - "learning_rate": 1.3891641393039408e-05, - "loss": 0.2773, + "epoch": 0.31, + "grad_norm": 0.6309798807923693, + "learning_rate": 1.6083186671357996e-05, + "loss": 0.3967, "step": 6816 }, { - "epoch": 0.39, - "grad_norm": 0.3103356437264256, - "learning_rate": 1.3889927111730056e-05, - "loss": 0.1763, + "epoch": 0.31, + "grad_norm": 0.2946102930878342, + "learning_rate": 1.6082005653118052e-05, + "loss": 0.2331, "step": 6817 }, { - "epoch": 0.39, - "grad_norm": 0.6834230208026675, - "learning_rate": 1.3888212695712693e-05, - "loss": 0.3407, + "epoch": 0.31, + "grad_norm": 0.7326357993089266, + "learning_rate": 1.6080824500228367e-05, + "loss": 0.3505, "step": 6818 }, { - "epoch": 0.39, - "grad_norm": 0.4373386910283233, - "learning_rate": 1.3886498145046689e-05, - "loss": 0.2706, + "epoch": 0.31, + "grad_norm": 0.45068825860403194, + "learning_rate": 1.6079643212715088e-05, + "loss": 0.3618, "step": 6819 }, { - "epoch": 0.39, - "grad_norm": 0.33425168275995765, - "learning_rate": 1.3884783459791427e-05, - "loss": 0.2554, + "epoch": 0.31, + "grad_norm": 0.7403188061324231, + "learning_rate": 1.6078461790604366e-05, + "loss": 0.4669, "step": 6820 }, { - "epoch": 0.39, - "grad_norm": 0.9990408288161097, - "learning_rate": 1.3883068640006277e-05, - "loss": 0.6442, + "epoch": 0.31, + "grad_norm": 0.4815423705489495, + "learning_rate": 1.607728023392236e-05, + "loss": 0.3256, "step": 6821 }, { - "epoch": 0.39, - "grad_norm": 0.3974999866738344, - "learning_rate": 1.3881353685750627e-05, - "loss": 0.2808, + "epoch": 0.31, + "grad_norm": 0.31322535261786, + "learning_rate": 1.607609854269523e-05, + "loss": 0.3044, "step": 6822 }, { - "epoch": 0.39, - "grad_norm": 0.48249560737044717, - "learning_rate": 1.3879638597083864e-05, - "loss": 0.3902, + "epoch": 0.31, + "grad_norm": 0.2588897743374257, + "learning_rate": 1.607491671694913e-05, + "loss": 0.1357, "step": 6823 }, { - "epoch": 0.39, - "grad_norm": 0.406224053807835, - "learning_rate": 1.387792337406539e-05, - "loss": 0.2945, + "epoch": 0.31, + "grad_norm": 0.6505474252827345, + "learning_rate": 1.607373475671023e-05, + "loss": 0.3622, "step": 6824 }, { - "epoch": 0.39, - "grad_norm": 0.2699756257137786, - "learning_rate": 1.3876208016754589e-05, - "loss": 0.2, + "epoch": 0.31, + "grad_norm": 0.45338916104718013, + "learning_rate": 1.6072552662004696e-05, + "loss": 0.3423, "step": 6825 }, { - "epoch": 0.39, - "grad_norm": 0.35033622402874065, - "learning_rate": 1.3874492525210874e-05, - "loss": 0.2657, + "epoch": 0.31, + "grad_norm": 0.4115278810209507, + "learning_rate": 1.60713704328587e-05, + "loss": 0.2984, "step": 6826 }, { - "epoch": 0.39, - "grad_norm": 0.923795988793285, - "learning_rate": 1.387277689949365e-05, - "loss": 0.4945, + "epoch": 0.31, + "grad_norm": 0.42939497297794194, + "learning_rate": 1.6070188069298413e-05, + "loss": 0.2536, "step": 6827 }, { - "epoch": 0.39, - "grad_norm": 0.29261064970858314, - "learning_rate": 1.3871061139662328e-05, - "loss": 0.2662, + "epoch": 0.31, + "grad_norm": 0.45023240812222515, + "learning_rate": 1.6069005571350017e-05, + "loss": 0.3614, "step": 6828 }, { - "epoch": 0.39, - "grad_norm": 0.5097583064881249, - "learning_rate": 1.3869345245776326e-05, - "loss": 0.3505, + "epoch": 0.31, + "grad_norm": 0.2823525598651837, + "learning_rate": 1.6067822939039684e-05, + "loss": 0.2358, "step": 6829 }, { - "epoch": 0.39, - "grad_norm": 0.3765657158030522, - "learning_rate": 1.3867629217895067e-05, - "loss": 0.2203, + "epoch": 0.31, + "grad_norm": 0.30143471812810824, + "learning_rate": 1.6066640172393596e-05, + "loss": 0.2172, "step": 6830 }, { - "epoch": 0.39, - "grad_norm": 0.3066487278928788, - "learning_rate": 1.3865913056077968e-05, - "loss": 0.2238, + "epoch": 0.31, + "grad_norm": 0.5948840656659351, + "learning_rate": 1.606545727143795e-05, + "loss": 0.426, "step": 6831 }, { - "epoch": 0.39, - "grad_norm": 0.49241965675255883, - "learning_rate": 1.3864196760384471e-05, - "loss": 0.3577, + "epoch": 0.31, + "grad_norm": 0.6713991171828799, + "learning_rate": 1.606427423619892e-05, + "loss": 0.5151, "step": 6832 }, { - "epoch": 0.39, - "grad_norm": 0.47556465523233393, - "learning_rate": 1.3862480330874004e-05, - "loss": 0.2933, + "epoch": 0.31, + "grad_norm": 0.34501182915948037, + "learning_rate": 1.60630910667027e-05, + "loss": 0.2542, "step": 6833 }, { - "epoch": 0.39, - "grad_norm": 0.3757822961632396, - "learning_rate": 1.3860763767606012e-05, - "loss": 0.2787, + "epoch": 0.31, + "grad_norm": 0.3574889925513999, + "learning_rate": 1.6061907762975493e-05, + "loss": 0.3016, "step": 6834 }, { - "epoch": 0.39, - "grad_norm": 0.7440805037762387, - "learning_rate": 1.3859047070639933e-05, - "loss": 0.4623, + "epoch": 0.31, + "grad_norm": 0.34461397672738825, + "learning_rate": 1.606072432504349e-05, + "loss": 0.1627, "step": 6835 }, { - "epoch": 0.39, - "grad_norm": 0.33732921252747394, - "learning_rate": 1.3857330240035223e-05, - "loss": 0.3338, + "epoch": 0.31, + "grad_norm": 0.4450290332471284, + "learning_rate": 1.605954075293289e-05, + "loss": 0.1395, "step": 6836 }, { - "epoch": 0.39, - "grad_norm": 0.18536598268984897, - "learning_rate": 1.385561327585133e-05, - "loss": 0.0748, + "epoch": 0.31, + "grad_norm": 0.45673990853789603, + "learning_rate": 1.60583570466699e-05, + "loss": 0.354, "step": 6837 }, { - "epoch": 0.39, - "grad_norm": 0.30528617287475135, - "learning_rate": 1.3853896178147717e-05, - "loss": 0.2542, + "epoch": 0.31, + "grad_norm": 0.44715701488297044, + "learning_rate": 1.605717320628072e-05, + "loss": 0.3432, "step": 6838 }, { - "epoch": 0.39, - "grad_norm": 0.48011544732265776, - "learning_rate": 1.3852178946983845e-05, - "loss": 0.4014, + "epoch": 0.31, + "grad_norm": 0.6940711367704417, + "learning_rate": 1.605598923179157e-05, + "loss": 0.4356, "step": 6839 }, { - "epoch": 0.39, - "grad_norm": 0.32213207603181976, - "learning_rate": 1.385046158241918e-05, - "loss": 0.2102, + "epoch": 0.31, + "grad_norm": 0.38083703912190275, + "learning_rate": 1.6054805123228648e-05, + "loss": 0.2554, "step": 6840 }, { - "epoch": 0.39, - "grad_norm": 0.6276796859084134, - "learning_rate": 1.3848744084513197e-05, - "loss": 0.3767, + "epoch": 0.31, + "grad_norm": 0.34331494545670194, + "learning_rate": 1.605362088061818e-05, + "loss": 0.1757, "step": 6841 }, { - "epoch": 0.39, - "grad_norm": 0.9645205479290853, - "learning_rate": 1.3847026453325371e-05, - "loss": 0.6006, + "epoch": 0.31, + "grad_norm": 0.3336766412842956, + "learning_rate": 1.605243650398638e-05, + "loss": 0.2941, "step": 6842 }, { - "epoch": 0.39, - "grad_norm": 0.18739949526105895, - "learning_rate": 1.3845308688915187e-05, - "loss": 0.1148, + "epoch": 0.31, + "grad_norm": 0.36675317407101293, + "learning_rate": 1.605125199335947e-05, + "loss": 0.2458, "step": 6843 }, { - "epoch": 0.39, - "grad_norm": 0.29294272479257805, - "learning_rate": 1.3843590791342125e-05, - "loss": 0.2939, + "epoch": 0.31, + "grad_norm": 0.8139713344227841, + "learning_rate": 1.6050067348763675e-05, + "loss": 0.4264, "step": 6844 }, { - "epoch": 0.39, - "grad_norm": 1.0560695569212466, - "learning_rate": 1.3841872760665682e-05, - "loss": 0.7409, + "epoch": 0.31, + "grad_norm": 0.3545051459600527, + "learning_rate": 1.6048882570225215e-05, + "loss": 0.3277, "step": 6845 }, { - "epoch": 0.39, - "grad_norm": 0.40634206076714197, - "learning_rate": 1.3840154596945352e-05, - "loss": 0.2672, + "epoch": 0.31, + "grad_norm": 0.3662930018112263, + "learning_rate": 1.6047697657770327e-05, + "loss": 0.2165, "step": 6846 }, { - "epoch": 0.39, - "grad_norm": 0.5534173099885904, - "learning_rate": 1.383843630024063e-05, - "loss": 0.4431, + "epoch": 0.31, + "grad_norm": 0.33413171516541573, + "learning_rate": 1.6046512611425243e-05, + "loss": 0.1861, "step": 6847 }, { - "epoch": 0.39, - "grad_norm": 0.35250494584408637, - "learning_rate": 1.3836717870611025e-05, - "loss": 0.3131, + "epoch": 0.31, + "grad_norm": 0.598947737971673, + "learning_rate": 1.6045327431216197e-05, + "loss": 0.3753, "step": 6848 }, { - "epoch": 0.39, - "grad_norm": 0.3773994520684638, - "learning_rate": 1.3834999308116045e-05, - "loss": 0.2855, + "epoch": 0.31, + "grad_norm": 0.4506697504356536, + "learning_rate": 1.6044142117169427e-05, + "loss": 0.2664, "step": 6849 }, { - "epoch": 0.39, - "grad_norm": 0.22077046425320704, - "learning_rate": 1.3833280612815204e-05, - "loss": 0.1251, + "epoch": 0.31, + "grad_norm": 0.48025022548378704, + "learning_rate": 1.6042956669311176e-05, + "loss": 0.3435, "step": 6850 }, { - "epoch": 0.39, - "grad_norm": 0.4475654638391578, - "learning_rate": 1.383156178476802e-05, - "loss": 0.42, + "epoch": 0.31, + "grad_norm": 0.5896149408113857, + "learning_rate": 1.604177108766769e-05, + "loss": 0.3381, "step": 6851 }, { - "epoch": 0.39, - "grad_norm": 0.3264987173804153, - "learning_rate": 1.3829842824034016e-05, - "loss": 0.3119, + "epoch": 0.31, + "grad_norm": 0.4317496372531082, + "learning_rate": 1.6040585372265213e-05, + "loss": 0.3112, "step": 6852 }, { - "epoch": 0.39, - "grad_norm": 0.4714716770949217, - "learning_rate": 1.382812373067272e-05, - "loss": 0.3138, + "epoch": 0.31, + "grad_norm": 0.2691848151848456, + "learning_rate": 1.6039399523130003e-05, + "loss": 0.2217, "step": 6853 }, { - "epoch": 0.39, - "grad_norm": 0.8911497620521466, - "learning_rate": 1.3826404504743662e-05, - "loss": 0.57, + "epoch": 0.31, + "grad_norm": 0.47747361973103597, + "learning_rate": 1.6038213540288304e-05, + "loss": 0.2363, "step": 6854 }, { - "epoch": 0.39, - "grad_norm": 0.33170515753882024, - "learning_rate": 1.3824685146306385e-05, - "loss": 0.2292, + "epoch": 0.31, + "grad_norm": 0.3886684146665068, + "learning_rate": 1.6037027423766375e-05, + "loss": 0.2785, "step": 6855 }, { - "epoch": 0.39, - "grad_norm": 0.28216547828961547, - "learning_rate": 1.3822965655420422e-05, - "loss": 0.2139, + "epoch": 0.31, + "grad_norm": 0.9390365614975763, + "learning_rate": 1.603584117359048e-05, + "loss": 0.3946, "step": 6856 }, { - "epoch": 0.39, - "grad_norm": 0.735537001474033, - "learning_rate": 1.3821246032145324e-05, - "loss": 0.4583, + "epoch": 0.32, + "grad_norm": 0.40074826452245976, + "learning_rate": 1.6034654789786878e-05, + "loss": 0.3285, "step": 6857 }, { - "epoch": 0.39, - "grad_norm": 0.5961310337286932, - "learning_rate": 1.381952627654064e-05, - "loss": 0.4259, + "epoch": 0.32, + "grad_norm": 0.409586810672038, + "learning_rate": 1.603346827238184e-05, + "loss": 0.2989, "step": 6858 }, { - "epoch": 0.39, - "grad_norm": 0.3900981399016567, - "learning_rate": 1.3817806388665926e-05, - "loss": 0.2397, + "epoch": 0.32, + "grad_norm": 0.31078953147650473, + "learning_rate": 1.6032281621401626e-05, + "loss": 0.1139, "step": 6859 }, { - "epoch": 0.39, - "grad_norm": 0.37331750009126125, - "learning_rate": 1.381608636858074e-05, - "loss": 0.3157, + "epoch": 0.32, + "grad_norm": 0.45640974303079584, + "learning_rate": 1.6031094836872506e-05, + "loss": 0.2964, "step": 6860 }, { - "epoch": 0.39, - "grad_norm": 0.5407319177531821, - "learning_rate": 1.3814366216344647e-05, - "loss": 0.3703, + "epoch": 0.32, + "grad_norm": 0.3763014255049807, + "learning_rate": 1.6029907918820768e-05, + "loss": 0.2779, "step": 6861 }, { - "epoch": 0.39, - "grad_norm": 0.22703111036296625, - "learning_rate": 1.3812645932017217e-05, - "loss": 0.1943, + "epoch": 0.32, + "grad_norm": 0.6784209985122777, + "learning_rate": 1.6028720867272673e-05, + "loss": 0.3237, "step": 6862 }, { - "epoch": 0.39, - "grad_norm": 0.653693685219927, - "learning_rate": 1.3810925515658022e-05, - "loss": 0.3258, + "epoch": 0.32, + "grad_norm": 0.8847086099764302, + "learning_rate": 1.6027533682254514e-05, + "loss": 0.5215, "step": 6863 }, { - "epoch": 0.39, - "grad_norm": 0.4038868654554686, - "learning_rate": 1.3809204967326641e-05, - "loss": 0.3235, + "epoch": 0.32, + "grad_norm": 0.4232336388396326, + "learning_rate": 1.6026346363792565e-05, + "loss": 0.3084, "step": 6864 }, { - "epoch": 0.39, - "grad_norm": 0.40131278450352875, - "learning_rate": 1.3807484287082655e-05, - "loss": 0.3271, + "epoch": 0.32, + "grad_norm": 0.507968126895754, + "learning_rate": 1.602515891191312e-05, + "loss": 0.4027, "step": 6865 }, { - "epoch": 0.39, - "grad_norm": 0.8587126265813383, - "learning_rate": 1.3805763474985651e-05, - "loss": 0.2415, + "epoch": 0.32, + "grad_norm": 0.2919721032057138, + "learning_rate": 1.602397132664246e-05, + "loss": 0.1933, "step": 6866 }, { - "epoch": 0.39, - "grad_norm": 0.3657681750663915, - "learning_rate": 1.3804042531095223e-05, - "loss": 0.3043, + "epoch": 0.32, + "grad_norm": 0.40227782258568073, + "learning_rate": 1.6022783608006882e-05, + "loss": 0.2841, "step": 6867 }, { - "epoch": 0.39, - "grad_norm": 0.31855912993435875, - "learning_rate": 1.3802321455470967e-05, - "loss": 0.2917, + "epoch": 0.32, + "grad_norm": 0.9535769571636706, + "learning_rate": 1.6021595756032676e-05, + "loss": 0.5107, "step": 6868 }, { - "epoch": 0.39, - "grad_norm": 0.4853379729560478, - "learning_rate": 1.3800600248172478e-05, - "loss": 0.2178, + "epoch": 0.32, + "grad_norm": 0.4060843341233045, + "learning_rate": 1.602040777074615e-05, + "loss": 0.2514, "step": 6869 }, { - "epoch": 0.39, - "grad_norm": 0.39579207655536114, - "learning_rate": 1.3798878909259368e-05, - "loss": 0.3008, + "epoch": 0.32, + "grad_norm": 0.4409029956950468, + "learning_rate": 1.6019219652173593e-05, + "loss": 0.3047, "step": 6870 }, { - "epoch": 0.39, - "grad_norm": 0.5117750783727549, - "learning_rate": 1.3797157438791244e-05, - "loss": 0.4075, + "epoch": 0.32, + "grad_norm": 0.43729588069683784, + "learning_rate": 1.601803140034132e-05, + "loss": 0.298, "step": 6871 }, { - "epoch": 0.39, - "grad_norm": 0.38032603698465695, - "learning_rate": 1.3795435836827724e-05, - "loss": 0.3377, + "epoch": 0.32, + "grad_norm": 0.44032164230768894, + "learning_rate": 1.6016843015275626e-05, + "loss": 0.232, "step": 6872 }, { - "epoch": 0.39, - "grad_norm": 0.5204418610641315, - "learning_rate": 1.3793714103428421e-05, - "loss": 0.1617, + "epoch": 0.32, + "grad_norm": 0.326167030678317, + "learning_rate": 1.6015654497002834e-05, + "loss": 0.272, "step": 6873 }, { - "epoch": 0.39, - "grad_norm": 0.2955007971124637, - "learning_rate": 1.3791992238652965e-05, - "loss": 0.2467, + "epoch": 0.32, + "grad_norm": 0.5167953468890346, + "learning_rate": 1.6014465845549242e-05, + "loss": 0.3728, "step": 6874 }, { - "epoch": 0.4, - "grad_norm": 0.3308877907838149, - "learning_rate": 1.3790270242560977e-05, - "loss": 0.2977, + "epoch": 0.32, + "grad_norm": 0.8922550098225157, + "learning_rate": 1.6013277060941176e-05, + "loss": 0.4295, "step": 6875 }, { - "epoch": 0.4, - "grad_norm": 0.38992600679999917, - "learning_rate": 1.3788548115212095e-05, - "loss": 0.2571, + "epoch": 0.32, + "grad_norm": 0.38770091810482626, + "learning_rate": 1.6012088143204953e-05, + "loss": 0.2686, "step": 6876 }, { - "epoch": 0.4, - "grad_norm": 0.38401415930593574, - "learning_rate": 1.3786825856665958e-05, - "loss": 0.3058, + "epoch": 0.32, + "grad_norm": 0.5403345297611002, + "learning_rate": 1.6010899092366893e-05, + "loss": 0.3632, "step": 6877 }, { - "epoch": 0.4, - "grad_norm": 0.7709559475093668, - "learning_rate": 1.3785103466982199e-05, - "loss": 0.5874, + "epoch": 0.32, + "grad_norm": 0.4529593587532332, + "learning_rate": 1.600970990845332e-05, + "loss": 0.3954, "step": 6878 }, { - "epoch": 0.4, - "grad_norm": 0.3114444710448705, - "learning_rate": 1.3783380946220474e-05, - "loss": 0.2394, + "epoch": 0.32, + "grad_norm": 0.27534562575417915, + "learning_rate": 1.6008520591490557e-05, + "loss": 0.1938, "step": 6879 }, { - "epoch": 0.4, - "grad_norm": 0.3432166855056662, - "learning_rate": 1.3781658294440427e-05, - "loss": 0.3115, + "epoch": 0.32, + "grad_norm": 0.45557094117888947, + "learning_rate": 1.6007331141504945e-05, + "loss": 0.2648, "step": 6880 }, { - "epoch": 0.4, - "grad_norm": 0.6522356404592244, - "learning_rate": 1.3779935511701717e-05, - "loss": 0.4908, + "epoch": 0.32, + "grad_norm": 0.43010774682390496, + "learning_rate": 1.600614155852281e-05, + "loss": 0.3301, "step": 6881 }, { - "epoch": 0.4, - "grad_norm": 0.21070036258642488, - "learning_rate": 1.3778212598064002e-05, - "loss": 0.1286, + "epoch": 0.32, + "grad_norm": 0.3469888570446096, + "learning_rate": 1.600495184257049e-05, + "loss": 0.203, "step": 6882 }, { - "epoch": 0.4, - "grad_norm": 0.27838016788335157, - "learning_rate": 1.3776489553586949e-05, - "loss": 0.2634, + "epoch": 0.32, + "grad_norm": 0.8671778549060267, + "learning_rate": 1.6003761993674324e-05, + "loss": 0.4917, "step": 6883 }, { - "epoch": 0.4, - "grad_norm": 0.5184621555490639, - "learning_rate": 1.3774766378330221e-05, - "loss": 0.4013, + "epoch": 0.32, + "grad_norm": 0.3943633702846707, + "learning_rate": 1.600257201186065e-05, + "loss": 0.379, "step": 6884 }, { - "epoch": 0.4, - "grad_norm": 0.615158987776567, - "learning_rate": 1.3773043072353503e-05, - "loss": 0.4176, + "epoch": 0.32, + "grad_norm": 0.3383425821711801, + "learning_rate": 1.6001381897155824e-05, + "loss": 0.2013, "step": 6885 }, { - "epoch": 0.4, - "grad_norm": 0.32917103198871883, - "learning_rate": 1.3771319635716459e-05, - "loss": 0.2412, + "epoch": 0.32, + "grad_norm": 0.36101508544224437, + "learning_rate": 1.6000191649586184e-05, + "loss": 0.3015, "step": 6886 }, { - "epoch": 0.4, - "grad_norm": 0.48537442050696666, - "learning_rate": 1.376959606847878e-05, - "loss": 0.3794, + "epoch": 0.32, + "grad_norm": 0.41799623857106905, + "learning_rate": 1.5999001269178082e-05, + "loss": 0.2791, "step": 6887 }, { - "epoch": 0.4, - "grad_norm": 0.437233197013368, - "learning_rate": 1.376787237070015e-05, - "loss": 0.3628, + "epoch": 0.32, + "grad_norm": 0.36177114568788626, + "learning_rate": 1.5997810755957873e-05, + "loss": 0.2103, "step": 6888 }, { - "epoch": 0.4, - "grad_norm": 0.3363888104947281, - "learning_rate": 1.3766148542440265e-05, - "loss": 0.2289, + "epoch": 0.32, + "grad_norm": 0.4635813671941839, + "learning_rate": 1.599662010995192e-05, + "loss": 0.3202, "step": 6889 }, { - "epoch": 0.4, - "grad_norm": 0.2641410487640712, - "learning_rate": 1.3764424583758816e-05, - "loss": 0.1768, + "epoch": 0.32, + "grad_norm": 0.8769488708957407, + "learning_rate": 1.599542933118657e-05, + "loss": 0.4475, "step": 6890 }, { - "epoch": 0.4, - "grad_norm": 0.3430245182460297, - "learning_rate": 1.3762700494715506e-05, - "loss": 0.3178, + "epoch": 0.32, + "grad_norm": 0.39010327570036185, + "learning_rate": 1.59942384196882e-05, + "loss": 0.297, "step": 6891 }, { - "epoch": 0.4, - "grad_norm": 0.3694883870067206, - "learning_rate": 1.3760976275370039e-05, - "loss": 0.2562, + "epoch": 0.32, + "grad_norm": 0.3335678281191913, + "learning_rate": 1.5993047375483162e-05, + "loss": 0.1796, "step": 6892 }, { - "epoch": 0.4, - "grad_norm": 0.6870317668549498, - "learning_rate": 1.3759251925782127e-05, - "loss": 0.4852, + "epoch": 0.32, + "grad_norm": 0.3353713916587556, + "learning_rate": 1.599185619859784e-05, + "loss": 0.2677, "step": 6893 }, { - "epoch": 0.4, - "grad_norm": 1.2038599651172182, - "learning_rate": 1.3757527446011479e-05, - "loss": 0.8427, + "epoch": 0.32, + "grad_norm": 0.35712774063039415, + "learning_rate": 1.599066488905859e-05, + "loss": 0.286, "step": 6894 }, { - "epoch": 0.4, - "grad_norm": 0.2434935746268074, - "learning_rate": 1.375580283611782e-05, - "loss": 0.1944, + "epoch": 0.32, + "grad_norm": 0.8281197344738832, + "learning_rate": 1.5989473446891797e-05, + "loss": 0.3607, "step": 6895 }, { - "epoch": 0.4, - "grad_norm": 0.3256197247270627, - "learning_rate": 1.3754078096160871e-05, - "loss": 0.2661, + "epoch": 0.32, + "grad_norm": 0.8488193424114462, + "learning_rate": 1.598828187212383e-05, + "loss": 0.4748, "step": 6896 }, { - "epoch": 0.4, - "grad_norm": 0.7596028889004636, - "learning_rate": 1.3752353226200359e-05, - "loss": 0.4223, + "epoch": 0.32, + "grad_norm": 0.3078338726199657, + "learning_rate": 1.598709016478108e-05, + "loss": 0.28, "step": 6897 }, { - "epoch": 0.4, - "grad_norm": 0.3579206665965782, - "learning_rate": 1.3750628226296012e-05, - "loss": 0.2608, + "epoch": 0.32, + "grad_norm": 0.4576072838980302, + "learning_rate": 1.5985898324889916e-05, + "loss": 0.2575, "step": 6898 }, { - "epoch": 0.4, - "grad_norm": 0.42991083239765604, - "learning_rate": 1.3748903096507576e-05, - "loss": 0.2457, + "epoch": 0.32, + "grad_norm": 0.32795162253703297, + "learning_rate": 1.598470635247674e-05, + "loss": 0.1884, "step": 6899 }, { - "epoch": 0.4, - "grad_norm": 0.46276695668103407, - "learning_rate": 1.3747177836894783e-05, - "loss": 0.4311, + "epoch": 0.32, + "grad_norm": 0.4561446002325696, + "learning_rate": 1.5983514247567932e-05, + "loss": 0.2971, "step": 6900 }, { - "epoch": 0.4, - "grad_norm": 0.33938255692060043, - "learning_rate": 1.3745452447517384e-05, - "loss": 0.253, + "epoch": 0.32, + "grad_norm": 0.48078808325838135, + "learning_rate": 1.5982322010189882e-05, + "loss": 0.35, "step": 6901 }, { - "epoch": 0.4, - "grad_norm": 0.33146128208950326, - "learning_rate": 1.3743726928435129e-05, - "loss": 0.1271, + "epoch": 0.32, + "grad_norm": 1.4042664619064698, + "learning_rate": 1.598112964036899e-05, + "loss": 0.4052, "step": 6902 }, { - "epoch": 0.4, - "grad_norm": 0.4516192355561343, - "learning_rate": 1.3742001279707771e-05, - "loss": 0.3194, + "epoch": 0.32, + "grad_norm": 0.46527242320449574, + "learning_rate": 1.5979937138131653e-05, + "loss": 0.2957, "step": 6903 }, { - "epoch": 0.4, - "grad_norm": 0.3831757519966381, - "learning_rate": 1.3740275501395068e-05, - "loss": 0.2835, + "epoch": 0.32, + "grad_norm": 0.5875960600114221, + "learning_rate": 1.5978744503504272e-05, + "loss": 0.4124, "step": 6904 }, { - "epoch": 0.4, - "grad_norm": 0.7703354652013342, - "learning_rate": 1.3738549593556787e-05, - "loss": 0.255, + "epoch": 0.32, + "grad_norm": 0.27732684730113033, + "learning_rate": 1.5977551736513254e-05, + "loss": 0.1848, "step": 6905 }, { - "epoch": 0.4, - "grad_norm": 0.6825758957008587, - "learning_rate": 1.3736823556252694e-05, - "loss": 0.5165, + "epoch": 0.32, + "grad_norm": 0.4752967015586555, + "learning_rate": 1.5976358837184997e-05, + "loss": 0.2687, "step": 6906 }, { - "epoch": 0.4, - "grad_norm": 0.3866563201141378, - "learning_rate": 1.373509738954256e-05, - "loss": 0.3017, + "epoch": 0.32, + "grad_norm": 0.9484632343741927, + "learning_rate": 1.5975165805545914e-05, + "loss": 0.3972, "step": 6907 }, { - "epoch": 0.4, - "grad_norm": 0.2428939493291775, - "learning_rate": 1.3733371093486168e-05, - "loss": 0.2285, + "epoch": 0.32, + "grad_norm": 0.9767531084441508, + "learning_rate": 1.597397264162242e-05, + "loss": 0.3719, "step": 6908 }, { - "epoch": 0.4, - "grad_norm": 0.6721461211190297, - "learning_rate": 1.3731644668143291e-05, - "loss": 0.4576, + "epoch": 0.32, + "grad_norm": 0.3074732388545118, + "learning_rate": 1.597277934544093e-05, + "loss": 0.2789, "step": 6909 }, { - "epoch": 0.4, - "grad_norm": 0.3443745352273169, - "learning_rate": 1.3729918113573723e-05, - "loss": 0.2499, + "epoch": 0.32, + "grad_norm": 0.4866662058408034, + "learning_rate": 1.5971585917027864e-05, + "loss": 0.3899, "step": 6910 }, { - "epoch": 0.4, - "grad_norm": 0.3699209177126759, - "learning_rate": 1.3728191429837247e-05, - "loss": 0.3297, + "epoch": 0.32, + "grad_norm": 0.33342035515963886, + "learning_rate": 1.5970392356409638e-05, + "loss": 0.1841, "step": 6911 }, { - "epoch": 0.4, - "grad_norm": 0.9218896511237294, - "learning_rate": 1.3726464616993667e-05, - "loss": 0.5562, + "epoch": 0.32, + "grad_norm": 0.435606684206042, + "learning_rate": 1.5969198663612683e-05, + "loss": 0.3018, "step": 6912 }, { - "epoch": 0.4, - "grad_norm": 0.36061835491190963, - "learning_rate": 1.3724737675102774e-05, - "loss": 0.251, + "epoch": 0.32, + "grad_norm": 0.47893228425258727, + "learning_rate": 1.596800483866342e-05, + "loss": 0.3487, "step": 6913 }, { - "epoch": 0.4, - "grad_norm": 0.6697499130698259, - "learning_rate": 1.3723010604224381e-05, - "loss": 0.4177, + "epoch": 0.32, + "grad_norm": 0.44275612396158515, + "learning_rate": 1.596681088158828e-05, + "loss": 0.2256, "step": 6914 }, { - "epoch": 0.4, - "grad_norm": 0.22123181145234125, - "learning_rate": 1.3721283404418283e-05, - "loss": 0.1735, + "epoch": 0.32, + "grad_norm": 0.4220416337744027, + "learning_rate": 1.5965616792413704e-05, + "loss": 0.2228, "step": 6915 }, { - "epoch": 0.4, - "grad_norm": 0.31925352597321077, - "learning_rate": 1.3719556075744306e-05, - "loss": 0.2455, + "epoch": 0.32, + "grad_norm": 1.3529763468449862, + "learning_rate": 1.596442257116612e-05, + "loss": 0.8071, "step": 6916 }, { - "epoch": 0.4, - "grad_norm": 0.9239303856324638, - "learning_rate": 1.3717828618262261e-05, - "loss": 0.5894, + "epoch": 0.32, + "grad_norm": 0.44418141764394653, + "learning_rate": 1.5963228217871965e-05, + "loss": 0.3291, "step": 6917 }, { - "epoch": 0.4, - "grad_norm": 0.5162709979789786, - "learning_rate": 1.3716101032031972e-05, - "loss": 0.3261, + "epoch": 0.32, + "grad_norm": 0.3396552657633624, + "learning_rate": 1.5962033732557685e-05, + "loss": 0.2538, "step": 6918 }, { - "epoch": 0.4, - "grad_norm": 0.29582580195856606, - "learning_rate": 1.371437331711326e-05, - "loss": 0.2552, + "epoch": 0.32, + "grad_norm": 0.3915801776030961, + "learning_rate": 1.596083911524973e-05, + "loss": 0.3043, "step": 6919 }, { - "epoch": 0.4, - "grad_norm": 0.8585276824390256, - "learning_rate": 1.3712645473565964e-05, - "loss": 0.4624, + "epoch": 0.32, + "grad_norm": 0.3648653081976593, + "learning_rate": 1.595964436597454e-05, + "loss": 0.2684, "step": 6920 }, { - "epoch": 0.4, - "grad_norm": 0.3036535597617637, - "learning_rate": 1.3710917501449911e-05, - "loss": 0.1622, + "epoch": 0.32, + "grad_norm": 0.33500108351701374, + "learning_rate": 1.5958449484758565e-05, + "loss": 0.2106, "step": 6921 }, { - "epoch": 0.4, - "grad_norm": 0.375439624708647, - "learning_rate": 1.3709189400824948e-05, - "loss": 0.2807, + "epoch": 0.32, + "grad_norm": 0.690530127555515, + "learning_rate": 1.595725447162826e-05, + "loss": 0.4264, "step": 6922 }, { - "epoch": 0.4, - "grad_norm": 0.38903684691791973, - "learning_rate": 1.3707461171750916e-05, - "loss": 0.2963, + "epoch": 0.32, + "grad_norm": 0.8618071242197098, + "learning_rate": 1.595605932661009e-05, + "loss": 0.4837, "step": 6923 }, { - "epoch": 0.4, - "grad_norm": 1.3030138881612745, - "learning_rate": 1.370573281428766e-05, - "loss": 0.8033, + "epoch": 0.32, + "grad_norm": 0.35652332215607885, + "learning_rate": 1.59548640497305e-05, + "loss": 0.2158, "step": 6924 }, { - "epoch": 0.4, - "grad_norm": 0.3366170348258912, - "learning_rate": 1.3704004328495037e-05, - "loss": 0.2032, + "epoch": 0.32, + "grad_norm": 0.3031567683003244, + "learning_rate": 1.5953668641015966e-05, + "loss": 0.2465, "step": 6925 }, { - "epoch": 0.4, - "grad_norm": 0.8395341835724895, - "learning_rate": 1.3702275714432905e-05, - "loss": 0.4634, + "epoch": 0.32, + "grad_norm": 0.5282492146970877, + "learning_rate": 1.595247310049294e-05, + "loss": 0.3094, "step": 6926 }, { - "epoch": 0.4, - "grad_norm": 0.22712153378717959, - "learning_rate": 1.3700546972161121e-05, - "loss": 0.2303, + "epoch": 0.32, + "grad_norm": 0.6403949276638996, + "learning_rate": 1.59512774281879e-05, + "loss": 0.3388, "step": 6927 }, { - "epoch": 0.4, - "grad_norm": 0.3062410578990784, - "learning_rate": 1.3698818101739554e-05, - "loss": 0.1977, + "epoch": 0.32, + "grad_norm": 0.39067733403251004, + "learning_rate": 1.5950081624127313e-05, + "loss": 0.2657, "step": 6928 }, { - "epoch": 0.4, - "grad_norm": 1.0418163317880769, - "learning_rate": 1.3697089103228081e-05, - "loss": 0.7044, + "epoch": 0.32, + "grad_norm": 0.5381938789802505, + "learning_rate": 1.5948885688337653e-05, + "loss": 0.4074, "step": 6929 }, { - "epoch": 0.4, - "grad_norm": 0.7665050260008015, - "learning_rate": 1.3695359976686568e-05, - "loss": 0.52, + "epoch": 0.32, + "grad_norm": 0.4861501759524133, + "learning_rate": 1.5947689620845395e-05, + "loss": 0.3484, "step": 6930 }, { - "epoch": 0.4, - "grad_norm": 0.2718208821347409, - "learning_rate": 1.3693630722174898e-05, - "loss": 0.225, + "epoch": 0.32, + "grad_norm": 0.30761780885397155, + "learning_rate": 1.5946493421677024e-05, + "loss": 0.1808, "step": 6931 }, { - "epoch": 0.4, - "grad_norm": 0.5994959857163412, - "learning_rate": 1.3691901339752955e-05, - "loss": 0.48, + "epoch": 0.32, + "grad_norm": 0.37904072435830366, + "learning_rate": 1.594529709085902e-05, + "loss": 0.2693, "step": 6932 }, { - "epoch": 0.4, - "grad_norm": 0.42763359444722393, - "learning_rate": 1.3690171829480628e-05, - "loss": 0.279, + "epoch": 0.32, + "grad_norm": 0.3356277272991689, + "learning_rate": 1.594410062841787e-05, + "loss": 0.266, "step": 6933 }, { - "epoch": 0.4, - "grad_norm": 0.2819399462682956, - "learning_rate": 1.3688442191417805e-05, - "loss": 0.169, + "epoch": 0.32, + "grad_norm": 0.5315972328413664, + "learning_rate": 1.594290403438006e-05, + "loss": 0.3077, "step": 6934 }, { - "epoch": 0.4, - "grad_norm": 0.3855701660647398, - "learning_rate": 1.3686712425624393e-05, - "loss": 0.3225, + "epoch": 0.32, + "grad_norm": 0.6409617624888739, + "learning_rate": 1.594170730877208e-05, + "loss": 0.4547, "step": 6935 }, { - "epoch": 0.4, - "grad_norm": 0.9462468602813544, - "learning_rate": 1.3684982532160285e-05, - "loss": 0.5966, + "epoch": 0.32, + "grad_norm": 0.4423873108982728, + "learning_rate": 1.594051045162043e-05, + "loss": 0.2881, "step": 6936 }, { - "epoch": 0.4, - "grad_norm": 0.34783075864199253, - "learning_rate": 1.3683252511085391e-05, - "loss": 0.2796, + "epoch": 0.32, + "grad_norm": 0.35771566609468075, + "learning_rate": 1.59393134629516e-05, + "loss": 0.26, "step": 6937 }, { - "epoch": 0.4, - "grad_norm": 0.702993674672384, - "learning_rate": 1.3681522362459623e-05, - "loss": 0.3554, + "epoch": 0.32, + "grad_norm": 0.3161700659163172, + "learning_rate": 1.5938116342792102e-05, + "loss": 0.1864, "step": 6938 }, { - "epoch": 0.4, - "grad_norm": 0.2971323853404743, - "learning_rate": 1.3679792086342892e-05, - "loss": 0.2779, + "epoch": 0.32, + "grad_norm": 0.909307031811636, + "learning_rate": 1.5936919091168425e-05, + "loss": 0.5266, "step": 6939 }, { - "epoch": 0.4, - "grad_norm": 0.2884143111686183, - "learning_rate": 1.367806168279512e-05, - "loss": 0.2304, + "epoch": 0.32, + "grad_norm": 0.37653560140862996, + "learning_rate": 1.5935721708107084e-05, + "loss": 0.3006, "step": 6940 }, { - "epoch": 0.4, - "grad_norm": 0.5212153825592891, - "learning_rate": 1.3676331151876227e-05, - "loss": 0.2851, + "epoch": 0.32, + "grad_norm": 0.3637480262662844, + "learning_rate": 1.5934524193634585e-05, + "loss": 0.3126, "step": 6941 }, { - "epoch": 0.4, - "grad_norm": 0.4431896490235908, - "learning_rate": 1.3674600493646146e-05, - "loss": 0.3398, + "epoch": 0.32, + "grad_norm": 0.6448110803857646, + "learning_rate": 1.593332654777744e-05, + "loss": 0.4071, "step": 6942 }, { - "epoch": 0.4, - "grad_norm": 0.4475954113556971, - "learning_rate": 1.367286970816481e-05, - "loss": 0.2763, + "epoch": 0.32, + "grad_norm": 0.4629095737874534, + "learning_rate": 1.593212877056216e-05, + "loss": 0.3411, "step": 6943 }, { - "epoch": 0.4, - "grad_norm": 0.669305965541524, - "learning_rate": 1.3671138795492155e-05, - "loss": 0.3324, + "epoch": 0.32, + "grad_norm": 0.274595404618627, + "learning_rate": 1.5930930862015272e-05, + "loss": 0.1848, "step": 6944 }, { - "epoch": 0.4, - "grad_norm": 0.3080889359061354, - "learning_rate": 1.3669407755688117e-05, - "loss": 0.1614, + "epoch": 0.32, + "grad_norm": 0.6586069668335975, + "learning_rate": 1.592973282216329e-05, + "loss": 0.3532, "step": 6945 }, { - "epoch": 0.4, - "grad_norm": 0.40932396074492605, - "learning_rate": 1.366767658881265e-05, - "loss": 0.2977, + "epoch": 0.32, + "grad_norm": 0.41901383214740723, + "learning_rate": 1.5928534651032737e-05, + "loss": 0.3426, "step": 6946 }, { - "epoch": 0.4, - "grad_norm": 0.32701274366526195, - "learning_rate": 1.36659452949257e-05, - "loss": 0.2876, + "epoch": 0.32, + "grad_norm": 0.8287847070146014, + "learning_rate": 1.592733634865014e-05, + "loss": 0.4165, "step": 6947 }, { - "epoch": 0.4, - "grad_norm": 0.7057822249995881, - "learning_rate": 1.3664213874087223e-05, - "loss": 0.3661, + "epoch": 0.32, + "grad_norm": 0.4093970543107728, + "learning_rate": 1.5926137915042033e-05, + "loss": 0.3005, "step": 6948 }, { - "epoch": 0.4, - "grad_norm": 0.3797999918281963, - "learning_rate": 1.3662482326357172e-05, - "loss": 0.28, + "epoch": 0.32, + "grad_norm": 0.3470183858810759, + "learning_rate": 1.592493935023494e-05, + "loss": 0.2914, "step": 6949 }, { - "epoch": 0.4, - "grad_norm": 0.5054705360107299, - "learning_rate": 1.366075065179552e-05, - "loss": 0.373, + "epoch": 0.32, + "grad_norm": 0.2251401644423978, + "learning_rate": 1.59237406542554e-05, + "loss": 0.1113, "step": 6950 }, { - "epoch": 0.4, - "grad_norm": 0.4181068108631131, - "learning_rate": 1.3659018850462226e-05, - "loss": 0.2382, + "epoch": 0.32, + "grad_norm": 0.4184922996960351, + "learning_rate": 1.5922541827129958e-05, + "loss": 0.2996, "step": 6951 }, { - "epoch": 0.4, - "grad_norm": 0.2658521324344886, - "learning_rate": 1.3657286922417272e-05, - "loss": 0.1952, + "epoch": 0.32, + "grad_norm": 0.5393464053393179, + "learning_rate": 1.592134286888514e-05, + "loss": 0.3966, "step": 6952 }, { - "epoch": 0.4, - "grad_norm": 0.33355700539346833, - "learning_rate": 1.3655554867720623e-05, - "loss": 0.2598, + "epoch": 0.32, + "grad_norm": 0.42479808828378685, + "learning_rate": 1.5920143779547503e-05, + "loss": 0.345, "step": 6953 }, { - "epoch": 0.4, - "grad_norm": 0.7434397980880477, - "learning_rate": 1.3653822686432271e-05, - "loss": 0.3192, + "epoch": 0.32, + "grad_norm": 0.3879686329522963, + "learning_rate": 1.5918944559143587e-05, + "loss": 0.1949, "step": 6954 }, { - "epoch": 0.4, - "grad_norm": 0.30312485770426095, - "learning_rate": 1.3652090378612198e-05, - "loss": 0.2706, + "epoch": 0.32, + "grad_norm": 0.5552847108088039, + "learning_rate": 1.5917745207699944e-05, + "loss": 0.3647, "step": 6955 }, { - "epoch": 0.4, - "grad_norm": 1.0086654469423801, - "learning_rate": 1.3650357944320387e-05, - "loss": 0.6798, + "epoch": 0.32, + "grad_norm": 0.26603469896501525, + "learning_rate": 1.5916545725243124e-05, + "loss": 0.2171, "step": 6956 }, { - "epoch": 0.4, - "grad_norm": 0.2880087205351191, - "learning_rate": 1.3648625383616841e-05, - "loss": 0.1437, + "epoch": 0.32, + "grad_norm": 0.7996686853383073, + "learning_rate": 1.5915346111799685e-05, + "loss": 0.2921, "step": 6957 }, { - "epoch": 0.4, - "grad_norm": 0.29183807559357594, - "learning_rate": 1.3646892696561554e-05, - "loss": 0.2153, + "epoch": 0.32, + "grad_norm": 0.37037930645833633, + "learning_rate": 1.5914146367396184e-05, + "loss": 0.302, "step": 6958 }, { - "epoch": 0.4, - "grad_norm": 0.36485493480482983, - "learning_rate": 1.3645159883214528e-05, - "loss": 0.3043, + "epoch": 0.32, + "grad_norm": 0.812935821607108, + "learning_rate": 1.5912946492059183e-05, + "loss": 0.6086, "step": 6959 }, { - "epoch": 0.4, - "grad_norm": 0.6233292098008584, - "learning_rate": 1.3643426943635774e-05, - "loss": 0.4441, + "epoch": 0.32, + "grad_norm": 0.37040842776695465, + "learning_rate": 1.5911746485815243e-05, + "loss": 0.2238, "step": 6960 }, { - "epoch": 0.4, - "grad_norm": 0.35202902887011656, - "learning_rate": 1.36416938778853e-05, - "loss": 0.1655, + "epoch": 0.32, + "grad_norm": 0.3946849282870186, + "learning_rate": 1.5910546348690937e-05, + "loss": 0.2737, "step": 6961 }, { - "epoch": 0.4, - "grad_norm": 0.4126226172553717, - "learning_rate": 1.3639960686023126e-05, - "loss": 0.3344, + "epoch": 0.32, + "grad_norm": 0.29992381253370365, + "learning_rate": 1.590934608071283e-05, + "loss": 0.2081, "step": 6962 }, { - "epoch": 0.4, - "grad_norm": 0.37428085092700025, - "learning_rate": 1.3638227368109268e-05, - "loss": 0.3312, + "epoch": 0.32, + "grad_norm": 0.6996118821176629, + "learning_rate": 1.59081456819075e-05, + "loss": 0.2856, "step": 6963 }, { - "epoch": 0.4, - "grad_norm": 0.30610032926280945, - "learning_rate": 1.3636493924203756e-05, - "loss": 0.1934, + "epoch": 0.32, + "grad_norm": 0.3245246847693063, + "learning_rate": 1.5906945152301513e-05, + "loss": 0.3091, "step": 6964 }, { - "epoch": 0.4, - "grad_norm": 0.31909149409640514, - "learning_rate": 1.3634760354366612e-05, - "loss": 0.2953, + "epoch": 0.32, + "grad_norm": 0.5125440038117659, + "learning_rate": 1.5905744491921458e-05, + "loss": 0.3695, "step": 6965 }, { - "epoch": 0.4, - "grad_norm": 0.816373087029119, - "learning_rate": 1.3633026658657872e-05, - "loss": 0.6193, + "epoch": 0.32, + "grad_norm": 0.81201944495728, + "learning_rate": 1.590454370079391e-05, + "loss": 0.4692, "step": 6966 }, { - "epoch": 0.4, - "grad_norm": 0.2950274786523385, - "learning_rate": 1.3631292837137577e-05, - "loss": 0.2384, + "epoch": 0.32, + "grad_norm": 0.3531949158404305, + "learning_rate": 1.5903342778945456e-05, + "loss": 0.2292, "step": 6967 }, { - "epoch": 0.4, - "grad_norm": 0.34463380268308075, - "learning_rate": 1.3629558889865768e-05, - "loss": 0.2693, + "epoch": 0.32, + "grad_norm": 0.30026128821558595, + "learning_rate": 1.5902141726402683e-05, + "loss": 0.2429, "step": 6968 }, { - "epoch": 0.4, - "grad_norm": 1.445343970358979, - "learning_rate": 1.3627824816902494e-05, - "loss": 0.7602, + "epoch": 0.32, + "grad_norm": 0.7398692022710958, + "learning_rate": 1.5900940543192178e-05, + "loss": 0.3977, "step": 6969 }, { - "epoch": 0.4, - "grad_norm": 0.2910854511165072, - "learning_rate": 1.3626090618307796e-05, - "loss": 0.2366, + "epoch": 0.32, + "grad_norm": 0.2961299475918896, + "learning_rate": 1.589973922934054e-05, + "loss": 0.1787, "step": 6970 }, { - "epoch": 0.4, - "grad_norm": 0.3727574615944739, - "learning_rate": 1.3624356294141738e-05, - "loss": 0.347, + "epoch": 0.32, + "grad_norm": 1.2727301200189884, + "learning_rate": 1.589853778487436e-05, + "loss": 0.8791, "step": 6971 }, { - "epoch": 0.4, - "grad_norm": 0.4251921909257409, - "learning_rate": 1.3622621844464379e-05, - "loss": 0.2888, + "epoch": 0.32, + "grad_norm": 0.3480884788234686, + "learning_rate": 1.589733620982024e-05, + "loss": 0.268, "step": 6972 }, { - "epoch": 0.4, - "grad_norm": 0.3357843692589245, - "learning_rate": 1.362088726933578e-05, - "loss": 0.2384, + "epoch": 0.32, + "grad_norm": 0.4112824919959956, + "learning_rate": 1.589613450420478e-05, + "loss": 0.2496, "step": 6973 }, { - "epoch": 0.4, - "grad_norm": 0.36993298536045216, - "learning_rate": 1.361915256881601e-05, - "loss": 0.2396, + "epoch": 0.32, + "grad_norm": 0.7164931737780263, + "learning_rate": 1.5894932668054584e-05, + "loss": 0.4524, "step": 6974 }, { - "epoch": 0.4, - "grad_norm": 0.3614041412393671, - "learning_rate": 1.3617417742965144e-05, - "loss": 0.3096, + "epoch": 0.32, + "grad_norm": 0.42534252649023646, + "learning_rate": 1.589373070139626e-05, + "loss": 0.2674, "step": 6975 }, { - "epoch": 0.4, - "grad_norm": 0.37952388985458196, - "learning_rate": 1.3615682791843257e-05, - "loss": 0.3231, + "epoch": 0.32, + "grad_norm": 0.3203618831423938, + "learning_rate": 1.589252860425642e-05, + "loss": 0.2409, "step": 6976 }, { - "epoch": 0.4, - "grad_norm": 0.3901412474468786, - "learning_rate": 1.3613947715510429e-05, - "loss": 0.3072, + "epoch": 0.32, + "grad_norm": 0.37633900464818065, + "learning_rate": 1.5891326376661673e-05, + "loss": 0.295, "step": 6977 }, { - "epoch": 0.4, - "grad_norm": 0.36218398221854425, - "learning_rate": 1.3612212514026746e-05, - "loss": 0.3417, + "epoch": 0.32, + "grad_norm": 1.6632348828260697, + "learning_rate": 1.589012401863864e-05, + "loss": 0.8379, "step": 6978 }, { - "epoch": 0.4, - "grad_norm": 0.8055590911647899, - "learning_rate": 1.3610477187452303e-05, - "loss": 0.4594, + "epoch": 0.32, + "grad_norm": 0.4397446089791949, + "learning_rate": 1.5888921530213938e-05, + "loss": 0.2884, "step": 6979 }, { - "epoch": 0.4, - "grad_norm": 0.2289632841507798, - "learning_rate": 1.3608741735847186e-05, - "loss": 0.1503, + "epoch": 0.32, + "grad_norm": 0.405569344813435, + "learning_rate": 1.5887718911414193e-05, + "loss": 0.2987, "step": 6980 }, { - "epoch": 0.4, - "grad_norm": 0.6391920773085977, - "learning_rate": 1.3607006159271503e-05, - "loss": 0.4503, + "epoch": 0.32, + "grad_norm": 0.7348671276608425, + "learning_rate": 1.5886516162266022e-05, + "loss": 0.5122, "step": 6981 }, { - "epoch": 0.4, - "grad_norm": 0.3301091566046304, - "learning_rate": 1.3605270457785346e-05, - "loss": 0.3016, + "epoch": 0.32, + "grad_norm": 0.41252584472183856, + "learning_rate": 1.5885313282796055e-05, + "loss": 0.2909, "step": 6982 }, { - "epoch": 0.4, - "grad_norm": 0.3309863876921998, - "learning_rate": 1.3603534631448831e-05, - "loss": 0.2646, + "epoch": 0.32, + "grad_norm": 0.3462711642240843, + "learning_rate": 1.5884110273030926e-05, + "loss": 0.1531, "step": 6983 }, { - "epoch": 0.4, - "grad_norm": 1.2898443402936977, - "learning_rate": 1.3601798680322068e-05, - "loss": 0.7883, + "epoch": 0.32, + "grad_norm": 0.32032695032253367, + "learning_rate": 1.5882907132997267e-05, + "loss": 0.243, "step": 6984 }, { - "epoch": 0.4, - "grad_norm": 0.34812557437044317, - "learning_rate": 1.3600062604465168e-05, - "loss": 0.2385, + "epoch": 0.32, + "grad_norm": 0.40311796625937124, + "learning_rate": 1.5881703862721714e-05, + "loss": 0.2963, "step": 6985 }, { - "epoch": 0.4, - "grad_norm": 0.277167302217143, - "learning_rate": 1.3598326403938255e-05, - "loss": 0.2632, + "epoch": 0.32, + "grad_norm": 0.670350134136847, + "learning_rate": 1.588050046223091e-05, + "loss": 0.3703, "step": 6986 }, { - "epoch": 0.4, - "grad_norm": 0.5174657148756132, - "learning_rate": 1.3596590078801458e-05, - "loss": 0.3016, + "epoch": 0.32, + "grad_norm": 0.8122544777904941, + "learning_rate": 1.587929693155149e-05, + "loss": 0.5043, "step": 6987 }, { - "epoch": 0.4, - "grad_norm": 0.4792618762975345, - "learning_rate": 1.3594853629114896e-05, - "loss": 0.3499, + "epoch": 0.32, + "grad_norm": 0.46970269922932073, + "learning_rate": 1.58780932707101e-05, + "loss": 0.2701, "step": 6988 }, { - "epoch": 0.4, - "grad_norm": 0.41025013782946984, - "learning_rate": 1.359311705493871e-05, - "loss": 0.312, + "epoch": 0.32, + "grad_norm": 0.33628963076202206, + "learning_rate": 1.5876889479733393e-05, + "loss": 0.2215, "step": 6989 }, { - "epoch": 0.4, - "grad_norm": 0.3738632934541031, - "learning_rate": 1.3591380356333038e-05, - "loss": 0.2848, + "epoch": 0.32, + "grad_norm": 0.36926985031194653, + "learning_rate": 1.587568555864802e-05, + "loss": 0.2319, "step": 6990 }, { - "epoch": 0.4, - "grad_norm": 0.5999221057686619, - "learning_rate": 1.3589643533358013e-05, - "loss": 0.3353, + "epoch": 0.32, + "grad_norm": 0.41933988387072885, + "learning_rate": 1.5874481507480627e-05, + "loss": 0.2744, "step": 6991 }, { - "epoch": 0.4, - "grad_norm": 0.32699926243927424, - "learning_rate": 1.358790658607379e-05, - "loss": 0.2355, + "epoch": 0.32, + "grad_norm": 0.3858673053069211, + "learning_rate": 1.587327732625788e-05, + "loss": 0.3337, "step": 6992 }, { - "epoch": 0.4, - "grad_norm": 0.43293676610358867, - "learning_rate": 1.3586169514540512e-05, - "loss": 0.2488, + "epoch": 0.32, + "grad_norm": 0.6208726978720848, + "learning_rate": 1.5872073015006428e-05, + "loss": 0.2952, "step": 6993 }, { - "epoch": 0.4, - "grad_norm": 0.32886181991925645, - "learning_rate": 1.3584432318818344e-05, - "loss": 0.3064, + "epoch": 0.32, + "grad_norm": 0.45439751346288976, + "learning_rate": 1.5870868573752942e-05, + "loss": 0.3141, "step": 6994 }, { - "epoch": 0.4, - "grad_norm": 0.35377350978253397, - "learning_rate": 1.3582694998967434e-05, - "loss": 0.316, + "epoch": 0.32, + "grad_norm": 0.5046595391458979, + "learning_rate": 1.5869664002524087e-05, + "loss": 0.3503, "step": 6995 }, { - "epoch": 0.4, - "grad_norm": 0.685901668579314, - "learning_rate": 1.3580957555047953e-05, - "loss": 0.4758, + "epoch": 0.32, + "grad_norm": 0.29785386679867737, + "learning_rate": 1.5868459301346523e-05, + "loss": 0.2154, "step": 6996 }, { - "epoch": 0.4, - "grad_norm": 0.6877362501402533, - "learning_rate": 1.3579219987120065e-05, - "loss": 0.4549, + "epoch": 0.32, + "grad_norm": 0.3985196186107559, + "learning_rate": 1.586725447024693e-05, + "loss": 0.2923, "step": 6997 }, { - "epoch": 0.4, - "grad_norm": 0.27094743803612736, - "learning_rate": 1.3577482295243944e-05, - "loss": 0.2349, + "epoch": 0.32, + "grad_norm": 0.6091645423934269, + "learning_rate": 1.5866049509251977e-05, + "loss": 0.4636, "step": 6998 }, { - "epoch": 0.4, - "grad_norm": 0.3247502461542125, - "learning_rate": 1.3575744479479764e-05, - "loss": 0.2856, + "epoch": 0.32, + "grad_norm": 0.4994393555808652, + "learning_rate": 1.586484441838834e-05, + "loss": 0.2702, "step": 6999 }, { - "epoch": 0.4, - "grad_norm": 0.6147448714871042, - "learning_rate": 1.3574006539887707e-05, - "loss": 0.2349, + "epoch": 0.32, + "grad_norm": 0.2987715008595132, + "learning_rate": 1.5863639197682702e-05, + "loss": 0.2693, "step": 7000 }, { - "epoch": 0.4, - "grad_norm": 0.38562569593393564, - "learning_rate": 1.3572268476527954e-05, - "loss": 0.2908, + "epoch": 0.32, + "grad_norm": 1.5194518614292296, + "learning_rate": 1.5862433847161743e-05, + "loss": 0.8884, "step": 7001 }, { - "epoch": 0.4, - "grad_norm": 0.3642951593240954, - "learning_rate": 1.3570530289460701e-05, - "loss": 0.3264, + "epoch": 0.32, + "grad_norm": 0.3236844969139569, + "learning_rate": 1.5861228366852148e-05, + "loss": 0.1952, "step": 7002 }, { - "epoch": 0.4, - "grad_norm": 0.5235074663626574, - "learning_rate": 1.3568791978746137e-05, - "loss": 0.2494, + "epoch": 0.32, + "grad_norm": 0.38884563513378256, + "learning_rate": 1.5860022756780605e-05, + "loss": 0.287, "step": 7003 }, { - "epoch": 0.4, - "grad_norm": 0.2952802191250271, - "learning_rate": 1.356705354444446e-05, - "loss": 0.2524, + "epoch": 0.32, + "grad_norm": 0.4468560354570671, + "learning_rate": 1.585881701697381e-05, + "loss": 0.3274, "step": 7004 }, { - "epoch": 0.4, - "grad_norm": 0.946358029879268, - "learning_rate": 1.3565314986615871e-05, - "loss": 0.4903, + "epoch": 0.32, + "grad_norm": 1.0251347470159884, + "learning_rate": 1.585761114745845e-05, + "loss": 0.6627, "step": 7005 }, { - "epoch": 0.4, - "grad_norm": 0.23563139165705427, - "learning_rate": 1.3563576305320579e-05, - "loss": 0.1791, + "epoch": 0.32, + "grad_norm": 0.3168888464016101, + "learning_rate": 1.585640514826123e-05, + "loss": 0.2009, "step": 7006 }, { - "epoch": 0.4, - "grad_norm": 0.40333290157473234, - "learning_rate": 1.356183750061879e-05, - "loss": 0.3251, + "epoch": 0.32, + "grad_norm": 1.511274961618501, + "learning_rate": 1.585519901940884e-05, + "loss": 0.8503, "step": 7007 }, { - "epoch": 0.4, - "grad_norm": 1.2051917632433689, - "learning_rate": 1.3560098572570725e-05, - "loss": 0.821, + "epoch": 0.32, + "grad_norm": 0.30665772093699484, + "learning_rate": 1.5853992760927985e-05, + "loss": 0.279, "step": 7008 }, { - "epoch": 0.4, - "grad_norm": 0.37974187511583635, - "learning_rate": 1.35583595212366e-05, - "loss": 0.1655, + "epoch": 0.32, + "grad_norm": 0.35810481255151594, + "learning_rate": 1.5852786372845374e-05, + "loss": 0.2125, "step": 7009 }, { - "epoch": 0.4, - "grad_norm": 0.3522486679741345, - "learning_rate": 1.3556620346676633e-05, - "loss": 0.2931, + "epoch": 0.32, + "grad_norm": 0.5058657560529324, + "learning_rate": 1.5851579855187718e-05, + "loss": 0.3144, "step": 7010 }, { - "epoch": 0.4, - "grad_norm": 0.6771158949431482, - "learning_rate": 1.355488104895106e-05, - "loss": 0.3763, + "epoch": 0.32, + "grad_norm": 0.4901265802305454, + "learning_rate": 1.585037320798172e-05, + "loss": 0.3428, "step": 7011 }, { - "epoch": 0.4, - "grad_norm": 0.2299304342135175, - "learning_rate": 1.3553141628120107e-05, - "loss": 0.1646, + "epoch": 0.32, + "grad_norm": 0.39933863050402013, + "learning_rate": 1.5849166431254097e-05, + "loss": 0.199, "step": 7012 }, { - "epoch": 0.4, - "grad_norm": 0.33815698261225796, - "learning_rate": 1.3551402084244014e-05, - "loss": 0.2277, + "epoch": 0.32, + "grad_norm": 0.9381234933348189, + "learning_rate": 1.584795952503157e-05, + "loss": 0.6108, "step": 7013 }, { - "epoch": 0.4, - "grad_norm": 0.3550244974142543, - "learning_rate": 1.3549662417383018e-05, - "loss": 0.3468, + "epoch": 0.32, + "grad_norm": 0.587697812519193, + "learning_rate": 1.584675248934085e-05, + "loss": 0.4646, "step": 7014 }, { - "epoch": 0.4, - "grad_norm": 1.499478939324147, - "learning_rate": 1.354792262759737e-05, - "loss": 0.7654, + "epoch": 0.32, + "grad_norm": 0.41601005932308976, + "learning_rate": 1.584554532420867e-05, + "loss": 0.309, "step": 7015 }, { - "epoch": 0.4, - "grad_norm": 0.33123396540469313, - "learning_rate": 1.3546182714947309e-05, - "loss": 0.2452, + "epoch": 0.32, + "grad_norm": 0.2783392801683017, + "learning_rate": 1.5844338029661746e-05, + "loss": 0.1693, "step": 7016 }, { - "epoch": 0.4, - "grad_norm": 0.30626982106831946, - "learning_rate": 1.3544442679493095e-05, - "loss": 0.2432, + "epoch": 0.32, + "grad_norm": 1.5745887702044628, + "learning_rate": 1.5843130605726816e-05, + "loss": 0.8246, "step": 7017 }, { - "epoch": 0.4, - "grad_norm": 0.36381971525818, - "learning_rate": 1.3542702521294981e-05, - "loss": 0.3203, + "epoch": 0.32, + "grad_norm": 0.40984878499469385, + "learning_rate": 1.5841923052430605e-05, + "loss": 0.2883, "step": 7018 }, { - "epoch": 0.4, - "grad_norm": 0.2968935067872526, - "learning_rate": 1.3540962240413233e-05, - "loss": 0.2144, + "epoch": 0.32, + "grad_norm": 0.7584016875381563, + "learning_rate": 1.584071536979985e-05, + "loss": 0.4103, "step": 7019 }, { - "epoch": 0.4, - "grad_norm": 0.9758630593144416, - "learning_rate": 1.3539221836908113e-05, - "loss": 0.6957, + "epoch": 0.32, + "grad_norm": 0.3989450099102825, + "learning_rate": 1.583950755786128e-05, + "loss": 0.3519, "step": 7020 }, { - "epoch": 0.4, - "grad_norm": 0.47204156166840916, - "learning_rate": 1.3537481310839897e-05, - "loss": 0.351, + "epoch": 0.32, + "grad_norm": 0.43722143984576023, + "learning_rate": 1.5838299616641647e-05, + "loss": 0.2907, "step": 7021 }, { - "epoch": 0.4, - "grad_norm": 0.31181724975707065, - "learning_rate": 1.353574066226885e-05, - "loss": 0.2339, + "epoch": 0.32, + "grad_norm": 0.2233912390190278, + "learning_rate": 1.583709154616768e-05, + "loss": 0.0734, "step": 7022 }, { - "epoch": 0.4, - "grad_norm": 0.8749376420778082, - "learning_rate": 1.353399989125526e-05, - "loss": 0.4849, + "epoch": 0.32, + "grad_norm": 0.40367170609808656, + "learning_rate": 1.5835883346466137e-05, + "loss": 0.3338, "step": 7023 }, { - "epoch": 0.4, - "grad_norm": 0.2859353718730429, - "learning_rate": 1.3532258997859404e-05, - "loss": 0.197, + "epoch": 0.32, + "grad_norm": 0.5874177778425868, + "learning_rate": 1.5834675017563764e-05, + "loss": 0.2972, "step": 7024 }, { - "epoch": 0.4, - "grad_norm": 0.3325129256222981, - "learning_rate": 1.3530517982141574e-05, - "loss": 0.2746, + "epoch": 0.32, + "grad_norm": 0.4923285045067426, + "learning_rate": 1.5833466559487305e-05, + "loss": 0.297, "step": 7025 }, { - "epoch": 0.4, - "grad_norm": 0.3507133854637492, - "learning_rate": 1.3528776844162052e-05, - "loss": 0.277, + "epoch": 0.32, + "grad_norm": 0.7148566082214294, + "learning_rate": 1.5832257972263523e-05, + "loss": 0.4415, "step": 7026 }, { - "epoch": 0.4, - "grad_norm": 0.5776205911314057, - "learning_rate": 1.3527035583981145e-05, - "loss": 0.4077, + "epoch": 0.32, + "grad_norm": 0.46271961459001154, + "learning_rate": 1.5831049255919166e-05, + "loss": 0.2911, "step": 7027 }, { - "epoch": 0.4, - "grad_norm": 0.3720017793660261, - "learning_rate": 1.3525294201659145e-05, - "loss": 0.2941, + "epoch": 0.32, + "grad_norm": 0.2899847274702938, + "learning_rate": 1.5829840410481e-05, + "loss": 0.2454, "step": 7028 }, { - "epoch": 0.4, - "grad_norm": 0.6266844281338557, - "learning_rate": 1.3523552697256359e-05, - "loss": 0.3631, + "epoch": 0.32, + "grad_norm": 0.3245711229305609, + "learning_rate": 1.5828631435975784e-05, + "loss": 0.1707, "step": 7029 }, { - "epoch": 0.4, - "grad_norm": 0.3606261379793363, - "learning_rate": 1.3521811070833095e-05, - "loss": 0.2194, + "epoch": 0.32, + "grad_norm": 0.48453886811842634, + "learning_rate": 1.582742233243029e-05, + "loss": 0.3028, "step": 7030 }, { - "epoch": 0.4, - "grad_norm": 0.37657437220486956, - "learning_rate": 1.3520069322449663e-05, - "loss": 0.2835, + "epoch": 0.32, + "grad_norm": 0.6697341837538948, + "learning_rate": 1.5826213099871283e-05, + "loss": 0.417, "step": 7031 }, { - "epoch": 0.4, - "grad_norm": 0.8620238092878418, - "learning_rate": 1.3518327452166385e-05, - "loss": 0.4009, + "epoch": 0.32, + "grad_norm": 0.43280378742972203, + "learning_rate": 1.582500373832553e-05, + "loss": 0.263, "step": 7032 }, { - "epoch": 0.4, - "grad_norm": 0.654820668093201, - "learning_rate": 1.3516585460043576e-05, - "loss": 0.4591, + "epoch": 0.32, + "grad_norm": 0.4382249961580811, + "learning_rate": 1.5823794247819807e-05, + "loss": 0.278, "step": 7033 }, { - "epoch": 0.4, - "grad_norm": 0.3013622890908199, - "learning_rate": 1.3514843346141566e-05, - "loss": 0.2671, + "epoch": 0.32, + "grad_norm": 0.3122594246376777, + "learning_rate": 1.58225846283809e-05, + "loss": 0.2171, "step": 7034 }, { - "epoch": 0.4, - "grad_norm": 0.4458870442813115, - "learning_rate": 1.3513101110520678e-05, - "loss": 0.3693, + "epoch": 0.32, + "grad_norm": 0.4925201368329097, + "learning_rate": 1.5821374880035573e-05, + "loss": 0.3129, "step": 7035 }, { - "epoch": 0.4, - "grad_norm": 0.19906848799136267, - "learning_rate": 1.3511358753241254e-05, - "loss": 0.1152, + "epoch": 0.32, + "grad_norm": 0.32176347621818263, + "learning_rate": 1.582016500281062e-05, + "loss": 0.2646, "step": 7036 }, { - "epoch": 0.4, - "grad_norm": 0.32092656053178614, - "learning_rate": 1.3509616274363623e-05, - "loss": 0.2576, + "epoch": 0.32, + "grad_norm": 0.7783341204397229, + "learning_rate": 1.5818954996732824e-05, + "loss": 0.5031, "step": 7037 }, { - "epoch": 0.4, - "grad_norm": 0.3677600497999778, - "learning_rate": 1.3507873673948137e-05, - "loss": 0.3478, + "epoch": 0.32, + "grad_norm": 0.6571167030321027, + "learning_rate": 1.5817744861828973e-05, + "loss": 0.3726, "step": 7038 }, { - "epoch": 0.4, - "grad_norm": 0.9170207065182442, - "learning_rate": 1.3506130952055132e-05, - "loss": 0.4109, + "epoch": 0.32, + "grad_norm": 0.4290360708082677, + "learning_rate": 1.5816534598125858e-05, + "loss": 0.292, "step": 7039 }, { - "epoch": 0.4, - "grad_norm": 0.35086745820449605, - "learning_rate": 1.3504388108744966e-05, - "loss": 0.3033, + "epoch": 0.32, + "grad_norm": 0.43748331329321793, + "learning_rate": 1.5815324205650272e-05, + "loss": 0.3258, "step": 7040 }, { - "epoch": 0.4, - "grad_norm": 1.099592946999764, - "learning_rate": 1.3502645144077987e-05, - "loss": 0.5843, + "epoch": 0.32, + "grad_norm": 0.29741116283590613, + "learning_rate": 1.5814113684429012e-05, + "loss": 0.2266, "step": 7041 }, { - "epoch": 0.4, - "grad_norm": 0.212123843237808, - "learning_rate": 1.350090205811456e-05, - "loss": 0.1806, + "epoch": 0.32, + "grad_norm": 0.4432495160657537, + "learning_rate": 1.581290303448888e-05, + "loss": 0.2383, "step": 7042 }, { - "epoch": 0.4, - "grad_norm": 0.33900519569136367, - "learning_rate": 1.3499158850915044e-05, - "loss": 0.2861, + "epoch": 0.32, + "grad_norm": 0.6461177350711316, + "learning_rate": 1.5811692255856677e-05, + "loss": 0.3963, "step": 7043 }, { - "epoch": 0.4, - "grad_norm": 0.8621314063592519, - "learning_rate": 1.3497415522539807e-05, - "loss": 0.6942, + "epoch": 0.32, + "grad_norm": 0.39291935499020886, + "learning_rate": 1.5810481348559206e-05, + "loss": 0.3386, "step": 7044 }, { - "epoch": 0.4, - "grad_norm": 0.3746493330911956, - "learning_rate": 1.3495672073049221e-05, - "loss": 0.3091, + "epoch": 0.32, + "grad_norm": 0.3627894285941001, + "learning_rate": 1.5809270312623285e-05, + "loss": 0.1818, "step": 7045 }, { - "epoch": 0.4, - "grad_norm": 0.3969087626583301, - "learning_rate": 1.3493928502503664e-05, - "loss": 0.2964, + "epoch": 0.32, + "grad_norm": 0.3104216583452634, + "learning_rate": 1.580805914807571e-05, + "loss": 0.2475, "step": 7046 }, { - "epoch": 0.4, - "grad_norm": 0.48381748985794387, - "learning_rate": 1.3492184810963512e-05, - "loss": 0.3625, + "epoch": 0.32, + "grad_norm": 0.3187025212366548, + "learning_rate": 1.580684785494331e-05, + "loss": 0.2702, "step": 7047 }, { - "epoch": 0.4, - "grad_norm": 0.3345842253885811, - "learning_rate": 1.349044099848915e-05, - "loss": 0.2105, + "epoch": 0.32, + "grad_norm": 0.40618861964824365, + "learning_rate": 1.5805636433252892e-05, + "loss": 0.2201, "step": 7048 }, { - "epoch": 0.4, - "grad_norm": 0.30381301666422506, - "learning_rate": 1.3488697065140964e-05, - "loss": 0.1968, + "epoch": 0.32, + "grad_norm": 0.5029632200939427, + "learning_rate": 1.5804424883031278e-05, + "loss": 0.3887, "step": 7049 }, { - "epoch": 0.41, - "grad_norm": 0.41334840598392936, - "learning_rate": 1.348695301097935e-05, - "loss": 0.3586, + "epoch": 0.32, + "grad_norm": 1.353741252296431, + "learning_rate": 1.5803213204305296e-05, + "loss": 0.706, "step": 7050 }, { - "epoch": 0.41, - "grad_norm": 0.7832888653278328, - "learning_rate": 1.3485208836064705e-05, - "loss": 0.4857, + "epoch": 0.32, + "grad_norm": 0.3222564140371361, + "learning_rate": 1.5802001397101766e-05, + "loss": 0.2327, "step": 7051 }, { - "epoch": 0.41, - "grad_norm": 0.33241008219443263, - "learning_rate": 1.3483464540457428e-05, - "loss": 0.2076, + "epoch": 0.32, + "grad_norm": 0.29503784486134943, + "learning_rate": 1.5800789461447513e-05, + "loss": 0.2503, "step": 7052 }, { - "epoch": 0.41, - "grad_norm": 0.49777611048133946, - "learning_rate": 1.348172012421792e-05, - "loss": 0.3706, + "epoch": 0.32, + "grad_norm": 0.4649224153251475, + "learning_rate": 1.5799577397369374e-05, + "loss": 0.3034, "step": 7053 }, { - "epoch": 0.41, - "grad_norm": 0.5163034090447539, - "learning_rate": 1.3479975587406595e-05, - "loss": 0.3745, + "epoch": 0.32, + "grad_norm": 0.544466315090083, + "learning_rate": 1.5798365204894182e-05, + "loss": 0.3806, "step": 7054 }, { - "epoch": 0.41, - "grad_norm": 0.22940568788016039, - "learning_rate": 1.3478230930083868e-05, - "loss": 0.1524, + "epoch": 0.32, + "grad_norm": 0.4165995865076928, + "learning_rate": 1.5797152884048772e-05, + "loss": 0.2623, "step": 7055 }, { - "epoch": 0.41, - "grad_norm": 0.9077703091017053, - "learning_rate": 1.3476486152310152e-05, - "loss": 0.444, + "epoch": 0.32, + "grad_norm": 0.40300126813059706, + "learning_rate": 1.5795940434859988e-05, + "loss": 0.3143, "step": 7056 }, { - "epoch": 0.41, - "grad_norm": 0.5325747150064066, - "learning_rate": 1.3474741254145868e-05, - "loss": 0.397, + "epoch": 0.32, + "grad_norm": 0.40222924253914133, + "learning_rate": 1.5794727857354667e-05, + "loss": 0.2679, "step": 7057 }, { - "epoch": 0.41, - "grad_norm": 0.2938576127412327, - "learning_rate": 1.3472996235651446e-05, - "loss": 0.227, + "epoch": 0.32, + "grad_norm": 0.4795952423234212, + "learning_rate": 1.5793515151559654e-05, + "loss": 0.2749, "step": 7058 }, { - "epoch": 0.41, - "grad_norm": 1.2376508865292748, - "learning_rate": 1.3471251096887312e-05, - "loss": 0.6284, + "epoch": 0.32, + "grad_norm": 0.26735847199324964, + "learning_rate": 1.57923023175018e-05, + "loss": 0.2571, "step": 7059 }, { - "epoch": 0.41, - "grad_norm": 0.400031588718824, - "learning_rate": 1.3469505837913903e-05, - "loss": 0.2609, + "epoch": 0.32, + "grad_norm": 0.4861939241519787, + "learning_rate": 1.5791089355207958e-05, + "loss": 0.3244, "step": 7060 }, { - "epoch": 0.41, - "grad_norm": 0.3362257964775687, - "learning_rate": 1.3467760458791656e-05, - "loss": 0.2845, + "epoch": 0.32, + "grad_norm": 0.5088622258196305, + "learning_rate": 1.5789876264704976e-05, + "loss": 0.3102, "step": 7061 }, { - "epoch": 0.41, - "grad_norm": 0.4483199711195374, - "learning_rate": 1.3466014959581013e-05, - "loss": 0.2888, + "epoch": 0.32, + "grad_norm": 0.941350111566803, + "learning_rate": 1.5788663046019716e-05, + "loss": 0.599, "step": 7062 }, { - "epoch": 0.41, - "grad_norm": 1.0500941292938073, - "learning_rate": 1.3464269340342422e-05, - "loss": 0.6551, + "epoch": 0.32, + "grad_norm": 0.4654436940082658, + "learning_rate": 1.5787449699179035e-05, + "loss": 0.335, "step": 7063 }, { - "epoch": 0.41, - "grad_norm": 0.35210336824936767, - "learning_rate": 1.346252360113633e-05, - "loss": 0.2358, + "epoch": 0.32, + "grad_norm": 0.3494005532652234, + "learning_rate": 1.5786236224209794e-05, + "loss": 0.2364, "step": 7064 }, { - "epoch": 0.41, - "grad_norm": 0.3657147809379971, - "learning_rate": 1.3460777742023202e-05, - "loss": 0.2663, + "epoch": 0.32, + "grad_norm": 0.4565802442528608, + "learning_rate": 1.578502262113886e-05, + "loss": 0.3434, "step": 7065 }, { - "epoch": 0.41, - "grad_norm": 0.503373542099447, - "learning_rate": 1.3459031763063482e-05, - "loss": 0.365, + "epoch": 0.32, + "grad_norm": 0.3930450147236665, + "learning_rate": 1.57838088899931e-05, + "loss": 0.2468, "step": 7066 }, { - "epoch": 0.41, - "grad_norm": 0.42739082768414866, - "learning_rate": 1.3457285664317645e-05, - "loss": 0.3174, + "epoch": 0.32, + "grad_norm": 0.3462006485523297, + "learning_rate": 1.578259503079939e-05, + "loss": 0.2708, "step": 7067 }, { - "epoch": 0.41, - "grad_norm": 0.358482347181096, - "learning_rate": 1.3455539445846151e-05, - "loss": 0.2484, + "epoch": 0.32, + "grad_norm": 0.5048257621632538, + "learning_rate": 1.5781381043584598e-05, + "loss": 0.3174, "step": 7068 }, { - "epoch": 0.41, - "grad_norm": 0.3661152740979383, - "learning_rate": 1.3453793107709476e-05, - "loss": 0.3302, + "epoch": 0.32, + "grad_norm": 0.5950023493788619, + "learning_rate": 1.5780166928375597e-05, + "loss": 0.3548, "step": 7069 }, { - "epoch": 0.41, - "grad_norm": 0.3814990470872192, - "learning_rate": 1.3452046649968091e-05, - "loss": 0.2572, + "epoch": 0.32, + "grad_norm": 0.40542166133443314, + "learning_rate": 1.5778952685199277e-05, + "loss": 0.3176, "step": 7070 }, { - "epoch": 0.41, - "grad_norm": 0.3260559821888633, - "learning_rate": 1.3450300072682485e-05, - "loss": 0.1877, + "epoch": 0.32, + "grad_norm": 0.3648558735746394, + "learning_rate": 1.5777738314082514e-05, + "loss": 0.3044, "step": 7071 }, { - "epoch": 0.41, - "grad_norm": 0.823857743699866, - "learning_rate": 1.3448553375913132e-05, - "loss": 0.4651, + "epoch": 0.32, + "grad_norm": 0.33630819743275625, + "learning_rate": 1.5776523815052192e-05, + "loss": 0.2185, "step": 7072 }, { - "epoch": 0.41, - "grad_norm": 0.35011088470733437, - "learning_rate": 1.3446806559720525e-05, - "loss": 0.3223, + "epoch": 0.32, + "grad_norm": 0.43126215720563094, + "learning_rate": 1.57753091881352e-05, + "loss": 0.3397, "step": 7073 }, { - "epoch": 0.41, - "grad_norm": 0.35841773511333275, - "learning_rate": 1.3445059624165156e-05, - "loss": 0.3149, + "epoch": 0.32, + "grad_norm": 0.32908474271947774, + "learning_rate": 1.5774094433358426e-05, + "loss": 0.1656, "step": 7074 }, { - "epoch": 0.41, - "grad_norm": 0.9612427340225015, - "learning_rate": 1.3443312569307517e-05, - "loss": 0.5688, + "epoch": 0.33, + "grad_norm": 0.37716287577339264, + "learning_rate": 1.5772879550748773e-05, + "loss": 0.307, "step": 7075 }, { - "epoch": 0.41, - "grad_norm": 0.25064310434852977, - "learning_rate": 1.3441565395208114e-05, - "loss": 0.1798, + "epoch": 0.33, + "grad_norm": 0.4232597156547772, + "learning_rate": 1.5771664540333123e-05, + "loss": 0.2849, "step": 7076 }, { - "epoch": 0.41, - "grad_norm": 0.5838118579171294, - "learning_rate": 1.343981810192745e-05, - "loss": 0.3781, + "epoch": 0.33, + "grad_norm": 0.60882281616766, + "learning_rate": 1.5770449402138387e-05, + "loss": 0.3704, "step": 7077 }, { - "epoch": 0.41, - "grad_norm": 0.40143952342120387, - "learning_rate": 1.3438070689526033e-05, - "loss": 0.2896, + "epoch": 0.33, + "grad_norm": 0.5124361998287736, + "learning_rate": 1.5769234136191464e-05, + "loss": 0.2911, "step": 7078 }, { - "epoch": 0.41, - "grad_norm": 0.36313108466885224, - "learning_rate": 1.3436323158064373e-05, - "loss": 0.2885, + "epoch": 0.33, + "grad_norm": 0.4623270719524845, + "learning_rate": 1.5768018742519258e-05, + "loss": 0.3218, "step": 7079 }, { - "epoch": 0.41, - "grad_norm": 0.7995956838032602, - "learning_rate": 1.3434575507602991e-05, - "loss": 0.5193, + "epoch": 0.33, + "grad_norm": 0.29380197925881496, + "learning_rate": 1.5766803221148676e-05, + "loss": 0.215, "step": 7080 }, { - "epoch": 0.41, - "grad_norm": 0.4270076800549399, - "learning_rate": 1.3432827738202407e-05, - "loss": 0.2954, + "epoch": 0.33, + "grad_norm": 0.6255721310019001, + "learning_rate": 1.5765587572106623e-05, + "loss": 0.2463, "step": 7081 }, { - "epoch": 0.41, - "grad_norm": 0.3050621499863082, - "learning_rate": 1.3431079849923153e-05, - "loss": 0.1848, + "epoch": 0.33, + "grad_norm": 0.4028652804287961, + "learning_rate": 1.5764371795420023e-05, + "loss": 0.3133, "step": 7082 }, { - "epoch": 0.41, - "grad_norm": 0.3510960749297572, - "learning_rate": 1.3429331842825742e-05, - "loss": 0.2506, + "epoch": 0.33, + "grad_norm": 0.4195086038770725, + "learning_rate": 1.576315589111579e-05, + "loss": 0.3431, "step": 7083 }, { - "epoch": 0.41, - "grad_norm": 0.6715341783485381, - "learning_rate": 1.342758371697072e-05, - "loss": 0.3628, + "epoch": 0.33, + "grad_norm": 0.5466980537753369, + "learning_rate": 1.5761939859220836e-05, + "loss": 0.1564, "step": 7084 }, { - "epoch": 0.41, - "grad_norm": 0.4085613591910861, - "learning_rate": 1.342583547241862e-05, - "loss": 0.2762, + "epoch": 0.33, + "grad_norm": 0.4364585525160943, + "learning_rate": 1.576072369976209e-05, + "loss": 0.3446, "step": 7085 }, { - "epoch": 0.41, - "grad_norm": 0.32885154493809926, - "learning_rate": 1.3424087109229986e-05, - "loss": 0.3057, + "epoch": 0.33, + "grad_norm": 0.3108104218036844, + "learning_rate": 1.575950741276647e-05, + "loss": 0.1915, "step": 7086 }, { - "epoch": 0.41, - "grad_norm": 1.1248597962403253, - "learning_rate": 1.3422338627465364e-05, - "loss": 0.7771, + "epoch": 0.33, + "grad_norm": 0.31965716798944344, + "learning_rate": 1.575829099826091e-05, + "loss": 0.2636, "step": 7087 }, { - "epoch": 0.41, - "grad_norm": 0.2362658079430318, - "learning_rate": 1.3420590027185301e-05, - "loss": 0.1333, + "epoch": 0.33, + "grad_norm": 0.41521259698088897, + "learning_rate": 1.5757074456272333e-05, + "loss": 0.3252, "step": 7088 }, { - "epoch": 0.41, - "grad_norm": 0.27743124046471523, - "learning_rate": 1.3418841308450353e-05, - "loss": 0.2448, + "epoch": 0.33, + "grad_norm": 0.8762257288004384, + "learning_rate": 1.575585778682768e-05, + "loss": 0.5277, "step": 7089 }, { - "epoch": 0.41, - "grad_norm": 1.0089479383803033, - "learning_rate": 1.3417092471321076e-05, - "loss": 0.439, + "epoch": 0.33, + "grad_norm": 0.5259842598684246, + "learning_rate": 1.5754640989953883e-05, + "loss": 0.2338, "step": 7090 }, { - "epoch": 0.41, - "grad_norm": 0.34819709868420223, - "learning_rate": 1.3415343515858035e-05, - "loss": 0.2192, + "epoch": 0.33, + "grad_norm": 0.3559979703712719, + "learning_rate": 1.5753424065677877e-05, + "loss": 0.314, "step": 7091 }, { - "epoch": 0.41, - "grad_norm": 0.5070973840014404, - "learning_rate": 1.3413594442121796e-05, - "loss": 0.3956, + "epoch": 0.33, + "grad_norm": 0.3232071324443672, + "learning_rate": 1.5752207014026612e-05, + "loss": 0.2576, "step": 7092 }, { - "epoch": 0.41, - "grad_norm": 0.3532321667025554, - "learning_rate": 1.3411845250172928e-05, - "loss": 0.2936, + "epoch": 0.33, + "grad_norm": 0.40866753029234226, + "learning_rate": 1.5750989835027026e-05, + "loss": 0.2895, "step": 7093 }, { - "epoch": 0.41, - "grad_norm": 0.3256958107973817, - "learning_rate": 1.3410095940072004e-05, - "loss": 0.1915, + "epoch": 0.33, + "grad_norm": 0.37266436007685355, + "learning_rate": 1.574977252870607e-05, + "loss": 0.2585, "step": 7094 }, { - "epoch": 0.41, - "grad_norm": 0.26218402620283915, - "learning_rate": 1.3408346511879604e-05, - "loss": 0.1904, + "epoch": 0.33, + "grad_norm": 0.4114777726193609, + "learning_rate": 1.5748555095090687e-05, + "loss": 0.341, "step": 7095 }, { - "epoch": 0.41, - "grad_norm": 0.9728861537090818, - "learning_rate": 1.340659696565631e-05, - "loss": 0.461, + "epoch": 0.33, + "grad_norm": 0.6554168182537494, + "learning_rate": 1.574733753420784e-05, + "loss": 0.4785, "step": 7096 }, { - "epoch": 0.41, - "grad_norm": 0.29866630802365746, - "learning_rate": 1.3404847301462713e-05, - "loss": 0.2247, + "epoch": 0.33, + "grad_norm": 0.3472327629826033, + "learning_rate": 1.5746119846084474e-05, + "loss": 0.2206, "step": 7097 }, { - "epoch": 0.41, - "grad_norm": 0.5127063422711399, - "learning_rate": 1.3403097519359397e-05, - "loss": 0.3778, + "epoch": 0.33, + "grad_norm": 0.3499202776527631, + "learning_rate": 1.5744902030747558e-05, + "loss": 0.2326, "step": 7098 }, { - "epoch": 0.41, - "grad_norm": 1.0938596974308756, - "learning_rate": 1.3401347619406966e-05, - "loss": 0.8696, + "epoch": 0.33, + "grad_norm": 0.3741230742793275, + "learning_rate": 1.5743684088224044e-05, + "loss": 0.3324, "step": 7099 }, { - "epoch": 0.41, - "grad_norm": 0.3440710027431961, - "learning_rate": 1.3399597601666008e-05, - "loss": 0.214, + "epoch": 0.33, + "grad_norm": 0.34978562932828566, + "learning_rate": 1.57424660185409e-05, + "loss": 0.2506, "step": 7100 }, { - "epoch": 0.41, - "grad_norm": 0.33249458020431705, - "learning_rate": 1.3397847466197133e-05, - "loss": 0.2699, + "epoch": 0.33, + "grad_norm": 1.0987027031706893, + "learning_rate": 1.57412478217251e-05, + "loss": 0.6012, "step": 7101 }, { - "epoch": 0.41, - "grad_norm": 0.3150673143037075, - "learning_rate": 1.3396097213060943e-05, - "loss": 0.3037, + "epoch": 0.33, + "grad_norm": 1.418759263897151, + "learning_rate": 1.57400294978036e-05, + "loss": 0.7911, "step": 7102 }, { - "epoch": 0.41, - "grad_norm": 0.9226442021708258, - "learning_rate": 1.3394346842318058e-05, - "loss": 0.5105, + "epoch": 0.33, + "grad_norm": 0.28126870436763585, + "learning_rate": 1.573881104680338e-05, + "loss": 0.2318, "step": 7103 }, { - "epoch": 0.41, - "grad_norm": 0.3788209793940605, - "learning_rate": 1.3392596354029084e-05, - "loss": 0.2471, + "epoch": 0.33, + "grad_norm": 0.7506369913067107, + "learning_rate": 1.5737592468751416e-05, + "loss": 0.4781, "step": 7104 }, { - "epoch": 0.41, - "grad_norm": 0.36634771446717296, - "learning_rate": 1.3390845748254645e-05, - "loss": 0.3481, + "epoch": 0.33, + "grad_norm": 0.4172636683387461, + "learning_rate": 1.5736373763674687e-05, + "loss": 0.3167, "step": 7105 }, { - "epoch": 0.41, - "grad_norm": 0.6158781133726381, - "learning_rate": 1.3389095025055363e-05, - "loss": 0.4241, + "epoch": 0.33, + "grad_norm": 0.3112558738276658, + "learning_rate": 1.5735154931600167e-05, + "loss": 0.2301, "step": 7106 }, { - "epoch": 0.41, - "grad_norm": 0.3631589753231446, - "learning_rate": 1.3387344184491869e-05, - "loss": 0.2813, + "epoch": 0.33, + "grad_norm": 0.37786987762603524, + "learning_rate": 1.5733935972554845e-05, + "loss": 0.2581, "step": 7107 }, { - "epoch": 0.41, - "grad_norm": 0.27955123400877324, - "learning_rate": 1.3385593226624787e-05, - "loss": 0.1621, + "epoch": 0.33, + "grad_norm": 1.327503534836091, + "learning_rate": 1.573271688656571e-05, + "loss": 0.8152, "step": 7108 }, { - "epoch": 0.41, - "grad_norm": 0.3783695100880318, - "learning_rate": 1.338384215151476e-05, - "loss": 0.2813, + "epoch": 0.33, + "grad_norm": 0.3698643869827666, + "learning_rate": 1.5731497673659745e-05, + "loss": 0.2929, "step": 7109 }, { - "epoch": 0.41, - "grad_norm": 0.6041099103124075, - "learning_rate": 1.3382090959222425e-05, - "loss": 0.3282, + "epoch": 0.33, + "grad_norm": 0.6790864194175057, + "learning_rate": 1.573027833386395e-05, + "loss": 0.3466, "step": 7110 }, { - "epoch": 0.41, - "grad_norm": 1.1035000418322451, - "learning_rate": 1.3380339649808425e-05, - "loss": 0.4968, + "epoch": 0.33, + "grad_norm": 0.34638630134117615, + "learning_rate": 1.572905886720531e-05, + "loss": 0.3208, "step": 7111 }, { - "epoch": 0.41, - "grad_norm": 0.4884716979614059, - "learning_rate": 1.337858822333341e-05, - "loss": 0.3715, + "epoch": 0.33, + "grad_norm": 0.39089769934773166, + "learning_rate": 1.5727839273710832e-05, + "loss": 0.2821, "step": 7112 }, { - "epoch": 0.41, - "grad_norm": 0.3107210491119011, - "learning_rate": 1.3376836679858026e-05, - "loss": 0.2974, + "epoch": 0.33, + "grad_norm": 0.24847650391447956, + "learning_rate": 1.5726619553407514e-05, + "loss": 0.1385, "step": 7113 }, { - "epoch": 0.41, - "grad_norm": 0.22911849054150063, - "learning_rate": 1.3375085019442937e-05, - "loss": 0.1643, + "epoch": 0.33, + "grad_norm": 0.457963662632025, + "learning_rate": 1.5725399706322354e-05, + "loss": 0.3384, "step": 7114 }, { - "epoch": 0.41, - "grad_norm": 0.5691552869077963, - "learning_rate": 1.3373333242148796e-05, - "loss": 0.3392, + "epoch": 0.33, + "grad_norm": 0.40754667812445927, + "learning_rate": 1.572417973248237e-05, + "loss": 0.2885, "step": 7115 }, { - "epoch": 0.41, - "grad_norm": 0.49816342959997756, - "learning_rate": 1.337158134803627e-05, - "loss": 0.3428, + "epoch": 0.33, + "grad_norm": 0.6041748004387976, + "learning_rate": 1.5722959631914557e-05, + "loss": 0.3255, "step": 7116 }, { - "epoch": 0.41, - "grad_norm": 0.4375022231297627, - "learning_rate": 1.3369829337166031e-05, - "loss": 0.3051, + "epoch": 0.33, + "grad_norm": 0.811632165477902, + "learning_rate": 1.5721739404645937e-05, + "loss": 0.5461, "step": 7117 }, { - "epoch": 0.41, - "grad_norm": 0.5267758958092926, - "learning_rate": 1.3368077209598744e-05, - "loss": 0.3613, + "epoch": 0.33, + "grad_norm": 0.32102186820855, + "learning_rate": 1.572051905070352e-05, + "loss": 0.2463, "step": 7118 }, { - "epoch": 0.41, - "grad_norm": 0.3807706588481771, - "learning_rate": 1.3366324965395088e-05, - "loss": 0.3324, + "epoch": 0.33, + "grad_norm": 0.31020247295364595, + "learning_rate": 1.5719298570114324e-05, + "loss": 0.2534, "step": 7119 }, { - "epoch": 0.41, - "grad_norm": 0.23055407507354644, - "learning_rate": 1.3364572604615744e-05, - "loss": 0.156, + "epoch": 0.33, + "grad_norm": 1.3857281878838819, + "learning_rate": 1.5718077962905372e-05, + "loss": 0.5051, "step": 7120 }, { - "epoch": 0.41, - "grad_norm": 0.47250351493960807, - "learning_rate": 1.3362820127321391e-05, - "loss": 0.3263, + "epoch": 0.33, + "grad_norm": 0.3955549776279057, + "learning_rate": 1.5716857229103684e-05, + "loss": 0.3045, "step": 7121 }, { - "epoch": 0.41, - "grad_norm": 0.40387974042821045, - "learning_rate": 1.3361067533572726e-05, - "loss": 0.2879, + "epoch": 0.33, + "grad_norm": 0.9215115648902779, + "learning_rate": 1.5715636368736286e-05, + "loss": 0.4584, "step": 7122 }, { - "epoch": 0.41, - "grad_norm": 0.6065089402375893, - "learning_rate": 1.3359314823430436e-05, - "loss": 0.5049, + "epoch": 0.33, + "grad_norm": 0.36169753622232714, + "learning_rate": 1.571441538183021e-05, + "loss": 0.2523, "step": 7123 }, { - "epoch": 0.41, - "grad_norm": 0.48384521210853454, - "learning_rate": 1.335756199695522e-05, - "loss": 0.3052, + "epoch": 0.33, + "grad_norm": 0.37251684751015984, + "learning_rate": 1.571319426841248e-05, + "loss": 0.2716, "step": 7124 }, { - "epoch": 0.41, - "grad_norm": 0.30084930743731136, - "learning_rate": 1.3355809054207774e-05, - "loss": 0.2649, + "epoch": 0.33, + "grad_norm": 0.33448749099587655, + "learning_rate": 1.571197302851014e-05, + "loss": 0.1892, "step": 7125 }, { - "epoch": 0.41, - "grad_norm": 0.9024344911983133, - "learning_rate": 1.3354055995248805e-05, - "loss": 0.6037, + "epoch": 0.33, + "grad_norm": 0.66609176016729, + "learning_rate": 1.5710751662150224e-05, + "loss": 0.3015, "step": 7126 }, { - "epoch": 0.41, - "grad_norm": 0.19286218457362345, - "learning_rate": 1.335230282013902e-05, - "loss": 0.1028, + "epoch": 0.33, + "grad_norm": 0.47153187488750503, + "learning_rate": 1.5709530169359767e-05, + "loss": 0.2829, "step": 7127 }, { - "epoch": 0.41, - "grad_norm": 0.3553929943930889, - "learning_rate": 1.3350549528939135e-05, - "loss": 0.3059, + "epoch": 0.33, + "grad_norm": 1.32082095765461, + "learning_rate": 1.5708308550165818e-05, + "loss": 0.4778, "step": 7128 }, { - "epoch": 0.41, - "grad_norm": 0.3556388786278789, - "learning_rate": 1.3348796121709862e-05, - "loss": 0.3285, + "epoch": 0.33, + "grad_norm": 0.4432579614150149, + "learning_rate": 1.5707086804595417e-05, + "loss": 0.2222, "step": 7129 }, { - "epoch": 0.41, - "grad_norm": 0.6547730919119816, - "learning_rate": 1.3347042598511926e-05, - "loss": 0.2819, + "epoch": 0.33, + "grad_norm": 0.48765120003463264, + "learning_rate": 1.570586493267561e-05, + "loss": 0.3146, "step": 7130 }, { - "epoch": 0.41, - "grad_norm": 0.36796600659289685, - "learning_rate": 1.3345288959406045e-05, - "loss": 0.2986, + "epoch": 0.33, + "grad_norm": 0.33470262723482036, + "learning_rate": 1.570464293443346e-05, + "loss": 0.2772, "step": 7131 }, { - "epoch": 0.41, - "grad_norm": 0.37729407521418595, - "learning_rate": 1.3343535204452953e-05, - "loss": 0.224, + "epoch": 0.33, + "grad_norm": 1.039026491174222, + "learning_rate": 1.570342080989601e-05, + "loss": 0.5921, "step": 7132 }, { - "epoch": 0.41, - "grad_norm": 0.2750629669350354, - "learning_rate": 1.3341781333713381e-05, - "loss": 0.1908, + "epoch": 0.33, + "grad_norm": 0.3758338848060208, + "learning_rate": 1.5702198559090322e-05, + "loss": 0.2065, "step": 7133 }, { - "epoch": 0.41, - "grad_norm": 0.44690214839091336, - "learning_rate": 1.3340027347248068e-05, - "loss": 0.3296, + "epoch": 0.33, + "grad_norm": 0.7322717127393783, + "learning_rate": 1.5700976182043452e-05, + "loss": 0.377, "step": 7134 }, { - "epoch": 0.41, - "grad_norm": 1.1545043346285377, - "learning_rate": 1.3338273245117745e-05, - "loss": 0.4603, + "epoch": 0.33, + "grad_norm": 0.5079023901244897, + "learning_rate": 1.5699753678782466e-05, + "loss": 0.3723, "step": 7135 }, { - "epoch": 0.41, - "grad_norm": 0.40566944235161556, - "learning_rate": 1.3336519027383168e-05, - "loss": 0.3405, + "epoch": 0.33, + "grad_norm": 0.3229311670899918, + "learning_rate": 1.5698531049334428e-05, + "loss": 0.2147, "step": 7136 }, { - "epoch": 0.41, - "grad_norm": 0.31670328181132457, - "learning_rate": 1.3334764694105079e-05, - "loss": 0.2335, + "epoch": 0.33, + "grad_norm": 0.33137566175899336, + "learning_rate": 1.5697308293726403e-05, + "loss": 0.223, "step": 7137 }, { - "epoch": 0.41, - "grad_norm": 0.8579819449562148, - "learning_rate": 1.3333010245344232e-05, - "loss": 0.5027, + "epoch": 0.33, + "grad_norm": 0.5091467081996569, + "learning_rate": 1.569608541198546e-05, + "loss": 0.4034, "step": 7138 }, { - "epoch": 0.41, - "grad_norm": 0.30341933853948494, - "learning_rate": 1.3331255681161386e-05, - "loss": 0.17, + "epoch": 0.33, + "grad_norm": 0.3175224511941382, + "learning_rate": 1.569486240413868e-05, + "loss": 0.2416, "step": 7139 }, { - "epoch": 0.41, - "grad_norm": 0.38527026955972754, - "learning_rate": 1.3329501001617294e-05, - "loss": 0.2346, + "epoch": 0.33, + "grad_norm": 0.9687388130809695, + "learning_rate": 1.5693639270213138e-05, + "loss": 0.52, "step": 7140 }, { - "epoch": 0.41, - "grad_norm": 0.5092949576398142, - "learning_rate": 1.332774620677273e-05, - "loss": 0.3329, + "epoch": 0.33, + "grad_norm": 1.1882606778332527, + "learning_rate": 1.5692416010235905e-05, + "loss": 0.6448, "step": 7141 }, { - "epoch": 0.41, - "grad_norm": 1.7811634793100883, - "learning_rate": 1.3325991296688455e-05, - "loss": 0.7817, + "epoch": 0.33, + "grad_norm": 0.3868377381438459, + "learning_rate": 1.569119262423407e-05, + "loss": 0.2711, "step": 7142 }, { - "epoch": 0.41, - "grad_norm": 0.3345933775300051, - "learning_rate": 1.3324236271425245e-05, - "loss": 0.2055, + "epoch": 0.33, + "grad_norm": 0.2979496682498275, + "learning_rate": 1.5689969112234717e-05, + "loss": 0.1972, "step": 7143 }, { - "epoch": 0.41, - "grad_norm": 1.1826649808614909, - "learning_rate": 1.3322481131043876e-05, - "loss": 0.6385, + "epoch": 0.33, + "grad_norm": 0.5885685019469463, + "learning_rate": 1.568874547426493e-05, + "loss": 0.3385, "step": 7144 }, { - "epoch": 0.41, - "grad_norm": 0.25436360208374936, - "learning_rate": 1.332072587560513e-05, - "loss": 0.2162, + "epoch": 0.33, + "grad_norm": 0.4007307443970035, + "learning_rate": 1.56875217103518e-05, + "loss": 0.2714, "step": 7145 }, { - "epoch": 0.41, - "grad_norm": 0.3245351746881674, - "learning_rate": 1.3318970505169786e-05, - "loss": 0.2445, + "epoch": 0.33, + "grad_norm": 1.0913132740372513, + "learning_rate": 1.5686297820522423e-05, + "loss": 0.3769, "step": 7146 }, { - "epoch": 0.41, - "grad_norm": 0.7222227073899474, - "learning_rate": 1.3317215019798639e-05, - "loss": 0.497, + "epoch": 0.33, + "grad_norm": 0.39495196122521425, + "learning_rate": 1.5685073804803892e-05, + "loss": 0.3058, "step": 7147 }, { - "epoch": 0.41, - "grad_norm": 0.3898074555572452, - "learning_rate": 1.3315459419552477e-05, - "loss": 0.3174, + "epoch": 0.33, + "grad_norm": 0.5968129159623581, + "learning_rate": 1.5683849663223306e-05, + "loss": 0.3158, "step": 7148 }, { - "epoch": 0.41, - "grad_norm": 0.357549111885812, - "learning_rate": 1.33137037044921e-05, - "loss": 0.2622, + "epoch": 0.33, + "grad_norm": 0.2646926232108532, + "learning_rate": 1.568262539580777e-05, + "loss": 0.1615, "step": 7149 }, { - "epoch": 0.41, - "grad_norm": 0.8507678942147663, - "learning_rate": 1.3311947874678306e-05, - "loss": 0.2878, + "epoch": 0.33, + "grad_norm": 0.6336038388159662, + "learning_rate": 1.568140100258438e-05, + "loss": 0.3484, "step": 7150 }, { - "epoch": 0.41, - "grad_norm": 0.33806956641060104, - "learning_rate": 1.3310191930171898e-05, - "loss": 0.2648, + "epoch": 0.33, + "grad_norm": 0.7403507754082647, + "learning_rate": 1.5680176483580248e-05, + "loss": 0.3975, "step": 7151 }, { - "epoch": 0.41, - "grad_norm": 0.41027889521502914, - "learning_rate": 1.3308435871033687e-05, - "loss": 0.2778, + "epoch": 0.33, + "grad_norm": 0.48286920819962725, + "learning_rate": 1.5678951838822487e-05, + "loss": 0.2738, "step": 7152 }, { - "epoch": 0.41, - "grad_norm": 0.3561677104384658, - "learning_rate": 1.3306679697324485e-05, - "loss": 0.3, + "epoch": 0.33, + "grad_norm": 1.1208821195765513, + "learning_rate": 1.5677727068338203e-05, + "loss": 0.6201, "step": 7153 }, { - "epoch": 0.41, - "grad_norm": 0.4096178462824716, - "learning_rate": 1.3304923409105104e-05, - "loss": 0.2723, + "epoch": 0.33, + "grad_norm": 0.47186658346814064, + "learning_rate": 1.5676502172154514e-05, + "loss": 0.2815, "step": 7154 }, { - "epoch": 0.41, - "grad_norm": 0.38934669547306056, - "learning_rate": 1.3303167006436371e-05, - "loss": 0.278, + "epoch": 0.33, + "grad_norm": 0.2834258173205169, + "learning_rate": 1.567527715029854e-05, + "loss": 0.242, "step": 7155 }, { - "epoch": 0.41, - "grad_norm": 0.37412796658975134, - "learning_rate": 1.3301410489379103e-05, - "loss": 0.2778, + "epoch": 0.33, + "grad_norm": 0.6348510350104531, + "learning_rate": 1.5674052002797396e-05, + "loss": 0.3902, "step": 7156 }, { - "epoch": 0.41, - "grad_norm": 0.7818448980631783, - "learning_rate": 1.3299653857994135e-05, - "loss": 0.4927, + "epoch": 0.33, + "grad_norm": 0.4710698356386421, + "learning_rate": 1.567282672967821e-05, + "loss": 0.3092, "step": 7157 }, { - "epoch": 0.41, - "grad_norm": 0.3268093806978687, - "learning_rate": 1.3297897112342294e-05, - "loss": 0.2912, + "epoch": 0.33, + "grad_norm": 0.35536440317264967, + "learning_rate": 1.5671601330968112e-05, + "loss": 0.27, "step": 7158 }, { - "epoch": 0.41, - "grad_norm": 0.686057582809523, - "learning_rate": 1.3296140252484417e-05, - "loss": 0.419, + "epoch": 0.33, + "grad_norm": 0.5243560848875567, + "learning_rate": 1.5670375806694226e-05, + "loss": 0.2436, "step": 7159 }, { - "epoch": 0.41, - "grad_norm": 0.2794205927146474, - "learning_rate": 1.3294383278481346e-05, - "loss": 0.2731, + "epoch": 0.33, + "grad_norm": 0.514432307671831, + "learning_rate": 1.566915015688368e-05, + "loss": 0.333, "step": 7160 }, { - "epoch": 0.41, - "grad_norm": 0.306089163013698, - "learning_rate": 1.3292626190393923e-05, - "loss": 0.2232, + "epoch": 0.33, + "grad_norm": 0.3757433316908943, + "learning_rate": 1.5667924381563618e-05, + "loss": 0.2427, "step": 7161 }, { - "epoch": 0.41, - "grad_norm": 1.0595231500511526, - "learning_rate": 1.3290868988282999e-05, - "loss": 0.5485, + "epoch": 0.33, + "grad_norm": 0.44377785401053943, + "learning_rate": 1.566669848076117e-05, + "loss": 0.3049, "step": 7162 }, { - "epoch": 0.41, - "grad_norm": 0.7733042625073977, - "learning_rate": 1.328911167220942e-05, - "loss": 0.3675, + "epoch": 0.33, + "grad_norm": 0.3539044950454571, + "learning_rate": 1.5665472454503484e-05, + "loss": 0.2658, "step": 7163 }, { - "epoch": 0.41, - "grad_norm": 0.3460305444373183, - "learning_rate": 1.3287354242234047e-05, - "loss": 0.2629, + "epoch": 0.33, + "grad_norm": 1.2991584943025944, + "learning_rate": 1.5664246302817695e-05, + "loss": 0.7675, "step": 7164 }, { - "epoch": 0.41, - "grad_norm": 0.3792220485449821, - "learning_rate": 1.3285596698417738e-05, - "loss": 0.3372, + "epoch": 0.33, + "grad_norm": 0.3161897144435263, + "learning_rate": 1.566302002573095e-05, + "loss": 0.1418, "step": 7165 }, { - "epoch": 0.41, - "grad_norm": 0.2128910770084968, - "learning_rate": 1.3283839040821355e-05, - "loss": 0.1166, + "epoch": 0.33, + "grad_norm": 0.42169995020107076, + "learning_rate": 1.5661793623270404e-05, + "loss": 0.2565, "step": 7166 }, { - "epoch": 0.41, - "grad_norm": 0.35463706912168896, - "learning_rate": 1.3282081269505771e-05, - "loss": 0.2832, + "epoch": 0.33, + "grad_norm": 0.43209305825695626, + "learning_rate": 1.5660567095463202e-05, + "loss": 0.3358, "step": 7167 }, { - "epoch": 0.41, - "grad_norm": 0.5153840336512678, - "learning_rate": 1.3280323384531852e-05, - "loss": 0.3804, + "epoch": 0.33, + "grad_norm": 0.6754068686278579, + "learning_rate": 1.56593404423365e-05, + "loss": 0.4258, "step": 7168 }, { - "epoch": 0.41, - "grad_norm": 0.5089755863319722, - "learning_rate": 1.3278565385960476e-05, - "loss": 0.3391, + "epoch": 0.33, + "grad_norm": 0.4043740072161991, + "learning_rate": 1.5658113663917455e-05, + "loss": 0.165, "step": 7169 }, { - "epoch": 0.41, - "grad_norm": 0.4031836019221641, - "learning_rate": 1.3276807273852522e-05, - "loss": 0.2773, + "epoch": 0.33, + "grad_norm": 0.3548682311398915, + "learning_rate": 1.5656886760233227e-05, + "loss": 0.2689, "step": 7170 }, { - "epoch": 0.41, - "grad_norm": 0.48049808170008806, - "learning_rate": 1.3275049048268869e-05, - "loss": 0.4095, + "epoch": 0.33, + "grad_norm": 0.363160094008727, + "learning_rate": 1.565565973131098e-05, + "loss": 0.2869, "step": 7171 }, { - "epoch": 0.41, - "grad_norm": 0.2558281539455274, - "learning_rate": 1.327329070927041e-05, - "loss": 0.1998, + "epoch": 0.33, + "grad_norm": 0.4189473866378317, + "learning_rate": 1.5654432577177874e-05, + "loss": 0.1886, "step": 7172 }, { - "epoch": 0.41, - "grad_norm": 0.38630900592037487, - "learning_rate": 1.3271532256918036e-05, - "loss": 0.295, + "epoch": 0.33, + "grad_norm": 0.41373198295723707, + "learning_rate": 1.5653205297861082e-05, + "loss": 0.3504, "step": 7173 }, { - "epoch": 0.41, - "grad_norm": 0.39008618040812376, - "learning_rate": 1.326977369127264e-05, - "loss": 0.2581, + "epoch": 0.33, + "grad_norm": 0.5595121133142632, + "learning_rate": 1.5651977893387776e-05, + "loss": 0.3828, "step": 7174 }, { - "epoch": 0.41, - "grad_norm": 0.6205933584816866, - "learning_rate": 1.326801501239512e-05, - "loss": 0.4402, + "epoch": 0.33, + "grad_norm": 0.3361240314977648, + "learning_rate": 1.5650750363785126e-05, + "loss": 0.2348, "step": 7175 }, { - "epoch": 0.41, - "grad_norm": 0.31882272613528434, - "learning_rate": 1.3266256220346383e-05, - "loss": 0.1977, + "epoch": 0.33, + "grad_norm": 0.3661761405136072, + "learning_rate": 1.5649522709080306e-05, + "loss": 0.2792, "step": 7176 }, { - "epoch": 0.41, - "grad_norm": 0.4407270029368251, - "learning_rate": 1.3264497315187334e-05, - "loss": 0.3436, + "epoch": 0.33, + "grad_norm": 0.4383883539439163, + "learning_rate": 1.56482949293005e-05, + "loss": 0.2411, "step": 7177 }, { - "epoch": 0.41, - "grad_norm": 0.38673637353069634, - "learning_rate": 1.326273829697888e-05, - "loss": 0.244, + "epoch": 0.33, + "grad_norm": 0.3124234668800184, + "learning_rate": 1.564706702447289e-05, + "loss": 0.2349, "step": 7178 }, { - "epoch": 0.41, - "grad_norm": 0.27113858546133, - "learning_rate": 1.3260979165781942e-05, - "loss": 0.1703, + "epoch": 0.33, + "grad_norm": 0.5445968592040877, + "learning_rate": 1.564583899462466e-05, + "loss": 0.3727, "step": 7179 }, { - "epoch": 0.41, - "grad_norm": 0.4419932070737161, - "learning_rate": 1.3259219921657436e-05, - "loss": 0.3672, + "epoch": 0.33, + "grad_norm": 0.6688811591722729, + "learning_rate": 1.5644610839782992e-05, + "loss": 0.4952, "step": 7180 }, { - "epoch": 0.41, - "grad_norm": 0.45245896039209665, - "learning_rate": 1.3257460564666283e-05, - "loss": 0.3697, + "epoch": 0.33, + "grad_norm": 0.6398563219462625, + "learning_rate": 1.564338255997508e-05, + "loss": 0.4357, "step": 7181 }, { - "epoch": 0.41, - "grad_norm": 0.32808870623561165, - "learning_rate": 1.3255701094869408e-05, - "loss": 0.1927, + "epoch": 0.33, + "grad_norm": 0.36291319982309433, + "learning_rate": 1.5642154155228124e-05, + "loss": 0.2415, "step": 7182 }, { - "epoch": 0.41, - "grad_norm": 1.1927858978015542, - "learning_rate": 1.325394151232775e-05, - "loss": 0.7927, + "epoch": 0.33, + "grad_norm": 0.31247966006971944, + "learning_rate": 1.5640925625569305e-05, + "loss": 0.2215, "step": 7183 }, { - "epoch": 0.41, - "grad_norm": 0.4014879560091093, - "learning_rate": 1.3252181817102235e-05, - "loss": 0.3279, + "epoch": 0.33, + "grad_norm": 0.5795375405427187, + "learning_rate": 1.5639696971025836e-05, + "loss": 0.4005, "step": 7184 }, { - "epoch": 0.41, - "grad_norm": 0.23244466150471954, - "learning_rate": 1.3250422009253802e-05, - "loss": 0.1628, + "epoch": 0.33, + "grad_norm": 0.3837492249362803, + "learning_rate": 1.5638468191624906e-05, + "loss": 0.2778, "step": 7185 }, { - "epoch": 0.41, - "grad_norm": 0.4222245281834194, - "learning_rate": 1.3248662088843395e-05, - "loss": 0.2875, + "epoch": 0.33, + "grad_norm": 0.3910527205603508, + "learning_rate": 1.5637239287393725e-05, + "loss": 0.3343, "step": 7186 }, { - "epoch": 0.41, - "grad_norm": 0.6467322169281083, - "learning_rate": 1.3246902055931961e-05, - "loss": 0.4428, + "epoch": 0.33, + "grad_norm": 0.8912829329638559, + "learning_rate": 1.56360102583595e-05, + "loss": 0.4828, "step": 7187 }, { - "epoch": 0.41, - "grad_norm": 0.38186519736687974, - "learning_rate": 1.3245141910580446e-05, - "loss": 0.3217, + "epoch": 0.33, + "grad_norm": 0.3555184524458391, + "learning_rate": 1.5634781104549442e-05, + "loss": 0.2364, "step": 7188 }, { - "epoch": 0.41, - "grad_norm": 0.3362029065075712, - "learning_rate": 1.324338165284981e-05, - "loss": 0.2656, + "epoch": 0.33, + "grad_norm": 0.31836586253936744, + "learning_rate": 1.5633551825990763e-05, + "loss": 0.2458, "step": 7189 }, { - "epoch": 0.41, - "grad_norm": 0.40943953574665665, - "learning_rate": 1.3241621282801002e-05, - "loss": 0.267, + "epoch": 0.33, + "grad_norm": 0.34067311977772946, + "learning_rate": 1.5632322422710674e-05, + "loss": 0.3025, "step": 7190 }, { - "epoch": 0.41, - "grad_norm": 0.4200553653950379, - "learning_rate": 1.3239860800494993e-05, - "loss": 0.3042, + "epoch": 0.33, + "grad_norm": 0.420791213050203, + "learning_rate": 1.5631092894736398e-05, + "loss": 0.2512, "step": 7191 }, { - "epoch": 0.41, - "grad_norm": 0.4031697492165725, - "learning_rate": 1.3238100205992739e-05, - "loss": 0.286, + "epoch": 0.33, + "grad_norm": 1.5326139215859171, + "learning_rate": 1.562986324209515e-05, + "loss": 0.7641, "step": 7192 }, { - "epoch": 0.41, - "grad_norm": 0.4600727603852913, - "learning_rate": 1.3236339499355217e-05, - "loss": 0.3152, + "epoch": 0.33, + "grad_norm": 0.7316824576386303, + "learning_rate": 1.5628633464814153e-05, + "loss": 0.5138, "step": 7193 }, { - "epoch": 0.41, - "grad_norm": 0.3986539572582897, - "learning_rate": 1.3234578680643394e-05, - "loss": 0.3215, + "epoch": 0.33, + "grad_norm": 0.3210373902959111, + "learning_rate": 1.5627403562920638e-05, + "loss": 0.2865, "step": 7194 }, { - "epoch": 0.41, - "grad_norm": 0.4169919148480683, - "learning_rate": 1.3232817749918256e-05, - "loss": 0.3092, + "epoch": 0.33, + "grad_norm": 0.3207851570263174, + "learning_rate": 1.5626173536441835e-05, + "loss": 0.1531, "step": 7195 }, { - "epoch": 0.41, - "grad_norm": 0.3654175654707768, - "learning_rate": 1.3231056707240775e-05, - "loss": 0.3141, + "epoch": 0.33, + "grad_norm": 0.46722250405108007, + "learning_rate": 1.5624943385404966e-05, + "loss": 0.3859, "step": 7196 }, { - "epoch": 0.41, - "grad_norm": 0.36406119272639237, - "learning_rate": 1.322929555267194e-05, - "loss": 0.3191, + "epoch": 0.33, + "grad_norm": 0.5154957649384645, + "learning_rate": 1.5623713109837276e-05, + "loss": 0.3171, "step": 7197 }, { - "epoch": 0.41, - "grad_norm": 0.31941887730802226, - "learning_rate": 1.3227534286272741e-05, - "loss": 0.2926, + "epoch": 0.33, + "grad_norm": 0.4333503285662579, + "learning_rate": 1.5622482709766e-05, + "loss": 0.2562, "step": 7198 }, { - "epoch": 0.41, - "grad_norm": 0.2620408151493418, - "learning_rate": 1.3225772908104165e-05, - "loss": 0.0687, + "epoch": 0.33, + "grad_norm": 0.6168185256551033, + "learning_rate": 1.562125218521837e-05, + "loss": 0.3749, "step": 7199 }, { - "epoch": 0.41, - "grad_norm": 0.30135243734827055, - "learning_rate": 1.3224011418227215e-05, - "loss": 0.268, + "epoch": 0.33, + "grad_norm": 0.4536718044436831, + "learning_rate": 1.562002153622164e-05, + "loss": 0.3389, "step": 7200 }, { - "epoch": 0.41, - "grad_norm": 0.5366585174326642, - "learning_rate": 1.3222249816702885e-05, - "loss": 0.3695, + "epoch": 0.33, + "grad_norm": 0.5081990660446354, + "learning_rate": 1.561879076280305e-05, + "loss": 0.3676, "step": 7201 }, { - "epoch": 0.41, - "grad_norm": 0.590033821849479, - "learning_rate": 1.3220488103592184e-05, - "loss": 0.381, + "epoch": 0.33, + "grad_norm": 0.3333958215393233, + "learning_rate": 1.561755986498984e-05, + "loss": 0.2586, "step": 7202 }, { - "epoch": 0.41, - "grad_norm": 0.3385399492527422, - "learning_rate": 1.3218726278956117e-05, - "loss": 0.2911, + "epoch": 0.33, + "grad_norm": 0.33908555835291326, + "learning_rate": 1.561632884280928e-05, + "loss": 0.2192, "step": 7203 }, { - "epoch": 0.41, - "grad_norm": 0.39860811495487514, - "learning_rate": 1.32169643428557e-05, - "loss": 0.393, + "epoch": 0.33, + "grad_norm": 0.7707277378235458, + "learning_rate": 1.5615097696288605e-05, + "loss": 0.4571, "step": 7204 }, { - "epoch": 0.41, - "grad_norm": 0.25190821405668745, - "learning_rate": 1.3215202295351946e-05, - "loss": 0.1631, + "epoch": 0.33, + "grad_norm": 0.9867961497140618, + "learning_rate": 1.561386642545508e-05, + "loss": 0.5279, "step": 7205 }, { - "epoch": 0.41, - "grad_norm": 0.5825783438242356, - "learning_rate": 1.3213440136505872e-05, - "loss": 0.3373, + "epoch": 0.33, + "grad_norm": 0.3560124344464534, + "learning_rate": 1.5612635030335966e-05, + "loss": 0.2779, "step": 7206 }, { - "epoch": 0.41, - "grad_norm": 0.46201854347371046, - "learning_rate": 1.3211677866378505e-05, - "loss": 0.3525, + "epoch": 0.33, + "grad_norm": 0.7713873920601063, + "learning_rate": 1.561140351095852e-05, + "loss": 0.4286, "step": 7207 }, { - "epoch": 0.41, - "grad_norm": 0.33494447559061813, - "learning_rate": 1.3209915485030872e-05, - "loss": 0.2768, + "epoch": 0.33, + "grad_norm": 0.21018184911171545, + "learning_rate": 1.561017186735001e-05, + "loss": 0.071, "step": 7208 }, { - "epoch": 0.41, - "grad_norm": 0.5209714849109807, - "learning_rate": 1.3208152992524004e-05, - "loss": 0.334, + "epoch": 0.33, + "grad_norm": 0.33550706806427805, + "learning_rate": 1.56089400995377e-05, + "loss": 0.2474, "step": 7209 }, { - "epoch": 0.41, - "grad_norm": 0.37421682429920466, - "learning_rate": 1.3206390388918937e-05, - "loss": 0.3097, + "epoch": 0.33, + "grad_norm": 0.4218696544981165, + "learning_rate": 1.560770820754886e-05, + "loss": 0.329, "step": 7210 }, { - "epoch": 0.41, - "grad_norm": 0.30114301634692187, - "learning_rate": 1.3204627674276706e-05, - "loss": 0.2041, + "epoch": 0.33, + "grad_norm": 0.5104159400023495, + "learning_rate": 1.5606476191410772e-05, + "loss": 0.3157, "step": 7211 }, { - "epoch": 0.41, - "grad_norm": 0.30929089763537454, - "learning_rate": 1.320286484865836e-05, - "loss": 0.2373, + "epoch": 0.33, + "grad_norm": 0.3875085172092279, + "learning_rate": 1.5605244051150698e-05, + "loss": 0.2946, "step": 7212 }, { - "epoch": 0.41, - "grad_norm": 0.5349211331103826, - "learning_rate": 1.3201101912124938e-05, - "loss": 0.4244, + "epoch": 0.33, + "grad_norm": 0.7809115165370621, + "learning_rate": 1.5604011786795927e-05, + "loss": 0.4467, "step": 7213 }, { - "epoch": 0.41, - "grad_norm": 0.7905658849528135, - "learning_rate": 1.31993388647375e-05, - "loss": 0.4311, + "epoch": 0.33, + "grad_norm": 0.27610778806682057, + "learning_rate": 1.5602779398373735e-05, + "loss": 0.1915, "step": 7214 }, { - "epoch": 0.41, - "grad_norm": 0.36725135920394, - "learning_rate": 1.3197575706557089e-05, - "loss": 0.2345, + "epoch": 0.33, + "grad_norm": 0.3759967090479265, + "learning_rate": 1.5601546885911406e-05, + "loss": 0.2962, "step": 7215 }, { - "epoch": 0.41, - "grad_norm": 0.32845897394315215, - "learning_rate": 1.3195812437644771e-05, - "loss": 0.3105, + "epoch": 0.33, + "grad_norm": 0.4907483496752828, + "learning_rate": 1.560031424943623e-05, + "loss": 0.33, "step": 7216 }, { - "epoch": 0.41, - "grad_norm": 0.29800837631567817, - "learning_rate": 1.3194049058061606e-05, - "loss": 0.1937, + "epoch": 0.33, + "grad_norm": 0.36305100430257287, + "learning_rate": 1.559908148897549e-05, + "loss": 0.257, "step": 7217 }, { - "epoch": 0.41, - "grad_norm": 0.3439065437611227, - "learning_rate": 1.3192285567868662e-05, - "loss": 0.2248, + "epoch": 0.33, + "grad_norm": 0.40223797398827604, + "learning_rate": 1.5597848604556488e-05, + "loss": 0.2926, "step": 7218 }, { - "epoch": 0.41, - "grad_norm": 0.5481495321388145, - "learning_rate": 1.3190521967127e-05, - "loss": 0.425, + "epoch": 0.33, + "grad_norm": 0.8040309229597306, + "learning_rate": 1.5596615596206512e-05, + "loss": 0.4309, "step": 7219 }, { - "epoch": 0.41, - "grad_norm": 0.5796551641203623, - "learning_rate": 1.3188758255897705e-05, - "loss": 0.3267, + "epoch": 0.33, + "grad_norm": 0.40595266153971077, + "learning_rate": 1.5595382463952858e-05, + "loss": 0.2479, "step": 7220 }, { - "epoch": 0.41, - "grad_norm": 0.5415045151729231, - "learning_rate": 1.3186994434241845e-05, - "loss": 0.2433, + "epoch": 0.33, + "grad_norm": 0.2827865230782367, + "learning_rate": 1.559414920782283e-05, + "loss": 0.1776, "step": 7221 }, { - "epoch": 0.41, - "grad_norm": 0.5321969246900784, - "learning_rate": 1.3185230502220508e-05, - "loss": 0.3406, + "epoch": 0.33, + "grad_norm": 0.4094213171883501, + "learning_rate": 1.559291582784373e-05, + "loss": 0.3498, "step": 7222 }, { - "epoch": 0.41, - "grad_norm": 0.2422636781051529, - "learning_rate": 1.3183466459894774e-05, - "loss": 0.2162, + "epoch": 0.33, + "grad_norm": 0.8675898338255126, + "learning_rate": 1.559168232404287e-05, + "loss": 0.5771, "step": 7223 }, { - "epoch": 0.42, - "grad_norm": 0.4012142713831807, - "learning_rate": 1.3181702307325732e-05, - "loss": 0.3343, + "epoch": 0.33, + "grad_norm": 0.3984800330069923, + "learning_rate": 1.5590448696447545e-05, + "loss": 0.2487, "step": 7224 }, { - "epoch": 0.42, - "grad_norm": 0.4794094927724806, - "learning_rate": 1.3179938044574478e-05, - "loss": 0.2723, + "epoch": 0.33, + "grad_norm": 0.44839662806816233, + "learning_rate": 1.5589214945085076e-05, + "loss": 0.3634, "step": 7225 }, { - "epoch": 0.42, - "grad_norm": 0.9303568882746546, - "learning_rate": 1.3178173671702106e-05, - "loss": 0.4344, + "epoch": 0.33, + "grad_norm": 0.5079989575213073, + "learning_rate": 1.5587981069982775e-05, + "loss": 0.3815, "step": 7226 }, { - "epoch": 0.42, - "grad_norm": 0.6859661533697369, - "learning_rate": 1.3176409188769715e-05, - "loss": 0.4222, + "epoch": 0.33, + "grad_norm": 0.20657435904280966, + "learning_rate": 1.5586747071167962e-05, + "loss": 0.1468, "step": 7227 }, { - "epoch": 0.42, - "grad_norm": 0.27217942492278485, - "learning_rate": 1.3174644595838411e-05, - "loss": 0.2338, + "epoch": 0.33, + "grad_norm": 1.395722828491078, + "learning_rate": 1.5585512948667947e-05, + "loss": 0.765, "step": 7228 }, { - "epoch": 0.42, - "grad_norm": 0.3033095269716523, - "learning_rate": 1.3172879892969302e-05, - "loss": 0.1795, + "epoch": 0.33, + "grad_norm": 0.5152327862917196, + "learning_rate": 1.5584278702510064e-05, + "loss": 0.364, "step": 7229 }, { - "epoch": 0.42, - "grad_norm": 0.7835735876374561, - "learning_rate": 1.3171115080223498e-05, - "loss": 0.4627, + "epoch": 0.33, + "grad_norm": 0.2845438497631263, + "learning_rate": 1.558304433272163e-05, + "loss": 0.2385, "step": 7230 }, { - "epoch": 0.42, - "grad_norm": 0.6595555477893355, - "learning_rate": 1.3169350157662115e-05, - "loss": 0.2827, + "epoch": 0.33, + "grad_norm": 1.1032499243277467, + "learning_rate": 1.558180983932998e-05, + "loss": 0.5609, "step": 7231 }, { - "epoch": 0.42, - "grad_norm": 0.45468179409236015, - "learning_rate": 1.3167585125346271e-05, - "loss": 0.3494, + "epoch": 0.33, + "grad_norm": 0.4557111944237515, + "learning_rate": 1.5580575222362435e-05, + "loss": 0.2997, "step": 7232 }, { - "epoch": 0.42, - "grad_norm": 0.6287105703435774, - "learning_rate": 1.3165819983337093e-05, - "loss": 0.3983, + "epoch": 0.33, + "grad_norm": 0.383603110348889, + "learning_rate": 1.5579340481846338e-05, + "loss": 0.2532, "step": 7233 }, { - "epoch": 0.42, - "grad_norm": 0.4100430193967899, - "learning_rate": 1.3164054731695706e-05, - "loss": 0.2633, + "epoch": 0.33, + "grad_norm": 0.3359675817572818, + "learning_rate": 1.5578105617809013e-05, + "loss": 0.238, "step": 7234 }, { - "epoch": 0.42, - "grad_norm": 0.3711852345127576, - "learning_rate": 1.3162289370483239e-05, - "loss": 0.2647, + "epoch": 0.33, + "grad_norm": 0.7486837059712673, + "learning_rate": 1.557687063027781e-05, + "loss": 0.45, "step": 7235 }, { - "epoch": 0.42, - "grad_norm": 0.37139285226115515, - "learning_rate": 1.3160523899760824e-05, - "loss": 0.3054, + "epoch": 0.33, + "grad_norm": 0.4398333842353121, + "learning_rate": 1.557563551928007e-05, + "loss": 0.2762, "step": 7236 }, { - "epoch": 0.42, - "grad_norm": 0.4224536498097396, - "learning_rate": 1.3158758319589604e-05, - "loss": 0.3383, + "epoch": 0.33, + "grad_norm": 0.36979728767995995, + "learning_rate": 1.5574400284843127e-05, + "loss": 0.2753, "step": 7237 }, { - "epoch": 0.42, - "grad_norm": 0.5765797692400455, - "learning_rate": 1.3156992630030719e-05, - "loss": 0.2837, + "epoch": 0.33, + "grad_norm": 0.43559393296177684, + "learning_rate": 1.5573164926994338e-05, + "loss": 0.3262, "step": 7238 }, { - "epoch": 0.42, - "grad_norm": 0.38557712218290724, - "learning_rate": 1.3155226831145316e-05, - "loss": 0.2873, + "epoch": 0.33, + "grad_norm": 0.3998039766746459, + "learning_rate": 1.5571929445761048e-05, + "loss": 0.3135, "step": 7239 }, { - "epoch": 0.42, - "grad_norm": 0.31621584525691937, - "learning_rate": 1.3153460922994543e-05, - "loss": 0.2885, + "epoch": 0.33, + "grad_norm": 0.46343463954721115, + "learning_rate": 1.5570693841170613e-05, + "loss": 0.2523, "step": 7240 }, { - "epoch": 0.42, - "grad_norm": 0.29877105273955934, - "learning_rate": 1.3151694905639553e-05, - "loss": 0.164, + "epoch": 0.33, + "grad_norm": 0.5534276790453356, + "learning_rate": 1.556945811325038e-05, + "loss": 0.3947, "step": 7241 }, { - "epoch": 0.42, - "grad_norm": 0.5334188626320423, - "learning_rate": 1.3149928779141506e-05, - "loss": 0.4122, + "epoch": 0.33, + "grad_norm": 0.303043670407606, + "learning_rate": 1.5568222262027716e-05, + "loss": 0.2527, "step": 7242 }, { - "epoch": 0.42, - "grad_norm": 0.3828319018088226, - "learning_rate": 1.3148162543561557e-05, - "loss": 0.3108, + "epoch": 0.33, + "grad_norm": 0.31537841830908697, + "learning_rate": 1.5566986287529976e-05, + "loss": 0.1797, "step": 7243 }, { - "epoch": 0.42, - "grad_norm": 0.27405350590100247, - "learning_rate": 1.3146396198960881e-05, - "loss": 0.2322, + "epoch": 0.33, + "grad_norm": 1.174326367371531, + "learning_rate": 1.5565750189784528e-05, + "loss": 0.5412, "step": 7244 }, { - "epoch": 0.42, - "grad_norm": 0.7653235271564569, - "learning_rate": 1.3144629745400632e-05, - "loss": 0.3892, + "epoch": 0.33, + "grad_norm": 0.29494614213006176, + "learning_rate": 1.5564513968818733e-05, + "loss": 0.2231, "step": 7245 }, { - "epoch": 0.42, - "grad_norm": 0.40187932943275734, - "learning_rate": 1.3142863182941996e-05, - "loss": 0.2975, + "epoch": 0.33, + "grad_norm": 0.4217980039726196, + "learning_rate": 1.5563277624659962e-05, + "loss": 0.339, "step": 7246 }, { - "epoch": 0.42, - "grad_norm": 0.3764866541385056, - "learning_rate": 1.3141096511646141e-05, - "loss": 0.256, + "epoch": 0.33, + "grad_norm": 0.8200991445494084, + "learning_rate": 1.5562041157335587e-05, + "loss": 0.3719, "step": 7247 }, { - "epoch": 0.42, - "grad_norm": 0.5491287247419514, - "learning_rate": 1.3139329731574248e-05, - "loss": 0.402, + "epoch": 0.33, + "grad_norm": 0.40060375451114766, + "learning_rate": 1.556080456687298e-05, + "loss": 0.2718, "step": 7248 }, { - "epoch": 0.42, - "grad_norm": 0.33817869550264207, - "learning_rate": 1.3137562842787502e-05, - "loss": 0.314, + "epoch": 0.33, + "grad_norm": 0.4415866101219472, + "learning_rate": 1.555956785329952e-05, + "loss": 0.2416, "step": 7249 }, { - "epoch": 0.42, - "grad_norm": 1.1455881396931549, - "learning_rate": 1.3135795845347091e-05, - "loss": 0.786, + "epoch": 0.33, + "grad_norm": 0.3792975682597965, + "learning_rate": 1.5558331016642586e-05, + "loss": 0.2779, "step": 7250 }, { - "epoch": 0.42, - "grad_norm": 0.260305077900469, - "learning_rate": 1.3134028739314204e-05, - "loss": 0.1759, + "epoch": 0.33, + "grad_norm": 0.37915029551331114, + "learning_rate": 1.5557094056929566e-05, + "loss": 0.286, "step": 7251 }, { - "epoch": 0.42, - "grad_norm": 0.3841956692601113, - "learning_rate": 1.3132261524750038e-05, - "loss": 0.2806, + "epoch": 0.33, + "grad_norm": 1.9586424349610925, + "learning_rate": 1.555585697418783e-05, + "loss": 0.4531, "step": 7252 }, { - "epoch": 0.42, - "grad_norm": 1.0728971327464343, - "learning_rate": 1.3130494201715786e-05, - "loss": 0.5737, + "epoch": 0.33, + "grad_norm": 0.3824188545006304, + "learning_rate": 1.5554619768444784e-05, + "loss": 0.3059, "step": 7253 }, { - "epoch": 0.42, - "grad_norm": 0.4996657504738954, - "learning_rate": 1.312872677027266e-05, - "loss": 0.2811, + "epoch": 0.33, + "grad_norm": 0.41475810516595324, + "learning_rate": 1.5553382439727803e-05, + "loss": 0.2916, "step": 7254 }, { - "epoch": 0.42, - "grad_norm": 0.4259459115097978, - "learning_rate": 1.3126959230481855e-05, - "loss": 0.3429, + "epoch": 0.33, + "grad_norm": 0.27718589411617034, + "learning_rate": 1.5552144988064292e-05, + "loss": 0.2064, "step": 7255 }, { - "epoch": 0.42, - "grad_norm": 0.3836148834285065, - "learning_rate": 1.312519158240459e-05, - "loss": 0.3439, + "epoch": 0.33, + "grad_norm": 1.0152361741711482, + "learning_rate": 1.5550907413481643e-05, + "loss": 0.3491, "step": 7256 }, { - "epoch": 0.42, - "grad_norm": 0.195603144277449, - "learning_rate": 1.3123423826102074e-05, - "loss": 0.1038, + "epoch": 0.33, + "grad_norm": 0.445658861119678, + "learning_rate": 1.554966971600725e-05, + "loss": 0.2971, "step": 7257 }, { - "epoch": 0.42, - "grad_norm": 0.3946116029677441, - "learning_rate": 1.3121655961635523e-05, - "loss": 0.3305, + "epoch": 0.33, + "grad_norm": 0.41529695397042315, + "learning_rate": 1.5548431895668515e-05, + "loss": 0.3225, "step": 7258 }, { - "epoch": 0.42, - "grad_norm": 0.429020225728886, - "learning_rate": 1.311988798906616e-05, - "loss": 0.3417, + "epoch": 0.33, + "grad_norm": 0.9078236749585062, + "learning_rate": 1.5547193952492856e-05, + "loss": 0.5104, "step": 7259 }, { - "epoch": 0.42, - "grad_norm": 0.43981050593951315, - "learning_rate": 1.3118119908455214e-05, - "loss": 0.2962, + "epoch": 0.33, + "grad_norm": 0.35151250102631537, + "learning_rate": 1.554595588650766e-05, + "loss": 0.2266, "step": 7260 }, { - "epoch": 0.42, - "grad_norm": 0.36302632134687873, - "learning_rate": 1.3116351719863906e-05, - "loss": 0.297, + "epoch": 0.33, + "grad_norm": 0.27864828666307884, + "learning_rate": 1.554471769774035e-05, + "loss": 0.2153, "step": 7261 }, { - "epoch": 0.42, - "grad_norm": 0.44066367931176303, - "learning_rate": 1.3114583423353476e-05, - "loss": 0.3472, + "epoch": 0.33, + "grad_norm": 1.68318183005361, + "learning_rate": 1.5543479386218334e-05, + "loss": 0.784, "step": 7262 }, { - "epoch": 0.42, - "grad_norm": 0.301064575674817, - "learning_rate": 1.3112815018985154e-05, - "loss": 0.2503, + "epoch": 0.33, + "grad_norm": 0.34525833982700205, + "learning_rate": 1.5542240951969028e-05, + "loss": 0.2149, "step": 7263 }, { - "epoch": 0.42, - "grad_norm": 0.31585777990855474, - "learning_rate": 1.311104650682018e-05, - "loss": 0.2119, + "epoch": 0.33, + "grad_norm": 0.8238686503566567, + "learning_rate": 1.5541002395019847e-05, + "loss": 0.4544, "step": 7264 }, { - "epoch": 0.42, - "grad_norm": 0.7170533675281239, - "learning_rate": 1.3109277886919802e-05, - "loss": 0.4184, + "epoch": 0.33, + "grad_norm": 0.3939276721743022, + "learning_rate": 1.5539763715398215e-05, + "loss": 0.3259, "step": 7265 }, { - "epoch": 0.42, - "grad_norm": 0.7914186842307775, - "learning_rate": 1.3107509159345262e-05, - "loss": 0.5291, + "epoch": 0.33, + "grad_norm": 0.3980155903036159, + "learning_rate": 1.5538524913131556e-05, + "loss": 0.2277, "step": 7266 }, { - "epoch": 0.42, - "grad_norm": 0.267323184964963, - "learning_rate": 1.3105740324157817e-05, - "loss": 0.2167, + "epoch": 0.33, + "grad_norm": 0.31045819182200635, + "learning_rate": 1.5537285988247285e-05, + "loss": 0.1754, "step": 7267 }, { - "epoch": 0.42, - "grad_norm": 0.4627191275190162, - "learning_rate": 1.3103971381418713e-05, - "loss": 0.3843, + "epoch": 0.33, + "grad_norm": 0.8494943481266864, + "learning_rate": 1.5536046940772848e-05, + "loss": 0.4817, "step": 7268 }, { - "epoch": 0.42, - "grad_norm": 0.2892535892426708, - "learning_rate": 1.310220233118922e-05, - "loss": 0.1798, + "epoch": 0.33, + "grad_norm": 0.3808360227914345, + "learning_rate": 1.5534807770735663e-05, + "loss": 0.2822, "step": 7269 }, { - "epoch": 0.42, - "grad_norm": 0.42638022005244247, - "learning_rate": 1.3100433173530589e-05, - "loss": 0.222, + "epoch": 0.33, + "grad_norm": 0.400736253809875, + "learning_rate": 1.5533568478163172e-05, + "loss": 0.3085, "step": 7270 }, { - "epoch": 0.42, - "grad_norm": 0.6288156366173488, - "learning_rate": 1.3098663908504091e-05, - "loss": 0.3793, + "epoch": 0.33, + "grad_norm": 1.0427840238340422, + "learning_rate": 1.5532329063082806e-05, + "loss": 0.6733, "step": 7271 }, { - "epoch": 0.42, - "grad_norm": 0.5008266918389817, - "learning_rate": 1.3096894536170994e-05, - "loss": 0.3658, + "epoch": 0.33, + "grad_norm": 0.7706327793184947, + "learning_rate": 1.5531089525522006e-05, + "loss": 0.4136, "step": 7272 }, { - "epoch": 0.42, - "grad_norm": 0.34497131150322446, - "learning_rate": 1.3095125056592575e-05, - "loss": 0.2732, + "epoch": 0.33, + "grad_norm": 0.2513051568602051, + "learning_rate": 1.5529849865508215e-05, + "loss": 0.2204, "step": 7273 }, { - "epoch": 0.42, - "grad_norm": 0.9667430455753272, - "learning_rate": 1.3093355469830107e-05, - "loss": 0.5544, + "epoch": 0.33, + "grad_norm": 0.46764522216603815, + "learning_rate": 1.5528610083068877e-05, + "loss": 0.3368, "step": 7274 }, { - "epoch": 0.42, - "grad_norm": 0.2635365400899789, - "learning_rate": 1.3091585775944873e-05, - "loss": 0.2102, + "epoch": 0.33, + "grad_norm": 0.5585406392653689, + "learning_rate": 1.552737017823144e-05, + "loss": 0.384, "step": 7275 }, { - "epoch": 0.42, - "grad_norm": 0.3788616946807546, - "learning_rate": 1.3089815974998154e-05, - "loss": 0.275, + "epoch": 0.33, + "grad_norm": 0.3797885734576441, + "learning_rate": 1.5526130151023358e-05, + "loss": 0.2823, "step": 7276 }, { - "epoch": 0.42, - "grad_norm": 0.897185868184525, - "learning_rate": 1.3088046067051243e-05, - "loss": 0.3707, + "epoch": 0.33, + "grad_norm": 0.460146659543978, + "learning_rate": 1.5524890001472076e-05, + "loss": 0.3282, "step": 7277 }, { - "epoch": 0.42, - "grad_norm": 0.9592855330664415, - "learning_rate": 1.308627605216543e-05, - "loss": 0.624, + "epoch": 0.33, + "grad_norm": 0.43894175133531294, + "learning_rate": 1.552364972960506e-05, + "loss": 0.2927, "step": 7278 }, { - "epoch": 0.42, - "grad_norm": 0.347671499106493, - "learning_rate": 1.308450593040201e-05, - "loss": 0.2543, + "epoch": 0.33, + "grad_norm": 0.24954728538905976, + "learning_rate": 1.552240933544976e-05, + "loss": 0.1691, "step": 7279 }, { - "epoch": 0.42, - "grad_norm": 0.4070033059546702, - "learning_rate": 1.3082735701822281e-05, - "loss": 0.2806, + "epoch": 0.33, + "grad_norm": 0.7539887492684836, + "learning_rate": 1.5521168819033642e-05, + "loss": 0.5176, "step": 7280 }, { - "epoch": 0.42, - "grad_norm": 0.30304605216105446, - "learning_rate": 1.3080965366487548e-05, - "loss": 0.1902, + "epoch": 0.33, + "grad_norm": 0.3402457634166276, + "learning_rate": 1.5519928180384164e-05, + "loss": 0.272, "step": 7281 }, { - "epoch": 0.42, - "grad_norm": 0.45872537263094, - "learning_rate": 1.3079194924459118e-05, - "loss": 0.2992, + "epoch": 0.33, + "grad_norm": 0.398858163035011, + "learning_rate": 1.5518687419528794e-05, + "loss": 0.3225, "step": 7282 }, { - "epoch": 0.42, - "grad_norm": 0.5951235637057674, - "learning_rate": 1.3077424375798295e-05, - "loss": 0.2988, - "step": 7283 + "epoch": 0.33, + "grad_norm": 1.0527400418521429, + "learning_rate": 1.551744653649501e-05, + "loss": 0.4963, + "step": 7283 }, { - "epoch": 0.42, - "grad_norm": 1.182568248414101, - "learning_rate": 1.3075653720566404e-05, - "loss": 0.7021, + "epoch": 0.33, + "grad_norm": 0.4876798596333133, + "learning_rate": 1.5516205531310272e-05, + "loss": 0.293, "step": 7284 }, { - "epoch": 0.42, - "grad_norm": 0.34373368560518014, - "learning_rate": 1.3073882958824755e-05, - "loss": 0.2923, + "epoch": 0.33, + "grad_norm": 0.42954597479704787, + "learning_rate": 1.5514964404002066e-05, + "loss": 0.325, "step": 7285 }, { - "epoch": 0.42, - "grad_norm": 1.157375136004901, - "learning_rate": 1.307211209063467e-05, - "loss": 0.7641, + "epoch": 0.33, + "grad_norm": 0.3277277479915988, + "learning_rate": 1.5513723154597858e-05, + "loss": 0.2122, "step": 7286 }, { - "epoch": 0.42, - "grad_norm": 0.2623129816600877, - "learning_rate": 1.3070341116057476e-05, - "loss": 0.2098, + "epoch": 0.33, + "grad_norm": 0.42357581791313226, + "learning_rate": 1.5512481783125134e-05, + "loss": 0.3186, "step": 7287 }, { - "epoch": 0.42, - "grad_norm": 0.3824276930333361, - "learning_rate": 1.3068570035154503e-05, - "loss": 0.2981, + "epoch": 0.33, + "grad_norm": 1.7015549735365534, + "learning_rate": 1.551124028961138e-05, + "loss": 0.4452, "step": 7288 }, { - "epoch": 0.42, - "grad_norm": 0.4109022486432033, - "learning_rate": 1.306679884798708e-05, - "loss": 0.2912, + "epoch": 0.33, + "grad_norm": 0.36763289855824954, + "learning_rate": 1.5509998674084076e-05, + "loss": 0.272, "step": 7289 }, { - "epoch": 0.42, - "grad_norm": 0.5582932916423002, - "learning_rate": 1.3065027554616547e-05, - "loss": 0.302, + "epoch": 0.33, + "grad_norm": 0.6470444214710763, + "learning_rate": 1.550875693657071e-05, + "loss": 0.3612, "step": 7290 }, { - "epoch": 0.42, - "grad_norm": 0.34401297473123493, - "learning_rate": 1.3063256155104239e-05, - "loss": 0.2613, + "epoch": 0.33, + "grad_norm": 0.44763101653933857, + "learning_rate": 1.5507515077098776e-05, + "loss": 0.365, "step": 7291 }, { - "epoch": 0.42, - "grad_norm": 0.5353972926544106, - "learning_rate": 1.3061484649511503e-05, - "loss": 0.4175, + "epoch": 0.33, + "grad_norm": 0.3321645545268741, + "learning_rate": 1.5506273095695767e-05, + "loss": 0.2654, "step": 7292 }, { - "epoch": 0.42, - "grad_norm": 0.5389049209563237, - "learning_rate": 1.3059713037899683e-05, - "loss": 0.3414, + "epoch": 0.34, + "grad_norm": 0.409088843036453, + "learning_rate": 1.550503099238918e-05, + "loss": 0.3239, "step": 7293 }, { - "epoch": 0.42, - "grad_norm": 0.40682939769048926, - "learning_rate": 1.3057941320330134e-05, - "loss": 0.2576, + "epoch": 0.34, + "grad_norm": 0.3434822726566993, + "learning_rate": 1.5503788767206512e-05, + "loss": 0.2453, "step": 7294 }, { - "epoch": 0.42, - "grad_norm": 0.270271026585561, - "learning_rate": 1.3056169496864208e-05, - "loss": 0.2742, + "epoch": 0.34, + "grad_norm": 1.2835784228491813, + "learning_rate": 1.5502546420175266e-05, + "loss": 0.7984, "step": 7295 }, { - "epoch": 0.42, - "grad_norm": 0.28083438824338586, - "learning_rate": 1.3054397567563266e-05, - "loss": 0.134, + "epoch": 0.34, + "grad_norm": 0.480717554256865, + "learning_rate": 1.5501303951322942e-05, + "loss": 0.1235, "step": 7296 }, { - "epoch": 0.42, - "grad_norm": 0.3797228091663637, - "learning_rate": 1.3052625532488663e-05, - "loss": 0.2747, + "epoch": 0.34, + "grad_norm": 0.3081454384504014, + "learning_rate": 1.5500061360677055e-05, + "loss": 0.2864, "step": 7297 }, { - "epoch": 0.42, - "grad_norm": 0.8666369485609857, - "learning_rate": 1.3050853391701774e-05, - "loss": 0.6257, + "epoch": 0.34, + "grad_norm": 0.6629919427998986, + "learning_rate": 1.549881864826511e-05, + "loss": 0.466, "step": 7298 }, { - "epoch": 0.42, - "grad_norm": 0.3821053180321841, - "learning_rate": 1.304908114526396e-05, - "loss": 0.3131, + "epoch": 0.34, + "grad_norm": 0.255953260423203, + "learning_rate": 1.5497575814114615e-05, + "loss": 0.1495, "step": 7299 }, { - "epoch": 0.42, - "grad_norm": 0.327705833281437, - "learning_rate": 1.3047308793236599e-05, - "loss": 0.2423, + "epoch": 0.34, + "grad_norm": 0.5778440832622904, + "learning_rate": 1.5496332858253095e-05, + "loss": 0.3863, "step": 7300 }, { - "epoch": 0.42, - "grad_norm": 0.32553038184470945, - "learning_rate": 1.3045536335681064e-05, - "loss": 0.2019, + "epoch": 0.34, + "grad_norm": 0.45265301241439254, + "learning_rate": 1.5495089780708062e-05, + "loss": 0.3496, "step": 7301 }, { - "epoch": 0.42, - "grad_norm": 1.0963164215457855, - "learning_rate": 1.3043763772658739e-05, - "loss": 0.7163, + "epoch": 0.34, + "grad_norm": 0.39439851004760107, + "learning_rate": 1.5493846581507037e-05, + "loss": 0.2316, "step": 7302 }, { - "epoch": 0.42, - "grad_norm": 0.2779405820004573, - "learning_rate": 1.3041991104231004e-05, - "loss": 0.2391, + "epoch": 0.34, + "grad_norm": 0.5251924834347689, + "learning_rate": 1.5492603260677543e-05, + "loss": 0.3958, "step": 7303 }, { - "epoch": 0.42, - "grad_norm": 1.1690442501518032, - "learning_rate": 1.3040218330459249e-05, - "loss": 0.795, + "epoch": 0.34, + "grad_norm": 0.7488466539809208, + "learning_rate": 1.549135981824711e-05, + "loss": 0.4411, "step": 7304 }, { - "epoch": 0.42, - "grad_norm": 0.6931092185091348, - "learning_rate": 1.3038445451404862e-05, - "loss": 0.4273, + "epoch": 0.34, + "grad_norm": 0.3270073220641253, + "learning_rate": 1.5490116254243258e-05, + "loss": 0.2137, "step": 7305 }, { - "epoch": 0.42, - "grad_norm": 0.33132146745020336, - "learning_rate": 1.3036672467129241e-05, - "loss": 0.2127, + "epoch": 0.34, + "grad_norm": 0.37670808538205175, + "learning_rate": 1.5488872568693527e-05, + "loss": 0.2387, "step": 7306 }, { - "epoch": 0.42, - "grad_norm": 0.3629514950779212, - "learning_rate": 1.3034899377693782e-05, - "loss": 0.313, + "epoch": 0.34, + "grad_norm": 1.0137888817084433, + "learning_rate": 1.5487628761625447e-05, + "loss": 0.7319, "step": 7307 }, { - "epoch": 0.42, - "grad_norm": 0.297556760270983, - "learning_rate": 1.3033126183159887e-05, - "loss": 0.2063, + "epoch": 0.34, + "grad_norm": 1.072991964754532, + "learning_rate": 1.5486384833066557e-05, + "loss": 0.4958, "step": 7308 }, { - "epoch": 0.42, - "grad_norm": 0.2941708532876371, - "learning_rate": 1.3031352883588965e-05, - "loss": 0.2085, + "epoch": 0.34, + "grad_norm": 0.30827171842582807, + "learning_rate": 1.548514078304439e-05, + "loss": 0.2322, "step": 7309 }, { - "epoch": 0.42, - "grad_norm": 0.7852519937723851, - "learning_rate": 1.3029579479042423e-05, - "loss": 0.5261, + "epoch": 0.34, + "grad_norm": 0.6801808728909504, + "learning_rate": 1.5483896611586494e-05, + "loss": 0.4395, "step": 7310 }, { - "epoch": 0.42, - "grad_norm": 0.4725984968530518, - "learning_rate": 1.3027805969581674e-05, - "loss": 0.327, + "epoch": 0.34, + "grad_norm": 0.29364941078367696, + "learning_rate": 1.5482652318720418e-05, + "loss": 0.1574, "step": 7311 }, { - "epoch": 0.42, - "grad_norm": 0.758050405985813, - "learning_rate": 1.3026032355268132e-05, - "loss": 0.3737, + "epoch": 0.34, + "grad_norm": 0.39312722141277956, + "learning_rate": 1.54814079044737e-05, + "loss": 0.2532, "step": 7312 }, { - "epoch": 0.42, - "grad_norm": 0.27727090117219044, - "learning_rate": 1.3024258636163221e-05, - "loss": 0.2148, + "epoch": 0.34, + "grad_norm": 0.44994090441442663, + "learning_rate": 1.5480163368873894e-05, + "loss": 0.3485, "step": 7313 }, { - "epoch": 0.42, - "grad_norm": 0.27413178777534775, - "learning_rate": 1.3022484812328365e-05, - "loss": 0.2706, + "epoch": 0.34, + "grad_norm": 0.8034370671407369, + "learning_rate": 1.547891871194855e-05, + "loss": 0.4271, "step": 7314 }, { - "epoch": 0.42, - "grad_norm": 0.5412617681591296, - "learning_rate": 1.3020710883824987e-05, - "loss": 0.3523, + "epoch": 0.34, + "grad_norm": 0.4445151350677581, + "learning_rate": 1.547767393372523e-05, + "loss": 0.2811, "step": 7315 }, { - "epoch": 0.42, - "grad_norm": 0.45275149176644774, - "learning_rate": 1.3018936850714524e-05, - "loss": 0.2881, + "epoch": 0.34, + "grad_norm": 1.6979193415557734, + "learning_rate": 1.5476429034231487e-05, + "loss": 0.8639, "step": 7316 }, { - "epoch": 0.42, - "grad_norm": 0.7923988739792034, - "learning_rate": 1.3017162713058404e-05, - "loss": 0.4677, + "epoch": 0.34, + "grad_norm": 0.3074120943064323, + "learning_rate": 1.5475184013494885e-05, + "loss": 0.2474, "step": 7317 }, { - "epoch": 0.42, - "grad_norm": 0.39895495917725043, - "learning_rate": 1.3015388470918072e-05, - "loss": 0.2999, + "epoch": 0.34, + "grad_norm": 0.3282388596142066, + "learning_rate": 1.5473938871542986e-05, + "loss": 0.1787, "step": 7318 }, { - "epoch": 0.42, - "grad_norm": 0.23533187055257196, - "learning_rate": 1.3013614124354969e-05, - "loss": 0.201, + "epoch": 0.34, + "grad_norm": 1.3040770555683425, + "learning_rate": 1.5472693608403355e-05, + "loss": 0.7642, "step": 7319 }, { - "epoch": 0.42, - "grad_norm": 0.41061255560760335, - "learning_rate": 1.3011839673430536e-05, - "loss": 0.2942, + "epoch": 0.34, + "grad_norm": 0.6894958110766907, + "learning_rate": 1.5471448224103563e-05, + "loss": 0.361, "step": 7320 }, { - "epoch": 0.42, - "grad_norm": 0.3836948488403221, - "learning_rate": 1.3010065118206223e-05, - "loss": 0.3036, + "epoch": 0.34, + "grad_norm": 0.4347101599532764, + "learning_rate": 1.547020271867118e-05, + "loss": 0.3015, "step": 7321 }, { - "epoch": 0.42, - "grad_norm": 0.8039043999491806, - "learning_rate": 1.3008290458743486e-05, - "loss": 0.3215, + "epoch": 0.34, + "grad_norm": 0.4254386839523883, + "learning_rate": 1.546895709213378e-05, + "loss": 0.3134, "step": 7322 }, { - "epoch": 0.42, - "grad_norm": 0.3526267061237842, - "learning_rate": 1.3006515695103779e-05, - "loss": 0.3402, + "epoch": 0.34, + "grad_norm": 0.3849815591436858, + "learning_rate": 1.5467711344518943e-05, + "loss": 0.1687, "step": 7323 }, { - "epoch": 0.42, - "grad_norm": 0.3874565419841915, - "learning_rate": 1.3004740827348563e-05, - "loss": 0.2971, + "epoch": 0.34, + "grad_norm": 0.38342134387308424, + "learning_rate": 1.5466465475854246e-05, + "loss": 0.2456, "step": 7324 }, { - "epoch": 0.42, - "grad_norm": 0.4916981175451748, - "learning_rate": 1.3002965855539303e-05, - "loss": 0.3836, + "epoch": 0.34, + "grad_norm": 0.469572052101874, + "learning_rate": 1.5465219486167273e-05, + "loss": 0.3026, "step": 7325 }, { - "epoch": 0.42, - "grad_norm": 0.1963129197249394, - "learning_rate": 1.300119077973746e-05, - "loss": 0.1841, + "epoch": 0.34, + "grad_norm": 1.1263510303295086, + "learning_rate": 1.5463973375485605e-05, + "loss": 0.4743, "step": 7326 }, { - "epoch": 0.42, - "grad_norm": 0.3374193053149697, - "learning_rate": 1.2999415600004515e-05, - "loss": 0.2758, + "epoch": 0.34, + "grad_norm": 0.39128333220231315, + "learning_rate": 1.5462727143836834e-05, + "loss": 0.2959, "step": 7327 }, { - "epoch": 0.42, - "grad_norm": 0.9488092760590442, - "learning_rate": 1.2997640316401934e-05, - "loss": 0.4981, + "epoch": 0.34, + "grad_norm": 0.3757492257109844, + "learning_rate": 1.5461480791248553e-05, + "loss": 0.2822, "step": 7328 }, { - "epoch": 0.42, - "grad_norm": 0.6829150533818018, - "learning_rate": 1.2995864928991198e-05, - "loss": 0.3648, + "epoch": 0.34, + "grad_norm": 0.5367070461021922, + "learning_rate": 1.5460234317748345e-05, + "loss": 0.2654, "step": 7329 }, { - "epoch": 0.42, - "grad_norm": 0.5376235231735698, - "learning_rate": 1.2994089437833788e-05, - "loss": 0.3335, + "epoch": 0.34, + "grad_norm": 0.2875925377917111, + "learning_rate": 1.545898772336382e-05, + "loss": 0.2352, "step": 7330 }, { - "epoch": 0.42, - "grad_norm": 0.30612015196427483, - "learning_rate": 1.2992313842991189e-05, - "loss": 0.2881, + "epoch": 0.34, + "grad_norm": 1.2084110881132275, + "learning_rate": 1.545774100812256e-05, + "loss": 0.418, "step": 7331 }, { - "epoch": 0.42, - "grad_norm": 0.35369907883631363, - "learning_rate": 1.2990538144524894e-05, - "loss": 0.2176, + "epoch": 0.34, + "grad_norm": 0.5133562420013291, + "learning_rate": 1.5456494172052175e-05, + "loss": 0.3603, "step": 7332 }, { - "epoch": 0.42, - "grad_norm": 0.6509986938076566, - "learning_rate": 1.2988762342496386e-05, - "loss": 0.366, + "epoch": 0.34, + "grad_norm": 0.3511194580199034, + "learning_rate": 1.5455247215180273e-05, + "loss": 0.3013, "step": 7333 }, { - "epoch": 0.42, - "grad_norm": 0.4988119608715147, - "learning_rate": 1.298698643696717e-05, - "loss": 0.3416, + "epoch": 0.34, + "grad_norm": 1.238638097809351, + "learning_rate": 1.5454000137534455e-05, + "loss": 0.6843, "step": 7334 }, { - "epoch": 0.42, - "grad_norm": 0.2772638591631809, - "learning_rate": 1.2985210427998743e-05, - "loss": 0.2226, + "epoch": 0.34, + "grad_norm": 0.34904139164487696, + "learning_rate": 1.545275293914233e-05, + "loss": 0.1749, "step": 7335 }, { - "epoch": 0.42, - "grad_norm": 0.3951269297273973, - "learning_rate": 1.2983434315652606e-05, - "loss": 0.2974, + "epoch": 0.34, + "grad_norm": 0.40864715840179605, + "learning_rate": 1.5451505620031505e-05, + "loss": 0.2927, "step": 7336 }, { - "epoch": 0.42, - "grad_norm": 0.5102487142923199, - "learning_rate": 1.2981658099990266e-05, - "loss": 0.3945, + "epoch": 0.34, + "grad_norm": 0.4305989599640417, + "learning_rate": 1.5450258180229606e-05, + "loss": 0.3232, "step": 7337 }, { - "epoch": 0.42, - "grad_norm": 0.4414157270716968, - "learning_rate": 1.2979881781073235e-05, - "loss": 0.3039, + "epoch": 0.34, + "grad_norm": 1.033748741272164, + "learning_rate": 1.544901061976424e-05, + "loss": 0.3634, "step": 7338 }, { - "epoch": 0.42, - "grad_norm": 0.2909371552105041, - "learning_rate": 1.2978105358963026e-05, - "loss": 0.2469, + "epoch": 0.34, + "grad_norm": 0.39759818288280063, + "learning_rate": 1.544776293866303e-05, + "loss": 0.2884, "step": 7339 }, { - "epoch": 0.42, - "grad_norm": 0.5649634546078088, - "learning_rate": 1.2976328833721157e-05, - "loss": 0.3579, + "epoch": 0.34, + "grad_norm": 0.3496864587160516, + "learning_rate": 1.5446515136953603e-05, + "loss": 0.2485, "step": 7340 }, { - "epoch": 0.42, - "grad_norm": 0.3848501314342395, - "learning_rate": 1.2974552205409147e-05, - "loss": 0.2769, + "epoch": 0.34, + "grad_norm": 0.39967120012355933, + "learning_rate": 1.544526721466358e-05, + "loss": 0.2078, "step": 7341 }, { - "epoch": 0.42, - "grad_norm": 0.3049961532656385, - "learning_rate": 1.2972775474088524e-05, - "loss": 0.2088, + "epoch": 0.34, + "grad_norm": 0.34444800664401537, + "learning_rate": 1.5444019171820588e-05, + "loss": 0.2487, "step": 7342 }, { - "epoch": 0.42, - "grad_norm": 0.34617722088590974, - "learning_rate": 1.297099863982081e-05, - "loss": 0.3243, + "epoch": 0.34, + "grad_norm": 0.8556852570631819, + "learning_rate": 1.544277100845226e-05, + "loss": 0.4622, "step": 7343 }, { - "epoch": 0.42, - "grad_norm": 0.7259079907052219, - "learning_rate": 1.2969221702667547e-05, - "loss": 0.5144, + "epoch": 0.34, + "grad_norm": 0.4586721112103685, + "learning_rate": 1.5441522724586225e-05, + "loss": 0.2816, "step": 7344 }, { - "epoch": 0.42, - "grad_norm": 0.38372525596694496, - "learning_rate": 1.2967444662690261e-05, - "loss": 0.2622, + "epoch": 0.34, + "grad_norm": 0.4092204511702058, + "learning_rate": 1.544027432025012e-05, + "loss": 0.298, "step": 7345 }, { - "epoch": 0.42, - "grad_norm": 0.5209288626419487, - "learning_rate": 1.2965667519950494e-05, - "loss": 0.3831, + "epoch": 0.34, + "grad_norm": 0.5581460267402473, + "learning_rate": 1.543902579547159e-05, + "loss": 0.3323, "step": 7346 }, { - "epoch": 0.42, - "grad_norm": 0.23884674114338472, - "learning_rate": 1.2963890274509789e-05, - "loss": 0.2358, + "epoch": 0.34, + "grad_norm": 1.5691261067187017, + "learning_rate": 1.5437777150278268e-05, + "loss": 0.8669, "step": 7347 }, { - "epoch": 0.42, - "grad_norm": 0.38743989121030176, - "learning_rate": 1.2962112926429691e-05, - "loss": 0.1331, + "epoch": 0.34, + "grad_norm": 0.36315022344227127, + "learning_rate": 1.54365283846978e-05, + "loss": 0.239, "step": 7348 }, { - "epoch": 0.42, - "grad_norm": 0.43142029339243576, - "learning_rate": 1.2960335475771748e-05, - "loss": 0.3201, + "epoch": 0.34, + "grad_norm": 0.4427822082767784, + "learning_rate": 1.5435279498757835e-05, + "loss": 0.3429, "step": 7349 }, { - "epoch": 0.42, - "grad_norm": 0.3556038132379808, - "learning_rate": 1.2958557922597516e-05, - "loss": 0.34, + "epoch": 0.34, + "grad_norm": 0.938060928123741, + "learning_rate": 1.5434030492486023e-05, + "loss": 0.6152, "step": 7350 }, { - "epoch": 0.42, - "grad_norm": 0.5629136553826217, - "learning_rate": 1.2956780266968552e-05, - "loss": 0.409, + "epoch": 0.34, + "grad_norm": 0.26164154313858956, + "learning_rate": 1.543278136591001e-05, + "loss": 0.1521, "step": 7351 }, { - "epoch": 0.42, - "grad_norm": 0.3375499747099566, - "learning_rate": 1.2955002508946413e-05, - "loss": 0.2461, + "epoch": 0.34, + "grad_norm": 1.86534581940236, + "learning_rate": 1.5431532119057454e-05, + "loss": 0.798, "step": 7352 }, { - "epoch": 0.42, - "grad_norm": 0.26063028491401785, - "learning_rate": 1.2953224648592664e-05, - "loss": 0.1594, + "epoch": 0.34, + "grad_norm": 0.43616199251134036, + "learning_rate": 1.543028275195601e-05, + "loss": 0.3096, "step": 7353 }, { - "epoch": 0.42, - "grad_norm": 0.3286104317910206, - "learning_rate": 1.2951446685968874e-05, - "loss": 0.2815, + "epoch": 0.34, + "grad_norm": 0.39084898288994674, + "learning_rate": 1.542903326463334e-05, + "loss": 0.2532, "step": 7354 }, { - "epoch": 0.42, - "grad_norm": 0.33363688183935714, - "learning_rate": 1.294966862113661e-05, - "loss": 0.2217, + "epoch": 0.34, + "grad_norm": 0.7460740534665524, + "learning_rate": 1.542778365711711e-05, + "loss": 0.4902, "step": 7355 }, { - "epoch": 0.42, - "grad_norm": 0.6185356603885251, - "learning_rate": 1.2947890454157448e-05, - "loss": 0.4454, + "epoch": 0.34, + "grad_norm": 0.3996947780335268, + "learning_rate": 1.542653392943498e-05, + "loss": 0.3127, "step": 7356 }, { - "epoch": 0.42, - "grad_norm": 0.5052412376086751, - "learning_rate": 1.294611218509297e-05, - "loss": 0.3758, + "epoch": 0.34, + "grad_norm": 0.3450781590464792, + "learning_rate": 1.542528408161462e-05, + "loss": 0.2014, "step": 7357 }, { - "epoch": 0.42, - "grad_norm": 0.31425887157261073, - "learning_rate": 1.2944333814004748e-05, - "loss": 0.2567, + "epoch": 0.34, + "grad_norm": 0.35199887402860136, + "learning_rate": 1.5424034113683697e-05, + "loss": 0.2251, "step": 7358 }, { - "epoch": 0.42, - "grad_norm": 0.3024510934501898, - "learning_rate": 1.2942555340954377e-05, - "loss": 0.2408, + "epoch": 0.34, + "grad_norm": 0.8443066001551855, + "learning_rate": 1.542278402566989e-05, + "loss": 0.461, "step": 7359 }, { - "epoch": 0.42, - "grad_norm": 0.2721480122115268, - "learning_rate": 1.294077676600344e-05, - "loss": 0.1685, + "epoch": 0.34, + "grad_norm": 0.4544159028718788, + "learning_rate": 1.5421533817600868e-05, + "loss": 0.2638, "step": 7360 }, { - "epoch": 0.42, - "grad_norm": 0.4334281254358977, - "learning_rate": 1.293899808921353e-05, - "loss": 0.3266, + "epoch": 0.34, + "grad_norm": 0.4018558336195311, + "learning_rate": 1.542028348950431e-05, + "loss": 0.2909, "step": 7361 }, { - "epoch": 0.42, - "grad_norm": 0.33199319188044935, - "learning_rate": 1.2937219310646242e-05, - "loss": 0.2896, + "epoch": 0.34, + "grad_norm": 1.2312465536476065, + "learning_rate": 1.5419033041407906e-05, + "loss": 0.7561, "step": 7362 }, { - "epoch": 0.42, - "grad_norm": 0.5113244424261909, - "learning_rate": 1.2935440430363177e-05, - "loss": 0.3779, + "epoch": 0.34, + "grad_norm": 0.38075197235910907, + "learning_rate": 1.5417782473339325e-05, + "loss": 0.2523, "step": 7363 }, { - "epoch": 0.42, - "grad_norm": 0.4415669571896982, - "learning_rate": 1.2933661448425933e-05, - "loss": 0.3187, + "epoch": 0.34, + "grad_norm": 0.24227287055955618, + "learning_rate": 1.5416531785326267e-05, + "loss": 0.1864, "step": 7364 }, { - "epoch": 0.42, - "grad_norm": 0.21104509788054693, - "learning_rate": 1.2931882364896125e-05, - "loss": 0.1266, + "epoch": 0.34, + "grad_norm": 1.0785080266234088, + "learning_rate": 1.5415280977396417e-05, + "loss": 0.5329, "step": 7365 }, { - "epoch": 0.42, - "grad_norm": 0.39429657285673275, - "learning_rate": 1.2930103179835352e-05, - "loss": 0.3014, + "epoch": 0.34, + "grad_norm": 0.35761441314569825, + "learning_rate": 1.5414030049577466e-05, + "loss": 0.2959, "step": 7366 }, { - "epoch": 0.42, - "grad_norm": 0.34096887677103693, - "learning_rate": 1.2928323893305233e-05, - "loss": 0.2779, + "epoch": 0.34, + "grad_norm": 0.9020323711900209, + "learning_rate": 1.5412779001897105e-05, + "loss": 0.3927, "step": 7367 }, { - "epoch": 0.42, - "grad_norm": 0.5398849722107039, - "learning_rate": 1.2926544505367384e-05, - "loss": 0.3547, + "epoch": 0.34, + "grad_norm": 0.40620721788680136, + "learning_rate": 1.5411527834383032e-05, + "loss": 0.3265, "step": 7368 }, { - "epoch": 0.42, - "grad_norm": 0.7583480793502161, - "learning_rate": 1.2924765016083427e-05, - "loss": 0.5297, + "epoch": 0.34, + "grad_norm": 0.3817695189331422, + "learning_rate": 1.5410276547062953e-05, + "loss": 0.2895, "step": 7369 }, { - "epoch": 0.42, - "grad_norm": 0.30692372439687576, - "learning_rate": 1.2922985425514977e-05, - "loss": 0.279, + "epoch": 0.34, + "grad_norm": 0.2760804363651166, + "learning_rate": 1.540902513996456e-05, + "loss": 0.0976, "step": 7370 }, { - "epoch": 0.42, - "grad_norm": 0.270890793342731, - "learning_rate": 1.2921205733723672e-05, - "loss": 0.1777, + "epoch": 0.34, + "grad_norm": 1.0737188966455447, + "learning_rate": 1.540777361311557e-05, + "loss": 0.4769, "step": 7371 }, { - "epoch": 0.42, - "grad_norm": 0.33009884553113095, - "learning_rate": 1.2919425940771138e-05, - "loss": 0.2584, + "epoch": 0.34, + "grad_norm": 0.3319918272140716, + "learning_rate": 1.5406521966543682e-05, + "loss": 0.2823, "step": 7372 }, { - "epoch": 0.42, - "grad_norm": 0.3529582845492279, - "learning_rate": 1.2917646046719007e-05, - "loss": 0.304, + "epoch": 0.34, + "grad_norm": 0.5099080476005865, + "learning_rate": 1.540527020027661e-05, + "loss": 0.3625, "step": 7373 }, { - "epoch": 0.42, - "grad_norm": 0.4422830853360155, - "learning_rate": 1.2915866051628923e-05, - "loss": 0.3514, + "epoch": 0.34, + "grad_norm": 0.8021986789118483, + "learning_rate": 1.540401831434206e-05, + "loss": 0.498, "step": 7374 }, { - "epoch": 0.42, - "grad_norm": 0.3936738090536721, - "learning_rate": 1.291408595556252e-05, - "loss": 0.258, + "epoch": 0.34, + "grad_norm": 0.4089391632910359, + "learning_rate": 1.540276630876776e-05, + "loss": 0.2847, "step": 7375 }, { - "epoch": 0.42, - "grad_norm": 0.3645622895731119, - "learning_rate": 1.2912305758581444e-05, - "loss": 0.3064, + "epoch": 0.34, + "grad_norm": 0.2714364479928965, + "learning_rate": 1.5401514183581418e-05, + "loss": 0.2322, "step": 7376 }, { - "epoch": 0.42, - "grad_norm": 0.8760977353616266, - "learning_rate": 1.2910525460747346e-05, - "loss": 0.4878, + "epoch": 0.34, + "grad_norm": 0.49052451952927545, + "learning_rate": 1.5400261938810755e-05, + "loss": 0.3522, "step": 7377 }, { - "epoch": 0.42, - "grad_norm": 0.1923060008162484, - "learning_rate": 1.290874506212188e-05, - "loss": 0.1669, + "epoch": 0.34, + "grad_norm": 0.4078628594878965, + "learning_rate": 1.5399009574483502e-05, + "loss": 0.2679, "step": 7378 }, { - "epoch": 0.42, - "grad_norm": 0.402353938670631, - "learning_rate": 1.2906964562766691e-05, - "loss": 0.3496, + "epoch": 0.34, + "grad_norm": 0.5789472513464345, + "learning_rate": 1.539775709062738e-05, + "loss": 0.3932, "step": 7379 }, { - "epoch": 0.42, - "grad_norm": 0.7286714606166612, - "learning_rate": 1.290518396274345e-05, - "loss": 0.437, + "epoch": 0.34, + "grad_norm": 0.3841698273356213, + "learning_rate": 1.5396504487270118e-05, + "loss": 0.2864, "step": 7380 }, { - "epoch": 0.42, - "grad_norm": 0.35226470412228356, - "learning_rate": 1.290340326211381e-05, - "loss": 0.2338, + "epoch": 0.34, + "grad_norm": 0.39943907862912165, + "learning_rate": 1.5395251764439446e-05, + "loss": 0.2762, "step": 7381 }, { - "epoch": 0.42, - "grad_norm": 0.3621217534989928, - "learning_rate": 1.290162246093944e-05, - "loss": 0.2912, + "epoch": 0.34, + "grad_norm": 0.29954546107529756, + "learning_rate": 1.53939989221631e-05, + "loss": 0.2206, "step": 7382 }, { - "epoch": 0.42, - "grad_norm": 0.4861495391034015, - "learning_rate": 1.289984155928201e-05, - "loss": 0.3546, + "epoch": 0.34, + "grad_norm": 0.6063921387719042, + "learning_rate": 1.539274596046882e-05, + "loss": 0.5113, "step": 7383 }, { - "epoch": 0.42, - "grad_norm": 0.64343988125234, - "learning_rate": 1.289806055720319e-05, - "loss": 0.2483, + "epoch": 0.34, + "grad_norm": 0.30672464938880906, + "learning_rate": 1.539149287938434e-05, + "loss": 0.2193, "step": 7384 }, { - "epoch": 0.42, - "grad_norm": 0.3655898241983234, - "learning_rate": 1.2896279454764659e-05, - "loss": 0.2909, + "epoch": 0.34, + "grad_norm": 0.5050504068049764, + "learning_rate": 1.5390239678937403e-05, + "loss": 0.3965, "step": 7385 }, { - "epoch": 0.42, - "grad_norm": 0.29398028827370154, - "learning_rate": 1.28944982520281e-05, - "loss": 0.2718, + "epoch": 0.34, + "grad_norm": 1.4887813619116648, + "learning_rate": 1.538898635915576e-05, + "loss": 0.8324, "step": 7386 }, { - "epoch": 0.42, - "grad_norm": 1.1800139635163578, - "learning_rate": 1.2892716949055184e-05, - "loss": 0.7545, + "epoch": 0.34, + "grad_norm": 0.3421941765892758, + "learning_rate": 1.5387732920067146e-05, + "loss": 0.194, "step": 7387 }, { - "epoch": 0.42, - "grad_norm": 0.3023709880759031, - "learning_rate": 1.2890935545907608e-05, - "loss": 0.2228, + "epoch": 0.34, + "grad_norm": 0.4664760207787426, + "learning_rate": 1.538647936169932e-05, + "loss": 0.3651, "step": 7388 }, { - "epoch": 0.42, - "grad_norm": 0.6965321600545177, - "learning_rate": 1.2889154042647056e-05, - "loss": 0.4642, + "epoch": 0.34, + "grad_norm": 0.38854910918919533, + "learning_rate": 1.5385225684080032e-05, + "loss": 0.3194, "step": 7389 }, { - "epoch": 0.42, - "grad_norm": 0.35911214934551505, - "learning_rate": 1.2887372439335224e-05, - "loss": 0.3357, + "epoch": 0.34, + "grad_norm": 0.2759203334053479, + "learning_rate": 1.5383971887237042e-05, + "loss": 0.174, "step": 7390 }, { - "epoch": 0.42, - "grad_norm": 0.29662533744407216, - "learning_rate": 1.2885590736033808e-05, - "loss": 0.2187, + "epoch": 0.34, + "grad_norm": 1.1720049999591449, + "learning_rate": 1.53827179711981e-05, + "loss": 0.6836, "step": 7391 }, { - "epoch": 0.42, - "grad_norm": 0.27718955489176644, - "learning_rate": 1.2883808932804512e-05, - "loss": 0.1913, + "epoch": 0.34, + "grad_norm": 0.3919618407232766, + "learning_rate": 1.5381463935990967e-05, + "loss": 0.3323, "step": 7392 }, { - "epoch": 0.42, - "grad_norm": 0.4086851848096156, - "learning_rate": 1.2882027029709034e-05, - "loss": 0.3497, + "epoch": 0.34, + "grad_norm": 0.3147333987404266, + "learning_rate": 1.538020978164341e-05, + "loss": 0.0758, "step": 7393 }, { - "epoch": 0.42, - "grad_norm": 0.3038815146416857, - "learning_rate": 1.2880245026809085e-05, - "loss": 0.2307, + "epoch": 0.34, + "grad_norm": 0.44173726411433945, + "learning_rate": 1.53789555081832e-05, + "loss": 0.3726, "step": 7394 }, { - "epoch": 0.42, - "grad_norm": 1.500569565176652, - "learning_rate": 1.2878462924166374e-05, - "loss": 0.661, + "epoch": 0.34, + "grad_norm": 0.28256007490076435, + "learning_rate": 1.5377701115638096e-05, + "loss": 0.2302, "step": 7395 }, { - "epoch": 0.42, - "grad_norm": 0.6963263635658477, - "learning_rate": 1.2876680721842616e-05, - "loss": 0.4307, + "epoch": 0.34, + "grad_norm": 0.42893203206038816, + "learning_rate": 1.5376446604035874e-05, + "loss": 0.273, "step": 7396 }, { - "epoch": 0.42, - "grad_norm": 0.333813168785862, - "learning_rate": 1.2874898419899528e-05, - "loss": 0.2022, + "epoch": 0.34, + "grad_norm": 0.3847872569536585, + "learning_rate": 1.5375191973404303e-05, + "loss": 0.2739, "step": 7397 }, { - "epoch": 0.43, - "grad_norm": 0.24787304670295343, - "learning_rate": 1.287311601839883e-05, - "loss": 0.2155, + "epoch": 0.34, + "grad_norm": 1.0296614026962205, + "learning_rate": 1.5373937223771163e-05, + "loss": 0.6673, "step": 7398 }, { - "epoch": 0.43, - "grad_norm": 0.7433576356848729, - "learning_rate": 1.2871333517402251e-05, - "loss": 0.481, + "epoch": 0.34, + "grad_norm": 0.5817193537553901, + "learning_rate": 1.5372682355164232e-05, + "loss": 0.3482, "step": 7399 }, { - "epoch": 0.43, - "grad_norm": 0.3939320064161549, - "learning_rate": 1.2869550916971512e-05, - "loss": 0.2881, + "epoch": 0.34, + "grad_norm": 0.3272074305825856, + "learning_rate": 1.5371427367611293e-05, + "loss": 0.2701, "step": 7400 }, { - "epoch": 0.43, - "grad_norm": 0.8308971616875137, - "learning_rate": 1.2867768217168353e-05, - "loss": 0.3781, + "epoch": 0.34, + "grad_norm": 0.7872163492659507, + "learning_rate": 1.537017226114013e-05, + "loss": 0.5491, "step": 7401 }, { - "epoch": 0.43, - "grad_norm": 0.3497647575246814, - "learning_rate": 1.28659854180545e-05, - "loss": 0.3359, + "epoch": 0.34, + "grad_norm": 0.28011974507690085, + "learning_rate": 1.536891703577853e-05, + "loss": 0.1558, "step": 7402 }, { - "epoch": 0.43, - "grad_norm": 0.36233416373040706, - "learning_rate": 1.2864202519691698e-05, - "loss": 0.2455, + "epoch": 0.34, + "grad_norm": 0.38297167108067376, + "learning_rate": 1.5367661691554282e-05, + "loss": 0.2797, "step": 7403 }, { - "epoch": 0.43, - "grad_norm": 0.21250382304746632, - "learning_rate": 1.2862419522141684e-05, - "loss": 0.1056, + "epoch": 0.34, + "grad_norm": 0.44674679049483046, + "learning_rate": 1.5366406228495173e-05, + "loss": 0.3293, "step": 7404 }, { - "epoch": 0.43, - "grad_norm": 0.48055525319680503, - "learning_rate": 1.2860636425466207e-05, - "loss": 0.353, + "epoch": 0.34, + "grad_norm": 0.44660925094256254, + "learning_rate": 1.5365150646629004e-05, + "loss": 0.3293, "step": 7405 }, { - "epoch": 0.43, - "grad_norm": 0.2929548307001326, - "learning_rate": 1.285885322972701e-05, - "loss": 0.267, + "epoch": 0.34, + "grad_norm": 0.4113891216351563, + "learning_rate": 1.5363894945983567e-05, + "loss": 0.3141, "step": 7406 }, { - "epoch": 0.43, - "grad_norm": 0.9742707277009065, - "learning_rate": 1.2857069934985851e-05, - "loss": 0.3784, + "epoch": 0.34, + "grad_norm": 0.3462404533984853, + "learning_rate": 1.5362639126586673e-05, + "loss": 0.2135, "step": 7407 }, { - "epoch": 0.43, - "grad_norm": 0.7845705775706029, - "learning_rate": 1.2855286541304481e-05, - "loss": 0.4938, + "epoch": 0.34, + "grad_norm": 0.30868257800846194, + "learning_rate": 1.5361383188466113e-05, + "loss": 0.2776, "step": 7408 }, { - "epoch": 0.43, - "grad_norm": 0.3324160422402967, - "learning_rate": 1.2853503048744664e-05, - "loss": 0.2625, + "epoch": 0.34, + "grad_norm": 0.5389168960232992, + "learning_rate": 1.53601271316497e-05, + "loss": 0.3855, "step": 7409 }, { - "epoch": 0.43, - "grad_norm": 0.2891167340341053, - "learning_rate": 1.2851719457368157e-05, - "loss": 0.2068, + "epoch": 0.34, + "grad_norm": 0.6772778026739175, + "learning_rate": 1.5358870956165236e-05, + "loss": 0.4117, "step": 7410 }, { - "epoch": 0.43, - "grad_norm": 0.42210199453028185, - "learning_rate": 1.2849935767236729e-05, - "loss": 0.3184, + "epoch": 0.34, + "grad_norm": 0.5157806534004208, + "learning_rate": 1.5357614662040533e-05, + "loss": 0.3433, "step": 7411 }, { - "epoch": 0.43, - "grad_norm": 0.36308547586520445, - "learning_rate": 1.284815197841215e-05, - "loss": 0.3046, + "epoch": 0.34, + "grad_norm": 0.3463625131416063, + "learning_rate": 1.535635824930341e-05, + "loss": 0.3019, "step": 7412 }, { - "epoch": 0.43, - "grad_norm": 1.1643204041845043, - "learning_rate": 1.2846368090956185e-05, - "loss": 0.4728, + "epoch": 0.34, + "grad_norm": 0.34017885423262006, + "learning_rate": 1.5355101717981668e-05, + "loss": 0.2059, "step": 7413 }, { - "epoch": 0.43, - "grad_norm": 0.31753775412680657, - "learning_rate": 1.284458410493062e-05, - "loss": 0.2662, + "epoch": 0.34, + "grad_norm": 0.45054604936019377, + "learning_rate": 1.5353845068103145e-05, + "loss": 0.2522, "step": 7414 }, { - "epoch": 0.43, - "grad_norm": 0.38438890919644864, - "learning_rate": 1.2842800020397226e-05, - "loss": 0.2863, + "epoch": 0.34, + "grad_norm": 0.4521110168199353, + "learning_rate": 1.535258829969565e-05, + "loss": 0.3136, "step": 7415 }, { - "epoch": 0.43, - "grad_norm": 0.23075903001714582, - "learning_rate": 1.2841015837417792e-05, - "loss": 0.1904, + "epoch": 0.34, + "grad_norm": 0.41067475335875075, + "learning_rate": 1.5351331412787004e-05, + "loss": 0.3066, "step": 7416 }, { - "epoch": 0.43, - "grad_norm": 0.3437999763100494, - "learning_rate": 1.2839231556054101e-05, - "loss": 0.2819, + "epoch": 0.34, + "grad_norm": 0.6088159607865851, + "learning_rate": 1.5350074407405046e-05, + "loss": 0.3619, "step": 7417 }, { - "epoch": 0.43, - "grad_norm": 0.3856629904474758, - "learning_rate": 1.2837447176367944e-05, - "loss": 0.2964, + "epoch": 0.34, + "grad_norm": 0.42153144735752185, + "learning_rate": 1.5348817283577592e-05, + "loss": 0.3403, "step": 7418 }, { - "epoch": 0.43, - "grad_norm": 0.6560835963344139, - "learning_rate": 1.2835662698421112e-05, - "loss": 0.3934, + "epoch": 0.34, + "grad_norm": 0.9740823187645358, + "learning_rate": 1.534756004133248e-05, + "loss": 0.2614, "step": 7419 }, { - "epoch": 0.43, - "grad_norm": 0.7348691624408498, - "learning_rate": 1.2833878122275407e-05, - "loss": 0.2447, + "epoch": 0.34, + "grad_norm": 0.28870250007105147, + "learning_rate": 1.534630268069754e-05, + "loss": 0.2422, "step": 7420 }, { - "epoch": 0.43, - "grad_norm": 0.34732153973656654, - "learning_rate": 1.283209344799262e-05, - "loss": 0.2776, + "epoch": 0.34, + "grad_norm": 0.4448792083275207, + "learning_rate": 1.5345045201700614e-05, + "loss": 0.3507, "step": 7421 }, { - "epoch": 0.43, - "grad_norm": 0.29962271123904893, - "learning_rate": 1.283030867563456e-05, - "loss": 0.2691, + "epoch": 0.34, + "grad_norm": 0.8636266578613819, + "learning_rate": 1.534378760436954e-05, + "loss": 0.5364, "step": 7422 }, { - "epoch": 0.43, - "grad_norm": 0.7215084346836231, - "learning_rate": 1.282852380526303e-05, - "loss": 0.4093, + "epoch": 0.34, + "grad_norm": 0.30035283359769777, + "learning_rate": 1.5342529888732152e-05, + "loss": 0.2025, "step": 7423 }, { - "epoch": 0.43, - "grad_norm": 0.3187780573476562, - "learning_rate": 1.2826738836939844e-05, - "loss": 0.2666, + "epoch": 0.34, + "grad_norm": 0.3411670466454486, + "learning_rate": 1.534127205481631e-05, + "loss": 0.309, "step": 7424 }, { - "epoch": 0.43, - "grad_norm": 0.3051997295007057, - "learning_rate": 1.2824953770726813e-05, - "loss": 0.2971, + "epoch": 0.34, + "grad_norm": 1.0763322034249985, + "learning_rate": 1.5340014102649853e-05, + "loss": 0.5384, "step": 7425 }, { - "epoch": 0.43, - "grad_norm": 0.5136628961032486, - "learning_rate": 1.2823168606685756e-05, - "loss": 0.3615, + "epoch": 0.34, + "grad_norm": 0.2812896741343816, + "learning_rate": 1.5338756032260628e-05, + "loss": 0.1605, "step": 7426 }, { - "epoch": 0.43, - "grad_norm": 0.3726773739401942, - "learning_rate": 1.2821383344878491e-05, - "loss": 0.2014, + "epoch": 0.34, + "grad_norm": 0.5749663122594008, + "learning_rate": 1.5337497843676486e-05, + "loss": 0.3857, "step": 7427 }, { - "epoch": 0.43, - "grad_norm": 1.1260492763383276, - "learning_rate": 1.2819597985366843e-05, - "loss": 0.5823, + "epoch": 0.34, + "grad_norm": 0.39827004840795666, + "learning_rate": 1.533623953692529e-05, + "loss": 0.3244, "step": 7428 }, { - "epoch": 0.43, - "grad_norm": 0.49804903503850456, - "learning_rate": 1.2817812528212635e-05, - "loss": 0.3953, + "epoch": 0.34, + "grad_norm": 0.38405146896738, + "learning_rate": 1.533498111203489e-05, + "loss": 0.1877, "step": 7429 }, { - "epoch": 0.43, - "grad_norm": 0.278617679213449, - "learning_rate": 1.2816026973477702e-05, - "loss": 0.2334, + "epoch": 0.34, + "grad_norm": 0.42747989819313825, + "learning_rate": 1.5333722569033155e-05, + "loss": 0.3116, "step": 7430 }, { - "epoch": 0.43, - "grad_norm": 0.8077408746603993, - "learning_rate": 1.2814241321223876e-05, - "loss": 0.5714, + "epoch": 0.34, + "grad_norm": 0.5631259746134563, + "learning_rate": 1.533246390794794e-05, + "loss": 0.4154, "step": 7431 }, { - "epoch": 0.43, - "grad_norm": 0.2938940132824784, - "learning_rate": 1.2812455571512996e-05, - "loss": 0.178, + "epoch": 0.34, + "grad_norm": 0.39337118104131297, + "learning_rate": 1.5331205128807115e-05, + "loss": 0.216, "step": 7432 }, { - "epoch": 0.43, - "grad_norm": 0.31693402945068383, - "learning_rate": 1.28106697244069e-05, - "loss": 0.1713, + "epoch": 0.34, + "grad_norm": 0.39615780435011894, + "learning_rate": 1.5329946231638547e-05, + "loss": 0.3215, "step": 7433 }, { - "epoch": 0.43, - "grad_norm": 0.39056029624267147, - "learning_rate": 1.2808883779967429e-05, - "loss": 0.2973, + "epoch": 0.34, + "grad_norm": 0.5783803326651233, + "learning_rate": 1.5328687216470107e-05, + "loss": 0.451, "step": 7434 }, { - "epoch": 0.43, - "grad_norm": 0.8784300285705219, - "learning_rate": 1.2807097738256436e-05, - "loss": 0.4679, + "epoch": 0.34, + "grad_norm": 0.28589090781261717, + "learning_rate": 1.5327428083329666e-05, + "loss": 0.2123, "step": 7435 }, { - "epoch": 0.43, - "grad_norm": 0.6096405649192743, - "learning_rate": 1.2805311599335768e-05, - "loss": 0.3627, + "epoch": 0.34, + "grad_norm": 0.3202665731246141, + "learning_rate": 1.5326168832245102e-05, + "loss": 0.2074, "step": 7436 }, { - "epoch": 0.43, - "grad_norm": 0.280902071047559, - "learning_rate": 1.2803525363267281e-05, - "loss": 0.2223, + "epoch": 0.34, + "grad_norm": 1.2945968531203285, + "learning_rate": 1.53249094632443e-05, + "loss": 0.6154, "step": 7437 }, { - "epoch": 0.43, - "grad_norm": 0.33005696907181864, - "learning_rate": 1.2801739030112833e-05, - "loss": 0.2775, + "epoch": 0.34, + "grad_norm": 0.8224152641369826, + "learning_rate": 1.5323649976355123e-05, + "loss": 0.4852, "step": 7438 }, { - "epoch": 0.43, - "grad_norm": 0.5559933672937305, - "learning_rate": 1.279995259993428e-05, - "loss": 0.3064, + "epoch": 0.34, + "grad_norm": 0.3536179545599055, + "learning_rate": 1.5322390371605473e-05, + "loss": 0.2556, "step": 7439 }, { - "epoch": 0.43, - "grad_norm": 0.39420878524487984, - "learning_rate": 1.279816607279349e-05, - "loss": 0.2677, + "epoch": 0.34, + "grad_norm": 0.5272178587023302, + "learning_rate": 1.532113064902323e-05, + "loss": 0.41, "step": 7440 }, { - "epoch": 0.43, - "grad_norm": 0.5102506758920938, - "learning_rate": 1.279637944875233e-05, - "loss": 0.3898, + "epoch": 0.34, + "grad_norm": 0.3253688956313545, + "learning_rate": 1.5319870808636283e-05, + "loss": 0.1583, "step": 7441 }, { - "epoch": 0.43, - "grad_norm": 0.3100867254059578, - "learning_rate": 1.2794592727872665e-05, - "loss": 0.2904, + "epoch": 0.34, + "grad_norm": 0.3885265717992272, + "learning_rate": 1.531861085047252e-05, + "loss": 0.2243, "step": 7442 }, { - "epoch": 0.43, - "grad_norm": 0.2778272955079186, - "learning_rate": 1.279280591021638e-05, - "loss": 0.1926, + "epoch": 0.34, + "grad_norm": 0.5426092885015029, + "learning_rate": 1.5317350774559846e-05, + "loss": 0.4036, "step": 7443 }, { - "epoch": 0.43, - "grad_norm": 0.3652081080576812, - "learning_rate": 1.2791018995845343e-05, - "loss": 0.2066, + "epoch": 0.34, + "grad_norm": 0.47425564815526683, + "learning_rate": 1.5316090580926142e-05, + "loss": 0.3365, "step": 7444 }, { - "epoch": 0.43, - "grad_norm": 0.4003295351901793, - "learning_rate": 1.278923198482144e-05, - "loss": 0.2906, + "epoch": 0.34, + "grad_norm": 0.3638417196434101, + "learning_rate": 1.5314830269599325e-05, + "loss": 0.251, "step": 7445 }, { - "epoch": 0.43, - "grad_norm": 0.31584749571079734, - "learning_rate": 1.2787444877206552e-05, - "loss": 0.2526, + "epoch": 0.34, + "grad_norm": 0.6034329735415199, + "learning_rate": 1.5313569840607285e-05, + "loss": 0.491, "step": 7446 }, { - "epoch": 0.43, - "grad_norm": 0.6723729764280729, - "learning_rate": 1.2785657673062567e-05, - "loss": 0.4328, + "epoch": 0.34, + "grad_norm": 0.2816419348045927, + "learning_rate": 1.531230929397793e-05, + "loss": 0.2131, "step": 7447 }, { - "epoch": 0.43, - "grad_norm": 0.4069163041642835, - "learning_rate": 1.2783870372451377e-05, - "loss": 0.3099, + "epoch": 0.34, + "grad_norm": 0.37760397647907457, + "learning_rate": 1.5311048629739165e-05, + "loss": 0.3218, "step": 7448 }, { - "epoch": 0.43, - "grad_norm": 0.5471645897491344, - "learning_rate": 1.278208297543488e-05, - "loss": 0.3764, + "epoch": 0.34, + "grad_norm": 0.3239141979729318, + "learning_rate": 1.5309787847918905e-05, + "loss": 0.117, "step": 7449 }, { - "epoch": 0.43, - "grad_norm": 0.22582651123557201, - "learning_rate": 1.2780295482074965e-05, - "loss": 0.1677, + "epoch": 0.34, + "grad_norm": 0.9030421158672063, + "learning_rate": 1.530852694854506e-05, + "loss": 0.4943, "step": 7450 }, { - "epoch": 0.43, - "grad_norm": 0.38504551814048305, - "learning_rate": 1.2778507892433538e-05, - "loss": 0.2699, + "epoch": 0.34, + "grad_norm": 0.37429477607621975, + "learning_rate": 1.530726593164554e-05, + "loss": 0.3023, "step": 7451 }, { - "epoch": 0.43, - "grad_norm": 0.5300421426970018, - "learning_rate": 1.2776720206572502e-05, - "loss": 0.3693, + "epoch": 0.34, + "grad_norm": 0.3761822738189655, + "learning_rate": 1.5306004797248274e-05, + "loss": 0.3101, "step": 7452 }, { - "epoch": 0.43, - "grad_norm": 0.4316438122749434, - "learning_rate": 1.2774932424553763e-05, - "loss": 0.3068, + "epoch": 0.34, + "grad_norm": 0.4680229755743139, + "learning_rate": 1.5304743545381167e-05, + "loss": 0.2984, "step": 7453 }, { - "epoch": 0.43, - "grad_norm": 0.6039224321850404, - "learning_rate": 1.2773144546439235e-05, - "loss": 0.3833, + "epoch": 0.34, + "grad_norm": 0.27341575051143846, + "learning_rate": 1.530348217607216e-05, + "loss": 0.2215, "step": 7454 }, { - "epoch": 0.43, - "grad_norm": 0.41397690083601735, - "learning_rate": 1.2771356572290834e-05, - "loss": 0.3263, + "epoch": 0.34, + "grad_norm": 0.46700699220111536, + "learning_rate": 1.5302220689349164e-05, + "loss": 0.2893, "step": 7455 }, { - "epoch": 0.43, - "grad_norm": 0.22875703408665155, - "learning_rate": 1.2769568502170469e-05, - "loss": 0.1126, + "epoch": 0.34, + "grad_norm": 0.5203159400463176, + "learning_rate": 1.5300959085240116e-05, + "loss": 0.3764, "step": 7456 }, { - "epoch": 0.43, - "grad_norm": 0.38290759108779704, - "learning_rate": 1.276778033614007e-05, - "loss": 0.2816, + "epoch": 0.34, + "grad_norm": 0.37016794019196847, + "learning_rate": 1.529969736377294e-05, + "loss": 0.3002, "step": 7457 }, { - "epoch": 0.43, - "grad_norm": 0.3645825413303027, - "learning_rate": 1.2765992074261555e-05, - "loss": 0.3286, + "epoch": 0.34, + "grad_norm": 0.9185725531214263, + "learning_rate": 1.5298435524975572e-05, + "loss": 0.5663, "step": 7458 }, { - "epoch": 0.43, - "grad_norm": 0.7066287348903324, - "learning_rate": 1.276420371659685e-05, - "loss": 0.3722, + "epoch": 0.34, + "grad_norm": 0.3486328200511371, + "learning_rate": 1.529717356887595e-05, + "loss": 0.3062, "step": 7459 }, { - "epoch": 0.43, - "grad_norm": 0.6244111465506637, - "learning_rate": 1.2762415263207895e-05, - "loss": 0.3676, + "epoch": 0.34, + "grad_norm": 0.29142841504966366, + "learning_rate": 1.5295911495502013e-05, + "loss": 0.2395, "step": 7460 }, { - "epoch": 0.43, - "grad_norm": 0.2867319890884178, - "learning_rate": 1.2760626714156614e-05, - "loss": 0.2733, + "epoch": 0.34, + "grad_norm": 0.4455480554847195, + "learning_rate": 1.529464930488169e-05, + "loss": 0.2378, "step": 7461 }, { - "epoch": 0.43, - "grad_norm": 0.37856860799168346, - "learning_rate": 1.2758838069504952e-05, - "loss": 0.2388, + "epoch": 0.34, + "grad_norm": 0.7359637657650808, + "learning_rate": 1.5293386997042943e-05, + "loss": 0.3277, "step": 7462 }, { - "epoch": 0.43, - "grad_norm": 0.27983356612817223, - "learning_rate": 1.2757049329314842e-05, - "loss": 0.1823, + "epoch": 0.34, + "grad_norm": 0.38545745589505565, + "learning_rate": 1.529212457201371e-05, + "loss": 0.2857, "step": 7463 }, { - "epoch": 0.43, - "grad_norm": 0.675249528883432, - "learning_rate": 1.2755260493648235e-05, - "loss": 0.3907, + "epoch": 0.34, + "grad_norm": 0.41056274313660246, + "learning_rate": 1.5290862029821935e-05, + "loss": 0.3461, "step": 7464 }, { - "epoch": 0.43, - "grad_norm": 0.3990296921748976, - "learning_rate": 1.2753471562567074e-05, - "loss": 0.3226, + "epoch": 0.34, + "grad_norm": 0.2976494354578043, + "learning_rate": 1.5289599370495576e-05, + "loss": 0.1503, "step": 7465 }, { - "epoch": 0.43, - "grad_norm": 0.3446096915891851, - "learning_rate": 1.2751682536133313e-05, - "loss": 0.2236, + "epoch": 0.34, + "grad_norm": 0.39757251881996175, + "learning_rate": 1.5288336594062586e-05, + "loss": 0.2953, "step": 7466 }, { - "epoch": 0.43, - "grad_norm": 0.5313072118472417, - "learning_rate": 1.2749893414408903e-05, - "loss": 0.3751, + "epoch": 0.34, + "grad_norm": 0.29660060828542445, + "learning_rate": 1.5287073700550923e-05, + "loss": 0.2518, "step": 7467 }, { - "epoch": 0.43, - "grad_norm": 0.265255518955803, - "learning_rate": 1.2748104197455804e-05, - "loss": 0.1889, + "epoch": 0.34, + "grad_norm": 1.2487615456532466, + "learning_rate": 1.528581068998854e-05, + "loss": 0.3927, "step": 7468 }, { - "epoch": 0.43, - "grad_norm": 0.3086628494538964, - "learning_rate": 1.274631488533597e-05, - "loss": 0.235, + "epoch": 0.34, + "grad_norm": 0.341170981655371, + "learning_rate": 1.5284547562403403e-05, + "loss": 0.2572, "step": 7469 }, { - "epoch": 0.43, - "grad_norm": 0.3955694497758381, - "learning_rate": 1.2744525478111379e-05, - "loss": 0.3382, + "epoch": 0.34, + "grad_norm": 1.0567752346894428, + "learning_rate": 1.5283284317823478e-05, + "loss": 0.7389, "step": 7470 }, { - "epoch": 0.43, - "grad_norm": 0.7593369964854633, - "learning_rate": 1.274273597584398e-05, - "loss": 0.5741, + "epoch": 0.34, + "grad_norm": 0.33374293652781567, + "learning_rate": 1.528202095627673e-05, + "loss": 0.2647, "step": 7471 }, { - "epoch": 0.43, - "grad_norm": 0.38222913352863014, - "learning_rate": 1.2740946378595758e-05, - "loss": 0.1657, + "epoch": 0.34, + "grad_norm": 0.30698538525180963, + "learning_rate": 1.528075747779113e-05, + "loss": 0.2374, "step": 7472 }, { - "epoch": 0.43, - "grad_norm": 0.3301246287937124, - "learning_rate": 1.2739156686428675e-05, - "loss": 0.2843, + "epoch": 0.34, + "grad_norm": 0.549488764202552, + "learning_rate": 1.5279493882394648e-05, + "loss": 0.3092, "step": 7473 }, { - "epoch": 0.43, - "grad_norm": 0.47079369177307934, - "learning_rate": 1.2737366899404718e-05, - "loss": 0.309, + "epoch": 0.34, + "grad_norm": 0.9672870777023324, + "learning_rate": 1.527823017011526e-05, + "loss": 0.5008, "step": 7474 }, { - "epoch": 0.43, - "grad_norm": 0.6375823621608645, - "learning_rate": 1.273557701758586e-05, - "loss": 0.4102, + "epoch": 0.34, + "grad_norm": 0.268426252447338, + "learning_rate": 1.527696634098094e-05, + "loss": 0.2245, "step": 7475 }, { - "epoch": 0.43, - "grad_norm": 0.3015373234296537, - "learning_rate": 1.2733787041034092e-05, - "loss": 0.2052, + "epoch": 0.34, + "grad_norm": 0.5063629176984931, + "learning_rate": 1.5275702395019675e-05, + "loss": 0.3793, "step": 7476 }, { - "epoch": 0.43, - "grad_norm": 0.3550705089146975, - "learning_rate": 1.2731996969811393e-05, - "loss": 0.2863, + "epoch": 0.34, + "grad_norm": 0.7662958564459699, + "learning_rate": 1.5274438332259442e-05, + "loss": 0.6107, "step": 7477 }, { - "epoch": 0.43, - "grad_norm": 0.7378788393900801, - "learning_rate": 1.2730206803979754e-05, - "loss": 0.5696, + "epoch": 0.34, + "grad_norm": 0.27659298797488513, + "learning_rate": 1.527317415272823e-05, + "loss": 0.1717, "step": 7478 }, { - "epoch": 0.43, - "grad_norm": 0.3263003907992609, - "learning_rate": 1.2728416543601177e-05, - "loss": 0.2251, + "epoch": 0.34, + "grad_norm": 0.3390568180645851, + "learning_rate": 1.5271909856454024e-05, + "loss": 0.2908, "step": 7479 }, { - "epoch": 0.43, - "grad_norm": 0.775695398769665, - "learning_rate": 1.2726626188737647e-05, - "loss": 0.4716, + "epoch": 0.34, + "grad_norm": 0.8323832038949739, + "learning_rate": 1.5270645443464817e-05, + "loss": 0.4798, "step": 7480 }, { - "epoch": 0.43, - "grad_norm": 0.33048671823166975, - "learning_rate": 1.2724835739451172e-05, - "loss": 0.2793, + "epoch": 0.34, + "grad_norm": 0.3289274546627059, + "learning_rate": 1.526938091378859e-05, + "loss": 0.2005, "step": 7481 }, { - "epoch": 0.43, - "grad_norm": 0.2443173066345539, - "learning_rate": 1.272304519580375e-05, - "loss": 0.1975, + "epoch": 0.34, + "grad_norm": 1.2108898337498082, + "learning_rate": 1.5268116267453358e-05, + "loss": 0.8383, "step": 7482 }, { - "epoch": 0.43, - "grad_norm": 0.37993338584071, - "learning_rate": 1.2721254557857394e-05, - "loss": 0.255, + "epoch": 0.34, + "grad_norm": 0.4080409825021209, + "learning_rate": 1.5266851504487105e-05, + "loss": 0.3201, "step": 7483 }, { - "epoch": 0.43, - "grad_norm": 0.5994907663851344, - "learning_rate": 1.2719463825674105e-05, - "loss": 0.3759, + "epoch": 0.34, + "grad_norm": 0.32407646629228454, + "learning_rate": 1.5265586624917842e-05, + "loss": 0.2061, "step": 7484 }, { - "epoch": 0.43, - "grad_norm": 0.35639175499384124, - "learning_rate": 1.2717672999315904e-05, - "loss": 0.2441, + "epoch": 0.34, + "grad_norm": 0.5629160279005104, + "learning_rate": 1.526432162877356e-05, + "loss": 0.4575, "step": 7485 }, { - "epoch": 0.43, - "grad_norm": 0.5549223599700548, - "learning_rate": 1.2715882078844804e-05, - "loss": 0.3907, + "epoch": 0.34, + "grad_norm": 0.2948531088485579, + "learning_rate": 1.5263056516082273e-05, + "loss": 0.1963, "step": 7486 }, { - "epoch": 0.43, - "grad_norm": 0.5760563913885721, - "learning_rate": 1.2714091064322824e-05, - "loss": 0.3488, + "epoch": 0.34, + "grad_norm": 0.36735111558375133, + "learning_rate": 1.5261791286871986e-05, + "loss": 0.3104, "step": 7487 }, { - "epoch": 0.43, - "grad_norm": 0.30595159376311326, - "learning_rate": 1.2712299955811987e-05, - "loss": 0.2508, + "epoch": 0.34, + "grad_norm": 0.47347670733903713, + "learning_rate": 1.526052594117071e-05, + "loss": 0.2843, "step": 7488 }, { - "epoch": 0.43, - "grad_norm": 0.2638652449536984, - "learning_rate": 1.2710508753374317e-05, - "loss": 0.2126, + "epoch": 0.34, + "grad_norm": 1.8179242295709765, + "learning_rate": 1.5259260479006465e-05, + "loss": 0.7135, "step": 7489 }, { - "epoch": 0.43, - "grad_norm": 1.237358245192641, - "learning_rate": 1.2708717457071849e-05, - "loss": 0.7199, + "epoch": 0.34, + "grad_norm": 0.33706820734056187, + "learning_rate": 1.525799490040726e-05, + "loss": 0.2956, "step": 7490 }, { - "epoch": 0.43, - "grad_norm": 0.36818599307685185, - "learning_rate": 1.270692606696661e-05, - "loss": 0.2631, + "epoch": 0.34, + "grad_norm": 0.3606023174329244, + "learning_rate": 1.5256729205401112e-05, + "loss": 0.2892, "step": 7491 }, { - "epoch": 0.43, - "grad_norm": 0.471986003217928, - "learning_rate": 1.2705134583120638e-05, - "loss": 0.3152, + "epoch": 0.34, + "grad_norm": 0.3100718256304107, + "learning_rate": 1.525546339401605e-05, + "loss": 0.1919, "step": 7492 }, { - "epoch": 0.43, - "grad_norm": 0.42344872331296374, - "learning_rate": 1.2703343005595972e-05, - "loss": 0.3291, + "epoch": 0.34, + "grad_norm": 0.3816464395095946, + "learning_rate": 1.5254197466280091e-05, + "loss": 0.2915, "step": 7493 }, { - "epoch": 0.43, - "grad_norm": 0.36850684943447043, - "learning_rate": 1.2701551334454652e-05, - "loss": 0.2939, + "epoch": 0.34, + "grad_norm": 1.0806042282622113, + "learning_rate": 1.5252931422221266e-05, + "loss": 0.5796, "step": 7494 }, { - "epoch": 0.43, - "grad_norm": 0.21811476150078793, - "learning_rate": 1.2699759569758727e-05, - "loss": 0.1028, + "epoch": 0.34, + "grad_norm": 0.4271866844900743, + "learning_rate": 1.5251665261867602e-05, + "loss": 0.3301, "step": 7495 }, { - "epoch": 0.43, - "grad_norm": 0.6464980054789515, - "learning_rate": 1.2697967711570243e-05, - "loss": 0.4877, + "epoch": 0.34, + "grad_norm": 0.3831815223534995, + "learning_rate": 1.525039898524713e-05, + "loss": 0.293, "step": 7496 }, { - "epoch": 0.43, - "grad_norm": 0.28044990754809157, - "learning_rate": 1.2696175759951254e-05, - "loss": 0.2786, + "epoch": 0.34, + "grad_norm": 0.6244372298677882, + "learning_rate": 1.5249132592387888e-05, + "loss": 0.3659, "step": 7497 }, { - "epoch": 0.43, - "grad_norm": 0.6001584093412066, - "learning_rate": 1.2694383714963818e-05, - "loss": 0.3558, + "epoch": 0.34, + "grad_norm": 0.26633684243163896, + "learning_rate": 1.5247866083317907e-05, + "loss": 0.2006, "step": 7498 }, { - "epoch": 0.43, - "grad_norm": 0.6795937624798115, - "learning_rate": 1.2692591576669983e-05, - "loss": 0.443, + "epoch": 0.34, + "grad_norm": 0.41427157436356155, + "learning_rate": 1.5246599458065228e-05, + "loss": 0.2664, "step": 7499 }, { - "epoch": 0.43, - "grad_norm": 0.2836693554883349, - "learning_rate": 1.2690799345131824e-05, - "loss": 0.2405, + "epoch": 0.34, + "grad_norm": 0.4423490591743375, + "learning_rate": 1.5245332716657892e-05, + "loss": 0.3273, "step": 7500 }, { - "epoch": 0.43, - "grad_norm": 0.3201123731313662, - "learning_rate": 1.2689007020411394e-05, - "loss": 0.2822, + "epoch": 0.34, + "grad_norm": 0.6046665380529342, + "learning_rate": 1.5244065859123949e-05, + "loss": 0.3737, "step": 7501 }, { - "epoch": 0.43, - "grad_norm": 0.36880094607275504, - "learning_rate": 1.268721460257077e-05, - "loss": 0.2344, + "epoch": 0.34, + "grad_norm": 0.4927001615674848, + "learning_rate": 1.5242798885491442e-05, + "loss": 0.3128, "step": 7502 }, { - "epoch": 0.43, - "grad_norm": 0.42415078679604923, - "learning_rate": 1.2685422091672017e-05, - "loss": 0.3176, + "epoch": 0.34, + "grad_norm": 0.36468526505287974, + "learning_rate": 1.5241531795788417e-05, + "loss": 0.2972, "step": 7503 }, { - "epoch": 0.43, - "grad_norm": 1.0388651965272275, - "learning_rate": 1.2683629487777219e-05, - "loss": 0.4227, + "epoch": 0.34, + "grad_norm": 0.2542059378937968, + "learning_rate": 1.5240264590042935e-05, + "loss": 0.0722, "step": 7504 }, { - "epoch": 0.43, - "grad_norm": 0.26138461355474946, - "learning_rate": 1.268183679094844e-05, - "loss": 0.2142, + "epoch": 0.34, + "grad_norm": 0.45555414330273286, + "learning_rate": 1.5238997268283042e-05, + "loss": 0.3005, "step": 7505 }, { - "epoch": 0.43, - "grad_norm": 0.38880693292751656, - "learning_rate": 1.2680044001247774e-05, - "loss": 0.3062, + "epoch": 0.34, + "grad_norm": 0.5887346619932107, + "learning_rate": 1.5237729830536798e-05, + "loss": 0.4516, "step": 7506 }, { - "epoch": 0.43, - "grad_norm": 0.28854957691060595, - "learning_rate": 1.2678251118737293e-05, - "loss": 0.2059, + "epoch": 0.34, + "grad_norm": 0.3578551983361436, + "learning_rate": 1.523646227683227e-05, + "loss": 0.2811, "step": 7507 }, { - "epoch": 0.43, - "grad_norm": 0.6409188766262047, - "learning_rate": 1.2676458143479095e-05, - "loss": 0.306, + "epoch": 0.34, + "grad_norm": 0.4550691073689091, + "learning_rate": 1.5235194607197508e-05, + "loss": 0.3108, "step": 7508 }, { - "epoch": 0.43, - "grad_norm": 0.3106921649626693, - "learning_rate": 1.2674665075535264e-05, - "loss": 0.2657, + "epoch": 0.34, + "grad_norm": 0.5318724983277151, + "learning_rate": 1.5233926821660585e-05, + "loss": 0.3677, "step": 7509 }, { - "epoch": 0.43, - "grad_norm": 0.9982593919162711, - "learning_rate": 1.26728719149679e-05, - "loss": 0.4393, + "epoch": 0.35, + "grad_norm": 0.2806655152821678, + "learning_rate": 1.5232658920249566e-05, + "loss": 0.1812, "step": 7510 }, { - "epoch": 0.43, - "grad_norm": 1.0040110349618188, - "learning_rate": 1.2671078661839093e-05, - "loss": 0.2504, + "epoch": 0.35, + "grad_norm": 0.263792744228147, + "learning_rate": 1.5231390902992522e-05, + "loss": 0.234, "step": 7511 }, { - "epoch": 0.43, - "grad_norm": 0.27760287035235526, - "learning_rate": 1.2669285316210948e-05, - "loss": 0.2207, + "epoch": 0.35, + "grad_norm": 1.2966600076615102, + "learning_rate": 1.5230122769917528e-05, + "loss": 0.7995, "step": 7512 }, { - "epoch": 0.43, - "grad_norm": 0.33738457628735213, - "learning_rate": 1.2667491878145568e-05, - "loss": 0.2674, + "epoch": 0.35, + "grad_norm": 0.6890544113894183, + "learning_rate": 1.5228854521052655e-05, + "loss": 0.454, "step": 7513 }, { - "epoch": 0.43, - "grad_norm": 0.8728551700618518, - "learning_rate": 1.266569834770506e-05, - "loss": 0.4935, + "epoch": 0.35, + "grad_norm": 0.3170781589725387, + "learning_rate": 1.5227586156425982e-05, + "loss": 0.2305, "step": 7514 }, { - "epoch": 0.43, - "grad_norm": 0.5237880393460295, - "learning_rate": 1.266390472495153e-05, - "loss": 0.2501, + "epoch": 0.35, + "grad_norm": 0.38436896926568165, + "learning_rate": 1.522631767606559e-05, + "loss": 0.3147, "step": 7515 }, { - "epoch": 0.43, - "grad_norm": 1.2321177960808543, - "learning_rate": 1.2662111009947096e-05, - "loss": 0.7564, + "epoch": 0.35, + "grad_norm": 0.4538049040640755, + "learning_rate": 1.5225049079999561e-05, + "loss": 0.3114, "step": 7516 }, { - "epoch": 0.43, - "grad_norm": 0.3852958785860179, - "learning_rate": 1.2660317202753873e-05, - "loss": 0.308, + "epoch": 0.35, + "grad_norm": 0.3536241715541373, + "learning_rate": 1.522378036825598e-05, + "loss": 0.1675, "step": 7517 }, { - "epoch": 0.43, - "grad_norm": 0.2878776513060847, - "learning_rate": 1.2658523303433979e-05, - "loss": 0.2114, + "epoch": 0.35, + "grad_norm": 0.3847510700233104, + "learning_rate": 1.5222511540862941e-05, + "loss": 0.3244, "step": 7518 }, { - "epoch": 0.43, - "grad_norm": 0.3866565714290425, - "learning_rate": 1.2656729312049536e-05, - "loss": 0.2787, + "epoch": 0.35, + "grad_norm": 0.4314688330144183, + "learning_rate": 1.5221242597848527e-05, + "loss": 0.3334, "step": 7519 }, { - "epoch": 0.43, - "grad_norm": 0.44929076242454746, - "learning_rate": 1.265493522866267e-05, - "loss": 0.3211, + "epoch": 0.35, + "grad_norm": 0.22922207842728282, + "learning_rate": 1.5219973539240838e-05, + "loss": 0.0957, "step": 7520 }, { - "epoch": 0.43, - "grad_norm": 0.31019924286079437, - "learning_rate": 1.2653141053335513e-05, - "loss": 0.2021, + "epoch": 0.35, + "grad_norm": 0.460065962373246, + "learning_rate": 1.521870436506796e-05, + "loss": 0.3394, "step": 7521 }, { - "epoch": 0.43, - "grad_norm": 0.5257400746389835, - "learning_rate": 1.2651346786130199e-05, - "loss": 0.3338, + "epoch": 0.35, + "grad_norm": 0.33771746726605406, + "learning_rate": 1.5217435075358e-05, + "loss": 0.2672, "step": 7522 }, { - "epoch": 0.43, - "grad_norm": 0.7700473026585529, - "learning_rate": 1.2649552427108856e-05, - "loss": 0.4551, + "epoch": 0.35, + "grad_norm": 0.4096262723596227, + "learning_rate": 1.5216165670139055e-05, + "loss": 0.2944, "step": 7523 }, { - "epoch": 0.43, - "grad_norm": 0.37836722990428473, - "learning_rate": 1.2647757976333626e-05, - "loss": 0.2697, + "epoch": 0.35, + "grad_norm": 0.37129980981951305, + "learning_rate": 1.521489614943923e-05, + "loss": 0.2975, "step": 7524 }, { - "epoch": 0.43, - "grad_norm": 0.40109303353556514, - "learning_rate": 1.2645963433866653e-05, - "loss": 0.2891, + "epoch": 0.35, + "grad_norm": 0.6952761247654623, + "learning_rate": 1.5213626513286632e-05, + "loss": 0.5164, "step": 7525 }, { - "epoch": 0.43, - "grad_norm": 0.5801053269128108, - "learning_rate": 1.2644168799770075e-05, - "loss": 0.4479, + "epoch": 0.35, + "grad_norm": 0.3666491862133794, + "learning_rate": 1.5212356761709368e-05, + "loss": 0.1817, "step": 7526 }, { - "epoch": 0.43, - "grad_norm": 0.3038245233952448, - "learning_rate": 1.2642374074106052e-05, - "loss": 0.2673, + "epoch": 0.35, + "grad_norm": 0.3191021051242505, + "learning_rate": 1.5211086894735547e-05, + "loss": 0.2605, "step": 7527 }, { - "epoch": 0.43, - "grad_norm": 0.23955365444396562, - "learning_rate": 1.2640579256936723e-05, - "loss": 0.195, + "epoch": 0.35, + "grad_norm": 1.0857125271459127, + "learning_rate": 1.5209816912393284e-05, + "loss": 0.6784, "step": 7528 }, { - "epoch": 0.43, - "grad_norm": 0.9894905671520695, - "learning_rate": 1.263878434832425e-05, - "loss": 0.5351, + "epoch": 0.35, + "grad_norm": 0.6088230829130713, + "learning_rate": 1.5208546814710701e-05, + "loss": 0.3808, "step": 7529 }, { - "epoch": 0.43, - "grad_norm": 0.33128274197373925, - "learning_rate": 1.2636989348330791e-05, - "loss": 0.2585, + "epoch": 0.35, + "grad_norm": 0.3633523535313251, + "learning_rate": 1.5207276601715906e-05, + "loss": 0.269, "step": 7530 }, { - "epoch": 0.43, - "grad_norm": 0.7002081987136007, - "learning_rate": 1.2635194257018501e-05, - "loss": 0.3638, + "epoch": 0.35, + "grad_norm": 0.38317521185310766, + "learning_rate": 1.5206006273437031e-05, + "loss": 0.3545, "step": 7531 }, { - "epoch": 0.43, - "grad_norm": 0.3775323790192974, - "learning_rate": 1.2633399074449548e-05, - "loss": 0.3146, + "epoch": 0.35, + "grad_norm": 0.243114007169776, + "learning_rate": 1.5204735829902188e-05, + "loss": 0.1825, "step": 7532 }, { - "epoch": 0.43, - "grad_norm": 0.33698150077075945, - "learning_rate": 1.2631603800686099e-05, - "loss": 0.279, + "epoch": 0.35, + "grad_norm": 0.501772504164025, + "learning_rate": 1.5203465271139517e-05, + "loss": 0.2926, "step": 7533 }, { - "epoch": 0.43, - "grad_norm": 0.18218076324549323, - "learning_rate": 1.2629808435790322e-05, - "loss": 0.1041, + "epoch": 0.35, + "grad_norm": 0.401238621785067, + "learning_rate": 1.5202194597177134e-05, + "loss": 0.3192, "step": 7534 }, { - "epoch": 0.43, - "grad_norm": 1.185980333284646, - "learning_rate": 1.2628012979824394e-05, - "loss": 0.7281, + "epoch": 0.35, + "grad_norm": 0.924095619600899, + "learning_rate": 1.520092380804318e-05, + "loss": 0.5454, "step": 7535 }, { - "epoch": 0.43, - "grad_norm": 0.34323992321440966, - "learning_rate": 1.262621743285049e-05, - "loss": 0.262, + "epoch": 0.35, + "grad_norm": 0.37424485973635235, + "learning_rate": 1.5199652903765784e-05, + "loss": 0.3403, "step": 7536 }, { - "epoch": 0.43, - "grad_norm": 0.37375107807679175, - "learning_rate": 1.2624421794930785e-05, - "loss": 0.3399, + "epoch": 0.35, + "grad_norm": 0.3083202719942609, + "learning_rate": 1.5198381884373088e-05, + "loss": 0.1979, "step": 7537 }, { - "epoch": 0.43, - "grad_norm": 0.9873288871171293, - "learning_rate": 1.262262606612747e-05, - "loss": 0.3846, + "epoch": 0.35, + "grad_norm": 0.5007422101297211, + "learning_rate": 1.5197110749893225e-05, + "loss": 0.3345, "step": 7538 }, { - "epoch": 0.43, - "grad_norm": 0.4163728309097746, - "learning_rate": 1.2620830246502729e-05, - "loss": 0.2865, + "epoch": 0.35, + "grad_norm": 0.3306098922238994, + "learning_rate": 1.5195839500354337e-05, + "loss": 0.257, "step": 7539 }, { - "epoch": 0.43, - "grad_norm": 0.29402576393144825, - "learning_rate": 1.2619034336118742e-05, - "loss": 0.2692, + "epoch": 0.35, + "grad_norm": 0.7484481808203862, + "learning_rate": 1.5194568135784573e-05, + "loss": 0.3601, "step": 7540 }, { - "epoch": 0.43, - "grad_norm": 0.3024954021730123, - "learning_rate": 1.2617238335037717e-05, - "loss": 0.222, + "epoch": 0.35, + "grad_norm": 0.5132107700274147, + "learning_rate": 1.5193296656212075e-05, + "loss": 0.4012, "step": 7541 }, { - "epoch": 0.43, - "grad_norm": 0.4399043946526552, - "learning_rate": 1.2615442243321837e-05, - "loss": 0.2605, + "epoch": 0.35, + "grad_norm": 0.36927532916218836, + "learning_rate": 1.5192025061664994e-05, + "loss": 0.2886, "step": 7542 }, { - "epoch": 0.43, - "grad_norm": 0.6267856143786402, - "learning_rate": 1.2613646061033303e-05, - "loss": 0.3875, + "epoch": 0.35, + "grad_norm": 0.37028677569356977, + "learning_rate": 1.5190753352171485e-05, + "loss": 0.2739, "step": 7543 }, { - "epoch": 0.43, - "grad_norm": 0.383120587325467, - "learning_rate": 1.261184978823432e-05, - "loss": 0.2875, + "epoch": 0.35, + "grad_norm": 0.30138740496114497, + "learning_rate": 1.51894815277597e-05, + "loss": 0.1564, "step": 7544 }, { - "epoch": 0.43, - "grad_norm": 0.3409458484018371, - "learning_rate": 1.261005342498709e-05, - "loss": 0.277, + "epoch": 0.35, + "grad_norm": 0.42385233806185363, + "learning_rate": 1.5188209588457791e-05, + "loss": 0.3134, "step": 7545 }, { - "epoch": 0.43, - "grad_norm": 0.25057745497441336, - "learning_rate": 1.2608256971353825e-05, - "loss": 0.1904, + "epoch": 0.35, + "grad_norm": 0.5152810910436364, + "learning_rate": 1.5186937534293926e-05, + "loss": 0.2671, "step": 7546 }, { - "epoch": 0.43, - "grad_norm": 0.9892133398630651, - "learning_rate": 1.2606460427396729e-05, - "loss": 0.0705, + "epoch": 0.35, + "grad_norm": 0.43127471855073274, + "learning_rate": 1.518566536529626e-05, + "loss": 0.3013, "step": 7547 }, { - "epoch": 0.43, - "grad_norm": 0.3220890480663572, - "learning_rate": 1.2604663793178025e-05, - "loss": 0.2779, + "epoch": 0.35, + "grad_norm": 0.4227657429301361, + "learning_rate": 1.5184393081492966e-05, + "loss": 0.3489, "step": 7548 }, { - "epoch": 0.43, - "grad_norm": 0.3346067280468399, - "learning_rate": 1.2602867068759921e-05, - "loss": 0.3242, + "epoch": 0.35, + "grad_norm": 0.8779681589153319, + "learning_rate": 1.5183120682912203e-05, + "loss": 0.6061, "step": 7549 }, { - "epoch": 0.43, - "grad_norm": 0.9802497675330882, - "learning_rate": 1.2601070254204645e-05, - "loss": 0.7108, + "epoch": 0.35, + "grad_norm": 0.24829017852142088, + "learning_rate": 1.5181848169582143e-05, + "loss": 0.1925, "step": 7550 }, { - "epoch": 0.43, - "grad_norm": 0.3310026474654711, - "learning_rate": 1.2599273349574417e-05, - "loss": 0.2248, + "epoch": 0.35, + "grad_norm": 0.3374194623370902, + "learning_rate": 1.518057554153096e-05, + "loss": 0.2403, "step": 7551 }, { - "epoch": 0.43, - "grad_norm": 0.4692480509388019, - "learning_rate": 1.2597476354931466e-05, - "loss": 0.3678, + "epoch": 0.35, + "grad_norm": 0.8005939118982769, + "learning_rate": 1.5179302798786827e-05, + "loss": 0.5152, "step": 7552 }, { - "epoch": 0.43, - "grad_norm": 0.26325901123873297, - "learning_rate": 1.2595679270338019e-05, - "loss": 0.2001, + "epoch": 0.35, + "grad_norm": 0.5611860342144768, + "learning_rate": 1.5178029941377924e-05, + "loss": 0.3069, "step": 7553 }, { - "epoch": 0.43, - "grad_norm": 0.3018764077905086, - "learning_rate": 1.2593882095856314e-05, - "loss": 0.2367, + "epoch": 0.35, + "grad_norm": 0.41647710662864074, + "learning_rate": 1.5176756969332428e-05, + "loss": 0.3213, "step": 7554 }, { - "epoch": 0.43, - "grad_norm": 0.6035545300935379, - "learning_rate": 1.259208483154858e-05, - "loss": 0.4614, + "epoch": 0.35, + "grad_norm": 0.38664346700979757, + "learning_rate": 1.5175483882678519e-05, + "loss": 0.3363, "step": 7555 }, { - "epoch": 0.43, - "grad_norm": 0.3817271648715765, - "learning_rate": 1.2590287477477063e-05, - "loss": 0.3635, + "epoch": 0.35, + "grad_norm": 0.17927402266255998, + "learning_rate": 1.5174210681444388e-05, + "loss": 0.0927, "step": 7556 }, { - "epoch": 0.43, - "grad_norm": 0.3606104677787802, - "learning_rate": 1.2588490033703999e-05, - "loss": 0.1954, + "epoch": 0.35, + "grad_norm": 0.40567816147288766, + "learning_rate": 1.5172937365658217e-05, + "loss": 0.2843, "step": 7557 }, { - "epoch": 0.43, - "grad_norm": 0.33357966342409084, - "learning_rate": 1.2586692500291639e-05, - "loss": 0.2763, + "epoch": 0.35, + "grad_norm": 0.5283520845558017, + "learning_rate": 1.51716639353482e-05, + "loss": 0.348, "step": 7558 }, { - "epoch": 0.43, - "grad_norm": 0.4054777299779856, - "learning_rate": 1.2584894877302224e-05, - "loss": 0.3124, + "epoch": 0.35, + "grad_norm": 0.9836608529368737, + "learning_rate": 1.5170390390542529e-05, + "loss": 0.3047, "step": 7559 }, { - "epoch": 0.43, - "grad_norm": 0.2770112585553754, - "learning_rate": 1.2583097164798015e-05, - "loss": 0.2086, + "epoch": 0.35, + "grad_norm": 0.3906558081242866, + "learning_rate": 1.5169116731269395e-05, + "loss": 0.2966, "step": 7560 }, { - "epoch": 0.43, - "grad_norm": 0.33701165254254206, - "learning_rate": 1.2581299362841262e-05, - "loss": 0.3406, + "epoch": 0.35, + "grad_norm": 1.2684109880625636, + "learning_rate": 1.5167842957557004e-05, + "loss": 0.7875, "step": 7561 }, { - "epoch": 0.43, - "grad_norm": 1.5593734746120151, - "learning_rate": 1.2579501471494222e-05, - "loss": 0.7475, + "epoch": 0.35, + "grad_norm": 0.31159977117813276, + "learning_rate": 1.5166569069433545e-05, + "loss": 0.2429, "step": 7562 }, { - "epoch": 0.43, - "grad_norm": 0.6377691899908655, - "learning_rate": 1.2577703490819157e-05, - "loss": 0.3114, + "epoch": 0.35, + "grad_norm": 0.2812627631005785, + "learning_rate": 1.5165295066927231e-05, + "loss": 0.1775, "step": 7563 }, { - "epoch": 0.43, - "grad_norm": 0.27445686268636504, - "learning_rate": 1.2575905420878332e-05, - "loss": 0.2575, + "epoch": 0.35, + "grad_norm": 1.0080329600784457, + "learning_rate": 1.5164020950066259e-05, + "loss": 0.5499, "step": 7564 }, { - "epoch": 0.43, - "grad_norm": 0.26622696284658587, - "learning_rate": 1.2574107261734012e-05, - "loss": 0.2002, + "epoch": 0.35, + "grad_norm": 0.9775537408268301, + "learning_rate": 1.5162746718878843e-05, + "loss": 0.4402, "step": 7565 }, { - "epoch": 0.43, - "grad_norm": 0.37561943751948673, - "learning_rate": 1.2572309013448467e-05, - "loss": 0.2893, + "epoch": 0.35, + "grad_norm": 0.34102108756192173, + "learning_rate": 1.5161472373393186e-05, + "loss": 0.2269, "step": 7566 }, { - "epoch": 0.43, - "grad_norm": 0.4144161359543641, - "learning_rate": 1.2570510676083974e-05, - "loss": 0.2954, + "epoch": 0.35, + "grad_norm": 0.42193899661358886, + "learning_rate": 1.516019791363751e-05, + "loss": 0.3601, "step": 7567 }, { - "epoch": 0.43, - "grad_norm": 0.36156343975898747, - "learning_rate": 1.2568712249702806e-05, - "loss": 0.3277, + "epoch": 0.35, + "grad_norm": 0.3621361342381857, + "learning_rate": 1.5158923339640026e-05, + "loss": 0.1865, "step": 7568 }, { - "epoch": 0.43, - "grad_norm": 0.41364106807789164, - "learning_rate": 1.2566913734367244e-05, - "loss": 0.2982, + "epoch": 0.35, + "grad_norm": 0.4256351463922086, + "learning_rate": 1.5157648651428948e-05, + "loss": 0.2196, "step": 7569 }, { - "epoch": 0.43, - "grad_norm": 0.4642819832894536, - "learning_rate": 1.2565115130139565e-05, - "loss": 0.2253, + "epoch": 0.35, + "grad_norm": 0.7870718793752617, + "learning_rate": 1.5156373849032501e-05, + "loss": 0.3496, "step": 7570 }, { - "epoch": 0.43, - "grad_norm": 0.5140464591085009, - "learning_rate": 1.2563316437082065e-05, - "loss": 0.4326, + "epoch": 0.35, + "grad_norm": 1.3327880687335067, + "learning_rate": 1.5155098932478906e-05, + "loss": 0.5292, "step": 7571 }, { - "epoch": 0.44, - "grad_norm": 0.26369296742189074, - "learning_rate": 1.2561517655257025e-05, - "loss": 0.2555, + "epoch": 0.35, + "grad_norm": 0.3405054545020583, + "learning_rate": 1.5153823901796395e-05, + "loss": 0.2106, "step": 7572 }, { - "epoch": 0.44, - "grad_norm": 0.25556862333416436, - "learning_rate": 1.255971878472674e-05, - "loss": 0.1509, + "epoch": 0.35, + "grad_norm": 1.0741856026735557, + "learning_rate": 1.5152548757013183e-05, + "loss": 0.7554, "step": 7573 }, { - "epoch": 0.44, - "grad_norm": 1.0276481868351715, - "learning_rate": 1.2557919825553502e-05, - "loss": 0.6818, + "epoch": 0.35, + "grad_norm": 0.4320639637568891, + "learning_rate": 1.5151273498157513e-05, + "loss": 0.3321, "step": 7574 }, { - "epoch": 0.44, - "grad_norm": 0.6329669628544323, - "learning_rate": 1.255612077779961e-05, - "loss": 0.3306, + "epoch": 0.35, + "grad_norm": 0.32477358409682133, + "learning_rate": 1.5149998125257608e-05, + "loss": 0.2305, "step": 7575 }, { - "epoch": 0.44, - "grad_norm": 0.32794636640693425, - "learning_rate": 1.2554321641527367e-05, - "loss": 0.2998, + "epoch": 0.35, + "grad_norm": 0.8171054108796164, + "learning_rate": 1.5148722638341711e-05, + "loss": 0.2892, "step": 7576 }, { - "epoch": 0.44, - "grad_norm": 0.5110473328508144, - "learning_rate": 1.2552522416799073e-05, - "loss": 0.3151, + "epoch": 0.35, + "grad_norm": 1.6467837539717225, + "learning_rate": 1.5147447037438055e-05, + "loss": 0.8094, "step": 7577 }, { - "epoch": 0.44, - "grad_norm": 0.41058569117740423, - "learning_rate": 1.2550723103677039e-05, - "loss": 0.2154, + "epoch": 0.35, + "grad_norm": 0.41701858396939506, + "learning_rate": 1.5146171322574885e-05, + "loss": 0.2826, "step": 7578 }, { - "epoch": 0.44, - "grad_norm": 0.32708584999844575, - "learning_rate": 1.2548923702223574e-05, - "loss": 0.251, + "epoch": 0.35, + "grad_norm": 0.5081375639789141, + "learning_rate": 1.5144895493780441e-05, + "loss": 0.2605, "step": 7579 }, { - "epoch": 0.44, - "grad_norm": 0.3670176202785912, - "learning_rate": 1.254712421250099e-05, - "loss": 0.2829, + "epoch": 0.35, + "grad_norm": 0.8876155800421572, + "learning_rate": 1.5143619551082972e-05, + "loss": 0.5119, "step": 7580 }, { - "epoch": 0.44, - "grad_norm": 0.5745557867723556, - "learning_rate": 1.2545324634571604e-05, - "loss": 0.3194, + "epoch": 0.35, + "grad_norm": 0.3537848449426154, + "learning_rate": 1.5142343494510718e-05, + "loss": 0.2866, "step": 7581 }, { - "epoch": 0.44, - "grad_norm": 0.40854419253185176, - "learning_rate": 1.2543524968497738e-05, - "loss": 0.3315, + "epoch": 0.35, + "grad_norm": 0.42436352312052644, + "learning_rate": 1.5141067324091939e-05, + "loss": 0.2973, "step": 7582 }, { - "epoch": 0.44, - "grad_norm": 0.6312594387495574, - "learning_rate": 1.2541725214341708e-05, - "loss": 0.3503, + "epoch": 0.35, + "grad_norm": 0.30803405189709004, + "learning_rate": 1.5139791039854883e-05, + "loss": 0.2091, "step": 7583 }, { - "epoch": 0.44, - "grad_norm": 0.3346389065280756, - "learning_rate": 1.2539925372165847e-05, - "loss": 0.2932, + "epoch": 0.35, + "grad_norm": 0.39497355367382236, + "learning_rate": 1.5138514641827809e-05, + "loss": 0.2356, "step": 7584 }, { - "epoch": 0.44, - "grad_norm": 0.23633536868367974, - "learning_rate": 1.2538125442032474e-05, - "loss": 0.2035, + "epoch": 0.35, + "grad_norm": 1.3422919264751791, + "learning_rate": 1.5137238130038973e-05, + "loss": 0.4163, "step": 7585 }, { - "epoch": 0.44, - "grad_norm": 0.9129265657545105, - "learning_rate": 1.2536325424003932e-05, - "loss": 0.5529, + "epoch": 0.35, + "grad_norm": 0.444213110946341, + "learning_rate": 1.5135961504516634e-05, + "loss": 0.326, "step": 7586 }, { - "epoch": 0.44, - "grad_norm": 0.38335161706444687, - "learning_rate": 1.2534525318142546e-05, - "loss": 0.3163, + "epoch": 0.35, + "grad_norm": 0.3936246885820805, + "learning_rate": 1.5134684765289059e-05, + "loss": 0.2807, "step": 7587 }, { - "epoch": 0.44, - "grad_norm": 0.37815106189321185, - "learning_rate": 1.2532725124510658e-05, - "loss": 0.3199, + "epoch": 0.35, + "grad_norm": 0.5301838391452397, + "learning_rate": 1.5133407912384513e-05, + "loss": 0.3674, "step": 7588 }, { - "epoch": 0.44, - "grad_norm": 0.45945715714105756, - "learning_rate": 1.2530924843170609e-05, - "loss": 0.3619, + "epoch": 0.35, + "grad_norm": 0.326394672139095, + "learning_rate": 1.5132130945831262e-05, + "loss": 0.2131, "step": 7589 }, { - "epoch": 0.44, - "grad_norm": 0.25719376284057816, - "learning_rate": 1.2529124474184742e-05, - "loss": 0.154, + "epoch": 0.35, + "grad_norm": 0.3666651160668729, + "learning_rate": 1.513085386565758e-05, + "loss": 0.2636, "step": 7590 }, { - "epoch": 0.44, - "grad_norm": 0.352646974082044, - "learning_rate": 1.25273240176154e-05, - "loss": 0.2434, + "epoch": 0.35, + "grad_norm": 0.6096838271605424, + "learning_rate": 1.5129576671891737e-05, + "loss": 0.4235, "step": 7591 }, { - "epoch": 0.44, - "grad_norm": 0.40108702245255196, - "learning_rate": 1.2525523473524938e-05, - "loss": 0.3153, + "epoch": 0.35, + "grad_norm": 1.0194321285021413, + "learning_rate": 1.512829936456201e-05, + "loss": 0.3706, "step": 7592 }, { - "epoch": 0.44, - "grad_norm": 0.5606921794556875, - "learning_rate": 1.2523722841975707e-05, - "loss": 0.153, + "epoch": 0.35, + "grad_norm": 0.3962853125563876, + "learning_rate": 1.5127021943696678e-05, + "loss": 0.3147, "step": 7593 }, { - "epoch": 0.44, - "grad_norm": 0.35881739246190164, - "learning_rate": 1.2521922123030061e-05, - "loss": 0.3164, + "epoch": 0.35, + "grad_norm": 0.40398912277396476, + "learning_rate": 1.5125744409324022e-05, + "loss": 0.3557, "step": 7594 }, { - "epoch": 0.44, - "grad_norm": 0.4775366714403366, - "learning_rate": 1.252012131675036e-05, - "loss": 0.3781, + "epoch": 0.35, + "grad_norm": 0.1944773788288787, + "learning_rate": 1.5124466761472326e-05, + "loss": 0.1061, "step": 7595 }, { - "epoch": 0.44, - "grad_norm": 0.3417720512306077, - "learning_rate": 1.2518320423198968e-05, - "loss": 0.2006, + "epoch": 0.35, + "grad_norm": 0.3481602901099738, + "learning_rate": 1.5123189000169874e-05, + "loss": 0.2643, "step": 7596 }, { - "epoch": 0.44, - "grad_norm": 0.2217803683706333, - "learning_rate": 1.251651944243825e-05, - "loss": 0.207, + "epoch": 0.35, + "grad_norm": 1.1299204314982574, + "learning_rate": 1.5121911125444956e-05, + "loss": 0.6227, "step": 7597 }, { - "epoch": 0.44, - "grad_norm": 0.9289910609233664, - "learning_rate": 1.2514718374530568e-05, - "loss": 0.6753, + "epoch": 0.35, + "grad_norm": 0.6183005053027494, + "learning_rate": 1.5120633137325861e-05, + "loss": 0.3213, "step": 7598 }, { - "epoch": 0.44, - "grad_norm": 0.5061395116029092, - "learning_rate": 1.2512917219538297e-05, - "loss": 0.2249, + "epoch": 0.35, + "grad_norm": 0.40217222536614167, + "learning_rate": 1.5119355035840884e-05, + "loss": 0.2745, "step": 7599 }, { - "epoch": 0.44, - "grad_norm": 0.3169813552029995, - "learning_rate": 1.2511115977523813e-05, - "loss": 0.3035, + "epoch": 0.35, + "grad_norm": 1.211868304264924, + "learning_rate": 1.5118076821018322e-05, + "loss": 0.5877, "step": 7600 }, { - "epoch": 0.44, - "grad_norm": 0.7382839722814659, - "learning_rate": 1.2509314648549491e-05, - "loss": 0.5035, + "epoch": 0.35, + "grad_norm": 0.32740997237068087, + "learning_rate": 1.5116798492886472e-05, + "loss": 0.2256, "step": 7601 }, { - "epoch": 0.44, - "grad_norm": 0.7547998463825323, - "learning_rate": 1.2507513232677707e-05, - "loss": 0.397, + "epoch": 0.35, + "grad_norm": 0.3351996380454803, + "learning_rate": 1.5115520051473636e-05, + "loss": 0.2276, "step": 7602 }, { - "epoch": 0.44, - "grad_norm": 0.2598908072442766, - "learning_rate": 1.250571172997085e-05, - "loss": 0.1986, + "epoch": 0.35, + "grad_norm": 0.5361811550270056, + "learning_rate": 1.511424149680811e-05, + "loss": 0.385, "step": 7603 }, { - "epoch": 0.44, - "grad_norm": 0.40470480560147853, - "learning_rate": 1.2503910140491305e-05, - "loss": 0.3349, + "epoch": 0.35, + "grad_norm": 0.7855661597264844, + "learning_rate": 1.5112962828918214e-05, + "loss": 0.4592, "step": 7604 }, { - "epoch": 0.44, - "grad_norm": 0.5543636608248991, - "learning_rate": 1.2502108464301456e-05, - "loss": 0.3531, + "epoch": 0.35, + "grad_norm": 0.34497535939865176, + "learning_rate": 1.5111684047832245e-05, + "loss": 0.1994, "step": 7605 }, { - "epoch": 0.44, - "grad_norm": 0.4062889895351509, - "learning_rate": 1.2500306701463702e-05, - "loss": 0.2834, + "epoch": 0.35, + "grad_norm": 0.3457506564380581, + "learning_rate": 1.5110405153578517e-05, + "loss": 0.3025, "step": 7606 }, { - "epoch": 0.44, - "grad_norm": 0.3447667781654027, - "learning_rate": 1.2498504852040433e-05, - "loss": 0.2808, + "epoch": 0.35, + "grad_norm": 0.4824846860815226, + "learning_rate": 1.5109126146185347e-05, + "loss": 0.3205, "step": 7607 }, { - "epoch": 0.44, - "grad_norm": 0.363996062685284, - "learning_rate": 1.2496702916094048e-05, - "loss": 0.2969, + "epoch": 0.35, + "grad_norm": 0.3200978072050853, + "learning_rate": 1.5107847025681048e-05, + "loss": 0.193, "step": 7608 }, { - "epoch": 0.44, - "grad_norm": 0.26788663410836405, - "learning_rate": 1.2494900893686949e-05, - "loss": 0.1817, + "epoch": 0.35, + "grad_norm": 0.5544670726121207, + "learning_rate": 1.5106567792093938e-05, + "loss": 0.3305, "step": 7609 }, { - "epoch": 0.44, - "grad_norm": 0.9228599503181674, - "learning_rate": 1.2493098784881539e-05, - "loss": 0.5611, + "epoch": 0.35, + "grad_norm": 0.42115896993822066, + "learning_rate": 1.510528844545234e-05, + "loss": 0.3451, "step": 7610 }, { - "epoch": 0.44, - "grad_norm": 0.3804507345579722, - "learning_rate": 1.2491296589740224e-05, - "loss": 0.3399, + "epoch": 0.35, + "grad_norm": 0.3147934078938412, + "learning_rate": 1.5104008985784572e-05, + "loss": 0.2052, "step": 7611 }, { - "epoch": 0.44, - "grad_norm": 0.2948051354691912, - "learning_rate": 1.2489494308325415e-05, - "loss": 0.2742, + "epoch": 0.35, + "grad_norm": 1.0450622854914953, + "learning_rate": 1.5102729413118971e-05, + "loss": 0.5182, "step": 7612 }, { - "epoch": 0.44, - "grad_norm": 0.3363350955735608, - "learning_rate": 1.2487691940699529e-05, - "loss": 0.2011, + "epoch": 0.35, + "grad_norm": 1.2360645630668692, + "learning_rate": 1.5101449727483855e-05, + "loss": 0.8049, "step": 7613 }, { - "epoch": 0.44, - "grad_norm": 1.1140463461241292, - "learning_rate": 1.2485889486924974e-05, - "loss": 0.6561, + "epoch": 0.35, + "grad_norm": 0.30681133462817733, + "learning_rate": 1.5100169928907562e-05, + "loss": 0.2734, "step": 7614 }, { - "epoch": 0.44, - "grad_norm": 0.3030326221824844, - "learning_rate": 1.2484086947064175e-05, - "loss": 0.2679, + "epoch": 0.35, + "grad_norm": 0.3237638544247339, + "learning_rate": 1.5098890017418419e-05, + "loss": 0.2182, "step": 7615 }, { - "epoch": 0.44, - "grad_norm": 0.36992495041043094, - "learning_rate": 1.2482284321179552e-05, - "loss": 0.2854, + "epoch": 0.35, + "grad_norm": 0.4749421238351701, + "learning_rate": 1.5097609993044767e-05, + "loss": 0.3092, "step": 7616 }, { - "epoch": 0.44, - "grad_norm": 0.7049813104276725, - "learning_rate": 1.248048160933353e-05, - "loss": 0.3893, + "epoch": 0.35, + "grad_norm": 0.4131216903242655, + "learning_rate": 1.5096329855814942e-05, + "loss": 0.2847, "step": 7617 }, { - "epoch": 0.44, - "grad_norm": 0.2756630402796699, - "learning_rate": 1.2478678811588535e-05, - "loss": 0.2303, + "epoch": 0.35, + "grad_norm": 0.3839366551721288, + "learning_rate": 1.5095049605757285e-05, + "loss": 0.2527, "step": 7618 }, { - "epoch": 0.44, - "grad_norm": 0.26760176350237963, - "learning_rate": 1.2476875928006999e-05, - "loss": 0.1611, + "epoch": 0.35, + "grad_norm": 1.0316988508153542, + "learning_rate": 1.5093769242900145e-05, + "loss": 0.6807, "step": 7619 }, { - "epoch": 0.44, - "grad_norm": 0.41829783407336724, - "learning_rate": 1.2475072958651358e-05, - "loss": 0.3304, + "epoch": 0.35, + "grad_norm": 0.42735204352842515, + "learning_rate": 1.5092488767271858e-05, + "loss": 0.2907, "step": 7620 }, { - "epoch": 0.44, - "grad_norm": 0.35645273997227866, - "learning_rate": 1.2473269903584046e-05, - "loss": 0.2964, + "epoch": 0.35, + "grad_norm": 0.4392654067677224, + "learning_rate": 1.509120817890078e-05, + "loss": 0.3263, "step": 7621 }, { - "epoch": 0.44, - "grad_norm": 0.8604531292814376, - "learning_rate": 1.2471466762867506e-05, - "loss": 0.3573, + "epoch": 0.35, + "grad_norm": 0.2729869543529425, + "learning_rate": 1.5089927477815258e-05, + "loss": 0.2209, "step": 7622 }, { - "epoch": 0.44, - "grad_norm": 0.3153244327743291, - "learning_rate": 1.2469663536564177e-05, - "loss": 0.3042, + "epoch": 0.35, + "grad_norm": 0.8064194296670322, + "learning_rate": 1.5088646664043652e-05, + "loss": 0.3873, "step": 7623 }, { - "epoch": 0.44, - "grad_norm": 0.3374073034408949, - "learning_rate": 1.2467860224736501e-05, - "loss": 0.2689, + "epoch": 0.35, + "grad_norm": 0.44523674137514896, + "learning_rate": 1.5087365737614308e-05, + "loss": 0.3043, "step": 7624 }, { - "epoch": 0.44, - "grad_norm": 0.2715934504776164, - "learning_rate": 1.2466056827446937e-05, - "loss": 0.1519, + "epoch": 0.35, + "grad_norm": 0.38740331821457247, + "learning_rate": 1.5086084698555594e-05, + "loss": 0.2754, "step": 7625 }, { - "epoch": 0.44, - "grad_norm": 0.4920404791571963, - "learning_rate": 1.2464253344757929e-05, - "loss": 0.1767, + "epoch": 0.35, + "grad_norm": 0.4375651543201528, + "learning_rate": 1.5084803546895863e-05, + "loss": 0.2616, "step": 7626 }, { - "epoch": 0.44, - "grad_norm": 0.40765441951874637, - "learning_rate": 1.2462449776731935e-05, - "loss": 0.3151, + "epoch": 0.35, + "grad_norm": 0.6177360691352405, + "learning_rate": 1.5083522282663486e-05, + "loss": 0.3665, "step": 7627 }, { - "epoch": 0.44, - "grad_norm": 0.4929507662219086, - "learning_rate": 1.246064612343141e-05, - "loss": 0.3614, + "epoch": 0.35, + "grad_norm": 0.23558660247991184, + "learning_rate": 1.5082240905886825e-05, + "loss": 0.1082, "step": 7628 }, { - "epoch": 0.44, - "grad_norm": 0.8465290944998883, - "learning_rate": 1.2458842384918815e-05, - "loss": 0.3429, + "epoch": 0.35, + "grad_norm": 0.422250524836257, + "learning_rate": 1.5080959416594246e-05, + "loss": 0.2757, "step": 7629 }, { - "epoch": 0.44, - "grad_norm": 0.37771971215198696, - "learning_rate": 1.2457038561256616e-05, - "loss": 0.2933, + "epoch": 0.35, + "grad_norm": 0.3274222668178951, + "learning_rate": 1.5079677814814124e-05, + "loss": 0.317, "step": 7630 }, { - "epoch": 0.44, - "grad_norm": 0.25817417503857, - "learning_rate": 1.2455234652507276e-05, - "loss": 0.2179, + "epoch": 0.35, + "grad_norm": 0.9961536923718076, + "learning_rate": 1.507839610057483e-05, + "loss": 0.491, "step": 7631 }, { - "epoch": 0.44, - "grad_norm": 1.003852559033286, - "learning_rate": 1.2453430658733265e-05, - "loss": 0.2929, + "epoch": 0.35, + "grad_norm": 0.43286476213631525, + "learning_rate": 1.5077114273904743e-05, + "loss": 0.3061, "step": 7632 }, { - "epoch": 0.44, - "grad_norm": 0.33167390172321803, - "learning_rate": 1.2451626579997056e-05, - "loss": 0.2602, + "epoch": 0.35, + "grad_norm": 0.616657586769195, + "learning_rate": 1.5075832334832239e-05, + "loss": 0.3839, "step": 7633 }, { - "epoch": 0.44, - "grad_norm": 0.8273407599930446, - "learning_rate": 1.2449822416361123e-05, - "loss": 0.4349, + "epoch": 0.35, + "grad_norm": 0.2599163145138823, + "learning_rate": 1.5074550283385699e-05, + "loss": 0.1791, "step": 7634 }, { - "epoch": 0.44, - "grad_norm": 0.3353811089623452, - "learning_rate": 1.2448018167887947e-05, - "loss": 0.264, + "epoch": 0.35, + "grad_norm": 0.4176350237068114, + "learning_rate": 1.5073268119593504e-05, + "loss": 0.2739, "step": 7635 }, { - "epoch": 0.44, - "grad_norm": 0.34648324200161335, - "learning_rate": 1.2446213834640007e-05, - "loss": 0.2492, + "epoch": 0.35, + "grad_norm": 0.6147367108104824, + "learning_rate": 1.5071985843484047e-05, + "loss": 0.3929, "step": 7636 }, { - "epoch": 0.44, - "grad_norm": 0.30946096151194946, - "learning_rate": 1.2444409416679786e-05, - "loss": 0.2142, + "epoch": 0.35, + "grad_norm": 0.48234697499563384, + "learning_rate": 1.507070345508571e-05, + "loss": 0.3586, "step": 7637 }, { - "epoch": 0.44, - "grad_norm": 0.813049180954248, - "learning_rate": 1.2442604914069773e-05, - "loss": 0.4697, + "epoch": 0.35, + "grad_norm": 0.2850951220253969, + "learning_rate": 1.5069420954426886e-05, + "loss": 0.222, "step": 7638 }, { - "epoch": 0.44, - "grad_norm": 0.3757559066901632, - "learning_rate": 1.2440800326872457e-05, - "loss": 0.2301, + "epoch": 0.35, + "grad_norm": 0.5010867255937782, + "learning_rate": 1.5068138341535964e-05, + "loss": 0.4048, "step": 7639 }, { - "epoch": 0.44, - "grad_norm": 0.4310357553833105, - "learning_rate": 1.2438995655150332e-05, - "loss": 0.3578, + "epoch": 0.35, + "grad_norm": 0.30703609605610654, + "learning_rate": 1.506685561644135e-05, + "loss": 0.2229, "step": 7640 }, { - "epoch": 0.44, - "grad_norm": 1.0897373835543203, - "learning_rate": 1.243719089896589e-05, - "loss": 0.7668, + "epoch": 0.35, + "grad_norm": 0.32916150211071815, + "learning_rate": 1.506557277917143e-05, + "loss": 0.074, "step": 7641 }, { - "epoch": 0.44, - "grad_norm": 0.2947300212873115, - "learning_rate": 1.2435386058381634e-05, - "loss": 0.2155, + "epoch": 0.35, + "grad_norm": 0.34304553881879934, + "learning_rate": 1.5064289829754618e-05, + "loss": 0.2958, "step": 7642 }, { - "epoch": 0.44, - "grad_norm": 0.2607968228356934, - "learning_rate": 1.243358113346006e-05, - "loss": 0.2048, + "epoch": 0.35, + "grad_norm": 0.6518775278263368, + "learning_rate": 1.5063006768219306e-05, + "loss": 0.4409, "step": 7643 }, { - "epoch": 0.44, - "grad_norm": 0.444883989342041, - "learning_rate": 1.243177612426368e-05, - "loss": 0.368, + "epoch": 0.35, + "grad_norm": 0.5026966366970531, + "learning_rate": 1.5061723594593903e-05, + "loss": 0.2161, "step": 7644 }, { - "epoch": 0.44, - "grad_norm": 0.3069080743699087, - "learning_rate": 1.2429971030854993e-05, - "loss": 0.1943, + "epoch": 0.35, + "grad_norm": 0.37209818630973324, + "learning_rate": 1.506044030890682e-05, + "loss": 0.3005, "step": 7645 }, { - "epoch": 0.44, - "grad_norm": 0.8069208624859093, - "learning_rate": 1.2428165853296517e-05, - "loss": 0.4731, + "epoch": 0.35, + "grad_norm": 0.5776690749178832, + "learning_rate": 1.5059156911186465e-05, + "loss": 0.4037, "step": 7646 }, { - "epoch": 0.44, - "grad_norm": 0.3425899314113828, - "learning_rate": 1.2426360591650761e-05, - "loss": 0.2992, + "epoch": 0.35, + "grad_norm": 0.2043797321336634, + "learning_rate": 1.5057873401461253e-05, + "loss": 0.1303, "step": 7647 }, { - "epoch": 0.44, - "grad_norm": 0.3365953563747308, - "learning_rate": 1.2424555245980241e-05, - "loss": 0.2127, + "epoch": 0.35, + "grad_norm": 0.6148729492844035, + "learning_rate": 1.5056589779759599e-05, + "loss": 0.3955, "step": 7648 }, { - "epoch": 0.44, - "grad_norm": 0.281929789738832, - "learning_rate": 1.2422749816347479e-05, - "loss": 0.1837, + "epoch": 0.35, + "grad_norm": 0.4793994118030412, + "learning_rate": 1.5055306046109922e-05, + "loss": 0.3697, "step": 7649 }, { - "epoch": 0.44, - "grad_norm": 0.8104273831878697, - "learning_rate": 1.2420944302814992e-05, - "loss": 0.4816, + "epoch": 0.35, + "grad_norm": 0.4287614043512806, + "learning_rate": 1.5054022200540636e-05, + "loss": 0.3156, "step": 7650 }, { - "epoch": 0.44, - "grad_norm": 0.34569478851091506, - "learning_rate": 1.2419138705445314e-05, - "loss": 0.2962, + "epoch": 0.35, + "grad_norm": 0.39242122953709163, + "learning_rate": 1.5052738243080173e-05, + "loss": 0.2716, "step": 7651 }, { - "epoch": 0.44, - "grad_norm": 0.3634484166137741, - "learning_rate": 1.241733302430096e-05, - "loss": 0.2862, + "epoch": 0.35, + "grad_norm": 0.42138512885165935, + "learning_rate": 1.5051454173756956e-05, + "loss": 0.2941, "step": 7652 }, { - "epoch": 0.44, - "grad_norm": 1.0261468850569297, - "learning_rate": 1.2415527259444471e-05, - "loss": 0.6782, + "epoch": 0.35, + "grad_norm": 0.48147549615505125, + "learning_rate": 1.5050169992599412e-05, + "loss": 0.231, "step": 7653 }, { - "epoch": 0.44, - "grad_norm": 0.3215611242030219, - "learning_rate": 1.2413721410938373e-05, - "loss": 0.2798, + "epoch": 0.35, + "grad_norm": 0.3392201113875556, + "learning_rate": 1.5048885699635972e-05, + "loss": 0.2527, "step": 7654 }, { - "epoch": 0.44, - "grad_norm": 0.2449067046131677, - "learning_rate": 1.2411915478845211e-05, - "loss": 0.1634, + "epoch": 0.35, + "grad_norm": 0.7620454601538579, + "learning_rate": 1.5047601294895069e-05, + "loss": 0.4223, "step": 7655 }, { - "epoch": 0.44, - "grad_norm": 0.4588366813097026, - "learning_rate": 1.2410109463227519e-05, - "loss": 0.3376, + "epoch": 0.35, + "grad_norm": 0.6455478219550163, + "learning_rate": 1.5046316778405137e-05, + "loss": 0.4275, "step": 7656 }, { - "epoch": 0.44, - "grad_norm": 0.3169675064491585, - "learning_rate": 1.240830336414784e-05, - "loss": 0.3055, + "epoch": 0.35, + "grad_norm": 0.4306450400178657, + "learning_rate": 1.5045032150194617e-05, + "loss": 0.2519, "step": 7657 }, { - "epoch": 0.44, - "grad_norm": 0.8641165712233401, - "learning_rate": 1.2406497181668717e-05, - "loss": 0.4004, + "epoch": 0.35, + "grad_norm": 0.3788328239620782, + "learning_rate": 1.5043747410291945e-05, + "loss": 0.3242, "step": 7658 }, { - "epoch": 0.44, - "grad_norm": 0.34837061522076307, - "learning_rate": 1.2404690915852701e-05, - "loss": 0.3421, + "epoch": 0.35, + "grad_norm": 0.2880517418249159, + "learning_rate": 1.5042462558725568e-05, + "loss": 0.1721, "step": 7659 }, { - "epoch": 0.44, - "grad_norm": 0.3212739319107255, - "learning_rate": 1.2402884566762341e-05, - "loss": 0.2886, + "epoch": 0.35, + "grad_norm": 0.40647069405350517, + "learning_rate": 1.504117759552393e-05, + "loss": 0.2661, "step": 7660 }, { - "epoch": 0.44, - "grad_norm": 0.2776234930677816, - "learning_rate": 1.2401078134460194e-05, - "loss": 0.1348, + "epoch": 0.35, + "grad_norm": 0.37664810413778893, + "learning_rate": 1.503989252071548e-05, + "loss": 0.3385, "step": 7661 }, { - "epoch": 0.44, - "grad_norm": 0.41273755963460756, - "learning_rate": 1.2399271619008812e-05, - "loss": 0.3599, + "epoch": 0.35, + "grad_norm": 0.7359037639572519, + "learning_rate": 1.5038607334328666e-05, + "loss": 0.4013, "step": 7662 }, { - "epoch": 0.44, - "grad_norm": 0.42377632401126736, - "learning_rate": 1.2397465020470757e-05, - "loss": 0.2785, + "epoch": 0.35, + "grad_norm": 0.4041293370839083, + "learning_rate": 1.503732203639194e-05, + "loss": 0.2908, "step": 7663 }, { - "epoch": 0.44, - "grad_norm": 0.3625811131545241, - "learning_rate": 1.2395658338908594e-05, - "loss": 0.3151, + "epoch": 0.35, + "grad_norm": 0.7867829645627987, + "learning_rate": 1.5036036626933763e-05, + "loss": 0.2581, "step": 7664 }, { - "epoch": 0.44, - "grad_norm": 0.7998829300948974, - "learning_rate": 1.2393851574384886e-05, - "loss": 0.4532, + "epoch": 0.35, + "grad_norm": 0.25621004984259216, + "learning_rate": 1.5034751105982585e-05, + "loss": 0.2155, "step": 7665 }, { - "epoch": 0.44, - "grad_norm": 0.40151280132662404, - "learning_rate": 1.23920447269622e-05, - "loss": 0.2937, + "epoch": 0.35, + "grad_norm": 0.4355670620186746, + "learning_rate": 1.5033465473566873e-05, + "loss": 0.3089, "step": 7666 }, { - "epoch": 0.44, - "grad_norm": 0.3260653072198325, - "learning_rate": 1.2390237796703107e-05, - "loss": 0.3084, + "epoch": 0.35, + "grad_norm": 0.6308414772533807, + "learning_rate": 1.5032179729715087e-05, + "loss": 0.3388, "step": 7667 }, { - "epoch": 0.44, - "grad_norm": 0.5939761261701787, - "learning_rate": 1.238843078367018e-05, - "loss": 0.3467, + "epoch": 0.35, + "grad_norm": 0.6213229663823759, + "learning_rate": 1.5030893874455688e-05, + "loss": 0.3687, "step": 7668 }, { - "epoch": 0.44, - "grad_norm": 0.2672933275218854, - "learning_rate": 1.2386623687926001e-05, - "loss": 0.2261, + "epoch": 0.35, + "grad_norm": 0.379547874469156, + "learning_rate": 1.502960790781715e-05, + "loss": 0.2898, "step": 7669 }, { - "epoch": 0.44, - "grad_norm": 0.4603784285403028, - "learning_rate": 1.2384816509533145e-05, - "loss": 0.3053, + "epoch": 0.35, + "grad_norm": 0.408334640795198, + "learning_rate": 1.5028321829827942e-05, + "loss": 0.2667, "step": 7670 }, { - "epoch": 0.44, - "grad_norm": 0.32124928699513156, - "learning_rate": 1.2383009248554195e-05, - "loss": 0.245, + "epoch": 0.35, + "grad_norm": 0.40065966203811404, + "learning_rate": 1.5027035640516533e-05, + "loss": 0.2451, "step": 7671 }, { - "epoch": 0.44, - "grad_norm": 0.34815018711320994, - "learning_rate": 1.2381201905051738e-05, - "loss": 0.2903, + "epoch": 0.35, + "grad_norm": 0.4174767569956399, + "learning_rate": 1.5025749339911401e-05, + "loss": 0.336, "step": 7672 }, { - "epoch": 0.44, - "grad_norm": 0.630884248029316, - "learning_rate": 1.2379394479088363e-05, - "loss": 0.4409, + "epoch": 0.35, + "grad_norm": 0.33774688670473024, + "learning_rate": 1.5024462928041021e-05, + "loss": 0.2526, "step": 7673 }, { - "epoch": 0.44, - "grad_norm": 0.37651909244520654, - "learning_rate": 1.2377586970726658e-05, - "loss": 0.2451, + "epoch": 0.35, + "grad_norm": 0.5522583782308245, + "learning_rate": 1.5023176404933875e-05, + "loss": 0.3528, "step": 7674 }, { - "epoch": 0.44, - "grad_norm": 0.282839422926257, - "learning_rate": 1.2375779380029218e-05, - "loss": 0.2584, + "epoch": 0.35, + "grad_norm": 0.45323000335035246, + "learning_rate": 1.5021889770618445e-05, + "loss": 0.3123, "step": 7675 }, { - "epoch": 0.44, - "grad_norm": 0.4063717632353682, - "learning_rate": 1.2373971707058643e-05, - "loss": 0.2808, + "epoch": 0.35, + "grad_norm": 1.0091943037805404, + "learning_rate": 1.5020603025123215e-05, + "loss": 0.627, "step": 7676 }, { - "epoch": 0.44, - "grad_norm": 0.9496003441841634, - "learning_rate": 1.2372163951877525e-05, - "loss": 0.6778, + "epoch": 0.35, + "grad_norm": 0.4204407410105205, + "learning_rate": 1.5019316168476673e-05, + "loss": 0.2827, "step": 7677 }, { - "epoch": 0.44, - "grad_norm": 0.2875371706876149, - "learning_rate": 1.2370356114548476e-05, - "loss": 0.2067, + "epoch": 0.35, + "grad_norm": 0.34255019450096147, + "learning_rate": 1.5018029200707312e-05, + "loss": 0.2953, "step": 7678 }, { - "epoch": 0.44, - "grad_norm": 0.3650829049658475, - "learning_rate": 1.2368548195134094e-05, - "loss": 0.3043, + "epoch": 0.35, + "grad_norm": 0.3134957641459456, + "learning_rate": 1.5016742121843617e-05, + "loss": 0.185, "step": 7679 }, { - "epoch": 0.44, - "grad_norm": 0.7485902503018145, - "learning_rate": 1.2366740193696991e-05, - "loss": 0.4163, + "epoch": 0.35, + "grad_norm": 0.4978238301625696, + "learning_rate": 1.5015454931914088e-05, + "loss": 0.2701, "step": 7680 }, { - "epoch": 0.44, - "grad_norm": 0.23644972909627532, - "learning_rate": 1.2364932110299775e-05, - "loss": 0.1315, + "epoch": 0.35, + "grad_norm": 0.35924586076609377, + "learning_rate": 1.501416763094722e-05, + "loss": 0.2907, "step": 7681 }, { - "epoch": 0.44, - "grad_norm": 0.3908523069948069, - "learning_rate": 1.2363123945005064e-05, - "loss": 0.2984, + "epoch": 0.35, + "grad_norm": 0.4996443820322406, + "learning_rate": 1.5012880218971515e-05, + "loss": 0.336, "step": 7682 }, { - "epoch": 0.44, - "grad_norm": 0.36885901856842995, - "learning_rate": 1.2361315697875469e-05, - "loss": 0.3257, + "epoch": 0.35, + "grad_norm": 0.5616092042389272, + "learning_rate": 1.5011592696015474e-05, + "loss": 0.3253, "step": 7683 }, { - "epoch": 0.44, - "grad_norm": 0.3138719168813281, - "learning_rate": 1.2359507368973618e-05, - "loss": 0.1951, + "epoch": 0.35, + "grad_norm": 0.3920430722015312, + "learning_rate": 1.5010305062107598e-05, + "loss": 0.3268, "step": 7684 }, { - "epoch": 0.44, - "grad_norm": 0.6338918277920743, - "learning_rate": 1.2357698958362123e-05, - "loss": 0.3682, + "epoch": 0.35, + "grad_norm": 0.28130144604997476, + "learning_rate": 1.50090173172764e-05, + "loss": 0.2035, "step": 7685 }, { - "epoch": 0.44, - "grad_norm": 1.075166134031894, - "learning_rate": 1.2355890466103619e-05, - "loss": 0.6696, + "epoch": 0.35, + "grad_norm": 0.4323259333906215, + "learning_rate": 1.5007729461550384e-05, + "loss": 0.27, "step": 7686 }, { - "epoch": 0.44, - "grad_norm": 0.2538342119211804, - "learning_rate": 1.2354081892260728e-05, - "loss": 0.2455, + "epoch": 0.35, + "grad_norm": 0.3746408540182315, + "learning_rate": 1.5006441494958065e-05, + "loss": 0.2834, "step": 7687 }, { - "epoch": 0.44, - "grad_norm": 0.2697614085126694, - "learning_rate": 1.2352273236896082e-05, - "loss": 0.2123, + "epoch": 0.35, + "grad_norm": 0.8267041857808203, + "learning_rate": 1.5005153417527955e-05, + "loss": 0.5503, "step": 7688 }, { - "epoch": 0.44, - "grad_norm": 1.115602045075998, - "learning_rate": 1.2350464500072314e-05, - "loss": 0.5415, + "epoch": 0.35, + "grad_norm": 0.35781716106855976, + "learning_rate": 1.5003865229288576e-05, + "loss": 0.3256, "step": 7689 }, { - "epoch": 0.44, - "grad_norm": 0.40017360263197127, - "learning_rate": 1.2348655681852064e-05, - "loss": 0.299, + "epoch": 0.35, + "grad_norm": 0.35196877169483254, + "learning_rate": 1.5002576930268444e-05, + "loss": 0.2438, "step": 7690 }, { - "epoch": 0.44, - "grad_norm": 0.27748444108314924, - "learning_rate": 1.2346846782297966e-05, - "loss": 0.2675, + "epoch": 0.35, + "grad_norm": 0.29976794823650044, + "learning_rate": 1.5001288520496076e-05, + "loss": 0.187, "step": 7691 }, { - "epoch": 0.44, - "grad_norm": 1.024308978329414, - "learning_rate": 1.2345037801472669e-05, - "loss": 0.5259, + "epoch": 0.35, + "grad_norm": 0.7963967688505865, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.5699, "step": 7692 }, { - "epoch": 0.44, - "grad_norm": 0.2658736169881998, - "learning_rate": 1.2343228739438811e-05, - "loss": 0.2082, + "epoch": 0.35, + "grad_norm": 0.3045204299775969, + "learning_rate": 1.4998711368808748e-05, + "loss": 0.2335, "step": 7693 }, { - "epoch": 0.44, - "grad_norm": 0.5155626562375746, - "learning_rate": 1.2341419596259038e-05, - "loss": 0.295, + "epoch": 0.35, + "grad_norm": 0.46379940345193477, + "learning_rate": 1.499742262695084e-05, + "loss": 0.3597, "step": 7694 }, { - "epoch": 0.44, - "grad_norm": 0.26905222497297376, - "learning_rate": 1.2339610371996012e-05, - "loss": 0.2577, + "epoch": 0.35, + "grad_norm": 0.7466706018466931, + "learning_rate": 1.4996133774454813e-05, + "loss": 0.4758, "step": 7695 }, { - "epoch": 0.44, - "grad_norm": 0.3606434958043496, - "learning_rate": 1.2337801066712376e-05, - "loss": 0.3027, + "epoch": 0.35, + "grad_norm": 0.32355291719558715, + "learning_rate": 1.49948448113492e-05, + "loss": 0.2046, "step": 7696 }, { - "epoch": 0.44, - "grad_norm": 0.49890333651717567, - "learning_rate": 1.2335991680470792e-05, - "loss": 0.2738, + "epoch": 0.35, + "grad_norm": 0.24033206281582406, + "learning_rate": 1.4993555737662537e-05, + "loss": 0.2273, "step": 7697 }, { - "epoch": 0.44, - "grad_norm": 0.39467269562385565, - "learning_rate": 1.2334182213333912e-05, - "loss": 0.3395, + "epoch": 0.35, + "grad_norm": 1.4812987952261059, + "learning_rate": 1.4992266553423363e-05, + "loss": 0.7697, "step": 7698 }, { - "epoch": 0.44, - "grad_norm": 0.5731151803650315, - "learning_rate": 1.2332372665364406e-05, - "loss": 0.3394, + "epoch": 0.35, + "grad_norm": 0.31304246170596267, + "learning_rate": 1.4990977258660218e-05, + "loss": 0.2012, "step": 7699 }, { - "epoch": 0.44, - "grad_norm": 0.4065624798269097, - "learning_rate": 1.2330563036624934e-05, - "loss": 0.3214, + "epoch": 0.35, + "grad_norm": 0.8968750469262925, + "learning_rate": 1.4989687853401647e-05, + "loss": 0.44, "step": 7700 }, { - "epoch": 0.44, - "grad_norm": 0.36053450120448083, - "learning_rate": 1.2328753327178164e-05, - "loss": 0.2515, + "epoch": 0.35, + "grad_norm": 0.3889300157166741, + "learning_rate": 1.4988398337676198e-05, + "loss": 0.3494, "step": 7701 }, { - "epoch": 0.44, - "grad_norm": 0.5689075010121288, - "learning_rate": 1.2326943537086766e-05, - "loss": 0.3741, + "epoch": 0.35, + "grad_norm": 0.3392461051254853, + "learning_rate": 1.4987108711512417e-05, + "loss": 0.2476, "step": 7702 }, { - "epoch": 0.44, - "grad_norm": 0.2783654959597047, - "learning_rate": 1.2325133666413414e-05, - "loss": 0.2591, + "epoch": 0.35, + "grad_norm": 0.29171322683745304, + "learning_rate": 1.4985818974938855e-05, + "loss": 0.1125, "step": 7703 }, { - "epoch": 0.44, - "grad_norm": 1.3908455924860992, - "learning_rate": 1.2323323715220783e-05, - "loss": 0.2327, + "epoch": 0.35, + "grad_norm": 0.5450539871834215, + "learning_rate": 1.4984529127984064e-05, + "loss": 0.4419, "step": 7704 }, { - "epoch": 0.44, - "grad_norm": 0.47826736293204375, - "learning_rate": 1.2321513683571553e-05, - "loss": 0.3268, + "epoch": 0.35, + "grad_norm": 0.3283305667759528, + "learning_rate": 1.4983239170676606e-05, + "loss": 0.2857, "step": 7705 }, { - "epoch": 0.44, - "grad_norm": 0.4121896236281999, - "learning_rate": 1.2319703571528403e-05, - "loss": 0.3567, + "epoch": 0.35, + "grad_norm": 0.7781263567606692, + "learning_rate": 1.4981949103045033e-05, + "loss": 0.2515, "step": 7706 }, { - "epoch": 0.44, - "grad_norm": 0.35279691238740696, - "learning_rate": 1.231789337915402e-05, - "loss": 0.2763, + "epoch": 0.35, + "grad_norm": 0.7129518940959256, + "learning_rate": 1.498065892511791e-05, + "loss": 0.4767, "step": 7707 }, { - "epoch": 0.44, - "grad_norm": 0.37557769110490014, - "learning_rate": 1.2316083106511085e-05, - "loss": 0.2277, + "epoch": 0.35, + "grad_norm": 0.4355172629132053, + "learning_rate": 1.4979368636923799e-05, + "loss": 0.2423, "step": 7708 }, { - "epoch": 0.44, - "grad_norm": 0.3360999778532407, - "learning_rate": 1.2314272753662295e-05, - "loss": 0.2438, + "epoch": 0.35, + "grad_norm": 0.406451466054222, + "learning_rate": 1.4978078238491267e-05, + "loss": 0.2506, "step": 7709 }, { - "epoch": 0.44, - "grad_norm": 0.46270153126000013, - "learning_rate": 1.2312462320670335e-05, - "loss": 0.2576, + "epoch": 0.35, + "grad_norm": 0.8510379029456987, + "learning_rate": 1.4976787729848876e-05, + "loss": 0.5211, "step": 7710 }, { - "epoch": 0.44, - "grad_norm": 0.3572732447396155, - "learning_rate": 1.2310651807597909e-05, - "loss": 0.233, + "epoch": 0.35, + "grad_norm": 0.43072614021521766, + "learning_rate": 1.4975497111025205e-05, + "loss": 0.2869, "step": 7711 }, { - "epoch": 0.44, - "grad_norm": 0.4912302945710772, - "learning_rate": 1.2308841214507708e-05, - "loss": 0.3823, + "epoch": 0.35, + "grad_norm": 0.6109401829733558, + "learning_rate": 1.4974206382048821e-05, + "loss": 0.2221, "step": 7712 }, { - "epoch": 0.44, - "grad_norm": 0.5918930747562702, - "learning_rate": 1.2307030541462435e-05, - "loss": 0.4092, + "epoch": 0.35, + "grad_norm": 0.37449759308013053, + "learning_rate": 1.4972915542948307e-05, + "loss": 0.3323, "step": 7713 }, { - "epoch": 0.44, - "grad_norm": 0.27316585495061246, - "learning_rate": 1.2305219788524794e-05, - "loss": 0.224, + "epoch": 0.35, + "grad_norm": 0.36826296259947433, + "learning_rate": 1.497162459375223e-05, + "loss": 0.2771, "step": 7714 }, { - "epoch": 0.44, - "grad_norm": 0.2925029836392248, - "learning_rate": 1.230340895575749e-05, - "loss": 0.248, + "epoch": 0.35, + "grad_norm": 1.000364802798678, + "learning_rate": 1.4970333534489179e-05, + "loss": 0.6142, "step": 7715 }, { - "epoch": 0.44, - "grad_norm": 0.5948366275202696, - "learning_rate": 1.2301598043223233e-05, - "loss": 0.2956, + "epoch": 0.35, + "grad_norm": 0.49316606147010944, + "learning_rate": 1.4969042365187733e-05, + "loss": 0.3156, "step": 7716 }, { - "epoch": 0.44, - "grad_norm": 0.5585403674560185, - "learning_rate": 1.2299787050984736e-05, - "loss": 0.2098, + "epoch": 0.35, + "grad_norm": 0.3433967400706558, + "learning_rate": 1.4967751085876478e-05, + "loss": 0.2627, "step": 7717 }, { - "epoch": 0.44, - "grad_norm": 0.357036950905096, - "learning_rate": 1.2297975979104711e-05, - "loss": 0.3173, + "epoch": 0.35, + "grad_norm": 0.5341254820092465, + "learning_rate": 1.4966459696584003e-05, + "loss": 0.3337, "step": 7718 }, { - "epoch": 0.44, - "grad_norm": 0.3489053320187153, - "learning_rate": 1.2296164827645875e-05, - "loss": 0.3219, + "epoch": 0.35, + "grad_norm": 0.28607003233058537, + "learning_rate": 1.49651681973389e-05, + "loss": 0.1145, "step": 7719 }, { - "epoch": 0.44, - "grad_norm": 0.26655390566547604, - "learning_rate": 1.2294353596670954e-05, - "loss": 0.1146, + "epoch": 0.35, + "grad_norm": 0.3984425535122404, + "learning_rate": 1.4963876588169755e-05, + "loss": 0.2787, "step": 7720 }, { - "epoch": 0.44, - "grad_norm": 0.22260566173399035, - "learning_rate": 1.2292542286242663e-05, - "loss": 0.1856, + "epoch": 0.35, + "grad_norm": 0.42526240417533256, + "learning_rate": 1.4962584869105165e-05, + "loss": 0.3325, "step": 7721 }, { - "epoch": 0.44, - "grad_norm": 0.5210406943968808, - "learning_rate": 1.2290730896423733e-05, - "loss": 0.3649, + "epoch": 0.35, + "grad_norm": 0.764820698383291, + "learning_rate": 1.4961293040173732e-05, + "loss": 0.3994, "step": 7722 }, { - "epoch": 0.44, - "grad_norm": 0.30909289929601924, - "learning_rate": 1.2288919427276889e-05, - "loss": 0.2107, + "epoch": 0.35, + "grad_norm": 0.38728910270694583, + "learning_rate": 1.4960001101404049e-05, + "loss": 0.3046, "step": 7723 }, { - "epoch": 0.44, - "grad_norm": 0.3394742788735173, - "learning_rate": 1.2287107878864868e-05, - "loss": 0.3283, + "epoch": 0.35, + "grad_norm": 1.1857177378591928, + "learning_rate": 1.4958709052824726e-05, + "loss": 0.6226, "step": 7724 }, { - "epoch": 0.44, - "grad_norm": 0.6441934367210518, - "learning_rate": 1.2285296251250396e-05, - "loss": 0.4857, + "epoch": 0.35, + "grad_norm": 0.24510107023633101, + "learning_rate": 1.4957416894464365e-05, + "loss": 0.1817, "step": 7725 }, { - "epoch": 0.44, - "grad_norm": 0.33547352374751016, - "learning_rate": 1.2283484544496214e-05, - "loss": 0.3212, + "epoch": 0.35, + "grad_norm": 0.44042378925609915, + "learning_rate": 1.4956124626351569e-05, + "loss": 0.294, "step": 7726 }, { - "epoch": 0.44, - "grad_norm": 0.22535303095029596, - "learning_rate": 1.2281672758665057e-05, - "loss": 0.1664, + "epoch": 0.35, + "grad_norm": 0.5992472934075173, + "learning_rate": 1.495483224851495e-05, + "loss": 0.3541, "step": 7727 }, { - "epoch": 0.44, - "grad_norm": 1.1198897473384355, - "learning_rate": 1.2279860893819677e-05, - "loss": 0.5423, + "epoch": 0.36, + "grad_norm": 0.4858652438128188, + "learning_rate": 1.4953539760983123e-05, + "loss": 0.328, "step": 7728 }, { - "epoch": 0.44, - "grad_norm": 0.5315956158941079, - "learning_rate": 1.2278048950022807e-05, - "loss": 0.4195, + "epoch": 0.36, + "grad_norm": 0.4153391474138831, + "learning_rate": 1.4952247163784699e-05, + "loss": 0.221, "step": 7729 }, { - "epoch": 0.44, - "grad_norm": 0.3316998991025966, - "learning_rate": 1.2276236927337201e-05, - "loss": 0.2396, + "epoch": 0.36, + "grad_norm": 0.5950909605946109, + "learning_rate": 1.4950954456948294e-05, + "loss": 0.4104, "step": 7730 }, { - "epoch": 0.44, - "grad_norm": 0.3949812567558907, - "learning_rate": 1.2274424825825608e-05, - "loss": 0.3378, + "epoch": 0.36, + "grad_norm": 0.2941639226048363, + "learning_rate": 1.4949661640502534e-05, + "loss": 0.1822, "step": 7731 }, { - "epoch": 0.44, - "grad_norm": 0.5902242463857145, - "learning_rate": 1.2272612645550783e-05, - "loss": 0.4106, + "epoch": 0.36, + "grad_norm": 0.3339222126817955, + "learning_rate": 1.4948368714476031e-05, + "loss": 0.2084, "step": 7732 }, { - "epoch": 0.44, - "grad_norm": 0.27268811226890377, - "learning_rate": 1.2270800386575479e-05, - "loss": 0.182, + "epoch": 0.36, + "grad_norm": 0.4458916348847113, + "learning_rate": 1.4947075678897417e-05, + "loss": 0.3505, "step": 7733 }, { - "epoch": 0.44, - "grad_norm": 0.30230494310639255, - "learning_rate": 1.2268988048962454e-05, - "loss": 0.2506, + "epoch": 0.36, + "grad_norm": 0.827883699899436, + "learning_rate": 1.4945782533795312e-05, + "loss": 0.5298, "step": 7734 }, { - "epoch": 0.44, - "grad_norm": 0.7007259407472975, - "learning_rate": 1.2267175632774472e-05, - "loss": 0.4328, + "epoch": 0.36, + "grad_norm": 0.34221002409591883, + "learning_rate": 1.494448927919835e-05, + "loss": 0.2503, "step": 7735 }, { - "epoch": 0.44, - "grad_norm": 0.3078098222416652, - "learning_rate": 1.2265363138074294e-05, - "loss": 0.2572, + "epoch": 0.36, + "grad_norm": 0.4829806350476349, + "learning_rate": 1.4943195915135164e-05, + "loss": 0.3654, "step": 7736 }, { - "epoch": 0.44, - "grad_norm": 0.7233622730142071, - "learning_rate": 1.226355056492469e-05, - "loss": 0.5583, + "epoch": 0.36, + "grad_norm": 0.2522977925675003, + "learning_rate": 1.4941902441634382e-05, + "loss": 0.2067, "step": 7737 }, { - "epoch": 0.44, - "grad_norm": 0.3706652593809387, - "learning_rate": 1.2261737913388424e-05, - "loss": 0.3142, + "epoch": 0.36, + "grad_norm": 0.3248101548722159, + "learning_rate": 1.494060885872464e-05, + "loss": 0.228, "step": 7738 }, { - "epoch": 0.44, - "grad_norm": 0.3325676696521458, - "learning_rate": 1.2259925183528275e-05, - "loss": 0.2802, + "epoch": 0.36, + "grad_norm": 0.9739493727312746, + "learning_rate": 1.4939315166434587e-05, + "loss": 0.6741, "step": 7739 }, { - "epoch": 0.44, - "grad_norm": 0.285868913478341, - "learning_rate": 1.225811237540701e-05, - "loss": 0.1341, + "epoch": 0.36, + "grad_norm": 0.4600819355638349, + "learning_rate": 1.4938021364792849e-05, + "loss": 0.4051, "step": 7740 }, { - "epoch": 0.44, - "grad_norm": 0.6615062653955808, - "learning_rate": 1.2256299489087416e-05, - "loss": 0.3796, + "epoch": 0.36, + "grad_norm": 0.37952075397481044, + "learning_rate": 1.4936727453828084e-05, + "loss": 0.2897, "step": 7741 }, { - "epoch": 0.44, - "grad_norm": 0.29847265124901856, - "learning_rate": 1.2254486524632263e-05, - "loss": 0.2744, + "epoch": 0.36, + "grad_norm": 0.494547835450643, + "learning_rate": 1.4935433433568928e-05, + "loss": 0.286, "step": 7742 }, { - "epoch": 0.44, - "grad_norm": 0.39855067333182725, - "learning_rate": 1.225267348210434e-05, - "loss": 0.2633, + "epoch": 0.36, + "grad_norm": 0.2468075657762374, + "learning_rate": 1.4934139304044033e-05, + "loss": 0.1567, "step": 7743 }, { - "epoch": 0.44, - "grad_norm": 0.6883013273514436, - "learning_rate": 1.225086036156643e-05, - "loss": 0.475, + "epoch": 0.36, + "grad_norm": 0.41247573255124825, + "learning_rate": 1.4932845065282049e-05, + "loss": 0.2699, "step": 7744 }, { - "epoch": 0.44, - "grad_norm": 0.3010695239911448, - "learning_rate": 1.2249047163081325e-05, - "loss": 0.2446, + "epoch": 0.36, + "grad_norm": 0.3027500351803961, + "learning_rate": 1.4931550717311631e-05, + "loss": 0.2631, "step": 7745 }, { - "epoch": 0.45, - "grad_norm": 0.265481799290773, - "learning_rate": 1.2247233886711811e-05, - "loss": 0.1961, + "epoch": 0.36, + "grad_norm": 0.6105508173776636, + "learning_rate": 1.493025626016143e-05, + "loss": 0.4396, "step": 7746 }, { - "epoch": 0.45, - "grad_norm": 0.6405863195516796, - "learning_rate": 1.2245420532520687e-05, - "loss": 0.3769, + "epoch": 0.36, + "grad_norm": 0.6433401918505083, + "learning_rate": 1.492896169386011e-05, + "loss": 0.3324, "step": 7747 }, { - "epoch": 0.45, - "grad_norm": 0.3782124832205112, - "learning_rate": 1.2243607100570743e-05, - "loss": 0.3033, + "epoch": 0.36, + "grad_norm": 0.3325590133324734, + "learning_rate": 1.4927667018436329e-05, + "loss": 0.2662, "step": 7748 }, { - "epoch": 0.45, - "grad_norm": 0.8096337104484459, - "learning_rate": 1.2241793590924785e-05, - "loss": 0.4681, + "epoch": 0.36, + "grad_norm": 0.2687559377333157, + "learning_rate": 1.4926372233918748e-05, + "loss": 0.2029, "step": 7749 }, { - "epoch": 0.45, - "grad_norm": 0.2962372025335575, - "learning_rate": 1.2239980003645606e-05, - "loss": 0.2728, + "epoch": 0.36, + "grad_norm": 0.7050268010839703, + "learning_rate": 1.4925077340336037e-05, + "loss": 0.3963, "step": 7750 }, { - "epoch": 0.45, - "grad_norm": 0.4048492625045128, - "learning_rate": 1.2238166338796021e-05, - "loss": 0.3013, + "epoch": 0.36, + "grad_norm": 0.46462923554628316, + "learning_rate": 1.4923782337716857e-05, + "loss": 0.3402, "step": 7751 }, { - "epoch": 0.45, - "grad_norm": 0.307624574114885, - "learning_rate": 1.2236352596438832e-05, - "loss": 0.1997, + "epoch": 0.36, + "grad_norm": 0.530941931348958, + "learning_rate": 1.4922487226089881e-05, + "loss": 0.3117, "step": 7752 }, { - "epoch": 0.45, - "grad_norm": 1.1069574333300105, - "learning_rate": 1.2234538776636846e-05, - "loss": 0.3514, + "epoch": 0.36, + "grad_norm": 0.33025233495241746, + "learning_rate": 1.4921192005483783e-05, + "loss": 0.2743, "step": 7753 }, { - "epoch": 0.45, - "grad_norm": 0.31546488061455996, - "learning_rate": 1.223272487945288e-05, - "loss": 0.2837, + "epoch": 0.36, + "grad_norm": 0.4935835185317433, + "learning_rate": 1.4919896675927238e-05, + "loss": 0.3958, "step": 7754 }, { - "epoch": 0.45, - "grad_norm": 0.4835867800375805, - "learning_rate": 1.2230910904949747e-05, - "loss": 0.3979, + "epoch": 0.36, + "grad_norm": 0.21426978207574748, + "learning_rate": 1.4918601237448925e-05, + "loss": 0.0752, "step": 7755 }, { - "epoch": 0.45, - "grad_norm": 0.959949705436744, - "learning_rate": 1.2229096853190265e-05, - "loss": 0.3485, + "epoch": 0.36, + "grad_norm": 0.33190740819757114, + "learning_rate": 1.4917305690077517e-05, + "loss": 0.2542, "step": 7756 }, { - "epoch": 0.45, - "grad_norm": 0.3530160623767107, - "learning_rate": 1.2227282724237254e-05, - "loss": 0.2498, + "epoch": 0.36, + "grad_norm": 0.36897115364795396, + "learning_rate": 1.4916010033841702e-05, + "loss": 0.3372, "step": 7757 }, { - "epoch": 0.45, - "grad_norm": 0.5273841705816225, - "learning_rate": 1.2225468518153543e-05, - "loss": 0.2861, + "epoch": 0.36, + "grad_norm": 0.8169951342206778, + "learning_rate": 1.4914714268770162e-05, + "loss": 0.3886, "step": 7758 }, { - "epoch": 0.45, - "grad_norm": 0.9535026076828771, - "learning_rate": 1.2223654235001948e-05, - "loss": 0.2452, + "epoch": 0.36, + "grad_norm": 0.4114012127335106, + "learning_rate": 1.4913418394891586e-05, + "loss": 0.2869, "step": 7759 }, { - "epoch": 0.45, - "grad_norm": 0.36487906220355076, - "learning_rate": 1.2221839874845303e-05, - "loss": 0.2854, + "epoch": 0.36, + "grad_norm": 0.5771681194701475, + "learning_rate": 1.4912122412234665e-05, + "loss": 0.3654, "step": 7760 }, { - "epoch": 0.45, - "grad_norm": 1.1723647175488505, - "learning_rate": 1.2220025437746437e-05, - "loss": 0.8119, + "epoch": 0.36, + "grad_norm": 0.24522022364610135, + "learning_rate": 1.4910826320828085e-05, + "loss": 0.189, "step": 7761 }, { - "epoch": 0.45, - "grad_norm": 0.395196663415754, - "learning_rate": 1.221821092376819e-05, - "loss": 0.2566, + "epoch": 0.36, + "grad_norm": 0.6115058390436778, + "learning_rate": 1.4909530120700542e-05, + "loss": 0.34, "step": 7762 }, { - "epoch": 0.45, - "grad_norm": 0.43346883361783417, - "learning_rate": 1.2216396332973391e-05, - "loss": 0.3076, + "epoch": 0.36, + "grad_norm": 0.4266171602863389, + "learning_rate": 1.4908233811880737e-05, + "loss": 0.3283, "step": 7763 }, { - "epoch": 0.45, - "grad_norm": 0.8082014848351015, - "learning_rate": 1.2214581665424883e-05, - "loss": 0.442, + "epoch": 0.36, + "grad_norm": 0.4012109715726487, + "learning_rate": 1.4906937394397362e-05, + "loss": 0.3491, "step": 7764 }, { - "epoch": 0.45, - "grad_norm": 0.2674446403474515, - "learning_rate": 1.2212766921185506e-05, - "loss": 0.2374, + "epoch": 0.36, + "grad_norm": 0.4084112474140164, + "learning_rate": 1.4905640868279128e-05, + "loss": 0.1656, "step": 7765 }, { - "epoch": 0.45, - "grad_norm": 0.28619470020545024, - "learning_rate": 1.2210952100318107e-05, - "loss": 0.1977, + "epoch": 0.36, + "grad_norm": 0.4775386224395846, + "learning_rate": 1.490434423355473e-05, + "loss": 0.3445, "step": 7766 }, { - "epoch": 0.45, - "grad_norm": 0.5161481861718609, - "learning_rate": 1.220913720288553e-05, - "loss": 0.4137, + "epoch": 0.36, + "grad_norm": 0.43953484370791385, + "learning_rate": 1.490304749025288e-05, + "loss": 0.2808, "step": 7767 }, { - "epoch": 0.45, - "grad_norm": 1.4577512973298372, - "learning_rate": 1.2207322228950628e-05, - "loss": 0.7202, + "epoch": 0.36, + "grad_norm": 0.42510950162149563, + "learning_rate": 1.490175063840228e-05, + "loss": 0.2435, "step": 7768 }, { - "epoch": 0.45, - "grad_norm": 0.3080743765473305, - "learning_rate": 1.2205507178576252e-05, - "loss": 0.1984, + "epoch": 0.36, + "grad_norm": 0.3742307647408235, + "learning_rate": 1.4900453678031648e-05, + "loss": 0.3288, "step": 7769 }, { - "epoch": 0.45, - "grad_norm": 0.439692529348634, - "learning_rate": 1.220369205182526e-05, - "loss": 0.3539, + "epoch": 0.36, + "grad_norm": 0.4733216652930082, + "learning_rate": 1.4899156609169693e-05, + "loss": 0.2591, "step": 7770 }, { - "epoch": 0.45, - "grad_norm": 0.2810154705395384, - "learning_rate": 1.2201876848760505e-05, - "loss": 0.2121, + "epoch": 0.36, + "grad_norm": 0.3275064916987912, + "learning_rate": 1.4897859431845135e-05, + "loss": 0.235, "step": 7771 }, { - "epoch": 0.45, - "grad_norm": 0.34147306795497445, - "learning_rate": 1.2200061569444848e-05, - "loss": 0.2114, + "epoch": 0.36, + "grad_norm": 0.32275580791758907, + "learning_rate": 1.4896562146086688e-05, + "loss": 0.2729, "step": 7772 }, { - "epoch": 0.45, - "grad_norm": 1.1078075037797748, - "learning_rate": 1.2198246213941156e-05, - "loss": 0.6738, + "epoch": 0.36, + "grad_norm": 1.0341525979507533, + "learning_rate": 1.4895264751923075e-05, + "loss": 0.5519, "step": 7773 }, { - "epoch": 0.45, - "grad_norm": 0.43075098792032634, - "learning_rate": 1.2196430782312292e-05, - "loss": 0.3043, + "epoch": 0.36, + "grad_norm": 0.39680009365542435, + "learning_rate": 1.4893967249383017e-05, + "loss": 0.2581, "step": 7774 }, { - "epoch": 0.45, - "grad_norm": 0.3794600600488902, - "learning_rate": 1.2194615274621122e-05, - "loss": 0.2685, + "epoch": 0.36, + "grad_norm": 0.545285040204286, + "learning_rate": 1.4892669638495246e-05, + "loss": 0.3783, "step": 7775 }, { - "epoch": 0.45, - "grad_norm": 0.5827109851181835, - "learning_rate": 1.2192799690930521e-05, - "loss": 0.3391, + "epoch": 0.36, + "grad_norm": 0.2774160551576022, + "learning_rate": 1.4891371919288478e-05, + "loss": 0.2349, "step": 7776 }, { - "epoch": 0.45, - "grad_norm": 0.35902548591666494, - "learning_rate": 1.2190984031303361e-05, - "loss": 0.2472, + "epoch": 0.36, + "grad_norm": 0.6715648090914152, + "learning_rate": 1.4890074091791453e-05, + "loss": 0.4031, "step": 7777 }, { - "epoch": 0.45, - "grad_norm": 0.3640745890902366, - "learning_rate": 1.2189168295802519e-05, - "loss": 0.2622, + "epoch": 0.36, + "grad_norm": 0.38754611699503205, + "learning_rate": 1.4888776156032905e-05, + "loss": 0.2495, "step": 7778 }, { - "epoch": 0.45, - "grad_norm": 0.34088465488055364, - "learning_rate": 1.218735248449087e-05, - "loss": 0.1616, + "epoch": 0.36, + "grad_norm": 0.5739918941718014, + "learning_rate": 1.488747811204156e-05, + "loss": 0.4566, "step": 7779 }, { - "epoch": 0.45, - "grad_norm": 0.7991062158846808, - "learning_rate": 1.21855365974313e-05, - "loss": 0.4441, + "epoch": 0.36, + "grad_norm": 0.38215697134655247, + "learning_rate": 1.4886179959846161e-05, + "loss": 0.3076, "step": 7780 }, { - "epoch": 0.45, - "grad_norm": 0.3664535149552145, - "learning_rate": 1.2183720634686693e-05, - "loss": 0.2753, + "epoch": 0.36, + "grad_norm": 0.36295234732575, + "learning_rate": 1.4884881699475444e-05, + "loss": 0.217, "step": 7781 }, { - "epoch": 0.45, - "grad_norm": 0.37060343525461587, - "learning_rate": 1.2181904596319933e-05, - "loss": 0.2746, + "epoch": 0.36, + "grad_norm": 0.32531548143010985, + "learning_rate": 1.488358333095816e-05, + "loss": 0.2034, "step": 7782 }, { - "epoch": 0.45, - "grad_norm": 0.26177238030517913, - "learning_rate": 1.2180088482393911e-05, - "loss": 0.1789, + "epoch": 0.36, + "grad_norm": 1.4258753296914577, + "learning_rate": 1.4882284854323046e-05, + "loss": 0.7234, "step": 7783 }, { - "epoch": 0.45, - "grad_norm": 0.3335241264892765, - "learning_rate": 1.2178272292971519e-05, - "loss": 0.2824, + "epoch": 0.36, + "grad_norm": 0.25709501174008975, + "learning_rate": 1.488098626959885e-05, + "loss": 0.2203, "step": 7784 }, { - "epoch": 0.45, - "grad_norm": 0.7427978808671635, - "learning_rate": 1.217645602811565e-05, - "loss": 0.484, + "epoch": 0.36, + "grad_norm": 0.671774870135189, + "learning_rate": 1.4879687576814321e-05, + "loss": 0.4725, "step": 7785 }, { - "epoch": 0.45, - "grad_norm": 0.4443305120052916, - "learning_rate": 1.2174639687889202e-05, - "loss": 0.3144, + "epoch": 0.36, + "grad_norm": 0.7245898679451107, + "learning_rate": 1.4878388775998213e-05, + "loss": 0.4229, "step": 7786 }, { - "epoch": 0.45, - "grad_norm": 0.39557505313196073, - "learning_rate": 1.2172823272355077e-05, - "loss": 0.2631, + "epoch": 0.36, + "grad_norm": 0.27899485682105696, + "learning_rate": 1.4877089867179279e-05, + "loss": 0.1771, "step": 7787 }, { - "epoch": 0.45, - "grad_norm": 0.6536016348918546, - "learning_rate": 1.2171006781576173e-05, - "loss": 0.3575, + "epoch": 0.36, + "grad_norm": 0.4215972194127377, + "learning_rate": 1.4875790850386278e-05, + "loss": 0.3543, "step": 7788 }, { - "epoch": 0.45, - "grad_norm": 0.2895298768999007, - "learning_rate": 1.2169190215615401e-05, - "loss": 0.1717, + "epoch": 0.36, + "grad_norm": 0.45163028722179144, + "learning_rate": 1.4874491725647966e-05, + "loss": 0.3017, "step": 7789 }, { - "epoch": 0.45, - "grad_norm": 0.3057102143177872, - "learning_rate": 1.2167373574535661e-05, - "loss": 0.258, + "epoch": 0.36, + "grad_norm": 0.40182115791885586, + "learning_rate": 1.4873192492993108e-05, + "loss": 0.3093, "step": 7790 }, { - "epoch": 0.45, - "grad_norm": 0.5624184239293547, - "learning_rate": 1.2165556858399874e-05, - "loss": 0.443, + "epoch": 0.36, + "grad_norm": 1.0182945471476186, + "learning_rate": 1.487189315245046e-05, + "loss": 0.3651, "step": 7791 }, { - "epoch": 0.45, - "grad_norm": 0.4411597730654119, - "learning_rate": 1.2163740067270941e-05, - "loss": 0.2398, + "epoch": 0.36, + "grad_norm": 0.3433225963607869, + "learning_rate": 1.4870593704048797e-05, + "loss": 0.3, "step": 7792 }, { - "epoch": 0.45, - "grad_norm": 0.33775600014775914, - "learning_rate": 1.2161923201211783e-05, - "loss": 0.273, + "epoch": 0.36, + "grad_norm": 0.3855147290128399, + "learning_rate": 1.4869294147816882e-05, + "loss": 0.3057, "step": 7793 }, { - "epoch": 0.45, - "grad_norm": 0.354861755129926, - "learning_rate": 1.2160106260285316e-05, - "loss": 0.293, + "epoch": 0.36, + "grad_norm": 0.3476959585570811, + "learning_rate": 1.4867994483783485e-05, + "loss": 0.194, "step": 7794 }, { - "epoch": 0.45, - "grad_norm": 0.7224381640648487, - "learning_rate": 1.2158289244554467e-05, - "loss": 0.3158, + "epoch": 0.36, + "grad_norm": 0.3291448556784333, + "learning_rate": 1.4866694711977387e-05, + "loss": 0.2068, "step": 7795 }, { - "epoch": 0.45, - "grad_norm": 0.34561253546241905, - "learning_rate": 1.2156472154082149e-05, - "loss": 0.255, + "epoch": 0.36, + "grad_norm": 0.4661788272851446, + "learning_rate": 1.4865394832427359e-05, + "loss": 0.3505, "step": 7796 }, { - "epoch": 0.45, - "grad_norm": 1.1561107168749716, - "learning_rate": 1.2154654988931296e-05, - "loss": 0.7711, + "epoch": 0.36, + "grad_norm": 0.49707687985821103, + "learning_rate": 1.4864094845162176e-05, + "loss": 0.3077, "step": 7797 }, { - "epoch": 0.45, - "grad_norm": 0.37902420717857044, - "learning_rate": 1.2152837749164834e-05, - "loss": 0.2881, + "epoch": 0.36, + "grad_norm": 1.0456008003947193, + "learning_rate": 1.4862794750210618e-05, + "loss": 0.4252, "step": 7798 }, { - "epoch": 0.45, - "grad_norm": 0.3027625498266799, - "learning_rate": 1.215102043484569e-05, - "loss": 0.224, + "epoch": 0.36, + "grad_norm": 0.39351834082360715, + "learning_rate": 1.4861494547601476e-05, + "loss": 0.3077, "step": 7799 }, { - "epoch": 0.45, - "grad_norm": 0.4722136725028722, - "learning_rate": 1.2149203046036803e-05, - "loss": 0.2767, + "epoch": 0.36, + "grad_norm": 0.3359619855240162, + "learning_rate": 1.4860194237363529e-05, + "loss": 0.2657, "step": 7800 }, { - "epoch": 0.45, - "grad_norm": 0.47601793386394814, - "learning_rate": 1.2147385582801106e-05, - "loss": 0.3417, + "epoch": 0.36, + "grad_norm": 0.36573436025086803, + "learning_rate": 1.4858893819525566e-05, + "loss": 0.1942, "step": 7801 }, { - "epoch": 0.45, - "grad_norm": 0.30416584956647924, - "learning_rate": 1.214556804520154e-05, - "loss": 0.2099, + "epoch": 0.36, + "grad_norm": 0.3332011760622113, + "learning_rate": 1.4857593294116374e-05, + "loss": 0.2754, "step": 7802 }, { - "epoch": 0.45, - "grad_norm": 0.5322689707569284, - "learning_rate": 1.2143750433301043e-05, - "loss": 0.3731, + "epoch": 0.36, + "grad_norm": 0.6039928726613857, + "learning_rate": 1.4856292661164752e-05, + "loss": 0.4356, "step": 7803 }, { - "epoch": 0.45, - "grad_norm": 0.48119429572557804, - "learning_rate": 1.2141932747162564e-05, - "loss": 0.3049, + "epoch": 0.36, + "grad_norm": 0.3630117179081526, + "learning_rate": 1.4854991920699489e-05, + "loss": 0.2599, "step": 7804 }, { - "epoch": 0.45, - "grad_norm": 0.2579226364490064, - "learning_rate": 1.2140114986849043e-05, - "loss": 0.138, + "epoch": 0.36, + "grad_norm": 0.359519962779008, + "learning_rate": 1.4853691072749385e-05, + "loss": 0.2881, "step": 7805 }, { - "epoch": 0.45, - "grad_norm": 0.32117319040447845, - "learning_rate": 1.2138297152423432e-05, - "loss": 0.2956, + "epoch": 0.36, + "grad_norm": 1.3777172877747461, + "learning_rate": 1.4852390117343241e-05, + "loss": 0.7615, "step": 7806 }, { - "epoch": 0.45, - "grad_norm": 0.9370941699181503, - "learning_rate": 1.2136479243948683e-05, - "loss": 0.6731, + "epoch": 0.36, + "grad_norm": 0.5887963994130917, + "learning_rate": 1.4851089054509852e-05, + "loss": 0.3318, "step": 7807 }, { - "epoch": 0.45, - "grad_norm": 0.3129022751134737, - "learning_rate": 1.2134661261487752e-05, - "loss": 0.1771, + "epoch": 0.36, + "grad_norm": 0.3148495586312449, + "learning_rate": 1.484978788427803e-05, + "loss": 0.2984, "step": 7808 }, { - "epoch": 0.45, - "grad_norm": 0.3969024582867962, - "learning_rate": 1.2132843205103591e-05, - "loss": 0.322, + "epoch": 0.36, + "grad_norm": 0.4805865433174144, + "learning_rate": 1.484848660667658e-05, + "loss": 0.2931, "step": 7809 }, { - "epoch": 0.45, - "grad_norm": 0.4938649637259429, - "learning_rate": 1.2131025074859164e-05, - "loss": 0.3403, + "epoch": 0.36, + "grad_norm": 0.3478883102181937, + "learning_rate": 1.4847185221734306e-05, + "loss": 0.1368, "step": 7810 }, { - "epoch": 0.45, - "grad_norm": 0.21944148398620011, - "learning_rate": 1.2129206870817428e-05, - "loss": 0.1346, + "epoch": 0.36, + "grad_norm": 0.4151252874908043, + "learning_rate": 1.4845883729480024e-05, + "loss": 0.3202, "step": 7811 }, { - "epoch": 0.45, - "grad_norm": 0.3246560530948743, - "learning_rate": 1.2127388593041348e-05, - "loss": 0.24, + "epoch": 0.36, + "grad_norm": 0.43112096262158095, + "learning_rate": 1.4844582129942546e-05, + "loss": 0.3458, "step": 7812 }, { - "epoch": 0.45, - "grad_norm": 0.48767280290232373, - "learning_rate": 1.2125570241593894e-05, - "loss": 0.3602, + "epoch": 0.36, + "grad_norm": 0.6330890758935299, + "learning_rate": 1.4843280423150692e-05, + "loss": 0.3452, "step": 7813 }, { - "epoch": 0.45, - "grad_norm": 0.33893538071778284, - "learning_rate": 1.2123751816538037e-05, - "loss": 0.2849, + "epoch": 0.36, + "grad_norm": 0.38028321962458783, + "learning_rate": 1.4841978609133274e-05, + "loss": 0.2931, "step": 7814 }, { - "epoch": 0.45, - "grad_norm": 0.49358569403480806, - "learning_rate": 1.2121933317936741e-05, - "loss": 0.3024, + "epoch": 0.36, + "grad_norm": 0.32305049548798515, + "learning_rate": 1.4840676687919117e-05, + "loss": 0.1921, "step": 7815 }, { - "epoch": 0.45, - "grad_norm": 0.6051859788689352, - "learning_rate": 1.2120114745852989e-05, - "loss": 0.4997, + "epoch": 0.36, + "grad_norm": 0.35247332815829113, + "learning_rate": 1.4839374659537047e-05, + "loss": 0.325, "step": 7816 }, { - "epoch": 0.45, - "grad_norm": 0.2764055452780025, - "learning_rate": 1.2118296100349753e-05, - "loss": 0.2308, + "epoch": 0.36, + "grad_norm": 0.36784026367040007, + "learning_rate": 1.483807252401588e-05, + "loss": 0.2262, "step": 7817 }, { - "epoch": 0.45, - "grad_norm": 0.25970750334520953, - "learning_rate": 1.2116477381490013e-05, - "loss": 0.1922, + "epoch": 0.36, + "grad_norm": 1.0739606124948378, + "learning_rate": 1.4836770281384456e-05, + "loss": 0.7776, "step": 7818 }, { - "epoch": 0.45, - "grad_norm": 0.8639977301196575, - "learning_rate": 1.2114658589336754e-05, - "loss": 0.5905, + "epoch": 0.36, + "grad_norm": 0.7955495594178713, + "learning_rate": 1.4835467931671597e-05, + "loss": 0.4915, "step": 7819 }, { - "epoch": 0.45, - "grad_norm": 0.4017731721362114, - "learning_rate": 1.2112839723952958e-05, - "loss": 0.2922, + "epoch": 0.36, + "grad_norm": 0.26523101678262884, + "learning_rate": 1.4834165474906139e-05, + "loss": 0.2348, "step": 7820 }, { - "epoch": 0.45, - "grad_norm": 0.3604215580984441, - "learning_rate": 1.2111020785401615e-05, - "loss": 0.2731, + "epoch": 0.36, + "grad_norm": 0.2875616605412137, + "learning_rate": 1.4832862911116917e-05, + "loss": 0.204, "step": 7821 }, { - "epoch": 0.45, - "grad_norm": 0.6402061764469947, - "learning_rate": 1.210920177374571e-05, - "loss": 0.3718, + "epoch": 0.36, + "grad_norm": 1.8374595968464622, + "learning_rate": 1.4831560240332769e-05, + "loss": 0.6774, "step": 7822 }, { - "epoch": 0.45, - "grad_norm": 0.6287622775741888, - "learning_rate": 1.210738268904824e-05, - "loss": 0.3071, + "epoch": 0.36, + "grad_norm": 0.3591524367459379, + "learning_rate": 1.4830257462582533e-05, + "loss": 0.2252, "step": 7823 }, { - "epoch": 0.45, - "grad_norm": 0.23030235373629632, - "learning_rate": 1.21055635313722e-05, - "loss": 0.1565, + "epoch": 0.36, + "grad_norm": 0.4348323718918711, + "learning_rate": 1.4828954577895051e-05, + "loss": 0.3103, "step": 7824 }, { - "epoch": 0.45, - "grad_norm": 0.36849606576333455, - "learning_rate": 1.2103744300780586e-05, - "loss": 0.3311, + "epoch": 0.36, + "grad_norm": 0.9635652804767664, + "learning_rate": 1.4827651586299172e-05, + "loss": 0.4392, "step": 7825 }, { - "epoch": 0.45, - "grad_norm": 0.5539057972494684, - "learning_rate": 1.2101924997336393e-05, - "loss": 0.3156, + "epoch": 0.36, + "grad_norm": 0.3344395841305085, + "learning_rate": 1.4826348487823737e-05, + "loss": 0.205, "step": 7826 }, { - "epoch": 0.45, - "grad_norm": 0.39605459800732945, - "learning_rate": 1.2100105621102631e-05, - "loss": 0.3243, + "epoch": 0.36, + "grad_norm": 0.3311737333956951, + "learning_rate": 1.4825045282497598e-05, + "loss": 0.1857, "step": 7827 }, { - "epoch": 0.45, - "grad_norm": 0.8298682413471936, - "learning_rate": 1.20982861721423e-05, - "loss": 0.462, + "epoch": 0.36, + "grad_norm": 0.4317332179036389, + "learning_rate": 1.4823741970349608e-05, + "loss": 0.33, "step": 7828 }, { - "epoch": 0.45, - "grad_norm": 0.38887716172679526, - "learning_rate": 1.209646665051841e-05, - "loss": 0.2785, + "epoch": 0.36, + "grad_norm": 0.3901225200296523, + "learning_rate": 1.482243855140862e-05, + "loss": 0.2666, "step": 7829 }, { - "epoch": 0.45, - "grad_norm": 0.22922781355910388, - "learning_rate": 1.2094647056293969e-05, - "loss": 0.2256, + "epoch": 0.36, + "grad_norm": 1.3315418016923604, + "learning_rate": 1.4821135025703491e-05, + "loss": 0.3777, "step": 7830 }, { - "epoch": 0.45, - "grad_norm": 0.6853732992858654, - "learning_rate": 1.2092827389531992e-05, - "loss": 0.3065, + "epoch": 0.36, + "grad_norm": 0.4125330967983228, + "learning_rate": 1.481983139326308e-05, + "loss": 0.3349, "step": 7831 }, { - "epoch": 0.45, - "grad_norm": 0.5288262475482187, - "learning_rate": 1.209100765029549e-05, - "loss": 0.3367, + "epoch": 0.36, + "grad_norm": 0.43664740143919506, + "learning_rate": 1.4818527654116244e-05, + "loss": 0.2689, "step": 7832 }, { - "epoch": 0.45, - "grad_norm": 0.3683993478374688, - "learning_rate": 1.2089187838647484e-05, - "loss": 0.2997, + "epoch": 0.36, + "grad_norm": 0.3208479633938048, + "learning_rate": 1.4817223808291851e-05, + "loss": 0.171, "step": 7833 }, { - "epoch": 0.45, - "grad_norm": 0.46618559440736795, - "learning_rate": 1.2087367954650992e-05, - "loss": 0.3075, + "epoch": 0.36, + "grad_norm": 0.6909219444727045, + "learning_rate": 1.4815919855818766e-05, + "loss": 0.2952, "step": 7834 }, { - "epoch": 0.45, - "grad_norm": 0.3954069825574937, - "learning_rate": 1.208554799836904e-05, - "loss": 0.2891, + "epoch": 0.36, + "grad_norm": 0.5140361748698637, + "learning_rate": 1.4814615796725858e-05, + "loss": 0.2851, "step": 7835 }, { - "epoch": 0.45, - "grad_norm": 0.2476792511905212, - "learning_rate": 1.2083727969864652e-05, - "loss": 0.2011, + "epoch": 0.36, + "grad_norm": 0.6842546017757796, + "learning_rate": 1.4813311631041996e-05, + "loss": 0.2913, "step": 7836 }, { - "epoch": 0.45, - "grad_norm": 0.4296079969449888, - "learning_rate": 1.208190786920085e-05, - "loss": 0.2942, + "epoch": 0.36, + "grad_norm": 1.4476760441102245, + "learning_rate": 1.4812007358796054e-05, + "loss": 0.4931, "step": 7837 }, { - "epoch": 0.45, - "grad_norm": 0.6835585045500028, - "learning_rate": 1.208008769644067e-05, - "loss": 0.3775, + "epoch": 0.36, + "grad_norm": 0.4069654960558933, + "learning_rate": 1.4810702980016909e-05, + "loss": 0.278, "step": 7838 }, { - "epoch": 0.45, - "grad_norm": 0.429431380987189, - "learning_rate": 1.2078267451647141e-05, - "loss": 0.319, + "epoch": 0.36, + "grad_norm": 0.36574871739631487, + "learning_rate": 1.480939849473343e-05, + "loss": 0.2569, "step": 7839 }, { - "epoch": 0.45, - "grad_norm": 0.9934331425210245, - "learning_rate": 1.20764471348833e-05, - "loss": 0.6671, + "epoch": 0.36, + "grad_norm": 0.33657563163694953, + "learning_rate": 1.4808093902974512e-05, + "loss": 0.1968, "step": 7840 }, { - "epoch": 0.45, - "grad_norm": 0.35648383354079055, - "learning_rate": 1.2074626746212183e-05, - "loss": 0.2447, + "epoch": 0.36, + "grad_norm": 0.4742847426973587, + "learning_rate": 1.4806789204769023e-05, + "loss": 0.2926, "step": 7841 }, { - "epoch": 0.45, - "grad_norm": 0.2711537520251157, - "learning_rate": 1.2072806285696836e-05, - "loss": 0.2366, + "epoch": 0.36, + "grad_norm": 1.3767674557240077, + "learning_rate": 1.4805484400145856e-05, + "loss": 0.4907, "step": 7842 }, { - "epoch": 0.45, - "grad_norm": 0.5954785393208696, - "learning_rate": 1.2070985753400293e-05, - "loss": 0.3112, + "epoch": 0.36, + "grad_norm": 0.56125278220848, + "learning_rate": 1.4804179489133896e-05, + "loss": 0.3001, "step": 7843 }, { - "epoch": 0.45, - "grad_norm": 0.506182932788288, - "learning_rate": 1.2069165149385603e-05, - "loss": 0.1937, + "epoch": 0.36, + "grad_norm": 0.3708613289363053, + "learning_rate": 1.4802874471762034e-05, + "loss": 0.297, "step": 7844 }, { - "epoch": 0.45, - "grad_norm": 0.3082872471946073, - "learning_rate": 1.2067344473715813e-05, - "loss": 0.2891, + "epoch": 0.36, + "grad_norm": 1.3387905667086102, + "learning_rate": 1.4801569348059158e-05, + "loss": 0.7449, "step": 7845 }, { - "epoch": 0.45, - "grad_norm": 1.3423903304220317, - "learning_rate": 1.2065523726453971e-05, - "loss": 0.7992, + "epoch": 0.36, + "grad_norm": 0.24962037965703626, + "learning_rate": 1.4800264118054164e-05, + "loss": 0.1377, "step": 7846 }, { - "epoch": 0.45, - "grad_norm": 0.4917154874110692, - "learning_rate": 1.206370290766313e-05, - "loss": 0.1749, + "epoch": 0.36, + "grad_norm": 0.44834685204531427, + "learning_rate": 1.4798958781775949e-05, + "loss": 0.2869, "step": 7847 }, { - "epoch": 0.45, - "grad_norm": 0.241763552867306, - "learning_rate": 1.206188201740635e-05, - "loss": 0.2102, + "epoch": 0.36, + "grad_norm": 0.39958852727916466, + "learning_rate": 1.4797653339253418e-05, + "loss": 0.3118, "step": 7848 }, { - "epoch": 0.45, - "grad_norm": 0.3783400126203032, - "learning_rate": 1.206006105574668e-05, - "loss": 0.3344, + "epoch": 0.36, + "grad_norm": 0.9075717803086096, + "learning_rate": 1.4796347790515458e-05, + "loss": 0.4521, "step": 7849 }, { - "epoch": 0.45, - "grad_norm": 0.73194324677599, - "learning_rate": 1.205824002274719e-05, - "loss": 0.4001, + "epoch": 0.36, + "grad_norm": 0.4846194757919043, + "learning_rate": 1.4795042135590984e-05, + "loss": 0.2819, "step": 7850 }, { - "epoch": 0.45, - "grad_norm": 0.31024938969861154, - "learning_rate": 1.2056418918470931e-05, - "loss": 0.2304, + "epoch": 0.36, + "grad_norm": 0.6310186984864378, + "learning_rate": 1.4793736374508898e-05, + "loss": 0.3485, "step": 7851 }, { - "epoch": 0.45, - "grad_norm": 0.9587907228244877, - "learning_rate": 1.2054597742980975e-05, - "loss": 0.6535, + "epoch": 0.36, + "grad_norm": 0.27679613071230025, + "learning_rate": 1.479243050729811e-05, + "loss": 0.195, "step": 7852 }, { - "epoch": 0.45, - "grad_norm": 0.353162787883959, - "learning_rate": 1.2052776496340389e-05, - "loss": 0.2805, + "epoch": 0.36, + "grad_norm": 0.4883150077277149, + "learning_rate": 1.4791124533987529e-05, + "loss": 0.2984, "step": 7853 }, { - "epoch": 0.45, - "grad_norm": 0.32028660740744386, - "learning_rate": 1.205095517861224e-05, - "loss": 0.2061, + "epoch": 0.36, + "grad_norm": 0.5010279218325235, + "learning_rate": 1.4789818454606069e-05, + "loss": 0.3807, "step": 7854 }, { - "epoch": 0.45, - "grad_norm": 0.43666200782343506, - "learning_rate": 1.2049133789859602e-05, - "loss": 0.3048, + "epoch": 0.36, + "grad_norm": 0.42157385868804226, + "learning_rate": 1.4788512269182644e-05, + "loss": 0.3327, "step": 7855 }, { - "epoch": 0.45, - "grad_norm": 0.364837631487842, - "learning_rate": 1.2047312330145549e-05, - "loss": 0.2357, + "epoch": 0.36, + "grad_norm": 0.40346775485871417, + "learning_rate": 1.478720597774617e-05, + "loss": 0.2016, "step": 7856 }, { - "epoch": 0.45, - "grad_norm": 0.2910287527496673, - "learning_rate": 1.204549079953316e-05, - "loss": 0.253, + "epoch": 0.36, + "grad_norm": 0.6244678470236887, + "learning_rate": 1.4785899580325575e-05, + "loss": 0.3865, "step": 7857 }, { - "epoch": 0.45, - "grad_norm": 0.8730968420317119, - "learning_rate": 1.2043669198085509e-05, - "loss": 0.6054, + "epoch": 0.36, + "grad_norm": 0.3220452116200983, + "learning_rate": 1.4784593076949772e-05, + "loss": 0.2133, "step": 7858 }, { - "epoch": 0.45, - "grad_norm": 1.4669922543739415, - "learning_rate": 1.2041847525865681e-05, - "loss": 0.7951, + "epoch": 0.36, + "grad_norm": 0.3150546508299406, + "learning_rate": 1.4783286467647693e-05, + "loss": 0.2465, "step": 7859 }, { - "epoch": 0.45, - "grad_norm": 0.3773913635551674, - "learning_rate": 1.2040025782936766e-05, - "loss": 0.2398, + "epoch": 0.36, + "grad_norm": 0.4247304489678315, + "learning_rate": 1.478197975244826e-05, + "loss": 0.3519, "step": 7860 }, { - "epoch": 0.45, - "grad_norm": 0.2937030728988133, - "learning_rate": 1.2038203969361841e-05, - "loss": 0.2758, + "epoch": 0.36, + "grad_norm": 1.3221130619484365, + "learning_rate": 1.4780672931380404e-05, + "loss": 0.8256, "step": 7861 }, { - "epoch": 0.45, - "grad_norm": 0.39829030321756786, - "learning_rate": 1.2036382085204004e-05, - "loss": 0.2639, + "epoch": 0.36, + "grad_norm": 0.35784978331638584, + "learning_rate": 1.4779366004473057e-05, + "loss": 0.2162, "step": 7862 }, { - "epoch": 0.45, - "grad_norm": 0.35718100232276506, - "learning_rate": 1.2034560130526341e-05, - "loss": 0.2594, + "epoch": 0.36, + "grad_norm": 1.3485660639077333, + "learning_rate": 1.4778058971755154e-05, + "loss": 0.7468, "step": 7863 }, { - "epoch": 0.45, - "grad_norm": 0.8819076750989475, - "learning_rate": 1.2032738105391945e-05, - "loss": 0.4058, + "epoch": 0.36, + "grad_norm": 0.41328308533914754, + "learning_rate": 1.477675183325563e-05, + "loss": 0.3547, "step": 7864 }, { - "epoch": 0.45, - "grad_norm": 0.37511149307403135, - "learning_rate": 1.2030916009863921e-05, - "loss": 0.3146, + "epoch": 0.36, + "grad_norm": 0.47410654010358383, + "learning_rate": 1.4775444589003423e-05, + "loss": 0.3158, "step": 7865 }, { - "epoch": 0.45, - "grad_norm": 0.3427182179012295, - "learning_rate": 1.2029093844005359e-05, - "loss": 0.2962, + "epoch": 0.36, + "grad_norm": 0.26471436289337624, + "learning_rate": 1.4774137239027478e-05, + "loss": 0.1481, "step": 7866 }, { - "epoch": 0.45, - "grad_norm": 0.6080154357699522, - "learning_rate": 1.2027271607879368e-05, - "loss": 0.362, + "epoch": 0.36, + "grad_norm": 0.4832334330896569, + "learning_rate": 1.4772829783356735e-05, + "loss": 0.37, "step": 7867 }, { - "epoch": 0.45, - "grad_norm": 0.24903988506822783, - "learning_rate": 1.2025449301549046e-05, - "loss": 0.1698, + "epoch": 0.36, + "grad_norm": 1.0606737178225087, + "learning_rate": 1.477152222202014e-05, + "loss": 0.4994, "step": 7868 }, { - "epoch": 0.45, - "grad_norm": 0.3080660901489215, - "learning_rate": 1.2023626925077503e-05, - "loss": 0.272, + "epoch": 0.36, + "grad_norm": 0.4027624659810944, + "learning_rate": 1.4770214555046641e-05, + "loss": 0.2399, "step": 7869 }, { - "epoch": 0.45, - "grad_norm": 1.1014977729702664, - "learning_rate": 1.2021804478527845e-05, - "loss": 0.401, + "epoch": 0.36, + "grad_norm": 0.6097939129384986, + "learning_rate": 1.4768906782465191e-05, + "loss": 0.441, "step": 7870 }, { - "epoch": 0.45, - "grad_norm": 0.6438849569428409, - "learning_rate": 1.2019981961963185e-05, - "loss": 0.4141, + "epoch": 0.36, + "grad_norm": 0.40794221594144514, + "learning_rate": 1.4767598904304738e-05, + "loss": 0.3193, "step": 7871 }, { - "epoch": 0.45, - "grad_norm": 0.3717529729312076, - "learning_rate": 1.2018159375446636e-05, - "loss": 0.2579, + "epoch": 0.36, + "grad_norm": 0.27930766864815143, + "learning_rate": 1.4766290920594246e-05, + "loss": 0.1902, "step": 7872 }, { - "epoch": 0.45, - "grad_norm": 0.3212381811770286, - "learning_rate": 1.2016336719041318e-05, - "loss": 0.2701, + "epoch": 0.36, + "grad_norm": 1.3486073932357248, + "learning_rate": 1.4764982831362662e-05, + "loss": 0.8363, "step": 7873 }, { - "epoch": 0.45, - "grad_norm": 0.38203566172353803, - "learning_rate": 1.2014513992810344e-05, - "loss": 0.2637, + "epoch": 0.36, + "grad_norm": 0.6144086859802763, + "learning_rate": 1.4763674636638953e-05, + "loss": 0.3423, "step": 7874 }, { - "epoch": 0.45, - "grad_norm": 0.3642964971611716, - "learning_rate": 1.2012691196816836e-05, - "loss": 0.275, + "epoch": 0.36, + "grad_norm": 0.2866945476137367, + "learning_rate": 1.4762366336452076e-05, + "loss": 0.2483, "step": 7875 }, { - "epoch": 0.45, - "grad_norm": 0.37088618583675204, - "learning_rate": 1.2010868331123922e-05, - "loss": 0.2343, + "epoch": 0.36, + "grad_norm": 0.6851916229472477, + "learning_rate": 1.4761057930831002e-05, + "loss": 0.4337, "step": 7876 }, { - "epoch": 0.45, - "grad_norm": 0.40984124709798897, - "learning_rate": 1.2009045395794723e-05, - "loss": 0.283, + "epoch": 0.36, + "grad_norm": 0.5971826068425716, + "learning_rate": 1.4759749419804695e-05, + "loss": 0.3384, "step": 7877 }, { - "epoch": 0.45, - "grad_norm": 0.31850183923540143, - "learning_rate": 1.2007222390892365e-05, - "loss": 0.2847, + "epoch": 0.36, + "grad_norm": 0.2899109982770521, + "learning_rate": 1.4758440803402121e-05, + "loss": 0.2257, "step": 7878 }, { - "epoch": 0.45, - "grad_norm": 0.6904602282111612, - "learning_rate": 1.2005399316479984e-05, - "loss": 0.4745, + "epoch": 0.36, + "grad_norm": 0.3749389145746594, + "learning_rate": 1.4757132081652252e-05, + "loss": 0.258, "step": 7879 }, { - "epoch": 0.45, - "grad_norm": 0.28231886869155365, - "learning_rate": 1.200357617262071e-05, - "loss": 0.1993, + "epoch": 0.36, + "grad_norm": 0.4596507876383496, + "learning_rate": 1.4755823254584067e-05, + "loss": 0.3191, "step": 7880 }, { - "epoch": 0.45, - "grad_norm": 0.2864075909628802, - "learning_rate": 1.2001752959377681e-05, - "loss": 0.2689, + "epoch": 0.36, + "grad_norm": 0.5316077432732018, + "learning_rate": 1.4754514322226536e-05, + "loss": 0.3551, "step": 7881 }, { - "epoch": 0.45, - "grad_norm": 0.5250714104982417, - "learning_rate": 1.1999929676814036e-05, - "loss": 0.2763, + "epoch": 0.36, + "grad_norm": 0.7087777964089552, + "learning_rate": 1.4753205284608642e-05, + "loss": 0.357, "step": 7882 }, { - "epoch": 0.45, - "grad_norm": 0.6084203947302514, - "learning_rate": 1.1998106324992906e-05, - "loss": 0.3492, + "epoch": 0.36, + "grad_norm": 0.32519538989043967, + "learning_rate": 1.4751896141759365e-05, + "loss": 0.2771, "step": 7883 }, { - "epoch": 0.45, - "grad_norm": 0.3261697352200898, - "learning_rate": 1.1996282903977442e-05, - "loss": 0.3021, + "epoch": 0.36, + "grad_norm": 0.6411091437119668, + "learning_rate": 1.4750586893707687e-05, + "loss": 0.3432, "step": 7884 }, { - "epoch": 0.45, - "grad_norm": 0.3600944615199071, - "learning_rate": 1.1994459413830785e-05, - "loss": 0.3117, + "epoch": 0.36, + "grad_norm": 0.2674743095274538, + "learning_rate": 1.4749277540482594e-05, + "loss": 0.1276, "step": 7885 }, { - "epoch": 0.45, - "grad_norm": 0.43975626688178493, - "learning_rate": 1.1992635854616088e-05, - "loss": 0.2537, + "epoch": 0.36, + "grad_norm": 0.9305203339120885, + "learning_rate": 1.474796808211307e-05, + "loss": 0.4692, "step": 7886 }, { - "epoch": 0.45, - "grad_norm": 0.3502728085639639, - "learning_rate": 1.1990812226396496e-05, - "loss": 0.3062, + "epoch": 0.36, + "grad_norm": 0.32261790119606776, + "learning_rate": 1.4746658518628113e-05, + "loss": 0.2825, "step": 7887 }, { - "epoch": 0.45, - "grad_norm": 0.43356883571119836, - "learning_rate": 1.1988988529235161e-05, - "loss": 0.305, + "epoch": 0.36, + "grad_norm": 0.492738703007906, + "learning_rate": 1.4745348850056708e-05, + "loss": 0.3242, "step": 7888 }, { - "epoch": 0.45, - "grad_norm": 0.29485545554777076, - "learning_rate": 1.198716476319524e-05, - "loss": 0.2916, + "epoch": 0.36, + "grad_norm": 0.8626728663106363, + "learning_rate": 1.4744039076427855e-05, + "loss": 0.3639, "step": 7889 }, { - "epoch": 0.45, - "grad_norm": 0.31064543092327446, - "learning_rate": 1.198534092833989e-05, - "loss": 0.2028, + "epoch": 0.36, + "grad_norm": 0.34171653536367097, + "learning_rate": 1.4742729197770551e-05, + "loss": 0.2413, "step": 7890 }, { - "epoch": 0.45, - "grad_norm": 0.8897180222566816, - "learning_rate": 1.1983517024732266e-05, - "loss": 0.59, + "epoch": 0.36, + "grad_norm": 0.3856710106346861, + "learning_rate": 1.4741419214113794e-05, + "loss": 0.336, "step": 7891 }, { - "epoch": 0.45, - "grad_norm": 0.3955907200246761, - "learning_rate": 1.1981693052435537e-05, - "loss": 0.3412, + "epoch": 0.36, + "grad_norm": 0.29951083161481323, + "learning_rate": 1.4740109125486582e-05, + "loss": 0.1222, "step": 7892 }, { - "epoch": 0.45, - "grad_norm": 0.29039852394862853, - "learning_rate": 1.1979869011512859e-05, - "loss": 0.2017, + "epoch": 0.36, + "grad_norm": 0.39610128264818445, + "learning_rate": 1.4738798931917924e-05, + "loss": 0.3514, "step": 7893 }, { - "epoch": 0.45, - "grad_norm": 0.39928201842865546, - "learning_rate": 1.1978044902027403e-05, - "loss": 0.3085, + "epoch": 0.36, + "grad_norm": 0.8783801048081712, + "learning_rate": 1.4737488633436825e-05, + "loss": 0.5725, "step": 7894 }, { - "epoch": 0.45, - "grad_norm": 0.4353633841389933, - "learning_rate": 1.1976220724042336e-05, - "loss": 0.2926, + "epoch": 0.36, + "grad_norm": 0.30549693646298526, + "learning_rate": 1.4736178230072295e-05, + "loss": 0.2359, "step": 7895 }, { - "epoch": 0.45, - "grad_norm": 0.32887704414837243, - "learning_rate": 1.1974396477620833e-05, - "loss": 0.1961, + "epoch": 0.36, + "grad_norm": 0.4282791303883486, + "learning_rate": 1.4734867721853341e-05, + "loss": 0.3233, "step": 7896 }, { - "epoch": 0.45, - "grad_norm": 0.34286785641908873, - "learning_rate": 1.1972572162826061e-05, - "loss": 0.314, + "epoch": 0.36, + "grad_norm": 1.3717924910965442, + "learning_rate": 1.4733557108808983e-05, + "loss": 0.837, "step": 7897 }, { - "epoch": 0.45, - "grad_norm": 1.3682061627663276, - "learning_rate": 1.1970747779721203e-05, - "loss": 0.7447, + "epoch": 0.36, + "grad_norm": 0.4023643378076422, + "learning_rate": 1.473224639096823e-05, + "loss": 0.2132, "step": 7898 }, { - "epoch": 0.45, - "grad_norm": 0.29070012715612625, - "learning_rate": 1.1968923328369433e-05, - "loss": 0.219, + "epoch": 0.36, + "grad_norm": 0.3448370395064776, + "learning_rate": 1.4730935568360103e-05, + "loss": 0.2977, "step": 7899 }, { - "epoch": 0.45, - "grad_norm": 0.4741431492632248, - "learning_rate": 1.1967098808833935e-05, - "loss": 0.3583, + "epoch": 0.36, + "grad_norm": 0.3585050401691324, + "learning_rate": 1.4729624641013622e-05, + "loss": 0.2672, "step": 7900 }, { - "epoch": 0.45, - "grad_norm": 0.298824920933953, - "learning_rate": 1.196527422117789e-05, - "loss": 0.2645, + "epoch": 0.36, + "grad_norm": 0.38230463246587654, + "learning_rate": 1.4728313608957812e-05, + "loss": 0.1898, "step": 7901 }, { - "epoch": 0.45, - "grad_norm": 0.24902107345337846, - "learning_rate": 1.1963449565464478e-05, - "loss": 0.2042, + "epoch": 0.36, + "grad_norm": 0.6300879809765629, + "learning_rate": 1.4727002472221695e-05, + "loss": 0.3924, "step": 7902 }, { - "epoch": 0.45, - "grad_norm": 1.0625977661145665, - "learning_rate": 1.1961624841756896e-05, - "loss": 0.3038, + "epoch": 0.36, + "grad_norm": 0.39544912362634554, + "learning_rate": 1.4725691230834295e-05, + "loss": 0.3392, "step": 7903 }, { - "epoch": 0.45, - "grad_norm": 0.42317283623319885, - "learning_rate": 1.1959800050118328e-05, - "loss": 0.3523, + "epoch": 0.36, + "grad_norm": 0.6209054387540943, + "learning_rate": 1.472437988482465e-05, + "loss": 0.4153, "step": 7904 }, { - "epoch": 0.45, - "grad_norm": 0.3517066275803351, - "learning_rate": 1.195797519061197e-05, - "loss": 0.2577, + "epoch": 0.36, + "grad_norm": 0.361204160532707, + "learning_rate": 1.4723068434221788e-05, + "loss": 0.2555, "step": 7905 }, { - "epoch": 0.45, - "grad_norm": 0.7735868151817158, - "learning_rate": 1.1956150263301014e-05, - "loss": 0.3493, + "epoch": 0.36, + "grad_norm": 0.3448970376454606, + "learning_rate": 1.4721756879054743e-05, + "loss": 0.1952, "step": 7906 }, { - "epoch": 0.45, - "grad_norm": 0.310163938160124, - "learning_rate": 1.1954325268248662e-05, - "loss": 0.1994, + "epoch": 0.36, + "grad_norm": 0.40748728629967584, + "learning_rate": 1.472044521935255e-05, + "loss": 0.3115, "step": 7907 }, { - "epoch": 0.45, - "grad_norm": 0.397983349336781, - "learning_rate": 1.1952500205518107e-05, - "loss": 0.241, + "epoch": 0.36, + "grad_norm": 0.3186705918019939, + "learning_rate": 1.4719133455144252e-05, + "loss": 0.1954, "step": 7908 }, { - "epoch": 0.45, - "grad_norm": 0.3896181687123652, - "learning_rate": 1.1950675075172557e-05, - "loss": 0.2484, + "epoch": 0.36, + "grad_norm": 0.8681171058775894, + "learning_rate": 1.4717821586458884e-05, + "loss": 0.6072, "step": 7909 }, { - "epoch": 0.45, - "grad_norm": 0.9368903933280383, - "learning_rate": 1.1948849877275209e-05, - "loss": 0.5781, + "epoch": 0.36, + "grad_norm": 0.6189510283099342, + "learning_rate": 1.4716509613325497e-05, + "loss": 0.3696, "step": 7910 }, { - "epoch": 0.45, - "grad_norm": 0.39187076583274866, - "learning_rate": 1.1947024611889276e-05, - "loss": 0.2909, + "epoch": 0.36, + "grad_norm": 0.2997072648652239, + "learning_rate": 1.471519753577313e-05, + "loss": 0.224, "step": 7911 }, { - "epoch": 0.45, - "grad_norm": 0.3288277529256562, - "learning_rate": 1.1945199279077962e-05, - "loss": 0.2785, + "epoch": 0.36, + "grad_norm": 0.30499374642371, + "learning_rate": 1.4713885353830835e-05, + "loss": 0.1816, "step": 7912 }, { - "epoch": 0.45, - "grad_norm": 0.5272823310712186, - "learning_rate": 1.1943373878904482e-05, - "loss": 0.2873, + "epoch": 0.36, + "grad_norm": 0.6811118302366835, + "learning_rate": 1.4712573067527665e-05, + "loss": 0.4057, "step": 7913 }, { - "epoch": 0.45, - "grad_norm": 0.29573508428328504, - "learning_rate": 1.194154841143205e-05, - "loss": 0.1921, + "epoch": 0.36, + "grad_norm": 0.39607287294458915, + "learning_rate": 1.4711260676892664e-05, + "loss": 0.2485, "step": 7914 }, { - "epoch": 0.45, - "grad_norm": 0.6374701531641034, - "learning_rate": 1.1939722876723878e-05, - "loss": 0.4224, + "epoch": 0.36, + "grad_norm": 0.363824877807546, + "learning_rate": 1.4709948181954894e-05, + "loss": 0.308, "step": 7915 }, { - "epoch": 0.45, - "grad_norm": 0.3905831304108459, - "learning_rate": 1.1937897274843184e-05, - "loss": 0.2766, + "epoch": 0.36, + "grad_norm": 0.6801429841160178, + "learning_rate": 1.4708635582743412e-05, + "loss": 0.396, "step": 7916 }, { - "epoch": 0.45, - "grad_norm": 0.41233010441397044, - "learning_rate": 1.1936071605853195e-05, - "loss": 0.286, + "epoch": 0.36, + "grad_norm": 0.3852123363715314, + "learning_rate": 1.4707322879287277e-05, + "loss": 0.2913, "step": 7917 }, { - "epoch": 0.45, - "grad_norm": 1.0236772511840988, - "learning_rate": 1.1934245869817127e-05, - "loss": 0.4651, + "epoch": 0.36, + "grad_norm": 0.18182427487734024, + "learning_rate": 1.4706010071615548e-05, + "loss": 0.0881, "step": 7918 }, { - "epoch": 0.45, - "grad_norm": 0.27799086007368734, - "learning_rate": 1.193242006679821e-05, - "loss": 0.1277, + "epoch": 0.36, + "grad_norm": 0.3677885832689667, + "learning_rate": 1.4704697159757294e-05, + "loss": 0.3203, "step": 7919 }, { - "epoch": 0.46, - "grad_norm": 0.2699895568941243, - "learning_rate": 1.1930594196859667e-05, - "loss": 0.2208, + "epoch": 0.36, + "grad_norm": 0.4127680168204437, + "learning_rate": 1.4703384143741578e-05, + "loss": 0.2991, "step": 7920 }, { - "epoch": 0.46, - "grad_norm": 0.4589470961563092, - "learning_rate": 1.1928768260064732e-05, - "loss": 0.3091, + "epoch": 0.36, + "grad_norm": 0.9476618834183624, + "learning_rate": 1.4702071023597469e-05, + "loss": 0.3927, "step": 7921 }, { - "epoch": 0.46, - "grad_norm": 1.4691715710811628, - "learning_rate": 1.1926942256476632e-05, - "loss": 0.3769, + "epoch": 0.36, + "grad_norm": 0.4260168722688486, + "learning_rate": 1.470075779935404e-05, + "loss": 0.3501, "step": 7922 }, { - "epoch": 0.46, - "grad_norm": 0.37149958982181935, - "learning_rate": 1.1925116186158605e-05, - "loss": 0.2761, + "epoch": 0.36, + "grad_norm": 0.3108313647986387, + "learning_rate": 1.4699444471040366e-05, + "loss": 0.2775, "step": 7923 }, { - "epoch": 0.46, - "grad_norm": 0.5020463855050367, - "learning_rate": 1.192329004917389e-05, - "loss": 0.3984, + "epoch": 0.36, + "grad_norm": 0.20386817269294166, + "learning_rate": 1.469813103868552e-05, + "loss": 0.1156, "step": 7924 }, { - "epoch": 0.46, - "grad_norm": 0.31379760183411154, - "learning_rate": 1.1921463845585722e-05, - "loss": 0.2095, + "epoch": 0.36, + "grad_norm": 1.2270832462906907, + "learning_rate": 1.469681750231858e-05, + "loss": 0.6753, "step": 7925 }, { - "epoch": 0.46, - "grad_norm": 0.304322085714914, - "learning_rate": 1.1919637575457343e-05, - "loss": 0.1815, + "epoch": 0.36, + "grad_norm": 0.3622557090887446, + "learning_rate": 1.4695503861968627e-05, + "loss": 0.2843, "step": 7926 }, { - "epoch": 0.46, - "grad_norm": 0.4280709827379945, - "learning_rate": 1.1917811238852e-05, - "loss": 0.3252, + "epoch": 0.36, + "grad_norm": 0.5879505062303672, + "learning_rate": 1.4694190117664747e-05, + "loss": 0.2963, "step": 7927 }, { - "epoch": 0.46, - "grad_norm": 0.5484844773978607, - "learning_rate": 1.1915984835832934e-05, - "loss": 0.3202, + "epoch": 0.36, + "grad_norm": 1.0449068552269294, + "learning_rate": 1.4692876269436021e-05, + "loss": 0.5348, "step": 7928 }, { - "epoch": 0.46, - "grad_norm": 0.49450017929156964, - "learning_rate": 1.1914158366463392e-05, - "loss": 0.2197, + "epoch": 0.36, + "grad_norm": 0.3716668457941565, + "learning_rate": 1.4691562317311533e-05, + "loss": 0.2616, "step": 7929 }, { - "epoch": 0.46, - "grad_norm": 0.6972758363104524, - "learning_rate": 1.1912331830806634e-05, - "loss": 0.4078, + "epoch": 0.36, + "grad_norm": 0.31224075015169955, + "learning_rate": 1.4690248261320383e-05, + "loss": 0.1878, "step": 7930 }, { - "epoch": 0.46, - "grad_norm": 1.163113774171106, - "learning_rate": 1.1910505228925903e-05, - "loss": 0.6485, + "epoch": 0.36, + "grad_norm": 0.3823552913583326, + "learning_rate": 1.4688934101491654e-05, + "loss": 0.2697, "step": 7931 }, { - "epoch": 0.46, - "grad_norm": 0.23145347746277942, - "learning_rate": 1.1908678560884462e-05, - "loss": 0.1715, + "epoch": 0.36, + "grad_norm": 0.38544328469743905, + "learning_rate": 1.4687619837854446e-05, + "loss": 0.2965, "step": 7932 }, { - "epoch": 0.46, - "grad_norm": 0.30815546200945876, - "learning_rate": 1.190685182674556e-05, - "loss": 0.2405, + "epoch": 0.36, + "grad_norm": 1.0319126120574171, + "learning_rate": 1.468630547043785e-05, + "loss": 0.456, "step": 7933 }, { - "epoch": 0.46, - "grad_norm": 0.6610420498319607, - "learning_rate": 1.1905025026572466e-05, - "loss": 0.4497, + "epoch": 0.36, + "grad_norm": 0.44713541274359253, + "learning_rate": 1.4684990999270967e-05, + "loss": 0.2449, "step": 7934 }, { - "epoch": 0.46, - "grad_norm": 0.3484729311489025, - "learning_rate": 1.1903198160428433e-05, - "loss": 0.2436, + "epoch": 0.36, + "grad_norm": 0.41193796182559833, + "learning_rate": 1.46836764243829e-05, + "loss": 0.3045, "step": 7935 }, { - "epoch": 0.46, - "grad_norm": 0.37479821138076064, - "learning_rate": 1.190137122837673e-05, - "loss": 0.3104, + "epoch": 0.36, + "grad_norm": 0.3508664970758144, + "learning_rate": 1.468236174580275e-05, + "loss": 0.2796, "step": 7936 }, { - "epoch": 0.46, - "grad_norm": 0.903404747222253, - "learning_rate": 1.1899544230480623e-05, - "loss": 0.5389, + "epoch": 0.36, + "grad_norm": 0.3092557933293933, + "learning_rate": 1.4681046963559627e-05, + "loss": 0.1842, "step": 7937 }, { - "epoch": 0.46, - "grad_norm": 0.3793020472891658, - "learning_rate": 1.1897717166803384e-05, - "loss": 0.2626, + "epoch": 0.36, + "grad_norm": 0.41481532539277405, + "learning_rate": 1.4679732077682634e-05, + "loss": 0.2982, "step": 7938 }, { - "epoch": 0.46, - "grad_norm": 0.21536537499616923, - "learning_rate": 1.189589003740828e-05, - "loss": 0.158, + "epoch": 0.36, + "grad_norm": 0.5574293810218082, + "learning_rate": 1.4678417088200883e-05, + "loss": 0.3214, "step": 7939 }, { - "epoch": 0.46, - "grad_norm": 0.37013074965928505, - "learning_rate": 1.1894062842358585e-05, - "loss": 0.3346, + "epoch": 0.36, + "grad_norm": 0.95554345530316, + "learning_rate": 1.4677101995143485e-05, + "loss": 0.4674, "step": 7940 }, { - "epoch": 0.46, - "grad_norm": 0.4442486156485244, - "learning_rate": 1.1892235581717575e-05, - "loss": 0.2893, + "epoch": 0.36, + "grad_norm": 0.33058097614808263, + "learning_rate": 1.467578679853956e-05, + "loss": 0.2423, "step": 7941 }, { - "epoch": 0.46, - "grad_norm": 0.5788706585727145, - "learning_rate": 1.1890408255548526e-05, - "loss": 0.2968, + "epoch": 0.36, + "grad_norm": 0.26044777945431324, + "learning_rate": 1.4674471498418222e-05, + "loss": 0.1978, "step": 7942 }, { - "epoch": 0.46, - "grad_norm": 1.205983665744992, - "learning_rate": 1.1888580863914724e-05, - "loss": 0.613, + "epoch": 0.36, + "grad_norm": 0.5269813827628365, + "learning_rate": 1.467315609480859e-05, + "loss": 0.3923, "step": 7943 }, { - "epoch": 0.46, - "grad_norm": 0.3300548857885787, - "learning_rate": 1.188675340687945e-05, - "loss": 0.2639, + "epoch": 0.36, + "grad_norm": 0.3098751073540968, + "learning_rate": 1.4671840587739782e-05, + "loss": 0.2437, "step": 7944 }, { - "epoch": 0.46, - "grad_norm": 0.23304407517691592, - "learning_rate": 1.188492588450598e-05, - "loss": 0.1743, + "epoch": 0.36, + "grad_norm": 0.7314225034006392, + "learning_rate": 1.4670524977240929e-05, + "loss": 0.5218, "step": 7945 }, { - "epoch": 0.46, - "grad_norm": 0.6386065237872193, - "learning_rate": 1.1883098296857614e-05, - "loss": 0.444, + "epoch": 0.37, + "grad_norm": 0.5604349577158864, + "learning_rate": 1.4669209263341156e-05, + "loss": 0.4049, "step": 7946 }, { - "epoch": 0.46, - "grad_norm": 0.5258883988322739, - "learning_rate": 1.188127064399763e-05, - "loss": 0.2976, + "epoch": 0.37, + "grad_norm": 0.34016908362061904, + "learning_rate": 1.4667893446069588e-05, + "loss": 0.2359, "step": 7947 }, { - "epoch": 0.46, - "grad_norm": 0.30515301220401475, - "learning_rate": 1.1879442925989327e-05, - "loss": 0.2492, + "epoch": 0.37, + "grad_norm": 1.2806633215675647, + "learning_rate": 1.4666577525455359e-05, + "loss": 0.7104, "step": 7948 }, { - "epoch": 0.46, - "grad_norm": 0.9557336485183242, - "learning_rate": 1.1877615142895995e-05, - "loss": 0.5884, + "epoch": 0.37, + "grad_norm": 0.3691276647491108, + "learning_rate": 1.4665261501527602e-05, + "loss": 0.2811, "step": 7949 }, { - "epoch": 0.46, - "grad_norm": 0.58178474444118, - "learning_rate": 1.1875787294780932e-05, - "loss": 0.324, + "epoch": 0.37, + "grad_norm": 0.3217065540986312, + "learning_rate": 1.4663945374315449e-05, + "loss": 0.2361, "step": 7950 }, { - "epoch": 0.46, - "grad_norm": 0.4263233009317428, - "learning_rate": 1.1873959381707437e-05, - "loss": 0.323, + "epoch": 0.37, + "grad_norm": 0.5234297630236737, + "learning_rate": 1.4662629143848045e-05, + "loss": 0.3545, "step": 7951 }, { - "epoch": 0.46, - "grad_norm": 0.2837105656508334, - "learning_rate": 1.1872131403738807e-05, - "loss": 0.2144, + "epoch": 0.37, + "grad_norm": 1.0610173942891301, + "learning_rate": 1.466131281015452e-05, + "loss": 0.6923, "step": 7952 }, { - "epoch": 0.46, - "grad_norm": 0.3091377428079506, - "learning_rate": 1.1870303360938349e-05, - "loss": 0.2136, + "epoch": 0.37, + "grad_norm": 0.4066451580341624, + "learning_rate": 1.4659996373264027e-05, + "loss": 0.1763, "step": 7953 }, { - "epoch": 0.46, - "grad_norm": 0.38310899231449197, - "learning_rate": 1.1868475253369362e-05, - "loss": 0.313, + "epoch": 0.37, + "grad_norm": 0.30718121880010313, + "learning_rate": 1.4658679833205705e-05, + "loss": 0.2561, "step": 7954 }, { - "epoch": 0.46, - "grad_norm": 0.9306600839745718, - "learning_rate": 1.1866647081095162e-05, - "loss": 0.2579, + "epoch": 0.37, + "grad_norm": 0.39582908930815763, + "learning_rate": 1.46573631900087e-05, + "loss": 0.3028, "step": 7955 }, { - "epoch": 0.46, - "grad_norm": 0.3050132731937429, - "learning_rate": 1.186481884417905e-05, - "loss": 0.2768, + "epoch": 0.37, + "grad_norm": 0.4308606047808635, + "learning_rate": 1.465604644370216e-05, + "loss": 0.2918, "step": 7956 }, { - "epoch": 0.46, - "grad_norm": 0.42166795211866515, - "learning_rate": 1.1862990542684345e-05, - "loss": 0.3322, + "epoch": 0.37, + "grad_norm": 0.4949526990915972, + "learning_rate": 1.4654729594315245e-05, + "loss": 0.3158, "step": 7957 }, { - "epoch": 0.46, - "grad_norm": 0.2862765123153871, - "learning_rate": 1.1861162176674354e-05, - "loss": 0.1944, + "epoch": 0.37, + "grad_norm": 0.4352898980238516, + "learning_rate": 1.4653412641877099e-05, + "loss": 0.3235, "step": 7958 }, { - "epoch": 0.46, - "grad_norm": 0.3983928765816261, - "learning_rate": 1.1859333746212403e-05, - "loss": 0.3066, + "epoch": 0.37, + "grad_norm": 0.4124904737182277, + "learning_rate": 1.4652095586416884e-05, + "loss": 0.3006, "step": 7959 }, { - "epoch": 0.46, - "grad_norm": 0.37464310126796313, - "learning_rate": 1.1857505251361801e-05, - "loss": 0.3424, + "epoch": 0.37, + "grad_norm": 0.4549120262659146, + "learning_rate": 1.465077842796376e-05, + "loss": 0.3107, "step": 7960 }, { - "epoch": 0.46, - "grad_norm": 0.4032873786677103, - "learning_rate": 1.1855676692185872e-05, - "loss": 0.2819, + "epoch": 0.37, + "grad_norm": 0.5637242397746781, + "learning_rate": 1.4649461166546877e-05, + "loss": 0.4341, "step": 7961 }, { - "epoch": 0.46, - "grad_norm": 0.4679357898041959, - "learning_rate": 1.1853848068747938e-05, - "loss": 0.3131, + "epoch": 0.37, + "grad_norm": 0.28111213130277996, + "learning_rate": 1.464814380219541e-05, + "loss": 0.1985, "step": 7962 }, { - "epoch": 0.46, - "grad_norm": 0.3412178021471056, - "learning_rate": 1.1852019381111326e-05, - "loss": 0.3276, + "epoch": 0.37, + "grad_norm": 0.3545821316310394, + "learning_rate": 1.4646826334938516e-05, + "loss": 0.2733, "step": 7963 }, { - "epoch": 0.46, - "grad_norm": 0.2875821893013411, - "learning_rate": 1.185019062933936e-05, - "loss": 0.2642, + "epoch": 0.37, + "grad_norm": 1.916469835146868, + "learning_rate": 1.4645508764805368e-05, + "loss": 0.867, "step": 7964 }, { - "epoch": 0.46, - "grad_norm": 0.4411680471167332, - "learning_rate": 1.1848361813495376e-05, - "loss": 0.2624, + "epoch": 0.37, + "grad_norm": 0.4289015987938433, + "learning_rate": 1.4644191091825132e-05, + "loss": 0.2952, "step": 7965 }, { - "epoch": 0.46, - "grad_norm": 0.2880036099235008, - "learning_rate": 1.18465329336427e-05, - "loss": 0.2542, + "epoch": 0.37, + "grad_norm": 0.43287004660534717, + "learning_rate": 1.4642873316026982e-05, + "loss": 0.2844, "step": 7966 }, { - "epoch": 0.46, - "grad_norm": 0.4021660522898735, - "learning_rate": 1.1844703989844666e-05, - "loss": 0.3471, + "epoch": 0.37, + "grad_norm": 0.45822651917734164, + "learning_rate": 1.4641555437440088e-05, + "loss": 0.3597, "step": 7967 }, { - "epoch": 0.46, - "grad_norm": 0.33634227392857685, - "learning_rate": 1.1842874982164616e-05, - "loss": 0.2271, + "epoch": 0.37, + "grad_norm": 0.35311949847482255, + "learning_rate": 1.4640237456093636e-05, + "loss": 0.2458, "step": 7968 }, { - "epoch": 0.46, - "grad_norm": 0.3303900188393209, - "learning_rate": 1.1841045910665881e-05, - "loss": 0.3177, + "epoch": 0.37, + "grad_norm": 0.48707537427700254, + "learning_rate": 1.4638919372016796e-05, + "loss": 0.3445, "step": 7969 }, { - "epoch": 0.46, - "grad_norm": 0.9671616145587876, - "learning_rate": 1.1839216775411808e-05, - "loss": 0.609, + "epoch": 0.37, + "grad_norm": 0.2933314698164553, + "learning_rate": 1.4637601185238753e-05, + "loss": 0.2222, "step": 7970 }, { - "epoch": 0.46, - "grad_norm": 0.35308979972001275, - "learning_rate": 1.1837387576465735e-05, - "loss": 0.2297, + "epoch": 0.37, + "grad_norm": 0.47918523378380506, + "learning_rate": 1.4636282895788689e-05, + "loss": 0.3265, "step": 7971 }, { - "epoch": 0.46, - "grad_norm": 0.24389530242129417, - "learning_rate": 1.183555831389101e-05, - "loss": 0.2464, + "epoch": 0.37, + "grad_norm": 0.5620082348171247, + "learning_rate": 1.4634964503695792e-05, + "loss": 0.4098, "step": 7972 }, { - "epoch": 0.46, - "grad_norm": 0.4290048907771581, - "learning_rate": 1.183372898775098e-05, - "loss": 0.2936, + "epoch": 0.37, + "grad_norm": 0.41554754790742304, + "learning_rate": 1.4633646008989245e-05, + "loss": 0.2601, "step": 7973 }, { - "epoch": 0.46, - "grad_norm": 0.4132601964727188, - "learning_rate": 1.1831899598108993e-05, - "loss": 0.2794, + "epoch": 0.37, + "grad_norm": 0.4391490946720585, + "learning_rate": 1.4632327411698244e-05, + "loss": 0.2964, "step": 7974 }, { - "epoch": 0.46, - "grad_norm": 0.4167507268336926, - "learning_rate": 1.1830070145028404e-05, - "loss": 0.3419, + "epoch": 0.37, + "grad_norm": 0.35979452627317526, + "learning_rate": 1.4631008711851977e-05, + "loss": 0.3149, "step": 7975 }, { - "epoch": 0.46, - "grad_norm": 0.4073307537683485, - "learning_rate": 1.1828240628572563e-05, - "loss": 0.3289, + "epoch": 0.37, + "grad_norm": 0.4462055948579366, + "learning_rate": 1.4629689909479641e-05, + "loss": 0.166, "step": 7976 }, { - "epoch": 0.46, - "grad_norm": 0.7557884514763259, - "learning_rate": 1.182641104880483e-05, - "loss": 0.4952, + "epoch": 0.37, + "grad_norm": 0.4650248868492633, + "learning_rate": 1.4628371004610434e-05, + "loss": 0.2555, "step": 7977 }, { - "epoch": 0.46, - "grad_norm": 0.26140284114058343, - "learning_rate": 1.1824581405788558e-05, - "loss": 0.1731, + "epoch": 0.37, + "grad_norm": 0.3188051069245708, + "learning_rate": 1.4627051997273553e-05, + "loss": 0.3218, "step": 7978 }, { - "epoch": 0.46, - "grad_norm": 0.39551359172857126, - "learning_rate": 1.1822751699587109e-05, - "loss": 0.2648, + "epoch": 0.37, + "grad_norm": 0.740762007040696, + "learning_rate": 1.46257328874982e-05, + "loss": 0.349, "step": 7979 }, { - "epoch": 0.46, - "grad_norm": 0.3736897265914453, - "learning_rate": 1.1820921930263851e-05, - "loss": 0.2989, + "epoch": 0.37, + "grad_norm": 0.4354668947724718, + "learning_rate": 1.4624413675313577e-05, + "loss": 0.3068, "step": 7980 }, { - "epoch": 0.46, - "grad_norm": 0.38382012833460544, - "learning_rate": 1.1819092097882141e-05, - "loss": 0.231, + "epoch": 0.37, + "grad_norm": 0.545635119385423, + "learning_rate": 1.4623094360748895e-05, + "loss": 0.4421, "step": 7981 }, { - "epoch": 0.46, - "grad_norm": 1.296301622420184, - "learning_rate": 1.1817262202505353e-05, - "loss": 0.4792, + "epoch": 0.37, + "grad_norm": 0.28604889240642384, + "learning_rate": 1.4621774943833358e-05, + "loss": 0.2373, "step": 7982 }, { - "epoch": 0.46, - "grad_norm": 0.5060071372814816, - "learning_rate": 1.1815432244196849e-05, - "loss": 0.3495, + "epoch": 0.37, + "grad_norm": 0.2551424749260242, + "learning_rate": 1.462045542459618e-05, + "loss": 0.1791, "step": 7983 }, { - "epoch": 0.46, - "grad_norm": 0.285491659038382, - "learning_rate": 1.181360222302001e-05, - "loss": 0.2228, + "epoch": 0.37, + "grad_norm": 0.4782568669574651, + "learning_rate": 1.4619135803066573e-05, + "loss": 0.3854, "step": 7984 }, { - "epoch": 0.46, - "grad_norm": 0.43293902333173934, - "learning_rate": 1.18117721390382e-05, - "loss": 0.2953, + "epoch": 0.37, + "grad_norm": 0.7936927435364832, + "learning_rate": 1.461781607927375e-05, + "loss": 0.5208, "step": 7985 }, { - "epoch": 0.46, - "grad_norm": 0.42922801670662475, - "learning_rate": 1.1809941992314799e-05, - "loss": 0.2395, + "epoch": 0.37, + "grad_norm": 0.29132110275660117, + "learning_rate": 1.4616496253246931e-05, + "loss": 0.2214, "step": 7986 }, { - "epoch": 0.46, - "grad_norm": 0.33588685351110065, - "learning_rate": 1.1808111782913188e-05, - "loss": 0.2126, + "epoch": 0.37, + "grad_norm": 0.5277803275108777, + "learning_rate": 1.4615176325015332e-05, + "loss": 0.3618, "step": 7987 }, { - "epoch": 0.46, - "grad_norm": 0.40812800461735654, - "learning_rate": 1.180628151089674e-05, - "loss": 0.3165, + "epoch": 0.37, + "grad_norm": 0.29609419222625566, + "learning_rate": 1.4613856294608178e-05, + "loss": 0.2035, "step": 7988 }, { - "epoch": 0.46, - "grad_norm": 0.6953194615167572, - "learning_rate": 1.1804451176328844e-05, - "loss": 0.524, + "epoch": 0.37, + "grad_norm": 0.3965303688114541, + "learning_rate": 1.4612536162054694e-05, + "loss": 0.2469, "step": 7989 }, { - "epoch": 0.46, - "grad_norm": 0.38056672727143354, - "learning_rate": 1.1802620779272877e-05, - "loss": 0.3416, + "epoch": 0.37, + "grad_norm": 0.3730951390092551, + "learning_rate": 1.4611215927384103e-05, + "loss": 0.3542, "step": 7990 }, { - "epoch": 0.46, - "grad_norm": 0.4374708121864055, - "learning_rate": 1.1800790319792234e-05, - "loss": 0.3152, + "epoch": 0.37, + "grad_norm": 0.475740104862716, + "learning_rate": 1.4609895590625635e-05, + "loss": 0.3555, "step": 7991 }, { - "epoch": 0.46, - "grad_norm": 0.24034260281344746, - "learning_rate": 1.1798959797950298e-05, - "loss": 0.1912, + "epoch": 0.37, + "grad_norm": 0.8080030082141457, + "learning_rate": 1.4608575151808526e-05, + "loss": 0.4257, "step": 7992 }, { - "epoch": 0.46, - "grad_norm": 0.3737299347380316, - "learning_rate": 1.1797129213810462e-05, - "loss": 0.3117, + "epoch": 0.37, + "grad_norm": 0.36773386971342104, + "learning_rate": 1.4607254610962001e-05, + "loss": 0.2525, "step": 7993 }, { - "epoch": 0.46, - "grad_norm": 0.8087123591015803, - "learning_rate": 1.179529856743612e-05, - "loss": 0.3435, + "epoch": 0.37, + "grad_norm": 0.30063479440270674, + "learning_rate": 1.46059339681153e-05, + "loss": 0.2634, "step": 7994 }, { - "epoch": 0.46, - "grad_norm": 0.3312821317125005, - "learning_rate": 1.1793467858890666e-05, - "loss": 0.2908, + "epoch": 0.37, + "grad_norm": 0.5509987118400429, + "learning_rate": 1.4604613223297661e-05, + "loss": 0.389, "step": 7995 }, { - "epoch": 0.46, - "grad_norm": 0.37260236653991463, - "learning_rate": 1.1791637088237493e-05, - "loss": 0.2907, + "epoch": 0.37, + "grad_norm": 0.2804161171950647, + "learning_rate": 1.4603292376538325e-05, + "loss": 0.2215, "step": 7996 }, { - "epoch": 0.46, - "grad_norm": 0.5993123611192235, - "learning_rate": 1.1789806255540008e-05, - "loss": 0.3494, + "epoch": 0.37, + "grad_norm": 1.6034458191076457, + "learning_rate": 1.4601971427866527e-05, + "loss": 0.7563, "step": 7997 }, { - "epoch": 0.46, - "grad_norm": 0.23486965767951468, - "learning_rate": 1.1787975360861607e-05, - "loss": 0.1612, + "epoch": 0.37, + "grad_norm": 0.36151741144295596, + "learning_rate": 1.4600650377311523e-05, + "loss": 0.2918, "step": 7998 }, { - "epoch": 0.46, - "grad_norm": 0.36941228758433226, - "learning_rate": 1.1786144404265701e-05, - "loss": 0.3084, + "epoch": 0.37, + "grad_norm": 0.3628868281688461, + "learning_rate": 1.459932922490255e-05, + "loss": 0.281, "step": 7999 }, { - "epoch": 0.46, - "grad_norm": 0.38811622550655456, - "learning_rate": 1.1784313385815685e-05, - "loss": 0.2744, + "epoch": 0.37, + "grad_norm": 0.6754704718248982, + "learning_rate": 1.4598007970668864e-05, + "loss": 0.5219, "step": 8000 }, { - "epoch": 0.46, - "grad_norm": 0.8777252991056873, - "learning_rate": 1.1782482305574976e-05, - "loss": 0.5783, + "epoch": 0.37, + "grad_norm": 0.4893416568201468, + "learning_rate": 1.4596686614639711e-05, + "loss": 0.3385, "step": 8001 }, { - "epoch": 0.46, - "grad_norm": 0.33300708206449414, - "learning_rate": 1.178065116360698e-05, - "loss": 0.2683, + "epoch": 0.37, + "grad_norm": 0.268571154790321, + "learning_rate": 1.4595365156844347e-05, + "loss": 0.2088, "step": 8002 }, { - "epoch": 0.46, - "grad_norm": 0.3506847663910355, - "learning_rate": 1.1778819959975114e-05, - "loss": 0.3212, + "epoch": 0.37, + "grad_norm": 0.3532852324644539, + "learning_rate": 1.4594043597312026e-05, + "loss": 0.2828, "step": 8003 }, { - "epoch": 0.46, - "grad_norm": 0.2039203231034237, - "learning_rate": 1.1776988694742786e-05, - "loss": 0.0902, + "epoch": 0.37, + "grad_norm": 0.6471508509849552, + "learning_rate": 1.459272193607201e-05, + "loss": 0.3422, "step": 8004 }, { - "epoch": 0.46, - "grad_norm": 0.32754667156190614, - "learning_rate": 1.1775157367973417e-05, - "loss": 0.2659, + "epoch": 0.37, + "grad_norm": 0.49560777106379306, + "learning_rate": 1.4591400173153555e-05, + "loss": 0.3336, "step": 8005 }, { - "epoch": 0.46, - "grad_norm": 0.9840863478213498, - "learning_rate": 1.1773325979730428e-05, - "loss": 0.6268, + "epoch": 0.37, + "grad_norm": 0.3650982076948691, + "learning_rate": 1.4590078308585927e-05, + "loss": 0.2824, "step": 8006 }, { - "epoch": 0.46, - "grad_norm": 0.4432248748342312, - "learning_rate": 1.1771494530077233e-05, - "loss": 0.3049, + "epoch": 0.37, + "grad_norm": 0.42206199653928783, + "learning_rate": 1.4588756342398391e-05, + "loss": 0.3005, "step": 8007 }, { - "epoch": 0.46, - "grad_norm": 0.3207501681884594, - "learning_rate": 1.1769663019077258e-05, - "loss": 0.2882, + "epoch": 0.37, + "grad_norm": 0.3229834078313099, + "learning_rate": 1.458743427462021e-05, + "loss": 0.2155, "step": 8008 }, { - "epoch": 0.46, - "grad_norm": 0.8120058617367432, - "learning_rate": 1.176783144679393e-05, - "loss": 0.5619, + "epoch": 0.37, + "grad_norm": 0.29701145820685576, + "learning_rate": 1.4586112105280656e-05, + "loss": 0.2055, "step": 8009 }, { - "epoch": 0.46, - "grad_norm": 0.20825762019102417, - "learning_rate": 1.1765999813290674e-05, - "loss": 0.0947, + "epoch": 0.37, + "grad_norm": 0.4481510108506722, + "learning_rate": 1.4584789834408996e-05, + "loss": 0.3244, "step": 8010 }, { - "epoch": 0.46, - "grad_norm": 0.4053812780070775, - "learning_rate": 1.1764168118630922e-05, - "loss": 0.277, + "epoch": 0.37, + "grad_norm": 0.422876623188142, + "learning_rate": 1.4583467462034512e-05, + "loss": 0.3201, "step": 8011 }, { - "epoch": 0.46, - "grad_norm": 0.3988311894219567, - "learning_rate": 1.1762336362878104e-05, - "loss": 0.3246, + "epoch": 0.37, + "grad_norm": 0.6249250607455239, + "learning_rate": 1.4582144988186478e-05, + "loss": 0.3641, "step": 8012 }, { - "epoch": 0.46, - "grad_norm": 0.6500249623410269, - "learning_rate": 1.1760504546095653e-05, - "loss": 0.4227, + "epoch": 0.37, + "grad_norm": 0.8859015110191942, + "learning_rate": 1.4580822412894168e-05, + "loss": 0.4503, "step": 8013 }, { - "epoch": 0.46, - "grad_norm": 0.29550813912741236, - "learning_rate": 1.1758672668347005e-05, - "loss": 0.19, + "epoch": 0.37, + "grad_norm": 0.31746829573320323, + "learning_rate": 1.4579499736186864e-05, + "loss": 0.2702, "step": 8014 }, { - "epoch": 0.46, - "grad_norm": 0.34503398766813, - "learning_rate": 1.1756840729695598e-05, - "loss": 0.342, + "epoch": 0.37, + "grad_norm": 0.19655667254801149, + "learning_rate": 1.457817695809385e-05, + "loss": 0.1117, "step": 8015 }, { - "epoch": 0.46, - "grad_norm": 0.28919837746058225, - "learning_rate": 1.1755008730204873e-05, - "loss": 0.2108, + "epoch": 0.37, + "grad_norm": 0.5492147687185173, + "learning_rate": 1.457685407864441e-05, + "loss": 0.3534, "step": 8016 }, { - "epoch": 0.46, - "grad_norm": 0.31556082209556957, - "learning_rate": 1.1753176669938269e-05, - "loss": 0.1865, + "epoch": 0.37, + "grad_norm": 0.3906783478305109, + "learning_rate": 1.4575531097867834e-05, + "loss": 0.3238, "step": 8017 }, { - "epoch": 0.46, - "grad_norm": 0.8571833591537861, - "learning_rate": 1.1751344548959233e-05, - "loss": 0.5349, + "epoch": 0.37, + "grad_norm": 0.3992223382717474, + "learning_rate": 1.4574208015793407e-05, + "loss": 0.3093, "step": 8018 }, { - "epoch": 0.46, - "grad_norm": 0.3899301745997571, - "learning_rate": 1.174951236733121e-05, - "loss": 0.3314, + "epoch": 0.37, + "grad_norm": 0.44315464009185956, + "learning_rate": 1.4572884832450427e-05, + "loss": 0.2287, "step": 8019 }, { - "epoch": 0.46, - "grad_norm": 0.2910575101476483, - "learning_rate": 1.1747680125117654e-05, - "loss": 0.2382, + "epoch": 0.37, + "grad_norm": 0.36875858245248444, + "learning_rate": 1.4571561547868182e-05, + "loss": 0.2836, "step": 8020 }, { - "epoch": 0.46, - "grad_norm": 0.9408216363736895, - "learning_rate": 1.1745847822382004e-05, - "loss": 0.6722, + "epoch": 0.37, + "grad_norm": 0.4653032759768289, + "learning_rate": 1.4570238162075969e-05, + "loss": 0.2676, "step": 8021 }, { - "epoch": 0.46, - "grad_norm": 0.3182946462400599, - "learning_rate": 1.174401545918772e-05, - "loss": 0.2214, + "epoch": 0.37, + "grad_norm": 0.32209944375250216, + "learning_rate": 1.456891467510309e-05, + "loss": 0.2332, "step": 8022 }, { - "epoch": 0.46, - "grad_norm": 0.2663466323044907, - "learning_rate": 1.1742183035598258e-05, - "loss": 0.2274, + "epoch": 0.37, + "grad_norm": 0.441351734883321, + "learning_rate": 1.456759108697884e-05, + "loss": 0.3106, "step": 8023 }, { - "epoch": 0.46, - "grad_norm": 0.49731607824725593, - "learning_rate": 1.1740350551677073e-05, - "loss": 0.3698, + "epoch": 0.37, + "grad_norm": 0.654544008503864, + "learning_rate": 1.456626739773253e-05, + "loss": 0.4452, "step": 8024 }, { - "epoch": 0.46, - "grad_norm": 0.9450188765195661, - "learning_rate": 1.1738518007487621e-05, - "loss": 0.4426, + "epoch": 0.37, + "grad_norm": 0.3647808442251664, + "learning_rate": 1.456494360739346e-05, + "loss": 0.2568, "step": 8025 }, { - "epoch": 0.46, - "grad_norm": 0.31989191955177276, - "learning_rate": 1.1736685403093367e-05, - "loss": 0.2602, + "epoch": 0.37, + "grad_norm": 0.3539861803083868, + "learning_rate": 1.4563619715990939e-05, + "loss": 0.294, "step": 8026 }, { - "epoch": 0.46, - "grad_norm": 0.4274492080687954, - "learning_rate": 1.1734852738557772e-05, - "loss": 0.2698, + "epoch": 0.37, + "grad_norm": 0.3486686042009448, + "learning_rate": 1.4562295723554272e-05, + "loss": 0.1726, "step": 8027 }, { - "epoch": 0.46, - "grad_norm": 0.32934834987845174, - "learning_rate": 1.1733020013944301e-05, - "loss": 0.2708, + "epoch": 0.37, + "grad_norm": 0.7341945958988937, + "learning_rate": 1.4560971630112779e-05, + "loss": 0.286, "step": 8028 }, { - "epoch": 0.46, - "grad_norm": 0.2622235332469892, - "learning_rate": 1.1731187229316418e-05, - "loss": 0.2088, + "epoch": 0.37, + "grad_norm": 0.37637362903680693, + "learning_rate": 1.4559647435695768e-05, + "loss": 0.3177, "step": 8029 }, { - "epoch": 0.46, - "grad_norm": 1.6319701577728358, - "learning_rate": 1.1729354384737602e-05, - "loss": 0.3641, + "epoch": 0.37, + "grad_norm": 0.405284206088478, + "learning_rate": 1.455832314033256e-05, + "loss": 0.3565, "step": 8030 }, { - "epoch": 0.46, - "grad_norm": 0.39856243611000813, - "learning_rate": 1.1727521480271315e-05, - "loss": 0.3438, + "epoch": 0.37, + "grad_norm": 1.9913398430753853, + "learning_rate": 1.4556998744052466e-05, + "loss": 0.7404, "step": 8031 }, { - "epoch": 0.46, - "grad_norm": 0.3740268094501319, - "learning_rate": 1.172568851598103e-05, - "loss": 0.2601, + "epoch": 0.37, + "grad_norm": 0.3448339217456296, + "learning_rate": 1.4555674246884816e-05, + "loss": 0.2276, "step": 8032 }, { - "epoch": 0.46, - "grad_norm": 0.49836553108072196, - "learning_rate": 1.1723855491930232e-05, - "loss": 0.3013, + "epoch": 0.37, + "grad_norm": 0.36053317996356626, + "learning_rate": 1.4554349648858928e-05, + "loss": 0.1867, "step": 8033 }, { - "epoch": 0.46, - "grad_norm": 0.3562893968298132, - "learning_rate": 1.1722022408182388e-05, - "loss": 0.288, + "epoch": 0.37, + "grad_norm": 0.4726418571400346, + "learning_rate": 1.4553024950004129e-05, + "loss": 0.3274, "step": 8034 }, { - "epoch": 0.46, - "grad_norm": 0.3640195155365308, - "learning_rate": 1.1720189264800983e-05, - "loss": 0.2428, + "epoch": 0.37, + "grad_norm": 0.376644859459166, + "learning_rate": 1.4551700150349746e-05, + "loss": 0.2505, "step": 8035 }, { - "epoch": 0.46, - "grad_norm": 0.706087521346157, - "learning_rate": 1.1718356061849496e-05, - "loss": 0.2559, + "epoch": 0.37, + "grad_norm": 2.175901953623385, + "learning_rate": 1.4550375249925106e-05, + "loss": 0.8313, "step": 8036 }, { - "epoch": 0.46, - "grad_norm": 1.2503475403720288, - "learning_rate": 1.1716522799391417e-05, - "loss": 0.5743, + "epoch": 0.37, + "grad_norm": 0.4715688639245605, + "learning_rate": 1.4549050248759546e-05, + "loss": 0.3191, "step": 8037 }, { - "epoch": 0.46, - "grad_norm": 0.4138155307392456, - "learning_rate": 1.1714689477490224e-05, - "loss": 0.3201, + "epoch": 0.37, + "grad_norm": 0.40813108551233657, + "learning_rate": 1.4547725146882396e-05, + "loss": 0.2419, "step": 8038 }, { - "epoch": 0.46, - "grad_norm": 0.3644446915489398, - "learning_rate": 1.1712856096209411e-05, - "loss": 0.292, + "epoch": 0.37, + "grad_norm": 0.32460135125273704, + "learning_rate": 1.4546399944322998e-05, + "loss": 0.1819, "step": 8039 }, { - "epoch": 0.46, - "grad_norm": 0.7135645309500714, - "learning_rate": 1.1711022655612461e-05, - "loss": 0.2894, + "epoch": 0.37, + "grad_norm": 0.6802466804452063, + "learning_rate": 1.4545074641110684e-05, + "loss": 0.4107, "step": 8040 }, { - "epoch": 0.46, - "grad_norm": 0.28773646997277114, - "learning_rate": 1.1709189155762872e-05, - "loss": 0.2276, + "epoch": 0.37, + "grad_norm": 0.3555828740566606, + "learning_rate": 1.4543749237274798e-05, + "loss": 0.2484, "step": 8041 }, { - "epoch": 0.46, - "grad_norm": 0.8314554766196716, - "learning_rate": 1.1707355596724135e-05, - "loss": 0.4062, + "epoch": 0.37, + "grad_norm": 0.5293511349965812, + "learning_rate": 1.4542423732844684e-05, + "loss": 0.3662, "step": 8042 }, { - "epoch": 0.46, - "grad_norm": 0.34693532859102244, - "learning_rate": 1.1705521978559748e-05, - "loss": 0.215, + "epoch": 0.37, + "grad_norm": 0.9536441657203929, + "learning_rate": 1.4541098127849686e-05, + "loss": 0.4382, "step": 8043 }, { - "epoch": 0.46, - "grad_norm": 0.3694538811668248, - "learning_rate": 1.1703688301333211e-05, - "loss": 0.2752, + "epoch": 0.37, + "grad_norm": 0.3998095093885355, + "learning_rate": 1.4539772422319151e-05, + "loss": 0.2806, "step": 8044 }, { - "epoch": 0.46, - "grad_norm": 1.349991276656821, - "learning_rate": 1.1701854565108019e-05, - "loss": 0.7906, + "epoch": 0.37, + "grad_norm": 0.2599654016165371, + "learning_rate": 1.4538446616282435e-05, + "loss": 0.1919, "step": 8045 }, { - "epoch": 0.46, - "grad_norm": 0.4942671800850725, - "learning_rate": 1.1700020769947675e-05, - "loss": 0.3041, + "epoch": 0.37, + "grad_norm": 0.5015844075892185, + "learning_rate": 1.453712070976888e-05, + "loss": 0.3635, "step": 8046 }, { - "epoch": 0.46, - "grad_norm": 0.28376124490302723, - "learning_rate": 1.1698186915915689e-05, - "loss": 0.258, + "epoch": 0.37, + "grad_norm": 0.36512299858641384, + "learning_rate": 1.4535794702807849e-05, + "loss": 0.2826, "step": 8047 }, { - "epoch": 0.46, - "grad_norm": 0.5974320339927423, - "learning_rate": 1.1696353003075558e-05, - "loss": 0.4175, + "epoch": 0.37, + "grad_norm": 0.7780282750486254, + "learning_rate": 1.4534468595428695e-05, + "loss": 0.5078, "step": 8048 }, { - "epoch": 0.46, - "grad_norm": 0.25196002764368464, - "learning_rate": 1.16945190314908e-05, - "loss": 0.1402, + "epoch": 0.37, + "grad_norm": 0.424220793708698, + "learning_rate": 1.4533142387660774e-05, + "loss": 0.3505, "step": 8049 }, { - "epoch": 0.46, - "grad_norm": 0.5777653906969942, - "learning_rate": 1.1692685001224918e-05, - "loss": 0.335, + "epoch": 0.37, + "grad_norm": 0.42625350930980693, + "learning_rate": 1.4531816079533452e-05, + "loss": 0.3056, "step": 8050 }, { - "epoch": 0.46, - "grad_norm": 0.45541649563077186, - "learning_rate": 1.1690850912341427e-05, - "loss": 0.2842, + "epoch": 0.37, + "grad_norm": 0.40515935262875485, + "learning_rate": 1.4530489671076087e-05, + "loss": 0.1621, "step": 8051 }, { - "epoch": 0.46, - "grad_norm": 0.9243817653395691, - "learning_rate": 1.1689016764903841e-05, - "loss": 0.4757, + "epoch": 0.37, + "grad_norm": 0.43297166869404236, + "learning_rate": 1.4529163162318049e-05, + "loss": 0.3214, "step": 8052 }, { - "epoch": 0.46, - "grad_norm": 0.4541786271595083, - "learning_rate": 1.168718255897568e-05, - "loss": 0.2181, + "epoch": 0.37, + "grad_norm": 0.35201827911462164, + "learning_rate": 1.4527836553288701e-05, + "loss": 0.2601, "step": 8053 }, { - "epoch": 0.46, - "grad_norm": 0.3405747715638682, - "learning_rate": 1.1685348294620457e-05, - "loss": 0.2851, + "epoch": 0.37, + "grad_norm": 0.4504865676055261, + "learning_rate": 1.4526509844017422e-05, + "loss": 0.2874, "step": 8054 }, { - "epoch": 0.46, - "grad_norm": 0.31051270814141485, - "learning_rate": 1.1683513971901697e-05, - "loss": 0.2689, + "epoch": 0.37, + "grad_norm": 0.9297956553604682, + "learning_rate": 1.4525183034533572e-05, + "loss": 0.6048, "step": 8055 }, { - "epoch": 0.46, - "grad_norm": 0.3512555459275891, - "learning_rate": 1.168167959088292e-05, - "loss": 0.2056, + "epoch": 0.37, + "grad_norm": 0.3730699503991378, + "learning_rate": 1.4523856124866536e-05, + "loss": 0.2746, "step": 8056 }, { - "epoch": 0.46, - "grad_norm": 0.5055296842698704, - "learning_rate": 1.1679845151627648e-05, - "loss": 0.3748, + "epoch": 0.37, + "grad_norm": 0.8923270077576486, + "learning_rate": 1.452252911504568e-05, + "loss": 0.5308, "step": 8057 }, { - "epoch": 0.46, - "grad_norm": 0.7602648728429585, - "learning_rate": 1.1678010654199417e-05, - "loss": 0.4803, + "epoch": 0.37, + "grad_norm": 0.2699751089111817, + "learning_rate": 1.4521202005100392e-05, + "loss": 0.2193, "step": 8058 }, { - "epoch": 0.46, - "grad_norm": 0.41065482545930426, - "learning_rate": 1.1676176098661742e-05, - "loss": 0.2424, + "epoch": 0.37, + "grad_norm": 0.38553112362435155, + "learning_rate": 1.4519874795060048e-05, + "loss": 0.2894, "step": 8059 }, { - "epoch": 0.46, - "grad_norm": 0.47665228757374284, - "learning_rate": 1.1674341485078167e-05, - "loss": 0.3772, + "epoch": 0.37, + "grad_norm": 0.45159444370752067, + "learning_rate": 1.4518547484954033e-05, + "loss": 0.3391, "step": 8060 }, { - "epoch": 0.46, - "grad_norm": 0.4010258063879982, - "learning_rate": 1.1672506813512217e-05, - "loss": 0.3169, + "epoch": 0.37, + "grad_norm": 0.3825125598512783, + "learning_rate": 1.4517220074811729e-05, + "loss": 0.2463, "step": 8061 }, { - "epoch": 0.46, - "grad_norm": 0.3090499814345548, - "learning_rate": 1.1670672084027425e-05, - "loss": 0.2161, + "epoch": 0.37, + "grad_norm": 0.3849553801244855, + "learning_rate": 1.451589256466253e-05, + "loss": 0.2753, "step": 8062 }, { - "epoch": 0.46, - "grad_norm": 0.32229611953247095, - "learning_rate": 1.1668837296687332e-05, - "loss": 0.2474, + "epoch": 0.37, + "grad_norm": 0.7650084573978162, + "learning_rate": 1.4514564954535819e-05, + "loss": 0.4657, "step": 8063 }, { - "epoch": 0.46, - "grad_norm": 0.723866973557168, - "learning_rate": 1.1667002451555476e-05, - "loss": 0.4764, + "epoch": 0.37, + "grad_norm": 0.4333268174377405, + "learning_rate": 1.4513237244460995e-05, + "loss": 0.198, "step": 8064 }, { - "epoch": 0.46, - "grad_norm": 0.38413806263337497, - "learning_rate": 1.1665167548695395e-05, - "loss": 0.2978, + "epoch": 0.37, + "grad_norm": 0.3649220846152661, + "learning_rate": 1.4511909434467444e-05, + "loss": 0.2895, "step": 8065 }, { - "epoch": 0.46, - "grad_norm": 0.443860016995288, - "learning_rate": 1.1663332588170637e-05, - "loss": 0.2366, + "epoch": 0.37, + "grad_norm": 0.415473897899505, + "learning_rate": 1.4510581524584571e-05, + "loss": 0.3636, "step": 8066 }, { - "epoch": 0.46, - "grad_norm": 0.34763785680591497, - "learning_rate": 1.1661497570044737e-05, - "loss": 0.3424, + "epoch": 0.37, + "grad_norm": 0.30697044659719647, + "learning_rate": 1.4509253514841769e-05, + "loss": 0.1109, "step": 8067 }, { - "epoch": 0.46, - "grad_norm": 0.8125107733973791, - "learning_rate": 1.1659662494381255e-05, - "loss": 0.4655, + "epoch": 0.37, + "grad_norm": 0.3762822036066887, + "learning_rate": 1.4507925405268437e-05, + "loss": 0.296, "step": 8068 }, { - "epoch": 0.46, - "grad_norm": 0.23319806165726573, - "learning_rate": 1.1657827361243725e-05, - "loss": 0.1526, + "epoch": 0.37, + "grad_norm": 0.5363757838522297, + "learning_rate": 1.4506597195893983e-05, + "loss": 0.3862, "step": 8069 }, { - "epoch": 0.46, - "grad_norm": 0.38548295708245645, - "learning_rate": 1.1655992170695709e-05, - "loss": 0.3449, + "epoch": 0.37, + "grad_norm": 0.4813313634789555, + "learning_rate": 1.450526888674781e-05, + "loss": 0.3489, "step": 8070 }, { - "epoch": 0.46, - "grad_norm": 0.6652817103965797, - "learning_rate": 1.1654156922800757e-05, - "loss": 0.4387, + "epoch": 0.37, + "grad_norm": 0.23232794222403005, + "learning_rate": 1.4503940477859329e-05, + "loss": 0.1742, "step": 8071 }, { - "epoch": 0.46, - "grad_norm": 0.40606341336922763, - "learning_rate": 1.1652321617622418e-05, - "loss": 0.2247, + "epoch": 0.37, + "grad_norm": 1.2613382524063457, + "learning_rate": 1.4502611969257947e-05, + "loss": 0.777, "step": 8072 }, { - "epoch": 0.46, - "grad_norm": 1.2211351878797165, - "learning_rate": 1.1650486255224254e-05, - "loss": 0.7863, + "epoch": 0.37, + "grad_norm": 0.3202289642631333, + "learning_rate": 1.4501283360973073e-05, + "loss": 0.2686, "step": 8073 }, { - "epoch": 0.46, - "grad_norm": 0.2775457316895789, - "learning_rate": 1.1648650835669821e-05, - "loss": 0.1874, + "epoch": 0.37, + "grad_norm": 0.3702647826983844, + "learning_rate": 1.4499954653034123e-05, + "loss": 0.2357, "step": 8074 }, { - "epoch": 0.46, - "grad_norm": 0.23958314554752064, - "learning_rate": 1.1646815359022683e-05, - "loss": 0.2295, + "epoch": 0.37, + "grad_norm": 0.7259843664084988, + "learning_rate": 1.4498625845470518e-05, + "loss": 0.4323, "step": 8075 }, { - "epoch": 0.46, - "grad_norm": 0.5807486031966581, - "learning_rate": 1.1644979825346397e-05, - "loss": 0.4551, + "epoch": 0.37, + "grad_norm": 1.0928180213495502, + "learning_rate": 1.4497296938311669e-05, + "loss": 0.6414, "step": 8076 }, { - "epoch": 0.46, - "grad_norm": 0.5713200590272652, - "learning_rate": 1.1643144234704531e-05, - "loss": 0.3726, + "epoch": 0.37, + "grad_norm": 0.34071987657911107, + "learning_rate": 1.4495967931587003e-05, + "loss": 0.2116, "step": 8077 }, { - "epoch": 0.46, - "grad_norm": 0.4064945951489773, - "learning_rate": 1.1641308587160654e-05, - "loss": 0.3286, + "epoch": 0.37, + "grad_norm": 0.41112295076863176, + "learning_rate": 1.4494638825325938e-05, + "loss": 0.328, "step": 8078 }, { - "epoch": 0.46, - "grad_norm": 0.3521036176468557, - "learning_rate": 1.1639472882778328e-05, - "loss": 0.2556, + "epoch": 0.37, + "grad_norm": 0.36314544481567956, + "learning_rate": 1.4493309619557903e-05, + "loss": 0.2385, "step": 8079 }, { - "epoch": 0.46, - "grad_norm": 0.563728121201371, - "learning_rate": 1.1637637121621126e-05, - "loss": 0.3345, + "epoch": 0.37, + "grad_norm": 0.3332147159866553, + "learning_rate": 1.4491980314312324e-05, + "loss": 0.1959, "step": 8080 }, { - "epoch": 0.46, - "grad_norm": 0.38309492806397877, - "learning_rate": 1.1635801303752622e-05, - "loss": 0.3351, + "epoch": 0.37, + "grad_norm": 0.4486453044276179, + "learning_rate": 1.4490650909618626e-05, + "loss": 0.339, "step": 8081 }, { - "epoch": 0.46, - "grad_norm": 0.3018626635818792, - "learning_rate": 1.1633965429236389e-05, - "loss": 0.2433, + "epoch": 0.37, + "grad_norm": 1.3046603885399826, + "learning_rate": 1.4489321405506249e-05, + "loss": 0.6039, "step": 8082 }, { - "epoch": 0.46, - "grad_norm": 0.27316504593409413, - "learning_rate": 1.1632129498136005e-05, - "loss": 0.2163, + "epoch": 0.37, + "grad_norm": 0.4638141581238621, + "learning_rate": 1.4487991802004625e-05, + "loss": 0.3174, "step": 8083 }, { - "epoch": 0.46, - "grad_norm": 0.5375141536539753, - "learning_rate": 1.1630293510515043e-05, - "loss": 0.3382, + "epoch": 0.37, + "grad_norm": 0.22334586381428592, + "learning_rate": 1.4486662099143185e-05, + "loss": 0.1543, "step": 8084 }, { - "epoch": 0.46, - "grad_norm": 0.9905869281952622, - "learning_rate": 1.1628457466437091e-05, - "loss": 0.5418, + "epoch": 0.37, + "grad_norm": 0.39841130964991045, + "learning_rate": 1.4485332296951368e-05, + "loss": 0.3155, "step": 8085 }, { - "epoch": 0.46, - "grad_norm": 0.43572613249512804, - "learning_rate": 1.1626621365965725e-05, - "loss": 0.2925, + "epoch": 0.37, + "grad_norm": 0.40266154801848825, + "learning_rate": 1.4484002395458624e-05, + "loss": 0.2942, "step": 8086 }, { - "epoch": 0.46, - "grad_norm": 0.29459633182802186, - "learning_rate": 1.162478520916453e-05, - "loss": 0.267, + "epoch": 0.37, + "grad_norm": 0.4285046575652066, + "learning_rate": 1.4482672394694381e-05, + "loss": 0.2944, "step": 8087 }, { - "epoch": 0.46, - "grad_norm": 0.35473443242054237, - "learning_rate": 1.1622948996097095e-05, - "loss": 0.2073, + "epoch": 0.37, + "grad_norm": 1.0399857910914847, + "learning_rate": 1.4481342294688095e-05, + "loss": 0.6765, "step": 8088 }, { - "epoch": 0.46, - "grad_norm": 0.6581069323859764, - "learning_rate": 1.1621112726827004e-05, - "loss": 0.4389, + "epoch": 0.37, + "grad_norm": 0.27826370025961555, + "learning_rate": 1.4480012095469212e-05, + "loss": 0.2657, "step": 8089 }, { - "epoch": 0.46, - "grad_norm": 0.36928184336908865, - "learning_rate": 1.161927640141785e-05, - "loss": 0.306, + "epoch": 0.37, + "grad_norm": 0.47245128907897627, + "learning_rate": 1.4478681797067177e-05, + "loss": 0.2905, "step": 8090 }, { - "epoch": 0.46, - "grad_norm": 0.3721782809106861, - "learning_rate": 1.1617440019933226e-05, - "loss": 0.3317, + "epoch": 0.37, + "grad_norm": 0.4204030978730879, + "learning_rate": 1.4477351399511442e-05, + "loss": 0.344, "step": 8091 }, { - "epoch": 0.46, - "grad_norm": 0.4796932591821559, - "learning_rate": 1.1615603582436723e-05, - "loss": 0.2115, + "epoch": 0.37, + "grad_norm": 0.33994301941192795, + "learning_rate": 1.4476020902831464e-05, + "loss": 0.2802, "step": 8092 }, { - "epoch": 0.46, - "grad_norm": 0.3904149073219585, - "learning_rate": 1.1613767088991935e-05, - "loss": 0.3234, + "epoch": 0.37, + "grad_norm": 0.27278899255742867, + "learning_rate": 1.4474690307056695e-05, + "loss": 0.2008, "step": 8093 }, { - "epoch": 0.47, - "grad_norm": 0.36772963613224796, - "learning_rate": 1.1611930539662463e-05, - "loss": 0.2398, + "epoch": 0.37, + "grad_norm": 1.0989553363859383, + "learning_rate": 1.44733596122166e-05, + "loss": 0.6389, "step": 8094 }, { - "epoch": 0.47, - "grad_norm": 0.23813307940661996, - "learning_rate": 1.1610093934511908e-05, - "loss": 0.1875, + "epoch": 0.37, + "grad_norm": 0.7174319425326565, + "learning_rate": 1.4472028818340632e-05, + "loss": 0.3997, "step": 8095 }, { - "epoch": 0.47, - "grad_norm": 0.4083627297184952, - "learning_rate": 1.1608257273603864e-05, - "loss": 0.3244, + "epoch": 0.37, + "grad_norm": 0.43043280549769275, + "learning_rate": 1.4470697925458258e-05, + "loss": 0.3359, "step": 8096 }, { - "epoch": 0.47, - "grad_norm": 1.3618253703159187, - "learning_rate": 1.1606420557001945e-05, - "loss": 0.7872, + "epoch": 0.37, + "grad_norm": 0.40546881578014465, + "learning_rate": 1.4469366933598938e-05, + "loss": 0.2923, "step": 8097 }, { - "epoch": 0.47, - "grad_norm": 0.282233577421707, - "learning_rate": 1.160458378476975e-05, - "loss": 0.2427, + "epoch": 0.37, + "grad_norm": 0.7012003848679189, + "learning_rate": 1.4468035842792143e-05, + "loss": 0.3312, "step": 8098 }, { - "epoch": 0.47, - "grad_norm": 0.3926305634323061, - "learning_rate": 1.1602746956970886e-05, - "loss": 0.3108, + "epoch": 0.37, + "grad_norm": 0.26961485819138026, + "learning_rate": 1.446670465306734e-05, + "loss": 0.2108, "step": 8099 }, { - "epoch": 0.47, - "grad_norm": 0.8545494870759214, - "learning_rate": 1.1600910073668964e-05, - "loss": 0.5018, + "epoch": 0.37, + "grad_norm": 0.8683246083681054, + "learning_rate": 1.4465373364454001e-05, + "loss": 0.3428, "step": 8100 }, { - "epoch": 0.47, - "grad_norm": 0.2593570986614807, - "learning_rate": 1.1599073134927597e-05, - "loss": 0.1651, + "epoch": 0.37, + "grad_norm": 0.3765437512663046, + "learning_rate": 1.4464041976981602e-05, + "loss": 0.2809, "step": 8101 }, { - "epoch": 0.47, - "grad_norm": 0.34342933446892515, - "learning_rate": 1.1597236140810394e-05, - "loss": 0.217, + "epoch": 0.37, + "grad_norm": 0.4044416045096495, + "learning_rate": 1.4462710490679614e-05, + "loss": 0.3448, "step": 8102 }, { - "epoch": 0.47, - "grad_norm": 0.3786898079471971, - "learning_rate": 1.1595399091380972e-05, - "loss": 0.3624, + "epoch": 0.37, + "grad_norm": 0.6968941292712909, + "learning_rate": 1.4461378905577518e-05, + "loss": 0.3526, "step": 8103 }, { - "epoch": 0.47, - "grad_norm": 0.661006336761479, - "learning_rate": 1.159356198670295e-05, - "loss": 0.4046, + "epoch": 0.37, + "grad_norm": 0.38395395000015514, + "learning_rate": 1.4460047221704791e-05, + "loss": 0.2343, "step": 8104 }, { - "epoch": 0.47, - "grad_norm": 0.33321983617894935, - "learning_rate": 1.1591724826839943e-05, - "loss": 0.2469, + "epoch": 0.37, + "grad_norm": 0.2830876233267711, + "learning_rate": 1.445871543909092e-05, + "loss": 0.2394, "step": 8105 }, { - "epoch": 0.47, - "grad_norm": 0.3513617843380767, - "learning_rate": 1.1589887611855574e-05, - "loss": 0.3298, + "epoch": 0.37, + "grad_norm": 1.7952650045326775, + "learning_rate": 1.4457383557765385e-05, + "loss": 0.8264, "step": 8106 }, { - "epoch": 0.47, - "grad_norm": 0.2512576604987261, - "learning_rate": 1.1588050341813466e-05, - "loss": 0.1484, + "epoch": 0.37, + "grad_norm": 0.36648782546590747, + "learning_rate": 1.4456051577757675e-05, + "loss": 0.2232, "step": 8107 }, { - "epoch": 0.47, - "grad_norm": 0.2924953972160349, - "learning_rate": 1.1586213016777244e-05, - "loss": 0.2089, + "epoch": 0.37, + "grad_norm": 0.5557893341907113, + "learning_rate": 1.4454719499097274e-05, + "loss": 0.3735, "step": 8108 }, { - "epoch": 0.47, - "grad_norm": 0.7746893833833915, - "learning_rate": 1.158437563681053e-05, - "loss": 0.4779, + "epoch": 0.37, + "grad_norm": 0.44457788567359824, + "learning_rate": 1.445338732181368e-05, + "loss": 0.3399, "step": 8109 }, { - "epoch": 0.47, - "grad_norm": 0.3601070577324971, - "learning_rate": 1.1582538201976958e-05, - "loss": 0.3198, + "epoch": 0.37, + "grad_norm": 0.377295470876217, + "learning_rate": 1.4452055045936384e-05, + "loss": 0.2182, "step": 8110 }, { - "epoch": 0.47, - "grad_norm": 0.27886876485206846, - "learning_rate": 1.1580700712340159e-05, - "loss": 0.2167, + "epoch": 0.37, + "grad_norm": 0.27680526318806714, + "learning_rate": 1.4450722671494878e-05, + "loss": 0.1883, "step": 8111 }, { - "epoch": 0.47, - "grad_norm": 0.8927279441564796, - "learning_rate": 1.1578863167963761e-05, - "loss": 0.58, + "epoch": 0.37, + "grad_norm": 1.0434715039722036, + "learning_rate": 1.4449390198518663e-05, + "loss": 0.7002, "step": 8112 }, { - "epoch": 0.47, - "grad_norm": 0.25440229767955796, - "learning_rate": 1.1577025568911395e-05, - "loss": 0.1733, + "epoch": 0.37, + "grad_norm": 0.3046090265375082, + "learning_rate": 1.4448057627037238e-05, + "loss": 0.2192, "step": 8113 }, { - "epoch": 0.47, - "grad_norm": 0.28882003772125364, - "learning_rate": 1.1575187915246706e-05, - "loss": 0.262, + "epoch": 0.37, + "grad_norm": 0.3921660931816046, + "learning_rate": 1.4446724957080105e-05, + "loss": 0.3254, "step": 8114 }, { - "epoch": 0.47, - "grad_norm": 0.47526159258141804, - "learning_rate": 1.1573350207033324e-05, - "loss": 0.3023, + "epoch": 0.37, + "grad_norm": 0.6994491471845978, + "learning_rate": 1.4445392188676764e-05, + "loss": 0.4547, "step": 8115 }, { - "epoch": 0.47, - "grad_norm": 0.7238904758177274, - "learning_rate": 1.1571512444334894e-05, - "loss": 0.4253, + "epoch": 0.37, + "grad_norm": 0.26871783575947744, + "learning_rate": 1.4444059321856725e-05, + "loss": 0.0719, "step": 8116 }, { - "epoch": 0.47, - "grad_norm": 0.3652372666944264, - "learning_rate": 1.1569674627215057e-05, - "loss": 0.2946, + "epoch": 0.37, + "grad_norm": 0.2792463394041331, + "learning_rate": 1.4442726356649497e-05, + "loss": 0.2444, "step": 8117 }, { - "epoch": 0.47, - "grad_norm": 0.3398527200099904, - "learning_rate": 1.1567836755737452e-05, - "loss": 0.2535, + "epoch": 0.37, + "grad_norm": 1.1861117440030566, + "learning_rate": 1.4441393293084589e-05, + "loss": 0.6726, "step": 8118 }, { - "epoch": 0.47, - "grad_norm": 0.25940567321868835, - "learning_rate": 1.156599882996573e-05, - "loss": 0.1895, + "epoch": 0.37, + "grad_norm": 0.7797025946871594, + "learning_rate": 1.4440060131191515e-05, + "loss": 0.4208, "step": 8119 }, { - "epoch": 0.47, - "grad_norm": 0.41358852547742536, - "learning_rate": 1.1564160849963533e-05, - "loss": 0.2941, + "epoch": 0.37, + "grad_norm": 0.3785789840265222, + "learning_rate": 1.443872687099979e-05, + "loss": 0.2703, "step": 8120 }, { - "epoch": 0.47, - "grad_norm": 0.6890446909789003, - "learning_rate": 1.1562322815794516e-05, - "loss": 0.3646, + "epoch": 0.37, + "grad_norm": 0.4021594988038672, + "learning_rate": 1.4437393512538926e-05, + "loss": 0.3176, "step": 8121 }, { - "epoch": 0.47, - "grad_norm": 0.39128113875580217, - "learning_rate": 1.1560484727522323e-05, - "loss": 0.3401, + "epoch": 0.37, + "grad_norm": 0.5744238816396605, + "learning_rate": 1.443606005583845e-05, + "loss": 0.3289, "step": 8122 }, { - "epoch": 0.47, - "grad_norm": 0.3501552171817226, - "learning_rate": 1.1558646585210615e-05, - "loss": 0.2678, + "epoch": 0.37, + "grad_norm": 0.24735474269333255, + "learning_rate": 1.4434726500927879e-05, + "loss": 0.1651, "step": 8123 }, { - "epoch": 0.47, - "grad_norm": 0.8310288981112209, - "learning_rate": 1.1556808388923043e-05, - "loss": 0.3499, + "epoch": 0.37, + "grad_norm": 1.211791538324062, + "learning_rate": 1.443339284783674e-05, + "loss": 0.6564, "step": 8124 }, { - "epoch": 0.47, - "grad_norm": 0.32194066075690564, - "learning_rate": 1.155497013872326e-05, - "loss": 0.2148, + "epoch": 0.37, + "grad_norm": 0.40119978313973204, + "learning_rate": 1.4432059096594553e-05, + "loss": 0.2828, "step": 8125 }, { - "epoch": 0.47, - "grad_norm": 0.3097208230540672, - "learning_rate": 1.1553131834674929e-05, - "loss": 0.2589, + "epoch": 0.37, + "grad_norm": 0.49271095317703384, + "learning_rate": 1.443072524723085e-05, + "loss": 0.2801, "step": 8126 }, { - "epoch": 0.47, - "grad_norm": 0.5418465886898102, - "learning_rate": 1.1551293476841712e-05, - "loss": 0.3533, + "epoch": 0.37, + "grad_norm": 0.9114656136689938, + "learning_rate": 1.442939129977516e-05, + "loss": 0.5515, "step": 8127 }, { - "epoch": 0.47, - "grad_norm": 0.9933329488460488, - "learning_rate": 1.1549455065287267e-05, - "loss": 0.5601, + "epoch": 0.37, + "grad_norm": 0.6944807003011761, + "learning_rate": 1.4428057254257014e-05, + "loss": 0.2886, "step": 8128 }, { - "epoch": 0.47, - "grad_norm": 0.33251164007014095, - "learning_rate": 1.1547616600075262e-05, - "loss": 0.2702, + "epoch": 0.37, + "grad_norm": 0.3967429256594281, + "learning_rate": 1.442672311070595e-05, + "loss": 0.2535, "step": 8129 }, { - "epoch": 0.47, - "grad_norm": 0.48264580396110907, - "learning_rate": 1.1545778081269356e-05, - "loss": 0.3385, + "epoch": 0.37, + "grad_norm": 0.3170589010166853, + "learning_rate": 1.4425388869151506e-05, + "loss": 0.2206, "step": 8130 }, { - "epoch": 0.47, - "grad_norm": 0.26561831151431287, - "learning_rate": 1.1543939508933226e-05, - "loss": 0.1731, + "epoch": 0.37, + "grad_norm": 0.6541096200829364, + "learning_rate": 1.4424054529623214e-05, + "loss": 0.3823, "step": 8131 }, { - "epoch": 0.47, - "grad_norm": 0.38986731532730423, - "learning_rate": 1.1542100883130534e-05, - "loss": 0.2678, + "epoch": 0.37, + "grad_norm": 0.428923930151477, + "learning_rate": 1.4422720092150622e-05, + "loss": 0.3368, "step": 8132 }, { - "epoch": 0.47, - "grad_norm": 0.8541658768168554, - "learning_rate": 1.1540262203924957e-05, - "loss": 0.4704, + "epoch": 0.37, + "grad_norm": 0.4077737286229152, + "learning_rate": 1.4421385556763268e-05, + "loss": 0.2913, "step": 8133 }, { - "epoch": 0.47, - "grad_norm": 0.37102575891994377, - "learning_rate": 1.1538423471380162e-05, - "loss": 0.2628, + "epoch": 0.37, + "grad_norm": 0.780064609035624, + "learning_rate": 1.4420050923490696e-05, + "loss": 0.377, "step": 8134 }, { - "epoch": 0.47, - "grad_norm": 0.4084559161761699, - "learning_rate": 1.1536584685559833e-05, - "loss": 0.2971, + "epoch": 0.37, + "grad_norm": 0.2901392780899888, + "learning_rate": 1.4418716192362458e-05, + "loss": 0.2318, "step": 8135 }, { - "epoch": 0.47, - "grad_norm": 1.090623885447246, - "learning_rate": 1.1534745846527643e-05, - "loss": 0.5709, + "epoch": 0.37, + "grad_norm": 0.30757732191557946, + "learning_rate": 1.4417381363408107e-05, + "loss": 0.2323, "step": 8136 }, { - "epoch": 0.47, - "grad_norm": 0.5691462758797022, - "learning_rate": 1.1532906954347265e-05, - "loss": 0.2653, + "epoch": 0.37, + "grad_norm": 0.4548606743024571, + "learning_rate": 1.4416046436657187e-05, + "loss": 0.3587, "step": 8137 }, { - "epoch": 0.47, - "grad_norm": 0.3616131380469849, - "learning_rate": 1.1531068009082388e-05, - "loss": 0.2771, + "epoch": 0.37, + "grad_norm": 0.45732355159535487, + "learning_rate": 1.4414711412139254e-05, + "loss": 0.303, "step": 8138 }, { - "epoch": 0.47, - "grad_norm": 0.35732383039321136, - "learning_rate": 1.1529229010796693e-05, - "loss": 0.2351, + "epoch": 0.37, + "grad_norm": 1.0822017475316053, + "learning_rate": 1.4413376289883868e-05, + "loss": 0.3963, "step": 8139 }, { - "epoch": 0.47, - "grad_norm": 1.2853466987734752, - "learning_rate": 1.152738995955386e-05, - "loss": 0.8433, + "epoch": 0.37, + "grad_norm": 0.45383526659823176, + "learning_rate": 1.4412041069920582e-05, + "loss": 0.3112, "step": 8140 }, { - "epoch": 0.47, - "grad_norm": 0.2917511597672303, - "learning_rate": 1.1525550855417579e-05, - "loss": 0.2001, + "epoch": 0.37, + "grad_norm": 0.3372666816026221, + "learning_rate": 1.4410705752278957e-05, + "loss": 0.2814, "step": 8141 }, { - "epoch": 0.47, - "grad_norm": 0.41454996938168326, - "learning_rate": 1.152371169845154e-05, - "loss": 0.322, + "epoch": 0.37, + "grad_norm": 0.32524645401580127, + "learning_rate": 1.4409370336988564e-05, + "loss": 0.169, "step": 8142 }, { - "epoch": 0.47, - "grad_norm": 0.7889175233489341, - "learning_rate": 1.152187248871943e-05, - "loss": 0.4598, + "epoch": 0.37, + "grad_norm": 0.5685776051589425, + "learning_rate": 1.4408034824078959e-05, + "loss": 0.3688, "step": 8143 }, { - "epoch": 0.47, - "grad_norm": 0.39376249011371567, - "learning_rate": 1.1520033226284942e-05, - "loss": 0.2423, + "epoch": 0.37, + "grad_norm": 0.4376313230990527, + "learning_rate": 1.440669921357971e-05, + "loss": 0.3087, "step": 8144 }, { - "epoch": 0.47, - "grad_norm": 0.42249860389750094, - "learning_rate": 1.1518193911211763e-05, - "loss": 0.3088, + "epoch": 0.37, + "grad_norm": 0.4569660570837889, + "learning_rate": 1.4405363505520386e-05, + "loss": 0.3339, "step": 8145 }, { - "epoch": 0.47, - "grad_norm": 0.3832570482048342, - "learning_rate": 1.1516354543563603e-05, - "loss": 0.3023, + "epoch": 0.37, + "grad_norm": 0.4012077787576056, + "learning_rate": 1.4404027699930563e-05, + "loss": 0.0717, "step": 8146 }, { - "epoch": 0.47, - "grad_norm": 0.27028430753402555, - "learning_rate": 1.1514515123404144e-05, - "loss": 0.1744, + "epoch": 0.37, + "grad_norm": 0.4509739968710311, + "learning_rate": 1.4402691796839812e-05, + "loss": 0.3429, "step": 8147 }, { - "epoch": 0.47, - "grad_norm": 0.9814466672326471, - "learning_rate": 1.1512675650797093e-05, - "loss": 0.5945, + "epoch": 0.37, + "grad_norm": 0.377727085417594, + "learning_rate": 1.4401355796277707e-05, + "loss": 0.2985, "step": 8148 }, { - "epoch": 0.47, - "grad_norm": 0.5236065950219225, - "learning_rate": 1.1510836125806148e-05, - "loss": 0.3632, + "epoch": 0.37, + "grad_norm": 0.32875092038198983, + "learning_rate": 1.4400019698273826e-05, + "loss": 0.2148, "step": 8149 }, { - "epoch": 0.47, - "grad_norm": 0.2567177557850539, - "learning_rate": 1.1508996548495015e-05, - "loss": 0.2272, + "epoch": 0.37, + "grad_norm": 0.40830849008245623, + "learning_rate": 1.4398683502857748e-05, + "loss": 0.2902, "step": 8150 }, { - "epoch": 0.47, - "grad_norm": 0.48664804419956864, - "learning_rate": 1.1507156918927396e-05, - "loss": 0.2979, + "epoch": 0.37, + "grad_norm": 1.0769865822653262, + "learning_rate": 1.4397347210059059e-05, + "loss": 0.5932, "step": 8151 }, { - "epoch": 0.47, - "grad_norm": 0.4403235305691835, - "learning_rate": 1.1505317237166997e-05, - "loss": 0.342, + "epoch": 0.37, + "grad_norm": 0.38318973891823555, + "learning_rate": 1.439601081990734e-05, + "loss": 0.2475, "step": 8152 }, { - "epoch": 0.47, - "grad_norm": 0.4455779532667353, - "learning_rate": 1.1503477503277526e-05, - "loss": 0.304, + "epoch": 0.37, + "grad_norm": 0.3825986230766878, + "learning_rate": 1.4394674332432182e-05, + "loss": 0.2794, "step": 8153 }, { - "epoch": 0.47, - "grad_norm": 0.3233973459258135, - "learning_rate": 1.1501637717322695e-05, - "loss": 0.2459, + "epoch": 0.37, + "grad_norm": 0.5371953677536965, + "learning_rate": 1.4393337747663168e-05, + "loss": 0.4304, "step": 8154 }, { - "epoch": 0.47, - "grad_norm": 0.6969251101798604, - "learning_rate": 1.1499797879366214e-05, - "loss": 0.4513, + "epoch": 0.37, + "grad_norm": 0.3018934112398851, + "learning_rate": 1.4392001065629888e-05, + "loss": 0.1138, "step": 8155 }, { - "epoch": 0.47, - "grad_norm": 0.4039155950028945, - "learning_rate": 1.1497957989471798e-05, - "loss": 0.2988, + "epoch": 0.37, + "grad_norm": 0.420633317378892, + "learning_rate": 1.439066428636194e-05, + "loss": 0.3172, "step": 8156 }, { - "epoch": 0.47, - "grad_norm": 0.5008602945483102, - "learning_rate": 1.1496118047703162e-05, - "loss": 0.2993, + "epoch": 0.37, + "grad_norm": 0.3274556845005094, + "learning_rate": 1.4389327409888916e-05, + "loss": 0.2437, "step": 8157 }, { - "epoch": 0.47, - "grad_norm": 0.29040380927590353, - "learning_rate": 1.1494278054124019e-05, - "loss": 0.2655, + "epoch": 0.37, + "grad_norm": 1.016920581456003, + "learning_rate": 1.438799043624041e-05, + "loss": 0.572, "step": 8158 }, { - "epoch": 0.47, - "grad_norm": 0.39909885373138854, - "learning_rate": 1.1492438008798093e-05, - "loss": 0.264, + "epoch": 0.37, + "grad_norm": 0.3438425472043183, + "learning_rate": 1.438665336544603e-05, + "loss": 0.2424, "step": 8159 }, { - "epoch": 0.47, - "grad_norm": 0.4596488600608055, - "learning_rate": 1.1490597911789104e-05, - "loss": 0.2365, + "epoch": 0.37, + "grad_norm": 0.4852958734828313, + "learning_rate": 1.4385316197535373e-05, + "loss": 0.3629, "step": 8160 }, { - "epoch": 0.47, - "grad_norm": 0.5358031775312709, - "learning_rate": 1.1488757763160771e-05, - "loss": 0.3619, + "epoch": 0.37, + "grad_norm": 0.7604373800181158, + "learning_rate": 1.4383978932538038e-05, + "loss": 0.3557, "step": 8161 }, { - "epoch": 0.47, - "grad_norm": 0.289051100704963, - "learning_rate": 1.148691756297682e-05, - "loss": 0.2643, + "epoch": 0.37, + "grad_norm": 0.36121423116163215, + "learning_rate": 1.4382641570483635e-05, + "loss": 0.2032, "step": 8162 }, { - "epoch": 0.47, - "grad_norm": 0.46616362480908935, - "learning_rate": 1.1485077311300983e-05, - "loss": 0.2833, + "epoch": 0.38, + "grad_norm": 0.37971718292547935, + "learning_rate": 1.4381304111401775e-05, + "loss": 0.2528, "step": 8163 }, { - "epoch": 0.47, - "grad_norm": 0.43853018014503176, - "learning_rate": 1.1483237008196978e-05, - "loss": 0.3051, + "epoch": 0.38, + "grad_norm": 0.3247067814956869, + "learning_rate": 1.4379966555322061e-05, + "loss": 0.2716, "step": 8164 }, { - "epoch": 0.47, - "grad_norm": 0.27216529737512757, - "learning_rate": 1.1481396653728542e-05, - "loss": 0.2282, + "epoch": 0.38, + "grad_norm": 0.370823121370817, + "learning_rate": 1.4378628902274112e-05, + "loss": 0.2502, "step": 8165 }, { - "epoch": 0.47, - "grad_norm": 0.3954700014749519, - "learning_rate": 1.14795562479594e-05, - "loss": 0.3425, + "epoch": 0.38, + "grad_norm": 0.5896055000558335, + "learning_rate": 1.4377291152287538e-05, + "loss": 0.4204, "step": 8166 }, { - "epoch": 0.47, - "grad_norm": 0.639378666232857, - "learning_rate": 1.1477715790953293e-05, - "loss": 0.3417, + "epoch": 0.38, + "grad_norm": 0.7751249439047946, + "learning_rate": 1.4375953305391956e-05, + "loss": 0.5046, "step": 8167 }, { - "epoch": 0.47, - "grad_norm": 0.3230244326253486, - "learning_rate": 1.1475875282773948e-05, - "loss": 0.2642, + "epoch": 0.38, + "grad_norm": 0.3569893717916408, + "learning_rate": 1.4374615361616985e-05, + "loss": 0.2225, "step": 8168 }, { - "epoch": 0.47, - "grad_norm": 1.0780614602536482, - "learning_rate": 1.1474034723485108e-05, - "loss": 0.6736, + "epoch": 0.38, + "grad_norm": 0.3487667059860045, + "learning_rate": 1.437327732099225e-05, + "loss": 0.2331, "step": 8169 }, { - "epoch": 0.47, - "grad_norm": 0.299867058226962, - "learning_rate": 1.1472194113150507e-05, - "loss": 0.2598, + "epoch": 0.38, + "grad_norm": 0.8249438071964644, + "learning_rate": 1.4371939183547368e-05, + "loss": 0.4399, "step": 8170 }, { - "epoch": 0.47, - "grad_norm": 0.2996405103989052, - "learning_rate": 1.1470353451833889e-05, - "loss": 0.2208, + "epoch": 0.38, + "grad_norm": 0.32898537178932663, + "learning_rate": 1.4370600949311967e-05, + "loss": 0.2841, "step": 8171 }, { - "epoch": 0.47, - "grad_norm": 0.62197502201416, - "learning_rate": 1.1468512739598991e-05, - "loss": 0.3585, + "epoch": 0.38, + "grad_norm": 0.4560493355111065, + "learning_rate": 1.4369262618315672e-05, + "loss": 0.2589, "step": 8172 }, { - "epoch": 0.47, - "grad_norm": 0.28867732255449474, - "learning_rate": 1.1466671976509564e-05, - "loss": 0.2173, + "epoch": 0.38, + "grad_norm": 1.152462184301265, + "learning_rate": 1.4367924190588114e-05, + "loss": 0.631, "step": 8173 }, { - "epoch": 0.47, - "grad_norm": 0.613279875657845, - "learning_rate": 1.1464831162629346e-05, - "loss": 0.3476, + "epoch": 0.38, + "grad_norm": 0.37560296611791244, + "learning_rate": 1.4366585666158924e-05, + "loss": 0.2774, "step": 8174 }, { - "epoch": 0.47, - "grad_norm": 0.3803647956262368, - "learning_rate": 1.146299029802209e-05, - "loss": 0.3359, + "epoch": 0.38, + "grad_norm": 0.1922203290623445, + "learning_rate": 1.4365247045057732e-05, + "loss": 0.1059, "step": 8175 }, { - "epoch": 0.47, - "grad_norm": 1.0181850562146808, - "learning_rate": 1.1461149382751544e-05, - "loss": 0.6673, + "epoch": 0.38, + "grad_norm": 0.4279193601201062, + "learning_rate": 1.4363908327314185e-05, + "loss": 0.3036, "step": 8176 }, { - "epoch": 0.47, - "grad_norm": 0.7603144529101236, - "learning_rate": 1.1459308416881454e-05, - "loss": 0.2102, + "epoch": 0.38, + "grad_norm": 0.3777746994396621, + "learning_rate": 1.4362569512957912e-05, + "loss": 0.289, "step": 8177 }, { - "epoch": 0.47, - "grad_norm": 0.30168260451446044, - "learning_rate": 1.145746740047558e-05, - "loss": 0.2995, + "epoch": 0.38, + "grad_norm": 0.8190570137297476, + "learning_rate": 1.4361230602018551e-05, + "loss": 0.2977, "step": 8178 }, { - "epoch": 0.47, - "grad_norm": 0.3321460276508209, - "learning_rate": 1.1455626333597672e-05, - "loss": 0.2085, + "epoch": 0.38, + "grad_norm": 1.2220837858016589, + "learning_rate": 1.435989159452575e-05, + "loss": 0.8406, "step": 8179 }, { - "epoch": 0.47, - "grad_norm": 0.34455866728257245, - "learning_rate": 1.1453785216311484e-05, - "loss": 0.2043, + "epoch": 0.38, + "grad_norm": 0.3693450586663808, + "learning_rate": 1.4358552490509152e-05, + "loss": 0.2599, "step": 8180 }, { - "epoch": 0.47, - "grad_norm": 0.3685518357521418, - "learning_rate": 1.1451944048680779e-05, - "loss": 0.3475, + "epoch": 0.38, + "grad_norm": 0.36190938695966485, + "learning_rate": 1.4357213289998402e-05, + "loss": 0.1645, "step": 8181 }, { - "epoch": 0.47, - "grad_norm": 0.4831933139625984, - "learning_rate": 1.1450102830769314e-05, - "loss": 0.3685, + "epoch": 0.38, + "grad_norm": 0.9532698894067595, + "learning_rate": 1.4355873993023149e-05, + "loss": 0.4447, "step": 8182 }, { - "epoch": 0.47, - "grad_norm": 0.3531600940862889, - "learning_rate": 1.1448261562640848e-05, - "loss": 0.2355, + "epoch": 0.38, + "grad_norm": 0.476370849568272, + "learning_rate": 1.4354534599613045e-05, + "loss": 0.2957, "step": 8183 }, { - "epoch": 0.47, - "grad_norm": 0.439610405654752, - "learning_rate": 1.1446420244359148e-05, - "loss": 0.3821, + "epoch": 0.38, + "grad_norm": 0.38947420523982107, + "learning_rate": 1.4353195109797742e-05, + "loss": 0.3335, "step": 8184 }, { - "epoch": 0.47, - "grad_norm": 0.26939221681386455, - "learning_rate": 1.1444578875987978e-05, - "loss": 0.2208, + "epoch": 0.38, + "grad_norm": 0.48843814860484114, + "learning_rate": 1.4351855523606895e-05, + "loss": 0.3199, "step": 8185 }, { - "epoch": 0.47, - "grad_norm": 0.27650453372703404, - "learning_rate": 1.1442737457591102e-05, - "loss": 0.2099, + "epoch": 0.38, + "grad_norm": 0.35179459738077856, + "learning_rate": 1.4350515841070159e-05, + "loss": 0.2515, "step": 8186 }, { - "epoch": 0.47, - "grad_norm": 1.3629929252444442, - "learning_rate": 1.144089598923229e-05, - "loss": 0.7209, + "epoch": 0.38, + "grad_norm": 0.8533334882972816, + "learning_rate": 1.4349176062217197e-05, + "loss": 0.4837, "step": 8187 }, { - "epoch": 0.47, - "grad_norm": 0.6214268287048519, - "learning_rate": 1.1439054470975312e-05, - "loss": 0.506, + "epoch": 0.38, + "grad_norm": 0.3758362055607333, + "learning_rate": 1.4347836187077665e-05, + "loss": 0.2903, "step": 8188 }, { - "epoch": 0.47, - "grad_norm": 0.3119158539908204, - "learning_rate": 1.143721290288394e-05, - "loss": 0.2878, + "epoch": 0.38, + "grad_norm": 0.2955437267555108, + "learning_rate": 1.4346496215681237e-05, + "loss": 0.2421, "step": 8189 }, { - "epoch": 0.47, - "grad_norm": 0.3318970742581742, - "learning_rate": 1.1435371285021948e-05, - "loss": 0.262, + "epoch": 0.38, + "grad_norm": 0.48187030322490054, + "learning_rate": 1.4345156148057569e-05, + "loss": 0.3413, "step": 8190 }, { - "epoch": 0.47, - "grad_norm": 0.2773017863670644, - "learning_rate": 1.1433529617453108e-05, - "loss": 0.1932, + "epoch": 0.38, + "grad_norm": 1.0071674295529929, + "learning_rate": 1.434381598423633e-05, + "loss": 0.5564, "step": 8191 }, { - "epoch": 0.47, - "grad_norm": 0.8760997978912737, - "learning_rate": 1.1431687900241201e-05, - "loss": 0.4663, + "epoch": 0.38, + "grad_norm": 0.3314808494783778, + "learning_rate": 1.4342475724247194e-05, + "loss": 0.2826, "step": 8192 }, { - "epoch": 0.47, - "grad_norm": 0.3367584318928723, - "learning_rate": 1.142984613345e-05, - "loss": 0.2346, + "epoch": 0.38, + "grad_norm": 0.5616032074099029, + "learning_rate": 1.4341135368119829e-05, + "loss": 0.3901, "step": 8193 }, { - "epoch": 0.47, - "grad_norm": 0.4579062321186222, - "learning_rate": 1.1428004317143293e-05, - "loss": 0.3515, + "epoch": 0.38, + "grad_norm": 0.7617297336530914, + "learning_rate": 1.4339794915883913e-05, + "loss": 0.2899, "step": 8194 }, { - "epoch": 0.47, - "grad_norm": 0.6150169647018473, - "learning_rate": 1.1426162451384857e-05, - "loss": 0.3871, + "epoch": 0.38, + "grad_norm": 0.2824971092991806, + "learning_rate": 1.4338454367569122e-05, + "loss": 0.221, "step": 8195 }, { - "epoch": 0.47, - "grad_norm": 0.3216805849443447, - "learning_rate": 1.1424320536238478e-05, - "loss": 0.2268, + "epoch": 0.38, + "grad_norm": 0.5290168829688746, + "learning_rate": 1.4337113723205127e-05, + "loss": 0.4032, "step": 8196 }, { - "epoch": 0.47, - "grad_norm": 0.25155788365831927, - "learning_rate": 1.1422478571767937e-05, - "loss": 0.206, + "epoch": 0.38, + "grad_norm": 0.6235443281168692, + "learning_rate": 1.4335772982821618e-05, + "loss": 0.3837, "step": 8197 }, { - "epoch": 0.47, - "grad_norm": 0.34325115785592575, - "learning_rate": 1.1420636558037026e-05, - "loss": 0.2802, + "epoch": 0.38, + "grad_norm": 0.3771828397558655, + "learning_rate": 1.4334432146448272e-05, + "loss": 0.2226, "step": 8198 }, { - "epoch": 0.47, - "grad_norm": 0.3558996480535589, - "learning_rate": 1.1418794495109528e-05, - "loss": 0.2655, + "epoch": 0.38, + "grad_norm": 0.7929446769568814, + "learning_rate": 1.4333091214114778e-05, + "loss": 0.4607, "step": 8199 }, { - "epoch": 0.47, - "grad_norm": 0.7393643667231588, - "learning_rate": 1.1416952383049244e-05, - "loss": 0.4348, + "epoch": 0.38, + "grad_norm": 0.3515116027312931, + "learning_rate": 1.4331750185850821e-05, + "loss": 0.3106, "step": 8200 }, { - "epoch": 0.47, - "grad_norm": 0.3308899478444616, - "learning_rate": 1.1415110221919958e-05, - "loss": 0.2869, + "epoch": 0.38, + "grad_norm": 0.2551655177347878, + "learning_rate": 1.433040906168609e-05, + "loss": 0.1327, "step": 8201 }, { - "epoch": 0.47, - "grad_norm": 0.4330551569974486, - "learning_rate": 1.1413268011785463e-05, - "loss": 0.3299, + "epoch": 0.38, + "grad_norm": 0.33681451534038465, + "learning_rate": 1.4329067841650274e-05, + "loss": 0.2049, "step": 8202 }, { - "epoch": 0.47, - "grad_norm": 0.16466774797088232, - "learning_rate": 1.1411425752709561e-05, - "loss": 0.0933, + "epoch": 0.38, + "grad_norm": 0.5442830045690067, + "learning_rate": 1.4327726525773069e-05, + "loss": 0.4134, "step": 8203 }, { - "epoch": 0.47, - "grad_norm": 0.5658043381450948, - "learning_rate": 1.1409583444756043e-05, - "loss": 0.3813, + "epoch": 0.38, + "grad_norm": 0.31528552034531016, + "learning_rate": 1.4326385114084172e-05, + "loss": 0.2284, "step": 8204 }, { - "epoch": 0.47, - "grad_norm": 0.39733186363612516, - "learning_rate": 1.1407741087988713e-05, - "loss": 0.311, + "epoch": 0.38, + "grad_norm": 0.6830644309840728, + "learning_rate": 1.4325043606613274e-05, + "loss": 0.3603, "step": 8205 }, { - "epoch": 0.47, - "grad_norm": 0.5293473050356182, - "learning_rate": 1.1405898682471367e-05, - "loss": 0.3052, + "epoch": 0.38, + "grad_norm": 1.290346700820485, + "learning_rate": 1.4323702003390084e-05, + "loss": 0.6148, "step": 8206 }, { - "epoch": 0.47, - "grad_norm": 0.3863878784564842, - "learning_rate": 1.1404056228267813e-05, - "loss": 0.3073, + "epoch": 0.38, + "grad_norm": 0.2575142791168545, + "learning_rate": 1.4322360304444296e-05, + "loss": 0.1823, "step": 8207 }, { - "epoch": 0.47, - "grad_norm": 0.5500023359847825, - "learning_rate": 1.140221372544185e-05, - "loss": 0.3302, + "epoch": 0.38, + "grad_norm": 0.34372574308053383, + "learning_rate": 1.4321018509805617e-05, + "loss": 0.2604, "step": 8208 }, { - "epoch": 0.47, - "grad_norm": 0.26832299732179304, - "learning_rate": 1.1400371174057287e-05, - "loss": 0.2081, + "epoch": 0.38, + "grad_norm": 1.3397963864650597, + "learning_rate": 1.4319676619503754e-05, + "loss": 0.8465, "step": 8209 }, { - "epoch": 0.47, - "grad_norm": 0.3489058273162678, - "learning_rate": 1.139852857417793e-05, - "loss": 0.2296, + "epoch": 0.38, + "grad_norm": 0.41605963002925217, + "learning_rate": 1.4318334633568415e-05, + "loss": 0.2905, "step": 8210 }, { - "epoch": 0.47, - "grad_norm": 0.35245208526279237, - "learning_rate": 1.139668592586759e-05, - "loss": 0.2888, + "epoch": 0.38, + "grad_norm": 0.5057832879966968, + "learning_rate": 1.4316992552029305e-05, + "loss": 0.3177, "step": 8211 }, { - "epoch": 0.47, - "grad_norm": 0.6348054017588185, - "learning_rate": 1.1394843229190076e-05, - "loss": 0.3672, + "epoch": 0.38, + "grad_norm": 0.44220618867046024, + "learning_rate": 1.4315650374916143e-05, + "loss": 0.3329, "step": 8212 }, { - "epoch": 0.47, - "grad_norm": 0.3565702444562063, - "learning_rate": 1.1393000484209202e-05, - "loss": 0.2839, + "epoch": 0.38, + "grad_norm": 0.34443428837699497, + "learning_rate": 1.4314308102258643e-05, + "loss": 0.2111, "step": 8213 }, { - "epoch": 0.47, - "grad_norm": 0.35892910995714983, - "learning_rate": 1.139115769098878e-05, - "loss": 0.2883, + "epoch": 0.38, + "grad_norm": 0.3342774080954743, + "learning_rate": 1.4312965734086519e-05, + "loss": 0.1568, "step": 8214 }, { - "epoch": 0.47, - "grad_norm": 0.4652118043069549, - "learning_rate": 1.1389314849592626e-05, - "loss": 0.2623, + "epoch": 0.38, + "grad_norm": 0.5348091614613677, + "learning_rate": 1.4311623270429488e-05, + "loss": 0.4093, "step": 8215 }, { - "epoch": 0.47, - "grad_norm": 0.31534488534631117, - "learning_rate": 1.1387471960084557e-05, - "loss": 0.1603, + "epoch": 0.38, + "grad_norm": 0.3518870455826157, + "learning_rate": 1.4310280711317274e-05, + "loss": 0.278, "step": 8216 }, { - "epoch": 0.47, - "grad_norm": 0.3043071870765772, - "learning_rate": 1.1385629022528397e-05, - "loss": 0.2729, + "epoch": 0.38, + "grad_norm": 0.46793460911610657, + "learning_rate": 1.4308938056779602e-05, + "loss": 0.3051, "step": 8217 }, { - "epoch": 0.47, - "grad_norm": 0.791220490302, - "learning_rate": 1.1383786036987963e-05, - "loss": 0.433, + "epoch": 0.38, + "grad_norm": 1.251438584228942, + "learning_rate": 1.4307595306846194e-05, + "loss": 0.5727, "step": 8218 }, { - "epoch": 0.47, - "grad_norm": 0.3690113603730788, - "learning_rate": 1.1381943003527077e-05, - "loss": 0.1939, + "epoch": 0.38, + "grad_norm": 0.33538450523095514, + "learning_rate": 1.4306252461546777e-05, + "loss": 0.1887, "step": 8219 }, { - "epoch": 0.47, - "grad_norm": 0.3844076124295093, - "learning_rate": 1.1380099922209564e-05, - "loss": 0.3177, + "epoch": 0.38, + "grad_norm": 0.33891000885463735, + "learning_rate": 1.4304909520911079e-05, + "loss": 0.2221, "step": 8220 }, { - "epoch": 0.47, - "grad_norm": 0.3525552292700522, - "learning_rate": 1.1378256793099251e-05, - "loss": 0.3186, + "epoch": 0.38, + "grad_norm": 0.6147435561695751, + "learning_rate": 1.4303566484968836e-05, + "loss": 0.4614, "step": 8221 }, { - "epoch": 0.47, - "grad_norm": 0.2606543369638635, - "learning_rate": 1.1376413616259965e-05, - "loss": 0.1299, + "epoch": 0.38, + "grad_norm": 0.6739274115676755, + "learning_rate": 1.4302223353749776e-05, + "loss": 0.3476, "step": 8222 }, { - "epoch": 0.47, - "grad_norm": 0.45789735825217354, - "learning_rate": 1.1374570391755532e-05, - "loss": 0.3216, + "epoch": 0.38, + "grad_norm": 0.500693917552505, + "learning_rate": 1.430088012728364e-05, + "loss": 0.3466, "step": 8223 }, { - "epoch": 0.47, - "grad_norm": 0.7984697434570868, - "learning_rate": 1.137272711964979e-05, - "loss": 0.4888, + "epoch": 0.38, + "grad_norm": 0.3785647625340663, + "learning_rate": 1.4299536805600162e-05, + "loss": 0.2482, "step": 8224 }, { - "epoch": 0.47, - "grad_norm": 0.31774601929632706, - "learning_rate": 1.1370883800006562e-05, - "loss": 0.2252, + "epoch": 0.38, + "grad_norm": 0.6809297901984518, + "learning_rate": 1.4298193388729085e-05, + "loss": 0.3459, "step": 8225 }, { - "epoch": 0.47, - "grad_norm": 0.42364764216329703, - "learning_rate": 1.1369040432889691e-05, - "loss": 0.3239, + "epoch": 0.38, + "grad_norm": 0.2940116330640593, + "learning_rate": 1.4296849876700147e-05, + "loss": 0.2244, "step": 8226 }, { - "epoch": 0.47, - "grad_norm": 0.43654263319431763, - "learning_rate": 1.1367197018363005e-05, - "loss": 0.2615, + "epoch": 0.38, + "grad_norm": 0.5055469737966245, + "learning_rate": 1.4295506269543094e-05, + "loss": 0.3049, "step": 8227 }, { - "epoch": 0.47, - "grad_norm": 0.5638770827996676, - "learning_rate": 1.1365353556490348e-05, - "loss": 0.3034, + "epoch": 0.38, + "grad_norm": 0.32497666957214005, + "learning_rate": 1.429416256728767e-05, + "loss": 0.2787, "step": 8228 }, { - "epoch": 0.47, - "grad_norm": 0.26140699917356136, - "learning_rate": 1.1363510047335553e-05, - "loss": 0.2156, + "epoch": 0.38, + "grad_norm": 0.5563397653226816, + "learning_rate": 1.429281876996363e-05, + "loss": 0.4059, "step": 8229 }, { - "epoch": 0.47, - "grad_norm": 0.8208422504947541, - "learning_rate": 1.1361666490962468e-05, - "loss": 0.4557, + "epoch": 0.38, + "grad_norm": 0.8293007549369125, + "learning_rate": 1.4291474877600719e-05, + "loss": 0.3414, "step": 8230 }, { - "epoch": 0.47, - "grad_norm": 0.9822644350984603, - "learning_rate": 1.1359822887434927e-05, - "loss": 0.7397, + "epoch": 0.38, + "grad_norm": 0.3154983003339873, + "learning_rate": 1.4290130890228691e-05, + "loss": 0.215, "step": 8231 }, { - "epoch": 0.47, - "grad_norm": 0.3314402613856236, - "learning_rate": 1.1357979236816781e-05, - "loss": 0.2076, + "epoch": 0.38, + "grad_norm": 0.3432024288293899, + "learning_rate": 1.42887868078773e-05, + "loss": 0.2873, "step": 8232 }, { - "epoch": 0.47, - "grad_norm": 0.4487027140708006, - "learning_rate": 1.135613553917187e-05, - "loss": 0.3122, + "epoch": 0.38, + "grad_norm": 0.508429358465658, + "learning_rate": 1.42874426305763e-05, + "loss": 0.3167, "step": 8233 }, { - "epoch": 0.47, - "grad_norm": 0.9682417335085425, - "learning_rate": 1.1354291794564045e-05, - "loss": 0.4434, + "epoch": 0.38, + "grad_norm": 0.36930622311605776, + "learning_rate": 1.4286098358355454e-05, + "loss": 0.2438, "step": 8234 }, { - "epoch": 0.47, - "grad_norm": 0.2746870381544497, - "learning_rate": 1.1352448003057153e-05, - "loss": 0.1845, + "epoch": 0.38, + "grad_norm": 0.6014374445171841, + "learning_rate": 1.428475399124452e-05, + "loss": 0.3574, "step": 8235 }, { - "epoch": 0.47, - "grad_norm": 0.3943326196154475, - "learning_rate": 1.1350604164715044e-05, - "loss": 0.237, + "epoch": 0.38, + "grad_norm": 0.3992719536261348, + "learning_rate": 1.4283409529273264e-05, + "loss": 0.3409, "step": 8236 }, { - "epoch": 0.47, - "grad_norm": 0.3423516469836019, - "learning_rate": 1.1348760279601572e-05, - "loss": 0.3115, + "epoch": 0.38, + "grad_norm": 0.7516816042485746, + "learning_rate": 1.4282064972471447e-05, + "loss": 0.1686, "step": 8237 }, { - "epoch": 0.47, - "grad_norm": 0.3458819009956223, - "learning_rate": 1.134691634778059e-05, - "loss": 0.219, + "epoch": 0.38, + "grad_norm": 0.32408571371041206, + "learning_rate": 1.4280720320868842e-05, + "loss": 0.2472, "step": 8238 }, { - "epoch": 0.47, - "grad_norm": 0.8749680292286744, - "learning_rate": 1.1345072369315951e-05, - "loss": 0.4917, + "epoch": 0.38, + "grad_norm": 0.4998774123811883, + "learning_rate": 1.4279375574495213e-05, + "loss": 0.3805, "step": 8239 }, { - "epoch": 0.47, - "grad_norm": 0.444178078876986, - "learning_rate": 1.1343228344271515e-05, - "loss": 0.352, + "epoch": 0.38, + "grad_norm": 0.3312330896032824, + "learning_rate": 1.4278030733380335e-05, + "loss": 0.2028, "step": 8240 }, { - "epoch": 0.47, - "grad_norm": 0.7944684734767636, - "learning_rate": 1.1341384272711138e-05, - "loss": 0.2822, + "epoch": 0.38, + "grad_norm": 0.3263480030716372, + "learning_rate": 1.4276685797553977e-05, + "loss": 0.2666, "step": 8241 }, { - "epoch": 0.47, - "grad_norm": 0.28858447543694066, - "learning_rate": 1.1339540154698682e-05, - "loss": 0.1913, + "epoch": 0.38, + "grad_norm": 1.1651884783421904, + "learning_rate": 1.427534076704592e-05, + "loss": 0.5726, "step": 8242 }, { - "epoch": 0.47, - "grad_norm": 0.33636234607029175, - "learning_rate": 1.133769599029801e-05, - "loss": 0.2435, + "epoch": 0.38, + "grad_norm": 0.3411234157170078, + "learning_rate": 1.4273995641885935e-05, + "loss": 0.2167, "step": 8243 }, { - "epoch": 0.47, - "grad_norm": 0.37425119175237026, - "learning_rate": 1.1335851779572979e-05, - "loss": 0.2879, + "epoch": 0.38, + "grad_norm": 0.324672025747547, + "learning_rate": 1.427265042210381e-05, + "loss": 0.2751, "step": 8244 }, { - "epoch": 0.47, - "grad_norm": 0.3420784443581969, - "learning_rate": 1.1334007522587462e-05, - "loss": 0.2764, + "epoch": 0.38, + "grad_norm": 0.6773576992257967, + "learning_rate": 1.4271305107729323e-05, + "loss": 0.4632, "step": 8245 }, { - "epoch": 0.47, - "grad_norm": 0.626268923474981, - "learning_rate": 1.1332163219405318e-05, - "loss": 0.4184, + "epoch": 0.38, + "grad_norm": 0.7871594143109953, + "learning_rate": 1.4269959698792254e-05, + "loss": 0.6308, "step": 8246 }, { - "epoch": 0.47, - "grad_norm": 0.33327889942284983, - "learning_rate": 1.1330318870090427e-05, - "loss": 0.2542, + "epoch": 0.38, + "grad_norm": 0.27081595429461885, + "learning_rate": 1.4268614195322397e-05, + "loss": 0.1842, "step": 8247 }, { - "epoch": 0.47, - "grad_norm": 0.3209884215108911, - "learning_rate": 1.1328474474706643e-05, - "loss": 0.224, + "epoch": 0.38, + "grad_norm": 0.4328919037659009, + "learning_rate": 1.4267268597349532e-05, + "loss": 0.3012, "step": 8248 }, { - "epoch": 0.47, - "grad_norm": 0.38396143689121437, - "learning_rate": 1.132663003331785e-05, - "loss": 0.2768, + "epoch": 0.38, + "grad_norm": 0.6474695241528116, + "learning_rate": 1.4265922904903455e-05, + "loss": 0.3572, "step": 8249 }, { - "epoch": 0.47, - "grad_norm": 0.34285888950900517, - "learning_rate": 1.1324785545987911e-05, - "loss": 0.2586, + "epoch": 0.38, + "grad_norm": 0.3901395456490721, + "learning_rate": 1.4264577118013956e-05, + "loss": 0.2612, "step": 8250 }, { - "epoch": 0.47, - "grad_norm": 0.7016183186742044, - "learning_rate": 1.1322941012780707e-05, - "loss": 0.3433, + "epoch": 0.38, + "grad_norm": 0.5435741516641559, + "learning_rate": 1.426323123671083e-05, + "loss": 0.4197, "step": 8251 }, { - "epoch": 0.47, - "grad_norm": 0.4224569861018966, - "learning_rate": 1.1321096433760116e-05, - "loss": 0.3503, + "epoch": 0.38, + "grad_norm": 0.41057205997666657, + "learning_rate": 1.4261885261023874e-05, + "loss": 0.3085, "step": 8252 }, { - "epoch": 0.47, - "grad_norm": 0.2995614695059995, - "learning_rate": 1.1319251808990009e-05, - "loss": 0.2572, + "epoch": 0.38, + "grad_norm": 0.3208076110916021, + "learning_rate": 1.4260539190982885e-05, + "loss": 0.2044, "step": 8253 }, { - "epoch": 0.47, - "grad_norm": 1.2672865639780329, - "learning_rate": 1.1317407138534268e-05, - "loss": 0.714, + "epoch": 0.38, + "grad_norm": 0.39738324868320724, + "learning_rate": 1.4259193026617666e-05, + "loss": 0.2586, "step": 8254 }, { - "epoch": 0.47, - "grad_norm": 0.20313439742018613, - "learning_rate": 1.1315562422456776e-05, - "loss": 0.1281, + "epoch": 0.38, + "grad_norm": 0.5767641286449069, + "learning_rate": 1.4257846767958017e-05, + "loss": 0.3441, "step": 8255 }, { - "epoch": 0.47, - "grad_norm": 0.37114924445860703, - "learning_rate": 1.1313717660821413e-05, - "loss": 0.2819, + "epoch": 0.38, + "grad_norm": 0.32950558612937514, + "learning_rate": 1.4256500415033743e-05, + "loss": 0.2495, "step": 8256 }, { - "epoch": 0.47, - "grad_norm": 0.4130806594357466, - "learning_rate": 1.1311872853692065e-05, - "loss": 0.3215, + "epoch": 0.38, + "grad_norm": 0.8659393031052606, + "learning_rate": 1.4255153967874655e-05, + "loss": 0.5898, "step": 8257 }, { - "epoch": 0.47, - "grad_norm": 0.7315074555515565, - "learning_rate": 1.1310028001132615e-05, - "loss": 0.2889, + "epoch": 0.38, + "grad_norm": 1.2129542811941005, + "learning_rate": 1.4253807426510557e-05, + "loss": 0.7702, "step": 8258 }, { - "epoch": 0.47, - "grad_norm": 0.3518162924006683, - "learning_rate": 1.1308183103206956e-05, - "loss": 0.2678, + "epoch": 0.38, + "grad_norm": 0.296508594716759, + "learning_rate": 1.4252460790971267e-05, + "loss": 0.2256, "step": 8259 }, { - "epoch": 0.47, - "grad_norm": 1.263822710183851, - "learning_rate": 1.1306338159978968e-05, - "loss": 0.8082, + "epoch": 0.38, + "grad_norm": 0.3024211221431034, + "learning_rate": 1.4251114061286591e-05, + "loss": 0.2054, "step": 8260 }, { - "epoch": 0.47, - "grad_norm": 0.2290140963302349, - "learning_rate": 1.1304493171512548e-05, - "loss": 0.1771, + "epoch": 0.38, + "grad_norm": 0.629455061463019, + "learning_rate": 1.4249767237486344e-05, + "loss": 0.404, "step": 8261 }, { - "epoch": 0.47, - "grad_norm": 0.3609070041446921, - "learning_rate": 1.1302648137871584e-05, - "loss": 0.2591, + "epoch": 0.38, + "grad_norm": 0.3592135681012521, + "learning_rate": 1.4248420319600351e-05, + "loss": 0.3016, "step": 8262 }, { - "epoch": 0.47, - "grad_norm": 0.7500955160282625, - "learning_rate": 1.1300803059119969e-05, - "loss": 0.4461, + "epoch": 0.38, + "grad_norm": 0.42935572212673834, + "learning_rate": 1.4247073307658422e-05, + "loss": 0.3038, "step": 8263 }, { - "epoch": 0.47, - "grad_norm": 0.47247522252554397, - "learning_rate": 1.1298957935321604e-05, - "loss": 0.4008, + "epoch": 0.38, + "grad_norm": 0.4893976846326565, + "learning_rate": 1.4245726201690386e-05, + "loss": 0.3563, "step": 8264 }, { - "epoch": 0.47, - "grad_norm": 0.29187071697633193, - "learning_rate": 1.129711276654038e-05, - "loss": 0.2102, + "epoch": 0.38, + "grad_norm": 0.28904360618292674, + "learning_rate": 1.4244379001726065e-05, + "loss": 0.2514, "step": 8265 }, { - "epoch": 0.47, - "grad_norm": 0.9689601512300138, - "learning_rate": 1.1295267552840198e-05, - "loss": 0.7327, + "epoch": 0.38, + "grad_norm": 0.3715148170386901, + "learning_rate": 1.4243031707795283e-05, + "loss": 0.1751, "step": 8266 }, { - "epoch": 0.47, - "grad_norm": 0.39459373406513565, - "learning_rate": 1.1293422294284955e-05, - "loss": 0.255, + "epoch": 0.38, + "grad_norm": 0.3769245694937176, + "learning_rate": 1.4241684319927869e-05, + "loss": 0.3347, "step": 8267 }, { - "epoch": 0.48, - "grad_norm": 0.2841527542011416, - "learning_rate": 1.1291576990938556e-05, - "loss": 0.234, + "epoch": 0.38, + "grad_norm": 0.4310484106575423, + "learning_rate": 1.424033683815365e-05, + "loss": 0.3046, "step": 8268 }, { - "epoch": 0.48, - "grad_norm": 0.26846990348811833, - "learning_rate": 1.1289731642864896e-05, - "loss": 0.2712, + "epoch": 0.38, + "grad_norm": 0.6512186791516351, + "learning_rate": 1.4238989262502461e-05, + "loss": 0.3629, "step": 8269 }, { - "epoch": 0.48, - "grad_norm": 1.2160805062145421, - "learning_rate": 1.1287886250127888e-05, - "loss": 0.7619, + "epoch": 0.38, + "grad_norm": 0.5867256433960768, + "learning_rate": 1.4237641593004137e-05, + "loss": 0.4039, "step": 8270 }, { - "epoch": 0.48, - "grad_norm": 0.3180662479564137, - "learning_rate": 1.1286040812791431e-05, - "loss": 0.2088, + "epoch": 0.38, + "grad_norm": 0.4412019197574116, + "learning_rate": 1.4236293829688512e-05, + "loss": 0.3168, "step": 8271 }, { - "epoch": 0.48, - "grad_norm": 1.2780419499498312, - "learning_rate": 1.1284195330919443e-05, - "loss": 0.763, + "epoch": 0.38, + "grad_norm": 0.3316372908240092, + "learning_rate": 1.4234945972585425e-05, + "loss": 0.2509, "step": 8272 }, { - "epoch": 0.48, - "grad_norm": 0.34751151421346865, - "learning_rate": 1.128234980457582e-05, - "loss": 0.3257, + "epoch": 0.38, + "grad_norm": 0.8027012524175728, + "learning_rate": 1.4233598021724713e-05, + "loss": 0.3549, "step": 8273 }, { - "epoch": 0.48, - "grad_norm": 0.29037446054088023, - "learning_rate": 1.1280504233824481e-05, - "loss": 0.2382, + "epoch": 0.38, + "grad_norm": 0.38313271633334883, + "learning_rate": 1.4232249977136223e-05, + "loss": 0.2873, "step": 8274 }, { - "epoch": 0.48, - "grad_norm": 0.400642610476773, - "learning_rate": 1.1278658618729334e-05, - "loss": 0.2889, + "epoch": 0.38, + "grad_norm": 0.37618331604683003, + "learning_rate": 1.42309018388498e-05, + "loss": 0.3376, "step": 8275 }, { - "epoch": 0.48, - "grad_norm": 0.28690418079254904, - "learning_rate": 1.1276812959354295e-05, - "loss": 0.2466, + "epoch": 0.38, + "grad_norm": 1.020892246020979, + "learning_rate": 1.4229553606895283e-05, + "loss": 0.5781, "step": 8276 }, { - "epoch": 0.48, - "grad_norm": 0.3976401918567556, - "learning_rate": 1.1274967255763278e-05, - "loss": 0.2747, + "epoch": 0.38, + "grad_norm": 0.35412187163161724, + "learning_rate": 1.422820528130253e-05, + "loss": 0.2652, "step": 8277 }, { - "epoch": 0.48, - "grad_norm": 0.4454863521161837, - "learning_rate": 1.1273121508020202e-05, - "loss": 0.2982, + "epoch": 0.38, + "grad_norm": 0.3469304159858399, + "learning_rate": 1.4226856862101386e-05, + "loss": 0.2316, "step": 8278 }, { - "epoch": 0.48, - "grad_norm": 0.5729809893476822, - "learning_rate": 1.1271275716188978e-05, - "loss": 0.4256, + "epoch": 0.38, + "grad_norm": 0.40010826622359014, + "learning_rate": 1.4225508349321703e-05, + "loss": 0.2923, "step": 8279 }, { - "epoch": 0.48, - "grad_norm": 0.410761763093016, - "learning_rate": 1.1269429880333533e-05, - "loss": 0.2653, + "epoch": 0.38, + "grad_norm": 0.38629007148138467, + "learning_rate": 1.422415974299334e-05, + "loss": 0.2742, "step": 8280 }, { - "epoch": 0.48, - "grad_norm": 0.2517747570353783, - "learning_rate": 1.1267584000517788e-05, - "loss": 0.2289, + "epoch": 0.38, + "grad_norm": 1.4801700007586074, + "learning_rate": 1.4222811043146151e-05, + "loss": 0.6828, "step": 8281 }, { - "epoch": 0.48, - "grad_norm": 0.4175701025964965, - "learning_rate": 1.1265738076805663e-05, - "loss": 0.2465, + "epoch": 0.38, + "grad_norm": 0.7985265986567396, + "learning_rate": 1.4221462249809995e-05, + "loss": 0.3236, "step": 8282 }, { - "epoch": 0.48, - "grad_norm": 0.4040133712318601, - "learning_rate": 1.1263892109261081e-05, - "loss": 0.2884, + "epoch": 0.38, + "grad_norm": 0.3486461059814203, + "learning_rate": 1.4220113363014737e-05, + "loss": 0.2847, "step": 8283 }, { - "epoch": 0.48, - "grad_norm": 0.3315685666860372, - "learning_rate": 1.126204609794797e-05, - "loss": 0.2601, + "epoch": 0.38, + "grad_norm": 0.6958512211605081, + "learning_rate": 1.4218764382790232e-05, + "loss": 0.4726, "step": 8284 }, { - "epoch": 0.48, - "grad_norm": 0.6738959505588366, - "learning_rate": 1.1260200042930257e-05, - "loss": 0.4439, + "epoch": 0.38, + "grad_norm": 0.3062411974786232, + "learning_rate": 1.4217415309166351e-05, + "loss": 0.2239, "step": 8285 }, { - "epoch": 0.48, - "grad_norm": 0.3713996287944544, - "learning_rate": 1.125835394427187e-05, - "loss": 0.3126, + "epoch": 0.38, + "grad_norm": 0.3752021628541434, + "learning_rate": 1.421606614217296e-05, + "loss": 0.212, "step": 8286 }, { - "epoch": 0.48, - "grad_norm": 0.26116662030586907, - "learning_rate": 1.1256507802036742e-05, - "loss": 0.1798, + "epoch": 0.38, + "grad_norm": 0.4256591722831189, + "learning_rate": 1.4214716881839927e-05, + "loss": 0.3223, "step": 8287 }, { - "epoch": 0.48, - "grad_norm": 0.2923099580818043, - "learning_rate": 1.12546616162888e-05, - "loss": 0.241, + "epoch": 0.38, + "grad_norm": 1.286491178139137, + "learning_rate": 1.4213367528197127e-05, + "loss": 0.8123, "step": 8288 }, { - "epoch": 0.48, - "grad_norm": 0.4591105534812718, - "learning_rate": 1.1252815387091984e-05, - "loss": 0.293, + "epoch": 0.38, + "grad_norm": 0.3167630392597797, + "learning_rate": 1.421201808127443e-05, + "loss": 0.1936, "step": 8289 }, { - "epoch": 0.48, - "grad_norm": 0.5414758905754559, - "learning_rate": 1.1250969114510221e-05, - "loss": 0.4012, + "epoch": 0.38, + "grad_norm": 0.6625154944774565, + "learning_rate": 1.4210668541101713e-05, + "loss": 0.4146, "step": 8290 }, { - "epoch": 0.48, - "grad_norm": 0.8942911584626979, - "learning_rate": 1.1249122798607454e-05, - "loss": 0.3676, + "epoch": 0.38, + "grad_norm": 0.3311778954071521, + "learning_rate": 1.4209318907708849e-05, + "loss": 0.2565, "step": 8291 }, { - "epoch": 0.48, - "grad_norm": 0.35940592339450533, - "learning_rate": 1.1247276439447616e-05, - "loss": 0.2682, + "epoch": 0.38, + "grad_norm": 0.25608659693305835, + "learning_rate": 1.4207969181125724e-05, + "loss": 0.1672, "step": 8292 }, { - "epoch": 0.48, - "grad_norm": 0.3883606131416445, - "learning_rate": 1.124543003709465e-05, - "loss": 0.3171, + "epoch": 0.38, + "grad_norm": 1.042094085495781, + "learning_rate": 1.4206619361382217e-05, + "loss": 0.5922, "step": 8293 }, { - "epoch": 0.48, - "grad_norm": 0.2069420790155224, - "learning_rate": 1.1243583591612495e-05, - "loss": 0.1109, + "epoch": 0.38, + "grad_norm": 1.1386649402286328, + "learning_rate": 1.4205269448508212e-05, + "loss": 0.7319, "step": 8294 }, { - "epoch": 0.48, - "grad_norm": 0.3893463873924719, - "learning_rate": 1.1241737103065096e-05, - "loss": 0.2684, + "epoch": 0.38, + "grad_norm": 0.2925751990324721, + "learning_rate": 1.4203919442533597e-05, + "loss": 0.2352, "step": 8295 }, { - "epoch": 0.48, - "grad_norm": 0.44638528446469994, - "learning_rate": 1.1239890571516389e-05, - "loss": 0.3573, + "epoch": 0.38, + "grad_norm": 0.47186431465199724, + "learning_rate": 1.4202569343488252e-05, + "loss": 0.3621, "step": 8296 }, { - "epoch": 0.48, - "grad_norm": 0.9358041698450383, - "learning_rate": 1.123804399703033e-05, - "loss": 0.3285, + "epoch": 0.38, + "grad_norm": 0.28595574622766345, + "learning_rate": 1.4201219151402073e-05, + "loss": 0.1503, "step": 8297 }, { - "epoch": 0.48, - "grad_norm": 0.6012185686365062, - "learning_rate": 1.1236197379670861e-05, - "loss": 0.3471, + "epoch": 0.38, + "grad_norm": 0.35606121252602985, + "learning_rate": 1.4199868866304956e-05, + "loss": 0.2787, "step": 8298 }, { - "epoch": 0.48, - "grad_norm": 0.36799061504790626, - "learning_rate": 1.1234350719501927e-05, - "loss": 0.3477, + "epoch": 0.38, + "grad_norm": 0.3502827033922692, + "learning_rate": 1.4198518488226784e-05, + "loss": 0.2661, "step": 8299 }, { - "epoch": 0.48, - "grad_norm": 0.22412798408524393, - "learning_rate": 1.1232504016587482e-05, - "loss": 0.1747, + "epoch": 0.38, + "grad_norm": 0.8021081853826463, + "learning_rate": 1.4197168017197462e-05, + "loss": 0.6023, "step": 8300 }, { - "epoch": 0.48, - "grad_norm": 0.4149487453115047, - "learning_rate": 1.1230657270991476e-05, - "loss": 0.304, + "epoch": 0.38, + "grad_norm": 0.33458848921008755, + "learning_rate": 1.4195817453246887e-05, + "loss": 0.2604, "step": 8301 }, { - "epoch": 0.48, - "grad_norm": 0.5459475622031438, - "learning_rate": 1.1228810482777859e-05, - "loss": 0.3991, + "epoch": 0.38, + "grad_norm": 0.6828502740801683, + "learning_rate": 1.4194466796404958e-05, + "loss": 0.3457, "step": 8302 }, { - "epoch": 0.48, - "grad_norm": 0.9562637175288264, - "learning_rate": 1.1226963652010592e-05, - "loss": 0.5005, + "epoch": 0.38, + "grad_norm": 0.3023315998949838, + "learning_rate": 1.4193116046701572e-05, + "loss": 0.2546, "step": 8303 }, { - "epoch": 0.48, - "grad_norm": 0.27131366109251476, - "learning_rate": 1.1225116778753622e-05, - "loss": 0.2124, + "epoch": 0.38, + "grad_norm": 0.4738507904651978, + "learning_rate": 1.4191765204166643e-05, + "loss": 0.3487, "step": 8304 }, { - "epoch": 0.48, - "grad_norm": 0.37477020048094534, - "learning_rate": 1.1223269863070913e-05, - "loss": 0.3101, + "epoch": 0.38, + "grad_norm": 0.33281990801805617, + "learning_rate": 1.419041426883007e-05, + "loss": 0.1806, "step": 8305 }, { - "epoch": 0.48, - "grad_norm": 0.37990887770967674, - "learning_rate": 1.1221422905026424e-05, - "loss": 0.1822, + "epoch": 0.38, + "grad_norm": 0.637455605167356, + "learning_rate": 1.4189063240721766e-05, + "loss": 0.367, "step": 8306 }, { - "epoch": 0.48, - "grad_norm": 0.4007847446197605, - "learning_rate": 1.1219575904684109e-05, - "loss": 0.2274, + "epoch": 0.38, + "grad_norm": 0.41095119840036776, + "learning_rate": 1.418771211987164e-05, + "loss": 0.2842, "step": 8307 }, { - "epoch": 0.48, - "grad_norm": 0.3859141568843464, - "learning_rate": 1.1217728862107932e-05, - "loss": 0.3088, + "epoch": 0.38, + "grad_norm": 0.39602692584253674, + "learning_rate": 1.41863609063096e-05, + "loss": 0.2876, "step": 8308 }, { - "epoch": 0.48, - "grad_norm": 0.5814554045581455, - "learning_rate": 1.1215881777361858e-05, - "loss": 0.3877, + "epoch": 0.38, + "grad_norm": 0.30913304420687365, + "learning_rate": 1.4185009600065565e-05, + "loss": 0.21, "step": 8309 }, { - "epoch": 0.48, - "grad_norm": 0.31317402052160326, - "learning_rate": 1.1214034650509853e-05, - "loss": 0.2069, + "epoch": 0.38, + "grad_norm": 0.4305256156674565, + "learning_rate": 1.418365820116945e-05, + "loss": 0.291, "step": 8310 }, { - "epoch": 0.48, - "grad_norm": 1.3864302814986327, - "learning_rate": 1.1212187481615875e-05, - "loss": 0.6831, + "epoch": 0.38, + "grad_norm": 0.3446494696603676, + "learning_rate": 1.4182306709651177e-05, + "loss": 0.2971, "step": 8311 }, { - "epoch": 0.48, - "grad_norm": 0.24158373982842526, - "learning_rate": 1.1210340270743903e-05, - "loss": 0.2158, + "epoch": 0.38, + "grad_norm": 0.8826954763928107, + "learning_rate": 1.4180955125540665e-05, + "loss": 0.381, "step": 8312 }, { - "epoch": 0.48, - "grad_norm": 0.38673341680171747, - "learning_rate": 1.1208493017957893e-05, - "loss": 0.2194, + "epoch": 0.38, + "grad_norm": 0.3581151018596223, + "learning_rate": 1.4179603448867836e-05, + "loss": 0.2849, "step": 8313 }, { - "epoch": 0.48, - "grad_norm": 0.3480815756689843, - "learning_rate": 1.1206645723321825e-05, - "loss": 0.3104, + "epoch": 0.38, + "grad_norm": 0.8359842363230909, + "learning_rate": 1.4178251679662614e-05, + "loss": 0.566, "step": 8314 }, { - "epoch": 0.48, - "grad_norm": 1.142989272685945, - "learning_rate": 1.1204798386899669e-05, - "loss": 0.8243, + "epoch": 0.38, + "grad_norm": 0.37843562191127134, + "learning_rate": 1.4176899817954928e-05, + "loss": 0.2519, "step": 8315 }, { - "epoch": 0.48, - "grad_norm": 0.334355108496965, - "learning_rate": 1.1202951008755395e-05, - "loss": 0.2723, + "epoch": 0.38, + "grad_norm": 0.30080564416317757, + "learning_rate": 1.4175547863774703e-05, + "loss": 0.2298, "step": 8316 }, { - "epoch": 0.48, - "grad_norm": 0.342405895746166, - "learning_rate": 1.1201103588952979e-05, - "loss": 0.2292, + "epoch": 0.38, + "grad_norm": 0.49607200884047337, + "learning_rate": 1.4174195817151875e-05, + "loss": 0.3165, "step": 8317 }, { - "epoch": 0.48, - "grad_norm": 0.35075106621619223, - "learning_rate": 1.11992561275564e-05, - "loss": 0.2068, + "epoch": 0.38, + "grad_norm": 0.7897007575682163, + "learning_rate": 1.4172843678116375e-05, + "loss": 0.3481, "step": 8318 }, { - "epoch": 0.48, - "grad_norm": 0.5953512589583299, - "learning_rate": 1.1197408624629626e-05, - "loss": 0.4117, + "epoch": 0.38, + "grad_norm": 0.3373544346959952, + "learning_rate": 1.4171491446698138e-05, + "loss": 0.2736, "step": 8319 }, { - "epoch": 0.48, - "grad_norm": 0.27076852878493896, - "learning_rate": 1.119556108023665e-05, - "loss": 0.2509, + "epoch": 0.38, + "grad_norm": 1.3269886988420891, + "learning_rate": 1.4170139122927102e-05, + "loss": 0.665, "step": 8320 }, { - "epoch": 0.48, - "grad_norm": 1.2379250749740365, - "learning_rate": 1.119371349444144e-05, - "loss": 0.8403, + "epoch": 0.38, + "grad_norm": 0.3243934960004902, + "learning_rate": 1.4168786706833201e-05, + "loss": 0.1458, "step": 8321 }, { - "epoch": 0.48, - "grad_norm": 0.6147005884618181, - "learning_rate": 1.1191865867307987e-05, - "loss": 0.4054, + "epoch": 0.38, + "grad_norm": 0.33387000967091074, + "learning_rate": 1.4167434198446385e-05, + "loss": 0.1778, "step": 8322 }, { - "epoch": 0.48, - "grad_norm": 0.3673949040504266, - "learning_rate": 1.1190018198900267e-05, - "loss": 0.2359, + "epoch": 0.38, + "grad_norm": 0.43336848222540136, + "learning_rate": 1.4166081597796585e-05, + "loss": 0.2884, "step": 8323 }, { - "epoch": 0.48, - "grad_norm": 0.2579786089458528, - "learning_rate": 1.118817048928227e-05, - "loss": 0.2306, + "epoch": 0.38, + "grad_norm": 0.9538955930483495, + "learning_rate": 1.4164728904913763e-05, + "loss": 0.4561, "step": 8324 }, { - "epoch": 0.48, - "grad_norm": 0.5153506800417209, - "learning_rate": 1.1186322738517983e-05, - "loss": 0.3424, + "epoch": 0.38, + "grad_norm": 0.41910496610832654, + "learning_rate": 1.4163376119827852e-05, + "loss": 0.1941, "step": 8325 }, { - "epoch": 0.48, - "grad_norm": 0.2880843628827029, - "learning_rate": 1.1184474946671384e-05, - "loss": 0.2012, + "epoch": 0.38, + "grad_norm": 0.5271766647170193, + "learning_rate": 1.4162023242568808e-05, + "loss": 0.3904, "step": 8326 }, { - "epoch": 0.48, - "grad_norm": 1.1330484123308262, - "learning_rate": 1.1182627113806475e-05, - "loss": 0.7822, + "epoch": 0.38, + "grad_norm": 0.3869840551181553, + "learning_rate": 1.4160670273166577e-05, + "loss": 0.3283, "step": 8327 }, { - "epoch": 0.48, - "grad_norm": 0.29700312651214944, - "learning_rate": 1.1180779239987233e-05, - "loss": 0.2688, + "epoch": 0.38, + "grad_norm": 0.271840515044541, + "learning_rate": 1.4159317211651121e-05, + "loss": 0.1418, "step": 8328 }, { - "epoch": 0.48, - "grad_norm": 0.3926248997185353, - "learning_rate": 1.1178931325277662e-05, - "loss": 0.3018, + "epoch": 0.38, + "grad_norm": 0.5354729510812137, + "learning_rate": 1.4157964058052386e-05, + "loss": 0.2977, "step": 8329 }, { - "epoch": 0.48, - "grad_norm": 0.4880065387650843, - "learning_rate": 1.1177083369741749e-05, - "loss": 0.3457, + "epoch": 0.38, + "grad_norm": 0.9964154731159086, + "learning_rate": 1.415661081240034e-05, + "loss": 0.4394, "step": 8330 }, { - "epoch": 0.48, - "grad_norm": 0.4622472597385853, - "learning_rate": 1.117523537344349e-05, - "loss": 0.3208, + "epoch": 0.38, + "grad_norm": 0.3260220703713384, + "learning_rate": 1.4155257474724933e-05, + "loss": 0.2387, "step": 8331 }, { - "epoch": 0.48, - "grad_norm": 0.26773122651731995, - "learning_rate": 1.1173387336446879e-05, - "loss": 0.246, + "epoch": 0.38, + "grad_norm": 0.5818842654350443, + "learning_rate": 1.4153904045056133e-05, + "loss": 0.3696, "step": 8332 }, { - "epoch": 0.48, - "grad_norm": 0.27469813985436253, - "learning_rate": 1.1171539258815916e-05, - "loss": 0.2141, + "epoch": 0.38, + "grad_norm": 1.0454566898057644, + "learning_rate": 1.4152550523423899e-05, + "loss": 0.6645, "step": 8333 }, { - "epoch": 0.48, - "grad_norm": 0.6729771240210307, - "learning_rate": 1.1169691140614597e-05, - "loss": 0.4272, + "epoch": 0.38, + "grad_norm": 0.25305444840855956, + "learning_rate": 1.4151196909858198e-05, + "loss": 0.1572, "step": 8334 }, { - "epoch": 0.48, - "grad_norm": 0.40650217240748127, - "learning_rate": 1.1167842981906927e-05, - "loss": 0.3098, + "epoch": 0.38, + "grad_norm": 0.46436510899334427, + "learning_rate": 1.4149843204389e-05, + "loss": 0.3453, "step": 8335 }, { - "epoch": 0.48, - "grad_norm": 0.33788023906972464, - "learning_rate": 1.1165994782756902e-05, - "loss": 0.2863, + "epoch": 0.38, + "grad_norm": 1.2816850198830303, + "learning_rate": 1.4148489407046274e-05, + "loss": 0.4429, "step": 8336 }, { - "epoch": 0.48, - "grad_norm": 0.8965463163501143, - "learning_rate": 1.1164146543228529e-05, - "loss": 0.4183, + "epoch": 0.38, + "grad_norm": 0.4543095119203716, + "learning_rate": 1.414713551785999e-05, + "loss": 0.2968, "step": 8337 }, { - "epoch": 0.48, - "grad_norm": 0.37608376944922794, - "learning_rate": 1.116229826338581e-05, - "loss": 0.2967, + "epoch": 0.38, + "grad_norm": 0.33814364495431637, + "learning_rate": 1.4145781536860121e-05, + "loss": 0.157, "step": 8338 }, { - "epoch": 0.48, - "grad_norm": 1.1709144692691702, - "learning_rate": 1.1160449943292754e-05, - "loss": 0.7408, + "epoch": 0.38, + "grad_norm": 0.5369432056711996, + "learning_rate": 1.4144427464076646e-05, + "loss": 0.3656, "step": 8339 }, { - "epoch": 0.48, - "grad_norm": 0.20606643679960182, - "learning_rate": 1.1158601583013365e-05, - "loss": 0.1803, + "epoch": 0.38, + "grad_norm": 0.6198441021701011, + "learning_rate": 1.414307329953954e-05, + "loss": 0.3451, "step": 8340 }, { - "epoch": 0.48, - "grad_norm": 0.3825272882807248, - "learning_rate": 1.1156753182611655e-05, - "loss": 0.2924, + "epoch": 0.38, + "grad_norm": 0.6267565658793902, + "learning_rate": 1.4141719043278785e-05, + "loss": 0.2989, "step": 8341 }, { - "epoch": 0.48, - "grad_norm": 0.757439344922993, - "learning_rate": 1.1154904742151628e-05, - "loss": 0.408, + "epoch": 0.38, + "grad_norm": 0.3364937231571777, + "learning_rate": 1.4140364695324365e-05, + "loss": 0.2504, "step": 8342 }, { - "epoch": 0.48, - "grad_norm": 0.47983847004271224, - "learning_rate": 1.1153056261697303e-05, - "loss": 0.2154, + "epoch": 0.38, + "grad_norm": 0.8974299082522351, + "learning_rate": 1.4139010255706259e-05, + "loss": 0.5453, "step": 8343 }, { - "epoch": 0.48, - "grad_norm": 0.31408770009618836, - "learning_rate": 1.1151207741312688e-05, - "loss": 0.2766, + "epoch": 0.38, + "grad_norm": 0.2984702575793649, + "learning_rate": 1.4137655724454453e-05, + "loss": 0.1837, "step": 8344 }, { - "epoch": 0.48, - "grad_norm": 1.1777256065853905, - "learning_rate": 1.11493591810618e-05, - "loss": 0.8841, + "epoch": 0.38, + "grad_norm": 1.2317971121554436, + "learning_rate": 1.4136301101598942e-05, + "loss": 0.5802, "step": 8345 }, { - "epoch": 0.48, - "grad_norm": 0.14795722092036293, - "learning_rate": 1.1147510581008654e-05, - "loss": 0.0727, + "epoch": 0.38, + "grad_norm": 0.5169261515583555, + "learning_rate": 1.4134946387169705e-05, + "loss": 0.3726, "step": 8346 }, { - "epoch": 0.48, - "grad_norm": 0.44407525535508613, - "learning_rate": 1.114566194121726e-05, - "loss": 0.3331, + "epoch": 0.38, + "grad_norm": 0.37980059124722226, + "learning_rate": 1.4133591581196746e-05, + "loss": 0.3058, "step": 8347 }, { - "epoch": 0.48, - "grad_norm": 0.4429171903471908, - "learning_rate": 1.1143813261751648e-05, - "loss": 0.3368, + "epoch": 0.38, + "grad_norm": 0.7735793752953474, + "learning_rate": 1.413223668371005e-05, + "loss": 0.3914, "step": 8348 }, { - "epoch": 0.48, - "grad_norm": 0.5326441778644128, - "learning_rate": 1.1141964542675831e-05, - "loss": 0.2415, + "epoch": 0.38, + "grad_norm": 0.3346244705925037, + "learning_rate": 1.4130881694739617e-05, + "loss": 0.2256, "step": 8349 }, { - "epoch": 0.48, - "grad_norm": 0.35629373156998423, - "learning_rate": 1.1140115784053828e-05, - "loss": 0.2704, + "epoch": 0.38, + "grad_norm": 0.4122299241979648, + "learning_rate": 1.4129526614315448e-05, + "loss": 0.2699, "step": 8350 }, { - "epoch": 0.48, - "grad_norm": 0.473832740368251, - "learning_rate": 1.1138266985949668e-05, - "loss": 0.3667, + "epoch": 0.38, + "grad_norm": 0.4209972973913896, + "learning_rate": 1.4128171442467535e-05, + "loss": 0.2708, "step": 8351 }, { - "epoch": 0.48, - "grad_norm": 0.24716345117047317, - "learning_rate": 1.113641814842737e-05, - "loss": 0.1892, + "epoch": 0.38, + "grad_norm": 0.5985139734984927, + "learning_rate": 1.4126816179225889e-05, + "loss": 0.3478, "step": 8352 }, { - "epoch": 0.48, - "grad_norm": 0.37177419017363733, - "learning_rate": 1.1134569271550959e-05, - "loss": 0.2241, + "epoch": 0.38, + "grad_norm": 0.40030918772224366, + "learning_rate": 1.4125460824620509e-05, + "loss": 0.3038, "step": 8353 }, { - "epoch": 0.48, - "grad_norm": 0.6795655987417271, - "learning_rate": 1.1132720355384466e-05, - "loss": 0.433, + "epoch": 0.38, + "grad_norm": 0.387367158986872, + "learning_rate": 1.4124105378681403e-05, + "loss": 0.2657, "step": 8354 }, { - "epoch": 0.48, - "grad_norm": 0.4546558880720396, - "learning_rate": 1.1130871399991912e-05, - "loss": 0.3476, + "epoch": 0.38, + "grad_norm": 0.4387209029325645, + "learning_rate": 1.4122749841438576e-05, + "loss": 0.3061, "step": 8355 }, { - "epoch": 0.48, - "grad_norm": 0.2735908648714051, - "learning_rate": 1.1129022405437333e-05, - "loss": 0.2299, + "epoch": 0.38, + "grad_norm": 0.4044172792348891, + "learning_rate": 1.4121394212922043e-05, + "loss": 0.2619, "step": 8356 }, { - "epoch": 0.48, - "grad_norm": 1.1504777351766975, - "learning_rate": 1.1127173371784755e-05, - "loss": 0.7199, + "epoch": 0.38, + "grad_norm": 0.5365427164354623, + "learning_rate": 1.4120038493161814e-05, + "loss": 0.2597, "step": 8357 }, { - "epoch": 0.48, - "grad_norm": 0.320853590136227, - "learning_rate": 1.112532429909821e-05, - "loss": 0.22, + "epoch": 0.38, + "grad_norm": 0.3957579301613287, + "learning_rate": 1.4118682682187903e-05, + "loss": 0.2931, "step": 8358 }, { - "epoch": 0.48, - "grad_norm": 0.2922420371940266, - "learning_rate": 1.1123475187441735e-05, - "loss": 0.2365, + "epoch": 0.38, + "grad_norm": 0.3717424904229399, + "learning_rate": 1.411732678003033e-05, + "loss": 0.3024, "step": 8359 }, { - "epoch": 0.48, - "grad_norm": 0.4621379502333083, - "learning_rate": 1.1121626036879362e-05, - "loss": 0.3615, + "epoch": 0.38, + "grad_norm": 1.3263915218207296, + "learning_rate": 1.4115970786719109e-05, + "loss": 0.8239, "step": 8360 }, { - "epoch": 0.48, - "grad_norm": 0.7091643725104544, - "learning_rate": 1.1119776847475128e-05, - "loss": 0.3798, + "epoch": 0.38, + "grad_norm": 0.2742231609470218, + "learning_rate": 1.411461470228426e-05, + "loss": 0.1239, "step": 8361 }, { - "epoch": 0.48, - "grad_norm": 0.32042337282411815, - "learning_rate": 1.1117927619293072e-05, - "loss": 0.2344, + "epoch": 0.38, + "grad_norm": 0.31242284483508165, + "learning_rate": 1.4113258526755808e-05, + "loss": 0.2428, "step": 8362 }, { - "epoch": 0.48, - "grad_norm": 0.46225350017426375, - "learning_rate": 1.1116078352397226e-05, - "loss": 0.3815, + "epoch": 0.38, + "grad_norm": 0.4240100975667483, + "learning_rate": 1.4111902260163776e-05, + "loss": 0.3457, "step": 8363 }, { - "epoch": 0.48, - "grad_norm": 0.44794763153718986, - "learning_rate": 1.1114229046851639e-05, - "loss": 0.3283, + "epoch": 0.38, + "grad_norm": 0.5959482051496351, + "learning_rate": 1.4110545902538193e-05, + "loss": 0.2535, "step": 8364 }, { - "epoch": 0.48, - "grad_norm": 0.35863928811423473, - "learning_rate": 1.1112379702720346e-05, - "loss": 0.2993, + "epoch": 0.38, + "grad_norm": 0.4177513987410519, + "learning_rate": 1.4109189453909085e-05, + "loss": 0.3469, "step": 8365 }, { - "epoch": 0.48, - "grad_norm": 0.2997559968892406, - "learning_rate": 1.1110530320067395e-05, - "loss": 0.1826, + "epoch": 0.38, + "grad_norm": 0.5453172208280188, + "learning_rate": 1.4107832914306482e-05, + "loss": 0.4051, "step": 8366 }, { - "epoch": 0.48, - "grad_norm": 0.31177442430328983, - "learning_rate": 1.110868089895682e-05, - "loss": 0.2818, + "epoch": 0.38, + "grad_norm": 0.3586084145948304, + "learning_rate": 1.410647628376042e-05, + "loss": 0.2226, "step": 8367 }, { - "epoch": 0.48, - "grad_norm": 0.34426675271911766, - "learning_rate": 1.1106831439452678e-05, - "loss": 0.2813, + "epoch": 0.38, + "grad_norm": 0.2418591975043847, + "learning_rate": 1.4105119562300928e-05, + "loss": 0.1918, "step": 8368 }, { - "epoch": 0.48, - "grad_norm": 1.159122972842692, - "learning_rate": 1.1104981941619008e-05, - "loss": 0.5404, + "epoch": 0.38, + "grad_norm": 0.6902292244709147, + "learning_rate": 1.4103762749958048e-05, + "loss": 0.455, "step": 8369 }, { - "epoch": 0.48, - "grad_norm": 0.6308084552291687, - "learning_rate": 1.1103132405519866e-05, - "loss": 0.4178, + "epoch": 0.38, + "grad_norm": 0.3351449713623426, + "learning_rate": 1.4102405846761813e-05, + "loss": 0.2478, "step": 8370 }, { - "epoch": 0.48, - "grad_norm": 0.3508909818733613, - "learning_rate": 1.1101282831219292e-05, - "loss": 0.2844, + "epoch": 0.38, + "grad_norm": 0.49323792866826704, + "learning_rate": 1.410104885274227e-05, + "loss": 0.3139, "step": 8371 }, { - "epoch": 0.48, - "grad_norm": 0.24925963458866746, - "learning_rate": 1.1099433218781342e-05, - "loss": 0.1824, + "epoch": 0.38, + "grad_norm": 1.0110069175331307, + "learning_rate": 1.4099691767929455e-05, + "loss": 0.6657, "step": 8372 }, { - "epoch": 0.48, - "grad_norm": 1.0146895770631508, - "learning_rate": 1.1097583568270068e-05, - "loss": 0.5618, + "epoch": 0.38, + "grad_norm": 0.38350253352465036, + "learning_rate": 1.4098334592353418e-05, + "loss": 0.2799, "step": 8373 }, { - "epoch": 0.48, - "grad_norm": 0.3402111836733542, - "learning_rate": 1.1095733879749517e-05, - "loss": 0.2561, + "epoch": 0.38, + "grad_norm": 0.3809796130037751, + "learning_rate": 1.4096977326044199e-05, + "loss": 0.2574, "step": 8374 }, { - "epoch": 0.48, - "grad_norm": 0.44394453768906555, - "learning_rate": 1.1093884153283755e-05, - "loss": 0.2666, + "epoch": 0.38, + "grad_norm": 0.38894388978141226, + "learning_rate": 1.4095619969031854e-05, + "loss": 0.3504, "step": 8375 }, { - "epoch": 0.48, - "grad_norm": 0.5030724555789422, - "learning_rate": 1.1092034388936827e-05, - "loss": 0.3637, + "epoch": 0.38, + "grad_norm": 0.35202368400193174, + "learning_rate": 1.4094262521346429e-05, + "loss": 0.2049, "step": 8376 }, { - "epoch": 0.48, - "grad_norm": 0.3053604077245766, - "learning_rate": 1.1090184586772798e-05, - "loss": 0.2541, + "epoch": 0.38, + "grad_norm": 0.44750788603893854, + "learning_rate": 1.4092904983017981e-05, + "loss": 0.2484, "step": 8377 }, { - "epoch": 0.48, - "grad_norm": 0.26072680670778176, - "learning_rate": 1.1088334746855724e-05, - "loss": 0.1699, + "epoch": 0.38, + "grad_norm": 0.3739361510890599, + "learning_rate": 1.4091547354076558e-05, + "loss": 0.3233, "step": 8378 }, { - "epoch": 0.48, - "grad_norm": 0.33213697117973734, - "learning_rate": 1.1086484869249664e-05, - "loss": 0.2501, + "epoch": 0.38, + "grad_norm": 1.2724970426941196, + "learning_rate": 1.4090189634552221e-05, + "loss": 0.7023, "step": 8379 }, { - "epoch": 0.48, - "grad_norm": 0.3411697209711852, - "learning_rate": 1.1084634954018679e-05, - "loss": 0.2749, + "epoch": 0.38, + "grad_norm": 0.3268668473401568, + "learning_rate": 1.408883182447503e-05, + "loss": 0.2206, "step": 8380 }, { - "epoch": 0.48, - "grad_norm": 0.7937308936897097, - "learning_rate": 1.1082785001226833e-05, - "loss": 0.527, + "epoch": 0.39, + "grad_norm": 0.3779306796608664, + "learning_rate": 1.4087473923875039e-05, + "loss": 0.2583, "step": 8381 }, { - "epoch": 0.48, - "grad_norm": 0.5427995751624755, - "learning_rate": 1.108093501093819e-05, - "loss": 0.2829, + "epoch": 0.39, + "grad_norm": 0.395837958980147, + "learning_rate": 1.4086115932782316e-05, + "loss": 0.2936, "step": 8382 }, { - "epoch": 0.48, - "grad_norm": 0.37423001686239854, - "learning_rate": 1.1079084983216812e-05, - "loss": 0.294, + "epoch": 0.39, + "grad_norm": 0.3989767050434608, + "learning_rate": 1.4084757851226926e-05, + "loss": 0.2023, "step": 8383 }, { - "epoch": 0.48, - "grad_norm": 0.2610551272973318, - "learning_rate": 1.107723491812677e-05, - "loss": 0.2371, + "epoch": 0.39, + "grad_norm": 1.2778752243504499, + "learning_rate": 1.4083399679238936e-05, + "loss": 0.823, "step": 8384 }, { - "epoch": 0.48, - "grad_norm": 0.2848863442961672, - "learning_rate": 1.1075384815732126e-05, - "loss": 0.1655, + "epoch": 0.39, + "grad_norm": 0.6473078055819477, + "learning_rate": 1.4082041416848409e-05, + "loss": 0.456, "step": 8385 }, { - "epoch": 0.48, - "grad_norm": 0.36620040221470535, - "learning_rate": 1.1073534676096953e-05, - "loss": 0.3269, + "epoch": 0.39, + "grad_norm": 0.32157155140672566, + "learning_rate": 1.408068306408542e-05, + "loss": 0.3073, "step": 8386 }, { - "epoch": 0.48, - "grad_norm": 0.40895567597138205, - "learning_rate": 1.107168449928532e-05, - "loss": 0.3493, + "epoch": 0.39, + "grad_norm": 0.42496594944160104, + "learning_rate": 1.4079324620980042e-05, + "loss": 0.2947, "step": 8387 }, { - "epoch": 0.48, - "grad_norm": 0.9694638436873771, - "learning_rate": 1.1069834285361299e-05, - "loss": 0.3581, + "epoch": 0.39, + "grad_norm": 0.25490104677522024, + "learning_rate": 1.4077966087562348e-05, + "loss": 0.1624, "step": 8388 }, { - "epoch": 0.48, - "grad_norm": 0.3307306363709223, - "learning_rate": 1.1067984034388963e-05, - "loss": 0.2843, + "epoch": 0.39, + "grad_norm": 0.41096192040201357, + "learning_rate": 1.4076607463862417e-05, + "loss": 0.2942, "step": 8389 }, { - "epoch": 0.48, - "grad_norm": 0.2628285063571431, - "learning_rate": 1.1066133746432388e-05, - "loss": 0.1729, + "epoch": 0.39, + "grad_norm": 0.3830051081627851, + "learning_rate": 1.4075248749910323e-05, + "loss": 0.2854, "step": 8390 }, { - "epoch": 0.48, - "grad_norm": 0.39208782700167216, - "learning_rate": 1.1064283421555643e-05, - "loss": 0.2981, + "epoch": 0.39, + "grad_norm": 0.5900140540608588, + "learning_rate": 1.407388994573615e-05, + "loss": 0.3997, "step": 8391 }, { - "epoch": 0.48, - "grad_norm": 0.32337473433052144, - "learning_rate": 1.1062433059822813e-05, - "loss": 0.2115, + "epoch": 0.39, + "grad_norm": 0.4188817398600617, + "learning_rate": 1.4072531051369983e-05, + "loss": 0.3371, "step": 8392 }, { - "epoch": 0.48, - "grad_norm": 0.9751414436431275, - "learning_rate": 1.106058266129797e-05, - "loss": 0.4711, + "epoch": 0.39, + "grad_norm": 0.3733857060062306, + "learning_rate": 1.4071172066841898e-05, + "loss": 0.2264, "step": 8393 }, { - "epoch": 0.48, - "grad_norm": 0.917279257161967, - "learning_rate": 1.1058732226045195e-05, - "loss": 0.504, + "epoch": 0.39, + "grad_norm": 0.2716939649546834, + "learning_rate": 1.4069812992181992e-05, + "loss": 0.2271, "step": 8394 }, { - "epoch": 0.48, - "grad_norm": 0.27816877474586016, - "learning_rate": 1.1056881754128568e-05, - "loss": 0.2211, + "epoch": 0.39, + "grad_norm": 0.39234828651491327, + "learning_rate": 1.4068453827420352e-05, + "loss": 0.2745, "step": 8395 }, { - "epoch": 0.48, - "grad_norm": 0.4602594483651638, - "learning_rate": 1.1055031245612172e-05, - "loss": 0.3818, + "epoch": 0.39, + "grad_norm": 0.7365202622938131, + "learning_rate": 1.4067094572587059e-05, + "loss": 0.4841, "step": 8396 }, { - "epoch": 0.48, - "grad_norm": 0.2664228010575755, - "learning_rate": 1.1053180700560086e-05, - "loss": 0.1863, + "epoch": 0.39, + "grad_norm": 0.7547279656587319, + "learning_rate": 1.4065735227712213e-05, + "loss": 0.4704, "step": 8397 }, { - "epoch": 0.48, - "grad_norm": 0.36439137476338745, - "learning_rate": 1.1051330119036404e-05, - "loss": 0.2241, + "epoch": 0.39, + "grad_norm": 0.32114775016531544, + "learning_rate": 1.4064375792825909e-05, + "loss": 0.2829, "step": 8398 }, { - "epoch": 0.48, - "grad_norm": 0.638885939720972, - "learning_rate": 1.1049479501105202e-05, - "loss": 0.3925, + "epoch": 0.39, + "grad_norm": 0.46779251729312765, + "learning_rate": 1.4063016267958242e-05, + "loss": 0.3586, "step": 8399 }, { - "epoch": 0.48, - "grad_norm": 0.4978465949232517, - "learning_rate": 1.1047628846830571e-05, - "loss": 0.3931, + "epoch": 0.39, + "grad_norm": 0.25185112691888867, + "learning_rate": 1.4061656653139312e-05, + "loss": 0.0957, "step": 8400 }, { - "epoch": 0.48, - "grad_norm": 0.33079743434334197, - "learning_rate": 1.1045778156276596e-05, - "loss": 0.1901, + "epoch": 0.39, + "grad_norm": 0.38763830760633167, + "learning_rate": 1.4060296948399219e-05, + "loss": 0.297, "step": 8401 }, { - "epoch": 0.48, - "grad_norm": 0.3101247786660534, - "learning_rate": 1.104392742950737e-05, - "loss": 0.2588, + "epoch": 0.39, + "grad_norm": 0.38427400808556933, + "learning_rate": 1.4058937153768062e-05, + "loss": 0.3255, "step": 8402 }, { - "epoch": 0.48, - "grad_norm": 0.2805944667132338, - "learning_rate": 1.104207666658698e-05, - "loss": 0.2896, + "epoch": 0.39, + "grad_norm": 0.817437157993242, + "learning_rate": 1.4057577269275951e-05, + "loss": 0.3598, "step": 8403 }, { - "epoch": 0.48, - "grad_norm": 0.3730808898739765, - "learning_rate": 1.1040225867579522e-05, - "loss": 0.2604, + "epoch": 0.39, + "grad_norm": 0.357050636439786, + "learning_rate": 1.4056217294952985e-05, + "loss": 0.2792, "step": 8404 }, { - "epoch": 0.48, - "grad_norm": 0.5112786993131957, - "learning_rate": 1.1038375032549085e-05, - "loss": 0.33, + "epoch": 0.39, + "grad_norm": 1.079065450734748, + "learning_rate": 1.4054857230829284e-05, + "loss": 0.6757, "step": 8405 }, { - "epoch": 0.48, - "grad_norm": 0.9322563183427266, - "learning_rate": 1.1036524161559767e-05, - "loss": 0.5786, + "epoch": 0.39, + "grad_norm": 0.2423752338683816, + "learning_rate": 1.4053497076934948e-05, + "loss": 0.1776, "step": 8406 }, { - "epoch": 0.48, - "grad_norm": 0.355904824217904, - "learning_rate": 1.103467325467566e-05, - "loss": 0.2834, + "epoch": 0.39, + "grad_norm": 0.39856406086672413, + "learning_rate": 1.4052136833300096e-05, + "loss": 0.2933, "step": 8407 }, { - "epoch": 0.48, - "grad_norm": 0.2743782333531795, - "learning_rate": 1.1032822311960866e-05, - "loss": 0.2112, + "epoch": 0.39, + "grad_norm": 0.9826072317811388, + "learning_rate": 1.4050776499954836e-05, + "loss": 0.5867, "step": 8408 }, { - "epoch": 0.48, - "grad_norm": 0.4259052635545149, - "learning_rate": 1.1030971333479477e-05, - "loss": 0.3138, + "epoch": 0.39, + "grad_norm": 0.5999054965175215, + "learning_rate": 1.4049416076929294e-05, + "loss": 0.3108, "step": 8409 }, { - "epoch": 0.48, - "grad_norm": 0.37998773225911436, - "learning_rate": 1.1029120319295597e-05, - "loss": 0.3443, + "epoch": 0.39, + "grad_norm": 0.41138789647026763, + "learning_rate": 1.404805556425358e-05, + "loss": 0.31, "step": 8410 }, { - "epoch": 0.48, - "grad_norm": 0.34994216114806836, - "learning_rate": 1.1027269269473324e-05, - "loss": 0.2692, + "epoch": 0.39, + "grad_norm": 0.48498338479675973, + "learning_rate": 1.4046694961957816e-05, + "loss": 0.3664, "step": 8411 }, { - "epoch": 0.48, - "grad_norm": 1.0286726329274982, - "learning_rate": 1.102541818407676e-05, - "loss": 0.6472, + "epoch": 0.39, + "grad_norm": 0.28553114364080506, + "learning_rate": 1.4045334270072129e-05, + "loss": 0.1605, "step": 8412 }, { - "epoch": 0.48, - "grad_norm": 0.39969223773471535, - "learning_rate": 1.1023567063170008e-05, - "loss": 0.3095, + "epoch": 0.39, + "grad_norm": 0.3395833842939998, + "learning_rate": 1.4043973488626638e-05, + "loss": 0.2023, "step": 8413 }, { - "epoch": 0.48, - "grad_norm": 0.4792364386637519, - "learning_rate": 1.1021715906817172e-05, - "loss": 0.2102, + "epoch": 0.39, + "grad_norm": 0.470706823202285, + "learning_rate": 1.4042612617651475e-05, + "loss": 0.3458, "step": 8414 }, { - "epoch": 0.48, - "grad_norm": 0.24998225105266594, - "learning_rate": 1.101986471508236e-05, - "loss": 0.2621, + "epoch": 0.39, + "grad_norm": 1.024884683962352, + "learning_rate": 1.404125165717676e-05, + "loss": 0.471, "step": 8415 }, { - "epoch": 0.48, - "grad_norm": 0.3296303617196106, - "learning_rate": 1.1018013488029675e-05, - "loss": 0.272, + "epoch": 0.39, + "grad_norm": 0.3463040562914271, + "learning_rate": 1.4039890607232631e-05, + "loss": 0.2106, "step": 8416 }, { - "epoch": 0.48, - "grad_norm": 0.831873250616347, - "learning_rate": 1.1016162225723227e-05, - "loss": 0.5823, + "epoch": 0.39, + "grad_norm": 0.8753635101713028, + "learning_rate": 1.4038529467849216e-05, + "loss": 0.5764, "step": 8417 }, { - "epoch": 0.48, - "grad_norm": 0.3007402335020122, - "learning_rate": 1.1014310928227125e-05, - "loss": 0.2131, + "epoch": 0.39, + "grad_norm": 0.40906770315930097, + "learning_rate": 1.4037168239056653e-05, + "loss": 0.3254, "step": 8418 }, { - "epoch": 0.48, - "grad_norm": 0.364565455538296, - "learning_rate": 1.101245959560548e-05, - "loss": 0.2672, + "epoch": 0.39, + "grad_norm": 0.2648254490613112, + "learning_rate": 1.4035806920885076e-05, + "loss": 0.1844, "step": 8419 }, { - "epoch": 0.48, - "grad_norm": 0.3912456702815622, - "learning_rate": 1.1010608227922401e-05, - "loss": 0.3275, + "epoch": 0.39, + "grad_norm": 0.9510440019615164, + "learning_rate": 1.4034445513364623e-05, + "loss": 0.4473, "step": 8420 }, { - "epoch": 0.48, - "grad_norm": 0.4532113401173461, - "learning_rate": 1.1008756825242007e-05, - "loss": 0.2709, + "epoch": 0.39, + "grad_norm": 0.5837046686067077, + "learning_rate": 1.4033084016525434e-05, + "loss": 0.38, "step": 8421 }, { - "epoch": 0.48, - "grad_norm": 0.5767832010251636, - "learning_rate": 1.10069053876284e-05, - "loss": 0.3767, + "epoch": 0.39, + "grad_norm": 0.29198736947130144, + "learning_rate": 1.4031722430397654e-05, + "loss": 0.1977, "step": 8422 }, { - "epoch": 0.48, - "grad_norm": 0.3190711161352703, - "learning_rate": 1.100505391514571e-05, - "loss": 0.309, + "epoch": 0.39, + "grad_norm": 1.091303170510882, + "learning_rate": 1.4030360755011423e-05, + "loss": 0.6062, "step": 8423 }, { - "epoch": 0.48, - "grad_norm": 0.2566852014475039, - "learning_rate": 1.1003202407858042e-05, - "loss": 0.1297, + "epoch": 0.39, + "grad_norm": 0.41463392639862046, + "learning_rate": 1.4028998990396892e-05, + "loss": 0.2174, "step": 8424 }, { - "epoch": 0.48, - "grad_norm": 0.3887013888366776, - "learning_rate": 1.1001350865829519e-05, - "loss": 0.2731, + "epoch": 0.39, + "grad_norm": 0.3820207331337777, + "learning_rate": 1.4027637136584204e-05, + "loss": 0.2732, "step": 8425 }, { - "epoch": 0.48, - "grad_norm": 0.49546719830955543, - "learning_rate": 1.0999499289124259e-05, - "loss": 0.371, + "epoch": 0.39, + "grad_norm": 0.39145817235672925, + "learning_rate": 1.4026275193603512e-05, + "loss": 0.284, "step": 8426 }, { - "epoch": 0.48, - "grad_norm": 0.3685020261407154, - "learning_rate": 1.0997647677806381e-05, - "loss": 0.318, + "epoch": 0.39, + "grad_norm": 0.4699027382928074, + "learning_rate": 1.402491316148497e-05, + "loss": 0.3157, "step": 8427 }, { - "epoch": 0.48, - "grad_norm": 0.323905474118738, - "learning_rate": 1.0995796031940004e-05, - "loss": 0.2349, + "epoch": 0.39, + "grad_norm": 0.44876850117075945, + "learning_rate": 1.4023551040258726e-05, + "loss": 0.2683, "step": 8428 }, { - "epoch": 0.48, - "grad_norm": 0.5429412404412678, - "learning_rate": 1.0993944351589257e-05, - "loss": 0.4474, + "epoch": 0.39, + "grad_norm": 0.5694311260236654, + "learning_rate": 1.4022188829954942e-05, + "loss": 0.2653, "step": 8429 }, { - "epoch": 0.48, - "grad_norm": 0.2698732310913593, - "learning_rate": 1.0992092636818261e-05, - "loss": 0.1615, + "epoch": 0.39, + "grad_norm": 0.4293590810229933, + "learning_rate": 1.4020826530603775e-05, + "loss": 0.3149, "step": 8430 }, { - "epoch": 0.48, - "grad_norm": 0.2698012591809379, - "learning_rate": 1.0990240887691135e-05, - "loss": 0.2257, + "epoch": 0.39, + "grad_norm": 0.41571343746067607, + "learning_rate": 1.4019464142235384e-05, + "loss": 0.2928, "step": 8431 }, { - "epoch": 0.48, - "grad_norm": 0.5174891868486703, - "learning_rate": 1.0988389104272012e-05, - "loss": 0.3601, + "epoch": 0.39, + "grad_norm": 0.544280640505497, + "learning_rate": 1.4018101664879928e-05, + "loss": 0.3351, "step": 8432 }, { - "epoch": 0.48, - "grad_norm": 0.6007606961336437, - "learning_rate": 1.098653728662502e-05, - "loss": 0.4257, + "epoch": 0.39, + "grad_norm": 0.5099393882434021, + "learning_rate": 1.4016739098567578e-05, + "loss": 0.2912, "step": 8433 }, { - "epoch": 0.48, - "grad_norm": 0.32474766627803614, - "learning_rate": 1.098468543481428e-05, - "loss": 0.2144, + "epoch": 0.39, + "grad_norm": 0.3061957128159386, + "learning_rate": 1.4015376443328492e-05, + "loss": 0.2655, "step": 8434 }, { - "epoch": 0.48, - "grad_norm": 0.39732993885930923, - "learning_rate": 1.0982833548903926e-05, - "loss": 0.3183, + "epoch": 0.39, + "grad_norm": 0.3032689568287926, + "learning_rate": 1.4014013699192844e-05, + "loss": 0.1833, "step": 8435 }, { - "epoch": 0.48, - "grad_norm": 0.26185582493114695, - "learning_rate": 1.0980981628958091e-05, - "loss": 0.2002, + "epoch": 0.39, + "grad_norm": 0.8849504701893046, + "learning_rate": 1.40126508661908e-05, + "loss": 0.5205, "step": 8436 }, { - "epoch": 0.48, - "grad_norm": 0.3630987442855988, - "learning_rate": 1.0979129675040902e-05, - "loss": 0.1708, + "epoch": 0.39, + "grad_norm": 0.39717313317905134, + "learning_rate": 1.4011287944352534e-05, + "loss": 0.3223, "step": 8437 }, { - "epoch": 0.48, - "grad_norm": 0.4030675818976453, - "learning_rate": 1.0977277687216497e-05, - "loss": 0.3411, + "epoch": 0.39, + "grad_norm": 0.33860011089813125, + "learning_rate": 1.4009924933708216e-05, + "loss": 0.3001, "step": 8438 }, { - "epoch": 0.48, - "grad_norm": 0.37339136570184556, - "learning_rate": 1.0975425665549005e-05, - "loss": 0.3173, + "epoch": 0.39, + "grad_norm": 0.3014424498564491, + "learning_rate": 1.400856183428803e-05, + "loss": 0.1792, "step": 8439 }, { - "epoch": 0.48, - "grad_norm": 0.8845059457123733, - "learning_rate": 1.0973573610102566e-05, - "loss": 0.4562, + "epoch": 0.39, + "grad_norm": 0.3488479813098447, + "learning_rate": 1.4007198646122146e-05, + "loss": 0.2338, "step": 8440 }, { - "epoch": 0.48, - "grad_norm": 0.31660898433428125, - "learning_rate": 1.0971721520941312e-05, - "loss": 0.239, + "epoch": 0.39, + "grad_norm": 1.0454745472972045, + "learning_rate": 1.4005835369240748e-05, + "loss": 0.6543, "step": 8441 }, { - "epoch": 0.49, - "grad_norm": 0.23668371578828956, - "learning_rate": 1.0969869398129385e-05, - "loss": 0.1924, + "epoch": 0.39, + "grad_norm": 0.3697541376870566, + "learning_rate": 1.4004472003674015e-05, + "loss": 0.3042, "step": 8442 }, { - "epoch": 0.49, - "grad_norm": 0.40438627675862854, - "learning_rate": 1.0968017241730922e-05, - "loss": 0.3176, + "epoch": 0.39, + "grad_norm": 0.3816200180279807, + "learning_rate": 1.400310854945213e-05, + "loss": 0.3064, "step": 8443 }, { - "epoch": 0.49, - "grad_norm": 0.4396742144328353, - "learning_rate": 1.0966165051810066e-05, - "loss": 0.2638, + "epoch": 0.39, + "grad_norm": 0.9658734496525833, + "learning_rate": 1.4001745006605281e-05, + "loss": 0.494, "step": 8444 }, { - "epoch": 0.49, - "grad_norm": 0.8565682090328727, - "learning_rate": 1.0964312828430952e-05, - "loss": 0.4941, + "epoch": 0.39, + "grad_norm": 0.37319705630787964, + "learning_rate": 1.4000381375163652e-05, + "loss": 0.2762, "step": 8445 }, { - "epoch": 0.49, - "grad_norm": 0.644386355055355, - "learning_rate": 1.096246057165773e-05, - "loss": 0.3571, + "epoch": 0.39, + "grad_norm": 0.39397362636972316, + "learning_rate": 1.3999017655157436e-05, + "loss": 0.2403, "step": 8446 }, { - "epoch": 0.49, - "grad_norm": 0.2632623269668236, - "learning_rate": 1.0960608281554536e-05, - "loss": 0.258, + "epoch": 0.39, + "grad_norm": 0.3135257580097957, + "learning_rate": 1.3997653846616825e-05, + "loss": 0.2279, "step": 8447 }, { - "epoch": 0.49, - "grad_norm": 0.3898501466449026, - "learning_rate": 1.0958755958185521e-05, - "loss": 0.2782, + "epoch": 0.39, + "grad_norm": 0.6525687511957984, + "learning_rate": 1.399628994957201e-05, + "loss": 0.3726, "step": 8448 }, { - "epoch": 0.49, - "grad_norm": 0.39076503964584275, - "learning_rate": 1.0956903601614827e-05, - "loss": 0.2519, + "epoch": 0.39, + "grad_norm": 0.4407266394218482, + "learning_rate": 1.3994925964053187e-05, + "loss": 0.3021, "step": 8449 }, { - "epoch": 0.49, - "grad_norm": 0.34799939748395714, - "learning_rate": 1.0955051211906607e-05, - "loss": 0.2699, + "epoch": 0.39, + "grad_norm": 0.36298166467163834, + "learning_rate": 1.3993561890090555e-05, + "loss": 0.2882, "step": 8450 }, { - "epoch": 0.49, - "grad_norm": 0.34692010363267356, - "learning_rate": 1.0953198789125e-05, - "loss": 0.3284, + "epoch": 0.39, + "grad_norm": 0.9384397449173023, + "learning_rate": 1.399219772771431e-05, + "loss": 0.602, "step": 8451 }, { - "epoch": 0.49, - "grad_norm": 0.5508850692361121, - "learning_rate": 1.095134633333416e-05, - "loss": 0.3315, + "epoch": 0.39, + "grad_norm": 0.25441765582543197, + "learning_rate": 1.3990833476954654e-05, + "loss": 0.1476, "step": 8452 }, { - "epoch": 0.49, - "grad_norm": 0.38972264224855135, - "learning_rate": 1.0949493844598237e-05, - "loss": 0.3421, + "epoch": 0.39, + "grad_norm": 0.30130512182544705, + "learning_rate": 1.3989469137841796e-05, + "loss": 0.2638, "step": 8453 }, { - "epoch": 0.49, - "grad_norm": 0.2861316486348512, - "learning_rate": 1.0947641322981387e-05, - "loss": 0.2046, + "epoch": 0.39, + "grad_norm": 0.5048584270870148, + "learning_rate": 1.3988104710405935e-05, + "loss": 0.346, "step": 8454 }, { - "epoch": 0.49, - "grad_norm": 0.4901872853125752, - "learning_rate": 1.0945788768547754e-05, - "loss": 0.3408, + "epoch": 0.39, + "grad_norm": 0.36874212917559185, + "learning_rate": 1.3986740194677276e-05, + "loss": 0.2467, "step": 8455 }, { - "epoch": 0.49, - "grad_norm": 0.34229442973648827, - "learning_rate": 1.0943936181361501e-05, - "loss": 0.3144, + "epoch": 0.39, + "grad_norm": 0.5650466546811083, + "learning_rate": 1.3985375590686032e-05, + "loss": 0.411, "step": 8456 }, { - "epoch": 0.49, - "grad_norm": 0.30739139755930395, - "learning_rate": 1.0942083561486775e-05, - "loss": 0.1327, + "epoch": 0.39, + "grad_norm": 0.5477315217213206, + "learning_rate": 1.3984010898462417e-05, + "loss": 0.378, "step": 8457 }, { - "epoch": 0.49, - "grad_norm": 0.3992928835707834, - "learning_rate": 1.0940230908987737e-05, - "loss": 0.3175, + "epoch": 0.39, + "grad_norm": 0.24891331773560105, + "learning_rate": 1.3982646118036636e-05, + "loss": 0.1791, "step": 8458 }, { - "epoch": 0.49, - "grad_norm": 0.2975848893390521, - "learning_rate": 1.0938378223928545e-05, - "loss": 0.2855, + "epoch": 0.39, + "grad_norm": 0.34171174029909035, + "learning_rate": 1.3981281249438912e-05, + "loss": 0.256, "step": 8459 }, { - "epoch": 0.49, - "grad_norm": 0.6888931226475903, - "learning_rate": 1.0936525506373353e-05, - "loss": 0.3482, + "epoch": 0.39, + "grad_norm": 0.6123843231220961, + "learning_rate": 1.3979916292699459e-05, + "loss": 0.4523, "step": 8460 }, { - "epoch": 0.49, - "grad_norm": 0.5397706730536465, - "learning_rate": 1.0934672756386324e-05, - "loss": 0.4013, + "epoch": 0.39, + "grad_norm": 0.35757927094487424, + "learning_rate": 1.3978551247848493e-05, + "loss": 0.2186, "step": 8461 }, { - "epoch": 0.49, - "grad_norm": 0.2018669105288497, - "learning_rate": 1.0932819974031616e-05, - "loss": 0.1768, + "epoch": 0.39, + "grad_norm": 0.4092818692279441, + "learning_rate": 1.3977186114916237e-05, + "loss": 0.3525, "step": 8462 }, { - "epoch": 0.49, - "grad_norm": 0.3994888465967652, - "learning_rate": 1.0930967159373393e-05, - "loss": 0.2612, + "epoch": 0.39, + "grad_norm": 1.0717378054467916, + "learning_rate": 1.3975820893932915e-05, + "loss": 0.6972, "step": 8463 }, { - "epoch": 0.49, - "grad_norm": 0.7874372028522038, - "learning_rate": 1.0929114312475818e-05, - "loss": 0.4882, + "epoch": 0.39, + "grad_norm": 0.4293976133490958, + "learning_rate": 1.3974455584928748e-05, + "loss": 0.2555, "step": 8464 }, { - "epoch": 0.49, - "grad_norm": 0.3514672262598415, - "learning_rate": 1.0927261433403055e-05, - "loss": 0.3112, + "epoch": 0.39, + "grad_norm": 0.2643967117599458, + "learning_rate": 1.397309018793397e-05, + "loss": 0.2137, "step": 8465 }, { - "epoch": 0.49, - "grad_norm": 0.5423505007021057, - "learning_rate": 1.092540852221927e-05, - "loss": 0.3962, + "epoch": 0.39, + "grad_norm": 0.45123197854750835, + "learning_rate": 1.3971724702978797e-05, + "loss": 0.3619, "step": 8466 }, { - "epoch": 0.49, - "grad_norm": 0.47049811026281263, - "learning_rate": 1.0923555578988624e-05, - "loss": 0.2611, + "epoch": 0.39, + "grad_norm": 0.9838178837311349, + "learning_rate": 1.3970359130093472e-05, + "loss": 0.556, "step": 8467 }, { - "epoch": 0.49, - "grad_norm": 0.39710438531386655, - "learning_rate": 1.0921702603775288e-05, - "loss": 0.2879, + "epoch": 0.39, + "grad_norm": 0.3711195720528197, + "learning_rate": 1.396899346930822e-05, + "loss": 0.252, "step": 8468 }, { - "epoch": 0.49, - "grad_norm": 0.2792052805726842, - "learning_rate": 1.0919849596643434e-05, - "loss": 0.1829, + "epoch": 0.39, + "grad_norm": 0.4184727112273616, + "learning_rate": 1.396762772065328e-05, + "loss": 0.3446, "step": 8469 }, { - "epoch": 0.49, - "grad_norm": 0.3047677379880632, - "learning_rate": 1.0917996557657224e-05, - "loss": 0.2355, + "epoch": 0.39, + "grad_norm": 0.6404714497359931, + "learning_rate": 1.3966261884158886e-05, + "loss": 0.3513, "step": 8470 }, { - "epoch": 0.49, - "grad_norm": 0.40934080266730394, - "learning_rate": 1.0916143486880836e-05, - "loss": 0.3056, + "epoch": 0.39, + "grad_norm": 0.2883200748739, + "learning_rate": 1.396489595985528e-05, + "loss": 0.24, "step": 8471 }, { - "epoch": 0.49, - "grad_norm": 0.8182633785891162, - "learning_rate": 1.0914290384378436e-05, - "loss": 0.4636, + "epoch": 0.39, + "grad_norm": 0.4235786280547083, + "learning_rate": 1.3963529947772694e-05, + "loss": 0.2843, "step": 8472 }, { - "epoch": 0.49, - "grad_norm": 0.5738428159353489, - "learning_rate": 1.09124372502142e-05, - "loss": 0.284, + "epoch": 0.39, + "grad_norm": 0.3946153849328818, + "learning_rate": 1.3962163847941377e-05, + "loss": 0.2833, "step": 8473 }, { - "epoch": 0.49, - "grad_norm": 0.2757029445511652, - "learning_rate": 1.09105840844523e-05, - "loss": 0.2102, + "epoch": 0.39, + "grad_norm": 0.3555982280832828, + "learning_rate": 1.396079766039157e-05, + "loss": 0.2972, "step": 8474 }, { - "epoch": 0.49, - "grad_norm": 0.24592269658330476, - "learning_rate": 1.0908730887156915e-05, - "loss": 0.2472, + "epoch": 0.39, + "grad_norm": 0.9023830755959277, + "learning_rate": 1.3959431385153518e-05, + "loss": 0.4539, "step": 8475 }, { - "epoch": 0.49, - "grad_norm": 0.4877436876419471, - "learning_rate": 1.090687765839222e-05, - "loss": 0.2014, + "epoch": 0.39, + "grad_norm": 0.4427341069784527, + "learning_rate": 1.3958065022257477e-05, + "loss": 0.3485, "step": 8476 }, { - "epoch": 0.49, - "grad_norm": 0.3949928498732002, - "learning_rate": 1.0905024398222386e-05, - "loss": 0.3212, + "epoch": 0.39, + "grad_norm": 0.44892885144620304, + "learning_rate": 1.395669857173369e-05, + "loss": 0.3297, "step": 8477 }, { - "epoch": 0.49, - "grad_norm": 0.5094201877628184, - "learning_rate": 1.09031711067116e-05, - "loss": 0.3381, + "epoch": 0.39, + "grad_norm": 0.271523710271832, + "learning_rate": 1.3955332033612408e-05, + "loss": 0.1984, "step": 8478 }, { - "epoch": 0.49, - "grad_norm": 0.9709515948124112, - "learning_rate": 1.0901317783924032e-05, - "loss": 0.4758, + "epoch": 0.39, + "grad_norm": 0.664420495817769, + "learning_rate": 1.3953965407923886e-05, + "loss": 0.3441, "step": 8479 }, { - "epoch": 0.49, - "grad_norm": 0.3008208066982179, - "learning_rate": 1.0899464429923874e-05, - "loss": 0.2053, + "epoch": 0.39, + "grad_norm": 0.44802949150614396, + "learning_rate": 1.3952598694698383e-05, + "loss": 0.3437, "step": 8480 }, { - "epoch": 0.49, - "grad_norm": 0.2425174984408598, - "learning_rate": 1.0897611044775299e-05, - "loss": 0.1676, + "epoch": 0.39, + "grad_norm": 0.3887416718120399, + "learning_rate": 1.3951231893966153e-05, + "loss": 0.2813, "step": 8481 }, { - "epoch": 0.49, - "grad_norm": 0.3567217475863464, - "learning_rate": 1.0895757628542492e-05, - "loss": 0.3243, + "epoch": 0.39, + "grad_norm": 0.762944238041198, + "learning_rate": 1.3949865005757457e-05, + "loss": 0.4188, "step": 8482 }, { - "epoch": 0.49, - "grad_norm": 0.3865743507662977, - "learning_rate": 1.0893904181289637e-05, - "loss": 0.2332, + "epoch": 0.39, + "grad_norm": 0.41592069846814067, + "learning_rate": 1.394849803010256e-05, + "loss": 0.345, "step": 8483 }, { - "epoch": 0.49, - "grad_norm": 0.9081131636400291, - "learning_rate": 1.0892050703080918e-05, - "loss": 0.4564, + "epoch": 0.39, + "grad_norm": 0.2950019950437227, + "learning_rate": 1.394713096703172e-05, + "loss": 0.1558, "step": 8484 }, { - "epoch": 0.49, - "grad_norm": 1.337498254558959, - "learning_rate": 1.0890197193980523e-05, - "loss": 0.8662, + "epoch": 0.39, + "grad_norm": 0.38871049944651764, + "learning_rate": 1.3945763816575201e-05, + "loss": 0.2958, "step": 8485 }, { - "epoch": 0.49, - "grad_norm": 0.3029460382510471, - "learning_rate": 1.0888343654052636e-05, - "loss": 0.2082, + "epoch": 0.39, + "grad_norm": 0.3876362689948571, + "learning_rate": 1.394439657876328e-05, + "loss": 0.2703, "step": 8486 }, { - "epoch": 0.49, - "grad_norm": 0.22595061674438283, - "learning_rate": 1.0886490083361445e-05, - "loss": 0.2207, + "epoch": 0.39, + "grad_norm": 0.847185267272819, + "learning_rate": 1.394302925362622e-05, + "loss": 0.5585, "step": 8487 }, { - "epoch": 0.49, - "grad_norm": 0.6513624736080968, - "learning_rate": 1.0884636481971145e-05, - "loss": 0.4554, + "epoch": 0.39, + "grad_norm": 0.550811164903522, + "learning_rate": 1.3941661841194289e-05, + "loss": 0.308, "step": 8488 }, { - "epoch": 0.49, - "grad_norm": 0.3546626256247301, - "learning_rate": 1.0882782849945917e-05, - "loss": 0.2338, + "epoch": 0.39, + "grad_norm": 0.305925136493401, + "learning_rate": 1.3940294341497766e-05, + "loss": 0.2999, "step": 8489 }, { - "epoch": 0.49, - "grad_norm": 0.4004379725262802, - "learning_rate": 1.088092918734996e-05, - "loss": 0.3369, + "epoch": 0.39, + "grad_norm": 0.5358463537813835, + "learning_rate": 1.3938926754566922e-05, + "loss": 0.3734, "step": 8490 }, { - "epoch": 0.49, - "grad_norm": 1.06392676133309, - "learning_rate": 1.0879075494247459e-05, - "loss": 0.6164, + "epoch": 0.39, + "grad_norm": 0.2133394742991905, + "learning_rate": 1.393755908043204e-05, + "loss": 0.1076, "step": 8491 }, { - "epoch": 0.49, - "grad_norm": 0.33643344970466654, - "learning_rate": 1.0877221770702618e-05, - "loss": 0.2818, + "epoch": 0.39, + "grad_norm": 0.45524234141582276, + "learning_rate": 1.393619131912339e-05, + "loss": 0.3638, "step": 8492 }, { - "epoch": 0.49, - "grad_norm": 0.20948852930825718, - "learning_rate": 1.087536801677962e-05, - "loss": 0.1037, + "epoch": 0.39, + "grad_norm": 0.41980553631650835, + "learning_rate": 1.393482347067126e-05, + "loss": 0.3377, "step": 8493 }, { - "epoch": 0.49, - "grad_norm": 0.3585673923696074, - "learning_rate": 1.0873514232542665e-05, - "loss": 0.3189, + "epoch": 0.39, + "grad_norm": 0.8103515758561848, + "learning_rate": 1.3933455535105932e-05, + "loss": 0.3086, "step": 8494 }, { - "epoch": 0.49, - "grad_norm": 0.35729435806128756, - "learning_rate": 1.0871660418055954e-05, - "loss": 0.2793, + "epoch": 0.39, + "grad_norm": 0.42955318250226254, + "learning_rate": 1.393208751245769e-05, + "loss": 0.3205, "step": 8495 }, { - "epoch": 0.49, - "grad_norm": 0.616608207430578, - "learning_rate": 1.0869806573383675e-05, - "loss": 0.362, + "epoch": 0.39, + "grad_norm": 0.5213164709636929, + "learning_rate": 1.3930719402756818e-05, + "loss": 0.3206, "step": 8496 }, { - "epoch": 0.49, - "grad_norm": 1.3825893173323816, - "learning_rate": 1.0867952698590036e-05, - "loss": 0.6839, + "epoch": 0.39, + "grad_norm": 0.22754755576489064, + "learning_rate": 1.3929351206033607e-05, + "loss": 0.1973, "step": 8497 }, { - "epoch": 0.49, - "grad_norm": 0.27447229366512443, - "learning_rate": 1.0866098793739229e-05, - "loss": 0.2518, + "epoch": 0.39, + "grad_norm": 0.40256385885824786, + "learning_rate": 1.3927982922318349e-05, + "loss": 0.299, "step": 8498 }, { - "epoch": 0.49, - "grad_norm": 0.21800662896904763, - "learning_rate": 1.0864244858895461e-05, - "loss": 0.1464, + "epoch": 0.39, + "grad_norm": 0.7297263084128381, + "learning_rate": 1.3926614551641335e-05, + "loss": 0.4933, "step": 8499 }, { - "epoch": 0.49, - "grad_norm": 0.5799123938435271, - "learning_rate": 1.086239089412293e-05, - "loss": 0.4081, + "epoch": 0.39, + "grad_norm": 0.7301142126172254, + "learning_rate": 1.392524609403286e-05, + "loss": 0.4733, "step": 8500 }, { - "epoch": 0.49, - "grad_norm": 0.3538200919550498, - "learning_rate": 1.086053689948584e-05, - "loss": 0.2998, + "epoch": 0.39, + "grad_norm": 0.3265960954692485, + "learning_rate": 1.3923877549523219e-05, + "loss": 0.2285, "step": 8501 }, { - "epoch": 0.49, - "grad_norm": 0.441401374507026, - "learning_rate": 1.085868287504839e-05, - "loss": 0.3884, + "epoch": 0.39, + "grad_norm": 0.3469803655585575, + "learning_rate": 1.392250891814271e-05, + "loss": 0.2375, "step": 8502 }, { - "epoch": 0.49, - "grad_norm": 0.4445663999073777, - "learning_rate": 1.0856828820874794e-05, - "loss": 0.2894, + "epoch": 0.39, + "grad_norm": 0.4321739886589292, + "learning_rate": 1.3921140199921635e-05, + "loss": 0.2752, "step": 8503 }, { - "epoch": 0.49, - "grad_norm": 0.3221303501831563, - "learning_rate": 1.0854974737029248e-05, - "loss": 0.2564, + "epoch": 0.39, + "grad_norm": 0.3460432679318932, + "learning_rate": 1.3919771394890293e-05, + "loss": 0.223, "step": 8504 }, { - "epoch": 0.49, - "grad_norm": 0.2526415990634436, - "learning_rate": 1.0853120623575968e-05, - "loss": 0.1755, + "epoch": 0.39, + "grad_norm": 0.4348710954226238, + "learning_rate": 1.391840250307899e-05, + "loss": 0.3391, "step": 8505 }, { - "epoch": 0.49, - "grad_norm": 0.3396586056153518, - "learning_rate": 1.0851266480579155e-05, - "loss": 0.2631, + "epoch": 0.39, + "grad_norm": 0.8265907255909595, + "learning_rate": 1.3917033524518035e-05, + "loss": 0.5123, "step": 8506 }, { - "epoch": 0.49, - "grad_norm": 0.32619397347087004, - "learning_rate": 1.0849412308103023e-05, - "loss": 0.2644, + "epoch": 0.39, + "grad_norm": 0.3489211464382827, + "learning_rate": 1.3915664459237735e-05, + "loss": 0.2117, "step": 8507 }, { - "epoch": 0.49, - "grad_norm": 0.8049881277903056, - "learning_rate": 1.0847558106211775e-05, - "loss": 0.6005, + "epoch": 0.39, + "grad_norm": 0.9873339184232549, + "learning_rate": 1.3914295307268396e-05, + "loss": 0.5917, "step": 8508 }, { - "epoch": 0.49, - "grad_norm": 0.39477084899592646, - "learning_rate": 1.0845703874969629e-05, - "loss": 0.2584, + "epoch": 0.39, + "grad_norm": 0.25125081952338174, + "learning_rate": 1.3912926068640326e-05, + "loss": 0.2104, "step": 8509 }, { - "epoch": 0.49, - "grad_norm": 0.3284641100010038, - "learning_rate": 1.0843849614440793e-05, - "loss": 0.2518, + "epoch": 0.39, + "grad_norm": 0.392106905491276, + "learning_rate": 1.3911556743383852e-05, + "loss": 0.2346, "step": 8510 }, { - "epoch": 0.49, - "grad_norm": 0.3164534348135442, - "learning_rate": 1.0841995324689482e-05, - "loss": 0.2347, + "epoch": 0.39, + "grad_norm": 0.8862667865136324, + "learning_rate": 1.3910187331529277e-05, + "loss": 0.4758, "step": 8511 }, { - "epoch": 0.49, - "grad_norm": 0.6746338727776406, - "learning_rate": 1.0840141005779907e-05, - "loss": 0.3469, + "epoch": 0.39, + "grad_norm": 0.8121733769609694, + "learning_rate": 1.3908817833106927e-05, + "loss": 0.4823, "step": 8512 }, { - "epoch": 0.49, - "grad_norm": 0.3406299935814668, - "learning_rate": 1.0838286657776289e-05, - "loss": 0.2758, + "epoch": 0.39, + "grad_norm": 0.33753175505956434, + "learning_rate": 1.3907448248147112e-05, + "loss": 0.2917, "step": 8513 }, { - "epoch": 0.49, - "grad_norm": 0.368507523241613, - "learning_rate": 1.0836432280742837e-05, - "loss": 0.3614, + "epoch": 0.39, + "grad_norm": 0.4311536816934009, + "learning_rate": 1.3906078576680165e-05, + "loss": 0.2489, "step": 8514 }, { - "epoch": 0.49, - "grad_norm": 0.4196989763584233, - "learning_rate": 1.0834577874743772e-05, - "loss": 0.2835, + "epoch": 0.39, + "grad_norm": 0.4118459922090234, + "learning_rate": 1.3904708818736397e-05, + "loss": 0.298, "step": 8515 }, { - "epoch": 0.49, - "grad_norm": 0.286344189141085, - "learning_rate": 1.0832723439843313e-05, - "loss": 0.2135, + "epoch": 0.39, + "grad_norm": 0.40259324114076384, + "learning_rate": 1.3903338974346143e-05, + "loss": 0.3133, "step": 8516 }, { - "epoch": 0.49, - "grad_norm": 0.8103735048771367, - "learning_rate": 1.0830868976105677e-05, - "loss": 0.4709, + "epoch": 0.39, + "grad_norm": 0.5330180654191928, + "learning_rate": 1.3901969043539727e-05, + "loss": 0.3049, "step": 8517 }, { - "epoch": 0.49, - "grad_norm": 0.3806559083984567, - "learning_rate": 1.0829014483595081e-05, - "loss": 0.3363, + "epoch": 0.39, + "grad_norm": 0.4918108665273485, + "learning_rate": 1.3900599026347474e-05, + "loss": 0.3012, "step": 8518 }, { - "epoch": 0.49, - "grad_norm": 0.22981873993975727, - "learning_rate": 1.0827159962375753e-05, - "loss": 0.1717, + "epoch": 0.39, + "grad_norm": 0.37210821466478, + "learning_rate": 1.3899228922799721e-05, + "loss": 0.2698, "step": 8519 }, { - "epoch": 0.49, - "grad_norm": 1.1201661013138515, - "learning_rate": 1.0825305412511906e-05, - "loss": 0.7243, + "epoch": 0.39, + "grad_norm": 0.4926874018782073, + "learning_rate": 1.3897858732926794e-05, + "loss": 0.2857, "step": 8520 }, { - "epoch": 0.49, - "grad_norm": 0.29503616397144117, - "learning_rate": 1.0823450834067772e-05, - "loss": 0.2378, + "epoch": 0.39, + "grad_norm": 0.39544578983252554, + "learning_rate": 1.3896488456759034e-05, + "loss": 0.2839, "step": 8521 }, { - "epoch": 0.49, - "grad_norm": 0.2551033595031021, - "learning_rate": 1.0821596227107572e-05, - "loss": 0.2024, + "epoch": 0.39, + "grad_norm": 0.3521972793437678, + "learning_rate": 1.3895118094326776e-05, + "loss": 0.2801, "step": 8522 }, { - "epoch": 0.49, - "grad_norm": 0.7899047872546144, - "learning_rate": 1.0819741591695526e-05, - "loss": 0.4307, + "epoch": 0.39, + "grad_norm": 0.5917044218146319, + "learning_rate": 1.3893747645660357e-05, + "loss": 0.3738, "step": 8523 }, { - "epoch": 0.49, - "grad_norm": 0.6356314127947628, - "learning_rate": 1.0817886927895866e-05, - "loss": 0.4684, + "epoch": 0.39, + "grad_norm": 0.33872910584403115, + "learning_rate": 1.3892377110790117e-05, + "loss": 0.24, "step": 8524 }, { - "epoch": 0.49, - "grad_norm": 0.344138473561077, - "learning_rate": 1.0816032235772816e-05, - "loss": 0.194, + "epoch": 0.39, + "grad_norm": 0.29950024922731255, + "learning_rate": 1.38910064897464e-05, + "loss": 0.2535, "step": 8525 }, { - "epoch": 0.49, - "grad_norm": 0.3089788067804815, - "learning_rate": 1.0814177515390605e-05, - "loss": 0.297, + "epoch": 0.39, + "grad_norm": 1.3657862338424902, + "learning_rate": 1.3889635782559548e-05, + "loss": 0.7538, "step": 8526 }, { - "epoch": 0.49, - "grad_norm": 0.32417727591946566, - "learning_rate": 1.081232276681346e-05, - "loss": 0.2092, + "epoch": 0.39, + "grad_norm": 0.6790184248671534, + "learning_rate": 1.388826498925991e-05, + "loss": 0.354, "step": 8527 }, { - "epoch": 0.49, - "grad_norm": 0.4153972292522417, - "learning_rate": 1.0810467990105617e-05, - "loss": 0.2856, + "epoch": 0.39, + "grad_norm": 0.36538731649848455, + "learning_rate": 1.3886894109877832e-05, + "loss": 0.2871, "step": 8528 }, { - "epoch": 0.49, - "grad_norm": 0.655625681624118, - "learning_rate": 1.0808613185331297e-05, - "loss": 0.3203, + "epoch": 0.39, + "grad_norm": 0.38833651903061167, + "learning_rate": 1.3885523144443665e-05, + "loss": 0.3105, "step": 8529 }, { - "epoch": 0.49, - "grad_norm": 0.3785813411354443, - "learning_rate": 1.0806758352554743e-05, - "loss": 0.3061, + "epoch": 0.39, + "grad_norm": 0.25278595991068525, + "learning_rate": 1.3884152092987762e-05, + "loss": 0.1321, "step": 8530 }, { - "epoch": 0.49, - "grad_norm": 0.37824841761056816, - "learning_rate": 1.0804903491840178e-05, - "loss": 0.2755, + "epoch": 0.39, + "grad_norm": 0.40154922520378683, + "learning_rate": 1.3882780955540474e-05, + "loss": 0.255, "step": 8531 }, { - "epoch": 0.49, - "grad_norm": 0.21373532506861836, - "learning_rate": 1.080304860325184e-05, - "loss": 0.1536, + "epoch": 0.39, + "grad_norm": 0.5363540839521053, + "learning_rate": 1.3881409732132159e-05, + "loss": 0.3501, "step": 8532 }, { - "epoch": 0.49, - "grad_norm": 0.4860820363746777, - "learning_rate": 1.0801193686853964e-05, - "loss": 0.3688, + "epoch": 0.39, + "grad_norm": 0.3891319896615226, + "learning_rate": 1.388003842279317e-05, + "loss": 0.2721, "step": 8533 }, { - "epoch": 0.49, - "grad_norm": 0.2978394874490292, - "learning_rate": 1.0799338742710788e-05, - "loss": 0.24, + "epoch": 0.39, + "grad_norm": 0.3924241639750246, + "learning_rate": 1.3878667027553871e-05, + "loss": 0.3081, "step": 8534 }, { - "epoch": 0.49, - "grad_norm": 0.48456413182199815, - "learning_rate": 1.0797483770886542e-05, - "loss": 0.2991, + "epoch": 0.39, + "grad_norm": 0.9071713896189254, + "learning_rate": 1.3877295546444627e-05, + "loss": 0.507, "step": 8535 }, { - "epoch": 0.49, - "grad_norm": 0.6425265613421929, - "learning_rate": 1.0795628771445467e-05, - "loss": 0.4898, + "epoch": 0.39, + "grad_norm": 0.2350539384135555, + "learning_rate": 1.3875923979495793e-05, + "loss": 0.1817, "step": 8536 }, { - "epoch": 0.49, - "grad_norm": 0.3882124012170663, - "learning_rate": 1.0793773744451804e-05, - "loss": 0.2956, + "epoch": 0.39, + "grad_norm": 0.3756452380790013, + "learning_rate": 1.3874552326737736e-05, + "loss": 0.2782, "step": 8537 }, { - "epoch": 0.49, - "grad_norm": 0.29591018567833727, - "learning_rate": 1.079191868996979e-05, - "loss": 0.2537, + "epoch": 0.39, + "grad_norm": 0.7953992082280263, + "learning_rate": 1.3873180588200827e-05, + "loss": 0.5154, "step": 8538 }, { - "epoch": 0.49, - "grad_norm": 0.275873528349889, - "learning_rate": 1.0790063608063664e-05, - "loss": 0.2111, + "epoch": 0.39, + "grad_norm": 0.8467157834770388, + "learning_rate": 1.3871808763915434e-05, + "loss": 0.4623, "step": 8539 }, { - "epoch": 0.49, - "grad_norm": 0.309022281433369, - "learning_rate": 1.078820849879767e-05, - "loss": 0.2702, + "epoch": 0.39, + "grad_norm": 0.3635755082421646, + "learning_rate": 1.3870436853911924e-05, + "loss": 0.2254, "step": 8540 }, { - "epoch": 0.49, - "grad_norm": 0.7002771887809298, - "learning_rate": 1.0786353362236051e-05, - "loss": 0.4943, + "epoch": 0.39, + "grad_norm": 0.39805891412900407, + "learning_rate": 1.3869064858220673e-05, + "loss": 0.3426, "step": 8541 }, { - "epoch": 0.49, - "grad_norm": 0.3367810286858073, - "learning_rate": 1.0784498198443048e-05, - "loss": 0.267, + "epoch": 0.39, + "grad_norm": 0.4985862714740669, + "learning_rate": 1.3867692776872057e-05, + "loss": 0.3133, "step": 8542 }, { - "epoch": 0.49, - "grad_norm": 0.6251041919918563, - "learning_rate": 1.0782643007482908e-05, - "loss": 0.3424, + "epoch": 0.39, + "grad_norm": 0.2543250971402099, + "learning_rate": 1.3866320609896449e-05, + "loss": 0.1796, "step": 8543 }, { - "epoch": 0.49, - "grad_norm": 0.4196247509404695, - "learning_rate": 1.0780787789419868e-05, - "loss": 0.3126, + "epoch": 0.39, + "grad_norm": 0.8933081037109439, + "learning_rate": 1.3864948357324228e-05, + "loss": 0.5262, "step": 8544 }, { - "epoch": 0.49, - "grad_norm": 0.222604196010212, - "learning_rate": 1.0778932544318185e-05, - "loss": 0.1972, + "epoch": 0.39, + "grad_norm": 0.3785326900899198, + "learning_rate": 1.3863576019185776e-05, + "loss": 0.3467, "step": 8545 }, { - "epoch": 0.49, - "grad_norm": 0.5323880097633098, - "learning_rate": 1.0777077272242103e-05, - "loss": 0.3519, + "epoch": 0.39, + "grad_norm": 0.34074301743836977, + "learning_rate": 1.3862203595511476e-05, + "loss": 0.2149, "step": 8546 }, { - "epoch": 0.49, - "grad_norm": 0.3522018391320144, - "learning_rate": 1.0775221973255866e-05, - "loss": 0.3304, + "epoch": 0.39, + "grad_norm": 1.4116896385471303, + "learning_rate": 1.3860831086331711e-05, + "loss": 0.8098, "step": 8547 }, { - "epoch": 0.49, - "grad_norm": 0.7935920052225027, - "learning_rate": 1.0773366647423724e-05, - "loss": 0.3471, + "epoch": 0.39, + "grad_norm": 0.3725347958256613, + "learning_rate": 1.3859458491676868e-05, + "loss": 0.3533, "step": 8548 }, { - "epoch": 0.49, - "grad_norm": 0.3614599034269359, - "learning_rate": 1.0771511294809933e-05, - "loss": 0.2682, + "epoch": 0.39, + "grad_norm": 0.24237380130949845, + "learning_rate": 1.3858085811577333e-05, + "loss": 0.136, "step": 8549 }, { - "epoch": 0.49, - "grad_norm": 0.30816488025273653, - "learning_rate": 1.0769655915478734e-05, - "loss": 0.2914, + "epoch": 0.39, + "grad_norm": 0.4108720602231526, + "learning_rate": 1.3856713046063494e-05, + "loss": 0.2878, "step": 8550 }, { - "epoch": 0.49, - "grad_norm": 0.41552829457555635, - "learning_rate": 1.076780050949439e-05, - "loss": 0.2073, + "epoch": 0.39, + "grad_norm": 0.7534602379688196, + "learning_rate": 1.3855340195165748e-05, + "loss": 0.4351, "step": 8551 }, { - "epoch": 0.49, - "grad_norm": 0.3957754702487912, - "learning_rate": 1.0765945076921143e-05, - "loss": 0.3081, + "epoch": 0.39, + "grad_norm": 0.7357473279493222, + "learning_rate": 1.3853967258914483e-05, + "loss": 0.4393, "step": 8552 }, { - "epoch": 0.49, - "grad_norm": 0.3438857205809205, - "learning_rate": 1.0764089617823252e-05, + "epoch": 0.39, + "grad_norm": 0.3476098133818706, + "learning_rate": 1.3852594237340103e-05, "loss": 0.2495, "step": 8553 }, { - "epoch": 0.49, - "grad_norm": 0.3916725126911459, - "learning_rate": 1.0762234132264969e-05, - "loss": 0.3022, + "epoch": 0.39, + "grad_norm": 0.3298992903722264, + "learning_rate": 1.3851221130472994e-05, + "loss": 0.2206, "step": 8554 }, { - "epoch": 0.49, - "grad_norm": 0.3472478565503047, - "learning_rate": 1.0760378620310551e-05, - "loss": 0.2252, + "epoch": 0.39, + "grad_norm": 0.47718717855671533, + "learning_rate": 1.3849847938343564e-05, + "loss": 0.2811, "step": 8555 }, { - "epoch": 0.49, - "grad_norm": 0.5140747501812202, - "learning_rate": 1.0758523082024255e-05, - "loss": 0.39, + "epoch": 0.39, + "grad_norm": 0.40029906194730525, + "learning_rate": 1.3848474660982208e-05, + "loss": 0.3039, "step": 8556 }, { - "epoch": 0.49, - "grad_norm": 0.46369297394140513, - "learning_rate": 1.0756667517470337e-05, - "loss": 0.3532, + "epoch": 0.39, + "grad_norm": 0.4841693401790011, + "learning_rate": 1.3847101298419334e-05, + "loss": 0.3788, "step": 8557 }, { - "epoch": 0.49, - "grad_norm": 0.2763335707004524, - "learning_rate": 1.0754811926713053e-05, - "loss": 0.233, + "epoch": 0.39, + "grad_norm": 0.4468751660134071, + "learning_rate": 1.3845727850685347e-05, + "loss": 0.2963, "step": 8558 }, { - "epoch": 0.49, - "grad_norm": 0.2642442656817401, - "learning_rate": 1.075295630981667e-05, - "loss": 0.1955, + "epoch": 0.39, + "grad_norm": 0.45348599711579485, + "learning_rate": 1.3844354317810651e-05, + "loss": 0.3194, "step": 8559 }, { - "epoch": 0.49, - "grad_norm": 0.8901340050690897, - "learning_rate": 1.0751100666845437e-05, - "loss": 0.5739, + "epoch": 0.39, + "grad_norm": 0.3666102181793443, + "learning_rate": 1.3842980699825655e-05, + "loss": 0.3151, "step": 8560 }, { - "epoch": 0.49, - "grad_norm": 0.33780611056603577, - "learning_rate": 1.0749244997863624e-05, - "loss": 0.1916, + "epoch": 0.39, + "grad_norm": 0.3558561702445882, + "learning_rate": 1.384160699676077e-05, + "loss": 0.2033, "step": 8561 }, { - "epoch": 0.49, - "grad_norm": 0.314626187389568, - "learning_rate": 1.0747389302935487e-05, - "loss": 0.2898, + "epoch": 0.39, + "grad_norm": 0.4306755303296113, + "learning_rate": 1.384023320864641e-05, + "loss": 0.2978, "step": 8562 }, { - "epoch": 0.49, - "grad_norm": 0.5661987660912312, - "learning_rate": 1.074553358212529e-05, - "loss": 0.4068, + "epoch": 0.39, + "grad_norm": 0.42462646802794346, + "learning_rate": 1.3838859335512986e-05, + "loss": 0.2937, "step": 8563 }, { - "epoch": 0.49, - "grad_norm": 0.22613700731661518, - "learning_rate": 1.07436778354973e-05, - "loss": 0.1042, + "epoch": 0.39, + "grad_norm": 0.3206359744488347, + "learning_rate": 1.3837485377390918e-05, + "loss": 0.2668, "step": 8564 }, { - "epoch": 0.49, - "grad_norm": 0.26126861145961344, - "learning_rate": 1.0741822063115774e-05, - "loss": 0.2493, + "epoch": 0.39, + "grad_norm": 0.4073148246289972, + "learning_rate": 1.3836111334310622e-05, + "loss": 0.3229, "step": 8565 }, { - "epoch": 0.49, - "grad_norm": 0.48082663751471855, - "learning_rate": 1.0739966265044985e-05, - "loss": 0.3952, + "epoch": 0.39, + "grad_norm": 0.2926870897542749, + "learning_rate": 1.3834737206302519e-05, + "loss": 0.1226, "step": 8566 }, { - "epoch": 0.49, - "grad_norm": 0.776697669372256, - "learning_rate": 1.0738110441349194e-05, - "loss": 0.568, + "epoch": 0.39, + "grad_norm": 0.392373176057603, + "learning_rate": 1.3833362993397028e-05, + "loss": 0.3063, "step": 8567 }, { - "epoch": 0.49, - "grad_norm": 0.35344836698827203, - "learning_rate": 1.0736254592092674e-05, - "loss": 0.2445, + "epoch": 0.39, + "grad_norm": 0.46194554178905367, + "learning_rate": 1.3831988695624576e-05, + "loss": 0.352, "step": 8568 }, { - "epoch": 0.49, - "grad_norm": 0.41759160329707223, - "learning_rate": 1.0734398717339687e-05, - "loss": 0.3361, + "epoch": 0.39, + "grad_norm": 0.27144045868036837, + "learning_rate": 1.3830614313015587e-05, + "loss": 0.2196, "step": 8569 }, { - "epoch": 0.49, - "grad_norm": 0.36406742647874546, - "learning_rate": 1.0732542817154505e-05, - "loss": 0.268, + "epoch": 0.39, + "grad_norm": 0.6702440454370174, + "learning_rate": 1.382923984560049e-05, + "loss": 0.3708, "step": 8570 }, { - "epoch": 0.49, - "grad_norm": 0.21027776814527477, - "learning_rate": 1.0730686891601394e-05, - "loss": 0.1567, + "epoch": 0.39, + "grad_norm": 0.43832872911757315, + "learning_rate": 1.3827865293409715e-05, + "loss": 0.3446, "step": 8571 }, { - "epoch": 0.49, - "grad_norm": 1.0486613084863996, - "learning_rate": 1.072883094074463e-05, - "loss": 0.66, + "epoch": 0.39, + "grad_norm": 0.3147556300917032, + "learning_rate": 1.3826490656473692e-05, + "loss": 0.2626, "step": 8572 }, { - "epoch": 0.49, - "grad_norm": 0.30409813856147094, - "learning_rate": 1.0726974964648478e-05, - "loss": 0.2823, + "epoch": 0.39, + "grad_norm": 0.5029217273093667, + "learning_rate": 1.382511593482285e-05, + "loss": 0.3283, "step": 8573 }, { - "epoch": 0.49, - "grad_norm": 0.3632538787528237, - "learning_rate": 1.072511896337722e-05, - "loss": 0.2477, + "epoch": 0.39, + "grad_norm": 0.48749862615751444, + "learning_rate": 1.382374112848763e-05, + "loss": 0.342, "step": 8574 }, { - "epoch": 0.49, - "grad_norm": 0.6178813686029212, - "learning_rate": 1.0723262936995118e-05, - "loss": 0.4058, + "epoch": 0.39, + "grad_norm": 0.21966296374757854, + "learning_rate": 1.3822366237498466e-05, + "loss": 0.098, "step": 8575 }, { - "epoch": 0.49, - "grad_norm": 0.5408942484954933, - "learning_rate": 1.0721406885566455e-05, - "loss": 0.3104, + "epoch": 0.39, + "grad_norm": 0.32125786250437016, + "learning_rate": 1.3820991261885798e-05, + "loss": 0.2688, "step": 8576 }, { - "epoch": 0.49, - "grad_norm": 0.22758907901569544, - "learning_rate": 1.07195508091555e-05, - "loss": 0.1969, + "epoch": 0.39, + "grad_norm": 0.3841107663733378, + "learning_rate": 1.381961620168007e-05, + "loss": 0.3168, "step": 8577 }, { - "epoch": 0.49, - "grad_norm": 0.29891880470997145, - "learning_rate": 1.0717694707826534e-05, - "loss": 0.2564, + "epoch": 0.39, + "grad_norm": 0.9182309082448304, + "learning_rate": 1.3818241056911715e-05, + "loss": 0.5026, "step": 8578 }, { - "epoch": 0.49, - "grad_norm": 0.6764541746498948, - "learning_rate": 1.0715838581643829e-05, - "loss": 0.4602, + "epoch": 0.39, + "grad_norm": 0.41796742487398103, + "learning_rate": 1.3816865827611187e-05, + "loss": 0.2587, "step": 8579 }, { - "epoch": 0.49, - "grad_norm": 0.3694126858365215, - "learning_rate": 1.0713982430671668e-05, - "loss": 0.3039, + "epoch": 0.39, + "grad_norm": 0.38813807138056006, + "learning_rate": 1.3815490513808925e-05, + "loss": 0.328, "step": 8580 }, { - "epoch": 0.49, - "grad_norm": 0.3388532266528095, - "learning_rate": 1.0712126254974325e-05, - "loss": 0.2766, + "epoch": 0.39, + "grad_norm": 0.2744726806605093, + "learning_rate": 1.3814115115535382e-05, + "loss": 0.2007, "step": 8581 }, { - "epoch": 0.49, - "grad_norm": 0.8196371361869945, - "learning_rate": 1.0710270054616077e-05, - "loss": 0.3736, + "epoch": 0.39, + "grad_norm": 0.3476757208034699, + "learning_rate": 1.3812739632821006e-05, + "loss": 0.2098, "step": 8582 }, { - "epoch": 0.49, - "grad_norm": 0.2575004355415229, - "learning_rate": 1.070841382966121e-05, - "loss": 0.2168, + "epoch": 0.39, + "grad_norm": 0.5380871886146342, + "learning_rate": 1.3811364065696251e-05, + "loss": 0.3998, "step": 8583 }, { - "epoch": 0.49, - "grad_norm": 0.3064963483696533, - "learning_rate": 1.0706557580174002e-05, - "loss": 0.1623, + "epoch": 0.39, + "grad_norm": 0.3314041636451239, + "learning_rate": 1.3809988414191566e-05, + "loss": 0.3139, "step": 8584 }, { - "epoch": 0.49, - "grad_norm": 0.30059761729633716, - "learning_rate": 1.0704701306218737e-05, - "loss": 0.2986, + "epoch": 0.39, + "grad_norm": 0.6697829072305175, + "learning_rate": 1.3808612678337415e-05, + "loss": 0.2801, "step": 8585 }, { - "epoch": 0.49, - "grad_norm": 0.3834769871882269, - "learning_rate": 1.0702845007859697e-05, - "loss": 0.2874, + "epoch": 0.39, + "grad_norm": 0.3923519072713705, + "learning_rate": 1.3807236858164243e-05, + "loss": 0.3145, "step": 8586 }, { - "epoch": 0.49, - "grad_norm": 0.7050746353337347, - "learning_rate": 1.0700988685161162e-05, - "loss": 0.3589, + "epoch": 0.39, + "grad_norm": 0.35885279339079024, + "learning_rate": 1.3805860953702522e-05, + "loss": 0.2019, "step": 8587 }, { - "epoch": 0.49, - "grad_norm": 0.8554268457849118, - "learning_rate": 1.069913233818742e-05, - "loss": 0.5188, + "epoch": 0.39, + "grad_norm": 0.3960835542833339, + "learning_rate": 1.3804484964982705e-05, + "loss": 0.3114, "step": 8588 }, { - "epoch": 0.49, - "grad_norm": 0.2600277824616293, - "learning_rate": 1.0697275967002754e-05, - "loss": 0.26, + "epoch": 0.39, + "grad_norm": 0.3471894063103433, + "learning_rate": 1.3803108892035259e-05, + "loss": 0.2416, "step": 8589 }, { - "epoch": 0.49, - "grad_norm": 0.2661738871990709, - "learning_rate": 1.069541957167145e-05, - "loss": 0.1824, + "epoch": 0.39, + "grad_norm": 0.6102921391444078, + "learning_rate": 1.3801732734890645e-05, + "loss": 0.4456, "step": 8590 }, { - "epoch": 0.49, - "grad_norm": 0.5728659494558793, - "learning_rate": 1.06935631522578e-05, - "loss": 0.2859, + "epoch": 0.39, + "grad_norm": 0.7977910041127219, + "learning_rate": 1.3800356493579336e-05, + "loss": 0.4504, "step": 8591 }, { - "epoch": 0.49, - "grad_norm": 0.36724855285541735, - "learning_rate": 1.0691706708826084e-05, - "loss": 0.3184, + "epoch": 0.39, + "grad_norm": 0.27585825792928814, + "learning_rate": 1.3798980168131795e-05, + "loss": 0.2409, "step": 8592 }, { - "epoch": 0.49, - "grad_norm": 0.36684883389905065, - "learning_rate": 1.0689850241440598e-05, - "loss": 0.3254, + "epoch": 0.39, + "grad_norm": 0.47023750366455985, + "learning_rate": 1.3797603758578496e-05, + "loss": 0.298, "step": 8593 }, { - "epoch": 0.49, - "grad_norm": 0.448707775246444, - "learning_rate": 1.0687993750165623e-05, - "loss": 0.2209, + "epoch": 0.39, + "grad_norm": 0.361229902662536, + "learning_rate": 1.379622726494991e-05, + "loss": 0.245, "step": 8594 }, { - "epoch": 0.49, - "grad_norm": 0.40597125420971497, - "learning_rate": 1.0686137235065458e-05, - "loss": 0.3169, + "epoch": 0.39, + "grad_norm": 0.3450307824261173, + "learning_rate": 1.3794850687276508e-05, + "loss": 0.274, "step": 8595 }, { - "epoch": 0.49, - "grad_norm": 0.34023890211065355, - "learning_rate": 1.068428069620439e-05, - "loss": 0.2522, + "epoch": 0.39, + "grad_norm": 0.3480710410329912, + "learning_rate": 1.379347402558877e-05, + "loss": 0.3059, "step": 8596 }, { - "epoch": 0.49, - "grad_norm": 0.41372972524648555, - "learning_rate": 1.0682424133646712e-05, - "loss": 0.275, + "epoch": 0.39, + "grad_norm": 0.49593250225412516, + "learning_rate": 1.3792097279917175e-05, + "loss": 0.3011, "step": 8597 }, { - "epoch": 0.49, - "grad_norm": 0.3490980753166338, - "learning_rate": 1.068056754745671e-05, - "loss": 0.2738, + "epoch": 0.39, + "grad_norm": 0.41360550626745235, + "learning_rate": 1.3790720450292201e-05, + "loss": 0.2331, "step": 8598 }, { - "epoch": 0.49, - "grad_norm": 0.9919442403924039, - "learning_rate": 1.0678710937698689e-05, - "loss": 0.7263, + "epoch": 0.4, + "grad_norm": 0.29420693063586745, + "learning_rate": 1.3789343536744329e-05, + "loss": 0.1491, "step": 8599 }, { - "epoch": 0.49, - "grad_norm": 0.477348484888539, - "learning_rate": 1.0676854304436936e-05, - "loss": 0.1544, + "epoch": 0.4, + "grad_norm": 0.30848477002234603, + "learning_rate": 1.3787966539304046e-05, + "loss": 0.2668, "step": 8600 }, { - "epoch": 0.49, - "grad_norm": 0.29238634288399545, - "learning_rate": 1.0674997647735745e-05, - "loss": 0.2659, + "epoch": 0.4, + "grad_norm": 0.4493017563020744, + "learning_rate": 1.378658945800183e-05, + "loss": 0.3468, "step": 8601 }, { - "epoch": 0.49, - "grad_norm": 0.4137595889493302, - "learning_rate": 1.0673140967659418e-05, - "loss": 0.2705, + "epoch": 0.4, + "grad_norm": 0.6156918714497636, + "learning_rate": 1.3785212292868178e-05, + "loss": 0.3641, "step": 8602 }, { - "epoch": 0.49, - "grad_norm": 0.7976162801001311, - "learning_rate": 1.0671284264272249e-05, - "loss": 0.4373, + "epoch": 0.4, + "grad_norm": 0.5021558443670983, + "learning_rate": 1.3783835043933569e-05, + "loss": 0.3243, "step": 8603 }, { - "epoch": 0.49, - "grad_norm": 0.35688291511303527, - "learning_rate": 1.066942753763853e-05, - "loss": 0.2226, + "epoch": 0.4, + "grad_norm": 0.36670039871869275, + "learning_rate": 1.3782457711228503e-05, + "loss": 0.3004, "step": 8604 }, { - "epoch": 0.49, - "grad_norm": 0.28563980083841095, - "learning_rate": 1.0667570787822568e-05, - "loss": 0.2589, + "epoch": 0.4, + "grad_norm": 0.3383115096058593, + "learning_rate": 1.3781080294783467e-05, + "loss": 0.2066, "step": 8605 }, { - "epoch": 0.49, - "grad_norm": 0.8815030483266307, - "learning_rate": 1.0665714014888657e-05, - "loss": 0.5098, + "epoch": 0.4, + "grad_norm": 0.9327065962507903, + "learning_rate": 1.377970279462896e-05, + "loss": 0.5537, "step": 8606 }, { - "epoch": 0.49, - "grad_norm": 0.3215761513411512, - "learning_rate": 1.0663857218901097e-05, - "loss": 0.2103, + "epoch": 0.4, + "grad_norm": 0.36457560839679215, + "learning_rate": 1.3778325210795474e-05, + "loss": 0.2849, "step": 8607 }, { - "epoch": 0.49, - "grad_norm": 0.9842923959569092, - "learning_rate": 1.0662000399924193e-05, - "loss": 0.4303, + "epoch": 0.4, + "grad_norm": 0.28168257873170016, + "learning_rate": 1.3776947543313508e-05, + "loss": 0.2259, "step": 8608 }, { - "epoch": 0.49, - "grad_norm": 0.34745505755938316, - "learning_rate": 1.066014355802224e-05, - "loss": 0.3268, + "epoch": 0.4, + "grad_norm": 0.7442547475732373, + "learning_rate": 1.3775569792213565e-05, + "loss": 0.4388, "step": 8609 }, { - "epoch": 0.49, - "grad_norm": 0.31255754616013104, - "learning_rate": 1.0658286693259544e-05, - "loss": 0.1985, + "epoch": 0.4, + "grad_norm": 0.3979109251120208, + "learning_rate": 1.3774191957526144e-05, + "loss": 0.2649, "step": 8610 }, { - "epoch": 0.49, - "grad_norm": 0.273327258396647, - "learning_rate": 1.065642980570041e-05, - "loss": 0.1981, + "epoch": 0.4, + "grad_norm": 0.6701917452710291, + "learning_rate": 1.3772814039281754e-05, + "loss": 0.2384, "step": 8611 }, { - "epoch": 0.49, - "grad_norm": 0.4237158009498341, - "learning_rate": 1.0654572895409142e-05, - "loss": 0.3071, + "epoch": 0.4, + "grad_norm": 0.3538263325465825, + "learning_rate": 1.3771436037510897e-05, + "loss": 0.3426, "step": 8612 }, { - "epoch": 0.49, - "grad_norm": 0.3323909110807824, - "learning_rate": 1.065271596245004e-05, - "loss": 0.2247, + "epoch": 0.4, + "grad_norm": 0.39564838793694423, + "learning_rate": 1.377005795224408e-05, + "loss": 0.3283, "step": 8613 }, { - "epoch": 0.49, - "grad_norm": 1.197242401674588, - "learning_rate": 1.0650859006887412e-05, - "loss": 0.4439, + "epoch": 0.4, + "grad_norm": 0.8961983213977719, + "learning_rate": 1.3768679783511814e-05, + "loss": 0.5772, "step": 8614 }, { - "epoch": 0.49, - "grad_norm": 1.1780449474286145, - "learning_rate": 1.0649002028785564e-05, - "loss": 0.7566, + "epoch": 0.4, + "grad_norm": 0.25300801278634716, + "learning_rate": 1.376730153134461e-05, + "loss": 0.152, "step": 8615 }, { - "epoch": 0.5, - "grad_norm": 0.3025044117036119, - "learning_rate": 1.0647145028208808e-05, - "loss": 0.2614, + "epoch": 0.4, + "grad_norm": 0.34023061137446936, + "learning_rate": 1.376592319577298e-05, + "loss": 0.2856, "step": 8616 }, { - "epoch": 0.5, - "grad_norm": 0.21862780541713553, - "learning_rate": 1.0645288005221443e-05, - "loss": 0.1848, + "epoch": 0.4, + "grad_norm": 0.9183032076479685, + "learning_rate": 1.376454477682744e-05, + "loss": 0.5284, "step": 8617 }, { - "epoch": 0.5, - "grad_norm": 0.9973641936045772, - "learning_rate": 1.0643430959887786e-05, - "loss": 0.5432, + "epoch": 0.4, + "grad_norm": 0.47746295526648286, + "learning_rate": 1.3763166274538509e-05, + "loss": 0.2674, "step": 8618 }, { - "epoch": 0.5, - "grad_norm": 0.3686939049542852, - "learning_rate": 1.064157389227214e-05, - "loss": 0.2805, + "epoch": 0.4, + "grad_norm": 0.4164348285000339, + "learning_rate": 1.3761787688936701e-05, + "loss": 0.3309, "step": 8619 }, { - "epoch": 0.5, - "grad_norm": 1.3729969071937, - "learning_rate": 1.063971680243882e-05, - "loss": 0.3485, + "epoch": 0.4, + "grad_norm": 0.3906352575147355, + "learning_rate": 1.376040902005254e-05, + "loss": 0.3595, "step": 8620 }, { - "epoch": 0.5, - "grad_norm": 0.41373947900301006, - "learning_rate": 1.063785969045213e-05, - "loss": 0.3296, + "epoch": 0.4, + "grad_norm": 0.20291304617321065, + "learning_rate": 1.3759030267916549e-05, + "loss": 0.0731, "step": 8621 }, { - "epoch": 0.5, - "grad_norm": 0.3333660307427822, - "learning_rate": 1.063600255637639e-05, - "loss": 0.2753, + "epoch": 0.4, + "grad_norm": 0.40933179720041696, + "learning_rate": 1.375765143255925e-05, + "loss": 0.2979, "step": 8622 }, { - "epoch": 0.5, - "grad_norm": 0.1846235788447922, - "learning_rate": 1.0634145400275906e-05, - "loss": 0.0857, + "epoch": 0.4, + "grad_norm": 0.9888147321932022, + "learning_rate": 1.3756272514011169e-05, + "loss": 0.5189, "step": 8623 }, { - "epoch": 0.5, - "grad_norm": 0.4198005388095872, - "learning_rate": 1.0632288222214998e-05, - "loss": 0.333, + "epoch": 0.4, + "grad_norm": 0.3854084683881432, + "learning_rate": 1.3754893512302838e-05, + "loss": 0.3037, "step": 8624 }, { - "epoch": 0.5, - "grad_norm": 0.4751118320166682, - "learning_rate": 1.0630431022257975e-05, - "loss": 0.2883, + "epoch": 0.4, + "grad_norm": 0.3997433447191998, + "learning_rate": 1.375351442746478e-05, + "loss": 0.2932, "step": 8625 }, { - "epoch": 0.5, - "grad_norm": 1.5403740119919342, - "learning_rate": 1.062857380046915e-05, - "loss": 0.3654, + "epoch": 0.4, + "grad_norm": 0.41829591931841, + "learning_rate": 1.3752135259527533e-05, + "loss": 0.2915, "step": 8626 }, { - "epoch": 0.5, - "grad_norm": 0.906474785955322, - "learning_rate": 1.0626716556912845e-05, - "loss": 0.656, + "epoch": 0.4, + "grad_norm": 0.4677416379920142, + "learning_rate": 1.3750756008521626e-05, + "loss": 0.2019, "step": 8627 }, { - "epoch": 0.5, - "grad_norm": 0.40934538461942965, - "learning_rate": 1.062485929165337e-05, - "loss": 0.3045, + "epoch": 0.4, + "grad_norm": 0.27172381162365083, + "learning_rate": 1.3749376674477598e-05, + "loss": 0.2302, "step": 8628 }, { - "epoch": 0.5, - "grad_norm": 0.2483934265695432, - "learning_rate": 1.0623002004755045e-05, - "loss": 0.2398, + "epoch": 0.4, + "grad_norm": 0.985807238685192, + "learning_rate": 1.3747997257425982e-05, + "loss": 0.5732, "step": 8629 }, { - "epoch": 0.5, - "grad_norm": 0.8469478281869853, - "learning_rate": 1.0621144696282187e-05, - "loss": 0.2506, + "epoch": 0.4, + "grad_norm": 0.7186049324915126, + "learning_rate": 1.374661775739732e-05, + "loss": 0.4207, "step": 8630 }, { - "epoch": 0.5, - "grad_norm": 0.41460984148063096, - "learning_rate": 1.0619287366299116e-05, - "loss": 0.2685, + "epoch": 0.4, + "grad_norm": 0.3186680560885979, + "learning_rate": 1.374523817442215e-05, + "loss": 0.2202, "step": 8631 }, { - "epoch": 0.5, - "grad_norm": 0.7057658416397729, - "learning_rate": 1.061743001487015e-05, - "loss": 0.3861, + "epoch": 0.4, + "grad_norm": 0.4006043365775831, + "learning_rate": 1.3743858508531018e-05, + "loss": 0.3563, "step": 8632 }, { - "epoch": 0.5, - "grad_norm": 0.44006599321635453, - "learning_rate": 1.0615572642059608e-05, - "loss": 0.2791, + "epoch": 0.4, + "grad_norm": 0.28666518186036966, + "learning_rate": 1.3742478759754466e-05, + "loss": 0.1563, "step": 8633 }, { - "epoch": 0.5, - "grad_norm": 0.35366409363198736, - "learning_rate": 1.0613715247931811e-05, - "loss": 0.2718, + "epoch": 0.4, + "grad_norm": 0.3055597939352883, + "learning_rate": 1.3741098928123044e-05, + "loss": 0.1989, "step": 8634 }, { - "epoch": 0.5, - "grad_norm": 0.28522502179300874, - "learning_rate": 1.0611857832551088e-05, - "loss": 0.1987, + "epoch": 0.4, + "grad_norm": 1.333643226836793, + "learning_rate": 1.3739719013667297e-05, + "loss": 0.4676, "step": 8635 }, { - "epoch": 0.5, - "grad_norm": 0.4444119647481306, - "learning_rate": 1.0610000395981748e-05, - "loss": 0.2836, + "epoch": 0.4, + "grad_norm": 0.4556038645961717, + "learning_rate": 1.3738339016417774e-05, + "loss": 0.3234, "step": 8636 }, { - "epoch": 0.5, - "grad_norm": 0.29795136052669713, - "learning_rate": 1.0608142938288122e-05, - "loss": 0.2719, + "epoch": 0.4, + "grad_norm": 0.3514975195170233, + "learning_rate": 1.373695893640503e-05, + "loss": 0.2029, "step": 8637 }, { - "epoch": 0.5, - "grad_norm": 0.7921438236185548, - "learning_rate": 1.0606285459534531e-05, - "loss": 0.4997, + "epoch": 0.4, + "grad_norm": 0.8892435360895058, + "learning_rate": 1.3735578773659612e-05, + "loss": 0.6352, "step": 8638 }, { - "epoch": 0.5, - "grad_norm": 1.113545353925166, - "learning_rate": 1.0604427959785305e-05, - "loss": 0.5553, + "epoch": 0.4, + "grad_norm": 0.2697349763991779, + "learning_rate": 1.3734198528212086e-05, + "loss": 0.218, "step": 8639 }, { - "epoch": 0.5, - "grad_norm": 0.35533823163207906, - "learning_rate": 1.0602570439104758e-05, - "loss": 0.2753, + "epoch": 0.4, + "grad_norm": 0.40669278570064593, + "learning_rate": 1.3732818200092998e-05, + "loss": 0.2968, "step": 8640 }, { - "epoch": 0.5, - "grad_norm": 0.4131396122160858, - "learning_rate": 1.0600712897557229e-05, - "loss": 0.3235, + "epoch": 0.4, + "grad_norm": 0.9426623030004377, + "learning_rate": 1.3731437789332917e-05, + "loss": 0.3378, "step": 8641 }, { - "epoch": 0.5, - "grad_norm": 0.578787262125609, - "learning_rate": 1.0598855335207032e-05, - "loss": 0.3251, + "epoch": 0.4, + "grad_norm": 1.1173107795000576, + "learning_rate": 1.37300572959624e-05, + "loss": 0.5854, "step": 8642 }, { - "epoch": 0.5, - "grad_norm": 0.2622329097119207, - "learning_rate": 1.0596997752118505e-05, - "loss": 0.1882, + "epoch": 0.4, + "grad_norm": 0.3343465471799973, + "learning_rate": 1.372867672001201e-05, + "loss": 0.2864, "step": 8643 }, { - "epoch": 0.5, - "grad_norm": 1.6319084606170982, - "learning_rate": 1.0595140148355971e-05, - "loss": 0.7732, + "epoch": 0.4, + "grad_norm": 0.37236350398819035, + "learning_rate": 1.3727296061512307e-05, + "loss": 0.286, "step": 8644 }, { - "epoch": 0.5, - "grad_norm": 0.38998737359035557, - "learning_rate": 1.059328252398376e-05, - "loss": 0.329, + "epoch": 0.4, + "grad_norm": 0.2729334026807821, + "learning_rate": 1.3725915320493865e-05, + "loss": 0.1515, "step": 8645 }, { - "epoch": 0.5, - "grad_norm": 0.3543340025838131, - "learning_rate": 1.0591424879066199e-05, - "loss": 0.1859, + "epoch": 0.4, + "grad_norm": 0.3879874357179999, + "learning_rate": 1.3724534496987248e-05, + "loss": 0.2687, "step": 8646 }, { - "epoch": 0.5, - "grad_norm": 0.4751509222241394, - "learning_rate": 1.058956721366762e-05, - "loss": 0.4029, + "epoch": 0.4, + "grad_norm": 0.82792246903581, + "learning_rate": 1.372315359102303e-05, + "loss": 0.3446, "step": 8647 }, { - "epoch": 0.5, - "grad_norm": 0.40849080293214024, - "learning_rate": 1.0587709527852354e-05, - "loss": 0.3217, + "epoch": 0.4, + "grad_norm": 0.5621181163852842, + "learning_rate": 1.3721772602631775e-05, + "loss": 0.3575, "step": 8648 }, { - "epoch": 0.5, - "grad_norm": 0.2225141351938953, - "learning_rate": 1.0585851821684731e-05, - "loss": 0.1289, + "epoch": 0.4, + "grad_norm": 0.3461963442451924, + "learning_rate": 1.3720391531844066e-05, + "loss": 0.2708, "step": 8649 }, { - "epoch": 0.5, - "grad_norm": 0.35872861250269855, - "learning_rate": 1.0583994095229086e-05, - "loss": 0.2722, + "epoch": 0.4, + "grad_norm": 0.939207857358149, + "learning_rate": 1.371901037869047e-05, + "loss": 0.4102, "step": 8650 }, { - "epoch": 0.5, - "grad_norm": 1.3321392581467473, - "learning_rate": 1.0582136348549751e-05, - "loss": 0.7329, + "epoch": 0.4, + "grad_norm": 0.27595802012328063, + "learning_rate": 1.371762914320157e-05, + "loss": 0.2277, "step": 8651 }, { - "epoch": 0.5, - "grad_norm": 0.2864610099756312, - "learning_rate": 1.0580278581711062e-05, - "loss": 0.1966, + "epoch": 0.4, + "grad_norm": 0.4058081968038128, + "learning_rate": 1.3716247825407947e-05, + "loss": 0.2582, "step": 8652 }, { - "epoch": 0.5, - "grad_norm": 0.36678325852272914, - "learning_rate": 1.0578420794777347e-05, - "loss": 0.3385, + "epoch": 0.4, + "grad_norm": 0.5763086174772531, + "learning_rate": 1.3714866425340176e-05, + "loss": 0.4197, "step": 8653 }, { - "epoch": 0.5, - "grad_norm": 0.6607280902420078, - "learning_rate": 1.0576562987812946e-05, - "loss": 0.4297, + "epoch": 0.4, + "grad_norm": 0.665686817200467, + "learning_rate": 1.3713484943028843e-05, + "loss": 0.4188, "step": 8654 }, { - "epoch": 0.5, - "grad_norm": 0.26838225983595704, - "learning_rate": 1.057470516088219e-05, - "loss": 0.2015, + "epoch": 0.4, + "grad_norm": 0.4198170071023023, + "learning_rate": 1.3712103378504532e-05, + "loss": 0.2519, "step": 8655 }, { - "epoch": 0.5, - "grad_norm": 0.23625021150066397, - "learning_rate": 1.0572847314049424e-05, - "loss": 0.2171, + "epoch": 0.4, + "grad_norm": 0.41561431362723383, + "learning_rate": 1.3710721731797831e-05, + "loss": 0.3321, "step": 8656 }, { - "epoch": 0.5, - "grad_norm": 1.3904254222964338, - "learning_rate": 1.0570989447378977e-05, - "loss": 0.8084, + "epoch": 0.4, + "grad_norm": 0.44795318829467673, + "learning_rate": 1.3709340002939327e-05, + "loss": 0.2395, "step": 8657 }, { - "epoch": 0.5, - "grad_norm": 0.3996383469966337, - "learning_rate": 1.056913156093519e-05, - "loss": 0.3012, + "epoch": 0.4, + "grad_norm": 0.45358081113982307, + "learning_rate": 1.3707958191959609e-05, + "loss": 0.2958, "step": 8658 }, { - "epoch": 0.5, - "grad_norm": 0.47744810436214125, - "learning_rate": 1.0567273654782402e-05, - "loss": 0.2646, + "epoch": 0.4, + "grad_norm": 0.3314426129950775, + "learning_rate": 1.3706576298889273e-05, + "loss": 0.2741, "step": 8659 }, { - "epoch": 0.5, - "grad_norm": 0.37398280158270103, - "learning_rate": 1.0565415728984954e-05, - "loss": 0.3085, + "epoch": 0.4, + "grad_norm": 0.5002454360741769, + "learning_rate": 1.370519432375891e-05, + "loss": 0.2632, "step": 8660 }, { - "epoch": 0.5, - "grad_norm": 0.2956477490756631, - "learning_rate": 1.0563557783607182e-05, - "loss": 0.173, + "epoch": 0.4, + "grad_norm": 0.3639338605036829, + "learning_rate": 1.3703812266599113e-05, + "loss": 0.291, "step": 8661 }, { - "epoch": 0.5, - "grad_norm": 0.26479803730153023, - "learning_rate": 1.0561699818713427e-05, - "loss": 0.1764, + "epoch": 0.4, + "grad_norm": 0.8353738319844893, + "learning_rate": 1.3702430127440484e-05, + "loss": 0.5764, "step": 8662 }, { - "epoch": 0.5, - "grad_norm": 1.1463907876064328, - "learning_rate": 1.0559841834368032e-05, - "loss": 0.6209, + "epoch": 0.4, + "grad_norm": 0.37522208023988485, + "learning_rate": 1.3701047906313619e-05, + "loss": 0.274, "step": 8663 }, { - "epoch": 0.5, - "grad_norm": 0.3129762774498092, - "learning_rate": 1.055798383063534e-05, - "loss": 0.2687, + "epoch": 0.4, + "grad_norm": 0.38085539086061726, + "learning_rate": 1.3699665603249121e-05, + "loss": 0.2747, "step": 8664 }, { - "epoch": 0.5, - "grad_norm": 0.3635218135502235, - "learning_rate": 1.0556125807579691e-05, - "loss": 0.3221, + "epoch": 0.4, + "grad_norm": 0.346566065682707, + "learning_rate": 1.3698283218277594e-05, + "loss": 0.2474, "step": 8665 }, { - "epoch": 0.5, - "grad_norm": 0.692553553878542, - "learning_rate": 1.0554267765265428e-05, - "loss": 0.3318, + "epoch": 0.4, + "grad_norm": 1.2668059028589966, + "learning_rate": 1.3696900751429642e-05, + "loss": 0.6696, "step": 8666 }, { - "epoch": 0.5, - "grad_norm": 0.3232964697528742, - "learning_rate": 1.0552409703756896e-05, - "loss": 0.2057, + "epoch": 0.4, + "grad_norm": 0.3625954250602369, + "learning_rate": 1.369551820273587e-05, + "loss": 0.2241, "step": 8667 }, { - "epoch": 0.5, - "grad_norm": 0.2696835374164365, - "learning_rate": 1.0550551623118442e-05, - "loss": 0.2448, + "epoch": 0.4, + "grad_norm": 0.43985536436081485, + "learning_rate": 1.3694135572226883e-05, + "loss": 0.3336, "step": 8668 }, { - "epoch": 0.5, - "grad_norm": 0.4353801277831827, - "learning_rate": 1.0548693523414408e-05, - "loss": 0.3043, + "epoch": 0.4, + "grad_norm": 0.7519150845287341, + "learning_rate": 1.3692752859933299e-05, + "loss": 0.537, "step": 8669 }, { - "epoch": 0.5, - "grad_norm": 0.6110278457480871, - "learning_rate": 1.0546835404709142e-05, - "loss": 0.3705, + "epoch": 0.4, + "grad_norm": 0.33074245750827436, + "learning_rate": 1.3691370065885723e-05, + "loss": 0.2453, "step": 8670 }, { - "epoch": 0.5, - "grad_norm": 0.4375873666361446, - "learning_rate": 1.0544977267066986e-05, - "loss": 0.3336, + "epoch": 0.4, + "grad_norm": 0.34771745181015057, + "learning_rate": 1.3689987190114775e-05, + "loss": 0.2403, "step": 8671 }, { - "epoch": 0.5, - "grad_norm": 0.3254332303228954, - "learning_rate": 1.0543119110552293e-05, - "loss": 0.2477, + "epoch": 0.4, + "grad_norm": 0.30891014395848365, + "learning_rate": 1.3688604232651064e-05, + "loss": 0.2659, "step": 8672 }, { - "epoch": 0.5, - "grad_norm": 0.4354120294911844, - "learning_rate": 1.054126093522941e-05, - "loss": 0.3037, + "epoch": 0.4, + "grad_norm": 0.3363764272038372, + "learning_rate": 1.3687221193525211e-05, + "loss": 0.1941, "step": 8673 }, { - "epoch": 0.5, - "grad_norm": 0.2439818838843636, - "learning_rate": 1.053940274116268e-05, - "loss": 0.1891, + "epoch": 0.4, + "grad_norm": 0.5653594502781631, + "learning_rate": 1.3685838072767832e-05, + "loss": 0.4083, "step": 8674 }, { - "epoch": 0.5, - "grad_norm": 1.0463292921514271, - "learning_rate": 1.0537544528416462e-05, - "loss": 0.396, + "epoch": 0.4, + "grad_norm": 0.4060637346788626, + "learning_rate": 1.3684454870409554e-05, + "loss": 0.341, "step": 8675 }, { - "epoch": 0.5, - "grad_norm": 0.2922783263343222, - "learning_rate": 1.0535686297055095e-05, - "loss": 0.2758, + "epoch": 0.4, + "grad_norm": 0.3721382384025377, + "learning_rate": 1.3683071586480997e-05, + "loss": 0.2102, "step": 8676 }, { - "epoch": 0.5, - "grad_norm": 0.33915434052209564, - "learning_rate": 1.0533828047142936e-05, - "loss": 0.313, + "epoch": 0.4, + "grad_norm": 0.3370288345469713, + "learning_rate": 1.3681688221012784e-05, + "loss": 0.2007, "step": 8677 }, { - "epoch": 0.5, - "grad_norm": 1.2238073735137502, - "learning_rate": 1.0531969778744333e-05, - "loss": 0.7319, + "epoch": 0.4, + "grad_norm": 0.46616576075335564, + "learning_rate": 1.368030477403554e-05, + "loss": 0.2926, "step": 8678 }, { - "epoch": 0.5, - "grad_norm": 0.24234025299640638, - "learning_rate": 1.0530111491923642e-05, - "loss": 0.1626, + "epoch": 0.4, + "grad_norm": 0.40971001522562034, + "learning_rate": 1.3678921245579898e-05, + "loss": 0.3056, "step": 8679 }, { - "epoch": 0.5, - "grad_norm": 0.2732911508199734, - "learning_rate": 1.0528253186745212e-05, - "loss": 0.2467, + "epoch": 0.4, + "grad_norm": 0.3187020563129749, + "learning_rate": 1.3677537635676484e-05, + "loss": 0.2672, "step": 8680 }, { - "epoch": 0.5, - "grad_norm": 0.5100215847515718, - "learning_rate": 1.05263948632734e-05, - "loss": 0.4121, + "epoch": 0.4, + "grad_norm": 0.6481375902780272, + "learning_rate": 1.367615394435593e-05, + "loss": 0.4201, "step": 8681 }, { - "epoch": 0.5, - "grad_norm": 0.7570537100397893, - "learning_rate": 1.052453652157255e-05, - "loss": 0.2958, + "epoch": 0.4, + "grad_norm": 0.42457878461365256, + "learning_rate": 1.3674770171648875e-05, + "loss": 0.2884, "step": 8682 }, { - "epoch": 0.5, - "grad_norm": 0.40521850584754937, - "learning_rate": 1.0522678161707028e-05, - "loss": 0.3425, + "epoch": 0.4, + "grad_norm": 0.27816786957335066, + "learning_rate": 1.3673386317585946e-05, + "loss": 0.1944, "step": 8683 }, { - "epoch": 0.5, - "grad_norm": 0.35722274671332144, - "learning_rate": 1.0520819783741183e-05, - "loss": 0.3157, + "epoch": 0.4, + "grad_norm": 0.3436104352230831, + "learning_rate": 1.3672002382197787e-05, + "loss": 0.266, "step": 8684 }, { - "epoch": 0.5, - "grad_norm": 0.378465025509005, - "learning_rate": 1.0518961387739371e-05, - "loss": 0.1266, + "epoch": 0.4, + "grad_norm": 0.4409790981947072, + "learning_rate": 1.3670618365515034e-05, + "loss": 0.3099, "step": 8685 }, { - "epoch": 0.5, - "grad_norm": 0.28385498048625774, - "learning_rate": 1.0517102973765947e-05, - "loss": 0.2468, + "epoch": 0.4, + "grad_norm": 0.4570934278665918, + "learning_rate": 1.3669234267568325e-05, + "loss": 0.2897, "step": 8686 }, { - "epoch": 0.5, - "grad_norm": 0.6575638629333442, - "learning_rate": 1.0515244541885272e-05, - "loss": 0.4461, + "epoch": 0.4, + "grad_norm": 0.37401780532354695, + "learning_rate": 1.3667850088388308e-05, + "loss": 0.3325, "step": 8687 }, { - "epoch": 0.5, - "grad_norm": 0.2883829375610862, - "learning_rate": 1.0513386092161698e-05, - "loss": 0.2233, + "epoch": 0.4, + "grad_norm": 0.46844276185607947, + "learning_rate": 1.3666465828005626e-05, + "loss": 0.3226, "step": 8688 }, { - "epoch": 0.5, - "grad_norm": 0.2855510500547569, - "learning_rate": 1.0511527624659585e-05, - "loss": 0.2701, + "epoch": 0.4, + "grad_norm": 0.4911838502081208, + "learning_rate": 1.3665081486450924e-05, + "loss": 0.2571, "step": 8689 }, { - "epoch": 0.5, - "grad_norm": 0.9124086442455803, - "learning_rate": 1.0509669139443298e-05, - "loss": 0.5854, + "epoch": 0.4, + "grad_norm": 0.34333891409734957, + "learning_rate": 1.3663697063754853e-05, + "loss": 0.2179, "step": 8690 }, { - "epoch": 0.5, - "grad_norm": 0.3336901059066564, - "learning_rate": 1.0507810636577183e-05, - "loss": 0.1928, + "epoch": 0.4, + "grad_norm": 0.4003986229681892, + "learning_rate": 1.3662312559948054e-05, + "loss": 0.2954, "step": 8691 }, { - "epoch": 0.5, - "grad_norm": 0.263736365371567, - "learning_rate": 1.0505952116125613e-05, - "loss": 0.2334, + "epoch": 0.4, + "grad_norm": 0.3772491817417094, + "learning_rate": 1.3660927975061188e-05, + "loss": 0.3221, "step": 8692 }, { - "epoch": 0.5, - "grad_norm": 0.731716892313344, - "learning_rate": 1.0504093578152939e-05, - "loss": 0.4945, + "epoch": 0.4, + "grad_norm": 0.6132215052412152, + "learning_rate": 1.3659543309124906e-05, + "loss": 0.3512, "step": 8693 }, { - "epoch": 0.5, - "grad_norm": 0.5263262316670798, - "learning_rate": 1.050223502272353e-05, - "loss": 0.3654, + "epoch": 0.4, + "grad_norm": 0.59524903827153, + "learning_rate": 1.3658158562169862e-05, + "loss": 0.3449, "step": 8694 }, { - "epoch": 0.5, - "grad_norm": 0.2641345245374191, - "learning_rate": 1.050037644990174e-05, - "loss": 0.1886, + "epoch": 0.4, + "grad_norm": 0.38310771066277083, + "learning_rate": 1.3656773734226714e-05, + "loss": 0.3077, "step": 8695 }, { - "epoch": 0.5, - "grad_norm": 0.36928392656217834, - "learning_rate": 1.0498517859751937e-05, - "loss": 0.3137, + "epoch": 0.4, + "grad_norm": 0.22732684083697202, + "learning_rate": 1.3655388825326117e-05, + "loss": 0.1623, "step": 8696 }, { - "epoch": 0.5, - "grad_norm": 0.609889087343759, - "learning_rate": 1.0496659252338481e-05, - "loss": 0.3196, + "epoch": 0.4, + "grad_norm": 0.5544553603461901, + "learning_rate": 1.3654003835498737e-05, + "loss": 0.3875, "step": 8697 }, { - "epoch": 0.5, - "grad_norm": 0.32459381957992633, - "learning_rate": 1.049480062772574e-05, - "loss": 0.2378, + "epoch": 0.4, + "grad_norm": 0.4317430241903873, + "learning_rate": 1.3652618764775231e-05, + "loss": 0.3441, "step": 8698 }, { - "epoch": 0.5, - "grad_norm": 0.7666747207357101, - "learning_rate": 1.0492941985978068e-05, - "loss": 0.4289, + "epoch": 0.4, + "grad_norm": 0.3325118401168639, + "learning_rate": 1.365123361318627e-05, + "loss": 0.2403, "step": 8699 }, { - "epoch": 0.5, - "grad_norm": 0.3039731492970164, - "learning_rate": 1.049108332715984e-05, - "loss": 0.2918, + "epoch": 0.4, + "grad_norm": 0.6576025063661516, + "learning_rate": 1.3649848380762513e-05, + "loss": 0.3405, "step": 8700 }, { - "epoch": 0.5, - "grad_norm": 0.30935907905723153, - "learning_rate": 1.048922465133542e-05, - "loss": 0.1984, + "epoch": 0.4, + "grad_norm": 0.4256055319216024, + "learning_rate": 1.3648463067534632e-05, + "loss": 0.3268, "step": 8701 }, { - "epoch": 0.5, - "grad_norm": 0.3842240130182527, - "learning_rate": 1.0487365958569168e-05, - "loss": 0.2887, + "epoch": 0.4, + "grad_norm": 0.32938920931524, + "learning_rate": 1.3647077673533294e-05, + "loss": 0.1575, "step": 8702 }, { - "epoch": 0.5, - "grad_norm": 1.4564138910898665, - "learning_rate": 1.0485507248925455e-05, - "loss": 0.7804, + "epoch": 0.4, + "grad_norm": 0.30266021076061217, + "learning_rate": 1.3645692198789173e-05, + "loss": 0.2705, "step": 8703 }, { - "epoch": 0.5, - "grad_norm": 0.2959826982997899, - "learning_rate": 1.0483648522468648e-05, - "loss": 0.2633, + "epoch": 0.4, + "grad_norm": 0.5634310216697219, + "learning_rate": 1.3644306643332939e-05, + "loss": 0.385, "step": 8704 }, { - "epoch": 0.5, - "grad_norm": 0.6727655285294992, - "learning_rate": 1.0481789779263112e-05, - "loss": 0.3068, + "epoch": 0.4, + "grad_norm": 0.46310496006354945, + "learning_rate": 1.3642921007195269e-05, + "loss": 0.3233, "step": 8705 }, { - "epoch": 0.5, - "grad_norm": 0.8354526542235158, - "learning_rate": 1.0479931019373218e-05, - "loss": 0.5327, + "epoch": 0.4, + "grad_norm": 0.3478454414447397, + "learning_rate": 1.3641535290406837e-05, + "loss": 0.2004, "step": 8706 }, { - "epoch": 0.5, - "grad_norm": 0.28734045291129795, - "learning_rate": 1.0478072242863329e-05, - "loss": 0.2392, + "epoch": 0.4, + "grad_norm": 0.4427521005433169, + "learning_rate": 1.3640149492998326e-05, + "loss": 0.3428, "step": 8707 }, { - "epoch": 0.5, - "grad_norm": 0.2747471101459275, - "learning_rate": 1.0476213449797823e-05, - "loss": 0.2075, + "epoch": 0.4, + "grad_norm": 0.5419645688317273, + "learning_rate": 1.3638763615000412e-05, + "loss": 0.4006, "step": 8708 }, { - "epoch": 0.5, - "grad_norm": 0.7019374127703849, - "learning_rate": 1.0474354640241065e-05, - "loss": 0.4068, + "epoch": 0.4, + "grad_norm": 0.6814457043018262, + "learning_rate": 1.363737765644378e-05, + "loss": 0.3025, "step": 8709 }, { - "epoch": 0.5, - "grad_norm": 0.36357077422014367, - "learning_rate": 1.0472495814257426e-05, - "loss": 0.294, + "epoch": 0.4, + "grad_norm": 0.3978273375443263, + "learning_rate": 1.3635991617359111e-05, + "loss": 0.3283, "step": 8710 }, { - "epoch": 0.5, - "grad_norm": 0.6829568268139392, - "learning_rate": 1.0470636971911277e-05, - "loss": 0.3449, + "epoch": 0.4, + "grad_norm": 0.2570544564849038, + "learning_rate": 1.3634605497777094e-05, + "loss": 0.2137, "step": 8711 }, { - "epoch": 0.5, - "grad_norm": 0.3321892388960226, - "learning_rate": 1.046877811326699e-05, - "loss": 0.3047, + "epoch": 0.4, + "grad_norm": 0.42478702714075917, + "learning_rate": 1.3633219297728415e-05, + "loss": 0.1695, "step": 8712 }, { - "epoch": 0.5, - "grad_norm": 0.3844356675302627, - "learning_rate": 1.0466919238388937e-05, - "loss": 0.2823, + "epoch": 0.4, + "grad_norm": 0.4460605113562629, + "learning_rate": 1.363183301724376e-05, + "loss": 0.3134, "step": 8713 }, { - "epoch": 0.5, - "grad_norm": 0.2569912841925734, - "learning_rate": 1.046506034734149e-05, - "loss": 0.1119, + "epoch": 0.4, + "grad_norm": 0.5812377045420741, + "learning_rate": 1.3630446656353823e-05, + "loss": 0.3869, "step": 8714 }, { - "epoch": 0.5, - "grad_norm": 0.5010661824434921, - "learning_rate": 1.0463201440189026e-05, - "loss": 0.3222, + "epoch": 0.4, + "grad_norm": 0.4867522575059979, + "learning_rate": 1.3629060215089296e-05, + "loss": 0.3507, "step": 8715 }, { - "epoch": 0.5, - "grad_norm": 0.3171331861477249, - "learning_rate": 1.0461342516995911e-05, - "loss": 0.2934, + "epoch": 0.4, + "grad_norm": 0.3371382537219159, + "learning_rate": 1.3627673693480874e-05, + "loss": 0.2492, "step": 8716 }, { - "epoch": 0.5, - "grad_norm": 0.43958228142956374, - "learning_rate": 1.0459483577826531e-05, - "loss": 0.3712, + "epoch": 0.4, + "grad_norm": 0.33416232002628654, + "learning_rate": 1.3626287091559254e-05, + "loss": 0.1652, "step": 8717 }, { - "epoch": 0.5, - "grad_norm": 0.3325947509858215, - "learning_rate": 1.0457624622745249e-05, - "loss": 0.2327, + "epoch": 0.4, + "grad_norm": 0.6589556116555825, + "learning_rate": 1.362490040935513e-05, + "loss": 0.3624, "step": 8718 }, { - "epoch": 0.5, - "grad_norm": 0.4079029698952522, - "learning_rate": 1.0455765651816447e-05, - "loss": 0.2972, + "epoch": 0.4, + "grad_norm": 0.2947981378644011, + "learning_rate": 1.3623513646899207e-05, + "loss": 0.2291, "step": 8719 }, { - "epoch": 0.5, - "grad_norm": 0.23506748013692239, - "learning_rate": 1.0453906665104503e-05, - "loss": 0.2195, + "epoch": 0.4, + "grad_norm": 0.8265033885426144, + "learning_rate": 1.3622126804222185e-05, + "loss": 0.5417, "step": 8720 }, { - "epoch": 0.5, - "grad_norm": 0.5733380652274306, - "learning_rate": 1.045204766267379e-05, - "loss": 0.3302, + "epoch": 0.4, + "grad_norm": 0.5095397186311522, + "learning_rate": 1.3620739881354763e-05, + "loss": 0.3741, "step": 8721 }, { - "epoch": 0.5, - "grad_norm": 0.3160060820791429, - "learning_rate": 1.0450188644588684e-05, - "loss": 0.2767, + "epoch": 0.4, + "grad_norm": 0.4328123526839169, + "learning_rate": 1.3619352878327653e-05, + "loss": 0.2303, "step": 8722 }, { - "epoch": 0.5, - "grad_norm": 0.4478524014302271, - "learning_rate": 1.0448329610913566e-05, - "loss": 0.3633, + "epoch": 0.4, + "grad_norm": 0.3511611936312261, + "learning_rate": 1.3617965795171558e-05, + "loss": 0.2616, "step": 8723 }, { - "epoch": 0.5, - "grad_norm": 0.3882152266604375, - "learning_rate": 1.0446470561712811e-05, - "loss": 0.2507, + "epoch": 0.4, + "grad_norm": 0.38555917957038816, + "learning_rate": 1.3616578631917186e-05, + "loss": 0.2259, "step": 8724 }, { - "epoch": 0.5, - "grad_norm": 0.34054329567829944, - "learning_rate": 1.0444611497050802e-05, - "loss": 0.2896, + "epoch": 0.4, + "grad_norm": 0.3858369579159727, + "learning_rate": 1.3615191388595248e-05, + "loss": 0.2404, "step": 8725 }, { - "epoch": 0.5, - "grad_norm": 0.2992192854650521, - "learning_rate": 1.0442752416991912e-05, - "loss": 0.1979, + "epoch": 0.4, + "grad_norm": 0.5416035939255519, + "learning_rate": 1.361380406523646e-05, + "loss": 0.3507, "step": 8726 }, { - "epoch": 0.5, - "grad_norm": 0.45370200964194424, - "learning_rate": 1.0440893321600529e-05, - "loss": 0.2756, + "epoch": 0.4, + "grad_norm": 0.5008441631654945, + "learning_rate": 1.3612416661871532e-05, + "loss": 0.4134, "step": 8727 }, { - "epoch": 0.5, - "grad_norm": 0.32295994917909365, - "learning_rate": 1.0439034210941029e-05, - "loss": 0.2951, + "epoch": 0.4, + "grad_norm": 0.39276595495173466, + "learning_rate": 1.3611029178531179e-05, + "loss": 0.3108, "step": 8728 }, { - "epoch": 0.5, - "grad_norm": 1.5271479543748887, - "learning_rate": 1.043717508507779e-05, - "loss": 0.6307, + "epoch": 0.4, + "grad_norm": 0.2376502035588918, + "learning_rate": 1.3609641615246121e-05, + "loss": 0.1226, "step": 8729 }, { - "epoch": 0.5, - "grad_norm": 1.2596569844559131, - "learning_rate": 1.0435315944075202e-05, - "loss": 0.8572, + "epoch": 0.4, + "grad_norm": 0.5517193603015693, + "learning_rate": 1.3608253972047078e-05, + "loss": 0.3564, "step": 8730 }, { - "epoch": 0.5, - "grad_norm": 0.3131215205253095, - "learning_rate": 1.0433456787997636e-05, - "loss": 0.198, + "epoch": 0.4, + "grad_norm": 0.3222165623987306, + "learning_rate": 1.3606866248964771e-05, + "loss": 0.2714, "step": 8731 }, { - "epoch": 0.5, - "grad_norm": 0.24968167949562506, - "learning_rate": 1.0431597616909483e-05, - "loss": 0.2025, + "epoch": 0.4, + "grad_norm": 0.5802938614525948, + "learning_rate": 1.3605478446029918e-05, + "loss": 0.3458, "step": 8732 }, { - "epoch": 0.5, - "grad_norm": 0.48335571779497233, - "learning_rate": 1.0429738430875123e-05, - "loss": 0.4133, + "epoch": 0.4, + "grad_norm": 0.8822950210543202, + "learning_rate": 1.3604090563273249e-05, + "loss": 0.5559, "step": 8733 }, { - "epoch": 0.5, - "grad_norm": 0.31200844208398004, - "learning_rate": 1.042787922995894e-05, - "loss": 0.1918, + "epoch": 0.4, + "grad_norm": 0.4154395050784564, + "learning_rate": 1.3602702600725488e-05, + "loss": 0.2906, "step": 8734 }, { - "epoch": 0.5, - "grad_norm": 0.45930456874730524, - "learning_rate": 1.0426020014225313e-05, - "loss": 0.3733, + "epoch": 0.4, + "grad_norm": 0.3304901320183655, + "learning_rate": 1.3601314558417365e-05, + "loss": 0.2656, "step": 8735 }, { - "epoch": 0.5, - "grad_norm": 0.40373907057363134, - "learning_rate": 1.0424160783738637e-05, - "loss": 0.342, + "epoch": 0.4, + "grad_norm": 0.24383654093517726, + "learning_rate": 1.3599926436379609e-05, + "loss": 0.1377, "step": 8736 }, { - "epoch": 0.5, - "grad_norm": 0.3350587314333596, - "learning_rate": 1.042230153856329e-05, - "loss": 0.2144, + "epoch": 0.4, + "grad_norm": 0.38804398660356565, + "learning_rate": 1.359853823464295e-05, + "loss": 0.2966, "step": 8737 }, { - "epoch": 0.5, - "grad_norm": 0.3563876657519294, - "learning_rate": 1.0420442278763658e-05, - "loss": 0.2922, + "epoch": 0.4, + "grad_norm": 0.74164266417967, + "learning_rate": 1.3597149953238122e-05, + "loss": 0.3563, "step": 8738 }, { - "epoch": 0.5, - "grad_norm": 0.31504314484019597, - "learning_rate": 1.0418583004404128e-05, - "loss": 0.2687, + "epoch": 0.4, + "grad_norm": 0.3693261980991238, + "learning_rate": 1.3595761592195861e-05, + "loss": 0.3253, "step": 8739 }, { - "epoch": 0.5, - "grad_norm": 0.30162193745736154, - "learning_rate": 1.0416723715549086e-05, - "loss": 0.2186, + "epoch": 0.4, + "grad_norm": 0.3928545254552329, + "learning_rate": 1.3594373151546904e-05, + "loss": 0.2891, "step": 8740 }, { - "epoch": 0.5, - "grad_norm": 1.3170050829277982, - "learning_rate": 1.041486441226292e-05, - "loss": 0.8277, + "epoch": 0.4, + "grad_norm": 1.0163685219589829, + "learning_rate": 1.3592984631321995e-05, + "loss": 0.6771, "step": 8741 }, { - "epoch": 0.5, - "grad_norm": 1.3552064424071086, - "learning_rate": 1.0413005094610018e-05, - "loss": 0.8414, + "epoch": 0.4, + "grad_norm": 0.24996498743501916, + "learning_rate": 1.3591596031551865e-05, + "loss": 0.1724, "step": 8742 }, { - "epoch": 0.5, - "grad_norm": 0.380534543132684, - "learning_rate": 1.0411145762654767e-05, - "loss": 0.2638, + "epoch": 0.4, + "grad_norm": 0.3374841038315729, + "learning_rate": 1.3590207352267259e-05, + "loss": 0.2553, "step": 8743 }, { - "epoch": 0.5, - "grad_norm": 0.36920878357651454, - "learning_rate": 1.0409286416461557e-05, - "loss": 0.2848, + "epoch": 0.4, + "grad_norm": 0.9775017869969177, + "learning_rate": 1.3588818593498926e-05, + "loss": 0.4144, "step": 8744 }, { - "epoch": 0.5, - "grad_norm": 0.2959147152051227, - "learning_rate": 1.0407427056094772e-05, - "loss": 0.2261, + "epoch": 0.4, + "grad_norm": 0.5228173664923256, + "learning_rate": 1.3587429755277604e-05, + "loss": 0.3035, "step": 8745 }, { - "epoch": 0.5, - "grad_norm": 0.34193258538459975, - "learning_rate": 1.040556768161881e-05, - "loss": 0.2593, + "epoch": 0.4, + "grad_norm": 0.40143151656805903, + "learning_rate": 1.3586040837634049e-05, + "loss": 0.3044, "step": 8746 }, { - "epoch": 0.5, - "grad_norm": 0.45975870258676144, - "learning_rate": 1.0403708293098054e-05, - "loss": 0.2501, + "epoch": 0.4, + "grad_norm": 0.40744837410384854, + "learning_rate": 1.3584651840599003e-05, + "loss": 0.3194, "step": 8747 }, { - "epoch": 0.5, - "grad_norm": 0.4738679517364789, - "learning_rate": 1.04018488905969e-05, - "loss": 0.3933, + "epoch": 0.4, + "grad_norm": 0.3273173938903404, + "learning_rate": 1.3583262764203222e-05, + "loss": 0.1651, "step": 8748 }, { - "epoch": 0.5, - "grad_norm": 0.3316696976128548, - "learning_rate": 1.0399989474179735e-05, - "loss": 0.2681, + "epoch": 0.4, + "grad_norm": 0.36273459660802265, + "learning_rate": 1.3581873608477457e-05, + "loss": 0.2799, "step": 8749 }, { - "epoch": 0.5, - "grad_norm": 0.567887887250414, - "learning_rate": 1.0398130043910949e-05, - "loss": 0.3258, + "epoch": 0.4, + "grad_norm": 0.46580172255496183, + "learning_rate": 1.3580484373452462e-05, + "loss": 0.3351, "step": 8750 }, { - "epoch": 0.5, - "grad_norm": 0.2892458714833695, - "learning_rate": 1.0396270599854939e-05, - "loss": 0.2585, + "epoch": 0.4, + "grad_norm": 0.43703808875263966, + "learning_rate": 1.3579095059158993e-05, + "loss": 0.2981, "step": 8751 }, { - "epoch": 0.5, - "grad_norm": 0.4389296101630068, - "learning_rate": 1.0394411142076092e-05, - "loss": 0.2814, + "epoch": 0.4, + "grad_norm": 0.3806374779632683, + "learning_rate": 1.357770566562781e-05, + "loss": 0.3024, "step": 8752 }, { - "epoch": 0.5, - "grad_norm": 0.35415454086871245, - "learning_rate": 1.039255167063881e-05, - "loss": 0.2628, + "epoch": 0.4, + "grad_norm": 1.2703854131245793, + "learning_rate": 1.3576316192889673e-05, + "loss": 0.6439, "step": 8753 }, { - "epoch": 0.5, - "grad_norm": 0.6841238312775954, - "learning_rate": 1.0390692185607479e-05, - "loss": 0.3674, + "epoch": 0.4, + "grad_norm": 0.28833614921138023, + "learning_rate": 1.3574926640975341e-05, + "loss": 0.2472, "step": 8754 }, { - "epoch": 0.5, - "grad_norm": 0.41943936748101157, - "learning_rate": 1.0388832687046493e-05, - "loss": 0.3115, + "epoch": 0.4, + "grad_norm": 0.3237312757462981, + "learning_rate": 1.3573537009915579e-05, + "loss": 0.2366, "step": 8755 }, { - "epoch": 0.5, - "grad_norm": 0.3136131249500698, - "learning_rate": 1.0386973175020248e-05, - "loss": 0.3109, + "epoch": 0.4, + "grad_norm": 0.43458492915488356, + "learning_rate": 1.357214729974115e-05, + "loss": 0.2776, "step": 8756 }, { - "epoch": 0.5, - "grad_norm": 0.2241031296136957, - "learning_rate": 1.0385113649593137e-05, - "loss": 0.1017, + "epoch": 0.4, + "grad_norm": 1.160604188342031, + "learning_rate": 1.3570757510482827e-05, + "loss": 0.6972, "step": 8757 }, { - "epoch": 0.5, - "grad_norm": 0.25406672869397945, - "learning_rate": 1.0383254110829557e-05, - "loss": 0.2081, + "epoch": 0.4, + "grad_norm": 0.37660898340391047, + "learning_rate": 1.356936764217137e-05, + "loss": 0.2177, "step": 8758 }, { - "epoch": 0.5, - "grad_norm": 0.3567819115485671, - "learning_rate": 1.0381394558793907e-05, - "loss": 0.3352, + "epoch": 0.4, + "grad_norm": 0.4168055795214102, + "learning_rate": 1.3567977694837557e-05, + "loss": 0.3485, "step": 8759 }, { - "epoch": 0.5, - "grad_norm": 0.8288749721657916, - "learning_rate": 1.0379534993550574e-05, - "loss": 0.4522, + "epoch": 0.4, + "grad_norm": 0.5456244585980462, + "learning_rate": 1.3566587668512154e-05, + "loss": 0.4018, "step": 8760 }, { - "epoch": 0.5, - "grad_norm": 0.35424045018318084, - "learning_rate": 1.0377675415163965e-05, - "loss": 0.2828, + "epoch": 0.4, + "grad_norm": 0.3181215819371659, + "learning_rate": 1.3565197563225937e-05, + "loss": 0.1911, "step": 8761 }, { - "epoch": 0.5, - "grad_norm": 0.7610154560364215, - "learning_rate": 1.0375815823698471e-05, - "loss": 0.4219, + "epoch": 0.4, + "grad_norm": 0.2860589771072182, + "learning_rate": 1.3563807379009684e-05, + "loss": 0.2242, "step": 8762 }, { - "epoch": 0.5, - "grad_norm": 0.3571386552082526, - "learning_rate": 1.0373956219218495e-05, - "loss": 0.2569, + "epoch": 0.4, + "grad_norm": 0.5655925113097174, + "learning_rate": 1.356241711589417e-05, + "loss": 0.3868, "step": 8763 }, { - "epoch": 0.5, - "grad_norm": 0.27089130035377157, - "learning_rate": 1.0372096601788426e-05, - "loss": 0.2144, + "epoch": 0.4, + "grad_norm": 0.3604462044358751, + "learning_rate": 1.3561026773910176e-05, + "loss": 0.1971, "step": 8764 }, { - "epoch": 0.5, - "grad_norm": 0.4068884998575498, - "learning_rate": 1.0370236971472671e-05, - "loss": 0.251, + "epoch": 0.4, + "grad_norm": 0.7971389515926197, + "learning_rate": 1.355963635308848e-05, + "loss": 0.4281, "step": 8765 }, { - "epoch": 0.5, - "grad_norm": 0.7412488012692768, - "learning_rate": 1.0368377328335623e-05, - "loss": 0.5212, + "epoch": 0.4, + "grad_norm": 0.5015987774431565, + "learning_rate": 1.3558245853459864e-05, + "loss": 0.359, "step": 8766 }, { - "epoch": 0.5, - "grad_norm": 0.27216313191409564, - "learning_rate": 1.0366517672441687e-05, - "loss": 0.2245, + "epoch": 0.4, + "grad_norm": 0.315408197933237, + "learning_rate": 1.3556855275055116e-05, + "loss": 0.2694, "step": 8767 }, { - "epoch": 0.5, - "grad_norm": 0.429758312937244, - "learning_rate": 1.0364658003855256e-05, - "loss": 0.3467, + "epoch": 0.4, + "grad_norm": 0.24332873938752128, + "learning_rate": 1.3555464617905018e-05, + "loss": 0.1392, "step": 8768 }, { - "epoch": 0.5, - "grad_norm": 0.4300319006050432, - "learning_rate": 1.0362798322640736e-05, - "loss": 0.2656, + "epoch": 0.4, + "grad_norm": 1.295748051023343, + "learning_rate": 1.3554073882040366e-05, + "loss": 0.7932, "step": 8769 }, { - "epoch": 0.5, - "grad_norm": 0.2356666590382897, - "learning_rate": 1.0360938628862527e-05, - "loss": 0.1391, + "epoch": 0.4, + "grad_norm": 0.3740338536927202, + "learning_rate": 1.3552683067491941e-05, + "loss": 0.271, "step": 8770 }, { - "epoch": 0.5, - "grad_norm": 0.3233756808129512, - "learning_rate": 1.0359078922585029e-05, - "loss": 0.2775, + "epoch": 0.4, + "grad_norm": 0.3912770601481214, + "learning_rate": 1.3551292174290537e-05, + "loss": 0.2534, "step": 8771 }, { - "epoch": 0.5, - "grad_norm": 0.8330387344459389, - "learning_rate": 1.0357219203872641e-05, - "loss": 0.4525, + "epoch": 0.4, + "grad_norm": 0.8456408483422618, + "learning_rate": 1.3549901202466946e-05, + "loss": 0.4322, "step": 8772 }, { - "epoch": 0.5, - "grad_norm": 0.34761547025612005, - "learning_rate": 1.035535947278977e-05, - "loss": 0.2182, + "epoch": 0.4, + "grad_norm": 0.43363651859754715, + "learning_rate": 1.3548510152051963e-05, + "loss": 0.2873, "step": 8773 }, { - "epoch": 0.5, - "grad_norm": 0.4854843573284205, - "learning_rate": 1.035349972940081e-05, - "loss": 0.3956, + "epoch": 0.4, + "grad_norm": 0.25187771510946716, + "learning_rate": 1.3547119023076387e-05, + "loss": 0.1466, "step": 8774 }, { - "epoch": 0.5, - "grad_norm": 0.3489720860066471, - "learning_rate": 1.0351639973770175e-05, - "loss": 0.2953, + "epoch": 0.4, + "grad_norm": 0.5666171022432636, + "learning_rate": 1.3545727815571015e-05, + "loss": 0.4353, "step": 8775 }, { - "epoch": 0.5, - "grad_norm": 0.29659453460115837, - "learning_rate": 1.0349780205962264e-05, - "loss": 0.1995, + "epoch": 0.4, + "grad_norm": 0.38116419268481283, + "learning_rate": 1.3544336529566645e-05, + "loss": 0.2848, "step": 8776 }, { - "epoch": 0.5, - "grad_norm": 0.2555721583072774, - "learning_rate": 1.0347920426041475e-05, - "loss": 0.188, + "epoch": 0.4, + "grad_norm": 0.7957296759240025, + "learning_rate": 1.354294516509408e-05, + "loss": 0.3549, "step": 8777 }, { - "epoch": 0.5, - "grad_norm": 1.1484304454055863, - "learning_rate": 1.034606063407222e-05, - "loss": 0.4487, + "epoch": 0.4, + "grad_norm": 0.3902767055187228, + "learning_rate": 1.3541553722184127e-05, + "loss": 0.3059, "step": 8778 }, { - "epoch": 0.5, - "grad_norm": 0.3225354838444197, - "learning_rate": 1.0344200830118899e-05, - "loss": 0.2934, + "epoch": 0.4, + "grad_norm": 0.4161552057413789, + "learning_rate": 1.3540162200867584e-05, + "loss": 0.2948, "step": 8779 }, { - "epoch": 0.5, - "grad_norm": 0.36739556613949326, - "learning_rate": 1.0342341014245918e-05, - "loss": 0.2392, + "epoch": 0.4, + "grad_norm": 0.280184106790731, + "learning_rate": 1.3538770601175264e-05, + "loss": 0.1938, "step": 8780 }, { - "epoch": 0.5, - "grad_norm": 0.7747310744381001, - "learning_rate": 1.0340481186517678e-05, - "loss": 0.4761, + "epoch": 0.4, + "grad_norm": 0.46962895127779297, + "learning_rate": 1.3537378923137973e-05, + "loss": 0.3248, "step": 8781 }, { - "epoch": 0.5, - "grad_norm": 0.27809320794294334, - "learning_rate": 1.0338621346998596e-05, - "loss": 0.2168, + "epoch": 0.4, + "grad_norm": 0.4976584257328208, + "learning_rate": 1.3535987166786523e-05, + "loss": 0.2931, "step": 8782 }, { - "epoch": 0.5, - "grad_norm": 0.399339632887001, - "learning_rate": 1.0336761495753067e-05, - "loss": 0.2646, + "epoch": 0.4, + "grad_norm": 0.4084182638417186, + "learning_rate": 1.3534595332151726e-05, + "loss": 0.3388, "step": 8783 }, { - "epoch": 0.5, - "grad_norm": 0.7950306926717371, - "learning_rate": 1.0334901632845504e-05, - "loss": 0.3684, + "epoch": 0.4, + "grad_norm": 0.7730585071991901, + "learning_rate": 1.3533203419264393e-05, + "loss": 0.3367, "step": 8784 }, { - "epoch": 0.5, - "grad_norm": 0.37529210841211885, - "learning_rate": 1.0333041758340312e-05, - "loss": 0.2767, + "epoch": 0.4, + "grad_norm": 0.4123965169271035, + "learning_rate": 1.3531811428155341e-05, + "loss": 0.2535, "step": 8785 }, { - "epoch": 0.5, - "grad_norm": 0.2740845981263997, - "learning_rate": 1.0331181872301898e-05, - "loss": 0.1405, + "epoch": 0.4, + "grad_norm": 0.31925579814729654, + "learning_rate": 1.3530419358855392e-05, + "loss": 0.2513, "step": 8786 }, { - "epoch": 0.5, - "grad_norm": 0.3420145569049249, - "learning_rate": 1.0329321974794671e-05, - "loss": 0.2961, + "epoch": 0.4, + "grad_norm": 0.8571686224642743, + "learning_rate": 1.3529027211395355e-05, + "loss": 0.5658, "step": 8787 }, { - "epoch": 0.5, - "grad_norm": 0.5831138037987301, - "learning_rate": 1.0327462065883036e-05, - "loss": 0.3274, + "epoch": 0.4, + "grad_norm": 0.2649254092656279, + "learning_rate": 1.3527634985806062e-05, + "loss": 0.2152, "step": 8788 }, { - "epoch": 0.5, - "grad_norm": 0.2896592046347293, - "learning_rate": 1.0325602145631403e-05, - "loss": 0.223, + "epoch": 0.4, + "grad_norm": 0.7384508851908317, + "learning_rate": 1.3526242682118329e-05, + "loss": 0.4227, "step": 8789 }, { - "epoch": 0.51, - "grad_norm": 0.43954585956748027, - "learning_rate": 1.0323742214104185e-05, - "loss": 0.3921, + "epoch": 0.4, + "grad_norm": 0.35304242065128766, + "learning_rate": 1.3524850300362982e-05, + "loss": 0.2494, "step": 8790 }, { - "epoch": 0.51, - "grad_norm": 0.3625988463122244, - "learning_rate": 1.0321882271365786e-05, - "loss": 0.2806, + "epoch": 0.4, + "grad_norm": 0.3314067479142375, + "learning_rate": 1.3523457840570844e-05, + "loss": 0.2675, "step": 8791 }, { - "epoch": 0.51, - "grad_norm": 0.3160804484447476, - "learning_rate": 1.0320022317480618e-05, - "loss": 0.2677, + "epoch": 0.4, + "grad_norm": 0.3634245034497393, + "learning_rate": 1.3522065302772747e-05, + "loss": 0.2311, "step": 8792 }, { - "epoch": 0.51, - "grad_norm": 1.0366060736983413, - "learning_rate": 1.031816235251309e-05, - "loss": 0.3159, + "epoch": 0.4, + "grad_norm": 0.7662109567161808, + "learning_rate": 1.3520672686999519e-05, + "loss": 0.5526, "step": 8793 }, { - "epoch": 0.51, - "grad_norm": 0.5889250828589246, - "learning_rate": 1.0316302376527616e-05, - "loss": 0.3359, + "epoch": 0.4, + "grad_norm": 0.3406564499553857, + "learning_rate": 1.3519279993281993e-05, + "loss": 0.2314, "step": 8794 }, { - "epoch": 0.51, - "grad_norm": 0.270751161601078, - "learning_rate": 1.0314442389588603e-05, - "loss": 0.2913, + "epoch": 0.4, + "grad_norm": 0.4027767667837165, + "learning_rate": 1.3517887221650998e-05, + "loss": 0.3502, "step": 8795 }, { - "epoch": 0.51, - "grad_norm": 0.6323219923443495, - "learning_rate": 1.0312582391760462e-05, - "loss": 0.3475, + "epoch": 0.4, + "grad_norm": 0.5168517183996955, + "learning_rate": 1.3516494372137368e-05, + "loss": 0.2205, "step": 8796 }, { - "epoch": 0.51, - "grad_norm": 0.4125990956262591, - "learning_rate": 1.0310722383107608e-05, - "loss": 0.2992, + "epoch": 0.4, + "grad_norm": 0.36940013599399546, + "learning_rate": 1.3515101444771945e-05, + "loss": 0.1741, "step": 8797 }, { - "epoch": 0.51, - "grad_norm": 0.23697489736616656, - "learning_rate": 1.030886236369445e-05, - "loss": 0.1779, + "epoch": 0.4, + "grad_norm": 0.3278759079232815, + "learning_rate": 1.3513708439585562e-05, + "loss": 0.3069, "step": 8798 }, { - "epoch": 0.51, - "grad_norm": 0.3704019970521971, - "learning_rate": 1.0307002333585404e-05, - "loss": 0.2598, + "epoch": 0.4, + "grad_norm": 0.7914289778572776, + "learning_rate": 1.3512315356609062e-05, + "loss": 0.4718, "step": 8799 }, { - "epoch": 0.51, - "grad_norm": 0.44374597809610156, - "learning_rate": 1.0305142292844876e-05, - "loss": 0.2887, + "epoch": 0.4, + "grad_norm": 0.3692063948905585, + "learning_rate": 1.3510922195873286e-05, + "loss": 0.2103, "step": 8800 }, { - "epoch": 0.51, - "grad_norm": 0.4554383476417022, - "learning_rate": 1.0303282241537287e-05, - "loss": 0.3395, + "epoch": 0.4, + "grad_norm": 0.28623811007505745, + "learning_rate": 1.3509528957409077e-05, + "loss": 0.2289, "step": 8801 }, { - "epoch": 0.51, - "grad_norm": 0.3874203617171426, - "learning_rate": 1.0301422179727045e-05, - "loss": 0.3243, + "epoch": 0.4, + "grad_norm": 0.3508211496165993, + "learning_rate": 1.3508135641247278e-05, + "loss": 0.3151, "step": 8802 }, { - "epoch": 0.51, - "grad_norm": 0.3125693395295528, - "learning_rate": 1.0299562107478569e-05, - "loss": 0.2679, + "epoch": 0.4, + "grad_norm": 0.3853368375265332, + "learning_rate": 1.3506742247418734e-05, + "loss": 0.1969, "step": 8803 }, { - "epoch": 0.51, - "grad_norm": 0.25728668270074057, - "learning_rate": 1.0297702024856268e-05, - "loss": 0.1966, + "epoch": 0.4, + "grad_norm": 0.5694448536698385, + "learning_rate": 1.3505348775954302e-05, + "loss": 0.3991, "step": 8804 }, { - "epoch": 0.51, - "grad_norm": 1.0480113904388246, - "learning_rate": 1.0295841931924559e-05, - "loss": 0.5405, + "epoch": 0.4, + "grad_norm": 1.0583924042259596, + "learning_rate": 1.3503955226884822e-05, + "loss": 0.4524, "step": 8805 }, { - "epoch": 0.51, - "grad_norm": 0.663364129540765, - "learning_rate": 1.0293981828747857e-05, - "loss": 0.2988, + "epoch": 0.4, + "grad_norm": 0.3499912837666646, + "learning_rate": 1.3502561600241155e-05, + "loss": 0.3053, "step": 8806 }, { - "epoch": 0.51, - "grad_norm": 0.2966797163973741, - "learning_rate": 1.0292121715390576e-05, - "loss": 0.2727, + "epoch": 0.4, + "grad_norm": 0.4278409958859237, + "learning_rate": 1.3501167896054146e-05, + "loss": 0.2694, "step": 8807 }, { - "epoch": 0.51, - "grad_norm": 0.6970721071112972, - "learning_rate": 1.0290261591917132e-05, - "loss": 0.5367, + "epoch": 0.4, + "grad_norm": 0.3394277437513173, + "learning_rate": 1.3499774114354655e-05, + "loss": 0.1867, "step": 8808 }, { - "epoch": 0.51, - "grad_norm": 0.38124847024879227, - "learning_rate": 1.0288401458391943e-05, - "loss": 0.1685, + "epoch": 0.4, + "grad_norm": 0.39467608906437435, + "learning_rate": 1.3498380255173537e-05, + "loss": 0.296, "step": 8809 }, { - "epoch": 0.51, - "grad_norm": 0.2504095296687366, - "learning_rate": 1.0286541314879424e-05, - "loss": 0.2031, + "epoch": 0.4, + "grad_norm": 0.30980146303961265, + "learning_rate": 1.3496986318541656e-05, + "loss": 0.2449, "step": 8810 }, { - "epoch": 0.51, - "grad_norm": 0.3809688844646517, - "learning_rate": 1.028468116144399e-05, - "loss": 0.3369, + "epoch": 0.4, + "grad_norm": 1.0645314964474912, + "learning_rate": 1.3495592304489869e-05, + "loss": 0.4639, "step": 8811 }, { - "epoch": 0.51, - "grad_norm": 0.4955121367154073, - "learning_rate": 1.028282099815006e-05, - "loss": 0.2312, + "epoch": 0.4, + "grad_norm": 0.42701640784780215, + "learning_rate": 1.3494198213049035e-05, + "loss": 0.3244, "step": 8812 }, { - "epoch": 0.51, - "grad_norm": 0.4093725872251935, - "learning_rate": 1.0280960825062054e-05, - "loss": 0.3284, + "epoch": 0.4, + "grad_norm": 0.2829388090291178, + "learning_rate": 1.3492804044250016e-05, + "loss": 0.1751, "step": 8813 }, { - "epoch": 0.51, - "grad_norm": 1.1636539511340667, - "learning_rate": 1.0279100642244382e-05, - "loss": 0.8014, + "epoch": 0.4, + "grad_norm": 0.3086710418980984, + "learning_rate": 1.3491409798123687e-05, + "loss": 0.2762, "step": 8814 }, { - "epoch": 0.51, - "grad_norm": 0.28726866422333247, - "learning_rate": 1.027724044976147e-05, - "loss": 0.2295, + "epoch": 0.4, + "grad_norm": 0.6855261581233567, + "learning_rate": 1.3490015474700908e-05, + "loss": 0.4613, "step": 8815 }, { - "epoch": 0.51, - "grad_norm": 0.24419854470114155, - "learning_rate": 1.0275380247677733e-05, - "loss": 0.1984, + "epoch": 0.4, + "grad_norm": 0.3601747557741626, + "learning_rate": 1.348862107401255e-05, + "loss": 0.2451, "step": 8816 }, { - "epoch": 0.51, - "grad_norm": 0.9213705414253189, - "learning_rate": 1.0273520036057587e-05, - "loss": 0.517, + "epoch": 0.41, + "grad_norm": 0.5733870734956155, + "learning_rate": 1.3487226596089489e-05, + "loss": 0.3685, "step": 8817 }, { - "epoch": 0.51, - "grad_norm": 0.4880627076951752, - "learning_rate": 1.0271659814965457e-05, - "loss": 0.3437, + "epoch": 0.41, + "grad_norm": 0.38578006423209626, + "learning_rate": 1.3485832040962588e-05, + "loss": 0.2633, "step": 8818 }, { - "epoch": 0.51, - "grad_norm": 0.25429892494500267, - "learning_rate": 1.0269799584465758e-05, - "loss": 0.2382, + "epoch": 0.41, + "grad_norm": 0.44872581050769716, + "learning_rate": 1.3484437408662725e-05, + "loss": 0.3314, "step": 8819 }, { - "epoch": 0.51, - "grad_norm": 1.1443560930208179, - "learning_rate": 1.0267939344622912e-05, - "loss": 0.7465, + "epoch": 0.41, + "grad_norm": 0.26979532585769717, + "learning_rate": 1.3483042699220774e-05, + "loss": 0.0721, "step": 8820 }, { - "epoch": 0.51, - "grad_norm": 0.47916608663820603, - "learning_rate": 1.0266079095501338e-05, - "loss": 0.3078, + "epoch": 0.41, + "grad_norm": 0.5324370097246657, + "learning_rate": 1.348164791266762e-05, + "loss": 0.3189, "step": 8821 }, { - "epoch": 0.51, - "grad_norm": 0.22200518903738178, - "learning_rate": 1.0264218837165459e-05, - "loss": 0.1626, + "epoch": 0.41, + "grad_norm": 0.327657570172009, + "learning_rate": 1.3480253049034131e-05, + "loss": 0.3021, "step": 8822 }, { - "epoch": 0.51, - "grad_norm": 0.38383767303997884, - "learning_rate": 1.0262358569679686e-05, - "loss": 0.3217, + "epoch": 0.41, + "grad_norm": 0.6549123818959309, + "learning_rate": 1.3478858108351198e-05, + "loss": 0.3519, "step": 8823 }, { - "epoch": 0.51, - "grad_norm": 0.6558207841443644, - "learning_rate": 1.0260498293108452e-05, - "loss": 0.4036, + "epoch": 0.41, + "grad_norm": 0.668904420527574, + "learning_rate": 1.3477463090649701e-05, + "loss": 0.4421, "step": 8824 }, { - "epoch": 0.51, - "grad_norm": 0.3591505119739801, - "learning_rate": 1.025863800751617e-05, - "loss": 0.2283, + "epoch": 0.41, + "grad_norm": 0.31253938196621533, + "learning_rate": 1.347606799596052e-05, + "loss": 0.2224, "step": 8825 }, { - "epoch": 0.51, - "grad_norm": 0.36957746518241613, - "learning_rate": 1.025677771296727e-05, - "loss": 0.3354, + "epoch": 0.41, + "grad_norm": 0.3038172141773224, + "learning_rate": 1.3474672824314541e-05, + "loss": 0.2126, "step": 8826 }, { - "epoch": 0.51, - "grad_norm": 1.4188426062019395, - "learning_rate": 1.0254917409526163e-05, - "loss": 0.7654, + "epoch": 0.41, + "grad_norm": 0.589882032785637, + "learning_rate": 1.3473277575742659e-05, + "loss": 0.2683, "step": 8827 }, { - "epoch": 0.51, - "grad_norm": 0.30075617474235133, - "learning_rate": 1.0253057097257281e-05, - "loss": 0.2479, + "epoch": 0.41, + "grad_norm": 0.4467383661546802, + "learning_rate": 1.3471882250275757e-05, + "loss": 0.3037, "step": 8828 }, { - "epoch": 0.51, - "grad_norm": 0.3521081179987658, - "learning_rate": 1.025119677622504e-05, - "loss": 0.2155, + "epoch": 0.41, + "grad_norm": 0.8575324043004919, + "learning_rate": 1.347048684794473e-05, + "loss": 0.4459, "step": 8829 }, { - "epoch": 0.51, - "grad_norm": 0.4290538039923925, - "learning_rate": 1.0249336446493869e-05, - "loss": 0.3082, + "epoch": 0.41, + "grad_norm": 0.2952587775980895, + "learning_rate": 1.3469091368780468e-05, + "loss": 0.2321, "step": 8830 }, { - "epoch": 0.51, - "grad_norm": 0.28813142501039557, - "learning_rate": 1.0247476108128183e-05, - "loss": 0.2763, + "epoch": 0.41, + "grad_norm": 0.6272553379159224, + "learning_rate": 1.3467695812813866e-05, + "loss": 0.4043, "step": 8831 }, { - "epoch": 0.51, - "grad_norm": 0.7845090507131877, - "learning_rate": 1.0245615761192414e-05, - "loss": 0.4415, + "epoch": 0.41, + "grad_norm": 0.30869951284163244, + "learning_rate": 1.3466300180075822e-05, + "loss": 0.1901, "step": 8832 }, { - "epoch": 0.51, - "grad_norm": 0.8072316896704939, - "learning_rate": 1.024375540575098e-05, - "loss": 0.4035, + "epoch": 0.41, + "grad_norm": 0.718881542439528, + "learning_rate": 1.3464904470597231e-05, + "loss": 0.2659, "step": 8833 }, { - "epoch": 0.51, - "grad_norm": 0.2726303021214843, - "learning_rate": 1.0241895041868306e-05, - "loss": 0.2369, + "epoch": 0.41, + "grad_norm": 0.29640618684949604, + "learning_rate": 1.3463508684408997e-05, + "loss": 0.2882, "step": 8834 }, { - "epoch": 0.51, - "grad_norm": 0.36492778874634096, - "learning_rate": 1.024003466960882e-05, - "loss": 0.2715, + "epoch": 0.41, + "grad_norm": 0.6868297550384124, + "learning_rate": 1.3462112821542016e-05, + "loss": 0.4649, "step": 8835 }, { - "epoch": 0.51, - "grad_norm": 0.360313855746173, - "learning_rate": 1.0238174289036942e-05, - "loss": 0.2589, + "epoch": 0.41, + "grad_norm": 0.5954246100377746, + "learning_rate": 1.3460716882027199e-05, + "loss": 0.2606, "step": 8836 }, { - "epoch": 0.51, - "grad_norm": 0.36305815876103154, - "learning_rate": 1.0236313900217099e-05, - "loss": 0.2774, + "epoch": 0.41, + "grad_norm": 0.3956742335343249, + "learning_rate": 1.345932086589544e-05, + "loss": 0.2899, "step": 8837 }, { - "epoch": 0.51, - "grad_norm": 0.35402837649284924, - "learning_rate": 1.0234453503213715e-05, - "loss": 0.286, + "epoch": 0.41, + "grad_norm": 0.3321499429220548, + "learning_rate": 1.3457924773177655e-05, + "loss": 0.2794, "step": 8838 }, { - "epoch": 0.51, - "grad_norm": 0.6168437114744936, - "learning_rate": 1.0232593098091215e-05, - "loss": 0.3838, + "epoch": 0.41, + "grad_norm": 0.43358493692564626, + "learning_rate": 1.3456528603904746e-05, + "loss": 0.1419, "step": 8839 }, { - "epoch": 0.51, - "grad_norm": 0.4155028386232177, - "learning_rate": 1.0230732684914029e-05, - "loss": 0.3072, + "epoch": 0.41, + "grad_norm": 0.5142859662281241, + "learning_rate": 1.3455132358107626e-05, + "loss": 0.3401, "step": 8840 }, { - "epoch": 0.51, - "grad_norm": 0.5305176733751163, - "learning_rate": 1.022887226374658e-05, - "loss": 0.4033, + "epoch": 0.41, + "grad_norm": 1.3893144746205917, + "learning_rate": 1.3453736035817206e-05, + "loss": 0.7966, "step": 8841 }, { - "epoch": 0.51, - "grad_norm": 0.20583299188599177, - "learning_rate": 1.022701183465329e-05, - "loss": 0.1745, + "epoch": 0.41, + "grad_norm": 0.33691657051846896, + "learning_rate": 1.34523396370644e-05, + "loss": 0.292, "step": 8842 }, { - "epoch": 0.51, - "grad_norm": 0.3890161245544743, - "learning_rate": 1.0225151397698597e-05, - "loss": 0.3028, + "epoch": 0.41, + "grad_norm": 0.34617687913703377, + "learning_rate": 1.3450943161880118e-05, + "loss": 0.2001, "step": 8843 }, { - "epoch": 0.51, - "grad_norm": 0.9808698140562502, - "learning_rate": 1.0223290952946914e-05, - "loss": 0.6994, + "epoch": 0.41, + "grad_norm": 0.2700806796461998, + "learning_rate": 1.3449546610295285e-05, + "loss": 0.1868, "step": 8844 }, { - "epoch": 0.51, - "grad_norm": 0.6121490823974144, - "learning_rate": 1.0221430500462677e-05, - "loss": 0.3077, + "epoch": 0.41, + "grad_norm": 0.4537734020686143, + "learning_rate": 1.3448149982340812e-05, + "loss": 0.3629, "step": 8845 }, { - "epoch": 0.51, - "grad_norm": 0.3066633720162846, - "learning_rate": 1.0219570040310312e-05, - "loss": 0.2655, + "epoch": 0.41, + "grad_norm": 0.3111853603913905, + "learning_rate": 1.3446753278047623e-05, + "loss": 0.2312, "step": 8846 }, { - "epoch": 0.51, - "grad_norm": 0.34538918095313825, - "learning_rate": 1.0217709572554247e-05, - "loss": 0.3273, + "epoch": 0.41, + "grad_norm": 0.8188901394909537, + "learning_rate": 1.3445356497446637e-05, + "loss": 0.5686, "step": 8847 }, { - "epoch": 0.51, - "grad_norm": 0.1859650911764527, - "learning_rate": 1.0215849097258905e-05, - "loss": 0.0884, + "epoch": 0.41, + "grad_norm": 0.7270922676575237, + "learning_rate": 1.344395964056878e-05, + "loss": 0.4779, "step": 8848 }, { - "epoch": 0.51, - "grad_norm": 0.3703599398367777, - "learning_rate": 1.0213988614488721e-05, - "loss": 0.289, + "epoch": 0.41, + "grad_norm": 0.3851836188579967, + "learning_rate": 1.3442562707444977e-05, + "loss": 0.2294, "step": 8849 }, { - "epoch": 0.51, - "grad_norm": 0.4604316756843436, - "learning_rate": 1.0212128124308121e-05, - "loss": 0.3816, + "epoch": 0.41, + "grad_norm": 0.36486827087896073, + "learning_rate": 1.3441165698106151e-05, + "loss": 0.2903, "step": 8850 }, { - "epoch": 0.51, - "grad_norm": 0.38250000158150516, - "learning_rate": 1.0210267626781532e-05, - "loss": 0.2585, + "epoch": 0.41, + "grad_norm": 0.6918410527416364, + "learning_rate": 1.3439768612583235e-05, + "loss": 0.4341, "step": 8851 }, { - "epoch": 0.51, - "grad_norm": 0.350693722826386, - "learning_rate": 1.0208407121973383e-05, - "loss": 0.3005, + "epoch": 0.41, + "grad_norm": 0.20101754153820162, + "learning_rate": 1.3438371450907155e-05, + "loss": 0.15, "step": 8852 }, { - "epoch": 0.51, - "grad_norm": 0.7957509715262783, - "learning_rate": 1.0206546609948107e-05, - "loss": 0.4827, + "epoch": 0.41, + "grad_norm": 0.36068821915518895, + "learning_rate": 1.343697421310885e-05, + "loss": 0.3159, "step": 8853 }, { - "epoch": 0.51, - "grad_norm": 0.235842723340732, - "learning_rate": 1.020468609077013e-05, - "loss": 0.216, + "epoch": 0.41, + "grad_norm": 1.1394677951882988, + "learning_rate": 1.3435576899219243e-05, + "loss": 0.6246, "step": 8854 }, { - "epoch": 0.51, - "grad_norm": 0.30607668526376075, - "learning_rate": 1.0202825564503885e-05, - "loss": 0.2021, + "epoch": 0.41, + "grad_norm": 0.372099713828924, + "learning_rate": 1.3434179509269278e-05, + "loss": 0.2701, "step": 8855 }, { - "epoch": 0.51, - "grad_norm": 1.1314724300561396, - "learning_rate": 1.0200965031213795e-05, - "loss": 0.7834, + "epoch": 0.41, + "grad_norm": 0.7275006693515084, + "learning_rate": 1.3432782043289887e-05, + "loss": 0.3347, "step": 8856 }, { - "epoch": 0.51, - "grad_norm": 0.7721586850426669, - "learning_rate": 1.0199104490964296e-05, - "loss": 0.4185, + "epoch": 0.41, + "grad_norm": 0.3983435069008562, + "learning_rate": 1.343138450131201e-05, + "loss": 0.3555, "step": 8857 }, { - "epoch": 0.51, - "grad_norm": 0.3153541338301299, - "learning_rate": 1.0197243943819816e-05, - "loss": 0.2, + "epoch": 0.41, + "grad_norm": 0.28979993223117084, + "learning_rate": 1.342998688336659e-05, + "loss": 0.2268, "step": 8858 }, { - "epoch": 0.51, - "grad_norm": 0.3830402764051056, - "learning_rate": 1.0195383389844789e-05, - "loss": 0.3396, + "epoch": 0.41, + "grad_norm": 0.29095056498331384, + "learning_rate": 1.3428589189484564e-05, + "loss": 0.1237, "step": 8859 }, { - "epoch": 0.51, - "grad_norm": 0.2716412665480148, - "learning_rate": 1.0193522829103643e-05, - "loss": 0.183, + "epoch": 0.41, + "grad_norm": 0.6707278199042213, + "learning_rate": 1.3427191419696876e-05, + "loss": 0.4639, "step": 8860 }, { - "epoch": 0.51, - "grad_norm": 0.3956289122519081, - "learning_rate": 1.0191662261660809e-05, - "loss": 0.2267, + "epoch": 0.41, + "grad_norm": 0.36855426718374623, + "learning_rate": 1.3425793574034476e-05, + "loss": 0.2795, "step": 8861 }, { - "epoch": 0.51, - "grad_norm": 0.3719950768589626, - "learning_rate": 1.018980168758072e-05, - "loss": 0.3198, + "epoch": 0.41, + "grad_norm": 0.4087644984246095, + "learning_rate": 1.3424395652528308e-05, + "loss": 0.2953, "step": 8862 }, { - "epoch": 0.51, - "grad_norm": 1.073061393618599, - "learning_rate": 1.0187941106927803e-05, - "loss": 0.4103, + "epoch": 0.41, + "grad_norm": 0.82146841635332, + "learning_rate": 1.3422997655209318e-05, + "loss": 0.3998, "step": 8863 }, { - "epoch": 0.51, - "grad_norm": 0.3268498389256042, - "learning_rate": 1.0186080519766499e-05, - "loss": 0.2171, + "epoch": 0.41, + "grad_norm": 0.40874296998314763, + "learning_rate": 1.3421599582108462e-05, + "loss": 0.3036, "step": 8864 }, { - "epoch": 0.51, - "grad_norm": 0.932570603873562, - "learning_rate": 1.0184219926161229e-05, - "loss": 0.5862, + "epoch": 0.41, + "grad_norm": 0.2759018259216273, + "learning_rate": 1.342020143325669e-05, + "loss": 0.1836, "step": 8865 }, { - "epoch": 0.51, - "grad_norm": 0.2876069287491476, - "learning_rate": 1.0182359326176437e-05, - "loss": 0.2418, + "epoch": 0.41, + "grad_norm": 0.780031928076596, + "learning_rate": 1.3418803208684951e-05, + "loss": 0.4358, "step": 8866 }, { - "epoch": 0.51, - "grad_norm": 0.2760938844417518, - "learning_rate": 1.0180498719876546e-05, - "loss": 0.2243, + "epoch": 0.41, + "grad_norm": 0.360339998618353, + "learning_rate": 1.3417404908424207e-05, + "loss": 0.2532, "step": 8867 }, { - "epoch": 0.51, - "grad_norm": 0.9738896002035565, - "learning_rate": 1.0178638107325993e-05, - "loss": 0.5697, + "epoch": 0.41, + "grad_norm": 0.7480806614099489, + "learning_rate": 1.341600653250541e-05, + "loss": 0.3903, "step": 8868 }, { - "epoch": 0.51, - "grad_norm": 0.9192592820798557, - "learning_rate": 1.0176777488589206e-05, - "loss": 0.4044, + "epoch": 0.41, + "grad_norm": 0.3692848632557834, + "learning_rate": 1.3414608080959521e-05, + "loss": 0.2725, "step": 8869 }, { - "epoch": 0.51, - "grad_norm": 0.2846380947105442, - "learning_rate": 1.0174916863730628e-05, - "loss": 0.244, + "epoch": 0.41, + "grad_norm": 0.3499696672606069, + "learning_rate": 1.34132095538175e-05, + "loss": 0.2732, "step": 8870 }, { - "epoch": 0.51, - "grad_norm": 0.4158621955491559, - "learning_rate": 1.0173056232814684e-05, - "loss": 0.2808, + "epoch": 0.41, + "grad_norm": 0.30073652702981063, + "learning_rate": 1.3411810951110311e-05, + "loss": 0.2172, "step": 8871 }, { - "epoch": 0.51, - "grad_norm": 0.4635268387198839, - "learning_rate": 1.0171195595905811e-05, - "loss": 0.2996, + "epoch": 0.41, + "grad_norm": 1.0619183971128496, + "learning_rate": 1.3410412272868915e-05, + "loss": 0.4187, "step": 8872 }, { - "epoch": 0.51, - "grad_norm": 0.33203805455122837, - "learning_rate": 1.0169334953068442e-05, - "loss": 0.2639, + "epoch": 0.41, + "grad_norm": 0.4114597958579147, + "learning_rate": 1.340901351912428e-05, + "loss": 0.2948, "step": 8873 }, { - "epoch": 0.51, - "grad_norm": 0.3697048151949442, - "learning_rate": 1.0167474304367011e-05, - "loss": 0.2824, + "epoch": 0.41, + "grad_norm": 0.41833466660848806, + "learning_rate": 1.3407614689907368e-05, + "loss": 0.3419, "step": 8874 }, { - "epoch": 0.51, - "grad_norm": 0.4844233725817165, - "learning_rate": 1.0165613649865951e-05, - "loss": 0.3137, + "epoch": 0.41, + "grad_norm": 0.9584470255701635, + "learning_rate": 1.3406215785249153e-05, + "loss": 0.2469, "step": 8875 }, { - "epoch": 0.51, - "grad_norm": 0.3984242851912228, - "learning_rate": 1.0163752989629698e-05, - "loss": 0.2471, + "epoch": 0.41, + "grad_norm": 0.28298234149738544, + "learning_rate": 1.3404816805180603e-05, + "loss": 0.2226, "step": 8876 }, { - "epoch": 0.51, - "grad_norm": 0.47180498037132856, - "learning_rate": 1.0161892323722684e-05, - "loss": 0.274, + "epoch": 0.41, + "grad_norm": 0.4766201009716676, + "learning_rate": 1.3403417749732693e-05, + "loss": 0.2939, "step": 8877 }, { - "epoch": 0.51, - "grad_norm": 0.3669597020882024, - "learning_rate": 1.0160031652209348e-05, - "loss": 0.2947, + "epoch": 0.41, + "grad_norm": 0.4622338815087517, + "learning_rate": 1.340201861893639e-05, + "loss": 0.2867, "step": 8878 }, { - "epoch": 0.51, - "grad_norm": 0.386735586286777, - "learning_rate": 1.0158170975154121e-05, - "loss": 0.2804, + "epoch": 0.41, + "grad_norm": 0.3794872248387296, + "learning_rate": 1.3400619412822675e-05, + "loss": 0.2809, "step": 8879 }, { - "epoch": 0.51, - "grad_norm": 0.472778223664258, - "learning_rate": 1.015631029262144e-05, - "loss": 0.3634, + "epoch": 0.41, + "grad_norm": 0.6966273280311868, + "learning_rate": 1.3399220131422524e-05, + "loss": 0.4611, "step": 8880 }, { - "epoch": 0.51, - "grad_norm": 0.31498560183851326, - "learning_rate": 1.0154449604675745e-05, - "loss": 0.2056, + "epoch": 0.41, + "grad_norm": 0.43061467780981344, + "learning_rate": 1.3397820774766917e-05, + "loss": 0.3098, "step": 8881 }, { - "epoch": 0.51, - "grad_norm": 0.31106123240309985, - "learning_rate": 1.015258891138146e-05, - "loss": 0.2597, + "epoch": 0.41, + "grad_norm": 0.3274878429252231, + "learning_rate": 1.3396421342886832e-05, + "loss": 0.1881, "step": 8882 }, { - "epoch": 0.51, - "grad_norm": 0.32163140709549864, - "learning_rate": 1.0150728212803034e-05, - "loss": 0.248, + "epoch": 0.41, + "grad_norm": 0.4954802742931216, + "learning_rate": 1.3395021835813251e-05, + "loss": 0.3005, "step": 8883 }, { - "epoch": 0.51, - "grad_norm": 1.0163468760891674, - "learning_rate": 1.0148867509004892e-05, - "loss": 0.3417, + "epoch": 0.41, + "grad_norm": 0.6593296652337535, + "learning_rate": 1.3393622253577158e-05, + "loss": 0.4505, "step": 8884 }, { - "epoch": 0.51, - "grad_norm": 0.3535521516167228, - "learning_rate": 1.0147006800051475e-05, - "loss": 0.273, + "epoch": 0.41, + "grad_norm": 0.30096762982435143, + "learning_rate": 1.3392222596209541e-05, + "loss": 0.195, "step": 8885 }, { - "epoch": 0.51, - "grad_norm": 0.32974085632081673, - "learning_rate": 1.0145146086007219e-05, - "loss": 0.3063, + "epoch": 0.41, + "grad_norm": 0.3007338612402302, + "learning_rate": 1.3390822863741384e-05, + "loss": 0.2582, "step": 8886 }, { - "epoch": 0.51, - "grad_norm": 0.37848673394257054, - "learning_rate": 1.0143285366936562e-05, - "loss": 0.2336, + "epoch": 0.41, + "grad_norm": 1.0239192141914364, + "learning_rate": 1.3389423056203679e-05, + "loss": 0.5669, "step": 8887 }, { - "epoch": 0.51, - "grad_norm": 0.27078939239282207, - "learning_rate": 1.0141424642903936e-05, - "loss": 0.208, + "epoch": 0.41, + "grad_norm": 0.34995993379880824, + "learning_rate": 1.3388023173627413e-05, + "loss": 0.1901, "step": 8888 }, { - "epoch": 0.51, - "grad_norm": 1.0221622920030797, - "learning_rate": 1.0139563913973787e-05, - "loss": 0.5142, + "epoch": 0.41, + "grad_norm": 0.319735617535886, + "learning_rate": 1.338662321604358e-05, + "loss": 0.2712, "step": 8889 }, { - "epoch": 0.51, - "grad_norm": 0.34970550608599016, - "learning_rate": 1.0137703180210538e-05, - "loss": 0.2834, + "epoch": 0.41, + "grad_norm": 1.4907212178380154, + "learning_rate": 1.3385223183483169e-05, + "loss": 0.5113, "step": 8890 }, { - "epoch": 0.51, - "grad_norm": 0.35189717797611997, - "learning_rate": 1.0135842441678639e-05, - "loss": 0.2754, + "epoch": 0.41, + "grad_norm": 0.38749623322000315, + "learning_rate": 1.3383823075977185e-05, + "loss": 0.2429, "step": 8891 }, { - "epoch": 0.51, - "grad_norm": 0.6613352038736564, - "learning_rate": 1.0133981698442519e-05, - "loss": 0.411, + "epoch": 0.41, + "grad_norm": 0.45320028208011, + "learning_rate": 1.3382422893556617e-05, + "loss": 0.3141, "step": 8892 }, { - "epoch": 0.51, - "grad_norm": 0.49333120019293597, - "learning_rate": 1.013212095056662e-05, - "loss": 0.4239, + "epoch": 0.41, + "grad_norm": 0.4131838699877759, + "learning_rate": 1.3381022636252466e-05, + "loss": 0.3442, "step": 8893 }, { - "epoch": 0.51, - "grad_norm": 0.2691281560347184, - "learning_rate": 1.0130260198115376e-05, - "loss": 0.2087, + "epoch": 0.41, + "grad_norm": 0.42251298101776746, + "learning_rate": 1.3379622304095734e-05, + "loss": 0.2637, "step": 8894 }, { - "epoch": 0.51, - "grad_norm": 0.25133156816965974, - "learning_rate": 1.012839944115323e-05, - "loss": 0.1896, + "epoch": 0.41, + "grad_norm": 1.0288784137594331, + "learning_rate": 1.337822189711742e-05, + "loss": 0.4013, "step": 8895 }, { - "epoch": 0.51, - "grad_norm": 0.644446483143422, - "learning_rate": 1.0126538679744615e-05, - "loss": 0.4346, + "epoch": 0.41, + "grad_norm": 0.9357166800909519, + "learning_rate": 1.337682141534853e-05, + "loss": 0.4471, "step": 8896 }, { - "epoch": 0.51, - "grad_norm": 0.31080535051286085, - "learning_rate": 1.0124677913953971e-05, - "loss": 0.2094, + "epoch": 0.41, + "grad_norm": 0.2971423875894324, + "learning_rate": 1.3375420858820067e-05, + "loss": 0.2584, "step": 8897 }, { - "epoch": 0.51, - "grad_norm": 0.2925056576358622, - "learning_rate": 1.0122817143845736e-05, - "loss": 0.29, + "epoch": 0.41, + "grad_norm": 0.23648286514364847, + "learning_rate": 1.337402022756304e-05, + "loss": 0.1466, "step": 8898 }, { - "epoch": 0.51, - "grad_norm": 1.1700861136940273, - "learning_rate": 1.0120956369484352e-05, - "loss": 0.8421, + "epoch": 0.41, + "grad_norm": 1.3099631984023072, + "learning_rate": 1.3372619521608459e-05, + "loss": 0.5497, "step": 8899 }, { - "epoch": 0.51, - "grad_norm": 0.18646929013358124, - "learning_rate": 1.011909559093425e-05, - "loss": 0.1259, + "epoch": 0.41, + "grad_norm": 0.44831279957872217, + "learning_rate": 1.3371218740987334e-05, + "loss": 0.2918, "step": 8900 }, { - "epoch": 0.51, - "grad_norm": 0.5031315658177309, - "learning_rate": 1.0117234808259875e-05, - "loss": 0.3329, + "epoch": 0.41, + "grad_norm": 0.344533256630212, + "learning_rate": 1.3369817885730667e-05, + "loss": 0.2547, "step": 8901 }, { - "epoch": 0.51, - "grad_norm": 0.3837696227726515, - "learning_rate": 1.0115374021525664e-05, - "loss": 0.2908, + "epoch": 0.41, + "grad_norm": 0.9313573102771588, + "learning_rate": 1.3368416955869487e-05, + "loss": 0.4577, "step": 8902 }, { - "epoch": 0.51, - "grad_norm": 0.3224777264779916, - "learning_rate": 1.0113513230796052e-05, - "loss": 0.2378, + "epoch": 0.41, + "grad_norm": 0.4081045282033635, + "learning_rate": 1.3367015951434798e-05, + "loss": 0.2677, "step": 8903 }, { - "epoch": 0.51, - "grad_norm": 0.7486941413631881, - "learning_rate": 1.0111652436135486e-05, - "loss": 0.4879, + "epoch": 0.41, + "grad_norm": 0.2693074964926061, + "learning_rate": 1.3365614872457627e-05, + "loss": 0.1497, "step": 8904 }, { - "epoch": 0.51, - "grad_norm": 0.4577781436154072, - "learning_rate": 1.01097916376084e-05, - "loss": 0.4012, + "epoch": 0.41, + "grad_norm": 0.4084497218868415, + "learning_rate": 1.3364213718968981e-05, + "loss": 0.3142, "step": 8905 }, { - "epoch": 0.51, - "grad_norm": 0.3227090641491666, - "learning_rate": 1.0107930835279234e-05, - "loss": 0.2838, + "epoch": 0.41, + "grad_norm": 0.45328938099614763, + "learning_rate": 1.3362812490999888e-05, + "loss": 0.3199, "step": 8906 }, { - "epoch": 0.51, - "grad_norm": 0.25374936217959587, - "learning_rate": 1.0106070029212424e-05, - "loss": 0.1262, + "epoch": 0.41, + "grad_norm": 0.5529362532730133, + "learning_rate": 1.3361411188581368e-05, + "loss": 0.3945, "step": 8907 }, { - "epoch": 0.51, - "grad_norm": 0.6524870029201337, - "learning_rate": 1.0104209219472418e-05, - "loss": 0.3958, + "epoch": 0.41, + "grad_norm": 0.9329231184712226, + "learning_rate": 1.3360009811744444e-05, + "loss": 0.3928, "step": 8908 }, { - "epoch": 0.51, - "grad_norm": 0.37586082968520385, - "learning_rate": 1.010234840612365e-05, - "loss": 0.2897, + "epoch": 0.41, + "grad_norm": 0.3042853210492065, + "learning_rate": 1.3358608360520138e-05, + "loss": 0.2768, "step": 8909 }, { - "epoch": 0.51, - "grad_norm": 0.3307571873747576, - "learning_rate": 1.010048758923056e-05, - "loss": 0.2648, + "epoch": 0.41, + "grad_norm": 0.25993578280891794, + "learning_rate": 1.3357206834939483e-05, + "loss": 0.2158, "step": 8910 }, { - "epoch": 0.51, - "grad_norm": 0.8419980738165997, - "learning_rate": 1.0098626768857591e-05, - "loss": 0.6101, + "epoch": 0.41, + "grad_norm": 1.1308517700880316, + "learning_rate": 1.3355805235033503e-05, + "loss": 0.2575, "step": 8911 }, { - "epoch": 0.51, - "grad_norm": 0.6204223015862753, - "learning_rate": 1.009676594506918e-05, - "loss": 0.3407, + "epoch": 0.41, + "grad_norm": 0.42482511897671343, + "learning_rate": 1.3354403560833232e-05, + "loss": 0.305, "step": 8912 }, { - "epoch": 0.51, - "grad_norm": 0.28043986557667766, - "learning_rate": 1.0094905117929767e-05, - "loss": 0.1755, + "epoch": 0.41, + "grad_norm": 0.4338363460367922, + "learning_rate": 1.3353001812369696e-05, + "loss": 0.317, "step": 8913 }, { - "epoch": 0.51, - "grad_norm": 0.35706107090418515, - "learning_rate": 1.0093044287503797e-05, - "loss": 0.3173, + "epoch": 0.41, + "grad_norm": 0.7129310475429949, + "learning_rate": 1.3351599989673934e-05, + "loss": 0.333, "step": 8914 }, { - "epoch": 0.51, - "grad_norm": 0.564795786408195, - "learning_rate": 1.0091183453855706e-05, - "loss": 0.2933, + "epoch": 0.41, + "grad_norm": 0.35652947805842033, + "learning_rate": 1.3350198092776977e-05, + "loss": 0.2611, "step": 8915 }, { - "epoch": 0.51, - "grad_norm": 0.37689587316033274, - "learning_rate": 1.0089322617049936e-05, - "loss": 0.3088, + "epoch": 0.41, + "grad_norm": 0.43798474629042206, + "learning_rate": 1.3348796121709862e-05, + "loss": 0.2925, "step": 8916 }, { - "epoch": 0.51, - "grad_norm": 0.4513654962243863, - "learning_rate": 1.0087461777150926e-05, - "loss": 0.2704, + "epoch": 0.41, + "grad_norm": 0.3153409776203602, + "learning_rate": 1.334739407650363e-05, + "loss": 0.2183, "step": 8917 }, { - "epoch": 0.51, - "grad_norm": 0.3927024710302205, - "learning_rate": 1.0085600934223121e-05, - "loss": 0.2996, + "epoch": 0.41, + "grad_norm": 0.40619524857895895, + "learning_rate": 1.3345991957189322e-05, + "loss": 0.2589, "step": 8918 }, { - "epoch": 0.51, - "grad_norm": 0.26729943984490195, - "learning_rate": 1.008374008833096e-05, - "loss": 0.2113, + "epoch": 0.41, + "grad_norm": 0.5089885280562814, + "learning_rate": 1.3344589763797973e-05, + "loss": 0.3426, "step": 8919 }, { - "epoch": 0.51, - "grad_norm": 0.6478463082866031, - "learning_rate": 1.0081879239538881e-05, - "loss": 0.332, + "epoch": 0.41, + "grad_norm": 0.5057943231440046, + "learning_rate": 1.3343187496360632e-05, + "loss": 0.3982, "step": 8920 }, { - "epoch": 0.51, - "grad_norm": 0.3996713800799195, - "learning_rate": 1.0080018387911328e-05, - "loss": 0.2707, + "epoch": 0.41, + "grad_norm": 0.3578569427712817, + "learning_rate": 1.3341785154908342e-05, + "loss": 0.2208, "step": 8921 }, { - "epoch": 0.51, - "grad_norm": 0.3221994879121444, - "learning_rate": 1.0078157533512742e-05, - "loss": 0.3001, + "epoch": 0.41, + "grad_norm": 0.33046451908412405, + "learning_rate": 1.334038273947215e-05, + "loss": 0.2296, "step": 8922 }, { - "epoch": 0.51, - "grad_norm": 0.8538514540552568, - "learning_rate": 1.0076296676407565e-05, - "loss": 0.404, + "epoch": 0.41, + "grad_norm": 0.4452025283139508, + "learning_rate": 1.3338980250083102e-05, + "loss": 0.294, "step": 8923 }, { - "epoch": 0.51, - "grad_norm": 0.41161279723696287, - "learning_rate": 1.0074435816660235e-05, - "loss": 0.2951, + "epoch": 0.41, + "grad_norm": 0.3848990428093696, + "learning_rate": 1.3337577686772252e-05, + "loss": 0.215, "step": 8924 }, { - "epoch": 0.51, - "grad_norm": 0.35715732608291045, - "learning_rate": 1.00725749543352e-05, - "loss": 0.2687, + "epoch": 0.41, + "grad_norm": 0.2971459010138684, + "learning_rate": 1.3336175049570646e-05, + "loss": 0.3198, "step": 8925 }, { - "epoch": 0.51, - "grad_norm": 0.3368658460212253, - "learning_rate": 1.0070714089496891e-05, - "loss": 0.2479, + "epoch": 0.41, + "grad_norm": 0.8949721688818129, + "learning_rate": 1.3334772338509341e-05, + "loss": 0.5932, "step": 8926 }, { - "epoch": 0.51, - "grad_norm": 0.44449844022617024, - "learning_rate": 1.006885322220976e-05, - "loss": 0.3089, + "epoch": 0.41, + "grad_norm": 0.3556011302631098, + "learning_rate": 1.3333369553619388e-05, + "loss": 0.2007, "step": 8927 }, { - "epoch": 0.51, - "grad_norm": 0.5794718664509232, - "learning_rate": 1.0066992352538245e-05, - "loss": 0.3989, + "epoch": 0.41, + "grad_norm": 0.2767542376674371, + "learning_rate": 1.333196669493185e-05, + "loss": 0.2055, "step": 8928 }, { - "epoch": 0.51, - "grad_norm": 0.40439067942242224, - "learning_rate": 1.0065131480546788e-05, - "loss": 0.327, + "epoch": 0.41, + "grad_norm": 0.3832328229036367, + "learning_rate": 1.333056376247778e-05, + "loss": 0.3078, "step": 8929 }, { - "epoch": 0.51, - "grad_norm": 0.7094782461959981, - "learning_rate": 1.006327060629983e-05, - "loss": 0.2071, + "epoch": 0.41, + "grad_norm": 0.3833514183119835, + "learning_rate": 1.3329160756288237e-05, + "loss": 0.2567, "step": 8930 }, { - "epoch": 0.51, - "grad_norm": 0.32681962706815443, - "learning_rate": 1.0061409729861814e-05, - "loss": 0.3041, + "epoch": 0.41, + "grad_norm": 0.4874578743272871, + "learning_rate": 1.3327757676394284e-05, + "loss": 0.3634, "step": 8931 }, { - "epoch": 0.51, - "grad_norm": 0.29148296068996754, - "learning_rate": 1.0059548851297178e-05, - "loss": 0.2023, + "epoch": 0.41, + "grad_norm": 1.1772221293806329, + "learning_rate": 1.3326354522826983e-05, + "loss": 0.7295, "step": 8932 }, { - "epoch": 0.51, - "grad_norm": 0.3429142652730741, - "learning_rate": 1.0057687970670372e-05, - "loss": 0.2153, + "epoch": 0.41, + "grad_norm": 0.3060047758006496, + "learning_rate": 1.3324951295617398e-05, + "loss": 0.2648, "step": 8933 }, { - "epoch": 0.51, - "grad_norm": 0.34103484654619365, - "learning_rate": 1.005582708804583e-05, - "loss": 0.2975, + "epoch": 0.41, + "grad_norm": 0.4279310065372146, + "learning_rate": 1.3323547994796597e-05, + "loss": 0.2496, "step": 8934 }, { - "epoch": 0.51, - "grad_norm": 1.0197996496950272, - "learning_rate": 1.0053966203488003e-05, - "loss": 0.677, + "epoch": 0.41, + "grad_norm": 0.4266465051863254, + "learning_rate": 1.3322144620395648e-05, + "loss": 0.2212, "step": 8935 }, { - "epoch": 0.51, - "grad_norm": 0.36527832406679867, - "learning_rate": 1.0052105317061327e-05, - "loss": 0.182, + "epoch": 0.41, + "grad_norm": 0.5185478312006149, + "learning_rate": 1.3320741172445616e-05, + "loss": 0.3454, "step": 8936 }, { - "epoch": 0.51, - "grad_norm": 0.29033341390121514, - "learning_rate": 1.0050244428830246e-05, - "loss": 0.2933, + "epoch": 0.41, + "grad_norm": 0.3129177121334974, + "learning_rate": 1.3319337650977579e-05, + "loss": 0.2555, "step": 8937 }, { - "epoch": 0.51, - "grad_norm": 0.42647799922546814, - "learning_rate": 1.0048383538859202e-05, - "loss": 0.354, + "epoch": 0.41, + "grad_norm": 1.3915010247032102, + "learning_rate": 1.3317934056022603e-05, + "loss": 0.8919, "step": 8938 }, { - "epoch": 0.51, - "grad_norm": 0.1574000968867463, - "learning_rate": 1.0046522647212642e-05, - "loss": 0.087, + "epoch": 0.41, + "grad_norm": 0.8352484302333893, + "learning_rate": 1.3316530387611766e-05, + "loss": 0.4441, "step": 8939 }, { - "epoch": 0.51, - "grad_norm": 0.41188216839380715, - "learning_rate": 1.0044661753955001e-05, - "loss": 0.3561, + "epoch": 0.41, + "grad_norm": 0.2898286228083498, + "learning_rate": 1.331512664577614e-05, + "loss": 0.1757, "step": 8940 }, { - "epoch": 0.51, - "grad_norm": 0.4381041332600563, - "learning_rate": 1.0042800859150726e-05, - "loss": 0.3384, + "epoch": 0.41, + "grad_norm": 0.3249272638365113, + "learning_rate": 1.331372283054681e-05, + "loss": 0.2975, "step": 8941 }, { - "epoch": 0.51, - "grad_norm": 0.3689628610234497, - "learning_rate": 1.0040939962864258e-05, - "loss": 0.311, + "epoch": 0.41, + "grad_norm": 0.6108816346014615, + "learning_rate": 1.3312318941954846e-05, + "loss": 0.3701, "step": 8942 }, { - "epoch": 0.51, - "grad_norm": 0.3731679048957679, - "learning_rate": 1.0039079065160042e-05, - "loss": 0.2616, + "epoch": 0.41, + "grad_norm": 0.3740221654399229, + "learning_rate": 1.3310914980031335e-05, + "loss": 0.234, "step": 8943 }, { - "epoch": 0.51, - "grad_norm": 0.41815692396401, - "learning_rate": 1.0037218166102518e-05, - "loss": 0.2991, + "epoch": 0.41, + "grad_norm": 1.3205964361097111, + "learning_rate": 1.3309510944807355e-05, + "loss": 0.8254, "step": 8944 }, { - "epoch": 0.51, - "grad_norm": 0.2680388741950821, - "learning_rate": 1.0035357265756134e-05, - "loss": 0.2294, + "epoch": 0.41, + "grad_norm": 0.341928256271378, + "learning_rate": 1.3308106836313996e-05, + "loss": 0.2684, "step": 8945 }, { - "epoch": 0.51, - "grad_norm": 0.3967633759634121, - "learning_rate": 1.003349636418533e-05, - "loss": 0.2559, + "epoch": 0.41, + "grad_norm": 0.4637535038594764, + "learning_rate": 1.330670265458234e-05, + "loss": 0.3169, "step": 8946 }, { - "epoch": 0.51, - "grad_norm": 0.7089779370160822, - "learning_rate": 1.0031635461454544e-05, - "loss": 0.5292, + "epoch": 0.41, + "grad_norm": 0.8777589709084394, + "learning_rate": 1.3305298399643474e-05, + "loss": 0.362, "step": 8947 }, { - "epoch": 0.51, - "grad_norm": 0.8142086961246247, - "learning_rate": 1.0029774557628224e-05, - "loss": 0.3636, + "epoch": 0.41, + "grad_norm": 0.28814130025307433, + "learning_rate": 1.3303894071528485e-05, + "loss": 0.2245, "step": 8948 }, { - "epoch": 0.51, - "grad_norm": 0.3104295700200426, - "learning_rate": 1.0027913652770813e-05, - "loss": 0.2351, + "epoch": 0.41, + "grad_norm": 0.31895025537374105, + "learning_rate": 1.3302489670268466e-05, + "loss": 0.2438, "step": 8949 }, { - "epoch": 0.51, - "grad_norm": 0.3667967386304453, - "learning_rate": 1.0026052746946756e-05, - "loss": 0.2733, + "epoch": 0.41, + "grad_norm": 0.9849895134683403, + "learning_rate": 1.3301085195894507e-05, + "loss": 0.5536, "step": 8950 }, { - "epoch": 0.51, - "grad_norm": 0.45760292541987996, - "learning_rate": 1.002419184022049e-05, - "loss": 0.282, + "epoch": 0.41, + "grad_norm": 0.6518251820710435, + "learning_rate": 1.3299680648437707e-05, + "loss": 0.3751, "step": 8951 }, { - "epoch": 0.51, - "grad_norm": 0.2975732477040278, - "learning_rate": 1.0022330932656463e-05, - "loss": 0.2097, + "epoch": 0.41, + "grad_norm": 0.4527310430043478, + "learning_rate": 1.3298276027929158e-05, + "loss": 0.3278, "step": 8952 }, { - "epoch": 0.51, - "grad_norm": 0.36123061495278136, - "learning_rate": 1.0020470024319115e-05, - "loss": 0.2913, + "epoch": 0.41, + "grad_norm": 0.5137272781195599, + "learning_rate": 1.3296871334399955e-05, + "loss": 0.2968, "step": 8953 }, { - "epoch": 0.51, - "grad_norm": 0.6903017856807934, - "learning_rate": 1.0018609115272896e-05, - "loss": 0.3646, + "epoch": 0.41, + "grad_norm": 0.2697688705554102, + "learning_rate": 1.3295466567881198e-05, + "loss": 0.169, "step": 8954 }, { - "epoch": 0.51, - "grad_norm": 0.34900373266645707, - "learning_rate": 1.0016748205582238e-05, - "loss": 0.2853, + "epoch": 0.41, + "grad_norm": 0.40870246536529564, + "learning_rate": 1.3294061728403986e-05, + "loss": 0.2903, "step": 8955 }, { - "epoch": 0.51, - "grad_norm": 0.6639895995416654, - "learning_rate": 1.0014887295311595e-05, - "loss": 0.3339, + "epoch": 0.41, + "grad_norm": 0.6238460702430969, + "learning_rate": 1.3292656815999426e-05, + "loss": 0.4113, "step": 8956 }, { - "epoch": 0.51, - "grad_norm": 0.21779948373374317, - "learning_rate": 1.0013026384525404e-05, - "loss": 0.2023, + "epoch": 0.41, + "grad_norm": 0.4140332092046376, + "learning_rate": 1.3291251830698615e-05, + "loss": 0.242, "step": 8957 }, { - "epoch": 0.51, - "grad_norm": 0.3989690662409945, - "learning_rate": 1.0011165473288108e-05, - "loss": 0.3183, + "epoch": 0.41, + "grad_norm": 0.41323845411892096, + "learning_rate": 1.3289846772532663e-05, + "loss": 0.3406, "step": 8958 }, { - "epoch": 0.51, - "grad_norm": 0.9027589397240701, - "learning_rate": 1.0009304561664154e-05, - "loss": 0.359, + "epoch": 0.41, + "grad_norm": 0.6255248437792479, + "learning_rate": 1.328844164153267e-05, + "loss": 0.4566, "step": 8959 }, { - "epoch": 0.51, - "grad_norm": 0.6025413935849674, - "learning_rate": 1.0007443649717985e-05, - "loss": 0.3867, + "epoch": 0.41, + "grad_norm": 0.2921177105939241, + "learning_rate": 1.3287036437729753e-05, + "loss": 0.1713, "step": 8960 }, { - "epoch": 0.51, - "grad_norm": 0.3535480063313833, - "learning_rate": 1.0005582737514039e-05, - "loss": 0.2922, + "epoch": 0.41, + "grad_norm": 0.27839447788952537, + "learning_rate": 1.3285631161155013e-05, + "loss": 0.2628, "step": 8961 }, { - "epoch": 0.51, - "grad_norm": 0.34940809513436644, - "learning_rate": 1.0003721825116766e-05, - "loss": 0.2565, + "epoch": 0.41, + "grad_norm": 1.2975321286625572, + "learning_rate": 1.3284225811839568e-05, + "loss": 0.8504, "step": 8962 }, { - "epoch": 0.51, - "grad_norm": 0.23572550353217794, - "learning_rate": 1.0001860912590604e-05, - "loss": 0.1681, + "epoch": 0.41, + "grad_norm": 0.521140196841499, + "learning_rate": 1.3282820389814527e-05, + "loss": 0.2734, "step": 8963 }, { - "epoch": 0.52, - "grad_norm": 0.4371986239915237, - "learning_rate": 1e-05, - "loss": 0.3286, + "epoch": 0.41, + "grad_norm": 0.3842503400147209, + "learning_rate": 1.3281414895111011e-05, + "loss": 0.2814, "step": 8964 }, { - "epoch": 0.52, - "grad_norm": 0.5882681268821435, - "learning_rate": 9.998139087409399e-06, - "loss": 0.3003, + "epoch": 0.41, + "grad_norm": 0.5213603419034515, + "learning_rate": 1.3280009327760129e-05, + "loss": 0.3707, "step": 8965 }, { - "epoch": 0.52, - "grad_norm": 0.998562898170738, - "learning_rate": 9.996278174883236e-06, - "loss": 0.4324, + "epoch": 0.41, + "grad_norm": 0.4221994415334029, + "learning_rate": 1.3278603687793003e-05, + "loss": 0.1708, "step": 8966 }, { - "epoch": 0.52, - "grad_norm": 0.32535718394732693, - "learning_rate": 9.994417262485963e-06, - "loss": 0.2808, + "epoch": 0.41, + "grad_norm": 0.3107497415006731, + "learning_rate": 1.327719797524075e-05, + "loss": 0.2412, "step": 8967 }, { - "epoch": 0.52, - "grad_norm": 0.8845442812310932, - "learning_rate": 9.992556350282018e-06, - "loss": 0.5168, + "epoch": 0.41, + "grad_norm": 0.5167556919738907, + "learning_rate": 1.3275792190134493e-05, + "loss": 0.389, "step": 8968 }, { - "epoch": 0.52, - "grad_norm": 0.2605945618913011, - "learning_rate": 9.990695438335847e-06, - "loss": 0.2, + "epoch": 0.41, + "grad_norm": 0.35225293364639143, + "learning_rate": 1.3274386332505353e-05, + "loss": 0.2446, "step": 8969 }, { - "epoch": 0.52, - "grad_norm": 0.3912375752348602, - "learning_rate": 9.988834526711893e-06, - "loss": 0.267, + "epoch": 0.41, + "grad_norm": 0.3991350944388709, + "learning_rate": 1.3272980402384459e-05, + "loss": 0.2457, "step": 8970 }, { - "epoch": 0.52, - "grad_norm": 0.8202549742402676, - "learning_rate": 9.9869736154746e-06, - "loss": 0.4242, + "epoch": 0.41, + "grad_norm": 0.6628228444375425, + "learning_rate": 1.3271574399802931e-05, + "loss": 0.4298, "step": 8971 }, { - "epoch": 0.52, - "grad_norm": 0.2838554077787522, - "learning_rate": 9.985112704688406e-06, - "loss": 0.1891, + "epoch": 0.41, + "grad_norm": 0.3757419282214292, + "learning_rate": 1.3270168324791896e-05, + "loss": 0.3108, "step": 8972 }, { - "epoch": 0.52, - "grad_norm": 0.2958047596139349, - "learning_rate": 9.983251794417763e-06, - "loss": 0.2796, + "epoch": 0.41, + "grad_norm": 0.25862282789655244, + "learning_rate": 1.3268762177382492e-05, + "loss": 0.1749, "step": 8973 }, { - "epoch": 0.52, - "grad_norm": 1.146356366991916, - "learning_rate": 9.981390884727106e-06, - "loss": 0.6581, + "epoch": 0.41, + "grad_norm": 0.5382103070163592, + "learning_rate": 1.3267355957605839e-05, + "loss": 0.3864, "step": 8974 }, { - "epoch": 0.52, - "grad_norm": 0.4452802708748933, - "learning_rate": 9.979529975680885e-06, - "loss": 0.2432, + "epoch": 0.41, + "grad_norm": 0.9606969651938579, + "learning_rate": 1.3265949665493077e-05, + "loss": 0.4271, "step": 8975 }, { - "epoch": 0.52, - "grad_norm": 0.38232368072779344, - "learning_rate": 9.977669067343537e-06, - "loss": 0.3037, + "epoch": 0.41, + "grad_norm": 0.30954799608278394, + "learning_rate": 1.3264543301075336e-05, + "loss": 0.2379, "step": 8976 }, { - "epoch": 0.52, - "grad_norm": 0.3709480557645404, - "learning_rate": 9.975808159779512e-06, - "loss": 0.2857, + "epoch": 0.41, + "grad_norm": 0.4569552160956904, + "learning_rate": 1.3263136864383755e-05, + "loss": 0.3555, "step": 8977 }, { - "epoch": 0.52, - "grad_norm": 0.2848080628979088, - "learning_rate": 9.973947253053248e-06, - "loss": 0.1488, + "epoch": 0.41, + "grad_norm": 1.2361221481596918, + "learning_rate": 1.3261730355449464e-05, + "loss": 0.6905, "step": 8978 }, { - "epoch": 0.52, - "grad_norm": 0.36194839241988114, - "learning_rate": 9.972086347229187e-06, - "loss": 0.3028, + "epoch": 0.41, + "grad_norm": 0.32699749434924574, + "learning_rate": 1.3260323774303612e-05, + "loss": 0.225, "step": 8979 }, { - "epoch": 0.52, - "grad_norm": 0.9172902205994092, - "learning_rate": 9.970225442371778e-06, - "loss": 0.5909, + "epoch": 0.41, + "grad_norm": 0.5436810652585612, + "learning_rate": 1.3258917120977327e-05, + "loss": 0.4361, "step": 8980 }, { - "epoch": 0.52, - "grad_norm": 0.3406603301566798, - "learning_rate": 9.968364538545461e-06, - "loss": 0.3361, + "epoch": 0.41, + "grad_norm": 0.41270259371223267, + "learning_rate": 1.3257510395501766e-05, + "loss": 0.2856, "step": 8981 }, { - "epoch": 0.52, - "grad_norm": 0.3044827668809311, - "learning_rate": 9.966503635814677e-06, - "loss": 0.2007, + "epoch": 0.41, + "grad_norm": 0.328897229364264, + "learning_rate": 1.325610359790806e-05, + "loss": 0.2705, "step": 8982 }, { - "epoch": 0.52, - "grad_norm": 0.5498909403562776, - "learning_rate": 9.96464273424387e-06, - "loss": 0.4395, + "epoch": 0.41, + "grad_norm": 0.31606888868494354, + "learning_rate": 1.325469672822736e-05, + "loss": 0.1124, "step": 8983 }, { - "epoch": 0.52, - "grad_norm": 0.2707344192696009, - "learning_rate": 9.962781833897484e-06, - "loss": 0.2111, + "epoch": 0.41, + "grad_norm": 0.4166250327652007, + "learning_rate": 1.3253289786490812e-05, + "loss": 0.3023, "step": 8984 }, { - "epoch": 0.52, - "grad_norm": 0.27766021070239455, - "learning_rate": 9.960920934839963e-06, - "loss": 0.2124, + "epoch": 0.41, + "grad_norm": 0.3704296370329955, + "learning_rate": 1.325188277272956e-05, + "loss": 0.2855, "step": 8985 }, { - "epoch": 0.52, - "grad_norm": 1.0646904612166812, - "learning_rate": 9.959060037135745e-06, - "loss": 0.5268, + "epoch": 0.41, + "grad_norm": 0.8221563950085035, + "learning_rate": 1.3250475686974762e-05, + "loss": 0.3869, "step": 8986 }, { - "epoch": 0.52, - "grad_norm": 0.7117116899008378, - "learning_rate": 9.95719914084928e-06, - "loss": 0.4817, + "epoch": 0.41, + "grad_norm": 0.4896714148057406, + "learning_rate": 1.3249068529257562e-05, + "loss": 0.295, "step": 8987 }, { - "epoch": 0.52, - "grad_norm": 0.2762098719031825, - "learning_rate": 9.955338246045004e-06, - "loss": 0.2282, + "epoch": 0.41, + "grad_norm": 0.38847287241181544, + "learning_rate": 1.324766129960912e-05, + "loss": 0.3066, "step": 8988 }, { - "epoch": 0.52, - "grad_norm": 0.3723392795742885, - "learning_rate": 9.953477352787363e-06, - "loss": 0.3297, + "epoch": 0.41, + "grad_norm": 0.26426543681400977, + "learning_rate": 1.3246253998060584e-05, + "loss": 0.2125, "step": 8989 }, { - "epoch": 0.52, - "grad_norm": 0.4044795205958676, - "learning_rate": 9.9516164611408e-06, - "loss": 0.2681, + "epoch": 0.41, + "grad_norm": 0.7794595006630068, + "learning_rate": 1.3244846624643115e-05, + "loss": 0.4922, "step": 8990 }, { - "epoch": 0.52, - "grad_norm": 0.2391132673895818, - "learning_rate": 9.949755571169757e-06, - "loss": 0.2128, + "epoch": 0.41, + "grad_norm": 0.3946328849607798, + "learning_rate": 1.3243439179387867e-05, + "loss": 0.2861, "step": 8991 }, { - "epoch": 0.52, - "grad_norm": 0.9771484815283424, - "learning_rate": 9.947894682938676e-06, - "loss": 0.2187, + "epoch": 0.41, + "grad_norm": 0.4065140732351681, + "learning_rate": 1.3242031662326003e-05, + "loss": 0.2953, "step": 8992 }, { - "epoch": 0.52, - "grad_norm": 0.3742803679785533, - "learning_rate": 9.946033796511999e-06, - "loss": 0.3154, + "epoch": 0.41, + "grad_norm": 0.7482514676758332, + "learning_rate": 1.324062407348868e-05, + "loss": 0.427, "step": 8993 }, { - "epoch": 0.52, - "grad_norm": 0.36770528833525723, - "learning_rate": 9.944172911954173e-06, - "loss": 0.2913, + "epoch": 0.41, + "grad_norm": 0.3422742743190214, + "learning_rate": 1.3239216412907068e-05, + "loss": 0.2555, "step": 8994 }, { - "epoch": 0.52, - "grad_norm": 0.7614209857492386, - "learning_rate": 9.942312029329631e-06, - "loss": 0.4018, + "epoch": 0.41, + "grad_norm": 0.3789458853092186, + "learning_rate": 1.3237808680612323e-05, + "loss": 0.2641, "step": 8995 }, { - "epoch": 0.52, - "grad_norm": 0.27484815842745103, - "learning_rate": 9.940451148702826e-06, - "loss": 0.2612, + "epoch": 0.41, + "grad_norm": 0.3639989799434384, + "learning_rate": 1.3236400876635616e-05, + "loss": 0.2484, "step": 8996 }, { - "epoch": 0.52, - "grad_norm": 0.31502147368921235, - "learning_rate": 9.938590270138191e-06, - "loss": 0.2551, + "epoch": 0.41, + "grad_norm": 0.35703365534003695, + "learning_rate": 1.3234993001008112e-05, + "loss": 0.27, "step": 8997 }, { - "epoch": 0.52, - "grad_norm": 0.43320342033214754, - "learning_rate": 9.936729393700176e-06, - "loss": 0.1657, + "epoch": 0.41, + "grad_norm": 0.8437915356119837, + "learning_rate": 1.3233585053760982e-05, + "loss": 0.4841, "step": 8998 }, { - "epoch": 0.52, - "grad_norm": 0.7310020498893697, - "learning_rate": 9.934868519453215e-06, - "loss": 0.4141, + "epoch": 0.41, + "grad_norm": 0.7005382855928975, + "learning_rate": 1.3232177034925395e-05, + "loss": 0.3746, "step": 8999 }, { - "epoch": 0.52, - "grad_norm": 0.3868343055255914, - "learning_rate": 9.933007647461758e-06, - "loss": 0.2496, + "epoch": 0.41, + "grad_norm": 0.3236407265419388, + "learning_rate": 1.3230768944532526e-05, + "loss": 0.2606, "step": 9000 }, { - "epoch": 0.52, - "grad_norm": 0.304909472398119, - "learning_rate": 9.931146777790241e-06, - "loss": 0.2584, + "epoch": 0.41, + "grad_norm": 0.28562255660616803, + "learning_rate": 1.3229360782613543e-05, + "loss": 0.2207, "step": 9001 }, { - "epoch": 0.52, - "grad_norm": 0.43745718846557247, - "learning_rate": 9.929285910503112e-06, - "loss": 0.2569, + "epoch": 0.41, + "grad_norm": 0.8322587227320745, + "learning_rate": 1.3227952549199625e-05, + "loss": 0.41, "step": 9002 }, { - "epoch": 0.52, - "grad_norm": 0.2749356993094986, - "learning_rate": 9.927425045664804e-06, - "loss": 0.202, + "epoch": 0.41, + "grad_norm": 0.4007713361278303, + "learning_rate": 1.322654424432195e-05, + "loss": 0.2708, "step": 9003 }, { - "epoch": 0.52, - "grad_norm": 0.44467210424965653, - "learning_rate": 9.925564183339768e-06, - "loss": 0.3516, + "epoch": 0.41, + "grad_norm": 0.4753209483485224, + "learning_rate": 1.3225135868011694e-05, + "loss": 0.3227, "step": 9004 }, { - "epoch": 0.52, - "grad_norm": 0.3609803021729023, - "learning_rate": 9.92370332359244e-06, - "loss": 0.2799, + "epoch": 0.41, + "grad_norm": 0.9340170792592087, + "learning_rate": 1.322372742030004e-05, + "loss": 0.395, "step": 9005 }, { - "epoch": 0.52, - "grad_norm": 0.32493117789295467, - "learning_rate": 9.92184246648726e-06, - "loss": 0.2776, + "epoch": 0.41, + "grad_norm": 0.42288668978760635, + "learning_rate": 1.3222318901218168e-05, + "loss": 0.2859, "step": 9006 }, { - "epoch": 0.52, - "grad_norm": 1.0347469550216204, - "learning_rate": 9.919981612088676e-06, - "loss": 0.7681, + "epoch": 0.41, + "grad_norm": 0.2874551564156162, + "learning_rate": 1.3220910310797259e-05, + "loss": 0.1942, "step": 9007 }, { - "epoch": 0.52, - "grad_norm": 0.33748772354766315, - "learning_rate": 9.91812076046112e-06, - "loss": 0.2435, + "epoch": 0.41, + "grad_norm": 0.42457515340392443, + "learning_rate": 1.3219501649068502e-05, + "loss": 0.3277, "step": 9008 }, { - "epoch": 0.52, - "grad_norm": 0.26513883317931297, - "learning_rate": 9.916259911669044e-06, - "loss": 0.2091, + "epoch": 0.41, + "grad_norm": 0.3611574555598921, + "learning_rate": 1.3218092916063081e-05, + "loss": 0.1983, "step": 9009 }, { - "epoch": 0.52, - "grad_norm": 0.46014096183328146, - "learning_rate": 9.914399065776879e-06, - "loss": 0.2877, + "epoch": 0.41, + "grad_norm": 1.037041625603372, + "learning_rate": 1.3216684111812184e-05, + "loss": 0.4415, "step": 9010 }, { - "epoch": 0.52, - "grad_norm": 0.6082292573499609, - "learning_rate": 9.912538222849074e-06, - "loss": 0.3486, + "epoch": 0.41, + "grad_norm": 1.1366824801234976, + "learning_rate": 1.3215275236347002e-05, + "loss": 0.6449, "step": 9011 }, { - "epoch": 0.52, - "grad_norm": 0.34178859785901966, - "learning_rate": 9.910677382950064e-06, - "loss": 0.2791, + "epoch": 0.41, + "grad_norm": 0.30405615441726225, + "learning_rate": 1.3213866289698725e-05, + "loss": 0.2213, "step": 9012 }, { - "epoch": 0.52, - "grad_norm": 0.3524107406208595, - "learning_rate": 9.908816546144296e-06, - "loss": 0.3128, + "epoch": 0.41, + "grad_norm": 0.2733217459002581, + "learning_rate": 1.3212457271898545e-05, + "loss": 0.2317, "step": 9013 }, { - "epoch": 0.52, - "grad_norm": 0.2773592503100069, - "learning_rate": 9.906955712496203e-06, - "loss": 0.1512, + "epoch": 0.41, + "grad_norm": 1.6052998956694644, + "learning_rate": 1.3211048182977657e-05, + "loss": 0.7396, "step": 9014 }, { - "epoch": 0.52, - "grad_norm": 0.3265634728938896, - "learning_rate": 9.905094882070234e-06, - "loss": 0.2592, + "epoch": 0.41, + "grad_norm": 0.47472761337952085, + "learning_rate": 1.3209639022967257e-05, + "loss": 0.2349, "step": 9015 }, { - "epoch": 0.52, - "grad_norm": 0.40937969659931533, - "learning_rate": 9.903234054930824e-06, - "loss": 0.3167, + "epoch": 0.41, + "grad_norm": 0.46122250480679783, + "learning_rate": 1.3208229791898544e-05, + "loss": 0.3561, "step": 9016 }, { - "epoch": 0.52, - "grad_norm": 0.33132514739426283, - "learning_rate": 9.901373231142416e-06, - "loss": 0.3214, + "epoch": 0.41, + "grad_norm": 1.212655520943246, + "learning_rate": 1.3206820489802716e-05, + "loss": 0.7041, "step": 9017 }, { - "epoch": 0.52, - "grad_norm": 0.35046526567785413, - "learning_rate": 9.899512410769443e-06, - "loss": 0.1871, + "epoch": 0.41, + "grad_norm": 0.35926243825252124, + "learning_rate": 1.3205411116710973e-05, + "loss": 0.1941, "step": 9018 }, { - "epoch": 0.52, - "grad_norm": 0.4758272095091834, - "learning_rate": 9.897651593876356e-06, - "loss": 0.4102, + "epoch": 0.41, + "grad_norm": 0.3021669973641154, + "learning_rate": 1.3204001672654514e-05, + "loss": 0.183, "step": 9019 }, { - "epoch": 0.52, - "grad_norm": 0.32645946398502895, - "learning_rate": 9.895790780527585e-06, - "loss": 0.2765, + "epoch": 0.41, + "grad_norm": 0.3955366758126534, + "learning_rate": 1.3202592157664549e-05, + "loss": 0.3261, "step": 9020 }, { - "epoch": 0.52, - "grad_norm": 0.33879706747924987, - "learning_rate": 9.89392997078758e-06, - "loss": 0.1954, + "epoch": 0.41, + "grad_norm": 0.3686715990911606, + "learning_rate": 1.320118257177228e-05, + "loss": 0.2779, "step": 9021 }, { - "epoch": 0.52, - "grad_norm": 0.420195059981251, - "learning_rate": 9.892069164720771e-06, - "loss": 0.3489, + "epoch": 0.41, + "grad_norm": 0.9016014570712456, + "learning_rate": 1.3199772915008912e-05, + "loss": 0.3908, "step": 9022 }, { - "epoch": 0.52, - "grad_norm": 0.27953102565373356, - "learning_rate": 9.890208362391606e-06, - "loss": 0.1685, + "epoch": 0.41, + "grad_norm": 0.511404004774544, + "learning_rate": 1.3198363187405661e-05, + "loss": 0.4023, "step": 9023 }, { - "epoch": 0.52, - "grad_norm": 0.33324986313949784, - "learning_rate": 9.888347563864517e-06, - "loss": 0.2261, + "epoch": 0.41, + "grad_norm": 0.42377473910471614, + "learning_rate": 1.3196953388993727e-05, + "loss": 0.3006, "step": 9024 }, { - "epoch": 0.52, - "grad_norm": 0.4158775305414562, - "learning_rate": 9.886486769203951e-06, - "loss": 0.3704, + "epoch": 0.41, + "grad_norm": 0.22634842551984938, + "learning_rate": 1.3195543519804327e-05, + "loss": 0.1648, "step": 9025 }, { - "epoch": 0.52, - "grad_norm": 1.1118021729267935, - "learning_rate": 9.884625978474341e-06, - "loss": 0.6197, + "epoch": 0.41, + "grad_norm": 0.6971480296711122, + "learning_rate": 1.3194133579868672e-05, + "loss": 0.4696, "step": 9026 }, { - "epoch": 0.52, - "grad_norm": 0.2940205641682392, - "learning_rate": 9.88276519174013e-06, - "loss": 0.2177, + "epoch": 0.41, + "grad_norm": 0.5921164469941854, + "learning_rate": 1.319272356921798e-05, + "loss": 0.3463, "step": 9027 }, { - "epoch": 0.52, - "grad_norm": 0.632974645298959, - "learning_rate": 9.880904409065753e-06, - "loss": 0.4462, + "epoch": 0.41, + "grad_norm": 0.31370138563369404, + "learning_rate": 1.3191313487883465e-05, + "loss": 0.2473, "step": 9028 }, { - "epoch": 0.52, - "grad_norm": 0.21338348316802783, - "learning_rate": 9.879043630515651e-06, - "loss": 0.2196, + "epoch": 0.41, + "grad_norm": 1.1788590339276992, + "learning_rate": 1.3189903335896345e-05, + "loss": 0.6487, "step": 9029 }, { - "epoch": 0.52, - "grad_norm": 0.4146681584696092, - "learning_rate": 9.877182856154267e-06, - "loss": 0.3047, + "epoch": 0.41, + "grad_norm": 0.44322783386724995, + "learning_rate": 1.3188493113287841e-05, + "loss": 0.2771, "step": 9030 }, { - "epoch": 0.52, - "grad_norm": 0.4489759474178878, - "learning_rate": 9.87532208604603e-06, - "loss": 0.2851, + "epoch": 0.41, + "grad_norm": 0.25340010044512534, + "learning_rate": 1.3187082820089172e-05, + "loss": 0.1785, "step": 9031 }, { - "epoch": 0.52, - "grad_norm": 0.36796592998808814, - "learning_rate": 9.873461320255388e-06, - "loss": 0.3119, + "epoch": 0.41, + "grad_norm": 0.48904608267518923, + "learning_rate": 1.318567245633156e-05, + "loss": 0.3818, "step": 9032 }, { - "epoch": 0.52, - "grad_norm": 0.3727674532757231, - "learning_rate": 9.871600558846772e-06, - "loss": 0.2888, + "epoch": 0.41, + "grad_norm": 0.40949237155104234, + "learning_rate": 1.3184262022046233e-05, + "loss": 0.3392, "step": 9033 }, { - "epoch": 0.52, - "grad_norm": 0.4707617476263641, - "learning_rate": 9.869739801884627e-06, - "loss": 0.2876, + "epoch": 0.42, + "grad_norm": 0.5395896984840701, + "learning_rate": 1.318285151726441e-05, + "loss": 0.4071, "step": 9034 }, { - "epoch": 0.52, - "grad_norm": 0.2443750988765055, - "learning_rate": 9.867879049433383e-06, - "loss": 0.1874, + "epoch": 0.42, + "grad_norm": 0.49329290235317624, + "learning_rate": 1.3181440942017325e-05, + "loss": 0.289, "step": 9035 }, { - "epoch": 0.52, - "grad_norm": 0.39422198057692154, - "learning_rate": 9.866018301557484e-06, - "loss": 0.2668, + "epoch": 0.42, + "grad_norm": 0.3437047905679964, + "learning_rate": 1.31800302963362e-05, + "loss": 0.2698, "step": 9036 }, { - "epoch": 0.52, - "grad_norm": 0.29803699521115307, - "learning_rate": 9.864157558321364e-06, - "loss": 0.2592, + "epoch": 0.42, + "grad_norm": 0.6136823663012815, + "learning_rate": 1.3178619580252275e-05, + "loss": 0.3834, "step": 9037 }, { - "epoch": 0.52, - "grad_norm": 0.4831922205500076, - "learning_rate": 9.862296819789464e-06, - "loss": 0.3992, + "epoch": 0.42, + "grad_norm": 0.3333903952554828, + "learning_rate": 1.3177208793796772e-05, + "loss": 0.2159, "step": 9038 }, { - "epoch": 0.52, - "grad_norm": 0.5305541289799832, - "learning_rate": 9.860436086026218e-06, - "loss": 0.3633, + "epoch": 0.42, + "grad_norm": 0.4208993055674658, + "learning_rate": 1.317579793700093e-05, + "loss": 0.2742, "step": 9039 }, { - "epoch": 0.52, - "grad_norm": 0.2684294559842977, - "learning_rate": 9.858575357096064e-06, - "loss": 0.2333, + "epoch": 0.42, + "grad_norm": 0.45215343136563835, + "learning_rate": 1.317438700989598e-05, + "loss": 0.297, "step": 9040 }, { - "epoch": 0.52, - "grad_norm": 0.25245525380867007, - "learning_rate": 9.85671463306344e-06, - "loss": 0.1646, + "epoch": 0.42, + "grad_norm": 0.7606301033542168, + "learning_rate": 1.3172976012513165e-05, + "loss": 0.4487, "step": 9041 }, { - "epoch": 0.52, - "grad_norm": 0.5212038708909201, - "learning_rate": 9.854853913992783e-06, - "loss": 0.3353, + "epoch": 0.42, + "grad_norm": 0.6217067865690995, + "learning_rate": 1.3171564944883717e-05, + "loss": 0.336, "step": 9042 }, { - "epoch": 0.52, - "grad_norm": 0.3956254025587982, - "learning_rate": 9.852993199948527e-06, - "loss": 0.3331, + "epoch": 0.42, + "grad_norm": 0.3856566763331818, + "learning_rate": 1.3170153807038878e-05, + "loss": 0.3241, "step": 9043 }, { - "epoch": 0.52, - "grad_norm": 0.3427049716407958, - "learning_rate": 9.85113249099511e-06, - "loss": 0.276, + "epoch": 0.42, + "grad_norm": 0.2831202508617231, + "learning_rate": 1.3168742599009892e-05, + "loss": 0.2465, "step": 9044 }, { - "epoch": 0.52, - "grad_norm": 0.46855115446728846, - "learning_rate": 9.849271787196971e-06, - "loss": 0.3368, + "epoch": 0.42, + "grad_norm": 0.3173145897046266, + "learning_rate": 1.3167331320827994e-05, + "loss": 0.1695, "step": 9045 }, { - "epoch": 0.52, - "grad_norm": 0.3783074851487932, - "learning_rate": 9.847411088618539e-06, - "loss": 0.3037, + "epoch": 0.42, + "grad_norm": 0.44482702383221334, + "learning_rate": 1.3165919972524437e-05, + "loss": 0.3378, "step": 9046 }, { - "epoch": 0.52, - "grad_norm": 0.18974821301931535, - "learning_rate": 9.845550395324259e-06, - "loss": 0.0865, + "epoch": 0.42, + "grad_norm": 0.5090139106707258, + "learning_rate": 1.3164508554130461e-05, + "loss": 0.3617, "step": 9047 }, { - "epoch": 0.52, - "grad_norm": 0.2976246395263267, - "learning_rate": 9.843689707378558e-06, - "loss": 0.2641, + "epoch": 0.42, + "grad_norm": 0.38552243847284595, + "learning_rate": 1.3163097065677316e-05, + "loss": 0.2134, "step": 9048 }, { - "epoch": 0.52, - "grad_norm": 0.3856321462242667, - "learning_rate": 9.841829024845882e-06, - "loss": 0.3264, + "epoch": 0.42, + "grad_norm": 0.41395654200245086, + "learning_rate": 1.3161685507196251e-05, + "loss": 0.3414, "step": 9049 }, { - "epoch": 0.52, - "grad_norm": 0.8229565405098527, - "learning_rate": 9.839968347790657e-06, - "loss": 0.3381, + "epoch": 0.42, + "grad_norm": 0.6687275341297567, + "learning_rate": 1.3160273878718516e-05, + "loss": 0.4311, "step": 9050 }, { - "epoch": 0.52, - "grad_norm": 0.5464022009127502, - "learning_rate": 9.83810767627732e-06, - "loss": 0.3216, + "epoch": 0.42, + "grad_norm": 0.20731555804597276, + "learning_rate": 1.3158862180275362e-05, + "loss": 0.1229, "step": 9051 }, { - "epoch": 0.52, - "grad_norm": 0.3146404356039652, - "learning_rate": 9.836247010370308e-06, - "loss": 0.2941, + "epoch": 0.42, + "grad_norm": 0.3201130342557867, + "learning_rate": 1.3157450411898047e-05, + "loss": 0.2847, "step": 9052 }, { - "epoch": 0.52, - "grad_norm": 0.29591631958317766, - "learning_rate": 9.834386350134052e-06, - "loss": 0.1869, + "epoch": 0.42, + "grad_norm": 1.2516016246283954, + "learning_rate": 1.3156038573617822e-05, + "loss": 0.6587, "step": 9053 }, { - "epoch": 0.52, - "grad_norm": 0.7751683065263755, - "learning_rate": 9.832525695632994e-06, - "loss": 0.4217, + "epoch": 0.42, + "grad_norm": 0.33043805948141686, + "learning_rate": 1.3154626665465947e-05, + "loss": 0.2204, "step": 9054 }, { - "epoch": 0.52, - "grad_norm": 0.4249413501512791, - "learning_rate": 9.830665046931563e-06, - "loss": 0.2905, + "epoch": 0.42, + "grad_norm": 0.5255089380781177, + "learning_rate": 1.3153214687473673e-05, + "loss": 0.3812, "step": 9055 }, { - "epoch": 0.52, - "grad_norm": 0.5985386409545421, - "learning_rate": 9.828804404094192e-06, - "loss": 0.3324, + "epoch": 0.42, + "grad_norm": 0.3916441191377418, + "learning_rate": 1.3151802639672267e-05, + "loss": 0.3433, "step": 9056 }, { - "epoch": 0.52, - "grad_norm": 0.20868793648929493, - "learning_rate": 9.82694376718532e-06, - "loss": 0.0741, + "epoch": 0.42, + "grad_norm": 0.24695801472687676, + "learning_rate": 1.3150390522092987e-05, + "loss": 0.1647, "step": 9057 }, { - "epoch": 0.52, - "grad_norm": 0.3726465468468453, - "learning_rate": 9.825083136269375e-06, - "loss": 0.3055, + "epoch": 0.42, + "grad_norm": 0.5439125914026678, + "learning_rate": 1.3148978334767101e-05, + "loss": 0.3825, "step": 9058 }, { - "epoch": 0.52, - "grad_norm": 0.4228458238529731, - "learning_rate": 9.823222511410795e-06, - "loss": 0.2778, + "epoch": 0.42, + "grad_norm": 0.30476705087573086, + "learning_rate": 1.3147566077725869e-05, + "loss": 0.274, "step": 9059 }, { - "epoch": 0.52, - "grad_norm": 0.29394146644376107, - "learning_rate": 9.82136189267401e-06, - "loss": 0.2364, + "epoch": 0.42, + "grad_norm": 0.6579463224877468, + "learning_rate": 1.3146153751000554e-05, + "loss": 0.3945, "step": 9060 }, { - "epoch": 0.52, - "grad_norm": 0.43066936972759234, - "learning_rate": 9.819501280123458e-06, - "loss": 0.3554, + "epoch": 0.42, + "grad_norm": 0.3498031954752658, + "learning_rate": 1.314474135462243e-05, + "loss": 0.2511, "step": 9061 }, { - "epoch": 0.52, - "grad_norm": 0.6720219973395819, - "learning_rate": 9.817640673823566e-06, - "loss": 0.4414, + "epoch": 0.42, + "grad_norm": 0.914907492923861, + "learning_rate": 1.3143328888622761e-05, + "loss": 0.5218, "step": 9062 }, { - "epoch": 0.52, - "grad_norm": 0.22057283222630394, - "learning_rate": 9.815780073838773e-06, - "loss": 0.0968, + "epoch": 0.42, + "grad_norm": 0.45569315747089423, + "learning_rate": 1.3141916353032822e-05, + "loss": 0.3481, "step": 9063 }, { - "epoch": 0.52, - "grad_norm": 0.27013861542171497, - "learning_rate": 9.813919480233503e-06, - "loss": 0.283, + "epoch": 0.42, + "grad_norm": 0.2901017738049763, + "learning_rate": 1.3140503747883884e-05, + "loss": 0.2382, "step": 9064 }, { - "epoch": 0.52, - "grad_norm": 0.7716994426637789, - "learning_rate": 9.812058893072199e-06, - "loss": 0.5575, + "epoch": 0.42, + "grad_norm": 0.2517876353948198, + "learning_rate": 1.313909107320722e-05, + "loss": 0.1549, "step": 9065 }, { - "epoch": 0.52, - "grad_norm": 0.34090727687569566, - "learning_rate": 9.810198312419284e-06, - "loss": 0.2088, + "epoch": 0.42, + "grad_norm": 0.7543588272489495, + "learning_rate": 1.3137678329034103e-05, + "loss": 0.3788, "step": 9066 }, { - "epoch": 0.52, - "grad_norm": 0.4536499925740262, - "learning_rate": 9.808337738339194e-06, - "loss": 0.355, + "epoch": 0.42, + "grad_norm": 0.30092046058393357, + "learning_rate": 1.3136265515395812e-05, + "loss": 0.2467, "step": 9067 }, { - "epoch": 0.52, - "grad_norm": 0.3564000526722853, - "learning_rate": 9.80647717089636e-06, - "loss": 0.3234, + "epoch": 0.42, + "grad_norm": 0.4747967146662547, + "learning_rate": 1.3134852632323625e-05, + "loss": 0.3632, "step": 9068 }, { - "epoch": 0.52, - "grad_norm": 0.23213456762043275, - "learning_rate": 9.804616610155215e-06, - "loss": 0.1778, + "epoch": 0.42, + "grad_norm": 0.8241360203770558, + "learning_rate": 1.3133439679848824e-05, + "loss": 0.4519, "step": 9069 }, { - "epoch": 0.52, - "grad_norm": 0.3640646952779975, - "learning_rate": 9.802756056180187e-06, - "loss": 0.2301, + "epoch": 0.42, + "grad_norm": 0.27199366746236864, + "learning_rate": 1.3132026658002688e-05, + "loss": 0.1864, "step": 9070 }, { - "epoch": 0.52, - "grad_norm": 1.2155181820109497, - "learning_rate": 9.800895509035708e-06, - "loss": 0.8132, + "epoch": 0.42, + "grad_norm": 0.3546102812268334, + "learning_rate": 1.3130613566816501e-05, + "loss": 0.2332, "step": 9071 }, { - "epoch": 0.52, - "grad_norm": 0.3652622733588144, - "learning_rate": 9.799034968786209e-06, - "loss": 0.2981, + "epoch": 0.42, + "grad_norm": 0.5070129459303292, + "learning_rate": 1.3129200406321545e-05, + "loss": 0.2983, "step": 9072 }, { - "epoch": 0.52, - "grad_norm": 0.3160744756567268, - "learning_rate": 9.797174435496119e-06, - "loss": 0.2346, + "epoch": 0.42, + "grad_norm": 0.4609219149436638, + "learning_rate": 1.312778717654911e-05, + "loss": 0.3297, "step": 9073 }, { - "epoch": 0.52, - "grad_norm": 0.3198722182667888, - "learning_rate": 9.795313909229872e-06, - "loss": 0.2408, + "epoch": 0.42, + "grad_norm": 0.875544674467338, + "learning_rate": 1.312637387753048e-05, + "loss": 0.3444, "step": 9074 }, { - "epoch": 0.52, - "grad_norm": 0.43873354265547654, - "learning_rate": 9.793453390051894e-06, - "loss": 0.3458, + "epoch": 0.42, + "grad_norm": 0.3049291187219597, + "learning_rate": 1.3124960509296945e-05, + "loss": 0.2663, "step": 9075 }, { - "epoch": 0.52, - "grad_norm": 0.27745929926602364, - "learning_rate": 9.791592878026617e-06, - "loss": 0.2203, + "epoch": 0.42, + "grad_norm": 0.42384806756131305, + "learning_rate": 1.3123547071879801e-05, + "loss": 0.3256, "step": 9076 }, { - "epoch": 0.52, - "grad_norm": 0.962358758398474, - "learning_rate": 9.789732373218468e-06, - "loss": 0.5281, + "epoch": 0.42, + "grad_norm": 0.44608303538832583, + "learning_rate": 1.312213356531033e-05, + "loss": 0.2501, "step": 9077 }, { - "epoch": 0.52, - "grad_norm": 0.5943049810697584, - "learning_rate": 9.78787187569188e-06, - "loss": 0.3826, + "epoch": 0.42, + "grad_norm": 0.4496466565786788, + "learning_rate": 1.3120719989619832e-05, + "loss": 0.2731, "step": 9078 }, { - "epoch": 0.52, - "grad_norm": 0.3278974230441489, - "learning_rate": 9.786011385511279e-06, - "loss": 0.2963, + "epoch": 0.42, + "grad_norm": 0.3987297095840659, + "learning_rate": 1.3119306344839601e-05, + "loss": 0.2838, "step": 9079 }, { - "epoch": 0.52, - "grad_norm": 0.3273067224989936, - "learning_rate": 9.784150902741095e-06, - "loss": 0.2385, + "epoch": 0.42, + "grad_norm": 0.3903273144247722, + "learning_rate": 1.3117892631000936e-05, + "loss": 0.2726, "step": 9080 }, { - "epoch": 0.52, - "grad_norm": 0.27902999532971506, - "learning_rate": 9.782290427445755e-06, - "loss": 0.1677, + "epoch": 0.42, + "grad_norm": 0.7691705027011358, + "learning_rate": 1.311647884813513e-05, + "loss": 0.4326, "step": 9081 }, { - "epoch": 0.52, - "grad_norm": 0.3192097580526225, - "learning_rate": 9.78042995968969e-06, - "loss": 0.2732, + "epoch": 0.42, + "grad_norm": 0.39006601073096353, + "learning_rate": 1.3115064996273492e-05, + "loss": 0.3233, "step": 9082 }, { - "epoch": 0.52, - "grad_norm": 0.6567345097224239, - "learning_rate": 9.778569499537327e-06, - "loss": 0.3247, + "epoch": 0.42, + "grad_norm": 0.38396612125624047, + "learning_rate": 1.311365107544731e-05, + "loss": 0.3196, "step": 9083 }, { - "epoch": 0.52, - "grad_norm": 0.30540297396342236, - "learning_rate": 9.77670904705309e-06, - "loss": 0.2962, + "epoch": 0.42, + "grad_norm": 0.3762054034873624, + "learning_rate": 1.31122370856879e-05, + "loss": 0.1806, "step": 9084 }, { - "epoch": 0.52, - "grad_norm": 0.368732504569253, - "learning_rate": 9.77484860230141e-06, - "loss": 0.292, + "epoch": 0.42, + "grad_norm": 0.30564800900531475, + "learning_rate": 1.3110823027026558e-05, + "loss": 0.2373, "step": 9085 }, { - "epoch": 0.52, - "grad_norm": 0.8805145259459297, - "learning_rate": 9.772988165346715e-06, - "loss": 0.4137, + "epoch": 0.42, + "grad_norm": 1.718601985082989, + "learning_rate": 1.310940889949459e-05, + "loss": 0.7979, "step": 9086 }, { - "epoch": 0.52, - "grad_norm": 0.22124704588757194, - "learning_rate": 9.771127736253426e-06, - "loss": 0.1537, + "epoch": 0.42, + "grad_norm": 0.3381244216576654, + "learning_rate": 1.3107994703123312e-05, + "loss": 0.2444, "step": 9087 }, { - "epoch": 0.52, - "grad_norm": 0.3510303095693387, - "learning_rate": 9.769267315085976e-06, - "loss": 0.2932, + "epoch": 0.42, + "grad_norm": 0.42401327681745565, + "learning_rate": 1.3106580437944023e-05, + "loss": 0.3151, "step": 9088 }, { - "epoch": 0.52, - "grad_norm": 0.4614132599295984, - "learning_rate": 9.767406901908787e-06, - "loss": 0.2882, + "epoch": 0.42, + "grad_norm": 0.6727120921260503, + "learning_rate": 1.310516610398804e-05, + "loss": 0.441, "step": 9089 }, { - "epoch": 0.52, - "grad_norm": 0.8664585977799852, - "learning_rate": 9.76554649678629e-06, - "loss": 0.4554, + "epoch": 0.42, + "grad_norm": 0.26345175681186583, + "learning_rate": 1.3103751701286667e-05, + "loss": 0.179, "step": 9090 }, { - "epoch": 0.52, - "grad_norm": 0.3287018378571944, - "learning_rate": 9.763686099782905e-06, - "loss": 0.2582, + "epoch": 0.42, + "grad_norm": 0.4211460415658565, + "learning_rate": 1.3102337229871224e-05, + "loss": 0.2995, "step": 9091 }, { - "epoch": 0.52, - "grad_norm": 0.31568488663477967, - "learning_rate": 9.761825710963063e-06, - "loss": 0.2942, + "epoch": 0.42, + "grad_norm": 0.44217693251947277, + "learning_rate": 1.3100922689773028e-05, + "loss": 0.3232, "step": 9092 }, { - "epoch": 0.52, - "grad_norm": 0.20206609002791392, - "learning_rate": 9.759965330391182e-06, - "loss": 0.0898, + "epoch": 0.42, + "grad_norm": 0.36679176236124206, + "learning_rate": 1.3099508081023391e-05, + "loss": 0.1986, "step": 9093 }, { - "epoch": 0.52, - "grad_norm": 0.3151578729799421, - "learning_rate": 9.758104958131696e-06, - "loss": 0.2367, + "epoch": 0.42, + "grad_norm": 0.4568985063735754, + "learning_rate": 1.309809340365363e-05, + "loss": 0.3479, "step": 9094 }, { - "epoch": 0.52, - "grad_norm": 0.741103555846677, - "learning_rate": 9.756244594249024e-06, - "loss": 0.4884, + "epoch": 0.42, + "grad_norm": 0.3707018228039586, + "learning_rate": 1.3096678657695072e-05, + "loss": 0.3172, "step": 9095 }, { - "epoch": 0.52, - "grad_norm": 0.368566194853679, - "learning_rate": 9.754384238807589e-06, - "loss": 0.2917, + "epoch": 0.42, + "grad_norm": 0.8199785924300981, + "learning_rate": 1.3095263843179029e-05, + "loss": 0.5059, "step": 9096 }, { - "epoch": 0.52, - "grad_norm": 0.32908075448316854, - "learning_rate": 9.752523891871819e-06, - "loss": 0.2796, + "epoch": 0.42, + "grad_norm": 0.24773077829773296, + "learning_rate": 1.309384896013683e-05, + "loss": 0.1649, "step": 9097 }, { - "epoch": 0.52, - "grad_norm": 0.9739561754616147, - "learning_rate": 9.750663553506134e-06, - "loss": 0.6682, + "epoch": 0.42, + "grad_norm": 0.49796003383230464, + "learning_rate": 1.3092434008599795e-05, + "loss": 0.2792, "step": 9098 }, { - "epoch": 0.52, - "grad_norm": 0.19140722525879253, - "learning_rate": 9.748803223774962e-06, - "loss": 0.1553, + "epoch": 0.42, + "grad_norm": 0.4055082678909088, + "learning_rate": 1.3091018988599254e-05, + "loss": 0.3154, "step": 9099 }, { - "epoch": 0.52, - "grad_norm": 0.29655050143646866, - "learning_rate": 9.746942902742722e-06, - "loss": 0.2873, + "epoch": 0.42, + "grad_norm": 0.38677445096568425, + "learning_rate": 1.308960390016653e-05, + "loss": 0.232, "step": 9100 }, { - "epoch": 0.52, - "grad_norm": 0.8291151161681248, - "learning_rate": 9.745082590473839e-06, - "loss": 0.4544, + "epoch": 0.42, + "grad_norm": 0.8059031550255847, + "learning_rate": 1.3088188743332955e-05, + "loss": 0.4787, "step": 9101 }, { - "epoch": 0.52, - "grad_norm": 0.6396141518635055, - "learning_rate": 9.743222287032734e-06, - "loss": 0.3046, + "epoch": 0.42, + "grad_norm": 0.4813228173866095, + "learning_rate": 1.3086773518129853e-05, + "loss": 0.266, "step": 9102 }, { - "epoch": 0.52, - "grad_norm": 0.3562190167610645, - "learning_rate": 9.741361992483832e-06, - "loss": 0.2791, + "epoch": 0.42, + "grad_norm": 0.2710552364501726, + "learning_rate": 1.3085358224588565e-05, + "loss": 0.2299, "step": 9103 }, { - "epoch": 0.52, - "grad_norm": 0.3584912974204283, - "learning_rate": 9.739501706891551e-06, - "loss": 0.3235, + "epoch": 0.42, + "grad_norm": 0.5454247305922598, + "learning_rate": 1.308394286274042e-05, + "loss": 0.2701, "step": 9104 }, { - "epoch": 0.52, - "grad_norm": 0.28189812469588293, - "learning_rate": 9.737641430320315e-06, - "loss": 0.1743, + "epoch": 0.42, + "grad_norm": 0.8539115277521903, + "learning_rate": 1.308252743261675e-05, + "loss": 0.528, "step": 9105 }, { - "epoch": 0.52, - "grad_norm": 0.30844703586005295, - "learning_rate": 9.735781162834546e-06, - "loss": 0.1798, + "epoch": 0.42, + "grad_norm": 0.2960486453416966, + "learning_rate": 1.3081111934248895e-05, + "loss": 0.2383, "step": 9106 }, { - "epoch": 0.52, - "grad_norm": 0.8857786094208216, - "learning_rate": 9.733920904498664e-06, - "loss": 0.3613, + "epoch": 0.42, + "grad_norm": 0.3864224262719897, + "learning_rate": 1.3079696367668192e-05, + "loss": 0.3131, "step": 9107 }, { - "epoch": 0.52, - "grad_norm": 0.4731519913769104, - "learning_rate": 9.73206065537709e-06, - "loss": 0.3541, + "epoch": 0.42, + "grad_norm": 0.9678461363381875, + "learning_rate": 1.3078280732905976e-05, + "loss": 0.6467, "step": 9108 }, { - "epoch": 0.52, - "grad_norm": 0.291075703344724, - "learning_rate": 9.730200415534242e-06, - "loss": 0.1994, + "epoch": 0.42, + "grad_norm": 0.3844811970569384, + "learning_rate": 1.3076865029993595e-05, + "loss": 0.2839, "step": 9109 }, { - "epoch": 0.52, - "grad_norm": 1.2261999869951392, - "learning_rate": 9.728340185034545e-06, - "loss": 0.7143, + "epoch": 0.42, + "grad_norm": 0.21323739682973292, + "learning_rate": 1.3075449258962384e-05, + "loss": 0.091, "step": 9110 }, { - "epoch": 0.52, - "grad_norm": 0.31050079628050553, - "learning_rate": 9.726479963942412e-06, - "loss": 0.2554, + "epoch": 0.42, + "grad_norm": 0.3934755159192334, + "learning_rate": 1.3074033419843697e-05, + "loss": 0.3263, "step": 9111 }, { - "epoch": 0.52, - "grad_norm": 0.26466322118910507, - "learning_rate": 9.72461975232227e-06, - "loss": 0.1923, + "epoch": 0.42, + "grad_norm": 0.41849630898208734, + "learning_rate": 1.3072617512668869e-05, + "loss": 0.2617, "step": 9112 }, { - "epoch": 0.52, - "grad_norm": 0.6613580619617312, - "learning_rate": 9.72275955023853e-06, - "loss": 0.4344, + "epoch": 0.42, + "grad_norm": 0.5321544990901166, + "learning_rate": 1.307120153746925e-05, + "loss": 0.3266, "step": 9113 }, { - "epoch": 0.52, - "grad_norm": 1.3485587659150748, - "learning_rate": 9.720899357755618e-06, - "loss": 0.8047, + "epoch": 0.42, + "grad_norm": 0.4980210889786207, + "learning_rate": 1.306978549427619e-05, + "loss": 0.3316, "step": 9114 }, { - "epoch": 0.52, - "grad_norm": 0.29397938026410353, - "learning_rate": 9.719039174937948e-06, - "loss": 0.2338, + "epoch": 0.42, + "grad_norm": 0.34847259181197665, + "learning_rate": 1.3068369383121036e-05, + "loss": 0.2571, "step": 9115 }, { - "epoch": 0.52, - "grad_norm": 0.4540959149910563, - "learning_rate": 9.717179001849942e-06, - "loss": 0.3446, + "epoch": 0.42, + "grad_norm": 0.24092964261768782, + "learning_rate": 1.3066953204035145e-05, + "loss": 0.1592, "step": 9116 }, { - "epoch": 0.52, - "grad_norm": 0.7506127708683249, - "learning_rate": 9.715318838556014e-06, - "loss": 0.4407, + "epoch": 0.42, + "grad_norm": 0.6700819670552765, + "learning_rate": 1.3065536957049863e-05, + "loss": 0.4273, "step": 9117 }, { - "epoch": 0.52, - "grad_norm": 0.3762547929412095, - "learning_rate": 9.71345868512058e-06, - "loss": 0.2911, + "epoch": 0.42, + "grad_norm": 0.3582286079732149, + "learning_rate": 1.3064120642196549e-05, + "loss": 0.2809, "step": 9118 }, { - "epoch": 0.52, - "grad_norm": 0.32214352086262227, - "learning_rate": 9.711598541608062e-06, - "loss": 0.2218, + "epoch": 0.42, + "grad_norm": 0.3687328280600011, + "learning_rate": 1.3062704259506559e-05, + "loss": 0.288, "step": 9119 }, { - "epoch": 0.52, - "grad_norm": 0.3327082599332806, - "learning_rate": 9.709738408082873e-06, - "loss": 0.2906, + "epoch": 0.42, + "grad_norm": 1.6245068934609712, + "learning_rate": 1.3061287809011243e-05, + "loss": 0.6703, "step": 9120 }, { - "epoch": 0.52, - "grad_norm": 0.3958060769664337, - "learning_rate": 9.707878284609429e-06, - "loss": 0.2693, + "epoch": 0.42, + "grad_norm": 0.3897479416867728, + "learning_rate": 1.3059871290741968e-05, + "loss": 0.2855, "step": 9121 }, { - "epoch": 0.52, - "grad_norm": 0.4592079526507075, - "learning_rate": 9.706018171252148e-06, - "loss": 0.2672, + "epoch": 0.42, + "grad_norm": 0.3384079666272606, + "learning_rate": 1.3058454704730092e-05, + "loss": 0.1815, "step": 9122 }, { - "epoch": 0.52, - "grad_norm": 0.34486135538857204, - "learning_rate": 9.704158068075445e-06, - "loss": 0.314, + "epoch": 0.42, + "grad_norm": 0.40188875530537144, + "learning_rate": 1.305703805100698e-05, + "loss": 0.292, "step": 9123 }, { - "epoch": 0.52, - "grad_norm": 0.3742732158714571, - "learning_rate": 9.702297975143737e-06, - "loss": 0.2434, + "epoch": 0.42, + "grad_norm": 0.37598848497130544, + "learning_rate": 1.3055621329603988e-05, + "loss": 0.269, "step": 9124 }, { - "epoch": 0.52, - "grad_norm": 0.2477396220146489, - "learning_rate": 9.700437892521434e-06, - "loss": 0.1747, + "epoch": 0.42, + "grad_norm": 1.4317095788728902, + "learning_rate": 1.3054204540552483e-05, + "loss": 0.8335, "step": 9125 }, { - "epoch": 0.52, - "grad_norm": 1.2186332134145217, - "learning_rate": 9.698577820272958e-06, - "loss": 0.81, + "epoch": 0.42, + "grad_norm": 0.36952538033272525, + "learning_rate": 1.3052787683883837e-05, + "loss": 0.2457, "step": 9126 }, { - "epoch": 0.52, - "grad_norm": 0.4225976525036132, - "learning_rate": 9.696717758462716e-06, - "loss": 0.2788, + "epoch": 0.42, + "grad_norm": 0.3904765445753142, + "learning_rate": 1.3051370759629411e-05, + "loss": 0.2879, "step": 9127 }, { - "epoch": 0.52, - "grad_norm": 0.2970628514194815, - "learning_rate": 9.694857707155126e-06, - "loss": 0.2496, + "epoch": 0.42, + "grad_norm": 0.3172336014665754, + "learning_rate": 1.3049953767820583e-05, + "loss": 0.18, "step": 9128 }, { - "epoch": 0.52, - "grad_norm": 0.8264539934908051, - "learning_rate": 9.6929976664146e-06, - "loss": 0.4638, + "epoch": 0.42, + "grad_norm": 0.6173133042736832, + "learning_rate": 1.3048536708488712e-05, + "loss": 0.3628, "step": 9129 }, { - "epoch": 0.52, - "grad_norm": 0.33276779372939486, - "learning_rate": 9.691137636305554e-06, - "loss": 0.2866, + "epoch": 0.42, + "grad_norm": 0.4559953869090191, + "learning_rate": 1.304711958166518e-05, + "loss": 0.2933, "step": 9130 }, { - "epoch": 0.52, - "grad_norm": 0.3552299923460887, - "learning_rate": 9.689277616892396e-06, - "loss": 0.3332, + "epoch": 0.42, + "grad_norm": 0.3590272121156264, + "learning_rate": 1.3045702387381355e-05, + "loss": 0.3395, "step": 9131 }, { - "epoch": 0.52, - "grad_norm": 0.21249879155980383, - "learning_rate": 9.687417608239541e-06, - "loss": 0.1038, + "epoch": 0.42, + "grad_norm": 0.9009812412257739, + "learning_rate": 1.3044285125668614e-05, + "loss": 0.3408, "step": 9132 }, { - "epoch": 0.52, - "grad_norm": 0.3190251853080398, - "learning_rate": 9.6855576104114e-06, - "loss": 0.2811, + "epoch": 0.42, + "grad_norm": 0.47319866934921134, + "learning_rate": 1.3042867796558338e-05, + "loss": 0.298, "step": 9133 }, { - "epoch": 0.52, - "grad_norm": 1.1275659444191837, - "learning_rate": 9.683697623472387e-06, - "loss": 0.6108, + "epoch": 0.42, + "grad_norm": 0.4806836273199474, + "learning_rate": 1.3041450400081901e-05, + "loss": 0.2924, "step": 9134 }, { - "epoch": 0.52, - "grad_norm": 0.4722264674934552, - "learning_rate": 9.681837647486912e-06, - "loss": 0.2835, + "epoch": 0.42, + "grad_norm": 0.3712659723457226, + "learning_rate": 1.3040032936270683e-05, + "loss": 0.309, "step": 9135 }, { - "epoch": 0.52, - "grad_norm": 0.3269246466042898, - "learning_rate": 9.679977682519385e-06, - "loss": 0.2699, + "epoch": 0.42, + "grad_norm": 0.23668364509236967, + "learning_rate": 1.3038615405156066e-05, + "loss": 0.1738, "step": 9136 }, { - "epoch": 0.52, - "grad_norm": 1.6250715059284526, - "learning_rate": 9.678117728634217e-06, - "loss": 0.6513, + "epoch": 0.42, + "grad_norm": 1.3518357107034507, + "learning_rate": 1.3037197806769429e-05, + "loss": 0.7925, "step": 9137 }, { - "epoch": 0.53, - "grad_norm": 0.23143067024166109, - "learning_rate": 9.676257785895817e-06, - "loss": 0.134, + "epoch": 0.42, + "grad_norm": 1.1133863622803866, + "learning_rate": 1.3035780141142164e-05, + "loss": 0.6159, "step": 9138 }, { - "epoch": 0.53, - "grad_norm": 0.39965635338532934, - "learning_rate": 9.674397854368598e-06, - "loss": 0.2694, + "epoch": 0.42, + "grad_norm": 0.2867399540529521, + "learning_rate": 1.303436240830565e-05, + "loss": 0.2244, "step": 9139 }, { - "epoch": 0.53, - "grad_norm": 0.3096227242107919, - "learning_rate": 9.672537934116966e-06, - "loss": 0.3045, + "epoch": 0.42, + "grad_norm": 0.7665825500675789, + "learning_rate": 1.3032944608291279e-05, + "loss": 0.4874, "step": 9140 }, { - "epoch": 0.53, - "grad_norm": 0.5551639828567845, - "learning_rate": 9.670678025205332e-06, - "loss": 0.3214, + "epoch": 0.42, + "grad_norm": 0.34021334321100616, + "learning_rate": 1.3031526741130435e-05, + "loss": 0.2393, "step": 9141 }, { - "epoch": 0.53, - "grad_norm": 0.3793778776155998, - "learning_rate": 9.668818127698103e-06, - "loss": 0.2593, + "epoch": 0.42, + "grad_norm": 0.3753465199102772, + "learning_rate": 1.3030108806854516e-05, + "loss": 0.2207, "step": 9142 }, { - "epoch": 0.53, - "grad_norm": 0.3052842446630093, - "learning_rate": 9.66695824165969e-06, - "loss": 0.2919, + "epoch": 0.42, + "grad_norm": 0.41536192194990573, + "learning_rate": 1.3028690805494901e-05, + "loss": 0.3207, "step": 9143 }, { - "epoch": 0.53, - "grad_norm": 0.3974048517058976, - "learning_rate": 9.665098367154496e-06, - "loss": 0.2662, + "epoch": 0.42, + "grad_norm": 0.9797439368270568, + "learning_rate": 1.3027272737082997e-05, + "loss": 0.6143, "step": 9144 }, { - "epoch": 0.53, - "grad_norm": 0.30419717373936883, - "learning_rate": 9.663238504246933e-06, - "loss": 0.2007, + "epoch": 0.42, + "grad_norm": 0.327025625133217, + "learning_rate": 1.3025854601650187e-05, + "loss": 0.2013, "step": 9145 }, { - "epoch": 0.53, - "grad_norm": 0.3827763477642118, - "learning_rate": 9.661378653001404e-06, - "loss": 0.2769, + "epoch": 0.42, + "grad_norm": 0.6735357941268292, + "learning_rate": 1.3024436399227877e-05, + "loss": 0.4301, "step": 9146 }, { - "epoch": 0.53, - "grad_norm": 0.3473256096785234, - "learning_rate": 9.65951881348232e-06, - "loss": 0.3079, + "epoch": 0.42, + "grad_norm": 0.2586462509406168, + "learning_rate": 1.3023018129847459e-05, + "loss": 0.1995, "step": 9147 }, { - "epoch": 0.53, - "grad_norm": 0.3339712098319444, - "learning_rate": 9.657658985754085e-06, - "loss": 0.162, + "epoch": 0.42, + "grad_norm": 0.6068403340114009, + "learning_rate": 1.3021599793540335e-05, + "loss": 0.3291, "step": 9148 }, { - "epoch": 0.53, - "grad_norm": 0.507041220843939, - "learning_rate": 9.655799169881103e-06, - "loss": 0.3754, + "epoch": 0.42, + "grad_norm": 0.4117029347481617, + "learning_rate": 1.30201813903379e-05, + "loss": 0.2746, "step": 9149 }, { - "epoch": 0.53, - "grad_norm": 1.3073406626204165, - "learning_rate": 9.653939365927785e-06, - "loss": 0.7838, + "epoch": 0.42, + "grad_norm": 0.39771834131571226, + "learning_rate": 1.3018762920271559e-05, + "loss": 0.3067, "step": 9150 }, { - "epoch": 0.53, - "grad_norm": 0.28633539371312033, - "learning_rate": 9.652079573958529e-06, - "loss": 0.2202, + "epoch": 0.42, + "grad_norm": 0.5819094996013092, + "learning_rate": 1.3017344383372721e-05, + "loss": 0.3505, "step": 9151 }, { - "epoch": 0.53, - "grad_norm": 0.3691162201113786, - "learning_rate": 9.650219794037741e-06, - "loss": 0.238, + "epoch": 0.42, + "grad_norm": 0.39513389438569263, + "learning_rate": 1.3015925779672784e-05, + "loss": 0.2916, "step": 9152 }, { - "epoch": 0.53, - "grad_norm": 0.41623336946895095, - "learning_rate": 9.648360026229828e-06, - "loss": 0.3067, + "epoch": 0.42, + "grad_norm": 0.5005286404328148, + "learning_rate": 1.301450710920316e-05, + "loss": 0.2897, "step": 9153 }, { - "epoch": 0.53, - "grad_norm": 0.35646845738442257, - "learning_rate": 9.646500270599191e-06, - "loss": 0.2647, + "epoch": 0.42, + "grad_norm": 0.424679186075288, + "learning_rate": 1.301308837199525e-05, + "loss": 0.3035, "step": 9154 }, { - "epoch": 0.53, - "grad_norm": 0.28283345328135584, - "learning_rate": 9.644640527210235e-06, - "loss": 0.2443, + "epoch": 0.42, + "grad_norm": 0.27621476396891415, + "learning_rate": 1.3011669568080469e-05, + "loss": 0.2136, "step": 9155 }, { - "epoch": 0.53, - "grad_norm": 1.1947336670116657, - "learning_rate": 9.642780796127362e-06, - "loss": 0.6928, + "epoch": 0.42, + "grad_norm": 1.309873643197906, + "learning_rate": 1.3010250697490225e-05, + "loss": 0.7549, "step": 9156 }, { - "epoch": 0.53, - "grad_norm": 0.5466204845370135, - "learning_rate": 9.640921077414975e-06, - "loss": 0.3285, + "epoch": 0.42, + "grad_norm": 0.524616900068898, + "learning_rate": 1.3008831760255933e-05, + "loss": 0.3012, "step": 9157 }, { - "epoch": 0.53, - "grad_norm": 0.35102520321136116, - "learning_rate": 9.639061371137475e-06, - "loss": 0.2716, + "epoch": 0.42, + "grad_norm": 0.49302414551542845, + "learning_rate": 1.3007412756409009e-05, + "loss": 0.285, "step": 9158 }, { - "epoch": 0.53, - "grad_norm": 0.24634222677994205, - "learning_rate": 9.637201677359266e-06, - "loss": 0.2251, + "epoch": 0.42, + "grad_norm": 0.4065408395581251, + "learning_rate": 1.3005993685980862e-05, + "loss": 0.3263, "step": 9159 }, { - "epoch": 0.53, - "grad_norm": 0.5712291239043696, - "learning_rate": 9.635341996144747e-06, - "loss": 0.3271, + "epoch": 0.42, + "grad_norm": 0.4202475079823806, + "learning_rate": 1.300457454900291e-05, + "loss": 0.2773, "step": 9160 }, { - "epoch": 0.53, - "grad_norm": 0.34496339742721027, - "learning_rate": 9.633482327558316e-06, - "loss": 0.2331, + "epoch": 0.42, + "grad_norm": 0.5290986590331028, + "learning_rate": 1.3003155345506575e-05, + "loss": 0.3457, "step": 9161 }, { - "epoch": 0.53, - "grad_norm": 0.8177903308392893, - "learning_rate": 9.63162267166438e-06, - "loss": 0.6087, + "epoch": 0.42, + "grad_norm": 0.2791501826422814, + "learning_rate": 1.3001736075523277e-05, + "loss": 0.1966, "step": 9162 }, { - "epoch": 0.53, - "grad_norm": 0.3266062325787849, - "learning_rate": 9.629763028527332e-06, - "loss": 0.2651, + "epoch": 0.42, + "grad_norm": 0.5913920665516836, + "learning_rate": 1.3000316739084433e-05, + "loss": 0.3711, "step": 9163 }, { - "epoch": 0.53, - "grad_norm": 0.42567311427029086, - "learning_rate": 9.627903398211577e-06, - "loss": 0.2801, + "epoch": 0.42, + "grad_norm": 0.43271288303366157, + "learning_rate": 1.299889733622147e-05, + "loss": 0.3207, "step": 9164 }, { - "epoch": 0.53, - "grad_norm": 0.3037908672972681, - "learning_rate": 9.626043780781508e-06, - "loss": 0.1905, + "epoch": 0.42, + "grad_norm": 0.895806232506536, + "learning_rate": 1.299747786696581e-05, + "loss": 0.4164, "step": 9165 }, { - "epoch": 0.53, - "grad_norm": 0.623542999401843, - "learning_rate": 9.62418417630153e-06, - "loss": 0.3826, + "epoch": 0.42, + "grad_norm": 0.7309719822108601, + "learning_rate": 1.299605833134888e-05, + "loss": 0.3862, "step": 9166 }, { - "epoch": 0.53, - "grad_norm": 0.31378311285481736, - "learning_rate": 9.622324584836036e-06, - "loss": 0.2816, + "epoch": 0.42, + "grad_norm": 0.3249870916905667, + "learning_rate": 1.2994638729402102e-05, + "loss": 0.3045, "step": 9167 }, { - "epoch": 0.53, - "grad_norm": 1.04329856978667, - "learning_rate": 9.620465006449427e-06, - "loss": 0.4929, + "epoch": 0.42, + "grad_norm": 0.35519963442672736, + "learning_rate": 1.2993219061156914e-05, + "loss": 0.2087, "step": 9168 }, { - "epoch": 0.53, - "grad_norm": 0.6656212816613781, - "learning_rate": 9.618605441206098e-06, - "loss": 0.3594, + "epoch": 0.42, + "grad_norm": 0.6788800371377359, + "learning_rate": 1.2991799326644736e-05, + "loss": 0.3557, "step": 9169 }, { - "epoch": 0.53, - "grad_norm": 0.3644528569994132, - "learning_rate": 9.616745889170446e-06, - "loss": 0.3054, + "epoch": 0.42, + "grad_norm": 0.38044301789460366, + "learning_rate": 1.299037952589701e-05, + "loss": 0.2897, "step": 9170 }, { - "epoch": 0.53, - "grad_norm": 0.1996589631527687, - "learning_rate": 9.614886350406865e-06, - "loss": 0.1666, + "epoch": 0.42, + "grad_norm": 0.5125383907599383, + "learning_rate": 1.298895965894516e-05, + "loss": 0.2873, "step": 9171 }, { - "epoch": 0.53, - "grad_norm": 0.3726302590229509, - "learning_rate": 9.613026824979757e-06, - "loss": 0.2799, + "epoch": 0.42, + "grad_norm": 0.6207478436073067, + "learning_rate": 1.2987539725820624e-05, + "loss": 0.3831, "step": 9172 }, { - "epoch": 0.53, - "grad_norm": 0.5874746934630642, - "learning_rate": 9.61116731295351e-06, - "loss": 0.3598, + "epoch": 0.42, + "grad_norm": 0.3220543021453989, + "learning_rate": 1.2986119726554836e-05, + "loss": 0.2414, "step": 9173 }, { - "epoch": 0.53, - "grad_norm": 0.4754181343382663, - "learning_rate": 9.609307814392525e-06, - "loss": 0.3157, + "epoch": 0.42, + "grad_norm": 0.5422675052890263, + "learning_rate": 1.2984699661179238e-05, + "loss": 0.3747, "step": 9174 }, { - "epoch": 0.53, - "grad_norm": 0.3358430754062846, - "learning_rate": 9.607448329361193e-06, - "loss": 0.2765, + "epoch": 0.42, + "grad_norm": 0.3314111570386824, + "learning_rate": 1.2983279529725268e-05, + "loss": 0.2078, "step": 9175 }, { - "epoch": 0.53, - "grad_norm": 0.368547482274709, - "learning_rate": 9.605588857923906e-06, - "loss": 0.3226, + "epoch": 0.42, + "grad_norm": 0.4257128289906499, + "learning_rate": 1.2981859332224362e-05, + "loss": 0.3167, "step": 9176 }, { - "epoch": 0.53, - "grad_norm": 0.24855846818078145, - "learning_rate": 9.603729400145063e-06, - "loss": 0.1304, + "epoch": 0.42, + "grad_norm": 0.9338535350838162, + "learning_rate": 1.2980439068707964e-05, + "loss": 0.5831, "step": 9177 }, { - "epoch": 0.53, - "grad_norm": 0.8221257736419765, - "learning_rate": 9.601869956089051e-06, - "loss": 0.3965, + "epoch": 0.42, + "grad_norm": 0.321498461574755, + "learning_rate": 1.2979018739207518e-05, + "loss": 0.2257, "step": 9178 }, { - "epoch": 0.53, - "grad_norm": 0.2844103771443083, - "learning_rate": 9.60001052582027e-06, - "loss": 0.2756, + "epoch": 0.42, + "grad_norm": 0.39523796936286, + "learning_rate": 1.297759834375447e-05, + "loss": 0.3283, "step": 9179 }, { - "epoch": 0.53, - "grad_norm": 0.5660900591073189, - "learning_rate": 9.598151109403102e-06, - "loss": 0.4276, + "epoch": 0.42, + "grad_norm": 0.6338557855334457, + "learning_rate": 1.297617788238026e-05, + "loss": 0.4115, "step": 9180 }, { - "epoch": 0.53, - "grad_norm": 0.4704157923383228, - "learning_rate": 9.596291706901946e-06, - "loss": 0.2179, + "epoch": 0.42, + "grad_norm": 0.21045759130221833, + "learning_rate": 1.2974757355116344e-05, + "loss": 0.0737, "step": 9181 }, { - "epoch": 0.53, - "grad_norm": 0.4820397687697669, - "learning_rate": 9.59443231838119e-06, - "loss": 0.3491, + "epoch": 0.42, + "grad_norm": 0.3887189494908164, + "learning_rate": 1.2973336761994168e-05, + "loss": 0.2888, "step": 9182 }, { - "epoch": 0.53, - "grad_norm": 0.38983163889496963, - "learning_rate": 9.59257294390523e-06, - "loss": 0.3285, + "epoch": 0.42, + "grad_norm": 0.5567940460733649, + "learning_rate": 1.297191610304518e-05, + "loss": 0.3766, "step": 9183 }, { - "epoch": 0.53, - "grad_norm": 0.2131714527848812, - "learning_rate": 9.59071358353845e-06, - "loss": 0.1013, + "epoch": 0.42, + "grad_norm": 0.4884782477352658, + "learning_rate": 1.2970495378300834e-05, + "loss": 0.2632, "step": 9184 }, { - "epoch": 0.53, - "grad_norm": 0.3957734107129791, - "learning_rate": 9.588854237345238e-06, - "loss": 0.3476, + "epoch": 0.42, + "grad_norm": 0.38951540177713306, + "learning_rate": 1.2969074587792583e-05, + "loss": 0.3208, "step": 9185 }, { - "epoch": 0.53, - "grad_norm": 0.9320135709989978, - "learning_rate": 9.586994905389985e-06, - "loss": 0.4693, + "epoch": 0.42, + "grad_norm": 0.3699008799668862, + "learning_rate": 1.2967653731551881e-05, + "loss": 0.3473, "step": 9186 }, { - "epoch": 0.53, - "grad_norm": 0.3124984898779434, - "learning_rate": 9.585135587737085e-06, - "loss": 0.2266, + "epoch": 0.42, + "grad_norm": 0.4470966365802915, + "learning_rate": 1.2966232809610189e-05, + "loss": 0.2744, "step": 9187 }, { - "epoch": 0.53, - "grad_norm": 0.45270701652538803, - "learning_rate": 9.583276284450917e-06, - "loss": 0.3355, + "epoch": 0.42, + "grad_norm": 0.26081461929945565, + "learning_rate": 1.2964811821998961e-05, + "loss": 0.1676, "step": 9188 }, { - "epoch": 0.53, - "grad_norm": 0.44931577222450564, - "learning_rate": 9.581416995595877e-06, - "loss": 0.2828, + "epoch": 0.42, + "grad_norm": 1.8002601435944738, + "learning_rate": 1.2963390768749655e-05, + "loss": 0.8674, "step": 9189 }, { - "epoch": 0.53, - "grad_norm": 0.23532191378371217, - "learning_rate": 9.579557721236345e-06, - "loss": 0.1269, + "epoch": 0.42, + "grad_norm": 0.3572184913664882, + "learning_rate": 1.2961969649893732e-05, + "loss": 0.3244, "step": 9190 }, { - "epoch": 0.53, - "grad_norm": 0.34700214444362804, - "learning_rate": 9.577698461436715e-06, - "loss": 0.2821, + "epoch": 0.42, + "grad_norm": 0.35352476648188086, + "learning_rate": 1.2960548465462658e-05, + "loss": 0.2573, "step": 9191 }, { - "epoch": 0.53, - "grad_norm": 1.1069957482466108, - "learning_rate": 9.575839216261366e-06, - "loss": 0.4473, + "epoch": 0.42, + "grad_norm": 0.7518776254676474, + "learning_rate": 1.2959127215487894e-05, + "loss": 0.5121, "step": 9192 }, { - "epoch": 0.53, - "grad_norm": 0.539025147105325, - "learning_rate": 9.573979985774689e-06, - "loss": 0.3448, + "epoch": 0.42, + "grad_norm": 0.28183382216949426, + "learning_rate": 1.2957705900000907e-05, + "loss": 0.1672, "step": 9193 }, { - "epoch": 0.53, - "grad_norm": 0.4325392202671178, - "learning_rate": 9.572120770041065e-06, - "loss": 0.2551, + "epoch": 0.42, + "grad_norm": 0.3042201610839729, + "learning_rate": 1.2956284519033165e-05, + "loss": 0.2327, "step": 9194 }, { - "epoch": 0.53, - "grad_norm": 0.37278555312821754, - "learning_rate": 9.570261569124882e-06, - "loss": 0.3115, + "epoch": 0.42, + "grad_norm": 0.4945094068591413, + "learning_rate": 1.2954863072616127e-05, + "loss": 0.4178, "step": 9195 }, { - "epoch": 0.53, - "grad_norm": 0.31715591922240705, - "learning_rate": 9.568402383090519e-06, - "loss": 0.1692, + "epoch": 0.42, + "grad_norm": 0.5972086658647482, + "learning_rate": 1.295344156078127e-05, + "loss": 0.4252, "step": 9196 }, { - "epoch": 0.53, - "grad_norm": 0.3931850717706773, - "learning_rate": 9.566543212002365e-06, - "loss": 0.2381, + "epoch": 0.42, + "grad_norm": 0.38583420616495356, + "learning_rate": 1.2952019983560062e-05, + "loss": 0.2876, "step": 9197 }, { - "epoch": 0.53, - "grad_norm": 0.4006785161291414, - "learning_rate": 9.564684055924801e-06, - "loss": 0.3082, + "epoch": 0.42, + "grad_norm": 0.34613346335715556, + "learning_rate": 1.295059834098398e-05, + "loss": 0.2926, "step": 9198 }, { - "epoch": 0.53, - "grad_norm": 0.9563471812307568, - "learning_rate": 9.562824914922211e-06, - "loss": 0.6652, + "epoch": 0.42, + "grad_norm": 0.3269169500766363, + "learning_rate": 1.2949176633084494e-05, + "loss": 0.1794, "step": 9199 }, { - "epoch": 0.53, - "grad_norm": 0.29294573478695535, - "learning_rate": 9.560965789058975e-06, - "loss": 0.2166, + "epoch": 0.42, + "grad_norm": 0.3320053570230296, + "learning_rate": 1.294775485989308e-05, + "loss": 0.2315, "step": 9200 }, { - "epoch": 0.53, - "grad_norm": 1.056246726061743, - "learning_rate": 9.559106678399473e-06, - "loss": 0.5694, + "epoch": 0.42, + "grad_norm": 0.9209511929468019, + "learning_rate": 1.2946333021441211e-05, + "loss": 0.3717, "step": 9201 }, { - "epoch": 0.53, - "grad_norm": 0.27936225502157397, - "learning_rate": 9.55724758300809e-06, - "loss": 0.1959, + "epoch": 0.42, + "grad_norm": 0.35655274450370195, + "learning_rate": 1.2944911117760372e-05, + "loss": 0.2979, "step": 9202 }, { - "epoch": 0.53, - "grad_norm": 0.3162580378410517, - "learning_rate": 9.555388502949201e-06, - "loss": 0.2596, + "epoch": 0.42, + "grad_norm": 0.33188512255103086, + "learning_rate": 1.2943489148882038e-05, + "loss": 0.277, "step": 9203 }, { - "epoch": 0.53, - "grad_norm": 0.6572106044140896, - "learning_rate": 9.553529438287192e-06, - "loss": 0.4197, + "epoch": 0.42, + "grad_norm": 0.6369611405567255, + "learning_rate": 1.294206711483769e-05, + "loss": 0.3754, "step": 9204 }, { - "epoch": 0.53, - "grad_norm": 0.8055700596054751, - "learning_rate": 9.551670389086438e-06, - "loss": 0.5843, + "epoch": 0.42, + "grad_norm": 0.26858314289529717, + "learning_rate": 1.2940645015658814e-05, + "loss": 0.1571, "step": 9205 }, { - "epoch": 0.53, - "grad_norm": 0.372228470556821, - "learning_rate": 9.54981135541132e-06, - "loss": 0.2797, + "epoch": 0.42, + "grad_norm": 0.459168709094042, + "learning_rate": 1.2939222851376891e-05, + "loss": 0.2715, "step": 9206 }, { - "epoch": 0.53, - "grad_norm": 0.3561420751933189, - "learning_rate": 9.547952337326214e-06, - "loss": 0.2531, + "epoch": 0.42, + "grad_norm": 0.435507454972667, + "learning_rate": 1.2937800622023407e-05, + "loss": 0.2786, "step": 9207 }, { - "epoch": 0.53, - "grad_norm": 0.37148428650642595, - "learning_rate": 9.546093334895498e-06, - "loss": 0.2567, + "epoch": 0.42, + "grad_norm": 0.6383914352932758, + "learning_rate": 1.2936378327629849e-05, + "loss": 0.4205, "step": 9208 }, { - "epoch": 0.53, - "grad_norm": 0.36363796098136053, - "learning_rate": 9.544234348183553e-06, - "loss": 0.279, + "epoch": 0.42, + "grad_norm": 0.3834138532187214, + "learning_rate": 1.2934955968227705e-05, + "loss": 0.335, "step": 9209 }, { - "epoch": 0.53, - "grad_norm": 0.2959650585604881, - "learning_rate": 9.542375377254753e-06, - "loss": 0.238, + "epoch": 0.42, + "grad_norm": 0.37856879915810826, + "learning_rate": 1.2933533543848462e-05, + "loss": 0.3254, "step": 9210 }, { - "epoch": 0.53, - "grad_norm": 0.6060688718356312, - "learning_rate": 9.54051642217347e-06, - "loss": 0.399, + "epoch": 0.42, + "grad_norm": 0.2764526077080524, + "learning_rate": 1.2932111054523615e-05, + "loss": 0.1516, "step": 9211 }, { - "epoch": 0.53, - "grad_norm": 0.3963007962854568, - "learning_rate": 9.538657483004088e-06, - "loss": 0.3126, + "epoch": 0.42, + "grad_norm": 0.28489638212999746, + "learning_rate": 1.2930688500284659e-05, + "loss": 0.2403, "step": 9212 }, { - "epoch": 0.53, - "grad_norm": 0.7595275160558781, - "learning_rate": 9.536798559810978e-06, - "loss": 0.3205, + "epoch": 0.42, + "grad_norm": 0.8867247900480723, + "learning_rate": 1.292926588116308e-05, + "loss": 0.5108, "step": 9213 }, { - "epoch": 0.53, - "grad_norm": 0.3413631598251066, - "learning_rate": 9.53493965265851e-06, - "loss": 0.3021, + "epoch": 0.42, + "grad_norm": 0.3365305180228177, + "learning_rate": 1.2927843197190377e-05, + "loss": 0.2747, "step": 9214 }, { - "epoch": 0.53, - "grad_norm": 0.327046762074509, - "learning_rate": 9.533080761611066e-06, - "loss": 0.2814, + "epoch": 0.42, + "grad_norm": 0.38319802896539457, + "learning_rate": 1.2926420448398051e-05, + "loss": 0.2953, "step": 9215 }, { - "epoch": 0.53, - "grad_norm": 0.21901892281945962, - "learning_rate": 9.53122188673301e-06, - "loss": 0.1508, + "epoch": 0.42, + "grad_norm": 1.0872457181583672, + "learning_rate": 1.2924997634817593e-05, + "loss": 0.7089, "step": 9216 }, { - "epoch": 0.53, - "grad_norm": 1.0974836030044757, - "learning_rate": 9.529363028088725e-06, - "loss": 0.7183, + "epoch": 0.42, + "grad_norm": 0.27469806576565337, + "learning_rate": 1.2923574756480512e-05, + "loss": 0.1937, "step": 9217 }, { - "epoch": 0.53, - "grad_norm": 0.34926035459810145, - "learning_rate": 9.52750418574258e-06, - "loss": 0.265, + "epoch": 0.42, + "grad_norm": 0.36918583653454734, + "learning_rate": 1.2922151813418298e-05, + "loss": 0.2883, "step": 9218 }, { - "epoch": 0.53, - "grad_norm": 0.3801483295500776, - "learning_rate": 9.525645359758939e-06, - "loss": 0.3289, + "epoch": 0.42, + "grad_norm": 0.4498237502137872, + "learning_rate": 1.2920728805662462e-05, + "loss": 0.2928, "step": 9219 }, { - "epoch": 0.53, - "grad_norm": 1.1846574314137546, - "learning_rate": 9.523786550202182e-06, - "loss": 0.3465, + "epoch": 0.42, + "grad_norm": 1.0715695875812654, + "learning_rate": 1.2919305733244503e-05, + "loss": 0.3994, "step": 9220 }, { - "epoch": 0.53, - "grad_norm": 0.35928822346783335, - "learning_rate": 9.521927757136673e-06, - "loss": 0.2593, + "epoch": 0.42, + "grad_norm": 0.36717051230277065, + "learning_rate": 1.2917882596195932e-05, + "loss": 0.2651, "step": 9221 }, { - "epoch": 0.53, - "grad_norm": 0.25623013450271037, - "learning_rate": 9.520068980626789e-06, - "loss": 0.1978, + "epoch": 0.42, + "grad_norm": 0.38108542109729615, + "learning_rate": 1.291645939454825e-05, + "loss": 0.3117, "step": 9222 }, { - "epoch": 0.53, - "grad_norm": 0.37505431562660235, - "learning_rate": 9.518210220736892e-06, - "loss": 0.2664, + "epoch": 0.42, + "grad_norm": 1.3081138690257212, + "learning_rate": 1.2915036128332972e-05, + "loss": 0.6199, "step": 9223 }, { - "epoch": 0.53, - "grad_norm": 0.37891600500710415, - "learning_rate": 9.516351477531357e-06, - "loss": 0.2994, + "epoch": 0.42, + "grad_norm": 0.28556975276899965, + "learning_rate": 1.29136127975816e-05, + "loss": 0.1705, "step": 9224 }, { - "epoch": 0.53, - "grad_norm": 0.9228496278274424, - "learning_rate": 9.51449275107455e-06, - "loss": 0.4677, + "epoch": 0.42, + "grad_norm": 0.7478664881648842, + "learning_rate": 1.2912189402325647e-05, + "loss": 0.3451, "step": 9225 }, { - "epoch": 0.53, - "grad_norm": 0.3824480382733829, - "learning_rate": 9.512634041430835e-06, - "loss": 0.2635, + "epoch": 0.42, + "grad_norm": 0.40576814872766537, + "learning_rate": 1.2910765942596632e-05, + "loss": 0.3509, "step": 9226 }, { - "epoch": 0.53, - "grad_norm": 0.41175544519260576, - "learning_rate": 9.510775348664584e-06, - "loss": 0.3034, + "epoch": 0.42, + "grad_norm": 0.31359612060460024, + "learning_rate": 1.2909342418426062e-05, + "loss": 0.2196, "step": 9227 }, { - "epoch": 0.53, - "grad_norm": 0.26939742805578254, - "learning_rate": 9.508916672840161e-06, - "loss": 0.1751, + "epoch": 0.42, + "grad_norm": 0.9812326793123388, + "learning_rate": 1.2907918829845456e-05, + "loss": 0.6035, "step": 9228 }, { - "epoch": 0.53, - "grad_norm": 1.172159570979836, - "learning_rate": 9.507058014021933e-06, - "loss": 0.5465, + "epoch": 0.42, + "grad_norm": 0.49906772845363845, + "learning_rate": 1.290649517688633e-05, + "loss": 0.3985, "step": 9229 }, { - "epoch": 0.53, - "grad_norm": 0.35814475799141565, - "learning_rate": 9.505199372274264e-06, - "loss": 0.27, + "epoch": 0.42, + "grad_norm": 0.2602044477218655, + "learning_rate": 1.2905071459580201e-05, + "loss": 0.1951, "step": 9230 }, { - "epoch": 0.53, - "grad_norm": 0.43551068922587277, - "learning_rate": 9.50334074766152e-06, - "loss": 0.3128, + "epoch": 0.42, + "grad_norm": 0.7006466237762627, + "learning_rate": 1.2903647677958588e-05, + "loss": 0.4565, "step": 9231 }, { - "epoch": 0.53, - "grad_norm": 0.7961712293430698, - "learning_rate": 9.501482140248064e-06, - "loss": 0.4733, + "epoch": 0.42, + "grad_norm": 0.42734410659168814, + "learning_rate": 1.2902223832053018e-05, + "loss": 0.3224, "step": 9232 }, { - "epoch": 0.53, - "grad_norm": 0.3121898207774114, - "learning_rate": 9.499623550098262e-06, - "loss": 0.1957, + "epoch": 0.42, + "grad_norm": 0.36413259084467886, + "learning_rate": 1.2900799921895004e-05, + "loss": 0.1991, "step": 9233 }, { - "epoch": 0.53, - "grad_norm": 0.2227896224663921, - "learning_rate": 9.497764977276473e-06, - "loss": 0.2213, + "epoch": 0.42, + "grad_norm": 0.4079955549655896, + "learning_rate": 1.2899375947516082e-05, + "loss": 0.3517, "step": 9234 }, { - "epoch": 0.53, - "grad_norm": 1.5631757758484857, - "learning_rate": 9.495906421847063e-06, - "loss": 0.792, + "epoch": 0.42, + "grad_norm": 0.9609102035568737, + "learning_rate": 1.2897951908947768e-05, + "loss": 0.5279, "step": 9235 }, { - "epoch": 0.53, - "grad_norm": 0.3107139886169333, - "learning_rate": 9.49404788387439e-06, - "loss": 0.2195, + "epoch": 0.42, + "grad_norm": 0.44847967024400237, + "learning_rate": 1.2896527806221592e-05, + "loss": 0.2955, "step": 9236 }, { - "epoch": 0.53, - "grad_norm": 0.7244261519601902, - "learning_rate": 9.492189363422819e-06, - "loss": 0.4441, + "epoch": 0.42, + "grad_norm": 0.341410058789977, + "learning_rate": 1.2895103639369083e-05, + "loss": 0.2861, "step": 9237 }, { - "epoch": 0.53, - "grad_norm": 0.3605582089400721, - "learning_rate": 9.490330860556707e-06, - "loss": 0.3125, + "epoch": 0.42, + "grad_norm": 0.33387151505557394, + "learning_rate": 1.2893679408421766e-05, + "loss": 0.2516, "step": 9238 }, { - "epoch": 0.53, - "grad_norm": 0.2873919445850608, - "learning_rate": 9.488472375340417e-06, - "loss": 0.2106, + "epoch": 0.42, + "grad_norm": 0.4938747131041917, + "learning_rate": 1.2892255113411181e-05, + "loss": 0.3239, "step": 9239 }, { - "epoch": 0.53, - "grad_norm": 0.39888390668387463, - "learning_rate": 9.486613907838306e-06, - "loss": 0.2769, + "epoch": 0.42, + "grad_norm": 0.35092957640965905, + "learning_rate": 1.2890830754368855e-05, + "loss": 0.1915, "step": 9240 }, { - "epoch": 0.53, - "grad_norm": 0.4867865905958677, - "learning_rate": 9.484755458114732e-06, - "loss": 0.3455, + "epoch": 0.42, + "grad_norm": 0.4552596237003159, + "learning_rate": 1.288940633132632e-05, + "loss": 0.3607, "step": 9241 }, { - "epoch": 0.53, - "grad_norm": 0.310940342891282, - "learning_rate": 9.482897026234056e-06, - "loss": 0.2665, + "epoch": 0.42, + "grad_norm": 0.3384308263839799, + "learning_rate": 1.2887981844315114e-05, + "loss": 0.2652, "step": 9242 }, { - "epoch": 0.53, - "grad_norm": 0.501526397698931, - "learning_rate": 9.48103861226063e-06, - "loss": 0.3035, + "epoch": 0.42, + "grad_norm": 0.7182167582623974, + "learning_rate": 1.2886557293366773e-05, + "loss": 0.3612, "step": 9243 }, { - "epoch": 0.53, - "grad_norm": 0.7521124857164626, - "learning_rate": 9.47918021625882e-06, - "loss": 0.5031, + "epoch": 0.42, + "grad_norm": 0.42771286647029166, + "learning_rate": 1.2885132678512834e-05, + "loss": 0.3032, "step": 9244 }, { - "epoch": 0.53, - "grad_norm": 0.393541576127973, - "learning_rate": 9.477321838292972e-06, + "epoch": 0.42, + "grad_norm": 0.35597363157577383, + "learning_rate": 1.288370799978484e-05, "loss": 0.279, "step": 9245 }, { - "epoch": 0.53, - "grad_norm": 0.3212033534001573, - "learning_rate": 9.475463478427451e-06, - "loss": 0.2417, + "epoch": 0.42, + "grad_norm": 0.2781024329452538, + "learning_rate": 1.2882283257214332e-05, + "loss": 0.2105, "step": 9246 }, { - "epoch": 0.53, - "grad_norm": 0.7874596505559587, - "learning_rate": 9.473605136726602e-06, - "loss": 0.6043, + "epoch": 0.42, + "grad_norm": 0.6965117482475031, + "learning_rate": 1.288085845083285e-05, + "loss": 0.4606, "step": 9247 }, { - "epoch": 0.53, - "grad_norm": 0.3961777779705941, - "learning_rate": 9.471746813254788e-06, - "loss": 0.2912, + "epoch": 0.42, + "grad_norm": 0.31677890119118707, + "learning_rate": 1.2879433580671937e-05, + "loss": 0.287, "step": 9248 }, { - "epoch": 0.53, - "grad_norm": 0.27725278619850396, - "learning_rate": 9.469888508076357e-06, - "loss": 0.1882, + "epoch": 0.42, + "grad_norm": 0.7265120111951784, + "learning_rate": 1.287800864676314e-05, + "loss": 0.4457, "step": 9249 }, { - "epoch": 0.53, - "grad_norm": 0.3362006708506701, - "learning_rate": 9.468030221255667e-06, - "loss": 0.3169, + "epoch": 0.42, + "grad_norm": 0.3513048664155185, + "learning_rate": 1.2876583649138005e-05, + "loss": 0.2815, "step": 9250 }, { - "epoch": 0.53, - "grad_norm": 0.38625457503857924, - "learning_rate": 9.46617195285707e-06, - "loss": 0.2824, + "epoch": 0.42, + "grad_norm": 0.4569076711507936, + "learning_rate": 1.2875158587828082e-05, + "loss": 0.2611, "step": 9251 }, { - "epoch": 0.53, - "grad_norm": 0.4559670161202051, - "learning_rate": 9.464313702944912e-06, - "loss": 0.272, + "epoch": 0.43, + "grad_norm": 0.2718861630962785, + "learning_rate": 1.2873733462864919e-05, + "loss": 0.2011, "step": 9252 }, { - "epoch": 0.53, - "grad_norm": 0.8027237790991724, - "learning_rate": 9.462455471583545e-06, - "loss": 0.5867, + "epoch": 0.43, + "grad_norm": 0.41604911877923323, + "learning_rate": 1.2872308274280067e-05, + "loss": 0.2941, "step": 9253 }, { - "epoch": 0.53, - "grad_norm": 0.2779288614551335, - "learning_rate": 9.460597258837325e-06, - "loss": 0.2458, + "epoch": 0.43, + "grad_norm": 0.4634007956067311, + "learning_rate": 1.2870883022105079e-05, + "loss": 0.3283, "step": 9254 }, { - "epoch": 0.53, - "grad_norm": 0.43085408967342126, - "learning_rate": 9.458739064770595e-06, - "loss": 0.351, + "epoch": 0.43, + "grad_norm": 0.5334835499328268, + "learning_rate": 1.2869457706371503e-05, + "loss": 0.4032, "step": 9255 }, { - "epoch": 0.53, - "grad_norm": 0.22237328741189238, - "learning_rate": 9.456880889447712e-06, - "loss": 0.093, + "epoch": 0.43, + "grad_norm": 1.0337936929917264, + "learning_rate": 1.2868032327110904e-05, + "loss": 0.5082, "step": 9256 }, { - "epoch": 0.53, - "grad_norm": 0.3228147343712859, - "learning_rate": 9.455022732933017e-06, - "loss": 0.2727, + "epoch": 0.43, + "grad_norm": 0.4497371917837551, + "learning_rate": 1.2866606884354831e-05, + "loss": 0.3157, "step": 9257 }, { - "epoch": 0.53, - "grad_norm": 0.4869726882315915, - "learning_rate": 9.453164595290865e-06, - "loss": 0.3442, + "epoch": 0.43, + "grad_norm": 0.2698418805435096, + "learning_rate": 1.2865181378134845e-05, + "loss": 0.2238, "step": 9258 }, { - "epoch": 0.53, - "grad_norm": 0.4765999794871249, - "learning_rate": 9.451306476585595e-06, - "loss": 0.3115, + "epoch": 0.43, + "grad_norm": 0.6420644624710392, + "learning_rate": 1.2863755808482505e-05, + "loss": 0.3353, "step": 9259 }, { - "epoch": 0.53, - "grad_norm": 0.37056859041713597, - "learning_rate": 9.449448376881563e-06, - "loss": 0.2723, + "epoch": 0.43, + "grad_norm": 0.4271061651692161, + "learning_rate": 1.2862330175429374e-05, + "loss": 0.298, "step": 9260 }, { - "epoch": 0.53, - "grad_norm": 0.9036552153671678, - "learning_rate": 9.447590296243106e-06, - "loss": 0.4958, + "epoch": 0.43, + "grad_norm": 0.4165571392190505, + "learning_rate": 1.2860904479007008e-05, + "loss": 0.3478, "step": 9261 }, { - "epoch": 0.53, - "grad_norm": 0.21923658796039047, - "learning_rate": 9.445732234734576e-06, - "loss": 0.1774, + "epoch": 0.43, + "grad_norm": 0.5816711263495682, + "learning_rate": 1.2859478719246976e-05, + "loss": 0.4165, "step": 9262 }, { - "epoch": 0.53, - "grad_norm": 0.36316978936914124, - "learning_rate": 9.443874192420312e-06, - "loss": 0.2585, + "epoch": 0.43, + "grad_norm": 0.3272428994892283, + "learning_rate": 1.285805289618084e-05, + "loss": 0.2162, "step": 9263 }, { - "epoch": 0.53, - "grad_norm": 0.5077816681609485, - "learning_rate": 9.442016169364664e-06, - "loss": 0.3649, + "epoch": 0.43, + "grad_norm": 0.3898187144407041, + "learning_rate": 1.285662700984017e-05, + "loss": 0.2356, "step": 9264 }, { - "epoch": 0.53, - "grad_norm": 0.33319325267717365, - "learning_rate": 9.440158165631972e-06, - "loss": 0.2898, + "epoch": 0.43, + "grad_norm": 0.43108173504372005, + "learning_rate": 1.2855201060256528e-05, + "loss": 0.3388, "step": 9265 }, { - "epoch": 0.53, - "grad_norm": 0.4992737656729488, - "learning_rate": 9.438300181286576e-06, - "loss": 0.284, + "epoch": 0.43, + "grad_norm": 0.3311893827795691, + "learning_rate": 1.285377504746149e-05, + "loss": 0.231, "step": 9266 }, { - "epoch": 0.53, - "grad_norm": 0.4975012780414002, - "learning_rate": 9.436442216392823e-06, - "loss": 0.4145, + "epoch": 0.43, + "grad_norm": 1.2658118343812825, + "learning_rate": 1.2852348971486618e-05, + "loss": 0.5285, "step": 9267 }, { - "epoch": 0.53, - "grad_norm": 0.2694784425314511, - "learning_rate": 9.43458427101505e-06, - "loss": 0.1441, + "epoch": 0.43, + "grad_norm": 1.3165124603576754, + "learning_rate": 1.2850922832363493e-05, + "loss": 0.8713, "step": 9268 }, { - "epoch": 0.53, - "grad_norm": 0.3240022249400918, - "learning_rate": 9.4327263452176e-06, - "loss": 0.1771, + "epoch": 0.43, + "grad_norm": 0.4208343047056562, + "learning_rate": 1.2849496630123683e-05, + "loss": 0.2025, "step": 9269 }, { - "epoch": 0.53, - "grad_norm": 0.3343235297742924, - "learning_rate": 9.430868439064813e-06, - "loss": 0.3089, + "epoch": 0.43, + "grad_norm": 0.38286662246203435, + "learning_rate": 1.2848070364798763e-05, + "loss": 0.3088, "step": 9270 }, { - "epoch": 0.53, - "grad_norm": 0.6910139457529847, - "learning_rate": 9.429010552621027e-06, - "loss": 0.4781, + "epoch": 0.43, + "grad_norm": 0.34771023238961446, + "learning_rate": 1.2846644036420313e-05, + "loss": 0.2501, "step": 9271 }, { - "epoch": 0.53, - "grad_norm": 0.35862362126432507, - "learning_rate": 9.42715268595058e-06, - "loss": 0.243, + "epoch": 0.43, + "grad_norm": 0.38705870108181156, + "learning_rate": 1.2845217645019906e-05, + "loss": 0.1998, "step": 9272 }, { - "epoch": 0.53, - "grad_norm": 0.49851971643361387, - "learning_rate": 9.425294839117812e-06, - "loss": 0.391, + "epoch": 0.43, + "grad_norm": 0.4198386033394339, + "learning_rate": 1.284379119062912e-05, + "loss": 0.3412, "step": 9273 }, { - "epoch": 0.53, - "grad_norm": 0.23422338164763837, - "learning_rate": 9.423437012187057e-06, - "loss": 0.2075, + "epoch": 0.43, + "grad_norm": 0.5441535957920729, + "learning_rate": 1.284236467327954e-05, + "loss": 0.3874, "step": 9274 }, { - "epoch": 0.53, - "grad_norm": 0.3086926262711173, - "learning_rate": 9.421579205222657e-06, - "loss": 0.2067, + "epoch": 0.43, + "grad_norm": 0.704644004348439, + "learning_rate": 1.2840938093002745e-05, + "loss": 0.4272, "step": 9275 }, { - "epoch": 0.53, - "grad_norm": 1.310073728398527, - "learning_rate": 9.41972141828894e-06, - "loss": 0.6654, + "epoch": 0.43, + "grad_norm": 0.3921787667686933, + "learning_rate": 1.2839511449830323e-05, + "loss": 0.2941, "step": 9276 }, { - "epoch": 0.53, - "grad_norm": 0.5841986899196692, - "learning_rate": 9.41786365145025e-06, - "loss": 0.3337, + "epoch": 0.43, + "grad_norm": 0.3507548039206577, + "learning_rate": 1.283808474379385e-05, + "loss": 0.2594, "step": 9277 }, { - "epoch": 0.53, - "grad_norm": 0.2536795044961282, - "learning_rate": 9.416005904770916e-06, - "loss": 0.2111, + "epoch": 0.43, + "grad_norm": 0.3166677398320318, + "learning_rate": 1.2836657974924915e-05, + "loss": 0.2091, "step": 9278 }, { - "epoch": 0.53, - "grad_norm": 1.2215967277636537, - "learning_rate": 9.414148178315268e-06, - "loss": 0.8136, + "epoch": 0.43, + "grad_norm": 0.41400884141288585, + "learning_rate": 1.283523114325511e-05, + "loss": 0.2528, "step": 9279 }, { - "epoch": 0.53, - "grad_norm": 0.3058690669119812, - "learning_rate": 9.412290472147648e-06, - "loss": 0.1969, + "epoch": 0.43, + "grad_norm": 1.3707460866928427, + "learning_rate": 1.2833804248816018e-05, + "loss": 0.8134, "step": 9280 }, { - "epoch": 0.53, - "grad_norm": 0.7589007743382404, - "learning_rate": 9.41043278633238e-06, - "loss": 0.409, + "epoch": 0.43, + "grad_norm": 0.38484701445474057, + "learning_rate": 1.283237729163923e-05, + "loss": 0.2783, "step": 9281 }, { - "epoch": 0.53, - "grad_norm": 0.3057591447527336, - "learning_rate": 9.408575120933804e-06, - "loss": 0.2498, + "epoch": 0.43, + "grad_norm": 0.41817841082458845, + "learning_rate": 1.2830950271756341e-05, + "loss": 0.2583, "step": 9282 }, { - "epoch": 0.53, - "grad_norm": 0.6995045368657311, - "learning_rate": 9.406717476016242e-06, - "loss": 0.4688, + "epoch": 0.43, + "grad_norm": 0.4407431743626982, + "learning_rate": 1.2829523189198942e-05, + "loss": 0.2824, "step": 9283 }, { - "epoch": 0.53, - "grad_norm": 0.5984000260809333, - "learning_rate": 9.40485985164403e-06, - "loss": 0.3472, + "epoch": 0.43, + "grad_norm": 0.3176283830955911, + "learning_rate": 1.2828096043998627e-05, + "loss": 0.1914, "step": 9284 }, { - "epoch": 0.53, - "grad_norm": 0.375960869641853, - "learning_rate": 9.403002247881499e-06, - "loss": 0.2511, + "epoch": 0.43, + "grad_norm": 0.363969820657224, + "learning_rate": 1.2826668836186988e-05, + "loss": 0.2561, "step": 9285 }, { - "epoch": 0.53, - "grad_norm": 0.2200191456962238, - "learning_rate": 9.40114466479297e-06, - "loss": 0.209, + "epoch": 0.43, + "grad_norm": 0.5237257890566737, + "learning_rate": 1.2825241565795628e-05, + "loss": 0.4138, "step": 9286 }, { - "epoch": 0.53, - "grad_norm": 0.5939482634092973, - "learning_rate": 9.399287102442776e-06, - "loss": 0.333, + "epoch": 0.43, + "grad_norm": 0.48437039925801284, + "learning_rate": 1.2823814232856143e-05, + "loss": 0.3118, "step": 9287 }, { - "epoch": 0.53, - "grad_norm": 0.3786700230040074, - "learning_rate": 9.397429560895243e-06, - "loss": 0.2799, + "epoch": 0.43, + "grad_norm": 0.47353915829006527, + "learning_rate": 1.2822386837400132e-05, + "loss": 0.3333, "step": 9288 }, { - "epoch": 0.53, - "grad_norm": 0.47843379765938265, - "learning_rate": 9.395572040214702e-06, - "loss": 0.3526, + "epoch": 0.43, + "grad_norm": 0.414526081073365, + "learning_rate": 1.2820959379459194e-05, + "loss": 0.2603, "step": 9289 }, { - "epoch": 0.53, - "grad_norm": 0.3725937972585066, - "learning_rate": 9.393714540465474e-06, - "loss": 0.2929, + "epoch": 0.43, + "grad_norm": 0.24897793223696746, + "learning_rate": 1.281953185906494e-05, + "loss": 0.1359, "step": 9290 }, { - "epoch": 0.53, - "grad_norm": 0.34822543851312543, - "learning_rate": 9.391857061711883e-06, - "loss": 0.2587, + "epoch": 0.43, + "grad_norm": 0.36089582952149185, + "learning_rate": 1.2818104276248962e-05, + "loss": 0.2842, "step": 9291 }, { - "epoch": 0.53, - "grad_norm": 0.2866067925550406, - "learning_rate": 9.389999604018258e-06, - "loss": 0.1637, + "epoch": 0.43, + "grad_norm": 0.8192955616657643, + "learning_rate": 1.2816676631042874e-05, + "loss": 0.4148, "step": 9292 }, { - "epoch": 0.53, - "grad_norm": 0.3738816323207938, - "learning_rate": 9.388142167448917e-06, - "loss": 0.3084, + "epoch": 0.43, + "grad_norm": 0.3524332559756306, + "learning_rate": 1.281524892347828e-05, + "loss": 0.2896, "step": 9293 }, { - "epoch": 0.53, - "grad_norm": 0.29667286372725454, - "learning_rate": 9.38628475206819e-06, - "loss": 0.2788, - "step": 9294 + "epoch": 0.43, + "grad_norm": 0.3625218614291083, + "learning_rate": 1.2813821153586789e-05, + "loss": 0.3285, + "step": 9294 }, { - "epoch": 0.53, - "grad_norm": 0.6007497819778793, - "learning_rate": 9.384427357940394e-06, - "loss": 0.3506, + "epoch": 0.43, + "grad_norm": 0.9369178190642404, + "learning_rate": 1.2812393321400008e-05, + "loss": 0.3677, "step": 9295 }, { - "epoch": 0.53, - "grad_norm": 0.41917867119031543, - "learning_rate": 9.382569985129854e-06, - "loss": 0.3215, + "epoch": 0.43, + "grad_norm": 0.2696797954693466, + "learning_rate": 1.2810965426949551e-05, + "loss": 0.1501, "step": 9296 }, { - "epoch": 0.53, - "grad_norm": 0.5345523972264065, - "learning_rate": 9.380712633700887e-06, - "loss": 0.37, + "epoch": 0.43, + "grad_norm": 0.2890507106504111, + "learning_rate": 1.2809537470267029e-05, + "loss": 0.2707, "step": 9297 }, { - "epoch": 0.53, - "grad_norm": 0.20961728479650352, - "learning_rate": 9.378855303717817e-06, - "loss": 0.1741, + "epoch": 0.43, + "grad_norm": 1.001910033546983, + "learning_rate": 1.2808109451384054e-05, + "loss": 0.3726, "step": 9298 }, { - "epoch": 0.53, - "grad_norm": 0.5007677585180572, - "learning_rate": 9.376997995244957e-06, - "loss": 0.3596, + "epoch": 0.43, + "grad_norm": 0.4942897489319573, + "learning_rate": 1.2806681370332244e-05, + "loss": 0.3285, "step": 9299 }, { - "epoch": 0.53, - "grad_norm": 0.30960347623118034, - "learning_rate": 9.375140708346634e-06, - "loss": 0.2864, + "epoch": 0.43, + "grad_norm": 0.41120020767750787, + "learning_rate": 1.2805253227143214e-05, + "loss": 0.3094, "step": 9300 }, { - "epoch": 0.53, - "grad_norm": 0.3443777763500569, - "learning_rate": 9.373283443087159e-06, - "loss": 0.2645, + "epoch": 0.43, + "grad_norm": 0.3610766477986101, + "learning_rate": 1.2803825021848577e-05, + "loss": 0.3106, "step": 9301 }, { - "epoch": 0.53, - "grad_norm": 0.7207968905840805, - "learning_rate": 9.371426199530853e-06, - "loss": 0.4504, + "epoch": 0.43, + "grad_norm": 0.1848146151695569, + "learning_rate": 1.2802396754479958e-05, + "loss": 0.0699, "step": 9302 }, { - "epoch": 0.53, - "grad_norm": 0.3588460744152287, - "learning_rate": 9.369568977742028e-06, - "loss": 0.3213, + "epoch": 0.43, + "grad_norm": 0.4052299901142605, + "learning_rate": 1.2800968425068977e-05, + "loss": 0.2971, "step": 9303 }, { - "epoch": 0.53, - "grad_norm": 0.6060702782939552, - "learning_rate": 9.367711777785004e-06, - "loss": 0.324, + "epoch": 0.43, + "grad_norm": 1.1993166168178706, + "learning_rate": 1.2799540033647255e-05, + "loss": 0.4536, "step": 9304 }, { - "epoch": 0.53, - "grad_norm": 0.3480442813272276, - "learning_rate": 9.365854599724096e-06, - "loss": 0.2939, + "epoch": 0.43, + "grad_norm": 0.32194111221947364, + "learning_rate": 1.2798111580246416e-05, + "loss": 0.237, "step": 9305 }, { - "epoch": 0.53, - "grad_norm": 0.23986228711116675, - "learning_rate": 9.363997443623612e-06, - "loss": 0.253, + "epoch": 0.43, + "grad_norm": 0.3961794664125094, + "learning_rate": 1.2796683064898081e-05, + "loss": 0.3093, "step": 9306 }, { - "epoch": 0.53, - "grad_norm": 0.4254434470268579, - "learning_rate": 9.362140309547873e-06, - "loss": 0.2891, + "epoch": 0.43, + "grad_norm": 1.5462192707150104, + "learning_rate": 1.279525448763388e-05, + "loss": 0.9516, "step": 9307 }, { - "epoch": 0.53, - "grad_norm": 0.4305528719251428, - "learning_rate": 9.360283197561185e-06, - "loss": 0.1963, + "epoch": 0.43, + "grad_norm": 0.18914600244791074, + "learning_rate": 1.2793825848485435e-05, + "loss": 0.0959, "step": 9308 }, { - "epoch": 0.53, - "grad_norm": 0.32013724856043474, - "learning_rate": 9.358426107727862e-06, - "loss": 0.2812, + "epoch": 0.43, + "grad_norm": 0.3096574501319118, + "learning_rate": 1.2792397147484384e-05, + "loss": 0.2649, "step": 9309 }, { - "epoch": 0.53, - "grad_norm": 0.49958189678242726, - "learning_rate": 9.356569040112216e-06, - "loss": 0.351, + "epoch": 0.43, + "grad_norm": 1.2853785205518868, + "learning_rate": 1.2790968384662348e-05, + "loss": 0.4844, "step": 9310 }, { - "epoch": 0.53, - "grad_norm": 0.4889885463858359, - "learning_rate": 9.354711994778558e-06, - "loss": 0.2716, + "epoch": 0.43, + "grad_norm": 0.8248647768908512, + "learning_rate": 1.2789539560050965e-05, + "loss": 0.3154, "step": 9311 }, { - "epoch": 0.54, - "grad_norm": 0.2855203325007413, - "learning_rate": 9.352854971791192e-06, - "loss": 0.2248, + "epoch": 0.43, + "grad_norm": 0.3658096888201, + "learning_rate": 1.2788110673681859e-05, + "loss": 0.2902, "step": 9312 }, { - "epoch": 0.54, - "grad_norm": 0.27880863601709716, - "learning_rate": 9.350997971214434e-06, - "loss": 0.2507, + "epoch": 0.43, + "grad_norm": 0.4915760628579432, + "learning_rate": 1.2786681725586677e-05, + "loss": 0.3642, "step": 9313 }, { - "epoch": 0.54, - "grad_norm": 0.3792599482042661, - "learning_rate": 9.349140993112588e-06, - "loss": 0.2169, + "epoch": 0.43, + "grad_norm": 0.3716185062901843, + "learning_rate": 1.2785252715797044e-05, + "loss": 0.2299, "step": 9314 }, { - "epoch": 0.54, - "grad_norm": 0.38684532344974176, - "learning_rate": 9.347284037549962e-06, - "loss": 0.3065, + "epoch": 0.43, + "grad_norm": 0.36733762789403324, + "learning_rate": 1.2783823644344598e-05, + "loss": 0.2022, "step": 9315 }, { - "epoch": 0.54, - "grad_norm": 0.8003084968892227, - "learning_rate": 9.34542710459086e-06, - "loss": 0.4174, + "epoch": 0.43, + "grad_norm": 1.2950186586496824, + "learning_rate": 1.2782394511260983e-05, + "loss": 0.4518, "step": 9316 }, { - "epoch": 0.54, - "grad_norm": 0.3193010117283392, - "learning_rate": 9.343570194299591e-06, - "loss": 0.2854, + "epoch": 0.43, + "grad_norm": 0.4012955608603009, + "learning_rate": 1.2780965316577833e-05, + "loss": 0.3106, "step": 9317 }, { - "epoch": 0.54, - "grad_norm": 0.32379332391684124, - "learning_rate": 9.341713306740457e-06, - "loss": 0.2349, + "epoch": 0.43, + "grad_norm": 0.3445466186469441, + "learning_rate": 1.2779536060326793e-05, + "loss": 0.2222, "step": 9318 }, { - "epoch": 0.54, - "grad_norm": 0.30827057912219974, - "learning_rate": 9.339856441977767e-06, - "loss": 0.2344, + "epoch": 0.43, + "grad_norm": 0.8184373286714502, + "learning_rate": 1.2778106742539502e-05, + "loss": 0.5672, "step": 9319 }, { - "epoch": 0.54, - "grad_norm": 1.1331119563905843, - "learning_rate": 9.337999600075814e-06, - "loss": 0.6408, + "epoch": 0.43, + "grad_norm": 0.2989657090615629, + "learning_rate": 1.2776677363247607e-05, + "loss": 0.2237, "step": 9320 }, { - "epoch": 0.54, - "grad_norm": 0.2831832360300413, - "learning_rate": 9.336142781098908e-06, - "loss": 0.2151, + "epoch": 0.43, + "grad_norm": 0.30900918197882027, + "learning_rate": 1.277524792248275e-05, + "loss": 0.2249, "step": 9321 }, { - "epoch": 0.54, - "grad_norm": 0.5405839352824342, - "learning_rate": 9.33428598511135e-06, - "loss": 0.3459, + "epoch": 0.43, + "grad_norm": 0.544796978332322, + "learning_rate": 1.277381842027658e-05, + "loss": 0.332, "step": 9322 }, { - "epoch": 0.54, - "grad_norm": 0.9654227292116724, - "learning_rate": 9.332429212177438e-06, - "loss": 0.4725, + "epoch": 0.43, + "grad_norm": 0.8295120421820827, + "learning_rate": 1.2772388856660744e-05, + "loss": 0.5224, "step": 9323 }, { - "epoch": 0.54, - "grad_norm": 0.23482472356001713, - "learning_rate": 9.330572462361474e-06, - "loss": 0.1729, + "epoch": 0.43, + "grad_norm": 0.4316388336314948, + "learning_rate": 1.277095923166689e-05, + "loss": 0.3021, "step": 9324 }, { - "epoch": 0.54, - "grad_norm": 0.4684689393595989, - "learning_rate": 9.328715735727758e-06, - "loss": 0.3859, + "epoch": 0.43, + "grad_norm": 0.3799684844825383, + "learning_rate": 1.2769529545326669e-05, + "loss": 0.2436, "step": 9325 }, { - "epoch": 0.54, - "grad_norm": 0.297606052083825, - "learning_rate": 9.326859032340585e-06, - "loss": 0.2425, + "epoch": 0.43, + "grad_norm": 0.4850281374257871, + "learning_rate": 1.2768099797671734e-05, + "loss": 0.2688, "step": 9326 }, { - "epoch": 0.54, - "grad_norm": 0.4108345649916392, - "learning_rate": 9.325002352264257e-06, - "loss": 0.2403, + "epoch": 0.43, + "grad_norm": 0.3431443769690651, + "learning_rate": 1.2766669988733734e-05, + "loss": 0.2871, "step": 9327 }, { - "epoch": 0.54, - "grad_norm": 0.9941058792175076, - "learning_rate": 9.323145695563067e-06, - "loss": 0.4418, + "epoch": 0.43, + "grad_norm": 0.3566606445597884, + "learning_rate": 1.2765240118544328e-05, + "loss": 0.2492, "step": 9328 }, { - "epoch": 0.54, - "grad_norm": 0.32359946359922775, - "learning_rate": 9.321289062301313e-06, - "loss": 0.2928, + "epoch": 0.43, + "grad_norm": 0.5097445652037658, + "learning_rate": 1.2763810187135177e-05, + "loss": 0.3977, "step": 9329 }, { - "epoch": 0.54, - "grad_norm": 0.3409617393200946, - "learning_rate": 9.319432452543292e-06, - "loss": 0.2698, + "epoch": 0.43, + "grad_norm": 0.3757814362775745, + "learning_rate": 1.2762380194537927e-05, + "loss": 0.276, "step": 9330 }, { - "epoch": 0.54, - "grad_norm": 0.24943573775102337, - "learning_rate": 9.317575866353293e-06, - "loss": 0.151, + "epoch": 0.43, + "grad_norm": 0.8006749527216803, + "learning_rate": 1.2760950140784244e-05, + "loss": 0.2923, "step": 9331 }, { - "epoch": 0.54, - "grad_norm": 0.47506315731861976, - "learning_rate": 9.315719303795614e-06, - "loss": 0.2959, + "epoch": 0.43, + "grad_norm": 0.46078463533789227, + "learning_rate": 1.2759520025905783e-05, + "loss": 0.3487, "step": 9332 }, { - "epoch": 0.54, - "grad_norm": 0.36402547396764845, - "learning_rate": 9.313862764934543e-06, - "loss": 0.2759, + "epoch": 0.43, + "grad_norm": 0.31828217656758173, + "learning_rate": 1.275808984993421e-05, + "loss": 0.2799, "step": 9333 }, { - "epoch": 0.54, - "grad_norm": 0.5439363586977312, - "learning_rate": 9.312006249834378e-06, - "loss": 0.2909, + "epoch": 0.43, + "grad_norm": 0.44326460259013684, + "learning_rate": 1.2756659612901188e-05, + "loss": 0.2287, "step": 9334 }, { - "epoch": 0.54, - "grad_norm": 0.7236335627092366, - "learning_rate": 9.310149758559405e-06, - "loss": 0.3895, + "epoch": 0.43, + "grad_norm": 0.8391256229892663, + "learning_rate": 1.2755229314838376e-05, + "loss": 0.5699, "step": 9335 }, { - "epoch": 0.54, - "grad_norm": 0.36068005871087383, - "learning_rate": 9.30829329117392e-06, - "loss": 0.2747, + "epoch": 0.43, + "grad_norm": 0.3128622031074628, + "learning_rate": 1.2753798955777442e-05, + "loss": 0.2325, "step": 9336 }, { - "epoch": 0.54, - "grad_norm": 0.3165484712620228, - "learning_rate": 9.306436847742203e-06, - "loss": 0.2673, + "epoch": 0.43, + "grad_norm": 0.4011887162391267, + "learning_rate": 1.2752368535750054e-05, + "loss": 0.3336, "step": 9337 }, { - "epoch": 0.54, - "grad_norm": 0.38534131183340886, - "learning_rate": 9.304580428328552e-06, - "loss": 0.2414, + "epoch": 0.43, + "grad_norm": 0.5089257739353394, + "learning_rate": 1.275093805478788e-05, + "loss": 0.3208, "step": 9338 }, { - "epoch": 0.54, - "grad_norm": 0.3115474788513869, - "learning_rate": 9.30272403299725e-06, - "loss": 0.2647, + "epoch": 0.43, + "grad_norm": 0.37985286666620316, + "learning_rate": 1.274950751292259e-05, + "loss": 0.3044, "step": 9339 }, { - "epoch": 0.54, - "grad_norm": 0.3285219921891418, - "learning_rate": 9.300867661812585e-06, - "loss": 0.1915, + "epoch": 0.43, + "grad_norm": 0.47106033402320757, + "learning_rate": 1.2748076910185854e-05, + "loss": 0.336, "step": 9340 }, { - "epoch": 0.54, - "grad_norm": 0.40017987691315415, - "learning_rate": 9.29901131483884e-06, - "loss": 0.3231, + "epoch": 0.43, + "grad_norm": 0.33114496840275004, + "learning_rate": 1.2746646246609341e-05, + "loss": 0.2746, "step": 9341 }, { - "epoch": 0.54, - "grad_norm": 0.34327243727753615, - "learning_rate": 9.297154992140307e-06, - "loss": 0.2567, + "epoch": 0.43, + "grad_norm": 0.38078784490602813, + "learning_rate": 1.274521552222473e-05, + "loss": 0.2677, "step": 9342 }, { - "epoch": 0.54, - "grad_norm": 1.2467554100617364, - "learning_rate": 9.295298693781267e-06, - "loss": 0.7486, + "epoch": 0.43, + "grad_norm": 0.44074706887799464, + "learning_rate": 1.274378473706369e-05, + "loss": 0.3018, "step": 9343 }, { - "epoch": 0.54, - "grad_norm": 0.3513490054263849, - "learning_rate": 9.293442419825998e-06, - "loss": 0.2287, + "epoch": 0.43, + "grad_norm": 0.4188651720631131, + "learning_rate": 1.2742353891157905e-05, + "loss": 0.297, "step": 9344 }, { - "epoch": 0.54, - "grad_norm": 0.29446477079644806, - "learning_rate": 9.291586170338793e-06, - "loss": 0.2604, + "epoch": 0.43, + "grad_norm": 0.3087815084045747, + "learning_rate": 1.2740922984539043e-05, + "loss": 0.2589, "step": 9345 }, { - "epoch": 0.54, - "grad_norm": 0.3906785495034913, - "learning_rate": 9.289729945383924e-06, - "loss": 0.2681, + "epoch": 0.43, + "grad_norm": 0.9511424435288572, + "learning_rate": 1.2739492017238793e-05, + "loss": 0.5156, "step": 9346 }, { - "epoch": 0.54, - "grad_norm": 0.693760446359131, - "learning_rate": 9.28787374502568e-06, - "loss": 0.2316, + "epoch": 0.43, + "grad_norm": 0.2804427348362116, + "learning_rate": 1.2738060989288827e-05, + "loss": 0.142, "step": 9347 }, { - "epoch": 0.54, - "grad_norm": 0.34998798570881384, - "learning_rate": 9.286017569328334e-06, - "loss": 0.2665, + "epoch": 0.43, + "grad_norm": 0.27217675731479063, + "learning_rate": 1.2736629900720832e-05, + "loss": 0.221, "step": 9348 }, { - "epoch": 0.54, - "grad_norm": 0.3792652287586318, - "learning_rate": 9.284161418356171e-06, - "loss": 0.3214, + "epoch": 0.43, + "grad_norm": 0.3688374112719879, + "learning_rate": 1.2735198751566484e-05, + "loss": 0.2931, "step": 9349 }, { - "epoch": 0.54, - "grad_norm": 0.6013268840193028, - "learning_rate": 9.282305292173467e-06, - "loss": 0.3168, + "epoch": 0.43, + "grad_norm": 0.645820516888122, + "learning_rate": 1.2733767541857476e-05, + "loss": 0.428, "step": 9350 }, { - "epoch": 0.54, - "grad_norm": 0.40681134523342405, - "learning_rate": 9.280449190844501e-06, - "loss": 0.2882, + "epoch": 0.43, + "grad_norm": 0.3012673018601823, + "learning_rate": 1.2732336271625486e-05, + "loss": 0.2065, "step": 9351 }, { - "epoch": 0.54, - "grad_norm": 0.32759946772814447, - "learning_rate": 9.278593114433547e-06, - "loss": 0.2228, + "epoch": 0.43, + "grad_norm": 1.0196639318946006, + "learning_rate": 1.2730904940902209e-05, + "loss": 0.6425, "step": 9352 }, { - "epoch": 0.54, - "grad_norm": 0.38246028250584957, - "learning_rate": 9.276737063004884e-06, - "loss": 0.2469, + "epoch": 0.43, + "grad_norm": 0.3787475509752184, + "learning_rate": 1.2729473549719324e-05, + "loss": 0.3297, "step": 9353 }, { - "epoch": 0.54, - "grad_norm": 0.3570238331886624, - "learning_rate": 9.274881036622785e-06, - "loss": 0.2766, + "epoch": 0.43, + "grad_norm": 0.36313741274140404, + "learning_rate": 1.2728042098108529e-05, + "loss": 0.2075, "step": 9354 }, { - "epoch": 0.54, - "grad_norm": 0.8536521626653547, - "learning_rate": 9.273025035351526e-06, - "loss": 0.5547, + "epoch": 0.43, + "grad_norm": 0.3425708528290884, + "learning_rate": 1.2726610586101509e-05, + "loss": 0.2159, "step": 9355 }, { - "epoch": 0.54, - "grad_norm": 0.6642206879063963, - "learning_rate": 9.271169059255376e-06, - "loss": 0.4194, + "epoch": 0.43, + "grad_norm": 0.38054785969483185, + "learning_rate": 1.2725179013729961e-05, + "loss": 0.3202, "step": 9356 }, { - "epoch": 0.54, - "grad_norm": 0.2544235207525964, - "learning_rate": 9.269313108398611e-06, - "loss": 0.2227, + "epoch": 0.43, + "grad_norm": 0.3693017945803192, + "learning_rate": 1.2723747381025572e-05, + "loss": 0.1998, "step": 9357 }, { - "epoch": 0.54, - "grad_norm": 0.27520770254521076, - "learning_rate": 9.2674571828455e-06, - "loss": 0.1867, + "epoch": 0.43, + "grad_norm": 1.1594884823754317, + "learning_rate": 1.2722315688020046e-05, + "loss": 0.6269, "step": 9358 }, { - "epoch": 0.54, - "grad_norm": 0.9026816205634214, - "learning_rate": 9.265601282660318e-06, - "loss": 0.5432, + "epoch": 0.43, + "grad_norm": 0.4299145292575415, + "learning_rate": 1.2720883934745071e-05, + "loss": 0.3057, "step": 9359 }, { - "epoch": 0.54, - "grad_norm": 0.3427639614610134, - "learning_rate": 9.263745407907329e-06, - "loss": 0.2059, + "epoch": 0.43, + "grad_norm": 0.3201252041069684, + "learning_rate": 1.2719452121232349e-05, + "loss": 0.1987, "step": 9360 }, { - "epoch": 0.54, - "grad_norm": 0.4234555347458187, - "learning_rate": 9.261889558650809e-06, - "loss": 0.3324, + "epoch": 0.43, + "grad_norm": 0.2828518438157333, + "learning_rate": 1.271802024751358e-05, + "loss": 0.2772, "step": 9361 }, { - "epoch": 0.54, - "grad_norm": 1.0330872273748664, - "learning_rate": 9.260033734955018e-06, - "loss": 0.421, + "epoch": 0.43, + "grad_norm": 0.6496512794784586, + "learning_rate": 1.2716588313620459e-05, + "loss": 0.4307, "step": 9362 }, { - "epoch": 0.54, - "grad_norm": 0.3320606941363923, - "learning_rate": 9.25817793688423e-06, - "loss": 0.2124, + "epoch": 0.43, + "grad_norm": 0.45488374724617453, + "learning_rate": 1.2715156319584692e-05, + "loss": 0.2921, "step": 9363 }, { - "epoch": 0.54, - "grad_norm": 0.32162030886140774, - "learning_rate": 9.256322164502704e-06, - "loss": 0.1961, + "epoch": 0.43, + "grad_norm": 0.33503804431897677, + "learning_rate": 1.2713724265437983e-05, + "loss": 0.2795, "step": 9364 }, { - "epoch": 0.54, - "grad_norm": 0.3775542765593719, - "learning_rate": 9.254466417874714e-06, - "loss": 0.3093, + "epoch": 0.43, + "grad_norm": 1.3396300894241442, + "learning_rate": 1.2712292151212034e-05, + "loss": 0.7817, "step": 9365 }, { - "epoch": 0.54, - "grad_norm": 0.3245176121995681, - "learning_rate": 9.252610697064516e-06, - "loss": 0.1544, + "epoch": 0.43, + "grad_norm": 0.4034591287152584, + "learning_rate": 1.2710859976938548e-05, + "loss": 0.2992, "step": 9366 }, { - "epoch": 0.54, - "grad_norm": 0.6532298265771281, - "learning_rate": 9.25075500213638e-06, - "loss": 0.3814, + "epoch": 0.43, + "grad_norm": 0.5122151056418088, + "learning_rate": 1.2709427742649238e-05, + "loss": 0.2966, "step": 9367 }, { - "epoch": 0.54, - "grad_norm": 0.39797220000426015, - "learning_rate": 9.248899333154565e-06, - "loss": 0.3394, + "epoch": 0.43, + "grad_norm": 0.25821475837525226, + "learning_rate": 1.2707995448375807e-05, + "loss": 0.235, "step": 9368 }, { - "epoch": 0.54, - "grad_norm": 0.3901046536766521, - "learning_rate": 9.247043690183334e-06, - "loss": 0.2712, + "epoch": 0.43, + "grad_norm": 0.4034461318359468, + "learning_rate": 1.2706563094149967e-05, + "loss": 0.2725, "step": 9369 }, { - "epoch": 0.54, - "grad_norm": 0.2983187384453381, - "learning_rate": 9.245188073286949e-06, - "loss": 0.1997, + "epoch": 0.43, + "grad_norm": 1.004871897039284, + "learning_rate": 1.270513068000343e-05, + "loss": 0.3679, "step": 9370 }, { - "epoch": 0.54, - "grad_norm": 0.4306936639740022, - "learning_rate": 9.243332482529665e-06, - "loss": 0.3333, + "epoch": 0.43, + "grad_norm": 0.7881206852937602, + "learning_rate": 1.2703698205967907e-05, + "loss": 0.5385, "step": 9371 }, { - "epoch": 0.54, - "grad_norm": 0.4124403701710802, - "learning_rate": 9.241476917975748e-06, - "loss": 0.2959, + "epoch": 0.43, + "grad_norm": 0.35178470040104903, + "learning_rate": 1.2702265672075108e-05, + "loss": 0.2788, "step": 9372 }, { - "epoch": 0.54, - "grad_norm": 0.3857979000578478, - "learning_rate": 9.239621379689452e-06, - "loss": 0.2773, + "epoch": 0.43, + "grad_norm": 0.34987363199215055, + "learning_rate": 1.2700833078356759e-05, + "loss": 0.2815, "step": 9373 }, { - "epoch": 0.54, - "grad_norm": 0.7877835736477719, - "learning_rate": 9.237765867735035e-06, - "loss": 0.5544, + "epoch": 0.43, + "grad_norm": 0.27874537305099073, + "learning_rate": 1.2699400424844563e-05, + "loss": 0.1939, "step": 9374 }, { - "epoch": 0.54, - "grad_norm": 0.3805108940267185, - "learning_rate": 9.235910382176751e-06, - "loss": 0.2811, + "epoch": 0.43, + "grad_norm": 0.6675182692172947, + "learning_rate": 1.2697967711570243e-05, + "loss": 0.3747, "step": 9375 }, { - "epoch": 0.54, - "grad_norm": 0.2183372803331831, - "learning_rate": 9.234054923078862e-06, - "loss": 0.1758, + "epoch": 0.43, + "grad_norm": 0.33233795917513964, + "learning_rate": 1.2696534938565524e-05, + "loss": 0.2921, "step": 9376 }, { - "epoch": 0.54, - "grad_norm": 0.44649442283170376, - "learning_rate": 9.232199490505613e-06, - "loss": 0.3603, + "epoch": 0.43, + "grad_norm": 0.5763156229044814, + "learning_rate": 1.2695102105862114e-05, + "loss": 0.3211, "step": 9377 }, { - "epoch": 0.54, - "grad_norm": 0.3408016275041556, - "learning_rate": 9.230344084521266e-06, - "loss": 0.2609, + "epoch": 0.43, + "grad_norm": 0.4269783880724685, + "learning_rate": 1.2693669213491741e-05, + "loss": 0.2767, "step": 9378 }, { - "epoch": 0.54, - "grad_norm": 0.8404217815307761, - "learning_rate": 9.228488705190069e-06, - "loss": 0.3583, + "epoch": 0.43, + "grad_norm": 0.4655623224224928, + "learning_rate": 1.269223626148613e-05, + "loss": 0.3596, "step": 9379 }, { - "epoch": 0.54, - "grad_norm": 0.34473996252645167, - "learning_rate": 9.226633352576276e-06, - "loss": 0.3451, + "epoch": 0.43, + "grad_norm": 0.2605431209672585, + "learning_rate": 1.2690803249877003e-05, + "loss": 0.1899, "step": 9380 }, { - "epoch": 0.54, - "grad_norm": 0.3742015521440866, - "learning_rate": 9.224778026744135e-06, - "loss": 0.2612, + "epoch": 0.43, + "grad_norm": 0.3014886577535861, + "learning_rate": 1.2689370178696088e-05, + "loss": 0.2006, "step": 9381 }, { - "epoch": 0.54, - "grad_norm": 0.9236809927403062, - "learning_rate": 9.222922727757899e-06, - "loss": 0.5384, + "epoch": 0.43, + "grad_norm": 0.5630434249393794, + "learning_rate": 1.2687937047975108e-05, + "loss": 0.4216, "step": 9382 }, { - "epoch": 0.54, - "grad_norm": 0.2849500682429431, - "learning_rate": 9.221067455681817e-06, - "loss": 0.1571, + "epoch": 0.43, + "grad_norm": 0.7273132245068582, + "learning_rate": 1.2686503857745787e-05, + "loss": 0.3639, "step": 9383 }, { - "epoch": 0.54, - "grad_norm": 0.4049939669342111, - "learning_rate": 9.219212210580132e-06, - "loss": 0.318, + "epoch": 0.43, + "grad_norm": 0.34119290798276586, + "learning_rate": 1.2685070608039865e-05, + "loss": 0.2836, "step": 9384 }, { - "epoch": 0.54, - "grad_norm": 0.29913254424780833, - "learning_rate": 9.217356992517097e-06, - "loss": 0.2886, + "epoch": 0.43, + "grad_norm": 0.3791565190202857, + "learning_rate": 1.2683637298889067e-05, + "loss": 0.3171, "step": 9385 }, { - "epoch": 0.54, - "grad_norm": 1.0583760086865348, - "learning_rate": 9.215501801556954e-06, - "loss": 0.4293, + "epoch": 0.43, + "grad_norm": 0.2618065671854353, + "learning_rate": 1.2682203930325123e-05, + "loss": 0.0977, "step": 9386 }, { - "epoch": 0.54, - "grad_norm": 0.61751445391918, - "learning_rate": 9.213646637763954e-06, - "loss": 0.3031, + "epoch": 0.43, + "grad_norm": 0.4073330570746324, + "learning_rate": 1.2680770502379773e-05, + "loss": 0.2627, "step": 9387 }, { - "epoch": 0.54, - "grad_norm": 0.4187489970847992, - "learning_rate": 9.211791501202333e-06, - "loss": 0.3314, + "epoch": 0.43, + "grad_norm": 0.5760195819526027, + "learning_rate": 1.2679337015084747e-05, + "loss": 0.3774, "step": 9388 }, { - "epoch": 0.54, - "grad_norm": 0.2328530107323132, - "learning_rate": 9.209936391936339e-06, - "loss": 0.2078, + "epoch": 0.43, + "grad_norm": 0.3959896687954381, + "learning_rate": 1.2677903468471781e-05, + "loss": 0.3224, "step": 9389 }, { - "epoch": 0.54, - "grad_norm": 0.42827604942936515, - "learning_rate": 9.208081310030216e-06, - "loss": 0.273, + "epoch": 0.43, + "grad_norm": 0.3604525759897088, + "learning_rate": 1.2676469862572614e-05, + "loss": 0.2447, "step": 9390 }, { - "epoch": 0.54, - "grad_norm": 0.5354159507363013, - "learning_rate": 9.2062262555482e-06, - "loss": 0.3933, + "epoch": 0.43, + "grad_norm": 1.1364314143266312, + "learning_rate": 1.2675036197418984e-05, + "loss": 0.5899, "step": 9391 }, { - "epoch": 0.54, - "grad_norm": 0.4463089490948037, - "learning_rate": 9.204371228554538e-06, - "loss": 0.2974, + "epoch": 0.43, + "grad_norm": 0.28841422983357917, + "learning_rate": 1.2673602473042628e-05, + "loss": 0.2655, "step": 9392 }, { - "epoch": 0.54, - "grad_norm": 0.3010100233617082, - "learning_rate": 9.202516229113462e-06, - "loss": 0.2662, + "epoch": 0.43, + "grad_norm": 0.22556161400212862, + "learning_rate": 1.2672168689475293e-05, + "loss": 0.0978, "step": 9393 }, { - "epoch": 0.54, - "grad_norm": 0.508972715684767, - "learning_rate": 9.200661257289217e-06, - "loss": 0.3506, + "epoch": 0.43, + "grad_norm": 0.437416348480585, + "learning_rate": 1.2670734846748717e-05, + "loss": 0.3243, "step": 9394 }, { - "epoch": 0.54, - "grad_norm": 0.3217573305101514, - "learning_rate": 9.19880631314604e-06, - "loss": 0.2201, + "epoch": 0.43, + "grad_norm": 0.6683355974541376, + "learning_rate": 1.2669300944894647e-05, + "loss": 0.4078, "step": 9395 }, { - "epoch": 0.54, - "grad_norm": 0.29336545819378834, - "learning_rate": 9.196951396748164e-06, - "loss": 0.2371, + "epoch": 0.43, + "grad_norm": 0.3017049616391861, + "learning_rate": 1.2667866983944825e-05, + "loss": 0.2194, "step": 9396 }, { - "epoch": 0.54, - "grad_norm": 0.4756760206599183, - "learning_rate": 9.195096508159826e-06, - "loss": 0.323, + "epoch": 0.43, + "grad_norm": 0.45566119415040446, + "learning_rate": 1.2666432963931e-05, + "loss": 0.3404, "step": 9397 }, { - "epoch": 0.54, - "grad_norm": 1.0804796916074983, - "learning_rate": 9.193241647445262e-06, - "loss": 0.77, + "epoch": 0.43, + "grad_norm": 0.4276431915377918, + "learning_rate": 1.266499888488492e-05, + "loss": 0.203, "step": 9398 }, { - "epoch": 0.54, - "grad_norm": 0.3241278568868132, - "learning_rate": 9.191386814668704e-06, - "loss": 0.201, + "epoch": 0.43, + "grad_norm": 0.2626220352638622, + "learning_rate": 1.2663564746838335e-05, + "loss": 0.1448, "step": 9399 }, { - "epoch": 0.54, - "grad_norm": 1.8403532514129441, - "learning_rate": 9.189532009894387e-06, - "loss": 0.7033, + "epoch": 0.43, + "grad_norm": 0.35847557019536114, + "learning_rate": 1.266213054982299e-05, + "loss": 0.3096, "step": 9400 }, { - "epoch": 0.54, - "grad_norm": 0.32997349588142116, - "learning_rate": 9.187677233186541e-06, - "loss": 0.318, + "epoch": 0.43, + "grad_norm": 0.9299157735124534, + "learning_rate": 1.2660696293870642e-05, + "loss": 0.4168, "step": 9401 }, { - "epoch": 0.54, - "grad_norm": 0.37909775087026976, - "learning_rate": 9.185822484609397e-06, - "loss": 0.2387, + "epoch": 0.43, + "grad_norm": 0.5441320061557557, + "learning_rate": 1.2659261979013043e-05, + "loss": 0.3712, "step": 9402 }, { - "epoch": 0.54, - "grad_norm": 0.26048756876240237, - "learning_rate": 9.183967764227188e-06, - "loss": 0.2072, + "epoch": 0.43, + "grad_norm": 0.3747397797899474, + "learning_rate": 1.2657827605281944e-05, + "loss": 0.2215, "step": 9403 }, { - "epoch": 0.54, - "grad_norm": 0.3638139283443146, - "learning_rate": 9.182113072104137e-06, - "loss": 0.3321, + "epoch": 0.43, + "grad_norm": 0.4249229705858929, + "learning_rate": 1.2656393172709107e-05, + "loss": 0.3287, "step": 9404 }, { - "epoch": 0.54, - "grad_norm": 0.7221700317212014, - "learning_rate": 9.180258408304478e-06, - "loss": 0.3916, + "epoch": 0.43, + "grad_norm": 0.2344257536169026, + "learning_rate": 1.2654958681326286e-05, + "loss": 0.145, "step": 9405 }, { - "epoch": 0.54, - "grad_norm": 0.3277302216404148, - "learning_rate": 9.178403772892433e-06, - "loss": 0.2227, + "epoch": 0.43, + "grad_norm": 0.4224690329888101, + "learning_rate": 1.2653524131165238e-05, + "loss": 0.2686, "step": 9406 }, { - "epoch": 0.54, - "grad_norm": 0.6393616769102479, - "learning_rate": 9.176549165932231e-06, - "loss": 0.3885, + "epoch": 0.43, + "grad_norm": 1.1100567522037954, + "learning_rate": 1.265208952225772e-05, + "loss": 0.4848, "step": 9407 }, { - "epoch": 0.54, - "grad_norm": 0.3951954935016409, - "learning_rate": 9.174694587488097e-06, - "loss": 0.2902, + "epoch": 0.43, + "grad_norm": 0.36267938912720626, + "learning_rate": 1.2650654854635498e-05, + "loss": 0.3018, "step": 9408 }, { - "epoch": 0.54, - "grad_norm": 0.22636610131498916, - "learning_rate": 9.17284003762425e-06, - "loss": 0.1794, + "epoch": 0.43, + "grad_norm": 0.41927720920381195, + "learning_rate": 1.264922012833033e-05, + "loss": 0.2426, "step": 9409 }, { - "epoch": 0.54, - "grad_norm": 1.2675540511328809, - "learning_rate": 9.170985516404922e-06, - "loss": 0.7407, + "epoch": 0.43, + "grad_norm": 0.5038614684791575, + "learning_rate": 1.2647785343373986e-05, + "loss": 0.3012, "step": 9410 }, { - "epoch": 0.54, - "grad_norm": 0.59893272808341, - "learning_rate": 9.169131023894325e-06, - "loss": 0.2856, + "epoch": 0.43, + "grad_norm": 0.4140824423670641, + "learning_rate": 1.2646350499798226e-05, + "loss": 0.231, "step": 9411 }, { - "epoch": 0.54, - "grad_norm": 0.28230999589030675, - "learning_rate": 9.16727656015669e-06, - "loss": 0.2509, + "epoch": 0.43, + "grad_norm": 0.3321038828553742, + "learning_rate": 1.2644915597634815e-05, + "loss": 0.2473, "step": 9412 }, { - "epoch": 0.54, - "grad_norm": 0.8586813691539498, - "learning_rate": 9.165422125256228e-06, - "loss": 0.4289, + "epoch": 0.43, + "grad_norm": 1.1892279437893527, + "learning_rate": 1.2643480636915522e-05, + "loss": 0.4599, "step": 9413 }, { - "epoch": 0.54, - "grad_norm": 0.5314017571523516, - "learning_rate": 9.163567719257164e-06, - "loss": 0.3422, + "epoch": 0.43, + "grad_norm": 0.9127410702927927, + "learning_rate": 1.2642045617672114e-05, + "loss": 0.5234, "step": 9414 }, { - "epoch": 0.54, - "grad_norm": 0.31946600380526713, - "learning_rate": 9.161713342223711e-06, - "loss": 0.2, + "epoch": 0.43, + "grad_norm": 0.4284539268960658, + "learning_rate": 1.2640610539936363e-05, + "loss": 0.3135, "step": 9415 }, { - "epoch": 0.54, - "grad_norm": 0.3639625388147878, - "learning_rate": 9.159858994220092e-06, - "loss": 0.3099, + "epoch": 0.43, + "grad_norm": 0.40918361329265673, + "learning_rate": 1.263917540374004e-05, + "loss": 0.2482, "step": 9416 }, { - "epoch": 0.54, - "grad_norm": 0.2937050738295473, - "learning_rate": 9.15800467531052e-06, - "loss": 0.1719, + "epoch": 0.43, + "grad_norm": 0.4202514850170202, + "learning_rate": 1.2637740209114918e-05, + "loss": 0.259, "step": 9417 }, { - "epoch": 0.54, - "grad_norm": 0.42166472914570036, - "learning_rate": 9.156150385559208e-06, - "loss": 0.3034, + "epoch": 0.43, + "grad_norm": 0.4318377129770745, + "learning_rate": 1.2636304956092773e-05, + "loss": 0.3213, "step": 9418 }, { - "epoch": 0.54, - "grad_norm": 1.4713264661584862, - "learning_rate": 9.154296125030371e-06, - "loss": 0.3491, + "epoch": 0.43, + "grad_norm": 1.1940034487157207, + "learning_rate": 1.2634869644705374e-05, + "loss": 0.409, "step": 9419 }, { - "epoch": 0.54, - "grad_norm": 0.28848784892017665, - "learning_rate": 9.15244189378823e-06, - "loss": 0.2629, + "epoch": 0.43, + "grad_norm": 0.3978571900016964, + "learning_rate": 1.26334342749845e-05, + "loss": 0.3133, "step": 9420 }, { - "epoch": 0.54, - "grad_norm": 0.5160241916906537, - "learning_rate": 9.150587691896984e-06, - "loss": 0.344, + "epoch": 0.43, + "grad_norm": 0.31418940102939463, + "learning_rate": 1.263199884696193e-05, + "loss": 0.2404, "step": 9421 }, { - "epoch": 0.54, - "grad_norm": 0.183426975433477, - "learning_rate": 9.14873351942085e-06, - "loss": 0.1326, + "epoch": 0.43, + "grad_norm": 0.7516631652311326, + "learning_rate": 1.2630563360669444e-05, + "loss": 0.304, "step": 9422 }, { - "epoch": 0.54, - "grad_norm": 0.7740940117037703, - "learning_rate": 9.146879376424037e-06, - "loss": 0.3699, + "epoch": 0.43, + "grad_norm": 0.6254121282941459, + "learning_rate": 1.2629127816138818e-05, + "loss": 0.3666, "step": 9423 }, { - "epoch": 0.54, - "grad_norm": 0.36265406185090515, - "learning_rate": 9.145025262970757e-06, - "loss": 0.2904, + "epoch": 0.43, + "grad_norm": 0.41268360849468866, + "learning_rate": 1.2627692213401836e-05, + "loss": 0.2876, "step": 9424 }, { - "epoch": 0.54, - "grad_norm": 0.7197643231905059, - "learning_rate": 9.143171179125212e-06, - "loss": 0.2937, + "epoch": 0.43, + "grad_norm": 0.33417791235787325, + "learning_rate": 1.2626256552490283e-05, + "loss": 0.2339, "step": 9425 }, { - "epoch": 0.54, - "grad_norm": 0.6953661847377259, - "learning_rate": 9.141317124951613e-06, - "loss": 0.349, + "epoch": 0.43, + "grad_norm": 0.39089160640967713, + "learning_rate": 1.2624820833435939e-05, + "loss": 0.246, "step": 9426 }, { - "epoch": 0.54, - "grad_norm": 0.32798387026370746, - "learning_rate": 9.139463100514166e-06, - "loss": 0.2851, + "epoch": 0.43, + "grad_norm": 0.4087341467962649, + "learning_rate": 1.2623385056270592e-05, + "loss": 0.3029, "step": 9427 }, { - "epoch": 0.54, - "grad_norm": 0.3583724552430915, - "learning_rate": 9.137609105877075e-06, - "loss": 0.2518, + "epoch": 0.43, + "grad_norm": 0.4454686000017699, + "learning_rate": 1.2621949221026028e-05, + "loss": 0.3529, "step": 9428 }, { - "epoch": 0.54, - "grad_norm": 0.21343072518400563, - "learning_rate": 9.135755141104544e-06, - "loss": 0.1528, + "epoch": 0.43, + "grad_norm": 0.717996982393554, + "learning_rate": 1.2620513327734038e-05, + "loss": 0.3451, "step": 9429 }, { - "epoch": 0.54, - "grad_norm": 0.4644618331947836, - "learning_rate": 9.133901206260773e-06, - "loss": 0.3436, + "epoch": 0.43, + "grad_norm": 0.361365982504197, + "learning_rate": 1.2619077376426407e-05, + "loss": 0.2819, "step": 9430 }, { - "epoch": 0.54, - "grad_norm": 0.8170445704875847, - "learning_rate": 9.132047301409968e-06, - "loss": 0.4853, + "epoch": 0.43, + "grad_norm": 0.54715594172966, + "learning_rate": 1.2617641367134928e-05, + "loss": 0.4256, "step": 9431 }, { - "epoch": 0.54, - "grad_norm": 0.27928105311252444, - "learning_rate": 9.130193426616327e-06, - "loss": 0.2082, + "epoch": 0.43, + "grad_norm": 0.2926911651619761, + "learning_rate": 1.2616205299891388e-05, + "loss": 0.1709, "step": 9432 }, { - "epoch": 0.54, - "grad_norm": 0.41359155985409485, - "learning_rate": 9.12833958194405e-06, - "loss": 0.31, + "epoch": 0.43, + "grad_norm": 0.28034504306147434, + "learning_rate": 1.2614769174727588e-05, + "loss": 0.2356, "step": 9433 }, { - "epoch": 0.54, - "grad_norm": 1.1546078883310094, - "learning_rate": 9.126485767457336e-06, - "loss": 0.7811, + "epoch": 0.43, + "grad_norm": 0.8988749745559306, + "learning_rate": 1.2613332991675318e-05, + "loss": 0.4677, "step": 9434 }, { - "epoch": 0.54, - "grad_norm": 0.3413134026821165, - "learning_rate": 9.124631983220384e-06, - "loss": 0.1997, + "epoch": 0.43, + "grad_norm": 0.38634978397743036, + "learning_rate": 1.2611896750766377e-05, + "loss": 0.2878, "step": 9435 }, { - "epoch": 0.54, - "grad_norm": 0.3295985332780704, - "learning_rate": 9.122778229297387e-06, - "loss": 0.2877, + "epoch": 0.43, + "grad_norm": 0.3418576000897716, + "learning_rate": 1.2610460452032556e-05, + "loss": 0.2742, "step": 9436 }, { - "epoch": 0.54, - "grad_norm": 0.3604904526688763, - "learning_rate": 9.120924505752543e-06, - "loss": 0.2817, + "epoch": 0.43, + "grad_norm": 1.1357984964185623, + "learning_rate": 1.2609024095505655e-05, + "loss": 0.6399, "step": 9437 }, { - "epoch": 0.54, - "grad_norm": 0.45457565674240885, - "learning_rate": 9.119070812650044e-06, - "loss": 0.155, + "epoch": 0.43, + "grad_norm": 0.2963124518074352, + "learning_rate": 1.260758768121748e-05, + "loss": 0.1909, "step": 9438 }, { - "epoch": 0.54, - "grad_norm": 0.4349575002134113, - "learning_rate": 9.117217150054087e-06, - "loss": 0.3293, + "epoch": 0.43, + "grad_norm": 0.29919215416021205, + "learning_rate": 1.2606151209199822e-05, + "loss": 0.2139, "step": 9439 }, { - "epoch": 0.54, - "grad_norm": 0.40328109153502684, - "learning_rate": 9.115363518028858e-06, - "loss": 0.3267, + "epoch": 0.43, + "grad_norm": 0.3691023082466476, + "learning_rate": 1.260471467948449e-05, + "loss": 0.3199, "step": 9440 }, { - "epoch": 0.54, - "grad_norm": 0.7613078562304955, - "learning_rate": 9.113509916638557e-06, - "loss": 0.2908, + "epoch": 0.43, + "grad_norm": 0.6727305416223096, + "learning_rate": 1.2603278092103288e-05, + "loss": 0.4493, "step": 9441 }, { - "epoch": 0.54, - "grad_norm": 0.27332115307343685, - "learning_rate": 9.111656345947367e-06, - "loss": 0.2487, + "epoch": 0.43, + "grad_norm": 0.32917621419629844, + "learning_rate": 1.2601841447088017e-05, + "loss": 0.2165, "step": 9442 }, { - "epoch": 0.54, - "grad_norm": 0.4121158185394958, - "learning_rate": 9.10980280601948e-06, - "loss": 0.2577, + "epoch": 0.43, + "grad_norm": 1.170248146272205, + "learning_rate": 1.2600404744470481e-05, + "loss": 0.7662, "step": 9443 }, { - "epoch": 0.54, - "grad_norm": 0.34272848135119255, - "learning_rate": 9.107949296919084e-06, - "loss": 0.3144, + "epoch": 0.43, + "grad_norm": 0.2744027059196429, + "learning_rate": 1.2598967984282494e-05, + "loss": 0.2388, "step": 9444 }, { - "epoch": 0.54, - "grad_norm": 0.3178424358331697, - "learning_rate": 9.106095818710367e-06, - "loss": 0.2214, + "epoch": 0.43, + "grad_norm": 0.25818215850327236, + "learning_rate": 1.2597531166555857e-05, + "loss": 0.1647, "step": 9445 }, { - "epoch": 0.54, - "grad_norm": 0.8274851482459838, - "learning_rate": 9.10424237145751e-06, - "loss": 0.4761, + "epoch": 0.43, + "grad_norm": 0.7190296411178465, + "learning_rate": 1.2596094291322388e-05, + "loss": 0.4211, "step": 9446 }, { - "epoch": 0.54, - "grad_norm": 0.4605336475712775, - "learning_rate": 9.102388955224703e-06, - "loss": 0.3781, + "epoch": 0.43, + "grad_norm": 0.3740443930494949, + "learning_rate": 1.259465735861389e-05, + "loss": 0.3133, "step": 9447 }, { - "epoch": 0.54, - "grad_norm": 0.2601899726482571, - "learning_rate": 9.10053557007613e-06, - "loss": 0.2162, + "epoch": 0.43, + "grad_norm": 0.3334026807035171, + "learning_rate": 1.2593220368462178e-05, + "loss": 0.2047, "step": 9448 }, { - "epoch": 0.54, - "grad_norm": 0.2737288914687979, - "learning_rate": 9.098682216075968e-06, - "loss": 0.1548, + "epoch": 0.43, + "grad_norm": 1.2744951776544007, + "learning_rate": 1.259178332089907e-05, + "loss": 0.7327, "step": 9449 }, { - "epoch": 0.54, - "grad_norm": 0.7389906969176646, - "learning_rate": 9.096828893288404e-06, - "loss": 0.3916, + "epoch": 0.43, + "grad_norm": 0.497689882630781, + "learning_rate": 1.2590346215956372e-05, + "loss": 0.291, "step": 9450 }, { - "epoch": 0.54, - "grad_norm": 0.35483877986484735, - "learning_rate": 9.094975601777615e-06, - "loss": 0.231, + "epoch": 0.43, + "grad_norm": 0.30007265761149987, + "learning_rate": 1.2588909053665912e-05, + "loss": 0.2332, "step": 9451 }, { - "epoch": 0.54, - "grad_norm": 0.38272983487780293, - "learning_rate": 9.093122341607782e-06, - "loss": 0.3315, + "epoch": 0.43, + "grad_norm": 0.4947529682315777, + "learning_rate": 1.2587471834059498e-05, + "loss": 0.3003, "step": 9452 }, { - "epoch": 0.54, - "grad_norm": 0.7223595246219131, - "learning_rate": 9.091269112843084e-06, - "loss": 0.4254, + "epoch": 0.43, + "grad_norm": 0.8107848461877484, + "learning_rate": 1.2586034557168951e-05, + "loss": 0.5135, "step": 9453 }, { - "epoch": 0.54, - "grad_norm": 0.34796801665638943, - "learning_rate": 9.089415915547702e-06, - "loss": 0.2188, + "epoch": 0.43, + "grad_norm": 0.3751737978543826, + "learning_rate": 1.2584597223026092e-05, + "loss": 0.266, "step": 9454 }, { - "epoch": 0.54, - "grad_norm": 0.30394563246761563, - "learning_rate": 9.087562749785805e-06, - "loss": 0.1796, + "epoch": 0.43, + "grad_norm": 1.0233463136645495, + "learning_rate": 1.258315983166274e-05, + "loss": 0.566, "step": 9455 }, { - "epoch": 0.54, - "grad_norm": 0.3333343056858731, - "learning_rate": 9.085709615621567e-06, - "loss": 0.2878, + "epoch": 0.43, + "grad_norm": 0.3377751533254102, + "learning_rate": 1.2581722383110719e-05, + "loss": 0.2749, "step": 9456 }, { - "epoch": 0.54, - "grad_norm": 0.3613631132845819, - "learning_rate": 9.083856513119169e-06, - "loss": 0.2873, + "epoch": 0.43, + "grad_norm": 0.3419054223813326, + "learning_rate": 1.2580284877401853e-05, + "loss": 0.2623, "step": 9457 }, { - "epoch": 0.54, - "grad_norm": 0.6852597899585396, - "learning_rate": 9.082003442342779e-06, - "loss": 0.3644, + "epoch": 0.43, + "grad_norm": 0.6575813959770792, + "learning_rate": 1.2578847314567968e-05, + "loss": 0.2501, "step": 9458 }, { - "epoch": 0.54, - "grad_norm": 0.4615449689732116, - "learning_rate": 9.080150403356571e-06, - "loss": 0.3572, + "epoch": 0.43, + "grad_norm": 0.38777597362819477, + "learning_rate": 1.257740969464089e-05, + "loss": 0.2975, "step": 9459 }, { - "epoch": 0.54, - "grad_norm": 0.27980817096660626, - "learning_rate": 9.078297396224716e-06, - "loss": 0.2563, + "epoch": 0.43, + "grad_norm": 0.43275220632395844, + "learning_rate": 1.2575972017652442e-05, + "loss": 0.3012, "step": 9460 }, { - "epoch": 0.54, - "grad_norm": 0.25145383400299104, - "learning_rate": 9.07644442101138e-06, - "loss": 0.1177, + "epoch": 0.43, + "grad_norm": 0.49163558230656573, + "learning_rate": 1.257453428363446e-05, + "loss": 0.3028, "step": 9461 }, { - "epoch": 0.54, - "grad_norm": 1.4001380538324593, - "learning_rate": 9.074591477780736e-06, - "loss": 0.7624, + "epoch": 0.43, + "grad_norm": 0.6275284392053907, + "learning_rate": 1.2573096492618766e-05, + "loss": 0.3684, "step": 9462 }, { - "epoch": 0.54, - "grad_norm": 0.322723513333794, - "learning_rate": 9.072738566596948e-06, - "loss": 0.2826, + "epoch": 0.43, + "grad_norm": 0.3776346850865423, + "learning_rate": 1.25716586446372e-05, + "loss": 0.2854, "step": 9463 }, { - "epoch": 0.54, - "grad_norm": 0.3481210569513449, - "learning_rate": 9.070885687524184e-06, - "loss": 0.2795, + "epoch": 0.43, + "grad_norm": 0.3588057533578193, + "learning_rate": 1.2570220739721588e-05, + "loss": 0.3425, "step": 9464 }, { - "epoch": 0.54, - "grad_norm": 1.013606524113796, - "learning_rate": 9.069032840626608e-06, - "loss": 0.6801, + "epoch": 0.43, + "grad_norm": 0.1714204211179907, + "learning_rate": 1.2568782777903768e-05, + "loss": 0.0656, "step": 9465 }, { - "epoch": 0.54, - "grad_norm": 0.3272263147205583, - "learning_rate": 9.067180025968387e-06, - "loss": 0.2762, + "epoch": 0.43, + "grad_norm": 0.3560142739096228, + "learning_rate": 1.2567344759215571e-05, + "loss": 0.2507, "step": 9466 }, { - "epoch": 0.54, - "grad_norm": 0.20755738352285572, - "learning_rate": 9.065327243613679e-06, - "loss": 0.0841, + "epoch": 0.43, + "grad_norm": 0.5442199859267705, + "learning_rate": 1.2565906683688836e-05, + "loss": 0.4249, "step": 9467 }, { - "epoch": 0.54, - "grad_norm": 0.35855047633435044, - "learning_rate": 9.06347449362665e-06, - "loss": 0.3212, + "epoch": 0.43, + "grad_norm": 0.4741256619713081, + "learning_rate": 1.25644685513554e-05, + "loss": 0.2616, "step": 9468 }, { - "epoch": 0.54, - "grad_norm": 0.3261481520927009, - "learning_rate": 9.061621776071458e-06, - "loss": 0.2756, + "epoch": 0.43, + "grad_norm": 0.34331080104918166, + "learning_rate": 1.2563030362247105e-05, + "loss": 0.2826, "step": 9469 }, { - "epoch": 0.54, - "grad_norm": 0.6254025257211152, - "learning_rate": 9.059769091012265e-06, - "loss": 0.3968, + "epoch": 0.44, + "grad_norm": 0.3244728122383523, + "learning_rate": 1.2561592116395785e-05, + "loss": 0.227, "step": 9470 }, { - "epoch": 0.54, - "grad_norm": 0.3530171497321099, - "learning_rate": 9.057916438513226e-06, - "loss": 0.2633, + "epoch": 0.44, + "grad_norm": 0.34244100844683467, + "learning_rate": 1.2560153813833283e-05, + "loss": 0.2656, "step": 9471 }, { - "epoch": 0.54, - "grad_norm": 0.36080066395287175, - "learning_rate": 9.056063818638502e-06, - "loss": 0.3188, + "epoch": 0.44, + "grad_norm": 0.3455335480342148, + "learning_rate": 1.2558715454591444e-05, + "loss": 0.2815, "step": 9472 }, { - "epoch": 0.54, - "grad_norm": 0.2664798556440693, - "learning_rate": 9.054211231452248e-06, - "loss": 0.2171, + "epoch": 0.44, + "grad_norm": 1.433864542368922, + "learning_rate": 1.2557277038702109e-05, + "loss": 0.8198, "step": 9473 }, { - "epoch": 0.54, - "grad_norm": 0.5965216255280918, - "learning_rate": 9.052358677018615e-06, - "loss": 0.2632, + "epoch": 0.44, + "grad_norm": 0.6082788976385156, + "learning_rate": 1.2555838566197129e-05, + "loss": 0.3512, "step": 9474 }, { - "epoch": 0.54, - "grad_norm": 0.376392912726472, - "learning_rate": 9.050506155401764e-06, - "loss": 0.2729, + "epoch": 0.44, + "grad_norm": 0.3207895346819442, + "learning_rate": 1.2554400037108345e-05, + "loss": 0.2582, "step": 9475 }, { - "epoch": 0.54, - "grad_norm": 0.3404934744042724, - "learning_rate": 9.048653666665841e-06, - "loss": 0.3248, + "epoch": 0.44, + "grad_norm": 0.5110578991809333, + "learning_rate": 1.2552961451467609e-05, + "loss": 0.3536, "step": 9476 }, { - "epoch": 0.54, - "grad_norm": 1.040043861261599, - "learning_rate": 9.046801210875002e-06, - "loss": 0.4977, + "epoch": 0.44, + "grad_norm": 0.3064113085637141, + "learning_rate": 1.2551522809306762e-05, + "loss": 0.1849, "step": 9477 }, { - "epoch": 0.54, - "grad_norm": 0.3204967432613155, - "learning_rate": 9.044948788093396e-06, - "loss": 0.2685, + "epoch": 0.44, + "grad_norm": 0.3529613272022501, + "learning_rate": 1.2550084110657663e-05, + "loss": 0.189, "step": 9478 }, { - "epoch": 0.54, - "grad_norm": 0.22589665188179028, - "learning_rate": 9.043096398385174e-06, - "loss": 0.208, + "epoch": 0.44, + "grad_norm": 0.41888908145484505, + "learning_rate": 1.2548645355552156e-05, + "loss": 0.3396, "step": 9479 }, { - "epoch": 0.54, - "grad_norm": 0.4867779438407952, - "learning_rate": 9.041244041814479e-06, - "loss": 0.3523, + "epoch": 0.44, + "grad_norm": 0.5902939809583563, + "learning_rate": 1.2547206544022102e-05, + "loss": 0.3784, "step": 9480 }, { - "epoch": 0.54, - "grad_norm": 0.3340647249545658, - "learning_rate": 9.039391718445466e-06, - "loss": 0.2391, + "epoch": 0.44, + "grad_norm": 0.3936840695842908, + "learning_rate": 1.2545767676099345e-05, + "loss": 0.2183, "step": 9481 }, { - "epoch": 0.54, - "grad_norm": 0.8060812933217341, - "learning_rate": 9.03753942834227e-06, - "loss": 0.5286, + "epoch": 0.44, + "grad_norm": 0.5497509177495692, + "learning_rate": 1.2544328751815749e-05, + "loss": 0.3452, "step": 9482 }, { - "epoch": 0.54, - "grad_norm": 0.518908014472298, - "learning_rate": 9.03568717156905e-06, - "loss": 0.3698, + "epoch": 0.44, + "grad_norm": 0.2714434631891859, + "learning_rate": 1.2542889771203166e-05, + "loss": 0.2326, "step": 9483 }, { - "epoch": 0.54, - "grad_norm": 0.28165729999453204, - "learning_rate": 9.033834948189936e-06, - "loss": 0.2253, + "epoch": 0.44, + "grad_norm": 0.3131408589993388, + "learning_rate": 1.2541450734293452e-05, + "loss": 0.1892, "step": 9484 }, { - "epoch": 0.54, - "grad_norm": 1.099977564464965, - "learning_rate": 9.031982758269078e-06, - "loss": 0.626, + "epoch": 0.44, + "grad_norm": 0.9590983860750499, + "learning_rate": 1.2540011641118472e-05, + "loss": 0.5866, "step": 9485 }, { - "epoch": 0.55, - "grad_norm": 0.40518128341349524, - "learning_rate": 9.030130601870615e-06, - "loss": 0.2398, + "epoch": 0.44, + "grad_norm": 0.6811746208258505, + "learning_rate": 1.2538572491710079e-05, + "loss": 0.415, "step": 9486 }, { - "epoch": 0.55, - "grad_norm": 0.27175186242628224, - "learning_rate": 9.02827847905869e-06, - "loss": 0.2198, + "epoch": 0.44, + "grad_norm": 0.2845844611578793, + "learning_rate": 1.2537133286100141e-05, + "loss": 0.2226, "step": 9487 }, { - "epoch": 0.55, - "grad_norm": 0.47784160961695693, - "learning_rate": 9.02642638989744e-06, - "loss": 0.3479, + "epoch": 0.44, + "grad_norm": 0.619186101438695, + "learning_rate": 1.2535694024320514e-05, + "loss": 0.3727, "step": 9488 }, { - "epoch": 0.55, - "grad_norm": 1.1214209878128412, - "learning_rate": 9.024574334451002e-06, - "loss": 0.7194, + "epoch": 0.44, + "grad_norm": 0.3795551826258032, + "learning_rate": 1.2534254706403068e-05, + "loss": 0.2372, "step": 9489 }, { - "epoch": 0.55, - "grad_norm": 0.32215965041453914, - "learning_rate": 9.02272231278351e-06, - "loss": 0.1616, + "epoch": 0.44, + "grad_norm": 0.44608843674598136, + "learning_rate": 1.2532815332379661e-05, + "loss": 0.2593, "step": 9490 }, { - "epoch": 0.55, - "grad_norm": 0.28186208190846546, - "learning_rate": 9.020870324959103e-06, - "loss": 0.2528, + "epoch": 0.44, + "grad_norm": 0.4387537115416498, + "learning_rate": 1.253137590228217e-05, + "loss": 0.274, "step": 9491 }, { - "epoch": 0.55, - "grad_norm": 0.4286246139050462, - "learning_rate": 9.019018371041914e-06, - "loss": 0.3669, + "epoch": 0.44, + "grad_norm": 0.5750553798334395, + "learning_rate": 1.2529936416142452e-05, + "loss": 0.3689, "step": 9492 }, { - "epoch": 0.55, - "grad_norm": 0.433043538846795, - "learning_rate": 9.017166451096077e-06, - "loss": 0.2856, + "epoch": 0.44, + "grad_norm": 0.3786398037234063, + "learning_rate": 1.2528496873992384e-05, + "loss": 0.2763, "step": 9493 }, { - "epoch": 0.55, - "grad_norm": 0.252036692216471, - "learning_rate": 9.015314565185724e-06, - "loss": 0.1777, + "epoch": 0.44, + "grad_norm": 0.7242925874880445, + "learning_rate": 1.2527057275863828e-05, + "loss": 0.3485, "step": 9494 }, { - "epoch": 0.55, - "grad_norm": 0.3567792880638821, - "learning_rate": 9.013462713374986e-06, - "loss": 0.3031, + "epoch": 0.44, + "grad_norm": 0.2569446270666328, + "learning_rate": 1.252561762178866e-05, + "loss": 0.2235, "step": 9495 }, { - "epoch": 0.55, - "grad_norm": 0.4525301068269673, - "learning_rate": 9.01161089572799e-06, - "loss": 0.3151, + "epoch": 0.44, + "grad_norm": 0.48866000515209407, + "learning_rate": 1.2524177911798753e-05, + "loss": 0.3191, "step": 9496 }, { - "epoch": 0.55, - "grad_norm": 0.4602195249201141, - "learning_rate": 9.009759112308867e-06, - "loss": 0.2832, + "epoch": 0.44, + "grad_norm": 0.5841411511787737, + "learning_rate": 1.252273814592598e-05, + "loss": 0.3102, "step": 9497 }, { - "epoch": 0.55, - "grad_norm": 0.6256387629393889, - "learning_rate": 9.007907363181742e-06, - "loss": 0.4464, + "epoch": 0.44, + "grad_norm": 0.8300876382045795, + "learning_rate": 1.2521298324202217e-05, + "loss": 0.5125, "step": 9498 }, { - "epoch": 0.55, - "grad_norm": 0.2897790404691268, - "learning_rate": 9.006055648410745e-06, - "loss": 0.2242, + "epoch": 0.44, + "grad_norm": 0.3994239912705845, + "learning_rate": 1.2519858446659339e-05, + "loss": 0.299, "step": 9499 }, { - "epoch": 0.55, - "grad_norm": 0.2947409062044821, - "learning_rate": 9.004203968059997e-06, - "loss": 0.203, + "epoch": 0.44, + "grad_norm": 0.3263345393824156, + "learning_rate": 1.2518418513329223e-05, + "loss": 0.2377, "step": 9500 }, { - "epoch": 0.55, - "grad_norm": 0.8779701484144261, - "learning_rate": 9.002352322193622e-06, - "loss": 0.5903, + "epoch": 0.44, + "grad_norm": 0.31140039536698183, + "learning_rate": 1.2516978524243747e-05, + "loss": 0.1856, "step": 9501 }, { - "epoch": 0.55, - "grad_norm": 0.31908124321909886, - "learning_rate": 9.000500710875746e-06, - "loss": 0.2542, + "epoch": 0.44, + "grad_norm": 0.6278373526893891, + "learning_rate": 1.2515538479434795e-05, + "loss": 0.3373, "step": 9502 }, { - "epoch": 0.55, - "grad_norm": 0.450366955751652, - "learning_rate": 8.998649134170484e-06, - "loss": 0.2701, + "epoch": 0.44, + "grad_norm": 0.3079186405326252, + "learning_rate": 1.251409837893424e-05, + "loss": 0.2746, "step": 9503 }, { - "epoch": 0.55, - "grad_norm": 0.5082350399387127, - "learning_rate": 8.996797592141962e-06, - "loss": 0.337, + "epoch": 0.44, + "grad_norm": 0.9697008656036147, + "learning_rate": 1.2512658222773975e-05, + "loss": 0.382, "step": 9504 }, { - "epoch": 0.55, - "grad_norm": 0.2484863804487214, - "learning_rate": 8.994946084854294e-06, - "loss": 0.2122, + "epoch": 0.44, + "grad_norm": 0.4282804534788201, + "learning_rate": 1.2511218010985879e-05, + "loss": 0.3342, "step": 9505 }, { - "epoch": 0.55, - "grad_norm": 0.3488933962122594, - "learning_rate": 8.9930946123716e-06, - "loss": 0.2352, + "epoch": 0.44, + "grad_norm": 0.52761505470986, + "learning_rate": 1.2509777743601834e-05, + "loss": 0.3429, "step": 9506 }, { - "epoch": 0.55, - "grad_norm": 0.3152421095593874, - "learning_rate": 8.991243174757997e-06, - "loss": 0.2676, + "epoch": 0.44, + "grad_norm": 0.2835547740002297, + "learning_rate": 1.2508337420653729e-05, + "loss": 0.2025, "step": 9507 }, { - "epoch": 0.55, - "grad_norm": 0.4266176579389591, - "learning_rate": 8.9893917720776e-06, - "loss": 0.3114, + "epoch": 0.44, + "grad_norm": 0.3496414928349116, + "learning_rate": 1.2506897042173454e-05, + "loss": 0.23, "step": 9508 }, { - "epoch": 0.55, - "grad_norm": 0.4938255856927612, - "learning_rate": 8.987540404394521e-06, - "loss": 0.3598, + "epoch": 0.44, + "grad_norm": 0.6993862738815619, + "learning_rate": 1.2505456608192889e-05, + "loss": 0.351, "step": 9509 }, { - "epoch": 0.55, - "grad_norm": 0.3394737874270989, - "learning_rate": 8.985689071772877e-06, - "loss": 0.2153, + "epoch": 0.44, + "grad_norm": 0.9001929853737437, + "learning_rate": 1.2504016118743936e-05, + "loss": 0.5513, "step": 9510 }, { - "epoch": 0.55, - "grad_norm": 0.37853025059595635, - "learning_rate": 8.983837774276774e-06, - "loss": 0.2724, + "epoch": 0.44, + "grad_norm": 0.2821800165107059, + "learning_rate": 1.250257557385848e-05, + "loss": 0.2676, "step": 9511 }, { - "epoch": 0.55, - "grad_norm": 0.2888364011937344, - "learning_rate": 8.981986511970327e-06, - "loss": 0.2864, + "epoch": 0.44, + "grad_norm": 0.5643722533773718, + "learning_rate": 1.2501134973568407e-05, + "loss": 0.3817, "step": 9512 }, { - "epoch": 0.55, - "grad_norm": 0.28390144484902724, - "learning_rate": 8.980135284917644e-06, - "loss": 0.1234, + "epoch": 0.44, + "grad_norm": 0.4061162839403097, + "learning_rate": 1.249969431790562e-05, + "loss": 0.19, "step": 9513 }, { - "epoch": 0.55, - "grad_norm": 0.35464412918329336, - "learning_rate": 8.97828409318283e-06, - "loss": 0.2683, + "epoch": 0.44, + "grad_norm": 0.44805134819715936, + "learning_rate": 1.2498253606902007e-05, + "loss": 0.306, "step": 9514 }, { - "epoch": 0.55, - "grad_norm": 0.286350130274255, - "learning_rate": 8.976432936829995e-06, - "loss": 0.2999, + "epoch": 0.44, + "grad_norm": 0.35436118099807895, + "learning_rate": 1.249681284058947e-05, + "loss": 0.3191, "step": 9515 }, { - "epoch": 0.55, - "grad_norm": 0.6943883642781716, - "learning_rate": 8.974581815923242e-06, - "loss": 0.3359, + "epoch": 0.44, + "grad_norm": 0.7658228637031458, + "learning_rate": 1.2495372018999904e-05, + "loss": 0.5499, "step": 9516 }, { - "epoch": 0.55, - "grad_norm": 0.40022381769441084, - "learning_rate": 8.972730730526679e-06, - "loss": 0.243, + "epoch": 0.44, + "grad_norm": 0.23863211714930668, + "learning_rate": 1.2493931142165202e-05, + "loss": 0.0986, "step": 9517 }, { - "epoch": 0.55, - "grad_norm": 0.5276954077075993, - "learning_rate": 8.970879680704404e-06, - "loss": 0.4043, + "epoch": 0.44, + "grad_norm": 0.450162260873162, + "learning_rate": 1.2492490210117272e-05, + "loss": 0.2862, "step": 9518 }, { - "epoch": 0.55, - "grad_norm": 0.28500783585719586, - "learning_rate": 8.969028666520524e-06, - "loss": 0.2542, + "epoch": 0.44, + "grad_norm": 0.3795813389571156, + "learning_rate": 1.249104922288801e-05, + "loss": 0.3352, "step": 9519 }, { - "epoch": 0.55, - "grad_norm": 0.24384593736610496, - "learning_rate": 8.967177688039135e-06, - "loss": 0.1619, + "epoch": 0.44, + "grad_norm": 0.549055750396541, + "learning_rate": 1.2489608180509316e-05, + "loss": 0.2412, "step": 9520 }, { - "epoch": 0.55, - "grad_norm": 0.48283497815577264, - "learning_rate": 8.96532674532434e-06, - "loss": 0.3448, + "epoch": 0.44, + "grad_norm": 0.335807905986383, + "learning_rate": 1.2488167083013101e-05, + "loss": 0.2777, "step": 9521 }, { - "epoch": 0.55, - "grad_norm": 0.8189859030284328, - "learning_rate": 8.963475838440237e-06, - "loss": 0.4574, + "epoch": 0.44, + "grad_norm": 1.2033157174734814, + "learning_rate": 1.2486725930431263e-05, + "loss": 0.7831, "step": 9522 }, { - "epoch": 0.55, - "grad_norm": 0.2617952258223295, - "learning_rate": 8.961624967450917e-06, - "loss": 0.2188, + "epoch": 0.44, + "grad_norm": 0.26491861254859733, + "learning_rate": 1.2485284722795711e-05, + "loss": 0.1878, "step": 9523 }, { - "epoch": 0.55, - "grad_norm": 0.48019246869526944, - "learning_rate": 8.959774132420481e-06, - "loss": 0.3712, + "epoch": 0.44, + "grad_norm": 0.47528350940347347, + "learning_rate": 1.2483843460138348e-05, + "loss": 0.3625, "step": 9524 }, { - "epoch": 0.55, - "grad_norm": 0.29000128064258673, - "learning_rate": 8.957923333413024e-06, - "loss": 0.2129, + "epoch": 0.44, + "grad_norm": 0.6774648563052934, + "learning_rate": 1.2482402142491087e-05, + "loss": 0.4422, "step": 9525 }, { - "epoch": 0.55, - "grad_norm": 0.526363917290331, - "learning_rate": 8.956072570492635e-06, - "loss": 0.2352, + "epoch": 0.44, + "grad_norm": 0.3881143480506948, + "learning_rate": 1.2480960769885835e-05, + "loss": 0.2559, "step": 9526 }, { - "epoch": 0.55, - "grad_norm": 0.2704870327838035, - "learning_rate": 8.954221843723409e-06, - "loss": 0.2755, + "epoch": 0.44, + "grad_norm": 0.3980774464370524, + "learning_rate": 1.24795193423545e-05, + "loss": 0.3456, "step": 9527 }, { - "epoch": 0.55, - "grad_norm": 1.0257404142025073, - "learning_rate": 8.952371153169435e-06, - "loss": 0.4987, + "epoch": 0.44, + "grad_norm": 0.525139313913425, + "learning_rate": 1.2478077859929e-05, + "loss": 0.3824, "step": 9528 }, { - "epoch": 0.55, - "grad_norm": 0.2940447139600711, - "learning_rate": 8.950520498894803e-06, - "loss": 0.1177, + "epoch": 0.44, + "grad_norm": 0.23809505073485704, + "learning_rate": 1.2476636322641245e-05, + "loss": 0.1483, "step": 9529 }, { - "epoch": 0.55, - "grad_norm": 0.38604536877936113, - "learning_rate": 8.9486698809636e-06, - "loss": 0.3443, + "epoch": 0.44, + "grad_norm": 0.4016254269233144, + "learning_rate": 1.2475194730523148e-05, + "loss": 0.2266, "step": 9530 }, { - "epoch": 0.55, - "grad_norm": 0.33418039366402746, - "learning_rate": 8.946819299439915e-06, - "loss": 0.3424, + "epoch": 0.44, + "grad_norm": 0.4150109764932381, + "learning_rate": 1.2473753083606621e-05, + "loss": 0.3114, "step": 9531 }, { - "epoch": 0.55, - "grad_norm": 0.5842996172246837, - "learning_rate": 8.944968754387832e-06, - "loss": 0.3918, + "epoch": 0.44, + "grad_norm": 0.6666840336836596, + "learning_rate": 1.247231138192359e-05, + "loss": 0.3405, "step": 9532 }, { - "epoch": 0.55, - "grad_norm": 0.20671343129546188, - "learning_rate": 8.943118245871437e-06, - "loss": 0.1549, + "epoch": 0.44, + "grad_norm": 0.3622340205516867, + "learning_rate": 1.2470869625505964e-05, + "loss": 0.2465, "step": 9533 }, { - "epoch": 0.55, - "grad_norm": 0.8778299994060808, - "learning_rate": 8.941267773954809e-06, - "loss": 0.475, + "epoch": 0.44, + "grad_norm": 0.3208603394078894, + "learning_rate": 1.246942781438567e-05, + "loss": 0.2606, "step": 9534 }, { - "epoch": 0.55, - "grad_norm": 0.3016216513617832, - "learning_rate": 8.939417338702034e-06, - "loss": 0.285, + "epoch": 0.44, + "grad_norm": 0.28379015156960213, + "learning_rate": 1.2467985948594622e-05, + "loss": 0.2245, "step": 9535 }, { - "epoch": 0.55, - "grad_norm": 0.3695515460906184, - "learning_rate": 8.93756694017719e-06, - "loss": 0.2673, + "epoch": 0.44, + "grad_norm": 0.31982995306285394, + "learning_rate": 1.2466544028164744e-05, + "loss": 0.2178, "step": 9536 }, { - "epoch": 0.55, - "grad_norm": 0.7614491699387651, - "learning_rate": 8.935716578444358e-06, - "loss": 0.4398, + "epoch": 0.44, + "grad_norm": 0.6717124978923377, + "learning_rate": 1.2465102053127957e-05, + "loss": 0.426, "step": 9537 }, { - "epoch": 0.55, - "grad_norm": 0.341722836767795, - "learning_rate": 8.933866253567615e-06, - "loss": 0.2352, + "epoch": 0.44, + "grad_norm": 0.862258560502964, + "learning_rate": 1.246366002351619e-05, + "loss": 0.5074, "step": 9538 }, { - "epoch": 0.55, - "grad_norm": 0.25011501068791103, - "learning_rate": 8.932015965611039e-06, - "loss": 0.2378, + "epoch": 0.44, + "grad_norm": 0.27785369165041746, + "learning_rate": 1.246221793936136e-05, + "loss": 0.2453, "step": 9539 }, { - "epoch": 0.55, - "grad_norm": 0.39852031588177406, - "learning_rate": 8.930165714638705e-06, - "loss": 0.1926, + "epoch": 0.44, + "grad_norm": 1.3446003332045038, + "learning_rate": 1.2460775800695404e-05, + "loss": 0.7774, "step": 9540 }, { - "epoch": 0.55, - "grad_norm": 0.5183265685725699, - "learning_rate": 8.928315500714682e-06, - "loss": 0.3365, + "epoch": 0.44, + "grad_norm": 0.2611312429197009, + "learning_rate": 1.2459333607550241e-05, + "loss": 0.1425, "step": 9541 }, { - "epoch": 0.55, - "grad_norm": 0.3523020413766487, - "learning_rate": 8.92646532390305e-06, - "loss": 0.2559, + "epoch": 0.44, + "grad_norm": 0.41386162811816574, + "learning_rate": 1.2457891359957801e-05, + "loss": 0.3185, "step": 9542 }, { - "epoch": 0.55, - "grad_norm": 0.4905252780482375, - "learning_rate": 8.924615184267876e-06, - "loss": 0.3216, + "epoch": 0.44, + "grad_norm": 0.3634473264801504, + "learning_rate": 1.2456449057950015e-05, + "loss": 0.2814, "step": 9543 }, { - "epoch": 0.55, - "grad_norm": 0.5270482680361739, - "learning_rate": 8.922765081873235e-06, - "loss": 0.3081, + "epoch": 0.44, + "grad_norm": 0.8451981203061738, + "learning_rate": 1.2455006701558815e-05, + "loss": 0.3914, "step": 9544 }, { - "epoch": 0.55, - "grad_norm": 0.25970806818561254, - "learning_rate": 8.92091501678319e-06, - "loss": 0.2239, + "epoch": 0.44, + "grad_norm": 0.37556247253808395, + "learning_rate": 1.2453564290816132e-05, + "loss": 0.2961, "step": 9545 }, { - "epoch": 0.55, - "grad_norm": 0.3026984576541297, - "learning_rate": 8.919064989061813e-06, - "loss": 0.194, + "epoch": 0.44, + "grad_norm": 0.4222578598088155, + "learning_rate": 1.2452121825753902e-05, + "loss": 0.3013, "step": 9546 }, { - "epoch": 0.55, - "grad_norm": 0.4164454229618692, - "learning_rate": 8.917214998773169e-06, - "loss": 0.3015, + "epoch": 0.44, + "grad_norm": 0.2842831040712511, + "learning_rate": 1.2450679306404059e-05, + "loss": 0.2382, "step": 9547 }, { - "epoch": 0.55, - "grad_norm": 0.32582574801627273, - "learning_rate": 8.915365045981323e-06, - "loss": 0.285, + "epoch": 0.44, + "grad_norm": 0.3344938558149986, + "learning_rate": 1.2449236732798536e-05, + "loss": 0.2449, "step": 9548 }, { - "epoch": 0.55, - "grad_norm": 0.7217319400615111, - "learning_rate": 8.913515130750336e-06, - "loss": 0.3338, + "epoch": 0.44, + "grad_norm": 0.5414165756598206, + "learning_rate": 1.2447794104969271e-05, + "loss": 0.3516, "step": 9549 }, { - "epoch": 0.55, - "grad_norm": 0.7820623728911443, - "learning_rate": 8.911665253144277e-06, - "loss": 0.4671, + "epoch": 0.44, + "grad_norm": 0.3832912520369787, + "learning_rate": 1.2446351422948207e-05, + "loss": 0.2918, "step": 9550 }, { - "epoch": 0.55, - "grad_norm": 0.24558093961398544, - "learning_rate": 8.9098154132272e-06, - "loss": 0.2646, + "epoch": 0.44, + "grad_norm": 0.3360145377394796, + "learning_rate": 1.2444908686767278e-05, + "loss": 0.2925, "step": 9551 }, { - "epoch": 0.55, - "grad_norm": 0.19719586673338096, - "learning_rate": 8.907965611063173e-06, - "loss": 0.073, + "epoch": 0.44, + "grad_norm": 0.8557389540022056, + "learning_rate": 1.2443465896458429e-05, + "loss": 0.544, "step": 9552 }, { - "epoch": 0.55, - "grad_norm": 0.5547086944121592, - "learning_rate": 8.906115846716247e-06, - "loss": 0.3461, + "epoch": 0.44, + "grad_norm": 0.447660407622772, + "learning_rate": 1.24420230520536e-05, + "loss": 0.2699, "step": 9553 }, { - "epoch": 0.55, - "grad_norm": 0.3694099603084277, - "learning_rate": 8.904266120250483e-06, - "loss": 0.3258, + "epoch": 0.44, + "grad_norm": 0.35897826002603556, + "learning_rate": 1.2440580153584732e-05, + "loss": 0.2753, "step": 9554 }, { - "epoch": 0.55, - "grad_norm": 0.4425574975466733, - "learning_rate": 8.902416431729939e-06, - "loss": 0.2769, + "epoch": 0.44, + "grad_norm": 0.29429391263681925, + "learning_rate": 1.2439137201083772e-05, + "loss": 0.2608, "step": 9555 }, { - "epoch": 0.55, - "grad_norm": 0.5256141648470348, - "learning_rate": 8.900566781218665e-06, - "loss": 0.3479, + "epoch": 0.44, + "grad_norm": 0.4911344184125994, + "learning_rate": 1.2437694194582668e-05, + "loss": 0.1434, "step": 9556 }, { - "epoch": 0.55, - "grad_norm": 0.3022772910244815, - "learning_rate": 8.898717168780713e-06, - "loss": 0.2375, + "epoch": 0.44, + "grad_norm": 0.40721635466757367, + "learning_rate": 1.243625113411336e-05, + "loss": 0.3257, "step": 9557 }, { - "epoch": 0.55, - "grad_norm": 0.409281807309569, - "learning_rate": 8.896867594480141e-06, - "loss": 0.2499, + "epoch": 0.44, + "grad_norm": 0.5311028757325035, + "learning_rate": 1.2434808019707804e-05, + "loss": 0.4216, "step": 9558 }, { - "epoch": 0.55, - "grad_norm": 0.3148261067696356, - "learning_rate": 8.895018058380995e-06, - "loss": 0.2314, + "epoch": 0.44, + "grad_norm": 0.3743964493118494, + "learning_rate": 1.2433364851397944e-05, + "loss": 0.263, "step": 9559 }, { - "epoch": 0.55, - "grad_norm": 0.3935927137427299, - "learning_rate": 8.893168560547327e-06, - "loss": 0.3369, + "epoch": 0.44, + "grad_norm": 0.376240469157283, + "learning_rate": 1.243192162921573e-05, + "loss": 0.3259, "step": 9560 }, { - "epoch": 0.55, - "grad_norm": 0.6398746892434829, - "learning_rate": 8.891319101043181e-06, - "loss": 0.4199, + "epoch": 0.44, + "grad_norm": 0.2862136385513368, + "learning_rate": 1.2430478353193115e-05, + "loss": 0.15, "step": 9561 }, { - "epoch": 0.55, - "grad_norm": 0.39721004210862404, - "learning_rate": 8.889469679932612e-06, - "loss": 0.2113, + "epoch": 0.44, + "grad_norm": 0.32892232958813616, + "learning_rate": 1.2429035023362055e-05, + "loss": 0.2555, "step": 9562 }, { - "epoch": 0.55, - "grad_norm": 0.28574456478066207, - "learning_rate": 8.887620297279656e-06, - "loss": 0.2957, + "epoch": 0.44, + "grad_norm": 0.3940948777682774, + "learning_rate": 1.2427591639754496e-05, + "loss": 0.2998, "step": 9563 }, { - "epoch": 0.55, - "grad_norm": 0.26193897143332595, - "learning_rate": 8.885770953148364e-06, - "loss": 0.1744, + "epoch": 0.44, + "grad_norm": 1.2919151231278225, + "learning_rate": 1.2426148202402405e-05, + "loss": 0.8189, "step": 9564 }, { - "epoch": 0.55, - "grad_norm": 0.6740258025951248, - "learning_rate": 8.883921647602777e-06, - "loss": 0.2764, + "epoch": 0.44, + "grad_norm": 0.6080089539265596, + "learning_rate": 1.2424704711337723e-05, + "loss": 0.4203, "step": 9565 }, { - "epoch": 0.55, - "grad_norm": 0.3760220070940269, - "learning_rate": 8.882072380706931e-06, - "loss": 0.3045, + "epoch": 0.44, + "grad_norm": 0.33166402354490326, + "learning_rate": 1.242326116659242e-05, + "loss": 0.2008, "step": 9566 }, { - "epoch": 0.55, - "grad_norm": 0.3810517214167689, - "learning_rate": 8.880223152524875e-06, - "loss": 0.3385, + "epoch": 0.44, + "grad_norm": 0.2381784833668846, + "learning_rate": 1.2421817568198446e-05, + "loss": 0.2185, "step": 9567 }, { - "epoch": 0.55, - "grad_norm": 0.9536008985955754, - "learning_rate": 8.87837396312064e-06, - "loss": 0.5612, + "epoch": 0.44, + "grad_norm": 0.8431144710530735, + "learning_rate": 1.2420373916187771e-05, + "loss": 0.4615, "step": 9568 }, { - "epoch": 0.55, - "grad_norm": 0.22293813348609617, - "learning_rate": 8.876524812558269e-06, - "loss": 0.1594, + "epoch": 0.44, + "grad_norm": 0.38840062715512647, + "learning_rate": 1.2418930210592348e-05, + "loss": 0.218, "step": 9569 }, { - "epoch": 0.55, - "grad_norm": 0.3957805630650527, - "learning_rate": 8.874675700901791e-06, - "loss": 0.2501, + "epoch": 0.44, + "grad_norm": 0.4073936431648435, + "learning_rate": 1.2417486451444144e-05, + "loss": 0.3462, "step": 9570 }, { - "epoch": 0.55, - "grad_norm": 0.38770512159826404, - "learning_rate": 8.87282662821525e-06, - "loss": 0.3198, + "epoch": 0.44, + "grad_norm": 0.7888506067044139, + "learning_rate": 1.2416042638775116e-05, + "loss": 0.4172, "step": 9571 }, { - "epoch": 0.55, - "grad_norm": 0.3391420199028762, - "learning_rate": 8.87097759456267e-06, - "loss": 0.2365, + "epoch": 0.44, + "grad_norm": 0.3186256468721699, + "learning_rate": 1.2414598772617233e-05, + "loss": 0.2345, "step": 9572 }, { - "epoch": 0.55, - "grad_norm": 1.202499855422108, - "learning_rate": 8.869128600008092e-06, - "loss": 0.818, + "epoch": 0.44, + "grad_norm": 0.2938581933553553, + "learning_rate": 1.2413154853002462e-05, + "loss": 0.1918, "step": 9573 }, { - "epoch": 0.55, - "grad_norm": 0.45885122466342415, - "learning_rate": 8.867279644615537e-06, - "loss": 0.3281, + "epoch": 0.44, + "grad_norm": 0.36637135558619166, + "learning_rate": 1.2411710879962767e-05, + "loss": 0.2848, "step": 9574 }, { - "epoch": 0.55, - "grad_norm": 0.31119385570500957, - "learning_rate": 8.865430728449043e-06, - "loss": 0.2231, + "epoch": 0.44, + "grad_norm": 0.3728737557494692, + "learning_rate": 1.241026685353012e-05, + "loss": 0.2203, "step": 9575 }, { - "epoch": 0.55, - "grad_norm": 0.2595652472913027, - "learning_rate": 8.863581851572633e-06, - "loss": 0.1539, + "epoch": 0.44, + "grad_norm": 0.8934077931088511, + "learning_rate": 1.2408822773736487e-05, + "loss": 0.4695, "step": 9576 }, { - "epoch": 0.55, - "grad_norm": 0.4554085077648678, - "learning_rate": 8.861733014050334e-06, - "loss": 0.3657, + "epoch": 0.44, + "grad_norm": 0.6666598305683288, + "learning_rate": 1.240737864061384e-05, + "loss": 0.4639, "step": 9577 }, { - "epoch": 0.55, - "grad_norm": 0.3500096854930678, - "learning_rate": 8.859884215946174e-06, - "loss": 0.2237, + "epoch": 0.44, + "grad_norm": 0.3579599228603201, + "learning_rate": 1.2405934454194146e-05, + "loss": 0.2823, "step": 9578 }, { - "epoch": 0.55, - "grad_norm": 0.3774828819594551, - "learning_rate": 8.858035457324172e-06, - "loss": 0.3163, + "epoch": 0.44, + "grad_norm": 0.37676047320879386, + "learning_rate": 1.2404490214509385e-05, + "loss": 0.2755, "step": 9579 }, { - "epoch": 0.55, - "grad_norm": 0.7309407493660592, - "learning_rate": 8.856186738248355e-06, - "loss": 0.4135, + "epoch": 0.44, + "grad_norm": 0.30817480657786145, + "learning_rate": 1.2403045921591528e-05, + "loss": 0.2043, "step": 9580 }, { - "epoch": 0.55, - "grad_norm": 0.3706488154985834, - "learning_rate": 8.85433805878274e-06, - "loss": 0.288, + "epoch": 0.44, + "grad_norm": 0.3493268863369354, + "learning_rate": 1.2401601575472552e-05, + "loss": 0.2614, "step": 9581 }, { - "epoch": 0.55, - "grad_norm": 0.21088115991413472, - "learning_rate": 8.85248941899135e-06, - "loss": 0.1959, + "epoch": 0.44, + "grad_norm": 0.6789197777552087, + "learning_rate": 1.2400157176184428e-05, + "loss": 0.3293, "step": 9582 }, { - "epoch": 0.55, - "grad_norm": 0.850973894592171, - "learning_rate": 8.850640818938202e-06, - "loss": 0.4281, + "epoch": 0.44, + "grad_norm": 0.5807957038525572, + "learning_rate": 1.2398712723759141e-05, + "loss": 0.3912, "step": 9583 }, { - "epoch": 0.55, - "grad_norm": 0.31743512024875603, - "learning_rate": 8.848792258687312e-06, - "loss": 0.2678, + "epoch": 0.44, + "grad_norm": 0.47637302397872444, + "learning_rate": 1.2397268218228664e-05, + "loss": 0.2932, "step": 9584 }, { - "epoch": 0.55, - "grad_norm": 0.773444547415871, - "learning_rate": 8.846943738302697e-06, - "loss": 0.5385, + "epoch": 0.44, + "grad_norm": 0.2510626410695625, + "learning_rate": 1.2395823659624982e-05, + "loss": 0.1329, "step": 9585 }, { - "epoch": 0.55, - "grad_norm": 0.40078285238849626, - "learning_rate": 8.845095257848372e-06, - "loss": 0.3058, + "epoch": 0.44, + "grad_norm": 0.39358706924813713, + "learning_rate": 1.239437904798007e-05, + "loss": 0.3275, "step": 9586 }, { - "epoch": 0.55, - "grad_norm": 0.364521054432038, - "learning_rate": 8.843246817388345e-06, - "loss": 0.2559, + "epoch": 0.44, + "grad_norm": 0.4331028826493053, + "learning_rate": 1.2392934383325917e-05, + "loss": 0.2577, "step": 9587 }, { - "epoch": 0.55, - "grad_norm": 1.3184618070839194, - "learning_rate": 8.841398416986635e-06, - "loss": 0.3191, + "epoch": 0.44, + "grad_norm": 0.603658254894565, + "learning_rate": 1.2391489665694501e-05, + "loss": 0.3197, "step": 9588 }, { - "epoch": 0.55, - "grad_norm": 0.3073635659674038, - "learning_rate": 8.83955005670725e-06, - "loss": 0.2267, + "epoch": 0.44, + "grad_norm": 1.294903917659896, + "learning_rate": 1.2390044895117807e-05, + "loss": 0.6753, "step": 9589 }, { - "epoch": 0.55, - "grad_norm": 0.2940086234938814, - "learning_rate": 8.837701736614194e-06, - "loss": 0.2482, + "epoch": 0.44, + "grad_norm": 0.3628923058748068, + "learning_rate": 1.2388600071627825e-05, + "loss": 0.2377, "step": 9590 }, { - "epoch": 0.55, - "grad_norm": 0.29719764315455904, - "learning_rate": 8.835853456771476e-06, - "loss": 0.2412, + "epoch": 0.44, + "grad_norm": 0.3904723514891969, + "learning_rate": 1.2387155195256537e-05, + "loss": 0.3198, "step": 9591 }, { - "epoch": 0.55, - "grad_norm": 0.9759795203201609, - "learning_rate": 8.834005217243103e-06, - "loss": 0.593, + "epoch": 0.44, + "grad_norm": 0.2103867084893919, + "learning_rate": 1.2385710266035937e-05, + "loss": 0.0967, "step": 9592 }, { - "epoch": 0.55, - "grad_norm": 0.3481287823907185, - "learning_rate": 8.832157018093078e-06, - "loss": 0.2459, + "epoch": 0.44, + "grad_norm": 0.32897157379292796, + "learning_rate": 1.238426528399801e-05, + "loss": 0.28, "step": 9593 }, { - "epoch": 0.55, - "grad_norm": 0.5531869767322063, - "learning_rate": 8.830308859385408e-06, - "loss": 0.3371, + "epoch": 0.44, + "grad_norm": 0.447313185224507, + "learning_rate": 1.2382820249174747e-05, + "loss": 0.3432, "step": 9594 }, { - "epoch": 0.55, - "grad_norm": 0.34635142391335155, - "learning_rate": 8.828460741184089e-06, - "loss": 0.2135, + "epoch": 0.44, + "grad_norm": 0.4637941626461378, + "learning_rate": 1.2381375161598141e-05, + "loss": 0.283, "step": 9595 }, { - "epoch": 0.55, - "grad_norm": 0.4122181775243568, - "learning_rate": 8.826612663553126e-06, - "loss": 0.2867, + "epoch": 0.44, + "grad_norm": 0.37435580844485417, + "learning_rate": 1.2379930021300184e-05, + "loss": 0.2708, "step": 9596 }, { - "epoch": 0.55, - "grad_norm": 0.4546733222643512, - "learning_rate": 8.824764626556514e-06, - "loss": 0.3519, + "epoch": 0.44, + "grad_norm": 1.4307387727731835, + "learning_rate": 1.2378484828312868e-05, + "loss": 0.689, "step": 9597 }, { - "epoch": 0.55, - "grad_norm": 0.30491457595218185, - "learning_rate": 8.822916630258255e-06, - "loss": 0.2441, + "epoch": 0.44, + "grad_norm": 0.24653470870973798, + "learning_rate": 1.2377039582668193e-05, + "loss": 0.1887, "step": 9598 }, { - "epoch": 0.55, - "grad_norm": 0.35806364278988395, - "learning_rate": 8.82106867472234e-06, - "loss": 0.2882, + "epoch": 0.44, + "grad_norm": 0.33488964789200004, + "learning_rate": 1.2375594284398154e-05, + "loss": 0.2924, "step": 9599 }, { - "epoch": 0.55, - "grad_norm": 0.6824198286469831, - "learning_rate": 8.819220760012768e-06, - "loss": 0.4241, + "epoch": 0.44, + "grad_norm": 0.649248743732412, + "learning_rate": 1.2374148933534744e-05, + "loss": 0.4185, "step": 9600 }, { - "epoch": 0.55, - "grad_norm": 0.2923506374089054, - "learning_rate": 8.81737288619353e-06, - "loss": 0.1254, + "epoch": 0.44, + "grad_norm": 0.4971778169313545, + "learning_rate": 1.2372703530109967e-05, + "loss": 0.287, "step": 9601 }, { - "epoch": 0.55, - "grad_norm": 0.3244416849157942, - "learning_rate": 8.815525053328617e-06, - "loss": 0.2596, + "epoch": 0.44, + "grad_norm": 0.467947894336131, + "learning_rate": 1.2371258074155818e-05, + "loss": 0.296, "step": 9602 }, { - "epoch": 0.55, - "grad_norm": 0.28496715222960844, - "learning_rate": 8.81367726148202e-06, - "loss": 0.2617, + "epoch": 0.44, + "grad_norm": 0.37579042849010924, + "learning_rate": 1.2369812565704302e-05, + "loss": 0.3226, "step": 9603 }, { - "epoch": 0.55, - "grad_norm": 0.7812979157651535, - "learning_rate": 8.811829510717731e-06, - "loss": 0.3444, + "epoch": 0.44, + "grad_norm": 0.4307594614227347, + "learning_rate": 1.236836700478742e-05, + "loss": 0.3097, "step": 9604 }, { - "epoch": 0.55, - "grad_norm": 0.31375336364159706, - "learning_rate": 8.809981801099735e-06, - "loss": 0.2695, + "epoch": 0.44, + "grad_norm": 0.36834214981869845, + "learning_rate": 1.2366921391437179e-05, + "loss": 0.2087, "step": 9605 }, { - "epoch": 0.55, - "grad_norm": 0.392233640141547, - "learning_rate": 8.808134132692015e-06, - "loss": 0.3176, + "epoch": 0.44, + "grad_norm": 0.3501598262689063, + "learning_rate": 1.2365475725685574e-05, + "loss": 0.2913, "step": 9606 }, { - "epoch": 0.55, - "grad_norm": 1.3762085157918527, - "learning_rate": 8.806286505558563e-06, - "loss": 0.6777, + "epoch": 0.44, + "grad_norm": 0.5125241091625219, + "learning_rate": 1.2364030007564618e-05, + "loss": 0.3171, "step": 9607 }, { - "epoch": 0.55, - "grad_norm": 0.3082493565251616, - "learning_rate": 8.804438919763352e-06, - "loss": 0.219, + "epoch": 0.44, + "grad_norm": 0.3555258453868249, + "learning_rate": 1.2362584237106315e-05, + "loss": 0.1781, "step": 9608 }, { - "epoch": 0.55, - "grad_norm": 0.4539330693613155, - "learning_rate": 8.802591375370375e-06, - "loss": 0.2814, + "epoch": 0.44, + "grad_norm": 0.5361810015285421, + "learning_rate": 1.2361138414342676e-05, + "loss": 0.3812, "step": 9609 }, { - "epoch": 0.55, - "grad_norm": 0.30749822562633533, - "learning_rate": 8.800743872443605e-06, - "loss": 0.2762, + "epoch": 0.44, + "grad_norm": 0.4307069918427517, + "learning_rate": 1.2359692539305707e-05, + "loss": 0.3552, "step": 9610 }, { - "epoch": 0.55, - "grad_norm": 0.3004335057158179, - "learning_rate": 8.798896411047024e-06, - "loss": 0.2093, + "epoch": 0.44, + "grad_norm": 0.35790148179259906, + "learning_rate": 1.2358246612027422e-05, + "loss": 0.2498, "step": 9611 }, { - "epoch": 0.55, - "grad_norm": 0.651005317404619, - "learning_rate": 8.797048991244606e-06, - "loss": 0.4288, + "epoch": 0.44, + "grad_norm": 0.9053816119011324, + "learning_rate": 1.2356800632539827e-05, + "loss": 0.5611, "step": 9612 }, { - "epoch": 0.55, - "grad_norm": 0.9232587293760873, - "learning_rate": 8.795201613100334e-06, - "loss": 0.627, + "epoch": 0.44, + "grad_norm": 0.3088654507360139, + "learning_rate": 1.235535460087494e-05, + "loss": 0.2003, "step": 9613 }, { - "epoch": 0.55, - "grad_norm": 0.2907613834716408, - "learning_rate": 8.793354276678176e-06, - "loss": 0.1997, + "epoch": 0.44, + "grad_norm": 0.2871338908174627, + "learning_rate": 1.2353908517064768e-05, + "loss": 0.2236, "step": 9614 }, { - "epoch": 0.55, - "grad_norm": 0.3186326974408788, - "learning_rate": 8.791506982042107e-06, - "loss": 0.2948, + "epoch": 0.44, + "grad_norm": 1.0277085790305298, + "learning_rate": 1.2352462381141335e-05, + "loss": 0.5663, "step": 9615 }, { - "epoch": 0.55, - "grad_norm": 0.5159427141535858, - "learning_rate": 8.789659729256099e-06, - "loss": 0.2764, + "epoch": 0.44, + "grad_norm": 0.7203127520736397, + "learning_rate": 1.235101619313665e-05, + "loss": 0.412, "step": 9616 }, { - "epoch": 0.55, - "grad_norm": 0.3412013433538205, - "learning_rate": 8.787812518384125e-06, - "loss": 0.1942, + "epoch": 0.44, + "grad_norm": 0.3995772490589311, + "learning_rate": 1.2349569953082734e-05, + "loss": 0.2818, "step": 9617 }, { - "epoch": 0.55, - "grad_norm": 0.35941120379477665, - "learning_rate": 8.78596534949015e-06, - "loss": 0.283, + "epoch": 0.44, + "grad_norm": 0.33827905472787684, + "learning_rate": 1.2348123661011602e-05, + "loss": 0.2514, "step": 9618 }, { - "epoch": 0.55, - "grad_norm": 1.0007832706509505, - "learning_rate": 8.784118222638142e-06, - "loss": 0.6069, + "epoch": 0.44, + "grad_norm": 0.31993889063108794, + "learning_rate": 1.2346677316955275e-05, + "loss": 0.1938, "step": 9619 }, { - "epoch": 0.55, - "grad_norm": 0.4190917204278424, - "learning_rate": 8.78227113789207e-06, - "loss": 0.3474, + "epoch": 0.44, + "grad_norm": 0.638384747400576, + "learning_rate": 1.2345230920945774e-05, + "loss": 0.3077, "step": 9620 }, { - "epoch": 0.55, - "grad_norm": 0.25232330543617304, - "learning_rate": 8.780424095315893e-06, - "loss": 0.2104, + "epoch": 0.44, + "grad_norm": 0.40529294768659263, + "learning_rate": 1.234378447301512e-05, + "loss": 0.2734, "step": 9621 }, { - "epoch": 0.55, - "grad_norm": 0.3665822398797408, - "learning_rate": 8.778577094973579e-06, - "loss": 0.3238, + "epoch": 0.44, + "grad_norm": 0.40724537667499666, + "learning_rate": 1.234233797319534e-05, + "loss": 0.3284, "step": 9622 }, { - "epoch": 0.55, - "grad_norm": 0.41883098987828316, - "learning_rate": 8.77673013692909e-06, - "loss": 0.3045, + "epoch": 0.44, + "grad_norm": 0.5813297011056503, + "learning_rate": 1.2340891421518453e-05, + "loss": 0.2169, "step": 9623 }, { - "epoch": 0.55, - "grad_norm": 0.3007795637019158, - "learning_rate": 8.77488322124638e-06, - "loss": 0.1753, + "epoch": 0.44, + "grad_norm": 0.363407278448993, + "learning_rate": 1.2339444818016488e-05, + "loss": 0.2774, "step": 9624 }, { - "epoch": 0.55, - "grad_norm": 1.344525945362686, - "learning_rate": 8.773036347989413e-06, - "loss": 0.7523, + "epoch": 0.44, + "grad_norm": 0.2528992805256744, + "learning_rate": 1.2337998162721466e-05, + "loss": 0.2131, "step": 9625 }, { - "epoch": 0.55, - "grad_norm": 0.26911175627901446, - "learning_rate": 8.771189517222143e-06, - "loss": 0.265, + "epoch": 0.44, + "grad_norm": 0.39581694018725494, + "learning_rate": 1.233655145566542e-05, + "loss": 0.281, "step": 9626 }, { - "epoch": 0.55, - "grad_norm": 0.42709579508130063, - "learning_rate": 8.769342729008529e-06, - "loss": 0.2923, + "epoch": 0.44, + "grad_norm": 0.40460639478854604, + "learning_rate": 1.2335104696880376e-05, + "loss": 0.2885, "step": 9627 }, { - "epoch": 0.55, - "grad_norm": 0.6199825882275222, - "learning_rate": 8.767495983412521e-06, - "loss": 0.3981, + "epoch": 0.44, + "grad_norm": 0.7825841157818485, + "learning_rate": 1.2333657886398367e-05, + "loss": 0.4903, "step": 9628 }, { - "epoch": 0.55, - "grad_norm": 0.25178532363408446, - "learning_rate": 8.765649280498076e-06, - "loss": 0.197, + "epoch": 0.44, + "grad_norm": 0.6105665996378035, + "learning_rate": 1.2332211024251418e-05, + "loss": 0.3968, "step": 9629 }, { - "epoch": 0.55, - "grad_norm": 0.2845849191707805, - "learning_rate": 8.763802620329146e-06, - "loss": 0.2088, + "epoch": 0.44, + "grad_norm": 0.2927046306155492, + "learning_rate": 1.2330764110471567e-05, + "loss": 0.2887, "step": 9630 }, { - "epoch": 0.55, - "grad_norm": 1.0896261704932444, - "learning_rate": 8.761956002969672e-06, - "loss": 0.7473, + "epoch": 0.44, + "grad_norm": 0.2497398390452131, + "learning_rate": 1.2329317145090844e-05, + "loss": 0.1103, "step": 9631 }, { - "epoch": 0.55, - "grad_norm": 0.5818223951584665, - "learning_rate": 8.760109428483613e-06, - "loss": 0.3236, + "epoch": 0.44, + "grad_norm": 0.4189561970016693, + "learning_rate": 1.2327870128141284e-05, + "loss": 0.2483, "step": 9632 }, { - "epoch": 0.55, - "grad_norm": 0.41990213718374075, - "learning_rate": 8.758262896934909e-06, - "loss": 0.3098, + "epoch": 0.44, + "grad_norm": 0.6527335113034519, + "learning_rate": 1.2326423059654927e-05, + "loss": 0.3651, "step": 9633 }, { - "epoch": 0.55, - "grad_norm": 0.399969974428762, - "learning_rate": 8.756416408387507e-06, - "loss": 0.2806, + "epoch": 0.44, + "grad_norm": 0.4082084727137672, + "learning_rate": 1.2324975939663801e-05, + "loss": 0.2906, "step": 9634 }, { - "epoch": 0.55, - "grad_norm": 0.3299745775632892, - "learning_rate": 8.754569962905351e-06, - "loss": 0.2081, + "epoch": 0.44, + "grad_norm": 0.42908281375043117, + "learning_rate": 1.232352876819995e-05, + "loss": 0.3115, "step": 9635 }, { - "epoch": 0.55, - "grad_norm": 0.3008769243224494, - "learning_rate": 8.752723560552386e-06, - "loss": 0.2482, + "epoch": 0.44, + "grad_norm": 0.5956308543988919, + "learning_rate": 1.2322081545295412e-05, + "loss": 0.3898, "step": 9636 }, { - "epoch": 0.55, - "grad_norm": 0.9013635666683019, - "learning_rate": 8.750877201392547e-06, - "loss": 0.3913, + "epoch": 0.44, + "grad_norm": 0.23004935655931547, + "learning_rate": 1.2320634270982226e-05, + "loss": 0.1604, "step": 9637 }, { - "epoch": 0.55, - "grad_norm": 0.28342819302573247, - "learning_rate": 8.749030885489782e-06, - "loss": 0.2537, + "epoch": 0.44, + "grad_norm": 0.4620577546254248, + "learning_rate": 1.2319186945292434e-05, + "loss": 0.3208, "step": 9638 }, { - "epoch": 0.55, - "grad_norm": 0.5810023181066697, - "learning_rate": 8.747184612908019e-06, - "loss": 0.3466, + "epoch": 0.44, + "grad_norm": 0.38688860714694545, + "learning_rate": 1.2317739568258078e-05, + "loss": 0.294, "step": 9639 }, { - "epoch": 0.55, - "grad_norm": 0.6679312576871723, - "learning_rate": 8.745338383711202e-06, - "loss": 0.3194, + "epoch": 0.44, + "grad_norm": 0.766648741152106, + "learning_rate": 1.2316292139911204e-05, + "loss": 0.3594, "step": 9640 }, { - "epoch": 0.55, - "grad_norm": 0.29402336429450904, - "learning_rate": 8.74349219796326e-06, - "loss": 0.2023, + "epoch": 0.44, + "grad_norm": 0.63975493027773, + "learning_rate": 1.2314844660283853e-05, + "loss": 0.361, "step": 9641 }, { - "epoch": 0.55, - "grad_norm": 0.26832438600589964, - "learning_rate": 8.741646055728133e-06, - "loss": 0.2494, + "epoch": 0.44, + "grad_norm": 0.33159347563531844, + "learning_rate": 1.231339712940807e-05, + "loss": 0.3052, "step": 9642 }, { - "epoch": 0.55, - "grad_norm": 1.104583917262513, - "learning_rate": 8.739799957069747e-06, - "loss": 0.681, + "epoch": 0.44, + "grad_norm": 0.4428056012950506, + "learning_rate": 1.2311949547315906e-05, + "loss": 0.2905, "step": 9643 }, { - "epoch": 0.55, - "grad_norm": 0.35403449470833537, - "learning_rate": 8.737953902052031e-06, - "loss": 0.2109, + "epoch": 0.44, + "grad_norm": 0.5629543441522619, + "learning_rate": 1.2310501914039407e-05, + "loss": 0.2437, "step": 9644 }, { - "epoch": 0.55, - "grad_norm": 0.6163331604318542, - "learning_rate": 8.736107890738922e-06, - "loss": 0.3423, + "epoch": 0.44, + "grad_norm": 0.3593639546762421, + "learning_rate": 1.2309054229610625e-05, + "loss": 0.2855, "step": 9645 }, { - "epoch": 0.55, - "grad_norm": 0.44648031150704204, - "learning_rate": 8.73426192319434e-06, - "loss": 0.3083, + "epoch": 0.44, + "grad_norm": 0.3592582670912627, + "learning_rate": 1.2307606494061608e-05, + "loss": 0.292, "step": 9646 }, { - "epoch": 0.55, - "grad_norm": 0.32973426498978503, - "learning_rate": 8.732415999482214e-06, - "loss": 0.2018, + "epoch": 0.44, + "grad_norm": 0.4170304508839579, + "learning_rate": 1.2306158707424402e-05, + "loss": 0.2035, "step": 9647 }, { - "epoch": 0.55, - "grad_norm": 0.29871322069317574, - "learning_rate": 8.730570119666465e-06, - "loss": 0.18, + "epoch": 0.44, + "grad_norm": 0.43586187249573893, + "learning_rate": 1.2304710869731072e-05, + "loss": 0.346, "step": 9648 }, { - "epoch": 0.55, - "grad_norm": 0.5177502319297812, - "learning_rate": 8.728724283811024e-06, - "loss": 0.3988, + "epoch": 0.44, + "grad_norm": 0.437043393776642, + "learning_rate": 1.2303262981013657e-05, + "loss": 0.2845, "step": 9649 }, { - "epoch": 0.55, - "grad_norm": 0.3241226683385959, - "learning_rate": 8.7268784919798e-06, - "loss": 0.2099, + "epoch": 0.44, + "grad_norm": 0.36173202652707226, + "learning_rate": 1.2301815041304226e-05, + "loss": 0.2572, "step": 9650 }, { - "epoch": 0.55, - "grad_norm": 0.45021331398071146, - "learning_rate": 8.725032744236723e-06, - "loss": 0.3137, + "epoch": 0.44, + "grad_norm": 0.42077006517873183, + "learning_rate": 1.2300367050634825e-05, + "loss": 0.3072, "step": 9651 }, { - "epoch": 0.55, - "grad_norm": 0.7969625967830009, - "learning_rate": 8.723187040645704e-06, - "loss": 0.5277, + "epoch": 0.44, + "grad_norm": 0.5205610596943908, + "learning_rate": 1.2298919009037518e-05, + "loss": 0.3335, "step": 9652 }, { - "epoch": 0.55, - "grad_norm": 0.27981093470443125, - "learning_rate": 8.721341381270668e-06, - "loss": 0.0984, + "epoch": 0.44, + "grad_norm": 0.3147560017699276, + "learning_rate": 1.2297470916544354e-05, + "loss": 0.2309, "step": 9653 }, { - "epoch": 0.55, - "grad_norm": 0.2597047101805497, - "learning_rate": 8.719495766175519e-06, - "loss": 0.2336, + "epoch": 0.44, + "grad_norm": 0.37854837728116186, + "learning_rate": 1.2296022773187404e-05, + "loss": 0.2977, "step": 9654 }, { - "epoch": 0.55, - "grad_norm": 1.1135473319190738, - "learning_rate": 8.717650195424182e-06, - "loss": 0.6095, + "epoch": 0.44, + "grad_norm": 0.9365465464747741, + "learning_rate": 1.2294574578998717e-05, + "loss": 0.5633, "step": 9655 }, { - "epoch": 0.55, - "grad_norm": 0.6191392989069763, - "learning_rate": 8.715804669080559e-06, - "loss": 0.3826, + "epoch": 0.44, + "grad_norm": 0.5488220345811485, + "learning_rate": 1.2293126334010365e-05, + "loss": 0.3653, "step": 9656 }, { - "epoch": 0.55, - "grad_norm": 0.3043730092485221, - "learning_rate": 8.713959187208572e-06, - "loss": 0.2419, + "epoch": 0.44, + "grad_norm": 0.2627226276630762, + "learning_rate": 1.2291678038254406e-05, + "loss": 0.1614, "step": 9657 }, { - "epoch": 0.55, - "grad_norm": 0.4603338423923025, - "learning_rate": 8.712113749872117e-06, - "loss": 0.3772, + "epoch": 0.44, + "grad_norm": 0.32992006937915863, + "learning_rate": 1.2290229691762903e-05, + "loss": 0.2584, "step": 9658 }, { - "epoch": 0.55, - "grad_norm": 0.5274180041754517, - "learning_rate": 8.710268357135109e-06, - "loss": 0.3372, + "epoch": 0.44, + "grad_norm": 1.3531818343867195, + "learning_rate": 1.228878129456792e-05, + "loss": 0.8383, "step": 9659 }, { - "epoch": 0.56, - "grad_norm": 0.20931527382171677, - "learning_rate": 8.70842300906145e-06, - "loss": 0.1499, + "epoch": 0.44, + "grad_norm": 0.34147047026405486, + "learning_rate": 1.2287332846701528e-05, + "loss": 0.2165, "step": 9660 }, { - "epoch": 0.56, - "grad_norm": 0.45032579061938405, - "learning_rate": 8.70657770571505e-06, - "loss": 0.3645, + "epoch": 0.44, + "grad_norm": 0.47166849528965465, + "learning_rate": 1.2285884348195792e-05, + "loss": 0.3452, "step": 9661 }, { - "epoch": 0.56, - "grad_norm": 0.4065200216698055, - "learning_rate": 8.704732447159807e-06, - "loss": 0.2926, + "epoch": 0.44, + "grad_norm": 0.46533522669274496, + "learning_rate": 1.2284435799082774e-05, + "loss": 0.3235, "step": 9662 }, { - "epoch": 0.56, - "grad_norm": 0.41762152764098875, - "learning_rate": 8.702887233459625e-06, - "loss": 0.2756, + "epoch": 0.44, + "grad_norm": 0.3053715938859802, + "learning_rate": 1.2282987199394556e-05, + "loss": 0.1649, "step": 9663 }, { - "epoch": 0.56, - "grad_norm": 1.3246494443880283, - "learning_rate": 8.7010420646784e-06, - "loss": 0.8345, + "epoch": 0.44, + "grad_norm": 0.5211688582173295, + "learning_rate": 1.22815385491632e-05, + "loss": 0.2321, "step": 9664 }, { - "epoch": 0.56, - "grad_norm": 0.34809237419193545, - "learning_rate": 8.699196940880032e-06, - "loss": 0.2729, + "epoch": 0.44, + "grad_norm": 0.43790209331438057, + "learning_rate": 1.2280089848420778e-05, + "loss": 0.3001, "step": 9665 }, { - "epoch": 0.56, - "grad_norm": 0.2492864391185059, - "learning_rate": 8.69735186212842e-06, - "loss": 0.2019, + "epoch": 0.44, + "grad_norm": 0.34836144457036783, + "learning_rate": 1.2278641097199362e-05, + "loss": 0.241, "step": 9666 }, { - "epoch": 0.56, - "grad_norm": 0.5684745090978655, - "learning_rate": 8.695506828487457e-06, - "loss": 0.285, + "epoch": 0.44, + "grad_norm": 0.9077883607530999, + "learning_rate": 1.2277192295531033e-05, + "loss": 0.4616, "step": 9667 }, { - "epoch": 0.56, - "grad_norm": 0.4835198325712554, - "learning_rate": 8.693661840021035e-06, - "loss": 0.3404, + "epoch": 0.44, + "grad_norm": 0.6257765567985331, + "learning_rate": 1.2275743443447858e-05, + "loss": 0.3713, "step": 9668 }, { - "epoch": 0.56, - "grad_norm": 0.41002756921375183, - "learning_rate": 8.691816896793049e-06, - "loss": 0.3262, + "epoch": 0.44, + "grad_norm": 0.369322750500985, + "learning_rate": 1.2274294540981917e-05, + "loss": 0.2782, "step": 9669 }, { - "epoch": 0.56, - "grad_norm": 0.3301324479001951, - "learning_rate": 8.689971998867386e-06, - "loss": 0.271, + "epoch": 0.44, + "grad_norm": 0.22909056633402966, + "learning_rate": 1.227284558816529e-05, + "loss": 0.1752, "step": 9670 }, { - "epoch": 0.56, - "grad_norm": 0.8899810603812406, - "learning_rate": 8.688127146307938e-06, - "loss": 0.4589, + "epoch": 0.44, + "grad_norm": 0.9120830419115059, + "learning_rate": 1.2271396585030049e-05, + "loss": 0.5374, "step": 9671 }, { - "epoch": 0.56, - "grad_norm": 0.26078813677723967, - "learning_rate": 8.68628233917859e-06, - "loss": 0.2169, + "epoch": 0.44, + "grad_norm": 0.4000589686572815, + "learning_rate": 1.2269947531608277e-05, + "loss": 0.3016, "step": 9672 }, { - "epoch": 0.56, - "grad_norm": 0.2721884189437094, - "learning_rate": 8.684437577543227e-06, - "loss": 0.2406, + "epoch": 0.44, + "grad_norm": 0.48044750546188886, + "learning_rate": 1.2268498427932055e-05, + "loss": 0.3028, "step": 9673 }, { - "epoch": 0.56, - "grad_norm": 0.5468489073374028, - "learning_rate": 8.682592861465735e-06, - "loss": 0.3952, + "epoch": 0.44, + "grad_norm": 0.7377510286532568, + "learning_rate": 1.2267049274033465e-05, + "loss": 0.415, "step": 9674 }, { - "epoch": 0.56, - "grad_norm": 0.3647467307258367, - "learning_rate": 8.680748191009995e-06, - "loss": 0.295, + "epoch": 0.44, + "grad_norm": 0.3928056313699874, + "learning_rate": 1.226560006994459e-05, + "loss": 0.2965, "step": 9675 }, { - "epoch": 0.56, - "grad_norm": 0.7706378980657368, - "learning_rate": 8.67890356623989e-06, - "loss": 0.3905, + "epoch": 0.44, + "grad_norm": 0.2211153595281202, + "learning_rate": 1.2264150815697512e-05, + "loss": 0.1213, "step": 9676 }, { - "epoch": 0.56, - "grad_norm": 0.3748336812202746, - "learning_rate": 8.677058987219294e-06, - "loss": 0.3049, + "epoch": 0.44, + "grad_norm": 0.3989834535149172, + "learning_rate": 1.2262701511324315e-05, + "loss": 0.3244, "step": 9677 }, { - "epoch": 0.56, - "grad_norm": 0.2454214308007065, - "learning_rate": 8.675214454012092e-06, - "loss": 0.233, + "epoch": 0.44, + "grad_norm": 0.3654257044198351, + "learning_rate": 1.2261252156857091e-05, + "loss": 0.2725, "step": 9678 }, { - "epoch": 0.56, - "grad_norm": 0.45322202546424467, - "learning_rate": 8.673369966682154e-06, - "loss": 0.2341, + "epoch": 0.44, + "grad_norm": 0.7791841085327847, + "learning_rate": 1.2259802752327921e-05, + "loss": 0.4339, "step": 9679 }, { - "epoch": 0.56, - "grad_norm": 0.5365199502435255, - "learning_rate": 8.67152552529336e-06, - "loss": 0.3428, + "epoch": 0.44, + "grad_norm": 0.8153146536859531, + "learning_rate": 1.2258353297768897e-05, + "loss": 0.2951, "step": 9680 }, { - "epoch": 0.56, - "grad_norm": 0.4117158142211994, - "learning_rate": 8.669681129909578e-06, - "loss": 0.3221, + "epoch": 0.44, + "grad_norm": 0.3531856259134076, + "learning_rate": 1.2256903793212107e-05, + "loss": 0.2708, "step": 9681 }, { - "epoch": 0.56, - "grad_norm": 0.34998858461194654, - "learning_rate": 8.667836780594682e-06, - "loss": 0.336, + "epoch": 0.44, + "grad_norm": 0.34553279678939053, + "learning_rate": 1.2255454238689643e-05, + "loss": 0.25, "step": 9682 }, { - "epoch": 0.56, - "grad_norm": 0.3850112115715607, - "learning_rate": 8.665992477412541e-06, - "loss": 0.1246, + "epoch": 0.44, + "grad_norm": 0.32491390701370587, + "learning_rate": 1.2254004634233596e-05, + "loss": 0.1675, "step": 9683 }, { - "epoch": 0.56, - "grad_norm": 0.3888304992876215, - "learning_rate": 8.664148220427023e-06, - "loss": 0.3117, + "epoch": 0.44, + "grad_norm": 0.4298912036748761, + "learning_rate": 1.225255497987606e-05, + "loss": 0.3139, "step": 9684 }, { - "epoch": 0.56, - "grad_norm": 0.38035497224060644, - "learning_rate": 8.662304009701994e-06, - "loss": 0.3066, + "epoch": 0.44, + "grad_norm": 0.545577302055996, + "learning_rate": 1.2251105275649125e-05, + "loss": 0.3838, "step": 9685 }, { - "epoch": 0.56, - "grad_norm": 0.2546173089481329, - "learning_rate": 8.66045984530132e-06, - "loss": 0.1335, + "epoch": 0.44, + "grad_norm": 0.435414894881538, + "learning_rate": 1.224965552158489e-05, + "loss": 0.2543, "step": 9686 }, { - "epoch": 0.56, - "grad_norm": 0.35621827719168836, - "learning_rate": 8.658615727288863e-06, - "loss": 0.2854, + "epoch": 0.45, + "grad_norm": 0.38494241083985226, + "learning_rate": 1.224820571771545e-05, + "loss": 0.2814, "step": 9687 }, { - "epoch": 0.56, - "grad_norm": 1.0338373334263178, - "learning_rate": 8.656771655728487e-06, - "loss": 0.6266, + "epoch": 0.45, + "grad_norm": 0.31640813574011656, + "learning_rate": 1.2246755864072903e-05, + "loss": 0.1806, "step": 9688 }, { - "epoch": 0.56, - "grad_norm": 0.3064225676834638, - "learning_rate": 8.65492763068405e-06, - "loss": 0.2283, + "epoch": 0.45, + "grad_norm": 0.31102503739315834, + "learning_rate": 1.2245305960689346e-05, + "loss": 0.2644, "step": 9689 }, { - "epoch": 0.56, - "grad_norm": 0.33605010056283907, - "learning_rate": 8.653083652219417e-06, - "loss": 0.302, + "epoch": 0.45, + "grad_norm": 0.37351233648265275, + "learning_rate": 1.2243856007596879e-05, + "loss": 0.2678, "step": 9690 }, { - "epoch": 0.56, - "grad_norm": 0.6969521748952202, - "learning_rate": 8.651239720398433e-06, - "loss": 0.4025, + "epoch": 0.45, + "grad_norm": 0.5664346786615366, + "learning_rate": 1.2242406004827605e-05, + "loss": 0.4753, "step": 9691 }, { - "epoch": 0.56, - "grad_norm": 0.22137321403803517, - "learning_rate": 8.64939583528496e-06, - "loss": 0.114, + "epoch": 0.45, + "grad_norm": 1.163119450270987, + "learning_rate": 1.2240955952413618e-05, + "loss": 0.6101, "step": 9692 }, { - "epoch": 0.56, - "grad_norm": 0.31067351486742123, - "learning_rate": 8.647551996942852e-06, - "loss": 0.2666, + "epoch": 0.45, + "grad_norm": 0.33797792996651504, + "learning_rate": 1.2239505850387032e-05, + "loss": 0.1908, "step": 9693 }, { - "epoch": 0.56, - "grad_norm": 0.3017237884993278, - "learning_rate": 8.645708205435959e-06, - "loss": 0.237, + "epoch": 0.45, + "grad_norm": 0.2624590551502283, + "learning_rate": 1.2238055698779943e-05, + "loss": 0.2089, "step": 9694 }, { - "epoch": 0.56, - "grad_norm": 0.7947918989546467, - "learning_rate": 8.643864460828135e-06, - "loss": 0.4867, - "step": 9695 + "epoch": 0.45, + "grad_norm": 0.6643778601093144, + "learning_rate": 1.2236605497624456e-05, + "loss": 0.4655, + "step": 9695 }, { - "epoch": 0.56, - "grad_norm": 0.30815653925994546, - "learning_rate": 8.642020763183224e-06, - "loss": 0.2316, + "epoch": 0.45, + "grad_norm": 0.2935776948767263, + "learning_rate": 1.2235155246952679e-05, + "loss": 0.2358, "step": 9696 }, { - "epoch": 0.56, - "grad_norm": 0.4541266781213096, - "learning_rate": 8.640177112565078e-06, - "loss": 0.3681, + "epoch": 0.45, + "grad_norm": 0.3863133014200058, + "learning_rate": 1.223370494679672e-05, + "loss": 0.3187, "step": 9697 }, { - "epoch": 0.56, - "grad_norm": 0.2920545878428287, - "learning_rate": 8.638333509037537e-06, - "loss": 0.2506, + "epoch": 0.45, + "grad_norm": 1.2816111024089614, + "learning_rate": 1.223225459718869e-05, + "loss": 0.6438, "step": 9698 }, { - "epoch": 0.56, - "grad_norm": 0.3173736734585809, - "learning_rate": 8.63648995266445e-06, - "loss": 0.211, + "epoch": 0.45, + "grad_norm": 0.3301988037120032, + "learning_rate": 1.223080419816069e-05, + "loss": 0.2184, "step": 9699 }, { - "epoch": 0.56, - "grad_norm": 0.38440130858759364, - "learning_rate": 8.634646443509656e-06, - "loss": 0.2679, + "epoch": 0.45, + "grad_norm": 0.4745226708584871, + "learning_rate": 1.2229353749744835e-05, + "loss": 0.2643, "step": 9700 }, { - "epoch": 0.56, - "grad_norm": 0.32061793907154607, - "learning_rate": 8.632802981636998e-06, - "loss": 0.3013, + "epoch": 0.45, + "grad_norm": 0.3999480638430245, + "learning_rate": 1.2227903251973239e-05, + "loss": 0.3165, "step": 9701 }, { - "epoch": 0.56, - "grad_norm": 0.31661079459501085, - "learning_rate": 8.630959567110314e-06, - "loss": 0.2308, + "epoch": 0.45, + "grad_norm": 0.36929209564575416, + "learning_rate": 1.2226452704878009e-05, + "loss": 0.2425, "step": 9702 }, { - "epoch": 0.56, - "grad_norm": 0.839441115520653, - "learning_rate": 8.629116199993441e-06, - "loss": 0.4207, + "epoch": 0.45, + "grad_norm": 1.3582211691518813, + "learning_rate": 1.2225002108491264e-05, + "loss": 0.811, "step": 9703 }, { - "epoch": 0.56, - "grad_norm": 1.1032675562865573, - "learning_rate": 8.627272880350214e-06, - "loss": 0.7347, + "epoch": 0.45, + "grad_norm": 0.48611364316709277, + "learning_rate": 1.2223551462845118e-05, + "loss": 0.3014, "step": 9704 }, { - "epoch": 0.56, - "grad_norm": 0.26791299634997684, - "learning_rate": 8.62542960824447e-06, - "loss": 0.212, + "epoch": 0.45, + "grad_norm": 0.37539057064334597, + "learning_rate": 1.2222100767971686e-05, + "loss": 0.2842, "step": 9705 }, { - "epoch": 0.56, - "grad_norm": 0.2568316260810858, - "learning_rate": 8.623586383740037e-06, - "loss": 0.2, + "epoch": 0.45, + "grad_norm": 0.33383080871257836, + "learning_rate": 1.2220650023903085e-05, + "loss": 0.2446, "step": 9706 }, { - "epoch": 0.56, - "grad_norm": 1.0746648875443938, - "learning_rate": 8.621743206900752e-06, - "loss": 0.407, + "epoch": 0.45, + "grad_norm": 0.654155837491421, + "learning_rate": 1.2219199230671428e-05, + "loss": 0.3976, "step": 9707 }, { - "epoch": 0.56, - "grad_norm": 0.3060183253023324, - "learning_rate": 8.619900077790439e-06, - "loss": 0.2603, + "epoch": 0.45, + "grad_norm": 0.4316797456072796, + "learning_rate": 1.2217748388308844e-05, + "loss": 0.2912, "step": 9708 }, { - "epoch": 0.56, - "grad_norm": 0.3354086747479937, - "learning_rate": 8.618056996472925e-06, - "loss": 0.2808, + "epoch": 0.45, + "grad_norm": 0.3113384635270941, + "learning_rate": 1.2216297496847445e-05, + "loss": 0.2596, "step": 9709 }, { - "epoch": 0.56, - "grad_norm": 1.25565098457294, - "learning_rate": 8.616213963012042e-06, - "loss": 0.7637, + "epoch": 0.45, + "grad_norm": 0.31844514497911575, + "learning_rate": 1.2214846556319357e-05, + "loss": 0.1936, "step": 9710 }, { - "epoch": 0.56, - "grad_norm": 0.34005069327044857, - "learning_rate": 8.614370977471604e-06, - "loss": 0.27, + "epoch": 0.45, + "grad_norm": 0.43440287691212437, + "learning_rate": 1.2213395566756701e-05, + "loss": 0.2897, "step": 9711 }, { - "epoch": 0.56, - "grad_norm": 0.18000557563902286, - "learning_rate": 8.612528039915444e-06, - "loss": 0.0875, + "epoch": 0.45, + "grad_norm": 0.4958796569335049, + "learning_rate": 1.2211944528191602e-05, + "loss": 0.2987, "step": 9712 }, { - "epoch": 0.56, - "grad_norm": 0.5228609692059021, - "learning_rate": 8.610685150407376e-06, - "loss": 0.3247, + "epoch": 0.45, + "grad_norm": 0.4126437930230702, + "learning_rate": 1.2210493440656179e-05, + "loss": 0.3226, "step": 9713 }, { - "epoch": 0.56, - "grad_norm": 0.3874828243771713, - "learning_rate": 8.608842309011224e-06, - "loss": 0.2822, + "epoch": 0.45, + "grad_norm": 0.4094317950000837, + "learning_rate": 1.2209042304182565e-05, + "loss": 0.3074, "step": 9714 }, { - "epoch": 0.56, - "grad_norm": 0.9765194619225318, - "learning_rate": 8.606999515790801e-06, - "loss": 0.2146, + "epoch": 0.45, + "grad_norm": 0.2250550662466489, + "learning_rate": 1.220759111880288e-05, + "loss": 0.1588, "step": 9715 }, { - "epoch": 0.56, - "grad_norm": 0.4766873680274542, - "learning_rate": 8.605156770809926e-06, - "loss": 0.3851, + "epoch": 0.45, + "grad_norm": 1.244877091671976, + "learning_rate": 1.220613988454926e-05, + "loss": 0.7579, "step": 9716 }, { - "epoch": 0.56, - "grad_norm": 0.33428061403293535, - "learning_rate": 8.603314074132411e-06, - "loss": 0.2666, + "epoch": 0.45, + "grad_norm": 0.3036630664209244, + "learning_rate": 1.2204688601453827e-05, + "loss": 0.2693, "step": 9717 }, { - "epoch": 0.56, - "grad_norm": 0.29411656533246155, - "learning_rate": 8.60147142582207e-06, - "loss": 0.2137, + "epoch": 0.45, + "grad_norm": 0.525547726921368, + "learning_rate": 1.2203237269548713e-05, + "loss": 0.3557, "step": 9718 }, { - "epoch": 0.56, - "grad_norm": 0.45824528014285415, - "learning_rate": 8.599628825942713e-06, - "loss": 0.2705, + "epoch": 0.45, + "grad_norm": 0.6155026243694358, + "learning_rate": 1.2201785888866049e-05, + "loss": 0.3545, "step": 9719 }, { - "epoch": 0.56, - "grad_norm": 0.37961707152946494, - "learning_rate": 8.597786274558152e-06, - "loss": 0.2672, + "epoch": 0.45, + "grad_norm": 0.3570954748087456, + "learning_rate": 1.2200334459437967e-05, + "loss": 0.2876, "step": 9720 }, { - "epoch": 0.56, - "grad_norm": 0.375183535077221, - "learning_rate": 8.595943771732187e-06, - "loss": 0.3041, + "epoch": 0.45, + "grad_norm": 0.5068079239030496, + "learning_rate": 1.2198882981296604e-05, + "loss": 0.3732, "step": 9721 }, { - "epoch": 0.56, - "grad_norm": 1.2665256641960105, - "learning_rate": 8.594101317528634e-06, - "loss": 0.4257, + "epoch": 0.45, + "grad_norm": 0.2678773984369712, + "learning_rate": 1.219743145447409e-05, + "loss": 0.1779, "step": 9722 }, { - "epoch": 0.56, - "grad_norm": 0.3198305090286384, - "learning_rate": 8.59225891201129e-06, - "loss": 0.227, + "epoch": 0.45, + "grad_norm": 0.4195719551579831, + "learning_rate": 1.2195979879002562e-05, + "loss": 0.2813, "step": 9723 }, { - "epoch": 0.56, - "grad_norm": 0.5603332070743713, - "learning_rate": 8.590416555243962e-06, - "loss": 0.4177, + "epoch": 0.45, + "grad_norm": 0.4683178673355252, + "learning_rate": 1.2194528254914154e-05, + "loss": 0.3275, "step": 9724 }, { - "epoch": 0.56, - "grad_norm": 0.34251976719886146, - "learning_rate": 8.588574247290444e-06, - "loss": 0.2623, + "epoch": 0.45, + "grad_norm": 0.36522845933001624, + "learning_rate": 1.2193076582241006e-05, + "loss": 0.2769, "step": 9725 }, { - "epoch": 0.56, - "grad_norm": 0.26791062951199285, - "learning_rate": 8.586731988214542e-06, - "loss": 0.21, + "epoch": 0.45, + "grad_norm": 0.45219249121897936, + "learning_rate": 1.2191624861015255e-05, + "loss": 0.2922, "step": 9726 }, { - "epoch": 0.56, - "grad_norm": 0.43863608264963566, - "learning_rate": 8.584889778080049e-06, - "loss": 0.2461, + "epoch": 0.45, + "grad_norm": 0.539546942913735, + "learning_rate": 1.2190173091269042e-05, + "loss": 0.3785, "step": 9727 }, { - "epoch": 0.56, - "grad_norm": 0.4854419789559304, - "learning_rate": 8.583047616950761e-06, - "loss": 0.3134, + "epoch": 0.45, + "grad_norm": 0.2338016503542038, + "learning_rate": 1.2188721273034511e-05, + "loss": 0.1728, "step": 9728 }, { - "epoch": 0.56, - "grad_norm": 0.32119730040485317, - "learning_rate": 8.581205504890474e-06, - "loss": 0.2772, + "epoch": 0.45, + "grad_norm": 0.4390010563633753, + "learning_rate": 1.2187269406343798e-05, + "loss": 0.3097, "step": 9729 }, { - "epoch": 0.56, - "grad_norm": 0.8707909117456779, - "learning_rate": 8.57936344196298e-06, - "loss": 0.4635, + "epoch": 0.45, + "grad_norm": 0.4913244430598165, + "learning_rate": 1.2185817491229049e-05, + "loss": 0.3738, "step": 9730 }, { - "epoch": 0.56, - "grad_norm": 0.49345414113964153, - "learning_rate": 8.577521428232067e-06, - "loss": 0.2946, + "epoch": 0.45, + "grad_norm": 1.0648646722406994, + "learning_rate": 1.2184365527722406e-05, + "loss": 0.6025, "step": 9731 }, { - "epoch": 0.56, - "grad_norm": 0.23250652886101789, - "learning_rate": 8.575679463761527e-06, - "loss": 0.1565, + "epoch": 0.45, + "grad_norm": 0.36604230013530015, + "learning_rate": 1.2182913515856016e-05, + "loss": 0.2161, "step": 9732 }, { - "epoch": 0.56, - "grad_norm": 0.3752357585942652, - "learning_rate": 8.573837548615144e-06, - "loss": 0.3268, + "epoch": 0.45, + "grad_norm": 0.3736570465282811, + "learning_rate": 1.2181461455662026e-05, + "loss": 0.3012, "step": 9733 }, { - "epoch": 0.56, - "grad_norm": 1.1076609438672396, - "learning_rate": 8.57199568285671e-06, - "loss": 0.6822, + "epoch": 0.45, + "grad_norm": 0.3401483893757466, + "learning_rate": 1.2180009347172583e-05, + "loss": 0.2062, "step": 9734 }, { - "epoch": 0.56, - "grad_norm": 0.31491087231924286, - "learning_rate": 8.570153866550002e-06, - "loss": 0.2089, + "epoch": 0.45, + "grad_norm": 0.3670483470021049, + "learning_rate": 1.2178557190419833e-05, + "loss": 0.2276, "step": 9735 }, { - "epoch": 0.56, - "grad_norm": 1.3889797767245986, - "learning_rate": 8.568312099758802e-06, - "loss": 0.4492, + "epoch": 0.45, + "grad_norm": 0.4891308733721713, + "learning_rate": 1.2177104985435929e-05, + "loss": 0.3528, "step": 9736 }, { - "epoch": 0.56, - "grad_norm": 0.36769358781100087, - "learning_rate": 8.566470382546896e-06, - "loss": 0.3141, + "epoch": 0.45, + "grad_norm": 0.37094413377213675, + "learning_rate": 1.2175652732253012e-05, + "loss": 0.3165, "step": 9737 }, { - "epoch": 0.56, - "grad_norm": 0.21757133357345376, - "learning_rate": 8.564628714978055e-06, - "loss": 0.1293, + "epoch": 0.45, + "grad_norm": 0.35858412828135794, + "learning_rate": 1.2174200430903244e-05, + "loss": 0.2138, "step": 9738 }, { - "epoch": 0.56, - "grad_norm": 0.34371264630242915, - "learning_rate": 8.562787097116063e-06, - "loss": 0.2301, + "epoch": 0.45, + "grad_norm": 1.3779449738366647, + "learning_rate": 1.2172748081418775e-05, + "loss": 0.799, "step": 9739 }, { - "epoch": 0.56, - "grad_norm": 0.36254630132426113, - "learning_rate": 8.56094552902469e-06, - "loss": 0.354, + "epoch": 0.45, + "grad_norm": 0.33925978883796937, + "learning_rate": 1.2171295683831761e-05, + "loss": 0.299, "step": 9740 }, { - "epoch": 0.56, - "grad_norm": 0.321207211418575, - "learning_rate": 8.559104010767713e-06, - "loss": 0.1934, + "epoch": 0.45, + "grad_norm": 0.27037012395734517, + "learning_rate": 1.2169843238174346e-05, + "loss": 0.222, "step": 9741 }, { - "epoch": 0.56, - "grad_norm": 1.018284315811994, - "learning_rate": 8.5572625424089e-06, - "loss": 0.4464, + "epoch": 0.45, + "grad_norm": 0.37367918505594144, + "learning_rate": 1.2168390744478697e-05, + "loss": 0.2903, "step": 9742 }, { - "epoch": 0.56, - "grad_norm": 1.050265768211607, - "learning_rate": 8.555421124012026e-06, - "loss": 0.5515, + "epoch": 0.45, + "grad_norm": 1.1503831981016803, + "learning_rate": 1.2166938202776966e-05, + "loss": 0.5653, "step": 9743 }, { - "epoch": 0.56, - "grad_norm": 0.23753029210938723, - "learning_rate": 8.553579755640853e-06, - "loss": 0.1735, + "epoch": 0.45, + "grad_norm": 0.8689418238862675, + "learning_rate": 1.2165485613101314e-05, + "loss": 0.4739, "step": 9744 }, { - "epoch": 0.56, - "grad_norm": 0.3188849467880673, - "learning_rate": 8.551738437359154e-06, - "loss": 0.2574, + "epoch": 0.45, + "grad_norm": 0.2835147128858741, + "learning_rate": 1.2164032975483894e-05, + "loss": 0.2395, "step": 9745 }, { - "epoch": 0.56, - "grad_norm": 0.7259221869316915, - "learning_rate": 8.549897169230689e-06, - "loss": 0.5058, + "epoch": 0.45, + "grad_norm": 0.6140801373803363, + "learning_rate": 1.216258028995687e-05, + "loss": 0.4019, "step": 9746 }, { - "epoch": 0.56, - "grad_norm": 0.4847398425340026, - "learning_rate": 8.548055951319223e-06, - "loss": 0.3058, + "epoch": 0.45, + "grad_norm": 0.7213564285035652, + "learning_rate": 1.2161127556552405e-05, + "loss": 0.3406, "step": 9747 }, { - "epoch": 0.56, - "grad_norm": 0.4390574813917582, - "learning_rate": 8.546214783688518e-06, - "loss": 0.2942, + "epoch": 0.45, + "grad_norm": 0.22960229144515007, + "learning_rate": 1.2159674775302659e-05, + "loss": 0.1588, "step": 9748 }, { - "epoch": 0.56, - "grad_norm": 0.3452402290361021, - "learning_rate": 8.544373666402331e-06, - "loss": 0.3128, + "epoch": 0.45, + "grad_norm": 0.41566153736692524, + "learning_rate": 1.2158221946239791e-05, + "loss": 0.2962, "step": 9749 }, { - "epoch": 0.56, - "grad_norm": 0.3458191816422412, - "learning_rate": 8.542532599524422e-06, - "loss": 0.2582, + "epoch": 0.45, + "grad_norm": 0.6512553975021785, + "learning_rate": 1.2156769069395973e-05, + "loss": 0.3645, "step": 9750 }, { - "epoch": 0.56, - "grad_norm": 0.18158561259795064, - "learning_rate": 8.540691583118545e-06, - "loss": 0.0899, + "epoch": 0.45, + "grad_norm": 0.4059432971407028, + "learning_rate": 1.2155316144803366e-05, + "loss": 0.2608, "step": 9751 }, { - "epoch": 0.56, - "grad_norm": 0.4406492459517275, - "learning_rate": 8.53885061724846e-06, - "loss": 0.3035, + "epoch": 0.45, + "grad_norm": 0.5295887338838029, + "learning_rate": 1.2153863172494137e-05, + "loss": 0.3682, "step": 9752 }, { - "epoch": 0.56, - "grad_norm": 0.3702396463379801, - "learning_rate": 8.537009701977909e-06, - "loss": 0.276, + "epoch": 0.45, + "grad_norm": 0.4306915664495503, + "learning_rate": 1.2152410152500454e-05, + "loss": 0.3252, "step": 9753 }, { - "epoch": 0.56, - "grad_norm": 0.8471022600906709, - "learning_rate": 8.535168837370656e-06, - "loss": 0.357, + "epoch": 0.45, + "grad_norm": 0.3088914041703925, + "learning_rate": 1.2150957084854482e-05, + "loss": 0.1925, "step": 9754 }, { - "epoch": 0.56, - "grad_norm": 0.8915584935820783, - "learning_rate": 8.533328023490438e-06, - "loss": 0.5318, + "epoch": 0.45, + "grad_norm": 0.45589358501535854, + "learning_rate": 1.2149503969588397e-05, + "loss": 0.3086, "step": 9755 }, { - "epoch": 0.56, - "grad_norm": 0.3460314169075071, - "learning_rate": 8.531487260401009e-06, - "loss": 0.2315, + "epoch": 0.45, + "grad_norm": 0.3760141854306172, + "learning_rate": 1.214805080673436e-05, + "loss": 0.2941, "step": 9756 }, { - "epoch": 0.56, - "grad_norm": 0.2540429650530294, - "learning_rate": 8.529646548166113e-06, - "loss": 0.2456, + "epoch": 0.45, + "grad_norm": 0.35035999799741574, + "learning_rate": 1.2146597596324554e-05, + "loss": 0.2815, "step": 9757 }, { - "epoch": 0.56, - "grad_norm": 0.8162276591758156, - "learning_rate": 8.527805886849496e-06, - "loss": 0.3404, + "epoch": 0.45, + "grad_norm": 0.6224921295160596, + "learning_rate": 1.2145144338391143e-05, + "loss": 0.3504, "step": 9758 }, { - "epoch": 0.56, - "grad_norm": 0.6224711881741437, - "learning_rate": 8.525965276514897e-06, - "loss": 0.4223, + "epoch": 0.45, + "grad_norm": 0.42800079302071287, + "learning_rate": 1.2143691032966308e-05, + "loss": 0.2637, "step": 9759 }, { - "epoch": 0.56, - "grad_norm": 0.41310833028155763, - "learning_rate": 8.524124717226057e-06, - "loss": 0.3289, + "epoch": 0.45, + "grad_norm": 0.25494414624199474, + "learning_rate": 1.2142237680082215e-05, + "loss": 0.1951, "step": 9760 }, { - "epoch": 0.56, - "grad_norm": 0.3572582038454073, - "learning_rate": 8.522284209046713e-06, - "loss": 0.2494, + "epoch": 0.45, + "grad_norm": 0.3359462720547074, + "learning_rate": 1.2140784279771046e-05, + "loss": 0.273, "step": 9761 }, { - "epoch": 0.56, - "grad_norm": 0.3742903886740477, - "learning_rate": 8.520443752040604e-06, - "loss": 0.2831, + "epoch": 0.45, + "grad_norm": 0.9511380781922887, + "learning_rate": 1.2139330832064975e-05, + "loss": 0.5621, "step": 9762 }, { - "epoch": 0.56, - "grad_norm": 0.2706597483671145, - "learning_rate": 8.518603346271463e-06, - "loss": 0.2184, + "epoch": 0.45, + "grad_norm": 0.3903353731323217, + "learning_rate": 1.2137877336996185e-05, + "loss": 0.3151, "step": 9763 }, { - "epoch": 0.56, - "grad_norm": 0.38218243499476895, - "learning_rate": 8.516762991803027e-06, - "loss": 0.2738, + "epoch": 0.45, + "grad_norm": 0.33316748092213944, + "learning_rate": 1.2136423794596848e-05, + "loss": 0.2795, "step": 9764 }, { - "epoch": 0.56, - "grad_norm": 0.4708346428419067, - "learning_rate": 8.51492268869902e-06, - "loss": 0.2916, + "epoch": 0.45, + "grad_norm": 0.7749806065546372, + "learning_rate": 1.2134970204899148e-05, + "loss": 0.3924, "step": 9765 }, { - "epoch": 0.56, - "grad_norm": 0.5177037495364069, - "learning_rate": 8.513082437023182e-06, - "loss": 0.4067, + "epoch": 0.45, + "grad_norm": 0.31004463128406, + "learning_rate": 1.2133516567935266e-05, + "loss": 0.2179, "step": 9766 }, { - "epoch": 0.56, - "grad_norm": 0.8805166112100425, - "learning_rate": 8.511242236839232e-06, - "loss": 0.2842, + "epoch": 0.45, + "grad_norm": 0.27513628064909423, + "learning_rate": 1.2132062883737383e-05, + "loss": 0.1274, "step": 9767 }, { - "epoch": 0.56, - "grad_norm": 0.32887364777368694, - "learning_rate": 8.509402088210901e-06, - "loss": 0.2746, + "epoch": 0.45, + "grad_norm": 0.4523267302554366, + "learning_rate": 1.2130609152337683e-05, + "loss": 0.3202, "step": 9768 }, { - "epoch": 0.56, - "grad_norm": 0.26738679639436275, - "learning_rate": 8.507561991201908e-06, - "loss": 0.2453, + "epoch": 0.45, + "grad_norm": 0.37371469218567344, + "learning_rate": 1.2129155373768351e-05, + "loss": 0.3105, "step": 9769 }, { - "epoch": 0.56, - "grad_norm": 0.41725219260940183, - "learning_rate": 8.505721945875985e-06, - "loss": 0.3137, + "epoch": 0.45, + "grad_norm": 0.6639688486753837, + "learning_rate": 1.2127701548061571e-05, + "loss": 0.4584, "step": 9770 }, { - "epoch": 0.56, - "grad_norm": 0.3031029677592879, - "learning_rate": 8.503881952296842e-06, - "loss": 0.2186, + "epoch": 0.45, + "grad_norm": 0.5043326619893206, + "learning_rate": 1.2126247675249525e-05, + "loss": 0.1665, "step": 9771 }, { - "epoch": 0.56, - "grad_norm": 1.254047894329755, - "learning_rate": 8.502042010528205e-06, - "loss": 0.7768, + "epoch": 0.45, + "grad_norm": 0.29739973373773143, + "learning_rate": 1.212479375536441e-05, + "loss": 0.2397, "step": 9772 }, { - "epoch": 0.56, - "grad_norm": 0.3559282773742175, - "learning_rate": 8.50020212063379e-06, - "loss": 0.3021, + "epoch": 0.45, + "grad_norm": 0.3276105582455289, + "learning_rate": 1.2123339788438404e-05, + "loss": 0.2624, "step": 9773 }, { - "epoch": 0.56, - "grad_norm": 0.3623989422829272, - "learning_rate": 8.498362282677308e-06, - "loss": 0.1237, + "epoch": 0.45, + "grad_norm": 0.5537175826070679, + "learning_rate": 1.2121885774503707e-05, + "loss": 0.2522, "step": 9774 }, { - "epoch": 0.56, - "grad_norm": 0.33483177445849416, - "learning_rate": 8.496522496722476e-06, - "loss": 0.2634, + "epoch": 0.45, + "grad_norm": 0.4160929478233597, + "learning_rate": 1.2120431713592501e-05, + "loss": 0.3199, "step": 9775 }, { - "epoch": 0.56, - "grad_norm": 0.32082119699761735, - "learning_rate": 8.494682762833004e-06, - "loss": 0.2741, + "epoch": 0.45, + "grad_norm": 0.46208741405327036, + "learning_rate": 1.2118977605736983e-05, + "loss": 0.3689, "step": 9776 }, { - "epoch": 0.56, - "grad_norm": 0.3887285850178219, - "learning_rate": 8.492843081072609e-06, - "loss": 0.1544, + "epoch": 0.45, + "grad_norm": 0.3334562461233583, + "learning_rate": 1.211752345096934e-05, + "loss": 0.2055, "step": 9777 }, { - "epoch": 0.56, - "grad_norm": 0.4023507557929457, - "learning_rate": 8.491003451504987e-06, - "loss": 0.3336, + "epoch": 0.45, + "grad_norm": 0.3984728491694812, + "learning_rate": 1.211606924932177e-05, + "loss": 0.3037, "step": 9778 }, { - "epoch": 0.56, - "grad_norm": 0.8923515179116659, - "learning_rate": 8.489163874193854e-06, - "loss": 0.5183, + "epoch": 0.45, + "grad_norm": 0.3081280490235535, + "learning_rate": 1.2114615000826466e-05, + "loss": 0.2178, "step": 9779 }, { - "epoch": 0.56, - "grad_norm": 0.32433786243686613, - "learning_rate": 8.487324349202909e-06, - "loss": 0.2238, + "epoch": 0.45, + "grad_norm": 0.34121178701248567, + "learning_rate": 1.2113160705515626e-05, + "loss": 0.2503, "step": 9780 }, { - "epoch": 0.56, - "grad_norm": 0.3837847982567723, - "learning_rate": 8.485484876595859e-06, - "loss": 0.3084, + "epoch": 0.45, + "grad_norm": 0.3645815776055045, + "learning_rate": 1.2111706363421442e-05, + "loss": 0.287, "step": 9781 }, { - "epoch": 0.56, - "grad_norm": 0.45428916728106616, - "learning_rate": 8.4836454564364e-06, - "loss": 0.2587, + "epoch": 0.45, + "grad_norm": 0.7007580616821898, + "learning_rate": 1.2110251974576117e-05, + "loss": 0.4935, "step": 9782 }, { - "epoch": 0.56, - "grad_norm": 0.5838701470386773, - "learning_rate": 8.481806088788235e-06, - "loss": 0.3887, + "epoch": 0.45, + "grad_norm": 1.6116781110259237, + "learning_rate": 1.2108797539011847e-05, + "loss": 0.6702, "step": 9783 }, { - "epoch": 0.56, - "grad_norm": 0.23669142326985326, - "learning_rate": 8.47996677371506e-06, - "loss": 0.2022, + "epoch": 0.45, + "grad_norm": 0.286998580309271, + "learning_rate": 1.2107343056760829e-05, + "loss": 0.2225, "step": 9784 }, { - "epoch": 0.56, - "grad_norm": 0.4869737162516183, - "learning_rate": 8.478127511280571e-06, - "loss": 0.399, + "epoch": 0.45, + "grad_norm": 0.3631676797283083, + "learning_rate": 1.210588852785527e-05, + "loss": 0.2818, "step": 9785 }, { - "epoch": 0.56, - "grad_norm": 0.5174517432230594, - "learning_rate": 8.476288301548458e-06, - "loss": 0.3913, + "epoch": 0.45, + "grad_norm": 0.6485406394530175, + "learning_rate": 1.2104433952327366e-05, + "loss": 0.4345, "step": 9786 }, { - "epoch": 0.56, - "grad_norm": 0.36426826902545983, - "learning_rate": 8.474449144582419e-06, - "loss": 0.2611, + "epoch": 0.45, + "grad_norm": 0.35574802495082974, + "learning_rate": 1.2102979330209325e-05, + "loss": 0.2388, "step": 9787 }, { - "epoch": 0.56, - "grad_norm": 0.339033501439424, - "learning_rate": 8.472610040446142e-06, - "loss": 0.3378, + "epoch": 0.45, + "grad_norm": 0.3424847531270152, + "learning_rate": 1.2101524661533347e-05, + "loss": 0.2634, "step": 9788 }, { - "epoch": 0.56, - "grad_norm": 0.6482947546336639, - "learning_rate": 8.470770989203309e-06, - "loss": 0.4807, + "epoch": 0.45, + "grad_norm": 0.40872203304728333, + "learning_rate": 1.210006994633164e-05, + "loss": 0.3069, "step": 9789 }, { - "epoch": 0.56, - "grad_norm": 0.2700529166944222, - "learning_rate": 8.468931990917613e-06, - "loss": 0.1869, + "epoch": 0.45, + "grad_norm": 0.3378375924408665, + "learning_rate": 1.2098615184636403e-05, + "loss": 0.2169, "step": 9790 }, { - "epoch": 0.56, - "grad_norm": 0.38726787939089174, - "learning_rate": 8.467093045652736e-06, - "loss": 0.2809, + "epoch": 0.45, + "grad_norm": 0.4240667148873578, + "learning_rate": 1.2097160376479855e-05, + "loss": 0.3078, "step": 9791 }, { - "epoch": 0.56, - "grad_norm": 0.3210957803126272, - "learning_rate": 8.465254153472362e-06, - "loss": 0.2913, + "epoch": 0.45, + "grad_norm": 0.3416360335363821, + "learning_rate": 1.2095705521894196e-05, + "loss": 0.3078, "step": 9792 }, { - "epoch": 0.56, - "grad_norm": 0.3114223586414909, - "learning_rate": 8.463415314440172e-06, - "loss": 0.2626, + "epoch": 0.45, + "grad_norm": 0.34859788217475013, + "learning_rate": 1.2094250620911636e-05, + "loss": 0.2308, "step": 9793 }, { - "epoch": 0.56, - "grad_norm": 0.9172670409997502, - "learning_rate": 8.46157652861984e-06, - "loss": 0.6035, + "epoch": 0.45, + "grad_norm": 0.3999443815089186, + "learning_rate": 1.2092795673564384e-05, + "loss": 0.2647, "step": 9794 }, { - "epoch": 0.56, - "grad_norm": 0.7608940459805851, - "learning_rate": 8.45973779607505e-06, - "loss": 0.4936, + "epoch": 0.45, + "grad_norm": 0.7802817232056284, + "learning_rate": 1.2091340679884656e-05, + "loss": 0.4076, "step": 9795 }, { - "epoch": 0.56, - "grad_norm": 0.2258225225155695, - "learning_rate": 8.457899116869469e-06, - "loss": 0.2257, + "epoch": 0.45, + "grad_norm": 0.39252456727000395, + "learning_rate": 1.208988563990466e-05, + "loss": 0.2851, "step": 9796 }, { - "epoch": 0.56, - "grad_norm": 0.2627940231472568, - "learning_rate": 8.45606049106678e-06, - "loss": 0.1897, + "epoch": 0.45, + "grad_norm": 0.33136039596870226, + "learning_rate": 1.208843055365661e-05, + "loss": 0.2858, "step": 9797 }, { - "epoch": 0.56, - "grad_norm": 0.5623646815898292, - "learning_rate": 8.454221918730646e-06, - "loss": 0.3763, + "epoch": 0.45, + "grad_norm": 0.7234533770186289, + "learning_rate": 1.208697542117272e-05, + "loss": 0.5125, "step": 9798 }, { - "epoch": 0.56, - "grad_norm": 0.3464750026436955, - "learning_rate": 8.452383399924743e-06, - "loss": 0.2948, + "epoch": 0.45, + "grad_norm": 0.400788613151131, + "learning_rate": 1.2085520242485205e-05, + "loss": 0.2743, "step": 9799 }, { - "epoch": 0.56, - "grad_norm": 0.3381683606193286, - "learning_rate": 8.450544934712736e-06, - "loss": 0.2535, + "epoch": 0.45, + "grad_norm": 0.22915453909584654, + "learning_rate": 1.2084065017626282e-05, + "loss": 0.1778, "step": 9800 }, { - "epoch": 0.56, - "grad_norm": 0.6811041565579721, - "learning_rate": 8.44870652315829e-06, - "loss": 0.4924, + "epoch": 0.45, + "grad_norm": 0.9230868217101192, + "learning_rate": 1.2082609746628169e-05, + "loss": 0.5609, "step": 9801 }, { - "epoch": 0.56, - "grad_norm": 0.2585541466426452, - "learning_rate": 8.446868165325073e-06, - "loss": 0.2242, + "epoch": 0.45, + "grad_norm": 0.3455153426624651, + "learning_rate": 1.2081154429523084e-05, + "loss": 0.289, "step": 9802 }, { - "epoch": 0.56, - "grad_norm": 0.4408860747267185, - "learning_rate": 8.445029861276742e-06, - "loss": 0.1794, + "epoch": 0.45, + "grad_norm": 0.904840523925394, + "learning_rate": 1.2079699066343242e-05, + "loss": 0.3584, "step": 9803 }, { - "epoch": 0.56, - "grad_norm": 0.30415946757221135, - "learning_rate": 8.443191611076962e-06, - "loss": 0.2894, + "epoch": 0.45, + "grad_norm": 0.3680123567835089, + "learning_rate": 1.2078243657120871e-05, + "loss": 0.3227, "step": 9804 }, { - "epoch": 0.56, - "grad_norm": 0.3788792995472617, - "learning_rate": 8.441353414789386e-06, - "loss": 0.2975, + "epoch": 0.45, + "grad_norm": 0.3580933864450864, + "learning_rate": 1.2076788201888187e-05, + "loss": 0.2752, "step": 9805 }, { - "epoch": 0.56, - "grad_norm": 1.3016544580089682, - "learning_rate": 8.439515272477679e-06, - "loss": 0.6672, + "epoch": 0.45, + "grad_norm": 0.49760555510209187, + "learning_rate": 1.2075332700677418e-05, + "loss": 0.3219, "step": 9806 }, { - "epoch": 0.56, - "grad_norm": 0.4190179098500511, - "learning_rate": 8.437677184205488e-06, - "loss": 0.2239, + "epoch": 0.45, + "grad_norm": 0.32365328037705554, + "learning_rate": 1.2073877153520776e-05, + "loss": 0.1982, "step": 9807 }, { - "epoch": 0.56, - "grad_norm": 0.3089991674098943, - "learning_rate": 8.43583915003647e-06, - "loss": 0.2844, + "epoch": 0.45, + "grad_norm": 0.2988868872201726, + "learning_rate": 1.2072421560450497e-05, + "loss": 0.2504, "step": 9808 }, { - "epoch": 0.56, - "grad_norm": 0.2639149079109884, - "learning_rate": 8.434001170034273e-06, - "loss": 0.2243, + "epoch": 0.45, + "grad_norm": 1.4911225225553888, + "learning_rate": 1.2070965921498801e-05, + "loss": 0.457, "step": 9809 }, { - "epoch": 0.56, - "grad_norm": 0.556166511583863, - "learning_rate": 8.432163244262551e-06, - "loss": 0.3236, + "epoch": 0.45, + "grad_norm": 0.713714900647917, + "learning_rate": 1.2069510236697918e-05, + "loss": 0.335, "step": 9810 }, { - "epoch": 0.56, - "grad_norm": 0.32418630784592684, - "learning_rate": 8.430325372784946e-06, - "loss": 0.2629, + "epoch": 0.45, + "grad_norm": 0.3976964988004283, + "learning_rate": 1.2068054506080071e-05, + "loss": 0.2771, "step": 9811 }, { - "epoch": 0.56, - "grad_norm": 0.3574565696512591, - "learning_rate": 8.428487555665108e-06, - "loss": 0.3277, + "epoch": 0.45, + "grad_norm": 0.3511786209754486, + "learning_rate": 1.206659872967749e-05, + "loss": 0.2752, "step": 9812 }, { - "epoch": 0.56, - "grad_norm": 0.43378655974248065, - "learning_rate": 8.426649792966679e-06, - "loss": 0.2564, + "epoch": 0.45, + "grad_norm": 0.2573427025749381, + "learning_rate": 1.2065142907522405e-05, + "loss": 0.1016, "step": 9813 }, { - "epoch": 0.56, - "grad_norm": 0.3671253402319884, - "learning_rate": 8.424812084753297e-06, - "loss": 0.2683, + "epoch": 0.45, + "grad_norm": 0.4215648233180496, + "learning_rate": 1.2063687039647045e-05, + "loss": 0.2876, "step": 9814 }, { - "epoch": 0.56, - "grad_norm": 0.6749395956942922, - "learning_rate": 8.422974431088607e-06, - "loss": 0.3903, + "epoch": 0.45, + "grad_norm": 2.169463531768768, + "learning_rate": 1.2062231126083645e-05, + "loss": 0.4939, "step": 9815 }, { - "epoch": 0.56, - "grad_norm": 0.2000129236440116, - "learning_rate": 8.421136832036242e-06, - "loss": 0.1704, + "epoch": 0.45, + "grad_norm": 0.3767853167882123, + "learning_rate": 1.2060775166864435e-05, + "loss": 0.2606, "step": 9816 }, { - "epoch": 0.56, - "grad_norm": 0.3516750238446244, - "learning_rate": 8.419299287659844e-06, - "loss": 0.2996, + "epoch": 0.45, + "grad_norm": 0.3675148869718269, + "learning_rate": 1.205931916202165e-05, + "loss": 0.2465, "step": 9817 }, { - "epoch": 0.56, - "grad_norm": 1.055794532902847, - "learning_rate": 8.417461798023042e-06, - "loss": 0.6188, + "epoch": 0.45, + "grad_norm": 0.24739530758944192, + "learning_rate": 1.2057863111587521e-05, + "loss": 0.158, "step": 9818 }, { - "epoch": 0.56, - "grad_norm": 0.4547799461366989, - "learning_rate": 8.41562436318947e-06, - "loss": 0.3617, + "epoch": 0.45, + "grad_norm": 0.9310876030736751, + "learning_rate": 1.205640701559429e-05, + "loss": 0.5798, "step": 9819 }, { - "epoch": 0.56, - "grad_norm": 0.28796199443351, - "learning_rate": 8.413786983222758e-06, - "loss": 0.2079, + "epoch": 0.45, + "grad_norm": 0.3587609416274913, + "learning_rate": 1.2054950874074185e-05, + "loss": 0.2299, "step": 9820 }, { - "epoch": 0.56, - "grad_norm": 0.828814734820839, - "learning_rate": 8.411949658186536e-06, - "loss": 0.3814, + "epoch": 0.45, + "grad_norm": 1.0910233222151184, + "learning_rate": 1.2053494687059453e-05, + "loss": 0.4773, "step": 9821 }, { - "epoch": 0.56, - "grad_norm": 0.29849571473835407, - "learning_rate": 8.410112388144426e-06, - "loss": 0.2056, + "epoch": 0.45, + "grad_norm": 1.402513700023617, + "learning_rate": 1.2052038454582325e-05, + "loss": 0.8226, "step": 9822 }, { - "epoch": 0.56, - "grad_norm": 0.3716093461533886, - "learning_rate": 8.408275173160059e-06, - "loss": 0.207, + "epoch": 0.45, + "grad_norm": 0.3847890491427266, + "learning_rate": 1.2050582176675045e-05, + "loss": 0.211, "step": 9823 }, { - "epoch": 0.56, - "grad_norm": 0.3931711629255428, - "learning_rate": 8.406438013297052e-06, - "loss": 0.3198, + "epoch": 0.45, + "grad_norm": 0.4451760185603885, + "learning_rate": 1.204912585336985e-05, + "loss": 0.3247, "step": 9824 }, { - "epoch": 0.56, - "grad_norm": 0.8020438520926569, - "learning_rate": 8.404600908619033e-06, - "loss": 0.5594, + "epoch": 0.45, + "grad_norm": 0.28257036452890893, + "learning_rate": 1.2047669484698985e-05, + "loss": 0.2049, "step": 9825 }, { - "epoch": 0.56, - "grad_norm": 0.39734590436170697, - "learning_rate": 8.40276385918961e-06, - "loss": 0.2346, + "epoch": 0.45, + "grad_norm": 0.3477096002316174, + "learning_rate": 1.204621307069469e-05, + "loss": 0.2494, "step": 9826 }, { - "epoch": 0.56, - "grad_norm": 0.8506087767154734, - "learning_rate": 8.40092686507241e-06, - "loss": 0.4187, + "epoch": 0.45, + "grad_norm": 1.0404416534333325, + "learning_rate": 1.204475661138921e-05, + "loss": 0.5631, "step": 9827 }, { - "epoch": 0.56, - "grad_norm": 0.255074633086988, - "learning_rate": 8.39908992633104e-06, - "loss": 0.2158, - "step": 9828 + "epoch": 0.45, + "grad_norm": 0.4775966066776573, + "learning_rate": 1.204330010681479e-05, + "loss": 0.3728, + "step": 9828 }, { - "epoch": 0.56, - "grad_norm": 0.31639947374690586, - "learning_rate": 8.39725304302912e-06, - "loss": 0.1998, + "epoch": 0.45, + "grad_norm": 0.40197001356967377, + "learning_rate": 1.2041843557003674e-05, + "loss": 0.1955, "step": 9829 }, { - "epoch": 0.56, - "grad_norm": 1.2054591386012041, - "learning_rate": 8.395416215230255e-06, - "loss": 0.7528, + "epoch": 0.45, + "grad_norm": 0.3493036876943971, + "learning_rate": 1.2040386961988111e-05, + "loss": 0.2356, "step": 9830 }, { - "epoch": 0.56, - "grad_norm": 0.4489425221494689, - "learning_rate": 8.39357944299806e-06, - "loss": 0.3473, + "epoch": 0.45, + "grad_norm": 0.4340541695765449, + "learning_rate": 1.2038930321800346e-05, + "loss": 0.2801, "step": 9831 }, { - "epoch": 0.56, - "grad_norm": 0.3019501530801851, - "learning_rate": 8.391742726396138e-06, - "loss": 0.2471, + "epoch": 0.45, + "grad_norm": 0.42660088999761014, + "learning_rate": 1.203747363647263e-05, + "loss": 0.2919, "step": 9832 }, { - "epoch": 0.56, - "grad_norm": 0.6973811792398468, - "learning_rate": 8.389906065488099e-06, - "loss": 0.3255, + "epoch": 0.45, + "grad_norm": 0.45173310947220213, + "learning_rate": 1.2036016906037208e-05, + "loss": 0.277, "step": 9833 }, { - "epoch": 0.57, - "grad_norm": 0.28463618303054367, - "learning_rate": 8.38806946033754e-06, - "loss": 0.1894, + "epoch": 0.45, + "grad_norm": 1.5439288365132544, + "learning_rate": 1.2034560130526341e-05, + "loss": 0.8163, "step": 9834 }, { - "epoch": 0.57, - "grad_norm": 0.35151178079415824, - "learning_rate": 8.386232911008069e-06, - "loss": 0.2611, + "epoch": 0.45, + "grad_norm": 0.44370932924822437, + "learning_rate": 1.2033103309972268e-05, + "loss": 0.2872, "step": 9835 }, { - "epoch": 0.57, - "grad_norm": 0.40041939562222845, - "learning_rate": 8.38439641756328e-06, - "loss": 0.2496, + "epoch": 0.45, + "grad_norm": 0.3395466984360209, + "learning_rate": 1.203164644440725e-05, + "loss": 0.2456, "step": 9836 }, { - "epoch": 0.57, - "grad_norm": 0.876402219608769, - "learning_rate": 8.382559980066778e-06, - "loss": 0.4565, + "epoch": 0.45, + "grad_norm": 0.5640636664426414, + "learning_rate": 1.2030189533863534e-05, + "loss": 0.2837, "step": 9837 }, { - "epoch": 0.57, - "grad_norm": 0.31643695032595764, - "learning_rate": 8.380723598582152e-06, - "loss": 0.2451, + "epoch": 0.45, + "grad_norm": 0.4929367045310672, + "learning_rate": 1.2028732578373381e-05, + "loss": 0.3143, "step": 9838 }, { - "epoch": 0.57, - "grad_norm": 0.9533380440004469, - "learning_rate": 8.378887273172997e-06, - "loss": 0.3012, + "epoch": 0.45, + "grad_norm": 0.314763231174707, + "learning_rate": 1.2027275577969046e-05, + "loss": 0.1592, "step": 9839 }, { - "epoch": 0.57, - "grad_norm": 0.20603783465622966, - "learning_rate": 8.37705100390291e-06, - "loss": 0.2236, + "epoch": 0.45, + "grad_norm": 0.45987812327961775, + "learning_rate": 1.2025818532682783e-05, + "loss": 0.3678, "step": 9840 }, { - "epoch": 0.57, - "grad_norm": 0.37732886480516004, - "learning_rate": 8.375214790835471e-06, - "loss": 0.2641, + "epoch": 0.45, + "grad_norm": 0.3935448290689008, + "learning_rate": 1.2024361442546849e-05, + "loss": 0.2689, "step": 9841 }, { - "epoch": 0.57, - "grad_norm": 0.48964154358706147, - "learning_rate": 8.37337863403428e-06, - "loss": 0.2918, + "epoch": 0.45, + "grad_norm": 0.8941428513169435, + "learning_rate": 1.2022904307593502e-05, + "loss": 0.3277, "step": 9842 }, { - "epoch": 0.57, - "grad_norm": 0.4250539613089976, - "learning_rate": 8.371542533562912e-06, - "loss": 0.3089, + "epoch": 0.45, + "grad_norm": 0.4541836223235423, + "learning_rate": 1.2021447127855005e-05, + "loss": 0.3599, "step": 9843 }, { - "epoch": 0.57, - "grad_norm": 0.35552109627010425, - "learning_rate": 8.369706489484958e-06, - "loss": 0.2745, + "epoch": 0.45, + "grad_norm": 0.30117761435340923, + "learning_rate": 1.2019989903363616e-05, + "loss": 0.2526, "step": 9844 }, { - "epoch": 0.57, - "grad_norm": 0.9900011631599458, - "learning_rate": 8.367870501863999e-06, - "loss": 0.4337, + "epoch": 0.45, + "grad_norm": 0.31439134566434324, + "learning_rate": 1.2018532634151598e-05, + "loss": 0.1847, "step": 9845 }, { - "epoch": 0.57, - "grad_norm": 0.18633542192754032, - "learning_rate": 8.366034570763614e-06, - "loss": 0.0689, + "epoch": 0.45, + "grad_norm": 1.320955756950874, + "learning_rate": 1.2017075320251215e-05, + "loss": 0.5562, "step": 9846 }, { - "epoch": 0.57, - "grad_norm": 0.3414535184442479, - "learning_rate": 8.36419869624738e-06, - "loss": 0.2813, + "epoch": 0.45, + "grad_norm": 0.6641414379747985, + "learning_rate": 1.2015617961694727e-05, + "loss": 0.4212, "step": 9847 }, { - "epoch": 0.57, - "grad_norm": 0.37358328219071146, - "learning_rate": 8.362362878378876e-06, - "loss": 0.329, + "epoch": 0.45, + "grad_norm": 0.3322556877763977, + "learning_rate": 1.2014160558514398e-05, + "loss": 0.2817, "step": 9848 }, { - "epoch": 0.57, - "grad_norm": 0.8248357384042281, - "learning_rate": 8.360527117221675e-06, - "loss": 0.3291, + "epoch": 0.45, + "grad_norm": 0.8939247092135392, + "learning_rate": 1.2012703110742498e-05, + "loss": 0.3594, "step": 9849 }, { - "epoch": 0.57, - "grad_norm": 0.36147984520054255, - "learning_rate": 8.358691412839351e-06, - "loss": 0.3034, + "epoch": 0.45, + "grad_norm": 0.3274390566188609, + "learning_rate": 1.201124561841129e-05, + "loss": 0.1882, "step": 9850 }, { - "epoch": 0.57, - "grad_norm": 0.5104748583363516, - "learning_rate": 8.35685576529547e-06, - "loss": 0.3727, + "epoch": 0.45, + "grad_norm": 0.3426358085861394, + "learning_rate": 1.2009788081553042e-05, + "loss": 0.2647, "step": 9851 }, { - "epoch": 0.57, - "grad_norm": 0.40198951064850447, - "learning_rate": 8.355020174653605e-06, - "loss": 0.2165, + "epoch": 0.45, + "grad_norm": 0.40052228770882586, + "learning_rate": 1.2008330500200025e-05, + "loss": 0.2815, "step": 9852 }, { - "epoch": 0.57, - "grad_norm": 0.24647184833766747, - "learning_rate": 8.35318464097732e-06, - "loss": 0.2172, + "epoch": 0.45, + "grad_norm": 0.44757964211627904, + "learning_rate": 1.2006872874384505e-05, + "loss": 0.3171, "step": 9853 }, { - "epoch": 0.57, - "grad_norm": 0.3810295500607417, - "learning_rate": 8.35134916433018e-06, - "loss": 0.2782, + "epoch": 0.45, + "grad_norm": 0.5155837001346982, + "learning_rate": 1.2005415204138753e-05, + "loss": 0.3767, "step": 9854 }, { - "epoch": 0.57, - "grad_norm": 0.3857340949697676, - "learning_rate": 8.349513744775748e-06, - "loss": 0.289, + "epoch": 0.45, + "grad_norm": 0.49261191226632833, + "learning_rate": 1.2003957489495043e-05, + "loss": 0.2855, "step": 9855 }, { - "epoch": 0.57, - "grad_norm": 0.34685648534680974, - "learning_rate": 8.347678382377584e-06, - "loss": 0.2889, + "epoch": 0.45, + "grad_norm": 0.35595148383086916, + "learning_rate": 1.2002499730485643e-05, + "loss": 0.2745, "step": 9856 }, { - "epoch": 0.57, - "grad_norm": 1.0213706600766048, - "learning_rate": 8.345843077199247e-06, - "loss": 0.604, + "epoch": 0.45, + "grad_norm": 0.2824074841956882, + "learning_rate": 1.2001041927142833e-05, + "loss": 0.1969, "step": 9857 }, { - "epoch": 0.57, - "grad_norm": 0.43460712379136257, - "learning_rate": 8.344007829304291e-06, - "loss": 0.2663, + "epoch": 0.45, + "grad_norm": 1.176496021438052, + "learning_rate": 1.1999584079498883e-05, + "loss": 0.7641, "step": 9858 }, { - "epoch": 0.57, - "grad_norm": 0.2396752936639507, - "learning_rate": 8.342172638756276e-06, - "loss": 0.1397, + "epoch": 0.45, + "grad_norm": 0.38426625399968406, + "learning_rate": 1.1998126187586064e-05, + "loss": 0.2196, "step": 9859 }, { - "epoch": 0.57, - "grad_norm": 0.3169017785415661, - "learning_rate": 8.34033750561875e-06, - "loss": 0.29, + "epoch": 0.45, + "grad_norm": 0.31508060860118714, + "learning_rate": 1.199666825143666e-05, + "loss": 0.2836, "step": 9860 }, { - "epoch": 0.57, - "grad_norm": 0.9931863567969424, - "learning_rate": 8.338502429955264e-06, - "loss": 0.4568, + "epoch": 0.45, + "grad_norm": 0.6994740764647053, + "learning_rate": 1.1995210271082944e-05, + "loss": 0.4165, "step": 9861 }, { - "epoch": 0.57, - "grad_norm": 0.3480195681104575, - "learning_rate": 8.33666741182937e-06, - "loss": 0.1948, + "epoch": 0.45, + "grad_norm": 0.3213200341219081, + "learning_rate": 1.1993752246557197e-05, + "loss": 0.2073, "step": 9862 }, { - "epoch": 0.57, - "grad_norm": 0.37565505477248806, - "learning_rate": 8.334832451304607e-06, - "loss": 0.3106, + "epoch": 0.45, + "grad_norm": 0.31620948065138504, + "learning_rate": 1.1992294177891697e-05, + "loss": 0.1869, "step": 9863 }, { - "epoch": 0.57, - "grad_norm": 0.45130905555980977, - "learning_rate": 8.332997548444528e-06, - "loss": 0.3433, + "epoch": 0.45, + "grad_norm": 0.37732292842176324, + "learning_rate": 1.1990836065118725e-05, + "loss": 0.3314, "step": 9864 }, { - "epoch": 0.57, - "grad_norm": 0.3033398640505855, - "learning_rate": 8.331162703312671e-06, - "loss": 0.1934, + "epoch": 0.45, + "grad_norm": 0.4371786052422833, + "learning_rate": 1.1989377908270559e-05, + "loss": 0.2103, "step": 9865 }, { - "epoch": 0.57, - "grad_norm": 0.5363775633765079, - "learning_rate": 8.329327915972578e-06, - "loss": 0.2919, + "epoch": 0.45, + "grad_norm": 0.4273489372030156, + "learning_rate": 1.1987919707379486e-05, + "loss": 0.3445, "step": 9866 }, { - "epoch": 0.57, - "grad_norm": 0.747351620662827, - "learning_rate": 8.32749318648779e-06, - "loss": 0.3933, + "epoch": 0.45, + "grad_norm": 0.3803194276353355, + "learning_rate": 1.1986461462477783e-05, + "loss": 0.3278, "step": 9867 }, { - "epoch": 0.57, - "grad_norm": 0.26553280669644175, - "learning_rate": 8.325658514921838e-06, - "loss": 0.211, + "epoch": 0.45, + "grad_norm": 0.3992800754820834, + "learning_rate": 1.198500317359774e-05, + "loss": 0.1265, "step": 9868 }, { - "epoch": 0.57, - "grad_norm": 0.4145040507342814, - "learning_rate": 8.32382390133826e-06, - "loss": 0.227, + "epoch": 0.45, + "grad_norm": 0.3167209282769644, + "learning_rate": 1.1983544840771639e-05, + "loss": 0.2525, "step": 9869 }, { - "epoch": 0.57, - "grad_norm": 1.1950814437419346, - "learning_rate": 8.321989345800587e-06, - "loss": 0.6061, + "epoch": 0.45, + "grad_norm": 0.4433473920545439, + "learning_rate": 1.198208646403177e-05, + "loss": 0.2906, "step": 9870 }, { - "epoch": 0.57, - "grad_norm": 0.3002830485967934, - "learning_rate": 8.320154848372353e-06, - "loss": 0.2718, + "epoch": 0.45, + "grad_norm": 0.4587377413914146, + "learning_rate": 1.1980628043410417e-05, + "loss": 0.3146, "step": 9871 }, { - "epoch": 0.57, - "grad_norm": 0.3226666488160495, - "learning_rate": 8.318320409117082e-06, - "loss": 0.2688, + "epoch": 0.45, + "grad_norm": 0.31101859211616145, + "learning_rate": 1.1979169578939863e-05, + "loss": 0.2589, "step": 9872 }, { - "epoch": 0.57, - "grad_norm": 0.37089315357089786, - "learning_rate": 8.316486028098306e-06, - "loss": 0.2992, + "epoch": 0.45, + "grad_norm": 1.1648152548915247, + "learning_rate": 1.1977711070652405e-05, + "loss": 0.5716, "step": 9873 }, { - "epoch": 0.57, - "grad_norm": 0.29002209727779943, - "learning_rate": 8.314651705379544e-06, - "loss": 0.2231, + "epoch": 0.45, + "grad_norm": 0.5591310212453935, + "learning_rate": 1.197625251858033e-05, + "loss": 0.3483, "step": 9874 }, { - "epoch": 0.57, - "grad_norm": 0.3455087689241484, - "learning_rate": 8.312817441024324e-06, - "loss": 0.2336, + "epoch": 0.45, + "grad_norm": 0.24482419041979983, + "learning_rate": 1.1974793922755931e-05, + "loss": 0.2116, "step": 9875 }, { - "epoch": 0.57, - "grad_norm": 0.5069761719351095, - "learning_rate": 8.31098323509616e-06, - "loss": 0.3668, + "epoch": 0.45, + "grad_norm": 1.3333560926228034, + "learning_rate": 1.1973335283211494e-05, + "loss": 0.8547, "step": 9876 }, { - "epoch": 0.57, - "grad_norm": 0.38171523506523025, - "learning_rate": 8.309149087658576e-06, - "loss": 0.278, + "epoch": 0.45, + "grad_norm": 0.5544080391728592, + "learning_rate": 1.197187659997932e-05, + "loss": 0.3456, "step": 9877 }, { - "epoch": 0.57, - "grad_norm": 0.37069169080183434, - "learning_rate": 8.307314998775087e-06, - "loss": 0.2917, + "epoch": 0.45, + "grad_norm": 0.30995905476407815, + "learning_rate": 1.1970417873091694e-05, + "loss": 0.2078, "step": 9878 }, { - "epoch": 0.57, - "grad_norm": 0.3392341659426202, - "learning_rate": 8.305480968509204e-06, - "loss": 0.3168, + "epoch": 0.45, + "grad_norm": 0.38920138673578636, + "learning_rate": 1.1968959102580917e-05, + "loss": 0.3105, "step": 9879 }, { - "epoch": 0.57, - "grad_norm": 0.5341042702280032, - "learning_rate": 8.303646996924445e-06, - "loss": 0.3165, + "epoch": 0.45, + "grad_norm": 0.5877853632541026, + "learning_rate": 1.1967500288479286e-05, + "loss": 0.2957, "step": 9880 }, { - "epoch": 0.57, - "grad_norm": 0.2169102274872214, - "learning_rate": 8.301813084084315e-06, - "loss": 0.1475, + "epoch": 0.45, + "grad_norm": 0.299097380123809, + "learning_rate": 1.1966041430819093e-05, + "loss": 0.174, "step": 9881 }, { - "epoch": 0.57, - "grad_norm": 0.5998300732121216, - "learning_rate": 8.299979230052327e-06, - "loss": 0.4361, + "epoch": 0.45, + "grad_norm": 0.6740685784022121, + "learning_rate": 1.1964582529632636e-05, + "loss": 0.4685, "step": 9882 }, { - "epoch": 0.57, - "grad_norm": 0.31913217855811254, - "learning_rate": 8.298145434891983e-06, - "loss": 0.2759, + "epoch": 0.45, + "grad_norm": 0.3936863531525683, + "learning_rate": 1.1963123584952216e-05, + "loss": 0.3088, "step": 9883 }, { - "epoch": 0.57, - "grad_norm": 0.3384502713432209, - "learning_rate": 8.296311698666792e-06, - "loss": 0.3219, + "epoch": 0.45, + "grad_norm": 0.3230673172312626, + "learning_rate": 1.1961664596810132e-05, + "loss": 0.3076, "step": 9884 }, { - "epoch": 0.57, - "grad_norm": 0.7524840483991162, - "learning_rate": 8.29447802144025e-06, - "loss": 0.4665, + "epoch": 0.45, + "grad_norm": 0.3079285530118613, + "learning_rate": 1.1960205565238685e-05, + "loss": 0.1353, "step": 9885 }, { - "epoch": 0.57, - "grad_norm": 0.2888469947900723, - "learning_rate": 8.292644403275865e-06, - "loss": 0.2242, + "epoch": 0.45, + "grad_norm": 0.6791099530219162, + "learning_rate": 1.1958746490270178e-05, + "loss": 0.4013, "step": 9886 }, { - "epoch": 0.57, - "grad_norm": 0.2532786012933536, - "learning_rate": 8.290810844237128e-06, - "loss": 0.2485, + "epoch": 0.45, + "grad_norm": 0.27475832200133365, + "learning_rate": 1.195728737193691e-05, + "loss": 0.2418, "step": 9887 }, { - "epoch": 0.57, - "grad_norm": 0.7263770853364647, - "learning_rate": 8.28897734438754e-06, - "loss": 0.2903, + "epoch": 0.45, + "grad_norm": 0.32133589862570605, + "learning_rate": 1.1955828210271187e-05, + "loss": 0.2818, "step": 9888 }, { - "epoch": 0.57, - "grad_norm": 0.3729535266775858, - "learning_rate": 8.28714390379059e-06, - "loss": 0.2678, + "epoch": 0.45, + "grad_norm": 0.7307020481778115, + "learning_rate": 1.1954369005305308e-05, + "loss": 0.4087, "step": 9889 }, { - "epoch": 0.57, - "grad_norm": 0.4971089032472461, - "learning_rate": 8.285310522509777e-06, - "loss": 0.3529, + "epoch": 0.45, + "grad_norm": 0.27730206076175884, + "learning_rate": 1.1952909757071587e-05, + "loss": 0.2384, "step": 9890 }, { - "epoch": 0.57, - "grad_norm": 0.32657011764466537, - "learning_rate": 8.283477200608585e-06, - "loss": 0.2734, + "epoch": 0.45, + "grad_norm": 0.3371990170578978, + "learning_rate": 1.1951450465602326e-05, + "loss": 0.259, "step": 9891 }, { - "epoch": 0.57, - "grad_norm": 0.4085045169813695, - "learning_rate": 8.281643938150504e-06, - "loss": 0.2973, + "epoch": 0.45, + "grad_norm": 0.8661464633744825, + "learning_rate": 1.1949991130929833e-05, + "loss": 0.3902, "step": 9892 }, { - "epoch": 0.57, - "grad_norm": 0.25379677627879427, - "learning_rate": 8.27981073519902e-06, - "loss": 0.1814, + "epoch": 0.45, + "grad_norm": 0.39570760398043525, + "learning_rate": 1.1948531753086415e-05, + "loss": 0.3243, "step": 9893 }, { - "epoch": 0.57, - "grad_norm": 0.8729493209016004, - "learning_rate": 8.277977591817617e-06, - "loss": 0.3863, + "epoch": 0.45, + "grad_norm": 0.7596920450160195, + "learning_rate": 1.1947072332104381e-05, + "loss": 0.3484, "step": 9894 }, { - "epoch": 0.57, - "grad_norm": 0.30599910372112143, - "learning_rate": 8.276144508069775e-06, - "loss": 0.2213, + "epoch": 0.45, + "grad_norm": 0.321648116380903, + "learning_rate": 1.1945612868016041e-05, + "loss": 0.2655, "step": 9895 }, { - "epoch": 0.57, - "grad_norm": 0.38495626365299385, - "learning_rate": 8.274311484018975e-06, - "loss": 0.3128, + "epoch": 0.45, + "grad_norm": 0.4545342733738934, + "learning_rate": 1.194415336085371e-05, + "loss": 0.3285, "step": 9896 }, { - "epoch": 0.57, - "grad_norm": 0.9544093017904444, - "learning_rate": 8.27247851972869e-06, - "loss": 0.6904, + "epoch": 0.45, + "grad_norm": 0.29334201039261004, + "learning_rate": 1.1942693810649695e-05, + "loss": 0.1654, "step": 9897 }, { - "epoch": 0.57, - "grad_norm": 0.4332396370100671, - "learning_rate": 8.270645615262405e-06, - "loss": 0.1635, + "epoch": 0.45, + "grad_norm": 0.3847209842026497, + "learning_rate": 1.1941234217436315e-05, + "loss": 0.1227, "step": 9898 }, { - "epoch": 0.57, - "grad_norm": 0.21958238359771295, - "learning_rate": 8.268812770683583e-06, - "loss": 0.2311, + "epoch": 0.45, + "grad_norm": 0.4135355616341419, + "learning_rate": 1.1939774581245878e-05, + "loss": 0.3212, "step": 9899 }, { - "epoch": 0.57, - "grad_norm": 0.7528076032824318, - "learning_rate": 8.266979986055704e-06, - "loss": 0.3034, + "epoch": 0.45, + "grad_norm": 0.4250930230310954, + "learning_rate": 1.1938314902110701e-05, + "loss": 0.3181, "step": 9900 }, { - "epoch": 0.57, - "grad_norm": 0.2277184270609133, - "learning_rate": 8.265147261442232e-06, - "loss": 0.0727, + "epoch": 0.45, + "grad_norm": 0.5455250109658105, + "learning_rate": 1.1936855180063102e-05, + "loss": 0.3129, "step": 9901 }, { - "epoch": 0.57, - "grad_norm": 0.3900048604849335, - "learning_rate": 8.263314596906636e-06, - "loss": 0.3282, + "epoch": 0.45, + "grad_norm": 0.4059397815569489, + "learning_rate": 1.1935395415135393e-05, + "loss": 0.3016, "step": 9902 }, { - "epoch": 0.57, - "grad_norm": 0.3516000182623754, - "learning_rate": 8.261481992512382e-06, - "loss": 0.3266, + "epoch": 0.45, + "grad_norm": 0.2617455222505545, + "learning_rate": 1.19339356073599e-05, + "loss": 0.216, "step": 9903 }, { - "epoch": 0.57, - "grad_norm": 0.41670733313075314, - "learning_rate": 8.25964944832293e-06, - "loss": 0.1948, + "epoch": 0.45, + "grad_norm": 0.5803570776410119, + "learning_rate": 1.1932475756768933e-05, + "loss": 0.1574, "step": 9904 }, { - "epoch": 0.57, - "grad_norm": 0.309400465234977, - "learning_rate": 8.257816964401745e-06, - "loss": 0.2583, + "epoch": 0.46, + "grad_norm": 0.3936338782340999, + "learning_rate": 1.1931015863394818e-05, + "loss": 0.2918, "step": 9905 }, { - "epoch": 0.57, - "grad_norm": 0.4465142891914701, - "learning_rate": 8.255984540812281e-06, - "loss": 0.3008, + "epoch": 0.46, + "grad_norm": 0.6880842529572427, + "learning_rate": 1.1929555927269866e-05, + "loss": 0.4387, "step": 9906 }, { - "epoch": 0.57, - "grad_norm": 0.28221141190811483, - "learning_rate": 8.254152177618e-06, - "loss": 0.2664, + "epoch": 0.46, + "grad_norm": 0.3303837993545562, + "learning_rate": 1.1928095948426413e-05, + "loss": 0.2291, "step": 9907 }, { - "epoch": 0.57, - "grad_norm": 0.3505287455300988, - "learning_rate": 8.252319874882351e-06, - "loss": 0.2389, + "epoch": 0.46, + "grad_norm": 0.3670635646926979, + "learning_rate": 1.192663592689677e-05, + "loss": 0.295, "step": 9908 }, { - "epoch": 0.57, - "grad_norm": 1.0994942283600821, - "learning_rate": 8.250487632668793e-06, - "loss": 0.7445, + "epoch": 0.46, + "grad_norm": 0.3077128779601124, + "learning_rate": 1.1925175862713266e-05, + "loss": 0.1725, "step": 9909 }, { - "epoch": 0.57, - "grad_norm": 0.522218227682, - "learning_rate": 8.248655451040768e-06, - "loss": 0.2764, + "epoch": 0.46, + "grad_norm": 0.6334251586941848, + "learning_rate": 1.1923715755908223e-05, + "loss": 0.4326, "step": 9910 }, { - "epoch": 0.57, - "grad_norm": 0.2631576808456909, - "learning_rate": 8.246823330061734e-06, - "loss": 0.2347, + "epoch": 0.46, + "grad_norm": 0.28847088873818116, + "learning_rate": 1.192225560651397e-05, + "loss": 0.2265, "step": 9911 }, { - "epoch": 0.57, - "grad_norm": 0.40714845276580147, - "learning_rate": 8.24499126979513e-06, - "loss": 0.281, + "epoch": 0.46, + "grad_norm": 0.7631666592526921, + "learning_rate": 1.1920795414562826e-05, + "loss": 0.5227, "step": 9912 }, { - "epoch": 0.57, - "grad_norm": 0.28989171280742454, - "learning_rate": 8.243159270304406e-06, - "loss": 0.1528, + "epoch": 0.46, + "grad_norm": 1.1749461419021119, + "learning_rate": 1.1919335180087126e-05, + "loss": 0.847, "step": 9913 }, { - "epoch": 0.57, - "grad_norm": 0.3758477950586018, - "learning_rate": 8.241327331652997e-06, - "loss": 0.2369, + "epoch": 0.46, + "grad_norm": 0.311823246546308, + "learning_rate": 1.1917874903119194e-05, + "loss": 0.1948, "step": 9914 }, { - "epoch": 0.57, - "grad_norm": 0.35371479814125156, - "learning_rate": 8.23949545390435e-06, - "loss": 0.3125, + "epoch": 0.46, + "grad_norm": 0.2472200517274404, + "learning_rate": 1.1916414583691361e-05, + "loss": 0.2141, "step": 9915 }, { - "epoch": 0.57, - "grad_norm": 0.7429248777607208, - "learning_rate": 8.237663637121897e-06, - "loss": 0.4045, + "epoch": 0.46, + "grad_norm": 0.7626581093143945, + "learning_rate": 1.1914954221835955e-05, + "loss": 0.3537, "step": 9916 }, { - "epoch": 0.57, - "grad_norm": 0.32476985877232545, - "learning_rate": 8.23583188136908e-06, - "loss": 0.2445, + "epoch": 0.46, + "grad_norm": 0.3481459782528476, + "learning_rate": 1.191349381758531e-05, + "loss": 0.2311, "step": 9917 }, { - "epoch": 0.57, - "grad_norm": 0.2535536568991639, - "learning_rate": 8.234000186709327e-06, - "loss": 0.2115, + "epoch": 0.46, + "grad_norm": 0.7331413037321006, + "learning_rate": 1.1912033370971756e-05, + "loss": 0.5156, "step": 9918 }, { - "epoch": 0.57, - "grad_norm": 0.424295206181053, - "learning_rate": 8.232168553206072e-06, - "loss": 0.3238, + "epoch": 0.46, + "grad_norm": 0.35566066218461456, + "learning_rate": 1.1910572882027623e-05, + "loss": 0.3039, "step": 9919 }, { - "epoch": 0.57, - "grad_norm": 0.32505346775808885, - "learning_rate": 8.230336980922744e-06, - "loss": 0.2858, + "epoch": 0.46, + "grad_norm": 0.3795137161497298, + "learning_rate": 1.190911235078525e-05, + "loss": 0.296, "step": 9920 }, { - "epoch": 0.57, - "grad_norm": 0.9237249641595617, - "learning_rate": 8.228505469922769e-06, - "loss": 0.4477, + "epoch": 0.46, + "grad_norm": 0.2535394418761758, + "learning_rate": 1.1907651777276967e-05, + "loss": 0.1216, "step": 9921 }, { - "epoch": 0.57, - "grad_norm": 0.4630004104417319, - "learning_rate": 8.226674020269576e-06, - "loss": 0.3362, + "epoch": 0.46, + "grad_norm": 0.7488300069094642, + "learning_rate": 1.1906191161535119e-05, + "loss": 0.4561, "step": 9922 }, { - "epoch": 0.57, - "grad_norm": 0.2790719952889474, - "learning_rate": 8.224842632026583e-06, - "loss": 0.2751, + "epoch": 0.46, + "grad_norm": 0.2746399923359979, + "learning_rate": 1.190473050359203e-05, + "loss": 0.2527, "step": 9923 }, { - "epoch": 0.57, - "grad_norm": 0.7399588708936765, - "learning_rate": 8.223011305257214e-06, - "loss": 0.2648, + "epoch": 0.46, + "grad_norm": 0.46329624575458994, + "learning_rate": 1.1903269803480045e-05, + "loss": 0.3201, "step": 9924 }, { - "epoch": 0.57, - "grad_norm": 0.24700257914947743, - "learning_rate": 8.221180040024887e-06, - "loss": 0.1378, + "epoch": 0.46, + "grad_norm": 1.4276736312772949, + "learning_rate": 1.19018090612315e-05, + "loss": 0.7823, "step": 9925 }, { - "epoch": 0.57, - "grad_norm": 0.36109041110206347, - "learning_rate": 8.21934883639302e-06, - "loss": 0.2974, + "epoch": 0.46, + "grad_norm": 0.32784192284998426, + "learning_rate": 1.1900348276878738e-05, + "loss": 0.2365, "step": 9926 }, { - "epoch": 0.57, - "grad_norm": 0.36150839242761174, - "learning_rate": 8.217517694425027e-06, - "loss": 0.2756, + "epoch": 0.46, + "grad_norm": 0.26209183107872597, + "learning_rate": 1.1898887450454093e-05, + "loss": 0.2043, "step": 9927 }, { - "epoch": 0.57, - "grad_norm": 0.5338739079601947, - "learning_rate": 8.215686614184317e-06, - "loss": 0.3363, + "epoch": 0.46, + "grad_norm": 0.5636331979362261, + "learning_rate": 1.1897426581989913e-05, + "loss": 0.3169, "step": 9928 }, { - "epoch": 0.57, - "grad_norm": 0.3856577519384225, - "learning_rate": 8.213855595734306e-06, - "loss": 0.3088, + "epoch": 0.46, + "grad_norm": 0.3531844670292111, + "learning_rate": 1.1895965671518534e-05, + "loss": 0.2807, "step": 9929 }, { - "epoch": 0.57, - "grad_norm": 0.4388607395567644, - "learning_rate": 8.212024639138398e-06, - "loss": 0.2918, + "epoch": 0.46, + "grad_norm": 0.9268695413847821, + "learning_rate": 1.1894504719072307e-05, + "loss": 0.408, "step": 9930 }, { - "epoch": 0.57, - "grad_norm": 0.22893949698281893, - "learning_rate": 8.210193744459997e-06, - "loss": 0.1762, + "epoch": 0.46, + "grad_norm": 0.37456140485854067, + "learning_rate": 1.1893043724683568e-05, + "loss": 0.3252, "step": 9931 }, { - "epoch": 0.57, - "grad_norm": 0.38411850632766775, - "learning_rate": 8.20836291176251e-06, - "loss": 0.3018, + "epoch": 0.46, + "grad_norm": 0.3337654133468087, + "learning_rate": 1.1891582688384666e-05, + "loss": 0.2732, "step": 9932 }, { - "epoch": 0.57, - "grad_norm": 0.7225551464888974, - "learning_rate": 8.20653214110934e-06, - "loss": 0.5503, + "epoch": 0.46, + "grad_norm": 1.07336037739983, + "learning_rate": 1.1890121610207947e-05, + "loss": 0.5646, "step": 9933 }, { - "epoch": 0.57, - "grad_norm": 0.4470092332721713, - "learning_rate": 8.204701432563886e-06, - "loss": 0.2851, + "epoch": 0.46, + "grad_norm": 0.25964141165240134, + "learning_rate": 1.188866049018576e-05, + "loss": 0.2528, "step": 9934 }, { - "epoch": 0.57, - "grad_norm": 0.28828857751457004, - "learning_rate": 8.202870786189541e-06, - "loss": 0.2635, + "epoch": 0.46, + "grad_norm": 0.4386058924901409, + "learning_rate": 1.1887199328350448e-05, + "loss": 0.2648, "step": 9935 }, { - "epoch": 0.57, - "grad_norm": 0.9999090217246076, - "learning_rate": 8.201040202049705e-06, - "loss": 0.6211, + "epoch": 0.46, + "grad_norm": 0.398351022470657, + "learning_rate": 1.1885738124734359e-05, + "loss": 0.2892, "step": 9936 }, { - "epoch": 0.57, - "grad_norm": 0.1682762823248837, - "learning_rate": 8.199209680207768e-06, - "loss": 0.0922, + "epoch": 0.46, + "grad_norm": 0.4536883075573066, + "learning_rate": 1.1884276879369846e-05, + "loss": 0.2626, "step": 9937 }, { - "epoch": 0.57, - "grad_norm": 0.35653206640661295, - "learning_rate": 8.197379220727124e-06, - "loss": 0.2672, + "epoch": 0.46, + "grad_norm": 0.38837597515236055, + "learning_rate": 1.188281559228926e-05, + "loss": 0.2737, "step": 9938 }, { - "epoch": 0.57, - "grad_norm": 0.34611855850166906, - "learning_rate": 8.19554882367116e-06, - "loss": 0.3211, + "epoch": 0.46, + "grad_norm": 0.39877837219033074, + "learning_rate": 1.1881354263524954e-05, + "loss": 0.3366, "step": 9939 }, { - "epoch": 0.57, - "grad_norm": 0.7223852385709048, - "learning_rate": 8.193718489103261e-06, - "loss": 0.3111, + "epoch": 0.46, + "grad_norm": 0.49402524934098146, + "learning_rate": 1.1879892893109276e-05, + "loss": 0.2327, "step": 9940 }, { - "epoch": 0.57, - "grad_norm": 0.3878265662993998, - "learning_rate": 8.191888217086813e-06, - "loss": 0.3133, + "epoch": 0.46, + "grad_norm": 0.29265428000924, + "learning_rate": 1.1878431481074581e-05, + "loss": 0.2187, "step": 9941 }, { - "epoch": 0.57, - "grad_norm": 0.9687515986579172, - "learning_rate": 8.190058007685203e-06, - "loss": 0.5916, + "epoch": 0.46, + "grad_norm": 1.0914368040113336, + "learning_rate": 1.1876970027453223e-05, + "loss": 0.6868, "step": 9942 }, { - "epoch": 0.57, - "grad_norm": 0.21520660321755714, - "learning_rate": 8.188227860961804e-06, - "loss": 0.1676, + "epoch": 0.46, + "grad_norm": 0.3902454261858537, + "learning_rate": 1.1875508532277558e-05, + "loss": 0.2819, "step": 9943 }, { - "epoch": 0.57, - "grad_norm": 0.343814550581402, - "learning_rate": 8.186397776979992e-06, - "loss": 0.2711, + "epoch": 0.46, + "grad_norm": 0.35081192657451765, + "learning_rate": 1.187404699557994e-05, + "loss": 0.2649, "step": 9944 }, { - "epoch": 0.57, - "grad_norm": 0.9159296933741887, - "learning_rate": 8.184567755803153e-06, - "loss": 0.4973, + "epoch": 0.46, + "grad_norm": 0.6471659102116764, + "learning_rate": 1.187258541739273e-05, + "loss": 0.4158, "step": 9945 }, { - "epoch": 0.57, - "grad_norm": 0.38092338926104646, - "learning_rate": 8.18273779749465e-06, - "loss": 0.3144, + "epoch": 0.46, + "grad_norm": 0.4613188426994112, + "learning_rate": 1.1871123797748285e-05, + "loss": 0.3479, "step": 9946 }, { - "epoch": 0.57, - "grad_norm": 0.29189600354117673, - "learning_rate": 8.180907902117862e-06, - "loss": 0.1929, + "epoch": 0.46, + "grad_norm": 0.22975847548854253, + "learning_rate": 1.1869662136678961e-05, + "loss": 0.184, "step": 9947 }, { - "epoch": 0.57, - "grad_norm": 1.1424900738622954, - "learning_rate": 8.179078069736152e-06, - "loss": 0.6561, + "epoch": 0.46, + "grad_norm": 0.3790048834431771, + "learning_rate": 1.1868200434217118e-05, + "loss": 0.2082, "step": 9948 }, { - "epoch": 0.57, - "grad_norm": 0.22268099279160083, - "learning_rate": 8.177248300412893e-06, - "loss": 0.1552, + "epoch": 0.46, + "grad_norm": 1.3551484766539017, + "learning_rate": 1.1866738690395119e-05, + "loss": 0.8304, "step": 9949 }, { - "epoch": 0.57, - "grad_norm": 0.3583785102766591, - "learning_rate": 8.175418594211445e-06, - "loss": 0.2161, + "epoch": 0.46, + "grad_norm": 0.32535832966142647, + "learning_rate": 1.1865276905245325e-05, + "loss": 0.2242, "step": 9950 }, { - "epoch": 0.57, - "grad_norm": 0.5628579774742724, - "learning_rate": 8.173588951195175e-06, - "loss": 0.3217, + "epoch": 0.46, + "grad_norm": 0.3770566395613993, + "learning_rate": 1.1863815078800098e-05, + "loss": 0.3251, "step": 9951 }, { - "epoch": 0.57, - "grad_norm": 0.9119904572848944, - "learning_rate": 8.171759371427439e-06, - "loss": 0.492, + "epoch": 0.46, + "grad_norm": 0.4496230494640166, + "learning_rate": 1.1862353211091801e-05, + "loss": 0.3407, "step": 9952 }, { - "epoch": 0.57, - "grad_norm": 0.2712431380235841, - "learning_rate": 8.169929854971598e-06, - "loss": 0.2144, + "epoch": 0.46, + "grad_norm": 0.23653133617366773, + "learning_rate": 1.1860891302152799e-05, + "loss": 0.1268, "step": 9953 }, { - "epoch": 0.57, - "grad_norm": 0.41873206115862327, - "learning_rate": 8.168100401891007e-06, - "loss": 0.3233, + "epoch": 0.46, + "grad_norm": 0.596460487512924, + "learning_rate": 1.185942935201546e-05, + "loss": 0.4363, "step": 9954 }, { - "epoch": 0.57, - "grad_norm": 0.41447916444623933, - "learning_rate": 8.166271012249022e-06, - "loss": 0.26, + "epoch": 0.46, + "grad_norm": 0.3862863752900431, + "learning_rate": 1.1857967360712142e-05, + "loss": 0.3503, "step": 9955 }, { - "epoch": 0.57, - "grad_norm": 0.2745882143555028, - "learning_rate": 8.164441686108991e-06, - "loss": 0.1833, + "epoch": 0.46, + "grad_norm": 0.33660560748983254, + "learning_rate": 1.1856505328275221e-05, + "loss": 0.2113, "step": 9956 }, { - "epoch": 0.57, - "grad_norm": 0.8934275316001319, - "learning_rate": 8.162612423534266e-06, - "loss": 0.4585, + "epoch": 0.46, + "grad_norm": 0.4269648453307695, + "learning_rate": 1.185504325473706e-05, + "loss": 0.3691, "step": 9957 }, { - "epoch": 0.57, - "grad_norm": 0.37576440877617384, - "learning_rate": 8.160783224588196e-06, - "loss": 0.3174, + "epoch": 0.46, + "grad_norm": 0.45667565812390193, + "learning_rate": 1.185358114013003e-05, + "loss": 0.3255, "step": 9958 }, { - "epoch": 0.57, - "grad_norm": 0.34228438562146457, - "learning_rate": 8.15895408933412e-06, - "loss": 0.2592, + "epoch": 0.46, + "grad_norm": 0.351376706289423, + "learning_rate": 1.1852118984486498e-05, + "loss": 0.2915, "step": 9959 }, { - "epoch": 0.57, - "grad_norm": 1.0015700431321963, - "learning_rate": 8.157125017835389e-06, - "loss": 0.2788, + "epoch": 0.46, + "grad_norm": 0.21055874523444296, + "learning_rate": 1.1850656787838839e-05, + "loss": 0.1509, "step": 9960 }, { - "epoch": 0.57, - "grad_norm": 0.3365784712512407, - "learning_rate": 8.15529601015534e-06, - "loss": 0.2207, + "epoch": 0.46, + "grad_norm": 0.720376522029616, + "learning_rate": 1.1849194550219421e-05, + "loss": 0.5095, "step": 9961 }, { - "epoch": 0.57, - "grad_norm": 0.3051365494564319, - "learning_rate": 8.153467066357305e-06, - "loss": 0.2439, + "epoch": 0.46, + "grad_norm": 0.38867734529433834, + "learning_rate": 1.184773227166062e-05, + "loss": 0.3024, "step": 9962 }, { - "epoch": 0.57, - "grad_norm": 0.4885135862243723, - "learning_rate": 8.15163818650463e-06, - "loss": 0.3013, + "epoch": 0.46, + "grad_norm": 0.335671074685415, + "learning_rate": 1.1846269952194804e-05, + "loss": 0.2538, "step": 9963 }, { - "epoch": 0.57, - "grad_norm": 0.38036239490366125, - "learning_rate": 8.149809370660643e-06, - "loss": 0.2774, + "epoch": 0.46, + "grad_norm": 1.0600234296015851, + "learning_rate": 1.1844807591854354e-05, + "loss": 0.5572, "step": 9964 }, { - "epoch": 0.57, - "grad_norm": 0.39821866202603395, - "learning_rate": 8.14798061888868e-06, - "loss": 0.2925, + "epoch": 0.46, + "grad_norm": 0.30521236993003353, + "learning_rate": 1.1843345190671642e-05, + "loss": 0.2294, "step": 9965 }, { - "epoch": 0.57, - "grad_norm": 0.3459147936489671, - "learning_rate": 8.146151931252067e-06, - "loss": 0.2577, + "epoch": 0.46, + "grad_norm": 0.5079665718404314, + "learning_rate": 1.184188274867904e-05, + "loss": 0.2849, "step": 9966 }, { - "epoch": 0.57, - "grad_norm": 0.590203539009197, - "learning_rate": 8.144323307814133e-06, - "loss": 0.3359, + "epoch": 0.46, + "grad_norm": 0.4317216784124042, + "learning_rate": 1.1840420265908934e-05, + "loss": 0.3274, "step": 9967 }, { - "epoch": 0.57, - "grad_norm": 0.3644950630409327, - "learning_rate": 8.142494748638204e-06, - "loss": 0.2875, + "epoch": 0.46, + "grad_norm": 0.343065309880247, + "learning_rate": 1.1838957742393692e-05, + "loss": 0.3103, "step": 9968 }, { - "epoch": 0.57, - "grad_norm": 0.38221492079631614, - "learning_rate": 8.140666253787602e-06, - "loss": 0.2538, + "epoch": 0.46, + "grad_norm": 0.3980142194212502, + "learning_rate": 1.1837495178165706e-05, + "loss": 0.1821, "step": 9969 }, { - "epoch": 0.57, - "grad_norm": 0.399983798988496, - "learning_rate": 8.138837823325647e-06, - "loss": 0.3374, + "epoch": 0.46, + "grad_norm": 0.35446937461373595, + "learning_rate": 1.183603257325734e-05, + "loss": 0.295, "step": 9970 }, { - "epoch": 0.57, - "grad_norm": 0.35302886052202936, - "learning_rate": 8.137009457315658e-06, - "loss": 0.2862, + "epoch": 0.46, + "grad_norm": 0.6911963616725106, + "learning_rate": 1.1834569927700988e-05, + "loss": 0.3232, "step": 9971 }, { - "epoch": 0.57, - "grad_norm": 0.40507971925164943, - "learning_rate": 8.135181155820953e-06, - "loss": 0.2151, + "epoch": 0.46, + "grad_norm": 0.3668883650477848, + "learning_rate": 1.1833107241529023e-05, + "loss": 0.2728, "step": 9972 }, { - "epoch": 0.57, - "grad_norm": 0.5666935537735343, - "learning_rate": 8.13335291890484e-06, - "loss": 0.3137, + "epoch": 0.46, + "grad_norm": 0.7049872039823518, + "learning_rate": 1.183164451477383e-05, + "loss": 0.3494, "step": 9973 }, { - "epoch": 0.57, - "grad_norm": 0.32067561902428615, - "learning_rate": 8.13152474663064e-06, - "loss": 0.2686, + "epoch": 0.46, + "grad_norm": 0.4128469845793274, + "learning_rate": 1.1830181747467794e-05, + "loss": 0.2879, "step": 9974 }, { - "epoch": 0.57, - "grad_norm": 0.33814247044172163, - "learning_rate": 8.129696639061654e-06, - "loss": 0.286, + "epoch": 0.46, + "grad_norm": 0.29210906609087, + "learning_rate": 1.1828718939643298e-05, + "loss": 0.2432, "step": 9975 }, { - "epoch": 0.57, - "grad_norm": 0.3246915939454487, - "learning_rate": 8.127868596261198e-06, - "loss": 0.1764, + "epoch": 0.46, + "grad_norm": 1.0282089677745876, + "learning_rate": 1.1827256091332726e-05, + "loss": 0.336, "step": 9976 }, { - "epoch": 0.57, - "grad_norm": 0.26615674398341954, - "learning_rate": 8.126040618292566e-06, - "loss": 0.2069, + "epoch": 0.46, + "grad_norm": 0.5862158510123182, + "learning_rate": 1.1825793202568467e-05, + "loss": 0.3543, "step": 9977 }, { - "epoch": 0.57, - "grad_norm": 0.4585736080937011, - "learning_rate": 8.124212705219071e-06, - "loss": 0.3394, + "epoch": 0.46, + "grad_norm": 0.3615488792859387, + "learning_rate": 1.1824330273382904e-05, + "loss": 0.3005, "step": 9978 }, { - "epoch": 0.57, - "grad_norm": 0.4703515008785008, - "learning_rate": 8.122384857104006e-06, - "loss": 0.2791, + "epoch": 0.46, + "grad_norm": 0.5779046108189536, + "learning_rate": 1.1822867303808429e-05, + "loss": 0.3073, "step": 9979 }, { - "epoch": 0.57, - "grad_norm": 0.3081159193556577, - "learning_rate": 8.120557074010677e-06, - "loss": 0.251, + "epoch": 0.46, + "grad_norm": 0.4123805359267903, + "learning_rate": 1.1821404293877428e-05, + "loss": 0.295, "step": 9980 }, { - "epoch": 0.57, - "grad_norm": 1.0902723825131568, - "learning_rate": 8.118729356002371e-06, - "loss": 0.6268, + "epoch": 0.46, + "grad_norm": 0.3219027977000585, + "learning_rate": 1.1819941243622292e-05, + "loss": 0.2092, "step": 9981 }, { - "epoch": 0.57, - "grad_norm": 0.3609668953923149, - "learning_rate": 8.11690170314239e-06, - "loss": 0.3442, + "epoch": 0.46, + "grad_norm": 0.3632084879187309, + "learning_rate": 1.1818478153075412e-05, + "loss": 0.2638, "step": 9982 }, { - "epoch": 0.57, - "grad_norm": 0.23808650024305908, - "learning_rate": 8.115074115494022e-06, - "loss": 0.1346, + "epoch": 0.46, + "grad_norm": 0.4333576162175848, + "learning_rate": 1.1817015022269175e-05, + "loss": 0.2589, "step": 9983 }, { - "epoch": 0.57, - "grad_norm": 0.34721727640723965, - "learning_rate": 8.113246593120554e-06, - "loss": 0.2742, + "epoch": 0.46, + "grad_norm": 0.5388033499749486, + "learning_rate": 1.181555185123598e-05, + "loss": 0.3673, "step": 9984 }, { - "epoch": 0.57, - "grad_norm": 0.8939576818059674, - "learning_rate": 8.111419136085278e-06, - "loss": 0.4012, + "epoch": 0.46, + "grad_norm": 0.720768040349031, + "learning_rate": 1.1814088640008215e-05, + "loss": 0.4269, "step": 9985 }, { - "epoch": 0.57, - "grad_norm": 0.37324261637531364, - "learning_rate": 8.109591744451472e-06, - "loss": 0.2016, + "epoch": 0.46, + "grad_norm": 0.2866352493752822, + "learning_rate": 1.1812625388618275e-05, + "loss": 0.2332, "step": 9986 }, { - "epoch": 0.57, - "grad_norm": 0.407045014028079, - "learning_rate": 8.107764418282427e-06, - "loss": 0.3083, + "epoch": 0.46, + "grad_norm": 0.2628642338439257, + "learning_rate": 1.1811162097098559e-05, + "loss": 0.1872, "step": 9987 }, { - "epoch": 0.57, - "grad_norm": 1.0490869448387106, - "learning_rate": 8.105937157641416e-06, - "loss": 0.6916, + "epoch": 0.46, + "grad_norm": 1.1211379493555538, + "learning_rate": 1.1809698765481458e-05, + "loss": 0.5937, "step": 9988 }, { - "epoch": 0.57, - "grad_norm": 0.2294748663492301, - "learning_rate": 8.104109962591722e-06, - "loss": 0.1641, + "epoch": 0.46, + "grad_norm": 0.278649613867333, + "learning_rate": 1.180823539379937e-05, + "loss": 0.1206, "step": 9989 }, { - "epoch": 0.57, - "grad_norm": 0.28437388048864876, - "learning_rate": 8.102282833196616e-06, - "loss": 0.2555, + "epoch": 0.46, + "grad_norm": 0.45990426333653806, + "learning_rate": 1.1806771982084694e-05, + "loss": 0.3134, "step": 9990 }, { - "epoch": 0.57, - "grad_norm": 0.774892354817154, - "learning_rate": 8.100455769519377e-06, - "loss": 0.4188, + "epoch": 0.46, + "grad_norm": 0.8295897500802334, + "learning_rate": 1.1805308530369826e-05, + "loss": 0.3658, "step": 9991 }, { - "epoch": 0.57, - "grad_norm": 0.29569982792830685, - "learning_rate": 8.09862877162327e-06, - "loss": 0.1961, + "epoch": 0.46, + "grad_norm": 0.37033929569541063, + "learning_rate": 1.1803845038687171e-05, + "loss": 0.1199, "step": 9992 }, { - "epoch": 0.57, - "grad_norm": 1.0072369091205406, - "learning_rate": 8.096801839571569e-06, - "loss": 0.6177, + "epoch": 0.46, + "grad_norm": 0.28367780366899964, + "learning_rate": 1.1802381507069125e-05, + "loss": 0.2148, "step": 9993 }, { - "epoch": 0.57, - "grad_norm": 0.3855864777216197, - "learning_rate": 8.094974973427541e-06, - "loss": 0.3251, + "epoch": 0.46, + "grad_norm": 0.3966903362216353, + "learning_rate": 1.1800917935548086e-05, + "loss": 0.311, "step": 9994 }, { - "epoch": 0.57, - "grad_norm": 0.39142238746136504, - "learning_rate": 8.093148173254445e-06, - "loss": 0.2776, + "epoch": 0.46, + "grad_norm": 0.3058701451982702, + "learning_rate": 1.1799454324156463e-05, + "loss": 0.1153, "step": 9995 }, { - "epoch": 0.57, - "grad_norm": 0.23644514454316207, - "learning_rate": 8.091321439115543e-06, - "loss": 0.1532, + "epoch": 0.46, + "grad_norm": 0.4138375822555433, + "learning_rate": 1.1797990672926652e-05, + "loss": 0.3425, "step": 9996 }, { - "epoch": 0.57, - "grad_norm": 0.7087803679347151, - "learning_rate": 8.089494771074102e-06, - "loss": 0.4177, + "epoch": 0.46, + "grad_norm": 1.0261541459760841, + "learning_rate": 1.1796526981891063e-05, + "loss": 0.5243, "step": 9997 }, { - "epoch": 0.57, - "grad_norm": 0.2797651307049413, - "learning_rate": 8.08766816919337e-06, - "loss": 0.2555, + "epoch": 0.46, + "grad_norm": 0.3397860160358067, + "learning_rate": 1.1795063251082098e-05, + "loss": 0.2942, "step": 9998 }, { - "epoch": 0.57, - "grad_norm": 0.4959220267266461, - "learning_rate": 8.085841633536611e-06, - "loss": 0.2582, + "epoch": 0.46, + "grad_norm": 0.2875764422949156, + "learning_rate": 1.1793599480532163e-05, + "loss": 0.1837, "step": 9999 }, { - "epoch": 0.57, - "grad_norm": 1.0009644158738145, - "learning_rate": 8.084015164167071e-06, - "loss": 0.6752, + "epoch": 0.46, + "grad_norm": 0.48693126612691257, + "learning_rate": 1.179213567027366e-05, + "loss": 0.2685, "step": 10000 }, { - "epoch": 0.57, - "grad_norm": 0.32313335118258313, - "learning_rate": 8.082188761148007e-06, - "loss": 0.1926, + "epoch": 0.46, + "grad_norm": 0.5262746196859454, + "learning_rate": 1.1790671820339007e-05, + "loss": 0.3271, "step": 10001 }, { - "epoch": 0.57, - "grad_norm": 0.2669066233121326, - "learning_rate": 8.08036242454266e-06, - "loss": 0.2554, + "epoch": 0.46, + "grad_norm": 0.331191491793911, + "learning_rate": 1.17892079307606e-05, + "loss": 0.256, "step": 10002 }, { - "epoch": 0.57, - "grad_norm": 0.4574088631849031, - "learning_rate": 8.078536154414283e-06, - "loss": 0.2827, + "epoch": 0.46, + "grad_norm": 0.47533247846589405, + "learning_rate": 1.1787744001570858e-05, + "loss": 0.3955, "step": 10003 }, { - "epoch": 0.57, - "grad_norm": 0.507204777496856, - "learning_rate": 8.076709950826113e-06, - "loss": 0.2304, + "epoch": 0.46, + "grad_norm": 0.634528387425142, + "learning_rate": 1.1786280032802186e-05, + "loss": 0.3471, "step": 10004 }, { - "epoch": 0.57, - "grad_norm": 0.38781786620956266, - "learning_rate": 8.074883813841397e-06, - "loss": 0.2686, + "epoch": 0.46, + "grad_norm": 0.23419412428601935, + "learning_rate": 1.1784816024486996e-05, + "loss": 0.1506, "step": 10005 }, { - "epoch": 0.57, - "grad_norm": 0.36301702077513615, - "learning_rate": 8.073057743523371e-06, + "epoch": 0.46, + "grad_norm": 0.3628402176831377, + "learning_rate": 1.1783351976657698e-05, "loss": 0.3049, "step": 10006 }, { - "epoch": 0.57, - "grad_norm": 0.4861354405825026, - "learning_rate": 8.071231739935272e-06, - "loss": 0.3207, + "epoch": 0.46, + "grad_norm": 0.7129072502403367, + "learning_rate": 1.1781887889346706e-05, + "loss": 0.4136, "step": 10007 }, { - "epoch": 0.58, - "grad_norm": 0.35373086697907874, - "learning_rate": 8.069405803140338e-06, - "loss": 0.3251, + "epoch": 0.46, + "grad_norm": 0.34800764173189175, + "learning_rate": 1.1780423762586435e-05, + "loss": 0.2354, "step": 10008 }, { - "epoch": 0.58, - "grad_norm": 0.2171049695260447, - "learning_rate": 8.067579933201793e-06, - "loss": 0.0853, + "epoch": 0.46, + "grad_norm": 1.0608697482293123, + "learning_rate": 1.1778959596409296e-05, + "loss": 0.6971, "step": 10009 }, { - "epoch": 0.58, - "grad_norm": 0.26086832974147256, - "learning_rate": 8.065754130182876e-06, - "loss": 0.2698, + "epoch": 0.46, + "grad_norm": 0.35001696954576883, + "learning_rate": 1.1777495390847709e-05, + "loss": 0.2668, "step": 10010 }, { - "epoch": 0.58, - "grad_norm": 0.5370388860704075, - "learning_rate": 8.063928394146806e-06, - "loss": 0.4125, + "epoch": 0.46, + "grad_norm": 0.3899176525881227, + "learning_rate": 1.1776031145934085e-05, + "loss": 0.3123, "step": 10011 }, { - "epoch": 0.58, - "grad_norm": 0.7740071781472809, - "learning_rate": 8.062102725156818e-06, - "loss": 0.3091, + "epoch": 0.46, + "grad_norm": 0.33632989599035634, + "learning_rate": 1.1774566861700845e-05, + "loss": 0.1211, "step": 10012 }, { - "epoch": 0.58, - "grad_norm": 0.36948235193836254, - "learning_rate": 8.060277123276125e-06, - "loss": 0.3018, + "epoch": 0.46, + "grad_norm": 0.7870621695475499, + "learning_rate": 1.17731025381804e-05, + "loss": 0.3774, "step": 10013 }, { - "epoch": 0.58, - "grad_norm": 0.40449012511592974, - "learning_rate": 8.058451588567954e-06, - "loss": 0.3211, + "epoch": 0.46, + "grad_norm": 0.3232130074405032, + "learning_rate": 1.177163817540518e-05, + "loss": 0.2801, "step": 10014 }, { - "epoch": 0.58, - "grad_norm": 0.23658106450923316, - "learning_rate": 8.05662612109552e-06, - "loss": 0.1578, + "epoch": 0.46, + "grad_norm": 1.0085214397916038, + "learning_rate": 1.1770173773407594e-05, + "loss": 0.4783, "step": 10015 }, { - "epoch": 0.58, - "grad_norm": 0.3824604885675158, - "learning_rate": 8.05480072092204e-06, - "loss": 0.2864, + "epoch": 0.46, + "grad_norm": 0.6170896757165163, + "learning_rate": 1.1768709332220072e-05, + "loss": 0.4236, "step": 10016 }, { - "epoch": 0.58, - "grad_norm": 0.5705382545976666, - "learning_rate": 8.052975388110726e-06, - "loss": 0.3922, + "epoch": 0.46, + "grad_norm": 0.24345223369578334, + "learning_rate": 1.1767244851875023e-05, + "loss": 0.1897, "step": 10017 }, { - "epoch": 0.58, - "grad_norm": 0.3746308523465573, - "learning_rate": 8.051150122724793e-06, - "loss": 0.2776, + "epoch": 0.46, + "grad_norm": 0.45952341848879974, + "learning_rate": 1.1765780332404882e-05, + "loss": 0.2689, "step": 10018 }, { - "epoch": 0.58, - "grad_norm": 0.6099067258468585, - "learning_rate": 8.049324924827447e-06, - "loss": 0.3158, + "epoch": 0.46, + "grad_norm": 0.983075136703261, + "learning_rate": 1.1764315773842063e-05, + "loss": 0.3733, "step": 10019 }, { - "epoch": 0.58, - "grad_norm": 0.38826376456495426, - "learning_rate": 8.047499794481894e-06, - "loss": 0.3084, + "epoch": 0.46, + "grad_norm": 0.39421875605177625, + "learning_rate": 1.1762851176218994e-05, + "loss": 0.289, "step": 10020 }, { - "epoch": 0.58, - "grad_norm": 0.24314199497277397, - "learning_rate": 8.045674731751338e-06, - "loss": 0.2092, + "epoch": 0.46, + "grad_norm": 0.942509375955075, + "learning_rate": 1.1761386539568101e-05, + "loss": 0.5045, "step": 10021 }, { - "epoch": 0.58, - "grad_norm": 0.31113245462098016, - "learning_rate": 8.043849736698986e-06, - "loss": 0.1859, + "epoch": 0.46, + "grad_norm": 0.36049146647764785, + "learning_rate": 1.1759921863921807e-05, + "loss": 0.2921, "step": 10022 }, { - "epoch": 0.58, - "grad_norm": 0.37576660120750494, - "learning_rate": 8.04202480938803e-06, - "loss": 0.3239, + "epoch": 0.46, + "grad_norm": 0.4522278003230106, + "learning_rate": 1.1758457149312539e-05, + "loss": 0.3353, "step": 10023 }, { - "epoch": 0.58, - "grad_norm": 0.7344082199207784, - "learning_rate": 8.040199949881672e-06, - "loss": 0.4354, + "epoch": 0.46, + "grad_norm": 0.387448627058914, + "learning_rate": 1.1756992395772722e-05, + "loss": 0.2124, "step": 10024 }, { - "epoch": 0.58, - "grad_norm": 0.45688911337935884, - "learning_rate": 8.038375158243108e-06, - "loss": 0.2071, + "epoch": 0.46, + "grad_norm": 1.0168979438401233, + "learning_rate": 1.1755527603334789e-05, + "loss": 0.3412, "step": 10025 }, { - "epoch": 0.58, - "grad_norm": 0.2761160102991935, - "learning_rate": 8.036550434535522e-06, - "loss": 0.2761, + "epoch": 0.46, + "grad_norm": 0.29098794628053565, + "learning_rate": 1.1754062772031166e-05, + "loss": 0.2588, "step": 10026 }, { - "epoch": 0.58, - "grad_norm": 1.061528670972983, - "learning_rate": 8.034725778822114e-06, - "loss": 0.5102, + "epoch": 0.46, + "grad_norm": 0.49414261762359024, + "learning_rate": 1.1752597901894285e-05, + "loss": 0.4067, "step": 10027 }, { - "epoch": 0.58, - "grad_norm": 0.16482060521116781, - "learning_rate": 8.032901191166071e-06, - "loss": 0.0969, + "epoch": 0.46, + "grad_norm": 0.8832224154941617, + "learning_rate": 1.1751132992956576e-05, + "loss": 0.2319, "step": 10028 }, { - "epoch": 0.58, - "grad_norm": 0.30696431779330907, - "learning_rate": 8.03107667163057e-06, - "loss": 0.2962, + "epoch": 0.46, + "grad_norm": 0.3452635533653455, + "learning_rate": 1.1749668045250468e-05, + "loss": 0.2581, "step": 10029 }, { - "epoch": 0.58, - "grad_norm": 0.5561323310489525, - "learning_rate": 8.029252220278802e-06, - "loss": 0.3496, + "epoch": 0.46, + "grad_norm": 0.4524947484214568, + "learning_rate": 1.1748203058808397e-05, + "loss": 0.329, "step": 10030 }, { - "epoch": 0.58, - "grad_norm": 0.45238876227242075, - "learning_rate": 8.02742783717394e-06, - "loss": 0.2456, + "epoch": 0.46, + "grad_norm": 0.24481683816584915, + "learning_rate": 1.1746738033662795e-05, + "loss": 0.1296, "step": 10031 }, { - "epoch": 0.58, - "grad_norm": 0.5396238256737506, - "learning_rate": 8.025603522379172e-06, - "loss": 0.3749, + "epoch": 0.46, + "grad_norm": 0.4237164562582197, + "learning_rate": 1.1745272969846095e-05, + "loss": 0.3036, "step": 10032 }, { - "epoch": 0.58, - "grad_norm": 0.3195175238113092, - "learning_rate": 8.023779275957668e-06, - "loss": 0.2389, + "epoch": 0.46, + "grad_norm": 1.2774864905348657, + "learning_rate": 1.1743807867390735e-05, + "loss": 0.773, "step": 10033 }, { - "epoch": 0.58, - "grad_norm": 0.24479568402304927, - "learning_rate": 8.021955097972602e-06, - "loss": 0.2131, + "epoch": 0.46, + "grad_norm": 0.3426542215106476, + "learning_rate": 1.1742342726329152e-05, + "loss": 0.2422, "step": 10034 }, { - "epoch": 0.58, - "grad_norm": 0.3826033703959546, - "learning_rate": 8.020130988487146e-06, - "loss": 0.2464, + "epoch": 0.46, + "grad_norm": 0.37401341473111255, + "learning_rate": 1.1740877546693779e-05, + "loss": 0.2992, "step": 10035 }, { - "epoch": 0.58, - "grad_norm": 0.7693896830289918, - "learning_rate": 8.01830694756447e-06, - "loss": 0.4276, + "epoch": 0.46, + "grad_norm": 0.7844990791731473, + "learning_rate": 1.1739412328517052e-05, + "loss": 0.4441, "step": 10036 }, { - "epoch": 0.58, - "grad_norm": 0.3963773983506641, - "learning_rate": 8.016482975267738e-06, - "loss": 0.2943, + "epoch": 0.46, + "grad_norm": 0.26717972784740507, + "learning_rate": 1.1737947071831415e-05, + "loss": 0.1982, "step": 10037 }, { - "epoch": 0.58, - "grad_norm": 0.28363379615809237, - "learning_rate": 8.014659071660113e-06, - "loss": 0.2321, + "epoch": 0.46, + "grad_norm": 0.3694765856263234, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.199, "step": 10038 }, { - "epoch": 0.58, - "grad_norm": 0.41409455127608996, - "learning_rate": 8.012835236804764e-06, - "loss": 0.2602, + "epoch": 0.46, + "grad_norm": 0.5577101472482502, + "learning_rate": 1.1735016443063162e-05, + "loss": 0.3804, "step": 10039 }, { - "epoch": 0.58, - "grad_norm": 0.33374553909505866, - "learning_rate": 8.01101147076484e-06, - "loss": 0.2047, + "epoch": 0.46, + "grad_norm": 0.8761907294366034, + "learning_rate": 1.1733551071045429e-05, + "loss": 0.5249, "step": 10040 }, { - "epoch": 0.58, - "grad_norm": 0.3514808777272365, - "learning_rate": 8.009187773603508e-06, - "loss": 0.246, + "epoch": 0.46, + "grad_norm": 0.3492945697820342, + "learning_rate": 1.1732085660648543e-05, + "loss": 0.1985, "step": 10041 }, { - "epoch": 0.58, - "grad_norm": 0.3632666739681491, - "learning_rate": 8.007364145383914e-06, - "loss": 0.3166, + "epoch": 0.46, + "grad_norm": 0.4172582836194634, + "learning_rate": 1.173062021190495e-05, + "loss": 0.3381, "step": 10042 }, { - "epoch": 0.58, - "grad_norm": 0.809247352433276, - "learning_rate": 8.005540586169216e-06, - "loss": 0.4513, + "epoch": 0.46, + "grad_norm": 0.2718675904894732, + "learning_rate": 1.1729154724847093e-05, + "loss": 0.1515, "step": 10043 }, { - "epoch": 0.58, - "grad_norm": 0.31999804489355055, - "learning_rate": 8.003717096022561e-06, - "loss": 0.2042, + "epoch": 0.46, + "grad_norm": 0.3417266288523314, + "learning_rate": 1.172768919950742e-05, + "loss": 0.2119, "step": 10044 }, { - "epoch": 0.58, - "grad_norm": 0.45816885307178257, - "learning_rate": 8.001893675007098e-06, - "loss": 0.3312, + "epoch": 0.46, + "grad_norm": 0.5379451675445792, + "learning_rate": 1.172622363591837e-05, + "loss": 0.4336, "step": 10045 }, { - "epoch": 0.58, - "grad_norm": 0.218345571007771, - "learning_rate": 8.00007032318597e-06, - "loss": 0.1845, + "epoch": 0.46, + "grad_norm": 0.5106053664108272, + "learning_rate": 1.1724758034112395e-05, + "loss": 0.346, "step": 10046 }, { - "epoch": 0.58, - "grad_norm": 0.337060392878494, - "learning_rate": 7.99824704062232e-06, - "loss": 0.2972, + "epoch": 0.46, + "grad_norm": 0.34315683299857874, + "learning_rate": 1.1723292394121937e-05, + "loss": 0.2597, "step": 10047 }, { - "epoch": 0.58, - "grad_norm": 0.7301838529307065, - "learning_rate": 7.996423827379292e-06, - "loss": 0.3609, + "epoch": 0.46, + "grad_norm": 0.6462392634525331, + "learning_rate": 1.172182671597945e-05, + "loss": 0.3431, "step": 10048 }, { - "epoch": 0.58, - "grad_norm": 0.32799631732078843, - "learning_rate": 7.994600683520018e-06, - "loss": 0.3072, + "epoch": 0.46, + "grad_norm": 0.3445759201366232, + "learning_rate": 1.1720360999717374e-05, + "loss": 0.1953, "step": 10049 }, { - "epoch": 0.58, - "grad_norm": 0.4005912109187204, - "learning_rate": 7.992777609107638e-06, - "loss": 0.3005, + "epoch": 0.46, + "grad_norm": 0.29286083060104, + "learning_rate": 1.1718895245368167e-05, + "loss": 0.2606, "step": 10050 }, { - "epoch": 0.58, - "grad_norm": 0.8157284221590041, - "learning_rate": 7.99095460420528e-06, - "loss": 0.3047, + "epoch": 0.46, + "grad_norm": 1.064674360482196, + "learning_rate": 1.1717429452964275e-05, + "loss": 0.5577, "step": 10051 }, { - "epoch": 0.58, - "grad_norm": 0.3615354345631635, - "learning_rate": 7.989131668876081e-06, - "loss": 0.2347, + "epoch": 0.46, + "grad_norm": 0.6968155836386715, + "learning_rate": 1.1715963622538154e-05, + "loss": 0.4205, "step": 10052 }, { - "epoch": 0.58, - "grad_norm": 0.3831280211895221, - "learning_rate": 7.987308803183164e-06, - "loss": 0.3028, + "epoch": 0.46, + "grad_norm": 0.3449299380611466, + "learning_rate": 1.1714497754122247e-05, + "loss": 0.2838, "step": 10053 }, { - "epoch": 0.58, - "grad_norm": 0.3977051857632209, - "learning_rate": 7.985486007189658e-06, - "loss": 0.2917, + "epoch": 0.46, + "grad_norm": 0.3722150608048308, + "learning_rate": 1.1713031847749013e-05, + "loss": 0.2586, "step": 10054 }, { - "epoch": 0.58, - "grad_norm": 0.3577210765059678, - "learning_rate": 7.983663280958682e-06, - "loss": 0.2307, + "epoch": 0.46, + "grad_norm": 0.5082005346233696, + "learning_rate": 1.1711565903450907e-05, + "loss": 0.3316, "step": 10055 }, { - "epoch": 0.58, - "grad_norm": 0.37861640099556787, - "learning_rate": 7.981840624553364e-06, - "loss": 0.2822, + "epoch": 0.46, + "grad_norm": 0.38249320169277673, + "learning_rate": 1.1710099921260378e-05, + "loss": 0.3035, "step": 10056 }, { - "epoch": 0.58, - "grad_norm": 0.3719306516421558, - "learning_rate": 7.980018038036815e-06, - "loss": 0.313, + "epoch": 0.46, + "grad_norm": 0.37052627275505423, + "learning_rate": 1.1708633901209891e-05, + "loss": 0.2155, "step": 10057 }, { - "epoch": 0.58, - "grad_norm": 0.7053848501492725, - "learning_rate": 7.978195521472157e-06, - "loss": 0.3032, + "epoch": 0.46, + "grad_norm": 0.5639801095558974, + "learning_rate": 1.1707167843331893e-05, + "loss": 0.3629, "step": 10058 }, { - "epoch": 0.58, - "grad_norm": 0.3060816409264136, - "learning_rate": 7.976373074922498e-06, - "loss": 0.2589, + "epoch": 0.46, + "grad_norm": 0.42879980352996244, + "learning_rate": 1.1705701747658842e-05, + "loss": 0.2854, "step": 10059 }, { - "epoch": 0.58, - "grad_norm": 0.7008923801011527, - "learning_rate": 7.974550698450956e-06, - "loss": 0.5585, + "epoch": 0.46, + "grad_norm": 0.46889373778146665, + "learning_rate": 1.17042356142232e-05, + "loss": 0.3393, "step": 10060 }, { - "epoch": 0.58, - "grad_norm": 0.22270529904066977, - "learning_rate": 7.972728392120634e-06, - "loss": 0.1837, + "epoch": 0.46, + "grad_norm": 0.3467921033670613, + "learning_rate": 1.1702769443057425e-05, + "loss": 0.2681, "step": 10061 }, { - "epoch": 0.58, - "grad_norm": 0.2558814298202647, - "learning_rate": 7.970906155994646e-06, - "loss": 0.2174, + "epoch": 0.46, + "grad_norm": 0.43823880404190485, + "learning_rate": 1.1701303234193972e-05, + "loss": 0.3282, "step": 10062 }, { - "epoch": 0.58, - "grad_norm": 1.0344165023559295, - "learning_rate": 7.969083990136084e-06, - "loss": 0.5085, + "epoch": 0.46, + "grad_norm": 0.36819436456423654, + "learning_rate": 1.1699836987665312e-05, + "loss": 0.2773, "step": 10063 }, { - "epoch": 0.58, - "grad_norm": 0.7397533513729492, - "learning_rate": 7.967261894608058e-06, - "loss": 0.3023, + "epoch": 0.46, + "grad_norm": 0.6810566865838882, + "learning_rate": 1.1698370703503895e-05, + "loss": 0.3255, "step": 10064 }, { - "epoch": 0.58, - "grad_norm": 0.298771462575836, - "learning_rate": 7.965439869473664e-06, - "loss": 0.2718, + "epoch": 0.46, + "grad_norm": 0.2813410771509748, + "learning_rate": 1.1696904381742188e-05, + "loss": 0.2148, "step": 10065 }, { - "epoch": 0.58, - "grad_norm": 0.492680151076164, - "learning_rate": 7.963617914796002e-06, - "loss": 0.3987, + "epoch": 0.46, + "grad_norm": 0.38198903431816406, + "learning_rate": 1.1695438022412653e-05, + "loss": 0.3306, "step": 10066 }, { - "epoch": 0.58, - "grad_norm": 0.17351851611022887, - "learning_rate": 7.961796030638162e-06, - "loss": 0.096, + "epoch": 0.46, + "grad_norm": 0.9591736329880927, + "learning_rate": 1.1693971625547756e-05, + "loss": 0.2603, "step": 10067 }, { - "epoch": 0.58, - "grad_norm": 0.34316673447226265, - "learning_rate": 7.95997421706324e-06, - "loss": 0.2893, + "epoch": 0.46, + "grad_norm": 0.38755000727143635, + "learning_rate": 1.1692505191179957e-05, + "loss": 0.2854, "step": 10068 }, { - "epoch": 0.58, - "grad_norm": 0.3809433763662325, - "learning_rate": 7.95815247413432e-06, - "loss": 0.3244, + "epoch": 0.46, + "grad_norm": 0.9468788599281269, + "learning_rate": 1.1691038719341727e-05, + "loss": 0.5708, "step": 10069 }, { - "epoch": 0.58, - "grad_norm": 0.8279519992417633, - "learning_rate": 7.956330801914495e-06, - "loss": 0.3853, + "epoch": 0.46, + "grad_norm": 0.28448224741662526, + "learning_rate": 1.168957221006553e-05, + "loss": 0.2396, "step": 10070 }, { - "epoch": 0.58, - "grad_norm": 0.3236230187941723, - "learning_rate": 7.954509200466845e-06, - "loss": 0.2006, + "epoch": 0.46, + "grad_norm": 0.27914646512412794, + "learning_rate": 1.168810566338383e-05, + "loss": 0.2059, "step": 10071 }, { - "epoch": 0.58, - "grad_norm": 0.9845789891659482, - "learning_rate": 7.952687669854453e-06, - "loss": 0.7532, + "epoch": 0.46, + "grad_norm": 1.1471242556890526, + "learning_rate": 1.1686639079329099e-05, + "loss": 0.6389, "step": 10072 }, { - "epoch": 0.58, - "grad_norm": 0.2719432805518241, - "learning_rate": 7.950866210140401e-06, - "loss": 0.2321, + "epoch": 0.46, + "grad_norm": 0.38827093268989066, + "learning_rate": 1.1685172457933804e-05, + "loss": 0.3276, "step": 10073 }, { - "epoch": 0.58, - "grad_norm": 0.25669260749844697, - "learning_rate": 7.949044821387761e-06, - "loss": 0.1774, + "epoch": 0.46, + "grad_norm": 0.31971591487797246, + "learning_rate": 1.1683705799230416e-05, + "loss": 0.214, "step": 10074 }, { - "epoch": 0.58, - "grad_norm": 0.8589351191506319, - "learning_rate": 7.947223503659613e-06, - "loss": 0.4398, + "epoch": 0.46, + "grad_norm": 0.834221192586116, + "learning_rate": 1.1682239103251405e-05, + "loss": 0.5776, "step": 10075 }, { - "epoch": 0.58, - "grad_norm": 0.8104627329921519, - "learning_rate": 7.945402257019026e-06, - "loss": 0.4334, + "epoch": 0.46, + "grad_norm": 0.4418135536538834, + "learning_rate": 1.168077237002924e-05, + "loss": 0.3399, "step": 10076 }, { - "epoch": 0.58, - "grad_norm": 0.28954849191593923, - "learning_rate": 7.943581081529072e-06, - "loss": 0.2135, + "epoch": 0.46, + "grad_norm": 0.2433392767901945, + "learning_rate": 1.1679305599596394e-05, + "loss": 0.139, "step": 10077 }, { - "epoch": 0.58, - "grad_norm": 0.5205399924008268, - "learning_rate": 7.941759977252815e-06, - "loss": 0.4581, + "epoch": 0.46, + "grad_norm": 0.35031849479653154, + "learning_rate": 1.167783879198534e-05, + "loss": 0.2985, "step": 10078 }, { - "epoch": 0.58, - "grad_norm": 0.28954303976259166, - "learning_rate": 7.939938944253321e-06, - "loss": 0.1576, + "epoch": 0.46, + "grad_norm": 1.0479222544180729, + "learning_rate": 1.1676371947228554e-05, + "loss": 0.6134, "step": 10079 }, { - "epoch": 0.58, - "grad_norm": 0.3201876263622221, - "learning_rate": 7.938117982593653e-06, - "loss": 0.203, + "epoch": 0.46, + "grad_norm": 0.35472402349721904, + "learning_rate": 1.1674905065358508e-05, + "loss": 0.1908, "step": 10080 }, { - "epoch": 0.58, - "grad_norm": 0.6596627329890012, - "learning_rate": 7.936297092336872e-06, - "loss": 0.3682, + "epoch": 0.46, + "grad_norm": 0.3950027183429008, + "learning_rate": 1.1673438146407681e-05, + "loss": 0.3295, "step": 10081 }, { - "epoch": 0.58, - "grad_norm": 0.5906861427138822, - "learning_rate": 7.934476273546032e-06, - "loss": 0.3605, + "epoch": 0.46, + "grad_norm": 0.5189445655197306, + "learning_rate": 1.1671971190408544e-05, + "loss": 0.4005, "step": 10082 }, { - "epoch": 0.58, - "grad_norm": 0.30195190043595455, - "learning_rate": 7.932655526284192e-06, - "loss": 0.2666, + "epoch": 0.46, + "grad_norm": 0.19542021191802572, + "learning_rate": 1.1670504197393577e-05, + "loss": 0.1261, "step": 10083 }, { - "epoch": 0.58, - "grad_norm": 0.992932228624963, - "learning_rate": 7.930834850614399e-06, - "loss": 0.5499, + "epoch": 0.46, + "grad_norm": 0.5282637306389921, + "learning_rate": 1.1669037167395256e-05, + "loss": 0.3552, "step": 10084 }, { - "epoch": 0.58, - "grad_norm": 0.28475196579428813, - "learning_rate": 7.92901424659971e-06, - "loss": 0.2582, + "epoch": 0.46, + "grad_norm": 0.37666133888041864, + "learning_rate": 1.1667570100446062e-05, + "loss": 0.3314, "step": 10085 }, { - "epoch": 0.58, - "grad_norm": 0.32554733501197314, - "learning_rate": 7.927193714303166e-06, - "loss": 0.2448, + "epoch": 0.46, + "grad_norm": 0.4183750925885515, + "learning_rate": 1.1666102996578473e-05, + "loss": 0.2925, "step": 10086 }, { - "epoch": 0.58, - "grad_norm": 0.38443162661917063, - "learning_rate": 7.925373253787817e-06, - "loss": 0.2281, + "epoch": 0.46, + "grad_norm": 0.43029546417603587, + "learning_rate": 1.1664635855824969e-05, + "loss": 0.2905, "step": 10087 }, { - "epoch": 0.58, - "grad_norm": 0.4907863733563514, - "learning_rate": 7.923552865116701e-06, - "loss": 0.3353, + "epoch": 0.46, + "grad_norm": 0.7582523291449792, + "learning_rate": 1.166316867821803e-05, + "loss": 0.5447, "step": 10088 }, { - "epoch": 0.58, - "grad_norm": 0.39379032559924637, - "learning_rate": 7.92173254835286e-06, - "loss": 0.2464, + "epoch": 0.46, + "grad_norm": 0.30026263312907653, + "learning_rate": 1.1661701463790142e-05, + "loss": 0.2335, "step": 10089 }, { - "epoch": 0.58, - "grad_norm": 0.37205057590132345, - "learning_rate": 7.919912303559334e-06, - "loss": 0.2769, + "epoch": 0.46, + "grad_norm": 0.27797907014048207, + "learning_rate": 1.1660234212573782e-05, + "loss": 0.1912, "step": 10090 }, { - "epoch": 0.58, - "grad_norm": 0.37416042075941736, - "learning_rate": 7.91809213079915e-06, - "loss": 0.2306, + "epoch": 0.46, + "grad_norm": 0.9879002182889989, + "learning_rate": 1.1658766924601439e-05, + "loss": 0.5006, "step": 10091 }, { - "epoch": 0.58, - "grad_norm": 0.36138527919034713, - "learning_rate": 7.916272030135353e-06, - "loss": 0.2915, + "epoch": 0.46, + "grad_norm": 0.5551307056286501, + "learning_rate": 1.1657299599905596e-05, + "loss": 0.3847, "step": 10092 }, { - "epoch": 0.58, - "grad_norm": 0.3876317520648567, - "learning_rate": 7.91445200163096e-06, - "loss": 0.2792, + "epoch": 0.46, + "grad_norm": 0.38600070831419114, + "learning_rate": 1.1655832238518739e-05, + "loss": 0.2488, "step": 10093 }, { - "epoch": 0.58, - "grad_norm": 0.3951992348618812, - "learning_rate": 7.912632045349008e-06, - "loss": 0.2647, + "epoch": 0.46, + "grad_norm": 0.5049749729702442, + "learning_rate": 1.1654364840473348e-05, + "loss": 0.401, "step": 10094 }, { - "epoch": 0.58, - "grad_norm": 0.33494661203789694, - "learning_rate": 7.910812161352517e-06, - "loss": 0.2683, + "epoch": 0.46, + "grad_norm": 0.5878774863194353, + "learning_rate": 1.1652897405801913e-05, + "loss": 0.3225, "step": 10095 }, { - "epoch": 0.58, - "grad_norm": 0.9173367452830046, - "learning_rate": 7.908992349704515e-06, - "loss": 0.7132, + "epoch": 0.46, + "grad_norm": 0.24560335841127917, + "learning_rate": 1.1651429934536923e-05, + "loss": 0.1602, "step": 10096 }, { - "epoch": 0.58, - "grad_norm": 0.3487243075292584, - "learning_rate": 7.907172610468015e-06, - "loss": 0.2215, + "epoch": 0.46, + "grad_norm": 0.399986882390551, + "learning_rate": 1.1649962426710868e-05, + "loss": 0.3273, "step": 10097 }, { - "epoch": 0.58, - "grad_norm": 0.32608359551160127, - "learning_rate": 7.905352943706035e-06, - "loss": 0.2646, + "epoch": 0.46, + "grad_norm": 0.6193514136707808, + "learning_rate": 1.1648494882356236e-05, + "loss": 0.3437, "step": 10098 }, { - "epoch": 0.58, - "grad_norm": 0.42778870571825733, - "learning_rate": 7.903533349481596e-06, - "loss": 0.2771, + "epoch": 0.46, + "grad_norm": 0.3940600291985681, + "learning_rate": 1.1647027301505515e-05, + "loss": 0.3417, "step": 10099 }, { - "epoch": 0.58, - "grad_norm": 0.29251505997739846, - "learning_rate": 7.901713827857705e-06, - "loss": 0.1692, + "epoch": 0.46, + "grad_norm": 1.0711245132108864, + "learning_rate": 1.1645559684191199e-05, + "loss": 0.4641, "step": 10100 }, { - "epoch": 0.58, - "grad_norm": 0.28561704547379524, - "learning_rate": 7.899894378897374e-06, - "loss": 0.275, + "epoch": 0.46, + "grad_norm": 0.3375833101781181, + "learning_rate": 1.1644092030445773e-05, + "loss": 0.3003, "step": 10101 }, { - "epoch": 0.58, - "grad_norm": 1.1525092467625975, - "learning_rate": 7.898075002663612e-06, - "loss": 0.7976, + "epoch": 0.46, + "grad_norm": 0.25513721523809546, + "learning_rate": 1.1642624340301738e-05, + "loss": 0.2068, "step": 10102 }, { - "epoch": 0.58, - "grad_norm": 0.6225456716829496, - "learning_rate": 7.89625569921942e-06, - "loss": 0.335, + "epoch": 0.46, + "grad_norm": 0.7934123075779773, + "learning_rate": 1.164115661379158e-05, + "loss": 0.3336, "step": 10103 }, { - "epoch": 0.58, - "grad_norm": 0.3281355566608314, - "learning_rate": 7.894436468627804e-06, - "loss": 0.2564, + "epoch": 0.46, + "grad_norm": 0.5065106547257751, + "learning_rate": 1.1639688850947798e-05, + "loss": 0.3483, "step": 10104 }, { - "epoch": 0.58, - "grad_norm": 0.36381880629843233, - "learning_rate": 7.892617310951761e-06, - "loss": 0.3175, + "epoch": 0.46, + "grad_norm": 0.3577604358511736, + "learning_rate": 1.1638221051802887e-05, + "loss": 0.2941, "step": 10105 }, { - "epoch": 0.58, - "grad_norm": 0.2159222699318665, - "learning_rate": 7.890798226254291e-06, - "loss": 0.1054, + "epoch": 0.46, + "grad_norm": 0.4589062341799034, + "learning_rate": 1.1636753216389339e-05, + "loss": 0.2771, "step": 10106 }, { - "epoch": 0.58, - "grad_norm": 0.35946385596297437, - "learning_rate": 7.888979214598387e-06, - "loss": 0.2295, + "epoch": 0.46, + "grad_norm": 0.4281054672833545, + "learning_rate": 1.163528534473965e-05, + "loss": 0.3081, "step": 10107 }, { - "epoch": 0.58, - "grad_norm": 0.4936241577077283, - "learning_rate": 7.887160276047045e-06, - "loss": 0.376, + "epoch": 0.46, + "grad_norm": 0.2824307114296, + "learning_rate": 1.1633817436886323e-05, + "loss": 0.2066, "step": 10108 }, { - "epoch": 0.58, - "grad_norm": 0.33073585211025036, - "learning_rate": 7.885341410663248e-06, - "loss": 0.2954, + "epoch": 0.46, + "grad_norm": 0.44703723851206306, + "learning_rate": 1.1632349492861853e-05, + "loss": 0.2768, "step": 10109 }, { - "epoch": 0.58, - "grad_norm": 0.34079160331467057, - "learning_rate": 7.883522618509989e-06, - "loss": 0.2056, + "epoch": 0.46, + "grad_norm": 0.6647260680728823, + "learning_rate": 1.1630881512698737e-05, + "loss": 0.3909, "step": 10110 }, { - "epoch": 0.58, - "grad_norm": 0.351615537794108, - "learning_rate": 7.881703899650249e-06, - "loss": 0.2591, + "epoch": 0.46, + "grad_norm": 0.44158533127606425, + "learning_rate": 1.1629413496429476e-05, + "loss": 0.3167, "step": 10111 }, { - "epoch": 0.58, - "grad_norm": 0.39871038149815147, - "learning_rate": 7.879885254147014e-06, - "loss": 0.2566, + "epoch": 0.46, + "grad_norm": 1.074768989574492, + "learning_rate": 1.162794544408657e-05, + "loss": 0.7815, "step": 10112 }, { - "epoch": 0.58, - "grad_norm": 0.24185991194383902, - "learning_rate": 7.878066682063262e-06, - "loss": 0.2185, + "epoch": 0.46, + "grad_norm": 0.32357173280404755, + "learning_rate": 1.1626477355702523e-05, + "loss": 0.2302, "step": 10113 }, { - "epoch": 0.58, - "grad_norm": 1.558532170715619, - "learning_rate": 7.876248183461967e-06, - "loss": 0.7215, + "epoch": 0.46, + "grad_norm": 0.23911827121319967, + "learning_rate": 1.1625009231309832e-05, + "loss": 0.199, "step": 10114 }, { - "epoch": 0.58, - "grad_norm": 0.6384019101854085, - "learning_rate": 7.874429758406108e-06, - "loss": 0.3869, + "epoch": 0.46, + "grad_norm": 0.8236922628410613, + "learning_rate": 1.1623541070941005e-05, + "loss": 0.4067, "step": 10115 }, { - "epoch": 0.58, - "grad_norm": 0.3028402041371424, - "learning_rate": 7.872611406958653e-06, - "loss": 0.2154, + "epoch": 0.46, + "grad_norm": 0.4590700241122029, + "learning_rate": 1.1622072874628546e-05, + "loss": 0.1673, "step": 10116 }, { - "epoch": 0.58, - "grad_norm": 0.3622538842476559, - "learning_rate": 7.870793129182577e-06, - "loss": 0.2996, + "epoch": 0.46, + "grad_norm": 0.2901775791358577, + "learning_rate": 1.1620604642404954e-05, + "loss": 0.2678, "step": 10117 }, { - "epoch": 0.58, - "grad_norm": 0.27173806317093663, - "learning_rate": 7.86897492514084e-06, - "loss": 0.1832, + "epoch": 0.46, + "grad_norm": 1.1149126751571659, + "learning_rate": 1.1619136374302735e-05, + "loss": 0.7265, "step": 10118 }, { - "epoch": 0.58, - "grad_norm": 0.30861485287327556, - "learning_rate": 7.86715679489641e-06, - "loss": 0.2098, + "epoch": 0.46, + "grad_norm": 0.5095702673853327, + "learning_rate": 1.16176680703544e-05, + "loss": 0.1956, "step": 10119 }, { - "epoch": 0.58, - "grad_norm": 1.2068126960800758, - "learning_rate": 7.86533873851225e-06, - "loss": 0.6609, + "epoch": 0.46, + "grad_norm": 0.36228373996312824, + "learning_rate": 1.161619973059245e-05, + "loss": 0.2735, "step": 10120 }, { - "epoch": 0.58, - "grad_norm": 0.36384977954229525, - "learning_rate": 7.863520756051317e-06, - "loss": 0.3154, + "epoch": 0.46, + "grad_norm": 0.37977997556040965, + "learning_rate": 1.16147313550494e-05, + "loss": 0.2926, "step": 10121 }, { - "epoch": 0.58, - "grad_norm": 0.4049015882904989, - "learning_rate": 7.861702847576568e-06, - "loss": 0.293, + "epoch": 0.46, + "grad_norm": 0.29049154264179633, + "learning_rate": 1.1613262943757752e-05, + "loss": 0.1378, "step": 10122 }, { - "epoch": 0.58, - "grad_norm": 0.5157148425875564, - "learning_rate": 7.859885013150959e-06, - "loss": 0.2408, + "epoch": 0.47, + "grad_norm": 0.40635621024717733, + "learning_rate": 1.1611794496750019e-05, + "loss": 0.3375, "step": 10123 }, { - "epoch": 0.58, - "grad_norm": 0.23292577160493733, - "learning_rate": 7.858067252837437e-06, - "loss": 0.2067, + "epoch": 0.47, + "grad_norm": 1.285520796142388, + "learning_rate": 1.1610326014058706e-05, + "loss": 0.6434, "step": 10124 }, { - "epoch": 0.58, - "grad_norm": 0.5576874400588665, - "learning_rate": 7.856249566698957e-06, - "loss": 0.3294, + "epoch": 0.47, + "grad_norm": 0.3871601265255735, + "learning_rate": 1.160885749571633e-05, + "loss": 0.3031, "step": 10125 }, { - "epoch": 0.58, - "grad_norm": 0.331359061847375, - "learning_rate": 7.854431954798463e-06, - "loss": 0.2648, + "epoch": 0.47, + "grad_norm": 0.3585881971646937, + "learning_rate": 1.1607388941755397e-05, + "loss": 0.2273, "step": 10126 }, { - "epoch": 0.58, - "grad_norm": 0.5790983417813569, - "learning_rate": 7.852614417198894e-06, - "loss": 0.3896, + "epoch": 0.47, + "grad_norm": 0.49237165609772104, + "learning_rate": 1.1605920352208424e-05, + "loss": 0.3163, "step": 10127 }, { - "epoch": 0.58, - "grad_norm": 0.5886527390523273, - "learning_rate": 7.850796953963198e-06, - "loss": 0.3371, + "epoch": 0.47, + "grad_norm": 0.3113632537840092, + "learning_rate": 1.1604451727107927e-05, + "loss": 0.2083, "step": 10128 }, { - "epoch": 0.58, - "grad_norm": 0.2781699195992228, - "learning_rate": 7.848979565154314e-06, - "loss": 0.2399, + "epoch": 0.47, + "grad_norm": 0.2899162748184306, + "learning_rate": 1.1602983066486407e-05, + "loss": 0.2368, "step": 10129 }, { - "epoch": 0.58, - "grad_norm": 0.25233740774202973, - "learning_rate": 7.847162250835171e-06, - "loss": 0.1594, + "epoch": 0.47, + "grad_norm": 1.3046450864715353, + "learning_rate": 1.160151437037639e-05, + "loss": 0.7733, "step": 10130 }, { - "epoch": 0.58, - "grad_norm": 0.5636084765264904, - "learning_rate": 7.845345011068709e-06, - "loss": 0.4195, + "epoch": 0.47, + "grad_norm": 0.7221710243487537, + "learning_rate": 1.1600045638810387e-05, + "loss": 0.5441, "step": 10131 }, { - "epoch": 0.58, - "grad_norm": 0.29482534737792404, - "learning_rate": 7.843527845917855e-06, - "loss": 0.2604, + "epoch": 0.47, + "grad_norm": 0.31307580511611083, + "learning_rate": 1.159857687182092e-05, + "loss": 0.2396, "step": 10132 }, { - "epoch": 0.58, - "grad_norm": 0.44948437825709303, - "learning_rate": 7.84171075544554e-06, - "loss": 0.3688, + "epoch": 0.47, + "grad_norm": 0.28238246997415495, + "learning_rate": 1.1597108069440498e-05, + "loss": 0.2605, "step": 10133 }, { - "epoch": 0.58, - "grad_norm": 0.386973790913395, - "learning_rate": 7.839893739714686e-06, - "loss": 0.2615, + "epoch": 0.47, + "grad_norm": 0.5083964742205858, + "learning_rate": 1.1595639231701642e-05, + "loss": 0.2701, "step": 10134 }, { - "epoch": 0.58, - "grad_norm": 0.4697085406586142, - "learning_rate": 7.83807679878822e-06, - "loss": 0.3673, + "epoch": 0.47, + "grad_norm": 0.32711623583178534, + "learning_rate": 1.1594170358636873e-05, + "loss": 0.2183, "step": 10135 }, { - "epoch": 0.58, - "grad_norm": 0.2578535107262174, - "learning_rate": 7.836259932729062e-06, - "loss": 0.203, + "epoch": 0.47, + "grad_norm": 1.0660538605992356, + "learning_rate": 1.159270145027871e-05, + "loss": 0.64, "step": 10136 }, { - "epoch": 0.58, - "grad_norm": 0.3996683546043715, - "learning_rate": 7.834443141600131e-06, - "loss": 0.3033, + "epoch": 0.47, + "grad_norm": 0.3637501740379312, + "learning_rate": 1.1591232506659668e-05, + "loss": 0.3231, "step": 10137 }, { - "epoch": 0.58, - "grad_norm": 0.5165287449268846, - "learning_rate": 7.83262642546434e-06, - "loss": 0.3747, + "epoch": 0.47, + "grad_norm": 0.3288145824895556, + "learning_rate": 1.1589763527812275e-05, + "loss": 0.2928, "step": 10138 }, { - "epoch": 0.58, - "grad_norm": 0.4618223329699041, - "learning_rate": 7.830809784384602e-06, - "loss": 0.2441, + "epoch": 0.47, + "grad_norm": 0.7209188924215945, + "learning_rate": 1.158829451376905e-05, + "loss": 0.3338, "step": 10139 }, { - "epoch": 0.58, - "grad_norm": 0.30623960016940566, - "learning_rate": 7.828993218423829e-06, - "loss": 0.2678, + "epoch": 0.47, + "grad_norm": 0.2515148906195833, + "learning_rate": 1.1586825464562515e-05, + "loss": 0.2012, "step": 10140 }, { - "epoch": 0.58, - "grad_norm": 0.35976134707155416, - "learning_rate": 7.827176727644925e-06, - "loss": 0.3079, + "epoch": 0.47, + "grad_norm": 0.32950260892852024, + "learning_rate": 1.1585356380225193e-05, + "loss": 0.2841, "step": 10141 }, { - "epoch": 0.58, - "grad_norm": 0.28324400036157693, - "learning_rate": 7.825360312110801e-06, - "loss": 0.1523, + "epoch": 0.47, + "grad_norm": 0.9641189761444086, + "learning_rate": 1.1583887260789608e-05, + "loss": 0.3995, "step": 10142 }, { - "epoch": 0.58, - "grad_norm": 0.7363848108703631, - "learning_rate": 7.823543971884353e-06, - "loss": 0.3966, + "epoch": 0.47, + "grad_norm": 0.6865735991543428, + "learning_rate": 1.1582418106288286e-05, + "loss": 0.3578, "step": 10143 }, { - "epoch": 0.58, - "grad_norm": 0.3260077080948101, - "learning_rate": 7.821727707028486e-06, - "loss": 0.2802, + "epoch": 0.47, + "grad_norm": 0.3752926440043566, + "learning_rate": 1.1580948916753751e-05, + "loss": 0.2857, "step": 10144 }, { - "epoch": 0.58, - "grad_norm": 0.27632507657679467, - "learning_rate": 7.81991151760609e-06, - "loss": 0.2405, + "epoch": 0.47, + "grad_norm": 0.34351153336140117, + "learning_rate": 1.1579479692218534e-05, + "loss": 0.2534, "step": 10145 }, { - "epoch": 0.58, - "grad_norm": 0.3488908220712134, - "learning_rate": 7.81809540368007e-06, - "loss": 0.1139, + "epoch": 0.47, + "grad_norm": 0.22869140603523305, + "learning_rate": 1.1578010432715159e-05, + "loss": 0.1466, "step": 10146 }, { - "epoch": 0.58, - "grad_norm": 0.37529873675347386, - "learning_rate": 7.81627936531331e-06, - "loss": 0.2968, + "epoch": 0.47, + "grad_norm": 0.384761339676635, + "learning_rate": 1.1576541138276154e-05, + "loss": 0.2821, "step": 10147 }, { - "epoch": 0.58, - "grad_norm": 0.3596295347448901, - "learning_rate": 7.814463402568703e-06, - "loss": 0.3217, + "epoch": 0.47, + "grad_norm": 0.5105976001308454, + "learning_rate": 1.1575071808934042e-05, + "loss": 0.2987, "step": 10148 }, { - "epoch": 0.58, - "grad_norm": 0.6919007612501898, - "learning_rate": 7.812647515509131e-06, - "loss": 0.245, + "epoch": 0.47, + "grad_norm": 0.5901954839109667, + "learning_rate": 1.1573602444721363e-05, + "loss": 0.3622, "step": 10149 }, { - "epoch": 0.58, - "grad_norm": 0.36005726352566714, - "learning_rate": 7.810831704197486e-06, - "loss": 0.3193, + "epoch": 0.47, + "grad_norm": 0.33350458793944854, + "learning_rate": 1.1572133045670642e-05, + "loss": 0.2823, "step": 10150 }, { - "epoch": 0.58, - "grad_norm": 0.3847714209623518, - "learning_rate": 7.80901596869664e-06, - "loss": 0.2601, + "epoch": 0.47, + "grad_norm": 1.323818656901192, + "learning_rate": 1.1570663611814411e-05, + "loss": 0.5051, "step": 10151 }, { - "epoch": 0.58, - "grad_norm": 0.21052130026797297, - "learning_rate": 7.807200309069482e-06, - "loss": 0.1744, + "epoch": 0.47, + "grad_norm": 0.28338214582361987, + "learning_rate": 1.15691941431852e-05, + "loss": 0.2243, "step": 10152 }, { - "epoch": 0.58, - "grad_norm": 0.40305583102744774, - "learning_rate": 7.805384725378881e-06, - "loss": 0.3051, + "epoch": 0.47, + "grad_norm": 0.4077989680410675, + "learning_rate": 1.1567724639815546e-05, + "loss": 0.2665, "step": 10153 }, { - "epoch": 0.58, - "grad_norm": 1.0107321479306213, - "learning_rate": 7.803569217687711e-06, - "loss": 0.5662, + "epoch": 0.47, + "grad_norm": 0.554358578283371, + "learning_rate": 1.1566255101737976e-05, + "loss": 0.3248, "step": 10154 }, { - "epoch": 0.58, - "grad_norm": 0.6122037063934481, - "learning_rate": 7.801753786058847e-06, - "loss": 0.2446, + "epoch": 0.47, + "grad_norm": 0.9698303720443032, + "learning_rate": 1.156478552898503e-05, + "loss": 0.342, "step": 10155 }, { - "epoch": 0.58, - "grad_norm": 0.5214675627145766, - "learning_rate": 7.799938430555152e-06, - "loss": 0.3822, + "epoch": 0.47, + "grad_norm": 0.3800940413104027, + "learning_rate": 1.156331592158924e-05, + "loss": 0.2768, "step": 10156 }, { - "epoch": 0.58, - "grad_norm": 0.35482096271578345, - "learning_rate": 7.798123151239497e-06, - "loss": 0.3462, + "epoch": 0.47, + "grad_norm": 0.44187168502477925, + "learning_rate": 1.1561846279583142e-05, + "loss": 0.331, "step": 10157 }, { - "epoch": 0.58, - "grad_norm": 0.24059969586075008, - "learning_rate": 7.79630794817474e-06, - "loss": 0.1636, + "epoch": 0.47, + "grad_norm": 0.7720060610582207, + "learning_rate": 1.1560376602999272e-05, + "loss": 0.3012, "step": 10158 }, { - "epoch": 0.58, - "grad_norm": 0.39992023315441017, - "learning_rate": 7.794492821423747e-06, - "loss": 0.2323, + "epoch": 0.47, + "grad_norm": 0.3809622337637473, + "learning_rate": 1.1558906891870167e-05, + "loss": 0.2872, "step": 10159 }, { - "epoch": 0.58, - "grad_norm": 0.36592149517228917, - "learning_rate": 7.79267777104937e-06, - "loss": 0.3129, + "epoch": 0.47, + "grad_norm": 0.5929966023069846, + "learning_rate": 1.1557437146228368e-05, + "loss": 0.2618, "step": 10160 }, { - "epoch": 0.58, - "grad_norm": 0.6464934747226485, - "learning_rate": 7.79086279711447e-06, - "loss": 0.3873, + "epoch": 0.47, + "grad_norm": 0.3767094534754204, + "learning_rate": 1.155596736610641e-05, + "loss": 0.264, "step": 10161 }, { - "epoch": 0.58, - "grad_norm": 0.32063480202839684, - "learning_rate": 7.789047899681893e-06, - "loss": 0.2247, + "epoch": 0.47, + "grad_norm": 0.3305448337736627, + "learning_rate": 1.1554497551536836e-05, + "loss": 0.2001, "step": 10162 }, { - "epoch": 0.58, - "grad_norm": 0.2778589463458745, - "learning_rate": 7.787233078814497e-06, - "loss": 0.2119, + "epoch": 0.47, + "grad_norm": 1.0091538655296923, + "learning_rate": 1.1553027702552184e-05, + "loss": 0.5113, "step": 10163 }, { - "epoch": 0.58, - "grad_norm": 0.3474062922703908, - "learning_rate": 7.785418334575122e-06, - "loss": 0.3213, + "epoch": 0.47, + "grad_norm": 0.37395529881074846, + "learning_rate": 1.1551557819184995e-05, + "loss": 0.3261, "step": 10164 }, { - "epoch": 0.58, - "grad_norm": 0.3227767768744291, - "learning_rate": 7.783603667026616e-06, - "loss": 0.2222, + "epoch": 0.47, + "grad_norm": 0.3275817512209674, + "learning_rate": 1.155008790146781e-05, + "loss": 0.194, "step": 10165 }, { - "epoch": 0.58, - "grad_norm": 0.7112062417756925, - "learning_rate": 7.781789076231815e-06, - "loss": 0.387, + "epoch": 0.47, + "grad_norm": 0.643705748089485, + "learning_rate": 1.1548617949433174e-05, + "loss": 0.4454, "step": 10166 }, { - "epoch": 0.58, - "grad_norm": 0.5940613791092809, - "learning_rate": 7.779974562253568e-06, - "loss": 0.3781, + "epoch": 0.47, + "grad_norm": 0.3176775556513125, + "learning_rate": 1.1547147963113629e-05, + "loss": 0.2036, "step": 10167 }, { - "epoch": 0.58, - "grad_norm": 0.24669557310007448, - "learning_rate": 7.778160125154702e-06, - "loss": 0.2217, + "epoch": 0.47, + "grad_norm": 0.38662721924187926, + "learning_rate": 1.154567794254172e-05, + "loss": 0.2001, "step": 10168 }, { - "epoch": 0.58, - "grad_norm": 1.3234491850879115, - "learning_rate": 7.776345764998059e-06, - "loss": 0.8241, + "epoch": 0.47, + "grad_norm": 0.42723856561523843, + "learning_rate": 1.1544207887749992e-05, + "loss": 0.3135, "step": 10169 }, { - "epoch": 0.58, - "grad_norm": 0.2135022809142091, - "learning_rate": 7.774531481846464e-06, - "loss": 0.1409, + "epoch": 0.47, + "grad_norm": 1.1742859992214958, + "learning_rate": 1.1542737798770989e-05, + "loss": 0.6796, "step": 10170 }, { - "epoch": 0.58, - "grad_norm": 0.33929896012752037, - "learning_rate": 7.77271727576275e-06, - "loss": 0.2906, + "epoch": 0.47, + "grad_norm": 0.3351483656920204, + "learning_rate": 1.1541267675637256e-05, + "loss": 0.2276, "step": 10171 }, { - "epoch": 0.58, - "grad_norm": 0.423664471917811, - "learning_rate": 7.770903146809738e-06, - "loss": 0.2725, + "epoch": 0.47, + "grad_norm": 0.4592562570752391, + "learning_rate": 1.1539797518381344e-05, + "loss": 0.3451, "step": 10172 }, { - "epoch": 0.58, - "grad_norm": 0.6369728062982355, - "learning_rate": 7.769089095050258e-06, - "loss": 0.3813, + "epoch": 0.47, + "grad_norm": 0.3768267462197124, + "learning_rate": 1.1538327327035799e-05, + "loss": 0.2628, "step": 10173 }, { - "epoch": 0.58, - "grad_norm": 0.3777810720627077, - "learning_rate": 7.767275120547123e-06, - "loss": 0.2917, + "epoch": 0.47, + "grad_norm": 0.3028772591952371, + "learning_rate": 1.1536857101633168e-05, + "loss": 0.2216, "step": 10174 }, { - "epoch": 0.58, - "grad_norm": 0.24151161950766775, - "learning_rate": 7.765461223363158e-06, - "loss": 0.1624, + "epoch": 0.47, + "grad_norm": 0.8891457891304007, + "learning_rate": 1.1535386842206006e-05, + "loss": 0.3362, "step": 10175 }, { - "epoch": 0.58, - "grad_norm": 0.26233751290851975, - "learning_rate": 7.76364740356117e-06, - "loss": 0.2287, + "epoch": 0.47, + "grad_norm": 0.38591006043636733, + "learning_rate": 1.1533916548786856e-05, + "loss": 0.3243, "step": 10176 }, { - "epoch": 0.58, - "grad_norm": 0.4027707555176561, - "learning_rate": 7.76183366120398e-06, - "loss": 0.333, + "epoch": 0.47, + "grad_norm": 0.38706503699571043, + "learning_rate": 1.1532446221408274e-05, + "loss": 0.2729, "step": 10177 }, { - "epoch": 0.58, - "grad_norm": 0.9316383400032427, - "learning_rate": 7.760019996354396e-06, - "loss": 0.3163, + "epoch": 0.47, + "grad_norm": 0.6405423489008124, + "learning_rate": 1.1530975860102805e-05, + "loss": 0.3426, "step": 10178 }, { - "epoch": 0.58, - "grad_norm": 0.900838231219768, - "learning_rate": 7.75820640907522e-06, - "loss": 0.6042, + "epoch": 0.47, + "grad_norm": 0.32671437326514413, + "learning_rate": 1.152950546490301e-05, + "loss": 0.1948, "step": 10179 }, { - "epoch": 0.58, - "grad_norm": 0.26204651119783984, - "learning_rate": 7.75639289942926e-06, - "loss": 0.2604, + "epoch": 0.47, + "grad_norm": 0.4362351742549119, + "learning_rate": 1.1528035035841438e-05, + "loss": 0.2797, "step": 10180 }, { - "epoch": 0.58, - "grad_norm": 0.4479463747107403, - "learning_rate": 7.754579467479318e-06, - "loss": 0.2993, + "epoch": 0.47, + "grad_norm": 0.3217913873289673, + "learning_rate": 1.1526564572950643e-05, + "loss": 0.2483, "step": 10181 }, { - "epoch": 0.59, - "grad_norm": 0.33894552794765487, - "learning_rate": 7.752766113288192e-06, - "loss": 0.1901, + "epoch": 0.47, + "grad_norm": 0.6902061746318561, + "learning_rate": 1.1525094076263177e-05, + "loss": 0.4961, "step": 10182 }, { - "epoch": 0.59, - "grad_norm": 0.3762907960074976, - "learning_rate": 7.750952836918679e-06, - "loss": 0.2689, + "epoch": 0.47, + "grad_norm": 0.42200558154878137, + "learning_rate": 1.1523623545811603e-05, + "loss": 0.2739, "step": 10183 }, { - "epoch": 0.59, - "grad_norm": 0.414454215601207, - "learning_rate": 7.749139638433573e-06, - "loss": 0.3139, + "epoch": 0.47, + "grad_norm": 0.31679117784970634, + "learning_rate": 1.1522152981628465e-05, + "loss": 0.2676, "step": 10184 }, { - "epoch": 0.59, - "grad_norm": 0.586844077895301, - "learning_rate": 7.747326517895662e-06, - "loss": 0.135, + "epoch": 0.47, + "grad_norm": 1.2663529047614424, + "learning_rate": 1.1520682383746334e-05, + "loss": 0.651, "step": 10185 }, { - "epoch": 0.59, - "grad_norm": 0.32530862088865625, - "learning_rate": 7.74551347536774e-06, - "loss": 0.2699, + "epoch": 0.47, + "grad_norm": 0.24770425953787112, + "learning_rate": 1.151921175219776e-05, + "loss": 0.1583, "step": 10186 }, { - "epoch": 0.59, - "grad_norm": 1.1672584420814525, - "learning_rate": 7.743700510912588e-06, - "loss": 0.6975, + "epoch": 0.47, + "grad_norm": 0.5926337662263135, + "learning_rate": 1.1517741087015297e-05, + "loss": 0.3936, "step": 10187 }, { - "epoch": 0.59, - "grad_norm": 0.24495780308060752, - "learning_rate": 7.741887624592992e-06, - "loss": 0.2082, + "epoch": 0.47, + "grad_norm": 0.36496979234608184, + "learning_rate": 1.1516270388231513e-05, + "loss": 0.2693, "step": 10188 }, { - "epoch": 0.59, - "grad_norm": 0.36084956108999233, - "learning_rate": 7.740074816471727e-06, - "loss": 0.277, + "epoch": 0.47, + "grad_norm": 0.3565303636247562, + "learning_rate": 1.1514799655878964e-05, + "loss": 0.3038, "step": 10189 }, { - "epoch": 0.59, - "grad_norm": 0.40954337704045063, - "learning_rate": 7.738262086611578e-06, - "loss": 0.2866, + "epoch": 0.47, + "grad_norm": 0.7211374844523986, + "learning_rate": 1.151332888999021e-05, + "loss": 0.5078, "step": 10190 }, { - "epoch": 0.59, - "grad_norm": 0.36418034114752734, - "learning_rate": 7.736449435075314e-06, - "loss": 0.2365, + "epoch": 0.47, + "grad_norm": 0.29169914698755656, + "learning_rate": 1.151185809059781e-05, + "loss": 0.1417, "step": 10191 }, { - "epoch": 0.59, - "grad_norm": 0.35156018638606407, - "learning_rate": 7.734636861925706e-06, - "loss": 0.2822, + "epoch": 0.47, + "grad_norm": 0.2848598452822274, + "learning_rate": 1.1510387257734332e-05, + "loss": 0.2144, "step": 10192 }, { - "epoch": 0.59, - "grad_norm": 0.4751770010443096, - "learning_rate": 7.732824367225531e-06, - "loss": 0.3951, + "epoch": 0.47, + "grad_norm": 0.4000164331422373, + "learning_rate": 1.1508916391432337e-05, + "loss": 0.3232, "step": 10193 }, { - "epoch": 0.59, - "grad_norm": 0.6841245197795643, - "learning_rate": 7.731011951037547e-06, - "loss": 0.2865, + "epoch": 0.47, + "grad_norm": 0.6592543777555149, + "learning_rate": 1.1507445491724387e-05, + "loss": 0.3274, "step": 10194 }, { - "epoch": 0.59, - "grad_norm": 0.3483252023779255, - "learning_rate": 7.729199613424523e-06, - "loss": 0.2767, + "epoch": 0.47, + "grad_norm": 0.3744945105204196, + "learning_rate": 1.1505974558643045e-05, + "loss": 0.2813, "step": 10195 }, { - "epoch": 0.59, - "grad_norm": 0.24313842780450307, - "learning_rate": 7.727387354449217e-06, - "loss": 0.2301, + "epoch": 0.47, + "grad_norm": 0.48982917350955646, + "learning_rate": 1.1504503592220879e-05, + "loss": 0.3489, "step": 10196 }, { - "epoch": 0.59, - "grad_norm": 1.0930305495887416, - "learning_rate": 7.725575174174395e-06, - "loss": 0.6972, + "epoch": 0.47, + "grad_norm": 0.34494703773002466, + "learning_rate": 1.150303259249045e-05, + "loss": 0.2195, "step": 10197 }, { - "epoch": 0.59, - "grad_norm": 0.28099806807697314, - "learning_rate": 7.723763072662804e-06, - "loss": 0.2012, + "epoch": 0.47, + "grad_norm": 0.3435925037013483, + "learning_rate": 1.1501561559484334e-05, + "loss": 0.1931, "step": 10198 }, { - "epoch": 0.59, - "grad_norm": 0.8655932878922294, - "learning_rate": 7.721951049977196e-06, - "loss": 0.4398, + "epoch": 0.47, + "grad_norm": 0.4365245082354594, + "learning_rate": 1.1500090493235088e-05, + "loss": 0.3136, "step": 10199 }, { - "epoch": 0.59, - "grad_norm": 0.36319675934158235, - "learning_rate": 7.72013910618033e-06, - "loss": 0.3273, + "epoch": 0.47, + "grad_norm": 0.491218393856028, + "learning_rate": 1.1498619393775287e-05, + "loss": 0.3142, "step": 10200 }, { - "epoch": 0.59, - "grad_norm": 0.31370298470256497, - "learning_rate": 7.718327241334944e-06, - "loss": 0.2333, + "epoch": 0.47, + "grad_norm": 0.3744755480645695, + "learning_rate": 1.1497148261137495e-05, + "loss": 0.2145, "step": 10201 }, { - "epoch": 0.59, - "grad_norm": 0.2954558356084548, - "learning_rate": 7.716515455503791e-06, - "loss": 0.201, + "epoch": 0.47, + "grad_norm": 0.5684253867297732, + "learning_rate": 1.1495677095354283e-05, + "loss": 0.4221, "step": 10202 }, { - "epoch": 0.59, - "grad_norm": 0.4505771810527777, - "learning_rate": 7.71470374874961e-06, - "loss": 0.322, + "epoch": 0.47, + "grad_norm": 0.8930526729422205, + "learning_rate": 1.1494205896458224e-05, + "loss": 0.5626, "step": 10203 }, { - "epoch": 0.59, - "grad_norm": 0.27661425856954885, - "learning_rate": 7.712892121135136e-06, - "loss": 0.2182, + "epoch": 0.47, + "grad_norm": 0.23963355279530044, + "learning_rate": 1.1492734664481886e-05, + "loss": 0.1649, "step": 10204 }, { - "epoch": 0.59, - "grad_norm": 1.2487777153308748, - "learning_rate": 7.711080572723113e-06, - "loss": 0.7325, + "epoch": 0.47, + "grad_norm": 0.29581266240009396, + "learning_rate": 1.1491263399457841e-05, + "loss": 0.2571, "step": 10205 }, { - "epoch": 0.59, - "grad_norm": 0.7749108515324832, - "learning_rate": 7.709269103576269e-06, - "loss": 0.3924, + "epoch": 0.47, + "grad_norm": 0.7106020016739378, + "learning_rate": 1.148979210141866e-05, + "loss": 0.4159, "step": 10206 }, { - "epoch": 0.59, - "grad_norm": 0.31900576203015873, - "learning_rate": 7.70745771375734e-06, - "loss": 0.1904, + "epoch": 0.47, + "grad_norm": 0.36174752534838955, + "learning_rate": 1.1488320770396919e-05, + "loss": 0.2142, "step": 10207 }, { - "epoch": 0.59, - "grad_norm": 0.3050746601489578, - "learning_rate": 7.70564640332905e-06, - "loss": 0.2411, + "epoch": 0.47, + "grad_norm": 0.3289790523624033, + "learning_rate": 1.1486849406425189e-05, + "loss": 0.2841, "step": 10208 }, { - "epoch": 0.59, - "grad_norm": 0.4378828524958338, - "learning_rate": 7.703835172354127e-06, - "loss": 0.251, + "epoch": 0.47, + "grad_norm": 1.1218822824462256, + "learning_rate": 1.148537800953605e-05, + "loss": 0.474, "step": 10209 }, { - "epoch": 0.59, - "grad_norm": 0.32933250069623154, - "learning_rate": 7.702024020895292e-06, - "loss": 0.241, + "epoch": 0.47, + "grad_norm": 0.2687911136569279, + "learning_rate": 1.1483906579762072e-05, + "loss": 0.1649, "step": 10210 }, { - "epoch": 0.59, - "grad_norm": 0.8241027953814687, - "learning_rate": 7.70021294901527e-06, - "loss": 0.3369, + "epoch": 0.47, + "grad_norm": 0.5015248941606871, + "learning_rate": 1.148243511713583e-05, + "loss": 0.295, "step": 10211 }, { - "epoch": 0.59, - "grad_norm": 0.3543263899051379, - "learning_rate": 7.69840195677677e-06, - "loss": 0.3201, + "epoch": 0.47, + "grad_norm": 0.37488584104066136, + "learning_rate": 1.1480963621689904e-05, + "loss": 0.3237, "step": 10212 }, { - "epoch": 0.59, - "grad_norm": 0.4099252481168575, - "learning_rate": 7.696591044242513e-06, - "loss": 0.3004, + "epoch": 0.47, + "grad_norm": 0.43109118287211484, + "learning_rate": 1.1479492093456874e-05, + "loss": 0.3259, "step": 10213 }, { - "epoch": 0.59, - "grad_norm": 0.2820915527848439, - "learning_rate": 7.694780211475209e-06, - "loss": 0.166, + "epoch": 0.47, + "grad_norm": 0.5191117791983719, + "learning_rate": 1.1478020532469311e-05, + "loss": 0.285, "step": 10214 }, { - "epoch": 0.59, - "grad_norm": 0.3524674573849591, - "learning_rate": 7.692969458537568e-06, - "loss": 0.3248, + "epoch": 0.47, + "grad_norm": 1.648772730083126, + "learning_rate": 1.1476548938759803e-05, + "loss": 0.6368, "step": 10215 }, { - "epoch": 0.59, - "grad_norm": 0.43080167749690207, - "learning_rate": 7.691158785492294e-06, - "loss": 0.3089, + "epoch": 0.47, + "grad_norm": 0.33326809680769903, + "learning_rate": 1.1475077312360921e-05, + "loss": 0.2618, "step": 10216 }, { - "epoch": 0.59, - "grad_norm": 0.6922394291237297, - "learning_rate": 7.689348192402095e-06, - "loss": 0.2959, + "epoch": 0.47, + "grad_norm": 0.33912314075181815, + "learning_rate": 1.1473605653305248e-05, + "loss": 0.209, "step": 10217 }, { - "epoch": 0.59, - "grad_norm": 0.9169189348266817, - "learning_rate": 7.687537679329668e-06, - "loss": 0.507, + "epoch": 0.47, + "grad_norm": 0.40825646366260115, + "learning_rate": 1.1472133961625368e-05, + "loss": 0.3281, "step": 10218 }, { - "epoch": 0.59, - "grad_norm": 0.3090444070036527, - "learning_rate": 7.685727246337709e-06, - "loss": 0.2482, + "epoch": 0.47, + "grad_norm": 0.9115765089233979, + "learning_rate": 1.147066223735386e-05, + "loss": 0.5649, "step": 10219 }, { - "epoch": 0.59, - "grad_norm": 0.26794700844517866, - "learning_rate": 7.683916893488918e-06, - "loss": 0.2302, + "epoch": 0.47, + "grad_norm": 0.2647785821512999, + "learning_rate": 1.1469190480523308e-05, + "loss": 0.234, "step": 10220 }, { - "epoch": 0.59, - "grad_norm": 0.29467839115121064, - "learning_rate": 7.682106620845984e-06, - "loss": 0.1529, + "epoch": 0.47, + "grad_norm": 1.0759309611842651, + "learning_rate": 1.1467718691166296e-05, + "loss": 0.6602, "step": 10221 }, { - "epoch": 0.59, - "grad_norm": 0.35073917502149066, - "learning_rate": 7.6802964284716e-06, - "loss": 0.2598, + "epoch": 0.47, + "grad_norm": 0.58331837780062, + "learning_rate": 1.1466246869315407e-05, + "loss": 0.3291, "step": 10222 }, { - "epoch": 0.59, - "grad_norm": 1.202371112662339, - "learning_rate": 7.678486316428449e-06, - "loss": 0.4024, + "epoch": 0.47, + "grad_norm": 0.3630081565728696, + "learning_rate": 1.1464775015003223e-05, + "loss": 0.2737, "step": 10223 }, { - "epoch": 0.59, - "grad_norm": 0.3608808093489747, - "learning_rate": 7.676676284779217e-06, - "loss": 0.276, + "epoch": 0.47, + "grad_norm": 0.3375725919254272, + "learning_rate": 1.1463303128262332e-05, + "loss": 0.2627, "step": 10224 }, { - "epoch": 0.59, - "grad_norm": 0.32842709343076865, - "learning_rate": 7.674866333586586e-06, - "loss": 0.2837, + "epoch": 0.47, + "grad_norm": 0.3891784965956488, + "learning_rate": 1.146183120912532e-05, + "loss": 0.2516, "step": 10225 }, { - "epoch": 0.59, - "grad_norm": 0.9377059450455212, - "learning_rate": 7.673056462913235e-06, - "loss": 0.5578, + "epoch": 0.47, + "grad_norm": 0.4823553011029802, + "learning_rate": 1.1460359257624775e-05, + "loss": 0.3168, "step": 10226 }, { - "epoch": 0.59, - "grad_norm": 0.22635475620796228, - "learning_rate": 7.671246672821837e-06, - "loss": 0.1744, + "epoch": 0.47, + "grad_norm": 0.8738341770979363, + "learning_rate": 1.1458887273793284e-05, + "loss": 0.3351, "step": 10227 }, { - "epoch": 0.59, - "grad_norm": 0.34285947837407843, - "learning_rate": 7.669436963375067e-06, - "loss": 0.2717, + "epoch": 0.47, + "grad_norm": 0.33517463305405243, + "learning_rate": 1.1457415257663436e-05, + "loss": 0.2987, "step": 10228 }, { - "epoch": 0.59, - "grad_norm": 1.1313131442190563, - "learning_rate": 7.667627334635595e-06, - "loss": 0.46, + "epoch": 0.47, + "grad_norm": 0.40067792989432044, + "learning_rate": 1.1455943209267817e-05, + "loss": 0.292, "step": 10229 }, { - "epoch": 0.59, - "grad_norm": 0.8438766470568757, - "learning_rate": 7.665817786666088e-06, - "loss": 0.3498, + "epoch": 0.47, + "grad_norm": 0.29907360793259075, + "learning_rate": 1.145447112863902e-05, + "loss": 0.1861, "step": 10230 }, { - "epoch": 0.59, - "grad_norm": 0.4222669207775579, - "learning_rate": 7.664008319529215e-06, - "loss": 0.3328, + "epoch": 0.47, + "grad_norm": 0.3974779189034825, + "learning_rate": 1.1452999015809629e-05, + "loss": 0.2666, "step": 10231 }, { - "epoch": 0.59, - "grad_norm": 0.3097461985519247, - "learning_rate": 7.66219893328763e-06, - "loss": 0.2693, + "epoch": 0.47, + "grad_norm": 0.4168640175920408, + "learning_rate": 1.1451526870812247e-05, + "loss": 0.3396, "step": 10232 }, { - "epoch": 0.59, - "grad_norm": 0.4686433548216791, - "learning_rate": 7.660389628003993e-06, - "loss": 0.3129, + "epoch": 0.47, + "grad_norm": 0.4938468291372108, + "learning_rate": 1.1450054693679455e-05, + "loss": 0.2815, "step": 10233 }, { - "epoch": 0.59, - "grad_norm": 0.42214270318871294, - "learning_rate": 7.658580403740965e-06, - "loss": 0.2034, + "epoch": 0.47, + "grad_norm": 0.49496518119075433, + "learning_rate": 1.1448582484443853e-05, + "loss": 0.3112, "step": 10234 }, { - "epoch": 0.59, - "grad_norm": 0.5099855157917578, - "learning_rate": 7.656771260561195e-06, - "loss": 0.3126, + "epoch": 0.47, + "grad_norm": 0.40971457986124626, + "learning_rate": 1.1447110243138029e-05, + "loss": 0.3512, "step": 10235 }, { - "epoch": 0.59, - "grad_norm": 0.2866231632582778, - "learning_rate": 7.654962198527338e-06, - "loss": 0.2458, + "epoch": 0.47, + "grad_norm": 0.28601452017274603, + "learning_rate": 1.1445637969794578e-05, + "loss": 0.2, "step": 10236 }, { - "epoch": 0.59, - "grad_norm": 0.2955888837904505, - "learning_rate": 7.653153217702036e-06, - "loss": 0.1982, + "epoch": 0.47, + "grad_norm": 0.8234833694499625, + "learning_rate": 1.1444165664446097e-05, + "loss": 0.5126, "step": 10237 }, { - "epoch": 0.59, - "grad_norm": 0.38594128734646455, - "learning_rate": 7.651344318147941e-06, - "loss": 0.24, + "epoch": 0.47, + "grad_norm": 0.2910269256410644, + "learning_rate": 1.1442693327125179e-05, + "loss": 0.2373, "step": 10238 }, { - "epoch": 0.59, - "grad_norm": 0.493756437636117, - "learning_rate": 7.649535499927688e-06, - "loss": 0.3752, + "epoch": 0.47, + "grad_norm": 0.445956859376595, + "learning_rate": 1.1441220957864421e-05, + "loss": 0.3613, "step": 10239 }, { - "epoch": 0.59, - "grad_norm": 0.25207716691183124, - "learning_rate": 7.647726763103923e-06, - "loss": 0.2299, + "epoch": 0.47, + "grad_norm": 0.4212051157676677, + "learning_rate": 1.1439748556696422e-05, + "loss": 0.2633, "step": 10240 }, { - "epoch": 0.59, - "grad_norm": 0.7780560932689861, - "learning_rate": 7.645918107739274e-06, - "loss": 0.4704, + "epoch": 0.47, + "grad_norm": 0.4105376894196895, + "learning_rate": 1.1438276123653777e-05, + "loss": 0.2974, "step": 10241 }, { - "epoch": 0.59, - "grad_norm": 0.3710784879455275, - "learning_rate": 7.644109533896384e-06, - "loss": 0.2544, + "epoch": 0.47, + "grad_norm": 0.44955678308112823, + "learning_rate": 1.1436803658769082e-05, + "loss": 0.25, "step": 10242 }, { - "epoch": 0.59, - "grad_norm": 0.28552083656844823, - "learning_rate": 7.642301041637879e-06, - "loss": 0.2177, + "epoch": 0.47, + "grad_norm": 0.4034492310112792, + "learning_rate": 1.1435331162074944e-05, + "loss": 0.2406, "step": 10243 }, { - "epoch": 0.59, - "grad_norm": 0.47267057079111247, - "learning_rate": 7.640492631026387e-06, - "loss": 0.3323, + "epoch": 0.47, + "grad_norm": 0.2614146797009579, + "learning_rate": 1.143385863360395e-05, + "loss": 0.2495, "step": 10244 }, { - "epoch": 0.59, - "grad_norm": 0.7320353696301287, - "learning_rate": 7.638684302124533e-06, - "loss": 0.4111, + "epoch": 0.47, + "grad_norm": 0.9287387766195588, + "learning_rate": 1.1432386073388718e-05, + "loss": 0.5735, "step": 10245 }, { - "epoch": 0.59, - "grad_norm": 0.4219758171502595, - "learning_rate": 7.63687605499494e-06, - "loss": 0.2955, + "epoch": 0.47, + "grad_norm": 0.4761256895277955, + "learning_rate": 1.1430913481461831e-05, + "loss": 0.2654, "step": 10246 }, { - "epoch": 0.59, - "grad_norm": 0.36371007108240666, - "learning_rate": 7.635067889700228e-06, - "loss": 0.2585, + "epoch": 0.47, + "grad_norm": 0.41709491754546196, + "learning_rate": 1.1429440857855903e-05, + "loss": 0.3101, "step": 10247 }, { - "epoch": 0.59, - "grad_norm": 0.24237484085577543, - "learning_rate": 7.633259806303012e-06, - "loss": 0.2022, + "epoch": 0.47, + "grad_norm": 0.39695366596301934, + "learning_rate": 1.142796820260353e-05, + "loss": 0.3024, "step": 10248 }, { - "epoch": 0.59, - "grad_norm": 0.5130982347891825, - "learning_rate": 7.63145180486591e-06, - "loss": 0.2591, + "epoch": 0.47, + "grad_norm": 0.42567568059294664, + "learning_rate": 1.1426495515737314e-05, + "loss": 0.2345, "step": 10249 }, { - "epoch": 0.59, - "grad_norm": 0.3943639466503955, - "learning_rate": 7.629643885451527e-06, - "loss": 0.212, + "epoch": 0.47, + "grad_norm": 0.3062563473505486, + "learning_rate": 1.1425022797289869e-05, + "loss": 0.2184, "step": 10250 }, { - "epoch": 0.59, - "grad_norm": 0.42780601563784476, - "learning_rate": 7.627836048122477e-06, - "loss": 0.3231, + "epoch": 0.47, + "grad_norm": 0.4846357718123354, + "learning_rate": 1.142355004729379e-05, + "loss": 0.2796, "step": 10251 }, { - "epoch": 0.59, - "grad_norm": 0.5485089590488623, - "learning_rate": 7.626028292941361e-06, - "loss": 0.3429, + "epoch": 0.47, + "grad_norm": 0.4793455563062799, + "learning_rate": 1.1422077265781684e-05, + "loss": 0.3047, "step": 10252 }, { - "epoch": 0.59, - "grad_norm": 0.38528395368808604, - "learning_rate": 7.624220619970784e-06, - "loss": 0.2701, + "epoch": 0.47, + "grad_norm": 0.37087493666425386, + "learning_rate": 1.1420604452786158e-05, + "loss": 0.2371, "step": 10253 }, { - "epoch": 0.59, - "grad_norm": 0.27098911193826375, - "learning_rate": 7.622413029273343e-06, - "loss": 0.1638, + "epoch": 0.47, + "grad_norm": 1.2210868350158257, + "learning_rate": 1.1419131608339817e-05, + "loss": 0.6439, "step": 10254 }, { - "epoch": 0.59, - "grad_norm": 0.301421858486742, - "learning_rate": 7.62060552091164e-06, - "loss": 0.2811, + "epoch": 0.47, + "grad_norm": 0.4855698945279773, + "learning_rate": 1.1417658732475272e-05, + "loss": 0.3809, "step": 10255 }, { - "epoch": 0.59, - "grad_norm": 0.3772924937493043, - "learning_rate": 7.618798094948262e-06, - "loss": 0.2382, + "epoch": 0.47, + "grad_norm": 0.2637302181082653, + "learning_rate": 1.1416185825225128e-05, + "loss": 0.2055, "step": 10256 }, { - "epoch": 0.59, - "grad_norm": 0.6013924440847807, - "learning_rate": 7.616990751445806e-06, - "loss": 0.4171, + "epoch": 0.47, + "grad_norm": 0.4818568528380723, + "learning_rate": 1.1414712886621997e-05, + "loss": 0.3007, "step": 10257 }, { - "epoch": 0.59, - "grad_norm": 0.39066029970772426, - "learning_rate": 7.615183490466858e-06, - "loss": 0.2633, + "epoch": 0.47, + "grad_norm": 0.4345055090756826, + "learning_rate": 1.1413239916698486e-05, + "loss": 0.2686, "step": 10258 }, { - "epoch": 0.59, - "grad_norm": 0.36730340180357374, - "learning_rate": 7.613376312074001e-06, - "loss": 0.3297, + "epoch": 0.47, + "grad_norm": 0.38136010183540703, + "learning_rate": 1.14117669154872e-05, + "loss": 0.2156, "step": 10259 }, { - "epoch": 0.59, - "grad_norm": 0.2195929862317336, - "learning_rate": 7.611569216329821e-06, - "loss": 0.1535, + "epoch": 0.47, + "grad_norm": 0.3898164021292159, + "learning_rate": 1.1410293883020762e-05, + "loss": 0.3046, "step": 10260 }, { - "epoch": 0.59, - "grad_norm": 0.40569141532500297, - "learning_rate": 7.609762203296896e-06, - "loss": 0.2855, + "epoch": 0.47, + "grad_norm": 1.0238676149240145, + "learning_rate": 1.1408820819331771e-05, + "loss": 0.663, "step": 10261 }, { - "epoch": 0.59, - "grad_norm": 0.566404154555934, - "learning_rate": 7.607955273037804e-06, - "loss": 0.4164, + "epoch": 0.47, + "grad_norm": 0.33782345567894206, + "learning_rate": 1.140734772445285e-05, + "loss": 0.2364, "step": 10262 }, { - "epoch": 0.59, - "grad_norm": 0.3857204802136643, - "learning_rate": 7.606148425615117e-06, - "loss": 0.2772, + "epoch": 0.47, + "grad_norm": 0.4340291922830695, + "learning_rate": 1.1405874598416608e-05, + "loss": 0.3574, "step": 10263 }, { - "epoch": 0.59, - "grad_norm": 0.608997704118891, - "learning_rate": 7.604341661091409e-06, - "loss": 0.3627, + "epoch": 0.47, + "grad_norm": 0.27849993533083767, + "learning_rate": 1.1404401441255652e-05, + "loss": 0.1976, "step": 10264 }, { - "epoch": 0.59, - "grad_norm": 0.4045959883896538, - "learning_rate": 7.602534979529246e-06, - "loss": 0.3597, + "epoch": 0.47, + "grad_norm": 0.4449873625792461, + "learning_rate": 1.1402928253002606e-05, + "loss": 0.2765, "step": 10265 }, { - "epoch": 0.59, - "grad_norm": 0.20847234205880777, - "learning_rate": 7.600728380991191e-06, - "loss": 0.1583, + "epoch": 0.47, + "grad_norm": 1.0691891015940598, + "learning_rate": 1.1401455033690076e-05, + "loss": 0.3985, "step": 10266 }, { - "epoch": 0.59, - "grad_norm": 0.39509917853189525, - "learning_rate": 7.598921865539811e-06, - "loss": 0.2954, + "epoch": 0.47, + "grad_norm": 0.34771383805519146, + "learning_rate": 1.1399981783350685e-05, + "loss": 0.3077, "step": 10267 }, { - "epoch": 0.59, - "grad_norm": 0.33854641736778623, - "learning_rate": 7.597115433237664e-06, - "loss": 0.2913, + "epoch": 0.47, + "grad_norm": 0.39605919001193546, + "learning_rate": 1.1398508502017047e-05, + "loss": 0.2845, "step": 10268 }, { - "epoch": 0.59, - "grad_norm": 0.5218405160890431, - "learning_rate": 7.5953090841473035e-06, - "loss": 0.3258, + "epoch": 0.47, + "grad_norm": 0.6816193445514331, + "learning_rate": 1.1397035189721779e-05, + "loss": 0.3356, "step": 10269 }, { - "epoch": 0.59, - "grad_norm": 0.5757741501759367, - "learning_rate": 7.593502818331289e-06, - "loss": 0.3423, + "epoch": 0.47, + "grad_norm": 0.31991346895367145, + "learning_rate": 1.1395561846497495e-05, + "loss": 0.2039, "step": 10270 }, { - "epoch": 0.59, - "grad_norm": 0.2601837400754058, - "learning_rate": 7.5916966358521645e-06, - "loss": 0.2893, + "epoch": 0.47, + "grad_norm": 0.35895653795127924, + "learning_rate": 1.139408847237682e-05, + "loss": 0.248, "step": 10271 }, { - "epoch": 0.59, - "grad_norm": 0.43141156338626285, - "learning_rate": 7.589890536772486e-06, - "loss": 0.2517, + "epoch": 0.47, + "grad_norm": 0.3809574960695648, + "learning_rate": 1.1392615067392368e-05, + "loss": 0.2787, "step": 10272 }, { - "epoch": 0.59, - "grad_norm": 0.2912064861117238, - "learning_rate": 7.588084521154791e-06, - "loss": 0.1262, + "epoch": 0.47, + "grad_norm": 0.6639648479341921, + "learning_rate": 1.1391141631576762e-05, + "loss": 0.4803, "step": 10273 }, { - "epoch": 0.59, - "grad_norm": 0.39090096704594307, - "learning_rate": 7.586278589061628e-06, - "loss": 0.3112, + "epoch": 0.47, + "grad_norm": 0.3423545991517534, + "learning_rate": 1.1389668164962622e-05, + "loss": 0.2647, "step": 10274 }, { - "epoch": 0.59, - "grad_norm": 0.50511253032244, - "learning_rate": 7.584472740555533e-06, - "loss": 0.3057, + "epoch": 0.47, + "grad_norm": 0.31776632972741903, + "learning_rate": 1.1388194667582565e-05, + "loss": 0.2879, "step": 10275 }, { - "epoch": 0.59, - "grad_norm": 0.3452619407210834, - "learning_rate": 7.582666975699043e-06, - "loss": 0.1574, + "epoch": 0.47, + "grad_norm": 0.2779867389835172, + "learning_rate": 1.138672113946922e-05, + "loss": 0.161, "step": 10276 }, { - "epoch": 0.59, - "grad_norm": 0.37024431661053503, - "learning_rate": 7.5808612945546915e-06, - "loss": 0.3077, + "epoch": 0.47, + "grad_norm": 0.36519260585574503, + "learning_rate": 1.1385247580655203e-05, + "loss": 0.2814, "step": 10277 }, { - "epoch": 0.59, - "grad_norm": 0.44955413310834985, - "learning_rate": 7.5790556971850095e-06, - "loss": 0.2907, + "epoch": 0.47, + "grad_norm": 0.9885177349014758, + "learning_rate": 1.138377399117314e-05, + "loss": 0.6528, "step": 10278 }, { - "epoch": 0.59, - "grad_norm": 0.22616688185308217, - "learning_rate": 7.577250183652523e-06, - "loss": 0.1954, + "epoch": 0.47, + "grad_norm": 0.43579339112583904, + "learning_rate": 1.1382300371055655e-05, + "loss": 0.2766, "step": 10279 }, { - "epoch": 0.59, - "grad_norm": 0.4488134009283226, - "learning_rate": 7.575444754019762e-06, - "loss": 0.3517, + "epoch": 0.47, + "grad_norm": 0.39417927095653055, + "learning_rate": 1.1380826720335372e-05, + "loss": 0.2855, "step": 10280 }, { - "epoch": 0.59, - "grad_norm": 0.6806739153782752, - "learning_rate": 7.5736394083492414e-06, - "loss": 0.4589, + "epoch": 0.47, + "grad_norm": 0.8743122534537983, + "learning_rate": 1.1379353039044915e-05, + "loss": 0.6, "step": 10281 }, { - "epoch": 0.59, - "grad_norm": 0.38788290607606046, - "learning_rate": 7.571834146703486e-06, - "loss": 0.1904, + "epoch": 0.47, + "grad_norm": 0.2000181944542249, + "learning_rate": 1.1377879327216914e-05, + "loss": 0.0901, "step": 10282 }, { - "epoch": 0.59, - "grad_norm": 0.3014316610003963, - "learning_rate": 7.57002896914501e-06, - "loss": 0.2887, + "epoch": 0.47, + "grad_norm": 0.3458166170039005, + "learning_rate": 1.1376405584883989e-05, + "loss": 0.2575, "step": 10283 }, { - "epoch": 0.59, - "grad_norm": 0.4034575913681455, - "learning_rate": 7.568223875736325e-06, - "loss": 0.2604, + "epoch": 0.47, + "grad_norm": 0.39648603757402784, + "learning_rate": 1.1374931812078774e-05, + "loss": 0.3212, "step": 10284 }, { - "epoch": 0.59, - "grad_norm": 0.6416524913164146, - "learning_rate": 7.566418866539944e-06, - "loss": 0.3879, + "epoch": 0.47, + "grad_norm": 0.6135255567182756, + "learning_rate": 1.137345800883389e-05, + "loss": 0.3187, "step": 10285 }, { - "epoch": 0.59, - "grad_norm": 0.23814333645697722, - "learning_rate": 7.5646139416183705e-06, - "loss": 0.1818, + "epoch": 0.47, + "grad_norm": 0.3716330568238397, + "learning_rate": 1.1371984175181975e-05, + "loss": 0.2929, "step": 10286 }, { - "epoch": 0.59, - "grad_norm": 0.35517257625022275, - "learning_rate": 7.562809101034114e-06, - "loss": 0.3117, + "epoch": 0.47, + "grad_norm": 0.5060002001710548, + "learning_rate": 1.1370510311155649e-05, + "loss": 0.3895, "step": 10287 }, { - "epoch": 0.59, - "grad_norm": 1.2153446890355475, - "learning_rate": 7.56100434484967e-06, - "loss": 0.7712, + "epoch": 0.47, + "grad_norm": 0.25080205312299664, + "learning_rate": 1.1369036416787547e-05, + "loss": 0.1821, "step": 10288 }, { - "epoch": 0.59, - "grad_norm": 0.30753262429141587, - "learning_rate": 7.559199673127545e-06, - "loss": 0.1986, + "epoch": 0.47, + "grad_norm": 0.3124648326795265, + "learning_rate": 1.1367562492110295e-05, + "loss": 0.1889, "step": 10289 }, { - "epoch": 0.59, - "grad_norm": 0.7300295591145592, - "learning_rate": 7.557395085930227e-06, - "loss": 0.4217, + "epoch": 0.47, + "grad_norm": 0.69978826119039, + "learning_rate": 1.1366088537156531e-05, + "loss": 0.46, "step": 10290 }, { - "epoch": 0.59, - "grad_norm": 0.32301624520605615, - "learning_rate": 7.555590583320214e-06, - "loss": 0.2986, + "epoch": 0.47, + "grad_norm": 0.37295039236498245, + "learning_rate": 1.136461455195888e-05, + "loss": 0.3156, "step": 10291 }, { - "epoch": 0.59, - "grad_norm": 0.255707287279116, - "learning_rate": 7.553786165359993e-06, - "loss": 0.1879, + "epoch": 0.47, + "grad_norm": 0.33243915659654566, + "learning_rate": 1.1363140536549985e-05, + "loss": 0.2319, "step": 10292 }, { - "epoch": 0.59, - "grad_norm": 0.41385565707671423, - "learning_rate": 7.551981832112054e-06, - "loss": 0.2826, + "epoch": 0.47, + "grad_norm": 0.9001388826056123, + "learning_rate": 1.1361666490962468e-05, + "loss": 0.6388, "step": 10293 }, { - "epoch": 0.59, - "grad_norm": 0.38640746910827395, - "learning_rate": 7.550177583638876e-06, - "loss": 0.3198, + "epoch": 0.47, + "grad_norm": 0.297744693106454, + "learning_rate": 1.1360192415228966e-05, + "loss": 0.1802, "step": 10294 }, { - "epoch": 0.59, - "grad_norm": 0.2565067070850015, - "learning_rate": 7.548373420002945e-06, - "loss": 0.2217, + "epoch": 0.47, + "grad_norm": 0.2961885815447325, + "learning_rate": 1.1358718309382117e-05, + "loss": 0.2233, "step": 10295 }, { - "epoch": 0.59, - "grad_norm": 0.7864258593626743, - "learning_rate": 7.546569341266737e-06, - "loss": 0.4573, + "epoch": 0.47, + "grad_norm": 0.5072913810635437, + "learning_rate": 1.1357244173454554e-05, + "loss": 0.3217, "step": 10296 }, { - "epoch": 0.59, - "grad_norm": 0.5094781014286576, - "learning_rate": 7.544765347492727e-06, - "loss": 0.3626, + "epoch": 0.47, + "grad_norm": 0.7473094718037766, + "learning_rate": 1.1355770007478911e-05, + "loss": 0.4135, "step": 10297 }, { - "epoch": 0.59, - "grad_norm": 0.3531764204079477, - "learning_rate": 7.542961438743389e-06, - "loss": 0.2905, + "epoch": 0.47, + "grad_norm": 0.3205890023637088, + "learning_rate": 1.135429581148783e-05, + "loss": 0.2159, "step": 10298 }, { - "epoch": 0.59, - "grad_norm": 0.22608108952594327, - "learning_rate": 7.54115761508119e-06, - "loss": 0.1933, + "epoch": 0.47, + "grad_norm": 0.48876937748996746, + "learning_rate": 1.1352821585513944e-05, + "loss": 0.3569, "step": 10299 }, { - "epoch": 0.59, - "grad_norm": 0.5899693029472266, - "learning_rate": 7.539353876568594e-06, - "loss": 0.4156, + "epoch": 0.47, + "grad_norm": 0.3382032736580527, + "learning_rate": 1.135134732958989e-05, + "loss": 0.2527, "step": 10300 }, { - "epoch": 0.59, - "grad_norm": 0.3815342442871858, - "learning_rate": 7.537550223268071e-06, - "loss": 0.3133, + "epoch": 0.47, + "grad_norm": 0.28534189833192425, + "learning_rate": 1.1349873043748309e-05, + "loss": 0.2197, "step": 10301 }, { - "epoch": 0.59, - "grad_norm": 0.7859397443567046, - "learning_rate": 7.5357466552420745e-06, - "loss": 0.2873, + "epoch": 0.47, + "grad_norm": 0.8703848331355342, + "learning_rate": 1.1348398728021839e-05, + "loss": 0.3579, "step": 10302 }, { - "epoch": 0.59, - "grad_norm": 0.46196064535471065, - "learning_rate": 7.533943172553068e-06, - "loss": 0.336, + "epoch": 0.47, + "grad_norm": 0.3884191793569075, + "learning_rate": 1.1346924382443123e-05, + "loss": 0.3269, "step": 10303 }, { - "epoch": 0.59, - "grad_norm": 0.3143589100536943, - "learning_rate": 7.5321397752635e-06, - "loss": 0.2493, + "epoch": 0.47, + "grad_norm": 0.6195843442666087, + "learning_rate": 1.13454500070448e-05, + "loss": 0.3559, "step": 10304 }, { - "epoch": 0.59, - "grad_norm": 0.1956301211002063, - "learning_rate": 7.53033646343583e-06, - "loss": 0.116, + "epoch": 0.47, + "grad_norm": 0.4020835947342085, + "learning_rate": 1.134397560185951e-05, + "loss": 0.264, "step": 10305 }, { - "epoch": 0.59, - "grad_norm": 0.33209653400560535, - "learning_rate": 7.528533237132498e-06, - "loss": 0.3106, + "epoch": 0.47, + "grad_norm": 0.31855783027763895, + "learning_rate": 1.1342501166919892e-05, + "loss": 0.2141, "step": 10306 }, { - "epoch": 0.59, - "grad_norm": 0.324432066107435, - "learning_rate": 7.526730096415957e-06, - "loss": 0.2727, + "epoch": 0.47, + "grad_norm": 0.4312139279807672, + "learning_rate": 1.1341026702258597e-05, + "loss": 0.2995, "step": 10307 }, { - "epoch": 0.59, - "grad_norm": 0.6904170847854164, - "learning_rate": 7.524927041348646e-06, - "loss": 0.4153, + "epoch": 0.47, + "grad_norm": 0.7585701943419573, + "learning_rate": 1.1339552207908261e-05, + "loss": 0.2959, "step": 10308 }, { - "epoch": 0.59, - "grad_norm": 0.5469584121601104, - "learning_rate": 7.523124071993004e-06, - "loss": 0.2054, + "epoch": 0.47, + "grad_norm": 1.5999188780741584, + "learning_rate": 1.133807768390153e-05, + "loss": 0.8873, "step": 10309 }, { - "epoch": 0.59, - "grad_norm": 0.34453117937491873, - "learning_rate": 7.521321188411469e-06, - "loss": 0.2789, + "epoch": 0.47, + "grad_norm": 0.42757841261243795, + "learning_rate": 1.133660313027105e-05, + "loss": 0.3068, "step": 10310 }, { - "epoch": 0.59, - "grad_norm": 0.2578278431274199, - "learning_rate": 7.519518390666474e-06, - "loss": 0.237, + "epoch": 0.47, + "grad_norm": 0.2867664246976474, + "learning_rate": 1.1335128547049465e-05, + "loss": 0.2456, "step": 10311 }, { - "epoch": 0.59, - "grad_norm": 0.22170453038599644, - "learning_rate": 7.517715678820452e-06, - "loss": 0.1511, + "epoch": 0.47, + "grad_norm": 1.1522713996381815, + "learning_rate": 1.133365393426942e-05, + "loss": 0.6862, "step": 10312 }, { - "epoch": 0.59, - "grad_norm": 0.3583110798163475, - "learning_rate": 7.515913052935827e-06, - "loss": 0.2837, + "epoch": 0.47, + "grad_norm": 0.2802975675512129, + "learning_rate": 1.133217929196356e-05, + "loss": 0.225, "step": 10313 }, { - "epoch": 0.59, - "grad_norm": 0.7549106305530019, - "learning_rate": 7.514110513075028e-06, - "loss": 0.4292, + "epoch": 0.47, + "grad_norm": 0.5019230593365495, + "learning_rate": 1.133070462016454e-05, + "loss": 0.3705, "step": 10314 }, { - "epoch": 0.59, - "grad_norm": 0.291936518934735, - "learning_rate": 7.512308059300474e-06, - "loss": 0.2353, + "epoch": 0.47, + "grad_norm": 0.27335855591609165, + "learning_rate": 1.1329229918904998e-05, + "loss": 0.2194, "step": 10315 }, { - "epoch": 0.59, - "grad_norm": 0.3369054517703914, - "learning_rate": 7.510505691674586e-06, - "loss": 0.2834, + "epoch": 0.47, + "grad_norm": 0.35374739141694717, + "learning_rate": 1.1327755188217589e-05, + "loss": 0.2562, "step": 10316 }, { - "epoch": 0.59, - "grad_norm": 0.2868584847338976, - "learning_rate": 7.5087034102597775e-06, - "loss": 0.1644, + "epoch": 0.47, + "grad_norm": 1.1337975192663041, + "learning_rate": 1.1326280428134955e-05, + "loss": 0.7098, "step": 10317 }, { - "epoch": 0.59, - "grad_norm": 0.2998619310436137, - "learning_rate": 7.506901215118465e-06, - "loss": 0.2527, + "epoch": 0.47, + "grad_norm": 0.43959187821650814, + "learning_rate": 1.1324805638689755e-05, + "loss": 0.2591, "step": 10318 }, { - "epoch": 0.59, - "grad_norm": 0.36454420205043037, - "learning_rate": 7.505099106313053e-06, - "loss": 0.2659, + "epoch": 0.47, + "grad_norm": 0.31528131134359977, + "learning_rate": 1.1323330819914633e-05, + "loss": 0.2631, "step": 10319 }, { - "epoch": 0.59, - "grad_norm": 1.0795147065212092, - "learning_rate": 7.503297083905955e-06, - "loss": 0.3916, + "epoch": 0.47, + "grad_norm": 0.4423086109707012, + "learning_rate": 1.1321855971842243e-05, + "loss": 0.3337, "step": 10320 }, { - "epoch": 0.59, - "grad_norm": 1.1310884841179982, - "learning_rate": 7.5014951479595684e-06, - "loss": 0.5808, + "epoch": 0.47, + "grad_norm": 0.18445951665694407, + "learning_rate": 1.1320381094505237e-05, + "loss": 0.1166, "step": 10321 }, { - "epoch": 0.59, - "grad_norm": 0.2958718654952075, - "learning_rate": 7.499693298536301e-06, - "loss": 0.1898, + "epoch": 0.47, + "grad_norm": 0.6696726318044355, + "learning_rate": 1.1318906187936264e-05, + "loss": 0.3999, "step": 10322 }, { - "epoch": 0.59, - "grad_norm": 0.28992196328142583, - "learning_rate": 7.497891535698546e-06, - "loss": 0.2596, + "epoch": 0.47, + "grad_norm": 0.33117515248752394, + "learning_rate": 1.1317431252167982e-05, + "loss": 0.2855, "step": 10323 }, { - "epoch": 0.59, - "grad_norm": 1.1602792649842537, - "learning_rate": 7.496089859508697e-06, - "loss": 0.5329, + "epoch": 0.47, + "grad_norm": 0.5654776779593655, + "learning_rate": 1.1315956287233041e-05, + "loss": 0.3441, "step": 10324 }, { - "epoch": 0.59, - "grad_norm": 0.3745377074556588, - "learning_rate": 7.494288270029152e-06, - "loss": 0.2368, + "epoch": 0.47, + "grad_norm": 0.5920476639011267, + "learning_rate": 1.1314481293164094e-05, + "loss": 0.3681, "step": 10325 }, { - "epoch": 0.59, - "grad_norm": 0.31340930333139033, - "learning_rate": 7.492486767322293e-06, - "loss": 0.2712, + "epoch": 0.47, + "grad_norm": 0.3288642528875592, + "learning_rate": 1.1313006269993798e-05, + "loss": 0.2635, "step": 10326 }, { - "epoch": 0.59, - "grad_norm": 0.46084638494659036, - "learning_rate": 7.490685351450513e-06, - "loss": 0.3427, + "epoch": 0.47, + "grad_norm": 0.30848342236159026, + "learning_rate": 1.1311531217754811e-05, + "loss": 0.2537, "step": 10327 }, { - "epoch": 0.59, - "grad_norm": 0.3007749876753642, - "learning_rate": 7.488884022476189e-06, - "loss": 0.1969, + "epoch": 0.47, + "grad_norm": 0.35074493176966903, + "learning_rate": 1.1310056136479788e-05, + "loss": 0.2037, "step": 10328 }, { - "epoch": 0.59, - "grad_norm": 1.0453565752421365, - "learning_rate": 7.487082780461704e-06, - "loss": 0.5567, + "epoch": 0.47, + "grad_norm": 0.5465964774385761, + "learning_rate": 1.1308581026201382e-05, + "loss": 0.379, "step": 10329 }, { - "epoch": 0.59, - "grad_norm": 0.40266828324910653, - "learning_rate": 7.485281625469432e-06, - "loss": 0.3253, + "epoch": 0.47, + "grad_norm": 0.7923109677071862, + "learning_rate": 1.1307105886952252e-05, + "loss": 0.4294, "step": 10330 }, { - "epoch": 0.59, - "grad_norm": 0.33743676652775795, - "learning_rate": 7.483480557561753e-06, - "loss": 0.2332, + "epoch": 0.47, + "grad_norm": 0.2705009215670562, + "learning_rate": 1.1305630718765062e-05, + "loss": 0.2311, "step": 10331 }, { - "epoch": 0.59, - "grad_norm": 0.4111867303618263, - "learning_rate": 7.481679576801035e-06, - "loss": 0.2775, + "epoch": 0.47, + "grad_norm": 0.52122179232924, + "learning_rate": 1.1304155521672462e-05, + "loss": 0.3922, "step": 10332 }, { - "epoch": 0.59, - "grad_norm": 0.4693763211900533, - "learning_rate": 7.479878683249642e-06, - "loss": 0.2668, + "epoch": 0.47, + "grad_norm": 0.4841463485034858, + "learning_rate": 1.130268029570712e-05, + "loss": 0.3063, "step": 10333 }, { - "epoch": 0.59, - "grad_norm": 0.503106972794314, - "learning_rate": 7.478077876969943e-06, - "loss": 0.2492, + "epoch": 0.47, + "grad_norm": 0.2555582088417973, + "learning_rate": 1.1301205040901688e-05, + "loss": 0.1522, "step": 10334 }, { - "epoch": 0.59, - "grad_norm": 0.32620964008429154, - "learning_rate": 7.476277158024299e-06, - "loss": 0.2576, + "epoch": 0.47, + "grad_norm": 0.34725897140016204, + "learning_rate": 1.1299729757288831e-05, + "loss": 0.283, "step": 10335 }, { - "epoch": 0.59, - "grad_norm": 0.6618248472899589, - "learning_rate": 7.474476526475066e-06, - "loss": 0.4087, + "epoch": 0.47, + "grad_norm": 0.7768461671055017, + "learning_rate": 1.129825444490121e-05, + "loss": 0.3859, "step": 10336 }, { - "epoch": 0.59, - "grad_norm": 0.42152985701301704, - "learning_rate": 7.4726759823846054e-06, - "loss": 0.2839, + "epoch": 0.47, + "grad_norm": 0.3524429920000527, + "learning_rate": 1.129677910377149e-05, + "loss": 0.2158, "step": 10337 }, { - "epoch": 0.59, - "grad_norm": 0.2510070165287194, - "learning_rate": 7.470875525815263e-06, - "loss": 0.2232, + "epoch": 0.47, + "grad_norm": 0.5036851506241941, + "learning_rate": 1.1295303733932327e-05, + "loss": 0.3666, "step": 10338 }, { - "epoch": 0.59, - "grad_norm": 0.42724280486977506, - "learning_rate": 7.4690751568293955e-06, - "loss": 0.2797, + "epoch": 0.47, + "grad_norm": 0.2857278056397454, + "learning_rate": 1.129382833541639e-05, + "loss": 0.2792, "step": 10339 }, { - "epoch": 0.59, - "grad_norm": 0.37480903963553386, - "learning_rate": 7.467274875489345e-06, - "loss": 0.2427, + "epoch": 0.48, + "grad_norm": 0.8514205573203432, + "learning_rate": 1.1292352908256339e-05, + "loss": 0.5409, "step": 10340 }, { - "epoch": 0.59, - "grad_norm": 0.41897775646501684, - "learning_rate": 7.465474681857459e-06, - "loss": 0.2865, + "epoch": 0.48, + "grad_norm": 0.3151899311681576, + "learning_rate": 1.1290877452484839e-05, + "loss": 0.2029, "step": 10341 }, { - "epoch": 0.59, - "grad_norm": 0.3516394732341963, - "learning_rate": 7.463674575996072e-06, - "loss": 0.3245, + "epoch": 0.48, + "grad_norm": 0.6038445200055288, + "learning_rate": 1.1289401968134559e-05, + "loss": 0.3361, "step": 10342 }, { - "epoch": 0.59, - "grad_norm": 0.3376477205658888, - "learning_rate": 7.461874557967528e-06, - "loss": 0.2722, + "epoch": 0.48, + "grad_norm": 0.47885151197512227, + "learning_rate": 1.1287926455238161e-05, + "loss": 0.3372, "step": 10343 }, { - "epoch": 0.59, - "grad_norm": 0.2938787637187035, - "learning_rate": 7.4600746278341575e-06, - "loss": 0.1601, + "epoch": 0.48, + "grad_norm": 0.3287649613145602, + "learning_rate": 1.1286450913828313e-05, + "loss": 0.2238, "step": 10344 }, { - "epoch": 0.59, - "grad_norm": 0.4272505094482967, - "learning_rate": 7.458274785658295e-06, - "loss": 0.2518, + "epoch": 0.48, + "grad_norm": 1.0318625080307138, + "learning_rate": 1.128497534393768e-05, + "loss": 0.6484, "step": 10345 }, { - "epoch": 0.59, - "grad_norm": 0.32516909890328166, - "learning_rate": 7.4564750315022645e-06, - "loss": 0.2598, + "epoch": 0.48, + "grad_norm": 0.3484331906177394, + "learning_rate": 1.1283499745598934e-05, + "loss": 0.224, "step": 10346 }, { - "epoch": 0.59, - "grad_norm": 0.36114353203460814, - "learning_rate": 7.454675365428397e-06, - "loss": 0.305, + "epoch": 0.48, + "grad_norm": 0.2508115041356779, + "learning_rate": 1.1282024118844739e-05, + "loss": 0.2142, "step": 10347 }, { - "epoch": 0.59, - "grad_norm": 0.5091804731487372, - "learning_rate": 7.452875787499012e-06, - "loss": 0.3558, + "epoch": 0.48, + "grad_norm": 0.8771639175612558, + "learning_rate": 1.1280548463707768e-05, + "loss": 0.4832, "step": 10348 }, { - "epoch": 0.59, - "grad_norm": 0.3474769886170156, - "learning_rate": 7.451076297776427e-06, - "loss": 0.2804, + "epoch": 0.48, + "grad_norm": 0.5115356403139549, + "learning_rate": 1.1279072780220683e-05, + "loss": 0.305, "step": 10349 }, { - "epoch": 0.59, - "grad_norm": 0.45404733187295016, - "learning_rate": 7.4492768963229635e-06, - "loss": 0.3461, + "epoch": 0.48, + "grad_norm": 0.4584300789461833, + "learning_rate": 1.1277597068416164e-05, + "loss": 0.2214, "step": 10350 }, { - "epoch": 0.59, - "grad_norm": 0.22564809690050838, - "learning_rate": 7.447477583200928e-06, - "loss": 0.1469, + "epoch": 0.48, + "grad_norm": 0.42663526425557635, + "learning_rate": 1.1276121328326877e-05, + "loss": 0.3658, "step": 10351 }, { - "epoch": 0.59, - "grad_norm": 0.41250335520063286, - "learning_rate": 7.445678358472637e-06, - "loss": 0.2752, + "epoch": 0.48, + "grad_norm": 0.6576434355627522, + "learning_rate": 1.1274645559985493e-05, + "loss": 0.3718, "step": 10352 }, { - "epoch": 0.59, - "grad_norm": 0.43262651395981655, - "learning_rate": 7.443879222200392e-06, - "loss": 0.3522, + "epoch": 0.48, + "grad_norm": 0.4721620815114536, + "learning_rate": 1.1273169763424686e-05, + "loss": 0.3281, "step": 10353 }, { - "epoch": 0.59, - "grad_norm": 0.3365479924421112, - "learning_rate": 7.442080174446502e-06, - "loss": 0.2761, + "epoch": 0.48, + "grad_norm": 0.4333122680678041, + "learning_rate": 1.1271693938677125e-05, + "loss": 0.2161, "step": 10354 }, { - "epoch": 0.59, - "grad_norm": 0.39423987473222083, - "learning_rate": 7.440281215273262e-06, - "loss": 0.2763, + "epoch": 0.48, + "grad_norm": 0.3954661912479092, + "learning_rate": 1.1270218085775486e-05, + "loss": 0.2726, "step": 10355 }, { - "epoch": 0.6, - "grad_norm": 0.4763702671153097, - "learning_rate": 7.438482344742977e-06, - "loss": 0.3736, + "epoch": 0.48, + "grad_norm": 0.3980889572487219, + "learning_rate": 1.1268742204752443e-05, + "loss": 0.3231, "step": 10356 }, { - "epoch": 0.6, - "grad_norm": 0.24311212151397302, - "learning_rate": 7.436683562917937e-06, - "loss": 0.1081, + "epoch": 0.48, + "grad_norm": 0.8014019329086285, + "learning_rate": 1.1267266295640674e-05, + "loss": 0.465, "step": 10357 }, { - "epoch": 0.6, - "grad_norm": 0.31815626651918666, - "learning_rate": 7.4348848698604345e-06, - "loss": 0.2684, + "epoch": 0.48, + "grad_norm": 0.5926428564063171, + "learning_rate": 1.1265790358472845e-05, + "loss": 0.3616, "step": 10358 }, { - "epoch": 0.6, - "grad_norm": 0.3554260506520077, - "learning_rate": 7.433086265632759e-06, - "loss": 0.3245, - "step": 10359 + "epoch": 0.48, + "grad_norm": 0.28229545465090206, + "learning_rate": 1.1264314393281644e-05, + "loss": 0.2734, + "step": 10359 }, { - "epoch": 0.6, - "grad_norm": 0.7899975431739951, - "learning_rate": 7.431287750297196e-06, - "loss": 0.5236, + "epoch": 0.48, + "grad_norm": 0.2784193430339106, + "learning_rate": 1.1262838400099733e-05, + "loss": 0.1592, "step": 10360 }, { - "epoch": 0.6, - "grad_norm": 0.324058350565275, - "learning_rate": 7.429489323916028e-06, - "loss": 0.2124, + "epoch": 0.48, + "grad_norm": 0.7779545616168673, + "learning_rate": 1.1261362378959804e-05, + "loss": 0.3976, "step": 10361 }, { - "epoch": 0.6, - "grad_norm": 0.30049014342614966, - "learning_rate": 7.427690986551534e-06, - "loss": 0.2998, + "epoch": 0.48, + "grad_norm": 0.36453024874176576, + "learning_rate": 1.1259886329894525e-05, + "loss": 0.272, "step": 10362 }, { - "epoch": 0.6, - "grad_norm": 0.2735467911648441, - "learning_rate": 7.42589273826599e-06, - "loss": 0.1753, + "epoch": 0.48, + "grad_norm": 0.379909447301651, + "learning_rate": 1.1258410252936579e-05, + "loss": 0.2593, "step": 10363 }, { - "epoch": 0.6, - "grad_norm": 0.3070856253154252, - "learning_rate": 7.42409457912167e-06, - "loss": 0.214, + "epoch": 0.48, + "grad_norm": 0.8047358860883154, + "learning_rate": 1.1256934148118638e-05, + "loss": 0.3858, "step": 10364 }, { - "epoch": 0.6, - "grad_norm": 0.7289098597123872, - "learning_rate": 7.422296509180844e-06, - "loss": 0.3994, + "epoch": 0.48, + "grad_norm": 0.37764827946750706, + "learning_rate": 1.1255458015473389e-05, + "loss": 0.3089, "step": 10365 }, { - "epoch": 0.6, - "grad_norm": 0.3466921745387132, - "learning_rate": 7.420498528505783e-06, - "loss": 0.3249, + "epoch": 0.48, + "grad_norm": 0.4906176457167061, + "learning_rate": 1.1253981855033506e-05, + "loss": 0.2446, "step": 10366 }, { - "epoch": 0.6, - "grad_norm": 0.30005732279309993, - "learning_rate": 7.418700637158742e-06, - "loss": 0.2126, + "epoch": 0.48, + "grad_norm": 0.28792063780380467, + "learning_rate": 1.1252505666831678e-05, + "loss": 0.2103, "step": 10367 }, { - "epoch": 0.6, - "grad_norm": 1.4382862328192985, - "learning_rate": 7.416902835201989e-06, - "loss": 0.7743, + "epoch": 0.48, + "grad_norm": 0.42098664105199396, + "learning_rate": 1.1251029450900583e-05, + "loss": 0.3091, "step": 10368 }, { - "epoch": 0.6, - "grad_norm": 0.3143363107928643, - "learning_rate": 7.415105122697777e-06, - "loss": 0.2053, + "epoch": 0.48, + "grad_norm": 1.316902525492053, + "learning_rate": 1.12495532072729e-05, + "loss": 0.7238, "step": 10369 }, { - "epoch": 0.6, - "grad_norm": 0.27691108069319204, - "learning_rate": 7.413307499708367e-06, - "loss": 0.2016, + "epoch": 0.48, + "grad_norm": 0.5179790073499815, + "learning_rate": 1.1248076935981313e-05, + "loss": 0.2572, "step": 10370 }, { - "epoch": 0.6, - "grad_norm": 0.47985595670757103, - "learning_rate": 7.411509966296004e-06, - "loss": 0.297, + "epoch": 0.48, + "grad_norm": 0.340750577520889, + "learning_rate": 1.1246600637058504e-05, + "loss": 0.2853, "step": 10371 }, { - "epoch": 0.6, - "grad_norm": 0.9858514760671856, - "learning_rate": 7.409712522522942e-06, - "loss": 0.7043, + "epoch": 0.48, + "grad_norm": 0.42242178433156535, + "learning_rate": 1.1245124310537162e-05, + "loss": 0.3107, "step": 10372 }, { - "epoch": 0.6, - "grad_norm": 0.6016526088054295, - "learning_rate": 7.407915168451423e-06, - "loss": 0.3614, + "epoch": 0.48, + "grad_norm": 0.22737409600273112, + "learning_rate": 1.1243647956449962e-05, + "loss": 0.0977, "step": 10373 }, { - "epoch": 0.6, - "grad_norm": 0.29457207851404604, - "learning_rate": 7.40611790414369e-06, - "loss": 0.24, + "epoch": 0.48, + "grad_norm": 0.4625992143785026, + "learning_rate": 1.1242171574829599e-05, + "loss": 0.3135, "step": 10374 }, { - "epoch": 0.6, - "grad_norm": 0.5271657134892388, - "learning_rate": 7.404320729661982e-06, - "loss": 0.2722, + "epoch": 0.48, + "grad_norm": 0.5067547115062119, + "learning_rate": 1.1240695165708756e-05, + "loss": 0.3233, "step": 10375 }, { - "epoch": 0.6, - "grad_norm": 0.4621286015570544, - "learning_rate": 7.402523645068536e-06, - "loss": 0.3243, + "epoch": 0.48, + "grad_norm": 0.873340469094776, + "learning_rate": 1.1239218729120116e-05, + "loss": 0.3334, "step": 10376 }, { - "epoch": 0.6, - "grad_norm": 0.26337030099005204, - "learning_rate": 7.400726650425585e-06, - "loss": 0.2056, + "epoch": 0.48, + "grad_norm": 0.3766129159465697, + "learning_rate": 1.1237742265096365e-05, + "loss": 0.2573, "step": 10377 }, { - "epoch": 0.6, - "grad_norm": 0.337834575079801, - "learning_rate": 7.3989297457953565e-06, - "loss": 0.3024, + "epoch": 0.48, + "grad_norm": 0.38241036402717354, + "learning_rate": 1.1236265773670196e-05, + "loss": 0.3241, "step": 10378 }, { - "epoch": 0.6, - "grad_norm": 0.5600185258598154, - "learning_rate": 7.3971329312400805e-06, - "loss": 0.4083, + "epoch": 0.48, + "grad_norm": 0.27457262063928434, + "learning_rate": 1.1234789254874292e-05, + "loss": 0.1985, "step": 10379 }, { - "epoch": 0.6, - "grad_norm": 0.4112617760836703, - "learning_rate": 7.395336206821979e-06, - "loss": 0.2426, + "epoch": 0.48, + "grad_norm": 0.34112933437618415, + "learning_rate": 1.123331270874135e-05, + "loss": 0.2137, "step": 10380 }, { - "epoch": 0.6, - "grad_norm": 0.615001228721209, - "learning_rate": 7.393539572603274e-06, - "loss": 0.3951, + "epoch": 0.48, + "grad_norm": 0.9560648293149798, + "learning_rate": 1.1231836135304048e-05, + "loss": 0.4868, "step": 10381 }, { - "epoch": 0.6, - "grad_norm": 0.2863126488000933, - "learning_rate": 7.391743028646179e-06, - "loss": 0.2512, + "epoch": 0.48, + "grad_norm": 0.42202100817100363, + "learning_rate": 1.1230359534595083e-05, + "loss": 0.3213, "step": 10382 }, { - "epoch": 0.6, - "grad_norm": 0.31806544952047155, - "learning_rate": 7.3899465750129116e-06, - "loss": 0.2348, + "epoch": 0.48, + "grad_norm": 0.3286914144069076, + "learning_rate": 1.1228882906647142e-05, + "loss": 0.2192, "step": 10383 }, { - "epoch": 0.6, - "grad_norm": 0.27767063661119495, - "learning_rate": 7.388150211765682e-06, - "loss": 0.1522, + "epoch": 0.48, + "grad_norm": 1.1711469445351945, + "learning_rate": 1.1227406251492918e-05, + "loss": 0.6999, "step": 10384 }, { - "epoch": 0.6, - "grad_norm": 0.40574913552979075, - "learning_rate": 7.3863539389667e-06, - "loss": 0.2952, + "epoch": 0.48, + "grad_norm": 0.3110423390905839, + "learning_rate": 1.1225929569165107e-05, + "loss": 0.164, "step": 10385 }, { - "epoch": 0.6, - "grad_norm": 0.3449818810488741, - "learning_rate": 7.384557756678166e-06, - "loss": 0.2971, + "epoch": 0.48, + "grad_norm": 0.2548628792637335, + "learning_rate": 1.1224452859696392e-05, + "loss": 0.2194, "step": 10386 }, { - "epoch": 0.6, - "grad_norm": 0.4392748651396246, - "learning_rate": 7.382761664962287e-06, - "loss": 0.2963, + "epoch": 0.48, + "grad_norm": 0.6524479101520946, + "learning_rate": 1.1222976123119473e-05, + "loss": 0.35, "step": 10387 }, { - "epoch": 0.6, - "grad_norm": 0.5556219375952675, - "learning_rate": 7.380965663881259e-06, - "loss": 0.3425, + "epoch": 0.48, + "grad_norm": 0.8857400083685697, + "learning_rate": 1.122149935946704e-05, + "loss": 0.5202, "step": 10388 }, { - "epoch": 0.6, - "grad_norm": 0.2330582030181791, - "learning_rate": 7.379169753497275e-06, - "loss": 0.2051, + "epoch": 0.48, + "grad_norm": 0.35600713296597736, + "learning_rate": 1.1220022568771788e-05, + "loss": 0.1922, "step": 10389 }, { - "epoch": 0.6, - "grad_norm": 0.3359612146899458, - "learning_rate": 7.377373933872531e-06, - "loss": 0.2831, + "epoch": 0.48, + "grad_norm": 0.38658788927666876, + "learning_rate": 1.1218545751066414e-05, + "loss": 0.3249, "step": 10390 }, { - "epoch": 0.6, - "grad_norm": 0.7637438385972759, - "learning_rate": 7.375578205069213e-06, - "loss": 0.3904, + "epoch": 0.48, + "grad_norm": 0.2527668790943352, + "learning_rate": 1.1217068906383613e-05, + "loss": 0.1617, "step": 10391 }, { - "epoch": 0.6, - "grad_norm": 0.3545649951392472, - "learning_rate": 7.373782567149514e-06, - "loss": 0.2763, + "epoch": 0.48, + "grad_norm": 0.3834348457702718, + "learning_rate": 1.1215592034756077e-05, + "loss": 0.2592, "step": 10392 }, { - "epoch": 0.6, - "grad_norm": 0.6164382213937144, - "learning_rate": 7.371987020175606e-06, - "loss": 0.3046, + "epoch": 0.48, + "grad_norm": 0.9949861829669839, + "learning_rate": 1.1214115136216506e-05, + "loss": 0.3621, "step": 10393 }, { - "epoch": 0.6, - "grad_norm": 0.3547602337809164, - "learning_rate": 7.370191564209679e-06, - "loss": 0.2542, + "epoch": 0.48, + "grad_norm": 0.3859834539232413, + "learning_rate": 1.1212638210797594e-05, + "loss": 0.3407, "step": 10394 }, { - "epoch": 0.6, - "grad_norm": 0.2775262885159086, - "learning_rate": 7.368396199313901e-06, - "loss": 0.2352, + "epoch": 0.48, + "grad_norm": 0.30591351181056525, + "learning_rate": 1.1211161258532042e-05, + "loss": 0.24, "step": 10395 }, { - "epoch": 0.6, - "grad_norm": 0.4603446729991688, - "learning_rate": 7.3666009255504534e-06, - "loss": 0.2823, + "epoch": 0.48, + "grad_norm": 0.742687768117351, + "learning_rate": 1.1209684279452546e-05, + "loss": 0.3602, "step": 10396 }, { - "epoch": 0.6, - "grad_norm": 0.41311951116192697, - "learning_rate": 7.364805742981499e-06, - "loss": 0.2084, + "epoch": 0.48, + "grad_norm": 0.30733927886895707, + "learning_rate": 1.120820727359181e-05, + "loss": 0.1833, "step": 10397 }, { - "epoch": 0.6, - "grad_norm": 0.26981663565088704, - "learning_rate": 7.363010651669211e-06, - "loss": 0.2726, + "epoch": 0.48, + "grad_norm": 0.372134978281447, + "learning_rate": 1.1206730240982528e-05, + "loss": 0.2581, "step": 10398 }, { - "epoch": 0.6, - "grad_norm": 0.7117455054970516, - "learning_rate": 7.361215651675753e-06, - "loss": 0.398, + "epoch": 0.48, + "grad_norm": 0.46219096621048067, + "learning_rate": 1.1205253181657399e-05, + "loss": 0.3009, "step": 10399 }, { - "epoch": 0.6, - "grad_norm": 0.4345400774009875, - "learning_rate": 7.359420743063282e-06, - "loss": 0.1569, + "epoch": 0.48, + "grad_norm": 1.134721235434766, + "learning_rate": 1.1203776095649128e-05, + "loss": 0.7256, "step": 10400 }, { - "epoch": 0.6, - "grad_norm": 0.3219544373698642, - "learning_rate": 7.357625925893954e-06, - "loss": 0.2259, + "epoch": 0.48, + "grad_norm": 0.3412777286918592, + "learning_rate": 1.1202298982990411e-05, + "loss": 0.2431, "step": 10401 }, { - "epoch": 0.6, - "grad_norm": 0.2688178658293408, - "learning_rate": 7.355831200229928e-06, - "loss": 0.2649, + "epoch": 0.48, + "grad_norm": 0.5081468439957402, + "learning_rate": 1.1200821843713957e-05, + "loss": 0.3602, "step": 10402 }, { - "epoch": 0.6, - "grad_norm": 0.3136728201989251, - "learning_rate": 7.354036566133354e-06, - "loss": 0.2091, + "epoch": 0.48, + "grad_norm": 0.2477793485426281, + "learning_rate": 1.1199344677852466e-05, + "loss": 0.172, "step": 10403 }, { - "epoch": 0.6, - "grad_norm": 0.4715407097290975, - "learning_rate": 7.3522420236663805e-06, - "loss": 0.3276, + "epoch": 0.48, + "grad_norm": 0.3075261009742982, + "learning_rate": 1.1197867485438639e-05, + "loss": 0.2802, "step": 10404 }, { - "epoch": 0.6, - "grad_norm": 0.4956804964246323, - "learning_rate": 7.350447572891148e-06, - "loss": 0.3307, + "epoch": 0.48, + "grad_norm": 0.6733694983512934, + "learning_rate": 1.1196390266505177e-05, + "loss": 0.4787, "step": 10405 }, { - "epoch": 0.6, - "grad_norm": 0.32953209301390884, - "learning_rate": 7.348653213869807e-06, - "loss": 0.1928, + "epoch": 0.48, + "grad_norm": 0.3575938386729694, + "learning_rate": 1.119491302108479e-05, + "loss": 0.2811, "step": 10406 }, { - "epoch": 0.6, - "grad_norm": 0.40875599533641027, - "learning_rate": 7.346858946664488e-06, - "loss": 0.3065, + "epoch": 0.48, + "grad_norm": 0.4654503493358451, + "learning_rate": 1.1193435749210177e-05, + "loss": 0.287, "step": 10407 }, { - "epoch": 0.6, - "grad_norm": 0.2591224485745581, - "learning_rate": 7.345064771337332e-06, - "loss": 0.1801, + "epoch": 0.48, + "grad_norm": 0.534033289151677, + "learning_rate": 1.1191958450914051e-05, + "loss": 0.3269, "step": 10408 }, { - "epoch": 0.6, - "grad_norm": 0.4714728778705214, - "learning_rate": 7.343270687950468e-06, - "loss": 0.36, + "epoch": 0.48, + "grad_norm": 0.4256576207131305, + "learning_rate": 1.1190481126229114e-05, + "loss": 0.3059, "step": 10409 }, { - "epoch": 0.6, - "grad_norm": 0.28898646326817645, - "learning_rate": 7.341476696566026e-06, - "loss": 0.2228, + "epoch": 0.48, + "grad_norm": 0.453286447449527, + "learning_rate": 1.1189003775188072e-05, + "loss": 0.2999, "step": 10410 }, { - "epoch": 0.6, - "grad_norm": 0.7426477752866582, - "learning_rate": 7.33968279724613e-06, - "loss": 0.4396, + "epoch": 0.48, + "grad_norm": 0.25140840652982527, + "learning_rate": 1.1187526397823629e-05, + "loss": 0.2267, "step": 10411 }, { - "epoch": 0.6, - "grad_norm": 1.1024879194552417, - "learning_rate": 7.337888990052906e-06, - "loss": 0.5185, + "epoch": 0.48, + "grad_norm": 1.0343000981659312, + "learning_rate": 1.1186048994168498e-05, + "loss": 0.5581, "step": 10412 }, { - "epoch": 0.6, - "grad_norm": 0.23906818831035612, - "learning_rate": 7.336095275048474e-06, - "loss": 0.1846, + "epoch": 0.48, + "grad_norm": 0.3732133320214956, + "learning_rate": 1.1184571564255386e-05, + "loss": 0.2902, "step": 10413 }, { - "epoch": 0.6, - "grad_norm": 0.3227646272577517, - "learning_rate": 7.334301652294944e-06, - "loss": 0.2678, + "epoch": 0.48, + "grad_norm": 0.395518793607198, + "learning_rate": 1.1183094108117001e-05, + "loss": 0.3347, "step": 10414 }, { - "epoch": 0.6, - "grad_norm": 0.5806326984574692, - "learning_rate": 7.332508121854435e-06, - "loss": 0.4073, + "epoch": 0.48, + "grad_norm": 0.9108237040742793, + "learning_rate": 1.1181616625786054e-05, + "loss": 0.504, "step": 10415 }, { - "epoch": 0.6, - "grad_norm": 0.34300519661195633, - "learning_rate": 7.330714683789053e-06, - "loss": 0.2575, + "epoch": 0.48, + "grad_norm": 0.32794292198632086, + "learning_rate": 1.1180139117295252e-05, + "loss": 0.2453, "step": 10416 }, { - "epoch": 0.6, - "grad_norm": 0.5015264244296673, - "learning_rate": 7.32892133816091e-06, - "loss": 0.3357, + "epoch": 0.48, + "grad_norm": 0.4609480277122226, + "learning_rate": 1.1178661582677309e-05, + "loss": 0.3029, "step": 10417 }, { - "epoch": 0.6, - "grad_norm": 0.42152132629199246, - "learning_rate": 7.327128085032103e-06, - "loss": 0.3183, + "epoch": 0.48, + "grad_norm": 0.38068689510527065, + "learning_rate": 1.1177184021964931e-05, + "loss": 0.3185, "step": 10418 }, { - "epoch": 0.6, - "grad_norm": 0.3411341644741504, - "learning_rate": 7.325334924464737e-06, - "loss": 0.2233, + "epoch": 0.48, + "grad_norm": 0.2548184733522785, + "learning_rate": 1.1175706435190836e-05, + "loss": 0.1718, "step": 10419 }, { - "epoch": 0.6, - "grad_norm": 0.4057527596698589, - "learning_rate": 7.323541856520908e-06, - "loss": 0.3004, + "epoch": 0.48, + "grad_norm": 1.385584649017272, + "learning_rate": 1.1174228822387731e-05, + "loss": 0.6268, "step": 10420 }, { - "epoch": 0.6, - "grad_norm": 0.3473167164819613, - "learning_rate": 7.32174888126271e-06, - "loss": 0.2923, + "epoch": 0.48, + "grad_norm": 0.7638754660959325, + "learning_rate": 1.1172751183588337e-05, + "loss": 0.4243, "step": 10421 }, { - "epoch": 0.6, - "grad_norm": 0.36276706144912185, - "learning_rate": 7.3199559987522305e-06, - "loss": 0.288, + "epoch": 0.48, + "grad_norm": 0.26955637367568647, + "learning_rate": 1.1171273518825358e-05, + "loss": 0.2323, "step": 10422 }, { - "epoch": 0.6, - "grad_norm": 0.41822234452984713, - "learning_rate": 7.3181632090515635e-06, - "loss": 0.2161, + "epoch": 0.48, + "grad_norm": 0.41421620458973796, + "learning_rate": 1.1169795828131516e-05, + "loss": 0.1927, "step": 10423 }, { - "epoch": 0.6, - "grad_norm": 0.5414425489075652, - "learning_rate": 7.316370512222785e-06, - "loss": 0.3239, + "epoch": 0.48, + "grad_norm": 0.4002641911615186, + "learning_rate": 1.1168318111539518e-05, + "loss": 0.2619, "step": 10424 }, { - "epoch": 0.6, - "grad_norm": 0.378916321206655, - "learning_rate": 7.314577908327982e-06, - "loss": 0.2869, + "epoch": 0.48, + "grad_norm": 0.3372361609461174, + "learning_rate": 1.1166840369082083e-05, + "loss": 0.1617, "step": 10425 }, { - "epoch": 0.6, - "grad_norm": 0.33481596176071216, - "learning_rate": 7.312785397429231e-06, - "loss": 0.2687, + "epoch": 0.48, + "grad_norm": 0.3318177550830705, + "learning_rate": 1.1165362600791927e-05, + "loss": 0.3035, "step": 10426 }, { - "epoch": 0.6, - "grad_norm": 0.5608026074266522, - "learning_rate": 7.310992979588607e-06, - "loss": 0.3707, + "epoch": 0.48, + "grad_norm": 0.7190438087088021, + "learning_rate": 1.116388480670177e-05, + "loss": 0.4008, "step": 10427 }, { - "epoch": 0.6, - "grad_norm": 0.319216032928723, - "learning_rate": 7.30920065486818e-06, - "loss": 0.2487, + "epoch": 0.48, + "grad_norm": 0.616686482761905, + "learning_rate": 1.1162406986844322e-05, + "loss": 0.277, "step": 10428 }, { - "epoch": 0.6, - "grad_norm": 0.23822935077111004, - "learning_rate": 7.307408423330016e-06, - "loss": 0.1923, + "epoch": 0.48, + "grad_norm": 0.38363452276997356, + "learning_rate": 1.1160929141252303e-05, + "loss": 0.2536, "step": 10429 }, { - "epoch": 0.6, - "grad_norm": 1.1734266520628216, - "learning_rate": 7.305616285036186e-06, - "loss": 0.5906, + "epoch": 0.48, + "grad_norm": 0.40029414295024074, + "learning_rate": 1.1159451269958434e-05, + "loss": 0.3627, "step": 10430 }, { - "epoch": 0.6, - "grad_norm": 0.306891867740223, - "learning_rate": 7.303824240048744e-06, - "loss": 0.2423, + "epoch": 0.48, + "grad_norm": 0.2748184490233543, + "learning_rate": 1.1157973372995429e-05, + "loss": 0.1667, "step": 10431 }, { - "epoch": 0.6, - "grad_norm": 0.6941208094628137, - "learning_rate": 7.3020322884297565e-06, - "loss": 0.3179, + "epoch": 0.48, + "grad_norm": 0.3509352153472403, + "learning_rate": 1.1156495450396013e-05, + "loss": 0.2693, "step": 10432 }, { - "epoch": 0.6, - "grad_norm": 0.31808162658254446, - "learning_rate": 7.300240430241278e-06, - "loss": 0.2968, + "epoch": 0.48, + "grad_norm": 0.7587009043983467, + "learning_rate": 1.1155017502192899e-05, + "loss": 0.4336, "step": 10433 }, { - "epoch": 0.6, - "grad_norm": 0.4021974465762369, - "learning_rate": 7.298448665545352e-06, - "loss": 0.2697, + "epoch": 0.48, + "grad_norm": 0.32325222741894793, + "learning_rate": 1.1153539528418813e-05, + "loss": 0.2756, "step": 10434 }, { - "epoch": 0.6, - "grad_norm": 0.2515530206184095, - "learning_rate": 7.296656994404034e-06, - "loss": 0.1669, + "epoch": 0.48, + "grad_norm": 0.4938881979964008, + "learning_rate": 1.115206152910647e-05, + "loss": 0.272, "step": 10435 }, { - "epoch": 0.6, - "grad_norm": 0.5861578574807791, - "learning_rate": 7.294865416879366e-06, - "loss": 0.1531, + "epoch": 0.48, + "grad_norm": 0.405804646060389, + "learning_rate": 1.1150583504288598e-05, + "loss": 0.25, "step": 10436 }, { - "epoch": 0.6, - "grad_norm": 0.34651374064157503, - "learning_rate": 7.293073933033394e-06, - "loss": 0.2677, + "epoch": 0.48, + "grad_norm": 0.2806747439880976, + "learning_rate": 1.1149105453997915e-05, + "loss": 0.2208, "step": 10437 }, { - "epoch": 0.6, - "grad_norm": 0.37290784368695534, - "learning_rate": 7.291282542928158e-06, - "loss": 0.3219, + "epoch": 0.48, + "grad_norm": 0.3283333334967406, + "learning_rate": 1.1147627378267147e-05, + "loss": 0.2728, "step": 10438 }, { - "epoch": 0.6, - "grad_norm": 0.8520457116127256, - "learning_rate": 7.289491246625686e-06, - "loss": 0.3167, + "epoch": 0.48, + "grad_norm": 0.9404324281567027, + "learning_rate": 1.1146149277129013e-05, + "loss": 0.4163, "step": 10439 }, { - "epoch": 0.6, - "grad_norm": 0.3610971456454583, - "learning_rate": 7.287700044188019e-06, - "loss": 0.2736, + "epoch": 0.48, + "grad_norm": 0.4173360998227774, + "learning_rate": 1.114467115061624e-05, + "loss": 0.2693, "step": 10440 }, { - "epoch": 0.6, - "grad_norm": 0.27075648165360194, - "learning_rate": 7.28590893567718e-06, - "loss": 0.2578, + "epoch": 0.48, + "grad_norm": 0.5734416808103949, + "learning_rate": 1.114319299876155e-05, + "loss": 0.3988, "step": 10441 }, { - "epoch": 0.6, - "grad_norm": 0.467437287940649, - "learning_rate": 7.2841179211552005e-06, - "loss": 0.1746, + "epoch": 0.48, + "grad_norm": 0.3347746686570038, + "learning_rate": 1.114171482159767e-05, + "loss": 0.2858, "step": 10442 }, { - "epoch": 0.6, - "grad_norm": 0.3203387589486802, - "learning_rate": 7.282327000684099e-06, - "loss": 0.2521, + "epoch": 0.48, + "grad_norm": 0.2695008301412517, + "learning_rate": 1.1140236619157322e-05, + "loss": 0.185, "step": 10443 }, { - "epoch": 0.6, - "grad_norm": 1.0366281064937823, - "learning_rate": 7.280536174325897e-06, - "loss": 0.4184, + "epoch": 0.48, + "grad_norm": 0.6402666690788119, + "learning_rate": 1.1138758391473235e-05, + "loss": 0.3683, "step": 10444 }, { - "epoch": 0.6, - "grad_norm": 0.32022139792761206, - "learning_rate": 7.27874544214261e-06, - "loss": 0.2512, + "epoch": 0.48, + "grad_norm": 0.2789258281802634, + "learning_rate": 1.1137280138578137e-05, + "loss": 0.2223, "step": 10445 }, { - "epoch": 0.6, - "grad_norm": 0.31124170915094695, - "learning_rate": 7.276954804196252e-06, - "loss": 0.2607, + "epoch": 0.48, + "grad_norm": 0.6394255792266542, + "learning_rate": 1.113580186050475e-05, + "loss": 0.3373, "step": 10446 }, { - "epoch": 0.6, - "grad_norm": 0.27313298584475565, - "learning_rate": 7.2751642605488305e-06, - "loss": 0.1748, + "epoch": 0.48, + "grad_norm": 0.4030214831218338, + "learning_rate": 1.1134323557285806e-05, + "loss": 0.3375, "step": 10447 }, { - "epoch": 0.6, - "grad_norm": 1.4671493965457703, - "learning_rate": 7.273373811262356e-06, - "loss": 0.7225, + "epoch": 0.48, + "grad_norm": 0.7383010094661877, + "learning_rate": 1.113284522895403e-05, + "loss": 0.4267, "step": 10448 }, { - "epoch": 0.6, - "grad_norm": 0.30145172575689483, - "learning_rate": 7.271583456398827e-06, - "loss": 0.2257, + "epoch": 0.48, + "grad_norm": 0.5460729461816785, + "learning_rate": 1.1131366875542154e-05, + "loss": 0.3546, "step": 10449 }, { - "epoch": 0.6, - "grad_norm": 0.7093270985040872, - "learning_rate": 7.269793196020247e-06, - "loss": 0.3663, + "epoch": 0.48, + "grad_norm": 0.30763357299544747, + "learning_rate": 1.1129888497082905e-05, + "loss": 0.2865, "step": 10450 }, { - "epoch": 0.6, - "grad_norm": 1.0567717494520565, - "learning_rate": 7.26800303018861e-06, - "loss": 0.7185, + "epoch": 0.48, + "grad_norm": 0.22325843858625022, + "learning_rate": 1.1128410093609017e-05, + "loss": 0.1183, "step": 10451 }, { - "epoch": 0.6, - "grad_norm": 0.3041414305207806, - "learning_rate": 7.266212958965912e-06, - "loss": 0.2066, + "epoch": 0.48, + "grad_norm": 0.6126246244005373, + "learning_rate": 1.1126931665153213e-05, + "loss": 0.3401, "step": 10452 }, { - "epoch": 0.6, - "grad_norm": 0.37389481781956213, - "learning_rate": 7.264422982414143e-06, - "loss": 0.3267, + "epoch": 0.48, + "grad_norm": 0.4676837632395992, + "learning_rate": 1.112545321174823e-05, + "loss": 0.3529, "step": 10453 }, { - "epoch": 0.6, - "grad_norm": 0.25075241124657155, - "learning_rate": 7.2626331005952845e-06, - "loss": 0.1912, + "epoch": 0.48, + "grad_norm": 0.3892445801631183, + "learning_rate": 1.1123974733426794e-05, + "loss": 0.3265, "step": 10454 }, { - "epoch": 0.6, - "grad_norm": 0.3700868988947285, - "learning_rate": 7.260843313571328e-06, - "loss": 0.2145, + "epoch": 0.48, + "grad_norm": 0.4302163898061417, + "learning_rate": 1.1122496230221644e-05, + "loss": 0.2371, "step": 10455 }, { - "epoch": 0.6, - "grad_norm": 0.9142862153982831, - "learning_rate": 7.259053621404246e-06, - "loss": 0.4353, + "epoch": 0.48, + "grad_norm": 0.49071261732353244, + "learning_rate": 1.1121017702165511e-05, + "loss": 0.3573, "step": 10456 }, { - "epoch": 0.6, - "grad_norm": 0.3873001010445203, - "learning_rate": 7.2572640241560225e-06, - "loss": 0.3062, + "epoch": 0.48, + "grad_norm": 0.2751448161761606, + "learning_rate": 1.1119539149291125e-05, + "loss": 0.2085, "step": 10457 }, { - "epoch": 0.6, - "grad_norm": 0.3067948858367847, - "learning_rate": 7.255474521888624e-06, - "loss": 0.1958, + "epoch": 0.48, + "grad_norm": 0.2787839345259726, + "learning_rate": 1.111806057163122e-05, + "loss": 0.1997, "step": 10458 }, { - "epoch": 0.6, - "grad_norm": 0.3374418148179951, - "learning_rate": 7.253685114664029e-06, - "loss": 0.232, + "epoch": 0.48, + "grad_norm": 0.5340163881751548, + "learning_rate": 1.1116581969218533e-05, + "loss": 0.3978, "step": 10459 }, { - "epoch": 0.6, - "grad_norm": 0.45806778271637055, - "learning_rate": 7.251895802544197e-06, - "loss": 0.3377, + "epoch": 0.48, + "grad_norm": 0.6928228228360358, + "learning_rate": 1.1115103342085799e-05, + "loss": 0.4762, "step": 10460 }, { - "epoch": 0.6, - "grad_norm": 0.4224168796644813, - "learning_rate": 7.250106585591098e-06, - "loss": 0.2808, + "epoch": 0.48, + "grad_norm": 0.380686297683117, + "learning_rate": 1.1113624690265747e-05, + "loss": 0.2457, "step": 10461 }, { - "epoch": 0.6, - "grad_norm": 0.4315638186581728, - "learning_rate": 7.2483174638666876e-06, - "loss": 0.2695, + "epoch": 0.48, + "grad_norm": 0.34721988564697875, + "learning_rate": 1.111214601379112e-05, + "loss": 0.3077, "step": 10462 }, { - "epoch": 0.6, - "grad_norm": 1.1670725205025345, - "learning_rate": 7.246528437432927e-06, - "loss": 0.7757, + "epoch": 0.48, + "grad_norm": 0.28904915072329407, + "learning_rate": 1.1110667312694654e-05, + "loss": 0.1434, "step": 10463 }, { - "epoch": 0.6, - "grad_norm": 0.3893678908800406, - "learning_rate": 7.244739506351765e-06, - "loss": 0.2822, + "epoch": 0.48, + "grad_norm": 0.4132329147682992, + "learning_rate": 1.1109188587009083e-05, + "loss": 0.1284, "step": 10464 }, { - "epoch": 0.6, - "grad_norm": 0.27797550100587226, - "learning_rate": 7.242950670685159e-06, - "loss": 0.222, + "epoch": 0.48, + "grad_norm": 0.40050171431398346, + "learning_rate": 1.1107709836767145e-05, + "loss": 0.3495, "step": 10465 }, { - "epoch": 0.6, - "grad_norm": 0.4390055988054794, - "learning_rate": 7.2411619304950535e-06, - "loss": 0.26, + "epoch": 0.48, + "grad_norm": 0.40062463100196644, + "learning_rate": 1.1106231062001577e-05, + "loss": 0.3264, "step": 10466 }, { - "epoch": 0.6, - "grad_norm": 0.32031797829067743, - "learning_rate": 7.239373285843392e-06, - "loss": 0.2501, + "epoch": 0.48, + "grad_norm": 0.6595916567624143, + "learning_rate": 1.1104752262745122e-05, + "loss": 0.3891, "step": 10467 }, { - "epoch": 0.6, - "grad_norm": 0.6696812537335045, - "learning_rate": 7.237584736792112e-06, - "loss": 0.3122, + "epoch": 0.48, + "grad_norm": 0.3393951274548719, + "learning_rate": 1.1103273439030516e-05, + "loss": 0.2405, "step": 10468 }, { - "epoch": 0.6, - "grad_norm": 0.34998214322910576, - "learning_rate": 7.235796283403153e-06, - "loss": 0.3351, + "epoch": 0.48, + "grad_norm": 0.25121963057674984, + "learning_rate": 1.11017945908905e-05, + "loss": 0.1368, "step": 10469 }, { - "epoch": 0.6, - "grad_norm": 0.3469237876049126, - "learning_rate": 7.234007925738451e-06, - "loss": 0.2644, + "epoch": 0.48, + "grad_norm": 0.3887917716849789, + "learning_rate": 1.1100315718357815e-05, + "loss": 0.297, "step": 10470 }, { - "epoch": 0.6, - "grad_norm": 1.4179928240007145, - "learning_rate": 7.2322196638599365e-06, - "loss": 0.5999, + "epoch": 0.48, + "grad_norm": 0.362258615457602, + "learning_rate": 1.1098836821465198e-05, + "loss": 0.2399, "step": 10471 }, { - "epoch": 0.6, - "grad_norm": 0.4999890912597717, - "learning_rate": 7.230431497829533e-06, - "loss": 0.2942, + "epoch": 0.48, + "grad_norm": 0.836081355682549, + "learning_rate": 1.1097357900245394e-05, + "loss": 0.443, "step": 10472 }, { - "epoch": 0.6, - "grad_norm": 0.2984927447908153, - "learning_rate": 7.228643427709172e-06, - "loss": 0.2701, + "epoch": 0.48, + "grad_norm": 0.3868486433914077, + "learning_rate": 1.1095878954731142e-05, + "loss": 0.3141, "step": 10473 }, { - "epoch": 0.6, - "grad_norm": 0.28578890500066373, - "learning_rate": 7.226855453560766e-06, - "loss": 0.1861, + "epoch": 0.48, + "grad_norm": 0.3521297342992916, + "learning_rate": 1.1094399984955189e-05, + "loss": 0.213, "step": 10474 }, { - "epoch": 0.6, - "grad_norm": 0.8567557080685262, - "learning_rate": 7.2250675754462384e-06, - "loss": 0.4243, + "epoch": 0.48, + "grad_norm": 0.3000543897222317, + "learning_rate": 1.1092920990950276e-05, + "loss": 0.1672, "step": 10475 }, { - "epoch": 0.6, - "grad_norm": 0.38207266662063444, - "learning_rate": 7.2232797934275e-06, - "loss": 0.2831, + "epoch": 0.48, + "grad_norm": 0.5572693038545837, + "learning_rate": 1.1091441972749143e-05, + "loss": 0.3156, "step": 10476 }, { - "epoch": 0.6, - "grad_norm": 0.32210840344326147, - "learning_rate": 7.221492107566466e-06, - "loss": 0.285, + "epoch": 0.48, + "grad_norm": 0.4057833305067021, + "learning_rate": 1.1089962930384535e-05, + "loss": 0.2602, "step": 10477 }, { - "epoch": 0.6, - "grad_norm": 0.527301928692191, - "learning_rate": 7.2197045179250395e-06, - "loss": 0.2243, + "epoch": 0.48, + "grad_norm": 0.4324789851934438, + "learning_rate": 1.1088483863889198e-05, + "loss": 0.3499, "step": 10478 }, { - "epoch": 0.6, - "grad_norm": 0.3561268181099593, - "learning_rate": 7.217917024565124e-06, - "loss": 0.2425, + "epoch": 0.48, + "grad_norm": 0.6105258851223422, + "learning_rate": 1.1087004773295881e-05, + "loss": 0.3558, "step": 10479 }, { - "epoch": 0.6, - "grad_norm": 0.38431117081131044, - "learning_rate": 7.216129627548625e-06, - "loss": 0.271, + "epoch": 0.48, + "grad_norm": 0.44911456048286996, + "learning_rate": 1.1085525658637327e-05, + "loss": 0.3338, "step": 10480 }, { - "epoch": 0.6, - "grad_norm": 0.35747867342515993, - "learning_rate": 7.214342326937434e-06, - "loss": 0.2597, + "epoch": 0.48, + "grad_norm": 0.2743707012878145, + "learning_rate": 1.1084046519946279e-05, + "loss": 0.2263, "step": 10481 }, { - "epoch": 0.6, - "grad_norm": 0.3926495922665949, - "learning_rate": 7.212555122793452e-06, - "loss": 0.2489, + "epoch": 0.48, + "grad_norm": 0.4518151786986908, + "learning_rate": 1.1082567357255484e-05, + "loss": 0.2925, "step": 10482 }, { - "epoch": 0.6, - "grad_norm": 0.4978920915555641, - "learning_rate": 7.210768015178563e-06, - "loss": 0.3426, + "epoch": 0.48, + "grad_norm": 0.36213720835369695, + "learning_rate": 1.1081088170597694e-05, + "loss": 0.2738, "step": 10483 }, { - "epoch": 0.6, - "grad_norm": 0.7036657391861827, - "learning_rate": 7.208981004154661e-06, - "loss": 0.4456, + "epoch": 0.48, + "grad_norm": 0.6329368495177247, + "learning_rate": 1.107960896000565e-05, + "loss": 0.3422, "step": 10484 }, { - "epoch": 0.6, - "grad_norm": 0.26724477915832684, - "learning_rate": 7.2071940897836235e-06, - "loss": 0.2103, + "epoch": 0.48, + "grad_norm": 0.3359221937165981, + "learning_rate": 1.1078129725512108e-05, + "loss": 0.2858, "step": 10485 }, { - "epoch": 0.6, - "grad_norm": 0.24639680661311067, - "learning_rate": 7.205407272127336e-06, - "loss": 0.181, + "epoch": 0.48, + "grad_norm": 0.36158589568386224, + "learning_rate": 1.1076650467149812e-05, + "loss": 0.2786, "step": 10486 }, { - "epoch": 0.6, - "grad_norm": 1.1345625130187684, - "learning_rate": 7.203620551247675e-06, - "loss": 0.7537, + "epoch": 0.48, + "grad_norm": 0.28435259863671286, + "learning_rate": 1.1075171184951512e-05, + "loss": 0.0954, "step": 10487 }, { - "epoch": 0.6, - "grad_norm": 0.32662252851637746, - "learning_rate": 7.201833927206514e-06, - "loss": 0.206, + "epoch": 0.48, + "grad_norm": 0.37313586731579, + "learning_rate": 1.1073691878949958e-05, + "loss": 0.2526, "step": 10488 }, { - "epoch": 0.6, - "grad_norm": 0.3779858157814377, - "learning_rate": 7.200047400065722e-06, - "loss": 0.2848, + "epoch": 0.48, + "grad_norm": 0.3548702928360621, + "learning_rate": 1.10722125491779e-05, + "loss": 0.2932, "step": 10489 }, { - "epoch": 0.6, - "grad_norm": 0.7855434602278417, - "learning_rate": 7.198260969887171e-06, - "loss": 0.4231, - "step": 10490 + "epoch": 0.48, + "grad_norm": 0.46231028756359355, + "learning_rate": 1.1070733195668093e-05, + "loss": 0.3072, + "step": 10490 }, { - "epoch": 0.6, - "grad_norm": 0.22071700621294713, - "learning_rate": 7.196474636732722e-06, - "loss": 0.1673, + "epoch": 0.48, + "grad_norm": 0.8279114477556175, + "learning_rate": 1.106925381845328e-05, + "loss": 0.4493, "step": 10491 }, { - "epoch": 0.6, - "grad_norm": 0.41724723056633095, - "learning_rate": 7.194688400664232e-06, - "loss": 0.2648, + "epoch": 0.48, + "grad_norm": 0.3998047182720395, + "learning_rate": 1.1067774417566225e-05, + "loss": 0.2833, "step": 10492 }, { - "epoch": 0.6, - "grad_norm": 0.33254747854591676, - "learning_rate": 7.192902261743566e-06, - "loss": 0.3185, + "epoch": 0.48, + "grad_norm": 0.5277023979282565, + "learning_rate": 1.1066294993039668e-05, + "loss": 0.335, "step": 10493 }, { - "epoch": 0.6, - "grad_norm": 0.23978557568988323, - "learning_rate": 7.191116220032572e-06, - "loss": 0.0635, + "epoch": 0.48, + "grad_norm": 0.24838392954017763, + "learning_rate": 1.1064815544906368e-05, + "loss": 0.1847, "step": 10494 }, { - "epoch": 0.6, - "grad_norm": 0.45877974911373315, - "learning_rate": 7.189330275593104e-06, - "loss": 0.2984, + "epoch": 0.48, + "grad_norm": 0.3825003401325567, + "learning_rate": 1.1063336073199078e-05, + "loss": 0.2771, "step": 10495 }, { - "epoch": 0.6, - "grad_norm": 0.36561107076275134, - "learning_rate": 7.187544428487006e-06, - "loss": 0.2965, + "epoch": 0.48, + "grad_norm": 0.6841718039066883, + "learning_rate": 1.1061856577950554e-05, + "loss": 0.4698, "step": 10496 }, { - "epoch": 0.6, - "grad_norm": 0.6271544762259579, - "learning_rate": 7.1857586787761246e-06, - "loss": 0.3096, + "epoch": 0.48, + "grad_norm": 0.35380129930150256, + "learning_rate": 1.1060377059193548e-05, + "loss": 0.2558, "step": 10497 }, { - "epoch": 0.6, - "grad_norm": 0.22321373342280323, - "learning_rate": 7.183973026522297e-06, - "loss": 0.1597, + "epoch": 0.48, + "grad_norm": 0.4199770391428978, + "learning_rate": 1.1058897516960817e-05, + "loss": 0.3034, "step": 10498 }, { - "epoch": 0.6, - "grad_norm": 1.1306791168483041, - "learning_rate": 7.182187471787365e-06, - "loss": 0.7098, + "epoch": 0.48, + "grad_norm": 0.4474972150871595, + "learning_rate": 1.1057417951285112e-05, + "loss": 0.3073, "step": 10499 }, { - "epoch": 0.6, - "grad_norm": 0.5794004017247366, - "learning_rate": 7.180402014633159e-06, - "loss": 0.2908, + "epoch": 0.48, + "grad_norm": 0.49897879676509915, + "learning_rate": 1.1055938362199194e-05, + "loss": 0.2342, "step": 10500 }, { - "epoch": 0.6, - "grad_norm": 0.27354208770268323, - "learning_rate": 7.178616655121513e-06, - "loss": 0.2454, + "epoch": 0.48, + "grad_norm": 0.3172087798784572, + "learning_rate": 1.1054458749735818e-05, + "loss": 0.2517, "step": 10501 }, { - "epoch": 0.6, - "grad_norm": 1.158877404681521, - "learning_rate": 7.176831393314248e-06, - "loss": 0.6185, + "epoch": 0.48, + "grad_norm": 0.46509604767043045, + "learning_rate": 1.1052979113927744e-05, + "loss": 0.361, "step": 10502 }, { - "epoch": 0.6, - "grad_norm": 0.5729698953409218, - "learning_rate": 7.175046229273191e-06, - "loss": 0.3404, + "epoch": 0.48, + "grad_norm": 0.9198672748557136, + "learning_rate": 1.1051499454807729e-05, + "loss": 0.4044, "step": 10503 }, { - "epoch": 0.6, - "grad_norm": 0.2731478718166219, - "learning_rate": 7.17326116306016e-06, - "loss": 0.196, + "epoch": 0.48, + "grad_norm": 0.37011417945137387, + "learning_rate": 1.1050019772408526e-05, + "loss": 0.2976, "step": 10504 }, { - "epoch": 0.6, - "grad_norm": 0.34421103264516534, - "learning_rate": 7.171476194736975e-06, - "loss": 0.3172, + "epoch": 0.48, + "grad_norm": 0.5606465573241878, + "learning_rate": 1.1048540066762898e-05, + "loss": 0.3638, "step": 10505 }, { - "epoch": 0.6, - "grad_norm": 0.4021245776566278, - "learning_rate": 7.169691324365447e-06, - "loss": 0.2735, + "epoch": 0.48, + "grad_norm": 0.4762704360096836, + "learning_rate": 1.1047060337903603e-05, + "loss": 0.3391, "step": 10506 }, { - "epoch": 0.6, - "grad_norm": 0.4057698150407605, - "learning_rate": 7.167906552007387e-06, - "loss": 0.2176, + "epoch": 0.48, + "grad_norm": 0.28192907145802976, + "learning_rate": 1.1045580585863403e-05, + "loss": 0.1841, "step": 10507 }, { - "epoch": 0.6, - "grad_norm": 0.39037278889321536, - "learning_rate": 7.166121877724599e-06, - "loss": 0.3269, + "epoch": 0.48, + "grad_norm": 0.42639506962391144, + "learning_rate": 1.1044100810675054e-05, + "loss": 0.2773, "step": 10508 }, { - "epoch": 0.6, - "grad_norm": 0.42136169762460707, - "learning_rate": 7.164337301578892e-06, - "loss": 0.2538, + "epoch": 0.48, + "grad_norm": 0.4012117677692907, + "learning_rate": 1.1042621012371322e-05, + "loss": 0.3397, "step": 10509 }, { - "epoch": 0.6, - "grad_norm": 0.3061269281320481, - "learning_rate": 7.162552823632059e-06, - "loss": 0.2145, + "epoch": 0.48, + "grad_norm": 0.3403334699642882, + "learning_rate": 1.1041141190984966e-05, + "loss": 0.1884, "step": 10510 }, { - "epoch": 0.6, - "grad_norm": 0.6243819662075308, - "learning_rate": 7.160768443945902e-06, - "loss": 0.3622, + "epoch": 0.48, + "grad_norm": 0.9449126721424425, + "learning_rate": 1.1039661346548745e-05, + "loss": 0.4054, "step": 10511 }, { - "epoch": 0.6, - "grad_norm": 0.39212060454543934, - "learning_rate": 7.15898416258221e-06, - "loss": 0.3269, + "epoch": 0.48, + "grad_norm": 0.4581157169501, + "learning_rate": 1.1038181479095422e-05, + "loss": 0.3594, "step": 10512 }, { - "epoch": 0.6, - "grad_norm": 0.29126800036976597, - "learning_rate": 7.157199979602777e-06, - "loss": 0.2787, + "epoch": 0.48, + "grad_norm": 0.35248765887577094, + "learning_rate": 1.1036701588657766e-05, + "loss": 0.1952, "step": 10513 }, { - "epoch": 0.6, - "grad_norm": 0.4578357233133374, - "learning_rate": 7.155415895069385e-06, - "loss": 0.1526, + "epoch": 0.48, + "grad_norm": 0.35938071392328425, + "learning_rate": 1.1035221675268533e-05, + "loss": 0.2798, "step": 10514 }, { - "epoch": 0.6, - "grad_norm": 0.7504728210353762, - "learning_rate": 7.153631909043818e-06, - "loss": 0.3463, + "epoch": 0.48, + "grad_norm": 0.45964199401677747, + "learning_rate": 1.1033741738960496e-05, + "loss": 0.3127, "step": 10515 }, { - "epoch": 0.6, - "grad_norm": 0.39710388219731657, - "learning_rate": 7.151848021587855e-06, - "loss": 0.2964, + "epoch": 0.48, + "grad_norm": 0.314595576041925, + "learning_rate": 1.1032261779766404e-05, + "loss": 0.2003, "step": 10516 }, { - "epoch": 0.6, - "grad_norm": 0.3785129422319976, - "learning_rate": 7.150064232763274e-06, - "loss": 0.2846, + "epoch": 0.48, + "grad_norm": 0.4109678016229127, + "learning_rate": 1.1030781797719037e-05, + "loss": 0.3061, "step": 10517 }, { - "epoch": 0.6, - "grad_norm": 0.5141174513877448, - "learning_rate": 7.1482805426318465e-06, - "loss": 0.314, + "epoch": 0.48, + "grad_norm": 1.019781075819112, + "learning_rate": 1.1029301792851152e-05, + "loss": 0.4227, "step": 10518 }, { - "epoch": 0.6, - "grad_norm": 0.2443119966552638, - "learning_rate": 7.146496951255339e-06, - "loss": 0.2207, + "epoch": 0.48, + "grad_norm": 0.37620617565730624, + "learning_rate": 1.102782176519552e-05, + "loss": 0.2765, "step": 10519 }, { - "epoch": 0.6, - "grad_norm": 0.30675794437787113, - "learning_rate": 7.144713458695521e-06, - "loss": 0.2244, + "epoch": 0.48, + "grad_norm": 0.27311145885174176, + "learning_rate": 1.1026341714784902e-05, + "loss": 0.1645, "step": 10520 }, { - "epoch": 0.6, - "grad_norm": 0.5894451000930164, - "learning_rate": 7.1429300650141505e-06, - "loss": 0.3308, + "epoch": 0.48, + "grad_norm": 0.30181095603023833, + "learning_rate": 1.102486164165207e-05, + "loss": 0.2375, "step": 10521 }, { - "epoch": 0.6, - "grad_norm": 0.3686690194975854, - "learning_rate": 7.141146770272993e-06, - "loss": 0.3187, + "epoch": 0.48, + "grad_norm": 0.33833045992181804, + "learning_rate": 1.102338154582979e-05, + "loss": 0.2686, "step": 10522 }, { - "epoch": 0.6, - "grad_norm": 0.6833298869205651, - "learning_rate": 7.139363574533797e-06, - "loss": 0.3821, + "epoch": 0.48, + "grad_norm": 0.7750944009035459, + "learning_rate": 1.1021901427350825e-05, + "loss": 0.3324, "step": 10523 }, { - "epoch": 0.6, - "grad_norm": 0.28139648241122606, - "learning_rate": 7.137580477858319e-06, - "loss": 0.231, + "epoch": 0.48, + "grad_norm": 0.7863860443154413, + "learning_rate": 1.1020421286247948e-05, + "loss": 0.4526, "step": 10524 }, { - "epoch": 0.6, - "grad_norm": 0.41488090474727446, - "learning_rate": 7.1357974803083044e-06, - "loss": 0.3398, + "epoch": 0.48, + "grad_norm": 0.3178439958999361, + "learning_rate": 1.1018941122553929e-05, + "loss": 0.2569, "step": 10525 }, { - "epoch": 0.6, - "grad_norm": 0.30323806929317104, - "learning_rate": 7.134014581945501e-06, - "loss": 0.1871, + "epoch": 0.48, + "grad_norm": 0.4358999354861604, + "learning_rate": 1.1017460936301536e-05, + "loss": 0.2602, "step": 10526 }, { - "epoch": 0.6, - "grad_norm": 0.3600629089927518, - "learning_rate": 7.132231782831649e-06, - "loss": 0.1966, + "epoch": 0.48, + "grad_norm": 0.35421558713601325, + "learning_rate": 1.1015980727523537e-05, + "loss": 0.2384, "step": 10527 }, { - "epoch": 0.6, - "grad_norm": 0.50493454952615, - "learning_rate": 7.130449083028488e-06, - "loss": 0.3759, + "epoch": 0.48, + "grad_norm": 0.43305797235979415, + "learning_rate": 1.1014500496252705e-05, + "loss": 0.2697, "step": 10528 }, { - "epoch": 0.6, - "grad_norm": 0.4093641362730516, - "learning_rate": 7.1286664825977505e-06, - "loss": 0.3145, + "epoch": 0.48, + "grad_norm": 0.45587729597697507, + "learning_rate": 1.1013020242521809e-05, + "loss": 0.3329, "step": 10529 }, { - "epoch": 0.6, - "grad_norm": 0.6043612819812036, - "learning_rate": 7.1268839816011695e-06, - "loss": 0.2636, + "epoch": 0.48, + "grad_norm": 1.3164447007635103, + "learning_rate": 1.1011539966363623e-05, + "loss": 0.3682, "step": 10530 }, { - "epoch": 0.61, - "grad_norm": 0.24807553607158486, - "learning_rate": 7.125101580100474e-06, - "loss": 0.2093, + "epoch": 0.48, + "grad_norm": 0.47031391207400264, + "learning_rate": 1.1010059667810912e-05, + "loss": 0.3183, "step": 10531 }, { - "epoch": 0.61, - "grad_norm": 0.2676598452867699, - "learning_rate": 7.123319278157385e-06, - "loss": 0.2648, + "epoch": 0.48, + "grad_norm": 0.541001824597019, + "learning_rate": 1.1008579346896458e-05, + "loss": 0.3847, "step": 10532 }, { - "epoch": 0.61, - "grad_norm": 0.5550732423895852, - "learning_rate": 7.121537075833629e-06, - "loss": 0.1338, + "epoch": 0.48, + "grad_norm": 0.2551189672906195, + "learning_rate": 1.100709900365303e-05, + "loss": 0.1855, "step": 10533 }, { - "epoch": 0.61, - "grad_norm": 0.36036269402932336, - "learning_rate": 7.119754973190915e-06, - "loss": 0.2997, + "epoch": 0.48, + "grad_norm": 0.4412149642273209, + "learning_rate": 1.1005618638113398e-05, + "loss": 0.271, "step": 10534 }, { - "epoch": 0.61, - "grad_norm": 0.9122378994044027, - "learning_rate": 7.11797297029097e-06, - "loss": 0.3795, + "epoch": 0.48, + "grad_norm": 0.8296472941408071, + "learning_rate": 1.1004138250310341e-05, + "loss": 0.3771, "step": 10535 }, { - "epoch": 0.61, - "grad_norm": 0.3485600601021822, - "learning_rate": 7.116191067195494e-06, - "loss": 0.3105, + "epoch": 0.48, + "grad_norm": 0.9559094775531285, + "learning_rate": 1.1002657840276627e-05, + "loss": 0.3384, "step": 10536 }, { - "epoch": 0.61, - "grad_norm": 0.30727339279065635, - "learning_rate": 7.114409263966195e-06, - "loss": 0.1943, + "epoch": 0.48, + "grad_norm": 0.31850810093317444, + "learning_rate": 1.1001177408045038e-05, + "loss": 0.2703, "step": 10537 }, { - "epoch": 0.61, - "grad_norm": 0.2655701703598823, - "learning_rate": 7.11262756066478e-06, - "loss": 0.1737, + "epoch": 0.48, + "grad_norm": 0.5227635775065415, + "learning_rate": 1.0999696953648344e-05, + "loss": 0.3825, "step": 10538 }, { - "epoch": 0.61, - "grad_norm": 0.6735221155033528, - "learning_rate": 7.110845957352948e-06, - "loss": 0.3591, + "epoch": 0.48, + "grad_norm": 0.4110410218746679, + "learning_rate": 1.0998216477119327e-05, + "loss": 0.1906, "step": 10539 }, { - "epoch": 0.61, - "grad_norm": 0.27521311307189994, - "learning_rate": 7.109064454092398e-06, - "loss": 0.2416, + "epoch": 0.48, + "grad_norm": 0.35454569312027584, + "learning_rate": 1.0996735978490756e-05, + "loss": 0.2947, "step": 10540 }, { - "epoch": 0.61, - "grad_norm": 0.854134216843187, - "learning_rate": 7.1072830509448185e-06, - "loss": 0.4508, + "epoch": 0.48, + "grad_norm": 0.3566371808788487, + "learning_rate": 1.0995255457795412e-05, + "loss": 0.3369, "step": 10541 }, { - "epoch": 0.61, - "grad_norm": 0.6999782759176724, - "learning_rate": 7.105501747971906e-06, - "loss": 0.4854, + "epoch": 0.48, + "grad_norm": 0.4737570145416877, + "learning_rate": 1.0993774915066071e-05, + "loss": 0.275, "step": 10542 }, { - "epoch": 0.61, - "grad_norm": 0.3321506116907094, - "learning_rate": 7.103720545235342e-06, - "loss": 0.2268, + "epoch": 0.48, + "grad_norm": 0.41171487031609777, + "learning_rate": 1.099229435033551e-05, + "loss": 0.2055, "step": 10543 }, { - "epoch": 0.61, - "grad_norm": 0.233533515493403, - "learning_rate": 7.10193944279681e-06, - "loss": 0.2295, + "epoch": 0.48, + "grad_norm": 1.3289081313675228, + "learning_rate": 1.0990813763636511e-05, + "loss": 0.8209, "step": 10544 }, { - "epoch": 0.61, - "grad_norm": 0.7011227141792372, - "learning_rate": 7.100158440717993e-06, - "loss": 0.3943, + "epoch": 0.48, + "grad_norm": 0.433228971571981, + "learning_rate": 1.098933315500185e-05, + "loss": 0.3369, "step": 10545 }, { - "epoch": 0.61, - "grad_norm": 0.3676451737884998, - "learning_rate": 7.098377539060562e-06, - "loss": 0.273, + "epoch": 0.48, + "grad_norm": 0.36195300256043683, + "learning_rate": 1.0987852524464304e-05, + "loss": 0.2461, "step": 10546 }, { - "epoch": 0.61, - "grad_norm": 0.7447326581606627, - "learning_rate": 7.096596737886194e-06, - "loss": 0.3583, + "epoch": 0.48, + "grad_norm": 0.4460752123238476, + "learning_rate": 1.0986371872056658e-05, + "loss": 0.3054, "step": 10547 }, { - "epoch": 0.61, - "grad_norm": 0.32197795833723225, - "learning_rate": 7.0948160372565534e-06, - "loss": 0.2779, + "epoch": 0.48, + "grad_norm": 0.4145390648947652, + "learning_rate": 1.0984891197811686e-05, + "loss": 0.2768, "step": 10548 }, { - "epoch": 0.61, - "grad_norm": 0.3593098276921128, - "learning_rate": 7.093035437233311e-06, - "loss": 0.2799, + "epoch": 0.48, + "grad_norm": 0.30891539996897377, + "learning_rate": 1.0983410501762175e-05, + "loss": 0.2065, "step": 10549 }, { - "epoch": 0.61, - "grad_norm": 0.1792310565543837, - "learning_rate": 7.091254937878125e-06, - "loss": 0.084, + "epoch": 0.48, + "grad_norm": 0.5882592365379976, + "learning_rate": 1.0981929783940904e-05, + "loss": 0.4163, "step": 10550 }, { - "epoch": 0.61, - "grad_norm": 1.0643531416224592, - "learning_rate": 7.089474539252656e-06, - "loss": 0.3914, + "epoch": 0.48, + "grad_norm": 1.3073025517397505, + "learning_rate": 1.0980449044380654e-05, + "loss": 0.4083, "step": 10551 }, { - "epoch": 0.61, - "grad_norm": 0.250677757331347, - "learning_rate": 7.087694241418558e-06, - "loss": 0.2396, + "epoch": 0.48, + "grad_norm": 0.4176127926343007, + "learning_rate": 1.0978968283114207e-05, + "loss": 0.2156, "step": 10552 }, { - "epoch": 0.61, - "grad_norm": 0.3975599441810498, - "learning_rate": 7.085914044437485e-06, - "loss": 0.2871, + "epoch": 0.48, + "grad_norm": 0.30070549285084547, + "learning_rate": 1.0977487500174342e-05, + "loss": 0.2571, "step": 10553 }, { - "epoch": 0.61, - "grad_norm": 0.9224701306896524, - "learning_rate": 7.084133948371081e-06, - "loss": 0.6096, + "epoch": 0.48, + "grad_norm": 0.4488736716399582, + "learning_rate": 1.0976006695593849e-05, + "loss": 0.2828, "step": 10554 }, { - "epoch": 0.61, - "grad_norm": 0.3374955882849087, - "learning_rate": 7.082353953280995e-06, - "loss": 0.2439, + "epoch": 0.48, + "grad_norm": 0.6858772807518426, + "learning_rate": 1.0974525869405506e-05, + "loss": 0.3399, "step": 10555 }, { - "epoch": 0.61, - "grad_norm": 0.21337914040021594, - "learning_rate": 7.080574059228866e-06, - "loss": 0.1718, + "epoch": 0.48, + "grad_norm": 0.3560952445689576, + "learning_rate": 1.0973045021642103e-05, + "loss": 0.266, "step": 10556 }, { - "epoch": 0.61, - "grad_norm": 1.2038444194000044, - "learning_rate": 7.07879426627633e-06, - "loss": 0.3988, + "epoch": 0.48, + "grad_norm": 0.518651057955851, + "learning_rate": 1.097156415233642e-05, + "loss": 0.3857, "step": 10557 }, { - "epoch": 0.61, - "grad_norm": 0.30269484417471876, - "learning_rate": 7.077014574485025e-06, - "loss": 0.24, + "epoch": 0.49, + "grad_norm": 0.476061161862377, + "learning_rate": 1.0970083261521243e-05, + "loss": 0.3056, "step": 10558 }, { - "epoch": 0.61, - "grad_norm": 1.2401651526461381, - "learning_rate": 7.075234983916577e-06, - "loss": 0.7724, + "epoch": 0.49, + "grad_norm": 0.29437660462027354, + "learning_rate": 1.0968602349229356e-05, + "loss": 0.177, "step": 10559 }, { - "epoch": 0.61, - "grad_norm": 0.30485211566640963, - "learning_rate": 7.073455494632618e-06, - "loss": 0.2483, + "epoch": 0.49, + "grad_norm": 0.3670634193355905, + "learning_rate": 1.0967121415493546e-05, + "loss": 0.2626, "step": 10560 }, { - "epoch": 0.61, - "grad_norm": 0.46309273195641676, - "learning_rate": 7.071676106694767e-06, - "loss": 0.2658, + "epoch": 0.49, + "grad_norm": 0.3399862421465084, + "learning_rate": 1.0965640460346603e-05, + "loss": 0.2473, "step": 10561 }, { - "epoch": 0.61, - "grad_norm": 0.32683809943240216, - "learning_rate": 7.06989682016465e-06, - "loss": 0.2232, + "epoch": 0.49, + "grad_norm": 0.4786808125648303, + "learning_rate": 1.096415948382131e-05, + "loss": 0.2723, "step": 10562 }, { - "epoch": 0.61, - "grad_norm": 0.7879418679957149, - "learning_rate": 7.068117635103877e-06, - "loss": 0.2834, + "epoch": 0.49, + "grad_norm": 0.5972191172481575, + "learning_rate": 1.0962678485950455e-05, + "loss": 0.4096, "step": 10563 }, { - "epoch": 0.61, - "grad_norm": 0.3973588991375235, - "learning_rate": 7.066338551574066e-06, - "loss": 0.29, + "epoch": 0.49, + "grad_norm": 0.3976040684743492, + "learning_rate": 1.0961197466766826e-05, + "loss": 0.2799, "step": 10564 }, { - "epoch": 0.61, - "grad_norm": 0.4464487646966575, - "learning_rate": 7.064559569636824e-06, - "loss": 0.3641, + "epoch": 0.49, + "grad_norm": 0.33488019046954265, + "learning_rate": 1.0959716426303214e-05, + "loss": 0.2375, "step": 10565 }, { - "epoch": 0.61, - "grad_norm": 0.885248157283508, - "learning_rate": 7.062780689353758e-06, - "loss": 0.3694, + "epoch": 0.49, + "grad_norm": 0.30337444480723963, + "learning_rate": 1.09582353645924e-05, + "loss": 0.2168, "step": 10566 }, { - "epoch": 0.61, - "grad_norm": 0.3276928966908981, - "learning_rate": 7.06100191078647e-06, - "loss": 0.2578, + "epoch": 0.49, + "grad_norm": 0.9142107611906051, + "learning_rate": 1.0956754281667182e-05, + "loss": 0.4948, "step": 10567 }, { - "epoch": 0.61, - "grad_norm": 0.35618267701930156, - "learning_rate": 7.0592232339965664e-06, - "loss": 0.3178, + "epoch": 0.49, + "grad_norm": 0.36824464406337265, + "learning_rate": 1.0955273177560347e-05, + "loss": 0.3096, "step": 10568 }, { - "epoch": 0.61, - "grad_norm": 0.3768634547209685, - "learning_rate": 7.057444659045627e-06, - "loss": 0.2148, + "epoch": 0.49, + "grad_norm": 0.3660790486324102, + "learning_rate": 1.095379205230468e-05, + "loss": 0.2971, "step": 10569 }, { - "epoch": 0.61, - "grad_norm": 0.25590767906812234, - "learning_rate": 7.055666185995256e-06, - "loss": 0.2129, + "epoch": 0.49, + "grad_norm": 0.5826729897583479, + "learning_rate": 1.0952310905932982e-05, + "loss": 0.2965, "step": 10570 }, { - "epoch": 0.61, - "grad_norm": 0.5079144727780354, - "learning_rate": 7.053887814907036e-06, - "loss": 0.3775, + "epoch": 0.49, + "grad_norm": 0.43161789614674323, + "learning_rate": 1.0950829738478034e-05, + "loss": 0.3266, "step": 10571 }, { - "epoch": 0.61, - "grad_norm": 0.47995334935170597, - "learning_rate": 7.0521095458425555e-06, - "loss": 0.3491, + "epoch": 0.49, + "grad_norm": 0.24840161677097314, + "learning_rate": 1.0949348549972635e-05, + "loss": 0.1779, "step": 10572 }, { - "epoch": 0.61, - "grad_norm": 0.3018031483350368, - "learning_rate": 7.050331378863395e-06, - "loss": 0.1862, + "epoch": 0.49, + "grad_norm": 0.7086796526919262, + "learning_rate": 1.0947867340449572e-05, + "loss": 0.425, "step": 10573 }, { - "epoch": 0.61, - "grad_norm": 0.7799879504359073, - "learning_rate": 7.048553314031132e-06, - "loss": 0.4413, + "epoch": 0.49, + "grad_norm": 0.3755966136282917, + "learning_rate": 1.094638610994164e-05, + "loss": 0.3242, "step": 10574 }, { - "epoch": 0.61, - "grad_norm": 0.34979493541325096, - "learning_rate": 7.04677535140734e-06, - "loss": 0.2612, + "epoch": 0.49, + "grad_norm": 0.8603995071148285, + "learning_rate": 1.0944904858481636e-05, + "loss": 0.3633, "step": 10575 }, { - "epoch": 0.61, - "grad_norm": 0.2285568510508859, - "learning_rate": 7.0449974910535916e-06, - "loss": 0.1851, + "epoch": 0.49, + "grad_norm": 0.38640523064264576, + "learning_rate": 1.0943423586102343e-05, + "loss": 0.3018, "step": 10576 }, { - "epoch": 0.61, - "grad_norm": 1.0304051087015562, - "learning_rate": 7.043219733031452e-06, - "loss": 0.627, + "epoch": 0.49, + "grad_norm": 0.3417474325186601, + "learning_rate": 1.0941942292836562e-05, + "loss": 0.2926, "step": 10577 }, { - "epoch": 0.61, - "grad_norm": 1.0139546657916503, - "learning_rate": 7.041442077402487e-06, - "loss": 0.6937, + "epoch": 0.49, + "grad_norm": 0.2157028748813911, + "learning_rate": 1.0940460978717087e-05, + "loss": 0.1095, "step": 10578 }, { - "epoch": 0.61, - "grad_norm": 0.30342367295446254, - "learning_rate": 7.0396645242282535e-06, - "loss": 0.2022, + "epoch": 0.49, + "grad_norm": 0.4074903364256941, + "learning_rate": 1.0938979643776715e-05, + "loss": 0.3048, "step": 10579 }, { - "epoch": 0.61, - "grad_norm": 0.3986109508000254, - "learning_rate": 7.037887073570313e-06, - "loss": 0.3156, + "epoch": 0.49, + "grad_norm": 0.5207802912047126, + "learning_rate": 1.0937498288048239e-05, + "loss": 0.3688, "step": 10580 }, { - "epoch": 0.61, - "grad_norm": 0.4877704840722932, - "learning_rate": 7.036109725490214e-06, - "loss": 0.2859, + "epoch": 0.49, + "grad_norm": 0.32101012214923214, + "learning_rate": 1.0936016911564451e-05, + "loss": 0.3113, "step": 10581 }, { - "epoch": 0.61, - "grad_norm": 0.30519393626632985, - "learning_rate": 7.03433248004951e-06, - "loss": 0.1873, + "epoch": 0.49, + "grad_norm": 0.36960418388080785, + "learning_rate": 1.0934535514358153e-05, + "loss": 0.1866, "step": 10582 }, { - "epoch": 0.61, - "grad_norm": 0.41057454427454104, - "learning_rate": 7.032555337309743e-06, - "loss": 0.3229, + "epoch": 0.49, + "grad_norm": 0.5318717651146868, + "learning_rate": 1.0933054096462136e-05, + "loss": 0.3415, "step": 10583 }, { - "epoch": 0.61, - "grad_norm": 0.3539347623622069, - "learning_rate": 7.030778297332457e-06, - "loss": 0.3001, + "epoch": 0.49, + "grad_norm": 0.2521014887560583, + "learning_rate": 1.0931572657909207e-05, + "loss": 0.2116, "step": 10584 }, { - "epoch": 0.61, - "grad_norm": 0.3887406352092294, - "learning_rate": 7.0290013601791905e-06, - "loss": 0.2601, + "epoch": 0.49, + "grad_norm": 0.5267893329999455, + "learning_rate": 1.0930091198732152e-05, + "loss": 0.2639, "step": 10585 }, { - "epoch": 0.61, - "grad_norm": 0.47380536198285, - "learning_rate": 7.027224525911479e-06, - "loss": 0.2907, + "epoch": 0.49, + "grad_norm": 0.3785844060009913, + "learning_rate": 1.0928609718963777e-05, + "loss": 0.3229, "step": 10586 }, { - "epoch": 0.61, - "grad_norm": 0.4561919825253446, - "learning_rate": 7.025447794590856e-06, - "loss": 0.3529, + "epoch": 0.49, + "grad_norm": 0.8488818690686816, + "learning_rate": 1.0927128218636875e-05, + "loss": 0.545, "step": 10587 }, { - "epoch": 0.61, - "grad_norm": 0.24317420310642943, - "learning_rate": 7.023671166278845e-06, - "loss": 0.2204, + "epoch": 0.49, + "grad_norm": 0.32838103523500994, + "learning_rate": 1.0925646697784251e-05, + "loss": 0.2502, "step": 10588 }, { - "epoch": 0.61, - "grad_norm": 0.30300578946259576, - "learning_rate": 7.021894641036977e-06, - "loss": 0.1961, + "epoch": 0.49, + "grad_norm": 0.4331605240552264, + "learning_rate": 1.0924165156438697e-05, + "loss": 0.308, "step": 10589 }, { - "epoch": 0.61, - "grad_norm": 0.7928562012811823, - "learning_rate": 7.020118218926767e-06, - "loss": 0.5509, + "epoch": 0.49, + "grad_norm": 0.2857572823818981, + "learning_rate": 1.092268359463302e-05, + "loss": 0.1849, "step": 10590 }, { - "epoch": 0.61, - "grad_norm": 0.31281127151054094, - "learning_rate": 7.018341900009738e-06, - "loss": 0.2595, + "epoch": 0.49, + "grad_norm": 0.6646503165177715, + "learning_rate": 1.0921202012400019e-05, + "loss": 0.2715, "step": 10591 }, { - "epoch": 0.61, - "grad_norm": 0.378034705279621, - "learning_rate": 7.0165656843473965e-06, - "loss": 0.2803, + "epoch": 0.49, + "grad_norm": 0.38230972442953065, + "learning_rate": 1.0919720409772491e-05, + "loss": 0.3174, "step": 10592 }, { - "epoch": 0.61, - "grad_norm": 0.9236198996788825, - "learning_rate": 7.0147895720012596e-06, - "loss": 0.5131, + "epoch": 0.49, + "grad_norm": 0.5585228848785955, + "learning_rate": 1.091823878678324e-05, + "loss": 0.3986, "step": 10593 }, { - "epoch": 0.61, - "grad_norm": 0.24966395258526514, - "learning_rate": 7.01301356303283e-06, - "loss": 0.1917, + "epoch": 0.49, + "grad_norm": 0.848493367272372, + "learning_rate": 1.0916757143465068e-05, + "loss": 0.4971, "step": 10594 }, { - "epoch": 0.61, - "grad_norm": 0.9200006479669353, - "learning_rate": 7.011237657503615e-06, - "loss": 0.5364, + "epoch": 0.49, + "grad_norm": 0.3418417369915704, + "learning_rate": 1.0915275479850777e-05, + "loss": 0.2279, "step": 10595 }, { - "epoch": 0.61, - "grad_norm": 0.2708229012641202, - "learning_rate": 7.009461855475111e-06, - "loss": 0.2601, + "epoch": 0.49, + "grad_norm": 0.2951115298218996, + "learning_rate": 1.0913793795973167e-05, + "loss": 0.2784, "step": 10596 }, { - "epoch": 0.61, - "grad_norm": 0.42431345222551275, - "learning_rate": 7.00768615700881e-06, - "loss": 0.3102, + "epoch": 0.49, + "grad_norm": 0.5572261579387174, + "learning_rate": 1.0912312091865045e-05, + "loss": 0.3834, "step": 10597 }, { - "epoch": 0.61, - "grad_norm": 0.49933535085134173, - "learning_rate": 7.005910562166213e-06, - "loss": 0.3538, + "epoch": 0.49, + "grad_norm": 0.26349140837445534, + "learning_rate": 1.0910830367559212e-05, + "loss": 0.1709, "step": 10598 }, { - "epoch": 0.61, - "grad_norm": 0.32711074269410184, - "learning_rate": 7.004135071008803e-06, - "loss": 0.244, + "epoch": 0.49, + "grad_norm": 1.252730453767162, + "learning_rate": 1.0909348623088472e-05, + "loss": 0.7936, "step": 10599 }, { - "epoch": 0.61, - "grad_norm": 0.3911638518785674, - "learning_rate": 7.0023596835980676e-06, - "loss": 0.2621, + "epoch": 0.49, + "grad_norm": 0.3223358149457774, + "learning_rate": 1.090786685848563e-05, + "loss": 0.2738, "step": 10600 }, { - "epoch": 0.61, - "grad_norm": 0.549319709524697, - "learning_rate": 7.000584399995486e-06, - "loss": 0.4431, + "epoch": 0.49, + "grad_norm": 0.39213706291046607, + "learning_rate": 1.090638507378349e-05, + "loss": 0.2496, "step": 10601 }, { - "epoch": 0.61, - "grad_norm": 0.4383392566950631, - "learning_rate": 6.998809220262541e-06, - "loss": 0.2549, + "epoch": 0.49, + "grad_norm": 0.6880017009555858, + "learning_rate": 1.0904903269014856e-05, + "loss": 0.4372, "step": 10602 }, { - "epoch": 0.61, - "grad_norm": 0.39079099029956776, - "learning_rate": 6.997034144460702e-06, - "loss": 0.2778, + "epoch": 0.49, + "grad_norm": 0.3748487966853121, + "learning_rate": 1.090342144421254e-05, + "loss": 0.2548, "step": 10603 }, { - "epoch": 0.61, - "grad_norm": 0.24881046372954682, - "learning_rate": 6.995259172651441e-06, - "loss": 0.2452, + "epoch": 0.49, + "grad_norm": 0.2973458648932974, + "learning_rate": 1.0901939599409343e-05, + "loss": 0.2278, "step": 10604 }, { - "epoch": 0.61, - "grad_norm": 1.0911990822931978, - "learning_rate": 6.993484304896225e-06, - "loss": 0.2585, + "epoch": 0.49, + "grad_norm": 0.37848135021478935, + "learning_rate": 1.0900457734638074e-05, + "loss": 0.3283, "step": 10605 }, { - "epoch": 0.61, - "grad_norm": 0.3859135519020631, - "learning_rate": 6.991709541256517e-06, - "loss": 0.271, + "epoch": 0.49, + "grad_norm": 1.6364873597114724, + "learning_rate": 1.0898975849931535e-05, + "loss": 0.8297, "step": 10606 }, { - "epoch": 0.61, - "grad_norm": 0.38547240651856923, - "learning_rate": 6.98993488179378e-06, - "loss": 0.328, + "epoch": 0.49, + "grad_norm": 0.3610541667210801, + "learning_rate": 1.089749394532254e-05, + "loss": 0.2947, "step": 10607 }, { - "epoch": 0.61, - "grad_norm": 0.496427375438408, - "learning_rate": 6.988160326569471e-06, - "loss": 0.2881, + "epoch": 0.49, + "grad_norm": 0.4185592943048797, + "learning_rate": 1.0896012020843892e-05, + "loss": 0.2823, "step": 10608 }, { - "epoch": 0.61, - "grad_norm": 0.2807034932115483, - "learning_rate": 6.986385875645036e-06, - "loss": 0.2477, + "epoch": 0.49, + "grad_norm": 0.7135207282144652, + "learning_rate": 1.0894530076528404e-05, + "loss": 0.4283, "step": 10609 }, { - "epoch": 0.61, - "grad_norm": 0.36666726706200375, - "learning_rate": 6.984611529081931e-06, - "loss": 0.2671, + "epoch": 0.49, + "grad_norm": 0.3628564157355788, + "learning_rate": 1.0893048112408882e-05, + "loss": 0.2889, "step": 10610 }, { - "epoch": 0.61, - "grad_norm": 0.3784004456555454, - "learning_rate": 6.982837286941598e-06, - "loss": 0.2992, + "epoch": 0.49, + "grad_norm": 0.331634254493673, + "learning_rate": 1.0891566128518133e-05, + "loss": 0.1571, "step": 10611 }, { - "epoch": 0.61, - "grad_norm": 0.2947344402549206, - "learning_rate": 6.981063149285481e-06, - "loss": 0.1862, + "epoch": 0.49, + "grad_norm": 0.3060658979703778, + "learning_rate": 1.0890084124888971e-05, + "loss": 0.2638, "step": 10612 }, { - "epoch": 0.61, - "grad_norm": 1.2011332586742978, - "learning_rate": 6.979289116175014e-06, - "loss": 0.7797, + "epoch": 0.49, + "grad_norm": 0.395272086306695, + "learning_rate": 1.0888602101554202e-05, + "loss": 0.2862, "step": 10613 }, { - "epoch": 0.61, - "grad_norm": 0.6269447977839719, - "learning_rate": 6.977515187671639e-06, - "loss": 0.3863, + "epoch": 0.49, + "grad_norm": 0.9437320732050818, + "learning_rate": 1.088712005854664e-05, + "loss": 0.3437, "step": 10614 }, { - "epoch": 0.61, - "grad_norm": 0.28525954215061056, - "learning_rate": 6.975741363836781e-06, - "loss": 0.2034, + "epoch": 0.49, + "grad_norm": 0.8433383927800244, + "learning_rate": 1.0885637995899099e-05, + "loss": 0.4365, "step": 10615 }, { - "epoch": 0.61, - "grad_norm": 0.23251089488001686, - "learning_rate": 6.973967644731872e-06, - "loss": 0.2167, + "epoch": 0.49, + "grad_norm": 0.39863194323674406, + "learning_rate": 1.0884155913644382e-05, + "loss": 0.3032, "step": 10616 }, { - "epoch": 0.61, - "grad_norm": 1.136268898483301, - "learning_rate": 6.972194030418329e-06, - "loss": 0.4865, + "epoch": 0.49, + "grad_norm": 0.2690947126743883, + "learning_rate": 1.0882673811815306e-05, + "loss": 0.2192, "step": 10617 }, { - "epoch": 0.61, - "grad_norm": 0.35214424867594674, - "learning_rate": 6.97042052095758e-06, - "loss": 0.1561, + "epoch": 0.49, + "grad_norm": 0.35836881143509813, + "learning_rate": 1.0881191690444684e-05, + "loss": 0.2256, "step": 10618 }, { - "epoch": 0.61, - "grad_norm": 0.3186430841898647, - "learning_rate": 6.968647116411036e-06, - "loss": 0.2863, + "epoch": 0.49, + "grad_norm": 0.4346163685143305, + "learning_rate": 1.0879709549565323e-05, + "loss": 0.2979, "step": 10619 }, { - "epoch": 0.61, - "grad_norm": 0.5067307018719389, - "learning_rate": 6.966873816840114e-06, - "loss": 0.3595, + "epoch": 0.49, + "grad_norm": 0.6881563396993722, + "learning_rate": 1.0878227389210046e-05, + "loss": 0.3289, "step": 10620 }, { - "epoch": 0.61, - "grad_norm": 0.33263369625653055, - "learning_rate": 6.96510062230622e-06, - "loss": 0.1733, + "epoch": 0.49, + "grad_norm": 0.8161821407890882, + "learning_rate": 1.087674520941166e-05, + "loss": 0.2802, "step": 10621 }, { - "epoch": 0.61, - "grad_norm": 0.23286758847398575, - "learning_rate": 6.963327532870763e-06, - "loss": 0.1742, + "epoch": 0.49, + "grad_norm": 0.4021588127220264, + "learning_rate": 1.0875263010202977e-05, + "loss": 0.3001, "step": 10622 }, { - "epoch": 0.61, - "grad_norm": 0.4730282585689458, - "learning_rate": 6.961554548595142e-06, - "loss": 0.326, + "epoch": 0.49, + "grad_norm": 0.4706051959295663, + "learning_rate": 1.0873780791616816e-05, + "loss": 0.338, "step": 10623 }, { - "epoch": 0.61, - "grad_norm": 0.3619899467460046, - "learning_rate": 6.959781669540754e-06, - "loss": 0.2461, + "epoch": 0.49, + "grad_norm": 0.32423140455329363, + "learning_rate": 1.0872298553685988e-05, + "loss": 0.2271, "step": 10624 }, { - "epoch": 0.61, - "grad_norm": 0.3841805210107079, - "learning_rate": 6.958008895769e-06, - "loss": 0.2792, + "epoch": 0.49, + "grad_norm": 0.42833020936734045, + "learning_rate": 1.0870816296443317e-05, + "loss": 0.2751, "step": 10625 }, { - "epoch": 0.61, - "grad_norm": 0.7631851504994283, - "learning_rate": 6.956236227341262e-06, - "loss": 0.5202, + "epoch": 0.49, + "grad_norm": 0.8629160951960159, + "learning_rate": 1.0869334019921608e-05, + "loss": 0.3963, "step": 10626 }, { - "epoch": 0.61, - "grad_norm": 0.2999124611334779, - "learning_rate": 6.954463664318937e-06, - "loss": 0.2546, + "epoch": 0.49, + "grad_norm": 0.4310006714810583, + "learning_rate": 1.0867851724153683e-05, + "loss": 0.237, "step": 10627 }, { - "epoch": 0.61, - "grad_norm": 0.24820162596488865, - "learning_rate": 6.952691206763402e-06, - "loss": 0.1693, + "epoch": 0.49, + "grad_norm": 0.3093428330672653, + "learning_rate": 1.0866369409172357e-05, + "loss": 0.2499, "step": 10628 }, { - "epoch": 0.61, - "grad_norm": 1.2347189617039587, - "learning_rate": 6.950918854736041e-06, - "loss": 0.5083, + "epoch": 0.49, + "grad_norm": 1.4060090308091724, + "learning_rate": 1.0864887075010447e-05, + "loss": 0.905, "step": 10629 }, { - "epoch": 0.61, - "grad_norm": 0.5522730367103457, - "learning_rate": 6.949146608298227e-06, - "loss": 0.3568, + "epoch": 0.49, + "grad_norm": 0.3563798835613191, + "learning_rate": 1.086340472170077e-05, + "loss": 0.1859, "step": 10630 }, { - "epoch": 0.61, - "grad_norm": 0.3104954162518697, - "learning_rate": 6.947374467511336e-06, - "loss": 0.2486, + "epoch": 0.49, + "grad_norm": 0.34326311197639064, + "learning_rate": 1.0861922349276147e-05, + "loss": 0.2675, "step": 10631 }, { - "epoch": 0.61, - "grad_norm": 0.4520113873626869, - "learning_rate": 6.945602432436736e-06, - "loss": 0.3982, + "epoch": 0.49, + "grad_norm": 0.4150440829291692, + "learning_rate": 1.0860439957769392e-05, + "loss": 0.3054, "step": 10632 }, { - "epoch": 0.61, - "grad_norm": 0.4093916598564017, - "learning_rate": 6.9438305031357935e-06, - "loss": 0.2927, + "epoch": 0.49, + "grad_norm": 0.9535068235207886, + "learning_rate": 1.0858957547213326e-05, + "loss": 0.5629, "step": 10633 }, { - "epoch": 0.61, - "grad_norm": 0.3338504922517524, - "learning_rate": 6.9420586796698655e-06, - "loss": 0.219, + "epoch": 0.49, + "grad_norm": 0.35170511408267097, + "learning_rate": 1.0857475117640766e-05, + "loss": 0.2005, "step": 10634 }, { - "epoch": 0.61, - "grad_norm": 0.25665018793378996, - "learning_rate": 6.940286962100318e-06, - "loss": 0.1841, + "epoch": 0.49, + "grad_norm": 1.2420079671525877, + "learning_rate": 1.0855992669084536e-05, + "loss": 0.7487, "step": 10635 }, { - "epoch": 0.61, - "grad_norm": 0.6598078007493531, - "learning_rate": 6.938515350488503e-06, - "loss": 0.4409, + "epoch": 0.49, + "grad_norm": 0.3090264168405756, + "learning_rate": 1.0854510201577451e-05, + "loss": 0.2635, "step": 10636 }, { - "epoch": 0.61, - "grad_norm": 0.3300845858986062, - "learning_rate": 6.936743844895768e-06, - "loss": 0.2797, + "epoch": 0.49, + "grad_norm": 0.3800803067633803, + "learning_rate": 1.0853027715152336e-05, + "loss": 0.2008, "step": 10637 }, { - "epoch": 0.61, - "grad_norm": 0.4680762857712185, - "learning_rate": 6.934972445383459e-06, - "loss": 0.2779, + "epoch": 0.49, + "grad_norm": 0.46630868993904695, + "learning_rate": 1.0851545209842009e-05, + "loss": 0.2984, "step": 10638 }, { - "epoch": 0.61, - "grad_norm": 0.4200145663298372, - "learning_rate": 6.933201152012925e-06, - "loss": 0.3078, + "epoch": 0.49, + "grad_norm": 0.398957963454377, + "learning_rate": 1.0850062685679292e-05, + "loss": 0.3159, "step": 10639 }, { - "epoch": 0.61, - "grad_norm": 0.27962340097798727, - "learning_rate": 6.931429964845501e-06, - "loss": 0.2241, + "epoch": 0.49, + "grad_norm": 0.3430221090492787, + "learning_rate": 1.0848580142697006e-05, + "loss": 0.1869, "step": 10640 }, { - "epoch": 0.61, - "grad_norm": 0.53746389745485, - "learning_rate": 6.929658883942527e-06, - "loss": 0.1577, + "epoch": 0.49, + "grad_norm": 1.09828167344895, + "learning_rate": 1.0847097580927974e-05, + "loss": 0.6335, "step": 10641 }, { - "epoch": 0.61, - "grad_norm": 0.4959989835213774, - "learning_rate": 6.927887909365333e-06, - "loss": 0.3364, + "epoch": 0.49, + "grad_norm": 0.6372480909358598, + "learning_rate": 1.0845615000405018e-05, + "loss": 0.4535, "step": 10642 }, { - "epoch": 0.61, - "grad_norm": 0.29723094294413305, - "learning_rate": 6.92611704117525e-06, - "loss": 0.2759, + "epoch": 0.49, + "grad_norm": 0.32720255286140965, + "learning_rate": 1.0844132401160958e-05, + "loss": 0.2762, "step": 10643 }, { - "epoch": 0.61, - "grad_norm": 0.8817731319299101, - "learning_rate": 6.924346279433599e-06, - "loss": 0.4836, + "epoch": 0.49, + "grad_norm": 0.2757530633703458, + "learning_rate": 1.0842649783228624e-05, + "loss": 0.1752, "step": 10644 }, { - "epoch": 0.61, - "grad_norm": 0.603693846570787, - "learning_rate": 6.922575624201706e-06, - "loss": 0.379, + "epoch": 0.49, + "grad_norm": 1.5266151392590883, + "learning_rate": 1.0841167146640834e-05, + "loss": 0.6542, "step": 10645 }, { - "epoch": 0.61, - "grad_norm": 0.31016181140642807, - "learning_rate": 6.920805075540886e-06, - "loss": 0.2332, + "epoch": 0.49, + "grad_norm": 0.392955445253096, + "learning_rate": 1.0839684491430415e-05, + "loss": 0.2982, "step": 10646 }, { - "epoch": 0.61, - "grad_norm": 0.27221079855815977, - "learning_rate": 6.919034633512456e-06, - "loss": 0.2433, + "epoch": 0.49, + "grad_norm": 0.7533056761169569, + "learning_rate": 1.0838201817630189e-05, + "loss": 0.356, "step": 10647 }, { - "epoch": 0.61, - "grad_norm": 0.6010295063712859, - "learning_rate": 6.917264298177724e-06, - "loss": 0.267, + "epoch": 0.49, + "grad_norm": 0.3749672107369942, + "learning_rate": 1.0836719125272986e-05, + "loss": 0.3253, "step": 10648 }, { - "epoch": 0.61, - "grad_norm": 0.4000427804407078, - "learning_rate": 6.915494069597993e-06, - "loss": 0.3225, + "epoch": 0.49, + "grad_norm": 0.39622495268889213, + "learning_rate": 1.0835236414391622e-05, + "loss": 0.256, "step": 10649 }, { - "epoch": 0.61, - "grad_norm": 1.0407707069428398, - "learning_rate": 6.913723947834574e-06, - "loss": 0.6438, + "epoch": 0.49, + "grad_norm": 0.19048305983119787, + "learning_rate": 1.0833753685018935e-05, + "loss": 0.0725, "step": 10650 }, { - "epoch": 0.61, - "grad_norm": 0.2765627603584837, - "learning_rate": 6.9119539329487585e-06, - "loss": 0.2274, + "epoch": 0.49, + "grad_norm": 0.38142334606071937, + "learning_rate": 1.0832270937187745e-05, + "loss": 0.3342, "step": 10651 }, { - "epoch": 0.61, - "grad_norm": 0.5224307700746306, - "learning_rate": 6.9101840250018485e-06, - "loss": 0.3428, + "epoch": 0.49, + "grad_norm": 0.38650519873211114, + "learning_rate": 1.0830788170930876e-05, + "loss": 0.2578, "step": 10652 }, { - "epoch": 0.61, - "grad_norm": 0.35898777209765914, - "learning_rate": 6.908414224055129e-06, - "loss": 0.2134, + "epoch": 0.49, + "grad_norm": 0.5293537690764069, + "learning_rate": 1.0829305386281158e-05, + "loss": 0.296, "step": 10653 }, { - "epoch": 0.61, - "grad_norm": 0.7316390060143079, - "learning_rate": 6.906644530169896e-06, - "loss": 0.2693, + "epoch": 0.49, + "grad_norm": 0.8112702564889863, + "learning_rate": 1.0827822583271418e-05, + "loss": 0.4097, "step": 10654 }, { - "epoch": 0.61, - "grad_norm": 0.2562741289721085, - "learning_rate": 6.904874943407427e-06, - "loss": 0.2569, + "epoch": 0.49, + "grad_norm": 0.44259759590065606, + "learning_rate": 1.0826339761934483e-05, + "loss": 0.2916, "step": 10655 }, { - "epoch": 0.61, - "grad_norm": 1.057082723143132, - "learning_rate": 6.903105463829007e-06, - "loss": 0.7312, + "epoch": 0.49, + "grad_norm": 0.28259821921716455, + "learning_rate": 1.0824856922303183e-05, + "loss": 0.2435, "step": 10656 }, { - "epoch": 0.61, - "grad_norm": 0.6832390986635581, - "learning_rate": 6.901336091495912e-06, - "loss": 0.159, + "epoch": 0.49, + "grad_norm": 0.3155867563215487, + "learning_rate": 1.0823374064410348e-05, + "loss": 0.1561, "step": 10657 }, { - "epoch": 0.61, - "grad_norm": 0.2674851553546089, - "learning_rate": 6.899566826469415e-06, - "loss": 0.2218, + "epoch": 0.49, + "grad_norm": 0.42516065761004934, + "learning_rate": 1.0821891188288803e-05, + "loss": 0.3111, "step": 10658 }, { - "epoch": 0.61, - "grad_norm": 0.3579968138322811, - "learning_rate": 6.897797668810784e-06, - "loss": 0.2911, + "epoch": 0.49, + "grad_norm": 0.6337045826593662, + "learning_rate": 1.082040829397138e-05, + "loss": 0.397, "step": 10659 }, { - "epoch": 0.61, - "grad_norm": 0.5464163869475679, - "learning_rate": 6.896028618581287e-06, - "loss": 0.3132, + "epoch": 0.49, + "grad_norm": 0.42032059547767175, + "learning_rate": 1.0818925381490904e-05, + "loss": 0.2626, "step": 10660 }, { - "epoch": 0.61, - "grad_norm": 0.3000321311735532, - "learning_rate": 6.894259675842188e-06, - "loss": 0.2106, + "epoch": 0.49, + "grad_norm": 0.40809108791157067, + "learning_rate": 1.081744245088021e-05, + "loss": 0.247, "step": 10661 }, { - "epoch": 0.61, - "grad_norm": 1.2299271449853884, - "learning_rate": 6.892490840654739e-06, - "loss": 0.8311, + "epoch": 0.49, + "grad_norm": 0.2482786630310706, + "learning_rate": 1.0815959502172133e-05, + "loss": 0.1908, "step": 10662 }, { - "epoch": 0.61, - "grad_norm": 0.30179860641260736, - "learning_rate": 6.890722113080201e-06, - "loss": 0.2821, + "epoch": 0.49, + "grad_norm": 0.4565851764908628, + "learning_rate": 1.0814476535399496e-05, + "loss": 0.2775, "step": 10663 }, { - "epoch": 0.61, - "grad_norm": 0.34845039622272467, - "learning_rate": 6.888953493179819e-06, - "loss": 0.2284, + "epoch": 0.49, + "grad_norm": 0.2860235106911526, + "learning_rate": 1.0812993550595131e-05, + "loss": 0.2477, "step": 10664 }, { - "epoch": 0.61, - "grad_norm": 0.5995945108100336, - "learning_rate": 6.88718498101485e-06, - "loss": 0.3831, + "epoch": 0.49, + "grad_norm": 0.7484367457429841, + "learning_rate": 1.0811510547791878e-05, + "loss": 0.4959, "step": 10665 }, { - "epoch": 0.61, - "grad_norm": 0.25283872062952506, - "learning_rate": 6.885416576646525e-06, - "loss": 0.1791, + "epoch": 0.49, + "grad_norm": 0.8290799323261989, + "learning_rate": 1.081002752702256e-05, + "loss": 0.3549, "step": 10666 }, { - "epoch": 0.61, - "grad_norm": 0.2785774684194344, - "learning_rate": 6.883648280136094e-06, - "loss": 0.223, + "epoch": 0.49, + "grad_norm": 0.378170469503295, + "learning_rate": 1.0808544488320014e-05, + "loss": 0.2898, "step": 10667 }, { - "epoch": 0.61, - "grad_norm": 1.1132804366543598, - "learning_rate": 6.881880091544786e-06, - "loss": 0.7453, + "epoch": 0.49, + "grad_norm": 0.41989594270813685, + "learning_rate": 1.0807061431717072e-05, + "loss": 0.3192, "step": 10668 }, { - "epoch": 0.61, - "grad_norm": 0.8623000767888052, - "learning_rate": 6.880112010933839e-06, - "loss": 0.5088, + "epoch": 0.49, + "grad_norm": 0.29147581458370114, + "learning_rate": 1.080557835724657e-05, + "loss": 0.2136, "step": 10669 }, { - "epoch": 0.61, - "grad_norm": 0.3251227489998159, - "learning_rate": 6.878344038364481e-06, - "loss": 0.2274, + "epoch": 0.49, + "grad_norm": 0.3513360284895416, + "learning_rate": 1.0804095264941338e-05, + "loss": 0.2122, "step": 10670 }, { - "epoch": 0.61, - "grad_norm": 0.36645085483371054, - "learning_rate": 6.8765761738979305e-06, - "loss": 0.3253, + "epoch": 0.49, + "grad_norm": 0.47939196941232093, + "learning_rate": 1.0802612154834211e-05, + "loss": 0.386, "step": 10671 }, { - "epoch": 0.61, - "grad_norm": 0.26849276242470155, - "learning_rate": 6.874808417595415e-06, - "loss": 0.1771, + "epoch": 0.49, + "grad_norm": 0.41302445464470483, + "learning_rate": 1.0801129026958025e-05, + "loss": 0.3437, "step": 10672 }, { - "epoch": 0.61, - "grad_norm": 0.32862638825590396, - "learning_rate": 6.87304076951815e-06, - "loss": 0.2591, + "epoch": 0.49, + "grad_norm": 0.40562839485155766, + "learning_rate": 1.0799645881345612e-05, + "loss": 0.1666, "step": 10673 }, { - "epoch": 0.61, - "grad_norm": 0.4298652986509392, - "learning_rate": 6.871273229727346e-06, - "loss": 0.2987, + "epoch": 0.49, + "grad_norm": 0.3118309109725975, + "learning_rate": 1.0798162718029816e-05, + "loss": 0.2538, "step": 10674 }, { - "epoch": 0.61, - "grad_norm": 0.5045099892284276, - "learning_rate": 6.869505798284217e-06, - "loss": 0.3762, + "epoch": 0.49, + "grad_norm": 0.29334225964249144, + "learning_rate": 1.0796679537043461e-05, + "loss": 0.261, "step": 10675 }, { - "epoch": 0.61, - "grad_norm": 0.32738198225492715, - "learning_rate": 6.867738475249967e-06, - "loss": 0.28, + "epoch": 0.49, + "grad_norm": 0.3526373695136828, + "learning_rate": 1.0795196338419392e-05, + "loss": 0.2103, "step": 10676 }, { - "epoch": 0.61, - "grad_norm": 0.6053803182315074, - "learning_rate": 6.8659712606858e-06, - "loss": 0.292, + "epoch": 0.49, + "grad_norm": 0.5149197368860986, + "learning_rate": 1.0793713122190439e-05, + "loss": 0.354, "step": 10677 }, { - "epoch": 0.61, - "grad_norm": 0.2784146187816311, - "learning_rate": 6.8642041546529115e-06, - "loss": 0.2397, + "epoch": 0.49, + "grad_norm": 1.2464857242689833, + "learning_rate": 1.0792229888389447e-05, + "loss": 0.6495, "step": 10678 }, { - "epoch": 0.61, - "grad_norm": 0.25915374026684634, - "learning_rate": 6.8624371572125e-06, - "loss": 0.1908, + "epoch": 0.49, + "grad_norm": 0.3035593289342247, + "learning_rate": 1.0790746637049247e-05, + "loss": 0.2107, "step": 10679 }, { - "epoch": 0.61, - "grad_norm": 1.1034569399626564, - "learning_rate": 6.860670268425754e-06, - "loss": 0.5397, + "epoch": 0.49, + "grad_norm": 0.28983493967421964, + "learning_rate": 1.0789263368202678e-05, + "loss": 0.2325, "step": 10680 }, { - "epoch": 0.61, - "grad_norm": 0.6580196847714953, - "learning_rate": 6.858903488353863e-06, - "loss": 0.3914, + "epoch": 0.49, + "grad_norm": 0.4467403772660091, + "learning_rate": 1.0787780081882579e-05, + "loss": 0.2994, "step": 10681 }, { - "epoch": 0.61, - "grad_norm": 0.3087985735861544, - "learning_rate": 6.857136817058007e-06, - "loss": 0.2731, + "epoch": 0.49, + "grad_norm": 0.5410587027553821, + "learning_rate": 1.0786296778121787e-05, + "loss": 0.3318, "step": 10682 }, { - "epoch": 0.61, - "grad_norm": 0.30721390171367746, - "learning_rate": 6.855370254599369e-06, - "loss": 0.2289, + "epoch": 0.49, + "grad_norm": 0.3982441070615042, + "learning_rate": 1.0784813456953143e-05, + "loss": 0.2392, "step": 10683 }, { - "epoch": 0.61, - "grad_norm": 0.22683232782377963, - "learning_rate": 6.853603801039124e-06, - "loss": 0.156, + "epoch": 0.49, + "grad_norm": 0.38012384535553245, + "learning_rate": 1.0783330118409488e-05, + "loss": 0.3186, "step": 10684 }, { - "epoch": 0.61, - "grad_norm": 0.3619861963806639, - "learning_rate": 6.8518374564384434e-06, - "loss": 0.2869, + "epoch": 0.49, + "grad_norm": 0.4227901344992665, + "learning_rate": 1.0781846762523658e-05, + "loss": 0.2921, "step": 10685 }, { - "epoch": 0.61, - "grad_norm": 1.2769919962248444, - "learning_rate": 6.850071220858496e-06, - "loss": 0.6362, + "epoch": 0.49, + "grad_norm": 0.522593100103755, + "learning_rate": 1.0780363389328494e-05, + "loss": 0.2567, "step": 10686 }, { - "epoch": 0.61, - "grad_norm": 0.36211924335233864, - "learning_rate": 6.84830509436045e-06, - "loss": 0.2483, + "epoch": 0.49, + "grad_norm": 0.2779076100227694, + "learning_rate": 1.0778879998856836e-05, + "loss": 0.2309, "step": 10687 }, { - "epoch": 0.61, - "grad_norm": 0.372473918353896, - "learning_rate": 6.846539077005461e-06, - "loss": 0.2733, + "epoch": 0.49, + "grad_norm": 0.44870291121511924, + "learning_rate": 1.0777396591141524e-05, + "loss": 0.3046, "step": 10688 }, { - "epoch": 0.61, - "grad_norm": 0.7227237641851568, - "learning_rate": 6.844773168854686e-06, - "loss": 0.4258, + "epoch": 0.49, + "grad_norm": 0.6995097276285175, + "learning_rate": 1.0775913166215403e-05, + "loss": 0.2816, "step": 10689 }, { - "epoch": 0.61, - "grad_norm": 0.25478526632383197, - "learning_rate": 6.843007369969283e-06, - "loss": 0.1721, + "epoch": 0.49, + "grad_norm": 1.0627708581212783, + "learning_rate": 1.077442972411131e-05, + "loss": 0.6678, "step": 10690 }, { - "epoch": 0.61, - "grad_norm": 0.2592583923802607, - "learning_rate": 6.841241680410398e-06, - "loss": 0.2386, + "epoch": 0.49, + "grad_norm": 0.4472772152760946, + "learning_rate": 1.0772946264862092e-05, + "loss": 0.3219, "step": 10691 }, { - "epoch": 0.61, - "grad_norm": 0.417897430422118, - "learning_rate": 6.83947610023918e-06, - "loss": 0.3193, + "epoch": 0.49, + "grad_norm": 0.3130977368733739, + "learning_rate": 1.0771462788500588e-05, + "loss": 0.2171, "step": 10692 }, { - "epoch": 0.61, - "grad_norm": 0.8573806813691548, - "learning_rate": 6.837710629516765e-06, - "loss": 0.3226, + "epoch": 0.49, + "grad_norm": 0.4558020702968821, + "learning_rate": 1.0769979295059642e-05, + "loss": 0.3173, "step": 10693 }, { - "epoch": 0.61, - "grad_norm": 0.32550275471629125, - "learning_rate": 6.835945268304298e-06, - "loss": 0.2556, + "epoch": 0.49, + "grad_norm": 0.35555044999252905, + "learning_rate": 1.0768495784572092e-05, + "loss": 0.2088, "step": 10694 }, { - "epoch": 0.61, - "grad_norm": 0.3535437138265614, - "learning_rate": 6.834180016662908e-06, - "loss": 0.3155, + "epoch": 0.49, + "grad_norm": 0.34514699827686085, + "learning_rate": 1.0767012257070793e-05, + "loss": 0.2802, "step": 10695 }, { - "epoch": 0.61, - "grad_norm": 0.9949706011291491, - "learning_rate": 6.8324148746537286e-06, - "loss": 0.3106, + "epoch": 0.49, + "grad_norm": 0.47043578081251564, + "learning_rate": 1.0765528712588575e-05, + "loss": 0.2901, "step": 10696 }, { - "epoch": 0.61, - "grad_norm": 0.31547765324720395, - "learning_rate": 6.830649842337885e-06, - "loss": 0.2539, + "epoch": 0.49, + "grad_norm": 0.5529885611359966, + "learning_rate": 1.0764045151158293e-05, + "loss": 0.3779, "step": 10697 }, { - "epoch": 0.61, - "grad_norm": 1.0524343916133838, - "learning_rate": 6.828884919776504e-06, - "loss": 0.5325, + "epoch": 0.49, + "grad_norm": 0.409959459238842, + "learning_rate": 1.0762561572812789e-05, + "loss": 0.323, "step": 10698 }, { - "epoch": 0.61, - "grad_norm": 0.44576274356069756, - "learning_rate": 6.827120107030698e-06, - "loss": 0.3259, + "epoch": 0.49, + "grad_norm": 0.3247614878719977, + "learning_rate": 1.0761077977584905e-05, + "loss": 0.2693, "step": 10699 }, { - "epoch": 0.61, - "grad_norm": 0.22639385024816566, - "learning_rate": 6.82535540416159e-06, - "loss": 0.1372, + "epoch": 0.49, + "grad_norm": 0.3302206070332327, + "learning_rate": 1.0759594365507491e-05, + "loss": 0.2424, "step": 10700 }, { - "epoch": 0.61, - "grad_norm": 1.1446799361802587, - "learning_rate": 6.823590811230287e-06, - "loss": 0.6339, + "epoch": 0.49, + "grad_norm": 0.4050594115569477, + "learning_rate": 1.0758110736613385e-05, + "loss": 0.321, "step": 10701 }, { - "epoch": 0.61, - "grad_norm": 0.3551463442681128, - "learning_rate": 6.821826328297896e-06, - "loss": 0.3097, + "epoch": 0.49, + "grad_norm": 0.3178704082220715, + "learning_rate": 1.0756627090935441e-05, + "loss": 0.1467, "step": 10702 }, { - "epoch": 0.61, - "grad_norm": 0.34226727600361334, - "learning_rate": 6.820061955425527e-06, - "loss": 0.1847, + "epoch": 0.49, + "grad_norm": 0.33370061945729507, + "learning_rate": 1.07551434285065e-05, + "loss": 0.2925, "step": 10703 }, { - "epoch": 0.61, - "grad_norm": 0.5462335453113937, - "learning_rate": 6.818297692674273e-06, - "loss": 0.3826, + "epoch": 0.49, + "grad_norm": 0.39123781820110953, + "learning_rate": 1.0753659749359416e-05, + "loss": 0.2855, "step": 10704 }, { - "epoch": 0.62, - "grad_norm": 0.41512218730788686, - "learning_rate": 6.81653354010523e-06, - "loss": 0.2327, + "epoch": 0.49, + "grad_norm": 0.5971650476848853, + "learning_rate": 1.0752176053527025e-05, + "loss": 0.3404, "step": 10705 }, { - "epoch": 0.62, - "grad_norm": 0.2849103841623909, - "learning_rate": 6.8147694977794975e-06, - "loss": 0.1901, + "epoch": 0.49, + "grad_norm": 0.40610297438547477, + "learning_rate": 1.0750692341042187e-05, + "loss": 0.2835, "step": 10706 }, { - "epoch": 0.62, - "grad_norm": 0.35804096430942806, - "learning_rate": 6.813005565758158e-06, - "loss": 0.3045, + "epoch": 0.49, + "grad_norm": 0.4171271164122111, + "learning_rate": 1.0749208611937739e-05, + "loss": 0.3402, "step": 10707 }, { - "epoch": 0.62, - "grad_norm": 0.959727916273778, - "learning_rate": 6.8112417441022995e-06, - "loss": 0.5161, + "epoch": 0.49, + "grad_norm": 0.2526297188239567, + "learning_rate": 1.0747724866246539e-05, + "loss": 0.2199, "step": 10708 }, { - "epoch": 0.62, - "grad_norm": 0.41141512882499015, - "learning_rate": 6.809478032873002e-06, - "loss": 0.3103, + "epoch": 0.49, + "grad_norm": 0.5764065034152747, + "learning_rate": 1.0746241104001429e-05, + "loss": 0.2342, "step": 10709 }, { - "epoch": 0.62, - "grad_norm": 0.28673660446727955, - "learning_rate": 6.807714432131343e-06, - "loss": 0.2599, + "epoch": 0.49, + "grad_norm": 0.41101913639268867, + "learning_rate": 1.074475732523526e-05, + "loss": 0.2788, "step": 10710 }, { - "epoch": 0.62, - "grad_norm": 0.6706818173645254, - "learning_rate": 6.805950941938395e-06, - "loss": 0.4362, + "epoch": 0.49, + "grad_norm": 0.3631026062093693, + "learning_rate": 1.074327352998088e-05, + "loss": 0.34, "step": 10711 }, { - "epoch": 0.62, - "grad_norm": 0.2853954047662548, - "learning_rate": 6.804187562355231e-06, - "loss": 0.2041, + "epoch": 0.49, + "grad_norm": 0.47791827249084246, + "learning_rate": 1.0741789718271143e-05, + "loss": 0.1421, "step": 10712 }, { - "epoch": 0.62, - "grad_norm": 0.3040463174783864, - "learning_rate": 6.802424293442914e-06, - "loss": 0.1679, + "epoch": 0.49, + "grad_norm": 0.38180976741596595, + "learning_rate": 1.0740305890138896e-05, + "loss": 0.3219, "step": 10713 }, { - "epoch": 0.62, - "grad_norm": 0.3824192547178403, - "learning_rate": 6.800661135262505e-06, - "loss": 0.3014, + "epoch": 0.49, + "grad_norm": 0.3132540773234309, + "learning_rate": 1.073882204561699e-05, + "loss": 0.1504, "step": 10714 }, { - "epoch": 0.62, - "grad_norm": 0.3310073163699963, - "learning_rate": 6.7988980878750636e-06, - "loss": 0.2803, + "epoch": 0.49, + "grad_norm": 0.31882027602109486, + "learning_rate": 1.0737338184738277e-05, + "loss": 0.2391, "step": 10715 }, { - "epoch": 0.62, - "grad_norm": 0.900982088435711, - "learning_rate": 6.797135151341643e-06, - "loss": 0.3219, + "epoch": 0.49, + "grad_norm": 0.3401453663074988, + "learning_rate": 1.0735854307535607e-05, + "loss": 0.2905, "step": 10716 }, { - "epoch": 0.62, - "grad_norm": 0.42156896906947466, - "learning_rate": 6.7953723257232955e-06, - "loss": 0.3128, + "epoch": 0.49, + "grad_norm": 1.0921216325888152, + "learning_rate": 1.073437041404183e-05, + "loss": 0.5041, "step": 10717 }, { - "epoch": 0.62, - "grad_norm": 0.2688101831184305, - "learning_rate": 6.793609611081064e-06, - "loss": 0.2541, + "epoch": 0.49, + "grad_norm": 0.45499948096842907, + "learning_rate": 1.0732886504289802e-05, + "loss": 0.214, "step": 10718 }, { - "epoch": 0.62, - "grad_norm": 0.3140732036560362, - "learning_rate": 6.791847007475998e-06, - "loss": 0.1823, + "epoch": 0.49, + "grad_norm": 0.3454641512324536, + "learning_rate": 1.073140257831237e-05, + "loss": 0.2725, "step": 10719 }, { - "epoch": 0.62, - "grad_norm": 1.0008202280028524, - "learning_rate": 6.7900845149691285e-06, - "loss": 0.631, + "epoch": 0.49, + "grad_norm": 0.3631696473573739, + "learning_rate": 1.0729918636142392e-05, + "loss": 0.2737, "step": 10720 }, { - "epoch": 0.62, - "grad_norm": 0.42576299687385705, - "learning_rate": 6.7883221336214965e-06, - "loss": 0.2948, + "epoch": 0.49, + "grad_norm": 0.4237599613033983, + "learning_rate": 1.0728434677812722e-05, + "loss": 0.2661, "step": 10721 }, { - "epoch": 0.62, - "grad_norm": 0.42623039582871874, - "learning_rate": 6.7865598634941295e-06, - "loss": 0.3172, + "epoch": 0.49, + "grad_norm": 0.3376005097313622, + "learning_rate": 1.0726950703356204e-05, + "loss": 0.2203, "step": 10722 }, { - "epoch": 0.62, - "grad_norm": 0.45379008885941363, - "learning_rate": 6.784797704648058e-06, - "loss": 0.2807, + "epoch": 0.49, + "grad_norm": 0.38073067840459335, + "learning_rate": 1.0725466712805704e-05, + "loss": 0.3284, "step": 10723 }, { - "epoch": 0.62, - "grad_norm": 0.3814091792954231, - "learning_rate": 6.7830356571443016e-06, - "loss": 0.2453, + "epoch": 0.49, + "grad_norm": 0.7746230814307562, + "learning_rate": 1.0723982706194065e-05, + "loss": 0.4933, "step": 10724 }, { - "epoch": 0.62, - "grad_norm": 0.2705471683167067, - "learning_rate": 6.7812737210438836e-06, - "loss": 0.1876, + "epoch": 0.49, + "grad_norm": 0.3373284436144719, + "learning_rate": 1.072249868355415e-05, + "loss": 0.2174, "step": 10725 }, { - "epoch": 0.62, - "grad_norm": 0.3377882881037869, - "learning_rate": 6.77951189640782e-06, - "loss": 0.2592, + "epoch": 0.49, + "grad_norm": 0.29619639994340635, + "learning_rate": 1.072101464491881e-05, + "loss": 0.2367, "step": 10726 }, { - "epoch": 0.62, - "grad_norm": 0.3912153744933908, - "learning_rate": 6.777750183297117e-06, - "loss": 0.26, + "epoch": 0.49, + "grad_norm": 0.37497113708768215, + "learning_rate": 1.0719530590320902e-05, + "loss": 0.3368, "step": 10727 }, { - "epoch": 0.62, - "grad_norm": 0.43342392314247236, - "learning_rate": 6.77598858177279e-06, - "loss": 0.3417, + "epoch": 0.49, + "grad_norm": 0.3441584402297922, + "learning_rate": 1.0718046519793276e-05, + "loss": 0.2483, "step": 10728 }, { - "epoch": 0.62, - "grad_norm": 0.9386708011142606, - "learning_rate": 6.774227091895835e-06, - "loss": 0.3769, + "epoch": 0.49, + "grad_norm": 0.9748468241471883, + "learning_rate": 1.0716562433368796e-05, + "loss": 0.5552, "step": 10729 }, { - "epoch": 0.62, - "grad_norm": 0.3215869104097005, - "learning_rate": 6.772465713727262e-06, - "loss": 0.2572, + "epoch": 0.49, + "grad_norm": 1.19400143789318, + "learning_rate": 1.0715078331080314e-05, + "loss": 0.7843, "step": 10730 }, { - "epoch": 0.62, - "grad_norm": 0.21444487048175678, - "learning_rate": 6.77070444732806e-06, - "loss": 0.1924, + "epoch": 0.49, + "grad_norm": 0.28104220881453945, + "learning_rate": 1.0713594212960684e-05, + "loss": 0.2247, "step": 10731 }, { - "epoch": 0.62, - "grad_norm": 0.7105654768008676, - "learning_rate": 6.768943292759226e-06, - "loss": 0.3145, + "epoch": 0.49, + "grad_norm": 0.782764002798639, + "learning_rate": 1.0712110079042768e-05, + "loss": 0.4602, "step": 10732 }, { - "epoch": 0.62, - "grad_norm": 0.8871355072259255, - "learning_rate": 6.767182250081744e-06, - "loss": 0.2807, + "epoch": 0.49, + "grad_norm": 0.4063728334239336, + "learning_rate": 1.0710625929359422e-05, + "loss": 0.3066, "step": 10733 }, { - "epoch": 0.62, - "grad_norm": 0.36136097231280245, - "learning_rate": 6.765421319356605e-06, - "loss": 0.3122, + "epoch": 0.49, + "grad_norm": 0.2842750633759195, + "learning_rate": 1.0709141763943502e-05, + "loss": 0.2306, "step": 10734 }, { - "epoch": 0.62, - "grad_norm": 0.45744692480747945, - "learning_rate": 6.763660500644783e-06, - "loss": 0.3692, + "epoch": 0.49, + "grad_norm": 0.35823541579209517, + "learning_rate": 1.0707657582827867e-05, + "loss": 0.2647, "step": 10735 }, { - "epoch": 0.62, - "grad_norm": 0.31629793854850224, - "learning_rate": 6.761899794007262e-06, - "loss": 0.2086, + "epoch": 0.49, + "grad_norm": 1.1948685031222255, + "learning_rate": 1.0706173386045373e-05, + "loss": 0.6795, "step": 10736 }, { - "epoch": 0.62, - "grad_norm": 0.25178578119165373, - "learning_rate": 6.760139199505014e-06, - "loss": 0.1469, + "epoch": 0.49, + "grad_norm": 0.37082182938493163, + "learning_rate": 1.0704689173628882e-05, + "loss": 0.2641, "step": 10737 }, { - "epoch": 0.62, - "grad_norm": 0.3273390465451645, - "learning_rate": 6.758378717199004e-06, - "loss": 0.2983, + "epoch": 0.49, + "grad_norm": 0.7566463268668385, + "learning_rate": 1.0703204945611254e-05, + "loss": 0.3018, "step": 10738 }, { - "epoch": 0.62, - "grad_norm": 0.58071175528015, - "learning_rate": 6.756618347150196e-06, - "loss": 0.2238, + "epoch": 0.49, + "grad_norm": 0.32559458942662833, + "learning_rate": 1.0701720702025344e-05, + "loss": 0.3034, "step": 10739 }, { - "epoch": 0.62, - "grad_norm": 0.3900166059812892, - "learning_rate": 6.7548580894195585e-06, - "loss": 0.3206, + "epoch": 0.49, + "grad_norm": 0.4250176516484628, + "learning_rate": 1.0700236442904017e-05, + "loss": 0.3004, "step": 10740 }, { - "epoch": 0.62, - "grad_norm": 2.5014095471100632, - "learning_rate": 6.753097944068043e-06, - "loss": 0.6091, + "epoch": 0.49, + "grad_norm": 0.2636505953531897, + "learning_rate": 1.0698752168280126e-05, + "loss": 0.1302, "step": 10741 }, { - "epoch": 0.62, - "grad_norm": 0.27374873890951196, - "learning_rate": 6.7513379111566105e-06, - "loss": 0.2153, + "epoch": 0.49, + "grad_norm": 0.45078208495904315, + "learning_rate": 1.0697267878186538e-05, + "loss": 0.3505, "step": 10742 }, { - "epoch": 0.62, - "grad_norm": 0.2331727972943417, - "learning_rate": 6.749577990746202e-06, - "loss": 0.2058, + "epoch": 0.49, + "grad_norm": 0.3552488512346412, + "learning_rate": 1.069578357265611e-05, + "loss": 0.2663, "step": 10743 }, { - "epoch": 0.62, - "grad_norm": 0.6106539407307946, - "learning_rate": 6.74781818289777e-06, - "loss": 0.3983, + "epoch": 0.49, + "grad_norm": 0.8433590416512513, + "learning_rate": 1.0694299251721708e-05, + "loss": 0.3122, "step": 10744 }, { - "epoch": 0.62, - "grad_norm": 0.3191253890892391, - "learning_rate": 6.746058487672253e-06, - "loss": 0.1622, + "epoch": 0.49, + "grad_norm": 1.0117519060213622, + "learning_rate": 1.0692814915416186e-05, + "loss": 0.5295, "step": 10745 }, { - "epoch": 0.62, - "grad_norm": 0.2832733787647948, - "learning_rate": 6.744298905130593e-06, - "loss": 0.2955, + "epoch": 0.49, + "grad_norm": 0.2639455185458164, + "learning_rate": 1.0691330563772408e-05, + "loss": 0.2007, "step": 10746 }, { - "epoch": 0.62, - "grad_norm": 1.2000591687611528, - "learning_rate": 6.74253943533372e-06, - "loss": 0.7825, + "epoch": 0.49, + "grad_norm": 0.2903387475603825, + "learning_rate": 1.0689846196823241e-05, + "loss": 0.2478, "step": 10747 }, { - "epoch": 0.62, - "grad_norm": 0.5827279940418081, - "learning_rate": 6.740780078342568e-06, - "loss": 0.304, + "epoch": 0.49, + "grad_norm": 1.1399263651724851, + "learning_rate": 1.0688361814601542e-05, + "loss": 0.4923, "step": 10748 }, { - "epoch": 0.62, - "grad_norm": 0.2421944177584202, - "learning_rate": 6.7390208342180595e-06, - "loss": 0.1781, + "epoch": 0.49, + "grad_norm": 0.3278533066596291, + "learning_rate": 1.0686877417140175e-05, + "loss": 0.2559, "step": 10749 }, { - "epoch": 0.62, - "grad_norm": 0.3432118231078927, - "learning_rate": 6.737261703021123e-06, - "loss": 0.3119, + "epoch": 0.49, + "grad_norm": 1.102989476769933, + "learning_rate": 1.0685393004472009e-05, + "loss": 0.4062, "step": 10750 }, { - "epoch": 0.62, - "grad_norm": 0.6051458957393929, - "learning_rate": 6.735502684812669e-06, - "loss": 0.371, + "epoch": 0.49, + "grad_norm": 0.34859304766190213, + "learning_rate": 1.06839085766299e-05, + "loss": 0.2456, "step": 10751 }, { - "epoch": 0.62, - "grad_norm": 0.4037052100199354, - "learning_rate": 6.73374377965362e-06, - "loss": 0.2492, + "epoch": 0.49, + "grad_norm": 0.3494895642775547, + "learning_rate": 1.0682424133646712e-05, + "loss": 0.2561, "step": 10752 }, { - "epoch": 0.62, - "grad_norm": 0.9394090165245456, - "learning_rate": 6.731984987604882e-06, - "loss": 0.5348, + "epoch": 0.49, + "grad_norm": 0.3253290798617155, + "learning_rate": 1.0680939675555313e-05, + "loss": 0.1885, "step": 10753 }, { - "epoch": 0.62, - "grad_norm": 0.29799092639937913, - "learning_rate": 6.730226308727363e-06, - "loss": 0.2664, + "epoch": 0.49, + "grad_norm": 0.38840125192380504, + "learning_rate": 1.0679455202388565e-05, + "loss": 0.2654, "step": 10754 }, { - "epoch": 0.62, - "grad_norm": 0.44911316464676126, - "learning_rate": 6.728467743081968e-06, - "loss": 0.2381, + "epoch": 0.49, + "grad_norm": 0.3343844024741166, + "learning_rate": 1.0677970714179332e-05, + "loss": 0.2459, "step": 10755 }, { - "epoch": 0.62, - "grad_norm": 0.3233394168263766, - "learning_rate": 6.726709290729592e-06, - "loss": 0.2382, + "epoch": 0.49, + "grad_norm": 0.8075885400467577, + "learning_rate": 1.0676486210960486e-05, + "loss": 0.4087, "step": 10756 }, { - "epoch": 0.62, - "grad_norm": 0.5916566364028347, - "learning_rate": 6.724950951731135e-06, - "loss": 0.3323, + "epoch": 0.49, + "grad_norm": 0.44633978789498896, + "learning_rate": 1.0675001692764886e-05, + "loss": 0.2099, "step": 10757 }, { - "epoch": 0.62, - "grad_norm": 0.2714572194543101, - "learning_rate": 6.723192726147482e-06, - "loss": 0.2394, + "epoch": 0.49, + "grad_norm": 0.4194208391871709, + "learning_rate": 1.0673517159625395e-05, + "loss": 0.2483, "step": 10758 }, { - "epoch": 0.62, - "grad_norm": 1.0438216384131889, - "learning_rate": 6.721434614039528e-06, - "loss": 0.6214, + "epoch": 0.49, + "grad_norm": 0.3027930547010427, + "learning_rate": 1.0672032611574887e-05, + "loss": 0.2464, "step": 10759 }, { - "epoch": 0.62, - "grad_norm": 0.8915962859834544, - "learning_rate": 6.719676615468149e-06, - "loss": 0.4534, + "epoch": 0.49, + "grad_norm": 0.9573748988839156, + "learning_rate": 1.0670548048646224e-05, + "loss": 0.5016, "step": 10760 }, { - "epoch": 0.62, - "grad_norm": 0.3335692130553307, - "learning_rate": 6.717918730494231e-06, - "loss": 0.2893, + "epoch": 0.49, + "grad_norm": 0.3495914415566046, + "learning_rate": 1.0669063470872271e-05, + "loss": 0.1965, "step": 10761 }, { - "epoch": 0.62, - "grad_norm": 0.23904715452470016, - "learning_rate": 6.716160959178644e-06, - "loss": 0.2044, + "epoch": 0.49, + "grad_norm": 0.4180381958315438, + "learning_rate": 1.06675788782859e-05, + "loss": 0.3614, "step": 10762 }, { - "epoch": 0.62, - "grad_norm": 0.77472616741585, - "learning_rate": 6.714403301582263e-06, - "loss": 0.4212, + "epoch": 0.49, + "grad_norm": 0.5375715089934817, + "learning_rate": 1.0666094270919978e-05, + "loss": 0.3693, "step": 10763 }, { - "epoch": 0.62, - "grad_norm": 0.3341935232637106, - "learning_rate": 6.712645757765952e-06, - "loss": 0.272, + "epoch": 0.49, + "grad_norm": 0.32621506596607897, + "learning_rate": 1.0664609648807369e-05, + "loss": 0.207, "step": 10764 }, { - "epoch": 0.62, - "grad_norm": 0.8951143831388909, - "learning_rate": 6.710888327790581e-06, - "loss": 0.3213, + "epoch": 0.49, + "grad_norm": 0.2927650635349133, + "learning_rate": 1.0663125011980942e-05, + "loss": 0.2038, "step": 10765 }, { - "epoch": 0.62, - "grad_norm": 0.29522917110829133, - "learning_rate": 6.709131011717005e-06, - "loss": 0.2728, + "epoch": 0.49, + "grad_norm": 0.4948211262709542, + "learning_rate": 1.0661640360473566e-05, + "loss": 0.3648, "step": 10766 }, { - "epoch": 0.62, - "grad_norm": 0.3995050553221213, - "learning_rate": 6.707373809606077e-06, - "loss": 0.3016, + "epoch": 0.49, + "grad_norm": 0.34376765299597767, + "learning_rate": 1.0660155694318108e-05, + "loss": 0.2354, "step": 10767 }, { - "epoch": 0.62, - "grad_norm": 0.33594474411352, - "learning_rate": 6.705616721518655e-06, - "loss": 0.1888, + "epoch": 0.49, + "grad_norm": 0.9546727424251255, + "learning_rate": 1.0658671013547446e-05, + "loss": 0.4507, "step": 10768 }, { - "epoch": 0.62, - "grad_norm": 0.2893697612282927, - "learning_rate": 6.703859747515584e-06, - "loss": 0.1952, + "epoch": 0.49, + "grad_norm": 1.3046482047476735, + "learning_rate": 1.0657186318194437e-05, + "loss": 0.653, "step": 10769 }, { - "epoch": 0.62, - "grad_norm": 0.27035597075734846, - "learning_rate": 6.702102887657709e-06, - "loss": 0.2757, + "epoch": 0.49, + "grad_norm": 0.3446701950919525, + "learning_rate": 1.0655701608291959e-05, + "loss": 0.2679, "step": 10770 }, { - "epoch": 0.62, - "grad_norm": 0.8535729454967663, - "learning_rate": 6.7003461420058715e-06, - "loss": 0.3506, + "epoch": 0.49, + "grad_norm": 0.3231523452460592, + "learning_rate": 1.0654216883872876e-05, + "loss": 0.2017, "step": 10771 }, { - "epoch": 0.62, - "grad_norm": 0.5549308852351893, - "learning_rate": 6.6985895106209005e-06, - "loss": 0.3496, + "epoch": 0.49, + "grad_norm": 0.44000324995062373, + "learning_rate": 1.0652732144970066e-05, + "loss": 0.3288, "step": 10772 }, { - "epoch": 0.62, - "grad_norm": 0.36265593609148106, - "learning_rate": 6.696832993563636e-06, - "loss": 0.2992, + "epoch": 0.49, + "grad_norm": 0.34299125657546825, + "learning_rate": 1.0651247391616395e-05, + "loss": 0.2731, "step": 10773 }, { - "epoch": 0.62, - "grad_norm": 0.3523666206537711, - "learning_rate": 6.695076590894899e-06, - "loss": 0.2899, + "epoch": 0.49, + "grad_norm": 0.9851606272736743, + "learning_rate": 1.0649762623844733e-05, + "loss": 0.3819, "step": 10774 }, { - "epoch": 0.62, - "grad_norm": 0.2912704248423908, - "learning_rate": 6.693320302675521e-06, - "loss": 0.1405, + "epoch": 0.49, + "grad_norm": 0.37632614646509693, + "learning_rate": 1.0648277841687956e-05, + "loss": 0.2959, "step": 10775 }, { - "epoch": 0.62, - "grad_norm": 0.3451346608345089, - "learning_rate": 6.6915641289663154e-06, - "loss": 0.2796, + "epoch": 0.5, + "grad_norm": 0.6681409765548855, + "learning_rate": 1.0646793045178925e-05, + "loss": 0.4481, "step": 10776 }, { - "epoch": 0.62, - "grad_norm": 0.3192817030035515, - "learning_rate": 6.689808069828105e-06, - "loss": 0.2486, + "epoch": 0.5, + "grad_norm": 0.21520073592461034, + "learning_rate": 1.0645308234350525e-05, + "loss": 0.1538, "step": 10777 }, { - "epoch": 0.62, - "grad_norm": 0.38933740283548857, - "learning_rate": 6.688052125321698e-06, - "loss": 0.2439, + "epoch": 0.5, + "grad_norm": 0.4099511083646181, + "learning_rate": 1.064382340923562e-05, + "loss": 0.3353, "step": 10778 }, { - "epoch": 0.62, - "grad_norm": 0.3694365237432283, - "learning_rate": 6.686296295507903e-06, - "loss": 0.2851, + "epoch": 0.5, + "grad_norm": 0.6083651559097929, + "learning_rate": 1.0642338569867086e-05, + "loss": 0.3554, "step": 10779 }, { - "epoch": 0.62, - "grad_norm": 0.4782734090964818, - "learning_rate": 6.684540580447525e-06, - "loss": 0.2538, + "epoch": 0.5, + "grad_norm": 0.34487059716258284, + "learning_rate": 1.0640853716277797e-05, + "loss": 0.2628, "step": 10780 }, { - "epoch": 0.62, - "grad_norm": 0.4149927816314323, - "learning_rate": 6.682784980201363e-06, - "loss": 0.2624, + "epoch": 0.5, + "grad_norm": 1.0124394049726113, + "learning_rate": 1.0639368848500624e-05, + "loss": 0.5962, "step": 10781 }, { - "epoch": 0.62, - "grad_norm": 0.2933724186560608, - "learning_rate": 6.6810294948302165e-06, - "loss": 0.2648, + "epoch": 0.5, + "grad_norm": 0.4781928756058893, + "learning_rate": 1.0637883966568438e-05, + "loss": 0.325, "step": 10782 }, { - "epoch": 0.62, - "grad_norm": 0.46882515485188336, - "learning_rate": 6.679274124394874e-06, - "loss": 0.2259, + "epoch": 0.5, + "grad_norm": 0.26572435862100374, + "learning_rate": 1.0636399070514118e-05, + "loss": 0.2297, "step": 10783 }, { - "epoch": 0.62, - "grad_norm": 0.5844321532824087, - "learning_rate": 6.677518868956128e-06, - "loss": 0.2892, + "epoch": 0.5, + "grad_norm": 0.548778630875012, + "learning_rate": 1.0634914160370536e-05, + "loss": 0.328, "step": 10784 }, { - "epoch": 0.62, - "grad_norm": 0.29395991528266874, - "learning_rate": 6.675763728574758e-06, - "loss": 0.2712, + "epoch": 0.5, + "grad_norm": 0.41978860626118325, + "learning_rate": 1.0633429236170565e-05, + "loss": 0.3166, "step": 10785 }, { - "epoch": 0.62, - "grad_norm": 0.47841665979336606, - "learning_rate": 6.67400870331155e-06, - "loss": 0.3708, + "epoch": 0.5, + "grad_norm": 0.2954584757614555, + "learning_rate": 1.0631944297947083e-05, + "loss": 0.2495, "step": 10786 }, { - "epoch": 0.62, - "grad_norm": 0.7244194775477389, - "learning_rate": 6.672253793227273e-06, - "loss": 0.4866, + "epoch": 0.5, + "grad_norm": 0.4203474053476572, + "learning_rate": 1.0630459345732964e-05, + "loss": 0.2513, "step": 10787 }, { - "epoch": 0.62, - "grad_norm": 0.3039119401991573, - "learning_rate": 6.670498998382708e-06, - "loss": 0.2247, + "epoch": 0.5, + "grad_norm": 0.4038093560777572, + "learning_rate": 1.062897437956108e-05, + "loss": 0.2849, "step": 10788 }, { - "epoch": 0.62, - "grad_norm": 0.35386920706771574, - "learning_rate": 6.668744318838618e-06, - "loss": 0.2584, + "epoch": 0.5, + "grad_norm": 0.32968406490076513, + "learning_rate": 1.062748939946431e-05, + "loss": 0.2053, "step": 10789 }, { - "epoch": 0.62, - "grad_norm": 0.31571310808178393, - "learning_rate": 6.66698975465577e-06, - "loss": 0.2504, + "epoch": 0.5, + "grad_norm": 0.3226771139419802, + "learning_rate": 1.0626004405475531e-05, + "loss": 0.2925, "step": 10790 }, { - "epoch": 0.62, - "grad_norm": 0.3214123531686496, - "learning_rate": 6.665235305894925e-06, - "loss": 0.2037, + "epoch": 0.5, + "grad_norm": 0.3508119249826307, + "learning_rate": 1.062451939762762e-05, + "loss": 0.2604, "step": 10791 }, { - "epoch": 0.62, - "grad_norm": 0.902925238719976, - "learning_rate": 6.663480972616835e-06, - "loss": 0.4962, + "epoch": 0.5, + "grad_norm": 1.2693555560306609, + "learning_rate": 1.0623034375953447e-05, + "loss": 0.7882, "step": 10792 }, { - "epoch": 0.62, - "grad_norm": 0.3389188225905232, - "learning_rate": 6.661726754882256e-06, - "loss": 0.2932, + "epoch": 0.5, + "grad_norm": 0.32363961872713526, + "learning_rate": 1.0621549340485895e-05, + "loss": 0.1308, "step": 10793 }, { - "epoch": 0.62, - "grad_norm": 0.3137494677922765, - "learning_rate": 6.659972652751936e-06, - "loss": 0.2132, + "epoch": 0.5, + "grad_norm": 0.43152299050792026, + "learning_rate": 1.0620064291257839e-05, + "loss": 0.2763, "step": 10794 }, { - "epoch": 0.62, - "grad_norm": 0.39839423898137716, - "learning_rate": 6.658218666286621e-06, - "loss": 0.3217, + "epoch": 0.5, + "grad_norm": 0.357500513025324, + "learning_rate": 1.0618579228302157e-05, + "loss": 0.3092, "step": 10795 }, { - "epoch": 0.62, - "grad_norm": 0.40810260556052064, - "learning_rate": 6.656464795547048e-06, - "loss": 0.2755, + "epoch": 0.5, + "grad_norm": 0.6891916062377235, + "learning_rate": 1.0617094151651728e-05, + "loss": 0.4516, "step": 10796 }, { - "epoch": 0.62, - "grad_norm": 0.34635085809256516, - "learning_rate": 6.654711040593957e-06, - "loss": 0.2657, + "epoch": 0.5, + "grad_norm": 0.3243649533562527, + "learning_rate": 1.0615609061339431e-05, + "loss": 0.158, "step": 10797 }, { - "epoch": 0.62, - "grad_norm": 0.3303025437976195, - "learning_rate": 6.652957401488076e-06, - "loss": 0.2489, + "epoch": 0.5, + "grad_norm": 0.27491934792676875, + "learning_rate": 1.0614123957398142e-05, + "loss": 0.242, "step": 10798 }, { - "epoch": 0.62, - "grad_norm": 1.1771107826528375, - "learning_rate": 6.651203878290139e-06, - "loss": 0.7729, + "epoch": 0.5, + "grad_norm": 0.34439776213334206, + "learning_rate": 1.0612638839860736e-05, + "loss": 0.2707, "step": 10799 }, { - "epoch": 0.62, - "grad_norm": 0.3086966114240212, - "learning_rate": 6.649450471060865e-06, - "loss": 0.245, + "epoch": 0.5, + "grad_norm": 0.4960974529821682, + "learning_rate": 1.06111537087601e-05, + "loss": 0.1878, "step": 10800 }, { - "epoch": 0.62, - "grad_norm": 0.42800613630194834, - "learning_rate": 6.64769717986098e-06, - "loss": 0.2697, + "epoch": 0.5, + "grad_norm": 0.46768195754492564, + "learning_rate": 1.060966856412911e-05, + "loss": 0.3298, "step": 10801 }, { - "epoch": 0.62, - "grad_norm": 0.26776956632235843, - "learning_rate": 6.6459440047511955e-06, - "loss": 0.2168, + "epoch": 0.5, + "grad_norm": 0.5408954127233903, + "learning_rate": 1.0608183406000645e-05, + "loss": 0.377, "step": 10802 }, { - "epoch": 0.62, - "grad_norm": 0.3370355453206775, - "learning_rate": 6.6441909457922286e-06, - "loss": 0.2665, + "epoch": 0.5, + "grad_norm": 0.32753278935486535, + "learning_rate": 1.0606698234407587e-05, + "loss": 0.23, "step": 10803 }, { - "epoch": 0.62, - "grad_norm": 1.0228746361906365, - "learning_rate": 6.642438003044781e-06, - "loss": 0.3131, + "epoch": 0.5, + "grad_norm": 0.34149329887450186, + "learning_rate": 1.0605213049382814e-05, + "loss": 0.2719, "step": 10804 }, { - "epoch": 0.62, - "grad_norm": 0.35770512175069535, - "learning_rate": 6.640685176569568e-06, - "loss": 0.324, + "epoch": 0.5, + "grad_norm": 0.4439961312816496, + "learning_rate": 1.060372785095921e-05, + "loss": 0.236, "step": 10805 }, { - "epoch": 0.62, - "grad_norm": 0.31252442730763696, - "learning_rate": 6.638932466427277e-06, - "loss": 0.248, + "epoch": 0.5, + "grad_norm": 0.25953949040876356, + "learning_rate": 1.0602242639169649e-05, + "loss": 0.2235, "step": 10806 }, { - "epoch": 0.62, - "grad_norm": 0.7175209362515863, - "learning_rate": 6.637179872678612e-06, - "loss": 0.3296, + "epoch": 0.5, + "grad_norm": 0.5259770183749032, + "learning_rate": 1.0600757414047019e-05, + "loss": 0.3604, "step": 10807 }, { - "epoch": 0.62, - "grad_norm": 0.23272726866411692, - "learning_rate": 6.635427395384262e-06, - "loss": 0.1369, + "epoch": 0.5, + "grad_norm": 0.6469408790526499, + "learning_rate": 1.0599272175624193e-05, + "loss": 0.4549, "step": 10808 }, { - "epoch": 0.62, - "grad_norm": 0.4007091085654962, - "learning_rate": 6.633675034604918e-06, - "loss": 0.2966, + "epoch": 0.5, + "grad_norm": 0.6578517121909913, + "learning_rate": 1.0597786923934066e-05, + "loss": 0.3936, "step": 10809 }, { - "epoch": 0.62, - "grad_norm": 0.348395608259044, - "learning_rate": 6.6319227904012605e-06, - "loss": 0.2952, + "epoch": 0.5, + "grad_norm": 0.3253176222137389, + "learning_rate": 1.0596301659009512e-05, + "loss": 0.2317, "step": 10810 }, { - "epoch": 0.62, - "grad_norm": 0.7075777072812005, - "learning_rate": 6.630170662833974e-06, - "loss": 0.2966, + "epoch": 0.5, + "grad_norm": 0.24421515247892855, + "learning_rate": 1.059481638088341e-05, + "loss": 0.2018, "step": 10811 }, { - "epoch": 0.62, - "grad_norm": 0.359803224019601, - "learning_rate": 6.62841865196373e-06, - "loss": 0.2858, + "epoch": 0.5, + "grad_norm": 0.8783169941153536, + "learning_rate": 1.0593331089588648e-05, + "loss": 0.3311, "step": 10812 }, { - "epoch": 0.62, - "grad_norm": 0.3924621489778972, - "learning_rate": 6.626666757851208e-06, - "loss": 0.3285, + "epoch": 0.5, + "grad_norm": 0.3863254161674765, + "learning_rate": 1.0591845785158108e-05, + "loss": 0.2653, "step": 10813 }, { - "epoch": 0.62, - "grad_norm": 0.3379307119515423, - "learning_rate": 6.624914980557067e-06, - "loss": 0.1459, + "epoch": 0.5, + "grad_norm": 0.3553612754972421, + "learning_rate": 1.0590360467624673e-05, + "loss": 0.3151, "step": 10814 }, { - "epoch": 0.62, - "grad_norm": 0.25824750797348944, - "learning_rate": 6.623163320141977e-06, - "loss": 0.2055, + "epoch": 0.5, + "grad_norm": 0.8799962388047933, + "learning_rate": 1.0588875137021228e-05, + "loss": 0.5138, "step": 10815 }, { - "epoch": 0.62, - "grad_norm": 0.9254204327897717, - "learning_rate": 6.621411776666593e-06, - "loss": 0.5132, + "epoch": 0.5, + "grad_norm": 0.3688801725877234, + "learning_rate": 1.0587389793380652e-05, + "loss": 0.2301, "step": 10816 }, { - "epoch": 0.62, - "grad_norm": 0.3290210750926486, - "learning_rate": 6.619660350191577e-06, - "loss": 0.2528, + "epoch": 0.5, + "grad_norm": 0.4527972515046298, + "learning_rate": 1.0585904436735835e-05, + "loss": 0.2149, "step": 10817 }, { - "epoch": 0.62, - "grad_norm": 0.3258960929973952, - "learning_rate": 6.617909040777578e-06, - "loss": 0.2723, + "epoch": 0.5, + "grad_norm": 0.35133140799945833, + "learning_rate": 1.0584419067119655e-05, + "loss": 0.3043, "step": 10818 }, { - "epoch": 0.62, - "grad_norm": 0.6898961033010748, - "learning_rate": 6.6161578484852405e-06, - "loss": 0.4741, + "epoch": 0.5, + "grad_norm": 0.3286325043499027, + "learning_rate": 1.0582933684565003e-05, + "loss": 0.2188, "step": 10819 }, { - "epoch": 0.62, - "grad_norm": 0.1955724790230162, - "learning_rate": 6.614406773375215e-06, - "loss": 0.0904, + "epoch": 0.5, + "grad_norm": 1.4208958306656765, + "learning_rate": 1.0581448289104759e-05, + "loss": 0.7471, "step": 10820 }, { - "epoch": 0.62, - "grad_norm": 0.31062886283654306, - "learning_rate": 6.612655815508135e-06, - "loss": 0.2714, + "epoch": 0.5, + "grad_norm": 0.7718131004498932, + "learning_rate": 1.0579962880771813e-05, + "loss": 0.5496, "step": 10821 }, { - "epoch": 0.62, - "grad_norm": 0.5003777175917457, - "learning_rate": 6.610904974944638e-06, - "loss": 0.3493, + "epoch": 0.5, + "grad_norm": 0.2834451110466474, + "learning_rate": 1.0578477459599046e-05, + "loss": 0.2791, "step": 10822 }, { - "epoch": 0.62, - "grad_norm": 0.6498721179269221, - "learning_rate": 6.609154251745356e-06, - "loss": 0.4102, + "epoch": 0.5, + "grad_norm": 0.27917093969019297, + "learning_rate": 1.0576992025619344e-05, + "loss": 0.1396, "step": 10823 }, { - "epoch": 0.62, - "grad_norm": 0.31094275540668453, - "learning_rate": 6.607403645970919e-06, - "loss": 0.2138, + "epoch": 0.5, + "grad_norm": 0.500750297579144, + "learning_rate": 1.0575506578865598e-05, + "loss": 0.3335, "step": 10824 }, { - "epoch": 0.62, - "grad_norm": 0.35222845924002294, - "learning_rate": 6.605653157681945e-06, - "loss": 0.308, + "epoch": 0.5, + "grad_norm": 0.40930629160329773, + "learning_rate": 1.0574021119370688e-05, + "loss": 0.3341, "step": 10825 }, { - "epoch": 0.62, - "grad_norm": 0.5014401149672492, - "learning_rate": 6.603902786939058e-06, - "loss": 0.3462, + "epoch": 0.5, + "grad_norm": 0.39987935088565585, + "learning_rate": 1.0572535647167505e-05, + "loss": 0.2633, "step": 10826 }, { - "epoch": 0.62, - "grad_norm": 0.23552220497581033, - "learning_rate": 6.60215253380287e-06, - "loss": 0.1319, + "epoch": 0.5, + "grad_norm": 0.6692530253469197, + "learning_rate": 1.0571050162288935e-05, + "loss": 0.3999, "step": 10827 }, { - "epoch": 0.62, - "grad_norm": 0.7787992405401837, - "learning_rate": 6.600402398333995e-06, - "loss": 0.3407, + "epoch": 0.5, + "grad_norm": 0.4147580587617464, + "learning_rate": 1.0569564664767867e-05, + "loss": 0.3112, "step": 10828 }, { - "epoch": 0.62, - "grad_norm": 0.36508668218733975, - "learning_rate": 6.598652380593037e-06, - "loss": 0.3031, + "epoch": 0.5, + "grad_norm": 0.5812865950283781, + "learning_rate": 1.0568079154637181e-05, + "loss": 0.3418, "step": 10829 }, { - "epoch": 0.62, - "grad_norm": 0.3085726407226889, - "learning_rate": 6.596902480640603e-06, - "loss": 0.1881, + "epoch": 0.5, + "grad_norm": 0.28204608831286854, + "learning_rate": 1.0566593631929773e-05, + "loss": 0.2554, "step": 10830 }, { - "epoch": 0.62, - "grad_norm": 0.7405743438567102, - "learning_rate": 6.595152698537289e-06, - "loss": 0.5434, + "epoch": 0.5, + "grad_norm": 0.29964942665373784, + "learning_rate": 1.0565108096678526e-05, + "loss": 0.2211, "step": 10831 }, { - "epoch": 0.62, - "grad_norm": 1.4452690524635368, - "learning_rate": 6.59340303434369e-06, - "loss": 0.7422, + "epoch": 0.5, + "grad_norm": 0.6948239314783347, + "learning_rate": 1.0563622548916332e-05, + "loss": 0.4141, "step": 10832 }, { - "epoch": 0.62, - "grad_norm": 0.21649306649578795, - "learning_rate": 6.591653488120398e-06, - "loss": 0.1843, + "epoch": 0.5, + "grad_norm": 0.9506403556827606, + "learning_rate": 1.0562136988676079e-05, + "loss": 0.5632, "step": 10833 }, { - "epoch": 0.62, - "grad_norm": 0.34346378810527417, - "learning_rate": 6.589904059927998e-06, - "loss": 0.2565, + "epoch": 0.5, + "grad_norm": 0.291525342840716, + "learning_rate": 1.0560651415990655e-05, + "loss": 0.2592, "step": 10834 }, { - "epoch": 0.62, - "grad_norm": 0.692637088445479, - "learning_rate": 6.588154749827076e-06, - "loss": 0.4296, + "epoch": 0.5, + "grad_norm": 0.5936558736230145, + "learning_rate": 1.055916583089295e-05, + "loss": 0.4145, "step": 10835 }, { - "epoch": 0.62, - "grad_norm": 0.41281261424206633, - "learning_rate": 6.586405557878206e-06, - "loss": 0.287, + "epoch": 0.5, + "grad_norm": 0.24436399187483046, + "learning_rate": 1.0557680233415849e-05, + "loss": 0.0717, "step": 10836 }, { - "epoch": 0.62, - "grad_norm": 0.32134511517364006, - "learning_rate": 6.584656484141967e-06, - "loss": 0.2603, + "epoch": 0.5, + "grad_norm": 0.29807326992599154, + "learning_rate": 1.0556194623592247e-05, + "loss": 0.2318, "step": 10837 }, { - "epoch": 0.62, - "grad_norm": 0.80282010542682, - "learning_rate": 6.582907528678928e-06, - "loss": 0.4662, + "epoch": 0.5, + "grad_norm": 0.39456333565601415, + "learning_rate": 1.0554709001455032e-05, + "loss": 0.3132, "step": 10838 }, { - "epoch": 0.62, - "grad_norm": 0.33148783656373965, - "learning_rate": 6.5811586915496515e-06, - "loss": 0.2478, + "epoch": 0.5, + "grad_norm": 0.485877781353574, + "learning_rate": 1.0553223367037095e-05, + "loss": 0.3125, "step": 10839 }, { - "epoch": 0.62, - "grad_norm": 0.37101099026547674, - "learning_rate": 6.579409972814703e-06, - "loss": 0.1527, + "epoch": 0.5, + "grad_norm": 0.34917632521900976, + "learning_rate": 1.0551737720371322e-05, + "loss": 0.2736, "step": 10840 }, { - "epoch": 0.62, - "grad_norm": 0.3358065314278554, - "learning_rate": 6.577661372534639e-06, - "loss": 0.3153, + "epoch": 0.5, + "grad_norm": 0.7141503684796044, + "learning_rate": 1.0550252061490614e-05, + "loss": 0.4242, "step": 10841 }, { - "epoch": 0.62, - "grad_norm": 0.3190398698999832, - "learning_rate": 6.575912890770017e-06, - "loss": 0.2436, + "epoch": 0.5, + "grad_norm": 0.22358704826378117, + "learning_rate": 1.054876639042785e-05, + "loss": 0.1824, "step": 10842 }, { - "epoch": 0.62, - "grad_norm": 0.8837230043317458, - "learning_rate": 6.574164527581383e-06, - "loss": 0.4759, + "epoch": 0.5, + "grad_norm": 0.3978079102492898, + "learning_rate": 1.054728070721593e-05, + "loss": 0.2626, "step": 10843 }, { - "epoch": 0.62, - "grad_norm": 0.47649999430816803, - "learning_rate": 6.5724162830292835e-06, - "loss": 0.3134, + "epoch": 0.5, + "grad_norm": 0.3848898049144023, + "learning_rate": 1.0545795011887743e-05, + "loss": 0.2506, "step": 10844 }, { - "epoch": 0.62, - "grad_norm": 0.3922580154206632, - "learning_rate": 6.570668157174263e-06, - "loss": 0.2656, + "epoch": 0.5, + "grad_norm": 0.37338207222799513, + "learning_rate": 1.0544309304476179e-05, + "loss": 0.2435, "step": 10845 }, { - "epoch": 0.62, - "grad_norm": 0.23499303324710186, - "learning_rate": 6.568920150076854e-06, - "loss": 0.1829, + "epoch": 0.5, + "grad_norm": 0.38498981449243597, + "learning_rate": 1.054282358501413e-05, + "loss": 0.2844, "step": 10846 }, { - "epoch": 0.62, - "grad_norm": 0.6347531042335608, - "learning_rate": 6.567172261797594e-06, - "loss": 0.417, + "epoch": 0.5, + "grad_norm": 0.9745387287820225, + "learning_rate": 1.0541337853534492e-05, + "loss": 0.4159, "step": 10847 }, { - "epoch": 0.62, - "grad_norm": 0.39361758639994104, - "learning_rate": 6.5654244923970105e-06, - "loss": 0.2663, + "epoch": 0.5, + "grad_norm": 0.4631785386828675, + "learning_rate": 1.0539852110070155e-05, + "loss": 0.2186, "step": 10848 }, { - "epoch": 0.62, - "grad_norm": 0.3087842118794902, - "learning_rate": 6.56367684193563e-06, - "loss": 0.2918, + "epoch": 0.5, + "grad_norm": 0.25409007134739114, + "learning_rate": 1.0538366354654016e-05, + "loss": 0.1699, "step": 10849 }, { - "epoch": 0.62, - "grad_norm": 1.0242043905970053, - "learning_rate": 6.561929310473971e-06, - "loss": 0.2512, + "epoch": 0.5, + "grad_norm": 0.4141517312730471, + "learning_rate": 1.053688058731896e-05, + "loss": 0.3243, "step": 10850 }, { - "epoch": 0.62, - "grad_norm": 0.4242460543027238, - "learning_rate": 6.560181898072554e-06, - "loss": 0.3042, + "epoch": 0.5, + "grad_norm": 1.0049346176090797, + "learning_rate": 1.053539480809789e-05, + "loss": 0.5016, "step": 10851 }, { - "epoch": 0.62, - "grad_norm": 0.5240664744635831, - "learning_rate": 6.558434604791888e-06, - "loss": 0.3468, + "epoch": 0.5, + "grad_norm": 0.411183784603024, + "learning_rate": 1.053390901702369e-05, + "loss": 0.2373, "step": 10852 }, { - "epoch": 0.62, - "grad_norm": 0.22341814224458637, - "learning_rate": 6.556687430692486e-06, - "loss": 0.1698, + "epoch": 0.5, + "grad_norm": 0.47400615948304, + "learning_rate": 1.0532423214129262e-05, + "loss": 0.3505, "step": 10853 }, { - "epoch": 0.62, - "grad_norm": 0.39096154916868614, - "learning_rate": 6.5549403758348485e-06, - "loss": 0.2807, + "epoch": 0.5, + "grad_norm": 0.5713871604370441, + "learning_rate": 1.0530937399447496e-05, + "loss": 0.3382, "step": 10854 }, { - "epoch": 0.62, - "grad_norm": 0.5309980404520158, - "learning_rate": 6.553193440279479e-06, - "loss": 0.3881, + "epoch": 0.5, + "grad_norm": 0.19563225960294914, + "learning_rate": 1.0529451573011286e-05, + "loss": 0.1409, "step": 10855 }, { - "epoch": 0.62, - "grad_norm": 0.4725538900448848, - "learning_rate": 6.551446624086873e-06, - "loss": 0.2522, + "epoch": 0.5, + "grad_norm": 1.3547123747177319, + "learning_rate": 1.0527965734853536e-05, + "loss": 0.7743, "step": 10856 }, { - "epoch": 0.62, - "grad_norm": 0.3080050015821684, - "learning_rate": 6.549699927317519e-06, - "loss": 0.2818, + "epoch": 0.5, + "grad_norm": 0.6556285973460387, + "learning_rate": 1.0526479885007126e-05, + "loss": 0.365, "step": 10857 }, { - "epoch": 0.62, - "grad_norm": 0.5715657627569951, - "learning_rate": 6.5479533500319105e-06, - "loss": 0.35, + "epoch": 0.5, + "grad_norm": 0.3428099069581227, + "learning_rate": 1.0524994023504961e-05, + "loss": 0.2383, "step": 10858 }, { - "epoch": 0.62, - "grad_norm": 0.2846118138159484, - "learning_rate": 6.546206892290527e-06, - "loss": 0.1529, + "epoch": 0.5, + "grad_norm": 0.9898684310762809, + "learning_rate": 1.0523508150379933e-05, + "loss": 0.5155, "step": 10859 }, { - "epoch": 0.62, - "grad_norm": 0.3320757102260107, - "learning_rate": 6.544460554153853e-06, - "loss": 0.2813, + "epoch": 0.5, + "grad_norm": 0.435803461121866, + "learning_rate": 1.052202226566494e-05, + "loss": 0.2688, "step": 10860 }, { - "epoch": 0.62, - "grad_norm": 0.36196437928371844, - "learning_rate": 6.542714335682359e-06, - "loss": 0.2973, + "epoch": 0.5, + "grad_norm": 0.3429406156614093, + "learning_rate": 1.052053636939288e-05, + "loss": 0.2706, "step": 10861 }, { - "epoch": 0.62, - "grad_norm": 1.141389722693649, - "learning_rate": 6.54096823693652e-06, - "loss": 0.4734, + "epoch": 0.5, + "grad_norm": 0.30070908553798237, + "learning_rate": 1.0519050461596643e-05, + "loss": 0.2092, "step": 10862 }, { - "epoch": 0.62, - "grad_norm": 0.6271265112294695, - "learning_rate": 6.5392222579768015e-06, - "loss": 0.2178, + "epoch": 0.5, + "grad_norm": 0.8618793313346347, + "learning_rate": 1.0517564542309128e-05, + "loss": 0.4173, "step": 10863 }, { - "epoch": 0.62, - "grad_norm": 0.3753408561849779, - "learning_rate": 6.537476398863669e-06, - "loss": 0.313, + "epoch": 0.5, + "grad_norm": 0.4556668778896537, + "learning_rate": 1.0516078611563237e-05, + "loss": 0.2696, "step": 10864 }, { - "epoch": 0.62, - "grad_norm": 0.33153725793470623, - "learning_rate": 6.535730659657577e-06, - "loss": 0.2618, + "epoch": 0.5, + "grad_norm": 0.4067618522811854, + "learning_rate": 1.0514592669391862e-05, + "loss": 0.271, "step": 10865 }, { - "epoch": 0.62, - "grad_norm": 0.32391570737586556, - "learning_rate": 6.533985040418988e-06, - "loss": 0.1546, + "epoch": 0.5, + "grad_norm": 0.5633859474238586, + "learning_rate": 1.0513106715827897e-05, + "loss": 0.3502, "step": 10866 }, { - "epoch": 0.62, - "grad_norm": 0.3128620256105588, - "learning_rate": 6.532239541208343e-06, - "loss": 0.2564, + "epoch": 0.5, + "grad_norm": 0.38844360873415845, + "learning_rate": 1.0511620750904248e-05, + "loss": 0.3071, "step": 10867 }, { - "epoch": 0.62, - "grad_norm": 0.4533892575766774, - "learning_rate": 6.530494162086098e-06, - "loss": 0.3441, + "epoch": 0.5, + "grad_norm": 0.45420182734489783, + "learning_rate": 1.0510134774653808e-05, + "loss": 0.2233, "step": 10868 }, { - "epoch": 0.62, - "grad_norm": 0.28938108521272216, - "learning_rate": 6.5287489031126875e-06, - "loss": 0.2152, + "epoch": 0.5, + "grad_norm": 0.4444677254443105, + "learning_rate": 1.0508648787109477e-05, + "loss": 0.3274, "step": 10869 }, { - "epoch": 0.62, - "grad_norm": 0.46521166367836747, - "learning_rate": 6.527003764348555e-06, - "loss": 0.3301, + "epoch": 0.5, + "grad_norm": 0.31284839931053543, + "learning_rate": 1.0507162788304148e-05, + "loss": 0.2734, "step": 10870 }, { - "epoch": 0.62, - "grad_norm": 1.1228207484521606, - "learning_rate": 6.5252587458541325e-06, - "loss": 0.5712, + "epoch": 0.5, + "grad_norm": 0.335468458938466, + "learning_rate": 1.0505676778270727e-05, + "loss": 0.1723, "step": 10871 }, { - "epoch": 0.62, - "grad_norm": 0.5512502762042788, - "learning_rate": 6.523513847689854e-06, - "loss": 0.3708, + "epoch": 0.5, + "grad_norm": 1.247284135234285, + "learning_rate": 1.0504190757042108e-05, + "loss": 0.5805, "step": 10872 }, { - "epoch": 0.62, - "grad_norm": 0.19166979867318232, - "learning_rate": 6.521769069916136e-06, - "loss": 0.1914, + "epoch": 0.5, + "grad_norm": 0.2811821387029117, + "learning_rate": 1.0502704724651192e-05, + "loss": 0.2165, "step": 10873 }, { - "epoch": 0.62, - "grad_norm": 0.9560087983933109, - "learning_rate": 6.520024412593409e-06, - "loss": 0.4243, + "epoch": 0.5, + "grad_norm": 0.40171205166525964, + "learning_rate": 1.0501218681130878e-05, + "loss": 0.3287, "step": 10874 }, { - "epoch": 0.62, - "grad_norm": 0.5336182223969617, - "learning_rate": 6.518279875782083e-06, - "loss": 0.3538, + "epoch": 0.5, + "grad_norm": 0.762981366002367, + "learning_rate": 1.0499732626514067e-05, + "loss": 0.3396, "step": 10875 }, { - "epoch": 0.62, - "grad_norm": 0.3886374618315691, - "learning_rate": 6.516535459542579e-06, - "loss": 0.2293, + "epoch": 0.5, + "grad_norm": 0.3836054798623004, + "learning_rate": 1.0498246560833653e-05, + "loss": 0.2501, "step": 10876 }, { - "epoch": 0.62, - "grad_norm": 0.40788284876238157, - "learning_rate": 6.514791163935299e-06, - "loss": 0.3177, + "epoch": 0.5, + "grad_norm": 0.4490986178773323, + "learning_rate": 1.0496760484122546e-05, + "loss": 0.2566, "step": 10877 }, { - "epoch": 0.62, - "grad_norm": 0.5755561972390177, - "learning_rate": 6.513046989020653e-06, - "loss": 0.3556, + "epoch": 0.5, + "grad_norm": 0.39095447011221346, + "learning_rate": 1.0495274396413635e-05, + "loss": 0.2534, "step": 10878 }, { - "epoch": 0.63, - "grad_norm": 0.23691729198980177, - "learning_rate": 6.5113029348590384e-06, - "loss": 0.152, + "epoch": 0.5, + "grad_norm": 0.35520958885797627, + "learning_rate": 1.0493788297739831e-05, + "loss": 0.2592, "step": 10879 }, { - "epoch": 0.63, - "grad_norm": 0.4475169294764376, - "learning_rate": 6.509559001510854e-06, - "loss": 0.2962, + "epoch": 0.5, + "grad_norm": 0.8742815399813149, + "learning_rate": 1.0492302188134032e-05, + "loss": 0.4647, "step": 10880 }, { - "epoch": 0.63, - "grad_norm": 0.7213839327976953, - "learning_rate": 6.5078151890364916e-06, - "loss": 0.3129, + "epoch": 0.5, + "grad_norm": 0.34256995935076606, + "learning_rate": 1.049081606762913e-05, + "loss": 0.2764, "step": 10881 }, { - "epoch": 0.63, - "grad_norm": 0.3567362019743413, - "learning_rate": 6.50607149749634e-06, - "loss": 0.2346, + "epoch": 0.5, + "grad_norm": 0.43773143195426606, + "learning_rate": 1.0489329936258037e-05, + "loss": 0.2789, "step": 10882 }, { - "epoch": 0.63, - "grad_norm": 0.8685527444154926, - "learning_rate": 6.504327926950782e-06, - "loss": 0.5479, + "epoch": 0.5, + "grad_norm": 0.27470968302460486, + "learning_rate": 1.048784379405365e-05, + "loss": 0.2099, "step": 10883 }, { - "epoch": 0.63, - "grad_norm": 0.42927802250755426, - "learning_rate": 6.502584477460195e-06, - "loss": 0.3064, + "epoch": 0.5, + "grad_norm": 0.97604513807605, + "learning_rate": 1.0486357641048872e-05, + "loss": 0.2962, "step": 10884 }, { - "epoch": 0.63, - "grad_norm": 0.2919765365381944, - "learning_rate": 6.50084114908496e-06, - "loss": 0.2674, + "epoch": 0.5, + "grad_norm": 0.45804569514200544, + "learning_rate": 1.0484871477276605e-05, + "loss": 0.2632, "step": 10885 }, { - "epoch": 0.63, - "grad_norm": 0.5838456550127608, - "learning_rate": 6.4990979418854436e-06, - "loss": 0.2418, + "epoch": 0.5, + "grad_norm": 0.3311143080285236, + "learning_rate": 1.0483385302769751e-05, + "loss": 0.289, "step": 10886 }, { - "epoch": 0.63, - "grad_norm": 0.33539401157915516, - "learning_rate": 6.497354855922016e-06, - "loss": 0.2074, + "epoch": 0.5, + "grad_norm": 0.8801719538491538, + "learning_rate": 1.048189911756121e-05, + "loss": 0.5088, "step": 10887 }, { - "epoch": 0.63, - "grad_norm": 0.36912521934440096, - "learning_rate": 6.495611891255038e-06, - "loss": 0.2864, + "epoch": 0.5, + "grad_norm": 0.35028560257865166, + "learning_rate": 1.0480412921683889e-05, + "loss": 0.2225, "step": 10888 }, { - "epoch": 0.63, - "grad_norm": 0.332611184205583, - "learning_rate": 6.493869047944872e-06, - "loss": 0.2492, + "epoch": 0.5, + "grad_norm": 0.24964343135958217, + "learning_rate": 1.0478926715170687e-05, + "loss": 0.2261, "step": 10889 }, { - "epoch": 0.63, - "grad_norm": 0.5711716515617774, - "learning_rate": 6.4921263260518664e-06, - "loss": 0.413, + "epoch": 0.5, + "grad_norm": 1.5628801636401644, + "learning_rate": 1.0477440498054512e-05, + "loss": 0.5982, "step": 10890 }, { - "epoch": 0.63, - "grad_norm": 0.34918005553161635, - "learning_rate": 6.490383725636377e-06, - "loss": 0.2892, + "epoch": 0.5, + "grad_norm": 0.3710797622479978, + "learning_rate": 1.0475954270368265e-05, + "loss": 0.208, "step": 10891 }, { - "epoch": 0.63, - "grad_norm": 0.321025479817607, - "learning_rate": 6.488641246758749e-06, - "loss": 0.2723, + "epoch": 0.5, + "grad_norm": 0.8138146731173193, + "learning_rate": 1.0474468032144846e-05, + "loss": 0.385, "step": 10892 }, { - "epoch": 0.63, - "grad_norm": 0.2324885317833231, - "learning_rate": 6.486898889479323e-06, - "loss": 0.1738, + "epoch": 0.5, + "grad_norm": 0.421100019065941, + "learning_rate": 1.0472981783417162e-05, + "loss": 0.303, "step": 10893 }, { - "epoch": 0.63, - "grad_norm": 0.38333854647224463, - "learning_rate": 6.485156653858438e-06, - "loss": 0.3174, + "epoch": 0.5, + "grad_norm": 0.331172523192777, + "learning_rate": 1.0471495524218121e-05, + "loss": 0.2163, "step": 10894 }, { - "epoch": 0.63, - "grad_norm": 1.008010824991491, - "learning_rate": 6.483414539956426e-06, - "loss": 0.3672, + "epoch": 0.5, + "grad_norm": 0.317756227905293, + "learning_rate": 1.0470009254580622e-05, + "loss": 0.1833, "step": 10895 }, { - "epoch": 0.63, - "grad_norm": 0.27193926530077744, - "learning_rate": 6.48167254783362e-06, - "loss": 0.2538, + "epoch": 0.5, + "grad_norm": 0.8821148693157057, + "learning_rate": 1.0468522974537567e-05, + "loss": 0.4935, "step": 10896 }, { - "epoch": 0.63, - "grad_norm": 0.41061131285869107, - "learning_rate": 6.479930677550338e-06, - "loss": 0.3058, + "epoch": 0.5, + "grad_norm": 0.3673361027049786, + "learning_rate": 1.0467036684121869e-05, + "loss": 0.2989, "step": 10897 }, { - "epoch": 0.63, - "grad_norm": 0.6100862999061782, - "learning_rate": 6.478188929166909e-06, - "loss": 0.3987, + "epoch": 0.5, + "grad_norm": 0.3589798578521269, + "learning_rate": 1.046555038336643e-05, + "loss": 0.2821, "step": 10898 }, { - "epoch": 0.63, - "grad_norm": 0.15737671574549414, - "learning_rate": 6.476447302743643e-06, - "loss": 0.0739, + "epoch": 0.5, + "grad_norm": 0.9321374682184632, + "learning_rate": 1.0464064072304152e-05, + "loss": 0.6071, "step": 10899 }, { - "epoch": 0.63, - "grad_norm": 0.3672614455951174, - "learning_rate": 6.474705798340857e-06, - "loss": 0.3108, + "epoch": 0.5, + "grad_norm": 0.6341214168393893, + "learning_rate": 1.0462577750967941e-05, + "loss": 0.3616, "step": 10900 }, { - "epoch": 0.63, - "grad_norm": 0.37121567073539297, - "learning_rate": 6.472964416018857e-06, - "loss": 0.3178, + "epoch": 0.5, + "grad_norm": 0.27638776912020274, + "learning_rate": 1.0461091419390707e-05, + "loss": 0.2102, "step": 10901 }, { - "epoch": 0.63, - "grad_norm": 0.5386380238554062, - "learning_rate": 6.471223155837949e-06, - "loss": 0.2583, + "epoch": 0.5, + "grad_norm": 0.4439847761153678, + "learning_rate": 1.0459605077605353e-05, + "loss": 0.249, "step": 10902 }, { - "epoch": 0.63, - "grad_norm": 0.3702006272772363, - "learning_rate": 6.469482017858428e-06, - "loss": 0.2944, + "epoch": 0.5, + "grad_norm": 0.5958962099716135, + "learning_rate": 1.0458118725644788e-05, + "loss": 0.3686, "step": 10903 }, { - "epoch": 0.63, - "grad_norm": 0.3343193983572874, - "learning_rate": 6.4677410021405975e-06, - "loss": 0.3281, + "epoch": 0.5, + "grad_norm": 0.4035251922515375, + "learning_rate": 1.0456632363541913e-05, + "loss": 0.2748, "step": 10904 }, { - "epoch": 0.63, - "grad_norm": 0.15994451590807493, - "learning_rate": 6.46600010874474e-06, - "loss": 0.0712, + "epoch": 0.5, + "grad_norm": 0.4168429011256215, + "learning_rate": 1.0455145991329639e-05, + "loss": 0.3264, "step": 10905 }, { - "epoch": 0.63, - "grad_norm": 0.35321148685659404, - "learning_rate": 6.4642593377311515e-06, - "loss": 0.2867, + "epoch": 0.5, + "grad_norm": 0.4291690764468666, + "learning_rate": 1.0453659609040868e-05, + "loss": 0.2955, "step": 10906 }, { - "epoch": 0.63, - "grad_norm": 1.1587678178893002, - "learning_rate": 6.462518689160109e-06, - "loss": 0.568, + "epoch": 0.5, + "grad_norm": 0.2757052344906834, + "learning_rate": 1.0452173216708515e-05, + "loss": 0.1621, "step": 10907 }, { - "epoch": 0.63, - "grad_norm": 0.30479496232056075, - "learning_rate": 6.460778163091891e-06, - "loss": 0.2539, + "epoch": 0.5, + "grad_norm": 0.7198774767685849, + "learning_rate": 1.045068681436548e-05, + "loss": 0.4927, "step": 10908 }, { - "epoch": 0.63, - "grad_norm": 0.35727687245385015, - "learning_rate": 6.45903775958677e-06, - "loss": 0.2846, + "epoch": 0.5, + "grad_norm": 0.3096798546801564, + "learning_rate": 1.0449200402044674e-05, + "loss": 0.2604, "step": 10909 }, { - "epoch": 0.63, - "grad_norm": 2.4649919245936944, - "learning_rate": 6.457297478705023e-06, - "loss": 0.7402, + "epoch": 0.5, + "grad_norm": 0.3801091554125602, + "learning_rate": 1.0447713979779006e-05, + "loss": 0.308, "step": 10910 }, { - "epoch": 0.63, - "grad_norm": 0.2619218553658152, - "learning_rate": 6.45555732050691e-06, - "loss": 0.1705, + "epoch": 0.5, + "grad_norm": 0.9222941504763218, + "learning_rate": 1.0446227547601378e-05, + "loss": 0.4719, "step": 10911 }, { - "epoch": 0.63, - "grad_norm": 0.33591317094101153, - "learning_rate": 6.4538172850526955e-06, - "loss": 0.2024, + "epoch": 0.5, + "grad_norm": 0.41731611639871197, + "learning_rate": 1.0444741105544705e-05, + "loss": 0.2869, "step": 10912 }, { - "epoch": 0.63, - "grad_norm": 0.341833146387076, - "learning_rate": 6.452077372402634e-06, - "loss": 0.3078, + "epoch": 0.5, + "grad_norm": 0.4075029196754148, + "learning_rate": 1.044325465364189e-05, + "loss": 0.3071, "step": 10913 }, { - "epoch": 0.63, - "grad_norm": 0.5891021497920335, - "learning_rate": 6.450337582616983e-06, - "loss": 0.3681, + "epoch": 0.5, + "grad_norm": 0.3446128564891179, + "learning_rate": 1.0441768191925848e-05, + "loss": 0.2116, "step": 10914 }, { - "epoch": 0.63, - "grad_norm": 0.34289810526304676, - "learning_rate": 6.448597915755988e-06, - "loss": 0.2137, + "epoch": 0.5, + "grad_norm": 0.43155647726063834, + "learning_rate": 1.044028172042948e-05, + "loss": 0.2894, "step": 10915 }, { - "epoch": 0.63, - "grad_norm": 0.359263118619319, - "learning_rate": 6.446858371879896e-06, - "loss": 0.312, + "epoch": 0.5, + "grad_norm": 0.5842143192713741, + "learning_rate": 1.0438795239185704e-05, + "loss": 0.3502, "step": 10916 }, { - "epoch": 0.63, - "grad_norm": 0.28334773046172257, - "learning_rate": 6.445118951048942e-06, - "loss": 0.173, + "epoch": 0.5, + "grad_norm": 0.39970246716336244, + "learning_rate": 1.0437308748227419e-05, + "loss": 0.2882, "step": 10917 }, { - "epoch": 0.63, - "grad_norm": 0.30036622389977835, - "learning_rate": 6.44337965332337e-06, - "loss": 0.19, + "epoch": 0.5, + "grad_norm": 0.6601530977308432, + "learning_rate": 1.0435822247587544e-05, + "loss": 0.3817, "step": 10918 }, { - "epoch": 0.63, - "grad_norm": 0.749494957815434, - "learning_rate": 6.4416404787634045e-06, - "loss": 0.4418, + "epoch": 0.5, + "grad_norm": 0.39650530181159, + "learning_rate": 1.043433573729898e-05, + "loss": 0.3369, "step": 10919 }, { - "epoch": 0.63, - "grad_norm": 0.3391009390947701, - "learning_rate": 6.439901427429278e-06, - "loss": 0.3278, + "epoch": 0.5, + "grad_norm": 0.37490496063762124, + "learning_rate": 1.0432849217394645e-05, + "loss": 0.2613, "step": 10920 }, { - "epoch": 0.63, - "grad_norm": 0.4741224416315272, - "learning_rate": 6.438162499381212e-06, - "loss": 0.2293, + "epoch": 0.5, + "grad_norm": 0.4074541069519762, + "learning_rate": 1.0431362687907445e-05, + "loss": 0.3016, "step": 10921 }, { - "epoch": 0.63, - "grad_norm": 0.9750360327136708, - "learning_rate": 6.4364236946794234e-06, - "loss": 0.6323, + "epoch": 0.5, + "grad_norm": 0.3254109694058706, + "learning_rate": 1.042987614887029e-05, + "loss": 0.2461, "step": 10922 }, { - "epoch": 0.63, - "grad_norm": 0.24827351464326222, - "learning_rate": 6.434685013384132e-06, - "loss": 0.1511, + "epoch": 0.5, + "grad_norm": 1.3503541464467523, + "learning_rate": 1.0428389600316092e-05, + "loss": 0.7921, "step": 10923 }, { - "epoch": 0.63, - "grad_norm": 0.288203382381012, - "learning_rate": 6.432946455555542e-06, - "loss": 0.2516, + "epoch": 0.5, + "grad_norm": 0.30620043898111016, + "learning_rate": 1.042690304227776e-05, + "loss": 0.1173, "step": 10924 }, { - "epoch": 0.63, - "grad_norm": 0.463203852610262, - "learning_rate": 6.4312080212538665e-06, - "loss": 0.2672, + "epoch": 0.5, + "grad_norm": 0.33389068950972156, + "learning_rate": 1.0425416474788209e-05, + "loss": 0.2795, "step": 10925 }, { - "epoch": 0.63, - "grad_norm": 0.7318010513536188, - "learning_rate": 6.4294697105393e-06, - "loss": 0.4274, + "epoch": 0.5, + "grad_norm": 0.6268606255790703, + "learning_rate": 1.0423929897880343e-05, + "loss": 0.3996, "step": 10926 }, { - "epoch": 0.63, - "grad_norm": 0.3059976691995668, - "learning_rate": 6.427731523472047e-06, - "loss": 0.2783, + "epoch": 0.5, + "grad_norm": 0.23315679535593764, + "learning_rate": 1.0422443311587083e-05, + "loss": 0.1469, "step": 10927 }, { - "epoch": 0.63, - "grad_norm": 0.3061553802177348, - "learning_rate": 6.425993460112297e-06, - "loss": 0.2512, + "epoch": 0.5, + "grad_norm": 0.533691663205979, + "learning_rate": 1.0420956715941331e-05, + "loss": 0.3127, "step": 10928 }, { - "epoch": 0.63, - "grad_norm": 0.2644528421377624, - "learning_rate": 6.424255520520239e-06, - "loss": 0.1606, + "epoch": 0.5, + "grad_norm": 0.34280900553976923, + "learning_rate": 1.0419470110976004e-05, + "loss": 0.3081, "step": 10929 }, { - "epoch": 0.63, - "grad_norm": 0.3216069885908651, - "learning_rate": 6.422517704756057e-06, - "loss": 0.2645, + "epoch": 0.5, + "grad_norm": 0.35131805238793096, + "learning_rate": 1.0417983496724013e-05, + "loss": 0.2102, "step": 10930 }, { - "epoch": 0.63, - "grad_norm": 1.0873383787909738, - "learning_rate": 6.420780012879937e-06, - "loss": 0.3225, + "epoch": 0.5, + "grad_norm": 0.5464281988934574, + "learning_rate": 1.0416496873218271e-05, + "loss": 0.339, "step": 10931 }, { - "epoch": 0.63, - "grad_norm": 0.32131116306748664, - "learning_rate": 6.419042444952048e-06, - "loss": 0.2816, + "epoch": 0.5, + "grad_norm": 0.7383772090603596, + "learning_rate": 1.041501024049169e-05, + "loss": 0.3921, "step": 10932 }, { - "epoch": 0.63, - "grad_norm": 0.3490888556275218, - "learning_rate": 6.417305001032567e-06, - "loss": 0.2824, + "epoch": 0.5, + "grad_norm": 0.226172039066383, + "learning_rate": 1.0413523598577182e-05, + "loss": 0.1876, "step": 10933 }, { - "epoch": 0.63, - "grad_norm": 1.0053545569436766, - "learning_rate": 6.415567681181658e-06, - "loss": 0.4126, + "epoch": 0.5, + "grad_norm": 0.41271641921042634, + "learning_rate": 1.0412036947507658e-05, + "loss": 0.2332, "step": 10934 }, { - "epoch": 0.63, - "grad_norm": 0.3229247233746252, - "learning_rate": 6.413830485459488e-06, - "loss": 0.2473, + "epoch": 0.5, + "grad_norm": 1.0090279042895118, + "learning_rate": 1.0410550287316035e-05, + "loss": 0.6684, "step": 10935 }, { - "epoch": 0.63, - "grad_norm": 0.3380905219858116, - "learning_rate": 6.412093413926213e-06, - "loss": 0.2688, + "epoch": 0.5, + "grad_norm": 1.1705944740833938, + "learning_rate": 1.0409063618035222e-05, + "loss": 0.456, "step": 10936 }, { - "epoch": 0.63, - "grad_norm": 0.554045753814118, - "learning_rate": 6.410356466641989e-06, - "loss": 0.298, + "epoch": 0.5, + "grad_norm": 0.2923442724355534, + "learning_rate": 1.040757693969814e-05, + "loss": 0.2351, "step": 10937 }, { - "epoch": 0.63, - "grad_norm": 0.9802965063335598, - "learning_rate": 6.408619643666967e-06, - "loss": 0.3922, + "epoch": 0.5, + "grad_norm": 0.6308654023504193, + "learning_rate": 1.0406090252337693e-05, + "loss": 0.4298, "step": 10938 }, { - "epoch": 0.63, - "grad_norm": 0.31719321797306044, - "learning_rate": 6.40688294506129e-06, - "loss": 0.2581, + "epoch": 0.5, + "grad_norm": 0.2833128195985193, + "learning_rate": 1.04046035559868e-05, + "loss": 0.1634, "step": 10939 }, { - "epoch": 0.63, - "grad_norm": 0.3305320623675954, - "learning_rate": 6.405146370885107e-06, - "loss": 0.2959, + "epoch": 0.5, + "grad_norm": 0.3617117484426621, + "learning_rate": 1.0403116850678376e-05, + "loss": 0.2313, "step": 10940 }, { - "epoch": 0.63, - "grad_norm": 0.2577489342671599, - "learning_rate": 6.403409921198548e-06, - "loss": 0.1013, + "epoch": 0.5, + "grad_norm": 0.4068810820362841, + "learning_rate": 1.0401630136445332e-05, + "loss": 0.3683, "step": 10941 }, { - "epoch": 0.63, - "grad_norm": 0.3603206692947732, - "learning_rate": 6.401673596061747e-06, - "loss": 0.2713, + "epoch": 0.5, + "grad_norm": 0.7046554896419595, + "learning_rate": 1.0400143413320582e-05, + "loss": 0.407, "step": 10942 }, { - "epoch": 0.63, - "grad_norm": 1.004731951323438, - "learning_rate": 6.399937395534837e-06, - "loss": 0.4131, + "epoch": 0.5, + "grad_norm": 0.4122696700413633, + "learning_rate": 1.0398656681337042e-05, + "loss": 0.2582, "step": 10943 }, { - "epoch": 0.63, - "grad_norm": 0.26455269733315795, - "learning_rate": 6.398201319677937e-06, - "loss": 0.2145, + "epoch": 0.5, + "grad_norm": 1.6393517902238877, + "learning_rate": 1.0397169940527632e-05, + "loss": 0.783, "step": 10944 }, { - "epoch": 0.63, - "grad_norm": 0.31962065915692844, - "learning_rate": 6.396465368551172e-06, - "loss": 0.2746, + "epoch": 0.5, + "grad_norm": 0.27306102576747404, + "learning_rate": 1.0395683190925261e-05, + "loss": 0.2369, "step": 10945 }, { - "epoch": 0.63, - "grad_norm": 1.1313165310387425, - "learning_rate": 6.394729542214657e-06, - "loss": 0.606, + "epoch": 0.5, + "grad_norm": 0.23986544086345535, + "learning_rate": 1.0394196432562843e-05, + "loss": 0.1718, "step": 10946 }, { - "epoch": 0.63, - "grad_norm": 0.33812898844419714, - "learning_rate": 6.392993840728503e-06, - "loss": 0.2592, + "epoch": 0.5, + "grad_norm": 1.1443658046257628, + "learning_rate": 1.0392709665473298e-05, + "loss": 0.7575, "step": 10947 }, { - "epoch": 0.63, - "grad_norm": 0.3277772485530417, - "learning_rate": 6.391258264152818e-06, - "loss": 0.2676, + "epoch": 0.5, + "grad_norm": 0.5790385132234466, + "learning_rate": 1.0391222889689538e-05, + "loss": 0.3589, "step": 10948 }, { - "epoch": 0.63, - "grad_norm": 0.7622917250054192, - "learning_rate": 6.389522812547701e-06, - "loss": 0.4147, + "epoch": 0.5, + "grad_norm": 0.3397915777222104, + "learning_rate": 1.038973610524448e-05, + "loss": 0.2851, "step": 10949 }, { - "epoch": 0.63, - "grad_norm": 0.2837893256968949, - "learning_rate": 6.3877874859732556e-06, - "loss": 0.1997, + "epoch": 0.5, + "grad_norm": 0.4731018438805068, + "learning_rate": 1.0388249312171046e-05, + "loss": 0.3052, "step": 10950 }, { - "epoch": 0.63, - "grad_norm": 0.3097014346057528, - "learning_rate": 6.386052284489575e-06, - "loss": 0.1805, + "epoch": 0.5, + "grad_norm": 0.39169263977964364, + "learning_rate": 1.0386762510502139e-05, + "loss": 0.2089, "step": 10951 }, { - "epoch": 0.63, - "grad_norm": 0.3016138484836072, - "learning_rate": 6.3843172081567474e-06, - "loss": 0.2933, + "epoch": 0.5, + "grad_norm": 0.3149583358027602, + "learning_rate": 1.0385275700270688e-05, + "loss": 0.2441, "step": 10952 }, { - "epoch": 0.63, - "grad_norm": 0.7273665127231179, - "learning_rate": 6.382582257034858e-06, - "loss": 0.4313, + "epoch": 0.5, + "grad_norm": 0.37132215698082294, + "learning_rate": 1.03837888815096e-05, + "loss": 0.286, "step": 10953 }, { - "epoch": 0.63, - "grad_norm": 0.3243297248965466, - "learning_rate": 6.380847431183992e-06, - "loss": 0.188, + "epoch": 0.5, + "grad_norm": 0.7762712916439668, + "learning_rate": 1.0382302054251799e-05, + "loss": 0.4246, "step": 10954 }, { - "epoch": 0.63, - "grad_norm": 0.3351138381356627, - "learning_rate": 6.379112730664222e-06, - "loss": 0.2968, + "epoch": 0.5, + "grad_norm": 0.3318439167508191, + "learning_rate": 1.0380815218530197e-05, + "loss": 0.2849, "step": 10955 }, { - "epoch": 0.63, - "grad_norm": 0.3233328725092727, - "learning_rate": 6.377378155535625e-06, - "loss": 0.2423, + "epoch": 0.5, + "grad_norm": 0.3425402393153413, + "learning_rate": 1.0379328374377715e-05, + "loss": 0.2391, "step": 10956 }, { - "epoch": 0.63, - "grad_norm": 0.2918832243355452, - "learning_rate": 6.375643705858263e-06, - "loss": 0.2093, + "epoch": 0.5, + "grad_norm": 0.470377333207402, + "learning_rate": 1.0377841521827268e-05, + "loss": 0.2374, "step": 10957 }, { - "epoch": 0.63, - "grad_norm": 1.0029877128585176, - "learning_rate": 6.373909381692207e-06, - "loss": 0.5905, + "epoch": 0.5, + "grad_norm": 0.266832987483294, + "learning_rate": 1.0376354660911772e-05, + "loss": 0.2101, "step": 10958 }, { - "epoch": 0.63, - "grad_norm": 0.5027694083886098, - "learning_rate": 6.372175183097511e-06, - "loss": 0.3369, + "epoch": 0.5, + "grad_norm": 0.6719407026251564, + "learning_rate": 1.037486779166415e-05, + "loss": 0.3738, "step": 10959 }, { - "epoch": 0.63, - "grad_norm": 0.3078451694449207, - "learning_rate": 6.370441110134233e-06, - "loss": 0.2701, + "epoch": 0.5, + "grad_norm": 0.38071576971336674, + "learning_rate": 1.0373380914117313e-05, + "loss": 0.3329, "step": 10960 }, { - "epoch": 0.63, - "grad_norm": 0.41308557058341894, - "learning_rate": 6.3687071628624244e-06, - "loss": 0.2483, + "epoch": 0.5, + "grad_norm": 0.346423742973191, + "learning_rate": 1.0371894028304184e-05, + "loss": 0.2904, "step": 10961 }, { - "epoch": 0.63, - "grad_norm": 0.45906823035018285, - "learning_rate": 6.366973341342128e-06, - "loss": 0.3574, + "epoch": 0.5, + "grad_norm": 1.3876393021986768, + "learning_rate": 1.037040713425768e-05, + "loss": 0.7139, "step": 10962 }, { - "epoch": 0.63, - "grad_norm": 0.34703098652586084, - "learning_rate": 6.365239645633392e-06, - "loss": 0.2705, + "epoch": 0.5, + "grad_norm": 0.30609658202738504, + "learning_rate": 1.036892023201072e-05, + "loss": 0.1699, "step": 10963 }, { - "epoch": 0.63, - "grad_norm": 0.26359333923079414, - "learning_rate": 6.3635060757962485e-06, - "loss": 0.1914, + "epoch": 0.5, + "grad_norm": 0.38894806507442203, + "learning_rate": 1.0367433321596216e-05, + "loss": 0.329, "step": 10964 }, { - "epoch": 0.63, - "grad_norm": 0.5789237996497145, - "learning_rate": 6.361772631890735e-06, - "loss": 0.3915, + "epoch": 0.5, + "grad_norm": 0.41442709369564207, + "learning_rate": 1.0365946403047098e-05, + "loss": 0.3269, "step": 10965 }, { - "epoch": 0.63, - "grad_norm": 0.3167200194383066, - "learning_rate": 6.360039313976875e-06, - "loss": 0.2645, + "epoch": 0.5, + "grad_norm": 0.7681640707075716, + "learning_rate": 1.0364459476396276e-05, + "loss": 0.3248, "step": 10966 }, { - "epoch": 0.63, - "grad_norm": 1.3052671121934587, - "learning_rate": 6.3583061221147015e-06, - "loss": 0.2044, + "epoch": 0.5, + "grad_norm": 0.40340739975557066, + "learning_rate": 1.0362972541676678e-05, + "loss": 0.2685, "step": 10967 }, { - "epoch": 0.63, - "grad_norm": 0.3444639163727557, - "learning_rate": 6.356573056364227e-06, - "loss": 0.3192, + "epoch": 0.5, + "grad_norm": 0.35310313404924815, + "learning_rate": 1.0361485598921213e-05, + "loss": 0.2189, "step": 10968 }, { - "epoch": 0.63, - "grad_norm": 0.5307240095449588, - "learning_rate": 6.354840116785473e-06, - "loss": 0.3267, + "epoch": 0.5, + "grad_norm": 0.3517614313657591, + "learning_rate": 1.0359998648162805e-05, + "loss": 0.2094, "step": 10969 }, { - "epoch": 0.63, - "grad_norm": 0.25389779069515805, - "learning_rate": 6.353107303438447e-06, - "loss": 0.1643, + "epoch": 0.5, + "grad_norm": 0.3343792623918144, + "learning_rate": 1.0358511689434376e-05, + "loss": 0.2467, "step": 10970 }, { - "epoch": 0.63, - "grad_norm": 0.4045083838798069, - "learning_rate": 6.351374616383161e-06, - "loss": 0.3156, + "epoch": 0.5, + "grad_norm": 0.8353299690725504, + "learning_rate": 1.0357024722768843e-05, + "loss": 0.4269, "step": 10971 }, { - "epoch": 0.63, - "grad_norm": 0.6028023578671253, - "learning_rate": 6.349642055679613e-06, - "loss": 0.3478, + "epoch": 0.5, + "grad_norm": 0.38283065164636604, + "learning_rate": 1.0355537748199128e-05, + "loss": 0.2629, "step": 10972 }, { - "epoch": 0.63, - "grad_norm": 0.4182115635796257, - "learning_rate": 6.347909621387809e-06, - "loss": 0.2903, + "epoch": 0.5, + "grad_norm": 0.34676541908543557, + "learning_rate": 1.0354050765758148e-05, + "loss": 0.2648, "step": 10973 }, { - "epoch": 0.63, - "grad_norm": 1.0543676184663318, - "learning_rate": 6.346177313567732e-06, - "loss": 0.4733, + "epoch": 0.5, + "grad_norm": 0.46444387032238416, + "learning_rate": 1.0352563775478828e-05, + "loss": 0.2721, "step": 10974 }, { - "epoch": 0.63, - "grad_norm": 0.2926592072070201, - "learning_rate": 6.34444513227938e-06, - "loss": 0.2497, + "epoch": 0.5, + "grad_norm": 1.5275244150572005, + "learning_rate": 1.0351076777394082e-05, + "loss": 0.6063, "step": 10975 }, { - "epoch": 0.63, - "grad_norm": 0.24753098131598456, - "learning_rate": 6.342713077582733e-06, - "loss": 0.2406, + "epoch": 0.5, + "grad_norm": 0.32751616291954194, + "learning_rate": 1.0349589771536836e-05, + "loss": 0.2195, "step": 10976 }, { - "epoch": 0.63, - "grad_norm": 0.5753767227553651, - "learning_rate": 6.340981149537777e-06, - "loss": 0.3157, + "epoch": 0.5, + "grad_norm": 0.395455680560916, + "learning_rate": 1.0348102757940008e-05, + "loss": 0.3455, "step": 10977 }, { - "epoch": 0.63, - "grad_norm": 0.3953220853323611, - "learning_rate": 6.339249348204485e-06, - "loss": 0.2584, + "epoch": 0.5, + "grad_norm": 0.8568088916795507, + "learning_rate": 1.0346615736636522e-05, + "loss": 0.5895, "step": 10978 }, { - "epoch": 0.63, - "grad_norm": 0.5473609875755407, - "learning_rate": 6.337517673642833e-06, - "loss": 0.3299, + "epoch": 0.5, + "grad_norm": 0.2251148149751252, + "learning_rate": 1.03451287076593e-05, + "loss": 0.1477, "step": 10979 }, { - "epoch": 0.63, - "grad_norm": 0.3745830109479958, - "learning_rate": 6.335786125912784e-06, - "loss": 0.2747, + "epoch": 0.5, + "grad_norm": 1.3948912100838498, + "learning_rate": 1.0343641671041259e-05, + "loss": 0.6137, "step": 10980 }, { - "epoch": 0.63, - "grad_norm": 0.40675845180689163, - "learning_rate": 6.3340547050743055e-06, - "loss": 0.3007, + "epoch": 0.5, + "grad_norm": 0.39548698238259355, + "learning_rate": 1.0342154626815321e-05, + "loss": 0.3108, "step": 10981 }, { - "epoch": 0.63, - "grad_norm": 0.690730580115114, - "learning_rate": 6.332323411187353e-06, - "loss": 0.3359, + "epoch": 0.5, + "grad_norm": 0.30743040559451984, + "learning_rate": 1.0340667575014412e-05, + "loss": 0.2305, "step": 10982 }, { - "epoch": 0.63, - "grad_norm": 0.2285847486188215, - "learning_rate": 6.330592244311885e-06, - "loss": 0.1904, + "epoch": 0.5, + "grad_norm": 0.7381738071248749, + "learning_rate": 1.0339180515671447e-05, + "loss": 0.4809, "step": 10983 }, { - "epoch": 0.63, - "grad_norm": 0.3287690816760705, - "learning_rate": 6.328861204507848e-06, - "loss": 0.2549, + "epoch": 0.5, + "grad_norm": 0.4072206828153819, + "learning_rate": 1.0337693448819357e-05, + "loss": 0.3617, "step": 10984 }, { - "epoch": 0.63, - "grad_norm": 1.4816162350887843, - "learning_rate": 6.327130291835192e-06, - "loss": 0.8568, + "epoch": 0.5, + "grad_norm": 0.33722157496957406, + "learning_rate": 1.0336206374491058e-05, + "loss": 0.1908, "step": 10985 }, { - "epoch": 0.63, - "grad_norm": 1.1107702877033343, - "learning_rate": 6.325399506353855e-06, - "loss": 0.8058, + "epoch": 0.5, + "grad_norm": 0.32698856960693284, + "learning_rate": 1.0334719292719475e-05, + "loss": 0.2035, "step": 10986 }, { - "epoch": 0.63, - "grad_norm": 0.32706815193793987, - "learning_rate": 6.323668848123774e-06, - "loss": 0.19, + "epoch": 0.5, + "grad_norm": 0.6667163496608453, + "learning_rate": 1.0333232203537528e-05, + "loss": 0.4291, "step": 10987 }, { - "epoch": 0.63, - "grad_norm": 0.3039169646645536, - "learning_rate": 6.321938317204886e-06, - "loss": 0.2826, + "epoch": 0.5, + "grad_norm": 0.439332253060987, + "learning_rate": 1.033174510697814e-05, + "loss": 0.2794, "step": 10988 }, { - "epoch": 0.63, - "grad_norm": 0.34350298686577224, - "learning_rate": 6.320207913657111e-06, - "loss": 0.223, + "epoch": 0.5, + "grad_norm": 0.36037835896217024, + "learning_rate": 1.0330258003074238e-05, + "loss": 0.2735, "step": 10989 }, { - "epoch": 0.63, - "grad_norm": 0.43173203039681673, - "learning_rate": 6.3184776375403814e-06, - "loss": 0.1458, + "epoch": 0.5, + "grad_norm": 1.0991663982993012, + "learning_rate": 1.0328770891858739e-05, + "loss": 0.6658, "step": 10990 }, { - "epoch": 0.63, - "grad_norm": 0.32354358590641585, - "learning_rate": 6.3167474889146096e-06, - "loss": 0.292, + "epoch": 0.5, + "grad_norm": 0.3641591490136669, + "learning_rate": 1.0327283773364571e-05, + "loss": 0.2472, "step": 10991 }, { - "epoch": 0.63, - "grad_norm": 0.4753435688955507, - "learning_rate": 6.315017467839717e-06, - "loss": 0.3936, + "epoch": 0.5, + "grad_norm": 0.23437937545218776, + "learning_rate": 1.0325796647624655e-05, + "loss": 0.1696, "step": 10992 }, { - "epoch": 0.63, - "grad_norm": 0.37915678416069587, - "learning_rate": 6.313287574375609e-06, - "loss": 0.2156, + "epoch": 0.51, + "grad_norm": 1.0369622737034812, + "learning_rate": 1.0324309514671918e-05, + "loss": 0.4836, "step": 10993 }, { - "epoch": 0.63, - "grad_norm": 0.4090258131741896, - "learning_rate": 6.311557808582196e-06, - "loss": 0.3467, + "epoch": 0.51, + "grad_norm": 0.3763981052147931, + "learning_rate": 1.0322822374539276e-05, + "loss": 0.2859, "step": 10994 }, { - "epoch": 0.63, - "grad_norm": 0.3414960965925519, - "learning_rate": 6.309828170519376e-06, - "loss": 0.2537, + "epoch": 0.51, + "grad_norm": 0.6915443796131604, + "learning_rate": 1.0321335227259661e-05, + "loss": 0.3736, "step": 10995 }, { - "epoch": 0.63, - "grad_norm": 0.2555111542785431, - "learning_rate": 6.308098660247049e-06, - "loss": 0.1574, + "epoch": 0.51, + "grad_norm": 0.46033592393109823, + "learning_rate": 1.0319848072865993e-05, + "loss": 0.3273, "step": 10996 }, { - "epoch": 0.63, - "grad_norm": 0.53907874280059, - "learning_rate": 6.306369277825104e-06, - "loss": 0.3552, + "epoch": 0.51, + "grad_norm": 0.33996324283685775, + "learning_rate": 1.0318360911391198e-05, + "loss": 0.2416, "step": 10997 }, { - "epoch": 0.63, - "grad_norm": 0.8653136044186649, - "learning_rate": 6.304640023313435e-06, - "loss": 0.5482, + "epoch": 0.51, + "grad_norm": 0.21218199318159608, + "learning_rate": 1.0316873742868199e-05, + "loss": 0.0906, "step": 10998 }, { - "epoch": 0.63, - "grad_norm": 0.2936106670828241, - "learning_rate": 6.302910896771921e-06, - "loss": 0.2673, + "epoch": 0.51, + "grad_norm": 0.6553244582405582, + "learning_rate": 1.0315386567329921e-05, + "loss": 0.4053, "step": 10999 }, { - "epoch": 0.63, - "grad_norm": 0.3666631543606978, - "learning_rate": 6.301181898260444e-06, - "loss": 0.2542, + "epoch": 0.51, + "grad_norm": 0.29094045317347333, + "learning_rate": 1.0313899384809286e-05, + "loss": 0.2502, "step": 11000 }, { - "epoch": 0.63, - "grad_norm": 0.42116700454476547, - "learning_rate": 6.299453027838881e-06, - "loss": 0.2626, + "epoch": 0.51, + "grad_norm": 0.5164140744715733, + "learning_rate": 1.0312412195339222e-05, + "loss": 0.3466, "step": 11001 }, { - "epoch": 0.63, - "grad_norm": 0.33832310295296864, - "learning_rate": 6.297724285567098e-06, - "loss": 0.2159, + "epoch": 0.51, + "grad_norm": 0.8544883586570818, + "learning_rate": 1.0310924998952655e-05, + "loss": 0.4663, "step": 11002 }, { - "epoch": 0.63, - "grad_norm": 0.3384603298577127, - "learning_rate": 6.295995671504965e-06, - "loss": 0.2478, + "epoch": 0.51, + "grad_norm": 0.44703310529145185, + "learning_rate": 1.030943779568251e-05, + "loss": 0.3094, "step": 11003 }, { - "epoch": 0.63, - "grad_norm": 0.4787703629140393, - "learning_rate": 6.294267185712342e-06, - "loss": 0.361, + "epoch": 0.51, + "grad_norm": 0.2588407630012365, + "learning_rate": 1.0307950585561705e-05, + "loss": 0.2127, "step": 11004 }, { - "epoch": 0.63, - "grad_norm": 0.588450481399667, - "learning_rate": 6.292538828249087e-06, - "loss": 0.355, + "epoch": 0.51, + "grad_norm": 0.4794553487699966, + "learning_rate": 1.0306463368623174e-05, + "loss": 0.313, "step": 11005 }, { - "epoch": 0.63, - "grad_norm": 0.32064689647937217, - "learning_rate": 6.290810599175052e-06, - "loss": 0.234, + "epoch": 0.51, + "grad_norm": 0.4575444882011544, + "learning_rate": 1.0304976144899839e-05, + "loss": 0.2408, "step": 11006 }, { - "epoch": 0.63, - "grad_norm": 0.3023253847261538, - "learning_rate": 6.289082498550091e-06, - "loss": 0.2633, + "epoch": 0.51, + "grad_norm": 0.5148704658342735, + "learning_rate": 1.0303488914424624e-05, + "loss": 0.3921, "step": 11007 }, { - "epoch": 0.63, - "grad_norm": 0.33301628269246675, - "learning_rate": 6.287354526434042e-06, - "loss": 0.1976, + "epoch": 0.51, + "grad_norm": 0.3635600628818487, + "learning_rate": 1.030200167723046e-05, + "loss": 0.2548, "step": 11008 }, { - "epoch": 0.63, - "grad_norm": 0.3279184303807924, - "learning_rate": 6.285626682886743e-06, - "loss": 0.2236, + "epoch": 0.51, + "grad_norm": 0.39388683648969564, + "learning_rate": 1.0300514433350268e-05, + "loss": 0.2533, "step": 11009 }, { - "epoch": 0.63, - "grad_norm": 0.7138001020794481, - "learning_rate": 6.283898967968034e-06, - "loss": 0.4225, + "epoch": 0.51, + "grad_norm": 0.2681368570225848, + "learning_rate": 1.0299027182816979e-05, + "loss": 0.201, "step": 11010 }, { - "epoch": 0.63, - "grad_norm": 0.35085811541230716, - "learning_rate": 6.282171381737742e-06, - "loss": 0.2892, + "epoch": 0.51, + "grad_norm": 0.6084317703029285, + "learning_rate": 1.0297539925663511e-05, + "loss": 0.4504, "step": 11011 }, { - "epoch": 0.63, - "grad_norm": 0.35646638300198263, - "learning_rate": 6.280443924255697e-06, - "loss": 0.3308, + "epoch": 0.51, + "grad_norm": 0.2900179962026342, + "learning_rate": 1.0296052661922799e-05, + "loss": 0.2168, "step": 11012 }, { - "epoch": 0.63, - "grad_norm": 0.9594251689271127, - "learning_rate": 6.27871659558172e-06, - "loss": 0.2799, + "epoch": 0.51, + "grad_norm": 0.7121251470426043, + "learning_rate": 1.0294565391627766e-05, + "loss": 0.4035, "step": 11013 }, { - "epoch": 0.63, - "grad_norm": 0.2198364833708466, - "learning_rate": 6.276989395775625e-06, - "loss": 0.1496, + "epoch": 0.51, + "grad_norm": 1.3765180809227844, + "learning_rate": 1.0293078114811341e-05, + "loss": 0.7935, "step": 11014 }, { - "epoch": 0.63, - "grad_norm": 0.28285740443505136, - "learning_rate": 6.275262324897229e-06, - "loss": 0.2867, + "epoch": 0.51, + "grad_norm": 0.3028412415396035, + "learning_rate": 1.0291590831506448e-05, + "loss": 0.1932, "step": 11015 }, { - "epoch": 0.63, - "grad_norm": 0.7428097831158089, - "learning_rate": 6.273535383006336e-06, - "loss": 0.319, + "epoch": 0.51, + "grad_norm": 0.505283728868044, + "learning_rate": 1.0290103541746015e-05, + "loss": 0.3679, "step": 11016 }, { - "epoch": 0.63, - "grad_norm": 0.5076884926327849, - "learning_rate": 6.271808570162754e-06, - "loss": 0.3514, + "epoch": 0.51, + "grad_norm": 0.38772457846229813, + "learning_rate": 1.028861624556297e-05, + "loss": 0.2904, "step": 11017 }, { - "epoch": 0.63, - "grad_norm": 0.3783077303287608, - "learning_rate": 6.27008188642628e-06, - "loss": 0.3001, + "epoch": 0.51, + "grad_norm": 0.24977925955522015, + "learning_rate": 1.0287128942990237e-05, + "loss": 0.1733, "step": 11018 }, { - "epoch": 0.63, - "grad_norm": 0.3307956499614347, - "learning_rate": 6.268355331856713e-06, - "loss": 0.2285, + "epoch": 0.51, + "grad_norm": 1.1362177076012334, + "learning_rate": 1.0285641634060745e-05, + "loss": 0.6657, "step": 11019 }, { - "epoch": 0.63, - "grad_norm": 0.2362548513026544, - "learning_rate": 6.266628906513836e-06, - "loss": 0.1491, + "epoch": 0.51, + "grad_norm": 0.39449784649860237, + "learning_rate": 1.0284154318807426e-05, + "loss": 0.3218, "step": 11020 }, { - "epoch": 0.63, - "grad_norm": 0.42279259430228205, - "learning_rate": 6.264902610457442e-06, - "loss": 0.316, + "epoch": 0.51, + "grad_norm": 0.34483865933176594, + "learning_rate": 1.0282666997263205e-05, + "loss": 0.071, "step": 11021 }, { - "epoch": 0.63, - "grad_norm": 0.575956482245312, - "learning_rate": 6.263176443747309e-06, - "loss": 0.3045, + "epoch": 0.51, + "grad_norm": 0.39267986100505314, + "learning_rate": 1.0281179669461006e-05, + "loss": 0.3304, "step": 11022 }, { - "epoch": 0.63, - "grad_norm": 0.48818995665095016, - "learning_rate": 6.261450406443217e-06, - "loss": 0.3887, + "epoch": 0.51, + "grad_norm": 0.28735006796092893, + "learning_rate": 1.0279692335433762e-05, + "loss": 0.2425, "step": 11023 }, { - "epoch": 0.63, - "grad_norm": 0.3348148241071795, - "learning_rate": 6.259724498604933e-06, - "loss": 0.2703, + "epoch": 0.51, + "grad_norm": 0.39880100407061914, + "learning_rate": 1.0278204995214396e-05, + "loss": 0.2707, "step": 11024 }, { - "epoch": 0.63, - "grad_norm": 1.1273760167300912, - "learning_rate": 6.257998720292233e-06, - "loss": 0.5308, + "epoch": 0.51, + "grad_norm": 0.3841482063529238, + "learning_rate": 1.0276717648835843e-05, + "loss": 0.253, "step": 11025 }, { - "epoch": 0.63, - "grad_norm": 0.1848169085706313, - "learning_rate": 6.256273071564874e-06, - "loss": 0.088, + "epoch": 0.51, + "grad_norm": 1.1661155048610288, + "learning_rate": 1.0275230296331027e-05, + "loss": 0.6591, "step": 11026 }, { - "epoch": 0.63, - "grad_norm": 0.25545609687182125, - "learning_rate": 6.254547552482617e-06, - "loss": 0.2522, + "epoch": 0.51, + "grad_norm": 0.5803760305081045, + "learning_rate": 1.0273742937732877e-05, + "loss": 0.3469, "step": 11027 }, { - "epoch": 0.63, - "grad_norm": 0.7623754548706887, - "learning_rate": 6.25282216310522e-06, - "loss": 0.368, + "epoch": 0.51, + "grad_norm": 0.29909570471365665, + "learning_rate": 1.0272255573074323e-05, + "loss": 0.255, "step": 11028 }, { - "epoch": 0.63, - "grad_norm": 0.4726985840995557, - "learning_rate": 6.2510969034924265e-06, - "loss": 0.3078, + "epoch": 0.51, + "grad_norm": 0.8090921312617481, + "learning_rate": 1.0270768202388293e-05, + "loss": 0.481, "step": 11029 }, { - "epoch": 0.63, - "grad_norm": 0.3572847810692785, - "learning_rate": 6.249371773703989e-06, - "loss": 0.276, + "epoch": 0.51, + "grad_norm": 0.23083177159951437, + "learning_rate": 1.0269280825707714e-05, + "loss": 0.1357, "step": 11030 }, { - "epoch": 0.63, - "grad_norm": 0.3708677390096252, - "learning_rate": 6.247646773799645e-06, - "loss": 0.3205, + "epoch": 0.51, + "grad_norm": 0.40498719838244374, + "learning_rate": 1.0267793443065519e-05, + "loss": 0.276, "step": 11031 }, { - "epoch": 0.63, - "grad_norm": 0.25304006716718513, - "learning_rate": 6.245921903839132e-06, - "loss": 0.123, + "epoch": 0.51, + "grad_norm": 0.3999375361250701, + "learning_rate": 1.0266306054494637e-05, + "loss": 0.3144, "step": 11032 }, { - "epoch": 0.63, - "grad_norm": 0.3925028301065731, - "learning_rate": 6.24419716388218e-06, - "loss": 0.2976, + "epoch": 0.51, + "grad_norm": 0.4111834757576695, + "learning_rate": 1.0264818660027993e-05, + "loss": 0.29, "step": 11033 }, { - "epoch": 0.63, - "grad_norm": 0.8941776485349109, - "learning_rate": 6.242472553988521e-06, - "loss": 0.4912, + "epoch": 0.51, + "grad_norm": 0.4987383428746754, + "learning_rate": 1.0263331259698521e-05, + "loss": 0.2901, "step": 11034 }, { - "epoch": 0.63, - "grad_norm": 0.3501794684964001, - "learning_rate": 6.240748074217875e-06, - "loss": 0.3189, + "epoch": 0.51, + "grad_norm": 0.2792968209507697, + "learning_rate": 1.0261843853539146e-05, + "loss": 0.169, "step": 11035 }, { - "epoch": 0.63, - "grad_norm": 0.3330008477936822, - "learning_rate": 6.239023724629962e-06, - "loss": 0.217, + "epoch": 0.51, + "grad_norm": 0.41905061574570557, + "learning_rate": 1.0260356441582801e-05, + "loss": 0.2442, "step": 11036 }, { - "epoch": 0.63, - "grad_norm": 1.1036007794647276, - "learning_rate": 6.237299505284495e-06, - "loss": 0.616, + "epoch": 0.51, + "grad_norm": 0.5643798811830548, + "learning_rate": 1.0258869023862417e-05, + "loss": 0.3793, "step": 11037 }, { - "epoch": 0.63, - "grad_norm": 0.3554645171715663, - "learning_rate": 6.235575416241185e-06, - "loss": 0.3099, + "epoch": 0.51, + "grad_norm": 0.7166090505061682, + "learning_rate": 1.025738160041092e-05, + "loss": 0.3767, "step": 11038 }, { - "epoch": 0.63, - "grad_norm": 0.26021400716218845, - "learning_rate": 6.233851457559736e-06, - "loss": 0.2198, + "epoch": 0.51, + "grad_norm": 0.5682703194037347, + "learning_rate": 1.0255894171261244e-05, + "loss": 0.3431, "step": 11039 }, { - "epoch": 0.63, - "grad_norm": 0.35160884390360947, - "learning_rate": 6.232127629299849e-06, - "loss": 0.2377, + "epoch": 0.51, + "grad_norm": 0.29190235193587893, + "learning_rate": 1.025440673644632e-05, + "loss": 0.2632, "step": 11040 }, { - "epoch": 0.63, - "grad_norm": 1.0184502874359644, - "learning_rate": 6.230403931521224e-06, - "loss": 0.7252, + "epoch": 0.51, + "grad_norm": 0.36187728275762626, + "learning_rate": 1.025291929599907e-05, + "loss": 0.1938, "step": 11041 }, { - "epoch": 0.63, - "grad_norm": 0.27161208909389467, - "learning_rate": 6.228680364283546e-06, - "loss": 0.1799, + "epoch": 0.51, + "grad_norm": 0.39440781236173883, + "learning_rate": 1.0251431849952436e-05, + "loss": 0.2241, "step": 11042 }, { - "epoch": 0.63, - "grad_norm": 0.3497995921510243, - "learning_rate": 6.226956927646504e-06, - "loss": 0.3126, + "epoch": 0.51, + "grad_norm": 0.3762656525175716, + "learning_rate": 1.024994439833934e-05, + "loss": 0.2801, "step": 11043 }, { - "epoch": 0.63, - "grad_norm": 0.6248331696318262, - "learning_rate": 6.225233621669782e-06, - "loss": 0.4057, + "epoch": 0.51, + "grad_norm": 0.38880791323659164, + "learning_rate": 1.0248456941192721e-05, + "loss": 0.2843, "step": 11044 }, { - "epoch": 0.63, - "grad_norm": 0.3119749624889606, - "learning_rate": 6.2235104464130545e-06, - "loss": 0.2302, + "epoch": 0.51, + "grad_norm": 0.569461206901041, + "learning_rate": 1.02469694785455e-05, + "loss": 0.3005, "step": 11045 }, { - "epoch": 0.63, - "grad_norm": 0.3756060506238427, - "learning_rate": 6.221787401936002e-06, - "loss": 0.2444, + "epoch": 0.51, + "grad_norm": 0.3820406078797207, + "learning_rate": 1.0245482010430614e-05, + "loss": 0.3164, "step": 11046 }, { - "epoch": 0.63, - "grad_norm": 0.34670055217013945, - "learning_rate": 6.220064488298285e-06, - "loss": 0.3051, + "epoch": 0.51, + "grad_norm": 0.9472376581983436, + "learning_rate": 1.0243994536880992e-05, + "loss": 0.2226, "step": 11047 }, { - "epoch": 0.63, - "grad_norm": 0.249174577275586, - "learning_rate": 6.2183417055595765e-06, - "loss": 0.2063, + "epoch": 0.51, + "grad_norm": 0.26675873590984084, + "learning_rate": 1.0242507057929567e-05, + "loss": 0.2244, "step": 11048 }, { - "epoch": 0.63, - "grad_norm": 1.0123588001422943, - "learning_rate": 6.216619053779529e-06, - "loss": 0.2817, + "epoch": 0.51, + "grad_norm": 0.4331316144123683, + "learning_rate": 1.024101957360927e-05, + "loss": 0.3305, "step": 11049 }, { - "epoch": 0.63, - "grad_norm": 0.36445402991388354, - "learning_rate": 6.214896533017803e-06, - "loss": 0.3022, + "epoch": 0.51, + "grad_norm": 0.9469171435862428, + "learning_rate": 1.0239532083953032e-05, + "loss": 0.4979, "step": 11050 }, { - "epoch": 0.63, - "grad_norm": 0.3092688596231202, - "learning_rate": 6.213174143334046e-06, - "loss": 0.2614, + "epoch": 0.51, + "grad_norm": 0.29055961217834547, + "learning_rate": 1.0238044588993785e-05, + "loss": 0.1972, "step": 11051 }, { - "epoch": 0.63, - "grad_norm": 0.25530111498148117, - "learning_rate": 6.211451884787907e-06, - "loss": 0.1012, + "epoch": 0.51, + "grad_norm": 0.3428076247520623, + "learning_rate": 1.0236557088764455e-05, + "loss": 0.2816, "step": 11052 }, { - "epoch": 0.64, - "grad_norm": 0.4294403584004139, - "learning_rate": 6.209729757439026e-06, - "loss": 0.3003, + "epoch": 0.51, + "grad_norm": 1.1127425737648, + "learning_rate": 1.0235069583297985e-05, + "loss": 0.6239, "step": 11053 }, { - "epoch": 0.64, - "grad_norm": 0.4008704554780664, - "learning_rate": 6.208007761347039e-06, - "loss": 0.2886, + "epoch": 0.51, + "grad_norm": 0.2687023460359806, + "learning_rate": 1.0233582072627297e-05, + "loss": 0.1561, "step": 11054 }, { - "epoch": 0.64, - "grad_norm": 0.27196718819293497, - "learning_rate": 6.206285896571582e-06, - "loss": 0.24, + "epoch": 0.51, + "grad_norm": 0.607335502556524, + "learning_rate": 1.023209455678533e-05, + "loss": 0.3873, "step": 11055 }, { - "epoch": 0.64, - "grad_norm": 0.5460900718518568, - "learning_rate": 6.20456416317228e-06, - "loss": 0.332, + "epoch": 0.51, + "grad_norm": 0.46210042673315166, + "learning_rate": 1.0230607035805013e-05, + "loss": 0.3126, "step": 11056 }, { - "epoch": 0.64, - "grad_norm": 0.369685863562834, - "learning_rate": 6.202842561208759e-06, - "loss": 0.2815, + "epoch": 0.51, + "grad_norm": 0.45564722924557644, + "learning_rate": 1.0229119509719278e-05, + "loss": 0.1828, "step": 11057 }, { - "epoch": 0.64, - "grad_norm": 0.5100637075537648, - "learning_rate": 6.201121090740634e-06, - "loss": 0.2929, + "epoch": 0.51, + "grad_norm": 0.41967665158636935, + "learning_rate": 1.0227631978561057e-05, + "loss": 0.3054, "step": 11058 }, { - "epoch": 0.64, - "grad_norm": 0.316403402827174, - "learning_rate": 6.199399751827525e-06, - "loss": 0.2596, + "epoch": 0.51, + "grad_norm": 0.5031241223818567, + "learning_rate": 1.0226144442363286e-05, + "loss": 0.37, "step": 11059 }, { - "epoch": 0.64, - "grad_norm": 0.3212154547207368, - "learning_rate": 6.197678544529037e-06, - "loss": 0.2387, + "epoch": 0.51, + "grad_norm": 0.3893517353524682, + "learning_rate": 1.0224656901158891e-05, + "loss": 0.2362, "step": 11060 }, { - "epoch": 0.64, - "grad_norm": 0.5344441277836296, - "learning_rate": 6.195957468904781e-06, - "loss": 0.3082, + "epoch": 0.51, + "grad_norm": 0.37879346296714017, + "learning_rate": 1.0223169354980811e-05, + "loss": 0.2803, "step": 11061 }, { - "epoch": 0.64, - "grad_norm": 0.3320258149067593, - "learning_rate": 6.19423652501435e-06, - "loss": 0.2633, + "epoch": 0.51, + "grad_norm": 0.5394255934981825, + "learning_rate": 1.022168180386198e-05, + "loss": 0.4137, "step": 11062 }, { - "epoch": 0.64, - "grad_norm": 0.3367919993055683, - "learning_rate": 6.192515712917348e-06, - "loss": 0.2523, + "epoch": 0.51, + "grad_norm": 0.2870044177986189, + "learning_rate": 1.0220194247835323e-05, + "loss": 0.1972, "step": 11063 }, { - "epoch": 0.64, - "grad_norm": 1.0809686276937551, - "learning_rate": 6.19079503267336e-06, - "loss": 0.6319, + "epoch": 0.51, + "grad_norm": 0.27755654435699006, + "learning_rate": 1.0218706686933778e-05, + "loss": 0.2135, "step": 11064 }, { - "epoch": 0.64, - "grad_norm": 0.19407826999737932, - "learning_rate": 6.189074484341979e-06, - "loss": 0.098, + "epoch": 0.51, + "grad_norm": 1.2609809748743745, + "learning_rate": 1.0217219121190275e-05, + "loss": 0.5953, "step": 11065 }, { - "epoch": 0.64, - "grad_norm": 0.31577201724467685, - "learning_rate": 6.187354067982785e-06, - "loss": 0.2449, + "epoch": 0.51, + "grad_norm": 0.745515201422701, + "learning_rate": 1.0215731550637755e-05, + "loss": 0.4625, "step": 11066 }, { - "epoch": 0.64, - "grad_norm": 0.3403924479356509, - "learning_rate": 6.185633783655354e-06, - "loss": 0.302, + "epoch": 0.51, + "grad_norm": 0.28242285496201786, + "learning_rate": 1.0214243975309145e-05, + "loss": 0.244, "step": 11067 }, { - "epoch": 0.64, - "grad_norm": 0.6402462185739675, - "learning_rate": 6.183913631419263e-06, - "loss": 0.3312, + "epoch": 0.51, + "grad_norm": 0.4765595891423058, + "learning_rate": 1.0212756395237382e-05, + "loss": 0.3436, "step": 11068 }, { - "epoch": 0.64, - "grad_norm": 0.3382422319781781, - "learning_rate": 6.182193611334075e-06, - "loss": 0.258, + "epoch": 0.51, + "grad_norm": 0.2670918624973917, + "learning_rate": 1.0211268810455392e-05, + "loss": 0.1784, "step": 11069 }, { - "epoch": 0.64, - "grad_norm": 1.2736662713881572, - "learning_rate": 6.180473723459361e-06, - "loss": 0.7406, + "epoch": 0.51, + "grad_norm": 0.4106546836890868, + "learning_rate": 1.0209781220996118e-05, + "loss": 0.2257, "step": 11070 }, { - "epoch": 0.64, - "grad_norm": 0.25041480842921693, - "learning_rate": 6.178753967854677e-06, - "loss": 0.2126, + "epoch": 0.51, + "grad_norm": 0.4893760740504242, + "learning_rate": 1.0208293626892489e-05, + "loss": 0.3352, "step": 11071 }, { - "epoch": 0.64, - "grad_norm": 0.4063472874123882, - "learning_rate": 6.17703434457958e-06, - "loss": 0.2927, + "epoch": 0.51, + "grad_norm": 0.4558216031942054, + "learning_rate": 1.020680602817744e-05, + "loss": 0.3545, "step": 11072 }, { - "epoch": 0.64, - "grad_norm": 0.586628542560072, - "learning_rate": 6.175314853693617e-06, - "loss": 0.3441, + "epoch": 0.51, + "grad_norm": 0.3314692422293178, + "learning_rate": 1.0205318424883906e-05, + "loss": 0.2297, "step": 11073 }, { - "epoch": 0.64, - "grad_norm": 0.27702603918539737, - "learning_rate": 6.173595495256338e-06, - "loss": 0.2467, + "epoch": 0.51, + "grad_norm": 0.5926536833698635, + "learning_rate": 1.0203830817044819e-05, + "loss": 0.457, "step": 11074 }, { - "epoch": 0.64, - "grad_norm": 0.35689733543792085, - "learning_rate": 6.1718762693272846e-06, - "loss": 0.1613, + "epoch": 0.51, + "grad_norm": 0.27225545356059627, + "learning_rate": 1.0202343204693113e-05, + "loss": 0.2077, "step": 11075 }, { - "epoch": 0.64, - "grad_norm": 0.46985107766144624, - "learning_rate": 6.170157175965988e-06, - "loss": 0.3795, + "epoch": 0.51, + "grad_norm": 0.31931900465639884, + "learning_rate": 1.0200855587861724e-05, + "loss": 0.2819, "step": 11076 }, { - "epoch": 0.64, - "grad_norm": 0.7798022987856814, - "learning_rate": 6.168438215231984e-06, - "loss": 0.499, + "epoch": 0.51, + "grad_norm": 0.31775347338410187, + "learning_rate": 1.0199367966583586e-05, + "loss": 0.1046, "step": 11077 }, { - "epoch": 0.64, - "grad_norm": 0.2888764256268748, - "learning_rate": 6.166719387184802e-06, - "loss": 0.1906, + "epoch": 0.51, + "grad_norm": 0.7086257884375718, + "learning_rate": 1.0197880340891633e-05, + "loss": 0.4505, "step": 11078 }, { - "epoch": 0.64, - "grad_norm": 0.32604639861053947, - "learning_rate": 6.16500069188396e-06, - "loss": 0.3296, + "epoch": 0.51, + "grad_norm": 0.3533880149938981, + "learning_rate": 1.0196392710818802e-05, + "loss": 0.2954, "step": 11079 }, { - "epoch": 0.64, - "grad_norm": 0.2507700628659636, - "learning_rate": 6.163282129388981e-06, - "loss": 0.1362, + "epoch": 0.51, + "grad_norm": 0.4068241679258334, + "learning_rate": 1.0194905076398025e-05, + "loss": 0.2945, "step": 11080 }, { - "epoch": 0.64, - "grad_norm": 0.31590120501365204, - "learning_rate": 6.1615636997593745e-06, - "loss": 0.2005, + "epoch": 0.51, + "grad_norm": 0.4413309684829476, + "learning_rate": 1.0193417437662238e-05, + "loss": 0.2144, "step": 11081 }, { - "epoch": 0.64, - "grad_norm": 0.3610564542876327, - "learning_rate": 6.159845403054654e-06, - "loss": 0.332, + "epoch": 0.51, + "grad_norm": 0.28626933198560534, + "learning_rate": 1.0191929794644374e-05, + "loss": 0.2145, "step": 11082 }, { - "epoch": 0.64, - "grad_norm": 1.2608533543659348, - "learning_rate": 6.15812723933432e-06, - "loss": 0.7569, + "epoch": 0.51, + "grad_norm": 0.47148906328932133, + "learning_rate": 1.0190442147377368e-05, + "loss": 0.2696, "step": 11083 }, { - "epoch": 0.64, - "grad_norm": 0.32773053299135346, - "learning_rate": 6.1564092086578765e-06, - "loss": 0.2223, + "epoch": 0.51, + "grad_norm": 0.4398633553057758, + "learning_rate": 1.0188954495894156e-05, + "loss": 0.356, "step": 11084 }, { - "epoch": 0.64, - "grad_norm": 0.8451695808115459, - "learning_rate": 6.154691311084816e-06, - "loss": 0.4394, + "epoch": 0.51, + "grad_norm": 0.3605489273302093, + "learning_rate": 1.018746684022768e-05, + "loss": 0.269, "step": 11085 }, { - "epoch": 0.64, - "grad_norm": 0.21285824327119043, - "learning_rate": 6.152973546674631e-06, - "loss": 0.2111, + "epoch": 0.51, + "grad_norm": 2.0889720699607484, + "learning_rate": 1.0185979180410862e-05, + "loss": 0.5646, "step": 11086 }, { - "epoch": 0.64, - "grad_norm": 0.3162598405639108, - "learning_rate": 6.151255915486804e-06, - "loss": 0.2425, + "epoch": 0.51, + "grad_norm": 0.3720447660940607, + "learning_rate": 1.0184491516476646e-05, + "loss": 0.2983, "step": 11087 }, { - "epoch": 0.64, - "grad_norm": 0.9304264514798442, - "learning_rate": 6.1495384175808224e-06, - "loss": 0.3778, + "epoch": 0.51, + "grad_norm": 0.2969989919841698, + "learning_rate": 1.0183003848457967e-05, + "loss": 0.2161, "step": 11088 }, { - "epoch": 0.64, - "grad_norm": 0.7140149435298279, - "learning_rate": 6.147821053016159e-06, - "loss": 0.4178, + "epoch": 0.51, + "grad_norm": 0.47486043090329283, + "learning_rate": 1.0181516176387758e-05, + "loss": 0.2681, "step": 11089 }, { - "epoch": 0.64, - "grad_norm": 0.3320040093447702, - "learning_rate": 6.146103821852286e-06, - "loss": 0.245, + "epoch": 0.51, + "grad_norm": 0.5689962329665976, + "learning_rate": 1.0180028500298956e-05, + "loss": 0.3177, "step": 11090 }, { - "epoch": 0.64, - "grad_norm": 0.35658129823456214, - "learning_rate": 6.144386724148674e-06, - "loss": 0.2434, + "epoch": 0.51, + "grad_norm": 0.3786057278409134, + "learning_rate": 1.0178540820224499e-05, + "loss": 0.2877, "step": 11091 }, { - "epoch": 0.64, - "grad_norm": 0.27249855103507575, - "learning_rate": 6.142669759964781e-06, - "loss": 0.1735, + "epoch": 0.51, + "grad_norm": 0.3728506807663648, + "learning_rate": 1.0177053136197317e-05, + "loss": 0.324, "step": 11092 }, { - "epoch": 0.64, - "grad_norm": 0.5892126845666474, - "learning_rate": 6.140952929360071e-06, - "loss": 0.3012, + "epoch": 0.51, + "grad_norm": 0.26833464584916766, + "learning_rate": 1.0175565448250348e-05, + "loss": 0.1377, "step": 11093 }, { - "epoch": 0.64, - "grad_norm": 0.24465733364449624, - "learning_rate": 6.139236232393993e-06, - "loss": 0.2379, + "epoch": 0.51, + "grad_norm": 0.3663347916132832, + "learning_rate": 1.0174077756416531e-05, + "loss": 0.2888, "step": 11094 }, { - "epoch": 0.64, - "grad_norm": 0.9972780676501973, - "learning_rate": 6.137519669126e-06, - "loss": 0.4295, + "epoch": 0.51, + "grad_norm": 0.32273087605780454, + "learning_rate": 1.01725900607288e-05, + "loss": 0.2327, "step": 11095 }, { - "epoch": 0.64, - "grad_norm": 0.4816808218850385, - "learning_rate": 6.135803239615532e-06, - "loss": 0.337, + "epoch": 0.51, + "grad_norm": 0.7003760566807062, + "learning_rate": 1.0171102361220093e-05, + "loss": 0.3337, "step": 11096 }, { - "epoch": 0.64, - "grad_norm": 0.2635477301210972, - "learning_rate": 6.134086943922034e-06, - "loss": 0.183, + "epoch": 0.51, + "grad_norm": 0.34411071652747965, + "learning_rate": 1.0169614657923347e-05, + "loss": 0.2546, "step": 11097 }, { - "epoch": 0.64, - "grad_norm": 0.2615090099141512, - "learning_rate": 6.132370782104937e-06, - "loss": 0.2292, + "epoch": 0.51, + "grad_norm": 1.318305182181283, + "learning_rate": 1.016812695087149e-05, + "loss": 0.8254, "step": 11098 }, { - "epoch": 0.64, - "grad_norm": 0.38708784016928843, - "learning_rate": 6.130654754223676e-06, - "loss": 0.2688, + "epoch": 0.51, + "grad_norm": 0.40275534848577, + "learning_rate": 1.0166639240097467e-05, + "loss": 0.2597, "step": 11099 }, { - "epoch": 0.64, - "grad_norm": 0.5112058574827448, - "learning_rate": 6.128938860337672e-06, - "loss": 0.3646, + "epoch": 0.51, + "grad_norm": 0.26243218270302904, + "learning_rate": 1.0165151525634212e-05, + "loss": 0.204, "step": 11100 }, { - "epoch": 0.64, - "grad_norm": 0.7461043466785471, - "learning_rate": 6.127223100506351e-06, - "loss": 0.3124, + "epoch": 0.51, + "grad_norm": 0.46647970031651376, + "learning_rate": 1.0163663807514658e-05, + "loss": 0.2833, "step": 11101 }, { - "epoch": 0.64, - "grad_norm": 0.27170610461361683, - "learning_rate": 6.125507474789125e-06, - "loss": 0.2624, + "epoch": 0.51, + "grad_norm": 1.2420320391880806, + "learning_rate": 1.016217608577175e-05, + "loss": 0.4895, "step": 11102 }, { - "epoch": 0.64, - "grad_norm": 0.5161349562940573, - "learning_rate": 6.123791983245411e-06, - "loss": 0.3863, + "epoch": 0.51, + "grad_norm": 0.305149182704416, + "learning_rate": 1.016068836043842e-05, + "loss": 0.228, "step": 11103 }, { - "epoch": 0.64, - "grad_norm": 0.21589302078484102, - "learning_rate": 6.122076625934612e-06, - "loss": 0.1027, + "epoch": 0.51, + "grad_norm": 0.5353289828393174, + "learning_rate": 1.01592006315476e-05, + "loss": 0.4009, "step": 11104 }, { - "epoch": 0.64, - "grad_norm": 0.3764534243966187, - "learning_rate": 6.120361402916135e-06, - "loss": 0.2836, + "epoch": 0.51, + "grad_norm": 0.9313808310150433, + "learning_rate": 1.0157712899132235e-05, + "loss": 0.5912, "step": 11105 }, { - "epoch": 0.64, - "grad_norm": 0.37975502883, - "learning_rate": 6.118646314249376e-06, - "loss": 0.3184, + "epoch": 0.51, + "grad_norm": 0.2734611193126308, + "learning_rate": 1.0156225163225258e-05, + "loss": 0.1553, "step": 11106 }, { - "epoch": 0.64, - "grad_norm": 0.6739943039680442, - "learning_rate": 6.116931359993725e-06, - "loss": 0.3041, + "epoch": 0.51, + "grad_norm": 0.30535790167524024, + "learning_rate": 1.0154737423859606e-05, + "loss": 0.2667, "step": 11107 }, { - "epoch": 0.64, - "grad_norm": 0.37209910595351725, - "learning_rate": 6.115216540208577e-06, - "loss": 0.2577, + "epoch": 0.51, + "grad_norm": 0.835876974205203, + "learning_rate": 1.0153249681068216e-05, + "loss": 0.4347, "step": 11108 }, { - "epoch": 0.64, - "grad_norm": 0.5528497756841371, - "learning_rate": 6.1135018549533146e-06, - "loss": 0.3213, + "epoch": 0.51, + "grad_norm": 0.3283353816686232, + "learning_rate": 1.0151761934884028e-05, + "loss": 0.1955, "step": 11109 }, { - "epoch": 0.64, - "grad_norm": 0.21630491935510923, - "learning_rate": 6.111787304287312e-06, - "loss": 0.1668, + "epoch": 0.51, + "grad_norm": 1.2255012875432325, + "learning_rate": 1.0150274185339974e-05, + "loss": 0.8265, "step": 11110 }, { - "epoch": 0.64, - "grad_norm": 0.6091154648621666, - "learning_rate": 6.11007288826995e-06, - "loss": 0.3458, + "epoch": 0.51, + "grad_norm": 0.37852485985237627, + "learning_rate": 1.0148786432468995e-05, + "loss": 0.3051, "step": 11111 }, { - "epoch": 0.64, - "grad_norm": 0.35893455503756927, - "learning_rate": 6.108358606960595e-06, - "loss": 0.31, + "epoch": 0.51, + "grad_norm": 0.3533096269035579, + "learning_rate": 1.0147298676304027e-05, + "loss": 0.1886, "step": 11112 }, { - "epoch": 0.64, - "grad_norm": 0.7157112727794527, - "learning_rate": 6.1066444604186156e-06, - "loss": 0.4223, + "epoch": 0.51, + "grad_norm": 0.9719589734282299, + "learning_rate": 1.0145810916878011e-05, + "loss": 0.4137, "step": 11113 }, { - "epoch": 0.64, - "grad_norm": 0.26562757365475465, - "learning_rate": 6.104930448703369e-06, - "loss": 0.2132, + "epoch": 0.51, + "grad_norm": 0.3110445773409582, + "learning_rate": 1.0144323154223881e-05, + "loss": 0.2175, "step": 11114 }, { - "epoch": 0.64, - "grad_norm": 0.3765400989158581, - "learning_rate": 6.1032165718742154e-06, - "loss": 0.296, + "epoch": 0.51, + "grad_norm": 0.3448933787285444, + "learning_rate": 1.0142835388374577e-05, + "loss": 0.2828, "step": 11115 }, { - "epoch": 0.64, - "grad_norm": 0.4711689009780174, - "learning_rate": 6.1015028299905025e-06, - "loss": 0.2469, + "epoch": 0.51, + "grad_norm": 0.3931999193714425, + "learning_rate": 1.0141347619363031e-05, + "loss": 0.2775, "step": 11116 }, { - "epoch": 0.64, - "grad_norm": 0.27771652468962005, - "learning_rate": 6.0997892231115805e-06, - "loss": 0.1337, + "epoch": 0.51, + "grad_norm": 1.5693789764760682, + "learning_rate": 1.0139859847222188e-05, + "loss": 0.821, "step": 11117 }, { - "epoch": 0.64, - "grad_norm": 0.2506367775544241, - "learning_rate": 6.098075751296792e-06, - "loss": 0.272, + "epoch": 0.51, + "grad_norm": 0.46335901055483497, + "learning_rate": 1.0138372071984981e-05, + "loss": 0.2697, "step": 11118 }, { - "epoch": 0.64, - "grad_norm": 0.7113697594382398, - "learning_rate": 6.096362414605468e-06, - "loss": 0.4545, + "epoch": 0.51, + "grad_norm": 0.5613642350450226, + "learning_rate": 1.013688429368435e-05, + "loss": 0.2838, "step": 11119 }, { - "epoch": 0.64, - "grad_norm": 0.31511420097201853, - "learning_rate": 6.0946492130969494e-06, - "loss": 0.1979, + "epoch": 0.51, + "grad_norm": 0.30490186720772, + "learning_rate": 1.0135396512353235e-05, + "loss": 0.202, "step": 11120 }, { - "epoch": 0.64, - "grad_norm": 0.48430785170819585, - "learning_rate": 6.092936146830557e-06, - "loss": 0.3365, + "epoch": 0.51, + "grad_norm": 0.337587792984933, + "learning_rate": 1.013390872802457e-05, + "loss": 0.265, "step": 11121 }, { - "epoch": 0.64, - "grad_norm": 0.34387547840728383, - "learning_rate": 6.091223215865621e-06, - "loss": 0.2893, + "epoch": 0.51, + "grad_norm": 0.887161139634556, + "learning_rate": 1.0132420940731296e-05, + "loss": 0.5464, "step": 11122 }, { - "epoch": 0.64, - "grad_norm": 0.23731111105713615, - "learning_rate": 6.089510420261455e-06, - "loss": 0.1881, + "epoch": 0.51, + "grad_norm": 0.37721152658395274, + "learning_rate": 1.0130933150506345e-05, + "loss": 0.3038, "step": 11123 }, { - "epoch": 0.64, - "grad_norm": 0.3714432234815818, - "learning_rate": 6.087797760077376e-06, - "loss": 0.265, + "epoch": 0.51, + "grad_norm": 0.3546766963781115, + "learning_rate": 1.0129445357382665e-05, + "loss": 0.2647, "step": 11124 }, { - "epoch": 0.64, - "grad_norm": 0.7696874757906164, - "learning_rate": 6.086085235372692e-06, - "loss": 0.488, + "epoch": 0.51, + "grad_norm": 0.6832454112668832, + "learning_rate": 1.012795756139319e-05, + "loss": 0.3235, "step": 11125 }, { - "epoch": 0.64, - "grad_norm": 0.29488211422426086, - "learning_rate": 6.084372846206709e-06, - "loss": 0.2643, + "epoch": 0.51, + "grad_norm": 0.27656822087422844, + "learning_rate": 1.0126469762570856e-05, + "loss": 0.209, "step": 11126 }, { - "epoch": 0.64, - "grad_norm": 0.3843671048783373, - "learning_rate": 6.0826605926387226e-06, - "loss": 0.2295, + "epoch": 0.51, + "grad_norm": 0.43252051357957694, + "learning_rate": 1.0124981960948603e-05, + "loss": 0.2931, "step": 11127 }, { - "epoch": 0.64, - "grad_norm": 0.28084906133884785, - "learning_rate": 6.080948474728036e-06, - "loss": 0.1574, + "epoch": 0.51, + "grad_norm": 0.4455438810013382, + "learning_rate": 1.0123494156559372e-05, + "loss": 0.3339, "step": 11128 }, { - "epoch": 0.64, - "grad_norm": 0.8237786686226553, - "learning_rate": 6.079236492533931e-06, - "loss": 0.3501, + "epoch": 0.51, + "grad_norm": 0.8139513224632986, + "learning_rate": 1.0122006349436097e-05, + "loss": 0.3379, "step": 11129 }, { - "epoch": 0.64, - "grad_norm": 0.2653302823814511, - "learning_rate": 6.077524646115701e-06, - "loss": 0.2377, + "epoch": 0.51, + "grad_norm": 0.3782009746274829, + "learning_rate": 1.012051853961172e-05, + "loss": 0.2403, "step": 11130 }, { - "epoch": 0.64, - "grad_norm": 0.790921659400872, - "learning_rate": 6.075812935532623e-06, - "loss": 0.5031, + "epoch": 0.51, + "grad_norm": 0.3451349101496271, + "learning_rate": 1.0119030727119177e-05, + "loss": 0.3159, "step": 11131 }, { - "epoch": 0.64, - "grad_norm": 0.732432702456682, - "learning_rate": 6.074101360843973e-06, - "loss": 0.4053, + "epoch": 0.51, + "grad_norm": 0.16693877286038192, + "learning_rate": 1.0117542911991414e-05, + "loss": 0.0735, "step": 11132 }, { - "epoch": 0.64, - "grad_norm": 0.3450032202174695, - "learning_rate": 6.072389922109027e-06, - "loss": 0.2106, + "epoch": 0.51, + "grad_norm": 0.4084853527808433, + "learning_rate": 1.0116055094261358e-05, + "loss": 0.2923, "step": 11133 }, { - "epoch": 0.64, - "grad_norm": 0.3755738306929151, - "learning_rate": 6.070678619387045e-06, - "loss": 0.2833, + "epoch": 0.51, + "grad_norm": 0.5909122892454841, + "learning_rate": 1.0114567273961957e-05, + "loss": 0.3923, "step": 11134 }, { - "epoch": 0.64, - "grad_norm": 0.923733611804049, - "learning_rate": 6.068967452737296e-06, - "loss": 0.3767, + "epoch": 0.51, + "grad_norm": 0.43637866951388304, + "learning_rate": 1.0113079451126147e-05, + "loss": 0.2731, "step": 11135 }, { - "epoch": 0.64, - "grad_norm": 0.2933452155571611, - "learning_rate": 6.067256422219034e-06, - "loss": 0.2224, + "epoch": 0.51, + "grad_norm": 0.45436883259395205, + "learning_rate": 1.0111591625786866e-05, + "loss": 0.3079, "step": 11136 }, { - "epoch": 0.64, - "grad_norm": 0.29408326213010355, - "learning_rate": 6.065545527891514e-06, - "loss": 0.1626, + "epoch": 0.51, + "grad_norm": 0.5848877178866841, + "learning_rate": 1.0110103797977056e-05, + "loss": 0.3894, "step": 11137 }, { - "epoch": 0.64, - "grad_norm": 0.3024053064991005, - "learning_rate": 6.063834769813982e-06, - "loss": 0.2829, + "epoch": 0.51, + "grad_norm": 0.28410091510233076, + "learning_rate": 1.0108615967729651e-05, + "loss": 0.1863, "step": 11138 }, { - "epoch": 0.64, - "grad_norm": 0.441073941280975, - "learning_rate": 6.062124148045685e-06, - "loss": 0.3102, + "epoch": 0.51, + "grad_norm": 0.2774330184663932, + "learning_rate": 1.0107128135077594e-05, + "loss": 0.2257, "step": 11139 }, { - "epoch": 0.64, - "grad_norm": 1.033460670183201, - "learning_rate": 6.060413662645856e-06, - "loss": 0.3068, + "epoch": 0.51, + "grad_norm": 1.297269609552035, + "learning_rate": 1.0105640300053825e-05, + "loss": 0.8584, "step": 11140 }, { - "epoch": 0.64, - "grad_norm": 0.3800496285695335, - "learning_rate": 6.058703313673735e-06, - "loss": 0.3042, + "epoch": 0.51, + "grad_norm": 0.6296851065967783, + "learning_rate": 1.010415246269128e-05, + "loss": 0.4334, "step": 11141 }, { - "epoch": 0.64, - "grad_norm": 0.3069885259774946, - "learning_rate": 6.0569931011885504e-06, - "loss": 0.2701, + "epoch": 0.51, + "grad_norm": 0.38546578461733194, + "learning_rate": 1.01026646230229e-05, + "loss": 0.227, "step": 11142 }, { - "epoch": 0.64, - "grad_norm": 0.261025683645621, - "learning_rate": 6.055283025249526e-06, - "loss": 0.1452, + "epoch": 0.51, + "grad_norm": 0.36798934964874497, + "learning_rate": 1.0101176781081625e-05, + "loss": 0.3093, "step": 11143 }, { - "epoch": 0.64, - "grad_norm": 0.30579045165578644, - "learning_rate": 6.053573085915875e-06, - "loss": 0.1788, + "epoch": 0.51, + "grad_norm": 0.44068530217699664, + "learning_rate": 1.0099688936900393e-05, + "loss": 0.2462, "step": 11144 }, { - "epoch": 0.64, - "grad_norm": 0.41347157047586297, - "learning_rate": 6.0518632832468215e-06, - "loss": 0.3075, + "epoch": 0.51, + "grad_norm": 0.350311681114434, + "learning_rate": 1.0098201090512145e-05, + "loss": 0.1592, "step": 11145 }, { - "epoch": 0.64, - "grad_norm": 0.509072555332188, - "learning_rate": 6.050153617301571e-06, - "loss": 0.2722, + "epoch": 0.51, + "grad_norm": 0.42932559713064206, + "learning_rate": 1.0096713241949818e-05, + "loss": 0.3418, "step": 11146 }, { - "epoch": 0.64, - "grad_norm": 1.0137824837962006, - "learning_rate": 6.048444088139334e-06, - "loss": 0.4353, + "epoch": 0.51, + "grad_norm": 0.35953925831838174, + "learning_rate": 1.0095225391246353e-05, + "loss": 0.3393, "step": 11147 }, { - "epoch": 0.64, - "grad_norm": 0.32813531727045664, - "learning_rate": 6.0467346958193056e-06, - "loss": 0.253, + "epoch": 0.51, + "grad_norm": 0.20626637457769326, + "learning_rate": 1.009373753843469e-05, + "loss": 0.0922, "step": 11148 }, { - "epoch": 0.64, - "grad_norm": 0.3297149666866975, - "learning_rate": 6.045025440400684e-06, - "loss": 0.2803, + "epoch": 0.51, + "grad_norm": 0.4588956955044267, + "learning_rate": 1.0092249683547767e-05, + "loss": 0.2919, "step": 11149 }, { - "epoch": 0.64, - "grad_norm": 0.2517094708835535, - "learning_rate": 6.043316321942663e-06, - "loss": 0.1736, + "epoch": 0.51, + "grad_norm": 0.34776968576124057, + "learning_rate": 1.009076182661853e-05, + "loss": 0.2481, "step": 11150 }, { - "epoch": 0.64, - "grad_norm": 0.36579356484526454, - "learning_rate": 6.0416073405044274e-06, - "loss": 0.2721, + "epoch": 0.51, + "grad_norm": 0.4035561576114649, + "learning_rate": 1.0089273967679908e-05, + "loss": 0.2858, "step": 11151 }, { - "epoch": 0.64, - "grad_norm": 1.009514354771467, - "learning_rate": 6.039898496145159e-06, - "loss": 0.4483, + "epoch": 0.51, + "grad_norm": 0.3527852687609695, + "learning_rate": 1.0087786106764849e-05, + "loss": 0.2759, "step": 11152 }, { - "epoch": 0.64, - "grad_norm": 0.46789848161172504, - "learning_rate": 6.038189788924036e-06, - "loss": 0.2603, + "epoch": 0.51, + "grad_norm": 0.7045963099686833, + "learning_rate": 1.008629824390629e-05, + "loss": 0.4592, "step": 11153 }, { - "epoch": 0.64, - "grad_norm": 0.28716040798789005, - "learning_rate": 6.03648121890023e-06, - "loss": 0.2665, + "epoch": 0.51, + "grad_norm": 0.4065355394552864, + "learning_rate": 1.0084810379137171e-05, + "loss": 0.213, "step": 11154 }, { - "epoch": 0.64, - "grad_norm": 1.2217392161585139, - "learning_rate": 6.03477278613291e-06, - "loss": 0.7532, + "epoch": 0.51, + "grad_norm": 0.3352115365960826, + "learning_rate": 1.008332251249043e-05, + "loss": 0.2497, "step": 11155 }, { - "epoch": 0.64, - "grad_norm": 0.20730714099122807, - "learning_rate": 6.033064490681238e-06, - "loss": 0.1146, + "epoch": 0.51, + "grad_norm": 1.0664716385509445, + "learning_rate": 1.0081834643999013e-05, + "loss": 0.554, "step": 11156 }, { - "epoch": 0.64, - "grad_norm": 0.4212369317329507, - "learning_rate": 6.031356332604369e-06, - "loss": 0.2847, + "epoch": 0.51, + "grad_norm": 0.5800152037704983, + "learning_rate": 1.0080346773695852e-05, + "loss": 0.3827, "step": 11157 }, { - "epoch": 0.64, - "grad_norm": 0.3950415550882442, - "learning_rate": 6.029648311961462e-06, - "loss": 0.3112, + "epoch": 0.51, + "grad_norm": 0.42900808729035916, + "learning_rate": 1.0078858901613893e-05, + "loss": 0.2614, "step": 11158 }, { - "epoch": 0.64, - "grad_norm": 0.5599324152074959, - "learning_rate": 6.027940428811662e-06, - "loss": 0.1964, - "step": 11159 + "epoch": 0.51, + "grad_norm": 0.37349791529218285, + "learning_rate": 1.0077371027786072e-05, + "loss": 0.3323, + "step": 11159 }, { - "epoch": 0.64, - "grad_norm": 0.3724627817967568, - "learning_rate": 6.026232683214115e-06, - "loss": 0.2767, + "epoch": 0.51, + "grad_norm": 0.24955603822488587, + "learning_rate": 1.0075883152245334e-05, + "loss": 0.1691, "step": 11160 }, { - "epoch": 0.64, - "grad_norm": 0.4752167622283187, - "learning_rate": 6.024525075227959e-06, - "loss": 0.3687, + "epoch": 0.51, + "grad_norm": 0.4946665323779758, + "learning_rate": 1.0074395275024613e-05, + "loss": 0.2561, "step": 11161 }, { - "epoch": 0.64, - "grad_norm": 0.21954940331324588, - "learning_rate": 6.02281760491233e-06, - "loss": 0.1958, + "epoch": 0.51, + "grad_norm": 0.3476103384693661, + "learning_rate": 1.0072907396156854e-05, + "loss": 0.3094, "step": 11162 }, { - "epoch": 0.64, - "grad_norm": 0.33151403738859675, - "learning_rate": 6.021110272326354e-06, - "loss": 0.2124, + "epoch": 0.51, + "grad_norm": 0.8422198629867995, + "learning_rate": 1.0071419515674997e-05, + "loss": 0.4948, "step": 11163 }, { - "epoch": 0.64, - "grad_norm": 0.515384369202005, - "learning_rate": 6.0194030775291605e-06, - "loss": 0.3731, + "epoch": 0.51, + "grad_norm": 0.3729865894270189, + "learning_rate": 1.0069931633611978e-05, + "loss": 0.2891, "step": 11164 }, { - "epoch": 0.64, - "grad_norm": 0.4626758506358267, - "learning_rate": 6.017696020579864e-06, - "loss": 0.3026, + "epoch": 0.51, + "grad_norm": 0.2972319733650828, + "learning_rate": 1.006844375000074e-05, + "loss": 0.1808, "step": 11165 }, { - "epoch": 0.64, - "grad_norm": 0.26893646286578476, - "learning_rate": 6.015989101537586e-06, - "loss": 0.2204, + "epoch": 0.51, + "grad_norm": 0.5009550121719311, + "learning_rate": 1.0066955864874223e-05, + "loss": 0.322, "step": 11166 }, { - "epoch": 0.64, - "grad_norm": 1.1058572603376675, - "learning_rate": 6.0142823204614335e-06, - "loss": 0.703, + "epoch": 0.51, + "grad_norm": 0.3493637813342311, + "learning_rate": 1.0065467978265371e-05, + "loss": 0.2818, "step": 11167 }, { - "epoch": 0.64, - "grad_norm": 0.3964445682878911, - "learning_rate": 6.012575677410512e-06, - "loss": 0.2333, + "epoch": 0.51, + "grad_norm": 1.0220257264761619, + "learning_rate": 1.0063980090207119e-05, + "loss": 0.3339, "step": 11168 }, { - "epoch": 0.64, - "grad_norm": 0.27704603202525324, - "learning_rate": 6.010869172443923e-06, - "loss": 0.2202, + "epoch": 0.51, + "grad_norm": 0.5615986811009043, + "learning_rate": 1.0062492200732413e-05, + "loss": 0.3697, "step": 11169 }, { - "epoch": 0.64, - "grad_norm": 0.3433573699670649, - "learning_rate": 6.0091628056207655e-06, - "loss": 0.2727, + "epoch": 0.51, + "grad_norm": 0.3455543214289564, + "learning_rate": 1.0061004309874183e-05, + "loss": 0.2905, "step": 11170 }, { - "epoch": 0.64, - "grad_norm": 0.9905163871846923, - "learning_rate": 6.007456577000128e-06, - "loss": 0.4821, + "epoch": 0.51, + "grad_norm": 0.3423145742822945, + "learning_rate": 1.005951641766538e-05, + "loss": 0.2574, "step": 11171 }, { - "epoch": 0.64, - "grad_norm": 0.35401947805745243, - "learning_rate": 6.005750486641095e-06, - "loss": 0.2056, + "epoch": 0.51, + "grad_norm": 0.2393818136195504, + "learning_rate": 1.005802852413894e-05, + "loss": 0.1509, "step": 11172 }, { - "epoch": 0.64, - "grad_norm": 0.5022193592434819, - "learning_rate": 6.004044534602753e-06, - "loss": 0.3671, + "epoch": 0.51, + "grad_norm": 0.4073349823978717, + "learning_rate": 1.0056540629327804e-05, + "loss": 0.2843, "step": 11173 }, { - "epoch": 0.64, - "grad_norm": 0.3889968137655978, - "learning_rate": 6.002338720944174e-06, - "loss": 0.3389, + "epoch": 0.51, + "grad_norm": 0.47295796194697265, + "learning_rate": 1.0055052733264916e-05, + "loss": 0.258, "step": 11174 }, { - "epoch": 0.64, - "grad_norm": 0.32816842990029704, - "learning_rate": 6.000633045724438e-06, - "loss": 0.2712, + "epoch": 0.51, + "grad_norm": 0.44165954125187035, + "learning_rate": 1.0053564835983212e-05, + "loss": 0.3051, "step": 11175 }, { - "epoch": 0.64, - "grad_norm": 0.2643109087774337, - "learning_rate": 5.998927509002608e-06, - "loss": 0.136, + "epoch": 0.51, + "grad_norm": 0.43851128554636953, + "learning_rate": 1.0052076937515633e-05, + "loss": 0.3454, "step": 11176 }, { - "epoch": 0.64, - "grad_norm": 0.34977099868176076, - "learning_rate": 5.997222110837742e-06, - "loss": 0.2911, + "epoch": 0.51, + "grad_norm": 0.8199366589898711, + "learning_rate": 1.0050589037895122e-05, + "loss": 0.523, "step": 11177 }, { - "epoch": 0.64, - "grad_norm": 0.41325953968084655, - "learning_rate": 5.995516851288904e-06, - "loss": 0.2771, + "epoch": 0.51, + "grad_norm": 0.2733779867214866, + "learning_rate": 1.0049101137154617e-05, + "loss": 0.1903, "step": 11178 }, { - "epoch": 0.64, - "grad_norm": 0.5048678658640868, - "learning_rate": 5.9938117304151445e-06, - "loss": 0.3181, + "epoch": 0.51, + "grad_norm": 0.32619786994552724, + "learning_rate": 1.0047613235327063e-05, + "loss": 0.2509, "step": 11179 }, { - "epoch": 0.64, - "grad_norm": 0.5855639790646341, - "learning_rate": 5.992106748275513e-06, - "loss": 0.3861, + "epoch": 0.51, + "grad_norm": 0.8350626947716103, + "learning_rate": 1.0046125332445396e-05, + "loss": 0.4659, "step": 11180 }, { - "epoch": 0.64, - "grad_norm": 0.3731060722176844, - "learning_rate": 5.990401904929051e-06, - "loss": 0.2846, + "epoch": 0.51, + "grad_norm": 0.6936321210038078, + "learning_rate": 1.0044637428542559e-05, + "loss": 0.2969, "step": 11181 }, { - "epoch": 0.64, - "grad_norm": 0.2450716350813472, - "learning_rate": 5.988697200434801e-06, - "loss": 0.1784, + "epoch": 0.51, + "grad_norm": 0.36990743731921427, + "learning_rate": 1.0043149523651492e-05, + "loss": 0.3158, "step": 11182 }, { - "epoch": 0.64, - "grad_norm": 1.588488452674224, - "learning_rate": 5.986992634851794e-06, - "loss": 0.7367, + "epoch": 0.51, + "grad_norm": 0.4087959050685897, + "learning_rate": 1.0041661617805134e-05, + "loss": 0.3317, "step": 11183 }, { - "epoch": 0.64, - "grad_norm": 0.33778700717578847, - "learning_rate": 5.985288208239057e-06, - "loss": 0.2592, + "epoch": 0.51, + "grad_norm": 0.1922514543858804, + "learning_rate": 1.0040173711036431e-05, + "loss": 0.0833, "step": 11184 }, { - "epoch": 0.64, - "grad_norm": 0.38914509602391384, - "learning_rate": 5.98358392065562e-06, - "loss": 0.2963, + "epoch": 0.51, + "grad_norm": 0.3759150054440568, + "learning_rate": 1.0038685803378321e-05, + "loss": 0.2984, "step": 11185 }, { - "epoch": 0.64, - "grad_norm": 0.780635173262054, - "learning_rate": 5.981879772160497e-06, - "loss": 0.3924, + "epoch": 0.51, + "grad_norm": 0.39279462500015266, + "learning_rate": 1.0037197894863744e-05, + "loss": 0.3286, "step": 11186 }, { - "epoch": 0.64, - "grad_norm": 0.31361656933869875, - "learning_rate": 5.980175762812705e-06, - "loss": 0.2679, + "epoch": 0.51, + "grad_norm": 0.7265277052619795, + "learning_rate": 1.0035709985525639e-05, + "loss": 0.2795, "step": 11187 }, { - "epoch": 0.64, - "grad_norm": 0.4583142651536605, - "learning_rate": 5.978471892671254e-06, - "loss": 0.2751, + "epoch": 0.51, + "grad_norm": 0.3871245017582081, + "learning_rate": 1.0034222075396954e-05, + "loss": 0.2785, "step": 11188 }, { - "epoch": 0.64, - "grad_norm": 0.2585065185246987, - "learning_rate": 5.976768161795149e-06, - "loss": 0.1919, + "epoch": 0.51, + "grad_norm": 1.3284169911656147, + "learning_rate": 1.003273416451062e-05, + "loss": 0.7689, "step": 11189 }, { - "epoch": 0.64, - "grad_norm": 0.3744518103024148, - "learning_rate": 5.975064570243387e-06, - "loss": 0.2544, + "epoch": 0.51, + "grad_norm": 0.30639494721357774, + "learning_rate": 1.0031246252899585e-05, + "loss": 0.2726, "step": 11190 }, { - "epoch": 0.64, - "grad_norm": 1.2344220965569719, - "learning_rate": 5.973361118074969e-06, - "loss": 0.8546, + "epoch": 0.51, + "grad_norm": 0.265589377946439, + "learning_rate": 1.002975834059679e-05, + "loss": 0.1709, "step": 11191 }, { - "epoch": 0.64, - "grad_norm": 0.9938392858124002, - "learning_rate": 5.97165780534888e-06, - "loss": 0.3239, + "epoch": 0.51, + "grad_norm": 0.7955172943110976, + "learning_rate": 1.0028270427635175e-05, + "loss": 0.4264, "step": 11192 }, { - "epoch": 0.64, - "grad_norm": 0.2816535477742484, - "learning_rate": 5.969954632124111e-06, - "loss": 0.248, + "epoch": 0.51, + "grad_norm": 0.6993300931080364, + "learning_rate": 1.0026782514047675e-05, + "loss": 0.4032, "step": 11193 }, { - "epoch": 0.64, - "grad_norm": 0.5002961541221576, - "learning_rate": 5.968251598459636e-06, - "loss": 0.3412, + "epoch": 0.51, + "grad_norm": 0.3412634157856217, + "learning_rate": 1.002529459986724e-05, + "loss": 0.2158, "step": 11194 }, { - "epoch": 0.64, - "grad_norm": 0.2849071332633207, - "learning_rate": 5.966548704414436e-06, - "loss": 0.1259, + "epoch": 0.51, + "grad_norm": 0.3758819272844595, + "learning_rate": 1.0023806685126803e-05, + "loss": 0.3144, "step": 11195 }, { - "epoch": 0.64, - "grad_norm": 0.38572866452620297, - "learning_rate": 5.964845950047484e-06, - "loss": 0.2577, + "epoch": 0.51, + "grad_norm": 0.23917139646982116, + "learning_rate": 1.0022318769859311e-05, + "loss": 0.1312, "step": 11196 }, { - "epoch": 0.64, - "grad_norm": 0.3144011371244218, - "learning_rate": 5.96314333541774e-06, - "loss": 0.2866, + "epoch": 0.51, + "grad_norm": 0.33747696853205955, + "learning_rate": 1.0020830854097708e-05, + "loss": 0.1998, "step": 11197 }, { - "epoch": 0.64, - "grad_norm": 1.0859733305344872, - "learning_rate": 5.961440860584169e-06, - "loss": 0.4253, + "epoch": 0.51, + "grad_norm": 0.46826489130353466, + "learning_rate": 1.0019342937874923e-05, + "loss": 0.3178, "step": 11198 }, { - "epoch": 0.64, - "grad_norm": 0.31667468432575874, - "learning_rate": 5.959738525605727e-06, - "loss": 0.1599, + "epoch": 0.51, + "grad_norm": 0.9646338159483399, + "learning_rate": 1.0017855021223908e-05, + "loss": 0.4758, "step": 11199 }, { - "epoch": 0.64, - "grad_norm": 0.3265305092885602, - "learning_rate": 5.958036330541368e-06, - "loss": 0.2316, + "epoch": 0.51, + "grad_norm": 0.29606727463378635, + "learning_rate": 1.0016367104177596e-05, + "loss": 0.202, "step": 11200 }, { - "epoch": 0.64, - "grad_norm": 0.2730721162752112, - "learning_rate": 5.956334275450035e-06, - "loss": 0.2342, + "epoch": 0.51, + "grad_norm": 1.0422383555108723, + "learning_rate": 1.0014879186768936e-05, + "loss": 0.6075, "step": 11201 }, { - "epoch": 0.64, - "grad_norm": 0.315786099286474, - "learning_rate": 5.954632360390673e-06, - "loss": 0.1946, + "epoch": 0.51, + "grad_norm": 0.36708381040153193, + "learning_rate": 1.0013391269030863e-05, + "loss": 0.3222, "step": 11202 }, { - "epoch": 0.64, - "grad_norm": 0.8586307279331272, - "learning_rate": 5.9529305854222185e-06, - "loss": 0.4156, + "epoch": 0.51, + "grad_norm": 0.2870291472069312, + "learning_rate": 1.0011903350996321e-05, + "loss": 0.2151, "step": 11203 }, { - "epoch": 0.64, - "grad_norm": 0.8222346678946091, - "learning_rate": 5.951228950603605e-06, - "loss": 0.5293, + "epoch": 0.51, + "grad_norm": 0.5161731641855376, + "learning_rate": 1.001041543269825e-05, + "loss": 0.2559, "step": 11204 }, { - "epoch": 0.64, - "grad_norm": 0.26079077888105623, - "learning_rate": 5.949527455993756e-06, - "loss": 0.2104, + "epoch": 0.51, + "grad_norm": 1.387145976415137, + "learning_rate": 1.0008927514169593e-05, + "loss": 0.7648, "step": 11205 }, { - "epoch": 0.64, - "grad_norm": 0.4512259533268354, - "learning_rate": 5.947826101651599e-06, - "loss": 0.3257, + "epoch": 0.51, + "grad_norm": 0.2941844415284564, + "learning_rate": 1.0007439595443284e-05, + "loss": 0.2497, "step": 11206 }, { - "epoch": 0.64, - "grad_norm": 0.32723592880119196, - "learning_rate": 5.946124887636049e-06, - "loss": 0.2213, + "epoch": 0.51, + "grad_norm": 0.45092554426041004, + "learning_rate": 1.0005951676552277e-05, + "loss": 0.2796, "step": 11207 }, { - "epoch": 0.64, - "grad_norm": 0.29865161019025654, - "learning_rate": 5.944423814006022e-06, - "loss": 0.2226, + "epoch": 0.51, + "grad_norm": 0.7936677439200935, + "learning_rate": 1.0004463757529501e-05, + "loss": 0.4903, "step": 11208 }, { - "epoch": 0.64, - "grad_norm": 0.35149010732275404, - "learning_rate": 5.9427228808204216e-06, - "loss": 0.3059, + "epoch": 0.51, + "grad_norm": 0.33338521114220404, + "learning_rate": 1.0002975838407904e-05, + "loss": 0.2656, "step": 11209 }, { - "epoch": 0.64, - "grad_norm": 0.7682225597339026, - "learning_rate": 5.941022088138158e-06, - "loss": 0.4489, + "epoch": 0.51, + "grad_norm": 0.3685438092494757, + "learning_rate": 1.0001487919220422e-05, + "loss": 0.2723, "step": 11210 }, { - "epoch": 0.64, - "grad_norm": 0.30994659665733854, - "learning_rate": 5.939321436018119e-06, - "loss": 0.2518, + "epoch": 0.52, + "grad_norm": 0.2971943943916271, + "learning_rate": 1e-05, + "loss": 0.1866, "step": 11211 }, { - "epoch": 0.64, - "grad_norm": 0.5923893515591114, - "learning_rate": 5.937620924519207e-06, - "loss": 0.0215, + "epoch": 0.52, + "grad_norm": 0.33473866336032776, + "learning_rate": 9.998512080779581e-06, + "loss": 0.2558, "step": 11212 }, { - "epoch": 0.64, - "grad_norm": 0.22704968985868573, - "learning_rate": 5.935920553700305e-06, - "loss": 0.216, + "epoch": 0.52, + "grad_norm": 0.9026448678234319, + "learning_rate": 9.9970241615921e-06, + "loss": 0.3847, "step": 11213 }, { - "epoch": 0.64, - "grad_norm": 0.5706346113528733, - "learning_rate": 5.934220323620303e-06, - "loss": 0.3533, + "epoch": 0.52, + "grad_norm": 0.3277416205435476, + "learning_rate": 9.9955362424705e-06, + "loss": 0.2882, "step": 11214 }, { - "epoch": 0.64, - "grad_norm": 0.37401496655785443, - "learning_rate": 5.932520234338073e-06, - "loss": 0.2737, + "epoch": 0.52, + "grad_norm": 0.3722278351398582, + "learning_rate": 9.994048323447728e-06, + "loss": 0.2672, "step": 11215 }, { - "epoch": 0.64, - "grad_norm": 0.4734541768133188, - "learning_rate": 5.930820285912495e-06, - "loss": 0.4071, + "epoch": 0.52, + "grad_norm": 0.41749483195574033, + "learning_rate": 9.992560404556717e-06, + "loss": 0.2839, "step": 11216 }, { - "epoch": 0.64, - "grad_norm": 0.3539907526938965, - "learning_rate": 5.9291204784024335e-06, - "loss": 0.2553, + "epoch": 0.52, + "grad_norm": 0.2935212728183298, + "learning_rate": 9.991072485830412e-06, + "loss": 0.1747, "step": 11217 }, { - "epoch": 0.64, - "grad_norm": 0.3659412287344784, - "learning_rate": 5.9274208118667565e-06, - "loss": 0.2484, + "epoch": 0.52, + "grad_norm": 0.38045787413717364, + "learning_rate": 9.989584567301751e-06, + "loss": 0.2632, "step": 11218 }, { - "epoch": 0.64, - "grad_norm": 0.3113800892023193, - "learning_rate": 5.92572128636432e-06, - "loss": 0.1939, + "epoch": 0.52, + "grad_norm": 0.5516680955671937, + "learning_rate": 9.98809664900368e-06, + "loss": 0.4078, "step": 11219 }, { - "epoch": 0.64, - "grad_norm": 0.5020546282550093, - "learning_rate": 5.924021901953983e-06, - "loss": 0.2901, + "epoch": 0.52, + "grad_norm": 0.6095561966577813, + "learning_rate": 9.986608730969139e-06, + "loss": 0.3166, "step": 11220 }, { - "epoch": 0.64, - "grad_norm": 0.286313888428949, - "learning_rate": 5.922322658694591e-06, - "loss": 0.2489, + "epoch": 0.52, + "grad_norm": 0.33893447454938064, + "learning_rate": 9.98512081323107e-06, + "loss": 0.2665, "step": 11221 }, { - "epoch": 0.64, - "grad_norm": 1.2063295574915158, - "learning_rate": 5.920623556644987e-06, - "loss": 0.7685, + "epoch": 0.52, + "grad_norm": 0.3786551367630742, + "learning_rate": 9.983632895822405e-06, + "loss": 0.3031, "step": 11222 }, { - "epoch": 0.64, - "grad_norm": 0.422377259307182, - "learning_rate": 5.918924595864017e-06, - "loss": 0.3199, + "epoch": 0.52, + "grad_norm": 0.200723937055775, + "learning_rate": 9.982144978776096e-06, + "loss": 0.0994, "step": 11223 }, { - "epoch": 0.64, - "grad_norm": 0.5829496855100268, - "learning_rate": 5.917225776410511e-06, - "loss": 0.3525, + "epoch": 0.52, + "grad_norm": 0.34562802744888405, + "learning_rate": 9.980657062125079e-06, + "loss": 0.2652, "step": 11224 }, { - "epoch": 0.64, - "grad_norm": 0.2512331675050159, - "learning_rate": 5.915527098343302e-06, - "loss": 0.206, + "epoch": 0.52, + "grad_norm": 1.2122371224355302, + "learning_rate": 9.979169145902297e-06, + "loss": 0.5971, "step": 11225 }, { - "epoch": 0.64, - "grad_norm": 0.3745193433663806, - "learning_rate": 5.913828561721214e-06, - "loss": 0.2792, + "epoch": 0.52, + "grad_norm": 0.4319519760350492, + "learning_rate": 9.977681230140689e-06, + "loss": 0.2942, "step": 11226 }, { - "epoch": 0.65, - "grad_norm": 0.3169721307067816, - "learning_rate": 5.912130166603066e-06, - "loss": 0.2322, + "epoch": 0.52, + "grad_norm": 0.39573129478995217, + "learning_rate": 9.976193314873199e-06, + "loss": 0.2792, "step": 11227 }, { - "epoch": 0.65, - "grad_norm": 0.4240458248846178, - "learning_rate": 5.910431913047674e-06, - "loss": 0.2787, + "epoch": 0.52, + "grad_norm": 1.1944256466158096, + "learning_rate": 9.974705400132764e-06, + "loss": 0.5797, "step": 11228 }, { - "epoch": 0.65, - "grad_norm": 0.30928589966688147, - "learning_rate": 5.908733801113851e-06, - "loss": 0.2619, + "epoch": 0.52, + "grad_norm": 0.32227825350044303, + "learning_rate": 9.973217485952329e-06, + "loss": 0.2195, "step": 11229 }, { - "epoch": 0.65, - "grad_norm": 0.48813601012960506, - "learning_rate": 5.907035830860399e-06, - "loss": 0.3456, + "epoch": 0.52, + "grad_norm": 0.28344778821538724, + "learning_rate": 9.971729572364832e-06, + "loss": 0.2113, "step": 11230 }, { - "epoch": 0.65, - "grad_norm": 0.5823485197085368, - "learning_rate": 5.905338002346122e-06, - "loss": 0.314, + "epoch": 0.52, + "grad_norm": 0.5204925423101915, + "learning_rate": 9.970241659403212e-06, + "loss": 0.3675, "step": 11231 }, { - "epoch": 0.65, - "grad_norm": 0.5900398729201102, - "learning_rate": 5.9036403156298125e-06, - "loss": 0.2937, + "epoch": 0.52, + "grad_norm": 0.699345778993736, + "learning_rate": 9.968753747100417e-06, + "loss": 0.3945, "step": 11232 }, { - "epoch": 0.65, - "grad_norm": 0.23746694604233817, - "learning_rate": 5.901942770770264e-06, - "loss": 0.2338, + "epoch": 0.52, + "grad_norm": 0.36399141131955065, + "learning_rate": 9.967265835489384e-06, + "loss": 0.1773, "step": 11233 }, { - "epoch": 0.65, - "grad_norm": 0.30105969621280293, - "learning_rate": 5.900245367826258e-06, - "loss": 0.1899, + "epoch": 0.52, + "grad_norm": 0.30951729750322865, + "learning_rate": 9.965777924603053e-06, + "loss": 0.2813, "step": 11234 }, { - "epoch": 0.65, - "grad_norm": 0.6304257335183989, - "learning_rate": 5.898548106856583e-06, - "loss": 0.3072, + "epoch": 0.52, + "grad_norm": 0.38650993302524245, + "learning_rate": 9.964290014474361e-06, + "loss": 0.2628, "step": 11235 }, { - "epoch": 0.65, - "grad_norm": 0.4284540690270265, - "learning_rate": 5.896850987920009e-06, - "loss": 0.2833, + "epoch": 0.52, + "grad_norm": 0.34970039826059546, + "learning_rate": 9.96280210513626e-06, + "loss": 0.1945, "step": 11236 }, { - "epoch": 0.65, - "grad_norm": 0.38906981985827127, - "learning_rate": 5.895154011075308e-06, - "loss": 0.3207, + "epoch": 0.52, + "grad_norm": 0.4841620370183531, + "learning_rate": 9.961314196621682e-06, + "loss": 0.3121, "step": 11237 }, { - "epoch": 0.65, - "grad_norm": 0.5208798117829698, - "learning_rate": 5.893457176381248e-06, - "loss": 0.1669, + "epoch": 0.52, + "grad_norm": 0.3833296125253442, + "learning_rate": 9.95982628896357e-06, + "loss": 0.3079, "step": 11238 }, { - "epoch": 0.65, - "grad_norm": 0.39622311730384707, - "learning_rate": 5.891760483896587e-06, - "loss": 0.3249, + "epoch": 0.52, + "grad_norm": 0.33390159611624315, + "learning_rate": 9.958338382194866e-06, + "loss": 0.1924, "step": 11239 }, { - "epoch": 0.65, - "grad_norm": 0.38496094157001054, - "learning_rate": 5.890063933680087e-06, - "loss": 0.2616, + "epoch": 0.52, + "grad_norm": 0.992739510221968, + "learning_rate": 9.956850476348512e-06, + "loss": 0.5727, "step": 11240 }, { - "epoch": 0.65, - "grad_norm": 0.22651439846232713, - "learning_rate": 5.8883675257904936e-06, - "loss": 0.1797, + "epoch": 0.52, + "grad_norm": 1.3499892433771807, + "learning_rate": 9.955362571457445e-06, + "loss": 0.7246, "step": 11241 }, { - "epoch": 0.65, - "grad_norm": 0.38679464880782893, - "learning_rate": 5.886671260286558e-06, - "loss": 0.3031, + "epoch": 0.52, + "grad_norm": 0.3185155876068158, + "learning_rate": 9.953874667554608e-06, + "loss": 0.2615, "step": 11242 }, { - "epoch": 0.65, - "grad_norm": 0.7959092880908866, - "learning_rate": 5.884975137227018e-06, - "loss": 0.3909, + "epoch": 0.52, + "grad_norm": 0.35312634375467317, + "learning_rate": 9.952386764672942e-06, + "loss": 0.2053, "step": 11243 }, { - "epoch": 0.65, - "grad_norm": 0.3346253622241024, - "learning_rate": 5.883279156670616e-06, - "loss": 0.1724, + "epoch": 0.52, + "grad_norm": 0.44030432490179977, + "learning_rate": 9.950898862845385e-06, + "loss": 0.2938, "step": 11244 }, { - "epoch": 0.65, - "grad_norm": 0.30811296607418726, - "learning_rate": 5.881583318676078e-06, - "loss": 0.2709, + "epoch": 0.52, + "grad_norm": 0.34866247325376065, + "learning_rate": 9.949410962104881e-06, + "loss": 0.235, "step": 11245 }, { - "epoch": 0.65, - "grad_norm": 0.3766558287583714, - "learning_rate": 5.879887623302131e-06, - "loss": 0.3055, + "epoch": 0.52, + "grad_norm": 0.38645176329131437, + "learning_rate": 9.94792306248437e-06, + "loss": 0.258, "step": 11246 }, { - "epoch": 0.65, - "grad_norm": 0.23333225379841346, - "learning_rate": 5.8781920706075e-06, - "loss": 0.1275, + "epoch": 0.52, + "grad_norm": 0.8863989591481527, + "learning_rate": 9.946435164016793e-06, + "loss": 0.5814, "step": 11247 }, { - "epoch": 0.65, - "grad_norm": 0.5988756712228822, - "learning_rate": 5.876496660650899e-06, - "loss": 0.3427, + "epoch": 0.52, + "grad_norm": 0.4353429256861426, + "learning_rate": 9.944947266735084e-06, + "loss": 0.3107, "step": 11248 }, { - "epoch": 0.65, - "grad_norm": 0.37202182636931236, - "learning_rate": 5.874801393491041e-06, - "loss": 0.301, + "epoch": 0.52, + "grad_norm": 0.7305859198866698, + "learning_rate": 9.943459370672197e-06, + "loss": 0.2872, "step": 11249 }, { - "epoch": 0.65, - "grad_norm": 0.705704673592431, - "learning_rate": 5.873106269186635e-06, - "loss": 0.3855, + "epoch": 0.52, + "grad_norm": 0.24294457256971033, + "learning_rate": 9.941971475861063e-06, + "loss": 0.2204, "step": 11250 }, { - "epoch": 0.65, - "grad_norm": 0.3430225343190707, - "learning_rate": 5.871411287796379e-06, - "loss": 0.229, + "epoch": 0.52, + "grad_norm": 0.5799440963268557, + "learning_rate": 9.940483582334625e-06, + "loss": 0.2872, "step": 11251 }, { - "epoch": 0.65, - "grad_norm": 0.3532476705395196, - "learning_rate": 5.869716449378975e-06, - "loss": 0.3249, + "epoch": 0.52, + "grad_norm": 0.4541260190286222, + "learning_rate": 9.938995690125819e-06, + "loss": 0.3084, "step": 11252 }, { - "epoch": 0.65, - "grad_norm": 0.21846891869803547, - "learning_rate": 5.8680217539931106e-06, - "loss": 0.147, + "epoch": 0.52, + "grad_norm": 0.34213894040798587, + "learning_rate": 9.937507799267592e-06, + "loss": 0.2791, "step": 11253 }, { - "epoch": 0.65, - "grad_norm": 0.3593853724638047, - "learning_rate": 5.866327201697477e-06, - "loss": 0.2326, + "epoch": 0.52, + "grad_norm": 0.456746841725573, + "learning_rate": 9.936019909792882e-06, + "loss": 0.3071, "step": 11254 }, { - "epoch": 0.65, - "grad_norm": 0.7948326466630459, - "learning_rate": 5.864632792550753e-06, - "loss": 0.4388, + "epoch": 0.52, + "grad_norm": 0.5328886240404139, + "learning_rate": 9.934532021734632e-06, + "loss": 0.3467, "step": 11255 }, { - "epoch": 0.65, - "grad_norm": 0.4301961477693214, - "learning_rate": 5.862938526611619e-06, - "loss": 0.2876, + "epoch": 0.52, + "grad_norm": 0.22139082571634994, + "learning_rate": 9.933044135125777e-06, + "loss": 0.1058, "step": 11256 }, { - "epoch": 0.65, - "grad_norm": 0.27854050842859307, - "learning_rate": 5.861244403938744e-06, - "loss": 0.232, + "epoch": 0.52, + "grad_norm": 0.45115088557737576, + "learning_rate": 9.931556249999262e-06, + "loss": 0.325, "step": 11257 }, { - "epoch": 0.65, - "grad_norm": 1.2624624061673055, - "learning_rate": 5.859550424590801e-06, - "loss": 0.7603, + "epoch": 0.52, + "grad_norm": 0.3327462141462597, + "learning_rate": 9.930068366388026e-06, + "loss": 0.3022, "step": 11258 }, { - "epoch": 0.65, - "grad_norm": 0.3089760684352078, - "learning_rate": 5.857856588626445e-06, - "loss": 0.182, + "epoch": 0.52, + "grad_norm": 0.8672935438844294, + "learning_rate": 9.928580484325008e-06, + "loss": 0.4771, "step": 11259 }, { - "epoch": 0.65, - "grad_norm": 0.23799079783274543, - "learning_rate": 5.856162896104339e-06, - "loss": 0.2428, + "epoch": 0.52, + "grad_norm": 0.38600209057425866, + "learning_rate": 9.927092603843149e-06, + "loss": 0.288, "step": 11260 }, { - "epoch": 0.65, - "grad_norm": 0.874747782088808, - "learning_rate": 5.854469347083134e-06, - "loss": 0.5215, + "epoch": 0.52, + "grad_norm": 0.5213589895469523, + "learning_rate": 9.925604724975389e-06, + "loss": 0.3664, "step": 11261 }, { - "epoch": 0.65, - "grad_norm": 0.5987070815263701, - "learning_rate": 5.852775941621476e-06, - "loss": 0.2821, + "epoch": 0.52, + "grad_norm": 0.22231424609114156, + "learning_rate": 9.92411684775467e-06, + "loss": 0.1709, "step": 11262 }, { - "epoch": 0.65, - "grad_norm": 0.3813507046247867, - "learning_rate": 5.851082679778011e-06, - "loss": 0.2997, + "epoch": 0.52, + "grad_norm": 0.40907901381302847, + "learning_rate": 9.92262897221393e-06, + "loss": 0.2596, "step": 11263 }, { - "epoch": 0.65, - "grad_norm": 0.3432165598746957, - "learning_rate": 5.8493895616113714e-06, - "loss": 0.2648, + "epoch": 0.52, + "grad_norm": 0.5730078095353418, + "learning_rate": 9.921141098386112e-06, + "loss": 0.3276, "step": 11264 }, { - "epoch": 0.65, - "grad_norm": 0.49326946299327873, - "learning_rate": 5.847696587180195e-06, - "loss": 0.3269, + "epoch": 0.52, + "grad_norm": 0.5177099329467808, + "learning_rate": 9.919653226304148e-06, + "loss": 0.3828, "step": 11265 }, { - "epoch": 0.65, - "grad_norm": 0.3997606558563779, - "learning_rate": 5.846003756543106e-06, - "loss": 0.309, + "epoch": 0.52, + "grad_norm": 0.31081082913775293, + "learning_rate": 9.918165356000989e-06, + "loss": 0.2191, "step": 11266 }, { - "epoch": 0.65, - "grad_norm": 0.2854817732641904, - "learning_rate": 5.844311069758729e-06, - "loss": 0.138, + "epoch": 0.52, + "grad_norm": 0.5301305128225989, + "learning_rate": 9.916677487509572e-06, + "loss": 0.3618, "step": 11267 }, { - "epoch": 0.65, - "grad_norm": 0.359787125128394, - "learning_rate": 5.842618526885679e-06, - "loss": 0.2986, + "epoch": 0.52, + "grad_norm": 0.2783692418220253, + "learning_rate": 9.915189620862834e-06, + "loss": 0.1747, "step": 11268 }, { - "epoch": 0.65, - "grad_norm": 0.34338489317305765, - "learning_rate": 5.840926127982573e-06, - "loss": 0.2825, + "epoch": 0.52, + "grad_norm": 0.21618053580410504, + "learning_rate": 9.91370175609371e-06, + "loss": 0.0636, "step": 11269 }, { - "epoch": 0.65, - "grad_norm": 0.7416336040716597, - "learning_rate": 5.839233873108016e-06, - "loss": 0.3722, + "epoch": 0.52, + "grad_norm": 0.3232330796749964, + "learning_rate": 9.912213893235152e-06, + "loss": 0.2903, "step": 11270 }, { - "epoch": 0.65, - "grad_norm": 0.6592265725324533, - "learning_rate": 5.837541762320609e-06, - "loss": 0.4155, + "epoch": 0.52, + "grad_norm": 0.5895960344730679, + "learning_rate": 9.910726032320093e-06, + "loss": 0.4033, "step": 11271 }, { - "epoch": 0.65, - "grad_norm": 0.3190564215746574, - "learning_rate": 5.835849795678954e-06, - "loss": 0.2622, + "epoch": 0.52, + "grad_norm": 0.42060888505678057, + "learning_rate": 9.909238173381475e-06, + "loss": 0.2008, "step": 11272 }, { - "epoch": 0.65, - "grad_norm": 0.19298277235341782, - "learning_rate": 5.834157973241643e-06, - "loss": 0.1614, + "epoch": 0.52, + "grad_norm": 0.33757020396779014, + "learning_rate": 9.907750316452234e-06, + "loss": 0.3106, "step": 11273 }, { - "epoch": 0.65, - "grad_norm": 0.7677838234055707, - "learning_rate": 5.83246629506726e-06, - "loss": 0.4286, + "epoch": 0.52, + "grad_norm": 0.5283602641719688, + "learning_rate": 9.906262461565312e-06, + "loss": 0.3552, "step": 11274 }, { - "epoch": 0.65, - "grad_norm": 0.3608993586517985, - "learning_rate": 5.830774761214392e-06, - "loss": 0.303, + "epoch": 0.52, + "grad_norm": 0.1942295307555217, + "learning_rate": 9.904774608753649e-06, + "loss": 0.12, "step": 11275 }, { - "epoch": 0.65, - "grad_norm": 0.5166381381403379, - "learning_rate": 5.829083371741609e-06, - "loss": 0.3991, + "epoch": 0.52, + "grad_norm": 0.8435723647042749, + "learning_rate": 9.903286758050185e-06, + "loss": 0.3501, "step": 11276 }, { - "epoch": 0.65, - "grad_norm": 0.5044253316790134, - "learning_rate": 5.827392126707499e-06, - "loss": 0.2718, + "epoch": 0.52, + "grad_norm": 0.5686861867986577, + "learning_rate": 9.90179890948786e-06, + "loss": 0.3161, "step": 11277 }, { - "epoch": 0.65, - "grad_norm": 0.3718745113889626, - "learning_rate": 5.825701026170616e-06, - "loss": 0.3091, + "epoch": 0.52, + "grad_norm": 0.43755240658705796, + "learning_rate": 9.900311063099608e-06, + "loss": 0.3186, "step": 11278 }, { - "epoch": 0.65, - "grad_norm": 0.28428241777519586, - "learning_rate": 5.824010070189523e-06, - "loss": 0.2016, + "epoch": 0.52, + "grad_norm": 0.3752232116993393, + "learning_rate": 9.898823218918378e-06, + "loss": 0.2695, "step": 11279 }, { - "epoch": 0.65, - "grad_norm": 0.30819648744024025, - "learning_rate": 5.8223192588227836e-06, - "loss": 0.2175, + "epoch": 0.52, + "grad_norm": 0.3873780875450621, + "learning_rate": 9.897335376977104e-06, + "loss": 0.2608, "step": 11280 }, { - "epoch": 0.65, - "grad_norm": 0.3475980694959061, - "learning_rate": 5.820628592128952e-06, - "loss": 0.275, + "epoch": 0.52, + "grad_norm": 0.29040409170693654, + "learning_rate": 9.895847537308724e-06, + "loss": 0.2196, "step": 11281 }, { - "epoch": 0.65, - "grad_norm": 0.711060174788503, - "learning_rate": 5.81893807016657e-06, - "loss": 0.4232, + "epoch": 0.52, + "grad_norm": 0.31076278805811025, + "learning_rate": 9.894359699946177e-06, + "loss": 0.2396, "step": 11282 }, { - "epoch": 0.65, - "grad_norm": 0.5564311587922617, - "learning_rate": 5.817247692994179e-06, - "loss": 0.2324, + "epoch": 0.52, + "grad_norm": 0.741094386318254, + "learning_rate": 9.892871864922407e-06, + "loss": 0.4055, "step": 11283 }, { - "epoch": 0.65, - "grad_norm": 0.39698374618945803, - "learning_rate": 5.815557460670326e-06, - "loss": 0.2794, + "epoch": 0.52, + "grad_norm": 0.6175234579844633, + "learning_rate": 9.891384032270352e-06, + "loss": 0.4118, "step": 11284 }, { - "epoch": 0.65, - "grad_norm": 0.24435255765312222, - "learning_rate": 5.813867373253537e-06, - "loss": 0.2001, + "epoch": 0.52, + "grad_norm": 0.3890266334144168, + "learning_rate": 9.889896202022949e-06, + "loss": 0.2347, "step": 11285 }, { - "epoch": 0.65, - "grad_norm": 0.7686243817440529, - "learning_rate": 5.8121774308023415e-06, - "loss": 0.4895, + "epoch": 0.52, + "grad_norm": 0.34556806342497937, + "learning_rate": 9.888408374213134e-06, + "loss": 0.3193, "step": 11286 }, { - "epoch": 0.65, - "grad_norm": 0.31839227350161425, - "learning_rate": 5.810487633375261e-06, - "loss": 0.2133, + "epoch": 0.52, + "grad_norm": 0.276107078393333, + "learning_rate": 9.886920548873855e-06, + "loss": 0.1546, "step": 11287 }, { - "epoch": 0.65, - "grad_norm": 0.4042563230540896, - "learning_rate": 5.80879798103081e-06, - "loss": 0.3192, + "epoch": 0.52, + "grad_norm": 0.37905498470849025, + "learning_rate": 9.885432726038044e-06, + "loss": 0.2516, "step": 11288 }, { - "epoch": 0.65, - "grad_norm": 0.76019850506445, - "learning_rate": 5.807108473827508e-06, - "loss": 0.4039, + "epoch": 0.52, + "grad_norm": 0.4098254633697511, + "learning_rate": 9.883944905738643e-06, + "loss": 0.3253, "step": 11289 }, { - "epoch": 0.65, - "grad_norm": 0.30882169987209035, - "learning_rate": 5.80541911182386e-06, - "loss": 0.2067, + "epoch": 0.52, + "grad_norm": 0.7697523784411152, + "learning_rate": 9.882457088008591e-06, + "loss": 0.4142, "step": 11290 }, { - "epoch": 0.65, - "grad_norm": 0.2653860564848561, - "learning_rate": 5.803729895078368e-06, - "loss": 0.1633, + "epoch": 0.52, + "grad_norm": 0.4037712267513204, + "learning_rate": 9.880969272880823e-06, + "loss": 0.3068, "step": 11291 }, { - "epoch": 0.65, - "grad_norm": 0.37243490114112704, - "learning_rate": 5.802040823649524e-06, - "loss": 0.3116, + "epoch": 0.52, + "grad_norm": 0.9777081574517789, + "learning_rate": 9.879481460388283e-06, + "loss": 0.2415, "step": 11292 }, { - "epoch": 0.65, - "grad_norm": 0.2908543475298292, - "learning_rate": 5.800351897595832e-06, - "loss": 0.2062, + "epoch": 0.52, + "grad_norm": 0.2543500946524688, + "learning_rate": 9.877993650563906e-06, + "loss": 0.2068, "step": 11293 }, { - "epoch": 0.65, - "grad_norm": 0.7543086085745012, - "learning_rate": 5.7986631169757715e-06, - "loss": 0.4082, + "epoch": 0.52, + "grad_norm": 0.3466093786374618, + "learning_rate": 9.876505843440633e-06, + "loss": 0.2906, "step": 11294 }, { - "epoch": 0.65, - "grad_norm": 0.7486544921576171, - "learning_rate": 5.796974481847827e-06, - "loss": 0.4599, + "epoch": 0.52, + "grad_norm": 0.7228353620429991, + "learning_rate": 9.875018039051397e-06, + "loss": 0.3118, "step": 11295 }, { - "epoch": 0.65, - "grad_norm": 0.2232167232788598, - "learning_rate": 5.795285992270472e-06, - "loss": 0.216, + "epoch": 0.52, + "grad_norm": 0.6457441899501678, + "learning_rate": 9.873530237429147e-06, + "loss": 0.3875, "step": 11296 }, { - "epoch": 0.65, - "grad_norm": 0.47861387869738203, - "learning_rate": 5.793597648302185e-06, - "loss": 0.258, + "epoch": 0.52, + "grad_norm": 0.3750135617048421, + "learning_rate": 9.872042438606814e-06, + "loss": 0.308, "step": 11297 }, { - "epoch": 0.65, - "grad_norm": 0.4226253650595357, - "learning_rate": 5.791909450001432e-06, - "loss": 0.3018, + "epoch": 0.52, + "grad_norm": 0.4834283979178994, + "learning_rate": 9.87055464261734e-06, + "loss": 0.2526, "step": 11298 }, { - "epoch": 0.65, - "grad_norm": 0.32740022436539834, - "learning_rate": 5.790221397426672e-06, - "loss": 0.2581, + "epoch": 0.52, + "grad_norm": 0.38295621715687606, + "learning_rate": 9.869066849493653e-06, + "loss": 0.2284, "step": 11299 }, { - "epoch": 0.65, - "grad_norm": 0.37377043137715343, - "learning_rate": 5.7885334906363656e-06, - "loss": 0.2573, + "epoch": 0.52, + "grad_norm": 0.4213315086374267, + "learning_rate": 9.867579059268707e-06, + "loss": 0.316, "step": 11300 }, { - "epoch": 0.65, - "grad_norm": 1.0508988726333814, - "learning_rate": 5.786845729688958e-06, - "loss": 0.5739, + "epoch": 0.52, + "grad_norm": 0.30341307995900973, + "learning_rate": 9.866091271975433e-06, + "loss": 0.2413, "step": 11301 }, { - "epoch": 0.65, - "grad_norm": 0.39097791401524284, - "learning_rate": 5.785158114642906e-06, - "loss": 0.2412, + "epoch": 0.52, + "grad_norm": 0.52109829567585, + "learning_rate": 9.864603487646768e-06, + "loss": 0.3096, "step": 11302 }, { - "epoch": 0.65, - "grad_norm": 0.2146535619035754, - "learning_rate": 5.783470645556648e-06, - "loss": 0.1288, + "epoch": 0.52, + "grad_norm": 0.4270480410419949, + "learning_rate": 9.863115706315652e-06, + "loss": 0.3173, "step": 11303 }, { - "epoch": 0.65, - "grad_norm": 0.30364120884793583, - "learning_rate": 5.781783322488619e-06, - "loss": 0.2825, + "epoch": 0.52, + "grad_norm": 1.0263401120999824, + "learning_rate": 9.86162792801502e-06, + "loss": 0.6095, "step": 11304 }, { - "epoch": 0.65, - "grad_norm": 0.35964780048498673, - "learning_rate": 5.78009614549725e-06, - "loss": 0.2912, + "epoch": 0.52, + "grad_norm": 0.35577829920607223, + "learning_rate": 9.860140152777815e-06, + "loss": 0.2679, "step": 11305 }, { - "epoch": 0.65, - "grad_norm": 0.7982606825690333, - "learning_rate": 5.778409114640973e-06, - "loss": 0.3289, + "epoch": 0.52, + "grad_norm": 0.35757300119462043, + "learning_rate": 9.858652380636972e-06, + "loss": 0.2686, "step": 11306 }, { - "epoch": 0.65, - "grad_norm": 1.2002096946727734, - "learning_rate": 5.776722229978206e-06, - "loss": 0.5363, + "epoch": 0.52, + "grad_norm": 0.2927006155894064, + "learning_rate": 9.857164611625428e-06, + "loss": 0.2103, "step": 11307 }, { - "epoch": 0.65, - "grad_norm": 0.26494917021712633, - "learning_rate": 5.775035491567367e-06, - "loss": 0.2465, + "epoch": 0.52, + "grad_norm": 0.5198037093704636, + "learning_rate": 9.85567684577612e-06, + "loss": 0.2643, "step": 11308 }, { - "epoch": 0.65, - "grad_norm": 0.2207873813549719, - "learning_rate": 5.773348899466864e-06, - "loss": 0.1466, + "epoch": 0.52, + "grad_norm": 0.3113552847165077, + "learning_rate": 9.85418908312199e-06, + "loss": 0.2696, "step": 11309 }, { - "epoch": 0.65, - "grad_norm": 0.8187241696416505, - "learning_rate": 5.7716624537351105e-06, - "loss": 0.4156, + "epoch": 0.52, + "grad_norm": 0.5339207126231291, + "learning_rate": 9.852701323695974e-06, + "loss": 0.3749, "step": 11310 }, { - "epoch": 0.65, - "grad_norm": 0.3221642458006116, - "learning_rate": 5.769976154430507e-06, - "loss": 0.2634, + "epoch": 0.52, + "grad_norm": 0.5587483919713606, + "learning_rate": 9.851213567531008e-06, + "loss": 0.2957, "step": 11311 }, { - "epoch": 0.65, - "grad_norm": 0.3845684244489153, - "learning_rate": 5.768290001611446e-06, - "loss": 0.2996, + "epoch": 0.52, + "grad_norm": 0.367251509035229, + "learning_rate": 9.849725814660027e-06, + "loss": 0.3273, "step": 11312 }, { - "epoch": 0.65, - "grad_norm": 0.9866515649350825, - "learning_rate": 5.7666039953363155e-06, - "loss": 0.3581, + "epoch": 0.52, + "grad_norm": 0.28592611477158114, + "learning_rate": 9.848238065115975e-06, + "loss": 0.1959, "step": 11313 }, { - "epoch": 0.65, - "grad_norm": 0.3284899038074513, - "learning_rate": 5.76491813566351e-06, - "loss": 0.2465, + "epoch": 0.52, + "grad_norm": 0.3443597095437619, + "learning_rate": 9.846750318931788e-06, + "loss": 0.2423, "step": 11314 }, { - "epoch": 0.65, - "grad_norm": 1.3074311625669295, - "learning_rate": 5.763232422651407e-06, - "loss": 0.5833, + "epoch": 0.52, + "grad_norm": 0.3884438100932372, + "learning_rate": 9.8452625761404e-06, + "loss": 0.2972, "step": 11315 }, { - "epoch": 0.65, - "grad_norm": 0.2768885144280665, - "learning_rate": 5.761546856358384e-06, - "loss": 0.2403, + "epoch": 0.52, + "grad_norm": 0.8485851369986501, + "learning_rate": 9.843774836774744e-06, + "loss": 0.4583, "step": 11316 }, { - "epoch": 0.65, - "grad_norm": 0.34299468161640334, - "learning_rate": 5.759861436842806e-06, - "loss": 0.2699, + "epoch": 0.52, + "grad_norm": 0.3285333780022082, + "learning_rate": 9.842287100867765e-06, + "loss": 0.2978, "step": 11317 }, { - "epoch": 0.65, - "grad_norm": 0.846850459365096, - "learning_rate": 5.7581761641630485e-06, - "loss": 0.5027, + "epoch": 0.52, + "grad_norm": 0.34938287201288365, + "learning_rate": 9.840799368452401e-06, + "loss": 0.2293, "step": 11318 }, { - "epoch": 0.65, - "grad_norm": 0.25050625975393576, - "learning_rate": 5.756491038377469e-06, - "loss": 0.211, + "epoch": 0.52, + "grad_norm": 0.30362335455866146, + "learning_rate": 9.839311639561584e-06, + "loss": 0.1962, "step": 11319 }, { - "epoch": 0.65, - "grad_norm": 0.39637983568228946, - "learning_rate": 5.754806059544421e-06, - "loss": 0.2666, + "epoch": 0.52, + "grad_norm": 0.8043207420081717, + "learning_rate": 9.837823914228253e-06, + "loss": 0.4987, "step": 11320 }, { - "epoch": 0.65, - "grad_norm": 0.5585156954349844, - "learning_rate": 5.753121227722254e-06, - "loss": 0.3317, + "epoch": 0.52, + "grad_norm": 0.28869154554550674, + "learning_rate": 9.83633619248534e-06, + "loss": 0.2234, "step": 11321 }, { - "epoch": 0.65, - "grad_norm": 0.4894209756295622, - "learning_rate": 5.7514365429693186e-06, - "loss": 0.2484, + "epoch": 0.52, + "grad_norm": 0.5543187565832229, + "learning_rate": 9.834848474365792e-06, + "loss": 0.3635, "step": 11322 }, { - "epoch": 0.65, - "grad_norm": 0.39527617870681714, - "learning_rate": 5.749752005343954e-06, - "loss": 0.2635, + "epoch": 0.52, + "grad_norm": 0.702133414958866, + "learning_rate": 9.833360759902536e-06, + "loss": 0.4383, "step": 11323 }, { - "epoch": 0.65, - "grad_norm": 0.33485572046557144, - "learning_rate": 5.7480676149044945e-06, - "loss": 0.2856, + "epoch": 0.52, + "grad_norm": 0.3067174406482953, + "learning_rate": 9.831873049128513e-06, + "loss": 0.1878, "step": 11324 }, { - "epoch": 0.65, - "grad_norm": 0.4181430086739952, - "learning_rate": 5.746383371709267e-06, - "loss": 0.2432, + "epoch": 0.52, + "grad_norm": 0.2298526231679964, + "learning_rate": 9.830385342076659e-06, + "loss": 0.2125, "step": 11325 }, { - "epoch": 0.65, - "grad_norm": 0.28865695519714385, - "learning_rate": 5.7446992758166035e-06, - "loss": 0.1886, + "epoch": 0.52, + "grad_norm": 1.2998040450816835, + "learning_rate": 9.828897638779909e-06, + "loss": 0.763, "step": 11326 }, { - "epoch": 0.65, - "grad_norm": 0.43098022642011, - "learning_rate": 5.743015327284822e-06, - "loss": 0.332, + "epoch": 0.52, + "grad_norm": 0.33562898688016185, + "learning_rate": 9.827409939271201e-06, + "loss": 0.2123, "step": 11327 }, { - "epoch": 0.65, - "grad_norm": 0.46720475592837557, - "learning_rate": 5.7413315261722355e-06, - "loss": 0.3363, + "epoch": 0.52, + "grad_norm": 0.9228658731387412, + "learning_rate": 9.825922243583472e-06, + "loss": 0.3987, "step": 11328 }, { - "epoch": 0.65, - "grad_norm": 0.3041795818931702, - "learning_rate": 5.739647872537157e-06, - "loss": 0.2173, + "epoch": 0.52, + "grad_norm": 0.3723737939589882, + "learning_rate": 9.824434551749652e-06, + "loss": 0.3106, "step": 11329 }, { - "epoch": 0.65, - "grad_norm": 1.087779922116115, - "learning_rate": 5.737964366437885e-06, - "loss": 0.8105, + "epoch": 0.52, + "grad_norm": 0.33547978258230177, + "learning_rate": 9.822946863802686e-06, + "loss": 0.2535, "step": 11330 }, { - "epoch": 0.65, - "grad_norm": 0.23944258282874695, - "learning_rate": 5.736281007932727e-06, - "loss": 0.181, + "epoch": 0.52, + "grad_norm": 0.29328485358672146, + "learning_rate": 9.821459179775506e-06, + "loss": 0.1058, "step": 11331 }, { - "epoch": 0.65, - "grad_norm": 0.27234929845391487, - "learning_rate": 5.734597797079974e-06, - "loss": 0.2099, + "epoch": 0.52, + "grad_norm": 0.5077012902668878, + "learning_rate": 9.819971499701046e-06, + "loss": 0.4041, "step": 11332 }, { - "epoch": 0.65, - "grad_norm": 0.7745522496107576, - "learning_rate": 5.732914733937917e-06, - "loss": 0.4524, + "epoch": 0.52, + "grad_norm": 0.2989586151903439, + "learning_rate": 9.818483823612249e-06, + "loss": 0.2661, "step": 11333 }, { - "epoch": 0.65, - "grad_norm": 0.7024537449114042, - "learning_rate": 5.731231818564834e-06, - "loss": 0.4207, + "epoch": 0.52, + "grad_norm": 0.7657812988543198, + "learning_rate": 9.816996151542034e-06, + "loss": 0.2283, "step": 11334 }, { - "epoch": 0.65, - "grad_norm": 0.3230484251573659, - "learning_rate": 5.729549051019014e-06, - "loss": 0.2026, + "epoch": 0.52, + "grad_norm": 0.6749205299943981, + "learning_rate": 9.815508483523355e-06, + "loss": 0.4603, "step": 11335 }, { - "epoch": 0.65, - "grad_norm": 0.3556886103604453, - "learning_rate": 5.7278664313587275e-06, - "loss": 0.3461, + "epoch": 0.52, + "grad_norm": 0.44628229994162955, + "learning_rate": 9.81402081958914e-06, + "loss": 0.2986, "step": 11336 }, { - "epoch": 0.65, - "grad_norm": 0.30108299641494074, - "learning_rate": 5.726183959642242e-06, - "loss": 0.1966, + "epoch": 0.52, + "grad_norm": 0.29996002564445495, + "learning_rate": 9.812533159772327e-06, + "loss": 0.2469, "step": 11337 }, { - "epoch": 0.65, - "grad_norm": 0.6097747958982636, - "learning_rate": 5.724501635927818e-06, - "loss": 0.3249, + "epoch": 0.52, + "grad_norm": 0.7651913848861398, + "learning_rate": 9.811045504105844e-06, + "loss": 0.5218, "step": 11338 }, { - "epoch": 0.65, - "grad_norm": 0.37914027796049266, - "learning_rate": 5.722819460273723e-06, - "loss": 0.2742, + "epoch": 0.52, + "grad_norm": 0.4012912215015603, + "learning_rate": 9.809557852622634e-06, + "loss": 0.2843, "step": 11339 }, { - "epoch": 0.65, - "grad_norm": 0.35501758807427714, - "learning_rate": 5.7211374327382066e-06, - "loss": 0.2916, + "epoch": 0.52, + "grad_norm": 0.6749920705288126, + "learning_rate": 9.808070205355631e-06, + "loss": 0.2226, "step": 11340 }, { - "epoch": 0.65, - "grad_norm": 0.5940137811131664, - "learning_rate": 5.719455553379516e-06, - "loss": 0.3368, + "epoch": 0.52, + "grad_norm": 0.37475171715598016, + "learning_rate": 9.806582562337768e-06, + "loss": 0.3168, "step": 11341 }, { - "epoch": 0.65, - "grad_norm": 0.36694725493194763, - "learning_rate": 5.717773822255896e-06, - "loss": 0.2532, + "epoch": 0.52, + "grad_norm": 0.36189697779983065, + "learning_rate": 9.805094923601975e-06, + "loss": 0.2834, "step": 11342 }, { - "epoch": 0.65, - "grad_norm": 0.23800875625112516, - "learning_rate": 5.71609223942558e-06, - "loss": 0.2069, + "epoch": 0.52, + "grad_norm": 0.8689978307195035, + "learning_rate": 9.8036072891812e-06, + "loss": 0.511, "step": 11343 }, { - "epoch": 0.65, - "grad_norm": 0.37304610142960154, - "learning_rate": 5.7144108049468106e-06, - "loss": 0.2867, + "epoch": 0.52, + "grad_norm": 0.4106378196832395, + "learning_rate": 9.802119659108369e-06, + "loss": 0.2731, "step": 11344 }, { - "epoch": 0.65, - "grad_norm": 0.36018737364414755, - "learning_rate": 5.712729518877813e-06, - "loss": 0.2725, + "epoch": 0.52, + "grad_norm": 0.3179697524653702, + "learning_rate": 9.800632033416417e-06, + "loss": 0.2674, "step": 11345 }, { - "epoch": 0.65, - "grad_norm": 1.454786122363037, - "learning_rate": 5.711048381276801e-06, - "loss": 0.838, + "epoch": 0.52, + "grad_norm": 0.41790338075564004, + "learning_rate": 9.799144412138276e-06, + "loss": 0.3271, "step": 11346 }, { - "epoch": 0.65, - "grad_norm": 0.4161241033055131, - "learning_rate": 5.709367392202003e-06, - "loss": 0.2913, + "epoch": 0.52, + "grad_norm": 0.2652859563513943, + "learning_rate": 9.797656795306887e-06, + "loss": 0.103, "step": 11347 }, { - "epoch": 0.65, - "grad_norm": 0.28370028246480244, - "learning_rate": 5.707686551711628e-06, - "loss": 0.2552, + "epoch": 0.52, + "grad_norm": 0.3431985531842182, + "learning_rate": 9.796169182955184e-06, + "loss": 0.2484, "step": 11348 }, { - "epoch": 0.65, - "grad_norm": 0.2784506630346675, - "learning_rate": 5.706005859863883e-06, - "loss": 0.1635, + "epoch": 0.52, + "grad_norm": 0.38953964463762425, + "learning_rate": 9.794681575116097e-06, + "loss": 0.3164, "step": 11349 }, { - "epoch": 0.65, - "grad_norm": 0.36016396685166263, - "learning_rate": 5.704325316716966e-06, - "loss": 0.3082, + "epoch": 0.52, + "grad_norm": 0.7037691358095647, + "learning_rate": 9.793193971822563e-06, + "loss": 0.3649, "step": 11350 }, { - "epoch": 0.65, - "grad_norm": 0.4467132067514661, - "learning_rate": 5.702644922329083e-06, - "loss": 0.3476, + "epoch": 0.52, + "grad_norm": 0.323916426803517, + "learning_rate": 9.791706373107513e-06, + "loss": 0.2573, "step": 11351 }, { - "epoch": 0.65, - "grad_norm": 0.327846838982423, - "learning_rate": 5.70096467675842e-06, - "loss": 0.2569, + "epoch": 0.52, + "grad_norm": 0.9684206391950299, + "learning_rate": 9.790218779003883e-06, + "loss": 0.564, "step": 11352 }, { - "epoch": 0.65, - "grad_norm": 0.367416999026494, - "learning_rate": 5.699284580063167e-06, - "loss": 0.2946, + "epoch": 0.52, + "grad_norm": 0.22811918207022813, + "learning_rate": 9.78873118954461e-06, + "loss": 0.1758, "step": 11353 }, { - "epoch": 0.65, - "grad_norm": 0.5459229122762558, - "learning_rate": 5.697604632301504e-06, - "loss": 0.4332, + "epoch": 0.52, + "grad_norm": 0.4433453636392534, + "learning_rate": 9.787243604762625e-06, + "loss": 0.2696, "step": 11354 }, { - "epoch": 0.65, - "grad_norm": 0.25668900132390404, - "learning_rate": 5.695924833531603e-06, - "loss": 0.2039, + "epoch": 0.52, + "grad_norm": 0.5587809723680933, + "learning_rate": 9.785756024690856e-06, + "loss": 0.3483, "step": 11355 }, { - "epoch": 0.65, - "grad_norm": 0.4629903377649034, - "learning_rate": 5.6942451838116445e-06, - "loss": 0.2769, + "epoch": 0.52, + "grad_norm": 0.4007653472814551, + "learning_rate": 9.784268449362247e-06, + "loss": 0.3269, "step": 11356 }, { - "epoch": 0.65, - "grad_norm": 0.2880039976122195, - "learning_rate": 5.69256568319979e-06, - "loss": 0.2513, + "epoch": 0.52, + "grad_norm": 0.38592473956239026, + "learning_rate": 9.782780878809726e-06, + "loss": 0.2137, "step": 11357 }, { - "epoch": 0.65, - "grad_norm": 1.0645379082384252, - "learning_rate": 5.6908863317542e-06, - "loss": 0.3649, + "epoch": 0.52, + "grad_norm": 0.5862428367105833, + "learning_rate": 9.781293313066227e-06, + "loss": 0.4022, "step": 11358 }, { - "epoch": 0.65, - "grad_norm": 0.3809085828908202, - "learning_rate": 5.689207129533027e-06, - "loss": 0.3278, + "epoch": 0.52, + "grad_norm": 0.2938954387758226, + "learning_rate": 9.779805752164679e-06, + "loss": 0.1842, "step": 11359 }, { - "epoch": 0.65, - "grad_norm": 0.33329868938159685, - "learning_rate": 5.687528076594432e-06, - "loss": 0.3125, + "epoch": 0.52, + "grad_norm": 0.3232736996143368, + "learning_rate": 9.778318196138024e-06, + "loss": 0.2014, "step": 11360 }, { - "epoch": 0.65, - "grad_norm": 0.7300849087139614, - "learning_rate": 5.685849172996551e-06, - "loss": 0.411, + "epoch": 0.52, + "grad_norm": 0.3694542875218248, + "learning_rate": 9.776830645019192e-06, + "loss": 0.2971, "step": 11361 }, { - "epoch": 0.65, - "grad_norm": 0.4960419001943459, - "learning_rate": 5.6841704187975296e-06, - "loss": 0.2599, + "epoch": 0.52, + "grad_norm": 0.7968486225014186, + "learning_rate": 9.775343098841112e-06, + "loss": 0.4654, "step": 11362 }, { - "epoch": 0.65, - "grad_norm": 0.21882316289564832, - "learning_rate": 5.682491814055497e-06, - "loss": 0.2088, + "epoch": 0.52, + "grad_norm": 0.3572834455559596, + "learning_rate": 9.77385555763672e-06, + "loss": 0.2328, "step": 11363 }, { - "epoch": 0.65, - "grad_norm": 0.4726847855526382, - "learning_rate": 5.680813358828592e-06, - "loss": 0.3169, + "epoch": 0.52, + "grad_norm": 0.477523655943538, + "learning_rate": 9.772368021438943e-06, + "loss": 0.3552, "step": 11364 }, { - "epoch": 0.65, - "grad_norm": 0.30338445447650797, - "learning_rate": 5.679135053174932e-06, - "loss": 0.1485, + "epoch": 0.52, + "grad_norm": 0.265109374309628, + "learning_rate": 9.770880490280724e-06, + "loss": 0.1987, "step": 11365 }, { - "epoch": 0.65, - "grad_norm": 0.4310514017451592, - "learning_rate": 5.677456897152641e-06, - "loss": 0.3493, + "epoch": 0.52, + "grad_norm": 0.35509421018407383, + "learning_rate": 9.76939296419499e-06, + "loss": 0.2093, "step": 11366 }, { - "epoch": 0.65, - "grad_norm": 0.33338075483482793, - "learning_rate": 5.6757788908198316e-06, - "loss": 0.2944, + "epoch": 0.52, + "grad_norm": 1.0692402796967047, + "learning_rate": 9.767905443214673e-06, + "loss": 0.6192, "step": 11367 }, { - "epoch": 0.65, - "grad_norm": 0.4127582369100299, - "learning_rate": 5.674101034234609e-06, - "loss": 0.239, + "epoch": 0.52, + "grad_norm": 0.47784561463110303, + "learning_rate": 9.766417927372703e-06, + "loss": 0.373, "step": 11368 }, { - "epoch": 0.65, - "grad_norm": 0.3351444220704729, - "learning_rate": 5.672423327455085e-06, - "loss": 0.2188, + "epoch": 0.52, + "grad_norm": 0.29653148175576055, + "learning_rate": 9.764930416702018e-06, + "loss": 0.2624, "step": 11369 }, { - "epoch": 0.65, - "grad_norm": 0.4345390366631468, - "learning_rate": 5.670745770539356e-06, - "loss": 0.2443, + "epoch": 0.52, + "grad_norm": 0.45777707551587066, + "learning_rate": 9.763442911235546e-06, + "loss": 0.2722, "step": 11370 }, { - "epoch": 0.65, - "grad_norm": 0.24478960709092923, - "learning_rate": 5.669068363545516e-06, - "loss": 0.2213, + "epoch": 0.52, + "grad_norm": 0.34652033969508744, + "learning_rate": 9.761955411006221e-06, + "loss": 0.2287, "step": 11371 }, { - "epoch": 0.65, - "grad_norm": 0.4821758480038891, - "learning_rate": 5.667391106531647e-06, - "loss": 0.3277, + "epoch": 0.52, + "grad_norm": 0.4339725801221266, + "learning_rate": 9.760467916046971e-06, + "loss": 0.2984, "step": 11372 }, { - "epoch": 0.65, - "grad_norm": 0.7428114884812085, - "learning_rate": 5.665713999555842e-06, - "loss": 0.3687, + "epoch": 0.52, + "grad_norm": 0.2964375909671723, + "learning_rate": 9.758980426390732e-06, + "loss": 0.2499, "step": 11373 }, { - "epoch": 0.65, - "grad_norm": 0.7485401252645069, - "learning_rate": 5.6640370426761735e-06, - "loss": 0.4149, + "epoch": 0.52, + "grad_norm": 0.6822223269178299, + "learning_rate": 9.757492942070436e-06, + "loss": 0.4439, "step": 11374 }, { - "epoch": 0.65, - "grad_norm": 0.23688874268890042, - "learning_rate": 5.662360235950717e-06, - "loss": 0.1964, + "epoch": 0.52, + "grad_norm": 0.5350618239062891, + "learning_rate": 9.756005463119011e-06, + "loss": 0.3142, "step": 11375 }, { - "epoch": 0.65, - "grad_norm": 0.31576975671951585, - "learning_rate": 5.6606835794375346e-06, - "loss": 0.254, + "epoch": 0.52, + "grad_norm": 0.3241055541072353, + "learning_rate": 9.754517989569386e-06, + "loss": 0.2485, "step": 11376 }, { - "epoch": 0.65, - "grad_norm": 0.7108945036865616, - "learning_rate": 5.659007073194697e-06, - "loss": 0.4271, + "epoch": 0.52, + "grad_norm": 0.3218213588275265, + "learning_rate": 9.753030521454502e-06, + "loss": 0.2014, "step": 11377 }, { - "epoch": 0.65, - "grad_norm": 0.33881004573315643, - "learning_rate": 5.657330717280258e-06, - "loss": 0.2427, + "epoch": 0.52, + "grad_norm": 0.5953684261381536, + "learning_rate": 9.751543058807282e-06, + "loss": 0.2674, "step": 11378 }, { - "epoch": 0.65, - "grad_norm": 0.35024537485757967, - "learning_rate": 5.655654511752274e-06, - "loss": 0.2942, + "epoch": 0.52, + "grad_norm": 0.4028297162092129, + "learning_rate": 9.750055601660662e-06, + "loss": 0.299, "step": 11379 }, { - "epoch": 0.65, - "grad_norm": 0.6233162286325228, - "learning_rate": 5.653978456668779e-06, - "loss": 0.3727, + "epoch": 0.52, + "grad_norm": 0.5187622947193219, + "learning_rate": 9.748568150047568e-06, + "loss": 0.2899, "step": 11380 }, { - "epoch": 0.65, - "grad_norm": 0.23743888501996108, - "learning_rate": 5.652302552087827e-06, - "loss": 0.1682, + "epoch": 0.52, + "grad_norm": 0.3304507039502457, + "learning_rate": 9.747080704000928e-06, + "loss": 0.2843, "step": 11381 }, { - "epoch": 0.65, - "grad_norm": 0.42937799121144055, - "learning_rate": 5.6506267980674515e-06, - "loss": 0.2655, + "epoch": 0.52, + "grad_norm": 0.4789894900450128, + "learning_rate": 9.745593263553683e-06, + "loss": 0.3613, "step": 11382 }, { - "epoch": 0.65, - "grad_norm": 0.3103419789202479, - "learning_rate": 5.648951194665683e-06, - "loss": 0.294, + "epoch": 0.52, + "grad_norm": 0.20835030026720966, + "learning_rate": 9.744105828738758e-06, + "loss": 0.0714, "step": 11383 }, { - "epoch": 0.65, - "grad_norm": 0.32653676277691335, - "learning_rate": 5.647275741940543e-06, - "loss": 0.2316, + "epoch": 0.52, + "grad_norm": 0.34496789877472606, + "learning_rate": 9.742618399589082e-06, + "loss": 0.2837, "step": 11384 }, { - "epoch": 0.65, - "grad_norm": 0.6254068439076284, - "learning_rate": 5.645600439950061e-06, - "loss": 0.4076, + "epoch": 0.52, + "grad_norm": 0.3628646663858775, + "learning_rate": 9.741130976137585e-06, + "loss": 0.3122, "step": 11385 }, { - "epoch": 0.65, - "grad_norm": 0.8407360463120334, - "learning_rate": 5.643925288752248e-06, - "loss": 0.4822, + "epoch": 0.52, + "grad_norm": 0.6510925053002953, + "learning_rate": 9.7396435584172e-06, + "loss": 0.3648, "step": 11386 }, { - "epoch": 0.65, - "grad_norm": 0.35286540111542963, - "learning_rate": 5.642250288405116e-06, - "loss": 0.2868, + "epoch": 0.52, + "grad_norm": 0.4253298923912513, + "learning_rate": 9.738156146460857e-06, + "loss": 0.289, "step": 11387 }, { - "epoch": 0.65, - "grad_norm": 0.2163730802370492, - "learning_rate": 5.6405754389666635e-06, - "loss": 0.1894, + "epoch": 0.52, + "grad_norm": 0.5823188576601506, + "learning_rate": 9.736668740301485e-06, + "loss": 0.3535, "step": 11388 }, { - "epoch": 0.65, - "grad_norm": 0.5102026088195443, - "learning_rate": 5.638900740494901e-06, - "loss": 0.3567, + "epoch": 0.52, + "grad_norm": 0.23488840267704067, + "learning_rate": 9.735181339972007e-06, + "loss": 0.1812, "step": 11389 }, { - "epoch": 0.65, - "grad_norm": 0.39499275164180747, - "learning_rate": 5.637226193047818e-06, - "loss": 0.293, + "epoch": 0.52, + "grad_norm": 0.5207796888508791, + "learning_rate": 9.733693945505366e-06, + "loss": 0.3079, "step": 11390 }, { - "epoch": 0.65, - "grad_norm": 0.36184693966404863, - "learning_rate": 5.635551796683405e-06, - "loss": 0.2894, + "epoch": 0.52, + "grad_norm": 0.4079513190936559, + "learning_rate": 9.732206556934484e-06, + "loss": 0.336, "step": 11391 }, { - "epoch": 0.65, - "grad_norm": 1.1693194311109523, - "learning_rate": 5.633877551459646e-06, - "loss": 0.5964, + "epoch": 0.52, + "grad_norm": 0.39151629863631365, + "learning_rate": 9.73071917429229e-06, + "loss": 0.3152, "step": 11392 }, { - "epoch": 0.65, - "grad_norm": 0.3155464664914508, - "learning_rate": 5.6322034574345145e-06, - "loss": 0.2347, + "epoch": 0.52, + "grad_norm": 0.34194062562220184, + "learning_rate": 9.729231797611712e-06, + "loss": 0.1516, "step": 11393 }, { - "epoch": 0.65, - "grad_norm": 0.20528795990945872, - "learning_rate": 5.630529514665993e-06, - "loss": 0.1466, + "epoch": 0.52, + "grad_norm": 0.44007652946439585, + "learning_rate": 9.727744426925678e-06, + "loss": 0.3056, "step": 11394 }, { - "epoch": 0.65, - "grad_norm": 0.4960690183431173, - "learning_rate": 5.628855723212048e-06, - "loss": 0.3488, + "epoch": 0.52, + "grad_norm": 0.5124702481539362, + "learning_rate": 9.726257062267124e-06, + "loss": 0.3255, "step": 11395 }, { - "epoch": 0.65, - "grad_norm": 0.32121279561481897, - "learning_rate": 5.62718208313064e-06, - "loss": 0.2645, + "epoch": 0.52, + "grad_norm": 0.3641520661523698, + "learning_rate": 9.724769703668976e-06, + "loss": 0.2195, "step": 11396 }, { - "epoch": 0.65, - "grad_norm": 0.7035742236195769, - "learning_rate": 5.625508594479725e-06, - "loss": 0.3335, + "epoch": 0.52, + "grad_norm": 0.32891859657381134, + "learning_rate": 9.72328235116416e-06, + "loss": 0.2864, "step": 11397 }, { - "epoch": 0.65, - "grad_norm": 0.7735193256482944, - "learning_rate": 5.6238352573172635e-06, - "loss": 0.4442, + "epoch": 0.52, + "grad_norm": 0.475697650649854, + "learning_rate": 9.721795004785604e-06, + "loss": 0.2183, "step": 11398 }, { - "epoch": 0.65, - "grad_norm": 0.26150894556463467, - "learning_rate": 5.622162071701198e-06, - "loss": 0.2663, + "epoch": 0.52, + "grad_norm": 0.401004429903904, + "learning_rate": 9.720307664566241e-06, + "loss": 0.2291, "step": 11399 }, { - "epoch": 0.65, - "grad_norm": 0.27248367312206967, - "learning_rate": 5.6204890376894735e-06, - "loss": 0.1802, + "epoch": 0.52, + "grad_norm": 0.28910835961316905, + "learning_rate": 9.718820330538999e-06, + "loss": 0.2737, "step": 11400 }, { - "epoch": 0.66, - "grad_norm": 0.6700607475194417, - "learning_rate": 5.61881615534002e-06, - "loss": 0.2683, + "epoch": 0.52, + "grad_norm": 1.0152568760270457, + "learning_rate": 9.7173330027368e-06, + "loss": 0.5041, "step": 11401 }, { - "epoch": 0.66, - "grad_norm": 0.3534029225730532, - "learning_rate": 5.617143424710778e-06, - "loss": 0.3028, + "epoch": 0.52, + "grad_norm": 0.4011210892819425, + "learning_rate": 9.715845681192576e-06, + "loss": 0.2455, "step": 11402 }, { - "epoch": 0.66, - "grad_norm": 0.3681851782516326, - "learning_rate": 5.615470845859672e-06, - "loss": 0.2829, + "epoch": 0.52, + "grad_norm": 0.5155930391421709, + "learning_rate": 9.714358365939256e-06, + "loss": 0.3598, "step": 11403 }, { - "epoch": 0.66, - "grad_norm": 0.5710242786946949, - "learning_rate": 5.613798418844623e-06, - "loss": 0.2027, + "epoch": 0.52, + "grad_norm": 0.2363770099116954, + "learning_rate": 9.712871057009768e-06, + "loss": 0.2071, "step": 11404 }, { - "epoch": 0.66, - "grad_norm": 0.37252093370364825, - "learning_rate": 5.6121261437235445e-06, - "loss": 0.2886, + "epoch": 0.52, + "grad_norm": 0.6507549608403904, + "learning_rate": 9.711383754437035e-06, + "loss": 0.4115, "step": 11405 }, { - "epoch": 0.66, - "grad_norm": 0.30991829143518573, - "learning_rate": 5.6104540205543445e-06, - "loss": 0.1872, + "epoch": 0.52, + "grad_norm": 0.3935240946495296, + "learning_rate": 9.709896458253985e-06, + "loss": 0.2266, "step": 11406 }, { - "epoch": 0.66, - "grad_norm": 0.3134833617394962, - "learning_rate": 5.608782049394938e-06, - "loss": 0.2361, + "epoch": 0.52, + "grad_norm": 0.565479502819885, + "learning_rate": 9.708409168493554e-06, + "loss": 0.3616, "step": 11407 }, { - "epoch": 0.66, - "grad_norm": 0.3926136787003498, - "learning_rate": 5.60711023030322e-06, - "loss": 0.3061, + "epoch": 0.52, + "grad_norm": 0.36281857444155796, + "learning_rate": 9.706921885188662e-06, + "loss": 0.2865, "step": 11408 }, { - "epoch": 0.66, - "grad_norm": 0.9496778327004386, - "learning_rate": 5.605438563337087e-06, - "loss": 0.7436, + "epoch": 0.52, + "grad_norm": 0.34970632129672796, + "learning_rate": 9.705434608372238e-06, + "loss": 0.2077, "step": 11409 }, { - "epoch": 0.66, - "grad_norm": 0.4551020413608209, - "learning_rate": 5.6037670485544215e-06, - "loss": 0.2394, + "epoch": 0.52, + "grad_norm": 0.27419626997472696, + "learning_rate": 9.703947338077206e-06, + "loss": 0.2048, "step": 11410 }, { - "epoch": 0.66, - "grad_norm": 0.2614793357489382, - "learning_rate": 5.60209568601312e-06, - "loss": 0.2574, + "epoch": 0.52, + "grad_norm": 1.4491909940359593, + "learning_rate": 9.702460074336489e-06, + "loss": 0.7524, "step": 11411 }, { - "epoch": 0.66, - "grad_norm": 0.4548863557191528, - "learning_rate": 5.600424475771058e-06, - "loss": 0.2345, + "epoch": 0.52, + "grad_norm": 0.2504680531291981, + "learning_rate": 9.700972817183026e-06, + "loss": 0.2172, "step": 11412 }, { - "epoch": 0.66, - "grad_norm": 0.6171749330064369, - "learning_rate": 5.5987534178861e-06, - "loss": 0.3804, + "epoch": 0.52, + "grad_norm": 0.7419744938866444, + "learning_rate": 9.699485566649734e-06, + "loss": 0.4234, "step": 11413 }, { - "epoch": 0.66, - "grad_norm": 0.310695729368847, - "learning_rate": 5.5970825124161255e-06, - "loss": 0.2084, + "epoch": 0.52, + "grad_norm": 0.6374358339232042, + "learning_rate": 9.697998322769544e-06, + "loss": 0.4044, "step": 11414 }, { - "epoch": 0.66, - "grad_norm": 0.2857621372988274, - "learning_rate": 5.595411759418995e-06, - "loss": 0.2615, + "epoch": 0.52, + "grad_norm": 0.26159092038567966, + "learning_rate": 9.696511085575377e-06, + "loss": 0.164, "step": 11415 }, { - "epoch": 0.66, - "grad_norm": 0.9880804735860319, - "learning_rate": 5.5937411589525655e-06, - "loss": 0.5304, + "epoch": 0.52, + "grad_norm": 0.4110981291628727, + "learning_rate": 9.695023855100164e-06, + "loss": 0.3239, "step": 11416 }, { - "epoch": 0.66, - "grad_norm": 0.29446071991324213, - "learning_rate": 5.592070711074691e-06, - "loss": 0.1841, + "epoch": 0.52, + "grad_norm": 0.3713800293175979, + "learning_rate": 9.69353663137683e-06, + "loss": 0.259, "step": 11417 }, { - "epoch": 0.66, - "grad_norm": 0.7950217222504905, - "learning_rate": 5.590400415843214e-06, - "loss": 0.4106, + "epoch": 0.52, + "grad_norm": 0.43293500310566646, + "learning_rate": 9.692049414438298e-06, + "loss": 0.2842, "step": 11418 }, { - "epoch": 0.66, - "grad_norm": 0.2692860474214037, - "learning_rate": 5.5887302733159835e-06, - "loss": 0.2516, + "epoch": 0.52, + "grad_norm": 1.1789619839219372, + "learning_rate": 9.690562204317496e-06, + "loss": 0.3271, "step": 11419 }, { - "epoch": 0.66, - "grad_norm": 0.28812730174273965, - "learning_rate": 5.587060283550835e-06, - "loss": 0.2093, + "epoch": 0.52, + "grad_norm": 0.3415861038345586, + "learning_rate": 9.689075001047348e-06, + "loss": 0.2745, "step": 11420 }, { - "epoch": 0.66, - "grad_norm": 0.4135254933127519, - "learning_rate": 5.585390446605598e-06, - "loss": 0.2756, + "epoch": 0.52, + "grad_norm": 0.38176274871500615, + "learning_rate": 9.68758780466078e-06, + "loss": 0.2599, "step": 11421 }, { - "epoch": 0.66, - "grad_norm": 0.3391811932650513, - "learning_rate": 5.583720762538097e-06, - "loss": 0.302, + "epoch": 0.52, + "grad_norm": 0.32576720649743524, + "learning_rate": 9.686100615190718e-06, + "loss": 0.1784, "step": 11422 }, { - "epoch": 0.66, - "grad_norm": 0.5581481167123312, - "learning_rate": 5.58205123140616e-06, - "loss": 0.1832, + "epoch": 0.52, + "grad_norm": 0.35462859933227153, + "learning_rate": 9.684613432670085e-06, + "loss": 0.2277, "step": 11423 }, { - "epoch": 0.66, - "grad_norm": 0.8472758677182067, - "learning_rate": 5.5803818532676e-06, - "loss": 0.402, + "epoch": 0.52, + "grad_norm": 0.5310161959798916, + "learning_rate": 9.683126257131801e-06, + "loss": 0.2938, "step": 11424 }, { - "epoch": 0.66, - "grad_norm": 0.8200728581102539, - "learning_rate": 5.578712628180225e-06, - "loss": 0.4662, + "epoch": 0.52, + "grad_norm": 0.6213188308901539, + "learning_rate": 9.681639088608803e-06, + "loss": 0.2943, "step": 11425 }, { - "epoch": 0.66, - "grad_norm": 0.3875008712683409, - "learning_rate": 5.577043556201838e-06, - "loss": 0.2686, + "epoch": 0.52, + "grad_norm": 0.860346333678611, + "learning_rate": 9.68015192713401e-06, + "loss": 0.3872, "step": 11426 }, { - "epoch": 0.66, - "grad_norm": 0.2416269514785201, - "learning_rate": 5.575374637390246e-06, - "loss": 0.2104, + "epoch": 0.52, + "grad_norm": 0.4108038617526493, + "learning_rate": 9.678664772740342e-06, + "loss": 0.2645, "step": 11427 }, { - "epoch": 0.66, - "grad_norm": 0.4898967331695952, - "learning_rate": 5.573705871803241e-06, - "loss": 0.2438, + "epoch": 0.52, + "grad_norm": 0.37620974391353745, + "learning_rate": 9.677177625460724e-06, + "loss": 0.2642, "step": 11428 }, { - "epoch": 0.66, - "grad_norm": 0.3172534747512469, - "learning_rate": 5.57203725949861e-06, - "loss": 0.2663, + "epoch": 0.53, + "grad_norm": 0.2999353062838426, + "learning_rate": 9.675690485328085e-06, + "loss": 0.195, "step": 11429 }, { - "epoch": 0.66, - "grad_norm": 0.9321900723666333, - "learning_rate": 5.570368800534139e-06, - "loss": 0.3019, + "epoch": 0.53, + "grad_norm": 0.36140638604317643, + "learning_rate": 9.674203352375346e-06, + "loss": 0.2791, "step": 11430 }, { - "epoch": 0.66, - "grad_norm": 0.4761349234786449, - "learning_rate": 5.568700494967603e-06, - "loss": 0.3314, + "epoch": 0.53, + "grad_norm": 0.7273629207317491, + "learning_rate": 9.672716226635432e-06, + "loss": 0.4268, "step": 11431 }, { - "epoch": 0.66, - "grad_norm": 0.32465852015725005, - "learning_rate": 5.567032342856781e-06, - "loss": 0.2739, + "epoch": 0.53, + "grad_norm": 0.3728610525623341, + "learning_rate": 9.671229108141263e-06, + "loss": 0.2594, "step": 11432 }, { - "epoch": 0.66, - "grad_norm": 0.1793227954302825, - "learning_rate": 5.565364344259438e-06, - "loss": 0.1204, + "epoch": 0.53, + "grad_norm": 0.34425995831737904, + "learning_rate": 9.669741996925765e-06, + "loss": 0.2621, "step": 11433 }, { - "epoch": 0.66, - "grad_norm": 0.3672817087183113, - "learning_rate": 5.563696499233337e-06, - "loss": 0.3124, + "epoch": 0.53, + "grad_norm": 1.3685462751774609, + "learning_rate": 9.668254893021862e-06, + "loss": 0.7272, "step": 11434 }, { - "epoch": 0.66, - "grad_norm": 0.33119602560147793, - "learning_rate": 5.562028807836233e-06, - "loss": 0.2328, + "epoch": 0.53, + "grad_norm": 0.5532222278961284, + "learning_rate": 9.666767796462477e-06, + "loss": 0.2928, "step": 11435 }, { - "epoch": 0.66, - "grad_norm": 0.7227754858322918, - "learning_rate": 5.560361270125884e-06, - "loss": 0.3104, + "epoch": 0.53, + "grad_norm": 0.27917039264465465, + "learning_rate": 9.665280707280528e-06, + "loss": 0.2671, "step": 11436 }, { - "epoch": 0.66, - "grad_norm": 1.0485837440591883, - "learning_rate": 5.558693886160032e-06, - "loss": 0.5809, + "epoch": 0.53, + "grad_norm": 0.43656386636292366, + "learning_rate": 9.663793625508945e-06, + "loss": 0.293, "step": 11437 }, { - "epoch": 0.66, - "grad_norm": 0.32761246481860695, - "learning_rate": 5.557026655996422e-06, - "loss": 0.2612, + "epoch": 0.53, + "grad_norm": 0.28771681496248114, + "learning_rate": 9.662306551180646e-06, + "loss": 0.1219, "step": 11438 }, { - "epoch": 0.66, - "grad_norm": 0.37388839229218224, - "learning_rate": 5.555359579692782e-06, - "loss": 0.3131, + "epoch": 0.53, + "grad_norm": 0.3954165854733189, + "learning_rate": 9.660819484328555e-06, + "loss": 0.2817, "step": 11439 }, { - "epoch": 0.66, - "grad_norm": 0.339682737117197, - "learning_rate": 5.553692657306853e-06, - "loss": 0.1579, + "epoch": 0.53, + "grad_norm": 0.4100873445124824, + "learning_rate": 9.659332424985593e-06, + "loss": 0.3259, "step": 11440 }, { - "epoch": 0.66, - "grad_norm": 0.33306031343411013, - "learning_rate": 5.552025888896356e-06, - "loss": 0.2583, + "epoch": 0.53, + "grad_norm": 0.6575522012656696, + "learning_rate": 9.65784537318468e-06, + "loss": 0.3048, "step": 11441 }, { - "epoch": 0.66, - "grad_norm": 0.9423663180233574, - "learning_rate": 5.550359274519012e-06, - "loss": 0.4231, + "epoch": 0.53, + "grad_norm": 0.408816462969731, + "learning_rate": 9.656358328958743e-06, + "loss": 0.3029, "step": 11442 }, { - "epoch": 0.66, - "grad_norm": 0.3801574172959991, - "learning_rate": 5.54869281423253e-06, - "loss": 0.2682, + "epoch": 0.53, + "grad_norm": 0.32895365550896283, + "learning_rate": 9.654871292340703e-06, + "loss": 0.2107, "step": 11443 }, { - "epoch": 0.66, - "grad_norm": 0.37924059353551987, - "learning_rate": 5.547026508094629e-06, - "loss": 0.2664, + "epoch": 0.53, + "grad_norm": 0.3625582672326299, + "learning_rate": 9.65338426336348e-06, + "loss": 0.3024, "step": 11444 }, { - "epoch": 0.66, - "grad_norm": 0.5090379498547105, - "learning_rate": 5.545360356163009e-06, - "loss": 0.3892, + "epoch": 0.53, + "grad_norm": 0.3515425836853656, + "learning_rate": 9.651897242059992e-06, + "loss": 0.2165, "step": 11445 }, { - "epoch": 0.66, - "grad_norm": 0.22891711398429887, - "learning_rate": 5.54369435849537e-06, - "loss": 0.182, + "epoch": 0.53, + "grad_norm": 1.0243762632578093, + "learning_rate": 9.650410228463165e-06, + "loss": 0.6793, "step": 11446 }, { - "epoch": 0.66, - "grad_norm": 0.47266754852632326, - "learning_rate": 5.5420285151493995e-06, - "loss": 0.3223, + "epoch": 0.53, + "grad_norm": 0.7515515438749892, + "learning_rate": 9.648923222605921e-06, + "loss": 0.3942, "step": 11447 }, { - "epoch": 0.66, - "grad_norm": 0.6746597708337676, - "learning_rate": 5.540362826182791e-06, - "loss": 0.3292, + "epoch": 0.53, + "grad_norm": 0.2511288921956044, + "learning_rate": 9.647436224521179e-06, + "loss": 0.2136, "step": 11448 }, { - "epoch": 0.66, - "grad_norm": 1.2132277880660471, - "learning_rate": 5.538697291653228e-06, - "loss": 0.7311, + "epoch": 0.53, + "grad_norm": 0.3010626523734168, + "learning_rate": 9.645949234241855e-06, + "loss": 0.1905, "step": 11449 }, { - "epoch": 0.66, - "grad_norm": 0.3082460870951119, - "learning_rate": 5.537031911618385e-06, - "loss": 0.2031, + "epoch": 0.53, + "grad_norm": 1.6032184236839973, + "learning_rate": 9.644462251800876e-06, + "loss": 0.792, "step": 11450 }, { - "epoch": 0.66, - "grad_norm": 0.3597103355681424, - "learning_rate": 5.53536668613593e-06, - "loss": 0.3081, + "epoch": 0.53, + "grad_norm": 0.322202416207073, + "learning_rate": 9.64297527723116e-06, + "loss": 0.2098, "step": 11451 }, { - "epoch": 0.66, - "grad_norm": 0.41272000121983643, - "learning_rate": 5.5337016152635396e-06, - "loss": 0.2683, + "epoch": 0.53, + "grad_norm": 0.35350893925203253, + "learning_rate": 9.641488310565628e-06, + "loss": 0.3195, "step": 11452 }, { - "epoch": 0.66, - "grad_norm": 0.3017020610938978, - "learning_rate": 5.53203669905887e-06, - "loss": 0.2304, + "epoch": 0.53, + "grad_norm": 0.8993610221272335, + "learning_rate": 9.640001351837198e-06, + "loss": 0.434, "step": 11453 }, { - "epoch": 0.66, - "grad_norm": 0.43437243190707603, - "learning_rate": 5.530371937579577e-06, - "loss": 0.2326, + "epoch": 0.53, + "grad_norm": 0.32850023361112574, + "learning_rate": 9.638514401078789e-06, + "loss": 0.2184, "step": 11454 }, { - "epoch": 0.66, - "grad_norm": 0.3317204639922343, - "learning_rate": 5.528707330883308e-06, - "loss": 0.2883, + "epoch": 0.53, + "grad_norm": 0.3094838477869714, + "learning_rate": 9.637027458323327e-06, + "loss": 0.1956, "step": 11455 }, { - "epoch": 0.66, - "grad_norm": 0.3327247393119727, - "learning_rate": 5.527042879027715e-06, - "loss": 0.2176, + "epoch": 0.53, + "grad_norm": 0.3892056663489639, + "learning_rate": 9.635540523603725e-06, + "loss": 0.3149, "step": 11456 }, { - "epoch": 0.66, - "grad_norm": 1.3365246378891962, - "learning_rate": 5.525378582070438e-06, - "loss": 0.7339, + "epoch": 0.53, + "grad_norm": 0.40090164086707025, + "learning_rate": 9.634053596952907e-06, + "loss": 0.2714, "step": 11457 }, { - "epoch": 0.66, - "grad_norm": 0.3413124951106821, - "learning_rate": 5.523714440069104e-06, - "loss": 0.3221, + "epoch": 0.53, + "grad_norm": 0.837736371935118, + "learning_rate": 9.632566678403784e-06, + "loss": 0.3355, "step": 11458 }, { - "epoch": 0.66, - "grad_norm": 0.3312639650942997, - "learning_rate": 5.522050453081349e-06, - "loss": 0.2081, + "epoch": 0.53, + "grad_norm": 0.3958149049475104, + "learning_rate": 9.631079767989284e-06, + "loss": 0.3129, "step": 11459 }, { - "epoch": 0.66, - "grad_norm": 0.25778157585553163, - "learning_rate": 5.5203866211647904e-06, - "loss": 0.1958, + "epoch": 0.53, + "grad_norm": 0.43668091784294355, + "learning_rate": 9.629592865742323e-06, + "loss": 0.2782, "step": 11460 }, { - "epoch": 0.66, - "grad_norm": 0.9463391804603914, - "learning_rate": 5.518722944377053e-06, - "loss": 0.507, + "epoch": 0.53, + "grad_norm": 0.32951508316024725, + "learning_rate": 9.62810597169582e-06, + "loss": 0.1818, "step": 11461 }, { - "epoch": 0.66, - "grad_norm": 0.37823758384435996, - "learning_rate": 5.517059422775748e-06, - "loss": 0.3032, + "epoch": 0.53, + "grad_norm": 0.43646122909435, + "learning_rate": 9.626619085882689e-06, + "loss": 0.2697, "step": 11462 }, { - "epoch": 0.66, - "grad_norm": 0.2921793595846805, - "learning_rate": 5.515396056418482e-06, - "loss": 0.2408, + "epoch": 0.53, + "grad_norm": 0.46476465391500404, + "learning_rate": 9.625132208335854e-06, + "loss": 0.3046, "step": 11463 }, { - "epoch": 0.66, - "grad_norm": 0.5883154221839674, - "learning_rate": 5.513732845362856e-06, - "loss": 0.4082, + "epoch": 0.53, + "grad_norm": 0.3834897371664547, + "learning_rate": 9.62364533908823e-06, + "loss": 0.2607, "step": 11464 }, { - "epoch": 0.66, - "grad_norm": 0.2867425278106484, - "learning_rate": 5.51206978966647e-06, - "loss": 0.1848, + "epoch": 0.53, + "grad_norm": 0.8226910349365744, + "learning_rate": 9.622158478172736e-06, + "loss": 0.4101, "step": 11465 }, { - "epoch": 0.66, - "grad_norm": 0.24205597069931772, - "learning_rate": 5.510406889386914e-06, - "loss": 0.2085, + "epoch": 0.53, + "grad_norm": 0.38305506334375866, + "learning_rate": 9.620671625622287e-06, + "loss": 0.2823, "step": 11466 }, { - "epoch": 0.66, - "grad_norm": 1.2740952812449393, - "learning_rate": 5.5087441445817765e-06, - "loss": 0.7298, + "epoch": 0.53, + "grad_norm": 0.376581375482681, + "learning_rate": 9.619184781469804e-06, + "loss": 0.2515, "step": 11467 }, { - "epoch": 0.66, - "grad_norm": 0.5389920094979618, - "learning_rate": 5.507081555308631e-06, - "loss": 0.3645, + "epoch": 0.53, + "grad_norm": 0.2899337141137829, + "learning_rate": 9.617697945748204e-06, + "loss": 0.1654, "step": 11468 }, { - "epoch": 0.66, - "grad_norm": 0.3339186256662309, - "learning_rate": 5.505419121625062e-06, - "loss": 0.2649, + "epoch": 0.53, + "grad_norm": 0.3702992939202412, + "learning_rate": 9.616211118490404e-06, + "loss": 0.2566, "step": 11469 }, { - "epoch": 0.66, - "grad_norm": 0.3448488174392606, - "learning_rate": 5.503756843588635e-06, - "loss": 0.3087, + "epoch": 0.53, + "grad_norm": 0.7611140009130796, + "learning_rate": 9.614724299729319e-06, + "loss": 0.3857, "step": 11470 }, { - "epoch": 0.66, - "grad_norm": 0.3839012951403879, - "learning_rate": 5.502094721256916e-06, - "loss": 0.2503, + "epoch": 0.53, + "grad_norm": 0.4185914253704048, + "learning_rate": 9.613237489497861e-06, + "loss": 0.2841, "step": 11471 }, { - "epoch": 0.66, - "grad_norm": 0.24422334705090812, - "learning_rate": 5.500432754687464e-06, - "loss": 0.1323, + "epoch": 0.53, + "grad_norm": 0.3556707860652959, + "learning_rate": 9.611750687828958e-06, + "loss": 0.2665, "step": 11472 }, { - "epoch": 0.66, - "grad_norm": 1.0341671776790258, - "learning_rate": 5.498770943937828e-06, - "loss": 0.6798, + "epoch": 0.53, + "grad_norm": 1.31587248664105, + "learning_rate": 9.610263894755523e-06, + "loss": 0.6628, "step": 11473 }, { - "epoch": 0.66, - "grad_norm": 0.2849180917522527, - "learning_rate": 5.497109289065563e-06, - "loss": 0.2485, + "epoch": 0.53, + "grad_norm": 0.24921336836317573, + "learning_rate": 9.608777110310467e-06, + "loss": 0.1312, "step": 11474 }, { - "epoch": 0.66, - "grad_norm": 0.5072468134724325, - "learning_rate": 5.495447790128211e-06, - "loss": 0.3332, + "epoch": 0.53, + "grad_norm": 0.4261668666381127, + "learning_rate": 9.607290334526704e-06, + "loss": 0.2881, "step": 11475 }, { - "epoch": 0.66, - "grad_norm": 0.6364321717273036, - "learning_rate": 5.493786447183308e-06, - "loss": 0.3118, + "epoch": 0.53, + "grad_norm": 0.33894494953454546, + "learning_rate": 9.605803567437158e-06, + "loss": 0.2807, "step": 11476 }, { - "epoch": 0.66, - "grad_norm": 0.603457958394203, - "learning_rate": 5.4921252602883834e-06, - "loss": 0.2669, + "epoch": 0.53, + "grad_norm": 0.6633081680127221, + "learning_rate": 9.604316809074742e-06, + "loss": 0.4015, "step": 11477 }, { - "epoch": 0.66, - "grad_norm": 0.24790100570896947, - "learning_rate": 5.490464229500969e-06, - "loss": 0.2388, + "epoch": 0.53, + "grad_norm": 0.5041318621952977, + "learning_rate": 9.602830059472371e-06, + "loss": 0.2413, "step": 11478 }, { - "epoch": 0.66, - "grad_norm": 0.26144460223892885, - "learning_rate": 5.488803354878587e-06, - "loss": 0.2021, + "epoch": 0.53, + "grad_norm": 0.5517624589329848, + "learning_rate": 9.601343318662956e-06, + "loss": 0.3919, "step": 11479 }, { - "epoch": 0.66, - "grad_norm": 0.5581526805294116, - "learning_rate": 5.487142636478749e-06, - "loss": 0.3137, + "epoch": 0.53, + "grad_norm": 0.23437734206630984, + "learning_rate": 9.59985658667942e-06, + "loss": 0.1855, "step": 11480 }, { - "epoch": 0.66, - "grad_norm": 0.46154960985058663, - "learning_rate": 5.485482074358968e-06, - "loss": 0.3153, + "epoch": 0.53, + "grad_norm": 0.4424236742549651, + "learning_rate": 9.598369863554674e-06, + "loss": 0.2494, "step": 11481 }, { - "epoch": 0.66, - "grad_norm": 0.34216719846377985, - "learning_rate": 5.48382166857675e-06, - "loss": 0.2507, + "epoch": 0.53, + "grad_norm": 0.4978427178608124, + "learning_rate": 9.59688314932163e-06, + "loss": 0.3735, "step": 11482 }, { - "epoch": 0.66, - "grad_norm": 0.5615865278512538, - "learning_rate": 5.482161419189591e-06, - "loss": 0.2432, + "epoch": 0.53, + "grad_norm": 0.40522052234231914, + "learning_rate": 9.595396444013206e-06, + "loss": 0.3219, "step": 11483 }, { - "epoch": 0.66, - "grad_norm": 0.23731768796120098, - "learning_rate": 5.4805013262549885e-06, - "loss": 0.1988, + "epoch": 0.53, + "grad_norm": 0.32504473580746174, + "learning_rate": 9.59390974766231e-06, + "loss": 0.1787, "step": 11484 }, { - "epoch": 0.66, - "grad_norm": 1.286312030117045, - "learning_rate": 5.478841389830427e-06, - "loss": 0.4339, + "epoch": 0.53, + "grad_norm": 0.5151314633756906, + "learning_rate": 9.592423060301866e-06, + "loss": 0.3633, "step": 11485 }, { - "epoch": 0.66, - "grad_norm": 0.30921205394689105, - "learning_rate": 5.477181609973399e-06, - "loss": 0.2737, + "epoch": 0.53, + "grad_norm": 0.28335899724641594, + "learning_rate": 9.590936381964781e-06, + "loss": 0.2114, "step": 11486 }, { - "epoch": 0.66, - "grad_norm": 0.38646319079933794, - "learning_rate": 5.475521986741377e-06, - "loss": 0.2982, + "epoch": 0.53, + "grad_norm": 0.3576648760206836, + "learning_rate": 9.58944971268397e-06, + "loss": 0.233, "step": 11487 }, { - "epoch": 0.66, - "grad_norm": 1.0393640176508394, - "learning_rate": 5.4738625201918324e-06, - "loss": 0.5382, + "epoch": 0.53, + "grad_norm": 0.36092478501131753, + "learning_rate": 9.587963052492344e-06, + "loss": 0.3166, "step": 11488 }, { - "epoch": 0.66, - "grad_norm": 0.2587507359924883, - "learning_rate": 5.472203210382231e-06, - "loss": 0.0732, + "epoch": 0.53, + "grad_norm": 1.285784762132118, + "learning_rate": 9.586476401422822e-06, + "loss": 0.8697, "step": 11489 }, { - "epoch": 0.66, - "grad_norm": 0.25179914104749707, - "learning_rate": 5.470544057370042e-06, - "loss": 0.2673, + "epoch": 0.53, + "grad_norm": 0.3271433148304856, + "learning_rate": 9.584989759508313e-06, + "loss": 0.2034, "step": 11490 }, { - "epoch": 0.66, - "grad_norm": 0.26847000724967895, - "learning_rate": 5.468885061212716e-06, - "loss": 0.1861, + "epoch": 0.53, + "grad_norm": 1.165163853497228, + "learning_rate": 9.583503126781734e-06, + "loss": 0.572, "step": 11491 }, { - "epoch": 0.66, - "grad_norm": 0.33831228269888364, - "learning_rate": 5.467226221967707e-06, - "loss": 0.2408, + "epoch": 0.53, + "grad_norm": 0.3473864364153112, + "learning_rate": 9.582016503275989e-06, + "loss": 0.3194, "step": 11492 }, { - "epoch": 0.66, - "grad_norm": 0.49982395528851653, - "learning_rate": 5.465567539692455e-06, - "loss": 0.3434, + "epoch": 0.53, + "grad_norm": 0.35292453358758263, + "learning_rate": 9.580529889023998e-06, + "loss": 0.2684, "step": 11493 }, { - "epoch": 0.66, - "grad_norm": 0.3667710672250758, - "learning_rate": 5.463909014444409e-06, - "loss": 0.316, + "epoch": 0.53, + "grad_norm": 0.22612845669227344, + "learning_rate": 9.579043284058672e-06, + "loss": 0.1339, "step": 11494 }, { - "epoch": 0.66, - "grad_norm": 0.5919667616125639, - "learning_rate": 5.462250646280997e-06, - "loss": 0.174, + "epoch": 0.53, + "grad_norm": 0.35773299212606535, + "learning_rate": 9.577556688412922e-06, + "loss": 0.3267, "step": 11495 }, { - "epoch": 0.66, - "grad_norm": 0.2919001787559771, - "learning_rate": 5.460592435259651e-06, - "loss": 0.2295, + "epoch": 0.53, + "grad_norm": 0.9268158973363738, + "learning_rate": 9.576070102119657e-06, + "loss": 0.4907, "step": 11496 }, { - "epoch": 0.66, - "grad_norm": 0.45198607078143405, - "learning_rate": 5.458934381437793e-06, - "loss": 0.3138, + "epoch": 0.53, + "grad_norm": 0.3782578944241578, + "learning_rate": 9.574583525211795e-06, + "loss": 0.2062, "step": 11497 }, { - "epoch": 0.66, - "grad_norm": 0.29738566743482925, - "learning_rate": 5.457276484872839e-06, - "loss": 0.2406, + "epoch": 0.53, + "grad_norm": 0.676121135766827, + "learning_rate": 9.573096957722243e-06, + "loss": 0.3766, "step": 11498 }, { - "epoch": 0.66, - "grad_norm": 0.34804859280565137, - "learning_rate": 5.455618745622209e-06, - "loss": 0.2933, + "epoch": 0.53, + "grad_norm": 0.41218950110309965, + "learning_rate": 9.571610399683911e-06, + "loss": 0.271, "step": 11499 }, { - "epoch": 0.66, - "grad_norm": 1.1076657921418147, - "learning_rate": 5.453961163743304e-06, - "loss": 0.6148, + "epoch": 0.53, + "grad_norm": 0.22735672429179699, + "learning_rate": 9.570123851129715e-06, + "loss": 0.1822, "step": 11500 }, { - "epoch": 0.66, - "grad_norm": 0.7959871319468619, - "learning_rate": 5.452303739293532e-06, - "loss": 0.3982, + "epoch": 0.53, + "grad_norm": 1.3586771869746004, + "learning_rate": 9.568637312092555e-06, + "loss": 0.7776, "step": 11501 }, { - "epoch": 0.66, - "grad_norm": 0.2500213796943707, - "learning_rate": 5.4506464723302784e-06, - "loss": 0.2233, + "epoch": 0.53, + "grad_norm": 0.590202482734045, + "learning_rate": 9.567150782605358e-06, + "loss": 0.3229, "step": 11502 }, { - "epoch": 0.66, - "grad_norm": 0.39779781556021504, - "learning_rate": 5.448989362910949e-06, - "loss": 0.2594, + "epoch": 0.53, + "grad_norm": 0.30884804634170515, + "learning_rate": 9.565664262701023e-06, + "loss": 0.2464, "step": 11503 }, { - "epoch": 0.66, - "grad_norm": 0.5295136834773769, - "learning_rate": 5.447332411092921e-06, - "loss": 0.3786, + "epoch": 0.53, + "grad_norm": 0.7531336961030599, + "learning_rate": 9.564177752412463e-06, + "loss": 0.3844, "step": 11504 }, { - "epoch": 0.66, - "grad_norm": 0.23936075071175678, - "learning_rate": 5.445675616933576e-06, - "loss": 0.1819, + "epoch": 0.53, + "grad_norm": 0.689126686314734, + "learning_rate": 9.562691251772583e-06, + "loss": 0.3107, "step": 11505 }, { - "epoch": 0.66, - "grad_norm": 0.34567033680043985, - "learning_rate": 5.444018980490284e-06, - "loss": 0.2925, + "epoch": 0.53, + "grad_norm": 0.24896524377844456, + "learning_rate": 9.5612047608143e-06, + "loss": 0.2016, "step": 11506 }, { - "epoch": 0.66, - "grad_norm": 0.7141315652138195, - "learning_rate": 5.4423625018204226e-06, - "loss": 0.3823, + "epoch": 0.53, + "grad_norm": 0.35394589055156567, + "learning_rate": 9.55971827957052e-06, + "loss": 0.2626, "step": 11507 }, { - "epoch": 0.66, - "grad_norm": 0.32578121816347494, - "learning_rate": 5.440706180981352e-06, - "loss": 0.2104, + "epoch": 0.53, + "grad_norm": 0.3955376312495729, + "learning_rate": 9.558231808074157e-06, + "loss": 0.2767, "step": 11508 }, { - "epoch": 0.66, - "grad_norm": 0.633359243897076, - "learning_rate": 5.439050018030432e-06, - "loss": 0.368, + "epoch": 0.53, + "grad_norm": 0.5132952207694231, + "learning_rate": 9.55674534635811e-06, + "loss": 0.3359, "step": 11509 }, { - "epoch": 0.66, - "grad_norm": 0.3036594850395294, - "learning_rate": 5.437394013025012e-06, - "loss": 0.2951, + "epoch": 0.53, + "grad_norm": 0.9645366780122393, + "learning_rate": 9.555258894455298e-06, + "loss": 0.3421, "step": 11510 }, { - "epoch": 0.66, - "grad_norm": 0.3218116260567253, - "learning_rate": 5.435738166022437e-06, - "loss": 0.2107, + "epoch": 0.53, + "grad_norm": 0.31286417768425295, + "learning_rate": 9.553772452398625e-06, + "loss": 0.2619, "step": 11511 }, { - "epoch": 0.66, - "grad_norm": 0.27971976076190586, - "learning_rate": 5.434082477080058e-06, - "loss": 0.1691, + "epoch": 0.53, + "grad_norm": 0.5820607574168946, + "learning_rate": 9.552286020221e-06, + "loss": 0.351, "step": 11512 }, { - "epoch": 0.66, - "grad_norm": 0.37628575012172416, - "learning_rate": 5.432426946255206e-06, - "loss": 0.2813, + "epoch": 0.53, + "grad_norm": 0.21536951924965123, + "learning_rate": 9.55079959795533e-06, + "loss": 0.1352, "step": 11513 }, { - "epoch": 0.66, - "grad_norm": 0.29629151440877577, - "learning_rate": 5.4307715736052125e-06, - "loss": 0.2711, + "epoch": 0.53, + "grad_norm": 0.8762832582216499, + "learning_rate": 9.549313185634523e-06, + "loss": 0.3782, "step": 11514 }, { - "epoch": 0.66, - "grad_norm": 0.7761394695030567, - "learning_rate": 5.429116359187403e-06, - "loss": 0.3176, + "epoch": 0.53, + "grad_norm": 0.31732354238067395, + "learning_rate": 9.54782678329149e-06, + "loss": 0.2645, "step": 11515 }, { - "epoch": 0.66, - "grad_norm": 1.1914877581468113, - "learning_rate": 5.427461303059096e-06, - "loss": 0.8457, + "epoch": 0.53, + "grad_norm": 0.5904115097706445, + "learning_rate": 9.546340390959134e-06, + "loss": 0.2894, "step": 11516 }, { - "epoch": 0.66, - "grad_norm": 0.3173092424468873, - "learning_rate": 5.425806405277609e-06, - "loss": 0.2569, + "epoch": 0.53, + "grad_norm": 0.8290513744586341, + "learning_rate": 9.544854008670366e-06, + "loss": 0.3992, "step": 11517 }, { - "epoch": 0.66, - "grad_norm": 0.24185940664168293, - "learning_rate": 5.424151665900246e-06, - "loss": 0.1908, + "epoch": 0.53, + "grad_norm": 0.30181430877360416, + "learning_rate": 9.543367636458089e-06, + "loss": 0.227, "step": 11518 }, { - "epoch": 0.66, - "grad_norm": 1.2671168707668683, - "learning_rate": 5.422497084984317e-06, - "loss": 0.5888, + "epoch": 0.53, + "grad_norm": 0.4059646242566253, + "learning_rate": 9.541881274355214e-06, + "loss": 0.3388, "step": 11519 }, { - "epoch": 0.66, - "grad_norm": 0.3204691857610319, - "learning_rate": 5.420842662587118e-06, - "loss": 0.2625, + "epoch": 0.53, + "grad_norm": 0.2561918372961943, + "learning_rate": 9.54039492239465e-06, + "loss": 0.1189, "step": 11520 }, { - "epoch": 0.66, - "grad_norm": 0.837128435683929, - "learning_rate": 5.41918839876594e-06, - "loss": 0.3109, + "epoch": 0.53, + "grad_norm": 0.4021804568327228, + "learning_rate": 9.538908580609296e-06, + "loss": 0.3159, "step": 11521 }, { - "epoch": 0.66, - "grad_norm": 0.28996702322416396, - "learning_rate": 5.41753429357807e-06, - "loss": 0.2688, + "epoch": 0.53, + "grad_norm": 0.9089202504352452, + "learning_rate": 9.537422249032059e-06, + "loss": 0.475, "step": 11522 }, { - "epoch": 0.66, - "grad_norm": 0.3468731094237857, - "learning_rate": 5.4158803470807875e-06, - "loss": 0.2796, + "epoch": 0.53, + "grad_norm": 0.2974682866304729, + "learning_rate": 9.53593592769585e-06, + "loss": 0.2212, "step": 11523 }, { - "epoch": 0.66, - "grad_norm": 0.25427661796028395, - "learning_rate": 5.414226559331375e-06, - "loss": 0.1582, + "epoch": 0.53, + "grad_norm": 0.42981547972156187, + "learning_rate": 9.534449616633574e-06, + "loss": 0.3246, "step": 11524 }, { - "epoch": 0.66, - "grad_norm": 0.28924380253126614, - "learning_rate": 5.4125729303871e-06, - "loss": 0.229, + "epoch": 0.53, + "grad_norm": 1.283905459616831, + "learning_rate": 9.532963315878133e-06, + "loss": 0.7906, "step": 11525 }, { - "epoch": 0.66, - "grad_norm": 0.371977089760644, - "learning_rate": 5.410919460305226e-06, - "loss": 0.2852, + "epoch": 0.53, + "grad_norm": 0.3347864899338461, + "learning_rate": 9.531477025462433e-06, + "loss": 0.1852, "step": 11526 }, { - "epoch": 0.66, - "grad_norm": 0.7166682064653169, - "learning_rate": 5.409266149143011e-06, - "loss": 0.4062, + "epoch": 0.53, + "grad_norm": 0.34079882891682317, + "learning_rate": 9.529990745419381e-06, + "loss": 0.2821, "step": 11527 }, { - "epoch": 0.66, - "grad_norm": 0.5786444654582379, - "learning_rate": 5.407612996957716e-06, - "loss": 0.3038, + "epoch": 0.53, + "grad_norm": 0.34799159191397055, + "learning_rate": 9.528504475781882e-06, + "loss": 0.2716, "step": 11528 }, { - "epoch": 0.66, - "grad_norm": 0.34804236614724643, - "learning_rate": 5.405960003806585e-06, - "loss": 0.2652, + "epoch": 0.53, + "grad_norm": 0.3997992685807397, + "learning_rate": 9.52701821658284e-06, + "loss": 0.1911, "step": 11529 }, { - "epoch": 0.66, - "grad_norm": 0.22885662878947813, - "learning_rate": 5.4043071697468604e-06, - "loss": 0.2186, + "epoch": 0.53, + "grad_norm": 0.6219409621018265, + "learning_rate": 9.525531967855159e-06, + "loss": 0.3631, "step": 11530 }, { - "epoch": 0.66, - "grad_norm": 0.6317423866420934, - "learning_rate": 5.4026544948357795e-06, - "loss": 0.2647, + "epoch": 0.53, + "grad_norm": 0.39933345292519146, + "learning_rate": 9.524045729631738e-06, + "loss": 0.3516, "step": 11531 }, { - "epoch": 0.66, - "grad_norm": 0.3832186602134028, - "learning_rate": 5.401001979130578e-06, - "loss": 0.273, + "epoch": 0.53, + "grad_norm": 1.1166430840848653, + "learning_rate": 9.52255950194549e-06, + "loss": 0.3612, "step": 11532 }, { - "epoch": 0.66, - "grad_norm": 0.46543784414296346, - "learning_rate": 5.399349622688479e-06, - "loss": 0.3423, + "epoch": 0.53, + "grad_norm": 0.30316440093048463, + "learning_rate": 9.521073284829315e-06, + "loss": 0.2382, "step": 11533 }, { - "epoch": 0.66, - "grad_norm": 0.444348249465614, - "learning_rate": 5.397697425566707e-06, - "loss": 0.2682, + "epoch": 0.53, + "grad_norm": 0.2512861874036437, + "learning_rate": 9.519587078316115e-06, + "loss": 0.1424, "step": 11534 }, { - "epoch": 0.66, - "grad_norm": 0.32841907606679893, - "learning_rate": 5.396045387822474e-06, - "loss": 0.2644, + "epoch": 0.53, + "grad_norm": 0.3804224115024147, + "learning_rate": 9.51810088243879e-06, + "loss": 0.308, "step": 11535 }, { - "epoch": 0.66, - "grad_norm": 0.29513111111681184, - "learning_rate": 5.394393509512987e-06, - "loss": 0.1942, + "epoch": 0.53, + "grad_norm": 0.34004487062489147, + "learning_rate": 9.516614697230252e-06, + "loss": 0.2052, "step": 11536 }, { - "epoch": 0.66, - "grad_norm": 0.4622638865061429, - "learning_rate": 5.392741790695459e-06, - "loss": 0.346, + "epoch": 0.53, + "grad_norm": 0.9817272443613348, + "learning_rate": 9.515128522723398e-06, + "loss": 0.5686, "step": 11537 }, { - "epoch": 0.66, - "grad_norm": 0.2652974325826972, - "learning_rate": 5.391090231427086e-06, - "loss": 0.2254, + "epoch": 0.53, + "grad_norm": 0.4493710073024638, + "learning_rate": 9.513642358951133e-06, + "loss": 0.3506, "step": 11538 }, { - "epoch": 0.66, - "grad_norm": 0.8429674842171873, - "learning_rate": 5.389438831765059e-06, - "loss": 0.4517, + "epoch": 0.53, + "grad_norm": 0.2845727005865016, + "learning_rate": 9.51215620594635e-06, + "loss": 0.2114, "step": 11539 }, { - "epoch": 0.66, - "grad_norm": 1.2820194430463543, - "learning_rate": 5.387787591766562e-06, - "loss": 0.8453, + "epoch": 0.53, + "grad_norm": 0.27225981121147663, + "learning_rate": 9.510670063741965e-06, + "loss": 0.1699, "step": 11540 }, { - "epoch": 0.66, - "grad_norm": 0.25818239711603863, - "learning_rate": 5.386136511488789e-06, - "loss": 0.211, + "epoch": 0.53, + "grad_norm": 0.65804836733012, + "learning_rate": 9.509183932370872e-06, + "loss": 0.4474, "step": 11541 }, { - "epoch": 0.66, - "grad_norm": 0.4809465676464306, - "learning_rate": 5.384485590988908e-06, - "loss": 0.3567, + "epoch": 0.53, + "grad_norm": 0.3542132514944083, + "learning_rate": 9.507697811865975e-06, + "loss": 0.2488, "step": 11542 }, { - "epoch": 0.66, - "grad_norm": 0.5760262213588997, - "learning_rate": 5.382834830324093e-06, - "loss": 0.2892, + "epoch": 0.53, + "grad_norm": 0.40180888189751734, + "learning_rate": 9.506211702260172e-06, + "loss": 0.3024, "step": 11543 }, { - "epoch": 0.66, - "grad_norm": 0.2266235624622428, - "learning_rate": 5.381184229551506e-06, - "loss": 0.1549, + "epoch": 0.53, + "grad_norm": 0.8475062103142702, + "learning_rate": 9.504725603586365e-06, + "loss": 0.3678, "step": 11544 }, { - "epoch": 0.66, - "grad_norm": 0.5227017467399918, - "learning_rate": 5.379533788728313e-06, - "loss": 0.3697, + "epoch": 0.53, + "grad_norm": 0.38515022982294955, + "learning_rate": 9.503239515877457e-06, + "loss": 0.2637, "step": 11545 }, { - "epoch": 0.66, - "grad_norm": 0.48189898826149535, - "learning_rate": 5.377883507911668e-06, - "loss": 0.4144, + "epoch": 0.53, + "grad_norm": 0.2119658776146596, + "learning_rate": 9.501753439166348e-06, + "loss": 0.0901, "step": 11546 }, { - "epoch": 0.66, - "grad_norm": 0.2966848658482328, - "learning_rate": 5.376233387158722e-06, - "loss": 0.1897, + "epoch": 0.53, + "grad_norm": 0.36360739451863777, + "learning_rate": 9.500267373485938e-06, + "loss": 0.2743, "step": 11547 }, { - "epoch": 0.66, - "grad_norm": 0.6944979318698845, - "learning_rate": 5.3745834265266054e-06, - "loss": 0.4308, + "epoch": 0.53, + "grad_norm": 0.3693671248971325, + "learning_rate": 9.498781318869123e-06, + "loss": 0.298, "step": 11548 }, { - "epoch": 0.66, - "grad_norm": 0.2628428496235638, - "learning_rate": 5.372933626072472e-06, - "loss": 0.2565, + "epoch": 0.53, + "grad_norm": 0.8571189591953227, + "learning_rate": 9.497295275348811e-06, + "loss": 0.3751, "step": 11549 }, { - "epoch": 0.66, - "grad_norm": 0.3982433001251939, - "learning_rate": 5.371283985853446e-06, - "loss": 0.2565, + "epoch": 0.53, + "grad_norm": 0.45386829398684075, + "learning_rate": 9.495809242957897e-06, + "loss": 0.3567, "step": 11550 }, { - "epoch": 0.66, - "grad_norm": 0.31675267485780206, - "learning_rate": 5.369634505926658e-06, - "loss": 0.1933, + "epoch": 0.53, + "grad_norm": 0.31929915068615444, + "learning_rate": 9.494323221729278e-06, + "loss": 0.2912, "step": 11551 }, { - "epoch": 0.66, - "grad_norm": 1.1224053933698808, - "learning_rate": 5.367985186349223e-06, - "loss": 0.7319, + "epoch": 0.53, + "grad_norm": 0.21952264411421285, + "learning_rate": 9.492837211695852e-06, + "loss": 0.0995, "step": 11552 }, { - "epoch": 0.66, - "grad_norm": 0.3152515760305445, - "learning_rate": 5.3663360271782675e-06, - "loss": 0.2678, + "epoch": 0.53, + "grad_norm": 1.2729390185636968, + "learning_rate": 9.491351212890528e-06, + "loss": 0.6984, "step": 11553 }, { - "epoch": 0.66, - "grad_norm": 0.3356411560582424, - "learning_rate": 5.364687028470894e-06, - "loss": 0.2476, + "epoch": 0.53, + "grad_norm": 0.3377661202367108, + "learning_rate": 9.489865225346195e-06, + "loss": 0.2711, "step": 11554 }, { - "epoch": 0.66, - "grad_norm": 0.6719810096510042, - "learning_rate": 5.363038190284211e-06, - "loss": 0.4357, + "epoch": 0.53, + "grad_norm": 0.4662325011503825, + "learning_rate": 9.488379249095755e-06, + "loss": 0.2979, "step": 11555 }, { - "epoch": 0.66, - "grad_norm": 0.2516818101597573, - "learning_rate": 5.36138951267531e-06, - "loss": 0.2007, + "epoch": 0.53, + "grad_norm": 0.8504686812067698, + "learning_rate": 9.486893284172103e-06, + "loss": 0.5068, "step": 11556 }, { - "epoch": 0.66, - "grad_norm": 0.2570646063899158, - "learning_rate": 5.359740995701297e-06, - "loss": 0.1896, + "epoch": 0.53, + "grad_norm": 0.35756610182445453, + "learning_rate": 9.485407330608142e-06, + "loss": 0.2743, "step": 11557 }, { - "epoch": 0.66, - "grad_norm": 1.0684404877013456, - "learning_rate": 5.358092639419252e-06, - "loss": 0.7644, + "epoch": 0.53, + "grad_norm": 0.3112988437634165, + "learning_rate": 9.483921388436767e-06, + "loss": 0.179, "step": 11558 }, { - "epoch": 0.66, - "grad_norm": 0.313824020741263, - "learning_rate": 5.356444443886262e-06, - "loss": 0.2709, + "epoch": 0.53, + "grad_norm": 0.35275450669082886, + "learning_rate": 9.482435457690873e-06, + "loss": 0.2561, "step": 11559 }, { - "epoch": 0.66, - "grad_norm": 0.7877441786641255, - "learning_rate": 5.3547964091593955e-06, - "loss": 0.3024, + "epoch": 0.53, + "grad_norm": 0.3348128247596534, + "learning_rate": 9.480949538403362e-06, + "loss": 0.2738, "step": 11560 }, { - "epoch": 0.66, - "grad_norm": 0.3567833071601899, - "learning_rate": 5.353148535295733e-06, - "loss": 0.3206, + "epoch": 0.53, + "grad_norm": 0.8063144239181748, + "learning_rate": 9.479463630607124e-06, + "loss": 0.4397, "step": 11561 }, { - "epoch": 0.66, - "grad_norm": 0.24311132372342875, - "learning_rate": 5.351500822352338e-06, - "loss": 0.1998, + "epoch": 0.53, + "grad_norm": 0.4395064645676462, + "learning_rate": 9.477977734335061e-06, + "loss": 0.2702, "step": 11562 }, { - "epoch": 0.66, - "grad_norm": 0.4527933293358373, - "learning_rate": 5.3498532703862685e-06, - "loss": 0.2622, + "epoch": 0.53, + "grad_norm": 0.41951147537300915, + "learning_rate": 9.47649184962007e-06, + "loss": 0.3008, "step": 11563 }, { - "epoch": 0.66, - "grad_norm": 0.4517449777092792, - "learning_rate": 5.34820587945458e-06, - "loss": 0.2963, + "epoch": 0.53, + "grad_norm": 0.3483407211210602, + "learning_rate": 9.475005976495044e-06, + "loss": 0.269, "step": 11564 }, { - "epoch": 0.66, - "grad_norm": 0.4205026001874601, - "learning_rate": 5.34655864961432e-06, - "loss": 0.2904, + "epoch": 0.53, + "grad_norm": 0.32277374727350777, + "learning_rate": 9.473520114992876e-06, + "loss": 0.1725, "step": 11565 }, { - "epoch": 0.66, - "grad_norm": 0.5018741672324546, - "learning_rate": 5.344911580922536e-06, - "loss": 0.3114, + "epoch": 0.53, + "grad_norm": 0.4954189472973688, + "learning_rate": 9.472034265146467e-06, + "loss": 0.2993, "step": 11566 }, { - "epoch": 0.66, - "grad_norm": 0.3871254278340785, - "learning_rate": 5.343264673436264e-06, - "loss": 0.1549, + "epoch": 0.53, + "grad_norm": 0.340603564636974, + "learning_rate": 9.470548426988716e-06, + "loss": 0.3096, "step": 11567 }, { - "epoch": 0.66, - "grad_norm": 0.392704311459362, - "learning_rate": 5.341617927212537e-06, - "loss": 0.2629, + "epoch": 0.53, + "grad_norm": 1.0560843765898078, + "learning_rate": 9.469062600552509e-06, + "loss": 0.4219, "step": 11568 }, { - "epoch": 0.66, - "grad_norm": 0.2654887678301144, - "learning_rate": 5.339971342308377e-06, - "loss": 0.2367, + "epoch": 0.53, + "grad_norm": 0.3683445869916497, + "learning_rate": 9.46757678587074e-06, + "loss": 0.2813, "step": 11569 }, { - "epoch": 0.66, - "grad_norm": 0.9922960293520513, - "learning_rate": 5.33832491878081e-06, - "loss": 0.5467, + "epoch": 0.53, + "grad_norm": 0.23680972300463468, + "learning_rate": 9.466090982976311e-06, + "loss": 0.186, "step": 11570 }, { - "epoch": 0.66, - "grad_norm": 0.41226520432516683, - "learning_rate": 5.3366786566868545e-06, - "loss": 0.2878, + "epoch": 0.53, + "grad_norm": 0.4975384315526279, + "learning_rate": 9.464605191902114e-06, + "loss": 0.3663, "step": 11571 }, { - "epoch": 0.66, - "grad_norm": 0.4855856005335587, - "learning_rate": 5.335032556083515e-06, - "loss": 0.3625, + "epoch": 0.53, + "grad_norm": 0.35518988058665824, + "learning_rate": 9.463119412681041e-06, + "loss": 0.2283, "step": 11572 }, { - "epoch": 0.66, - "grad_norm": 0.37447574962277336, - "learning_rate": 5.333386617027793e-06, - "loss": 0.2506, + "epoch": 0.53, + "grad_norm": 0.7382612936696473, + "learning_rate": 9.46163364534599e-06, + "loss": 0.4927, "step": 11573 }, { - "epoch": 0.66, - "grad_norm": 0.32366398711854405, - "learning_rate": 5.331740839576697e-06, - "loss": 0.2546, + "epoch": 0.53, + "grad_norm": 0.526748975236103, + "learning_rate": 9.460147889929845e-06, + "loss": 0.3644, "step": 11574 }, { - "epoch": 0.67, - "grad_norm": 0.2625217824484109, - "learning_rate": 5.330095223787214e-06, - "loss": 0.1951, + "epoch": 0.53, + "grad_norm": 0.28599848534733724, + "learning_rate": 9.45866214646551e-06, + "loss": 0.2204, "step": 11575 }, { - "epoch": 0.67, - "grad_norm": 0.779074955761385, - "learning_rate": 5.3284497697163325e-06, - "loss": 0.5442, + "epoch": 0.53, + "grad_norm": 1.3775656086321737, + "learning_rate": 9.457176414985872e-06, + "loss": 0.7204, "step": 11576 }, { - "epoch": 0.67, - "grad_norm": 0.27009579975714293, - "learning_rate": 5.326804477421035e-06, - "loss": 0.2266, + "epoch": 0.53, + "grad_norm": 0.34335188546405027, + "learning_rate": 9.455690695523826e-06, + "loss": 0.2519, "step": 11577 }, { - "epoch": 0.67, - "grad_norm": 0.42870519581357974, - "learning_rate": 5.325159346958293e-06, - "loss": 0.3498, + "epoch": 0.53, + "grad_norm": 0.28402440275357393, + "learning_rate": 9.45420498811226e-06, + "loss": 0.2217, "step": 11578 }, { - "epoch": 0.67, - "grad_norm": 1.0770530022845466, - "learning_rate": 5.323514378385086e-06, - "loss": 0.5653, + "epoch": 0.53, + "grad_norm": 0.5638977114741563, + "learning_rate": 9.452719292784074e-06, + "loss": 0.3835, "step": 11579 }, { - "epoch": 0.67, - "grad_norm": 0.25141483196515735, - "learning_rate": 5.321869571758375e-06, - "loss": 0.1638, + "epoch": 0.53, + "grad_norm": 1.0865335337319042, + "learning_rate": 9.451233609572153e-06, + "loss": 0.6611, "step": 11580 }, { - "epoch": 0.67, - "grad_norm": 0.31187570399237746, - "learning_rate": 5.32022492713512e-06, - "loss": 0.2445, + "epoch": 0.53, + "grad_norm": 0.3090676807047347, + "learning_rate": 9.449747938509392e-06, + "loss": 0.1635, "step": 11581 }, { - "epoch": 0.67, - "grad_norm": 0.7460901109990338, - "learning_rate": 5.318580444572276e-06, - "loss": 0.4426, + "epoch": 0.53, + "grad_norm": 0.2941631381155313, + "learning_rate": 9.448262279628678e-06, + "loss": 0.2542, "step": 11582 }, { - "epoch": 0.67, - "grad_norm": 0.43048688929882034, - "learning_rate": 5.316936124126788e-06, - "loss": 0.2262, + "epoch": 0.53, + "grad_norm": 0.38016154823497406, + "learning_rate": 9.446776632962909e-06, + "loss": 0.2909, "step": 11583 }, { - "epoch": 0.67, - "grad_norm": 0.40015207220498034, - "learning_rate": 5.3152919658556e-06, - "loss": 0.3021, + "epoch": 0.53, + "grad_norm": 0.41781451795973, + "learning_rate": 9.44529099854497e-06, + "loss": 0.2856, "step": 11584 }, { - "epoch": 0.67, - "grad_norm": 0.3664291203534989, - "learning_rate": 5.313647969815647e-06, - "loss": 0.2887, + "epoch": 0.53, + "grad_norm": 0.45518012458921936, + "learning_rate": 9.443805376407758e-06, + "loss": 0.2907, "step": 11585 }, { - "epoch": 0.67, - "grad_norm": 0.351670785679463, - "learning_rate": 5.312004136063866e-06, - "loss": 0.184, + "epoch": 0.53, + "grad_norm": 0.4009928071656808, + "learning_rate": 9.442319766584153e-06, + "loss": 0.3456, "step": 11586 }, { - "epoch": 0.67, - "grad_norm": 0.31282841073476086, - "learning_rate": 5.310360464657183e-06, - "loss": 0.2668, + "epoch": 0.53, + "grad_norm": 0.47711975918400323, + "learning_rate": 9.440834169107054e-06, + "loss": 0.3089, "step": 11587 }, { - "epoch": 0.67, - "grad_norm": 0.9626155482266147, - "learning_rate": 5.308716955652513e-06, - "loss": 0.4174, + "epoch": 0.53, + "grad_norm": 0.5121742086899269, + "learning_rate": 9.439348584009347e-06, + "loss": 0.267, "step": 11588 }, { - "epoch": 0.67, - "grad_norm": 0.2985410671209383, - "learning_rate": 5.3070736091067734e-06, - "loss": 0.267, + "epoch": 0.53, + "grad_norm": 0.6531649277935633, + "learning_rate": 9.437863011323923e-06, + "loss": 0.3848, "step": 11589 }, { - "epoch": 0.67, - "grad_norm": 0.30113331654362685, - "learning_rate": 5.30543042507687e-06, - "loss": 0.2067, + "epoch": 0.53, + "grad_norm": 0.24067517557129425, + "learning_rate": 9.43637745108367e-06, + "loss": 0.1725, "step": 11590 }, { - "epoch": 0.67, - "grad_norm": 1.0546116846708513, - "learning_rate": 5.303787403619711e-06, - "loss": 0.4606, + "epoch": 0.53, + "grad_norm": 0.31685247756225854, + "learning_rate": 9.434891903321475e-06, + "loss": 0.2724, "step": 11591 }, { - "epoch": 0.67, - "grad_norm": 0.3379662424665475, - "learning_rate": 5.302144544792194e-06, - "loss": 0.2209, + "epoch": 0.53, + "grad_norm": 1.5858732160329463, + "learning_rate": 9.43340636807023e-06, + "loss": 0.6234, "step": 11592 }, { - "epoch": 0.67, - "grad_norm": 0.2875890580534507, - "learning_rate": 5.300501848651209e-06, - "loss": 0.244, + "epoch": 0.53, + "grad_norm": 0.38199825809520005, + "learning_rate": 9.431920845362822e-06, + "loss": 0.2611, "step": 11593 }, { - "epoch": 0.67, - "grad_norm": 1.0134444255063182, - "learning_rate": 5.298859315253639e-06, - "loss": 0.3835, + "epoch": 0.53, + "grad_norm": 0.507419548204257, + "learning_rate": 9.43043533523214e-06, + "loss": 0.2867, "step": 11594 }, { - "epoch": 0.67, - "grad_norm": 0.41250760331534936, - "learning_rate": 5.297216944656371e-06, - "loss": 0.3064, + "epoch": 0.53, + "grad_norm": 0.4611194863326474, + "learning_rate": 9.428949837711068e-06, + "loss": 0.364, "step": 11595 }, { - "epoch": 0.67, - "grad_norm": 0.30618938735708207, - "learning_rate": 5.29557473691628e-06, - "loss": 0.1658, + "epoch": 0.53, + "grad_norm": 0.3771586328895342, + "learning_rate": 9.427464352832498e-06, + "loss": 0.2779, "step": 11596 }, { - "epoch": 0.67, - "grad_norm": 0.3628548944099711, - "learning_rate": 5.293932692090233e-06, - "loss": 0.3127, + "epoch": 0.53, + "grad_norm": 0.46763979781345666, + "learning_rate": 9.425978880629315e-06, + "loss": 0.2696, "step": 11597 }, { - "epoch": 0.67, - "grad_norm": 0.3740872155732483, - "learning_rate": 5.292290810235092e-06, - "loss": 0.2613, + "epoch": 0.53, + "grad_norm": 0.26537678001470155, + "learning_rate": 9.424493421134407e-06, + "loss": 0.2059, "step": 11598 }, { - "epoch": 0.67, - "grad_norm": 0.5642476827593211, - "learning_rate": 5.29064909140772e-06, - "loss": 0.2667, + "epoch": 0.53, + "grad_norm": 0.4549239375220082, + "learning_rate": 9.423007974380656e-06, + "loss": 0.269, "step": 11599 }, { - "epoch": 0.67, - "grad_norm": 0.5899163594087162, - "learning_rate": 5.289007535664967e-06, - "loss": 0.3414, + "epoch": 0.53, + "grad_norm": 0.5387294193592074, + "learning_rate": 9.421522540400955e-06, + "loss": 0.3382, "step": 11600 }, { - "epoch": 0.67, - "grad_norm": 0.40825253228739095, - "learning_rate": 5.287366143063682e-06, - "loss": 0.3243, + "epoch": 0.53, + "grad_norm": 0.4921534222947431, + "learning_rate": 9.42003711922819e-06, + "loss": 0.2348, "step": 11601 }, { - "epoch": 0.67, - "grad_norm": 0.2748846662378343, - "learning_rate": 5.285724913660704e-06, - "loss": 0.2326, + "epoch": 0.53, + "grad_norm": 0.4355996686401445, + "learning_rate": 9.418551710895243e-06, + "loss": 0.2964, "step": 11602 }, { - "epoch": 0.67, - "grad_norm": 0.43119599601784436, - "learning_rate": 5.284083847512866e-06, - "loss": 0.1601, + "epoch": 0.53, + "grad_norm": 0.353586162835902, + "learning_rate": 9.417066315435002e-06, + "loss": 0.3084, "step": 11603 }, { - "epoch": 0.67, - "grad_norm": 0.8477040777808765, - "learning_rate": 5.282442944677005e-06, - "loss": 0.4426, + "epoch": 0.53, + "grad_norm": 0.3245198321431988, + "learning_rate": 9.415580932880347e-06, + "loss": 0.1529, "step": 11604 }, { - "epoch": 0.67, - "grad_norm": 0.27256531636671766, - "learning_rate": 5.280802205209943e-06, - "loss": 0.2666, + "epoch": 0.53, + "grad_norm": 0.43866629070919116, + "learning_rate": 9.414095563264169e-06, + "loss": 0.2727, "step": 11605 }, { - "epoch": 0.67, - "grad_norm": 0.5546628272888353, - "learning_rate": 5.279161629168497e-06, - "loss": 0.3214, + "epoch": 0.53, + "grad_norm": 0.326232925175616, + "learning_rate": 9.41261020661935e-06, + "loss": 0.3093, "step": 11606 }, { - "epoch": 0.67, - "grad_norm": 0.5307811121875399, - "learning_rate": 5.2775212166094755e-06, - "loss": 0.3207, + "epoch": 0.53, + "grad_norm": 0.8985547972415693, + "learning_rate": 9.411124862978777e-06, + "loss": 0.3161, "step": 11607 }, { - "epoch": 0.67, - "grad_norm": 0.2547089293698558, - "learning_rate": 5.275880967589697e-06, - "loss": 0.2159, + "epoch": 0.53, + "grad_norm": 0.384382698777126, + "learning_rate": 9.409639532375327e-06, + "loss": 0.2723, "step": 11608 }, { - "epoch": 0.67, - "grad_norm": 0.3482597708001019, - "learning_rate": 5.274240882165958e-06, - "loss": 0.2253, + "epoch": 0.53, + "grad_norm": 0.5687459015449872, + "learning_rate": 9.408154214841894e-06, + "loss": 0.4402, "step": 11609 }, { - "epoch": 0.67, - "grad_norm": 0.5292646057639918, - "learning_rate": 5.272600960395051e-06, - "loss": 0.283, + "epoch": 0.53, + "grad_norm": 0.3075215507334488, + "learning_rate": 9.406668910411356e-06, + "loss": 0.2522, "step": 11610 }, { - "epoch": 0.67, - "grad_norm": 0.3723792760127392, - "learning_rate": 5.270961202333769e-06, - "loss": 0.292, + "epoch": 0.53, + "grad_norm": 0.3330528900470736, + "learning_rate": 9.405183619116594e-06, + "loss": 0.1898, "step": 11611 }, { - "epoch": 0.67, - "grad_norm": 0.7733747358805955, - "learning_rate": 5.2693216080388984e-06, - "loss": 0.4955, + "epoch": 0.53, + "grad_norm": 0.6521972590743307, + "learning_rate": 9.40369834099049e-06, + "loss": 0.3641, "step": 11612 }, { - "epoch": 0.67, - "grad_norm": 0.29065692377208907, - "learning_rate": 5.267682177567219e-06, - "loss": 0.2262, + "epoch": 0.53, + "grad_norm": 0.8606685124260647, + "learning_rate": 9.402213076065937e-06, + "loss": 0.4956, "step": 11613 }, { - "epoch": 0.67, - "grad_norm": 0.37946091941589666, - "learning_rate": 5.266042910975501e-06, - "loss": 0.3012, + "epoch": 0.53, + "grad_norm": 0.313928014452957, + "learning_rate": 9.400727824375809e-06, + "loss": 0.2166, "step": 11614 }, { - "epoch": 0.67, - "grad_norm": 0.28954371908158133, - "learning_rate": 5.264403808320514e-06, - "loss": 0.1924, + "epoch": 0.53, + "grad_norm": 0.5764658073112854, + "learning_rate": 9.399242585952988e-06, + "loss": 0.4243, "step": 11615 }, { - "epoch": 0.67, - "grad_norm": 0.36618728513841026, - "learning_rate": 5.26276486965902e-06, - "loss": 0.2213, + "epoch": 0.53, + "grad_norm": 0.3298526325049756, + "learning_rate": 9.397757360830353e-06, + "loss": 0.2105, "step": 11616 }, { - "epoch": 0.67, - "grad_norm": 0.3013556099336767, - "learning_rate": 5.261126095047774e-06, - "loss": 0.2854, + "epoch": 0.53, + "grad_norm": 0.5771189911617128, + "learning_rate": 9.396272149040794e-06, + "loss": 0.2352, "step": 11617 }, { - "epoch": 0.67, - "grad_norm": 0.6303827060308433, - "learning_rate": 5.259487484543528e-06, - "loss": 0.4274, + "epoch": 0.53, + "grad_norm": 0.49172257415506077, + "learning_rate": 9.394786950617188e-06, + "loss": 0.3418, "step": 11618 }, { - "epoch": 0.67, - "grad_norm": 0.3363842194928553, - "learning_rate": 5.257849038203022e-06, - "loss": 0.1905, + "epoch": 0.53, + "grad_norm": 0.5149435213148109, + "learning_rate": 9.393301765592415e-06, + "loss": 0.375, "step": 11619 }, { - "epoch": 0.67, - "grad_norm": 0.2881946093992692, - "learning_rate": 5.256210756083004e-06, - "loss": 0.2326, + "epoch": 0.53, + "grad_norm": 0.9021804416550401, + "learning_rate": 9.391816593999357e-06, + "loss": 0.4766, "step": 11620 }, { - "epoch": 0.67, - "grad_norm": 0.2732752542283715, - "learning_rate": 5.254572638240204e-06, - "loss": 0.2327, + "epoch": 0.53, + "grad_norm": 0.3268495065524992, + "learning_rate": 9.39033143587089e-06, + "loss": 0.2375, "step": 11621 }, { - "epoch": 0.67, - "grad_norm": 0.6126870326894789, - "learning_rate": 5.252934684731349e-06, - "loss": 0.2707, + "epoch": 0.53, + "grad_norm": 0.27504092355111404, + "learning_rate": 9.388846291239902e-06, + "loss": 0.2466, "step": 11622 }, { - "epoch": 0.67, - "grad_norm": 0.38004426464239194, - "learning_rate": 5.251296895613158e-06, - "loss": 0.2675, + "epoch": 0.53, + "grad_norm": 0.5137307119130171, + "learning_rate": 9.387361160139267e-06, + "loss": 0.284, "step": 11623 }, { - "epoch": 0.67, - "grad_norm": 0.3487591686090327, - "learning_rate": 5.249659270942355e-06, - "loss": 0.3091, + "epoch": 0.53, + "grad_norm": 0.31304615858142565, + "learning_rate": 9.385876042601865e-06, + "loss": 0.2163, "step": 11624 }, { - "epoch": 0.67, - "grad_norm": 0.809507380773592, - "learning_rate": 5.248021810775647e-06, - "loss": 0.4803, + "epoch": 0.53, + "grad_norm": 1.484826023617579, + "learning_rate": 9.384390938660572e-06, + "loss": 0.794, "step": 11625 }, { - "epoch": 0.67, - "grad_norm": 0.24443885048236402, - "learning_rate": 5.2463845151697404e-06, - "loss": 0.1772, + "epoch": 0.53, + "grad_norm": 0.34399974112785403, + "learning_rate": 9.382905848348274e-06, + "loss": 0.295, "step": 11626 }, { - "epoch": 0.67, - "grad_norm": 0.4368490630538348, - "learning_rate": 5.2447473841813335e-06, - "loss": 0.2457, + "epoch": 0.53, + "grad_norm": 0.4001701551471713, + "learning_rate": 9.381420771697845e-06, + "loss": 0.2766, "step": 11627 }, { - "epoch": 0.67, - "grad_norm": 0.4995512757478243, - "learning_rate": 5.243110417867117e-06, - "loss": 0.3012, + "epoch": 0.53, + "grad_norm": 0.6618474873869882, + "learning_rate": 9.379935708742164e-06, + "loss": 0.4498, "step": 11628 }, { - "epoch": 0.67, - "grad_norm": 0.29444216072671153, - "learning_rate": 5.241473616283783e-06, - "loss": 0.2354, + "epoch": 0.53, + "grad_norm": 0.504418556713321, + "learning_rate": 9.378450659514107e-06, + "loss": 0.3443, "step": 11629 }, { - "epoch": 0.67, - "grad_norm": 1.4348647745786949, - "learning_rate": 5.239836979488015e-06, - "loss": 0.7923, + "epoch": 0.53, + "grad_norm": 0.25900473644456884, + "learning_rate": 9.376965624046555e-06, + "loss": 0.2027, "step": 11630 }, { - "epoch": 0.67, - "grad_norm": 0.7504570213240344, - "learning_rate": 5.238200507536488e-06, - "loss": 0.4943, + "epoch": 0.53, + "grad_norm": 0.3309850488210643, + "learning_rate": 9.375480602372384e-06, + "loss": 0.2393, "step": 11631 }, { - "epoch": 0.67, - "grad_norm": 0.31611857707224705, - "learning_rate": 5.23656420048587e-06, - "loss": 0.2007, + "epoch": 0.53, + "grad_norm": 0.6905169250464249, + "learning_rate": 9.373995594524474e-06, + "loss": 0.387, "step": 11632 }, { - "epoch": 0.67, - "grad_norm": 0.2280572315656847, - "learning_rate": 5.23492805839283e-06, - "loss": 0.2107, + "epoch": 0.53, + "grad_norm": 0.4302111475287135, + "learning_rate": 9.372510600535693e-06, + "loss": 0.2958, "step": 11633 }, { - "epoch": 0.67, - "grad_norm": 0.5290475222067625, - "learning_rate": 5.233292081314027e-06, - "loss": 0.3361, + "epoch": 0.53, + "grad_norm": 0.4074766723107201, + "learning_rate": 9.371025620438922e-06, + "loss": 0.2863, "step": 11634 }, { - "epoch": 0.67, - "grad_norm": 0.33595680323497873, - "learning_rate": 5.231656269306116e-06, - "loss": 0.2121, + "epoch": 0.53, + "grad_norm": 0.5041505438944777, + "learning_rate": 9.369540654267039e-06, + "loss": 0.2909, "step": 11635 }, { - "epoch": 0.67, - "grad_norm": 0.34149735897754147, - "learning_rate": 5.230020622425738e-06, - "loss": 0.2989, + "epoch": 0.53, + "grad_norm": 0.3913208284044223, + "learning_rate": 9.368055702052919e-06, + "loss": 0.2658, "step": 11636 }, { - "epoch": 0.67, - "grad_norm": 0.5854348227990583, - "learning_rate": 5.228385140729545e-06, - "loss": 0.4182, + "epoch": 0.53, + "grad_norm": 0.31003779035931117, + "learning_rate": 9.366570763829439e-06, + "loss": 0.1776, "step": 11637 }, { - "epoch": 0.67, - "grad_norm": 0.37979442690169474, - "learning_rate": 5.226749824274169e-06, - "loss": 0.2815, + "epoch": 0.53, + "grad_norm": 0.44953459287440634, + "learning_rate": 9.365085839629466e-06, + "loss": 0.3031, "step": 11638 }, { - "epoch": 0.67, - "grad_norm": 0.4566569153966813, - "learning_rate": 5.225114673116243e-06, - "loss": 0.239, + "epoch": 0.53, + "grad_norm": 0.3896207379468654, + "learning_rate": 9.363600929485885e-06, + "loss": 0.2965, "step": 11639 }, { - "epoch": 0.67, - "grad_norm": 0.2744528609459148, - "learning_rate": 5.223479687312388e-06, - "loss": 0.2327, + "epoch": 0.53, + "grad_norm": 0.6087830701546962, + "learning_rate": 9.362116033431566e-06, + "loss": 0.3416, "step": 11640 }, { - "epoch": 0.67, - "grad_norm": 0.36138822589171504, - "learning_rate": 5.2218448669192235e-06, - "loss": 0.2638, + "epoch": 0.53, + "grad_norm": 0.8222679475513474, + "learning_rate": 9.360631151499382e-06, + "loss": 0.4121, "step": 11641 }, { - "epoch": 0.67, - "grad_norm": 0.8235685157243273, - "learning_rate": 5.220210211993371e-06, - "loss": 0.457, + "epoch": 0.53, + "grad_norm": 0.3004690274617724, + "learning_rate": 9.359146283722206e-06, + "loss": 0.2761, "step": 11642 }, { - "epoch": 0.67, - "grad_norm": 1.145506050031028, - "learning_rate": 5.21857572259143e-06, - "loss": 0.6852, + "epoch": 0.53, + "grad_norm": 0.22471035452567548, + "learning_rate": 9.357661430132916e-06, + "loss": 0.1054, "step": 11643 }, { - "epoch": 0.67, - "grad_norm": 0.32843776235279604, - "learning_rate": 5.216941398770009e-06, - "loss": 0.2588, + "epoch": 0.53, + "grad_norm": 0.6200494504459275, + "learning_rate": 9.356176590764382e-06, + "loss": 0.4299, "step": 11644 }, { - "epoch": 0.67, - "grad_norm": 0.33328103317540847, - "learning_rate": 5.215307240585696e-06, - "loss": 0.2655, + "epoch": 0.53, + "grad_norm": 0.40263729573447943, + "learning_rate": 9.35469176564948e-06, + "loss": 0.3069, "step": 11645 }, { - "epoch": 0.67, - "grad_norm": 0.34161222318740736, - "learning_rate": 5.213673248095092e-06, - "loss": 0.2008, + "epoch": 0.54, + "grad_norm": 0.35644280595544175, + "learning_rate": 9.353206954821075e-06, + "loss": 0.3157, "step": 11646 }, { - "epoch": 0.67, - "grad_norm": 0.34405362854914645, - "learning_rate": 5.212039421354779e-06, - "loss": 0.2812, + "epoch": 0.54, + "grad_norm": 0.4422490387852672, + "learning_rate": 9.35172215831205e-06, + "loss": 0.2071, "step": 11647 }, { - "epoch": 0.67, - "grad_norm": 0.3367695864863821, - "learning_rate": 5.2104057604213335e-06, - "loss": 0.2562, + "epoch": 0.54, + "grad_norm": 0.29127712832389857, + "learning_rate": 9.350237376155269e-06, + "loss": 0.2306, "step": 11648 }, { - "epoch": 0.67, - "grad_norm": 0.7079692534150634, - "learning_rate": 5.208772265351332e-06, - "loss": 0.426, + "epoch": 0.54, + "grad_norm": 0.423903823747645, + "learning_rate": 9.348752608383608e-06, + "loss": 0.2455, "step": 11649 }, { - "epoch": 0.67, - "grad_norm": 0.35211947295926666, - "learning_rate": 5.207138936201339e-06, - "loss": 0.2587, + "epoch": 0.54, + "grad_norm": 0.3107040305171492, + "learning_rate": 9.347267855029939e-06, + "loss": 0.2396, "step": 11650 }, { - "epoch": 0.67, - "grad_norm": 0.6453440156091795, - "learning_rate": 5.205505773027919e-06, - "loss": 0.3904, + "epoch": 0.54, + "grad_norm": 0.4258717083161907, + "learning_rate": 9.345783116127122e-06, + "loss": 0.311, "step": 11651 }, { - "epoch": 0.67, - "grad_norm": 0.2491559057765381, - "learning_rate": 5.203872775887628e-06, - "loss": 0.1939, + "epoch": 0.54, + "grad_norm": 0.572067195715333, + "learning_rate": 9.344298391708043e-06, + "loss": 0.408, "step": 11652 }, { - "epoch": 0.67, - "grad_norm": 0.362328629160573, - "learning_rate": 5.202239944837013e-06, - "loss": 0.283, + "epoch": 0.54, + "grad_norm": 0.37066031201809835, + "learning_rate": 9.342813681805564e-06, + "loss": 0.2209, "step": 11653 }, { - "epoch": 0.67, - "grad_norm": 0.4889478459733575, - "learning_rate": 5.200607279932626e-06, - "loss": 0.3449, + "epoch": 0.54, + "grad_norm": 0.3168676582140073, + "learning_rate": 9.341328986452558e-06, + "loss": 0.264, "step": 11654 }, { - "epoch": 0.67, - "grad_norm": 0.6228592787338032, - "learning_rate": 5.198974781231003e-06, - "loss": 0.308, + "epoch": 0.54, + "grad_norm": 0.29010760126124835, + "learning_rate": 9.33984430568189e-06, + "loss": 0.1817, "step": 11655 }, { - "epoch": 0.67, - "grad_norm": 0.30679920540472044, - "learning_rate": 5.197342448788676e-06, - "loss": 0.2397, + "epoch": 0.54, + "grad_norm": 0.5473738858833251, + "learning_rate": 9.338359639526436e-06, + "loss": 0.2607, "step": 11656 }, { - "epoch": 0.67, - "grad_norm": 0.3359198585706779, - "learning_rate": 5.19571028266217e-06, - "loss": 0.3035, + "epoch": 0.54, + "grad_norm": 0.3652733708898023, + "learning_rate": 9.336874988019063e-06, + "loss": 0.3039, "step": 11657 }, { - "epoch": 0.67, - "grad_norm": 0.22988331423007832, - "learning_rate": 5.194078282908015e-06, - "loss": 0.0879, + "epoch": 0.54, + "grad_norm": 0.38603016526081996, + "learning_rate": 9.335390351192636e-06, + "loss": 0.3444, "step": 11658 }, { - "epoch": 0.67, - "grad_norm": 0.32083574318893576, - "learning_rate": 5.192446449582722e-06, - "loss": 0.2683, + "epoch": 0.54, + "grad_norm": 1.7112014898914996, + "learning_rate": 9.333905729080024e-06, + "loss": 0.8273, "step": 11659 }, { - "epoch": 0.67, - "grad_norm": 0.37311945503442323, - "learning_rate": 5.190814782742801e-06, - "loss": 0.3295, + "epoch": 0.54, + "grad_norm": 0.3518696755351413, + "learning_rate": 9.332421121714101e-06, + "loss": 0.2064, "step": 11660 }, { - "epoch": 0.67, - "grad_norm": 0.9174082847992806, - "learning_rate": 5.1891832824447545e-06, - "loss": 0.3407, + "epoch": 0.54, + "grad_norm": 0.28890595250732565, + "learning_rate": 9.33093652912773e-06, + "loss": 0.1632, "step": 11661 }, { - "epoch": 0.67, - "grad_norm": 0.311522227954505, - "learning_rate": 5.1875519487450865e-06, - "loss": 0.2712, + "epoch": 0.54, + "grad_norm": 0.41623436207766035, + "learning_rate": 9.329451951353781e-06, + "loss": 0.3274, "step": 11662 }, { - "epoch": 0.67, - "grad_norm": 0.7068267725620065, - "learning_rate": 5.185920781700288e-06, - "loss": 0.4194, + "epoch": 0.54, + "grad_norm": 0.39106761355405417, + "learning_rate": 9.32796738842512e-06, + "loss": 0.2206, "step": 11663 }, { - "epoch": 0.67, - "grad_norm": 0.3690138293865834, - "learning_rate": 5.184289781366847e-06, - "loss": 0.2892, + "epoch": 0.54, + "grad_norm": 1.363003718700666, + "learning_rate": 9.326482840374606e-06, + "loss": 0.87, "step": 11664 }, { - "epoch": 0.67, - "grad_norm": 0.2054627742441291, - "learning_rate": 5.182658947801242e-06, - "loss": 0.1386, + "epoch": 0.54, + "grad_norm": 0.43384043733285793, + "learning_rate": 9.324998307235117e-06, + "loss": 0.3083, "step": 11665 }, { - "epoch": 0.67, - "grad_norm": 1.2891895203008483, - "learning_rate": 5.1810282810599475e-06, - "loss": 0.6783, + "epoch": 0.54, + "grad_norm": 0.31649052573784386, + "learning_rate": 9.323513789039517e-06, + "loss": 0.2354, "step": 11666 }, { - "epoch": 0.67, - "grad_norm": 0.8395194551653319, - "learning_rate": 5.17939778119944e-06, - "loss": 0.412, + "epoch": 0.54, + "grad_norm": 0.3389514946724946, + "learning_rate": 9.322029285820669e-06, + "loss": 0.1909, "step": 11667 }, { - "epoch": 0.67, - "grad_norm": 0.2798409401034943, - "learning_rate": 5.1777674482761805e-06, - "loss": 0.2069, + "epoch": 0.54, + "grad_norm": 0.5623254675463492, + "learning_rate": 9.320544797611436e-06, + "loss": 0.3962, "step": 11668 }, { - "epoch": 0.67, - "grad_norm": 0.44920481661776707, - "learning_rate": 5.176137282346627e-06, - "loss": 0.3641, + "epoch": 0.54, + "grad_norm": 0.41028675953833244, + "learning_rate": 9.31906032444469e-06, + "loss": 0.2331, "step": 11669 }, { - "epoch": 0.67, - "grad_norm": 0.29918314203834345, - "learning_rate": 5.174507283467228e-06, - "loss": 0.1879, + "epoch": 0.54, + "grad_norm": 0.41513545072717595, + "learning_rate": 9.317575866353293e-06, + "loss": 0.3514, "step": 11670 }, { - "epoch": 0.67, - "grad_norm": 0.32543040157224057, - "learning_rate": 5.172877451694438e-06, - "loss": 0.197, + "epoch": 0.54, + "grad_norm": 0.9287869047694566, + "learning_rate": 9.316091423370105e-06, + "loss": 0.4506, "step": 11671 }, { - "epoch": 0.67, - "grad_norm": 0.36804171214608106, - "learning_rate": 5.171247787084694e-06, - "loss": 0.3286, + "epoch": 0.54, + "grad_norm": 0.3885442245023446, + "learning_rate": 9.314606995527994e-06, + "loss": 0.2682, "step": 11672 }, { - "epoch": 0.67, - "grad_norm": 0.9862885552626823, - "learning_rate": 5.169618289694432e-06, - "loss": 0.3905, + "epoch": 0.54, + "grad_norm": 0.2802318521153969, + "learning_rate": 9.313122582859826e-06, + "loss": 0.1939, "step": 11673 }, { - "epoch": 0.67, - "grad_norm": 0.2984786136741482, - "learning_rate": 5.167988959580077e-06, - "loss": 0.1931, + "epoch": 0.54, + "grad_norm": 0.47667283084442197, + "learning_rate": 9.311638185398461e-06, + "loss": 0.3297, "step": 11674 }, { - "epoch": 0.67, - "grad_norm": 1.0670030322829498, - "learning_rate": 5.16635979679806e-06, - "loss": 0.5376, + "epoch": 0.54, + "grad_norm": 0.35765135438946277, + "learning_rate": 9.310153803176765e-06, + "loss": 0.2575, "step": 11675 }, { - "epoch": 0.67, - "grad_norm": 0.34866352029633485, - "learning_rate": 5.1647308014047955e-06, - "loss": 0.2992, + "epoch": 0.54, + "grad_norm": 1.0472080587585906, + "learning_rate": 9.308669436227592e-06, + "loss": 0.4618, "step": 11676 }, { - "epoch": 0.67, - "grad_norm": 0.33441485196590653, - "learning_rate": 5.163101973456696e-06, - "loss": 0.2794, + "epoch": 0.54, + "grad_norm": 0.33633738585763256, + "learning_rate": 9.307185084583816e-06, + "loss": 0.288, "step": 11677 }, { - "epoch": 0.67, - "grad_norm": 0.2906375868270081, - "learning_rate": 5.161473313010162e-06, - "loss": 0.15, + "epoch": 0.54, + "grad_norm": 0.336502292606778, + "learning_rate": 9.305700748278296e-06, + "loss": 0.2521, "step": 11678 }, { - "epoch": 0.67, - "grad_norm": 0.9677833792997231, - "learning_rate": 5.159844820121605e-06, - "loss": 0.4, + "epoch": 0.54, + "grad_norm": 0.4048952944452784, + "learning_rate": 9.304216427343894e-06, + "loss": 0.1611, "step": 11679 }, { - "epoch": 0.67, - "grad_norm": 0.2778530544704572, - "learning_rate": 5.158216494847412e-06, - "loss": 0.2421, + "epoch": 0.54, + "grad_norm": 0.44317456840682923, + "learning_rate": 9.302732121813467e-06, + "loss": 0.2925, "step": 11680 }, { - "epoch": 0.67, - "grad_norm": 0.44108767628976453, - "learning_rate": 5.156588337243974e-06, - "loss": 0.2371, + "epoch": 0.54, + "grad_norm": 0.3629013014908533, + "learning_rate": 9.301247831719876e-06, + "loss": 0.2761, "step": 11681 }, { - "epoch": 0.67, - "grad_norm": 0.9779485327404348, - "learning_rate": 5.154960347367675e-06, - "loss": 0.4578, + "epoch": 0.54, + "grad_norm": 0.3878242196618716, + "learning_rate": 9.299763557095986e-06, + "loss": 0.2757, "step": 11682 }, { - "epoch": 0.67, - "grad_norm": 0.27024184345027874, - "learning_rate": 5.153332525274888e-06, - "loss": 0.225, + "epoch": 0.54, + "grad_norm": 1.065162077148229, + "learning_rate": 9.298279297974659e-06, + "loss": 0.5394, "step": 11683 }, { - "epoch": 0.67, - "grad_norm": 0.3015393644257497, - "learning_rate": 5.1517048710219895e-06, - "loss": 0.2489, + "epoch": 0.54, + "grad_norm": 0.3264921792546553, + "learning_rate": 9.29679505438875e-06, + "loss": 0.2382, "step": 11684 }, { - "epoch": 0.67, - "grad_norm": 0.8142713116682633, - "learning_rate": 5.150077384665342e-06, - "loss": 0.397, + "epoch": 0.54, + "grad_norm": 0.8193978850515296, + "learning_rate": 9.29531082637112e-06, + "loss": 0.4485, "step": 11685 }, { - "epoch": 0.67, - "grad_norm": 0.333100189552544, - "learning_rate": 5.148450066261303e-06, - "loss": 0.2676, + "epoch": 0.54, + "grad_norm": 0.30081617178664427, + "learning_rate": 9.293826613954629e-06, + "loss": 0.2231, "step": 11686 }, { - "epoch": 0.67, - "grad_norm": 0.4376687450485571, - "learning_rate": 5.146822915866232e-06, - "loss": 0.24, + "epoch": 0.54, + "grad_norm": 0.396169861059128, + "learning_rate": 9.292342417172138e-06, + "loss": 0.2979, "step": 11687 }, { - "epoch": 0.67, - "grad_norm": 0.3483112994923384, - "learning_rate": 5.145195933536476e-06, - "loss": 0.2511, + "epoch": 0.54, + "grad_norm": 0.5032826500275135, + "learning_rate": 9.290858236056503e-06, + "loss": 0.3435, "step": 11688 }, { - "epoch": 0.67, - "grad_norm": 0.4132548196316583, - "learning_rate": 5.143569119328376e-06, - "loss": 0.2765, + "epoch": 0.54, + "grad_norm": 0.31658410608483184, + "learning_rate": 9.289374070640581e-06, + "loss": 0.2508, "step": 11689 }, { - "epoch": 0.67, - "grad_norm": 0.7860273994083556, - "learning_rate": 5.141942473298264e-06, - "loss": 0.2614, + "epoch": 0.54, + "grad_norm": 0.38163147474502807, + "learning_rate": 9.287889920957236e-06, + "loss": 0.2762, "step": 11690 }, { - "epoch": 0.67, - "grad_norm": 0.3396154895768527, - "learning_rate": 5.140315995502478e-06, - "loss": 0.2806, + "epoch": 0.54, + "grad_norm": 0.7824481203912594, + "learning_rate": 9.28640578703932e-06, + "loss": 0.4238, "step": 11691 }, { - "epoch": 0.67, - "grad_norm": 0.4005484919825302, - "learning_rate": 5.1386896859973425e-06, - "loss": 0.2488, + "epoch": 0.54, + "grad_norm": 0.39616453155576553, + "learning_rate": 9.284921668919692e-06, + "loss": 0.1707, "step": 11692 }, { - "epoch": 0.67, - "grad_norm": 0.33537309338762605, - "learning_rate": 5.1370635448391736e-06, - "loss": 0.2494, + "epoch": 0.54, + "grad_norm": 0.3454923519403278, + "learning_rate": 9.283437566631209e-06, + "loss": 0.2626, "step": 11693 }, { - "epoch": 0.67, - "grad_norm": 1.2551963101728314, - "learning_rate": 5.135437572084284e-06, - "loss": 0.2223, + "epoch": 0.54, + "grad_norm": 0.36159524598091314, + "learning_rate": 9.281953480206725e-06, + "loss": 0.3256, "step": 11694 }, { - "epoch": 0.67, - "grad_norm": 0.39248745343059227, - "learning_rate": 5.133811767788979e-06, - "loss": 0.2952, + "epoch": 0.54, + "grad_norm": 0.3388010346526541, + "learning_rate": 9.280469409679102e-06, + "loss": 0.1039, "step": 11695 }, { - "epoch": 0.67, - "grad_norm": 0.29648525882978316, - "learning_rate": 5.132186132009567e-06, - "loss": 0.2726, + "epoch": 0.54, + "grad_norm": 0.32889107040152904, + "learning_rate": 9.278985355081193e-06, + "loss": 0.2684, "step": 11696 }, { - "epoch": 0.67, - "grad_norm": 0.9013794775813733, - "learning_rate": 5.13056066480234e-06, - "loss": 0.357, + "epoch": 0.54, + "grad_norm": 0.5037149651234222, + "learning_rate": 9.277501316445854e-06, + "loss": 0.3698, "step": 11697 }, { - "epoch": 0.67, - "grad_norm": 0.34343934566226364, - "learning_rate": 5.128935366223588e-06, - "loss": 0.2625, + "epoch": 0.54, + "grad_norm": 0.5077007471135834, + "learning_rate": 9.276017293805936e-06, + "loss": 0.3993, "step": 11698 }, { - "epoch": 0.67, - "grad_norm": 0.2714716915290179, - "learning_rate": 5.12731023632959e-06, - "loss": 0.1737, + "epoch": 0.54, + "grad_norm": 0.2620827818208293, + "learning_rate": 9.2745332871943e-06, + "loss": 0.1654, "step": 11699 }, { - "epoch": 0.67, - "grad_norm": 0.3910270132526932, - "learning_rate": 5.125685275176633e-06, - "loss": 0.3165, + "epoch": 0.54, + "grad_norm": 1.3831590743269435, + "learning_rate": 9.273049296643798e-06, + "loss": 0.8403, "step": 11700 }, { - "epoch": 0.67, - "grad_norm": 0.35071460165870216, - "learning_rate": 5.124060482820986e-06, - "loss": 0.1882, + "epoch": 0.54, + "grad_norm": 0.28698881801479265, + "learning_rate": 9.271565322187283e-06, + "loss": 0.2469, "step": 11701 }, { - "epoch": 0.67, - "grad_norm": 0.9215729327737139, - "learning_rate": 5.122435859318915e-06, - "loss": 0.4102, + "epoch": 0.54, + "grad_norm": 0.3290629204536742, + "learning_rate": 9.27008136385761e-06, + "loss": 0.2126, "step": 11702 }, { - "epoch": 0.67, - "grad_norm": 0.45771396649945395, - "learning_rate": 5.120811404726675e-06, - "loss": 0.3709, + "epoch": 0.54, + "grad_norm": 0.6519353019369623, + "learning_rate": 9.268597421687631e-06, + "loss": 0.4266, "step": 11703 }, { - "epoch": 0.67, - "grad_norm": 0.27670028601173474, - "learning_rate": 5.119187119100533e-06, - "loss": 0.2096, + "epoch": 0.54, + "grad_norm": 1.2492380899637494, + "learning_rate": 9.267113495710203e-06, + "loss": 0.6571, "step": 11704 }, { - "epoch": 0.67, - "grad_norm": 0.26230396682039764, - "learning_rate": 5.117563002496728e-06, - "loss": 0.168, + "epoch": 0.54, + "grad_norm": 0.30799574502210647, + "learning_rate": 9.265629585958173e-06, + "loss": 0.201, "step": 11705 }, { - "epoch": 0.67, - "grad_norm": 1.0274803578898541, - "learning_rate": 5.115939054971508e-06, - "loss": 0.4535, + "epoch": 0.54, + "grad_norm": 0.37735684909753636, + "learning_rate": 9.264145692464394e-06, + "loss": 0.315, "step": 11706 }, { - "epoch": 0.67, - "grad_norm": 0.6142974067442903, - "learning_rate": 5.114315276581108e-06, - "loss": 0.2165, + "epoch": 0.54, + "grad_norm": 0.35323059992538514, + "learning_rate": 9.262661815261726e-06, + "loss": 0.2292, "step": 11707 }, { - "epoch": 0.67, - "grad_norm": 0.2637060378536606, - "learning_rate": 5.1126916673817575e-06, - "loss": 0.2753, + "epoch": 0.54, + "grad_norm": 0.30951337940811935, + "learning_rate": 9.261177954383014e-06, + "loss": 0.1934, "step": 11708 }, { - "epoch": 0.67, - "grad_norm": 1.119474613305846, - "learning_rate": 5.111068227429686e-06, - "loss": 0.6991, + "epoch": 0.54, + "grad_norm": 0.39216085184940486, + "learning_rate": 9.259694109861107e-06, + "loss": 0.3177, "step": 11709 }, { - "epoch": 0.67, - "grad_norm": 0.3746375472716624, - "learning_rate": 5.109444956781113e-06, - "loss": 0.1687, + "epoch": 0.54, + "grad_norm": 1.3660324486377504, + "learning_rate": 9.258210281728862e-06, + "loss": 0.5812, "step": 11710 }, { - "epoch": 0.67, - "grad_norm": 0.26177824937585215, - "learning_rate": 5.10782185549225e-06, - "loss": 0.2256, + "epoch": 0.54, + "grad_norm": 0.44098786757010494, + "learning_rate": 9.256726470019121e-06, + "loss": 0.2762, "step": 11711 }, { - "epoch": 0.67, - "grad_norm": 0.48526546688989247, - "learning_rate": 5.106198923619302e-06, - "loss": 0.3127, + "epoch": 0.54, + "grad_norm": 0.22001286754437122, + "learning_rate": 9.255242674764742e-06, + "loss": 0.1498, "step": 11712 }, { - "epoch": 0.67, - "grad_norm": 0.6618222141302967, - "learning_rate": 5.10457616121848e-06, - "loss": 0.2913, + "epoch": 0.54, + "grad_norm": 0.37089302180149, + "learning_rate": 9.253758895998575e-06, + "loss": 0.3131, "step": 11713 }, { - "epoch": 0.67, - "grad_norm": 0.45600963586374926, - "learning_rate": 5.102953568345973e-06, - "loss": 0.2749, + "epoch": 0.54, + "grad_norm": 0.4385983391000386, + "learning_rate": 9.252275133753466e-06, + "loss": 0.2794, "step": 11714 }, { - "epoch": 0.67, - "grad_norm": 0.4655809092972486, - "learning_rate": 5.101331145057975e-06, - "loss": 0.3785, + "epoch": 0.54, + "grad_norm": 0.6514131600036096, + "learning_rate": 9.250791388062263e-06, + "loss": 0.2822, "step": 11715 }, { - "epoch": 0.67, - "grad_norm": 0.36798216280033647, - "learning_rate": 5.0997088914106685e-06, - "loss": 0.2713, + "epoch": 0.54, + "grad_norm": 1.0679832644691192, + "learning_rate": 9.249307658957817e-06, + "loss": 0.6274, "step": 11716 }, { - "epoch": 0.67, - "grad_norm": 0.21140015509665294, - "learning_rate": 5.098086807460232e-06, - "loss": 0.1602, + "epoch": 0.54, + "grad_norm": 0.28284915146840306, + "learning_rate": 9.247823946472978e-06, + "loss": 0.2374, "step": 11717 }, { - "epoch": 0.67, - "grad_norm": 0.7371500660216942, - "learning_rate": 5.096464893262838e-06, - "loss": 0.3948, + "epoch": 0.54, + "grad_norm": 0.47598752314670184, + "learning_rate": 9.24634025064059e-06, + "loss": 0.2769, "step": 11718 }, { - "epoch": 0.67, - "grad_norm": 0.3635716936363465, - "learning_rate": 5.094843148874654e-06, - "loss": 0.2994, + "epoch": 0.54, + "grad_norm": 0.5375053465251965, + "learning_rate": 9.244856571493502e-06, + "loss": 0.2979, "step": 11719 }, { - "epoch": 0.67, - "grad_norm": 0.3033816677570989, - "learning_rate": 5.0932215743518375e-06, - "loss": 0.2358, + "epoch": 0.54, + "grad_norm": 0.3669957796236485, + "learning_rate": 9.243372909064564e-06, + "loss": 0.2588, "step": 11720 }, { - "epoch": 0.67, - "grad_norm": 1.0560147653471157, - "learning_rate": 5.0916001697505506e-06, - "loss": 0.6911, + "epoch": 0.54, + "grad_norm": 0.28852017958288007, + "learning_rate": 9.241889263386618e-06, + "loss": 0.1947, "step": 11721 }, { - "epoch": 0.67, - "grad_norm": 0.3864890302897584, - "learning_rate": 5.089978935126939e-06, - "loss": 0.2657, + "epoch": 0.54, + "grad_norm": 1.0661933004402595, + "learning_rate": 9.240405634492515e-06, + "loss": 0.6298, "step": 11722 }, { - "epoch": 0.67, - "grad_norm": 0.22419003265245574, - "learning_rate": 5.088357870537146e-06, - "loss": 0.1348, + "epoch": 0.54, + "grad_norm": 0.6273475437122229, + "learning_rate": 9.238922022415095e-06, + "loss": 0.3381, "step": 11723 }, { - "epoch": 0.67, - "grad_norm": 0.3287953682638831, - "learning_rate": 5.086736976037304e-06, - "loss": 0.3103, + "epoch": 0.54, + "grad_norm": 0.40273542611871793, + "learning_rate": 9.23743842718721e-06, + "loss": 0.344, "step": 11724 }, { - "epoch": 0.67, - "grad_norm": 0.7126230773256188, - "learning_rate": 5.085116251683554e-06, - "loss": 0.3805, + "epoch": 0.54, + "grad_norm": 0.38936299256955526, + "learning_rate": 9.235954848841708e-06, + "loss": 0.287, "step": 11725 }, { - "epoch": 0.67, - "grad_norm": 0.34080945340313146, - "learning_rate": 5.083495697532016e-06, - "loss": 0.2898, + "epoch": 0.54, + "grad_norm": 0.6402911988383123, + "learning_rate": 9.234471287411427e-06, + "loss": 0.3524, "step": 11726 }, { - "epoch": 0.67, - "grad_norm": 0.32289359281894736, - "learning_rate": 5.081875313638811e-06, - "loss": 0.2766, + "epoch": 0.54, + "grad_norm": 0.25480388390569253, + "learning_rate": 9.232987742929214e-06, + "loss": 0.1834, "step": 11727 }, { - "epoch": 0.67, - "grad_norm": 0.7495934803198708, - "learning_rate": 5.080255100060048e-06, - "loss": 0.3571, + "epoch": 0.54, + "grad_norm": 1.0472047430212839, + "learning_rate": 9.231504215427906e-06, + "loss": 0.3271, "step": 11728 }, { - "epoch": 0.67, - "grad_norm": 0.2572948506856289, - "learning_rate": 5.078635056851844e-06, - "loss": 0.2007, + "epoch": 0.54, + "grad_norm": 0.3670156199905095, + "learning_rate": 9.230020704940361e-06, + "loss": 0.266, "step": 11729 }, { - "epoch": 0.67, - "grad_norm": 0.5697537663129776, - "learning_rate": 5.077015184070296e-06, - "loss": 0.2481, + "epoch": 0.54, + "grad_norm": 0.38175122821227114, + "learning_rate": 9.228537211499415e-06, + "loss": 0.3192, "step": 11730 }, { - "epoch": 0.67, - "grad_norm": 0.33024815771221205, - "learning_rate": 5.075395481771501e-06, - "loss": 0.2487, + "epoch": 0.54, + "grad_norm": 0.665170701756217, + "learning_rate": 9.227053735137911e-06, + "loss": 0.3251, "step": 11731 }, { - "epoch": 0.67, - "grad_norm": 0.29132435493008624, - "learning_rate": 5.073775950011548e-06, - "loss": 0.2737, + "epoch": 0.54, + "grad_norm": 0.2952840968826415, + "learning_rate": 9.225570275888692e-06, + "loss": 0.211, "step": 11732 }, { - "epoch": 0.67, - "grad_norm": 1.1874298610861767, - "learning_rate": 5.072156588846519e-06, - "loss": 0.5421, + "epoch": 0.54, + "grad_norm": 0.30784671402876257, + "learning_rate": 9.2240868337846e-06, + "loss": 0.2654, "step": 11733 }, { - "epoch": 0.67, - "grad_norm": 0.5883251543734425, - "learning_rate": 5.070537398332498e-06, - "loss": 0.3345, + "epoch": 0.54, + "grad_norm": 1.5845608661243697, + "learning_rate": 9.222603408858479e-06, + "loss": 0.6848, "step": 11734 }, { - "epoch": 0.67, - "grad_norm": 0.27343202258408916, - "learning_rate": 5.068918378525555e-06, - "loss": 0.2571, + "epoch": 0.54, + "grad_norm": 0.44170935422139157, + "learning_rate": 9.22112000114317e-06, + "loss": 0.2225, "step": 11735 }, { - "epoch": 0.67, - "grad_norm": 0.6437468881803801, - "learning_rate": 5.067299529481758e-06, - "loss": 0.2749, + "epoch": 0.54, + "grad_norm": 0.5822223480235589, + "learning_rate": 9.21963661067151e-06, + "loss": 0.3921, "step": 11736 }, { - "epoch": 0.67, - "grad_norm": 0.34159851863097196, - "learning_rate": 5.065680851257162e-06, - "loss": 0.1786, + "epoch": 0.54, + "grad_norm": 0.38097048647408255, + "learning_rate": 9.218153237476347e-06, + "loss": 0.3226, "step": 11737 }, { - "epoch": 0.67, - "grad_norm": 0.37613981223297227, - "learning_rate": 5.0640623439078285e-06, - "loss": 0.2851, + "epoch": 0.54, + "grad_norm": 0.35596313530092244, + "learning_rate": 9.216669881590515e-06, + "loss": 0.2068, "step": 11738 }, { - "epoch": 0.67, - "grad_norm": 0.365571260834238, - "learning_rate": 5.062444007489804e-06, - "loss": 0.2933, + "epoch": 0.54, + "grad_norm": 0.27248729064045973, + "learning_rate": 9.215186543046859e-06, + "loss": 0.1677, "step": 11739 }, { - "epoch": 0.67, - "grad_norm": 0.346912717482984, - "learning_rate": 5.060825842059132e-06, - "loss": 0.1916, + "epoch": 0.54, + "grad_norm": 1.1048807784267523, + "learning_rate": 9.213703221878217e-06, + "loss": 0.5724, "step": 11740 }, { - "epoch": 0.67, - "grad_norm": 0.47141640995945183, - "learning_rate": 5.059207847671845e-06, - "loss": 0.3235, + "epoch": 0.54, + "grad_norm": 0.3201440479629725, + "learning_rate": 9.212219918117423e-06, + "loss": 0.2218, "step": 11741 }, { - "epoch": 0.67, - "grad_norm": 0.45849898304823045, - "learning_rate": 5.05759002438398e-06, - "loss": 0.2979, + "epoch": 0.54, + "grad_norm": 0.3877990882605535, + "learning_rate": 9.210736631797323e-06, + "loss": 0.3227, "step": 11742 }, { - "epoch": 0.67, - "grad_norm": 0.25376962264594627, - "learning_rate": 5.055972372251562e-06, - "loss": 0.1864, + "epoch": 0.54, + "grad_norm": 0.8665899517857275, + "learning_rate": 9.209253362950756e-06, + "loss": 0.4682, "step": 11743 }, { - "epoch": 0.67, - "grad_norm": 0.3635867761463181, - "learning_rate": 5.054354891330607e-06, - "loss": 0.2763, + "epoch": 0.54, + "grad_norm": 0.20263733383781116, + "learning_rate": 9.207770111610558e-06, + "loss": 0.0699, "step": 11744 }, { - "epoch": 0.67, - "grad_norm": 1.209100047476115, - "learning_rate": 5.05273758167713e-06, - "loss": 0.7096, + "epoch": 0.54, + "grad_norm": 0.27644202449833455, + "learning_rate": 9.206286877809561e-06, + "loss": 0.2435, "step": 11745 }, { - "epoch": 0.67, - "grad_norm": 0.5569931214330035, - "learning_rate": 5.051120443347134e-06, - "loss": 0.2018, + "epoch": 0.54, + "grad_norm": 1.2389493161484852, + "learning_rate": 9.20480366158061e-06, + "loss": 0.5767, "step": 11746 }, { - "epoch": 0.67, - "grad_norm": 0.2892336497399014, - "learning_rate": 5.049503476396627e-06, - "loss": 0.2789, + "epoch": 0.54, + "grad_norm": 0.7697942041233611, + "learning_rate": 9.203320462956542e-06, + "loss": 0.3713, "step": 11747 }, { - "epoch": 0.67, - "grad_norm": 0.43334647063447024, - "learning_rate": 5.047886680881603e-06, - "loss": 0.3457, + "epoch": 0.54, + "grad_norm": 0.40568154309710064, + "learning_rate": 9.201837281970189e-06, + "loss": 0.2619, "step": 11748 }, { - "epoch": 0.68, - "grad_norm": 0.165429228208656, - "learning_rate": 5.0462700568580495e-06, - "loss": 0.0847, + "epoch": 0.54, + "grad_norm": 0.4170892453425178, + "learning_rate": 9.200354118654388e-06, + "loss": 0.3199, "step": 11749 }, { - "epoch": 0.68, - "grad_norm": 0.4192615834692852, - "learning_rate": 5.044653604381952e-06, - "loss": 0.3126, + "epoch": 0.54, + "grad_norm": 0.5786696817018457, + "learning_rate": 9.198870973041977e-06, + "loss": 0.3088, "step": 11750 }, { - "epoch": 0.68, - "grad_norm": 0.3438659632511523, - "learning_rate": 5.043037323509285e-06, - "loss": 0.3159, + "epoch": 0.54, + "grad_norm": 0.24595719680342173, + "learning_rate": 9.197387845165792e-06, + "loss": 0.1654, "step": 11751 }, { - "epoch": 0.68, - "grad_norm": 0.6009103614653322, - "learning_rate": 5.041421214296025e-06, - "loss": 0.375, + "epoch": 0.54, + "grad_norm": 1.0613192706043042, + "learning_rate": 9.195904735058667e-06, + "loss": 0.564, "step": 11752 }, { - "epoch": 0.68, - "grad_norm": 0.32373127606272445, - "learning_rate": 5.039805276798128e-06, - "loss": 0.2451, + "epoch": 0.54, + "grad_norm": 0.3275506925455265, + "learning_rate": 9.19442164275343e-06, + "loss": 0.2624, "step": 11753 }, { - "epoch": 0.68, - "grad_norm": 1.0671526753829008, - "learning_rate": 5.0381895110715676e-06, - "loss": 0.5263, + "epoch": 0.54, + "grad_norm": 0.41732927715402635, + "learning_rate": 9.19293856828293e-06, + "loss": 0.2603, "step": 11754 }, { - "epoch": 0.68, - "grad_norm": 0.23259453773066574, - "learning_rate": 5.03657391717229e-06, - "loss": 0.1934, + "epoch": 0.54, + "grad_norm": 0.853511541896152, + "learning_rate": 9.191455511679988e-06, + "loss": 0.5283, "step": 11755 }, { - "epoch": 0.68, - "grad_norm": 0.3365964910715124, - "learning_rate": 5.0349584951562445e-06, - "loss": 0.2178, + "epoch": 0.54, + "grad_norm": 0.5844273255196977, + "learning_rate": 9.189972472977445e-06, + "loss": 0.2569, "step": 11756 }, { - "epoch": 0.68, - "grad_norm": 1.0692943385938543, - "learning_rate": 5.033343245079373e-06, - "loss": 0.7361, + "epoch": 0.54, + "grad_norm": 0.28738762893968806, + "learning_rate": 9.188489452208127e-06, + "loss": 0.2362, "step": 11757 }, { - "epoch": 0.68, - "grad_norm": 0.5576953322811902, - "learning_rate": 5.031728166997607e-06, - "loss": 0.3471, + "epoch": 0.54, + "grad_norm": 0.3241662313616925, + "learning_rate": 9.187006449404867e-06, + "loss": 0.2219, "step": 11758 }, { - "epoch": 0.68, - "grad_norm": 0.2808777177692286, - "learning_rate": 5.0301132609668845e-06, - "loss": 0.2165, + "epoch": 0.54, + "grad_norm": 0.580633524159367, + "learning_rate": 9.185523464600506e-06, + "loss": 0.3758, "step": 11759 }, { - "epoch": 0.68, - "grad_norm": 0.5072273706254729, - "learning_rate": 5.028498527043126e-06, - "loss": 0.3641, + "epoch": 0.54, + "grad_norm": 0.3879732705561156, + "learning_rate": 9.18404049782787e-06, + "loss": 0.2849, "step": 11760 }, { - "epoch": 0.68, - "grad_norm": 0.2782055835137195, - "learning_rate": 5.026883965282252e-06, - "loss": 0.1837, + "epoch": 0.54, + "grad_norm": 0.3760149772286431, + "learning_rate": 9.18255754911979e-06, + "loss": 0.2715, "step": 11761 }, { - "epoch": 0.68, - "grad_norm": 0.322179200138884, - "learning_rate": 5.025269575740166e-06, - "loss": 0.222, + "epoch": 0.54, + "grad_norm": 0.8634905954045263, + "learning_rate": 9.181074618509097e-06, + "loss": 0.4201, "step": 11762 }, { - "epoch": 0.68, - "grad_norm": 0.3592214488272502, - "learning_rate": 5.023655358472786e-06, - "loss": 0.3299, + "epoch": 0.54, + "grad_norm": 0.2857079586336548, + "learning_rate": 9.179591706028626e-06, + "loss": 0.226, "step": 11763 }, { - "epoch": 0.68, - "grad_norm": 0.6839481155341293, - "learning_rate": 5.022041313536006e-06, - "loss": 0.4145, + "epoch": 0.54, + "grad_norm": 0.3111934446404846, + "learning_rate": 9.178108811711202e-06, + "loss": 0.2144, "step": 11764 }, { - "epoch": 0.68, - "grad_norm": 0.302911265362752, - "learning_rate": 5.020427440985721e-06, - "loss": 0.2343, + "epoch": 0.54, + "grad_norm": 0.39948621928221306, + "learning_rate": 9.176625935589657e-06, + "loss": 0.3213, "step": 11765 }, { - "epoch": 0.68, - "grad_norm": 1.2681543614082758, - "learning_rate": 5.018813740877817e-06, - "loss": 0.234, + "epoch": 0.54, + "grad_norm": 0.3966612751084353, + "learning_rate": 9.175143077696818e-06, + "loss": 0.2983, "step": 11766 }, { - "epoch": 0.68, - "grad_norm": 0.23444856503788336, - "learning_rate": 5.0172002132681815e-06, - "loss": 0.2076, + "epoch": 0.54, + "grad_norm": 0.8917503790897656, + "learning_rate": 9.173660238065519e-06, + "loss": 0.3729, "step": 11767 }, { - "epoch": 0.68, - "grad_norm": 0.32344719208450956, - "learning_rate": 5.0155868582126886e-06, - "loss": 0.262, + "epoch": 0.54, + "grad_norm": 0.448262125383949, + "learning_rate": 9.172177416728584e-06, + "loss": 0.2974, "step": 11768 }, { - "epoch": 0.68, - "grad_norm": 0.8788977231247462, - "learning_rate": 5.0139736757672095e-06, - "loss": 0.3568, + "epoch": 0.54, + "grad_norm": 0.34330871468696605, + "learning_rate": 9.170694613718845e-06, + "loss": 0.2699, "step": 11769 }, { - "epoch": 0.68, - "grad_norm": 0.5589684828125013, - "learning_rate": 5.012360665987607e-06, - "loss": 0.3362, + "epoch": 0.54, + "grad_norm": 0.2577080291206743, + "learning_rate": 9.169211829069129e-06, + "loss": 0.1494, "step": 11770 }, { - "epoch": 0.68, - "grad_norm": 0.2517561774348396, - "learning_rate": 5.010747828929736e-06, - "loss": 0.2556, + "epoch": 0.54, + "grad_norm": 0.5871604841866879, + "learning_rate": 9.167729062812256e-06, + "loss": 0.3608, "step": 11771 }, { - "epoch": 0.68, - "grad_norm": 1.1552259575630777, - "learning_rate": 5.009135164649457e-06, - "loss": 0.2947, + "epoch": 0.54, + "grad_norm": 0.4158101012583032, + "learning_rate": 9.166246314981066e-06, + "loss": 0.3119, "step": 11772 }, { - "epoch": 0.68, - "grad_norm": 0.35356777969622866, - "learning_rate": 5.007522673202613e-06, - "loss": 0.2245, + "epoch": 0.54, + "grad_norm": 0.38083852072478935, + "learning_rate": 9.164763585608379e-06, + "loss": 0.3059, "step": 11773 }, { - "epoch": 0.68, - "grad_norm": 0.33910329035869946, - "learning_rate": 5.005910354645043e-06, - "loss": 0.2669, + "epoch": 0.54, + "grad_norm": 0.35566389357598654, + "learning_rate": 9.16328087472702e-06, + "loss": 0.0687, "step": 11774 }, { - "epoch": 0.68, - "grad_norm": 0.3495175522409411, - "learning_rate": 5.0042982090325805e-06, - "loss": 0.3104, + "epoch": 0.54, + "grad_norm": 0.4229299559826986, + "learning_rate": 9.161798182369809e-06, + "loss": 0.3093, "step": 11775 }, { - "epoch": 0.68, - "grad_norm": 0.622377722237779, - "learning_rate": 5.002686236421059e-06, - "loss": 0.2137, + "epoch": 0.54, + "grad_norm": 0.3763830717552729, + "learning_rate": 9.160315508569587e-06, + "loss": 0.2907, "step": 11776 }, { - "epoch": 0.68, - "grad_norm": 0.34094064564462573, - "learning_rate": 5.0010744368663e-06, - "loss": 0.2648, + "epoch": 0.54, + "grad_norm": 0.2850816016617573, + "learning_rate": 9.158832853359167e-06, + "loss": 0.1948, "step": 11777 }, { - "epoch": 0.68, - "grad_norm": 1.1026202721985212, - "learning_rate": 4.999462810424116e-06, - "loss": 0.4879, + "epoch": 0.54, + "grad_norm": 0.40507525725455573, + "learning_rate": 9.157350216771379e-06, + "loss": 0.263, "step": 11778 }, { - "epoch": 0.68, - "grad_norm": 0.3465454930026479, - "learning_rate": 4.9978513571503175e-06, - "loss": 0.2476, + "epoch": 0.54, + "grad_norm": 1.2206266241756674, + "learning_rate": 9.155867598839042e-06, + "loss": 0.5766, "step": 11779 }, { - "epoch": 0.68, - "grad_norm": 0.295672774198805, - "learning_rate": 4.996240077100713e-06, - "loss": 0.242, + "epoch": 0.54, + "grad_norm": 0.3806652774536837, + "learning_rate": 9.154384999594985e-06, + "loss": 0.2507, "step": 11780 }, { - "epoch": 0.68, - "grad_norm": 0.6049472071811616, - "learning_rate": 4.994628970331102e-06, - "loss": 0.3999, + "epoch": 0.54, + "grad_norm": 0.3524388530149945, + "learning_rate": 9.15290241907203e-06, + "loss": 0.2451, "step": 11781 }, { - "epoch": 0.68, - "grad_norm": 0.29679226403336495, - "learning_rate": 4.993018036897274e-06, - "loss": 0.1961, + "epoch": 0.54, + "grad_norm": 0.7180814612700875, + "learning_rate": 9.151419857302997e-06, + "loss": 0.385, "step": 11782 }, { - "epoch": 0.68, - "grad_norm": 0.26024884929211345, - "learning_rate": 4.991407276855016e-06, - "loss": 0.2608, + "epoch": 0.54, + "grad_norm": 0.26947866846550655, + "learning_rate": 9.14993731432071e-06, + "loss": 0.1221, "step": 11783 }, { - "epoch": 0.68, - "grad_norm": 0.41637954941656796, - "learning_rate": 4.989796690260108e-06, - "loss": 0.2302, + "epoch": 0.54, + "grad_norm": 0.35551347087258284, + "learning_rate": 9.148454790157993e-06, + "loss": 0.2778, "step": 11784 }, { - "epoch": 0.68, - "grad_norm": 1.0573213732138746, - "learning_rate": 4.988186277168325e-06, - "loss": 0.3642, + "epoch": 0.54, + "grad_norm": 0.30523006271363934, + "learning_rate": 9.146972284847665e-06, + "loss": 0.2543, "step": 11785 }, { - "epoch": 0.68, - "grad_norm": 0.32321682893392756, - "learning_rate": 4.9865760376354365e-06, - "loss": 0.2565, + "epoch": 0.54, + "grad_norm": 0.9596008431398434, + "learning_rate": 9.14548979842255e-06, + "loss": 0.5304, "step": 11786 }, { - "epoch": 0.68, - "grad_norm": 0.3490489850595347, - "learning_rate": 4.9849659717172e-06, - "loss": 0.2924, + "epoch": 0.54, + "grad_norm": 0.45840696915878626, + "learning_rate": 9.144007330915469e-06, + "loss": 0.2286, "step": 11787 }, { - "epoch": 0.68, - "grad_norm": 0.4404451474161239, - "learning_rate": 4.98335607946938e-06, - "loss": 0.3545, + "epoch": 0.54, + "grad_norm": 0.606249565030041, + "learning_rate": 9.142524882359234e-06, + "loss": 0.2928, "step": 11788 }, { - "epoch": 0.68, - "grad_norm": 0.23778656116027025, - "learning_rate": 4.981746360947724e-06, - "loss": 0.1595, + "epoch": 0.54, + "grad_norm": 0.5354836941467207, + "learning_rate": 9.141042452786677e-06, + "loss": 0.3854, "step": 11789 }, { - "epoch": 0.68, - "grad_norm": 1.3534561202280206, - "learning_rate": 4.980136816207974e-06, - "loss": 0.6599, + "epoch": 0.54, + "grad_norm": 0.32076393707763484, + "learning_rate": 9.13956004223061e-06, + "loss": 0.1904, "step": 11790 }, { - "epoch": 0.68, - "grad_norm": 0.3459827769144183, - "learning_rate": 4.978527445305869e-06, - "loss": 0.3084, + "epoch": 0.54, + "grad_norm": 0.2771889320698763, + "learning_rate": 9.13807765072386e-06, + "loss": 0.1656, "step": 11791 }, { - "epoch": 0.68, - "grad_norm": 0.2924247907379295, - "learning_rate": 4.976918248297145e-06, - "loss": 0.1902, + "epoch": 0.54, + "grad_norm": 0.374554057238784, + "learning_rate": 9.136595278299232e-06, + "loss": 0.313, "step": 11792 }, { - "epoch": 0.68, - "grad_norm": 0.524691825002447, - "learning_rate": 4.9753092252375245e-06, - "loss": 0.396, + "epoch": 0.54, + "grad_norm": 0.36102194161686546, + "learning_rate": 9.135112924989555e-06, + "loss": 0.2315, "step": 11793 }, { - "epoch": 0.68, - "grad_norm": 0.36268677838139596, - "learning_rate": 4.973700376182732e-06, - "loss": 0.3425, + "epoch": 0.54, + "grad_norm": 0.7740935792251299, + "learning_rate": 9.133630590827646e-06, + "loss": 0.4283, "step": 11794 }, { - "epoch": 0.68, - "grad_norm": 0.2732204910654022, - "learning_rate": 4.972091701188478e-06, - "loss": 0.1814, + "epoch": 0.54, + "grad_norm": 0.8061229661665564, + "learning_rate": 9.132148275846322e-06, + "loss": 0.5376, "step": 11795 }, { - "epoch": 0.68, - "grad_norm": 0.2662254490724855, - "learning_rate": 4.970483200310468e-06, - "loss": 0.1567, + "epoch": 0.54, + "grad_norm": 0.3329793498380328, + "learning_rate": 9.130665980078394e-06, + "loss": 0.2167, "step": 11796 }, { - "epoch": 0.68, - "grad_norm": 0.6628178672031443, - "learning_rate": 4.968874873604414e-06, - "loss": 0.4117, + "epoch": 0.54, + "grad_norm": 0.23055121123074543, + "learning_rate": 9.129183703556687e-06, + "loss": 0.2039, "step": 11797 }, { - "epoch": 0.68, - "grad_norm": 0.31714829171919484, - "learning_rate": 4.967266721126005e-06, - "loss": 0.1911, + "epoch": 0.54, + "grad_norm": 0.9337471676199127, + "learning_rate": 9.127701446314013e-06, + "loss": 0.4027, "step": 11798 }, { - "epoch": 0.68, - "grad_norm": 0.3405285791476989, - "learning_rate": 4.965658742930934e-06, - "loss": 0.3089, + "epoch": 0.54, + "grad_norm": 0.3465239999456, + "learning_rate": 9.126219208383188e-06, + "loss": 0.2668, "step": 11799 }, { - "epoch": 0.68, - "grad_norm": 1.0963642128915825, - "learning_rate": 4.964050939074881e-06, - "loss": 0.6414, + "epoch": 0.54, + "grad_norm": 0.35354192922023686, + "learning_rate": 9.124736989797028e-06, + "loss": 0.2543, "step": 11800 }, { - "epoch": 0.68, - "grad_norm": 0.2131071516902079, - "learning_rate": 4.962443309613529e-06, - "loss": 0.1658, + "epoch": 0.54, + "grad_norm": 1.0951520511256436, + "learning_rate": 9.123254790588346e-06, + "loss": 0.665, "step": 11801 }, { - "epoch": 0.68, - "grad_norm": 0.3574377659432091, - "learning_rate": 4.96083585460255e-06, - "loss": 0.2421, + "epoch": 0.54, + "grad_norm": 0.3530882370436018, + "learning_rate": 9.121772610789959e-06, + "loss": 0.2531, "step": 11802 }, { - "epoch": 0.68, - "grad_norm": 0.4570436088730508, - "learning_rate": 4.95922857409761e-06, - "loss": 0.3248, + "epoch": 0.54, + "grad_norm": 0.2335556021716112, + "learning_rate": 9.120290450434678e-06, + "loss": 0.1077, "step": 11803 }, { - "epoch": 0.68, - "grad_norm": 0.3235139120968666, - "learning_rate": 4.9576214681543626e-06, - "loss": 0.2579, + "epoch": 0.54, + "grad_norm": 0.49430505471762104, + "learning_rate": 9.118808309555323e-06, + "loss": 0.3072, "step": 11804 }, { - "epoch": 0.68, - "grad_norm": 0.8548199042981579, - "learning_rate": 4.956014536828471e-06, - "loss": 0.3425, + "epoch": 0.54, + "grad_norm": 0.3925051521775017, + "learning_rate": 9.117326188184696e-06, + "loss": 0.2909, "step": 11805 }, { - "epoch": 0.68, - "grad_norm": 0.3368131841542381, - "learning_rate": 4.954407780175578e-06, - "loss": 0.3074, + "epoch": 0.54, + "grad_norm": 0.9568778097199951, + "learning_rate": 9.11584408635562e-06, + "loss": 0.327, "step": 11806 }, { - "epoch": 0.68, - "grad_norm": 0.3103485250750534, - "learning_rate": 4.952801198251328e-06, - "loss": 0.2388, + "epoch": 0.54, + "grad_norm": 1.153122845252285, + "learning_rate": 9.114362004100905e-06, + "loss": 0.7138, "step": 11807 }, { - "epoch": 0.68, - "grad_norm": 0.2644081194182972, - "learning_rate": 4.95119479111135e-06, - "loss": 0.091, + "epoch": 0.54, + "grad_norm": 0.3591160629562497, + "learning_rate": 9.112879941453361e-06, + "loss": 0.2587, "step": 11808 }, { - "epoch": 0.68, - "grad_norm": 0.811557495156051, - "learning_rate": 4.949588558811285e-06, - "loss": 0.3806, + "epoch": 0.54, + "grad_norm": 0.24837658650669117, + "learning_rate": 9.111397898445798e-06, + "loss": 0.1786, "step": 11809 }, { - "epoch": 0.68, - "grad_norm": 0.4076560589755643, - "learning_rate": 4.947982501406749e-06, - "loss": 0.2817, + "epoch": 0.54, + "grad_norm": 1.0046858414902224, + "learning_rate": 9.109915875111032e-06, + "loss": 0.4097, "step": 11810 }, { - "epoch": 0.68, - "grad_norm": 0.29270911302935926, - "learning_rate": 4.946376618953364e-06, - "loss": 0.2591, + "epoch": 0.54, + "grad_norm": 0.440712195311647, + "learning_rate": 9.10843387148187e-06, + "loss": 0.29, "step": 11811 }, { - "epoch": 0.68, - "grad_norm": 0.9784414407077597, - "learning_rate": 4.944770911506739e-06, - "loss": 0.536, + "epoch": 0.54, + "grad_norm": 0.38741481103051595, + "learning_rate": 9.106951887591123e-06, + "loss": 0.3299, "step": 11812 }, { - "epoch": 0.68, - "grad_norm": 0.3713203487743198, - "learning_rate": 4.9431653791224744e-06, - "loss": 0.2449, + "epoch": 0.54, + "grad_norm": 0.47143590848608313, + "learning_rate": 9.105469923471599e-06, + "loss": 0.2797, "step": 11813 }, { - "epoch": 0.68, - "grad_norm": 0.26768673340053445, - "learning_rate": 4.941560021856181e-06, - "loss": 0.2029, + "epoch": 0.54, + "grad_norm": 0.3396606957200545, + "learning_rate": 9.10398797915611e-06, + "loss": 0.2624, "step": 11814 }, { - "epoch": 0.68, - "grad_norm": 0.4008652783655156, - "learning_rate": 4.9399548397634455e-06, - "loss": 0.2715, + "epoch": 0.54, + "grad_norm": 0.7526467285562537, + "learning_rate": 9.102506054677462e-06, + "loss": 0.4395, "step": 11815 }, { - "epoch": 0.68, - "grad_norm": 0.3935508821109515, - "learning_rate": 4.938349832899856e-06, - "loss": 0.2394, + "epoch": 0.54, + "grad_norm": 0.3541252807405396, + "learning_rate": 9.101024150068467e-06, + "loss": 0.2693, "step": 11816 }, { - "epoch": 0.68, - "grad_norm": 0.49750616813772636, - "learning_rate": 4.9367450013209905e-06, - "loss": 0.3878, + "epoch": 0.54, + "grad_norm": 0.2787188827962047, + "learning_rate": 9.09954226536193e-06, + "loss": 0.2285, "step": 11817 }, { - "epoch": 0.68, - "grad_norm": 0.3181744075806371, - "learning_rate": 4.935140345082436e-06, - "loss": 0.2385, + "epoch": 0.54, + "grad_norm": 0.39751221059998965, + "learning_rate": 9.098060400590657e-06, + "loss": 0.2689, "step": 11818 }, { - "epoch": 0.68, - "grad_norm": 0.42534101653733136, - "learning_rate": 4.93353586423975e-06, - "loss": 0.3122, + "epoch": 0.54, + "grad_norm": 0.8949340531125743, + "learning_rate": 9.096578555787462e-06, + "loss": 0.5405, "step": 11819 }, { - "epoch": 0.68, - "grad_norm": 0.27271478591220527, - "learning_rate": 4.9319315588484954e-06, - "loss": 0.2234, + "epoch": 0.54, + "grad_norm": 0.32291750460531427, + "learning_rate": 9.095096730985145e-06, + "loss": 0.237, "step": 11820 }, { - "epoch": 0.68, - "grad_norm": 0.6360729424545637, - "learning_rate": 4.930327428964235e-06, - "loss": 0.3097, + "epoch": 0.54, + "grad_norm": 0.5698087836804717, + "learning_rate": 9.093614926216515e-06, + "loss": 0.3993, "step": 11821 }, { - "epoch": 0.68, - "grad_norm": 0.32253078555839093, - "learning_rate": 4.9287234746425195e-06, - "loss": 0.2473, + "epoch": 0.54, + "grad_norm": 0.5012497983219936, + "learning_rate": 9.092133141514371e-06, + "loss": 0.2339, "step": 11822 }, { - "epoch": 0.68, - "grad_norm": 0.34936474152769675, - "learning_rate": 4.927119695938891e-06, - "loss": 0.3168, + "epoch": 0.54, + "grad_norm": 0.2792818336966272, + "learning_rate": 9.090651376911532e-06, + "loss": 0.2037, "step": 11823 }, { - "epoch": 0.68, - "grad_norm": 0.9379810581274167, - "learning_rate": 4.925516092908891e-06, - "loss": 0.3485, + "epoch": 0.54, + "grad_norm": 0.5410362717887204, + "learning_rate": 9.089169632440792e-06, + "loss": 0.3914, "step": 11824 }, { - "epoch": 0.68, - "grad_norm": 0.31432849488872344, - "learning_rate": 4.923912665608045e-06, - "loss": 0.2603, + "epoch": 0.54, + "grad_norm": 0.5527244329819719, + "learning_rate": 9.087687908134959e-06, + "loss": 0.3923, "step": 11825 }, { - "epoch": 0.68, - "grad_norm": 0.4927456068221821, - "learning_rate": 4.9223094140918894e-06, - "loss": 0.2566, + "epoch": 0.54, + "grad_norm": 0.3577616434257504, + "learning_rate": 9.086206204026834e-06, + "loss": 0.1946, "step": 11826 }, { - "epoch": 0.68, - "grad_norm": 0.31740940794607303, - "learning_rate": 4.920706338415941e-06, - "loss": 0.2725, + "epoch": 0.54, + "grad_norm": 0.6785985284342616, + "learning_rate": 9.084724520149226e-06, + "loss": 0.4166, "step": 11827 }, { - "epoch": 0.68, - "grad_norm": 0.35754575878697037, - "learning_rate": 4.919103438635713e-06, - "loss": 0.1741, + "epoch": 0.54, + "grad_norm": 0.3597716910895084, + "learning_rate": 9.083242856534935e-06, + "loss": 0.3222, "step": 11828 }, { - "epoch": 0.68, - "grad_norm": 0.4985427045345408, - "learning_rate": 4.91750071480671e-06, - "loss": 0.3825, + "epoch": 0.54, + "grad_norm": 0.29897549704083254, + "learning_rate": 9.081761213216763e-06, + "loss": 0.125, "step": 11829 }, { - "epoch": 0.68, - "grad_norm": 0.33708352874029796, - "learning_rate": 4.915898166984443e-06, - "loss": 0.2834, + "epoch": 0.54, + "grad_norm": 0.3563106573655419, + "learning_rate": 9.080279590227514e-06, + "loss": 0.2363, "step": 11830 }, { - "epoch": 0.68, - "grad_norm": 0.5475661290159132, - "learning_rate": 4.914295795224404e-06, - "loss": 0.1531, + "epoch": 0.54, + "grad_norm": 0.5196085394329052, + "learning_rate": 9.078797987599983e-06, + "loss": 0.4066, "step": 11831 }, { - "epoch": 0.68, - "grad_norm": 0.38665851593133643, - "learning_rate": 4.912693599582083e-06, - "loss": 0.3186, + "epoch": 0.54, + "grad_norm": 0.3182692762594009, + "learning_rate": 9.07731640536698e-06, + "loss": 0.1989, "step": 11832 }, { - "epoch": 0.68, - "grad_norm": 0.2821804515532111, - "learning_rate": 4.91109158011296e-06, - "loss": 0.191, + "epoch": 0.54, + "grad_norm": 0.6100782896133956, + "learning_rate": 9.075834843561305e-06, + "loss": 0.3486, "step": 11833 }, { - "epoch": 0.68, - "grad_norm": 0.2936069143811596, - "learning_rate": 4.909489736872521e-06, - "loss": 0.1984, + "epoch": 0.54, + "grad_norm": 1.3852521995572378, + "learning_rate": 9.074353302215755e-06, + "loss": 0.6042, "step": 11834 }, { - "epoch": 0.68, - "grad_norm": 0.5737718239366725, - "learning_rate": 4.907888069916234e-06, - "loss": 0.317, + "epoch": 0.54, + "grad_norm": 0.2790419103783699, + "learning_rate": 9.072871781363125e-06, + "loss": 0.1645, "step": 11835 }, { - "epoch": 0.68, - "grad_norm": 1.2137512039405558, - "learning_rate": 4.906286579299563e-06, - "loss": 0.6646, + "epoch": 0.54, + "grad_norm": 0.30682098628181514, + "learning_rate": 9.071390281036225e-06, + "loss": 0.2557, "step": 11836 }, { - "epoch": 0.68, - "grad_norm": 0.36728729290760725, - "learning_rate": 4.904685265077969e-06, - "loss": 0.19, + "epoch": 0.54, + "grad_norm": 1.5603228155869955, + "learning_rate": 9.069908801267853e-06, + "loss": 0.8318, "step": 11837 }, { - "epoch": 0.68, - "grad_norm": 0.30145925210114205, - "learning_rate": 4.903084127306901e-06, - "loss": 0.2932, + "epoch": 0.54, + "grad_norm": 0.4194742067223031, + "learning_rate": 9.0684273420908e-06, + "loss": 0.3008, "step": 11838 }, { - "epoch": 0.68, - "grad_norm": 0.44813104620717403, - "learning_rate": 4.901483166041815e-06, - "loss": 0.272, + "epoch": 0.54, + "grad_norm": 0.5475818016228682, + "learning_rate": 9.066945903537862e-06, + "loss": 0.2909, "step": 11839 }, { - "epoch": 0.68, - "grad_norm": 0.3188875124250454, - "learning_rate": 4.899882381338147e-06, - "loss": 0.2182, + "epoch": 0.54, + "grad_norm": 0.37007775103491114, + "learning_rate": 9.065464485641849e-06, + "loss": 0.3128, "step": 11840 }, { - "epoch": 0.68, - "grad_norm": 0.3501168527999765, - "learning_rate": 4.898281773251333e-06, - "loss": 0.254, + "epoch": 0.54, + "grad_norm": 0.2918843392682149, + "learning_rate": 9.06398308843555e-06, + "loss": 0.227, "step": 11841 }, { - "epoch": 0.68, - "grad_norm": 0.3725052385902562, - "learning_rate": 4.896681341836798e-06, - "loss": 0.3149, + "epoch": 0.54, + "grad_norm": 0.300116940729314, + "learning_rate": 9.062501711951766e-06, + "loss": 0.1426, "step": 11842 }, { - "epoch": 0.68, - "grad_norm": 0.3919431187346392, - "learning_rate": 4.895081087149974e-06, - "loss": 0.2764, + "epoch": 0.54, + "grad_norm": 0.4535662949441457, + "learning_rate": 9.061020356223285e-06, + "loss": 0.3749, "step": 11843 }, { - "epoch": 0.68, - "grad_norm": 0.447679970754077, - "learning_rate": 4.8934810092462705e-06, - "loss": 0.283, + "epoch": 0.54, + "grad_norm": 0.36144442444833325, + "learning_rate": 9.059539021282913e-06, + "loss": 0.2937, "step": 11844 }, { - "epoch": 0.68, - "grad_norm": 0.27528776173796865, - "learning_rate": 4.891881108181101e-06, - "loss": 0.1694, + "epoch": 0.54, + "grad_norm": 0.43987568205387334, + "learning_rate": 9.05805770716344e-06, + "loss": 0.2907, "step": 11845 }, { - "epoch": 0.68, - "grad_norm": 0.2515033888633789, - "learning_rate": 4.890281384009865e-06, - "loss": 0.2506, + "epoch": 0.54, + "grad_norm": 1.2693595244816196, + "learning_rate": 9.05657641389766e-06, + "loss": 0.5362, "step": 11846 }, { - "epoch": 0.68, - "grad_norm": 0.4647181444523978, - "learning_rate": 4.8886818367879686e-06, - "loss": 0.2796, + "epoch": 0.54, + "grad_norm": 0.33835908791600655, + "learning_rate": 9.05509514151837e-06, + "loss": 0.194, "step": 11847 }, { - "epoch": 0.68, - "grad_norm": 0.6205120870967442, - "learning_rate": 4.8870824665708e-06, - "loss": 0.4248, + "epoch": 0.54, + "grad_norm": 0.21939623798901306, + "learning_rate": 9.053613890058362e-06, + "loss": 0.2079, "step": 11848 }, { - "epoch": 0.68, - "grad_norm": 0.6572729053267969, - "learning_rate": 4.885483273413747e-06, - "loss": 0.3524, + "epoch": 0.54, + "grad_norm": 0.6067823500047931, + "learning_rate": 9.052132659550431e-06, + "loss": 0.4281, "step": 11849 }, { - "epoch": 0.68, - "grad_norm": 0.26634984300357595, - "learning_rate": 4.883884257372188e-06, - "loss": 0.2635, + "epoch": 0.54, + "grad_norm": 0.598036160362092, + "learning_rate": 9.05065145002737e-06, + "loss": 0.337, "step": 11850 }, { - "epoch": 0.68, - "grad_norm": 0.21217690977582984, - "learning_rate": 4.882285418501497e-06, - "loss": 0.1465, + "epoch": 0.54, + "grad_norm": 0.4178477164980959, + "learning_rate": 9.04917026152197e-06, + "loss": 0.3061, "step": 11851 }, { - "epoch": 0.68, - "grad_norm": 0.823238938356525, - "learning_rate": 4.88068675685704e-06, - "loss": 0.5252, + "epoch": 0.54, + "grad_norm": 0.3538169125567631, + "learning_rate": 9.04768909406702e-06, + "loss": 0.2505, "step": 11852 }, { - "epoch": 0.68, - "grad_norm": 0.346625383613797, - "learning_rate": 4.879088272494184e-06, - "loss": 0.2784, + "epoch": 0.54, + "grad_norm": 0.5843021237082427, + "learning_rate": 9.046207947695321e-06, + "loss": 0.2633, "step": 11853 }, { - "epoch": 0.68, - "grad_norm": 0.3474056837835713, - "learning_rate": 4.877489965468274e-06, - "loss": 0.2554, + "epoch": 0.54, + "grad_norm": 0.24799342040782396, + "learning_rate": 9.044726822439658e-06, + "loss": 0.2028, "step": 11854 }, { - "epoch": 0.68, - "grad_norm": 0.6079999000177458, - "learning_rate": 4.875891835834672e-06, - "loss": 0.3113, + "epoch": 0.54, + "grad_norm": 0.5474643161308999, + "learning_rate": 9.043245718332821e-06, + "loss": 0.2846, "step": 11855 }, { - "epoch": 0.68, - "grad_norm": 0.37799226612572256, - "learning_rate": 4.874293883648714e-06, - "loss": 0.3038, + "epoch": 0.54, + "grad_norm": 0.3481956129383209, + "learning_rate": 9.041764635407602e-06, + "loss": 0.2926, "step": 11856 }, { - "epoch": 0.68, - "grad_norm": 0.43480049777111607, - "learning_rate": 4.8726961089657385e-06, - "loss": 0.1556, + "epoch": 0.54, + "grad_norm": 0.4957189851958987, + "learning_rate": 9.040283573696791e-06, + "loss": 0.3414, "step": 11857 }, { - "epoch": 0.68, - "grad_norm": 0.255060595881029, - "learning_rate": 4.871098511841073e-06, - "loss": 0.2094, + "epoch": 0.54, + "grad_norm": 0.9560524433233588, + "learning_rate": 9.038802533233178e-06, + "loss": 0.3027, "step": 11858 }, { - "epoch": 0.68, - "grad_norm": 0.37794696995047256, - "learning_rate": 4.8695010923300505e-06, - "loss": 0.2933, + "epoch": 0.54, + "grad_norm": 0.31861216303255263, + "learning_rate": 9.037321514049549e-06, + "loss": 0.2266, "step": 11859 }, { - "epoch": 0.68, - "grad_norm": 0.703634639952293, - "learning_rate": 4.867903850487983e-06, - "loss": 0.3383, + "epoch": 0.54, + "grad_norm": 0.32814070314485694, + "learning_rate": 9.035840516178695e-06, + "loss": 0.2838, "step": 11860 }, { - "epoch": 0.68, - "grad_norm": 0.5711648018980142, - "learning_rate": 4.866306786370184e-06, - "loss": 0.3568, + "epoch": 0.54, + "grad_norm": 0.50559648032967, + "learning_rate": 9.034359539653399e-06, + "loss": 0.3059, "step": 11861 }, { - "epoch": 0.68, - "grad_norm": 0.3153893572774383, - "learning_rate": 4.864709900031961e-06, - "loss": 0.3094, + "epoch": 0.54, + "grad_norm": 0.40011584883822304, + "learning_rate": 9.032878584506455e-06, + "loss": 0.2205, "step": 11862 }, { - "epoch": 0.68, - "grad_norm": 0.46738359154640297, - "learning_rate": 4.86311319152861e-06, - "loss": 0.3345, + "epoch": 0.54, + "grad_norm": 0.5550201101669583, + "learning_rate": 9.031397650770648e-06, + "loss": 0.3756, "step": 11863 }, { - "epoch": 0.68, - "grad_norm": 0.174118788912665, - "learning_rate": 4.8615166609154315e-06, - "loss": 0.1225, + "epoch": 0.55, + "grad_norm": 0.38205764702113265, + "learning_rate": 9.029916738478762e-06, + "loss": 0.3064, "step": 11864 }, { - "epoch": 0.68, - "grad_norm": 0.5155715979717324, - "learning_rate": 4.85992030824771e-06, - "loss": 0.3517, + "epoch": 0.55, + "grad_norm": 0.6132049096025071, + "learning_rate": 9.02843584766358e-06, + "loss": 0.1673, "step": 11865 }, { - "epoch": 0.68, - "grad_norm": 0.39699707131238365, - "learning_rate": 4.858324133580727e-06, - "loss": 0.3108, + "epoch": 0.55, + "grad_norm": 0.29098658353015133, + "learning_rate": 9.0269549783579e-06, + "loss": 0.2281, "step": 11866 }, { - "epoch": 0.68, - "grad_norm": 0.723771118865917, - "learning_rate": 4.856728136969755e-06, - "loss": 0.265, + "epoch": 0.55, + "grad_norm": 0.4622545108670501, + "learning_rate": 9.025474130594495e-06, + "loss": 0.3512, "step": 11867 }, { - "epoch": 0.68, - "grad_norm": 0.34812703384012267, - "learning_rate": 4.85513231847007e-06, - "loss": 0.258, + "epoch": 0.55, + "grad_norm": 0.35739991410872585, + "learning_rate": 9.023993304406156e-06, + "loss": 0.2129, "step": 11868 }, { - "epoch": 0.68, - "grad_norm": 0.46778392750169046, - "learning_rate": 4.853536678136932e-06, - "loss": 0.3343, + "epoch": 0.55, + "grad_norm": 0.3094439430048366, + "learning_rate": 9.022512499825658e-06, + "loss": 0.2452, "step": 11869 }, { - "epoch": 0.68, - "grad_norm": 0.26524386390299387, - "learning_rate": 4.851941216025597e-06, - "loss": 0.1666, + "epoch": 0.55, + "grad_norm": 1.1004926512280166, + "learning_rate": 9.021031716885797e-06, + "loss": 0.5324, "step": 11870 }, { - "epoch": 0.68, - "grad_norm": 0.3529335727394295, - "learning_rate": 4.850345932191313e-06, - "loss": 0.2873, + "epoch": 0.55, + "grad_norm": 0.3323659043326884, + "learning_rate": 9.01955095561935e-06, + "loss": 0.2108, "step": 11871 }, { - "epoch": 0.68, - "grad_norm": 0.6736113996854561, - "learning_rate": 4.848750826689332e-06, - "loss": 0.373, + "epoch": 0.55, + "grad_norm": 0.2651446781288627, + "learning_rate": 9.018070216059098e-06, + "loss": 0.256, "step": 11872 }, { - "epoch": 0.68, - "grad_norm": 0.2454207027025233, - "learning_rate": 4.8471558995748865e-06, - "loss": 0.1576, + "epoch": 0.55, + "grad_norm": 0.7384624686624097, + "learning_rate": 9.016589498237825e-06, + "loss": 0.4374, "step": 11873 }, { - "epoch": 0.68, - "grad_norm": 0.29709806190197335, - "learning_rate": 4.845561150903212e-06, - "loss": 0.2612, + "epoch": 0.55, + "grad_norm": 0.8289305177604985, + "learning_rate": 9.015108802188314e-06, + "loss": 0.5241, "step": 11874 }, { - "epoch": 0.68, - "grad_norm": 1.1809594524008666, - "learning_rate": 4.843966580729533e-06, - "loss": 0.6127, + "epoch": 0.55, + "grad_norm": 0.2843442724573891, + "learning_rate": 9.013628127943345e-06, + "loss": 0.1742, "step": 11875 }, { - "epoch": 0.68, - "grad_norm": 0.6862481533740115, - "learning_rate": 4.842372189109066e-06, - "loss": 0.4077, + "epoch": 0.55, + "grad_norm": 0.39186448912650274, + "learning_rate": 9.012147475535698e-06, + "loss": 0.2955, "step": 11876 }, { - "epoch": 0.68, - "grad_norm": 0.6072253682769295, - "learning_rate": 4.840777976097032e-06, - "loss": 0.2227, + "epoch": 0.55, + "grad_norm": 0.6189472330096473, + "learning_rate": 9.010666844998154e-06, + "loss": 0.3162, "step": 11877 }, { - "epoch": 0.68, - "grad_norm": 0.35301852333241995, - "learning_rate": 4.839183941748635e-06, - "loss": 0.3078, + "epoch": 0.55, + "grad_norm": 0.44327473998813544, + "learning_rate": 9.00918623636349e-06, + "loss": 0.2531, "step": 11878 }, { - "epoch": 0.68, - "grad_norm": 0.2620722298074846, - "learning_rate": 4.837590086119076e-06, - "loss": 0.1697, + "epoch": 0.55, + "grad_norm": 0.5206410581033497, + "learning_rate": 9.007705649664491e-06, + "loss": 0.3643, "step": 11879 }, { - "epoch": 0.68, - "grad_norm": 0.32669040181183934, - "learning_rate": 4.835996409263546e-06, - "loss": 0.2134, + "epoch": 0.55, + "grad_norm": 0.4267329918750843, + "learning_rate": 9.006225084933932e-06, + "loss": 0.312, "step": 11880 }, { - "epoch": 0.68, - "grad_norm": 1.089646351240924, - "learning_rate": 4.834402911237243e-06, - "loss": 0.4845, + "epoch": 0.55, + "grad_norm": 0.2917122436907841, + "learning_rate": 9.004744542204593e-06, + "loss": 0.1882, "step": 11881 }, { - "epoch": 0.68, - "grad_norm": 0.29818673852673244, - "learning_rate": 4.832809592095344e-06, - "loss": 0.2822, + "epoch": 0.55, + "grad_norm": 0.5611276748425978, + "learning_rate": 9.003264021509243e-06, + "loss": 0.2842, "step": 11882 }, { - "epoch": 0.68, - "grad_norm": 0.329051440364525, - "learning_rate": 4.831216451893027e-06, - "loss": 0.2178, + "epoch": 0.55, + "grad_norm": 0.3987042029575161, + "learning_rate": 9.001783522880675e-06, + "loss": 0.3232, "step": 11883 }, { - "epoch": 0.68, - "grad_norm": 0.6968295293375228, - "learning_rate": 4.829623490685459e-06, - "loss": 0.4373, + "epoch": 0.55, + "grad_norm": 0.33095713034068386, + "learning_rate": 9.000303046351658e-06, + "loss": 0.2285, "step": 11884 }, { - "epoch": 0.68, - "grad_norm": 0.23540721441096973, - "learning_rate": 4.828030708527814e-06, - "loss": 0.1897, + "epoch": 0.55, + "grad_norm": 0.8358069353286274, + "learning_rate": 8.998822591954967e-06, + "loss": 0.4945, "step": 11885 }, { - "epoch": 0.68, - "grad_norm": 0.34736558596523004, - "learning_rate": 4.826438105475239e-06, - "loss": 0.2225, + "epoch": 0.55, + "grad_norm": 1.0681155620055311, + "learning_rate": 8.997342159723372e-06, + "loss": 0.6454, "step": 11886 }, { - "epoch": 0.68, - "grad_norm": 1.0944632104089678, - "learning_rate": 4.824845681582892e-06, - "loss": 0.5581, + "epoch": 0.55, + "grad_norm": 0.2500990833033169, + "learning_rate": 8.995861749689662e-06, + "loss": 0.1979, "step": 11887 }, { - "epoch": 0.68, - "grad_norm": 0.8324240460651077, - "learning_rate": 4.82325343690591e-06, - "loss": 0.414, + "epoch": 0.55, + "grad_norm": 0.25660745447452915, + "learning_rate": 8.994381361886603e-06, + "loss": 0.1879, "step": 11888 }, { - "epoch": 0.68, - "grad_norm": 0.32586291593223726, - "learning_rate": 4.821661371499444e-06, - "loss": 0.2691, + "epoch": 0.55, + "grad_norm": 0.7077093096196782, + "learning_rate": 8.992900996346973e-06, + "loss": 0.3951, "step": 11889 }, { - "epoch": 0.68, - "grad_norm": 0.33201111450208354, - "learning_rate": 4.820069485418622e-06, - "loss": 0.259, + "epoch": 0.55, + "grad_norm": 0.3565319964044789, + "learning_rate": 8.991420653103546e-06, + "loss": 0.2909, "step": 11890 }, { - "epoch": 0.68, - "grad_norm": 0.2894218031621524, - "learning_rate": 4.818477778718571e-06, - "loss": 0.1965, + "epoch": 0.55, + "grad_norm": 0.44688066210211536, + "learning_rate": 8.98994033218909e-06, + "loss": 0.2719, "step": 11891 }, { - "epoch": 0.68, - "grad_norm": 0.34646058218222653, - "learning_rate": 4.8168862514544075e-06, - "loss": 0.2612, + "epoch": 0.55, + "grad_norm": 0.44271720189703945, + "learning_rate": 8.98846003363638e-06, + "loss": 0.3167, "step": 11892 }, { - "epoch": 0.68, - "grad_norm": 0.9011059324528964, - "learning_rate": 4.815294903681254e-06, - "loss": 0.0465, + "epoch": 0.55, + "grad_norm": 0.30369461949143023, + "learning_rate": 8.986979757478195e-06, + "loss": 0.2227, "step": 11893 }, { - "epoch": 0.68, - "grad_norm": 0.38189133211873966, - "learning_rate": 4.813703735454216e-06, - "loss": 0.2818, + "epoch": 0.55, + "grad_norm": 0.42038434180472234, + "learning_rate": 8.985499503747298e-06, + "loss": 0.1844, "step": 11894 }, { - "epoch": 0.68, - "grad_norm": 0.37774095648872624, - "learning_rate": 4.812112746828394e-06, - "loss": 0.2809, + "epoch": 0.55, + "grad_norm": 0.3500522380221386, + "learning_rate": 8.984019272476465e-06, + "loss": 0.3086, "step": 11895 }, { - "epoch": 0.68, - "grad_norm": 0.9196950014411767, - "learning_rate": 4.810521937858881e-06, - "loss": 0.5367, + "epoch": 0.55, + "grad_norm": 0.39151272476517607, + "learning_rate": 8.982539063698468e-06, + "loss": 0.2803, "step": 11896 }, { - "epoch": 0.68, - "grad_norm": 0.25821957577363874, - "learning_rate": 4.808931308600774e-06, - "loss": 0.2529, + "epoch": 0.55, + "grad_norm": 0.6653153442715919, + "learning_rate": 8.981058877446073e-06, + "loss": 0.3167, "step": 11897 }, { - "epoch": 0.68, - "grad_norm": 0.33995182083802594, - "learning_rate": 4.807340859109152e-06, - "loss": 0.2719, + "epoch": 0.55, + "grad_norm": 0.6027038682025772, + "learning_rate": 8.979578713752055e-06, + "loss": 0.4087, "step": 11898 }, { - "epoch": 0.68, - "grad_norm": 0.408344759125669, - "learning_rate": 4.805750589439092e-06, - "loss": 0.1797, + "epoch": 0.55, + "grad_norm": 0.40504333571494705, + "learning_rate": 8.978098572649176e-06, + "loss": 0.3019, "step": 11899 }, { - "epoch": 0.68, - "grad_norm": 0.577276240726866, - "learning_rate": 4.804160499645667e-06, - "loss": 0.3866, + "epoch": 0.55, + "grad_norm": 0.2592786579362508, + "learning_rate": 8.976618454170212e-06, + "loss": 0.2296, "step": 11900 }, { - "epoch": 0.68, - "grad_norm": 0.33678995159423364, - "learning_rate": 4.802570589783937e-06, - "loss": 0.2425, + "epoch": 0.55, + "grad_norm": 0.6916368474250763, + "learning_rate": 8.975138358347931e-06, + "loss": 0.3107, "step": 11901 }, { - "epoch": 0.68, - "grad_norm": 0.3749231267150535, - "learning_rate": 4.800980859908967e-06, - "loss": 0.3353, + "epoch": 0.55, + "grad_norm": 0.362077249581779, + "learning_rate": 8.973658285215101e-06, + "loss": 0.2634, "step": 11902 }, { - "epoch": 0.68, - "grad_norm": 0.39713015624552944, - "learning_rate": 4.799391310075806e-06, - "loss": 0.1311, + "epoch": 0.55, + "grad_norm": 0.3821333947443045, + "learning_rate": 8.97217823480448e-06, + "loss": 0.3107, "step": 11903 }, { - "epoch": 0.68, - "grad_norm": 0.2695977078762357, - "learning_rate": 4.7978019403395e-06, - "loss": 0.2272, + "epoch": 0.55, + "grad_norm": 0.9189977796612226, + "learning_rate": 8.970698207148848e-06, + "loss": 0.5339, "step": 11904 }, { - "epoch": 0.68, - "grad_norm": 0.5527853027413822, - "learning_rate": 4.796212750755087e-06, - "loss": 0.3386, + "epoch": 0.55, + "grad_norm": 0.3655146397281494, + "learning_rate": 8.969218202280964e-06, + "loss": 0.2548, "step": 11905 }, { - "epoch": 0.68, - "grad_norm": 0.5264787695999982, - "learning_rate": 4.794623741377605e-06, - "loss": 0.2782, + "epoch": 0.55, + "grad_norm": 0.3911935267797988, + "learning_rate": 8.967738220233597e-06, + "loss": 0.2042, "step": 11906 }, { - "epoch": 0.68, - "grad_norm": 0.3201557404278092, - "learning_rate": 4.79303491226208e-06, - "loss": 0.2416, + "epoch": 0.55, + "grad_norm": 0.37758109943956747, + "learning_rate": 8.96625826103951e-06, + "loss": 0.2734, "step": 11907 }, { - "epoch": 0.68, - "grad_norm": 1.1192785579323365, - "learning_rate": 4.791446263463531e-06, - "loss": 0.7994, + "epoch": 0.55, + "grad_norm": 0.35640658064075487, + "learning_rate": 8.964778324731467e-06, + "loss": 0.2438, "step": 11908 }, { - "epoch": 0.68, - "grad_norm": 0.26545596616417333, - "learning_rate": 4.7898577950369704e-06, - "loss": 0.2056, + "epoch": 0.55, + "grad_norm": 1.6368110205812663, + "learning_rate": 8.963298411342236e-06, + "loss": 0.7069, "step": 11909 }, { - "epoch": 0.68, - "grad_norm": 0.24110527748871047, - "learning_rate": 4.788269507037415e-06, - "loss": 0.1904, + "epoch": 0.55, + "grad_norm": 0.6983923990929997, + "learning_rate": 8.96181852090458e-06, + "loss": 0.2931, "step": 11910 }, { - "epoch": 0.68, - "grad_norm": 0.8106310137327224, - "learning_rate": 4.786681399519862e-06, - "loss": 0.416, + "epoch": 0.55, + "grad_norm": 0.2793484679495848, + "learning_rate": 8.96033865345126e-06, + "loss": 0.2654, "step": 11911 }, { - "epoch": 0.68, - "grad_norm": 0.7495867803826378, - "learning_rate": 4.785093472539307e-06, - "loss": 0.3306, + "epoch": 0.55, + "grad_norm": 0.6899307236122483, + "learning_rate": 8.958858809015036e-06, + "loss": 0.3966, "step": 11912 }, { - "epoch": 0.68, - "grad_norm": 0.2882924469905718, - "learning_rate": 4.783505726150738e-06, - "loss": 0.2652, + "epoch": 0.55, + "grad_norm": 0.34828482576864805, + "learning_rate": 8.957378987628682e-06, + "loss": 0.2154, "step": 11913 }, { - "epoch": 0.68, - "grad_norm": 0.4408666504381776, - "learning_rate": 4.781918160409145e-06, - "loss": 0.3201, + "epoch": 0.55, + "grad_norm": 0.32251749757909354, + "learning_rate": 8.95589918932495e-06, + "loss": 0.1842, "step": 11914 }, { - "epoch": 0.68, - "grad_norm": 0.4572909264973247, - "learning_rate": 4.780330775369501e-06, - "loss": 0.2594, + "epoch": 0.55, + "grad_norm": 0.39425689691051585, + "learning_rate": 8.954419414136602e-06, + "loss": 0.3082, "step": 11915 }, { - "epoch": 0.68, - "grad_norm": 0.2702213205694998, - "learning_rate": 4.778743571086779e-06, - "loss": 0.1873, + "epoch": 0.55, + "grad_norm": 1.197984969771984, + "learning_rate": 8.952939662096397e-06, + "loss": 0.7476, "step": 11916 }, { - "epoch": 0.68, - "grad_norm": 0.5390198271391285, - "learning_rate": 4.777156547615942e-06, - "loss": 0.2586, + "epoch": 0.55, + "grad_norm": 0.3085798553802743, + "learning_rate": 8.951459933237103e-06, + "loss": 0.1882, "step": 11917 }, { - "epoch": 0.68, - "grad_norm": 0.5337637998050275, - "learning_rate": 4.775569705011945e-06, - "loss": 0.361, + "epoch": 0.55, + "grad_norm": 0.7431781619259294, + "learning_rate": 8.949980227591476e-06, + "loss": 0.4028, "step": 11918 }, { - "epoch": 0.68, - "grad_norm": 0.30304924911307957, - "learning_rate": 4.773983043329753e-06, - "loss": 0.1909, + "epoch": 0.55, + "grad_norm": 0.2949509626744957, + "learning_rate": 8.948500545192274e-06, + "loss": 0.2528, "step": 11919 }, { - "epoch": 0.68, - "grad_norm": 1.1874428884612134, - "learning_rate": 4.7723965626243e-06, - "loss": 0.7611, + "epoch": 0.55, + "grad_norm": 0.24006285337238, + "learning_rate": 8.947020886072258e-06, + "loss": 0.1675, "step": 11920 }, { - "epoch": 0.68, - "grad_norm": 0.33172829141311116, - "learning_rate": 4.770810262950524e-06, - "loss": 0.2931, + "epoch": 0.55, + "grad_norm": 1.2160646409147715, + "learning_rate": 8.945541250264182e-06, + "loss": 0.6558, "step": 11921 }, { - "epoch": 0.68, - "grad_norm": 0.31447195769036773, - "learning_rate": 4.769224144363368e-06, - "loss": 0.2002, + "epoch": 0.55, + "grad_norm": 1.1450981483073202, + "learning_rate": 8.944061637800808e-06, + "loss": 0.6818, "step": 11922 }, { - "epoch": 0.69, - "grad_norm": 0.49881291519374765, - "learning_rate": 4.767638206917755e-06, - "loss": 0.2958, + "epoch": 0.55, + "grad_norm": 0.30884588531532053, + "learning_rate": 8.942582048714891e-06, + "loss": 0.2287, "step": 11923 }, { - "epoch": 0.69, - "grad_norm": 0.3918596183860913, - "learning_rate": 4.766052450668606e-06, - "loss": 0.2039, + "epoch": 0.55, + "grad_norm": 0.6349107866218481, + "learning_rate": 8.941102483039188e-06, + "loss": 0.3514, "step": 11924 }, { - "epoch": 0.69, - "grad_norm": 0.27808781938656596, - "learning_rate": 4.764466875670836e-06, - "loss": 0.2249, + "epoch": 0.55, + "grad_norm": 0.27052262468975724, + "learning_rate": 8.939622940806456e-06, + "loss": 0.1456, "step": 11925 }, { - "epoch": 0.69, - "grad_norm": 0.4956137686581436, - "learning_rate": 4.762881481979349e-06, - "loss": 0.3887, + "epoch": 0.55, + "grad_norm": 0.32268023987257255, + "learning_rate": 8.93814342204945e-06, + "loss": 0.2583, "step": 11926 }, { - "epoch": 0.69, - "grad_norm": 1.111755849842336, - "learning_rate": 4.761296269649054e-06, - "loss": 0.4972, + "epoch": 0.55, + "grad_norm": 0.37183543159288124, + "learning_rate": 8.936663926800926e-06, + "loss": 0.2427, "step": 11927 }, { - "epoch": 0.69, - "grad_norm": 0.30684184917618834, - "learning_rate": 4.759711238734844e-06, - "loss": 0.2521, + "epoch": 0.55, + "grad_norm": 0.8572066118160747, + "learning_rate": 8.935184455093637e-06, + "loss": 0.5737, "step": 11928 }, { - "epoch": 0.69, - "grad_norm": 0.4735692273389909, - "learning_rate": 4.75812638929161e-06, - "loss": 0.2749, + "epoch": 0.55, + "grad_norm": 0.3620050591650081, + "learning_rate": 8.933705006960333e-06, + "loss": 0.2892, "step": 11929 }, { - "epoch": 0.69, - "grad_norm": 0.2613868405056514, - "learning_rate": 4.756541721374228e-06, - "loss": 0.2068, + "epoch": 0.55, + "grad_norm": 0.7461454013971124, + "learning_rate": 8.932225582433779e-06, + "loss": 0.3354, "step": 11930 }, { - "epoch": 0.69, - "grad_norm": 0.33203601499476443, - "learning_rate": 4.7549572350375864e-06, - "loss": 0.248, + "epoch": 0.55, + "grad_norm": 0.26971288129210313, + "learning_rate": 8.930746181546723e-06, + "loss": 0.238, "step": 11931 }, { - "epoch": 0.69, - "grad_norm": 0.9132904905924238, - "learning_rate": 4.753372930336548e-06, - "loss": 0.4699, + "epoch": 0.55, + "grad_norm": 0.41491734911341543, + "learning_rate": 8.929266804331912e-06, + "loss": 0.2765, "step": 11932 }, { - "epoch": 0.69, - "grad_norm": 0.32872137130223583, - "learning_rate": 4.751788807325981e-06, - "loss": 0.2959, + "epoch": 0.55, + "grad_norm": 0.28769518004868905, + "learning_rate": 8.927787450822099e-06, + "loss": 0.1758, "step": 11933 }, { - "epoch": 0.69, - "grad_norm": 0.396983128779614, - "learning_rate": 4.750204866060738e-06, - "loss": 0.2726, + "epoch": 0.55, + "grad_norm": 0.5474531416647711, + "learning_rate": 8.926308121050042e-06, + "loss": 0.3458, "step": 11934 }, { - "epoch": 0.69, - "grad_norm": 0.5185433454178232, - "learning_rate": 4.748621106595679e-06, - "loss": 0.2699, + "epoch": 0.55, + "grad_norm": 0.46994000017203785, + "learning_rate": 8.92482881504849e-06, + "loss": 0.304, "step": 11935 }, { - "epoch": 0.69, - "grad_norm": 0.2455213486640146, - "learning_rate": 4.747037528985644e-06, - "loss": 0.1981, + "epoch": 0.55, + "grad_norm": 0.40049241422106846, + "learning_rate": 8.923349532850191e-06, + "loss": 0.2795, "step": 11936 }, { - "epoch": 0.69, - "grad_norm": 0.4123647936906814, - "learning_rate": 4.745454133285474e-06, - "loss": 0.2777, + "epoch": 0.55, + "grad_norm": 0.30971522364837467, + "learning_rate": 8.921870274487896e-06, + "loss": 0.1903, "step": 11937 }, { - "epoch": 0.69, - "grad_norm": 0.3633484612973121, - "learning_rate": 4.743870919549998e-06, - "loss": 0.3307, + "epoch": 0.55, + "grad_norm": 0.45800706148017134, + "learning_rate": 8.92039103999435e-06, + "loss": 0.314, "step": 11938 }, { - "epoch": 0.69, - "grad_norm": 0.6143787094869488, - "learning_rate": 4.74228788783405e-06, - "loss": 0.3097, + "epoch": 0.55, + "grad_norm": 0.3191680637813942, + "learning_rate": 8.918911829402311e-06, + "loss": 0.2918, "step": 11939 }, { - "epoch": 0.69, - "grad_norm": 0.3848915895137807, - "learning_rate": 4.740705038192444e-06, - "loss": 0.2994, + "epoch": 0.55, + "grad_norm": 0.8715304630563817, + "learning_rate": 8.917432642744519e-06, + "loss": 0.3607, "step": 11940 }, { - "epoch": 0.69, - "grad_norm": 0.303569188063324, - "learning_rate": 4.7391223706799994e-06, - "loss": 0.2996, + "epoch": 0.55, + "grad_norm": 0.3297736574001783, + "learning_rate": 8.915953480053726e-06, + "loss": 0.2782, "step": 11941 }, { - "epoch": 0.69, - "grad_norm": 0.22025983968170426, - "learning_rate": 4.73753988535152e-06, - "loss": 0.0816, + "epoch": 0.55, + "grad_norm": 0.7901504814030177, + "learning_rate": 8.914474341362677e-06, + "loss": 0.4654, "step": 11942 }, { - "epoch": 0.69, - "grad_norm": 0.3947879607288585, - "learning_rate": 4.735957582261803e-06, - "loss": 0.2905, + "epoch": 0.55, + "grad_norm": 0.3385953701893658, + "learning_rate": 8.91299522670412e-06, + "loss": 0.2559, "step": 11943 }, { - "epoch": 0.69, - "grad_norm": 0.5295208092043296, - "learning_rate": 4.7343754614656536e-06, - "loss": 0.3748, + "epoch": 0.55, + "grad_norm": 0.26240276168600873, + "learning_rate": 8.911516136110803e-06, + "loss": 0.2007, "step": 11944 }, { - "epoch": 0.69, - "grad_norm": 0.3434557916596737, - "learning_rate": 4.732793523017856e-06, - "loss": 0.2484, + "epoch": 0.55, + "grad_norm": 0.42579703963147303, + "learning_rate": 8.910037069615468e-06, + "loss": 0.275, "step": 11945 }, { - "epoch": 0.69, - "grad_norm": 0.4718786351336028, - "learning_rate": 4.73121176697319e-06, - "loss": 0.3152, + "epoch": 0.55, + "grad_norm": 0.534312683379889, + "learning_rate": 8.908558027250859e-06, + "loss": 0.3053, "step": 11946 }, { - "epoch": 0.69, - "grad_norm": 0.5218283957356089, - "learning_rate": 4.729630193386433e-06, - "loss": 0.3238, + "epoch": 0.55, + "grad_norm": 0.29425277504442326, + "learning_rate": 8.907079009049728e-06, + "loss": 0.2577, "step": 11947 }, { - "epoch": 0.69, - "grad_norm": 0.158404783519397, - "learning_rate": 4.728048802312358e-06, - "loss": 0.0912, + "epoch": 0.55, + "grad_norm": 1.2675648379049664, + "learning_rate": 8.905600015044813e-06, + "loss": 0.7705, "step": 11948 }, { - "epoch": 0.69, - "grad_norm": 0.27999339984469956, - "learning_rate": 4.726467593805726e-06, - "loss": 0.2589, + "epoch": 0.55, + "grad_norm": 0.3402363256162234, + "learning_rate": 8.904121045268862e-06, + "loss": 0.1329, "step": 11949 }, { - "epoch": 0.69, - "grad_norm": 0.5390317862693036, - "learning_rate": 4.724886567921295e-06, - "loss": 0.4137, + "epoch": 0.55, + "grad_norm": 0.3306486469876794, + "learning_rate": 8.902642099754611e-06, + "loss": 0.186, "step": 11950 }, { - "epoch": 0.69, - "grad_norm": 0.5653285205882527, - "learning_rate": 4.723305724713812e-06, - "loss": 0.3685, + "epoch": 0.55, + "grad_norm": 0.27497502828706905, + "learning_rate": 8.901163178534804e-06, + "loss": 0.2787, "step": 11951 }, { - "epoch": 0.69, - "grad_norm": 0.4366394060085917, - "learning_rate": 4.721725064238028e-06, - "loss": 0.1037, + "epoch": 0.55, + "grad_norm": 0.7767299915185887, + "learning_rate": 8.899684281642189e-06, + "loss": 0.4064, "step": 11952 }, { - "epoch": 0.69, - "grad_norm": 0.28421369940574165, - "learning_rate": 4.720144586548681e-06, - "loss": 0.2764, + "epoch": 0.55, + "grad_norm": 0.3731491520905725, + "learning_rate": 8.898205409109503e-06, + "loss": 0.1906, "step": 11953 }, { - "epoch": 0.69, - "grad_norm": 0.3944676448912269, - "learning_rate": 4.718564291700497e-06, - "loss": 0.2295, + "epoch": 0.55, + "grad_norm": 0.5400836506518383, + "learning_rate": 8.896726560969486e-06, + "loss": 0.3601, "step": 11954 }, { - "epoch": 0.69, - "grad_norm": 0.34010771117451527, - "learning_rate": 4.7169841797482005e-06, - "loss": 0.1541, + "epoch": 0.55, + "grad_norm": 0.35889372931241625, + "learning_rate": 8.89524773725488e-06, + "loss": 0.3097, "step": 11955 }, { - "epoch": 0.69, - "grad_norm": 0.3757324807553204, - "learning_rate": 4.7154042507465195e-06, - "loss": 0.2925, + "epoch": 0.55, + "grad_norm": 0.21806108303222296, + "learning_rate": 8.893768937998425e-06, + "loss": 0.1236, "step": 11956 }, { - "epoch": 0.69, - "grad_norm": 0.2737937072179737, - "learning_rate": 4.713824504750161e-06, - "loss": 0.2567, + "epoch": 0.55, + "grad_norm": 0.45110614749666156, + "learning_rate": 8.89229016323286e-06, + "loss": 0.2655, "step": 11957 }, { - "epoch": 0.69, - "grad_norm": 0.33085688299480903, - "learning_rate": 4.7122449418138325e-06, - "loss": 0.1568, + "epoch": 0.55, + "grad_norm": 0.9502639970997538, + "learning_rate": 8.890811412990923e-06, + "loss": 0.4785, "step": 11958 }, { - "epoch": 0.69, - "grad_norm": 0.5549992341677822, - "learning_rate": 4.710665561992232e-06, - "loss": 0.3311, + "epoch": 0.55, + "grad_norm": 0.28074274645875097, + "learning_rate": 8.88933268730535e-06, + "loss": 0.2193, "step": 11959 }, { - "epoch": 0.69, - "grad_norm": 0.43458610126481007, - "learning_rate": 4.709086365340057e-06, - "loss": 0.2455, + "epoch": 0.55, + "grad_norm": 0.5586783289622752, + "learning_rate": 8.887853986208883e-06, + "loss": 0.3813, "step": 11960 }, { - "epoch": 0.69, - "grad_norm": 0.24458519823897504, - "learning_rate": 4.707507351911995e-06, - "loss": 0.2202, + "epoch": 0.55, + "grad_norm": 1.1120165776800148, + "learning_rate": 8.886375309734257e-06, + "loss": 0.6174, "step": 11961 }, { - "epoch": 0.69, - "grad_norm": 0.4809860354275576, - "learning_rate": 4.705928521762726e-06, - "loss": 0.3509, + "epoch": 0.55, + "grad_norm": 0.2392298532593129, + "learning_rate": 8.884896657914208e-06, + "loss": 0.1611, "step": 11962 }, { - "epoch": 0.69, - "grad_norm": 0.6523562611627876, - "learning_rate": 4.7043498749469204e-06, - "loss": 0.2662, + "epoch": 0.55, + "grad_norm": 0.37356346499112647, + "learning_rate": 8.883418030781468e-06, + "loss": 0.2975, "step": 11963 }, { - "epoch": 0.69, - "grad_norm": 0.6412526764903204, - "learning_rate": 4.702771411519256e-06, - "loss": 0.3287, + "epoch": 0.55, + "grad_norm": 1.0204850220444457, + "learning_rate": 8.88193942836878e-06, + "loss": 0.4824, "step": 11964 }, { - "epoch": 0.69, - "grad_norm": 0.2694814585384461, - "learning_rate": 4.701193131534389e-06, - "loss": 0.2403, + "epoch": 0.55, + "grad_norm": 0.4394118431045616, + "learning_rate": 8.880460850708877e-06, + "loss": 0.2822, "step": 11965 }, { - "epoch": 0.69, - "grad_norm": 0.9540802506457705, - "learning_rate": 4.699615035046975e-06, - "loss": 0.5154, + "epoch": 0.55, + "grad_norm": 0.2964547840079321, + "learning_rate": 8.878982297834492e-06, + "loss": 0.1477, "step": 11966 }, { - "epoch": 0.69, - "grad_norm": 0.3832799734593423, - "learning_rate": 4.698037122111665e-06, - "loss": 0.3095, + "epoch": 0.55, + "grad_norm": 0.38670111706394944, + "learning_rate": 8.877503769778358e-06, + "loss": 0.3148, "step": 11967 }, { - "epoch": 0.69, - "grad_norm": 0.5151002559842178, - "learning_rate": 4.696459392783098e-06, - "loss": 0.2608, + "epoch": 0.55, + "grad_norm": 0.5321987397161865, + "learning_rate": 8.876025266573206e-06, + "loss": 0.3413, "step": 11968 }, { - "epoch": 0.69, - "grad_norm": 0.2970906609548814, - "learning_rate": 4.694881847115918e-06, - "loss": 0.2769, + "epoch": 0.55, + "grad_norm": 0.5038676985928764, + "learning_rate": 8.874546788251773e-06, + "loss": 0.281, "step": 11969 }, { - "epoch": 0.69, - "grad_norm": 0.3673952699013813, - "learning_rate": 4.69330448516475e-06, - "loss": 0.2208, + "epoch": 0.55, + "grad_norm": 0.30080831038884753, + "learning_rate": 8.87306833484679e-06, + "loss": 0.2583, "step": 11970 }, { - "epoch": 0.69, - "grad_norm": 0.3823198217572113, - "learning_rate": 4.691727306984222e-06, - "loss": 0.2333, + "epoch": 0.55, + "grad_norm": 0.9018782641473774, + "learning_rate": 8.871589906390988e-06, + "loss": 0.5578, "step": 11971 }, { - "epoch": 0.69, - "grad_norm": 0.4797977748585798, - "learning_rate": 4.690150312628944e-06, - "loss": 0.3882, + "epoch": 0.55, + "grad_norm": 0.2725778831370997, + "learning_rate": 8.870111502917097e-06, + "loss": 0.1848, "step": 11972 }, { - "epoch": 0.69, - "grad_norm": 0.42047099420085404, - "learning_rate": 4.688573502153536e-06, - "loss": 0.3205, + "epoch": 0.55, + "grad_norm": 1.3155925963290773, + "learning_rate": 8.868633124457848e-06, + "loss": 0.7015, "step": 11973 }, { - "epoch": 0.69, - "grad_norm": 0.35808423918623256, - "learning_rate": 4.6869968756126e-06, - "loss": 0.2417, + "epoch": 0.55, + "grad_norm": 0.5318628317206986, + "learning_rate": 8.867154771045973e-06, + "loss": 0.2976, "step": 11974 }, { - "epoch": 0.69, - "grad_norm": 0.2642813721183281, - "learning_rate": 4.685420433060732e-06, - "loss": 0.1715, + "epoch": 0.55, + "grad_norm": 0.3067103475537007, + "learning_rate": 8.865676442714199e-06, + "loss": 0.2714, "step": 11975 }, { - "epoch": 0.69, - "grad_norm": 0.42438116021427397, - "learning_rate": 4.683844174552523e-06, - "loss": 0.2682, + "epoch": 0.55, + "grad_norm": 0.6867981071016549, + "learning_rate": 8.86419813949525e-06, + "loss": 0.3627, "step": 11976 }, { - "epoch": 0.69, - "grad_norm": 0.2952126454257907, - "learning_rate": 4.682268100142567e-06, - "loss": 0.2697, + "epoch": 0.55, + "grad_norm": 0.34009171363857127, + "learning_rate": 8.862719861421866e-06, + "loss": 0.1915, "step": 11977 }, { - "epoch": 0.69, - "grad_norm": 1.3042987725864317, - "learning_rate": 4.680692209885436e-06, - "loss": 0.3308, + "epoch": 0.55, + "grad_norm": 0.3396337476704829, + "learning_rate": 8.861241608526768e-06, + "loss": 0.2512, "step": 11978 }, { - "epoch": 0.69, - "grad_norm": 0.50954991915773, - "learning_rate": 4.679116503835706e-06, - "loss": 0.3273, + "epoch": 0.55, + "grad_norm": 0.3508046945428364, + "learning_rate": 8.859763380842683e-06, + "loss": 0.2419, "step": 11979 }, { - "epoch": 0.69, - "grad_norm": 0.35979928673775013, - "learning_rate": 4.6775409820479415e-06, - "loss": 0.311, + "epoch": 0.55, + "grad_norm": 0.6718522790576175, + "learning_rate": 8.858285178402335e-06, + "loss": 0.3306, "step": 11980 }, { - "epoch": 0.69, - "grad_norm": 0.34865340009068274, - "learning_rate": 4.675965644576701e-06, - "loss": 0.2457, + "epoch": 0.55, + "grad_norm": 0.4577767900038282, + "learning_rate": 8.856807001238452e-06, + "loss": 0.3342, "step": 11981 }, { - "epoch": 0.69, - "grad_norm": 0.2343614065070297, - "learning_rate": 4.674390491476545e-06, - "loss": 0.161, + "epoch": 0.55, + "grad_norm": 0.3753635095980729, + "learning_rate": 8.855328849383761e-06, + "loss": 0.2585, "step": 11982 }, { - "epoch": 0.69, - "grad_norm": 0.39514869929001734, - "learning_rate": 4.672815522802018e-06, - "loss": 0.3022, + "epoch": 0.55, + "grad_norm": 0.4331005721413234, + "learning_rate": 8.853850722870989e-06, + "loss": 0.3018, "step": 11983 }, { - "epoch": 0.69, - "grad_norm": 0.42193017601516436, - "learning_rate": 4.671240738607659e-06, - "loss": 0.2649, + "epoch": 0.55, + "grad_norm": 0.3970536680822463, + "learning_rate": 8.852372621732856e-06, + "loss": 0.2683, "step": 11984 }, { - "epoch": 0.69, - "grad_norm": 0.3575772895676162, - "learning_rate": 4.669666138948001e-06, - "loss": 0.3064, + "epoch": 0.55, + "grad_norm": 0.4432730571369166, + "learning_rate": 8.850894546002085e-06, + "loss": 0.2231, "step": 11985 }, { - "epoch": 0.69, - "grad_norm": 0.37733439670751967, - "learning_rate": 4.668091723877584e-06, - "loss": 0.2944, + "epoch": 0.55, + "grad_norm": 0.4473410966693785, + "learning_rate": 8.849416495711403e-06, + "loss": 0.3042, "step": 11986 }, { - "epoch": 0.69, - "grad_norm": 0.363482865984895, - "learning_rate": 4.666517493450916e-06, - "loss": 0.1889, + "epoch": 0.55, + "grad_norm": 0.3608431350744341, + "learning_rate": 8.847938470893533e-06, + "loss": 0.2791, "step": 11987 }, { - "epoch": 0.69, - "grad_norm": 0.2920184473283872, - "learning_rate": 4.664943447722514e-06, - "loss": 0.1943, + "epoch": 0.55, + "grad_norm": 1.3340588083468055, + "learning_rate": 8.846460471581192e-06, + "loss": 0.8145, "step": 11988 }, { - "epoch": 0.69, - "grad_norm": 0.268324034496146, - "learning_rate": 4.6633695867468955e-06, - "loss": 0.2651, + "epoch": 0.55, + "grad_norm": 0.23646920895177737, + "learning_rate": 8.844982497807104e-06, + "loss": 0.1126, "step": 11989 }, { - "epoch": 0.69, - "grad_norm": 0.6601723005250368, - "learning_rate": 4.661795910578558e-06, - "loss": 0.382, + "epoch": 0.55, + "grad_norm": 0.2975044295581151, + "learning_rate": 8.843504549603993e-06, + "loss": 0.2323, "step": 11990 }, { - "epoch": 0.69, - "grad_norm": 0.6064327864030062, - "learning_rate": 4.660222419271999e-06, - "loss": 0.2746, + "epoch": 0.55, + "grad_norm": 0.35933584846402805, + "learning_rate": 8.842026627004574e-06, + "loss": 0.3341, "step": 11991 }, { - "epoch": 0.69, - "grad_norm": 0.3675959255251272, - "learning_rate": 4.658649112881709e-06, - "loss": 0.3073, + "epoch": 0.55, + "grad_norm": 0.42488731823978176, + "learning_rate": 8.84054873004157e-06, + "loss": 0.2275, "step": 11992 }, { - "epoch": 0.69, - "grad_norm": 0.36307823733203226, - "learning_rate": 4.657075991462165e-06, - "loss": 0.3006, + "epoch": 0.55, + "grad_norm": 0.423454274171741, + "learning_rate": 8.839070858747697e-06, + "loss": 0.3318, "step": 11993 }, { - "epoch": 0.69, - "grad_norm": 0.2183623643397165, - "learning_rate": 4.6555030550678544e-06, - "loss": 0.0842, + "epoch": 0.55, + "grad_norm": 0.4838204249276851, + "learning_rate": 8.83759301315568e-06, + "loss": 0.3821, "step": 11994 }, { - "epoch": 0.69, - "grad_norm": 0.35620400135270036, - "learning_rate": 4.6539303037532435e-06, - "loss": 0.2911, + "epoch": 0.55, + "grad_norm": 0.319655871467485, + "learning_rate": 8.836115193298232e-06, + "loss": 0.2025, "step": 11995 }, { - "epoch": 0.69, - "grad_norm": 0.9075387144091024, - "learning_rate": 4.652357737572796e-06, - "loss": 0.392, + "epoch": 0.55, + "grad_norm": 0.23942928131271157, + "learning_rate": 8.834637399208076e-06, + "loss": 0.1924, "step": 11996 }, { - "epoch": 0.69, - "grad_norm": 0.30833975615137077, - "learning_rate": 4.650785356580967e-06, - "loss": 0.2674, + "epoch": 0.55, + "grad_norm": 0.7282807717283084, + "learning_rate": 8.833159630917922e-06, + "loss": 0.4181, "step": 11997 }, { - "epoch": 0.69, - "grad_norm": 0.36252082852686, - "learning_rate": 4.649213160832213e-06, - "loss": 0.3089, + "epoch": 0.55, + "grad_norm": 0.29349185538462813, + "learning_rate": 8.831681888460484e-06, + "loss": 0.2456, "step": 11998 }, { - "epoch": 0.69, - "grad_norm": 1.0088006244221261, - "learning_rate": 4.647641150380978e-06, - "loss": 0.562, + "epoch": 0.55, + "grad_norm": 0.4030794767902304, + "learning_rate": 8.830204171868487e-06, + "loss": 0.281, "step": 11999 }, { - "epoch": 0.69, - "grad_norm": 0.17551431395096048, - "learning_rate": 4.6460693252817e-06, - "loss": 0.1311, + "epoch": 0.55, + "grad_norm": 0.9516729543701261, + "learning_rate": 8.828726481174643e-06, + "loss": 0.5827, "step": 12000 }, { - "epoch": 0.69, - "grad_norm": 0.32796221667677156, - "learning_rate": 4.644497685588808e-06, - "loss": 0.2919, + "epoch": 0.55, + "grad_norm": 0.42236630063156705, + "learning_rate": 8.827248816411666e-06, + "loss": 0.2993, "step": 12001 }, { - "epoch": 0.69, - "grad_norm": 1.211749796355846, - "learning_rate": 4.642926231356734e-06, - "loss": 0.3921, + "epoch": 0.55, + "grad_norm": 0.34156172398438306, + "learning_rate": 8.825771177612269e-06, + "loss": 0.2339, "step": 12002 }, { - "epoch": 0.69, - "grad_norm": 0.6840428587949849, - "learning_rate": 4.641354962639894e-06, - "loss": 0.4082, + "epoch": 0.55, + "grad_norm": 0.38401867620663854, + "learning_rate": 8.824293564809166e-06, + "loss": 0.3158, "step": 12003 }, { - "epoch": 0.69, - "grad_norm": 0.3276160490754549, - "learning_rate": 4.639783879492701e-06, - "loss": 0.2112, + "epoch": 0.55, + "grad_norm": 0.3107882646696273, + "learning_rate": 8.822815978035072e-06, + "loss": 0.1715, "step": 12004 }, { - "epoch": 0.69, - "grad_norm": 0.3450703208904191, - "learning_rate": 4.638212981969562e-06, - "loss": 0.2976, + "epoch": 0.55, + "grad_norm": 0.41587757530185343, + "learning_rate": 8.821338417322696e-06, + "loss": 0.2415, "step": 12005 }, { - "epoch": 0.69, - "grad_norm": 0.22299481634481944, - "learning_rate": 4.636642270124874e-06, - "loss": 0.1401, + "epoch": 0.55, + "grad_norm": 0.3781782068438, + "learning_rate": 8.819860882704751e-06, + "loss": 0.313, "step": 12006 }, { - "epoch": 0.69, - "grad_norm": 0.4736605076802758, - "learning_rate": 4.6350717440130366e-06, - "loss": 0.2279, + "epoch": 0.55, + "grad_norm": 1.1858673132868942, + "learning_rate": 8.81838337421395e-06, + "loss": 0.5966, "step": 12007 }, { - "epoch": 0.69, - "grad_norm": 0.6337811756742268, - "learning_rate": 4.633501403688434e-06, - "loss": 0.3114, + "epoch": 0.55, + "grad_norm": 0.3031844145490355, + "learning_rate": 8.816905891883002e-06, + "loss": 0.2046, "step": 12008 }, { - "epoch": 0.69, - "grad_norm": 1.0475717388652215, - "learning_rate": 4.631931249205447e-06, - "loss": 0.5844, + "epoch": 0.55, + "grad_norm": 0.3215837287830184, + "learning_rate": 8.81542843574462e-06, + "loss": 0.2309, "step": 12009 }, { - "epoch": 0.69, - "grad_norm": 0.29755432704987433, - "learning_rate": 4.630361280618446e-06, - "loss": 0.2081, + "epoch": 0.55, + "grad_norm": 0.3687449564220821, + "learning_rate": 8.813951005831507e-06, + "loss": 0.3064, "step": 12010 }, { - "epoch": 0.69, - "grad_norm": 1.1889278166919943, - "learning_rate": 4.628791497981807e-06, - "loss": 0.6442, + "epoch": 0.55, + "grad_norm": 0.3311277846298865, + "learning_rate": 8.812473602176373e-06, + "loss": 0.1936, "step": 12011 }, { - "epoch": 0.69, - "grad_norm": 0.2764738843094473, - "learning_rate": 4.627221901349887e-06, - "loss": 0.2578, + "epoch": 0.55, + "grad_norm": 1.250981447290825, + "learning_rate": 8.810996224811933e-06, + "loss": 0.7244, "step": 12012 }, { - "epoch": 0.69, - "grad_norm": 0.24776928880783564, - "learning_rate": 4.625652490777042e-06, - "loss": 0.1999, + "epoch": 0.55, + "grad_norm": 0.7042402386612823, + "learning_rate": 8.80951887377089e-06, + "loss": 0.3762, "step": 12013 }, { - "epoch": 0.69, - "grad_norm": 0.7327164556440673, - "learning_rate": 4.624083266317616e-06, - "loss": 0.3085, + "epoch": 0.55, + "grad_norm": 0.3297862835025889, + "learning_rate": 8.808041549085954e-06, + "loss": 0.2994, "step": 12014 }, { - "epoch": 0.69, - "grad_norm": 1.1927932070668756, - "learning_rate": 4.62251422802596e-06, - "loss": 0.7523, + "epoch": 0.55, + "grad_norm": 0.41110558822388815, + "learning_rate": 8.806564250789823e-06, + "loss": 0.2982, "step": 12015 }, { - "epoch": 0.69, - "grad_norm": 0.3228198567523802, - "learning_rate": 4.620945375956404e-06, - "loss": 0.2437, + "epoch": 0.55, + "grad_norm": 0.23555947633486063, + "learning_rate": 8.805086978915215e-06, + "loss": 0.1446, "step": 12016 }, { - "epoch": 0.69, - "grad_norm": 0.5191991671756184, - "learning_rate": 4.619376710163279e-06, - "loss": 0.2589, + "epoch": 0.55, + "grad_norm": 0.3759370181846773, + "learning_rate": 8.803609733494826e-06, + "loss": 0.2434, "step": 12017 }, { - "epoch": 0.69, - "grad_norm": 0.7583758410617182, - "learning_rate": 4.617808230700907e-06, - "loss": 0.42, + "epoch": 0.55, + "grad_norm": 0.3257131124348913, + "learning_rate": 8.802132514561368e-06, + "loss": 0.2491, "step": 12018 }, { - "epoch": 0.69, - "grad_norm": 0.33941374151587894, - "learning_rate": 4.6162399376236e-06, - "loss": 0.2747, + "epoch": 0.55, + "grad_norm": 0.7656360339514674, + "learning_rate": 8.800655322147539e-06, + "loss": 0.4078, "step": 12019 }, { - "epoch": 0.69, - "grad_norm": 0.2758703659931883, - "learning_rate": 4.614671830985681e-06, - "loss": 0.2094, + "epoch": 0.55, + "grad_norm": 0.3959445253797471, + "learning_rate": 8.799178156286046e-06, + "loss": 0.2784, "step": 12020 }, { - "epoch": 0.69, - "grad_norm": 0.354847088036182, - "learning_rate": 4.613103910841441e-06, - "loss": 0.21, + "epoch": 0.55, + "grad_norm": 0.38909344306332827, + "learning_rate": 8.79770101700959e-06, + "loss": 0.227, "step": 12021 }, { - "epoch": 0.69, - "grad_norm": 0.3544508126717817, - "learning_rate": 4.611536177245176e-06, - "loss": 0.2592, + "epoch": 0.55, + "grad_norm": 0.2874065431082309, + "learning_rate": 8.796223904350878e-06, + "loss": 0.2674, "step": 12022 }, { - "epoch": 0.69, - "grad_norm": 1.0195307111162017, - "learning_rate": 4.609968630251187e-06, - "loss": 0.3661, + "epoch": 0.55, + "grad_norm": 0.4122689258544816, + "learning_rate": 8.794746818342603e-06, + "loss": 0.3078, "step": 12023 }, { - "epoch": 0.69, - "grad_norm": 0.3860554430872647, - "learning_rate": 4.608401269913751e-06, - "loss": 0.2795, + "epoch": 0.55, + "grad_norm": 1.097776493706162, + "learning_rate": 8.793269759017475e-06, + "loss": 0.4353, "step": 12024 }, { - "epoch": 0.69, - "grad_norm": 0.39062012651266825, - "learning_rate": 4.606834096287148e-06, - "loss": 0.2738, + "epoch": 0.55, + "grad_norm": 0.8309541728271063, + "learning_rate": 8.791792726408194e-06, + "loss": 0.4321, "step": 12025 }, { - "epoch": 0.69, - "grad_norm": 0.3576834430909374, - "learning_rate": 4.605267109425645e-06, - "loss": 0.2074, + "epoch": 0.55, + "grad_norm": 0.29053784392307386, + "learning_rate": 8.790315720547456e-06, + "loss": 0.2366, "step": 12026 }, { - "epoch": 0.69, - "grad_norm": 1.050741505415178, - "learning_rate": 4.6037003093835135e-06, - "loss": 0.4651, + "epoch": 0.55, + "grad_norm": 0.4265672607253519, + "learning_rate": 8.788838741467961e-06, + "loss": 0.3421, "step": 12027 }, { - "epoch": 0.69, - "grad_norm": 0.3340140311716859, - "learning_rate": 4.602133696215007e-06, - "loss": 0.2685, + "epoch": 0.55, + "grad_norm": 0.21670371639272284, + "learning_rate": 8.787361789202406e-06, + "loss": 0.0906, "step": 12028 }, { - "epoch": 0.69, - "grad_norm": 0.3868998595979161, - "learning_rate": 4.6005672699743795e-06, - "loss": 0.3129, + "epoch": 0.55, + "grad_norm": 0.3404766667164344, + "learning_rate": 8.785884863783498e-06, + "loss": 0.2564, "step": 12029 }, { - "epoch": 0.69, - "grad_norm": 0.6958039333913612, - "learning_rate": 4.599001030715876e-06, - "loss": 0.2917, + "epoch": 0.55, + "grad_norm": 0.3912450325470597, + "learning_rate": 8.784407965243926e-06, + "loss": 0.3004, "step": 12030 }, { - "epoch": 0.69, - "grad_norm": 0.3372161814167908, - "learning_rate": 4.59743497849373e-06, - "loss": 0.274, + "epoch": 0.55, + "grad_norm": 0.9787767926254624, + "learning_rate": 8.782931093616392e-06, + "loss": 0.3409, "step": 12031 }, { - "epoch": 0.69, - "grad_norm": 0.3879938736639436, - "learning_rate": 4.5958691133621815e-06, - "loss": 0.3127, + "epoch": 0.55, + "grad_norm": 0.3387538536114813, + "learning_rate": 8.781454248933586e-06, + "loss": 0.2634, "step": 12032 }, { - "epoch": 0.69, - "grad_norm": 0.14895293830728926, - "learning_rate": 4.594303435375454e-06, - "loss": 0.1142, + "epoch": 0.55, + "grad_norm": 1.2035584318068064, + "learning_rate": 8.779977431228213e-06, + "loss": 0.5478, "step": 12033 }, { - "epoch": 0.69, - "grad_norm": 0.322727915954783, - "learning_rate": 4.592737944587766e-06, - "loss": 0.2661, + "epoch": 0.55, + "grad_norm": 0.20558018365004155, + "learning_rate": 8.778500640532963e-06, + "loss": 0.1592, "step": 12034 }, { - "epoch": 0.69, - "grad_norm": 1.1372554738979654, - "learning_rate": 4.591172641053326e-06, - "loss": 0.5845, + "epoch": 0.55, + "grad_norm": 0.3897127770619501, + "learning_rate": 8.777023876880532e-06, + "loss": 0.2835, "step": 12035 }, { - "epoch": 0.69, - "grad_norm": 0.42201688444677404, - "learning_rate": 4.589607524826351e-06, - "loss": 0.2639, + "epoch": 0.55, + "grad_norm": 1.0989633803004761, + "learning_rate": 8.77554714030361e-06, + "loss": 0.4923, "step": 12036 }, { - "epoch": 0.69, - "grad_norm": 0.3165929336205737, - "learning_rate": 4.588042595961032e-06, - "loss": 0.2747, + "epoch": 0.55, + "grad_norm": 0.6097157262689636, + "learning_rate": 8.774070430834898e-06, + "loss": 0.2856, "step": 12037 }, { - "epoch": 0.69, - "grad_norm": 0.40563172992912694, - "learning_rate": 4.586477854511566e-06, - "loss": 0.2555, + "epoch": 0.55, + "grad_norm": 0.4183619078638152, + "learning_rate": 8.772593748507083e-06, + "loss": 0.2759, "step": 12038 }, { - "epoch": 0.69, - "grad_norm": 0.3947859210351605, - "learning_rate": 4.584913300532135e-06, - "loss": 0.2764, + "epoch": 0.55, + "grad_norm": 0.49880110316623244, + "learning_rate": 8.771117093352861e-06, + "loss": 0.3491, "step": 12039 }, { - "epoch": 0.69, - "grad_norm": 0.28751873344914913, - "learning_rate": 4.583348934076929e-06, - "loss": 0.1917, + "epoch": 0.55, + "grad_norm": 0.31057170995754196, + "learning_rate": 8.769640465404922e-06, + "loss": 0.2039, "step": 12040 }, { - "epoch": 0.69, - "grad_norm": 0.3649164575475577, - "learning_rate": 4.581784755200115e-06, - "loss": 0.2989, + "epoch": 0.55, + "grad_norm": 0.2974341327603078, + "learning_rate": 8.768163864695953e-06, + "loss": 0.1922, "step": 12041 }, { - "epoch": 0.69, - "grad_norm": 0.7260664375548328, - "learning_rate": 4.580220763955863e-06, - "loss": 0.3636, + "epoch": 0.55, + "grad_norm": 0.44147151235301285, + "learning_rate": 8.766687291258653e-06, + "loss": 0.3282, "step": 12042 }, { - "epoch": 0.69, - "grad_norm": 0.29090161691547395, - "learning_rate": 4.578656960398328e-06, - "loss": 0.1897, + "epoch": 0.55, + "grad_norm": 0.9843206258956417, + "learning_rate": 8.76521074512571e-06, + "loss": 0.4223, "step": 12043 }, { - "epoch": 0.69, - "grad_norm": 0.3837560896918546, - "learning_rate": 4.577093344581674e-06, - "loss": 0.316, + "epoch": 0.55, + "grad_norm": 0.32996100101104375, + "learning_rate": 8.763734226329809e-06, + "loss": 0.2028, "step": 12044 }, { - "epoch": 0.69, - "grad_norm": 0.3944502241833138, - "learning_rate": 4.575529916560043e-06, - "loss": 0.2643, + "epoch": 0.55, + "grad_norm": 1.0957435518742933, + "learning_rate": 8.762257734903635e-06, + "loss": 0.5338, "step": 12045 }, { - "epoch": 0.69, - "grad_norm": 0.28381455429202385, - "learning_rate": 4.573966676387579e-06, - "loss": 0.2047, + "epoch": 0.55, + "grad_norm": 0.396917324501631, + "learning_rate": 8.760781270879889e-06, + "loss": 0.336, "step": 12046 }, { - "epoch": 0.69, - "grad_norm": 0.5857048817229604, - "learning_rate": 4.5724036241184144e-06, - "loss": 0.2819, + "epoch": 0.55, + "grad_norm": 0.2529929989488865, + "learning_rate": 8.759304834291248e-06, + "loss": 0.1708, "step": 12047 }, { - "epoch": 0.69, - "grad_norm": 0.3373748374080497, - "learning_rate": 4.5708407598066766e-06, - "loss": 0.2939, + "epoch": 0.55, + "grad_norm": 0.8797059442867133, + "learning_rate": 8.757828425170403e-06, + "loss": 0.4366, "step": 12048 }, { - "epoch": 0.69, - "grad_norm": 0.285983173917611, - "learning_rate": 4.569278083506492e-06, - "loss": 0.1824, + "epoch": 0.55, + "grad_norm": 0.5154756744209222, + "learning_rate": 8.756352043550038e-06, + "loss": 0.3382, "step": 12049 }, { - "epoch": 0.69, - "grad_norm": 1.1284028289918933, - "learning_rate": 4.567715595271976e-06, - "loss": 0.5352, + "epoch": 0.55, + "grad_norm": 0.22362575038940657, + "learning_rate": 8.754875689462843e-06, + "loss": 0.1816, "step": 12050 }, { - "epoch": 0.69, - "grad_norm": 1.1099867271284871, - "learning_rate": 4.566153295157233e-06, - "loss": 0.7988, + "epoch": 0.55, + "grad_norm": 1.29676257439492, + "learning_rate": 8.753399362941499e-06, + "loss": 0.608, "step": 12051 }, { - "epoch": 0.69, - "grad_norm": 0.2915399836846607, - "learning_rate": 4.5645911832163654e-06, - "loss": 0.2437, + "epoch": 0.55, + "grad_norm": 0.3990289284007032, + "learning_rate": 8.751923064018692e-06, + "loss": 0.2287, "step": 12052 }, { - "epoch": 0.69, - "grad_norm": 0.35039432313772084, - "learning_rate": 4.563029259503474e-06, - "loss": 0.2095, + "epoch": 0.55, + "grad_norm": 0.3403326258277737, + "learning_rate": 8.750446792727103e-06, + "loss": 0.2698, "step": 12053 }, { - "epoch": 0.69, - "grad_norm": 0.41625909045888376, - "learning_rate": 4.561467524072651e-06, - "loss": 0.2736, + "epoch": 0.55, + "grad_norm": 0.41565576498811224, + "learning_rate": 8.74897054909942e-06, + "loss": 0.2664, "step": 12054 }, { - "epoch": 0.69, - "grad_norm": 0.308228128875875, - "learning_rate": 4.5599059769779654e-06, - "loss": 0.2516, + "epoch": 0.55, + "grad_norm": 0.4351278614087777, + "learning_rate": 8.747494333168323e-06, + "loss": 0.2791, "step": 12055 }, { - "epoch": 0.69, - "grad_norm": 0.34633389816855337, - "learning_rate": 4.558344618273506e-06, - "loss": 0.2532, + "epoch": 0.55, + "grad_norm": 0.40722765813421624, + "learning_rate": 8.746018144966495e-06, + "loss": 0.256, "step": 12056 }, { - "epoch": 0.69, - "grad_norm": 1.1119503132303514, - "learning_rate": 4.556783448013338e-06, - "loss": 0.7745, + "epoch": 0.55, + "grad_norm": 0.5239466587438371, + "learning_rate": 8.744541984526616e-06, + "loss": 0.2625, "step": 12057 }, { - "epoch": 0.69, - "grad_norm": 0.505912833656344, - "learning_rate": 4.555222466251525e-06, - "loss": 0.3091, + "epoch": 0.55, + "grad_norm": 0.3885071837563332, + "learning_rate": 8.743065851881364e-06, + "loss": 0.3376, "step": 12058 }, { - "epoch": 0.69, - "grad_norm": 0.30427640882311047, - "learning_rate": 4.553661673042123e-06, - "loss": 0.2245, + "epoch": 0.55, + "grad_norm": 0.4065261607424598, + "learning_rate": 8.741589747063426e-06, + "loss": 0.2239, "step": 12059 }, { - "epoch": 0.69, - "grad_norm": 0.2822146065378598, - "learning_rate": 4.552101068439181e-06, - "loss": 0.2482, + "epoch": 0.55, + "grad_norm": 0.5399567678158372, + "learning_rate": 8.74011367010548e-06, + "loss": 0.3077, "step": 12060 }, { - "epoch": 0.69, - "grad_norm": 0.3780466110606745, - "learning_rate": 4.550540652496748e-06, - "loss": 0.2474, + "epoch": 0.55, + "grad_norm": 0.45652742757003495, + "learning_rate": 8.738637621040202e-06, + "loss": 0.2503, "step": 12061 }, { - "epoch": 0.69, - "grad_norm": 0.5094716111081256, - "learning_rate": 4.548980425268857e-06, - "loss": 0.2451, + "epoch": 0.55, + "grad_norm": 0.2873170587304599, + "learning_rate": 8.737161599900267e-06, + "loss": 0.2447, "step": 12062 }, { - "epoch": 0.69, - "grad_norm": 1.2888871968821543, - "learning_rate": 4.5474203868095415e-06, - "loss": 0.4701, + "epoch": 0.55, + "grad_norm": 0.30762283018979064, + "learning_rate": 8.73568560671836e-06, + "loss": 0.1721, "step": 12063 }, { - "epoch": 0.69, - "grad_norm": 0.2587492497087311, - "learning_rate": 4.545860537172818e-06, - "loss": 0.2568, + "epoch": 0.55, + "grad_norm": 0.957083522560443, + "learning_rate": 8.734209641527157e-06, + "loss": 0.4992, "step": 12064 }, { - "epoch": 0.69, - "grad_norm": 0.46174224389574064, - "learning_rate": 4.5443008764127135e-06, - "loss": 0.3415, + "epoch": 0.55, + "grad_norm": 0.4275929876464102, + "learning_rate": 8.732733704359331e-06, + "loss": 0.305, "step": 12065 }, { - "epoch": 0.69, - "grad_norm": 0.24079285479608747, - "learning_rate": 4.542741404583235e-06, - "loss": 0.1051, + "epoch": 0.55, + "grad_norm": 0.33508925606584844, + "learning_rate": 8.731257795247558e-06, + "loss": 0.3024, "step": 12066 }, { - "epoch": 0.69, - "grad_norm": 0.42779305694074776, - "learning_rate": 4.541182121738388e-06, - "loss": 0.2387, + "epoch": 0.55, + "grad_norm": 0.3189441061505665, + "learning_rate": 8.729781914224517e-06, + "loss": 0.1839, "step": 12067 }, { - "epoch": 0.69, - "grad_norm": 0.28243846220049185, - "learning_rate": 4.539623027932165e-06, - "loss": 0.2727, + "epoch": 0.55, + "grad_norm": 0.2936189463971261, + "learning_rate": 8.728306061322879e-06, + "loss": 0.2079, "step": 12068 }, { - "epoch": 0.69, - "grad_norm": 0.7645806896247914, - "learning_rate": 4.538064123218565e-06, - "loss": 0.34, + "epoch": 0.55, + "grad_norm": 1.0867408537636722, + "learning_rate": 8.72683023657532e-06, + "loss": 0.5639, "step": 12069 }, { - "epoch": 0.69, - "grad_norm": 0.34980810066617957, - "learning_rate": 4.53650540765157e-06, - "loss": 0.2661, + "epoch": 0.55, + "grad_norm": 0.40596548790664233, + "learning_rate": 8.725354440014512e-06, + "loss": 0.2702, "step": 12070 }, { - "epoch": 0.69, - "grad_norm": 0.5862188882028873, - "learning_rate": 4.534946881285158e-06, - "loss": 0.3972, + "epoch": 0.55, + "grad_norm": 0.3498077115694771, + "learning_rate": 8.723878671673127e-06, + "loss": 0.2695, "step": 12071 }, { - "epoch": 0.69, - "grad_norm": 0.21256587958188933, - "learning_rate": 4.533388544173301e-06, - "loss": 0.169, + "epoch": 0.55, + "grad_norm": 1.060320666679604, + "learning_rate": 8.722402931583839e-06, + "loss": 0.4815, "step": 12072 }, { - "epoch": 0.69, - "grad_norm": 0.3133687014658715, - "learning_rate": 4.531830396369959e-06, - "loss": 0.2401, + "epoch": 0.55, + "grad_norm": 0.3512309137902775, + "learning_rate": 8.720927219779319e-06, + "loss": 0.2621, "step": 12073 }, { - "epoch": 0.69, - "grad_norm": 1.1903642275444457, - "learning_rate": 4.530272437929099e-06, - "loss": 0.6457, + "epoch": 0.55, + "grad_norm": 0.37278184971887085, + "learning_rate": 8.719451536292239e-06, + "loss": 0.2241, "step": 12074 }, { - "epoch": 0.69, - "grad_norm": 0.3660557403373204, - "learning_rate": 4.528714668904669e-06, - "loss": 0.2687, + "epoch": 0.55, + "grad_norm": 0.3092564428550018, + "learning_rate": 8.717975881155261e-06, + "loss": 0.225, "step": 12075 }, { - "epoch": 0.69, - "grad_norm": 0.5739283505931541, - "learning_rate": 4.527157089350616e-06, - "loss": 0.3292, + "epoch": 0.55, + "grad_norm": 0.910333943371931, + "learning_rate": 8.716500254401067e-06, + "loss": 0.3269, "step": 12076 }, { - "epoch": 0.69, - "grad_norm": 0.4039211294031608, - "learning_rate": 4.525599699320873e-06, - "loss": 0.3117, + "epoch": 0.55, + "grad_norm": 0.4146545995514526, + "learning_rate": 8.715024656062321e-06, + "loss": 0.2581, "step": 12077 }, { - "epoch": 0.69, - "grad_norm": 0.30685207485541593, - "learning_rate": 4.52404249886938e-06, - "loss": 0.1924, + "epoch": 0.55, + "grad_norm": 0.33083560233192044, + "learning_rate": 8.71354908617169e-06, + "loss": 0.293, "step": 12078 }, { - "epoch": 0.69, - "grad_norm": 0.287329467106963, - "learning_rate": 4.5224854880500615e-06, - "loss": 0.0706, + "epoch": 0.55, + "grad_norm": 1.0467048271811226, + "learning_rate": 8.71207354476184e-06, + "loss": 0.6423, "step": 12079 }, { - "epoch": 0.69, - "grad_norm": 0.27929117817412435, - "learning_rate": 4.520928666916834e-06, - "loss": 0.2665, + "epoch": 0.55, + "grad_norm": 0.25244907094249064, + "learning_rate": 8.710598031865444e-06, + "loss": 0.1468, "step": 12080 }, { - "epoch": 0.69, - "grad_norm": 0.7506304080374441, - "learning_rate": 4.519372035523607e-06, - "loss": 0.3991, + "epoch": 0.55, + "grad_norm": 0.29069464484614976, + "learning_rate": 8.709122547515163e-06, + "loss": 0.2473, "step": 12081 }, { - "epoch": 0.69, - "grad_norm": 0.4259104980903786, - "learning_rate": 4.517815593924295e-06, - "loss": 0.2043, + "epoch": 0.56, + "grad_norm": 0.5493560144031064, + "learning_rate": 8.707647091743665e-06, + "loss": 0.3434, "step": 12082 }, { - "epoch": 0.69, - "grad_norm": 0.3686775706552464, - "learning_rate": 4.5162593421727926e-06, - "loss": 0.3217, + "epoch": 0.56, + "grad_norm": 0.45346816912496, + "learning_rate": 8.706171664583613e-06, + "loss": 0.2426, "step": 12083 }, { - "epoch": 0.69, - "grad_norm": 0.4937075304767455, - "learning_rate": 4.514703280322995e-06, - "loss": 0.3287, + "epoch": 0.56, + "grad_norm": 0.554773173869556, + "learning_rate": 8.704696266067676e-06, + "loss": 0.3824, "step": 12084 }, { - "epoch": 0.69, - "grad_norm": 0.2030857822006487, - "learning_rate": 4.513147408428786e-06, - "loss": 0.1249, + "epoch": 0.56, + "grad_norm": 0.5269683132127034, + "learning_rate": 8.703220896228515e-06, + "loss": 0.4071, "step": 12085 }, { - "epoch": 0.69, - "grad_norm": 0.5124655651149372, - "learning_rate": 4.5115917265440425e-06, - "loss": 0.3465, + "epoch": 0.56, + "grad_norm": 0.26536870224085796, + "learning_rate": 8.701745555098793e-06, + "loss": 0.1839, "step": 12086 }, { - "epoch": 0.69, - "grad_norm": 0.4821076032344685, - "learning_rate": 4.510036234722645e-06, - "loss": 0.3284, + "epoch": 0.56, + "grad_norm": 0.34480091374865607, + "learning_rate": 8.700270242711174e-06, + "loss": 0.2088, "step": 12087 }, { - "epoch": 0.69, - "grad_norm": 0.2927910612808501, - "learning_rate": 4.5084809330184605e-06, - "loss": 0.2206, + "epoch": 0.56, + "grad_norm": 0.6692146415829521, + "learning_rate": 8.698794959098314e-06, + "loss": 0.3888, "step": 12088 }, { - "epoch": 0.69, - "grad_norm": 0.5136650655164572, - "learning_rate": 4.506925821485338e-06, - "loss": 0.3624, + "epoch": 0.56, + "grad_norm": 0.29535850900869326, + "learning_rate": 8.697319704292884e-06, + "loss": 0.2043, "step": 12089 }, { - "epoch": 0.69, - "grad_norm": 0.2936563962334066, - "learning_rate": 4.505370900177142e-06, - "loss": 0.1928, + "epoch": 0.56, + "grad_norm": 0.395418791236929, + "learning_rate": 8.695844478327541e-06, + "loss": 0.3112, "step": 12090 }, { - "epoch": 0.69, - "grad_norm": 0.3934100319552002, - "learning_rate": 4.503816169147715e-06, - "loss": 0.2727, + "epoch": 0.56, + "grad_norm": 1.020479196761933, + "learning_rate": 8.694369281234945e-06, + "loss": 0.6468, "step": 12091 }, { - "epoch": 0.69, - "grad_norm": 0.28160661063648673, - "learning_rate": 4.502261628450898e-06, - "loss": 0.2303, + "epoch": 0.56, + "grad_norm": 0.3690321253269678, + "learning_rate": 8.692894113047748e-06, + "loss": 0.2251, "step": 12092 }, { - "epoch": 0.69, - "grad_norm": 0.5788519867248023, - "learning_rate": 4.5007072781405205e-06, - "loss": 0.3512, + "epoch": 0.56, + "grad_norm": 0.26451933624404217, + "learning_rate": 8.691418973798621e-06, + "loss": 0.213, "step": 12093 }, { - "epoch": 0.69, - "grad_norm": 0.6096906100436412, - "learning_rate": 4.4991531182704166e-06, - "loss": 0.3835, + "epoch": 0.56, + "grad_norm": 0.4349829082853501, + "learning_rate": 8.689943863520215e-06, + "loss": 0.3357, "step": 12094 }, { - "epoch": 0.69, - "grad_norm": 0.28201262052309894, - "learning_rate": 4.497599148894404e-06, - "loss": 0.2467, + "epoch": 0.56, + "grad_norm": 0.8771696615448499, + "learning_rate": 8.688468782245192e-06, + "loss": 0.412, "step": 12095 }, { - "epoch": 0.69, - "grad_norm": 0.33592745821692854, - "learning_rate": 4.496045370066296e-06, - "loss": 0.2624, + "epoch": 0.56, + "grad_norm": 0.3995132364516439, + "learning_rate": 8.686993730006202e-06, + "loss": 0.2328, "step": 12096 }, { - "epoch": 0.7, - "grad_norm": 0.3753798828666515, - "learning_rate": 4.494491781839901e-06, - "loss": 0.2396, + "epoch": 0.56, + "grad_norm": 0.38632855670177463, + "learning_rate": 8.685518706835909e-06, + "loss": 0.3075, "step": 12097 }, { - "epoch": 0.7, - "grad_norm": 0.3387213970720124, - "learning_rate": 4.492938384269015e-06, - "loss": 0.249, + "epoch": 0.56, + "grad_norm": 0.5879456966206789, + "learning_rate": 8.684043712766962e-06, + "loss": 0.3132, "step": 12098 }, { - "epoch": 0.7, - "grad_norm": 0.33277551615459394, - "learning_rate": 4.491385177407439e-06, - "loss": 0.293, + "epoch": 0.56, + "grad_norm": 0.36582937446088176, + "learning_rate": 8.682568747832023e-06, + "loss": 0.2348, "step": 12099 }, { - "epoch": 0.7, - "grad_norm": 0.7391401050418194, - "learning_rate": 4.489832161308958e-06, - "loss": 0.4181, + "epoch": 0.56, + "grad_norm": 0.4896582608066107, + "learning_rate": 8.681093812063739e-06, + "loss": 0.3081, "step": 12100 }, { - "epoch": 0.7, - "grad_norm": 0.3603244564137188, - "learning_rate": 4.488279336027353e-06, - "loss": 0.3054, + "epoch": 0.56, + "grad_norm": 0.3836812537596712, + "learning_rate": 8.679618905494765e-06, + "loss": 0.2769, "step": 12101 }, { - "epoch": 0.7, - "grad_norm": 0.3353997447238762, - "learning_rate": 4.486726701616393e-06, - "loss": 0.0981, + "epoch": 0.56, + "grad_norm": 0.3484784604433825, + "learning_rate": 8.678144028157758e-06, + "loss": 0.2905, "step": 12102 }, { - "epoch": 0.7, - "grad_norm": 0.2523472651585536, - "learning_rate": 4.485174258129854e-06, - "loss": 0.2128, + "epoch": 0.56, + "grad_norm": 1.0479378538221573, + "learning_rate": 8.676669180085369e-06, + "loss": 0.4337, "step": 12103 }, { - "epoch": 0.7, - "grad_norm": 0.29579376641255173, - "learning_rate": 4.483622005621493e-06, - "loss": 0.2761, + "epoch": 0.56, + "grad_norm": 0.5421737039367766, + "learning_rate": 8.67519436131025e-06, + "loss": 0.3703, "step": 12104 }, { - "epoch": 0.7, - "grad_norm": 0.67653447878386, - "learning_rate": 4.4820699441450655e-06, - "loss": 0.3091, + "epoch": 0.56, + "grad_norm": 0.43947241166762246, + "learning_rate": 8.673719571865045e-06, + "loss": 0.2947, "step": 12105 }, { - "epoch": 0.7, - "grad_norm": 0.6401876821418024, - "learning_rate": 4.4805180737543145e-06, - "loss": 0.4376, + "epoch": 0.56, + "grad_norm": 0.24921961520559055, + "learning_rate": 8.672244811782416e-06, + "loss": 0.1885, "step": 12106 }, { - "epoch": 0.7, - "grad_norm": 0.3921815615134285, - "learning_rate": 4.478966394502988e-06, - "loss": 0.3295, + "epoch": 0.56, + "grad_norm": 0.6412450819669681, + "learning_rate": 8.670770081095005e-06, + "loss": 0.3019, "step": 12107 }, { - "epoch": 0.7, - "grad_norm": 0.3475348263674097, - "learning_rate": 4.4774149064448195e-06, - "loss": 0.2545, + "epoch": 0.56, + "grad_norm": 0.46005836278543133, + "learning_rate": 8.669295379835467e-06, + "loss": 0.3225, "step": 12108 }, { - "epoch": 0.7, - "grad_norm": 0.36534583192677034, - "learning_rate": 4.475863609633534e-06, - "loss": 0.2564, + "epoch": 0.56, + "grad_norm": 0.3334958102490018, + "learning_rate": 8.667820708036441e-06, + "loss": 0.27, "step": 12109 }, { - "epoch": 0.7, - "grad_norm": 0.3668623445667178, - "learning_rate": 4.474312504122854e-06, - "loss": 0.2889, + "epoch": 0.56, + "grad_norm": 0.6426044984608472, + "learning_rate": 8.666346065730583e-06, + "loss": 0.3594, "step": 12110 }, { - "epoch": 0.7, - "grad_norm": 0.25610872600300666, - "learning_rate": 4.472761589966493e-06, - "loss": 0.2137, + "epoch": 0.56, + "grad_norm": 0.3424909965785827, + "learning_rate": 8.664871452950538e-06, + "loss": 0.3004, "step": 12111 }, { - "epoch": 0.7, - "grad_norm": 0.561497219037444, - "learning_rate": 4.471210867218161e-06, - "loss": 0.3434, + "epoch": 0.56, + "grad_norm": 0.26765923835118355, + "learning_rate": 8.663396869728953e-06, + "loss": 0.1383, "step": 12112 }, { - "epoch": 0.7, - "grad_norm": 0.37026942810244867, - "learning_rate": 4.4696603359315604e-06, - "loss": 0.3116, + "epoch": 0.56, + "grad_norm": 0.35210022771984856, + "learning_rate": 8.661922316098472e-06, + "loss": 0.294, "step": 12113 }, { - "epoch": 0.7, - "grad_norm": 0.7497924302268585, - "learning_rate": 4.468109996160385e-06, - "loss": 0.4706, + "epoch": 0.56, + "grad_norm": 0.39717433736412966, + "learning_rate": 8.66044779209174e-06, + "loss": 0.3081, "step": 12114 }, { - "epoch": 0.7, - "grad_norm": 0.3076550596172886, - "learning_rate": 4.466559847958318e-06, - "loss": 0.2392, + "epoch": 0.56, + "grad_norm": 0.8170779609219889, + "learning_rate": 8.658973297741406e-06, + "loss": 0.6013, "step": 12115 }, { - "epoch": 0.7, - "grad_norm": 0.3388503851948695, - "learning_rate": 4.46500989137905e-06, - "loss": 0.2912, + "epoch": 0.56, + "grad_norm": 0.6329941894039852, + "learning_rate": 8.65749883308011e-06, + "loss": 0.2735, "step": 12116 }, { - "epoch": 0.7, - "grad_norm": 0.3243942487719419, - "learning_rate": 4.463460126476251e-06, - "loss": 0.2058, + "epoch": 0.56, + "grad_norm": 0.3042072479130231, + "learning_rate": 8.656024398140495e-06, + "loss": 0.285, "step": 12117 }, { - "epoch": 0.7, - "grad_norm": 0.46445889232908727, - "learning_rate": 4.46191055330359e-06, - "loss": 0.2573, + "epoch": 0.56, + "grad_norm": 0.5189996414630461, + "learning_rate": 8.654549992955203e-06, + "loss": 0.3282, "step": 12118 }, { - "epoch": 0.7, - "grad_norm": 0.26405584070347937, - "learning_rate": 4.460361171914724e-06, - "loss": 0.2552, + "epoch": 0.56, + "grad_norm": 0.24187174057280092, + "learning_rate": 8.653075617556879e-06, + "loss": 0.1076, "step": 12119 }, { - "epoch": 0.7, - "grad_norm": 0.48086867532454386, - "learning_rate": 4.458811982363317e-06, - "loss": 0.2995, + "epoch": 0.56, + "grad_norm": 0.430266890248924, + "learning_rate": 8.651601271978162e-06, + "loss": 0.3348, "step": 12120 }, { - "epoch": 0.7, - "grad_norm": 0.6352621011962262, - "learning_rate": 4.457262984703015e-06, - "loss": 0.2686, + "epoch": 0.56, + "grad_norm": 0.36795004582379204, + "learning_rate": 8.650126956251696e-06, + "loss": 0.2939, "step": 12121 }, { - "epoch": 0.7, - "grad_norm": 0.2669809765909583, - "learning_rate": 4.455714178987456e-06, - "loss": 0.2236, + "epoch": 0.56, + "grad_norm": 0.719364124669118, + "learning_rate": 8.64865267041011e-06, + "loss": 0.2767, "step": 12122 }, { - "epoch": 0.7, - "grad_norm": 0.26373342863482124, - "learning_rate": 4.454165565270272e-06, - "loss": 0.2267, + "epoch": 0.56, + "grad_norm": 0.4043313951179866, + "learning_rate": 8.64717841448606e-06, + "loss": 0.282, "step": 12123 }, { - "epoch": 0.7, - "grad_norm": 0.47415110258209775, - "learning_rate": 4.452617143605099e-06, - "loss": 0.2042, + "epoch": 0.56, + "grad_norm": 0.48788202139674147, + "learning_rate": 8.645704188512173e-06, + "loss": 0.2994, "step": 12124 }, { - "epoch": 0.7, - "grad_norm": 0.37038910052018265, - "learning_rate": 4.451068914045556e-06, - "loss": 0.3001, + "epoch": 0.56, + "grad_norm": 0.2405309844582279, + "learning_rate": 8.644229992521092e-06, + "loss": 0.1882, "step": 12125 }, { - "epoch": 0.7, - "grad_norm": 0.7943156231991123, - "learning_rate": 4.449520876645258e-06, - "loss": 0.372, + "epoch": 0.56, + "grad_norm": 0.41080006042089867, + "learning_rate": 8.642755826545448e-06, + "loss": 0.3032, "step": 12126 }, { - "epoch": 0.7, - "grad_norm": 0.30751059252070245, - "learning_rate": 4.447973031457809e-06, - "loss": 0.2835, + "epoch": 0.56, + "grad_norm": 0.697861652310811, + "learning_rate": 8.641281690617886e-06, + "loss": 0.4605, "step": 12127 }, { - "epoch": 0.7, - "grad_norm": 0.31206151442044566, - "learning_rate": 4.4464253785368205e-06, - "loss": 0.1877, + "epoch": 0.56, + "grad_norm": 0.708843040572815, + "learning_rate": 8.639807584771036e-06, + "loss": 0.4533, "step": 12128 }, { - "epoch": 0.7, - "grad_norm": 0.2591005552248067, - "learning_rate": 4.4448779179358815e-06, - "loss": 0.1728, + "epoch": 0.56, + "grad_norm": 0.28422475962820065, + "learning_rate": 8.638333509037537e-06, + "loss": 0.2154, "step": 12129 }, { - "epoch": 0.7, - "grad_norm": 1.248204418833787, - "learning_rate": 4.443330649708581e-06, - "loss": 0.7986, + "epoch": 0.56, + "grad_norm": 0.3553995867958154, + "learning_rate": 8.63685946345002e-06, + "loss": 0.2497, "step": 12130 }, { - "epoch": 0.7, - "grad_norm": 0.2736973391515441, - "learning_rate": 4.441783573908498e-06, - "loss": 0.2117, + "epoch": 0.56, + "grad_norm": 0.4664045815236763, + "learning_rate": 8.63538544804112e-06, + "loss": 0.2637, "step": 12131 }, { - "epoch": 0.7, - "grad_norm": 0.5426384659039937, - "learning_rate": 4.440236690589215e-06, - "loss": 0.3368, + "epoch": 0.56, + "grad_norm": 0.3106996953047385, + "learning_rate": 8.633911462843472e-06, + "loss": 0.2174, "step": 12132 }, { - "epoch": 0.7, - "grad_norm": 0.9915066429385122, - "learning_rate": 4.438689999804295e-06, - "loss": 0.4082, + "epoch": 0.56, + "grad_norm": 0.4078470250622167, + "learning_rate": 8.632437507889707e-06, + "loss": 0.3118, "step": 12133 }, { - "epoch": 0.7, - "grad_norm": 0.2815498231479696, - "learning_rate": 4.437143501607302e-06, - "loss": 0.1841, + "epoch": 0.56, + "grad_norm": 0.8623188118934189, + "learning_rate": 8.630963583212458e-06, + "loss": 0.4157, "step": 12134 }, { - "epoch": 0.7, - "grad_norm": 0.21873970118674596, - "learning_rate": 4.435597196051789e-06, - "loss": 0.2132, + "epoch": 0.56, + "grad_norm": 0.3272544556945812, + "learning_rate": 8.629489688844353e-06, + "loss": 0.1998, "step": 12135 }, { - "epoch": 0.7, - "grad_norm": 1.2154872539731227, - "learning_rate": 4.434051083191304e-06, - "loss": 0.742, + "epoch": 0.56, + "grad_norm": 1.0315416012625427, + "learning_rate": 8.628015824818028e-06, + "loss": 0.5152, "step": 12136 }, { - "epoch": 0.7, - "grad_norm": 0.3401125230433799, - "learning_rate": 4.432505163079394e-06, - "loss": 0.1953, + "epoch": 0.56, + "grad_norm": 0.25469559508549583, + "learning_rate": 8.626541991166113e-06, + "loss": 0.2285, "step": 12137 }, { - "epoch": 0.7, - "grad_norm": 1.0219315897519718, - "learning_rate": 4.4309594357695895e-06, - "loss": 0.4175, + "epoch": 0.56, + "grad_norm": 0.3063539257844163, + "learning_rate": 8.625068187921231e-06, + "loss": 0.222, "step": 12138 }, { - "epoch": 0.7, - "grad_norm": 0.37693401492937234, - "learning_rate": 4.429413901315421e-06, - "loss": 0.3112, + "epoch": 0.56, + "grad_norm": 0.8578195238735736, + "learning_rate": 8.623594415116013e-06, + "loss": 0.4166, "step": 12139 }, { - "epoch": 0.7, - "grad_norm": 0.3591687661022238, - "learning_rate": 4.4278685597704065e-06, - "loss": 0.2951, + "epoch": 0.56, + "grad_norm": 0.8017656309839689, + "learning_rate": 8.62212067278309e-06, + "loss": 0.4905, "step": 12140 }, { - "epoch": 0.7, - "grad_norm": 0.434384821341122, - "learning_rate": 4.426323411188067e-06, - "loss": 0.1519, + "epoch": 0.56, + "grad_norm": 0.32775088971283983, + "learning_rate": 8.620646960955087e-06, + "loss": 0.2757, "step": 12141 }, { - "epoch": 0.7, - "grad_norm": 0.2933368604620994, - "learning_rate": 4.424778455621908e-06, - "loss": 0.25, + "epoch": 0.56, + "grad_norm": 0.4597058637600257, + "learning_rate": 8.619173279664633e-06, + "loss": 0.243, "step": 12142 }, { - "epoch": 0.7, - "grad_norm": 0.32729484470318493, - "learning_rate": 4.4232336931254324e-06, - "loss": 0.2727, + "epoch": 0.56, + "grad_norm": 0.3677673562886798, + "learning_rate": 8.617699628944346e-06, + "loss": 0.2409, "step": 12143 }, { - "epoch": 0.7, - "grad_norm": 0.7742472208938666, - "learning_rate": 4.42168912375213e-06, - "loss": 0.2935, + "epoch": 0.56, + "grad_norm": 0.4127577571625338, + "learning_rate": 8.616226008826863e-06, + "loss": 0.2503, "step": 12144 }, { - "epoch": 0.7, - "grad_norm": 0.790094629215079, - "learning_rate": 4.420144747555497e-06, - "loss": 0.3164, + "epoch": 0.56, + "grad_norm": 0.418445022040065, + "learning_rate": 8.6147524193448e-06, + "loss": 0.271, "step": 12145 }, { - "epoch": 0.7, - "grad_norm": 0.355497378756577, - "learning_rate": 4.418600564589012e-06, - "loss": 0.273, + "epoch": 0.56, + "grad_norm": 0.47577158919892376, + "learning_rate": 8.613278860530784e-06, + "loss": 0.2723, "step": 12146 }, { - "epoch": 0.7, - "grad_norm": 0.33562205802520717, - "learning_rate": 4.417056574906148e-06, - "loss": 0.2513, + "epoch": 0.56, + "grad_norm": 0.3882829402976484, + "learning_rate": 8.611805332417438e-06, + "loss": 0.299, "step": 12147 }, { - "epoch": 0.7, - "grad_norm": 0.9439148205438117, - "learning_rate": 4.415512778560376e-06, - "loss": 0.5202, + "epoch": 0.56, + "grad_norm": 0.4869964076547249, + "learning_rate": 8.610331835037383e-06, + "loss": 0.2675, "step": 12148 }, { - "epoch": 0.7, - "grad_norm": 0.31573271535824526, - "learning_rate": 4.413969175605152e-06, - "loss": 0.2504, + "epoch": 0.56, + "grad_norm": 0.35313175997503493, + "learning_rate": 8.60885836842324e-06, + "loss": 0.2813, "step": 12149 }, { - "epoch": 0.7, - "grad_norm": 0.30110495517556174, - "learning_rate": 4.412425766093939e-06, - "loss": 0.1944, + "epoch": 0.56, + "grad_norm": 0.3545971297571187, + "learning_rate": 8.607384932607635e-06, + "loss": 0.2677, "step": 12150 }, { - "epoch": 0.7, - "grad_norm": 0.5043638552372725, - "learning_rate": 4.410882550080182e-06, - "loss": 0.3179, + "epoch": 0.56, + "grad_norm": 0.6443502224815294, + "learning_rate": 8.605911527623186e-06, + "loss": 0.3515, "step": 12151 }, { - "epoch": 0.7, - "grad_norm": 0.33032605242595947, - "learning_rate": 4.409339527617321e-06, - "loss": 0.2619, + "epoch": 0.56, + "grad_norm": 0.3432484243188843, + "learning_rate": 8.604438153502506e-06, + "loss": 0.2396, "step": 12152 }, { - "epoch": 0.7, - "grad_norm": 1.2303501312225467, - "learning_rate": 4.407796698758788e-06, - "loss": 0.6729, + "epoch": 0.56, + "grad_norm": 0.277963297287344, + "learning_rate": 8.602964810278225e-06, + "loss": 0.2345, "step": 12153 }, { - "epoch": 0.7, - "grad_norm": 0.46335850324152866, - "learning_rate": 4.40625406355802e-06, - "loss": 0.2766, + "epoch": 0.56, + "grad_norm": 1.641393479569556, + "learning_rate": 8.601491497982956e-06, + "loss": 0.6431, "step": 12154 }, { - "epoch": 0.7, - "grad_norm": 0.2846449360546112, - "learning_rate": 4.404711622068436e-06, - "loss": 0.2599, + "epoch": 0.56, + "grad_norm": 0.7810315904394448, + "learning_rate": 8.60001821664932e-06, + "loss": 0.3187, "step": 12155 }, { - "epoch": 0.7, - "grad_norm": 0.7503421917007584, - "learning_rate": 4.40316937434344e-06, - "loss": 0.4376, + "epoch": 0.56, + "grad_norm": 0.3474634253626934, + "learning_rate": 8.598544966309926e-06, + "loss": 0.2619, "step": 12156 }, { - "epoch": 0.7, - "grad_norm": 0.2820811862011241, - "learning_rate": 4.401627320436453e-06, - "loss": 0.1031, + "epoch": 0.56, + "grad_norm": 0.40978884890491546, + "learning_rate": 8.597071746997399e-06, + "loss": 0.3234, "step": 12157 }, { - "epoch": 0.7, - "grad_norm": 0.33872988149067856, - "learning_rate": 4.40008546040087e-06, - "loss": 0.2569, + "epoch": 0.56, + "grad_norm": 0.2598257580304664, + "learning_rate": 8.595598558744351e-06, + "loss": 0.1357, "step": 12158 }, { - "epoch": 0.7, - "grad_norm": 0.3510877902653597, - "learning_rate": 4.3985437942900865e-06, - "loss": 0.3007, + "epoch": 0.56, + "grad_norm": 0.4061192614377386, + "learning_rate": 8.594125401583397e-06, + "loss": 0.289, "step": 12159 }, { - "epoch": 0.7, - "grad_norm": 0.8283159971853021, - "learning_rate": 4.397002322157492e-06, - "loss": 0.3384, + "epoch": 0.56, + "grad_norm": 0.5356915346565578, + "learning_rate": 8.592652275547153e-06, + "loss": 0.3201, "step": 12160 }, { - "epoch": 0.7, - "grad_norm": 0.3278628379074249, - "learning_rate": 4.395461044056462e-06, - "loss": 0.2431, + "epoch": 0.56, + "grad_norm": 0.3848580712658988, + "learning_rate": 8.591179180668227e-06, + "loss": 0.2396, "step": 12161 }, { - "epoch": 0.7, - "grad_norm": 0.4197960471554202, - "learning_rate": 4.393919960040377e-06, - "loss": 0.2814, + "epoch": 0.56, + "grad_norm": 0.383663763353386, + "learning_rate": 8.589706116979241e-06, + "loss": 0.2821, "step": 12162 }, { - "epoch": 0.7, - "grad_norm": 0.2417208244068282, - "learning_rate": 4.392379070162604e-06, - "loss": 0.1892, + "epoch": 0.56, + "grad_norm": 0.7289583063960312, + "learning_rate": 8.588233084512801e-06, + "loss": 0.4797, "step": 12163 }, { - "epoch": 0.7, - "grad_norm": 0.3290819278571764, - "learning_rate": 4.390838374476503e-06, - "loss": 0.2373, + "epoch": 0.56, + "grad_norm": 0.2265295807231736, + "learning_rate": 8.586760083301519e-06, + "loss": 0.1705, "step": 12164 }, { - "epoch": 0.7, - "grad_norm": 0.9639347889069623, - "learning_rate": 4.3892978730354245e-06, - "loss": 0.5774, + "epoch": 0.56, + "grad_norm": 0.3650074408151643, + "learning_rate": 8.585287113378006e-06, + "loss": 0.2866, "step": 12165 }, { - "epoch": 0.7, - "grad_norm": 0.32514156426728147, - "learning_rate": 4.387757565892722e-06, - "loss": 0.3137, + "epoch": 0.56, + "grad_norm": 0.8848424419267559, + "learning_rate": 8.583814174774873e-06, + "loss": 0.4913, "step": 12166 }, { - "epoch": 0.7, - "grad_norm": 0.2997269568806797, - "learning_rate": 4.386217453101735e-06, - "loss": 0.1819, + "epoch": 0.56, + "grad_norm": 0.661133540222615, + "learning_rate": 8.582341267524733e-06, + "loss": 0.4102, "step": 12167 }, { - "epoch": 0.7, - "grad_norm": 0.44106553840301743, - "learning_rate": 4.384677534715794e-06, - "loss": 0.2737, + "epoch": 0.56, + "grad_norm": 0.3357431072546675, + "learning_rate": 8.580868391660186e-06, + "loss": 0.2173, "step": 12168 }, { - "epoch": 0.7, - "grad_norm": 0.40065246533447957, - "learning_rate": 4.383137810788226e-06, - "loss": 0.2289, + "epoch": 0.56, + "grad_norm": 0.37970655856010216, + "learning_rate": 8.579395547213844e-06, + "loss": 0.3189, "step": 12169 }, { - "epoch": 0.7, - "grad_norm": 0.3319847681859942, - "learning_rate": 4.381598281372358e-06, - "loss": 0.1977, + "epoch": 0.56, + "grad_norm": 0.5213368559657799, + "learning_rate": 8.57792273421832e-06, + "loss": 0.3244, "step": 12170 }, { - "epoch": 0.7, - "grad_norm": 0.3532188786608427, - "learning_rate": 4.3800589465215e-06, - "loss": 0.3221, + "epoch": 0.56, + "grad_norm": 0.2805746365219365, + "learning_rate": 8.576449952706213e-06, + "loss": 0.1719, "step": 12171 }, { - "epoch": 0.7, - "grad_norm": 0.8397267713928125, - "learning_rate": 4.378519806288959e-06, - "loss": 0.3961, + "epoch": 0.56, + "grad_norm": 0.8445387806104611, + "learning_rate": 8.574977202710135e-06, + "loss": 0.4685, "step": 12172 }, { - "epoch": 0.7, - "grad_norm": 0.32192933397243806, - "learning_rate": 4.376980860728031e-06, - "loss": 0.2236, + "epoch": 0.56, + "grad_norm": 0.3621125659722737, + "learning_rate": 8.573504484262684e-06, + "loss": 0.3156, "step": 12173 }, { - "epoch": 0.7, - "grad_norm": 1.0490275553766555, - "learning_rate": 4.375442109892019e-06, - "loss": 0.527, + "epoch": 0.56, + "grad_norm": 0.30865931289735926, + "learning_rate": 8.572031797396473e-06, + "loss": 0.1993, "step": 12174 }, { - "epoch": 0.7, - "grad_norm": 0.22301783924245172, - "learning_rate": 4.373903553834203e-06, - "loss": 0.2092, + "epoch": 0.56, + "grad_norm": 1.2974983223794978, + "learning_rate": 8.570559142144102e-06, + "loss": 0.7434, "step": 12175 }, { - "epoch": 0.7, - "grad_norm": 0.3305834602905949, - "learning_rate": 4.372365192607866e-06, - "loss": 0.2539, + "epoch": 0.56, + "grad_norm": 0.38661142469154014, + "learning_rate": 8.569086518538172e-06, + "loss": 0.3354, "step": 12176 }, { - "epoch": 0.7, - "grad_norm": 1.0340970094704132, - "learning_rate": 4.370827026266281e-06, - "loss": 0.2975, + "epoch": 0.56, + "grad_norm": 0.23021367270157592, + "learning_rate": 8.567613926611287e-06, + "loss": 0.1309, "step": 12177 }, { - "epoch": 0.7, - "grad_norm": 0.37116083450859394, - "learning_rate": 4.36928905486271e-06, - "loss": 0.299, + "epoch": 0.56, + "grad_norm": 0.3587175980648574, + "learning_rate": 8.566141366396048e-06, + "loss": 0.278, "step": 12178 }, { - "epoch": 0.7, - "grad_norm": 0.29651541573485524, - "learning_rate": 4.3677512784504195e-06, - "loss": 0.2427, + "epoch": 0.56, + "grad_norm": 0.5600552244288344, + "learning_rate": 8.56466883792506e-06, + "loss": 0.3731, "step": 12179 }, { - "epoch": 0.7, - "grad_norm": 1.0371072230107874, - "learning_rate": 4.366213697082661e-06, - "loss": 0.3959, + "epoch": 0.56, + "grad_norm": 0.6031314381984104, + "learning_rate": 8.56319634123092e-06, + "loss": 0.3132, "step": 12180 }, { - "epoch": 0.7, - "grad_norm": 0.3476298224017206, - "learning_rate": 4.3646763108126796e-06, - "loss": 0.1951, + "epoch": 0.56, + "grad_norm": 0.2748011088240663, + "learning_rate": 8.561723876346228e-06, + "loss": 0.2461, "step": 12181 }, { - "epoch": 0.7, - "grad_norm": 0.651854837110252, - "learning_rate": 4.363139119693712e-06, - "loss": 0.3743, + "epoch": 0.56, + "grad_norm": 0.3077726725664092, + "learning_rate": 8.56025144330358e-06, + "loss": 0.2059, "step": 12182 }, { - "epoch": 0.7, - "grad_norm": 0.3424674523302693, - "learning_rate": 4.361602123778998e-06, - "loss": 0.2393, + "epoch": 0.56, + "grad_norm": 0.4000626028051199, + "learning_rate": 8.55877904213558e-06, + "loss": 0.2386, "step": 12183 }, { - "epoch": 0.7, - "grad_norm": 1.009983692526136, - "learning_rate": 4.360065323121759e-06, - "loss": 0.4591, + "epoch": 0.56, + "grad_norm": 0.39626546830682496, + "learning_rate": 8.557306672874825e-06, + "loss": 0.2747, "step": 12184 }, { - "epoch": 0.7, - "grad_norm": 0.4010361679084553, - "learning_rate": 4.358528717775217e-06, - "loss": 0.2785, + "epoch": 0.56, + "grad_norm": 0.4664917790823666, + "learning_rate": 8.555834335553908e-06, + "loss": 0.3257, "step": 12185 }, { - "epoch": 0.7, - "grad_norm": 0.35234124130496747, - "learning_rate": 4.356992307792578e-06, - "loss": 0.2476, + "epoch": 0.56, + "grad_norm": 0.41328949852093566, + "learning_rate": 8.554362030205424e-06, + "loss": 0.2759, "step": 12186 }, { - "epoch": 0.7, - "grad_norm": 0.23746666293127477, - "learning_rate": 4.355456093227056e-06, - "loss": 0.1907, + "epoch": 0.56, + "grad_norm": 0.49546115808013835, + "learning_rate": 8.552889756861972e-06, + "loss": 0.3035, "step": 12187 }, { - "epoch": 0.7, - "grad_norm": 0.39902282031332437, - "learning_rate": 4.353920074131848e-06, - "loss": 0.2616, + "epoch": 0.56, + "grad_norm": 0.35446449916160533, + "learning_rate": 8.55141751555615e-06, + "loss": 0.304, "step": 12188 }, { - "epoch": 0.7, - "grad_norm": 0.437743857169606, - "learning_rate": 4.352384250560147e-06, - "loss": 0.3208, + "epoch": 0.56, + "grad_norm": 0.3126638866833582, + "learning_rate": 8.549945306320547e-06, + "loss": 0.2175, "step": 12189 }, { - "epoch": 0.7, - "grad_norm": 0.3195775949375206, - "learning_rate": 4.350848622565131e-06, - "loss": 0.239, + "epoch": 0.56, + "grad_norm": 0.4697372500024799, + "learning_rate": 8.548473129187757e-06, + "loss": 0.2854, "step": 12190 }, { - "epoch": 0.7, - "grad_norm": 0.3694916234759852, - "learning_rate": 4.349313190199988e-06, - "loss": 0.28, + "epoch": 0.56, + "grad_norm": 0.4581908978174879, + "learning_rate": 8.54700098419037e-06, + "loss": 0.2778, "step": 12191 }, { - "epoch": 0.7, - "grad_norm": 0.5313708496767161, - "learning_rate": 4.347777953517885e-06, - "loss": 0.3363, + "epoch": 0.56, + "grad_norm": 0.3588548872870379, + "learning_rate": 8.545528871360983e-06, + "loss": 0.2666, "step": 12192 }, { - "epoch": 0.7, - "grad_norm": 0.19723128611524043, - "learning_rate": 4.3462429125719884e-06, - "loss": 0.0851, + "epoch": 0.56, + "grad_norm": 0.3872369348798836, + "learning_rate": 8.544056790732187e-06, + "loss": 0.3067, "step": 12193 }, { - "epoch": 0.7, - "grad_norm": 0.3500548154240149, - "learning_rate": 4.344708067415454e-06, - "loss": 0.263, + "epoch": 0.56, + "grad_norm": 0.27344526734229596, + "learning_rate": 8.542584742336568e-06, + "loss": 0.1215, "step": 12194 }, { - "epoch": 0.7, - "grad_norm": 0.38467142893601863, - "learning_rate": 4.34317341810144e-06, - "loss": 0.2988, + "epoch": 0.56, + "grad_norm": 0.41658011002886075, + "learning_rate": 8.541112726206718e-06, + "loss": 0.2675, "step": 12195 }, { - "epoch": 0.7, - "grad_norm": 0.6932149267264256, - "learning_rate": 4.341638964683086e-06, - "loss": 0.3442, + "epoch": 0.56, + "grad_norm": 0.45158672756614165, + "learning_rate": 8.539640742375226e-06, + "loss": 0.3518, "step": 12196 }, { - "epoch": 0.7, - "grad_norm": 0.3163438844619936, - "learning_rate": 4.3401047072135315e-06, - "loss": 0.2652, + "epoch": 0.56, + "grad_norm": 0.2914082902460317, + "learning_rate": 8.538168790874683e-06, + "loss": 0.2112, "step": 12197 }, { - "epoch": 0.7, - "grad_norm": 1.2479930111845223, - "learning_rate": 4.338570645745904e-06, - "loss": 0.5147, + "epoch": 0.56, + "grad_norm": 0.6086763730866136, + "learning_rate": 8.536696871737673e-06, + "loss": 0.3309, "step": 12198 }, { - "epoch": 0.7, - "grad_norm": 0.21571739388095768, - "learning_rate": 4.337036780333336e-06, - "loss": 0.1895, + "epoch": 0.56, + "grad_norm": 0.4115296773770633, + "learning_rate": 8.535224984996779e-06, + "loss": 0.3525, "step": 12199 }, { - "epoch": 0.7, - "grad_norm": 0.5681859271955713, - "learning_rate": 4.33550311102894e-06, - "loss": 0.3091, + "epoch": 0.56, + "grad_norm": 0.35375840691280214, + "learning_rate": 8.533753130684596e-06, + "loss": 0.2546, "step": 12200 }, { - "epoch": 0.7, - "grad_norm": 0.4052305180015126, - "learning_rate": 4.333969637885827e-06, - "loss": 0.3169, + "epoch": 0.56, + "grad_norm": 0.5581319351832283, + "learning_rate": 8.532281308833706e-06, + "loss": 0.3358, "step": 12201 }, { - "epoch": 0.7, - "grad_norm": 0.33747729303466445, - "learning_rate": 4.332436360957104e-06, - "loss": 0.3031, + "epoch": 0.56, + "grad_norm": 0.4799169639064649, + "learning_rate": 8.530809519476697e-06, + "loss": 0.3291, "step": 12202 }, { - "epoch": 0.7, - "grad_norm": 0.34873869056387957, - "learning_rate": 4.3309032802958605e-06, - "loss": 0.1527, + "epoch": 0.56, + "grad_norm": 0.26887190617570306, + "learning_rate": 8.52933776264614e-06, + "loss": 0.0956, "step": 12203 }, { - "epoch": 0.7, - "grad_norm": 0.4009960812786949, - "learning_rate": 4.329370395955198e-06, - "loss": 0.301, + "epoch": 0.56, + "grad_norm": 0.34603870095644224, + "learning_rate": 8.527866038374633e-06, + "loss": 0.2952, "step": 12204 }, { - "epoch": 0.7, - "grad_norm": 0.6642800794115589, - "learning_rate": 4.3278377079881935e-06, - "loss": 0.3961, + "epoch": 0.56, + "grad_norm": 0.3847119778336983, + "learning_rate": 8.526394346694755e-06, + "loss": 0.3127, "step": 12205 }, { - "epoch": 0.7, - "grad_norm": 0.30282343673125794, - "learning_rate": 4.326305216447926e-06, - "loss": 0.2192, + "epoch": 0.56, + "grad_norm": 0.9762341618015461, + "learning_rate": 8.524922687639084e-06, + "loss": 0.4625, "step": 12206 }, { - "epoch": 0.7, - "grad_norm": 0.23522905895128113, - "learning_rate": 4.32477292138746e-06, - "loss": 0.2117, + "epoch": 0.56, + "grad_norm": 0.38205248827501354, + "learning_rate": 8.523451061240202e-06, + "loss": 0.2528, "step": 12207 }, { - "epoch": 0.7, - "grad_norm": 1.2941628507913063, - "learning_rate": 4.3232408228598685e-06, - "loss": 0.6561, + "epoch": 0.56, + "grad_norm": 0.38754565611288994, + "learning_rate": 8.52197946753069e-06, + "loss": 0.3014, "step": 12208 }, { - "epoch": 0.7, - "grad_norm": 0.35384968435253333, - "learning_rate": 4.321708920918203e-06, - "loss": 0.2012, + "epoch": 0.56, + "grad_norm": 0.26346209831925566, + "learning_rate": 8.520507906543129e-06, + "loss": 0.2067, "step": 12209 }, { - "epoch": 0.7, - "grad_norm": 0.3530687951144725, - "learning_rate": 4.320177215615513e-06, - "loss": 0.2766, + "epoch": 0.56, + "grad_norm": 0.3361025858136923, + "learning_rate": 8.519036378310098e-06, + "loss": 0.1954, "step": 12210 }, { - "epoch": 0.7, - "grad_norm": 0.5241546712916147, - "learning_rate": 4.318645707004839e-06, - "loss": 0.3251, + "epoch": 0.56, + "grad_norm": 0.6097712512375393, + "learning_rate": 8.517564882864173e-06, + "loss": 0.3873, "step": 12211 }, { - "epoch": 0.7, - "grad_norm": 0.36823411666661837, - "learning_rate": 4.317114395139222e-06, - "loss": 0.2413, + "epoch": 0.56, + "grad_norm": 0.33294253806331114, + "learning_rate": 8.516093420237931e-06, + "loss": 0.2873, "step": 12212 }, { - "epoch": 0.7, - "grad_norm": 0.2259296402410964, - "learning_rate": 4.3155832800716905e-06, - "loss": 0.1746, + "epoch": 0.56, + "grad_norm": 0.7119791421549855, + "learning_rate": 8.514621990463954e-06, + "loss": 0.2668, "step": 12213 }, { - "epoch": 0.7, - "grad_norm": 0.35860128935561664, - "learning_rate": 4.314052361855265e-06, - "loss": 0.3109, + "epoch": 0.56, + "grad_norm": 0.42278946882098206, + "learning_rate": 8.513150593574813e-06, + "loss": 0.3234, "step": 12214 }, { - "epoch": 0.7, - "grad_norm": 0.5859399504943746, - "learning_rate": 4.312521640542961e-06, - "loss": 0.4153, + "epoch": 0.56, + "grad_norm": 0.3128778316950503, + "learning_rate": 8.511679229603084e-06, + "loss": 0.1857, "step": 12215 }, { - "epoch": 0.7, - "grad_norm": 0.35181741535289196, - "learning_rate": 4.310991116187786e-06, - "loss": 0.2138, + "epoch": 0.56, + "grad_norm": 0.34927475244001355, + "learning_rate": 8.51020789858134e-06, + "loss": 0.277, "step": 12216 }, { - "epoch": 0.7, - "grad_norm": 0.5705186817412079, - "learning_rate": 4.309460788842747e-06, - "loss": 0.3709, + "epoch": 0.56, + "grad_norm": 0.33454256558069395, + "learning_rate": 8.50873660054216e-06, + "loss": 0.2403, "step": 12217 }, { - "epoch": 0.7, - "grad_norm": 0.3280362062848473, - "learning_rate": 4.307930658560836e-06, - "loss": 0.2838, + "epoch": 0.56, + "grad_norm": 0.9702380619660157, + "learning_rate": 8.507265335518117e-06, + "loss": 0.4392, "step": 12218 }, { - "epoch": 0.7, - "grad_norm": 0.25938678514674396, - "learning_rate": 4.306400725395041e-06, - "loss": 0.1739, + "epoch": 0.56, + "grad_norm": 0.7632386694883394, + "learning_rate": 8.50579410354178e-06, + "loss": 0.408, "step": 12219 }, { - "epoch": 0.7, - "grad_norm": 0.3705889257477343, - "learning_rate": 4.304870989398341e-06, - "loss": 0.2753, + "epoch": 0.56, + "grad_norm": 0.27695884814597393, + "learning_rate": 8.504322904645717e-06, + "loss": 0.237, "step": 12220 }, { - "epoch": 0.7, - "grad_norm": 0.7826792297259539, - "learning_rate": 4.303341450623717e-06, - "loss": 0.3297, + "epoch": 0.56, + "grad_norm": 0.46338973248245197, + "learning_rate": 8.502851738862507e-06, + "loss": 0.2492, "step": 12221 }, { - "epoch": 0.7, - "grad_norm": 0.25656625473960787, - "learning_rate": 4.301812109124134e-06, - "loss": 0.2215, + "epoch": 0.56, + "grad_norm": 0.3843856866205924, + "learning_rate": 8.501380606224716e-06, + "loss": 0.2465, "step": 12222 }, { - "epoch": 0.7, - "grad_norm": 0.7552748960134755, - "learning_rate": 4.300282964952553e-06, - "loss": 0.3884, + "epoch": 0.56, + "grad_norm": 0.48915247081570984, + "learning_rate": 8.499909506764914e-06, + "loss": 0.2569, "step": 12223 }, { - "epoch": 0.7, - "grad_norm": 0.6690995984223479, - "learning_rate": 4.2987540181619265e-06, - "loss": 0.3853, + "epoch": 0.56, + "grad_norm": 0.39755627024894946, + "learning_rate": 8.49843844051567e-06, + "loss": 0.3135, "step": 12224 }, { - "epoch": 0.7, - "grad_norm": 0.2465990758728293, - "learning_rate": 4.2972252688052055e-06, - "loss": 0.1742, + "epoch": 0.56, + "grad_norm": 0.5427120294148579, + "learning_rate": 8.49696740750955e-06, + "loss": 0.2877, "step": 12225 }, { - "epoch": 0.7, - "grad_norm": 0.3394607609597814, - "learning_rate": 4.295696716935326e-06, - "loss": 0.2924, + "epoch": 0.56, + "grad_norm": 0.4146484601502723, + "learning_rate": 8.495496407779124e-06, + "loss": 0.2223, "step": 12226 }, { - "epoch": 0.7, - "grad_norm": 0.3112479870105338, - "learning_rate": 4.294168362605224e-06, - "loss": 0.2147, + "epoch": 0.56, + "grad_norm": 0.3092562101851163, + "learning_rate": 8.49402544135696e-06, + "loss": 0.1906, "step": 12227 }, { - "epoch": 0.7, - "grad_norm": 0.3997767864323401, - "learning_rate": 4.292640205867824e-06, - "loss": 0.2892, + "epoch": 0.56, + "grad_norm": 0.3138838781379907, + "learning_rate": 8.492554508275618e-06, + "loss": 0.2593, "step": 12228 }, { - "epoch": 0.7, - "grad_norm": 1.072222696374089, - "learning_rate": 4.291112246776052e-06, - "loss": 0.2978, + "epoch": 0.56, + "grad_norm": 0.41752587300089833, + "learning_rate": 8.491083608567663e-06, + "loss": 0.2852, "step": 12229 }, { - "epoch": 0.7, - "grad_norm": 0.2996227549076089, - "learning_rate": 4.2895844853828165e-06, - "loss": 0.2741, + "epoch": 0.56, + "grad_norm": 0.8513710820205609, + "learning_rate": 8.48961274226567e-06, + "loss": 0.348, "step": 12230 }, { - "epoch": 0.7, - "grad_norm": 0.40168932524681566, - "learning_rate": 4.288056921741024e-06, - "loss": 0.3001, + "epoch": 0.56, + "grad_norm": 0.6285612552495742, + "learning_rate": 8.488141909402192e-06, + "loss": 0.3291, "step": 12231 }, { - "epoch": 0.7, - "grad_norm": 0.2768315986265087, - "learning_rate": 4.286529555903572e-06, - "loss": 0.1643, + "epoch": 0.56, + "grad_norm": 0.32532520349079674, + "learning_rate": 8.486671110009797e-06, + "loss": 0.2761, "step": 12232 }, { - "epoch": 0.7, - "grad_norm": 0.41305074420474364, - "learning_rate": 4.285002387923359e-06, - "loss": 0.2157, + "epoch": 0.56, + "grad_norm": 0.3014111629217826, + "learning_rate": 8.485200344121038e-06, + "loss": 0.1957, "step": 12233 }, { - "epoch": 0.7, - "grad_norm": 0.3232120840888913, - "learning_rate": 4.283475417853268e-06, - "loss": 0.2766, + "epoch": 0.56, + "grad_norm": 0.9152635228464191, + "learning_rate": 8.483729611768488e-06, + "loss": 0.5334, "step": 12234 }, { - "epoch": 0.7, - "grad_norm": 0.7148956900870683, - "learning_rate": 4.2819486457461765e-06, - "loss": 0.2733, + "epoch": 0.56, + "grad_norm": 0.3522411408534553, + "learning_rate": 8.482258912984705e-06, + "loss": 0.2544, "step": 12235 }, { - "epoch": 0.7, - "grad_norm": 0.8019061600739739, - "learning_rate": 4.280422071654955e-06, - "loss": 0.4053, + "epoch": 0.56, + "grad_norm": 0.2879316845982022, + "learning_rate": 8.480788247802246e-06, + "loss": 0.2376, "step": 12236 }, { - "epoch": 0.7, - "grad_norm": 0.3394041675400556, - "learning_rate": 4.278895695632474e-06, - "loss": 0.2784, + "epoch": 0.56, + "grad_norm": 0.7531629689728865, + "learning_rate": 8.479317616253671e-06, + "loss": 0.4686, "step": 12237 }, { - "epoch": 0.7, - "grad_norm": 0.3341011586741274, - "learning_rate": 4.27736951773159e-06, - "loss": 0.2744, + "epoch": 0.56, + "grad_norm": 0.38888973406978433, + "learning_rate": 8.477847018371534e-06, + "loss": 0.2731, "step": 12238 }, { - "epoch": 0.7, - "grad_norm": 0.35152470282886145, - "learning_rate": 4.275843538005153e-06, - "loss": 0.2208, + "epoch": 0.56, + "grad_norm": 1.082679360651809, + "learning_rate": 8.476376454188402e-06, + "loss": 0.2473, "step": 12239 }, { - "epoch": 0.7, - "grad_norm": 0.370400992897311, - "learning_rate": 4.274317756506008e-06, - "loss": 0.2785, + "epoch": 0.56, + "grad_norm": 0.33926809878027236, + "learning_rate": 8.474905923736825e-06, + "loss": 0.2951, "step": 12240 }, { - "epoch": 0.7, - "grad_norm": 0.4693231268558691, - "learning_rate": 4.2727921732869894e-06, - "loss": 0.2495, + "epoch": 0.56, + "grad_norm": 0.391925832358534, + "learning_rate": 8.473435427049362e-06, + "loss": 0.2772, "step": 12241 }, { - "epoch": 0.7, - "grad_norm": 0.3241862213090096, - "learning_rate": 4.271266788400935e-06, - "loss": 0.2379, + "epoch": 0.56, + "grad_norm": 0.9215804538714935, + "learning_rate": 8.471964964158565e-06, + "loss": 0.5506, "step": 12242 }, { - "epoch": 0.7, - "grad_norm": 0.3449514730274378, - "learning_rate": 4.269741601900667e-06, - "loss": 0.2681, + "epoch": 0.56, + "grad_norm": 0.24938738179554498, + "learning_rate": 8.470494535096994e-06, + "loss": 0.1554, "step": 12243 }, { - "epoch": 0.7, - "grad_norm": 1.1807650540650891, - "learning_rate": 4.268216613838998e-06, - "loss": 0.6882, + "epoch": 0.56, + "grad_norm": 0.30343710640133315, + "learning_rate": 8.469024139897197e-06, + "loss": 0.2716, "step": 12244 }, { - "epoch": 0.7, - "grad_norm": 0.36468184086103056, - "learning_rate": 4.266691824268739e-06, - "loss": 0.2161, + "epoch": 0.56, + "grad_norm": 0.9621075217184689, + "learning_rate": 8.467553778591733e-06, + "loss": 0.5603, "step": 12245 }, { - "epoch": 0.7, - "grad_norm": 0.29744735921142973, - "learning_rate": 4.2651672332427e-06, - "loss": 0.264, + "epoch": 0.56, + "grad_norm": 0.5267145703134086, + "learning_rate": 8.466083451213145e-06, + "loss": 0.2554, "step": 12246 }, { - "epoch": 0.7, - "grad_norm": 0.476773513987374, - "learning_rate": 4.263642840813672e-06, - "loss": 0.2558, + "epoch": 0.56, + "grad_norm": 0.39749061629440513, + "learning_rate": 8.464613157793996e-06, + "loss": 0.3041, "step": 12247 }, { - "epoch": 0.7, - "grad_norm": 0.5281422302262536, - "learning_rate": 4.262118647034447e-06, - "loss": 0.132, + "epoch": 0.56, + "grad_norm": 0.36752547917807277, + "learning_rate": 8.463142898366834e-06, + "loss": 0.3156, "step": 12248 }, { - "epoch": 0.7, - "grad_norm": 0.3918244540090564, - "learning_rate": 4.260594651957801e-06, - "loss": 0.3159, + "epoch": 0.56, + "grad_norm": 0.19354851847672563, + "learning_rate": 8.461672672964204e-06, + "loss": 0.0739, "step": 12249 }, { - "epoch": 0.7, - "grad_norm": 0.3831510721886654, - "learning_rate": 4.25907085563652e-06, - "loss": 0.3368, + "epoch": 0.56, + "grad_norm": 0.4511532821598407, + "learning_rate": 8.460202481618658e-06, + "loss": 0.3217, "step": 12250 }, { - "epoch": 0.7, - "grad_norm": 0.518415226887272, - "learning_rate": 4.257547258123369e-06, - "loss": 0.2439, + "epoch": 0.56, + "grad_norm": 0.828478575156366, + "learning_rate": 8.458732324362744e-06, + "loss": 0.5049, "step": 12251 }, { - "epoch": 0.7, - "grad_norm": 0.3912430449227991, - "learning_rate": 4.256023859471109e-06, - "loss": 0.2949, + "epoch": 0.56, + "grad_norm": 0.33677243313494404, + "learning_rate": 8.457262201229015e-06, + "loss": 0.2576, "step": 12252 }, { - "epoch": 0.7, - "grad_norm": 0.31137797694610614, - "learning_rate": 4.254500659732496e-06, - "loss": 0.1849, + "epoch": 0.56, + "grad_norm": 0.3607525356344563, + "learning_rate": 8.45579211225001e-06, + "loss": 0.2745, "step": 12253 }, { - "epoch": 0.7, - "grad_norm": 0.36367056958104427, - "learning_rate": 4.2529776589602735e-06, - "loss": 0.3097, + "epoch": 0.56, + "grad_norm": 0.39625136918530396, + "learning_rate": 8.454322057458282e-06, + "loss": 0.2592, "step": 12254 }, { - "epoch": 0.7, - "grad_norm": 0.28821543574715613, - "learning_rate": 4.251454857207193e-06, - "loss": 0.1862, + "epoch": 0.56, + "grad_norm": 0.39880183924414137, + "learning_rate": 8.452852036886373e-06, + "loss": 0.2331, "step": 12255 }, { - "epoch": 0.7, - "grad_norm": 0.7304920210259426, - "learning_rate": 4.249932254525985e-06, - "loss": 0.4968, + "epoch": 0.56, + "grad_norm": 0.23728147736271077, + "learning_rate": 8.451382050566828e-06, + "loss": 0.2168, "step": 12256 }, { - "epoch": 0.7, - "grad_norm": 0.4805174118924019, - "learning_rate": 4.24840985096937e-06, - "loss": 0.3333, + "epoch": 0.56, + "grad_norm": 0.8820760947840431, + "learning_rate": 8.449912098532193e-06, + "loss": 0.4586, "step": 12257 }, { - "epoch": 0.7, - "grad_norm": 0.24509749223680857, - "learning_rate": 4.246887646590077e-06, - "loss": 0.2109, + "epoch": 0.56, + "grad_norm": 0.710098155367854, + "learning_rate": 8.44844218081501e-06, + "loss": 0.4384, "step": 12258 }, { - "epoch": 0.7, - "grad_norm": 0.30668412572681175, - "learning_rate": 4.245365641440818e-06, - "loss": 0.2188, + "epoch": 0.56, + "grad_norm": 0.31337951869611874, + "learning_rate": 8.446972297447819e-06, + "loss": 0.2152, "step": 12259 }, { - "epoch": 0.7, - "grad_norm": 1.0420482711700545, - "learning_rate": 4.243843835574299e-06, - "loss": 0.4492, + "epoch": 0.56, + "grad_norm": 0.3766201087040202, + "learning_rate": 8.445502448463167e-06, + "loss": 0.3308, "step": 12260 }, { - "epoch": 0.7, - "grad_norm": 0.29947922607947813, - "learning_rate": 4.242322229043218e-06, - "loss": 0.2126, + "epoch": 0.56, + "grad_norm": 0.28772847815407715, + "learning_rate": 8.444032633893593e-06, + "loss": 0.1575, "step": 12261 }, { - "epoch": 0.7, - "grad_norm": 0.34042241000669604, - "learning_rate": 4.240800821900274e-06, - "loss": 0.2878, + "epoch": 0.56, + "grad_norm": 0.3243789689030855, + "learning_rate": 8.442562853771637e-06, + "loss": 0.1933, "step": 12262 }, { - "epoch": 0.7, - "grad_norm": 0.8679278842448671, - "learning_rate": 4.23927961419815e-06, - "loss": 0.4032, + "epoch": 0.56, + "grad_norm": 0.7609796041097836, + "learning_rate": 8.441093108129833e-06, + "loss": 0.4319, "step": 12263 }, { - "epoch": 0.7, - "grad_norm": 0.3370249737144881, - "learning_rate": 4.237758605989523e-06, - "loss": 0.247, + "epoch": 0.56, + "grad_norm": 0.3668232256734244, + "learning_rate": 8.43962339700073e-06, + "loss": 0.3153, "step": 12264 }, { - "epoch": 0.7, - "grad_norm": 0.15104224638072863, - "learning_rate": 4.236237797327071e-06, - "loss": 0.071, + "epoch": 0.56, + "grad_norm": 0.32084289603252325, + "learning_rate": 8.438153720416861e-06, + "loss": 0.187, "step": 12265 }, { - "epoch": 0.7, - "grad_norm": 0.37245443877274914, - "learning_rate": 4.2347171882634505e-06, - "loss": 0.3221, + "epoch": 0.56, + "grad_norm": 0.9539874704255746, + "learning_rate": 8.436684078410764e-06, + "loss": 0.6051, "step": 12266 }, { - "epoch": 0.7, - "grad_norm": 0.3957859482382533, - "learning_rate": 4.2331967788513295e-06, - "loss": 0.2676, + "epoch": 0.56, + "grad_norm": 0.2541484428081228, + "learning_rate": 8.435214471014976e-06, + "loss": 0.1911, "step": 12267 }, { - "epoch": 0.7, - "grad_norm": 0.4817458267951734, - "learning_rate": 4.231676569143357e-06, - "loss": 0.2875, + "epoch": 0.56, + "grad_norm": 0.4139397817949519, + "learning_rate": 8.433744898262026e-06, + "loss": 0.2709, "step": 12268 }, { - "epoch": 0.7, - "grad_norm": 0.33385576628286906, - "learning_rate": 4.230156559192177e-06, - "loss": 0.3, + "epoch": 0.56, + "grad_norm": 0.5154275074548234, + "learning_rate": 8.432275360184458e-06, + "loss": 0.3021, "step": 12269 }, { - "epoch": 0.7, - "grad_norm": 0.3606609119285642, - "learning_rate": 4.228636749050422e-06, - "loss": 0.2815, + "epoch": 0.56, + "grad_norm": 1.0430868653628655, + "learning_rate": 8.430805856814802e-06, + "loss": 0.5234, "step": 12270 }, { - "epoch": 0.71, - "grad_norm": 0.28662475275203025, - "learning_rate": 4.227117138770733e-06, - "loss": 0.1133, + "epoch": 0.56, + "grad_norm": 0.36072743708953225, + "learning_rate": 8.429336388185594e-06, + "loss": 0.2578, "step": 12271 }, { - "epoch": 0.71, - "grad_norm": 1.1924756559482446, - "learning_rate": 4.225597728405729e-06, - "loss": 0.5055, + "epoch": 0.56, + "grad_norm": 0.38156912630191214, + "learning_rate": 8.42786695432936e-06, + "loss": 0.2855, "step": 12272 }, { - "epoch": 0.71, - "grad_norm": 0.33127274692223163, - "learning_rate": 4.224078518008028e-06, - "loss": 0.2566, + "epoch": 0.56, + "grad_norm": 0.3148772494330761, + "learning_rate": 8.42639755527864e-06, + "loss": 0.1895, "step": 12273 }, { - "epoch": 0.71, - "grad_norm": 0.39603641748968726, - "learning_rate": 4.222559507630235e-06, - "loss": 0.2764, + "epoch": 0.56, + "grad_norm": 0.3315200950484738, + "learning_rate": 8.42492819106596e-06, + "loss": 0.2662, "step": 12274 }, { - "epoch": 0.71, - "grad_norm": 1.1371302801557202, - "learning_rate": 4.221040697324962e-06, - "loss": 0.8109, + "epoch": 0.56, + "grad_norm": 0.5078759545969014, + "learning_rate": 8.423458861723853e-06, + "loss": 0.2963, "step": 12275 }, { - "epoch": 0.71, - "grad_norm": 0.35190049784212624, - "learning_rate": 4.2195220871448005e-06, - "loss": 0.2424, + "epoch": 0.56, + "grad_norm": 0.5067876304115886, + "learning_rate": 8.421989567284841e-06, + "loss": 0.3864, "step": 12276 }, { - "epoch": 0.71, - "grad_norm": 0.27023187100195273, - "learning_rate": 4.218003677142342e-06, - "loss": 0.2132, + "epoch": 0.56, + "grad_norm": 0.3094634640457943, + "learning_rate": 8.420520307781468e-06, + "loss": 0.2635, "step": 12277 }, { - "epoch": 0.71, - "grad_norm": 0.48459025177133447, - "learning_rate": 4.216485467370163e-06, - "loss": 0.2421, + "epoch": 0.56, + "grad_norm": 1.145670721769822, + "learning_rate": 8.41905108324625e-06, + "loss": 0.3763, "step": 12278 }, { - "epoch": 0.71, - "grad_norm": 0.30829697404074013, - "learning_rate": 4.214967457880846e-06, - "loss": 0.2431, + "epoch": 0.56, + "grad_norm": 0.23026724450965205, + "learning_rate": 8.417581893711717e-06, + "loss": 0.2001, "step": 12279 }, { - "epoch": 0.71, - "grad_norm": 0.6722535491136014, - "learning_rate": 4.213449648726958e-06, - "loss": 0.4095, + "epoch": 0.56, + "grad_norm": 0.3225302041955291, + "learning_rate": 8.416112739210393e-06, + "loss": 0.2305, "step": 12280 }, { - "epoch": 0.71, - "grad_norm": 0.30788458676053476, - "learning_rate": 4.211932039961061e-06, - "loss": 0.2747, + "epoch": 0.56, + "grad_norm": 0.6723651076262414, + "learning_rate": 8.414643619774809e-06, + "loss": 0.3846, "step": 12281 }, { - "epoch": 0.71, - "grad_norm": 0.3495341382200821, - "learning_rate": 4.210414631635707e-06, - "loss": 0.2409, + "epoch": 0.56, + "grad_norm": 0.8609229764986652, + "learning_rate": 8.413174535437486e-06, + "loss": 0.3982, "step": 12282 }, { - "epoch": 0.71, - "grad_norm": 1.1231503038430486, - "learning_rate": 4.208897423803443e-06, - "loss": 0.5617, + "epoch": 0.56, + "grad_norm": 0.4244626369481598, + "learning_rate": 8.411705486230952e-06, + "loss": 0.2768, "step": 12283 }, { - "epoch": 0.71, - "grad_norm": 0.3074058591745467, - "learning_rate": 4.207380416516815e-06, - "loss": 0.162, + "epoch": 0.56, + "grad_norm": 0.31803690351561287, + "learning_rate": 8.410236472187727e-06, + "loss": 0.2884, "step": 12284 }, { - "epoch": 0.71, - "grad_norm": 0.4244766221986541, - "learning_rate": 4.2058636098283545e-06, - "loss": 0.294, + "epoch": 0.56, + "grad_norm": 0.4684941091565667, + "learning_rate": 8.408767493340333e-06, + "loss": 0.2216, "step": 12285 }, { - "epoch": 0.71, - "grad_norm": 0.31602488202105283, - "learning_rate": 4.204347003790588e-06, - "loss": 0.3086, + "epoch": 0.56, + "grad_norm": 0.4400555014856853, + "learning_rate": 8.407298549721294e-06, + "loss": 0.2604, "step": 12286 }, { - "epoch": 0.71, - "grad_norm": 0.9753506360078198, - "learning_rate": 4.202830598456032e-06, - "loss": 0.5152, + "epoch": 0.56, + "grad_norm": 0.3181900605068484, + "learning_rate": 8.40582964136313e-06, + "loss": 0.267, "step": 12287 }, { - "epoch": 0.71, - "grad_norm": 0.4134773260026491, - "learning_rate": 4.201314393877206e-06, - "loss": 0.2844, + "epoch": 0.56, + "grad_norm": 0.455355698615622, + "learning_rate": 8.404360768298361e-06, + "loss": 0.2362, "step": 12288 }, { - "epoch": 0.71, - "grad_norm": 0.3680992182220188, - "learning_rate": 4.199798390106613e-06, - "loss": 0.2835, + "epoch": 0.56, + "grad_norm": 0.3541057167771725, + "learning_rate": 8.402891930559504e-06, + "loss": 0.267, "step": 12289 }, { - "epoch": 0.71, - "grad_norm": 0.29708444907868753, - "learning_rate": 4.198282587196757e-06, - "loss": 0.2302, + "epoch": 0.56, + "grad_norm": 0.9169651701659455, + "learning_rate": 8.401423128179085e-06, + "loss": 0.5216, "step": 12290 }, { - "epoch": 0.71, - "grad_norm": 0.2937281945675394, - "learning_rate": 4.196766985200118e-06, - "loss": 0.1866, + "epoch": 0.56, + "grad_norm": 0.40015061744475117, + "learning_rate": 8.399954361189614e-06, + "loss": 0.2484, "step": 12291 }, { - "epoch": 0.71, - "grad_norm": 0.6844793590759544, - "learning_rate": 4.195251584169192e-06, - "loss": 0.4549, + "epoch": 0.56, + "grad_norm": 0.3822281441654599, + "learning_rate": 8.398485629623613e-06, + "loss": 0.266, "step": 12292 }, { - "epoch": 0.71, - "grad_norm": 0.5098854869023844, - "learning_rate": 4.193736384156455e-06, - "loss": 0.3973, + "epoch": 0.56, + "grad_norm": 0.3786164173749657, + "learning_rate": 8.397016933513593e-06, + "loss": 0.2389, "step": 12293 }, { - "epoch": 0.71, - "grad_norm": 0.2564448270712793, - "learning_rate": 4.192221385214377e-06, - "loss": 0.2072, + "epoch": 0.56, + "grad_norm": 1.4443129571587467, + "learning_rate": 8.395548272892078e-06, + "loss": 0.7676, "step": 12294 }, { - "epoch": 0.71, - "grad_norm": 0.5101193979062211, - "learning_rate": 4.190706587395418e-06, - "loss": 0.2752, + "epoch": 0.56, + "grad_norm": 0.3126518650778811, + "learning_rate": 8.394079647791578e-06, + "loss": 0.2093, "step": 12295 }, { - "epoch": 0.71, - "grad_norm": 0.4351332109330603, - "learning_rate": 4.189191990752044e-06, - "loss": 0.271, + "epoch": 0.56, + "grad_norm": 0.3866842476804744, + "learning_rate": 8.392611058244606e-06, + "loss": 0.3096, "step": 12296 }, { - "epoch": 0.71, - "grad_norm": 0.26640761033236676, - "learning_rate": 4.187677595336702e-06, - "loss": 0.2164, + "epoch": 0.56, + "grad_norm": 0.707850410247406, + "learning_rate": 8.391142504283674e-06, + "loss": 0.4348, "step": 12297 }, { - "epoch": 0.71, - "grad_norm": 0.7739642089022813, - "learning_rate": 4.186163401201835e-06, - "loss": 0.3333, + "epoch": 0.56, + "grad_norm": 0.3310082850903642, + "learning_rate": 8.389673985941295e-06, + "loss": 0.2255, "step": 12298 }, { - "epoch": 0.71, - "grad_norm": 0.9586181627625986, - "learning_rate": 4.184649408399876e-06, - "loss": 0.6132, + "epoch": 0.57, + "grad_norm": 0.35445335164359526, + "learning_rate": 8.388205503249985e-06, + "loss": 0.2472, "step": 12299 }, { - "epoch": 0.71, - "grad_norm": 0.3301622040533963, - "learning_rate": 4.183135616983261e-06, - "loss": 0.1892, + "epoch": 0.57, + "grad_norm": 0.34069700015315174, + "learning_rate": 8.38673705624225e-06, + "loss": 0.26, "step": 12300 }, { - "epoch": 0.71, - "grad_norm": 0.4567133495984465, - "learning_rate": 4.181622027004409e-06, - "loss": 0.3046, + "epoch": 0.57, + "grad_norm": 0.3627539339991464, + "learning_rate": 8.385268644950603e-06, + "loss": 0.1894, "step": 12301 }, { - "epoch": 0.71, - "grad_norm": 0.46370032678492656, - "learning_rate": 4.1801086385157366e-06, - "loss": 0.3539, + "epoch": 0.57, + "grad_norm": 0.5728298758163443, + "learning_rate": 8.38380026940755e-06, + "loss": 0.399, "step": 12302 }, { - "epoch": 0.71, - "grad_norm": 0.4066621177522798, - "learning_rate": 4.178595451569648e-06, - "loss": 0.2692, + "epoch": 0.57, + "grad_norm": 0.43630309822364827, + "learning_rate": 8.382331929645603e-06, + "loss": 0.3012, "step": 12303 }, { - "epoch": 0.71, - "grad_norm": 0.20560129040918335, - "learning_rate": 4.177082466218553e-06, - "loss": 0.1409, + "epoch": 0.57, + "grad_norm": 0.40155062616462034, + "learning_rate": 8.380863625697267e-06, + "loss": 0.2132, "step": 12304 }, { - "epoch": 0.71, - "grad_norm": 0.35498442687133175, - "learning_rate": 4.17556968251484e-06, - "loss": 0.3217, + "epoch": 0.57, + "grad_norm": 0.3753800892917308, + "learning_rate": 8.379395357595051e-06, + "loss": 0.2518, "step": 12305 }, { - "epoch": 0.71, - "grad_norm": 0.6429202460112974, - "learning_rate": 4.1740571005109e-06, - "loss": 0.3263, + "epoch": 0.57, + "grad_norm": 0.46736156988416844, + "learning_rate": 8.37792712537146e-06, + "loss": 0.2982, "step": 12306 }, { - "epoch": 0.71, - "grad_norm": 0.35829501157032695, - "learning_rate": 4.1725447202591115e-06, - "loss": 0.2361, + "epoch": 0.57, + "grad_norm": 0.4145352274247496, + "learning_rate": 8.376458929058996e-06, + "loss": 0.2966, "step": 12307 }, { - "epoch": 0.71, - "grad_norm": 0.6702766382148628, - "learning_rate": 4.171032541811846e-06, - "loss": 0.3579, + "epoch": 0.57, + "grad_norm": 0.33414861418633585, + "learning_rate": 8.37499076869017e-06, + "loss": 0.2594, "step": 12308 }, { - "epoch": 0.71, - "grad_norm": 0.302468040201284, - "learning_rate": 4.169520565221476e-06, - "loss": 0.2649, + "epoch": 0.57, + "grad_norm": 0.7731945129074849, + "learning_rate": 8.373522644297482e-06, + "loss": 0.4021, "step": 12309 }, { - "epoch": 0.71, - "grad_norm": 0.19513857944378782, - "learning_rate": 4.1680087905403575e-06, - "loss": 0.1667, + "epoch": 0.57, + "grad_norm": 0.42692925623859, + "learning_rate": 8.37205455591343e-06, + "loss": 0.2893, "step": 12310 }, { - "epoch": 0.71, - "grad_norm": 1.3169755916645531, - "learning_rate": 4.166497217820844e-06, - "loss": 0.7212, + "epoch": 0.57, + "grad_norm": 0.29640537218097945, + "learning_rate": 8.370586503570526e-06, + "loss": 0.2078, "step": 12311 }, { - "epoch": 0.71, - "grad_norm": 0.3903916545542141, - "learning_rate": 4.164985847115279e-06, - "loss": 0.2538, + "epoch": 0.57, + "grad_norm": 0.31602330787739547, + "learning_rate": 8.369118487301265e-06, + "loss": 0.2341, "step": 12312 }, { - "epoch": 0.71, - "grad_norm": 0.3307900193032403, - "learning_rate": 4.163474678476004e-06, - "loss": 0.2559, + "epoch": 0.57, + "grad_norm": 0.4435462887731274, + "learning_rate": 8.367650507138149e-06, + "loss": 0.301, "step": 12313 }, { - "epoch": 0.71, - "grad_norm": 0.7210475091920264, - "learning_rate": 4.161963711955351e-06, - "loss": 0.3969, + "epoch": 0.57, + "grad_norm": 0.5634089595076581, + "learning_rate": 8.366182563113682e-06, + "loss": 0.2748, "step": 12314 }, { - "epoch": 0.71, - "grad_norm": 0.4130701912582865, - "learning_rate": 4.1604529476056446e-06, - "loss": 0.2791, + "epoch": 0.57, + "grad_norm": 0.4104412633006264, + "learning_rate": 8.36471465526035e-06, + "loss": 0.3178, "step": 12315 }, { - "epoch": 0.71, - "grad_norm": 0.33158894769484887, - "learning_rate": 4.158942385479198e-06, - "loss": 0.2499, + "epoch": 0.57, + "grad_norm": 0.43374480809673693, + "learning_rate": 8.363246783610663e-06, + "loss": 0.2798, "step": 12316 }, { - "epoch": 0.71, - "grad_norm": 0.26527730099168056, - "learning_rate": 4.157432025628327e-06, - "loss": 0.2226, + "epoch": 0.57, + "grad_norm": 0.49163498912273956, + "learning_rate": 8.361778948197116e-06, + "loss": 0.2443, "step": 12317 }, { - "epoch": 0.71, - "grad_norm": 0.4024841563367093, - "learning_rate": 4.155921868105336e-06, - "loss": 0.2536, + "epoch": 0.57, + "grad_norm": 0.3245370180339963, + "learning_rate": 8.360311149052205e-06, + "loss": 0.2061, "step": 12318 }, { - "epoch": 0.71, - "grad_norm": 0.4715939185767074, - "learning_rate": 4.154411912962518e-06, - "loss": 0.3245, + "epoch": 0.57, + "grad_norm": 0.3357430319742506, + "learning_rate": 8.358843386208421e-06, + "loss": 0.2801, "step": 12319 }, { - "epoch": 0.71, - "grad_norm": 0.49784484653712924, - "learning_rate": 4.152902160252165e-06, - "loss": 0.2754, + "epoch": 0.57, + "grad_norm": 0.4172117202657959, + "learning_rate": 8.357375659698266e-06, + "loss": 0.3011, "step": 12320 }, { - "epoch": 0.71, - "grad_norm": 0.3167715593401071, - "learning_rate": 4.151392610026554e-06, - "loss": 0.2556, + "epoch": 0.57, + "grad_norm": 0.7636735713563897, + "learning_rate": 8.35590796955423e-06, + "loss": 0.3229, "step": 12321 }, { - "epoch": 0.71, - "grad_norm": 0.5362943498624293, - "learning_rate": 4.149883262337969e-06, - "loss": 0.361, + "epoch": 0.57, + "grad_norm": 0.5829176456898921, + "learning_rate": 8.354440315808808e-06, + "loss": 0.3054, "step": 12322 }, { - "epoch": 0.71, - "grad_norm": 0.2270767119225587, - "learning_rate": 4.148374117238676e-06, - "loss": 0.1251, + "epoch": 0.57, + "grad_norm": 0.35329106033621605, + "learning_rate": 8.352972698494484e-06, + "loss": 0.2806, "step": 12323 }, { - "epoch": 0.71, - "grad_norm": 0.5482866300378484, - "learning_rate": 4.1468651747809366e-06, - "loss": 0.2773, + "epoch": 0.57, + "grad_norm": 0.2385204701281666, + "learning_rate": 8.351505117643767e-06, + "loss": 0.1502, "step": 12324 }, { - "epoch": 0.71, - "grad_norm": 0.26991337180134006, - "learning_rate": 4.145356435017003e-06, - "loss": 0.262, + "epoch": 0.57, + "grad_norm": 0.566322246283762, + "learning_rate": 8.350037573289133e-06, + "loss": 0.3042, "step": 12325 }, { - "epoch": 0.71, - "grad_norm": 0.8129780685542745, - "learning_rate": 4.143847897999124e-06, - "loss": 0.3138, + "epoch": 0.57, + "grad_norm": 0.4141413599260994, + "learning_rate": 8.34857006546308e-06, + "loss": 0.3187, "step": 12326 }, { - "epoch": 0.71, - "grad_norm": 0.6230381356562327, - "learning_rate": 4.142339563779542e-06, - "loss": 0.3043, + "epoch": 0.57, + "grad_norm": 0.3297787432007817, + "learning_rate": 8.34710259419809e-06, + "loss": 0.2385, "step": 12327 }, { - "epoch": 0.71, - "grad_norm": 0.4264045585072286, - "learning_rate": 4.140831432410484e-06, - "loss": 0.3266, + "epoch": 0.57, + "grad_norm": 0.5739844718882392, + "learning_rate": 8.345635159526654e-06, + "loss": 0.2817, "step": 12328 }, { - "epoch": 0.71, - "grad_norm": 0.3738143640469887, - "learning_rate": 4.139323503944186e-06, - "loss": 0.3328, + "epoch": 0.57, + "grad_norm": 0.4187237643756576, + "learning_rate": 8.344167761481266e-06, + "loss": 0.3128, "step": 12329 }, { - "epoch": 0.71, - "grad_norm": 0.2431647354371285, - "learning_rate": 4.1378157784328625e-06, - "loss": 0.1254, + "epoch": 0.57, + "grad_norm": 0.24042062409301138, + "learning_rate": 8.342700400094407e-06, + "loss": 0.1375, "step": 12330 }, { - "epoch": 0.71, - "grad_norm": 0.5894238839413188, - "learning_rate": 4.136308255928726e-06, - "loss": 0.3434, + "epoch": 0.57, + "grad_norm": 0.2791592107264087, + "learning_rate": 8.341233075398563e-06, + "loss": 0.2448, "step": 12331 }, { - "epoch": 0.71, - "grad_norm": 1.2000110180565327, - "learning_rate": 4.134800936483983e-06, - "loss": 0.6542, + "epoch": 0.57, + "grad_norm": 0.5411815087418221, + "learning_rate": 8.339765787426218e-06, + "loss": 0.3431, "step": 12332 }, { - "epoch": 0.71, - "grad_norm": 0.267646159596712, - "learning_rate": 4.1332938201508285e-06, - "loss": 0.2183, + "epoch": 0.57, + "grad_norm": 0.5520264174346495, + "learning_rate": 8.338298536209861e-06, + "loss": 0.31, "step": 12333 }, { - "epoch": 0.71, - "grad_norm": 0.5490669982217542, - "learning_rate": 4.13178690698146e-06, - "loss": 0.3078, + "epoch": 0.57, + "grad_norm": 0.3476942740972606, + "learning_rate": 8.336831321781973e-06, + "loss": 0.1875, "step": 12334 }, { - "epoch": 0.71, - "grad_norm": 0.40224921477044767, - "learning_rate": 4.130280197028058e-06, - "loss": 0.2917, + "epoch": 0.57, + "grad_norm": 0.38915487620435996, + "learning_rate": 8.335364144175036e-06, + "loss": 0.2818, "step": 12335 }, { - "epoch": 0.71, - "grad_norm": 0.398436666474072, - "learning_rate": 4.128773690342801e-06, - "loss": 0.2357, + "epoch": 0.57, + "grad_norm": 0.5367657339956364, + "learning_rate": 8.33389700342153e-06, + "loss": 0.3837, "step": 12336 }, { - "epoch": 0.71, - "grad_norm": 0.3085871901913578, - "learning_rate": 4.127267386977854e-06, - "loss": 0.2875, + "epoch": 0.57, + "grad_norm": 0.5648056153426586, + "learning_rate": 8.332429899553941e-06, + "loss": 0.281, "step": 12337 }, { - "epoch": 0.71, - "grad_norm": 0.41757727862480615, - "learning_rate": 4.125761286985389e-06, - "loss": 0.2504, + "epoch": 0.57, + "grad_norm": 0.35499618059166976, + "learning_rate": 8.330962832604747e-06, + "loss": 0.3156, "step": 12338 }, { - "epoch": 0.71, - "grad_norm": 0.5953337494643126, - "learning_rate": 4.124255390417558e-06, - "loss": 0.2831, + "epoch": 0.57, + "grad_norm": 0.24185408363033395, + "learning_rate": 8.329495802606428e-06, + "loss": 0.212, "step": 12339 }, { - "epoch": 0.71, - "grad_norm": 0.3973615565133707, - "learning_rate": 4.122749697326511e-06, - "loss": 0.2354, + "epoch": 0.57, + "grad_norm": 0.423425225371304, + "learning_rate": 8.328028809591456e-06, + "loss": 0.1555, "step": 12340 }, { - "epoch": 0.71, - "grad_norm": 0.3576969019418588, - "learning_rate": 4.121244207764384e-06, - "loss": 0.3025, + "epoch": 0.57, + "grad_norm": 0.41636324662122903, + "learning_rate": 8.32656185359232e-06, + "loss": 0.3148, "step": 12341 }, { - "epoch": 0.71, - "grad_norm": 0.6161027575033992, - "learning_rate": 4.119738921783323e-06, - "loss": 0.3604, + "epoch": 0.57, + "grad_norm": 0.41438110745259765, + "learning_rate": 8.325094934641493e-06, + "loss": 0.345, "step": 12342 }, { - "epoch": 0.71, - "grad_norm": 0.22322602570107805, - "learning_rate": 4.118233839435449e-06, - "loss": 0.1689, + "epoch": 0.57, + "grad_norm": 0.46363697417222777, + "learning_rate": 8.323628052771448e-06, + "loss": 0.3108, "step": 12343 }, { - "epoch": 0.71, - "grad_norm": 0.5065158568778143, - "learning_rate": 4.1167289607728845e-06, - "loss": 0.3368, + "epoch": 0.57, + "grad_norm": 0.31613961305280897, + "learning_rate": 8.322161208014663e-06, + "loss": 0.2495, "step": 12344 }, { - "epoch": 0.71, - "grad_norm": 0.4057898445010308, - "learning_rate": 4.1152242858477435e-06, - "loss": 0.2837, + "epoch": 0.57, + "grad_norm": 0.26446155877630106, + "learning_rate": 8.320694400403608e-06, + "loss": 0.1691, "step": 12345 }, { - "epoch": 0.71, - "grad_norm": 0.31512309786509607, - "learning_rate": 4.113719814712127e-06, - "loss": 0.2346, + "epoch": 0.57, + "grad_norm": 0.7584094529468643, + "learning_rate": 8.319227629970763e-06, + "loss": 0.3558, "step": 12346 }, { - "epoch": 0.71, - "grad_norm": 0.6702265380981722, - "learning_rate": 4.112215547418145e-06, - "loss": 0.4688, + "epoch": 0.57, + "grad_norm": 0.2467619352272183, + "learning_rate": 8.317760896748598e-06, + "loss": 0.2238, "step": 12347 }, { - "epoch": 0.71, - "grad_norm": 0.38723962767239795, - "learning_rate": 4.110711484017886e-06, - "loss": 0.3277, + "epoch": 0.57, + "grad_norm": 0.786507250461491, + "learning_rate": 8.316294200769587e-06, + "loss": 0.4751, "step": 12348 }, { - "epoch": 0.71, - "grad_norm": 0.238084010943153, - "learning_rate": 4.1092076245634346e-06, - "loss": 0.1993, + "epoch": 0.57, + "grad_norm": 0.5635684491854968, + "learning_rate": 8.314827542066198e-06, + "loss": 0.3764, "step": 12349 }, { - "epoch": 0.71, - "grad_norm": 0.4857992938940668, - "learning_rate": 4.107703969106867e-06, - "loss": 0.2588, + "epoch": 0.57, + "grad_norm": 0.3394966276185355, + "learning_rate": 8.313360920670903e-06, + "loss": 0.2272, "step": 12350 }, { - "epoch": 0.71, - "grad_norm": 0.617699439344278, - "learning_rate": 4.10620051770026e-06, - "loss": 0.3235, + "epoch": 0.57, + "grad_norm": 0.29952073495419357, + "learning_rate": 8.311894336616173e-06, + "loss": 0.2579, "step": 12351 }, { - "epoch": 0.71, - "grad_norm": 0.4087980308933262, - "learning_rate": 4.104697270395676e-06, - "loss": 0.3254, + "epoch": 0.57, + "grad_norm": 0.3351909188305221, + "learning_rate": 8.310427789934475e-06, + "loss": 0.1936, "step": 12352 }, { - "epoch": 0.71, - "grad_norm": 0.34914826637736407, - "learning_rate": 4.103194227245172e-06, - "loss": 0.2729, + "epoch": 0.57, + "grad_norm": 0.40280897316861747, + "learning_rate": 8.308961280658275e-06, + "loss": 0.2342, "step": 12353 }, { - "epoch": 0.71, - "grad_norm": 0.5393994201005855, - "learning_rate": 4.101691388300795e-06, - "loss": 0.3091, + "epoch": 0.57, + "grad_norm": 0.5517372185741795, + "learning_rate": 8.307494808820045e-06, + "loss": 0.3587, "step": 12354 }, { - "epoch": 0.71, - "grad_norm": 0.42117791947230276, - "learning_rate": 4.100188753614595e-06, - "loss": 0.2889, + "epoch": 0.57, + "grad_norm": 0.42896098357748164, + "learning_rate": 8.306028374452249e-06, + "loss": 0.3308, "step": 12355 }, { - "epoch": 0.71, - "grad_norm": 0.2679960829980229, - "learning_rate": 4.098686323238604e-06, - "loss": 0.1591, + "epoch": 0.57, + "grad_norm": 0.34143708362417374, + "learning_rate": 8.30456197758735e-06, + "loss": 0.2636, "step": 12356 }, { - "epoch": 0.71, - "grad_norm": 0.3767455945961782, - "learning_rate": 4.097184097224853e-06, - "loss": 0.263, + "epoch": 0.57, + "grad_norm": 0.2138957221817499, + "learning_rate": 8.303095618257817e-06, + "loss": 0.1137, "step": 12357 }, { - "epoch": 0.71, - "grad_norm": 0.3979800959449096, - "learning_rate": 4.095682075625363e-06, - "loss": 0.2972, + "epoch": 0.57, + "grad_norm": 0.48671452439179286, + "learning_rate": 8.301629296496107e-06, + "loss": 0.3235, "step": 12358 }, { - "epoch": 0.71, - "grad_norm": 0.7153844837257711, - "learning_rate": 4.094180258492147e-06, - "loss": 0.3261, + "epoch": 0.57, + "grad_norm": 0.29215022679804736, + "learning_rate": 8.30016301233469e-06, + "loss": 0.264, "step": 12359 }, { - "epoch": 0.71, - "grad_norm": 0.33698757282514463, - "learning_rate": 4.092678645877217e-06, - "loss": 0.3312, + "epoch": 0.57, + "grad_norm": 0.6852034843517372, + "learning_rate": 8.29869676580603e-06, + "loss": 0.3167, "step": 12360 }, { - "epoch": 0.71, - "grad_norm": 0.32736161830074756, - "learning_rate": 4.09117723783257e-06, - "loss": 0.2544, + "epoch": 0.57, + "grad_norm": 0.8100227846775483, + "learning_rate": 8.29723055694258e-06, + "loss": 0.4972, "step": 12361 }, { - "epoch": 0.71, - "grad_norm": 0.29749907521333085, - "learning_rate": 4.089676034410198e-06, - "loss": 0.1178, + "epoch": 0.57, + "grad_norm": 0.36596297005250733, + "learning_rate": 8.295764385776801e-06, + "loss": 0.2798, "step": 12362 }, { - "epoch": 0.71, - "grad_norm": 0.7603494708438945, - "learning_rate": 4.088175035662095e-06, - "loss": 0.4403, + "epoch": 0.57, + "grad_norm": 0.3444347362357057, + "learning_rate": 8.29429825234116e-06, + "loss": 0.2532, "step": 12363 }, { - "epoch": 0.71, - "grad_norm": 0.3803334125043377, - "learning_rate": 4.086674241640235e-06, - "loss": 0.2635, + "epoch": 0.57, + "grad_norm": 0.286030448178334, + "learning_rate": 8.29283215666811e-06, + "loss": 0.1828, "step": 12364 }, { - "epoch": 0.71, - "grad_norm": 0.3682851850725568, - "learning_rate": 4.085173652396593e-06, - "loss": 0.3203, + "epoch": 0.57, + "grad_norm": 0.37295889348453226, + "learning_rate": 8.291366098790114e-06, + "loss": 0.2933, "step": 12365 }, { - "epoch": 0.71, - "grad_norm": 0.5811058200485942, - "learning_rate": 4.083673267983128e-06, - "loss": 0.2581, + "epoch": 0.57, + "grad_norm": 0.7567154038039623, + "learning_rate": 8.28990007873962e-06, + "loss": 0.3287, "step": 12366 }, { - "epoch": 0.71, - "grad_norm": 0.34836499493050266, - "learning_rate": 4.0821730884518085e-06, - "loss": 0.2758, + "epoch": 0.57, + "grad_norm": 0.35881415309839687, + "learning_rate": 8.288434096549096e-06, + "loss": 0.3132, "step": 12367 }, { - "epoch": 0.71, - "grad_norm": 0.2767104843102272, - "learning_rate": 4.08067311385458e-06, - "loss": 0.1553, + "epoch": 0.57, + "grad_norm": 0.37109836359347176, + "learning_rate": 8.286968152250989e-06, + "loss": 0.2674, "step": 12368 }, { - "epoch": 0.71, - "grad_norm": 0.30352089800280146, - "learning_rate": 4.079173344243387e-06, - "loss": 0.2286, + "epoch": 0.57, + "grad_norm": 0.9766100134664008, + "learning_rate": 8.285502245877757e-06, + "loss": 0.5622, "step": 12369 }, { - "epoch": 0.71, - "grad_norm": 0.32423430858686303, - "learning_rate": 4.077673779670166e-06, - "loss": 0.2643, + "epoch": 0.57, + "grad_norm": 0.23036099474315122, + "learning_rate": 8.284036377461848e-06, + "loss": 0.1636, "step": 12370 }, { - "epoch": 0.71, - "grad_norm": 0.6190946400648354, - "learning_rate": 4.076174420186844e-06, - "loss": 0.4023, + "epoch": 0.57, + "grad_norm": 0.3915760855123803, + "learning_rate": 8.282570547035726e-06, + "loss": 0.2974, "step": 12371 }, { - "epoch": 0.71, - "grad_norm": 0.3159074215735569, - "learning_rate": 4.07467526584535e-06, - "loss": 0.2145, + "epoch": 0.57, + "grad_norm": 0.9085164552755735, + "learning_rate": 8.281104754631836e-06, + "loss": 0.4167, "step": 12372 }, { - "epoch": 0.71, - "grad_norm": 0.30664118077870395, - "learning_rate": 4.073176316697598e-06, - "loss": 0.2746, + "epoch": 0.57, + "grad_norm": 0.5499504465950326, + "learning_rate": 8.279639000282629e-06, + "loss": 0.2759, "step": 12373 }, { - "epoch": 0.71, - "grad_norm": 0.3017502296489269, - "learning_rate": 4.071677572795495e-06, - "loss": 0.1723, + "epoch": 0.57, + "grad_norm": 0.36719988891307936, + "learning_rate": 8.278173284020557e-06, + "loss": 0.2782, "step": 12374 }, { - "epoch": 0.71, - "grad_norm": 0.4615780722507267, - "learning_rate": 4.0701790341909386e-06, - "loss": 0.2075, + "epoch": 0.57, + "grad_norm": 0.3496606429658165, + "learning_rate": 8.276707605878063e-06, + "loss": 0.2932, "step": 12375 }, { - "epoch": 0.71, - "grad_norm": 0.3855982088594073, - "learning_rate": 4.068680700935831e-06, - "loss": 0.3036, + "epoch": 0.57, + "grad_norm": 0.3213788260554583, + "learning_rate": 8.275241965887606e-06, + "loss": 0.1556, "step": 12376 }, { - "epoch": 0.71, - "grad_norm": 0.36671811376397273, - "learning_rate": 4.0671825730820555e-06, - "loss": 0.3023, + "epoch": 0.57, + "grad_norm": 0.33004758968524384, + "learning_rate": 8.273776364081632e-06, + "loss": 0.2833, "step": 12377 }, { - "epoch": 0.71, - "grad_norm": 1.2575854114457314, - "learning_rate": 4.065684650681493e-06, - "loss": 0.75, + "epoch": 0.57, + "grad_norm": 0.4652463008922134, + "learning_rate": 8.272310800492584e-06, + "loss": 0.3157, "step": 12378 }, { - "epoch": 0.71, - "grad_norm": 0.34502650981498256, - "learning_rate": 4.064186933786012e-06, - "loss": 0.2024, + "epoch": 0.57, + "grad_norm": 0.3963285494475821, + "learning_rate": 8.270845275152909e-06, + "loss": 0.261, "step": 12379 }, { - "epoch": 0.71, - "grad_norm": 0.22705977337178385, - "learning_rate": 4.062689422447487e-06, - "loss": 0.2112, + "epoch": 0.57, + "grad_norm": 0.36151111087124244, + "learning_rate": 8.269379788095053e-06, + "loss": 0.2883, "step": 12380 }, { - "epoch": 0.71, - "grad_norm": 0.640804257150291, - "learning_rate": 4.061192116717771e-06, - "loss": 0.4038, + "epoch": 0.57, + "grad_norm": 1.2563761730621403, + "learning_rate": 8.26791433935146e-06, + "loss": 0.6024, "step": 12381 }, { - "epoch": 0.71, - "grad_norm": 0.33188078306411595, - "learning_rate": 4.0596950166487146e-06, - "loss": 0.2323, + "epoch": 0.57, + "grad_norm": 0.2757890557973591, + "learning_rate": 8.266448928954576e-06, + "loss": 0.2593, "step": 12382 }, { - "epoch": 0.71, - "grad_norm": 1.2416745783711352, - "learning_rate": 4.058198122292167e-06, - "loss": 0.6851, + "epoch": 0.57, + "grad_norm": 0.35760234358959275, + "learning_rate": 8.264983556936841e-06, + "loss": 0.2328, "step": 12383 }, { - "epoch": 0.71, - "grad_norm": 0.33509544879795256, - "learning_rate": 4.0567014336999584e-06, - "loss": 0.2977, + "epoch": 0.57, + "grad_norm": 0.42104959897857963, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2612, "step": 12384 }, { - "epoch": 0.71, - "grad_norm": 0.35947747921768164, - "learning_rate": 4.055204950923927e-06, - "loss": 0.205, + "epoch": 0.57, + "grad_norm": 0.9887674191536168, + "learning_rate": 8.262052928168589e-06, + "loss": 0.6629, "step": 12385 }, { - "epoch": 0.71, - "grad_norm": 0.32272880351486805, - "learning_rate": 4.053708674015893e-06, - "loss": 0.1713, + "epoch": 0.57, + "grad_norm": 0.3231988184591064, + "learning_rate": 8.260587671482951e-06, + "loss": 0.1939, "step": 12386 }, { - "epoch": 0.71, - "grad_norm": 0.5147264850139122, - "learning_rate": 4.052212603027672e-06, - "loss": 0.3099, + "epoch": 0.57, + "grad_norm": 0.3716017017037349, + "learning_rate": 8.259122453306228e-06, + "loss": 0.301, "step": 12387 }, { - "epoch": 0.71, - "grad_norm": 0.2537057854401847, - "learning_rate": 4.050716738011068e-06, - "loss": 0.1977, + "epoch": 0.57, + "grad_norm": 0.6026389594277747, + "learning_rate": 8.25765727367085e-06, + "loss": 0.3981, "step": 12388 }, { - "epoch": 0.71, - "grad_norm": 0.5015595237780548, - "learning_rate": 4.049221079017892e-06, - "loss": 0.3651, + "epoch": 0.57, + "grad_norm": 0.2959017554143114, + "learning_rate": 8.256192132609266e-06, + "loss": 0.191, "step": 12389 }, { - "epoch": 0.71, - "grad_norm": 1.2806204433781172, - "learning_rate": 4.0477256260999344e-06, - "loss": 0.6888, + "epoch": 0.57, + "grad_norm": 0.2483599588069115, + "learning_rate": 8.254727030153908e-06, + "loss": 0.1881, "step": 12390 }, { - "epoch": 0.71, - "grad_norm": 0.3961070936444837, - "learning_rate": 4.046230379308982e-06, - "loss": 0.2544, + "epoch": 0.57, + "grad_norm": 0.5218630992226893, + "learning_rate": 8.25326196633721e-06, + "loss": 0.4024, "step": 12391 }, { - "epoch": 0.71, - "grad_norm": 0.24351026173987872, - "learning_rate": 4.0447353386968155e-06, - "loss": 0.2007, + "epoch": 0.57, + "grad_norm": 0.3233240626980372, + "learning_rate": 8.251796941191605e-06, + "loss": 0.1899, "step": 12392 }, { - "epoch": 0.71, - "grad_norm": 0.5317384655177135, - "learning_rate": 4.043240504315209e-06, - "loss": 0.3628, + "epoch": 0.57, + "grad_norm": 0.9057577591052176, + "learning_rate": 8.250331954749534e-06, + "loss": 0.414, "step": 12393 }, { - "epoch": 0.71, - "grad_norm": 0.36782911635286475, - "learning_rate": 4.041745876215927e-06, - "loss": 0.3094, + "epoch": 0.57, + "grad_norm": 0.4689033724729866, + "learning_rate": 8.24886700704343e-06, + "loss": 0.3672, "step": 12394 }, { - "epoch": 0.71, - "grad_norm": 0.26601632760932586, - "learning_rate": 4.040251454450729e-06, - "loss": 0.1666, + "epoch": 0.57, + "grad_norm": 0.29289236885933145, + "learning_rate": 8.24740209810572e-06, + "loss": 0.2603, "step": 12395 }, { - "epoch": 0.71, - "grad_norm": 0.36052225817436, - "learning_rate": 4.038757239071364e-06, - "loss": 0.3037, + "epoch": 0.57, + "grad_norm": 0.27872756981612334, + "learning_rate": 8.245937227968836e-06, + "loss": 0.1319, "step": 12396 }, { - "epoch": 0.71, - "grad_norm": 0.3159879747015827, - "learning_rate": 4.037263230129583e-06, - "loss": 0.2501, + "epoch": 0.57, + "grad_norm": 1.4317620298695066, + "learning_rate": 8.244472396665215e-06, + "loss": 0.858, "step": 12397 }, { - "epoch": 0.71, - "grad_norm": 0.6144153528165102, - "learning_rate": 4.035769427677118e-06, - "loss": 0.2974, + "epoch": 0.57, + "grad_norm": 0.3389507964243794, + "learning_rate": 8.243007604227282e-06, + "loss": 0.2417, "step": 12398 }, { - "epoch": 0.71, - "grad_norm": 0.4335431228327503, - "learning_rate": 4.034275831765702e-06, - "loss": 0.2698, + "epoch": 0.57, + "grad_norm": 0.3600797151606209, + "learning_rate": 8.241542850687466e-06, + "loss": 0.2283, "step": 12399 }, { - "epoch": 0.71, - "grad_norm": 0.2705356897806853, - "learning_rate": 4.032782442447055e-06, - "loss": 0.249, + "epoch": 0.57, + "grad_norm": 1.166964003307873, + "learning_rate": 8.240078136078195e-06, + "loss": 0.3802, "step": 12400 }, { - "epoch": 0.71, - "grad_norm": 0.27198063920594956, - "learning_rate": 4.031289259772898e-06, - "loss": 0.1899, + "epoch": 0.57, + "grad_norm": 0.3383773601837302, + "learning_rate": 8.238613460431902e-06, + "loss": 0.2477, "step": 12401 }, { - "epoch": 0.71, - "grad_norm": 0.9726204979369423, - "learning_rate": 4.029796283794938e-06, - "loss": 0.5632, + "epoch": 0.57, + "grad_norm": 0.22146615153481458, + "learning_rate": 8.237148823781008e-06, + "loss": 0.1392, "step": 12402 }, { - "epoch": 0.71, - "grad_norm": 0.3511151019796598, - "learning_rate": 4.028303514564876e-06, - "loss": 0.2668, + "epoch": 0.57, + "grad_norm": 0.484298914712896, + "learning_rate": 8.23568422615794e-06, + "loss": 0.3555, "step": 12403 }, { - "epoch": 0.71, - "grad_norm": 0.46440585552108493, - "learning_rate": 4.026810952134402e-06, - "loss": 0.2985, + "epoch": 0.57, + "grad_norm": 0.34085736574902065, + "learning_rate": 8.234219667595123e-06, + "loss": 0.2505, "step": 12404 }, { - "epoch": 0.71, - "grad_norm": 0.3997705620102345, - "learning_rate": 4.025318596555212e-06, - "loss": 0.2284, + "epoch": 0.57, + "grad_norm": 1.0624227973674418, + "learning_rate": 8.232755148124977e-06, + "loss": 0.3223, "step": 12405 }, { - "epoch": 0.71, - "grad_norm": 0.24602939683462624, - "learning_rate": 4.023826447878982e-06, - "loss": 0.2192, + "epoch": 0.57, + "grad_norm": 0.37652724744219923, + "learning_rate": 8.231290667779931e-06, + "loss": 0.327, "step": 12406 }, { - "epoch": 0.71, - "grad_norm": 1.1948345929606112, - "learning_rate": 4.022334506157386e-06, - "loss": 0.8185, + "epoch": 0.57, + "grad_norm": 0.3922742983999391, + "learning_rate": 8.22982622659241e-06, + "loss": 0.2295, "step": 12407 }, { - "epoch": 0.71, - "grad_norm": 0.33598971886925083, - "learning_rate": 4.020842771442085e-06, - "loss": 0.2542, + "epoch": 0.57, + "grad_norm": 0.2791941550733342, + "learning_rate": 8.228361824594827e-06, + "loss": 0.2, "step": 12408 }, { - "epoch": 0.71, - "grad_norm": 0.42240133665361956, - "learning_rate": 4.019351243784745e-06, - "loss": 0.2873, + "epoch": 0.57, + "grad_norm": 0.4762164290786062, + "learning_rate": 8.2268974618196e-06, + "loss": 0.3031, "step": 12409 }, { - "epoch": 0.71, - "grad_norm": 0.5062951214860231, - "learning_rate": 4.017859923237014e-06, - "loss": 0.3348, + "epoch": 0.57, + "grad_norm": 0.46365458648694297, + "learning_rate": 8.22543313829916e-06, + "loss": 0.3029, "step": 12410 }, { - "epoch": 0.71, - "grad_norm": 0.34173636455748163, - "learning_rate": 4.016368809850537e-06, - "loss": 0.2076, + "epoch": 0.57, + "grad_norm": 0.5724800502322915, + "learning_rate": 8.223968854065918e-06, + "loss": 0.3312, "step": 12411 }, { - "epoch": 0.71, - "grad_norm": 0.3865772364660087, - "learning_rate": 4.01487790367695e-06, - "loss": 0.2696, + "epoch": 0.57, + "grad_norm": 1.2673897735447883, + "learning_rate": 8.222504609152295e-06, + "loss": 0.3363, "step": 12412 }, { - "epoch": 0.71, - "grad_norm": 0.3557344917335959, - "learning_rate": 4.013387204767881e-06, - "loss": 0.3212, + "epoch": 0.57, + "grad_norm": 0.40223367770599583, + "learning_rate": 8.221040403590704e-06, + "loss": 0.2689, "step": 12413 }, { - "epoch": 0.71, - "grad_norm": 0.23541794800684201, - "learning_rate": 4.01189671317496e-06, - "loss": 0.0956, + "epoch": 0.57, + "grad_norm": 0.2787998568034056, + "learning_rate": 8.219576237413568e-06, + "loss": 0.2475, "step": 12414 }, { - "epoch": 0.71, - "grad_norm": 0.2951339678252151, - "learning_rate": 4.0104064289497965e-06, - "loss": 0.2571, + "epoch": 0.57, + "grad_norm": 1.3542287516963254, + "learning_rate": 8.218112110653297e-06, + "loss": 0.5502, "step": 12415 }, { - "epoch": 0.71, - "grad_norm": 0.33535513867717087, - "learning_rate": 4.008916352144002e-06, - "loss": 0.302, + "epoch": 0.57, + "grad_norm": 0.2955149527241956, + "learning_rate": 8.216648023342307e-06, + "loss": 0.2131, "step": 12416 }, { - "epoch": 0.71, - "grad_norm": 1.12245964652745, - "learning_rate": 4.007426482809172e-06, - "loss": 0.5287, + "epoch": 0.57, + "grad_norm": 1.016787540048818, + "learning_rate": 8.21518397551301e-06, + "loss": 0.3979, "step": 12417 }, { - "epoch": 0.71, - "grad_norm": 0.29781734362720846, - "learning_rate": 4.0059368209969106e-06, - "loss": 0.186, + "epoch": 0.57, + "grad_norm": 0.37578326516777144, + "learning_rate": 8.213719967197818e-06, + "loss": 0.266, "step": 12418 }, { - "epoch": 0.71, - "grad_norm": 1.0779840391036732, - "learning_rate": 4.004447366758798e-06, - "loss": 0.7548, + "epoch": 0.57, + "grad_norm": 0.35435315444939663, + "learning_rate": 8.212255998429146e-06, + "loss": 0.2826, "step": 12419 }, { - "epoch": 0.71, - "grad_norm": 0.22319618891727672, - "learning_rate": 4.002958120146415e-06, - "loss": 0.2061, + "epoch": 0.57, + "grad_norm": 0.43313529004640994, + "learning_rate": 8.210792069239401e-06, + "loss": 0.2281, "step": 12420 }, { - "epoch": 0.71, - "grad_norm": 0.3003774906221277, - "learning_rate": 4.001469081211332e-06, - "loss": 0.2077, + "epoch": 0.57, + "grad_norm": 1.0984868694252856, + "learning_rate": 8.209328179660998e-06, + "loss": 0.5229, "step": 12421 }, { - "epoch": 0.71, - "grad_norm": 0.6334985632628138, - "learning_rate": 3.99998025000512e-06, - "loss": 0.3586, + "epoch": 0.57, + "grad_norm": 0.3477079155075459, + "learning_rate": 8.207864329726338e-06, + "loss": 0.2229, "step": 12422 }, { - "epoch": 0.71, - "grad_norm": 1.0323028337477798, - "learning_rate": 3.998491626579334e-06, - "loss": 0.4385, + "epoch": 0.57, + "grad_norm": 0.42983207572983373, + "learning_rate": 8.206400519467839e-06, + "loss": 0.3093, "step": 12423 }, { - "epoch": 0.71, - "grad_norm": 0.22590105407607644, - "learning_rate": 3.997003210985524e-06, - "loss": 0.2142, + "epoch": 0.57, + "grad_norm": 0.4803253007753997, + "learning_rate": 8.204936748917904e-06, + "loss": 0.2709, "step": 12424 }, { - "epoch": 0.71, - "grad_norm": 1.2186487143817915, - "learning_rate": 3.995515003275235e-06, - "loss": 0.7572, + "epoch": 0.57, + "grad_norm": 0.3434384332900092, + "learning_rate": 8.20347301810894e-06, + "loss": 0.1641, "step": 12425 }, { - "epoch": 0.71, - "grad_norm": 0.320271494595398, - "learning_rate": 3.9940270035000036e-06, - "loss": 0.2219, + "epoch": 0.57, + "grad_norm": 0.3093570204364109, + "learning_rate": 8.20200932707335e-06, + "loss": 0.287, "step": 12426 }, { - "epoch": 0.71, - "grad_norm": 0.5789444426466166, - "learning_rate": 3.992539211711359e-06, - "loss": 0.3055, + "epoch": 0.57, + "grad_norm": 0.8032423300693617, + "learning_rate": 8.20054567584354e-06, + "loss": 0.4817, "step": 12427 }, { - "epoch": 0.71, - "grad_norm": 0.2750210705197789, - "learning_rate": 3.991051627960822e-06, - "loss": 0.2382, + "epoch": 0.57, + "grad_norm": 0.34757534088613523, + "learning_rate": 8.199082064451916e-06, + "loss": 0.2145, "step": 12428 }, { - "epoch": 0.71, - "grad_norm": 1.1626005918199633, - "learning_rate": 3.989564252299907e-06, - "loss": 0.6162, + "epoch": 0.57, + "grad_norm": 0.2822963453604566, + "learning_rate": 8.19761849293088e-06, + "loss": 0.2185, "step": 12429 }, { - "epoch": 0.71, - "grad_norm": 0.6137501022840672, - "learning_rate": 3.988077084780126e-06, - "loss": 0.3585, + "epoch": 0.57, + "grad_norm": 0.3702796510005095, + "learning_rate": 8.19615496131283e-06, + "loss": 0.299, "step": 12430 }, { - "epoch": 0.71, - "grad_norm": 0.3564797669004418, - "learning_rate": 3.986590125452977e-06, - "loss": 0.2574, + "epoch": 0.57, + "grad_norm": 0.4137639722220958, + "learning_rate": 8.194691469630174e-06, + "loss": 0.1824, "step": 12431 }, { - "epoch": 0.71, - "grad_norm": 0.3595173618942061, - "learning_rate": 3.985103374369954e-06, - "loss": 0.3004, + "epoch": 0.57, + "grad_norm": 0.5323005131427144, + "learning_rate": 8.193228017915309e-06, + "loss": 0.4215, "step": 12432 }, { - "epoch": 0.71, - "grad_norm": 0.5610988564310685, - "learning_rate": 3.983616831582538e-06, - "loss": 0.3092, + "epoch": 0.57, + "grad_norm": 0.9311564601728092, + "learning_rate": 8.191764606200633e-06, + "loss": 0.4534, "step": 12433 }, { - "epoch": 0.71, - "grad_norm": 0.22865917764490962, - "learning_rate": 3.9821304971422155e-06, - "loss": 0.153, + "epoch": 0.57, + "grad_norm": 0.3149471324540098, + "learning_rate": 8.190301234518547e-06, + "loss": 0.2756, "step": 12434 }, { - "epoch": 0.71, - "grad_norm": 1.0396124580607748, - "learning_rate": 3.980644371100457e-06, - "loss": 0.4856, + "epoch": 0.57, + "grad_norm": 0.5249088376428949, + "learning_rate": 8.188837902901441e-06, + "loss": 0.2558, "step": 12435 }, { - "epoch": 0.71, - "grad_norm": 0.2712024160349323, - "learning_rate": 3.979158453508724e-06, - "loss": 0.2664, + "epoch": 0.57, + "grad_norm": 0.25703269409942686, + "learning_rate": 8.187374611381726e-06, + "loss": 0.1457, "step": 12436 }, { - "epoch": 0.71, - "grad_norm": 0.45818436095554166, - "learning_rate": 3.977672744418475e-06, - "loss": 0.2934, + "epoch": 0.57, + "grad_norm": 0.4132464407406081, + "learning_rate": 8.18591135999179e-06, + "loss": 0.2578, "step": 12437 }, { - "epoch": 0.71, - "grad_norm": 0.7060226833636093, - "learning_rate": 3.976187243881156e-06, - "loss": 0.3856, + "epoch": 0.57, + "grad_norm": 0.3041335441454722, + "learning_rate": 8.184448148764024e-06, + "loss": 0.2481, "step": 12438 }, { - "epoch": 0.71, - "grad_norm": 0.28119753881737464, - "learning_rate": 3.974701951948218e-06, - "loss": 0.207, + "epoch": 0.57, + "grad_norm": 0.8117770327175959, + "learning_rate": 8.182984977730826e-06, + "loss": 0.482, "step": 12439 }, { - "epoch": 0.71, - "grad_norm": 0.3156999954213079, - "learning_rate": 3.973216868671092e-06, - "loss": 0.2725, + "epoch": 0.57, + "grad_norm": 0.4122792101600311, + "learning_rate": 8.18152184692459e-06, + "loss": 0.3015, "step": 12440 }, { - "epoch": 0.71, - "grad_norm": 0.3275821325716865, - "learning_rate": 3.9717319941012054e-06, - "loss": 0.0955, + "epoch": 0.57, + "grad_norm": 0.313747636165733, + "learning_rate": 8.18005875637771e-06, + "loss": 0.1781, "step": 12441 }, { - "epoch": 0.71, - "grad_norm": 0.4230111041588665, - "learning_rate": 3.970247328289979e-06, - "loss": 0.3222, + "epoch": 0.57, + "grad_norm": 0.3062370537936647, + "learning_rate": 8.178595706122574e-06, + "loss": 0.2756, "step": 12442 }, { - "epoch": 0.71, - "grad_norm": 0.5330638370371196, - "learning_rate": 3.96876287128883e-06, - "loss": 0.3889, + "epoch": 0.57, + "grad_norm": 1.0103859000339337, + "learning_rate": 8.177132696191573e-06, + "loss": 0.3966, "step": 12443 }, { - "epoch": 0.71, - "grad_norm": 0.48238254940743125, - "learning_rate": 3.967278623149165e-06, - "loss": 0.2652, + "epoch": 0.57, + "grad_norm": 0.40139422837911387, + "learning_rate": 8.175669726617097e-06, + "loss": 0.2424, "step": 12444 }, { - "epoch": 0.72, - "grad_norm": 0.6377281456624062, - "learning_rate": 3.965794583922382e-06, - "loss": 0.27, + "epoch": 0.57, + "grad_norm": 0.5407441836549812, + "learning_rate": 8.174206797431537e-06, + "loss": 0.3529, "step": 12445 }, { - "epoch": 0.72, - "grad_norm": 0.24998486209393883, - "learning_rate": 3.964310753659869e-06, - "loss": 0.1951, + "epoch": 0.57, + "grad_norm": 0.40317056526756295, + "learning_rate": 8.172743908667277e-06, + "loss": 0.3095, "step": 12446 }, { - "epoch": 0.72, - "grad_norm": 0.3267863704261433, - "learning_rate": 3.9628271324130185e-06, - "loss": 0.2329, + "epoch": 0.57, + "grad_norm": 0.42489331987648654, + "learning_rate": 8.171281060356705e-06, + "loss": 0.339, "step": 12447 }, { - "epoch": 0.72, - "grad_norm": 0.5376672392151259, - "learning_rate": 3.961343720233204e-06, - "loss": 0.2847, + "epoch": 0.57, + "grad_norm": 0.17159769055253393, + "learning_rate": 8.16981825253221e-06, + "loss": 0.0726, "step": 12448 }, { - "epoch": 0.72, - "grad_norm": 0.41943787136242416, - "learning_rate": 3.9598605171717976e-06, - "loss": 0.3178, + "epoch": 0.57, + "grad_norm": 0.6731544408584548, + "learning_rate": 8.168355485226173e-06, + "loss": 0.39, "step": 12449 }, { - "epoch": 0.72, - "grad_norm": 0.6504493113239271, - "learning_rate": 3.958377523280162e-06, - "loss": 0.3003, + "epoch": 0.57, + "grad_norm": 0.2874904813776581, + "learning_rate": 8.166892758470982e-06, + "loss": 0.2817, "step": 12450 }, { - "epoch": 0.72, - "grad_norm": 0.6739410311061353, - "learning_rate": 3.956894738609649e-06, - "loss": 0.3241, + "epoch": 0.57, + "grad_norm": 0.6633613225421824, + "learning_rate": 8.165430072299017e-06, + "loss": 0.3199, "step": 12451 }, { - "epoch": 0.72, - "grad_norm": 0.25881186952173607, - "learning_rate": 3.955412163211615e-06, - "loss": 0.2511, + "epoch": 0.57, + "grad_norm": 0.5769082173635337, + "learning_rate": 8.16396742674266e-06, + "loss": 0.3491, "step": 12452 }, { - "epoch": 0.72, - "grad_norm": 0.4730776211188233, - "learning_rate": 3.953929797137398e-06, - "loss": 0.2741, + "epoch": 0.57, + "grad_norm": 0.3251954097440529, + "learning_rate": 8.162504821834296e-06, + "loss": 0.2609, "step": 12453 }, { - "epoch": 0.72, - "grad_norm": 0.32294979003618474, - "learning_rate": 3.9524476404383324e-06, - "loss": 0.1982, + "epoch": 0.57, + "grad_norm": 0.26880435688326815, + "learning_rate": 8.16104225760631e-06, + "loss": 0.1841, "step": 12454 }, { - "epoch": 0.72, - "grad_norm": 0.4427233885442675, - "learning_rate": 3.9509656931657405e-06, - "loss": 0.3236, + "epoch": 0.57, + "grad_norm": 0.6593908460085663, + "learning_rate": 8.15957973409107e-06, + "loss": 0.3456, "step": 12455 }, { - "epoch": 0.72, - "grad_norm": 0.5624680637454126, - "learning_rate": 3.949483955370951e-06, - "loss": 0.3349, + "epoch": 0.57, + "grad_norm": 0.4019859074904042, + "learning_rate": 8.158117251320958e-06, + "loss": 0.3045, "step": 12456 }, { - "epoch": 0.72, - "grad_norm": 0.3926441797137969, - "learning_rate": 3.9480024271052715e-06, - "loss": 0.1955, + "epoch": 0.57, + "grad_norm": 0.6605321846349567, + "learning_rate": 8.15665480932836e-06, + "loss": 0.4457, "step": 12457 }, { - "epoch": 0.72, - "grad_norm": 0.33987897951718266, - "learning_rate": 3.946521108420008e-06, - "loss": 0.2365, + "epoch": 0.57, + "grad_norm": 0.2708970998161733, + "learning_rate": 8.155192408145647e-06, + "loss": 0.2264, "step": 12458 }, { - "epoch": 0.72, - "grad_norm": 0.3372391079565309, - "learning_rate": 3.945039999366458e-06, - "loss": 0.2494, + "epoch": 0.57, + "grad_norm": 0.5402518195015316, + "learning_rate": 8.153730047805198e-06, + "loss": 0.3464, "step": 12459 }, { - "epoch": 0.72, - "grad_norm": 0.3119762747576032, - "learning_rate": 3.9435590999959115e-06, - "loss": 0.2094, + "epoch": 0.57, + "grad_norm": 0.3463248154201205, + "learning_rate": 8.152267728339382e-06, + "loss": 0.2422, "step": 12460 }, { - "epoch": 0.72, - "grad_norm": 0.5354947724202691, - "learning_rate": 3.942078410359655e-06, - "loss": 0.3259, + "epoch": 0.57, + "grad_norm": 0.5064206343411938, + "learning_rate": 8.15080544978058e-06, + "loss": 0.2461, "step": 12461 }, { - "epoch": 0.72, - "grad_norm": 0.7620520172406171, - "learning_rate": 3.940597930508962e-06, - "loss": 0.4483, + "epoch": 0.57, + "grad_norm": 0.2840657758305156, + "learning_rate": 8.149343212161163e-06, + "loss": 0.2515, "step": 12462 }, { - "epoch": 0.72, - "grad_norm": 0.3951157539623328, - "learning_rate": 3.939117660495098e-06, - "loss": 0.2072, + "epoch": 0.57, + "grad_norm": 0.6627502765015306, + "learning_rate": 8.147881015513505e-06, + "loss": 0.4602, "step": 12463 }, { - "epoch": 0.72, - "grad_norm": 0.30130478030699875, - "learning_rate": 3.937637600369332e-06, - "loss": 0.2685, + "epoch": 0.57, + "grad_norm": 0.5664572059038284, + "learning_rate": 8.146418859869975e-06, + "loss": 0.2433, "step": 12464 }, { - "epoch": 0.72, - "grad_norm": 0.317880848548765, - "learning_rate": 3.936157750182915e-06, - "loss": 0.1775, + "epoch": 0.57, + "grad_norm": 0.3765739237751285, + "learning_rate": 8.144956745262944e-06, + "loss": 0.277, "step": 12465 }, { - "epoch": 0.72, - "grad_norm": 0.7654264548014176, - "learning_rate": 3.934678109987096e-06, - "loss": 0.3159, + "epoch": 0.57, + "grad_norm": 0.29502516818982955, + "learning_rate": 8.143494671724784e-06, + "loss": 0.2515, "step": 12466 }, { - "epoch": 0.72, - "grad_norm": 0.35211292954046547, - "learning_rate": 3.933198679833108e-06, - "loss": 0.2436, + "epoch": 0.57, + "grad_norm": 0.2949079316320679, + "learning_rate": 8.142032639287861e-06, + "loss": 0.1248, "step": 12467 }, { - "epoch": 0.72, - "grad_norm": 0.48058464826750036, - "learning_rate": 3.931719459772193e-06, - "loss": 0.4169, + "epoch": 0.57, + "grad_norm": 0.3822384776154844, + "learning_rate": 8.140570647984547e-06, + "loss": 0.3162, "step": 12468 }, { - "epoch": 0.72, - "grad_norm": 0.7896326662596463, - "learning_rate": 3.9302404498555725e-06, - "loss": 0.3491, + "epoch": 0.57, + "grad_norm": 1.4129630009859606, + "learning_rate": 8.139108697847201e-06, + "loss": 0.79, "step": 12469 }, { - "epoch": 0.72, - "grad_norm": 0.1967202253818277, - "learning_rate": 3.928761650134464e-06, - "loss": 0.149, + "epoch": 0.57, + "grad_norm": 0.35011490265178713, + "learning_rate": 8.1376467889082e-06, + "loss": 0.2756, "step": 12470 }, { - "epoch": 0.72, - "grad_norm": 0.5161953619328937, - "learning_rate": 3.927283060660075e-06, - "loss": 0.326, + "epoch": 0.57, + "grad_norm": 0.3820766663129361, + "learning_rate": 8.136184921199904e-06, + "loss": 0.2188, "step": 12471 }, { - "epoch": 0.72, - "grad_norm": 0.4444720336390856, - "learning_rate": 3.925804681483614e-06, - "loss": 0.3244, + "epoch": 0.57, + "grad_norm": 0.27827514541470605, + "learning_rate": 8.134723094754679e-06, + "loss": 0.1636, "step": 12472 }, { - "epoch": 0.72, - "grad_norm": 0.36218587264417024, - "learning_rate": 3.924326512656279e-06, - "loss": 0.243, + "epoch": 0.57, + "grad_norm": 0.42491064725440575, + "learning_rate": 8.133261309604881e-06, + "loss": 0.319, "step": 12473 }, { - "epoch": 0.72, - "grad_norm": 1.308785521351059, - "learning_rate": 3.922848554229254e-06, - "loss": 0.7986, + "epoch": 0.57, + "grad_norm": 0.2958681542869471, + "learning_rate": 8.131799565782884e-06, + "loss": 0.2171, "step": 12474 }, { - "epoch": 0.72, - "grad_norm": 0.37676328938957093, - "learning_rate": 3.921370806253722e-06, - "loss": 0.2643, + "epoch": 0.57, + "grad_norm": 0.720910375386371, + "learning_rate": 8.130337863321042e-06, + "loss": 0.4501, "step": 12475 }, { - "epoch": 0.72, - "grad_norm": 0.29437300765355345, - "learning_rate": 3.919893268780854e-06, - "loss": 0.2193, + "epoch": 0.57, + "grad_norm": 0.7362239965509798, + "learning_rate": 8.128876202251719e-06, + "loss": 0.4947, "step": 12476 }, { - "epoch": 0.72, - "grad_norm": 0.3144016115644878, - "learning_rate": 3.918415941861825e-06, - "loss": 0.2123, + "epoch": 0.57, + "grad_norm": 0.37897847573113214, + "learning_rate": 8.127414582607272e-06, + "loss": 0.2142, "step": 12477 }, { - "epoch": 0.72, - "grad_norm": 0.49162002682484635, - "learning_rate": 3.91693882554779e-06, - "loss": 0.2872, + "epoch": 0.57, + "grad_norm": 0.3739905570542845, + "learning_rate": 8.125953004420061e-06, + "loss": 0.2968, "step": 12478 }, { - "epoch": 0.72, - "grad_norm": 0.36777889700838373, - "learning_rate": 3.915461919889903e-06, - "loss": 0.3064, + "epoch": 0.57, + "grad_norm": 0.7084940774992083, + "learning_rate": 8.124491467722446e-06, + "loss": 0.4007, "step": 12479 }, { - "epoch": 0.72, - "grad_norm": 0.3299043967705451, - "learning_rate": 3.913985224939303e-06, - "loss": 0.2594, + "epoch": 0.57, + "grad_norm": 0.2065382822005008, + "learning_rate": 8.123029972546782e-06, + "loss": 0.143, "step": 12480 }, { - "epoch": 0.72, - "grad_norm": 0.790198056679378, - "learning_rate": 3.912508740747137e-06, - "loss": 0.3766, + "epoch": 0.57, + "grad_norm": 0.3876459832523706, + "learning_rate": 8.121568518925424e-06, + "loss": 0.3077, "step": 12481 }, { - "epoch": 0.72, - "grad_norm": 0.34173548287831607, - "learning_rate": 3.911032467364531e-06, - "loss": 0.2535, + "epoch": 0.57, + "grad_norm": 1.0955839718431823, + "learning_rate": 8.120107106890726e-06, + "loss": 0.6572, "step": 12482 }, { - "epoch": 0.72, - "grad_norm": 0.2538296150592013, - "learning_rate": 3.909556404842609e-06, - "loss": 0.1983, + "epoch": 0.57, + "grad_norm": 0.3569165523728648, + "learning_rate": 8.118645736475051e-06, + "loss": 0.2578, "step": 12483 }, { - "epoch": 0.72, - "grad_norm": 0.8236400542230081, - "learning_rate": 3.908080553232484e-06, - "loss": 0.4423, + "epoch": 0.57, + "grad_norm": 0.6492416582049085, + "learning_rate": 8.117184407710743e-06, + "loss": 0.2934, "step": 12484 }, { - "epoch": 0.72, - "grad_norm": 0.3028413178151663, - "learning_rate": 3.906604912585271e-06, - "loss": 0.2541, + "epoch": 0.57, + "grad_norm": 0.40233132736261773, + "learning_rate": 8.115723120630159e-06, + "loss": 0.3011, "step": 12485 }, { - "epoch": 0.72, - "grad_norm": 0.951138237415222, - "learning_rate": 3.905129482952067e-06, - "loss": 0.4526, + "epoch": 0.57, + "grad_norm": 0.2624374978066074, + "learning_rate": 8.114261875265643e-06, + "loss": 0.1997, "step": 12486 }, { - "epoch": 0.72, - "grad_norm": 0.3129589276357312, - "learning_rate": 3.903654264383967e-06, - "loss": 0.2508, + "epoch": 0.57, + "grad_norm": 0.29734490956247445, + "learning_rate": 8.112800671649557e-06, + "loss": 0.1312, "step": 12487 }, { - "epoch": 0.72, - "grad_norm": 0.3663928277836569, - "learning_rate": 3.902179256932058e-06, - "loss": 0.2743, + "epoch": 0.57, + "grad_norm": 0.7807367233554806, + "learning_rate": 8.111339509814245e-06, + "loss": 0.4675, "step": 12488 }, { - "epoch": 0.72, - "grad_norm": 0.776248988057081, - "learning_rate": 3.900704460647416e-06, - "loss": 0.29, + "epoch": 0.57, + "grad_norm": 0.37387641436265884, + "learning_rate": 8.109878389792055e-06, + "loss": 0.2793, "step": 12489 }, { - "epoch": 0.72, - "grad_norm": 0.32241816415937624, - "learning_rate": 3.89922987558112e-06, - "loss": 0.1965, + "epoch": 0.57, + "grad_norm": 0.3845747260479208, + "learning_rate": 8.108417311615336e-06, + "loss": 0.2558, "step": 12490 }, { - "epoch": 0.72, - "grad_norm": 0.2956955229384876, - "learning_rate": 3.897755501784231e-06, - "loss": 0.2823, + "epoch": 0.57, + "grad_norm": 0.6206724529713612, + "learning_rate": 8.106956275316433e-06, + "loss": 0.4, "step": 12491 }, { - "epoch": 0.72, - "grad_norm": 0.49019798055890323, - "learning_rate": 3.896281339307805e-06, - "loss": 0.3925, + "epoch": 0.57, + "grad_norm": 0.35668872045109684, + "learning_rate": 8.105495280927696e-06, + "loss": 0.2601, "step": 12492 }, { - "epoch": 0.72, - "grad_norm": 0.8539817933659437, - "learning_rate": 3.8948073882028945e-06, + "epoch": 0.57, + "grad_norm": 0.2372328514099953, + "learning_rate": 8.104034328481468e-06, "loss": 0.1781, "step": 12493 }, { - "epoch": 0.72, - "grad_norm": 0.3545876480577167, - "learning_rate": 3.893333648520542e-06, - "loss": 0.2658, + "epoch": 0.57, + "grad_norm": 0.8630802371627697, + "learning_rate": 8.10257341801009e-06, + "loss": 0.5461, "step": 12494 }, { - "epoch": 0.72, - "grad_norm": 0.3817268627937381, - "learning_rate": 3.891860120311784e-06, - "loss": 0.3108, + "epoch": 0.57, + "grad_norm": 0.366086123693029, + "learning_rate": 8.101112549545908e-06, + "loss": 0.2511, "step": 12495 }, { - "epoch": 0.72, - "grad_norm": 0.4087311870618779, - "learning_rate": 3.890386803627642e-06, - "loss": 0.1709, + "epoch": 0.57, + "grad_norm": 0.8657018978144175, + "learning_rate": 8.099651723121267e-06, + "loss": 0.3853, "step": 12496 }, { - "epoch": 0.72, - "grad_norm": 0.4015110688746216, - "learning_rate": 3.888913698519145e-06, - "loss": 0.3142, + "epoch": 0.57, + "grad_norm": 0.3906777218534867, + "learning_rate": 8.098190938768503e-06, + "loss": 0.2514, "step": 12497 }, { - "epoch": 0.72, - "grad_norm": 0.4151975450528388, - "learning_rate": 3.887440805037306e-06, - "loss": 0.2699, + "epoch": 0.57, + "grad_norm": 0.33402678133117836, + "learning_rate": 8.09673019651996e-06, + "loss": 0.2298, "step": 12498 }, { - "epoch": 0.72, - "grad_norm": 0.3269623807011317, - "learning_rate": 3.885968123233128e-06, - "loss": 0.233, + "epoch": 0.57, + "grad_norm": 0.34129716769269874, + "learning_rate": 8.095269496407972e-06, + "loss": 0.222, "step": 12499 }, { - "epoch": 0.72, - "grad_norm": 0.35990831564539366, - "learning_rate": 3.884495653157611e-06, - "loss": 0.292, + "epoch": 0.57, + "grad_norm": 0.9013801734073409, + "learning_rate": 8.093808838464884e-06, + "loss": 0.3809, "step": 12500 }, { - "epoch": 0.72, - "grad_norm": 0.67394425063061, - "learning_rate": 3.883023394861742e-06, - "loss": 0.3572, + "epoch": 0.57, + "grad_norm": 0.39387452502888753, + "learning_rate": 8.092348222723034e-06, + "loss": 0.2537, "step": 12501 }, { - "epoch": 0.72, - "grad_norm": 0.33215648058639724, - "learning_rate": 3.881551348396515e-06, - "loss": 0.1395, + "epoch": 0.57, + "grad_norm": 0.4908113845834032, + "learning_rate": 8.090887649214755e-06, + "loss": 0.3218, "step": 12502 }, { - "epoch": 0.72, - "grad_norm": 0.2825531167285477, - "learning_rate": 3.880079513812901e-06, - "loss": 0.2577, + "epoch": 0.57, + "grad_norm": 1.3932149890811034, + "learning_rate": 8.089427117972379e-06, + "loss": 0.2244, "step": 12503 }, { - "epoch": 0.72, - "grad_norm": 0.3434631773890578, - "learning_rate": 3.878607891161871e-06, - "loss": 0.2822, + "epoch": 0.57, + "grad_norm": 0.27325876785340175, + "learning_rate": 8.087966629028247e-06, + "loss": 0.1955, "step": 12504 }, { - "epoch": 0.72, - "grad_norm": 0.8360101546679122, - "learning_rate": 3.8771364804943825e-06, - "loss": 0.4662, + "epoch": 0.57, + "grad_norm": 0.6780950161412388, + "learning_rate": 8.086506182414692e-06, + "loss": 0.3881, "step": 12505 }, { - "epoch": 0.72, - "grad_norm": 0.31128367410087177, - "learning_rate": 3.8756652818613975e-06, - "loss": 0.2256, + "epoch": 0.57, + "grad_norm": 0.3649718590466074, + "learning_rate": 8.085045778164049e-06, + "loss": 0.2652, "step": 12506 }, { - "epoch": 0.72, - "grad_norm": 0.3889226384732306, - "learning_rate": 3.8741942953138616e-06, - "loss": 0.3229, + "epoch": 0.57, + "grad_norm": 0.3704535583965833, + "learning_rate": 8.083585416308642e-06, + "loss": 0.2704, "step": 12507 }, { - "epoch": 0.72, - "grad_norm": 1.4681783448097931, - "learning_rate": 3.872723520902713e-06, - "loss": 0.7554, + "epoch": 0.57, + "grad_norm": 0.7668013343495043, + "learning_rate": 8.082125096880808e-06, + "loss": 0.4164, "step": 12508 }, { - "epoch": 0.72, - "grad_norm": 0.2748888158937971, - "learning_rate": 3.87125295867888e-06, - "loss": 0.2006, + "epoch": 0.57, + "grad_norm": 0.40252819197572576, + "learning_rate": 8.080664819912877e-06, + "loss": 0.3291, "step": 12509 }, { - "epoch": 0.72, - "grad_norm": 0.27882312944220194, - "learning_rate": 3.8697826086933e-06, - "loss": 0.1742, + "epoch": 0.57, + "grad_norm": 0.3372206058233182, + "learning_rate": 8.079204585437177e-06, + "loss": 0.1967, "step": 12510 }, { - "epoch": 0.72, - "grad_norm": 0.3533427566146823, - "learning_rate": 3.868312470996884e-06, - "loss": 0.3008, + "epoch": 0.57, + "grad_norm": 0.5575128779912731, + "learning_rate": 8.077744393486036e-06, + "loss": 0.3846, "step": 12511 }, { - "epoch": 0.72, - "grad_norm": 0.3111571689105808, - "learning_rate": 3.866842545640542e-06, - "loss": 0.1881, + "epoch": 0.57, + "grad_norm": 0.636988304950756, + "learning_rate": 8.076284244091779e-06, + "loss": 0.4156, "step": 12512 }, { - "epoch": 0.72, - "grad_norm": 0.6455308980072993, - "learning_rate": 3.86537283267518e-06, - "loss": 0.3791, + "epoch": 0.57, + "grad_norm": 0.3011571854953526, + "learning_rate": 8.074824137286738e-06, + "loss": 0.1913, "step": 12513 }, { - "epoch": 0.72, - "grad_norm": 0.4809467316313205, - "learning_rate": 3.863903332151689e-06, - "loss": 0.3647, + "epoch": 0.57, + "grad_norm": 0.3005690470286853, + "learning_rate": 8.073364073103234e-06, + "loss": 0.2532, "step": 12514 }, { - "epoch": 0.72, - "grad_norm": 0.40863296174877306, - "learning_rate": 3.862434044120966e-06, - "loss": 0.2974, + "epoch": 0.57, + "grad_norm": 1.0750069108132279, + "learning_rate": 8.071904051573592e-06, + "loss": 0.5573, "step": 12515 }, { - "epoch": 0.72, - "grad_norm": 0.20722922763128526, - "learning_rate": 3.860964968633888e-06, - "loss": 0.1662, + "epoch": 0.57, + "grad_norm": 0.32269095283556476, + "learning_rate": 8.070444072730132e-06, + "loss": 0.1944, "step": 12516 }, { - "epoch": 0.72, - "grad_norm": 0.5038681834454446, - "learning_rate": 3.859496105741328e-06, - "loss": 0.3576, + "epoch": 0.58, + "grad_norm": 0.30407980089671194, + "learning_rate": 8.068984136605187e-06, + "loss": 0.2547, "step": 12517 }, { - "epoch": 0.72, - "grad_norm": 0.3730892496342433, - "learning_rate": 3.858027455494152e-06, - "loss": 0.271, + "epoch": 0.58, + "grad_norm": 0.8934025157688004, + "learning_rate": 8.06752424323107e-06, + "loss": 0.568, "step": 12518 }, { - "epoch": 0.72, - "grad_norm": 0.37789847744452215, - "learning_rate": 3.856559017943223e-06, - "loss": 0.2646, + "epoch": 0.58, + "grad_norm": 0.35157622106586794, + "learning_rate": 8.066064392640106e-06, + "loss": 0.2316, "step": 12519 }, { - "epoch": 0.72, - "grad_norm": 1.2120655893657206, - "learning_rate": 3.8550907931393925e-06, - "loss": 0.5642, + "epoch": 0.58, + "grad_norm": 0.3886878663802641, + "learning_rate": 8.064604584864607e-06, + "loss": 0.2693, "step": 12520 }, { - "epoch": 0.72, - "grad_norm": 0.32447720363858357, - "learning_rate": 3.853622781133503e-06, - "loss": 0.2848, + "epoch": 0.58, + "grad_norm": 0.3614275275720635, + "learning_rate": 8.0631448199369e-06, + "loss": 0.2895, "step": 12521 }, { - "epoch": 0.72, - "grad_norm": 0.3132306682340447, - "learning_rate": 3.852154981976388e-06, - "loss": 0.2106, + "epoch": 0.58, + "grad_norm": 0.3603961092805273, + "learning_rate": 8.0616850978893e-06, + "loss": 0.2837, "step": 12522 }, { - "epoch": 0.72, - "grad_norm": 0.43512513969447275, - "learning_rate": 3.8506873957188865e-06, - "loss": 0.3212, + "epoch": 0.58, + "grad_norm": 0.9109855957212859, + "learning_rate": 8.060225418754125e-06, + "loss": 0.3651, "step": 12523 }, { - "epoch": 0.72, - "grad_norm": 0.32854106565529423, - "learning_rate": 3.849220022411815e-06, - "loss": 0.261, + "epoch": 0.58, + "grad_norm": 0.7645880523581638, + "learning_rate": 8.058765782563688e-06, + "loss": 0.3986, "step": 12524 }, { - "epoch": 0.72, - "grad_norm": 0.4033956109292242, - "learning_rate": 3.84775286210599e-06, - "loss": 0.1076, + "epoch": 0.58, + "grad_norm": 0.2810392851338441, + "learning_rate": 8.057306189350305e-06, + "loss": 0.25, "step": 12525 }, { - "epoch": 0.72, - "grad_norm": 0.4484468926497038, - "learning_rate": 3.846285914852216e-06, - "loss": 0.3562, + "epoch": 0.58, + "grad_norm": 0.2310976151121476, + "learning_rate": 8.055846639146292e-06, + "loss": 0.139, "step": 12526 }, { - "epoch": 0.72, - "grad_norm": 0.2682409012322284, - "learning_rate": 3.844819180701302e-06, - "loss": 0.2414, + "epoch": 0.58, + "grad_norm": 0.9821769487500752, + "learning_rate": 8.054387131983962e-06, + "loss": 0.5024, "step": 12527 }, { - "epoch": 0.72, - "grad_norm": 1.2722056250509972, - "learning_rate": 3.843352659704032e-06, - "loss": 0.6141, + "epoch": 0.58, + "grad_norm": 0.42843692044450804, + "learning_rate": 8.052927667895624e-06, + "loss": 0.2828, "step": 12528 }, { - "epoch": 0.72, - "grad_norm": 0.4828470045708207, - "learning_rate": 3.841886351911195e-06, - "loss": 0.227, + "epoch": 0.58, + "grad_norm": 0.3391632759506232, + "learning_rate": 8.05146824691359e-06, + "loss": 0.2559, "step": 12529 }, { - "epoch": 0.72, - "grad_norm": 0.30225508543561413, - "learning_rate": 3.840420257373565e-06, - "loss": 0.245, + "epoch": 0.58, + "grad_norm": 1.1488297881680636, + "learning_rate": 8.050008869070172e-06, + "loss": 0.4057, "step": 12530 }, { - "epoch": 0.72, - "grad_norm": 0.29599291538165057, - "learning_rate": 3.83895437614192e-06, - "loss": 0.246, + "epoch": 0.58, + "grad_norm": 0.3698351660343765, + "learning_rate": 8.048549534397679e-06, + "loss": 0.2269, "step": 12531 }, { - "epoch": 0.72, - "grad_norm": 1.1752706306155982, - "learning_rate": 3.837488708267021e-06, - "loss": 0.3813, + "epoch": 0.58, + "grad_norm": 0.22585768041778964, + "learning_rate": 8.047090242928416e-06, + "loss": 0.1394, "step": 12532 }, { - "epoch": 0.72, - "grad_norm": 0.43554275734913983, - "learning_rate": 3.836023253799621e-06, - "loss": 0.2608, + "epoch": 0.58, + "grad_norm": 0.3938117946272398, + "learning_rate": 8.045630994694692e-06, + "loss": 0.2955, "step": 12533 }, { - "epoch": 0.72, - "grad_norm": 0.4608820204490836, - "learning_rate": 3.834558012790469e-06, - "loss": 0.347, + "epoch": 0.58, + "grad_norm": 0.4226909304057653, + "learning_rate": 8.044171789728816e-06, + "loss": 0.2942, "step": 12534 }, { - "epoch": 0.72, - "grad_norm": 0.2756554502113018, - "learning_rate": 3.833092985290311e-06, - "loss": 0.2264, + "epoch": 0.58, + "grad_norm": 0.5634467242269736, + "learning_rate": 8.042712628063094e-06, + "loss": 0.3734, "step": 12535 }, { - "epoch": 0.72, - "grad_norm": 0.39145909647593713, - "learning_rate": 3.831628171349877e-06, - "loss": 0.2618, + "epoch": 0.58, + "grad_norm": 1.3491688688008365, + "learning_rate": 8.041253509729825e-06, + "loss": 0.336, "step": 12536 }, { - "epoch": 0.72, - "grad_norm": 0.49976557663593313, - "learning_rate": 3.8301635710198946e-06, - "loss": 0.2346, + "epoch": 0.58, + "grad_norm": 0.2954357993854914, + "learning_rate": 8.03979443476132e-06, + "loss": 0.258, "step": 12537 }, { - "epoch": 0.72, - "grad_norm": 0.4174397744132169, - "learning_rate": 3.828699184351079e-06, - "loss": 0.2437, + "epoch": 0.58, + "grad_norm": 0.2706554027162987, + "learning_rate": 8.038335403189868e-06, + "loss": 0.2044, "step": 12538 }, { - "epoch": 0.72, - "grad_norm": 0.276496262939729, - "learning_rate": 3.8272350113941494e-06, - "loss": 0.2565, + "epoch": 0.58, + "grad_norm": 0.9811512680948671, + "learning_rate": 8.036876415047786e-06, + "loss": 0.2276, "step": 12539 }, { - "epoch": 0.72, - "grad_norm": 0.5865133389721945, - "learning_rate": 3.825771052199805e-06, - "loss": 0.4208, + "epoch": 0.58, + "grad_norm": 0.4686787196730729, + "learning_rate": 8.035417470367368e-06, + "loss": 0.3077, "step": 12540 }, { - "epoch": 0.72, - "grad_norm": 0.8880953147875467, - "learning_rate": 3.824307306818745e-06, - "loss": 0.4872, + "epoch": 0.58, + "grad_norm": 0.46839624518546885, + "learning_rate": 8.033958569180912e-06, + "loss": 0.3248, "step": 12541 }, { - "epoch": 0.72, - "grad_norm": 0.2268528697619566, - "learning_rate": 3.822843775301656e-06, - "loss": 0.1535, + "epoch": 0.58, + "grad_norm": 0.8658598301058729, + "learning_rate": 8.032499711520717e-06, + "loss": 0.3044, "step": 12542 }, { - "epoch": 0.72, - "grad_norm": 0.29010451586633673, - "learning_rate": 3.821380457699217e-06, - "loss": 0.2439, + "epoch": 0.58, + "grad_norm": 0.34766056563632164, + "learning_rate": 8.031040897419084e-06, + "loss": 0.2779, "step": 12543 }, { - "epoch": 0.72, - "grad_norm": 1.2260624615809157, - "learning_rate": 3.819917354062113e-06, - "loss": 0.6806, + "epoch": 0.58, + "grad_norm": 0.4023602701488176, + "learning_rate": 8.02958212690831e-06, + "loss": 0.2396, "step": 12544 }, { - "epoch": 0.72, - "grad_norm": 0.2824308471763668, - "learning_rate": 3.8184544644410026e-06, - "loss": 0.2042, + "epoch": 0.58, + "grad_norm": 0.35876524670454457, + "learning_rate": 8.028123400020686e-06, + "loss": 0.2035, "step": 12545 }, { - "epoch": 0.72, - "grad_norm": 0.776619086277665, - "learning_rate": 3.816991788886551e-06, - "loss": 0.4011, + "epoch": 0.58, + "grad_norm": 0.4494014621544313, + "learning_rate": 8.026664716788506e-06, + "loss": 0.2683, "step": 12546 }, { - "epoch": 0.72, - "grad_norm": 0.3530785405930903, - "learning_rate": 3.815529327449402e-06, - "loss": 0.312, + "epoch": 0.58, + "grad_norm": 0.48402231522786154, + "learning_rate": 8.025206077244072e-06, + "loss": 0.3759, "step": 12547 }, { - "epoch": 0.72, - "grad_norm": 0.32265673340370904, - "learning_rate": 3.8140670801802114e-06, - "loss": 0.1984, + "epoch": 0.58, + "grad_norm": 0.45194413819769047, + "learning_rate": 8.023747481419674e-06, + "loss": 0.3741, "step": 12548 }, { - "epoch": 0.72, - "grad_norm": 0.2522603406858445, - "learning_rate": 3.8126050471296116e-06, - "loss": 0.1534, + "epoch": 0.58, + "grad_norm": 0.3680181159338744, + "learning_rate": 8.022288929347599e-06, + "loss": 0.2091, "step": 12549 }, { - "epoch": 0.72, - "grad_norm": 0.3592744062620737, - "learning_rate": 3.811143228348233e-06, - "loss": 0.3229, + "epoch": 0.58, + "grad_norm": 0.32448754821138226, + "learning_rate": 8.020830421060137e-06, + "loss": 0.2475, "step": 12550 }, { - "epoch": 0.72, - "grad_norm": 0.33175709985865126, - "learning_rate": 3.809681623886694e-06, - "loss": 0.1751, + "epoch": 0.58, + "grad_norm": 0.5119490706206589, + "learning_rate": 8.019371956589588e-06, + "loss": 0.2276, "step": 12551 }, { - "epoch": 0.72, - "grad_norm": 0.5046438803543758, - "learning_rate": 3.8082202337956187e-06, - "loss": 0.3209, + "epoch": 0.58, + "grad_norm": 0.34538058686291784, + "learning_rate": 8.017913535968234e-06, + "loss": 0.2048, "step": 12552 }, { - "epoch": 0.72, - "grad_norm": 1.0417354951774573, - "learning_rate": 3.80675905812561e-06, - "loss": 0.524, + "epoch": 0.58, + "grad_norm": 0.33584169019823745, + "learning_rate": 8.016455159228363e-06, + "loss": 0.2912, "step": 12553 }, { - "epoch": 0.72, - "grad_norm": 0.3264891220161526, - "learning_rate": 3.805298096927269e-06, - "loss": 0.2089, + "epoch": 0.58, + "grad_norm": 0.7784194308493582, + "learning_rate": 8.014996826402263e-06, + "loss": 0.528, "step": 12554 }, { - "epoch": 0.72, - "grad_norm": 0.2294114627721482, - "learning_rate": 3.803837350251188e-06, - "loss": 0.2131, + "epoch": 0.58, + "grad_norm": 0.32368324024993933, + "learning_rate": 8.013538537522219e-06, + "loss": 0.1974, "step": 12555 }, { - "epoch": 0.72, - "grad_norm": 1.2952230032564946, - "learning_rate": 3.8023768181479493e-06, - "loss": 0.8189, + "epoch": 0.58, + "grad_norm": 0.30192809124081077, + "learning_rate": 8.01208029262052e-06, + "loss": 0.1932, "step": 12556 }, { - "epoch": 0.72, - "grad_norm": 0.48200138614488336, - "learning_rate": 3.800916500668139e-06, - "loss": 0.3078, + "epoch": 0.58, + "grad_norm": 0.41836740439964365, + "learning_rate": 8.010622091729444e-06, + "loss": 0.2946, "step": 12557 }, { - "epoch": 0.72, - "grad_norm": 0.2545052188435359, - "learning_rate": 3.7994563978623243e-06, - "loss": 0.2414, + "epoch": 0.58, + "grad_norm": 0.3853546307528533, + "learning_rate": 8.00916393488128e-06, + "loss": 0.232, "step": 12558 }, { - "epoch": 0.72, - "grad_norm": 1.1399172653767695, - "learning_rate": 3.7979965097810667e-06, - "loss": 0.5805, + "epoch": 0.58, + "grad_norm": 0.5015878029593724, + "learning_rate": 8.007705822108305e-06, + "loss": 0.3632, "step": 12559 }, { - "epoch": 0.72, - "grad_norm": 0.4024592476572722, - "learning_rate": 3.7965368364749244e-06, - "loss": 0.2655, + "epoch": 0.58, + "grad_norm": 1.0909867155640225, + "learning_rate": 8.006247753442805e-06, + "loss": 0.7113, "step": 12560 }, { - "epoch": 0.72, - "grad_norm": 0.2189980848773631, - "learning_rate": 3.7950773779944437e-06, - "loss": 0.1199, + "epoch": 0.58, + "grad_norm": 0.33987331106490626, + "learning_rate": 8.004789728917059e-06, + "loss": 0.2905, "step": 12561 }, { - "epoch": 0.72, - "grad_norm": 0.31760219818640645, - "learning_rate": 3.793618134390168e-06, - "loss": 0.3045, + "epoch": 0.58, + "grad_norm": 0.3679740839261454, + "learning_rate": 8.003331748563344e-06, + "loss": 0.2307, "step": 12562 }, { - "epoch": 0.72, - "grad_norm": 0.3990174933506131, - "learning_rate": 3.792159105712625e-06, - "loss": 0.2764, + "epoch": 0.58, + "grad_norm": 0.3239617159083856, + "learning_rate": 8.001873812413936e-06, + "loss": 0.2314, "step": 12563 }, { - "epoch": 0.72, - "grad_norm": 0.49668825548514706, - "learning_rate": 3.7907002920123482e-06, - "loss": 0.2711, + "epoch": 0.58, + "grad_norm": 0.4588379688612307, + "learning_rate": 8.00041592050112e-06, + "loss": 0.3148, "step": 12564 }, { - "epoch": 0.72, - "grad_norm": 1.2066436895640726, - "learning_rate": 3.7892416933398534e-06, - "loss": 0.5866, + "epoch": 0.58, + "grad_norm": 0.2666343635475842, + "learning_rate": 7.99895807285717e-06, + "loss": 0.2365, "step": 12565 }, { - "epoch": 0.72, - "grad_norm": 0.325699463496659, - "learning_rate": 3.7877833097456527e-06, - "loss": 0.259, + "epoch": 0.58, + "grad_norm": 1.2850071211710368, + "learning_rate": 7.99750026951436e-06, + "loss": 0.7672, "step": 12566 }, { - "epoch": 0.72, - "grad_norm": 0.2975927184507548, - "learning_rate": 3.786325141280248e-06, - "loss": 0.2501, + "epoch": 0.58, + "grad_norm": 0.7461355930196935, + "learning_rate": 7.996042510504963e-06, + "loss": 0.3431, "step": 12567 }, { - "epoch": 0.72, - "grad_norm": 0.43994328199667015, - "learning_rate": 3.7848671879941334e-06, - "loss": 0.2036, + "epoch": 0.58, + "grad_norm": 0.2805438155926942, + "learning_rate": 7.994584795861248e-06, + "loss": 0.1719, "step": 12568 }, { - "epoch": 0.72, - "grad_norm": 0.6533205368181108, - "learning_rate": 3.783409449937804e-06, - "loss": 0.3164, + "epoch": 0.58, + "grad_norm": 0.2713886302334592, + "learning_rate": 7.993127125615498e-06, + "loss": 0.2428, "step": 12569 }, { - "epoch": 0.72, - "grad_norm": 0.3752405058155906, - "learning_rate": 3.7819519271617377e-06, - "loss": 0.3023, + "epoch": 0.58, + "grad_norm": 0.6307198542543352, + "learning_rate": 7.991669499799978e-06, + "loss": 0.3941, "step": 12570 }, { - "epoch": 0.72, - "grad_norm": 0.3617519561068833, - "learning_rate": 3.7804946197164096e-06, - "loss": 0.2507, + "epoch": 0.58, + "grad_norm": 0.3468610344511736, + "learning_rate": 7.990211918446961e-06, + "loss": 0.222, "step": 12571 }, { - "epoch": 0.72, - "grad_norm": 0.594735705756224, - "learning_rate": 3.779037527652282e-06, - "loss": 0.3228, + "epoch": 0.58, + "grad_norm": 1.3039111211953105, + "learning_rate": 7.988754381588712e-06, + "loss": 0.7556, "step": 12572 }, { - "epoch": 0.72, - "grad_norm": 0.2359376307305643, - "learning_rate": 3.77758065101982e-06, - "loss": 0.1857, + "epoch": 0.58, + "grad_norm": 0.32071360620227096, + "learning_rate": 7.987296889257505e-06, + "loss": 0.2648, "step": 12573 }, { - "epoch": 0.72, - "grad_norm": 0.35542427665950116, - "learning_rate": 3.7761239898694724e-06, - "loss": 0.2582, + "epoch": 0.58, + "grad_norm": 0.43378719543085736, + "learning_rate": 7.985839441485604e-06, + "loss": 0.287, "step": 12574 }, { - "epoch": 0.72, - "grad_norm": 0.5222486191552719, - "learning_rate": 3.774667544251683e-06, - "loss": 0.309, + "epoch": 0.58, + "grad_norm": 0.5992632740044342, + "learning_rate": 7.984382038305278e-06, + "loss": 0.3066, "step": 12575 }, { - "epoch": 0.72, - "grad_norm": 0.41862420835222075, - "learning_rate": 3.773211314216887e-06, - "loss": 0.3407, + "epoch": 0.58, + "grad_norm": 0.2788417247655016, + "learning_rate": 7.982924679748789e-06, + "loss": 0.2237, "step": 12576 }, { - "epoch": 0.72, - "grad_norm": 1.3031113013277529, - "learning_rate": 3.7717552998155184e-06, - "loss": 0.3212, + "epoch": 0.58, + "grad_norm": 0.2703859297933438, + "learning_rate": 7.981467365848405e-06, + "loss": 0.2307, "step": 12577 }, { - "epoch": 0.72, - "grad_norm": 0.3062819204901965, - "learning_rate": 3.770299501097995e-06, - "loss": 0.2475, + "epoch": 0.58, + "grad_norm": 0.9936824965760298, + "learning_rate": 7.980010096636387e-06, + "loss": 0.5441, "step": 12578 }, { - "epoch": 0.72, - "grad_norm": 0.29069925258267515, - "learning_rate": 3.768843918114733e-06, - "loss": 0.2404, + "epoch": 0.58, + "grad_norm": 0.6704327879376025, + "learning_rate": 7.978552872145e-06, + "loss": 0.3623, "step": 12579 }, { - "epoch": 0.72, - "grad_norm": 0.4692780214015884, - "learning_rate": 3.767388550916138e-06, - "loss": 0.2927, + "epoch": 0.58, + "grad_norm": 0.41489905911827907, + "learning_rate": 7.9770956924065e-06, + "loss": 0.3125, "step": 12580 }, { - "epoch": 0.72, - "grad_norm": 0.3419993754544002, - "learning_rate": 3.7659333995526047e-06, - "loss": 0.2067, + "epoch": 0.58, + "grad_norm": 0.39821486208140056, + "learning_rate": 7.975638557453155e-06, + "loss": 0.2826, "step": 12581 }, { - "epoch": 0.72, - "grad_norm": 0.5372210817123204, - "learning_rate": 3.7644784640745346e-06, - "loss": 0.3642, + "epoch": 0.58, + "grad_norm": 0.28891383774287344, + "learning_rate": 7.974181467317222e-06, + "loss": 0.1633, "step": 12582 }, { - "epoch": 0.72, - "grad_norm": 0.3720534268613179, - "learning_rate": 3.763023744532307e-06, - "loss": 0.3042, + "epoch": 0.58, + "grad_norm": 0.37276327839125256, + "learning_rate": 7.972724422030957e-06, + "loss": 0.2613, "step": 12583 }, { - "epoch": 0.72, - "grad_norm": 0.3256481791308857, - "learning_rate": 3.761569240976298e-06, - "loss": 0.1504, + "epoch": 0.58, + "grad_norm": 0.5296359728156753, + "learning_rate": 7.971267421626624e-06, + "loss": 0.4101, "step": 12584 }, { - "epoch": 0.72, - "grad_norm": 0.4828728938480592, - "learning_rate": 3.7601149534568757e-06, - "loss": 0.3779, + "epoch": 0.58, + "grad_norm": 0.4454463832865182, + "learning_rate": 7.969810466136466e-06, + "loss": 0.2414, "step": 12585 }, { - "epoch": 0.72, - "grad_norm": 0.313460913744006, - "learning_rate": 3.7586608820244076e-06, - "loss": 0.3181, + "epoch": 0.58, + "grad_norm": 0.37110394714560857, + "learning_rate": 7.968353555592754e-06, + "loss": 0.2564, "step": 12586 }, { - "epoch": 0.72, - "grad_norm": 0.18315300828709036, - "learning_rate": 3.7572070267292438e-06, - "loss": 0.0854, + "epoch": 0.58, + "grad_norm": 0.6892518185472958, + "learning_rate": 7.966896690027734e-06, + "loss": 0.3907, "step": 12587 }, { - "epoch": 0.72, - "grad_norm": 0.33954649548166543, - "learning_rate": 3.7557533876217325e-06, - "loss": 0.2789, + "epoch": 0.58, + "grad_norm": 0.29306298979400774, + "learning_rate": 7.965439869473664e-06, + "loss": 0.1567, "step": 12588 }, { - "epoch": 0.72, - "grad_norm": 1.1214543359616003, - "learning_rate": 3.7542999647522094e-06, - "loss": 0.5101, + "epoch": 0.58, + "grad_norm": 0.24921091001228685, + "learning_rate": 7.963983093962792e-06, + "loss": 0.2224, "step": 12589 }, { - "epoch": 0.72, - "grad_norm": 0.3266141732037362, - "learning_rate": 3.7528467581710137e-06, - "loss": 0.2917, + "epoch": 0.58, + "grad_norm": 1.3396481506724143, + "learning_rate": 7.962526363527372e-06, + "loss": 0.7508, "step": 12590 }, { - "epoch": 0.72, - "grad_norm": 0.3392692953249234, - "learning_rate": 3.7513937679284664e-06, - "loss": 0.2326, + "epoch": 0.58, + "grad_norm": 0.5210697713419054, + "learning_rate": 7.961069678199658e-06, + "loss": 0.2443, "step": 12591 }, { - "epoch": 0.72, - "grad_norm": 0.704023761433763, - "learning_rate": 3.749940994074884e-06, - "loss": 0.4103, + "epoch": 0.58, + "grad_norm": 0.3197146093217933, + "learning_rate": 7.959613038011892e-06, + "loss": 0.2786, "step": 12592 }, { - "epoch": 0.72, - "grad_norm": 0.3830258897079524, - "learning_rate": 3.7484884366605758e-06, - "loss": 0.2555, + "epoch": 0.58, + "grad_norm": 0.4774819893334619, + "learning_rate": 7.958156442996325e-06, + "loss": 0.3719, "step": 12593 }, { - "epoch": 0.72, - "grad_norm": 0.24571652256923193, - "learning_rate": 3.7470360957358442e-06, - "loss": 0.2247, + "epoch": 0.58, + "grad_norm": 0.5340840542738716, + "learning_rate": 7.956699893185213e-06, + "loss": 0.1479, "step": 12594 }, { - "epoch": 0.72, - "grad_norm": 0.4350436646267506, - "learning_rate": 3.7455839713509844e-06, - "loss": 0.2822, + "epoch": 0.58, + "grad_norm": 0.3524340723754244, + "learning_rate": 7.955243388610794e-06, + "loss": 0.2429, "step": 12595 }, { - "epoch": 0.72, - "grad_norm": 0.5916263926081792, - "learning_rate": 3.7441320635562828e-06, - "loss": 0.3318, + "epoch": 0.58, + "grad_norm": 0.49929082941615566, + "learning_rate": 7.953786929305315e-06, + "loss": 0.386, "step": 12596 }, { - "epoch": 0.72, - "grad_norm": 0.3253913954279248, - "learning_rate": 3.7426803724020143e-06, - "loss": 0.2366, + "epoch": 0.58, + "grad_norm": 0.30312209599273787, + "learning_rate": 7.952330515301022e-06, + "loss": 0.2419, "step": 12597 }, { - "epoch": 0.72, - "grad_norm": 0.3462154083645354, - "learning_rate": 3.7412288979384604e-06, - "loss": 0.2957, + "epoch": 0.58, + "grad_norm": 0.3517978594723385, + "learning_rate": 7.950874146630152e-06, + "loss": 0.2213, "step": 12598 }, { - "epoch": 0.72, - "grad_norm": 0.36298865887781684, - "learning_rate": 3.739777640215879e-06, - "loss": 0.2289, + "epoch": 0.58, + "grad_norm": 0.6740411118302502, + "learning_rate": 7.949417823324958e-06, + "loss": 0.4333, "step": 12599 }, { - "epoch": 0.72, - "grad_norm": 0.37829378006780723, - "learning_rate": 3.7383265992845297e-06, - "loss": 0.2398, + "epoch": 0.58, + "grad_norm": 0.3405168737369513, + "learning_rate": 7.947961545417677e-06, + "loss": 0.2957, "step": 12600 }, { - "epoch": 0.72, - "grad_norm": 0.4908548396772708, - "learning_rate": 3.736875775194657e-06, - "loss": 0.2657, + "epoch": 0.58, + "grad_norm": 0.27730768999340255, + "learning_rate": 7.94650531294055e-06, + "loss": 0.1676, "step": 12601 }, { - "epoch": 0.72, - "grad_norm": 0.34330907741617966, - "learning_rate": 3.7354251679965103e-06, - "loss": 0.2757, + "epoch": 0.58, + "grad_norm": 0.40300658984119136, + "learning_rate": 7.945049125925815e-06, + "loss": 0.2716, "step": 12602 }, { - "epoch": 0.72, - "grad_norm": 0.3923379452795223, - "learning_rate": 3.7339747777403212e-06, - "loss": 0.2949, + "epoch": 0.58, + "grad_norm": 0.6671260172681813, + "learning_rate": 7.943592984405714e-06, + "loss": 0.3891, "step": 12603 }, { - "epoch": 0.72, - "grad_norm": 0.8516521788185919, - "learning_rate": 3.7325246044763164e-06, - "loss": 0.2947, + "epoch": 0.58, + "grad_norm": 0.28061672707988833, + "learning_rate": 7.94213688841248e-06, + "loss": 0.2275, "step": 12604 }, { - "epoch": 0.72, - "grad_norm": 1.2060849155640518, - "learning_rate": 3.7310746482547143e-06, - "loss": 0.7756, + "epoch": 0.58, + "grad_norm": 0.49158423093495524, + "learning_rate": 7.940680837978353e-06, + "loss": 0.3354, "step": 12605 }, { - "epoch": 0.72, - "grad_norm": 0.2613915751889735, - "learning_rate": 3.729624909125724e-06, - "loss": 0.2592, + "epoch": 0.58, + "grad_norm": 1.1317518225799188, + "learning_rate": 7.939224833135567e-06, + "loss": 0.5889, "step": 12606 }, { - "epoch": 0.72, - "grad_norm": 0.25492878460525664, - "learning_rate": 3.7281753871395575e-06, - "loss": 0.1748, + "epoch": 0.58, + "grad_norm": 0.2902423319471402, + "learning_rate": 7.937768873916358e-06, + "loss": 0.2053, "step": 12607 }, { - "epoch": 0.72, - "grad_norm": 0.6764847935708693, - "learning_rate": 3.726726082346408e-06, - "loss": 0.3877, + "epoch": 0.58, + "grad_norm": 0.5203033092001017, + "learning_rate": 7.936312960352957e-06, + "loss": 0.4001, "step": 12608 }, { - "epoch": 0.72, - "grad_norm": 0.3204321858680907, - "learning_rate": 3.725276994796463e-06, - "loss": 0.2423, + "epoch": 0.58, + "grad_norm": 0.32220372637452793, + "learning_rate": 7.934857092477599e-06, + "loss": 0.2722, "step": 12609 }, { - "epoch": 0.72, - "grad_norm": 0.3596000242651779, - "learning_rate": 3.7238281245399032e-06, - "loss": 0.2481, + "epoch": 0.58, + "grad_norm": 0.3475609924164586, + "learning_rate": 7.933401270322512e-06, + "loss": 0.2823, "step": 12610 }, { - "epoch": 0.72, - "grad_norm": 0.7745774779469131, - "learning_rate": 3.72237947162691e-06, - "loss": 0.5167, + "epoch": 0.58, + "grad_norm": 0.37695932096699447, + "learning_rate": 7.931945493919932e-06, + "loss": 0.1051, "step": 12611 }, { - "epoch": 0.72, - "grad_norm": 0.35232144352690364, - "learning_rate": 3.7209310361076445e-06, - "loss": 0.2963, + "epoch": 0.58, + "grad_norm": 0.39458721459345736, + "learning_rate": 7.930489763302085e-06, + "loss": 0.2924, "step": 12612 }, { - "epoch": 0.72, - "grad_norm": 0.24440360054586446, - "learning_rate": 3.719482818032267e-06, - "loss": 0.1043, + "epoch": 0.58, + "grad_norm": 0.36523231724918903, + "learning_rate": 7.929034078501202e-06, + "loss": 0.2929, "step": 12613 }, { - "epoch": 0.72, - "grad_norm": 0.3260683902895594, - "learning_rate": 3.7180348174509275e-06, - "loss": 0.2785, + "epoch": 0.58, + "grad_norm": 0.9362589753683518, + "learning_rate": 7.927578439549506e-06, + "loss": 0.3702, "step": 12614 }, { - "epoch": 0.72, - "grad_norm": 0.34981957635207345, - "learning_rate": 3.7165870344137746e-06, - "loss": 0.2824, + "epoch": 0.58, + "grad_norm": 0.3575964060566967, + "learning_rate": 7.926122846479224e-06, + "loss": 0.2638, "step": 12615 }, { - "epoch": 0.72, - "grad_norm": 1.22822645248553, - "learning_rate": 3.715139468970942e-06, - "loss": 0.6816, + "epoch": 0.58, + "grad_norm": 0.38522296828818736, + "learning_rate": 7.924667299322585e-06, + "loss": 0.274, "step": 12616 }, { - "epoch": 0.72, - "grad_norm": 0.3974630087585372, - "learning_rate": 3.7136921211725595e-06, - "loss": 0.2532, + "epoch": 0.58, + "grad_norm": 0.30684398423890547, + "learning_rate": 7.923211798111815e-06, + "loss": 0.2289, "step": 12617 }, { - "epoch": 0.72, - "grad_norm": 0.34005021932049256, - "learning_rate": 3.7122449910687495e-06, - "loss": 0.2688, + "epoch": 0.58, + "grad_norm": 0.8098901239714268, + "learning_rate": 7.92175634287913e-06, + "loss": 0.4745, "step": 12618 }, { - "epoch": 0.73, - "grad_norm": 0.3135211846099563, - "learning_rate": 3.710798078709621e-06, - "loss": 0.2613, + "epoch": 0.58, + "grad_norm": 0.36607871518936536, + "learning_rate": 7.920300933656758e-06, + "loss": 0.2922, "step": 12619 }, { - "epoch": 0.73, - "grad_norm": 0.3350723208691652, - "learning_rate": 3.7093513841452876e-06, - "loss": 0.1817, + "epoch": 0.58, + "grad_norm": 0.3236687201078858, + "learning_rate": 7.91884557047692e-06, + "loss": 0.2664, "step": 12620 }, { - "epoch": 0.73, - "grad_norm": 0.3935505259433406, - "learning_rate": 3.7079049074258465e-06, - "loss": 0.3123, + "epoch": 0.58, + "grad_norm": 0.7948109816623913, + "learning_rate": 7.917390253371835e-06, + "loss": 0.4017, "step": 12621 }, { - "epoch": 0.73, - "grad_norm": 0.36173769233262987, - "learning_rate": 3.7064586486013865e-06, - "loss": 0.3023, - "step": 12622 + "epoch": 0.58, + "grad_norm": 0.3347989140896034, + "learning_rate": 7.915934982373723e-06, + "loss": 0.2506, + "step": 12622 }, { - "epoch": 0.73, - "grad_norm": 1.1886936765534302, - "learning_rate": 3.7050126077219908e-06, - "loss": 0.4164, + "epoch": 0.58, + "grad_norm": 0.31445494204293517, + "learning_rate": 7.914479757514798e-06, + "loss": 0.1825, "step": 12623 }, { - "epoch": 0.73, - "grad_norm": 0.31195240262411766, - "learning_rate": 3.70356678483774e-06, - "loss": 0.2534, + "epoch": 0.58, + "grad_norm": 0.34446354462157114, + "learning_rate": 7.913024578827284e-06, + "loss": 0.238, "step": 12624 }, { - "epoch": 0.73, - "grad_norm": 0.43851398106112804, - "learning_rate": 3.702121179998701e-06, - "loss": 0.3313, + "epoch": 0.58, + "grad_norm": 0.3802373636255726, + "learning_rate": 7.911569446343394e-06, + "loss": 0.2823, "step": 12625 }, { - "epoch": 0.73, - "grad_norm": 0.3608270575366872, - "learning_rate": 3.7006757932549355e-06, - "loss": 0.2095, + "epoch": 0.58, + "grad_norm": 0.8926159130597905, + "learning_rate": 7.910114360095345e-06, + "loss": 0.4582, "step": 12626 }, { - "epoch": 0.73, - "grad_norm": 0.2572942190550343, - "learning_rate": 3.6992306246564923e-06, - "loss": 0.2211, + "epoch": 0.58, + "grad_norm": 0.8681639670852823, + "learning_rate": 7.908659320115349e-06, + "loss": 0.334, "step": 12627 }, { - "epoch": 0.73, - "grad_norm": 1.2467478374867251, - "learning_rate": 3.697785674253428e-06, - "loss": 0.6807, + "epoch": 0.58, + "grad_norm": 0.29752523392779107, + "learning_rate": 7.907204326435616e-06, + "loss": 0.2555, "step": 12628 }, { - "epoch": 0.73, - "grad_norm": 0.4778900262033747, - "learning_rate": 3.696340942095772e-06, - "loss": 0.3873, + "epoch": 0.58, + "grad_norm": 0.2815656824090016, + "learning_rate": 7.905749379088366e-06, + "loss": 0.1923, "step": 12629 }, { - "epoch": 0.73, - "grad_norm": 0.2611211344507252, - "learning_rate": 3.6948964282335576e-06, - "loss": 0.2184, + "epoch": 0.58, + "grad_norm": 0.978202880847337, + "learning_rate": 7.904294478105806e-06, + "loss": 0.3791, "step": 12630 }, { - "epoch": 0.73, - "grad_norm": 0.6519226464029577, - "learning_rate": 3.693452132716806e-06, - "loss": 0.3749, + "epoch": 0.58, + "grad_norm": 0.3341207522871565, + "learning_rate": 7.90283962352015e-06, + "loss": 0.247, "step": 12631 }, { - "epoch": 0.73, - "grad_norm": 0.37288735997878936, - "learning_rate": 3.6920080555955396e-06, - "loss": 0.2098, + "epoch": 0.58, + "grad_norm": 0.5185402863867649, + "learning_rate": 7.901384815363595e-06, + "loss": 0.3246, "step": 12632 }, { - "epoch": 0.73, - "grad_norm": 0.2948673783502911, - "learning_rate": 3.6905641969197626e-06, - "loss": 0.1943, + "epoch": 0.58, + "grad_norm": 1.3103867725155687, + "learning_rate": 7.899930053668362e-06, + "loss": 0.3698, "step": 12633 }, { - "epoch": 0.73, - "grad_norm": 0.34946669359400256, - "learning_rate": 3.689120556739475e-06, - "loss": 0.3015, + "epoch": 0.58, + "grad_norm": 0.6587853889627598, + "learning_rate": 7.898475338466655e-06, + "loss": 0.2963, "step": 12634 }, { - "epoch": 0.73, - "grad_norm": 1.262967952928276, - "learning_rate": 3.687677135104669e-06, - "loss": 0.7467, + "epoch": 0.58, + "grad_norm": 0.2601060435970856, + "learning_rate": 7.897020669790678e-06, + "loss": 0.1745, "step": 12635 }, { - "epoch": 0.73, - "grad_norm": 0.2994023778760764, - "learning_rate": 3.6862339320653353e-06, - "loss": 0.1952, + "epoch": 0.58, + "grad_norm": 0.3850765855647958, + "learning_rate": 7.895566047672635e-06, + "loss": 0.3138, "step": 12636 }, { - "epoch": 0.73, - "grad_norm": 0.6864311781365865, - "learning_rate": 3.6847909476714495e-06, - "loss": 0.3597, + "epoch": 0.58, + "grad_norm": 0.33389922883343787, + "learning_rate": 7.894111472144733e-06, + "loss": 0.1913, "step": 12637 }, { - "epoch": 0.73, - "grad_norm": 0.2598362738797895, - "learning_rate": 3.683348181972981e-06, - "loss": 0.2505, + "epoch": 0.58, + "grad_norm": 1.161787970066977, + "learning_rate": 7.892656943239172e-06, + "loss": 0.4289, "step": 12638 }, { - "epoch": 0.73, - "grad_norm": 0.3127138752691672, - "learning_rate": 3.68190563501989e-06, - "loss": 0.1789, + "epoch": 0.58, + "grad_norm": 1.1998857445695863, + "learning_rate": 7.891202460988158e-06, + "loss": 0.6669, "step": 12639 }, { - "epoch": 0.73, - "grad_norm": 0.47588630014745503, - "learning_rate": 3.6804633068621388e-06, - "loss": 0.3032, + "epoch": 0.58, + "grad_norm": 0.26249887024212365, + "learning_rate": 7.889748025423882e-06, + "loss": 0.2055, "step": 12640 }, { - "epoch": 0.73, - "grad_norm": 0.34987437967800145, - "learning_rate": 3.6790211975496714e-06, - "loss": 0.3093, + "epoch": 0.58, + "grad_norm": 0.276589762392002, + "learning_rate": 7.88829363657856e-06, + "loss": 0.2091, "step": 12641 }, { - "epoch": 0.73, - "grad_norm": 0.3070022736126839, - "learning_rate": 3.6775793071324283e-06, - "loss": 0.2453, + "epoch": 0.58, + "grad_norm": 1.5612438129931288, + "learning_rate": 7.886839294484378e-06, + "loss": 0.8209, "step": 12642 }, { - "epoch": 0.73, - "grad_norm": 0.7542534218968645, - "learning_rate": 3.6761376356603385e-06, - "loss": 0.3064, + "epoch": 0.58, + "grad_norm": 0.3407378826065463, + "learning_rate": 7.885384999173536e-06, + "loss": 0.2279, "step": 12643 }, { - "epoch": 0.73, - "grad_norm": 0.39024087041958594, - "learning_rate": 3.674696183183334e-06, - "loss": 0.225, + "epoch": 0.58, + "grad_norm": 0.3801962232979422, + "learning_rate": 7.883930750678234e-06, + "loss": 0.3204, "step": 12644 }, { - "epoch": 0.73, - "grad_norm": 0.2456900281354834, - "learning_rate": 3.6732549497513292e-06, - "loss": 0.2106, + "epoch": 0.58, + "grad_norm": 1.2450762092730006, + "learning_rate": 7.88247654903066e-06, + "loss": 0.7545, "step": 12645 }, { - "epoch": 0.73, - "grad_norm": 0.34175475930678456, - "learning_rate": 3.6718139354142326e-06, - "loss": 0.2592, + "epoch": 0.58, + "grad_norm": 0.34530719906204305, + "learning_rate": 7.88102239426302e-06, + "loss": 0.1896, "step": 12646 }, { - "epoch": 0.73, - "grad_norm": 0.8023321846699596, - "learning_rate": 3.670373140221947e-06, - "loss": 0.452, + "epoch": 0.58, + "grad_norm": 0.3443003768136979, + "learning_rate": 7.8795682864075e-06, + "loss": 0.2042, "step": 12647 }, { - "epoch": 0.73, - "grad_norm": 0.3495489226786334, - "learning_rate": 3.6689325642243643e-06, - "loss": 0.2793, + "epoch": 0.58, + "grad_norm": 0.3722375167670876, + "learning_rate": 7.878114225496296e-06, + "loss": 0.3331, "step": 12648 }, { - "epoch": 0.73, - "grad_norm": 0.7430728352602514, - "learning_rate": 3.6674922074713783e-06, - "loss": 0.3452, + "epoch": 0.58, + "grad_norm": 0.3494477264956144, + "learning_rate": 7.876660211561596e-06, + "loss": 0.2944, "step": 12649 }, { - "epoch": 0.73, - "grad_norm": 0.3347746450775112, - "learning_rate": 3.6660520700128642e-06, - "loss": 0.3015, + "epoch": 0.58, + "grad_norm": 0.8727556846387707, + "learning_rate": 7.875206244635594e-06, + "loss": 0.3558, "step": 12650 }, { - "epoch": 0.73, - "grad_norm": 0.24771906724986534, - "learning_rate": 3.6646121518986954e-06, - "loss": 0.2161, + "epoch": 0.58, + "grad_norm": 0.5443354494664977, + "learning_rate": 7.873752324750476e-06, + "loss": 0.4219, "step": 12651 }, { - "epoch": 0.73, - "grad_norm": 0.3430615488225278, - "learning_rate": 3.6631724531787314e-06, - "loss": 0.113, + "epoch": 0.58, + "grad_norm": 0.5220989721043423, + "learning_rate": 7.872298451938434e-06, + "loss": 0.2709, "step": 12652 }, { - "epoch": 0.73, - "grad_norm": 0.38006058780149277, - "learning_rate": 3.6617329739028373e-06, - "loss": 0.2922, + "epoch": 0.58, + "grad_norm": 0.2557293559042787, + "learning_rate": 7.870844626231652e-06, + "loss": 0.1589, "step": 12653 }, { - "epoch": 0.73, - "grad_norm": 0.3295860270105801, - "learning_rate": 3.660293714120856e-06, - "loss": 0.2493, + "epoch": 0.58, + "grad_norm": 0.6468448425487455, + "learning_rate": 7.869390847662319e-06, + "loss": 0.4049, "step": 12654 }, { - "epoch": 0.73, - "grad_norm": 0.8069415336157884, - "learning_rate": 3.6588546738826325e-06, - "loss": 0.4577, + "epoch": 0.58, + "grad_norm": 0.5540984556558914, + "learning_rate": 7.86793711626262e-06, + "loss": 0.3679, "step": 12655 }, { - "epoch": 0.73, - "grad_norm": 0.44498482845603343, - "learning_rate": 3.6574158532379944e-06, - "loss": 0.1469, + "epoch": 0.58, + "grad_norm": 0.28212708911982204, + "learning_rate": 7.866483432064737e-06, + "loss": 0.236, "step": 12656 }, { - "epoch": 0.73, - "grad_norm": 0.40957331326092905, - "learning_rate": 3.6559772522367765e-06, - "loss": 0.3075, + "epoch": 0.58, + "grad_norm": 1.250844795160495, + "learning_rate": 7.865029795100857e-06, + "loss": 0.679, "step": 12657 }, { - "epoch": 0.73, - "grad_norm": 0.24643548781053554, - "learning_rate": 3.6545388709287933e-06, - "loss": 0.2624, + "epoch": 0.58, + "grad_norm": 0.4539509024420482, + "learning_rate": 7.863576205403153e-06, + "loss": 0.2975, "step": 12658 }, { - "epoch": 0.73, - "grad_norm": 0.7845058080505076, - "learning_rate": 3.653100709363856e-06, - "loss": 0.3092, + "epoch": 0.58, + "grad_norm": 0.2533246325859676, + "learning_rate": 7.862122663003819e-06, + "loss": 0.1633, "step": 12659 }, { - "epoch": 0.73, - "grad_norm": 0.38306157889681686, - "learning_rate": 3.651662767591768e-06, - "loss": 0.2797, + "epoch": 0.58, + "grad_norm": 0.5112533225780336, + "learning_rate": 7.860669167935028e-06, + "loss": 0.3594, "step": 12660 }, { - "epoch": 0.73, - "grad_norm": 0.39024767404061234, - "learning_rate": 3.650225045662322e-06, - "loss": 0.3401, + "epoch": 0.58, + "grad_norm": 0.457637034124689, + "learning_rate": 7.85921572022896e-06, + "loss": 0.2962, "step": 12661 }, { - "epoch": 0.73, - "grad_norm": 0.46207148468081677, - "learning_rate": 3.6487875436253173e-06, - "loss": 0.2373, + "epoch": 0.58, + "grad_norm": 0.5726592026191093, + "learning_rate": 7.857762319917787e-06, + "loss": 0.3787, "step": 12662 }, { - "epoch": 0.73, - "grad_norm": 0.3593077845975231, - "learning_rate": 3.6473502615305233e-06, - "loss": 0.2678, + "epoch": 0.58, + "grad_norm": 0.44461902655318186, + "learning_rate": 7.856308967033697e-06, + "loss": 0.2465, "step": 12663 }, { - "epoch": 0.73, - "grad_norm": 0.3862979083597893, - "learning_rate": 3.645913199427713e-06, - "loss": 0.2191, + "epoch": 0.58, + "grad_norm": 0.3370231121170109, + "learning_rate": 7.854855661608858e-06, + "loss": 0.2521, "step": 12664 }, { - "epoch": 0.73, - "grad_norm": 0.34427342537308164, - "learning_rate": 3.6444763573666586e-06, - "loss": 0.2569, + "epoch": 0.58, + "grad_norm": 0.6361696779083015, + "learning_rate": 7.853402403675449e-06, + "loss": 0.3577, "step": 12665 }, { - "epoch": 0.73, - "grad_norm": 0.3287239171784687, - "learning_rate": 3.643039735397115e-06, - "loss": 0.2551, + "epoch": 0.58, + "grad_norm": 0.39936835506328316, + "learning_rate": 7.85194919326564e-06, + "loss": 0.2058, "step": 12666 }, { - "epoch": 0.73, - "grad_norm": 1.2073091689438713, - "learning_rate": 3.6416033335688306e-06, - "loss": 0.8047, + "epoch": 0.58, + "grad_norm": 0.42491252578065297, + "learning_rate": 7.850496030411608e-06, + "loss": 0.2971, "step": 12667 }, { - "epoch": 0.73, - "grad_norm": 1.14833529116643, - "learning_rate": 3.640167151931547e-06, - "loss": 0.4932, + "epoch": 0.58, + "grad_norm": 0.3483353506919157, + "learning_rate": 7.84904291514552e-06, + "loss": 0.2917, "step": 12668 }, { - "epoch": 0.73, - "grad_norm": 0.2683170080436344, - "learning_rate": 3.6387311905350053e-06, - "loss": 0.2103, + "epoch": 0.58, + "grad_norm": 0.8263498947622043, + "learning_rate": 7.84758984749955e-06, + "loss": 0.4065, "step": 12669 }, { - "epoch": 0.73, - "grad_norm": 0.27189369694643967, - "learning_rate": 3.637295449428928e-06, - "loss": 0.2041, + "epoch": 0.58, + "grad_norm": 0.6099462694633945, + "learning_rate": 7.846136827505866e-06, + "loss": 0.3497, "step": 12670 }, { - "epoch": 0.73, - "grad_norm": 0.5783715207547905, - "learning_rate": 3.6358599286630367e-06, - "loss": 0.3914, + "epoch": 0.58, + "grad_norm": 0.39457532516270394, + "learning_rate": 7.844683855196637e-06, + "loss": 0.3152, "step": 12671 }, { - "epoch": 0.73, - "grad_norm": 0.2876284884270302, - "learning_rate": 3.634424628287041e-06, - "loss": 0.191, + "epoch": 0.58, + "grad_norm": 0.2796453804615942, + "learning_rate": 7.843230930604028e-06, + "loss": 0.2327, "step": 12672 }, { - "epoch": 0.73, - "grad_norm": 0.47055640385816155, - "learning_rate": 3.632989548350645e-06, - "loss": 0.3705, + "epoch": 0.58, + "grad_norm": 0.3201605047618988, + "learning_rate": 7.841778053760212e-06, + "loss": 0.1372, "step": 12673 }, { - "epoch": 0.73, - "grad_norm": 0.526309013570079, - "learning_rate": 3.631554688903549e-06, - "loss": 0.3535, + "epoch": 0.58, + "grad_norm": 0.4163178647357595, + "learning_rate": 7.840325224697348e-06, + "loss": 0.3281, "step": 12674 }, { - "epoch": 0.73, - "grad_norm": 0.3751698398541402, - "learning_rate": 3.6301200499954416e-06, - "loss": 0.1871, + "epoch": 0.58, + "grad_norm": 0.522060078342285, + "learning_rate": 7.838872443447596e-06, + "loss": 0.3445, "step": 12675 }, { - "epoch": 0.73, - "grad_norm": 0.3542442085533244, - "learning_rate": 3.6286856316760023e-06, - "loss": 0.2631, + "epoch": 0.58, + "grad_norm": 0.3910218572249062, + "learning_rate": 7.837419710043131e-06, + "loss": 0.1934, "step": 12676 }, { - "epoch": 0.73, - "grad_norm": 0.273470471007027, - "learning_rate": 3.6272514339949015e-06, - "loss": 0.2747, + "epoch": 0.58, + "grad_norm": 0.45377159324605604, + "learning_rate": 7.835967024516107e-06, + "loss": 0.3159, "step": 12677 }, { - "epoch": 0.73, - "grad_norm": 0.5734636947485215, - "learning_rate": 3.6258174570018133e-06, - "loss": 0.3421, + "epoch": 0.58, + "grad_norm": 0.7614369084645728, + "learning_rate": 7.834514386898693e-06, + "loss": 0.4169, "step": 12678 }, { - "epoch": 0.73, - "grad_norm": 0.3687365959759761, - "learning_rate": 3.6243837007463933e-06, - "loss": 0.262, + "epoch": 0.58, + "grad_norm": 0.1921810064205715, + "learning_rate": 7.833061797223035e-06, + "loss": 0.1252, "step": 12679 }, { - "epoch": 0.73, - "grad_norm": 1.2296519963082793, - "learning_rate": 3.6229501652782904e-06, - "loss": 0.5555, + "epoch": 0.58, + "grad_norm": 0.33386065259883574, + "learning_rate": 7.831609255521305e-06, + "loss": 0.3009, "step": 12680 }, { - "epoch": 0.73, - "grad_norm": 0.31969153444544296, - "learning_rate": 3.6215168506471466e-06, - "loss": 0.2874, + "epoch": 0.58, + "grad_norm": 1.2262781943184184, + "learning_rate": 7.830156761825656e-06, + "loss": 0.6402, "step": 12681 }, { - "epoch": 0.73, - "grad_norm": 0.4076483909163558, - "learning_rate": 3.6200837569026036e-06, - "loss": 0.2688, + "epoch": 0.58, + "grad_norm": 0.38619140964693904, + "learning_rate": 7.828704316168245e-06, + "loss": 0.2211, "step": 12682 }, { - "epoch": 0.73, - "grad_norm": 0.41300092423729834, - "learning_rate": 3.618650884094285e-06, - "loss": 0.2668, + "epoch": 0.58, + "grad_norm": 0.5245862977006677, + "learning_rate": 7.827251918581225e-06, + "loss": 0.3574, "step": 12683 }, { - "epoch": 0.73, - "grad_norm": 0.3944044972280825, - "learning_rate": 3.617218232271812e-06, - "loss": 0.2958, + "epoch": 0.58, + "grad_norm": 0.3830797185728154, + "learning_rate": 7.825799569096758e-06, + "loss": 0.3257, "step": 12684 }, { - "epoch": 0.73, - "grad_norm": 0.25016697584075415, - "learning_rate": 3.615785801484797e-06, - "loss": 0.1869, + "epoch": 0.58, + "grad_norm": 0.28003312299068583, + "learning_rate": 7.82434726774699e-06, + "loss": 0.1564, "step": 12685 }, { - "epoch": 0.73, - "grad_norm": 0.4950885923845611, - "learning_rate": 3.6143535917828422e-06, - "loss": 0.3181, + "epoch": 0.58, + "grad_norm": 0.5898295070891924, + "learning_rate": 7.822895014564078e-06, + "loss": 0.4206, "step": 12686 }, { - "epoch": 0.73, - "grad_norm": 0.369686886187919, - "learning_rate": 3.612921603215551e-06, - "loss": 0.263, + "epoch": 0.58, + "grad_norm": 0.2960106671095541, + "learning_rate": 7.821442809580172e-06, + "loss": 0.2642, "step": 12687 }, { - "epoch": 0.73, - "grad_norm": 0.5134965413780258, - "learning_rate": 3.6114898358325103e-06, - "loss": 0.2762, + "epoch": 0.58, + "grad_norm": 0.8707744762969479, + "learning_rate": 7.81999065282742e-06, + "loss": 0.3994, "step": 12688 }, { - "epoch": 0.73, - "grad_norm": 0.3598094168898819, - "learning_rate": 3.6100582896833012e-06, - "loss": 0.3195, + "epoch": 0.58, + "grad_norm": 0.34795852024580404, + "learning_rate": 7.818538544337975e-06, + "loss": 0.2397, "step": 12689 }, { - "epoch": 0.73, - "grad_norm": 0.5903895067372618, - "learning_rate": 3.6086269648174965e-06, - "loss": 0.3883, + "epoch": 0.58, + "grad_norm": 0.8652227310579472, + "learning_rate": 7.817086484143987e-06, + "loss": 0.4585, "step": 12690 }, { - "epoch": 0.73, - "grad_norm": 0.285502437384372, - "learning_rate": 3.607195861284668e-06, - "loss": 0.2375, + "epoch": 0.58, + "grad_norm": 0.4319232767910534, + "learning_rate": 7.815634472277597e-06, + "loss": 0.3185, "step": 12691 }, { - "epoch": 0.73, - "grad_norm": 0.29724734450311907, - "learning_rate": 3.605764979134372e-06, - "loss": 0.1829, + "epoch": 0.58, + "grad_norm": 0.2768057891485402, + "learning_rate": 7.814182508770953e-06, + "loss": 0.2307, "step": 12692 }, { - "epoch": 0.73, - "grad_norm": 0.38014274107318985, - "learning_rate": 3.6043343184161593e-06, - "loss": 0.2531, + "epoch": 0.58, + "grad_norm": 0.28279437703032895, + "learning_rate": 7.812730593656203e-06, + "loss": 0.1893, "step": 12693 }, { - "epoch": 0.73, - "grad_norm": 0.3741144539894074, - "learning_rate": 3.602903879179571e-06, - "loss": 0.3121, + "epoch": 0.58, + "grad_norm": 0.7098006772698936, + "learning_rate": 7.811278726965492e-06, + "loss": 0.3637, "step": 12694 }, { - "epoch": 0.73, - "grad_norm": 0.9836418738839882, - "learning_rate": 3.601473661474154e-06, - "loss": 0.3773, + "epoch": 0.58, + "grad_norm": 0.3179244428979831, + "learning_rate": 7.80982690873096e-06, + "loss": 0.2473, "step": 12695 }, { - "epoch": 0.73, - "grad_norm": 0.5388543502057055, - "learning_rate": 3.600043665349424e-06, - "loss": 0.33, + "epoch": 0.58, + "grad_norm": 0.5316893464638233, + "learning_rate": 7.808375138984747e-06, + "loss": 0.3898, "step": 12696 }, { - "epoch": 0.73, - "grad_norm": 0.2299485082398649, - "learning_rate": 3.5986138908549073e-06, - "loss": 0.2319, + "epoch": 0.58, + "grad_norm": 0.8630942169624378, + "learning_rate": 7.806923417758999e-06, + "loss": 0.569, "step": 12697 }, { - "epoch": 0.73, - "grad_norm": 0.4261332556361552, - "learning_rate": 3.597184338040114e-06, - "loss": 0.166, + "epoch": 0.58, + "grad_norm": 0.26791479259116613, + "learning_rate": 7.805471745085851e-06, + "loss": 0.1892, "step": 12698 }, { - "epoch": 0.73, - "grad_norm": 0.4785686820285072, - "learning_rate": 3.595755006954553e-06, - "loss": 0.3013, + "epoch": 0.58, + "grad_norm": 0.34395399660954734, + "learning_rate": 7.804020120997443e-06, + "loss": 0.2629, "step": 12699 }, { - "epoch": 0.73, - "grad_norm": 0.40973404733494684, - "learning_rate": 3.5943258976477226e-06, - "loss": 0.2931, + "epoch": 0.58, + "grad_norm": 0.38971885476184925, + "learning_rate": 7.802568545525913e-06, + "loss": 0.2776, "step": 12700 }, { - "epoch": 0.73, - "grad_norm": 0.32627133475863995, - "learning_rate": 3.5928970101691096e-06, - "loss": 0.2481, + "epoch": 0.58, + "grad_norm": 0.4322676376663041, + "learning_rate": 7.801117018703398e-06, + "loss": 0.3044, "step": 12701 }, { - "epoch": 0.73, - "grad_norm": 0.5170098162809887, - "learning_rate": 3.5914683445681954e-06, - "loss": 0.3466, + "epoch": 0.58, + "grad_norm": 0.8293628503423306, + "learning_rate": 7.799665540562034e-06, + "loss": 0.3316, "step": 12702 }, { - "epoch": 0.73, - "grad_norm": 0.287299691816571, - "learning_rate": 3.59003990089446e-06, - "loss": 0.2333, + "epoch": 0.58, + "grad_norm": 0.33691500869657975, + "learning_rate": 7.798214111133954e-06, + "loss": 0.2716, "step": 12703 }, { - "epoch": 0.73, - "grad_norm": 0.3970243877751687, - "learning_rate": 3.588611679197366e-06, - "loss": 0.2657, + "epoch": 0.58, + "grad_norm": 0.41667108627821636, + "learning_rate": 7.796762730451292e-06, + "loss": 0.3238, "step": 12704 }, { - "epoch": 0.73, - "grad_norm": 0.2731068096527432, - "learning_rate": 3.587183679526375e-06, - "loss": 0.2241, + "epoch": 0.58, + "grad_norm": 0.47536467680254574, + "learning_rate": 7.795311398546174e-06, + "loss": 0.2352, "step": 12705 }, { - "epoch": 0.73, - "grad_norm": 0.5270425973716926, - "learning_rate": 3.585755901930934e-06, - "loss": 0.3451, + "epoch": 0.58, + "grad_norm": 0.4091655862535293, + "learning_rate": 7.793860115450744e-06, + "loss": 0.221, "step": 12706 }, { - "epoch": 0.73, - "grad_norm": 1.2638374003320183, - "learning_rate": 3.5843283464604927e-06, - "loss": 0.7912, + "epoch": 0.58, + "grad_norm": 0.3843159760743542, + "learning_rate": 7.792408881197122e-06, + "loss": 0.2866, "step": 12707 }, { - "epoch": 0.73, - "grad_norm": 0.4955313922411486, - "learning_rate": 3.582901013164486e-06, - "loss": 0.1773, + "epoch": 0.58, + "grad_norm": 0.33659983901033974, + "learning_rate": 7.79095769581744e-06, + "loss": 0.2308, "step": 12708 }, { - "epoch": 0.73, - "grad_norm": 0.25806863051136353, - "learning_rate": 3.5814739020923405e-06, - "loss": 0.2594, + "epoch": 0.58, + "grad_norm": 0.9919849593589294, + "learning_rate": 7.789506559343821e-06, + "loss": 0.5709, "step": 12709 }, { - "epoch": 0.73, - "grad_norm": 0.2920727776909075, - "learning_rate": 3.5800470132934785e-06, - "loss": 0.2262, + "epoch": 0.58, + "grad_norm": 0.38730906487439765, + "learning_rate": 7.788055471808401e-06, + "loss": 0.2979, "step": 12710 }, { - "epoch": 0.73, - "grad_norm": 0.7188461489974537, - "learning_rate": 3.5786203468173087e-06, - "loss": 0.2492, + "epoch": 0.58, + "grad_norm": 0.34654807857656456, + "learning_rate": 7.7866044332433e-06, + "loss": 0.2979, "step": 12711 }, { - "epoch": 0.73, - "grad_norm": 0.3632276005566369, - "learning_rate": 3.5771939027132428e-06, - "loss": 0.2854, + "epoch": 0.58, + "grad_norm": 0.3990331716414633, + "learning_rate": 7.785153443680646e-06, + "loss": 0.172, "step": 12712 }, { - "epoch": 0.73, - "grad_norm": 0.3989870298151256, - "learning_rate": 3.5757676810306775e-06, - "loss": 0.3184, + "epoch": 0.58, + "grad_norm": 0.3129950457641804, + "learning_rate": 7.783702503152557e-06, + "loss": 0.251, "step": 12713 }, { - "epoch": 0.73, - "grad_norm": 0.3194280116657528, - "learning_rate": 3.5743416818189993e-06, - "loss": 0.1514, + "epoch": 0.58, + "grad_norm": 1.739392246224489, + "learning_rate": 7.78225161169116e-06, + "loss": 0.7767, "step": 12714 }, { - "epoch": 0.73, - "grad_norm": 0.353739585397453, - "learning_rate": 3.5729159051275895e-06, - "loss": 0.2931, + "epoch": 0.58, + "grad_norm": 0.2967349941365548, + "learning_rate": 7.780800769328574e-06, + "loss": 0.2143, "step": 12715 }, { - "epoch": 0.73, - "grad_norm": 0.6406462606108349, - "learning_rate": 3.5714903510058296e-06, - "loss": 0.3994, + "epoch": 0.58, + "grad_norm": 0.3733989155471841, + "learning_rate": 7.77934997609692e-06, + "loss": 0.3081, "step": 12716 }, { - "epoch": 0.73, - "grad_norm": 0.2038712219236113, - "learning_rate": 3.570065019503082e-06, - "loss": 0.1946, + "epoch": 0.58, + "grad_norm": 0.7559852686228888, + "learning_rate": 7.777899232028319e-06, + "loss": 0.4853, "step": 12717 }, { - "epoch": 0.73, - "grad_norm": 0.3350444004903638, - "learning_rate": 3.5686399106687064e-06, - "loss": 0.2076, + "epoch": 0.58, + "grad_norm": 0.2637166826170659, + "learning_rate": 7.776448537154883e-06, + "loss": 0.1651, "step": 12718 }, { - "epoch": 0.73, - "grad_norm": 1.097246714693747, - "learning_rate": 3.567215024552051e-06, - "loss": 0.6239, + "epoch": 0.58, + "grad_norm": 0.3934348397547351, + "learning_rate": 7.774997891508737e-06, + "loss": 0.2546, "step": 12719 }, { - "epoch": 0.73, - "grad_norm": 0.46965475632734666, - "learning_rate": 3.5657903612024658e-06, - "loss": 0.3749, + "epoch": 0.58, + "grad_norm": 0.37041073652678824, + "learning_rate": 7.773547295121994e-06, + "loss": 0.2788, "step": 12720 }, { - "epoch": 0.73, - "grad_norm": 0.263875280635042, - "learning_rate": 3.5643659206692837e-06, - "loss": 0.2184, + "epoch": 0.58, + "grad_norm": 0.44447437946952, + "learning_rate": 7.772096748026768e-06, + "loss": 0.1916, "step": 12721 }, { - "epoch": 0.73, - "grad_norm": 0.5866450994990379, - "learning_rate": 3.562941703001832e-06, - "loss": 0.3771, + "epoch": 0.58, + "grad_norm": 0.46225300955705867, + "learning_rate": 7.770646250255167e-06, + "loss": 0.3167, "step": 12722 }, { - "epoch": 0.73, - "grad_norm": 0.19861196590825053, - "learning_rate": 3.5615177082494334e-06, - "loss": 0.1384, + "epoch": 0.58, + "grad_norm": 0.3786172634701655, + "learning_rate": 7.769195801839313e-06, + "loss": 0.3065, "step": 12723 }, { - "epoch": 0.73, - "grad_norm": 0.3572080133883536, - "learning_rate": 3.5600939364613963e-06, - "loss": 0.217, + "epoch": 0.58, + "grad_norm": 0.806230981507373, + "learning_rate": 7.767745402811316e-06, + "loss": 0.4504, "step": 12724 }, { - "epoch": 0.73, - "grad_norm": 0.41415889915049825, - "learning_rate": 3.5586703876870333e-06, - "loss": 0.3052, + "epoch": 0.58, + "grad_norm": 0.240571146952024, + "learning_rate": 7.766295053203285e-06, + "loss": 0.1593, "step": 12725 }, { - "epoch": 0.73, - "grad_norm": 0.5154925408780823, - "learning_rate": 3.557247061975636e-06, - "loss": 0.3153, + "epoch": 0.58, + "grad_norm": 0.49002974751190337, + "learning_rate": 7.764844753047321e-06, + "loss": 0.2816, "step": 12726 }, { - "epoch": 0.73, - "grad_norm": 0.34276841548401615, - "learning_rate": 3.5558239593764978e-06, - "loss": 0.2438, + "epoch": 0.58, + "grad_norm": 0.3750389632967279, + "learning_rate": 7.763394502375547e-06, + "loss": 0.3069, "step": 12727 }, { - "epoch": 0.73, - "grad_norm": 0.42795797835213084, - "learning_rate": 3.554401079938894e-06, - "loss": 0.329, + "epoch": 0.58, + "grad_norm": 0.3287739378802034, + "learning_rate": 7.76194430122006e-06, + "loss": 0.2244, "step": 12728 }, { - "epoch": 0.73, - "grad_norm": 0.234860424746158, - "learning_rate": 3.552978423712111e-06, - "loss": 0.1943, + "epoch": 0.58, + "grad_norm": 0.6701579747563696, + "learning_rate": 7.760494149612971e-06, + "loss": 0.4415, "step": 12729 }, { - "epoch": 0.73, - "grad_norm": 0.36464014173959514, - "learning_rate": 3.5515559907454045e-06, - "loss": 0.2748, + "epoch": 0.58, + "grad_norm": 0.44360422701781166, + "learning_rate": 7.759044047586382e-06, + "loss": 0.243, "step": 12730 }, { - "epoch": 0.73, - "grad_norm": 0.9060753945623665, - "learning_rate": 3.550133781088033e-06, - "loss": 0.5051, + "epoch": 0.58, + "grad_norm": 0.2824708068320173, + "learning_rate": 7.757593995172399e-06, + "loss": 0.2213, "step": 12731 }, { - "epoch": 0.73, - "grad_norm": 0.3529980523946801, - "learning_rate": 3.5487117947892558e-06, - "loss": 0.2867, + "epoch": 0.58, + "grad_norm": 0.4048384631868413, + "learning_rate": 7.756143992403123e-06, + "loss": 0.1737, "step": 12732 }, { - "epoch": 0.73, - "grad_norm": 0.3674842048475259, - "learning_rate": 3.5472900318983105e-06, - "loss": 0.2548, + "epoch": 0.58, + "grad_norm": 0.840620303570009, + "learning_rate": 7.754694039310658e-06, + "loss": 0.4689, "step": 12733 }, { - "epoch": 0.73, - "grad_norm": 0.8401740489172301, - "learning_rate": 3.545868492464435e-06, - "loss": 0.3047, + "epoch": 0.58, + "grad_norm": 0.3689585276893205, + "learning_rate": 7.7532441359271e-06, + "loss": 0.2329, "step": 12734 }, { - "epoch": 0.73, - "grad_norm": 0.27141746267388867, - "learning_rate": 3.544447176536855e-06, - "loss": 0.1553, + "epoch": 0.59, + "grad_norm": 0.4099765586685885, + "learning_rate": 7.75179428228455e-06, + "loss": 0.342, "step": 12735 }, { - "epoch": 0.73, - "grad_norm": 0.32057223006156993, - "learning_rate": 3.543026084164789e-06, - "loss": 0.2634, + "epoch": 0.59, + "grad_norm": 1.02779099374064, + "learning_rate": 7.750344478415113e-06, + "loss": 0.5829, "step": 12736 }, { - "epoch": 0.73, - "grad_norm": 0.3573243078189275, - "learning_rate": 3.5416052153974546e-06, - "loss": 0.2645, + "epoch": 0.59, + "grad_norm": 0.35310801221176674, + "learning_rate": 7.748894724350879e-06, + "loss": 0.2561, "step": 12737 }, { - "epoch": 0.73, - "grad_norm": 0.6295162887590943, - "learning_rate": 3.5401845702840543e-06, - "loss": 0.4241, + "epoch": 0.59, + "grad_norm": 0.2230389078281909, + "learning_rate": 7.747445020123945e-06, + "loss": 0.0902, "step": 12738 }, { - "epoch": 0.73, - "grad_norm": 0.38271116549580053, - "learning_rate": 3.5387641488737855e-06, - "loss": 0.2994, + "epoch": 0.59, + "grad_norm": 0.3783188802839845, + "learning_rate": 7.745995365766404e-06, + "loss": 0.3079, "step": 12739 }, { - "epoch": 0.73, - "grad_norm": 0.44089101642573486, - "learning_rate": 3.5373439512158315e-06, - "loss": 0.2251, + "epoch": 0.59, + "grad_norm": 0.444669157569832, + "learning_rate": 7.744545761310358e-06, + "loss": 0.2916, "step": 12740 }, { - "epoch": 0.73, - "grad_norm": 0.2504442391142626, - "learning_rate": 3.5359239773593833e-06, - "loss": 0.2017, + "epoch": 0.59, + "grad_norm": 0.5806950974618498, + "learning_rate": 7.743096206787894e-06, + "loss": 0.313, "step": 12741 }, { - "epoch": 0.73, - "grad_norm": 0.3315826635141327, - "learning_rate": 3.534504227353609e-06, - "loss": 0.2718, + "epoch": 0.59, + "grad_norm": 0.5175243670311491, + "learning_rate": 7.741646702231106e-06, + "loss": 0.3229, "step": 12742 }, { - "epoch": 0.73, - "grad_norm": 1.0141255217066432, - "learning_rate": 3.5330847012476754e-06, - "loss": 0.6677, + "epoch": 0.59, + "grad_norm": 0.35181954183854286, + "learning_rate": 7.74019724767208e-06, + "loss": 0.2699, "step": 12743 }, { - "epoch": 0.73, - "grad_norm": 0.2908904563347626, - "learning_rate": 3.5316653990907367e-06, - "loss": 0.2313, + "epoch": 0.59, + "grad_norm": 0.2701476654552088, + "learning_rate": 7.738747843142912e-06, + "loss": 0.1537, "step": 12744 }, { - "epoch": 0.73, - "grad_norm": 0.35460691259874044, - "learning_rate": 3.5302463209319514e-06, - "loss": 0.317, + "epoch": 0.59, + "grad_norm": 0.6194240777205317, + "learning_rate": 7.737298488675687e-06, + "loss": 0.3751, "step": 12745 }, { - "epoch": 0.73, - "grad_norm": 0.6951267542723861, - "learning_rate": 3.5288274668204568e-06, - "loss": 0.4621, + "epoch": 0.59, + "grad_norm": 0.3610492826118389, + "learning_rate": 7.735849184302493e-06, + "loss": 0.2717, "step": 12746 }, { - "epoch": 0.73, - "grad_norm": 0.15127146330290486, - "learning_rate": 3.527408836805389e-06, - "loss": 0.0716, + "epoch": 0.59, + "grad_norm": 0.34197919673688043, + "learning_rate": 7.734399930055412e-06, + "loss": 0.2726, "step": 12747 }, { - "epoch": 0.73, - "grad_norm": 0.297568465211192, - "learning_rate": 3.525990430935876e-06, - "loss": 0.2372, + "epoch": 0.59, + "grad_norm": 1.5824121641839315, + "learning_rate": 7.732950725966537e-06, + "loss": 0.6288, "step": 12748 }, { - "epoch": 0.73, - "grad_norm": 0.321758839001798, - "learning_rate": 3.524572249261031e-06, - "loss": 0.3338, + "epoch": 0.59, + "grad_norm": 0.37736818535626143, + "learning_rate": 7.731501572067946e-06, + "loss": 0.2863, "step": 12749 }, { - "epoch": 0.73, - "grad_norm": 0.5864810260206559, - "learning_rate": 3.5231542918299753e-06, - "loss": 0.3112, + "epoch": 0.59, + "grad_norm": 0.3283829294932836, + "learning_rate": 7.730052468391726e-06, + "loss": 0.2144, "step": 12750 }, { - "epoch": 0.73, - "grad_norm": 0.2883308570588384, - "learning_rate": 3.5217365586918073e-06, - "loss": 0.2406, + "epoch": 0.59, + "grad_norm": 0.3596897938440602, + "learning_rate": 7.728603414969956e-06, + "loss": 0.28, "step": 12751 }, { - "epoch": 0.73, - "grad_norm": 0.47306256806256525, - "learning_rate": 3.5203190498956242e-06, - "loss": 0.3095, + "epoch": 0.59, + "grad_norm": 0.32879325812867227, + "learning_rate": 7.727154411834712e-06, + "loss": 0.2668, "step": 12752 }, { - "epoch": 0.73, - "grad_norm": 0.49735107729836175, - "learning_rate": 3.518901765490509e-06, - "loss": 0.347, + "epoch": 0.59, + "grad_norm": 1.3049994606863364, + "learning_rate": 7.725705459018084e-06, + "loss": 0.7871, "step": 12753 }, { - "epoch": 0.73, - "grad_norm": 0.17686563294193186, - "learning_rate": 3.517484705525551e-06, - "loss": 0.1394, - "step": 12754 + "epoch": 0.59, + "grad_norm": 0.3290970797401489, + "learning_rate": 7.724256556552145e-06, + "loss": 0.2436, + "step": 12754 }, { - "epoch": 0.73, - "grad_norm": 0.8933579186696481, - "learning_rate": 3.5160678700498197e-06, - "loss": 0.5273, + "epoch": 0.59, + "grad_norm": 0.3563676335902697, + "learning_rate": 7.722807704468973e-06, + "loss": 0.2476, "step": 12755 }, { - "epoch": 0.73, - "grad_norm": 0.3495559430890266, - "learning_rate": 3.5146512591123783e-06, - "loss": 0.3032, + "epoch": 0.59, + "grad_norm": 0.32055420690503067, + "learning_rate": 7.721358902800638e-06, + "loss": 0.2064, "step": 12756 }, { - "epoch": 0.73, - "grad_norm": 0.28892578502445293, - "learning_rate": 3.513234872762282e-06, - "loss": 0.1972, + "epoch": 0.59, + "grad_norm": 0.5539872210323883, + "learning_rate": 7.719910151579225e-06, + "loss": 0.3239, "step": 12757 }, { - "epoch": 0.73, - "grad_norm": 1.0657258271815264, - "learning_rate": 3.511818711048587e-06, - "loss": 0.5283, + "epoch": 0.59, + "grad_norm": 0.40156659972048003, + "learning_rate": 7.718461450836805e-06, + "loss": 0.248, "step": 12758 }, { - "epoch": 0.73, - "grad_norm": 0.2885840837702499, - "learning_rate": 3.5104027740203305e-06, - "loss": 0.1883, + "epoch": 0.59, + "grad_norm": 0.3251181043971125, + "learning_rate": 7.717012800605447e-06, + "loss": 0.2906, "step": 12759 }, { - "epoch": 0.73, - "grad_norm": 0.2869873138895025, - "learning_rate": 3.5089870617265465e-06, - "loss": 0.1858, + "epoch": 0.59, + "grad_norm": 1.0709108370939942, + "learning_rate": 7.715564200917226e-06, + "loss": 0.318, "step": 12760 }, { - "epoch": 0.73, - "grad_norm": 0.352194227970117, - "learning_rate": 3.5075715742162586e-06, - "loss": 0.3088, + "epoch": 0.59, + "grad_norm": 0.4021975417533534, + "learning_rate": 7.714115651804213e-06, + "loss": 0.2663, "step": 12761 }, { - "epoch": 0.73, - "grad_norm": 0.7226569774131096, - "learning_rate": 3.506156311538491e-06, - "loss": 0.4724, + "epoch": 0.59, + "grad_norm": 0.3901642471537993, + "learning_rate": 7.712667153298474e-06, + "loss": 0.2715, "step": 12762 }, { - "epoch": 0.73, - "grad_norm": 0.29774972113115467, - "learning_rate": 3.504741273742254e-06, - "loss": 0.194, + "epoch": 0.59, + "grad_norm": 0.37933918555287227, + "learning_rate": 7.711218705432082e-06, + "loss": 0.2902, "step": 12763 }, { - "epoch": 0.73, - "grad_norm": 0.33947635583751484, - "learning_rate": 3.503326460876545e-06, - "loss": 0.2951, + "epoch": 0.59, + "grad_norm": 0.2368246483065897, + "learning_rate": 7.709770308237102e-06, + "loss": 0.1601, "step": 12764 }, { - "epoch": 0.73, - "grad_norm": 1.1678877911120926, - "learning_rate": 3.5019118729903566e-06, - "loss": 0.4789, + "epoch": 0.59, + "grad_norm": 1.215552913484588, + "learning_rate": 7.708321961745597e-06, + "loss": 0.7275, "step": 12765 }, { - "epoch": 0.73, - "grad_norm": 0.3045109971875889, - "learning_rate": 3.5004975101326854e-06, - "loss": 0.2444, + "epoch": 0.59, + "grad_norm": 1.1616422435047316, + "learning_rate": 7.706873665989638e-06, + "loss": 0.5561, "step": 12766 }, { - "epoch": 0.73, - "grad_norm": 0.5454583862261513, - "learning_rate": 3.4990833723525054e-06, - "loss": 0.2266, + "epoch": 0.59, + "grad_norm": 0.24761467343326626, + "learning_rate": 7.705425421001285e-06, + "loss": 0.2217, "step": 12767 }, { - "epoch": 0.73, - "grad_norm": 0.3329777623579333, - "learning_rate": 3.497669459698788e-06, - "loss": 0.3025, + "epoch": 0.59, + "grad_norm": 0.8164870011449455, + "learning_rate": 7.703977226812602e-06, + "loss": 0.5097, "step": 12768 }, { - "epoch": 0.73, - "grad_norm": 0.3145139009440519, - "learning_rate": 3.496255772220495e-06, - "loss": 0.2587, + "epoch": 0.59, + "grad_norm": 0.33185071084642837, + "learning_rate": 7.702529083455646e-06, + "loss": 0.236, "step": 12769 }, { - "epoch": 0.73, - "grad_norm": 0.23984063415861187, - "learning_rate": 3.4948423099665883e-06, - "loss": 0.1022, + "epoch": 0.59, + "grad_norm": 0.35506545002646184, + "learning_rate": 7.701080990962487e-06, + "loss": 0.2058, "step": 12770 }, { - "epoch": 0.73, - "grad_norm": 0.8220955217574739, - "learning_rate": 3.493429072986013e-06, - "loss": 0.402, + "epoch": 0.59, + "grad_norm": 0.4094554319150414, + "learning_rate": 7.699632949365177e-06, + "loss": 0.316, "step": 12771 }, { - "epoch": 0.73, - "grad_norm": 0.2833979664828243, - "learning_rate": 3.492016061327709e-06, - "loss": 0.249, + "epoch": 0.59, + "grad_norm": 0.9157542452112523, + "learning_rate": 7.698184958695781e-06, + "loss": 0.5007, "step": 12772 }, { - "epoch": 0.73, - "grad_norm": 0.4655948518759838, - "learning_rate": 3.490603275040605e-06, - "loss": 0.2649, + "epoch": 0.59, + "grad_norm": 0.3401130446672728, + "learning_rate": 7.696737018986342e-06, + "loss": 0.1985, "step": 12773 }, { - "epoch": 0.73, - "grad_norm": 0.39936457950351845, - "learning_rate": 3.4891907141736324e-06, - "loss": 0.289, + "epoch": 0.59, + "grad_norm": 0.756741130196454, + "learning_rate": 7.695289130268933e-06, + "loss": 0.3878, "step": 12774 }, { - "epoch": 0.73, - "grad_norm": 0.2320835341969602, - "learning_rate": 3.487778378775707e-06, - "loss": 0.1958, + "epoch": 0.59, + "grad_norm": 0.24944867575725807, + "learning_rate": 7.6938412925756e-06, + "loss": 0.2195, "step": 12775 }, { - "epoch": 0.73, - "grad_norm": 0.3366992622728923, - "learning_rate": 3.4863662688957355e-06, - "loss": 0.2385, + "epoch": 0.59, + "grad_norm": 0.6578990555319902, + "learning_rate": 7.692393505938397e-06, + "loss": 0.3611, "step": 12776 }, { - "epoch": 0.73, - "grad_norm": 0.8735030497179362, - "learning_rate": 3.4849543845826195e-06, - "loss": 0.373, + "epoch": 0.59, + "grad_norm": 0.39317868149535645, + "learning_rate": 7.690945770389377e-06, + "loss": 0.2456, "step": 12777 }, { - "epoch": 0.73, - "grad_norm": 0.3820901509586576, - "learning_rate": 3.4835427258852507e-06, - "loss": 0.2542, + "epoch": 0.59, + "grad_norm": 0.3644619708417844, + "learning_rate": 7.689498085960594e-06, + "loss": 0.2963, "step": 12778 }, { - "epoch": 0.73, - "grad_norm": 0.4889596251920278, - "learning_rate": 3.4821312928525197e-06, - "loss": 0.3594, + "epoch": 0.59, + "grad_norm": 0.657989171976488, + "learning_rate": 7.688050452684096e-06, + "loss": 0.3327, "step": 12779 }, { - "epoch": 0.73, - "grad_norm": 0.3395884327853301, - "learning_rate": 3.4807200855333024e-06, - "loss": 0.2753, + "epoch": 0.59, + "grad_norm": 0.45478683551937554, + "learning_rate": 7.686602870591933e-06, + "loss": 0.266, "step": 12780 }, { - "epoch": 0.73, - "grad_norm": 0.41027269658708754, - "learning_rate": 3.479309103976467e-06, - "loss": 0.2878, + "epoch": 0.59, + "grad_norm": 0.5398651142568992, + "learning_rate": 7.685155339716152e-06, + "loss": 0.2379, "step": 12781 }, { - "epoch": 0.73, - "grad_norm": 0.2492148324473185, - "learning_rate": 3.4778983482308746e-06, - "loss": 0.1902, + "epoch": 0.59, + "grad_norm": 0.4093107308539042, + "learning_rate": 7.683707860088801e-06, + "loss": 0.3016, "step": 12782 }, { - "epoch": 0.73, - "grad_norm": 0.7729463052691775, - "learning_rate": 3.4764878183453855e-06, - "loss": 0.2883, + "epoch": 0.59, + "grad_norm": 0.22838055257729695, + "learning_rate": 7.682260431741924e-06, + "loss": 0.201, "step": 12783 }, { - "epoch": 0.73, - "grad_norm": 0.2636022559064381, - "learning_rate": 3.475077514368842e-06, - "loss": 0.2694, + "epoch": 0.59, + "grad_norm": 1.279961711598058, + "learning_rate": 7.68081305470757e-06, + "loss": 0.6938, "step": 12784 }, { - "epoch": 0.73, - "grad_norm": 0.4722767331960266, - "learning_rate": 3.4736674363500846e-06, - "loss": 0.3413, + "epoch": 0.59, + "grad_norm": 0.4013193650335751, + "learning_rate": 7.679365729017779e-06, + "loss": 0.2952, "step": 12785 }, { - "epoch": 0.73, - "grad_norm": 0.9849108722772745, - "learning_rate": 3.472257584337939e-06, - "loss": 0.527, + "epoch": 0.59, + "grad_norm": 0.46222146840440503, + "learning_rate": 7.67791845470459e-06, + "loss": 0.2852, "step": 12786 }, { - "epoch": 0.73, - "grad_norm": 0.2514472326522464, - "learning_rate": 3.470847958381236e-06, - "loss": 0.2084, + "epoch": 0.59, + "grad_norm": 0.3732974169639896, + "learning_rate": 7.676471231800052e-06, + "loss": 0.3603, "step": 12787 }, { - "epoch": 0.73, - "grad_norm": 0.2962379312373279, - "learning_rate": 3.469438558528787e-06, - "loss": 0.2507, + "epoch": 0.59, + "grad_norm": 0.4112471419339967, + "learning_rate": 7.6750240603362e-06, + "loss": 0.3156, "step": 12788 }, { - "epoch": 0.73, - "grad_norm": 0.7197590638637732, - "learning_rate": 3.468029384829401e-06, - "loss": 0.2857, + "epoch": 0.59, + "grad_norm": 0.531298729203499, + "learning_rate": 7.673576940345078e-06, + "loss": 0.3305, "step": 12789 }, { - "epoch": 0.73, - "grad_norm": 0.34947417036129547, - "learning_rate": 3.466620437331876e-06, - "loss": 0.2543, + "epoch": 0.59, + "grad_norm": 0.20477291820338664, + "learning_rate": 7.672129871858715e-06, + "loss": 0.1768, "step": 12790 }, { - "epoch": 0.73, - "grad_norm": 1.3024804970661898, - "learning_rate": 3.4652117160850006e-06, - "loss": 0.5634, + "epoch": 0.59, + "grad_norm": 0.5384016917926084, + "learning_rate": 7.670682854909158e-06, + "loss": 0.3411, "step": 12791 }, { - "epoch": 0.73, - "grad_norm": 0.35195261144446016, - "learning_rate": 3.463803221137566e-06, - "loss": 0.2948, + "epoch": 0.59, + "grad_norm": 0.4069783142140041, + "learning_rate": 7.669235889528436e-06, + "loss": 0.3094, "step": 12792 }, { - "epoch": 0.74, - "grad_norm": 0.3084578488343683, - "learning_rate": 3.462394952538345e-06, - "loss": 0.1994, + "epoch": 0.59, + "grad_norm": 0.9646550019903518, + "learning_rate": 7.667788975748584e-06, + "loss": 0.3837, "step": 12793 }, { - "epoch": 0.74, - "grad_norm": 0.22815859921668444, - "learning_rate": 3.460986910336106e-06, - "loss": 0.1535, + "epoch": 0.59, + "grad_norm": 0.6975876498445063, + "learning_rate": 7.666342113601638e-06, + "loss": 0.3814, "step": 12794 }, { - "epoch": 0.74, - "grad_norm": 0.7811335529012565, - "learning_rate": 3.459579094579605e-06, - "loss": 0.3796, + "epoch": 0.59, + "grad_norm": 0.31653024690046894, + "learning_rate": 7.664895303119625e-06, + "loss": 0.289, "step": 12795 }, { - "epoch": 0.74, - "grad_norm": 0.27942275007513806, - "learning_rate": 3.4581715053176023e-06, - "loss": 0.1953, + "epoch": 0.59, + "grad_norm": 0.3495541646132096, + "learning_rate": 7.663448544334583e-06, + "loss": 0.1983, "step": 12796 }, { - "epoch": 0.74, - "grad_norm": 0.3920486415648585, - "learning_rate": 3.456764142598843e-06, - "loss": 0.3051, + "epoch": 0.59, + "grad_norm": 0.6760886582871586, + "learning_rate": 7.662001837278538e-06, + "loss": 0.3736, "step": 12797 }, { - "epoch": 0.74, - "grad_norm": 1.016852076671105, - "learning_rate": 3.455357006472052e-06, - "loss": 0.657, + "epoch": 0.59, + "grad_norm": 0.3287275872272099, + "learning_rate": 7.660555181983517e-06, + "loss": 0.2721, "step": 12798 }, { - "epoch": 0.74, - "grad_norm": 0.33966130409641193, - "learning_rate": 3.4539500969859706e-06, - "loss": 0.1705, + "epoch": 0.59, + "grad_norm": 0.49386304434525125, + "learning_rate": 7.659108578481547e-06, + "loss": 0.2748, "step": 12799 }, { - "epoch": 0.74, - "grad_norm": 0.22172259172292844, - "learning_rate": 3.4525434141893166e-06, - "loss": 0.2172, + "epoch": 0.59, + "grad_norm": 0.6350300918549228, + "learning_rate": 7.657662026804663e-06, + "loss": 0.4218, "step": 12800 }, { - "epoch": 0.74, - "grad_norm": 0.7058709583737947, - "learning_rate": 3.4511369581308017e-06, - "loss": 0.3947, + "epoch": 0.59, + "grad_norm": 0.29050876339873366, + "learning_rate": 7.656215526984881e-06, + "loss": 0.234, "step": 12801 }, { - "epoch": 0.74, - "grad_norm": 0.3354166197148681, - "learning_rate": 3.449730728859132e-06, - "loss": 0.1666, + "epoch": 0.59, + "grad_norm": 0.585248268041225, + "learning_rate": 7.654769079054229e-06, + "loss": 0.3403, "step": 12802 }, { - "epoch": 0.74, - "grad_norm": 0.5337470482155633, - "learning_rate": 3.4483247264230034e-06, - "loss": 0.3789, + "epoch": 0.59, + "grad_norm": 0.2916575748229832, + "learning_rate": 7.653322683044726e-06, + "loss": 0.1869, "step": 12803 }, { - "epoch": 0.74, - "grad_norm": 0.33592213852392117, - "learning_rate": 3.4469189508711098e-06, - "loss": 0.3046, + "epoch": 0.59, + "grad_norm": 0.4056545917373161, + "learning_rate": 7.6518763389884e-06, + "loss": 0.3014, "step": 12804 }, { - "epoch": 0.74, - "grad_norm": 0.6246678196271466, - "learning_rate": 3.445513402252132e-06, - "loss": 0.3147, + "epoch": 0.59, + "grad_norm": 1.014743588396409, + "learning_rate": 7.65043004691727e-06, + "loss": 0.534, "step": 12805 }, { - "epoch": 0.74, - "grad_norm": 0.31427524782070476, - "learning_rate": 3.444108080614743e-06, - "loss": 0.204, + "epoch": 0.59, + "grad_norm": 0.2913004727458303, + "learning_rate": 7.648983806863353e-06, + "loss": 0.2218, "step": 12806 }, { - "epoch": 0.74, - "grad_norm": 0.48310103519188197, - "learning_rate": 3.4427029860076056e-06, - "loss": 0.2649, + "epoch": 0.59, + "grad_norm": 0.40914832217632163, + "learning_rate": 7.647537618858667e-06, + "loss": 0.3413, "step": 12807 }, { - "epoch": 0.74, - "grad_norm": 0.26865324112297956, - "learning_rate": 3.441298118479386e-06, - "loss": 0.2608, + "epoch": 0.59, + "grad_norm": 0.5957950155155142, + "learning_rate": 7.646091482935232e-06, + "loss": 0.3739, "step": 12808 }, { - "epoch": 0.74, - "grad_norm": 0.5907160387384897, - "learning_rate": 3.4398934780787297e-06, - "loss": 0.2304, + "epoch": 0.59, + "grad_norm": 0.16086236302141288, + "learning_rate": 7.644645399125063e-06, + "loss": 0.0718, "step": 12809 }, { - "epoch": 0.74, - "grad_norm": 1.1524510092382603, - "learning_rate": 3.43848906485428e-06, - "loss": 0.7557, + "epoch": 0.59, + "grad_norm": 0.3365292818234121, + "learning_rate": 7.643199367460176e-06, + "loss": 0.2683, "step": 12810 }, { - "epoch": 0.74, - "grad_norm": 0.38459895702479086, - "learning_rate": 3.4370848788546695e-06, - "loss": 0.2584, + "epoch": 0.59, + "grad_norm": 0.5027099060763884, + "learning_rate": 7.641753387972583e-06, + "loss": 0.3656, "step": 12811 }, { - "epoch": 0.74, - "grad_norm": 0.2760382853690984, - "learning_rate": 3.4356809201285303e-06, - "loss": 0.241, + "epoch": 0.59, + "grad_norm": 0.5960161477661496, + "learning_rate": 7.640307460694294e-06, + "loss": 0.2564, "step": 12812 }, { - "epoch": 0.74, - "grad_norm": 0.32014199369104235, - "learning_rate": 3.4342771887244784e-06, - "loss": 0.2341, + "epoch": 0.59, + "grad_norm": 0.36484368577096477, + "learning_rate": 7.638861585657327e-06, + "loss": 0.2779, "step": 12813 }, { - "epoch": 0.74, - "grad_norm": 0.5749750430673263, - "learning_rate": 3.4328736846911247e-06, - "loss": 0.3045, + "epoch": 0.59, + "grad_norm": 0.36625999974295237, + "learning_rate": 7.637415762893687e-06, + "loss": 0.2886, "step": 12814 }, { - "epoch": 0.74, - "grad_norm": 0.38232768236464837, - "learning_rate": 3.4314704080770744e-06, - "loss": 0.2291, + "epoch": 0.59, + "grad_norm": 0.39660244433373076, + "learning_rate": 7.635969992435387e-06, + "loss": 0.2307, "step": 12815 }, { - "epoch": 0.74, - "grad_norm": 0.3446190128141687, - "learning_rate": 3.4300673589309163e-06, - "loss": 0.3249, + "epoch": 0.59, + "grad_norm": 0.29514312472976933, + "learning_rate": 7.634524274314427e-06, + "loss": 0.1662, "step": 12816 }, { - "epoch": 0.74, - "grad_norm": 0.5823714362679814, - "learning_rate": 3.428664537301247e-06, - "loss": 0.2962, + "epoch": 0.59, + "grad_norm": 1.4393208659335313, + "learning_rate": 7.633078608562825e-06, + "loss": 0.8331, "step": 12817 }, { - "epoch": 0.74, - "grad_norm": 0.3685845728880524, - "learning_rate": 3.4272619432366427e-06, - "loss": 0.3151, + "epoch": 0.59, + "grad_norm": 0.34499480269669247, + "learning_rate": 7.631632995212584e-06, + "loss": 0.307, "step": 12818 }, { - "epoch": 0.74, - "grad_norm": 0.2341879964186778, - "learning_rate": 3.425859576785674e-06, - "loss": 0.1659, + "epoch": 0.59, + "grad_norm": 0.3711622614853309, + "learning_rate": 7.630187434295701e-06, + "loss": 0.255, "step": 12819 }, { - "epoch": 0.74, - "grad_norm": 0.4030323576618036, - "learning_rate": 3.4244574379969032e-06, - "loss": 0.2683, + "epoch": 0.59, + "grad_norm": 0.7951413385460965, + "learning_rate": 7.628741925844183e-06, + "loss": 0.4492, "step": 12820 }, { - "epoch": 0.74, - "grad_norm": 0.4474160431672498, - "learning_rate": 3.4230555269188903e-06, - "loss": 0.2913, + "epoch": 0.59, + "grad_norm": 0.28342384725328973, + "learning_rate": 7.6272964698900356e-06, + "loss": 0.1825, "step": 12821 }, { - "epoch": 0.74, - "grad_norm": 1.0297083176155641, - "learning_rate": 3.4216538436001836e-06, - "loss": 0.5263, + "epoch": 0.59, + "grad_norm": 0.31005228266203044, + "learning_rate": 7.6258510664652585e-06, + "loss": 0.2298, "step": 12822 }, { - "epoch": 0.74, - "grad_norm": 0.38588698705698504, - "learning_rate": 3.4202523880893202e-06, - "loss": 0.2781, + "epoch": 0.59, + "grad_norm": 0.5221430731418525, + "learning_rate": 7.624405715601851e-06, + "loss": 0.3881, "step": 12823 }, { - "epoch": 0.74, - "grad_norm": 0.30214966289449274, - "learning_rate": 3.4188511604348297e-06, - "loss": 0.2844, + "epoch": 0.59, + "grad_norm": 0.6664092076245254, + "learning_rate": 7.6229604173318095e-06, + "loss": 0.3967, "step": 12824 }, { - "epoch": 0.74, - "grad_norm": 0.21761652521172148, - "learning_rate": 3.417450160685245e-06, - "loss": 0.0645, + "epoch": 0.59, + "grad_norm": 0.3746288616707745, + "learning_rate": 7.6215151716871325e-06, + "loss": 0.2918, "step": 12825 }, { - "epoch": 0.74, - "grad_norm": 0.32300800479962954, - "learning_rate": 3.416049388889078e-06, - "loss": 0.1809, + "epoch": 0.59, + "grad_norm": 0.365670511328936, + "learning_rate": 7.620069978699819e-06, + "loss": 0.2783, "step": 12826 }, { - "epoch": 0.74, - "grad_norm": 0.3752187308252932, - "learning_rate": 3.4146488450948367e-06, - "loss": 0.2808, + "epoch": 0.59, + "grad_norm": 0.36337934937584165, + "learning_rate": 7.618624838401863e-06, + "loss": 0.2329, "step": 12827 }, { - "epoch": 0.74, - "grad_norm": 0.3238753458144825, - "learning_rate": 3.413248529351023e-06, - "loss": 0.2652, + "epoch": 0.59, + "grad_norm": 0.2973558764509843, + "learning_rate": 7.617179750825257e-06, + "loss": 0.2247, "step": 12828 }, { - "epoch": 0.74, - "grad_norm": 0.4298149619699671, - "learning_rate": 3.411848441706127e-06, - "loss": 0.2901, + "epoch": 0.59, + "grad_norm": 0.773967598551542, + "learning_rate": 7.615734716001992e-06, + "loss": 0.3495, "step": 12829 }, { - "epoch": 0.74, - "grad_norm": 0.4810941246561356, - "learning_rate": 3.410448582208642e-06, - "loss": 0.3489, + "epoch": 0.59, + "grad_norm": 0.3691076236509977, + "learning_rate": 7.614289733964067e-06, + "loss": 0.2959, "step": 12830 }, { - "epoch": 0.74, - "grad_norm": 0.5252502309396361, - "learning_rate": 3.409048950907037e-06, - "loss": 0.3361, + "epoch": 0.59, + "grad_norm": 0.3332786627056567, + "learning_rate": 7.612844804743466e-06, + "loss": 0.2581, "step": 12831 }, { - "epoch": 0.74, - "grad_norm": 0.1994109208173046, - "learning_rate": 3.4076495478497795e-06, - "loss": 0.1346, + "epoch": 0.59, + "grad_norm": 0.8161709175043685, + "learning_rate": 7.61139992837218e-06, + "loss": 0.3472, "step": 12832 }, { - "epoch": 0.74, - "grad_norm": 0.4101863021105691, - "learning_rate": 3.406250373085337e-06, - "loss": 0.3074, + "epoch": 0.59, + "grad_norm": 0.2651327540304119, + "learning_rate": 7.609955104882194e-06, + "loss": 0.1268, "step": 12833 }, { - "epoch": 0.74, - "grad_norm": 0.6969992266110001, - "learning_rate": 3.4048514266621612e-06, - "loss": 0.4448, + "epoch": 0.59, + "grad_norm": 0.29201991084979567, + "learning_rate": 7.6085103343055024e-06, + "loss": 0.2458, "step": 12834 }, { - "epoch": 0.74, - "grad_norm": 0.4233808236326501, - "learning_rate": 3.403452708628697e-06, - "loss": 0.238, + "epoch": 0.59, + "grad_norm": 0.4137579831347058, + "learning_rate": 7.607065616674088e-06, + "loss": 0.2418, "step": 12835 }, { - "epoch": 0.74, - "grad_norm": 0.29572459303480436, - "learning_rate": 3.4020542190333795e-06, - "loss": 0.2596, + "epoch": 0.59, + "grad_norm": 0.5811358087880002, + "learning_rate": 7.605620952019932e-06, + "loss": 0.3819, "step": 12836 }, { - "epoch": 0.74, - "grad_norm": 0.49815984479914355, - "learning_rate": 3.4006559579246425e-06, - "loss": 0.2174, + "epoch": 0.59, + "grad_norm": 0.3648243408675913, + "learning_rate": 7.6041763403750206e-06, + "loss": 0.3217, "step": 12837 }, { - "epoch": 0.74, - "grad_norm": 0.2761033725607673, - "learning_rate": 3.3992579253509062e-06, - "loss": 0.0935, + "epoch": 0.59, + "grad_norm": 0.38939675541487206, + "learning_rate": 7.602731781771338e-06, + "loss": 0.3153, "step": 12838 }, { - "epoch": 0.74, - "grad_norm": 0.32754610459515865, - "learning_rate": 3.3978601213605842e-06, - "loss": 0.2857, + "epoch": 0.59, + "grad_norm": 0.35250109303940275, + "learning_rate": 7.601287276240862e-06, + "loss": 0.1459, "step": 12839 }, { - "epoch": 0.74, - "grad_norm": 0.4294008648810787, - "learning_rate": 3.3964625460020827e-06, - "loss": 0.311, + "epoch": 0.59, + "grad_norm": 0.2841498491678493, + "learning_rate": 7.599842823815574e-06, + "loss": 0.2142, "step": 12840 }, { - "epoch": 0.74, - "grad_norm": 0.6594774691330908, - "learning_rate": 3.395065199323796e-06, - "loss": 0.3494, + "epoch": 0.59, + "grad_norm": 1.0463220421002493, + "learning_rate": 7.5983984245274535e-06, + "loss": 0.5708, "step": 12841 }, { - "epoch": 0.74, - "grad_norm": 0.32425053227691586, - "learning_rate": 3.393668081374121e-06, - "loss": 0.2053, + "epoch": 0.59, + "grad_norm": 0.31571986377054756, + "learning_rate": 7.596954078408474e-06, + "loss": 0.2545, "step": 12842 }, { - "epoch": 0.74, - "grad_norm": 0.5089151987838674, - "learning_rate": 3.3922711922014352e-06, - "loss": 0.3626, + "epoch": 0.59, + "grad_norm": 0.3822621139113642, + "learning_rate": 7.595509785490618e-06, + "loss": 0.261, "step": 12843 }, { - "epoch": 0.74, - "grad_norm": 0.21503853351438224, - "learning_rate": 3.3908745318541146e-06, - "loss": 0.1865, + "epoch": 0.59, + "grad_norm": 1.1027278261238769, + "learning_rate": 7.5940655458058575e-06, + "loss": 0.6561, "step": 12844 }, { - "epoch": 0.74, - "grad_norm": 0.31100684636950715, - "learning_rate": 3.389478100380521e-06, - "loss": 0.2, + "epoch": 0.59, + "grad_norm": 0.2703387372667672, + "learning_rate": 7.592621359386167e-06, + "loss": 0.1657, "step": 12845 }, { - "epoch": 0.74, - "grad_norm": 0.7679876511139743, - "learning_rate": 3.3880818978290196e-06, - "loss": 0.3937, + "epoch": 0.59, + "grad_norm": 0.41805180238997125, + "learning_rate": 7.591177226263515e-06, + "loss": 0.2625, "step": 12846 }, { - "epoch": 0.74, - "grad_norm": 0.32560271064088603, - "learning_rate": 3.386685924247959e-06, - "loss": 0.2838, + "epoch": 0.59, + "grad_norm": 0.36889009839411935, + "learning_rate": 7.589733146469884e-06, + "loss": 0.2835, "step": 12847 }, { - "epoch": 0.74, - "grad_norm": 0.32516907110621057, - "learning_rate": 3.3852901796856796e-06, - "loss": 0.21, + "epoch": 0.59, + "grad_norm": 0.7746047983416644, + "learning_rate": 7.588289120037236e-06, + "loss": 0.3399, "step": 12848 }, { - "epoch": 0.74, - "grad_norm": 1.2717231476692796, - "learning_rate": 3.3838946641905134e-06, - "loss": 0.4662, + "epoch": 0.59, + "grad_norm": 0.3607688395276889, + "learning_rate": 7.586845146997542e-06, + "loss": 0.2534, "step": 12849 }, { - "epoch": 0.74, - "grad_norm": 0.2335607078138597, - "learning_rate": 3.382499377810794e-06, - "loss": 0.1513, + "epoch": 0.59, + "grad_norm": 0.36356604323136266, + "learning_rate": 7.585401227382767e-06, + "loss": 0.304, "step": 12850 }, { - "epoch": 0.74, - "grad_norm": 0.2814240890847466, - "learning_rate": 3.3811043205948366e-06, - "loss": 0.2342, + "epoch": 0.59, + "grad_norm": 1.300763438943114, + "learning_rate": 7.583957361224886e-06, + "loss": 0.649, "step": 12851 }, { - "epoch": 0.74, - "grad_norm": 0.5990969852514557, - "learning_rate": 3.3797094925909526e-06, - "loss": 0.3364, + "epoch": 0.59, + "grad_norm": 0.25596065809993546, + "learning_rate": 7.58251354855586e-06, + "loss": 0.1581, "step": 12852 }, { - "epoch": 0.74, - "grad_norm": 1.1668272839736702, - "learning_rate": 3.378314893847443e-06, - "loss": 0.7382, + "epoch": 0.59, + "grad_norm": 0.517787628309515, + "learning_rate": 7.581069789407654e-06, + "loss": 0.288, "step": 12853 }, { - "epoch": 0.74, - "grad_norm": 0.3361253693355029, - "learning_rate": 3.3769205244126013e-06, - "loss": 0.2619, + "epoch": 0.59, + "grad_norm": 0.40401200213242383, + "learning_rate": 7.579626083812232e-06, + "loss": 0.3168, "step": 12854 }, { - "epoch": 0.74, - "grad_norm": 0.357664507868556, - "learning_rate": 3.3755263843347196e-06, - "loss": 0.2275, + "epoch": 0.59, + "grad_norm": 0.30688570534735093, + "learning_rate": 7.578182431801553e-06, + "loss": 0.2074, "step": 12855 }, { - "epoch": 0.74, - "grad_norm": 0.26691235584553286, - "learning_rate": 3.3741324736620752e-06, - "loss": 0.1768, + "epoch": 0.59, + "grad_norm": 1.0377389161180632, + "learning_rate": 7.576738833407583e-06, + "loss": 0.5494, "step": 12856 }, { - "epoch": 0.74, - "grad_norm": 0.3316452448222894, - "learning_rate": 3.3727387924429377e-06, - "loss": 0.2648, + "epoch": 0.59, + "grad_norm": 0.4733115634627761, + "learning_rate": 7.57529528866228e-06, + "loss": 0.3815, "step": 12857 }, { - "epoch": 0.74, - "grad_norm": 1.095159106391374, - "learning_rate": 3.371345340725568e-06, - "loss": 0.2964, + "epoch": 0.59, + "grad_norm": 0.2350094857062686, + "learning_rate": 7.573851797597602e-06, + "loss": 0.173, "step": 12858 }, { - "epoch": 0.74, - "grad_norm": 0.29582495886406085, - "learning_rate": 3.3699521185582274e-06, - "loss": 0.2747, + "epoch": 0.59, + "grad_norm": 0.6189103680759699, + "learning_rate": 7.572408360245504e-06, + "loss": 0.4336, "step": 12859 }, { - "epoch": 0.74, - "grad_norm": 0.34966377054219355, - "learning_rate": 3.3685591259891592e-06, + "epoch": 0.59, + "grad_norm": 0.44556698438964754, + "learning_rate": 7.570964976637949e-06, "loss": 0.2708, "step": 12860 }, { - "epoch": 0.74, - "grad_norm": 1.3699008142557696, - "learning_rate": 3.367166363066604e-06, - "loss": 0.2283, + "epoch": 0.59, + "grad_norm": 0.3209194729865041, + "learning_rate": 7.569521646806888e-06, + "loss": 0.1842, "step": 12861 }, { - "epoch": 0.74, - "grad_norm": 0.37940865289293624, - "learning_rate": 3.3657738298387886e-06, - "loss": 0.2389, + "epoch": 0.59, + "grad_norm": 0.39003086522875924, + "learning_rate": 7.568078370784274e-06, + "loss": 0.3208, "step": 12862 }, { - "epoch": 0.74, - "grad_norm": 0.29870327609057534, - "learning_rate": 3.3643815263539438e-06, - "loss": 0.2815, + "epoch": 0.59, + "grad_norm": 1.0004981500905583, + "learning_rate": 7.566635148602057e-06, + "loss": 0.4338, "step": 12863 }, { - "epoch": 0.74, - "grad_norm": 0.6121185536613202, - "learning_rate": 3.3629894526602847e-06, - "loss": 0.2784, + "epoch": 0.59, + "grad_norm": 0.44192144372532094, + "learning_rate": 7.565191980292197e-06, + "loss": 0.2814, "step": 12864 }, { - "epoch": 0.74, - "grad_norm": 0.39370032195369115, - "learning_rate": 3.361597608806012e-06, - "loss": 0.2998, + "epoch": 0.59, + "grad_norm": 0.47067132542285656, + "learning_rate": 7.563748865886642e-06, + "loss": 0.2741, "step": 12865 }, { - "epoch": 0.74, - "grad_norm": 0.3425273598020543, - "learning_rate": 3.360205994839326e-06, - "loss": 0.2651, + "epoch": 0.59, + "grad_norm": 0.32129290949204087, + "learning_rate": 7.562305805417337e-06, + "loss": 0.2439, "step": 12866 }, { - "epoch": 0.74, - "grad_norm": 0.38555273129152684, - "learning_rate": 3.358814610808424e-06, - "loss": 0.3122, + "epoch": 0.59, + "grad_norm": 0.4718526296014951, + "learning_rate": 7.560862798916229e-06, + "loss": 0.3357, "step": 12867 }, { - "epoch": 0.74, - "grad_norm": 0.6651039322509703, - "learning_rate": 3.3574234567614862e-06, - "loss": 0.2088, + "epoch": 0.59, + "grad_norm": 0.3636897969039732, + "learning_rate": 7.55941984641527e-06, + "loss": 0.1895, "step": 12868 }, { - "epoch": 0.74, - "grad_norm": 0.3512050670033464, - "learning_rate": 3.356032532746688e-06, - "loss": 0.2761, + "epoch": 0.59, + "grad_norm": 0.5305479271488595, + "learning_rate": 7.557976947946404e-06, + "loss": 0.381, "step": 12869 }, { - "epoch": 0.74, - "grad_norm": 0.5160563391559385, - "learning_rate": 3.354641838812195e-06, - "loss": 0.3093, + "epoch": 0.59, + "grad_norm": 0.3030235842661243, + "learning_rate": 7.556534103541575e-06, + "loss": 0.2769, "step": 12870 }, { - "epoch": 0.74, - "grad_norm": 0.2944111295460084, - "learning_rate": 3.353251375006171e-06, - "loss": 0.2444, + "epoch": 0.59, + "grad_norm": 0.7859037631330339, + "learning_rate": 7.555091313232725e-06, + "loss": 0.3216, "step": 12871 }, { - "epoch": 0.74, - "grad_norm": 0.23733746897925087, - "learning_rate": 3.3518611413767675e-06, - "loss": 0.2007, + "epoch": 0.59, + "grad_norm": 0.41619394394085024, + "learning_rate": 7.5536485770517955e-06, + "loss": 0.2639, "step": 12872 }, { - "epoch": 0.74, - "grad_norm": 1.2606823606756283, - "learning_rate": 3.3504711379721267e-06, - "loss": 0.4898, + "epoch": 0.59, + "grad_norm": 0.35468994877570575, + "learning_rate": 7.5522058950307305e-06, + "loss": 0.2647, "step": 12873 }, { - "epoch": 0.74, - "grad_norm": 0.8636552913098463, - "learning_rate": 3.3490813648403808e-06, - "loss": 0.3021, + "epoch": 0.59, + "grad_norm": 0.29911493401788203, + "learning_rate": 7.550763267201469e-06, + "loss": 0.2041, "step": 12874 }, { - "epoch": 0.74, - "grad_norm": 0.2671159928686584, - "learning_rate": 3.347691822029665e-06, - "loss": 0.2538, + "epoch": 0.59, + "grad_norm": 0.8765168263795583, + "learning_rate": 7.549320693595946e-06, + "loss": 0.425, "step": 12875 }, { - "epoch": 0.74, - "grad_norm": 0.5173905617386279, - "learning_rate": 3.346302509588095e-06, - "loss": 0.3694, + "epoch": 0.59, + "grad_norm": 0.39418334619394546, + "learning_rate": 7.5478781742461e-06, + "loss": 0.2806, "step": 12876 }, { - "epoch": 0.74, - "grad_norm": 0.17909842675389065, - "learning_rate": 3.344913427563784e-06, - "loss": 0.1285, + "epoch": 0.59, + "grad_norm": 0.7639826852802636, + "learning_rate": 7.546435709183871e-06, + "loss": 0.4758, "step": 12877 }, { - "epoch": 0.74, - "grad_norm": 0.30187736747915783, - "learning_rate": 3.343524576004833e-06, - "loss": 0.2529, + "epoch": 0.59, + "grad_norm": 0.32194552157801365, + "learning_rate": 7.544993298441189e-06, + "loss": 0.2472, "step": 12878 }, { - "epoch": 0.74, - "grad_norm": 0.34252991641349273, - "learning_rate": 3.342135954959338e-06, - "loss": 0.2783, + "epoch": 0.59, + "grad_norm": 0.45605385201732895, + "learning_rate": 7.5435509420499896e-06, + "loss": 0.3076, "step": 12879 }, { - "epoch": 0.74, - "grad_norm": 0.9488231871291415, - "learning_rate": 3.3407475644753907e-06, - "loss": 0.3745, + "epoch": 0.59, + "grad_norm": 0.2826810367449615, + "learning_rate": 7.5421086400422e-06, + "loss": 0.2107, "step": 12880 }, { - "epoch": 0.74, - "grad_norm": 0.29818740197209903, - "learning_rate": 3.3393594046010693e-06, - "loss": 0.208, + "epoch": 0.59, + "grad_norm": 0.47706502200032874, + "learning_rate": 7.5406663924497615e-06, + "loss": 0.2693, "step": 12881 }, { - "epoch": 0.74, - "grad_norm": 1.210855898826414, - "learning_rate": 3.3379714753844463e-06, - "loss": 0.633, + "epoch": 0.59, + "grad_norm": 0.4114497182822956, + "learning_rate": 7.539224199304598e-06, + "loss": 0.2667, "step": 12882 }, { - "epoch": 0.74, - "grad_norm": 0.25512255050159116, - "learning_rate": 3.3365837768735798e-06, - "loss": 0.2485, + "epoch": 0.59, + "grad_norm": 0.5675418669488211, + "learning_rate": 7.537782060638641e-06, + "loss": 0.3727, "step": 12883 }, { - "epoch": 0.74, - "grad_norm": 0.29905505580538666, - "learning_rate": 3.335196309116534e-06, - "loss": 0.1868, + "epoch": 0.59, + "grad_norm": 1.0185805933549796, + "learning_rate": 7.536339976483815e-06, + "loss": 0.482, "step": 12884 }, { - "epoch": 0.74, - "grad_norm": 0.5038307443785937, - "learning_rate": 3.3338090721613547e-06, - "loss": 0.2664, + "epoch": 0.59, + "grad_norm": 0.4570731343768416, + "learning_rate": 7.534897946872042e-06, + "loss": 0.2786, "step": 12885 }, { - "epoch": 0.74, - "grad_norm": 0.5952356862239482, - "learning_rate": 3.332422066056079e-06, - "loss": 0.3309, + "epoch": 0.59, + "grad_norm": 0.2397819651941745, + "learning_rate": 7.533455971835257e-06, + "loss": 0.2128, "step": 12886 }, { - "epoch": 0.74, - "grad_norm": 0.2904352494406116, - "learning_rate": 3.3310352908487387e-06, - "loss": 0.1837, + "epoch": 0.59, + "grad_norm": 0.7727184499499008, + "learning_rate": 7.532014051405381e-06, + "loss": 0.3074, "step": 12887 }, { - "epoch": 0.74, - "grad_norm": 0.5566544726291275, - "learning_rate": 3.3296487465873617e-06, - "loss": 0.3727, + "epoch": 0.59, + "grad_norm": 0.41957746360385845, + "learning_rate": 7.530572185614333e-06, + "loss": 0.2807, "step": 12888 }, { - "epoch": 0.74, - "grad_norm": 1.338582329143749, - "learning_rate": 3.328262433319962e-06, - "loss": 0.7545, + "epoch": 0.59, + "grad_norm": 0.41918086966068613, + "learning_rate": 7.529130374494036e-06, + "loss": 0.3197, "step": 12889 }, { - "epoch": 0.74, - "grad_norm": 0.18720255509199032, - "learning_rate": 3.3268763510945477e-06, - "loss": 0.1371, + "epoch": 0.59, + "grad_norm": 0.5367698944695509, + "learning_rate": 7.527688618076413e-06, + "loss": 0.3761, "step": 12890 }, { - "epoch": 0.74, - "grad_norm": 0.3551963786547063, - "learning_rate": 3.325490499959114e-06, - "loss": 0.2981, + "epoch": 0.59, + "grad_norm": 0.32439921311449227, + "learning_rate": 7.52624691639338e-06, + "loss": 0.2158, "step": 12891 }, { - "epoch": 0.74, - "grad_norm": 0.6958333900137992, - "learning_rate": 3.3241048799616616e-06, - "loss": 0.4138, + "epoch": 0.59, + "grad_norm": 0.2833917110667752, + "learning_rate": 7.524805269476858e-06, + "loss": 0.1687, "step": 12892 }, { - "epoch": 0.74, - "grad_norm": 0.3327651923692982, - "learning_rate": 3.3227194911501705e-06, - "loss": 0.267, + "epoch": 0.59, + "grad_norm": 0.46308838947875286, + "learning_rate": 7.523363677358757e-06, + "loss": 0.3309, "step": 12893 }, { - "epoch": 0.74, - "grad_norm": 1.2262784695854978, - "learning_rate": 3.3213343335726157e-06, - "loss": 0.3285, + "epoch": 0.59, + "grad_norm": 0.32273526539448355, + "learning_rate": 7.521922140071003e-06, + "loss": 0.2257, "step": 12894 }, { - "epoch": 0.74, - "grad_norm": 0.35721902129772304, - "learning_rate": 3.3199494072769657e-06, - "loss": 0.3184, + "epoch": 0.59, + "grad_norm": 1.4159908074856369, + "learning_rate": 7.520480657645502e-06, + "loss": 0.6065, "step": 12895 }, { - "epoch": 0.74, - "grad_norm": 0.34515026641036906, - "learning_rate": 3.3185647123111776e-06, - "loss": 0.2565, + "epoch": 0.59, + "grad_norm": 1.1676292066866414, + "learning_rate": 7.519039230114169e-06, + "loss": 0.7432, "step": 12896 }, { - "epoch": 0.74, - "grad_norm": 0.4102070107736664, - "learning_rate": 3.3171802487232087e-06, - "loss": 0.1409, + "epoch": 0.59, + "grad_norm": 0.3871499384259446, + "learning_rate": 7.5175978575089135e-06, + "loss": 0.2035, "step": 12897 }, { - "epoch": 0.74, - "grad_norm": 0.6009426131881008, - "learning_rate": 3.3157960165610035e-06, - "loss": 0.333, + "epoch": 0.59, + "grad_norm": 0.3523947189248126, + "learning_rate": 7.516156539861652e-06, + "loss": 0.2911, "step": 12898 }, { - "epoch": 0.74, - "grad_norm": 0.26019221615739213, - "learning_rate": 3.314412015872489e-06, - "loss": 0.253, + "epoch": 0.59, + "grad_norm": 0.38413073574532336, + "learning_rate": 7.514715277204292e-06, + "loss": 0.2387, "step": 12899 }, { - "epoch": 0.74, - "grad_norm": 1.239600152449063, - "learning_rate": 3.313028246705603e-06, - "loss": 0.3466, + "epoch": 0.59, + "grad_norm": 0.3841868225721464, + "learning_rate": 7.513274069568739e-06, + "loss": 0.1866, "step": 12900 }, { - "epoch": 0.74, - "grad_norm": 0.4657976696878919, - "learning_rate": 3.3116447091082593e-06, - "loss": 0.3098, + "epoch": 0.59, + "grad_norm": 0.41499574535414846, + "learning_rate": 7.511832916986902e-06, + "loss": 0.2892, "step": 12901 }, { - "epoch": 0.74, - "grad_norm": 0.3989409195000362, - "learning_rate": 3.310261403128373e-06, - "loss": 0.2568, + "epoch": 0.59, + "grad_norm": 0.5461189823082333, + "learning_rate": 7.510391819490683e-06, + "loss": 0.4186, "step": 12902 }, { - "epoch": 0.74, - "grad_norm": 0.28897844817397594, - "learning_rate": 3.3088783288138436e-06, - "loss": 0.2548, + "epoch": 0.59, + "grad_norm": 0.6138453773191452, + "learning_rate": 7.508950777111993e-06, + "loss": 0.3808, "step": 12903 }, { - "epoch": 0.74, - "grad_norm": 0.4652505440048004, - "learning_rate": 3.307495486212572e-06, - "loss": 0.2714, + "epoch": 0.59, + "grad_norm": 0.4387332854452208, + "learning_rate": 7.507509789882732e-06, + "loss": 0.2667, "step": 12904 }, { - "epoch": 0.74, - "grad_norm": 0.388391044884601, - "learning_rate": 3.306112875372445e-06, - "loss": 0.2605, + "epoch": 0.59, + "grad_norm": 0.3950588510717907, + "learning_rate": 7.506068857834801e-06, + "loss": 0.2761, "step": 12905 }, { - "epoch": 0.74, - "grad_norm": 0.5730762996066526, - "learning_rate": 3.3047304963413407e-06, - "loss": 0.3871, + "epoch": 0.59, + "grad_norm": 0.2847227124557217, + "learning_rate": 7.504627981000101e-06, + "loss": 0.2151, "step": 12906 }, { - "epoch": 0.74, - "grad_norm": 0.3801978509249738, - "learning_rate": 3.3033483491671316e-06, - "loss": 0.2487, + "epoch": 0.59, + "grad_norm": 0.4038562941356066, + "learning_rate": 7.503187159410533e-06, + "loss": 0.2479, "step": 12907 }, { - "epoch": 0.74, - "grad_norm": 0.4299270873716904, - "learning_rate": 3.3019664338976787e-06, - "loss": 0.2827, + "epoch": 0.59, + "grad_norm": 1.354104507667754, + "learning_rate": 7.501746393097995e-06, + "loss": 0.8238, "step": 12908 }, { - "epoch": 0.74, - "grad_norm": 0.463795814167745, - "learning_rate": 3.300584750580842e-06, - "loss": 0.3093, + "epoch": 0.59, + "grad_norm": 0.34265097540483663, + "learning_rate": 7.500305682094385e-06, + "loss": 0.2646, "step": 12909 }, { - "epoch": 0.74, - "grad_norm": 0.2269061260272887, - "learning_rate": 3.2992032992644686e-06, - "loss": 0.1444, + "epoch": 0.59, + "grad_norm": 0.4049564492703838, + "learning_rate": 7.498865026431593e-06, + "loss": 0.2515, "step": 12910 }, { - "epoch": 0.74, - "grad_norm": 0.2822473309634225, - "learning_rate": 3.2978220799963955e-06, - "loss": 0.2462, + "epoch": 0.59, + "grad_norm": 0.5021444033513522, + "learning_rate": 7.497424426141524e-06, + "loss": 0.312, "step": 12911 }, { - "epoch": 0.74, - "grad_norm": 1.558699342627396, - "learning_rate": 3.2964410928244526e-06, - "loss": 0.739, + "epoch": 0.59, + "grad_norm": 0.2936307844217129, + "learning_rate": 7.495983881256067e-06, + "loss": 0.2014, "step": 12912 }, { - "epoch": 0.74, - "grad_norm": 0.8887185637630161, - "learning_rate": 3.2950603377964706e-06, - "loss": 0.3034, + "epoch": 0.59, + "grad_norm": 0.3606542715199571, + "learning_rate": 7.494543391807112e-06, + "loss": 0.2401, "step": 12913 }, { - "epoch": 0.74, - "grad_norm": 0.3173946544253534, - "learning_rate": 3.29367981496026e-06, - "loss": 0.2621, + "epoch": 0.59, + "grad_norm": 0.4870336436072836, + "learning_rate": 7.493102957826552e-06, + "loss": 0.3833, "step": 12914 }, { - "epoch": 0.74, - "grad_norm": 0.3141577245945441, - "learning_rate": 3.29229952436363e-06, - "loss": 0.2978, + "epoch": 0.59, + "grad_norm": 0.5262801541303066, + "learning_rate": 7.49166257934627e-06, + "loss": 0.2958, "step": 12915 }, { - "epoch": 0.74, - "grad_norm": 0.3278313577965663, - "learning_rate": 3.2909194660543742e-06, - "loss": 0.1864, + "epoch": 0.59, + "grad_norm": 0.4231854016782443, + "learning_rate": 7.4902222563981675e-06, + "loss": 0.3421, "step": 12916 }, { - "epoch": 0.74, - "grad_norm": 0.37208100190566185, - "learning_rate": 3.289539640080294e-06, - "loss": 0.1933, + "epoch": 0.59, + "grad_norm": 0.36273439945042396, + "learning_rate": 7.488781989014124e-06, + "loss": 0.2514, "step": 12917 }, { - "epoch": 0.74, - "grad_norm": 1.3163936504011386, - "learning_rate": 3.288160046489166e-06, - "loss": 0.8189, + "epoch": 0.59, + "grad_norm": 0.2746929236774449, + "learning_rate": 7.487341777226027e-06, + "loss": 0.1506, "step": 12918 }, { - "epoch": 0.74, - "grad_norm": 0.3683715557013782, - "learning_rate": 3.2867806853287675e-06, - "loss": 0.2944, + "epoch": 0.59, + "grad_norm": 0.3936245599877834, + "learning_rate": 7.48590162106576e-06, + "loss": 0.2899, "step": 12919 }, { - "epoch": 0.74, - "grad_norm": 0.37802142040575226, - "learning_rate": 3.2854015566468643e-06, - "loss": 0.1896, + "epoch": 0.59, + "grad_norm": 0.7090525804302139, + "learning_rate": 7.484461520565209e-06, + "loss": 0.3783, "step": 12920 }, { - "epoch": 0.74, - "grad_norm": 0.6399011890671984, - "learning_rate": 3.284022660491214e-06, - "loss": 0.3545, + "epoch": 0.59, + "grad_norm": 0.32721952551972744, + "learning_rate": 7.483021475756257e-06, + "loss": 0.2609, "step": 12921 }, { - "epoch": 0.74, - "grad_norm": 0.22873817154729156, - "learning_rate": 3.2826439969095737e-06, - "loss": 0.2049, + "epoch": 0.59, + "grad_norm": 0.3550835500263203, + "learning_rate": 7.481581486670783e-06, + "loss": 0.3103, "step": 12922 }, { - "epoch": 0.74, - "grad_norm": 0.3336483088508467, - "learning_rate": 3.281265565949683e-06, - "loss": 0.1561, + "epoch": 0.59, + "grad_norm": 1.0249920983102796, + "learning_rate": 7.480141553340665e-06, + "loss": 0.3376, "step": 12923 }, { - "epoch": 0.74, - "grad_norm": 0.5578981476572488, - "learning_rate": 3.2798873676592755e-06, - "loss": 0.4109, + "epoch": 0.59, + "grad_norm": 0.24647472121299804, + "learning_rate": 7.478701675797786e-06, + "loss": 0.1414, "step": 12924 }, { - "epoch": 0.74, - "grad_norm": 0.6593018831054235, - "learning_rate": 3.2785094020860777e-06, - "loss": 0.3806, + "epoch": 0.59, + "grad_norm": 0.28260057752083656, + "learning_rate": 7.4772618540740225e-06, + "loss": 0.2898, "step": 12925 }, { - "epoch": 0.74, - "grad_norm": 0.36905433775237373, - "learning_rate": 3.277131669277813e-06, - "loss": 0.2017, + "epoch": 0.59, + "grad_norm": 0.9032353049464689, + "learning_rate": 7.47582208820125e-06, + "loss": 0.3498, "step": 12926 }, { - "epoch": 0.74, - "grad_norm": 0.3000027094105889, - "learning_rate": 3.275754169282189e-06, - "loss": 0.2773, + "epoch": 0.59, + "grad_norm": 0.511033236305105, + "learning_rate": 7.47438237821134e-06, + "loss": 0.3477, "step": 12927 }, { - "epoch": 0.74, - "grad_norm": 0.42583234394958763, - "learning_rate": 3.2743769021469096e-06, - "loss": 0.2012, + "epoch": 0.59, + "grad_norm": 0.39565591681249984, + "learning_rate": 7.472942724136174e-06, + "loss": 0.2844, "step": 12928 }, { - "epoch": 0.74, - "grad_norm": 0.2963137515632942, - "learning_rate": 3.2729998679196663e-06, - "loss": 0.1866, + "epoch": 0.59, + "grad_norm": 0.4067107530259097, + "learning_rate": 7.47150312600762e-06, + "loss": 0.3023, "step": 12929 }, { - "epoch": 0.74, - "grad_norm": 0.2859908008194674, - "learning_rate": 3.2716230666481506e-06, - "loss": 0.2344, + "epoch": 0.59, + "grad_norm": 0.1721850524228638, + "learning_rate": 7.470063583857552e-06, + "loss": 0.0729, "step": 12930 }, { - "epoch": 0.74, - "grad_norm": 0.458580153617439, - "learning_rate": 3.2702464983800386e-06, - "loss": 0.3144, + "epoch": 0.59, + "grad_norm": 0.43829835967392805, + "learning_rate": 7.468624097717836e-06, + "loss": 0.3041, "step": 12931 }, { - "epoch": 0.74, - "grad_norm": 0.3739316025400197, - "learning_rate": 3.2688701631630047e-06, - "loss": 0.2944, + "epoch": 0.59, + "grad_norm": 0.6352649910966798, + "learning_rate": 7.467184667620337e-06, + "loss": 0.4056, "step": 12932 }, { - "epoch": 0.74, - "grad_norm": 0.5003894526988127, - "learning_rate": 3.2674940610447005e-06, - "loss": 0.2209, + "epoch": 0.59, + "grad_norm": 0.330768838277074, + "learning_rate": 7.465745293596934e-06, + "loss": 0.2376, "step": 12933 }, { - "epoch": 0.74, - "grad_norm": 0.2815996714466963, - "learning_rate": 3.2661181920727913e-06, - "loss": 0.2559, + "epoch": 0.59, + "grad_norm": 0.4090030683472436, + "learning_rate": 7.464305975679488e-06, + "loss": 0.3319, "step": 12934 }, { - "epoch": 0.74, - "grad_norm": 0.31949144720216227, - "learning_rate": 3.2647425562949196e-06, - "loss": 0.2264, + "epoch": 0.59, + "grad_norm": 1.4835769819423026, + "learning_rate": 7.462866713899863e-06, + "loss": 0.7763, "step": 12935 }, { - "epoch": 0.74, - "grad_norm": 0.43227858764097016, - "learning_rate": 3.263367153758723e-06, - "loss": 0.2649, + "epoch": 0.59, + "grad_norm": 0.17897073925182497, + "learning_rate": 7.461427508289922e-06, + "loss": 0.0858, "step": 12936 }, { - "epoch": 0.74, - "grad_norm": 0.6654739405314112, - "learning_rate": 3.26199198451183e-06, - "loss": 0.3732, + "epoch": 0.59, + "grad_norm": 0.2675707804343146, + "learning_rate": 7.459988358881532e-06, + "loss": 0.2664, "step": 12937 }, { - "epoch": 0.74, - "grad_norm": 0.4519785519202604, - "learning_rate": 3.2606170486018662e-06, - "loss": 0.312, + "epoch": 0.59, + "grad_norm": 0.6790215164307541, + "learning_rate": 7.458549265706549e-06, + "loss": 0.4128, "step": 12938 }, { - "epoch": 0.74, - "grad_norm": 0.29017049811434276, - "learning_rate": 3.2592423460764457e-06, - "loss": 0.2275, + "epoch": 0.59, + "grad_norm": 0.6852820439773292, + "learning_rate": 7.457110228796838e-06, + "loss": 0.271, "step": 12939 }, { - "epoch": 0.74, - "grad_norm": 0.36334147109219417, - "learning_rate": 3.257867876983173e-06, - "loss": 0.1923, + "epoch": 0.59, + "grad_norm": 0.3705358249848323, + "learning_rate": 7.455671248184253e-06, + "loss": 0.2981, "step": 12940 }, { - "epoch": 0.74, - "grad_norm": 0.5450956167249345, - "learning_rate": 3.256493641369641e-06, - "loss": 0.315, + "epoch": 0.59, + "grad_norm": 0.4157289229678683, + "learning_rate": 7.454232323900656e-06, + "loss": 0.3336, "step": 12941 }, { - "epoch": 0.74, - "grad_norm": 0.4020490717875685, - "learning_rate": 3.2551196392834496e-06, - "loss": 0.3194, + "epoch": 0.59, + "grad_norm": 0.45687373389646935, + "learning_rate": 7.452793455977903e-06, + "loss": 0.2965, "step": 12942 }, { - "epoch": 0.74, - "grad_norm": 0.32817468224538004, - "learning_rate": 3.2537458707721735e-06, - "loss": 0.2755, + "epoch": 0.59, + "grad_norm": 0.2827466822253932, + "learning_rate": 7.451354644447847e-06, + "loss": 0.1825, "step": 12943 }, { - "epoch": 0.74, - "grad_norm": 0.5816663916046452, - "learning_rate": 3.252372335883388e-06, - "loss": 0.3265, + "epoch": 0.59, + "grad_norm": 1.0328241254967943, + "learning_rate": 7.449915889342343e-06, + "loss": 0.3834, "step": 12944 }, { - "epoch": 0.74, - "grad_norm": 0.3815486034011904, - "learning_rate": 3.250999034664659e-06, - "loss": 0.2811, + "epoch": 0.59, + "grad_norm": 0.3528911549065219, + "learning_rate": 7.448477190693238e-06, + "loss": 0.3115, "step": 12945 }, { - "epoch": 0.74, - "grad_norm": 0.3526554552274751, - "learning_rate": 3.24962596716354e-06, - "loss": 0.2452, + "epoch": 0.59, + "grad_norm": 0.3518699507900261, + "learning_rate": 7.447038548532395e-06, + "loss": 0.2176, "step": 12946 }, { - "epoch": 0.74, - "grad_norm": 0.24509700046609045, - "learning_rate": 3.2482531334275856e-06, - "loss": 0.1689, + "epoch": 0.59, + "grad_norm": 1.018753186019547, + "learning_rate": 7.445599962891656e-06, + "loss": 0.5263, "step": 12947 }, { - "epoch": 0.74, - "grad_norm": 0.3338020441543314, - "learning_rate": 3.2468805335043363e-06, - "loss": 0.3091, + "epoch": 0.59, + "grad_norm": 0.32103850373717274, + "learning_rate": 7.444161433802874e-06, + "loss": 0.2499, "step": 12948 }, { - "epoch": 0.74, - "grad_norm": 0.9020955309045062, - "learning_rate": 3.2455081674413226e-06, - "loss": 0.3683, + "epoch": 0.59, + "grad_norm": 0.31167196264725294, + "learning_rate": 7.44272296129789e-06, + "loss": 0.2251, "step": 12949 }, { - "epoch": 0.74, - "grad_norm": 0.3050120934170287, - "learning_rate": 3.2441360352860675e-06, - "loss": 0.2703, + "epoch": 0.59, + "grad_norm": 0.6712217770428013, + "learning_rate": 7.441284545408558e-06, + "loss": 0.2855, "step": 12950 }, { - "epoch": 0.74, - "grad_norm": 0.41887257623271296, - "learning_rate": 3.2427641370860953e-06, - "loss": 0.273, + "epoch": 0.59, + "grad_norm": 0.7841371738004029, + "learning_rate": 7.4398461861667214e-06, + "loss": 0.509, "step": 12951 }, { - "epoch": 0.74, - "grad_norm": 0.42530719140404794, - "learning_rate": 3.241392472888909e-06, - "loss": 0.1314, + "epoch": 0.6, + "grad_norm": 0.4020468075489816, + "learning_rate": 7.438407883604221e-06, + "loss": 0.2711, "step": 12952 }, { - "epoch": 0.74, - "grad_norm": 0.38376438245705596, - "learning_rate": 3.240021042742012e-06, - "loss": 0.2803, + "epoch": 0.6, + "grad_norm": 0.36936884797303865, + "learning_rate": 7.4369696377529e-06, + "loss": 0.2532, "step": 12953 }, { - "epoch": 0.74, - "grad_norm": 0.47996045275196836, - "learning_rate": 3.2386498466928916e-06, - "loss": 0.3495, + "epoch": 0.6, + "grad_norm": 0.45911041645853273, + "learning_rate": 7.435531448644603e-06, + "loss": 0.2501, "step": 12954 }, { - "epoch": 0.74, - "grad_norm": 0.3492844356549713, - "learning_rate": 3.237278884789039e-06, - "loss": 0.3225, + "epoch": 0.6, + "grad_norm": 0.36566309824678284, + "learning_rate": 7.434093316311167e-06, + "loss": 0.2602, "step": 12955 }, { - "epoch": 0.74, - "grad_norm": 0.21621202245976492, - "learning_rate": 3.235908157077929e-06, - "loss": 0.0953, + "epoch": 0.6, + "grad_norm": 0.4160880116754914, + "learning_rate": 7.432655240784433e-06, + "loss": 0.238, "step": 12956 }, { - "epoch": 0.74, - "grad_norm": 0.37093987547274837, - "learning_rate": 3.234537663607028e-06, - "loss": 0.2762, + "epoch": 0.6, + "grad_norm": 0.5553620355590713, + "learning_rate": 7.431217222096233e-06, + "loss": 0.3861, "step": 12957 }, { - "epoch": 0.74, - "grad_norm": 0.33780033787498953, - "learning_rate": 3.233167404423797e-06, - "loss": 0.2939, + "epoch": 0.6, + "grad_norm": 0.3145543464002043, + "learning_rate": 7.429779260278411e-06, + "loss": 0.2552, "step": 12958 }, { - "epoch": 0.74, - "grad_norm": 0.8582542909743728, - "learning_rate": 3.231797379575684e-06, - "loss": 0.2335, + "epoch": 0.6, + "grad_norm": 0.9477306390869384, + "learning_rate": 7.428341355362803e-06, + "loss": 0.2483, "step": 12959 }, { - "epoch": 0.74, - "grad_norm": 0.34765139094691233, - "learning_rate": 3.230427589110141e-06, - "loss": 0.2791, + "epoch": 0.6, + "grad_norm": 0.40991439409074804, + "learning_rate": 7.426903507381235e-06, + "loss": 0.3306, "step": 12960 }, { - "epoch": 0.74, - "grad_norm": 1.1022929147980127, - "learning_rate": 3.229058033074599e-06, - "loss": 0.7693, + "epoch": 0.6, + "grad_norm": 0.2794768696400758, + "learning_rate": 7.4254657163655456e-06, + "loss": 0.238, "step": 12961 }, { - "epoch": 0.74, - "grad_norm": 0.2048980960713727, - "learning_rate": 3.227688711516486e-06, - "loss": 0.18, + "epoch": 0.6, + "grad_norm": 0.3976899003163253, + "learning_rate": 7.4240279823475584e-06, + "loss": 0.191, "step": 12962 }, { - "epoch": 0.74, - "grad_norm": 0.3108323466882076, - "learning_rate": 3.2263196244832183e-06, - "loss": 0.243, + "epoch": 0.6, + "grad_norm": 0.7362834362347954, + "learning_rate": 7.422590305359112e-06, + "loss": 0.4724, "step": 12963 }, { - "epoch": 0.74, - "grad_norm": 1.351882544006569, - "learning_rate": 3.224950772022214e-06, - "loss": 0.5611, + "epoch": 0.6, + "grad_norm": 0.31000032295001145, + "learning_rate": 7.421152685432034e-06, + "loss": 0.2344, "step": 12964 }, { - "epoch": 0.74, - "grad_norm": 0.633610003120019, - "learning_rate": 3.223582154180873e-06, - "loss": 0.2477, + "epoch": 0.6, + "grad_norm": 0.37723975421260075, + "learning_rate": 7.419715122598149e-06, + "loss": 0.3395, "step": 12965 }, { - "epoch": 0.74, - "grad_norm": 0.293079591213699, - "learning_rate": 3.2222137710065915e-06, - "loss": 0.2608, + "epoch": 0.6, + "grad_norm": 0.5409301455347854, + "learning_rate": 7.418277616889282e-06, + "loss": 0.2828, "step": 12966 }, { - "epoch": 0.75, - "grad_norm": 1.2513694259705406, - "learning_rate": 3.2208456225467554e-06, - "loss": 0.748, + "epoch": 0.6, + "grad_norm": 0.3649752644331592, + "learning_rate": 7.416840168337263e-06, + "loss": 0.2516, "step": 12967 }, { - "epoch": 0.75, - "grad_norm": 0.2295037376326925, - "learning_rate": 3.219477708848743e-06, - "loss": 0.1619, + "epoch": 0.6, + "grad_norm": 0.573393814439926, + "learning_rate": 7.415402776973913e-06, + "loss": 0.289, "step": 12968 }, { - "epoch": 0.75, - "grad_norm": 0.3458317104854778, - "learning_rate": 3.2181100299599268e-06, - "loss": 0.2019, + "epoch": 0.6, + "grad_norm": 0.3522208471237922, + "learning_rate": 7.413965442831054e-06, + "loss": 0.265, "step": 12969 }, { - "epoch": 0.75, - "grad_norm": 0.3743554598909583, - "learning_rate": 3.2167425859276678e-06, - "loss": 0.3023, + "epoch": 0.6, + "grad_norm": 0.33196103352826073, + "learning_rate": 7.412528165940505e-06, + "loss": 0.2303, "step": 12970 }, { - "epoch": 0.75, - "grad_norm": 0.5944735307244252, - "learning_rate": 3.215375376799319e-06, - "loss": 0.3297, + "epoch": 0.6, + "grad_norm": 0.4561045034561405, + "learning_rate": 7.411090946334092e-06, + "loss": 0.2507, "step": 12971 }, { - "epoch": 0.75, - "grad_norm": 0.3758967024239241, - "learning_rate": 3.214008402622232e-06, - "loss": 0.2157, + "epoch": 0.6, + "grad_norm": 0.4875120685821009, + "learning_rate": 7.409653784043629e-06, + "loss": 0.2934, "step": 12972 }, { - "epoch": 0.75, - "grad_norm": 1.0402206032842936, - "learning_rate": 3.2126416634437428e-06, - "loss": 0.6525, + "epoch": 0.6, + "grad_norm": 0.29673461547138197, + "learning_rate": 7.408216679100935e-06, + "loss": 0.2512, "step": 12973 }, { - "epoch": 0.75, - "grad_norm": 0.2605416145760141, - "learning_rate": 3.2112751593111803e-06, - "loss": 0.2228, + "epoch": 0.6, + "grad_norm": 0.8601398064876824, + "learning_rate": 7.4067796315378256e-06, + "loss": 0.5544, "step": 12974 }, { - "epoch": 0.75, - "grad_norm": 0.2663291579048332, - "learning_rate": 3.2099088902718635e-06, - "loss": 0.1584, + "epoch": 0.6, + "grad_norm": 0.2612260558981168, + "learning_rate": 7.405342641386113e-06, + "loss": 0.1607, "step": 12975 }, { - "epoch": 0.75, - "grad_norm": 0.7818741848797203, - "learning_rate": 3.2085428563731137e-06, - "loss": 0.4055, + "epoch": 0.6, + "grad_norm": 0.26771645064023897, + "learning_rate": 7.4039057086776165e-06, + "loss": 0.212, "step": 12976 }, { - "epoch": 0.75, - "grad_norm": 1.0304535099556142, - "learning_rate": 3.207177057662233e-06, - "loss": 0.4526, + "epoch": 0.6, + "grad_norm": 0.3650315700412918, + "learning_rate": 7.402468833444147e-06, + "loss": 0.2876, "step": 12977 }, { - "epoch": 0.75, - "grad_norm": 0.23643939698926889, - "learning_rate": 3.205811494186518e-06, - "loss": 0.2101, + "epoch": 0.6, + "grad_norm": 0.8963960601936947, + "learning_rate": 7.401032015717513e-06, + "loss": 0.3917, "step": 12978 }, { - "epoch": 0.75, - "grad_norm": 1.1088209673773897, - "learning_rate": 3.2044461659932557e-06, - "loss": 0.7036, + "epoch": 0.6, + "grad_norm": 0.34486202619409023, + "learning_rate": 7.3995952555295215e-06, + "loss": 0.2134, "step": 12979 }, { - "epoch": 0.75, - "grad_norm": 0.286646221853249, - "learning_rate": 3.2030810731297334e-06, - "loss": 0.1906, + "epoch": 0.6, + "grad_norm": 1.0658749133171492, + "learning_rate": 7.398158552911987e-06, + "loss": 0.5699, "step": 12980 }, { - "epoch": 0.75, - "grad_norm": 0.3675670697198052, - "learning_rate": 3.2017162156432222e-06, - "loss": 0.2737, + "epoch": 0.6, + "grad_norm": 0.36930349359416115, + "learning_rate": 7.3967219078967155e-06, + "loss": 0.3199, "step": 12981 }, { - "epoch": 0.75, - "grad_norm": 0.42808874432795885, - "learning_rate": 3.2003515935809858e-06, - "loss": 0.248, + "epoch": 0.6, + "grad_norm": 0.292476351533126, + "learning_rate": 7.395285320515513e-06, + "loss": 0.1822, "step": 12982 }, { - "epoch": 0.75, - "grad_norm": 0.9420101336635036, - "learning_rate": 3.1989872069902804e-06, - "loss": 0.3599, + "epoch": 0.6, + "grad_norm": 0.3303378213665017, + "learning_rate": 7.39384879080018e-06, + "loss": 0.2117, "step": 12983 }, { - "epoch": 0.75, - "grad_norm": 0.3487176167214536, - "learning_rate": 3.197623055918354e-06, - "loss": 0.2668, + "epoch": 0.6, + "grad_norm": 0.40988130099756026, + "learning_rate": 7.392412318782524e-06, + "loss": 0.3246, "step": 12984 }, { - "epoch": 0.75, - "grad_norm": 1.4368984598467063, - "learning_rate": 3.196259140412451e-06, - "loss": 0.4066, + "epoch": 0.6, + "grad_norm": 0.3255998657446926, + "learning_rate": 7.390975904494346e-06, + "loss": 0.1881, "step": 12985 }, { - "epoch": 0.75, - "grad_norm": 0.2809539051773061, - "learning_rate": 3.1948954605198014e-06, - "loss": 0.2199, + "epoch": 0.6, + "grad_norm": 1.216094188090242, + "learning_rate": 7.389539547967448e-06, + "loss": 0.658, "step": 12986 }, { - "epoch": 0.75, - "grad_norm": 0.3451613359950105, - "learning_rate": 3.193532016287629e-06, - "loss": 0.2908, + "epoch": 0.6, + "grad_norm": 0.43338775316463984, + "learning_rate": 7.388103249233627e-06, + "loss": 0.2883, "step": 12987 }, { - "epoch": 0.75, - "grad_norm": 0.5062091570808256, - "learning_rate": 3.1921688077631476e-06, - "loss": 0.2377, + "epoch": 0.6, + "grad_norm": 0.3177226572434137, + "learning_rate": 7.3866670083246835e-06, + "loss": 0.1984, "step": 12988 }, { - "epoch": 0.75, - "grad_norm": 0.39386024451333884, - "learning_rate": 3.19080583499357e-06, - "loss": 0.2658, + "epoch": 0.6, + "grad_norm": 0.3132683181742891, + "learning_rate": 7.385230825272414e-06, + "loss": 0.2614, "step": 12989 }, { - "epoch": 0.75, - "grad_norm": 0.3344590966805647, - "learning_rate": 3.189443098026094e-06, - "loss": 0.2519, + "epoch": 0.6, + "grad_norm": 0.6705016622637922, + "learning_rate": 7.383794700108614e-06, + "loss": 0.463, "step": 12990 }, { - "epoch": 0.75, - "grad_norm": 0.41114489266247534, - "learning_rate": 3.188080596907911e-06, - "loss": 0.2871, + "epoch": 0.6, + "grad_norm": 0.45946291080906965, + "learning_rate": 7.382358632865079e-06, + "loss": 0.2974, "step": 12991 }, { - "epoch": 0.75, - "grad_norm": 0.3871947212487157, - "learning_rate": 3.1867183316862005e-06, - "loss": 0.2061, + "epoch": 0.6, + "grad_norm": 0.31939324130768193, + "learning_rate": 7.380922623573594e-06, + "loss": 0.2458, "step": 12992 }, { - "epoch": 0.75, - "grad_norm": 0.3329933325144057, - "learning_rate": 3.1853563024081446e-06, - "loss": 0.2933, + "epoch": 0.6, + "grad_norm": 1.185453823755025, + "learning_rate": 7.379486672265964e-06, + "loss": 0.7071, "step": 12993 }, { - "epoch": 0.75, - "grad_norm": 0.3416280510762696, - "learning_rate": 3.183994509120907e-06, - "loss": 0.3046, + "epoch": 0.6, + "grad_norm": 0.41128090508604503, + "learning_rate": 7.378050778973973e-06, + "loss": 0.254, "step": 12994 }, { - "epoch": 0.75, - "grad_norm": 0.5561745393411215, - "learning_rate": 3.182632951871646e-06, - "loss": 0.1458, + "epoch": 0.6, + "grad_norm": 0.6022905690643766, + "learning_rate": 7.376614943729412e-06, + "loss": 0.2889, "step": 12995 }, { - "epoch": 0.75, - "grad_norm": 0.32559993723873837, - "learning_rate": 3.18127163070751e-06, - "loss": 0.2581, + "epoch": 0.6, + "grad_norm": 0.23883759389694836, + "learning_rate": 7.375179166564062e-06, + "loss": 0.2227, "step": 12996 }, { - "epoch": 0.75, - "grad_norm": 0.5157714793821341, - "learning_rate": 3.1799105456756463e-06, - "loss": 0.3919, + "epoch": 0.6, + "grad_norm": 0.36485720940927485, + "learning_rate": 7.373743447509721e-06, + "loss": 0.253, "step": 12997 }, { - "epoch": 0.75, - "grad_norm": 0.45047672562360686, - "learning_rate": 3.1785496968231877e-06, - "loss": 0.2458, + "epoch": 0.6, + "grad_norm": 0.93658813794875, + "learning_rate": 7.372307786598168e-06, + "loss": 0.3241, "step": 12998 }, { - "epoch": 0.75, - "grad_norm": 0.3044374430749099, - "learning_rate": 3.1771890841972643e-06, - "loss": 0.2535, + "epoch": 0.6, + "grad_norm": 0.9687499360416071, + "learning_rate": 7.3708721838611865e-06, + "loss": 0.5506, "step": 12999 }, { - "epoch": 0.75, - "grad_norm": 0.4077212584271979, - "learning_rate": 3.1758287078449812e-06, - "loss": 0.2976, + "epoch": 0.6, + "grad_norm": 0.36090762209432337, + "learning_rate": 7.36943663933056e-06, + "loss": 0.2649, "step": 13000 }, { - "epoch": 0.75, - "grad_norm": 0.2829686186815421, - "learning_rate": 3.174468567813461e-06, - "loss": 0.1982, + "epoch": 0.6, + "grad_norm": 0.4165782447203431, + "learning_rate": 7.368001153038073e-06, + "loss": 0.27, "step": 13001 }, { - "epoch": 0.75, - "grad_norm": 0.3156669845287359, - "learning_rate": 3.1731086641497997e-06, - "loss": 0.2602, + "epoch": 0.6, + "grad_norm": 0.2846790148806605, + "learning_rate": 7.366565725015504e-06, + "loss": 0.1851, "step": 13002 }, { - "epoch": 0.75, - "grad_norm": 1.0682277454930609, - "learning_rate": 3.171748996901093e-06, - "loss": 0.7463, + "epoch": 0.6, + "grad_norm": 0.7004557034314004, + "learning_rate": 7.36513035529463e-06, + "loss": 0.3156, "step": 13003 }, { - "epoch": 0.75, - "grad_norm": 0.9602481865968809, - "learning_rate": 3.1703895661144213e-06, - "loss": 0.3494, + "epoch": 0.6, + "grad_norm": 0.3730580383521489, + "learning_rate": 7.363695043907233e-06, + "loss": 0.2893, "step": 13004 }, { - "epoch": 0.75, - "grad_norm": 0.2875938503824787, - "learning_rate": 3.1690303718368675e-06, - "loss": 0.1851, + "epoch": 0.6, + "grad_norm": 0.4915608245532158, + "learning_rate": 7.36225979088508e-06, + "loss": 0.2915, "step": 13005 }, { - "epoch": 0.75, - "grad_norm": 0.3280084081282049, - "learning_rate": 3.1676714141154998e-06, - "loss": 0.2983, + "epoch": 0.6, + "grad_norm": 0.4084196751072741, + "learning_rate": 7.360824596259961e-06, + "loss": 0.2409, "step": 13006 }, { - "epoch": 0.75, - "grad_norm": 0.28378818890057067, - "learning_rate": 3.1663126929973766e-06, - "loss": 0.1949, + "epoch": 0.6, + "grad_norm": 0.5228129468718864, + "learning_rate": 7.35938946006364e-06, + "loss": 0.3332, "step": 13007 }, { - "epoch": 0.75, - "grad_norm": 0.29700206228395926, - "learning_rate": 3.1649542085295503e-06, - "loss": 0.1941, + "epoch": 0.6, + "grad_norm": 0.2788677511320499, + "learning_rate": 7.3579543823278894e-06, + "loss": 0.2006, "step": 13008 }, { - "epoch": 0.75, - "grad_norm": 0.4696627068472638, - "learning_rate": 3.163595960759063e-06, - "loss": 0.3623, + "epoch": 0.6, + "grad_norm": 0.3024163517332204, + "learning_rate": 7.35651936308448e-06, + "loss": 0.2165, "step": 13009 }, { - "epoch": 0.75, - "grad_norm": 0.47099670577215524, - "learning_rate": 3.162237949732957e-06, - "loss": 0.3376, + "epoch": 0.6, + "grad_norm": 0.5248515133621819, + "learning_rate": 7.355084402365188e-06, + "loss": 0.3477, "step": 13010 }, { - "epoch": 0.75, - "grad_norm": 0.2863904511902697, - "learning_rate": 3.1608801754982564e-06, - "loss": 0.1883, + "epoch": 0.6, + "grad_norm": 0.8199514678979798, + "learning_rate": 7.353649500201778e-06, + "loss": 0.3291, "step": 13011 }, { - "epoch": 0.75, - "grad_norm": 0.33279925478274053, - "learning_rate": 3.1595226381019817e-06, - "loss": 0.1941, + "epoch": 0.6, + "grad_norm": 0.34492604200175475, + "learning_rate": 7.352214656626017e-06, + "loss": 0.2775, "step": 13012 }, { - "epoch": 0.75, - "grad_norm": 0.5530973813912018, - "learning_rate": 3.15816533759114e-06, - "loss": 0.3521, + "epoch": 0.6, + "grad_norm": 0.36172735503415093, + "learning_rate": 7.350779871669669e-06, + "loss": 0.3159, "step": 13013 }, { - "epoch": 0.75, - "grad_norm": 0.23414052147715458, - "learning_rate": 3.1568082740127425e-06, - "loss": 0.2081, + "epoch": 0.6, + "grad_norm": 0.19480498299918567, + "learning_rate": 7.3493451453645035e-06, + "loss": 0.0898, "step": 13014 }, { - "epoch": 0.75, - "grad_norm": 0.9721095642376917, - "learning_rate": 3.1554514474137797e-06, - "loss": 0.5654, + "epoch": 0.6, + "grad_norm": 0.4153662119312081, + "learning_rate": 7.347910477742284e-06, + "loss": 0.2333, "step": 13015 }, { - "epoch": 0.75, - "grad_norm": 0.5991458451958498, - "learning_rate": 3.154094857841239e-06, - "loss": 0.397, + "epoch": 0.6, + "grad_norm": 0.5679187037766412, + "learning_rate": 7.346475868834768e-06, + "loss": 0.3993, "step": 13016 }, { - "epoch": 0.75, - "grad_norm": 0.31615058172984073, - "learning_rate": 3.152738505342097e-06, - "loss": 0.2359, + "epoch": 0.6, + "grad_norm": 0.36144328483528043, + "learning_rate": 7.345041318673717e-06, + "loss": 0.3082, "step": 13017 }, { - "epoch": 0.75, - "grad_norm": 0.36932228495972225, - "learning_rate": 3.1513823899633276e-06, - "loss": 0.2451, + "epoch": 0.6, + "grad_norm": 0.3649302728864425, + "learning_rate": 7.343606827290895e-06, + "loss": 0.2297, "step": 13018 }, { - "epoch": 0.75, - "grad_norm": 0.2985834736662441, - "learning_rate": 3.1500265117518926e-06, - "loss": 0.2106, + "epoch": 0.6, + "grad_norm": 1.1931353273253702, + "learning_rate": 7.342172394718057e-06, + "loss": 0.621, "step": 13019 }, { - "epoch": 0.75, - "grad_norm": 0.31142451629486045, - "learning_rate": 3.148670870754744e-06, - "loss": 0.2453, - "step": 13020 + "epoch": 0.6, + "grad_norm": 0.30483494760970925, + "learning_rate": 7.340738020986961e-06, + "loss": 0.2679, + "step": 13020 }, { - "epoch": 0.75, - "grad_norm": 0.7538480855399459, - "learning_rate": 3.1473154670188255e-06, - "loss": 0.3306, + "epoch": 0.6, + "grad_norm": 0.27629496027533135, + "learning_rate": 7.339303706129361e-06, + "loss": 0.0929, "step": 13021 }, { - "epoch": 0.75, - "grad_norm": 0.3409916931889605, - "learning_rate": 3.145960300591081e-06, - "loss": 0.2929, + "epoch": 0.6, + "grad_norm": 0.4027911985525034, + "learning_rate": 7.337869450177011e-06, + "loss": 0.3012, "step": 13022 }, { - "epoch": 0.75, - "grad_norm": 0.3780237167598833, - "learning_rate": 3.1446053715184367e-06, - "loss": 0.2681, + "epoch": 0.6, + "grad_norm": 0.6064316470883844, + "learning_rate": 7.336435253161667e-06, + "loss": 0.4015, "step": 13023 }, { - "epoch": 0.75, - "grad_norm": 0.5302115363553981, - "learning_rate": 3.1432506798478134e-06, - "loss": 0.2266, + "epoch": 0.6, + "grad_norm": 0.32408597405033895, + "learning_rate": 7.335001115115084e-06, + "loss": 0.2241, "step": 13024 }, { - "epoch": 0.75, - "grad_norm": 0.20758940749083343, - "learning_rate": 3.1418962256261256e-06, - "loss": 0.2004, + "epoch": 0.6, + "grad_norm": 0.43004749980329615, + "learning_rate": 7.333567036069003e-06, + "loss": 0.3052, "step": 13025 }, { - "epoch": 0.75, - "grad_norm": 0.3761950098958392, - "learning_rate": 3.1405420089002713e-06, - "loss": 0.2534, + "epoch": 0.6, + "grad_norm": 0.45510377943488306, + "learning_rate": 7.332133016055175e-06, + "loss": 0.2364, "step": 13026 }, { - "epoch": 0.75, - "grad_norm": 0.6177190474844302, - "learning_rate": 3.1391880297171574e-06, - "loss": 0.2827, + "epoch": 0.6, + "grad_norm": 0.2675529744256249, + "learning_rate": 7.330699055105354e-06, + "loss": 0.1364, "step": 13027 }, { - "epoch": 0.75, - "grad_norm": 0.686885988108865, - "learning_rate": 3.1378342881236657e-06, - "loss": 0.3982, + "epoch": 0.6, + "grad_norm": 0.32229859800169836, + "learning_rate": 7.329265153251285e-06, + "loss": 0.2885, "step": 13028 }, { - "epoch": 0.75, - "grad_norm": 0.42586124579859064, - "learning_rate": 3.1364807841666776e-06, - "loss": 0.2896, + "epoch": 0.6, + "grad_norm": 0.7200967573502258, + "learning_rate": 7.327831310524711e-06, + "loss": 0.3695, "step": 13029 }, { - "epoch": 0.75, - "grad_norm": 0.2834899923338788, - "learning_rate": 3.1351275178930616e-06, - "loss": 0.2551, + "epoch": 0.6, + "grad_norm": 0.5214081462208413, + "learning_rate": 7.326397526957374e-06, + "loss": 0.4025, "step": 13030 }, { - "epoch": 0.75, - "grad_norm": 0.17052564170743853, - "learning_rate": 3.133774489349688e-06, - "loss": 0.0897, + "epoch": 0.6, + "grad_norm": 0.4098261913459206, + "learning_rate": 7.32496380258102e-06, + "loss": 0.2278, "step": 13031 }, { - "epoch": 0.75, - "grad_norm": 0.3672592846418563, - "learning_rate": 3.1324216985834088e-06, - "loss": 0.2728, + "epoch": 0.6, + "grad_norm": 0.385961412976362, + "learning_rate": 7.323530137427391e-06, + "loss": 0.3014, "step": 13032 }, { - "epoch": 0.75, - "grad_norm": 0.41687149656453454, - "learning_rate": 3.1310691456410703e-06, - "loss": 0.3083, + "epoch": 0.6, + "grad_norm": 0.27119181186433017, + "learning_rate": 7.322096531528222e-06, + "loss": 0.155, "step": 13033 }, { - "epoch": 0.75, - "grad_norm": 0.5042667901150454, - "learning_rate": 3.1297168305695125e-06, - "loss": 0.2768, + "epoch": 0.6, + "grad_norm": 0.397251200496358, + "learning_rate": 7.320662984915258e-06, + "loss": 0.2392, "step": 13034 }, { - "epoch": 0.75, - "grad_norm": 0.3323112138388973, - "learning_rate": 3.128364753415565e-06, - "loss": 0.2576, + "epoch": 0.6, + "grad_norm": 0.9293902533522632, + "learning_rate": 7.31922949762023e-06, + "loss": 0.4463, "step": 13035 }, { - "epoch": 0.75, - "grad_norm": 1.2650359396451536, - "learning_rate": 3.127012914226051e-06, - "loss": 0.4992, + "epoch": 0.6, + "grad_norm": 0.33847977650002437, + "learning_rate": 7.317796069674878e-06, + "loss": 0.2888, "step": 13036 }, { - "epoch": 0.75, - "grad_norm": 0.256286588856045, - "learning_rate": 3.125661313047783e-06, - "loss": 0.2135, + "epoch": 0.6, + "grad_norm": 0.509630429493513, + "learning_rate": 7.316362701110938e-06, + "loss": 0.2358, "step": 13037 }, { - "epoch": 0.75, - "grad_norm": 0.30430831676033854, - "learning_rate": 3.1243099499275666e-06, - "loss": 0.2453, + "epoch": 0.6, + "grad_norm": 0.5152972863342689, + "learning_rate": 7.314929391960139e-06, + "loss": 0.2653, "step": 13038 }, { - "epoch": 0.75, - "grad_norm": 0.7948641796041248, - "learning_rate": 3.1229588249122034e-06, - "loss": 0.4419, + "epoch": 0.6, + "grad_norm": 0.3814692966666743, + "learning_rate": 7.3134961422542125e-06, + "loss": 0.2221, "step": 13039 }, { - "epoch": 0.75, - "grad_norm": 0.46946317228487927, - "learning_rate": 3.12160793804848e-06, - "loss": 0.2173, + "epoch": 0.6, + "grad_norm": 0.3128663462681856, + "learning_rate": 7.312062952024896e-06, + "loss": 0.2326, "step": 13040 }, { - "epoch": 0.75, - "grad_norm": 0.34864930416461026, - "learning_rate": 3.120257289383178e-06, - "loss": 0.2764, + "epoch": 0.6, + "grad_norm": 1.2265954788348128, + "learning_rate": 7.310629821303916e-06, + "loss": 0.4221, "step": 13041 }, { - "epoch": 0.75, - "grad_norm": 0.4017789633656141, - "learning_rate": 3.1189068789630672e-06, - "loss": 0.2943, + "epoch": 0.6, + "grad_norm": 0.8704467366316939, + "learning_rate": 7.309196750123001e-06, + "loss": 0.4628, "step": 13042 }, { - "epoch": 0.75, - "grad_norm": 0.4361244313994006, - "learning_rate": 3.117556706834919e-06, - "loss": 0.2576, + "epoch": 0.6, + "grad_norm": 0.38942320723238094, + "learning_rate": 7.30776373851387e-06, + "loss": 0.2862, "step": 13043 }, { - "epoch": 0.75, - "grad_norm": 0.727950689186441, - "learning_rate": 3.116206773045486e-06, - "loss": 0.212, + "epoch": 0.6, + "grad_norm": 0.38730732047861177, + "learning_rate": 7.30633078650826e-06, + "loss": 0.2501, "step": 13044 }, { - "epoch": 0.75, - "grad_norm": 0.33404541531367393, - "learning_rate": 3.1148570776415153e-06, - "loss": 0.2929, + "epoch": 0.6, + "grad_norm": 0.44193429372339893, + "learning_rate": 7.30489789413789e-06, + "loss": 0.2099, "step": 13045 }, { - "epoch": 0.75, - "grad_norm": 0.3258378879850681, - "learning_rate": 3.1135076206697456e-06, - "loss": 0.2331, + "epoch": 0.6, + "grad_norm": 0.8911154465731558, + "learning_rate": 7.303465061434483e-06, + "loss": 0.2892, "step": 13046 }, { - "epoch": 0.75, - "grad_norm": 0.3613632019699203, - "learning_rate": 3.112158402176915e-06, - "loss": 0.1676, + "epoch": 0.6, + "grad_norm": 1.2495053771731763, + "learning_rate": 7.3020322884297565e-06, + "loss": 0.4139, "step": 13047 }, { - "epoch": 0.75, - "grad_norm": 0.5140961520189742, - "learning_rate": 3.110809422209742e-06, - "loss": 0.353, + "epoch": 0.6, + "grad_norm": 0.3103308970579325, + "learning_rate": 7.300599575155441e-06, + "loss": 0.2681, "step": 13048 }, { - "epoch": 0.75, - "grad_norm": 0.34242223527610316, - "learning_rate": 3.109460680814942e-06, - "loss": 0.2875, + "epoch": 0.6, + "grad_norm": 0.2995619785578511, + "learning_rate": 7.299166921643246e-06, + "loss": 0.2209, "step": 13049 }, { - "epoch": 0.75, - "grad_norm": 0.4790141871496615, - "learning_rate": 3.108112178039222e-06, - "loss": 0.2166, + "epoch": 0.6, + "grad_norm": 1.2103027052171522, + "learning_rate": 7.297734327924892e-06, + "loss": 0.2877, "step": 13050 }, { - "epoch": 0.75, - "grad_norm": 0.5128694160336205, - "learning_rate": 3.106763913929278e-06, - "loss": 0.3545, + "epoch": 0.6, + "grad_norm": 0.6478452626427619, + "learning_rate": 7.296301794032097e-06, + "loss": 0.3283, "step": 13051 }, { - "epoch": 0.75, - "grad_norm": 0.25136946231934676, - "learning_rate": 3.1054158885318075e-06, - "loss": 0.1828, + "epoch": 0.6, + "grad_norm": 0.36476800654657787, + "learning_rate": 7.294869319996571e-06, + "loss": 0.2999, "step": 13052 }, { - "epoch": 0.75, - "grad_norm": 0.27467417356526297, - "learning_rate": 3.104068101893487e-06, - "loss": 0.232, + "epoch": 0.6, + "grad_norm": 0.39234819969857054, + "learning_rate": 7.2934369058500355e-06, + "loss": 0.2139, "step": 13053 }, { - "epoch": 0.75, - "grad_norm": 0.5452312702861496, - "learning_rate": 3.102720554060993e-06, - "loss": 0.3419, + "epoch": 0.6, + "grad_norm": 0.3845981612873682, + "learning_rate": 7.292004551624196e-06, + "loss": 0.2726, "step": 13054 }, { - "epoch": 0.75, - "grad_norm": 0.770701859262707, - "learning_rate": 3.101373245080985e-06, - "loss": 0.4232, + "epoch": 0.6, + "grad_norm": 0.4641540653785157, + "learning_rate": 7.290572257350768e-06, + "loss": 0.3076, "step": 13055 }, { - "epoch": 0.75, - "grad_norm": 0.3705818865576376, - "learning_rate": 3.100026175000128e-06, - "loss": 0.2777, + "epoch": 0.6, + "grad_norm": 0.3755342998765627, + "learning_rate": 7.289140023061452e-06, + "loss": 0.3029, "step": 13056 }, { - "epoch": 0.75, - "grad_norm": 0.3255903926001107, - "learning_rate": 3.0986793438650686e-06, - "loss": 0.2491, + "epoch": 0.6, + "grad_norm": 0.6510659785724403, + "learning_rate": 7.287707848787968e-06, + "loss": 0.3148, "step": 13057 }, { - "epoch": 0.75, - "grad_norm": 0.29833688651425433, - "learning_rate": 3.097332751722447e-06, - "loss": 0.265, + "epoch": 0.6, + "grad_norm": 0.31994317698089164, + "learning_rate": 7.286275734562019e-06, + "loss": 0.2631, "step": 13058 }, { - "epoch": 0.75, - "grad_norm": 0.280230038707804, - "learning_rate": 3.095986398618892e-06, - "loss": 0.1964, + "epoch": 0.6, + "grad_norm": 0.5030936424675906, + "learning_rate": 7.28484368041531e-06, + "loss": 0.3969, "step": 13059 }, { - "epoch": 0.75, - "grad_norm": 0.5305521770532992, - "learning_rate": 3.094640284601034e-06, - "loss": 0.2259, + "epoch": 0.6, + "grad_norm": 0.27252219634268565, + "learning_rate": 7.283411686379543e-06, + "loss": 0.1692, "step": 13060 }, { - "epoch": 0.75, - "grad_norm": 0.3592098971698116, - "learning_rate": 3.093294409715486e-06, - "loss": 0.3027, + "epoch": 0.6, + "grad_norm": 0.28007333258290534, + "learning_rate": 7.281979752486423e-06, + "loss": 0.2081, "step": 13061 }, { - "epoch": 0.75, - "grad_norm": 0.7451073577020624, - "learning_rate": 3.0919487740088563e-06, - "loss": 0.3797, + "epoch": 0.6, + "grad_norm": 0.8754471020163299, + "learning_rate": 7.280547878767654e-06, + "loss": 0.5395, "step": 13062 }, { - "epoch": 0.75, - "grad_norm": 0.3298793993154702, - "learning_rate": 3.090603377527742e-06, - "loss": 0.2196, + "epoch": 0.6, + "grad_norm": 0.4185104039530996, + "learning_rate": 7.279116065254932e-06, + "loss": 0.2721, "step": 13063 }, { - "epoch": 0.75, - "grad_norm": 0.23678147433076321, - "learning_rate": 3.0892582203187337e-06, - "loss": 0.184, + "epoch": 0.6, + "grad_norm": 0.31987776487947406, + "learning_rate": 7.277684311979959e-06, + "loss": 0.2725, "step": 13064 }, { - "epoch": 0.75, - "grad_norm": 0.36824332059382947, - "learning_rate": 3.087913302428419e-06, - "loss": 0.2789, + "epoch": 0.6, + "grad_norm": 1.1787505201373472, + "learning_rate": 7.276252618974428e-06, + "loss": 0.6367, "step": 13065 }, { - "epoch": 0.75, - "grad_norm": 0.3972684606203196, - "learning_rate": 3.0865686239033687e-06, - "loss": 0.2218, + "epoch": 0.6, + "grad_norm": 0.30342178765555655, + "learning_rate": 7.274820986270043e-06, + "loss": 0.2223, "step": 13066 }, { - "epoch": 0.75, - "grad_norm": 0.7531669824152212, - "learning_rate": 3.085224184790151e-06, - "loss": 0.3647, + "epoch": 0.6, + "grad_norm": 0.3576018603811819, + "learning_rate": 7.273389413898495e-06, + "loss": 0.2066, "step": 13067 }, { - "epoch": 0.75, - "grad_norm": 0.5677699683736633, - "learning_rate": 3.083879985135322e-06, - "loss": 0.3255, + "epoch": 0.6, + "grad_norm": 0.4193576528825786, + "learning_rate": 7.2719579018914756e-06, + "loss": 0.3127, "step": 13068 }, { - "epoch": 0.75, - "grad_norm": 0.25851571654009503, - "learning_rate": 3.082536024985431e-06, - "loss": 0.2738, + "epoch": 0.6, + "grad_norm": 0.6913150708142002, + "learning_rate": 7.270526450280675e-06, + "loss": 0.3989, "step": 13069 }, { - "epoch": 0.75, - "grad_norm": 0.9987692197544271, - "learning_rate": 3.0811923043870206e-06, - "loss": 0.4699, + "epoch": 0.6, + "grad_norm": 0.3915456066879617, + "learning_rate": 7.269095059097793e-06, + "loss": 0.2175, "step": 13070 }, { - "epoch": 0.75, - "grad_norm": 0.2454516431873957, - "learning_rate": 3.0798488233866196e-06, - "loss": 0.1554, + "epoch": 0.6, + "grad_norm": 1.3355515963637203, + "learning_rate": 7.267663728374517e-06, + "loss": 0.7556, "step": 13071 }, { - "epoch": 0.75, - "grad_norm": 0.3892847958002378, - "learning_rate": 3.0785055820307595e-06, - "loss": 0.2817, + "epoch": 0.6, + "grad_norm": 0.2810694827883178, + "learning_rate": 7.266232458142529e-06, + "loss": 0.2236, "step": 13072 }, { - "epoch": 0.75, - "grad_norm": 0.3326192433773473, - "learning_rate": 3.077162580365953e-06, - "loss": 0.2553, + "epoch": 0.6, + "grad_norm": 0.2636793126359312, + "learning_rate": 7.264801248433516e-06, + "loss": 0.1707, "step": 13073 }, { - "epoch": 0.75, - "grad_norm": 0.6536009982343787, - "learning_rate": 3.07581981843871e-06, - "loss": 0.3113, + "epoch": 0.6, + "grad_norm": 0.7198684750370714, + "learning_rate": 7.263370099279173e-06, + "loss": 0.4095, "step": 13074 }, { - "epoch": 0.75, - "grad_norm": 0.36056431727318683, - "learning_rate": 3.0744772962955283e-06, - "loss": 0.2842, + "epoch": 0.6, + "grad_norm": 0.3841803241461575, + "learning_rate": 7.261939010711175e-06, + "loss": 0.3199, "step": 13075 }, { - "epoch": 0.75, - "grad_norm": 0.26550009542834724, - "learning_rate": 3.0731350139828963e-06, - "loss": 0.2026, + "epoch": 0.6, + "grad_norm": 0.3353259106184257, + "learning_rate": 7.260507982761211e-06, + "loss": 0.2013, "step": 13076 }, { - "epoch": 0.75, - "grad_norm": 0.23690984887180597, - "learning_rate": 3.071792971547305e-06, - "loss": 0.2057, + "epoch": 0.6, + "grad_norm": 1.3137467478344542, + "learning_rate": 7.259077015460956e-06, + "loss": 0.876, "step": 13077 }, { - "epoch": 0.75, - "grad_norm": 0.5352459519460655, - "learning_rate": 3.0704511690352246e-06, - "loss": 0.3448, + "epoch": 0.6, + "grad_norm": 0.4609138630836044, + "learning_rate": 7.257646108842098e-06, + "loss": 0.2413, "step": 13078 }, { - "epoch": 0.75, - "grad_norm": 0.7568358868361343, - "learning_rate": 3.0691096064931226e-06, - "loss": 0.3506, + "epoch": 0.6, + "grad_norm": 0.2668068985743164, + "learning_rate": 7.25621526293631e-06, + "loss": 0.2116, "step": 13079 }, { - "epoch": 0.75, - "grad_norm": 0.8462214742386458, - "learning_rate": 3.0677682839674526e-06, - "loss": 0.2057, + "epoch": 0.6, + "grad_norm": 0.3546271827372109, + "learning_rate": 7.254784477775274e-06, + "loss": 0.2472, "step": 13080 }, { - "epoch": 0.75, - "grad_norm": 0.2447690890806674, - "learning_rate": 3.0664272015046735e-06, - "loss": 0.2524, + "epoch": 0.6, + "grad_norm": 0.8105485037615277, + "learning_rate": 7.253353753390662e-06, + "loss": 0.4661, "step": 13081 }, { - "epoch": 0.75, - "grad_norm": 1.2331150403392583, - "learning_rate": 3.0650863591512215e-06, - "loss": 0.6398, + "epoch": 0.6, + "grad_norm": 0.3470179315261528, + "learning_rate": 7.251923089814149e-06, + "loss": 0.2305, "step": 13082 }, { - "epoch": 0.75, - "grad_norm": 0.32002625401700546, - "learning_rate": 3.063745756953531e-06, - "loss": 0.1126, + "epoch": 0.6, + "grad_norm": 0.8192897524069326, + "learning_rate": 7.250492487077412e-06, + "loss": 0.5378, "step": 13083 }, { - "epoch": 0.75, - "grad_norm": 0.38562212812440144, - "learning_rate": 3.062405394958022e-06, - "loss": 0.2825, + "epoch": 0.6, + "grad_norm": 0.30696030657633144, + "learning_rate": 7.2490619452121226e-06, + "loss": 0.2671, "step": 13084 }, { - "epoch": 0.75, - "grad_norm": 0.4755195083621667, - "learning_rate": 3.061065273211121e-06, - "loss": 0.2858, + "epoch": 0.6, + "grad_norm": 0.34659660102493167, + "learning_rate": 7.247631464249949e-06, + "loss": 0.2684, "step": 13085 }, { - "epoch": 0.75, - "grad_norm": 0.5813518421794341, - "learning_rate": 3.0597253917592308e-06, - "loss": 0.1095, + "epoch": 0.6, + "grad_norm": 0.5281285543453738, + "learning_rate": 7.246201044222558e-06, + "loss": 0.2282, "step": 13086 }, { - "epoch": 0.75, - "grad_norm": 0.38714185646623833, - "learning_rate": 3.0583857506487514e-06, - "loss": 0.33, + "epoch": 0.6, + "grad_norm": 0.3364294968566659, + "learning_rate": 7.2447706851616265e-06, + "loss": 0.2867, "step": 13087 }, { - "epoch": 0.75, - "grad_norm": 1.2848101672311991, - "learning_rate": 3.057046349926075e-06, - "loss": 0.7524, + "epoch": 0.6, + "grad_norm": 0.39739548134382874, + "learning_rate": 7.243340387098816e-06, + "loss": 0.2646, "step": 13088 }, { - "epoch": 0.75, - "grad_norm": 0.2332084154602774, - "learning_rate": 3.0557071896375824e-06, - "loss": 0.1926, + "epoch": 0.6, + "grad_norm": 0.4898473015620399, + "learning_rate": 7.241910150065795e-06, + "loss": 0.2973, "step": 13089 }, { - "epoch": 0.75, - "grad_norm": 0.3755476958237447, - "learning_rate": 3.054368269829654e-06, - "loss": 0.2989, + "epoch": 0.6, + "grad_norm": 0.6350048478381867, + "learning_rate": 7.240479974094219e-06, + "loss": 0.335, "step": 13090 }, { - "epoch": 0.75, - "grad_norm": 0.46469134330943285, - "learning_rate": 3.0530295905486527e-06, - "loss": 0.2978, + "epoch": 0.6, + "grad_norm": 0.3911313223915429, + "learning_rate": 7.23904985921576e-06, + "loss": 0.3012, "step": 13091 }, { - "epoch": 0.75, - "grad_norm": 0.4148719623586969, - "learning_rate": 3.0516911518409387e-06, - "loss": 0.3245, + "epoch": 0.6, + "grad_norm": 0.3293102712141958, + "learning_rate": 7.2376198054620765e-06, + "loss": 0.3134, "step": 13092 }, { - "epoch": 0.75, - "grad_norm": 0.28103354751520443, - "learning_rate": 3.0503529537528585e-06, - "loss": 0.2178, + "epoch": 0.6, + "grad_norm": 0.2168469566508136, + "learning_rate": 7.236189812864828e-06, + "loss": 0.0713, "step": 13093 }, { - "epoch": 0.75, - "grad_norm": 1.1477225314022288, - "learning_rate": 3.04901499633076e-06, - "loss": 0.7313, + "epoch": 0.6, + "grad_norm": 0.37346744029676804, + "learning_rate": 7.234759881455673e-06, + "loss": 0.2949, "step": 13094 }, { - "epoch": 0.75, - "grad_norm": 0.6204403347702722, - "learning_rate": 3.047677279620973e-06, - "loss": 0.3122, + "epoch": 0.6, + "grad_norm": 0.5244964040940353, + "learning_rate": 7.233330011266266e-06, + "loss": 0.4032, "step": 13095 }, { - "epoch": 0.75, - "grad_norm": 0.3277311313482158, - "learning_rate": 3.0463398036698222e-06, - "loss": 0.2242, + "epoch": 0.6, + "grad_norm": 0.5124745625575606, + "learning_rate": 7.23190020232827e-06, + "loss": 0.2596, "step": 13096 }, { - "epoch": 0.75, - "grad_norm": 0.24065037007139628, - "learning_rate": 3.0450025685236227e-06, - "loss": 0.2127, + "epoch": 0.6, + "grad_norm": 0.3357911546907022, + "learning_rate": 7.230470454673335e-06, + "loss": 0.2764, "step": 13097 }, { - "epoch": 0.75, - "grad_norm": 1.3160438437281266, - "learning_rate": 3.043665574228688e-06, - "loss": 0.677, + "epoch": 0.6, + "grad_norm": 0.3595309202513587, + "learning_rate": 7.2290407683331154e-06, + "loss": 0.2458, "step": 13098 }, { - "epoch": 0.75, - "grad_norm": 0.305915681969301, - "learning_rate": 3.042328820831315e-06, - "loss": 0.2111, + "epoch": 0.6, + "grad_norm": 0.36232954794909916, + "learning_rate": 7.227611143339259e-06, + "loss": 0.2552, "step": 13099 }, { - "epoch": 0.75, - "grad_norm": 0.47529902211212993, - "learning_rate": 3.040992308377796e-06, - "loss": 0.3485, + "epoch": 0.6, + "grad_norm": 0.34473565927659955, + "learning_rate": 7.2261815797234235e-06, + "loss": 0.2813, "step": 13100 }, { - "epoch": 0.75, - "grad_norm": 0.4365889963384517, - "learning_rate": 3.0396560369144145e-06, - "loss": 0.3292, + "epoch": 0.6, + "grad_norm": 1.388602177037523, + "learning_rate": 7.224752077517253e-06, + "loss": 0.9302, "step": 13101 }, { - "epoch": 0.75, - "grad_norm": 0.3014203717649501, - "learning_rate": 3.038320006487445e-06, - "loss": 0.2112, + "epoch": 0.6, + "grad_norm": 0.6646202327224993, + "learning_rate": 7.223322636752397e-06, + "loss": 0.327, "step": 13102 }, { - "epoch": 0.75, - "grad_norm": 0.2850257890850647, - "learning_rate": 3.036984217143154e-06, - "loss": 0.1755, + "epoch": 0.6, + "grad_norm": 0.2888319502877227, + "learning_rate": 7.221893257460497e-06, + "loss": 0.2509, "step": 13103 }, { - "epoch": 0.75, - "grad_norm": 0.3429237306537405, - "learning_rate": 3.0356486689278e-06, - "loss": 0.3056, + "epoch": 0.6, + "grad_norm": 0.4848289437571862, + "learning_rate": 7.220463939673208e-06, + "loss": 0.3521, "step": 13104 }, { - "epoch": 0.75, - "grad_norm": 0.3024864803605085, - "learning_rate": 3.034313361887631e-06, - "loss": 0.242, + "epoch": 0.6, + "grad_norm": 0.3001986487997207, + "learning_rate": 7.219034683422168e-06, + "loss": 0.208, "step": 13105 }, { - "epoch": 0.75, - "grad_norm": 0.8906428296674906, - "learning_rate": 3.0329782960688926e-06, - "loss": 0.3281, + "epoch": 0.6, + "grad_norm": 0.3983782047441542, + "learning_rate": 7.21760548873902e-06, + "loss": 0.1903, "step": 13106 }, { - "epoch": 0.75, - "grad_norm": 0.5836982031317877, - "learning_rate": 3.031643471517817e-06, - "loss": 0.3389, + "epoch": 0.6, + "grad_norm": 0.3597121848378269, + "learning_rate": 7.216176355655402e-06, + "loss": 0.3058, "step": 13107 }, { - "epoch": 0.75, - "grad_norm": 0.3551032461668801, - "learning_rate": 3.0303088882806276e-06, - "loss": 0.2684, + "epoch": 0.6, + "grad_norm": 0.5297023645799521, + "learning_rate": 7.214747284202959e-06, + "loss": 0.3301, "step": 13108 }, { - "epoch": 0.75, - "grad_norm": 0.2158504718862656, - "learning_rate": 3.028974546403539e-06, - "loss": 0.1674, + "epoch": 0.6, + "grad_norm": 0.33270782231456103, + "learning_rate": 7.213318274413327e-06, + "loss": 0.1957, "step": 13109 }, { - "epoch": 0.75, - "grad_norm": 0.7293506132465545, - "learning_rate": 3.027640445932766e-06, - "loss": 0.4317, + "epoch": 0.6, + "grad_norm": 0.6024887159370106, + "learning_rate": 7.211889326318142e-06, + "loss": 0.374, "step": 13110 }, { - "epoch": 0.75, - "grad_norm": 0.36427266642415257, - "learning_rate": 3.0263065869145035e-06, - "loss": 0.26, + "epoch": 0.6, + "grad_norm": 0.24926937904326515, + "learning_rate": 7.210460439949041e-06, + "loss": 0.2284, "step": 13111 }, { - "epoch": 0.75, - "grad_norm": 0.5653448581871906, - "learning_rate": 3.024972969394944e-06, - "loss": 0.2791, + "epoch": 0.6, + "grad_norm": 0.33146743523244254, + "learning_rate": 7.2090316153376535e-06, + "loss": 0.1954, "step": 13112 }, { - "epoch": 0.75, - "grad_norm": 0.4739343898820716, - "learning_rate": 3.023639593420271e-06, - "loss": 0.3312, + "epoch": 0.6, + "grad_norm": 0.8843966720103299, + "learning_rate": 7.2076028525156195e-06, + "loss": 0.5543, "step": 13113 }, { - "epoch": 0.75, - "grad_norm": 0.36681901328564676, - "learning_rate": 3.022306459036656e-06, - "loss": 0.2701, + "epoch": 0.6, + "grad_norm": 0.6574004998628649, + "learning_rate": 7.206174151514567e-06, + "loss": 0.4081, "step": 13114 }, { - "epoch": 0.75, - "grad_norm": 0.29844337218234673, - "learning_rate": 3.0209735662902706e-06, - "loss": 0.1129, + "epoch": 0.6, + "grad_norm": 0.2850333959323658, + "learning_rate": 7.204745512366125e-06, + "loss": 0.2159, "step": 13115 }, { - "epoch": 0.75, - "grad_norm": 0.35840107821791856, - "learning_rate": 3.019640915227271e-06, - "loss": 0.2941, + "epoch": 0.6, + "grad_norm": 0.49514731601911993, + "learning_rate": 7.20331693510192e-06, + "loss": 0.3335, "step": 13116 }, { - "epoch": 0.75, - "grad_norm": 0.3529763050954908, - "learning_rate": 3.0183085058938068e-06, - "loss": 0.2667, + "epoch": 0.6, + "grad_norm": 0.3326449206352974, + "learning_rate": 7.201888419753587e-06, + "loss": 0.173, "step": 13117 }, { - "epoch": 0.75, - "grad_norm": 0.8960601500229003, - "learning_rate": 3.016976338336015e-06, - "loss": 0.3917, + "epoch": 0.6, + "grad_norm": 0.4281277722623995, + "learning_rate": 7.200459966352748e-06, + "loss": 0.2776, "step": 13118 }, { - "epoch": 0.75, - "grad_norm": 0.8699740633932406, - "learning_rate": 3.015644412600036e-06, - "loss": 0.2787, + "epoch": 0.6, + "grad_norm": 0.4053609795866975, + "learning_rate": 7.199031574931027e-06, + "loss": 0.2634, "step": 13119 }, { - "epoch": 0.75, - "grad_norm": 0.316723083079564, - "learning_rate": 3.0143127287319895e-06, - "loss": 0.2392, + "epoch": 0.6, + "grad_norm": 0.5841591522089046, + "learning_rate": 7.197603245520042e-06, + "loss": 0.3691, "step": 13120 }, { - "epoch": 0.75, - "grad_norm": 0.2449187149952753, - "learning_rate": 3.012981286777994e-06, - "loss": 0.2233, + "epoch": 0.6, + "grad_norm": 0.34590770275186555, + "learning_rate": 7.196174978151424e-06, + "loss": 0.2778, "step": 13121 }, { - "epoch": 0.75, - "grad_norm": 1.1979911990400887, - "learning_rate": 3.0116500867841525e-06, - "loss": 0.4074, + "epoch": 0.6, + "grad_norm": 0.982147838879129, + "learning_rate": 7.194746772856791e-06, + "loss": 0.3198, "step": 13122 }, { - "epoch": 0.75, - "grad_norm": 0.3560474829287622, - "learning_rate": 3.0103191287965715e-06, - "loss": 0.2798, + "epoch": 0.6, + "grad_norm": 0.27399008437980904, + "learning_rate": 7.19331862966776e-06, + "loss": 0.2459, "step": 13123 }, { - "epoch": 0.75, - "grad_norm": 0.9490243159614561, - "learning_rate": 3.008988412861338e-06, - "loss": 0.3888, + "epoch": 0.6, + "grad_norm": 0.44101589753002157, + "learning_rate": 7.191890548615949e-06, + "loss": 0.2392, "step": 13124 }, { - "epoch": 0.75, - "grad_norm": 0.34330007122672573, - "learning_rate": 3.007657939024535e-06, - "loss": 0.2458, + "epoch": 0.6, + "grad_norm": 0.5261414721553771, + "learning_rate": 7.190462529732973e-06, + "loss": 0.2864, "step": 13125 }, { - "epoch": 0.75, - "grad_norm": 0.31736090452794763, - "learning_rate": 3.006327707332235e-06, - "loss": 0.2582, + "epoch": 0.6, + "grad_norm": 0.8098704283372421, + "learning_rate": 7.189034573050451e-06, + "loss": 0.4568, "step": 13126 }, { - "epoch": 0.75, - "grad_norm": 0.4724601701873738, - "learning_rate": 3.004997717830508e-06, - "loss": 0.2203, + "epoch": 0.6, + "grad_norm": 0.3799932434086642, + "learning_rate": 7.187606678599994e-06, + "loss": 0.2568, "step": 13127 }, { - "epoch": 0.75, - "grad_norm": 0.2326895772207429, - "learning_rate": 3.003667970565409e-06, - "loss": 0.1987, + "epoch": 0.6, + "grad_norm": 0.35886065222063157, + "learning_rate": 7.1861788464132145e-06, + "loss": 0.2558, "step": 13128 }, { - "epoch": 0.75, - "grad_norm": 0.3605528897187082, - "learning_rate": 3.002338465582988e-06, - "loss": 0.2826, + "epoch": 0.6, + "grad_norm": 0.3113100544688846, + "learning_rate": 7.184751076521721e-06, + "loss": 0.1669, "step": 13129 }, { - "epoch": 0.75, - "grad_norm": 0.9734472280455081, - "learning_rate": 3.0010092029292835e-06, - "loss": 0.3781, + "epoch": 0.6, + "grad_norm": 0.577164657310814, + "learning_rate": 7.183323368957129e-06, + "loss": 0.304, "step": 13130 }, { - "epoch": 0.75, - "grad_norm": 1.0805909522215216, - "learning_rate": 2.9996801826503275e-06, - "loss": 0.5344, + "epoch": 0.6, + "grad_norm": 0.2732634728482863, + "learning_rate": 7.181895723751041e-06, + "loss": 0.29, "step": 13131 }, { - "epoch": 0.75, - "grad_norm": 0.2773513817656313, - "learning_rate": 2.9983514047921493e-06, - "loss": 0.1889, + "epoch": 0.6, + "grad_norm": 0.9889330476369504, + "learning_rate": 7.180468140935066e-06, + "loss": 0.3566, "step": 13132 }, { - "epoch": 0.75, - "grad_norm": 0.35829546695635645, - "learning_rate": 2.9970228694007598e-06, - "loss": 0.3274, + "epoch": 0.6, + "grad_norm": 0.4300639901054414, + "learning_rate": 7.179040620540805e-06, + "loss": 0.2936, "step": 13133 }, { - "epoch": 0.75, - "grad_norm": 0.4805023126371464, - "learning_rate": 2.995694576522168e-06, - "loss": 0.2938, + "epoch": 0.6, + "grad_norm": 0.5620263635845721, + "learning_rate": 7.17761316259987e-06, + "loss": 0.4056, "step": 13134 }, { - "epoch": 0.75, - "grad_norm": 0.32261178589923567, - "learning_rate": 2.9943665262023714e-06, - "loss": 0.2114, + "epoch": 0.6, + "grad_norm": 0.2699209338443238, + "learning_rate": 7.17618576714386e-06, + "loss": 0.1945, "step": 13135 }, { - "epoch": 0.75, - "grad_norm": 0.36506769087255786, - "learning_rate": 2.993038718487361e-06, - "loss": 0.3249, + "epoch": 0.6, + "grad_norm": 0.3214509676689571, + "learning_rate": 7.1747584342043764e-06, + "loss": 0.2184, "step": 13136 }, { - "epoch": 0.75, - "grad_norm": 0.3643438925093802, - "learning_rate": 2.991711153423118e-06, - "loss": 0.215, + "epoch": 0.6, + "grad_norm": 0.542614847663522, + "learning_rate": 7.173331163813012e-06, + "loss": 0.3215, "step": 13137 }, { - "epoch": 0.75, - "grad_norm": 0.33487426259480946, - "learning_rate": 2.9903838310556133e-06, - "loss": 0.1942, + "epoch": 0.6, + "grad_norm": 0.9677512418286224, + "learning_rate": 7.171903956001376e-06, + "loss": 0.5347, "step": 13138 }, { - "epoch": 0.75, - "grad_norm": 0.4488417837759212, - "learning_rate": 2.989056751430819e-06, - "loss": 0.2472, + "epoch": 0.6, + "grad_norm": 0.26087101500569765, + "learning_rate": 7.170476810801059e-06, + "loss": 0.2518, "step": 13139 }, { - "epoch": 0.75, - "grad_norm": 0.3362232531745725, - "learning_rate": 2.987729914594687e-06, - "loss": 0.3241, + "epoch": 0.6, + "grad_norm": 0.5661829148563724, + "learning_rate": 7.16904972824366e-06, + "loss": 0.3726, "step": 13140 }, { - "epoch": 0.76, - "grad_norm": 0.29966928249023517, - "learning_rate": 2.9864033205931675e-06, - "loss": 0.2217, + "epoch": 0.6, + "grad_norm": 0.4083842363928816, + "learning_rate": 7.16762270836077e-06, + "loss": 0.169, "step": 13141 }, { - "epoch": 0.76, - "grad_norm": 0.863128083275095, - "learning_rate": 2.9850769694721982e-06, - "loss": 0.5081, + "epoch": 0.6, + "grad_norm": 0.43453981416469795, + "learning_rate": 7.1661957511839845e-06, + "loss": 0.2907, "step": 13142 }, { - "epoch": 0.76, - "grad_norm": 0.39978155517146674, - "learning_rate": 2.9837508612777087e-06, - "loss": 0.2601, + "epoch": 0.6, + "grad_norm": 0.3351949044624958, + "learning_rate": 7.164768856744893e-06, + "loss": 0.3025, "step": 13143 }, { - "epoch": 0.76, - "grad_norm": 0.2719235906065331, - "learning_rate": 2.9824249960556294e-06, - "loss": 0.2627, + "epoch": 0.6, + "grad_norm": 0.8491619468982453, + "learning_rate": 7.163342025075088e-06, + "loss": 0.5269, "step": 13144 }, { - "epoch": 0.76, - "grad_norm": 0.5339971689860459, - "learning_rate": 2.9810993738518702e-06, - "loss": 0.2528, + "epoch": 0.6, + "grad_norm": 0.2561555983965833, + "learning_rate": 7.161915256206155e-06, + "loss": 0.0949, "step": 13145 }, { - "epoch": 0.76, - "grad_norm": 0.7843067034359767, - "learning_rate": 2.9797739947123383e-06, - "loss": 0.4102, + "epoch": 0.6, + "grad_norm": 0.42102602782236065, + "learning_rate": 7.1604885501696815e-06, + "loss": 0.3285, "step": 13146 }, { - "epoch": 0.76, - "grad_norm": 0.3894676336923343, - "learning_rate": 2.9784488586829272e-06, - "loss": 0.2814, + "epoch": 0.6, + "grad_norm": 0.3904455758843244, + "learning_rate": 7.159061906997257e-06, + "loss": 0.2974, "step": 13147 }, { - "epoch": 0.76, - "grad_norm": 0.2722933405316551, - "learning_rate": 2.9771239658095342e-06, - "loss": 0.249, + "epoch": 0.6, + "grad_norm": 0.4742208812482457, + "learning_rate": 7.157635326720462e-06, + "loss": 0.2292, "step": 13148 }, { - "epoch": 0.76, - "grad_norm": 0.2812989717087355, - "learning_rate": 2.975799316138035e-06, - "loss": 0.187, + "epoch": 0.6, + "grad_norm": 0.32667372418057317, + "learning_rate": 7.156208809370884e-06, + "loss": 0.2579, "step": 13149 }, { - "epoch": 0.76, - "grad_norm": 0.39908579003212574, - "learning_rate": 2.9744749097143046e-06, - "loss": 0.2796, + "epoch": 0.6, + "grad_norm": 1.2769451002380916, + "learning_rate": 7.1547823549800966e-06, + "loss": 0.7609, "step": 13150 }, { - "epoch": 0.76, - "grad_norm": 0.6090914983354809, - "learning_rate": 2.9731507465842025e-06, - "loss": 0.2293, + "epoch": 0.6, + "grad_norm": 0.2663221220738006, + "learning_rate": 7.15335596357969e-06, + "loss": 0.1776, "step": 13151 }, { - "epoch": 0.76, - "grad_norm": 0.4057610007364994, - "learning_rate": 2.97182682679359e-06, - "loss": 0.304, - "step": 13152 + "epoch": 0.6, + "grad_norm": 0.4315092470684004, + "learning_rate": 7.151929635201238e-06, + "loss": 0.301, + "step": 13152 }, { - "epoch": 0.76, - "grad_norm": 0.4026742727902014, - "learning_rate": 2.970503150388313e-06, - "loss": 0.3015, + "epoch": 0.6, + "grad_norm": 0.746033657499269, + "learning_rate": 7.15050336987632e-06, + "loss": 0.4079, "step": 13153 }, { - "epoch": 0.76, - "grad_norm": 0.29655660277141555, - "learning_rate": 2.96917971741421e-06, - "loss": 0.1956, + "epoch": 0.6, + "grad_norm": 0.49221890733294776, + "learning_rate": 7.149077167636514e-06, + "loss": 0.2274, "step": 13154 }, { - "epoch": 0.76, - "grad_norm": 0.4737388090525767, - "learning_rate": 2.9678565279171113e-06, - "loss": 0.2305, + "epoch": 0.6, + "grad_norm": 0.4089863733488511, + "learning_rate": 7.1476510285133824e-06, + "loss": 0.3227, "step": 13155 }, { - "epoch": 0.76, - "grad_norm": 0.25693414461923525, - "learning_rate": 2.9665335819428354e-06, - "loss": 0.2463, + "epoch": 0.6, + "grad_norm": 0.5196987957961049, + "learning_rate": 7.146224952538514e-06, + "loss": 0.4168, "step": 13156 }, { - "epoch": 0.76, - "grad_norm": 0.5539166843080455, - "learning_rate": 2.9652108795372016e-06, - "loss": 0.323, + "epoch": 0.6, + "grad_norm": 0.25876996963615884, + "learning_rate": 7.144798939743475e-06, + "loss": 0.1627, "step": 13157 }, { - "epoch": 0.76, - "grad_norm": 0.6583760802800553, - "learning_rate": 2.963888420746013e-06, - "loss": 0.2997, + "epoch": 0.6, + "grad_norm": 0.42257976480295784, + "learning_rate": 7.143372990159835e-06, + "loss": 0.2161, "step": 13158 }, { - "epoch": 0.76, - "grad_norm": 0.3184635760317434, - "learning_rate": 2.962566205615065e-06, - "loss": 0.2428, + "epoch": 0.6, + "grad_norm": 0.4163779030551335, + "learning_rate": 7.141947103819163e-06, + "loss": 0.3254, "step": 13159 }, { - "epoch": 0.76, - "grad_norm": 0.45352708893623567, - "learning_rate": 2.9612442341901448e-06, - "loss": 0.3603, + "epoch": 0.6, + "grad_norm": 0.5505659038364246, + "learning_rate": 7.140521280753028e-06, + "loss": 0.3328, "step": 13160 }, { - "epoch": 0.76, - "grad_norm": 0.22507968518561244, - "learning_rate": 2.9599225065170356e-06, - "loss": 0.1509, + "epoch": 0.6, + "grad_norm": 0.38824565133695726, + "learning_rate": 7.139095520992996e-06, + "loss": 0.2399, "step": 13161 }, { - "epoch": 0.76, - "grad_norm": 0.32301562997476935, - "learning_rate": 2.9586010226415085e-06, - "loss": 0.2435, + "epoch": 0.6, + "grad_norm": 0.4053966803143022, + "learning_rate": 7.137669824570631e-06, + "loss": 0.267, "step": 13162 }, { - "epoch": 0.76, - "grad_norm": 0.9494394947854288, - "learning_rate": 2.9572797826093256e-06, - "loss": 0.4594, + "epoch": 0.6, + "grad_norm": 0.329445484208078, + "learning_rate": 7.136244191517494e-06, + "loss": 0.2281, "step": 13163 }, { - "epoch": 0.76, - "grad_norm": 0.36294253708825114, - "learning_rate": 2.9559587864662365e-06, - "loss": 0.2539, + "epoch": 0.6, + "grad_norm": 0.33579671661220045, + "learning_rate": 7.134818621865157e-06, + "loss": 0.2038, "step": 13164 }, { - "epoch": 0.76, - "grad_norm": 0.6077925906601935, - "learning_rate": 2.9546380342579962e-06, - "loss": 0.3771, + "epoch": 0.6, + "grad_norm": 0.6889386419283249, + "learning_rate": 7.133393115645172e-06, + "loss": 0.3853, "step": 13165 }, { - "epoch": 0.76, - "grad_norm": 0.3826961493987988, - "learning_rate": 2.953317526030337e-06, - "loss": 0.3284, + "epoch": 0.6, + "grad_norm": 0.7665483240974319, + "learning_rate": 7.131967672889101e-06, + "loss": 0.3561, "step": 13166 }, { - "epoch": 0.76, - "grad_norm": 0.26520872395312056, - "learning_rate": 2.9519972618289894e-06, - "loss": 0.2096, + "epoch": 0.6, + "grad_norm": 0.26731556018905, + "learning_rate": 7.1305422936284965e-06, + "loss": 0.2294, "step": 13167 }, { - "epoch": 0.76, - "grad_norm": 0.3666621982903667, - "learning_rate": 2.9506772416996732e-06, - "loss": 0.1855, + "epoch": 0.6, + "grad_norm": 1.397854622643993, + "learning_rate": 7.129116977894924e-06, + "loss": 0.7462, "step": 13168 }, { - "epoch": 0.76, - "grad_norm": 0.3930965638463575, - "learning_rate": 2.9493574656881006e-06, - "loss": 0.29, + "epoch": 0.6, + "grad_norm": 0.25075032625970506, + "learning_rate": 7.1276917257199356e-06, + "loss": 0.1753, "step": 13169 }, { - "epoch": 0.76, - "grad_norm": 0.8084312110357765, - "learning_rate": 2.9480379338399757e-06, - "loss": 0.3901, + "epoch": 0.61, + "grad_norm": 0.35370386768326073, + "learning_rate": 7.126266537135082e-06, + "loss": 0.2772, "step": 13170 }, { - "epoch": 0.76, - "grad_norm": 0.3213576415786909, - "learning_rate": 2.9467186462009943e-06, - "loss": 0.1748, + "epoch": 0.61, + "grad_norm": 0.3674322599884452, + "learning_rate": 7.124841412171921e-06, + "loss": 0.2805, "step": 13171 }, { - "epoch": 0.76, - "grad_norm": 0.2886036323658359, - "learning_rate": 2.94539960281684e-06, - "loss": 0.2953, + "epoch": 0.61, + "grad_norm": 0.8088994855314864, + "learning_rate": 7.1234163508619954e-06, + "loss": 0.3802, "step": 13172 }, { - "epoch": 0.76, - "grad_norm": 0.44952697496270905, - "learning_rate": 2.944080803733197e-06, - "loss": 0.2489, + "epoch": 0.61, + "grad_norm": 0.44167816405217686, + "learning_rate": 7.121991353236861e-06, + "loss": 0.2998, "step": 13173 }, { - "epoch": 0.76, - "grad_norm": 0.24913717295057006, - "learning_rate": 2.942762248995733e-06, - "loss": 0.1602, + "epoch": 0.61, + "grad_norm": 0.4712836300934926, + "learning_rate": 7.1205664193280655e-06, + "loss": 0.3011, "step": 13174 }, { - "epoch": 0.76, - "grad_norm": 0.5314385966365966, - "learning_rate": 2.9414439386501082e-06, - "loss": 0.3329, + "epoch": 0.61, + "grad_norm": 0.2827029762469823, + "learning_rate": 7.119141549167154e-06, + "loss": 0.212, "step": 13175 }, { - "epoch": 0.76, - "grad_norm": 0.3573835288969693, - "learning_rate": 2.9401258727419723e-06, - "loss": 0.3176, + "epoch": 0.61, + "grad_norm": 0.2877340600060954, + "learning_rate": 7.11771674278567e-06, + "loss": 0.2162, "step": 13176 }, { - "epoch": 0.76, - "grad_norm": 0.31895803358139446, - "learning_rate": 2.938808051316978e-06, - "loss": 0.1936, + "epoch": 0.61, + "grad_norm": 0.6321776589843386, + "learning_rate": 7.116292000215161e-06, + "loss": 0.3229, "step": 13177 }, { - "epoch": 0.76, - "grad_norm": 0.5156004074194172, - "learning_rate": 2.937490474420758e-06, - "loss": 0.3777, + "epoch": 0.61, + "grad_norm": 0.3999298250012933, + "learning_rate": 7.114867321487169e-06, + "loss": 0.2838, "step": 13178 }, { - "epoch": 0.76, - "grad_norm": 0.3266568107674821, - "learning_rate": 2.9361731420989382e-06, - "loss": 0.1728, + "epoch": 0.61, + "grad_norm": 0.3425629391831343, + "learning_rate": 7.113442706633233e-06, + "loss": 0.2786, "step": 13179 }, { - "epoch": 0.76, - "grad_norm": 0.2562266580469743, - "learning_rate": 2.9348560543971383e-06, - "loss": 0.2493, + "epoch": 0.61, + "grad_norm": 0.9869457622589638, + "learning_rate": 7.112018155684888e-06, + "loss": 0.5356, "step": 13180 }, { - "epoch": 0.76, - "grad_norm": 0.4862114642170611, - "learning_rate": 2.933539211360966e-06, - "loss": 0.2606, + "epoch": 0.61, + "grad_norm": 0.4832428014949986, + "learning_rate": 7.110593668673682e-06, + "loss": 0.281, "step": 13181 }, { - "epoch": 0.76, - "grad_norm": 0.8436076426893645, - "learning_rate": 2.932222613036032e-06, - "loss": 0.4518, + "epoch": 0.61, + "grad_norm": 0.35335443390835214, + "learning_rate": 7.109169245631149e-06, + "loss": 0.3076, "step": 13182 }, { - "epoch": 0.76, - "grad_norm": 0.8093127081004269, - "learning_rate": 2.930906259467924e-06, - "loss": 0.3676, + "epoch": 0.61, + "grad_norm": 0.2950247985858858, + "learning_rate": 7.1077448865888236e-06, + "loss": 0.2472, "step": 13183 }, { - "epoch": 0.76, - "grad_norm": 0.24389401278509923, - "learning_rate": 2.9295901507022275e-06, - "loss": 0.2305, + "epoch": 0.61, + "grad_norm": 0.745021081118966, + "learning_rate": 7.106320591578237e-06, + "loss": 0.1374, "step": 13184 }, { - "epoch": 0.76, - "grad_norm": 0.5109376582936659, - "learning_rate": 2.928274286784517e-06, - "loss": 0.2928, + "epoch": 0.61, + "grad_norm": 0.35025670181340446, + "learning_rate": 7.10489636063092e-06, + "loss": 0.2634, "step": 13185 }, { - "epoch": 0.76, - "grad_norm": 0.6477468458115577, - "learning_rate": 2.9269586677603677e-06, - "loss": 0.3776, + "epoch": 0.61, + "grad_norm": 0.5484912969264011, + "learning_rate": 7.10347219377841e-06, + "loss": 0.4036, "step": 13186 }, { - "epoch": 0.76, - "grad_norm": 0.28910265146819936, - "learning_rate": 2.9256432936753354e-06, - "loss": 0.1995, + "epoch": 0.61, + "grad_norm": 0.37595529345835094, + "learning_rate": 7.102048091052235e-06, + "loss": 0.2506, "step": 13187 }, { - "epoch": 0.76, - "grad_norm": 0.350700075545137, - "learning_rate": 2.924328164574972e-06, - "loss": 0.3169, + "epoch": 0.61, + "grad_norm": 0.38514353561772385, + "learning_rate": 7.1006240524839225e-06, + "loss": 0.3255, "step": 13188 }, { - "epoch": 0.76, - "grad_norm": 0.7686568143004432, - "learning_rate": 2.923013280504816e-06, - "loss": 0.4884, + "epoch": 0.61, + "grad_norm": 0.2714241392126355, + "learning_rate": 7.099200078104995e-06, + "loss": 0.1407, "step": 13189 }, { - "epoch": 0.76, - "grad_norm": 0.3587136303027198, - "learning_rate": 2.9216986415104097e-06, - "loss": 0.2156, + "epoch": 0.61, + "grad_norm": 0.3403392197765243, + "learning_rate": 7.097776167946986e-06, + "loss": 0.2412, "step": 13190 }, { - "epoch": 0.76, - "grad_norm": 0.7374740757701493, - "learning_rate": 2.9203842476372747e-06, - "loss": 0.3926, + "epoch": 0.61, + "grad_norm": 0.37813807431389335, + "learning_rate": 7.096352322041414e-06, + "loss": 0.2753, "step": 13191 }, { - "epoch": 0.76, - "grad_norm": 0.2838512447883044, - "learning_rate": 2.9190700989309285e-06, - "loss": 0.2682, + "epoch": 0.61, + "grad_norm": 1.4587232457790513, + "learning_rate": 7.094928540419804e-06, + "loss": 0.7653, "step": 13192 }, { - "epoch": 0.76, - "grad_norm": 0.290622311729339, - "learning_rate": 2.9177561954368804e-06, - "loss": 0.2525, + "epoch": 0.61, + "grad_norm": 0.689564675335786, + "learning_rate": 7.093504823113674e-06, + "loss": 0.4063, "step": 13193 }, { - "epoch": 0.76, - "grad_norm": 0.5359851867679772, - "learning_rate": 2.916442537200629e-06, - "loss": 0.1526, + "epoch": 0.61, + "grad_norm": 0.34403370475593453, + "learning_rate": 7.0920811701545474e-06, + "loss": 0.1856, "step": 13194 }, { - "epoch": 0.76, - "grad_norm": 0.37080513662796255, - "learning_rate": 2.9151291242676692e-06, - "loss": 0.2884, + "epoch": 0.61, + "grad_norm": 0.2742343126233367, + "learning_rate": 7.090657581573941e-06, + "loss": 0.2229, "step": 13195 }, { - "epoch": 0.76, - "grad_norm": 0.30793946557758933, - "learning_rate": 2.9138159566834834e-06, - "loss": 0.2773, + "epoch": 0.61, + "grad_norm": 0.9116441205076875, + "learning_rate": 7.089234057403373e-06, + "loss": 0.4537, "step": 13196 }, { - "epoch": 0.76, - "grad_norm": 0.8039012607707117, - "learning_rate": 2.912503034493547e-06, - "loss": 0.2705, + "epoch": 0.61, + "grad_norm": 0.3561583406500787, + "learning_rate": 7.087810597674351e-06, + "loss": 0.2109, "step": 13197 }, { - "epoch": 0.76, - "grad_norm": 0.5254037076250225, - "learning_rate": 2.911190357743322e-06, - "loss": 0.3449, + "epoch": 0.61, + "grad_norm": 0.3944290660533092, + "learning_rate": 7.0863872024184025e-06, + "loss": 0.3168, "step": 13198 }, { - "epoch": 0.76, - "grad_norm": 0.387990735879651, - "learning_rate": 2.909877926478274e-06, - "loss": 0.3074, + "epoch": 0.61, + "grad_norm": 0.966833204298043, + "learning_rate": 7.084963871667032e-06, + "loss": 0.375, "step": 13199 }, { - "epoch": 0.76, - "grad_norm": 0.22161277090349127, - "learning_rate": 2.9085657407438485e-06, - "loss": 0.1872, + "epoch": 0.61, + "grad_norm": 0.3449343416853031, + "learning_rate": 7.0835406054517505e-06, + "loss": 0.2194, "step": 13200 }, { - "epoch": 0.76, - "grad_norm": 0.5582834510824618, - "learning_rate": 2.9072538005854855e-06, - "loss": 0.3282, + "epoch": 0.61, + "grad_norm": 0.3581267708625302, + "learning_rate": 7.082117403804074e-06, + "loss": 0.1945, "step": 13201 }, { - "epoch": 0.76, - "grad_norm": 0.40851544103697307, - "learning_rate": 2.9059421060486193e-06, - "loss": 0.3156, + "epoch": 0.61, + "grad_norm": 0.38050943772535994, + "learning_rate": 7.080694266755497e-06, + "loss": 0.3193, "step": 13202 }, { - "epoch": 0.76, - "grad_norm": 0.35263245779401653, - "learning_rate": 2.904630657178672e-06, - "loss": 0.2656, + "epoch": 0.61, + "grad_norm": 0.2937967540633318, + "learning_rate": 7.0792711943375406e-06, + "loss": 0.2005, "step": 13203 }, { - "epoch": 0.76, - "grad_norm": 0.6917076508307758, - "learning_rate": 2.903319454021061e-06, - "loss": 0.3521, + "epoch": 0.61, + "grad_norm": 1.218071774332793, + "learning_rate": 7.077848186581705e-06, + "loss": 0.4616, "step": 13204 }, { - "epoch": 0.76, - "grad_norm": 0.34619152479873244, - "learning_rate": 2.9020084966211913e-06, - "loss": 0.2813, + "epoch": 0.61, + "grad_norm": 0.8635243969689705, + "learning_rate": 7.076425243519494e-06, + "loss": 0.4277, "step": 13205 }, { - "epoch": 0.76, - "grad_norm": 0.2818608501668898, - "learning_rate": 2.900697785024459e-06, - "loss": 0.1854, + "epoch": 0.61, + "grad_norm": 0.36676066954631537, + "learning_rate": 7.0750023651824086e-06, + "loss": 0.2501, "step": 13206 }, { - "epoch": 0.76, - "grad_norm": 0.2772904606806348, - "learning_rate": 2.89938731927626e-06, - "loss": 0.2237, + "epoch": 0.61, + "grad_norm": 0.34594653848202417, + "learning_rate": 7.073579551601952e-06, + "loss": 0.2301, "step": 13207 }, { - "epoch": 0.76, - "grad_norm": 0.3456570326485675, - "learning_rate": 2.8980770994219743e-06, - "loss": 0.2759, + "epoch": 0.61, + "grad_norm": 0.3322400983187017, + "learning_rate": 7.072156802809626e-06, + "loss": 0.2164, "step": 13208 }, { - "epoch": 0.76, - "grad_norm": 0.6722228995159082, - "learning_rate": 2.8967671255069717e-06, - "loss": 0.3808, + "epoch": 0.61, + "grad_norm": 0.3444796556916447, + "learning_rate": 7.070734118836925e-06, + "loss": 0.2752, "step": 13209 }, { - "epoch": 0.76, - "grad_norm": 0.5544282846657669, - "learning_rate": 2.8954573975766156e-06, - "loss": 0.1777, + "epoch": 0.61, + "grad_norm": 0.596934595652926, + "learning_rate": 7.069311499715344e-06, + "loss": 0.2818, "step": 13210 }, { - "epoch": 0.76, - "grad_norm": 0.33520447957453975, - "learning_rate": 2.8941479156762675e-06, - "loss": 0.2917, + "epoch": 0.61, + "grad_norm": 0.5772617227785102, + "learning_rate": 7.067888945476386e-06, + "loss": 0.3564, "step": 13211 }, { - "epoch": 0.76, - "grad_norm": 0.23424922457156772, - "learning_rate": 2.892838679851272e-06, - "loss": 0.1917, + "epoch": 0.61, + "grad_norm": 0.40755943259517474, + "learning_rate": 7.066466456151541e-06, + "loss": 0.2915, "step": 13212 }, { - "epoch": 0.76, - "grad_norm": 0.3888829363306553, - "learning_rate": 2.891529690146966e-06, - "loss": 0.2193, + "epoch": 0.61, + "grad_norm": 0.23153860773606208, + "learning_rate": 7.065044031772301e-06, + "loss": 0.1289, "step": 13213 }, { - "epoch": 0.76, - "grad_norm": 0.39275318593597286, - "learning_rate": 2.8902209466086794e-06, - "loss": 0.3079, + "epoch": 0.61, + "grad_norm": 0.38833186655934354, + "learning_rate": 7.063621672370157e-06, + "loss": 0.2997, "step": 13214 }, { - "epoch": 0.76, - "grad_norm": 0.6074206260942908, - "learning_rate": 2.8889124492817377e-06, - "loss": 0.3461, + "epoch": 0.61, + "grad_norm": 0.3918095409614361, + "learning_rate": 7.062199377976595e-06, + "loss": 0.2507, "step": 13215 }, { - "epoch": 0.76, - "grad_norm": 0.4121332734309223, - "learning_rate": 2.887604198211453e-06, - "loss": 0.2094, + "epoch": 0.61, + "grad_norm": 0.46972123010949823, + "learning_rate": 7.06077714862311e-06, + "loss": 0.2875, "step": 13216 }, { - "epoch": 0.76, - "grad_norm": 0.4086837165721845, - "learning_rate": 2.886296193443129e-06, - "loss": 0.2966, + "epoch": 0.61, + "grad_norm": 1.1703365226642282, + "learning_rate": 7.059354984341189e-06, + "loss": 0.5967, "step": 13217 }, { - "epoch": 0.76, - "grad_norm": 0.31241744718481923, - "learning_rate": 2.8849884350220614e-06, - "loss": 0.2266, + "epoch": 0.61, + "grad_norm": 0.36156675160805446, + "learning_rate": 7.057932885162312e-06, + "loss": 0.2521, "step": 13218 }, { - "epoch": 0.76, - "grad_norm": 0.3312463483694761, - "learning_rate": 2.883680922993536e-06, - "loss": 0.2859, + "epoch": 0.61, + "grad_norm": 0.4192115205045195, + "learning_rate": 7.0565108511179635e-06, + "loss": 0.326, "step": 13219 }, { - "epoch": 0.76, - "grad_norm": 0.3602296698578734, - "learning_rate": 2.882373657402836e-06, - "loss": 0.2236, + "epoch": 0.61, + "grad_norm": 0.23077055175410224, + "learning_rate": 7.055088882239631e-06, + "loss": 0.098, "step": 13220 }, { - "epoch": 0.76, - "grad_norm": 0.6188669949771659, - "learning_rate": 2.8810666382952314e-06, - "loss": 0.3728, + "epoch": 0.61, + "grad_norm": 0.34818336750425904, + "learning_rate": 7.053666978558791e-06, + "loss": 0.2569, "step": 13221 }, { - "epoch": 0.76, - "grad_norm": 1.3765443199588423, - "learning_rate": 2.879759865715982e-06, - "loss": 0.5777, + "epoch": 0.61, + "grad_norm": 0.47756221390437303, + "learning_rate": 7.052245140106926e-06, + "loss": 0.3416, "step": 13222 }, { - "epoch": 0.76, - "grad_norm": 0.24491184844570574, - "learning_rate": 2.87845333971034e-06, - "loss": 0.2082, + "epoch": 0.61, + "grad_norm": 0.48399079563732117, + "learning_rate": 7.050823366915509e-06, + "loss": 0.2714, "step": 13223 }, { - "epoch": 0.76, - "grad_norm": 0.31949829603425767, - "learning_rate": 2.877147060323555e-06, - "loss": 0.2706, + "epoch": 0.61, + "grad_norm": 0.3856934684886553, + "learning_rate": 7.049401659016023e-06, + "loss": 0.2611, "step": 13224 }, { - "epoch": 0.76, - "grad_norm": 0.8517481211049055, - "learning_rate": 2.875841027600862e-06, - "loss": 0.4386, + "epoch": 0.61, + "grad_norm": 1.2912415538386326, + "learning_rate": 7.04798001643994e-06, + "loss": 0.561, "step": 13225 }, { - "epoch": 0.76, - "grad_norm": 0.2866011692470818, - "learning_rate": 2.8745352415874872e-06, - "loss": 0.2287, + "epoch": 0.61, + "grad_norm": 0.21113464758715922, + "learning_rate": 7.0465584392187345e-06, + "loss": 0.1796, "step": 13226 }, { - "epoch": 0.76, - "grad_norm": 0.3154714117522481, - "learning_rate": 2.873229702328647e-06, - "loss": 0.251, + "epoch": 0.61, + "grad_norm": 0.3489388705698056, + "learning_rate": 7.045136927383874e-06, + "loss": 0.2675, "step": 13227 }, { - "epoch": 0.76, - "grad_norm": 0.47557481422269005, - "learning_rate": 2.8719244098695597e-06, - "loss": 0.3048, + "epoch": 0.61, + "grad_norm": 0.7320256661961151, + "learning_rate": 7.043715480966839e-06, + "loss": 0.4193, "step": 13228 }, { - "epoch": 0.76, - "grad_norm": 0.28793328291945003, - "learning_rate": 2.8706193642554237e-06, - "loss": 0.1866, + "epoch": 0.61, + "grad_norm": 0.44714865521354447, + "learning_rate": 7.042294099999096e-06, + "loss": 0.2883, "step": 13229 }, { - "epoch": 0.76, - "grad_norm": 1.1838012367861193, - "learning_rate": 2.8693145655314327e-06, - "loss": 0.6212, + "epoch": 0.61, + "grad_norm": 0.41776620968393785, + "learning_rate": 7.040872784512107e-06, + "loss": 0.2908, "step": 13230 }, { - "epoch": 0.76, - "grad_norm": 0.3389412390749291, - "learning_rate": 2.86801001374277e-06, - "loss": 0.284, + "epoch": 0.61, + "grad_norm": 0.3838355888205946, + "learning_rate": 7.039451534537345e-06, + "loss": 0.3194, "step": 13231 }, { - "epoch": 0.76, - "grad_norm": 0.3654452826305494, - "learning_rate": 2.8667057089346127e-06, - "loss": 0.2889, + "epoch": 0.61, + "grad_norm": 0.4201385036254127, + "learning_rate": 7.0380303501062675e-06, + "loss": 0.3288, "step": 13232 }, { - "epoch": 0.76, - "grad_norm": 0.14945398474158922, - "learning_rate": 2.865401651152132e-06, - "loss": 0.0971, + "epoch": 0.61, + "grad_norm": 0.3902013936431746, + "learning_rate": 7.036609231250346e-06, + "loss": 0.1943, "step": 13233 }, { - "epoch": 0.76, - "grad_norm": 0.8662144879164575, - "learning_rate": 2.864097840440485e-06, - "loss": 0.3408, + "epoch": 0.61, + "grad_norm": 0.31405309228437783, + "learning_rate": 7.035188178001042e-06, + "loss": 0.2969, "step": 13234 }, { - "epoch": 0.76, - "grad_norm": 0.33595790732355874, - "learning_rate": 2.8627942768448234e-06, - "loss": 0.2453, + "epoch": 0.61, + "grad_norm": 0.4443853799132858, + "learning_rate": 7.033767190389814e-06, + "loss": 0.2871, "step": 13235 }, { - "epoch": 0.76, - "grad_norm": 0.34996311712384964, - "learning_rate": 2.861490960410289e-06, - "loss": 0.2498, + "epoch": 0.61, + "grad_norm": 0.3375871779312744, + "learning_rate": 7.032346268448118e-06, + "loss": 0.1564, "step": 13236 }, { - "epoch": 0.76, - "grad_norm": 0.4919206950943628, - "learning_rate": 2.8601878911820168e-06, - "loss": 0.3234, + "epoch": 0.61, + "grad_norm": 0.5793804394085306, + "learning_rate": 7.030925412207419e-06, + "loss": 0.3442, "step": 13237 }, { - "epoch": 0.76, - "grad_norm": 0.3451700720073584, - "learning_rate": 2.8588850692051296e-06, - "loss": 0.2718, + "epoch": 0.61, + "grad_norm": 0.350426211631335, + "learning_rate": 7.029504621699169e-06, + "loss": 0.3084, "step": 13238 }, { - "epoch": 0.76, - "grad_norm": 0.1936044629355527, - "learning_rate": 2.857582494524742e-06, - "loss": 0.1738, + "epoch": 0.61, + "grad_norm": 0.3327481888262889, + "learning_rate": 7.028083896954825e-06, + "loss": 0.2254, "step": 13239 }, { - "epoch": 0.76, - "grad_norm": 1.196637382273675, - "learning_rate": 2.8562801671859697e-06, - "loss": 0.5177, + "epoch": 0.61, + "grad_norm": 0.8765198477215321, + "learning_rate": 7.026663238005835e-06, + "loss": 0.5121, "step": 13240 }, { - "epoch": 0.76, - "grad_norm": 0.3138689667270411, - "learning_rate": 2.8549780872339073e-06, - "loss": 0.2528, + "epoch": 0.61, + "grad_norm": 0.2864856508660541, + "learning_rate": 7.025242644883659e-06, + "loss": 0.1913, "step": 13241 }, { - "epoch": 0.76, - "grad_norm": 0.7183731018697147, - "learning_rate": 2.8536762547136464e-06, - "loss": 0.3813, + "epoch": 0.61, + "grad_norm": 0.26518996452147225, + "learning_rate": 7.023822117619742e-06, + "loss": 0.2307, "step": 13242 }, { - "epoch": 0.76, - "grad_norm": 0.3149057673584101, - "learning_rate": 2.85237466967027e-06, - "loss": 0.2506, + "epoch": 0.61, + "grad_norm": 1.1541231292746266, + "learning_rate": 7.022401656245535e-06, + "loss": 0.5283, "step": 13243 }, { - "epoch": 0.76, - "grad_norm": 0.31630528529321505, - "learning_rate": 2.851073332148848e-06, - "loss": 0.2517, + "epoch": 0.61, + "grad_norm": 0.7745811158052468, + "learning_rate": 7.020981260792484e-06, + "loss": 0.395, "step": 13244 }, { - "epoch": 0.76, - "grad_norm": 0.4393966081472135, - "learning_rate": 2.849772242194453e-06, - "loss": 0.2605, + "epoch": 0.61, + "grad_norm": 0.4020693223690801, + "learning_rate": 7.019560931292038e-06, + "loss": 0.2943, "step": 13245 }, { - "epoch": 0.76, - "grad_norm": 0.3954421837785276, - "learning_rate": 2.8484713998521364e-06, - "loss": 0.1002, + "epoch": 0.61, + "grad_norm": 0.3073561538290007, + "learning_rate": 7.018140667775642e-06, + "loss": 0.2411, "step": 13246 }, { - "epoch": 0.76, - "grad_norm": 0.25876846616548765, - "learning_rate": 2.847170805166949e-06, - "loss": 0.2416, + "epoch": 0.61, + "grad_norm": 0.2996032766957375, + "learning_rate": 7.016720470274736e-06, + "loss": 0.1915, "step": 13247 }, { - "epoch": 0.76, - "grad_norm": 0.47757103299646225, - "learning_rate": 2.8458704581839247e-06, - "loss": 0.3262, + "epoch": 0.61, + "grad_norm": 0.616236488826098, + "learning_rate": 7.015300338820766e-06, + "loss": 0.2799, "step": 13248 }, { - "epoch": 0.76, - "grad_norm": 0.9076141858611277, - "learning_rate": 2.844570358948103e-06, - "loss": 0.3371, + "epoch": 0.61, + "grad_norm": 0.4419147198751879, + "learning_rate": 7.013880273445164e-06, + "loss": 0.2458, "step": 13249 }, { - "epoch": 0.76, - "grad_norm": 0.3070964861074088, - "learning_rate": 2.843270507504502e-06, - "loss": 0.2474, + "epoch": 0.61, + "grad_norm": 0.4794183373129288, + "learning_rate": 7.0124602741793794e-06, + "loss": 0.3022, "step": 13250 }, { - "epoch": 0.76, - "grad_norm": 0.37473028523472246, - "learning_rate": 2.8419709038981345e-06, - "loss": 0.2904, + "epoch": 0.61, + "grad_norm": 0.6108087650643558, + "learning_rate": 7.0110403410548445e-06, + "loss": 0.2847, "step": 13251 }, { - "epoch": 0.76, - "grad_norm": 0.1318432605062941, - "learning_rate": 2.840671548174004e-06, - "loss": 0.0704, + "epoch": 0.61, + "grad_norm": 0.4075570218469928, + "learning_rate": 7.009620474102995e-06, + "loss": 0.2697, "step": 13252 }, { - "epoch": 0.76, - "grad_norm": 0.39967919119913864, - "learning_rate": 2.8393724403771137e-06, - "loss": 0.2531, + "epoch": 0.61, + "grad_norm": 0.27818188541525096, + "learning_rate": 7.0082006733552646e-06, + "loss": 0.213, "step": 13253 }, { - "epoch": 0.76, - "grad_norm": 1.0046598432534393, - "learning_rate": 2.8380735805524475e-06, - "loss": 0.3925, + "epoch": 0.61, + "grad_norm": 0.3947332515512756, + "learning_rate": 7.00678093884309e-06, + "loss": 0.2698, "step": 13254 }, { - "epoch": 0.76, - "grad_norm": 0.3777098827872954, - "learning_rate": 2.8367749687449853e-06, - "loss": 0.334, + "epoch": 0.61, + "grad_norm": 0.3848307493098855, + "learning_rate": 7.005361270597899e-06, + "loss": 0.2506, "step": 13255 }, { - "epoch": 0.76, - "grad_norm": 0.3147497604198872, - "learning_rate": 2.835476604999695e-06, - "loss": 0.1906, + "epoch": 0.61, + "grad_norm": 0.9683634804008813, + "learning_rate": 7.003941668651125e-06, + "loss": 0.4545, "step": 13256 }, { - "epoch": 0.76, - "grad_norm": 0.4294056542690755, - "learning_rate": 2.8341784893615443e-06, - "loss": 0.2689, + "epoch": 0.61, + "grad_norm": 0.6032631413756341, + "learning_rate": 7.0025221330341905e-06, + "loss": 0.314, "step": 13257 }, { - "epoch": 0.76, - "grad_norm": 0.43842450172880576, - "learning_rate": 2.8328806218754855e-06, - "loss": 0.2174, + "epoch": 0.61, + "grad_norm": 0.2902885745894445, + "learning_rate": 7.001102663778533e-06, + "loss": 0.2605, "step": 13258 }, { - "epoch": 0.76, - "grad_norm": 0.25153714414036416, - "learning_rate": 2.831583002586461e-06, - "loss": 0.2005, + "epoch": 0.61, + "grad_norm": 0.23296764618120724, + "learning_rate": 6.99968326091557e-06, + "loss": 0.0996, "step": 13259 }, { - "epoch": 0.76, - "grad_norm": 0.5546683602382102, - "learning_rate": 2.83028563153941e-06, - "loss": 0.3272, + "epoch": 0.61, + "grad_norm": 0.4564579128740561, + "learning_rate": 6.998263924476727e-06, + "loss": 0.2967, "step": 13260 }, { - "epoch": 0.76, - "grad_norm": 1.3431270553899364, - "learning_rate": 2.8289885087792557e-06, - "loss": 0.6881, + "epoch": 0.61, + "grad_norm": 0.4949144013324813, + "learning_rate": 6.996844654493429e-06, + "loss": 0.3361, "step": 13261 }, { - "epoch": 0.76, - "grad_norm": 0.2682221801490365, - "learning_rate": 2.827691634350924e-06, - "loss": 0.2002, + "epoch": 0.61, + "grad_norm": 0.33530624190760694, + "learning_rate": 6.9954254509970905e-06, + "loss": 0.2731, "step": 13262 }, { - "epoch": 0.76, - "grad_norm": 0.3618717416925984, - "learning_rate": 2.826395008299323e-06, - "loss": 0.29, + "epoch": 0.61, + "grad_norm": 0.4162845160538152, + "learning_rate": 6.994006314019141e-06, + "loss": 0.294, "step": 13263 }, { - "epoch": 0.76, - "grad_norm": 0.3130034952304038, - "learning_rate": 2.8250986306693553e-06, - "loss": 0.1539, + "epoch": 0.61, + "grad_norm": 0.5807107999290808, + "learning_rate": 6.992587243590996e-06, + "loss": 0.399, "step": 13264 }, { - "epoch": 0.76, - "grad_norm": 0.3121843996017311, - "learning_rate": 2.823802501505909e-06, - "loss": 0.2223, + "epoch": 0.61, + "grad_norm": 0.2301457138065459, + "learning_rate": 6.991168239744068e-06, + "loss": 0.1513, "step": 13265 }, { - "epoch": 0.76, - "grad_norm": 0.7111069282689343, - "learning_rate": 2.8225066208538765e-06, - "loss": 0.4252, + "epoch": 0.61, + "grad_norm": 0.4996920700842822, + "learning_rate": 6.989749302509776e-06, + "loss": 0.3578, "step": 13266 }, { - "epoch": 0.76, - "grad_norm": 0.3587339369972459, - "learning_rate": 2.821210988758132e-06, - "loss": 0.3243, + "epoch": 0.61, + "grad_norm": 0.39319750080528976, + "learning_rate": 6.988330431919534e-06, + "loss": 0.2866, "step": 13267 }, { - "epoch": 0.76, - "grad_norm": 0.31053986252393684, - "learning_rate": 2.8199156052635412e-06, - "loss": 0.2829, + "epoch": 0.61, + "grad_norm": 0.8177512581954891, + "learning_rate": 6.986911628004753e-06, + "loss": 0.3096, "step": 13268 }, { - "epoch": 0.76, - "grad_norm": 1.3607437392178636, - "learning_rate": 2.8186204704149643e-06, - "loss": 0.2799, + "epoch": 0.61, + "grad_norm": 0.6587424620188117, + "learning_rate": 6.985492890796846e-06, + "loss": 0.3595, "step": 13269 }, { - "epoch": 0.76, - "grad_norm": 0.259545723920443, - "learning_rate": 2.817325584257252e-06, - "loss": 0.2053, + "epoch": 0.61, + "grad_norm": 0.29956451407525114, + "learning_rate": 6.984074220327217e-06, + "loss": 0.271, "step": 13270 }, { - "epoch": 0.76, - "grad_norm": 0.36841259735356224, - "learning_rate": 2.8160309468352465e-06, - "loss": 0.3017, + "epoch": 0.61, + "grad_norm": 0.45141752724755735, + "learning_rate": 6.982655616627282e-06, + "loss": 0.2302, "step": 13271 }, { - "epoch": 0.76, - "grad_norm": 0.38905719201812056, - "learning_rate": 2.81473655819378e-06, - "loss": 0.2515, + "epoch": 0.61, + "grad_norm": 0.5980308970073374, + "learning_rate": 6.981237079728442e-06, + "loss": 0.2204, "step": 13272 }, { - "epoch": 0.76, - "grad_norm": 1.0396985241949528, - "learning_rate": 2.813442418377674e-06, - "loss": 0.7514, + "epoch": 0.61, + "grad_norm": 0.3110007732456863, + "learning_rate": 6.979818609662104e-06, + "loss": 0.2402, "step": 13273 }, { - "epoch": 0.76, - "grad_norm": 0.39836820205825924, - "learning_rate": 2.812148527431752e-06, - "loss": 0.2428, + "epoch": 0.61, + "grad_norm": 0.36906311448099194, + "learning_rate": 6.978400206459668e-06, + "loss": 0.2866, "step": 13274 }, { - "epoch": 0.76, - "grad_norm": 0.305449725807176, - "learning_rate": 2.8108548854008166e-06, - "loss": 0.242, + "epoch": 0.61, + "grad_norm": 0.4611047151940982, + "learning_rate": 6.976981870152541e-06, + "loss": 0.1835, "step": 13275 }, { - "epoch": 0.76, - "grad_norm": 0.44681122919529676, - "learning_rate": 2.8095614923296676e-06, - "loss": 0.2259, + "epoch": 0.61, + "grad_norm": 0.4321954179240608, + "learning_rate": 6.975563600772126e-06, + "loss": 0.3195, "step": 13276 }, { - "epoch": 0.76, - "grad_norm": 0.3816614658218707, - "learning_rate": 2.8082683482630912e-06, - "loss": 0.2823, + "epoch": 0.61, + "grad_norm": 0.47446984703925477, + "learning_rate": 6.974145398349814e-06, + "loss": 0.2624, "step": 13277 }, { - "epoch": 0.76, - "grad_norm": 0.2946987437084205, - "learning_rate": 2.806975453245877e-06, - "loss": 0.2036, + "epoch": 0.61, + "grad_norm": 0.31696536885933657, + "learning_rate": 6.972727262917008e-06, + "loss": 0.2256, "step": 13278 }, { - "epoch": 0.76, - "grad_norm": 0.49942919812183445, - "learning_rate": 2.8056828073227925e-06, - "loss": 0.3453, + "epoch": 0.61, + "grad_norm": 0.4382728838794006, + "learning_rate": 6.971309194505098e-06, + "loss": 0.2948, "step": 13279 }, { - "epoch": 0.76, - "grad_norm": 0.40850999837166446, - "learning_rate": 2.804390410538603e-06, - "loss": 0.3256, + "epoch": 0.61, + "grad_norm": 0.5031982280657289, + "learning_rate": 6.969891193145489e-06, + "loss": 0.3118, "step": 13280 }, { - "epoch": 0.76, - "grad_norm": 0.5696317559539408, - "learning_rate": 2.803098262938062e-06, - "loss": 0.3323, + "epoch": 0.61, + "grad_norm": 0.2992853722339447, + "learning_rate": 6.968473258869566e-06, + "loss": 0.2177, "step": 13281 }, { - "epoch": 0.76, - "grad_norm": 0.4290707314341114, - "learning_rate": 2.801806364565921e-06, - "loss": 0.2762, + "epoch": 0.61, + "grad_norm": 0.37623675287915465, + "learning_rate": 6.9670553917087246e-06, + "loss": 0.2867, "step": 13282 }, { - "epoch": 0.76, - "grad_norm": 0.26581716980266745, - "learning_rate": 2.8005147154669166e-06, - "loss": 0.2421, - "step": 13283 + "epoch": 0.61, + "grad_norm": 0.7987918096646779, + "learning_rate": 6.9656375916943505e-06, + "loss": 0.4614, + "step": 13283 }, { - "epoch": 0.76, - "grad_norm": 0.299496740652704, - "learning_rate": 2.7992233156857784e-06, - "loss": 0.1897, + "epoch": 0.61, + "grad_norm": 0.6711136549712603, + "learning_rate": 6.964219858857839e-06, + "loss": 0.343, "step": 13284 }, { - "epoch": 0.76, - "grad_norm": 1.0457263308117586, - "learning_rate": 2.7979321652672266e-06, - "loss": 0.5193, + "epoch": 0.61, + "grad_norm": 0.27241354568695225, + "learning_rate": 6.962802193230574e-06, + "loss": 0.1578, "step": 13285 }, { - "epoch": 0.76, - "grad_norm": 0.3313512064073587, - "learning_rate": 2.79664126425597e-06, - "loss": 0.2552, + "epoch": 0.61, + "grad_norm": 0.29399386377274095, + "learning_rate": 6.961384594843939e-06, + "loss": 0.2704, "step": 13286 }, { - "epoch": 0.76, - "grad_norm": 0.3630065685033784, - "learning_rate": 2.795350612696721e-06, - "loss": 0.276, + "epoch": 0.61, + "grad_norm": 1.3735825512963098, + "learning_rate": 6.959967063729321e-06, + "loss": 0.8432, "step": 13287 }, { - "epoch": 0.76, - "grad_norm": 0.6448201348910892, - "learning_rate": 2.794060210634171e-06, - "loss": 0.2862, + "epoch": 0.61, + "grad_norm": 0.319565969108888, + "learning_rate": 6.958549599918103e-06, + "loss": 0.2058, "step": 13288 }, { - "epoch": 0.76, - "grad_norm": 0.6742071686259051, - "learning_rate": 2.7927700581130046e-06, - "loss": 0.3216, + "epoch": 0.61, + "grad_norm": 0.4863582146485311, + "learning_rate": 6.957132203441666e-06, + "loss": 0.3224, "step": 13289 }, { - "epoch": 0.76, - "grad_norm": 0.2330554908362521, - "learning_rate": 2.7914801551778994e-06, - "loss": 0.2058, + "epoch": 0.61, + "grad_norm": 0.4234258689341677, + "learning_rate": 6.955714874331388e-06, + "loss": 0.3172, "step": 13290 }, { - "epoch": 0.76, - "grad_norm": 0.31746453612708525, - "learning_rate": 2.7901905018735287e-06, - "loss": 0.2666, + "epoch": 0.61, + "grad_norm": 0.32153606237302085, + "learning_rate": 6.95429761261865e-06, + "loss": 0.1663, "step": 13291 }, { - "epoch": 0.76, - "grad_norm": 0.6076098034746343, - "learning_rate": 2.7889010982445508e-06, - "loss": 0.3607, + "epoch": 0.61, + "grad_norm": 0.49008023733572054, + "learning_rate": 6.952880418334822e-06, + "loss": 0.2735, "step": 13292 }, { - "epoch": 0.76, - "grad_norm": 0.3652652449404285, - "learning_rate": 2.7876119443356177e-06, - "loss": 0.3044, + "epoch": 0.61, + "grad_norm": 0.4141911271186079, + "learning_rate": 6.951463291511289e-06, + "loss": 0.3184, "step": 13293 }, { - "epoch": 0.76, - "grad_norm": 0.441685084264975, - "learning_rate": 2.7863230401913698e-06, - "loss": 0.3271, + "epoch": 0.61, + "grad_norm": 0.2905234190982208, + "learning_rate": 6.9500462321794214e-06, + "loss": 0.2211, "step": 13294 }, { - "epoch": 0.76, - "grad_norm": 0.28157585778291394, - "learning_rate": 2.7850343858564487e-06, - "loss": 0.1958, + "epoch": 0.61, + "grad_norm": 0.7947607701200965, + "learning_rate": 6.948629240370592e-06, + "loss": 0.394, "step": 13295 }, { - "epoch": 0.76, - "grad_norm": 0.2538193604013508, - "learning_rate": 2.7837459813754765e-06, - "loss": 0.1991, + "epoch": 0.61, + "grad_norm": 0.5644798627247649, + "learning_rate": 6.947212316116164e-06, + "loss": 0.3577, "step": 13296 }, { - "epoch": 0.76, - "grad_norm": 1.1707178137167724, - "learning_rate": 2.782457826793069e-06, - "loss": 0.6914, + "epoch": 0.61, + "grad_norm": 0.37898938366177054, + "learning_rate": 6.945795459447517e-06, + "loss": 0.2992, "step": 13297 }, { - "epoch": 0.76, - "grad_norm": 0.4149696440496968, - "learning_rate": 2.781169922153838e-06, - "loss": 0.1949, + "epoch": 0.61, + "grad_norm": 0.22212031680559113, + "learning_rate": 6.944378670396016e-06, + "loss": 0.1703, "step": 13298 }, { - "epoch": 0.76, - "grad_norm": 0.2861185610671488, - "learning_rate": 2.7798822675023795e-06, - "loss": 0.2823, + "epoch": 0.61, + "grad_norm": 0.7539079032897804, + "learning_rate": 6.942961948993026e-06, + "loss": 0.4961, "step": 13299 }, { - "epoch": 0.76, - "grad_norm": 0.7178983508096588, - "learning_rate": 2.7785948628832904e-06, - "loss": 0.3858, + "epoch": 0.61, + "grad_norm": 0.37266457123857255, + "learning_rate": 6.941545295269909e-06, + "loss": 0.27, "step": 13300 }, { - "epoch": 0.76, - "grad_norm": 0.4103924325196549, - "learning_rate": 2.7773077083411502e-06, - "loss": 0.1833, + "epoch": 0.61, + "grad_norm": 0.38438273520229865, + "learning_rate": 6.940128709258034e-06, + "loss": 0.2841, "step": 13301 }, { - "epoch": 0.76, - "grad_norm": 0.244686535238677, - "learning_rate": 2.776020803920533e-06, - "loss": 0.1998, + "epoch": 0.61, + "grad_norm": 0.7446501517362355, + "learning_rate": 6.93871219098876e-06, + "loss": 0.4113, "step": 13302 }, { - "epoch": 0.76, - "grad_norm": 0.355293710352619, - "learning_rate": 2.774734149666005e-06, - "loss": 0.3177, + "epoch": 0.61, + "grad_norm": 0.3879636463843152, + "learning_rate": 6.937295740493448e-06, + "loss": 0.2651, "step": 13303 }, { - "epoch": 0.76, - "grad_norm": 0.2862677955158507, - "learning_rate": 2.773447745622123e-06, - "loss": 0.2068, + "epoch": 0.61, + "grad_norm": 0.2521511669756142, + "learning_rate": 6.935879357803453e-06, + "loss": 0.124, "step": 13304 }, { - "epoch": 0.76, - "grad_norm": 0.6852767845171588, - "learning_rate": 2.7721615918334355e-06, - "loss": 0.4025, + "epoch": 0.61, + "grad_norm": 0.378772770535479, + "learning_rate": 6.9344630429501395e-06, + "loss": 0.3198, "step": 13305 }, { - "epoch": 0.76, - "grad_norm": 0.36885958549727094, - "learning_rate": 2.7708756883444776e-06, - "loss": 0.297, + "epoch": 0.61, + "grad_norm": 0.374211412125898, + "learning_rate": 6.9330467959648594e-06, + "loss": 0.2653, "step": 13306 }, { - "epoch": 0.76, - "grad_norm": 0.5868284645412355, - "learning_rate": 2.7695900351997864e-06, - "loss": 0.3458, + "epoch": 0.61, + "grad_norm": 0.6539568689989769, + "learning_rate": 6.931630616878967e-06, + "loss": 0.3937, "step": 13307 }, { - "epoch": 0.76, - "grad_norm": 0.27339180895296544, - "learning_rate": 2.7683046324438822e-06, - "loss": 0.1766, + "epoch": 0.61, + "grad_norm": 1.1114677756185225, + "learning_rate": 6.930214505723816e-06, + "loss": 0.2607, "step": 13308 }, { - "epoch": 0.76, - "grad_norm": 0.36055577891955803, - "learning_rate": 2.7670194801212768e-06, - "loss": 0.2466, + "epoch": 0.61, + "grad_norm": 0.2747438841453921, + "learning_rate": 6.928798462530751e-06, + "loss": 0.2372, "step": 13309 }, { - "epoch": 0.76, - "grad_norm": 0.41817920256675367, - "learning_rate": 2.7657345782764765e-06, - "loss": 0.3027, + "epoch": 0.61, + "grad_norm": 0.36853898722400935, + "learning_rate": 6.927382487331134e-06, + "loss": 0.282, "step": 13310 }, { - "epoch": 0.76, - "grad_norm": 0.2847934263341108, - "learning_rate": 2.7644499269539728e-06, - "loss": 0.242, + "epoch": 0.61, + "grad_norm": 0.2567367038577743, + "learning_rate": 6.925966580156307e-06, + "loss": 0.1401, "step": 13311 }, { - "epoch": 0.76, - "grad_norm": 0.9291208397012233, - "learning_rate": 2.7631655261982605e-06, - "loss": 0.4407, + "epoch": 0.61, + "grad_norm": 0.3988918828921113, + "learning_rate": 6.924550741037616e-06, + "loss": 0.2814, "step": 13312 }, { - "epoch": 0.76, - "grad_norm": 0.7871185563291784, - "learning_rate": 2.7618813760538145e-06, - "loss": 0.3596, + "epoch": 0.61, + "grad_norm": 0.48703845415777197, + "learning_rate": 6.923134970006406e-06, + "loss": 0.3532, "step": 13313 }, { - "epoch": 0.76, - "grad_norm": 0.2395530050943581, - "learning_rate": 2.7605974765651057e-06, - "loss": 0.2066, + "epoch": 0.61, + "grad_norm": 0.4037358482857964, + "learning_rate": 6.921719267094025e-06, + "loss": 0.2336, "step": 13314 }, { - "epoch": 0.77, - "grad_norm": 0.43243853087168727, - "learning_rate": 2.759313827776592e-06, - "loss": 0.2994, + "epoch": 0.61, + "grad_norm": 0.3785049680740953, + "learning_rate": 6.9203036323318125e-06, + "loss": 0.2726, "step": 13315 }, { - "epoch": 0.77, - "grad_norm": 0.499861531513214, - "learning_rate": 2.758030429732732e-06, - "loss": 0.3264, + "epoch": 0.61, + "grad_norm": 0.26611801316774814, + "learning_rate": 6.918888065751109e-06, + "loss": 0.1463, "step": 13316 }, { - "epoch": 0.77, - "grad_norm": 0.4019932945242539, - "learning_rate": 2.7567472824779663e-06, - "loss": 0.2627, + "epoch": 0.61, + "grad_norm": 0.3106606135247731, + "learning_rate": 6.917472567383252e-06, + "loss": 0.2535, "step": 13317 }, { - "epoch": 0.77, - "grad_norm": 0.35582583706274945, - "learning_rate": 2.7554643860567308e-06, - "loss": 0.2845, + "epoch": 0.61, + "grad_norm": 0.3930960677460471, + "learning_rate": 6.916057137259584e-06, + "loss": 0.2738, "step": 13318 }, { - "epoch": 0.77, - "grad_norm": 0.5481213981526601, - "learning_rate": 2.75418174051345e-06, - "loss": 0.2395, + "epoch": 0.61, + "grad_norm": 0.6919710512102732, + "learning_rate": 6.914641775411437e-06, + "loss": 0.423, "step": 13319 }, { - "epoch": 0.77, - "grad_norm": 0.41611532347043945, - "learning_rate": 2.7528993458925457e-06, - "loss": 0.3025, + "epoch": 0.61, + "grad_norm": 1.1562742740568586, + "learning_rate": 6.913226481870149e-06, + "loss": 0.5072, "step": 13320 }, { - "epoch": 0.77, - "grad_norm": 0.3799576843178077, - "learning_rate": 2.751617202238427e-06, - "loss": 0.1868, + "epoch": 0.61, + "grad_norm": 0.312359362151665, + "learning_rate": 6.911811256667052e-06, + "loss": 0.1829, "step": 13321 }, { - "epoch": 0.77, - "grad_norm": 0.2908916015181389, - "learning_rate": 2.750335309595491e-06, - "loss": 0.2636, + "epoch": 0.61, + "grad_norm": 0.25539123242755546, + "learning_rate": 6.910396099833471e-06, + "loss": 0.2171, "step": 13322 }, { - "epoch": 0.77, - "grad_norm": 0.43567171245253916, - "learning_rate": 2.7490536680081325e-06, - "loss": 0.2941, + "epoch": 0.61, + "grad_norm": 0.754773230804693, + "learning_rate": 6.908981011400751e-06, + "loss": 0.4655, "step": 13323 }, { - "epoch": 0.77, - "grad_norm": 0.4234412711982623, - "learning_rate": 2.7477722775207303e-06, - "loss": 0.1738, + "epoch": 0.61, + "grad_norm": 0.31727478570742296, + "learning_rate": 6.907565991400209e-06, + "loss": 0.2291, "step": 13324 }, { - "epoch": 0.77, - "grad_norm": 0.5576484292752899, - "learning_rate": 2.746491138177666e-06, - "loss": 0.2826, + "epoch": 0.61, + "grad_norm": 0.4241917678490096, + "learning_rate": 6.906151039863176e-06, + "loss": 0.3272, "step": 13325 }, { - "epoch": 0.77, - "grad_norm": 0.3903393434473755, - "learning_rate": 2.745210250023301e-06, - "loss": 0.3149, + "epoch": 0.61, + "grad_norm": 1.4163860398847878, + "learning_rate": 6.904736156820973e-06, + "loss": 0.6315, "step": 13326 }, { - "epoch": 0.77, - "grad_norm": 0.34900301912911175, - "learning_rate": 2.743929613101993e-06, - "loss": 0.2765, + "epoch": 0.61, + "grad_norm": 0.3015662923899679, + "learning_rate": 6.90332134230493e-06, + "loss": 0.2068, "step": 13327 }, { - "epoch": 0.77, - "grad_norm": 0.8648744675226305, - "learning_rate": 2.7426492274580883e-06, - "loss": 0.3671, + "epoch": 0.61, + "grad_norm": 0.5550024492076097, + "learning_rate": 6.90190659634637e-06, + "loss": 0.3304, "step": 13328 }, { - "epoch": 0.77, - "grad_norm": 0.3467725051148224, - "learning_rate": 2.7413690931359316e-06, - "loss": 0.3054, + "epoch": 0.61, + "grad_norm": 0.35426135694143507, + "learning_rate": 6.900491918976613e-06, + "loss": 0.3051, "step": 13329 }, { - "epoch": 0.77, - "grad_norm": 0.22494972847791722, - "learning_rate": 2.7400892101798504e-06, - "loss": 0.2063, + "epoch": 0.61, + "grad_norm": 0.35336340830976487, + "learning_rate": 6.899077310226973e-06, + "loss": 0.2399, "step": 13330 }, { - "epoch": 0.77, - "grad_norm": 0.7541721363633993, - "learning_rate": 2.7388095786341682e-06, - "loss": 0.1194, + "epoch": 0.61, + "grad_norm": 1.1594833771305124, + "learning_rate": 6.897662770128776e-06, + "loss": 0.6752, "step": 13331 }, { - "epoch": 0.77, - "grad_norm": 0.36934648699727984, - "learning_rate": 2.7375301985431947e-06, - "loss": 0.2673, + "epoch": 0.61, + "grad_norm": 0.5402441592250512, + "learning_rate": 6.896248298713336e-06, + "loss": 0.2621, "step": 13332 }, { - "epoch": 0.77, - "grad_norm": 0.8084421696547506, - "learning_rate": 2.736251069951241e-06, - "loss": 0.4386, + "epoch": 0.61, + "grad_norm": 0.356529365027446, + "learning_rate": 6.894833896011967e-06, + "loss": 0.2491, "step": 13333 }, { - "epoch": 0.77, - "grad_norm": 0.29545160567203277, - "learning_rate": 2.734972192902601e-06, - "loss": 0.2284, + "epoch": 0.61, + "grad_norm": 0.3579151918867826, + "learning_rate": 6.89341956205598e-06, + "loss": 0.2446, "step": 13334 }, { - "epoch": 0.77, - "grad_norm": 0.36633179660452714, - "learning_rate": 2.733693567441561e-06, - "loss": 0.3288, + "epoch": 0.61, + "grad_norm": 0.6122528508336325, + "learning_rate": 6.892005296876692e-06, + "loss": 0.4095, "step": 13335 }, { - "epoch": 0.77, - "grad_norm": 0.24642775837098382, - "learning_rate": 2.732415193612401e-06, - "loss": 0.1621, + "epoch": 0.61, + "grad_norm": 0.4369749360113549, + "learning_rate": 6.89059110050541e-06, + "loss": 0.2708, "step": 13336 }, { - "epoch": 0.77, - "grad_norm": 0.5111131791764394, - "learning_rate": 2.73113707145939e-06, - "loss": 0.1199, + "epoch": 0.61, + "grad_norm": 0.3203649719722516, + "learning_rate": 6.889176972973446e-06, + "loss": 0.2483, "step": 13337 }, { - "epoch": 0.77, - "grad_norm": 0.37770395293264974, - "learning_rate": 2.7298592010267887e-06, - "loss": 0.3027, + "epoch": 0.61, + "grad_norm": 0.3055920849384419, + "learning_rate": 6.887762914312104e-06, + "loss": 0.1876, "step": 13338 }, { - "epoch": 0.77, - "grad_norm": 0.3905628714529268, - "learning_rate": 2.7285815823588513e-06, - "loss": 0.2994, + "epoch": 0.61, + "grad_norm": 0.4274910206833901, + "learning_rate": 6.886348924552689e-06, + "loss": 0.2454, "step": 13339 }, { - "epoch": 0.77, - "grad_norm": 0.5113595561516364, - "learning_rate": 2.7273042154998188e-06, - "loss": 0.2487, + "epoch": 0.61, + "grad_norm": 0.5573080972518692, + "learning_rate": 6.884935003726512e-06, + "loss": 0.2827, "step": 13340 }, { - "epoch": 0.77, - "grad_norm": 0.40329210177460667, - "learning_rate": 2.726027100493931e-06, - "loss": 0.3088, + "epoch": 0.61, + "grad_norm": 0.3833045861421186, + "learning_rate": 6.883521151864872e-06, + "loss": 0.309, "step": 13341 }, { - "epoch": 0.77, - "grad_norm": 0.2326471863617773, - "learning_rate": 2.724750237385412e-06, - "loss": 0.2229, + "epoch": 0.61, + "grad_norm": 0.4328125702819307, + "learning_rate": 6.882107368999069e-06, + "loss": 0.2749, "step": 13342 }, { - "epoch": 0.77, - "grad_norm": 1.0041471856553938, - "learning_rate": 2.723473626218479e-06, - "loss": 0.4367, + "epoch": 0.61, + "grad_norm": 0.2032869647555686, + "learning_rate": 6.8806936551604e-06, + "loss": 0.1465, "step": 13343 }, { - "epoch": 0.77, - "grad_norm": 0.3045613574682275, - "learning_rate": 2.722197267037339e-06, - "loss": 0.1968, + "epoch": 0.61, + "grad_norm": 1.215086187797997, + "learning_rate": 6.879280010380169e-06, + "loss": 0.5807, "step": 13344 }, { - "epoch": 0.77, - "grad_norm": 0.5701953085284389, - "learning_rate": 2.7209211598861975e-06, - "loss": 0.3772, + "epoch": 0.61, + "grad_norm": 0.31897184246136756, + "learning_rate": 6.877866434689673e-06, + "loss": 0.2533, "step": 13345 }, { - "epoch": 0.77, - "grad_norm": 0.3510620371677265, - "learning_rate": 2.719645304809242e-06, - "loss": 0.294, + "epoch": 0.61, + "grad_norm": 0.4718437950136446, + "learning_rate": 6.8764529281202055e-06, + "loss": 0.3094, "step": 13346 }, { - "epoch": 0.77, - "grad_norm": 0.2921036385897743, - "learning_rate": 2.7183697018506584e-06, - "loss": 0.2072, + "epoch": 0.61, + "grad_norm": 0.5962997410687163, + "learning_rate": 6.875039490703055e-06, + "loss": 0.3362, "step": 13347 }, { - "epoch": 0.77, - "grad_norm": 0.2440216161862196, - "learning_rate": 2.7170943510546177e-06, - "loss": 0.1587, + "epoch": 0.61, + "grad_norm": 0.36627603212143084, + "learning_rate": 6.8736261224695225e-06, + "loss": 0.2738, "step": 13348 }, { - "epoch": 0.77, - "grad_norm": 0.822130467598231, - "learning_rate": 2.715819252465284e-06, - "loss": 0.5094, + "epoch": 0.61, + "grad_norm": 0.4965533502840909, + "learning_rate": 6.872212823450895e-06, + "loss": 0.3562, "step": 13349 }, { - "epoch": 0.77, - "grad_norm": 0.24005749395345125, - "learning_rate": 2.714544406126819e-06, - "loss": 0.2119, + "epoch": 0.61, + "grad_norm": 0.2788625095921907, + "learning_rate": 6.870799593678459e-06, + "loss": 0.1623, "step": 13350 }, { - "epoch": 0.77, - "grad_norm": 0.6209963089409438, - "learning_rate": 2.713269812083369e-06, - "loss": 0.3862, + "epoch": 0.61, + "grad_norm": 0.4249080093679798, + "learning_rate": 6.869386433183505e-06, + "loss": 0.2815, "step": 13351 }, { - "epoch": 0.77, - "grad_norm": 1.0539666138783272, - "learning_rate": 2.711995470379071e-06, - "loss": 0.6506, + "epoch": 0.61, + "grad_norm": 0.6252054392448368, + "learning_rate": 6.867973341997315e-06, + "loss": 0.3506, "step": 13352 }, { - "epoch": 0.77, - "grad_norm": 0.33989522147319307, - "learning_rate": 2.7107213810580536e-06, - "loss": 0.1925, + "epoch": 0.61, + "grad_norm": 0.3937552232681003, + "learning_rate": 6.866560320151179e-06, + "loss": 0.2669, "step": 13353 }, { - "epoch": 0.77, - "grad_norm": 0.2547793738056594, - "learning_rate": 2.709447544164444e-06, - "loss": 0.2418, + "epoch": 0.61, + "grad_norm": 0.44337814059992464, + "learning_rate": 6.865147367676378e-06, + "loss": 0.2488, "step": 13354 }, { - "epoch": 0.77, - "grad_norm": 0.3889272161112719, - "learning_rate": 2.708173959742353e-06, - "loss": 0.2009, + "epoch": 0.61, + "grad_norm": 0.5904760318124197, + "learning_rate": 6.863734484604193e-06, + "loss": 0.3884, "step": 13355 }, { - "epoch": 0.77, - "grad_norm": 0.36124339525567295, - "learning_rate": 2.7069006278358844e-06, - "loss": 0.2852, + "epoch": 0.61, + "grad_norm": 0.21147098596756062, + "learning_rate": 6.862321670965899e-06, + "loss": 0.1616, "step": 13356 }, { - "epoch": 0.77, - "grad_norm": 0.866977479010928, - "learning_rate": 2.70562754848913e-06, - "loss": 0.29, + "epoch": 0.61, + "grad_norm": 0.40525540888794503, + "learning_rate": 6.860908926792784e-06, + "loss": 0.2586, "step": 13357 }, { - "epoch": 0.77, - "grad_norm": 0.32296997474230627, - "learning_rate": 2.704354721746183e-06, - "loss": 0.2969, + "epoch": 0.61, + "grad_norm": 0.5671453986436225, + "learning_rate": 6.859496252116119e-06, + "loss": 0.3153, "step": 13358 }, { - "epoch": 0.77, - "grad_norm": 0.34896980548098877, - "learning_rate": 2.703082147651118e-06, - "loss": 0.2693, + "epoch": 0.61, + "grad_norm": 1.147317961326644, + "learning_rate": 6.858083646967181e-06, + "loss": 0.5571, "step": 13359 }, { - "epoch": 0.77, - "grad_norm": 0.2370600806689802, - "learning_rate": 2.7018098262480053e-06, - "loss": 0.103, + "epoch": 0.61, + "grad_norm": 0.3654008986061631, + "learning_rate": 6.85667111137724e-06, + "loss": 0.2108, "step": 13360 }, { - "epoch": 0.77, - "grad_norm": 0.5435084819332936, - "learning_rate": 2.700537757580901e-06, - "loss": 0.3418, + "epoch": 0.61, + "grad_norm": 0.31961388280847464, + "learning_rate": 6.8552586453775735e-06, + "loss": 0.2938, "step": 13361 }, { - "epoch": 0.77, - "grad_norm": 0.31758347380995683, - "learning_rate": 2.699265941693863e-06, - "loss": 0.2566, + "epoch": 0.61, + "grad_norm": 0.26214442367828417, + "learning_rate": 6.853846248999449e-06, + "loss": 0.1676, "step": 13362 }, { - "epoch": 0.77, - "grad_norm": 0.4792746559962163, - "learning_rate": 2.6979943786309315e-06, - "loss": 0.2685, + "epoch": 0.61, + "grad_norm": 0.38559620428908054, + "learning_rate": 6.852433922274138e-06, + "loss": 0.2223, "step": 13363 }, { - "epoch": 0.77, - "grad_norm": 1.2601019771202395, - "learning_rate": 2.6967230684361413e-06, - "loss": 0.7461, + "epoch": 0.61, + "grad_norm": 0.5227940340239714, + "learning_rate": 6.851021665232902e-06, + "loss": 0.3492, "step": 13364 }, { - "epoch": 0.77, - "grad_norm": 0.3264807957462211, - "learning_rate": 2.6954520111535166e-06, - "loss": 0.251, + "epoch": 0.61, + "grad_norm": 0.36161136759086693, + "learning_rate": 6.849609477907015e-06, + "loss": 0.3078, "step": 13365 }, { - "epoch": 0.77, - "grad_norm": 0.23864470808248076, - "learning_rate": 2.694181206827071e-06, - "loss": 0.1862, + "epoch": 0.61, + "grad_norm": 0.3098877668923679, + "learning_rate": 6.848197360327736e-06, + "loss": 0.2054, "step": 13366 }, { - "epoch": 0.77, - "grad_norm": 0.6936911184017519, - "learning_rate": 2.69291065550082e-06, - "loss": 0.3615, + "epoch": 0.61, + "grad_norm": 1.4746593345176537, + "learning_rate": 6.846785312526331e-06, + "loss": 0.8591, "step": 13367 }, { - "epoch": 0.77, - "grad_norm": 0.311294629785046, - "learning_rate": 2.691640357218759e-06, - "loss": 0.2626, + "epoch": 0.61, + "grad_norm": 0.3372660004060218, + "learning_rate": 6.84537333453406e-06, + "loss": 0.2465, "step": 13368 }, { - "epoch": 0.77, - "grad_norm": 1.1184392357841415, - "learning_rate": 2.690370312024878e-06, - "loss": 0.4929, + "epoch": 0.61, + "grad_norm": 0.3141500257553667, + "learning_rate": 6.843961426382179e-06, + "loss": 0.2105, "step": 13369 }, { - "epoch": 0.77, - "grad_norm": 0.32713334536191796, - "learning_rate": 2.6891005199631558e-06, - "loss": 0.2735, + "epoch": 0.61, + "grad_norm": 0.3419022566749706, + "learning_rate": 6.8425495881019565e-06, + "loss": 0.2642, "step": 13370 }, { - "epoch": 0.77, - "grad_norm": 0.3248615650269381, - "learning_rate": 2.6878309810775738e-06, - "loss": 0.2617, + "epoch": 0.61, + "grad_norm": 1.12285067283773, + "learning_rate": 6.841137819724639e-06, + "loss": 0.5649, "step": 13371 }, { - "epoch": 0.77, - "grad_norm": 1.4369216717341033, - "learning_rate": 2.6865616954120878e-06, - "loss": 0.5723, + "epoch": 0.61, + "grad_norm": 0.8687496145373245, + "learning_rate": 6.839726121281488e-06, + "loss": 0.5314, "step": 13372 }, { - "epoch": 0.77, - "grad_norm": 0.5156635728031603, - "learning_rate": 2.6852926630106558e-06, - "loss": 0.2641, + "epoch": 0.61, + "grad_norm": 0.28070199760988074, + "learning_rate": 6.83831449280375e-06, + "loss": 0.2421, "step": 13373 }, { - "epoch": 0.77, - "grad_norm": 0.3373075279153563, - "learning_rate": 2.6840238839172206e-06, - "loss": 0.2585, + "epoch": 0.61, + "grad_norm": 0.6408630261668267, + "learning_rate": 6.836902934322684e-06, + "loss": 0.3844, "step": 13374 }, { - "epoch": 0.77, - "grad_norm": 0.36128908717495617, - "learning_rate": 2.682755358175728e-06, - "loss": 0.1984, + "epoch": 0.61, + "grad_norm": 0.5637484308529603, + "learning_rate": 6.835491445869542e-06, + "loss": 0.3104, "step": 13375 }, { - "epoch": 0.77, - "grad_norm": 0.71767892274232, - "learning_rate": 2.6814870858301013e-06, - "loss": 0.2892, + "epoch": 0.61, + "grad_norm": 0.23709119577451992, + "learning_rate": 6.834080027475567e-06, + "loss": 0.1602, "step": 13376 }, { - "epoch": 0.77, - "grad_norm": 0.36937216939202855, - "learning_rate": 2.6802190669242634e-06, - "loss": 0.2787, + "epoch": 0.61, + "grad_norm": 0.38115026952114867, + "learning_rate": 6.832668679172007e-06, + "loss": 0.2938, "step": 13377 }, { - "epoch": 0.77, - "grad_norm": 0.3642005381982544, - "learning_rate": 2.6789513015021207e-06, - "loss": 0.2875, + "epoch": 0.61, + "grad_norm": 0.7513890058953042, + "learning_rate": 6.831257400990113e-06, + "loss": 0.3919, "step": 13378 }, { - "epoch": 0.77, - "grad_norm": 0.44540040503421996, - "learning_rate": 2.6776837896075824e-06, - "loss": 0.2292, + "epoch": 0.61, + "grad_norm": 0.3914294822413991, + "learning_rate": 6.829846192961125e-06, + "loss": 0.2391, "step": 13379 }, { - "epoch": 0.77, - "grad_norm": 0.33180184811006774, - "learning_rate": 2.6764165312845402e-06, - "loss": 0.2614, + "epoch": 0.61, + "grad_norm": 0.6106318983631007, + "learning_rate": 6.828435055116286e-06, + "loss": 0.3414, "step": 13380 }, { - "epoch": 0.77, - "grad_norm": 0.32777961922851084, - "learning_rate": 2.675149526576879e-06, - "loss": 0.2594, + "epoch": 0.61, + "grad_norm": 0.3975554521889576, + "learning_rate": 6.82702398748684e-06, + "loss": 0.2929, "step": 13381 }, { - "epoch": 0.77, - "grad_norm": 0.4898636512120215, - "learning_rate": 2.67388277552847e-06, - "loss": 0.3877, + "epoch": 0.61, + "grad_norm": 0.2959016361647113, + "learning_rate": 6.82561299010402e-06, + "loss": 0.1854, "step": 13382 }, { - "epoch": 0.77, - "grad_norm": 0.2721793650080163, - "learning_rate": 2.67261627818319e-06, - "loss": 0.197, + "epoch": 0.61, + "grad_norm": 0.4308284805834394, + "learning_rate": 6.8242020629990736e-06, + "loss": 0.2808, "step": 13383 }, { - "epoch": 0.77, - "grad_norm": 0.7355116149856278, - "learning_rate": 2.671350034584893e-06, - "loss": 0.3658, + "epoch": 0.61, + "grad_norm": 0.37166919695496264, + "learning_rate": 6.822791206203232e-06, + "loss": 0.288, "step": 13384 }, { - "epoch": 0.77, - "grad_norm": 0.47438055021689923, - "learning_rate": 2.670084044777429e-06, - "loss": 0.3412, + "epoch": 0.61, + "grad_norm": 0.3383684141607946, + "learning_rate": 6.821380419747729e-06, + "loss": 0.2784, "step": 13385 }, { - "epoch": 0.77, - "grad_norm": 0.2641302345028114, - "learning_rate": 2.668818308804636e-06, - "loss": 0.2146, + "epoch": 0.61, + "grad_norm": 0.734082300088229, + "learning_rate": 6.819969703663798e-06, + "loss": 0.3092, "step": 13386 }, { - "epoch": 0.77, - "grad_norm": 0.25613544160514196, - "learning_rate": 2.6675528267103534e-06, - "loss": 0.1642, + "epoch": 0.61, + "grad_norm": 0.4281720335127633, + "learning_rate": 6.818559057982676e-06, + "loss": 0.277, "step": 13387 }, { - "epoch": 0.77, - "grad_norm": 1.011992324523561, - "learning_rate": 2.6662875985384007e-06, - "loss": 0.6767, + "epoch": 0.62, + "grad_norm": 0.26244132801338854, + "learning_rate": 6.817148482735594e-06, + "loss": 0.1863, "step": 13388 }, { - "epoch": 0.77, - "grad_norm": 0.3127942387817402, - "learning_rate": 2.665022624332593e-06, - "loss": 0.2063, + "epoch": 0.62, + "grad_norm": 0.3365421577728329, + "learning_rate": 6.815737977953773e-06, + "loss": 0.2703, "step": 13389 }, { - "epoch": 0.77, - "grad_norm": 0.3537706267658949, - "learning_rate": 2.6637579041367357e-06, - "loss": 0.2924, + "epoch": 0.62, + "grad_norm": 0.8714532501283128, + "learning_rate": 6.81432754366844e-06, + "loss": 0.5102, "step": 13390 }, { - "epoch": 0.77, - "grad_norm": 0.71475916044793, - "learning_rate": 2.6624934379946243e-06, - "loss": 0.3884, + "epoch": 0.62, + "grad_norm": 0.45886033829610806, + "learning_rate": 6.81291717991083e-06, + "loss": 0.2945, "step": 13391 }, { - "epoch": 0.77, - "grad_norm": 0.21694765840296173, - "learning_rate": 2.661229225950054e-06, - "loss": 0.1554, + "epoch": 0.62, + "grad_norm": 0.4145486435410062, + "learning_rate": 6.8115068867121604e-06, + "loss": 0.276, "step": 13392 }, { - "epoch": 0.77, - "grad_norm": 0.35457329355787714, - "learning_rate": 2.659965268046798e-06, - "loss": 0.2834, + "epoch": 0.62, + "grad_norm": 0.8673398144930178, + "learning_rate": 6.810096664103656e-06, + "loss": 0.4092, "step": 13393 }, { - "epoch": 0.77, - "grad_norm": 0.4947678076948455, - "learning_rate": 2.6587015643286295e-06, - "loss": 0.3978, + "epoch": 0.62, + "grad_norm": 0.2740705382515217, + "learning_rate": 6.8086865121165356e-06, + "loss": 0.2175, "step": 13394 }, { - "epoch": 0.77, - "grad_norm": 0.5838803034932754, - "learning_rate": 2.657438114839308e-06, - "loss": 0.3296, + "epoch": 0.62, + "grad_norm": 0.26165990222225316, + "learning_rate": 6.807276430782022e-06, + "loss": 0.132, "step": 13395 }, { - "epoch": 0.77, - "grad_norm": 0.3963018840650698, - "learning_rate": 2.6561749196225915e-06, - "loss": 0.2608, + "epoch": 0.62, + "grad_norm": 0.41137605991850584, + "learning_rate": 6.80586642013133e-06, + "loss": 0.3042, "step": 13396 }, { - "epoch": 0.77, - "grad_norm": 0.3411749326825493, - "learning_rate": 2.654911978722222e-06, - "loss": 0.2992, + "epoch": 0.62, + "grad_norm": 0.3359857439529303, + "learning_rate": 6.804456480195677e-06, + "loss": 0.3094, "step": 13397 }, { - "epoch": 0.77, - "grad_norm": 0.4121763965343542, - "learning_rate": 2.6536492921819346e-06, - "loss": 0.2847, + "epoch": 0.62, + "grad_norm": 0.6389024554221187, + "learning_rate": 6.803046611006278e-06, + "loss": 0.3817, "step": 13398 }, { - "epoch": 0.77, - "grad_norm": 0.23315859992533955, - "learning_rate": 2.6523868600454526e-06, - "loss": 0.143, + "epoch": 0.62, + "grad_norm": 0.47320648134687343, + "learning_rate": 6.801636812594343e-06, + "loss": 0.1514, "step": 13399 }, { - "epoch": 0.77, - "grad_norm": 1.2290226217364666, - "learning_rate": 2.6511246823565016e-06, - "loss": 0.6549, + "epoch": 0.62, + "grad_norm": 0.2966466033327947, + "learning_rate": 6.800227084991089e-06, + "loss": 0.229, "step": 13400 }, { - "epoch": 0.77, - "grad_norm": 0.48684221439801073, - "learning_rate": 2.649862759158787e-06, - "loss": 0.2985, + "epoch": 0.62, + "grad_norm": 0.2850719524246659, + "learning_rate": 6.798817428227724e-06, + "loss": 0.2413, "step": 13401 }, { - "epoch": 0.77, - "grad_norm": 0.27189307366780724, - "learning_rate": 2.648601090496008e-06, - "loss": 0.2455, + "epoch": 0.62, + "grad_norm": 0.47959802059900786, + "learning_rate": 6.797407842335455e-06, + "loss": 0.2269, "step": 13402 }, { - "epoch": 0.77, - "grad_norm": 1.2851418916503219, - "learning_rate": 2.6473396764118575e-06, - "loss": 0.5728, + "epoch": 0.62, + "grad_norm": 0.39117437332585386, + "learning_rate": 6.7959983273454855e-06, + "loss": 0.3037, "step": 13403 }, { - "epoch": 0.77, - "grad_norm": 0.3704241967181858, - "learning_rate": 2.646078516950018e-06, - "loss": 0.2355, + "epoch": 0.62, + "grad_norm": 0.47785960180717135, + "learning_rate": 6.79458888328903e-06, + "loss": 0.3332, "step": 13404 }, { - "epoch": 0.77, - "grad_norm": 0.32679798644361935, - "learning_rate": 2.6448176121541634e-06, - "loss": 0.2539, + "epoch": 0.62, + "grad_norm": 0.35853028910776097, + "learning_rate": 6.793179510197287e-06, + "loss": 0.2115, "step": 13405 }, { - "epoch": 0.77, - "grad_norm": 0.42009343086991197, - "learning_rate": 2.643556962067958e-06, - "loss": 0.2625, + "epoch": 0.62, + "grad_norm": 0.42455106243715884, + "learning_rate": 6.791770208101458e-06, + "loss": 0.3109, "step": 13406 }, { - "epoch": 0.77, - "grad_norm": 0.3364463651284445, - "learning_rate": 2.6422965667350566e-06, - "loss": 0.2599, + "epoch": 0.62, + "grad_norm": 0.3180175148375077, + "learning_rate": 6.7903609770327425e-06, + "loss": 0.2184, "step": 13407 }, { - "epoch": 0.77, - "grad_norm": 0.4603678526160474, - "learning_rate": 2.6410364261991108e-06, - "loss": 0.2815, + "epoch": 0.62, + "grad_norm": 0.3288491965942752, + "learning_rate": 6.7889518170223445e-06, + "loss": 0.2468, "step": 13408 }, { - "epoch": 0.77, - "grad_norm": 0.34026034801807636, - "learning_rate": 2.6397765405037577e-06, - "loss": 0.2485, + "epoch": 0.62, + "grad_norm": 0.40399461277998727, + "learning_rate": 6.787542728101457e-06, + "loss": 0.2926, "step": 13409 }, { - "epoch": 0.77, - "grad_norm": 0.32456868640721753, - "learning_rate": 2.6385169096926265e-06, - "loss": 0.2346, + "epoch": 0.62, + "grad_norm": 0.8302834851534248, + "learning_rate": 6.786133710301279e-06, + "loss": 0.5066, "step": 13410 }, { - "epoch": 0.77, - "grad_norm": 0.49966563733385355, - "learning_rate": 2.637257533809334e-06, - "loss": 0.2526, + "epoch": 0.62, + "grad_norm": 1.7014620763672794, + "learning_rate": 6.784724763653002e-06, + "loss": 0.7792, "step": 13411 }, { - "epoch": 0.77, - "grad_norm": 0.6283074877544407, - "learning_rate": 2.6359984128975013e-06, - "loss": 0.33, + "epoch": 0.62, + "grad_norm": 0.2771498120281875, + "learning_rate": 6.783315888187818e-06, + "loss": 0.2093, "step": 13412 }, { - "epoch": 0.77, - "grad_norm": 0.36892761734499885, - "learning_rate": 2.6347395470007254e-06, - "loss": 0.2784, + "epoch": 0.62, + "grad_norm": 0.3006268015841587, + "learning_rate": 6.781907083936922e-06, + "loss": 0.2457, "step": 13413 }, { - "epoch": 0.77, - "grad_norm": 0.27568567981994324, - "learning_rate": 2.6334809361626034e-06, - "loss": 0.2485, + "epoch": 0.62, + "grad_norm": 0.6443147491811781, + "learning_rate": 6.7804983509315015e-06, + "loss": 0.4055, "step": 13414 }, { - "epoch": 0.77, - "grad_norm": 1.3477396319559145, - "learning_rate": 2.632222580426719e-06, - "loss": 0.2197, + "epoch": 0.62, + "grad_norm": 0.3327888673285415, + "learning_rate": 6.779089689202744e-06, + "loss": 0.2226, "step": 13415 }, { - "epoch": 0.77, - "grad_norm": 0.5419492347153053, - "learning_rate": 2.6309644798366474e-06, - "loss": 0.2762, + "epoch": 0.62, + "grad_norm": 0.3425138994864374, + "learning_rate": 6.777681098781833e-06, + "loss": 0.2399, "step": 13416 }, { - "epoch": 0.77, - "grad_norm": 0.3174658239757126, - "learning_rate": 2.6297066344359612e-06, - "loss": 0.2882, + "epoch": 0.62, + "grad_norm": 0.46525286247544295, + "learning_rate": 6.776272579699963e-06, + "loss": 0.3238, "step": 13417 }, { - "epoch": 0.77, - "grad_norm": 0.47016789004810144, - "learning_rate": 2.6284490442682186e-06, - "loss": 0.3389, + "epoch": 0.62, + "grad_norm": 0.37205906208094514, + "learning_rate": 6.774864131988309e-06, + "loss": 0.2238, "step": 13418 }, { - "epoch": 0.77, - "grad_norm": 0.3946797301594869, - "learning_rate": 2.6271917093769673e-06, - "loss": 0.2183, + "epoch": 0.62, + "grad_norm": 0.43678725838365934, + "learning_rate": 6.773455755678054e-06, + "loss": 0.3014, "step": 13419 }, { - "epoch": 0.77, - "grad_norm": 0.23602412255899952, - "learning_rate": 2.6259346298057476e-06, - "loss": 0.1842, + "epoch": 0.62, + "grad_norm": 0.369635075772253, + "learning_rate": 6.7720474508003755e-06, + "loss": 0.3283, "step": 13420 }, { - "epoch": 0.77, - "grad_norm": 0.34069892961807774, - "learning_rate": 2.6246778055980983e-06, - "loss": 0.2678, + "epoch": 0.62, + "grad_norm": 0.3662025150537096, + "learning_rate": 6.770639217386459e-06, + "loss": 0.2277, "step": 13421 }, { - "epoch": 0.77, - "grad_norm": 0.5221130511938119, - "learning_rate": 2.6234212367975375e-06, - "loss": 0.1991, + "epoch": 0.62, + "grad_norm": 0.5127706818605707, + "learning_rate": 6.7692310554674775e-06, + "loss": 0.3421, "step": 13422 }, { - "epoch": 0.77, - "grad_norm": 0.6077899342263603, - "learning_rate": 2.6221649234475845e-06, - "loss": 0.4096, + "epoch": 0.62, + "grad_norm": 0.7866663167104675, + "learning_rate": 6.767822965074607e-06, + "loss": 0.4178, "step": 13423 }, { - "epoch": 0.77, - "grad_norm": 0.6059573421941735, - "learning_rate": 2.620908865591738e-06, - "loss": 0.3912, + "epoch": 0.62, + "grad_norm": 0.3824868749707829, + "learning_rate": 6.766414946239018e-06, + "loss": 0.2878, "step": 13424 }, { - "epoch": 0.77, - "grad_norm": 0.25779870510643993, - "learning_rate": 2.619653063273504e-06, - "loss": 0.2187, + "epoch": 0.62, + "grad_norm": 0.31514165238858877, + "learning_rate": 6.765006998991889e-06, + "loss": 0.271, "step": 13425 }, { - "epoch": 0.77, - "grad_norm": 0.3211535151394236, - "learning_rate": 2.618397516536367e-06, - "loss": 0.2455, + "epoch": 0.62, + "grad_norm": 0.8583009413402867, + "learning_rate": 6.763599123364386e-06, + "loss": 0.4209, "step": 13426 }, { - "epoch": 0.77, - "grad_norm": 0.42875680879462036, - "learning_rate": 2.6171422254238067e-06, - "loss": 0.2418, + "epoch": 0.62, + "grad_norm": 0.33961009311629115, + "learning_rate": 6.762191319387678e-06, + "loss": 0.2637, "step": 13427 }, { - "epoch": 0.77, - "grad_norm": 0.4063859681100399, - "learning_rate": 2.6158871899792927e-06, - "loss": 0.1835, + "epoch": 0.62, + "grad_norm": 0.22781086565203268, + "learning_rate": 6.760783587092936e-06, + "loss": 0.1724, "step": 13428 }, { - "epoch": 0.77, - "grad_norm": 0.3628029769796745, - "learning_rate": 2.6146324102462862e-06, - "loss": 0.2992, + "epoch": 0.62, + "grad_norm": 1.0185572312100901, + "learning_rate": 6.75937592651132e-06, + "loss": 0.5024, "step": 13429 }, { - "epoch": 0.77, - "grad_norm": 0.5550301775668914, - "learning_rate": 2.6133778862682433e-06, - "loss": 0.3215, + "epoch": 0.62, + "grad_norm": 0.36219398999337843, + "learning_rate": 6.757968337674001e-06, + "loss": 0.2596, "step": 13430 }, { - "epoch": 0.77, - "grad_norm": 0.7979129683971643, - "learning_rate": 2.612123618088608e-06, - "loss": 0.4975, + "epoch": 0.62, + "grad_norm": 0.7307791786558051, + "learning_rate": 6.756560820612137e-06, + "loss": 0.3242, "step": 13431 }, { - "epoch": 0.77, - "grad_norm": 0.1949401131221945, - "learning_rate": 2.610869605750813e-06, - "loss": 0.138, + "epoch": 0.62, + "grad_norm": 0.3674203393977306, + "learning_rate": 6.755153375356891e-06, + "loss": 0.3202, "step": 13432 }, { - "epoch": 0.77, - "grad_norm": 0.35522383806764823, - "learning_rate": 2.6096158492982837e-06, - "loss": 0.2854, + "epoch": 0.62, + "grad_norm": 0.3357170514431487, + "learning_rate": 6.753746001939416e-06, + "loss": 0.2773, "step": 13433 }, { - "epoch": 0.77, - "grad_norm": 0.8215836187570505, - "learning_rate": 2.6083623487744423e-06, - "loss": 0.4089, + "epoch": 0.62, + "grad_norm": 0.49724624865556893, + "learning_rate": 6.752338700390881e-06, + "loss": 0.2422, "step": 13434 }, { - "epoch": 0.77, - "grad_norm": 0.32566860474390014, - "learning_rate": 2.6071091042226947e-06, - "loss": 0.2306, + "epoch": 0.62, + "grad_norm": 0.35246492626908804, + "learning_rate": 6.75093147074244e-06, + "loss": 0.1768, "step": 13435 }, { - "epoch": 0.77, - "grad_norm": 0.7199078363127668, - "learning_rate": 2.6058561156864415e-06, - "loss": 0.4138, + "epoch": 0.62, + "grad_norm": 0.28356269635467823, + "learning_rate": 6.7495243130252415e-06, + "loss": 0.2313, "step": 13436 }, { - "epoch": 0.77, - "grad_norm": 0.3209376148836106, - "learning_rate": 2.60460338320907e-06, - "loss": 0.3103, + "epoch": 0.62, + "grad_norm": 0.8458566086516052, + "learning_rate": 6.748117227270439e-06, + "loss": 0.3898, "step": 13437 }, { - "epoch": 0.77, - "grad_norm": 0.25042851717939346, - "learning_rate": 2.603350906833971e-06, - "loss": 0.1709, + "epoch": 0.62, + "grad_norm": 0.5621487706187629, + "learning_rate": 6.74671021350919e-06, + "loss": 0.3047, "step": 13438 }, { - "epoch": 0.77, - "grad_norm": 0.482862170243484, - "learning_rate": 2.6020986866045085e-06, - "loss": 0.2197, + "epoch": 0.62, + "grad_norm": 0.38914373729341334, + "learning_rate": 6.7453032717726416e-06, + "loss": 0.2771, "step": 13439 }, { - "epoch": 0.77, - "grad_norm": 0.5706303570276031, - "learning_rate": 2.600846722564051e-06, - "loss": 0.2956, + "epoch": 0.62, + "grad_norm": 0.2884000429154594, + "learning_rate": 6.743896402091941e-06, + "loss": 0.2421, "step": 13440 }, { - "epoch": 0.77, - "grad_norm": 0.2703381171096475, - "learning_rate": 2.59959501475595e-06, - "loss": 0.233, + "epoch": 0.62, + "grad_norm": 0.2960152004770081, + "learning_rate": 6.742489604498237e-06, + "loss": 0.0927, "step": 13441 }, { - "epoch": 0.77, - "grad_norm": 0.8217067922733584, - "learning_rate": 2.5983435632235586e-06, - "loss": 0.4331, + "epoch": 0.62, + "grad_norm": 0.4255825737909948, + "learning_rate": 6.741082879022671e-06, + "loss": 0.2808, "step": 13442 }, { - "epoch": 0.77, - "grad_norm": 0.4817791637134085, - "learning_rate": 2.597092368010212e-06, - "loss": 0.2811, + "epoch": 0.62, + "grad_norm": 1.0015148640382894, + "learning_rate": 6.739676225696392e-06, + "loss": 0.409, "step": 13443 }, { - "epoch": 0.77, - "grad_norm": 0.42074169351000873, - "learning_rate": 2.5958414291592384e-06, - "loss": 0.3039, + "epoch": 0.62, + "grad_norm": 0.36968843538834517, + "learning_rate": 6.738269644550538e-06, + "loss": 0.2715, "step": 13444 }, { - "epoch": 0.77, - "grad_norm": 0.22653893104331904, - "learning_rate": 2.594590746713953e-06, - "loss": 0.1724, + "epoch": 0.62, + "grad_norm": 0.35961199497053314, + "learning_rate": 6.736863135616251e-06, + "loss": 0.2532, "step": 13445 }, { - "epoch": 0.77, - "grad_norm": 0.5405885473781558, - "learning_rate": 2.5933403207176766e-06, - "loss": 0.2775, + "epoch": 0.62, + "grad_norm": 0.31734163711113594, + "learning_rate": 6.7354566989246665e-06, + "loss": 0.1709, "step": 13446 }, { - "epoch": 0.77, - "grad_norm": 0.3693178605084256, - "learning_rate": 2.5920901512137052e-06, - "loss": 0.296, + "epoch": 0.62, + "grad_norm": 1.1538689303295533, + "learning_rate": 6.7340503345069264e-06, + "loss": 0.4795, "step": 13447 }, { - "epoch": 0.77, - "grad_norm": 0.5107901097900251, - "learning_rate": 2.5908402382453337e-06, - "loss": 0.291, + "epoch": 0.62, + "grad_norm": 0.2527398877963983, + "learning_rate": 6.732644042394164e-06, + "loss": 0.2161, "step": 13448 }, { - "epoch": 0.77, - "grad_norm": 0.39069716983926167, - "learning_rate": 2.589590581855843e-06, - "loss": 0.2842, + "epoch": 0.62, + "grad_norm": 1.0743780660800497, + "learning_rate": 6.7312378226175135e-06, + "loss": 0.3704, "step": 13449 }, { - "epoch": 0.77, - "grad_norm": 0.3709867346737115, - "learning_rate": 2.588341182088514e-06, - "loss": 0.2768, + "epoch": 0.62, + "grad_norm": 1.3649647864154875, + "learning_rate": 6.729831675208103e-06, + "loss": 0.7868, "step": 13450 }, { - "epoch": 0.77, - "grad_norm": 0.30386942428806196, - "learning_rate": 2.587092038986613e-06, - "loss": 0.1273, + "epoch": 0.62, + "grad_norm": 0.32281818024577347, + "learning_rate": 6.728425600197072e-06, + "loss": 0.2078, "step": 13451 }, { - "epoch": 0.77, - "grad_norm": 0.5989646935983985, - "learning_rate": 2.5858431525933946e-06, - "loss": 0.3982, + "epoch": 0.62, + "grad_norm": 0.3993358600248972, + "learning_rate": 6.727019597615545e-06, + "loss": 0.2834, "step": 13452 }, { - "epoch": 0.77, - "grad_norm": 0.24380382171509266, - "learning_rate": 2.5845945229521095e-06, - "loss": 0.2705, + "epoch": 0.62, + "grad_norm": 0.34466100941584016, + "learning_rate": 6.7256136674946505e-06, + "loss": 0.2088, "step": 13453 }, { - "epoch": 0.77, - "grad_norm": 0.7642863991718087, - "learning_rate": 2.5833461501059933e-06, - "loss": 0.3247, + "epoch": 0.62, + "grad_norm": 0.4061849971330678, + "learning_rate": 6.724207809865508e-06, + "loss": 0.2272, "step": 13454 }, { - "epoch": 0.77, - "grad_norm": 0.7580810104910045, - "learning_rate": 2.5820980340982847e-06, - "loss": 0.4048, + "epoch": 0.62, + "grad_norm": 1.1239183653135179, + "learning_rate": 6.7228020247592515e-06, + "loss": 0.5097, "step": 13455 }, { - "epoch": 0.77, - "grad_norm": 0.30818453796959994, - "learning_rate": 2.5808501749722024e-06, - "loss": 0.2299, + "epoch": 0.62, + "grad_norm": 0.4229416473725185, + "learning_rate": 6.721396312207e-06, + "loss": 0.3597, "step": 13456 }, { - "epoch": 0.77, - "grad_norm": 0.27604039325913704, - "learning_rate": 2.5796025727709595e-06, - "loss": 0.2518, + "epoch": 0.62, + "grad_norm": 0.36592049760286427, + "learning_rate": 6.719990672239873e-06, + "loss": 0.1855, "step": 13457 }, { - "epoch": 0.77, - "grad_norm": 0.8081791607239174, - "learning_rate": 2.5783552275377567e-06, - "loss": 0.2606, + "epoch": 0.62, + "grad_norm": 0.3878084654086433, + "learning_rate": 6.718585104888993e-06, + "loss": 0.267, "step": 13458 }, { - "epoch": 0.77, - "grad_norm": 0.374970126576345, - "learning_rate": 2.577108139315797e-06, - "loss": 0.2767, + "epoch": 0.62, + "grad_norm": 0.3783029285320806, + "learning_rate": 6.717179610185473e-06, + "loss": 0.272, "step": 13459 }, { - "epoch": 0.77, - "grad_norm": 1.1719009812907486, - "learning_rate": 2.575861308148263e-06, - "loss": 0.748, + "epoch": 0.62, + "grad_norm": 0.41421337091575466, + "learning_rate": 6.715774188160434e-06, + "loss": 0.2792, "step": 13460 }, { - "epoch": 0.77, - "grad_norm": 0.28830064194617916, - "learning_rate": 2.574614734078332e-06, - "loss": 0.2245, + "epoch": 0.62, + "grad_norm": 0.38201081103613405, + "learning_rate": 6.71436883884499e-06, + "loss": 0.2611, "step": 13461 }, { - "epoch": 0.77, - "grad_norm": 0.40624340668557385, - "learning_rate": 2.5733684171491713e-06, - "loss": 0.3191, + "epoch": 0.62, + "grad_norm": 1.3217153086747997, + "learning_rate": 6.712963562270252e-06, + "loss": 0.8192, "step": 13462 }, { - "epoch": 0.77, - "grad_norm": 0.31625190451738877, - "learning_rate": 2.5721223574039466e-06, - "loss": 0.1969, + "epoch": 0.62, + "grad_norm": 0.4258920785975871, + "learning_rate": 6.7115583584673294e-06, + "loss": 0.2502, "step": 13463 }, { - "epoch": 0.77, - "grad_norm": 0.42735286500023417, - "learning_rate": 2.570876554885804e-06, - "loss": 0.2341, + "epoch": 0.62, + "grad_norm": 0.3106395061290788, + "learning_rate": 6.710153227467342e-06, + "loss": 0.2289, "step": 13464 }, { - "epoch": 0.77, - "grad_norm": 0.3077642913701503, - "learning_rate": 2.5696310096378875e-06, - "loss": 0.2794, + "epoch": 0.62, + "grad_norm": 0.5877860328354798, + "learning_rate": 6.708748169301389e-06, + "loss": 0.2921, "step": 13465 }, { - "epoch": 0.77, - "grad_norm": 1.1827315444704343, - "learning_rate": 2.568385721703329e-06, - "loss": 0.7581, + "epoch": 0.62, + "grad_norm": 0.4510729527949029, + "learning_rate": 6.707343184000579e-06, + "loss": 0.3165, "step": 13466 }, { - "epoch": 0.77, - "grad_norm": 1.4823498393056806, - "learning_rate": 2.5671406911252506e-06, - "loss": 0.2698, + "epoch": 0.62, + "grad_norm": 0.3079313779018472, + "learning_rate": 6.705938271596013e-06, + "loss": 0.1639, "step": 13467 }, { - "epoch": 0.77, - "grad_norm": 0.3486648926153057, - "learning_rate": 2.5658959179467734e-06, - "loss": 0.2548, + "epoch": 0.62, + "grad_norm": 0.3793296968298386, + "learning_rate": 6.704533432118804e-06, + "loss": 0.2901, "step": 13468 }, { - "epoch": 0.77, - "grad_norm": 0.3394923711719963, - "learning_rate": 2.5646514022110013e-06, - "loss": 0.2921, + "epoch": 0.62, + "grad_norm": 0.3941715291265191, + "learning_rate": 6.703128665600047e-06, + "loss": 0.2511, "step": 13469 }, { - "epoch": 0.77, - "grad_norm": 0.4458713311099046, - "learning_rate": 2.563407143961032e-06, - "loss": 0.2497, + "epoch": 0.62, + "grad_norm": 1.2781675938984973, + "learning_rate": 6.701723972070845e-06, + "loss": 0.307, "step": 13470 }, { - "epoch": 0.77, - "grad_norm": 0.23033084641571222, - "learning_rate": 2.5621631432399496e-06, - "loss": 0.1549, + "epoch": 0.62, + "grad_norm": 0.5007784284424243, + "learning_rate": 6.700319351562295e-06, + "loss": 0.3301, "step": 13471 }, { - "epoch": 0.77, - "grad_norm": 0.470600873287133, - "learning_rate": 2.5609194000908434e-06, - "loss": 0.3824, + "epoch": 0.62, + "grad_norm": 0.2962199301239696, + "learning_rate": 6.698914804105492e-06, + "loss": 0.2725, "step": 13472 }, { - "epoch": 0.77, - "grad_norm": 0.5083980754662374, - "learning_rate": 2.5596759145567763e-06, - "loss": 0.3503, + "epoch": 0.62, + "grad_norm": 0.2782068150804055, + "learning_rate": 6.697510329731536e-06, + "loss": 0.157, "step": 13473 }, { - "epoch": 0.77, - "grad_norm": 0.3457918738633052, - "learning_rate": 2.5584326866808084e-06, - "loss": 0.1994, + "epoch": 0.62, + "grad_norm": 0.7440124466178986, + "learning_rate": 6.696105928471519e-06, + "loss": 0.5213, "step": 13474 }, { - "epoch": 0.77, - "grad_norm": 0.8561374677835701, - "learning_rate": 2.557189716506e-06, - "loss": 0.3989, + "epoch": 0.62, + "grad_norm": 0.7127907512326165, + "learning_rate": 6.694701600356531e-06, + "loss": 0.393, "step": 13475 }, { - "epoch": 0.77, - "grad_norm": 0.3040416433764094, - "learning_rate": 2.555947004075392e-06, - "loss": 0.2597, + "epoch": 0.62, + "grad_norm": 0.3677233555560248, + "learning_rate": 6.693297345417662e-06, + "loss": 0.2758, "step": 13476 }, { - "epoch": 0.77, - "grad_norm": 0.22530902436018296, - "learning_rate": 2.5547045494320187e-06, - "loss": 0.1685, + "epoch": 0.62, + "grad_norm": 0.7985519142259734, + "learning_rate": 6.691893163686005e-06, + "loss": 0.3188, "step": 13477 }, { - "epoch": 0.77, - "grad_norm": 1.4681940576762507, - "learning_rate": 2.5534623526189075e-06, - "loss": 0.7422, + "epoch": 0.62, + "grad_norm": 0.3907876334295463, + "learning_rate": 6.690489055192646e-06, + "loss": 0.1396, "step": 13478 }, { - "epoch": 0.77, - "grad_norm": 0.8041062456322301, - "learning_rate": 2.5522204136790707e-06, - "loss": 0.4247, + "epoch": 0.62, + "grad_norm": 0.345863167914341, + "learning_rate": 6.689085019968669e-06, + "loss": 0.2438, "step": 13479 }, { - "epoch": 0.77, - "grad_norm": 0.3134178270242085, - "learning_rate": 2.5509787326555245e-06, - "loss": 0.2055, + "epoch": 0.62, + "grad_norm": 0.3783344118322687, + "learning_rate": 6.6876810580451545e-06, + "loss": 0.2657, "step": 13480 }, { - "epoch": 0.77, - "grad_norm": 0.37965284021285, - "learning_rate": 2.5497373095912638e-06, - "loss": 0.3178, + "epoch": 0.62, + "grad_norm": 0.4343064505170676, + "learning_rate": 6.686277169453193e-06, + "loss": 0.2777, "step": 13481 }, { - "epoch": 0.77, - "grad_norm": 0.3664967026991713, - "learning_rate": 2.5484961445292798e-06, - "loss": 0.2107, + "epoch": 0.62, + "grad_norm": 0.666444687901114, + "learning_rate": 6.684873354223862e-06, + "loss": 0.3352, "step": 13482 }, { - "epoch": 0.77, - "grad_norm": 0.348010313926068, - "learning_rate": 2.5472552375125514e-06, - "loss": 0.2591, + "epoch": 0.62, + "grad_norm": 0.4838406563508897, + "learning_rate": 6.683469612388239e-06, + "loss": 0.2534, "step": 13483 }, { - "epoch": 0.77, - "grad_norm": 0.27596022773032486, - "learning_rate": 2.546014588584057e-06, - "loss": 0.2083, + "epoch": 0.62, + "grad_norm": 0.3194564123018437, + "learning_rate": 6.682065943977399e-06, + "loss": 0.2639, "step": 13484 }, { - "epoch": 0.77, - "grad_norm": 0.5443468371920548, - "learning_rate": 2.5447741977867556e-06, - "loss": 0.3054, + "epoch": 0.62, + "grad_norm": 0.2676109107466966, + "learning_rate": 6.6806623490224234e-06, + "loss": 0.19, "step": 13485 }, { - "epoch": 0.77, - "grad_norm": 0.32076582873013293, - "learning_rate": 2.543534065163604e-06, - "loss": 0.2684, + "epoch": 0.62, + "grad_norm": 1.2457416357936884, + "learning_rate": 6.679258827554384e-06, + "loss": 0.7245, "step": 13486 }, { - "epoch": 0.77, - "grad_norm": 0.7357780117610295, - "learning_rate": 2.542294190757544e-06, - "loss": 0.3032, + "epoch": 0.62, + "grad_norm": 0.36899346696751806, + "learning_rate": 6.677855379604355e-06, + "loss": 0.2063, "step": 13487 }, { - "epoch": 0.77, - "grad_norm": 0.33348082391285816, - "learning_rate": 2.541054574611518e-06, - "loss": 0.2992, + "epoch": 0.62, + "grad_norm": 0.343492131064617, + "learning_rate": 6.6764520052034054e-06, + "loss": 0.2834, "step": 13488 }, { - "epoch": 0.78, - "grad_norm": 0.24713867181114496, - "learning_rate": 2.539815216768452e-06, - "loss": 0.2075, + "epoch": 0.62, + "grad_norm": 0.8327053881189924, + "learning_rate": 6.675048704382603e-06, + "loss": 0.3967, "step": 13489 }, { - "epoch": 0.78, - "grad_norm": 0.2824735073527296, - "learning_rate": 2.5385761172712642e-06, - "loss": 0.1629, + "epoch": 0.62, + "grad_norm": 0.32990200827519667, + "learning_rate": 6.67364547717302e-06, + "loss": 0.2066, "step": 13490 }, { - "epoch": 0.78, - "grad_norm": 0.7765884467550676, - "learning_rate": 2.537337276162861e-06, - "loss": 0.4139, + "epoch": 0.62, + "grad_norm": 0.28119829940692465, + "learning_rate": 6.67224232360572e-06, + "loss": 0.1727, "step": 13491 }, { - "epoch": 0.78, - "grad_norm": 0.31663661173967594, - "learning_rate": 2.5360986934861507e-06, - "loss": 0.2695, + "epoch": 0.62, + "grad_norm": 0.366735138486504, + "learning_rate": 6.670839243711768e-06, + "loss": 0.3136, "step": 13492 }, { - "epoch": 0.78, - "grad_norm": 0.3654158248056001, - "learning_rate": 2.5348603692840214e-06, - "loss": 0.3131, + "epoch": 0.62, + "grad_norm": 0.44540652266741043, + "learning_rate": 6.669436237522223e-06, + "loss": 0.1861, "step": 13493 }, { - "epoch": 0.78, - "grad_norm": 0.5894688790078348, - "learning_rate": 2.5336223035993566e-06, - "loss": 0.1376, + "epoch": 0.62, + "grad_norm": 0.45641235260931867, + "learning_rate": 6.6680333050681535e-06, + "loss": 0.3255, "step": 13494 }, { - "epoch": 0.78, - "grad_norm": 0.3125708863235182, - "learning_rate": 2.53238449647503e-06, - "loss": 0.2578, + "epoch": 0.62, + "grad_norm": 0.36160167808414756, + "learning_rate": 6.666630446380614e-06, + "loss": 0.2789, "step": 13495 }, { - "epoch": 0.78, - "grad_norm": 1.1793852459395195, - "learning_rate": 2.5311469479539043e-06, - "loss": 0.7588, + "epoch": 0.62, + "grad_norm": 0.42847561708183446, + "learning_rate": 6.665227661490664e-06, + "loss": 0.1141, "step": 13496 }, { - "epoch": 0.78, - "grad_norm": 0.2854328646753722, - "learning_rate": 2.5299096580788416e-06, - "loss": 0.2246, + "epoch": 0.62, + "grad_norm": 0.34679169113342967, + "learning_rate": 6.6638249504293565e-06, + "loss": 0.2504, "step": 13497 }, { - "epoch": 0.78, - "grad_norm": 0.411565998534182, - "learning_rate": 2.5286726268926864e-06, - "loss": 0.299, + "epoch": 0.62, + "grad_norm": 0.41407911669513575, + "learning_rate": 6.662422313227751e-06, + "loss": 0.2759, "step": 13498 }, { - "epoch": 0.78, - "grad_norm": 0.47853799166222705, - "learning_rate": 2.5274358544382773e-06, - "loss": 0.3148, + "epoch": 0.62, + "grad_norm": 0.4150506483783012, + "learning_rate": 6.661019749916899e-06, + "loss": 0.2941, "step": 13499 }, { - "epoch": 0.78, - "grad_norm": 0.32126409083539537, - "learning_rate": 2.5261993407584394e-06, - "loss": 0.2218, + "epoch": 0.62, + "grad_norm": 0.2773672562255504, + "learning_rate": 6.659617260527855e-06, + "loss": 0.2424, "step": 13500 }, { - "epoch": 0.78, - "grad_norm": 0.35565643253633095, - "learning_rate": 2.5249630858960006e-06, - "loss": 0.2473, + "epoch": 0.62, + "grad_norm": 1.2838819425827355, + "learning_rate": 6.658214845091664e-06, + "loss": 0.6325, "step": 13501 }, { - "epoch": 0.78, - "grad_norm": 1.0739827262463106, - "learning_rate": 2.5237270898937684e-06, - "loss": 0.7007, + "epoch": 0.62, + "grad_norm": 0.5909897065863796, + "learning_rate": 6.65681250363937e-06, + "loss": 0.3172, "step": 13502 }, { - "epoch": 0.78, - "grad_norm": 0.4259404404044619, - "learning_rate": 2.522491352794545e-06, - "loss": 0.2166, + "epoch": 0.62, + "grad_norm": 0.2537293165029303, + "learning_rate": 6.655410236202029e-06, + "loss": 0.2182, "step": 13503 }, { - "epoch": 0.78, - "grad_norm": 0.32529796843916775, - "learning_rate": 2.521255874641122e-06, - "loss": 0.2448, + "epoch": 0.62, + "grad_norm": 1.2197102302697673, + "learning_rate": 6.654008042810682e-06, + "loss": 0.802, "step": 13504 }, { - "epoch": 0.78, - "grad_norm": 0.24437696676073684, - "learning_rate": 2.5200206554762897e-06, - "loss": 0.2097, + "epoch": 0.62, + "grad_norm": 0.5073364028729574, + "learning_rate": 6.652605923496372e-06, + "loss": 0.326, "step": 13505 }, { - "epoch": 0.78, - "grad_norm": 1.2143331452246588, - "learning_rate": 2.5187856953428237e-06, - "loss": 0.4804, + "epoch": 0.62, + "grad_norm": 0.28322402275180153, + "learning_rate": 6.651203878290139e-06, + "loss": 0.1886, "step": 13506 }, { - "epoch": 0.78, - "grad_norm": 0.31524982654847683, - "learning_rate": 2.5175509942834843e-06, - "loss": 0.2062, + "epoch": 0.62, + "grad_norm": 0.39249863478245, + "learning_rate": 6.649801907223026e-06, + "loss": 0.3182, "step": 13507 }, { - "epoch": 0.78, - "grad_norm": 0.3559843324635449, - "learning_rate": 2.516316552341028e-06, - "loss": 0.3224, + "epoch": 0.62, + "grad_norm": 0.6417425941291718, + "learning_rate": 6.648400010326071e-06, + "loss": 0.3536, "step": 13508 }, { - "epoch": 0.78, - "grad_norm": 0.5991992837613603, - "learning_rate": 2.515082369558212e-06, - "loss": 0.3749, + "epoch": 0.62, + "grad_norm": 0.2937066946165478, + "learning_rate": 6.646998187630308e-06, + "loss": 0.1727, "step": 13509 }, { - "epoch": 0.78, - "grad_norm": 0.24645117400291955, - "learning_rate": 2.513848445977771e-06, - "loss": 0.182, + "epoch": 0.62, + "grad_norm": 0.7515154896469476, + "learning_rate": 6.645596439166771e-06, + "loss": 0.4306, "step": 13510 }, { - "epoch": 0.78, - "grad_norm": 0.48245437149625137, - "learning_rate": 2.5126147816424364e-06, - "loss": 0.2498, + "epoch": 0.62, + "grad_norm": 0.3881477218260446, + "learning_rate": 6.644194764966499e-06, + "loss": 0.2987, "step": 13511 }, { - "epoch": 0.78, - "grad_norm": 0.3648211240975149, - "learning_rate": 2.5113813765949267e-06, - "loss": 0.3079, + "epoch": 0.62, + "grad_norm": 0.34565792456014566, + "learning_rate": 6.642793165060521e-06, + "loss": 0.2906, "step": 13512 }, { - "epoch": 0.78, - "grad_norm": 0.28731932512581504, - "learning_rate": 2.5101482308779625e-06, - "loss": 0.1939, + "epoch": 0.62, + "grad_norm": 0.39395375236212493, + "learning_rate": 6.6413916394798665e-06, + "loss": 0.1328, "step": 13513 }, { - "epoch": 0.78, - "grad_norm": 0.9704475832943743, - "learning_rate": 2.508915344534242e-06, - "loss": 0.4582, + "epoch": 0.62, + "grad_norm": 0.8183444754834605, + "learning_rate": 6.639990188255559e-06, + "loss": 0.3757, "step": 13514 }, { - "epoch": 0.78, - "grad_norm": 0.6266296632397085, - "learning_rate": 2.50768271760646e-06, - "loss": 0.3931, + "epoch": 0.62, + "grad_norm": 0.3037058313122601, + "learning_rate": 6.638588811418635e-06, + "loss": 0.2288, "step": 13515 }, { - "epoch": 0.78, - "grad_norm": 0.27616660652349634, - "learning_rate": 2.5064503501373017e-06, - "loss": 0.2008, + "epoch": 0.62, + "grad_norm": 0.3585138180864113, + "learning_rate": 6.6371875090001136e-06, + "loss": 0.2639, "step": 13516 }, { - "epoch": 0.78, - "grad_norm": 0.2513559465711763, - "learning_rate": 2.505218242169448e-06, - "loss": 0.2077, + "epoch": 0.62, + "grad_norm": 0.6925361484520756, + "learning_rate": 6.6357862810310215e-06, + "loss": 0.342, "step": 13517 }, { - "epoch": 0.78, - "grad_norm": 1.3205543433577367, - "learning_rate": 2.5039863937455645e-06, - "loss": 0.484, + "epoch": 0.62, + "grad_norm": 0.2779713093908614, + "learning_rate": 6.63438512754238e-06, + "loss": 0.2057, "step": 13518 }, { - "epoch": 0.78, - "grad_norm": 0.6244415397855417, - "learning_rate": 2.5027548049083094e-06, - "loss": 0.3689, + "epoch": 0.62, + "grad_norm": 0.3914515420958655, + "learning_rate": 6.632984048565202e-06, + "loss": 0.2587, "step": 13519 }, { - "epoch": 0.78, - "grad_norm": 0.3004803209648444, - "learning_rate": 2.5015234757003326e-06, - "loss": 0.2546, + "epoch": 0.62, + "grad_norm": 0.8447351147748743, + "learning_rate": 6.631583044130516e-06, + "loss": 0.3886, "step": 13520 }, { - "epoch": 0.78, - "grad_norm": 0.7853617262016618, - "learning_rate": 2.500292406164273e-06, - "loss": 0.3941, + "epoch": 0.62, + "grad_norm": 0.40104828827966543, + "learning_rate": 6.630182114269334e-06, + "loss": 0.27, "step": 13521 }, { - "epoch": 0.78, - "grad_norm": 0.3676844253504336, - "learning_rate": 2.4990615963427688e-06, - "loss": 0.2752, + "epoch": 0.62, + "grad_norm": 0.7451099333818421, + "learning_rate": 6.628781259012673e-06, + "loss": 0.3169, "step": 13522 }, { - "epoch": 0.78, - "grad_norm": 0.22875365405484152, - "learning_rate": 2.4978310462784373e-06, - "loss": 0.1272, + "epoch": 0.62, + "grad_norm": 0.343501669260469, + "learning_rate": 6.627380478391543e-06, + "loss": 0.2704, "step": 13523 }, { - "epoch": 0.78, - "grad_norm": 0.35103757648372264, - "learning_rate": 2.496600756013895e-06, - "loss": 0.3124, + "epoch": 0.62, + "grad_norm": 0.38434460145562876, + "learning_rate": 6.625979772436961e-06, + "loss": 0.3096, "step": 13524 }, { - "epoch": 0.78, - "grad_norm": 0.3743025090473022, - "learning_rate": 2.4953707255917426e-06, - "loss": 0.2631, + "epoch": 0.62, + "grad_norm": 0.33125826989359125, + "learning_rate": 6.624579141179937e-06, + "loss": 0.2018, "step": 13525 }, { - "epoch": 0.78, - "grad_norm": 0.4822393785473432, - "learning_rate": 2.4941409550545824e-06, - "loss": 0.2804, + "epoch": 0.62, + "grad_norm": 0.5176780281028744, + "learning_rate": 6.623178584651475e-06, + "loss": 0.1098, "step": 13526 }, { - "epoch": 0.78, - "grad_norm": 1.1309185431941364, - "learning_rate": 2.492911444444999e-06, - "loss": 0.5591, + "epoch": 0.62, + "grad_norm": 0.4105483439709261, + "learning_rate": 6.6217781028825815e-06, + "loss": 0.3069, "step": 13527 }, { - "epoch": 0.78, - "grad_norm": 0.25582945570068455, - "learning_rate": 2.491682193805568e-06, - "loss": 0.2465, + "epoch": 0.62, + "grad_norm": 0.4091475743802213, + "learning_rate": 6.620377695904267e-06, + "loss": 0.3014, "step": 13528 }, { - "epoch": 0.78, - "grad_norm": 0.24118639331888805, - "learning_rate": 2.4904532031788577e-06, - "loss": 0.1293, + "epoch": 0.62, + "grad_norm": 0.5346846208175418, + "learning_rate": 6.618977363747538e-06, + "loss": 0.313, "step": 13529 }, { - "epoch": 0.78, - "grad_norm": 1.2057509069942007, - "learning_rate": 2.489224472607432e-06, - "loss": 0.5991, + "epoch": 0.62, + "grad_norm": 0.39864704552911057, + "learning_rate": 6.617577106443387e-06, + "loss": 0.2877, "step": 13530 }, { - "epoch": 0.78, - "grad_norm": 0.4075751183842601, - "learning_rate": 2.487996002133841e-06, - "loss": 0.2751, + "epoch": 0.62, + "grad_norm": 0.24378561639885932, + "learning_rate": 6.61617692402282e-06, + "loss": 0.2284, "step": 13531 }, { - "epoch": 0.78, - "grad_norm": 0.3821138342755005, - "learning_rate": 2.486767791800625e-06, - "loss": 0.2959, + "epoch": 0.62, + "grad_norm": 0.6434848560694352, + "learning_rate": 6.61477681651683e-06, + "loss": 0.1386, "step": 13532 }, { - "epoch": 0.78, - "grad_norm": 0.4553999305831225, - "learning_rate": 2.4855398416503173e-06, - "loss": 0.3063, + "epoch": 0.62, + "grad_norm": 0.3822749425931364, + "learning_rate": 6.613376783956423e-06, + "loss": 0.292, "step": 13533 }, { - "epoch": 0.78, - "grad_norm": 0.39885559025142264, - "learning_rate": 2.4843121517254386e-06, - "loss": 0.2575, + "epoch": 0.62, + "grad_norm": 0.6954658844577782, + "learning_rate": 6.61197682637259e-06, + "loss": 0.4242, "step": 13534 }, { - "epoch": 0.78, - "grad_norm": 0.2631468133379456, - "learning_rate": 2.4830847220685096e-06, - "loss": 0.2009, + "epoch": 0.62, + "grad_norm": 0.3236155639502606, + "learning_rate": 6.610576943796325e-06, + "loss": 0.2222, "step": 13535 }, { - "epoch": 0.78, - "grad_norm": 0.33484657861419903, - "learning_rate": 2.4818575527220347e-06, - "loss": 0.2423, + "epoch": 0.62, + "grad_norm": 0.3741457286097807, + "learning_rate": 6.609177136258618e-06, + "loss": 0.2939, "step": 13536 }, { - "epoch": 0.78, - "grad_norm": 0.5743804627272125, - "learning_rate": 2.4806306437285075e-06, - "loss": 0.3096, + "epoch": 0.62, + "grad_norm": 0.2875022439771004, + "learning_rate": 6.60777740379046e-06, + "loss": 0.1681, "step": 13537 }, { - "epoch": 0.78, - "grad_norm": 0.36102512843751833, - "learning_rate": 2.479403995130416e-06, - "loss": 0.2962, + "epoch": 0.62, + "grad_norm": 0.7205991992527593, + "learning_rate": 6.606377746422845e-06, + "loss": 0.4434, "step": 13538 }, { - "epoch": 0.78, - "grad_norm": 0.4681753138150729, - "learning_rate": 2.4781776069702446e-06, - "loss": 0.2943, + "epoch": 0.62, + "grad_norm": 0.27628887701497773, + "learning_rate": 6.604978164186752e-06, + "loss": 0.2185, "step": 13539 }, { - "epoch": 0.78, - "grad_norm": 0.40481623216969953, - "learning_rate": 2.4769514792904603e-06, - "loss": 0.2884, + "epoch": 0.62, + "grad_norm": 0.6922191734043246, + "learning_rate": 6.603578657113172e-06, + "loss": 0.4262, "step": 13540 }, { - "epoch": 0.78, - "grad_norm": 0.2316043899089237, - "learning_rate": 2.4757256121335182e-06, - "loss": 0.195, + "epoch": 0.62, + "grad_norm": 1.2247751713094377, + "learning_rate": 6.602179225233088e-06, + "loss": 0.768, "step": 13541 }, { - "epoch": 0.78, - "grad_norm": 0.8906998454080058, - "learning_rate": 2.4745000055418767e-06, - "loss": 0.2673, + "epoch": 0.62, + "grad_norm": 0.3359134785710888, + "learning_rate": 6.600779868577479e-06, + "loss": 0.182, "step": 13542 }, { - "epoch": 0.78, - "grad_norm": 0.3756343888756985, - "learning_rate": 2.4732746595579772e-06, - "loss": 0.2692, + "epoch": 0.62, + "grad_norm": 0.2462573463784811, + "learning_rate": 6.599380587177329e-06, + "loss": 0.2157, "step": 13543 }, { - "epoch": 0.78, - "grad_norm": 0.289010965803477, - "learning_rate": 2.4720495742242522e-06, - "loss": 0.2824, + "epoch": 0.62, + "grad_norm": 0.6638396313500866, + "learning_rate": 6.597981381063612e-06, + "loss": 0.297, "step": 13544 }, { - "epoch": 0.78, - "grad_norm": 1.2453049542327923, - "learning_rate": 2.4708247495831263e-06, - "loss": 0.7404, + "epoch": 0.62, + "grad_norm": 0.37277797544143787, + "learning_rate": 6.59658225026731e-06, + "loss": 0.2258, "step": 13545 }, { - "epoch": 0.78, - "grad_norm": 0.36664623886660264, - "learning_rate": 2.4696001856770137e-06, - "loss": 0.1901, + "epoch": 0.62, + "grad_norm": 0.7964061584648231, + "learning_rate": 6.595183194819399e-06, + "loss": 0.4656, "step": 13546 }, { - "epoch": 0.78, - "grad_norm": 0.32350893720100893, - "learning_rate": 2.468375882548325e-06, - "loss": 0.2296, + "epoch": 0.62, + "grad_norm": 0.32931497769222684, + "learning_rate": 6.5937842147508515e-06, + "loss": 0.2863, "step": 13547 }, { - "epoch": 0.78, - "grad_norm": 0.28448727451578076, - "learning_rate": 2.4671518402394554e-06, - "loss": 0.2343, + "epoch": 0.62, + "grad_norm": 0.3948796101686391, + "learning_rate": 6.5923853100926375e-06, + "loss": 0.2909, "step": 13548 }, { - "epoch": 0.78, - "grad_norm": 0.38941415911483, - "learning_rate": 2.4659280587927935e-06, - "loss": 0.2319, + "epoch": 0.62, + "grad_norm": 0.2518172731649503, + "learning_rate": 6.590986480875723e-06, + "loss": 0.1136, "step": 13549 }, { - "epoch": 0.78, - "grad_norm": 0.540433968384648, - "learning_rate": 2.464704538250717e-06, - "loss": 0.2962, + "epoch": 0.62, + "grad_norm": 0.804080672849914, + "learning_rate": 6.589587727131086e-06, + "loss": 0.4448, "step": 13550 }, { - "epoch": 0.78, - "grad_norm": 1.0027508666664908, - "learning_rate": 2.463481278655601e-06, - "loss": 0.6688, + "epoch": 0.62, + "grad_norm": 0.28545152414040403, + "learning_rate": 6.58818904888969e-06, + "loss": 0.2574, "step": 13551 }, { - "epoch": 0.78, - "grad_norm": 0.2635421406849195, - "learning_rate": 2.4622582800498042e-06, - "loss": 0.2202, + "epoch": 0.62, + "grad_norm": 0.4943696904867765, + "learning_rate": 6.586790446182501e-06, + "loss": 0.2902, "step": 13552 }, { - "epoch": 0.78, - "grad_norm": 0.3419175327603542, - "learning_rate": 2.4610355424756782e-06, - "loss": 0.2265, + "epoch": 0.62, + "grad_norm": 1.3112347802196245, + "learning_rate": 6.58539191904048e-06, + "loss": 0.7529, "step": 13553 }, { - "epoch": 0.78, - "grad_norm": 0.43706452382046834, - "learning_rate": 2.4598130659755647e-06, - "loss": 0.2644, + "epoch": 0.62, + "grad_norm": 0.33234850837298635, + "learning_rate": 6.583993467494592e-06, + "loss": 0.2774, "step": 13554 }, { - "epoch": 0.78, - "grad_norm": 0.47010643053265033, - "learning_rate": 2.4585908505918034e-06, - "loss": 0.1736, + "epoch": 0.62, + "grad_norm": 0.261054235047595, + "learning_rate": 6.5825950915757964e-06, + "loss": 0.201, "step": 13555 }, { - "epoch": 0.78, - "grad_norm": 0.2701575469928234, - "learning_rate": 2.4573688963667176e-06, - "loss": 0.2637, + "epoch": 0.62, + "grad_norm": 0.5538101293883746, + "learning_rate": 6.581196791315052e-06, + "loss": 0.3016, "step": 13556 }, { - "epoch": 0.78, - "grad_norm": 1.1200006981818171, - "learning_rate": 2.4561472033426213e-06, - "loss": 0.8067, + "epoch": 0.62, + "grad_norm": 0.3450811139550611, + "learning_rate": 6.579798566743314e-06, + "loss": 0.2522, "step": 13557 }, { - "epoch": 0.78, - "grad_norm": 0.8991589933463543, - "learning_rate": 2.4549257715618234e-06, - "loss": 0.3886, + "epoch": 0.62, + "grad_norm": 1.0680655851367749, + "learning_rate": 6.578400417891539e-06, + "loss": 0.3974, "step": 13558 }, { - "epoch": 0.78, - "grad_norm": 0.2317283658849086, - "learning_rate": 2.4537046010666187e-06, - "loss": 0.1782, + "epoch": 0.62, + "grad_norm": 0.38586406433553, + "learning_rate": 6.577002344790684e-06, + "loss": 0.3192, "step": 13559 }, { - "epoch": 0.78, - "grad_norm": 0.3356415177168732, - "learning_rate": 2.452483691899302e-06, - "loss": 0.2841, + "epoch": 0.62, + "grad_norm": 0.3513168492728097, + "learning_rate": 6.575604347471696e-06, + "loss": 0.2524, "step": 13560 }, { - "epoch": 0.78, - "grad_norm": 0.4554604862081215, - "learning_rate": 2.45126304410215e-06, - "loss": 0.2736, + "epoch": 0.62, + "grad_norm": 1.1187370232006462, + "learning_rate": 6.574206425965528e-06, + "loss": 0.4263, "step": 13561 }, { - "epoch": 0.78, - "grad_norm": 0.33245473228630973, - "learning_rate": 2.450042657717435e-06, - "loss": 0.2231, + "epoch": 0.62, + "grad_norm": 0.32255015788801844, + "learning_rate": 6.572808580303124e-06, + "loss": 0.2365, "step": 13562 }, { - "epoch": 0.78, - "grad_norm": 1.1404646769499847, - "learning_rate": 2.4488225327874147e-06, - "loss": 0.657, + "epoch": 0.62, + "grad_norm": 0.3814831841104404, + "learning_rate": 6.571410810515439e-06, + "loss": 0.229, "step": 13563 }, { - "epoch": 0.78, - "grad_norm": 0.33060524305523453, - "learning_rate": 2.4476026693543485e-06, - "loss": 0.2643, + "epoch": 0.62, + "grad_norm": 0.3654973383307629, + "learning_rate": 6.570013116633413e-06, + "loss": 0.2554, "step": 13564 }, { - "epoch": 0.78, - "grad_norm": 0.4029416650173851, - "learning_rate": 2.4463830674604773e-06, - "loss": 0.2251, + "epoch": 0.62, + "grad_norm": 0.5656393675167831, + "learning_rate": 6.568615498687993e-06, + "loss": 0.2579, "step": 13565 }, { - "epoch": 0.78, - "grad_norm": 0.532161244083854, - "learning_rate": 2.4451637271480357e-06, - "loss": 0.3595, + "epoch": 0.62, + "grad_norm": 0.40017997220653473, + "learning_rate": 6.567217956710115e-06, + "loss": 0.2584, "step": 13566 }, { - "epoch": 0.78, - "grad_norm": 0.22270028050530902, - "learning_rate": 2.4439446484592466e-06, - "loss": 0.1721, + "epoch": 0.62, + "grad_norm": 0.3513324958219387, + "learning_rate": 6.565820490730725e-06, + "loss": 0.2933, "step": 13567 }, { - "epoch": 0.78, - "grad_norm": 0.5652419091857782, - "learning_rate": 2.442725831436331e-06, - "loss": 0.2662, + "epoch": 0.62, + "grad_norm": 0.4540643968902497, + "learning_rate": 6.564423100780759e-06, + "loss": 0.2126, "step": 13568 }, { - "epoch": 0.78, - "grad_norm": 0.8543186545019308, - "learning_rate": 2.4415072761214963e-06, - "loss": 0.5066, + "epoch": 0.62, + "grad_norm": 0.2592219254927489, + "learning_rate": 6.563025786891155e-06, + "loss": 0.2073, "step": 13569 }, { - "epoch": 0.78, - "grad_norm": 0.5957280739948273, - "learning_rate": 2.4402889825569396e-06, - "loss": 0.3107, + "epoch": 0.62, + "grad_norm": 1.2120595394033797, + "learning_rate": 6.561628549092844e-06, + "loss": 0.7161, "step": 13570 }, { - "epoch": 0.78, - "grad_norm": 0.4228437651054475, - "learning_rate": 2.4390709507848497e-06, - "loss": 0.2888, + "epoch": 0.62, + "grad_norm": 0.3368745478916684, + "learning_rate": 6.560231387416766e-06, + "loss": 0.2568, "step": 13571 }, { - "epoch": 0.78, - "grad_norm": 0.3197597496407284, - "learning_rate": 2.4378531808474048e-06, - "loss": 0.2583, + "epoch": 0.62, + "grad_norm": 0.38017450052947815, + "learning_rate": 6.558834301893851e-06, + "loss": 0.291, "step": 13572 }, { - "epoch": 0.78, - "grad_norm": 0.2274135399939389, - "learning_rate": 2.4366356727867847e-06, - "loss": 0.156, + "epoch": 0.62, + "grad_norm": 0.7528033112160798, + "learning_rate": 6.557437292555027e-06, + "loss": 0.3998, "step": 13573 }, { - "epoch": 0.78, - "grad_norm": 0.45599695397853623, - "learning_rate": 2.435418426645144e-06, - "loss": 0.2762, + "epoch": 0.62, + "grad_norm": 0.4660636674683085, + "learning_rate": 6.556040359431219e-06, + "loss": 0.3625, "step": 13574 }, { - "epoch": 0.78, - "grad_norm": 0.3557905396710193, - "learning_rate": 2.4342014424646343e-06, - "loss": 0.2661, + "epoch": 0.62, + "grad_norm": 0.22445241674572333, + "learning_rate": 6.554643502553365e-06, + "loss": 0.1799, "step": 13575 }, { - "epoch": 0.78, - "grad_norm": 0.7923026226601533, - "learning_rate": 2.4329847202874058e-06, - "loss": 0.3719, + "epoch": 0.62, + "grad_norm": 0.44690457393576005, + "learning_rate": 6.55324672195238e-06, + "loss": 0.2624, "step": 13576 }, { - "epoch": 0.78, - "grad_norm": 0.7790919251786718, - "learning_rate": 2.4317682601555913e-06, - "loss": 0.2882, + "epoch": 0.62, + "grad_norm": 1.189648060758862, + "learning_rate": 6.551850017659192e-06, + "loss": 0.6777, "step": 13577 }, { - "epoch": 0.78, - "grad_norm": 0.9627289488428679, - "learning_rate": 2.4305520621113175e-06, - "loss": 0.2932, + "epoch": 0.62, + "grad_norm": 0.41413053172059355, + "learning_rate": 6.550453389704721e-06, + "loss": 0.2021, "step": 13578 }, { - "epoch": 0.78, - "grad_norm": 0.2430678228878389, - "learning_rate": 2.4293361261966965e-06, - "loss": 0.2217, + "epoch": 0.62, + "grad_norm": 0.4623761277410905, + "learning_rate": 6.5490568381198815e-06, + "loss": 0.3202, "step": 13579 }, { - "epoch": 0.78, - "grad_norm": 0.24952073494447208, - "learning_rate": 2.4281204524538425e-06, - "loss": 0.2056, + "epoch": 0.62, + "grad_norm": 0.49373473530849976, + "learning_rate": 6.547660362935603e-06, + "loss": 0.3176, "step": 13580 }, { - "epoch": 0.78, - "grad_norm": 1.1887519774137303, - "learning_rate": 2.426905040924853e-06, - "loss": 0.7538, + "epoch": 0.62, + "grad_norm": 0.25492930925364227, + "learning_rate": 6.546263964182796e-06, + "loss": 0.1377, "step": 13581 }, { - "epoch": 0.78, - "grad_norm": 0.6848642705531716, - "learning_rate": 2.4256898916518145e-06, - "loss": 0.2607, + "epoch": 0.62, + "grad_norm": 0.5759292252415109, + "learning_rate": 6.544867641892376e-06, + "loss": 0.443, "step": 13582 }, { - "epoch": 0.78, - "grad_norm": 0.34377998627378237, - "learning_rate": 2.4244750046768105e-06, - "loss": 0.2902, + "epoch": 0.62, + "grad_norm": 0.37642791123404024, + "learning_rate": 6.543471396095256e-06, + "loss": 0.2965, "step": 13583 }, { - "epoch": 0.78, - "grad_norm": 0.35628235811761977, - "learning_rate": 2.4232603800419087e-06, - "loss": 0.3136, + "epoch": 0.62, + "grad_norm": 0.3966915695437229, + "learning_rate": 6.542075226822349e-06, + "loss": 0.1939, "step": 13584 }, { - "epoch": 0.78, - "grad_norm": 0.1818921796340036, - "learning_rate": 2.4220460177891757e-06, - "loss": 0.0816, + "epoch": 0.62, + "grad_norm": 0.47137929420304037, + "learning_rate": 6.540679134104562e-06, + "loss": 0.3519, "step": 13585 }, { - "epoch": 0.78, - "grad_norm": 0.3849502874315825, - "learning_rate": 2.4208319179606643e-06, - "loss": 0.3033, + "epoch": 0.62, + "grad_norm": 0.515896333962505, + "learning_rate": 6.539283117972805e-06, + "loss": 0.3161, "step": 13586 }, { - "epoch": 0.78, - "grad_norm": 0.47780436818363103, - "learning_rate": 2.419618080598417e-06, - "loss": 0.3461, + "epoch": 0.62, + "grad_norm": 0.32782025648902746, + "learning_rate": 6.537887178457984e-06, + "loss": 0.2537, "step": 13587 }, { - "epoch": 0.78, - "grad_norm": 0.4695138835770297, - "learning_rate": 2.418404505744467e-06, - "loss": 0.222, + "epoch": 0.62, + "grad_norm": 0.21341923500148452, + "learning_rate": 6.536491315591006e-06, + "loss": 0.1504, "step": 13588 }, { - "epoch": 0.78, - "grad_norm": 0.5030258966165988, - "learning_rate": 2.4171911934408464e-06, - "loss": 0.2913, + "epoch": 0.62, + "grad_norm": 0.8379827003431878, + "learning_rate": 6.5350955294027715e-06, + "loss": 0.4556, "step": 13589 }, { - "epoch": 0.78, - "grad_norm": 0.9470056441285403, - "learning_rate": 2.4159781437295684e-06, - "loss": 0.4908, + "epoch": 0.62, + "grad_norm": 0.4327063589434647, + "learning_rate": 6.533699819924182e-06, + "loss": 0.3112, "step": 13590 }, { - "epoch": 0.78, - "grad_norm": 0.2590739492221378, - "learning_rate": 2.414765356652641e-06, - "loss": 0.1915, + "epoch": 0.62, + "grad_norm": 0.3309764422731282, + "learning_rate": 6.532304187186138e-06, + "loss": 0.2564, "step": 13591 }, { - "epoch": 0.78, - "grad_norm": 0.2734733858017858, - "learning_rate": 2.4135528322520597e-06, - "loss": 0.2396, + "epoch": 0.62, + "grad_norm": 1.1003021709932568, + "learning_rate": 6.530908631219533e-06, + "loss": 0.5603, "step": 13592 }, { - "epoch": 0.78, - "grad_norm": 0.47194836624512676, - "learning_rate": 2.4123405705698213e-06, - "loss": 0.2789, + "epoch": 0.62, + "grad_norm": 0.3220238624452519, + "learning_rate": 6.5295131520552725e-06, + "loss": 0.2231, "step": 13593 }, { - "epoch": 0.78, - "grad_norm": 0.7394128282774476, - "learning_rate": 2.4111285716479015e-06, - "loss": 0.3719, + "epoch": 0.62, + "grad_norm": 0.5519985755696906, + "learning_rate": 6.528117749724248e-06, + "loss": 0.294, "step": 13594 }, { - "epoch": 0.78, - "grad_norm": 0.2604512556637114, - "learning_rate": 2.4099168355282743e-06, - "loss": 0.1993, + "epoch": 0.62, + "grad_norm": 0.380345351740475, + "learning_rate": 6.526722424257346e-06, + "loss": 0.3062, "step": 13595 }, { - "epoch": 0.78, - "grad_norm": 0.49077072597667926, - "learning_rate": 2.4087053622529e-06, - "loss": 0.351, + "epoch": 0.62, + "grad_norm": 0.3367410491272575, + "learning_rate": 6.525327175685459e-06, + "loss": 0.2723, "step": 13596 }, { - "epoch": 0.78, - "grad_norm": 1.1799394846564877, - "learning_rate": 2.4074941518637295e-06, - "loss": 0.5699, + "epoch": 0.62, + "grad_norm": 0.38463427642600034, + "learning_rate": 6.5239320040394836e-06, + "loss": 0.1588, "step": 13597 }, { - "epoch": 0.78, - "grad_norm": 0.3030824419466015, - "learning_rate": 2.406283204402714e-06, - "loss": 0.2124, + "epoch": 0.62, + "grad_norm": 0.3956691344379368, + "learning_rate": 6.522536909350303e-06, + "loss": 0.2897, "step": 13598 }, { - "epoch": 0.78, - "grad_norm": 0.46405446573905923, - "learning_rate": 2.405072519911783e-06, - "loss": 0.31, + "epoch": 0.62, + "grad_norm": 0.6990117335351764, + "learning_rate": 6.521141891648804e-06, + "loss": 0.3605, "step": 13599 }, { - "epoch": 0.78, - "grad_norm": 0.32419622170740764, - "learning_rate": 2.4038620984328655e-06, - "loss": 0.2435, + "epoch": 0.62, + "grad_norm": 0.29000361808172953, + "learning_rate": 6.519746950965868e-06, + "loss": 0.2359, "step": 13600 }, { - "epoch": 0.78, - "grad_norm": 0.21380776837026694, - "learning_rate": 2.4026519400078728e-06, - "loss": 0.1533, + "epoch": 0.62, + "grad_norm": 0.5730095770691935, + "learning_rate": 6.518352087332384e-06, + "loss": 0.323, "step": 13601 }, { - "epoch": 0.78, - "grad_norm": 1.3300209163280692, - "learning_rate": 2.401442044678721e-06, - "loss": 0.5964, + "epoch": 0.62, + "grad_norm": 0.41486784338328975, + "learning_rate": 6.516957300779227e-06, + "loss": 0.3106, "step": 13602 }, { - "epoch": 0.78, - "grad_norm": 0.35887510821296326, - "learning_rate": 2.4002324124873033e-06, - "loss": 0.2981, + "epoch": 0.62, + "grad_norm": 0.2895026703735662, + "learning_rate": 6.515562591337279e-06, + "loss": 0.2512, "step": 13603 }, { - "epoch": 0.78, - "grad_norm": 0.2783468021005403, - "learning_rate": 2.3990230434755112e-06, - "loss": 0.1819, + "epoch": 0.62, + "grad_norm": 1.1280972075387121, + "learning_rate": 6.514167959037415e-06, + "loss": 0.3003, "step": 13604 }, { - "epoch": 0.78, - "grad_norm": 0.7379956763914063, - "learning_rate": 2.3978139376852206e-06, - "loss": 0.4326, + "epoch": 0.62, + "grad_norm": 0.6038107088624585, + "learning_rate": 6.5127734039105154e-06, + "loss": 0.3317, "step": 13605 }, { - "epoch": 0.78, - "grad_norm": 0.3247249377272244, - "learning_rate": 2.3966050951583096e-06, - "loss": 0.1835, + "epoch": 0.63, + "grad_norm": 0.3556698684340373, + "learning_rate": 6.51137892598745e-06, + "loss": 0.2905, "step": 13606 }, { - "epoch": 0.78, - "grad_norm": 0.3669611320334294, - "learning_rate": 2.39539651593664e-06, - "loss": 0.2728, + "epoch": 0.63, + "grad_norm": 0.5091940594138323, + "learning_rate": 6.509984525299094e-06, + "loss": 0.2775, "step": 13607 }, { - "epoch": 0.78, - "grad_norm": 0.35511621181556124, - "learning_rate": 2.3941882000620586e-06, - "loss": 0.2297, + "epoch": 0.63, + "grad_norm": 0.44259149965369876, + "learning_rate": 6.508590201876317e-06, + "loss": 0.3178, "step": 13608 }, { - "epoch": 0.78, - "grad_norm": 1.2103759075595983, - "learning_rate": 2.3929801475764113e-06, - "loss": 0.498, + "epoch": 0.63, + "grad_norm": 0.2651758019393575, + "learning_rate": 6.507195955749983e-06, + "loss": 0.1761, "step": 13609 }, { - "epoch": 0.78, - "grad_norm": 0.33080955912933513, - "learning_rate": 2.391772358521536e-06, - "loss": 0.2461, + "epoch": 0.63, + "grad_norm": 0.3843186926675106, + "learning_rate": 6.505801786950969e-06, + "loss": 0.2547, "step": 13610 }, { - "epoch": 0.78, - "grad_norm": 0.37747048144770934, - "learning_rate": 2.3905648329392574e-06, - "loss": 0.2751, + "epoch": 0.63, + "grad_norm": 0.4165351540492922, + "learning_rate": 6.504407695510135e-06, + "loss": 0.2513, "step": 13611 }, { - "epoch": 0.78, - "grad_norm": 0.8894374852257119, - "learning_rate": 2.389357570871391e-06, - "loss": 0.4819, + "epoch": 0.63, + "grad_norm": 0.5717073071296611, + "learning_rate": 6.5030136814583475e-06, + "loss": 0.3512, "step": 13612 }, { - "epoch": 0.78, - "grad_norm": 0.24421514760385282, - "learning_rate": 2.3881505723597422e-06, - "loss": 0.202, + "epoch": 0.63, + "grad_norm": 0.9462675654888323, + "learning_rate": 6.501619744826462e-06, + "loss": 0.3942, "step": 13613 }, { - "epoch": 0.78, - "grad_norm": 0.40684410356900697, - "learning_rate": 2.386943837446114e-06, - "loss": 0.1062, + "epoch": 0.63, + "grad_norm": 0.2596385621002387, + "learning_rate": 6.500225885645346e-06, + "loss": 0.2158, "step": 13614 }, { - "epoch": 0.78, - "grad_norm": 0.3562939479119407, - "learning_rate": 2.385737366172294e-06, - "loss": 0.3223, + "epoch": 0.63, + "grad_norm": 0.2650466362583132, + "learning_rate": 6.498832103945857e-06, + "loss": 0.1827, "step": 13615 }, { - "epoch": 0.78, - "grad_norm": 0.34011491819599793, - "learning_rate": 2.3845311585800612e-06, - "loss": 0.2739, + "epoch": 0.63, + "grad_norm": 1.2108509039715882, + "learning_rate": 6.49743839975885e-06, + "loss": 0.4793, "step": 13616 }, { - "epoch": 0.78, - "grad_norm": 0.7845879219744151, - "learning_rate": 2.3833252147111853e-06, - "loss": 0.2916, + "epoch": 0.63, + "grad_norm": 0.2804825833058116, + "learning_rate": 6.4960447731151785e-06, + "loss": 0.1193, "step": 13617 }, { - "epoch": 0.78, - "grad_norm": 0.343820832697006, - "learning_rate": 2.382119534607431e-06, - "loss": 0.2475, + "epoch": 0.63, + "grad_norm": 0.38142806029102955, + "learning_rate": 6.494651224045702e-06, + "loss": 0.2884, "step": 13618 }, { - "epoch": 0.78, - "grad_norm": 0.25729081529380954, - "learning_rate": 2.38091411831055e-06, - "loss": 0.2516, + "epoch": 0.63, + "grad_norm": 0.6542733355473932, + "learning_rate": 6.493257752581268e-06, + "loss": 0.3421, "step": 13619 }, { - "epoch": 0.78, - "grad_norm": 0.5159852122593639, - "learning_rate": 2.379708965862285e-06, - "loss": 0.2322, + "epoch": 0.63, + "grad_norm": 0.30062518581652486, + "learning_rate": 6.491864358752728e-06, + "loss": 0.1144, "step": 13620 }, { - "epoch": 0.78, - "grad_norm": 0.8836787867400838, - "learning_rate": 2.3785040773043686e-06, - "loss": 0.3155, + "epoch": 0.63, + "grad_norm": 0.2915309033095364, + "learning_rate": 6.490471042590929e-06, + "loss": 0.2113, "step": 13621 }, { - "epoch": 0.78, - "grad_norm": 0.4244576302501707, - "learning_rate": 2.3772994526785308e-06, - "loss": 0.2892, + "epoch": 0.63, + "grad_norm": 0.3781819429036915, + "learning_rate": 6.489077804126717e-06, + "loss": 0.3169, "step": 13622 }, { - "epoch": 0.78, - "grad_norm": 0.29146153581979123, - "learning_rate": 2.376095092026486e-06, - "loss": 0.289, + "epoch": 0.63, + "grad_norm": 0.24554877796376093, + "learning_rate": 6.487684643390941e-06, + "loss": 0.1041, "step": 13623 }, { - "epoch": 0.78, - "grad_norm": 0.6333592123809318, - "learning_rate": 2.37489099538994e-06, - "loss": 0.2585, + "epoch": 0.63, + "grad_norm": 0.3710530595122849, + "learning_rate": 6.486291560414441e-06, + "loss": 0.3026, "step": 13624 }, { - "epoch": 0.78, - "grad_norm": 0.3367141969825039, - "learning_rate": 2.3736871628105907e-06, - "loss": 0.2642, + "epoch": 0.63, + "grad_norm": 0.7890588175173323, + "learning_rate": 6.48489855522806e-06, + "loss": 0.4993, "step": 13625 }, { - "epoch": 0.78, - "grad_norm": 0.2573235517922094, - "learning_rate": 2.372483594330124e-06, - "loss": 0.1519, + "epoch": 0.63, + "grad_norm": 0.3263755436709602, + "learning_rate": 6.483505627862632e-06, + "loss": 0.2909, "step": 13626 }, { - "epoch": 0.78, - "grad_norm": 0.3441524091740002, - "learning_rate": 2.3712802899902256e-06, - "loss": 0.258, + "epoch": 0.63, + "grad_norm": 0.26265339357003836, + "learning_rate": 6.482112778349005e-06, + "loss": 0.1732, "step": 13627 }, { - "epoch": 0.78, - "grad_norm": 0.31942754964550046, - "learning_rate": 2.3700772498325617e-06, - "loss": 0.2458, + "epoch": 0.63, + "grad_norm": 0.4774459288676462, + "learning_rate": 6.480720006718011e-06, + "loss": 0.2661, "step": 13628 }, { - "epoch": 0.78, - "grad_norm": 0.7120882141052624, - "learning_rate": 2.3688744738987955e-06, - "loss": 0.3966, + "epoch": 0.63, + "grad_norm": 0.5910513082801484, + "learning_rate": 6.479327313000483e-06, + "loss": 0.3357, "step": 13629 }, { - "epoch": 0.78, - "grad_norm": 1.3333702259355196, - "learning_rate": 2.3676719622305754e-06, - "loss": 0.2666, + "epoch": 0.63, + "grad_norm": 0.32338598774722543, + "learning_rate": 6.477934697227254e-06, + "loss": 0.2462, "step": 13630 }, { - "epoch": 0.78, - "grad_norm": 0.2738749937750857, - "learning_rate": 2.3664697148695494e-06, - "loss": 0.2535, + "epoch": 0.63, + "grad_norm": 0.4450418972773433, + "learning_rate": 6.476542159429158e-06, + "loss": 0.3512, "step": 13631 }, { - "epoch": 0.78, - "grad_norm": 0.24507540728163543, - "learning_rate": 2.365267731857349e-06, - "loss": 0.1795, + "epoch": 0.63, + "grad_norm": 0.5839848323571413, + "learning_rate": 6.475149699637022e-06, + "loss": 0.325, "step": 13632 }, { - "epoch": 0.78, - "grad_norm": 0.7427338888923851, - "learning_rate": 2.3640660132356e-06, - "loss": 0.3918, + "epoch": 0.63, + "grad_norm": 0.2150093790563464, + "learning_rate": 6.473757317881675e-06, + "loss": 0.147, "step": 13633 }, { - "epoch": 0.78, - "grad_norm": 0.3615965217499706, - "learning_rate": 2.362864559045912e-06, - "loss": 0.2184, + "epoch": 0.63, + "grad_norm": 0.37276436347433134, + "learning_rate": 6.47236501419394e-06, + "loss": 0.2913, "step": 13634 }, { - "epoch": 0.78, - "grad_norm": 0.34559986606325616, - "learning_rate": 2.3616633693298996e-06, - "loss": 0.2857, + "epoch": 0.63, + "grad_norm": 0.7088512573983962, + "learning_rate": 6.4709727886046455e-06, + "loss": 0.4263, "step": 13635 }, { - "epoch": 0.78, - "grad_norm": 1.076346574627362, - "learning_rate": 2.360462444129156e-06, - "loss": 0.6005, + "epoch": 0.63, + "grad_norm": 0.3217880742746967, + "learning_rate": 6.4695806411446125e-06, + "loss": 0.2284, "step": 13636 }, { - "epoch": 0.78, - "grad_norm": 0.2748717407836768, - "learning_rate": 2.3592617834852694e-06, - "loss": 0.1638, + "epoch": 0.63, + "grad_norm": 1.0747593688227615, + "learning_rate": 6.4681885718446624e-06, + "loss": 0.6763, "step": 13637 }, { - "epoch": 0.78, - "grad_norm": 0.4605339516609135, - "learning_rate": 2.358061387439818e-06, - "loss": 0.3061, + "epoch": 0.63, + "grad_norm": 0.37255231144988693, + "learning_rate": 6.466796580735611e-06, + "loss": 0.2954, "step": 13638 }, { - "epoch": 0.78, - "grad_norm": 0.36174894634119437, - "learning_rate": 2.356861256034371e-06, - "loss": 0.3071, + "epoch": 0.63, + "grad_norm": 0.3922717309648967, + "learning_rate": 6.4654046678482765e-06, + "loss": 0.2734, "step": 13639 }, { - "epoch": 0.78, - "grad_norm": 0.40535929812470445, - "learning_rate": 2.355661389310492e-06, - "loss": 0.2054, + "epoch": 0.63, + "grad_norm": 0.2500497906705041, + "learning_rate": 6.4640128332134774e-06, + "loss": 0.1114, "step": 13640 }, { - "epoch": 0.78, - "grad_norm": 0.5112923460526705, - "learning_rate": 2.354461787309733e-06, - "loss": 0.3737, + "epoch": 0.63, + "grad_norm": 0.6240687126576941, + "learning_rate": 6.46262107686203e-06, + "loss": 0.3444, "step": 13641 }, { - "epoch": 0.78, - "grad_norm": 0.5087292017954329, - "learning_rate": 2.353262450073628e-06, - "loss": 0.3251, + "epoch": 0.63, + "grad_norm": 0.2746684977192115, + "learning_rate": 6.46122939882474e-06, + "loss": 0.2448, "step": 13642 }, { - "epoch": 0.78, - "grad_norm": 0.29548431419215865, - "learning_rate": 2.3520633776437187e-06, - "loss": 0.199, + "epoch": 0.63, + "grad_norm": 1.2084918136604883, + "learning_rate": 6.459837799132416e-06, + "loss": 0.4871, "step": 13643 }, { - "epoch": 0.78, - "grad_norm": 0.26742981963554274, - "learning_rate": 2.3508645700615253e-06, - "loss": 0.1937, + "epoch": 0.63, + "grad_norm": 0.7224313470811684, + "learning_rate": 6.458446277815876e-06, + "loss": 0.4363, "step": 13644 }, { - "epoch": 0.78, - "grad_norm": 0.7458493819978979, - "learning_rate": 2.3496660273685633e-06, - "loss": 0.3586, + "epoch": 0.63, + "grad_norm": 0.2301581105292881, + "learning_rate": 6.45705483490592e-06, + "loss": 0.1827, "step": 13645 }, { - "epoch": 0.78, - "grad_norm": 0.398854225716011, - "learning_rate": 2.348467749606335e-06, - "loss": 0.2928, + "epoch": 0.63, + "grad_norm": 0.3500829726178428, + "learning_rate": 6.455663470433358e-06, + "loss": 0.2475, "step": 13646 }, { - "epoch": 0.78, - "grad_norm": 0.2920827396093392, - "learning_rate": 2.347269736816341e-06, - "loss": 0.2593, + "epoch": 0.63, + "grad_norm": 0.5678009054775089, + "learning_rate": 6.454272184428987e-06, + "loss": 0.3846, "step": 13647 }, { - "epoch": 0.78, - "grad_norm": 1.4477857127683085, - "learning_rate": 2.3460719890400687e-06, - "loss": 0.748, + "epoch": 0.63, + "grad_norm": 0.35479739755105305, + "learning_rate": 6.452880976923614e-06, + "loss": 0.2848, "step": 13648 }, { - "epoch": 0.78, - "grad_norm": 0.45883127272812063, - "learning_rate": 2.3448745063189937e-06, - "loss": 0.2472, + "epoch": 0.63, + "grad_norm": 0.9924227603975603, + "learning_rate": 6.451489847948039e-06, + "loss": 0.4607, "step": 13649 }, { - "epoch": 0.78, - "grad_norm": 0.2593031866666469, - "learning_rate": 2.3436772886945847e-06, - "loss": 0.1717, + "epoch": 0.63, + "grad_norm": 0.32170812810026933, + "learning_rate": 6.450098797533057e-06, + "loss": 0.2713, "step": 13650 }, { - "epoch": 0.78, - "grad_norm": 0.3450260325923007, - "learning_rate": 2.3424803362083005e-06, - "loss": 0.3068, + "epoch": 0.63, + "grad_norm": 0.3945097247760993, + "learning_rate": 6.4487078257094685e-06, + "loss": 0.2879, "step": 13651 }, { - "epoch": 0.78, - "grad_norm": 0.4138606948993418, - "learning_rate": 2.3412836489015945e-06, - "loss": 0.2737, + "epoch": 0.63, + "grad_norm": 0.33951515707295765, + "learning_rate": 6.447316932508063e-06, + "loss": 0.2364, "step": 13652 }, { - "epoch": 0.78, - "grad_norm": 0.46342922046762425, - "learning_rate": 2.340087226815907e-06, - "loss": 0.2678, + "epoch": 0.63, + "grad_norm": 0.7187630399193446, + "learning_rate": 6.445926117959638e-06, + "loss": 0.2936, "step": 13653 }, { - "epoch": 0.78, - "grad_norm": 0.5116239699745199, - "learning_rate": 2.338891069992669e-06, - "loss": 0.3142, + "epoch": 0.63, + "grad_norm": 0.28793544651631053, + "learning_rate": 6.4445353820949826e-06, + "loss": 0.2512, "step": 13654 }, { - "epoch": 0.78, - "grad_norm": 0.3018463186424405, - "learning_rate": 2.3376951784733014e-06, - "loss": 0.2575, + "epoch": 0.63, + "grad_norm": 0.46735270042750343, + "learning_rate": 6.4431447249448875e-06, + "loss": 0.3717, "step": 13655 }, { - "epoch": 0.78, - "grad_norm": 0.5042604633777217, - "learning_rate": 2.336499552299223e-06, - "loss": 0.3364, + "epoch": 0.63, + "grad_norm": 1.319575368106341, + "learning_rate": 6.441754146540137e-06, + "loss": 0.2372, "step": 13656 }, { - "epoch": 0.78, - "grad_norm": 0.3450572288678373, - "learning_rate": 2.3353041915118357e-06, - "loss": 0.1711, + "epoch": 0.63, + "grad_norm": 0.3786108169757764, + "learning_rate": 6.4403636469115225e-06, + "loss": 0.2575, "step": 13657 }, { - "epoch": 0.78, - "grad_norm": 0.5783242085477613, - "learning_rate": 2.3341090961525347e-06, - "loss": 0.3407, + "epoch": 0.63, + "grad_norm": 0.41239527876538173, + "learning_rate": 6.438973226089828e-06, + "loss": 0.3107, "step": 13658 }, { - "epoch": 0.78, - "grad_norm": 0.26315711604683223, - "learning_rate": 2.3329142662627026e-06, - "loss": 0.2855, + "epoch": 0.63, + "grad_norm": 0.3007128347499269, + "learning_rate": 6.437582884105835e-06, + "loss": 0.1245, "step": 13659 }, { - "epoch": 0.78, - "grad_norm": 1.3037750222352267, - "learning_rate": 2.3317197018837233e-06, - "loss": 0.3251, + "epoch": 0.63, + "grad_norm": 0.39411157741329433, + "learning_rate": 6.436192620990318e-06, + "loss": 0.282, "step": 13660 }, { - "epoch": 0.78, - "grad_norm": 0.6003607719198905, - "learning_rate": 2.330525403056961e-06, - "loss": 0.3302, + "epoch": 0.63, + "grad_norm": 1.2785307167030902, + "learning_rate": 6.434802436774065e-06, + "loss": 0.712, "step": 13661 }, { - "epoch": 0.78, - "grad_norm": 0.397572728143814, - "learning_rate": 2.329331369823774e-06, - "loss": 0.3095, + "epoch": 0.63, + "grad_norm": 0.33815883677112824, + "learning_rate": 6.4334123314878495e-06, + "loss": 0.2395, "step": 13662 }, { - "epoch": 0.79, - "grad_norm": 0.23107023596230547, - "learning_rate": 2.3281376022255107e-06, - "loss": 0.1822, + "epoch": 0.63, + "grad_norm": 0.35841567956965986, + "learning_rate": 6.4320223051624485e-06, + "loss": 0.2777, "step": 13663 }, { - "epoch": 0.79, - "grad_norm": 0.5463080263376949, - "learning_rate": 2.326944100303511e-06, - "loss": 0.323, + "epoch": 0.63, + "grad_norm": 0.7329798118941226, + "learning_rate": 6.430632357828632e-06, + "loss": 0.418, "step": 13664 }, { - "epoch": 0.79, - "grad_norm": 0.389194949108078, - "learning_rate": 2.32575086409911e-06, - "loss": 0.3304, + "epoch": 0.63, + "grad_norm": 0.2909518970827277, + "learning_rate": 6.429242489517178e-06, + "loss": 0.2164, "step": 13665 }, { - "epoch": 0.79, - "grad_norm": 0.5020850413515092, - "learning_rate": 2.3245578936536263e-06, - "loss": 0.2403, + "epoch": 0.63, + "grad_norm": 0.3413314932462295, + "learning_rate": 6.427852700258852e-06, + "loss": 0.1978, "step": 13666 }, { - "epoch": 0.79, - "grad_norm": 0.3099713566620966, - "learning_rate": 2.323365189008372e-06, - "loss": 0.2437, + "epoch": 0.63, + "grad_norm": 0.4607850684279235, + "learning_rate": 6.4264629900844255e-06, + "loss": 0.3709, "step": 13667 }, { - "epoch": 0.79, - "grad_norm": 0.4927324531266989, - "learning_rate": 2.3221727502046487e-06, - "loss": 0.3289, + "epoch": 0.63, + "grad_norm": 0.787700481262018, + "learning_rate": 6.425073359024664e-06, + "loss": 0.4396, "step": 13668 }, { - "epoch": 0.79, - "grad_norm": 0.5103081683200503, - "learning_rate": 2.3209805772837557e-06, - "loss": 0.2563, + "epoch": 0.63, + "grad_norm": 0.315676722552752, + "learning_rate": 6.4236838071103305e-06, + "loss": 0.1971, "step": 13669 }, { - "epoch": 0.79, - "grad_norm": 0.23731099735038388, - "learning_rate": 2.3197886702869756e-06, - "loss": 0.1511, + "epoch": 0.63, + "grad_norm": 0.3801232797865906, + "learning_rate": 6.4222943343721925e-06, + "loss": 0.3284, "step": 13670 }, { - "epoch": 0.79, - "grad_norm": 0.2998413324216504, - "learning_rate": 2.3185970292555827e-06, - "loss": 0.294, + "epoch": 0.63, + "grad_norm": 0.28457958440644565, + "learning_rate": 6.420904940841011e-06, + "loss": 0.1725, "step": 13671 }, { - "epoch": 0.79, - "grad_norm": 1.1268300194743812, - "learning_rate": 2.317405654230842e-06, - "loss": 0.5717, + "epoch": 0.63, + "grad_norm": 0.30231106681913206, + "learning_rate": 6.419515626547543e-06, + "loss": 0.1964, "step": 13672 }, { - "epoch": 0.79, - "grad_norm": 0.42392272265792735, - "learning_rate": 2.3162145452540164e-06, - "loss": 0.2239, + "epoch": 0.63, + "grad_norm": 0.5394933021987132, + "learning_rate": 6.418126391522544e-06, + "loss": 0.4365, "step": 13673 }, { - "epoch": 0.79, - "grad_norm": 0.5320072799016502, - "learning_rate": 2.3150237023663503e-06, - "loss": 0.3257, + "epoch": 0.63, + "grad_norm": 0.5144866507395176, + "learning_rate": 6.41673723579678e-06, + "loss": 0.3224, "step": 13674 }, { - "epoch": 0.79, - "grad_norm": 0.3519616895462257, - "learning_rate": 2.3138331256090853e-06, - "loss": 0.2953, + "epoch": 0.63, + "grad_norm": 0.35353036505905033, + "learning_rate": 6.415348159400998e-06, + "loss": 0.2745, "step": 13675 }, { - "epoch": 0.79, - "grad_norm": 0.2006587422306295, - "learning_rate": 2.312642815023444e-06, - "loss": 0.1078, + "epoch": 0.63, + "grad_norm": 0.765089869569537, + "learning_rate": 6.4139591623659545e-06, + "loss": 0.3172, "step": 13676 }, { - "epoch": 0.79, - "grad_norm": 0.3494649945683863, - "learning_rate": 2.311452770650653e-06, - "loss": 0.2756, + "epoch": 0.63, + "grad_norm": 0.25564180106080453, + "learning_rate": 6.412570244722396e-06, + "loss": 0.1862, "step": 13677 }, { - "epoch": 0.79, - "grad_norm": 0.27624763479241404, - "learning_rate": 2.3102629925319233e-06, - "loss": 0.2441, + "epoch": 0.63, + "grad_norm": 0.3249209962482771, + "learning_rate": 6.411181406501077e-06, + "loss": 0.2713, "step": 13678 }, { - "epoch": 0.79, - "grad_norm": 0.5364171797371735, - "learning_rate": 2.3090734807084545e-06, - "loss": 0.2007, + "epoch": 0.63, + "grad_norm": 0.9521741275109614, + "learning_rate": 6.409792647732743e-06, + "loss": 0.5482, "step": 13679 }, { - "epoch": 0.79, - "grad_norm": 0.3835854642103055, - "learning_rate": 2.307884235221438e-06, - "loss": 0.2958, + "epoch": 0.63, + "grad_norm": 0.9310621420367279, + "learning_rate": 6.408403968448139e-06, + "loss": 0.4427, "step": 13680 }, { - "epoch": 0.79, - "grad_norm": 0.5006981875240444, - "learning_rate": 2.3066952561120616e-06, - "loss": 0.2308, + "epoch": 0.63, + "grad_norm": 0.3400771452829275, + "learning_rate": 6.40701536867801e-06, + "loss": 0.2632, "step": 13681 }, { - "epoch": 0.79, - "grad_norm": 0.3935079231579874, - "learning_rate": 2.3055065434214983e-06, - "loss": 0.2848, + "epoch": 0.63, + "grad_norm": 0.3742603939816222, + "learning_rate": 6.405626848453095e-06, + "loss": 0.24, "step": 13682 }, { - "epoch": 0.79, - "grad_norm": 0.2531872170122483, - "learning_rate": 2.3043180971909128e-06, - "loss": 0.2199, + "epoch": 0.63, + "grad_norm": 0.43053178239634077, + "learning_rate": 6.40423840780414e-06, + "loss": 0.2576, "step": 13683 }, { - "epoch": 0.79, - "grad_norm": 0.42624480063631087, - "learning_rate": 2.3031299174614572e-06, - "loss": 0.2557, + "epoch": 0.63, + "grad_norm": 0.35900300601984575, + "learning_rate": 6.402850046761881e-06, + "loss": 0.2584, "step": 13684 }, { - "epoch": 0.79, - "grad_norm": 0.6061150551108329, - "learning_rate": 2.3019420042742856e-06, - "loss": 0.3503, + "epoch": 0.63, + "grad_norm": 0.3087640385141549, + "learning_rate": 6.401461765357055e-06, + "loss": 0.1897, "step": 13685 }, { - "epoch": 0.79, - "grad_norm": 0.24096818349359286, - "learning_rate": 2.3007543576705303e-06, - "loss": 0.2254, + "epoch": 0.63, + "grad_norm": 0.6398156660662767, + "learning_rate": 6.400073563620392e-06, + "loss": 0.346, "step": 13686 }, { - "epoch": 0.79, - "grad_norm": 1.5103883995679313, - "learning_rate": 2.299566977691321e-06, - "loss": 0.508, + "epoch": 0.63, + "grad_norm": 0.40800513509260283, + "learning_rate": 6.398685441582639e-06, + "loss": 0.2627, "step": 13687 }, { - "epoch": 0.79, - "grad_norm": 0.5508991805415692, - "learning_rate": 2.2983798643777755e-06, - "loss": 0.3538, + "epoch": 0.63, + "grad_norm": 0.5119749047887007, + "learning_rate": 6.397297399274516e-06, + "loss": 0.3618, "step": 13688 }, { - "epoch": 0.79, - "grad_norm": 0.3684502029643531, - "learning_rate": 2.297193017771002e-06, - "loss": 0.2364, + "epoch": 0.63, + "grad_norm": 0.3693317921303293, + "learning_rate": 6.395909436726755e-06, + "loss": 0.2554, "step": 13689 }, { - "epoch": 0.79, - "grad_norm": 0.28819135601703405, - "learning_rate": 2.296006437912106e-06, - "loss": 0.2523, + "epoch": 0.63, + "grad_norm": 0.4522658965947409, + "learning_rate": 6.394521553970083e-06, + "loss": 0.2938, "step": 13690 }, { - "epoch": 0.79, - "grad_norm": 0.37804061739241884, - "learning_rate": 2.2948201248421754e-06, - "loss": 0.2241, + "epoch": 0.63, + "grad_norm": 0.3613441109908906, + "learning_rate": 6.393133751035232e-06, + "loss": 0.2717, "step": 13691 }, { - "epoch": 0.79, - "grad_norm": 0.40514152634974704, - "learning_rate": 2.2936340786022926e-06, - "loss": 0.2199, + "epoch": 0.63, + "grad_norm": 0.7450006655157405, + "learning_rate": 6.3917460279529234e-06, + "loss": 0.3142, "step": 13692 }, { - "epoch": 0.79, - "grad_norm": 1.195907334850774, - "learning_rate": 2.2924482992335272e-06, - "loss": 0.4781, + "epoch": 0.63, + "grad_norm": 0.28875317335404266, + "learning_rate": 6.390358384753881e-06, + "loss": 0.2294, "step": 13693 }, { - "epoch": 0.79, - "grad_norm": 0.3126316257768985, - "learning_rate": 2.291262786776949e-06, - "loss": 0.2628, + "epoch": 0.63, + "grad_norm": 0.736803354950209, + "learning_rate": 6.388970821468823e-06, + "loss": 0.2977, "step": 13694 }, { - "epoch": 0.79, - "grad_norm": 0.3317162176271463, - "learning_rate": 2.2900775412736086e-06, - "loss": 0.2689, + "epoch": 0.63, + "grad_norm": 0.8732637761398607, + "learning_rate": 6.387583338128471e-06, + "loss": 0.2358, "step": 13695 }, { - "epoch": 0.79, - "grad_norm": 0.502522546503961, - "learning_rate": 2.288892562764552e-06, - "loss": 0.2287, + "epoch": 0.63, + "grad_norm": 0.36785106754924785, + "learning_rate": 6.386195934763544e-06, + "loss": 0.2587, "step": 13696 }, { - "epoch": 0.79, - "grad_norm": 0.4025773704640404, - "learning_rate": 2.28770785129081e-06, - "loss": 0.2352, + "epoch": 0.63, + "grad_norm": 0.9234216359502381, + "learning_rate": 6.384808611404755e-06, + "loss": 0.4895, "step": 13697 }, { - "epoch": 0.79, - "grad_norm": 0.3119324736608787, - "learning_rate": 2.286523406893418e-06, - "loss": 0.2736, + "epoch": 0.63, + "grad_norm": 0.34322946877814786, + "learning_rate": 6.383421368082818e-06, + "loss": 0.2483, "step": 13698 }, { - "epoch": 0.79, - "grad_norm": 0.46774707777608343, - "learning_rate": 2.285339229613388e-06, - "loss": 0.2402, + "epoch": 0.63, + "grad_norm": 0.3053665024614012, + "learning_rate": 6.3820342048284465e-06, + "loss": 0.2229, "step": 13699 }, { - "epoch": 0.79, - "grad_norm": 0.7445728385464858, - "learning_rate": 2.2841553194917288e-06, - "loss": 0.5074, + "epoch": 0.63, + "grad_norm": 1.3147600100620773, + "learning_rate": 6.380647121672352e-06, + "loss": 0.6294, "step": 13700 }, { - "epoch": 0.79, - "grad_norm": 0.3503867920780646, - "learning_rate": 2.2829716765694397e-06, - "loss": 0.2881, + "epoch": 0.63, + "grad_norm": 0.38417229925348206, + "learning_rate": 6.3792601186452405e-06, + "loss": 0.2841, "step": 13701 }, { - "epoch": 0.79, - "grad_norm": 0.31801113224197763, - "learning_rate": 2.2817883008875065e-06, - "loss": 0.2597, + "epoch": 0.63, + "grad_norm": 0.313920513820115, + "learning_rate": 6.377873195777822e-06, + "loss": 0.1972, "step": 13702 }, { - "epoch": 0.79, - "grad_norm": 0.2962235175090054, - "learning_rate": 2.2806051924869144e-06, - "loss": 0.1875, + "epoch": 0.63, + "grad_norm": 0.8918396797533942, + "learning_rate": 6.376486353100795e-06, + "loss": 0.5183, "step": 13703 }, { - "epoch": 0.79, - "grad_norm": 0.3308268216739318, - "learning_rate": 2.2794223514086333e-06, - "loss": 0.25, + "epoch": 0.63, + "grad_norm": 0.45705671226387096, + "learning_rate": 6.375099590644871e-06, + "loss": 0.2761, "step": 13704 }, { - "epoch": 0.79, - "grad_norm": 1.2976921395533858, - "learning_rate": 2.2782397776936237e-06, - "loss": 0.2965, + "epoch": 0.63, + "grad_norm": 0.2564019470009641, + "learning_rate": 6.373712908440749e-06, + "loss": 0.1278, "step": 13705 }, { - "epoch": 0.79, - "grad_norm": 0.3459148821492143, - "learning_rate": 2.277057471382836e-06, - "loss": 0.3103, + "epoch": 0.63, + "grad_norm": 0.33196542045305477, + "learning_rate": 6.37232630651913e-06, + "loss": 0.2766, "step": 13706 }, { - "epoch": 0.79, - "grad_norm": 0.3780665332879215, - "learning_rate": 2.2758754325172194e-06, - "loss": 0.2764, + "epoch": 0.63, + "grad_norm": 1.1164767911733577, + "learning_rate": 6.370939784910706e-06, + "loss": 0.6048, "step": 13707 }, { - "epoch": 0.79, - "grad_norm": 0.7272877027788468, - "learning_rate": 2.274693661137707e-06, - "loss": 0.3907, + "epoch": 0.63, + "grad_norm": 0.35332186116560255, + "learning_rate": 6.369553343646178e-06, + "loss": 0.1713, "step": 13708 }, { - "epoch": 0.79, - "grad_norm": 0.16950972666281172, - "learning_rate": 2.273512157285215e-06, - "loss": 0.0706, + "epoch": 0.63, + "grad_norm": 0.3947149481265831, + "learning_rate": 6.368166982756243e-06, + "loss": 0.3179, "step": 13709 }, { - "epoch": 0.79, - "grad_norm": 0.40555071835046586, - "learning_rate": 2.272330921000667e-06, - "loss": 0.2703, + "epoch": 0.63, + "grad_norm": 0.5182953904646964, + "learning_rate": 6.366780702271589e-06, + "loss": 0.355, "step": 13710 }, { - "epoch": 0.79, - "grad_norm": 0.3774955734684031, - "learning_rate": 2.271149952324968e-06, - "loss": 0.3077, + "epoch": 0.63, + "grad_norm": 0.20305948052983017, + "learning_rate": 6.365394502222909e-06, + "loss": 0.1249, "step": 13711 }, { - "epoch": 0.79, - "grad_norm": 0.6892753385861591, - "learning_rate": 2.2699692512990135e-06, - "loss": 0.2721, + "epoch": 0.63, + "grad_norm": 0.5406257565557732, + "learning_rate": 6.364008382640889e-06, + "loss": 0.3672, "step": 13712 }, { - "epoch": 0.79, - "grad_norm": 0.341879768930826, - "learning_rate": 2.268788817963692e-06, - "loss": 0.2822, + "epoch": 0.63, + "grad_norm": 0.3770059422882819, + "learning_rate": 6.362622343556222e-06, + "loss": 0.2953, "step": 13713 }, { - "epoch": 0.79, - "grad_norm": 0.36570922717372284, - "learning_rate": 2.2676086523598773e-06, - "loss": 0.3289, + "epoch": 0.63, + "grad_norm": 0.4272615762919037, + "learning_rate": 6.3612363849995895e-06, + "loss": 0.276, "step": 13714 }, { - "epoch": 0.79, - "grad_norm": 0.3003448965263176, - "learning_rate": 2.266428754528446e-06, - "loss": 0.0964, + "epoch": 0.63, + "grad_norm": 0.4510293751241945, + "learning_rate": 6.359850507001677e-06, + "loss": 0.2646, "step": 13715 }, { - "epoch": 0.79, - "grad_norm": 0.33720536672084483, - "learning_rate": 2.2652491245102537e-06, - "loss": 0.2491, + "epoch": 0.63, + "grad_norm": 0.8202970601738923, + "learning_rate": 6.358464709593164e-06, + "loss": 0.5591, "step": 13716 }, { - "epoch": 0.79, - "grad_norm": 1.161787269663438, - "learning_rate": 2.2640697623461517e-06, - "loss": 0.5815, + "epoch": 0.63, + "grad_norm": 0.2665364364650663, + "learning_rate": 6.357078992804735e-06, + "loss": 0.2033, "step": 13717 }, { - "epoch": 0.79, - "grad_norm": 0.29078226584492334, - "learning_rate": 2.262890668076979e-06, - "loss": 0.2155, + "epoch": 0.63, + "grad_norm": 0.2690265011818245, + "learning_rate": 6.3556933566670656e-06, + "loss": 0.1883, "step": 13718 }, { - "epoch": 0.79, - "grad_norm": 0.36384414626106976, - "learning_rate": 2.2617118417435725e-06, - "loss": 0.2685, + "epoch": 0.63, + "grad_norm": 1.0311816113904653, + "learning_rate": 6.3543078012108325e-06, + "loss": 0.5251, "step": 13719 }, { - "epoch": 0.79, - "grad_norm": 0.9616109497695068, - "learning_rate": 2.260533283386751e-06, - "loss": 0.4991, + "epoch": 0.63, + "grad_norm": 0.5303796504390793, + "learning_rate": 6.352922326466706e-06, + "loss": 0.2914, "step": 13720 }, { - "epoch": 0.79, - "grad_norm": 0.30110611270374804, - "learning_rate": 2.25935499304733e-06, - "loss": 0.1999, + "epoch": 0.63, + "grad_norm": 0.33378868077554313, + "learning_rate": 6.35153693246537e-06, + "loss": 0.261, "step": 13721 }, { - "epoch": 0.79, - "grad_norm": 0.23948729147878228, - "learning_rate": 2.2581769707661107e-06, - "loss": 0.2069, + "epoch": 0.63, + "grad_norm": 0.49427344656861777, + "learning_rate": 6.350151619237489e-06, + "loss": 0.4257, "step": 13722 }, { - "epoch": 0.79, - "grad_norm": 1.4181780988657122, - "learning_rate": 2.256999216583892e-06, - "loss": 0.4937, + "epoch": 0.63, + "grad_norm": 0.6792563770797668, + "learning_rate": 6.348766386813734e-06, + "loss": 0.3144, "step": 13723 }, { - "epoch": 0.79, - "grad_norm": 0.7846008419436977, - "learning_rate": 2.2558217305414564e-06, - "loss": 0.4049, + "epoch": 0.63, + "grad_norm": 0.2346819912657847, + "learning_rate": 6.347381235224769e-06, + "loss": 0.1538, "step": 13724 }, { - "epoch": 0.79, - "grad_norm": 0.2869992147760968, - "learning_rate": 2.2546445126795822e-06, - "loss": 0.1819, + "epoch": 0.63, + "grad_norm": 0.40565107886905327, + "learning_rate": 6.345996164501265e-06, + "loss": 0.3201, "step": 13725 }, { - "epoch": 0.79, - "grad_norm": 0.34869844428817903, - "learning_rate": 2.2534675630390366e-06, - "loss": 0.3234, + "epoch": 0.63, + "grad_norm": 0.5445717105696417, + "learning_rate": 6.3446111746738845e-06, + "loss": 0.2953, "step": 13726 }, { - "epoch": 0.79, - "grad_norm": 0.3008860845713572, - "learning_rate": 2.2522908816605716e-06, - "loss": 0.1825, + "epoch": 0.63, + "grad_norm": 0.3654269738542627, + "learning_rate": 6.34322626577329e-06, + "loss": 0.2966, "step": 13727 }, { - "epoch": 0.79, - "grad_norm": 0.28978102239520515, - "learning_rate": 2.251114468584944e-06, - "loss": 0.1858, + "epoch": 0.63, + "grad_norm": 1.1138406636687501, + "learning_rate": 6.341841437830141e-06, + "loss": 0.4359, "step": 13728 }, { - "epoch": 0.79, - "grad_norm": 0.8083556054139469, - "learning_rate": 2.2499383238528894e-06, - "loss": 0.4195, + "epoch": 0.63, + "grad_norm": 0.3242864616456861, + "learning_rate": 6.340456690875095e-06, + "loss": 0.2549, "step": 13729 }, { - "epoch": 0.79, - "grad_norm": 0.3769533565114214, - "learning_rate": 2.2487624475051364e-06, - "loss": 0.2837, + "epoch": 0.63, + "grad_norm": 0.26193009627805075, + "learning_rate": 6.3390720249388125e-06, + "loss": 0.2096, "step": 13730 }, { - "epoch": 0.79, - "grad_norm": 0.39109306811626965, - "learning_rate": 2.2475868395824043e-06, - "loss": 0.2734, + "epoch": 0.63, + "grad_norm": 0.9040563962700809, + "learning_rate": 6.337687440051947e-06, + "loss": 0.3076, "step": 13731 }, { - "epoch": 0.79, - "grad_norm": 0.7981003418145786, - "learning_rate": 2.2464115001254096e-06, - "loss": 0.3579, + "epoch": 0.63, + "grad_norm": 0.5394438846851447, + "learning_rate": 6.336302936245154e-06, + "loss": 0.364, "step": 13732 }, { - "epoch": 0.79, - "grad_norm": 0.4237085330933973, - "learning_rate": 2.245236429174851e-06, - "loss": 0.2282, + "epoch": 0.63, + "grad_norm": 0.3342277310675126, + "learning_rate": 6.334918513549075e-06, + "loss": 0.276, "step": 13733 }, { - "epoch": 0.79, - "grad_norm": 0.26938040168964505, - "learning_rate": 2.244061626771421e-06, - "loss": 0.2416, + "epoch": 0.63, + "grad_norm": 0.45667063085563714, + "learning_rate": 6.333534171994375e-06, + "loss": 0.2553, "step": 13734 }, { - "epoch": 0.79, - "grad_norm": 0.33512199491603817, - "learning_rate": 2.2428870929558012e-06, - "loss": 0.2038, + "epoch": 0.63, + "grad_norm": 0.41788456258604867, + "learning_rate": 6.3321499116116935e-06, + "loss": 0.2517, "step": 13735 }, { - "epoch": 0.79, - "grad_norm": 0.7622171998110745, - "learning_rate": 2.2417128277686694e-06, - "loss": 0.4113, + "epoch": 0.63, + "grad_norm": 0.26932353338718423, + "learning_rate": 6.330765732431679e-06, + "loss": 0.1857, "step": 13736 }, { - "epoch": 0.79, - "grad_norm": 0.34080168091289903, - "learning_rate": 2.2405388312506903e-06, - "loss": 0.2377, + "epoch": 0.63, + "grad_norm": 0.45766510743207534, + "learning_rate": 6.329381634484968e-06, + "loss": 0.2537, "step": 13737 }, { - "epoch": 0.79, - "grad_norm": 0.34730323144893105, - "learning_rate": 2.239365103442517e-06, - "loss": 0.2583, + "epoch": 0.63, + "grad_norm": 0.7010241701619224, + "learning_rate": 6.327997617802215e-06, + "loss": 0.3513, "step": 13738 }, { - "epoch": 0.79, - "grad_norm": 1.0072696237450594, - "learning_rate": 2.238191644384794e-06, - "loss": 0.5047, + "epoch": 0.63, + "grad_norm": 0.41947279795343034, + "learning_rate": 6.326613682414056e-06, + "loss": 0.3073, "step": 13739 }, { - "epoch": 0.79, - "grad_norm": 0.24308109364594496, - "learning_rate": 2.237018454118163e-06, - "loss": 0.2012, + "epoch": 0.63, + "grad_norm": 1.1480612882879153, + "learning_rate": 6.325229828351129e-06, + "loss": 0.679, "step": 13740 }, { - "epoch": 0.79, - "grad_norm": 0.5470089584947886, - "learning_rate": 2.2358455326832496e-06, - "loss": 0.2059, + "epoch": 0.63, + "grad_norm": 0.32480684195167947, + "learning_rate": 6.32384605564407e-06, + "loss": 0.2077, "step": 13741 }, { - "epoch": 0.79, - "grad_norm": 0.34034621021093414, - "learning_rate": 2.234672880120674e-06, - "loss": 0.2917, + "epoch": 0.63, + "grad_norm": 0.24007822422054026, + "learning_rate": 6.322462364323519e-06, + "loss": 0.1948, "step": 13742 }, { - "epoch": 0.79, - "grad_norm": 0.3159765482745945, - "learning_rate": 2.233500496471037e-06, - "loss": 0.2669, + "epoch": 0.63, + "grad_norm": 0.694605311345928, + "learning_rate": 6.321078754420105e-06, + "loss": 0.4073, "step": 13743 }, { - "epoch": 0.79, - "grad_norm": 1.1613176017519513, - "learning_rate": 2.2323283817749463e-06, - "loss": 0.7208, + "epoch": 0.63, + "grad_norm": 0.5355223471537685, + "learning_rate": 6.319695225964463e-06, + "loss": 0.1449, "step": 13744 }, { - "epoch": 0.79, - "grad_norm": 0.338617416056063, - "learning_rate": 2.2311565360729903e-06, - "loss": 0.2283, + "epoch": 0.63, + "grad_norm": 0.33674014777401, + "learning_rate": 6.318311778987221e-06, + "loss": 0.272, "step": 13745 }, { - "epoch": 0.79, - "grad_norm": 0.3873334398545606, - "learning_rate": 2.2299849594057487e-06, - "loss": 0.2524, + "epoch": 0.63, + "grad_norm": 1.295079136532708, + "learning_rate": 6.316928413519006e-06, + "loss": 0.7799, "step": 13746 }, { - "epoch": 0.79, - "grad_norm": 0.2770440136651813, - "learning_rate": 2.2288136518137914e-06, - "loss": 0.2116, + "epoch": 0.63, + "grad_norm": 0.5217432078256229, + "learning_rate": 6.315545129590448e-06, + "loss": 0.1773, "step": 13747 }, { - "epoch": 0.79, - "grad_norm": 0.678257433651019, - "learning_rate": 2.227642613337686e-06, - "loss": 0.2842, + "epoch": 0.63, + "grad_norm": 0.3231974442890065, + "learning_rate": 6.314161927232169e-06, + "loss": 0.2664, "step": 13748 }, { - "epoch": 0.79, - "grad_norm": 0.34415897142683244, - "learning_rate": 2.2264718440179835e-06, - "loss": 0.2728, + "epoch": 0.63, + "grad_norm": 0.38828426320675563, + "learning_rate": 6.312778806474795e-06, + "loss": 0.3129, "step": 13749 }, { - "epoch": 0.79, - "grad_norm": 0.3417732021973484, - "learning_rate": 2.2253013438952253e-06, - "loss": 0.2948, + "epoch": 0.63, + "grad_norm": 0.3571371151129754, + "learning_rate": 6.311395767348938e-06, + "loss": 0.1247, "step": 13750 }, { - "epoch": 0.79, - "grad_norm": 1.7331090075356483, - "learning_rate": 2.224131113009945e-06, - "loss": 0.1448, + "epoch": 0.63, + "grad_norm": 0.42979275131340433, + "learning_rate": 6.310012809885229e-06, + "loss": 0.3204, "step": 13751 }, { - "epoch": 0.79, - "grad_norm": 0.35091864102953746, - "learning_rate": 2.222961151402674e-06, - "loss": 0.2522, + "epoch": 0.63, + "grad_norm": 1.4035874272265616, + "learning_rate": 6.308629934114279e-06, + "loss": 0.7434, "step": 13752 }, { - "epoch": 0.79, - "grad_norm": 0.44152654129417956, - "learning_rate": 2.2217914591139222e-06, - "loss": 0.3178, + "epoch": 0.63, + "grad_norm": 0.34702872092395576, + "learning_rate": 6.307247140066705e-06, + "loss": 0.279, "step": 13753 }, { - "epoch": 0.79, - "grad_norm": 0.2511167710167237, - "learning_rate": 2.2206220361841978e-06, - "loss": 0.14, + "epoch": 0.63, + "grad_norm": 0.38003206886919433, + "learning_rate": 6.305864427773117e-06, + "loss": 0.2108, "step": 13754 }, { - "epoch": 0.79, - "grad_norm": 0.31288446501141626, - "learning_rate": 2.2194528826539984e-06, - "loss": 0.2369, + "epoch": 0.63, + "grad_norm": 0.4165638893636752, + "learning_rate": 6.304481797264132e-06, + "loss": 0.2653, "step": 13755 }, { - "epoch": 0.79, - "grad_norm": 1.312231502877169, - "learning_rate": 2.218283998563808e-06, - "loss": 0.6555, + "epoch": 0.63, + "grad_norm": 0.36245159531049326, + "learning_rate": 6.303099248570361e-06, + "loss": 0.2321, "step": 13756 }, { - "epoch": 0.79, - "grad_norm": 0.5150153647059441, - "learning_rate": 2.2171153839541114e-06, - "loss": 0.3475, + "epoch": 0.63, + "grad_norm": 0.2774919465268556, + "learning_rate": 6.301716781722406e-06, + "loss": 0.2381, "step": 13757 }, { - "epoch": 0.79, - "grad_norm": 0.27675543088755755, - "learning_rate": 2.2159470388653737e-06, - "loss": 0.2179, + "epoch": 0.63, + "grad_norm": 1.2276952956324017, + "learning_rate": 6.30033439675088e-06, + "loss": 0.7805, "step": 13758 }, { - "epoch": 0.79, - "grad_norm": 0.8129461695250663, - "learning_rate": 2.2147789633380555e-06, - "loss": 0.401, + "epoch": 0.63, + "grad_norm": 0.8940534217727629, + "learning_rate": 6.298952093686381e-06, + "loss": 0.4074, "step": 13759 }, { - "epoch": 0.79, - "grad_norm": 0.3186014787104289, - "learning_rate": 2.213611157412605e-06, - "loss": 0.1735, + "epoch": 0.63, + "grad_norm": 0.3456620895163242, + "learning_rate": 6.297569872559519e-06, + "loss": 0.2209, "step": 13760 }, { - "epoch": 0.79, - "grad_norm": 0.2907238321322088, - "learning_rate": 2.2124436211294676e-06, - "loss": 0.1941, + "epoch": 0.63, + "grad_norm": 0.32388983657721465, + "learning_rate": 6.29618773340089e-06, + "loss": 0.257, "step": 13761 }, { - "epoch": 0.79, - "grad_norm": 0.4140887679030627, - "learning_rate": 2.2112763545290728e-06, - "loss": 0.3082, + "epoch": 0.63, + "grad_norm": 0.43187005562652625, + "learning_rate": 6.294805676241096e-06, + "loss": 0.2125, "step": 13762 }, { - "epoch": 0.79, - "grad_norm": 1.1476560055281388, - "learning_rate": 2.2101093576518416e-06, - "loss": 0.3793, + "epoch": 0.63, + "grad_norm": 0.327565988280973, + "learning_rate": 6.29342370111073e-06, + "loss": 0.2117, "step": 13763 }, { - "epoch": 0.79, - "grad_norm": 0.3858913981085996, - "learning_rate": 2.2089426305381865e-06, - "loss": 0.2011, + "epoch": 0.63, + "grad_norm": 1.3101844022331988, + "learning_rate": 6.292041808040393e-06, + "loss": 0.5824, "step": 13764 }, { - "epoch": 0.79, - "grad_norm": 0.39061485494929044, - "learning_rate": 2.2077761732285165e-06, - "loss": 0.3134, + "epoch": 0.63, + "grad_norm": 0.3630391763503653, + "learning_rate": 6.2906599970606774e-06, + "loss": 0.2898, "step": 13765 }, { - "epoch": 0.79, - "grad_norm": 0.31571158619901163, - "learning_rate": 2.206609985763222e-06, - "loss": 0.2483, + "epoch": 0.63, + "grad_norm": 0.4119547461254126, + "learning_rate": 6.2892782682021745e-06, + "loss": 0.2975, "step": 13766 }, { - "epoch": 0.79, - "grad_norm": 0.3321054971659981, - "learning_rate": 2.2054440681826896e-06, - "loss": 0.1801, + "epoch": 0.63, + "grad_norm": 1.0186738941013866, + "learning_rate": 6.2878966214954684e-06, + "loss": 0.2993, "step": 13767 }, { - "epoch": 0.79, - "grad_norm": 0.34644794016271147, - "learning_rate": 2.2042784205272927e-06, - "loss": 0.2443, + "epoch": 0.63, + "grad_norm": 0.25555927257468164, + "learning_rate": 6.286515056971158e-06, + "loss": 0.2118, "step": 13768 }, { - "epoch": 0.79, - "grad_norm": 0.48747689057722515, - "learning_rate": 2.203113042837396e-06, - "loss": 0.324, + "epoch": 0.63, + "grad_norm": 0.3033190996462018, + "learning_rate": 6.285133574659827e-06, + "loss": 0.2639, "step": 13769 }, { - "epoch": 0.79, - "grad_norm": 0.3253743126407448, - "learning_rate": 2.2019479351533625e-06, - "loss": 0.2552, + "epoch": 0.63, + "grad_norm": 1.2839030349428027, + "learning_rate": 6.283752174592057e-06, + "loss": 0.3702, "step": 13770 }, { - "epoch": 0.79, - "grad_norm": 0.5069718524012903, - "learning_rate": 2.2007830975155366e-06, - "loss": 0.2568, + "epoch": 0.63, + "grad_norm": 0.581101276380584, + "learning_rate": 6.28237085679843e-06, + "loss": 0.3424, "step": 13771 }, { - "epoch": 0.79, - "grad_norm": 1.1996810800114002, - "learning_rate": 2.199618529964257e-06, - "loss": 0.6718, + "epoch": 0.63, + "grad_norm": 0.397314944956453, + "learning_rate": 6.280989621309531e-06, + "loss": 0.2929, "step": 13772 }, { - "epoch": 0.79, - "grad_norm": 0.3826603271787008, - "learning_rate": 2.198454232539848e-06, - "loss": 0.2895, + "epoch": 0.63, + "grad_norm": 0.3448589704186857, + "learning_rate": 6.279608468155938e-06, + "loss": 0.2678, "step": 13773 }, { - "epoch": 0.79, - "grad_norm": 0.19718297829960185, - "learning_rate": 2.1972902052826384e-06, - "loss": 0.1763, + "epoch": 0.63, + "grad_norm": 0.2404728873630171, + "learning_rate": 6.278227397368227e-06, + "loss": 0.1757, "step": 13774 }, { - "epoch": 0.79, - "grad_norm": 0.6762841450323746, - "learning_rate": 2.1961264482329326e-06, - "loss": 0.3604, + "epoch": 0.63, + "grad_norm": 0.38915532673048614, + "learning_rate": 6.276846408976975e-06, + "loss": 0.2954, "step": 13775 }, { - "epoch": 0.79, - "grad_norm": 0.523526720693821, - "learning_rate": 2.194962961431032e-06, - "loss": 0.2805, + "epoch": 0.63, + "grad_norm": 0.45847900170359007, + "learning_rate": 6.275465503012752e-06, + "loss": 0.2712, "step": 13776 }, { - "epoch": 0.79, - "grad_norm": 0.348195317352207, - "learning_rate": 2.1937997449172287e-06, - "loss": 0.2318, + "epoch": 0.63, + "grad_norm": 0.47792238292786554, + "learning_rate": 6.274084679506136e-06, + "loss": 0.3043, "step": 13777 }, { - "epoch": 0.79, - "grad_norm": 0.49968051927791735, - "learning_rate": 2.192636798731804e-06, - "loss": 0.3517, + "epoch": 0.63, + "grad_norm": 0.464636138439792, + "learning_rate": 6.272703938487694e-06, + "loss": 0.2577, "step": 13778 }, { - "epoch": 0.79, - "grad_norm": 0.3862758209154229, - "learning_rate": 2.1914741229150315e-06, - "loss": 0.2706, + "epoch": 0.63, + "grad_norm": 1.1589077901843, + "learning_rate": 6.271323279987995e-06, + "loss": 0.4516, "step": 13779 }, { - "epoch": 0.79, - "grad_norm": 0.20524174445611812, - "learning_rate": 2.1903117175071754e-06, - "loss": 0.1168, + "epoch": 0.63, + "grad_norm": 0.2787768114311294, + "learning_rate": 6.2699427040376e-06, + "loss": 0.2013, "step": 13780 }, { - "epoch": 0.79, - "grad_norm": 0.33607769190284076, - "learning_rate": 2.1891495825484856e-06, - "loss": 0.2942, + "epoch": 0.63, + "grad_norm": 0.349771309466261, + "learning_rate": 6.268562210667084e-06, + "loss": 0.276, "step": 13781 }, { - "epoch": 0.79, - "grad_norm": 0.681396311351655, - "learning_rate": 2.1879877180792117e-06, - "loss": 0.3332, + "epoch": 0.63, + "grad_norm": 0.5176736161197101, + "learning_rate": 6.2671817999070025e-06, + "loss": 0.2696, "step": 13782 }, { - "epoch": 0.79, - "grad_norm": 0.3897404025541323, - "learning_rate": 2.186826124139587e-06, - "loss": 0.2887, + "epoch": 0.63, + "grad_norm": 0.8468243367785466, + "learning_rate": 6.265801471787919e-06, + "loss": 0.3251, "step": 13783 }, { - "epoch": 0.79, - "grad_norm": 1.199733904627748, - "learning_rate": 2.185664800769839e-06, - "loss": 0.3483, + "epoch": 0.63, + "grad_norm": 0.35129874440924846, + "learning_rate": 6.264421226340387e-06, + "loss": 0.2546, "step": 13784 }, { - "epoch": 0.79, - "grad_norm": 0.3624521652254083, - "learning_rate": 2.1845037480101793e-06, - "loss": 0.2673, + "epoch": 0.63, + "grad_norm": 0.3759174751782755, + "learning_rate": 6.263041063594973e-06, + "loss": 0.2953, "step": 13785 }, { - "epoch": 0.79, - "grad_norm": 0.23852031573910368, - "learning_rate": 2.183342965900821e-06, - "loss": 0.2303, + "epoch": 0.63, + "grad_norm": 0.7570133406425583, + "learning_rate": 6.261660983582229e-06, + "loss": 0.265, "step": 13786 }, { - "epoch": 0.79, - "grad_norm": 0.6022579071701457, - "learning_rate": 2.18218245448196e-06, - "loss": 0.2888, + "epoch": 0.63, + "grad_norm": 0.4235969342607545, + "learning_rate": 6.260280986332707e-06, + "loss": 0.2966, "step": 13787 }, { - "epoch": 0.79, - "grad_norm": 0.29140171805652, - "learning_rate": 2.1810222137937855e-06, - "loss": 0.1838, + "epoch": 0.63, + "grad_norm": 0.5807382242288651, + "learning_rate": 6.258901071876959e-06, + "loss": 0.2806, "step": 13788 }, { - "epoch": 0.79, - "grad_norm": 0.3210911846698981, - "learning_rate": 2.1798622438764716e-06, - "loss": 0.2784, + "epoch": 0.63, + "grad_norm": 0.3389385748791269, + "learning_rate": 6.257521240245534e-06, + "loss": 0.2628, "step": 13789 }, { - "epoch": 0.79, - "grad_norm": 0.4872387173728299, - "learning_rate": 2.1787025447701947e-06, - "loss": 0.2583, + "epoch": 0.63, + "grad_norm": 0.3252777992767435, + "learning_rate": 6.2561414914689835e-06, + "loss": 0.1831, "step": 13790 }, { - "epoch": 0.79, - "grad_norm": 0.3987621580560564, - "learning_rate": 2.177543116515113e-06, - "loss": 0.286, + "epoch": 0.63, + "grad_norm": 1.0098525961729672, + "learning_rate": 6.254761825577853e-06, + "loss": 0.5458, "step": 13791 }, { - "epoch": 0.79, - "grad_norm": 0.4894773404094698, - "learning_rate": 2.176383959151377e-06, - "loss": 0.3393, + "epoch": 0.63, + "grad_norm": 0.3968447798630403, + "learning_rate": 6.253382242602685e-06, + "loss": 0.3164, "step": 13792 }, { - "epoch": 0.79, - "grad_norm": 0.2736323672900733, - "learning_rate": 2.175225072719127e-06, - "loss": 0.2155, + "epoch": 0.63, + "grad_norm": 0.29033535383010767, + "learning_rate": 6.252002742574021e-06, + "loss": 0.1846, "step": 13793 }, { - "epoch": 0.79, - "grad_norm": 0.2835026201314597, - "learning_rate": 2.174066457258495e-06, - "loss": 0.1832, + "epoch": 0.63, + "grad_norm": 0.7837029670902719, + "learning_rate": 6.250623325522407e-06, + "loss": 0.3983, "step": 13794 }, { - "epoch": 0.79, - "grad_norm": 0.5998237761061286, - "learning_rate": 2.172908112809606e-06, - "loss": 0.3561, + "epoch": 0.63, + "grad_norm": 0.3142161876321316, + "learning_rate": 6.249243991478377e-06, + "loss": 0.1931, "step": 13795 }, { - "epoch": 0.79, - "grad_norm": 1.1528927782639422, - "learning_rate": 2.1717500394125735e-06, - "loss": 0.6232, + "epoch": 0.63, + "grad_norm": 0.3109255037552431, + "learning_rate": 6.247864740472471e-06, + "loss": 0.1968, "step": 13796 }, { - "epoch": 0.79, - "grad_norm": 0.2608272389082933, - "learning_rate": 2.1705922371075005e-06, - "loss": 0.2213, + "epoch": 0.63, + "grad_norm": 0.3805655914974935, + "learning_rate": 6.246485572535219e-06, + "loss": 0.3177, "step": 13797 }, { - "epoch": 0.79, - "grad_norm": 0.5306546366421715, - "learning_rate": 2.169434705934479e-06, - "loss": 0.308, + "epoch": 0.63, + "grad_norm": 1.1887776219483017, + "learning_rate": 6.245106487697163e-06, + "loss": 0.6946, "step": 13798 }, { - "epoch": 0.79, - "grad_norm": 0.4938087384739634, - "learning_rate": 2.1682774459335987e-06, - "loss": 0.2987, + "epoch": 0.63, + "grad_norm": 0.32318422530199165, + "learning_rate": 6.243727485988833e-06, + "loss": 0.2237, "step": 13799 }, { - "epoch": 0.79, - "grad_norm": 0.1622853869015634, - "learning_rate": 2.1671204571449345e-06, - "loss": 0.0694, + "epoch": 0.63, + "grad_norm": 0.6341461435601496, + "learning_rate": 6.2423485674407545e-06, + "loss": 0.3293, "step": 13800 }, { - "epoch": 0.79, - "grad_norm": 0.3014907139564795, - "learning_rate": 2.165963739608552e-06, - "loss": 0.274, + "epoch": 0.63, + "grad_norm": 0.3745215622135042, + "learning_rate": 6.240969732083451e-06, + "loss": 0.2887, "step": 13801 }, { - "epoch": 0.79, - "grad_norm": 0.4934739195565253, - "learning_rate": 2.164807293364506e-06, - "loss": 0.3363, + "epoch": 0.63, + "grad_norm": 0.25342319944131736, + "learning_rate": 6.2395909799474605e-06, + "loss": 0.1946, "step": 13802 }, { - "epoch": 0.79, - "grad_norm": 0.4667111156007383, - "learning_rate": 2.1636511184528484e-06, - "loss": 0.1946, + "epoch": 0.63, + "grad_norm": 1.1935357493032865, + "learning_rate": 6.238212311063301e-06, + "loss": 0.2767, "step": 13803 }, { - "epoch": 0.79, - "grad_norm": 0.4077960646746423, - "learning_rate": 2.162495214913616e-06, - "loss": 0.3125, + "epoch": 0.63, + "grad_norm": 0.3708317267370427, + "learning_rate": 6.236833725461495e-06, + "loss": 0.3173, "step": 13804 }, { - "epoch": 0.79, - "grad_norm": 0.32122695724294886, - "learning_rate": 2.1613395827868366e-06, - "loss": 0.2702, + "epoch": 0.63, + "grad_norm": 0.33766389898660537, + "learning_rate": 6.235455223172563e-06, + "loss": 0.2804, "step": 13805 }, { - "epoch": 0.79, - "grad_norm": 0.14965512931372155, - "learning_rate": 2.160184222112531e-06, - "loss": 0.0691, + "epoch": 0.63, + "grad_norm": 0.8228778388406415, + "learning_rate": 6.234076804227023e-06, + "loss": 0.3091, "step": 13806 }, { - "epoch": 0.79, - "grad_norm": 0.4639504102647323, - "learning_rate": 2.159029132930707e-06, - "loss": 0.2847, + "epoch": 0.63, + "grad_norm": 0.2565533211124841, + "learning_rate": 6.232698468655394e-06, + "loss": 0.171, "step": 13807 }, { - "epoch": 0.79, - "grad_norm": 1.364104155468768, - "learning_rate": 2.1578743152813676e-06, - "loss": 0.7156, + "epoch": 0.63, + "grad_norm": 0.44081102251320176, + "learning_rate": 6.23132021648819e-06, + "loss": 0.2918, "step": 13808 }, { - "epoch": 0.79, - "grad_norm": 0.3139451351224941, - "learning_rate": 2.156719769204505e-06, - "loss": 0.2972, + "epoch": 0.63, + "grad_norm": 0.3301721845631173, + "learning_rate": 6.2299420477559236e-06, + "loss": 0.2423, "step": 13809 }, { - "epoch": 0.79, - "grad_norm": 0.3411874232698844, - "learning_rate": 2.155565494740098e-06, - "loss": 0.2292, + "epoch": 0.63, + "grad_norm": 0.7226649009552698, + "learning_rate": 6.228563962489106e-06, + "loss": 0.4122, "step": 13810 }, { - "epoch": 0.79, - "grad_norm": 1.5518338776407492, - "learning_rate": 2.1544114919281223e-06, - "loss": 0.616, + "epoch": 0.63, + "grad_norm": 0.4437286595349055, + "learning_rate": 6.2271859607182485e-06, + "loss": 0.2558, "step": 13811 }, { - "epoch": 0.79, - "grad_norm": 0.20569821108771164, - "learning_rate": 2.153257760808538e-06, - "loss": 0.146, + "epoch": 0.63, + "grad_norm": 0.32175072605886906, + "learning_rate": 6.225808042473857e-06, + "loss": 0.2602, "step": 13812 }, { - "epoch": 0.79, - "grad_norm": 0.2839430285999692, - "learning_rate": 2.152104301421302e-06, - "loss": 0.2202, + "epoch": 0.63, + "grad_norm": 1.2620952097698657, + "learning_rate": 6.224430207786438e-06, + "loss": 0.5556, "step": 13813 }, { - "epoch": 0.79, - "grad_norm": 0.4522044299979987, - "learning_rate": 2.150951113806351e-06, - "loss": 0.3257, + "epoch": 0.63, + "grad_norm": 0.22852143156537716, + "learning_rate": 6.223052456686492e-06, + "loss": 0.147, "step": 13814 }, { - "epoch": 0.79, - "grad_norm": 0.4669588799542236, - "learning_rate": 2.1497981980036297e-06, - "loss": 0.3026, + "epoch": 0.63, + "grad_norm": 0.6108052527491664, + "learning_rate": 6.221674789204528e-06, + "loss": 0.315, "step": 13815 }, { - "epoch": 0.79, - "grad_norm": 0.37379373753426265, - "learning_rate": 2.1486455540530593e-06, - "loss": 0.2319, + "epoch": 0.63, + "grad_norm": 0.387141918175387, + "learning_rate": 6.220297205371044e-06, + "loss": 0.2604, "step": 13816 }, { - "epoch": 0.79, - "grad_norm": 0.3565690261654594, - "learning_rate": 2.1474931819945555e-06, - "loss": 0.3044, + "epoch": 0.63, + "grad_norm": 0.39394833829626136, + "learning_rate": 6.218919705216535e-06, + "loss": 0.2788, "step": 13817 }, { - "epoch": 0.79, - "grad_norm": 0.27704374870348236, - "learning_rate": 2.1463410818680253e-06, - "loss": 0.1727, + "epoch": 0.63, + "grad_norm": 0.9707860431142171, + "learning_rate": 6.217542288771502e-06, + "loss": 0.509, "step": 13818 }, { - "epoch": 0.79, - "grad_norm": 0.3302566985170461, - "learning_rate": 2.1451892537133624e-06, - "loss": 0.2454, + "epoch": 0.63, + "grad_norm": 0.4125274018699957, + "learning_rate": 6.2161649560664305e-06, + "loss": 0.1309, "step": 13819 }, { - "epoch": 0.79, - "grad_norm": 0.41198364612002364, - "learning_rate": 2.1440376975704614e-06, - "loss": 0.2576, + "epoch": 0.63, + "grad_norm": 0.2975174398203126, + "learning_rate": 6.214787707131825e-06, + "loss": 0.2077, "step": 13820 }, { - "epoch": 0.79, - "grad_norm": 0.42590625607150334, - "learning_rate": 2.142886413479197e-06, - "loss": 0.3075, + "epoch": 0.63, + "grad_norm": 0.3783390283103983, + "learning_rate": 6.213410541998171e-06, + "loss": 0.2867, "step": 13821 }, { - "epoch": 0.79, - "grad_norm": 0.3323355752642059, - "learning_rate": 2.1417354014794378e-06, - "loss": 0.2635, + "epoch": 0.63, + "grad_norm": 0.7034140413601049, + "learning_rate": 6.2120334606959585e-06, + "loss": 0.3001, "step": 13822 }, { - "epoch": 0.79, - "grad_norm": 1.287156112327123, - "learning_rate": 2.1405846616110416e-06, - "loss": 0.3389, + "epoch": 0.64, + "grad_norm": 0.3496260938226288, + "learning_rate": 6.2106564632556725e-06, + "loss": 0.2778, "step": 13823 }, { - "epoch": 0.79, - "grad_norm": 0.27619945652710726, - "learning_rate": 2.1394341939138618e-06, - "loss": 0.149, + "epoch": 0.64, + "grad_norm": 0.4982366281086838, + "learning_rate": 6.2092795497078005e-06, + "loss": 0.4129, "step": 13824 }, { - "epoch": 0.79, - "grad_norm": 0.2555642275552039, - "learning_rate": 2.1382839984277395e-06, - "loss": 0.2574, + "epoch": 0.64, + "grad_norm": 0.38148030233120556, + "learning_rate": 6.207902720082828e-06, + "loss": 0.2003, "step": 13825 }, { - "epoch": 0.79, - "grad_norm": 0.687890090018683, - "learning_rate": 2.137134075192504e-06, - "loss": 0.2607, + "epoch": 0.64, + "grad_norm": 0.3150141927675991, + "learning_rate": 6.206525974411233e-06, + "loss": 0.1689, "step": 13826 }, { - "epoch": 0.79, - "grad_norm": 0.6251334392979514, - "learning_rate": 2.135984424247974e-06, - "loss": 0.3902, + "epoch": 0.64, + "grad_norm": 0.4217405810631256, + "learning_rate": 6.205149312723493e-06, + "loss": 0.2696, "step": 13827 }, { - "epoch": 0.79, - "grad_norm": 0.35576024720872323, - "learning_rate": 2.1348350456339684e-06, - "loss": 0.2901, + "epoch": 0.64, + "grad_norm": 0.4006029610244969, + "learning_rate": 6.203772735050096e-06, + "loss": 0.3145, "step": 13828 }, { - "epoch": 0.79, - "grad_norm": 0.33393058087833427, - "learning_rate": 2.1336859393902864e-06, - "loss": 0.2403, + "epoch": 0.64, + "grad_norm": 0.33783835907181453, + "learning_rate": 6.2023962414215085e-06, + "loss": 0.1938, "step": 13829 }, { - "epoch": 0.79, - "grad_norm": 0.3272136365997171, - "learning_rate": 2.1325371055567236e-06, - "loss": 0.16, + "epoch": 0.64, + "grad_norm": 0.5434171226608157, + "learning_rate": 6.201019831868209e-06, + "loss": 0.4013, "step": 13830 }, { - "epoch": 0.79, - "grad_norm": 0.38288933342849923, - "learning_rate": 2.1313885441730607e-06, - "loss": 0.2884, + "epoch": 0.64, + "grad_norm": 1.0090888598456478, + "learning_rate": 6.199643506420665e-06, + "loss": 0.5232, "step": 13831 }, { - "epoch": 0.79, - "grad_norm": 0.6989908981679257, - "learning_rate": 2.1302402552790723e-06, - "loss": 0.3487, + "epoch": 0.64, + "grad_norm": 0.24112058864962477, + "learning_rate": 6.198267265109356e-06, + "loss": 0.1655, "step": 13832 }, { - "epoch": 0.79, - "grad_norm": 0.26166129345641775, - "learning_rate": 2.1290922389145284e-06, - "loss": 0.2073, + "epoch": 0.64, + "grad_norm": 0.3089787222497205, + "learning_rate": 6.196891107964744e-06, + "loss": 0.2483, "step": 13833 }, { - "epoch": 0.79, - "grad_norm": 0.38623162282324663, - "learning_rate": 2.1279444951191806e-06, - "loss": 0.3036, + "epoch": 0.64, + "grad_norm": 0.6607546406989968, + "learning_rate": 6.195515035017298e-06, + "loss": 0.4199, "step": 13834 }, { - "epoch": 0.79, - "grad_norm": 1.1632051177823823, - "learning_rate": 2.1267970239327773e-06, - "loss": 0.5774, + "epoch": 0.64, + "grad_norm": 0.36292554460857707, + "learning_rate": 6.194139046297482e-06, + "loss": 0.2037, "step": 13835 }, { - "epoch": 0.79, - "grad_norm": 0.27563291221288966, - "learning_rate": 2.1256498253950518e-06, - "loss": 0.1642, + "epoch": 0.64, + "grad_norm": 0.33342794001173687, + "learning_rate": 6.192763141835758e-06, + "loss": 0.2984, "step": 13836 }, { - "epoch": 0.8, - "grad_norm": 0.27018901283686697, - "learning_rate": 2.124502899545737e-06, - "loss": 0.2479, + "epoch": 0.64, + "grad_norm": 1.3753346489803266, + "learning_rate": 6.19138732166259e-06, + "loss": 0.5578, "step": 13837 }, { - "epoch": 0.8, - "grad_norm": 0.5005766607922495, - "learning_rate": 2.1233562464245483e-06, - "loss": 0.2709, + "epoch": 0.64, + "grad_norm": 0.23794930341799886, + "learning_rate": 6.190011585808435e-06, + "loss": 0.1577, "step": 13838 }, { - "epoch": 0.8, - "grad_norm": 1.167362155705901, - "learning_rate": 2.122209866071194e-06, - "loss": 0.4289, + "epoch": 0.64, + "grad_norm": 0.4425281161262238, + "learning_rate": 6.188635934303752e-06, + "loss": 0.2631, "step": 13839 }, { - "epoch": 0.8, - "grad_norm": 0.3193880909220907, - "learning_rate": 2.12106375852537e-06, - "loss": 0.2474, + "epoch": 0.64, + "grad_norm": 0.36526958270148396, + "learning_rate": 6.187260367178996e-06, + "loss": 0.2975, "step": 13840 }, { - "epoch": 0.8, - "grad_norm": 0.35005318057106716, - "learning_rate": 2.119917923826773e-06, - "loss": 0.2889, + "epoch": 0.64, + "grad_norm": 0.44411900500269885, + "learning_rate": 6.185884884464621e-06, + "loss": 0.2981, "step": 13841 }, { - "epoch": 0.8, - "grad_norm": 0.33854876303697296, - "learning_rate": 2.118772362015078e-06, - "loss": 0.0908, + "epoch": 0.64, + "grad_norm": 0.5117546562318249, + "learning_rate": 6.1845094861910785e-06, + "loss": 0.2673, "step": 13842 }, { - "epoch": 0.8, - "grad_norm": 0.3633576105580375, - "learning_rate": 2.117627073129961e-06, - "loss": 0.2801, + "epoch": 0.64, + "grad_norm": 1.6357588189948409, + "learning_rate": 6.183134172388819e-06, + "loss": 0.671, "step": 13843 }, { - "epoch": 0.8, - "grad_norm": 0.42219924579575663, - "learning_rate": 2.1164820572110734e-06, - "loss": 0.2845, + "epoch": 0.64, + "grad_norm": 0.3219142247783382, + "learning_rate": 6.181758943088285e-06, + "loss": 0.2308, "step": 13844 }, { - "epoch": 0.8, - "grad_norm": 0.35881211061754714, - "learning_rate": 2.115337314298077e-06, - "loss": 0.3175, + "epoch": 0.64, + "grad_norm": 0.3248482963926374, + "learning_rate": 6.180383798319934e-06, + "loss": 0.2063, "step": 13845 }, { - "epoch": 0.8, - "grad_norm": 0.2868125822886127, - "learning_rate": 2.1141928444306094e-06, - "loss": 0.2033, + "epoch": 0.64, + "grad_norm": 0.47513926969734743, + "learning_rate": 6.1790087381142035e-06, + "loss": 0.2781, "step": 13846 }, { - "epoch": 0.8, - "grad_norm": 1.1661974844904361, - "learning_rate": 2.113048647648305e-06, - "loss": 0.4777, + "epoch": 0.64, + "grad_norm": 0.8153096064493197, + "learning_rate": 6.177633762501537e-06, + "loss": 0.4888, "step": 13847 }, { - "epoch": 0.8, - "grad_norm": 0.3302764354075652, - "learning_rate": 2.1119047239907833e-06, - "loss": 0.2756, + "epoch": 0.64, + "grad_norm": 0.2788604779574639, + "learning_rate": 6.176258871512375e-06, + "loss": 0.2307, "step": 13848 }, { - "epoch": 0.8, - "grad_norm": 0.29027735567796065, - "learning_rate": 2.110761073497665e-06, - "loss": 0.1903, + "epoch": 0.64, + "grad_norm": 1.2597578340547748, + "learning_rate": 6.174884065177151e-06, + "loss": 0.5809, "step": 13849 }, { - "epoch": 0.8, - "grad_norm": 0.48117388677328654, - "learning_rate": 2.1096176962085513e-06, - "loss": 0.262, + "epoch": 0.64, + "grad_norm": 0.5159120857930166, + "learning_rate": 6.1735093435263115e-06, + "loss": 0.2892, "step": 13850 }, { - "epoch": 0.8, - "grad_norm": 0.4337568500734956, - "learning_rate": 2.1084745921630377e-06, - "loss": 0.3096, + "epoch": 0.64, + "grad_norm": 0.4033173649713765, + "learning_rate": 6.172134706590287e-06, + "loss": 0.2612, "step": 13851 }, { - "epoch": 0.8, - "grad_norm": 0.284048549328376, - "learning_rate": 2.107331761400707e-06, - "loss": 0.1813, + "epoch": 0.64, + "grad_norm": 0.27440239769189817, + "learning_rate": 6.170760154399511e-06, + "loss": 0.2372, "step": 13852 }, { - "epoch": 0.8, - "grad_norm": 0.3678509849823864, - "learning_rate": 2.1061892039611407e-06, - "loss": 0.2978, + "epoch": 0.64, + "grad_norm": 0.30397031180476486, + "learning_rate": 6.169385686984413e-06, + "loss": 0.1856, "step": 13853 }, { - "epoch": 0.8, - "grad_norm": 0.9895870132408121, - "learning_rate": 2.105046919883903e-06, - "loss": 0.4123, + "epoch": 0.64, + "grad_norm": 0.445288737964441, + "learning_rate": 6.168011304375425e-06, + "loss": 0.3277, "step": 13854 }, { - "epoch": 0.8, - "grad_norm": 0.30690631485293857, - "learning_rate": 2.1039049092085507e-06, - "loss": 0.2116, + "epoch": 0.64, + "grad_norm": 1.030096009150229, + "learning_rate": 6.166637006602975e-06, + "loss": 0.2895, "step": 13855 }, { - "epoch": 0.8, - "grad_norm": 0.2885275065069769, - "learning_rate": 2.102763171974629e-06, - "loss": 0.232, + "epoch": 0.64, + "grad_norm": 0.3240408300281562, + "learning_rate": 6.165262793697486e-06, + "loss": 0.2805, "step": 13856 }, { - "epoch": 0.8, - "grad_norm": 0.39902371245566787, - "learning_rate": 2.1016217082216815e-06, - "loss": 0.2799, + "epoch": 0.64, + "grad_norm": 0.44105739849606385, + "learning_rate": 6.16388866568938e-06, + "loss": 0.3376, "step": 13857 }, { - "epoch": 0.8, - "grad_norm": 0.31971621759090113, - "learning_rate": 2.100480517989235e-06, - "loss": 0.259, + "epoch": 0.64, + "grad_norm": 0.33080055670480035, + "learning_rate": 6.162514622609085e-06, + "loss": 0.1723, "step": 13858 }, { - "epoch": 0.8, - "grad_norm": 0.8751263325898722, - "learning_rate": 2.099339601316809e-06, - "loss": 0.2952, + "epoch": 0.64, + "grad_norm": 0.42811216257442963, + "learning_rate": 6.161140664487017e-06, + "loss": 0.2963, "step": 13859 }, { - "epoch": 0.8, - "grad_norm": 0.34455456165955256, - "learning_rate": 2.098198958243911e-06, - "loss": 0.303, + "epoch": 0.64, + "grad_norm": 0.42182911761851494, + "learning_rate": 6.159766791353594e-06, + "loss": 0.3297, "step": 13860 }, { - "epoch": 0.8, - "grad_norm": 0.3229178337704223, - "learning_rate": 2.0970585888100425e-06, - "loss": 0.266, + "epoch": 0.64, + "grad_norm": 0.4246334565540588, + "learning_rate": 6.15839300323923e-06, + "loss": 0.2433, "step": 13861 }, { - "epoch": 0.8, - "grad_norm": 0.8776049310548688, - "learning_rate": 2.0959184930546973e-06, - "loss": 0.2564, + "epoch": 0.64, + "grad_norm": 0.546302453946891, + "learning_rate": 6.157019300174346e-06, + "loss": 0.3158, "step": 13862 }, { - "epoch": 0.8, - "grad_norm": 0.47569729397496585, - "learning_rate": 2.0947786710173545e-06, - "loss": 0.2821, + "epoch": 0.64, + "grad_norm": 0.36793126644265056, + "learning_rate": 6.15564568218935e-06, + "loss": 0.2937, "step": 13863 }, { - "epoch": 0.8, - "grad_norm": 0.2504288310318657, - "learning_rate": 2.0936391227374874e-06, - "loss": 0.2158, + "epoch": 0.64, + "grad_norm": 0.2586025359807072, + "learning_rate": 6.154272149314658e-06, + "loss": 0.1977, "step": 13864 }, { - "epoch": 0.8, - "grad_norm": 0.35032213602144774, - "learning_rate": 2.0924998482545535e-06, - "loss": 0.2503, + "epoch": 0.64, + "grad_norm": 0.8491509831041123, + "learning_rate": 6.152898701580669e-06, + "loss": 0.4995, "step": 13865 }, { - "epoch": 0.8, - "grad_norm": 0.6552856340397468, - "learning_rate": 2.0913608476080138e-06, - "loss": 0.3592, + "epoch": 0.64, + "grad_norm": 0.2797668430226198, + "learning_rate": 6.151525339017792e-06, + "loss": 0.2568, "step": 13866 }, { - "epoch": 0.8, - "grad_norm": 0.3133790814738462, - "learning_rate": 2.090222120837306e-06, - "loss": 0.2509, + "epoch": 0.64, + "grad_norm": 0.5233379929794071, + "learning_rate": 6.150152061656439e-06, + "loss": 0.3949, "step": 13867 }, { - "epoch": 0.8, - "grad_norm": 0.47362331697258986, - "learning_rate": 2.089083667981868e-06, - "loss": 0.2264, + "epoch": 0.64, + "grad_norm": 0.42831173166798225, + "learning_rate": 6.148778869527009e-06, + "loss": 0.2522, "step": 13868 }, { - "epoch": 0.8, - "grad_norm": 0.4945426270079345, - "learning_rate": 2.087945489081119e-06, - "loss": 0.4079, + "epoch": 0.64, + "grad_norm": 0.3650486511638581, + "learning_rate": 6.147405762659902e-06, + "loss": 0.2894, "step": 13869 }, { - "epoch": 0.8, - "grad_norm": 0.3993652479527239, - "learning_rate": 2.0868075841744795e-06, - "loss": 0.2696, + "epoch": 0.64, + "grad_norm": 0.45030704401442284, + "learning_rate": 6.146032741085517e-06, + "loss": 0.2596, "step": 13870 }, { - "epoch": 0.8, - "grad_norm": 0.26067501072855986, - "learning_rate": 2.0856699533013535e-06, - "loss": 0.1919, + "epoch": 0.64, + "grad_norm": 0.40058833805660227, + "learning_rate": 6.1446598048342556e-06, + "loss": 0.2283, "step": 13871 }, { - "epoch": 0.8, - "grad_norm": 0.3616882633023731, - "learning_rate": 2.0845325965011375e-06, - "loss": 0.264, + "epoch": 0.64, + "grad_norm": 0.23021803412515876, + "learning_rate": 6.143286953936509e-06, + "loss": 0.2357, "step": 13872 }, { - "epoch": 0.8, - "grad_norm": 0.3935744589887008, - "learning_rate": 2.083395513813217e-06, - "loss": 0.277, + "epoch": 0.64, + "grad_norm": 0.8726318140317489, + "learning_rate": 6.141914188422673e-06, + "loss": 0.4504, "step": 13873 }, { - "epoch": 0.8, - "grad_norm": 0.662955242143687, - "learning_rate": 2.082258705276966e-06, - "loss": 0.3611, + "epoch": 0.64, + "grad_norm": 0.5490746117191487, + "learning_rate": 6.140541508323132e-06, + "loss": 0.2492, "step": 13874 }, { - "epoch": 0.8, - "grad_norm": 1.0458268635013401, - "learning_rate": 2.0811221709317587e-06, - "loss": 0.5303, + "epoch": 0.64, + "grad_norm": 0.4298402951210946, + "learning_rate": 6.139168913668291e-06, + "loss": 0.2919, "step": 13875 }, { - "epoch": 0.8, - "grad_norm": 0.23084717455090384, - "learning_rate": 2.0799859108169496e-06, - "loss": 0.2034, + "epoch": 0.64, + "grad_norm": 0.35829775744823256, + "learning_rate": 6.137796404488525e-06, + "loss": 0.2846, "step": 13876 }, { - "epoch": 0.8, - "grad_norm": 0.3093465003844103, - "learning_rate": 2.0788499249718887e-06, - "loss": 0.2502, + "epoch": 0.64, + "grad_norm": 0.36655387780840537, + "learning_rate": 6.136423980814225e-06, + "loss": 0.2259, "step": 13877 }, { - "epoch": 0.8, - "grad_norm": 1.0381983387677298, - "learning_rate": 2.077714213435914e-06, - "loss": 0.2885, + "epoch": 0.64, + "grad_norm": 0.3227119233334314, + "learning_rate": 6.135051642675775e-06, + "loss": 0.2316, "step": 13878 }, { - "epoch": 0.8, - "grad_norm": 0.32579748606769765, - "learning_rate": 2.0765787762483545e-06, - "loss": 0.2467, + "epoch": 0.64, + "grad_norm": 0.4054198865398271, + "learning_rate": 6.1336793901035526e-06, + "loss": 0.2775, "step": 13879 }, { - "epoch": 0.8, - "grad_norm": 1.1957787010002299, - "learning_rate": 2.075443613448532e-06, - "loss": 0.4765, + "epoch": 0.64, + "grad_norm": 0.4528290235364106, + "learning_rate": 6.132307223127945e-06, + "loss": 0.3279, "step": 13880 }, { - "epoch": 0.8, - "grad_norm": 0.33500602148308517, - "learning_rate": 2.0743087250757544e-06, - "loss": 0.2632, + "epoch": 0.64, + "grad_norm": 0.38655686394596395, + "learning_rate": 6.130935141779328e-06, + "loss": 0.2297, "step": 13881 }, { - "epoch": 0.8, - "grad_norm": 0.3294045154993524, - "learning_rate": 2.073174111169327e-06, - "loss": 0.2471, + "epoch": 0.64, + "grad_norm": 1.4234348894720836, + "learning_rate": 6.12956314608808e-06, + "loss": 0.5589, "step": 13882 }, { - "epoch": 0.8, - "grad_norm": 0.8524795933817103, - "learning_rate": 2.072039771768539e-06, - "loss": 0.3734, + "epoch": 0.64, + "grad_norm": 0.556487246186019, + "learning_rate": 6.128191236084569e-06, + "loss": 0.3656, "step": 13883 }, { - "epoch": 0.8, - "grad_norm": 0.21219881196196438, - "learning_rate": 2.0709057069126726e-06, - "loss": 0.2046, + "epoch": 0.64, + "grad_norm": 0.2992721562379536, + "learning_rate": 6.126819411799175e-06, + "loss": 0.2106, "step": 13884 }, { - "epoch": 0.8, - "grad_norm": 0.3405533089230494, - "learning_rate": 2.0697719166410013e-06, - "loss": 0.2002, + "epoch": 0.64, + "grad_norm": 0.4949216110377767, + "learning_rate": 6.125447673262266e-06, + "loss": 0.2879, "step": 13885 }, { - "epoch": 0.8, - "grad_norm": 1.5915861638020246, - "learning_rate": 2.068638400992784e-06, - "loss": 0.757, + "epoch": 0.64, + "grad_norm": 0.46199684277353253, + "learning_rate": 6.124076020504213e-06, + "loss": 0.2475, "step": 13886 }, { - "epoch": 0.8, - "grad_norm": 1.2417936476128963, - "learning_rate": 2.0675051600072817e-06, - "loss": 0.7115, + "epoch": 0.64, + "grad_norm": 0.32834354245058006, + "learning_rate": 6.122704453555377e-06, + "loss": 0.2013, "step": 13887 }, { - "epoch": 0.8, - "grad_norm": 0.2680697096757186, - "learning_rate": 2.0663721937237334e-06, - "loss": 0.1963, + "epoch": 0.64, + "grad_norm": 0.37220094871738957, + "learning_rate": 6.1213329724461305e-06, + "loss": 0.3009, "step": 13888 }, { - "epoch": 0.8, - "grad_norm": 0.3929872162313532, - "learning_rate": 2.0652395021813752e-06, - "loss": 0.3059, + "epoch": 0.64, + "grad_norm": 1.2475134386857376, + "learning_rate": 6.1199615772068324e-06, + "loss": 0.8039, "step": 13889 }, { - "epoch": 0.8, - "grad_norm": 0.36593534101852726, - "learning_rate": 2.064107085419429e-06, - "loss": 0.1969, + "epoch": 0.64, + "grad_norm": 0.3503292505677632, + "learning_rate": 6.118590267867847e-06, + "loss": 0.2185, "step": 13890 }, { - "epoch": 0.8, - "grad_norm": 0.32655675523124694, - "learning_rate": 2.062974943477116e-06, - "loss": 0.1665, + "epoch": 0.64, + "grad_norm": 0.524903265245972, + "learning_rate": 6.117219044459527e-06, + "loss": 0.3496, "step": 13891 }, { - "epoch": 0.8, - "grad_norm": 0.29384082638874254, - "learning_rate": 2.0618430763936402e-06, - "loss": 0.2684, + "epoch": 0.64, + "grad_norm": 0.24995885801324022, + "learning_rate": 6.11584790701224e-06, + "loss": 0.1936, "step": 13892 }, { - "epoch": 0.8, - "grad_norm": 1.209531727244767, - "learning_rate": 2.0607114842081966e-06, - "loss": 0.7586, + "epoch": 0.64, + "grad_norm": 0.36988544562524966, + "learning_rate": 6.114476855556337e-06, + "loss": 0.2789, "step": 13893 }, { - "epoch": 0.8, - "grad_norm": 0.3941791338393727, - "learning_rate": 2.0595801669599704e-06, - "loss": 0.2967, + "epoch": 0.64, + "grad_norm": 1.0742786117667407, + "learning_rate": 6.113105890122172e-06, + "loss": 0.3657, "step": 13894 }, { - "epoch": 0.8, - "grad_norm": 0.5237764192291912, - "learning_rate": 2.0584491246881443e-06, - "loss": 0.2825, + "epoch": 0.64, + "grad_norm": 0.32995531069287776, + "learning_rate": 6.111735010740094e-06, + "loss": 0.2895, "step": 13895 }, { - "epoch": 0.8, - "grad_norm": 0.2794762521852401, - "learning_rate": 2.0573183574318832e-06, - "loss": 0.249, + "epoch": 0.64, + "grad_norm": 0.3677734611032791, + "learning_rate": 6.110364217440453e-06, + "loss": 0.2752, "step": 13896 }, { - "epoch": 0.8, - "grad_norm": 0.2774111684414447, - "learning_rate": 2.0561878652303458e-06, - "loss": 0.1959, + "epoch": 0.64, + "grad_norm": 0.7325666516433144, + "learning_rate": 6.108993510253602e-06, + "loss": 0.2996, "step": 13897 }, { - "epoch": 0.8, - "grad_norm": 0.5438540602781272, - "learning_rate": 2.0550576481226814e-06, - "loss": 0.212, + "epoch": 0.64, + "grad_norm": 0.3334666735054576, + "learning_rate": 6.1076228892098856e-06, + "loss": 0.1932, "step": 13898 }, { - "epoch": 0.8, - "grad_norm": 0.6511150660244753, - "learning_rate": 2.0539277061480256e-06, - "loss": 0.4511, + "epoch": 0.64, + "grad_norm": 0.3271418452175672, + "learning_rate": 6.106252354339647e-06, + "loss": 0.2548, "step": 13899 }, { - "epoch": 0.8, - "grad_norm": 0.2423417364066042, - "learning_rate": 2.0527980393455147e-06, - "loss": 0.2466, + "epoch": 0.64, + "grad_norm": 0.3636125574228562, + "learning_rate": 6.104881905673226e-06, + "loss": 0.2606, "step": 13900 }, { - "epoch": 0.8, - "grad_norm": 0.47122913695901136, - "learning_rate": 2.051668647754267e-06, - "loss": 0.2733, + "epoch": 0.64, + "grad_norm": 0.779745658858173, + "learning_rate": 6.1035115432409675e-06, + "loss": 0.4159, "step": 13901 }, { - "epoch": 0.8, - "grad_norm": 0.27193278941468646, - "learning_rate": 2.0505395314133915e-06, - "loss": 0.156, + "epoch": 0.64, + "grad_norm": 0.36380940366596587, + "learning_rate": 6.102141267073207e-06, + "loss": 0.2518, "step": 13902 }, { - "epoch": 0.8, - "grad_norm": 0.5962094572756668, - "learning_rate": 2.049410690361987e-06, - "loss": 0.304, + "epoch": 0.64, + "grad_norm": 0.3859740720756227, + "learning_rate": 6.100771077200284e-06, + "loss": 0.287, "step": 13903 }, { - "epoch": 0.8, - "grad_norm": 0.26728260818607263, - "learning_rate": 2.0482821246391515e-06, - "loss": 0.2332, + "epoch": 0.64, + "grad_norm": 0.3567722990613763, + "learning_rate": 6.0994009736525275e-06, + "loss": 0.2055, "step": 13904 }, { - "epoch": 0.8, - "grad_norm": 0.7884511286921686, - "learning_rate": 2.0471538342839637e-06, - "loss": 0.4379, + "epoch": 0.64, + "grad_norm": 0.3350838698558275, + "learning_rate": 6.098030956460277e-06, + "loss": 0.2453, "step": 13905 }, { - "epoch": 0.8, - "grad_norm": 0.5466734579354183, - "learning_rate": 2.0460258193354963e-06, - "loss": 0.3473, + "epoch": 0.64, + "grad_norm": 1.1713263439209174, + "learning_rate": 6.09666102565386e-06, + "loss": 0.687, "step": 13906 }, { - "epoch": 0.8, - "grad_norm": 0.39502877345893916, - "learning_rate": 2.0448980798328113e-06, - "loss": 0.3107, + "epoch": 0.64, + "grad_norm": 0.31835001909591154, + "learning_rate": 6.095291181263605e-06, + "loss": 0.2613, "step": 13907 }, { - "epoch": 0.8, - "grad_norm": 0.19179045423602922, - "learning_rate": 2.043770615814966e-06, - "loss": 0.1632, + "epoch": 0.64, + "grad_norm": 0.33645966347971534, + "learning_rate": 6.093921423319842e-06, + "loss": 0.2752, "step": 13908 }, { - "epoch": 0.8, - "grad_norm": 0.5438731088748233, - "learning_rate": 2.0426434273210016e-06, - "loss": 0.2428, + "epoch": 0.64, + "grad_norm": 1.1395949319757779, + "learning_rate": 6.0925517518528875e-06, + "loss": 0.5575, "step": 13909 }, { - "epoch": 0.8, - "grad_norm": 0.3807065412872983, - "learning_rate": 2.041516514389954e-06, - "loss": 0.2902, + "epoch": 0.64, + "grad_norm": 0.21868314525692864, + "learning_rate": 6.091182166893077e-06, + "loss": 0.0851, "step": 13910 }, { - "epoch": 0.8, - "grad_norm": 0.7299876932540644, - "learning_rate": 2.0403898770608466e-06, - "loss": 0.3083, + "epoch": 0.64, + "grad_norm": 0.35297265048325643, + "learning_rate": 6.089812668470727e-06, + "loss": 0.2729, "step": 13911 }, { - "epoch": 0.8, - "grad_norm": 0.3073229067569145, - "learning_rate": 2.0392635153726958e-06, - "loss": 0.2584, + "epoch": 0.64, + "grad_norm": 0.3697626885697134, + "learning_rate": 6.088443256616154e-06, + "loss": 0.2976, "step": 13912 }, { - "epoch": 0.8, - "grad_norm": 0.38588900994842523, - "learning_rate": 2.0381374293645072e-06, - "loss": 0.2999, + "epoch": 0.64, + "grad_norm": 0.6527010514620597, + "learning_rate": 6.087073931359672e-06, + "loss": 0.2948, "step": 13913 }, { - "epoch": 0.8, - "grad_norm": 0.47918109962924327, - "learning_rate": 2.0370116190752763e-06, - "loss": 0.1199, + "epoch": 0.64, + "grad_norm": 0.38464961289826516, + "learning_rate": 6.085704692731609e-06, + "loss": 0.2826, "step": 13914 }, { - "epoch": 0.8, - "grad_norm": 0.3032568846110088, - "learning_rate": 2.035886084543989e-06, - "loss": 0.19, + "epoch": 0.64, + "grad_norm": 0.5107882224340381, + "learning_rate": 6.08433554076227e-06, + "loss": 0.3637, "step": 13915 }, { - "epoch": 0.8, - "grad_norm": 0.3048366839804276, - "learning_rate": 2.0347608258096263e-06, - "loss": 0.2932, + "epoch": 0.64, + "grad_norm": 0.2585879510200286, + "learning_rate": 6.0829664754819665e-06, + "loss": 0.1975, "step": 13916 }, { - "epoch": 0.8, - "grad_norm": 0.860092659438108, - "learning_rate": 2.0336358429111534e-06, - "loss": 0.3054, + "epoch": 0.64, + "grad_norm": 0.2918190694749011, + "learning_rate": 6.08159749692101e-06, + "loss": 0.1788, "step": 13917 }, { - "epoch": 0.8, - "grad_norm": 0.410237488915634, - "learning_rate": 2.0325111358875295e-06, - "loss": 0.2969, + "epoch": 0.64, + "grad_norm": 0.7832952021845467, + "learning_rate": 6.0802286051097095e-06, + "loss": 0.3904, "step": 13918 }, { - "epoch": 0.8, - "grad_norm": 0.5844699302701855, - "learning_rate": 2.031386704777698e-06, - "loss": 0.3557, + "epoch": 0.64, + "grad_norm": 0.3975248059853997, + "learning_rate": 6.078859800078369e-06, + "loss": 0.3066, "step": 13919 }, { - "epoch": 0.8, - "grad_norm": 0.3807335069363862, - "learning_rate": 2.0302625496206065e-06, - "loss": 0.2834, + "epoch": 0.64, + "grad_norm": 0.3111389377780675, + "learning_rate": 6.077491081857294e-06, + "loss": 0.2158, "step": 13920 }, { - "epoch": 0.8, - "grad_norm": 0.19674851030297955, - "learning_rate": 2.0291386704551795e-06, - "loss": 0.0822, + "epoch": 0.64, + "grad_norm": 1.2448699420253515, + "learning_rate": 6.076122450476785e-06, + "loss": 0.5928, "step": 13921 }, { - "epoch": 0.8, - "grad_norm": 0.4132502470459073, - "learning_rate": 2.028015067320338e-06, - "loss": 0.3011, + "epoch": 0.64, + "grad_norm": 0.28182896674334956, + "learning_rate": 6.074753905967144e-06, + "loss": 0.171, "step": 13922 }, { - "epoch": 0.8, - "grad_norm": 0.6473087562464866, - "learning_rate": 2.0268917402549914e-06, - "loss": 0.3178, + "epoch": 0.64, + "grad_norm": 0.2744877444389626, + "learning_rate": 6.073385448358668e-06, + "loss": 0.2008, "step": 13923 }, { - "epoch": 0.8, - "grad_norm": 0.36673530265989734, - "learning_rate": 2.0257686892980387e-06, - "loss": 0.2243, + "epoch": 0.64, + "grad_norm": 0.5417619103404065, + "learning_rate": 6.072017077681654e-06, + "loss": 0.3616, "step": 13924 }, { - "epoch": 0.8, - "grad_norm": 0.36203099449274834, - "learning_rate": 2.0246459144883767e-06, - "loss": 0.2867, + "epoch": 0.64, + "grad_norm": 0.7672529626488681, + "learning_rate": 6.070648793966396e-06, + "loss": 0.3799, "step": 13925 }, { - "epoch": 0.8, - "grad_norm": 0.5054331029168437, - "learning_rate": 2.023523415864883e-06, - "loss": 0.2783, + "epoch": 0.64, + "grad_norm": 0.32359873885082413, + "learning_rate": 6.0692805972431835e-06, + "loss": 0.2014, "step": 13926 }, { - "epoch": 0.8, - "grad_norm": 0.28828519889572235, - "learning_rate": 2.02240119346643e-06, - "loss": 0.0938, + "epoch": 0.64, + "grad_norm": 0.5243716294771018, + "learning_rate": 6.067912487542312e-06, + "loss": 0.3559, "step": 13927 }, { - "epoch": 0.8, - "grad_norm": 0.3016104369032031, - "learning_rate": 2.0212792473318788e-06, - "loss": 0.2868, + "epoch": 0.64, + "grad_norm": 0.3636343897453809, + "learning_rate": 6.066544464894069e-06, + "loss": 0.2521, "step": 13928 }, { - "epoch": 0.8, - "grad_norm": 0.6227814296163309, - "learning_rate": 2.020157577500086e-06, - "loss": 0.379, + "epoch": 0.64, + "grad_norm": 0.29228912996241135, + "learning_rate": 6.065176529328743e-06, + "loss": 0.2133, "step": 13929 }, { - "epoch": 0.8, - "grad_norm": 0.36615003349825465, - "learning_rate": 2.019036184009894e-06, - "loss": 0.2155, + "epoch": 0.64, + "grad_norm": 0.78084620471736, + "learning_rate": 6.063808680876611e-06, + "loss": 0.2924, "step": 13930 }, { - "epoch": 0.8, - "grad_norm": 0.40065400364964354, - "learning_rate": 2.0179150669001347e-06, - "loss": 0.3056, + "epoch": 0.64, + "grad_norm": 0.36541293649495693, + "learning_rate": 6.062440919567965e-06, + "loss": 0.3049, "step": 13931 }, { - "epoch": 0.8, - "grad_norm": 0.359059617026297, - "learning_rate": 2.0167942262096317e-06, - "loss": 0.3046, + "epoch": 0.64, + "grad_norm": 0.6332530676964685, + "learning_rate": 6.06107324543308e-06, + "loss": 0.33, "step": 13932 }, { - "epoch": 0.8, - "grad_norm": 0.3025116844483224, - "learning_rate": 2.0156736619772034e-06, - "loss": 0.1751, + "epoch": 0.64, + "grad_norm": 0.400946193123766, + "learning_rate": 6.059705658502239e-06, + "loss": 0.2524, "step": 13933 }, { - "epoch": 0.8, - "grad_norm": 0.3910522596201756, - "learning_rate": 2.0145533742416536e-06, - "loss": 0.2408, + "epoch": 0.64, + "grad_norm": 0.27081255944141536, + "learning_rate": 6.058338158805714e-06, + "loss": 0.2061, "step": 13934 }, { - "epoch": 0.8, - "grad_norm": 0.8924655555675018, - "learning_rate": 2.013433363041777e-06, - "loss": 0.4662, + "epoch": 0.64, + "grad_norm": 0.42267808350852526, + "learning_rate": 6.056970746373785e-06, + "loss": 0.2491, "step": 13935 }, { - "epoch": 0.8, - "grad_norm": 0.3086428880041467, - "learning_rate": 2.012313628416359e-06, - "loss": 0.2813, + "epoch": 0.64, + "grad_norm": 0.553848875594435, + "learning_rate": 6.055603421236723e-06, + "loss": 0.2793, "step": 13936 }, { - "epoch": 0.8, - "grad_norm": 0.3740806939413502, - "learning_rate": 2.0111941704041738e-06, - "loss": 0.2224, + "epoch": 0.64, + "grad_norm": 1.4924285532992687, + "learning_rate": 6.0542361834248e-06, + "loss": 0.7511, "step": 13937 }, { - "epoch": 0.8, - "grad_norm": 1.312160171802165, - "learning_rate": 2.0100749890439943e-06, - "loss": 0.5937, + "epoch": 0.64, + "grad_norm": 0.4664070939459533, + "learning_rate": 6.052869032968285e-06, + "loss": 0.2863, "step": 13938 }, { - "epoch": 0.8, - "grad_norm": 0.39192858923914753, - "learning_rate": 2.0089560843745737e-06, - "loss": 0.2937, + "epoch": 0.64, + "grad_norm": 0.3343228077422523, + "learning_rate": 6.051501969897442e-06, + "loss": 0.2613, "step": 13939 }, { - "epoch": 0.8, - "grad_norm": 0.28806615219896364, - "learning_rate": 2.0078374564346605e-06, - "loss": 0.2294, + "epoch": 0.64, + "grad_norm": 1.2548638085173232, + "learning_rate": 6.050134994242545e-06, + "loss": 0.6256, "step": 13940 }, { - "epoch": 0.8, - "grad_norm": 0.28823469322612705, - "learning_rate": 2.0067191052629897e-06, - "loss": 0.1929, + "epoch": 0.64, + "grad_norm": 0.3018821161366112, + "learning_rate": 6.048768106033851e-06, + "loss": 0.2233, "step": 13941 }, { - "epoch": 0.8, - "grad_norm": 1.1471591644672199, - "learning_rate": 2.0056010308982954e-06, - "loss": 0.6341, + "epoch": 0.64, + "grad_norm": 0.5044790701983395, + "learning_rate": 6.0474013053016215e-06, + "loss": 0.3903, "step": 13942 }, { - "epoch": 0.8, - "grad_norm": 0.30332531713716976, - "learning_rate": 2.0044832333792942e-06, - "loss": 0.1903, + "epoch": 0.64, + "grad_norm": 0.2605724013141853, + "learning_rate": 6.0460345920761156e-06, + "loss": 0.2053, "step": 13943 }, { - "epoch": 0.8, - "grad_norm": 0.37927445804426185, - "learning_rate": 2.003365712744694e-06, - "loss": 0.286, + "epoch": 0.64, + "grad_norm": 0.39157420952536914, + "learning_rate": 6.0446679663875955e-06, + "loss": 0.2716, "step": 13944 }, { - "epoch": 0.8, - "grad_norm": 0.8720692601753824, - "learning_rate": 2.0022484690331957e-06, - "loss": 0.3939, + "epoch": 0.64, + "grad_norm": 1.1322705480340391, + "learning_rate": 6.043301428266314e-06, + "loss": 0.6276, "step": 13945 }, { - "epoch": 0.8, - "grad_norm": 0.326152282045429, - "learning_rate": 2.0011315022834887e-06, - "loss": 0.2417, + "epoch": 0.64, + "grad_norm": 0.4697571477942363, + "learning_rate": 6.041934977742526e-06, + "loss": 0.2576, "step": 13946 }, { - "epoch": 0.8, - "grad_norm": 0.25737955441315835, - "learning_rate": 2.000014812534253e-06, - "loss": 0.121, + "epoch": 0.64, + "grad_norm": 0.32730788884032985, + "learning_rate": 6.040568614846481e-06, + "loss": 0.254, "step": 13947 }, { - "epoch": 0.8, - "grad_norm": 0.3269429544155623, - "learning_rate": 1.9988983998241616e-06, - "loss": 0.2851, + "epoch": 0.64, + "grad_norm": 0.4768511755553711, + "learning_rate": 6.039202339608432e-06, + "loss": 0.3406, "step": 13948 }, { - "epoch": 0.8, - "grad_norm": 0.262006983013765, - "learning_rate": 1.9977822641918722e-06, - "loss": 0.2173, + "epoch": 0.64, + "grad_norm": 0.18741304137980996, + "learning_rate": 6.037836152058627e-06, + "loss": 0.1196, "step": 13949 }, { - "epoch": 0.8, - "grad_norm": 1.0942238387697292, - "learning_rate": 1.996666405676041e-06, - "loss": 0.2634, + "epoch": 0.64, + "grad_norm": 0.6236998409670669, + "learning_rate": 6.03647005222731e-06, + "loss": 0.3323, "step": 13950 }, { - "epoch": 0.8, - "grad_norm": 0.31107848659817255, - "learning_rate": 1.9955508243153075e-06, - "loss": 0.2901, + "epoch": 0.64, + "grad_norm": 0.29247676644612164, + "learning_rate": 6.0351040401447235e-06, + "loss": 0.276, "step": 13951 }, { - "epoch": 0.8, - "grad_norm": 0.36351275425339735, - "learning_rate": 1.9944355201483057e-06, - "loss": 0.2643, + "epoch": 0.64, + "grad_norm": 0.5904979871966565, + "learning_rate": 6.0337381158411145e-06, + "loss": 0.3046, "step": 13952 }, { - "epoch": 0.8, - "grad_norm": 0.27098007640215643, - "learning_rate": 1.993320493213654e-06, - "loss": 0.1226, + "epoch": 0.64, + "grad_norm": 0.5426700279207406, + "learning_rate": 6.032372279346721e-06, + "loss": 0.2971, "step": 13953 }, { - "epoch": 0.8, - "grad_norm": 0.3990952691061257, - "learning_rate": 1.992205743549972e-06, - "loss": 0.2757, + "epoch": 0.64, + "grad_norm": 0.33207282727189236, + "learning_rate": 6.031006530691781e-06, + "loss": 0.2501, "step": 13954 }, { - "epoch": 0.8, - "grad_norm": 0.3607559119535387, - "learning_rate": 1.991091271195862e-06, - "loss": 0.2886, + "epoch": 0.64, + "grad_norm": 0.25432608529295825, + "learning_rate": 6.0296408699065325e-06, + "loss": 0.2256, "step": 13955 }, { - "epoch": 0.8, - "grad_norm": 0.32360319442150737, - "learning_rate": 1.989977076189916e-06, - "loss": 0.254, + "epoch": 0.64, + "grad_norm": 0.3761620633003791, + "learning_rate": 6.028275297021202e-06, + "loss": 0.2097, "step": 13956 }, { - "epoch": 0.8, - "grad_norm": 0.5376372309933372, - "learning_rate": 1.9888631585707165e-06, - "loss": 0.3056, + "epoch": 0.64, + "grad_norm": 0.5474882994913526, + "learning_rate": 6.026909812066034e-06, + "loss": 0.3329, "step": 13957 }, { - "epoch": 0.8, - "grad_norm": 0.4290201609712683, - "learning_rate": 1.987749518376845e-06, - "loss": 0.2751, + "epoch": 0.64, + "grad_norm": 0.909234634916144, + "learning_rate": 6.025544415071256e-06, + "loss": 0.4046, "step": 13958 }, { - "epoch": 0.8, - "grad_norm": 0.49871685728533977, - "learning_rate": 1.986636155646862e-06, - "loss": 0.3373, + "epoch": 0.64, + "grad_norm": 0.28396838035334615, + "learning_rate": 6.024179106067091e-06, + "loss": 0.2153, "step": 13959 }, { - "epoch": 0.8, - "grad_norm": 0.27146044669137814, - "learning_rate": 1.985523070419324e-06, - "loss": 0.1775, + "epoch": 0.64, + "grad_norm": 0.5486000001338303, + "learning_rate": 6.022813885083764e-06, + "loss": 0.3759, "step": 13960 }, { - "epoch": 0.8, - "grad_norm": 0.2442071282951535, - "learning_rate": 1.984410262732779e-06, - "loss": 0.1945, + "epoch": 0.64, + "grad_norm": 0.5346700890347228, + "learning_rate": 6.021448752151508e-06, + "loss": 0.313, "step": 13961 }, { - "epoch": 0.8, - "grad_norm": 0.6881344071763977, - "learning_rate": 1.9832977326257587e-06, - "loss": 0.3537, + "epoch": 0.64, + "grad_norm": 0.27240022956792126, + "learning_rate": 6.020083707300545e-06, + "loss": 0.1554, "step": 13962 }, { - "epoch": 0.8, - "grad_norm": 0.32311184584433045, - "learning_rate": 1.9821854801367947e-06, - "loss": 0.2321, + "epoch": 0.64, + "grad_norm": 0.3437014226746102, + "learning_rate": 6.01871875056109e-06, + "loss": 0.2829, "step": 13963 }, { - "epoch": 0.8, - "grad_norm": 0.29921551388686407, - "learning_rate": 1.981073505304404e-06, - "loss": 0.2423, + "epoch": 0.64, + "grad_norm": 0.8756539480670127, + "learning_rate": 6.017353881963364e-06, + "loss": 0.3714, "step": 13964 }, { - "epoch": 0.8, - "grad_norm": 1.1952034277542511, - "learning_rate": 1.9799618081670925e-06, - "loss": 0.6672, + "epoch": 0.64, + "grad_norm": 0.39445150001458174, + "learning_rate": 6.015989101537586e-06, + "loss": 0.1974, "step": 13965 }, { - "epoch": 0.8, - "grad_norm": 0.16565970362132512, - "learning_rate": 1.978850388763356e-06, - "loss": 0.0987, + "epoch": 0.64, + "grad_norm": 0.531613488517085, + "learning_rate": 6.01462440931397e-06, + "loss": 0.3365, "step": 13966 }, { - "epoch": 0.8, - "grad_norm": 0.28583421178478075, - "learning_rate": 1.977739247131688e-06, - "loss": 0.2643, + "epoch": 0.64, + "grad_norm": 0.27926532394902526, + "learning_rate": 6.0132598053227275e-06, + "loss": 0.2386, "step": 13967 }, { - "epoch": 0.8, - "grad_norm": 0.45246208200159804, - "learning_rate": 1.976628383310566e-06, - "loss": 0.3104, + "epoch": 0.64, + "grad_norm": 0.8889284503431224, + "learning_rate": 6.011895289594072e-06, + "loss": 0.4573, "step": 13968 }, { - "epoch": 0.8, - "grad_norm": 0.7345956913222442, - "learning_rate": 1.9755177973384575e-06, - "loss": 0.2461, + "epoch": 0.64, + "grad_norm": 0.3081466617744832, + "learning_rate": 6.010530862158207e-06, + "loss": 0.1961, "step": 13969 }, { - "epoch": 0.8, - "grad_norm": 0.3468748142933442, - "learning_rate": 1.9744074892538203e-06, - "loss": 0.2622, + "epoch": 0.64, + "grad_norm": 0.5674442571956718, + "learning_rate": 6.0091665230453465e-06, + "loss": 0.3636, "step": 13970 }, { - "epoch": 0.8, - "grad_norm": 0.4893550654394881, - "learning_rate": 1.9732974590951083e-06, - "loss": 0.3401, + "epoch": 0.64, + "grad_norm": 0.45967609372796786, + "learning_rate": 6.007802272285693e-06, + "loss": 0.2932, "step": 13971 }, { - "epoch": 0.8, - "grad_norm": 0.313095481399203, - "learning_rate": 1.972187706900761e-06, - "loss": 0.256, + "epoch": 0.64, + "grad_norm": 0.34513440300235276, + "learning_rate": 6.006438109909449e-06, + "loss": 0.2095, "step": 13972 }, { - "epoch": 0.8, - "grad_norm": 0.28891551172507257, - "learning_rate": 1.9710782327092083e-06, - "loss": 0.1845, + "epoch": 0.64, + "grad_norm": 1.244036587386846, + "learning_rate": 6.005074035946813e-06, + "loss": 0.6816, "step": 13973 }, { - "epoch": 0.8, - "grad_norm": 0.45100541255152954, - "learning_rate": 1.9699690365588674e-06, - "loss": 0.3064, + "epoch": 0.64, + "grad_norm": 0.3129660059868295, + "learning_rate": 6.003710050427991e-06, + "loss": 0.2303, "step": 13974 }, { - "epoch": 0.8, - "grad_norm": 0.329722050488096, - "learning_rate": 1.9688601184881572e-06, - "loss": 0.2946, + "epoch": 0.64, + "grad_norm": 0.23754626454349723, + "learning_rate": 6.002346153383176e-06, + "loss": 0.2045, "step": 13975 }, { - "epoch": 0.8, - "grad_norm": 0.28484365336909273, - "learning_rate": 1.9677514785354747e-06, - "loss": 0.1848, + "epoch": 0.64, + "grad_norm": 0.7097344391241067, + "learning_rate": 6.0009823448425675e-06, + "loss": 0.4256, "step": 13976 }, { - "epoch": 0.8, - "grad_norm": 1.0415921858447894, - "learning_rate": 1.966643116739214e-06, - "loss": 0.5841, + "epoch": 0.64, + "grad_norm": 0.6539634740846677, + "learning_rate": 5.99961862483635e-06, + "loss": 0.3563, "step": 13977 }, { - "epoch": 0.8, - "grad_norm": 0.945207146977298, - "learning_rate": 1.9655350331377563e-06, - "loss": 0.4686, + "epoch": 0.64, + "grad_norm": 0.39489466383579075, + "learning_rate": 5.998254993394723e-06, + "loss": 0.2283, "step": 13978 }, { - "epoch": 0.8, - "grad_norm": 0.32279578400528663, - "learning_rate": 1.964427227769475e-06, - "loss": 0.1987, + "epoch": 0.64, + "grad_norm": 0.39325741087960653, + "learning_rate": 5.996891450547874e-06, + "loss": 0.3226, "step": 13979 }, { - "epoch": 0.8, - "grad_norm": 0.3187970274152468, - "learning_rate": 1.9633197006727333e-06, - "loss": 0.2472, + "epoch": 0.64, + "grad_norm": 0.6192811217527082, + "learning_rate": 5.995527996325989e-06, + "loss": 0.3368, "step": 13980 }, { - "epoch": 0.8, - "grad_norm": 0.4706050178267718, - "learning_rate": 1.9622124518858855e-06, - "loss": 0.215, + "epoch": 0.64, + "grad_norm": 0.4128657996975594, + "learning_rate": 5.994164630759255e-06, + "loss": 0.3086, "step": 13981 }, { - "epoch": 0.8, - "grad_norm": 0.32904291628440924, - "learning_rate": 1.9611054814472707e-06, - "loss": 0.2531, + "epoch": 0.64, + "grad_norm": 0.33984521067875934, + "learning_rate": 5.992801353877855e-06, + "loss": 0.1981, "step": 13982 }, { - "epoch": 0.8, - "grad_norm": 0.3076825890196617, - "learning_rate": 1.959998789395231e-06, - "loss": 0.2521, + "epoch": 0.64, + "grad_norm": 0.41028605702400667, + "learning_rate": 5.991438165711972e-06, + "loss": 0.2792, "step": 13983 }, { - "epoch": 0.8, - "grad_norm": 0.808660652729481, - "learning_rate": 1.9588923757680878e-06, - "loss": 0.496, + "epoch": 0.64, + "grad_norm": 0.39863345365823605, + "learning_rate": 5.990075066291785e-06, + "loss": 0.3193, "step": 13984 }, { - "epoch": 0.8, - "grad_norm": 0.3336398442274031, - "learning_rate": 1.9577862406041558e-06, - "loss": 0.2611, + "epoch": 0.64, + "grad_norm": 1.1181275958111858, + "learning_rate": 5.98871205564747e-06, + "loss": 0.4314, "step": 13985 }, { - "epoch": 0.8, - "grad_norm": 0.741654787071884, - "learning_rate": 1.956680383941737e-06, - "loss": 0.1866, + "epoch": 0.64, + "grad_norm": 0.710955903206158, + "learning_rate": 5.987349133809201e-06, + "loss": 0.3395, "step": 13986 }, { - "epoch": 0.8, - "grad_norm": 0.2751490630762147, - "learning_rate": 1.9555748058191337e-06, - "loss": 0.2453, + "epoch": 0.64, + "grad_norm": 0.28092670876968545, + "learning_rate": 5.98598630080716e-06, + "loss": 0.2555, "step": 13987 }, { - "epoch": 0.8, - "grad_norm": 0.3371685877572128, - "learning_rate": 1.9544695062746286e-06, - "loss": 0.2371, + "epoch": 0.64, + "grad_norm": 0.297032847695535, + "learning_rate": 5.984623556671511e-06, + "loss": 0.1534, "step": 13988 }, { - "epoch": 0.8, - "grad_norm": 0.8680164276785397, - "learning_rate": 1.9533644853464996e-06, - "loss": 0.4721, + "epoch": 0.64, + "grad_norm": 0.792822963610027, + "learning_rate": 5.9832609014324284e-06, + "loss": 0.3679, "step": 13989 }, { - "epoch": 0.8, - "grad_norm": 0.47686024734591553, - "learning_rate": 1.952259743073012e-06, - "loss": 0.3312, + "epoch": 0.64, + "grad_norm": 0.3954742641776457, + "learning_rate": 5.981898335120072e-06, + "loss": 0.2726, "step": 13990 }, { - "epoch": 0.8, - "grad_norm": 0.39545690708722075, - "learning_rate": 1.9511552794924194e-06, - "loss": 0.2362, + "epoch": 0.64, + "grad_norm": 0.34688056729845823, + "learning_rate": 5.980535857764619e-06, + "loss": 0.2569, "step": 13991 }, { - "epoch": 0.8, - "grad_norm": 0.36269709669257977, - "learning_rate": 1.9500510946429772e-06, - "loss": 0.2391, + "epoch": 0.64, + "grad_norm": 0.7045285732138477, + "learning_rate": 5.979173469396226e-06, + "loss": 0.392, "step": 13992 }, { - "epoch": 0.8, - "grad_norm": 0.31630341720542904, - "learning_rate": 1.9489471885629196e-06, - "loss": 0.1847, + "epoch": 0.64, + "grad_norm": 0.3613347498255459, + "learning_rate": 5.97781117004506e-06, + "loss": 0.2853, "step": 13993 }, { - "epoch": 0.8, - "grad_norm": 0.40712626311337746, - "learning_rate": 1.9478435612904744e-06, - "loss": 0.2752, + "epoch": 0.64, + "grad_norm": 0.45592041115748355, + "learning_rate": 5.9764489597412744e-06, + "loss": 0.2312, "step": 13994 }, { - "epoch": 0.8, - "grad_norm": 0.2942320985895319, - "learning_rate": 1.946740212863858e-06, - "loss": 0.288, + "epoch": 0.64, + "grad_norm": 0.2567543445725462, + "learning_rate": 5.975086838515034e-06, + "loss": 0.1833, "step": 13995 }, { - "epoch": 0.8, - "grad_norm": 0.7675447935881212, - "learning_rate": 1.945637143321284e-06, - "loss": 0.3028, + "epoch": 0.64, + "grad_norm": 0.39636881108234673, + "learning_rate": 5.973724806396491e-06, + "loss": 0.2762, "step": 13996 }, { - "epoch": 0.8, - "grad_norm": 0.400006777628331, - "learning_rate": 1.9445343527009497e-06, - "loss": 0.2919, + "epoch": 0.64, + "grad_norm": 1.230784619844159, + "learning_rate": 5.972362863415799e-06, + "loss": 0.7232, "step": 13997 }, { - "epoch": 0.8, - "grad_norm": 0.3556966067366299, - "learning_rate": 1.9434318410410435e-06, - "loss": 0.247, + "epoch": 0.64, + "grad_norm": 0.4410252887551768, + "learning_rate": 5.9710010096031135e-06, + "loss": 0.2438, "step": 13998 }, { - "epoch": 0.8, - "grad_norm": 0.25185255386753236, - "learning_rate": 1.942329608379745e-06, - "loss": 0.183, + "epoch": 0.64, + "grad_norm": 0.3164557210632191, + "learning_rate": 5.969639244988579e-06, + "loss": 0.2731, "step": 13999 }, { - "epoch": 0.8, - "grad_norm": 0.31969515942588655, - "learning_rate": 1.9412276547552276e-06, - "loss": 0.2485, + "epoch": 0.64, + "grad_norm": 0.4763283789656363, + "learning_rate": 5.96827756960235e-06, + "loss": 0.2578, "step": 14000 }, { - "epoch": 0.8, - "grad_norm": 0.9991830838318048, - "learning_rate": 1.9401259802056495e-06, - "loss": 0.5334, + "epoch": 0.64, + "grad_norm": 0.2873672902424474, + "learning_rate": 5.966915983474569e-06, + "loss": 0.0913, "step": 14001 }, { - "epoch": 0.8, - "grad_norm": 0.42432293133392635, - "learning_rate": 1.9390245847691625e-06, - "loss": 0.2674, + "epoch": 0.64, + "grad_norm": 0.43585165425091266, + "learning_rate": 5.965554486635381e-06, + "loss": 0.3022, "step": 14002 }, { - "epoch": 0.8, - "grad_norm": 0.257524155935746, - "learning_rate": 1.9379234684839075e-06, - "loss": 0.244, + "epoch": 0.64, + "grad_norm": 0.37288706557421614, + "learning_rate": 5.964193079114925e-06, + "loss": 0.3229, "step": 14003 }, { - "epoch": 0.8, - "grad_norm": 1.2512902112125146, - "learning_rate": 1.9368226313880134e-06, - "loss": 0.5929, + "epoch": 0.64, + "grad_norm": 0.6191245196238206, + "learning_rate": 5.962831760943348e-06, + "loss": 0.3169, "step": 14004 }, { - "epoch": 0.8, - "grad_norm": 0.2558079446835865, - "learning_rate": 1.935722073519608e-06, - "loss": 0.0865, + "epoch": 0.64, + "grad_norm": 0.349550917043772, + "learning_rate": 5.961470532150787e-06, + "loss": 0.2685, "step": 14005 }, { - "epoch": 0.8, - "grad_norm": 0.30514259612396377, - "learning_rate": 1.9346217949168e-06, - "loss": 0.2563, + "epoch": 0.64, + "grad_norm": 0.40657768501801067, + "learning_rate": 5.960109392767374e-06, + "loss": 0.3197, "step": 14006 }, { - "epoch": 0.8, - "grad_norm": 0.32676000041131353, - "learning_rate": 1.933521795617692e-06, - "loss": 0.2903, + "epoch": 0.64, + "grad_norm": 0.2940599433282073, + "learning_rate": 5.95874834282324e-06, + "loss": 0.1689, "step": 14007 }, { - "epoch": 0.8, - "grad_norm": 0.6566724718751233, - "learning_rate": 1.932422075660376e-06, - "loss": 0.3683, + "epoch": 0.64, + "grad_norm": 0.3627495523996298, + "learning_rate": 5.95738738234853e-06, + "loss": 0.2055, "step": 14008 }, { - "epoch": 0.8, - "grad_norm": 0.31808490481525514, - "learning_rate": 1.931322635082938e-06, - "loss": 0.175, + "epoch": 0.64, + "grad_norm": 0.8455424806672331, + "learning_rate": 5.956026511373363e-06, + "loss": 0.4845, "step": 14009 }, { - "epoch": 0.8, - "grad_norm": 0.5415586763759216, - "learning_rate": 1.9302234739234507e-06, - "loss": 0.3217, + "epoch": 0.64, + "grad_norm": 0.3906060847573226, + "learning_rate": 5.954665729927873e-06, + "loss": 0.3124, "step": 14010 }, { - "epoch": 0.8, - "grad_norm": 0.2155972087042718, - "learning_rate": 1.9291245922199776e-06, - "loss": 0.2108, + "epoch": 0.64, + "grad_norm": 0.3125920250908797, + "learning_rate": 5.953305038042183e-06, + "loss": 0.2133, "step": 14011 }, { - "epoch": 0.81, - "grad_norm": 0.40196080536904666, - "learning_rate": 1.9280259900105723e-06, - "loss": 0.1741, + "epoch": 0.64, + "grad_norm": 1.3490627749112505, + "learning_rate": 5.951944435746422e-06, + "loss": 0.6816, "step": 14012 }, { - "epoch": 0.81, - "grad_norm": 0.5085855651773016, - "learning_rate": 1.9269276673332806e-06, - "loss": 0.3481, + "epoch": 0.64, + "grad_norm": 0.3762824213821155, + "learning_rate": 5.95058392307071e-06, + "loss": 0.2292, "step": 14013 }, { - "epoch": 0.81, - "grad_norm": 0.6857508871116539, - "learning_rate": 1.9258296242261355e-06, - "loss": 0.4188, + "epoch": 0.64, + "grad_norm": 0.27702605903239613, + "learning_rate": 5.9492235000451645e-06, + "loss": 0.2228, "step": 14014 }, { - "epoch": 0.81, - "grad_norm": 0.24640439130406608, - "learning_rate": 1.9247318607271637e-06, - "loss": 0.2135, + "epoch": 0.64, + "grad_norm": 0.5396572245345912, + "learning_rate": 5.947863166699909e-06, + "loss": 0.3306, "step": 14015 }, { - "epoch": 0.81, - "grad_norm": 0.5920314104837552, - "learning_rate": 1.923634376874378e-06, - "loss": 0.3719, + "epoch": 0.64, + "grad_norm": 0.8835921497815773, + "learning_rate": 5.946502923065054e-06, + "loss": 0.4459, "step": 14016 }, { - "epoch": 0.81, - "grad_norm": 0.25891151164406034, - "learning_rate": 1.9225371727057897e-06, - "loss": 0.1558, + "epoch": 0.64, + "grad_norm": 0.30870793467092883, + "learning_rate": 5.94514276917072e-06, + "loss": 0.1922, "step": 14017 }, { - "epoch": 0.81, - "grad_norm": 0.3841785028881913, - "learning_rate": 1.921440248259391e-06, - "loss": 0.2032, + "epoch": 0.64, + "grad_norm": 0.40783406689933543, + "learning_rate": 5.943782705047016e-06, + "loss": 0.2937, "step": 14018 }, { - "epoch": 0.81, - "grad_norm": 0.2969167120395358, - "learning_rate": 1.9203436035731694e-06, - "loss": 0.2832, + "epoch": 0.64, + "grad_norm": 0.2774443558589636, + "learning_rate": 5.942422730724056e-06, + "loss": 0.1735, "step": 14019 }, { - "epoch": 0.81, - "grad_norm": 0.8309038714782914, - "learning_rate": 1.919247238685098e-06, - "loss": 0.5118, + "epoch": 0.64, + "grad_norm": 0.40546606837626253, + "learning_rate": 5.9410628462319395e-06, + "loss": 0.3179, "step": 14020 }, { - "epoch": 0.81, - "grad_norm": 0.7674720192887823, - "learning_rate": 1.918151153633151e-06, - "loss": 0.2878, + "epoch": 0.64, + "grad_norm": 1.0319937000151396, + "learning_rate": 5.939703051600785e-06, + "loss": 0.3211, "step": 14021 }, { - "epoch": 0.81, - "grad_norm": 0.564361057127029, - "learning_rate": 1.917055348455281e-06, - "loss": 0.2283, + "epoch": 0.64, + "grad_norm": 0.35594690678240204, + "learning_rate": 5.938343346860691e-06, + "loss": 0.3125, "step": 14022 }, { - "epoch": 0.81, - "grad_norm": 0.2814577681747703, - "learning_rate": 1.9159598231894385e-06, - "loss": 0.2676, + "epoch": 0.64, + "grad_norm": 0.36679396768821, + "learning_rate": 5.936983732041762e-06, + "loss": 0.282, "step": 14023 }, { - "epoch": 0.81, - "grad_norm": 0.2937360059194529, - "learning_rate": 1.9148645778735555e-06, - "loss": 0.2098, + "epoch": 0.64, + "grad_norm": 1.0619124674219391, + "learning_rate": 5.935624207174091e-06, + "loss": 0.2938, "step": 14024 }, { - "epoch": 0.81, - "grad_norm": 0.5653575255098695, - "learning_rate": 1.9137696125455672e-06, - "loss": 0.2534, + "epoch": 0.64, + "grad_norm": 0.3139314170072544, + "learning_rate": 5.934264772287788e-06, + "loss": 0.1597, "step": 14025 }, { - "epoch": 0.81, - "grad_norm": 0.5171731933248757, - "learning_rate": 1.91267492724339e-06, - "loss": 0.3388, + "epoch": 0.64, + "grad_norm": 0.4132522604381668, + "learning_rate": 5.932905427412945e-06, + "loss": 0.2819, "step": 14026 }, { - "epoch": 0.81, - "grad_norm": 0.3752775546359505, - "learning_rate": 1.911580522004931e-06, - "loss": 0.2683, + "epoch": 0.64, + "grad_norm": 0.5058327049772476, + "learning_rate": 5.931546172579654e-06, + "loss": 0.2721, "step": 14027 }, { - "epoch": 0.81, - "grad_norm": 0.368645831443498, - "learning_rate": 1.910486396868092e-06, - "loss": 0.2158, + "epoch": 0.64, + "grad_norm": 1.2493164779550003, + "learning_rate": 5.9301870078180115e-06, + "loss": 0.7818, "step": 14028 }, { - "epoch": 0.81, - "grad_norm": 0.30316861367267955, - "learning_rate": 1.909392551870759e-06, - "loss": 0.1882, + "epoch": 0.64, + "grad_norm": 0.36539958269327333, + "learning_rate": 5.928827933158101e-06, + "loss": 0.2529, "step": 14029 }, { - "epoch": 0.81, - "grad_norm": 0.6934510092482077, - "learning_rate": 1.908298987050815e-06, - "loss": 0.3114, + "epoch": 0.64, + "grad_norm": 0.48970531808253404, + "learning_rate": 5.927468948630022e-06, + "loss": 0.3006, "step": 14030 }, { - "epoch": 0.81, - "grad_norm": 0.24347718869590526, - "learning_rate": 1.907205702446131e-06, - "loss": 0.2422, + "epoch": 0.64, + "grad_norm": 0.26278519974333453, + "learning_rate": 5.926110054263853e-06, + "loss": 0.1608, "step": 14031 }, { - "epoch": 0.81, - "grad_norm": 1.259711777802538, - "learning_rate": 1.9061126980945644e-06, - "loss": 0.657, + "epoch": 0.64, + "grad_norm": 0.3522290736438575, + "learning_rate": 5.924751250089681e-06, + "loss": 0.2469, "step": 14032 }, { - "epoch": 0.81, - "grad_norm": 0.572298799907, - "learning_rate": 1.9050199740339648e-06, - "loss": 0.3185, + "epoch": 0.64, + "grad_norm": 0.7797938445854389, + "learning_rate": 5.923392536137587e-06, + "loss": 0.3883, "step": 14033 }, { - "epoch": 0.81, - "grad_norm": 0.42220044917716687, - "learning_rate": 1.9039275303021775e-06, - "loss": 0.2817, + "epoch": 0.64, + "grad_norm": 0.37627157742501927, + "learning_rate": 5.922033912437655e-06, + "loss": 0.2814, "step": 14034 }, { - "epoch": 0.81, - "grad_norm": 0.34749331845809894, - "learning_rate": 1.9028353669370315e-06, - "loss": 0.2275, + "epoch": 0.64, + "grad_norm": 0.40872402295254384, + "learning_rate": 5.9206753790199625e-06, + "loss": 0.2525, "step": 14035 }, { - "epoch": 0.81, - "grad_norm": 0.49512336442006893, - "learning_rate": 1.9017434839763493e-06, - "loss": 0.3007, + "epoch": 0.64, + "grad_norm": 0.5696894184459426, + "learning_rate": 5.919316935914584e-06, + "loss": 0.3534, "step": 14036 }, { - "epoch": 0.81, - "grad_norm": 0.2254669338598251, - "learning_rate": 1.900651881457939e-06, - "loss": 0.1896, + "epoch": 0.64, + "grad_norm": 0.4359473846904344, + "learning_rate": 5.917958583151593e-06, + "loss": 0.272, "step": 14037 }, { - "epoch": 0.81, - "grad_norm": 0.4076412787846548, - "learning_rate": 1.8995605594196086e-06, - "loss": 0.2732, + "epoch": 0.64, + "grad_norm": 0.41465614678037366, + "learning_rate": 5.916600320761068e-06, + "loss": 0.2689, "step": 14038 }, { - "epoch": 0.81, - "grad_norm": 0.32296030460251207, - "learning_rate": 1.8984695178991475e-06, - "loss": 0.2963, + "epoch": 0.64, + "grad_norm": 0.2700984727000162, + "learning_rate": 5.915242148773075e-06, + "loss": 0.2176, "step": 14039 }, { - "epoch": 0.81, - "grad_norm": 0.5481564846950949, - "learning_rate": 1.8973787569343394e-06, - "loss": 0.3206, + "epoch": 0.64, + "grad_norm": 1.344405344456894, + "learning_rate": 5.913884067217686e-06, + "loss": 0.5553, "step": 14040 }, { - "epoch": 0.81, - "grad_norm": 0.6209723598460225, - "learning_rate": 1.8962882765629552e-06, - "loss": 0.2801, + "epoch": 0.65, + "grad_norm": 0.3704167630867199, + "learning_rate": 5.912526076124963e-06, + "loss": 0.2544, "step": 14041 }, { - "epoch": 0.81, - "grad_norm": 0.3055173237798795, - "learning_rate": 1.8951980768227586e-06, - "loss": 0.2729, + "epoch": 0.65, + "grad_norm": 0.41014772977446634, + "learning_rate": 5.911168175524975e-06, + "loss": 0.3125, "step": 14042 }, { - "epoch": 0.81, - "grad_norm": 0.35224208256032374, - "learning_rate": 1.8941081577515053e-06, - "loss": 0.2993, + "epoch": 0.65, + "grad_norm": 0.9713927018185115, + "learning_rate": 5.909810365447781e-06, + "loss": 0.4341, "step": 14043 }, { - "epoch": 0.81, - "grad_norm": 0.28113135249311766, - "learning_rate": 1.8930185193869376e-06, - "loss": 0.1722, + "epoch": 0.65, + "grad_norm": 0.3617616147385406, + "learning_rate": 5.908452645923446e-06, + "loss": 0.2336, "step": 14044 }, { - "epoch": 0.81, - "grad_norm": 0.29298152458518495, - "learning_rate": 1.8919291617667912e-06, - "loss": 0.1652, + "epoch": 0.65, + "grad_norm": 0.428745502892723, + "learning_rate": 5.907095016982024e-06, + "loss": 0.2728, "step": 14045 }, { - "epoch": 0.81, - "grad_norm": 0.45232617515848045, - "learning_rate": 1.8908400849287889e-06, - "loss": 0.2993, + "epoch": 0.65, + "grad_norm": 0.398109947912981, + "learning_rate": 5.905737478653573e-06, + "loss": 0.2958, "step": 14046 }, { - "epoch": 0.81, - "grad_norm": 0.37147528154819126, - "learning_rate": 1.8897512889106451e-06, - "loss": 0.3054, + "epoch": 0.65, + "grad_norm": 0.2326567238674615, + "learning_rate": 5.904380030968149e-06, + "loss": 0.1661, "step": 14047 }, { - "epoch": 0.81, - "grad_norm": 0.5620327131313082, - "learning_rate": 1.8886627737500663e-06, - "loss": 0.2291, + "epoch": 0.65, + "grad_norm": 1.2375449626057426, + "learning_rate": 5.9030226739558035e-06, + "loss": 0.5481, "step": 14048 }, { - "epoch": 0.81, - "grad_norm": 0.26947274618159855, - "learning_rate": 1.8875745394847434e-06, - "loss": 0.2188, + "epoch": 0.65, + "grad_norm": 0.8078774281881038, + "learning_rate": 5.901665407646589e-06, + "loss": 0.4016, "step": 14049 }, { - "epoch": 0.81, - "grad_norm": 0.3342387244403811, - "learning_rate": 1.8864865861523684e-06, - "loss": 0.3209, + "epoch": 0.65, + "grad_norm": 0.2846723660380272, + "learning_rate": 5.900308232070546e-06, + "loss": 0.2191, "step": 14050 }, { - "epoch": 0.81, - "grad_norm": 0.16985618467855765, - "learning_rate": 1.8853989137906137e-06, - "loss": 0.0886, + "epoch": 0.65, + "grad_norm": 0.5024436298825485, + "learning_rate": 5.898951147257733e-06, + "loss": 0.2511, "step": 14051 }, { - "epoch": 0.81, - "grad_norm": 0.38349041687568947, - "learning_rate": 1.8843115224371467e-06, - "loss": 0.2769, + "epoch": 0.65, + "grad_norm": 0.437778933677139, + "learning_rate": 5.897594153238191e-06, + "loss": 0.2854, "step": 14052 }, { - "epoch": 0.81, - "grad_norm": 0.6251738290863288, - "learning_rate": 1.8832244121296217e-06, - "loss": 0.3818, + "epoch": 0.65, + "grad_norm": 0.34278946780478636, + "learning_rate": 5.896237250041958e-06, + "loss": 0.1695, "step": 14053 }, { - "epoch": 0.81, - "grad_norm": 0.3763987776386038, - "learning_rate": 1.8821375829056842e-06, - "loss": 0.2174, + "epoch": 0.65, + "grad_norm": 0.3620538072606337, + "learning_rate": 5.894880437699073e-06, + "loss": 0.2964, "step": 14054 }, { - "epoch": 0.81, - "grad_norm": 0.31215780044633634, - "learning_rate": 1.8810510348029753e-06, - "loss": 0.2912, + "epoch": 0.65, + "grad_norm": 0.7879645578272876, + "learning_rate": 5.893523716239582e-06, + "loss": 0.3843, "step": 14055 }, { - "epoch": 0.81, - "grad_norm": 1.225420066022846, - "learning_rate": 1.8799647678591203e-06, - "loss": 0.7848, + "epoch": 0.65, + "grad_norm": 0.6183889401103069, + "learning_rate": 5.892167085693518e-06, + "loss": 0.3074, "step": 14056 }, { - "epoch": 0.81, - "grad_norm": 0.2622067946912736, - "learning_rate": 1.878878782111736e-06, - "loss": 0.1747, + "epoch": 0.65, + "grad_norm": 0.4354465362710058, + "learning_rate": 5.8908105460909175e-06, + "loss": 0.2495, "step": 14057 }, { - "epoch": 0.81, - "grad_norm": 0.35062197492639324, - "learning_rate": 1.8777930775984277e-06, - "loss": 0.222, + "epoch": 0.65, + "grad_norm": 0.3571420235220315, + "learning_rate": 5.88945409746181e-06, + "loss": 0.3204, "step": 14058 }, { - "epoch": 0.81, - "grad_norm": 0.36092983810301527, - "learning_rate": 1.8767076543567986e-06, - "loss": 0.305, + "epoch": 0.65, + "grad_norm": 0.21444502755462191, + "learning_rate": 5.888097739836225e-06, + "loss": 0.1131, "step": 14059 }, { - "epoch": 0.81, - "grad_norm": 0.5759708595809394, - "learning_rate": 1.8756225124244332e-06, - "loss": 0.3023, + "epoch": 0.65, + "grad_norm": 0.42014320869758087, + "learning_rate": 5.886741473244194e-06, + "loss": 0.2521, "step": 14060 }, { - "epoch": 0.81, - "grad_norm": 0.3373531812343405, - "learning_rate": 1.8745376518389113e-06, - "loss": 0.2121, + "epoch": 0.65, + "grad_norm": 0.8335583293861194, + "learning_rate": 5.885385297715744e-06, + "loss": 0.388, "step": 14061 }, { - "epoch": 0.81, - "grad_norm": 0.3674552882376826, - "learning_rate": 1.8734530726377997e-06, - "loss": 0.3401, + "epoch": 0.65, + "grad_norm": 0.29658374384207536, + "learning_rate": 5.884029213280896e-06, + "loss": 0.2804, "step": 14062 }, { - "epoch": 0.81, - "grad_norm": 0.20581830272049467, - "learning_rate": 1.8723687748586605e-06, - "loss": 0.1439, + "epoch": 0.65, + "grad_norm": 0.5078248457646383, + "learning_rate": 5.882673219969673e-06, + "loss": 0.268, "step": 14063 }, { - "epoch": 0.81, - "grad_norm": 0.3971041780692171, - "learning_rate": 1.8712847585390403e-06, - "loss": 0.2296, + "epoch": 0.65, + "grad_norm": 0.4935444353691137, + "learning_rate": 5.881317317812099e-06, + "loss": 0.2933, "step": 14064 }, { - "epoch": 0.81, - "grad_norm": 0.7877507215245848, - "learning_rate": 1.8702010237164803e-06, - "loss": 0.3901, + "epoch": 0.65, + "grad_norm": 0.31485789609760956, + "learning_rate": 5.87996150683819e-06, + "loss": 0.2039, "step": 14065 }, { - "epoch": 0.81, - "grad_norm": 0.3564769039000346, - "learning_rate": 1.8691175704285091e-06, - "loss": 0.3047, + "epoch": 0.65, + "grad_norm": 0.31179708209101076, + "learning_rate": 5.8786057870779625e-06, + "loss": 0.2434, "step": 14066 }, { - "epoch": 0.81, - "grad_norm": 0.32404927429594854, - "learning_rate": 1.8680343987126448e-06, - "loss": 0.1954, + "epoch": 0.65, + "grad_norm": 0.7656375068291004, + "learning_rate": 5.877250158561425e-06, + "loss": 0.4008, "step": 14067 }, { - "epoch": 0.81, - "grad_norm": 1.125143640483104, - "learning_rate": 1.8669515086064006e-06, - "loss": 0.7126, + "epoch": 0.65, + "grad_norm": 0.4399311701859046, + "learning_rate": 5.875894621318601e-06, + "loss": 0.2915, "step": 14068 }, { - "epoch": 0.81, - "grad_norm": 0.4371310503844042, - "learning_rate": 1.8658689001472775e-06, - "loss": 0.2545, + "epoch": 0.65, + "grad_norm": 0.5576276289419394, + "learning_rate": 5.874539175379494e-06, + "loss": 0.3929, "step": 14069 }, { - "epoch": 0.81, - "grad_norm": 0.26684346084216576, - "learning_rate": 1.8647865733727644e-06, - "loss": 0.2519, + "epoch": 0.65, + "grad_norm": 0.36723683065939416, + "learning_rate": 5.873183820774115e-06, + "loss": 0.273, "step": 14070 }, { - "epoch": 0.81, - "grad_norm": 0.30304389288387373, - "learning_rate": 1.8637045283203391e-06, - "loss": 0.1918, + "epoch": 0.65, + "grad_norm": 0.3406893676365391, + "learning_rate": 5.871828557532465e-06, + "loss": 0.2242, "step": 14071 }, { - "epoch": 0.81, - "grad_norm": 0.9331663827577262, - "learning_rate": 1.8626227650274787e-06, - "loss": 0.4022, + "epoch": 0.65, + "grad_norm": 0.4644958537405138, + "learning_rate": 5.8704733856845545e-06, + "loss": 0.3321, "step": 14072 }, { - "epoch": 0.81, - "grad_norm": 0.3671040035406563, - "learning_rate": 1.8615412835316426e-06, - "loss": 0.2767, + "epoch": 0.65, + "grad_norm": 0.28841420157094566, + "learning_rate": 5.869118305260384e-06, + "loss": 0.2105, "step": 14073 }, { - "epoch": 0.81, - "grad_norm": 0.33313642195654125, - "learning_rate": 1.8604600838702814e-06, - "loss": 0.2546, + "epoch": 0.65, + "grad_norm": 0.611158836154535, + "learning_rate": 5.8677633162899515e-06, + "loss": 0.2855, "step": 14074 }, { - "epoch": 0.81, - "grad_norm": 0.6134663014323616, - "learning_rate": 1.8593791660808357e-06, - "loss": 0.3682, + "epoch": 0.65, + "grad_norm": 0.42617592692666373, + "learning_rate": 5.8664084188032575e-06, + "loss": 0.3183, "step": 14075 }, { - "epoch": 0.81, - "grad_norm": 0.35912139079324334, - "learning_rate": 1.8582985302007405e-06, - "loss": 0.2771, + "epoch": 0.65, + "grad_norm": 1.0436855478240445, + "learning_rate": 5.865053612830296e-06, + "loss": 0.3864, "step": 14076 }, { - "epoch": 0.81, - "grad_norm": 0.3628918133959204, - "learning_rate": 1.8572181762674192e-06, - "loss": 0.1353, + "epoch": 0.65, + "grad_norm": 0.526657586806867, + "learning_rate": 5.863698898401062e-06, + "loss": 0.3342, "step": 14077 }, { - "epoch": 0.81, - "grad_norm": 0.31315581719439217, - "learning_rate": 1.8561381043182803e-06, - "loss": 0.2639, + "epoch": 0.65, + "grad_norm": 0.3017474326605516, + "learning_rate": 5.862344275545548e-06, + "loss": 0.2773, "step": 14078 }, { - "epoch": 0.81, - "grad_norm": 0.3602637118248707, - "learning_rate": 1.8550583143907274e-06, - "loss": 0.2798, + "epoch": 0.65, + "grad_norm": 0.3417635397111297, + "learning_rate": 5.8609897442937455e-06, + "loss": 0.1226, "step": 14079 }, { - "epoch": 0.81, - "grad_norm": 0.7198841723664006, - "learning_rate": 1.8539788065221598e-06, - "loss": 0.351, + "epoch": 0.65, + "grad_norm": 0.7097763065674744, + "learning_rate": 5.859635304675638e-06, + "loss": 0.3446, "step": 14080 }, { - "epoch": 0.81, - "grad_norm": 0.6480899378091401, - "learning_rate": 1.8528995807499528e-06, - "loss": 0.3457, + "epoch": 0.65, + "grad_norm": 0.4278757754877832, + "learning_rate": 5.858280956721217e-06, + "loss": 0.331, "step": 14081 }, { - "epoch": 0.81, - "grad_norm": 0.2939543727068227, - "learning_rate": 1.8518206371114833e-06, - "loss": 0.264, + "epoch": 0.65, + "grad_norm": 0.3825985560055229, + "learning_rate": 5.856926700460464e-06, + "loss": 0.3075, "step": 14082 }, { - "epoch": 0.81, - "grad_norm": 0.2582146968956923, - "learning_rate": 1.8507419756441114e-06, - "loss": 0.193, + "epoch": 0.65, + "grad_norm": 0.4511504948413761, + "learning_rate": 5.8555725359233586e-06, + "loss": 0.2254, "step": 14083 }, { - "epoch": 0.81, - "grad_norm": 1.4650946807988512, - "learning_rate": 1.8496635963851973e-06, - "loss": 0.1613, + "epoch": 0.65, + "grad_norm": 0.5110215449893202, + "learning_rate": 5.85421846313988e-06, + "loss": 0.385, "step": 14084 }, { - "epoch": 0.81, - "grad_norm": 0.30943678757627774, - "learning_rate": 1.8485854993720831e-06, - "loss": 0.2519, + "epoch": 0.65, + "grad_norm": 0.30071075920063023, + "learning_rate": 5.852864482140013e-06, + "loss": 0.2123, "step": 14085 }, { - "epoch": 0.81, - "grad_norm": 0.33098435069101717, - "learning_rate": 1.8475076846421025e-06, - "loss": 0.303, + "epoch": 0.65, + "grad_norm": 0.3069837889145256, + "learning_rate": 5.851510592953729e-06, + "loss": 0.206, "step": 14086 }, { - "epoch": 0.81, - "grad_norm": 0.8136153140125502, - "learning_rate": 1.8464301522325767e-06, - "loss": 0.3076, + "epoch": 0.65, + "grad_norm": 0.5843109546453796, + "learning_rate": 5.850156795611002e-06, + "loss": 0.3289, "step": 14087 }, { - "epoch": 0.81, - "grad_norm": 0.3524686554462998, - "learning_rate": 1.8453529021808282e-06, - "loss": 0.2519, + "epoch": 0.65, + "grad_norm": 0.7342322343790678, + "learning_rate": 5.848803090141806e-06, + "loss": 0.4923, "step": 14088 }, { - "epoch": 0.81, - "grad_norm": 0.4539966783959743, - "learning_rate": 1.8442759345241567e-06, - "loss": 0.2466, + "epoch": 0.65, + "grad_norm": 0.38461868813903305, + "learning_rate": 5.847449476576104e-06, + "loss": 0.2303, "step": 14089 }, { - "epoch": 0.81, - "grad_norm": 0.2479546022298384, - "learning_rate": 1.8431992492998595e-06, - "loss": 0.1885, + "epoch": 0.65, + "grad_norm": 0.3299582547819613, + "learning_rate": 5.84609595494387e-06, + "loss": 0.2902, "step": 14090 }, { - "epoch": 0.81, - "grad_norm": 0.2979881583987387, - "learning_rate": 1.8421228465452213e-06, - "loss": 0.2557, + "epoch": 0.65, + "grad_norm": 0.26534864811694686, + "learning_rate": 5.844742525275069e-06, + "loss": 0.17, "step": 14091 }, { - "epoch": 0.81, - "grad_norm": 0.8722453179515083, - "learning_rate": 1.8410467262975152e-06, - "loss": 0.5817, + "epoch": 0.65, + "grad_norm": 0.44361048012692367, + "learning_rate": 5.843389187599664e-06, + "loss": 0.113, "step": 14092 }, { - "epoch": 0.81, - "grad_norm": 0.5745595604603996, - "learning_rate": 1.8399708885940136e-06, - "loss": 0.2642, + "epoch": 0.65, + "grad_norm": 0.42645125874693257, + "learning_rate": 5.842035941947614e-06, + "loss": 0.3284, "step": 14093 }, { - "epoch": 0.81, - "grad_norm": 0.28733919145554143, - "learning_rate": 1.8388953334719684e-06, - "loss": 0.2482, + "epoch": 0.65, + "grad_norm": 0.3838857290026101, + "learning_rate": 5.840682788348882e-06, + "loss": 0.2973, "step": 14094 }, { - "epoch": 0.81, - "grad_norm": 0.34111214746268476, - "learning_rate": 1.837820060968627e-06, - "loss": 0.2435, + "epoch": 0.65, + "grad_norm": 0.690880998125243, + "learning_rate": 5.839329726833425e-06, + "loss": 0.3819, "step": 14095 }, { - "epoch": 0.81, - "grad_norm": 0.5477066304455874, - "learning_rate": 1.8367450711212232e-06, - "loss": 0.2656, + "epoch": 0.65, + "grad_norm": 0.3968293947113594, + "learning_rate": 5.837976757431198e-06, + "loss": 0.2293, "step": 14096 }, { - "epoch": 0.81, - "grad_norm": 0.2817456644619849, - "learning_rate": 1.8356703639669904e-06, - "loss": 0.1833, + "epoch": 0.65, + "grad_norm": 0.30187270205852207, + "learning_rate": 5.836623880172152e-06, + "loss": 0.1612, "step": 14097 }, { - "epoch": 0.81, - "grad_norm": 0.34795741382488465, - "learning_rate": 1.8345959395431401e-06, - "loss": 0.2963, + "epoch": 0.65, + "grad_norm": 0.3934615622872843, + "learning_rate": 5.83527109508624e-06, + "loss": 0.304, "step": 14098 }, { - "epoch": 0.81, - "grad_norm": 0.6400339452738504, - "learning_rate": 1.8335217978868825e-06, - "loss": 0.4564, + "epoch": 0.65, + "grad_norm": 0.35200652692966583, + "learning_rate": 5.833918402203416e-06, + "loss": 0.2391, "step": 14099 }, { - "epoch": 0.81, - "grad_norm": 0.2979103780116249, - "learning_rate": 1.832447939035411e-06, - "loss": 0.1861, + "epoch": 0.65, + "grad_norm": 0.6820557851729071, + "learning_rate": 5.8325658015536205e-06, + "loss": 0.3974, "step": 14100 }, { - "epoch": 0.81, - "grad_norm": 0.375418491704937, - "learning_rate": 1.8313743630259184e-06, - "loss": 0.1906, + "epoch": 0.65, + "grad_norm": 0.34812750767947015, + "learning_rate": 5.8312132931667994e-06, + "loss": 0.295, "step": 14101 }, { - "epoch": 0.81, - "grad_norm": 0.3613002064565976, - "learning_rate": 1.8303010698955803e-06, - "loss": 0.2885, + "epoch": 0.65, + "grad_norm": 0.32502191251457896, + "learning_rate": 5.829860877072903e-06, + "loss": 0.206, "step": 14102 }, { - "epoch": 0.81, - "grad_norm": 0.331150447620705, - "learning_rate": 1.8292280596815649e-06, - "loss": 0.1993, + "epoch": 0.65, + "grad_norm": 0.29381155656663266, + "learning_rate": 5.828508553301864e-06, + "loss": 0.1668, "step": 14103 }, { - "epoch": 0.81, - "grad_norm": 0.5396453246374567, - "learning_rate": 1.8281553324210278e-06, - "loss": 0.3703, + "epoch": 0.65, + "grad_norm": 0.6288475125107023, + "learning_rate": 5.827156321883629e-06, + "loss": 0.331, "step": 14104 }, { - "epoch": 0.81, - "grad_norm": 1.2441896449953618, - "learning_rate": 1.8270828881511238e-06, - "loss": 0.6757, + "epoch": 0.65, + "grad_norm": 0.37856086051881566, + "learning_rate": 5.825804182848127e-06, + "loss": 0.2459, "step": 14105 }, { - "epoch": 0.81, - "grad_norm": 0.22987355409113447, - "learning_rate": 1.8260107269089865e-06, - "loss": 0.206, + "epoch": 0.65, + "grad_norm": 0.35770407623246614, + "learning_rate": 5.824452136225298e-06, + "loss": 0.2969, "step": 14106 }, { - "epoch": 0.81, - "grad_norm": 1.4125169977730276, - "learning_rate": 1.8249388487317465e-06, - "loss": 0.6422, + "epoch": 0.65, + "grad_norm": 0.6400617567491976, + "learning_rate": 5.823100182045074e-06, + "loss": 0.3684, "step": 14107 }, { - "epoch": 0.81, - "grad_norm": 0.3321933632573765, - "learning_rate": 1.823867253656524e-06, - "loss": 0.1871, + "epoch": 0.65, + "grad_norm": 0.4054847395861062, + "learning_rate": 5.821748320337389e-06, + "loss": 0.3155, "step": 14108 }, { - "epoch": 0.81, - "grad_norm": 0.33090979186783415, - "learning_rate": 1.8227959417204222e-06, - "loss": 0.2569, + "epoch": 0.65, + "grad_norm": 0.2751408132879385, + "learning_rate": 5.82039655113217e-06, + "loss": 0.2118, "step": 14109 }, { - "epoch": 0.81, - "grad_norm": 0.3767301347240297, - "learning_rate": 1.8217249129605496e-06, - "loss": 0.2513, + "epoch": 0.65, + "grad_norm": 0.4736254919197536, + "learning_rate": 5.819044874459335e-06, + "loss": 0.2494, "step": 14110 }, { - "epoch": 0.81, - "grad_norm": 0.9509296598908838, - "learning_rate": 1.820654167413991e-06, - "loss": 0.5372, + "epoch": 0.65, + "grad_norm": 0.33354660241817974, + "learning_rate": 5.8176932903488245e-06, + "loss": 0.2667, "step": 14111 }, { - "epoch": 0.81, - "grad_norm": 0.3118256550146787, - "learning_rate": 1.8195837051178267e-06, - "loss": 0.2466, + "epoch": 0.65, + "grad_norm": 0.7121068681611223, + "learning_rate": 5.81634179883055e-06, + "loss": 0.3407, "step": 14112 }, { - "epoch": 0.81, - "grad_norm": 0.21152041292581364, - "learning_rate": 1.8185135261091247e-06, - "loss": 0.0666, + "epoch": 0.65, + "grad_norm": 0.3555368937200797, + "learning_rate": 5.814990399934439e-06, + "loss": 0.2924, "step": 14113 }, { - "epoch": 0.81, - "grad_norm": 0.2945684219244831, - "learning_rate": 1.817443630424952e-06, - "loss": 0.2494, + "epoch": 0.65, + "grad_norm": 0.396501409591606, + "learning_rate": 5.813639093690404e-06, + "loss": 0.3013, "step": 14114 }, { - "epoch": 0.81, - "grad_norm": 0.3782111624907756, - "learning_rate": 1.8163740181023526e-06, - "loss": 0.2492, + "epoch": 0.65, + "grad_norm": 0.2868657038564414, + "learning_rate": 5.812287880128365e-06, + "loss": 0.0894, "step": 14115 }, { - "epoch": 0.81, - "grad_norm": 0.4657972759725943, - "learning_rate": 1.8153046891783654e-06, - "loss": 0.2856, + "epoch": 0.65, + "grad_norm": 0.39630154708749077, + "learning_rate": 5.810936759278238e-06, + "loss": 0.25, "step": 14116 }, { - "epoch": 0.81, - "grad_norm": 0.340304507867154, - "learning_rate": 1.8142356436900288e-06, - "loss": 0.3124, + "epoch": 0.65, + "grad_norm": 0.32714653874073996, + "learning_rate": 5.809585731169932e-06, + "loss": 0.2666, "step": 14117 }, { - "epoch": 0.81, - "grad_norm": 0.3706279739554896, - "learning_rate": 1.8131668816743586e-06, - "loss": 0.2635, + "epoch": 0.65, + "grad_norm": 0.5399109562101687, + "learning_rate": 5.8082347958333625e-06, + "loss": 0.2699, "step": 14118 }, { - "epoch": 0.81, - "grad_norm": 0.6320484353599375, - "learning_rate": 1.8120984031683686e-06, - "loss": 0.2199, + "epoch": 0.65, + "grad_norm": 0.8554431642166302, + "learning_rate": 5.80688395329843e-06, + "loss": 0.5314, "step": 14119 }, { - "epoch": 0.81, - "grad_norm": 0.3099710789466092, - "learning_rate": 1.811030208209058e-06, - "loss": 0.1903, + "epoch": 0.65, + "grad_norm": 0.3849877892034726, + "learning_rate": 5.8055332035950466e-06, + "loss": 0.2853, "step": 14120 }, { - "epoch": 0.81, - "grad_norm": 0.4168781646528722, - "learning_rate": 1.8099622968334163e-06, - "loss": 0.2884, + "epoch": 0.65, + "grad_norm": 0.5338949265551288, + "learning_rate": 5.804182546753118e-06, + "loss": 0.3385, "step": 14121 }, { - "epoch": 0.81, - "grad_norm": 0.28981006334400455, - "learning_rate": 1.8088946690784314e-06, - "loss": 0.2911, + "epoch": 0.65, + "grad_norm": 0.22869768903670576, + "learning_rate": 5.802831982802539e-06, + "loss": 0.1729, "step": 14122 }, { - "epoch": 0.81, - "grad_norm": 1.0230460545064057, - "learning_rate": 1.8078273249810718e-06, - "loss": 0.4709, + "epoch": 0.65, + "grad_norm": 0.38241936189181336, + "learning_rate": 5.801481511773217e-06, + "loss": 0.2434, "step": 14123 }, { - "epoch": 0.81, - "grad_norm": 0.36665359776477047, - "learning_rate": 1.806760264578299e-06, - "loss": 0.2622, + "epoch": 0.65, + "grad_norm": 0.7834390698914407, + "learning_rate": 5.800131133695046e-06, + "loss": 0.4461, "step": 14124 }, { - "epoch": 0.81, - "grad_norm": 1.4271593257453827, - "learning_rate": 1.8056934879070642e-06, - "loss": 0.5857, + "epoch": 0.65, + "grad_norm": 0.3282578729058527, + "learning_rate": 5.798780848597929e-06, + "loss": 0.2427, "step": 14125 }, { - "epoch": 0.81, - "grad_norm": 0.2838783227056397, - "learning_rate": 1.8046269950043138e-06, - "loss": 0.208, + "epoch": 0.65, + "grad_norm": 0.39201846305756777, + "learning_rate": 5.7974306565117544e-06, + "loss": 0.2929, "step": 14126 }, { - "epoch": 0.81, - "grad_norm": 0.3194121737248507, - "learning_rate": 1.803560785906977e-06, - "loss": 0.2564, + "epoch": 0.65, + "grad_norm": 0.47533860282002594, + "learning_rate": 5.796080557466406e-06, + "loss": 0.2677, "step": 14127 }, { - "epoch": 0.81, - "grad_norm": 0.4439798130696629, - "learning_rate": 1.8024948606519787e-06, - "loss": 0.2703, + "epoch": 0.65, + "grad_norm": 0.48870676441467303, + "learning_rate": 5.794730551491792e-06, + "loss": 0.2066, "step": 14128 }, { - "epoch": 0.81, - "grad_norm": 0.301214908000095, - "learning_rate": 1.8014292192762285e-06, - "loss": 0.2513, + "epoch": 0.65, + "grad_norm": 0.27686320923913305, + "learning_rate": 5.793380638617785e-06, + "loss": 0.2549, "step": 14129 }, { - "epoch": 0.81, - "grad_norm": 0.3262329360305483, - "learning_rate": 1.8003638618166342e-06, - "loss": 0.257, + "epoch": 0.65, + "grad_norm": 0.4491758521915681, + "learning_rate": 5.792030818874279e-06, + "loss": 0.3713, "step": 14130 }, { - "epoch": 0.81, - "grad_norm": 1.184176185516564, - "learning_rate": 1.7992987883100877e-06, - "loss": 0.4787, + "epoch": 0.65, + "grad_norm": 1.2268351057548426, + "learning_rate": 5.790681092291153e-06, + "loss": 0.3928, "step": 14131 }, { - "epoch": 0.81, - "grad_norm": 0.6445822576434146, - "learning_rate": 1.7982339987934705e-06, - "loss": 0.2847, + "epoch": 0.65, + "grad_norm": 0.33786954246303263, + "learning_rate": 5.7893314588982905e-06, + "loss": 0.2693, "step": 14132 }, { - "epoch": 0.81, - "grad_norm": 0.40948848681739713, - "learning_rate": 1.7971694933036576e-06, - "loss": 0.2681, + "epoch": 0.65, + "grad_norm": 0.5407620300106977, + "learning_rate": 5.7879819187255745e-06, + "loss": 0.3268, "step": 14133 }, { - "epoch": 0.81, - "grad_norm": 0.23710816941660431, - "learning_rate": 1.7961052718775096e-06, - "loss": 0.2382, + "epoch": 0.65, + "grad_norm": 0.49344767327277367, + "learning_rate": 5.786632471802876e-06, + "loss": 0.3629, "step": 14134 }, { - "epoch": 0.81, - "grad_norm": 0.47714786473531884, - "learning_rate": 1.7950413345518858e-06, - "loss": 0.2881, + "epoch": 0.65, + "grad_norm": 0.24936355129212784, + "learning_rate": 5.785283118160077e-06, + "loss": 0.1778, "step": 14135 }, { - "epoch": 0.81, - "grad_norm": 0.4062079094273239, - "learning_rate": 1.7939776813636278e-06, - "loss": 0.179, + "epoch": 0.65, + "grad_norm": 0.43376290978721116, + "learning_rate": 5.783933857827044e-06, + "loss": 0.2646, "step": 14136 }, { - "epoch": 0.81, - "grad_norm": 0.3923200485078189, - "learning_rate": 1.7929143123495695e-06, - "loss": 0.2899, + "epoch": 0.65, + "grad_norm": 0.4063260275117608, + "learning_rate": 5.782584690833651e-06, + "loss": 0.334, "step": 14137 }, { - "epoch": 0.81, - "grad_norm": 0.4917783194741124, - "learning_rate": 1.7918512275465338e-06, - "loss": 0.3125, + "epoch": 0.65, + "grad_norm": 0.3059287155354174, + "learning_rate": 5.7812356172097725e-06, + "loss": 0.1861, "step": 14138 }, { - "epoch": 0.81, - "grad_norm": 0.3656188792929695, - "learning_rate": 1.790788426991339e-06, - "loss": 0.1853, + "epoch": 0.65, + "grad_norm": 0.9392788631669434, + "learning_rate": 5.779886636985268e-06, + "loss": 0.4264, "step": 14139 }, { - "epoch": 0.81, - "grad_norm": 0.5413502468999466, - "learning_rate": 1.7897259107207888e-06, - "loss": 0.4162, + "epoch": 0.65, + "grad_norm": 0.43336313756211825, + "learning_rate": 5.778537750190005e-06, + "loss": 0.3307, "step": 14140 }, { - "epoch": 0.81, - "grad_norm": 0.32025923404560214, - "learning_rate": 1.7886636787716761e-06, - "loss": 0.2454, + "epoch": 0.65, + "grad_norm": 0.31510948105453057, + "learning_rate": 5.7771889568538495e-06, + "loss": 0.1886, "step": 14141 }, { - "epoch": 0.81, - "grad_norm": 0.2429282363963751, - "learning_rate": 1.787601731180786e-06, - "loss": 0.173, + "epoch": 0.65, + "grad_norm": 0.3403563397976049, + "learning_rate": 5.775840257006663e-06, + "loss": 0.2883, "step": 14142 }, { - "epoch": 0.81, - "grad_norm": 0.5438855371114105, - "learning_rate": 1.7865400679848953e-06, - "loss": 0.3524, + "epoch": 0.65, + "grad_norm": 0.42484941290585215, + "learning_rate": 5.7744916506782976e-06, + "loss": 0.2322, "step": 14143 }, { - "epoch": 0.81, - "grad_norm": 0.6026762575249064, - "learning_rate": 1.7854786892207709e-06, - "loss": 0.4291, + "epoch": 0.65, + "grad_norm": 0.3451680086973624, + "learning_rate": 5.7731431378986155e-06, + "loss": 0.1851, "step": 14144 }, { - "epoch": 0.81, - "grad_norm": 0.418008323429287, - "learning_rate": 1.7844175949251653e-06, - "loss": 0.2601, + "epoch": 0.65, + "grad_norm": 0.37366765411841035, + "learning_rate": 5.771794718697474e-06, + "loss": 0.3001, "step": 14145 }, { - "epoch": 0.81, - "grad_norm": 0.2651924993585563, - "learning_rate": 1.7833567851348254e-06, - "loss": 0.232, + "epoch": 0.65, + "grad_norm": 0.7995982250513114, + "learning_rate": 5.7704463931047186e-06, + "loss": 0.4337, "step": 14146 }, { - "epoch": 0.81, - "grad_norm": 0.2537043627469472, - "learning_rate": 1.7822962598864868e-06, - "loss": 0.1781, + "epoch": 0.65, + "grad_norm": 0.33748847832534323, + "learning_rate": 5.769098161150206e-06, + "loss": 0.262, "step": 14147 }, { - "epoch": 0.81, - "grad_norm": 0.37665750496002826, - "learning_rate": 1.7812360192168742e-06, - "loss": 0.2524, + "epoch": 0.65, + "grad_norm": 0.2901482807547783, + "learning_rate": 5.76775002286378e-06, + "loss": 0.1651, "step": 14148 }, { - "epoch": 0.81, - "grad_norm": 0.353511059581225, - "learning_rate": 1.7801760631627064e-06, - "loss": 0.2421, + "epoch": 0.65, + "grad_norm": 0.29429213222781453, + "learning_rate": 5.766401978275288e-06, + "loss": 0.2411, "step": 14149 }, { - "epoch": 0.81, - "grad_norm": 0.7572573431074694, - "learning_rate": 1.7791163917606846e-06, - "loss": 0.3338, + "epoch": 0.65, + "grad_norm": 0.34971280364525265, + "learning_rate": 5.7650540274145806e-06, + "loss": 0.2426, "step": 14150 }, { - "epoch": 0.81, - "grad_norm": 0.8686696236646662, - "learning_rate": 1.7780570050475122e-06, - "loss": 0.3671, + "epoch": 0.65, + "grad_norm": 0.9465308797483732, + "learning_rate": 5.763706170311492e-06, + "loss": 0.2854, "step": 14151 }, { - "epoch": 0.81, - "grad_norm": 0.3290711277167447, - "learning_rate": 1.7769979030598706e-06, - "loss": 0.231, + "epoch": 0.65, + "grad_norm": 0.9676438196133452, + "learning_rate": 5.762358406995868e-06, + "loss": 0.5064, "step": 14152 }, { - "epoch": 0.81, - "grad_norm": 0.26873437490942453, - "learning_rate": 1.7759390858344395e-06, - "loss": 0.2475, + "epoch": 0.65, + "grad_norm": 0.2987169408719296, + "learning_rate": 5.761010737497538e-06, + "loss": 0.2686, "step": 14153 }, { - "epoch": 0.81, - "grad_norm": 0.3209443331607646, - "learning_rate": 1.7748805534078805e-06, - "loss": 0.1887, + "epoch": 0.65, + "grad_norm": 0.4712628696276102, + "learning_rate": 5.759663161846352e-06, + "loss": 0.2588, "step": 14154 }, { - "epoch": 0.81, - "grad_norm": 0.423738269074664, - "learning_rate": 1.773822305816857e-06, - "loss": 0.2495, + "epoch": 0.65, + "grad_norm": 0.24464770839307945, + "learning_rate": 5.758315680072137e-06, + "loss": 0.1562, "step": 14155 }, { - "epoch": 0.81, - "grad_norm": 0.8545198353982074, - "learning_rate": 1.7727643430980135e-06, - "loss": 0.411, + "epoch": 0.65, + "grad_norm": 0.407422811341761, + "learning_rate": 5.756968292204721e-06, + "loss": 0.2751, "step": 14156 }, { - "epoch": 0.81, - "grad_norm": 0.37808128113435296, - "learning_rate": 1.7717066652879877e-06, - "loss": 0.3064, + "epoch": 0.65, + "grad_norm": 0.451236921817357, + "learning_rate": 5.755620998273938e-06, + "loss": 0.2947, "step": 14157 }, { - "epoch": 0.81, - "grad_norm": 0.2977203707383954, - "learning_rate": 1.770649272423406e-06, - "loss": 0.276, + "epoch": 0.65, + "grad_norm": 1.2393826991287544, + "learning_rate": 5.754273798309613e-06, + "loss": 0.3248, "step": 14158 }, { - "epoch": 0.81, - "grad_norm": 0.2657906575491931, - "learning_rate": 1.7695921645408832e-06, - "loss": 0.1227, + "epoch": 0.65, + "grad_norm": 0.4035827372022771, + "learning_rate": 5.752926692341581e-06, + "loss": 0.2444, "step": 14159 }, { - "epoch": 0.81, - "grad_norm": 0.33109296938461874, - "learning_rate": 1.7685353416770322e-06, - "loss": 0.2161, + "epoch": 0.65, + "grad_norm": 0.5421600021492852, + "learning_rate": 5.751579680399652e-06, + "loss": 0.3986, "step": 14160 }, { - "epoch": 0.81, - "grad_norm": 0.24742271713944344, - "learning_rate": 1.7674788038684488e-06, - "loss": 0.258, + "epoch": 0.65, + "grad_norm": 0.2206263915601203, + "learning_rate": 5.7502327625136565e-06, + "loss": 0.1588, "step": 14161 }, { - "epoch": 0.81, - "grad_norm": 0.9296375061584038, - "learning_rate": 1.7664225511517196e-06, - "loss": 0.2892, + "epoch": 0.65, + "grad_norm": 0.4731552190960419, + "learning_rate": 5.748885938713413e-06, + "loss": 0.2715, "step": 14162 }, { - "epoch": 0.81, - "grad_norm": 0.427928964518346, - "learning_rate": 1.7653665835634214e-06, - "loss": 0.2758, + "epoch": 0.65, + "grad_norm": 0.6159099671232361, + "learning_rate": 5.747539209028736e-06, + "loss": 0.3448, "step": 14163 }, { - "epoch": 0.81, - "grad_norm": 0.5014102721249964, - "learning_rate": 1.7643109011401272e-06, - "loss": 0.3853, + "epoch": 0.65, + "grad_norm": 1.1071598758855485, + "learning_rate": 5.746192573489446e-06, + "loss": 0.295, "step": 14164 }, { - "epoch": 0.81, - "grad_norm": 0.36698527266326064, - "learning_rate": 1.7632555039183918e-06, - "loss": 0.2581, + "epoch": 0.65, + "grad_norm": 0.3149059586994086, + "learning_rate": 5.744846032125347e-06, + "loss": 0.2753, "step": 14165 }, { - "epoch": 0.81, - "grad_norm": 0.40372847248005095, - "learning_rate": 1.762200391934764e-06, - "loss": 0.2874, + "epoch": 0.65, + "grad_norm": 0.5033871897175037, + "learning_rate": 5.7434995849662566e-06, + "loss": 0.3761, "step": 14166 }, { - "epoch": 0.81, - "grad_norm": 0.5512841267027948, - "learning_rate": 1.7611455652257802e-06, - "loss": 0.3146, + "epoch": 0.65, + "grad_norm": 0.3391029684368323, + "learning_rate": 5.742153232041987e-06, + "loss": 0.168, "step": 14167 }, { - "epoch": 0.81, - "grad_norm": 0.2772187100793543, - "learning_rate": 1.7600910238279745e-06, - "loss": 0.096, + "epoch": 0.65, + "grad_norm": 0.375337184900047, + "learning_rate": 5.740806973382338e-06, + "loss": 0.2639, "step": 14168 }, { - "epoch": 0.81, - "grad_norm": 0.32362380832747006, - "learning_rate": 1.7590367677778607e-06, - "loss": 0.2657, + "epoch": 0.65, + "grad_norm": 0.3568123759604016, + "learning_rate": 5.73946080901712e-06, + "loss": 0.3067, "step": 14169 }, { - "epoch": 0.81, - "grad_norm": 0.3396696390434374, - "learning_rate": 1.7579827971119501e-06, - "loss": 0.2754, + "epoch": 0.65, + "grad_norm": 0.4613382979896434, + "learning_rate": 5.738114738976126e-06, + "loss": 0.2418, "step": 14170 }, { - "epoch": 0.81, - "grad_norm": 0.9538742821374108, - "learning_rate": 1.756929111866741e-06, - "loss": 0.4811, + "epoch": 0.65, + "grad_norm": 0.33662864431754785, + "learning_rate": 5.736768763289172e-06, + "loss": 0.1917, "step": 14171 }, { - "epoch": 0.81, - "grad_norm": 0.6335048928071565, - "learning_rate": 1.7558757120787196e-06, - "loss": 0.2464, + "epoch": 0.65, + "grad_norm": 1.359983792665407, + "learning_rate": 5.735422881986045e-06, + "loss": 0.7775, "step": 14172 }, { - "epoch": 0.81, - "grad_norm": 0.29133111716058585, - "learning_rate": 1.7548225977843703e-06, - "loss": 0.2619, + "epoch": 0.65, + "grad_norm": 0.3852553956596151, + "learning_rate": 5.7340770950965485e-06, + "loss": 0.313, "step": 14173 }, { - "epoch": 0.81, - "grad_norm": 0.23845768488865898, - "learning_rate": 1.7537697690201604e-06, - "loss": 0.1823, + "epoch": 0.65, + "grad_norm": 0.3262234500471043, + "learning_rate": 5.73273140265047e-06, + "loss": 0.2284, "step": 14174 }, { - "epoch": 0.81, - "grad_norm": 0.5880555726294913, - "learning_rate": 1.7527172258225479e-06, - "loss": 0.143, + "epoch": 0.65, + "grad_norm": 0.49914491088904966, + "learning_rate": 5.731385804677605e-06, + "loss": 0.3025, "step": 14175 }, { - "epoch": 0.81, - "grad_norm": 0.3607590540833608, - "learning_rate": 1.7516649682279807e-06, - "loss": 0.2992, + "epoch": 0.65, + "grad_norm": 0.38152383616333674, + "learning_rate": 5.730040301207749e-06, + "loss": 0.2585, "step": 14176 }, { - "epoch": 0.81, - "grad_norm": 0.3758488924017635, - "learning_rate": 1.7506129962729046e-06, - "loss": 0.2974, + "epoch": 0.65, + "grad_norm": 0.32416110086969513, + "learning_rate": 5.72869489227068e-06, + "loss": 0.205, "step": 14177 }, { - "epoch": 0.81, - "grad_norm": 0.6228724462522425, - "learning_rate": 1.7495613099937447e-06, - "loss": 0.2398, + "epoch": 0.65, + "grad_norm": 0.5679739192206334, + "learning_rate": 5.727349577896194e-06, + "loss": 0.3729, "step": 14178 }, { - "epoch": 0.81, - "grad_norm": 0.28615182796675526, - "learning_rate": 1.748509909426922e-06, - "loss": 0.2267, + "epoch": 0.65, + "grad_norm": 0.6570330115989632, + "learning_rate": 5.7260043581140655e-06, + "loss": 0.4017, "step": 14179 }, { - "epoch": 0.81, - "grad_norm": 0.5160793879630342, - "learning_rate": 1.747458794608844e-06, - "loss": 0.2544, + "epoch": 0.65, + "grad_norm": 0.3610192647193398, + "learning_rate": 5.724659232954082e-06, + "loss": 0.2094, "step": 14180 }, { - "epoch": 0.81, - "grad_norm": 0.28378991284710486, - "learning_rate": 1.7464079655759181e-06, - "loss": 0.2048, + "epoch": 0.65, + "grad_norm": 0.2856354020456345, + "learning_rate": 5.723314202446027e-06, + "loss": 0.2482, "step": 14181 }, { - "epoch": 0.81, - "grad_norm": 0.39445167751605087, - "learning_rate": 1.7453574223645265e-06, - "loss": 0.3056, + "epoch": 0.65, + "grad_norm": 0.4684549178345702, + "learning_rate": 5.7219692666196695e-06, + "loss": 0.2509, "step": 14182 }, { - "epoch": 0.81, - "grad_norm": 0.7048727007429841, - "learning_rate": 1.7443071650110532e-06, - "loss": 0.4061, + "epoch": 0.65, + "grad_norm": 0.6149544046599806, + "learning_rate": 5.720624425504788e-06, + "loss": 0.3054, "step": 14183 }, { - "epoch": 0.81, - "grad_norm": 0.511720675101496, - "learning_rate": 1.743257193551865e-06, - "loss": 0.2747, + "epoch": 0.65, + "grad_norm": 0.3406045174449596, + "learning_rate": 5.719279679131162e-06, + "loss": 0.2507, "step": 14184 }, { - "epoch": 0.81, - "grad_norm": 0.27744333813057853, - "learning_rate": 1.742207508023327e-06, - "loss": 0.2208, + "epoch": 0.65, + "grad_norm": 0.4759607349202062, + "learning_rate": 5.717935027528554e-06, + "loss": 0.3284, "step": 14185 }, { - "epoch": 0.82, - "grad_norm": 0.2413983160625698, - "learning_rate": 1.741158108461788e-06, - "loss": 0.1841, + "epoch": 0.65, + "grad_norm": 0.4105633865655265, + "learning_rate": 5.7165904707267415e-06, + "loss": 0.2929, "step": 14186 }, { - "epoch": 0.82, - "grad_norm": 0.9634224363895272, - "learning_rate": 1.7401089949035888e-06, - "loss": 0.4651, + "epoch": 0.65, + "grad_norm": 0.35537583070972373, + "learning_rate": 5.71524600875548e-06, + "loss": 0.1659, "step": 14187 }, { - "epoch": 0.82, - "grad_norm": 0.3454464012521359, - "learning_rate": 1.7390601673850582e-06, - "loss": 0.2216, + "epoch": 0.65, + "grad_norm": 0.38481586145885494, + "learning_rate": 5.713901641644549e-06, + "loss": 0.244, "step": 14188 }, { - "epoch": 0.82, - "grad_norm": 0.36039692383388244, - "learning_rate": 1.7380116259425205e-06, - "loss": 0.2878, + "epoch": 0.65, + "grad_norm": 0.34512305554861017, + "learning_rate": 5.712557369423701e-06, + "loss": 0.2749, "step": 14189 }, { - "epoch": 0.82, - "grad_norm": 1.0262897010065517, - "learning_rate": 1.7369633706122845e-06, - "loss": 0.457, + "epoch": 0.65, + "grad_norm": 0.49328624160947737, + "learning_rate": 5.7112131921227055e-06, + "loss": 0.2585, "step": 14190 }, { - "epoch": 0.82, - "grad_norm": 0.32525366190097454, - "learning_rate": 1.7359154014306523e-06, - "loss": 0.1925, + "epoch": 0.65, + "grad_norm": 0.6098671563062075, + "learning_rate": 5.709869109771312e-06, + "loss": 0.3676, "step": 14191 }, { - "epoch": 0.82, - "grad_norm": 0.25124789227221145, - "learning_rate": 1.7348677184339114e-06, - "loss": 0.173, + "epoch": 0.65, + "grad_norm": 0.44890639166524327, + "learning_rate": 5.708525122399281e-06, + "loss": 0.2799, "step": 14192 }, { - "epoch": 0.82, - "grad_norm": 0.31799897298231583, - "learning_rate": 1.7338203216583493e-06, - "loss": 0.2934, + "epoch": 0.65, + "grad_norm": 0.2915815409652929, + "learning_rate": 5.707181230036372e-06, + "loss": 0.2349, "step": 14193 }, { - "epoch": 0.82, - "grad_norm": 0.3329687176740427, - "learning_rate": 1.732773211140233e-06, - "loss": 0.2172, + "epoch": 0.65, + "grad_norm": 0.35963937531729373, + "learning_rate": 5.705837432712329e-06, + "loss": 0.1951, "step": 14194 }, { - "epoch": 0.82, - "grad_norm": 0.6666999484070938, - "learning_rate": 1.7317263869158252e-06, - "loss": 0.3835, + "epoch": 0.65, + "grad_norm": 0.8418610004963427, + "learning_rate": 5.70449373045691e-06, + "loss": 0.4448, "step": 14195 }, { - "epoch": 0.82, - "grad_norm": 0.840595293551172, - "learning_rate": 1.7306798490213783e-06, - "loss": 0.4352, + "epoch": 0.65, + "grad_norm": 0.39447917298210217, + "learning_rate": 5.703150123299857e-06, + "loss": 0.2683, "step": 14196 }, { - "epoch": 0.82, - "grad_norm": 0.24835261216243176, - "learning_rate": 1.729633597493129e-06, - "loss": 0.2628, + "epoch": 0.65, + "grad_norm": 0.40753951251270465, + "learning_rate": 5.701806611270917e-06, + "loss": 0.2557, "step": 14197 }, { - "epoch": 0.82, - "grad_norm": 0.24067764811215303, - "learning_rate": 1.7285876323673144e-06, - "loss": 0.0807, + "epoch": 0.65, + "grad_norm": 0.6751337569632193, + "learning_rate": 5.700463194399841e-06, + "loss": 0.3357, "step": 14198 }, { - "epoch": 0.82, - "grad_norm": 0.6601672537032992, - "learning_rate": 1.7275419536801552e-06, - "loss": 0.3598, + "epoch": 0.65, + "grad_norm": 0.45264573070798325, + "learning_rate": 5.699119872716363e-06, + "loss": 0.3251, "step": 14199 }, { - "epoch": 0.82, - "grad_norm": 0.366001352720004, - "learning_rate": 1.7264965614678631e-06, - "loss": 0.2781, + "epoch": 0.65, + "grad_norm": 0.21703008589817113, + "learning_rate": 5.697776646250225e-06, + "loss": 0.1646, "step": 14200 }, { - "epoch": 0.82, - "grad_norm": 0.3601613755013287, - "learning_rate": 1.7254514557666358e-06, - "loss": 0.2612, + "epoch": 0.65, + "grad_norm": 0.6595318002984385, + "learning_rate": 5.696433515031169e-06, + "loss": 0.3672, "step": 14201 }, { - "epoch": 0.82, - "grad_norm": 0.847066894311364, - "learning_rate": 1.7244066366126722e-06, - "loss": 0.3791, + "epoch": 0.65, + "grad_norm": 0.41054545419020705, + "learning_rate": 5.695090479088923e-06, + "loss": 0.3133, "step": 14202 }, { - "epoch": 0.82, - "grad_norm": 0.35177083832531264, - "learning_rate": 1.72336210404215e-06, - "loss": 0.2582, + "epoch": 0.65, + "grad_norm": 0.8165759290881441, + "learning_rate": 5.693747538453229e-06, + "loss": 0.3391, "step": 14203 }, { - "epoch": 0.82, - "grad_norm": 0.23594743243529212, - "learning_rate": 1.7223178580912426e-06, - "loss": 0.147, + "epoch": 0.65, + "grad_norm": 0.40005630914203494, + "learning_rate": 5.692404693153807e-06, + "loss": 0.2632, "step": 14204 }, { - "epoch": 0.82, - "grad_norm": 0.41703845501159037, - "learning_rate": 1.7212738987961086e-06, - "loss": 0.3409, + "epoch": 0.65, + "grad_norm": 0.33477709388680915, + "learning_rate": 5.6910619432204e-06, + "loss": 0.2703, "step": 14205 }, { - "epoch": 0.82, - "grad_norm": 0.3426632852760692, - "learning_rate": 1.7202302261929071e-06, - "loss": 0.283, + "epoch": 0.65, + "grad_norm": 0.24210380441457324, + "learning_rate": 5.689719288682724e-06, + "loss": 0.0949, "step": 14206 }, { - "epoch": 0.82, - "grad_norm": 0.7113318712831286, - "learning_rate": 1.7191868403177757e-06, - "loss": 0.3203, + "epoch": 0.65, + "grad_norm": 0.4369507200398018, + "learning_rate": 5.688376729570515e-06, + "loss": 0.2833, "step": 14207 }, { - "epoch": 0.82, - "grad_norm": 0.5202922764450023, - "learning_rate": 1.7181437412068491e-06, - "loss": 0.3489, + "epoch": 0.65, + "grad_norm": 0.5222180691816736, + "learning_rate": 5.687034265913484e-06, + "loss": 0.3264, "step": 14208 }, { - "epoch": 0.82, - "grad_norm": 0.29408898192812816, - "learning_rate": 1.717100928896246e-06, - "loss": 0.2608, + "epoch": 0.65, + "grad_norm": 0.3885298135418425, + "learning_rate": 5.685691897741359e-06, + "loss": 0.3289, "step": 14209 }, { - "epoch": 0.82, - "grad_norm": 0.32304266882818133, - "learning_rate": 1.7160584034220828e-06, - "loss": 0.1571, + "epoch": 0.65, + "grad_norm": 0.36614740960178666, + "learning_rate": 5.6843496250838595e-06, + "loss": 0.1941, "step": 14210 }, { - "epoch": 0.82, - "grad_norm": 0.573358791143515, - "learning_rate": 1.7150161648204622e-06, - "loss": 0.2802, + "epoch": 0.65, + "grad_norm": 0.592447933918337, + "learning_rate": 5.6830074479706964e-06, + "loss": 0.3501, "step": 14211 }, { - "epoch": 0.82, - "grad_norm": 0.30989601757233975, - "learning_rate": 1.713974213127475e-06, - "loss": 0.2593, + "epoch": 0.65, + "grad_norm": 0.26950665642334565, + "learning_rate": 5.681665366431591e-06, + "loss": 0.2287, "step": 14212 }, { - "epoch": 0.82, - "grad_norm": 0.36938090057695033, - "learning_rate": 1.7129325483792048e-06, - "loss": 0.3223, + "epoch": 0.65, + "grad_norm": 0.5695476275524742, + "learning_rate": 5.680323380496249e-06, + "loss": 0.239, "step": 14213 }, { - "epoch": 0.82, - "grad_norm": 1.4585879963319681, - "learning_rate": 1.7118911706117213e-06, - "loss": 0.326, - "step": 14214 + "epoch": 0.65, + "grad_norm": 0.38139558410336155, + "learning_rate": 5.678981490194384e-06, + "loss": 0.304, + "step": 14214 }, { - "epoch": 0.82, - "grad_norm": 0.32485245497389287, - "learning_rate": 1.710850079861095e-06, - "loss": 0.2697, + "epoch": 0.65, + "grad_norm": 0.7483139913001906, + "learning_rate": 5.677639695555708e-06, + "loss": 0.5238, "step": 14215 }, { - "epoch": 0.82, - "grad_norm": 0.44404969059636795, - "learning_rate": 1.7098092761633722e-06, - "loss": 0.247, + "epoch": 0.65, + "grad_norm": 0.3537701716276475, + "learning_rate": 5.67629799660992e-06, + "loss": 0.2305, "step": 14216 }, { - "epoch": 0.82, - "grad_norm": 0.32738510941378074, - "learning_rate": 1.7087687595545943e-06, - "loss": 0.2433, + "epoch": 0.65, + "grad_norm": 0.3765521953950769, + "learning_rate": 5.674956393386726e-06, + "loss": 0.2677, "step": 14217 }, { - "epoch": 0.82, - "grad_norm": 0.34031209603664575, - "learning_rate": 1.7077285300708002e-06, - "loss": 0.2835, + "epoch": 0.65, + "grad_norm": 0.2897579969665024, + "learning_rate": 5.6736148859158305e-06, + "loss": 0.1758, "step": 14218 }, { - "epoch": 0.82, - "grad_norm": 0.8347750409587513, - "learning_rate": 1.706688587748011e-06, - "loss": 0.5498, + "epoch": 0.65, + "grad_norm": 0.548099549788341, + "learning_rate": 5.672273474226934e-06, + "loss": 0.2411, "step": 14219 }, { - "epoch": 0.82, - "grad_norm": 0.2600218973847508, - "learning_rate": 1.7056489326222392e-06, - "loss": 0.2242, + "epoch": 0.65, + "grad_norm": 0.34541878267423803, + "learning_rate": 5.670932158349732e-06, + "loss": 0.2932, "step": 14220 }, { - "epoch": 0.82, - "grad_norm": 0.2922422219720576, - "learning_rate": 1.7046095647294859e-06, - "loss": 0.1806, + "epoch": 0.65, + "grad_norm": 0.4821728946039155, + "learning_rate": 5.669590938313911e-06, + "loss": 0.3186, "step": 14221 }, { - "epoch": 0.82, - "grad_norm": 0.9252160238532011, - "learning_rate": 1.70357048410575e-06, - "loss": 0.3704, + "epoch": 0.65, + "grad_norm": 0.7963979162775936, + "learning_rate": 5.668249814149182e-06, + "loss": 0.3891, "step": 14222 }, { - "epoch": 0.82, - "grad_norm": 0.46332376444231493, - "learning_rate": 1.7025316907870105e-06, - "loss": 0.2813, + "epoch": 0.65, + "grad_norm": 0.35921902691172936, + "learning_rate": 5.666908785885222e-06, + "loss": 0.2077, "step": 14223 }, { - "epoch": 0.82, - "grad_norm": 0.28819936990494277, - "learning_rate": 1.7014931848092409e-06, - "loss": 0.1896, + "epoch": 0.65, + "grad_norm": 0.293057228740933, + "learning_rate": 5.66556785355173e-06, + "loss": 0.2511, "step": 14224 }, { - "epoch": 0.82, - "grad_norm": 0.32904385789124907, - "learning_rate": 1.700454966208407e-06, - "loss": 0.313, + "epoch": 0.65, + "grad_norm": 0.6387911332969055, + "learning_rate": 5.664227017178385e-06, + "loss": 0.3409, "step": 14225 }, { - "epoch": 0.82, - "grad_norm": 0.3804473251777745, - "learning_rate": 1.6994170350204576e-06, - "loss": 0.2184, + "epoch": 0.65, + "grad_norm": 0.27522830199844417, + "learning_rate": 5.662886276794874e-06, + "loss": 0.1684, "step": 14226 }, { - "epoch": 0.82, - "grad_norm": 0.31666399354697766, - "learning_rate": 1.6983793912813418e-06, - "loss": 0.1884, + "epoch": 0.65, + "grad_norm": 1.1993223672192868, + "learning_rate": 5.661545632430885e-06, + "loss": 0.7549, "step": 14227 }, { - "epoch": 0.82, - "grad_norm": 0.35107911647497736, - "learning_rate": 1.6973420350269909e-06, - "loss": 0.2859, + "epoch": 0.65, + "grad_norm": 0.3225440958746963, + "learning_rate": 5.660205084116089e-06, + "loss": 0.2644, "step": 14228 }, { - "epoch": 0.82, - "grad_norm": 0.689539750224186, - "learning_rate": 1.6963049662933273e-06, - "loss": 0.3485, + "epoch": 0.65, + "grad_norm": 0.42340677287258677, + "learning_rate": 5.658864631880174e-06, + "loss": 0.2332, "step": 14229 }, { - "epoch": 0.82, - "grad_norm": 0.2906909140943072, - "learning_rate": 1.6952681851162644e-06, - "loss": 0.1952, + "epoch": 0.65, + "grad_norm": 0.7360893733765348, + "learning_rate": 5.6575242757528095e-06, + "loss": 0.3875, "step": 14230 }, { - "epoch": 0.82, - "grad_norm": 1.1242614537154207, - "learning_rate": 1.6942316915317091e-06, - "loss": 0.7466, + "epoch": 0.65, + "grad_norm": 0.4019577050511025, + "learning_rate": 5.656184015763671e-06, + "loss": 0.2605, "step": 14231 }, { - "epoch": 0.82, - "grad_norm": 0.23330529085667315, - "learning_rate": 1.6931954855755527e-06, - "loss": 0.2129, + "epoch": 0.65, + "grad_norm": 0.2740373031618846, + "learning_rate": 5.654843851942436e-06, + "loss": 0.2272, "step": 14232 }, { - "epoch": 0.82, - "grad_norm": 0.3182776146060527, - "learning_rate": 1.6921595672836811e-06, - "loss": 0.235, + "epoch": 0.65, + "grad_norm": 0.3004929777231637, + "learning_rate": 5.653503784318767e-06, + "loss": 0.2317, "step": 14233 }, { - "epoch": 0.82, - "grad_norm": 1.1253697865393655, - "learning_rate": 1.6911239366919618e-06, - "loss": 0.2787, + "epoch": 0.65, + "grad_norm": 1.5335587741320547, + "learning_rate": 5.652163812922334e-06, + "loss": 0.6379, "step": 14234 }, { - "epoch": 0.82, - "grad_norm": 0.6584813922771027, - "learning_rate": 1.6900885938362677e-06, - "loss": 0.4179, + "epoch": 0.65, + "grad_norm": 0.34352361961153377, + "learning_rate": 5.650823937782803e-06, + "loss": 0.2458, "step": 14235 }, { - "epoch": 0.82, - "grad_norm": 0.31570298371256733, - "learning_rate": 1.6890535387524465e-06, - "loss": 0.2391, + "epoch": 0.65, + "grad_norm": 0.42610722290167297, + "learning_rate": 5.649484158929844e-06, + "loss": 0.2585, "step": 14236 }, { - "epoch": 0.82, - "grad_norm": 0.3460198335510046, - "learning_rate": 1.6880187714763453e-06, - "loss": 0.2658, + "epoch": 0.65, + "grad_norm": 0.9032717675331637, + "learning_rate": 5.648144476393108e-06, + "loss": 0.4389, "step": 14237 }, { - "epoch": 0.82, - "grad_norm": 0.3434216073340945, - "learning_rate": 1.6869842920437961e-06, - "loss": 0.1589, + "epoch": 0.65, + "grad_norm": 0.3372163749507671, + "learning_rate": 5.646804890202258e-06, + "loss": 0.2522, "step": 14238 }, { - "epoch": 0.82, - "grad_norm": 0.37042829811528694, - "learning_rate": 1.6859501004906208e-06, - "loss": 0.2287, + "epoch": 0.65, + "grad_norm": 0.32014044621985355, + "learning_rate": 5.645465400386958e-06, + "loss": 0.1601, "step": 14239 }, { - "epoch": 0.82, - "grad_norm": 0.3684246063480538, - "learning_rate": 1.6849161968526384e-06, - "loss": 0.258, + "epoch": 0.65, + "grad_norm": 0.28536590953643326, + "learning_rate": 5.644126006976851e-06, + "loss": 0.238, "step": 14240 }, { - "epoch": 0.82, - "grad_norm": 0.49687821321936904, - "learning_rate": 1.6838825811656512e-06, - "loss": 0.3434, + "epoch": 0.65, + "grad_norm": 0.42883083663738253, + "learning_rate": 5.6427867100016024e-06, + "loss": 0.2901, "step": 14241 }, { - "epoch": 0.82, - "grad_norm": 0.4044736505228071, - "learning_rate": 1.6828492534654516e-06, - "loss": 0.2857, + "epoch": 0.65, + "grad_norm": 0.9003142029145237, + "learning_rate": 5.641447509490851e-06, + "loss": 0.3061, "step": 14242 }, { - "epoch": 0.82, - "grad_norm": 0.2720523490521247, - "learning_rate": 1.6818162137878224e-06, - "loss": 0.1785, + "epoch": 0.65, + "grad_norm": 0.8907281877991363, + "learning_rate": 5.64010840547425e-06, + "loss": 0.4442, "step": 14243 }, { - "epoch": 0.82, - "grad_norm": 0.2924842153658913, - "learning_rate": 1.6807834621685426e-06, - "loss": 0.2572, + "epoch": 0.65, + "grad_norm": 0.38835959640316525, + "learning_rate": 5.638769397981452e-06, + "loss": 0.2795, "step": 14244 }, { - "epoch": 0.82, - "grad_norm": 0.42620199736044223, - "learning_rate": 1.6797509986433746e-06, - "loss": 0.2771, + "epoch": 0.65, + "grad_norm": 0.2764713961205734, + "learning_rate": 5.637430487042091e-06, + "loss": 0.2083, "step": 14245 }, { - "epoch": 0.82, - "grad_norm": 0.4717554256016822, - "learning_rate": 1.678718823248071e-06, - "loss": 0.3129, + "epoch": 0.65, + "grad_norm": 0.47751155804188555, + "learning_rate": 5.636091672685819e-06, + "loss": 0.2188, "step": 14246 }, { - "epoch": 0.82, - "grad_norm": 0.8840619730677773, - "learning_rate": 1.6776869360183746e-06, - "loss": 0.2366, + "epoch": 0.65, + "grad_norm": 0.3680340927726867, + "learning_rate": 5.634752954942264e-06, + "loss": 0.2606, "step": 14247 }, { - "epoch": 0.82, - "grad_norm": 0.3197993001659107, - "learning_rate": 1.6766553369900241e-06, - "loss": 0.2557, + "epoch": 0.65, + "grad_norm": 0.469372681713889, + "learning_rate": 5.633414333841079e-06, + "loss": 0.315, "step": 14248 }, { - "epoch": 0.82, - "grad_norm": 0.32803477233922695, - "learning_rate": 1.6756240261987434e-06, - "loss": 0.3214, + "epoch": 0.65, + "grad_norm": 0.8238786934035612, + "learning_rate": 5.632075809411892e-06, + "loss": 0.2484, "step": 14249 }, { - "epoch": 0.82, - "grad_norm": 0.3035876936412637, - "learning_rate": 1.6745930036802428e-06, - "loss": 0.1272, + "epoch": 0.65, + "grad_norm": 0.36641447556072176, + "learning_rate": 5.6307373816843324e-06, + "loss": 0.2776, "step": 14250 }, { - "epoch": 0.82, - "grad_norm": 0.32877403922823634, - "learning_rate": 1.6735622694702259e-06, - "loss": 0.2405, + "epoch": 0.65, + "grad_norm": 0.5097054076531459, + "learning_rate": 5.629399050688036e-06, + "loss": 0.3645, "step": 14251 }, { - "epoch": 0.82, - "grad_norm": 1.3599618100090622, - "learning_rate": 1.6725318236043908e-06, - "loss": 0.3988, + "epoch": 0.65, + "grad_norm": 0.2965976728084407, + "learning_rate": 5.628060816452633e-06, + "loss": 0.203, "step": 14252 }, { - "epoch": 0.82, - "grad_norm": 0.3396785730073297, - "learning_rate": 1.6715016661184225e-06, - "loss": 0.2526, + "epoch": 0.65, + "grad_norm": 0.34936276223295415, + "learning_rate": 5.626722679007753e-06, + "loss": 0.2725, "step": 14253 }, { - "epoch": 0.82, - "grad_norm": 0.3797148679905969, - "learning_rate": 1.6704717970479923e-06, - "loss": 0.2691, + "epoch": 0.65, + "grad_norm": 0.7164809327899495, + "learning_rate": 5.625384638383014e-06, + "loss": 0.373, "step": 14254 }, { - "epoch": 0.82, - "grad_norm": 0.5481283074189417, - "learning_rate": 1.6694422164287627e-06, - "loss": 0.3733, + "epoch": 0.65, + "grad_norm": 0.5524030552283329, + "learning_rate": 5.624046694608048e-06, + "loss": 0.2586, "step": 14255 }, { - "epoch": 0.82, - "grad_norm": 0.26139269536422804, - "learning_rate": 1.6684129242963943e-06, - "loss": 0.2224, + "epoch": 0.65, + "grad_norm": 0.31736641554879524, + "learning_rate": 5.622708847712465e-06, + "loss": 0.266, "step": 14256 }, { - "epoch": 0.82, - "grad_norm": 0.40947654935266997, - "learning_rate": 1.6673839206865283e-06, - "loss": 0.2732, + "epoch": 0.65, + "grad_norm": 1.2824608981603112, + "learning_rate": 5.621371097725889e-06, + "loss": 0.741, "step": 14257 }, { - "epoch": 0.82, - "grad_norm": 0.3235516674745909, - "learning_rate": 1.6663552056347975e-06, - "loss": 0.2544, + "epoch": 0.65, + "grad_norm": 0.3233526483378466, + "learning_rate": 5.620033444677942e-06, + "loss": 0.174, "step": 14258 }, { - "epoch": 0.82, - "grad_norm": 1.1695979780325698, - "learning_rate": 1.6653267791768258e-06, - "loss": 0.5732, + "epoch": 0.66, + "grad_norm": 0.3281317592160886, + "learning_rate": 5.618695888598228e-06, + "loss": 0.2737, "step": 14259 }, { - "epoch": 0.82, - "grad_norm": 0.2849039189767827, - "learning_rate": 1.6642986413482321e-06, - "loss": 0.1958, + "epoch": 0.66, + "grad_norm": 0.37129675136988083, + "learning_rate": 5.6173584295163645e-06, + "loss": 0.3006, "step": 14260 }, { - "epoch": 0.82, - "grad_norm": 0.38973166723076286, - "learning_rate": 1.663270792184618e-06, - "loss": 0.2984, + "epoch": 0.66, + "grad_norm": 1.1675457003963716, + "learning_rate": 5.616021067461965e-06, + "loss": 0.5681, "step": 14261 }, { - "epoch": 0.82, - "grad_norm": 0.8836587032487782, - "learning_rate": 1.6622432317215776e-06, - "loss": 0.4592, + "epoch": 0.66, + "grad_norm": 0.3188456827370682, + "learning_rate": 5.614683802464631e-06, + "loss": 0.1863, "step": 14262 }, { - "epoch": 0.82, - "grad_norm": 0.40255447112780424, - "learning_rate": 1.6612159599946954e-06, - "loss": 0.2373, + "epoch": 0.66, + "grad_norm": 1.22554060627318, + "learning_rate": 5.6133466345539745e-06, + "loss": 0.7175, "step": 14263 }, { - "epoch": 0.82, - "grad_norm": 0.20061146179604905, - "learning_rate": 1.660188977039544e-06, - "loss": 0.2094, + "epoch": 0.66, + "grad_norm": 0.31785025218900276, + "learning_rate": 5.612009563759588e-06, + "loss": 0.2579, "step": 14264 }, { - "epoch": 0.82, - "grad_norm": 1.583325395018498, - "learning_rate": 1.659162282891692e-06, - "loss": 0.7721, + "epoch": 0.66, + "grad_norm": 0.3471616472853809, + "learning_rate": 5.610672590111087e-06, + "loss": 0.1933, "step": 14265 }, { - "epoch": 0.82, - "grad_norm": 0.32561047876624966, - "learning_rate": 1.6581358775866907e-06, - "loss": 0.1869, + "epoch": 0.66, + "grad_norm": 0.5013416931190756, + "learning_rate": 5.609335713638066e-06, + "loss": 0.2643, "step": 14266 }, { - "epoch": 0.82, - "grad_norm": 0.5064695995116827, - "learning_rate": 1.6571097611600862e-06, - "loss": 0.3667, + "epoch": 0.66, + "grad_norm": 0.3902198195921654, + "learning_rate": 5.607998934370115e-06, + "loss": 0.3079, "step": 14267 }, { - "epoch": 0.82, - "grad_norm": 0.3729173917284346, - "learning_rate": 1.6560839336474088e-06, - "loss": 0.3192, + "epoch": 0.66, + "grad_norm": 0.3170246782980747, + "learning_rate": 5.606662252336836e-06, + "loss": 0.1883, "step": 14268 }, { - "epoch": 0.82, - "grad_norm": 0.3673667832560838, - "learning_rate": 1.6550583950841891e-06, - "loss": 0.2171, + "epoch": 0.66, + "grad_norm": 1.1078586181018406, + "learning_rate": 5.60532566756782e-06, + "loss": 0.5957, "step": 14269 }, { - "epoch": 0.82, - "grad_norm": 0.2352059959306334, - "learning_rate": 1.6540331455059377e-06, - "loss": 0.1389, + "epoch": 0.66, + "grad_norm": 0.654345179331826, + "learning_rate": 5.603989180092661e-06, + "loss": 0.3383, "step": 14270 }, { - "epoch": 0.82, - "grad_norm": 1.2082826393706474, - "learning_rate": 1.6530081849481595e-06, - "loss": 0.5537, + "epoch": 0.66, + "grad_norm": 0.3604227978209283, + "learning_rate": 5.602652789940941e-06, + "loss": 0.2582, "step": 14271 }, { - "epoch": 0.82, - "grad_norm": 0.27082498922394227, - "learning_rate": 1.6519835134463468e-06, - "loss": 0.2635, + "epoch": 0.66, + "grad_norm": 0.243514146603745, + "learning_rate": 5.601316497142255e-06, + "loss": 0.1579, "step": 14272 }, { - "epoch": 0.82, - "grad_norm": 0.4933710914894767, - "learning_rate": 1.6509591310359886e-06, - "loss": 0.2803, + "epoch": 0.66, + "grad_norm": 1.3728625137128292, + "learning_rate": 5.599980301726178e-06, + "loss": 0.8046, "step": 14273 }, { - "epoch": 0.82, - "grad_norm": 0.6557255957824724, - "learning_rate": 1.649935037752557e-06, - "loss": 0.3822, + "epoch": 0.66, + "grad_norm": 0.34247309405555254, + "learning_rate": 5.598644203722297e-06, + "loss": 0.2689, "step": 14274 }, { - "epoch": 0.82, - "grad_norm": 0.8281936757764693, - "learning_rate": 1.648911233631516e-06, - "loss": 0.3157, + "epoch": 0.66, + "grad_norm": 0.7610271050849037, + "learning_rate": 5.597308203160193e-06, + "loss": 0.3187, "step": 14275 }, { - "epoch": 0.82, - "grad_norm": 0.20637211972769265, - "learning_rate": 1.6478877187083187e-06, - "loss": 0.1921, + "epoch": 0.66, + "grad_norm": 0.35727192818962616, + "learning_rate": 5.595972300069439e-06, + "loss": 0.3158, "step": 14276 }, { - "epoch": 0.82, - "grad_norm": 0.4550300837789682, - "learning_rate": 1.6468644930184097e-06, - "loss": 0.2334, + "epoch": 0.66, + "grad_norm": 0.37046281064243497, + "learning_rate": 5.594636494479615e-06, + "loss": 0.2803, "step": 14277 }, { - "epoch": 0.82, - "grad_norm": 0.5805410905455298, - "learning_rate": 1.6458415565972253e-06, - "loss": 0.3295, + "epoch": 0.66, + "grad_norm": 0.196416306954674, + "learning_rate": 5.593300786420295e-06, + "loss": 0.0691, "step": 14278 }, { - "epoch": 0.82, - "grad_norm": 0.43031247602796713, - "learning_rate": 1.6448189094801891e-06, - "loss": 0.2736, + "epoch": 0.66, + "grad_norm": 0.3566435339245806, + "learning_rate": 5.591965175921046e-06, + "loss": 0.2995, "step": 14279 }, { - "epoch": 0.82, - "grad_norm": 0.36717439759245385, - "learning_rate": 1.6437965517027143e-06, - "loss": 0.2979, + "epoch": 0.66, + "grad_norm": 0.41377142937939826, + "learning_rate": 5.590629663011442e-06, + "loss": 0.2578, "step": 14280 }, { - "epoch": 0.82, - "grad_norm": 0.6289420966546745, - "learning_rate": 1.6427744833002036e-06, - "loss": 0.3179, + "epoch": 0.66, + "grad_norm": 0.6461180749131437, + "learning_rate": 5.589294247721041e-06, + "loss": 0.2715, "step": 14281 }, { - "epoch": 0.82, - "grad_norm": 0.24428017486398262, - "learning_rate": 1.6417527043080583e-06, - "loss": 0.1558, + "epoch": 0.66, + "grad_norm": 0.7606455976898617, + "learning_rate": 5.587958930079422e-06, + "loss": 0.4087, "step": 14282 }, { - "epoch": 0.82, - "grad_norm": 1.1756826326190717, - "learning_rate": 1.6407312147616539e-06, - "loss": 0.4512, + "epoch": 0.66, + "grad_norm": 0.42554643240121875, + "learning_rate": 5.586623710116135e-06, + "loss": 0.2667, "step": 14283 }, { - "epoch": 0.82, - "grad_norm": 0.3007377368607552, - "learning_rate": 1.6397100146963662e-06, - "loss": 0.2467, + "epoch": 0.66, + "grad_norm": 0.28220121077519666, + "learning_rate": 5.58528858786075e-06, + "loss": 0.2611, "step": 14284 }, { - "epoch": 0.82, - "grad_norm": 0.38786311492343356, - "learning_rate": 1.6386891041475639e-06, - "loss": 0.3136, + "epoch": 0.66, + "grad_norm": 0.31114296575308625, + "learning_rate": 5.583953563342821e-06, + "loss": 0.1332, "step": 14285 }, { - "epoch": 0.82, - "grad_norm": 0.719273995861604, - "learning_rate": 1.6376684831505984e-06, - "loss": 0.2903, + "epoch": 0.66, + "grad_norm": 0.41882636979098803, + "learning_rate": 5.582618636591895e-06, + "loss": 0.282, "step": 14286 }, { - "epoch": 0.82, - "grad_norm": 0.616064577176051, - "learning_rate": 1.636648151740814e-06, - "loss": 0.2994, + "epoch": 0.66, + "grad_norm": 0.6236188079933123, + "learning_rate": 5.581283807637543e-06, + "loss": 0.3289, "step": 14287 }, { - "epoch": 0.82, - "grad_norm": 0.29643428083943496, - "learning_rate": 1.6356281099535432e-06, - "loss": 0.2724, + "epoch": 0.66, + "grad_norm": 0.36050526327785865, + "learning_rate": 5.579949076509306e-06, + "loss": 0.2411, "step": 14288 }, { - "epoch": 0.82, - "grad_norm": 0.27973403877242464, - "learning_rate": 1.63460835782411e-06, - "loss": 0.1537, + "epoch": 0.66, + "grad_norm": 0.43022839088968157, + "learning_rate": 5.578614443236738e-06, + "loss": 0.2722, "step": 14289 }, { - "epoch": 0.82, - "grad_norm": 0.3902255474494219, - "learning_rate": 1.633588895387832e-06, - "loss": 0.2725, + "epoch": 0.66, + "grad_norm": 0.26105073750649477, + "learning_rate": 5.577279907849383e-06, + "loss": 0.1784, "step": 14290 }, { - "epoch": 0.82, - "grad_norm": 0.6206832393439994, - "learning_rate": 1.6325697226800109e-06, - "loss": 0.3138, + "epoch": 0.66, + "grad_norm": 0.476573794962954, + "learning_rate": 5.575945470376787e-06, + "loss": 0.2514, "step": 14291 }, { - "epoch": 0.82, - "grad_norm": 0.32000206528220054, - "learning_rate": 1.6315508397359391e-06, - "loss": 0.2513, + "epoch": 0.66, + "grad_norm": 0.2921973754557666, + "learning_rate": 5.574611130848499e-06, + "loss": 0.256, "step": 14292 }, { - "epoch": 0.82, - "grad_norm": 0.9261649174707342, - "learning_rate": 1.6305322465909012e-06, - "loss": 0.3988, + "epoch": 0.66, + "grad_norm": 0.6795554400154031, + "learning_rate": 5.57327688929405e-06, + "loss": 0.4419, "step": 14293 }, { - "epoch": 0.82, - "grad_norm": 0.3895447592390385, - "learning_rate": 1.6295139432801732e-06, - "loss": 0.2633, + "epoch": 0.66, + "grad_norm": 0.7248740068038048, + "learning_rate": 5.5719427457429854e-06, + "loss": 0.3172, "step": 14294 }, { - "epoch": 0.82, - "grad_norm": 0.20039587976898893, - "learning_rate": 1.628495929839018e-06, - "loss": 0.1685, + "epoch": 0.66, + "grad_norm": 0.3891463940957887, + "learning_rate": 5.570608700224844e-06, + "loss": 0.2706, "step": 14295 }, { - "epoch": 0.82, - "grad_norm": 0.678502925193134, - "learning_rate": 1.6274782063026883e-06, - "loss": 0.3645, + "epoch": 0.66, + "grad_norm": 0.3946280731500319, + "learning_rate": 5.5692747527691534e-06, + "loss": 0.3376, "step": 14296 }, { - "epoch": 0.82, - "grad_norm": 0.3716671212985712, - "learning_rate": 1.6264607727064253e-06, - "loss": 0.3074, + "epoch": 0.66, + "grad_norm": 0.3169734056521815, + "learning_rate": 5.567940903405453e-06, + "loss": 0.2096, "step": 14297 }, { - "epoch": 0.82, - "grad_norm": 0.8235463841034898, - "learning_rate": 1.6254436290854691e-06, - "loss": 0.4441, + "epoch": 0.66, + "grad_norm": 0.4451406243635951, + "learning_rate": 5.566607152163261e-06, + "loss": 0.2025, "step": 14298 }, { - "epoch": 0.82, - "grad_norm": 0.6843383792249415, - "learning_rate": 1.62442677547504e-06, - "loss": 0.1194, + "epoch": 0.66, + "grad_norm": 0.5521747921624851, + "learning_rate": 5.565273499072124e-06, + "loss": 0.3353, "step": 14299 }, { - "epoch": 0.82, - "grad_norm": 0.25397188356352135, - "learning_rate": 1.62341021191035e-06, - "loss": 0.267, + "epoch": 0.66, + "grad_norm": 0.3717540889867584, + "learning_rate": 5.563939944161551e-06, + "loss": 0.284, "step": 14300 }, { - "epoch": 0.82, - "grad_norm": 0.32104682332962464, - "learning_rate": 1.6223939384266064e-06, - "loss": 0.1979, + "epoch": 0.66, + "grad_norm": 0.5885112681336554, + "learning_rate": 5.562606487461077e-06, + "loss": 0.1342, "step": 14301 }, { - "epoch": 0.82, - "grad_norm": 0.6705088211808614, - "learning_rate": 1.6213779550589959e-06, - "loss": 0.2417, + "epoch": 0.66, + "grad_norm": 0.3231017626637755, + "learning_rate": 5.561273129000213e-06, + "loss": 0.2444, "step": 14302 }, { - "epoch": 0.82, - "grad_norm": 0.41855789850269925, - "learning_rate": 1.6203622618427105e-06, - "loss": 0.2967, + "epoch": 0.66, + "grad_norm": 0.3116586453642888, + "learning_rate": 5.559939868808486e-06, + "loss": 0.2636, "step": 14303 }, { - "epoch": 0.82, - "grad_norm": 0.3418585073461008, - "learning_rate": 1.6193468588129192e-06, - "loss": 0.311, + "epoch": 0.66, + "grad_norm": 0.3808587296916276, + "learning_rate": 5.558606706915414e-06, + "loss": 0.1936, "step": 14304 }, { - "epoch": 0.82, - "grad_norm": 0.8013089892375115, - "learning_rate": 1.6183317460047853e-06, - "loss": 0.1193, + "epoch": 0.66, + "grad_norm": 0.495299200503403, + "learning_rate": 5.557273643350505e-06, + "loss": 0.3337, "step": 14305 }, { - "epoch": 0.82, - "grad_norm": 0.4221278782395689, - "learning_rate": 1.6173169234534602e-06, - "loss": 0.2809, + "epoch": 0.66, + "grad_norm": 1.261816240602834, + "learning_rate": 5.555940678143279e-06, + "loss": 0.6306, "step": 14306 }, { - "epoch": 0.82, - "grad_norm": 0.286743200585038, - "learning_rate": 1.6163023911940923e-06, - "loss": 0.2143, + "epoch": 0.66, + "grad_norm": 0.3210262523557024, + "learning_rate": 5.55460781132324e-06, + "loss": 0.2122, "step": 14307 }, { - "epoch": 0.82, - "grad_norm": 0.4101065643986548, - "learning_rate": 1.6152881492618123e-06, - "loss": 0.2727, + "epoch": 0.66, + "grad_norm": 0.3026122764285616, + "learning_rate": 5.553275042919899e-06, + "loss": 0.2489, "step": 14308 }, { - "epoch": 0.82, - "grad_norm": 0.3389018916742762, - "learning_rate": 1.614274197691743e-06, - "loss": 0.2422, + "epoch": 0.66, + "grad_norm": 0.5133763491120305, + "learning_rate": 5.5519423729627666e-06, + "loss": 0.2807, "step": 14309 }, { - "epoch": 0.82, - "grad_norm": 1.02805515707719, - "learning_rate": 1.6132605365189945e-06, - "loss": 0.6135, + "epoch": 0.66, + "grad_norm": 0.5352216354302248, + "learning_rate": 5.550609801481339e-06, + "loss": 0.3159, "step": 14310 }, { - "epoch": 0.82, - "grad_norm": 0.46498023154122176, - "learning_rate": 1.6122471657786764e-06, - "loss": 0.336, + "epoch": 0.66, + "grad_norm": 0.4105191469311303, + "learning_rate": 5.5492773285051225e-06, + "loss": 0.2444, "step": 14311 }, { - "epoch": 0.82, - "grad_norm": 0.24162416824879168, - "learning_rate": 1.6112340855058784e-06, - "loss": 0.199, + "epoch": 0.66, + "grad_norm": 0.3834621336207729, + "learning_rate": 5.547944954063616e-06, + "loss": 0.3261, "step": 14312 }, { - "epoch": 0.82, - "grad_norm": 0.4745324261022142, - "learning_rate": 1.6102212957356821e-06, - "loss": 0.2715, + "epoch": 0.66, + "grad_norm": 0.43230706275357317, + "learning_rate": 5.546612678186322e-06, + "loss": 0.2683, "step": 14313 }, { - "epoch": 0.82, - "grad_norm": 0.8109722485644276, - "learning_rate": 1.6092087965031623e-06, - "loss": 0.4024, + "epoch": 0.66, + "grad_norm": 0.5520800308974478, + "learning_rate": 5.54528050090273e-06, + "loss": 0.237, "step": 14314 }, { - "epoch": 0.82, - "grad_norm": 0.29032677627833714, - "learning_rate": 1.6081965878433781e-06, - "loss": 0.1993, + "epoch": 0.66, + "grad_norm": 0.22738306296687652, + "learning_rate": 5.543948422242331e-06, + "loss": 0.233, "step": 14315 }, { - "epoch": 0.82, - "grad_norm": 0.2764951495219204, - "learning_rate": 1.6071846697913907e-06, - "loss": 0.2629, + "epoch": 0.66, + "grad_norm": 0.41821931607414636, + "learning_rate": 5.542616442234618e-06, + "loss": 0.2761, "step": 14316 }, { - "epoch": 0.82, - "grad_norm": 1.0850913168148355, - "learning_rate": 1.6061730423822353e-06, - "loss": 0.4369, + "epoch": 0.66, + "grad_norm": 0.46820250400131486, + "learning_rate": 5.541284560909081e-06, + "loss": 0.2728, "step": 14317 }, { - "epoch": 0.82, - "grad_norm": 0.3368554761044434, - "learning_rate": 1.6051617056509427e-06, - "loss": 0.1969, + "epoch": 0.66, + "grad_norm": 1.1472145734195835, + "learning_rate": 5.539952778295212e-06, + "loss": 0.6372, "step": 14318 }, { - "epoch": 0.82, - "grad_norm": 0.6791350794668766, - "learning_rate": 1.604150659632543e-06, - "loss": 0.3668, + "epoch": 0.66, + "grad_norm": 0.4472733730394902, + "learning_rate": 5.538621094422485e-06, + "loss": 0.3172, "step": 14319 }, { - "epoch": 0.82, - "grad_norm": 0.2824110904939739, - "learning_rate": 1.6031399043620444e-06, - "loss": 0.2334, + "epoch": 0.66, + "grad_norm": 0.2741275945962341, + "learning_rate": 5.537289509320387e-06, + "loss": 0.2116, "step": 14320 }, { - "epoch": 0.82, - "grad_norm": 0.3433389634591825, - "learning_rate": 1.6021294398744491e-06, - "loss": 0.2899, + "epoch": 0.66, + "grad_norm": 0.441946978809499, + "learning_rate": 5.535958023018402e-06, + "loss": 0.2605, "step": 14321 }, { - "epoch": 0.82, - "grad_norm": 0.27915034093289076, - "learning_rate": 1.6011192662047493e-06, - "loss": 0.1297, + "epoch": 0.66, + "grad_norm": 0.3422502400983826, + "learning_rate": 5.534626635546e-06, + "loss": 0.1989, "step": 14322 }, { - "epoch": 0.82, - "grad_norm": 0.36788278715290085, - "learning_rate": 1.6001093833879288e-06, - "loss": 0.3075, + "epoch": 0.66, + "grad_norm": 0.3008213191002857, + "learning_rate": 5.533295346932664e-06, + "loss": 0.2738, "step": 14323 }, { - "epoch": 0.82, - "grad_norm": 0.3496665619411987, - "learning_rate": 1.5990997914589602e-06, - "loss": 0.2855, + "epoch": 0.66, + "grad_norm": 0.47999917826137445, + "learning_rate": 5.531964157207861e-06, + "loss": 0.2768, "step": 14324 }, { - "epoch": 0.82, - "grad_norm": 0.5939639419698534, - "learning_rate": 1.598090490452805e-06, - "loss": 0.2828, + "epoch": 0.66, + "grad_norm": 0.6523211684917575, + "learning_rate": 5.530633066401063e-06, + "loss": 0.3602, "step": 14325 }, { - "epoch": 0.82, - "grad_norm": 0.7697198306103441, - "learning_rate": 1.5970814804044143e-06, - "loss": 0.4741, + "epoch": 0.66, + "grad_norm": 0.3680564045403798, + "learning_rate": 5.529302074541748e-06, + "loss": 0.299, "step": 14326 }, { - "epoch": 0.82, - "grad_norm": 0.3498249717359082, - "learning_rate": 1.5960727613487282e-06, - "loss": 0.2813, + "epoch": 0.66, + "grad_norm": 0.3410408388934398, + "learning_rate": 5.527971181659371e-06, + "loss": 0.2598, "step": 14327 }, { - "epoch": 0.82, - "grad_norm": 0.2114390332677578, - "learning_rate": 1.5950643333206827e-06, - "loss": 0.1838, + "epoch": 0.66, + "grad_norm": 0.3743246906818137, + "learning_rate": 5.526640387783402e-06, + "loss": 0.2245, "step": 14328 }, { - "epoch": 0.82, - "grad_norm": 1.312899098888681, - "learning_rate": 1.5940561963551982e-06, - "loss": 0.3905, + "epoch": 0.66, + "grad_norm": 0.4236182190119762, + "learning_rate": 5.525309692943303e-06, + "loss": 0.3064, "step": 14329 }, { - "epoch": 0.82, - "grad_norm": 0.39726885707439447, - "learning_rate": 1.5930483504871863e-06, - "loss": 0.2603, + "epoch": 0.66, + "grad_norm": 0.36609971659370943, + "learning_rate": 5.523979097168539e-06, + "loss": 0.1605, "step": 14330 }, { - "epoch": 0.82, - "grad_norm": 0.3441962419375129, - "learning_rate": 1.5920407957515472e-06, - "loss": 0.2621, + "epoch": 0.66, + "grad_norm": 0.3140950104501291, + "learning_rate": 5.52264860048856e-06, + "loss": 0.2662, "step": 14331 }, { - "epoch": 0.82, - "grad_norm": 1.244026243921077, - "learning_rate": 1.5910335321831749e-06, - "loss": 0.7247, + "epoch": 0.66, + "grad_norm": 0.4136570380239444, + "learning_rate": 5.521318202932829e-06, + "loss": 0.2771, "step": 14332 }, { - "epoch": 0.82, - "grad_norm": 0.32768146343407234, - "learning_rate": 1.5900265598169507e-06, - "loss": 0.2683, + "epoch": 0.66, + "grad_norm": 0.53866608449905, + "learning_rate": 5.519987904530792e-06, + "loss": 0.3006, "step": 14333 }, { - "epoch": 0.82, - "grad_norm": 0.2208843892022444, - "learning_rate": 1.5890198786877442e-06, - "loss": 0.1797, + "epoch": 0.66, + "grad_norm": 0.4065093063295819, + "learning_rate": 5.518657705311905e-06, + "loss": 0.2942, "step": 14334 }, { - "epoch": 0.82, - "grad_norm": 0.3084248363985326, - "learning_rate": 1.5880134888304155e-06, - "loss": 0.2296, + "epoch": 0.66, + "grad_norm": 0.3888552951178163, + "learning_rate": 5.517327605305623e-06, + "loss": 0.3046, "step": 14335 }, { - "epoch": 0.82, - "grad_norm": 0.3367945172727081, - "learning_rate": 1.58700739027982e-06, - "loss": 0.2464, + "epoch": 0.66, + "grad_norm": 0.2501474266456698, + "learning_rate": 5.515997604541381e-06, + "loss": 0.2011, "step": 14336 }, { - "epoch": 0.82, - "grad_norm": 0.6608801653776333, - "learning_rate": 1.5860015830707976e-06, - "loss": 0.3797, + "epoch": 0.66, + "grad_norm": 0.7760044815122245, + "learning_rate": 5.514667703048632e-06, + "loss": 0.2028, "step": 14337 }, { - "epoch": 0.82, - "grad_norm": 1.5594061912195747, - "learning_rate": 1.5849960672381781e-06, - "loss": 0.3649, + "epoch": 0.66, + "grad_norm": 0.36496197443764716, + "learning_rate": 5.51333790085682e-06, + "loss": 0.2816, "step": 14338 }, { - "epoch": 0.82, - "grad_norm": 0.26050111086502553, - "learning_rate": 1.5839908428167806e-06, - "loss": 0.2343, + "epoch": 0.66, + "grad_norm": 0.4066806178599458, + "learning_rate": 5.512008197995379e-06, + "loss": 0.3044, "step": 14339 }, { - "epoch": 0.82, - "grad_norm": 0.537863403344283, - "learning_rate": 1.5829859098414202e-06, - "loss": 0.3252, + "epoch": 0.66, + "grad_norm": 0.48849882851465426, + "learning_rate": 5.510678594493755e-06, + "loss": 0.139, "step": 14340 }, { - "epoch": 0.82, - "grad_norm": 0.3168205101844687, - "learning_rate": 1.5819812683468971e-06, - "loss": 0.1417, + "epoch": 0.66, + "grad_norm": 0.37560524894971725, + "learning_rate": 5.509349090381371e-06, + "loss": 0.2794, "step": 14341 }, { - "epoch": 0.82, - "grad_norm": 0.36311874865663823, - "learning_rate": 1.5809769183680001e-06, - "loss": 0.2854, + "epoch": 0.66, + "grad_norm": 0.3325329110254885, + "learning_rate": 5.5080196856876796e-06, + "loss": 0.22, "step": 14342 }, { - "epoch": 0.82, - "grad_norm": 0.44223521127969234, - "learning_rate": 1.5799728599395093e-06, - "loss": 0.3553, + "epoch": 0.66, + "grad_norm": 0.31908110212494073, + "learning_rate": 5.5066903804421025e-06, + "loss": 0.2354, "step": 14343 }, { - "epoch": 0.82, - "grad_norm": 0.40043078151688744, - "learning_rate": 1.5789690930961955e-06, - "loss": 0.2374, + "epoch": 0.66, + "grad_norm": 0.36365956549982315, + "learning_rate": 5.505361174674065e-06, + "loss": 0.2826, "step": 14344 }, { - "epoch": 0.82, - "grad_norm": 0.3187473275956427, - "learning_rate": 1.577965617872821e-06, - "loss": 0.2568, + "epoch": 0.66, + "grad_norm": 0.9942855327110379, + "learning_rate": 5.504032068413003e-06, + "loss": 0.4414, "step": 14345 }, { - "epoch": 0.82, - "grad_norm": 0.40970369506063714, - "learning_rate": 1.5769624343041356e-06, - "loss": 0.2775, + "epoch": 0.66, + "grad_norm": 0.5608271277029652, + "learning_rate": 5.50270306168833e-06, + "loss": 0.1874, "step": 14346 }, { - "epoch": 0.82, - "grad_norm": 0.27328936575328555, - "learning_rate": 1.5759595424248798e-06, - "loss": 0.2324, + "epoch": 0.66, + "grad_norm": 0.33407072038868196, + "learning_rate": 5.501374154529487e-06, + "loss": 0.2796, "step": 14347 }, { - "epoch": 0.82, - "grad_norm": 0.3200545522278074, - "learning_rate": 1.5749569422697786e-06, - "loss": 0.2029, + "epoch": 0.66, + "grad_norm": 0.3640256695011617, + "learning_rate": 5.5000453469658766e-06, + "loss": 0.2657, "step": 14348 }, { - "epoch": 0.82, - "grad_norm": 0.7813268400056336, - "learning_rate": 1.57395463387356e-06, - "loss": 0.4164, + "epoch": 0.66, + "grad_norm": 0.5190160215030387, + "learning_rate": 5.498716639026931e-06, + "loss": 0.2438, "step": 14349 }, { - "epoch": 0.82, - "grad_norm": 1.362908227303668, - "learning_rate": 1.572952617270932e-06, - "loss": 0.7034, + "epoch": 0.66, + "grad_norm": 0.386609989311832, + "learning_rate": 5.497388030742057e-06, + "loss": 0.2247, "step": 14350 }, { - "epoch": 0.82, - "grad_norm": 0.2630012527604643, - "learning_rate": 1.5719508924965876e-06, - "loss": 0.2133, + "epoch": 0.66, + "grad_norm": 0.37192685195966674, + "learning_rate": 5.496059522140671e-06, + "loss": 0.292, "step": 14351 }, { - "epoch": 0.82, - "grad_norm": 0.5084932701306548, - "learning_rate": 1.5709494595852238e-06, - "loss": 0.3867, + "epoch": 0.66, + "grad_norm": 0.7898882143323293, + "learning_rate": 5.494731113252192e-06, + "loss": 0.4606, "step": 14352 }, { - "epoch": 0.82, - "grad_norm": 0.4391672927929206, - "learning_rate": 1.569948318571517e-06, - "loss": 0.2645, + "epoch": 0.66, + "grad_norm": 0.3135239208686496, + "learning_rate": 5.493402804106018e-06, + "loss": 0.2068, "step": 14353 }, { - "epoch": 0.82, - "grad_norm": 0.24787677119492113, - "learning_rate": 1.5689474694901386e-06, - "loss": 0.1697, + "epoch": 0.66, + "grad_norm": 0.32408218046667736, + "learning_rate": 5.492074594731565e-06, + "loss": 0.1991, "step": 14354 }, { - "epoch": 0.82, - "grad_norm": 0.5045982631903566, - "learning_rate": 1.5679469123757463e-06, - "loss": 0.331, + "epoch": 0.66, + "grad_norm": 0.3803391427905632, + "learning_rate": 5.490746485158237e-06, + "loss": 0.3036, "step": 14355 }, { - "epoch": 0.82, - "grad_norm": 0.5158521633631827, - "learning_rate": 1.566946647262988e-06, - "loss": 0.3973, + "epoch": 0.66, + "grad_norm": 0.3528469731216006, + "learning_rate": 5.489418475415434e-06, + "loss": 0.2276, "step": 14356 }, { - "epoch": 0.82, - "grad_norm": 0.2975250660129979, - "learning_rate": 1.5659466741865059e-06, - "loss": 0.2084, + "epoch": 0.66, + "grad_norm": 1.2589976042055355, + "learning_rate": 5.48809056553256e-06, + "loss": 0.4672, "step": 14357 }, { - "epoch": 0.82, - "grad_norm": 0.882686185713709, - "learning_rate": 1.5649469931809291e-06, - "loss": 0.4715, + "epoch": 0.66, + "grad_norm": 1.4107037412336747, + "learning_rate": 5.486762755539005e-06, + "loss": 0.8533, "step": 14358 }, { - "epoch": 0.82, - "grad_norm": 0.4060897628478708, - "learning_rate": 1.5639476042808743e-06, - "loss": 0.3158, + "epoch": 0.66, + "grad_norm": 0.2640953104311103, + "learning_rate": 5.4854350454641825e-06, + "loss": 0.2157, "step": 14359 }, { - "epoch": 0.83, - "grad_norm": 0.37822526003360196, - "learning_rate": 1.5629485075209494e-06, - "loss": 0.2885, + "epoch": 0.66, + "grad_norm": 0.8085484146032684, + "learning_rate": 5.484107435337475e-06, + "loss": 0.483, "step": 14360 }, { - "epoch": 0.83, - "grad_norm": 0.26592851936967965, - "learning_rate": 1.5619497029357566e-06, - "loss": 0.1386, + "epoch": 0.66, + "grad_norm": 0.4416780953741353, + "learning_rate": 5.482779925188273e-06, + "loss": 0.2674, "step": 14361 }, { - "epoch": 0.83, - "grad_norm": 0.48297293822615317, - "learning_rate": 1.5609511905598828e-06, - "loss": 0.3686, + "epoch": 0.66, + "grad_norm": 0.2650017487873503, + "learning_rate": 5.481452515045974e-06, + "loss": 0.2202, "step": 14362 }, { - "epoch": 0.83, - "grad_norm": 0.3980489462284402, - "learning_rate": 1.559952970427907e-06, - "loss": 0.2771, + "epoch": 0.66, + "grad_norm": 0.34481439078217285, + "learning_rate": 5.480125204939952e-06, + "loss": 0.2314, "step": 14363 }, { - "epoch": 0.83, - "grad_norm": 0.3248249460067187, - "learning_rate": 1.5589550425743938e-06, - "loss": 0.2326, + "epoch": 0.66, + "grad_norm": 1.2231596893490546, + "learning_rate": 5.478797994899612e-06, + "loss": 0.7127, "step": 14364 }, { - "epoch": 0.83, - "grad_norm": 0.7106496612344982, - "learning_rate": 1.5579574070339077e-06, - "loss": 0.397, + "epoch": 0.66, + "grad_norm": 0.3719375988781338, + "learning_rate": 5.477470884954321e-06, + "loss": 0.2886, "step": 14365 }, { - "epoch": 0.83, - "grad_norm": 0.25155760454843434, - "learning_rate": 1.5569600638409931e-06, - "loss": 0.179, + "epoch": 0.66, + "grad_norm": 1.0233224010643873, + "learning_rate": 5.47614387513347e-06, + "loss": 0.2958, "step": 14366 }, { - "epoch": 0.83, - "grad_norm": 0.23556004098954053, - "learning_rate": 1.5559630130301885e-06, - "loss": 0.1974, + "epoch": 0.66, + "grad_norm": 0.3757674457037407, + "learning_rate": 5.47481696546643e-06, + "loss": 0.3035, "step": 14367 }, { - "epoch": 0.83, - "grad_norm": 1.3610236878509954, - "learning_rate": 1.5549662546360223e-06, - "loss": 0.7535, + "epoch": 0.66, + "grad_norm": 0.3762208577694757, + "learning_rate": 5.473490155982581e-06, + "loss": 0.2925, "step": 14368 }, { - "epoch": 0.83, - "grad_norm": 0.4102886902865449, - "learning_rate": 1.5539697886930082e-06, - "loss": 0.3094, + "epoch": 0.66, + "grad_norm": 0.33938808007686827, + "learning_rate": 5.472163446711301e-06, + "loss": 0.1233, "step": 14369 }, { - "epoch": 0.83, - "grad_norm": 0.4806336247620259, - "learning_rate": 1.5529736152356601e-06, - "loss": 0.2572, + "epoch": 0.66, + "grad_norm": 0.46518643860864695, + "learning_rate": 5.470836837681955e-06, + "loss": 0.3492, "step": 14370 }, { - "epoch": 0.83, - "grad_norm": 0.3447873590811517, - "learning_rate": 1.551977734298472e-06, - "loss": 0.3017, + "epoch": 0.66, + "grad_norm": 0.37648370199421266, + "learning_rate": 5.469510328923915e-06, + "loss": 0.2799, "step": 14371 }, { - "epoch": 0.83, - "grad_norm": 0.3259821776760638, - "learning_rate": 1.5509821459159312e-06, - "loss": 0.2352, + "epoch": 0.66, + "grad_norm": 0.5581998802685205, + "learning_rate": 5.468183920466554e-06, + "loss": 0.2899, "step": 14372 }, { - "epoch": 0.83, - "grad_norm": 0.35766169837779577, - "learning_rate": 1.5499868501225135e-06, - "loss": 0.2043, + "epoch": 0.66, + "grad_norm": 0.8899053321042577, + "learning_rate": 5.466857612339229e-06, + "loss": 0.4631, "step": 14373 }, { - "epoch": 0.83, - "grad_norm": 0.45759062221042746, - "learning_rate": 1.548991846952691e-06, - "loss": 0.2734, + "epoch": 0.66, + "grad_norm": 0.27133659544050065, + "learning_rate": 5.4655314045713115e-06, + "loss": 0.2017, "step": 14374 }, { - "epoch": 0.83, - "grad_norm": 0.26891305999108633, - "learning_rate": 1.5479971364409163e-06, - "loss": 0.2624, + "epoch": 0.66, + "grad_norm": 0.331607300263977, + "learning_rate": 5.464205297192155e-06, + "loss": 0.2512, "step": 14375 }, { - "epoch": 0.83, - "grad_norm": 0.6950747504087013, - "learning_rate": 1.5470027186216386e-06, - "loss": 0.3777, + "epoch": 0.66, + "grad_norm": 1.0751161400848894, + "learning_rate": 5.4628792902311204e-06, + "loss": 0.4687, "step": 14376 }, { - "epoch": 0.83, - "grad_norm": 0.8274621163981223, - "learning_rate": 1.5460085935292902e-06, - "loss": 0.2935, + "epoch": 0.66, + "grad_norm": 0.37034504710116734, + "learning_rate": 5.461553383717566e-06, + "loss": 0.2841, "step": 14377 }, { - "epoch": 0.83, - "grad_norm": 0.5470660206047755, - "learning_rate": 1.5450147611983024e-06, - "loss": 0.2792, + "epoch": 0.66, + "grad_norm": 0.884971947518883, + "learning_rate": 5.46022757768085e-06, + "loss": 0.3778, "step": 14378 }, { - "epoch": 0.83, - "grad_norm": 0.22596268465440378, - "learning_rate": 1.5440212216630902e-06, - "loss": 0.2247, + "epoch": 0.66, + "grad_norm": 0.3632622280557171, + "learning_rate": 5.45890187215032e-06, + "loss": 0.2552, "step": 14379 }, { - "epoch": 0.83, - "grad_norm": 0.4563189462879652, - "learning_rate": 1.54302797495806e-06, - "loss": 0.2061, + "epoch": 0.66, + "grad_norm": 0.3535550387344249, + "learning_rate": 5.457576267155317e-06, + "loss": 0.2652, "step": 14380 }, { - "epoch": 0.83, - "grad_norm": 0.408530641518608, - "learning_rate": 1.5420350211176072e-06, - "loss": 0.2611, + "epoch": 0.66, + "grad_norm": 0.3348489224699886, + "learning_rate": 5.4562507627252055e-06, + "loss": 0.1684, "step": 14381 }, { - "epoch": 0.83, - "grad_norm": 0.4614799902770901, - "learning_rate": 1.541042360176115e-06, - "loss": 0.3243, + "epoch": 0.66, + "grad_norm": 0.36496879557908773, + "learning_rate": 5.4549253588893185e-06, + "loss": 0.2667, "step": 14382 }, { - "epoch": 0.83, - "grad_norm": 0.3767321614513136, - "learning_rate": 1.5400499921679647e-06, - "loss": 0.2978, + "epoch": 0.66, + "grad_norm": 0.3387178351859606, + "learning_rate": 5.4536000556770085e-06, + "loss": 0.2746, "step": 14383 }, { - "epoch": 0.83, - "grad_norm": 0.32608810726150617, - "learning_rate": 1.5390579171275222e-06, - "loss": 0.1694, + "epoch": 0.66, + "grad_norm": 0.9065033237617657, + "learning_rate": 5.452274853117606e-06, + "loss": 0.3664, "step": 14384 }, { - "epoch": 0.83, - "grad_norm": 0.2786817942246057, - "learning_rate": 1.5380661350891346e-06, - "loss": 0.1812, + "epoch": 0.66, + "grad_norm": 0.44045236396572185, + "learning_rate": 5.450949751240456e-06, + "loss": 0.193, "step": 14385 }, { - "epoch": 0.83, - "grad_norm": 1.2423112132808052, - "learning_rate": 1.5370746460871555e-06, - "loss": 0.7655, + "epoch": 0.66, + "grad_norm": 0.39150730856634075, + "learning_rate": 5.449624750074898e-06, + "loss": 0.2301, "step": 14386 }, { - "epoch": 0.83, - "grad_norm": 0.24830800114573304, - "learning_rate": 1.5360834501559185e-06, - "loss": 0.2066, + "epoch": 0.66, + "grad_norm": 0.2653637501881538, + "learning_rate": 5.4482998496502585e-06, + "loss": 0.2546, "step": 14387 }, { - "epoch": 0.83, - "grad_norm": 0.6941033661044869, - "learning_rate": 1.5350925473297462e-06, - "loss": 0.3225, + "epoch": 0.66, + "grad_norm": 0.8552791134012457, + "learning_rate": 5.446975049995873e-06, + "loss": 0.5341, "step": 14388 }, { - "epoch": 0.83, - "grad_norm": 1.3297493516790682, - "learning_rate": 1.5341019376429533e-06, - "loss": 0.4718, + "epoch": 0.66, + "grad_norm": 0.312245914211541, + "learning_rate": 5.445650351141076e-06, + "loss": 0.1868, "step": 14389 }, { - "epoch": 0.83, - "grad_norm": 0.33027397591322355, - "learning_rate": 1.5331116211298492e-06, - "loss": 0.1663, + "epoch": 0.66, + "grad_norm": 0.5971458131692577, + "learning_rate": 5.444325753115186e-06, + "loss": 0.3577, "step": 14390 }, { - "epoch": 0.83, - "grad_norm": 0.3162742331726112, - "learning_rate": 1.532121597824725e-06, - "loss": 0.2882, + "epoch": 0.66, + "grad_norm": 0.6550121440756231, + "learning_rate": 5.443001255947538e-06, + "loss": 0.3292, "step": 14391 }, { - "epoch": 0.83, - "grad_norm": 0.3078259870411867, - "learning_rate": 1.5311318677618658e-06, - "loss": 0.1998, + "epoch": 0.66, + "grad_norm": 0.3114984959470451, + "learning_rate": 5.441676859667445e-06, + "loss": 0.201, "step": 14392 }, { - "epoch": 0.83, - "grad_norm": 0.3203383686794931, - "learning_rate": 1.5301424309755464e-06, - "loss": 0.2119, + "epoch": 0.66, + "grad_norm": 0.2694872293758464, + "learning_rate": 5.440352564304235e-06, + "loss": 0.1715, "step": 14393 }, { - "epoch": 0.83, - "grad_norm": 0.9170395837276937, - "learning_rate": 1.529153287500027e-06, - "loss": 0.5176, + "epoch": 0.66, + "grad_norm": 0.5136769244123666, + "learning_rate": 5.439028369887223e-06, + "loss": 0.3585, "step": 14394 }, { - "epoch": 0.83, - "grad_norm": 0.3675141498763632, - "learning_rate": 1.5281644373695682e-06, - "loss": 0.2876, + "epoch": 0.66, + "grad_norm": 0.30259757898238504, + "learning_rate": 5.43770427644573e-06, + "loss": 0.2241, "step": 14395 }, { - "epoch": 0.83, - "grad_norm": 0.6819063271267362, - "learning_rate": 1.52717588061841e-06, - "loss": 0.26, + "epoch": 0.66, + "grad_norm": 0.8619869832343193, + "learning_rate": 5.436380284009064e-06, + "loss": 0.4856, "step": 14396 }, { - "epoch": 0.83, - "grad_norm": 0.18812990097663482, - "learning_rate": 1.5261876172807865e-06, - "loss": 0.1458, + "epoch": 0.66, + "grad_norm": 1.3114937677966916, + "learning_rate": 5.43505639260654e-06, + "loss": 0.5828, "step": 14397 }, { - "epoch": 0.83, - "grad_norm": 0.4405755811998149, - "learning_rate": 1.5251996473909202e-06, - "loss": 0.3262, + "epoch": 0.66, + "grad_norm": 0.35811882016411706, + "learning_rate": 5.433732602267472e-06, + "loss": 0.2577, "step": 14398 }, { - "epoch": 0.83, - "grad_norm": 0.4274068515417533, - "learning_rate": 1.5242119709830272e-06, - "loss": 0.3208, + "epoch": 0.66, + "grad_norm": 0.2939232054874963, + "learning_rate": 5.432408913021159e-06, + "loss": 0.2068, "step": 14399 }, { - "epoch": 0.83, - "grad_norm": 0.30076883740926846, - "learning_rate": 1.5232245880913088e-06, - "loss": 0.2194, + "epoch": 0.66, + "grad_norm": 0.4261501978524303, + "learning_rate": 5.431085324896914e-06, + "loss": 0.3086, "step": 14400 }, { - "epoch": 0.83, - "grad_norm": 1.2838196412791696, - "learning_rate": 1.5222374987499588e-06, - "loss": 0.536, + "epoch": 0.66, + "grad_norm": 0.32856517158157045, + "learning_rate": 5.429761837924034e-06, + "loss": 0.2822, "step": 14401 }, { - "epoch": 0.83, - "grad_norm": 0.5728611563163679, - "learning_rate": 1.5212507029931578e-06, - "loss": 0.2791, + "epoch": 0.66, + "grad_norm": 0.9080750859580784, + "learning_rate": 5.428438452131821e-06, + "loss": 0.3451, "step": 14402 }, { - "epoch": 0.83, - "grad_norm": 0.22643317321614864, - "learning_rate": 1.5202642008550827e-06, - "loss": 0.2097, + "epoch": 0.66, + "grad_norm": 0.3668578061367511, + "learning_rate": 5.427115167549577e-06, + "loss": 0.3184, "step": 14403 }, { - "epoch": 0.83, - "grad_norm": 0.781228803059026, - "learning_rate": 1.519277992369893e-06, - "loss": 0.3939, + "epoch": 0.66, + "grad_norm": 0.6535115190380515, + "learning_rate": 5.425791984206594e-06, + "loss": 0.3355, "step": 14404 }, { - "epoch": 0.83, - "grad_norm": 0.5420511061950547, - "learning_rate": 1.5182920775717425e-06, - "loss": 0.3284, + "epoch": 0.66, + "grad_norm": 0.2352386557275581, + "learning_rate": 5.424468902132171e-06, + "loss": 0.1518, "step": 14405 }, { - "epoch": 0.83, - "grad_norm": 0.23424207699255722, - "learning_rate": 1.5173064564947714e-06, - "loss": 0.2109, + "epoch": 0.66, + "grad_norm": 0.41175504827423726, + "learning_rate": 5.4231459213555885e-06, + "loss": 0.2909, "step": 14406 }, { - "epoch": 0.83, - "grad_norm": 0.4855977165429987, - "learning_rate": 1.5163211291731116e-06, - "loss": 0.3538, + "epoch": 0.66, + "grad_norm": 0.5536814664288235, + "learning_rate": 5.421823041906151e-06, + "loss": 0.3068, "step": 14407 }, { - "epoch": 0.83, - "grad_norm": 0.7778771909877253, - "learning_rate": 1.5153360956408891e-06, - "loss": 0.3002, + "epoch": 0.66, + "grad_norm": 0.404480077497838, + "learning_rate": 5.420500263813141e-06, + "loss": 0.2555, "step": 14408 }, { - "epoch": 0.83, - "grad_norm": 0.40367431465738035, - "learning_rate": 1.514351355932212e-06, - "loss": 0.2746, + "epoch": 0.66, + "grad_norm": 1.2748727300898761, + "learning_rate": 5.419177587105836e-06, + "loss": 0.514, "step": 14409 }, { - "epoch": 0.83, - "grad_norm": 0.4594177145672264, - "learning_rate": 1.513366910081182e-06, - "loss": 0.2778, + "epoch": 0.66, + "grad_norm": 0.43927943465204355, + "learning_rate": 5.417855011813524e-06, + "loss": 0.2956, "step": 14410 }, { - "epoch": 0.83, - "grad_norm": 0.3612788772247959, - "learning_rate": 1.5123827581218898e-06, - "loss": 0.3111, + "epoch": 0.66, + "grad_norm": 0.26651081775619356, + "learning_rate": 5.416532537965487e-06, + "loss": 0.224, "step": 14411 }, { - "epoch": 0.83, - "grad_norm": 0.3966527721965122, - "learning_rate": 1.5113989000884189e-06, - "loss": 0.303, + "epoch": 0.66, + "grad_norm": 0.627345548578068, + "learning_rate": 5.415210165591005e-06, + "loss": 0.305, "step": 14412 }, { - "epoch": 0.83, - "grad_norm": 0.17782252117943567, - "learning_rate": 1.51041533601484e-06, - "loss": 0.0841, + "epoch": 0.66, + "grad_norm": 0.45388924677966047, + "learning_rate": 5.413887894719347e-06, + "loss": 0.3107, "step": 14413 }, { - "epoch": 0.83, - "grad_norm": 0.3769117084741131, - "learning_rate": 1.5094320659352123e-06, - "loss": 0.2748, + "epoch": 0.66, + "grad_norm": 0.29437828934787064, + "learning_rate": 5.412565725379792e-06, + "loss": 0.24, "step": 14414 }, { - "epoch": 0.83, - "grad_norm": 0.3041994326675657, - "learning_rate": 1.5084490898835857e-06, - "loss": 0.2886, + "epoch": 0.66, + "grad_norm": 0.5397002300790241, + "learning_rate": 5.411243657601612e-06, + "loss": 0.2433, "step": 14415 }, { - "epoch": 0.83, - "grad_norm": 0.6275402594647362, - "learning_rate": 1.5074664078940039e-06, - "loss": 0.2981, + "epoch": 0.66, + "grad_norm": 0.465348874140996, + "learning_rate": 5.4099216914140726e-06, + "loss": 0.2553, "step": 14416 }, { - "epoch": 0.83, - "grad_norm": 0.7745986505170465, - "learning_rate": 1.5064840200004972e-06, - "loss": 0.4663, + "epoch": 0.66, + "grad_norm": 0.3285273820616766, + "learning_rate": 5.408599826846448e-06, + "loss": 0.1946, "step": 14417 }, { - "epoch": 0.83, - "grad_norm": 0.26455906511621774, - "learning_rate": 1.5055019262370807e-06, - "loss": 0.2277, + "epoch": 0.66, + "grad_norm": 0.39442925990231276, + "learning_rate": 5.407278063927992e-06, + "loss": 0.2642, "step": 14418 }, { - "epoch": 0.83, - "grad_norm": 0.2744273616921841, - "learning_rate": 1.5045201266377662e-06, - "loss": 0.191, + "epoch": 0.66, + "grad_norm": 0.33035773943610747, + "learning_rate": 5.405956402687974e-06, + "loss": 0.2458, "step": 14419 }, { - "epoch": 0.83, - "grad_norm": 0.8188194127786869, - "learning_rate": 1.5035386212365554e-06, - "loss": 0.4115, + "epoch": 0.66, + "grad_norm": 1.4305254747842355, + "learning_rate": 5.404634843155657e-06, + "loss": 0.7844, "step": 14420 }, { - "epoch": 0.83, - "grad_norm": 0.3522078195909208, - "learning_rate": 1.502557410067438e-06, - "loss": 0.2702, + "epoch": 0.66, + "grad_norm": 0.4699510254831701, + "learning_rate": 5.4033133853602916e-06, + "loss": 0.1365, "step": 14421 }, { - "epoch": 0.83, - "grad_norm": 0.6756378638949322, - "learning_rate": 1.5015764931643916e-06, - "loss": 0.4208, + "epoch": 0.66, + "grad_norm": 0.38911122580607826, + "learning_rate": 5.401992029331142e-06, + "loss": 0.2823, "step": 14422 }, { - "epoch": 0.83, - "grad_norm": 0.25982068134200115, - "learning_rate": 1.5005958705613833e-06, - "loss": 0.2264, + "epoch": 0.66, + "grad_norm": 0.3505614432978457, + "learning_rate": 5.400670775097449e-06, + "loss": 0.2846, "step": 14423 }, { - "epoch": 0.83, - "grad_norm": 0.3699918270593306, - "learning_rate": 1.4996155422923764e-06, - "loss": 0.3032, + "epoch": 0.66, + "grad_norm": 0.6976714553114252, + "learning_rate": 5.399349622688479e-06, + "loss": 0.3654, "step": 14424 }, { - "epoch": 0.83, - "grad_norm": 0.31773506752213887, - "learning_rate": 1.4986355083913184e-06, - "loss": 0.1678, + "epoch": 0.66, + "grad_norm": 0.3441830327473105, + "learning_rate": 5.398028572133476e-06, + "loss": 0.1555, "step": 14425 }, { - "epoch": 0.83, - "grad_norm": 0.28175897232877206, - "learning_rate": 1.4976557688921478e-06, - "loss": 0.2038, + "epoch": 0.66, + "grad_norm": 0.2687335254148413, + "learning_rate": 5.39670762346168e-06, + "loss": 0.2417, "step": 14426 }, { - "epoch": 0.83, - "grad_norm": 0.3882773152820447, - "learning_rate": 1.4966763238287885e-06, - "loss": 0.2799, + "epoch": 0.66, + "grad_norm": 0.3664692348103668, + "learning_rate": 5.395386776702341e-06, + "loss": 0.2197, "step": 14427 }, { - "epoch": 0.83, - "grad_norm": 0.6931088691932338, - "learning_rate": 1.4956971732351655e-06, - "loss": 0.3976, + "epoch": 0.66, + "grad_norm": 0.4769917503211647, + "learning_rate": 5.3940660318847e-06, + "loss": 0.1694, "step": 14428 }, { - "epoch": 0.83, - "grad_norm": 0.3691483478603107, - "learning_rate": 1.4947183171451841e-06, - "loss": 0.2152, + "epoch": 0.66, + "grad_norm": 0.47721874999718245, + "learning_rate": 5.392745389038003e-06, + "loss": 0.3108, "step": 14429 }, { - "epoch": 0.83, - "grad_norm": 0.38071812733088667, - "learning_rate": 1.4937397555927413e-06, - "loss": 0.2909, + "epoch": 0.66, + "grad_norm": 0.527427882970769, + "learning_rate": 5.391424848191478e-06, + "loss": 0.3072, "step": 14430 }, { - "epoch": 0.83, - "grad_norm": 0.2094977356719134, - "learning_rate": 1.4927614886117248e-06, - "loss": 0.1925, + "epoch": 0.66, + "grad_norm": 0.31976568891665114, + "learning_rate": 5.390104409374364e-06, + "loss": 0.2269, "step": 14431 }, { - "epoch": 0.83, - "grad_norm": 0.5582480502897131, - "learning_rate": 1.4917835162360107e-06, - "loss": 0.2479, + "epoch": 0.66, + "grad_norm": 0.368047782843205, + "learning_rate": 5.3887840726159e-06, + "loss": 0.2687, "step": 14432 }, { - "epoch": 0.83, - "grad_norm": 0.3547248993099505, - "learning_rate": 1.4908058384994684e-06, - "loss": 0.2633, + "epoch": 0.66, + "grad_norm": 0.5424350217804997, + "learning_rate": 5.387463837945308e-06, + "loss": 0.2768, "step": 14433 }, { - "epoch": 0.83, - "grad_norm": 0.33898191546731055, - "learning_rate": 1.4898284554359555e-06, - "loss": 0.2929, + "epoch": 0.66, + "grad_norm": 0.2690367076208699, + "learning_rate": 5.386143705391826e-06, + "loss": 0.2132, "step": 14434 }, { - "epoch": 0.83, - "grad_norm": 1.20134478839783, - "learning_rate": 1.4888513670793159e-06, - "loss": 0.7416, + "epoch": 0.66, + "grad_norm": 0.5433824635285751, + "learning_rate": 5.384823674984671e-06, + "loss": 0.3446, "step": 14435 }, { - "epoch": 0.83, - "grad_norm": 0.31287229804617556, - "learning_rate": 1.4878745734633859e-06, - "loss": 0.1874, + "epoch": 0.66, + "grad_norm": 0.7454809287827713, + "learning_rate": 5.383503746753072e-06, + "loss": 0.416, "step": 14436 }, { - "epoch": 0.83, - "grad_norm": 0.28716920169026183, - "learning_rate": 1.4868980746219953e-06, - "loss": 0.1744, + "epoch": 0.66, + "grad_norm": 0.5757490948557092, + "learning_rate": 5.382183920726254e-06, + "loss": 0.3496, "step": 14437 }, { - "epoch": 0.83, - "grad_norm": 0.3602326317426348, - "learning_rate": 1.485921870588959e-06, - "loss": 0.3254, + "epoch": 0.66, + "grad_norm": 0.3128011414640952, + "learning_rate": 5.38086419693343e-06, + "loss": 0.2422, "step": 14438 }, { - "epoch": 0.83, - "grad_norm": 0.34644487846977096, - "learning_rate": 1.4849459613980821e-06, - "loss": 0.2226, + "epoch": 0.66, + "grad_norm": 0.2836686810207828, + "learning_rate": 5.379544575403823e-06, + "loss": 0.1912, "step": 14439 }, { - "epoch": 0.83, - "grad_norm": 1.7099936095796247, - "learning_rate": 1.4839703470831568e-06, - "loss": 0.6051, + "epoch": 0.66, + "grad_norm": 0.5558319282259456, + "learning_rate": 5.378225056166639e-06, + "loss": 0.3545, "step": 14440 }, { - "epoch": 0.83, - "grad_norm": 1.1426420559205335, - "learning_rate": 1.4829950276779759e-06, - "loss": 0.6309, + "epoch": 0.66, + "grad_norm": 0.40245630060166104, + "learning_rate": 5.376905639251106e-06, + "loss": 0.2542, "step": 14441 }, { - "epoch": 0.83, - "grad_norm": 0.21521110544361674, - "learning_rate": 1.4820200032163102e-06, - "loss": 0.2127, + "epoch": 0.66, + "grad_norm": 0.3877637447004567, + "learning_rate": 5.375586324686423e-06, + "loss": 0.3077, "step": 14442 }, { - "epoch": 0.83, - "grad_norm": 0.5594517974144692, - "learning_rate": 1.481045273731926e-06, - "loss": 0.2786, + "epoch": 0.66, + "grad_norm": 0.8551002646958938, + "learning_rate": 5.374267112501806e-06, + "loss": 0.4739, "step": 14443 }, { - "epoch": 0.83, - "grad_norm": 0.6070929376142952, - "learning_rate": 1.480070839258575e-06, - "loss": 0.3338, + "epoch": 0.66, + "grad_norm": 0.32714889434704203, + "learning_rate": 5.37294800272645e-06, + "loss": 0.2214, "step": 14444 }, { - "epoch": 0.83, - "grad_norm": 0.2462055074718749, - "learning_rate": 1.479096699830007e-06, - "loss": 0.1646, + "epoch": 0.66, + "grad_norm": 0.3305863395603335, + "learning_rate": 5.371628995389568e-06, + "loss": 0.1931, "step": 14445 }, { - "epoch": 0.83, - "grad_norm": 0.3872583829909997, - "learning_rate": 1.4781228554799544e-06, - "loss": 0.3043, + "epoch": 0.66, + "grad_norm": 0.3380465840473011, + "learning_rate": 5.370310090520362e-06, + "loss": 0.2837, "step": 14446 }, { - "epoch": 0.83, - "grad_norm": 0.7217473632643663, - "learning_rate": 1.4771493062421393e-06, - "loss": 0.5007, + "epoch": 0.66, + "grad_norm": 0.3701302079731544, + "learning_rate": 5.3689912881480244e-06, + "loss": 0.2188, "step": 14447 }, { - "epoch": 0.83, - "grad_norm": 0.3543168866164041, - "learning_rate": 1.4761760521502788e-06, - "loss": 0.272, + "epoch": 0.66, + "grad_norm": 1.4614701293298256, + "learning_rate": 5.3676725883017576e-06, + "loss": 0.7856, "step": 14448 }, { - "epoch": 0.83, - "grad_norm": 0.694609659735516, - "learning_rate": 1.4752030932380723e-06, - "loss": 0.2679, + "epoch": 0.66, + "grad_norm": 0.7725963236210721, + "learning_rate": 5.366353991010758e-06, + "loss": 0.4335, "step": 14449 }, { - "epoch": 0.83, - "grad_norm": 0.2668551292038593, - "learning_rate": 1.4742304295392173e-06, - "loss": 0.2415, + "epoch": 0.66, + "grad_norm": 0.28972231802789833, + "learning_rate": 5.365035496304211e-06, + "loss": 0.2729, "step": 14450 }, { - "epoch": 0.83, - "grad_norm": 0.348151555424367, - "learning_rate": 1.4732580610873991e-06, - "loss": 0.2819, + "epoch": 0.66, + "grad_norm": 0.28137335268548186, + "learning_rate": 5.363717104211315e-06, + "loss": 0.121, "step": 14451 }, { - "epoch": 0.83, - "grad_norm": 0.4865809063782709, - "learning_rate": 1.4722859879162831e-06, - "loss": 0.164, + "epoch": 0.66, + "grad_norm": 0.5169297125413294, + "learning_rate": 5.3623988147612495e-06, + "loss": 0.3273, "step": 14452 }, { - "epoch": 0.83, - "grad_norm": 1.3427781510155108, - "learning_rate": 1.471314210059539e-06, - "loss": 0.8253, + "epoch": 0.66, + "grad_norm": 0.4311696837405616, + "learning_rate": 5.361080627983205e-06, + "loss": 0.3157, "step": 14453 }, { - "epoch": 0.83, - "grad_norm": 0.2627245576209838, - "learning_rate": 1.4703427275508175e-06, - "loss": 0.2502, + "epoch": 0.66, + "grad_norm": 0.3319774606502584, + "learning_rate": 5.3597625439063685e-06, + "loss": 0.2323, "step": 14454 }, { - "epoch": 0.83, - "grad_norm": 0.38749675227945757, - "learning_rate": 1.4693715404237595e-06, - "loss": 0.2567, + "epoch": 0.66, + "grad_norm": 0.5716281914623899, + "learning_rate": 5.358444562559912e-06, + "loss": 0.3522, "step": 14455 }, { - "epoch": 0.83, - "grad_norm": 0.48887376832848684, - "learning_rate": 1.4684006487119996e-06, - "loss": 0.2749, + "epoch": 0.66, + "grad_norm": 0.42296957884115344, + "learning_rate": 5.357126683973024e-06, + "loss": 0.3232, "step": 14456 }, { - "epoch": 0.83, - "grad_norm": 0.23372320232067798, - "learning_rate": 1.4674300524491548e-06, - "loss": 0.1902, + "epoch": 0.66, + "grad_norm": 0.6845174987717982, + "learning_rate": 5.355808908174868e-06, + "loss": 0.3203, "step": 14457 }, { - "epoch": 0.83, - "grad_norm": 0.3504977034974888, - "learning_rate": 1.466459751668843e-06, - "loss": 0.2363, + "epoch": 0.66, + "grad_norm": 0.305091352006226, + "learning_rate": 5.354491235194635e-06, + "loss": 0.2421, "step": 14458 }, { - "epoch": 0.83, - "grad_norm": 1.1512347307415474, - "learning_rate": 1.4654897464046624e-06, - "loss": 0.7318, + "epoch": 0.66, + "grad_norm": 0.28851302489540775, + "learning_rate": 5.353173665061485e-06, + "loss": 0.2381, "step": 14459 }, { - "epoch": 0.83, - "grad_norm": 0.33497458339458847, - "learning_rate": 1.4645200366902056e-06, - "loss": 0.2622, + "epoch": 0.66, + "grad_norm": 1.113074031237448, + "learning_rate": 5.351856197804595e-06, + "loss": 0.385, "step": 14460 }, { - "epoch": 0.83, - "grad_norm": 0.6891307734834573, - "learning_rate": 1.4635506225590511e-06, - "loss": 0.3649, + "epoch": 0.66, + "grad_norm": 0.9076056142393685, + "learning_rate": 5.350538833453125e-06, + "loss": 0.476, "step": 14461 }, { - "epoch": 0.83, - "grad_norm": 0.24648296086863, - "learning_rate": 1.4625815040447733e-06, - "loss": 0.1953, + "epoch": 0.66, + "grad_norm": 0.2719306845380426, + "learning_rate": 5.349221572036244e-06, + "loss": 0.2645, "step": 14462 }, { - "epoch": 0.83, - "grad_norm": 0.3198739485686858, - "learning_rate": 1.4616126811809305e-06, - "loss": 0.2548, + "epoch": 0.66, + "grad_norm": 0.6808419677675623, + "learning_rate": 5.3479044135831185e-06, + "loss": 0.3856, "step": 14463 }, { - "epoch": 0.83, - "grad_norm": 0.4785902437377649, - "learning_rate": 1.4606441540010742e-06, - "loss": 0.2262, + "epoch": 0.66, + "grad_norm": 0.2405823195571114, + "learning_rate": 5.346587358122901e-06, + "loss": 0.0721, "step": 14464 }, { - "epoch": 0.83, - "grad_norm": 0.42064172693904334, - "learning_rate": 1.4596759225387401e-06, - "loss": 0.2391, + "epoch": 0.66, + "grad_norm": 0.2775846117014136, + "learning_rate": 5.34527040568476e-06, + "loss": 0.2347, "step": 14465 }, { - "epoch": 0.83, - "grad_norm": 0.3519446079196051, - "learning_rate": 1.4587079868274644e-06, - "loss": 0.2952, + "epoch": 0.66, + "grad_norm": 0.38389553846865637, + "learning_rate": 5.343953556297841e-06, + "loss": 0.3036, "step": 14466 }, { - "epoch": 0.83, - "grad_norm": 0.4042615656613845, - "learning_rate": 1.4577403469007645e-06, - "loss": 0.3237, + "epoch": 0.66, + "grad_norm": 0.4198451374664271, + "learning_rate": 5.3426368099913025e-06, + "loss": 0.2907, "step": 14467 }, { - "epoch": 0.83, - "grad_norm": 0.35400671879270007, - "learning_rate": 1.4567730027921489e-06, - "loss": 0.0845, + "epoch": 0.66, + "grad_norm": 0.34108744701743593, + "learning_rate": 5.3413201667943014e-06, + "loss": 0.2631, "step": 14468 }, { - "epoch": 0.83, - "grad_norm": 0.3576298559652004, - "learning_rate": 1.4558059545351144e-06, - "loss": 0.2974, + "epoch": 0.66, + "grad_norm": 0.7997617181162744, + "learning_rate": 5.340003626735977e-06, + "loss": 0.3685, "step": 14469 }, { - "epoch": 0.83, - "grad_norm": 0.2690274370266391, - "learning_rate": 1.4548392021631541e-06, - "loss": 0.264, + "epoch": 0.66, + "grad_norm": 0.26168178214980903, + "learning_rate": 5.33868718984548e-06, + "loss": 0.1844, "step": 14470 }, { - "epoch": 0.83, - "grad_norm": 1.260476076309206, - "learning_rate": 1.4538727457097447e-06, - "loss": 0.7678, + "epoch": 0.66, + "grad_norm": 0.3791899429974558, + "learning_rate": 5.337370856151958e-06, + "loss": 0.2855, "step": 14471 }, { - "epoch": 0.83, - "grad_norm": 0.32094729111190423, - "learning_rate": 1.4529065852083557e-06, - "loss": 0.2062, + "epoch": 0.66, + "grad_norm": 0.4708361080623944, + "learning_rate": 5.336054625684552e-06, + "loss": 0.2876, "step": 14472 }, { - "epoch": 0.83, - "grad_norm": 0.618603149339349, - "learning_rate": 1.451940720692443e-06, - "loss": 0.3733, + "epoch": 0.66, + "grad_norm": 0.3888497267244696, + "learning_rate": 5.334738498472405e-06, + "loss": 0.2202, "step": 14473 }, { - "epoch": 0.83, - "grad_norm": 0.3205669864381226, - "learning_rate": 1.450975152195454e-06, - "loss": 0.2918, + "epoch": 0.66, + "grad_norm": 0.31617708457312554, + "learning_rate": 5.333422474544641e-06, + "loss": 0.2766, "step": 14474 }, { - "epoch": 0.83, - "grad_norm": 0.3185380448055098, - "learning_rate": 1.4500098797508289e-06, - "loss": 0.1982, + "epoch": 0.66, + "grad_norm": 0.9056347354067686, + "learning_rate": 5.332106553930414e-06, + "loss": 0.387, "step": 14475 }, { - "epoch": 0.83, - "grad_norm": 0.24713688538065756, - "learning_rate": 1.4490449033919952e-06, - "loss": 0.1745, + "epoch": 0.67, + "grad_norm": 0.41903550969979786, + "learning_rate": 5.330790736658846e-06, + "loss": 0.2419, "step": 14476 }, { - "epoch": 0.83, - "grad_norm": 0.7044439761827327, - "learning_rate": 1.4480802231523682e-06, - "loss": 0.4051, + "epoch": 0.67, + "grad_norm": 0.25613455556744275, + "learning_rate": 5.329475022759074e-06, + "loss": 0.1646, "step": 14477 }, { - "epoch": 0.83, - "grad_norm": 0.22536764749533747, - "learning_rate": 1.447115839065354e-06, - "loss": 0.2154, + "epoch": 0.67, + "grad_norm": 0.39574753593715833, + "learning_rate": 5.32815941226022e-06, + "loss": 0.3361, "step": 14478 }, { - "epoch": 0.83, - "grad_norm": 0.754993666762245, - "learning_rate": 1.446151751164352e-06, - "loss": 0.4199, + "epoch": 0.67, + "grad_norm": 0.9777109415088288, + "learning_rate": 5.326843905191413e-06, + "loss": 0.5254, "step": 14479 }, { - "epoch": 0.83, - "grad_norm": 1.5464917011221442, - "learning_rate": 1.4451879594827467e-06, - "loss": 0.4158, + "epoch": 0.67, + "grad_norm": 0.4025870360630933, + "learning_rate": 5.325528501581783e-06, + "loss": 0.2175, "step": 14480 }, { - "epoch": 0.83, - "grad_norm": 0.22848279331701107, - "learning_rate": 1.444224464053916e-06, - "loss": 0.1559, + "epoch": 0.67, + "grad_norm": 0.5087201400407269, + "learning_rate": 5.324213201460442e-06, + "loss": 0.3129, "step": 14481 }, { - "epoch": 0.83, - "grad_norm": 0.26781759688718365, - "learning_rate": 1.44326126491122e-06, - "loss": 0.2418, + "epoch": 0.67, + "grad_norm": 0.4982503374275669, + "learning_rate": 5.322898004856518e-06, + "loss": 0.3086, "step": 14482 }, { - "epoch": 0.83, - "grad_norm": 0.7385480231144739, - "learning_rate": 1.4422983620880215e-06, - "loss": 0.4098, + "epoch": 0.67, + "grad_norm": 0.21573001542193646, + "learning_rate": 5.32158291179912e-06, + "loss": 0.1336, "step": 14483 }, { - "epoch": 0.83, - "grad_norm": 0.5600152349811515, - "learning_rate": 1.4413357556176633e-06, - "loss": 0.3144, + "epoch": 0.67, + "grad_norm": 1.4875484047752525, + "learning_rate": 5.320267922317368e-06, + "loss": 0.8108, "step": 14484 }, { - "epoch": 0.83, - "grad_norm": 0.38866276568802743, - "learning_rate": 1.4403734455334816e-06, - "loss": 0.2366, + "epoch": 0.67, + "grad_norm": 0.5569117814769349, + "learning_rate": 5.318953036440377e-06, + "loss": 0.368, "step": 14485 }, { - "epoch": 0.83, - "grad_norm": 0.3608062477208995, - "learning_rate": 1.4394114318687947e-06, - "loss": 0.2916, + "epoch": 0.67, + "grad_norm": 0.2804371677130325, + "learning_rate": 5.317638254197252e-06, + "loss": 0.2199, "step": 14486 }, { - "epoch": 0.83, - "grad_norm": 0.39215757913532784, - "learning_rate": 1.4384497146569242e-06, - "loss": 0.2683, + "epoch": 0.67, + "grad_norm": 1.0161243814057206, + "learning_rate": 5.3163235756171015e-06, + "loss": 0.4476, "step": 14487 }, { - "epoch": 0.83, - "grad_norm": 0.28871817789259424, - "learning_rate": 1.437488293931173e-06, - "loss": 0.1842, + "epoch": 0.67, + "grad_norm": 0.5548137285529009, + "learning_rate": 5.315009000729032e-06, + "loss": 0.2636, "step": 14488 }, { - "epoch": 0.83, - "grad_norm": 0.7837969352840899, - "learning_rate": 1.436527169724833e-06, - "loss": 0.3826, + "epoch": 0.67, + "grad_norm": 0.35132341809486545, + "learning_rate": 5.313694529562154e-06, + "loss": 0.2707, "step": 14489 }, { - "epoch": 0.83, - "grad_norm": 0.252579482719496, - "learning_rate": 1.4355663420711863e-06, - "loss": 0.2509, + "epoch": 0.67, + "grad_norm": 0.27502581392636205, + "learning_rate": 5.312380162145561e-06, + "loss": 0.2055, "step": 14490 }, { - "epoch": 0.83, - "grad_norm": 0.38934719167353166, - "learning_rate": 1.434605811003511e-06, - "loss": 0.179, + "epoch": 0.67, + "grad_norm": 0.7327273411749387, + "learning_rate": 5.311065898508346e-06, + "loss": 0.3972, "step": 14491 }, { - "epoch": 0.83, - "grad_norm": 1.327017593625259, - "learning_rate": 1.4336455765550684e-06, - "loss": 0.5127, + "epoch": 0.67, + "grad_norm": 0.42404795267894624, + "learning_rate": 5.309751738679621e-06, + "loss": 0.2675, "step": 14492 }, { - "epoch": 0.83, - "grad_norm": 0.3263390069030087, - "learning_rate": 1.4326856387591114e-06, - "loss": 0.1991, + "epoch": 0.67, + "grad_norm": 0.37715546698948915, + "learning_rate": 5.308437682688467e-06, + "loss": 0.2543, "step": 14493 }, { - "epoch": 0.83, - "grad_norm": 0.26938715579382766, - "learning_rate": 1.4317259976488806e-06, - "loss": 0.2504, + "epoch": 0.67, + "grad_norm": 0.4790058220320238, + "learning_rate": 5.307123730563984e-06, + "loss": 0.3344, "step": 14494 }, { - "epoch": 0.83, - "grad_norm": 0.642656981752686, - "learning_rate": 1.4307666532576115e-06, - "loss": 0.3615, + "epoch": 0.67, + "grad_norm": 0.3941499575352521, + "learning_rate": 5.305809882335256e-06, + "loss": 0.2606, "step": 14495 }, { - "epoch": 0.83, - "grad_norm": 0.31108348734056135, - "learning_rate": 1.429807605618525e-06, - "loss": 0.2628, + "epoch": 0.67, + "grad_norm": 0.3878626159935822, + "learning_rate": 5.304496138031373e-06, + "loss": 0.2077, "step": 14496 }, { - "epoch": 0.83, - "grad_norm": 0.5051662006316499, - "learning_rate": 1.4288488547648328e-06, - "loss": 0.2379, + "epoch": 0.67, + "grad_norm": 0.6185891648275387, + "learning_rate": 5.303182497681423e-06, + "loss": 0.3228, "step": 14497 }, { - "epoch": 0.83, - "grad_norm": 0.37356454597055017, - "learning_rate": 1.4278904007297356e-06, - "loss": 0.2442, + "epoch": 0.67, + "grad_norm": 0.30658686780698813, + "learning_rate": 5.3018689613144825e-06, + "loss": 0.2467, "step": 14498 }, { - "epoch": 0.83, - "grad_norm": 0.3072156818751481, - "learning_rate": 1.4269322435464229e-06, - "loss": 0.2397, + "epoch": 0.67, + "grad_norm": 0.39518966408365946, + "learning_rate": 5.3005555289596385e-06, + "loss": 0.157, "step": 14499 }, { - "epoch": 0.83, - "grad_norm": 0.9057485951009321, - "learning_rate": 1.425974383248081e-06, - "loss": 0.516, + "epoch": 0.67, + "grad_norm": 1.3428400026954694, + "learning_rate": 5.299242200645959e-06, + "loss": 0.5602, "step": 14500 }, { - "epoch": 0.83, - "grad_norm": 0.3275530243981576, - "learning_rate": 1.425016819867876e-06, - "loss": 0.2634, + "epoch": 0.67, + "grad_norm": 0.2521340364324956, + "learning_rate": 5.2979289764025336e-06, + "loss": 0.1875, "step": 14501 }, { - "epoch": 0.83, - "grad_norm": 0.5692330083855973, - "learning_rate": 1.4240595534389712e-06, - "loss": 0.2918, + "epoch": 0.67, + "grad_norm": 0.34141494819348706, + "learning_rate": 5.296615856258428e-06, + "loss": 0.2925, "step": 14502 }, { - "epoch": 0.83, - "grad_norm": 0.22938802148421475, - "learning_rate": 1.4231025839945123e-06, - "loss": 0.1967, + "epoch": 0.67, + "grad_norm": 0.9647587631860546, + "learning_rate": 5.295302840242711e-06, + "loss": 0.3035, "step": 14503 }, { - "epoch": 0.83, - "grad_norm": 1.3925463484862073, - "learning_rate": 1.422145911567645e-06, - "loss": 0.192, + "epoch": 0.67, + "grad_norm": 0.34425263885839713, + "learning_rate": 5.293989928384454e-06, + "loss": 0.2665, "step": 14504 }, { - "epoch": 0.83, - "grad_norm": 0.661586160240331, - "learning_rate": 1.4211895361914961e-06, - "loss": 0.2866, + "epoch": 0.67, + "grad_norm": 0.5078619915173804, + "learning_rate": 5.292677120712726e-06, + "loss": 0.2804, "step": 14505 }, { - "epoch": 0.83, - "grad_norm": 0.24761832146932847, - "learning_rate": 1.4202334578991838e-06, - "loss": 0.2535, + "epoch": 0.67, + "grad_norm": 0.37020006467979166, + "learning_rate": 5.2913644172565915e-06, + "loss": 0.233, "step": 14506 }, { - "epoch": 0.83, - "grad_norm": 0.6307606543288709, - "learning_rate": 1.419277676723816e-06, - "loss": 0.3035, + "epoch": 0.67, + "grad_norm": 0.3684262616450512, + "learning_rate": 5.290051818045108e-06, + "loss": 0.2699, "step": 14507 }, { - "epoch": 0.83, - "grad_norm": 0.3912702731263923, - "learning_rate": 1.4183221926984958e-06, - "loss": 0.294, + "epoch": 0.67, + "grad_norm": 0.82036337110452, + "learning_rate": 5.288739323107337e-06, + "loss": 0.4121, "step": 14508 }, { - "epoch": 0.83, - "grad_norm": 0.21325530333087425, - "learning_rate": 1.4173670058563082e-06, - "loss": 0.2035, + "epoch": 0.67, + "grad_norm": 0.37561320921871166, + "learning_rate": 5.2874269324723406e-06, + "loss": 0.2739, "step": 14509 }, { - "epoch": 0.83, - "grad_norm": 0.46726091602695835, - "learning_rate": 1.4164121162303335e-06, - "loss": 0.2975, + "epoch": 0.67, + "grad_norm": 0.40943853202100655, + "learning_rate": 5.286114646169166e-06, + "loss": 0.2491, "step": 14510 }, { - "epoch": 0.83, - "grad_norm": 0.4739114950446037, - "learning_rate": 1.4154575238536373e-06, - "loss": 0.1686, + "epoch": 0.67, + "grad_norm": 0.2966049707262867, + "learning_rate": 5.284802464226874e-06, + "loss": 0.1922, "step": 14511 }, { - "epoch": 0.83, - "grad_norm": 0.4402434355675197, - "learning_rate": 1.4145032287592753e-06, - "loss": 0.3343, + "epoch": 0.67, + "grad_norm": 1.274682575611519, + "learning_rate": 5.283490386674507e-06, + "loss": 0.2488, "step": 14512 }, { - "epoch": 0.83, - "grad_norm": 0.5244827607300405, - "learning_rate": 1.4135492309803e-06, - "loss": 0.3307, + "epoch": 0.67, + "grad_norm": 0.4109310191767567, + "learning_rate": 5.282178413541117e-06, + "loss": 0.2427, "step": 14513 }, { - "epoch": 0.83, - "grad_norm": 0.277632422490554, - "learning_rate": 1.4125955305497453e-06, - "loss": 0.2066, + "epoch": 0.67, + "grad_norm": 0.3613707744784703, + "learning_rate": 5.280866544855753e-06, + "loss": 0.3063, "step": 14514 }, { - "epoch": 0.83, - "grad_norm": 0.32009804462674135, - "learning_rate": 1.4116421275006386e-06, - "loss": 0.2483, + "epoch": 0.67, + "grad_norm": 0.9062320464961248, + "learning_rate": 5.279554780647451e-06, + "loss": 0.4564, "step": 14515 }, { - "epoch": 0.83, - "grad_norm": 0.6266221865588981, - "learning_rate": 1.410689021865993e-06, - "loss": 0.2119, + "epoch": 0.67, + "grad_norm": 0.31368508601698064, + "learning_rate": 5.278243120945262e-06, + "loss": 0.214, "step": 14516 }, { - "epoch": 0.83, - "grad_norm": 0.3426461166367405, - "learning_rate": 1.4097362136788196e-06, - "loss": 0.225, + "epoch": 0.67, + "grad_norm": 0.2505901747215737, + "learning_rate": 5.276931565778212e-06, + "loss": 0.2027, "step": 14517 }, { - "epoch": 0.83, - "grad_norm": 0.32656014034310793, - "learning_rate": 1.408783702972112e-06, - "loss": 0.2931, + "epoch": 0.67, + "grad_norm": 1.5737998800004696, + "learning_rate": 5.27562011517535e-06, + "loss": 0.709, "step": 14518 }, { - "epoch": 0.83, - "grad_norm": 0.8290687136751642, - "learning_rate": 1.4078314897788558e-06, - "loss": 0.4557, + "epoch": 0.67, + "grad_norm": 0.3555593864132874, + "learning_rate": 5.274308769165708e-06, + "loss": 0.2039, "step": 14519 }, { - "epoch": 0.83, - "grad_norm": 0.3310944158287448, - "learning_rate": 1.4068795741320241e-06, - "loss": 0.152, + "epoch": 0.67, + "grad_norm": 0.8154670788423888, + "learning_rate": 5.272997527778311e-06, + "loss": 0.398, "step": 14520 }, { - "epoch": 0.83, - "grad_norm": 0.23025651377461948, - "learning_rate": 1.4059279560645845e-06, - "loss": 0.2156, + "epoch": 0.67, + "grad_norm": 0.39026118425267137, + "learning_rate": 5.2716863910421926e-06, + "loss": 0.2943, "step": 14521 }, { - "epoch": 0.83, - "grad_norm": 0.3482816224844952, - "learning_rate": 1.4049766356094897e-06, - "loss": 0.2382, + "epoch": 0.67, + "grad_norm": 0.32156329941376555, + "learning_rate": 5.270375358986379e-06, + "loss": 0.2022, "step": 14522 }, { - "epoch": 0.83, - "grad_norm": 0.6896887320183515, - "learning_rate": 1.4040256127996842e-06, - "loss": 0.345, + "epoch": 0.67, + "grad_norm": 0.3210097030999681, + "learning_rate": 5.269064431639901e-06, + "loss": 0.1968, "step": 14523 }, { - "epoch": 0.83, - "grad_norm": 0.376874453876573, - "learning_rate": 1.403074887668101e-06, - "loss": 0.2253, + "epoch": 0.67, + "grad_norm": 0.9274995784342149, + "learning_rate": 5.2677536090317726e-06, + "loss": 0.4433, "step": 14524 }, { - "epoch": 0.83, - "grad_norm": 0.34520731206085, - "learning_rate": 1.4021244602476658e-06, - "loss": 0.3182, + "epoch": 0.67, + "grad_norm": 0.3288621853541652, + "learning_rate": 5.266442891191024e-06, + "loss": 0.2715, "step": 14525 }, { - "epoch": 0.83, - "grad_norm": 0.590729596317791, - "learning_rate": 1.401174330571291e-06, - "loss": 0.4018, + "epoch": 0.67, + "grad_norm": 0.3662531061308901, + "learning_rate": 5.2651322781466606e-06, + "loss": 0.2563, "step": 14526 }, { - "epoch": 0.83, - "grad_norm": 0.24916310579425358, - "learning_rate": 1.4002244986718793e-06, - "loss": 0.1518, + "epoch": 0.67, + "grad_norm": 1.0358854833762872, + "learning_rate": 5.263821769927707e-06, + "loss": 0.6452, "step": 14527 }, { - "epoch": 0.83, - "grad_norm": 0.9781155735931566, - "learning_rate": 1.3992749645823224e-06, - "loss": 0.4291, + "epoch": 0.67, + "grad_norm": 0.6295447392154762, + "learning_rate": 5.262511366563179e-06, + "loss": 0.3605, "step": 14528 }, { - "epoch": 0.83, - "grad_norm": 0.37701695438860766, - "learning_rate": 1.3983257283355044e-06, - "loss": 0.2982, + "epoch": 0.67, + "grad_norm": 0.2417549454054387, + "learning_rate": 5.261201068082078e-06, + "loss": 0.2088, "step": 14529 }, { - "epoch": 0.83, - "grad_norm": 0.2968145176708719, - "learning_rate": 1.3973767899642976e-06, - "loss": 0.2351, + "epoch": 0.67, + "grad_norm": 0.46958482547682684, + "learning_rate": 5.259890874513418e-06, + "loss": 0.2323, "step": 14530 }, { - "epoch": 0.83, - "grad_norm": 1.2241899528039462, - "learning_rate": 1.396428149501562e-06, - "loss": 0.8343, + "epoch": 0.67, + "grad_norm": 0.6570349794787905, + "learning_rate": 5.258580785886212e-06, + "loss": 0.3484, "step": 14531 }, { - "epoch": 0.83, - "grad_norm": 0.6203633467767298, - "learning_rate": 1.3954798069801468e-06, - "loss": 0.4185, + "epoch": 0.67, + "grad_norm": 0.39188773039830543, + "learning_rate": 5.2572708022294504e-06, + "loss": 0.2534, "step": 14532 }, { - "epoch": 0.83, - "grad_norm": 0.2830199485054341, - "learning_rate": 1.394531762432899e-06, - "loss": 0.2174, + "epoch": 0.67, + "grad_norm": 0.3870212383853106, + "learning_rate": 5.255960923572148e-06, + "loss": 0.3395, "step": 14533 }, { - "epoch": 0.84, - "grad_norm": 0.3082002175777337, - "learning_rate": 1.3935840158926461e-06, - "loss": 0.2218, + "epoch": 0.67, + "grad_norm": 0.4008293161846491, + "learning_rate": 5.2546511499432885e-06, + "loss": 0.2869, "step": 14534 }, { - "epoch": 0.84, - "grad_norm": 0.5911035214641597, - "learning_rate": 1.3926365673922082e-06, - "loss": 0.3011, + "epoch": 0.67, + "grad_norm": 0.24846248800658954, + "learning_rate": 5.253341481371888e-06, + "loss": 0.1418, "step": 14535 }, { - "epoch": 0.84, - "grad_norm": 0.40215627658380904, - "learning_rate": 1.3916894169643969e-06, - "loss": 0.3002, + "epoch": 0.67, + "grad_norm": 0.8344250754286472, + "learning_rate": 5.252031917886929e-06, + "loss": 0.4467, "step": 14536 }, { - "epoch": 0.84, - "grad_norm": 0.3721036297309638, - "learning_rate": 1.390742564642007e-06, - "loss": 0.2617, + "epoch": 0.67, + "grad_norm": 0.3129821689831803, + "learning_rate": 5.25072245951741e-06, + "loss": 0.2674, "step": 14537 }, { - "epoch": 0.84, - "grad_norm": 0.6464102731862245, - "learning_rate": 1.3897960104578357e-06, - "loss": 0.3488, + "epoch": 0.67, + "grad_norm": 0.3787801011056638, + "learning_rate": 5.249413106292316e-06, + "loss": 0.3206, "step": 14538 }, { - "epoch": 0.84, - "grad_norm": 0.4497840173503336, - "learning_rate": 1.3888497544446578e-06, - "loss": 0.2986, + "epoch": 0.67, + "grad_norm": 1.1539678575075514, + "learning_rate": 5.248103858240636e-06, + "loss": 0.4763, "step": 14539 }, { - "epoch": 0.84, - "grad_norm": 0.3635716294730615, - "learning_rate": 1.3879037966352426e-06, - "loss": 0.192, + "epoch": 0.67, + "grad_norm": 0.39929259462619465, + "learning_rate": 5.246794715391361e-06, + "loss": 0.2769, "step": 14540 }, { - "epoch": 0.84, - "grad_norm": 0.31564871045658416, - "learning_rate": 1.3869581370623464e-06, - "loss": 0.2819, + "epoch": 0.67, + "grad_norm": 0.4039984769911849, + "learning_rate": 5.245485677773465e-06, + "loss": 0.2942, "step": 14541 }, { - "epoch": 0.84, - "grad_norm": 0.35301383748174164, - "learning_rate": 1.3860127757587215e-06, - "loss": 0.2734, + "epoch": 0.67, + "grad_norm": 0.316524545467977, + "learning_rate": 5.2441767454159384e-06, + "loss": 0.1883, "step": 14542 }, { - "epoch": 0.84, - "grad_norm": 0.9236845077921836, - "learning_rate": 1.3850677127571033e-06, - "loss": 0.444, + "epoch": 0.67, + "grad_norm": 0.4009557959596617, + "learning_rate": 5.24286791834775e-06, + "loss": 0.3091, "step": 14543 }, { - "epoch": 0.84, - "grad_norm": 0.8576242151750284, - "learning_rate": 1.3841229480902207e-06, - "loss": 0.449, + "epoch": 0.67, + "grad_norm": 0.6263656068633076, + "learning_rate": 5.241559196597882e-06, + "loss": 0.3374, "step": 14544 }, { - "epoch": 0.84, - "grad_norm": 0.317992615475425, - "learning_rate": 1.3831784817907867e-06, - "loss": 0.27, + "epoch": 0.67, + "grad_norm": 0.32813763935887025, + "learning_rate": 5.240250580195311e-06, + "loss": 0.2579, "step": 14545 }, { - "epoch": 0.84, - "grad_norm": 0.4192884397471493, - "learning_rate": 1.382234313891515e-06, - "loss": 0.3332, + "epoch": 0.67, + "grad_norm": 0.7308668140176084, + "learning_rate": 5.238942069169e-06, + "loss": 0.3127, "step": 14546 }, { - "epoch": 0.84, - "grad_norm": 0.281125353055018, - "learning_rate": 1.3812904444250973e-06, - "loss": 0.1339, + "epoch": 0.67, + "grad_norm": 0.4260901109746353, + "learning_rate": 5.237633663547923e-06, + "loss": 0.311, "step": 14547 }, { - "epoch": 0.84, - "grad_norm": 0.4240781429981899, - "learning_rate": 1.3803468734242208e-06, - "loss": 0.2947, + "epoch": 0.67, + "grad_norm": 0.3713509311804359, + "learning_rate": 5.236325363361051e-06, + "loss": 0.237, "step": 14548 }, { - "epoch": 0.84, - "grad_norm": 0.34789522278090906, - "learning_rate": 1.3794036009215628e-06, - "loss": 0.3019, + "epoch": 0.67, + "grad_norm": 0.4083938079639334, + "learning_rate": 5.23501716863734e-06, + "loss": 0.2869, "step": 14549 }, { - "epoch": 0.84, - "grad_norm": 0.6447680621171787, - "learning_rate": 1.3784606269497835e-06, - "loss": 0.1991, + "epoch": 0.67, + "grad_norm": 0.29802925692894433, + "learning_rate": 5.23370907940576e-06, + "loss": 0.2354, "step": 14550 }, { - "epoch": 0.84, - "grad_norm": 0.42031524895275557, - "learning_rate": 1.377517951541545e-06, - "loss": 0.2907, + "epoch": 0.67, + "grad_norm": 1.4196772923541505, + "learning_rate": 5.232401095695259e-06, + "loss": 0.8012, "step": 14551 }, { - "epoch": 0.84, - "grad_norm": 0.5888956245708498, - "learning_rate": 1.3765755747294906e-06, - "loss": 0.357, + "epoch": 0.67, + "grad_norm": 0.3332856859102056, + "learning_rate": 5.231093217534812e-06, + "loss": 0.1123, "step": 14552 }, { - "epoch": 0.84, - "grad_norm": 0.22556962698291796, - "learning_rate": 1.3756334965462502e-06, - "loss": 0.1829, + "epoch": 0.67, + "grad_norm": 0.28690745049150934, + "learning_rate": 5.229785444953361e-06, + "loss": 0.2608, "step": 14553 }, { - "epoch": 0.84, - "grad_norm": 0.2882609123569876, - "learning_rate": 1.3746917170244522e-06, - "loss": 0.2108, + "epoch": 0.67, + "grad_norm": 0.62958333930175, + "learning_rate": 5.228477777979865e-06, + "loss": 0.3793, "step": 14554 }, { - "epoch": 0.84, - "grad_norm": 1.3360077461689062, - "learning_rate": 1.3737502361967092e-06, - "loss": 0.6013, + "epoch": 0.67, + "grad_norm": 0.23310095082284424, + "learning_rate": 5.2271702166432725e-06, + "loss": 0.1429, "step": 14555 }, { - "epoch": 0.84, - "grad_norm": 0.8700106534469142, - "learning_rate": 1.3728090540956241e-06, - "loss": 0.2877, + "epoch": 0.67, + "grad_norm": 0.6735298537510488, + "learning_rate": 5.225862760972524e-06, + "loss": 0.3429, "step": 14556 }, { - "epoch": 0.84, - "grad_norm": 0.2599726917673043, - "learning_rate": 1.3718681707537895e-06, - "loss": 0.2489, + "epoch": 0.67, + "grad_norm": 0.38710319732601317, + "learning_rate": 5.22455541099658e-06, + "loss": 0.2976, "step": 14557 }, { - "epoch": 0.84, - "grad_norm": 0.4713611148460899, - "learning_rate": 1.3709275862037908e-06, - "loss": 0.3234, + "epoch": 0.67, + "grad_norm": 0.3838107680826825, + "learning_rate": 5.223248166744372e-06, + "loss": 0.2079, "step": 14558 }, { - "epoch": 0.84, - "grad_norm": 0.2975155229884535, - "learning_rate": 1.3699873004781983e-06, - "loss": 0.1767, + "epoch": 0.67, + "grad_norm": 0.5357523091925057, + "learning_rate": 5.221941028244851e-06, + "loss": 0.3661, "step": 14559 }, { - "epoch": 0.84, - "grad_norm": 0.31430545435525203, - "learning_rate": 1.369047313609575e-06, - "loss": 0.1938, + "epoch": 0.67, + "grad_norm": 0.6791392293898774, + "learning_rate": 5.220633995526946e-06, + "loss": 0.368, "step": 14560 }, { - "epoch": 0.84, - "grad_norm": 0.3427307670134785, - "learning_rate": 1.3681076256304715e-06, - "loss": 0.3059, + "epoch": 0.67, + "grad_norm": 0.22039308946115338, + "learning_rate": 5.2193270686195975e-06, + "loss": 0.1771, "step": 14561 }, { - "epoch": 0.84, - "grad_norm": 0.8615199044778867, - "learning_rate": 1.3671682365734273e-06, - "loss": 0.4229, + "epoch": 0.67, + "grad_norm": 0.39682532633604495, + "learning_rate": 5.218020247551745e-06, + "loss": 0.2375, "step": 14562 }, { - "epoch": 0.84, - "grad_norm": 0.31342070762916385, - "learning_rate": 1.3662291464709787e-06, - "loss": 0.2215, + "epoch": 0.67, + "grad_norm": 1.1537155973096151, + "learning_rate": 5.216713532352311e-06, + "loss": 0.7072, "step": 14563 }, { - "epoch": 0.84, - "grad_norm": 0.8984623397142127, - "learning_rate": 1.365290355355644e-06, - "loss": 0.4016, + "epoch": 0.67, + "grad_norm": 0.9202677061614553, + "learning_rate": 5.215406923050228e-06, + "loss": 0.4504, "step": 14564 }, { - "epoch": 0.84, - "grad_norm": 0.2759744627759715, - "learning_rate": 1.3643518632599317e-06, - "loss": 0.2307, + "epoch": 0.67, + "grad_norm": 0.24941903722594946, + "learning_rate": 5.214100419674426e-06, + "loss": 0.2285, "step": 14565 }, { - "epoch": 0.84, - "grad_norm": 0.2279301524681025, - "learning_rate": 1.3634136702163415e-06, - "loss": 0.1532, + "epoch": 0.67, + "grad_norm": 0.6655004754039319, + "learning_rate": 5.212794022253831e-06, + "loss": 0.4032, "step": 14566 }, { - "epoch": 0.84, - "grad_norm": 1.1406842605842138, - "learning_rate": 1.362475776257367e-06, - "loss": 0.7491, + "epoch": 0.67, + "grad_norm": 0.2873851835931111, + "learning_rate": 5.2114877308173615e-06, + "loss": 0.1751, "step": 14567 }, { - "epoch": 0.84, - "grad_norm": 0.586708889053408, - "learning_rate": 1.3615381814154848e-06, - "loss": 0.3272, + "epoch": 0.67, + "grad_norm": 0.3850851030816268, + "learning_rate": 5.210181545393933e-06, + "loss": 0.2249, "step": 14568 }, { - "epoch": 0.84, - "grad_norm": 0.28618586482601904, - "learning_rate": 1.3606008857231634e-06, - "loss": 0.222, + "epoch": 0.67, + "grad_norm": 0.3933198791638301, + "learning_rate": 5.208875466012475e-06, + "loss": 0.3405, "step": 14569 }, { - "epoch": 0.84, - "grad_norm": 0.5022053060065834, - "learning_rate": 1.3596638892128599e-06, - "loss": 0.3399, + "epoch": 0.67, + "grad_norm": 0.7356801470515539, + "learning_rate": 5.207569492701892e-06, + "loss": 0.3468, "step": 14570 }, { - "epoch": 0.84, - "grad_norm": 0.2986255975167238, - "learning_rate": 1.3587271919170276e-06, - "loss": 0.1707, + "epoch": 0.67, + "grad_norm": 0.42252905135937513, + "learning_rate": 5.2062636254911056e-06, + "loss": 0.2409, "step": 14571 }, { - "epoch": 0.84, - "grad_norm": 0.35800026833904114, - "learning_rate": 1.3577907938681e-06, - "loss": 0.2807, + "epoch": 0.67, + "grad_norm": 1.5601929131669112, + "learning_rate": 5.204957864409019e-06, + "loss": 0.8174, "step": 14572 }, { - "epoch": 0.84, - "grad_norm": 0.32434733804341365, - "learning_rate": 1.356854695098505e-06, - "loss": 0.256, + "epoch": 0.67, + "grad_norm": 0.2595881678043744, + "learning_rate": 5.203652209484543e-06, + "loss": 0.2321, "step": 14573 }, { - "epoch": 0.84, - "grad_norm": 0.7751751650216921, - "learning_rate": 1.3559188956406587e-06, - "loss": 0.3757, + "epoch": 0.67, + "grad_norm": 0.2572383173526846, + "learning_rate": 5.202346660746589e-06, + "loss": 0.161, "step": 14574 }, { - "epoch": 0.84, - "grad_norm": 0.3422898662062519, - "learning_rate": 1.354983395526972e-06, - "loss": 0.2493, + "epoch": 0.67, + "grad_norm": 1.257235823245716, + "learning_rate": 5.201041218224052e-06, + "loss": 0.76, "step": 14575 }, { - "epoch": 0.84, - "grad_norm": 0.5236379453340966, - "learning_rate": 1.3540481947898377e-06, - "loss": 0.2442, + "epoch": 0.67, + "grad_norm": 0.5398340397464351, + "learning_rate": 5.19973588194584e-06, + "loss": 0.3059, "step": 14576 }, { - "epoch": 0.84, - "grad_norm": 0.4769062265048514, - "learning_rate": 1.3531132934616432e-06, - "loss": 0.3491, + "epoch": 0.67, + "grad_norm": 0.3585655570912759, + "learning_rate": 5.198430651940846e-06, + "loss": 0.2745, "step": 14577 }, { - "epoch": 0.84, - "grad_norm": 0.26125303282449164, - "learning_rate": 1.3521786915747636e-06, - "loss": 0.1996, + "epoch": 0.67, + "grad_norm": 0.4670169959098867, + "learning_rate": 5.19712552823797e-06, + "loss": 0.2706, "step": 14578 }, { - "epoch": 0.84, - "grad_norm": 0.4615285075486771, - "learning_rate": 1.3512443891615612e-06, - "loss": 0.2125, + "epoch": 0.67, + "grad_norm": 0.38374589266051484, + "learning_rate": 5.195820510866108e-06, + "loss": 0.2145, "step": 14579 }, { - "epoch": 0.84, - "grad_norm": 0.4793211131504592, - "learning_rate": 1.3503103862543964e-06, - "loss": 0.3389, + "epoch": 0.67, + "grad_norm": 0.3349044566274623, + "learning_rate": 5.194515599854147e-06, + "loss": 0.2496, "step": 14580 }, { - "epoch": 0.84, - "grad_norm": 0.27583841894197575, - "learning_rate": 1.3493766828856113e-06, - "loss": 0.2668, + "epoch": 0.67, + "grad_norm": 0.3256332822872563, + "learning_rate": 5.193210795230978e-06, + "loss": 0.2689, "step": 14581 }, { - "epoch": 0.84, - "grad_norm": 1.3888028705359416, - "learning_rate": 1.348443279087539e-06, - "loss": 0.247, + "epoch": 0.67, + "grad_norm": 0.7923327226771925, + "learning_rate": 5.1919060970254895e-06, + "loss": 0.4431, "step": 14582 }, { - "epoch": 0.84, - "grad_norm": 0.5835514940606136, - "learning_rate": 1.3475101748925024e-06, - "loss": 0.2396, + "epoch": 0.67, + "grad_norm": 0.3665672349678015, + "learning_rate": 5.19060150526657e-06, + "loss": 0.2533, "step": 14583 }, { - "epoch": 0.84, - "grad_norm": 0.3265317907950052, - "learning_rate": 1.3465773703328177e-06, - "loss": 0.2626, + "epoch": 0.67, + "grad_norm": 0.3854791441830913, + "learning_rate": 5.1892970199830985e-06, + "loss": 0.2453, "step": 14584 }, { - "epoch": 0.84, - "grad_norm": 0.34205468813191087, - "learning_rate": 1.3456448654407871e-06, - "loss": 0.2876, + "epoch": 0.67, + "grad_norm": 0.44179042092228865, + "learning_rate": 5.1879926412039495e-06, + "loss": 0.2361, "step": 14585 }, { - "epoch": 0.84, - "grad_norm": 0.7978010123803017, - "learning_rate": 1.3447126602487026e-06, - "loss": 0.3144, + "epoch": 0.67, + "grad_norm": 0.24957402625683964, + "learning_rate": 5.186688368958006e-06, + "loss": 0.1954, "step": 14586 }, { - "epoch": 0.84, - "grad_norm": 0.31640964650076336, - "learning_rate": 1.343780754788847e-06, - "loss": 0.2539, + "epoch": 0.67, + "grad_norm": 0.9466836846191297, + "learning_rate": 5.185384203274143e-06, + "loss": 0.3254, "step": 14587 }, { - "epoch": 0.84, - "grad_norm": 0.35450717795972697, - "learning_rate": 1.3428491490934904e-06, - "loss": 0.1691, + "epoch": 0.67, + "grad_norm": 0.4665826774041471, + "learning_rate": 5.184080144181237e-06, + "loss": 0.3146, "step": 14588 }, { - "epoch": 0.84, - "grad_norm": 0.29752438649468915, - "learning_rate": 1.3419178431948964e-06, - "loss": 0.2152, + "epoch": 0.67, + "grad_norm": 0.3141740239043101, + "learning_rate": 5.182776191708151e-06, + "loss": 0.2672, "step": 14589 }, { - "epoch": 0.84, - "grad_norm": 0.34828109234051496, - "learning_rate": 1.3409868371253155e-06, - "loss": 0.266, + "epoch": 0.67, + "grad_norm": 1.429500847373923, + "learning_rate": 5.181472345883758e-06, + "loss": 0.5527, "step": 14590 }, { - "epoch": 0.84, - "grad_norm": 0.400501330494556, - "learning_rate": 1.3400561309169845e-06, - "loss": 0.2795, + "epoch": 0.67, + "grad_norm": 0.41164636390803944, + "learning_rate": 5.180168606736926e-06, + "loss": 0.1536, "step": 14591 }, { - "epoch": 0.84, - "grad_norm": 0.32453042151607786, - "learning_rate": 1.3391257246021404e-06, - "loss": 0.2643, + "epoch": 0.67, + "grad_norm": 0.3978812866067028, + "learning_rate": 5.178864974296511e-06, + "loss": 0.299, "step": 14592 }, { - "epoch": 0.84, - "grad_norm": 0.3316450046499373, - "learning_rate": 1.3381956182130008e-06, - "loss": 0.2635, + "epoch": 0.67, + "grad_norm": 0.4245381122455259, + "learning_rate": 5.177561448591384e-06, + "loss": 0.312, "step": 14593 }, { - "epoch": 0.84, - "grad_norm": 0.5078673764787308, - "learning_rate": 1.3372658117817738e-06, - "loss": 0.2464, + "epoch": 0.67, + "grad_norm": 1.0487498819096945, + "learning_rate": 5.176258029650395e-06, + "loss": 0.2983, "step": 14594 }, { - "epoch": 0.84, - "grad_norm": 1.7956324440457225, - "learning_rate": 1.3363363053406564e-06, - "loss": 0.1765, + "epoch": 0.67, + "grad_norm": 0.37103484986954144, + "learning_rate": 5.174954717502403e-06, + "loss": 0.2416, "step": 14595 }, { - "epoch": 0.84, - "grad_norm": 0.306535566421557, - "learning_rate": 1.3354070989218426e-06, - "loss": 0.2439, + "epoch": 0.67, + "grad_norm": 0.3458927841638489, + "learning_rate": 5.173651512176268e-06, + "loss": 0.2627, "step": 14596 }, { - "epoch": 0.84, - "grad_norm": 0.35559449451805636, - "learning_rate": 1.334478192557509e-06, - "loss": 0.3066, + "epoch": 0.67, + "grad_norm": 0.34819688292822737, + "learning_rate": 5.1723484137008314e-06, + "loss": 0.196, "step": 14597 }, { - "epoch": 0.84, - "grad_norm": 0.9039033717873965, - "learning_rate": 1.333549586279822e-06, - "loss": 0.4907, + "epoch": 0.67, + "grad_norm": 0.3187011606459076, + "learning_rate": 5.17104542210495e-06, + "loss": 0.215, "step": 14598 }, { - "epoch": 0.84, - "grad_norm": 0.23362546523475522, - "learning_rate": 1.3326212801209392e-06, - "loss": 0.1655, + "epoch": 0.67, + "grad_norm": 0.930273354091307, + "learning_rate": 5.169742537417468e-06, + "loss": 0.4121, "step": 14599 }, { - "epoch": 0.84, - "grad_norm": 0.41212155732044253, - "learning_rate": 1.3316932741130106e-06, - "loss": 0.2784, + "epoch": 0.67, + "grad_norm": 0.36489398256743455, + "learning_rate": 5.168439759667234e-06, + "loss": 0.2511, "step": 14600 }, { - "epoch": 0.84, - "grad_norm": 0.37043529185291274, - "learning_rate": 1.3307655682881704e-06, - "loss": 0.2799, + "epoch": 0.67, + "grad_norm": 0.3486897623720474, + "learning_rate": 5.167137088883084e-06, + "loss": 0.2664, "step": 14601 }, { - "epoch": 0.84, - "grad_norm": 0.3008023321460584, - "learning_rate": 1.3298381626785461e-06, - "loss": 0.2106, + "epoch": 0.67, + "grad_norm": 0.5560003608338897, + "learning_rate": 5.165834525093864e-06, + "loss": 0.25, "step": 14602 }, { - "epoch": 0.84, - "grad_norm": 0.6480680200258, - "learning_rate": 1.3289110573162534e-06, - "loss": 0.3836, + "epoch": 0.67, + "grad_norm": 1.6744128575833823, + "learning_rate": 5.164532068328405e-06, + "loss": 0.694, "step": 14603 }, { - "epoch": 0.84, - "grad_norm": 0.3419447209679332, - "learning_rate": 1.3279842522333964e-06, - "loss": 0.3303, + "epoch": 0.67, + "grad_norm": 0.31236617474366957, + "learning_rate": 5.163229718615545e-06, + "loss": 0.2184, "step": 14604 }, { - "epoch": 0.84, - "grad_norm": 0.28926904223399996, - "learning_rate": 1.3270577474620737e-06, - "loss": 0.1862, + "epoch": 0.67, + "grad_norm": 0.40630878965376616, + "learning_rate": 5.161927475984122e-06, + "loss": 0.3099, "step": 14605 }, { - "epoch": 0.84, - "grad_norm": 0.333294240317376, - "learning_rate": 1.326131543034368e-06, - "loss": 0.1797, + "epoch": 0.67, + "grad_norm": 0.8632822595452267, + "learning_rate": 5.160625340462957e-06, + "loss": 0.5503, "step": 14606 }, { - "epoch": 0.84, - "grad_norm": 0.7753789409052013, - "learning_rate": 1.3252056389823542e-06, - "loss": 0.3864, + "epoch": 0.67, + "grad_norm": 0.20241025011720643, + "learning_rate": 5.159323312080883e-06, + "loss": 0.1404, "step": 14607 }, { - "epoch": 0.84, - "grad_norm": 0.4255672538071689, - "learning_rate": 1.3242800353380935e-06, - "loss": 0.2057, + "epoch": 0.67, + "grad_norm": 1.6662521418027079, + "learning_rate": 5.15802139086673e-06, + "loss": 0.691, "step": 14608 }, { - "epoch": 0.84, - "grad_norm": 0.30257530347677447, - "learning_rate": 1.3233547321336449e-06, - "loss": 0.2747, + "epoch": 0.67, + "grad_norm": 0.39478032245865113, + "learning_rate": 5.1567195768493114e-06, + "loss": 0.3136, "step": 14609 }, { - "epoch": 0.84, - "grad_norm": 1.1949937386415568, - "learning_rate": 1.322429729401048e-06, - "loss": 0.6453, + "epoch": 0.67, + "grad_norm": 0.3485609115888265, + "learning_rate": 5.155417870057457e-06, + "loss": 0.2195, "step": 14610 }, { - "epoch": 0.84, - "grad_norm": 0.39220038644195765, - "learning_rate": 1.3215050271723372e-06, - "loss": 0.2691, + "epoch": 0.67, + "grad_norm": 0.8256574255610679, + "learning_rate": 5.154116270519975e-06, + "loss": 0.4641, "step": 14611 }, { - "epoch": 0.84, - "grad_norm": 0.19771103947243748, - "learning_rate": 1.3205806254795316e-06, - "loss": 0.177, + "epoch": 0.67, + "grad_norm": 0.4109823929433938, + "learning_rate": 5.152814778265696e-06, + "loss": 0.3374, "step": 14612 }, { - "epoch": 0.84, - "grad_norm": 1.340854636617087, - "learning_rate": 1.3196565243546477e-06, - "loss": 0.6778, + "epoch": 0.67, + "grad_norm": 0.34838861195995197, + "learning_rate": 5.151513393323426e-06, + "loss": 0.1916, "step": 14613 }, { - "epoch": 0.84, - "grad_norm": 0.40782738073281505, - "learning_rate": 1.3187327238296855e-06, - "loss": 0.2768, + "epoch": 0.67, + "grad_norm": 0.3139484689553587, + "learning_rate": 5.150212115721973e-06, + "loss": 0.1806, "step": 14614 }, { - "epoch": 0.84, - "grad_norm": 0.48597633138533963, - "learning_rate": 1.3178092239366357e-06, - "loss": 0.2581, + "epoch": 0.67, + "grad_norm": 0.866068318251318, + "learning_rate": 5.148910945490152e-06, + "loss": 0.426, "step": 14615 }, { - "epoch": 0.84, - "grad_norm": 0.35702165673584346, - "learning_rate": 1.316886024707479e-06, - "loss": 0.298, + "epoch": 0.67, + "grad_norm": 0.4830096330459205, + "learning_rate": 5.147609882656761e-06, + "loss": 0.3176, "step": 14616 }, { - "epoch": 0.84, - "grad_norm": 0.43957135064561503, - "learning_rate": 1.3159631261741835e-06, - "loss": 0.2841, + "epoch": 0.67, + "grad_norm": 0.30245185015805653, + "learning_rate": 5.146308927250616e-06, + "loss": 0.2526, "step": 14617 }, { - "epoch": 0.84, - "grad_norm": 0.2303763190919167, - "learning_rate": 1.315040528368714e-06, - "loss": 0.1285, + "epoch": 0.67, + "grad_norm": 1.077738213564531, + "learning_rate": 5.14500807930051e-06, + "loss": 0.702, "step": 14618 }, { - "epoch": 0.84, - "grad_norm": 0.7590643493551357, - "learning_rate": 1.3141182313230173e-06, - "loss": 0.3696, + "epoch": 0.67, + "grad_norm": 0.3730299969154624, + "learning_rate": 5.143707338835251e-06, + "loss": 0.2475, "step": 14619 }, { - "epoch": 0.84, - "grad_norm": 0.3120861870291768, - "learning_rate": 1.313196235069033e-06, - "loss": 0.2635, + "epoch": 0.67, + "grad_norm": 0.23886192130143175, + "learning_rate": 5.142406705883627e-06, + "loss": 0.1668, "step": 14620 }, { - "epoch": 0.84, - "grad_norm": 0.3375885377689183, - "learning_rate": 1.3122745396386893e-06, - "loss": 0.2568, + "epoch": 0.67, + "grad_norm": 1.0830211879219933, + "learning_rate": 5.1411061804744365e-06, + "loss": 0.5165, "step": 14621 }, { - "epoch": 0.84, - "grad_norm": 1.0596125325492274, - "learning_rate": 1.311353145063905e-06, - "loss": 0.6659, + "epoch": 0.67, + "grad_norm": 0.34098399581558103, + "learning_rate": 5.1398057626364765e-06, + "loss": 0.2609, "step": 14622 }, { - "epoch": 0.84, - "grad_norm": 0.32892295818439166, - "learning_rate": 1.3104320513765867e-06, - "loss": 0.2509, + "epoch": 0.67, + "grad_norm": 0.6778993252520432, + "learning_rate": 5.138505452398526e-06, + "loss": 0.3105, "step": 14623 }, { - "epoch": 0.84, - "grad_norm": 0.3057490849134694, - "learning_rate": 1.3095112586086322e-06, - "loss": 0.1777, + "epoch": 0.67, + "grad_norm": 0.38095606707446816, + "learning_rate": 5.137205249789382e-06, + "loss": 0.3027, "step": 14624 }, { - "epoch": 0.84, - "grad_norm": 0.35619765249654883, - "learning_rate": 1.3085907667919295e-06, - "loss": 0.2711, + "epoch": 0.67, + "grad_norm": 0.3746784625345085, + "learning_rate": 5.13590515483783e-06, + "loss": 0.2592, "step": 14625 }, { - "epoch": 0.84, - "grad_norm": 0.5930467136699953, - "learning_rate": 1.3076705759583562e-06, - "loss": 0.2686, + "epoch": 0.67, + "grad_norm": 0.2599544173557721, + "learning_rate": 5.134605167572646e-06, + "loss": 0.0827, "step": 14626 }, { - "epoch": 0.84, - "grad_norm": 0.4096537171452396, - "learning_rate": 1.3067506861397771e-06, - "loss": 0.3089, + "epoch": 0.67, + "grad_norm": 0.8198778182332518, + "learning_rate": 5.1333052880226185e-06, + "loss": 0.3891, "step": 14627 }, { - "epoch": 0.84, - "grad_norm": 0.31363251366168626, - "learning_rate": 1.3058310973680478e-06, - "loss": 0.2472, + "epoch": 0.67, + "grad_norm": 0.29980037886734695, + "learning_rate": 5.132005516216512e-06, + "loss": 0.2769, "step": 14628 }, { - "epoch": 0.84, - "grad_norm": 0.6261934366587812, - "learning_rate": 1.3049118096750102e-06, - "loss": 0.322, + "epoch": 0.67, + "grad_norm": 0.48613521157158995, + "learning_rate": 5.130705852183121e-06, + "loss": 0.3265, "step": 14629 }, { - "epoch": 0.84, - "grad_norm": 0.23884814580511757, - "learning_rate": 1.3039928230925058e-06, - "loss": 0.2023, + "epoch": 0.67, + "grad_norm": 1.0616128613456728, + "learning_rate": 5.1294062959512045e-06, + "loss": 0.4118, "step": 14630 }, { - "epoch": 0.84, - "grad_norm": 0.7587479042041084, - "learning_rate": 1.303074137652357e-06, - "loss": 0.2866, + "epoch": 0.67, + "grad_norm": 0.42418816795479464, + "learning_rate": 5.128106847549543e-06, + "loss": 0.2622, "step": 14631 }, { - "epoch": 0.84, - "grad_norm": 0.3200019958475434, - "learning_rate": 1.302155753386376e-06, - "loss": 0.2691, + "epoch": 0.67, + "grad_norm": 0.2590092227785796, + "learning_rate": 5.126807507006899e-06, + "loss": 0.2069, "step": 14632 }, { - "epoch": 0.84, - "grad_norm": 0.38354019565695807, - "learning_rate": 1.3012376703263652e-06, - "loss": 0.3087, + "epoch": 0.67, + "grad_norm": 0.6856431076489268, + "learning_rate": 5.125508274352033e-06, + "loss": 0.2843, "step": 14633 }, { - "epoch": 0.84, - "grad_norm": 1.2891788763038219, - "learning_rate": 1.3003198885041212e-06, - "loss": 0.757, + "epoch": 0.67, + "grad_norm": 0.39001776860541837, + "learning_rate": 5.124209149613724e-06, + "loss": 0.2473, "step": 14634 }, { - "epoch": 0.84, - "grad_norm": 0.3551321237848732, - "learning_rate": 1.2994024079514257e-06, - "loss": 0.161, + "epoch": 0.67, + "grad_norm": 0.6189590924160445, + "learning_rate": 5.1229101328207195e-06, + "loss": 0.3797, "step": 14635 }, { - "epoch": 0.84, - "grad_norm": 0.2695399257198015, - "learning_rate": 1.2984852287000515e-06, - "loss": 0.2375, + "epoch": 0.67, + "grad_norm": 0.33288899893470797, + "learning_rate": 5.12161122400179e-06, + "loss": 0.2662, "step": 14636 }, { - "epoch": 0.84, - "grad_norm": 0.4797545726288128, - "learning_rate": 1.297568350781757e-06, - "loss": 0.3093, + "epoch": 0.67, + "grad_norm": 0.45222479837408003, + "learning_rate": 5.120312423185681e-06, + "loss": 0.2838, "step": 14637 }, { - "epoch": 0.84, - "grad_norm": 0.29754951174458355, - "learning_rate": 1.296651774228298e-06, - "loss": 0.1445, + "epoch": 0.67, + "grad_norm": 0.2766801941072608, + "learning_rate": 5.119013730401152e-06, + "loss": 0.2141, "step": 14638 }, { - "epoch": 0.84, - "grad_norm": 0.573274589294748, - "learning_rate": 1.2957354990714145e-06, - "loss": 0.3267, + "epoch": 0.67, + "grad_norm": 0.7278280117379301, + "learning_rate": 5.11771514567696e-06, + "loss": 0.3998, "step": 14639 }, { - "epoch": 0.84, - "grad_norm": 0.3397778778825856, - "learning_rate": 1.2948195253428364e-06, - "loss": 0.3433, + "epoch": 0.67, + "grad_norm": 0.2717933211794752, + "learning_rate": 5.1164166690418435e-06, + "loss": 0.196, "step": 14640 }, { - "epoch": 0.84, - "grad_norm": 0.3975827403188928, - "learning_rate": 1.2939038530742832e-06, - "loss": 0.1751, + "epoch": 0.67, + "grad_norm": 0.5348678365580526, + "learning_rate": 5.115118300524555e-06, + "loss": 0.327, "step": 14641 }, { - "epoch": 0.84, - "grad_norm": 0.5512201139354378, - "learning_rate": 1.2929884822974626e-06, - "loss": 0.3175, + "epoch": 0.67, + "grad_norm": 1.401259855324682, + "learning_rate": 5.113820040153844e-06, + "loss": 0.7867, "step": 14642 }, { - "epoch": 0.84, - "grad_norm": 0.3345476677624379, - "learning_rate": 1.2920734130440793e-06, - "loss": 0.2317, + "epoch": 0.67, + "grad_norm": 0.32312003204923545, + "learning_rate": 5.112521887958444e-06, + "loss": 0.1893, "step": 14643 }, { - "epoch": 0.84, - "grad_norm": 0.27178999005541327, - "learning_rate": 1.2911586453458203e-06, - "loss": 0.1999, + "epoch": 0.67, + "grad_norm": 0.47114487353831286, + "learning_rate": 5.111223843967101e-06, + "loss": 0.3017, "step": 14644 }, { - "epoch": 0.84, - "grad_norm": 0.393265473755049, - "learning_rate": 1.2902441792343611e-06, - "loss": 0.2757, + "epoch": 0.67, + "grad_norm": 0.40523760330781566, + "learning_rate": 5.109925908208548e-06, + "loss": 0.2701, "step": 14645 }, { - "epoch": 0.84, - "grad_norm": 1.0589891218703797, - "learning_rate": 1.2893300147413702e-06, - "loss": 0.5945, + "epoch": 0.67, + "grad_norm": 0.24304619362661206, + "learning_rate": 5.108628080711523e-06, + "loss": 0.1633, "step": 14646 }, { - "epoch": 0.84, - "grad_norm": 0.7805817018750784, - "learning_rate": 1.2884161518985083e-06, - "loss": 0.356, + "epoch": 0.67, + "grad_norm": 1.302963865550671, + "learning_rate": 5.107330361504757e-06, + "loss": 0.6481, "step": 14647 }, { - "epoch": 0.84, - "grad_norm": 0.24815947867005486, - "learning_rate": 1.2875025907374206e-06, - "loss": 0.2283, + "epoch": 0.67, + "grad_norm": 0.4091695010710248, + "learning_rate": 5.106032750616985e-06, + "loss": 0.3255, "step": 14648 }, { - "epoch": 0.84, - "grad_norm": 0.7997982329037202, - "learning_rate": 1.2865893312897438e-06, - "loss": 0.3963, + "epoch": 0.67, + "grad_norm": 0.3679171288484748, + "learning_rate": 5.1047352480769305e-06, + "loss": 0.0713, "step": 14649 }, { - "epoch": 0.84, - "grad_norm": 0.24864551993847825, - "learning_rate": 1.2856763735871003e-06, - "loss": 0.1527, + "epoch": 0.67, + "grad_norm": 0.3942742904196032, + "learning_rate": 5.1034378539133125e-06, + "loss": 0.285, "step": 14650 }, { - "epoch": 0.84, - "grad_norm": 0.3838687286845385, - "learning_rate": 1.2847637176611128e-06, - "loss": 0.2545, + "epoch": 0.67, + "grad_norm": 0.2802975110859199, + "learning_rate": 5.1021405681548686e-06, + "loss": 0.2355, "step": 14651 }, { - "epoch": 0.84, - "grad_norm": 0.33946848678803093, - "learning_rate": 1.2838513635433824e-06, - "loss": 0.2888, + "epoch": 0.67, + "grad_norm": 0.4282801336462379, + "learning_rate": 5.100843390830308e-06, + "loss": 0.2752, "step": 14652 }, { - "epoch": 0.84, - "grad_norm": 0.5313711952676399, - "learning_rate": 1.2829393112655052e-06, - "loss": 0.3075, + "epoch": 0.67, + "grad_norm": 0.39055988099363154, + "learning_rate": 5.099546321968356e-06, + "loss": 0.2455, "step": 14653 }, { - "epoch": 0.84, - "grad_norm": 0.35368230488042623, - "learning_rate": 1.2820275608590638e-06, - "loss": 0.2496, + "epoch": 0.67, + "grad_norm": 1.2195564825679044, + "learning_rate": 5.098249361597723e-06, + "loss": 0.5319, "step": 14654 }, { - "epoch": 0.84, - "grad_norm": 0.5093236803162801, - "learning_rate": 1.2811161123556337e-06, - "loss": 0.2507, + "epoch": 0.67, + "grad_norm": 0.6186439024732834, + "learning_rate": 5.096952509747124e-06, + "loss": 0.2866, "step": 14655 }, { - "epoch": 0.84, - "grad_norm": 0.2429324623945723, - "learning_rate": 1.2802049657867777e-06, - "loss": 0.2158, + "epoch": 0.67, + "grad_norm": 0.304111159659403, + "learning_rate": 5.095655766445274e-06, + "loss": 0.245, "step": 14656 }, { - "epoch": 0.84, - "grad_norm": 0.4075378255586554, - "learning_rate": 1.2792941211840481e-06, - "loss": 0.2347, + "epoch": 0.67, + "grad_norm": 0.9439484645263461, + "learning_rate": 5.094359131720875e-06, + "loss": 0.4374, "step": 14657 }, { - "epoch": 0.84, - "grad_norm": 0.813489974515319, - "learning_rate": 1.2783835785789867e-06, - "loss": 0.4763, + "epoch": 0.67, + "grad_norm": 0.24898142816710866, + "learning_rate": 5.093062605602637e-06, + "loss": 0.1515, "step": 14658 }, { - "epoch": 0.84, - "grad_norm": 0.5005999740093483, - "learning_rate": 1.277473338003129e-06, - "loss": 0.3164, + "epoch": 0.67, + "grad_norm": 0.38688251322200956, + "learning_rate": 5.091766188119267e-06, + "loss": 0.2715, "step": 14659 }, { - "epoch": 0.84, - "grad_norm": 0.3206738895171215, - "learning_rate": 1.2765633994879933e-06, - "loss": 0.2816, + "epoch": 0.67, + "grad_norm": 0.37973917368503773, + "learning_rate": 5.09046987929946e-06, + "loss": 0.3032, "step": 14660 }, { - "epoch": 0.84, - "grad_norm": 0.5703995448615973, - "learning_rate": 1.2756537630650934e-06, - "loss": 0.2392, + "epoch": 0.67, + "grad_norm": 0.4183488399717887, + "learning_rate": 5.089173679171922e-06, + "loss": 0.2885, "step": 14661 }, { - "epoch": 0.84, - "grad_norm": 0.24137241230342263, - "learning_rate": 1.274744428765926e-06, - "loss": 0.1746, + "epoch": 0.67, + "grad_norm": 0.5369962486947505, + "learning_rate": 5.087877587765339e-06, + "loss": 0.2704, "step": 14662 }, { - "epoch": 0.84, - "grad_norm": 0.3367852646989017, - "learning_rate": 1.2738353966219863e-06, - "loss": 0.2836, + "epoch": 0.67, + "grad_norm": 0.34705495866355146, + "learning_rate": 5.086581605108416e-06, + "loss": 0.1776, "step": 14663 }, { - "epoch": 0.84, - "grad_norm": 0.3384807639190101, - "learning_rate": 1.2729266666647511e-06, - "loss": 0.264, + "epoch": 0.67, + "grad_norm": 0.28243389079598086, + "learning_rate": 5.0852857312298376e-06, + "loss": 0.2804, "step": 14664 }, { - "epoch": 0.84, - "grad_norm": 0.6314320406801325, - "learning_rate": 1.2720182389256896e-06, - "loss": 0.362, + "epoch": 0.67, + "grad_norm": 0.6183600069117005, + "learning_rate": 5.083989966158301e-06, + "loss": 0.4008, "step": 14665 }, { - "epoch": 0.84, - "grad_norm": 0.3457808849956551, - "learning_rate": 1.2711101134362624e-06, - "loss": 0.2846, + "epoch": 0.67, + "grad_norm": 0.7773913401679422, + "learning_rate": 5.082694309922484e-06, + "loss": 0.3369, "step": 14666 }, { - "epoch": 0.84, - "grad_norm": 1.5896610511400546, - "learning_rate": 1.2702022902279132e-06, - "loss": 0.1807, + "epoch": 0.67, + "grad_norm": 0.5271037887908495, + "learning_rate": 5.081398762551078e-06, + "loss": 0.3064, "step": 14667 }, { - "epoch": 0.84, - "grad_norm": 0.20164738798995183, - "learning_rate": 1.2692947693320867e-06, - "loss": 0.1918, + "epoch": 0.67, + "grad_norm": 0.3040545850868143, + "learning_rate": 5.080103324072764e-06, + "loss": 0.2783, "step": 14668 }, { - "epoch": 0.84, - "grad_norm": 0.34464558067401213, - "learning_rate": 1.2683875507802058e-06, - "loss": 0.2875, + "epoch": 0.67, + "grad_norm": 0.4630782412471425, + "learning_rate": 5.078807994516217e-06, + "loss": 0.1771, "step": 14669 }, { - "epoch": 0.84, - "grad_norm": 0.7667195525112961, - "learning_rate": 1.2674806346036895e-06, - "loss": 0.3144, + "epoch": 0.67, + "grad_norm": 0.3901563969419384, + "learning_rate": 5.077512773910122e-06, + "loss": 0.2209, "step": 14670 }, { - "epoch": 0.84, - "grad_norm": 0.524432798062477, - "learning_rate": 1.2665740208339406e-06, - "loss": 0.3225, + "epoch": 0.67, + "grad_norm": 0.3875837574079221, + "learning_rate": 5.0762176622831476e-06, + "loss": 0.2877, "step": 14671 }, { - "epoch": 0.84, - "grad_norm": 0.25184485692333597, - "learning_rate": 1.2656677095023607e-06, - "loss": 0.2584, + "epoch": 0.67, + "grad_norm": 0.40046376048459525, + "learning_rate": 5.074922659663967e-06, + "loss": 0.2602, "step": 14672 }, { - "epoch": 0.84, - "grad_norm": 1.4804813262002852, - "learning_rate": 1.2647617006403312e-06, - "loss": 0.4913, + "epoch": 0.67, + "grad_norm": 0.7766745504382916, + "learning_rate": 5.073627766081256e-06, + "loss": 0.3233, "step": 14673 }, { - "epoch": 0.84, - "grad_norm": 0.16779157793554556, - "learning_rate": 1.2638559942792294e-06, - "loss": 0.0875, + "epoch": 0.67, + "grad_norm": 0.39006154608673754, + "learning_rate": 5.072332981563675e-06, + "loss": 0.2962, "step": 14674 }, { - "epoch": 0.84, - "grad_norm": 0.39519690266180457, - "learning_rate": 1.2629505904504158e-06, - "loss": 0.3048, + "epoch": 0.67, + "grad_norm": 1.725373116463207, + "learning_rate": 5.071038306139895e-06, + "loss": 0.1903, "step": 14675 }, { - "epoch": 0.84, - "grad_norm": 0.3542800298985227, - "learning_rate": 1.2620454891852507e-06, - "loss": 0.3001, + "epoch": 0.67, + "grad_norm": 0.2581697565680685, + "learning_rate": 5.069743739838569e-06, + "loss": 0.2182, "step": 14676 }, { - "epoch": 0.84, - "grad_norm": 0.38034372533991734, - "learning_rate": 1.2611406905150736e-06, - "loss": 0.1732, + "epoch": 0.67, + "grad_norm": 0.3841024665862822, + "learning_rate": 5.068449282688372e-06, + "loss": 0.3137, "step": 14677 }, { - "epoch": 0.84, - "grad_norm": 0.38478117119739746, - "learning_rate": 1.2602361944712193e-06, - "loss": 0.2879, + "epoch": 0.67, + "grad_norm": 0.8409663232284716, + "learning_rate": 5.067154934717956e-06, + "loss": 0.4137, "step": 14678 }, { - "epoch": 0.84, - "grad_norm": 0.5953891231925955, - "learning_rate": 1.2593320010850096e-06, - "loss": 0.329, + "epoch": 0.67, + "grad_norm": 0.277062667314916, + "learning_rate": 5.065860695955971e-06, + "loss": 0.1855, "step": 14679 }, { - "epoch": 0.84, - "grad_norm": 0.4313422987262344, - "learning_rate": 1.258428110387754e-06, - "loss": 0.2362, + "epoch": 0.67, + "grad_norm": 0.36350036481256914, + "learning_rate": 5.064566566431075e-06, + "loss": 0.2941, "step": 14680 }, { - "epoch": 0.84, - "grad_norm": 0.3322900970369777, - "learning_rate": 1.2575245224107602e-06, - "loss": 0.2568, + "epoch": 0.67, + "grad_norm": 1.3013843140238681, + "learning_rate": 5.063272546171918e-06, + "loss": 0.572, "step": 14681 }, { - "epoch": 0.84, - "grad_norm": 0.5778985999746848, - "learning_rate": 1.256621237185316e-06, - "loss": 0.388, + "epoch": 0.67, + "grad_norm": 0.24432780189443856, + "learning_rate": 5.061978635207152e-06, + "loss": 0.142, "step": 14682 }, { - "epoch": 0.84, - "grad_norm": 0.25766138829972673, - "learning_rate": 1.2557182547427016e-06, - "loss": 0.1669, + "epoch": 0.67, + "grad_norm": 0.5461639762016587, + "learning_rate": 5.0606848335654165e-06, + "loss": 0.3646, "step": 14683 }, { - "epoch": 0.84, - "grad_norm": 0.23468154882529396, - "learning_rate": 1.2548155751141867e-06, - "loss": 0.2158, + "epoch": 0.67, + "grad_norm": 0.40207427996034717, + "learning_rate": 5.059391141275358e-06, + "loss": 0.3058, "step": 14684 }, { - "epoch": 0.84, - "grad_norm": 1.4762527134046515, - "learning_rate": 1.2539131983310349e-06, - "loss": 0.5962, + "epoch": 0.67, + "grad_norm": 0.39844805712055875, + "learning_rate": 5.058097558365622e-06, + "loss": 0.1692, "step": 14685 }, { - "epoch": 0.84, - "grad_norm": 1.3406802197347614, - "learning_rate": 1.2530111244244925e-06, - "loss": 0.6108, + "epoch": 0.67, + "grad_norm": 0.43533318072982563, + "learning_rate": 5.056804084864839e-06, + "loss": 0.3156, "step": 14686 }, { - "epoch": 0.84, - "grad_norm": 0.2989282297239217, - "learning_rate": 1.2521093534257977e-06, - "loss": 0.2035, + "epoch": 0.67, + "grad_norm": 0.5459186086601856, + "learning_rate": 5.055510720801653e-06, + "loss": 0.3456, "step": 14687 }, { - "epoch": 0.84, - "grad_norm": 0.32967555204294186, - "learning_rate": 1.2512078853661813e-06, - "loss": 0.2942, + "epoch": 0.67, + "grad_norm": 0.40441756516333693, + "learning_rate": 5.054217466204691e-06, + "loss": 0.2189, "step": 14688 }, { - "epoch": 0.84, - "grad_norm": 0.26575499268107783, - "learning_rate": 1.2503067202768592e-06, - "loss": 0.1926, + "epoch": 0.67, + "grad_norm": 0.38662816666683575, + "learning_rate": 5.052924321102586e-06, + "loss": 0.2863, "step": 14689 }, { - "epoch": 0.84, - "grad_norm": 0.34793399148411513, - "learning_rate": 1.2494058581890388e-06, - "loss": 0.1929, + "epoch": 0.67, + "grad_norm": 0.742275049007806, + "learning_rate": 5.051631285523973e-06, + "loss": 0.3842, "step": 14690 }, { - "epoch": 0.84, - "grad_norm": 0.44843755470370344, - "learning_rate": 1.2485052991339174e-06, - "loss": 0.2978, + "epoch": 0.67, + "grad_norm": 0.24431473115590516, + "learning_rate": 5.050338359497471e-06, + "loss": 0.1733, "step": 14691 }, { - "epoch": 0.84, - "grad_norm": 0.5016887458702256, - "learning_rate": 1.247605043142679e-06, - "loss": 0.2965, + "epoch": 0.67, + "grad_norm": 0.28741182616618494, + "learning_rate": 5.04904554305171e-06, + "loss": 0.2029, "step": 14692 }, { - "epoch": 0.84, - "grad_norm": 0.35071326656103574, - "learning_rate": 1.2467050902465038e-06, - "loss": 0.2073, + "epoch": 0.67, + "grad_norm": 1.2864517357838037, + "learning_rate": 5.0477528362153e-06, + "loss": 0.563, "step": 14693 }, { - "epoch": 0.84, - "grad_norm": 0.7348697816631484, - "learning_rate": 1.2458054404765552e-06, - "loss": 0.4092, + "epoch": 0.68, + "grad_norm": 0.8088013083660608, + "learning_rate": 5.046460239016879e-06, + "loss": 0.4117, "step": 14694 }, { - "epoch": 0.84, - "grad_norm": 0.3464063282503142, - "learning_rate": 1.2449060938639869e-06, - "loss": 0.3186, + "epoch": 0.68, + "grad_norm": 0.3540882142771861, + "learning_rate": 5.045167751485049e-06, + "loss": 0.2369, "step": 14695 }, { - "epoch": 0.84, - "grad_norm": 0.2497142048809561, - "learning_rate": 1.2440070504399426e-06, - "loss": 0.1682, + "epoch": 0.68, + "grad_norm": 0.5992837007908217, + "learning_rate": 5.043875373648435e-06, + "loss": 0.3432, "step": 14696 }, { - "epoch": 0.84, - "grad_norm": 0.5003880498698998, - "learning_rate": 1.243108310235559e-06, - "loss": 0.2321, + "epoch": 0.68, + "grad_norm": 0.29045165097222986, + "learning_rate": 5.042583105535639e-06, + "loss": 0.195, "step": 14697 }, { - "epoch": 0.84, - "grad_norm": 0.6704924239750555, - "learning_rate": 1.2422098732819587e-06, - "loss": 0.3914, + "epoch": 0.68, + "grad_norm": 0.39772476847889165, + "learning_rate": 5.041290947175274e-06, + "loss": 0.2002, "step": 14698 }, { - "epoch": 0.84, - "grad_norm": 0.30569771907205934, - "learning_rate": 1.2413117396102548e-06, - "loss": 0.2449, + "epoch": 0.68, + "grad_norm": 0.547982177662353, + "learning_rate": 5.039998898595952e-06, + "loss": 0.331, "step": 14699 }, { - "epoch": 0.84, - "grad_norm": 0.3158934485374403, - "learning_rate": 1.2404139092515455e-06, - "loss": 0.2607, + "epoch": 0.68, + "grad_norm": 0.45942186184485073, + "learning_rate": 5.0387069598262706e-06, + "loss": 0.3462, "step": 14700 }, { - "epoch": 0.84, - "grad_norm": 0.46902780635018154, - "learning_rate": 1.2395163822369283e-06, - "loss": 0.2458, + "epoch": 0.68, + "grad_norm": 0.36921597787587057, + "learning_rate": 5.037415130894836e-06, + "loss": 0.2192, "step": 14701 }, { - "epoch": 0.84, - "grad_norm": 0.24793381285511187, - "learning_rate": 1.2386191585974815e-06, - "loss": 0.2093, + "epoch": 0.68, + "grad_norm": 0.6765889567274216, + "learning_rate": 5.036123411830249e-06, + "loss": 0.4376, "step": 14702 }, { - "epoch": 0.84, - "grad_norm": 0.568270893805069, - "learning_rate": 1.2377222383642773e-06, - "loss": 0.2455, + "epoch": 0.68, + "grad_norm": 0.30447706646246053, + "learning_rate": 5.0348318026611046e-06, + "loss": 0.2159, "step": 14703 }, { - "epoch": 0.84, - "grad_norm": 0.4898535112631793, - "learning_rate": 1.2368256215683727e-06, - "loss": 0.3084, + "epoch": 0.68, + "grad_norm": 0.3203348270138396, + "learning_rate": 5.033540303416e-06, + "loss": 0.2499, "step": 14704 }, { - "epoch": 0.84, - "grad_norm": 0.3244437239492716, - "learning_rate": 1.235929308240822e-06, - "loss": 0.2521, + "epoch": 0.68, + "grad_norm": 0.42669964659331694, + "learning_rate": 5.032248914123523e-06, + "loss": 0.1027, "step": 14705 }, { - "epoch": 0.84, - "grad_norm": 0.8715598875641944, - "learning_rate": 1.2350332984126623e-06, - "loss": 0.3682, + "epoch": 0.68, + "grad_norm": 0.7949108252448919, + "learning_rate": 5.030957634812268e-06, + "loss": 0.4177, "step": 14706 }, { - "epoch": 0.84, - "grad_norm": 0.32979803200018204, - "learning_rate": 1.2341375921149224e-06, - "loss": 0.2899, + "epoch": 0.68, + "grad_norm": 0.3434610896200157, + "learning_rate": 5.029666465510825e-06, + "loss": 0.2595, "step": 14707 }, { - "epoch": 0.85, - "grad_norm": 0.2545636420010435, - "learning_rate": 1.2332421893786218e-06, - "loss": 0.2019, + "epoch": 0.68, + "grad_norm": 0.34626921900480173, + "learning_rate": 5.0283754062477715e-06, + "loss": 0.2686, "step": 14708 }, { - "epoch": 0.85, - "grad_norm": 0.4792169662322209, - "learning_rate": 1.2323470902347645e-06, - "loss": 0.2032, + "epoch": 0.68, + "grad_norm": 0.4816789328547285, + "learning_rate": 5.027084457051701e-06, + "loss": 0.2449, "step": 14709 }, { - "epoch": 0.85, - "grad_norm": 0.5781818608555402, - "learning_rate": 1.2314522947143526e-06, - "loss": 0.2895, + "epoch": 0.68, + "grad_norm": 0.25761168164653114, + "learning_rate": 5.025793617951178e-06, + "loss": 0.1993, "step": 14710 }, { - "epoch": 0.85, - "grad_norm": 0.4198601434035437, - "learning_rate": 1.23055780284837e-06, - "loss": 0.27, + "epoch": 0.68, + "grad_norm": 0.5173736508885713, + "learning_rate": 5.024502888974798e-06, + "loss": 0.2442, "step": 14711 }, { - "epoch": 0.85, - "grad_norm": 0.2760695555581516, - "learning_rate": 1.2296636146677942e-06, - "loss": 0.2711, + "epoch": 0.68, + "grad_norm": 0.4774113481356877, + "learning_rate": 5.0232122701511245e-06, + "loss": 0.3378, "step": 14712 }, { - "epoch": 0.85, - "grad_norm": 0.8359298804618515, - "learning_rate": 1.2287697302035883e-06, - "loss": 0.2242, + "epoch": 0.68, + "grad_norm": 0.3349170542974688, + "learning_rate": 5.021921761508739e-06, + "loss": 0.284, "step": 14713 }, { - "epoch": 0.85, - "grad_norm": 0.35699267197134954, - "learning_rate": 1.227876149486712e-06, - "loss": 0.2875, + "epoch": 0.68, + "grad_norm": 0.9139303029247234, + "learning_rate": 5.0206313630762035e-06, + "loss": 0.5311, "step": 14714 }, { - "epoch": 0.85, - "grad_norm": 0.25071982956057953, - "learning_rate": 1.226982872548107e-06, - "loss": 0.1969, + "epoch": 0.68, + "grad_norm": 0.34295066019060805, + "learning_rate": 5.019341074882092e-06, + "loss": 0.2827, "step": 14715 }, { - "epoch": 0.85, - "grad_norm": 0.43126805352484987, - "learning_rate": 1.2260898994187075e-06, - "loss": 0.2655, + "epoch": 0.68, + "grad_norm": 0.28534796916316485, + "learning_rate": 5.01805089695497e-06, + "loss": 0.2189, "step": 14716 }, { - "epoch": 0.85, - "grad_norm": 0.3063248769232248, - "learning_rate": 1.2251972301294358e-06, - "loss": 0.2335, + "epoch": 0.68, + "grad_norm": 0.478428991876187, + "learning_rate": 5.016760829323397e-06, + "loss": 0.2269, "step": 14717 }, { - "epoch": 0.85, - "grad_norm": 1.0832029188795427, - "learning_rate": 1.2243048647112078e-06, - "loss": 0.7687, + "epoch": 0.68, + "grad_norm": 0.617738520452347, + "learning_rate": 5.015470872015936e-06, + "loss": 0.2744, "step": 14718 }, { - "epoch": 0.85, - "grad_norm": 0.3293628692072448, - "learning_rate": 1.2234128031949266e-06, - "loss": 0.2336, + "epoch": 0.68, + "grad_norm": 0.39701208465343224, + "learning_rate": 5.01418102506115e-06, + "loss": 0.245, "step": 14719 }, { - "epoch": 0.85, - "grad_norm": 0.3376833571189543, - "learning_rate": 1.222521045611481e-06, - "loss": 0.2651, + "epoch": 0.68, + "grad_norm": 0.3862217174800485, + "learning_rate": 5.0128912884875865e-06, + "loss": 0.3023, "step": 14720 }, { - "epoch": 0.85, - "grad_norm": 0.37187860367678754, - "learning_rate": 1.2216295919917553e-06, - "loss": 0.2057, + "epoch": 0.68, + "grad_norm": 0.3231426794662638, + "learning_rate": 5.011601662323807e-06, + "loss": 0.1351, "step": 14721 }, { - "epoch": 0.85, - "grad_norm": 0.8919618832579139, - "learning_rate": 1.220738442366619e-06, - "loss": 0.4321, + "epoch": 0.68, + "grad_norm": 0.36477671575866233, + "learning_rate": 5.010312146598355e-06, + "loss": 0.2664, "step": 14722 }, { - "epoch": 0.85, - "grad_norm": 0.2582549238915909, - "learning_rate": 1.2198475967669333e-06, - "loss": 0.21, + "epoch": 0.68, + "grad_norm": 0.30314352322622823, + "learning_rate": 5.009022741339784e-06, + "loss": 0.253, "step": 14723 }, { - "epoch": 0.85, - "grad_norm": 0.4988948087103634, - "learning_rate": 1.2189570552235475e-06, - "loss": 0.3751, + "epoch": 0.68, + "grad_norm": 0.8064012702092093, + "learning_rate": 5.007733446576641e-06, + "loss": 0.3016, "step": 14724 }, { - "epoch": 0.85, - "grad_norm": 1.0828966430021156, - "learning_rate": 1.2180668177672984e-06, - "loss": 0.552, + "epoch": 0.68, + "grad_norm": 0.3492462189847072, + "learning_rate": 5.006444262337466e-06, + "loss": 0.2465, "step": 14725 }, { - "epoch": 0.85, - "grad_norm": 0.2535459801459226, - "learning_rate": 1.217176884429021e-06, - "loss": 0.1598, + "epoch": 0.68, + "grad_norm": 1.2021228083277933, + "learning_rate": 5.0051551886508055e-06, + "loss": 0.7554, "step": 14726 }, { - "epoch": 0.85, - "grad_norm": 0.4715556154628492, - "learning_rate": 1.21628725523953e-06, - "loss": 0.3407, + "epoch": 0.68, + "grad_norm": 0.3729402660221099, + "learning_rate": 5.003866225545186e-06, + "loss": 0.2354, "step": 14727 }, { - "epoch": 0.85, - "grad_norm": 0.333192726383167, - "learning_rate": 1.2153979302296338e-06, - "loss": 0.2542, + "epoch": 0.68, + "grad_norm": 0.26144051968486576, + "learning_rate": 5.002577373049162e-06, + "loss": 0.1898, "step": 14728 }, { - "epoch": 0.85, - "grad_norm": 0.3590585580216333, - "learning_rate": 1.2145089094301265e-06, - "loss": 0.1648, + "epoch": 0.68, + "grad_norm": 0.6084253675181527, + "learning_rate": 5.001288631191255e-06, + "loss": 0.2796, "step": 14729 }, { - "epoch": 0.85, - "grad_norm": 0.4915144416030057, - "learning_rate": 1.2136201928718005e-06, - "loss": 0.3565, + "epoch": 0.68, + "grad_norm": 0.8747689929201318, + "learning_rate": 5.000000000000003e-06, + "loss": 0.4638, "step": 14730 }, { - "epoch": 0.85, - "grad_norm": 0.3515123559483987, - "learning_rate": 1.21273178058543e-06, - "loss": 0.2912, + "epoch": 0.68, + "grad_norm": 0.29757202042047215, + "learning_rate": 4.998711479503927e-06, + "loss": 0.2163, "step": 14731 }, { - "epoch": 0.85, - "grad_norm": 0.38349679742074594, - "learning_rate": 1.21184367260178e-06, - "loss": 0.1698, + "epoch": 0.68, + "grad_norm": 0.5158097020511514, + "learning_rate": 4.99742306973156e-06, + "loss": 0.3865, "step": 14732 }, { - "epoch": 0.85, - "grad_norm": 0.40644587846781555, - "learning_rate": 1.2109558689516054e-06, - "loss": 0.3135, + "epoch": 0.68, + "grad_norm": 0.9905145544434747, + "learning_rate": 4.996134770711428e-06, + "loss": 0.4783, "step": 14733 }, { - "epoch": 0.85, - "grad_norm": 0.3399566033908146, - "learning_rate": 1.210068369665649e-06, - "loss": 0.1875, + "epoch": 0.68, + "grad_norm": 0.2479293816000717, + "learning_rate": 4.994846582472046e-06, + "loss": 0.1564, "step": 14734 }, { - "epoch": 0.85, - "grad_norm": 0.31943117545798166, - "learning_rate": 1.2091811747746484e-06, - "loss": 0.2578, + "epoch": 0.68, + "grad_norm": 0.29246346909524173, + "learning_rate": 4.993558505041935e-06, + "loss": 0.2555, "step": 14735 }, { - "epoch": 0.85, - "grad_norm": 0.3502609198287449, - "learning_rate": 1.208294284309327e-06, - "loss": 0.2722, + "epoch": 0.68, + "grad_norm": 0.8763628083509255, + "learning_rate": 4.99227053844962e-06, + "loss": 0.4179, "step": 14736 }, { - "epoch": 0.85, - "grad_norm": 1.4569221299642199, - "learning_rate": 1.2074076983003956e-06, - "loss": 0.6047, + "epoch": 0.68, + "grad_norm": 0.324604122021244, + "learning_rate": 4.990982682723603e-06, + "loss": 0.1932, "step": 14737 }, { - "epoch": 0.85, - "grad_norm": 0.40102742209080916, - "learning_rate": 1.2065214167785554e-06, - "loss": 0.2702, + "epoch": 0.68, + "grad_norm": 1.1132564856622544, + "learning_rate": 4.989694937892406e-06, + "loss": 0.6439, "step": 14738 }, { - "epoch": 0.85, - "grad_norm": 0.29010025220729196, - "learning_rate": 1.2056354397745029e-06, - "loss": 0.2478, + "epoch": 0.68, + "grad_norm": 0.34537545838440503, + "learning_rate": 4.98840730398453e-06, + "loss": 0.2725, "step": 14739 }, { - "epoch": 0.85, - "grad_norm": 0.3096437615449813, - "learning_rate": 1.2047497673189169e-06, - "loss": 0.1753, + "epoch": 0.68, + "grad_norm": 0.3246256539644725, + "learning_rate": 4.987119781028486e-06, + "loss": 0.1796, "step": 14740 }, { - "epoch": 0.85, - "grad_norm": 0.3867388418154201, - "learning_rate": 1.2038643994424682e-06, - "loss": 0.2674, + "epoch": 0.68, + "grad_norm": 0.7880081552717293, + "learning_rate": 4.985832369052778e-06, + "loss": 0.3835, "step": 14741 }, { - "epoch": 0.85, - "grad_norm": 0.4669314450298384, - "learning_rate": 1.2029793361758146e-06, - "loss": 0.2701, + "epoch": 0.68, + "grad_norm": 0.3231283272602558, + "learning_rate": 4.9845450680859144e-06, + "loss": 0.1962, "step": 14742 }, { - "epoch": 0.85, - "grad_norm": 0.36142721745288964, - "learning_rate": 1.2020945775496107e-06, - "loss": 0.2782, + "epoch": 0.68, + "grad_norm": 0.3648686017270063, + "learning_rate": 4.983257878156388e-06, + "loss": 0.2632, "step": 14743 }, { - "epoch": 0.85, - "grad_norm": 0.2954197524558824, - "learning_rate": 1.201210123594494e-06, - "loss": 0.2351, + "epoch": 0.68, + "grad_norm": 0.35358932851013675, + "learning_rate": 4.981970799292689e-06, + "loss": 0.2534, "step": 14744 }, { - "epoch": 0.85, - "grad_norm": 0.6366188960797053, - "learning_rate": 1.200325974341091e-06, - "loss": 0.3014, + "epoch": 0.68, + "grad_norm": 1.609169148585498, + "learning_rate": 4.980683831523328e-06, + "loss": 0.8187, "step": 14745 }, { - "epoch": 0.85, - "grad_norm": 0.23679746876696145, - "learning_rate": 1.199442129820022e-06, - "loss": 0.1938, + "epoch": 0.68, + "grad_norm": 0.3600257578682066, + "learning_rate": 4.979396974876785e-06, + "loss": 0.2539, "step": 14746 }, { - "epoch": 0.85, - "grad_norm": 0.3110930947191409, - "learning_rate": 1.1985585900618912e-06, - "loss": 0.2664, + "epoch": 0.68, + "grad_norm": 0.3667970522553824, + "learning_rate": 4.978110229381558e-06, + "loss": 0.2607, "step": 14747 }, { - "epoch": 0.85, - "grad_norm": 0.5009217757277309, - "learning_rate": 1.1976753550972998e-06, - "loss": 0.3774, + "epoch": 0.68, + "grad_norm": 0.3007026059768069, + "learning_rate": 4.976823595066128e-06, + "loss": 0.16, "step": 14748 }, { - "epoch": 0.85, - "grad_norm": 0.5908704134342048, - "learning_rate": 1.196792424956833e-06, - "loss": 0.2874, + "epoch": 0.68, + "grad_norm": 0.371996171774975, + "learning_rate": 4.9755370719589814e-06, + "loss": 0.2752, "step": 14749 }, { - "epoch": 0.85, - "grad_norm": 0.527797718182077, - "learning_rate": 1.1959097996710656e-06, - "loss": 0.2624, + "epoch": 0.68, + "grad_norm": 0.9378208831618536, + "learning_rate": 4.974250660088604e-06, + "loss": 0.5397, "step": 14750 }, { - "epoch": 0.85, - "grad_norm": 0.25644427630796324, - "learning_rate": 1.1950274792705618e-06, - "loss": 0.2696, + "epoch": 0.68, + "grad_norm": 0.3913092527622445, + "learning_rate": 4.972964359483471e-06, + "loss": 0.3095, "step": 14751 }, { - "epoch": 0.85, - "grad_norm": 0.16068604115031754, - "learning_rate": 1.1941454637858784e-06, - "loss": 0.0712, + "epoch": 0.68, + "grad_norm": 0.33213532740982116, + "learning_rate": 4.971678170172064e-06, + "loss": 0.2577, "step": 14752 }, { - "epoch": 0.85, - "grad_norm": 0.5669102028958434, - "learning_rate": 1.19326375324756e-06, - "loss": 0.3149, + "epoch": 0.68, + "grad_norm": 0.7461866376866146, + "learning_rate": 4.970392092182853e-06, + "loss": 0.2965, "step": 14753 }, { - "epoch": 0.85, - "grad_norm": 0.37777598291555, - "learning_rate": 1.1923823476861395e-06, - "loss": 0.3254, + "epoch": 0.68, + "grad_norm": 0.2782441612667592, + "learning_rate": 4.969106125544314e-06, + "loss": 0.1891, "step": 14754 }, { - "epoch": 0.85, - "grad_norm": 0.3227218948916543, - "learning_rate": 1.1915012471321385e-06, - "loss": 0.2587, + "epoch": 0.68, + "grad_norm": 0.3940320020759056, + "learning_rate": 4.96782027028492e-06, + "loss": 0.2659, "step": 14755 }, { - "epoch": 0.85, - "grad_norm": 0.3981084157797156, - "learning_rate": 1.1906204516160713e-06, - "loss": 0.3059, + "epoch": 0.68, + "grad_norm": 0.4319469675563594, + "learning_rate": 4.966534526433131e-06, + "loss": 0.327, "step": 14756 }, { - "epoch": 0.85, - "grad_norm": 0.5493564974138063, - "learning_rate": 1.189739961168439e-06, - "loss": 0.3309, + "epoch": 0.68, + "grad_norm": 0.8228022061655021, + "learning_rate": 4.965248894017417e-06, + "loss": 0.3224, "step": 14757 }, { - "epoch": 0.85, - "grad_norm": 0.31382816631129834, - "learning_rate": 1.1888597758197319e-06, - "loss": 0.1758, + "epoch": 0.68, + "grad_norm": 0.42108401369986137, + "learning_rate": 4.963963373066238e-06, + "loss": 0.2634, "step": 14758 }, { - "epoch": 0.85, - "grad_norm": 0.25289083122443995, - "learning_rate": 1.1879798956004307e-06, - "loss": 0.2251, + "epoch": 0.68, + "grad_norm": 0.31354728513524527, + "learning_rate": 4.962677963608063e-06, + "loss": 0.2975, "step": 14759 }, { - "epoch": 0.85, - "grad_norm": 0.5491193092701862, - "learning_rate": 1.1871003205410092e-06, - "loss": 0.4062, + "epoch": 0.68, + "grad_norm": 0.19205525749604488, + "learning_rate": 4.961392665671336e-06, + "loss": 0.0718, "step": 14760 }, { - "epoch": 0.85, - "grad_norm": 0.6820129227771387, - "learning_rate": 1.186221050671924e-06, - "loss": 0.3746, + "epoch": 0.68, + "grad_norm": 0.42309891347674833, + "learning_rate": 4.960107479284522e-06, + "loss": 0.2625, "step": 14761 }, { - "epoch": 0.85, - "grad_norm": 0.358774579788031, - "learning_rate": 1.1853420860236253e-06, - "loss": 0.1911, + "epoch": 0.68, + "grad_norm": 0.5705137145477874, + "learning_rate": 4.9588224044760726e-06, + "loss": 0.3966, "step": 14762 }, { - "epoch": 0.85, - "grad_norm": 0.30419959453244505, - "learning_rate": 1.1844634266265487e-06, - "loss": 0.266, + "epoch": 0.68, + "grad_norm": 0.3767112869049141, + "learning_rate": 4.957537441274433e-06, + "loss": 0.2495, "step": 14763 }, { - "epoch": 0.85, - "grad_norm": 0.4638461851125854, - "learning_rate": 1.1835850725111264e-06, - "loss": 0.2343, + "epoch": 0.68, + "grad_norm": 0.4245357138414307, + "learning_rate": 4.956252589708058e-06, + "loss": 0.2831, "step": 14764 }, { - "epoch": 0.85, - "grad_norm": 0.22267885583250036, - "learning_rate": 1.1827070237077743e-06, - "loss": 0.1363, + "epoch": 0.68, + "grad_norm": 0.5629506674671926, + "learning_rate": 4.954967849805387e-06, + "loss": 0.3344, "step": 14765 }, { - "epoch": 0.85, - "grad_norm": 0.5251370761115037, - "learning_rate": 1.1818292802468989e-06, - "loss": 0.3503, + "epoch": 0.68, + "grad_norm": 0.3297783692087681, + "learning_rate": 4.953683221594864e-06, + "loss": 0.1929, "step": 14766 }, { - "epoch": 0.85, - "grad_norm": 0.4038183648154914, - "learning_rate": 1.1809518421588939e-06, - "loss": 0.2937, + "epoch": 0.68, + "grad_norm": 0.27406628703147673, + "learning_rate": 4.952398705104935e-06, + "loss": 0.2161, "step": 14767 }, { - "epoch": 0.85, - "grad_norm": 0.5477079411295382, - "learning_rate": 1.1800747094741493e-06, - "loss": 0.1356, + "epoch": 0.68, + "grad_norm": 1.3134434944668747, + "learning_rate": 4.951114300364031e-06, + "loss": 0.7407, "step": 14768 }, { - "epoch": 0.85, - "grad_norm": 0.42781575837813424, - "learning_rate": 1.1791978822230388e-06, - "loss": 0.2874, + "epoch": 0.68, + "grad_norm": 0.7235504784309669, + "learning_rate": 4.949830007400592e-06, + "loss": 0.3866, "step": 14769 }, { - "epoch": 0.85, - "grad_norm": 0.38030178099469747, - "learning_rate": 1.1783213604359268e-06, - "loss": 0.2965, + "epoch": 0.68, + "grad_norm": 0.3545217274076139, + "learning_rate": 4.948545826243043e-06, + "loss": 0.2093, "step": 14770 }, { - "epoch": 0.85, - "grad_norm": 0.19456608161814554, - "learning_rate": 1.1774451441431655e-06, - "loss": 0.0866, + "epoch": 0.68, + "grad_norm": 0.4051280774489883, + "learning_rate": 4.947261756919828e-06, + "loss": 0.3157, "step": 14771 }, { - "epoch": 0.85, - "grad_norm": 0.39569769749296096, - "learning_rate": 1.1765692333750977e-06, - "loss": 0.2675, + "epoch": 0.68, + "grad_norm": 0.486078796046991, + "learning_rate": 4.945977799459367e-06, + "loss": 0.295, "step": 14772 }, { - "epoch": 0.85, - "grad_norm": 0.6547391841835668, - "learning_rate": 1.17569362816206e-06, - "loss": 0.3861, + "epoch": 0.68, + "grad_norm": 0.36007567221146375, + "learning_rate": 4.944693953890084e-06, + "loss": 0.1487, "step": 14773 }, { - "epoch": 0.85, - "grad_norm": 0.2570354731252588, - "learning_rate": 1.174818328534373e-06, - "loss": 0.1988, + "epoch": 0.68, + "grad_norm": 0.4264932158539147, + "learning_rate": 4.943410220240403e-06, + "loss": 0.3078, "step": 14774 }, { - "epoch": 0.85, - "grad_norm": 0.27903556542289093, - "learning_rate": 1.1739433345223482e-06, - "loss": 0.2137, + "epoch": 0.68, + "grad_norm": 0.37988605208599335, + "learning_rate": 4.9421265985387475e-06, + "loss": 0.3093, "step": 14775 }, { - "epoch": 0.85, - "grad_norm": 1.4667329346849802, - "learning_rate": 1.1730686461562835e-06, - "loss": 0.4638, + "epoch": 0.68, + "grad_norm": 0.282216130598595, + "learning_rate": 4.940843088813537e-06, + "loss": 0.0892, "step": 14776 }, { - "epoch": 0.85, - "grad_norm": 0.5838213979491272, - "learning_rate": 1.172194263466474e-06, - "loss": 0.3362, + "epoch": 0.68, + "grad_norm": 0.44070308990398893, + "learning_rate": 4.939559691093182e-06, + "loss": 0.3004, "step": 14777 }, { - "epoch": 0.85, - "grad_norm": 0.3959474359561361, - "learning_rate": 1.1713201864831968e-06, - "loss": 0.2403, + "epoch": 0.68, + "grad_norm": 0.3346604513323777, + "learning_rate": 4.938276405406097e-06, + "loss": 0.2377, "step": 14778 }, { - "epoch": 0.85, - "grad_norm": 0.33580085060755466, - "learning_rate": 1.1704464152367234e-06, - "loss": 0.2984, + "epoch": 0.68, + "grad_norm": 0.38687967863886735, + "learning_rate": 4.936993231780698e-06, + "loss": 0.2854, "step": 14779 }, { - "epoch": 0.85, - "grad_norm": 0.22515204910858588, - "learning_rate": 1.1695729497573082e-06, - "loss": 0.156, + "epoch": 0.68, + "grad_norm": 0.44802560733967606, + "learning_rate": 4.935710170245385e-06, + "loss": 0.2614, "step": 14780 }, { - "epoch": 0.85, - "grad_norm": 0.41575650449527546, - "learning_rate": 1.168699790075204e-06, - "loss": 0.2199, + "epoch": 0.68, + "grad_norm": 0.811714574731708, + "learning_rate": 4.934427220828572e-06, + "loss": 0.4254, "step": 14781 }, { - "epoch": 0.85, - "grad_norm": 0.5596586766328981, - "learning_rate": 1.1678269362206463e-06, - "loss": 0.3317, + "epoch": 0.68, + "grad_norm": 0.3057507535382125, + "learning_rate": 4.933144383558654e-06, + "loss": 0.1704, "step": 14782 }, { - "epoch": 0.85, - "grad_norm": 0.3552412822573221, - "learning_rate": 1.166954388223862e-06, - "loss": 0.25, + "epoch": 0.68, + "grad_norm": 0.2972984397850378, + "learning_rate": 4.931861658464036e-06, + "loss": 0.2425, "step": 14783 }, { - "epoch": 0.85, - "grad_norm": 0.36014971889060016, - "learning_rate": 1.1660821461150673e-06, - "loss": 0.2429, + "epoch": 0.68, + "grad_norm": 1.2925551863916782, + "learning_rate": 4.930579045573119e-06, + "loss": 0.4941, "step": 14784 }, { - "epoch": 0.85, - "grad_norm": 0.42642570719810025, - "learning_rate": 1.1652102099244667e-06, - "loss": 0.2757, + "epoch": 0.68, + "grad_norm": 0.6386053945303372, + "learning_rate": 4.929296544914294e-06, + "loss": 0.4159, "step": 14785 }, { - "epoch": 0.85, - "grad_norm": 0.2994411664078834, - "learning_rate": 1.1643385796822582e-06, - "loss": 0.2318, + "epoch": 0.68, + "grad_norm": 0.3877866745891255, + "learning_rate": 4.928014156515959e-06, + "loss": 0.2496, "step": 14786 }, { - "epoch": 0.85, - "grad_norm": 0.29557420750535496, - "learning_rate": 1.1634672554186243e-06, - "loss": 0.259, + "epoch": 0.68, + "grad_norm": 0.3791658564329558, + "learning_rate": 4.926731880406495e-06, + "loss": 0.3245, "step": 14787 }, { - "epoch": 0.85, - "grad_norm": 1.675274623281233, - "learning_rate": 1.16259623716374e-06, - "loss": 0.2517, + "epoch": 0.68, + "grad_norm": 0.25743754271604913, + "learning_rate": 4.9254497166143045e-06, + "loss": 0.1917, "step": 14788 }, { - "epoch": 0.85, - "grad_norm": 0.6074414344124812, - "learning_rate": 1.1617255249477677e-06, - "loss": 0.316, + "epoch": 0.68, + "grad_norm": 0.5827414843842658, + "learning_rate": 4.924167665167763e-06, + "loss": 0.2414, "step": 14789 }, { - "epoch": 0.85, - "grad_norm": 0.360726252157882, - "learning_rate": 1.16085511880086e-06, - "loss": 0.2795, + "epoch": 0.68, + "grad_norm": 0.3752255777364377, + "learning_rate": 4.92288572609526e-06, + "loss": 0.2849, "step": 14790 }, { - "epoch": 0.85, - "grad_norm": 0.32731888538386056, - "learning_rate": 1.1599850187531603e-06, - "loss": 0.2495, + "epoch": 0.68, + "grad_norm": 0.9150434762498326, + "learning_rate": 4.921603899425171e-06, + "loss": 0.5469, "step": 14791 }, { - "epoch": 0.85, - "grad_norm": 0.2693011419688268, - "learning_rate": 1.1591152248347959e-06, - "loss": 0.159, + "epoch": 0.68, + "grad_norm": 0.35054748539220487, + "learning_rate": 4.920322185185876e-06, + "loss": 0.2864, "step": 14792 }, { - "epoch": 0.85, - "grad_norm": 0.3694108954988957, - "learning_rate": 1.1582457370758948e-06, - "loss": 0.2588, + "epoch": 0.68, + "grad_norm": 0.27560621302988253, + "learning_rate": 4.919040583405758e-06, + "loss": 0.1714, "step": 14793 }, { - "epoch": 0.85, - "grad_norm": 0.42252599397338797, - "learning_rate": 1.157376555506562e-06, - "loss": 0.2219, + "epoch": 0.68, + "grad_norm": 0.4915111225551854, + "learning_rate": 4.917759094113178e-06, + "loss": 0.3247, "step": 14794 }, { - "epoch": 0.85, - "grad_norm": 0.3670240559105416, - "learning_rate": 1.1565076801568997e-06, - "loss": 0.2801, + "epoch": 0.68, + "grad_norm": 0.313851803134407, + "learning_rate": 4.916477717336514e-06, + "loss": 0.2716, "step": 14795 }, { - "epoch": 0.85, - "grad_norm": 0.4043612092318145, - "learning_rate": 1.1556391110569965e-06, - "loss": 0.2843, + "epoch": 0.68, + "grad_norm": 1.3000684555391218, + "learning_rate": 4.915196453104138e-06, + "loss": 0.2903, "step": 14796 }, { - "epoch": 0.85, - "grad_norm": 1.1551526431797676, - "learning_rate": 1.1547708482369279e-06, - "loss": 0.719, + "epoch": 0.68, + "grad_norm": 0.5951372160835796, + "learning_rate": 4.9139153014444085e-06, + "loss": 0.3209, "step": 14797 }, { - "epoch": 0.85, - "grad_norm": 0.22288561492500292, - "learning_rate": 1.1539028917267668e-06, - "loss": 0.1688, + "epoch": 0.68, + "grad_norm": 0.38133120865768527, + "learning_rate": 4.912634262385695e-06, + "loss": 0.3019, "step": 14798 }, { - "epoch": 0.85, - "grad_norm": 0.2946033880771682, - "learning_rate": 1.1530352415565683e-06, - "loss": 0.2451, + "epoch": 0.68, + "grad_norm": 0.3366729958144585, + "learning_rate": 4.911353335956353e-06, + "loss": 0.2512, "step": 14799 }, { - "epoch": 0.85, - "grad_norm": 0.9260136414904041, - "learning_rate": 1.152167897756379e-06, - "loss": 0.3756, + "epoch": 0.68, + "grad_norm": 0.2388175246421776, + "learning_rate": 4.910072522184742e-06, + "loss": 0.1333, "step": 14800 }, { - "epoch": 0.85, - "grad_norm": 0.46156669228028563, - "learning_rate": 1.1513008603562327e-06, - "loss": 0.2187, + "epoch": 0.68, + "grad_norm": 0.4395797975860606, + "learning_rate": 4.908791821099225e-06, + "loss": 0.2837, "step": 14801 }, { - "epoch": 0.85, - "grad_norm": 0.4098473307632537, - "learning_rate": 1.1504341293861588e-06, - "loss": 0.3288, + "epoch": 0.68, + "grad_norm": 0.484312930410866, + "learning_rate": 4.907511232728145e-06, + "loss": 0.2435, "step": 14802 }, { - "epoch": 0.85, - "grad_norm": 0.3391465615133372, - "learning_rate": 1.149567704876171e-06, - "loss": 0.3124, + "epoch": 0.68, + "grad_norm": 0.4307057355292276, + "learning_rate": 4.906230757099862e-06, + "loss": 0.2746, "step": 14803 }, { - "epoch": 0.85, - "grad_norm": 0.24197185666517396, - "learning_rate": 1.1487015868562723e-06, - "loss": 0.0848, + "epoch": 0.68, + "grad_norm": 0.44561506344904483, + "learning_rate": 4.904950394242715e-06, + "loss": 0.3237, "step": 14804 }, { - "epoch": 0.85, - "grad_norm": 0.30112775030509276, - "learning_rate": 1.147835775356455e-06, - "loss": 0.2428, + "epoch": 0.68, + "grad_norm": 0.9181918157771111, + "learning_rate": 4.903670144185061e-06, + "loss": 0.5401, "step": 14805 }, { - "epoch": 0.85, - "grad_norm": 0.31423519705737857, - "learning_rate": 1.1469702704067064e-06, - "loss": 0.3014, + "epoch": 0.68, + "grad_norm": 0.2504408705271983, + "learning_rate": 4.902390006955236e-06, + "loss": 0.1883, "step": 14806 }, { - "epoch": 0.85, - "grad_norm": 0.7919262173209054, - "learning_rate": 1.146105072036997e-06, - "loss": 0.2429, + "epoch": 0.68, + "grad_norm": 0.3065093310082717, + "learning_rate": 4.901109982581586e-06, + "loss": 0.2268, "step": 14807 }, { - "epoch": 0.85, - "grad_norm": 0.35309423242059595, - "learning_rate": 1.1452401802772884e-06, - "loss": 0.2779, + "epoch": 0.68, + "grad_norm": 0.952536937288202, + "learning_rate": 4.899830071092442e-06, + "loss": 0.4012, "step": 14808 }, { - "epoch": 0.85, - "grad_norm": 1.1765512888066414, - "learning_rate": 1.144375595157532e-06, - "loss": 0.7295, + "epoch": 0.68, + "grad_norm": 0.6006567098501562, + "learning_rate": 4.898550272516145e-06, + "loss": 0.2689, "step": 14809 }, { - "epoch": 0.85, - "grad_norm": 0.26690693768113766, - "learning_rate": 1.143511316707665e-06, - "loss": 0.2349, + "epoch": 0.68, + "grad_norm": 0.3619993931342857, + "learning_rate": 4.897270586881032e-06, + "loss": 0.2794, "step": 14810 }, { - "epoch": 0.85, - "grad_norm": 0.22741761840826571, - "learning_rate": 1.1426473449576225e-06, - "loss": 0.1552, + "epoch": 0.68, + "grad_norm": 0.37986003463477924, + "learning_rate": 4.895991014215427e-06, + "loss": 0.314, "step": 14811 }, { - "epoch": 0.85, - "grad_norm": 0.8487452061726126, - "learning_rate": 1.1417836799373205e-06, - "loss": 0.3599, + "epoch": 0.68, + "grad_norm": 0.20093415540134557, + "learning_rate": 4.894711554547665e-06, + "loss": 0.0832, "step": 14812 }, { - "epoch": 0.85, - "grad_norm": 0.7051579592509625, - "learning_rate": 1.1409203216766706e-06, - "loss": 0.4184, + "epoch": 0.68, + "grad_norm": 0.38006758433398263, + "learning_rate": 4.8934322079060644e-06, + "loss": 0.2585, "step": 14813 }, { - "epoch": 0.85, - "grad_norm": 0.24162397410920866, - "learning_rate": 1.1400572702055657e-06, - "loss": 0.1935, + "epoch": 0.68, + "grad_norm": 0.37298740089267546, + "learning_rate": 4.892152974318955e-06, + "loss": 0.3039, "step": 14814 }, { - "epoch": 0.85, - "grad_norm": 0.45583372486177876, - "learning_rate": 1.1391945255538994e-06, - "loss": 0.3813, + "epoch": 0.68, + "grad_norm": 0.6765187389293709, + "learning_rate": 4.890873853814657e-06, + "loss": 0.2586, "step": 14815 }, { - "epoch": 0.85, - "grad_norm": 0.4161358729060462, - "learning_rate": 1.1383320877515446e-06, - "loss": 0.2391, + "epoch": 0.68, + "grad_norm": 0.3861097933049821, + "learning_rate": 4.889594846421485e-06, + "loss": 0.2847, "step": 14816 }, { - "epoch": 0.85, - "grad_norm": 0.26035907038958234, - "learning_rate": 1.1374699568283698e-06, - "loss": 0.1717, + "epoch": 0.68, + "grad_norm": 1.3699898547801885, + "learning_rate": 4.888315952167757e-06, + "loss": 0.682, "step": 14817 }, { - "epoch": 0.85, - "grad_norm": 0.32519859938343304, - "learning_rate": 1.1366081328142264e-06, - "loss": 0.2887, + "epoch": 0.68, + "grad_norm": 0.30197485055558476, + "learning_rate": 4.887037171081792e-06, + "loss": 0.2478, "step": 14818 }, { - "epoch": 0.85, - "grad_norm": 0.6847648618208944, - "learning_rate": 1.135746615738965e-06, - "loss": 0.3494, + "epoch": 0.68, + "grad_norm": 0.2999222988890184, + "learning_rate": 4.885758503191892e-06, + "loss": 0.1713, "step": 14819 }, { - "epoch": 0.85, - "grad_norm": 0.32858251668966476, - "learning_rate": 1.1348854056324166e-06, - "loss": 0.212, + "epoch": 0.68, + "grad_norm": 0.8210052347031515, + "learning_rate": 4.884479948526373e-06, + "loss": 0.4205, "step": 14820 }, { - "epoch": 0.85, - "grad_norm": 1.1507405287489314, - "learning_rate": 1.1340245025244045e-06, - "loss": 0.7616, + "epoch": 0.68, + "grad_norm": 0.6380770395725553, + "learning_rate": 4.88320150711353e-06, + "loss": 0.3764, "step": 14821 }, { - "epoch": 0.85, - "grad_norm": 0.36743606071499174, - "learning_rate": 1.133163906444742e-06, - "loss": 0.3013, + "epoch": 0.68, + "grad_norm": 0.3453280824752142, + "learning_rate": 4.881923178981681e-06, + "loss": 0.2129, "step": 14822 }, { - "epoch": 0.85, - "grad_norm": 0.3275912685999875, - "learning_rate": 1.132303617423236e-06, - "loss": 0.2387, + "epoch": 0.68, + "grad_norm": 0.36569430452340534, + "learning_rate": 4.880644964159117e-06, + "loss": 0.2874, "step": 14823 }, { - "epoch": 0.85, - "grad_norm": 0.34411631210529814, - "learning_rate": 1.131443635489672e-06, - "loss": 0.1776, + "epoch": 0.68, + "grad_norm": 0.2949922072245876, + "learning_rate": 4.879366862674143e-06, + "loss": 0.1635, "step": 14824 }, { - "epoch": 0.85, - "grad_norm": 1.4080656699672398, - "learning_rate": 1.1305839606738334e-06, - "loss": 0.717, + "epoch": 0.68, + "grad_norm": 0.35838146793826736, + "learning_rate": 4.878088874555047e-06, + "loss": 0.1919, "step": 14825 }, { - "epoch": 0.85, - "grad_norm": 0.25755544714127737, - "learning_rate": 1.129724593005489e-06, - "loss": 0.2429, + "epoch": 0.68, + "grad_norm": 0.37686059616951706, + "learning_rate": 4.876810999830127e-06, + "loss": 0.3216, "step": 14826 }, { - "epoch": 0.85, - "grad_norm": 0.4605922701134536, - "learning_rate": 1.1288655325144027e-06, - "loss": 0.3024, + "epoch": 0.68, + "grad_norm": 0.8733749706309435, + "learning_rate": 4.875533238527678e-06, + "loss": 0.4339, "step": 14827 }, { - "epoch": 0.85, - "grad_norm": 0.8821629142995729, - "learning_rate": 1.1280067792303218e-06, - "loss": 0.4034, + "epoch": 0.68, + "grad_norm": 0.3195899134755221, + "learning_rate": 4.874255590675981e-06, + "loss": 0.194, "step": 14828 }, { - "epoch": 0.85, - "grad_norm": 0.3203927459270146, - "learning_rate": 1.1271483331829835e-06, - "loss": 0.2634, + "epoch": 0.68, + "grad_norm": 1.2292364445862323, + "learning_rate": 4.8729780563033265e-06, + "loss": 0.6327, "step": 14829 }, { - "epoch": 0.85, - "grad_norm": 0.33166642821629216, - "learning_rate": 1.1262901944021165e-06, - "loss": 0.245, + "epoch": 0.68, + "grad_norm": 0.3806679666893542, + "learning_rate": 4.871700635437993e-06, + "loss": 0.3219, "step": 14830 }, { - "epoch": 0.85, - "grad_norm": 0.3227943214683181, - "learning_rate": 1.125432362917439e-06, - "loss": 0.1672, + "epoch": 0.68, + "grad_norm": 0.24660927941562005, + "learning_rate": 4.870423328108266e-06, + "loss": 0.1969, "step": 14831 }, { - "epoch": 0.85, - "grad_norm": 0.31125204319825606, - "learning_rate": 1.1245748387586575e-06, - "loss": 0.2647, + "epoch": 0.68, + "grad_norm": 0.6236478965550891, + "learning_rate": 4.869146134342426e-06, + "loss": 0.2195, "step": 14832 }, { - "epoch": 0.85, - "grad_norm": 0.8780581738774083, - "learning_rate": 1.123717621955468e-06, - "loss": 0.364, + "epoch": 0.68, + "grad_norm": 1.4368335514725643, + "learning_rate": 4.867869054168741e-06, + "loss": 0.7737, "step": 14833 }, { - "epoch": 0.85, - "grad_norm": 0.3650475322050246, - "learning_rate": 1.1228607125375534e-06, - "loss": 0.2866, + "epoch": 0.68, + "grad_norm": 0.2739820229668408, + "learning_rate": 4.86659208761549e-06, + "loss": 0.2423, "step": 14834 }, { - "epoch": 0.85, - "grad_norm": 0.40110708736483236, - "learning_rate": 1.1220041105345935e-06, - "loss": 0.2838, + "epoch": 0.68, + "grad_norm": 0.47790310974207123, + "learning_rate": 4.865315234710941e-06, + "loss": 0.252, "step": 14835 }, { - "epoch": 0.85, - "grad_norm": 0.560535169035649, - "learning_rate": 1.121147815976248e-06, - "loss": 0.3457, + "epoch": 0.68, + "grad_norm": 0.8719823736096346, + "learning_rate": 4.864038495483369e-06, + "loss": 0.447, "step": 14836 }, { - "epoch": 0.85, - "grad_norm": 0.20911265248523273, - "learning_rate": 1.1202918288921727e-06, - "loss": 0.143, + "epoch": 0.68, + "grad_norm": 0.3640277083624418, + "learning_rate": 4.862761869961033e-06, + "loss": 0.2899, "step": 14837 }, { - "epoch": 0.85, - "grad_norm": 0.3534204337682163, - "learning_rate": 1.1194361493120099e-06, - "loss": 0.268, + "epoch": 0.68, + "grad_norm": 0.34044573855380095, + "learning_rate": 4.861485358172192e-06, + "loss": 0.2535, "step": 14838 }, { - "epoch": 0.85, - "grad_norm": 0.47013886187275117, - "learning_rate": 1.118580777265388e-06, - "loss": 0.3605, + "epoch": 0.68, + "grad_norm": 0.3195814331668908, + "learning_rate": 4.8602089601451196e-06, + "loss": 0.1837, "step": 14839 }, { - "epoch": 0.85, - "grad_norm": 0.7626126324582352, - "learning_rate": 1.1177257127819353e-06, - "loss": 0.3003, + "epoch": 0.68, + "grad_norm": 0.38469528548558063, + "learning_rate": 4.858932675908063e-06, + "loss": 0.2612, "step": 14840 }, { - "epoch": 0.85, - "grad_norm": 0.31851659287056955, - "learning_rate": 1.1168709558912583e-06, - "loss": 0.2653, + "epoch": 0.68, + "grad_norm": 1.2369231663264248, + "learning_rate": 4.857656505489285e-06, + "loss": 0.3772, "step": 14841 }, { - "epoch": 0.85, - "grad_norm": 0.3898838568972243, - "learning_rate": 1.116016506622959e-06, - "loss": 0.2784, + "epoch": 0.68, + "grad_norm": 0.34146249157645714, + "learning_rate": 4.856380448917033e-06, + "loss": 0.2846, "step": 14842 }, { - "epoch": 0.85, - "grad_norm": 0.15368938464097603, - "learning_rate": 1.1151623650066224e-06, - "loss": 0.0877, + "epoch": 0.68, + "grad_norm": 0.38473913295869905, + "learning_rate": 4.85510450621956e-06, + "loss": 0.2749, "step": 14843 }, { - "epoch": 0.85, - "grad_norm": 0.30142584311757015, - "learning_rate": 1.114308531071835e-06, - "loss": 0.238, + "epoch": 0.68, + "grad_norm": 0.44097985476407164, + "learning_rate": 4.853828677425119e-06, + "loss": 0.2754, "step": 14844 }, { - "epoch": 0.85, - "grad_norm": 1.2307746884376518, - "learning_rate": 1.1134550048481596e-06, - "loss": 0.5617, + "epoch": 0.68, + "grad_norm": 0.2980316586063471, + "learning_rate": 4.852552962561946e-06, + "loss": 0.1933, "step": 14845 }, { - "epoch": 0.85, - "grad_norm": 0.3237698295002861, - "learning_rate": 1.1126017863651562e-06, - "loss": 0.2563, + "epoch": 0.68, + "grad_norm": 0.360281600356552, + "learning_rate": 4.8512773616582945e-06, + "loss": 0.2931, "step": 14846 }, { - "epoch": 0.85, - "grad_norm": 0.31133573341743925, - "learning_rate": 1.1117488756523677e-06, - "loss": 0.2625, + "epoch": 0.68, + "grad_norm": 0.5288943911274756, + "learning_rate": 4.850001874742395e-06, + "loss": 0.3938, "step": 14847 }, { - "epoch": 0.85, - "grad_norm": 1.3573196658449307, - "learning_rate": 1.1108962727393368e-06, - "loss": 0.5345, + "epoch": 0.68, + "grad_norm": 0.583180873984833, + "learning_rate": 4.8487265018424905e-06, + "loss": 0.2966, "step": 14848 }, { - "epoch": 0.85, - "grad_norm": 0.27633793321812017, - "learning_rate": 1.110043977655585e-06, - "loss": 0.1962, + "epoch": 0.68, + "grad_norm": 0.33249312490259125, + "learning_rate": 4.84745124298682e-06, + "loss": 0.2731, "step": 14849 }, { - "epoch": 0.85, - "grad_norm": 0.23377373058249695, - "learning_rate": 1.109191990430628e-06, - "loss": 0.2015, + "epoch": 0.68, + "grad_norm": 0.3573421545794827, + "learning_rate": 4.8461760982036096e-06, + "loss": 0.2999, "step": 14850 }, { - "epoch": 0.85, - "grad_norm": 1.2731592859606273, - "learning_rate": 1.1083403110939695e-06, - "loss": 0.7351, + "epoch": 0.68, + "grad_norm": 0.17505808618995522, + "learning_rate": 4.844901067521093e-06, + "loss": 0.0952, "step": 14851 }, { - "epoch": 0.85, - "grad_norm": 0.635556152341861, - "learning_rate": 1.107488939675102e-06, - "loss": 0.374, + "epoch": 0.68, + "grad_norm": 0.3503518280295589, + "learning_rate": 4.843626150967498e-06, + "loss": 0.2542, "step": 14852 }, { - "epoch": 0.85, - "grad_norm": 0.3848628568892182, - "learning_rate": 1.1066378762035125e-06, - "loss": 0.1684, + "epoch": 0.68, + "grad_norm": 1.2647861585366738, + "learning_rate": 4.842351348571054e-06, + "loss": 0.6127, "step": 14853 }, { - "epoch": 0.85, - "grad_norm": 0.3072768771702361, - "learning_rate": 1.1057871207086713e-06, - "loss": 0.2661, + "epoch": 0.68, + "grad_norm": 0.38223365733108594, + "learning_rate": 4.841076660359977e-06, + "loss": 0.2544, "step": 14854 }, { - "epoch": 0.85, - "grad_norm": 0.3946099595301403, - "learning_rate": 1.1049366732200383e-06, - "loss": 0.2413, + "epoch": 0.68, + "grad_norm": 0.3600420098474363, + "learning_rate": 4.839802086362489e-06, + "loss": 0.2505, "step": 14855 }, { - "epoch": 0.85, - "grad_norm": 0.3380880546143608, - "learning_rate": 1.104086533767067e-06, - "loss": 0.1868, + "epoch": 0.68, + "grad_norm": 1.0851056698835129, + "learning_rate": 4.838527626606815e-06, + "loss": 0.5067, "step": 14856 }, { - "epoch": 0.85, - "grad_norm": 0.4442492965914318, - "learning_rate": 1.1032367023791957e-06, - "loss": 0.3175, + "epoch": 0.68, + "grad_norm": 0.2866995802392194, + "learning_rate": 4.837253281121159e-06, + "loss": 0.2056, "step": 14857 }, { - "epoch": 0.85, - "grad_norm": 0.2528655784441553, - "learning_rate": 1.1023871790858553e-06, - "loss": 0.2477, + "epoch": 0.68, + "grad_norm": 0.26806860331099647, + "learning_rate": 4.835979049933744e-06, + "loss": 0.202, "step": 14858 }, { - "epoch": 0.85, - "grad_norm": 0.28372712548736984, - "learning_rate": 1.1015379639164625e-06, - "loss": 0.1826, + "epoch": 0.68, + "grad_norm": 0.478370830738979, + "learning_rate": 4.8347049330727725e-06, + "loss": 0.3033, "step": 14859 }, { - "epoch": 0.85, - "grad_norm": 1.1834437417520254, - "learning_rate": 1.100689056900429e-06, - "loss": 0.5269, + "epoch": 0.68, + "grad_norm": 0.6616205420281361, + "learning_rate": 4.833430930566455e-06, + "loss": 0.3799, "step": 14860 }, { - "epoch": 0.85, - "grad_norm": 0.3866428294105834, - "learning_rate": 1.0998404580671507e-06, - "loss": 0.2489, + "epoch": 0.68, + "grad_norm": 0.442247347880616, + "learning_rate": 4.832157042443001e-06, + "loss": 0.178, "step": 14861 }, { - "epoch": 0.85, - "grad_norm": 0.25646265297625703, - "learning_rate": 1.0989921674460146e-06, - "loss": 0.2444, + "epoch": 0.68, + "grad_norm": 0.3421735448917148, + "learning_rate": 4.830883268730605e-06, + "loss": 0.2952, "step": 14862 }, { - "epoch": 0.85, - "grad_norm": 0.44426050608931333, - "learning_rate": 1.0981441850663976e-06, - "loss": 0.263, + "epoch": 0.68, + "grad_norm": 0.4242861954956544, + "learning_rate": 4.8296096094574765e-06, + "loss": 0.2635, "step": 14863 }, { - "epoch": 0.85, - "grad_norm": 0.43875055594403534, - "learning_rate": 1.0972965109576628e-06, - "loss": 0.25, + "epoch": 0.68, + "grad_norm": 0.3167999253470079, + "learning_rate": 4.828336064651798e-06, + "loss": 0.1956, "step": 14864 }, { - "epoch": 0.85, - "grad_norm": 0.4141839426559247, - "learning_rate": 1.0964491451491677e-06, - "loss": 0.2651, + "epoch": 0.68, + "grad_norm": 0.5494529342169818, + "learning_rate": 4.827062634341785e-06, + "loss": 0.2814, "step": 14865 }, { - "epoch": 0.85, - "grad_norm": 0.2750628065825932, - "learning_rate": 1.0956020876702567e-06, - "loss": 0.2278, + "epoch": 0.68, + "grad_norm": 0.41032306548640013, + "learning_rate": 4.825789318555617e-06, + "loss": 0.3286, "step": 14866 }, { - "epoch": 0.85, - "grad_norm": 1.0146697997317078, - "learning_rate": 1.094755338550263e-06, - "loss": 0.5553, + "epoch": 0.68, + "grad_norm": 0.3130695907495629, + "learning_rate": 4.824516117321484e-06, + "loss": 0.1845, "step": 14867 }, { - "epoch": 0.85, - "grad_norm": 0.3163524879755541, - "learning_rate": 1.0939088978185053e-06, - "loss": 0.2456, + "epoch": 0.68, + "grad_norm": 1.2228732425021007, + "learning_rate": 4.823243030667576e-06, + "loss": 0.5381, "step": 14868 }, { - "epoch": 0.85, - "grad_norm": 0.6128769729330104, - "learning_rate": 1.0930627655043036e-06, - "loss": 0.2857, + "epoch": 0.68, + "grad_norm": 1.249935960744163, + "learning_rate": 4.821970058622077e-06, + "loss": 0.7687, "step": 14869 }, { - "epoch": 0.85, - "grad_norm": 0.23060783739572102, - "learning_rate": 1.0922169416369531e-06, - "loss": 0.2126, + "epoch": 0.68, + "grad_norm": 0.28943789304284595, + "learning_rate": 4.820697201213175e-06, + "loss": 0.2461, "step": 14870 }, { - "epoch": 0.85, - "grad_norm": 0.6672465753746528, - "learning_rate": 1.0913714262457486e-06, - "loss": 0.3115, + "epoch": 0.68, + "grad_norm": 0.36903575578846937, + "learning_rate": 4.819424458469041e-06, + "loss": 0.1866, "step": 14871 }, { - "epoch": 0.85, - "grad_norm": 0.3828226055546082, - "learning_rate": 1.0905262193599665e-06, - "loss": 0.3032, + "epoch": 0.68, + "grad_norm": 0.48552047370513435, + "learning_rate": 4.81815183041786e-06, + "loss": 0.2829, "step": 14872 }, { - "epoch": 0.85, - "grad_norm": 0.30441911790214365, - "learning_rate": 1.0896813210088797e-06, - "loss": 0.2394, + "epoch": 0.68, + "grad_norm": 0.36356304839850956, + "learning_rate": 4.816879317087799e-06, + "loss": 0.2505, "step": 14873 }, { - "epoch": 0.85, - "grad_norm": 0.6542472129796099, - "learning_rate": 1.0888367312217452e-06, - "loss": 0.2881, + "epoch": 0.68, + "grad_norm": 0.36315183619714647, + "learning_rate": 4.815606918507036e-06, + "loss": 0.2321, "step": 14874 }, { - "epoch": 0.85, - "grad_norm": 0.37036987367157137, - "learning_rate": 1.0879924500278116e-06, - "loss": 0.2922, + "epoch": 0.68, + "grad_norm": 1.1036323548085407, + "learning_rate": 4.814334634703741e-06, + "loss": 0.6113, "step": 14875 }, { - "epoch": 0.85, - "grad_norm": 0.2711333428099278, - "learning_rate": 1.087148477456317e-06, - "loss": 0.1101, + "epoch": 0.68, + "grad_norm": 0.42716503914857173, + "learning_rate": 4.813062465706077e-06, + "loss": 0.2941, "step": 14876 }, { - "epoch": 0.85, - "grad_norm": 0.3898315605171779, - "learning_rate": 1.0863048135364851e-06, - "loss": 0.2556, - "step": 14877 + "epoch": 0.68, + "grad_norm": 0.7012278421890177, + "learning_rate": 4.811790411542209e-06, + "loss": 0.2652, + "step": 14877 }, { - "epoch": 0.85, - "grad_norm": 0.32269214206454216, - "learning_rate": 1.0854614582975353e-06, - "loss": 0.2821, + "epoch": 0.68, + "grad_norm": 0.27671139947561396, + "learning_rate": 4.810518472240305e-06, + "loss": 0.213, "step": 14878 }, { - "epoch": 0.85, - "grad_norm": 1.4122935758067834, - "learning_rate": 1.084618411768673e-06, - "loss": 0.3141, + "epoch": 0.68, + "grad_norm": 0.5913561470655007, + "learning_rate": 4.809246647828517e-06, + "loss": 0.2587, "step": 14879 }, { - "epoch": 0.85, - "grad_norm": 0.40451562657458473, - "learning_rate": 1.0837756739790916e-06, - "loss": 0.269, + "epoch": 0.68, + "grad_norm": 0.3984335191402875, + "learning_rate": 4.807974938335009e-06, + "loss": 0.2774, "step": 14880 }, { - "epoch": 0.85, - "grad_norm": 0.36283485462304216, - "learning_rate": 1.0829332449579732e-06, - "loss": 0.322, + "epoch": 0.68, + "grad_norm": 0.34053603385071507, + "learning_rate": 4.806703343787924e-06, + "loss": 0.2616, "step": 14881 }, { - "epoch": 0.86, - "grad_norm": 0.28832612026905335, - "learning_rate": 1.0820911247344944e-06, - "loss": 0.1806, + "epoch": 0.68, + "grad_norm": 0.4098913168836748, + "learning_rate": 4.8054318642154294e-06, + "loss": 0.2442, "step": 14882 }, { - "epoch": 0.86, - "grad_norm": 0.26597397855279037, - "learning_rate": 1.0812493133378166e-06, - "loss": 0.1913, + "epoch": 0.68, + "grad_norm": 0.543330196384016, + "learning_rate": 4.804160499645667e-06, + "loss": 0.3039, "step": 14883 }, { - "epoch": 0.86, - "grad_norm": 0.5791713121666998, - "learning_rate": 1.0804078107970917e-06, - "loss": 0.3546, + "epoch": 0.68, + "grad_norm": 0.24514524321959258, + "learning_rate": 4.8028892501067795e-06, + "loss": 0.099, "step": 14884 }, { - "epoch": 0.86, - "grad_norm": 0.5003590603484814, - "learning_rate": 1.0795666171414597e-06, - "loss": 0.3228, + "epoch": 0.68, + "grad_norm": 0.4497937285775187, + "learning_rate": 4.801618115626915e-06, + "loss": 0.2633, "step": 14885 }, { - "epoch": 0.86, - "grad_norm": 0.2956405417591853, - "learning_rate": 1.0787257324000533e-06, - "loss": 0.2204, + "epoch": 0.68, + "grad_norm": 0.3519854854192787, + "learning_rate": 4.800347096234215e-06, + "loss": 0.2749, "step": 14886 }, { - "epoch": 0.86, - "grad_norm": 0.5080100270148686, - "learning_rate": 1.077885156601991e-06, - "loss": 0.3585, + "epoch": 0.68, + "grad_norm": 1.066305748079328, + "learning_rate": 4.799076191956822e-06, + "loss": 0.4689, "step": 14887 }, { - "epoch": 0.86, - "grad_norm": 0.49071658954089276, - "learning_rate": 1.0770448897763818e-06, - "loss": 0.2817, + "epoch": 0.68, + "grad_norm": 0.4616407808136488, + "learning_rate": 4.797805402822866e-06, + "loss": 0.2836, "step": 14888 }, { - "epoch": 0.86, - "grad_norm": 0.15267670632605457, - "learning_rate": 1.0762049319523248e-06, - "loss": 0.0704, + "epoch": 0.68, + "grad_norm": 0.5130236686582241, + "learning_rate": 4.796534728860489e-06, + "loss": 0.3527, "step": 14889 }, { - "epoch": 0.86, - "grad_norm": 0.2648087586985502, - "learning_rate": 1.075365283158908e-06, - "loss": 0.2688, + "epoch": 0.68, + "grad_norm": 0.2216244169018214, + "learning_rate": 4.795264170097813e-06, + "loss": 0.1634, "step": 14890 }, { - "epoch": 0.86, - "grad_norm": 0.6311471408354534, - "learning_rate": 1.0745259434252065e-06, - "loss": 0.3905, + "epoch": 0.68, + "grad_norm": 0.44659515724180926, + "learning_rate": 4.7939937265629725e-06, + "loss": 0.2635, "step": 14891 }, { - "epoch": 0.86, - "grad_norm": 0.671787505272611, - "learning_rate": 1.0736869127802884e-06, - "loss": 0.1974, + "epoch": 0.68, + "grad_norm": 0.6164796242337653, + "learning_rate": 4.792723398284097e-06, + "loss": 0.3666, "step": 14892 }, { - "epoch": 0.86, - "grad_norm": 0.2957218483145482, - "learning_rate": 1.0728481912532062e-06, - "loss": 0.2763, + "epoch": 0.68, + "grad_norm": 0.4740551068924641, + "learning_rate": 4.791453185289302e-06, + "loss": 0.3352, "step": 14893 }, { - "epoch": 0.86, - "grad_norm": 0.32442168752672784, - "learning_rate": 1.07200977887301e-06, - "loss": 0.2365, + "epoch": 0.68, + "grad_norm": 0.34189852179608027, + "learning_rate": 4.790183087606715e-06, + "loss": 0.221, "step": 14894 }, { - "epoch": 0.86, - "grad_norm": 0.3205645128811862, - "learning_rate": 1.0711716756687307e-06, - "loss": 0.0799, + "epoch": 0.68, + "grad_norm": 0.5038264781010451, + "learning_rate": 4.788913105264455e-06, + "loss": 0.3478, "step": 14895 }, { - "epoch": 0.86, - "grad_norm": 0.40600771972047073, - "learning_rate": 1.070333881669392e-06, - "loss": 0.2881, + "epoch": 0.68, + "grad_norm": 0.3425412868835726, + "learning_rate": 4.787643238290635e-06, + "loss": 0.197, "step": 14896 }, { - "epoch": 0.86, - "grad_norm": 0.4958254382676697, - "learning_rate": 1.0694963969040062e-06, - "loss": 0.3264, + "epoch": 0.68, + "grad_norm": 0.30041639802726205, + "learning_rate": 4.786373486713371e-06, + "loss": 0.0696, "step": 14897 }, { - "epoch": 0.86, - "grad_norm": 0.40568174234009224, - "learning_rate": 1.0686592214015766e-06, - "loss": 0.2968, + "epoch": 0.68, + "grad_norm": 0.28530990357442765, + "learning_rate": 4.7851038505607675e-06, + "loss": 0.2877, "step": 14898 }, { - "epoch": 0.86, - "grad_norm": 0.32870509799899306, - "learning_rate": 1.067822355191095e-06, - "loss": 0.235, + "epoch": 0.68, + "grad_norm": 0.6508530150422616, + "learning_rate": 4.783834329860946e-06, + "loss": 0.3997, "step": 14899 }, { - "epoch": 0.86, - "grad_norm": 1.0628361533545403, - "learning_rate": 1.0669857983015408e-06, - "loss": 0.4543, + "epoch": 0.68, + "grad_norm": 0.4840382972907225, + "learning_rate": 4.782564924642e-06, + "loss": 0.1946, "step": 14900 }, { - "epoch": 0.86, - "grad_norm": 0.20065123792574188, - "learning_rate": 1.0661495507618845e-06, - "loss": 0.1679, + "epoch": 0.68, + "grad_norm": 0.3329086261550118, + "learning_rate": 4.781295634932042e-06, + "loss": 0.2969, "step": 14901 }, { - "epoch": 0.86, - "grad_norm": 0.29083032401850334, - "learning_rate": 1.0653136126010832e-06, - "loss": 0.223, + "epoch": 0.68, + "grad_norm": 0.5356908643287572, + "learning_rate": 4.780026460759171e-06, + "loss": 0.3358, "step": 14902 }, { - "epoch": 0.86, - "grad_norm": 0.5620063770534373, - "learning_rate": 1.06447798384809e-06, - "loss": 0.3486, + "epoch": 0.68, + "grad_norm": 0.2654297276694022, + "learning_rate": 4.7787574021514725e-06, + "loss": 0.1259, "step": 14903 }, { - "epoch": 0.86, - "grad_norm": 0.554067422855577, - "learning_rate": 1.0636426645318387e-06, - "loss": 0.3698, + "epoch": 0.68, + "grad_norm": 0.6186378450816218, + "learning_rate": 4.777488459137062e-06, + "loss": 0.3699, "step": 14904 }, { - "epoch": 0.86, - "grad_norm": 0.28406674047779545, - "learning_rate": 1.0628076546812583e-06, - "loss": 0.2318, + "epoch": 0.68, + "grad_norm": 0.465319163108568, + "learning_rate": 4.77621963174402e-06, + "loss": 0.3016, "step": 14905 }, { - "epoch": 0.86, - "grad_norm": 0.5269874383643349, - "learning_rate": 1.0619729543252622e-06, - "loss": 0.3355, + "epoch": 0.68, + "grad_norm": 0.3815498312607489, + "learning_rate": 4.774950920000444e-06, + "loss": 0.2664, "step": 14906 }, { - "epoch": 0.86, - "grad_norm": 0.24121729656072172, - "learning_rate": 1.0611385634927607e-06, - "loss": 0.1645, + "epoch": 0.68, + "grad_norm": 0.40187978997198653, + "learning_rate": 4.773682323934415e-06, + "loss": 0.2531, "step": 14907 }, { - "epoch": 0.86, - "grad_norm": 0.35797358852460026, - "learning_rate": 1.0603044822126463e-06, - "loss": 0.2266, + "epoch": 0.68, + "grad_norm": 0.5957791936281502, + "learning_rate": 4.7724138435740204e-06, + "loss": 0.2858, "step": 14908 }, { - "epoch": 0.86, - "grad_norm": 0.3378453061765035, - "learning_rate": 1.0594707105138024e-06, - "loss": 0.2897, + "epoch": 0.68, + "grad_norm": 0.3001487981441562, + "learning_rate": 4.771145478947351e-06, + "loss": 0.2146, "step": 14909 }, { - "epoch": 0.86, - "grad_norm": 0.8746212764884232, - "learning_rate": 1.0586372484251018e-06, - "loss": 0.391, + "epoch": 0.68, + "grad_norm": 0.2934836827513517, + "learning_rate": 4.769877230082476e-06, + "loss": 0.2151, "step": 14910 }, { - "epoch": 0.86, - "grad_norm": 0.32721325459226824, - "learning_rate": 1.057804095975411e-06, - "loss": 0.2678, + "epoch": 0.68, + "grad_norm": 0.7318927919111695, + "learning_rate": 4.768609097007478e-06, + "loss": 0.3761, "step": 14911 }, { - "epoch": 0.86, - "grad_norm": 1.5419081597400264, - "learning_rate": 1.0569712531935805e-06, - "loss": 0.277, + "epoch": 0.69, + "grad_norm": 0.6152835401895149, + "learning_rate": 4.767341079750437e-06, + "loss": 0.3981, "step": 14912 }, { - "epoch": 0.86, - "grad_norm": 0.2279118449162286, - "learning_rate": 1.0561387201084494e-06, - "loss": 0.206, + "epoch": 0.69, + "grad_norm": 0.43050859800303304, + "learning_rate": 4.766073178339418e-06, + "loss": 0.2325, "step": 14913 }, { - "epoch": 0.86, - "grad_norm": 0.288549030985663, - "learning_rate": 1.0553064967488514e-06, - "loss": 0.2256, + "epoch": 0.69, + "grad_norm": 0.38559529778896723, + "learning_rate": 4.764805392802497e-06, + "loss": 0.2953, "step": 14914 }, { - "epoch": 0.86, - "grad_norm": 0.6606990633306803, - "learning_rate": 1.054474583143602e-06, - "loss": 0.2891, + "epoch": 0.69, + "grad_norm": 0.2681506144282517, + "learning_rate": 4.763537723167733e-06, + "loss": 0.168, "step": 14915 }, { - "epoch": 0.86, - "grad_norm": 0.6655593582916631, - "learning_rate": 1.0536429793215152e-06, - "loss": 0.399, + "epoch": 0.69, + "grad_norm": 0.44967457284459617, + "learning_rate": 4.762270169463202e-06, + "loss": 0.2329, "step": 14916 }, { - "epoch": 0.86, - "grad_norm": 0.24780553816203488, - "learning_rate": 1.0528116853113867e-06, - "loss": 0.2537, + "epoch": 0.69, + "grad_norm": 0.526003252277324, + "learning_rate": 4.76100273171696e-06, + "loss": 0.2964, "step": 14917 }, { - "epoch": 0.86, - "grad_norm": 1.4479855245872628, - "learning_rate": 1.0519807011420057e-06, - "loss": 0.2714, + "epoch": 0.69, + "grad_norm": 0.8599136580628743, + "learning_rate": 4.759735409957069e-06, + "loss": 0.4567, "step": 14918 }, { - "epoch": 0.86, - "grad_norm": 0.6947298411391986, - "learning_rate": 1.051150026842146e-06, - "loss": 0.3632, + "epoch": 0.69, + "grad_norm": 0.3747516610530139, + "learning_rate": 4.7584682042115834e-06, + "loss": 0.2739, "step": 14919 }, { - "epoch": 0.86, - "grad_norm": 0.3823419776922345, - "learning_rate": 1.0503196624405775e-06, - "loss": 0.259, + "epoch": 0.69, + "grad_norm": 0.8507403581316986, + "learning_rate": 4.75720111450856e-06, + "loss": 0.2521, "step": 14920 }, { - "epoch": 0.86, - "grad_norm": 0.2174358692451919, - "learning_rate": 1.0494896079660554e-06, - "loss": 0.1818, + "epoch": 0.69, + "grad_norm": 0.2157055735795716, + "learning_rate": 4.755934140876054e-06, + "loss": 0.1981, "step": 14921 }, { - "epoch": 0.86, - "grad_norm": 0.6206679548783893, - "learning_rate": 1.0486598634473221e-06, - "loss": 0.3312, + "epoch": 0.69, + "grad_norm": 0.37414458891707814, + "learning_rate": 4.7546672833421085e-06, + "loss": 0.2599, "step": 14922 }, { - "epoch": 0.86, - "grad_norm": 0.40084767726771736, - "learning_rate": 1.0478304289131115e-06, - "loss": 0.3024, + "epoch": 0.69, + "grad_norm": 0.7728127803175406, + "learning_rate": 4.753400541934777e-06, + "loss": 0.2996, "step": 14923 }, { - "epoch": 0.86, - "grad_norm": 1.3130018162299788, - "learning_rate": 1.0470013043921523e-06, - "loss": 0.6565, + "epoch": 0.69, + "grad_norm": 0.6429291354655791, + "learning_rate": 4.752133916682098e-06, + "loss": 0.3732, "step": 14924 }, { - "epoch": 0.86, - "grad_norm": 0.317664336895266, - "learning_rate": 1.046172489913151e-06, - "loss": 0.237, + "epoch": 0.69, + "grad_norm": 0.35540801022069973, + "learning_rate": 4.750867407612116e-06, + "loss": 0.3039, "step": 14925 }, { - "epoch": 0.86, - "grad_norm": 0.3425571725675746, - "learning_rate": 1.0453439855048108e-06, - "loss": 0.2797, + "epoch": 0.69, + "grad_norm": 0.470031815820617, + "learning_rate": 4.749601014752872e-06, + "loss": 0.2199, "step": 14926 }, { - "epoch": 0.86, - "grad_norm": 0.30894367529548855, - "learning_rate": 1.0445157911958214e-06, - "loss": 0.1792, + "epoch": 0.69, + "grad_norm": 0.3902044476832956, + "learning_rate": 4.748334738132399e-06, + "loss": 0.2116, "step": 14927 }, { - "epoch": 0.86, - "grad_norm": 1.014875460132905, - "learning_rate": 1.0436879070148675e-06, - "loss": 0.5139, + "epoch": 0.69, + "grad_norm": 0.4395030620168188, + "learning_rate": 4.747068577778734e-06, + "loss": 0.3006, "step": 14928 }, { - "epoch": 0.86, - "grad_norm": 0.26322226528383447, - "learning_rate": 1.042860332990615e-06, - "loss": 0.2515, + "epoch": 0.69, + "grad_norm": 0.31866687351232026, + "learning_rate": 4.745802533719908e-06, + "loss": 0.2199, "step": 14929 }, { - "epoch": 0.86, - "grad_norm": 0.5905553635015627, - "learning_rate": 1.0420330691517256e-06, - "loss": 0.3113, + "epoch": 0.69, + "grad_norm": 0.6565316245308129, + "learning_rate": 4.744536605983952e-06, + "loss": 0.3488, "step": 14930 }, { - "epoch": 0.86, - "grad_norm": 0.5689957343239428, - "learning_rate": 1.0412061155268428e-06, - "loss": 0.2489, + "epoch": 0.69, + "grad_norm": 0.42421880774806314, + "learning_rate": 4.743270794598891e-06, + "loss": 0.3168, "step": 14931 }, { - "epoch": 0.86, - "grad_norm": 0.3543218393322141, - "learning_rate": 1.0403794721446092e-06, - "loss": 0.257, + "epoch": 0.69, + "grad_norm": 1.167132244905609, + "learning_rate": 4.742005099592745e-06, + "loss": 0.4966, "step": 14932 }, { - "epoch": 0.86, - "grad_norm": 0.386805524160091, - "learning_rate": 1.03955313903365e-06, - "loss": 0.3118, + "epoch": 0.69, + "grad_norm": 0.34823517142034527, + "learning_rate": 4.740739520993538e-06, + "loss": 0.2648, "step": 14933 }, { - "epoch": 0.86, - "grad_norm": 0.18668486006155857, - "learning_rate": 1.0387271162225787e-06, - "loss": 0.1076, + "epoch": 0.69, + "grad_norm": 0.34879116638724783, + "learning_rate": 4.739474058829288e-06, + "loss": 0.2876, "step": 14934 }, { - "epoch": 0.86, - "grad_norm": 0.39926953655521297, - "learning_rate": 1.0379014037400014e-06, - "loss": 0.2844, + "epoch": 0.69, + "grad_norm": 0.3298183326037904, + "learning_rate": 4.7382087131280176e-06, + "loss": 0.1926, "step": 14935 }, { - "epoch": 0.86, - "grad_norm": 0.8440325835444368, - "learning_rate": 1.0370760016145142e-06, - "loss": 0.4694, + "epoch": 0.69, + "grad_norm": 0.46072625884738644, + "learning_rate": 4.7369434839177295e-06, + "loss": 0.2487, "step": 14936 }, { - "epoch": 0.86, - "grad_norm": 0.3958906252639677, - "learning_rate": 1.036250909874701e-06, - "loss": 0.3087, + "epoch": 0.69, + "grad_norm": 0.30763665806783286, + "learning_rate": 4.7356783712264405e-06, + "loss": 0.285, "step": 14937 }, { - "epoch": 0.86, - "grad_norm": 0.2876575469257746, - "learning_rate": 1.0354261285491319e-06, - "loss": 0.188, + "epoch": 0.69, + "grad_norm": 0.5602712407012543, + "learning_rate": 4.734413375082163e-06, + "loss": 0.338, "step": 14938 }, { - "epoch": 0.86, - "grad_norm": 0.24698100951360044, - "learning_rate": 1.0346016576663686e-06, - "loss": 0.1631, + "epoch": 0.69, + "grad_norm": 0.5639676348188166, + "learning_rate": 4.7331484955128944e-06, + "loss": 0.2567, "step": 14939 }, { - "epoch": 0.86, - "grad_norm": 0.8047880203921186, - "learning_rate": 1.0337774972549675e-06, - "loss": 0.5828, + "epoch": 0.69, + "grad_norm": 0.36510447638858545, + "learning_rate": 4.731883732546646e-06, + "loss": 0.2795, "step": 14940 }, { - "epoch": 0.86, - "grad_norm": 0.2618812698548808, - "learning_rate": 1.0329536473434653e-06, - "loss": 0.2086, + "epoch": 0.69, + "grad_norm": 0.27023229684251304, + "learning_rate": 4.73061908621141e-06, + "loss": 0.1978, "step": 14941 }, { - "epoch": 0.86, - "grad_norm": 0.513815456683671, - "learning_rate": 1.032130107960393e-06, - "loss": 0.3363, + "epoch": 0.69, + "grad_norm": 0.42939707732208476, + "learning_rate": 4.729354556535188e-06, + "loss": 0.2673, "step": 14942 }, { - "epoch": 0.86, - "grad_norm": 0.6800722467991159, - "learning_rate": 1.0313068791342683e-06, - "loss": 0.3421, + "epoch": 0.69, + "grad_norm": 0.3729610804199242, + "learning_rate": 4.728090143545981e-06, + "loss": 0.2648, "step": 14943 }, { - "epoch": 0.86, - "grad_norm": 0.2965021973445377, - "learning_rate": 1.0304839608936002e-06, - "loss": 0.197, + "epoch": 0.69, + "grad_norm": 0.8126709685486809, + "learning_rate": 4.7268258472717735e-06, + "loss": 0.4806, "step": 14944 }, { - "epoch": 0.86, - "grad_norm": 0.28748538312306504, - "learning_rate": 1.0296613532668875e-06, - "loss": 0.2301, + "epoch": 0.69, + "grad_norm": 0.3366754608428277, + "learning_rate": 4.725561667740559e-06, + "loss": 0.2872, "step": 14945 }, { - "epoch": 0.86, - "grad_norm": 1.1561225785911142, - "learning_rate": 1.0288390562826178e-06, - "loss": 0.697, + "epoch": 0.69, + "grad_norm": 0.3582941860702632, + "learning_rate": 4.7242976049803255e-06, + "loss": 0.2198, "step": 14946 }, { - "epoch": 0.86, - "grad_norm": 0.22153187673332958, - "learning_rate": 1.0280170699692648e-06, - "loss": 0.1684, + "epoch": 0.69, + "grad_norm": 0.26985403521956675, + "learning_rate": 4.723033659019061e-06, + "loss": 0.1546, "step": 14947 }, { - "epoch": 0.86, - "grad_norm": 0.4287418102379653, - "learning_rate": 1.0271953943552938e-06, - "loss": 0.3314, + "epoch": 0.69, + "grad_norm": 0.8429704431916867, + "learning_rate": 4.721769829884747e-06, + "loss": 0.5846, "step": 14948 }, { - "epoch": 0.86, - "grad_norm": 0.5123710533271378, - "learning_rate": 1.0263740294691615e-06, - "loss": 0.3298, + "epoch": 0.69, + "grad_norm": 0.29705912970266835, + "learning_rate": 4.7205061176053575e-06, + "loss": 0.2198, "step": 14949 }, { - "epoch": 0.86, - "grad_norm": 0.31713937270697534, - "learning_rate": 1.0255529753393112e-06, - "loss": 0.2379, + "epoch": 0.69, + "grad_norm": 0.5354843818470001, + "learning_rate": 4.7192425222088745e-06, + "loss": 0.3008, "step": 14950 }, { - "epoch": 0.86, - "grad_norm": 1.6366738751403234, - "learning_rate": 1.0247322319941745e-06, - "loss": 0.2046, + "epoch": 0.69, + "grad_norm": 0.7131002694524821, + "learning_rate": 4.717979043723271e-06, + "loss": 0.3765, "step": 14951 }, { - "epoch": 0.86, - "grad_norm": 0.45705670046593916, - "learning_rate": 1.023911799462174e-06, - "loss": 0.3897, + "epoch": 0.69, + "grad_norm": 0.332799838036901, + "learning_rate": 4.716715682176526e-06, + "loss": 0.1918, "step": 14952 }, { - "epoch": 0.86, - "grad_norm": 0.27150260711708984, - "learning_rate": 1.0230916777717226e-06, - "loss": 0.2514, + "epoch": 0.69, + "grad_norm": 0.24951375927210165, + "learning_rate": 4.7154524375965985e-06, + "loss": 0.2029, "step": 14953 }, { - "epoch": 0.86, - "grad_norm": 0.5578410636590693, - "learning_rate": 1.0222718669512211e-06, - "loss": 0.2062, + "epoch": 0.69, + "grad_norm": 1.3899141597283207, + "learning_rate": 4.714189310011461e-06, + "loss": 0.7985, "step": 14954 }, { - "epoch": 0.86, - "grad_norm": 0.5180872333426131, - "learning_rate": 1.0214523670290587e-06, - "loss": 0.2397, + "epoch": 0.69, + "grad_norm": 0.3154425660497833, + "learning_rate": 4.7129262994490825e-06, + "loss": 0.2014, "step": 14955 }, { - "epoch": 0.86, - "grad_norm": 0.33763059935600714, - "learning_rate": 1.0206331780336154e-06, - "loss": 0.255, + "epoch": 0.69, + "grad_norm": 0.8401163018603621, + "learning_rate": 4.711663405937416e-06, + "loss": 0.3884, "step": 14956 }, { - "epoch": 0.86, - "grad_norm": 0.34511369249002094, - "learning_rate": 1.0198142999932559e-06, - "loss": 0.2375, + "epoch": 0.69, + "grad_norm": 0.36472791637023144, + "learning_rate": 4.710400629504427e-06, + "loss": 0.2884, "step": 14957 }, { - "epoch": 0.86, - "grad_norm": 0.8756372942785727, - "learning_rate": 1.0189957329363465e-06, - "loss": 0.5402, + "epoch": 0.69, + "grad_norm": 0.3608850594886287, + "learning_rate": 4.709137970178067e-06, + "loss": 0.2323, "step": 14958 }, { - "epoch": 0.86, - "grad_norm": 0.40461371137970736, - "learning_rate": 1.0181774768912255e-06, - "loss": 0.2893, + "epoch": 0.69, + "grad_norm": 0.3355383047222562, + "learning_rate": 4.707875427986294e-06, + "loss": 0.0886, "step": 14959 }, { - "epoch": 0.86, - "grad_norm": 0.38107539894620857, - "learning_rate": 1.0173595318862305e-06, - "loss": 0.2672, + "epoch": 0.69, + "grad_norm": 0.5443886521465415, + "learning_rate": 4.70661300295706e-06, + "loss": 0.404, "step": 14960 }, { - "epoch": 0.86, - "grad_norm": 0.22422982508923442, - "learning_rate": 1.01654189794969e-06, - "loss": 0.1835, + "epoch": 0.69, + "grad_norm": 0.3098650552302095, + "learning_rate": 4.7053506951183104e-06, + "loss": 0.2659, "step": 14961 }, { - "epoch": 0.86, - "grad_norm": 0.33600638655085446, - "learning_rate": 1.0157245751099188e-06, - "loss": 0.2601, + "epoch": 0.69, + "grad_norm": 0.7811553672545108, + "learning_rate": 4.704088504497996e-06, + "loss": 0.2241, "step": 14962 }, { - "epoch": 0.86, - "grad_norm": 1.4870843447136277, - "learning_rate": 1.0149075633952178e-06, - "loss": 0.5065, + "epoch": 0.69, + "grad_norm": 0.8146791331884439, + "learning_rate": 4.702826431124051e-06, + "loss": 0.4254, "step": 14963 }, { - "epoch": 0.86, - "grad_norm": 0.4281829927047752, - "learning_rate": 1.0140908628338796e-06, - "loss": 0.2577, + "epoch": 0.69, + "grad_norm": 0.4647002561811673, + "learning_rate": 4.70156447502443e-06, + "loss": 0.2772, "step": 14964 }, { - "epoch": 0.86, - "grad_norm": 0.2609028341284687, - "learning_rate": 1.013274473454191e-06, - "loss": 0.2483, + "epoch": 0.69, + "grad_norm": 0.2867654002298698, + "learning_rate": 4.700302636227062e-06, + "loss": 0.2334, "step": 14965 }, { - "epoch": 0.86, - "grad_norm": 0.6522575513963987, - "learning_rate": 1.0124583952844214e-06, - "loss": 0.3881, + "epoch": 0.69, + "grad_norm": 0.7998249713367785, + "learning_rate": 4.6990409147598896e-06, + "loss": 0.4733, "step": 14966 }, { - "epoch": 0.86, - "grad_norm": 0.2859295010336732, - "learning_rate": 1.0116426283528301e-06, - "loss": 0.0838, + "epoch": 0.69, + "grad_norm": 0.38721610540968415, + "learning_rate": 4.697779310650837e-06, + "loss": 0.2819, "step": 14967 }, { - "epoch": 0.86, - "grad_norm": 0.32065626081543513, - "learning_rate": 1.0108271726876684e-06, - "loss": 0.239, + "epoch": 0.69, + "grad_norm": 0.6496715338990422, + "learning_rate": 4.696517823927842e-06, + "loss": 0.1916, "step": 14968 }, { - "epoch": 0.86, - "grad_norm": 0.34553309967875206, - "learning_rate": 1.0100120283171733e-06, - "loss": 0.2821, + "epoch": 0.69, + "grad_norm": 0.3495899240535417, + "learning_rate": 4.695256454618834e-06, + "loss": 0.2806, "step": 14969 }, { - "epoch": 0.86, - "grad_norm": 0.7021634157856494, - "learning_rate": 1.0091971952695768e-06, - "loss": 0.3325, + "epoch": 0.69, + "grad_norm": 0.3630608488856457, + "learning_rate": 4.693995202751731e-06, + "loss": 0.2759, "step": 14970 }, { - "epoch": 0.86, - "grad_norm": 0.3073095774730255, - "learning_rate": 1.008382673573095e-06, - "loss": 0.2581, + "epoch": 0.69, + "grad_norm": 1.1535030215993571, + "learning_rate": 4.69273406835446e-06, + "loss": 0.515, "step": 14971 }, { - "epoch": 0.86, - "grad_norm": 0.8590640171628728, - "learning_rate": 1.0075684632559346e-06, - "loss": 0.4479, + "epoch": 0.69, + "grad_norm": 0.4603608934653564, + "learning_rate": 4.691473051454945e-06, + "loss": 0.2884, "step": 14972 }, { - "epoch": 0.86, - "grad_norm": 0.22537341360656152, - "learning_rate": 1.0067545643462895e-06, - "loss": 0.2082, + "epoch": 0.69, + "grad_norm": 0.2860082973052143, + "learning_rate": 4.690212152081099e-06, + "loss": 0.2415, "step": 14973 }, { - "epoch": 0.86, - "grad_norm": 0.3298064748892064, - "learning_rate": 1.0059409768723495e-06, - "loss": 0.1858, + "epoch": 0.69, + "grad_norm": 0.4700411634956343, + "learning_rate": 4.6889513702608395e-06, + "loss": 0.259, "step": 14974 }, { - "epoch": 0.86, - "grad_norm": 1.147862075159621, - "learning_rate": 1.0051277008622861e-06, - "loss": 0.5509, + "epoch": 0.69, + "grad_norm": 0.4173174697538321, + "learning_rate": 4.687690706022071e-06, + "loss": 0.0944, "step": 14975 }, { - "epoch": 0.86, - "grad_norm": 0.31479894074709697, - "learning_rate": 1.004314736344264e-06, - "loss": 0.2775, + "epoch": 0.69, + "grad_norm": 0.3783910850717987, + "learning_rate": 4.686430159392718e-06, + "loss": 0.2619, "step": 14976 }, { - "epoch": 0.86, - "grad_norm": 0.3955090912581276, - "learning_rate": 1.0035020833464338e-06, - "loss": 0.1986, + "epoch": 0.69, + "grad_norm": 0.40697252076071944, + "learning_rate": 4.685169730400679e-06, + "loss": 0.3412, "step": 14977 }, { - "epoch": 0.86, - "grad_norm": 0.5281904876913323, - "learning_rate": 1.0026897418969417e-06, - "loss": 0.3507, + "epoch": 0.69, + "grad_norm": 0.8308828651110298, + "learning_rate": 4.683909419073858e-06, + "loss": 0.3495, "step": 14978 }, { - "epoch": 0.86, - "grad_norm": 0.30591248057503395, - "learning_rate": 1.0018777120239165e-06, - "loss": 0.1526, + "epoch": 0.69, + "grad_norm": 0.36814447659125005, + "learning_rate": 4.68264922544016e-06, + "loss": 0.2708, "step": 14979 }, { - "epoch": 0.86, - "grad_norm": 0.37828916984177724, - "learning_rate": 1.0010659937554789e-06, - "loss": 0.1737, + "epoch": 0.69, + "grad_norm": 0.8995159860811099, + "learning_rate": 4.681389149527478e-06, + "loss": 0.5217, "step": 14980 }, { - "epoch": 0.86, - "grad_norm": 0.31146064451236316, - "learning_rate": 1.00025458711974e-06, - "loss": 0.2923, + "epoch": 0.69, + "grad_norm": 0.2510983669284052, + "learning_rate": 4.68012919136372e-06, + "loss": 0.174, "step": 14981 }, { - "epoch": 0.86, - "grad_norm": 0.7401631176839336, - "learning_rate": 9.99443492144795e-07, - "loss": 0.3723, + "epoch": 0.69, + "grad_norm": 0.4244888236716084, + "learning_rate": 4.6788693509767715e-06, + "loss": 0.2682, "step": 14982 }, { - "epoch": 0.86, - "grad_norm": 0.29215619453143027, - "learning_rate": 9.986327088587378e-07, - "loss": 0.2067, + "epoch": 0.69, + "grad_norm": 0.5875702953154892, + "learning_rate": 4.677609628394529e-06, + "loss": 0.3488, "step": 14983 }, { - "epoch": 0.86, - "grad_norm": 0.34927543617015566, - "learning_rate": 9.978222372896417e-07, - "loss": 0.3224, + "epoch": 0.69, + "grad_norm": 0.43257039636052735, + "learning_rate": 4.676350023644878e-06, + "loss": 0.2949, "step": 14984 }, { - "epoch": 0.86, - "grad_norm": 0.42741603016621244, - "learning_rate": 9.970120774655744e-07, - "loss": 0.2293, + "epoch": 0.69, + "grad_norm": 0.3984165724902577, + "learning_rate": 4.675090536755706e-06, + "loss": 0.1888, "step": 14985 }, { - "epoch": 0.86, - "grad_norm": 0.31526568971659064, - "learning_rate": 9.9620222941459e-07, - "loss": 0.2038, + "epoch": 0.69, + "grad_norm": 0.5199818749548794, + "learning_rate": 4.6738311677549e-06, + "loss": 0.3758, "step": 14986 }, { - "epoch": 0.86, - "grad_norm": 0.5250602102861858, - "learning_rate": 9.953926931647372e-07, - "loss": 0.2446, + "epoch": 0.69, + "grad_norm": 0.33890102354294693, + "learning_rate": 4.672571916670335e-06, + "loss": 0.1939, "step": 14987 }, { - "epoch": 0.86, - "grad_norm": 0.3332569485498697, - "learning_rate": 9.945834687440491e-07, - "loss": 0.297, + "epoch": 0.69, + "grad_norm": 0.2943609665776541, + "learning_rate": 4.6713127835298945e-06, + "loss": 0.1899, "step": 14988 }, { - "epoch": 0.86, - "grad_norm": 0.40668710547493864, - "learning_rate": 9.937745561805478e-07, - "loss": 0.2977, + "epoch": 0.69, + "grad_norm": 0.37263026537227945, + "learning_rate": 4.670053768361456e-06, + "loss": 0.3176, "step": 14989 }, { - "epoch": 0.86, - "grad_norm": 0.5698087862797053, - "learning_rate": 9.92965955502244e-07, - "loss": 0.2558, + "epoch": 0.69, + "grad_norm": 0.7312784937929746, + "learning_rate": 4.668794871192885e-06, + "loss": 0.4069, "step": 14990 }, { - "epoch": 0.86, - "grad_norm": 0.252459910213438, - "learning_rate": 9.921576667371458e-07, - "loss": 0.1633, + "epoch": 0.69, + "grad_norm": 0.38486801444559954, + "learning_rate": 4.667536092052063e-06, + "loss": 0.2215, "step": 14991 }, { - "epoch": 0.86, - "grad_norm": 0.3259181171702919, - "learning_rate": 9.91349689913238e-07, - "loss": 0.2698, + "epoch": 0.69, + "grad_norm": 0.509465112216622, + "learning_rate": 4.666277430966848e-06, + "loss": 0.3465, "step": 14992 }, { - "epoch": 0.86, - "grad_norm": 0.33737889420877937, - "learning_rate": 9.90542025058503e-07, - "loss": 0.2462, + "epoch": 0.69, + "grad_norm": 0.2584303745510036, + "learning_rate": 4.665018887965109e-06, + "loss": 0.1957, "step": 14993 }, { - "epoch": 0.86, - "grad_norm": 0.6389429058168169, - "learning_rate": 9.897346722009095e-07, - "loss": 0.3715, + "epoch": 0.69, + "grad_norm": 0.34439544387004356, + "learning_rate": 4.663760463074711e-06, + "loss": 0.1873, "step": 14994 }, { - "epoch": 0.86, - "grad_norm": 0.6251259359040213, - "learning_rate": 9.889276313684171e-07, - "loss": 0.3601, + "epoch": 0.69, + "grad_norm": 1.2085880005113516, + "learning_rate": 4.662502156323517e-06, + "loss": 0.5904, "step": 14995 }, { - "epoch": 0.86, - "grad_norm": 0.2480426106504089, - "learning_rate": 9.88120902588975e-07, - "loss": 0.2357, + "epoch": 0.69, + "grad_norm": 0.4845266201666869, + "learning_rate": 4.6612439677393804e-06, + "loss": 0.3083, "step": 14996 }, { - "epoch": 0.86, - "grad_norm": 0.3518028245212191, - "learning_rate": 9.87314485890517e-07, - "loss": 0.1729, + "epoch": 0.69, + "grad_norm": 0.32110136929403593, + "learning_rate": 4.65998589735015e-06, + "loss": 0.2666, "step": 14997 }, { - "epoch": 0.86, - "grad_norm": 0.6043858429194576, - "learning_rate": 9.8650838130097e-07, - "loss": 0.3186, + "epoch": 0.69, + "grad_norm": 0.5389516032796184, + "learning_rate": 4.658727945183692e-06, + "loss": 0.261, "step": 14998 }, { - "epoch": 0.86, - "grad_norm": 0.37360874956552786, - "learning_rate": 9.857025888482518e-07, - "loss": 0.2994, + "epoch": 0.69, + "grad_norm": 0.26504972786021364, + "learning_rate": 4.657470111267846e-06, + "loss": 0.1581, "step": 14999 }, { - "epoch": 0.86, - "grad_norm": 0.33171762764892554, - "learning_rate": 9.848971085602655e-07, - "loss": 0.2454, + "epoch": 0.69, + "grad_norm": 0.4140625448415751, + "learning_rate": 4.656212395630465e-06, + "loss": 0.2511, "step": 15000 }, { - "epoch": 0.86, - "grad_norm": 0.620980650429492, - "learning_rate": 9.84091940464904e-07, - "loss": 0.2839, + "epoch": 0.69, + "grad_norm": 0.3248407675881738, + "learning_rate": 4.654954798299388e-06, + "loss": 0.2461, "step": 15001 }, { - "epoch": 0.86, - "grad_norm": 0.4069755655302794, - "learning_rate": 9.832870845900488e-07, - "loss": 0.3088, + "epoch": 0.69, + "grad_norm": 0.6745325017232855, + "learning_rate": 4.653697319302461e-06, + "loss": 0.4109, "step": 15002 }, { - "epoch": 0.86, - "grad_norm": 0.17326883779583802, - "learning_rate": 9.824825409635763e-07, - "loss": 0.0697, + "epoch": 0.69, + "grad_norm": 0.6034207022820928, + "learning_rate": 4.652439958667526e-06, + "loss": 0.3047, "step": 15003 }, { - "epoch": 0.86, - "grad_norm": 0.3107041090929514, - "learning_rate": 9.816783096133463e-07, - "loss": 0.2584, + "epoch": 0.69, + "grad_norm": 0.3248430277759104, + "learning_rate": 4.651182716422412e-06, + "loss": 0.2291, "step": 15004 }, { - "epoch": 0.86, - "grad_norm": 0.354031848651155, - "learning_rate": 9.80874390567208e-07, - "loss": 0.2983, + "epoch": 0.69, + "grad_norm": 0.2953053313839936, + "learning_rate": 4.6499255925949575e-06, + "loss": 0.2145, "step": 15005 }, { - "epoch": 0.86, - "grad_norm": 0.8439772860687007, - "learning_rate": 9.800707838530021e-07, - "loss": 0.312, + "epoch": 0.69, + "grad_norm": 0.6595842507948835, + "learning_rate": 4.648668587212998e-06, + "loss": 0.3212, "step": 15006 }, { - "epoch": 0.86, - "grad_norm": 0.41508853480396724, - "learning_rate": 9.792674894985553e-07, - "loss": 0.2951, + "epoch": 0.69, + "grad_norm": 0.3872264679609567, + "learning_rate": 4.647411700304354e-06, + "loss": 0.3035, "step": 15007 }, { - "epoch": 0.86, - "grad_norm": 0.5387336155751442, - "learning_rate": 9.78464507531689e-07, - "loss": 0.3999, + "epoch": 0.69, + "grad_norm": 0.8399595443490007, + "learning_rate": 4.64615493189686e-06, + "loss": 0.2677, "step": 15008 }, { - "epoch": 0.86, - "grad_norm": 0.30233725667514955, - "learning_rate": 9.776618379802093e-07, - "loss": 0.1861, + "epoch": 0.69, + "grad_norm": 0.3391506503121247, + "learning_rate": 4.644898282018333e-06, + "loss": 0.2427, "step": 15009 }, { - "epoch": 0.86, - "grad_norm": 0.4230746134209038, - "learning_rate": 9.768594808719113e-07, - "loss": 0.2633, + "epoch": 0.69, + "grad_norm": 0.6346358599678377, + "learning_rate": 4.643641750696596e-06, + "loss": 0.4105, "step": 15010 }, { - "epoch": 0.86, - "grad_norm": 0.48842838044318104, - "learning_rate": 9.76057436234581e-07, - "loss": 0.3434, + "epoch": 0.69, + "grad_norm": 0.1565622222737067, + "learning_rate": 4.6423853379594675e-06, + "loss": 0.068, "step": 15011 }, { - "epoch": 0.86, - "grad_norm": 0.283475291759844, - "learning_rate": 9.752557040959943e-07, - "loss": 0.2384, + "epoch": 0.69, + "grad_norm": 0.3564921552422663, + "learning_rate": 4.641129043834768e-06, + "loss": 0.2714, "step": 15012 }, { - "epoch": 0.86, - "grad_norm": 0.32374263109755874, - "learning_rate": 9.744542844839145e-07, - "loss": 0.1523, + "epoch": 0.69, + "grad_norm": 0.38050554602880915, + "learning_rate": 4.639872868350307e-06, + "loss": 0.2893, "step": 15013 }, { - "epoch": 0.86, - "grad_norm": 0.3835245373644557, - "learning_rate": 9.736531774260948e-07, - "loss": 0.3108, + "epoch": 0.69, + "grad_norm": 0.7129760998602115, + "learning_rate": 4.638616811533886e-06, + "loss": 0.3344, "step": 15014 }, { - "epoch": 0.86, - "grad_norm": 0.8657684258513281, - "learning_rate": 9.728523829502768e-07, - "loss": 0.365, + "epoch": 0.69, + "grad_norm": 0.40206576290701596, + "learning_rate": 4.637360873413331e-06, + "loss": 0.2923, "step": 15015 }, { - "epoch": 0.86, - "grad_norm": 0.2924587753221655, - "learning_rate": 9.720519010841933e-07, - "loss": 0.2123, + "epoch": 0.69, + "grad_norm": 0.5891769972521699, + "learning_rate": 4.636105054016431e-06, + "loss": 0.3817, "step": 15016 }, { - "epoch": 0.86, - "grad_norm": 0.3928088182505235, - "learning_rate": 9.712517318555637e-07, - "loss": 0.3112, + "epoch": 0.69, + "grad_norm": 0.24451825613786068, + "learning_rate": 4.634849353371e-06, + "loss": 0.1801, "step": 15017 }, { - "epoch": 0.86, - "grad_norm": 0.29042677444468945, - "learning_rate": 9.704518752921e-07, - "loss": 0.1606, + "epoch": 0.69, + "grad_norm": 0.5998182158721055, + "learning_rate": 4.63359377150483e-06, + "loss": 0.3017, "step": 15018 }, { - "epoch": 0.86, - "grad_norm": 0.3775481237884491, - "learning_rate": 9.696523314214978e-07, - "loss": 0.1905, + "epoch": 0.69, + "grad_norm": 0.41838165711364644, + "learning_rate": 4.632338308445723e-06, + "loss": 0.3069, "step": 15019 }, { - "epoch": 0.86, - "grad_norm": 0.2703635013510745, - "learning_rate": 9.688531002714464e-07, - "loss": 0.2672, + "epoch": 0.69, + "grad_norm": 0.3846078401207333, + "learning_rate": 4.631082964221475e-06, + "loss": 0.3131, "step": 15020 }, { - "epoch": 0.86, - "grad_norm": 1.2847216612253773, - "learning_rate": 9.680541818696254e-07, - "loss": 0.5317, + "epoch": 0.69, + "grad_norm": 0.4252564919971829, + "learning_rate": 4.629827738859871e-06, + "loss": 0.1581, "step": 15021 }, { - "epoch": 0.86, - "grad_norm": 0.5357320718859065, - "learning_rate": 9.672555762436997e-07, - "loss": 0.2516, + "epoch": 0.69, + "grad_norm": 0.4279115400867005, + "learning_rate": 4.62857263238871e-06, + "loss": 0.3104, "step": 15022 }, { - "epoch": 0.86, - "grad_norm": 0.29402400287893316, - "learning_rate": 9.66457283421325e-07, - "loss": 0.2488, + "epoch": 0.69, + "grad_norm": 0.45828710742152173, + "learning_rate": 4.627317644835766e-06, + "loss": 0.2549, "step": 15023 }, { - "epoch": 0.86, - "grad_norm": 0.2730938089664483, - "learning_rate": 9.656593034301432e-07, - "loss": 0.2212, + "epoch": 0.69, + "grad_norm": 0.38347991503033996, + "learning_rate": 4.626062776228839e-06, + "loss": 0.1972, "step": 15024 }, { - "epoch": 0.86, - "grad_norm": 0.5329117555031294, - "learning_rate": 9.648616362977959e-07, - "loss": 0.3298, + "epoch": 0.69, + "grad_norm": 0.34558795598347114, + "learning_rate": 4.624808026595702e-06, + "loss": 0.2748, "step": 15025 }, { - "epoch": 0.86, - "grad_norm": 0.3581964345211006, - "learning_rate": 9.640642820518997e-07, - "loss": 0.2328, + "epoch": 0.69, + "grad_norm": 0.49215343863086497, + "learning_rate": 4.623553395964131e-06, + "loss": 0.2695, "step": 15026 }, { - "epoch": 0.86, - "grad_norm": 0.6362827691871921, - "learning_rate": 9.63267240720067e-07, - "loss": 0.3489, + "epoch": 0.69, + "grad_norm": 0.5584687447420302, + "learning_rate": 4.622298884361905e-06, + "loss": 0.2213, "step": 15027 }, { - "epoch": 0.86, - "grad_norm": 0.29267465950559424, - "learning_rate": 9.62470512329904e-07, - "loss": 0.2516, + "epoch": 0.69, + "grad_norm": 0.3039480794746229, + "learning_rate": 4.621044491816801e-06, + "loss": 0.2718, "step": 15028 }, { - "epoch": 0.86, - "grad_norm": 0.36347141522829296, - "learning_rate": 9.616740969089967e-07, - "loss": 0.2125, + "epoch": 0.69, + "grad_norm": 1.1641608643765324, + "learning_rate": 4.619790218356589e-06, + "loss": 0.5415, "step": 15029 }, { - "epoch": 0.86, - "grad_norm": 0.2846777407910682, - "learning_rate": 9.608779944849278e-07, - "loss": 0.1992, + "epoch": 0.69, + "grad_norm": 0.4302613455630632, + "learning_rate": 4.618536064009034e-06, + "loss": 0.2245, "step": 15030 }, { - "epoch": 0.86, - "grad_norm": 1.2644418677329352, - "learning_rate": 9.600822050852654e-07, - "loss": 0.7545, + "epoch": 0.69, + "grad_norm": 0.5022495209187914, + "learning_rate": 4.6172820288019025e-06, + "loss": 0.3425, "step": 15031 }, { - "epoch": 0.86, - "grad_norm": 0.21804919025258576, - "learning_rate": 9.592867287375652e-07, - "loss": 0.2162, + "epoch": 0.69, + "grad_norm": 0.23196792778911693, + "learning_rate": 4.616028112762964e-06, + "loss": 0.1946, "step": 15032 }, { - "epoch": 0.86, - "grad_norm": 0.593650145047429, - "learning_rate": 9.584915654693782e-07, - "loss": 0.3514, + "epoch": 0.69, + "grad_norm": 0.6072750056322345, + "learning_rate": 4.614774315919969e-06, + "loss": 0.3832, "step": 15033 }, { - "epoch": 0.86, - "grad_norm": 0.9895576760248582, - "learning_rate": 9.576967153082406e-07, - "loss": 0.4155, + "epoch": 0.69, + "grad_norm": 0.4122991477652811, + "learning_rate": 4.6135206383006845e-06, + "loss": 0.2311, "step": 15034 }, { - "epoch": 0.86, - "grad_norm": 0.23430158277619348, - "learning_rate": 9.569021782816767e-07, - "loss": 0.1612, + "epoch": 0.69, + "grad_norm": 0.6298830197590604, + "learning_rate": 4.612267079932858e-06, + "loss": 0.3817, "step": 15035 }, { - "epoch": 0.86, - "grad_norm": 0.27556174932560695, - "learning_rate": 9.561079544171992e-07, - "loss": 0.2646, + "epoch": 0.69, + "grad_norm": 0.3437834898698803, + "learning_rate": 4.611013640844245e-06, + "loss": 0.2805, "step": 15036 }, { - "epoch": 0.86, - "grad_norm": 0.808225331928117, - "learning_rate": 9.553140437423157e-07, - "loss": 0.5553, + "epoch": 0.69, + "grad_norm": 0.3642050901893921, + "learning_rate": 4.609760321062601e-06, + "loss": 0.198, "step": 15037 }, { - "epoch": 0.86, - "grad_norm": 0.37205200444797243, - "learning_rate": 9.545204462845192e-07, - "loss": 0.2542, + "epoch": 0.69, + "grad_norm": 0.28778494438632246, + "learning_rate": 4.608507120615664e-06, + "loss": 0.1966, "step": 15038 }, { - "epoch": 0.86, - "grad_norm": 0.6746390579793102, - "learning_rate": 9.537271620712896e-07, - "loss": 0.2945, + "epoch": 0.69, + "grad_norm": 1.5428744841090298, + "learning_rate": 4.607254039531186e-06, + "loss": 0.724, "step": 15039 }, { - "epoch": 0.86, - "grad_norm": 0.3469141740881783, - "learning_rate": 9.529341911300982e-07, - "loss": 0.2678, + "epoch": 0.69, + "grad_norm": 0.24173747007712018, + "learning_rate": 4.606001077836899e-06, + "loss": 0.2062, "step": 15040 }, { - "epoch": 0.86, - "grad_norm": 0.3863948928230087, - "learning_rate": 9.521415334884088e-07, - "loss": 0.2668, + "epoch": 0.69, + "grad_norm": 0.6693224925171881, + "learning_rate": 4.604748235560557e-06, + "loss": 0.3767, "step": 15041 }, { - "epoch": 0.86, - "grad_norm": 0.34364358012632695, - "learning_rate": 9.513491891736681e-07, - "loss": 0.126, + "epoch": 0.69, + "grad_norm": 0.7878288549864655, + "learning_rate": 4.603495512729889e-06, + "loss": 0.4192, "step": 15042 }, { - "epoch": 0.86, - "grad_norm": 0.26545776152588485, - "learning_rate": 9.505571582133166e-07, - "loss": 0.2173, + "epoch": 0.69, + "grad_norm": 0.26175333907545395, + "learning_rate": 4.602242909372625e-06, + "loss": 0.1657, "step": 15043 }, { - "epoch": 0.86, - "grad_norm": 0.34794469679286133, - "learning_rate": 9.497654406347812e-07, - "loss": 0.2684, + "epoch": 0.69, + "grad_norm": 0.3676481122087531, + "learning_rate": 4.6009904255165e-06, + "loss": 0.3234, "step": 15044 }, { - "epoch": 0.86, - "grad_norm": 0.4445266623529254, - "learning_rate": 9.489740364654776e-07, - "loss": 0.2681, + "epoch": 0.69, + "grad_norm": 0.33019661994118316, + "learning_rate": 4.599738061189244e-06, + "loss": 0.1924, "step": 15045 }, { - "epoch": 0.86, - "grad_norm": 0.6025872062236165, - "learning_rate": 9.481829457328162e-07, - "loss": 0.3286, + "epoch": 0.69, + "grad_norm": 0.42358690607095756, + "learning_rate": 4.598485816418586e-06, + "loss": 0.2992, "step": 15046 }, { - "epoch": 0.86, - "grad_norm": 0.37119355184782665, - "learning_rate": 9.473921684641896e-07, - "loss": 0.2974, + "epoch": 0.69, + "grad_norm": 0.869084084008399, + "learning_rate": 4.597233691232244e-06, + "loss": 0.3035, "step": 15047 }, { - "epoch": 0.86, - "grad_norm": 0.3547465998055965, - "learning_rate": 9.466017046869835e-07, - "loss": 0.3058, + "epoch": 0.69, + "grad_norm": 0.3934394188217789, + "learning_rate": 4.595981685657939e-06, + "loss": 0.3038, "step": 15048 }, { - "epoch": 0.86, - "grad_norm": 0.8409961481560209, - "learning_rate": 9.458115544285684e-07, - "loss": 0.3469, + "epoch": 0.69, + "grad_norm": 0.4069058457299068, + "learning_rate": 4.594729799723395e-06, + "loss": 0.2936, "step": 15049 }, { - "epoch": 0.86, - "grad_norm": 0.32572567903383914, - "learning_rate": 9.450217177163123e-07, - "loss": 0.2468, + "epoch": 0.69, + "grad_norm": 0.36806272989234623, + "learning_rate": 4.59347803345632e-06, + "loss": 0.159, "step": 15050 }, { - "epoch": 0.86, - "grad_norm": 0.2476762696013451, - "learning_rate": 9.442321945775646e-07, - "loss": 0.206, + "epoch": 0.69, + "grad_norm": 0.34629022487535427, + "learning_rate": 4.592226386884434e-06, + "loss": 0.1902, "step": 15051 }, { - "epoch": 0.86, - "grad_norm": 0.638595287493113, - "learning_rate": 9.434429850396665e-07, - "loss": 0.2482, + "epoch": 0.69, + "grad_norm": 0.4361584723758657, + "learning_rate": 4.5909748600354395e-06, + "loss": 0.3088, "step": 15052 }, { - "epoch": 0.86, - "grad_norm": 0.33552988930315564, - "learning_rate": 9.426540891299463e-07, - "loss": 0.254, + "epoch": 0.69, + "grad_norm": 0.3950850631882616, + "learning_rate": 4.589723452937049e-06, + "loss": 0.2677, "step": 15053 }, { - "epoch": 0.86, - "grad_norm": 1.286349510665451, - "learning_rate": 9.418655068757276e-07, - "loss": 0.6941, + "epoch": 0.69, + "grad_norm": 0.9255034087534584, + "learning_rate": 4.58847216561697e-06, + "loss": 0.4551, "step": 15054 }, { - "epoch": 0.86, - "grad_norm": 0.39628036912695536, - "learning_rate": 9.410772383043176e-07, - "loss": 0.2364, + "epoch": 0.69, + "grad_norm": 0.3810109154178935, + "learning_rate": 4.587220998102899e-06, + "loss": 0.2618, "step": 15055 }, { - "epoch": 0.87, - "grad_norm": 0.2941849588804919, - "learning_rate": 9.402892834430122e-07, - "loss": 0.2882, + "epoch": 0.69, + "grad_norm": 0.3418873385509642, + "learning_rate": 4.585969950422542e-06, + "loss": 0.2593, "step": 15056 }, { - "epoch": 0.87, - "grad_norm": 0.42903190992151646, - "learning_rate": 9.395016423190984e-07, - "loss": 0.2431, + "epoch": 0.69, + "grad_norm": 0.24905243433512392, + "learning_rate": 4.584719022603583e-06, + "loss": 0.153, "step": 15057 }, { - "epoch": 0.87, - "grad_norm": 0.4266723252802292, - "learning_rate": 9.387143149598543e-07, - "loss": 0.0998, + "epoch": 0.69, + "grad_norm": 0.35401164199015245, + "learning_rate": 4.583468214673734e-06, + "loss": 0.2652, "step": 15058 }, { - "epoch": 0.87, - "grad_norm": 0.3951244070626443, - "learning_rate": 9.379273013925449e-07, - "loss": 0.2723, + "epoch": 0.69, + "grad_norm": 0.7774822584185109, + "learning_rate": 4.582217526660675e-06, + "loss": 0.3546, "step": 15059 }, { - "epoch": 0.87, - "grad_norm": 0.3569081459857391, - "learning_rate": 9.371406016444229e-07, - "loss": 0.3066, + "epoch": 0.69, + "grad_norm": 0.35526367550610033, + "learning_rate": 4.580966958592101e-06, + "loss": 0.2323, "step": 15060 }, { - "epoch": 0.87, - "grad_norm": 0.7314097994167829, - "learning_rate": 9.363542157427297e-07, - "loss": 0.3921, + "epoch": 0.69, + "grad_norm": 0.3297168215369546, + "learning_rate": 4.579716510495692e-06, + "loss": 0.2405, "step": 15061 }, { - "epoch": 0.87, - "grad_norm": 0.2963625225107329, - "learning_rate": 9.355681437147024e-07, - "loss": 0.1872, + "epoch": 0.69, + "grad_norm": 1.3965273513544676, + "learning_rate": 4.578466182399136e-06, + "loss": 0.7704, "step": 15062 }, { - "epoch": 0.87, - "grad_norm": 0.3283347937303761, - "learning_rate": 9.347823855875604e-07, - "loss": 0.2963, + "epoch": 0.69, + "grad_norm": 0.6136862048623709, + "learning_rate": 4.577215974330117e-06, + "loss": 0.2814, "step": 15063 }, { - "epoch": 0.87, - "grad_norm": 0.297389999754831, - "learning_rate": 9.339969413885142e-07, - "loss": 0.2066, + "epoch": 0.69, + "grad_norm": 0.27213986912525256, + "learning_rate": 4.575965886316305e-06, + "loss": 0.2527, "step": 15064 }, { - "epoch": 0.87, - "grad_norm": 0.35212332109937267, - "learning_rate": 9.33211811144763e-07, - "loss": 0.1968, + "epoch": 0.69, + "grad_norm": 0.4717733553515794, + "learning_rate": 4.574715918385382e-06, + "loss": 0.2758, "step": 15065 }, { - "epoch": 0.87, - "grad_norm": 0.8605096147432884, - "learning_rate": 9.324269948834985e-07, - "loss": 0.4698, + "epoch": 0.69, + "grad_norm": 0.36621624599969715, + "learning_rate": 4.573466070565022e-06, + "loss": 0.118, "step": 15066 }, { - "epoch": 0.87, - "grad_norm": 0.30120778723725855, - "learning_rate": 9.316424926318967e-07, - "loss": 0.2745, + "epoch": 0.69, + "grad_norm": 0.39435549379720786, + "learning_rate": 4.572216342882891e-06, + "loss": 0.29, "step": 15067 }, { - "epoch": 0.87, - "grad_norm": 0.3371975813552838, - "learning_rate": 9.30858304417126e-07, - "loss": 0.2051, + "epoch": 0.69, + "grad_norm": 0.3753782789915386, + "learning_rate": 4.570966735366661e-06, + "loss": 0.3287, "step": 15068 }, { - "epoch": 0.87, - "grad_norm": 0.2826029807524462, - "learning_rate": 9.300744302663401e-07, - "loss": 0.2156, + "epoch": 0.69, + "grad_norm": 0.8835055894430496, + "learning_rate": 4.569717248043991e-06, + "loss": 0.2909, "step": 15069 }, { - "epoch": 0.87, - "grad_norm": 1.4581018247895585, - "learning_rate": 9.292908702066883e-07, - "loss": 0.7467, + "epoch": 0.69, + "grad_norm": 0.3818858122866555, + "learning_rate": 4.568467880942548e-06, + "loss": 0.2582, "step": 15070 }, { - "epoch": 0.87, - "grad_norm": 0.3579852541537003, - "learning_rate": 9.285076242653035e-07, - "loss": 0.1911, + "epoch": 0.69, + "grad_norm": 0.2855388232861323, + "learning_rate": 4.567218634089995e-06, + "loss": 0.1952, "step": 15071 }, { - "epoch": 0.87, - "grad_norm": 0.34615695380006883, - "learning_rate": 9.277246924693106e-07, - "loss": 0.2995, + "epoch": 0.69, + "grad_norm": 0.347337322275765, + "learning_rate": 4.565969507513981e-06, + "loss": 0.272, "step": 15072 }, { - "epoch": 0.87, - "grad_norm": 0.6649173448975993, - "learning_rate": 9.269420748458202e-07, - "loss": 0.3692, + "epoch": 0.69, + "grad_norm": 0.38926275079838873, + "learning_rate": 4.5647205012421695e-06, + "loss": 0.2097, "step": 15073 }, { - "epoch": 0.87, - "grad_norm": 0.3270716735112408, - "learning_rate": 9.261597714219351e-07, - "loss": 0.2589, + "epoch": 0.69, + "grad_norm": 1.0642639894620527, + "learning_rate": 4.563471615302198e-06, + "loss": 0.6302, "step": 15074 }, { - "epoch": 0.87, - "grad_norm": 0.5415825274888945, - "learning_rate": 9.253777822247479e-07, - "loss": 0.2645, + "epoch": 0.69, + "grad_norm": 0.808615050259147, + "learning_rate": 4.562222849721735e-06, + "loss": 0.3708, "step": 15075 }, { - "epoch": 0.87, - "grad_norm": 0.28196842106823133, - "learning_rate": 9.24596107281338e-07, - "loss": 0.1962, + "epoch": 0.69, + "grad_norm": 0.24709329285994466, + "learning_rate": 4.560974204528412e-06, + "loss": 0.2252, "step": 15076 }, { - "epoch": 0.87, - "grad_norm": 0.306635893181428, - "learning_rate": 9.238147466187742e-07, - "loss": 0.2415, + "epoch": 0.69, + "grad_norm": 0.32699945775896205, + "learning_rate": 4.559725679749883e-06, + "loss": 0.2016, "step": 15077 }, { - "epoch": 0.87, - "grad_norm": 0.7446253266487983, - "learning_rate": 9.230337002641144e-07, - "loss": 0.3045, + "epoch": 0.69, + "grad_norm": 1.7301375251451545, + "learning_rate": 4.5584772754137785e-06, + "loss": 0.6738, "step": 15078 }, { - "epoch": 0.87, - "grad_norm": 0.3272308542677074, - "learning_rate": 9.222529682444081e-07, - "loss": 0.2904, + "epoch": 0.69, + "grad_norm": 0.34774945525877665, + "learning_rate": 4.557228991547743e-06, + "loss": 0.2125, "step": 15079 }, { - "epoch": 0.87, - "grad_norm": 0.3088128697895234, - "learning_rate": 9.214725505866929e-07, - "loss": 0.2339, + "epoch": 0.69, + "grad_norm": 0.3743729986406214, + "learning_rate": 4.555980828179416e-06, + "loss": 0.2971, "step": 15080 }, { - "epoch": 0.87, - "grad_norm": 0.9928672480486425, - "learning_rate": 9.206924473179913e-07, - "loss": 0.3694, + "epoch": 0.69, + "grad_norm": 0.9762390097429827, + "learning_rate": 4.5547327853364224e-06, + "loss": 0.3857, "step": 15081 }, { - "epoch": 0.87, - "grad_norm": 0.3445131715535893, - "learning_rate": 9.199126584653184e-07, - "loss": 0.1636, + "epoch": 0.69, + "grad_norm": 0.33564905696697234, + "learning_rate": 4.553484863046401e-06, + "loss": 0.1981, "step": 15082 }, { - "epoch": 0.87, - "grad_norm": 0.3972284436044029, - "learning_rate": 9.191331840556816e-07, - "loss": 0.2828, + "epoch": 0.69, + "grad_norm": 0.33717141997507216, + "learning_rate": 4.552237061336972e-06, + "loss": 0.1748, "step": 15083 }, { - "epoch": 0.87, - "grad_norm": 0.27892382887335904, - "learning_rate": 9.183540241160715e-07, - "loss": 0.2442, + "epoch": 0.69, + "grad_norm": 0.40328610858347463, + "learning_rate": 4.550989380235762e-06, + "loss": 0.2909, "step": 15084 }, { - "epoch": 0.87, - "grad_norm": 1.0493720657729202, - "learning_rate": 9.175751786734722e-07, - "loss": 0.5299, + "epoch": 0.69, + "grad_norm": 0.36586554698754253, + "learning_rate": 4.5497418197704e-06, + "loss": 0.2654, "step": 15085 }, { - "epoch": 0.87, - "grad_norm": 0.313880941212933, - "learning_rate": 9.167966477548529e-07, - "loss": 0.2344, + "epoch": 0.69, + "grad_norm": 0.9392539468708888, + "learning_rate": 4.548494379968498e-06, + "loss": 0.3351, "step": 15086 }, { - "epoch": 0.87, - "grad_norm": 0.5243978675286038, - "learning_rate": 9.160184313871745e-07, - "loss": 0.3414, + "epoch": 0.69, + "grad_norm": 0.38618541158994074, + "learning_rate": 4.5472470608576745e-06, + "loss": 0.2983, "step": 15087 }, { - "epoch": 0.87, - "grad_norm": 0.23520310415924797, - "learning_rate": 9.152405295973877e-07, - "loss": 0.1365, + "epoch": 0.69, + "grad_norm": 0.41676256011097607, + "learning_rate": 4.545999862465548e-06, + "loss": 0.2498, "step": 15088 }, { - "epoch": 0.87, - "grad_norm": 0.315451777087604, - "learning_rate": 9.144629424124318e-07, - "loss": 0.2393, + "epoch": 0.69, + "grad_norm": 0.2965360007063014, + "learning_rate": 4.54475278481973e-06, + "loss": 0.1602, "step": 15089 }, { - "epoch": 0.87, - "grad_norm": 0.6343131932703363, - "learning_rate": 9.136856698592323e-07, - "loss": 0.3756, + "epoch": 0.69, + "grad_norm": 0.5585336256320842, + "learning_rate": 4.543505827947827e-06, + "loss": 0.2681, "step": 15090 }, { - "epoch": 0.87, - "grad_norm": 0.33787984330910664, - "learning_rate": 9.129087119647062e-07, - "loss": 0.2509, + "epoch": 0.69, + "grad_norm": 0.4230718777618868, + "learning_rate": 4.5422589918774394e-06, + "loss": 0.283, "step": 15091 }, { - "epoch": 0.87, - "grad_norm": 0.31430488166281095, - "learning_rate": 9.121320687557622e-07, - "loss": 0.247, + "epoch": 0.69, + "grad_norm": 0.34965667524658567, + "learning_rate": 4.5410122766361856e-06, + "loss": 0.244, "step": 15092 }, { - "epoch": 0.87, - "grad_norm": 1.2083642377805037, - "learning_rate": 9.113557402592965e-07, - "loss": 0.5698, + "epoch": 0.69, + "grad_norm": 0.8598517455741441, + "learning_rate": 4.539765682251654e-06, + "loss": 0.4211, "step": 15093 }, { - "epoch": 0.87, - "grad_norm": 0.272352156350627, - "learning_rate": 9.105797265021865e-07, - "loss": 0.123, + "epoch": 0.69, + "grad_norm": 0.3321640258970063, + "learning_rate": 4.538519208751452e-06, + "loss": 0.2394, "step": 15094 }, { - "epoch": 0.87, - "grad_norm": 0.26727226411819544, - "learning_rate": 9.098040275113118e-07, - "loss": 0.2503, + "epoch": 0.69, + "grad_norm": 0.3544791254783406, + "learning_rate": 4.537272856163166e-06, + "loss": 0.2296, "step": 15095 }, { - "epoch": 0.87, - "grad_norm": 0.4165842297184612, - "learning_rate": 9.09028643313532e-07, - "loss": 0.2908, + "epoch": 0.69, + "grad_norm": 0.3370683532149129, + "learning_rate": 4.536026624514395e-06, + "loss": 0.1919, "step": 15096 }, { - "epoch": 0.87, - "grad_norm": 0.8381958437662107, - "learning_rate": 9.082535739357001e-07, - "loss": 0.3653, + "epoch": 0.69, + "grad_norm": 0.3519816524708853, + "learning_rate": 4.534780513832732e-06, + "loss": 0.256, "step": 15097 }, { - "epoch": 0.87, - "grad_norm": 0.3056167343849403, - "learning_rate": 9.074788194046557e-07, - "loss": 0.2505, + "epoch": 0.69, + "grad_norm": 0.7893360944159631, + "learning_rate": 4.533534524145756e-06, + "loss": 0.3928, "step": 15098 }, { - "epoch": 0.87, - "grad_norm": 0.5242561797322017, - "learning_rate": 9.067043797472264e-07, - "loss": 0.321, + "epoch": 0.69, + "grad_norm": 0.35207603650473746, + "learning_rate": 4.532288655481062e-06, + "loss": 0.2638, "step": 15099 }, { - "epoch": 0.87, - "grad_norm": 0.2684495155203418, - "learning_rate": 9.05930254990236e-07, - "loss": 0.2153, + "epoch": 0.69, + "grad_norm": 0.3302165087591495, + "learning_rate": 4.531042907866222e-06, + "loss": 0.2405, "step": 15100 }, { - "epoch": 0.87, - "grad_norm": 0.4350688261100347, - "learning_rate": 9.0515644516049e-07, - "loss": 0.188, + "epoch": 0.69, + "grad_norm": 1.397893228547607, + "learning_rate": 4.5297972813288224e-06, + "loss": 0.5013, "step": 15101 }, { - "epoch": 0.87, - "grad_norm": 0.5120139042621656, - "learning_rate": 9.043829502847845e-07, - "loss": 0.343, + "epoch": 0.69, + "grad_norm": 0.29250874127762, + "learning_rate": 4.528551775896442e-06, + "loss": 0.1265, "step": 15102 }, { - "epoch": 0.87, - "grad_norm": 0.35141044392890897, - "learning_rate": 9.036097703899049e-07, - "loss": 0.329, + "epoch": 0.69, + "grad_norm": 0.42699088595363477, + "learning_rate": 4.527306391596649e-06, + "loss": 0.2846, "step": 15103 }, { - "epoch": 0.87, - "grad_norm": 0.3241126312677885, - "learning_rate": 9.028369055026287e-07, - "loss": 0.1678, + "epoch": 0.69, + "grad_norm": 0.3408772024733025, + "learning_rate": 4.526061128457017e-06, + "loss": 0.2753, "step": 15104 }, { - "epoch": 0.87, - "grad_norm": 0.6065691927483036, - "learning_rate": 9.020643556497211e-07, - "loss": 0.3254, + "epoch": 0.69, + "grad_norm": 0.8400971633950676, + "learning_rate": 4.524815986505116e-06, + "loss": 0.3779, "step": 15105 }, { - "epoch": 0.87, - "grad_norm": 0.7583614764645674, - "learning_rate": 9.012921208579317e-07, - "loss": 0.3871, + "epoch": 0.69, + "grad_norm": 0.4446202030645186, + "learning_rate": 4.5235709657685145e-06, + "loss": 0.2713, "step": 15106 }, { - "epoch": 0.87, - "grad_norm": 0.25091863341591597, - "learning_rate": 9.005202011540037e-07, - "loss": 0.2057, + "epoch": 0.69, + "grad_norm": 0.6187808255775611, + "learning_rate": 4.522326066274775e-06, + "loss": 0.3508, "step": 15107 }, { - "epoch": 0.87, - "grad_norm": 0.290779990812648, - "learning_rate": 8.997485965646724e-07, - "loss": 0.2231, + "epoch": 0.69, + "grad_norm": 0.20849948140057645, + "learning_rate": 4.5210812880514485e-06, + "loss": 0.1751, "step": 15108 }, { - "epoch": 0.87, - "grad_norm": 1.2478961440750131, - "learning_rate": 8.989773071166552e-07, - "loss": 0.8038, + "epoch": 0.69, + "grad_norm": 0.39445035142795265, + "learning_rate": 4.5198366311261096e-06, + "loss": 0.2509, "step": 15109 }, { - "epoch": 0.87, - "grad_norm": 0.2953342710533784, - "learning_rate": 8.982063328366631e-07, - "loss": 0.1998, + "epoch": 0.69, + "grad_norm": 0.4992766067966705, + "learning_rate": 4.518592095526303e-06, + "loss": 0.3234, "step": 15110 }, { - "epoch": 0.87, - "grad_norm": 0.5276949035579362, - "learning_rate": 8.974356737513934e-07, - "loss": 0.3769, + "epoch": 0.69, + "grad_norm": 0.3534522977642188, + "learning_rate": 4.5173476812795865e-06, + "loss": 0.3152, "step": 15111 }, { - "epoch": 0.87, - "grad_norm": 0.4520637612174389, - "learning_rate": 8.966653298875339e-07, - "loss": 0.3065, + "epoch": 0.69, + "grad_norm": 0.3364226341316586, + "learning_rate": 4.516103388413506e-06, + "loss": 0.1766, "step": 15112 }, { - "epoch": 0.87, - "grad_norm": 0.3955320368936747, - "learning_rate": 8.958953012717641e-07, - "loss": 0.308, + "epoch": 0.69, + "grad_norm": 0.5815771648254908, + "learning_rate": 4.514859216955611e-06, + "loss": 0.34, "step": 15113 }, { - "epoch": 0.87, - "grad_norm": 0.25203374514690974, - "learning_rate": 8.951255879307486e-07, - "loss": 0.1288, + "epoch": 0.69, + "grad_norm": 0.29780907026965675, + "learning_rate": 4.5136151669334486e-06, + "loss": 0.1976, "step": 15114 }, { - "epoch": 0.87, - "grad_norm": 0.3478143118831568, - "learning_rate": 8.943561898911424e-07, - "loss": 0.3261, + "epoch": 0.69, + "grad_norm": 0.3409025764998092, + "learning_rate": 4.512371238374556e-06, + "loss": 0.218, "step": 15115 }, { - "epoch": 0.87, - "grad_norm": 0.7966981365506505, - "learning_rate": 8.935871071795876e-07, - "loss": 0.3132, + "epoch": 0.69, + "grad_norm": 0.36585498521769144, + "learning_rate": 4.511127431306478e-06, + "loss": 0.305, "step": 15116 }, { - "epoch": 0.87, - "grad_norm": 0.34943890063486505, - "learning_rate": 8.928183398227219e-07, - "loss": 0.2148, + "epoch": 0.69, + "grad_norm": 1.2031420307941076, + "learning_rate": 4.509883745756745e-06, + "loss": 0.784, "step": 15117 }, { - "epoch": 0.87, - "grad_norm": 0.42402665162699893, - "learning_rate": 8.920498878471651e-07, - "loss": 0.2931, + "epoch": 0.69, + "grad_norm": 0.3259677576015253, + "learning_rate": 4.508640181752893e-06, + "loss": 0.1907, "step": 15118 }, { - "epoch": 0.87, - "grad_norm": 0.374210011293103, - "learning_rate": 8.912817512795302e-07, - "loss": 0.2996, + "epoch": 0.69, + "grad_norm": 1.3912817301305562, + "learning_rate": 4.507396739322461e-06, + "loss": 0.6477, "step": 15119 }, { - "epoch": 0.87, - "grad_norm": 0.2495954819765893, - "learning_rate": 8.905139301464139e-07, - "loss": 0.1498, + "epoch": 0.69, + "grad_norm": 0.3650590828810978, + "learning_rate": 4.506153418492967e-06, + "loss": 0.299, "step": 15120 }, { - "epoch": 0.87, - "grad_norm": 1.1354208775040406, - "learning_rate": 8.897464244744103e-07, - "loss": 0.808, + "epoch": 0.69, + "grad_norm": 0.4426771133591469, + "learning_rate": 4.504910219291941e-06, + "loss": 0.2988, "step": 15121 }, { - "epoch": 0.87, - "grad_norm": 0.691843031359836, - "learning_rate": 8.88979234290096e-07, - "loss": 0.347, + "epoch": 0.69, + "grad_norm": 0.24926609078377604, + "learning_rate": 4.503667141746906e-06, + "loss": 0.131, "step": 15122 }, { - "epoch": 0.87, - "grad_norm": 0.25810167310409116, - "learning_rate": 8.882123596200387e-07, - "loss": 0.2779, + "epoch": 0.69, + "grad_norm": 0.38515243333087734, + "learning_rate": 4.502424185885387e-06, + "loss": 0.3258, "step": 15123 }, { - "epoch": 0.87, - "grad_norm": 0.664422180294338, - "learning_rate": 8.874458004907971e-07, - "loss": 0.2791, + "epoch": 0.69, + "grad_norm": 0.8928850506600022, + "learning_rate": 4.501181351734893e-06, + "loss": 0.4836, "step": 15124 }, { - "epoch": 0.87, - "grad_norm": 0.5426279817665275, - "learning_rate": 8.866795569289122e-07, - "loss": 0.266, + "epoch": 0.69, + "grad_norm": 0.37394642927086474, + "learning_rate": 4.499938639322946e-06, + "loss": 0.1998, "step": 15125 }, { - "epoch": 0.87, - "grad_norm": 0.31902793423548365, - "learning_rate": 8.859136289609272e-07, - "loss": 0.2559, + "epoch": 0.69, + "grad_norm": 0.6374029954078039, + "learning_rate": 4.4986960486770596e-06, + "loss": 0.3806, "step": 15126 }, { - "epoch": 0.87, - "grad_norm": 0.3112222568220345, - "learning_rate": 8.851480166133586e-07, - "loss": 0.2472, + "epoch": 0.69, + "grad_norm": 0.3830519819529997, + "learning_rate": 4.4974535798247365e-06, + "loss": 0.2544, "step": 15127 }, { - "epoch": 0.87, - "grad_norm": 0.2615724470027559, - "learning_rate": 8.843827199127208e-07, - "loss": 0.2101, + "epoch": 0.69, + "grad_norm": 0.23511853335918828, + "learning_rate": 4.4962112327934915e-06, + "loss": 0.1776, "step": 15128 }, { - "epoch": 0.87, - "grad_norm": 0.541148258436055, - "learning_rate": 8.836177388855183e-07, - "loss": 0.3172, + "epoch": 0.7, + "grad_norm": 1.261271615580231, + "learning_rate": 4.494969007610821e-06, + "loss": 0.735, "step": 15129 }, { - "epoch": 0.87, - "grad_norm": 0.4333991198695846, - "learning_rate": 8.82853073558243e-07, - "loss": 0.2728, + "epoch": 0.7, + "grad_norm": 0.6270693890376513, + "learning_rate": 4.493726904304232e-06, + "loss": 0.3243, "step": 15130 }, { - "epoch": 0.87, - "grad_norm": 0.3145467505444975, - "learning_rate": 8.820887239573728e-07, - "loss": 0.2714, + "epoch": 0.7, + "grad_norm": 0.26182362537610016, + "learning_rate": 4.492484922901226e-06, + "loss": 0.2302, "step": 15131 }, { - "epoch": 0.87, - "grad_norm": 0.5162011923967506, - "learning_rate": 8.813246901093763e-07, - "loss": 0.3445, + "epoch": 0.7, + "grad_norm": 0.711115629860134, + "learning_rate": 4.49124306342929e-06, + "loss": 0.4191, "step": 15132 }, { - "epoch": 0.87, - "grad_norm": 0.1839520405237534, - "learning_rate": 8.80560972040716e-07, - "loss": 0.123, + "epoch": 0.7, + "grad_norm": 0.5668940989141893, + "learning_rate": 4.49000132591593e-06, + "loss": 0.297, "step": 15133 }, { - "epoch": 0.87, - "grad_norm": 0.5364721309908936, - "learning_rate": 8.797975697778361e-07, - "loss": 0.3405, + "epoch": 0.7, + "grad_norm": 0.28806013384021917, + "learning_rate": 4.4887597103886194e-06, + "loss": 0.1918, "step": 15134 }, { - "epoch": 0.87, - "grad_norm": 0.2625779164027942, - "learning_rate": 8.790344833471753e-07, - "loss": 0.2696, + "epoch": 0.7, + "grad_norm": 0.3300699088710153, + "learning_rate": 4.487518216874866e-06, + "loss": 0.2474, "step": 15135 }, { - "epoch": 0.87, - "grad_norm": 0.7388355872405135, - "learning_rate": 8.782717127751572e-07, - "loss": 0.3782, + "epoch": 0.7, + "grad_norm": 0.43497766612974953, + "learning_rate": 4.486276845402147e-06, + "loss": 0.2773, "step": 15136 }, { - "epoch": 0.87, - "grad_norm": 0.7507920836315224, - "learning_rate": 8.775092580881961e-07, - "loss": 0.1164, + "epoch": 0.7, + "grad_norm": 0.5812307601851208, + "learning_rate": 4.4850355959979385e-06, + "loss": 0.3283, "step": 15137 }, { - "epoch": 0.87, - "grad_norm": 0.3979987615357961, - "learning_rate": 8.767471193126987e-07, - "loss": 0.2978, + "epoch": 0.7, + "grad_norm": 0.8597001226251257, + "learning_rate": 4.483794468689728e-06, + "loss": 0.302, "step": 15138 }, { - "epoch": 0.87, - "grad_norm": 0.3314991768932052, - "learning_rate": 8.75985296475057e-07, - "loss": 0.2987, + "epoch": 0.7, + "grad_norm": 0.3070103517755589, + "learning_rate": 4.482553463504991e-06, + "loss": 0.2743, "step": 15139 }, { - "epoch": 0.87, - "grad_norm": 0.3167246892166629, - "learning_rate": 8.752237896016513e-07, - "loss": 0.1281, + "epoch": 0.7, + "grad_norm": 0.5568791781738436, + "learning_rate": 4.481312580471208e-06, + "loss": 0.302, "step": 15140 }, { - "epoch": 0.87, - "grad_norm": 0.37153197907359864, - "learning_rate": 8.744625987188516e-07, - "loss": 0.3026, + "epoch": 0.7, + "grad_norm": 0.22182741282696428, + "learning_rate": 4.48007181961584e-06, + "loss": 0.1232, "step": 15141 }, { - "epoch": 0.87, - "grad_norm": 0.50583112778751, - "learning_rate": 8.737017238530221e-07, - "loss": 0.2881, + "epoch": 0.7, + "grad_norm": 0.9159255627133426, + "learning_rate": 4.478831180966366e-06, + "loss": 0.4672, "step": 15142 }, { - "epoch": 0.87, - "grad_norm": 0.30099834150968574, - "learning_rate": 8.729411650305086e-07, - "loss": 0.2031, + "epoch": 0.7, + "grad_norm": 0.33375505148038936, + "learning_rate": 4.477590664550243e-06, + "loss": 0.2759, "step": 15143 }, { - "epoch": 0.87, - "grad_norm": 0.40016934367057977, - "learning_rate": 8.721809222776512e-07, - "loss": 0.3229, + "epoch": 0.7, + "grad_norm": 0.6061394122079311, + "learning_rate": 4.476350270394942e-06, + "loss": 0.2691, "step": 15144 }, { - "epoch": 0.87, - "grad_norm": 1.1087364033476483, - "learning_rate": 8.71420995620773e-07, - "loss": 0.7046, + "epoch": 0.7, + "grad_norm": 0.9824000046889556, + "learning_rate": 4.475109998527926e-06, + "loss": 0.4374, "step": 15145 }, { - "epoch": 0.87, - "grad_norm": 0.24438582944496376, - "learning_rate": 8.706613850861955e-07, - "loss": 0.1804, + "epoch": 0.7, + "grad_norm": 0.30675684260001135, + "learning_rate": 4.473869848976644e-06, + "loss": 0.2207, "step": 15146 }, { - "epoch": 0.87, - "grad_norm": 0.2983420842661307, - "learning_rate": 8.69902090700222e-07, - "loss": 0.2841, + "epoch": 0.7, + "grad_norm": 0.3585500081526208, + "learning_rate": 4.472629821768559e-06, + "loss": 0.2931, "step": 15147 }, { - "epoch": 0.87, - "grad_norm": 0.47140707285888844, - "learning_rate": 8.691431124891458e-07, - "loss": 0.259, + "epoch": 0.7, + "grad_norm": 0.27269290635672777, + "learning_rate": 4.471389916931126e-06, + "loss": 0.1073, "step": 15148 }, { - "epoch": 0.87, - "grad_norm": 0.5977151144145306, - "learning_rate": 8.683844504792516e-07, - "loss": 0.2461, + "epoch": 0.7, + "grad_norm": 0.4446624915191592, + "learning_rate": 4.470150134491789e-06, + "loss": 0.2956, "step": 15149 }, { - "epoch": 0.87, - "grad_norm": 0.3293070448411575, - "learning_rate": 8.676261046968082e-07, - "loss": 0.2188, + "epoch": 0.7, + "grad_norm": 1.025443726870926, + "learning_rate": 4.4689104744779995e-06, + "loss": 0.4788, "step": 15150 }, { - "epoch": 0.87, - "grad_norm": 0.35148732981494435, - "learning_rate": 8.668680751680836e-07, - "loss": 0.3271, + "epoch": 0.7, + "grad_norm": 0.3364710987324278, + "learning_rate": 4.467670936917195e-06, + "loss": 0.2164, "step": 15151 }, { - "epoch": 0.87, - "grad_norm": 0.5480960876299462, - "learning_rate": 8.661103619193235e-07, - "loss": 0.3355, + "epoch": 0.7, + "grad_norm": 0.41522700490765285, + "learning_rate": 4.466431521836832e-06, + "loss": 0.3277, "step": 15152 }, { - "epoch": 0.87, - "grad_norm": 0.3996302396384634, - "learning_rate": 8.653529649767689e-07, - "loss": 0.2401, + "epoch": 0.7, + "grad_norm": 1.6657370420442392, + "learning_rate": 4.465192229264337e-06, + "loss": 0.8457, "step": 15153 }, { - "epoch": 0.87, - "grad_norm": 0.28270246411719746, - "learning_rate": 8.64595884366648e-07, - "loss": 0.194, + "epoch": 0.7, + "grad_norm": 0.4090978091081507, + "learning_rate": 4.463953059227155e-06, + "loss": 0.1666, "step": 15154 }, { - "epoch": 0.87, - "grad_norm": 0.453050144014881, - "learning_rate": 8.638391201151786e-07, - "loss": 0.2769, + "epoch": 0.7, + "grad_norm": 0.340852726846153, + "learning_rate": 4.462714011752715e-06, + "loss": 0.2967, "step": 15155 }, { - "epoch": 0.87, - "grad_norm": 0.29278379222233086, - "learning_rate": 8.630826722485686e-07, - "loss": 0.1997, + "epoch": 0.7, + "grad_norm": 0.44549323991784995, + "learning_rate": 4.461475086868448e-06, + "loss": 0.2552, "step": 15156 }, { - "epoch": 0.87, - "grad_norm": 0.6741223471885249, - "learning_rate": 8.623265407930126e-07, - "loss": 0.4103, + "epoch": 0.7, + "grad_norm": 0.432364113961003, + "learning_rate": 4.460236284601788e-06, + "loss": 0.1771, "step": 15157 }, { - "epoch": 0.87, - "grad_norm": 0.46914591529300426, - "learning_rate": 8.615707257746942e-07, - "loss": 0.3219, + "epoch": 0.7, + "grad_norm": 0.571732574401387, + "learning_rate": 4.4589976049801545e-06, + "loss": 0.3902, "step": 15158 }, { - "epoch": 0.87, - "grad_norm": 0.2827772048682046, - "learning_rate": 8.608152272197901e-07, - "loss": 0.2253, + "epoch": 0.7, + "grad_norm": 0.3730914811006373, + "learning_rate": 4.4577590480309764e-06, + "loss": 0.3091, "step": 15159 }, { - "epoch": 0.87, - "grad_norm": 0.31540560824285985, - "learning_rate": 8.600600451544638e-07, - "loss": 0.1769, + "epoch": 0.7, + "grad_norm": 0.6605139238714878, + "learning_rate": 4.456520613781669e-06, + "loss": 0.3784, "step": 15160 }, { - "epoch": 0.87, - "grad_norm": 1.0518770248703995, - "learning_rate": 8.593051796048623e-07, - "loss": 0.3286, + "epoch": 0.7, + "grad_norm": 0.3670746179168251, + "learning_rate": 4.45528230225965e-06, + "loss": 0.2478, "step": 15161 }, { - "epoch": 0.87, - "grad_norm": 0.3572068024893413, - "learning_rate": 8.58550630597128e-07, - "loss": 0.2964, + "epoch": 0.7, + "grad_norm": 0.347323995347443, + "learning_rate": 4.454044113492343e-06, + "loss": 0.1832, "step": 15162 }, { - "epoch": 0.87, - "grad_norm": 0.30446816276194233, - "learning_rate": 8.577963981573944e-07, - "loss": 0.238, + "epoch": 0.7, + "grad_norm": 0.36546359607808315, + "learning_rate": 4.452806047507149e-06, + "loss": 0.313, "step": 15163 }, { - "epoch": 0.87, - "grad_norm": 0.5673975171436122, - "learning_rate": 8.570424823117785e-07, - "loss": 0.3419, + "epoch": 0.7, + "grad_norm": 0.32087868159019306, + "learning_rate": 4.451568104331483e-06, + "loss": 0.2035, "step": 15164 }, { - "epoch": 0.87, - "grad_norm": 0.38060387474042917, - "learning_rate": 8.56288883086388e-07, - "loss": 0.2754, + "epoch": 0.7, + "grad_norm": 0.8272147742954485, + "learning_rate": 4.450330283992755e-06, + "loss": 0.5227, "step": 15165 }, { - "epoch": 0.87, - "grad_norm": 0.19743050333583234, - "learning_rate": 8.55535600507319e-07, - "loss": 0.1249, + "epoch": 0.7, + "grad_norm": 0.4658055544899943, + "learning_rate": 4.4490925865183625e-06, + "loss": 0.3229, "step": 15166 }, { - "epoch": 0.87, - "grad_norm": 0.4663590562791462, - "learning_rate": 8.547826346006594e-07, - "loss": 0.2865, + "epoch": 0.7, + "grad_norm": 0.24941655836853, + "learning_rate": 4.447855011935714e-06, + "loss": 0.1937, "step": 15167 }, { - "epoch": 0.87, - "grad_norm": 0.3707588567212866, - "learning_rate": 8.540299853924849e-07, - "loss": 0.3017, + "epoch": 0.7, + "grad_norm": 0.29781053769894394, + "learning_rate": 4.446617560272195e-06, + "loss": 0.1612, "step": 15168 }, { - "epoch": 0.87, - "grad_norm": 0.7253472188843394, - "learning_rate": 8.532776529088582e-07, - "loss": 0.304, + "epoch": 0.7, + "grad_norm": 0.7789594669533275, + "learning_rate": 4.44538023155522e-06, + "loss": 0.3679, "step": 15169 }, { - "epoch": 0.87, - "grad_norm": 0.27858164729369395, - "learning_rate": 8.525256371758317e-07, - "loss": 0.2592, + "epoch": 0.7, + "grad_norm": 0.328207736666487, + "learning_rate": 4.444143025812169e-06, + "loss": 0.2383, "step": 15170 }, { - "epoch": 0.87, - "grad_norm": 0.38438389855409005, - "learning_rate": 8.517739382194512e-07, - "loss": 0.2976, + "epoch": 0.7, + "grad_norm": 0.3693174230696078, + "learning_rate": 4.4429059430704404e-06, + "loss": 0.285, "step": 15171 }, { - "epoch": 0.87, - "grad_norm": 0.35220266077902224, - "learning_rate": 8.510225560657459e-07, - "loss": 0.0923, + "epoch": 0.7, + "grad_norm": 0.6970780739660067, + "learning_rate": 4.441668983357417e-06, + "loss": 0.3241, "step": 15172 }, { - "epoch": 0.87, - "grad_norm": 1.3201247021161922, - "learning_rate": 8.50271490740735e-07, - "loss": 0.5593, + "epoch": 0.7, + "grad_norm": 0.415308730160176, + "learning_rate": 4.4404321467004795e-06, + "loss": 0.302, "step": 15173 }, { - "epoch": 0.87, - "grad_norm": 0.32291369852792134, - "learning_rate": 8.495207422704299e-07, - "loss": 0.2695, + "epoch": 0.7, + "grad_norm": 0.2499223827371469, + "learning_rate": 4.439195433127022e-06, + "loss": 0.0821, "step": 15174 }, { - "epoch": 0.87, - "grad_norm": 0.34051148799422554, - "learning_rate": 8.487703106808254e-07, - "loss": 0.2965, + "epoch": 0.7, + "grad_norm": 0.3091369780692317, + "learning_rate": 4.437958842664415e-06, + "loss": 0.2636, "step": 15175 }, { - "epoch": 0.87, - "grad_norm": 1.5064423885781546, - "learning_rate": 8.480201959979139e-07, - "loss": 0.3937, + "epoch": 0.7, + "grad_norm": 0.3821456762521314, + "learning_rate": 4.436722375340042e-06, + "loss": 0.2706, "step": 15176 }, { - "epoch": 0.87, - "grad_norm": 0.31970419618362195, - "learning_rate": 8.472703982476694e-07, - "loss": 0.2563, + "epoch": 0.7, + "grad_norm": 0.8334632285193361, + "learning_rate": 4.435486031181271e-06, + "loss": 0.339, "step": 15177 }, { - "epoch": 0.87, - "grad_norm": 0.23113554443336126, - "learning_rate": 8.465209174560574e-07, - "loss": 0.1821, + "epoch": 0.7, + "grad_norm": 0.4382959379682711, + "learning_rate": 4.434249810215474e-06, + "loss": 0.3194, "step": 15178 }, { - "epoch": 0.87, - "grad_norm": 0.4125928845847924, - "learning_rate": 8.457717536490307e-07, - "loss": 0.2227, + "epoch": 0.7, + "grad_norm": 0.3287157071968033, + "learning_rate": 4.4330137124700266e-06, + "loss": 0.254, "step": 15179 }, { - "epoch": 0.87, - "grad_norm": 0.33512692119389803, - "learning_rate": 8.450229068525351e-07, - "loss": 0.2999, + "epoch": 0.7, + "grad_norm": 0.28609541468971095, + "learning_rate": 4.431777737972287e-06, + "loss": 0.0975, "step": 15180 }, { - "epoch": 0.87, - "grad_norm": 0.612047740136571, - "learning_rate": 8.442743770925044e-07, - "loss": 0.3659, + "epoch": 0.7, + "grad_norm": 1.2223946804272083, + "learning_rate": 4.430541886749621e-06, + "loss": 0.5811, "step": 15181 }, { - "epoch": 0.87, - "grad_norm": 0.32224924038452785, - "learning_rate": 8.435261643948567e-07, - "loss": 0.2515, + "epoch": 0.7, + "grad_norm": 0.3606548208352364, + "learning_rate": 4.429306158829394e-06, + "loss": 0.2969, "step": 15182 }, { - "epoch": 0.87, - "grad_norm": 0.4883487465666448, - "learning_rate": 8.427782687855035e-07, - "loss": 0.264, + "epoch": 0.7, + "grad_norm": 0.41762959200884686, + "learning_rate": 4.4280705542389545e-06, + "loss": 0.2617, "step": 15183 }, { - "epoch": 0.87, - "grad_norm": 0.5277231776949652, - "learning_rate": 8.420306902903464e-07, - "loss": 0.217, + "epoch": 0.7, + "grad_norm": 0.9977943084934451, + "learning_rate": 4.426835073005668e-06, + "loss": 0.4398, "step": 15184 }, { - "epoch": 0.87, - "grad_norm": 0.3979449991906311, - "learning_rate": 8.412834289352734e-07, - "loss": 0.171, + "epoch": 0.7, + "grad_norm": 0.36809478866697803, + "learning_rate": 4.425599715156873e-06, + "loss": 0.2834, "step": 15185 }, { - "epoch": 0.87, - "grad_norm": 0.36033874060148274, - "learning_rate": 8.405364847461606e-07, - "loss": 0.2843, + "epoch": 0.7, + "grad_norm": 0.2718645316486167, + "learning_rate": 4.424364480719935e-06, + "loss": 0.1746, "step": 15186 }, { - "epoch": 0.87, - "grad_norm": 0.3147674767160009, - "learning_rate": 8.397898577488739e-07, - "loss": 0.3099, + "epoch": 0.7, + "grad_norm": 0.35608749242325954, + "learning_rate": 4.42312936972219e-06, + "loss": 0.2466, "step": 15187 }, { - "epoch": 0.87, - "grad_norm": 1.3165994656358568, - "learning_rate": 8.390435479692726e-07, - "loss": 0.7248, + "epoch": 0.7, + "grad_norm": 0.5909139021603691, + "learning_rate": 4.421894382190989e-06, + "loss": 0.2439, "step": 15188 }, { - "epoch": 0.87, - "grad_norm": 0.2795136011073454, - "learning_rate": 8.382975554331985e-07, - "loss": 0.1823, + "epoch": 0.7, + "grad_norm": 0.7648167561706752, + "learning_rate": 4.420659518153667e-06, + "loss": 0.402, "step": 15189 }, { - "epoch": 0.87, - "grad_norm": 0.33718281834093666, - "learning_rate": 8.375518801664873e-07, - "loss": 0.2743, + "epoch": 0.7, + "grad_norm": 0.38322427692784544, + "learning_rate": 4.419424777637565e-06, + "loss": 0.242, "step": 15190 }, { - "epoch": 0.87, - "grad_norm": 0.31695562935276533, - "learning_rate": 8.368065221949595e-07, - "loss": 0.2208, + "epoch": 0.7, + "grad_norm": 0.3684128637202067, + "learning_rate": 4.418190160670025e-06, + "loss": 0.2678, "step": 15191 }, { - "epoch": 0.87, - "grad_norm": 0.3135609234920389, - "learning_rate": 8.360614815444268e-07, - "loss": 0.2054, + "epoch": 0.7, + "grad_norm": 0.3783977894820084, + "learning_rate": 4.416955667278371e-06, + "loss": 0.2546, "step": 15192 }, { - "epoch": 0.87, - "grad_norm": 0.9047987658050868, - "learning_rate": 8.353167582406918e-07, - "loss": 0.4717, + "epoch": 0.7, + "grad_norm": 0.4155417356424887, + "learning_rate": 4.4157212974899395e-06, + "loss": 0.1577, "step": 15193 }, { - "epoch": 0.87, - "grad_norm": 0.32879063579291573, - "learning_rate": 8.345723523095462e-07, - "loss": 0.2928, + "epoch": 0.7, + "grad_norm": 0.36875080693216306, + "learning_rate": 4.414487051332055e-06, + "loss": 0.2681, "step": 15194 }, { - "epoch": 0.87, - "grad_norm": 0.33331989963810826, - "learning_rate": 8.338282637767614e-07, - "loss": 0.1939, + "epoch": 0.7, + "grad_norm": 0.39276530892772926, + "learning_rate": 4.413252928832042e-06, + "loss": 0.3069, "step": 15195 }, { - "epoch": 0.87, - "grad_norm": 0.4331013516301031, - "learning_rate": 8.330844926681114e-07, - "loss": 0.2289, + "epoch": 0.7, + "grad_norm": 1.241198645915899, + "learning_rate": 4.412018930017229e-06, + "loss": 0.4091, "step": 15196 }, { - "epoch": 0.87, - "grad_norm": 0.4826586936930948, - "learning_rate": 8.323410390093523e-07, - "loss": 0.2577, + "epoch": 0.7, + "grad_norm": 0.3544435422459911, + "learning_rate": 4.410785054914928e-06, + "loss": 0.2626, "step": 15197 }, { - "epoch": 0.87, - "grad_norm": 0.20794647329271113, - "learning_rate": 8.315979028262277e-07, - "loss": 0.2144, + "epoch": 0.7, + "grad_norm": 0.297902272210225, + "learning_rate": 4.409551303552457e-06, + "loss": 0.2099, "step": 15198 }, { - "epoch": 0.87, - "grad_norm": 0.9167179052248758, - "learning_rate": 8.308550841444718e-07, - "loss": 0.493, + "epoch": 0.7, + "grad_norm": 0.5250432468484927, + "learning_rate": 4.408317675957134e-06, + "loss": 0.3443, "step": 15199 }, { - "epoch": 0.87, - "grad_norm": 0.9684662978300949, - "learning_rate": 8.301125829898126e-07, - "loss": 0.6385, + "epoch": 0.7, + "grad_norm": 0.41159212794932065, + "learning_rate": 4.407084172156271e-06, + "loss": 0.2098, "step": 15200 }, { - "epoch": 0.87, - "grad_norm": 0.31667062423994347, - "learning_rate": 8.293703993879599e-07, - "loss": 0.262, + "epoch": 0.7, + "grad_norm": 0.8147864667367514, + "learning_rate": 4.4058507921771746e-06, + "loss": 0.4439, "step": 15201 }, { - "epoch": 0.87, - "grad_norm": 0.3174213408244066, - "learning_rate": 8.28628533364616e-07, - "loss": 0.2297, + "epoch": 0.7, + "grad_norm": 0.5850096465388983, + "learning_rate": 4.404617536047145e-06, + "loss": 0.3615, "step": 15202 }, { - "epoch": 0.87, - "grad_norm": 0.6367147811096763, - "learning_rate": 8.278869849454718e-07, - "loss": 0.3327, + "epoch": 0.7, + "grad_norm": 0.2830120917119553, + "learning_rate": 4.4033844037934915e-06, + "loss": 0.2028, "step": 15203 }, { - "epoch": 0.87, - "grad_norm": 0.41740650979949356, - "learning_rate": 8.271457541562045e-07, - "loss": 0.2915, + "epoch": 0.7, + "grad_norm": 1.2862854861268136, + "learning_rate": 4.402151395443513e-06, + "loss": 0.577, "step": 15204 }, { - "epoch": 0.87, - "grad_norm": 0.19492825352143867, - "learning_rate": 8.264048410224879e-07, - "loss": 0.15, + "epoch": 0.7, + "grad_norm": 0.4766336887280671, + "learning_rate": 4.400918511024511e-06, + "loss": 0.2348, "step": 15205 }, { - "epoch": 0.87, - "grad_norm": 0.34968830067182716, - "learning_rate": 8.25664245569976e-07, - "loss": 0.2954, + "epoch": 0.7, + "grad_norm": 0.3037825367387269, + "learning_rate": 4.399685750563772e-06, + "loss": 0.2114, "step": 15206 }, { - "epoch": 0.87, - "grad_norm": 0.38126528864990045, - "learning_rate": 8.249239678243171e-07, - "loss": 0.2521, + "epoch": 0.7, + "grad_norm": 0.5241600431443554, + "learning_rate": 4.398453114088595e-06, + "loss": 0.3615, "step": 15207 }, { - "epoch": 0.87, - "grad_norm": 0.4991098352243059, - "learning_rate": 8.241840078111452e-07, - "loss": 0.2371, + "epoch": 0.7, + "grad_norm": 1.2410112459262153, + "learning_rate": 4.397220601626269e-06, + "loss": 0.69, "step": 15208 }, { - "epoch": 0.87, - "grad_norm": 0.7523629811440098, - "learning_rate": 8.234443655560886e-07, - "loss": 0.3825, + "epoch": 0.7, + "grad_norm": 0.3785205854962509, + "learning_rate": 4.395988213204075e-06, + "loss": 0.1711, "step": 15209 }, { - "epoch": 0.87, - "grad_norm": 0.27570811560535324, - "learning_rate": 8.227050410847592e-07, - "loss": 0.268, + "epoch": 0.7, + "grad_norm": 0.3262711131310727, + "learning_rate": 4.394755948849305e-06, + "loss": 0.2204, "step": 15210 }, { - "epoch": 0.87, - "grad_norm": 0.23902070137790352, - "learning_rate": 8.219660344227587e-07, - "loss": 0.2191, + "epoch": 0.7, + "grad_norm": 0.4013708139699758, + "learning_rate": 4.393523808589233e-06, + "loss": 0.2782, "step": 15211 }, { - "epoch": 0.87, - "grad_norm": 1.005631527918455, - "learning_rate": 8.212273455956787e-07, - "loss": 0.3593, + "epoch": 0.7, + "grad_norm": 0.4123241730500596, + "learning_rate": 4.39229179245114e-06, + "loss": 0.267, "step": 15212 }, { - "epoch": 0.87, - "grad_norm": 0.31857938219013965, - "learning_rate": 8.204889746291022e-07, - "loss": 0.2688, + "epoch": 0.7, + "grad_norm": 1.0177646815586514, + "learning_rate": 4.391059900462305e-06, + "loss": 0.2697, "step": 15213 }, { - "epoch": 0.87, - "grad_norm": 0.38049878062907533, - "learning_rate": 8.197509215485988e-07, - "loss": 0.2892, + "epoch": 0.7, + "grad_norm": 0.36693782192574476, + "learning_rate": 4.389828132649995e-06, + "loss": 0.2817, "step": 15214 }, { - "epoch": 0.87, - "grad_norm": 0.7259267300163725, - "learning_rate": 8.190131863797246e-07, - "loss": 0.2979, + "epoch": 0.7, + "grad_norm": 0.43986466957214104, + "learning_rate": 4.388596489041483e-06, + "loss": 0.2441, "step": 15215 }, { - "epoch": 0.87, - "grad_norm": 0.3431148587829527, - "learning_rate": 8.182757691480303e-07, - "loss": 0.2404, + "epoch": 0.7, + "grad_norm": 0.6462477854929174, + "learning_rate": 4.387364969664034e-06, + "loss": 0.2609, "step": 15216 }, { - "epoch": 0.87, - "grad_norm": 0.4465896566952469, - "learning_rate": 8.175386698790489e-07, - "loss": 0.3118, + "epoch": 0.7, + "grad_norm": 1.102230604291616, + "learning_rate": 4.386133574544921e-06, + "loss": 0.3755, "step": 15217 }, { - "epoch": 0.87, - "grad_norm": 0.27790041083033745, - "learning_rate": 8.168018885983109e-07, - "loss": 0.2146, + "epoch": 0.7, + "grad_norm": 0.24842763666477394, + "learning_rate": 4.384902303711396e-06, + "loss": 0.1861, "step": 15218 }, { - "epoch": 0.87, - "grad_norm": 0.3068585203990971, - "learning_rate": 8.160654253313282e-07, - "loss": 0.2547, + "epoch": 0.7, + "grad_norm": 0.29150297808990167, + "learning_rate": 4.383671157190725e-06, + "loss": 0.2437, "step": 15219 }, { - "epoch": 0.87, - "grad_norm": 0.7012299019399189, - "learning_rate": 8.15329280103605e-07, - "loss": 0.3996, + "epoch": 0.7, + "grad_norm": 1.6465350819487814, + "learning_rate": 4.382440135010159e-06, + "loss": 0.7005, "step": 15220 }, { - "epoch": 0.87, - "grad_norm": 0.3526300473721663, - "learning_rate": 8.145934529406319e-07, - "loss": 0.2639, + "epoch": 0.7, + "grad_norm": 0.3656908091192963, + "learning_rate": 4.381209237196953e-06, + "loss": 0.257, "step": 15221 }, { - "epoch": 0.87, - "grad_norm": 0.4329912494767619, - "learning_rate": 8.13857943867894e-07, - "loss": 0.275, + "epoch": 0.7, + "grad_norm": 0.5116965473359221, + "learning_rate": 4.379978463778364e-06, + "loss": 0.2515, "step": 15222 }, { - "epoch": 0.87, - "grad_norm": 0.5705532148120219, - "learning_rate": 8.13122752910861e-07, - "loss": 0.3568, + "epoch": 0.7, + "grad_norm": 0.48409627170912534, + "learning_rate": 4.378747814781629e-06, + "loss": 0.3307, "step": 15223 }, { - "epoch": 0.87, - "grad_norm": 0.29636431949967834, - "learning_rate": 8.123878800949914e-07, - "loss": 0.2048, + "epoch": 0.7, + "grad_norm": 0.36413435363247004, + "learning_rate": 4.377517290234002e-06, + "loss": 0.2552, "step": 15224 }, { - "epoch": 0.87, - "grad_norm": 0.35704937882642657, - "learning_rate": 8.116533254457337e-07, - "loss": 0.186, + "epoch": 0.7, + "grad_norm": 0.4612892019653575, + "learning_rate": 4.3762868901627265e-06, + "loss": 0.2807, "step": 15225 }, { - "epoch": 0.87, - "grad_norm": 0.3023244294858326, - "learning_rate": 8.109190889885277e-07, - "loss": 0.2889, + "epoch": 0.7, + "grad_norm": 0.2588200211905363, + "learning_rate": 4.375056614595035e-06, + "loss": 0.2088, "step": 15226 }, { - "epoch": 0.87, - "grad_norm": 1.1384083091604984, - "learning_rate": 8.101851707487973e-07, - "loss": 0.4531, + "epoch": 0.7, + "grad_norm": 0.43369271773208273, + "learning_rate": 4.373826463558173e-06, + "loss": 0.2585, "step": 15227 }, { - "epoch": 0.87, - "grad_norm": 0.36463495630098414, - "learning_rate": 8.094515707519623e-07, - "loss": 0.1598, + "epoch": 0.7, + "grad_norm": 0.5453599704955445, + "learning_rate": 4.372596437079362e-06, + "loss": 0.3387, "step": 15228 }, { - "epoch": 0.87, - "grad_norm": 0.544002008754593, - "learning_rate": 8.087182890234202e-07, - "loss": 0.3947, + "epoch": 0.7, + "grad_norm": 0.4316132246014325, + "learning_rate": 4.3713665351858505e-06, + "loss": 0.205, "step": 15229 }, { - "epoch": 0.88, - "grad_norm": 0.25996577539505555, - "learning_rate": 8.079853255885705e-07, - "loss": 0.2442, + "epoch": 0.7, + "grad_norm": 0.4251608638368021, + "learning_rate": 4.370136757904858e-06, + "loss": 0.2816, "step": 15230 }, { - "epoch": 0.88, - "grad_norm": 0.22928359992702024, - "learning_rate": 8.072526804727943e-07, - "loss": 0.1601, + "epoch": 0.7, + "grad_norm": 0.3331584160039521, + "learning_rate": 4.368907105263608e-06, + "loss": 0.2923, "step": 15231 }, { - "epoch": 0.88, - "grad_norm": 0.548664404540612, - "learning_rate": 8.06520353701461e-07, - "loss": 0.369, + "epoch": 0.7, + "grad_norm": 0.4084735270283578, + "learning_rate": 4.367677577289331e-06, + "loss": 0.1432, "step": 15232 }, { - "epoch": 0.88, - "grad_norm": 1.4342997506272013, - "learning_rate": 8.057883452999316e-07, - "loss": 0.5444, + "epoch": 0.7, + "grad_norm": 0.434937037567238, + "learning_rate": 4.366448174009237e-06, + "loss": 0.2471, "step": 15233 }, { - "epoch": 0.88, - "grad_norm": 0.22084093260537044, - "learning_rate": 8.050566552935579e-07, - "loss": 0.2115, + "epoch": 0.7, + "grad_norm": 0.29841579164175874, + "learning_rate": 4.365218895450558e-06, + "loss": 0.2759, "step": 15234 }, { - "epoch": 0.88, - "grad_norm": 1.2855665998775354, - "learning_rate": 8.043252837076776e-07, - "loss": 0.6016, + "epoch": 0.7, + "grad_norm": 0.9832731348807893, + "learning_rate": 4.363989741640498e-06, + "loss": 0.2984, "step": 15235 }, { - "epoch": 0.88, - "grad_norm": 0.43839777602461616, - "learning_rate": 8.03594230567617e-07, - "loss": 0.3084, + "epoch": 0.7, + "grad_norm": 0.46552729126392167, + "learning_rate": 4.362760712606278e-06, + "loss": 0.2725, "step": 15236 }, { - "epoch": 0.88, - "grad_norm": 0.3919309008857394, - "learning_rate": 8.028634958986903e-07, - "loss": 0.271, + "epoch": 0.7, + "grad_norm": 0.5754906200760573, + "learning_rate": 4.3615318083750965e-06, + "loss": 0.3869, "step": 15237 }, { - "epoch": 0.88, - "grad_norm": 0.2905695696643427, - "learning_rate": 8.021330797262072e-07, - "loss": 0.2399, + "epoch": 0.7, + "grad_norm": 0.2890323118812972, + "learning_rate": 4.3603030289741675e-06, + "loss": 0.2278, "step": 15238 }, { - "epoch": 0.88, - "grad_norm": 0.5220398686554123, - "learning_rate": 8.01402982075461e-07, - "loss": 0.2565, + "epoch": 0.7, + "grad_norm": 0.2867669971959479, + "learning_rate": 4.359074374430698e-06, + "loss": 0.1718, "step": 15239 }, { - "epoch": 0.88, - "grad_norm": 0.5903199356384773, - "learning_rate": 8.006732029717335e-07, - "loss": 0.2823, + "epoch": 0.7, + "grad_norm": 0.45979811478350824, + "learning_rate": 4.357845844771881e-06, + "loss": 0.3222, "step": 15240 }, { - "epoch": 0.88, - "grad_norm": 0.3598694129624296, - "learning_rate": 7.999437424402967e-07, - "loss": 0.2532, + "epoch": 0.7, + "grad_norm": 0.9335988604252408, + "learning_rate": 4.356617440024919e-06, + "loss": 0.4251, "step": 15241 }, { - "epoch": 0.88, - "grad_norm": 0.33400510821115537, - "learning_rate": 7.992146005064105e-07, - "loss": 0.3282, + "epoch": 0.7, + "grad_norm": 0.2794366968806048, + "learning_rate": 4.355389160217012e-06, + "loss": 0.2131, "step": 15242 }, { - "epoch": 0.88, - "grad_norm": 0.5293015346627737, - "learning_rate": 7.984857771953303e-07, - "loss": 0.2665, + "epoch": 0.7, + "grad_norm": 0.5344644367526443, + "learning_rate": 4.354161005375344e-06, + "loss": 0.4025, "step": 15243 }, { - "epoch": 0.88, - "grad_norm": 0.21466042777465733, - "learning_rate": 7.977572725322913e-07, - "loss": 0.1591, + "epoch": 0.7, + "grad_norm": 0.30748066704275, + "learning_rate": 4.352932975527113e-06, + "loss": 0.2049, "step": 15244 }, { - "epoch": 0.88, - "grad_norm": 0.4726424277010858, - "learning_rate": 7.970290865425212e-07, - "loss": 0.3258, + "epoch": 0.7, + "grad_norm": 0.39625816507850115, + "learning_rate": 4.351705070699498e-06, + "loss": 0.2202, "step": 15245 }, { - "epoch": 0.88, - "grad_norm": 0.4202027933289129, - "learning_rate": 7.963012192512376e-07, - "loss": 0.2639, + "epoch": 0.7, + "grad_norm": 0.38004122383950417, + "learning_rate": 4.3504772909196945e-06, + "loss": 0.3019, "step": 15246 }, { - "epoch": 0.88, - "grad_norm": 0.3945913782257165, - "learning_rate": 7.955736706836481e-07, - "loss": 0.2548, + "epoch": 0.7, + "grad_norm": 0.5385787284656914, + "learning_rate": 4.3492496362148786e-06, + "loss": 0.3345, "step": 15247 }, { - "epoch": 0.88, - "grad_norm": 0.6633476242535613, - "learning_rate": 7.94846440864947e-07, - "loss": 0.3651, + "epoch": 0.7, + "grad_norm": 0.8627607386966181, + "learning_rate": 4.348022106612226e-06, + "loss": 0.4528, "step": 15248 }, { - "epoch": 0.88, - "grad_norm": 0.3693773855404697, - "learning_rate": 7.941195298203175e-07, - "loss": 0.2616, + "epoch": 0.7, + "grad_norm": 0.3384202699824737, + "learning_rate": 4.346794702138921e-06, + "loss": 0.2327, "step": 15249 }, { - "epoch": 0.88, - "grad_norm": 0.260791519573847, - "learning_rate": 7.933929375749317e-07, - "loss": 0.2281, + "epoch": 0.7, + "grad_norm": 0.28862575366069737, + "learning_rate": 4.345567422822124e-06, + "loss": 0.2534, "step": 15250 }, { - "epoch": 0.88, - "grad_norm": 0.5155000696175669, - "learning_rate": 7.926666641539538e-07, - "loss": 0.1134, + "epoch": 0.7, + "grad_norm": 0.5965613187556168, + "learning_rate": 4.344340268689023e-06, + "loss": 0.2927, "step": 15251 }, { - "epoch": 0.88, - "grad_norm": 0.3980620444030963, - "learning_rate": 7.919407095825337e-07, - "loss": 0.2641, + "epoch": 0.7, + "grad_norm": 0.31121967977888754, + "learning_rate": 4.343113239766774e-06, + "loss": 0.2083, "step": 15252 }, { - "epoch": 0.88, - "grad_norm": 0.5403914162100106, - "learning_rate": 7.912150738858104e-07, - "loss": 0.3727, + "epoch": 0.7, + "grad_norm": 1.540221731632707, + "learning_rate": 4.34188633608255e-06, + "loss": 0.6718, "step": 15253 }, { - "epoch": 0.88, - "grad_norm": 0.3470276410754074, - "learning_rate": 7.904897570889136e-07, - "loss": 0.2602, + "epoch": 0.7, + "grad_norm": 0.3067809960612206, + "learning_rate": 4.3406595576635024e-06, + "loss": 0.2528, "step": 15254 }, { - "epoch": 0.88, - "grad_norm": 0.3700558072591282, - "learning_rate": 7.897647592169578e-07, - "loss": 0.2559, + "epoch": 0.7, + "grad_norm": 0.3898160306616826, + "learning_rate": 4.3394329045368e-06, + "loss": 0.2737, "step": 15255 }, { - "epoch": 0.88, - "grad_norm": 0.531602700604024, - "learning_rate": 7.890400802950548e-07, - "loss": 0.3307, + "epoch": 0.7, + "grad_norm": 0.7241511996085377, + "learning_rate": 4.3382063767296e-06, + "loss": 0.4212, "step": 15256 }, { - "epoch": 0.88, - "grad_norm": 0.25900870477851295, - "learning_rate": 7.883157203482982e-07, - "loss": 0.1727, + "epoch": 0.7, + "grad_norm": 0.5570099951119658, + "learning_rate": 4.336979974269051e-06, + "loss": 0.3305, "step": 15257 }, { - "epoch": 0.88, - "grad_norm": 0.5904195157887109, - "learning_rate": 7.875916794017713e-07, - "loss": 0.2851, + "epoch": 0.7, + "grad_norm": 0.24273102202897184, + "learning_rate": 4.335753697182308e-06, + "loss": 0.1966, "step": 15258 }, { - "epoch": 0.88, - "grad_norm": 0.38738009696102815, - "learning_rate": 7.868679574805472e-07, - "loss": 0.3192, + "epoch": 0.7, + "grad_norm": 0.3712279567200119, + "learning_rate": 4.334527545496521e-06, + "loss": 0.2603, "step": 15259 }, { - "epoch": 0.88, - "grad_norm": 0.6042547956260029, - "learning_rate": 7.861445546096902e-07, - "loss": 0.3113, + "epoch": 0.7, + "grad_norm": 0.6284518683071875, + "learning_rate": 4.333301519238831e-06, + "loss": 0.3791, "step": 15260 }, { - "epoch": 0.88, - "grad_norm": 0.29172509317629647, - "learning_rate": 7.854214708142538e-07, - "loss": 0.2586, + "epoch": 0.7, + "grad_norm": 0.4077154996296781, + "learning_rate": 4.332075618436387e-06, + "loss": 0.3386, "step": 15261 }, { - "epoch": 0.88, - "grad_norm": 0.3643435241482438, - "learning_rate": 7.846987061192723e-07, - "loss": 0.2565, + "epoch": 0.7, + "grad_norm": 0.3946061288676655, + "learning_rate": 4.3308498431163186e-06, + "loss": 0.2689, "step": 15262 }, { - "epoch": 0.88, - "grad_norm": 0.3800601781197826, - "learning_rate": 7.839762605497791e-07, - "loss": 0.1566, + "epoch": 0.7, + "grad_norm": 0.5036357230123738, + "learning_rate": 4.329624193305778e-06, + "loss": 0.2652, "step": 15263 }, { - "epoch": 0.88, - "grad_norm": 0.30248056133318446, - "learning_rate": 7.83254134130793e-07, - "loss": 0.1106, + "epoch": 0.7, + "grad_norm": 0.3655351573272514, + "learning_rate": 4.328398669031889e-06, + "loss": 0.2299, "step": 15264 }, { - "epoch": 0.88, - "grad_norm": 0.3365648909541551, - "learning_rate": 7.825323268873187e-07, - "loss": 0.2817, + "epoch": 0.7, + "grad_norm": 0.3474090068782338, + "learning_rate": 4.327173270321791e-06, + "loss": 0.1746, "step": 15265 }, { - "epoch": 0.88, - "grad_norm": 0.4406402840444798, - "learning_rate": 7.818108388443546e-07, - "loss": 0.3342, + "epoch": 0.7, + "grad_norm": 0.4313793028765869, + "learning_rate": 4.3259479972026085e-06, + "loss": 0.2686, "step": 15266 }, { - "epoch": 0.88, - "grad_norm": 0.7090457854227306, - "learning_rate": 7.810896700268822e-07, - "loss": 0.223, + "epoch": 0.7, + "grad_norm": 0.3565267077601619, + "learning_rate": 4.3247228497014615e-06, + "loss": 0.2799, "step": 15267 }, { - "epoch": 0.88, - "grad_norm": 0.2873213588453266, - "learning_rate": 7.803688204598803e-07, - "loss": 0.2282, + "epoch": 0.7, + "grad_norm": 1.0055404634180432, + "learning_rate": 4.323497827845489e-06, + "loss": 0.315, "step": 15268 }, { - "epoch": 0.88, - "grad_norm": 0.36227730773662065, - "learning_rate": 7.796482901683089e-07, - "loss": 0.2392, + "epoch": 0.7, + "grad_norm": 0.8738501619775048, + "learning_rate": 4.322272931661798e-06, + "loss": 0.396, "step": 15269 }, { - "epoch": 0.88, - "grad_norm": 0.3593253101252005, - "learning_rate": 7.789280791771214e-07, - "loss": 0.1979, + "epoch": 0.7, + "grad_norm": 0.27585104631782204, + "learning_rate": 4.321048161177518e-06, + "loss": 0.2645, "step": 15270 }, { - "epoch": 0.88, - "grad_norm": 0.3621820947758829, - "learning_rate": 7.782081875112568e-07, - "loss": 0.2827, + "epoch": 0.7, + "grad_norm": 0.3066696254851739, + "learning_rate": 4.319823516419753e-06, + "loss": 0.0973, "step": 15271 }, { - "epoch": 0.88, - "grad_norm": 0.6322813462372843, - "learning_rate": 7.774886151956473e-07, - "loss": 0.3907, + "epoch": 0.7, + "grad_norm": 0.620490167366543, + "learning_rate": 4.318598997415621e-06, + "loss": 0.3449, "step": 15272 }, { - "epoch": 0.88, - "grad_norm": 0.2985759612155513, - "learning_rate": 7.767693622552097e-07, - "loss": 0.2031, + "epoch": 0.7, + "grad_norm": 0.42353675525848405, + "learning_rate": 4.317374604192236e-06, + "loss": 0.326, "step": 15273 }, { - "epoch": 0.88, - "grad_norm": 0.40022176584749375, - "learning_rate": 7.76050428714854e-07, - "loss": 0.2846, + "epoch": 0.7, + "grad_norm": 0.3852153855241123, + "learning_rate": 4.316150336776696e-06, + "loss": 0.3238, "step": 15274 }, { - "epoch": 0.88, - "grad_norm": 0.3104592255829074, - "learning_rate": 7.753318145994727e-07, - "loss": 0.1448, + "epoch": 0.7, + "grad_norm": 0.6077263572317136, + "learning_rate": 4.314926195196109e-06, + "loss": 0.2004, "step": 15275 }, { - "epoch": 0.88, - "grad_norm": 0.49120492596421095, - "learning_rate": 7.746135199339556e-07, - "loss": 0.3065, + "epoch": 0.7, + "grad_norm": 0.27750099715007726, + "learning_rate": 4.31370217947758e-06, + "loss": 0.2107, "step": 15276 }, { - "epoch": 0.88, - "grad_norm": 0.28632281845942664, - "learning_rate": 7.738955447431762e-07, - "loss": 0.2348, + "epoch": 0.7, + "grad_norm": 0.5058900067838109, + "learning_rate": 4.312478289648202e-06, + "loss": 0.2397, "step": 15277 }, { - "epoch": 0.88, - "grad_norm": 0.4665069876855719, - "learning_rate": 7.731778890519969e-07, - "loss": 0.3292, + "epoch": 0.7, + "grad_norm": 0.30815081350458856, + "learning_rate": 4.311254525735075e-06, + "loss": 0.226, "step": 15278 }, { - "epoch": 0.88, - "grad_norm": 1.4626098353168415, - "learning_rate": 7.724605528852702e-07, - "loss": 0.5868, + "epoch": 0.7, + "grad_norm": 0.4477222467726712, + "learning_rate": 4.310030887765288e-06, + "loss": 0.3157, "step": 15279 }, { - "epoch": 0.88, - "grad_norm": 0.35047118850359527, - "learning_rate": 7.717435362678361e-07, - "loss": 0.1945, + "epoch": 0.7, + "grad_norm": 0.7484973045031489, + "learning_rate": 4.308807375765932e-06, + "loss": 0.3812, "step": 15280 }, { - "epoch": 0.88, - "grad_norm": 0.23788708299943856, - "learning_rate": 7.71026839224529e-07, - "loss": 0.2045, + "epoch": 0.7, + "grad_norm": 0.4744347175962742, + "learning_rate": 4.307583989764094e-06, + "loss": 0.2174, "step": 15281 }, { - "epoch": 0.88, - "grad_norm": 0.6010442606371211, - "learning_rate": 7.703104617801649e-07, - "loss": 0.3379, + "epoch": 0.7, + "grad_norm": 0.31054964720380523, + "learning_rate": 4.306360729786867e-06, + "loss": 0.2752, "step": 15282 }, { - "epoch": 0.88, - "grad_norm": 0.32129184135251976, - "learning_rate": 7.695944039595526e-07, - "loss": 0.2314, + "epoch": 0.7, + "grad_norm": 0.24985228829383402, + "learning_rate": 4.30513759586132e-06, + "loss": 0.1202, "step": 15283 }, { - "epoch": 0.88, - "grad_norm": 1.2358162206069345, - "learning_rate": 7.688786657874881e-07, - "loss": 0.757, + "epoch": 0.7, + "grad_norm": 0.6048465249555531, + "learning_rate": 4.303914588014538e-06, + "loss": 0.2514, "step": 15284 }, { - "epoch": 0.88, - "grad_norm": 0.302450566459852, - "learning_rate": 7.681632472887601e-07, - "loss": 0.2894, + "epoch": 0.7, + "grad_norm": 0.36516203421122423, + "learning_rate": 4.3026917062736015e-06, + "loss": 0.2985, "step": 15285 }, { - "epoch": 0.88, - "grad_norm": 0.36525775072758804, - "learning_rate": 7.674481484881413e-07, - "loss": 0.2433, + "epoch": 0.7, + "grad_norm": 0.3485125409525159, + "learning_rate": 4.301468950665575e-06, + "loss": 0.3194, "step": 15286 }, { - "epoch": 0.88, - "grad_norm": 0.12990322688717965, - "learning_rate": 7.667333694103962e-07, - "loss": 0.07, + "epoch": 0.7, + "grad_norm": 1.6112054817186665, + "learning_rate": 4.300246321217538e-06, + "loss": 0.7396, "step": 15287 }, { - "epoch": 0.88, - "grad_norm": 0.530598155745847, - "learning_rate": 7.660189100802762e-07, - "loss": 0.3354, + "epoch": 0.7, + "grad_norm": 0.36182032890410737, + "learning_rate": 4.29902381795655e-06, + "loss": 0.2009, "step": 15288 }, { - "epoch": 0.88, - "grad_norm": 0.26335616434420217, - "learning_rate": 7.653047705225258e-07, - "loss": 0.2555, + "epoch": 0.7, + "grad_norm": 0.31005876782673997, + "learning_rate": 4.29780144090968e-06, + "loss": 0.1824, "step": 15289 }, { - "epoch": 0.88, - "grad_norm": 0.4268927274168955, - "learning_rate": 7.645909507618732e-07, - "loss": 0.2675, + "epoch": 0.7, + "grad_norm": 0.37821869657334084, + "learning_rate": 4.296579190103993e-06, + "loss": 0.3167, "step": 15290 }, { - "epoch": 0.88, - "grad_norm": 0.7647099515322016, - "learning_rate": 7.638774508230395e-07, - "loss": 0.4027, + "epoch": 0.7, + "grad_norm": 0.3202556017833389, + "learning_rate": 4.295357065566543e-06, + "loss": 0.2149, "step": 15291 }, { - "epoch": 0.88, - "grad_norm": 0.3398062115060478, - "learning_rate": 7.631642707307319e-07, - "loss": 0.2611, + "epoch": 0.7, + "grad_norm": 1.3071745847842602, + "learning_rate": 4.294135067324389e-06, + "loss": 0.7354, "step": 15292 }, { - "epoch": 0.88, - "grad_norm": 0.2679660624561897, - "learning_rate": 7.624514105096492e-07, - "loss": 0.2, + "epoch": 0.7, + "grad_norm": 0.5090919673158785, + "learning_rate": 4.292913195404587e-06, + "loss": 0.325, "step": 15293 }, { - "epoch": 0.88, - "grad_norm": 0.6026562844344492, - "learning_rate": 7.617388701844764e-07, - "loss": 0.3465, + "epoch": 0.7, + "grad_norm": 0.28805711218767444, + "learning_rate": 4.291691449834187e-06, + "loss": 0.2055, "step": 15294 }, { - "epoch": 0.88, - "grad_norm": 0.25505961167579344, - "learning_rate": 7.610266497798913e-07, - "loss": 0.2145, + "epoch": 0.7, + "grad_norm": 0.31950104136296664, + "learning_rate": 4.290469830640238e-06, + "loss": 0.1844, "step": 15295 }, { - "epoch": 0.88, - "grad_norm": 1.200358480855169, - "learning_rate": 7.603147493205531e-07, - "loss": 0.3905, + "epoch": 0.7, + "grad_norm": 0.6678118621798554, + "learning_rate": 4.2892483378497806e-06, + "loss": 0.3673, "step": 15296 }, { - "epoch": 0.88, - "grad_norm": 0.3565559048959195, - "learning_rate": 7.59603168831119e-07, - "loss": 0.2969, + "epoch": 0.7, + "grad_norm": 0.374113349985378, + "learning_rate": 4.288026971489861e-06, + "loss": 0.2311, "step": 15297 }, { - "epoch": 0.88, - "grad_norm": 0.3149087956944596, - "learning_rate": 7.588919083362301e-07, - "loss": 0.2396, + "epoch": 0.7, + "grad_norm": 0.37379184730111825, + "learning_rate": 4.286805731587519e-06, + "loss": 0.2901, "step": 15298 }, { - "epoch": 0.88, - "grad_norm": 0.6796516619216567, - "learning_rate": 7.581809678605167e-07, - "loss": 0.3689, + "epoch": 0.7, + "grad_norm": 0.9464913903225278, + "learning_rate": 4.2855846181697945e-06, + "loss": 0.4045, "step": 15299 }, { - "epoch": 0.88, - "grad_norm": 0.34306570273858433, - "learning_rate": 7.574703474285971e-07, - "loss": 0.2018, + "epoch": 0.7, + "grad_norm": 0.37999810361032943, + "learning_rate": 4.284363631263716e-06, + "loss": 0.2759, "step": 15300 }, { - "epoch": 0.88, - "grad_norm": 0.253075691212108, - "learning_rate": 7.567600470650849e-07, - "loss": 0.2466, + "epoch": 0.7, + "grad_norm": 0.2649797518446157, + "learning_rate": 4.283142770896318e-06, + "loss": 0.1754, "step": 15301 }, { - "epoch": 0.88, - "grad_norm": 0.41639118533895647, - "learning_rate": 7.560500667945736e-07, - "loss": 0.2633, + "epoch": 0.7, + "grad_norm": 0.5086918035465757, + "learning_rate": 4.281922037094632e-06, + "loss": 0.3281, "step": 15302 }, { - "epoch": 0.88, - "grad_norm": 0.9729597607745787, - "learning_rate": 7.553404066416514e-07, - "loss": 0.2259, + "epoch": 0.7, + "grad_norm": 0.3510898679729995, + "learning_rate": 4.2807014298856775e-06, + "loss": 0.2862, "step": 15303 }, { - "epoch": 0.88, - "grad_norm": 0.36267534769957355, - "learning_rate": 7.546310666308909e-07, - "loss": 0.2609, + "epoch": 0.7, + "grad_norm": 1.1363080873174627, + "learning_rate": 4.279480949296485e-06, + "loss": 0.4166, "step": 15304 }, { - "epoch": 0.88, - "grad_norm": 0.3762109035053571, - "learning_rate": 7.539220467868613e-07, - "loss": 0.2912, + "epoch": 0.7, + "grad_norm": 0.39447667770067746, + "learning_rate": 4.278260595354067e-06, + "loss": 0.3177, "step": 15305 }, { - "epoch": 0.88, - "grad_norm": 0.37831387911988446, - "learning_rate": 7.532133471341141e-07, - "loss": 0.1914, + "epoch": 0.7, + "grad_norm": 0.34960040932559466, + "learning_rate": 4.277040368085444e-06, + "loss": 0.2787, "step": 15306 }, { - "epoch": 0.88, - "grad_norm": 0.2679166493911752, - "learning_rate": 7.525049676971907e-07, - "loss": 0.2282, + "epoch": 0.7, + "grad_norm": 0.3953989447219193, + "learning_rate": 4.275820267517636e-06, + "loss": 0.1433, "step": 15307 }, { - "epoch": 0.88, - "grad_norm": 1.1468120655160323, - "learning_rate": 7.517969085006227e-07, - "loss": 0.7836, + "epoch": 0.7, + "grad_norm": 0.5208892383127405, + "learning_rate": 4.2746002936776465e-06, + "loss": 0.297, "step": 15308 }, { - "epoch": 0.88, - "grad_norm": 0.3255850537957562, - "learning_rate": 7.510891695689282e-07, - "loss": 0.2195, + "epoch": 0.7, + "grad_norm": 0.3281915526683968, + "learning_rate": 4.273380446592492e-06, + "loss": 0.2552, "step": 15309 }, { - "epoch": 0.88, - "grad_norm": 0.35726564990652665, - "learning_rate": 7.503817509266198e-07, - "loss": 0.2898, + "epoch": 0.7, + "grad_norm": 0.39651922581499216, + "learning_rate": 4.272160726289167e-06, + "loss": 0.2585, "step": 15310 }, { - "epoch": 0.88, - "grad_norm": 0.6503155915878299, - "learning_rate": 7.496746525981935e-07, - "loss": 0.3677, + "epoch": 0.7, + "grad_norm": 1.242398122481821, + "learning_rate": 4.270941132794691e-06, + "loss": 0.4497, "step": 15311 }, { - "epoch": 0.88, - "grad_norm": 0.37840816000222754, - "learning_rate": 7.489678746081364e-07, - "loss": 0.2404, + "epoch": 0.7, + "grad_norm": 0.3531697437808292, + "learning_rate": 4.269721666136053e-06, + "loss": 0.2438, "step": 15312 }, { - "epoch": 0.88, - "grad_norm": 0.29412005921689116, - "learning_rate": 7.482614169809222e-07, - "loss": 0.1814, + "epoch": 0.7, + "grad_norm": 0.85249707842036, + "learning_rate": 4.268502326340258e-06, + "loss": 0.4163, "step": 15313 }, { - "epoch": 0.88, - "grad_norm": 0.4733546443356241, - "learning_rate": 7.475552797410191e-07, - "loss": 0.396, + "epoch": 0.7, + "grad_norm": 0.3004810278797122, + "learning_rate": 4.267283113434293e-06, + "loss": 0.2111, "step": 15314 }, { - "epoch": 0.88, - "grad_norm": 0.44759966457147815, - "learning_rate": 7.468494629128786e-07, - "loss": 0.2897, + "epoch": 0.7, + "grad_norm": 0.4020644499421195, + "learning_rate": 4.2660640274451545e-06, + "loss": 0.2596, "step": 15315 }, { - "epoch": 0.88, - "grad_norm": 0.32367457949655865, - "learning_rate": 7.461439665209435e-07, - "loss": 0.2157, + "epoch": 0.7, + "grad_norm": 0.47869915659433593, + "learning_rate": 4.264845068399837e-06, + "loss": 0.3198, "step": 15316 }, { - "epoch": 0.88, - "grad_norm": 0.4080924876089095, - "learning_rate": 7.454387905896432e-07, - "loss": 0.306, + "epoch": 0.7, + "grad_norm": 0.41671310147512824, + "learning_rate": 4.2636262363253174e-06, + "loss": 0.2298, "step": 15317 }, { - "epoch": 0.88, - "grad_norm": 1.2171090379852552, - "learning_rate": 7.447339351434013e-07, - "loss": 0.4721, + "epoch": 0.7, + "grad_norm": 0.42851345268305224, + "learning_rate": 4.262407531248585e-06, + "loss": 0.29, "step": 15318 }, { - "epoch": 0.88, - "grad_norm": 0.2858970690734332, - "learning_rate": 7.44029400206625e-07, - "loss": 0.1938, + "epoch": 0.7, + "grad_norm": 0.7433373801554091, + "learning_rate": 4.261188953196622e-06, + "loss": 0.3993, "step": 15319 }, { - "epoch": 0.88, - "grad_norm": 1.2425084966006583, - "learning_rate": 7.433251858037127e-07, - "loss": 0.7118, + "epoch": 0.7, + "grad_norm": 0.4824341391987971, + "learning_rate": 4.259970502196402e-06, + "loss": 0.1741, "step": 15320 }, { - "epoch": 0.88, - "grad_norm": 0.2196747731628239, - "learning_rate": 7.426212919590503e-07, - "loss": 0.2056, + "epoch": 0.7, + "grad_norm": 0.3402527677811564, + "learning_rate": 4.258752178274906e-06, + "loss": 0.2543, "step": 15321 }, { - "epoch": 0.88, - "grad_norm": 0.29882954862740696, - "learning_rate": 7.419177186970139e-07, - "loss": 0.2113, + "epoch": 0.7, + "grad_norm": 0.39359682227474996, + "learning_rate": 4.257533981459097e-06, + "loss": 0.3179, "step": 15322 }, { - "epoch": 0.88, - "grad_norm": 0.8049704432913872, - "learning_rate": 7.412144660419706e-07, - "loss": 0.4391, + "epoch": 0.7, + "grad_norm": 0.44511844007639406, + "learning_rate": 4.256315911775957e-06, + "loss": 0.0956, "step": 15323 }, { - "epoch": 0.88, - "grad_norm": 0.5968983898094012, - "learning_rate": 7.405115340182723e-07, - "loss": 0.3162, + "epoch": 0.7, + "grad_norm": 0.3528584713468951, + "learning_rate": 4.255097969252448e-06, + "loss": 0.2592, "step": 15324 }, { - "epoch": 0.88, - "grad_norm": 0.28930754291418537, - "learning_rate": 7.398089226502603e-07, - "loss": 0.2629, + "epoch": 0.7, + "grad_norm": 0.6230787044164713, + "learning_rate": 4.253880153915527e-06, + "loss": 0.3441, "step": 15325 }, { - "epoch": 0.88, - "grad_norm": 1.1015836359844582, - "learning_rate": 7.391066319622664e-07, - "loss": 0.5367, + "epoch": 0.7, + "grad_norm": 0.5722242935330832, + "learning_rate": 4.252662465792167e-06, + "loss": 0.3249, "step": 15326 }, { - "epoch": 0.88, - "grad_norm": 0.3365458212983297, - "learning_rate": 7.384046619786123e-07, - "loss": 0.2162, + "epoch": 0.7, + "grad_norm": 0.27571447859213316, + "learning_rate": 4.2514449049093135e-06, + "loss": 0.1762, "step": 15327 }, { - "epoch": 0.88, - "grad_norm": 0.44176905324324517, - "learning_rate": 7.377030127236073e-07, - "loss": 0.2576, + "epoch": 0.7, + "grad_norm": 1.2593432048010116, + "learning_rate": 4.2502274712939355e-06, + "loss": 0.7172, "step": 15328 }, { - "epoch": 0.88, - "grad_norm": 0.2811618795819501, - "learning_rate": 7.370016842215488e-07, - "loss": 0.2339, + "epoch": 0.7, + "grad_norm": 0.275134740221021, + "learning_rate": 4.249010164972976e-06, + "loss": 0.2248, "step": 15329 }, { - "epoch": 0.88, - "grad_norm": 1.281599200325273, - "learning_rate": 7.363006764967228e-07, - "loss": 0.6169, + "epoch": 0.7, + "grad_norm": 0.333883935709833, + "learning_rate": 4.247792985973392e-06, + "loss": 0.2134, "step": 15330 }, { - "epoch": 0.88, - "grad_norm": 0.3883951981613511, - "learning_rate": 7.355999895734067e-07, - "loss": 0.2458, + "epoch": 0.7, + "grad_norm": 0.7890296897483274, + "learning_rate": 4.246575934322124e-06, + "loss": 0.3778, "step": 15331 }, { - "epoch": 0.88, - "grad_norm": 0.35906664183412457, - "learning_rate": 7.348996234758643e-07, - "loss": 0.2571, + "epoch": 0.7, + "grad_norm": 1.3482788625239726, + "learning_rate": 4.24535901004612e-06, + "loss": 0.7107, "step": 15332 }, { - "epoch": 0.88, - "grad_norm": 0.4223644010226172, - "learning_rate": 7.341995782283506e-07, - "loss": 0.2908, + "epoch": 0.7, + "grad_norm": 0.29484148588811754, + "learning_rate": 4.244142213172325e-06, + "loss": 0.1989, "step": 15333 }, { - "epoch": 0.88, - "grad_norm": 0.39088380406576523, - "learning_rate": 7.334998538551042e-07, - "loss": 0.273, + "epoch": 0.7, + "grad_norm": 0.38927109355643214, + "learning_rate": 4.2429255437276696e-06, + "loss": 0.3424, "step": 15334 }, { - "epoch": 0.88, - "grad_norm": 0.3214635287901621, - "learning_rate": 7.328004503803609e-07, - "loss": 0.1508, + "epoch": 0.7, + "grad_norm": 0.40408135239629644, + "learning_rate": 4.241709001739094e-06, + "loss": 0.236, "step": 15335 }, { - "epoch": 0.88, - "grad_norm": 0.5600623509613721, - "learning_rate": 7.321013678283407e-07, - "loss": 0.3081, + "epoch": 0.7, + "grad_norm": 0.3215587394549872, + "learning_rate": 4.240492587233534e-06, + "loss": 0.1837, "step": 15336 }, { - "epoch": 0.88, - "grad_norm": 0.272040944032163, - "learning_rate": 7.314026062232504e-07, - "loss": 0.244, + "epoch": 0.7, + "grad_norm": 0.3868909270445274, + "learning_rate": 4.239276300237916e-06, + "loss": 0.282, "step": 15337 }, { - "epoch": 0.88, - "grad_norm": 1.168178953668496, - "learning_rate": 7.307041655892877e-07, - "loss": 0.7761, + "epoch": 0.7, + "grad_norm": 1.401761542437, + "learning_rate": 4.23806014077917e-06, + "loss": 0.6074, "step": 15338 }, { - "epoch": 0.88, - "grad_norm": 0.5828528196274684, - "learning_rate": 7.300060459506431e-07, - "loss": 0.3033, + "epoch": 0.7, + "grad_norm": 0.39714586362890947, + "learning_rate": 4.236844108884215e-06, + "loss": 0.2626, "step": 15339 }, { - "epoch": 0.88, - "grad_norm": 0.25286178191941955, - "learning_rate": 7.293082473314905e-07, - "loss": 0.2107, + "epoch": 0.7, + "grad_norm": 0.22237809552599133, + "learning_rate": 4.235628204579978e-06, + "loss": 0.1532, "step": 15340 }, { - "epoch": 0.88, - "grad_norm": 0.2580447143807208, - "learning_rate": 7.286107697559952e-07, - "loss": 0.2347, + "epoch": 0.7, + "grad_norm": 0.4123042403345458, + "learning_rate": 4.23441242789338e-06, + "loss": 0.3367, "step": 15341 }, { - "epoch": 0.88, - "grad_norm": 1.3155362942915954, - "learning_rate": 7.279136132483078e-07, - "loss": 0.2337, + "epoch": 0.7, + "grad_norm": 0.4098401997782652, + "learning_rate": 4.2331967788513295e-06, + "loss": 0.2837, "step": 15342 }, { - "epoch": 0.88, - "grad_norm": 0.3069915058726905, - "learning_rate": 7.272167778325756e-07, - "loss": 0.2612, + "epoch": 0.7, + "grad_norm": 0.5357102564055954, + "learning_rate": 4.231981257480749e-06, + "loss": 0.2517, "step": 15343 }, { - "epoch": 0.88, - "grad_norm": 0.872607402488223, - "learning_rate": 7.265202635329272e-07, - "loss": 0.5209, + "epoch": 0.7, + "grad_norm": 1.1471298984080842, + "learning_rate": 4.230765863808537e-06, + "loss": 0.5929, "step": 15344 }, { - "epoch": 0.88, - "grad_norm": 0.33572217596314796, - "learning_rate": 7.258240703734832e-07, - "loss": 0.2534, + "epoch": 0.7, + "grad_norm": 0.301588946012003, + "learning_rate": 4.229550597861615e-06, + "loss": 0.274, "step": 15345 }, { - "epoch": 0.88, - "grad_norm": 0.36957113937852054, - "learning_rate": 7.251281983783532e-07, - "loss": 0.2541, + "epoch": 0.7, + "grad_norm": 0.5204227914579626, + "learning_rate": 4.2283354596668765e-06, + "loss": 0.2712, "step": 15346 }, { - "epoch": 0.88, - "grad_norm": 0.25658032069472986, - "learning_rate": 7.244326475716323e-07, - "loss": 0.1934, + "epoch": 0.71, + "grad_norm": 0.5353373222632442, + "learning_rate": 4.227120449251233e-06, + "loss": 0.2951, "step": 15347 }, { - "epoch": 0.88, - "grad_norm": 0.36518152555370853, - "learning_rate": 7.237374179774125e-07, - "loss": 0.2461, + "epoch": 0.71, + "grad_norm": 0.3597781096970731, + "learning_rate": 4.225905566641575e-06, + "loss": 0.2743, "step": 15348 }, { - "epoch": 0.88, - "grad_norm": 0.3860018083993852, - "learning_rate": 7.230425096197669e-07, - "loss": 0.2512, + "epoch": 0.71, + "grad_norm": 0.27071760758827923, + "learning_rate": 4.224690811864803e-06, + "loss": 0.1835, "step": 15349 }, { - "epoch": 0.88, - "grad_norm": 0.4407144779168672, - "learning_rate": 7.223479225227603e-07, - "loss": 0.3225, + "epoch": 0.71, + "grad_norm": 1.438326998759821, + "learning_rate": 4.223476184947813e-06, + "loss": 0.5187, "step": 15350 }, { - "epoch": 0.88, - "grad_norm": 0.8099477334987211, - "learning_rate": 7.216536567104449e-07, - "loss": 0.4107, + "epoch": 0.71, + "grad_norm": 0.6135019919869543, + "learning_rate": 4.222261685917489e-06, + "loss": 0.311, "step": 15351 }, { - "epoch": 0.88, - "grad_norm": 0.3715733078958953, - "learning_rate": 7.209597122068657e-07, - "loss": 0.1808, + "epoch": 0.71, + "grad_norm": 0.4365307279227308, + "learning_rate": 4.221047314800723e-06, + "loss": 0.3268, "step": 15352 }, { - "epoch": 0.88, - "grad_norm": 0.2045474274396735, - "learning_rate": 7.202660890360524e-07, - "loss": 0.2173, + "epoch": 0.71, + "grad_norm": 0.4219390302659312, + "learning_rate": 4.219833071624404e-06, + "loss": 0.2778, "step": 15353 }, { - "epoch": 0.88, - "grad_norm": 1.600767535273116, - "learning_rate": 7.195727872220248e-07, - "loss": 0.7834, + "epoch": 0.71, + "grad_norm": 0.6347707610036952, + "learning_rate": 4.218618956415406e-06, + "loss": 0.3517, "step": 15354 }, { - "epoch": 0.88, - "grad_norm": 0.2723621726814691, - "learning_rate": 7.188798067887926e-07, - "loss": 0.1995, + "epoch": 0.71, + "grad_norm": 0.24197152271275132, + "learning_rate": 4.217404969200615e-06, + "loss": 0.2025, "step": 15355 }, { - "epoch": 0.88, - "grad_norm": 0.4313450825328893, - "learning_rate": 7.181871477603542e-07, - "loss": 0.3322, + "epoch": 0.71, + "grad_norm": 1.4884478240152692, + "learning_rate": 4.2161911100069005e-06, + "loss": 0.3149, "step": 15356 }, { - "epoch": 0.88, - "grad_norm": 0.43795447255344067, - "learning_rate": 7.174948101606949e-07, - "loss": 0.3239, + "epoch": 0.71, + "grad_norm": 0.30795386481110154, + "learning_rate": 4.214977378861141e-06, + "loss": 0.2648, "step": 15357 }, { - "epoch": 0.88, - "grad_norm": 0.3489367764018725, - "learning_rate": 7.168027940137923e-07, - "loss": 0.1882, + "epoch": 0.71, + "grad_norm": 0.39820627860602775, + "learning_rate": 4.213763775790207e-06, + "loss": 0.3269, "step": 15358 }, { - "epoch": 0.88, - "grad_norm": 0.2626421008762479, - "learning_rate": 7.161110993436093e-07, - "loss": 0.1515, + "epoch": 0.71, + "grad_norm": 0.7240613409280784, + "learning_rate": 4.21255030082097e-06, + "loss": 0.2814, "step": 15359 }, { - "epoch": 0.88, - "grad_norm": 0.3860502345972539, - "learning_rate": 7.15419726174098e-07, - "loss": 0.2858, + "epoch": 0.71, + "grad_norm": 0.3081369324763965, + "learning_rate": 4.21133695398029e-06, + "loss": 0.207, "step": 15360 }, { - "epoch": 0.88, - "grad_norm": 0.36349650078670603, - "learning_rate": 7.147286745292049e-07, - "loss": 0.1971, + "epoch": 0.71, + "grad_norm": 0.24893359969260237, + "learning_rate": 4.210123735295025e-06, + "loss": 0.2571, "step": 15361 }, { - "epoch": 0.88, - "grad_norm": 0.44592750277136384, - "learning_rate": 7.140379444328571e-07, - "loss": 0.3327, + "epoch": 0.71, + "grad_norm": 1.5268217748586561, + "learning_rate": 4.208910644792047e-06, + "loss": 0.661, "step": 15362 }, { - "epoch": 0.88, - "grad_norm": 1.0136525434025818, - "learning_rate": 7.13347535908977e-07, - "loss": 0.5487, + "epoch": 0.71, + "grad_norm": 0.33942435288790757, + "learning_rate": 4.207697682498202e-06, + "loss": 0.2024, "step": 15363 }, { - "epoch": 0.88, - "grad_norm": 0.405578078064654, - "learning_rate": 7.126574489814719e-07, - "loss": 0.2968, + "epoch": 0.71, + "grad_norm": 0.587004253994649, + "learning_rate": 4.206484848440351e-06, + "loss": 0.3671, "step": 15364 }, { - "epoch": 0.88, - "grad_norm": 0.2241058466957011, - "learning_rate": 7.119676836742407e-07, - "loss": 0.1817, + "epoch": 0.71, + "grad_norm": 0.46543224748117207, + "learning_rate": 4.205272142645338e-06, + "loss": 0.3079, "step": 15365 }, { - "epoch": 0.88, - "grad_norm": 0.5356592030749281, - "learning_rate": 7.112782400111684e-07, - "loss": 0.2669, + "epoch": 0.71, + "grad_norm": 0.44760822139769224, + "learning_rate": 4.204059565140015e-06, + "loss": 0.1927, "step": 15366 }, { - "epoch": 0.88, - "grad_norm": 0.6030656386460477, - "learning_rate": 7.105891180161306e-07, - "loss": 0.3748, + "epoch": 0.71, + "grad_norm": 0.25538643422192153, + "learning_rate": 4.20284711595123e-06, + "loss": 0.1812, "step": 15367 }, { - "epoch": 0.88, - "grad_norm": 0.24970782740649353, - "learning_rate": 7.099003177129926e-07, - "loss": 0.2436, + "epoch": 0.71, + "grad_norm": 1.1585266365412363, + "learning_rate": 4.201634795105819e-06, + "loss": 0.5725, "step": 15368 }, { - "epoch": 0.88, - "grad_norm": 1.3865134035245326, - "learning_rate": 7.092118391256076e-07, - "loss": 0.6055, + "epoch": 0.71, + "grad_norm": 0.2781257863193359, + "learning_rate": 4.20042260263063e-06, + "loss": 0.2028, "step": 15369 }, { - "epoch": 0.88, - "grad_norm": 0.7176124074822394, - "learning_rate": 7.085236822778174e-07, - "loss": 0.2688, + "epoch": 0.71, + "grad_norm": 0.3864657368699799, + "learning_rate": 4.199210538552489e-06, + "loss": 0.3064, "step": 15370 }, { - "epoch": 0.88, - "grad_norm": 0.2343011862368419, - "learning_rate": 7.078358471934521e-07, - "loss": 0.1487, + "epoch": 0.71, + "grad_norm": 0.8209636737292687, + "learning_rate": 4.197998602898238e-06, + "loss": 0.4517, "step": 15371 }, { - "epoch": 0.88, - "grad_norm": 0.378432979445646, - "learning_rate": 7.071483338963303e-07, - "loss": 0.3006, + "epoch": 0.71, + "grad_norm": 0.17154239896339205, + "learning_rate": 4.196786795694708e-06, + "loss": 0.0639, "step": 15372 }, { - "epoch": 0.88, - "grad_norm": 0.48086836045064396, - "learning_rate": 7.064611424102641e-07, - "loss": 0.2961, + "epoch": 0.71, + "grad_norm": 0.24529799628577972, + "learning_rate": 4.195575116968722e-06, + "loss": 0.2418, "step": 15373 }, { - "epoch": 0.88, - "grad_norm": 0.37437277656526946, - "learning_rate": 7.057742727590478e-07, - "loss": 0.3221, + "epoch": 0.71, + "grad_norm": 1.3180280157563797, + "learning_rate": 4.1943635667471095e-06, + "loss": 0.5779, "step": 15374 }, { - "epoch": 0.88, - "grad_norm": 0.9436427928262109, - "learning_rate": 7.050877249664701e-07, - "loss": 0.3674, + "epoch": 0.71, + "grad_norm": 0.6785745788672353, + "learning_rate": 4.1931521450566905e-06, + "loss": 0.3442, "step": 15375 }, { - "epoch": 0.88, - "grad_norm": 0.2964656910026224, - "learning_rate": 7.04401499056302e-07, - "loss": 0.2569, + "epoch": 0.71, + "grad_norm": 0.4211542838609887, + "learning_rate": 4.191940851924291e-06, + "loss": 0.2396, "step": 15376 }, { - "epoch": 0.88, - "grad_norm": 0.3690822885989384, - "learning_rate": 7.037155950523123e-07, - "loss": 0.307, + "epoch": 0.71, + "grad_norm": 0.40857305891502943, + "learning_rate": 4.190729687376719e-06, + "loss": 0.3136, "step": 15377 }, { - "epoch": 0.88, - "grad_norm": 0.26884006202752997, - "learning_rate": 7.030300129782519e-07, - "loss": 0.131, + "epoch": 0.71, + "grad_norm": 0.6590719459801646, + "learning_rate": 4.189518651440793e-06, + "loss": 0.2766, "step": 15378 }, { - "epoch": 0.88, - "grad_norm": 0.40264916010642365, - "learning_rate": 7.023447528578631e-07, - "loss": 0.2621, + "epoch": 0.71, + "grad_norm": 0.2509263728055345, + "learning_rate": 4.188307744143328e-06, + "loss": 0.1544, "step": 15379 }, { - "epoch": 0.88, - "grad_norm": 0.35794945612338336, - "learning_rate": 7.016598147148735e-07, - "loss": 0.2974, + "epoch": 0.71, + "grad_norm": 1.2898751786942517, + "learning_rate": 4.1870969655111226e-06, + "loss": 0.6735, "step": 15380 }, { - "epoch": 0.88, - "grad_norm": 0.5066463148507542, - "learning_rate": 7.009751985730062e-07, - "loss": 0.2751, + "epoch": 0.71, + "grad_norm": 0.34253351753028477, + "learning_rate": 4.185886315570993e-06, + "loss": 0.2432, "step": 15381 }, { - "epoch": 0.88, - "grad_norm": 0.8604249835369419, - "learning_rate": 7.00290904455968e-07, - "loss": 0.3744, + "epoch": 0.71, + "grad_norm": 0.606438819113309, + "learning_rate": 4.184675794349733e-06, + "loss": 0.251, "step": 15382 }, { - "epoch": 0.88, - "grad_norm": 0.29377213308048267, - "learning_rate": 6.996069323874555e-07, - "loss": 0.2061, + "epoch": 0.71, + "grad_norm": 0.9874635922233967, + "learning_rate": 4.1834654018741465e-06, + "loss": 0.4552, "step": 15383 }, { - "epoch": 0.88, - "grad_norm": 0.30323452832902037, - "learning_rate": 6.989232823911551e-07, - "loss": 0.2308, + "epoch": 0.71, + "grad_norm": 0.5172631443327038, + "learning_rate": 4.182255138171032e-06, + "loss": 0.2206, "step": 15384 }, { - "epoch": 0.88, - "grad_norm": 0.7647519096570284, - "learning_rate": 6.982399544907403e-07, - "loss": 0.3747, + "epoch": 0.71, + "grad_norm": 0.2532865362489919, + "learning_rate": 4.181045003267179e-06, + "loss": 0.2291, "step": 15385 }, { - "epoch": 0.88, - "grad_norm": 0.36798900089852343, - "learning_rate": 6.975569487098766e-07, - "loss": 0.3002, + "epoch": 0.71, + "grad_norm": 0.3095413238829962, + "learning_rate": 4.179834997189385e-06, + "loss": 0.2067, "step": 15386 }, { - "epoch": 0.88, - "grad_norm": 1.3360407176215778, - "learning_rate": 6.968742650722172e-07, - "loss": 0.7616, + "epoch": 0.71, + "grad_norm": 0.6613421214412327, + "learning_rate": 4.178625119964427e-06, + "loss": 0.3376, "step": 15387 }, { - "epoch": 0.88, - "grad_norm": 0.2787925854981777, - "learning_rate": 6.961919036014009e-07, - "loss": 0.2048, + "epoch": 0.71, + "grad_norm": 0.3784133215651523, + "learning_rate": 4.177415371619105e-06, + "loss": 0.2773, "step": 15388 }, { - "epoch": 0.88, - "grad_norm": 0.4689798303850397, - "learning_rate": 6.955098643210578e-07, - "loss": 0.2941, + "epoch": 0.71, + "grad_norm": 0.39666457897435925, + "learning_rate": 4.176205752180195e-06, + "loss": 0.247, "step": 15389 }, { - "epoch": 0.88, - "grad_norm": 0.641487238466754, - "learning_rate": 6.94828147254809e-07, - "loss": 0.3726, + "epoch": 0.71, + "grad_norm": 0.9223204238789361, + "learning_rate": 4.174996261674473e-06, + "loss": 0.4359, "step": 15390 }, { - "epoch": 0.88, - "grad_norm": 0.2461102831506282, - "learning_rate": 6.941467524262613e-07, - "loss": 0.1119, + "epoch": 0.71, + "grad_norm": 0.30045969276387835, + "learning_rate": 4.17378690012872e-06, + "loss": 0.2161, "step": 15391 }, { - "epoch": 0.88, - "grad_norm": 0.25551098270018807, - "learning_rate": 6.934656798590122e-07, - "loss": 0.281, + "epoch": 0.71, + "grad_norm": 0.3236437430833355, + "learning_rate": 4.172577667569709e-06, + "loss": 0.2095, "step": 15392 }, { - "epoch": 0.88, - "grad_norm": 1.1202496115317333, - "learning_rate": 6.927849295766442e-07, - "loss": 0.5997, + "epoch": 0.71, + "grad_norm": 0.44752825766252413, + "learning_rate": 4.171368564024216e-06, + "loss": 0.3164, "step": 15393 }, { - "epoch": 0.88, - "grad_norm": 0.6445255900823864, - "learning_rate": 6.92104501602735e-07, - "loss": 0.0993, + "epoch": 0.71, + "grad_norm": 0.3802611335532879, + "learning_rate": 4.170159589519002e-06, + "loss": 0.272, "step": 15394 }, { - "epoch": 0.88, - "grad_norm": 0.40685012455812186, - "learning_rate": 6.91424395960848e-07, - "loss": 0.295, + "epoch": 0.71, + "grad_norm": 0.871832960552936, + "learning_rate": 4.168950744080835e-06, + "loss": 0.3656, "step": 15395 }, { - "epoch": 0.88, - "grad_norm": 0.3592962271783745, - "learning_rate": 6.907446126745332e-07, - "loss": 0.3047, + "epoch": 0.71, + "grad_norm": 0.4664369864390812, + "learning_rate": 4.1677420277364825e-06, + "loss": 0.2829, "step": 15396 }, { - "epoch": 0.88, - "grad_norm": 0.2749244052812137, - "learning_rate": 6.900651517673318e-07, - "loss": 0.1568, + "epoch": 0.71, + "grad_norm": 0.3139361129527563, + "learning_rate": 4.166533440512696e-06, + "loss": 0.2915, "step": 15397 }, { - "epoch": 0.88, - "grad_norm": 0.6044148011731777, - "learning_rate": 6.893860132627739e-07, - "loss": 0.3646, + "epoch": 0.71, + "grad_norm": 0.3306167116052387, + "learning_rate": 4.165324982436242e-06, + "loss": 0.1479, "step": 15398 }, { - "epoch": 0.88, - "grad_norm": 0.49796483369447875, - "learning_rate": 6.887071971843783e-07, - "loss": 0.2891, + "epoch": 0.71, + "grad_norm": 0.6097009457182574, + "learning_rate": 4.164116653533864e-06, + "loss": 0.3207, "step": 15399 }, { - "epoch": 0.88, - "grad_norm": 0.33803796040103634, - "learning_rate": 6.880287035556521e-07, - "loss": 0.2728, + "epoch": 0.71, + "grad_norm": 0.41968822457352534, + "learning_rate": 4.16290845383232e-06, + "loss": 0.3001, "step": 15400 }, { - "epoch": 0.88, - "grad_norm": 0.343782499945726, - "learning_rate": 6.873505324000895e-07, - "loss": 0.2097, + "epoch": 0.71, + "grad_norm": 0.3993425681926469, + "learning_rate": 4.161700383358359e-06, + "loss": 0.3501, "step": 15401 }, { - "epoch": 0.88, - "grad_norm": 0.6284160615889932, - "learning_rate": 6.866726837411797e-07, - "loss": 0.3594, + "epoch": 0.71, + "grad_norm": 0.2394965038905432, + "learning_rate": 4.160492442138722e-06, + "loss": 0.0629, "step": 15402 }, { - "epoch": 0.88, - "grad_norm": 0.3844903560072926, - "learning_rate": 6.859951576023937e-07, - "loss": 0.2492, + "epoch": 0.71, + "grad_norm": 0.44358453520715546, + "learning_rate": 4.159284630200158e-06, + "loss": 0.2908, "step": 15403 }, { - "epoch": 0.89, - "grad_norm": 0.22190970897036372, - "learning_rate": 6.853179540071963e-07, - "loss": 0.2188, + "epoch": 0.71, + "grad_norm": 0.37662599930425195, + "learning_rate": 4.1580769475693946e-06, + "loss": 0.2618, "step": 15404 }, { - "epoch": 0.89, - "grad_norm": 0.46152722721730727, - "learning_rate": 6.846410729790342e-07, - "loss": 0.2762, + "epoch": 0.71, + "grad_norm": 0.2931631349319821, + "learning_rate": 4.156869394273186e-06, + "loss": 0.1786, "step": 15405 }, { - "epoch": 0.89, - "grad_norm": 0.6960344758067857, - "learning_rate": 6.839645145413543e-07, - "loss": 0.3359, + "epoch": 0.71, + "grad_norm": 0.4016340172324641, + "learning_rate": 4.1556619703382564e-06, + "loss": 0.3059, "step": 15406 }, { - "epoch": 0.89, - "grad_norm": 0.3315547930125616, - "learning_rate": 6.832882787175809e-07, - "loss": 0.2416, + "epoch": 0.71, + "grad_norm": 1.43793740258636, + "learning_rate": 4.154454675791333e-06, + "loss": 0.632, "step": 15407 }, { - "epoch": 0.89, - "grad_norm": 0.34418183719622625, - "learning_rate": 6.826123655311356e-07, - "loss": 0.2972, + "epoch": 0.71, + "grad_norm": 0.35903608230692824, + "learning_rate": 4.153247510659151e-06, + "loss": 0.2356, "step": 15408 }, { - "epoch": 0.89, - "grad_norm": 0.7768735537156152, - "learning_rate": 6.819367750054217e-07, - "loss": 0.4253, + "epoch": 0.71, + "grad_norm": 0.3487241411859696, + "learning_rate": 4.1520404749684325e-06, + "loss": 0.2438, "step": 15409 }, { - "epoch": 0.89, - "grad_norm": 0.3022317412638133, - "learning_rate": 6.812615071638363e-07, - "loss": 0.2187, + "epoch": 0.71, + "grad_norm": 0.6142249218718109, + "learning_rate": 4.1508335687459065e-06, + "loss": 0.3685, "step": 15410 }, { - "epoch": 0.89, - "grad_norm": 0.30097106667699985, - "learning_rate": 6.805865620297659e-07, - "loss": 0.1357, + "epoch": 0.71, + "grad_norm": 0.24225801761870494, + "learning_rate": 4.149626792018283e-06, + "loss": 0.1132, "step": 15411 }, { - "epoch": 0.89, - "grad_norm": 0.3699431546810101, - "learning_rate": 6.799119396265807e-07, - "loss": 0.2815, + "epoch": 0.71, + "grad_norm": 0.391653656709187, + "learning_rate": 4.1484201448122845e-06, + "loss": 0.308, "step": 15412 }, { - "epoch": 0.89, - "grad_norm": 0.36422006459287853, - "learning_rate": 6.792376399776457e-07, - "loss": 0.2794, + "epoch": 0.71, + "grad_norm": 0.2899521043333745, + "learning_rate": 4.147213627154627e-06, + "loss": 0.2276, "step": 15413 }, { - "epoch": 0.89, - "grad_norm": 0.7376350234779959, - "learning_rate": 6.785636631063075e-07, - "loss": 0.2866, + "epoch": 0.71, + "grad_norm": 1.2284209751430595, + "learning_rate": 4.146007239072016e-06, + "loss": 0.5107, "step": 15414 }, { - "epoch": 0.89, - "grad_norm": 0.4651567840856827, - "learning_rate": 6.778900090359119e-07, - "loss": 0.3252, + "epoch": 0.71, + "grad_norm": 0.3620178294227037, + "learning_rate": 4.144800980591166e-06, + "loss": 0.2105, "step": 15415 }, { - "epoch": 0.89, - "grad_norm": 0.3479007390754298, - "learning_rate": 6.772166777897838e-07, - "loss": 0.2525, + "epoch": 0.71, + "grad_norm": 0.4992675960901863, + "learning_rate": 4.143594851738775e-06, + "loss": 0.3574, "step": 15416 }, { - "epoch": 0.89, - "grad_norm": 0.20645592583378722, - "learning_rate": 6.765436693912408e-07, - "loss": 0.1553, + "epoch": 0.71, + "grad_norm": 0.4902339546530649, + "learning_rate": 4.14238885254155e-06, + "loss": 0.3668, "step": 15417 }, { - "epoch": 0.89, - "grad_norm": 0.5925146855129958, - "learning_rate": 6.758709838635879e-07, - "loss": 0.3312, + "epoch": 0.71, + "grad_norm": 0.33032145933374635, + "learning_rate": 4.141182983026194e-06, + "loss": 0.1801, "step": 15418 }, { - "epoch": 0.89, - "grad_norm": 0.36018604511633256, - "learning_rate": 6.751986212301242e-07, - "loss": 0.2976, + "epoch": 0.71, + "grad_norm": 0.25376341153793225, + "learning_rate": 4.139977243219395e-06, + "loss": 0.1738, "step": 15419 }, { - "epoch": 0.89, - "grad_norm": 0.34434040713944425, - "learning_rate": 6.745265815141311e-07, - "loss": 0.2374, + "epoch": 0.71, + "grad_norm": 0.3655073687889778, + "learning_rate": 4.138771633147856e-06, + "loss": 0.2622, "step": 15420 }, { - "epoch": 0.89, - "grad_norm": 1.2688158338463227, - "learning_rate": 6.73854864738881e-07, - "loss": 0.5905, + "epoch": 0.71, + "grad_norm": 0.35894312552821245, + "learning_rate": 4.1375661528382586e-06, + "loss": 0.2221, "step": 15421 }, { - "epoch": 0.89, - "grad_norm": 0.3130992444743742, - "learning_rate": 6.731834709276353e-07, - "loss": 0.2281, + "epoch": 0.71, + "grad_norm": 0.650551733587448, + "learning_rate": 4.136360802317301e-06, + "loss": 0.3856, "step": 15422 }, { - "epoch": 0.89, - "grad_norm": 0.373687076761885, - "learning_rate": 6.725124001036454e-07, - "loss": 0.2071, + "epoch": 0.71, + "grad_norm": 0.7632758325395279, + "learning_rate": 4.1351555816116615e-06, + "loss": 0.4784, "step": 15423 }, { - "epoch": 0.89, - "grad_norm": 0.49870489961270104, - "learning_rate": 6.718416522901506e-07, - "loss": 0.3269, + "epoch": 0.71, + "grad_norm": 0.3443741227322228, + "learning_rate": 4.133950490748028e-06, + "loss": 0.2125, "step": 15424 }, { - "epoch": 0.89, - "grad_norm": 0.23765638274613635, - "learning_rate": 6.711712275103776e-07, - "loss": 0.221, + "epoch": 0.71, + "grad_norm": 0.2683293507697789, + "learning_rate": 4.132745529753073e-06, + "loss": 0.2085, "step": 15425 }, { - "epoch": 0.89, - "grad_norm": 1.3561355023422061, - "learning_rate": 6.705011257875449e-07, - "loss": 0.5169, + "epoch": 0.71, + "grad_norm": 0.8176282824944413, + "learning_rate": 4.131540698653478e-06, + "loss": 0.4114, "step": 15426 }, { - "epoch": 0.89, - "grad_norm": 0.3737441156518405, - "learning_rate": 6.698313471448547e-07, - "loss": 0.2337, + "epoch": 0.71, + "grad_norm": 0.3227500736794906, + "learning_rate": 4.130335997475918e-06, + "loss": 0.2658, "step": 15427 }, { - "epoch": 0.89, - "grad_norm": 0.28412374842415977, - "learning_rate": 6.691618916055053e-07, - "loss": 0.2634, + "epoch": 0.71, + "grad_norm": 0.3695082738187614, + "learning_rate": 4.1291314262470595e-06, + "loss": 0.2474, "step": 15428 }, { - "epoch": 0.89, - "grad_norm": 1.260617821916191, - "learning_rate": 6.684927591926793e-07, - "loss": 0.6065, + "epoch": 0.71, + "grad_norm": 1.3294382987845506, + "learning_rate": 4.127926984993575e-06, + "loss": 0.6307, "step": 15429 }, { - "epoch": 0.89, - "grad_norm": 0.416719995810165, - "learning_rate": 6.678239499295469e-07, - "loss": 0.1985, + "epoch": 0.71, + "grad_norm": 0.35900728279375327, + "learning_rate": 4.126722673742125e-06, + "loss": 0.2596, "step": 15430 }, { - "epoch": 0.89, - "grad_norm": 0.3731138532303315, - "learning_rate": 6.671554638392696e-07, - "loss": 0.2707, + "epoch": 0.71, + "grad_norm": 0.2600538022535034, + "learning_rate": 4.125518492519375e-06, + "loss": 0.1061, "step": 15431 }, { - "epoch": 0.89, - "grad_norm": 0.26428445121349753, - "learning_rate": 6.664873009449979e-07, - "loss": 0.2391, + "epoch": 0.71, + "grad_norm": 0.41285078363524563, + "learning_rate": 4.124314441351985e-06, + "loss": 0.3172, "step": 15432 }, { - "epoch": 0.89, - "grad_norm": 0.4735796217441745, - "learning_rate": 6.658194612698687e-07, - "loss": 0.2467, + "epoch": 0.71, + "grad_norm": 0.3497006973371404, + "learning_rate": 4.123110520266609e-06, + "loss": 0.2662, "step": 15433 }, { - "epoch": 0.89, - "grad_norm": 0.3392053344013183, - "learning_rate": 6.651519448370092e-07, - "loss": 0.2764, + "epoch": 0.71, + "grad_norm": 0.854000184449874, + "learning_rate": 4.121906729289901e-06, + "loss": 0.2759, "step": 15434 }, { - "epoch": 0.89, - "grad_norm": 0.4614700653109635, - "learning_rate": 6.644847516695385e-07, - "loss": 0.3329, + "epoch": 0.71, + "grad_norm": 1.0324275323190373, + "learning_rate": 4.120703068448515e-06, + "loss": 0.7029, "step": 15435 }, { - "epoch": 0.89, - "grad_norm": 0.3974557145871179, - "learning_rate": 6.638178817905594e-07, - "loss": 0.2162, + "epoch": 0.71, + "grad_norm": 0.3837020625415799, + "learning_rate": 4.119499537769094e-06, + "loss": 0.2517, "step": 15436 }, { - "epoch": 0.89, - "grad_norm": 0.31729301878064864, - "learning_rate": 6.631513352231644e-07, - "loss": 0.2351, + "epoch": 0.71, + "grad_norm": 0.21799785704754804, + "learning_rate": 4.118296137278289e-06, + "loss": 0.1689, "step": 15437 }, { - "epoch": 0.89, - "grad_norm": 0.6014970938156791, - "learning_rate": 6.624851119904385e-07, - "loss": 0.2425, + "epoch": 0.71, + "grad_norm": 0.6904432865575739, + "learning_rate": 4.117092867002731e-06, + "loss": 0.3665, "step": 15438 }, { - "epoch": 0.89, - "grad_norm": 0.46471253231576026, - "learning_rate": 6.618192121154488e-07, - "loss": 0.3678, + "epoch": 0.71, + "grad_norm": 0.4418088612818454, + "learning_rate": 4.115889726969075e-06, + "loss": 0.2674, "step": 15439 }, { - "epoch": 0.89, - "grad_norm": 0.3236888175086646, - "learning_rate": 6.611536356212612e-07, - "loss": 0.2154, + "epoch": 0.71, + "grad_norm": 0.4133079117370439, + "learning_rate": 4.114686717203945e-06, + "loss": 0.3097, "step": 15440 }, { - "epoch": 0.89, - "grad_norm": 0.5426087946165549, - "learning_rate": 6.604883825309205e-07, - "loss": 0.3937, + "epoch": 0.71, + "grad_norm": 0.47971156359274614, + "learning_rate": 4.113483837733982e-06, + "loss": 0.2939, "step": 15441 }, { - "epoch": 0.89, - "grad_norm": 1.5414638027935905, - "learning_rate": 6.598234528674663e-07, - "loss": 0.6158, + "epoch": 0.71, + "grad_norm": 0.3467174072778784, + "learning_rate": 4.112281088585811e-06, + "loss": 0.2453, "step": 15442 }, { - "epoch": 0.89, - "grad_norm": 0.18180585805377175, - "learning_rate": 6.591588466539222e-07, - "loss": 0.1348, + "epoch": 0.71, + "grad_norm": 0.7640903404353571, + "learning_rate": 4.111078469786062e-06, + "loss": 0.4084, "step": 15443 }, { - "epoch": 0.89, - "grad_norm": 0.33920005873970366, - "learning_rate": 6.584945639133067e-07, - "loss": 0.299, + "epoch": 0.71, + "grad_norm": 0.4200625396089796, + "learning_rate": 4.109875981361363e-06, + "loss": 0.263, "step": 15444 }, { - "epoch": 0.89, - "grad_norm": 1.3235160572056892, - "learning_rate": 6.578306046686234e-07, - "loss": 0.6165, + "epoch": 0.71, + "grad_norm": 0.27572537125406327, + "learning_rate": 4.1086736233383285e-06, + "loss": 0.2005, "step": 15445 }, { - "epoch": 0.89, - "grad_norm": 0.31586260327256654, - "learning_rate": 6.57166968942865e-07, - "loss": 0.2081, + "epoch": 0.71, + "grad_norm": 0.557137537019506, + "learning_rate": 4.107471395743586e-06, + "loss": 0.2743, "step": 15446 }, { - "epoch": 0.89, - "grad_norm": 0.4470041807775742, - "learning_rate": 6.565036567590099e-07, - "loss": 0.3292, + "epoch": 0.71, + "grad_norm": 1.02162319936494, + "learning_rate": 4.106269298603744e-06, + "loss": 0.5312, "step": 15447 }, { - "epoch": 0.89, - "grad_norm": 0.50546493909433, - "learning_rate": 6.558406681400342e-07, - "loss": 0.3378, + "epoch": 0.71, + "grad_norm": 0.2934550001426496, + "learning_rate": 4.105067331945419e-06, + "loss": 0.2416, "step": 15448 }, { - "epoch": 0.89, - "grad_norm": 0.33419202257022956, - "learning_rate": 6.55178003108894e-07, - "loss": 0.2611, + "epoch": 0.71, + "grad_norm": 0.5467428592087814, + "learning_rate": 4.103865495795225e-06, + "loss": 0.3415, "step": 15449 }, { - "epoch": 0.89, - "grad_norm": 0.16134420138718436, - "learning_rate": 6.545156616885373e-07, - "loss": 0.071, + "epoch": 0.71, + "grad_norm": 0.5080358849625232, + "learning_rate": 4.102663790179764e-06, + "loss": 0.2317, "step": 15450 }, { - "epoch": 0.89, - "grad_norm": 0.32891260695083896, - "learning_rate": 6.538536439019016e-07, - "loss": 0.3021, + "epoch": 0.71, + "grad_norm": 0.26077960174848236, + "learning_rate": 4.1014622151256415e-06, + "loss": 0.1878, "step": 15451 }, { - "epoch": 0.89, - "grad_norm": 0.37903738502174683, - "learning_rate": 6.531919497719097e-07, - "loss": 0.2468, + "epoch": 0.71, + "grad_norm": 0.5545864511694208, + "learning_rate": 4.100260770659461e-06, + "loss": 0.3488, "step": 15452 }, { - "epoch": 0.89, - "grad_norm": 0.4409361141174233, - "learning_rate": 6.52530579321482e-07, - "loss": 0.2674, + "epoch": 0.71, + "grad_norm": 0.5275921970827687, + "learning_rate": 4.0990594568078235e-06, + "loss": 0.3706, "step": 15453 }, { - "epoch": 0.89, - "grad_norm": 0.5984776888889717, - "learning_rate": 6.51869532573517e-07, - "loss": 0.2518, + "epoch": 0.71, + "grad_norm": 0.3254175683471316, + "learning_rate": 4.0978582735973225e-06, + "loss": 0.1988, "step": 15454 }, { - "epoch": 0.89, - "grad_norm": 0.37448200080698674, - "learning_rate": 6.512088095509095e-07, - "loss": 0.2645, + "epoch": 0.71, + "grad_norm": 0.8330488177349571, + "learning_rate": 4.0966572210545445e-06, + "loss": 0.4093, "step": 15455 }, { - "epoch": 0.89, - "grad_norm": 0.24355986018680098, - "learning_rate": 6.505484102765358e-07, - "loss": 0.1973, + "epoch": 0.71, + "grad_norm": 0.42780915546688275, + "learning_rate": 4.095456299206092e-06, + "loss": 0.2922, "step": 15456 }, { - "epoch": 0.89, - "grad_norm": 0.8738256093138846, - "learning_rate": 6.498883347732709e-07, - "loss": 0.4653, + "epoch": 0.71, + "grad_norm": 0.2642948363764565, + "learning_rate": 4.094255508078544e-06, + "loss": 0.1304, "step": 15457 }, { - "epoch": 0.89, - "grad_norm": 0.39720267810411064, - "learning_rate": 6.492285830639711e-07, - "loss": 0.2898, + "epoch": 0.71, + "grad_norm": 0.38788023576313707, + "learning_rate": 4.093054847698489e-06, + "loss": 0.2502, "step": 15458 }, { - "epoch": 0.89, - "grad_norm": 0.284470585352059, - "learning_rate": 6.485691551714835e-07, - "loss": 0.2527, + "epoch": 0.71, + "grad_norm": 0.5080623078672286, + "learning_rate": 4.091854318092504e-06, + "loss": 0.3538, "step": 15459 }, { - "epoch": 0.89, - "grad_norm": 1.3712482470378675, - "learning_rate": 6.47910051118642e-07, - "loss": 0.5142, + "epoch": 0.71, + "grad_norm": 0.31306630256084184, + "learning_rate": 4.0906539192871695e-06, + "loss": 0.1995, "step": 15460 }, { - "epoch": 0.89, - "grad_norm": 0.3337385065605379, - "learning_rate": 6.472512709282752e-07, - "loss": 0.2579, + "epoch": 0.71, + "grad_norm": 0.6696240704661317, + "learning_rate": 4.0894536513090655e-06, + "loss": 0.3289, "step": 15461 }, { - "epoch": 0.89, - "grad_norm": 0.2548757339390069, - "learning_rate": 6.465928146231937e-07, - "loss": 0.1738, + "epoch": 0.71, + "grad_norm": 1.3234777470300474, + "learning_rate": 4.0882535141847566e-06, + "loss": 0.5747, "step": 15462 }, { - "epoch": 0.89, - "grad_norm": 0.33354702595338487, - "learning_rate": 6.459346822262014e-07, - "loss": 0.2561, + "epoch": 0.71, + "grad_norm": 0.2432404743648824, + "learning_rate": 4.087053507940823e-06, + "loss": 0.1579, "step": 15463 }, { - "epoch": 0.89, - "grad_norm": 0.31942036759526615, - "learning_rate": 6.45276873760089e-07, - "loss": 0.2559, + "epoch": 0.71, + "grad_norm": 0.3150487963413936, + "learning_rate": 4.08585363260382e-06, + "loss": 0.2539, "step": 15464 }, { - "epoch": 0.89, - "grad_norm": 0.8281221679177381, - "learning_rate": 6.44619389247636e-07, - "loss": 0.4434, + "epoch": 0.71, + "grad_norm": 1.3590932860804303, + "learning_rate": 4.084653888200319e-06, + "loss": 0.7879, "step": 15465 }, { - "epoch": 0.89, - "grad_norm": 1.4513030484506004, - "learning_rate": 6.439622287116121e-07, - "loss": 0.2286, + "epoch": 0.71, + "grad_norm": 0.42716404632076094, + "learning_rate": 4.083454274756881e-06, + "loss": 0.2983, "step": 15466 }, { - "epoch": 0.89, - "grad_norm": 0.2902777682810745, - "learning_rate": 6.433053921747734e-07, - "loss": 0.2554, + "epoch": 0.71, + "grad_norm": 0.5275065167875603, + "learning_rate": 4.082254792300061e-06, + "loss": 0.2613, "step": 15467 }, { - "epoch": 0.89, - "grad_norm": 0.22307685318256087, - "learning_rate": 6.42648879659864e-07, - "loss": 0.1937, + "epoch": 0.71, + "grad_norm": 0.3870188429135705, + "learning_rate": 4.0810554408564154e-06, + "loss": 0.3063, "step": 15468 }, { - "epoch": 0.89, - "grad_norm": 0.6450982193385442, - "learning_rate": 6.419926911896246e-07, - "loss": 0.2968, + "epoch": 0.71, + "grad_norm": 0.29796304654025724, + "learning_rate": 4.079856220452498e-06, + "loss": 0.2143, "step": 15469 }, { - "epoch": 0.89, - "grad_norm": 0.40342553770057044, - "learning_rate": 6.413368267867748e-07, - "loss": 0.2651, + "epoch": 0.71, + "grad_norm": 0.26241400572761425, + "learning_rate": 4.078657131114861e-06, + "loss": 0.1331, "step": 15470 }, { - "epoch": 0.89, - "grad_norm": 0.3625069981846449, - "learning_rate": 6.406812864740286e-07, - "loss": 0.3225, + "epoch": 0.71, + "grad_norm": 0.4821450322626492, + "learning_rate": 4.07745817287005e-06, + "loss": 0.3647, "step": 15471 }, { - "epoch": 0.89, - "grad_norm": 0.5406013839654554, - "learning_rate": 6.400260702740857e-07, - "loss": 0.2295, + "epoch": 0.71, + "grad_norm": 0.33132628009783577, + "learning_rate": 4.0762593457445975e-06, + "loss": 0.2662, "step": 15472 }, { - "epoch": 0.89, - "grad_norm": 0.3813327291140086, - "learning_rate": 6.39371178209639e-07, - "loss": 0.2555, + "epoch": 0.71, + "grad_norm": 0.6355957250246723, + "learning_rate": 4.075060649765062e-06, + "loss": 0.2624, "step": 15473 }, { - "epoch": 0.89, - "grad_norm": 0.248468868214602, - "learning_rate": 6.387166103033659e-07, - "loss": 0.1838, + "epoch": 0.71, + "grad_norm": 1.359888500724607, + "learning_rate": 4.07386208495797e-06, + "loss": 0.526, "step": 15474 }, { - "epoch": 0.89, - "grad_norm": 0.3369238767144438, - "learning_rate": 6.38062366577934e-07, - "loss": 0.2943, + "epoch": 0.71, + "grad_norm": 0.3749773361850086, + "learning_rate": 4.072663651349862e-06, + "loss": 0.2257, "step": 15475 }, { - "epoch": 0.89, - "grad_norm": 0.49041949518461936, - "learning_rate": 6.374084470559993e-07, - "loss": 0.2287, + "epoch": 0.71, + "grad_norm": 0.23467209216856855, + "learning_rate": 4.071465348967265e-06, + "loss": 0.2058, "step": 15476 }, { - "epoch": 0.89, - "grad_norm": 0.5345573593990525, - "learning_rate": 6.367548517602062e-07, - "loss": 0.3918, + "epoch": 0.71, + "grad_norm": 0.644663419329526, + "learning_rate": 4.070267177836712e-06, + "loss": 0.3821, "step": 15477 }, { - "epoch": 0.89, - "grad_norm": 1.267321575127367, - "learning_rate": 6.36101580713191e-07, - "loss": 0.4503, + "epoch": 0.71, + "grad_norm": 0.5885374550508444, + "learning_rate": 4.069069137984732e-06, + "loss": 0.304, "step": 15478 }, { - "epoch": 0.89, - "grad_norm": 0.24989474901885625, - "learning_rate": 6.354486339375765e-07, - "loss": 0.2039, + "epoch": 0.71, + "grad_norm": 0.4216145601135524, + "learning_rate": 4.06787122943784e-06, + "loss": 0.3328, "step": 15479 }, { - "epoch": 0.89, - "grad_norm": 0.30215440765844515, - "learning_rate": 6.347960114559726e-07, - "loss": 0.2355, + "epoch": 0.71, + "grad_norm": 0.375932711090284, + "learning_rate": 4.066673452222566e-06, + "loss": 0.2428, "step": 15480 }, { - "epoch": 0.89, - "grad_norm": 0.47055963128076644, - "learning_rate": 6.341437132909778e-07, - "loss": 0.2552, + "epoch": 0.71, + "grad_norm": 0.6334867103430766, + "learning_rate": 4.065475806365415e-06, + "loss": 0.321, "step": 15481 }, { - "epoch": 0.89, - "grad_norm": 0.28025144025055243, - "learning_rate": 6.334917394651863e-07, - "loss": 0.1913, + "epoch": 0.71, + "grad_norm": 0.2270104480960671, + "learning_rate": 4.064278291892918e-06, + "loss": 0.1974, "step": 15482 }, { - "epoch": 0.89, - "grad_norm": 0.47105549542319564, - "learning_rate": 6.328400900011722e-07, - "loss": 0.3661, + "epoch": 0.71, + "grad_norm": 0.5895690351772647, + "learning_rate": 4.063080908831578e-06, + "loss": 0.282, "step": 15483 }, { - "epoch": 0.89, - "grad_norm": 0.5301274686641892, - "learning_rate": 6.321887649215031e-07, - "loss": 0.3488, + "epoch": 0.71, + "grad_norm": 0.3362356554604297, + "learning_rate": 4.061883657207902e-06, + "loss": 0.2687, "step": 15484 }, { - "epoch": 0.89, - "grad_norm": 0.34412718839375167, - "learning_rate": 6.31537764248733e-07, - "loss": 0.1598, + "epoch": 0.71, + "grad_norm": 0.5616330307499463, + "learning_rate": 4.060686537048398e-06, + "loss": 0.3591, "step": 15485 }, { - "epoch": 0.89, - "grad_norm": 0.49793609859674237, - "learning_rate": 6.308870880054085e-07, - "loss": 0.3045, + "epoch": 0.71, + "grad_norm": 1.2092849394172236, + "learning_rate": 4.05948954837957e-06, + "loss": 0.2435, "step": 15486 }, { - "epoch": 0.89, - "grad_norm": 0.2893282719469128, - "learning_rate": 6.302367362140616e-07, - "loss": 0.2616, + "epoch": 0.71, + "grad_norm": 0.280041169211778, + "learning_rate": 4.058292691227922e-06, + "loss": 0.197, "step": 15487 }, { - "epoch": 0.89, - "grad_norm": 0.34383649815869993, - "learning_rate": 6.295867088972141e-07, - "loss": 0.2231, + "epoch": 0.71, + "grad_norm": 0.32558278272001023, + "learning_rate": 4.057095965619943e-06, + "loss": 0.2887, "step": 15488 }, { - "epoch": 0.89, - "grad_norm": 0.3281453009232224, - "learning_rate": 6.289370060773748e-07, - "loss": 0.2295, + "epoch": 0.71, + "grad_norm": 0.5709600545498836, + "learning_rate": 4.0558993715821335e-06, + "loss": 0.2716, "step": 15489 }, { - "epoch": 0.89, - "grad_norm": 0.536206100347878, - "learning_rate": 6.282876277770433e-07, - "loss": 0.2901, + "epoch": 0.71, + "grad_norm": 0.40553544049211937, + "learning_rate": 4.054702909140982e-06, + "loss": 0.2216, "step": 15490 }, { - "epoch": 0.89, - "grad_norm": 0.3532249976082039, - "learning_rate": 6.276385740187097e-07, - "loss": 0.258, + "epoch": 0.71, + "grad_norm": 0.5976325876193552, + "learning_rate": 4.053506578322976e-06, + "loss": 0.3674, "step": 15491 }, { - "epoch": 0.89, - "grad_norm": 0.3603329578354436, - "learning_rate": 6.26989844824849e-07, - "loss": 0.2514, + "epoch": 0.71, + "grad_norm": 0.3837044891064192, + "learning_rate": 4.052310379154607e-06, + "loss": 0.28, "step": 15492 }, { - "epoch": 0.89, - "grad_norm": 0.9432767223998106, - "learning_rate": 6.263414402179269e-07, - "loss": 0.402, + "epoch": 0.71, + "grad_norm": 0.8479501477857144, + "learning_rate": 4.051114311662351e-06, + "loss": 0.1418, "step": 15493 }, { - "epoch": 0.89, - "grad_norm": 0.30261911972964056, - "learning_rate": 6.256933602203963e-07, - "loss": 0.2115, + "epoch": 0.71, + "grad_norm": 0.32227656789213904, + "learning_rate": 4.04991837587269e-06, + "loss": 0.2179, "step": 15494 }, { - "epoch": 0.89, - "grad_norm": 0.23543770198075897, - "learning_rate": 6.250456048547027e-07, - "loss": 0.2335, + "epoch": 0.71, + "grad_norm": 0.5199745669126264, + "learning_rate": 4.048722571812105e-06, + "loss": 0.3449, "step": 15495 }, { - "epoch": 0.89, - "grad_norm": 0.4415768113353055, - "learning_rate": 6.243981741432769e-07, - "loss": 0.2614, + "epoch": 0.71, + "grad_norm": 0.39538358329729445, + "learning_rate": 4.047526899507063e-06, + "loss": 0.2135, "step": 15496 }, { - "epoch": 0.89, - "grad_norm": 0.5812569473819925, - "learning_rate": 6.23751068108539e-07, - "loss": 0.3034, + "epoch": 0.71, + "grad_norm": 0.2966929622529681, + "learning_rate": 4.0463313589840415e-06, + "loss": 0.2312, "step": 15497 }, { - "epoch": 0.89, - "grad_norm": 0.36478079392776935, - "learning_rate": 6.231042867728987e-07, - "loss": 0.2275, + "epoch": 0.71, + "grad_norm": 1.3594233955769435, + "learning_rate": 4.0451359502694986e-06, + "loss": 0.5296, "step": 15498 }, { - "epoch": 0.89, - "grad_norm": 0.3750949965379845, - "learning_rate": 6.224578301587536e-07, - "loss": 0.29, + "epoch": 0.71, + "grad_norm": 0.42253517739176183, + "learning_rate": 4.043940673389913e-06, + "loss": 0.1968, "step": 15499 }, { - "epoch": 0.89, - "grad_norm": 0.266345866146398, - "learning_rate": 6.218116982884903e-07, - "loss": 0.2056, + "epoch": 0.71, + "grad_norm": 0.2702509296339678, + "learning_rate": 4.042745528371741e-06, + "loss": 0.2456, "step": 15500 }, { - "epoch": 0.89, - "grad_norm": 0.5182817369574994, - "learning_rate": 6.211658911844854e-07, - "loss": 0.3603, + "epoch": 0.71, + "grad_norm": 0.6860244995469101, + "learning_rate": 4.041550515241438e-06, + "loss": 0.3879, "step": 15501 }, { - "epoch": 0.89, - "grad_norm": 0.32107800900252353, - "learning_rate": 6.205204088690997e-07, - "loss": 0.1834, + "epoch": 0.71, + "grad_norm": 0.8255041134032126, + "learning_rate": 4.040355634025463e-06, + "loss": 0.5485, "step": 15502 }, { - "epoch": 0.89, - "grad_norm": 0.3540271412785568, - "learning_rate": 6.198752513646911e-07, - "loss": 0.2723, + "epoch": 0.71, + "grad_norm": 0.2775937366121965, + "learning_rate": 4.039160884750271e-06, + "loss": 0.1719, "step": 15503 }, { - "epoch": 0.89, - "grad_norm": 0.5177309374569469, - "learning_rate": 6.192304186935993e-07, - "loss": 0.3293, + "epoch": 0.71, + "grad_norm": 0.37843353911066774, + "learning_rate": 4.0379662674423145e-06, + "loss": 0.2971, "step": 15504 }, { - "epoch": 0.89, - "grad_norm": 0.8842981293585628, - "learning_rate": 6.185859108781544e-07, - "loss": 0.2621, + "epoch": 0.71, + "grad_norm": 0.656048816343922, + "learning_rate": 4.036771782128036e-06, + "loss": 0.3371, "step": 15505 }, { - "epoch": 0.89, - "grad_norm": 0.8063127372023199, - "learning_rate": 6.179417279406752e-07, - "loss": 0.5123, + "epoch": 0.71, + "grad_norm": 0.4527974437626401, + "learning_rate": 4.035577428833886e-06, + "loss": 0.253, "step": 15506 }, { - "epoch": 0.89, - "grad_norm": 0.2330689085737917, - "learning_rate": 6.172978699034715e-07, - "loss": 0.2458, + "epoch": 0.71, + "grad_norm": 0.5442523370270513, + "learning_rate": 4.034383207586299e-06, + "loss": 0.3453, "step": 15507 }, { - "epoch": 0.89, - "grad_norm": 0.2998705740226622, - "learning_rate": 6.166543367888389e-07, - "loss": 0.1521, + "epoch": 0.71, + "grad_norm": 0.38580761851004725, + "learning_rate": 4.033189118411719e-06, + "loss": 0.273, "step": 15508 }, { - "epoch": 0.89, - "grad_norm": 0.5991385238658149, - "learning_rate": 6.160111286190629e-07, - "loss": 0.3678, + "epoch": 0.71, + "grad_norm": 0.2630748879034027, + "learning_rate": 4.031995161336584e-06, + "loss": 0.1789, "step": 15509 }, { - "epoch": 0.89, - "grad_norm": 0.41892626382614, - "learning_rate": 6.153682454164167e-07, - "loss": 0.2752, + "epoch": 0.71, + "grad_norm": 0.6297927967147808, + "learning_rate": 4.030801336387321e-06, + "loss": 0.2493, "step": 15510 }, { - "epoch": 0.89, - "grad_norm": 0.38775365797584355, - "learning_rate": 6.14725687203167e-07, - "loss": 0.2397, + "epoch": 0.71, + "grad_norm": 0.36574754655553965, + "learning_rate": 4.029607643590363e-06, + "loss": 0.2982, "step": 15511 }, { - "epoch": 0.89, - "grad_norm": 0.6237671895525037, - "learning_rate": 6.140834540015617e-07, - "loss": 0.3729, + "epoch": 0.71, + "grad_norm": 0.37282857909359957, + "learning_rate": 4.028414082972141e-06, + "loss": 0.2222, "step": 15512 }, { - "epoch": 0.89, - "grad_norm": 0.29434266789911306, - "learning_rate": 6.134415458338439e-07, - "loss": 0.2485, + "epoch": 0.71, + "grad_norm": 0.8702055671141345, + "learning_rate": 4.027220654559072e-06, + "loss": 0.5004, "step": 15513 }, { - "epoch": 0.89, - "grad_norm": 0.44565125934806743, - "learning_rate": 6.127999627222414e-07, - "loss": 0.238, + "epoch": 0.71, + "grad_norm": 1.2799421300732114, + "learning_rate": 4.026027358377584e-06, + "loss": 0.6382, "step": 15514 }, { - "epoch": 0.89, - "grad_norm": 0.28957696302459607, - "learning_rate": 6.121587046889709e-07, - "loss": 0.2293, + "epoch": 0.71, + "grad_norm": 0.2674578567095945, + "learning_rate": 4.024834194454086e-06, + "loss": 0.218, "step": 15515 }, { - "epoch": 0.89, - "grad_norm": 0.381380545026879, - "learning_rate": 6.115177717562426e-07, - "loss": 0.3005, + "epoch": 0.71, + "grad_norm": 0.2564183407134996, + "learning_rate": 4.023641162815007e-06, + "loss": 0.1997, "step": 15516 }, { - "epoch": 0.89, - "grad_norm": 1.280503866463015, - "learning_rate": 6.108771639462496e-07, - "loss": 0.4066, + "epoch": 0.71, + "grad_norm": 0.6201332092579396, + "learning_rate": 4.02244826348675e-06, + "loss": 0.4014, "step": 15517 }, { - "epoch": 0.89, - "grad_norm": 0.31632331249443907, - "learning_rate": 6.102368812811776e-07, - "loss": 0.2172, + "epoch": 0.71, + "grad_norm": 0.3882574468204005, + "learning_rate": 4.02125549649573e-06, + "loss": 0.2737, "step": 15518 }, { - "epoch": 0.89, - "grad_norm": 0.30631108778593535, - "learning_rate": 6.095969237831956e-07, - "loss": 0.2671, + "epoch": 0.71, + "grad_norm": 0.5117669534890628, + "learning_rate": 4.02006286186835e-06, + "loss": 0.2448, "step": 15519 }, { - "epoch": 0.89, - "grad_norm": 0.4657106174751502, - "learning_rate": 6.089572914744712e-07, - "loss": 0.2943, + "epoch": 0.71, + "grad_norm": 0.42503277751157154, + "learning_rate": 4.0188703596310085e-06, + "loss": 0.3146, "step": 15520 }, { - "epoch": 0.89, - "grad_norm": 0.3192673232648922, - "learning_rate": 6.083179843771513e-07, - "loss": 0.1749, + "epoch": 0.71, + "grad_norm": 0.3056705572460837, + "learning_rate": 4.01767798981012e-06, + "loss": 0.2222, "step": 15521 }, { - "epoch": 0.89, - "grad_norm": 0.5648607556412087, - "learning_rate": 6.076790025133761e-07, - "loss": 0.3423, + "epoch": 0.71, + "grad_norm": 0.4228458858952225, + "learning_rate": 4.016485752432071e-06, + "loss": 0.1496, "step": 15522 }, { - "epoch": 0.89, - "grad_norm": 0.38998891302247396, - "learning_rate": 6.070403459052721e-07, - "loss": 0.3006, + "epoch": 0.71, + "grad_norm": 0.3307591699716963, + "learning_rate": 4.015293647523264e-06, + "loss": 0.3087, "step": 15523 }, { - "epoch": 0.89, - "grad_norm": 0.47612449309706734, - "learning_rate": 6.064020145749572e-07, - "loss": 0.2506, + "epoch": 0.71, + "grad_norm": 0.3916166873677584, + "learning_rate": 4.0141016751100834e-06, + "loss": 0.2905, "step": 15524 }, { - "epoch": 0.89, - "grad_norm": 0.3699572894444798, - "learning_rate": 6.057640085445371e-07, - "loss": 0.2902, + "epoch": 0.71, + "grad_norm": 1.010457837401049, + "learning_rate": 4.012909835218924e-06, + "loss": 0.3168, "step": 15525 }, { - "epoch": 0.89, - "grad_norm": 0.33072600556263054, - "learning_rate": 6.051263278361064e-07, - "loss": 0.2912, + "epoch": 0.71, + "grad_norm": 0.6782563653088336, + "learning_rate": 4.011718127876173e-06, + "loss": 0.4637, "step": 15526 }, { - "epoch": 0.89, - "grad_norm": 0.5229226557439318, - "learning_rate": 6.04488972471744e-07, - "loss": 0.2665, + "epoch": 0.71, + "grad_norm": 0.43052354792375336, + "learning_rate": 4.010526553108207e-06, + "loss": 0.2835, "step": 15527 }, { - "epoch": 0.89, - "grad_norm": 0.23899799542644962, - "learning_rate": 6.038519424735268e-07, - "loss": 0.1533, + "epoch": 0.71, + "grad_norm": 0.2511407380468412, + "learning_rate": 4.0093351109414115e-06, + "loss": 0.2085, "step": 15528 }, { - "epoch": 0.89, - "grad_norm": 1.2521803906393414, - "learning_rate": 6.032152378635125e-07, - "loss": 0.71, + "epoch": 0.71, + "grad_norm": 0.7155107171680686, + "learning_rate": 4.008143801402166e-06, + "loss": 0.3062, "step": 15529 }, { - "epoch": 0.89, - "grad_norm": 0.332009032357602, - "learning_rate": 6.025788586637516e-07, - "loss": 0.3173, + "epoch": 0.71, + "grad_norm": 0.3480921423162831, + "learning_rate": 4.0069526245168375e-06, + "loss": 0.2434, "step": 15530 }, { - "epoch": 0.89, - "grad_norm": 0.3054581468802576, - "learning_rate": 6.019428048962794e-07, - "loss": 0.2127, + "epoch": 0.71, + "grad_norm": 0.4010881115565821, + "learning_rate": 4.005761580311805e-06, + "loss": 0.3214, "step": 15531 }, { - "epoch": 0.89, - "grad_norm": 0.6988792384416537, - "learning_rate": 6.013070765831242e-07, - "loss": 0.3557, + "epoch": 0.71, + "grad_norm": 0.970331530175825, + "learning_rate": 4.004570668813427e-06, + "loss": 0.5249, "step": 15532 }, { - "epoch": 0.89, - "grad_norm": 0.2612692825758358, - "learning_rate": 6.006716737463003e-07, - "loss": 0.1574, + "epoch": 0.71, + "grad_norm": 0.3529049650920527, + "learning_rate": 4.0033798900480845e-06, + "loss": 0.2742, "step": 15533 }, { - "epoch": 0.89, - "grad_norm": 0.35768416541218084, - "learning_rate": 6.000365964078125e-07, - "loss": 0.2071, + "epoch": 0.71, + "grad_norm": 0.33612861577292935, + "learning_rate": 4.002189244042126e-06, + "loss": 0.2142, "step": 15534 }, { - "epoch": 0.89, - "grad_norm": 0.360522914540225, - "learning_rate": 5.99401844589651e-07, - "loss": 0.2844, + "epoch": 0.71, + "grad_norm": 0.37984712109441304, + "learning_rate": 4.000998730821922e-06, + "loss": 0.259, "step": 15535 }, { - "epoch": 0.89, - "grad_norm": 0.7207181920540063, - "learning_rate": 5.987674183138015e-07, - "loss": 0.4223, + "epoch": 0.71, + "grad_norm": 0.35571527283470394, + "learning_rate": 3.999808350413823e-06, + "loss": 0.2463, "step": 15536 }, { - "epoch": 0.89, - "grad_norm": 0.34250474149267907, - "learning_rate": 5.98133317602233e-07, - "loss": 0.2512, + "epoch": 0.71, + "grad_norm": 1.5241663590526002, + "learning_rate": 3.998618102844178e-06, + "loss": 0.7123, "step": 15537 }, { - "epoch": 0.89, - "grad_norm": 0.42762939818118384, - "learning_rate": 5.974995424769026e-07, - "loss": 0.2639, + "epoch": 0.71, + "grad_norm": 0.5970884686858505, + "learning_rate": 3.997427988139351e-06, + "loss": 0.2873, "step": 15538 }, { - "epoch": 0.89, - "grad_norm": 0.28191737906201586, - "learning_rate": 5.968660929597581e-07, - "loss": 0.2076, + "epoch": 0.71, + "grad_norm": 0.2660817022507961, + "learning_rate": 3.996238006325679e-06, + "loss": 0.2633, "step": 15539 }, { - "epoch": 0.89, - "grad_norm": 0.3401995555671143, - "learning_rate": 5.962329690727353e-07, - "loss": 0.2638, + "epoch": 0.71, + "grad_norm": 1.042325415826318, + "learning_rate": 3.995048157429514e-06, + "loss": 0.3831, "step": 15540 }, { - "epoch": 0.89, - "grad_norm": 0.5155883682044767, - "learning_rate": 5.956001708377623e-07, - "loss": 0.1423, - "step": 15541 + "epoch": 0.71, + "grad_norm": 0.3491319425514608, + "learning_rate": 3.993858441477193e-06, + "loss": 0.2155, + "step": 15541 }, { - "epoch": 0.89, - "grad_norm": 0.3712414578625411, - "learning_rate": 5.949676982767505e-07, - "loss": 0.3232, + "epoch": 0.71, + "grad_norm": 0.3055290460243411, + "learning_rate": 3.992668858495055e-06, + "loss": 0.1898, "step": 15542 }, { - "epoch": 0.89, - "grad_norm": 0.32353924162903186, - "learning_rate": 5.943355514116033e-07, - "loss": 0.2585, + "epoch": 0.71, + "grad_norm": 0.38152533496642127, + "learning_rate": 3.991479408509444e-06, + "loss": 0.2871, "step": 15543 }, { - "epoch": 0.89, - "grad_norm": 0.7305438124068102, - "learning_rate": 5.937037302642101e-07, - "loss": 0.3036, + "epoch": 0.71, + "grad_norm": 1.2423997802425857, + "learning_rate": 3.990290091546685e-06, + "loss": 0.7798, "step": 15544 }, { - "epoch": 0.89, - "grad_norm": 0.29228028451163485, - "learning_rate": 5.930722348564533e-07, - "loss": 0.1928, + "epoch": 0.71, + "grad_norm": 0.3374350948541978, + "learning_rate": 3.98910090763311e-06, + "loss": 0.1889, "step": 15545 }, { - "epoch": 0.89, - "grad_norm": 0.34765287698989866, - "learning_rate": 5.924410652102009e-07, - "loss": 0.255, + "epoch": 0.71, + "grad_norm": 0.696889465161503, + "learning_rate": 3.987911856795047e-06, + "loss": 0.3564, "step": 15546 }, { - "epoch": 0.89, - "grad_norm": 0.3763887260898906, - "learning_rate": 5.918102213473087e-07, - "loss": 0.2487, + "epoch": 0.71, + "grad_norm": 0.28336323671469765, + "learning_rate": 3.9867229390588245e-06, + "loss": 0.2354, "step": 15547 }, { - "epoch": 0.89, - "grad_norm": 0.6285264044725666, - "learning_rate": 5.911797032896239e-07, - "loss": 0.3656, + "epoch": 0.71, + "grad_norm": 0.2553198634753402, + "learning_rate": 3.985534154450762e-06, + "loss": 0.1648, "step": 15548 }, { - "epoch": 0.89, - "grad_norm": 0.33477221937072554, - "learning_rate": 5.905495110589821e-07, - "loss": 0.2613, + "epoch": 0.71, + "grad_norm": 1.1638800324545835, + "learning_rate": 3.9843455029971715e-06, + "loss": 0.6614, "step": 15549 }, { - "epoch": 0.89, - "grad_norm": 1.252759326879859, - "learning_rate": 5.89919644677206e-07, - "loss": 0.8002, + "epoch": 0.71, + "grad_norm": 1.2431847610865912, + "learning_rate": 3.983156984724374e-06, + "loss": 0.7071, "step": 15550 }, { - "epoch": 0.89, - "grad_norm": 0.3344029733336263, - "learning_rate": 5.892901041661092e-07, - "loss": 0.2161, + "epoch": 0.71, + "grad_norm": 0.29859397787766245, + "learning_rate": 3.981968599658682e-06, + "loss": 0.2336, "step": 15551 }, { - "epoch": 0.89, - "grad_norm": 0.20909466560736703, - "learning_rate": 5.886608895474888e-07, - "loss": 0.1845, + "epoch": 0.71, + "grad_norm": 0.4689919660796311, + "learning_rate": 3.980780347826409e-06, + "loss": 0.3588, "step": 15552 }, { - "epoch": 0.89, - "grad_norm": 1.1886244262668104, - "learning_rate": 5.880320008431384e-07, - "loss": 0.6513, + "epoch": 0.71, + "grad_norm": 0.24446450545567375, + "learning_rate": 3.979592229253853e-06, + "loss": 0.1407, "step": 15553 }, { - "epoch": 0.89, - "grad_norm": 0.29300410427650325, - "learning_rate": 5.874034380748362e-07, - "loss": 0.2342, + "epoch": 0.71, + "grad_norm": 0.348068369693218, + "learning_rate": 3.978404243967323e-06, + "loss": 0.2633, "step": 15554 }, { - "epoch": 0.89, - "grad_norm": 0.34717227520428, - "learning_rate": 5.867752012643469e-07, - "loss": 0.285, + "epoch": 0.71, + "grad_norm": 0.3618088096735549, + "learning_rate": 3.977216391993123e-06, + "loss": 0.2389, "step": 15555 }, { - "epoch": 0.89, - "grad_norm": 0.7185220879859632, - "learning_rate": 5.861472904334287e-07, - "loss": 0.3823, + "epoch": 0.71, + "grad_norm": 0.8783487316889272, + "learning_rate": 3.9760286733575435e-06, + "loss": 0.5293, "step": 15556 }, { - "epoch": 0.89, - "grad_norm": 0.48824641014498565, - "learning_rate": 5.855197056038231e-07, - "loss": 0.1113, + "epoch": 0.71, + "grad_norm": 0.33048320946368565, + "learning_rate": 3.974841088086887e-06, + "loss": 0.2394, "step": 15557 }, { - "epoch": 0.89, - "grad_norm": 0.34978211070504545, - "learning_rate": 5.848924467972661e-07, - "loss": 0.2534, + "epoch": 0.71, + "grad_norm": 0.7233561189048227, + "learning_rate": 3.973653636207437e-06, + "loss": 0.2841, "step": 15558 }, { - "epoch": 0.89, - "grad_norm": 0.257049533946427, - "learning_rate": 5.842655140354791e-07, - "loss": 0.2372, + "epoch": 0.71, + "grad_norm": 0.3039144161797102, + "learning_rate": 3.972466317745489e-06, + "loss": 0.2658, "step": 15559 }, { - "epoch": 0.89, - "grad_norm": 0.596660439530161, - "learning_rate": 5.836389073401727e-07, - "loss": 0.283, + "epoch": 0.71, + "grad_norm": 0.4171875442895504, + "learning_rate": 3.971279132727329e-06, + "loss": 0.2461, "step": 15560 }, { - "epoch": 0.89, - "grad_norm": 0.31315917280327593, - "learning_rate": 5.830126267330449e-07, - "loss": 0.2764, + "epoch": 0.71, + "grad_norm": 0.2868310384490601, + "learning_rate": 3.970092081179236e-06, + "loss": 0.1683, "step": 15561 }, { - "epoch": 0.89, - "grad_norm": 0.35240639619956515, - "learning_rate": 5.823866722357863e-07, - "loss": 0.3343, + "epoch": 0.71, + "grad_norm": 0.5101438684893173, + "learning_rate": 3.968905163127493e-06, + "loss": 0.3279, "step": 15562 }, { - "epoch": 0.89, - "grad_norm": 1.620476298451587, - "learning_rate": 5.817610438700716e-07, - "loss": 0.5078, + "epoch": 0.71, + "grad_norm": 0.40796046306245215, + "learning_rate": 3.967718378598377e-06, + "loss": 0.2522, "step": 15563 }, { - "epoch": 0.89, - "grad_norm": 0.2704294455548528, - "learning_rate": 5.811357416575681e-07, - "loss": 0.1573, + "epoch": 0.71, + "grad_norm": 0.3896019094271313, + "learning_rate": 3.966531727618165e-06, + "loss": 0.2555, "step": 15564 }, { - "epoch": 0.89, - "grad_norm": 0.4428142185190504, - "learning_rate": 5.805107656199272e-07, - "loss": 0.2494, + "epoch": 0.72, + "grad_norm": 0.2985226605443229, + "learning_rate": 3.965345210213125e-06, + "loss": 0.1793, "step": 15565 }, { - "epoch": 0.89, - "grad_norm": 0.3292874439000769, - "learning_rate": 5.79886115778795e-07, - "loss": 0.2965, + "epoch": 0.72, + "grad_norm": 0.4611526218924979, + "learning_rate": 3.964158826409523e-06, + "loss": 0.2603, "step": 15566 }, { - "epoch": 0.89, - "grad_norm": 0.30624266788213034, - "learning_rate": 5.792617921558008e-07, - "loss": 0.1981, + "epoch": 0.72, + "grad_norm": 0.29972742686438963, + "learning_rate": 3.9629725762336266e-06, + "loss": 0.2661, "step": 15567 }, { - "epoch": 0.89, - "grad_norm": 1.2556421070753658, - "learning_rate": 5.786377947725652e-07, - "loss": 0.736, + "epoch": 0.72, + "grad_norm": 0.765697572376828, + "learning_rate": 3.961786459711699e-06, + "loss": 0.3202, "step": 15568 }, { - "epoch": 0.89, - "grad_norm": 1.5193881640144948, - "learning_rate": 5.780141236506975e-07, - "loss": 0.567, + "epoch": 0.72, + "grad_norm": 0.34647881300648886, + "learning_rate": 3.960600476870003e-06, + "loss": 0.2481, "step": 15569 }, { - "epoch": 0.89, - "grad_norm": 0.22623774423910054, - "learning_rate": 5.77390778811796e-07, - "loss": 0.2157, + "epoch": 0.72, + "grad_norm": 0.8966314841487539, + "learning_rate": 3.959414627734789e-06, + "loss": 0.4436, "step": 15570 }, { - "epoch": 0.89, - "grad_norm": 0.28603043110314136, - "learning_rate": 5.767677602774469e-07, - "loss": 0.1825, + "epoch": 0.72, + "grad_norm": 0.3648837881450559, + "learning_rate": 3.958228912332312e-06, + "loss": 0.2348, "step": 15571 }, { - "epoch": 0.89, - "grad_norm": 0.5968504572934171, - "learning_rate": 5.761450680692249e-07, - "loss": 0.3951, + "epoch": 0.72, + "grad_norm": 0.25766799622912173, + "learning_rate": 3.9570433306888265e-06, + "loss": 0.1973, "step": 15572 }, { - "epoch": 0.89, - "grad_norm": 0.2821106282963114, - "learning_rate": 5.755227022086918e-07, - "loss": 0.1834, + "epoch": 0.72, + "grad_norm": 0.43460842536034927, + "learning_rate": 3.955857882830576e-06, + "loss": 0.2153, "step": 15573 }, { - "epoch": 0.89, - "grad_norm": 0.3511480956550723, - "learning_rate": 5.749006627174048e-07, - "loss": 0.3039, + "epoch": 0.72, + "grad_norm": 0.48359823649628614, + "learning_rate": 3.954672568783809e-06, + "loss": 0.2949, "step": 15574 }, { - "epoch": 0.89, - "grad_norm": 1.2740202637398323, - "learning_rate": 5.742789496169021e-07, - "loss": 0.4536, + "epoch": 0.72, + "grad_norm": 0.3108944990482073, + "learning_rate": 3.95348738857476e-06, + "loss": 0.2503, "step": 15575 }, { - "epoch": 0.89, - "grad_norm": 0.36967814771582713, - "learning_rate": 5.736575629287145e-07, - "loss": 0.2536, + "epoch": 0.72, + "grad_norm": 1.3620705737252354, + "learning_rate": 3.952302342229674e-06, + "loss": 0.7562, "step": 15576 }, { - "epoch": 0.89, - "grad_norm": 0.3182282890886471, - "learning_rate": 5.730365026743579e-07, - "loss": 0.2127, + "epoch": 0.72, + "grad_norm": 0.4647474504029795, + "learning_rate": 3.951117429774789e-06, + "loss": 0.105, "step": 15577 }, { - "epoch": 0.9, - "grad_norm": 0.2597618110444984, - "learning_rate": 5.72415768875344e-07, - "loss": 0.2431, + "epoch": 0.72, + "grad_norm": 0.31476660559122266, + "learning_rate": 3.94993265123633e-06, + "loss": 0.2062, "step": 15578 }, { - "epoch": 0.9, - "grad_norm": 0.40873608355276525, - "learning_rate": 5.717953615531668e-07, - "loss": 0.2492, + "epoch": 0.72, + "grad_norm": 0.28814180412122126, + "learning_rate": 3.948748006640535e-06, + "loss": 0.2771, "step": 15579 }, { - "epoch": 0.9, - "grad_norm": 0.5488195331582982, - "learning_rate": 5.711752807293102e-07, - "loss": 0.2588, + "epoch": 0.72, + "grad_norm": 0.6687811015986703, + "learning_rate": 3.94756349601362e-06, + "loss": 0.4041, "step": 15580 }, { - "epoch": 0.9, - "grad_norm": 1.40896979648835, - "learning_rate": 5.705555264252483e-07, - "loss": 0.5422, + "epoch": 0.72, + "grad_norm": 0.32427902404722525, + "learning_rate": 3.946379119381822e-06, + "loss": 0.1731, "step": 15581 }, { - "epoch": 0.9, - "grad_norm": 0.2511592502937873, - "learning_rate": 5.699360986624414e-07, - "loss": 0.2355, + "epoch": 0.72, + "grad_norm": 0.5709409312348593, + "learning_rate": 3.945194876771352e-06, + "loss": 0.3628, "step": 15582 }, { - "epoch": 0.9, - "grad_norm": 0.5226165719071535, - "learning_rate": 5.693169974623435e-07, - "loss": 0.2665, + "epoch": 0.72, + "grad_norm": 0.3762588365469927, + "learning_rate": 3.944010768208436e-06, + "loss": 0.3002, "step": 15583 }, { - "epoch": 0.9, - "grad_norm": 0.4341487348411721, - "learning_rate": 5.686982228463933e-07, - "loss": 0.2977, + "epoch": 0.72, + "grad_norm": 0.24383970423917428, + "learning_rate": 3.942826793719281e-06, + "loss": 0.1258, "step": 15584 }, { - "epoch": 0.9, - "grad_norm": 0.2937040743182103, - "learning_rate": 5.680797748360168e-07, - "loss": 0.2368, + "epoch": 0.72, + "grad_norm": 0.3365459096654575, + "learning_rate": 3.941642953330102e-06, + "loss": 0.2813, "step": 15585 }, { - "epoch": 0.9, - "grad_norm": 0.24353668748816057, - "learning_rate": 5.674616534526312e-07, - "loss": 0.1934, + "epoch": 0.72, + "grad_norm": 0.8001471991624923, + "learning_rate": 3.9404592470671145e-06, + "loss": 0.4252, "step": 15586 }, { - "epoch": 0.9, - "grad_norm": 1.674275057185165, - "learning_rate": 5.66843858717645e-07, - "loss": 0.4936, + "epoch": 0.72, + "grad_norm": 0.27872356397792214, + "learning_rate": 3.939275674956514e-06, + "loss": 0.2216, "step": 15587 }, { - "epoch": 0.9, - "grad_norm": 0.33392941409136523, - "learning_rate": 5.66226390652449e-07, - "loss": 0.2589, + "epoch": 0.72, + "grad_norm": 0.5953289136224065, + "learning_rate": 3.938092237024509e-06, + "loss": 0.3667, "step": 15588 }, { - "epoch": 0.9, - "grad_norm": 0.5883240032566877, - "learning_rate": 5.656092492784282e-07, - "loss": 0.3758, + "epoch": 0.72, + "grad_norm": 1.2982026881980373, + "learning_rate": 3.936908933297302e-06, + "loss": 0.52, "step": 15589 }, { - "epoch": 0.9, - "grad_norm": 0.3373756640161482, - "learning_rate": 5.649924346169522e-07, - "loss": 0.2541, + "epoch": 0.72, + "grad_norm": 0.23631750740416335, + "learning_rate": 3.935725763801085e-06, + "loss": 0.1499, "step": 15590 }, { - "epoch": 0.9, - "grad_norm": 0.5502096680976466, - "learning_rate": 5.643759466893839e-07, - "loss": 0.3176, + "epoch": 0.72, + "grad_norm": 0.3922146685168824, + "learning_rate": 3.934542728562058e-06, + "loss": 0.3256, "step": 15591 }, { - "epoch": 0.9, - "grad_norm": 0.23347398396256974, - "learning_rate": 5.637597855170707e-07, - "loss": 0.1993, + "epoch": 0.72, + "grad_norm": 0.7871682133919862, + "learning_rate": 3.933359827606402e-06, + "loss": 0.4197, "step": 15592 }, { - "epoch": 0.9, - "grad_norm": 0.3502673113280859, - "learning_rate": 5.631439511213499e-07, - "loss": 0.2337, + "epoch": 0.72, + "grad_norm": 0.4361744491301722, + "learning_rate": 3.932177060960319e-06, + "loss": 0.284, "step": 15593 }, { - "epoch": 0.9, - "grad_norm": 0.3869961913722, - "learning_rate": 5.625284435235478e-07, - "loss": 0.2529, + "epoch": 0.72, + "grad_norm": 0.3009296709869533, + "learning_rate": 3.930994428649989e-06, + "loss": 0.1501, "step": 15594 }, { - "epoch": 0.9, - "grad_norm": 0.676089532643328, - "learning_rate": 5.619132627449797e-07, - "loss": 0.3139, + "epoch": 0.72, + "grad_norm": 0.392340226070996, + "learning_rate": 3.929811930701588e-06, + "loss": 0.3034, "step": 15595 }, { - "epoch": 0.9, - "grad_norm": 1.3271539215905461, - "learning_rate": 5.612984088069507e-07, - "loss": 0.3681, + "epoch": 0.72, + "grad_norm": 0.6018259196858956, + "learning_rate": 3.928629567141305e-06, + "loss": 0.3023, "step": 15596 }, { - "epoch": 0.9, - "grad_norm": 0.41527056555619096, - "learning_rate": 5.606838817307514e-07, - "loss": 0.2618, + "epoch": 0.72, + "grad_norm": 0.40882440878038734, + "learning_rate": 3.9274473379953035e-06, + "loss": 0.2651, "step": 15597 }, { - "epoch": 0.9, - "grad_norm": 0.20296961040711048, - "learning_rate": 5.600696815376639e-07, - "loss": 0.207, + "epoch": 0.72, + "grad_norm": 0.28988786927200716, + "learning_rate": 3.926265243289773e-06, + "loss": 0.2645, "step": 15598 }, { - "epoch": 0.9, - "grad_norm": 0.7358714300620751, - "learning_rate": 5.594558082489565e-07, - "loss": 0.2504, + "epoch": 0.72, + "grad_norm": 0.876440240611924, + "learning_rate": 3.9250832830508715e-06, + "loss": 0.4933, "step": 15599 }, { - "epoch": 0.9, - "grad_norm": 0.37675397571544944, - "learning_rate": 5.58842261885889e-07, - "loss": 0.277, + "epoch": 0.72, + "grad_norm": 0.30477597904492615, + "learning_rate": 3.9239014573047755e-06, + "loss": 0.1748, "step": 15600 }, { - "epoch": 0.9, - "grad_norm": 0.6241152115560328, - "learning_rate": 5.582290424697078e-07, - "loss": 0.2907, + "epoch": 0.72, + "grad_norm": 1.5686087651270901, + "learning_rate": 3.922719766077642e-06, + "loss": 0.5161, "step": 15601 }, { - "epoch": 0.9, - "grad_norm": 0.3432574165161779, - "learning_rate": 5.576161500216481e-07, - "loss": 0.3207, + "epoch": 0.72, + "grad_norm": 0.5292112152332294, + "learning_rate": 3.921538209395634e-06, + "loss": 0.2963, "step": 15602 }, { - "epoch": 0.9, - "grad_norm": 0.32805857188412496, - "learning_rate": 5.570035845629362e-07, - "loss": 0.207, + "epoch": 0.72, + "grad_norm": 0.2782706412515793, + "learning_rate": 3.9203567872849154e-06, + "loss": 0.2886, "step": 15603 }, { - "epoch": 0.9, - "grad_norm": 0.3566758030118385, - "learning_rate": 5.563913461147841e-07, - "loss": 0.2272, + "epoch": 0.72, + "grad_norm": 0.8956216113323225, + "learning_rate": 3.919175499771635e-06, + "loss": 0.3308, "step": 15604 }, { - "epoch": 0.9, - "grad_norm": 0.33717272175655655, - "learning_rate": 5.557794346983936e-07, - "loss": 0.2613, + "epoch": 0.72, + "grad_norm": 0.28405768422545374, + "learning_rate": 3.9179943468819485e-06, + "loss": 0.1661, "step": 15605 }, { - "epoch": 0.9, - "grad_norm": 0.25959042748500355, - "learning_rate": 5.551678503349545e-07, - "loss": 0.2154, + "epoch": 0.72, + "grad_norm": 0.3284563351104206, + "learning_rate": 3.916813328642008e-06, + "loss": 0.226, "step": 15606 }, { - "epoch": 0.9, - "grad_norm": 0.8584149753122465, - "learning_rate": 5.545565930456464e-07, - "loss": 0.4831, + "epoch": 0.72, + "grad_norm": 0.35804816186725547, + "learning_rate": 3.915632445077955e-06, + "loss": 0.2371, "step": 15607 }, { - "epoch": 0.9, - "grad_norm": 1.2299027222034258, - "learning_rate": 5.539456628516382e-07, - "loss": 0.7181, + "epoch": 0.72, + "grad_norm": 0.5579065259712115, + "learning_rate": 3.914451696215937e-06, + "loss": 0.287, "step": 15608 }, { - "epoch": 0.9, - "grad_norm": 0.7561475327791286, - "learning_rate": 5.53335059774085e-07, - "loss": 0.1436, + "epoch": 0.72, + "grad_norm": 0.4045019348095558, + "learning_rate": 3.91327108208209e-06, + "loss": 0.303, "step": 15609 }, { - "epoch": 0.9, - "grad_norm": 0.2262751318435284, - "learning_rate": 5.527247838341332e-07, - "loss": 0.2338, + "epoch": 0.72, + "grad_norm": 0.3455352213924204, + "learning_rate": 3.912090602702556e-06, + "loss": 0.2421, "step": 15610 }, { - "epoch": 0.9, - "grad_norm": 0.40729868452013945, - "learning_rate": 5.521148350529137e-07, - "loss": 0.2774, + "epoch": 0.72, + "grad_norm": 0.3902891227336239, + "learning_rate": 3.910910258103468e-06, + "loss": 0.2919, "step": 15611 }, { - "epoch": 0.9, - "grad_norm": 0.8980545590794237, - "learning_rate": 5.51505213451553e-07, - "loss": 0.3379, + "epoch": 0.72, + "grad_norm": 0.36551291193798424, + "learning_rate": 3.9097300483109625e-06, + "loss": 0.22, "step": 15612 }, { - "epoch": 0.9, - "grad_norm": 0.3863824149031958, - "learning_rate": 5.508959190511609e-07, - "loss": 0.2108, + "epoch": 0.72, + "grad_norm": 0.44767097775473574, + "learning_rate": 3.908549973351164e-06, + "loss": 0.2112, "step": 15613 }, { - "epoch": 0.9, - "grad_norm": 0.33532397215502635, - "learning_rate": 5.502869518728359e-07, - "loss": 0.3238, + "epoch": 0.72, + "grad_norm": 0.3945377141070083, + "learning_rate": 3.907370033250188e-06, + "loss": 0.2978, "step": 15614 }, { - "epoch": 0.9, - "grad_norm": 0.5569176551550571, - "learning_rate": 5.49678311937667e-07, - "loss": 0.3709, + "epoch": 0.72, + "grad_norm": 0.3424204936232039, + "learning_rate": 3.906190228034177e-06, + "loss": 0.2886, "step": 15615 }, { - "epoch": 0.9, - "grad_norm": 0.36933007186294964, - "learning_rate": 5.490699992667326e-07, - "loss": 0.2373, + "epoch": 0.72, + "grad_norm": 1.321649278791491, + "learning_rate": 3.905010557729238e-06, + "loss": 0.7187, "step": 15616 }, { - "epoch": 0.9, - "grad_norm": 0.45251797358873475, - "learning_rate": 5.48462013881097e-07, - "loss": 0.2836, + "epoch": 0.72, + "grad_norm": 0.3895072694694036, + "learning_rate": 3.903831022361493e-06, + "loss": 0.1172, "step": 15617 }, { - "epoch": 0.9, - "grad_norm": 0.23540557748561147, - "learning_rate": 5.478543558018167e-07, - "loss": 0.2141, + "epoch": 0.72, + "grad_norm": 0.28549428771090135, + "learning_rate": 3.9026516219570495e-06, + "loss": 0.2207, "step": 15618 }, { - "epoch": 0.9, - "grad_norm": 0.4132520532778912, - "learning_rate": 5.472470250499328e-07, - "loss": 0.2118, + "epoch": 0.72, + "grad_norm": 0.3647009625656791, + "learning_rate": 3.901472356542023e-06, + "loss": 0.2987, "step": 15619 }, { - "epoch": 0.9, - "grad_norm": 1.2284634022109895, - "learning_rate": 5.466400216464774e-07, - "loss": 0.7019, + "epoch": 0.72, + "grad_norm": 0.5975779603878336, + "learning_rate": 3.9002932261425255e-06, + "loss": 0.2161, "step": 15620 }, { - "epoch": 0.9, - "grad_norm": 0.3909616326979364, - "learning_rate": 5.460333456124722e-07, - "loss": 0.2444, + "epoch": 0.72, + "grad_norm": 0.44850355468851294, + "learning_rate": 3.899114230784652e-06, + "loss": 0.3005, "step": 15621 }, { - "epoch": 0.9, - "grad_norm": 0.280909579303553, - "learning_rate": 5.454269969689252e-07, - "loss": 0.2283, + "epoch": 0.72, + "grad_norm": 0.4889422367028318, + "learning_rate": 3.89793537049451e-06, + "loss": 0.3313, "step": 15622 }, { - "epoch": 0.9, - "grad_norm": 0.6943885695673067, - "learning_rate": 5.448209757368361e-07, - "loss": 0.3467, + "epoch": 0.72, + "grad_norm": 0.3309322500891111, + "learning_rate": 3.896756645298201e-06, + "loss": 0.1944, "step": 15623 }, { - "epoch": 0.9, - "grad_norm": 0.214468125396571, - "learning_rate": 5.442152819371882e-07, - "loss": 0.1708, + "epoch": 0.72, + "grad_norm": 0.2566648621492292, + "learning_rate": 3.8955780552218135e-06, + "loss": 0.1925, "step": 15624 }, { - "epoch": 0.9, - "grad_norm": 0.5495278130056973, - "learning_rate": 5.436099155909592e-07, - "loss": 0.3312, + "epoch": 0.72, + "grad_norm": 0.7542194753727185, + "learning_rate": 3.8943996002914485e-06, + "loss": 0.3993, "step": 15625 }, { - "epoch": 0.9, - "grad_norm": 0.314657048380129, - "learning_rate": 5.430048767191121e-07, - "loss": 0.2532, + "epoch": 0.72, + "grad_norm": 0.3427039037583272, + "learning_rate": 3.893221280533188e-06, + "loss": 0.2214, "step": 15626 }, { - "epoch": 0.9, - "grad_norm": 0.5585893096454551, - "learning_rate": 5.424001653426003e-07, - "loss": 0.3362, + "epoch": 0.72, + "grad_norm": 0.41548833197526674, + "learning_rate": 3.892043095973123e-06, + "loss": 0.2888, "step": 15627 }, { - "epoch": 0.9, - "grad_norm": 0.36554119112794337, - "learning_rate": 5.417957814823627e-07, - "loss": 0.2746, + "epoch": 0.72, + "grad_norm": 1.1570078418309857, + "learning_rate": 3.8908650466373355e-06, + "loss": 0.6182, "step": 15628 }, { - "epoch": 0.9, - "grad_norm": 0.3853663692179569, - "learning_rate": 5.411917251593313e-07, - "loss": 0.2478, + "epoch": 0.72, + "grad_norm": 0.41029184416798337, + "learning_rate": 3.889687132551913e-06, + "loss": 0.1997, "step": 15629 }, { - "epoch": 0.9, - "grad_norm": 0.21621671494700923, - "learning_rate": 5.405879963944238e-07, - "loss": 0.1392, + "epoch": 0.72, + "grad_norm": 0.34696278064543573, + "learning_rate": 3.888509353742927e-06, + "loss": 0.2145, "step": 15630 }, { - "epoch": 0.9, - "grad_norm": 0.4678193698155111, - "learning_rate": 5.39984595208547e-07, - "loss": 0.2854, + "epoch": 0.72, + "grad_norm": 0.3673221662424565, + "learning_rate": 3.887331710236447e-06, + "loss": 0.2936, "step": 15631 }, { - "epoch": 0.9, - "grad_norm": 0.8065746449842706, - "learning_rate": 5.393815216225972e-07, - "loss": 0.3653, + "epoch": 0.72, + "grad_norm": 0.36259148784163087, + "learning_rate": 3.886154202058559e-06, + "loss": 0.2076, "step": 15632 }, { - "epoch": 0.9, - "grad_norm": 0.3145087220616073, - "learning_rate": 5.387787756574592e-07, - "loss": 0.2735, + "epoch": 0.72, + "grad_norm": 0.389142242977095, + "learning_rate": 3.88497682923532e-06, + "loss": 0.2227, "step": 15633 }, { - "epoch": 0.9, - "grad_norm": 0.3790211085952469, - "learning_rate": 5.381763573340049e-07, - "loss": 0.2876, + "epoch": 0.72, + "grad_norm": 0.3768931109329765, + "learning_rate": 3.883799591792804e-06, + "loss": 0.3017, "step": 15634 }, { - "epoch": 0.9, - "grad_norm": 0.9396952767327181, - "learning_rate": 5.375742666730955e-07, - "loss": 0.2666, + "epoch": 0.72, + "grad_norm": 1.3819381680781753, + "learning_rate": 3.882622489757067e-06, + "loss": 0.5256, "step": 15635 }, { - "epoch": 0.9, - "grad_norm": 0.23651179482052748, - "learning_rate": 5.36972503695582e-07, - "loss": 0.1632, + "epoch": 0.72, + "grad_norm": 0.3709581734938562, + "learning_rate": 3.881445523154172e-06, + "loss": 0.2047, "step": 15636 }, { - "epoch": 0.9, - "grad_norm": 0.31046373469158267, - "learning_rate": 5.363710684223045e-07, - "loss": 0.264, + "epoch": 0.72, + "grad_norm": 0.5765386956424883, + "learning_rate": 3.880268692010178e-06, + "loss": 0.1995, "step": 15637 }, { - "epoch": 0.9, - "grad_norm": 0.48144032175554063, - "learning_rate": 5.357699608740907e-07, - "loss": 0.3497, + "epoch": 0.72, + "grad_norm": 0.39150010812542857, + "learning_rate": 3.879091996351135e-06, + "loss": 0.3072, "step": 15638 }, { - "epoch": 0.9, - "grad_norm": 0.538184412368237, - "learning_rate": 5.351691810717552e-07, - "loss": 0.2247, + "epoch": 0.72, + "grad_norm": 0.2971497210331202, + "learning_rate": 3.877915436203099e-06, + "loss": 0.1804, "step": 15639 }, { - "epoch": 0.9, - "grad_norm": 0.42659928158946236, - "learning_rate": 5.345687290361035e-07, - "loss": 0.3106, + "epoch": 0.72, + "grad_norm": 1.4254436596557116, + "learning_rate": 3.876739011592112e-06, + "loss": 0.8242, "step": 15640 }, { - "epoch": 0.9, - "grad_norm": 0.6164374849438661, - "learning_rate": 5.339686047879311e-07, - "loss": 0.3437, + "epoch": 0.72, + "grad_norm": 0.7009587751537443, + "learning_rate": 3.87556272254422e-06, + "loss": 0.3502, "step": 15641 }, { - "epoch": 0.9, - "grad_norm": 0.21254629178819517, - "learning_rate": 5.333688083480182e-07, - "loss": 0.1303, + "epoch": 0.72, + "grad_norm": 0.2942149539446829, + "learning_rate": 3.874386569085471e-06, + "loss": 0.2727, "step": 15642 }, { - "epoch": 0.9, - "grad_norm": 0.4149495701684598, - "learning_rate": 5.327693397371369e-07, - "loss": 0.2945, + "epoch": 0.72, + "grad_norm": 0.4845702885926688, + "learning_rate": 3.873210551241896e-06, + "loss": 0.2854, "step": 15643 }, { - "epoch": 0.9, - "grad_norm": 0.8395100725171276, - "learning_rate": 5.321701989760452e-07, - "loss": 0.4472, + "epoch": 0.72, + "grad_norm": 0.2625763648955105, + "learning_rate": 3.872034669039534e-06, + "loss": 0.1543, "step": 15644 }, { - "epoch": 0.9, - "grad_norm": 0.2746630354052698, - "learning_rate": 5.315713860854921e-07, - "loss": 0.2244, + "epoch": 0.72, + "grad_norm": 0.3943947073440283, + "learning_rate": 3.8708589225044195e-06, + "loss": 0.2894, "step": 15645 }, { - "epoch": 0.9, - "grad_norm": 0.3804893461734729, - "learning_rate": 5.309729010862163e-07, - "loss": 0.3088, + "epoch": 0.72, + "grad_norm": 0.32194494396560275, + "learning_rate": 3.869683311662582e-06, + "loss": 0.2435, "step": 15646 }, { - "epoch": 0.9, - "grad_norm": 1.3065235001536424, - "learning_rate": 5.303747439989415e-07, - "loss": 0.4975, + "epoch": 0.72, + "grad_norm": 0.7082241396307494, + "learning_rate": 3.8685078365400465e-06, + "loss": 0.3448, "step": 15647 }, { - "epoch": 0.9, - "grad_norm": 0.1564879560582931, - "learning_rate": 5.29776914844382e-07, - "loss": 0.0707, + "epoch": 0.72, + "grad_norm": 0.4126253595397479, + "learning_rate": 3.867332497162836e-06, + "loss": 0.2703, "step": 15648 }, { - "epoch": 0.9, - "grad_norm": 0.285253213852128, - "learning_rate": 5.291794136432393e-07, - "loss": 0.2486, + "epoch": 0.72, + "grad_norm": 0.4334016351378079, + "learning_rate": 3.866157293556978e-06, + "loss": 0.1809, "step": 15649 }, { - "epoch": 0.9, - "grad_norm": 0.5030656323905838, - "learning_rate": 5.285822404162066e-07, - "loss": 0.3373, + "epoch": 0.72, + "grad_norm": 0.2778540957382801, + "learning_rate": 3.864982225748481e-06, + "loss": 0.2515, "step": 15650 }, { - "epoch": 0.9, - "grad_norm": 0.7274055892952281, - "learning_rate": 5.279853951839653e-07, - "loss": 0.4009, + "epoch": 0.72, + "grad_norm": 0.3776989518825572, + "learning_rate": 3.863807293763368e-06, + "loss": 0.2627, "step": 15651 }, { - "epoch": 0.9, - "grad_norm": 0.3379741609185097, - "learning_rate": 5.27388877967181e-07, - "loss": 0.2087, + "epoch": 0.72, + "grad_norm": 1.1482531528535536, + "learning_rate": 3.862632497627645e-06, + "loss": 0.397, "step": 15652 }, { - "epoch": 0.9, - "grad_norm": 0.5153461591870591, - "learning_rate": 5.267926887865127e-07, - "loss": 0.2932, + "epoch": 0.72, + "grad_norm": 0.7083691691322206, + "learning_rate": 3.861457837367324e-06, + "loss": 0.3826, "step": 15653 }, { - "epoch": 0.9, - "grad_norm": 0.33147772787988616, - "learning_rate": 5.261968276626062e-07, - "loss": 0.2518, + "epoch": 0.72, + "grad_norm": 0.2937339625804799, + "learning_rate": 3.860283313008412e-06, + "loss": 0.255, "step": 15654 }, { - "epoch": 0.9, - "grad_norm": 0.2169728639824801, - "learning_rate": 5.256012946160971e-07, - "loss": 0.1588, + "epoch": 0.72, + "grad_norm": 0.5169200025397416, + "learning_rate": 3.859108924576906e-06, + "loss": 0.3697, "step": 15655 }, { - "epoch": 0.9, - "grad_norm": 0.7769396824390693, - "learning_rate": 5.250060896676068e-07, - "loss": 0.4372, + "epoch": 0.72, + "grad_norm": 0.25990425762859615, + "learning_rate": 3.857934672098815e-06, + "loss": 0.0935, "step": 15656 }, { - "epoch": 0.9, - "grad_norm": 0.3540478962758659, - "learning_rate": 5.244112128377477e-07, - "loss": 0.2981, + "epoch": 0.72, + "grad_norm": 0.3778646118737523, + "learning_rate": 3.856760555600122e-06, + "loss": 0.258, "step": 15657 }, { - "epoch": 0.9, - "grad_norm": 0.31449744620930736, - "learning_rate": 5.238166641471221e-07, - "loss": 0.2103, + "epoch": 0.72, + "grad_norm": 0.3784369445637982, + "learning_rate": 3.855586575106838e-06, + "loss": 0.3239, "step": 15658 }, { - "epoch": 0.9, - "grad_norm": 1.3570129545424443, - "learning_rate": 5.23222443616318e-07, - "loss": 0.4924, + "epoch": 0.72, + "grad_norm": 0.7680662081639719, + "learning_rate": 3.8544127306449446e-06, + "loss": 0.3065, "step": 15659 }, { - "epoch": 0.9, - "grad_norm": 0.28912954651128253, - "learning_rate": 5.226285512659123e-07, - "loss": 0.1437, + "epoch": 0.72, + "grad_norm": 0.37300712850379814, + "learning_rate": 3.8532390222404245e-06, + "loss": 0.2512, "step": 15660 }, { - "epoch": 0.9, - "grad_norm": 0.2773332070677685, - "learning_rate": 5.220349871164732e-07, - "loss": 0.2187, + "epoch": 0.72, + "grad_norm": 1.3817453729144542, + "learning_rate": 3.852065449919271e-06, + "loss": 0.5215, "step": 15661 }, { - "epoch": 0.9, - "grad_norm": 0.4015892649555536, - "learning_rate": 5.214417511885539e-07, - "loss": 0.308, + "epoch": 0.72, + "grad_norm": 0.20511682523674526, + "learning_rate": 3.850892013707461e-06, + "loss": 0.1566, "step": 15662 }, { - "epoch": 0.9, - "grad_norm": 0.6413421071522666, - "learning_rate": 5.208488435026992e-07, - "loss": 0.4395, + "epoch": 0.72, + "grad_norm": 0.3901461597766473, + "learning_rate": 3.84971871363098e-06, + "loss": 0.2696, "step": 15663 }, { - "epoch": 0.9, - "grad_norm": 0.4767384537292822, - "learning_rate": 5.202562640794429e-07, - "loss": 0.2826, + "epoch": 0.72, + "grad_norm": 0.8933753289728933, + "learning_rate": 3.848545549715795e-06, + "loss": 0.5159, "step": 15664 }, { - "epoch": 0.9, - "grad_norm": 0.38488084441750964, - "learning_rate": 5.196640129393038e-07, - "loss": 0.2458, + "epoch": 0.72, + "grad_norm": 0.48799684387146985, + "learning_rate": 3.847372521987883e-06, + "loss": 0.2574, "step": 15665 }, { - "epoch": 0.9, - "grad_norm": 1.3259987938419826, - "learning_rate": 5.190720901027901e-07, - "loss": 0.5462, + "epoch": 0.72, + "grad_norm": 0.38524738078674114, + "learning_rate": 3.846199630473216e-06, + "loss": 0.2446, "step": 15666 }, { - "epoch": 0.9, - "grad_norm": 0.2478349774975706, - "learning_rate": 5.184804955904066e-07, - "loss": 0.2062, + "epoch": 0.72, + "grad_norm": 0.5486793187087692, + "learning_rate": 3.845026875197755e-06, + "loss": 0.3006, "step": 15667 }, { - "epoch": 0.9, - "grad_norm": 0.5459979152515864, - "learning_rate": 5.178892294226334e-07, - "loss": 0.3124, + "epoch": 0.72, + "grad_norm": 0.33325406962915516, + "learning_rate": 3.84385425618747e-06, + "loss": 0.2185, "step": 15668 }, { - "epoch": 0.9, - "grad_norm": 0.35238957217762756, - "learning_rate": 5.172982916199465e-07, - "loss": 0.3126, + "epoch": 0.72, + "grad_norm": 0.3107367560350991, + "learning_rate": 3.842681773468316e-06, + "loss": 0.1941, "step": 15669 }, { - "epoch": 0.9, - "grad_norm": 0.3040986720543595, - "learning_rate": 5.167076822028149e-07, - "loss": 0.2522, + "epoch": 0.72, + "grad_norm": 0.4015495218843103, + "learning_rate": 3.841509427066252e-06, + "loss": 0.3003, "step": 15670 }, { - "epoch": 0.9, - "grad_norm": 0.23105199235660073, - "learning_rate": 5.16117401191687e-07, - "loss": 0.0663, + "epoch": 0.72, + "grad_norm": 0.8115963857670337, + "learning_rate": 3.840337217007238e-06, + "loss": 0.4071, "step": 15671 }, { - "epoch": 0.9, - "grad_norm": 0.4992153585528372, - "learning_rate": 5.155274486070072e-07, - "loss": 0.3346, + "epoch": 0.72, + "grad_norm": 0.3243923276799991, + "learning_rate": 3.839165143317217e-06, + "loss": 0.188, "step": 15672 }, { - "epoch": 0.9, - "grad_norm": 0.261839200482087, - "learning_rate": 5.149378244692027e-07, - "loss": 0.2322, + "epoch": 0.72, + "grad_norm": 1.237999659161016, + "learning_rate": 3.837993206022146e-06, + "loss": 0.5433, "step": 15673 }, { - "epoch": 0.9, - "grad_norm": 0.5089769058171006, - "learning_rate": 5.143485287986927e-07, - "loss": 0.2294, + "epoch": 0.72, + "grad_norm": 0.40305880319537346, + "learning_rate": 3.836821405147959e-06, + "loss": 0.2913, "step": 15674 }, { - "epoch": 0.9, - "grad_norm": 0.3794901247148647, - "learning_rate": 5.137595616158863e-07, - "loss": 0.2656, + "epoch": 0.72, + "grad_norm": 0.24133924331697731, + "learning_rate": 3.835649740720613e-06, + "loss": 0.1634, "step": 15675 }, { - "epoch": 0.9, - "grad_norm": 0.33643466430036256, - "learning_rate": 5.131709229411785e-07, - "loss": 0.2438, + "epoch": 0.72, + "grad_norm": 0.7559151875857146, + "learning_rate": 3.834478212766036e-06, + "loss": 0.3962, "step": 15676 }, { - "epoch": 0.9, - "grad_norm": 0.3647097642493535, - "learning_rate": 5.12582612794954e-07, - "loss": 0.282, + "epoch": 0.72, + "grad_norm": 0.4852086887700415, + "learning_rate": 3.8333068213101744e-06, + "loss": 0.3541, "step": 15677 }, { - "epoch": 0.9, - "grad_norm": 0.7738986859788042, - "learning_rate": 5.119946311975843e-07, - "loss": 0.2076, + "epoch": 0.72, + "grad_norm": 0.22273746221949853, + "learning_rate": 3.8321355663789504e-06, + "loss": 0.1711, "step": 15678 }, { - "epoch": 0.9, - "grad_norm": 0.34867075246133533, - "learning_rate": 5.114069781694331e-07, - "loss": 0.2548, + "epoch": 0.72, + "grad_norm": 1.498957527909359, + "learning_rate": 3.830964447998302e-06, + "loss": 0.6366, "step": 15679 }, { - "epoch": 0.9, - "grad_norm": 0.6216173151236968, - "learning_rate": 5.108196537308507e-07, - "loss": 0.3724, + "epoch": 0.72, + "grad_norm": 0.380552626234912, + "learning_rate": 3.8297934661941586e-06, + "loss": 0.2046, "step": 15680 }, { - "epoch": 0.9, - "grad_norm": 0.2894107217697507, - "learning_rate": 5.102326579021754e-07, - "loss": 0.2367, + "epoch": 0.72, + "grad_norm": 0.35829599918961436, + "learning_rate": 3.828622620992436e-06, + "loss": 0.2537, "step": 15681 }, { - "epoch": 0.9, - "grad_norm": 0.33805317461218487, - "learning_rate": 5.096459907037344e-07, - "loss": 0.2521, + "epoch": 0.72, + "grad_norm": 0.4029262343448055, + "learning_rate": 3.827451912419062e-06, + "loss": 0.2539, "step": 15682 }, { - "epoch": 0.9, - "grad_norm": 0.30537643202596476, - "learning_rate": 5.090596521558455e-07, - "loss": 0.1848, + "epoch": 0.72, + "grad_norm": 0.5144365403223831, + "learning_rate": 3.826281340499957e-06, + "loss": 0.3457, "step": 15683 }, { - "epoch": 0.9, - "grad_norm": 0.44721678914316987, - "learning_rate": 5.084736422788128e-07, - "loss": 0.2464, + "epoch": 0.72, + "grad_norm": 0.43416946729881545, + "learning_rate": 3.825110905261028e-06, + "loss": 0.2554, "step": 15684 }, { - "epoch": 0.9, - "grad_norm": 0.25725829442115, - "learning_rate": 5.078879610929299e-07, - "loss": 0.2437, + "epoch": 0.72, + "grad_norm": 0.6713422374588778, + "learning_rate": 3.823940606728196e-06, + "loss": 0.2313, "step": 15685 }, { - "epoch": 0.9, - "grad_norm": 0.6813280238885259, - "learning_rate": 5.073026086184785e-07, - "loss": 0.4465, + "epoch": 0.72, + "grad_norm": 0.3935484239194369, + "learning_rate": 3.822770444927363e-06, + "loss": 0.3042, "step": 15686 }, { - "epoch": 0.9, - "grad_norm": 0.22755669708281842, - "learning_rate": 5.067175848757288e-07, - "loss": 0.1386, + "epoch": 0.72, + "grad_norm": 0.4715434427901114, + "learning_rate": 3.8216004198844395e-06, + "loss": 0.3121, "step": 15687 }, { - "epoch": 0.9, - "grad_norm": 0.23563842364379814, - "learning_rate": 5.061328898849416e-07, - "loss": 0.1959, + "epoch": 0.72, + "grad_norm": 0.47420930449002086, + "learning_rate": 3.8204305316253295e-06, + "loss": 0.2842, "step": 15688 }, { - "epoch": 0.9, - "grad_norm": 0.3534225275012728, - "learning_rate": 5.055485236663638e-07, - "loss": 0.2719, + "epoch": 0.72, + "grad_norm": 0.4655929402782526, + "learning_rate": 3.819260780175929e-06, + "loss": 0.2778, "step": 15689 }, { - "epoch": 0.9, - "grad_norm": 0.6615023017392966, - "learning_rate": 5.049644862402336e-07, - "loss": 0.3489, + "epoch": 0.72, + "grad_norm": 0.2908665632089152, + "learning_rate": 3.818091165562142e-06, + "loss": 0.2586, "step": 15690 }, { - "epoch": 0.9, - "grad_norm": 0.30468397890498616, - "learning_rate": 5.043807776267729e-07, - "loss": 0.203, + "epoch": 0.72, + "grad_norm": 0.3741701081638742, + "learning_rate": 3.816921687809851e-06, + "loss": 0.1652, "step": 15691 }, { - "epoch": 0.9, - "grad_norm": 1.2458868096231006, - "learning_rate": 5.037973978461985e-07, - "loss": 0.6232, + "epoch": 0.72, + "grad_norm": 0.8736376588969164, + "learning_rate": 3.815752346944962e-06, + "loss": 0.4414, "step": 15692 }, { - "epoch": 0.9, - "grad_norm": 0.3412161058565019, - "learning_rate": 5.032143469187123e-07, - "loss": 0.3043, + "epoch": 0.72, + "grad_norm": 0.41785909512710767, + "learning_rate": 3.8145831429933523e-06, + "loss": 0.2921, "step": 15693 }, { - "epoch": 0.9, - "grad_norm": 0.2687736892933335, - "learning_rate": 5.026316248645047e-07, - "loss": 0.1856, + "epoch": 0.72, + "grad_norm": 0.32548528205580224, + "learning_rate": 3.8134140759809126e-06, + "loss": 0.2858, "step": 15694 }, { - "epoch": 0.9, - "grad_norm": 0.2705685324835451, - "learning_rate": 5.020492317037539e-07, - "loss": 0.1572, + "epoch": 0.72, + "grad_norm": 0.3136029210610828, + "learning_rate": 3.8122451459335195e-06, + "loss": 0.1695, "step": 15695 }, { - "epoch": 0.9, - "grad_norm": 0.4139012116678503, - "learning_rate": 5.014671674566319e-07, - "loss": 0.3283, + "epoch": 0.72, + "grad_norm": 0.26026711916928963, + "learning_rate": 3.8110763528770543e-06, + "loss": 0.1964, "step": 15696 }, { - "epoch": 0.9, - "grad_norm": 0.304048179285271, - "learning_rate": 5.008854321432932e-07, - "loss": 0.1848, + "epoch": 0.72, + "grad_norm": 1.3996266118335197, + "learning_rate": 3.809907696837398e-06, + "loss": 0.4759, "step": 15697 }, { - "epoch": 0.9, - "grad_norm": 0.5339104269414403, - "learning_rate": 5.003040257838831e-07, - "loss": 0.3313, + "epoch": 0.72, + "grad_norm": 0.5615102118614147, + "learning_rate": 3.808739177840416e-06, + "loss": 0.2631, "step": 15698 }, { - "epoch": 0.9, - "grad_norm": 1.1617194889579199, - "learning_rate": 4.997229483985366e-07, - "loss": 0.4965, + "epoch": 0.72, + "grad_norm": 0.3486638604311125, + "learning_rate": 3.8075707959119845e-06, + "loss": 0.2705, "step": 15699 }, { - "epoch": 0.9, - "grad_norm": 0.4026837415595859, - "learning_rate": 4.991422000073753e-07, - "loss": 0.2646, + "epoch": 0.72, + "grad_norm": 0.9652546826954341, + "learning_rate": 3.8064025510779636e-06, + "loss": 0.4812, "step": 15700 }, { - "epoch": 0.9, - "grad_norm": 0.21280087447699006, - "learning_rate": 4.985617806305121e-07, - "loss": 0.1822, + "epoch": 0.72, + "grad_norm": 0.3485619267211182, + "learning_rate": 3.805234443364221e-06, + "loss": 0.2588, "step": 15701 }, { - "epoch": 0.9, - "grad_norm": 0.7633811247449117, - "learning_rate": 4.979816902880441e-07, - "loss": 0.3467, + "epoch": 0.72, + "grad_norm": 0.29300597530626005, + "learning_rate": 3.80406647279662e-06, + "loss": 0.1689, "step": 15702 }, { - "epoch": 0.9, - "grad_norm": 0.3855766888818858, - "learning_rate": 4.97401929000062e-07, - "loss": 0.2693, + "epoch": 0.72, + "grad_norm": 0.3004916062130135, + "learning_rate": 3.8028986394010124e-06, + "loss": 0.2323, "step": 15703 }, { - "epoch": 0.9, - "grad_norm": 0.34135917725265313, - "learning_rate": 4.968224967866431e-07, - "loss": 0.2474, + "epoch": 0.72, + "grad_norm": 0.7437665926195619, + "learning_rate": 3.8017309432032566e-06, + "loss": 0.2994, "step": 15704 }, { - "epoch": 0.9, - "grad_norm": 0.4960999651515975, - "learning_rate": 4.962433936678523e-07, - "loss": 0.3536, + "epoch": 0.72, + "grad_norm": 0.3822550541202694, + "learning_rate": 3.8005633842292065e-06, + "loss": 0.236, "step": 15705 }, { - "epoch": 0.9, - "grad_norm": 0.4109252916237451, - "learning_rate": 4.956646196637438e-07, - "loss": 0.2418, + "epoch": 0.72, + "grad_norm": 0.34684852686513096, + "learning_rate": 3.799395962504705e-06, + "loss": 0.2891, "step": 15706 }, { - "epoch": 0.9, - "grad_norm": 0.3962821887700003, - "learning_rate": 4.950861747943603e-07, - "loss": 0.203, + "epoch": 0.72, + "grad_norm": 1.274273200512106, + "learning_rate": 3.7982286780556043e-06, + "loss": 0.6597, "step": 15707 }, { - "epoch": 0.9, - "grad_norm": 0.32311958760614523, - "learning_rate": 4.945080590797346e-07, - "loss": 0.2377, + "epoch": 0.72, + "grad_norm": 0.25567467420149104, + "learning_rate": 3.7970615309077364e-06, + "loss": 0.1358, "step": 15708 }, { - "epoch": 0.9, - "grad_norm": 0.2812146473652669, - "learning_rate": 4.939302725398865e-07, - "loss": 0.2372, + "epoch": 0.72, + "grad_norm": 0.2920397335743465, + "learning_rate": 3.7958945210869546e-06, + "loss": 0.2267, "step": 15709 }, { - "epoch": 0.9, - "grad_norm": 1.489036443344534, - "learning_rate": 4.933528151948241e-07, - "loss": 0.2296, + "epoch": 0.72, + "grad_norm": 0.48707749842562026, + "learning_rate": 3.7947276486190843e-06, + "loss": 0.329, "step": 15710 }, { - "epoch": 0.9, - "grad_norm": 1.1842550860891952, - "learning_rate": 4.92775687064545e-07, - "loss": 0.8144, + "epoch": 0.72, + "grad_norm": 0.40741947185938127, + "learning_rate": 3.7935609135299677e-06, + "loss": 0.2258, "step": 15711 }, { - "epoch": 0.9, - "grad_norm": 0.3325210959542368, - "learning_rate": 4.921988881690332e-07, - "loss": 0.2314, + "epoch": 0.72, + "grad_norm": 0.6537090350026551, + "learning_rate": 3.7923943158454267e-06, + "loss": 0.358, "step": 15712 }, { - "epoch": 0.9, - "grad_norm": 0.4057706489840598, - "learning_rate": 4.91622418528267e-07, - "loss": 0.3007, + "epoch": 0.72, + "grad_norm": 0.5366485143662795, + "learning_rate": 3.791227855591293e-06, + "loss": 0.3598, "step": 15713 }, { - "epoch": 0.9, - "grad_norm": 0.33307672193546267, - "learning_rate": 4.910462781622072e-07, - "loss": 0.1754, + "epoch": 0.72, + "grad_norm": 0.24107228739404737, + "learning_rate": 3.790061532793393e-06, + "loss": 0.1686, "step": 15714 }, { - "epoch": 0.9, - "grad_norm": 0.6227748994436654, - "learning_rate": 4.904704670908067e-07, - "loss": 0.3603, + "epoch": 0.72, + "grad_norm": 0.37434731596704085, + "learning_rate": 3.7888953474775424e-06, + "loss": 0.2461, "step": 15715 }, { - "epoch": 0.9, - "grad_norm": 0.42078372345346, - "learning_rate": 4.89894985334003e-07, - "loss": 0.2779, + "epoch": 0.72, + "grad_norm": 0.6857064605693761, + "learning_rate": 3.787729299669566e-06, + "loss": 0.3571, "step": 15716 }, { - "epoch": 0.9, - "grad_norm": 0.30774036410484645, - "learning_rate": 4.893198329117277e-07, - "loss": 0.2554, + "epoch": 0.72, + "grad_norm": 0.3127842792926238, + "learning_rate": 3.7865633893952725e-06, + "loss": 0.1892, "step": 15717 }, { - "epoch": 0.9, - "grad_norm": 0.41423426847798006, - "learning_rate": 4.887450098438984e-07, - "loss": 0.2504, + "epoch": 0.72, + "grad_norm": 0.3514624005747015, + "learning_rate": 3.7853976166804762e-06, + "loss": 0.2894, "step": 15718 }, { - "epoch": 0.9, - "grad_norm": 0.4670151159835837, - "learning_rate": 4.881705161504202e-07, - "loss": 0.3101, + "epoch": 0.72, + "grad_norm": 1.1509287829486181, + "learning_rate": 3.784231981550991e-06, + "loss": 0.6316, "step": 15719 }, { - "epoch": 0.9, - "grad_norm": 0.21198178399623216, - "learning_rate": 4.875963518511872e-07, - "loss": 0.1375, + "epoch": 0.72, + "grad_norm": 0.475808753749805, + "learning_rate": 3.783066484032615e-06, + "loss": 0.2222, "step": 15720 }, { - "epoch": 0.9, - "grad_norm": 0.3436783552350103, - "learning_rate": 4.870225169660836e-07, - "loss": 0.2452, + "epoch": 0.72, + "grad_norm": 0.2543662719721029, + "learning_rate": 3.781901124151155e-06, + "loss": 0.2014, "step": 15721 }, { - "epoch": 0.9, - "grad_norm": 0.6392564449176125, - "learning_rate": 4.864490115149823e-07, - "loss": 0.3374, + "epoch": 0.72, + "grad_norm": 0.4531757019950255, + "learning_rate": 3.7807359019324107e-06, + "loss": 0.3197, "step": 15722 }, { - "epoch": 0.9, - "grad_norm": 0.781527813043474, - "learning_rate": 4.858758355177418e-07, - "loss": 0.337, + "epoch": 0.72, + "grad_norm": 0.8477756234937067, + "learning_rate": 3.779570817402184e-06, + "loss": 0.2629, "step": 15723 }, { - "epoch": 0.9, - "grad_norm": 0.3130251635811382, - "learning_rate": 4.853029889942129e-07, - "loss": 0.2628, + "epoch": 0.72, + "grad_norm": 0.3552710276476677, + "learning_rate": 3.7784058705862624e-06, + "loss": 0.2274, "step": 15724 }, { - "epoch": 0.9, - "grad_norm": 0.35452990662259, - "learning_rate": 4.84730471964231e-07, - "loss": 0.2988, + "epoch": 0.72, + "grad_norm": 0.3890133209647503, + "learning_rate": 3.777241061510433e-06, + "loss": 0.3176, "step": 15725 }, { - "epoch": 0.9, - "grad_norm": 0.41564857624579266, - "learning_rate": 4.841582844476244e-07, - "loss": 0.2544, + "epoch": 0.72, + "grad_norm": 0.6872313548256496, + "learning_rate": 3.776076390200495e-06, + "loss": 0.3484, "step": 15726 }, { - "epoch": 0.9, - "grad_norm": 0.2698526571349199, - "learning_rate": 4.835864264642076e-07, - "loss": 0.1338, + "epoch": 0.72, + "grad_norm": 0.3531996267818355, + "learning_rate": 3.774911856682224e-06, + "loss": 0.2132, "step": 15727 }, { - "epoch": 0.9, - "grad_norm": 0.5453059779722633, - "learning_rate": 4.830148980337834e-07, - "loss": 0.3428, + "epoch": 0.72, + "grad_norm": 0.49536312758731754, + "learning_rate": 3.7737474609814086e-06, + "loss": 0.2708, "step": 15728 }, { - "epoch": 0.9, - "grad_norm": 0.3471356881056745, - "learning_rate": 4.824436991761428e-07, - "loss": 0.3259, + "epoch": 0.72, + "grad_norm": 0.4078905754827501, + "learning_rate": 3.7725832031238187e-06, + "loss": 0.2697, "step": 15729 }, { - "epoch": 0.9, - "grad_norm": 0.34684817296127346, - "learning_rate": 4.818728299110686e-07, - "loss": 0.2164, + "epoch": 0.72, + "grad_norm": 0.3409810135075762, + "learning_rate": 3.771419083135236e-06, + "loss": 0.257, "step": 15730 }, { - "epoch": 0.9, - "grad_norm": 0.8953070877213622, - "learning_rate": 4.813022902583286e-07, - "loss": 0.4185, + "epoch": 0.72, + "grad_norm": 1.4315413263477226, + "learning_rate": 3.7702551010414333e-06, + "loss": 0.4046, "step": 15731 }, { - "epoch": 0.9, - "grad_norm": 0.41813356179096717, - "learning_rate": 4.807320802376824e-07, - "loss": 0.2948, + "epoch": 0.72, + "grad_norm": 0.6336688246350893, + "learning_rate": 3.769091256868177e-06, + "loss": 0.3158, "step": 15732 }, { - "epoch": 0.9, - "grad_norm": 0.1447814308629961, - "learning_rate": 4.801621998688722e-07, - "loss": 0.0716, + "epoch": 0.72, + "grad_norm": 0.4044531896693863, + "learning_rate": 3.767927550641237e-06, + "loss": 0.2844, "step": 15733 }, { - "epoch": 0.9, - "grad_norm": 0.3883904878065946, - "learning_rate": 4.795926491716396e-07, - "loss": 0.3139, + "epoch": 0.72, + "grad_norm": 0.2361510393424766, + "learning_rate": 3.766763982386371e-06, + "loss": 0.1766, "step": 15734 }, { - "epoch": 0.9, - "grad_norm": 0.6516174961256956, - "learning_rate": 4.790234281657025e-07, - "loss": 0.3845, + "epoch": 0.72, + "grad_norm": 0.6185799716474243, + "learning_rate": 3.765600552129344e-06, + "loss": 0.3196, "step": 15735 }, { - "epoch": 0.9, - "grad_norm": 0.3672484706266206, - "learning_rate": 4.784545368707738e-07, - "loss": 0.204, + "epoch": 0.72, + "grad_norm": 0.4171507387718398, + "learning_rate": 3.764437259895913e-06, + "loss": 0.2855, "step": 15736 }, { - "epoch": 0.9, - "grad_norm": 0.32729802797217145, - "learning_rate": 4.778859753065545e-07, - "loss": 0.2943, + "epoch": 0.72, + "grad_norm": 0.3809399995176105, + "learning_rate": 3.7632741057118304e-06, + "loss": 0.2624, "step": 15737 }, { - "epoch": 0.9, - "grad_norm": 0.4014564746228084, - "learning_rate": 4.773177434927356e-07, - "loss": 0.1903, + "epoch": 0.72, + "grad_norm": 0.7217694008642399, + "learning_rate": 3.7621110896028467e-06, + "loss": 0.3815, "step": 15738 }, { - "epoch": 0.9, - "grad_norm": 0.42003454062639795, - "learning_rate": 4.767498414489935e-07, - "loss": 0.2329, + "epoch": 0.72, + "grad_norm": 0.4033870229440153, + "learning_rate": 3.7609482115947115e-06, + "loss": 0.2681, "step": 15739 }, { - "epoch": 0.9, - "grad_norm": 0.25069726353847577, - "learning_rate": 4.7618226919499465e-07, - "loss": 0.233, + "epoch": 0.72, + "grad_norm": 0.3325661066863811, + "learning_rate": 3.7597854717131733e-06, + "loss": 0.1437, "step": 15740 }, { - "epoch": 0.9, - "grad_norm": 0.7447684355120814, - "learning_rate": 4.756150267503934e-07, - "loss": 0.4077, + "epoch": 0.72, + "grad_norm": 0.3356718459499345, + "learning_rate": 3.7586228699839666e-06, + "loss": 0.2491, "step": 15741 }, { - "epoch": 0.9, - "grad_norm": 0.5399344736483198, - "learning_rate": 4.750481141348362e-07, - "loss": 0.3129, + "epoch": 0.72, + "grad_norm": 0.3656771310244979, + "learning_rate": 3.7574604064328336e-06, + "loss": 0.2749, "step": 15742 }, { - "epoch": 0.9, - "grad_norm": 0.34196695458758986, - "learning_rate": 4.7448153136795185e-07, - "loss": 0.2277, + "epoch": 0.72, + "grad_norm": 0.9700776302059902, + "learning_rate": 3.7562980810855144e-06, + "loss": 0.4917, "step": 15743 }, { - "epoch": 0.9, - "grad_norm": 0.38306387196871694, - "learning_rate": 4.739152784693635e-07, - "loss": 0.2897, + "epoch": 0.72, + "grad_norm": 0.6505888843461077, + "learning_rate": 3.755135893967735e-06, + "loss": 0.2754, "step": 15744 }, { - "epoch": 0.9, - "grad_norm": 0.2161941592051655, - "learning_rate": 4.733493554586777e-07, - "loss": 0.1393, + "epoch": 0.72, + "grad_norm": 0.3332711068042054, + "learning_rate": 3.753973845105231e-06, + "loss": 0.2568, "step": 15745 }, { - "epoch": 0.9, - "grad_norm": 0.359758705296901, - "learning_rate": 4.727837623554954e-07, - "loss": 0.2245, + "epoch": 0.72, + "grad_norm": 0.5501707667046273, + "learning_rate": 3.7528119345237224e-06, + "loss": 0.3566, "step": 15746 }, { - "epoch": 0.9, - "grad_norm": 0.6364944282856332, - "learning_rate": 4.722184991794021e-07, - "loss": 0.368, + "epoch": 0.72, + "grad_norm": 0.294511522410627, + "learning_rate": 3.7516501622489365e-06, + "loss": 0.0985, "step": 15747 }, { - "epoch": 0.9, - "grad_norm": 0.29869548642001714, - "learning_rate": 4.7165356594997215e-07, - "loss": 0.2794, + "epoch": 0.72, + "grad_norm": 0.3988610014438023, + "learning_rate": 3.750488528306598e-06, + "loss": 0.314, "step": 15748 }, { - "epoch": 0.9, - "grad_norm": 0.3514622933369766, - "learning_rate": 4.710889626867687e-07, - "loss": 0.2274, + "epoch": 0.72, + "grad_norm": 0.3690077445223635, + "learning_rate": 3.7493270327224162e-06, + "loss": 0.2936, "step": 15749 }, { - "epoch": 0.9, - "grad_norm": 0.534507202273457, - "learning_rate": 4.7052468940934405e-07, - "loss": 0.2445, + "epoch": 0.72, + "grad_norm": 0.6160611994065603, + "learning_rate": 3.748165675522113e-06, + "loss": 0.2441, "step": 15750 }, { - "epoch": 0.9, - "grad_norm": 0.40165702496329014, - "learning_rate": 4.699607461372413e-07, - "loss": 0.2209, + "epoch": 0.72, + "grad_norm": 0.4135960420662529, + "learning_rate": 3.747004456731389e-06, + "loss": 0.2783, "step": 15751 }, { - "epoch": 0.91, - "grad_norm": 0.29933132482387254, - "learning_rate": 4.6939713288998824e-07, - "loss": 0.2888, + "epoch": 0.72, + "grad_norm": 0.51599501583834, + "learning_rate": 3.745843376375966e-06, + "loss": 0.2443, "step": 15752 }, { - "epoch": 0.91, - "grad_norm": 0.39195373443976395, - "learning_rate": 4.6883384968710146e-07, - "loss": 0.275, + "epoch": 0.72, + "grad_norm": 0.24411267553542826, + "learning_rate": 3.7446824344815437e-06, + "loss": 0.176, "step": 15753 }, { - "epoch": 0.91, - "grad_norm": 0.804794875047898, - "learning_rate": 4.682708965480887e-07, - "loss": 0.447, + "epoch": 0.72, + "grad_norm": 0.41036095577992754, + "learning_rate": 3.74352163107382e-06, + "loss": 0.2969, "step": 15754 }, { - "epoch": 0.91, - "grad_norm": 0.38284619581894447, - "learning_rate": 4.677082734924454e-07, - "loss": 0.2555, + "epoch": 0.72, + "grad_norm": 0.7474440896137018, + "learning_rate": 3.7423609661784965e-06, + "loss": 0.4069, "step": 15755 }, { - "epoch": 0.91, - "grad_norm": 0.34044252869584235, - "learning_rate": 4.67145980539655e-07, - "loss": 0.2351, + "epoch": 0.72, + "grad_norm": 0.7842154866045788, + "learning_rate": 3.7412004398212707e-06, + "loss": 0.3861, "step": 15756 }, { - "epoch": 0.91, - "grad_norm": 0.2561805020204338, - "learning_rate": 4.665840177091885e-07, - "loss": 0.1603, + "epoch": 0.72, + "grad_norm": 0.2918373051885403, + "learning_rate": 3.740040052027838e-06, + "loss": 0.2171, "step": 15757 }, { - "epoch": 0.91, - "grad_norm": 0.3286363690720469, - "learning_rate": 4.66022385020507e-07, - "loss": 0.2712, + "epoch": 0.72, + "grad_norm": 0.35582706141741727, + "learning_rate": 3.7388798028238815e-06, + "loss": 0.2321, "step": 15758 }, { - "epoch": 0.91, - "grad_norm": 0.7834022205802545, - "learning_rate": 4.6546108249306163e-07, - "loss": 0.3245, + "epoch": 0.72, + "grad_norm": 0.5944188150613753, + "learning_rate": 3.7377196922350924e-06, + "loss": 0.2547, "step": 15759 }, { - "epoch": 0.91, - "grad_norm": 0.2849494240717211, - "learning_rate": 4.649001101462891e-07, - "loss": 0.2749, + "epoch": 0.72, + "grad_norm": 0.3155941710886627, + "learning_rate": 3.7365597202871564e-06, + "loss": 0.2007, "step": 15760 }, { - "epoch": 0.91, - "grad_norm": 0.3713613181819242, - "learning_rate": 4.6433946799961605e-07, - "loss": 0.2917, + "epoch": 0.72, + "grad_norm": 0.4152677361671202, + "learning_rate": 3.7353998870057484e-06, + "loss": 0.3229, "step": 15761 }, { - "epoch": 0.91, - "grad_norm": 1.3943161939139344, - "learning_rate": 4.6377915607245583e-07, - "loss": 0.218, + "epoch": 0.72, + "grad_norm": 0.8374256117287706, + "learning_rate": 3.7342401924165516e-06, + "loss": 0.4548, "step": 15762 }, { - "epoch": 0.91, - "grad_norm": 0.35360704239027846, - "learning_rate": 4.6321917438421294e-07, - "loss": 0.2162, + "epoch": 0.72, + "grad_norm": 0.30023624381686526, + "learning_rate": 3.7330806365452355e-06, + "loss": 0.1855, "step": 15763 }, { - "epoch": 0.91, - "grad_norm": 0.29520125023410765, - "learning_rate": 4.626595229542818e-07, - "loss": 0.2779, + "epoch": 0.72, + "grad_norm": 1.3322611881831874, + "learning_rate": 3.7319212194174727e-06, + "loss": 0.5595, "step": 15764 }, { - "epoch": 0.91, - "grad_norm": 0.40025625010104504, - "learning_rate": 4.621002018020404e-07, - "loss": 0.3336, + "epoch": 0.72, + "grad_norm": 0.24675612719136056, + "learning_rate": 3.730761941058938e-06, + "loss": 0.2198, "step": 15765 }, { - "epoch": 0.91, - "grad_norm": 0.18183834726311107, - "learning_rate": 4.615412109468587e-07, - "loss": 0.1206, + "epoch": 0.72, + "grad_norm": 0.32197215281158237, + "learning_rate": 3.7296028014952866e-06, + "loss": 0.2036, "step": 15766 }, { - "epoch": 0.91, - "grad_norm": 0.3861098147618549, - "learning_rate": 4.6098255040809447e-07, - "loss": 0.275, + "epoch": 0.72, + "grad_norm": 0.7283613496903513, + "learning_rate": 3.7284438007521896e-06, + "loss": 0.3575, "step": 15767 }, { - "epoch": 0.91, - "grad_norm": 0.39267454900302395, - "learning_rate": 4.604242202050957e-07, - "loss": 0.2835, + "epoch": 0.72, + "grad_norm": 0.8579840306669612, + "learning_rate": 3.727284938855296e-06, + "loss": 0.4802, "step": 15768 }, { - "epoch": 0.91, - "grad_norm": 0.8010348120564672, - "learning_rate": 4.5986622035719575e-07, - "loss": 0.1828, + "epoch": 0.72, + "grad_norm": 0.322197402899127, + "learning_rate": 3.7261262158302745e-06, + "loss": 0.2619, "step": 15769 }, { - "epoch": 0.91, - "grad_norm": 0.35292055566037195, - "learning_rate": 4.59308550883717e-07, - "loss": 0.2744, + "epoch": 0.72, + "grad_norm": 0.5213025328064339, + "learning_rate": 3.7249676317027683e-06, + "loss": 0.2477, "step": 15770 }, { - "epoch": 0.91, - "grad_norm": 0.44243941838987, - "learning_rate": 4.5875121180397276e-07, - "loss": 0.2978, + "epoch": 0.72, + "grad_norm": 0.4197641753216645, + "learning_rate": 3.723809186498434e-06, + "loss": 0.2342, "step": 15771 }, { - "epoch": 0.91, - "grad_norm": 0.29214452693872744, - "learning_rate": 4.581942031372655e-07, - "loss": 0.2196, + "epoch": 0.72, + "grad_norm": 0.3821969383249988, + "learning_rate": 3.7226508802429118e-06, + "loss": 0.2792, "step": 15772 }, { - "epoch": 0.91, - "grad_norm": 0.3078542986697614, - "learning_rate": 4.5763752490288194e-07, - "loss": 0.242, + "epoch": 0.72, + "grad_norm": 0.3574092542502935, + "learning_rate": 3.7214927129618496e-06, + "loss": 0.2605, "step": 15773 }, { - "epoch": 0.91, - "grad_norm": 0.8844199684923539, - "learning_rate": 4.570811771201e-07, - "loss": 0.4156, + "epoch": 0.72, + "grad_norm": 0.48505651992622695, + "learning_rate": 3.7203346846808898e-06, + "loss": 0.2928, "step": 15774 }, { - "epoch": 0.91, - "grad_norm": 0.6335765581868591, - "learning_rate": 4.5652515980818546e-07, - "loss": 0.3745, + "epoch": 0.72, + "grad_norm": 0.34122618850255426, + "learning_rate": 3.719176795425665e-06, + "loss": 0.2384, "step": 15775 }, { - "epoch": 0.91, - "grad_norm": 0.22674350740370733, - "learning_rate": 4.5596947298639614e-07, - "loss": 0.212, + "epoch": 0.72, + "grad_norm": 0.5318886614037652, + "learning_rate": 3.7180190452218157e-06, + "loss": 0.2524, "step": 15776 }, { - "epoch": 0.91, - "grad_norm": 1.7659474533173485, - "learning_rate": 4.554141166739734e-07, - "loss": 0.5087, + "epoch": 0.72, + "grad_norm": 0.3132705936196789, + "learning_rate": 3.7168614340949672e-06, + "loss": 0.2297, "step": 15777 }, { - "epoch": 0.91, - "grad_norm": 0.2534645385419527, - "learning_rate": 4.548590908901496e-07, - "loss": 0.1883, + "epoch": 0.72, + "grad_norm": 0.3519567583977024, + "learning_rate": 3.71570396207075e-06, + "loss": 0.266, "step": 15778 }, { - "epoch": 0.91, - "grad_norm": 0.283779004129321, - "learning_rate": 4.5430439565414263e-07, - "loss": 0.1839, + "epoch": 0.72, + "grad_norm": 0.7179153877037063, + "learning_rate": 3.7145466291747935e-06, + "loss": 0.3297, "step": 15779 }, { - "epoch": 0.91, - "grad_norm": 0.3476013206398067, - "learning_rate": 4.5375003098516613e-07, - "loss": 0.2717, + "epoch": 0.72, + "grad_norm": 0.34588722640204356, + "learning_rate": 3.7133894354327138e-06, + "loss": 0.233, "step": 15780 }, { - "epoch": 0.91, - "grad_norm": 0.5541988538944898, - "learning_rate": 4.5319599690241576e-07, - "loss": 0.3387, + "epoch": 0.72, + "grad_norm": 0.30956707917714277, + "learning_rate": 3.7122323808701323e-06, + "loss": 0.2683, "step": 15781 }, { - "epoch": 0.91, - "grad_norm": 0.34841705919786675, - "learning_rate": 4.5264229342507736e-07, - "loss": 0.2278, + "epoch": 0.73, + "grad_norm": 1.4853247723345238, + "learning_rate": 3.7110754655126703e-06, + "loss": 0.6, "step": 15782 }, { - "epoch": 0.91, - "grad_norm": 1.2826263441085395, - "learning_rate": 4.5208892057232446e-07, - "loss": 0.6443, + "epoch": 0.73, + "grad_norm": 0.7941554650957486, + "learning_rate": 3.7099186893859317e-06, + "loss": 0.2792, "step": 15783 }, { - "epoch": 0.91, - "grad_norm": 0.25060351873355025, - "learning_rate": 4.515358783633228e-07, - "loss": 0.238, + "epoch": 0.73, + "grad_norm": 0.3490487265792109, + "learning_rate": 3.7087620525155343e-06, + "loss": 0.2705, "step": 15784 }, { - "epoch": 0.91, - "grad_norm": 0.2457619049739712, - "learning_rate": 4.5098316681722266e-07, - "loss": 0.1596, + "epoch": 0.73, + "grad_norm": 0.38084321739689575, + "learning_rate": 3.707605554927074e-06, + "loss": 0.2916, "step": 15785 }, { - "epoch": 0.91, - "grad_norm": 0.7165815667752252, - "learning_rate": 4.5043078595316536e-07, - "loss": 0.3654, + "epoch": 0.73, + "grad_norm": 0.25196420904020306, + "learning_rate": 3.70644919664617e-06, + "loss": 0.1134, "step": 15786 }, { - "epoch": 0.91, - "grad_norm": 0.45627630512144096, - "learning_rate": 4.4987873579027784e-07, - "loss": 0.3349, + "epoch": 0.73, + "grad_norm": 0.4452361526463336, + "learning_rate": 3.7052929776984114e-06, + "loss": 0.265, "step": 15787 }, { - "epoch": 0.91, - "grad_norm": 0.3075011835404249, - "learning_rate": 4.493270163476804e-07, - "loss": 0.2568, + "epoch": 0.73, + "grad_norm": 0.606185339522962, + "learning_rate": 3.704136898109403e-06, + "loss": 0.3266, "step": 15788 }, { - "epoch": 0.91, - "grad_norm": 0.48719044332200573, - "learning_rate": 4.4877562764447766e-07, - "loss": 0.2597, + "epoch": 0.73, + "grad_norm": 0.3560367420634984, + "learning_rate": 3.7029809579047314e-06, + "loss": 0.2319, "step": 15789 }, { - "epoch": 0.91, - "grad_norm": 0.5000417018144054, - "learning_rate": 4.4822456969976444e-07, - "loss": 0.2893, + "epoch": 0.73, + "grad_norm": 0.3799742381189488, + "learning_rate": 3.7018251571099927e-06, + "loss": 0.2596, "step": 15790 }, { - "epoch": 0.91, - "grad_norm": 0.2556509931355065, - "learning_rate": 4.4767384253262326e-07, - "loss": 0.2022, + "epoch": 0.73, + "grad_norm": 0.7120201456841941, + "learning_rate": 3.7006694957507782e-06, + "loss": 0.4296, "step": 15791 }, { - "epoch": 0.91, - "grad_norm": 0.3334183800000595, - "learning_rate": 4.4712344616212433e-07, - "loss": 0.246, + "epoch": 0.73, + "grad_norm": 0.2261287645050398, + "learning_rate": 3.6995139738526662e-06, + "loss": 0.1713, "step": 15792 }, { - "epoch": 0.91, - "grad_norm": 0.978515196075592, - "learning_rate": 4.4657338060733246e-07, - "loss": 0.4858, + "epoch": 0.73, + "grad_norm": 0.36994019557106134, + "learning_rate": 3.6983585914412456e-06, + "loss": 0.2833, "step": 15793 }, { - "epoch": 0.91, - "grad_norm": 0.3399680398954725, - "learning_rate": 4.4602364588729243e-07, - "loss": 0.2275, + "epoch": 0.73, + "grad_norm": 1.2552418355706043, + "learning_rate": 3.697203348542089e-06, + "loss": 0.3983, "step": 15794 }, { - "epoch": 0.91, - "grad_norm": 1.5585273086353948, - "learning_rate": 4.454742420210434e-07, - "loss": 0.3749, + "epoch": 0.73, + "grad_norm": 0.6763381095527005, + "learning_rate": 3.6960482451807757e-06, + "loss": 0.3836, "step": 15795 }, { - "epoch": 0.91, - "grad_norm": 0.3308699135535816, - "learning_rate": 4.44925169027608e-07, - "loss": 0.3088, + "epoch": 0.73, + "grad_norm": 0.3249436654470026, + "learning_rate": 3.694893281382881e-06, + "loss": 0.1999, "step": 15796 }, { - "epoch": 0.91, - "grad_norm": 0.24890530106124995, - "learning_rate": 4.4437642692600534e-07, - "loss": 0.1998, + "epoch": 0.73, + "grad_norm": 0.354363746677199, + "learning_rate": 3.69373845717397e-06, + "loss": 0.2905, "step": 15797 }, { - "epoch": 0.91, - "grad_norm": 0.42886569435392907, - "learning_rate": 4.4382801573523595e-07, - "loss": 0.1926, + "epoch": 0.73, + "grad_norm": 0.43969182656759415, + "learning_rate": 3.69258377257961e-06, + "loss": 0.2683, "step": 15798 }, { - "epoch": 0.91, - "grad_norm": 0.3308619773627193, - "learning_rate": 4.4327993547429225e-07, - "loss": 0.2951, + "epoch": 0.73, + "grad_norm": 0.26544143492733324, + "learning_rate": 3.6914292276253705e-06, + "loss": 0.1584, "step": 15799 }, { - "epoch": 0.91, - "grad_norm": 0.32885301500455993, - "learning_rate": 4.427321861621514e-07, - "loss": 0.2736, + "epoch": 0.73, + "grad_norm": 1.1023576773084665, + "learning_rate": 3.6902748223368044e-06, + "loss": 0.4443, "step": 15800 }, { - "epoch": 0.91, - "grad_norm": 1.1618546884805907, - "learning_rate": 4.4218476781778483e-07, - "loss": 0.5503, + "epoch": 0.73, + "grad_norm": 0.4113269380573955, + "learning_rate": 3.689120556739475e-06, + "loss": 0.2979, "step": 15801 }, { - "epoch": 0.91, - "grad_norm": 0.35167729216980037, - "learning_rate": 4.416376804601508e-07, - "loss": 0.1287, + "epoch": 0.73, + "grad_norm": 0.3405148327620765, + "learning_rate": 3.687966430858928e-06, + "loss": 0.1877, "step": 15802 }, { - "epoch": 0.91, - "grad_norm": 0.3502201067992776, - "learning_rate": 4.410909241081918e-07, - "loss": 0.2668, + "epoch": 0.73, + "grad_norm": 1.3077987249900687, + "learning_rate": 3.6868124447207266e-06, + "loss": 0.7262, "step": 15803 }, { - "epoch": 0.91, - "grad_norm": 0.3340832982352592, - "learning_rate": 4.405444987808405e-07, - "loss": 0.2879, + "epoch": 0.73, + "grad_norm": 0.4129017821500772, + "learning_rate": 3.68565859835041e-06, + "loss": 0.3271, "step": 15804 }, { - "epoch": 0.91, - "grad_norm": 0.43546774821008033, - "learning_rate": 4.39998404497024e-07, - "loss": 0.1536, + "epoch": 0.73, + "grad_norm": 0.24827872916603902, + "learning_rate": 3.6845048917735292e-06, + "loss": 0.1267, "step": 15805 }, { - "epoch": 0.91, - "grad_norm": 0.3112353537779798, - "learning_rate": 4.3945264127565166e-07, - "loss": 0.2528, + "epoch": 0.73, + "grad_norm": 0.37638856557267275, + "learning_rate": 3.6833513250156207e-06, + "loss": 0.2391, "step": 15806 }, { - "epoch": 0.91, - "grad_norm": 0.551693833136559, - "learning_rate": 4.389072091356239e-07, - "loss": 0.3786, + "epoch": 0.73, + "grad_norm": 0.8960439361717315, + "learning_rate": 3.6821978981022245e-06, + "loss": 0.4086, "step": 15807 }, { - "epoch": 0.91, - "grad_norm": 0.49880144907478224, - "learning_rate": 4.383621080958267e-07, - "loss": 0.2335, + "epoch": 0.73, + "grad_norm": 0.7040881807631726, + "learning_rate": 3.6810446110588825e-06, + "loss": 0.3676, "step": 15808 }, { - "epoch": 0.91, - "grad_norm": 0.3142694304193792, - "learning_rate": 4.378173381751394e-07, - "loss": 0.2585, + "epoch": 0.73, + "grad_norm": 0.2917073581584961, + "learning_rate": 3.6798914639111184e-06, + "loss": 0.2337, "step": 15809 }, { - "epoch": 0.91, - "grad_norm": 0.40112648470346424, - "learning_rate": 4.372728993924269e-07, - "loss": 0.266, + "epoch": 0.73, + "grad_norm": 0.3127535072909122, + "learning_rate": 3.6787384566844685e-06, + "loss": 0.1901, "step": 15810 }, { - "epoch": 0.91, - "grad_norm": 0.2930212171494269, - "learning_rate": 4.3672879176654303e-07, - "loss": 0.1907, + "epoch": 0.73, + "grad_norm": 0.4021240546886768, + "learning_rate": 3.6775855894044543e-06, + "loss": 0.2556, "step": 15811 }, { - "epoch": 0.91, - "grad_norm": 0.34939185440615567, - "learning_rate": 4.3618501531632717e-07, - "loss": 0.282, + "epoch": 0.73, + "grad_norm": 0.43761701293147537, + "learning_rate": 3.6764328620966016e-06, + "loss": 0.2507, "step": 15812 }, { - "epoch": 0.91, - "grad_norm": 1.1777285605456114, - "learning_rate": 4.3564157006061535e-07, - "loss": 0.685, + "epoch": 0.73, + "grad_norm": 0.5049424195519188, + "learning_rate": 3.6752802747864337e-06, + "loss": 0.3609, "step": 15813 }, { - "epoch": 0.91, - "grad_norm": 0.7188027543719689, - "learning_rate": 4.3509845601822474e-07, - "loss": 0.4602, + "epoch": 0.73, + "grad_norm": 0.45985937754675626, + "learning_rate": 3.6741278274994605e-06, + "loss": 0.2628, "step": 15814 }, { - "epoch": 0.91, - "grad_norm": 0.30789053770206504, - "learning_rate": 4.3455567320796366e-07, - "loss": 0.216, + "epoch": 0.73, + "grad_norm": 0.5559645561162057, + "learning_rate": 3.6729755202612004e-06, + "loss": 0.2916, "step": 15815 }, { - "epoch": 0.91, - "grad_norm": 0.37512687043786214, - "learning_rate": 4.34013221648627e-07, - "loss": 0.3312, + "epoch": 0.73, + "grad_norm": 0.3815890687164551, + "learning_rate": 3.6718233530971657e-06, + "loss": 0.2996, "step": 15816 }, { - "epoch": 0.91, - "grad_norm": 0.3616392023467653, - "learning_rate": 4.3347110135900094e-07, - "loss": 0.1928, + "epoch": 0.73, + "grad_norm": 0.29075647309702546, + "learning_rate": 3.670671326032865e-06, + "loss": 0.1852, "step": 15817 }, { - "epoch": 0.91, - "grad_norm": 0.31199271737258344, - "learning_rate": 4.329293123578604e-07, - "loss": 0.1853, + "epoch": 0.73, + "grad_norm": 0.4918043179184085, + "learning_rate": 3.6695194390938018e-06, + "loss": 0.249, "step": 15818 }, { - "epoch": 0.91, - "grad_norm": 0.4895600138237852, - "learning_rate": 4.3238785466396596e-07, - "loss": 0.3757, + "epoch": 0.73, + "grad_norm": 0.45891643493861695, + "learning_rate": 3.668367692305469e-06, + "loss": 0.2742, "step": 15819 }, { - "epoch": 0.91, - "grad_norm": 0.45924672686202866, - "learning_rate": 4.318467282960681e-07, - "loss": 0.3507, + "epoch": 0.73, + "grad_norm": 0.32350958090533904, + "learning_rate": 3.667216085693379e-06, + "loss": 0.2479, "step": 15820 }, { - "epoch": 0.91, - "grad_norm": 0.3120934451623118, - "learning_rate": 4.3130593327290637e-07, - "loss": 0.1888, + "epoch": 0.73, + "grad_norm": 0.3703779075935711, + "learning_rate": 3.6660646192830196e-06, + "loss": 0.2964, "step": 15821 }, { - "epoch": 0.91, - "grad_norm": 0.770298360679297, - "learning_rate": 4.307654696132102e-07, - "loss": 0.3835, + "epoch": 0.73, + "grad_norm": 0.2927145114658251, + "learning_rate": 3.6649132930998877e-06, + "loss": 0.1264, "step": 15822 }, { - "epoch": 0.91, - "grad_norm": 0.21490372065099173, - "learning_rate": 4.302253373356935e-07, - "loss": 0.2062, + "epoch": 0.73, + "grad_norm": 0.4550638169654456, + "learning_rate": 3.663762107169466e-06, + "loss": 0.2777, "step": 15823 }, { - "epoch": 0.91, - "grad_norm": 0.2986581235557719, - "learning_rate": 4.296855364590624e-07, - "loss": 0.1844, + "epoch": 0.73, + "grad_norm": 0.60642794675878, + "learning_rate": 3.6626110615172437e-06, + "loss": 0.3405, "step": 15824 }, { - "epoch": 0.91, - "grad_norm": 1.069549265915451, - "learning_rate": 4.2914606700200755e-07, - "loss": 0.7423, + "epoch": 0.73, + "grad_norm": 0.2929962274729631, + "learning_rate": 3.661460156168709e-06, + "loss": 0.1956, "step": 15825 }, { - "epoch": 0.91, - "grad_norm": 0.6599039669777897, - "learning_rate": 4.286069289832151e-07, - "loss": 0.3809, + "epoch": 0.73, + "grad_norm": 0.66972801630549, + "learning_rate": 3.660309391149334e-06, + "loss": 0.3371, "step": 15826 }, { - "epoch": 0.91, - "grad_norm": 0.419239808764103, - "learning_rate": 4.280681224213523e-07, - "loss": 0.2807, + "epoch": 0.73, + "grad_norm": 0.40566993629851367, + "learning_rate": 3.659158766484601e-06, + "loss": 0.309, "step": 15827 }, { - "epoch": 0.91, - "grad_norm": 0.31727020613520757, - "learning_rate": 4.2752964733507984e-07, - "loss": 0.2382, + "epoch": 0.73, + "grad_norm": 0.35935895421850533, + "learning_rate": 3.6580082821999787e-06, + "loss": 0.2473, "step": 15828 }, { - "epoch": 0.91, - "grad_norm": 0.29397819184063156, - "learning_rate": 4.2699150374304275e-07, - "loss": 0.1889, + "epoch": 0.73, + "grad_norm": 0.5976165895307988, + "learning_rate": 3.6568579383209414e-06, + "loss": 0.2809, "step": 15829 }, { - "epoch": 0.91, - "grad_norm": 0.4446204152271284, - "learning_rate": 4.2645369166387727e-07, - "loss": 0.2613, + "epoch": 0.73, + "grad_norm": 0.37345924081679094, + "learning_rate": 3.6557077348729576e-06, + "loss": 0.2964, "step": 15830 }, { - "epoch": 0.91, - "grad_norm": 0.39269983702966865, - "learning_rate": 4.259162111162107e-07, - "loss": 0.2553, + "epoch": 0.73, + "grad_norm": 0.2412155392794255, + "learning_rate": 3.654557671881487e-06, + "loss": 0.0899, "step": 15831 }, { - "epoch": 0.91, - "grad_norm": 0.45515828284942017, - "learning_rate": 4.2537906211865375e-07, - "loss": 0.3144, + "epoch": 0.73, + "grad_norm": 0.3573669701331559, + "learning_rate": 3.6534077493719945e-06, + "loss": 0.2777, "step": 15832 }, { - "epoch": 0.91, - "grad_norm": 0.3112784755800918, - "learning_rate": 4.2484224468980815e-07, - "loss": 0.2632, + "epoch": 0.73, + "grad_norm": 0.3806852589436953, + "learning_rate": 3.6522579673699364e-06, + "loss": 0.3183, "step": 15833 }, { - "epoch": 0.91, - "grad_norm": 1.8587599044679906, - "learning_rate": 4.243057588482624e-07, - "loss": 0.2075, + "epoch": 0.73, + "grad_norm": 0.9189779753233617, + "learning_rate": 3.651108325900773e-06, + "loss": 0.4512, "step": 15834 }, { - "epoch": 0.91, - "grad_norm": 0.23261596393289166, - "learning_rate": 4.237696046125994e-07, - "loss": 0.2007, + "epoch": 0.73, + "grad_norm": 0.40914614001581373, + "learning_rate": 3.6499588249899485e-06, + "loss": 0.2366, "step": 15835 }, { - "epoch": 0.91, - "grad_norm": 0.4171560765483998, - "learning_rate": 4.232337820013821e-07, - "loss": 0.2908, + "epoch": 0.73, + "grad_norm": 0.4038414880021001, + "learning_rate": 3.648809464662919e-06, + "loss": 0.2811, "step": 15836 }, { - "epoch": 0.91, - "grad_norm": 0.6894450007615852, - "learning_rate": 4.226982910331656e-07, - "loss": 0.2665, + "epoch": 0.73, + "grad_norm": 0.2624489219011203, + "learning_rate": 3.6476602449451228e-06, + "loss": 0.203, "step": 15837 }, { - "epoch": 0.91, - "grad_norm": 0.631586559127634, - "learning_rate": 4.2216313172649623e-07, - "loss": 0.3694, + "epoch": 0.73, + "grad_norm": 0.35678308729895347, + "learning_rate": 3.6465111658620067e-06, + "loss": 0.1871, "step": 15838 }, { - "epoch": 0.91, - "grad_norm": 0.3701344195302594, - "learning_rate": 4.2162830409990583e-07, - "loss": 0.2664, + "epoch": 0.73, + "grad_norm": 0.5707720403581364, + "learning_rate": 3.645362227439013e-06, + "loss": 0.387, "step": 15839 }, { - "epoch": 0.91, - "grad_norm": 0.32238862938044927, - "learning_rate": 4.2109380817191626e-07, - "loss": 0.2694, + "epoch": 0.73, + "grad_norm": 0.3789732517908, + "learning_rate": 3.644213429701571e-06, + "loss": 0.2802, "step": 15840 }, { - "epoch": 0.91, - "grad_norm": 0.36861815680269605, - "learning_rate": 4.205596439610349e-07, - "loss": 0.1159, + "epoch": 0.73, + "grad_norm": 0.667087200712018, + "learning_rate": 3.6430647726751187e-06, + "loss": 0.2347, "step": 15841 }, { - "epoch": 0.91, - "grad_norm": 0.3877743415272305, - "learning_rate": 4.2002581148576136e-07, - "loss": 0.2594, + "epoch": 0.73, + "grad_norm": 0.4240838509844322, + "learning_rate": 3.6419162563850886e-06, + "loss": 0.3019, "step": 15842 }, { - "epoch": 0.91, - "grad_norm": 0.35220346095956107, - "learning_rate": 4.194923107645821e-07, - "loss": 0.3235, + "epoch": 0.73, + "grad_norm": 0.24809770756173508, + "learning_rate": 3.640767880856901e-06, + "loss": 0.1566, "step": 15843 }, { - "epoch": 0.91, - "grad_norm": 0.5239046538366025, - "learning_rate": 4.189591418159722e-07, - "loss": 0.2708, + "epoch": 0.73, + "grad_norm": 0.35348859222886875, + "learning_rate": 3.6396196461159874e-06, + "loss": 0.2927, "step": 15844 }, { - "epoch": 0.91, - "grad_norm": 0.39195351825581, - "learning_rate": 4.1842630465839586e-07, - "loss": 0.2784, + "epoch": 0.73, + "grad_norm": 0.3396110022707792, + "learning_rate": 3.638471552187757e-06, + "loss": 0.2166, "step": 15845 }, { - "epoch": 0.91, - "grad_norm": 0.5495615089399805, - "learning_rate": 4.178937993103027e-07, - "loss": 0.3197, + "epoch": 0.73, + "grad_norm": 0.730663059887459, + "learning_rate": 3.6373235990976418e-06, + "loss": 0.3652, "step": 15846 }, { - "epoch": 0.91, - "grad_norm": 0.27379809113010045, - "learning_rate": 4.1736162579013694e-07, - "loss": 0.1906, + "epoch": 0.73, + "grad_norm": 0.917159125747844, + "learning_rate": 3.63617578687105e-06, + "loss": 0.4188, "step": 15847 }, { - "epoch": 0.91, - "grad_norm": 0.2684468644693002, - "learning_rate": 4.16829784116326e-07, - "loss": 0.2042, + "epoch": 0.73, + "grad_norm": 0.2573768433462493, + "learning_rate": 3.63502811553339e-06, + "loss": 0.2218, "step": 15848 }, { - "epoch": 0.91, - "grad_norm": 0.4441215646728875, - "learning_rate": 4.1629827430728743e-07, - "loss": 0.2991, + "epoch": 0.73, + "grad_norm": 0.490454631024778, + "learning_rate": 3.633880585110072e-06, + "loss": 0.227, "step": 15849 }, { - "epoch": 0.91, - "grad_norm": 0.7808231682802714, - "learning_rate": 4.157670963814264e-07, - "loss": 0.3301, + "epoch": 0.73, + "grad_norm": 0.3835325518134792, + "learning_rate": 3.6327331956265035e-06, + "loss": 0.2395, "step": 15850 }, { - "epoch": 0.91, - "grad_norm": 0.2581293927684206, - "learning_rate": 4.1523625035713943e-07, - "loss": 0.2583, + "epoch": 0.73, + "grad_norm": 0.33927108437763553, + "learning_rate": 3.6315859471080874e-06, + "loss": 0.2391, "step": 15851 }, { - "epoch": 0.91, - "grad_norm": 0.6004003120556647, - "learning_rate": 4.147057362528095e-07, - "loss": 0.3379, + "epoch": 0.73, + "grad_norm": 0.37452129282462276, + "learning_rate": 3.630438839580217e-06, + "loss": 0.2818, "step": 15852 }, { - "epoch": 0.91, - "grad_norm": 0.5608318585976009, - "learning_rate": 4.141755540868075e-07, - "loss": 0.2723, + "epoch": 0.73, + "grad_norm": 0.6158234640347121, + "learning_rate": 3.6292918730682948e-06, + "loss": 0.3163, "step": 15853 }, { - "epoch": 0.91, - "grad_norm": 0.3815204124123482, - "learning_rate": 4.1364570387749324e-07, - "loss": 0.213, + "epoch": 0.73, + "grad_norm": 0.36019077752292356, + "learning_rate": 3.6281450475977076e-06, + "loss": 0.2053, "step": 15854 }, { - "epoch": 0.91, - "grad_norm": 0.3536760296474735, - "learning_rate": 4.1311618564321534e-07, - "loss": 0.2938, + "epoch": 0.73, + "grad_norm": 0.2899562078186164, + "learning_rate": 3.6269983631938476e-06, + "loss": 0.2086, "step": 15855 }, { - "epoch": 0.91, - "grad_norm": 0.4356058557524649, - "learning_rate": 4.1258699940231353e-07, - "loss": 0.355, + "epoch": 0.73, + "grad_norm": 0.32998516287621893, + "learning_rate": 3.6258518198821045e-06, + "loss": 0.2588, "step": 15856 }, { - "epoch": 0.91, - "grad_norm": 0.18578229436004529, - "learning_rate": 4.120581451731109e-07, - "loss": 0.0713, + "epoch": 0.73, + "grad_norm": 0.44121283494998537, + "learning_rate": 3.624705417687856e-06, + "loss": 0.3152, "step": 15857 }, { - "epoch": 0.91, - "grad_norm": 0.4081226800618796, - "learning_rate": 4.1152962297392297e-07, - "loss": 0.273, + "epoch": 0.73, + "grad_norm": 0.7894060501039268, + "learning_rate": 3.6235591566364847e-06, + "loss": 0.3089, "step": 15858 }, { - "epoch": 0.91, - "grad_norm": 0.37698659558030323, - "learning_rate": 4.110014328230505e-07, - "loss": 0.3109, + "epoch": 0.73, + "grad_norm": 0.6983215888741472, + "learning_rate": 3.6224130367533715e-06, + "loss": 0.286, "step": 15859 }, { - "epoch": 0.91, - "grad_norm": 0.5250626358401506, - "learning_rate": 4.104735747387867e-07, - "loss": 0.2035, + "epoch": 0.73, + "grad_norm": 0.3412941104745918, + "learning_rate": 3.6212670580638833e-06, + "loss": 0.2805, "step": 15860 }, { - "epoch": 0.91, - "grad_norm": 0.3725837020347488, - "learning_rate": 4.099460487394114e-07, - "loss": 0.3016, + "epoch": 0.73, + "grad_norm": 0.3177221756540167, + "learning_rate": 3.6201212205933976e-06, + "loss": 0.1812, "step": 15861 }, { - "epoch": 0.91, - "grad_norm": 1.2275656487782942, - "learning_rate": 4.09418854843191e-07, - "loss": 0.7537, + "epoch": 0.73, + "grad_norm": 0.9081695792132801, + "learning_rate": 3.618975524367272e-06, + "loss": 0.515, "step": 15862 }, { - "epoch": 0.91, - "grad_norm": 0.2383154149448102, - "learning_rate": 4.0889199306838323e-07, - "loss": 0.2219, + "epoch": 0.73, + "grad_norm": 0.35209521859603016, + "learning_rate": 3.617829969410885e-06, + "loss": 0.2509, "step": 15863 }, { - "epoch": 0.91, - "grad_norm": 0.3139643851031367, - "learning_rate": 4.083654634332335e-07, - "loss": 0.1762, + "epoch": 0.73, + "grad_norm": 0.2877598011211002, + "learning_rate": 3.6166845557495924e-06, + "loss": 0.2371, "step": 15864 }, { - "epoch": 0.91, - "grad_norm": 0.9530704152814369, - "learning_rate": 4.07839265955976e-07, - "loss": 0.4449, + "epoch": 0.73, + "grad_norm": 0.984732311909354, + "learning_rate": 3.615539283408748e-06, + "loss": 0.3628, "step": 15865 }, { - "epoch": 0.91, - "grad_norm": 0.5492062970741735, - "learning_rate": 4.073134006548318e-07, - "loss": 0.2962, + "epoch": 0.73, + "grad_norm": 0.3686174434423414, + "learning_rate": 3.6143941524137125e-06, + "loss": 0.2629, "step": 15866 }, { - "epoch": 0.91, - "grad_norm": 0.25208480398575406, - "learning_rate": 4.06787867548013e-07, - "loss": 0.2353, + "epoch": 0.73, + "grad_norm": 0.8081670212701979, + "learning_rate": 3.6132491627898305e-06, + "loss": 0.2407, "step": 15867 }, { - "epoch": 0.91, - "grad_norm": 1.1304469249273643, - "learning_rate": 4.062626666537162e-07, - "loss": 0.8001, + "epoch": 0.73, + "grad_norm": 0.3653937531059826, + "learning_rate": 3.6121043145624624e-06, + "loss": 0.3286, "step": 15868 }, { - "epoch": 0.91, - "grad_norm": 0.19056843516548483, - "learning_rate": 4.0573779799013226e-07, - "loss": 0.1272, + "epoch": 0.73, + "grad_norm": 0.3950692333407417, + "learning_rate": 3.610959607756944e-06, + "loss": 0.2812, "step": 15869 }, { - "epoch": 0.91, - "grad_norm": 0.3996361879336976, - "learning_rate": 4.0521326157543563e-07, - "loss": 0.2277, + "epoch": 0.73, + "grad_norm": 0.9335088565241662, + "learning_rate": 3.6098150423986267e-06, + "loss": 0.5228, "step": 15870 }, { - "epoch": 0.91, - "grad_norm": 0.3637325778437535, - "learning_rate": 4.046890574277895e-07, - "loss": 0.2886, + "epoch": 0.73, + "grad_norm": 0.2308588434369723, + "learning_rate": 3.608670618512842e-06, + "loss": 0.1352, "step": 15871 }, { - "epoch": 0.91, - "grad_norm": 0.5237881779689043, - "learning_rate": 4.0416518556534944e-07, - "loss": 0.3128, + "epoch": 0.73, + "grad_norm": 0.3041436772183461, + "learning_rate": 3.607526336124929e-06, + "loss": 0.2552, "step": 15872 }, { - "epoch": 0.91, - "grad_norm": 0.3676819772907118, - "learning_rate": 4.0364164600625753e-07, - "loss": 0.2235, + "epoch": 0.73, + "grad_norm": 1.2169527996671472, + "learning_rate": 3.6063821952602252e-06, + "loss": 0.5171, "step": 15873 }, { - "epoch": 0.91, - "grad_norm": 0.47406659233561166, - "learning_rate": 4.0311843876864155e-07, - "loss": 0.3483, + "epoch": 0.73, + "grad_norm": 0.5664899401649722, + "learning_rate": 3.605238195944054e-06, + "loss": 0.2455, "step": 15874 }, { - "epoch": 0.91, - "grad_norm": 0.2780747777411099, - "learning_rate": 4.025955638706203e-07, - "loss": 0.2054, + "epoch": 0.73, + "grad_norm": 0.4388004220364488, + "learning_rate": 3.6040943382017467e-06, + "loss": 0.3057, "step": 15875 }, { - "epoch": 0.91, - "grad_norm": 0.28877513130409865, - "learning_rate": 4.020730213303037e-07, - "loss": 0.2163, + "epoch": 0.73, + "grad_norm": 0.3401544019265543, + "learning_rate": 3.6029506220586285e-06, + "loss": 0.2917, "step": 15876 }, { - "epoch": 0.91, - "grad_norm": 0.6012066973017609, - "learning_rate": 4.015508111657862e-07, - "loss": 0.2896, + "epoch": 0.73, + "grad_norm": 0.14703735860297892, + "learning_rate": 3.601807047540016e-06, + "loss": 0.07, "step": 15877 }, { - "epoch": 0.91, - "grad_norm": 0.77096149159783, - "learning_rate": 4.0102893339515e-07, - "loss": 0.3788, + "epoch": 0.73, + "grad_norm": 0.4377622607620792, + "learning_rate": 3.6006636146712304e-06, + "loss": 0.2907, "step": 15878 }, { - "epoch": 0.91, - "grad_norm": 0.24983255991293724, - "learning_rate": 4.005073880364696e-07, - "loss": 0.2577, + "epoch": 0.73, + "grad_norm": 1.0200807972955561, + "learning_rate": 3.599520323477579e-06, + "loss": 0.465, "step": 15879 }, { - "epoch": 0.91, - "grad_norm": 0.8911082860363854, - "learning_rate": 3.999861751078049e-07, - "loss": 0.5244, + "epoch": 0.73, + "grad_norm": 0.33282652038509297, + "learning_rate": 3.5983771739843855e-06, + "loss": 0.2438, "step": 15880 }, { - "epoch": 0.91, - "grad_norm": 0.21183230763143193, - "learning_rate": 3.994652946272071e-07, - "loss": 0.1423, + "epoch": 0.73, + "grad_norm": 0.31979100952939926, + "learning_rate": 3.5972341662169473e-06, + "loss": 0.2605, "step": 15881 }, { - "epoch": 0.91, - "grad_norm": 0.39131821841894676, - "learning_rate": 3.989447466127128e-07, - "loss": 0.2829, + "epoch": 0.73, + "grad_norm": 0.4663040668798485, + "learning_rate": 3.596091300200578e-06, + "loss": 0.2674, "step": 15882 }, { - "epoch": 0.91, - "grad_norm": 0.32030932741388807, - "learning_rate": 3.984245310823498e-07, - "loss": 0.2383, + "epoch": 0.73, + "grad_norm": 0.4439548926474651, + "learning_rate": 3.594948575960574e-06, + "loss": 0.2242, "step": 15883 }, { - "epoch": 0.91, - "grad_norm": 0.5922727533151578, - "learning_rate": 3.9790464805413044e-07, - "loss": 0.3336, + "epoch": 0.73, + "grad_norm": 0.24975242026354447, + "learning_rate": 3.593805993522229e-06, + "loss": 0.2202, "step": 15884 }, { - "epoch": 0.91, - "grad_norm": 0.3930583813091873, - "learning_rate": 3.973850975460614e-07, - "loss": 0.33, + "epoch": 0.73, + "grad_norm": 0.8738545750893891, + "learning_rate": 3.592663552910852e-06, + "loss": 0.4477, "step": 15885 }, { - "epoch": 0.91, - "grad_norm": 0.4788539037882134, - "learning_rate": 3.9686587957613377e-07, - "loss": 0.2603, + "epoch": 0.73, + "grad_norm": 0.6513038967583082, + "learning_rate": 3.5915212541517253e-06, + "loss": 0.3944, "step": 15886 }, { - "epoch": 0.91, - "grad_norm": 0.2870566855876852, - "learning_rate": 3.963469941623288e-07, - "loss": 0.2128, + "epoch": 0.73, + "grad_norm": 0.3108793223415059, + "learning_rate": 3.5903790972701445e-06, + "loss": 0.2033, "step": 15887 }, { - "epoch": 0.91, - "grad_norm": 0.2673197346089193, - "learning_rate": 3.958284413226121e-07, - "loss": 0.2114, + "epoch": 0.73, + "grad_norm": 0.3776517319181402, + "learning_rate": 3.589237082291389e-06, + "loss": 0.2905, "step": 15888 }, { - "epoch": 0.91, - "grad_norm": 0.5354140355201578, - "learning_rate": 3.9531022107494486e-07, - "loss": 0.3267, + "epoch": 0.73, + "grad_norm": 0.2707656722460186, + "learning_rate": 3.588095209240746e-06, + "loss": 0.1451, "step": 15889 }, { - "epoch": 0.91, - "grad_norm": 0.35721314864795733, - "learning_rate": 3.9479233343727165e-07, - "loss": 0.1804, + "epoch": 0.73, + "grad_norm": 0.30505701099863564, + "learning_rate": 3.586953478143499e-06, + "loss": 0.1938, "step": 15890 }, { - "epoch": 0.91, - "grad_norm": 0.28916473867304665, - "learning_rate": 3.9427477842752693e-07, - "loss": 0.2645, + "epoch": 0.73, + "grad_norm": 0.7868764213755611, + "learning_rate": 3.585811889024917e-06, + "loss": 0.3981, "step": 15891 }, { - "epoch": 0.91, - "grad_norm": 1.0814982506679522, - "learning_rate": 3.9375755606363306e-07, - "loss": 0.6821, + "epoch": 0.73, + "grad_norm": 0.3709444659775492, + "learning_rate": 3.5846704419102783e-06, + "loss": 0.3149, "step": 15892 }, { - "epoch": 0.91, - "grad_norm": 0.4496409680340063, - "learning_rate": 3.9324066636350136e-07, - "loss": 0.1017, + "epoch": 0.73, + "grad_norm": 0.3353333619229525, + "learning_rate": 3.583529136824856e-06, + "loss": 0.1954, "step": 15893 }, { - "epoch": 0.91, - "grad_norm": 0.3384135104379383, - "learning_rate": 3.92724109345034e-07, - "loss": 0.2612, + "epoch": 0.73, + "grad_norm": 1.1488152496581867, + "learning_rate": 3.5823879737939114e-06, + "loss": 0.5255, "step": 15894 }, { - "epoch": 0.91, - "grad_norm": 0.2919069332707678, - "learning_rate": 3.922078850261168e-07, - "loss": 0.2677, + "epoch": 0.73, + "grad_norm": 0.2533530937268239, + "learning_rate": 3.581246952842714e-06, + "loss": 0.1876, "step": 15895 }, { - "epoch": 0.91, - "grad_norm": 1.075665763003214, - "learning_rate": 3.9169199342462774e-07, - "loss": 0.1625, + "epoch": 0.73, + "grad_norm": 0.386237148308448, + "learning_rate": 3.58010607399652e-06, + "loss": 0.2882, "step": 15896 }, { - "epoch": 0.91, - "grad_norm": 0.3534241016716571, - "learning_rate": 3.9117643455843016e-07, - "loss": 0.2953, + "epoch": 0.73, + "grad_norm": 0.5358586153510079, + "learning_rate": 3.5789653372805897e-06, + "loss": 0.2635, "step": 15897 }, { - "epoch": 0.91, - "grad_norm": 0.5174200198693831, - "learning_rate": 3.906612084453809e-07, - "loss": 0.3611, + "epoch": 0.73, + "grad_norm": 0.8312343062887351, + "learning_rate": 3.5778247427201784e-06, + "loss": 0.4185, "step": 15898 }, { - "epoch": 0.91, - "grad_norm": 0.408525886252803, - "learning_rate": 3.9014631510332135e-07, - "loss": 0.2211, + "epoch": 0.73, + "grad_norm": 0.34437869024038903, + "learning_rate": 3.5766842903405407e-06, + "loss": 0.2624, "step": 15899 }, { - "epoch": 0.91, - "grad_norm": 0.396189831826831, - "learning_rate": 3.896317545500805e-07, - "loss": 0.2782, + "epoch": 0.73, + "grad_norm": 0.35466906186404656, + "learning_rate": 3.575543980166919e-06, + "loss": 0.2505, "step": 15900 }, { - "epoch": 0.91, - "grad_norm": 0.29171815179801414, - "learning_rate": 3.8911752680347857e-07, - "loss": 0.1854, + "epoch": 0.73, + "grad_norm": 0.25668708925688655, + "learning_rate": 3.5744038122245606e-06, + "loss": 0.143, "step": 15901 }, { - "epoch": 0.91, - "grad_norm": 0.3603406957800757, - "learning_rate": 3.8860363188132356e-07, - "loss": 0.2891, + "epoch": 0.73, + "grad_norm": 0.3516455352547877, + "learning_rate": 3.5732637865387133e-06, + "loss": 0.2559, "step": 15902 }, { - "epoch": 0.91, - "grad_norm": 0.2890968051915757, - "learning_rate": 3.880900698014134e-07, - "loss": 0.1957, + "epoch": 0.73, + "grad_norm": 0.4789143868316892, + "learning_rate": 3.5721239031346067e-06, + "loss": 0.2743, "step": 15903 }, { - "epoch": 0.91, - "grad_norm": 1.2876865594992615, - "learning_rate": 3.8757684058152947e-07, - "loss": 0.7166, + "epoch": 0.73, + "grad_norm": 0.5427761939456859, + "learning_rate": 3.5709841620374864e-06, + "loss": 0.3668, "step": 15904 }, { - "epoch": 0.91, - "grad_norm": 0.602658793155093, - "learning_rate": 3.8706394423944524e-07, - "loss": 0.3449, + "epoch": 0.73, + "grad_norm": 0.3519099646132903, + "learning_rate": 3.5698445632725766e-06, + "loss": 0.248, "step": 15905 }, { - "epoch": 0.91, - "grad_norm": 0.34185570155092393, - "learning_rate": 3.8655138079292444e-07, - "loss": 0.2216, + "epoch": 0.73, + "grad_norm": 1.2820194999232484, + "learning_rate": 3.5687051068651102e-06, + "loss": 0.3294, "step": 15906 }, { - "epoch": 0.91, - "grad_norm": 0.2891611853218796, - "learning_rate": 3.8603915025971605e-07, - "loss": 0.2511, + "epoch": 0.73, + "grad_norm": 0.24558789546550086, + "learning_rate": 3.5675657928403185e-06, + "loss": 0.2165, "step": 15907 }, { - "epoch": 0.91, - "grad_norm": 0.3595831908817427, - "learning_rate": 3.855272526575582e-07, - "loss": 0.2175, + "epoch": 0.73, + "grad_norm": 0.3334280296589789, + "learning_rate": 3.5664266212234157e-06, + "loss": 0.2321, "step": 15908 }, { - "epoch": 0.91, - "grad_norm": 0.31805676469838995, - "learning_rate": 3.8501568800417663e-07, - "loss": 0.2128, + "epoch": 0.73, + "grad_norm": 0.7626541481911457, + "learning_rate": 3.565287592039628e-06, + "loss": 0.3785, "step": 15909 }, { - "epoch": 0.91, - "grad_norm": 0.3700899135798883, - "learning_rate": 3.845044563172895e-07, - "loss": 0.3155, + "epoch": 0.73, + "grad_norm": 0.7874453980946099, + "learning_rate": 3.564148705314171e-06, + "loss": 0.3373, "step": 15910 }, { - "epoch": 0.91, - "grad_norm": 0.6586535137821252, - "learning_rate": 3.8399355761460036e-07, - "loss": 0.3655, + "epoch": 0.73, + "grad_norm": 0.4313019464585188, + "learning_rate": 3.5630099610722613e-06, + "loss": 0.2782, "step": 15911 }, { - "epoch": 0.91, - "grad_norm": 0.3068097109261226, - "learning_rate": 3.8348299191380057e-07, - "loss": 0.2049, + "epoch": 0.73, + "grad_norm": 0.32402061692861933, + "learning_rate": 3.5618713593391076e-06, + "loss": 0.3103, "step": 15912 }, { - "epoch": 0.91, - "grad_norm": 0.288649576944417, - "learning_rate": 3.8297275923256936e-07, - "loss": 0.1484, + "epoch": 0.73, + "grad_norm": 0.5398902674076549, + "learning_rate": 3.5607329001399137e-06, + "loss": 0.1864, "step": 15913 }, { - "epoch": 0.91, - "grad_norm": 0.3339644155133282, - "learning_rate": 3.824628595885793e-07, - "loss": 0.2961, + "epoch": 0.73, + "grad_norm": 0.3972068529717908, + "learning_rate": 3.5595945834998868e-06, + "loss": 0.2455, "step": 15914 }, { - "epoch": 0.91, - "grad_norm": 0.3166963614134166, - "learning_rate": 3.8195329299948737e-07, - "loss": 0.2589, + "epoch": 0.73, + "grad_norm": 0.33091121159075143, + "learning_rate": 3.5584564094442286e-06, + "loss": 0.2495, "step": 15915 }, { - "epoch": 0.91, - "grad_norm": 0.8291946937549082, - "learning_rate": 3.814440594829394e-07, - "loss": 0.3325, + "epoch": 0.73, + "grad_norm": 0.5081384629121154, + "learning_rate": 3.55731837799814e-06, + "loss": 0.2561, "step": 15916 }, { - "epoch": 0.91, - "grad_norm": 0.6188592393447846, - "learning_rate": 3.8093515905656797e-07, - "loss": 0.3916, + "epoch": 0.73, + "grad_norm": 0.34233883619897165, + "learning_rate": 3.55618048918681e-06, + "loss": 0.2556, "step": 15917 }, { - "epoch": 0.91, - "grad_norm": 0.3410052252876215, - "learning_rate": 3.804265917380001e-07, - "loss": 0.2626, + "epoch": 0.73, + "grad_norm": 1.0241393751380483, + "learning_rate": 3.555042743035434e-06, + "loss": 0.4684, "step": 15918 }, { - "epoch": 0.91, - "grad_norm": 0.3440402045586406, - "learning_rate": 3.7991835754484616e-07, - "loss": 0.224, + "epoch": 0.73, + "grad_norm": 0.3529665680172745, + "learning_rate": 3.5539051395692024e-06, + "loss": 0.2501, "step": 15919 }, { - "epoch": 0.91, - "grad_norm": 0.2800763142332023, - "learning_rate": 3.794104564947054e-07, - "loss": 0.1761, + "epoch": 0.73, + "grad_norm": 0.34517162234723736, + "learning_rate": 3.5527676788132947e-06, + "loss": 0.2686, "step": 15920 }, { - "epoch": 0.91, - "grad_norm": 0.31545824296470104, - "learning_rate": 3.789028886051671e-07, - "loss": 0.2512, + "epoch": 0.73, + "grad_norm": 0.36500721147743675, + "learning_rate": 3.5516303607929004e-06, + "loss": 0.1955, "step": 15921 }, { - "epoch": 0.91, - "grad_norm": 0.5130003457284722, - "learning_rate": 3.7839565389380606e-07, - "loss": 0.2595, + "epoch": 0.73, + "grad_norm": 1.4273199284142934, + "learning_rate": 3.5504931855331914e-06, + "loss": 0.6747, "step": 15922 }, { - "epoch": 0.91, - "grad_norm": 0.4206333487665775, - "learning_rate": 3.7788875237819156e-07, - "loss": 0.3264, + "epoch": 0.73, + "grad_norm": 0.33890116153177063, + "learning_rate": 3.5493561530593477e-06, + "loss": 0.2064, "step": 15923 }, { - "epoch": 0.91, - "grad_norm": 0.4075238314358624, - "learning_rate": 3.7738218407587514e-07, - "loss": 0.2339, + "epoch": 0.73, + "grad_norm": 0.37412767685142456, + "learning_rate": 3.548219263396544e-06, + "loss": 0.32, "step": 15924 }, { - "epoch": 0.91, - "grad_norm": 0.40120391979014103, - "learning_rate": 3.768759490044005e-07, - "loss": 0.1618, + "epoch": 0.73, + "grad_norm": 0.8452482295136036, + "learning_rate": 3.547082516569945e-06, + "loss": 0.4046, "step": 15925 }, { - "epoch": 0.92, - "grad_norm": 0.24959491992631752, - "learning_rate": 3.76370047181297e-07, - "loss": 0.2329, + "epoch": 0.73, + "grad_norm": 0.37840655645816307, + "learning_rate": 3.5459459126047226e-06, + "loss": 0.2189, "step": 15926 }, { - "epoch": 0.92, - "grad_norm": 0.35098746815589077, - "learning_rate": 3.7586447862408617e-07, - "loss": 0.2625, + "epoch": 0.73, + "grad_norm": 0.3555086220862457, + "learning_rate": 3.544809451526031e-06, + "loss": 0.2382, "step": 15927 }, { - "epoch": 0.92, - "grad_norm": 0.6981973004763733, - "learning_rate": 3.7535924335027396e-07, - "loss": 0.4138, + "epoch": 0.73, + "grad_norm": 0.4050517731528359, + "learning_rate": 3.5436731333590423e-06, + "loss": 0.2529, "step": 15928 }, { - "epoch": 0.92, - "grad_norm": 0.7501774342098396, - "learning_rate": 3.7485434137735754e-07, - "loss": 0.2783, + "epoch": 0.73, + "grad_norm": 0.404287575542151, + "learning_rate": 3.5425369581289082e-06, + "loss": 0.1704, "step": 15929 }, { - "epoch": 0.92, - "grad_norm": 0.3149510966890482, - "learning_rate": 3.743497727228207e-07, - "loss": 0.2552, + "epoch": 0.73, + "grad_norm": 0.5933353685389976, + "learning_rate": 3.5414009258607794e-06, + "loss": 0.3572, "step": 15930 }, { - "epoch": 0.92, - "grad_norm": 0.3666851503761762, - "learning_rate": 3.738455374041372e-07, - "loss": 0.3152, + "epoch": 0.73, + "grad_norm": 0.45358261516233567, + "learning_rate": 3.5402650365798085e-06, + "loss": 0.2862, "step": 15931 }, { - "epoch": 0.92, - "grad_norm": 0.1810162314400088, - "learning_rate": 3.7334163543876977e-07, - "loss": 0.098, + "epoch": 0.73, + "grad_norm": 0.4437945034926596, + "learning_rate": 3.539129290311144e-06, + "loss": 0.197, "step": 15932 }, { - "epoch": 0.92, - "grad_norm": 0.304726241285453, - "learning_rate": 3.7283806684416777e-07, - "loss": 0.259, + "epoch": 0.73, + "grad_norm": 0.41548132994529546, + "learning_rate": 3.5379936870799327e-06, + "loss": 0.2435, "step": 15933 }, { - "epoch": 0.92, - "grad_norm": 0.3382470948180131, - "learning_rate": 3.723348316377695e-07, - "loss": 0.2932, + "epoch": 0.73, + "grad_norm": 0.5770255063890887, + "learning_rate": 3.5368582269113107e-06, + "loss": 0.3, "step": 15934 }, { - "epoch": 0.92, - "grad_norm": 0.8426507956023479, - "learning_rate": 3.718319298369999e-07, - "loss": 0.3081, + "epoch": 0.73, + "grad_norm": 0.4182096740428568, + "learning_rate": 3.535722909830417e-06, + "loss": 0.3041, "step": 15935 }, { - "epoch": 0.92, - "grad_norm": 0.35343634631778686, - "learning_rate": 3.7132936145927835e-07, - "loss": 0.2407, + "epoch": 0.73, + "grad_norm": 0.8069506946065207, + "learning_rate": 3.5345877358623914e-06, + "loss": 0.2516, "step": 15936 }, { - "epoch": 0.92, - "grad_norm": 1.545706171785692, - "learning_rate": 3.708271265220087e-07, - "loss": 0.5846, + "epoch": 0.73, + "grad_norm": 0.773040329493093, + "learning_rate": 3.5334527050323596e-06, + "loss": 0.3747, "step": 15937 }, { - "epoch": 0.92, - "grad_norm": 0.2584584444218997, - "learning_rate": 3.703252250425782e-07, - "loss": 0.2462, - "step": 15938 + "epoch": 0.73, + "grad_norm": 0.45323681551243933, + "learning_rate": 3.5323178173654547e-06, + "loss": 0.2993, + "step": 15938 }, { - "epoch": 0.92, - "grad_norm": 0.28528019760679263, - "learning_rate": 3.6982365703837286e-07, - "loss": 0.2056, + "epoch": 0.73, + "grad_norm": 0.29006369350188915, + "learning_rate": 3.5311830728867967e-06, + "loss": 0.176, "step": 15939 }, { - "epoch": 0.92, - "grad_norm": 0.6252324364531351, - "learning_rate": 3.6932242252675997e-07, - "loss": 0.3615, + "epoch": 0.73, + "grad_norm": 0.341647903831361, + "learning_rate": 3.53004847162151e-06, + "loss": 0.2508, "step": 15940 }, { - "epoch": 0.92, - "grad_norm": 0.5203778132134718, - "learning_rate": 3.6882152152509674e-07, - "loss": 0.2695, + "epoch": 0.73, + "grad_norm": 0.3910270778763466, + "learning_rate": 3.5289140135947185e-06, + "loss": 0.2877, "step": 15941 }, { - "epoch": 0.92, - "grad_norm": 0.28609701313106567, - "learning_rate": 3.683209540507304e-07, - "loss": 0.1995, + "epoch": 0.73, + "grad_norm": 0.5607472076924694, + "learning_rate": 3.5277796988315303e-06, + "loss": 0.2466, "step": 15942 }, { - "epoch": 0.92, - "grad_norm": 0.4592887764684259, - "learning_rate": 3.678207201209949e-07, - "loss": 0.2873, + "epoch": 0.73, + "grad_norm": 0.42549760041008766, + "learning_rate": 3.5266455273570654e-06, + "loss": 0.3073, "step": 15943 }, { - "epoch": 0.92, - "grad_norm": 0.4645147472870708, - "learning_rate": 3.673208197532152e-07, - "loss": 0.2766, + "epoch": 0.73, + "grad_norm": 0.4456841470349993, + "learning_rate": 3.525511499196422e-06, + "loss": 0.2848, "step": 15944 }, { - "epoch": 0.92, - "grad_norm": 0.34419848756733823, - "learning_rate": 3.6682125296469973e-07, - "loss": 0.1808, + "epoch": 0.73, + "grad_norm": 0.5814306663680847, + "learning_rate": 3.524377614374721e-06, + "loss": 0.2171, "step": 15945 }, { - "epoch": 0.92, - "grad_norm": 0.3378083035715725, - "learning_rate": 3.6632201977275126e-07, - "loss": 0.3049, + "epoch": 0.73, + "grad_norm": 0.262207270275035, + "learning_rate": 3.523243872917055e-06, + "loss": 0.1867, "step": 15946 }, { - "epoch": 0.92, - "grad_norm": 0.45811410825743665, - "learning_rate": 3.658231201946549e-07, - "loss": 0.2619, + "epoch": 0.73, + "grad_norm": 0.3642399082130117, + "learning_rate": 3.5221102748485304e-06, + "loss": 0.2797, "step": 15947 }, { - "epoch": 0.92, - "grad_norm": 0.28931462287240073, - "learning_rate": 3.6532455424769133e-07, - "loss": 0.1857, + "epoch": 0.73, + "grad_norm": 0.3555925069991194, + "learning_rate": 3.5209768201942374e-06, + "loss": 0.2728, "step": 15948 }, { - "epoch": 0.92, - "grad_norm": 1.285888454753432, - "learning_rate": 3.6482632194912436e-07, - "loss": 0.4671, + "epoch": 0.73, + "grad_norm": 0.7754554940258331, + "learning_rate": 3.5198435089792726e-06, + "loss": 0.314, "step": 15949 }, { - "epoch": 0.92, - "grad_norm": 0.36370191667119234, - "learning_rate": 3.64328423316207e-07, - "loss": 0.3186, + "epoch": 0.73, + "grad_norm": 0.7419342943759069, + "learning_rate": 3.5187103412287302e-06, + "loss": 0.3106, "step": 15950 }, { - "epoch": 0.92, - "grad_norm": 0.39727285401672247, - "learning_rate": 3.638308583661809e-07, - "loss": 0.2611, + "epoch": 0.73, + "grad_norm": 0.3501436320336586, + "learning_rate": 3.517577316967692e-06, + "loss": 0.2735, "step": 15951 }, { - "epoch": 0.92, - "grad_norm": 0.42335563840042184, - "learning_rate": 3.633336271162791e-07, - "loss": 0.2897, + "epoch": 0.73, + "grad_norm": 0.2506862773427205, + "learning_rate": 3.5164444362212435e-06, + "loss": 0.1598, "step": 15952 }, { - "epoch": 0.92, - "grad_norm": 0.32788117129585664, - "learning_rate": 3.6283672958371987e-07, - "loss": 0.1735, + "epoch": 0.73, + "grad_norm": 0.5777289646120276, + "learning_rate": 3.5153116990144697e-06, + "loss": 0.3273, "step": 15953 }, { - "epoch": 0.92, - "grad_norm": 0.2670674166828153, - "learning_rate": 3.623401657857095e-07, - "loss": 0.2516, + "epoch": 0.73, + "grad_norm": 0.6083268172832598, + "learning_rate": 3.5141791053724405e-06, + "loss": 0.3162, "step": 15954 }, { - "epoch": 0.92, - "grad_norm": 0.5172019125312697, - "learning_rate": 3.618439357394443e-07, - "loss": 0.2023, + "epoch": 0.73, + "grad_norm": 0.3705204969192318, + "learning_rate": 3.513046655320239e-06, + "loss": 0.2478, "step": 15955 }, { - "epoch": 0.92, - "grad_norm": 0.765943640524781, - "learning_rate": 3.613480394621094e-07, - "loss": 0.3744, + "epoch": 0.73, + "grad_norm": 0.6889725031008458, + "learning_rate": 3.51191434888293e-06, + "loss": 0.3799, "step": 15956 }, { - "epoch": 0.92, - "grad_norm": 0.3254959834274066, - "learning_rate": 3.608524769708788e-07, - "loss": 0.2652, + "epoch": 0.73, + "grad_norm": 0.46082763960038525, + "learning_rate": 3.510782186085583e-06, + "loss": 0.3349, "step": 15957 }, { - "epoch": 0.92, - "grad_norm": 0.29855816184102585, - "learning_rate": 3.6035724828291096e-07, - "loss": 0.2446, + "epoch": 0.73, + "grad_norm": 0.2800832968841728, + "learning_rate": 3.509650166953267e-06, + "loss": 0.1424, "step": 15958 }, { - "epoch": 0.92, - "grad_norm": 0.28100532394478656, - "learning_rate": 3.598623534153578e-07, - "loss": 0.2126, + "epoch": 0.73, + "grad_norm": 0.2673179504775796, + "learning_rate": 3.5085182915110373e-06, + "loss": 0.27, "step": 15959 }, { - "epoch": 0.92, - "grad_norm": 0.42355858312063266, - "learning_rate": 3.593677923853556e-07, - "loss": 0.3086, + "epoch": 0.73, + "grad_norm": 0.522675125364779, + "learning_rate": 3.507386559783961e-06, + "loss": 0.3266, "step": 15960 }, { - "epoch": 0.92, - "grad_norm": 0.5485094544149791, - "learning_rate": 3.5887356521003283e-07, - "loss": 0.2255, + "epoch": 0.73, + "grad_norm": 0.49469698159294917, + "learning_rate": 3.5062549717970796e-06, + "loss": 0.3019, "step": 15961 }, { - "epoch": 0.92, - "grad_norm": 0.3308684754469159, - "learning_rate": 3.583796719065047e-07, - "loss": 0.3017, + "epoch": 0.73, + "grad_norm": 0.41181943279439326, + "learning_rate": 3.5051235275754623e-06, + "loss": 0.1775, "step": 15962 }, { - "epoch": 0.92, - "grad_norm": 0.5477406568092534, - "learning_rate": 3.578861124918731e-07, - "loss": 0.3118, + "epoch": 0.73, + "grad_norm": 0.39701158716181434, + "learning_rate": 3.5039922271441473e-06, + "loss": 0.2975, "step": 15963 }, { - "epoch": 0.92, - "grad_norm": 0.37672975801509384, - "learning_rate": 3.5739288698323107e-07, - "loss": 0.3116, + "epoch": 0.73, + "grad_norm": 0.5182699046311277, + "learning_rate": 3.5028610705281864e-06, + "loss": 0.3525, "step": 15964 }, { - "epoch": 0.92, - "grad_norm": 0.20393972677598093, - "learning_rate": 3.568999953976582e-07, - "loss": 0.154, + "epoch": 0.73, + "grad_norm": 0.6098331511668046, + "learning_rate": 3.501730057752616e-06, + "loss": 0.2617, "step": 15965 }, { - "epoch": 0.92, - "grad_norm": 0.3064175262555501, - "learning_rate": 3.564074377522253e-07, - "loss": 0.2573, + "epoch": 0.73, + "grad_norm": 0.37395907463308353, + "learning_rate": 3.5005991888424793e-06, + "loss": 0.2713, "step": 15966 }, { - "epoch": 0.92, - "grad_norm": 0.5742413490053281, - "learning_rate": 3.5591521406398654e-07, - "loss": 0.3097, + "epoch": 0.73, + "grad_norm": 0.23326878257361724, + "learning_rate": 3.4994684638228148e-06, + "loss": 0.2099, "step": 15967 }, { - "epoch": 0.92, - "grad_norm": 0.5365842583125046, - "learning_rate": 3.554233243499894e-07, - "loss": 0.2825, + "epoch": 0.73, + "grad_norm": 0.6929078652579116, + "learning_rate": 3.498337882718651e-06, + "loss": 0.132, "step": 15968 }, { - "epoch": 0.92, - "grad_norm": 0.43114460143022276, - "learning_rate": 3.5493176862726794e-07, - "loss": 0.253, + "epoch": 0.73, + "grad_norm": 0.4225568386414838, + "learning_rate": 3.49720744555502e-06, + "loss": 0.2847, "step": 15969 }, { - "epoch": 0.92, - "grad_norm": 0.2924940242715081, - "learning_rate": 3.5444054691284535e-07, - "loss": 0.2877, + "epoch": 0.73, + "grad_norm": 0.5077890939177351, + "learning_rate": 3.4960771523569515e-06, + "loss": 0.3473, "step": 15970 }, { - "epoch": 0.92, - "grad_norm": 0.2374504232369525, - "learning_rate": 3.539496592237335e-07, - "loss": 0.1154, + "epoch": 0.73, + "grad_norm": 0.4318510997081486, + "learning_rate": 3.4949470031494625e-06, + "loss": 0.3059, "step": 15971 }, { - "epoch": 0.92, - "grad_norm": 0.2552973385212853, - "learning_rate": 3.5345910557692655e-07, - "loss": 0.1876, + "epoch": 0.73, + "grad_norm": 0.34764467668991683, + "learning_rate": 3.493816997957582e-06, + "loss": 0.2362, "step": 15972 }, { - "epoch": 0.92, - "grad_norm": 0.4709639877224451, - "learning_rate": 3.529688859894176e-07, - "loss": 0.3019, + "epoch": 0.73, + "grad_norm": 0.30149092289962215, + "learning_rate": 3.4926871368063177e-06, + "loss": 0.2097, "step": 15973 }, { - "epoch": 0.92, - "grad_norm": 0.3310973746232077, - "learning_rate": 3.5247900047818193e-07, - "loss": 0.2604, + "epoch": 0.73, + "grad_norm": 0.8910870102133231, + "learning_rate": 3.491557419720689e-06, + "loss": 0.3643, "step": 15974 }, { - "epoch": 0.92, - "grad_norm": 0.43855069686197606, - "learning_rate": 3.5198944906018273e-07, - "loss": 0.269, + "epoch": 0.73, + "grad_norm": 0.26275790862669485, + "learning_rate": 3.4904278467257057e-06, + "loss": 0.2216, "step": 15975 }, { - "epoch": 0.92, - "grad_norm": 0.5369183319927688, - "learning_rate": 3.5150023175237303e-07, - "loss": 0.3707, + "epoch": 0.73, + "grad_norm": 0.931847972943403, + "learning_rate": 3.4892984178463797e-06, + "loss": 0.4693, "step": 15976 }, { - "epoch": 0.92, - "grad_norm": 0.3176682852295683, - "learning_rate": 3.5101134857169704e-07, - "loss": 0.2594, + "epoch": 0.73, + "grad_norm": 0.6117167828880529, + "learning_rate": 3.4881691331077117e-06, + "loss": 0.3512, "step": 15977 }, { - "epoch": 0.92, - "grad_norm": 0.24078063210633607, - "learning_rate": 3.505227995350824e-07, - "loss": 0.1773, + "epoch": 0.73, + "grad_norm": 0.3472049775735102, + "learning_rate": 3.4870399925346955e-06, + "loss": 0.2012, "step": 15978 }, { - "epoch": 0.92, - "grad_norm": 0.5265718725264137, - "learning_rate": 3.5003458465944884e-07, - "loss": 0.3234, + "epoch": 0.73, + "grad_norm": 0.27577459534910365, + "learning_rate": 3.485910996152344e-06, + "loss": 0.2341, "step": 15979 }, { - "epoch": 0.92, - "grad_norm": 0.667710297637546, - "learning_rate": 3.495467039617018e-07, - "loss": 0.3737, + "epoch": 0.73, + "grad_norm": 0.39745666577191496, + "learning_rate": 3.484782143985641e-06, + "loss": 0.2076, "step": 15980 }, { - "epoch": 0.92, - "grad_norm": 0.7002359606242801, - "learning_rate": 3.4905915745873763e-07, - "loss": 0.1409, + "epoch": 0.73, + "grad_norm": 0.4279330621104455, + "learning_rate": 3.4836534360595852e-06, + "loss": 0.2152, "step": 15981 }, { - "epoch": 0.92, - "grad_norm": 0.2570986693620255, - "learning_rate": 3.4857194516744075e-07, - "loss": 0.2817, + "epoch": 0.73, + "grad_norm": 0.4798386238157052, + "learning_rate": 3.482524872399159e-06, + "loss": 0.3091, "step": 15982 }, { - "epoch": 0.92, - "grad_norm": 0.4987963183833602, - "learning_rate": 3.4808506710468204e-07, - "loss": 0.2641, + "epoch": 0.73, + "grad_norm": 0.43937690985847544, + "learning_rate": 3.4813964530293497e-06, + "loss": 0.3256, "step": 15983 }, { - "epoch": 0.92, - "grad_norm": 0.28035991687745376, - "learning_rate": 3.4759852328732136e-07, - "loss": 0.1183, + "epoch": 0.73, + "grad_norm": 0.386533857834818, + "learning_rate": 3.4802681779751433e-06, + "loss": 0.2898, "step": 15984 }, { - "epoch": 0.92, - "grad_norm": 0.3520688668657019, - "learning_rate": 3.4711231373220854e-07, - "loss": 0.2765, + "epoch": 0.73, + "grad_norm": 0.2588281581813228, + "learning_rate": 3.4791400472615133e-06, + "loss": 0.1132, "step": 15985 }, { - "epoch": 0.92, - "grad_norm": 0.3869463660741132, - "learning_rate": 3.466264384561824e-07, - "loss": 0.2959, + "epoch": 0.73, + "grad_norm": 0.5480010795667939, + "learning_rate": 3.4780120609134404e-06, + "loss": 0.3348, "step": 15986 }, { - "epoch": 0.92, - "grad_norm": 0.41454146732657704, - "learning_rate": 3.461408974760683e-07, - "loss": 0.1616, + "epoch": 0.73, + "grad_norm": 0.31419664065496866, + "learning_rate": 3.4768842189558918e-06, + "loss": 0.2513, "step": 15987 }, { - "epoch": 0.92, - "grad_norm": 0.5291939781843723, - "learning_rate": 3.456556908086783e-07, - "loss": 0.4009, + "epoch": 0.73, + "grad_norm": 0.6477000617616433, + "learning_rate": 3.475756521413839e-06, + "loss": 0.2794, "step": 15988 }, { - "epoch": 0.92, - "grad_norm": 0.48867694572595083, - "learning_rate": 3.4517081847081693e-07, - "loss": 0.3361, + "epoch": 0.73, + "grad_norm": 0.7417110251271716, + "learning_rate": 3.4746289683122525e-06, + "loss": 0.4314, "step": 15989 }, { - "epoch": 0.92, - "grad_norm": 0.30895000784527976, - "learning_rate": 3.44686280479275e-07, - "loss": 0.2123, + "epoch": 0.73, + "grad_norm": 0.4080169469670278, + "learning_rate": 3.473501559676088e-06, + "loss": 0.2958, "step": 15990 }, { - "epoch": 0.92, - "grad_norm": 0.4070864437798445, - "learning_rate": 3.442020768508325e-07, - "loss": 0.2457, + "epoch": 0.73, + "grad_norm": 0.41224255354791584, + "learning_rate": 3.4723742955303087e-06, + "loss": 0.2471, "step": 15991 }, { - "epoch": 0.92, - "grad_norm": 0.525896819446171, - "learning_rate": 3.4371820760225606e-07, - "loss": 0.2261, + "epoch": 0.73, + "grad_norm": 0.25961155693087923, + "learning_rate": 3.47124717589987e-06, + "loss": 0.1513, "step": 15992 }, { - "epoch": 0.92, - "grad_norm": 0.389551843659952, - "learning_rate": 3.432346727503033e-07, - "loss": 0.2679, + "epoch": 0.73, + "grad_norm": 0.3572193766172659, + "learning_rate": 3.4701202008097313e-06, + "loss": 0.2709, "step": 15993 }, { - "epoch": 0.92, - "grad_norm": 0.29140564211839626, - "learning_rate": 3.427514723117187e-07, - "loss": 0.2335, + "epoch": 0.73, + "grad_norm": 0.7497176985419289, + "learning_rate": 3.4689933702848365e-06, + "loss": 0.2943, "step": 15994 }, { - "epoch": 0.92, - "grad_norm": 1.1747614721852366, - "learning_rate": 3.4226860630323545e-07, - "loss": 0.604, + "epoch": 0.73, + "grad_norm": 0.31931789406963673, + "learning_rate": 3.4678666843501276e-06, + "loss": 0.2731, "step": 15995 }, { - "epoch": 0.92, - "grad_norm": 0.4861490590325455, - "learning_rate": 3.4178607474157464e-07, - "loss": 0.3133, + "epoch": 0.73, + "grad_norm": 0.3519319825450123, + "learning_rate": 3.466740143030561e-06, + "loss": 0.2663, "step": 15996 }, { - "epoch": 0.92, - "grad_norm": 0.2647163166220459, - "learning_rate": 3.413038776434474e-07, - "loss": 0.1841, + "epoch": 0.73, + "grad_norm": 1.2058378481887444, + "learning_rate": 3.4656137463510676e-06, + "loss": 0.5383, "step": 15997 }, { - "epoch": 0.92, - "grad_norm": 0.30848881097191544, - "learning_rate": 3.408220150255492e-07, - "loss": 0.243, + "epoch": 0.73, + "grad_norm": 0.20477227340452706, + "learning_rate": 3.464487494336591e-06, + "loss": 0.1546, "step": 15998 }, { - "epoch": 0.92, - "grad_norm": 0.7268327398882253, - "learning_rate": 3.403404869045712e-07, - "loss": 0.3952, + "epoch": 0.73, + "grad_norm": 0.3945061938130286, + "learning_rate": 3.4633613870120596e-06, + "loss": 0.2643, "step": 15999 }, { - "epoch": 0.92, - "grad_norm": 0.3471361648604528, - "learning_rate": 3.398592932971878e-07, - "loss": 0.2387, + "epoch": 0.74, + "grad_norm": 0.6923559988319632, + "learning_rate": 3.462235424402407e-06, + "loss": 0.3858, "step": 16000 }, { - "epoch": 0.92, - "grad_norm": 0.4117894608929416, - "learning_rate": 3.393784342200601e-07, - "loss": 0.2862, + "epoch": 0.74, + "grad_norm": 0.7080862989221754, + "learning_rate": 3.4611096065325644e-06, + "loss": 0.235, "step": 16001 }, { - "epoch": 0.92, - "grad_norm": 0.7838377566107934, - "learning_rate": 3.388979096898415e-07, - "loss": 0.3532, + "epoch": 0.74, + "grad_norm": 0.394599354931304, + "learning_rate": 3.4599839334274488e-06, + "loss": 0.3005, "step": 16002 }, { - "epoch": 0.92, - "grad_norm": 0.34177113425113487, - "learning_rate": 3.3841771972317414e-07, - "loss": 0.269, + "epoch": 0.74, + "grad_norm": 0.38462109574640807, + "learning_rate": 3.458858405111989e-06, + "loss": 0.2802, "step": 16003 }, { - "epoch": 0.92, - "grad_norm": 0.301355411444743, - "learning_rate": 3.3793786433668596e-07, - "loss": 0.0814, + "epoch": 0.74, + "grad_norm": 0.3013967479084854, + "learning_rate": 3.4577330216110925e-06, + "loss": 0.138, "step": 16004 }, { - "epoch": 0.92, - "grad_norm": 0.30844575612555714, - "learning_rate": 3.3745834354699247e-07, - "loss": 0.262, + "epoch": 0.74, + "grad_norm": 0.33797329034160273, + "learning_rate": 3.4566077829496892e-06, + "loss": 0.2524, "step": 16005 }, { - "epoch": 0.92, - "grad_norm": 0.3747679957008508, - "learning_rate": 3.3697915737070154e-07, - "loss": 0.306, + "epoch": 0.74, + "grad_norm": 0.4865594372464824, + "learning_rate": 3.4554826891526828e-06, + "loss": 0.3378, "step": 16006 }, { - "epoch": 0.92, - "grad_norm": 0.7646152394209245, - "learning_rate": 3.365003058244076e-07, - "loss": 0.3003, + "epoch": 0.74, + "grad_norm": 0.49078257441504675, + "learning_rate": 3.454357740244978e-06, + "loss": 0.2894, "step": 16007 }, { - "epoch": 0.92, - "grad_norm": 0.536131589767793, - "learning_rate": 3.3602178892469193e-07, - "loss": 0.3079, + "epoch": 0.74, + "grad_norm": 0.33117981759534654, + "learning_rate": 3.453232936251485e-06, + "loss": 0.2449, "step": 16008 }, { - "epoch": 0.92, - "grad_norm": 0.3947217103658755, - "learning_rate": 3.355436066881268e-07, - "loss": 0.283, + "epoch": 0.74, + "grad_norm": 1.4009861010774025, + "learning_rate": 3.452108277197104e-06, + "loss": 0.542, "step": 16009 }, { - "epoch": 0.92, - "grad_norm": 0.22066852034689402, - "learning_rate": 3.3506575913127006e-07, - "loss": 0.1689, + "epoch": 0.74, + "grad_norm": 0.30514851762621314, + "learning_rate": 3.450983763106739e-06, + "loss": 0.2599, "step": 16010 }, { - "epoch": 0.92, - "grad_norm": 0.5902292519591517, - "learning_rate": 3.3458824627067067e-07, - "loss": 0.3642, + "epoch": 0.74, + "grad_norm": 0.3378193693971455, + "learning_rate": 3.449859394005277e-06, + "loss": 0.2239, "step": 16011 }, { - "epoch": 0.92, - "grad_norm": 0.381931762583577, - "learning_rate": 3.3411106812286544e-07, - "loss": 0.3037, + "epoch": 0.74, + "grad_norm": 0.4679353510900042, + "learning_rate": 3.4487351699176155e-06, + "loss": 0.2659, "step": 16012 }, { - "epoch": 0.92, - "grad_norm": 0.33341807366384135, - "learning_rate": 3.336342247043778e-07, - "loss": 0.2449, + "epoch": 0.74, + "grad_norm": 1.2158495681457318, + "learning_rate": 3.4476110908686467e-06, + "loss": 0.7061, "step": 16013 }, { - "epoch": 0.92, - "grad_norm": 0.5143593485060982, - "learning_rate": 3.331577160317201e-07, - "loss": 0.294, + "epoch": 0.74, + "grad_norm": 0.33809495012038887, + "learning_rate": 3.446487156883249e-06, + "loss": 0.1942, "step": 16014 }, { - "epoch": 0.92, - "grad_norm": 0.4661460986029127, - "learning_rate": 3.3268154212139583e-07, - "loss": 0.2655, + "epoch": 0.74, + "grad_norm": 0.3690940110294595, + "learning_rate": 3.4453633679863142e-06, + "loss": 0.2756, "step": 16015 }, { - "epoch": 0.92, - "grad_norm": 0.3431806133508932, - "learning_rate": 3.3220570298989507e-07, - "loss": 0.1848, + "epoch": 0.74, + "grad_norm": 0.639223639505482, + "learning_rate": 3.4442397242027116e-06, + "loss": 0.3782, "step": 16016 }, { - "epoch": 0.92, - "grad_norm": 0.30176925676400573, - "learning_rate": 3.317301986536947e-07, - "loss": 0.2456, + "epoch": 0.74, + "grad_norm": 0.3230169504719691, + "learning_rate": 3.443116225557325e-06, + "loss": 0.183, "step": 16017 }, { - "epoch": 0.92, - "grad_norm": 0.3353558657145527, - "learning_rate": 3.3125502912926044e-07, - "loss": 0.2543, + "epoch": 0.74, + "grad_norm": 0.2992873209342639, + "learning_rate": 3.4419928720750274e-06, + "loss": 0.1996, "step": 16018 }, { - "epoch": 0.92, - "grad_norm": 0.5655804810882348, - "learning_rate": 3.307801944330491e-07, - "loss": 0.3556, + "epoch": 0.74, + "grad_norm": 0.5662242530883589, + "learning_rate": 3.4408696637806837e-06, + "loss": 0.3602, "step": 16019 }, { - "epoch": 0.92, - "grad_norm": 0.33186886555261347, - "learning_rate": 3.303056945815053e-07, - "loss": 0.0592, + "epoch": 0.74, + "grad_norm": 0.3210874080940691, + "learning_rate": 3.4397466006991676e-06, + "loss": 0.1839, "step": 16020 }, { - "epoch": 0.92, - "grad_norm": 0.27348568532345147, - "learning_rate": 3.2983152959105924e-07, - "loss": 0.2488, + "epoch": 0.74, + "grad_norm": 0.9924887460510491, + "learning_rate": 3.438623682855332e-06, + "loss": 0.3814, "step": 16021 }, { - "epoch": 0.92, - "grad_norm": 0.25027602062501897, - "learning_rate": 3.2935769947813003e-07, - "loss": 0.2053, + "epoch": 0.74, + "grad_norm": 0.4994724426068747, + "learning_rate": 3.437500910274052e-06, + "loss": 0.341, "step": 16022 }, { - "epoch": 0.92, - "grad_norm": 0.810186576406879, - "learning_rate": 3.2888420425912783e-07, - "loss": 0.2417, + "epoch": 0.74, + "grad_norm": 0.2861738993342746, + "learning_rate": 3.436378282980175e-06, + "loss": 0.2485, "step": 16023 }, { - "epoch": 0.92, - "grad_norm": 0.347940790008678, - "learning_rate": 3.2841104395045174e-07, - "loss": 0.2546, + "epoch": 0.74, + "grad_norm": 0.35692002166363995, + "learning_rate": 3.435255800998555e-06, + "loss": 0.1218, "step": 16024 }, { - "epoch": 0.92, - "grad_norm": 0.43409577516476583, - "learning_rate": 3.279382185684843e-07, - "loss": 0.332, + "epoch": 0.74, + "grad_norm": 1.3224771398885873, + "learning_rate": 3.434133464354044e-06, + "loss": 0.7636, "step": 16025 }, { - "epoch": 0.92, - "grad_norm": 0.5257069977199804, - "learning_rate": 3.274657281296001e-07, - "loss": 0.317, + "epoch": 0.74, + "grad_norm": 0.33826544814868903, + "learning_rate": 3.433011273071488e-06, + "loss": 0.2387, "step": 16026 }, { - "epoch": 0.92, - "grad_norm": 0.3115977415753507, - "learning_rate": 3.269935726501616e-07, - "loss": 0.1908, + "epoch": 0.74, + "grad_norm": 0.399478138512981, + "learning_rate": 3.4318892271757387e-06, + "loss": 0.24, "step": 16027 }, { - "epoch": 0.92, - "grad_norm": 0.31145969908220766, - "learning_rate": 3.265217521465203e-07, - "loss": 0.1627, + "epoch": 0.74, + "grad_norm": 1.1847104330800728, + "learning_rate": 3.4307673266916275e-06, + "loss": 0.3578, "step": 16028 }, { - "epoch": 0.92, - "grad_norm": 0.304969715955574, - "learning_rate": 3.2605026663501403e-07, - "loss": 0.271, + "epoch": 0.74, + "grad_norm": 0.3506241009764794, + "learning_rate": 3.4296455716439957e-06, + "loss": 0.231, "step": 16029 }, { - "epoch": 0.92, - "grad_norm": 0.3198391503543351, - "learning_rate": 3.2557911613197213e-07, - "loss": 0.2093, + "epoch": 0.74, + "grad_norm": 0.2426353904403282, + "learning_rate": 3.4285239620576814e-06, + "loss": 0.142, "step": 16030 }, { - "epoch": 0.92, - "grad_norm": 0.5495654930323556, - "learning_rate": 3.251083006537081e-07, - "loss": 0.3726, + "epoch": 0.74, + "grad_norm": 0.5237174179937928, + "learning_rate": 3.4274024979575107e-06, + "loss": 0.3844, "step": 16031 }, { - "epoch": 0.92, - "grad_norm": 0.9376399677814717, - "learning_rate": 3.24637820216529e-07, - "loss": 0.4465, + "epoch": 0.74, + "grad_norm": 0.36734531906511436, + "learning_rate": 3.426281179368317e-06, + "loss": 0.2765, "step": 16032 }, { - "epoch": 0.92, - "grad_norm": 0.27483983067880535, - "learning_rate": 3.241676748367251e-07, - "loss": 0.2189, + "epoch": 0.74, + "grad_norm": 0.8252836836871129, + "learning_rate": 3.425160006314918e-06, + "loss": 0.2886, "step": 16033 }, { - "epoch": 0.92, - "grad_norm": 0.3926888204978273, - "learning_rate": 3.2369786453057996e-07, - "loss": 0.2467, + "epoch": 0.74, + "grad_norm": 0.405705796355882, + "learning_rate": 3.4240389788221407e-06, + "loss": 0.299, "step": 16034 }, { - "epoch": 0.92, - "grad_norm": 0.8656459700472929, - "learning_rate": 3.232283893143595e-07, - "loss": 0.4894, + "epoch": 0.74, + "grad_norm": 0.4145311487230021, + "learning_rate": 3.4229180969148048e-06, + "loss": 0.2374, "step": 16035 }, { - "epoch": 0.92, - "grad_norm": 0.347362517901405, - "learning_rate": 3.2275924920432525e-07, - "loss": 0.2104, + "epoch": 0.74, + "grad_norm": 0.2683601079381109, + "learning_rate": 3.42179736061772e-06, + "loss": 0.1894, "step": 16036 }, { - "epoch": 0.92, - "grad_norm": 0.3434981831692561, - "learning_rate": 3.2229044421672183e-07, - "loss": 0.3017, + "epoch": 0.74, + "grad_norm": 0.546523079567316, + "learning_rate": 3.420676769955705e-06, + "loss": 0.3027, "step": 16037 }, { - "epoch": 0.92, - "grad_norm": 0.6020647893512497, - "learning_rate": 3.2182197436778527e-07, - "loss": 0.212, + "epoch": 0.74, + "grad_norm": 0.44651271253850955, + "learning_rate": 3.419556324953558e-06, + "loss": 0.286, "step": 16038 }, { - "epoch": 0.92, - "grad_norm": 0.33810505101496374, - "learning_rate": 3.213538396737359e-07, - "loss": 0.2483, + "epoch": 0.74, + "grad_norm": 0.3930930404730781, + "learning_rate": 3.418436025636099e-06, + "loss": 0.2944, "step": 16039 }, { - "epoch": 0.92, - "grad_norm": 0.31886277314169437, - "learning_rate": 3.2088604015078737e-07, - "loss": 0.0928, + "epoch": 0.74, + "grad_norm": 0.9511791100046201, + "learning_rate": 3.4173158720281197e-06, + "loss": 0.2714, "step": 16040 }, { - "epoch": 0.92, - "grad_norm": 0.3547676364178774, - "learning_rate": 3.20418575815139e-07, - "loss": 0.2903, + "epoch": 0.74, + "grad_norm": 0.43363196530128345, + "learning_rate": 3.416195864154426e-06, + "loss": 0.2828, "step": 16041 }, { - "epoch": 0.92, - "grad_norm": 0.330343531602598, - "learning_rate": 3.1995144668298004e-07, - "loss": 0.2459, + "epoch": 0.74, + "grad_norm": 0.24930233478609265, + "learning_rate": 3.4150760020398056e-06, + "loss": 0.2328, "step": 16042 }, { - "epoch": 0.92, - "grad_norm": 0.8111748137958689, - "learning_rate": 3.1948465277048424e-07, - "loss": 0.3584, + "epoch": 0.74, + "grad_norm": 0.9286879525441522, + "learning_rate": 3.4139562857090568e-06, + "loss": 0.5221, "step": 16043 }, { - "epoch": 0.92, - "grad_norm": 0.35521287147768266, - "learning_rate": 3.190181940938197e-07, - "loss": 0.2285, + "epoch": 0.74, + "grad_norm": 0.28858501914574175, + "learning_rate": 3.412836715186971e-06, + "loss": 0.2167, "step": 16044 }, { - "epoch": 0.92, - "grad_norm": 0.2691973622179739, - "learning_rate": 3.185520706691392e-07, - "loss": 0.2408, + "epoch": 0.74, + "grad_norm": 0.7911303205032798, + "learning_rate": 3.41171729049833e-06, + "loss": 0.4124, "step": 16045 }, { - "epoch": 0.92, - "grad_norm": 1.3561969579912896, - "learning_rate": 3.1808628251258413e-07, - "loss": 0.2051, + "epoch": 0.74, + "grad_norm": 0.4298460707940331, + "learning_rate": 3.4105980116679195e-06, + "loss": 0.2596, "step": 16046 }, { - "epoch": 0.92, - "grad_norm": 0.6097716949414717, - "learning_rate": 3.1762082964028385e-07, - "loss": 0.3688, + "epoch": 0.74, + "grad_norm": 0.3880275859438745, + "learning_rate": 3.409478878720516e-06, + "loss": 0.2662, "step": 16047 }, { - "epoch": 0.92, - "grad_norm": 0.37746575318449793, - "learning_rate": 3.171557120683588e-07, - "loss": 0.2568, + "epoch": 0.74, + "grad_norm": 0.5288520393918172, + "learning_rate": 3.408359891680897e-06, + "loss": 0.2392, "step": 16048 }, { - "epoch": 0.92, - "grad_norm": 0.28599774774097053, - "learning_rate": 3.166909298129139e-07, - "loss": 0.2567, + "epoch": 0.74, + "grad_norm": 0.9013495980025912, + "learning_rate": 3.407241050573841e-06, + "loss": 0.5594, "step": 16049 }, { - "epoch": 0.92, - "grad_norm": 0.2873137393901833, - "learning_rate": 3.162264828900474e-07, - "loss": 0.1658, + "epoch": 0.74, + "grad_norm": 0.32491269761548563, + "learning_rate": 3.406122355424111e-06, + "loss": 0.2146, "step": 16050 }, { - "epoch": 0.92, - "grad_norm": 0.3359843271780522, - "learning_rate": 3.1576237131584084e-07, - "loss": 0.2672, + "epoch": 0.74, + "grad_norm": 0.41222003668588764, + "learning_rate": 3.405003806256476e-06, + "loss": 0.3178, "step": 16051 }, { - "epoch": 0.92, - "grad_norm": 0.8905987280455674, - "learning_rate": 3.1529859510636585e-07, - "loss": 0.4539, + "epoch": 0.74, + "grad_norm": 0.5623822527495987, + "learning_rate": 3.4038854030957035e-06, + "loss": 0.251, "step": 16052 }, { - "epoch": 0.92, - "grad_norm": 0.3531928411300171, - "learning_rate": 3.1483515427768506e-07, - "loss": 0.2499, + "epoch": 0.74, + "grad_norm": 0.3479438946069456, + "learning_rate": 3.402767145966548e-06, + "loss": 0.1561, "step": 16053 }, { - "epoch": 0.92, - "grad_norm": 0.32954114792842953, - "learning_rate": 3.143720488458457e-07, - "loss": 0.2502, + "epoch": 0.74, + "grad_norm": 0.3238819764586656, + "learning_rate": 3.4016490348937735e-06, + "loss": 0.2795, "step": 16054 }, { - "epoch": 0.92, - "grad_norm": 0.3633234307925469, - "learning_rate": 3.139092788268872e-07, - "loss": 0.2719, + "epoch": 0.74, + "grad_norm": 0.8053159834172191, + "learning_rate": 3.400531069902122e-06, + "loss": 0.4176, "step": 16055 }, { - "epoch": 0.92, - "grad_norm": 0.29719624462330824, - "learning_rate": 3.1344684423683214e-07, - "loss": 0.1561, + "epoch": 0.74, + "grad_norm": 0.4021190846936983, + "learning_rate": 3.399413251016359e-06, + "loss": 0.1897, "step": 16056 }, { - "epoch": 0.92, - "grad_norm": 0.27943018114548945, - "learning_rate": 3.1298474509169676e-07, - "loss": 0.2495, + "epoch": 0.74, + "grad_norm": 0.2786882476644866, + "learning_rate": 3.3982955782612216e-06, + "loss": 0.2098, "step": 16057 }, { - "epoch": 0.92, - "grad_norm": 0.8756150438352185, - "learning_rate": 3.1252298140748374e-07, - "loss": 0.3744, + "epoch": 0.74, + "grad_norm": 0.3766644020809064, + "learning_rate": 3.3971780516614607e-06, + "loss": 0.2802, "step": 16058 }, { - "epoch": 0.92, - "grad_norm": 0.719734852026446, - "learning_rate": 3.120615532001836e-07, - "loss": 0.2874, + "epoch": 0.74, + "grad_norm": 0.42466331913823124, + "learning_rate": 3.39606067124181e-06, + "loss": 0.1671, "step": 16059 }, { - "epoch": 0.92, - "grad_norm": 0.3029303968028634, - "learning_rate": 3.1160046048577365e-07, - "loss": 0.2542, + "epoch": 0.74, + "grad_norm": 0.6052575643851409, + "learning_rate": 3.394943437027011e-06, + "loss": 0.3635, "step": 16060 }, { - "epoch": 0.92, - "grad_norm": 0.3691633990742467, - "learning_rate": 3.1113970328022433e-07, - "loss": 0.3045, + "epoch": 0.74, + "grad_norm": 0.8312299966613697, + "learning_rate": 3.393826349041802e-06, + "loss": 0.4673, "step": 16061 }, { - "epoch": 0.92, - "grad_norm": 0.3109473557320388, - "learning_rate": 3.106792815994908e-07, - "loss": 0.1067, + "epoch": 0.74, + "grad_norm": 0.32446423251763346, + "learning_rate": 3.3927094073109077e-06, + "loss": 0.2673, "step": 16062 }, { - "epoch": 0.92, - "grad_norm": 0.332168543197011, - "learning_rate": 3.1021919545951683e-07, - "loss": 0.2444, + "epoch": 0.74, + "grad_norm": 0.43832093772407266, + "learning_rate": 3.3915926118590615e-06, + "loss": 0.2352, "step": 16063 }, { - "epoch": 0.92, - "grad_norm": 0.8139045767802737, - "learning_rate": 3.0975944487623534e-07, - "loss": 0.3702, + "epoch": 0.74, + "grad_norm": 0.26441527666778475, + "learning_rate": 3.3904759627109828e-06, + "loss": 0.1579, "step": 16064 }, { - "epoch": 0.92, - "grad_norm": 0.33745728439735845, - "learning_rate": 3.093000298655668e-07, - "loss": 0.296, + "epoch": 0.74, + "grad_norm": 0.42712135772762283, + "learning_rate": 3.389359459891396e-06, + "loss": 0.2717, "step": 16065 }, { - "epoch": 0.92, - "grad_norm": 0.32467491783135444, - "learning_rate": 3.0884095044342197e-07, - "loss": 0.18, + "epoch": 0.74, + "grad_norm": 0.5522817385320041, + "learning_rate": 3.388243103425022e-06, + "loss": 0.235, "step": 16066 }, { - "epoch": 0.92, - "grad_norm": 0.5142360344956464, - "learning_rate": 3.0838220662569807e-07, - "loss": 0.3945, + "epoch": 0.74, + "grad_norm": 0.6604081093102742, + "learning_rate": 3.3871268933365696e-06, + "loss": 0.4264, "step": 16067 }, { - "epoch": 0.92, - "grad_norm": 0.23229234122279033, - "learning_rate": 3.0792379842828234e-07, - "loss": 0.2061, + "epoch": 0.74, + "grad_norm": 0.41344009539807464, + "learning_rate": 3.3860108296507556e-06, + "loss": 0.2831, "step": 16068 }, { - "epoch": 0.92, - "grad_norm": 0.4327552771597201, - "learning_rate": 3.074657258670477e-07, - "loss": 0.1711, + "epoch": 0.74, + "grad_norm": 0.31462677688752494, + "learning_rate": 3.3848949123922857e-06, + "loss": 0.1608, "step": 16069 }, { - "epoch": 0.92, - "grad_norm": 0.4375268155712646, - "learning_rate": 3.070079889578592e-07, - "loss": 0.3044, + "epoch": 0.74, + "grad_norm": 0.32579844595315455, + "learning_rate": 3.3837791415858712e-06, + "loss": 0.2454, "step": 16070 }, { - "epoch": 0.92, - "grad_norm": 0.9263858301221611, - "learning_rate": 3.0655058771656755e-07, - "loss": 0.4369, + "epoch": 0.74, + "grad_norm": 0.89233648424047, + "learning_rate": 3.3826635172562096e-06, + "loss": 0.4376, "step": 16071 }, { - "epoch": 0.92, - "grad_norm": 0.2924196438960813, - "learning_rate": 3.060935221590111e-07, - "loss": 0.1915, + "epoch": 0.74, + "grad_norm": 0.42476969851014046, + "learning_rate": 3.3815480394279922e-06, + "loss": 0.2245, "step": 16072 }, { - "epoch": 0.92, - "grad_norm": 0.3187869889458041, - "learning_rate": 3.056367923010195e-07, - "loss": 0.2959, + "epoch": 0.74, + "grad_norm": 0.4633335775134509, + "learning_rate": 3.3804327081259304e-06, + "loss": 0.3422, "step": 16073 }, { - "epoch": 0.92, - "grad_norm": 0.2271257776392587, - "learning_rate": 3.0518039815841004e-07, - "loss": 0.13, + "epoch": 0.74, + "grad_norm": 0.40415755581022905, + "learning_rate": 3.3793175233747034e-06, + "loss": 0.2856, "step": 16074 }, { - "epoch": 0.92, - "grad_norm": 0.3284032596538808, - "learning_rate": 3.0472433974698566e-07, - "loss": 0.2056, + "epoch": 0.74, + "grad_norm": 0.41891745721233753, + "learning_rate": 3.37820248519901e-06, + "loss": 0.3054, "step": 16075 }, { - "epoch": 0.92, - "grad_norm": 0.7342704365795844, - "learning_rate": 3.042686170825404e-07, - "loss": 0.3854, + "epoch": 0.74, + "grad_norm": 0.1750573597262636, + "learning_rate": 3.377087593623527e-06, + "loss": 0.0703, "step": 16076 }, { - "epoch": 0.92, - "grad_norm": 0.34661328109772904, - "learning_rate": 3.0381323018085496e-07, - "loss": 0.3171, + "epoch": 0.74, + "grad_norm": 0.6656876433274187, + "learning_rate": 3.375972848672943e-06, + "loss": 0.2693, "step": 16077 }, { - "epoch": 0.92, - "grad_norm": 0.38368727591985285, - "learning_rate": 3.0335817905770115e-07, - "loss": 0.2618, + "epoch": 0.74, + "grad_norm": 0.29691160905151925, + "learning_rate": 3.3748582503719373e-06, + "loss": 0.2873, "step": 16078 }, { - "epoch": 0.92, - "grad_norm": 0.5057502110746248, - "learning_rate": 3.029034637288375e-07, - "loss": 0.2623, + "epoch": 0.74, + "grad_norm": 0.6789729851827108, + "learning_rate": 3.3737437987451826e-06, + "loss": 0.2966, "step": 16079 }, { - "epoch": 0.92, - "grad_norm": 0.2947538085012821, - "learning_rate": 3.024490842100092e-07, - "loss": 0.2066, + "epoch": 0.74, + "grad_norm": 0.6291131170262131, + "learning_rate": 3.3726294938173566e-06, + "loss": 0.3915, "step": 16080 }, { - "epoch": 0.92, - "grad_norm": 0.26156274907930127, - "learning_rate": 3.019950405169514e-07, - "loss": 0.2431, + "epoch": 0.74, + "grad_norm": 0.3480741757014308, + "learning_rate": 3.3715153356131223e-06, + "loss": 0.2457, "step": 16081 }, { - "epoch": 0.92, - "grad_norm": 0.5329165928014303, - "learning_rate": 3.015413326653893e-07, - "loss": 0.2872, + "epoch": 0.74, + "grad_norm": 0.2662859751938957, + "learning_rate": 3.370401324157151e-06, + "loss": 0.1932, "step": 16082 }, { - "epoch": 0.92, - "grad_norm": 1.347908220713831, - "learning_rate": 3.0108796067103376e-07, - "loss": 0.5954, + "epoch": 0.74, + "grad_norm": 0.6777872034971865, + "learning_rate": 3.3692874594741064e-06, + "loss": 0.2889, "step": 16083 }, { - "epoch": 0.92, - "grad_norm": 0.5714727630722087, - "learning_rate": 3.0063492454958434e-07, - "loss": 0.3317, + "epoch": 0.74, + "grad_norm": 0.4325174015223367, + "learning_rate": 3.3681737415886453e-06, + "loss": 0.3149, "step": 16084 }, { - "epoch": 0.92, - "grad_norm": 0.2701745129360195, - "learning_rate": 3.001822243167307e-07, - "loss": 0.2408, + "epoch": 0.74, + "grad_norm": 0.7309351079965748, + "learning_rate": 3.3670601705254235e-06, + "loss": 0.3985, "step": 16085 }, { - "epoch": 0.92, - "grad_norm": 0.43758200115949175, - "learning_rate": 2.997298599881493e-07, - "loss": 0.2901, + "epoch": 0.74, + "grad_norm": 0.2790770173588824, + "learning_rate": 3.3659467463090978e-06, + "loss": 0.2104, "step": 16086 }, { - "epoch": 0.92, - "grad_norm": 0.576522748883847, - "learning_rate": 2.992778315795064e-07, - "loss": 0.3509, + "epoch": 0.74, + "grad_norm": 0.6304181808535578, + "learning_rate": 3.3648334689643214e-06, + "loss": 0.3596, "step": 16087 }, { - "epoch": 0.92, - "grad_norm": 0.23680880305406177, - "learning_rate": 2.988261391064551e-07, - "loss": 0.2101, + "epoch": 0.74, + "grad_norm": 0.41725011910956933, + "learning_rate": 3.363720338515736e-06, + "loss": 0.1934, "step": 16088 }, { - "epoch": 0.92, - "grad_norm": 0.4501988796850427, - "learning_rate": 2.9837478258463725e-07, - "loss": 0.324, + "epoch": 0.74, + "grad_norm": 0.639307048280286, + "learning_rate": 3.362607354987979e-06, + "loss": 0.2318, "step": 16089 }, { - "epoch": 0.92, - "grad_norm": 0.5710226158588361, - "learning_rate": 2.9792376202968263e-07, - "loss": 0.3863, + "epoch": 0.74, + "grad_norm": 0.26523923206571404, + "learning_rate": 3.361494518405705e-06, + "loss": 0.2558, "step": 16090 }, { - "epoch": 0.92, - "grad_norm": 0.41143826535651795, - "learning_rate": 2.974730774572121e-07, - "loss": 0.2862, + "epoch": 0.74, + "grad_norm": 0.8076612069886973, + "learning_rate": 3.360381828793541e-06, + "loss": 0.4288, "step": 16091 }, { - "epoch": 0.92, - "grad_norm": 0.45703676201711263, - "learning_rate": 2.970227288828309e-07, - "loss": 0.2569, + "epoch": 0.74, + "grad_norm": 0.7366596725361553, + "learning_rate": 3.359269286176127e-06, + "loss": 0.2168, "step": 16092 }, { - "epoch": 0.92, - "grad_norm": 0.30212625877525195, - "learning_rate": 2.965727163221366e-07, - "loss": 0.2634, + "epoch": 0.74, + "grad_norm": 0.44630013796267093, + "learning_rate": 3.358156890578088e-06, + "loss": 0.2792, "step": 16093 }, { - "epoch": 0.92, - "grad_norm": 0.2798986506628534, - "learning_rate": 2.9612303979071e-07, - "loss": 0.2054, + "epoch": 0.74, + "grad_norm": 0.2930026884835982, + "learning_rate": 3.3570446420240534e-06, + "loss": 0.2464, "step": 16094 }, { - "epoch": 0.92, - "grad_norm": 1.641861194235436, - "learning_rate": 2.9567369930412646e-07, - "loss": 0.3078, + "epoch": 0.74, + "grad_norm": 0.3258605922665766, + "learning_rate": 3.3559325405386513e-06, + "loss": 0.1281, "step": 16095 }, { - "epoch": 0.92, - "grad_norm": 0.30785882672148424, - "learning_rate": 2.9522469487794467e-07, - "loss": 0.2824, + "epoch": 0.74, + "grad_norm": 0.41513553547822357, + "learning_rate": 3.3548205861464956e-06, + "loss": 0.322, "step": 16096 }, { - "epoch": 0.92, - "grad_norm": 0.37031077069707474, - "learning_rate": 2.9477602652771554e-07, - "loss": 0.318, + "epoch": 0.74, + "grad_norm": 1.3430851241228432, + "learning_rate": 3.35370877887221e-06, + "loss": 0.7711, "step": 16097 }, { - "epoch": 0.92, - "grad_norm": 0.6730152886289593, - "learning_rate": 2.943276942689732e-07, - "loss": 0.2634, + "epoch": 0.74, + "grad_norm": 0.3756029673686868, + "learning_rate": 3.352597118740404e-06, + "loss": 0.2844, "step": 16098 }, { - "epoch": 0.92, - "grad_norm": 0.4854497688120956, - "learning_rate": 2.9387969811724757e-07, - "loss": 0.2925, + "epoch": 0.74, + "grad_norm": 0.3620577419249336, + "learning_rate": 3.3514856057756905e-06, + "loss": 0.1953, "step": 16099 }, { - "epoch": 0.93, - "grad_norm": 0.35689041820120426, - "learning_rate": 2.9343203808804953e-07, - "loss": 0.252, + "epoch": 0.74, + "grad_norm": 0.33957096426189765, + "learning_rate": 3.3503742400026816e-06, + "loss": 0.1712, "step": 16100 }, { - "epoch": 0.93, - "grad_norm": 0.29101679336873626, - "learning_rate": 2.9298471419688335e-07, - "loss": 0.2409, + "epoch": 0.74, + "grad_norm": 0.527868012226301, + "learning_rate": 3.349263021445974e-06, + "loss": 0.3216, "step": 16101 }, { - "epoch": 0.93, - "grad_norm": 0.33551396449091, - "learning_rate": 2.925377264592388e-07, - "loss": 0.1526, + "epoch": 0.74, + "grad_norm": 0.2945789989140119, + "learning_rate": 3.348151950130174e-06, + "loss": 0.2104, "step": 16102 }, { - "epoch": 0.93, - "grad_norm": 0.403795727148337, - "learning_rate": 2.9209107489059474e-07, - "loss": 0.2942, + "epoch": 0.74, + "grad_norm": 0.9830299557255137, + "learning_rate": 3.347041026079878e-06, + "loss": 0.4528, "step": 16103 }, { - "epoch": 0.93, - "grad_norm": 0.35408713603736885, - "learning_rate": 2.916447595064198e-07, - "loss": 0.2958, + "epoch": 0.74, + "grad_norm": 0.9078949383299475, + "learning_rate": 3.345930249319684e-06, + "loss": 0.3626, "step": 16104 }, { - "epoch": 0.93, - "grad_norm": 0.6365072948732012, - "learning_rate": 2.9119878032216944e-07, - "loss": 0.1797, + "epoch": 0.74, + "grad_norm": 0.33762431493824857, + "learning_rate": 3.344819619874179e-06, + "loss": 0.1985, "step": 16105 }, { - "epoch": 0.93, - "grad_norm": 0.2916939704666153, - "learning_rate": 2.90753137353289e-07, - "loss": 0.2252, + "epoch": 0.74, + "grad_norm": 0.3758323617145274, + "learning_rate": 3.3437091377679563e-06, + "loss": 0.3184, "step": 16106 }, { - "epoch": 0.93, - "grad_norm": 0.46903057760889666, - "learning_rate": 2.903078306152085e-07, - "loss": 0.2796, + "epoch": 0.74, + "grad_norm": 0.7922945877591712, + "learning_rate": 3.342598803025595e-06, + "loss": 0.4255, "step": 16107 }, { - "epoch": 0.93, - "grad_norm": 0.30356242185573823, - "learning_rate": 2.8986286012335105e-07, - "loss": 0.2131, + "epoch": 0.74, + "grad_norm": 0.19907840941663313, + "learning_rate": 3.3414886156716785e-06, + "loss": 0.1386, "step": 16108 }, { - "epoch": 0.93, - "grad_norm": 0.3753404509166921, - "learning_rate": 2.8941822589312443e-07, - "loss": 0.3212, + "epoch": 0.74, + "grad_norm": 0.3598736035420362, + "learning_rate": 3.3403785757307905e-06, + "loss": 0.3183, "step": 16109 }, { - "epoch": 0.93, - "grad_norm": 0.6360470819564403, - "learning_rate": 2.889739279399262e-07, - "loss": 0.3638, + "epoch": 0.74, + "grad_norm": 1.474513499818245, + "learning_rate": 3.339268683227499e-06, + "loss": 0.6325, "step": 16110 }, { - "epoch": 0.93, - "grad_norm": 0.33098743442000983, - "learning_rate": 2.885299662791452e-07, - "loss": 0.1529, + "epoch": 0.74, + "grad_norm": 0.3508818339312539, + "learning_rate": 3.338158938186379e-06, + "loss": 0.2598, "step": 16111 }, { - "epoch": 0.93, - "grad_norm": 0.3058883589226098, - "learning_rate": 2.880863409261525e-07, - "loss": 0.28, + "epoch": 0.74, + "grad_norm": 0.7879045170250311, + "learning_rate": 3.3370493406320024e-06, + "loss": 0.2743, "step": 16112 }, { - "epoch": 0.93, - "grad_norm": 0.25707865836058724, - "learning_rate": 2.876430518963136e-07, - "loss": 0.1953, + "epoch": 0.74, + "grad_norm": 0.36733307798601805, + "learning_rate": 3.3359398905889295e-06, + "loss": 0.3142, "step": 16113 }, { - "epoch": 0.93, - "grad_norm": 0.6049082040792976, - "learning_rate": 2.872000992049773e-07, - "loss": 0.3962, + "epoch": 0.74, + "grad_norm": 0.27124368251898295, + "learning_rate": 3.3348305880817266e-06, + "loss": 0.199, "step": 16114 }, { - "epoch": 0.93, - "grad_norm": 0.36179534195311464, - "learning_rate": 2.867574828674824e-07, - "loss": 0.2377, + "epoch": 0.74, + "grad_norm": 0.23460667931874093, + "learning_rate": 3.3337214331349443e-06, + "loss": 0.1194, "step": 16115 }, { - "epoch": 0.93, - "grad_norm": 0.35134754663657647, - "learning_rate": 2.8631520289916004e-07, - "loss": 0.305, + "epoch": 0.74, + "grad_norm": 0.8757412830881607, + "learning_rate": 3.3326124257731506e-06, + "loss": 0.3882, "step": 16116 }, { - "epoch": 0.93, - "grad_norm": 0.6873273078284741, - "learning_rate": 2.858732593153246e-07, - "loss": 0.2425, + "epoch": 0.74, + "grad_norm": 0.4075015078449814, + "learning_rate": 3.3315035660208914e-06, + "loss": 0.2338, "step": 16117 }, { - "epoch": 0.93, - "grad_norm": 0.4355865580407592, - "learning_rate": 2.8543165213128057e-07, - "loss": 0.2198, + "epoch": 0.74, + "grad_norm": 0.3297661244973122, + "learning_rate": 3.330394853902714e-06, + "loss": 0.2377, "step": 16118 }, { - "epoch": 0.93, - "grad_norm": 0.2721525238168374, - "learning_rate": 2.8499038136231894e-07, - "loss": 0.2223, + "epoch": 0.74, + "grad_norm": 0.5905474520742258, + "learning_rate": 3.3292862894431653e-06, + "loss": 0.3733, "step": 16119 }, { - "epoch": 0.93, - "grad_norm": 0.3305990370089696, - "learning_rate": 2.845494470237242e-07, - "loss": 0.2855, + "epoch": 0.74, + "grad_norm": 0.3844957154891131, + "learning_rate": 3.328177872666789e-06, + "loss": 0.2696, "step": 16120 }, { - "epoch": 0.93, - "grad_norm": 0.3257204072520625, - "learning_rate": 2.841088491307642e-07, - "loss": 0.2172, + "epoch": 0.74, + "grad_norm": 0.21910926818725007, + "learning_rate": 3.3270696035981275e-06, + "loss": 0.1792, "step": 16121 }, { - "epoch": 0.93, - "grad_norm": 0.7299514903666677, - "learning_rate": 2.8366858769869663e-07, - "loss": 0.4215, + "epoch": 0.74, + "grad_norm": 0.8632624732793321, + "learning_rate": 3.32596148226171e-06, + "loss": 0.4857, "step": 16122 }, { - "epoch": 0.93, - "grad_norm": 1.0502896884292527, - "learning_rate": 2.8322866274276715e-07, - "loss": 0.3683, + "epoch": 0.74, + "grad_norm": 0.39270958489206576, + "learning_rate": 3.3248535086820776e-06, + "loss": 0.2763, "step": 16123 }, { - "epoch": 0.93, - "grad_norm": 0.2414259325709617, - "learning_rate": 2.8278907427821245e-07, - "loss": 0.223, + "epoch": 0.74, + "grad_norm": 0.7315780509249727, + "learning_rate": 3.32374568288375e-06, + "loss": 0.338, "step": 16124 }, { - "epoch": 0.93, - "grad_norm": 0.4580905634662621, - "learning_rate": 2.8234982232025365e-07, - "loss": 0.2462, + "epoch": 0.74, + "grad_norm": 0.38121667954284166, + "learning_rate": 3.3226380048912586e-06, + "loss": 0.2374, "step": 16125 }, { - "epoch": 0.93, - "grad_norm": 0.6134757196152566, - "learning_rate": 2.8191090688410305e-07, - "loss": 0.3354, + "epoch": 0.74, + "grad_norm": 0.39145930572575766, + "learning_rate": 3.32153047472913e-06, + "loss": 0.2768, "step": 16126 }, { - "epoch": 0.93, - "grad_norm": 0.34813149354241363, - "learning_rate": 2.8147232798496074e-07, - "loss": 0.2758, + "epoch": 0.74, + "grad_norm": 0.3538415504293478, + "learning_rate": 3.320423092421876e-06, + "loss": 0.1988, "step": 16127 }, { - "epoch": 0.93, - "grad_norm": 0.2445101180031528, - "learning_rate": 2.810340856380112e-07, - "loss": 0.1985, + "epoch": 0.74, + "grad_norm": 1.044541027306054, + "learning_rate": 3.3193158579940164e-06, + "loss": 0.3959, "step": 16128 }, { - "epoch": 0.93, - "grad_norm": 0.9321604201835165, - "learning_rate": 2.8059617985843557e-07, - "loss": 0.3523, + "epoch": 0.74, + "grad_norm": 0.35601210923626375, + "learning_rate": 3.3182087714700694e-06, + "loss": 0.2668, "step": 16129 }, { - "epoch": 0.93, - "grad_norm": 0.36475217838282614, - "learning_rate": 2.801586106613963e-07, - "loss": 0.2633, + "epoch": 0.74, + "grad_norm": 0.3682407844886599, + "learning_rate": 3.3171018328745364e-06, + "loss": 0.2926, "step": 16130 }, { - "epoch": 0.93, - "grad_norm": 0.9028096980700738, - "learning_rate": 2.797213780620456e-07, - "loss": 0.2922, + "epoch": 0.74, + "grad_norm": 2.297854303456897, + "learning_rate": 3.315995042231931e-06, + "loss": 0.2099, "step": 16131 }, { - "epoch": 0.93, - "grad_norm": 0.2847058382844327, - "learning_rate": 2.7928448207552474e-07, - "loss": 0.2463, + "epoch": 0.74, + "grad_norm": 0.26947889323034246, + "learning_rate": 3.3148883995667457e-06, + "loss": 0.2147, "step": 16132 }, { - "epoch": 0.93, - "grad_norm": 0.38924969775553325, - "learning_rate": 2.7884792271696603e-07, - "loss": 0.3169, + "epoch": 0.74, + "grad_norm": 0.4334930096280682, + "learning_rate": 3.3137819049034957e-06, + "loss": 0.2516, "step": 16133 }, { - "epoch": 0.93, - "grad_norm": 0.24599208379016946, - "learning_rate": 2.784117000014852e-07, - "loss": 0.1124, + "epoch": 0.74, + "grad_norm": 0.3368574989949426, + "learning_rate": 3.312675558266667e-06, + "loss": 0.2664, "step": 16134 }, { - "epoch": 0.93, - "grad_norm": 0.6790767935493142, - "learning_rate": 2.7797581394418907e-07, - "loss": 0.3211, + "epoch": 0.74, + "grad_norm": 0.32667901451206866, + "learning_rate": 3.3115693596807584e-06, + "loss": 0.2501, "step": 16135 }, { - "epoch": 0.93, - "grad_norm": 0.308392724398636, - "learning_rate": 2.7754026456017104e-07, - "loss": 0.2932, + "epoch": 0.74, + "grad_norm": 0.8003012011282131, + "learning_rate": 3.310463309170259e-06, + "loss": 0.4116, "step": 16136 }, { - "epoch": 0.93, - "grad_norm": 0.4612725789887395, - "learning_rate": 2.7710505186451684e-07, - "loss": 0.2619, + "epoch": 0.74, + "grad_norm": 0.3861248574902254, + "learning_rate": 3.309357406759647e-06, + "loss": 0.329, "step": 16137 }, { - "epoch": 0.93, - "grad_norm": 0.6119165445617544, - "learning_rate": 2.766701758722967e-07, - "loss": 0.3625, + "epoch": 0.74, + "grad_norm": 0.30955351571782025, + "learning_rate": 3.30825165247342e-06, + "loss": 0.1786, "step": 16138 }, { - "epoch": 0.93, - "grad_norm": 0.3401083159351843, - "learning_rate": 2.7623563659857186e-07, - "loss": 0.3047, + "epoch": 0.74, + "grad_norm": 0.5188374830472456, + "learning_rate": 3.3071460463360485e-06, + "loss": 0.2468, "step": 16139 }, { - "epoch": 0.93, - "grad_norm": 0.22356893956156418, - "learning_rate": 2.758014340583859e-07, - "loss": 0.216, + "epoch": 0.74, + "grad_norm": 0.7149262720806215, + "learning_rate": 3.306040588372017e-06, + "loss": 0.4317, "step": 16140 }, { - "epoch": 0.93, - "grad_norm": 1.9203710855895133, - "learning_rate": 2.7536756826678e-07, - "loss": 0.199, + "epoch": 0.74, + "grad_norm": 0.358192855929158, + "learning_rate": 3.304935278605791e-06, + "loss": 0.1965, "step": 16141 }, { - "epoch": 0.93, - "grad_norm": 0.3429263728146882, - "learning_rate": 2.749340392387767e-07, - "loss": 0.2606, + "epoch": 0.74, + "grad_norm": 0.29734969951174195, + "learning_rate": 3.303830117061846e-06, + "loss": 0.2347, "step": 16142 }, { - "epoch": 0.93, - "grad_norm": 0.6493811930991484, - "learning_rate": 2.745008469893884e-07, - "loss": 0.3888, + "epoch": 0.74, + "grad_norm": 1.2724837161518323, + "learning_rate": 3.3027251037646504e-06, + "loss": 0.5205, "step": 16143 }, { - "epoch": 0.93, - "grad_norm": 0.3245298437449268, - "learning_rate": 2.740679915336175e-07, - "loss": 0.2407, + "epoch": 0.74, + "grad_norm": 0.317019692791806, + "learning_rate": 3.301620238738664e-06, + "loss": 0.1878, "step": 16144 }, { - "epoch": 0.93, - "grad_norm": 0.3407142144619474, - "learning_rate": 2.7363547288645544e-07, - "loss": 0.3001, + "epoch": 0.74, + "grad_norm": 0.2936610601476582, + "learning_rate": 3.3005155220083485e-06, + "loss": 0.2479, "step": 16145 }, { - "epoch": 0.93, - "grad_norm": 0.4127223640520308, - "learning_rate": 2.732032910628779e-07, - "loss": 0.2387, + "epoch": 0.74, + "grad_norm": 0.9829195222449634, + "learning_rate": 3.2994109535981666e-06, + "loss": 0.5056, "step": 16146 }, { - "epoch": 0.93, - "grad_norm": 0.4410479453956271, - "learning_rate": 2.727714460778519e-07, - "loss": 0.0986, + "epoch": 0.74, + "grad_norm": 0.3818905881398835, + "learning_rate": 3.2983065335325636e-06, + "loss": 0.2155, "step": 16147 }, { - "epoch": 0.93, - "grad_norm": 0.23114652387431825, - "learning_rate": 2.7233993794633205e-07, - "loss": 0.2363, + "epoch": 0.74, + "grad_norm": 0.5160790801911431, + "learning_rate": 3.297202261835999e-06, + "loss": 0.2506, "step": 16148 }, { - "epoch": 0.93, - "grad_norm": 0.7707876240880364, - "learning_rate": 2.7190876668326207e-07, - "loss": 0.4322, + "epoch": 0.74, + "grad_norm": 0.4499971818936267, + "learning_rate": 3.2960981385329094e-06, + "loss": 0.3013, "step": 16149 }, { - "epoch": 0.93, - "grad_norm": 0.5681333562696601, - "learning_rate": 2.7147793230357434e-07, - "loss": 0.3007, + "epoch": 0.74, + "grad_norm": 0.359607336901922, + "learning_rate": 3.2949941636477523e-06, + "loss": 0.2383, "step": 16150 }, { - "epoch": 0.93, - "grad_norm": 0.334019256141992, - "learning_rate": 2.71047434822187e-07, - "loss": 0.2636, + "epoch": 0.74, + "grad_norm": 0.9870576953504104, + "learning_rate": 3.293890337204959e-06, + "loss": 0.3757, "step": 16151 }, { - "epoch": 0.93, - "grad_norm": 0.25145171079300954, - "learning_rate": 2.7061727425400695e-07, - "loss": 0.2368, + "epoch": 0.74, + "grad_norm": 0.9284451036292902, + "learning_rate": 3.2927866592289725e-06, + "loss": 0.3442, "step": 16152 }, { - "epoch": 0.93, - "grad_norm": 0.37370438219164975, - "learning_rate": 2.701874506139335e-07, - "loss": 0.2354, + "epoch": 0.74, + "grad_norm": 0.30458095187051076, + "learning_rate": 3.2916831297442255e-06, + "loss": 0.2601, "step": 16153 }, { - "epoch": 0.93, - "grad_norm": 0.32680459189413763, - "learning_rate": 2.6975796391685125e-07, - "loss": 0.1946, + "epoch": 0.74, + "grad_norm": 0.22536001354157525, + "learning_rate": 3.2905797487751424e-06, + "loss": 0.1459, "step": 16154 }, { - "epoch": 0.93, - "grad_norm": 0.5853293839882767, - "learning_rate": 2.6932881417763067e-07, - "loss": 0.342, + "epoch": 0.74, + "grad_norm": 1.1420696542265183, + "learning_rate": 3.289476516346163e-06, + "loss": 0.4822, "step": 16155 }, { - "epoch": 0.93, - "grad_norm": 0.3247975509729384, - "learning_rate": 2.689000014111365e-07, - "loss": 0.3094, + "epoch": 0.74, + "grad_norm": 0.4525238884397799, + "learning_rate": 3.288373432481703e-06, + "loss": 0.2864, "step": 16156 }, { - "epoch": 0.93, - "grad_norm": 0.33160684037157295, - "learning_rate": 2.684715256322146e-07, - "loss": 0.2061, + "epoch": 0.74, + "grad_norm": 0.30112622202575445, + "learning_rate": 3.2872704972061884e-06, + "loss": 0.2486, "step": 16157 }, { - "epoch": 0.93, - "grad_norm": 0.36609071143521194, - "learning_rate": 2.680433868557064e-07, - "loss": 0.2004, + "epoch": 0.74, + "grad_norm": 0.7537088589824122, + "learning_rate": 3.2861677105440335e-06, + "loss": 0.3805, "step": 16158 }, { - "epoch": 0.93, - "grad_norm": 1.2305571895568488, - "learning_rate": 2.676155850964379e-07, - "loss": 0.4733, + "epoch": 0.74, + "grad_norm": 0.3980597895663537, + "learning_rate": 3.2850650725196543e-06, + "loss": 0.2373, "step": 16159 }, { - "epoch": 0.93, - "grad_norm": 0.22411989695587825, - "learning_rate": 2.6718812036922283e-07, - "loss": 0.2119, + "epoch": 0.74, + "grad_norm": 0.2313688817433475, + "learning_rate": 3.2839625831574653e-06, + "loss": 0.135, "step": 16160 }, { - "epoch": 0.93, - "grad_norm": 0.6334435102540285, - "learning_rate": 2.6676099268886367e-07, - "loss": 0.3685, + "epoch": 0.74, + "grad_norm": 0.3854610936153916, + "learning_rate": 3.2828602424818677e-06, + "loss": 0.2898, "step": 16161 }, { - "epoch": 0.93, - "grad_norm": 1.2434403340381204, - "learning_rate": 2.6633420207015426e-07, - "loss": 0.8358, + "epoch": 0.74, + "grad_norm": 0.4087413720311877, + "learning_rate": 3.2817580505172717e-06, + "loss": 0.2605, "step": 16162 }, { - "epoch": 0.93, - "grad_norm": 0.2768553025982602, - "learning_rate": 2.659077485278716e-07, - "loss": 0.1891, + "epoch": 0.74, + "grad_norm": 0.5659023460892615, + "learning_rate": 3.28065600728808e-06, + "loss": 0.3543, "step": 16163 }, { - "epoch": 0.93, - "grad_norm": 1.8087700309256254, - "learning_rate": 2.6548163207678614e-07, - "loss": 0.3039, + "epoch": 0.74, + "grad_norm": 0.7191671636883915, + "learning_rate": 3.2795541128186848e-06, + "loss": 0.3054, "step": 16164 }, { - "epoch": 0.93, - "grad_norm": 0.3263779764663397, - "learning_rate": 2.6505585273165156e-07, - "loss": 0.1664, + "epoch": 0.74, + "grad_norm": 0.2765105041215185, + "learning_rate": 3.278452367133488e-06, + "loss": 0.2576, "step": 16165 }, { - "epoch": 0.93, - "grad_norm": 0.31180764115083154, - "learning_rate": 2.6463041050721615e-07, - "loss": 0.2553, + "epoch": 0.74, + "grad_norm": 0.2611474019458847, + "learning_rate": 3.277350770256873e-06, + "loss": 0.1902, "step": 16166 }, { - "epoch": 0.93, - "grad_norm": 0.8241373179238439, - "learning_rate": 2.642053054182103e-07, - "loss": 0.3087, + "epoch": 0.74, + "grad_norm": 1.6018837010269276, + "learning_rate": 3.2762493222132342e-06, + "loss": 0.2351, "step": 16167 }, { - "epoch": 0.93, - "grad_norm": 0.3446713828640923, - "learning_rate": 2.637805374793556e-07, - "loss": 0.3118, + "epoch": 0.74, + "grad_norm": 0.41364887796998884, + "learning_rate": 3.275148023026954e-06, + "loss": 0.2613, "step": 16168 }, { - "epoch": 0.93, - "grad_norm": 0.32013571828818826, - "learning_rate": 2.633561067053625e-07, - "loss": 0.2598, + "epoch": 0.74, + "grad_norm": 0.41165226246115383, + "learning_rate": 3.2740468727224184e-06, + "loss": 0.3007, "step": 16169 }, { - "epoch": 0.93, - "grad_norm": 0.4038815496361895, - "learning_rate": 2.629320131109281e-07, - "loss": 0.1137, + "epoch": 0.74, + "grad_norm": 0.5959572703682788, + "learning_rate": 3.272945871323999e-06, + "loss": 0.298, "step": 16170 }, { - "epoch": 0.93, - "grad_norm": 0.2753926358926855, - "learning_rate": 2.6250825671074065e-07, - "loss": 0.2389, + "epoch": 0.74, + "grad_norm": 0.3267540750924954, + "learning_rate": 3.271845018856075e-06, + "loss": 0.2509, "step": 16171 }, { - "epoch": 0.93, - "grad_norm": 0.4119719794526924, - "learning_rate": 2.6208483751947286e-07, - "loss": 0.2821, + "epoch": 0.74, + "grad_norm": 0.5570804574953516, + "learning_rate": 3.2707443153430206e-06, + "loss": 0.2465, "step": 16172 }, { - "epoch": 0.93, - "grad_norm": 0.47065265482665175, - "learning_rate": 2.6166175555178864e-07, - "loss": 0.2604, + "epoch": 0.74, + "grad_norm": 0.2727921489608086, + "learning_rate": 3.269643760809198e-06, + "loss": 0.1916, "step": 16173 }, { - "epoch": 0.93, - "grad_norm": 0.7827261475775145, - "learning_rate": 2.612390108223395e-07, - "loss": 0.579, + "epoch": 0.74, + "grad_norm": 0.4463482475968113, + "learning_rate": 3.268543355278979e-06, + "loss": 0.2669, "step": 16174 }, { - "epoch": 0.93, - "grad_norm": 0.33742011781888565, - "learning_rate": 2.6081660334576376e-07, - "loss": 0.2539, + "epoch": 0.74, + "grad_norm": 0.47338766101770396, + "learning_rate": 3.267443098776719e-06, + "loss": 0.307, "step": 16175 }, { - "epoch": 0.93, - "grad_norm": 0.35489690817213293, - "learning_rate": 2.6039453313669084e-07, - "loss": 0.224, + "epoch": 0.74, + "grad_norm": 0.4711178204213357, + "learning_rate": 3.2663429913267795e-06, + "loss": 0.3602, "step": 16176 }, { - "epoch": 0.93, - "grad_norm": 0.40363854862203224, - "learning_rate": 2.599728002097346e-07, - "loss": 0.2276, + "epoch": 0.74, + "grad_norm": 0.33058592293801603, + "learning_rate": 3.26524303295352e-06, + "loss": 0.19, "step": 16177 }, { - "epoch": 0.93, - "grad_norm": 0.2998525771419882, - "learning_rate": 2.5955140457950334e-07, - "loss": 0.254, + "epoch": 0.74, + "grad_norm": 0.3022331547532038, + "learning_rate": 3.2641432236812855e-06, + "loss": 0.2438, "step": 16178 }, { - "epoch": 0.93, - "grad_norm": 0.3225328457185091, - "learning_rate": 2.591303462605876e-07, - "loss": 0.2568, + "epoch": 0.74, + "grad_norm": 0.5042215389006428, + "learning_rate": 3.2630435635344283e-06, + "loss": 0.257, "step": 16179 }, { - "epoch": 0.93, - "grad_norm": 0.40344121357429547, - "learning_rate": 2.587096252675703e-07, - "loss": 0.2584, + "epoch": 0.74, + "grad_norm": 0.39676000572111725, + "learning_rate": 3.2619440525372927e-06, + "loss": 0.1999, "step": 16180 }, { - "epoch": 0.93, - "grad_norm": 0.4726299423223656, - "learning_rate": 2.5828924161501847e-07, - "loss": 0.2424, + "epoch": 0.74, + "grad_norm": 0.3088026956957434, + "learning_rate": 3.2608446907142244e-06, + "loss": 0.2939, "step": 16181 }, { - "epoch": 0.93, - "grad_norm": 1.2427679629599802, - "learning_rate": 2.5786919531749056e-07, - "loss": 0.5494, + "epoch": 0.74, + "grad_norm": 0.8325122734946345, + "learning_rate": 3.25974547808956e-06, + "loss": 0.5128, "step": 16182 }, { - "epoch": 0.93, - "grad_norm": 0.3302371945217039, - "learning_rate": 2.5744948638953495e-07, - "loss": 0.2585, + "epoch": 0.74, + "grad_norm": 0.3482061112546673, + "learning_rate": 3.258646414687632e-06, + "loss": 0.1943, "step": 16183 }, { - "epoch": 0.93, - "grad_norm": 0.2960406165691513, - "learning_rate": 2.570301148456833e-07, - "loss": 0.246, + "epoch": 0.74, + "grad_norm": 0.2761731327942557, + "learning_rate": 3.257547500532774e-06, + "loss": 0.1905, "step": 16184 }, { - "epoch": 0.93, - "grad_norm": 0.22857777782773264, - "learning_rate": 2.566110807004618e-07, - "loss": 0.1342, + "epoch": 0.74, + "grad_norm": 0.35580695012609526, + "learning_rate": 3.2564487356493157e-06, + "loss": 0.2877, "step": 16185 }, { - "epoch": 0.93, - "grad_norm": 0.9455741587639533, - "learning_rate": 2.5619238396837665e-07, - "loss": 0.5122, + "epoch": 0.74, + "grad_norm": 0.37528078305626134, + "learning_rate": 3.2553501200615858e-06, + "loss": 0.2211, "step": 16186 }, { - "epoch": 0.93, - "grad_norm": 0.3025016716582683, - "learning_rate": 2.5577402466393286e-07, - "loss": 0.238, + "epoch": 0.74, + "grad_norm": 0.5215771798823283, + "learning_rate": 3.2542516537939005e-06, + "loss": 0.3201, "step": 16187 }, { - "epoch": 0.93, - "grad_norm": 0.39578317038127414, - "learning_rate": 2.5535600280161444e-07, - "loss": 0.3052, + "epoch": 0.74, + "grad_norm": 1.060089521871366, + "learning_rate": 3.2531533368705828e-06, + "loss": 0.6665, "step": 16188 }, { - "epoch": 0.93, - "grad_norm": 0.5815913779636789, - "learning_rate": 2.549383183958998e-07, - "loss": 0.3511, + "epoch": 0.74, + "grad_norm": 0.31100061257219114, + "learning_rate": 3.252055169315951e-06, + "loss": 0.2582, "step": 16189 }, { - "epoch": 0.93, - "grad_norm": 0.38518298786026156, - "learning_rate": 2.5452097146125063e-07, - "loss": 0.1683, + "epoch": 0.74, + "grad_norm": 0.39891706900082424, + "learning_rate": 3.250957151154309e-06, + "loss": 0.2199, "step": 16190 }, { - "epoch": 0.93, - "grad_norm": 0.22205133621720435, - "learning_rate": 2.5410396201212105e-07, - "loss": 0.2007, + "epoch": 0.74, + "grad_norm": 0.3428219125410267, + "learning_rate": 3.249859282409976e-06, + "loss": 0.2199, "step": 16191 }, { - "epoch": 0.93, - "grad_norm": 0.5057323622641279, - "learning_rate": 2.536872900629539e-07, - "loss": 0.3944, + "epoch": 0.74, + "grad_norm": 0.5703773592059471, + "learning_rate": 3.248761563107249e-06, + "loss": 0.3302, "step": 16192 }, { - "epoch": 0.93, - "grad_norm": 0.3595273845184508, - "learning_rate": 2.5327095562817537e-07, - "loss": 0.1673, + "epoch": 0.74, + "grad_norm": 0.2669964828374416, + "learning_rate": 3.2476639932704335e-06, + "loss": 0.2284, "step": 16193 }, { - "epoch": 0.93, - "grad_norm": 0.5360787331974713, - "learning_rate": 2.528549587222051e-07, - "loss": 0.3074, + "epoch": 0.74, + "grad_norm": 1.230687750757218, + "learning_rate": 3.246566572923833e-06, + "loss": 0.8005, "step": 16194 }, { - "epoch": 0.93, - "grad_norm": 0.442058729074706, - "learning_rate": 2.524392993594482e-07, - "loss": 0.3311, + "epoch": 0.74, + "grad_norm": 0.8898841504817586, + "learning_rate": 3.245469302091735e-06, + "loss": 0.455, "step": 16195 }, { - "epoch": 0.93, - "grad_norm": 0.2855255735605113, - "learning_rate": 2.5202397755429876e-07, - "loss": 0.2012, + "epoch": 0.74, + "grad_norm": 0.26300311296790924, + "learning_rate": 3.244372180798441e-06, + "loss": 0.1681, "step": 16196 }, { - "epoch": 0.93, - "grad_norm": 0.23458277362119945, - "learning_rate": 2.5160899332114075e-07, - "loss": 0.1566, + "epoch": 0.74, + "grad_norm": 0.3113994946352287, + "learning_rate": 3.2432752090682286e-06, + "loss": 0.2479, "step": 16197 }, { - "epoch": 0.93, - "grad_norm": 1.307433304730941, - "learning_rate": 2.5119434667434384e-07, - "loss": 0.7377, + "epoch": 0.74, + "grad_norm": 0.7435028702934103, + "learning_rate": 3.2421783869253985e-06, + "loss": 0.3297, "step": 16198 }, { - "epoch": 0.93, - "grad_norm": 0.3418249915562307, - "learning_rate": 2.507800376282665e-07, - "loss": 0.1928, + "epoch": 0.74, + "grad_norm": 0.3689499863966752, + "learning_rate": 3.2410817143942207e-06, + "loss": 0.2076, "step": 16199 }, { - "epoch": 0.93, - "grad_norm": 0.3322723765898175, - "learning_rate": 2.5036606619725847e-07, - "loss": 0.3077, + "epoch": 0.74, + "grad_norm": 1.349573231309148, + "learning_rate": 3.2399851914989842e-06, + "loss": 0.7598, "step": 16200 }, { - "epoch": 0.93, - "grad_norm": 0.7679882980968665, - "learning_rate": 2.499524323956537e-07, - "loss": 0.4053, - "step": 16201 + "epoch": 0.74, + "grad_norm": 0.31551649890808037, + "learning_rate": 3.2388888182639566e-06, + "loss": 0.2445, + "step": 16201 }, { - "epoch": 0.93, - "grad_norm": 0.5105729769391152, - "learning_rate": 2.495391362377775e-07, - "loss": 0.2702, + "epoch": 0.74, + "grad_norm": 0.472714425087857, + "learning_rate": 3.2377925947134137e-06, + "loss": 0.3129, "step": 16202 }, { - "epoch": 0.93, - "grad_norm": 0.20248897495158377, - "learning_rate": 2.4912617773794057e-07, - "loss": 0.16, + "epoch": 0.74, + "grad_norm": 0.8052972015991857, + "learning_rate": 3.236696520871628e-06, + "loss": 0.2973, "step": 16203 }, { - "epoch": 0.93, - "grad_norm": 0.45523324068273235, - "learning_rate": 2.4871355691044595e-07, - "loss": 0.3446, + "epoch": 0.74, + "grad_norm": 0.3036751973221981, + "learning_rate": 3.23560059676286e-06, + "loss": 0.2193, "step": 16204 }, { - "epoch": 0.93, - "grad_norm": 0.31116609474106843, - "learning_rate": 2.4830127376958113e-07, - "loss": 0.2482, + "epoch": 0.74, + "grad_norm": 0.27859658895535727, + "learning_rate": 3.2345048224113764e-06, + "loss": 0.2303, "step": 16205 }, { - "epoch": 0.93, - "grad_norm": 0.7147870871308801, - "learning_rate": 2.4788932832962465e-07, - "loss": 0.2833, + "epoch": 0.74, + "grad_norm": 0.9480435756547749, + "learning_rate": 3.233409197841437e-06, + "loss": 0.5324, "step": 16206 }, { - "epoch": 0.93, - "grad_norm": 0.36457125106980254, - "learning_rate": 2.4747772060484064e-07, - "loss": 0.2912, + "epoch": 0.74, + "grad_norm": 0.6439513062890482, + "learning_rate": 3.2323137230772937e-06, + "loss": 0.2633, "step": 16207 }, { - "epoch": 0.93, - "grad_norm": 0.5240461723284267, - "learning_rate": 2.470664506094844e-07, - "loss": 0.2765, + "epoch": 0.74, + "grad_norm": 0.4078880036330967, + "learning_rate": 3.231218398143204e-06, + "loss": 0.2929, "step": 16208 }, { - "epoch": 0.93, - "grad_norm": 0.22721409677761747, - "learning_rate": 2.466555183577968e-07, - "loss": 0.146, + "epoch": 0.74, + "grad_norm": 0.4691604734785131, + "learning_rate": 3.2301232230634104e-06, + "loss": 0.27, "step": 16209 }, { - "epoch": 0.93, - "grad_norm": 0.8567658532302702, - "learning_rate": 2.462449238640097e-07, - "loss": 0.5097, + "epoch": 0.74, + "grad_norm": 0.3091572642571224, + "learning_rate": 3.2290281978621695e-06, + "loss": 0.166, "step": 16210 }, { - "epoch": 0.93, - "grad_norm": 0.37293244224659416, - "learning_rate": 2.458346671423406e-07, - "loss": 0.2645, + "epoch": 0.74, + "grad_norm": 0.40168543540251894, + "learning_rate": 3.227933322563718e-06, + "loss": 0.2813, "step": 16211 }, { - "epoch": 0.93, - "grad_norm": 0.27145054299585214, - "learning_rate": 2.4542474820699823e-07, - "loss": 0.244, + "epoch": 0.74, + "grad_norm": 0.49317599406421814, + "learning_rate": 3.226838597192292e-06, + "loss": 0.3843, "step": 16212 }, { - "epoch": 0.93, - "grad_norm": 1.3981516737715696, - "learning_rate": 2.450151670721768e-07, - "loss": 0.4875, + "epoch": 0.74, + "grad_norm": 0.4895074202392746, + "learning_rate": 3.225744021772136e-06, + "loss": 0.2281, "step": 16213 }, { - "epoch": 0.93, - "grad_norm": 0.5986289510427129, - "learning_rate": 2.446059237520615e-07, - "loss": 0.2993, + "epoch": 0.74, + "grad_norm": 0.386088206921874, + "learning_rate": 3.2246495963274713e-06, + "loss": 0.2599, "step": 16214 }, { - "epoch": 0.93, - "grad_norm": 0.22481400806917337, - "learning_rate": 2.441970182608222e-07, - "loss": 0.2347, + "epoch": 0.74, + "grad_norm": 0.7846604284698759, + "learning_rate": 3.2235553208825398e-06, + "loss": 0.3185, "step": 16215 }, { - "epoch": 0.93, - "grad_norm": 0.30778683391514594, - "learning_rate": 2.43788450612622e-07, - "loss": 0.1411, + "epoch": 0.74, + "grad_norm": 0.2870618483940645, + "learning_rate": 3.22246119546156e-06, + "loss": 0.1692, "step": 16216 }, { - "epoch": 0.93, - "grad_norm": 0.41000123055651855, - "learning_rate": 2.433802208216085e-07, - "loss": 0.2763, + "epoch": 0.74, + "grad_norm": 0.25390974017381585, + "learning_rate": 3.22136722008876e-06, + "loss": 0.2158, "step": 16217 }, { - "epoch": 0.93, - "grad_norm": 0.4744266292202893, - "learning_rate": 2.42972328901917e-07, - "loss": 0.3694, + "epoch": 0.75, + "grad_norm": 1.3354459556541762, + "learning_rate": 3.2202733947883536e-06, + "loss": 0.7886, "step": 16218 }, { - "epoch": 0.93, - "grad_norm": 0.3541890062856887, - "learning_rate": 2.4256477486767517e-07, - "loss": 0.2324, + "epoch": 0.75, + "grad_norm": 0.7585343414335117, + "learning_rate": 3.2191797195845597e-06, + "loss": 0.2318, "step": 16219 }, { - "epoch": 0.93, - "grad_norm": 0.41346592434653073, - "learning_rate": 2.4215755873299497e-07, - "loss": 0.2446, + "epoch": 0.75, + "grad_norm": 0.31746914090406975, + "learning_rate": 3.218086194501595e-06, + "loss": 0.2543, "step": 16220 }, { - "epoch": 0.93, - "grad_norm": 0.5884279680128669, - "learning_rate": 2.4175068051197957e-07, - "loss": 0.3041, + "epoch": 0.75, + "grad_norm": 0.494010222246306, + "learning_rate": 3.2169928195636612e-06, + "loss": 0.3474, "step": 16221 }, { - "epoch": 0.93, - "grad_norm": 0.4402183600759972, - "learning_rate": 2.413441402187178e-07, - "loss": 0.2166, + "epoch": 0.75, + "grad_norm": 0.6252414722084878, + "learning_rate": 3.21589959479497e-06, + "loss": 0.1265, "step": 16222 }, { - "epoch": 0.93, - "grad_norm": 0.2384371145087381, - "learning_rate": 2.4093793786728934e-07, - "loss": 0.2403, + "epoch": 0.75, + "grad_norm": 0.3141381262047527, + "learning_rate": 3.2148065202197255e-06, + "loss": 0.2307, "step": 16223 }, { - "epoch": 0.93, - "grad_norm": 0.4502437190455561, - "learning_rate": 2.4053207347175976e-07, - "loss": 0.3311, + "epoch": 0.75, + "grad_norm": 0.52698450431804, + "learning_rate": 3.2137135958621225e-06, + "loss": 0.4001, "step": 16224 }, { - "epoch": 0.93, - "grad_norm": 0.4684769525518414, - "learning_rate": 2.401265470461855e-07, - "loss": 0.12, + "epoch": 0.75, + "grad_norm": 0.31190476523618244, + "learning_rate": 3.212620821746362e-06, + "loss": 0.2172, "step": 16225 }, { - "epoch": 0.93, - "grad_norm": 0.5547322450352692, - "learning_rate": 2.397213586046099e-07, - "loss": 0.2999, + "epoch": 0.75, + "grad_norm": 0.3681224993063705, + "learning_rate": 3.2115281978966316e-06, + "loss": 0.2066, "step": 16226 }, { - "epoch": 0.93, - "grad_norm": 0.2818633700967168, - "learning_rate": 2.3931650816106267e-07, - "loss": 0.2502, + "epoch": 0.75, + "grad_norm": 0.7846341275647539, + "learning_rate": 3.2104357243371252e-06, + "loss": 0.4236, "step": 16227 }, { - "epoch": 0.93, - "grad_norm": 0.4363782611754942, - "learning_rate": 2.3891199572956493e-07, - "loss": 0.3209, + "epoch": 0.75, + "grad_norm": 0.3708710206772514, + "learning_rate": 3.20934340109203e-06, + "loss": 0.2855, "step": 16228 }, { - "epoch": 0.93, - "grad_norm": 0.47684193737555375, - "learning_rate": 2.3850782132412544e-07, - "loss": 0.2063, + "epoch": 0.75, + "grad_norm": 0.23886403852464788, + "learning_rate": 3.2082512281855247e-06, + "loss": 0.155, "step": 16229 }, { - "epoch": 0.93, - "grad_norm": 0.3005978735016527, - "learning_rate": 2.3810398495874076e-07, - "loss": 0.2733, + "epoch": 0.75, + "grad_norm": 0.4364437519522074, + "learning_rate": 3.2071592056417944e-06, + "loss": 0.3, "step": 16230 }, { - "epoch": 0.93, - "grad_norm": 0.2758614772557928, - "learning_rate": 2.377004866473953e-07, - "loss": 0.252, + "epoch": 0.75, + "grad_norm": 0.7605167157607732, + "learning_rate": 3.2060673334850056e-06, + "loss": 0.3923, "step": 16231 }, { - "epoch": 0.93, - "grad_norm": 0.5232494032312904, - "learning_rate": 2.3729732640406233e-07, - "loss": 0.0864, + "epoch": 0.75, + "grad_norm": 0.2771228307827486, + "learning_rate": 3.2049756117393437e-06, + "loss": 0.2217, "step": 16232 }, { - "epoch": 0.93, - "grad_norm": 0.43196860650976154, - "learning_rate": 2.3689450424270179e-07, - "loss": 0.2845, + "epoch": 0.75, + "grad_norm": 0.5539491551945313, + "learning_rate": 3.2038840404289706e-06, + "loss": 0.3297, "step": 16233 }, { - "epoch": 0.93, - "grad_norm": 0.6300939297391976, - "learning_rate": 2.364920201772658e-07, - "loss": 0.3533, + "epoch": 0.75, + "grad_norm": 1.4625646902945728, + "learning_rate": 3.202792619578057e-06, + "loss": 0.4737, "step": 16234 }, { - "epoch": 0.93, - "grad_norm": 0.24702854532908614, - "learning_rate": 2.3608987422169106e-07, - "loss": 0.2239, + "epoch": 0.75, + "grad_norm": 0.3215110002947949, + "learning_rate": 3.2017013492107608e-06, + "loss": 0.2073, "step": 16235 }, { - "epoch": 0.93, - "grad_norm": 0.5147166917845664, - "learning_rate": 2.3568806638990527e-07, - "loss": 0.384, + "epoch": 0.75, + "grad_norm": 0.5354099619295052, + "learning_rate": 3.2006102293512443e-06, + "loss": 0.3519, "step": 16236 }, { - "epoch": 0.93, - "grad_norm": 0.34917477325603674, - "learning_rate": 2.3528659669581954e-07, - "loss": 0.1735, + "epoch": 0.75, + "grad_norm": 0.36175366057772973, + "learning_rate": 3.199519260023667e-06, + "loss": 0.2581, "step": 16237 }, { - "epoch": 0.93, - "grad_norm": 0.35170576502926393, - "learning_rate": 2.3488546515334054e-07, - "loss": 0.1596, + "epoch": 0.75, + "grad_norm": 0.35628914783933086, + "learning_rate": 3.198428441252176e-06, + "loss": 0.2629, "step": 16238 }, { - "epoch": 0.93, - "grad_norm": 0.35057358099260666, - "learning_rate": 2.3448467177635826e-07, - "loss": 0.291, + "epoch": 0.75, + "grad_norm": 0.45489883960552546, + "learning_rate": 3.1973377730609233e-06, + "loss": 0.1047, "step": 16239 }, { - "epoch": 0.93, - "grad_norm": 0.43437427364444037, - "learning_rate": 2.3408421657875158e-07, - "loss": 0.3281, + "epoch": 0.75, + "grad_norm": 0.38334324049362095, + "learning_rate": 3.1962472554740598e-06, + "loss": 0.2789, "step": 16240 }, { - "epoch": 0.93, - "grad_norm": 0.4994355097839041, - "learning_rate": 2.3368409957438832e-07, - "loss": 0.3334, + "epoch": 0.75, + "grad_norm": 0.36390072735771056, + "learning_rate": 3.19515688851572e-06, + "loss": 0.2786, "step": 16241 }, { - "epoch": 0.93, - "grad_norm": 0.3989427545060095, - "learning_rate": 2.3328432077712516e-07, - "loss": 0.2308, + "epoch": 0.75, + "grad_norm": 0.8324471952602299, + "learning_rate": 3.1940666722100534e-06, + "loss": 0.3545, "step": 16242 }, { - "epoch": 0.93, - "grad_norm": 0.22736027420925461, - "learning_rate": 2.3288488020080546e-07, - "loss": 0.2094, + "epoch": 0.75, + "grad_norm": 0.3889448092239731, + "learning_rate": 3.1929766065811864e-06, + "loss": 0.2609, "step": 16243 }, { - "epoch": 0.93, - "grad_norm": 1.355549695082054, - "learning_rate": 2.3248577785926264e-07, - "loss": 0.3942, + "epoch": 0.75, + "grad_norm": 0.41634608796716804, + "learning_rate": 3.1918866916532564e-06, + "loss": 0.2746, "step": 16244 }, { - "epoch": 0.93, - "grad_norm": 0.3061093657317648, - "learning_rate": 2.3208701376631672e-07, - "loss": 0.2012, + "epoch": 0.75, + "grad_norm": 0.34839782341010006, + "learning_rate": 3.1907969274503945e-06, + "loss": 0.2046, "step": 16245 }, { - "epoch": 0.93, - "grad_norm": 0.4283494643377407, - "learning_rate": 2.3168858793577774e-07, - "loss": 0.3167, + "epoch": 0.75, + "grad_norm": 0.9107890104900368, + "learning_rate": 3.189707313996728e-06, + "loss": 0.3565, "step": 16246 }, { - "epoch": 0.93, - "grad_norm": 0.40586491113206713, - "learning_rate": 2.3129050038144362e-07, - "loss": 0.3005, + "epoch": 0.75, + "grad_norm": 0.40345173366511333, + "learning_rate": 3.1886178513163777e-06, + "loss": 0.2946, "step": 16247 }, { - "epoch": 0.93, - "grad_norm": 0.30763307469813905, - "learning_rate": 2.3089275111709886e-07, - "loss": 0.2203, + "epoch": 0.75, + "grad_norm": 0.35850114949904954, + "learning_rate": 3.1875285394334575e-06, + "loss": 0.2651, "step": 16248 }, { - "epoch": 0.93, - "grad_norm": 0.2669467232153082, - "learning_rate": 2.3049534015651686e-07, - "loss": 0.1807, + "epoch": 0.75, + "grad_norm": 0.8772466115033867, + "learning_rate": 3.186439378372096e-06, + "loss": 0.42, "step": 16249 }, { - "epoch": 0.93, - "grad_norm": 0.7019907586813147, - "learning_rate": 2.3009826751346332e-07, - "loss": 0.3552, + "epoch": 0.75, + "grad_norm": 0.352078792253974, + "learning_rate": 3.1853503681563957e-06, + "loss": 0.2611, "step": 16250 }, { - "epoch": 0.93, - "grad_norm": 0.2421896836266599, - "learning_rate": 2.2970153320168498e-07, - "loss": 0.2243, + "epoch": 0.75, + "grad_norm": 0.2530405286896428, + "learning_rate": 3.1842615088104744e-06, + "loss": 0.1279, "step": 16251 }, { - "epoch": 0.93, - "grad_norm": 0.8531496453021868, - "learning_rate": 2.293051372349231e-07, - "loss": 0.404, + "epoch": 0.75, + "grad_norm": 0.39104832882246765, + "learning_rate": 3.1831728003584308e-06, + "loss": 0.237, "step": 16252 }, { - "epoch": 0.93, - "grad_norm": 0.7552706084359576, - "learning_rate": 2.2890907962690335e-07, - "loss": 0.548, + "epoch": 0.75, + "grad_norm": 0.3536058828209409, + "learning_rate": 3.1820842428243704e-06, + "loss": 0.2605, "step": 16253 }, { - "epoch": 0.93, - "grad_norm": 0.3945319567289995, - "learning_rate": 2.2851336039134363e-07, - "loss": 0.2455, + "epoch": 0.75, + "grad_norm": 1.0349105712453652, + "learning_rate": 3.1809958362323977e-06, + "loss": 0.4249, "step": 16254 }, { - "epoch": 0.93, - "grad_norm": 0.21129773765826465, - "learning_rate": 2.2811797954194527e-07, - "loss": 0.172, + "epoch": 0.75, + "grad_norm": 1.0926383859800037, + "learning_rate": 3.1799075806066016e-06, + "loss": 0.314, "step": 16255 }, { - "epoch": 0.93, - "grad_norm": 0.5848773278732337, - "learning_rate": 2.2772293709240168e-07, - "loss": 0.3105, + "epoch": 0.75, + "grad_norm": 0.2862033288147034, + "learning_rate": 3.178819475971078e-06, + "loss": 0.2402, "step": 16256 }, { - "epoch": 0.93, - "grad_norm": 0.39209953000005016, - "learning_rate": 2.2732823305639197e-07, - "loss": 0.3028, + "epoch": 0.75, + "grad_norm": 0.23609206964331267, + "learning_rate": 3.1777315223499193e-06, + "loss": 0.1803, "step": 16257 }, { - "epoch": 0.93, - "grad_norm": 0.45565939341834677, - "learning_rate": 2.2693386744758405e-07, - "loss": 0.2577, + "epoch": 0.75, + "grad_norm": 1.203041357848546, + "learning_rate": 3.1766437197672074e-06, + "loss": 0.3383, "step": 16258 }, { - "epoch": 0.93, - "grad_norm": 0.3638561329873051, - "learning_rate": 2.2653984027963704e-07, - "loss": 0.2771, + "epoch": 0.75, + "grad_norm": 0.33612138284106663, + "learning_rate": 3.17555606824703e-06, + "loss": 0.2411, "step": 16259 }, { - "epoch": 0.93, - "grad_norm": 0.3977592178558374, - "learning_rate": 2.2614615156619556e-07, - "loss": 0.3164, + "epoch": 0.75, + "grad_norm": 0.45627768613669034, + "learning_rate": 3.174468567813461e-06, + "loss": 0.2937, "step": 16260 }, { - "epoch": 0.93, - "grad_norm": 0.2486224244899392, - "learning_rate": 2.2575280132089315e-07, - "loss": 0.0902, + "epoch": 0.75, + "grad_norm": 1.0737717862634149, + "learning_rate": 3.173381218490579e-06, + "loss": 0.3331, "step": 16261 }, { - "epoch": 0.93, - "grad_norm": 0.41724306546277456, - "learning_rate": 2.2535978955734895e-07, - "loss": 0.2994, + "epoch": 0.75, + "grad_norm": 0.41803195388632486, + "learning_rate": 3.1722940203024564e-06, + "loss": 0.2787, "step": 16262 }, { - "epoch": 0.93, - "grad_norm": 0.26274641689573147, - "learning_rate": 2.2496711628917644e-07, - "loss": 0.2548, + "epoch": 0.75, + "grad_norm": 0.2359905162798226, + "learning_rate": 3.1712069732731677e-06, + "loss": 0.1631, "step": 16263 }, { - "epoch": 0.93, - "grad_norm": 0.7297326479778752, - "learning_rate": 2.2457478152997148e-07, - "loss": 0.4205, + "epoch": 0.75, + "grad_norm": 0.384828706295368, + "learning_rate": 3.1701200774267714e-06, + "loss": 0.3102, "step": 16264 }, { - "epoch": 0.93, - "grad_norm": 1.0018169427080976, - "learning_rate": 2.2418278529332203e-07, - "loss": 0.4632, + "epoch": 0.75, + "grad_norm": 0.31486668099464993, + "learning_rate": 3.1690333327873348e-06, + "loss": 0.1823, "step": 16265 }, { - "epoch": 0.93, - "grad_norm": 0.3327648178772939, - "learning_rate": 2.2379112759280176e-07, - "loss": 0.2609, + "epoch": 0.75, + "grad_norm": 0.6747652748737898, + "learning_rate": 3.1679467393789185e-06, + "loss": 0.3744, "step": 16266 }, { - "epoch": 0.93, - "grad_norm": 0.23721807132324804, - "learning_rate": 2.2339980844197527e-07, - "loss": 0.2063, + "epoch": 0.75, + "grad_norm": 1.1603402025582956, + "learning_rate": 3.1668602972255733e-06, + "loss": 0.5156, "step": 16267 }, { - "epoch": 0.93, - "grad_norm": 0.5633019302848433, - "learning_rate": 2.2300882785439183e-07, - "loss": 0.2385, + "epoch": 0.75, + "grad_norm": 0.2746123462215477, + "learning_rate": 3.1657740063513596e-06, + "loss": 0.1977, "step": 16268 }, { - "epoch": 0.93, - "grad_norm": 0.39111215606894656, - "learning_rate": 2.2261818584359386e-07, - "loss": 0.2894, + "epoch": 0.75, + "grad_norm": 0.2807841200168824, + "learning_rate": 3.1646878667803183e-06, + "loss": 0.194, "step": 16269 }, { - "epoch": 0.93, - "grad_norm": 1.4038427231112138, - "learning_rate": 2.222278824231061e-07, - "loss": 0.4999, + "epoch": 0.75, + "grad_norm": 1.4841156706818384, + "learning_rate": 3.1636018785364996e-06, + "loss": 0.7687, "step": 16270 }, { - "epoch": 0.93, - "grad_norm": 0.2967884736462052, - "learning_rate": 2.2183791760644668e-07, - "loss": 0.2486, + "epoch": 0.75, + "grad_norm": 0.38761758464745655, + "learning_rate": 3.1625160416439503e-06, + "loss": 0.2115, "step": 16271 }, { - "epoch": 0.93, - "grad_norm": 0.34349186837418483, - "learning_rate": 2.214482914071203e-07, - "loss": 0.2819, + "epoch": 0.75, + "grad_norm": 0.4083625476888615, + "learning_rate": 3.1614303561267025e-06, + "loss": 0.3168, "step": 16272 }, { - "epoch": 0.93, - "grad_norm": 0.8920189124308012, - "learning_rate": 2.2105900383861956e-07, - "loss": 0.4225, + "epoch": 0.75, + "grad_norm": 1.358679343070449, + "learning_rate": 3.1603448220087975e-06, + "loss": 0.7661, "step": 16273 }, { - "epoch": 0.94, - "grad_norm": 0.28684107801549114, - "learning_rate": 2.2067005491442362e-07, - "loss": 0.244, + "epoch": 0.75, + "grad_norm": 0.3133154506054117, + "learning_rate": 3.1592594393142606e-06, + "loss": 0.1822, "step": 16274 }, { - "epoch": 0.94, - "grad_norm": 0.3494953028521881, - "learning_rate": 2.2028144464800393e-07, - "loss": 0.275, + "epoch": 0.75, + "grad_norm": 0.3653618659027755, + "learning_rate": 3.158174208067133e-06, + "loss": 0.1805, "step": 16275 }, { - "epoch": 0.94, - "grad_norm": 0.35670384309614495, - "learning_rate": 2.1989317305281755e-07, - "loss": 0.179, + "epoch": 0.75, + "grad_norm": 0.38146547103855466, + "learning_rate": 3.157089128291434e-06, + "loss": 0.2909, "step": 16276 }, { - "epoch": 0.94, - "grad_norm": 0.996066078136281, - "learning_rate": 2.1950524014231033e-07, - "loss": 0.4171, + "epoch": 0.75, + "grad_norm": 0.337836460064673, + "learning_rate": 3.1560042000111833e-06, + "loss": 0.2575, "step": 16277 }, { - "epoch": 0.94, - "grad_norm": 0.347523126063495, - "learning_rate": 2.191176459299138e-07, - "loss": 0.1977, + "epoch": 0.75, + "grad_norm": 0.7545227231503971, + "learning_rate": 3.154919423250403e-06, + "loss": 0.3293, "step": 16278 }, { - "epoch": 0.94, - "grad_norm": 0.32388881265928143, - "learning_rate": 2.1873039042905497e-07, - "loss": 0.2703, + "epoch": 0.75, + "grad_norm": 0.5563110687884437, + "learning_rate": 3.1538347980331097e-06, + "loss": 0.3406, "step": 16279 }, { - "epoch": 0.94, - "grad_norm": 0.39745643594948593, - "learning_rate": 2.183434736531409e-07, - "loss": 0.2683, + "epoch": 0.75, + "grad_norm": 0.3853232477254108, + "learning_rate": 3.152750324383318e-06, + "loss": 0.2637, "step": 16280 }, { - "epoch": 0.94, - "grad_norm": 0.22006091226414445, - "learning_rate": 2.1795689561557308e-07, - "loss": 0.152, + "epoch": 0.75, + "grad_norm": 0.23044809791306656, + "learning_rate": 3.1516660023250323e-06, + "loss": 0.1553, "step": 16281 }, { - "epoch": 0.94, - "grad_norm": 0.34373104169964486, - "learning_rate": 2.175706563297353e-07, - "loss": 0.2868, + "epoch": 0.75, + "grad_norm": 0.6493105216468711, + "learning_rate": 3.15058183188226e-06, + "loss": 0.407, "step": 16282 }, { - "epoch": 0.94, - "grad_norm": 1.263619920512286, - "learning_rate": 2.1718475580900567e-07, - "loss": 0.7482, + "epoch": 0.75, + "grad_norm": 0.5396976234288307, + "learning_rate": 3.1494978130790088e-06, + "loss": 0.2526, "step": 16283 }, { - "epoch": 0.94, - "grad_norm": 0.29616054478209414, - "learning_rate": 2.16799194066748e-07, - "loss": 0.1906, + "epoch": 0.75, + "grad_norm": 0.26920451758654373, + "learning_rate": 3.148413945939269e-06, + "loss": 0.2372, "step": 16284 }, { - "epoch": 0.94, - "grad_norm": 0.5626197863716071, - "learning_rate": 2.164139711163138e-07, - "loss": 0.3607, + "epoch": 0.75, + "grad_norm": 1.4291774908862382, + "learning_rate": 3.1473302304870445e-06, + "loss": 0.6303, "step": 16285 }, { - "epoch": 0.94, - "grad_norm": 0.3590447014920199, - "learning_rate": 2.160290869710424e-07, - "loss": 0.3023, + "epoch": 0.75, + "grad_norm": 0.4960315488451702, + "learning_rate": 3.146246666746321e-06, + "loss": 0.2994, "step": 16286 }, { - "epoch": 0.94, - "grad_norm": 0.22648512679657767, - "learning_rate": 2.1564454164426207e-07, - "loss": 0.1571, + "epoch": 0.75, + "grad_norm": 0.24110582009741974, + "learning_rate": 3.1451632547410906e-06, + "loss": 0.1567, "step": 16287 }, { - "epoch": 0.94, - "grad_norm": 0.4333570135949622, - "learning_rate": 2.1526033514929213e-07, - "loss": 0.2604, + "epoch": 0.75, + "grad_norm": 0.4766538464147961, + "learning_rate": 3.1440799944953416e-06, + "loss": 0.3154, "step": 16288 }, { - "epoch": 0.94, - "grad_norm": 1.220905292397416, - "learning_rate": 2.1487646749943524e-07, - "loss": 0.5481, + "epoch": 0.75, + "grad_norm": 0.41841794223725176, + "learning_rate": 3.1429968860330505e-06, + "loss": 0.2915, "step": 16289 }, { - "epoch": 0.94, - "grad_norm": 0.32203788282666024, - "learning_rate": 2.1449293870798637e-07, - "loss": 0.2688, + "epoch": 0.75, + "grad_norm": 0.5757099067643165, + "learning_rate": 3.141913929378203e-06, + "loss": 0.3783, "step": 16290 }, { - "epoch": 0.94, - "grad_norm": 0.34693965055254655, - "learning_rate": 2.1410974878822487e-07, - "loss": 0.2552, + "epoch": 0.75, + "grad_norm": 0.547374316905116, + "learning_rate": 3.140831124554765e-06, + "loss": 0.275, "step": 16291 }, { - "epoch": 0.94, - "grad_norm": 0.551396319559656, - "learning_rate": 2.1372689775342238e-07, - "loss": 0.2369, + "epoch": 0.75, + "grad_norm": 0.3341651304373114, + "learning_rate": 3.139748471586721e-06, + "loss": 0.2533, "step": 16292 }, { - "epoch": 0.94, - "grad_norm": 0.30887122673256734, - "learning_rate": 2.1334438561683713e-07, - "loss": 0.2373, + "epoch": 0.75, + "grad_norm": 0.6385625139870543, + "learning_rate": 3.1386659704980305e-06, + "loss": 0.4236, "step": 16293 }, { - "epoch": 0.94, - "grad_norm": 0.22820054330620598, - "learning_rate": 2.1296221239171523e-07, - "loss": 0.1915, + "epoch": 0.75, + "grad_norm": 0.335912281455225, + "learning_rate": 3.1375836213126653e-06, + "loss": 0.1831, "step": 16294 }, { - "epoch": 0.94, - "grad_norm": 1.137231060487121, - "learning_rate": 2.125803780912894e-07, - "loss": 0.7172, + "epoch": 0.75, + "grad_norm": 0.4176407771660147, + "learning_rate": 3.136501424054582e-06, + "loss": 0.2869, "step": 16295 }, { - "epoch": 0.94, - "grad_norm": 0.40545905296169865, - "learning_rate": 2.1219888272878575e-07, - "loss": 0.2602, + "epoch": 0.75, + "grad_norm": 0.3188860843585173, + "learning_rate": 3.1354193787477428e-06, + "loss": 0.2844, "step": 16296 }, { - "epoch": 0.94, - "grad_norm": 0.4730775018614647, - "learning_rate": 2.118177263174137e-07, - "loss": 0.2668, + "epoch": 0.75, + "grad_norm": 1.1090137715507415, + "learning_rate": 3.1343374854161046e-06, + "loss": 0.3907, "step": 16297 }, { - "epoch": 0.94, - "grad_norm": 0.34205215126746363, - "learning_rate": 2.114369088703727e-07, - "loss": 0.2772, + "epoch": 0.75, + "grad_norm": 0.6734148894015325, + "learning_rate": 3.133255744083614e-06, + "loss": 0.3135, "step": 16298 }, { - "epoch": 0.94, - "grad_norm": 0.32729918038453004, - "learning_rate": 2.1105643040085112e-07, - "loss": 0.2464, + "epoch": 0.75, + "grad_norm": 0.40030865225891, + "learning_rate": 3.1321741547742236e-06, + "loss": 0.2746, "step": 16299 }, { - "epoch": 0.94, - "grad_norm": 0.24639056641925142, - "learning_rate": 2.106762909220228e-07, - "loss": 0.1184, + "epoch": 0.75, + "grad_norm": 0.31305484089135516, + "learning_rate": 3.131092717511881e-06, + "loss": 0.2277, "step": 16300 }, { - "epoch": 0.94, - "grad_norm": 0.99073060626058, - "learning_rate": 2.10296490447055e-07, - "loss": 0.4329, + "epoch": 0.75, + "grad_norm": 0.33292710062757813, + "learning_rate": 3.130011432320522e-06, + "loss": 0.1907, "step": 16301 }, { - "epoch": 0.94, - "grad_norm": 0.32121737540817974, - "learning_rate": 2.0991702898909838e-07, - "loss": 0.2518, + "epoch": 0.75, + "grad_norm": 0.4505201344827788, + "learning_rate": 3.128930299224092e-06, + "loss": 0.3272, "step": 16302 }, { - "epoch": 0.94, - "grad_norm": 0.3301117105900323, - "learning_rate": 2.0953790656129457e-07, - "loss": 0.3058, + "epoch": 0.75, + "grad_norm": 0.5691089812626369, + "learning_rate": 3.1278493182465187e-06, + "loss": 0.3862, "step": 16303 }, { - "epoch": 0.94, - "grad_norm": 1.7456922022694272, - "learning_rate": 2.091591231767709e-07, - "loss": 0.1374, + "epoch": 0.75, + "grad_norm": 0.32952246426544163, + "learning_rate": 3.126768489411739e-06, + "loss": 0.1953, "step": 16304 }, { - "epoch": 0.94, - "grad_norm": 0.32467499870042676, - "learning_rate": 2.0878067884864683e-07, - "loss": 0.2509, + "epoch": 0.75, + "grad_norm": 0.42341394819454375, + "learning_rate": 3.125687812743683e-06, + "loss": 0.2967, "step": 16305 }, { - "epoch": 0.94, - "grad_norm": 0.39956950076075687, - "learning_rate": 2.0840257359002635e-07, - "loss": 0.2391, + "epoch": 0.75, + "grad_norm": 0.7190048663580556, + "learning_rate": 3.12460728826627e-06, + "loss": 0.4242, "step": 16306 }, { - "epoch": 0.94, - "grad_norm": 0.3293116288624257, - "learning_rate": 2.0802480741400456e-07, - "loss": 0.2718, + "epoch": 0.75, + "grad_norm": 0.20516648425918738, + "learning_rate": 3.123526916003429e-06, + "loss": 0.1279, "step": 16307 }, { - "epoch": 0.94, - "grad_norm": 0.3131425030863296, - "learning_rate": 2.0764738033366095e-07, - "loss": 0.2571, + "epoch": 0.75, + "grad_norm": 0.322470069226777, + "learning_rate": 3.1224466959790676e-06, + "loss": 0.2994, "step": 16308 }, { - "epoch": 0.94, - "grad_norm": 0.44822453408391827, - "learning_rate": 2.0727029236206953e-07, - "loss": 0.237, + "epoch": 0.75, + "grad_norm": 1.4061965462645851, + "learning_rate": 3.121366628217114e-06, + "loss": 0.6254, "step": 16309 }, { - "epoch": 0.94, - "grad_norm": 0.3492819711372927, - "learning_rate": 2.068935435122854e-07, - "loss": 0.2403, + "epoch": 0.75, + "grad_norm": 0.4219804750964464, + "learning_rate": 3.1202867127414703e-06, + "loss": 0.1984, "step": 16310 }, { - "epoch": 0.94, - "grad_norm": 0.3019190826036198, - "learning_rate": 2.0651713379735706e-07, - "loss": 0.2593, + "epoch": 0.75, + "grad_norm": 0.5561840173192308, + "learning_rate": 3.1192069495760525e-06, + "loss": 0.3241, "step": 16311 }, { - "epoch": 0.94, - "grad_norm": 0.4584884405827178, - "learning_rate": 2.0614106323031846e-07, - "loss": 0.2696, + "epoch": 0.75, + "grad_norm": 0.39812558548430166, + "learning_rate": 3.1181273387447564e-06, + "loss": 0.3002, "step": 16312 }, { - "epoch": 0.94, - "grad_norm": 0.411254505830708, - "learning_rate": 2.0576533182419477e-07, - "loss": 0.2811, + "epoch": 0.75, + "grad_norm": 0.2810504180759767, + "learning_rate": 3.117047880271489e-06, + "loss": 0.144, "step": 16313 }, { - "epoch": 0.94, - "grad_norm": 0.3902520439694165, - "learning_rate": 2.053899395919956e-07, - "loss": 0.2648, + "epoch": 0.75, + "grad_norm": 0.5882950289785216, + "learning_rate": 3.115968574180149e-06, + "loss": 0.379, "step": 16314 }, { - "epoch": 0.94, - "grad_norm": 0.353453516685148, - "learning_rate": 2.0501488654672276e-07, - "loss": 0.2453, + "epoch": 0.75, + "grad_norm": 0.3033553642302104, + "learning_rate": 3.114889420494629e-06, + "loss": 0.2445, "step": 16315 }, { - "epoch": 0.94, - "grad_norm": 1.1755006016717067, - "learning_rate": 2.0464017270136139e-07, - "loss": 0.4489, + "epoch": 0.75, + "grad_norm": 0.7844651054596269, + "learning_rate": 3.1138104192388196e-06, + "loss": 0.3285, "step": 16316 }, { - "epoch": 0.94, - "grad_norm": 0.36115634958306614, - "learning_rate": 2.0426579806889114e-07, - "loss": 0.1607, + "epoch": 0.75, + "grad_norm": 0.3879251671714471, + "learning_rate": 3.1127315704366144e-06, + "loss": 0.2318, "step": 16317 }, { - "epoch": 0.94, - "grad_norm": 0.2925206750954721, - "learning_rate": 2.0389176266227494e-07, - "loss": 0.3033, + "epoch": 0.75, + "grad_norm": 0.9117075604763524, + "learning_rate": 3.111652874111891e-06, + "loss": 0.4072, "step": 16318 }, { - "epoch": 0.94, - "grad_norm": 0.7559070874858043, - "learning_rate": 2.0351806649446582e-07, - "loss": 0.4248, + "epoch": 0.75, + "grad_norm": 0.43466547473984385, + "learning_rate": 3.1105743302885373e-06, + "loss": 0.3148, "step": 16319 }, { - "epoch": 0.94, - "grad_norm": 0.3267369295851818, - "learning_rate": 2.0314470957840337e-07, - "loss": 0.2155, + "epoch": 0.75, + "grad_norm": 0.30857637111789443, + "learning_rate": 3.1094959389904245e-06, + "loss": 0.2271, "step": 16320 }, { - "epoch": 0.94, - "grad_norm": 0.27187878348807465, - "learning_rate": 2.0277169192701951e-07, - "loss": 0.1862, + "epoch": 0.75, + "grad_norm": 0.2482075362776378, + "learning_rate": 3.1084177002414307e-06, + "loss": 0.1585, "step": 16321 }, { - "epoch": 0.94, - "grad_norm": 0.3450218801283784, - "learning_rate": 2.0239901355323166e-07, - "loss": 0.2994, + "epoch": 0.75, + "grad_norm": 0.6315452501079256, + "learning_rate": 3.10733961406543e-06, + "loss": 0.3411, "step": 16322 }, { - "epoch": 0.94, - "grad_norm": 0.33760499589787035, - "learning_rate": 2.0202667446994396e-07, - "loss": 0.1877, + "epoch": 0.75, + "grad_norm": 0.30804980204368937, + "learning_rate": 3.1062616804862834e-06, + "loss": 0.2262, "step": 16323 }, { - "epoch": 0.94, - "grad_norm": 1.3079162281268204, - "learning_rate": 2.016546746900505e-07, - "loss": 0.5858, + "epoch": 0.75, + "grad_norm": 0.5492817497873844, + "learning_rate": 3.1051838995278617e-06, + "loss": 0.3062, "step": 16324 }, { - "epoch": 0.94, - "grad_norm": 0.5508684131706696, - "learning_rate": 2.0128301422643437e-07, - "loss": 0.3595, + "epoch": 0.75, + "grad_norm": 0.8798894324188794, + "learning_rate": 3.104106271214018e-06, + "loss": 0.4908, "step": 16325 }, { - "epoch": 0.94, - "grad_norm": 0.22361769791366404, - "learning_rate": 2.0091169309196635e-07, - "loss": 0.219, + "epoch": 0.75, + "grad_norm": 0.26086137333049136, + "learning_rate": 3.10302879556862e-06, + "loss": 0.1879, "step": 16326 }, { - "epoch": 0.94, - "grad_norm": 0.27103442373090697, - "learning_rate": 2.0054071129950503e-07, - "loss": 0.1882, + "epoch": 0.75, + "grad_norm": 0.349581128577903, + "learning_rate": 3.1019514726155154e-06, + "loss": 0.2308, "step": 16327 }, { - "epoch": 0.94, - "grad_norm": 1.4862555952033187, - "learning_rate": 2.0017006886189793e-07, - "loss": 0.5859, + "epoch": 0.75, + "grad_norm": 0.4333805052954599, + "learning_rate": 3.1008743023785593e-06, + "loss": 0.2838, "step": 16328 }, { - "epoch": 0.94, - "grad_norm": 0.43694139275442706, - "learning_rate": 1.9979976579197924e-07, - "loss": 0.2516, + "epoch": 0.75, + "grad_norm": 0.4343381791551161, + "learning_rate": 3.0997972848815926e-06, + "loss": 0.3014, "step": 16329 }, { - "epoch": 0.94, - "grad_norm": 0.2777763051503804, - "learning_rate": 1.9942980210257313e-07, - "loss": 0.2499, + "epoch": 0.75, + "grad_norm": 0.8405894053742556, + "learning_rate": 3.0987204201484646e-06, + "loss": 0.281, "step": 16330 }, { - "epoch": 0.94, - "grad_norm": 0.5401113539028809, - "learning_rate": 1.9906017780649267e-07, - "loss": 0.3562, + "epoch": 0.75, + "grad_norm": 0.32025642665563875, + "learning_rate": 3.0976437082030185e-06, + "loss": 0.2605, "step": 16331 }, { - "epoch": 0.94, - "grad_norm": 0.5545585769788984, - "learning_rate": 1.9869089291653544e-07, - "loss": 0.3261, + "epoch": 0.75, + "grad_norm": 0.45844415955387186, + "learning_rate": 3.096567149069084e-06, + "loss": 0.3327, "step": 16332 }, { - "epoch": 0.94, - "grad_norm": 0.24578812393967553, - "learning_rate": 1.9832194744549117e-07, - "loss": 0.1533, + "epoch": 0.75, + "grad_norm": 0.5211204326550954, + "learning_rate": 3.0954907427705026e-06, + "loss": 0.2092, "step": 16333 }, { - "epoch": 0.94, - "grad_norm": 0.3471473882109775, - "learning_rate": 1.9795334140613741e-07, - "loss": 0.2771, + "epoch": 0.75, + "grad_norm": 0.4250827514266838, + "learning_rate": 3.094414489331099e-06, + "loss": 0.243, "step": 16334 }, { - "epoch": 0.94, - "grad_norm": 0.7065238619042722, - "learning_rate": 1.9758507481123734e-07, - "loss": 0.3106, + "epoch": 0.75, + "grad_norm": 0.3968554017943291, + "learning_rate": 3.0933383887747014e-06, + "loss": 0.2561, "step": 16335 }, { - "epoch": 0.94, - "grad_norm": 0.37428051274745233, - "learning_rate": 1.9721714767354516e-07, - "loss": 0.2519, + "epoch": 0.75, + "grad_norm": 0.3566525686649253, + "learning_rate": 3.0922624411251403e-06, + "loss": 0.2358, "step": 16336 }, { - "epoch": 0.94, - "grad_norm": 0.8941617265103616, - "learning_rate": 1.968495600058018e-07, - "loss": 0.5307, + "epoch": 0.75, + "grad_norm": 0.8786999480380613, + "learning_rate": 3.0911866464062266e-06, + "loss": 0.4897, "step": 16337 }, { - "epoch": 0.94, - "grad_norm": 0.31408047918318077, - "learning_rate": 1.9648231182073484e-07, - "loss": 0.2839, + "epoch": 0.75, + "grad_norm": 0.3869213178669692, + "learning_rate": 3.0901110046417816e-06, + "loss": 0.3031, "step": 16338 }, { - "epoch": 0.94, - "grad_norm": 0.21954413848341273, - "learning_rate": 1.9611540313106526e-07, - "loss": 0.1522, + "epoch": 0.75, + "grad_norm": 0.33507264413712967, + "learning_rate": 3.0890355158556195e-06, + "loss": 0.2915, "step": 16339 }, { - "epoch": 0.94, - "grad_norm": 1.4172099353996386, - "learning_rate": 1.957488339494973e-07, - "loss": 0.4308, + "epoch": 0.75, + "grad_norm": 0.431369114924861, + "learning_rate": 3.087960180071553e-06, + "loss": 0.1726, "step": 16340 }, { - "epoch": 0.94, - "grad_norm": 0.4576229631078692, - "learning_rate": 1.9538260428872636e-07, - "loss": 0.2894, + "epoch": 0.75, + "grad_norm": 0.29841795431933993, + "learning_rate": 3.0868849973133875e-06, + "loss": 0.2141, "step": 16341 }, { - "epoch": 0.94, - "grad_norm": 0.29422998452537247, - "learning_rate": 1.9501671416143342e-07, - "loss": 0.2752, + "epoch": 0.75, + "grad_norm": 1.579432844699005, + "learning_rate": 3.085809967604917e-06, + "loss": 0.6987, "step": 16342 }, { - "epoch": 0.94, - "grad_norm": 0.7309287759530217, - "learning_rate": 1.9465116358029057e-07, - "loss": 0.3446, + "epoch": 0.75, + "grad_norm": 0.3538186319487211, + "learning_rate": 3.084735090969958e-06, + "loss": 0.232, "step": 16343 }, { - "epoch": 0.94, - "grad_norm": 0.3900915110844801, - "learning_rate": 1.9428595255795657e-07, - "loss": 0.2888, + "epoch": 0.75, + "grad_norm": 0.3730761161190041, + "learning_rate": 3.083660367432294e-06, + "loss": 0.2836, "step": 16344 }, { - "epoch": 0.94, - "grad_norm": 0.37353761465583235, - "learning_rate": 1.9392108110707686e-07, - "loss": 0.2302, + "epoch": 0.75, + "grad_norm": 0.7996858324781417, + "learning_rate": 3.082585797015728e-06, + "loss": 0.4347, "step": 16345 }, { - "epoch": 0.94, - "grad_norm": 0.3062970278982408, - "learning_rate": 1.935565492402891e-07, - "loss": 0.2056, + "epoch": 0.75, + "grad_norm": 0.3250360845854394, + "learning_rate": 3.081511379744042e-06, + "loss": 0.1491, "step": 16346 }, { - "epoch": 0.94, - "grad_norm": 0.5981284968410989, - "learning_rate": 1.9319235697021766e-07, - "loss": 0.3264, + "epoch": 0.75, + "grad_norm": 0.4167383314821497, + "learning_rate": 3.080437115641025e-06, + "loss": 0.2961, "step": 16347 }, { - "epoch": 0.94, - "grad_norm": 0.35572231889161077, - "learning_rate": 1.9282850430947242e-07, - "loss": 0.3058, + "epoch": 0.75, + "grad_norm": 0.39644347428887583, + "learning_rate": 3.0793630047304657e-06, + "loss": 0.2951, "step": 16348 }, { - "epoch": 0.94, - "grad_norm": 0.3250188011219933, - "learning_rate": 1.9246499127065333e-07, - "loss": 0.2664, + "epoch": 0.75, + "grad_norm": 0.5889262046595498, + "learning_rate": 3.078289047036135e-06, + "loss": 0.1752, "step": 16349 }, { - "epoch": 0.94, - "grad_norm": 0.5737774291728073, - "learning_rate": 1.9210181786635028e-07, - "loss": 0.2784, + "epoch": 0.75, + "grad_norm": 0.43751791325336575, + "learning_rate": 3.0772152425818167e-06, + "loss": 0.3049, "step": 16350 }, { - "epoch": 0.94, - "grad_norm": 0.4012088600693107, - "learning_rate": 1.9173898410913995e-07, - "loss": 0.2982, + "epoch": 0.75, + "grad_norm": 0.3699520337744608, + "learning_rate": 3.0761415913912783e-06, + "loss": 0.3203, "step": 16351 }, { - "epoch": 0.94, - "grad_norm": 0.29848629529359066, - "learning_rate": 1.9137649001158665e-07, - "loss": 0.1829, + "epoch": 0.75, + "grad_norm": 0.9671519000344597, + "learning_rate": 3.0750680934882914e-06, + "loss": 0.4952, "step": 16352 }, { - "epoch": 0.94, - "grad_norm": 0.48598014526128647, - "learning_rate": 1.9101433558624483e-07, - "loss": 0.2018, + "epoch": 0.75, + "grad_norm": 0.2717899328697984, + "learning_rate": 3.073994748896626e-06, + "loss": 0.1617, "step": 16353 }, { - "epoch": 0.94, - "grad_norm": 0.26365262037215137, - "learning_rate": 1.9065252084565222e-07, - "loss": 0.2652, + "epoch": 0.75, + "grad_norm": 0.5955640628442219, + "learning_rate": 3.0729215576400384e-06, + "loss": 0.2598, "step": 16354 }, { - "epoch": 0.94, - "grad_norm": 1.2940730599519867, - "learning_rate": 1.9029104580234325e-07, - "loss": 0.7013, + "epoch": 0.75, + "grad_norm": 0.353707756894835, + "learning_rate": 3.071848519742291e-06, + "loss": 0.314, "step": 16355 }, { - "epoch": 0.94, - "grad_norm": 0.6116318818103755, - "learning_rate": 1.8992991046883236e-07, - "loss": 0.1767, + "epoch": 0.75, + "grad_norm": 0.3420955633928122, + "learning_rate": 3.0707756352271388e-06, + "loss": 0.2078, "step": 16356 }, { - "epoch": 0.94, - "grad_norm": 0.23131950704158802, - "learning_rate": 1.895691148576273e-07, - "loss": 0.1767, + "epoch": 0.75, + "grad_norm": 0.6760074103336285, + "learning_rate": 3.06970290411834e-06, + "loss": 0.4008, "step": 16357 }, { - "epoch": 0.94, - "grad_norm": 0.3293918268569444, - "learning_rate": 1.8920865898122143e-07, - "loss": 0.2735, + "epoch": 0.75, + "grad_norm": 0.4878105804687725, + "learning_rate": 3.0686303264396353e-06, + "loss": 0.2284, "step": 16358 }, { - "epoch": 0.94, - "grad_norm": 0.4511372616577314, - "learning_rate": 1.888485428520992e-07, - "loss": 0.2337, + "epoch": 0.75, + "grad_norm": 0.23483456169127176, + "learning_rate": 3.0675579022147763e-06, + "loss": 0.2007, "step": 16359 }, { - "epoch": 0.94, - "grad_norm": 0.3753456647512689, - "learning_rate": 1.884887664827284e-07, - "loss": 0.2896, + "epoch": 0.75, + "grad_norm": 0.40365651413994263, + "learning_rate": 3.0664856314675053e-06, + "loss": 0.1797, "step": 16360 }, { - "epoch": 0.94, - "grad_norm": 0.44961231456689443, - "learning_rate": 1.881293298855713e-07, - "loss": 0.3533, + "epoch": 0.75, + "grad_norm": 0.9281598055528028, + "learning_rate": 3.0654135142215567e-06, + "loss": 0.4307, "step": 16361 }, { - "epoch": 0.94, - "grad_norm": 0.28662578208318146, - "learning_rate": 1.877702330730724e-07, - "loss": 0.1942, + "epoch": 0.75, + "grad_norm": 0.33718025458948453, + "learning_rate": 3.0643415505006733e-06, + "loss": 0.2115, "step": 16362 }, { - "epoch": 0.94, - "grad_norm": 0.6551363416683533, - "learning_rate": 1.874114760576684e-07, - "loss": 0.3072, + "epoch": 0.75, + "grad_norm": 0.3537468097262199, + "learning_rate": 3.063269740328579e-06, + "loss": 0.2999, "step": 16363 }, { - "epoch": 0.94, - "grad_norm": 0.32419575634159736, - "learning_rate": 1.870530588517827e-07, - "loss": 0.1952, + "epoch": 0.75, + "grad_norm": 1.1218175021389905, + "learning_rate": 3.062198083729008e-06, + "loss": 0.5458, "step": 16364 }, { - "epoch": 0.94, - "grad_norm": 0.3382957834583971, - "learning_rate": 1.8669498146782871e-07, - "loss": 0.2804, + "epoch": 0.75, + "grad_norm": 0.35184066118736007, + "learning_rate": 3.0611265807256875e-06, + "loss": 0.2518, "step": 16365 }, { - "epoch": 0.94, - "grad_norm": 0.27619581453105596, - "learning_rate": 1.863372439182054e-07, - "loss": 0.2148, + "epoch": 0.75, + "grad_norm": 0.3073132588673357, + "learning_rate": 3.060055231342334e-06, + "loss": 0.0859, "step": 16366 }, { - "epoch": 0.94, - "grad_norm": 1.2113862017013661, - "learning_rate": 1.8597984621530063e-07, - "loss": 0.8154, + "epoch": 0.75, + "grad_norm": 0.3473053146928505, + "learning_rate": 3.058984035602671e-06, + "loss": 0.3093, "step": 16367 }, { - "epoch": 0.94, - "grad_norm": 1.391102015454029, - "learning_rate": 1.8562278837149228e-07, - "loss": 0.4795, + "epoch": 0.75, + "grad_norm": 0.44133787331267116, + "learning_rate": 3.0579129935304065e-06, + "loss": 0.2881, "step": 16368 }, { - "epoch": 0.94, - "grad_norm": 0.3298554751561711, - "learning_rate": 1.852660703991438e-07, - "loss": 0.1875, + "epoch": 0.75, + "grad_norm": 0.49389009289683866, + "learning_rate": 3.0568421051492623e-06, + "loss": 0.2929, "step": 16369 }, { - "epoch": 0.94, - "grad_norm": 0.33732172554386936, - "learning_rate": 1.8490969231061085e-07, - "loss": 0.3013, + "epoch": 0.75, + "grad_norm": 0.5767997043061357, + "learning_rate": 3.055771370482944e-06, + "loss": 0.3548, "step": 16370 }, { - "epoch": 0.94, - "grad_norm": 0.41691582029462937, - "learning_rate": 1.8455365411823134e-07, - "loss": 0.2349, + "epoch": 0.75, + "grad_norm": 0.3456769543682887, + "learning_rate": 3.0547007895551496e-06, + "loss": 0.2657, "step": 16371 }, { - "epoch": 0.94, - "grad_norm": 0.23903592330484194, - "learning_rate": 1.8419795583433763e-07, - "loss": 0.1662, + "epoch": 0.75, + "grad_norm": 0.26100477799453764, + "learning_rate": 3.053630362389587e-06, + "loss": 0.1467, "step": 16372 }, { - "epoch": 0.94, - "grad_norm": 0.48747978730964747, - "learning_rate": 1.8384259747124766e-07, - "loss": 0.3685, + "epoch": 0.75, + "grad_norm": 0.7185061882388731, + "learning_rate": 3.052560089009953e-06, + "loss": 0.397, "step": 16373 }, { - "epoch": 0.94, - "grad_norm": 0.4383352697210021, - "learning_rate": 1.834875790412649e-07, - "loss": 0.3244, + "epoch": 0.75, + "grad_norm": 0.3431879589747441, + "learning_rate": 3.0514899694399445e-06, + "loss": 0.2718, "step": 16374 }, { - "epoch": 0.94, - "grad_norm": 0.34083610981257056, - "learning_rate": 1.831329005566851e-07, - "loss": 0.2185, + "epoch": 0.75, + "grad_norm": 0.3501777859924704, + "learning_rate": 3.0504200037032494e-06, + "loss": 0.2753, "step": 16375 }, { - "epoch": 0.94, - "grad_norm": 0.6216981447352833, - "learning_rate": 1.8277856202979282e-07, - "loss": 0.3541, + "epoch": 0.75, + "grad_norm": 1.7556192165480307, + "learning_rate": 3.049350191823557e-06, + "loss": 0.7727, "step": 16376 }, { - "epoch": 0.94, - "grad_norm": 0.2519742262206242, - "learning_rate": 1.8242456347285498e-07, - "loss": 0.2207, + "epoch": 0.75, + "grad_norm": 0.3662298702539251, + "learning_rate": 3.0482805338245545e-06, + "loss": 0.2545, "step": 16377 }, { - "epoch": 0.94, - "grad_norm": 0.27485795073992864, - "learning_rate": 1.8207090489813284e-07, - "loss": 0.2107, + "epoch": 0.75, + "grad_norm": 0.34018285075066207, + "learning_rate": 3.0472110297299183e-06, + "loss": 0.1657, "step": 16378 }, { - "epoch": 0.94, - "grad_norm": 0.9112149768074835, - "learning_rate": 1.8171758631787327e-07, - "loss": 0.5171, + "epoch": 0.75, + "grad_norm": 0.38452548875421755, + "learning_rate": 3.0461416795633316e-06, + "loss": 0.254, "step": 16379 }, { - "epoch": 0.94, - "grad_norm": 0.6028629951517451, - "learning_rate": 1.8136460774431097e-07, - "loss": 0.2255, + "epoch": 0.75, + "grad_norm": 0.31847985555688063, + "learning_rate": 3.0450724833484635e-06, + "loss": 0.2618, "step": 16380 }, { - "epoch": 0.94, - "grad_norm": 0.3830929246588872, - "learning_rate": 1.8101196918967056e-07, - "loss": 0.2811, + "epoch": 0.75, + "grad_norm": 1.1672268529401792, + "learning_rate": 3.044003441108987e-06, + "loss": 0.6476, "step": 16381 }, { - "epoch": 0.94, - "grad_norm": 0.3110335698157431, - "learning_rate": 1.806596706661634e-07, - "loss": 0.2559, + "epoch": 0.75, + "grad_norm": 0.35443302054874587, + "learning_rate": 3.0429345528685727e-06, + "loss": 0.2408, "step": 16382 }, { - "epoch": 0.94, - "grad_norm": 0.3532742177417911, - "learning_rate": 1.8030771218598863e-07, - "loss": 0.2153, + "epoch": 0.75, + "grad_norm": 0.36599661895380536, + "learning_rate": 3.0418658186508787e-06, + "loss": 0.2874, "step": 16383 }, { - "epoch": 0.94, - "grad_norm": 0.27118052800532905, - "learning_rate": 1.799560937613365e-07, - "loss": 0.2216, + "epoch": 0.75, + "grad_norm": 0.33048692150046277, + "learning_rate": 3.0407972384795736e-06, + "loss": 0.1546, "step": 16384 }, { - "epoch": 0.94, - "grad_norm": 0.30009922060906546, - "learning_rate": 1.7960481540438278e-07, - "loss": 0.259, + "epoch": 0.75, + "grad_norm": 0.6334949107599004, + "learning_rate": 3.039728812378303e-06, + "loss": 0.2956, "step": 16385 }, { - "epoch": 0.94, - "grad_norm": 1.2439511677777768, - "learning_rate": 1.7925387712729113e-07, - "loss": 0.3727, + "epoch": 0.75, + "grad_norm": 0.41657746064304524, + "learning_rate": 3.0386605403707347e-06, + "loss": 0.2757, "step": 16386 }, { - "epoch": 0.94, - "grad_norm": 0.3074681980467877, - "learning_rate": 1.7890327894221515e-07, - "loss": 0.2405, + "epoch": 0.75, + "grad_norm": 0.3143340567043007, + "learning_rate": 3.037592422480512e-06, + "loss": 0.2782, "step": 16387 }, { - "epoch": 0.94, - "grad_norm": 0.6218933087042393, - "learning_rate": 1.7855302086129734e-07, - "loss": 0.2886, + "epoch": 0.75, + "grad_norm": 1.2500536836247713, + "learning_rate": 3.0365244587312804e-06, + "loss": 0.2475, "step": 16388 }, { - "epoch": 0.94, - "grad_norm": 0.25243688522148056, - "learning_rate": 1.7820310289666577e-07, - "loss": 0.2225, + "epoch": 0.75, + "grad_norm": 0.4510531034035246, + "learning_rate": 3.035456649146685e-06, + "loss": 0.2931, "step": 16389 }, { - "epoch": 0.94, - "grad_norm": 0.24248323345775227, - "learning_rate": 1.7785352506043852e-07, - "loss": 0.2009, + "epoch": 0.75, + "grad_norm": 0.38730443085608907, + "learning_rate": 3.0343889937503677e-06, + "loss": 0.2655, "step": 16390 }, { - "epoch": 0.94, - "grad_norm": 1.140361489090267, - "learning_rate": 1.7750428736472146e-07, - "loss": 0.7097, + "epoch": 0.75, + "grad_norm": 0.3695970947522675, + "learning_rate": 3.033321492565967e-06, + "loss": 0.3007, "step": 16391 }, { - "epoch": 0.94, - "grad_norm": 0.6494164060996802, - "learning_rate": 1.7715538982160717e-07, - "loss": 0.2132, + "epoch": 0.75, + "grad_norm": 0.27058404527592655, + "learning_rate": 3.0322541456171115e-06, + "loss": 0.1565, "step": 16392 }, { - "epoch": 0.94, - "grad_norm": 0.29027986290742447, - "learning_rate": 1.7680683244318154e-07, - "loss": 0.274, + "epoch": 0.75, + "grad_norm": 1.2230822445057148, + "learning_rate": 3.0311869529274363e-06, + "loss": 0.8803, "step": 16393 }, { - "epoch": 0.94, - "grad_norm": 0.4739590651850567, - "learning_rate": 1.7645861524151152e-07, - "loss": 0.3406, + "epoch": 0.75, + "grad_norm": 1.4433662321864855, + "learning_rate": 3.030119914520562e-06, + "loss": 0.4755, "step": 16394 }, { - "epoch": 0.94, - "grad_norm": 0.47333802545971043, - "learning_rate": 1.7611073822865753e-07, - "loss": 0.1103, + "epoch": 0.75, + "grad_norm": 0.242304341416268, + "learning_rate": 3.029053030420115e-06, + "loss": 0.2087, "step": 16395 }, { - "epoch": 0.94, - "grad_norm": 0.34722614276277375, - "learning_rate": 1.7576320141666548e-07, - "loss": 0.297, + "epoch": 0.75, + "grad_norm": 0.9194181153506501, + "learning_rate": 3.027986300649719e-06, + "loss": 0.4386, "step": 16396 }, { - "epoch": 0.94, - "grad_norm": 0.4741094317552018, - "learning_rate": 1.7541600481757238e-07, - "loss": 0.3714, + "epoch": 0.75, + "grad_norm": 0.39885791308839486, + "learning_rate": 3.026919725232983e-06, + "loss": 0.2355, "step": 16397 }, { - "epoch": 0.94, - "grad_norm": 0.30253332304639474, - "learning_rate": 1.7506914844340084e-07, - "loss": 0.2009, + "epoch": 0.75, + "grad_norm": 0.3450698031315827, + "learning_rate": 3.0258533041935234e-06, + "loss": 0.2135, "step": 16398 }, { - "epoch": 0.94, - "grad_norm": 0.3499583597663478, - "learning_rate": 1.7472263230616126e-07, - "loss": 0.2693, + "epoch": 0.75, + "grad_norm": 0.40249996903549046, + "learning_rate": 3.0247870375549537e-06, + "loss": 0.3111, "step": 16399 }, { - "epoch": 0.94, - "grad_norm": 0.9313377074422858, - "learning_rate": 1.7437645641785404e-07, - "loss": 0.4164, + "epoch": 0.75, + "grad_norm": 1.2188928579883807, + "learning_rate": 3.0237209253408727e-06, + "loss": 0.5941, "step": 16400 }, { - "epoch": 0.94, - "grad_norm": 0.3084187512136342, - "learning_rate": 1.7403062079046851e-07, - "loss": 0.2116, + "epoch": 0.75, + "grad_norm": 0.31033871957649317, + "learning_rate": 3.0226549675748894e-06, + "loss": 0.1895, "step": 16401 }, { - "epoch": 0.94, - "grad_norm": 0.3905940549961325, - "learning_rate": 1.736851254359795e-07, - "loss": 0.2868, + "epoch": 0.75, + "grad_norm": 0.6872494138288879, + "learning_rate": 3.0215891642805937e-06, + "loss": 0.3519, "step": 16402 }, { - "epoch": 0.94, - "grad_norm": 0.45116394117225134, - "learning_rate": 1.7333997036635296e-07, - "loss": 0.3161, + "epoch": 0.75, + "grad_norm": 0.24000829920705616, + "learning_rate": 3.020523515481595e-06, + "loss": 0.2045, "step": 16403 }, { - "epoch": 0.94, - "grad_norm": 0.587337210292463, - "learning_rate": 1.7299515559354052e-07, - "loss": 0.3472, + "epoch": 0.75, + "grad_norm": 0.5845462286113416, + "learning_rate": 3.019458021201476e-06, + "loss": 0.3199, "step": 16404 }, { - "epoch": 0.94, - "grad_norm": 0.24303637781952572, - "learning_rate": 1.7265068112948257e-07, - "loss": 0.1978, + "epoch": 0.75, + "grad_norm": 0.43585553896310303, + "learning_rate": 3.018392681463831e-06, + "loss": 0.2541, "step": 16405 }, { - "epoch": 0.94, - "grad_norm": 0.3114290454839184, - "learning_rate": 1.7230654698610848e-07, - "loss": 0.2325, + "epoch": 0.75, + "grad_norm": 0.3643621740706527, + "learning_rate": 3.0173274962922396e-06, + "loss": 0.3064, "step": 16406 }, { - "epoch": 0.94, - "grad_norm": 1.4056712180482611, - "learning_rate": 1.7196275317533761e-07, - "loss": 0.4493, + "epoch": 0.75, + "grad_norm": 0.6920576889412565, + "learning_rate": 3.016262465710288e-06, + "loss": 0.302, "step": 16407 }, { - "epoch": 0.94, - "grad_norm": 0.28533591224264054, - "learning_rate": 1.7161929970907266e-07, - "loss": 0.2084, + "epoch": 0.75, + "grad_norm": 0.4139706464033087, + "learning_rate": 3.0151975897415574e-06, + "loss": 0.26, "step": 16408 }, { - "epoch": 0.94, - "grad_norm": 0.33232194989980035, - "learning_rate": 1.7127618659920963e-07, - "loss": 0.2976, + "epoch": 0.75, + "grad_norm": 0.6095672401529331, + "learning_rate": 3.014132868409617e-06, + "loss": 0.2867, "step": 16409 }, { - "epoch": 0.94, - "grad_norm": 0.5558040330434648, - "learning_rate": 1.7093341385762907e-07, - "loss": 0.3583, + "epoch": 0.75, + "grad_norm": 0.414872772309864, + "learning_rate": 3.0130683017380445e-06, + "loss": 0.304, "step": 16410 }, { - "epoch": 0.94, - "grad_norm": 0.1846507440261721, - "learning_rate": 1.7059098149620257e-07, - "loss": 0.1403, + "epoch": 0.75, + "grad_norm": 0.24624566130833486, + "learning_rate": 3.012003889750403e-06, + "loss": 0.1936, "step": 16411 }, { - "epoch": 0.94, - "grad_norm": 1.3482449752952768, - "learning_rate": 1.702488895267862e-07, - "loss": 0.5575, + "epoch": 0.75, + "grad_norm": 1.5713270389594192, + "learning_rate": 3.01093963247026e-06, + "loss": 0.5456, "step": 16412 }, { - "epoch": 0.94, - "grad_norm": 0.3510275776236248, - "learning_rate": 1.6990713796122938e-07, - "loss": 0.2993, + "epoch": 0.75, + "grad_norm": 0.5112708818750812, + "learning_rate": 3.009875529921181e-06, + "loss": 0.2923, "step": 16413 }, { - "epoch": 0.94, - "grad_norm": 0.2897476651472436, - "learning_rate": 1.6956572681136485e-07, - "loss": 0.1903, + "epoch": 0.75, + "grad_norm": 0.5561618773329151, + "learning_rate": 3.008811582126717e-06, + "loss": 0.2533, "step": 16414 }, { - "epoch": 0.94, - "grad_norm": 0.6730125448322015, - "learning_rate": 1.692246560890176e-07, - "loss": 0.4091, + "epoch": 0.75, + "grad_norm": 0.39359320774152307, + "learning_rate": 3.007747789110427e-06, + "loss": 0.2933, "step": 16415 }, { - "epoch": 0.94, - "grad_norm": 0.43855648011694554, - "learning_rate": 1.688839258059971e-07, - "loss": 0.3141, + "epoch": 0.75, + "grad_norm": 0.4250944807555506, + "learning_rate": 3.0066841508958642e-06, + "loss": 0.295, "step": 16416 }, { - "epoch": 0.94, - "grad_norm": 0.4039676932669491, - "learning_rate": 1.6854353597410278e-07, - "loss": 0.2686, + "epoch": 0.75, + "grad_norm": 0.5870015501540505, + "learning_rate": 3.005620667506571e-06, + "loss": 0.3475, "step": 16417 }, { - "epoch": 0.94, - "grad_norm": 0.24169214358315877, - "learning_rate": 1.68203486605123e-07, - "loss": 0.1656, + "epoch": 0.75, + "grad_norm": 0.2471418725997406, + "learning_rate": 3.0045573389660987e-06, + "loss": 0.1836, "step": 16418 }, { - "epoch": 0.94, - "grad_norm": 1.4027029186189741, - "learning_rate": 1.6786377771083496e-07, - "loss": 0.5126, + "epoch": 0.75, + "grad_norm": 0.5268144407869432, + "learning_rate": 3.0034941652979786e-06, + "loss": 0.3022, "step": 16419 }, { - "epoch": 0.94, - "grad_norm": 0.4016732571923031, - "learning_rate": 1.675244093030015e-07, - "loss": 0.272, + "epoch": 0.75, + "grad_norm": 0.46476752311174324, + "learning_rate": 3.0024311465257592e-06, + "loss": 0.2846, "step": 16420 }, { - "epoch": 0.94, - "grad_norm": 0.31401939566076453, - "learning_rate": 1.6718538139337325e-07, - "loss": 0.251, + "epoch": 0.75, + "grad_norm": 1.5023710955760372, + "learning_rate": 3.0013682826729686e-06, + "loss": 0.3623, "step": 16421 }, { - "epoch": 0.94, - "grad_norm": 0.7274054388947614, - "learning_rate": 1.6684669399369412e-07, - "loss": 0.4075, + "epoch": 0.75, + "grad_norm": 0.647681170257252, + "learning_rate": 3.0003055737631404e-06, + "loss": 0.3421, "step": 16422 }, { - "epoch": 0.94, - "grad_norm": 0.3134544640656414, - "learning_rate": 1.6650834711569031e-07, - "loss": 0.2514, + "epoch": 0.75, + "grad_norm": 0.27390564203419276, + "learning_rate": 2.9992430198197973e-06, + "loss": 0.2626, "step": 16423 }, { - "epoch": 0.94, - "grad_norm": 0.29983200999934284, - "learning_rate": 1.661703407710802e-07, - "loss": 0.0921, + "epoch": 0.75, + "grad_norm": 0.3821929577096597, + "learning_rate": 2.9981806208664676e-06, + "loss": 0.1857, "step": 16424 }, { - "epoch": 0.94, - "grad_norm": 0.3718216113469689, - "learning_rate": 1.6583267497156663e-07, - "loss": 0.3093, + "epoch": 0.75, + "grad_norm": 0.6654849922132344, + "learning_rate": 2.997118376926672e-06, + "loss": 0.3882, "step": 16425 }, { - "epoch": 0.94, - "grad_norm": 0.3162630195231657, - "learning_rate": 1.6549534972884584e-07, - "loss": 0.2512, + "epoch": 0.75, + "grad_norm": 0.3319571885779992, + "learning_rate": 2.9960562880239243e-06, + "loss": 0.276, "step": 16426 }, { - "epoch": 0.94, - "grad_norm": 0.5407154342108497, - "learning_rate": 1.6515836505459848e-07, - "loss": 0.3433, + "epoch": 0.75, + "grad_norm": 0.5830536577400622, + "learning_rate": 2.994994354181743e-06, + "loss": 0.2265, "step": 16427 }, { - "epoch": 0.94, - "grad_norm": 0.4685866469053301, - "learning_rate": 1.648217209604941e-07, - "loss": 0.2603, + "epoch": 0.75, + "grad_norm": 0.6431029928089156, + "learning_rate": 2.9939325754236316e-06, + "loss": 0.4137, "step": 16428 }, { - "epoch": 0.94, - "grad_norm": 0.2709135275382767, - "learning_rate": 1.6448541745819113e-07, - "loss": 0.2503, + "epoch": 0.75, + "grad_norm": 0.3096305372605152, + "learning_rate": 2.9928709517731005e-06, + "loss": 0.2473, "step": 16429 }, { - "epoch": 0.94, - "grad_norm": 0.3388604124213988, - "learning_rate": 1.6414945455933363e-07, - "loss": 0.1982, + "epoch": 0.75, + "grad_norm": 0.4741315376233661, + "learning_rate": 2.9918094832536547e-06, + "loss": 0.3367, "step": 16430 }, { - "epoch": 0.94, - "grad_norm": 0.9826337080521574, - "learning_rate": 1.638138322755578e-07, - "loss": 0.2629, + "epoch": 0.75, + "grad_norm": 0.27866133220471845, + "learning_rate": 2.990748169888791e-06, + "loss": 0.1783, "step": 16431 }, { - "epoch": 0.94, - "grad_norm": 0.300363016379023, - "learning_rate": 1.634785506184866e-07, - "loss": 0.2463, + "epoch": 0.75, + "grad_norm": 0.449529093149855, + "learning_rate": 2.9896870117020073e-06, + "loss": 0.2834, "step": 16432 }, { - "epoch": 0.94, - "grad_norm": 0.34111656134447776, - "learning_rate": 1.6314360959973075e-07, - "loss": 0.2959, + "epoch": 0.75, + "grad_norm": 1.1970140723890708, + "learning_rate": 2.9886260087167952e-06, + "loss": 0.5961, "step": 16433 }, { - "epoch": 0.94, - "grad_norm": 1.0121583179292197, - "learning_rate": 1.628090092308876e-07, - "loss": 0.3706, + "epoch": 0.75, + "grad_norm": 0.3030313867360394, + "learning_rate": 2.9875651609566503e-06, + "loss": 0.2209, "step": 16434 }, { - "epoch": 0.94, - "grad_norm": 0.39314249015309166, - "learning_rate": 1.6247474952354568e-07, - "loss": 0.2884, + "epoch": 0.76, + "grad_norm": 0.37263404072457224, + "learning_rate": 2.986504468445053e-06, + "loss": 0.2699, "step": 16435 }, { - "epoch": 0.94, - "grad_norm": 0.2459622684578627, - "learning_rate": 1.621408304892802e-07, - "loss": 0.19, + "epoch": 0.76, + "grad_norm": 0.8098807160075733, + "learning_rate": 2.9854439312054805e-06, + "loss": 0.3607, "step": 16436 }, { - "epoch": 0.94, - "grad_norm": 0.3447675748059979, - "learning_rate": 1.618072521396552e-07, - "loss": 0.247, + "epoch": 0.76, + "grad_norm": 0.2030841114563129, + "learning_rate": 2.984383549261426e-06, + "loss": 0.0711, "step": 16437 }, { - "epoch": 0.94, - "grad_norm": 0.41316622660519126, - "learning_rate": 1.6147401448622145e-07, - "loss": 0.2924, + "epoch": 0.76, + "grad_norm": 0.3329399025573987, + "learning_rate": 2.9833233226363547e-06, + "loss": 0.2917, "step": 16438 }, { - "epoch": 0.94, - "grad_norm": 0.44203133836751707, - "learning_rate": 1.6114111754051976e-07, - "loss": 0.3195, + "epoch": 0.76, + "grad_norm": 0.5522802318404808, + "learning_rate": 2.982263251353745e-06, + "loss": 0.3182, "step": 16439 }, { - "epoch": 0.94, - "grad_norm": 0.48436498902588176, - "learning_rate": 1.6080856131407862e-07, - "loss": 0.3431, + "epoch": 0.76, + "grad_norm": 0.5498526069053413, + "learning_rate": 2.9812033354370595e-06, + "loss": 0.243, "step": 16440 }, { - "epoch": 0.94, - "grad_norm": 0.28460937853923546, - "learning_rate": 1.6047634581841331e-07, - "loss": 0.184, + "epoch": 0.76, + "grad_norm": 0.41540113748382146, + "learning_rate": 2.9801435749097684e-06, + "loss": 0.2694, "step": 16441 }, { - "epoch": 0.94, - "grad_norm": 0.2880830569609324, - "learning_rate": 1.6014447106502907e-07, - "loss": 0.1768, + "epoch": 0.76, + "grad_norm": 0.3680730477452568, + "learning_rate": 2.9790839697953357e-06, + "loss": 0.2794, "step": 16442 }, { - "epoch": 0.94, - "grad_norm": 0.8736808553813734, - "learning_rate": 1.5981293706541888e-07, - "loss": 0.3771, + "epoch": 0.76, + "grad_norm": 0.40782841194675556, + "learning_rate": 2.978024520117213e-06, + "loss": 0.1723, "step": 16443 }, { - "epoch": 0.94, - "grad_norm": 0.31317366638812105, - "learning_rate": 1.5948174383106362e-07, - "loss": 0.2167, + "epoch": 0.76, + "grad_norm": 0.25419564615532436, + "learning_rate": 2.9769652258988633e-06, + "loss": 0.1576, "step": 16444 }, { - "epoch": 0.94, - "grad_norm": 0.3277336661706098, - "learning_rate": 1.5915089137343186e-07, - "loss": 0.3097, + "epoch": 0.76, + "grad_norm": 1.4132042359971555, + "learning_rate": 2.9759060871637314e-06, + "loss": 0.6667, "step": 16445 }, { - "epoch": 0.94, - "grad_norm": 1.1496737135542834, - "learning_rate": 1.5882037970398111e-07, - "loss": 0.7183, + "epoch": 0.76, + "grad_norm": 0.33516870011082367, + "learning_rate": 2.974847103935269e-06, + "loss": 0.2892, "step": 16446 }, { - "epoch": 0.94, - "grad_norm": 0.29250473166722574, - "learning_rate": 1.584902088341589e-07, - "loss": 0.1838, + "epoch": 0.76, + "grad_norm": 0.4907978212875139, + "learning_rate": 2.973788276236924e-06, + "loss": 0.2299, "step": 16447 }, { - "epoch": 0.95, - "grad_norm": 0.2878214529095352, - "learning_rate": 1.5816037877539715e-07, - "loss": 0.1679, + "epoch": 0.76, + "grad_norm": 0.8105001080107226, + "learning_rate": 2.9727296040921315e-06, + "loss": 0.4354, "step": 16448 }, { - "epoch": 0.95, - "grad_norm": 0.3309627514911893, - "learning_rate": 1.5783088953911784e-07, - "loss": 0.3004, + "epoch": 0.76, + "grad_norm": 0.3011798492925412, + "learning_rate": 2.9716710875243326e-06, + "loss": 0.1806, "step": 16449 }, { - "epoch": 0.95, - "grad_norm": 0.33326136046621196, - "learning_rate": 1.5750174113673077e-07, - "loss": 0.2167, + "epoch": 0.76, + "grad_norm": 0.3332671340503776, + "learning_rate": 2.9706127265569616e-06, + "loss": 0.2226, "step": 16450 }, { - "epoch": 0.95, - "grad_norm": 1.179074426417154, - "learning_rate": 1.5717293357963682e-07, - "loss": 0.5454, + "epoch": 0.76, + "grad_norm": 0.5621658552968646, + "learning_rate": 2.9695545212134523e-06, + "loss": 0.3606, "step": 16451 }, { - "epoch": 0.95, - "grad_norm": 0.4564239880365591, - "learning_rate": 1.5684446687922017e-07, - "loss": 0.3404, + "epoch": 0.76, + "grad_norm": 0.6157271902144869, + "learning_rate": 2.9684964715172306e-06, + "loss": 0.369, "step": 16452 }, { - "epoch": 0.95, - "grad_norm": 0.30674601902753773, - "learning_rate": 1.565163410468562e-07, - "loss": 0.2332, + "epoch": 0.76, + "grad_norm": 0.39379598675732985, + "learning_rate": 2.967438577491717e-06, + "loss": 0.2702, "step": 16453 }, { - "epoch": 0.95, - "grad_norm": 0.2190549081107368, - "learning_rate": 1.5618855609390803e-07, - "loss": 0.1242, + "epoch": 0.76, + "grad_norm": 0.39420820610033264, + "learning_rate": 2.9663808391603354e-06, + "loss": 0.2707, "step": 16454 }, { - "epoch": 0.95, - "grad_norm": 0.5729376141354351, - "learning_rate": 1.558611120317266e-07, - "loss": 0.3633, + "epoch": 0.76, + "grad_norm": 0.37204250415070306, + "learning_rate": 2.9653232565465017e-06, + "loss": 0.1773, "step": 16455 }, { - "epoch": 0.95, - "grad_norm": 0.5545582014596254, - "learning_rate": 1.5553400887165172e-07, - "loss": 0.2793, + "epoch": 0.76, + "grad_norm": 0.31408871748549827, + "learning_rate": 2.964265829673636e-06, + "loss": 0.2424, "step": 16456 }, { - "epoch": 0.95, - "grad_norm": 0.2458146065487911, - "learning_rate": 1.5520724662501207e-07, - "loss": 0.2315, + "epoch": 0.76, + "grad_norm": 0.9295425691535901, + "learning_rate": 2.9632085585651393e-06, + "loss": 0.3333, "step": 16457 }, { - "epoch": 0.95, - "grad_norm": 1.1168303612442845, - "learning_rate": 1.5488082530312087e-07, - "loss": 0.694, + "epoch": 0.76, + "grad_norm": 0.3582046866134283, + "learning_rate": 2.962151443244423e-06, + "loss": 0.2959, "step": 16458 }, { - "epoch": 0.95, - "grad_norm": 0.6203300463305141, - "learning_rate": 1.545547449172835e-07, - "loss": 0.2942, + "epoch": 0.76, + "grad_norm": 0.36575814476445256, + "learning_rate": 2.961094483734894e-06, + "loss": 0.2582, "step": 16459 }, { - "epoch": 0.95, - "grad_norm": 0.24751408093210903, - "learning_rate": 1.5422900547879206e-07, - "loss": 0.199, + "epoch": 0.76, + "grad_norm": 0.8513982645084819, + "learning_rate": 2.960037680059946e-06, + "loss": 0.3196, "step": 16460 }, { - "epoch": 0.95, - "grad_norm": 0.4335318922233136, - "learning_rate": 1.5390360699892636e-07, - "loss": 0.3313, + "epoch": 0.76, + "grad_norm": 0.2742952386711195, + "learning_rate": 2.9589810322429813e-06, + "loss": 0.1585, "step": 16461 }, { - "epoch": 0.95, - "grad_norm": 0.3373096852746386, - "learning_rate": 1.5357854948895634e-07, - "loss": 0.2321, + "epoch": 0.76, + "grad_norm": 0.2982649759450033, + "learning_rate": 2.957924540307384e-06, + "loss": 0.2377, "step": 16462 }, { - "epoch": 0.95, - "grad_norm": 0.3630705794262418, - "learning_rate": 1.532538329601363e-07, - "loss": 0.2461, + "epoch": 0.76, + "grad_norm": 0.5084747286381631, + "learning_rate": 2.956868204276556e-06, + "loss": 0.2566, "step": 16463 }, { - "epoch": 0.95, - "grad_norm": 0.5126707395414409, - "learning_rate": 1.529294574237139e-07, - "loss": 0.3202, + "epoch": 0.76, + "grad_norm": 0.7405019058125024, + "learning_rate": 2.9558120241738786e-06, + "loss": 0.3499, "step": 16464 }, { - "epoch": 0.95, - "grad_norm": 0.2925570376583863, - "learning_rate": 1.5260542289092016e-07, - "loss": 0.2425, + "epoch": 0.76, + "grad_norm": 0.3927707471178668, + "learning_rate": 2.9547560000227303e-06, + "loss": 0.2992, "step": 16465 }, { - "epoch": 0.95, - "grad_norm": 0.5729303620205985, - "learning_rate": 1.5228172937297837e-07, - "loss": 0.3226, + "epoch": 0.76, + "grad_norm": 0.36843102828714186, + "learning_rate": 2.953700131846494e-06, + "loss": 0.2936, "step": 16466 }, { - "epoch": 0.95, - "grad_norm": 0.4405506344298435, - "learning_rate": 1.5195837688109506e-07, - "loss": 0.2103, + "epoch": 0.76, + "grad_norm": 0.429806417654301, + "learning_rate": 2.9526444196685455e-06, + "loss": 0.1195, "step": 16467 }, { - "epoch": 0.95, - "grad_norm": 0.23129493919039548, - "learning_rate": 1.5163536542647018e-07, - "loss": 0.2064, + "epoch": 0.76, + "grad_norm": 0.2848051806041437, + "learning_rate": 2.9515888635122603e-06, + "loss": 0.2213, "step": 16468 }, { - "epoch": 0.95, - "grad_norm": 0.34775353831476136, - "learning_rate": 1.5131269502029034e-07, - "loss": 0.3053, + "epoch": 0.76, + "grad_norm": 1.0797897133530532, + "learning_rate": 2.950533463401001e-06, + "loss": 0.4425, "step": 16469 }, { - "epoch": 0.95, - "grad_norm": 1.0191758063869858, - "learning_rate": 1.509903656737277e-07, - "loss": 0.5071, + "epoch": 0.76, + "grad_norm": 0.3876924458894122, + "learning_rate": 2.9494782193581397e-06, + "loss": 0.2457, "step": 16470 }, { - "epoch": 0.95, - "grad_norm": 0.6469909048210354, - "learning_rate": 1.506683773979445e-07, - "loss": 0.352, + "epoch": 0.76, + "grad_norm": 0.3773487854202467, + "learning_rate": 2.948423131407032e-06, + "loss": 0.2633, "step": 16471 }, { - "epoch": 0.95, - "grad_norm": 0.3790611624365381, - "learning_rate": 1.5034673020409173e-07, - "loss": 0.2984, + "epoch": 0.76, + "grad_norm": 1.1367868776431203, + "learning_rate": 2.947368199571039e-06, + "loss": 0.6018, "step": 16472 }, { - "epoch": 0.95, - "grad_norm": 0.2816475223008202, - "learning_rate": 1.5002542410330946e-07, - "loss": 0.2091, + "epoch": 0.76, + "grad_norm": 0.3362863604780619, + "learning_rate": 2.9463134238735215e-06, + "loss": 0.1684, "step": 16473 }, { - "epoch": 0.95, - "grad_norm": 0.33164768844534576, - "learning_rate": 1.4970445910672205e-07, - "loss": 0.1769, + "epoch": 0.76, + "grad_norm": 0.37933820128583157, + "learning_rate": 2.9452588043378218e-06, + "loss": 0.2652, "step": 16474 }, { - "epoch": 0.95, - "grad_norm": 0.400650506425723, - "learning_rate": 1.493838352254462e-07, - "loss": 0.3036, + "epoch": 0.76, + "grad_norm": 0.4211902282732409, + "learning_rate": 2.9442043409872933e-06, + "loss": 0.2578, "step": 16475 }, { - "epoch": 0.95, - "grad_norm": 0.32037790280791784, - "learning_rate": 1.4906355247058412e-07, - "loss": 0.2392, + "epoch": 0.76, + "grad_norm": 0.952417529851729, + "learning_rate": 2.9431500338452833e-06, + "loss": 0.3406, "step": 16476 }, { - "epoch": 0.95, - "grad_norm": 0.605568545808694, - "learning_rate": 1.487436108532292e-07, - "loss": 0.33, + "epoch": 0.76, + "grad_norm": 0.34427445046750477, + "learning_rate": 2.9420958829351263e-06, + "loss": 0.2736, "step": 16477 }, { - "epoch": 0.95, - "grad_norm": 0.3674500331935406, - "learning_rate": 1.4842401038445808e-07, - "loss": 0.2867, + "epoch": 0.76, + "grad_norm": 0.37420444517583357, + "learning_rate": 2.9410418882801682e-06, + "loss": 0.305, "step": 16478 }, { - "epoch": 0.95, - "grad_norm": 0.9639296926238876, - "learning_rate": 1.4810475107533973e-07, - "loss": 0.3768, + "epoch": 0.76, + "grad_norm": 1.6499133216650348, + "learning_rate": 2.9399880499037325e-06, + "loss": 0.6196, "step": 16479 }, { - "epoch": 0.95, - "grad_norm": 0.2325175694217112, - "learning_rate": 1.4778583293692972e-07, - "loss": 0.1823, + "epoch": 0.76, + "grad_norm": 0.22550749683892404, + "learning_rate": 2.9389343678291624e-06, + "loss": 0.1574, "step": 16480 }, { - "epoch": 0.95, - "grad_norm": 0.30507855821640345, - "learning_rate": 1.4746725598027367e-07, - "loss": 0.257, + "epoch": 0.76, + "grad_norm": 0.5380962950228091, + "learning_rate": 2.9378808420797812e-06, + "loss": 0.2673, "step": 16481 }, { - "epoch": 0.95, - "grad_norm": 1.3321349423875684, - "learning_rate": 1.4714902021640277e-07, - "loss": 0.7486, + "epoch": 0.76, + "grad_norm": 0.3665603374802665, + "learning_rate": 2.936827472678908e-06, + "loss": 0.2989, "step": 16482 }, { - "epoch": 0.95, - "grad_norm": 0.5040720847135953, - "learning_rate": 1.4683112565633706e-07, - "loss": 0.2263, + "epoch": 0.76, + "grad_norm": 0.3361089992495192, + "learning_rate": 2.9357742596498693e-06, + "loss": 0.2125, "step": 16483 }, { - "epoch": 0.95, - "grad_norm": 0.3693885098403536, - "learning_rate": 1.4651357231108555e-07, - "loss": 0.2884, + "epoch": 0.76, + "grad_norm": 1.1650946264732804, + "learning_rate": 2.9347212030159746e-06, + "loss": 0.4847, "step": 16484 }, { - "epoch": 0.95, - "grad_norm": 0.3716837935292826, - "learning_rate": 1.4619636019164608e-07, - "loss": 0.3051, + "epoch": 0.76, + "grad_norm": 0.5677710605416404, + "learning_rate": 2.9336683028005486e-06, + "loss": 0.3544, "step": 16485 }, { - "epoch": 0.95, - "grad_norm": 0.20352716304565405, - "learning_rate": 1.458794893090032e-07, - "loss": 0.1185, + "epoch": 0.76, + "grad_norm": 0.21551013054115184, + "learning_rate": 2.9326155590268936e-06, + "loss": 0.169, "step": 16486 }, { - "epoch": 0.95, - "grad_norm": 0.5404248538254562, - "learning_rate": 1.4556295967412925e-07, - "loss": 0.3645, + "epoch": 0.76, + "grad_norm": 0.7618742957165453, + "learning_rate": 2.9315629717183204e-06, + "loss": 0.3947, "step": 16487 }, { - "epoch": 0.95, - "grad_norm": 0.35677414223681986, - "learning_rate": 1.4524677129798547e-07, - "loss": 0.2902, + "epoch": 0.76, + "grad_norm": 0.45705718341468793, + "learning_rate": 2.930510540898127e-06, + "loss": 0.2446, "step": 16488 }, { - "epoch": 0.95, - "grad_norm": 0.5106964245865684, - "learning_rate": 1.449309241915231e-07, - "loss": 0.1938, + "epoch": 0.76, + "grad_norm": 0.30524552988355325, + "learning_rate": 2.9294582665896176e-06, + "loss": 0.1904, "step": 16489 }, { - "epoch": 0.95, - "grad_norm": 0.3935425552716978, - "learning_rate": 1.4461541836568004e-07, - "loss": 0.3145, + "epoch": 0.76, + "grad_norm": 0.39649074582154, + "learning_rate": 2.9284061488160896e-06, + "loss": 0.3056, "step": 16490 }, { - "epoch": 0.95, - "grad_norm": 1.3403643170390191, - "learning_rate": 1.443002538313798e-07, - "loss": 0.5119, + "epoch": 0.76, + "grad_norm": 0.917118667781435, + "learning_rate": 2.9273541876008315e-06, + "loss": 0.4316, "step": 16491 }, { - "epoch": 0.95, - "grad_norm": 0.26232504943660767, - "learning_rate": 1.4398543059953918e-07, - "loss": 0.2427, + "epoch": 0.76, + "grad_norm": 0.4392835149742781, + "learning_rate": 2.9263023829671357e-06, + "loss": 0.2575, "step": 16492 }, { - "epoch": 0.95, - "grad_norm": 0.30415871484094553, - "learning_rate": 1.4367094868105725e-07, - "loss": 0.2046, + "epoch": 0.76, + "grad_norm": 0.386448379835768, + "learning_rate": 2.9252507349382884e-06, + "loss": 0.2613, "step": 16493 }, { - "epoch": 0.95, - "grad_norm": 0.42062338088496104, - "learning_rate": 1.433568080868286e-07, - "loss": 0.2729, + "epoch": 0.76, + "grad_norm": 0.3270134388747646, + "learning_rate": 2.92419924353757e-06, + "loss": 0.248, "step": 16494 }, { - "epoch": 0.95, - "grad_norm": 0.5711244914388044, - "learning_rate": 1.4304300882772903e-07, - "loss": 0.3911, + "epoch": 0.76, + "grad_norm": 0.39686432791917525, + "learning_rate": 2.923147908788263e-06, + "loss": 0.264, "step": 16495 }, { - "epoch": 0.95, - "grad_norm": 0.2384222357994241, - "learning_rate": 1.4272955091462648e-07, - "loss": 0.2252, + "epoch": 0.76, + "grad_norm": 0.3584657804945487, + "learning_rate": 2.922096730713634e-06, + "loss": 0.1711, "step": 16496 }, { - "epoch": 0.95, - "grad_norm": 1.3215017477278463, - "learning_rate": 1.424164343583745e-07, - "loss": 0.532, + "epoch": 0.76, + "grad_norm": 0.5692829015945852, + "learning_rate": 2.921045709336968e-06, + "loss": 0.3238, "step": 16497 }, { - "epoch": 0.95, - "grad_norm": 1.0583273806987223, - "learning_rate": 1.4210365916981882e-07, - "loss": 0.4026, + "epoch": 0.76, + "grad_norm": 0.2779637867452247, + "learning_rate": 2.919994844681524e-06, + "loss": 0.2504, "step": 16498 }, { - "epoch": 0.95, - "grad_norm": 0.3118227358818172, - "learning_rate": 1.4179122535978862e-07, - "loss": 0.2325, + "epoch": 0.76, + "grad_norm": 1.0317487955384954, + "learning_rate": 2.918944136770574e-06, + "loss": 0.3014, "step": 16499 }, { - "epoch": 0.95, - "grad_norm": 0.34214996786542395, - "learning_rate": 1.414791329391052e-07, - "loss": 0.3061, + "epoch": 0.76, + "grad_norm": 0.43710011670991133, + "learning_rate": 2.917893585627375e-06, + "loss": 0.2669, "step": 16500 }, { - "epoch": 0.95, - "grad_norm": 0.3931429662229714, - "learning_rate": 1.4116738191857437e-07, - "loss": 0.3039, + "epoch": 0.76, + "grad_norm": 0.35784923338863056, + "learning_rate": 2.9168431912751805e-06, + "loss": 0.2664, "step": 16501 }, { - "epoch": 0.95, - "grad_norm": 0.23757841266563745, - "learning_rate": 1.4085597230899418e-07, - "loss": 0.167, + "epoch": 0.76, + "grad_norm": 0.2693327361282062, + "learning_rate": 2.9157929537372577e-06, + "loss": 0.1929, "step": 16502 }, { - "epoch": 0.95, - "grad_norm": 1.2952326611305722, - "learning_rate": 1.4054490412114817e-07, - "loss": 0.5202, + "epoch": 0.76, + "grad_norm": 0.9708979208811768, + "learning_rate": 2.914742873036848e-06, + "loss": 0.3924, "step": 16503 }, { - "epoch": 0.95, - "grad_norm": 0.3303112645977888, - "learning_rate": 1.402341773658078e-07, - "loss": 0.2998, + "epoch": 0.76, + "grad_norm": 0.4536809152731579, + "learning_rate": 2.9136929491972044e-06, + "loss": 0.2469, "step": 16504 }, { - "epoch": 0.95, - "grad_norm": 0.3496116138968531, - "learning_rate": 1.3992379205373219e-07, - "loss": 0.2641, + "epoch": 0.76, + "grad_norm": 0.9155742549471608, + "learning_rate": 2.9126431822415658e-06, + "loss": 0.4448, "step": 16505 }, { - "epoch": 0.95, - "grad_norm": 0.6284477927092317, - "learning_rate": 1.3961374819567386e-07, - "loss": 0.3239, + "epoch": 0.76, + "grad_norm": 0.36295944201790226, + "learning_rate": 2.9115935721931766e-06, + "loss": 0.2668, "step": 16506 }, { - "epoch": 0.95, - "grad_norm": 0.29321421158262606, - "learning_rate": 1.3930404580236646e-07, - "loss": 0.1722, + "epoch": 0.76, + "grad_norm": 0.4635464661092478, + "learning_rate": 2.910544119075277e-06, + "loss": 0.3123, "step": 16507 }, { - "epoch": 0.95, - "grad_norm": 0.27799789142741405, - "learning_rate": 1.3899468488453583e-07, - "loss": 0.2652, + "epoch": 0.76, + "grad_norm": 0.2866690306469791, + "learning_rate": 2.9094948229110952e-06, + "loss": 0.1847, "step": 16508 }, { - "epoch": 0.95, - "grad_norm": 0.5390190495988705, - "learning_rate": 1.3868566545289563e-07, - "loss": 0.2118, + "epoch": 0.76, + "grad_norm": 0.45805280713826424, + "learning_rate": 2.908445683723864e-06, + "loss": 0.2663, "step": 16509 }, { - "epoch": 0.95, - "grad_norm": 1.2106732546022587, - "learning_rate": 1.383769875181462e-07, - "loss": 0.6369, + "epoch": 0.76, + "grad_norm": 0.41591185005957104, + "learning_rate": 2.907396701536813e-06, + "loss": 0.2496, "step": 16510 }, { - "epoch": 0.95, - "grad_norm": 0.3303768375574665, - "learning_rate": 1.38068651090979e-07, - "loss": 0.2693, + "epoch": 0.76, + "grad_norm": 0.5808348129969879, + "learning_rate": 2.90634787637316e-06, + "loss": 0.3289, "step": 16511 }, { - "epoch": 0.95, - "grad_norm": 0.31858858994348516, - "learning_rate": 1.377606561820699e-07, - "loss": 0.2569, + "epoch": 0.76, + "grad_norm": 0.9505693483730848, + "learning_rate": 2.9052992082561314e-06, + "loss": 0.4571, "step": 16512 }, { - "epoch": 0.95, - "grad_norm": 0.41073201497419076, - "learning_rate": 1.3745300280208373e-07, - "loss": 0.249, + "epoch": 0.76, + "grad_norm": 0.4324496263522261, + "learning_rate": 2.904250697208937e-06, + "loss": 0.2794, "step": 16513 }, { - "epoch": 0.95, - "grad_norm": 0.25752711484513774, - "learning_rate": 1.371456909616764e-07, - "loss": 0.2332, + "epoch": 0.76, + "grad_norm": 0.20934092649358407, + "learning_rate": 2.9032023432547927e-06, + "loss": 0.2024, "step": 16514 }, { - "epoch": 0.95, - "grad_norm": 1.4245672657375734, - "learning_rate": 1.3683872067149052e-07, - "loss": 0.6246, + "epoch": 0.76, + "grad_norm": 0.9657513927505617, + "learning_rate": 2.90215414641691e-06, + "loss": 0.2842, "step": 16515 }, { - "epoch": 0.95, - "grad_norm": 0.27505237674289085, - "learning_rate": 1.3653209194215534e-07, - "loss": 0.2159, + "epoch": 0.76, + "grad_norm": 0.4383857468575562, + "learning_rate": 2.9011061067184952e-06, + "loss": 0.2624, "step": 16516 }, { - "epoch": 0.95, - "grad_norm": 0.3313341959115871, - "learning_rate": 1.3622580478428903e-07, - "loss": 0.2806, + "epoch": 0.76, + "grad_norm": 0.42076545986855673, + "learning_rate": 2.9000582241827504e-06, + "loss": 0.315, "step": 16517 }, { - "epoch": 0.95, - "grad_norm": 0.6390544637914571, - "learning_rate": 1.3591985920849981e-07, - "loss": 0.3746, + "epoch": 0.76, + "grad_norm": 0.5367414523551842, + "learning_rate": 2.899010498832866e-06, + "loss": 0.3862, "step": 16518 }, { - "epoch": 0.95, - "grad_norm": 0.29075587750528226, - "learning_rate": 1.356142552253814e-07, - "loss": 0.1736, + "epoch": 0.76, + "grad_norm": 0.3453438563422978, + "learning_rate": 2.897962930692052e-06, + "loss": 0.218, "step": 16519 }, { - "epoch": 0.95, - "grad_norm": 0.28201683673325467, - "learning_rate": 1.3530899284551756e-07, - "loss": 0.2628, + "epoch": 0.76, + "grad_norm": 0.3296601959703274, + "learning_rate": 2.896915519783491e-06, + "loss": 0.1494, "step": 16520 }, { - "epoch": 0.95, - "grad_norm": 0.5231549591416089, - "learning_rate": 1.3500407207947875e-07, - "loss": 0.2276, + "epoch": 0.76, + "grad_norm": 0.5211773041965411, + "learning_rate": 2.8958682661303774e-06, + "loss": 0.2994, "step": 16521 }, { - "epoch": 0.95, - "grad_norm": 0.6591201808581352, - "learning_rate": 1.3469949293782426e-07, - "loss": 0.2343, + "epoch": 0.76, + "grad_norm": 0.39868423446807105, + "learning_rate": 2.894821169755889e-06, + "loss": 0.2115, "step": 16522 }, { - "epoch": 0.95, - "grad_norm": 0.35442038050860036, - "learning_rate": 1.3439525543110232e-07, - "loss": 0.285, + "epoch": 0.76, + "grad_norm": 1.4668791941985382, + "learning_rate": 2.893774230683213e-06, + "loss": 0.5821, "step": 16523 }, { - "epoch": 0.95, - "grad_norm": 0.3256582404882992, - "learning_rate": 1.3409135956984897e-07, - "loss": 0.2858, + "epoch": 0.76, + "grad_norm": 1.2660858894239166, + "learning_rate": 2.8927274489355296e-06, + "loss": 0.7786, "step": 16524 }, { - "epoch": 0.95, - "grad_norm": 0.4867984784902485, - "learning_rate": 1.337878053645869e-07, - "loss": 0.1892, + "epoch": 0.76, + "grad_norm": 0.36184081604962254, + "learning_rate": 2.891680824536007e-06, + "loss": 0.2014, "step": 16525 }, { - "epoch": 0.95, - "grad_norm": 0.2675448451742374, - "learning_rate": 1.334845928258288e-07, - "loss": 0.2195, + "epoch": 0.76, + "grad_norm": 0.338881300278615, + "learning_rate": 2.89063435750782e-06, + "loss": 0.2974, "step": 16526 }, { - "epoch": 0.95, - "grad_norm": 0.4860536461056225, - "learning_rate": 1.331817219640752e-07, - "loss": 0.2422, + "epoch": 0.76, + "grad_norm": 0.3828896905278202, + "learning_rate": 2.8895880478741357e-06, + "loss": 0.2425, "step": 16527 }, { - "epoch": 0.95, - "grad_norm": 0.3300567767788356, - "learning_rate": 1.3287919278981544e-07, - "loss": 0.2799, + "epoch": 0.76, + "grad_norm": 0.4433167756198432, + "learning_rate": 2.8885418956581226e-06, + "loss": 0.1754, "step": 16528 }, { - "epoch": 0.95, - "grad_norm": 0.33493831219224574, - "learning_rate": 1.3257700531352334e-07, - "loss": 0.2111, + "epoch": 0.76, + "grad_norm": 0.40868910817760534, + "learning_rate": 2.8874959008829372e-06, + "loss": 0.2973, "step": 16529 }, { - "epoch": 0.95, - "grad_norm": 0.7971942715761426, - "learning_rate": 1.3227515954566506e-07, - "loss": 0.4725, + "epoch": 0.76, + "grad_norm": 0.5422769469302408, + "learning_rate": 2.886450063571735e-06, + "loss": 0.3736, "step": 16530 }, { - "epoch": 0.95, - "grad_norm": 1.2101032035959423, - "learning_rate": 1.319736554966955e-07, - "loss": 0.4656, + "epoch": 0.76, + "grad_norm": 0.6031179373067898, + "learning_rate": 2.885404383747672e-06, + "loss": 0.3471, "step": 16531 }, { - "epoch": 0.95, - "grad_norm": 0.2161492687029256, - "learning_rate": 1.31672493177053e-07, - "loss": 0.2144, + "epoch": 0.76, + "grad_norm": 0.43487003035821237, + "learning_rate": 2.884358861433899e-06, + "loss": 0.251, "step": 16532 }, { - "epoch": 0.95, - "grad_norm": 0.2706126200232461, - "learning_rate": 1.3137167259716698e-07, - "loss": 0.1613, + "epoch": 0.76, + "grad_norm": 0.34554071098732797, + "learning_rate": 2.8833134966535658e-06, + "loss": 0.2558, "step": 16533 }, { - "epoch": 0.95, - "grad_norm": 0.7201379209477112, - "learning_rate": 1.310711937674569e-07, - "loss": 0.3627, + "epoch": 0.76, + "grad_norm": 0.3068776341201295, + "learning_rate": 2.8822682894298095e-06, + "loss": 0.1981, "step": 16534 }, { - "epoch": 0.95, - "grad_norm": 0.3188833536266433, - "learning_rate": 1.3077105669832556e-07, - "loss": 0.2127, + "epoch": 0.76, + "grad_norm": 0.45438020977869265, + "learning_rate": 2.881223239785772e-06, + "loss": 0.2251, "step": 16535 }, { - "epoch": 0.95, - "grad_norm": 0.353800166876077, - "learning_rate": 1.3047126140016907e-07, - "loss": 0.29, + "epoch": 0.76, + "grad_norm": 1.306898103716611, + "learning_rate": 2.8801783477445956e-06, + "loss": 0.7305, "step": 16536 }, { - "epoch": 0.95, - "grad_norm": 1.152362615048209, - "learning_rate": 1.3017180788336804e-07, - "loss": 0.5908, + "epoch": 0.76, + "grad_norm": 0.3188042951860328, + "learning_rate": 2.8791336133294047e-06, + "loss": 0.268, "step": 16537 }, { - "epoch": 0.95, - "grad_norm": 0.2201443708188749, - "learning_rate": 1.29872696158293e-07, - "loss": 0.1572, + "epoch": 0.76, + "grad_norm": 0.37195990488388075, + "learning_rate": 2.878089036563335e-06, + "loss": 0.2363, "step": 16538 }, { - "epoch": 0.95, - "grad_norm": 0.39768952646586436, - "learning_rate": 1.295739262353013e-07, - "loss": 0.2506, + "epoch": 0.76, + "grad_norm": 0.5621084973668745, + "learning_rate": 2.8770446174695067e-06, + "loss": 0.2931, "step": 16539 }, { - "epoch": 0.95, - "grad_norm": 0.3711468804434103, - "learning_rate": 1.2927549812474128e-07, - "loss": 0.3222, + "epoch": 0.76, + "grad_norm": 0.32366480160379035, + "learning_rate": 2.876000356071046e-06, + "loss": 0.2278, "step": 16540 }, { - "epoch": 0.95, - "grad_norm": 0.3875931478557323, - "learning_rate": 1.2897741183694578e-07, - "loss": 0.2718, + "epoch": 0.76, + "grad_norm": 0.4375931423443274, + "learning_rate": 2.8749562523910744e-06, + "loss": 0.2405, "step": 16541 }, { - "epoch": 0.95, - "grad_norm": 0.5040208709722376, - "learning_rate": 1.286796673822388e-07, - "loss": 0.2931, + "epoch": 0.76, + "grad_norm": 0.5513827025180599, + "learning_rate": 2.8739123064527007e-06, + "loss": 0.3727, "step": 16542 }, { - "epoch": 0.95, - "grad_norm": 0.3644096645649208, - "learning_rate": 1.2838226477092875e-07, - "loss": 0.2835, + "epoch": 0.76, + "grad_norm": 0.5655571202289628, + "learning_rate": 2.872868518279044e-06, + "loss": 0.3014, "step": 16543 }, { - "epoch": 0.95, - "grad_norm": 0.43199632651889797, - "learning_rate": 1.2808520401331737e-07, - "loss": 0.2786, + "epoch": 0.76, + "grad_norm": 0.43001945437126393, + "learning_rate": 2.871824887893202e-06, + "loss": 0.3054, "step": 16544 }, { - "epoch": 0.95, - "grad_norm": 0.26276909694452727, - "learning_rate": 1.27788485119692e-07, - "loss": 0.142, + "epoch": 0.76, + "grad_norm": 0.3616648253108674, + "learning_rate": 2.8707814153182935e-06, + "loss": 0.26, "step": 16545 }, { - "epoch": 0.95, - "grad_norm": 0.6481758566901482, - "learning_rate": 1.2749210810032664e-07, - "loss": 0.3816, + "epoch": 0.76, + "grad_norm": 0.2648527893966305, + "learning_rate": 2.8697381005774126e-06, + "loss": 0.1399, "step": 16546 }, { - "epoch": 0.95, - "grad_norm": 0.3026318614423293, - "learning_rate": 1.2719607296548309e-07, - "loss": 0.2441, + "epoch": 0.76, + "grad_norm": 0.4187983498661776, + "learning_rate": 2.868694943693655e-06, + "loss": 0.2603, "step": 16547 }, { - "epoch": 0.95, - "grad_norm": 0.31330158406573605, - "learning_rate": 1.2690037972541646e-07, - "loss": 0.2622, + "epoch": 0.76, + "grad_norm": 0.778209187746288, + "learning_rate": 2.8676519446901187e-06, + "loss": 0.321, "step": 16548 }, { - "epoch": 0.95, - "grad_norm": 1.5618122703655006, - "learning_rate": 1.2660502839036526e-07, - "loss": 0.5628, + "epoch": 0.76, + "grad_norm": 0.30508090710873276, + "learning_rate": 2.8666091035898935e-06, + "loss": 0.2364, "step": 16549 }, { - "epoch": 0.95, - "grad_norm": 0.3455198798238632, - "learning_rate": 1.2631001897055683e-07, - "loss": 0.2721, + "epoch": 0.76, + "grad_norm": 0.4004046834903637, + "learning_rate": 2.8655664204160718e-06, + "loss": 0.3095, "step": 16550 }, { - "epoch": 0.95, - "grad_norm": 0.2731384236231438, - "learning_rate": 1.2601535147620746e-07, - "loss": 0.1375, + "epoch": 0.76, + "grad_norm": 1.4446799760631164, + "learning_rate": 2.8645238951917287e-06, + "loss": 0.278, "step": 16551 }, { - "epoch": 0.95, - "grad_norm": 0.3394524321219831, - "learning_rate": 1.2572102591752234e-07, - "loss": 0.3059, + "epoch": 0.76, + "grad_norm": 0.2633997565884859, + "learning_rate": 2.8634815279399497e-06, + "loss": 0.1412, "step": 16552 }, { - "epoch": 0.95, - "grad_norm": 0.30354042132773396, - "learning_rate": 1.2542704230469326e-07, - "loss": 0.2326, + "epoch": 0.76, + "grad_norm": 0.2844974422348613, + "learning_rate": 2.8624393186838152e-06, + "loss": 0.2618, "step": 16553 }, { - "epoch": 0.95, - "grad_norm": 1.1255505639202197, - "learning_rate": 1.2513340064790102e-07, - "loss": 0.6941, + "epoch": 0.76, + "grad_norm": 1.0553809986614044, + "learning_rate": 2.8613972674463908e-06, + "loss": 0.3193, "step": 16554 }, { - "epoch": 0.95, - "grad_norm": 0.35766936306315206, - "learning_rate": 1.2484010095731414e-07, - "loss": 0.246, + "epoch": 0.76, + "grad_norm": 0.5527285769856833, + "learning_rate": 2.860355374250755e-06, + "loss": 0.304, "step": 16555 }, { - "epoch": 0.95, - "grad_norm": 0.3323040325151218, - "learning_rate": 1.2454714324309115e-07, - "loss": 0.2591, + "epoch": 0.76, + "grad_norm": 0.40881719124444355, + "learning_rate": 2.859313639119966e-06, + "loss": 0.2762, "step": 16556 }, { - "epoch": 0.95, - "grad_norm": 0.7689015974335848, - "learning_rate": 1.2425452751537503e-07, - "loss": 0.3713, + "epoch": 0.76, + "grad_norm": 0.3851466268295038, + "learning_rate": 2.858272062077091e-06, + "loss": 0.3088, "step": 16557 }, { - "epoch": 0.95, - "grad_norm": 0.2691954444647513, - "learning_rate": 1.2396225378430105e-07, - "loss": 0.144, + "epoch": 0.76, + "grad_norm": 0.1585462992668782, + "learning_rate": 2.8572306431451914e-06, + "loss": 0.0724, "step": 16558 }, { - "epoch": 0.95, - "grad_norm": 0.38664284669524407, - "learning_rate": 1.2367032205998775e-07, - "loss": 0.2492, + "epoch": 0.76, + "grad_norm": 0.43936903534590815, + "learning_rate": 2.8561893823473188e-06, + "loss": 0.2858, "step": 16559 }, { - "epoch": 0.95, - "grad_norm": 0.30289171963027917, - "learning_rate": 1.2337873235254704e-07, - "loss": 0.3089, + "epoch": 0.76, + "grad_norm": 0.9961653666244219, + "learning_rate": 2.8551482797065312e-06, + "loss": 0.3809, "step": 16560 }, { - "epoch": 0.95, - "grad_norm": 1.3377802262789582, - "learning_rate": 1.2308748467207753e-07, - "loss": 0.2227, + "epoch": 0.76, + "grad_norm": 0.33979836562281085, + "learning_rate": 2.854107335245868e-06, + "loss": 0.2321, "step": 16561 }, { - "epoch": 0.95, - "grad_norm": 0.39685089389976136, - "learning_rate": 1.2279657902866226e-07, - "loss": 0.2741, + "epoch": 0.76, + "grad_norm": 0.4472726076861613, + "learning_rate": 2.8530665489883867e-06, + "loss": 0.3363, "step": 16562 }, { - "epoch": 0.95, - "grad_norm": 0.34991767076091934, - "learning_rate": 1.225060154323776e-07, - "loss": 0.2956, + "epoch": 0.76, + "grad_norm": 1.5969552984003739, + "learning_rate": 2.8520259209571222e-06, + "loss": 0.6396, "step": 16563 }, { - "epoch": 0.95, - "grad_norm": 0.2420095759472581, - "learning_rate": 1.222157938932833e-07, - "loss": 0.1584, + "epoch": 0.76, + "grad_norm": 0.2401999395175981, + "learning_rate": 2.8509854511751166e-06, + "loss": 0.0805, "step": 16564 }, { - "epoch": 0.95, - "grad_norm": 0.4161152844228987, - "learning_rate": 1.219259144214324e-07, - "loss": 0.2804, + "epoch": 0.76, + "grad_norm": 0.262943268238002, + "learning_rate": 2.8499451396654e-06, + "loss": 0.253, "step": 16565 }, { - "epoch": 0.95, - "grad_norm": 0.5728617107352475, - "learning_rate": 1.216363770268625e-07, - "loss": 0.4119, + "epoch": 0.76, + "grad_norm": 0.8429604173971407, + "learning_rate": 2.8489049864510053e-06, + "loss": 0.3883, "step": 16566 }, { - "epoch": 0.95, - "grad_norm": 0.5569364520388921, - "learning_rate": 1.2134718171960103e-07, - "loss": 0.3633, + "epoch": 0.76, + "grad_norm": 0.6585996025071122, + "learning_rate": 2.8478649915549663e-06, + "loss": 0.2605, "step": 16567 }, { - "epoch": 0.95, - "grad_norm": 0.2639185558147655, - "learning_rate": 1.2105832850966004e-07, - "loss": 0.2085, + "epoch": 0.76, + "grad_norm": 0.35127952965274456, + "learning_rate": 2.8468251550003e-06, + "loss": 0.2611, "step": 16568 }, { - "epoch": 0.95, - "grad_norm": 0.59735708477971, - "learning_rate": 1.2076981740704485e-07, - "loss": 0.3744, + "epoch": 0.76, + "grad_norm": 0.3650854904187938, + "learning_rate": 2.84578547681003e-06, + "loss": 0.2928, "step": 16569 }, { - "epoch": 0.95, - "grad_norm": 0.34765113855029134, - "learning_rate": 1.2048164842174636e-07, - "loss": 0.1818, + "epoch": 0.76, + "grad_norm": 0.4908636684855845, + "learning_rate": 2.844745957007178e-06, + "loss": 0.1911, "step": 16570 }, { - "epoch": 0.95, - "grad_norm": 0.2991838323871734, - "learning_rate": 1.2019382156374326e-07, - "loss": 0.1983, + "epoch": 0.76, + "grad_norm": 0.2843779950599411, + "learning_rate": 2.84370659561475e-06, + "loss": 0.1772, "step": 16571 }, { - "epoch": 0.95, - "grad_norm": 0.33619733070672253, - "learning_rate": 1.1990633684300424e-07, - "loss": 0.2832, + "epoch": 0.76, + "grad_norm": 0.6646684203090293, + "learning_rate": 2.8426673926557646e-06, + "loss": 0.3829, "step": 16572 }, { - "epoch": 0.95, - "grad_norm": 0.9724457561401293, - "learning_rate": 1.1961919426948244e-07, - "loss": 0.4762, + "epoch": 0.76, + "grad_norm": 0.3874664238666174, + "learning_rate": 2.8416283481532214e-06, + "loss": 0.2762, "step": 16573 }, { - "epoch": 0.95, - "grad_norm": 0.3870489682298207, - "learning_rate": 1.1933239385312324e-07, - "loss": 0.2104, + "epoch": 0.76, + "grad_norm": 0.3544642621591675, + "learning_rate": 2.8405894621301276e-06, + "loss": 0.214, "step": 16574 }, { - "epoch": 0.95, - "grad_norm": 0.361921769610525, - "learning_rate": 1.190459356038598e-07, - "loss": 0.2939, + "epoch": 0.76, + "grad_norm": 1.2150027008016253, + "learning_rate": 2.839550734609485e-06, + "loss": 0.4971, "step": 16575 }, { - "epoch": 0.95, - "grad_norm": 0.3518873026409385, - "learning_rate": 1.1875981953160975e-07, - "loss": 0.2099, + "epoch": 0.76, + "grad_norm": 0.29534037613152425, + "learning_rate": 2.8385121656142856e-06, + "loss": 0.2098, "step": 16576 }, { - "epoch": 0.95, - "grad_norm": 0.3527276444123807, - "learning_rate": 1.1847404564628185e-07, - "loss": 0.1475, + "epoch": 0.76, + "grad_norm": 0.2988305251242387, + "learning_rate": 2.837473755167528e-06, + "loss": 0.2174, "step": 16577 }, { - "epoch": 0.95, - "grad_norm": 0.2743531371236152, - "learning_rate": 1.181886139577737e-07, - "loss": 0.2313, + "epoch": 0.76, + "grad_norm": 0.45515509273025667, + "learning_rate": 2.836435503292191e-06, + "loss": 0.2732, "step": 16578 }, { - "epoch": 0.95, - "grad_norm": 0.338837035760285, - "learning_rate": 1.1790352447596853e-07, - "loss": 0.3039, + "epoch": 0.76, + "grad_norm": 0.7282177041243947, + "learning_rate": 2.8353974100112737e-06, + "loss": 0.4261, "step": 16579 }, { - "epoch": 0.95, - "grad_norm": 0.5475170217274716, - "learning_rate": 1.1761877721073845e-07, - "loss": 0.3508, + "epoch": 0.76, + "grad_norm": 0.36821536275426064, + "learning_rate": 2.8343594753477498e-06, + "loss": 0.2831, "step": 16580 }, { - "epoch": 0.95, - "grad_norm": 0.5025763106213428, - "learning_rate": 1.1733437217194665e-07, - "loss": 0.2398, + "epoch": 0.76, + "grad_norm": 0.3698198260820381, + "learning_rate": 2.833321699324604e-06, + "loss": 0.2307, "step": 16581 }, { - "epoch": 0.95, - "grad_norm": 0.535478845267447, - "learning_rate": 1.1705030936943973e-07, - "loss": 0.2167, + "epoch": 0.76, + "grad_norm": 0.46603132521891044, + "learning_rate": 2.832284081964806e-06, + "loss": 0.2342, "step": 16582 }, { - "epoch": 0.95, - "grad_norm": 0.3968889128451416, - "learning_rate": 1.1676658881305647e-07, - "loss": 0.304, + "epoch": 0.76, + "grad_norm": 0.35819382942409606, + "learning_rate": 2.8312466232913284e-06, + "loss": 0.2452, "step": 16583 }, { - "epoch": 0.95, - "grad_norm": 0.21367273624373442, - "learning_rate": 1.1648321051262012e-07, - "loss": 0.196, + "epoch": 0.76, + "grad_norm": 0.3257399878017025, + "learning_rate": 2.8302093233271454e-06, + "loss": 0.2357, "step": 16584 }, { - "epoch": 0.95, - "grad_norm": 0.80451182074773, - "learning_rate": 1.1620017447794507e-07, - "loss": 0.3895, + "epoch": 0.76, + "grad_norm": 0.5048493698585362, + "learning_rate": 2.8291721820952146e-06, + "loss": 0.3577, "step": 16585 }, { - "epoch": 0.95, - "grad_norm": 0.5701986470439201, - "learning_rate": 1.1591748071883458e-07, - "loss": 0.2978, + "epoch": 0.76, + "grad_norm": 0.3421487850399439, + "learning_rate": 2.828135199618499e-06, + "loss": 0.2613, "step": 16586 }, { - "epoch": 0.95, - "grad_norm": 0.2685745996616732, - "learning_rate": 1.1563512924507525e-07, - "loss": 0.246, + "epoch": 0.76, + "grad_norm": 1.3481064742908084, + "learning_rate": 2.827098375919962e-06, + "loss": 0.1767, "step": 16587 }, { - "epoch": 0.95, - "grad_norm": 1.4199611521261792, - "learning_rate": 1.1535312006644706e-07, - "loss": 0.7062, + "epoch": 0.76, + "grad_norm": 0.4774937426962428, + "learning_rate": 2.8260617110225506e-06, + "loss": 0.3301, "step": 16588 }, { - "epoch": 0.95, - "grad_norm": 0.3948249665352689, - "learning_rate": 1.150714531927144e-07, - "loss": 0.2906, + "epoch": 0.76, + "grad_norm": 0.2966956709622009, + "learning_rate": 2.825025204949222e-06, + "loss": 0.2452, "step": 16589 }, { - "epoch": 0.95, - "grad_norm": 0.5276002741172402, - "learning_rate": 1.1479012863363282e-07, - "loss": 0.3783, + "epoch": 0.76, + "grad_norm": 0.4947360398641383, + "learning_rate": 2.8239888577229156e-06, + "loss": 0.1834, "step": 16590 }, { - "epoch": 0.95, - "grad_norm": 0.24658030549592613, - "learning_rate": 1.1450914639894451e-07, - "loss": 0.2207, + "epoch": 0.76, + "grad_norm": 0.7870808134103285, + "learning_rate": 2.822952669366581e-06, + "loss": 0.4666, "step": 16591 }, { - "epoch": 0.95, - "grad_norm": 0.3155271224796519, - "learning_rate": 1.1422850649837836e-07, - "loss": 0.223, + "epoch": 0.76, + "grad_norm": 0.2989686619789008, + "learning_rate": 2.821916639903156e-06, + "loss": 0.233, "step": 16592 }, { - "epoch": 0.95, - "grad_norm": 0.5925497152046855, - "learning_rate": 1.1394820894165437e-07, - "loss": 0.2979, + "epoch": 0.76, + "grad_norm": 0.4481966754326238, + "learning_rate": 2.820880769355582e-06, + "loss": 0.3085, "step": 16593 }, { - "epoch": 0.95, - "grad_norm": 1.5226704293130526, - "learning_rate": 1.1366825373847923e-07, - "loss": 0.2853, + "epoch": 0.76, + "grad_norm": 0.6442032863755881, + "learning_rate": 2.8198450577467882e-06, + "loss": 0.267, "step": 16594 }, { - "epoch": 0.95, - "grad_norm": 0.4706632168946928, - "learning_rate": 1.1338864089854629e-07, - "loss": 0.2925, + "epoch": 0.76, + "grad_norm": 0.39940322690931307, + "learning_rate": 2.8188095050996976e-06, + "loss": 0.2648, "step": 16595 }, { - "epoch": 0.95, - "grad_norm": 0.35243983811365676, - "learning_rate": 1.1310937043154113e-07, - "loss": 0.2916, + "epoch": 0.76, + "grad_norm": 0.4696527092936459, + "learning_rate": 2.8177741114372504e-06, + "loss": 0.2955, "step": 16596 }, { - "epoch": 0.95, - "grad_norm": 0.31547774494065683, - "learning_rate": 1.128304423471327e-07, - "loss": 0.1765, + "epoch": 0.76, + "grad_norm": 0.3552202605287625, + "learning_rate": 2.816738876782359e-06, + "loss": 0.2597, "step": 16597 }, { - "epoch": 0.95, - "grad_norm": 0.640983379760982, - "learning_rate": 1.1255185665497992e-07, - "loss": 0.3632, + "epoch": 0.76, + "grad_norm": 0.3555467813028991, + "learning_rate": 2.8157038011579485e-06, + "loss": 0.2357, "step": 16598 }, { - "epoch": 0.95, - "grad_norm": 0.2510122641943343, - "learning_rate": 1.1227361336473175e-07, - "loss": 0.256, + "epoch": 0.76, + "grad_norm": 0.7354285436811204, + "learning_rate": 2.8146688845869287e-06, + "loss": 0.2673, "step": 16599 }, { - "epoch": 0.95, - "grad_norm": 1.4540579733239993, - "learning_rate": 1.119957124860238e-07, - "loss": 0.203, + "epoch": 0.76, + "grad_norm": 0.5202925517063207, + "learning_rate": 2.813634127092213e-06, + "loss": 0.272, "step": 16600 }, { - "epoch": 0.95, - "grad_norm": 0.6368166691950049, - "learning_rate": 1.1171815402847841e-07, - "loss": 0.3969, + "epoch": 0.76, + "grad_norm": 0.30189947934203837, + "learning_rate": 2.8125995286967155e-06, + "loss": 0.2372, "step": 16601 }, { - "epoch": 0.95, - "grad_norm": 0.351165725596696, - "learning_rate": 1.1144093800170786e-07, - "loss": 0.2781, + "epoch": 0.76, + "grad_norm": 1.005801220163093, + "learning_rate": 2.811565089423335e-06, + "loss": 0.4833, "step": 16602 }, { - "epoch": 0.95, - "grad_norm": 0.3683204475698124, - "learning_rate": 1.1116406441531335e-07, - "loss": 0.3086, + "epoch": 0.76, + "grad_norm": 0.29886796492084367, + "learning_rate": 2.810530809294977e-06, + "loss": 0.1424, "step": 16603 }, { - "epoch": 0.95, - "grad_norm": 0.23192559345215039, - "learning_rate": 1.1088753327888169e-07, - "loss": 0.1139, + "epoch": 0.76, + "grad_norm": 0.2565281610730216, + "learning_rate": 2.809496688334534e-06, + "loss": 0.2009, "step": 16604 }, { - "epoch": 0.95, - "grad_norm": 0.4094259485578723, - "learning_rate": 1.1061134460198964e-07, - "loss": 0.2794, + "epoch": 0.76, + "grad_norm": 0.3712917777889735, + "learning_rate": 2.8084627265649057e-06, + "loss": 0.2994, "step": 16605 }, { - "epoch": 0.95, - "grad_norm": 1.2833862226613364, - "learning_rate": 1.1033549839420066e-07, - "loss": 0.5491, + "epoch": 0.76, + "grad_norm": 0.665379295913935, + "learning_rate": 2.8074289240089835e-06, + "loss": 0.347, "step": 16606 }, { - "epoch": 0.95, - "grad_norm": 0.2600204249198676, - "learning_rate": 1.1005999466506822e-07, - "loss": 0.2268, + "epoch": 0.76, + "grad_norm": 0.357936723707498, + "learning_rate": 2.806395280689649e-06, + "loss": 0.1877, "step": 16607 }, { - "epoch": 0.95, - "grad_norm": 0.36192005945864036, - "learning_rate": 1.0978483342413359e-07, - "loss": 0.288, + "epoch": 0.76, + "grad_norm": 1.356453048193744, + "learning_rate": 2.8053617966297908e-06, + "loss": 0.4811, "step": 16608 }, { - "epoch": 0.95, - "grad_norm": 0.5971870477119776, - "learning_rate": 1.0951001468092471e-07, - "loss": 0.3491, + "epoch": 0.76, + "grad_norm": 0.3788025160831487, + "learning_rate": 2.804328471852288e-06, + "loss": 0.3244, "step": 16609 }, { - "epoch": 0.95, - "grad_norm": 0.1664000066752861, - "learning_rate": 1.0923553844495838e-07, - "loss": 0.1133, + "epoch": 0.76, + "grad_norm": 0.3097997971083851, + "learning_rate": 2.8032953063800192e-06, + "loss": 0.1827, "step": 16610 }, { - "epoch": 0.95, - "grad_norm": 0.3779109104685889, - "learning_rate": 1.0896140472574035e-07, - "loss": 0.3052, + "epoch": 0.76, + "grad_norm": 0.32356151705666597, + "learning_rate": 2.8022623002358575e-06, + "loss": 0.1908, "step": 16611 }, { - "epoch": 0.95, - "grad_norm": 0.48651148623474383, - "learning_rate": 1.0868761353276414e-07, - "loss": 0.3653, + "epoch": 0.76, + "grad_norm": 0.3594711604917871, + "learning_rate": 2.8012294534426645e-06, + "loss": 0.2887, "step": 16612 }, { - "epoch": 0.95, - "grad_norm": 0.6419661588127807, - "learning_rate": 1.0841416487550994e-07, - "loss": 0.2488, + "epoch": 0.76, + "grad_norm": 0.3288007572383773, + "learning_rate": 2.80019676602332e-06, + "loss": 0.189, "step": 16613 }, { - "epoch": 0.95, - "grad_norm": 0.3649414029689163, - "learning_rate": 1.0814105876344794e-07, - "loss": 0.2928, + "epoch": 0.76, + "grad_norm": 1.3344141207946567, + "learning_rate": 2.7991642380006754e-06, + "loss": 0.492, "step": 16614 }, { - "epoch": 0.95, - "grad_norm": 0.3596194680368247, - "learning_rate": 1.0786829520603503e-07, - "loss": 0.3083, + "epoch": 0.76, + "grad_norm": 0.43172421875337313, + "learning_rate": 2.7981318693975988e-06, + "loss": 0.2544, "step": 16615 }, { - "epoch": 0.95, - "grad_norm": 0.2692613514757282, - "learning_rate": 1.0759587421271811e-07, - "loss": 0.1606, + "epoch": 0.76, + "grad_norm": 0.3053720627459669, + "learning_rate": 2.797099660236937e-06, + "loss": 0.1848, "step": 16616 }, { - "epoch": 0.95, - "grad_norm": 0.3453726228128851, - "learning_rate": 1.0732379579293184e-07, - "loss": 0.1923, + "epoch": 0.76, + "grad_norm": 0.28341131538950487, + "learning_rate": 2.7960676105415474e-06, + "loss": 0.2485, "step": 16617 }, { - "epoch": 0.95, - "grad_norm": 1.3122016638571212, - "learning_rate": 1.0705205995609536e-07, - "loss": 0.5729, + "epoch": 0.76, + "grad_norm": 0.8107387239783755, + "learning_rate": 2.79503572033428e-06, + "loss": 0.3569, "step": 16618 }, { - "epoch": 0.95, - "grad_norm": 0.3154318714815696, - "learning_rate": 1.0678066671162113e-07, - "loss": 0.2759, + "epoch": 0.76, + "grad_norm": 0.42585142859728004, + "learning_rate": 2.7940039896379757e-06, + "loss": 0.2673, "step": 16619 }, { - "epoch": 0.95, - "grad_norm": 0.3255968830256943, - "learning_rate": 1.0650961606890719e-07, - "loss": 0.2382, + "epoch": 0.76, + "grad_norm": 0.334215290189977, + "learning_rate": 2.79297241847548e-06, + "loss": 0.246, "step": 16620 }, { - "epoch": 0.95, - "grad_norm": 1.2859265206571604, - "learning_rate": 1.062389080373405e-07, - "loss": 0.8102, + "epoch": 0.76, + "grad_norm": 1.3968604133136437, + "learning_rate": 2.791941006869626e-06, + "loss": 0.8431, "step": 16621 }, { - "epoch": 0.96, - "grad_norm": 0.26955193137320504, - "learning_rate": 1.0596854262629352e-07, - "loss": 0.1498, + "epoch": 0.76, + "grad_norm": 0.46752884394421024, + "learning_rate": 2.790909754843251e-06, + "loss": 0.2392, "step": 16622 }, { - "epoch": 0.96, - "grad_norm": 0.24722787460287485, - "learning_rate": 1.0569851984513102e-07, - "loss": 0.2281, + "epoch": 0.76, + "grad_norm": 0.4654964680462339, + "learning_rate": 2.7898786624191878e-06, + "loss": 0.2599, "step": 16623 }, { - "epoch": 0.96, - "grad_norm": 1.3302779187313543, - "learning_rate": 1.0542883970320328e-07, - "loss": 0.5446, + "epoch": 0.76, + "grad_norm": 0.2608099775844682, + "learning_rate": 2.78884772962026e-06, + "loss": 0.2181, "step": 16624 }, { - "epoch": 0.96, - "grad_norm": 0.5332660462662445, - "learning_rate": 1.051595022098506e-07, - "loss": 0.3458, + "epoch": 0.76, + "grad_norm": 0.3219723620114302, + "learning_rate": 2.7878169564692926e-06, + "loss": 0.2487, "step": 16625 }, { - "epoch": 0.96, - "grad_norm": 0.30330182429363667, - "learning_rate": 1.0489050737439777e-07, - "loss": 0.2059, + "epoch": 0.76, + "grad_norm": 1.734205897890928, + "learning_rate": 2.786786342989106e-06, + "loss": 0.2772, "step": 16626 }, { - "epoch": 0.96, - "grad_norm": 0.36778833648956744, - "learning_rate": 1.0462185520616064e-07, - "loss": 0.315, + "epoch": 0.76, + "grad_norm": 0.9248608356272644, + "learning_rate": 2.7857558892025227e-06, + "loss": 0.5727, "step": 16627 }, { - "epoch": 0.96, - "grad_norm": 0.23797657274293577, - "learning_rate": 1.0435354571444401e-07, - "loss": 0.1594, + "epoch": 0.76, + "grad_norm": 0.37553281069516226, + "learning_rate": 2.784725595132346e-06, + "loss": 0.2349, "step": 16628 }, { - "epoch": 0.96, - "grad_norm": 0.3561924831436635, - "learning_rate": 1.0408557890853821e-07, - "loss": 0.2595, + "epoch": 0.76, + "grad_norm": 0.35762631415032536, + "learning_rate": 2.783695460801391e-06, + "loss": 0.2438, "step": 16629 }, { - "epoch": 0.96, - "grad_norm": 0.4752465406071092, - "learning_rate": 1.038179547977236e-07, - "loss": 0.2584, + "epoch": 0.76, + "grad_norm": 0.3758488152278506, + "learning_rate": 2.7826654862324665e-06, + "loss": 0.1943, "step": 16630 }, { - "epoch": 0.96, - "grad_norm": 0.39518269810907725, - "learning_rate": 1.0355067339126723e-07, - "loss": 0.2972, + "epoch": 0.76, + "grad_norm": 0.7231118314822784, + "learning_rate": 2.7816356714483685e-06, + "loss": 0.305, "step": 16631 }, { - "epoch": 0.96, - "grad_norm": 0.36175612579907546, - "learning_rate": 1.0328373469842502e-07, - "loss": 0.2588, + "epoch": 0.76, + "grad_norm": 0.31842760778053686, + "learning_rate": 2.7806060164719027e-06, + "loss": 0.2604, "step": 16632 }, { - "epoch": 0.96, - "grad_norm": 1.571137981694321, - "learning_rate": 1.0301713872844288e-07, - "loss": 0.414, + "epoch": 0.76, + "grad_norm": 0.5101585576180016, + "learning_rate": 2.7795765213258585e-06, + "loss": 0.2823, "step": 16633 }, { - "epoch": 0.96, - "grad_norm": 0.3156274111668739, - "learning_rate": 1.0275088549055123e-07, - "loss": 0.226, + "epoch": 0.76, + "grad_norm": 0.5009212629755045, + "learning_rate": 2.7785471860330313e-06, + "loss": 0.2698, "step": 16634 }, { - "epoch": 0.96, - "grad_norm": 0.2669295187752056, - "learning_rate": 1.0248497499396936e-07, - "loss": 0.2455, + "epoch": 0.76, + "grad_norm": 0.48819682283973637, + "learning_rate": 2.7775180106162126e-06, + "loss": 0.3425, "step": 16635 }, { - "epoch": 0.96, - "grad_norm": 0.4475781213109659, - "learning_rate": 1.022194072479088e-07, - "loss": 0.1764, + "epoch": 0.76, + "grad_norm": 0.2514496933622202, + "learning_rate": 2.776488995098181e-06, + "loss": 0.1901, "step": 16636 }, { - "epoch": 0.96, - "grad_norm": 0.7397982641900223, - "learning_rate": 1.019541822615644e-07, - "loss": 0.4622, + "epoch": 0.76, + "grad_norm": 0.3129090758384421, + "learning_rate": 2.7754601395017233e-06, + "loss": 0.1944, "step": 16637 }, { - "epoch": 0.96, - "grad_norm": 0.32650303175664785, - "learning_rate": 1.0168930004412103e-07, - "loss": 0.2467, + "epoch": 0.76, + "grad_norm": 0.5758389032633211, + "learning_rate": 2.774431443849609e-06, + "loss": 0.3476, "step": 16638 }, { - "epoch": 0.96, - "grad_norm": 0.3796386468131196, - "learning_rate": 1.0142476060475137e-07, - "loss": 0.2462, + "epoch": 0.76, + "grad_norm": 0.8367311955566802, + "learning_rate": 2.773402908164625e-06, + "loss": 0.3065, "step": 16639 }, { - "epoch": 0.96, - "grad_norm": 0.8287190883309027, - "learning_rate": 1.0116056395261586e-07, - "loss": 0.327, + "epoch": 0.76, + "grad_norm": 0.3082929500626861, + "learning_rate": 2.772374532469535e-06, + "loss": 0.2377, "step": 16640 }, { - "epoch": 0.96, - "grad_norm": 0.20625582020929892, - "learning_rate": 1.0089671009686497e-07, - "loss": 0.2011, + "epoch": 0.76, + "grad_norm": 0.3662318333441316, + "learning_rate": 2.7713463167871036e-06, + "loss": 0.2848, "step": 16641 }, { - "epoch": 0.96, - "grad_norm": 0.5948962808042882, - "learning_rate": 1.0063319904663471e-07, - "loss": 0.4045, + "epoch": 0.76, + "grad_norm": 0.2296830271231695, + "learning_rate": 2.770318261140098e-06, + "loss": 0.0825, "step": 16642 }, { - "epoch": 0.96, - "grad_norm": 0.29778674786226755, - "learning_rate": 1.0037003081105223e-07, - "loss": 0.2352, + "epoch": 0.76, + "grad_norm": 0.4095104529086831, + "learning_rate": 2.7692903655512783e-06, + "loss": 0.2569, "step": 16643 }, { - "epoch": 0.96, - "grad_norm": 0.33424304060632115, - "learning_rate": 1.0010720539922914e-07, - "loss": 0.2542, + "epoch": 0.76, + "grad_norm": 0.5750583955930172, + "learning_rate": 2.7682626300434037e-06, + "loss": 0.3812, "step": 16644 }, { - "epoch": 0.96, - "grad_norm": 1.1934592174173186, - "learning_rate": 9.984472282026814e-08, - "loss": 0.702, + "epoch": 0.76, + "grad_norm": 0.3880356314569871, + "learning_rate": 2.7672350546392213e-06, + "loss": 0.3098, "step": 16645 }, { - "epoch": 0.96, - "grad_norm": 0.3274171866966553, - "learning_rate": 9.958258308325975e-08, - "loss": 0.2261, + "epoch": 0.76, + "grad_norm": 0.35144556799420035, + "learning_rate": 2.7662076393614846e-06, + "loss": 0.2156, "step": 16646 }, { - "epoch": 0.96, - "grad_norm": 0.34650531219482306, - "learning_rate": 9.932078619727892e-08, - "loss": 0.2777, + "epoch": 0.76, + "grad_norm": 1.4221263041241423, + "learning_rate": 2.7651803842329406e-06, + "loss": 0.605, "step": 16647 }, { - "epoch": 0.96, - "grad_norm": 0.3254717650580907, - "learning_rate": 9.905933217139397e-08, - "loss": 0.2203, + "epoch": 0.76, + "grad_norm": 0.2959753898217581, + "learning_rate": 2.7641532892763267e-06, + "loss": 0.2371, "step": 16648 }, { - "epoch": 0.96, - "grad_norm": 0.7048051219031087, - "learning_rate": 9.879822101465874e-08, - "loss": 0.2505, + "epoch": 0.76, + "grad_norm": 0.3105532073877698, + "learning_rate": 2.7631263545143895e-06, + "loss": 0.0878, "step": 16649 }, { - "epoch": 0.96, - "grad_norm": 0.3501335317692874, - "learning_rate": 9.853745273611604e-08, - "loss": 0.275, + "epoch": 0.76, + "grad_norm": 0.4237932618064866, + "learning_rate": 2.7620995799698557e-06, + "loss": 0.3003, "step": 16650 }, { - "epoch": 0.96, - "grad_norm": 0.34539266905883126, - "learning_rate": 9.827702734479528e-08, - "loss": 0.3104, + "epoch": 0.76, + "grad_norm": 0.6064180142677038, + "learning_rate": 2.761072965665461e-06, + "loss": 0.376, "step": 16651 }, { - "epoch": 0.96, - "grad_norm": 2.0335167832865215, - "learning_rate": 9.801694484971369e-08, - "loss": 0.2343, + "epoch": 0.76, + "grad_norm": 0.26896087832692145, + "learning_rate": 2.7600465116239373e-06, + "loss": 0.1809, "step": 16652 }, { - "epoch": 0.96, - "grad_norm": 0.30600946235260734, - "learning_rate": 9.775720525988076e-08, - "loss": 0.2483, + "epoch": 0.77, + "grad_norm": 0.4731513670379963, + "learning_rate": 2.7590202178680035e-06, + "loss": 0.2827, "step": 16653 }, { - "epoch": 0.96, - "grad_norm": 0.3128501072923363, - "learning_rate": 9.749780858429036e-08, - "loss": 0.2964, + "epoch": 0.77, + "grad_norm": 0.48030946361538474, + "learning_rate": 2.7579940844203857e-06, + "loss": 0.2337, "step": 16654 }, { - "epoch": 0.96, - "grad_norm": 0.2661670102671037, - "learning_rate": 9.723875483192536e-08, - "loss": 0.1817, + "epoch": 0.77, + "grad_norm": 0.2533427280532958, + "learning_rate": 2.7569681113037914e-06, + "loss": 0.1315, "step": 16655 }, { - "epoch": 0.96, - "grad_norm": 0.2967207607658545, - "learning_rate": 9.69800440117552e-08, - "loss": 0.2061, + "epoch": 0.77, + "grad_norm": 0.31692548136498766, + "learning_rate": 2.75594229854095e-06, + "loss": 0.2913, "step": 16656 }, { - "epoch": 0.96, - "grad_norm": 1.3288708256421893, - "learning_rate": 9.672167613274053e-08, - "loss": 0.7043, + "epoch": 0.77, + "grad_norm": 0.6914481942381092, + "learning_rate": 2.7549166461545608e-06, + "loss": 0.3203, "step": 16657 }, { - "epoch": 0.96, - "grad_norm": 0.3621252628942739, - "learning_rate": 9.64636512038286e-08, - "loss": 0.2824, + "epoch": 0.77, + "grad_norm": 0.5499335805219313, + "learning_rate": 2.7538911541673376e-06, + "loss": 0.3347, "step": 16658 }, { - "epoch": 0.96, - "grad_norm": 0.2996790863119093, - "learning_rate": 9.620596923395454e-08, - "loss": 0.2111, + "epoch": 0.77, + "grad_norm": 0.4419902387599507, + "learning_rate": 2.752865822601977e-06, + "loss": 0.221, "step": 16659 }, { - "epoch": 0.96, - "grad_norm": 0.7797861094274411, - "learning_rate": 9.594863023204226e-08, - "loss": 0.3865, + "epoch": 0.77, + "grad_norm": 0.41338232842479383, + "learning_rate": 2.7518406514811815e-06, + "loss": 0.2956, "step": 16660 }, { - "epoch": 0.96, - "grad_norm": 0.24464123847918023, - "learning_rate": 9.569163420700245e-08, - "loss": 0.183, + "epoch": 0.77, + "grad_norm": 0.2844506904536113, + "learning_rate": 2.750815640827652e-06, + "loss": 0.1498, "step": 16661 }, { - "epoch": 0.96, - "grad_norm": 0.3100921626514089, - "learning_rate": 9.543498116773576e-08, - "loss": 0.1861, + "epoch": 0.77, + "grad_norm": 0.40864717677829043, + "learning_rate": 2.749790790664074e-06, + "loss": 0.2411, "step": 16662 }, { - "epoch": 0.96, - "grad_norm": 0.3610536933026674, - "learning_rate": 9.51786711231295e-08, - "loss": 0.2764, + "epoch": 0.77, + "grad_norm": 0.6833746519196006, + "learning_rate": 2.748766101013143e-06, + "loss": 0.4332, "step": 16663 }, { - "epoch": 0.96, - "grad_norm": 0.6986584506879514, - "learning_rate": 9.492270408206106e-08, - "loss": 0.3745, + "epoch": 0.77, + "grad_norm": 0.30424384838326585, + "learning_rate": 2.7477415718975387e-06, + "loss": 0.274, "step": 16664 }, { - "epoch": 0.96, - "grad_norm": 0.2895372923549709, - "learning_rate": 9.46670800533922e-08, - "loss": 0.1944, + "epoch": 0.77, + "grad_norm": 0.4688709404107074, + "learning_rate": 2.746717203339946e-06, + "loss": 0.2287, "step": 16665 }, { - "epoch": 0.96, - "grad_norm": 0.3473184880519653, - "learning_rate": 9.441179904597697e-08, - "loss": 0.3016, + "epoch": 0.77, + "grad_norm": 0.3984056091544771, + "learning_rate": 2.745692995363047e-06, + "loss": 0.2095, "step": 16666 }, { - "epoch": 0.96, - "grad_norm": 0.38263612931781815, - "learning_rate": 9.415686106865496e-08, - "loss": 0.2469, + "epoch": 0.77, + "grad_norm": 0.3516878251970094, + "learning_rate": 2.7446689479895105e-06, + "loss": 0.1843, "step": 16667 }, { - "epoch": 0.96, - "grad_norm": 0.4296040570362124, - "learning_rate": 9.390226613025466e-08, - "loss": 0.248, + "epoch": 0.77, + "grad_norm": 0.25471891974360444, + "learning_rate": 2.7436450612420098e-06, + "loss": 0.2263, "step": 16668 }, { - "epoch": 0.96, - "grad_norm": 0.31216699027746436, - "learning_rate": 9.364801423959235e-08, - "loss": 0.1625, + "epoch": 0.77, + "grad_norm": 0.6999031771674309, + "learning_rate": 2.7426213351432174e-06, + "loss": 0.4218, "step": 16669 }, { - "epoch": 0.96, - "grad_norm": 0.33552034711426854, - "learning_rate": 9.339410540547433e-08, - "loss": 0.2803, + "epoch": 0.77, + "grad_norm": 0.9338050303878667, + "learning_rate": 2.7415977697157903e-06, + "loss": 0.4605, "step": 16670 }, { - "epoch": 0.96, - "grad_norm": 0.3310185221024679, - "learning_rate": 9.314053963669245e-08, - "loss": 0.254, + "epoch": 0.77, + "grad_norm": 0.4134699646373108, + "learning_rate": 2.7405743649823967e-06, + "loss": 0.2669, "step": 16671 }, { - "epoch": 0.96, - "grad_norm": 0.7250290339843236, - "learning_rate": 9.288731694202747e-08, - "loss": 0.2567, + "epoch": 0.77, + "grad_norm": 0.4063789576578708, + "learning_rate": 2.7395511209656833e-06, + "loss": 0.252, "step": 16672 }, { - "epoch": 0.96, - "grad_norm": 1.2242252500615893, - "learning_rate": 9.263443733024791e-08, - "loss": 0.7467, + "epoch": 0.77, + "grad_norm": 0.39139710941021477, + "learning_rate": 2.738528037688316e-06, + "loss": 0.2196, "step": 16673 }, { - "epoch": 0.96, - "grad_norm": 0.32309648425980414, - "learning_rate": 9.238190081011345e-08, - "loss": 0.2512, + "epoch": 0.77, + "grad_norm": 0.3749867210685117, + "learning_rate": 2.737505115172937e-06, + "loss": 0.2817, "step": 16674 }, { - "epoch": 0.96, - "grad_norm": 0.19987086897763234, - "learning_rate": 9.212970739036709e-08, - "loss": 0.1648, + "epoch": 0.77, + "grad_norm": 0.854438367201808, + "learning_rate": 2.736482353442198e-06, + "loss": 0.3603, "step": 16675 }, { - "epoch": 0.96, - "grad_norm": 0.5338994500229073, - "learning_rate": 9.187785707974183e-08, - "loss": 0.3529, + "epoch": 0.77, + "grad_norm": 0.30159227535861494, + "learning_rate": 2.7354597525187365e-06, + "loss": 0.2649, "step": 16676 }, { - "epoch": 0.96, - "grad_norm": 0.32266698545901074, - "learning_rate": 9.162634988696184e-08, - "loss": 0.2438, + "epoch": 0.77, + "grad_norm": 0.33685320228283155, + "learning_rate": 2.7344373124251934e-06, + "loss": 0.2341, "step": 16677 }, { - "epoch": 0.96, - "grad_norm": 0.5167703108082058, - "learning_rate": 9.137518582073345e-08, - "loss": 0.3471, + "epoch": 0.77, + "grad_norm": 1.570008510523658, + "learning_rate": 2.733415033184209e-06, + "loss": 0.237, "step": 16678 }, { - "epoch": 0.96, - "grad_norm": 0.4186625502370127, - "learning_rate": 9.112436488975751e-08, - "loss": 0.2912, + "epoch": 0.77, + "grad_norm": 0.5498174152184994, + "learning_rate": 2.7323929148184094e-06, + "loss": 0.2974, "step": 16679 }, { - "epoch": 0.96, - "grad_norm": 0.3821134635943973, - "learning_rate": 9.087388710271927e-08, - "loss": 0.2757, + "epoch": 0.77, + "grad_norm": 0.3442415737565152, + "learning_rate": 2.7313709573504288e-06, + "loss": 0.2671, "step": 16680 }, { - "epoch": 0.96, - "grad_norm": 0.25700002896802115, - "learning_rate": 9.06237524682918e-08, - "loss": 0.1689, + "epoch": 0.77, + "grad_norm": 0.3065745704001493, + "learning_rate": 2.7303491608028864e-06, + "loss": 0.2003, "step": 16681 }, { - "epoch": 0.96, - "grad_norm": 0.3561604016730922, - "learning_rate": 9.037396099513707e-08, - "loss": 0.2703, + "epoch": 0.77, + "grad_norm": 0.32807840181264986, + "learning_rate": 2.7293275251984074e-06, + "loss": 0.181, "step": 16682 }, { - "epoch": 0.96, - "grad_norm": 0.39261394030257535, - "learning_rate": 9.012451269190592e-08, - "loss": 0.2591, + "epoch": 0.77, + "grad_norm": 0.45344050212510834, + "learning_rate": 2.7283060505596126e-06, + "loss": 0.3201, "step": 16683 }, { - "epoch": 0.96, - "grad_norm": 0.6363852734792065, - "learning_rate": 8.987540756723811e-08, - "loss": 0.3443, + "epoch": 0.77, + "grad_norm": 0.40792247051837116, + "learning_rate": 2.7272847369091093e-06, + "loss": 0.318, "step": 16684 }, { - "epoch": 0.96, - "grad_norm": 1.3103970018735047, - "learning_rate": 8.962664562975676e-08, - "loss": 0.3247, + "epoch": 0.77, + "grad_norm": 0.6731072566515036, + "learning_rate": 2.726263584269513e-06, + "loss": 0.2704, "step": 16685 }, { - "epoch": 0.96, - "grad_norm": 0.3028875377252202, - "learning_rate": 8.93782268880794e-08, - "loss": 0.2357, + "epoch": 0.77, + "grad_norm": 0.3259355884528992, + "learning_rate": 2.725242592663434e-06, + "loss": 0.2422, "step": 16686 }, { - "epoch": 0.96, - "grad_norm": 0.2752372436372155, - "learning_rate": 8.913015135080805e-08, - "loss": 0.2525, + "epoch": 0.77, + "grad_norm": 0.4702929102769375, + "learning_rate": 2.724221762113468e-06, + "loss": 0.3512, "step": 16687 }, { - "epoch": 0.96, - "grad_norm": 0.36823697113929454, - "learning_rate": 8.888241902653361e-08, - "loss": 0.1942, + "epoch": 0.77, + "grad_norm": 0.2675273269647348, + "learning_rate": 2.7232010926422235e-06, + "loss": 0.1697, "step": 16688 }, { - "epoch": 0.96, - "grad_norm": 0.4023075320401017, - "learning_rate": 8.863502992383477e-08, - "loss": 0.2551, + "epoch": 0.77, + "grad_norm": 0.2916902860540976, + "learning_rate": 2.7221805842722883e-06, + "loss": 0.1984, "step": 16689 }, { - "epoch": 0.96, - "grad_norm": 0.2965441139243796, - "learning_rate": 8.838798405127802e-08, - "loss": 0.2671, + "epoch": 0.77, + "grad_norm": 1.0363499183287534, + "learning_rate": 2.7211602370262656e-06, + "loss": 0.4611, "step": 16690 }, { - "epoch": 0.96, - "grad_norm": 1.325159274198456, - "learning_rate": 8.814128141741984e-08, - "loss": 0.6663, + "epoch": 0.77, + "grad_norm": 0.42280121707017404, + "learning_rate": 2.7201400509267373e-06, + "loss": 0.2531, "step": 16691 }, { - "epoch": 0.96, - "grad_norm": 0.2863213468271409, - "learning_rate": 8.78949220308023e-08, - "loss": 0.2085, + "epoch": 0.77, + "grad_norm": 0.3186983401970785, + "learning_rate": 2.7191200259962938e-06, + "loss": 0.2721, "step": 16692 }, { - "epoch": 0.96, - "grad_norm": 0.7635493682524934, - "learning_rate": 8.764890589995745e-08, - "loss": 0.4148, + "epoch": 0.77, + "grad_norm": 1.3277241274196392, + "learning_rate": 2.718100162257513e-06, + "loss": 0.6723, "step": 16693 }, { - "epoch": 0.96, - "grad_norm": 0.2738614036932938, - "learning_rate": 8.740323303340514e-08, - "loss": 0.2462, + "epoch": 0.77, + "grad_norm": 0.3168171624385611, + "learning_rate": 2.717080459732977e-06, + "loss": 0.1914, "step": 16694 }, { - "epoch": 0.96, - "grad_norm": 0.26102284400214015, - "learning_rate": 8.715790343965192e-08, - "loss": 0.1567, + "epoch": 0.77, + "grad_norm": 0.382763041741232, + "learning_rate": 2.7160609184452624e-06, + "loss": 0.2013, "step": 16695 }, { - "epoch": 0.96, - "grad_norm": 1.418513087327854, - "learning_rate": 8.691291712719541e-08, - "loss": 0.4353, + "epoch": 0.77, + "grad_norm": 0.35890682633987075, + "learning_rate": 2.715041538416936e-06, + "loss": 0.3125, "step": 16696 }, { - "epoch": 0.96, - "grad_norm": 1.1335322604001175, - "learning_rate": 8.666827410451772e-08, - "loss": 0.7633, + "epoch": 0.77, + "grad_norm": 0.5623659516809043, + "learning_rate": 2.7140223196705718e-06, + "loss": 0.3373, "step": 16697 }, { - "epoch": 0.96, - "grad_norm": 0.24218728721153193, - "learning_rate": 8.642397438008987e-08, - "loss": 0.2166, + "epoch": 0.77, + "grad_norm": 0.33339247750900636, + "learning_rate": 2.713003262228727e-06, + "loss": 0.1911, "step": 16698 }, { - "epoch": 0.96, - "grad_norm": 0.44698841899876174, - "learning_rate": 8.618001796237507e-08, - "loss": 0.3161, + "epoch": 0.77, + "grad_norm": 1.0844939041607131, + "learning_rate": 2.7119843661139677e-06, + "loss": 0.6947, "step": 16699 }, { - "epoch": 0.96, - "grad_norm": 0.8319313930088971, - "learning_rate": 8.593640485981991e-08, - "loss": 0.194, + "epoch": 0.77, + "grad_norm": 0.2811720132306387, + "learning_rate": 2.710965631348853e-06, + "loss": 0.229, "step": 16700 }, { - "epoch": 0.96, - "grad_norm": 0.3124725689478206, - "learning_rate": 8.569313508086097e-08, - "loss": 0.0728, + "epoch": 0.77, + "grad_norm": 0.23936624313079055, + "learning_rate": 2.7099470579559317e-06, + "loss": 0.167, "step": 16701 }, { - "epoch": 0.96, - "grad_norm": 0.2663921373594587, - "learning_rate": 8.545020863392262e-08, - "loss": 0.2698, + "epoch": 0.77, + "grad_norm": 0.6169389169633458, + "learning_rate": 2.708928645957756e-06, + "loss": 0.3691, "step": 16702 }, { - "epoch": 0.96, - "grad_norm": 1.0964726846573836, - "learning_rate": 8.520762552741591e-08, - "loss": 0.5473, + "epoch": 0.77, + "grad_norm": 0.361309840440058, + "learning_rate": 2.7079103953768725e-06, + "loss": 0.3012, "step": 16703 }, { - "epoch": 0.96, - "grad_norm": 0.5929084586700467, - "learning_rate": 8.496538576974412e-08, - "loss": 0.3214, + "epoch": 0.77, + "grad_norm": 0.3218830754681622, + "learning_rate": 2.7068923062358276e-06, + "loss": 0.1881, "step": 16704 }, { - "epoch": 0.96, - "grad_norm": 0.3420457220417289, - "learning_rate": 8.472348936929387e-08, - "loss": 0.2463, + "epoch": 0.77, + "grad_norm": 1.3702658836215624, + "learning_rate": 2.7058743785571573e-06, + "loss": 0.8038, "step": 16705 }, { - "epoch": 0.96, - "grad_norm": 0.33912810289817497, - "learning_rate": 8.448193633444291e-08, - "loss": 0.3064, + "epoch": 0.77, + "grad_norm": 0.49255027363258974, + "learning_rate": 2.7048566123633935e-06, + "loss": 0.2612, "step": 16706 }, { - "epoch": 0.96, - "grad_norm": 0.2234517371912271, - "learning_rate": 8.424072667355565e-08, - "loss": 0.1544, + "epoch": 0.77, + "grad_norm": 0.2665275606448107, + "learning_rate": 2.7038390076770783e-06, + "loss": 0.2084, "step": 16707 }, { - "epoch": 0.96, - "grad_norm": 0.44621753014878657, - "learning_rate": 8.399986039498653e-08, - "loss": 0.2155, + "epoch": 0.77, + "grad_norm": 0.37230596275630823, + "learning_rate": 2.702821564520732e-06, + "loss": 0.2515, "step": 16708 }, { - "epoch": 0.96, - "grad_norm": 0.8496786623009368, - "learning_rate": 8.375933750707554e-08, - "loss": 0.4511, + "epoch": 0.77, + "grad_norm": 0.9665570766730617, + "learning_rate": 2.7018042829168867e-06, + "loss": 0.4389, "step": 16709 }, { - "epoch": 0.96, - "grad_norm": 0.2951581272073628, - "learning_rate": 8.35191580181527e-08, - "loss": 0.2546, + "epoch": 0.77, + "grad_norm": 0.33355686024761394, + "learning_rate": 2.7007871628880567e-06, + "loss": 0.2382, "step": 16710 }, { - "epoch": 0.96, - "grad_norm": 0.3656795630471628, - "learning_rate": 8.327932193653355e-08, - "loss": 0.2506, + "epoch": 0.77, + "grad_norm": 1.2250471208907658, + "learning_rate": 2.6997702044567654e-06, + "loss": 0.5396, "step": 16711 }, { - "epoch": 0.96, - "grad_norm": 0.5578558576279988, - "learning_rate": 8.30398292705259e-08, - "loss": 0.2439, + "epoch": 0.77, + "grad_norm": 0.31793069580916333, + "learning_rate": 2.6987534076455267e-06, + "loss": 0.242, "step": 16712 }, { - "epoch": 0.96, - "grad_norm": 0.27556519383523753, - "learning_rate": 8.280068002842312e-08, - "loss": 0.2176, + "epoch": 0.77, + "grad_norm": 0.34769197644072003, + "learning_rate": 2.6977367724768486e-06, + "loss": 0.2647, "step": 16713 }, { - "epoch": 0.96, - "grad_norm": 0.2691015371715588, - "learning_rate": 8.256187421850636e-08, - "loss": 0.2203, + "epoch": 0.77, + "grad_norm": 0.4580150002971441, + "learning_rate": 2.6967202989732443e-06, + "loss": 0.1866, "step": 16714 }, { - "epoch": 0.96, - "grad_norm": 0.6831366435714303, - "learning_rate": 8.232341184904458e-08, - "loss": 0.3823, + "epoch": 0.77, + "grad_norm": 0.36281873438293033, + "learning_rate": 2.695703987157209e-06, + "loss": 0.2809, "step": 16715 }, { - "epoch": 0.96, - "grad_norm": 0.39566843622236897, - "learning_rate": 8.20852929282967e-08, - "loss": 0.2655, + "epoch": 0.77, + "grad_norm": 0.4003142238087575, + "learning_rate": 2.6946878370512485e-06, + "loss": 0.267, "step": 16716 }, { - "epoch": 0.96, - "grad_norm": 0.5305390025366589, - "learning_rate": 8.184751746450947e-08, - "loss": 0.3602, + "epoch": 0.77, + "grad_norm": 0.5918643598493024, + "learning_rate": 2.693671848677861e-06, + "loss": 0.2926, "step": 16717 }, { - "epoch": 0.96, - "grad_norm": 0.3804937384566211, - "learning_rate": 8.16100854659152e-08, - "loss": 0.243, + "epoch": 0.77, + "grad_norm": 0.8692842552197624, + "learning_rate": 2.6926560220595333e-06, + "loss": 0.3715, "step": 16718 }, { - "epoch": 0.96, - "grad_norm": 0.24366733203716592, - "learning_rate": 8.13729969407373e-08, - "loss": 0.1461, + "epoch": 0.77, + "grad_norm": 0.4122053278070936, + "learning_rate": 2.691640357218759e-06, + "loss": 0.2936, "step": 16719 }, { - "epoch": 0.96, - "grad_norm": 0.3626781267030705, - "learning_rate": 8.113625189718588e-08, - "loss": 0.2923, + "epoch": 0.77, + "grad_norm": 0.38337913227734677, + "learning_rate": 2.6906248541780233e-06, + "loss": 0.2998, "step": 16720 }, { - "epoch": 0.96, - "grad_norm": 0.3382050876755194, - "learning_rate": 8.089985034346104e-08, - "loss": 0.2521, + "epoch": 0.77, + "grad_norm": 0.22519551808218163, + "learning_rate": 2.689609512959811e-06, + "loss": 0.0687, "step": 16721 }, { - "epoch": 0.96, - "grad_norm": 0.6097586807838828, - "learning_rate": 8.066379228774624e-08, - "loss": 0.3351, + "epoch": 0.77, + "grad_norm": 0.3362320913505971, + "learning_rate": 2.6885943335865962e-06, + "loss": 0.2512, "step": 16722 }, { - "epoch": 0.96, - "grad_norm": 0.34159479554399735, - "learning_rate": 8.042807773821826e-08, - "loss": 0.285, + "epoch": 0.77, + "grad_norm": 0.5268567130523985, + "learning_rate": 2.6875793160808584e-06, + "loss": 0.3913, "step": 16723 }, { - "epoch": 0.96, - "grad_norm": 1.4091610780001418, - "learning_rate": 8.019270670303946e-08, - "loss": 0.2663, + "epoch": 0.77, + "grad_norm": 0.474324344012607, + "learning_rate": 2.686564460465063e-06, + "loss": 0.2438, "step": 16724 }, { - "epoch": 0.96, - "grad_norm": 0.2633520677569486, - "learning_rate": 7.995767919036002e-08, - "loss": 0.1632, + "epoch": 0.77, + "grad_norm": 0.35752894287736386, + "learning_rate": 2.6855497667616824e-06, + "loss": 0.2663, "step": 16725 }, { - "epoch": 0.96, - "grad_norm": 0.25363763611530493, - "learning_rate": 7.972299520832005e-08, - "loss": 0.2677, + "epoch": 0.77, + "grad_norm": 0.32089442098345655, + "learning_rate": 2.684535234993183e-06, + "loss": 0.1985, "step": 16726 }, { - "epoch": 0.96, - "grad_norm": 1.0371752079589232, - "learning_rate": 7.948865476504641e-08, - "loss": 0.2723, + "epoch": 0.77, + "grad_norm": 0.38417528649390204, + "learning_rate": 2.6835208651820198e-06, + "loss": 0.2419, "step": 16727 }, { - "epoch": 0.96, - "grad_norm": 0.5501126198181006, - "learning_rate": 7.925465786865372e-08, - "loss": 0.3554, + "epoch": 0.77, + "grad_norm": 0.35323582924758223, + "learning_rate": 2.6825066573506543e-06, + "loss": 0.26, "step": 16728 }, { - "epoch": 0.96, - "grad_norm": 0.3783143100194757, - "learning_rate": 7.902100452724548e-08, - "loss": 0.3148, + "epoch": 0.77, + "grad_norm": 1.348758012229431, + "learning_rate": 2.68149261152154e-06, + "loss": 0.7943, "step": 16729 }, { - "epoch": 0.96, - "grad_norm": 0.35404309098921694, - "learning_rate": 7.878769474891413e-08, - "loss": 0.2972, + "epoch": 0.77, + "grad_norm": 0.6751072821756354, + "learning_rate": 2.680478727717123e-06, + "loss": 0.2947, "step": 16730 }, { - "epoch": 0.96, - "grad_norm": 0.19637934444938035, - "learning_rate": 7.855472854173763e-08, - "loss": 0.0779, + "epoch": 0.77, + "grad_norm": 0.29463980600617223, + "learning_rate": 2.679465005959856e-06, + "loss": 0.2437, "step": 16731 }, { - "epoch": 0.96, - "grad_norm": 0.3869329443831731, - "learning_rate": 7.832210591378398e-08, - "loss": 0.2814, + "epoch": 0.77, + "grad_norm": 0.5413395913268767, + "learning_rate": 2.6784514462721713e-06, + "loss": 0.3311, "step": 16732 }, { - "epoch": 0.96, - "grad_norm": 0.42170939268755797, - "learning_rate": 7.808982687311006e-08, - "loss": 0.3055, + "epoch": 0.77, + "grad_norm": 0.33764951866060244, + "learning_rate": 2.6774380486765205e-06, + "loss": 0.1694, "step": 16733 }, { - "epoch": 0.96, - "grad_norm": 0.2777667725765214, - "learning_rate": 7.785789142775834e-08, - "loss": 0.2124, + "epoch": 0.77, + "grad_norm": 0.40874906671519173, + "learning_rate": 2.676424813195335e-06, + "loss": 0.1772, "step": 16734 }, { - "epoch": 0.96, - "grad_norm": 0.5345760328975254, - "learning_rate": 7.762629958576129e-08, - "loss": 0.3851, + "epoch": 0.77, + "grad_norm": 0.41951171391912384, + "learning_rate": 2.6754117398510417e-06, + "loss": 0.3384, "step": 16735 }, { - "epoch": 0.96, - "grad_norm": 1.316711270975337, - "learning_rate": 7.739505135513803e-08, - "loss": 0.541, + "epoch": 0.77, + "grad_norm": 0.5044259299138488, + "learning_rate": 2.674398828666074e-06, + "loss": 0.3566, "step": 16736 }, { - "epoch": 0.96, - "grad_norm": 0.2516530707494128, - "learning_rate": 7.716414674389771e-08, - "loss": 0.148, + "epoch": 0.77, + "grad_norm": 0.42734949853882725, + "learning_rate": 2.6733860796628542e-06, + "loss": 0.1977, "step": 16737 }, { - "epoch": 0.96, - "grad_norm": 0.23169324602476324, - "learning_rate": 7.693358576003617e-08, - "loss": 0.214, + "epoch": 0.77, + "grad_norm": 0.7110794778170559, + "learning_rate": 2.672373492863809e-06, + "loss": 0.3281, "step": 16738 }, { - "epoch": 0.96, - "grad_norm": 0.5344331125889435, - "learning_rate": 7.670336841153925e-08, - "loss": 0.3644, + "epoch": 0.77, + "grad_norm": 0.2542412579935446, + "learning_rate": 2.6713610682913494e-06, + "loss": 0.2153, "step": 16739 }, { - "epoch": 0.96, - "grad_norm": 0.4752035059563894, - "learning_rate": 7.64734947063761e-08, - "loss": 0.2514, + "epoch": 0.77, + "grad_norm": 0.292968086213938, + "learning_rate": 2.6703488059678952e-06, + "loss": 0.1757, "step": 16740 }, { - "epoch": 0.96, - "grad_norm": 0.36429165610906206, - "learning_rate": 7.624396465251038e-08, - "loss": 0.2732, + "epoch": 0.77, + "grad_norm": 0.947033983372503, + "learning_rate": 2.6693367059158515e-06, + "loss": 0.5632, "step": 16741 }, { - "epoch": 0.96, - "grad_norm": 0.3708596666694886, - "learning_rate": 7.601477825788905e-08, - "loss": 0.2794, + "epoch": 0.77, + "grad_norm": 0.7288273209144358, + "learning_rate": 2.6683247681576265e-06, + "loss": 0.3812, "step": 16742 }, { - "epoch": 0.96, - "grad_norm": 0.3845077104005944, - "learning_rate": 7.57859355304491e-08, - "loss": 0.1939, + "epoch": 0.77, + "grad_norm": 0.2851944491258738, + "learning_rate": 2.6673129927156285e-06, + "loss": 0.2111, "step": 16743 }, { - "epoch": 0.96, - "grad_norm": 0.3137435856663455, - "learning_rate": 7.55574364781153e-08, - "loss": 0.2363, + "epoch": 0.77, + "grad_norm": 0.5491525185607987, + "learning_rate": 2.6663013796122505e-06, + "loss": 0.3089, "step": 16744 }, { - "epoch": 0.96, - "grad_norm": 0.33829037017062646, - "learning_rate": 7.532928110880133e-08, - "loss": 0.2301, + "epoch": 0.77, + "grad_norm": 0.310065639023231, + "learning_rate": 2.665289928869892e-06, + "loss": 0.2087, "step": 16745 }, { - "epoch": 0.96, - "grad_norm": 0.40890395308043054, - "learning_rate": 7.510146943040641e-08, - "loss": 0.3042, + "epoch": 0.77, + "grad_norm": 0.40509558450595917, + "learning_rate": 2.6642786405109477e-06, + "loss": 0.2831, "step": 16746 }, { - "epoch": 0.96, - "grad_norm": 0.3051775333200117, - "learning_rate": 7.487400145082203e-08, - "loss": 0.2107, + "epoch": 0.77, + "grad_norm": 0.3949452787414176, + "learning_rate": 2.6632675145578e-06, + "loss": 0.2471, "step": 16747 }, { - "epoch": 0.96, - "grad_norm": 1.3662893373185263, - "learning_rate": 7.464687717792407e-08, - "loss": 0.4789, + "epoch": 0.77, + "grad_norm": 0.45650040969671, + "learning_rate": 2.6622565510328436e-06, + "loss": 0.3358, "step": 16748 }, { - "epoch": 0.96, - "grad_norm": 0.33082857403563304, - "learning_rate": 7.442009661957738e-08, - "loss": 0.2837, + "epoch": 0.77, + "grad_norm": 0.331421059659242, + "learning_rate": 2.6612457499584477e-06, + "loss": 0.2487, "step": 16749 }, { - "epoch": 0.96, - "grad_norm": 0.33747596972844696, - "learning_rate": 7.41936597836368e-08, - "loss": 0.2082, + "epoch": 0.77, + "grad_norm": 1.4135166675940847, + "learning_rate": 2.6602351113570036e-06, + "loss": 0.2702, "step": 16750 }, { - "epoch": 0.96, - "grad_norm": 0.3298350815450901, - "learning_rate": 7.396756667794158e-08, - "loss": 0.2119, + "epoch": 0.77, + "grad_norm": 0.2313632774435852, + "learning_rate": 2.6592246352508767e-06, + "loss": 0.2019, "step": 16751 }, { - "epoch": 0.96, - "grad_norm": 0.7593553931238317, - "learning_rate": 7.374181731032326e-08, - "loss": 0.5444, + "epoch": 0.77, + "grad_norm": 0.4468857926201969, + "learning_rate": 2.6582143216624445e-06, + "loss": 0.2935, "step": 16752 }, { - "epoch": 0.96, - "grad_norm": 0.3203570525125074, - "learning_rate": 7.35164116885989e-08, - "loss": 0.2598, + "epoch": 0.77, + "grad_norm": 0.4943364869972546, + "learning_rate": 2.6572041706140682e-06, + "loss": 0.2753, "step": 16753 }, { - "epoch": 0.96, - "grad_norm": 0.33154457148997163, - "learning_rate": 7.329134982057562e-08, - "loss": 0.2206, + "epoch": 0.77, + "grad_norm": 0.7067609635089999, + "learning_rate": 2.6561941821281145e-06, + "loss": 0.4327, "step": 16754 }, { - "epoch": 0.96, - "grad_norm": 0.7366957717865998, - "learning_rate": 7.306663171404494e-08, - "loss": 0.3779, + "epoch": 0.77, + "grad_norm": 0.3827729668520749, + "learning_rate": 2.6551843562269477e-06, + "loss": 0.2779, "step": 16755 }, { - "epoch": 0.96, - "grad_norm": 0.30293447315905003, - "learning_rate": 7.284225737678952e-08, - "loss": 0.2432, + "epoch": 0.77, + "grad_norm": 0.32694520773190744, + "learning_rate": 2.6541746929329158e-06, + "loss": 0.2354, "step": 16756 }, { - "epoch": 0.96, - "grad_norm": 0.25522354037367, - "learning_rate": 7.261822681657982e-08, - "loss": 0.1827, + "epoch": 0.77, + "grad_norm": 0.314747302287425, + "learning_rate": 2.65316519226838e-06, + "loss": 0.1601, "step": 16757 }, { - "epoch": 0.96, - "grad_norm": 1.388424240051691, - "learning_rate": 7.239454004117519e-08, - "loss": 0.8719, + "epoch": 0.77, + "grad_norm": 0.5691618930737959, + "learning_rate": 2.6521558542556815e-06, + "loss": 0.3506, "step": 16758 }, { - "epoch": 0.96, - "grad_norm": 0.31357229248114443, - "learning_rate": 7.217119705831943e-08, + "epoch": 0.77, + "grad_norm": 0.2716421589380168, + "learning_rate": 2.6511466789171715e-06, "loss": 0.2698, "step": 16759 }, { - "epoch": 0.96, - "grad_norm": 0.8776215112811757, - "learning_rate": 7.19481978757497e-08, - "loss": 0.2509, + "epoch": 0.77, + "grad_norm": 0.8871597053015742, + "learning_rate": 2.650137666275194e-06, + "loss": 0.3295, "step": 16760 }, { - "epoch": 0.96, - "grad_norm": 0.3567502082271883, - "learning_rate": 7.172554250118535e-08, - "loss": 0.3052, + "epoch": 0.77, + "grad_norm": 0.4506441184723755, + "learning_rate": 2.6491288163520825e-06, + "loss": 0.2877, "step": 16761 }, { - "epoch": 0.96, - "grad_norm": 0.3012814796438905, - "learning_rate": 7.150323094233912e-08, - "loss": 0.2416, + "epoch": 0.77, + "grad_norm": 0.5771436153690723, + "learning_rate": 2.648120129170173e-06, + "loss": 0.3266, "step": 16762 }, { - "epoch": 0.96, - "grad_norm": 0.44014096773481015, - "learning_rate": 7.128126320690931e-08, - "loss": 0.0926, + "epoch": 0.77, + "grad_norm": 0.2725425251040875, + "learning_rate": 2.647111604751801e-06, + "loss": 0.1954, "step": 16763 }, { - "epoch": 0.96, - "grad_norm": 0.7301742900613, - "learning_rate": 7.105963930258308e-08, - "loss": 0.4903, + "epoch": 0.77, + "grad_norm": 0.3089732205930462, + "learning_rate": 2.646103243119289e-06, + "loss": 0.2086, "step": 16764 }, { - "epoch": 0.96, - "grad_norm": 0.22739964947646552, - "learning_rate": 7.083835923703319e-08, - "loss": 0.2198, + "epoch": 0.77, + "grad_norm": 0.4605881138208105, + "learning_rate": 2.6450950442949654e-06, + "loss": 0.3316, "step": 16765 }, { - "epoch": 0.96, - "grad_norm": 0.4968594490313015, - "learning_rate": 7.061742301792462e-08, - "loss": 0.3535, + "epoch": 0.77, + "grad_norm": 0.9425963414084222, + "learning_rate": 2.644087008301144e-06, + "loss": 0.5267, "step": 16766 }, { - "epoch": 0.96, - "grad_norm": 0.49300324716515354, - "learning_rate": 7.039683065290792e-08, - "loss": 0.2496, + "epoch": 0.77, + "grad_norm": 0.30503558167830364, + "learning_rate": 2.6430791351601514e-06, + "loss": 0.2648, "step": 16767 }, { - "epoch": 0.96, - "grad_norm": 0.3440836293024113, - "learning_rate": 7.017658214962142e-08, - "loss": 0.2844, + "epoch": 0.77, + "grad_norm": 0.6165019630165293, + "learning_rate": 2.6420714248942938e-06, + "loss": 0.3469, "step": 16768 }, { - "epoch": 0.96, - "grad_norm": 0.7243014548865391, - "learning_rate": 6.995667751569346e-08, - "loss": 0.3251, + "epoch": 0.77, + "grad_norm": 0.46061170221758924, + "learning_rate": 2.6410638775258856e-06, + "loss": 0.1598, "step": 16769 }, { - "epoch": 0.96, - "grad_norm": 0.3702799611297618, - "learning_rate": 6.973711675873795e-08, - "loss": 0.2672, + "epoch": 0.77, + "grad_norm": 0.40919998550475734, + "learning_rate": 2.640056493077231e-06, + "loss": 0.2861, "step": 16770 }, { - "epoch": 0.96, - "grad_norm": 0.37034677737138777, - "learning_rate": 6.951789988635992e-08, - "loss": 0.2754, + "epoch": 0.77, + "grad_norm": 0.3080257520500706, + "learning_rate": 2.6390492715706264e-06, + "loss": 0.2658, "step": 16771 }, { - "epoch": 0.96, - "grad_norm": 0.33444108241821474, - "learning_rate": 6.929902690614998e-08, - "loss": 0.2079, + "epoch": 0.77, + "grad_norm": 0.928025723021502, + "learning_rate": 2.6380422130283812e-06, + "loss": 0.5588, "step": 16772 }, { - "epoch": 0.96, - "grad_norm": 0.3614151509687135, - "learning_rate": 6.90804978256876e-08, - "loss": 0.2578, + "epoch": 0.77, + "grad_norm": 0.2904725193917328, + "learning_rate": 2.6370353174727837e-06, + "loss": 0.0846, "step": 16773 }, { - "epoch": 0.96, - "grad_norm": 0.34113505165220437, - "learning_rate": 6.886231265254007e-08, - "loss": 0.2359, + "epoch": 0.77, + "grad_norm": 0.4444469749584699, + "learning_rate": 2.6360285849261303e-06, + "loss": 0.2748, "step": 16774 }, { - "epoch": 0.96, - "grad_norm": 1.2018390028919796, - "learning_rate": 6.864447139426356e-08, - "loss": 0.5018, + "epoch": 0.77, + "grad_norm": 0.36258214661021765, + "learning_rate": 2.6350220154107044e-06, + "loss": 0.2846, "step": 16775 }, { - "epoch": 0.96, - "grad_norm": 0.3092635740058215, - "learning_rate": 6.842697405840204e-08, - "loss": 0.1559, + "epoch": 0.77, + "grad_norm": 0.5987837160918604, + "learning_rate": 2.6340156089487912e-06, + "loss": 0.2089, "step": 16776 }, { - "epoch": 0.96, - "grad_norm": 0.2583079412979954, - "learning_rate": 6.820982065248837e-08, - "loss": 0.252, + "epoch": 0.77, + "grad_norm": 0.29346213528411325, + "learning_rate": 2.6330093655626777e-06, + "loss": 0.2292, "step": 16777 }, { - "epoch": 0.96, - "grad_norm": 0.3110707555011408, - "learning_rate": 6.79930111840399e-08, - "loss": 0.2555, + "epoch": 0.77, + "grad_norm": 1.2883965086248017, + "learning_rate": 2.6320032852746326e-06, + "loss": 0.7519, "step": 16778 }, { - "epoch": 0.96, - "grad_norm": 0.9741701424650024, - "learning_rate": 6.777654566056724e-08, - "loss": 0.3839, + "epoch": 0.77, + "grad_norm": 0.24275415218346255, + "learning_rate": 2.6309973681069333e-06, + "loss": 0.1763, "step": 16779 }, { - "epoch": 0.96, - "grad_norm": 0.3323721668501855, - "learning_rate": 6.756042408956554e-08, - "loss": 0.1876, + "epoch": 0.77, + "grad_norm": 0.43919299240527665, + "learning_rate": 2.6299916140818527e-06, + "loss": 0.2865, "step": 16780 }, { - "epoch": 0.96, - "grad_norm": 0.5357257838410326, - "learning_rate": 6.73446464785199e-08, - "loss": 0.3502, + "epoch": 0.77, + "grad_norm": 0.7603436078300181, + "learning_rate": 2.628986023221651e-06, + "loss": 0.4036, "step": 16781 }, { - "epoch": 0.96, - "grad_norm": 0.44370665817522104, - "learning_rate": 6.712921283490103e-08, - "loss": 0.3634, + "epoch": 0.77, + "grad_norm": 0.4474138613994775, + "learning_rate": 2.627980595548599e-06, + "loss": 0.229, "step": 16782 }, { - "epoch": 0.96, - "grad_norm": 0.28311071631336937, - "learning_rate": 6.691412316617075e-08, - "loss": 0.1912, + "epoch": 0.77, + "grad_norm": 0.39574076643155853, + "learning_rate": 2.6269753310849443e-06, + "loss": 0.2848, "step": 16783 }, { - "epoch": 0.96, - "grad_norm": 0.5741547571259257, - "learning_rate": 6.66993774797775e-08, - "loss": 0.3838, + "epoch": 0.77, + "grad_norm": 0.5298559210314184, + "learning_rate": 2.6259702298529565e-06, + "loss": 0.4018, "step": 16784 }, { - "epoch": 0.96, - "grad_norm": 0.2227072118964059, - "learning_rate": 6.64849757831576e-08, - "loss": 0.205, + "epoch": 0.77, + "grad_norm": 0.2487329921280947, + "learning_rate": 2.624965291874877e-06, + "loss": 0.1425, "step": 16785 }, { - "epoch": 0.96, - "grad_norm": 0.2907201308397229, - "learning_rate": 6.627091808373509e-08, - "loss": 0.1812, + "epoch": 0.77, + "grad_norm": 0.41049115428712835, + "learning_rate": 2.62396051717296e-06, + "loss": 0.2099, "step": 16786 }, { - "epoch": 0.96, - "grad_norm": 1.299735719613476, - "learning_rate": 6.605720438892515e-08, - "loss": 0.7159, + "epoch": 0.77, + "grad_norm": 0.3699195197291354, + "learning_rate": 2.6229559057694466e-06, + "loss": 0.2819, "step": 16787 }, { - "epoch": 0.96, - "grad_norm": 0.4745022016836694, - "learning_rate": 6.584383470612631e-08, - "loss": 0.3508, + "epoch": 0.77, + "grad_norm": 0.5156963593446017, + "learning_rate": 2.621951457686578e-06, + "loss": 0.292, "step": 16788 }, { - "epoch": 0.96, - "grad_norm": 0.2982418095480476, - "learning_rate": 6.563080904272712e-08, - "loss": 0.1854, + "epoch": 0.77, + "grad_norm": 0.4446364903914271, + "learning_rate": 2.6209471729465964e-06, + "loss": 0.2423, "step": 16789 }, { - "epoch": 0.96, - "grad_norm": 0.3813855226753747, - "learning_rate": 6.54181274061072e-08, - "loss": 0.3044, + "epoch": 0.77, + "grad_norm": 0.3873617995527452, + "learning_rate": 2.6199430515717296e-06, + "loss": 0.299, "step": 16790 }, { - "epoch": 0.96, - "grad_norm": 0.32301668216479656, - "learning_rate": 6.520578980362957e-08, - "loss": 0.1707, + "epoch": 0.77, + "grad_norm": 0.33792533150489, + "learning_rate": 2.618939093584214e-06, + "loss": 0.2287, "step": 16791 }, { - "epoch": 0.96, - "grad_norm": 0.41005198828464845, - "learning_rate": 6.499379624264834e-08, - "loss": 0.2734, + "epoch": 0.77, + "grad_norm": 0.38006130785460696, + "learning_rate": 2.617935299006269e-06, + "loss": 0.199, "step": 16792 }, { - "epoch": 0.96, - "grad_norm": 0.277449872251276, - "learning_rate": 6.478214673050542e-08, - "loss": 0.2324, + "epoch": 0.77, + "grad_norm": 0.5628717039689143, + "learning_rate": 2.616931667860123e-06, + "loss": 0.3339, "step": 16793 }, { - "epoch": 0.96, - "grad_norm": 1.2249416877156012, - "learning_rate": 6.45708412745294e-08, - "loss": 0.7376, + "epoch": 0.77, + "grad_norm": 0.9254852193896228, + "learning_rate": 2.6159282001679955e-06, + "loss": 0.316, "step": 16794 }, { - "epoch": 0.96, - "grad_norm": 0.3198672617571951, - "learning_rate": 6.435987988203662e-08, - "loss": 0.241, + "epoch": 0.77, + "grad_norm": 0.25300454347975904, + "learning_rate": 2.614924895952099e-06, + "loss": 0.2189, "step": 16795 }, { - "epoch": 0.97, - "grad_norm": 0.6394970517054889, - "learning_rate": 6.414926256033461e-08, - "loss": 0.2757, + "epoch": 0.77, + "grad_norm": 1.2691693027468753, + "learning_rate": 2.6139217552346462e-06, + "loss": 0.7178, "step": 16796 }, { - "epoch": 0.97, - "grad_norm": 0.22601691033016935, - "learning_rate": 6.393898931671749e-08, - "loss": 0.2039, + "epoch": 0.77, + "grad_norm": 0.3165158474442954, + "learning_rate": 2.6129187780378473e-06, + "loss": 0.154, "step": 16797 }, { - "epoch": 0.97, - "grad_norm": 0.33681906182926935, - "learning_rate": 6.372906015846502e-08, - "loss": 0.2788, + "epoch": 0.77, + "grad_norm": 0.3861589315444167, + "learning_rate": 2.6119159643839107e-06, + "loss": 0.3078, "step": 16798 }, { - "epoch": 0.97, - "grad_norm": 1.303947428546528, - "learning_rate": 6.351947509284695e-08, - "loss": 0.2012, + "epoch": 0.77, + "grad_norm": 0.348322252760024, + "learning_rate": 2.610913314295034e-06, + "loss": 0.2505, "step": 16799 }, { - "epoch": 0.97, - "grad_norm": 0.44996544450292303, - "learning_rate": 6.331023412712411e-08, - "loss": 0.3338, + "epoch": 0.77, + "grad_norm": 0.8675081368059038, + "learning_rate": 2.6099108277934105e-06, + "loss": 0.3754, "step": 16800 }, { - "epoch": 0.97, - "grad_norm": 0.2537708506596748, - "learning_rate": 6.310133726853962e-08, - "loss": 0.23, + "epoch": 0.77, + "grad_norm": 0.36584028807440316, + "learning_rate": 2.6089085049012397e-06, + "loss": 0.2616, "step": 16801 }, { - "epoch": 0.97, - "grad_norm": 0.8024519107979677, - "learning_rate": 6.289278452432768e-08, - "loss": 0.3105, + "epoch": 0.77, + "grad_norm": 0.4776039407152915, + "learning_rate": 2.6079063456407106e-06, + "loss": 0.2612, "step": 16802 }, { - "epoch": 0.97, - "grad_norm": 0.3602472486183106, - "learning_rate": 6.268457590171251e-08, - "loss": 0.1993, + "epoch": 0.77, + "grad_norm": 0.2903685272727473, + "learning_rate": 2.606904350034013e-06, + "loss": 0.1837, "step": 16803 }, { - "epoch": 0.97, - "grad_norm": 0.5559272095999094, - "learning_rate": 6.247671140790279e-08, - "loss": 0.2386, + "epoch": 0.77, + "grad_norm": 0.3447715261288622, + "learning_rate": 2.605902518103325e-06, + "loss": 0.2508, "step": 16804 }, { - "epoch": 0.97, - "grad_norm": 0.2690640064334697, - "learning_rate": 6.226919105009721e-08, - "loss": 0.269, + "epoch": 0.77, + "grad_norm": 0.7532184132320382, + "learning_rate": 2.6049008498708285e-06, + "loss": 0.2954, "step": 16805 }, { - "epoch": 0.97, - "grad_norm": 0.5439058823170019, - "learning_rate": 6.206201483548224e-08, - "loss": 0.3134, + "epoch": 0.77, + "grad_norm": 0.47185618134577817, + "learning_rate": 2.6038993453587034e-06, + "loss": 0.2624, "step": 16806 }, { - "epoch": 0.97, - "grad_norm": 0.4256086949463779, - "learning_rate": 6.185518277123215e-08, - "loss": 0.2782, + "epoch": 0.77, + "grad_norm": 0.3288253028536059, + "learning_rate": 2.602898004589115e-06, + "loss": 0.2647, "step": 16807 }, { - "epoch": 0.97, - "grad_norm": 0.5052015564502045, - "learning_rate": 6.164869486451008e-08, - "loss": 0.3196, + "epoch": 0.77, + "grad_norm": 1.1210583169858463, + "learning_rate": 2.601896827584238e-06, + "loss": 0.5075, "step": 16808 }, { - "epoch": 0.97, - "grad_norm": 0.20772748215582018, - "learning_rate": 6.144255112246589e-08, - "loss": 0.1723, + "epoch": 0.77, + "grad_norm": 0.7531327890121325, + "learning_rate": 2.6008958143662323e-06, + "loss": 0.2299, "step": 16809 }, { - "epoch": 0.97, - "grad_norm": 0.40896993870319204, - "learning_rate": 6.12367515522394e-08, - "loss": 0.3089, + "epoch": 0.77, + "grad_norm": 0.34352398359066944, + "learning_rate": 2.5998949649572614e-06, + "loss": 0.2664, "step": 16810 }, { - "epoch": 0.97, - "grad_norm": 0.5635204637734937, - "learning_rate": 6.103129616095605e-08, - "loss": 0.3505, + "epoch": 0.77, + "grad_norm": 0.296096905744487, + "learning_rate": 2.5988942793794868e-06, + "loss": 0.2398, "step": 16811 }, { - "epoch": 0.97, - "grad_norm": 0.4005554484110214, - "learning_rate": 6.082618495573234e-08, - "loss": 0.2544, + "epoch": 0.77, + "grad_norm": 1.0210335682212848, + "learning_rate": 2.5978937576550566e-06, + "loss": 0.115, "step": 16812 }, { - "epoch": 0.97, - "grad_norm": 0.33308458781620476, - "learning_rate": 6.062141794366927e-08, - "loss": 0.2608, + "epoch": 0.77, + "grad_norm": 0.38414739216715077, + "learning_rate": 2.596893399806124e-06, + "loss": 0.2886, "step": 16813 }, { - "epoch": 0.97, - "grad_norm": 0.66920581948784, - "learning_rate": 6.041699513186005e-08, - "loss": 0.4106, + "epoch": 0.77, + "grad_norm": 0.6846475995523693, + "learning_rate": 2.595893205854837e-06, + "loss": 0.3651, "step": 16814 }, { - "epoch": 0.97, - "grad_norm": 0.32580508384371565, - "learning_rate": 6.021291652738348e-08, - "loss": 0.09, + "epoch": 0.77, + "grad_norm": 0.5194245999026923, + "learning_rate": 2.594893175823341e-06, + "loss": 0.2443, "step": 16815 }, { - "epoch": 0.97, - "grad_norm": 0.3156954468518769, - "learning_rate": 6.0009182137305e-08, - "loss": 0.241, + "epoch": 0.77, + "grad_norm": 0.38345637001489674, + "learning_rate": 2.5938933097337703e-06, + "loss": 0.2705, "step": 16816 }, { - "epoch": 0.97, - "grad_norm": 0.35874233754083895, - "learning_rate": 5.98057919686823e-08, - "loss": 0.3018, + "epoch": 0.77, + "grad_norm": 0.2653099810231982, + "learning_rate": 2.5928936076082666e-06, + "loss": 0.154, "step": 16817 }, { - "epoch": 0.97, - "grad_norm": 0.5415758946024039, - "learning_rate": 5.9602746028556425e-08, - "loss": 0.3908, + "epoch": 0.77, + "grad_norm": 0.3914538779858025, + "learning_rate": 2.5918940694689552e-06, + "loss": 0.2318, "step": 16818 }, { - "epoch": 0.97, - "grad_norm": 0.32112130011980533, - "learning_rate": 5.9400044323960625e-08, - "loss": 0.2137, + "epoch": 0.77, + "grad_norm": 0.3575021810706745, + "learning_rate": 2.590894695337971e-06, + "loss": 0.2604, "step": 16819 }, { - "epoch": 0.97, - "grad_norm": 1.3192494544105202, - "learning_rate": 5.919768686191263e-08, - "loss": 0.6982, + "epoch": 0.77, + "grad_norm": 1.328414747237804, + "learning_rate": 2.589895485237439e-06, + "loss": 0.7878, "step": 16820 }, { - "epoch": 0.97, - "grad_norm": 0.27660356577726986, - "learning_rate": 5.8995673649422383e-08, - "loss": 0.2368, + "epoch": 0.77, + "grad_norm": 0.838495162454866, + "learning_rate": 2.588896439189477e-06, + "loss": 0.3845, "step": 16821 }, { - "epoch": 0.97, - "grad_norm": 0.31310707140945643, - "learning_rate": 5.879400469348429e-08, - "loss": 0.0847, + "epoch": 0.77, + "grad_norm": 0.3361392972626814, + "learning_rate": 2.5878975572162036e-06, + "loss": 0.1911, "step": 16822 }, { - "epoch": 0.97, - "grad_norm": 0.38135073290776306, - "learning_rate": 5.859268000108276e-08, - "loss": 0.3013, + "epoch": 0.77, + "grad_norm": 0.25127939616321776, + "learning_rate": 2.5868988393397376e-06, + "loss": 0.2064, "step": 16823 }, { - "epoch": 0.97, - "grad_norm": 0.3411340438737624, - "learning_rate": 5.8391699579188885e-08, - "loss": 0.2999, + "epoch": 0.77, + "grad_norm": 0.8623133443556457, + "learning_rate": 2.5859002855821837e-06, + "loss": 0.465, "step": 16824 }, { - "epoch": 0.97, - "grad_norm": 0.3468521220123775, - "learning_rate": 5.819106343476266e-08, - "loss": 0.1525, + "epoch": 0.77, + "grad_norm": 0.35890523857090056, + "learning_rate": 2.5849018959656546e-06, + "loss": 0.2181, "step": 16825 }, { - "epoch": 0.97, - "grad_norm": 0.38139159913608844, - "learning_rate": 5.799077157475297e-08, - "loss": 0.3019, + "epoch": 0.77, + "grad_norm": 0.3674442642755873, + "learning_rate": 2.5839036705122456e-06, + "loss": 0.2838, "step": 16826 }, { - "epoch": 0.97, - "grad_norm": 0.35172703587555404, - "learning_rate": 5.779082400609426e-08, - "loss": 0.1948, + "epoch": 0.77, + "grad_norm": 0.6401368583816887, + "learning_rate": 2.5829056092440664e-06, + "loss": 0.3714, "step": 16827 }, { - "epoch": 0.97, - "grad_norm": 0.3715864195679574, - "learning_rate": 5.7591220735712105e-08, - "loss": 0.1732, + "epoch": 0.77, + "grad_norm": 0.3431464510168847, + "learning_rate": 2.5819077121832092e-06, + "loss": 0.2032, "step": 16828 }, { - "epoch": 0.97, - "grad_norm": 0.2882289558296544, - "learning_rate": 5.7391961770519865e-08, - "loss": 0.3003, + "epoch": 0.77, + "grad_norm": 0.3101542705025432, + "learning_rate": 2.5809099793517622e-06, + "loss": 0.1666, "step": 16829 }, { - "epoch": 0.97, - "grad_norm": 0.8134034249660722, - "learning_rate": 5.7193047117415356e-08, - "loss": 0.4185, + "epoch": 0.77, + "grad_norm": 0.37664066821061204, + "learning_rate": 2.579912410771821e-06, + "loss": 0.2685, "step": 16830 }, { - "epoch": 0.97, - "grad_norm": 0.5514544419384692, - "learning_rate": 5.699447678328751e-08, - "loss": 0.373, + "epoch": 0.77, + "grad_norm": 0.3165874385171127, + "learning_rate": 2.578915006465461e-06, + "loss": 0.1987, "step": 16831 }, { - "epoch": 0.97, - "grad_norm": 0.29257468097948075, - "learning_rate": 5.6796250775014164e-08, - "loss": 0.2282, + "epoch": 0.77, + "grad_norm": 0.8323456064244357, + "learning_rate": 2.577917766454776e-06, + "loss": 0.4931, "step": 16832 }, { - "epoch": 0.97, - "grad_norm": 0.5000178068678629, - "learning_rate": 5.6598369099458705e-08, - "loss": 0.2971, + "epoch": 0.77, + "grad_norm": 0.8071155434145634, + "learning_rate": 2.576920690761836e-06, + "loss": 0.4146, "step": 16833 }, { - "epoch": 0.97, - "grad_norm": 0.22716384655094343, - "learning_rate": 5.640083176347455e-08, - "loss": 0.1603, + "epoch": 0.77, + "grad_norm": 0.3717142235434635, + "learning_rate": 2.57592377940872e-06, + "loss": 0.2166, "step": 16834 }, { - "epoch": 0.97, - "grad_norm": 0.3518311840836283, - "learning_rate": 5.620363877390178e-08, - "loss": 0.2388, + "epoch": 0.77, + "grad_norm": 0.3662039525766404, + "learning_rate": 2.5749270324174923e-06, + "loss": 0.2346, "step": 16835 }, { - "epoch": 0.97, - "grad_norm": 0.45494879435889785, - "learning_rate": 5.600679013756938e-08, - "loss": 0.3139, + "epoch": 0.77, + "grad_norm": 0.3465580723974107, + "learning_rate": 2.5739304498102246e-06, + "loss": 0.1981, "step": 16836 }, { - "epoch": 0.97, - "grad_norm": 0.31631780442761426, - "learning_rate": 5.581028586129411e-08, - "loss": 0.2677, + "epoch": 0.77, + "grad_norm": 0.32879939392495067, + "learning_rate": 2.5729340316089822e-06, + "loss": 0.2416, "step": 16837 }, { - "epoch": 0.97, - "grad_norm": 0.5723125886588851, - "learning_rate": 5.561412595188165e-08, - "loss": 0.2285, + "epoch": 0.77, + "grad_norm": 0.6138178753468019, + "learning_rate": 2.57193777783582e-06, + "loss": 0.2638, "step": 16838 }, { - "epoch": 0.97, - "grad_norm": 0.9015022205745714, - "learning_rate": 5.541831041612322e-08, - "loss": 0.4787, + "epoch": 0.77, + "grad_norm": 0.5413704457977427, + "learning_rate": 2.570941688512795e-06, + "loss": 0.31, "step": 16839 }, { - "epoch": 0.97, - "grad_norm": 0.37034387207268293, - "learning_rate": 5.5222839260802294e-08, - "loss": 0.2729, + "epoch": 0.77, + "grad_norm": 0.42851077043210345, + "learning_rate": 2.569945763661964e-06, + "loss": 0.2526, "step": 16840 }, { - "epoch": 0.97, - "grad_norm": 0.30520368304807893, - "learning_rate": 5.502771249268568e-08, - "loss": 0.2811, + "epoch": 0.77, + "grad_norm": 0.24815332113259747, + "learning_rate": 2.5689500033053705e-06, + "loss": 0.1304, "step": 16841 }, { - "epoch": 0.97, - "grad_norm": 0.2898764190409881, - "learning_rate": 5.483293011853241e-08, - "loss": 0.145, + "epoch": 0.77, + "grad_norm": 0.3622352266125836, + "learning_rate": 2.567954407465063e-06, + "loss": 0.3172, "step": 16842 }, { - "epoch": 0.97, - "grad_norm": 0.756039605785083, - "learning_rate": 5.4638492145087096e-08, - "loss": 0.3867, + "epoch": 0.77, + "grad_norm": 0.4271984038934308, + "learning_rate": 2.5669589761630776e-06, + "loss": 0.2664, "step": 16843 }, { - "epoch": 0.97, - "grad_norm": 0.3807809778880975, - "learning_rate": 5.4444398579083235e-08, - "loss": 0.2543, + "epoch": 0.77, + "grad_norm": 0.4874050682279112, + "learning_rate": 2.5659637094214616e-06, + "loss": 0.2766, "step": 16844 }, { - "epoch": 0.97, - "grad_norm": 0.37486671787998804, - "learning_rate": 5.425064942724212e-08, - "loss": 0.2385, + "epoch": 0.77, + "grad_norm": 1.4597999135020407, + "learning_rate": 2.5649686072622437e-06, + "loss": 0.6212, "step": 16845 }, { - "epoch": 0.97, - "grad_norm": 0.596115630845532, - "learning_rate": 5.4057244696272826e-08, - "loss": 0.3478, + "epoch": 0.77, + "grad_norm": 0.3524427806300477, + "learning_rate": 2.5639736697074525e-06, + "loss": 0.2415, "step": 16846 }, { - "epoch": 0.97, - "grad_norm": 0.36027385165231446, - "learning_rate": 5.386418439287444e-08, - "loss": 0.2733, + "epoch": 0.77, + "grad_norm": 0.3778868995479082, + "learning_rate": 2.5629788967791203e-06, + "loss": 0.2729, "step": 16847 }, { - "epoch": 0.97, - "grad_norm": 0.26856822778458994, - "learning_rate": 5.3671468523731617e-08, - "loss": 0.1785, + "epoch": 0.77, + "grad_norm": 0.1934119961692052, + "learning_rate": 2.5619842884992607e-06, + "loss": 0.1098, "step": 16848 }, { - "epoch": 0.97, - "grad_norm": 0.37989515893189935, - "learning_rate": 5.3479097095516795e-08, - "loss": 0.3023, + "epoch": 0.77, + "grad_norm": 0.34287014610942307, + "learning_rate": 2.5609898448899073e-06, + "loss": 0.2636, "step": 16849 }, { - "epoch": 0.97, - "grad_norm": 0.2918242952616663, - "learning_rate": 5.328707011489465e-08, - "loss": 0.2222, + "epoch": 0.77, + "grad_norm": 0.502287193825031, + "learning_rate": 2.5599955659730646e-06, + "loss": 0.3489, "step": 16850 }, { - "epoch": 0.97, - "grad_norm": 0.7682960082736956, - "learning_rate": 5.3095387588512074e-08, - "loss": 0.2587, + "epoch": 0.77, + "grad_norm": 0.5000799048716301, + "learning_rate": 2.559001451770753e-06, + "loss": 0.2656, "step": 16851 }, { - "epoch": 0.97, - "grad_norm": 0.36395943813381715, - "learning_rate": 5.2904049523009315e-08, - "loss": 0.2663, + "epoch": 0.77, + "grad_norm": 0.3808480942109761, + "learning_rate": 2.5580075023049744e-06, + "loss": 0.2555, "step": 16852 }, { - "epoch": 0.97, - "grad_norm": 0.38743575817296055, - "learning_rate": 5.271305592501108e-08, - "loss": 0.3176, + "epoch": 0.77, + "grad_norm": 1.3923487116574238, + "learning_rate": 2.557013717597737e-06, + "loss": 0.5902, "step": 16853 }, { - "epoch": 0.97, - "grad_norm": 0.417320125421086, - "learning_rate": 5.252240680113319e-08, - "loss": 0.2276, + "epoch": 0.77, + "grad_norm": 0.2156226855148095, + "learning_rate": 2.556020097671046e-06, + "loss": 0.1835, "step": 16854 }, { - "epoch": 0.97, - "grad_norm": 0.22973767123986893, - "learning_rate": 5.233210215797591e-08, - "loss": 0.104, + "epoch": 0.77, + "grad_norm": 0.3346579729252059, + "learning_rate": 2.555026642546892e-06, + "loss": 0.2521, "step": 16855 }, { - "epoch": 0.97, - "grad_norm": 0.3841946691455176, - "learning_rate": 5.2142142002129524e-08, - "loss": 0.3001, + "epoch": 0.77, + "grad_norm": 0.6987108622580954, + "learning_rate": 2.5540333522472717e-06, + "loss": 0.3875, "step": 16856 }, { - "epoch": 0.97, - "grad_norm": 0.34093585258614106, - "learning_rate": 5.195252634017434e-08, - "loss": 0.2781, + "epoch": 0.77, + "grad_norm": 0.49850009008590246, + "learning_rate": 2.55304022679418e-06, + "loss": 0.2672, "step": 16857 }, { - "epoch": 0.97, - "grad_norm": 0.5523198990332471, - "learning_rate": 5.1763255178673974e-08, - "loss": 0.2075, + "epoch": 0.77, + "grad_norm": 0.4267393320348157, + "learning_rate": 2.5520472662095975e-06, + "loss": 0.2845, "step": 16858 }, { - "epoch": 0.97, - "grad_norm": 0.4296501087072229, - "learning_rate": 5.1574328524184316e-08, - "loss": 0.2917, + "epoch": 0.77, + "grad_norm": 0.4083958525798289, + "learning_rate": 2.5510544705155125e-06, + "loss": 0.2966, "step": 16859 }, { - "epoch": 0.97, - "grad_norm": 0.47266839506660946, - "learning_rate": 5.1385746383249e-08, - "loss": 0.32, + "epoch": 0.77, + "grad_norm": 0.46755526510637707, + "learning_rate": 2.5500618397339004e-06, + "loss": 0.2531, "step": 16860 }, { - "epoch": 0.97, - "grad_norm": 0.21272369188801774, - "learning_rate": 5.1197508762397265e-08, - "loss": 0.1139, + "epoch": 0.77, + "grad_norm": 0.41370677999482647, + "learning_rate": 2.5490693738867377e-06, + "loss": 0.1827, "step": 16861 }, { - "epoch": 0.97, - "grad_norm": 0.2604253939433585, - "learning_rate": 5.1009615668147217e-08, - "loss": 0.2097, + "epoch": 0.77, + "grad_norm": 0.34722232862944413, + "learning_rate": 2.548077072995998e-06, + "loss": 0.2865, "step": 16862 }, { - "epoch": 0.97, - "grad_norm": 0.610360799421692, - "learning_rate": 5.082206710700699e-08, - "loss": 0.3483, + "epoch": 0.77, + "grad_norm": 0.4193843076091362, + "learning_rate": 2.5470849370836526e-06, + "loss": 0.279, "step": 16863 }, { - "epoch": 0.97, - "grad_norm": 0.295175344003875, - "learning_rate": 5.063486308547028e-08, - "loss": 0.2115, + "epoch": 0.77, + "grad_norm": 0.37035555149855964, + "learning_rate": 2.5460929661716637e-06, + "loss": 0.1568, "step": 16864 }, { - "epoch": 0.97, - "grad_norm": 0.33680233097645323, - "learning_rate": 5.044800361002078e-08, - "loss": 0.2811, + "epoch": 0.77, + "grad_norm": 0.6591113577096058, + "learning_rate": 2.5451011602819866e-06, + "loss": 0.2979, "step": 16865 }, { - "epoch": 0.97, - "grad_norm": 1.372302629109178, - "learning_rate": 5.0261488687128876e-08, - "loss": 0.62, + "epoch": 0.77, + "grad_norm": 0.36267197400145623, + "learning_rate": 2.5441095194365894e-06, + "loss": 0.2842, "step": 16866 }, { - "epoch": 0.97, - "grad_norm": 0.24720787872927993, - "learning_rate": 5.007531832325385e-08, - "loss": 0.1813, + "epoch": 0.77, + "grad_norm": 0.3414372005557124, + "learning_rate": 2.5431180436574197e-06, + "loss": 0.2125, "step": 16867 }, { - "epoch": 0.97, - "grad_norm": 0.2469605731267985, - "learning_rate": 4.9889492524842766e-08, - "loss": 0.2309, + "epoch": 0.77, + "grad_norm": 0.9238118130279691, + "learning_rate": 2.542126732966432e-06, + "loss": 0.5234, "step": 16868 }, { - "epoch": 0.97, - "grad_norm": 0.5687789725130987, - "learning_rate": 4.970401129833047e-08, - "loss": 0.3839, + "epoch": 0.77, + "grad_norm": 0.3769564465805755, + "learning_rate": 2.5411355873855683e-06, + "loss": 0.2167, "step": 16869 }, { - "epoch": 0.97, - "grad_norm": 0.7360556561940557, - "learning_rate": 4.9518874650139604e-08, - "loss": 0.3072, + "epoch": 0.77, + "grad_norm": 0.23081800478675196, + "learning_rate": 2.5401446069367717e-06, + "loss": 0.2052, "step": 16870 }, { - "epoch": 0.97, - "grad_norm": 0.36413256453148496, - "learning_rate": 4.933408258668393e-08, - "loss": 0.2074, + "epoch": 0.78, + "grad_norm": 1.4512488532012031, + "learning_rate": 2.5391537916419883e-06, + "loss": 0.5289, "step": 16871 }, { - "epoch": 0.97, - "grad_norm": 0.33474022926915215, - "learning_rate": 4.914963511436055e-08, - "loss": 0.2959, + "epoch": 0.78, + "grad_norm": 0.7515583571731129, + "learning_rate": 2.5381631415231455e-06, + "loss": 0.3521, "step": 16872 }, { - "epoch": 0.97, - "grad_norm": 0.3531737978180413, - "learning_rate": 4.896553223955658e-08, - "loss": 0.2193, + "epoch": 0.78, + "grad_norm": 0.4185299488064976, + "learning_rate": 2.5371726566021794e-06, + "loss": 0.3148, "step": 16873 }, { - "epoch": 0.97, - "grad_norm": 0.3508589055917248, - "learning_rate": 4.878177396864914e-08, - "loss": 0.2221, + "epoch": 0.78, + "grad_norm": 0.30117332410518094, + "learning_rate": 2.536182336901021e-06, + "loss": 0.2274, "step": 16874 }, { - "epoch": 0.97, - "grad_norm": 0.42703325762760735, - "learning_rate": 4.859836030800091e-08, - "loss": 0.261, + "epoch": 0.78, + "grad_norm": 0.28983120390795447, + "learning_rate": 2.535192182441588e-06, + "loss": 0.1837, "step": 16875 }, { - "epoch": 0.97, - "grad_norm": 0.29443074994484736, - "learning_rate": 4.8415291263962383e-08, - "loss": 0.2574, + "epoch": 0.78, + "grad_norm": 0.5925742843759656, + "learning_rate": 2.5342021932458094e-06, + "loss": 0.2979, "step": 16876 }, { - "epoch": 0.97, - "grad_norm": 0.37489421083434077, - "learning_rate": 4.823256684287625e-08, - "loss": 0.2146, + "epoch": 0.78, + "grad_norm": 0.41149221992558704, + "learning_rate": 2.533212369335595e-06, + "loss": 0.2367, "step": 16877 }, { - "epoch": 0.97, - "grad_norm": 1.1846274794014118, - "learning_rate": 4.805018705106745e-08, - "loss": 0.6589, + "epoch": 0.78, + "grad_norm": 0.3697593010579812, + "learning_rate": 2.5322227107328623e-06, + "loss": 0.3104, "step": 16878 }, { - "epoch": 0.97, - "grad_norm": 0.9644830863353985, - "learning_rate": 4.7868151894852054e-08, - "loss": 0.3864, + "epoch": 0.78, + "grad_norm": 0.6798879024518325, + "learning_rate": 2.531233217459521e-06, + "loss": 0.3743, "step": 16879 }, { - "epoch": 0.97, - "grad_norm": 0.24407539568102385, - "learning_rate": 4.768646138053501e-08, - "loss": 0.2382, + "epoch": 0.78, + "grad_norm": 0.35705572783131817, + "learning_rate": 2.5302438895374816e-06, + "loss": 0.2497, "step": 16880 }, { - "epoch": 0.97, - "grad_norm": 0.2693149381967699, - "learning_rate": 4.750511551440906e-08, - "loss": 0.189, + "epoch": 0.78, + "grad_norm": 0.2629687351516884, + "learning_rate": 2.529254726988639e-06, + "loss": 0.1871, "step": 16881 }, { - "epoch": 0.97, - "grad_norm": 1.318880587886877, - "learning_rate": 4.732411430275141e-08, - "loss": 0.455, + "epoch": 0.78, + "grad_norm": 0.4080779476275008, + "learning_rate": 2.5282657298348968e-06, + "loss": 0.2728, "step": 16882 }, { - "epoch": 0.97, - "grad_norm": 0.32987152777098505, - "learning_rate": 4.71434577518326e-08, - "loss": 0.2373, + "epoch": 0.78, + "grad_norm": 0.43083255903203216, + "learning_rate": 2.527276898098153e-06, + "loss": 0.2379, "step": 16883 }, { - "epoch": 0.97, - "grad_norm": 0.33653567396866907, - "learning_rate": 4.696314586790762e-08, - "loss": 0.2356, + "epoch": 0.78, + "grad_norm": 0.822864454535755, + "learning_rate": 2.5262882318002933e-06, + "loss": 0.4453, "step": 16884 }, { - "epoch": 0.97, - "grad_norm": 0.6980998648832283, - "learning_rate": 4.6783178657221486e-08, - "loss": 0.4677, + "epoch": 0.78, + "grad_norm": 0.6661796210757384, + "learning_rate": 2.5252997309632123e-06, + "loss": 0.4124, "step": 16885 }, { - "epoch": 0.97, - "grad_norm": 0.32680210012665406, - "learning_rate": 4.6603556126004756e-08, - "loss": 0.2597, + "epoch": 0.78, + "grad_norm": 0.298500243893485, + "learning_rate": 2.524311395608787e-06, + "loss": 0.2732, "step": 16886 }, { - "epoch": 0.97, - "grad_norm": 0.23594603800560673, - "learning_rate": 4.642427828047913e-08, - "loss": 0.1121, + "epoch": 0.78, + "grad_norm": 0.2682300631141981, + "learning_rate": 2.5233232257589037e-06, + "loss": 0.0997, "step": 16887 }, { - "epoch": 0.97, - "grad_norm": 0.32282382268927773, - "learning_rate": 4.624534512685297e-08, - "loss": 0.2613, + "epoch": 0.78, + "grad_norm": 0.41674266254070313, + "learning_rate": 2.5223352214354403e-06, + "loss": 0.2584, "step": 16888 }, { - "epoch": 0.97, - "grad_norm": 0.3497080476951133, - "learning_rate": 4.6066756671322434e-08, - "loss": 0.2655, + "epoch": 0.78, + "grad_norm": 0.597721082965969, + "learning_rate": 2.5213473826602643e-06, + "loss": 0.3151, "step": 16889 }, { - "epoch": 0.97, - "grad_norm": 0.9983668851762743, - "learning_rate": 4.588851292007257e-08, - "loss": 0.5257, + "epoch": 0.78, + "grad_norm": 0.3815100005426821, + "learning_rate": 2.5203597094552534e-06, + "loss": 0.2606, "step": 16890 }, { - "epoch": 0.97, - "grad_norm": 0.40613672708852766, - "learning_rate": 4.5710613879275115e-08, - "loss": 0.2886, + "epoch": 0.78, + "grad_norm": 0.4368068267935512, + "learning_rate": 2.5193722018422627e-06, + "loss": 0.2705, "step": 16891 }, { - "epoch": 0.97, - "grad_norm": 0.3492455486029241, - "learning_rate": 4.5533059555090684e-08, - "loss": 0.2554, + "epoch": 0.78, + "grad_norm": 0.5683527987148197, + "learning_rate": 2.518384859843168e-06, + "loss": 0.3579, "step": 16892 }, { - "epoch": 0.97, - "grad_norm": 0.4983795945378701, - "learning_rate": 4.535584995366882e-08, - "loss": 0.3268, + "epoch": 0.78, + "grad_norm": 0.2745876981304326, + "learning_rate": 2.517397683479822e-06, + "loss": 0.1575, "step": 16893 }, { - "epoch": 0.97, - "grad_norm": 0.2967937991637615, - "learning_rate": 4.517898508114571e-08, - "loss": 0.0822, + "epoch": 0.78, + "grad_norm": 0.5382777320927795, + "learning_rate": 2.5164106727740754e-06, + "loss": 0.3447, "step": 16894 }, { - "epoch": 0.97, - "grad_norm": 0.3401773758082715, - "learning_rate": 4.500246494364535e-08, - "loss": 0.2568, + "epoch": 0.78, + "grad_norm": 0.39568200048256325, + "learning_rate": 2.515423827747785e-06, + "loss": 0.2861, "step": 16895 }, { - "epoch": 0.97, - "grad_norm": 0.33076498271613985, - "learning_rate": 4.482628954728285e-08, - "loss": 0.3311, + "epoch": 0.78, + "grad_norm": 0.761951256315499, + "learning_rate": 2.514437148422797e-06, + "loss": 0.2979, "step": 16896 }, { - "epoch": 0.97, - "grad_norm": 0.6425786521864342, - "learning_rate": 4.465045889815778e-08, - "loss": 0.3003, + "epoch": 0.78, + "grad_norm": 0.5866253034442265, + "learning_rate": 2.5134506348209588e-06, + "loss": 0.31, "step": 16897 }, { - "epoch": 0.97, - "grad_norm": 0.2972202986795993, - "learning_rate": 4.447497300235859e-08, - "loss": 0.2615, + "epoch": 0.78, + "grad_norm": 0.27683190165021193, + "learning_rate": 2.5124642869641047e-06, + "loss": 0.2752, "step": 16898 }, { - "epoch": 0.97, - "grad_norm": 0.3100186032119291, - "learning_rate": 4.4299831865962653e-08, - "loss": 0.1789, + "epoch": 0.78, + "grad_norm": 0.5070257434932641, + "learning_rate": 2.5114781048740743e-06, + "loss": 0.2275, "step": 16899 }, { - "epoch": 0.97, - "grad_norm": 0.3280509719318347, - "learning_rate": 4.412503549503622e-08, - "loss": 0.2357, + "epoch": 0.78, + "grad_norm": 0.5858175782984074, + "learning_rate": 2.510492088572705e-06, + "loss": 0.1949, "step": 16900 }, { - "epoch": 0.97, - "grad_norm": 0.33145619269969046, - "learning_rate": 4.3950583895631116e-08, - "loss": 0.2536, + "epoch": 0.78, + "grad_norm": 0.3436963448845202, + "learning_rate": 2.509506238081818e-06, + "loss": 0.2528, "step": 16901 }, { - "epoch": 0.97, - "grad_norm": 0.7463227424917745, - "learning_rate": 4.377647707379029e-08, - "loss": 0.5095, + "epoch": 0.78, + "grad_norm": 0.39483053342906405, + "learning_rate": 2.508520553423248e-06, + "loss": 0.2826, "step": 16902 }, { - "epoch": 0.97, - "grad_norm": 0.31520197157597846, - "learning_rate": 4.360271503554114e-08, - "loss": 0.2509, + "epoch": 0.78, + "grad_norm": 0.5527862700215229, + "learning_rate": 2.5075350346188088e-06, + "loss": 0.1774, "step": 16903 }, { - "epoch": 0.97, - "grad_norm": 0.33216346525271806, - "learning_rate": 4.342929778690108e-08, - "loss": 0.2673, + "epoch": 0.78, + "grad_norm": 0.42214610893828297, + "learning_rate": 2.5065496816903223e-06, + "loss": 0.3151, "step": 16904 }, { - "epoch": 0.97, - "grad_norm": 1.4729941097580517, - "learning_rate": 4.325622533387752e-08, - "loss": 0.5392, + "epoch": 0.78, + "grad_norm": 0.5442473586267601, + "learning_rate": 2.505564494659607e-06, + "loss": 0.2434, "step": 16905 }, { - "epoch": 0.97, - "grad_norm": 0.3713416552428508, - "learning_rate": 4.308349768246234e-08, - "loss": 0.1466, + "epoch": 0.78, + "grad_norm": 0.3324821989307847, + "learning_rate": 2.5045794735484675e-06, + "loss": 0.2379, "step": 16906 }, { - "epoch": 0.97, - "grad_norm": 0.28593857290381, - "learning_rate": 4.291111483863741e-08, - "loss": 0.1936, + "epoch": 0.78, + "grad_norm": 0.42777671943191353, + "learning_rate": 2.5035946183787175e-06, + "loss": 0.2876, "step": 16907 }, { - "epoch": 0.97, - "grad_norm": 0.34487575236955365, - "learning_rate": 4.273907680837241e-08, - "loss": 0.2875, + "epoch": 0.78, + "grad_norm": 0.4753776837958063, + "learning_rate": 2.5026099291721517e-06, + "loss": 0.2498, "step": 16908 }, { - "epoch": 0.97, - "grad_norm": 0.5930762811522095, - "learning_rate": 4.2567383597624804e-08, - "loss": 0.394, + "epoch": 0.78, + "grad_norm": 0.308651720522031, + "learning_rate": 2.501625405950582e-06, + "loss": 0.2118, "step": 16909 }, { - "epoch": 0.97, - "grad_norm": 0.2870976878198615, - "learning_rate": 4.239603521234092e-08, - "loss": 0.188, + "epoch": 0.78, + "grad_norm": 0.3896609660569872, + "learning_rate": 2.500641048735798e-06, + "loss": 0.3212, "step": 16910 }, { - "epoch": 0.97, - "grad_norm": 1.5038426691479727, - "learning_rate": 4.2225031658453816e-08, - "loss": 0.6532, + "epoch": 0.78, + "grad_norm": 0.9037952051072807, + "learning_rate": 2.4996568575495906e-06, + "loss": 0.4453, "step": 16911 }, { - "epoch": 0.97, - "grad_norm": 0.22444525476756616, - "learning_rate": 4.205437294188541e-08, - "loss": 0.2015, + "epoch": 0.78, + "grad_norm": 0.6756230401967475, + "learning_rate": 2.498672832413751e-06, + "loss": 0.345, "step": 16912 }, { - "epoch": 0.97, - "grad_norm": 0.2935048318220947, - "learning_rate": 4.1884059068546534e-08, - "loss": 0.1828, + "epoch": 0.78, + "grad_norm": 0.256250434238195, + "learning_rate": 2.4976889733500664e-06, + "loss": 0.1615, "step": 16913 }, { - "epoch": 0.97, - "grad_norm": 0.5488045078725402, - "learning_rate": 4.171409004433358e-08, - "loss": 0.3798, + "epoch": 0.78, + "grad_norm": 0.2910456418231704, + "learning_rate": 2.496705280380318e-06, + "loss": 0.2491, "step": 16914 }, { - "epoch": 0.97, - "grad_norm": 0.887381775060886, - "learning_rate": 4.154446587513406e-08, - "loss": 0.5585, + "epoch": 0.78, + "grad_norm": 1.3657642000799581, + "learning_rate": 2.4957217535262824e-06, + "loss": 0.8027, "step": 16915 }, { - "epoch": 0.97, - "grad_norm": 0.2382508451881509, - "learning_rate": 4.137518656682216e-08, - "loss": 0.2414, + "epoch": 0.78, + "grad_norm": 0.37933924434434824, + "learning_rate": 2.4947383928097325e-06, + "loss": 0.2068, "step": 16916 }, { - "epoch": 0.97, - "grad_norm": 0.6288484829732698, - "learning_rate": 4.120625212525875e-08, - "loss": 0.2158, + "epoch": 0.78, + "grad_norm": 0.5566496674036311, + "learning_rate": 2.4937551982524443e-06, + "loss": 0.3536, "step": 16917 }, { - "epoch": 0.97, - "grad_norm": 0.2514810105291575, - "learning_rate": 4.10376625562936e-08, - "loss": 0.1322, + "epoch": 0.78, + "grad_norm": 0.523803963235113, + "learning_rate": 2.4927721698761796e-06, + "loss": 0.3222, "step": 16918 }, { - "epoch": 0.97, - "grad_norm": 0.4334570249701634, - "learning_rate": 4.086941786576759e-08, - "loss": 0.2788, + "epoch": 0.78, + "grad_norm": 0.27523480750231805, + "learning_rate": 2.4917893077027056e-06, + "loss": 0.1669, "step": 16919 }, { - "epoch": 0.97, - "grad_norm": 0.2745662783710371, - "learning_rate": 4.070151805950384e-08, - "loss": 0.2448, + "epoch": 0.78, + "grad_norm": 0.49927242210888956, + "learning_rate": 2.4908066117537766e-06, + "loss": 0.2544, "step": 16920 }, { - "epoch": 0.97, - "grad_norm": 0.8311620037180578, - "learning_rate": 4.053396314331881e-08, - "loss": 0.5275, + "epoch": 0.78, + "grad_norm": 0.45955856928829436, + "learning_rate": 2.489824082051152e-06, + "loss": 0.3463, "step": 16921 }, { - "epoch": 0.97, - "grad_norm": 0.3124622685734896, - "learning_rate": 4.0366753123014526e-08, - "loss": 0.2508, + "epoch": 0.78, + "grad_norm": 0.2985869777066035, + "learning_rate": 2.4888417186165868e-06, + "loss": 0.2231, "step": 16922 }, { - "epoch": 0.97, - "grad_norm": 1.9206862580118762, - "learning_rate": 4.0199888004381907e-08, - "loss": 0.2277, + "epoch": 0.78, + "grad_norm": 0.9341184370967512, + "learning_rate": 2.4878595214718236e-06, + "loss": 0.3727, "step": 16923 }, { - "epoch": 0.97, - "grad_norm": 0.22491161238978494, - "learning_rate": 4.003336779319855e-08, - "loss": 0.2061, + "epoch": 0.78, + "grad_norm": 0.6890972966248986, + "learning_rate": 2.486877490638613e-06, + "loss": 0.3352, "step": 16924 }, { - "epoch": 0.97, - "grad_norm": 0.3165803242891882, - "learning_rate": 3.9867192495230965e-08, - "loss": 0.2662, + "epoch": 0.78, + "grad_norm": 0.38828034988312793, + "learning_rate": 2.485895626138688e-06, + "loss": 0.2774, "step": 16925 }, { - "epoch": 0.97, - "grad_norm": 0.6313708675545323, - "learning_rate": 3.970136211623343e-08, - "loss": 0.2733, + "epoch": 0.78, + "grad_norm": 0.2372726782202294, + "learning_rate": 2.4849139279937974e-06, + "loss": 0.1658, "step": 16926 }, { - "epoch": 0.97, - "grad_norm": 0.3125624883391193, - "learning_rate": 3.9535876661951356e-08, - "loss": 0.3, + "epoch": 0.78, + "grad_norm": 0.7224570332482729, + "learning_rate": 2.4839323962256668e-06, + "loss": 0.4275, "step": 16927 }, { - "epoch": 0.97, - "grad_norm": 0.5088215981290167, - "learning_rate": 3.937073613811237e-08, - "loss": 0.2573, + "epoch": 0.78, + "grad_norm": 0.37822772641834596, + "learning_rate": 2.482951030856031e-06, + "loss": 0.2662, "step": 16928 }, { - "epoch": 0.97, - "grad_norm": 0.422677352010954, - "learning_rate": 3.920594055043636e-08, - "loss": 0.2661, + "epoch": 0.78, + "grad_norm": 0.3997958380044706, + "learning_rate": 2.481969831906612e-06, + "loss": 0.2641, "step": 16929 }, { - "epoch": 0.97, - "grad_norm": 0.32130140929879897, - "learning_rate": 3.9041489904629857e-08, - "loss": 0.1425, + "epoch": 0.78, + "grad_norm": 0.727595993338251, + "learning_rate": 2.4809887993991344e-06, + "loss": 0.3833, "step": 16930 }, { - "epoch": 0.97, - "grad_norm": 0.4239268555816805, - "learning_rate": 3.8877384206389426e-08, - "loss": 0.3111, + "epoch": 0.78, + "grad_norm": 0.36846946846261613, + "learning_rate": 2.4800079333553217e-06, + "loss": 0.2704, "step": 16931 }, { - "epoch": 0.97, - "grad_norm": 0.29273897539680954, - "learning_rate": 3.8713623461396066e-08, - "loss": 0.2897, + "epoch": 0.78, + "grad_norm": 0.31810168383484183, + "learning_rate": 2.4790272337968813e-06, + "loss": 0.1014, "step": 16932 }, { - "epoch": 0.97, - "grad_norm": 1.7208118477900014, - "learning_rate": 3.855020767532191e-08, - "loss": 0.3823, + "epoch": 0.78, + "grad_norm": 0.35266221344412063, + "learning_rate": 2.47804670074553e-06, + "loss": 0.3013, "step": 16933 }, { - "epoch": 0.97, - "grad_norm": 0.39365836561310663, - "learning_rate": 3.8387136853825776e-08, - "loss": 0.2538, + "epoch": 0.78, + "grad_norm": 0.3476636841227362, + "learning_rate": 2.4770663342229785e-06, + "loss": 0.2716, "step": 16934 }, { - "epoch": 0.97, - "grad_norm": 0.3864142535331856, - "learning_rate": 3.822441100255425e-08, - "loss": 0.2928, + "epoch": 0.78, + "grad_norm": 0.7093547275713081, + "learning_rate": 2.4760861342509235e-06, + "loss": 0.3828, "step": 16935 }, { - "epoch": 0.97, - "grad_norm": 0.4594334507085826, - "learning_rate": 3.806203012714394e-08, - "loss": 0.2365, + "epoch": 0.78, + "grad_norm": 1.509486048090909, + "learning_rate": 2.4751061008510736e-06, + "loss": 0.2192, "step": 16936 }, { - "epoch": 0.97, - "grad_norm": 0.36622375232538923, - "learning_rate": 3.7899994233216996e-08, - "loss": 0.2491, + "epoch": 0.78, + "grad_norm": 0.2666250661033359, + "learning_rate": 2.4741262340451187e-06, + "loss": 0.2376, "step": 16937 }, { - "epoch": 0.97, - "grad_norm": 0.2574689612287729, - "learning_rate": 3.77383033263834e-08, - "loss": 0.2046, + "epoch": 0.78, + "grad_norm": 0.39426939935170785, + "learning_rate": 2.4731465338547556e-06, + "loss": 0.2327, "step": 16938 }, { - "epoch": 0.97, - "grad_norm": 0.32583886214228214, - "learning_rate": 3.757695741224532e-08, - "loss": 0.2641, + "epoch": 0.78, + "grad_norm": 0.37709619473443684, + "learning_rate": 2.4721670003016762e-06, + "loss": 0.1428, "step": 16939 }, { - "epoch": 0.97, - "grad_norm": 0.30582437989842254, - "learning_rate": 3.7415956496388295e-08, - "loss": 0.2625, + "epoch": 0.78, + "grad_norm": 0.37448939671037723, + "learning_rate": 2.4711876334075623e-06, + "loss": 0.2805, "step": 16940 }, { - "epoch": 0.97, - "grad_norm": 1.272482518357978, - "learning_rate": 3.7255300584388976e-08, - "loss": 0.6096, + "epoch": 0.78, + "grad_norm": 0.5090748656575231, + "learning_rate": 2.4702084331941002e-06, + "loss": 0.3063, "step": 16941 }, { - "epoch": 0.97, - "grad_norm": 0.6476407086373608, - "learning_rate": 3.709498968180958e-08, - "loss": 0.371, + "epoch": 0.78, + "grad_norm": 0.45839736240020623, + "learning_rate": 2.4692293996829597e-06, + "loss": 0.2258, "step": 16942 }, { - "epoch": 0.97, - "grad_norm": 0.24076540832073126, - "learning_rate": 3.693502379420233e-08, - "loss": 0.1992, + "epoch": 0.78, + "grad_norm": 0.36634293993376316, + "learning_rate": 2.4682505328958283e-06, + "loss": 0.3026, "step": 16943 }, { - "epoch": 0.97, - "grad_norm": 0.47951685652698056, - "learning_rate": 3.677540292710724e-08, - "loss": 0.3247, + "epoch": 0.78, + "grad_norm": 0.2643927495902993, + "learning_rate": 2.467271832854368e-06, + "loss": 0.1573, "step": 16944 }, { - "epoch": 0.97, - "grad_norm": 0.434123742526139, - "learning_rate": 3.6616127086051e-08, - "loss": 0.3129, + "epoch": 0.78, + "grad_norm": 0.3050608651365397, + "learning_rate": 2.4662932995802514e-06, + "loss": 0.2366, "step": 16945 }, { - "epoch": 0.97, - "grad_norm": 0.1810864281589373, - "learning_rate": 3.64571962765492e-08, - "loss": 0.0715, + "epoch": 0.78, + "grad_norm": 0.35779832033210524, + "learning_rate": 2.4653149330951377e-06, + "loss": 0.2444, "step": 16946 }, { - "epoch": 0.97, - "grad_norm": 0.32436565764474184, - "learning_rate": 3.629861050410743e-08, - "loss": 0.2812, + "epoch": 0.78, + "grad_norm": 0.6983783111526196, + "learning_rate": 2.464336733420689e-06, + "loss": 0.4104, "step": 16947 }, { - "epoch": 0.97, - "grad_norm": 0.4181731715703347, - "learning_rate": 3.6140369774215755e-08, - "loss": 0.3126, + "epoch": 0.78, + "grad_norm": 1.330399029731723, + "learning_rate": 2.4633587005785664e-06, + "loss": 0.5298, "step": 16948 }, { - "epoch": 0.97, - "grad_norm": 0.5866495465077649, - "learning_rate": 3.5982474092355334e-08, - "loss": 0.1783, + "epoch": 0.78, + "grad_norm": 0.30397420918511087, + "learning_rate": 2.4623808345904142e-06, + "loss": 0.1796, "step": 16949 }, { - "epoch": 0.97, - "grad_norm": 0.28246328115634156, - "learning_rate": 3.58249234639918e-08, - "loss": 0.2231, + "epoch": 0.78, + "grad_norm": 0.24412385153374297, + "learning_rate": 2.461403135477888e-06, + "loss": 0.2256, "step": 16950 }, { - "epoch": 0.97, - "grad_norm": 0.3389229225390919, - "learning_rate": 3.566771789458412e-08, - "loss": 0.3261, + "epoch": 0.78, + "grad_norm": 0.7826359483147893, + "learning_rate": 2.4604256032626285e-06, + "loss": 0.4204, "step": 16951 }, { - "epoch": 0.97, - "grad_norm": 0.21439535606761287, - "learning_rate": 3.55108573895746e-08, - "loss": 0.1327, + "epoch": 0.78, + "grad_norm": 0.3216460244693477, + "learning_rate": 2.4594482379662787e-06, + "loss": 0.2106, "step": 16952 }, { - "epoch": 0.97, - "grad_norm": 0.5498107837423801, - "learning_rate": 3.535434195439558e-08, - "loss": 0.3353, + "epoch": 0.78, + "grad_norm": 0.35846615674132337, + "learning_rate": 2.4584710396104807e-06, + "loss": 0.3144, "step": 16953 }, { - "epoch": 0.97, - "grad_norm": 0.6198576731338329, - "learning_rate": 3.5198171594467145e-08, - "loss": 0.3519, + "epoch": 0.78, + "grad_norm": 1.6664077463195153, + "learning_rate": 2.457494008216862e-06, + "loss": 0.5844, "step": 16954 }, { - "epoch": 0.97, - "grad_norm": 0.38369011378607276, - "learning_rate": 3.504234631519721e-08, - "loss": 0.2421, + "epoch": 0.78, + "grad_norm": 0.3588565556874185, + "learning_rate": 2.456517143807057e-06, + "loss": 0.1951, "step": 16955 }, { - "epoch": 0.97, - "grad_norm": 0.2903915426775922, - "learning_rate": 3.4886866121982555e-08, - "loss": 0.2236, + "epoch": 0.78, + "grad_norm": 0.5834727707629743, + "learning_rate": 2.455540446402691e-06, + "loss": 0.2488, "step": 16956 }, { - "epoch": 0.97, - "grad_norm": 1.078379182005405, - "learning_rate": 3.473173102020666e-08, - "loss": 0.7698, + "epoch": 0.78, + "grad_norm": 0.375479290949618, + "learning_rate": 2.454563916025392e-06, + "loss": 0.2879, "step": 16957 }, { - "epoch": 0.97, - "grad_norm": 0.2756133154465786, - "learning_rate": 3.4576941015243003e-08, - "loss": 0.1562, + "epoch": 0.78, + "grad_norm": 0.3419722792136791, + "learning_rate": 2.4535875526967747e-06, + "loss": 0.2217, "step": 16958 }, { - "epoch": 0.97, - "grad_norm": 0.27127846778563536, - "learning_rate": 3.4422496112451745e-08, - "loss": 0.2316, + "epoch": 0.78, + "grad_norm": 1.2990383928409102, + "learning_rate": 2.4526113564384502e-06, + "loss": 0.8104, "step": 16959 }, { - "epoch": 0.97, - "grad_norm": 0.40386928409495443, - "learning_rate": 3.426839631718082e-08, - "loss": 0.3061, + "epoch": 0.78, + "grad_norm": 0.5795741853625167, + "learning_rate": 2.451635327272042e-06, + "loss": 0.248, "step": 16960 }, { - "epoch": 0.97, - "grad_norm": 0.4168805787576852, - "learning_rate": 3.411464163476597e-08, - "loss": 0.2353, + "epoch": 0.78, + "grad_norm": 0.3530166032328826, + "learning_rate": 2.4506594652191485e-06, + "loss": 0.2571, "step": 16961 }, { - "epoch": 0.97, - "grad_norm": 0.47659370916054206, - "learning_rate": 3.3961232070532927e-08, - "loss": 0.2327, + "epoch": 0.78, + "grad_norm": 0.3809267507522062, + "learning_rate": 2.449683770301382e-06, + "loss": 0.2347, "step": 16962 }, { - "epoch": 0.97, - "grad_norm": 0.34162007685684154, - "learning_rate": 3.380816762979411e-08, - "loss": 0.3236, + "epoch": 0.78, + "grad_norm": 0.6809355621931512, + "learning_rate": 2.4487082425403376e-06, + "loss": 0.3997, "step": 16963 }, { - "epoch": 0.97, - "grad_norm": 0.2637327032439912, - "learning_rate": 3.3655448317849725e-08, - "loss": 0.1839, + "epoch": 0.78, + "grad_norm": 0.4678938035625304, + "learning_rate": 2.447732881957614e-06, + "loss": 0.3057, "step": 16964 }, { - "epoch": 0.97, - "grad_norm": 0.4139189954002851, - "learning_rate": 3.350307413998888e-08, - "loss": 0.2518, + "epoch": 0.78, + "grad_norm": 0.2966454338947615, + "learning_rate": 2.446757688574808e-06, + "loss": 0.2432, "step": 16965 }, { - "epoch": 0.97, - "grad_norm": 0.6670530591028055, - "learning_rate": 3.335104510148734e-08, - "loss": 0.3724, + "epoch": 0.78, + "grad_norm": 0.3547027720305422, + "learning_rate": 2.445782662413504e-06, + "loss": 0.1865, "step": 16966 }, { - "epoch": 0.97, - "grad_norm": 0.31514853852025554, - "learning_rate": 3.3199361207610916e-08, - "loss": 0.2825, + "epoch": 0.78, + "grad_norm": 0.4271673021146245, + "learning_rate": 2.444807803495294e-06, + "loss": 0.2669, "step": 16967 }, { - "epoch": 0.97, - "grad_norm": 0.39693340246090936, - "learning_rate": 3.304802246361205e-08, - "loss": 0.2747, + "epoch": 0.78, + "grad_norm": 0.6938130855430237, + "learning_rate": 2.443833111841755e-06, + "loss": 0.2592, "step": 16968 }, { - "epoch": 0.97, - "grad_norm": 0.937981789113555, - "learning_rate": 3.2897028874731006e-08, - "loss": 0.5085, + "epoch": 0.78, + "grad_norm": 0.36401897641925773, + "learning_rate": 2.4428585874744682e-06, + "loss": 0.302, "step": 16969 }, { - "epoch": 0.98, - "grad_norm": 0.35797398099669914, - "learning_rate": 3.274638044619805e-08, - "loss": 0.2483, + "epoch": 0.78, + "grad_norm": 0.4286795573552825, + "learning_rate": 2.44188423041501e-06, + "loss": 0.269, "step": 16970 }, { - "epoch": 0.98, - "grad_norm": 0.24761901524686186, - "learning_rate": 3.2596077183228993e-08, - "loss": 0.2691, + "epoch": 0.78, + "grad_norm": 0.21988824195135032, + "learning_rate": 2.4409100406849496e-06, + "loss": 0.1497, "step": 16971 }, { - "epoch": 0.98, - "grad_norm": 0.4298641626817817, - "learning_rate": 3.244611909102857e-08, - "loss": 0.1868, + "epoch": 0.78, + "grad_norm": 1.4887848221505096, + "learning_rate": 2.439936018305854e-06, + "loss": 0.7312, "step": 16972 }, { - "epoch": 0.98, - "grad_norm": 0.6401200224266734, - "learning_rate": 3.229650617479152e-08, - "loss": 0.3064, + "epoch": 0.78, + "grad_norm": 0.28257214089767047, + "learning_rate": 2.438962163299289e-06, + "loss": 0.2418, "step": 16973 }, { - "epoch": 0.98, - "grad_norm": 0.3937630678490877, - "learning_rate": 3.2147238439697026e-08, - "loss": 0.3115, + "epoch": 0.78, + "grad_norm": 0.5532610360652411, + "learning_rate": 2.4379884756868167e-06, + "loss": 0.3201, "step": 16974 }, { - "epoch": 0.98, - "grad_norm": 0.31684257741207245, - "learning_rate": 3.19983158909154e-08, - "loss": 0.248, + "epoch": 0.78, + "grad_norm": 0.7413047015911038, + "learning_rate": 2.4370149554899915e-06, + "loss": 0.3019, "step": 16975 }, { - "epoch": 0.98, - "grad_norm": 0.47739044032650685, - "learning_rate": 3.1849738533603625e-08, - "loss": 0.273, + "epoch": 0.78, + "grad_norm": 0.337918537603653, + "learning_rate": 2.43604160273036e-06, + "loss": 0.2579, "step": 16976 }, { - "epoch": 0.98, - "grad_norm": 0.4075103274369792, - "learning_rate": 3.1701506372906476e-08, - "loss": 0.2934, + "epoch": 0.78, + "grad_norm": 0.513233075978404, + "learning_rate": 2.4350684174294824e-06, + "loss": 0.3607, "step": 16977 }, { - "epoch": 0.98, - "grad_norm": 0.3212448915171606, - "learning_rate": 3.155361941395763e-08, - "loss": 0.1268, + "epoch": 0.78, + "grad_norm": 0.2969367169992921, + "learning_rate": 2.434095399608897e-06, + "loss": 0.157, "step": 16978 }, { - "epoch": 0.98, - "grad_norm": 0.32874130733070184, - "learning_rate": 3.140607766187853e-08, - "loss": 0.251, + "epoch": 0.78, + "grad_norm": 0.4527418489354421, + "learning_rate": 2.43312254929015e-06, + "loss": 0.2462, "step": 16979 }, { - "epoch": 0.98, - "grad_norm": 0.39753759795242666, - "learning_rate": 3.125888112177733e-08, - "loss": 0.3094, + "epoch": 0.78, + "grad_norm": 0.5878091041638095, + "learning_rate": 2.432149866494774e-06, + "loss": 0.3401, "step": 16980 }, { - "epoch": 0.98, - "grad_norm": 0.8441004038029325, - "learning_rate": 3.1112029798753274e-08, - "loss": 0.5548, + "epoch": 0.78, + "grad_norm": 0.3554207469994959, + "learning_rate": 2.431177351244305e-06, + "loss": 0.2586, "step": 16981 }, { - "epoch": 0.98, - "grad_norm": 0.5623939560608282, - "learning_rate": 3.096552369789119e-08, - "loss": 0.2477, + "epoch": 0.78, + "grad_norm": 0.4366120763728419, + "learning_rate": 2.4302050035602785e-06, + "loss": 0.2609, "step": 16982 }, { - "epoch": 0.98, - "grad_norm": 0.24903041506543536, - "learning_rate": 3.081936282426368e-08, - "loss": 0.2619, + "epoch": 0.78, + "grad_norm": 0.5805507902654186, + "learning_rate": 2.4292328234642136e-06, + "loss": 0.3985, "step": 16983 }, { - "epoch": 0.98, - "grad_norm": 0.29800042452218295, - "learning_rate": 3.067354718293336e-08, - "loss": 0.1459, + "epoch": 0.78, + "grad_norm": 0.21104397541424086, + "learning_rate": 2.428260810977641e-06, + "loss": 0.1549, "step": 16984 }, { - "epoch": 0.98, - "grad_norm": 0.8593406624322196, - "learning_rate": 3.0528076778949536e-08, - "loss": 0.1092, + "epoch": 0.78, + "grad_norm": 0.4054415599968635, + "learning_rate": 2.427288966122069e-06, + "loss": 0.2559, "step": 16985 }, { - "epoch": 0.98, - "grad_norm": 0.34340164329049183, - "learning_rate": 3.038295161734928e-08, - "loss": 0.2913, + "epoch": 0.78, + "grad_norm": 0.49504710822478665, + "learning_rate": 2.4263172889190278e-06, + "loss": 0.3275, "step": 16986 }, { - "epoch": 0.98, - "grad_norm": 0.32860953048223024, - "learning_rate": 3.023817170315857e-08, - "loss": 0.2968, + "epoch": 0.78, + "grad_norm": 1.5769819907829796, + "learning_rate": 2.4253457793900214e-06, + "loss": 0.4378, "step": 16987 }, { - "epoch": 0.98, - "grad_norm": 0.6455934770032342, - "learning_rate": 3.0093737041392293e-08, - "loss": 0.2445, + "epoch": 0.78, + "grad_norm": 0.3696904977715235, + "learning_rate": 2.424374437556557e-06, + "loss": 0.2147, "step": 16988 }, { - "epoch": 0.98, - "grad_norm": 0.4043626940495485, - "learning_rate": 2.994964763704977e-08, - "loss": 0.2695, + "epoch": 0.78, + "grad_norm": 0.34217947780265673, + "learning_rate": 2.4234032634401404e-06, + "loss": 0.2791, "step": 16989 }, { - "epoch": 0.98, - "grad_norm": 0.22943688435845902, - "learning_rate": 2.980590349512258e-08, - "loss": 0.16, + "epoch": 0.78, + "grad_norm": 0.3810004631549262, + "learning_rate": 2.4224322570622725e-06, + "loss": 0.2031, "step": 16990 }, { - "epoch": 0.98, - "grad_norm": 0.3037975887387826, - "learning_rate": 2.966250462058895e-08, - "loss": 0.2288, + "epoch": 0.78, + "grad_norm": 0.44729662107919405, + "learning_rate": 2.421461418444455e-06, + "loss": 0.2074, "step": 16991 }, { - "epoch": 0.98, - "grad_norm": 0.3512971318276826, - "learning_rate": 2.9519451018413804e-08, - "loss": 0.2918, + "epoch": 0.78, + "grad_norm": 0.5177051573339955, + "learning_rate": 2.420490747608174e-06, + "loss": 0.3445, "step": 16992 }, { - "epoch": 0.98, - "grad_norm": 0.9975814522598434, - "learning_rate": 2.9376742693550954e-08, - "loss": 0.4212, + "epoch": 0.78, + "grad_norm": 0.40571388332829544, + "learning_rate": 2.4195202445749232e-06, + "loss": 0.3125, "step": 16993 }, { - "epoch": 0.98, - "grad_norm": 0.7888704691744978, - "learning_rate": 2.9234379650943113e-08, - "loss": 0.3391, + "epoch": 0.78, + "grad_norm": 0.3163997904965117, + "learning_rate": 2.41854990936619e-06, + "loss": 0.2104, "step": 16994 }, { - "epoch": 0.98, - "grad_norm": 0.2782399609130223, - "learning_rate": 2.9092361895519673e-08, - "loss": 0.2218, + "epoch": 0.78, + "grad_norm": 1.3112090192314, + "learning_rate": 2.417579742003453e-06, + "loss": 0.7431, "step": 16995 }, { - "epoch": 0.98, - "grad_norm": 0.22388047250656798, - "learning_rate": 2.895068943219892e-08, - "loss": 0.1858, + "epoch": 0.78, + "grad_norm": 0.31658930059476303, + "learning_rate": 2.4166097425081946e-06, + "loss": 0.2283, "step": 16996 }, { - "epoch": 0.98, - "grad_norm": 0.8291268218917041, - "learning_rate": 2.880936226588693e-08, - "loss": 0.3896, + "epoch": 0.78, + "grad_norm": 0.3161142110992237, + "learning_rate": 2.4156399109018846e-06, + "loss": 0.2167, "step": 16997 }, { - "epoch": 0.98, - "grad_norm": 0.3064483112237091, - "learning_rate": 2.866838040147868e-08, - "loss": 0.21, + "epoch": 0.78, + "grad_norm": 0.39037493545955315, + "learning_rate": 2.414670247205997e-06, + "loss": 0.2413, "step": 16998 }, { - "epoch": 0.98, - "grad_norm": 0.3857237505293713, - "learning_rate": 2.8527743843854704e-08, - "loss": 0.309, + "epoch": 0.78, + "grad_norm": 1.3526551900662174, + "learning_rate": 2.413700751442003e-06, + "loss": 0.6451, "step": 16999 }, { - "epoch": 0.98, - "grad_norm": 0.8772816244367653, - "learning_rate": 2.8387452597886666e-08, - "loss": 0.4426, + "epoch": 0.78, + "grad_norm": 0.7714883264961339, + "learning_rate": 2.4127314236313593e-06, + "loss": 0.3926, "step": 17000 }, { - "epoch": 0.98, - "grad_norm": 0.3463263519606244, - "learning_rate": 2.824750666843179e-08, - "loss": 0.1876, + "epoch": 0.78, + "grad_norm": 0.2806740618592558, + "learning_rate": 2.4117622637955316e-06, + "loss": 0.2224, "step": 17001 }, { - "epoch": 0.98, - "grad_norm": 0.26138233646250353, - "learning_rate": 2.810790606033731e-08, - "loss": 0.203, + "epoch": 0.78, + "grad_norm": 0.6288195870800063, + "learning_rate": 2.410793271955968e-06, + "loss": 0.3762, "step": 17002 }, { - "epoch": 0.98, - "grad_norm": 0.5924213734480587, - "learning_rate": 2.7968650778438245e-08, - "loss": 0.2967, + "epoch": 0.78, + "grad_norm": 0.6299350910753708, + "learning_rate": 2.4098244481341327e-06, + "loss": 0.3056, "step": 17003 }, { - "epoch": 0.98, - "grad_norm": 0.3005423451194237, - "learning_rate": 2.7829740827555185e-08, - "loss": 0.2472, + "epoch": 0.78, + "grad_norm": 0.23840795090350542, + "learning_rate": 2.4088557923514688e-06, + "loss": 0.1529, "step": 17004 }, { - "epoch": 0.98, - "grad_norm": 0.6290851599106121, - "learning_rate": 2.769117621249873e-08, - "loss": 0.3087, + "epoch": 0.78, + "grad_norm": 0.39644351455361826, + "learning_rate": 2.4078873046294183e-06, + "loss": 0.2983, "step": 17005 }, { - "epoch": 0.98, - "grad_norm": 0.5028037464387212, - "learning_rate": 2.7552956938068364e-08, - "loss": 0.366, + "epoch": 0.78, + "grad_norm": 0.6133725305313518, + "learning_rate": 2.406918984989426e-06, + "loss": 0.414, "step": 17006 }, { - "epoch": 0.98, - "grad_norm": 0.26322989882195386, - "learning_rate": 2.741508300905138e-08, - "loss": 0.2482, + "epoch": 0.78, + "grad_norm": 0.41183547091232275, + "learning_rate": 2.405950833452928e-06, + "loss": 0.234, "step": 17007 }, { - "epoch": 0.98, - "grad_norm": 1.7885337997270865, - "learning_rate": 2.727755443021951e-08, - "loss": 0.1453, + "epoch": 0.78, + "grad_norm": 0.49390893702471617, + "learning_rate": 2.404982850041363e-06, + "loss": 0.3352, "step": 17008 }, { - "epoch": 0.98, - "grad_norm": 0.3236513169203668, - "learning_rate": 2.7140371206337834e-08, - "loss": 0.2128, + "epoch": 0.78, + "grad_norm": 0.4074755524145822, + "learning_rate": 2.4040150347761535e-06, + "loss": 0.2637, "step": 17009 }, { - "epoch": 0.98, - "grad_norm": 0.410045538047261, - "learning_rate": 2.7003533342156995e-08, - "loss": 0.2497, + "epoch": 0.78, + "grad_norm": 0.31834719830685027, + "learning_rate": 2.403047387678734e-06, + "loss": 0.1829, "step": 17010 }, { - "epoch": 0.98, - "grad_norm": 0.3294855926484948, - "learning_rate": 2.6867040842414316e-08, - "loss": 0.2396, + "epoch": 0.78, + "grad_norm": 0.5380632403560895, + "learning_rate": 2.4020799087705203e-06, + "loss": 0.2548, "step": 17011 }, { - "epoch": 0.98, - "grad_norm": 0.7913529950119497, - "learning_rate": 2.6730893711837124e-08, - "loss": 0.4242, + "epoch": 0.78, + "grad_norm": 0.4004229048679349, + "learning_rate": 2.4011125980729346e-06, + "loss": 0.2837, "step": 17012 }, { - "epoch": 0.98, - "grad_norm": 0.34653414142176686, - "learning_rate": 2.6595091955139428e-08, - "loss": 0.2621, + "epoch": 0.78, + "grad_norm": 0.33190351679353536, + "learning_rate": 2.4001454556073946e-06, + "loss": 0.2598, "step": 17013 }, { - "epoch": 0.98, - "grad_norm": 0.27650331142096424, - "learning_rate": 2.6459635577026355e-08, - "loss": 0.1645, + "epoch": 0.78, + "grad_norm": 0.7329640788829259, + "learning_rate": 2.399178481395307e-06, + "loss": 0.2985, "step": 17014 }, { - "epoch": 0.98, - "grad_norm": 0.34663882196846774, - "learning_rate": 2.6324524582186374e-08, - "loss": 0.2303, + "epoch": 0.78, + "grad_norm": 0.47025874711866006, + "learning_rate": 2.3982116754580808e-06, + "loss": 0.2752, "step": 17015 }, { - "epoch": 0.98, - "grad_norm": 0.34330890321049773, - "learning_rate": 2.6189758975299074e-08, - "loss": 0.2632, + "epoch": 0.78, + "grad_norm": 0.26092192533856434, + "learning_rate": 2.3972450378171254e-06, + "loss": 0.1709, "step": 17016 }, { - "epoch": 0.98, - "grad_norm": 0.6417232033799707, - "learning_rate": 2.6055338761031835e-08, - "loss": 0.4148, + "epoch": 0.78, + "grad_norm": 0.35230653997915773, + "learning_rate": 2.3962785684938338e-06, + "loss": 0.2472, "step": 17017 }, { - "epoch": 0.98, - "grad_norm": 0.3119913260890965, - "learning_rate": 2.592126394403982e-08, - "loss": 0.2295, + "epoch": 0.78, + "grad_norm": 0.8274106759767784, + "learning_rate": 2.3953122675096096e-06, + "loss": 0.4546, "step": 17018 }, { - "epoch": 0.98, - "grad_norm": 0.37198388859885884, - "learning_rate": 2.5787534528964875e-08, - "loss": 0.2445, + "epoch": 0.78, + "grad_norm": 0.3851232653091529, + "learning_rate": 2.3943461348858367e-06, + "loss": 0.2866, "step": 17019 }, { - "epoch": 0.98, - "grad_norm": 1.3027242854603744, - "learning_rate": 2.5654150520438848e-08, - "loss": 0.3769, + "epoch": 0.78, + "grad_norm": 0.3554613360869296, + "learning_rate": 2.3933801706439154e-06, + "loss": 0.2572, "step": 17020 }, { - "epoch": 0.98, - "grad_norm": 0.3261165338744473, - "learning_rate": 2.5521111923080266e-08, - "loss": 0.1333, + "epoch": 0.78, + "grad_norm": 1.0635957910207081, + "learning_rate": 2.392414374805222e-06, + "loss": 0.417, "step": 17021 }, { - "epoch": 0.98, - "grad_norm": 0.3275988014909243, - "learning_rate": 2.5388418741497668e-08, - "loss": 0.2532, + "epoch": 0.78, + "grad_norm": 0.30596608925244795, + "learning_rate": 2.3914487473911463e-06, + "loss": 0.2363, "step": 17022 }, { - "epoch": 0.98, - "grad_norm": 0.3388604127745746, - "learning_rate": 2.5256070980284042e-08, - "loss": 0.2924, + "epoch": 0.78, + "grad_norm": 0.24436471871516222, + "learning_rate": 2.3904832884230576e-06, + "loss": 0.1132, "step": 17023 }, { - "epoch": 0.98, - "grad_norm": 1.4462487671630624, - "learning_rate": 2.5124068644024613e-08, - "loss": 0.2823, + "epoch": 0.78, + "grad_norm": 0.3591843841107055, + "learning_rate": 2.389517997922336e-06, + "loss": 0.259, "step": 17024 }, { - "epoch": 0.98, - "grad_norm": 0.3366263902770327, - "learning_rate": 2.4992411737289057e-08, - "loss": 0.2503, + "epoch": 0.78, + "grad_norm": 0.33134611909798173, + "learning_rate": 2.388552875910354e-06, + "loss": 0.2682, "step": 17025 }, { - "epoch": 0.98, - "grad_norm": 1.4836462318839685, - "learning_rate": 2.4861100264638172e-08, - "loss": 0.6723, + "epoch": 0.78, + "grad_norm": 0.6710787006465406, + "learning_rate": 2.3875879224084717e-06, + "loss": 0.3887, "step": 17026 }, { - "epoch": 0.98, - "grad_norm": 0.27144366896564376, - "learning_rate": 2.473013423061832e-08, - "loss": 0.221, + "epoch": 0.78, + "grad_norm": 0.6284728320169134, + "learning_rate": 2.386623137438059e-06, + "loss": 0.132, "step": 17027 }, { - "epoch": 0.98, - "grad_norm": 0.31117856512925224, - "learning_rate": 2.459951363976476e-08, - "loss": 0.2513, + "epoch": 0.78, + "grad_norm": 0.3248587771771781, + "learning_rate": 2.3856585210204695e-06, + "loss": 0.2409, "step": 17028 }, { - "epoch": 0.98, - "grad_norm": 0.38810367971759135, - "learning_rate": 2.4469238496600546e-08, - "loss": 0.2505, + "epoch": 0.78, + "grad_norm": 0.2935524378890023, + "learning_rate": 2.3846940731770606e-06, + "loss": 0.2417, "step": 17029 }, { - "epoch": 0.98, - "grad_norm": 0.33623722779449305, - "learning_rate": 2.433930880563762e-08, - "loss": 0.2981, + "epoch": 0.78, + "grad_norm": 0.5715982008755994, + "learning_rate": 2.3837297939291893e-06, + "loss": 0.2246, "step": 17030 }, { - "epoch": 0.98, - "grad_norm": 0.2907484340689656, - "learning_rate": 2.4209724571376826e-08, - "loss": 0.1823, + "epoch": 0.78, + "grad_norm": 0.40105583114530163, + "learning_rate": 2.382765683298196e-06, + "loss": 0.3011, "step": 17031 }, { - "epoch": 0.98, - "grad_norm": 1.158270296591638, - "learning_rate": 2.4080485798302355e-08, - "loss": 0.4486, + "epoch": 0.78, + "grad_norm": 0.46717818540006123, + "learning_rate": 2.3818017413054296e-06, + "loss": 0.3235, "step": 17032 }, { - "epoch": 0.98, - "grad_norm": 0.5911414484895722, - "learning_rate": 2.395159249089285e-08, - "loss": 0.3196, + "epoch": 0.78, + "grad_norm": 0.45870153525228036, + "learning_rate": 2.380837967972233e-06, + "loss": 0.1911, "step": 17033 }, { - "epoch": 0.98, - "grad_norm": 0.2877702121234691, - "learning_rate": 2.3823044653610295e-08, - "loss": 0.1904, + "epoch": 0.78, + "grad_norm": 0.44201459612917576, + "learning_rate": 2.3798743633199363e-06, + "loss": 0.3167, "step": 17034 }, { - "epoch": 0.98, - "grad_norm": 0.2822114281047928, - "learning_rate": 2.3694842290907792e-08, - "loss": 0.2648, + "epoch": 0.78, + "grad_norm": 0.3318049435041043, + "learning_rate": 2.378910927369881e-06, + "loss": 0.2125, "step": 17035 }, { - "epoch": 0.98, - "grad_norm": 0.4802254996038075, - "learning_rate": 2.356698540722291e-08, - "loss": 0.2575, + "epoch": 0.78, + "grad_norm": 0.3001763653230405, + "learning_rate": 2.377947660143386e-06, + "loss": 0.223, "step": 17036 }, { - "epoch": 0.98, - "grad_norm": 0.2828728976084193, - "learning_rate": 2.343947400698432e-08, - "loss": 0.1845, + "epoch": 0.78, + "grad_norm": 0.3325359961912638, + "learning_rate": 2.3769845616617895e-06, + "loss": 0.2649, "step": 17037 }, { - "epoch": 0.98, - "grad_norm": 0.3516289049644702, - "learning_rate": 2.3312308094607382e-08, - "loss": 0.2959, + "epoch": 0.78, + "grad_norm": 0.9273895352271897, + "learning_rate": 2.3760216319464047e-06, + "loss": 0.403, "step": 17038 }, { - "epoch": 0.98, - "grad_norm": 0.7073007426790366, - "learning_rate": 2.3185487674497463e-08, - "loss": 0.3777, + "epoch": 0.78, + "grad_norm": 1.710105467800304, + "learning_rate": 2.375058871018555e-06, + "loss": 0.7954, "step": 17039 }, { - "epoch": 0.98, - "grad_norm": 0.3036204352872146, - "learning_rate": 2.3059012751044386e-08, - "loss": 0.2045, + "epoch": 0.78, + "grad_norm": 0.2882441236405574, + "learning_rate": 2.3740962788995512e-06, + "loss": 0.2094, "step": 17040 }, { - "epoch": 0.98, - "grad_norm": 1.0686989334600154, - "learning_rate": 2.2932883328629087e-08, - "loss": 0.7031, + "epoch": 0.78, + "grad_norm": 0.36850261192660166, + "learning_rate": 2.373133855610705e-06, + "loss": 0.2679, "step": 17041 }, { - "epoch": 0.98, - "grad_norm": 0.26671675957112734, - "learning_rate": 2.280709941161807e-08, - "loss": 0.2414, + "epoch": 0.78, + "grad_norm": 0.7261630554160161, + "learning_rate": 2.3721716011733285e-06, + "loss": 0.3997, "step": 17042 }, { - "epoch": 0.98, - "grad_norm": 0.3138331144274908, - "learning_rate": 2.268166100436897e-08, - "loss": 0.2286, + "epoch": 0.78, + "grad_norm": 0.36082796126550604, + "learning_rate": 2.371209515608718e-06, + "loss": 0.2283, "step": 17043 }, { - "epoch": 0.98, - "grad_norm": 0.5218556651137621, - "learning_rate": 2.2556568111223866e-08, - "loss": 0.2401, + "epoch": 0.78, + "grad_norm": 0.3623755025998804, + "learning_rate": 2.3702475989381778e-06, + "loss": 0.2353, "step": 17044 }, { - "epoch": 0.98, - "grad_norm": 0.7268865897870525, - "learning_rate": 2.2431820736517062e-08, - "loss": 0.3442, + "epoch": 0.78, + "grad_norm": 0.5173006028378963, + "learning_rate": 2.3692858511829997e-06, + "loss": 0.2737, "step": 17045 }, { - "epoch": 0.98, - "grad_norm": 0.34937516121279033, - "learning_rate": 2.2307418884566225e-08, - "loss": 0.2662, + "epoch": 0.78, + "grad_norm": 0.38109127130423665, + "learning_rate": 2.3683242723644785e-06, + "loss": 0.2041, "step": 17046 }, { - "epoch": 0.98, - "grad_norm": 0.32484541126394684, - "learning_rate": 2.2183362559681232e-08, - "loss": 0.2435, + "epoch": 0.78, + "grad_norm": 0.4203378925697086, + "learning_rate": 2.3673628625039047e-06, + "loss": 0.2558, "step": 17047 }, { - "epoch": 0.98, - "grad_norm": 0.2538310862070762, - "learning_rate": 2.205965176615643e-08, - "loss": 0.1857, + "epoch": 0.78, + "grad_norm": 0.3243558761593813, + "learning_rate": 2.3664016216225584e-06, + "loss": 0.2958, "step": 17048 }, { - "epoch": 0.98, - "grad_norm": 0.36626610756991873, - "learning_rate": 2.1936286508278393e-08, - "loss": 0.234, + "epoch": 0.78, + "grad_norm": 0.37404677451811263, + "learning_rate": 2.365440549741722e-06, + "loss": 0.2247, "step": 17049 }, { - "epoch": 0.98, - "grad_norm": 0.31560292290674424, - "learning_rate": 2.1813266790315922e-08, - "loss": 0.2447, + "epoch": 0.78, + "grad_norm": 0.5301435387152867, + "learning_rate": 2.364479646882675e-06, + "loss": 0.2637, "step": 17050 }, { - "epoch": 0.98, - "grad_norm": 0.8116602561966607, - "learning_rate": 2.169059261653228e-08, - "loss": 0.3647, + "epoch": 0.78, + "grad_norm": 0.8986516359301125, + "learning_rate": 2.3635189130666914e-06, + "loss": 0.2728, "step": 17051 }, { - "epoch": 0.98, - "grad_norm": 0.646895185331955, - "learning_rate": 2.1568263991174065e-08, - "loss": 0.2433, + "epoch": 0.78, + "grad_norm": 0.3941178563055379, + "learning_rate": 2.3625583483150384e-06, + "loss": 0.2673, "step": 17052 }, { - "epoch": 0.98, - "grad_norm": 0.3426135098172122, - "learning_rate": 2.144628091847678e-08, - "loss": 0.2603, + "epoch": 0.78, + "grad_norm": 0.359800135648748, + "learning_rate": 2.3615979526489773e-06, + "loss": 0.2573, "step": 17053 }, { - "epoch": 0.98, - "grad_norm": 0.2182673378928602, - "learning_rate": 2.1324643402667045e-08, - "loss": 0.2074, + "epoch": 0.78, + "grad_norm": 0.7914490253622418, + "learning_rate": 2.360637726089782e-06, + "loss": 0.4397, "step": 17054 }, { - "epoch": 0.98, - "grad_norm": 0.4041287114922914, - "learning_rate": 2.1203351447954824e-08, - "loss": 0.2777, + "epoch": 0.78, + "grad_norm": 0.35233103321829007, + "learning_rate": 2.359677668658701e-06, + "loss": 0.2657, "step": 17055 }, { - "epoch": 0.98, - "grad_norm": 0.4575676863248586, - "learning_rate": 2.10824050585412e-08, - "loss": 0.2954, + "epoch": 0.78, + "grad_norm": 0.21773616778191962, + "learning_rate": 2.3587177803769945e-06, + "loss": 0.1759, "step": 17056 }, { - "epoch": 0.98, - "grad_norm": 1.0410511963285844, - "learning_rate": 2.0961804238616156e-08, - "loss": 0.2411, + "epoch": 0.78, + "grad_norm": 1.1736885065220493, + "learning_rate": 2.3577580612659102e-06, + "loss": 0.4617, "step": 17057 }, { - "epoch": 0.98, - "grad_norm": 0.29720838275806993, - "learning_rate": 2.0841548992354132e-08, - "loss": 0.2345, + "epoch": 0.78, + "grad_norm": 0.35292321350770617, + "learning_rate": 2.3567985113466963e-06, + "loss": 0.2475, "step": 17058 }, { - "epoch": 0.98, - "grad_norm": 0.37419418678071553, - "learning_rate": 2.0721639323919573e-08, - "loss": 0.3246, + "epoch": 0.78, + "grad_norm": 0.7317400272212284, + "learning_rate": 2.3558391306405994e-06, + "loss": 0.2975, "step": 17059 }, { - "epoch": 0.98, - "grad_norm": 0.16552952889999026, - "learning_rate": 2.0602075237465825e-08, - "loss": 0.0844, + "epoch": 0.78, + "grad_norm": 0.3522989578142284, + "learning_rate": 2.354879919168854e-06, + "loss": 0.2966, "step": 17060 }, { - "epoch": 0.98, - "grad_norm": 0.41038045330779327, - "learning_rate": 2.0482856737132906e-08, - "loss": 0.2749, + "epoch": 0.78, + "grad_norm": 0.34447940721456605, + "learning_rate": 2.353920876952701e-06, + "loss": 0.2366, "step": 17061 }, { - "epoch": 0.98, - "grad_norm": 0.29247558336905394, - "learning_rate": 2.0363983827049737e-08, - "loss": 0.2884, + "epoch": 0.78, + "grad_norm": 0.43918982122979916, + "learning_rate": 2.3529620040133683e-06, + "loss": 0.2487, "step": 17062 }, { - "epoch": 0.98, - "grad_norm": 1.2705941693360938, - "learning_rate": 2.0245456511333028e-08, - "loss": 0.2699, + "epoch": 0.78, + "grad_norm": 0.36047363796906834, + "learning_rate": 2.3520033003720865e-06, + "loss": 0.1767, "step": 17063 }, { - "epoch": 0.98, - "grad_norm": 0.3362945356885263, - "learning_rate": 2.012727479408616e-08, - "loss": 0.2474, + "epoch": 0.78, + "grad_norm": 0.29012177370671866, + "learning_rate": 2.3510447660500825e-06, + "loss": 0.2535, "step": 17064 }, { - "epoch": 0.98, - "grad_norm": 1.231997007937773, - "learning_rate": 2.0009438679403636e-08, - "loss": 0.6624, + "epoch": 0.78, + "grad_norm": 0.8468750588177727, + "learning_rate": 2.350086401068573e-06, + "loss": 0.4055, "step": 17065 }, { - "epoch": 0.98, - "grad_norm": 0.329125988206052, - "learning_rate": 1.9891948171364417e-08, - "loss": 0.2482, + "epoch": 0.78, + "grad_norm": 0.6602919427195721, + "learning_rate": 2.3491282054487773e-06, + "loss": 0.2758, "step": 17066 }, { - "epoch": 0.98, - "grad_norm": 0.4091946800044302, - "learning_rate": 1.9774803274038578e-08, - "loss": 0.2754, + "epoch": 0.78, + "grad_norm": 0.3703074420279087, + "learning_rate": 2.348170179211909e-06, + "loss": 0.2743, "step": 17067 }, { - "epoch": 0.98, - "grad_norm": 0.35837796725133636, - "learning_rate": 1.9658003991480656e-08, - "loss": 0.258, + "epoch": 0.78, + "grad_norm": 0.2963957637076691, + "learning_rate": 2.347212322379181e-06, + "loss": 0.2201, "step": 17068 }, { - "epoch": 0.98, - "grad_norm": 0.5680477821646825, - "learning_rate": 1.9541550327738524e-08, - "loss": 0.2052, + "epoch": 0.78, + "grad_norm": 0.38544552143432687, + "learning_rate": 2.346254634971796e-06, + "loss": 0.0896, "step": 17069 }, { - "epoch": 0.98, - "grad_norm": 0.2690535902574051, - "learning_rate": 1.942544228684229e-08, - "loss": 0.2, + "epoch": 0.78, + "grad_norm": 0.3573189466180187, + "learning_rate": 2.345297117010954e-06, + "loss": 0.266, "step": 17070 }, { - "epoch": 0.98, - "grad_norm": 0.3929295717834575, - "learning_rate": 1.930967987281429e-08, - "loss": 0.3381, + "epoch": 0.78, + "grad_norm": 0.7373158887140219, + "learning_rate": 2.344339768517857e-06, + "loss": 0.34, "step": 17071 }, { - "epoch": 0.98, - "grad_norm": 0.8100108431977137, - "learning_rate": 1.9194263089662435e-08, - "loss": 0.4147, + "epoch": 0.78, + "grad_norm": 0.36826743272041745, + "learning_rate": 2.3433825895136977e-06, + "loss": 0.2539, "step": 17072 }, { - "epoch": 0.98, - "grad_norm": 0.4949306445932574, - "learning_rate": 1.9079191941384635e-08, - "loss": 0.2105, + "epoch": 0.78, + "grad_norm": 0.3617206081674422, + "learning_rate": 2.3424255800196718e-06, + "loss": 0.2567, "step": 17073 }, { - "epoch": 0.98, - "grad_norm": 0.2799353141961879, - "learning_rate": 1.8964466431964367e-08, - "loss": 0.2766, + "epoch": 0.78, + "grad_norm": 0.31262599127317553, + "learning_rate": 2.34146874005696e-06, + "loss": 0.1804, "step": 17074 }, { - "epoch": 0.98, - "grad_norm": 0.25292916616971384, - "learning_rate": 1.8850086565376236e-08, - "loss": 0.1642, + "epoch": 0.78, + "grad_norm": 1.2874694852176902, + "learning_rate": 2.3405120696467485e-06, + "loss": 0.4419, "step": 17075 }, { - "epoch": 0.98, - "grad_norm": 0.43814973227571347, - "learning_rate": 1.8736052345580403e-08, - "loss": 0.188, + "epoch": 0.78, + "grad_norm": 0.2743640354141252, + "learning_rate": 2.339555568810221e-06, + "loss": 0.231, "step": 17076 }, { - "epoch": 0.98, - "grad_norm": 0.5327994711284099, - "learning_rate": 1.862236377652593e-08, - "loss": 0.3839, + "epoch": 0.78, + "grad_norm": 0.6318625079890814, + "learning_rate": 2.338599237568547e-06, + "loss": 0.365, "step": 17077 }, { - "epoch": 0.98, - "grad_norm": 0.35915994320120836, - "learning_rate": 1.8509020862149673e-08, - "loss": 0.3232, + "epoch": 0.78, + "grad_norm": 1.562500845358626, + "learning_rate": 2.3376430759429047e-06, + "loss": 0.7569, "step": 17078 }, { - "epoch": 0.98, - "grad_norm": 0.4653115137961894, - "learning_rate": 1.8396023606376268e-08, - "loss": 0.3291, + "epoch": 0.78, + "grad_norm": 0.29404639353693074, + "learning_rate": 2.3366870839544565e-06, + "loss": 0.1938, "step": 17079 }, { - "epoch": 0.98, - "grad_norm": 0.29836713476307714, - "learning_rate": 1.828337201311925e-08, - "loss": 0.1782, + "epoch": 0.78, + "grad_norm": 0.3716199289723143, + "learning_rate": 2.3357312616243697e-06, + "loss": 0.292, "step": 17080 }, { - "epoch": 0.98, - "grad_norm": 0.5164581462048005, - "learning_rate": 1.817106608628105e-08, - "loss": 0.2172, + "epoch": 0.78, + "grad_norm": 0.35648859292470936, + "learning_rate": 2.3347756089738093e-06, + "loss": 0.1918, "step": 17081 }, { - "epoch": 0.98, - "grad_norm": 0.28972253741358867, - "learning_rate": 1.805910582974857e-08, - "loss": 0.2673, + "epoch": 0.78, + "grad_norm": 0.3059382487577487, + "learning_rate": 2.333820126023927e-06, + "loss": 0.2174, "step": 17082 }, { - "epoch": 0.98, - "grad_norm": 0.3477015924607169, - "learning_rate": 1.7947491247399808e-08, - "loss": 0.2427, + "epoch": 0.78, + "grad_norm": 0.9372930811664, + "learning_rate": 2.3328648127958776e-06, + "loss": 0.4395, "step": 17083 }, { - "epoch": 0.98, - "grad_norm": 0.577825325414684, - "learning_rate": 1.783622234310056e-08, - "loss": 0.3721, + "epoch": 0.78, + "grad_norm": 0.3742317252521186, + "learning_rate": 2.331909669310811e-06, + "loss": 0.3189, "step": 17084 }, { - "epoch": 0.98, - "grad_norm": 0.41418340683900945, - "learning_rate": 1.772529912070442e-08, - "loss": 0.2571, + "epoch": 0.78, + "grad_norm": 0.3606563030370136, + "learning_rate": 2.3309546955898774e-06, + "loss": 0.1731, "step": 17085 }, { - "epoch": 0.98, - "grad_norm": 0.2782321577020592, - "learning_rate": 1.7614721584051643e-08, - "loss": 0.2388, + "epoch": 0.78, + "grad_norm": 0.3721473241197668, + "learning_rate": 2.329999891654212e-06, + "loss": 0.2489, "step": 17086 }, { - "epoch": 0.98, - "grad_norm": 0.3467169897838902, - "learning_rate": 1.7504489736971385e-08, - "loss": 0.1657, + "epoch": 0.78, + "grad_norm": 0.37280523250104886, + "learning_rate": 2.32904525752496e-06, + "loss": 0.2686, "step": 17087 }, { - "epoch": 0.98, - "grad_norm": 0.7927895004364096, - "learning_rate": 1.73946035832806e-08, - "loss": 0.5113, + "epoch": 0.79, + "grad_norm": 0.4302570620411634, + "learning_rate": 2.328090793223249e-06, + "loss": 0.2604, "step": 17088 }, { - "epoch": 0.98, - "grad_norm": 0.29960578060265763, - "learning_rate": 1.728506312678624e-08, - "loss": 0.2258, + "epoch": 0.79, + "grad_norm": 0.41250084686869154, + "learning_rate": 2.327136498770214e-06, + "loss": 0.2468, "step": 17089 }, { - "epoch": 0.98, - "grad_norm": 0.32245881570389173, - "learning_rate": 1.7175868371281936e-08, - "loss": 0.3108, + "epoch": 0.79, + "grad_norm": 1.2509651302445617, + "learning_rate": 2.326182374186984e-06, + "loss": 0.6774, "step": 17090 }, { - "epoch": 0.98, - "grad_norm": 0.951002243046634, - "learning_rate": 1.7067019320546886e-08, - "loss": 0.3718, + "epoch": 0.79, + "grad_norm": 0.4417184673691241, + "learning_rate": 2.3252284194946783e-06, + "loss": 0.2502, "step": 17091 }, { - "epoch": 0.98, - "grad_norm": 0.38421464500733016, - "learning_rate": 1.6958515978351407e-08, - "loss": 0.2816, + "epoch": 0.79, + "grad_norm": 0.3143739606602177, + "learning_rate": 2.3242746347144173e-06, + "loss": 0.2231, "step": 17092 }, { - "epoch": 0.98, - "grad_norm": 0.15888882951589792, - "learning_rate": 1.6850358348453612e-08, - "loss": 0.0663, + "epoch": 0.79, + "grad_norm": 0.6476153938642922, + "learning_rate": 2.323321019867322e-06, + "loss": 0.2388, "step": 17093 }, { - "epoch": 0.98, - "grad_norm": 0.3003963983604446, - "learning_rate": 1.6742546434598272e-08, - "loss": 0.2845, + "epoch": 0.79, + "grad_norm": 0.44938530916048725, + "learning_rate": 2.322367574974497e-06, + "loss": 0.2779, "step": 17094 }, { - "epoch": 0.98, - "grad_norm": 0.36253705809280756, - "learning_rate": 1.6635080240520186e-08, - "loss": 0.2953, + "epoch": 0.79, + "grad_norm": 0.2974743383959936, + "learning_rate": 2.3214143000570567e-06, + "loss": 0.1467, "step": 17095 }, { - "epoch": 0.98, - "grad_norm": 0.6824644163387067, - "learning_rate": 1.6527959769939705e-08, - "loss": 0.2952, + "epoch": 0.79, + "grad_norm": 0.3475706667457637, + "learning_rate": 2.3204611951360966e-06, + "loss": 0.3164, "step": 17096 }, { - "epoch": 0.98, - "grad_norm": 0.6366164928559752, - "learning_rate": 1.6421185026566088e-08, - "loss": 0.2909, + "epoch": 0.79, + "grad_norm": 0.34567835659424406, + "learning_rate": 2.319508260232731e-06, + "loss": 0.2469, "step": 17097 }, { - "epoch": 0.98, - "grad_norm": 0.24641835079060603, - "learning_rate": 1.631475601409749e-08, - "loss": 0.2552, + "epoch": 0.79, + "grad_norm": 1.515879941080919, + "learning_rate": 2.31855549536805e-06, + "loss": 0.2257, "step": 17098 }, { - "epoch": 0.98, - "grad_norm": 0.31674059694920764, - "learning_rate": 1.6208672736219843e-08, - "loss": 0.0891, + "epoch": 0.79, + "grad_norm": 0.4459190336607469, + "learning_rate": 2.317602900563143e-06, + "loss": 0.3252, "step": 17099 }, { - "epoch": 0.98, - "grad_norm": 0.5181426442377051, - "learning_rate": 1.6102935196606883e-08, - "loss": 0.3263, + "epoch": 0.79, + "grad_norm": 0.2824478728728605, + "learning_rate": 2.3166504758391075e-06, + "loss": 0.2558, "step": 17100 }, { - "epoch": 0.98, - "grad_norm": 0.3799271362409939, - "learning_rate": 1.5997543398919013e-08, - "loss": 0.2932, + "epoch": 0.79, + "grad_norm": 0.3180683048293316, + "learning_rate": 2.3156982212170187e-06, + "loss": 0.1897, "step": 17101 }, { - "epoch": 0.98, - "grad_norm": 0.32179637117374854, - "learning_rate": 1.5892497346807754e-08, - "loss": 0.271, + "epoch": 0.79, + "grad_norm": 1.0425945800789964, + "learning_rate": 2.3147461367179702e-06, + "loss": 0.5229, "step": 17102 }, { - "epoch": 0.98, - "grad_norm": 0.7109955981462678, - "learning_rate": 1.57877970439102e-08, - "loss": 0.3308, + "epoch": 0.79, + "grad_norm": 0.71084973938481, + "learning_rate": 2.3137942223630326e-06, + "loss": 0.3683, "step": 17103 }, { - "epoch": 0.98, - "grad_norm": 0.3955832021279465, - "learning_rate": 1.568344249385123e-08, - "loss": 0.2954, + "epoch": 0.79, + "grad_norm": 0.2726141481229837, + "learning_rate": 2.3128424781732863e-06, + "loss": 0.2569, "step": 17104 }, { - "epoch": 0.98, - "grad_norm": 0.28272887435926286, - "learning_rate": 1.5579433700245727e-08, - "loss": 0.1893, + "epoch": 0.79, + "grad_norm": 0.63819085184092, + "learning_rate": 2.3118909041697957e-06, + "loss": 0.2915, "step": 17105 }, { - "epoch": 0.98, - "grad_norm": 0.3781045982315596, - "learning_rate": 1.5475770666694145e-08, - "loss": 0.2298, + "epoch": 0.79, + "grad_norm": 0.37101634914380033, + "learning_rate": 2.31093950037363e-06, + "loss": 0.1975, "step": 17106 }, { - "epoch": 0.98, - "grad_norm": 0.3939611312355522, - "learning_rate": 1.5372453396788057e-08, - "loss": 0.284, + "epoch": 0.79, + "grad_norm": 0.31801777304923756, + "learning_rate": 2.309988266805856e-06, + "loss": 0.2378, "step": 17107 }, { - "epoch": 0.98, - "grad_norm": 0.7118015447019852, - "learning_rate": 1.52694818941046e-08, - "loss": 0.439, + "epoch": 0.79, + "grad_norm": 0.38403987422240415, + "learning_rate": 2.3090372034875274e-06, + "loss": 0.2779, "step": 17108 }, { - "epoch": 0.98, - "grad_norm": 0.4439561205053077, - "learning_rate": 1.516685616220981e-08, - "loss": 0.1962, + "epoch": 0.79, + "grad_norm": 0.5013916052041003, + "learning_rate": 2.308086310439702e-06, + "loss": 0.3036, "step": 17109 }, { - "epoch": 0.98, - "grad_norm": 0.28543838310868797, - "learning_rate": 1.506457620465751e-08, - "loss": 0.2636, + "epoch": 0.79, + "grad_norm": 0.5187925738845816, + "learning_rate": 2.3071355876834357e-06, + "loss": 0.2967, "step": 17110 }, { - "epoch": 0.98, - "grad_norm": 0.32304533937000707, - "learning_rate": 1.4962642024989316e-08, - "loss": 0.1582, + "epoch": 0.79, + "grad_norm": 0.45077246538650934, + "learning_rate": 2.3061850352397697e-06, + "loss": 0.2518, "step": 17111 }, { - "epoch": 0.98, - "grad_norm": 0.556431431252915, - "learning_rate": 1.4861053626734623e-08, - "loss": 0.2584, + "epoch": 0.79, + "grad_norm": 0.33520045133619686, + "learning_rate": 2.3052346531297542e-06, + "loss": 0.2577, "step": 17112 }, { - "epoch": 0.98, - "grad_norm": 0.3452856886959734, - "learning_rate": 1.475981101341284e-08, - "loss": 0.2698, + "epoch": 0.79, + "grad_norm": 0.2776784349295418, + "learning_rate": 2.3042844413744223e-06, + "loss": 0.1939, "step": 17113 }, { - "epoch": 0.98, - "grad_norm": 0.34444279352265655, - "learning_rate": 1.4658914188530049e-08, - "loss": 0.3137, + "epoch": 0.79, + "grad_norm": 1.370734979080085, + "learning_rate": 2.303334399994821e-06, + "loss": 0.7497, "step": 17114 }, { - "epoch": 0.98, - "grad_norm": 1.0069469226681833, - "learning_rate": 1.4558363155579013e-08, - "loss": 0.1646, + "epoch": 0.79, + "grad_norm": 0.40821383346713336, + "learning_rate": 2.302384529011975e-06, + "loss": 0.1967, "step": 17115 }, { - "epoch": 0.98, - "grad_norm": 0.3590631076036183, - "learning_rate": 1.4458157918042503e-08, - "loss": 0.2807, + "epoch": 0.79, + "grad_norm": 0.30902475061405105, + "learning_rate": 2.301434828446919e-06, + "loss": 0.2799, "step": 17116 }, { - "epoch": 0.98, - "grad_norm": 0.3076290801115484, - "learning_rate": 1.4358298479391076e-08, - "loss": 0.2447, + "epoch": 0.79, + "grad_norm": 0.70340382644846, + "learning_rate": 2.300485298320676e-06, + "loss": 0.3983, "step": 17117 }, { - "epoch": 0.98, - "grad_norm": 0.4345708272045855, - "learning_rate": 1.4258784843081963e-08, - "loss": 0.3126, + "epoch": 0.79, + "grad_norm": 0.36015960717053686, + "learning_rate": 2.2995359386542625e-06, + "loss": 0.1989, "step": 17118 }, { - "epoch": 0.98, - "grad_norm": 0.35796512209441345, - "learning_rate": 1.415961701256241e-08, - "loss": 0.2191, + "epoch": 0.79, + "grad_norm": 0.26000985536088944, + "learning_rate": 2.2985867494687065e-06, + "loss": 0.1571, "step": 17119 }, { - "epoch": 0.98, - "grad_norm": 0.43834411557560704, - "learning_rate": 1.4060794991265226e-08, - "loss": 0.3003, + "epoch": 0.79, + "grad_norm": 0.3848284667719156, + "learning_rate": 2.297637730785015e-06, + "loss": 0.3138, "step": 17120 }, { - "epoch": 0.98, - "grad_norm": 0.3023620756441359, - "learning_rate": 1.3962318782613226e-08, - "loss": 0.267, + "epoch": 0.79, + "grad_norm": 0.667179532048464, + "learning_rate": 2.296688882624203e-06, + "loss": 0.1717, "step": 17121 }, { - "epoch": 0.98, - "grad_norm": 0.33571420647735895, - "learning_rate": 1.386418839001702e-08, - "loss": 0.1946, + "epoch": 0.79, + "grad_norm": 0.4079319134771883, + "learning_rate": 2.2957402050072717e-06, + "loss": 0.2785, "step": 17122 }, { - "epoch": 0.98, - "grad_norm": 0.7962831409761743, - "learning_rate": 1.3766403816873886e-08, - "loss": 0.3344, + "epoch": 0.79, + "grad_norm": 0.3917199888762605, + "learning_rate": 2.2947916979552265e-06, + "loss": 0.2922, "step": 17123 }, { - "epoch": 0.98, - "grad_norm": 0.40328605944188584, - "learning_rate": 1.3668965066571115e-08, - "loss": 0.2527, + "epoch": 0.79, + "grad_norm": 0.5775566247225241, + "learning_rate": 2.2938433614890696e-06, + "loss": 0.0951, "step": 17124 }, { - "epoch": 0.98, - "grad_norm": 0.27251997461785155, - "learning_rate": 1.3571872142483789e-08, - "loss": 0.1927, + "epoch": 0.79, + "grad_norm": 0.3078485501239748, + "learning_rate": 2.2928951956297907e-06, + "loss": 0.2334, "step": 17125 }, { - "epoch": 0.98, - "grad_norm": 0.27747022869745464, - "learning_rate": 1.3475125047971438e-08, - "loss": 0.2665, + "epoch": 0.79, + "grad_norm": 0.45069433396455005, + "learning_rate": 2.2919472003983843e-06, + "loss": 0.3179, "step": 17126 }, { - "epoch": 0.98, - "grad_norm": 1.2894017763114165, - "learning_rate": 1.3378723786386938e-08, - "loss": 0.6075, + "epoch": 0.79, + "grad_norm": 0.45394170867305567, + "learning_rate": 2.290999375815841e-06, + "loss": 0.3074, "step": 17127 }, { - "epoch": 0.98, - "grad_norm": 0.3370384947172715, - "learning_rate": 1.3282668361067619e-08, - "loss": 0.185, + "epoch": 0.79, + "grad_norm": 0.30916602513827823, + "learning_rate": 2.2900517219031383e-06, + "loss": 0.234, "step": 17128 }, { - "epoch": 0.98, - "grad_norm": 0.33942967898729376, - "learning_rate": 1.3186958775339709e-08, - "loss": 0.3023, + "epoch": 0.79, + "grad_norm": 1.7794223862372456, + "learning_rate": 2.289104238681261e-06, + "loss": 0.5018, "step": 17129 }, { - "epoch": 0.98, - "grad_norm": 0.6884605520324051, - "learning_rate": 1.3091595032518333e-08, - "loss": 0.3898, + "epoch": 0.79, + "grad_norm": 0.738548153583518, + "learning_rate": 2.288156926171182e-06, + "loss": 0.3116, "step": 17130 }, { - "epoch": 0.98, - "grad_norm": 0.32177530158067874, - "learning_rate": 1.2996577135906408e-08, - "loss": 0.2731, + "epoch": 0.79, + "grad_norm": 0.22264537689307137, + "learning_rate": 2.287209784393877e-06, + "loss": 0.2071, "step": 17131 }, { - "epoch": 0.98, - "grad_norm": 0.22547779418670175, - "learning_rate": 1.290190508879241e-08, - "loss": 0.1392, + "epoch": 0.79, + "grad_norm": 1.1944300992778474, + "learning_rate": 2.2862628133703123e-06, + "loss": 0.6965, "step": 17132 }, { - "epoch": 0.98, - "grad_norm": 0.26112428619009537, - "learning_rate": 1.280757889445594e-08, - "loss": 0.2366, + "epoch": 0.79, + "grad_norm": 0.5490869534171477, + "learning_rate": 2.285316013121458e-06, + "loss": 0.3031, "step": 17133 }, { - "epoch": 0.98, - "grad_norm": 0.35491189542978396, - "learning_rate": 1.2713598556164386e-08, - "loss": 0.2418, - "step": 17134 + "epoch": 0.79, + "grad_norm": 0.3225210198201399, + "learning_rate": 2.2843693836682714e-06, + "loss": 0.191, + "step": 17134 }, { - "epoch": 0.98, - "grad_norm": 0.6705417107991852, - "learning_rate": 1.2619964077170699e-08, - "loss": 0.2883, + "epoch": 0.79, + "grad_norm": 0.39377404124638016, + "learning_rate": 2.283422925031704e-06, + "loss": 0.3232, "step": 17135 }, { - "epoch": 0.98, - "grad_norm": 0.7273092453633878, - "learning_rate": 1.252667546071784e-08, - "loss": 0.3569, + "epoch": 0.79, + "grad_norm": 0.7841319356189883, + "learning_rate": 2.2824766372327223e-06, + "loss": 0.3097, "step": 17136 }, { - "epoch": 0.98, - "grad_norm": 0.3310531820505807, - "learning_rate": 1.2433732710037671e-08, - "loss": 0.2706, + "epoch": 0.79, + "grad_norm": 0.29212967386276545, + "learning_rate": 2.2815305202922664e-06, + "loss": 0.1793, "step": 17137 }, { - "epoch": 0.98, - "grad_norm": 0.23516781376709966, - "learning_rate": 1.2341135828347617e-08, - "loss": 0.2121, + "epoch": 0.79, + "grad_norm": 0.6916170384877859, + "learning_rate": 2.2805845742312882e-06, + "loss": 0.4053, "step": 17138 }, { - "epoch": 0.98, - "grad_norm": 0.5182882686249745, - "learning_rate": 1.2248884818854001e-08, - "loss": 0.2102, + "epoch": 0.79, + "grad_norm": 0.3833139120205697, + "learning_rate": 2.279638799070726e-06, + "loss": 0.2807, "step": 17139 }, { - "epoch": 0.98, - "grad_norm": 0.347744576905726, - "learning_rate": 1.2156979684753157e-08, - "loss": 0.2669, + "epoch": 0.79, + "grad_norm": 0.34013155265780776, + "learning_rate": 2.2786931948315182e-06, + "loss": 0.2956, "step": 17140 }, { - "epoch": 0.98, - "grad_norm": 0.30419991686902886, - "learning_rate": 1.2065420429225872e-08, - "loss": 0.2622, + "epoch": 0.79, + "grad_norm": 0.567241433355386, + "learning_rate": 2.2777477615346046e-06, + "loss": 0.1188, "step": 17141 }, { - "epoch": 0.98, - "grad_norm": 1.6698959296242852, - "learning_rate": 1.1974207055444054e-08, - "loss": 0.6203, + "epoch": 0.79, + "grad_norm": 0.9734823823247243, + "learning_rate": 2.2768024992009097e-06, + "loss": 0.366, "step": 17142 }, { - "epoch": 0.98, - "grad_norm": 0.3200165060424897, - "learning_rate": 1.1883339566565178e-08, - "loss": 0.2531, + "epoch": 0.79, + "grad_norm": 0.2865073903836222, + "learning_rate": 2.275857407851364e-06, + "loss": 0.2203, "step": 17143 }, { - "epoch": 0.99, - "grad_norm": 0.411575213284914, - "learning_rate": 1.1792817965736725e-08, - "loss": 0.2983, + "epoch": 0.79, + "grad_norm": 0.4066187143781802, + "learning_rate": 2.274912487506893e-06, + "loss": 0.2723, "step": 17144 }, { - "epoch": 0.99, - "grad_norm": 0.2582650364241364, - "learning_rate": 1.1702642256093965e-08, - "loss": 0.1813, + "epoch": 0.79, + "grad_norm": 0.7497357805028236, + "learning_rate": 2.2739677381884117e-06, + "loss": 0.3645, "step": 17145 }, { - "epoch": 0.99, - "grad_norm": 0.3650810325649037, - "learning_rate": 1.1612812440758847e-08, - "loss": 0.2605, + "epoch": 0.79, + "grad_norm": 0.2760260297470873, + "learning_rate": 2.2730231599168407e-06, + "loss": 0.2156, "step": 17146 }, { - "epoch": 0.99, - "grad_norm": 0.5358995044698758, - "learning_rate": 1.1523328522843324e-08, - "loss": 0.3528, + "epoch": 0.79, + "grad_norm": 0.4401887119945976, + "learning_rate": 2.272078752713087e-06, + "loss": 0.2502, "step": 17147 }, { - "epoch": 0.99, - "grad_norm": 0.9446703893782842, - "learning_rate": 1.1434190505443809e-08, - "loss": 0.384, + "epoch": 0.79, + "grad_norm": 0.8590114232034153, + "learning_rate": 2.2711345165980616e-06, + "loss": 0.4194, "step": 17148 }, { - "epoch": 0.99, - "grad_norm": 0.2741899067962737, - "learning_rate": 1.1345398391650053e-08, - "loss": 0.2475, + "epoch": 0.79, + "grad_norm": 0.3834674172716536, + "learning_rate": 2.2701904515926686e-06, + "loss": 0.2664, "step": 17149 }, { - "epoch": 0.99, - "grad_norm": 0.5004404118735434, - "learning_rate": 1.1256952184535153e-08, - "loss": 0.3734, + "epoch": 0.79, + "grad_norm": 0.6514056367495175, + "learning_rate": 2.2692465577178113e-06, + "loss": 0.2979, "step": 17150 }, { - "epoch": 0.99, - "grad_norm": 0.363040297891496, - "learning_rate": 1.1168851887163323e-08, - "loss": 0.1338, + "epoch": 0.79, + "grad_norm": 0.30747226299518776, + "learning_rate": 2.2683028349943814e-06, + "loss": 0.2493, "step": 17151 }, { - "epoch": 0.99, - "grad_norm": 0.3385171036240212, - "learning_rate": 1.1081097502584349e-08, - "loss": 0.2484, + "epoch": 0.79, + "grad_norm": 0.415937319740506, + "learning_rate": 2.2673592834432755e-06, + "loss": 0.3372, "step": 17152 }, { - "epoch": 0.99, - "grad_norm": 0.4682014674094445, - "learning_rate": 1.0993689033836907e-08, - "loss": 0.3082, + "epoch": 0.79, + "grad_norm": 0.3318510185028204, + "learning_rate": 2.266415903085385e-06, + "loss": 0.1462, "step": 17153 }, { - "epoch": 0.99, - "grad_norm": 0.541802237828164, - "learning_rate": 1.090662648394858e-08, - "loss": 0.2646, + "epoch": 0.79, + "grad_norm": 0.7697280925485774, + "learning_rate": 2.2654726939415895e-06, + "loss": 0.1012, "step": 17154 }, { - "epoch": 0.99, - "grad_norm": 0.4216574372674074, - "learning_rate": 1.0819909855933618e-08, - "loss": 0.2951, + "epoch": 0.79, + "grad_norm": 0.4381525126065141, + "learning_rate": 2.264529656032777e-06, + "loss": 0.3045, "step": 17155 }, { - "epoch": 0.99, - "grad_norm": 0.5286671846393574, - "learning_rate": 1.073353915279629e-08, - "loss": 0.3723, + "epoch": 0.79, + "grad_norm": 0.3570085240881158, + "learning_rate": 2.263586789379819e-06, + "loss": 0.3059, "step": 17156 }, { - "epoch": 0.99, - "grad_norm": 0.26883762854101706, - "learning_rate": 1.0647514377527535e-08, - "loss": 0.2437, + "epoch": 0.79, + "grad_norm": 0.6003243779595991, + "learning_rate": 2.262644094003594e-06, + "loss": 0.2939, "step": 17157 }, { - "epoch": 0.99, - "grad_norm": 0.2838856173997619, - "learning_rate": 1.0561835533104969e-08, - "loss": 0.1802, + "epoch": 0.79, + "grad_norm": 0.388779645884081, + "learning_rate": 2.2617015699249735e-06, + "loss": 0.2496, "step": 17158 }, { - "epoch": 0.99, - "grad_norm": 0.48001083283485046, - "learning_rate": 1.0476502622496221e-08, - "loss": 0.2635, + "epoch": 0.79, + "grad_norm": 0.2512278806174626, + "learning_rate": 2.2607592171648197e-06, + "loss": 0.2097, "step": 17159 }, { - "epoch": 0.99, - "grad_norm": 1.2070202127543408, - "learning_rate": 1.0391515648657813e-08, - "loss": 0.6532, + "epoch": 0.79, + "grad_norm": 0.833050622020885, + "learning_rate": 2.259817035744e-06, + "loss": 0.1108, "step": 17160 }, { - "epoch": 0.99, - "grad_norm": 0.24790652896927554, - "learning_rate": 1.0306874614530727e-08, - "loss": 0.2034, + "epoch": 0.79, + "grad_norm": 0.3996464493257746, + "learning_rate": 2.258875025683366e-06, + "loss": 0.2804, "step": 17161 }, { - "epoch": 0.99, - "grad_norm": 0.5456230843925484, - "learning_rate": 1.022257952304706e-08, - "loss": 0.3501, + "epoch": 0.79, + "grad_norm": 0.6186153074205543, + "learning_rate": 2.2579331870037822e-06, + "loss": 0.4017, "step": 17162 }, { - "epoch": 0.99, - "grad_norm": 0.872555453274828, - "learning_rate": 1.0138630377125591e-08, - "loss": 0.3599, + "epoch": 0.79, + "grad_norm": 0.3067014989793992, + "learning_rate": 2.2569915197260974e-06, + "loss": 0.2243, "step": 17163 }, { - "epoch": 0.99, - "grad_norm": 0.30090643775201775, - "learning_rate": 1.0055027179675104e-08, - "loss": 0.211, + "epoch": 0.79, + "grad_norm": 0.38978267599642796, + "learning_rate": 2.2560500238711534e-06, + "loss": 0.2869, "step": 17164 }, { - "epoch": 0.99, - "grad_norm": 0.20651735036815919, - "learning_rate": 9.971769933587728e-09, - "loss": 0.1962, + "epoch": 0.79, + "grad_norm": 0.28318044096502587, + "learning_rate": 2.2551086994597993e-06, + "loss": 0.1696, "step": 17165 }, { - "epoch": 0.99, - "grad_norm": 1.3557084430328847, - "learning_rate": 9.888858641750044e-09, - "loss": 0.5679, + "epoch": 0.79, + "grad_norm": 0.777214384642687, + "learning_rate": 2.254167546512873e-06, + "loss": 0.3822, "step": 17166 }, { - "epoch": 0.99, - "grad_norm": 0.4083475818641006, - "learning_rate": 9.806293307030868e-09, - "loss": 0.2982, + "epoch": 0.79, + "grad_norm": 0.2498515895672066, + "learning_rate": 2.2532265650512154e-06, + "loss": 0.2158, "step": 17167 }, { - "epoch": 0.99, - "grad_norm": 0.5826905889124464, - "learning_rate": 9.724073932289024e-09, - "loss": 0.2523, + "epoch": 0.79, + "grad_norm": 0.8831558900009088, + "learning_rate": 2.252285755095652e-06, + "loss": 0.382, "step": 17168 }, { - "epoch": 0.99, - "grad_norm": 0.3194676400384813, - "learning_rate": 9.642200520374457e-09, - "loss": 0.299, + "epoch": 0.79, + "grad_norm": 1.2803222159960825, + "learning_rate": 2.251345116667014e-06, + "loss": 0.6776, "step": 17169 }, { - "epoch": 0.99, - "grad_norm": 0.29693589850133373, - "learning_rate": 9.560673074120452e-09, - "loss": 0.2682, + "epoch": 0.79, + "grad_norm": 0.3164006888531394, + "learning_rate": 2.2504046497861308e-06, + "loss": 0.186, "step": 17170 }, { - "epoch": 0.99, - "grad_norm": 0.16895972237669396, - "learning_rate": 9.47949159635031e-09, - "loss": 0.0703, + "epoch": 0.79, + "grad_norm": 0.23475518441000343, + "learning_rate": 2.249464354473816e-06, + "loss": 0.1976, "step": 17171 }, { - "epoch": 0.99, - "grad_norm": 0.46335545917394955, - "learning_rate": 9.398656089876224e-09, - "loss": 0.2993, + "epoch": 0.79, + "grad_norm": 0.64297109713618, + "learning_rate": 2.2485242307508936e-06, + "loss": 0.3276, "step": 17172 }, { - "epoch": 0.99, - "grad_norm": 0.29457119427898515, - "learning_rate": 9.318166557497066e-09, - "loss": 0.2394, + "epoch": 0.79, + "grad_norm": 0.35440729949926225, + "learning_rate": 2.247584278638171e-06, + "loss": 0.2119, "step": 17173 }, { - "epoch": 0.99, - "grad_norm": 0.47416681421393464, - "learning_rate": 9.238023001999496e-09, - "loss": 0.2596, + "epoch": 0.79, + "grad_norm": 0.9972870600536571, + "learning_rate": 2.2466444981564593e-06, + "loss": 0.4353, "step": 17174 }, { - "epoch": 0.99, - "grad_norm": 0.6675035464853056, - "learning_rate": 9.158225426160183e-09, - "loss": 0.3474, + "epoch": 0.79, + "grad_norm": 0.3429775771133243, + "learning_rate": 2.24570488932657e-06, + "loss": 0.2984, "step": 17175 }, { - "epoch": 0.99, - "grad_norm": 0.5988924080851362, - "learning_rate": 9.078773832742471e-09, - "loss": 0.2719, + "epoch": 0.79, + "grad_norm": 0.3828628644203816, + "learning_rate": 2.2447654521692975e-06, + "loss": 0.2876, "step": 17176 }, { - "epoch": 0.99, - "grad_norm": 0.18081883458352935, - "learning_rate": 8.999668224496383e-09, - "loss": 0.1884, + "epoch": 0.79, + "grad_norm": 0.4170889793197809, + "learning_rate": 2.243826186705446e-06, + "loss": 0.0963, "step": 17177 }, { - "epoch": 0.99, - "grad_norm": 1.1298631304084927, - "learning_rate": 8.92090860416195e-09, - "loss": 0.555, + "epoch": 0.79, + "grad_norm": 0.7400022987857334, + "learning_rate": 2.2428870929558012e-06, + "loss": 0.3557, "step": 17178 }, { - "epoch": 0.99, - "grad_norm": 0.40152654309722646, - "learning_rate": 8.842494974466986e-09, - "loss": 0.2712, + "epoch": 0.79, + "grad_norm": 0.2917936422072651, + "learning_rate": 2.241948170941165e-06, + "loss": 0.2369, "step": 17179 }, { - "epoch": 0.99, - "grad_norm": 0.49817004543825866, - "learning_rate": 8.764427338127102e-09, - "loss": 0.3426, + "epoch": 0.79, + "grad_norm": 0.5830577035725261, + "learning_rate": 2.2410094206823173e-06, + "loss": 0.2882, "step": 17180 }, { - "epoch": 0.99, - "grad_norm": 0.31938785931852637, - "learning_rate": 8.686705697845688e-09, - "loss": 0.2383, + "epoch": 0.79, + "grad_norm": 1.3682210912564259, + "learning_rate": 2.240070842200045e-06, + "loss": 0.7568, "step": 17181 }, { - "epoch": 0.99, - "grad_norm": 0.3866142623720773, - "learning_rate": 8.609330056313926e-09, - "loss": 0.2452, + "epoch": 0.79, + "grad_norm": 0.3461618045279277, + "learning_rate": 2.239132435515122e-06, + "loss": 0.2298, "step": 17182 }, { - "epoch": 0.99, - "grad_norm": 0.2502692400459461, - "learning_rate": 8.532300416210781e-09, - "loss": 0.1758, + "epoch": 0.79, + "grad_norm": 0.2795679108220238, + "learning_rate": 2.238194200648328e-06, + "loss": 0.1994, "step": 17183 }, { - "epoch": 0.99, - "grad_norm": 0.5478422286740277, - "learning_rate": 8.455616780205234e-09, - "loss": 0.243, + "epoch": 0.79, + "grad_norm": 0.6689037242336835, + "learning_rate": 2.237256137620436e-06, + "loss": 0.2748, "step": 17184 }, { - "epoch": 0.99, - "grad_norm": 0.3054428534977997, - "learning_rate": 8.379279150951824e-09, - "loss": 0.259, + "epoch": 0.79, + "grad_norm": 0.37406500837620066, + "learning_rate": 2.236318246452208e-06, + "loss": 0.2352, "step": 17185 }, { - "epoch": 0.99, - "grad_norm": 0.563806009862002, - "learning_rate": 8.303287531093996e-09, - "loss": 0.3525, + "epoch": 0.79, + "grad_norm": 1.7144741753791801, + "learning_rate": 2.2353805271644112e-06, + "loss": 0.3476, "step": 17186 }, { - "epoch": 0.99, - "grad_norm": 0.6691683684864551, - "learning_rate": 8.227641923264085e-09, - "loss": 0.3025, + "epoch": 0.79, + "grad_norm": 0.40751667690425686, + "learning_rate": 2.234442979777809e-06, + "loss": 0.3069, "step": 17187 }, { - "epoch": 0.99, - "grad_norm": 0.3912003511948697, - "learning_rate": 8.152342330081109e-09, - "loss": 0.247, + "epoch": 0.79, + "grad_norm": 0.3288656288229183, + "learning_rate": 2.233505604313152e-06, + "loss": 0.229, "step": 17188 }, { - "epoch": 0.99, - "grad_norm": 0.29904732930023953, - "learning_rate": 8.077388754151872e-09, - "loss": 0.2665, + "epoch": 0.79, + "grad_norm": 1.1228124576551153, + "learning_rate": 2.2325684007911984e-06, + "loss": 0.3886, "step": 17189 }, { - "epoch": 0.99, - "grad_norm": 0.28293767680946147, - "learning_rate": 8.002781198074294e-09, - "loss": 0.1153, + "epoch": 0.79, + "grad_norm": 0.2888745069202256, + "learning_rate": 2.2316313692326907e-06, + "loss": 0.217, "step": 17190 }, { - "epoch": 0.99, - "grad_norm": 0.39065053153705637, - "learning_rate": 7.928519664430756e-09, - "loss": 0.2872, + "epoch": 0.79, + "grad_norm": 0.46530315681055856, + "learning_rate": 2.2306945096583775e-06, + "loss": 0.2968, "step": 17191 }, { - "epoch": 0.99, - "grad_norm": 0.4524577827612803, - "learning_rate": 7.854604155791423e-09, - "loss": 0.3092, + "epoch": 0.79, + "grad_norm": 0.38386773205438335, + "learning_rate": 2.2297578220890027e-06, + "loss": 0.2457, "step": 17192 }, { - "epoch": 0.99, - "grad_norm": 0.3234766790264359, - "learning_rate": 7.78103467471869e-09, - "loss": 0.301, + "epoch": 0.79, + "grad_norm": 0.6087834094924855, + "learning_rate": 2.228821306545298e-06, + "loss": 0.2449, "step": 17193 }, { - "epoch": 0.99, - "grad_norm": 0.5498364437172288, - "learning_rate": 7.70781122375941e-09, - "loss": 0.0939, + "epoch": 0.79, + "grad_norm": 0.391062903415256, + "learning_rate": 2.2278849630480014e-06, + "loss": 0.2682, "step": 17194 }, { - "epoch": 0.99, - "grad_norm": 0.290298275004318, - "learning_rate": 7.634933805448219e-09, - "loss": 0.2089, + "epoch": 0.79, + "grad_norm": 0.379961351873939, + "learning_rate": 2.2269487916178354e-06, + "loss": 0.2922, "step": 17195 }, { - "epoch": 0.99, - "grad_norm": 0.2698869203749294, - "learning_rate": 7.562402422309767e-09, - "loss": 0.2444, + "epoch": 0.79, + "grad_norm": 1.265700092716687, + "learning_rate": 2.2260127922755383e-06, + "loss": 0.2115, "step": 17196 }, { - "epoch": 0.99, - "grad_norm": 0.5583643820899699, - "learning_rate": 7.490217076855377e-09, - "loss": 0.1874, + "epoch": 0.79, + "grad_norm": 0.27227524860969315, + "learning_rate": 2.2250769650418213e-06, + "loss": 0.2078, "step": 17197 }, { - "epoch": 0.99, - "grad_norm": 0.37483103695911446, - "learning_rate": 7.418377771585273e-09, - "loss": 0.3031, + "epoch": 0.79, + "grad_norm": 1.4414669491274652, + "learning_rate": 2.22414130993741e-06, + "loss": 0.6629, "step": 17198 }, { - "epoch": 0.99, - "grad_norm": 1.2057443310019202, - "learning_rate": 7.346884508987462e-09, - "loss": 0.5939, + "epoch": 0.79, + "grad_norm": 0.3479028958319379, + "learning_rate": 2.2232058269830126e-06, + "loss": 0.2432, "step": 17199 }, { - "epoch": 0.99, - "grad_norm": 0.40814755288192517, - "learning_rate": 7.275737291536633e-09, - "loss": 0.1775, + "epoch": 0.79, + "grad_norm": 0.35582280626712265, + "learning_rate": 2.222270516199343e-06, + "loss": 0.2586, "step": 17200 }, { - "epoch": 0.99, - "grad_norm": 0.23334911812197162, - "learning_rate": 7.204936121697481e-09, - "loss": 0.2408, + "epoch": 0.79, + "grad_norm": 0.7060125509241995, + "learning_rate": 2.221335377607111e-06, + "loss": 0.3715, "step": 17201 }, { - "epoch": 0.99, - "grad_norm": 0.452400964531313, - "learning_rate": 7.134481001922488e-09, - "loss": 0.253, + "epoch": 0.79, + "grad_norm": 0.519950536283211, + "learning_rate": 2.220400411227014e-06, + "loss": 0.3525, "step": 17202 }, { - "epoch": 0.99, - "grad_norm": 0.3952783924255758, - "learning_rate": 7.064371934649706e-09, - "loss": 0.2208, + "epoch": 0.79, + "grad_norm": 0.24539616972428374, + "learning_rate": 2.2194656170797534e-06, + "loss": 0.1826, "step": 17203 }, { - "epoch": 0.99, - "grad_norm": 0.5326550442015726, - "learning_rate": 6.9946089223082995e-09, - "loss": 0.3627, + "epoch": 0.79, + "grad_norm": 0.4555420081351435, + "learning_rate": 2.218530995186028e-06, + "loss": 0.2236, "step": 17204 }, { - "epoch": 0.99, - "grad_norm": 0.34586123136046604, - "learning_rate": 6.925191967314115e-09, - "loss": 0.3045, + "epoch": 0.79, + "grad_norm": 1.2060774057892782, + "learning_rate": 2.2175965455665225e-06, + "loss": 0.6888, "step": 17205 }, { - "epoch": 0.99, - "grad_norm": 0.7623488186505916, - "learning_rate": 6.856121072070787e-09, - "loss": 0.3254, + "epoch": 0.79, + "grad_norm": 0.3534622409077698, + "learning_rate": 2.2166622682419327e-06, + "loss": 0.2019, "step": 17206 }, { - "epoch": 0.99, - "grad_norm": 0.4536690860312234, - "learning_rate": 6.787396238969735e-09, - "loss": 0.2152, + "epoch": 0.79, + "grad_norm": 0.3566074483303219, + "learning_rate": 2.2157281632329353e-06, + "loss": 0.2856, "step": 17207 }, { - "epoch": 0.99, - "grad_norm": 0.23043597811227537, - "learning_rate": 6.719017470392386e-09, - "loss": 0.2123, + "epoch": 0.79, + "grad_norm": 0.500638730218545, + "learning_rate": 2.2147942305602144e-06, + "loss": 0.2393, "step": 17208 }, { - "epoch": 0.99, - "grad_norm": 0.39678585504428626, - "learning_rate": 6.650984768704627e-09, - "loss": 0.2842, + "epoch": 0.79, + "grad_norm": 0.24283930002033238, + "learning_rate": 2.213860470244448e-06, + "loss": 0.1247, "step": 17209 }, { - "epoch": 0.99, - "grad_norm": 0.484976822562212, - "learning_rate": 6.583298136264571e-09, - "loss": 0.2722, + "epoch": 0.79, + "grad_norm": 0.5768618166887786, + "learning_rate": 2.2129268823063044e-06, + "loss": 0.3832, "step": 17210 }, { - "epoch": 0.99, - "grad_norm": 1.1659165788472297, - "learning_rate": 6.515957575413678e-09, - "loss": 0.5502, + "epoch": 0.79, + "grad_norm": 0.3833830699173495, + "learning_rate": 2.2119934667664555e-06, + "loss": 0.2724, "step": 17211 }, { - "epoch": 0.99, - "grad_norm": 0.4491331575497026, - "learning_rate": 6.448963088486748e-09, - "loss": 0.3109, + "epoch": 0.79, + "grad_norm": 0.36551798767054433, + "learning_rate": 2.211060223645561e-06, + "loss": 0.1981, "step": 17212 }, { - "epoch": 0.99, - "grad_norm": 0.27854955150557587, - "learning_rate": 6.382314677803037e-09, - "loss": 0.2077, + "epoch": 0.79, + "grad_norm": 0.4884727495171653, + "learning_rate": 2.2101271529642907e-06, + "loss": 0.3079, "step": 17213 }, { - "epoch": 0.99, - "grad_norm": 0.41626719286042163, - "learning_rate": 6.316012345668476e-09, - "loss": 0.2705, + "epoch": 0.79, + "grad_norm": 0.5216664869888064, + "learning_rate": 2.209194254743295e-06, + "loss": 0.3577, "step": 17214 }, { - "epoch": 0.99, - "grad_norm": 0.7680084509491535, - "learning_rate": 6.2500560943812295e-09, - "loss": 0.3548, + "epoch": 0.79, + "grad_norm": 0.3292120838414288, + "learning_rate": 2.208261529003233e-06, + "loss": 0.2811, "step": 17215 }, { - "epoch": 0.99, - "grad_norm": 0.25815630989197186, - "learning_rate": 6.184445926225024e-09, - "loss": 0.2255, + "epoch": 0.79, + "grad_norm": 0.24862638985651028, + "learning_rate": 2.2073289757647477e-06, + "loss": 0.1552, "step": 17216 }, { - "epoch": 0.99, - "grad_norm": 0.3151753336653738, - "learning_rate": 6.119181843471378e-09, - "loss": 0.2446, + "epoch": 0.79, + "grad_norm": 0.8830509652435399, + "learning_rate": 2.2063965950484878e-06, + "loss": 0.4498, "step": 17217 }, { - "epoch": 0.99, - "grad_norm": 0.9113337178352028, - "learning_rate": 6.0542638483818136e-09, - "loss": 0.3112, + "epoch": 0.79, + "grad_norm": 0.38044774479665894, + "learning_rate": 2.205464386875099e-06, + "loss": 0.2661, "step": 17218 }, { - "epoch": 0.99, - "grad_norm": 0.3686064816471827, - "learning_rate": 5.989691943202314e-09, - "loss": 0.2553, + "epoch": 0.79, + "grad_norm": 0.30842645976079736, + "learning_rate": 2.2045323512652128e-06, + "loss": 0.2386, "step": 17219 }, { - "epoch": 0.99, - "grad_norm": 0.4338394855360507, - "learning_rate": 5.925466130169977e-09, - "loss": 0.2657, + "epoch": 0.79, + "grad_norm": 1.4772492118941674, + "learning_rate": 2.2036004882394702e-06, + "loss": 0.448, "step": 17220 }, { - "epoch": 0.99, - "grad_norm": 0.2913724177552018, - "learning_rate": 5.86158641150969e-09, - "loss": 0.2284, + "epoch": 0.79, + "grad_norm": 0.31301625706923664, + "learning_rate": 2.202668797818496e-06, + "loss": 0.1964, "step": 17221 }, { - "epoch": 0.99, - "grad_norm": 0.3690652954085403, - "learning_rate": 5.798052789431907e-09, - "loss": 0.3086, + "epoch": 0.79, + "grad_norm": 0.5322213749102688, + "learning_rate": 2.2017372800229188e-06, + "loss": 0.2786, "step": 17222 }, { - "epoch": 0.99, - "grad_norm": 0.44592350230275896, - "learning_rate": 5.734865266138201e-09, - "loss": 0.1507, + "epoch": 0.79, + "grad_norm": 0.3548614363570148, + "learning_rate": 2.200805934873366e-06, + "loss": 0.3016, "step": 17223 }, { - "epoch": 0.99, - "grad_norm": 0.29599152713750027, - "learning_rate": 5.67202384381682e-09, - "loss": 0.2542, + "epoch": 0.79, + "grad_norm": 0.35404673328192504, + "learning_rate": 2.199874762390449e-06, + "loss": 0.2496, "step": 17224 }, { - "epoch": 0.99, - "grad_norm": 0.38827856045711373, - "learning_rate": 5.609528524642694e-09, - "loss": 0.3015, + "epoch": 0.79, + "grad_norm": 0.47912460005126994, + "learning_rate": 2.1989437625947873e-06, + "loss": 0.1455, "step": 17225 }, { - "epoch": 0.99, - "grad_norm": 0.5828565839256031, - "learning_rate": 5.547379310781864e-09, - "loss": 0.2791, + "epoch": 0.79, + "grad_norm": 0.3572813072432284, + "learning_rate": 2.198012935506991e-06, + "loss": 0.2849, "step": 17226 }, { - "epoch": 0.99, - "grad_norm": 0.7721637320865353, - "learning_rate": 5.485576204383725e-09, - "loss": 0.4538, + "epoch": 0.79, + "grad_norm": 0.6022720433058638, + "learning_rate": 2.197082281147673e-06, + "loss": 0.2781, "step": 17227 }, { - "epoch": 0.99, - "grad_norm": 0.3448076249812229, - "learning_rate": 5.424119207592115e-09, - "loss": 0.2812, + "epoch": 0.79, + "grad_norm": 0.2955726209037957, + "learning_rate": 2.1961517995374314e-06, + "loss": 0.2334, "step": 17228 }, { - "epoch": 0.99, - "grad_norm": 0.21500697134765617, - "learning_rate": 5.3630083225331145e-09, - "loss": 0.1745, + "epoch": 0.79, + "grad_norm": 0.6409940322879456, + "learning_rate": 2.195221490696863e-06, + "loss": 0.318, "step": 17229 }, { - "epoch": 0.99, - "grad_norm": 1.028394499800441, - "learning_rate": 5.302243551322806e-09, - "loss": 0.4091, + "epoch": 0.79, + "grad_norm": 0.37250328819602857, + "learning_rate": 2.194291354646574e-06, + "loss": 0.2447, "step": 17230 }, { - "epoch": 0.99, - "grad_norm": 0.3682420380289012, - "learning_rate": 5.2418248960661725e-09, - "loss": 0.2909, + "epoch": 0.79, + "grad_norm": 0.305204730973577, + "learning_rate": 2.1933613914071474e-06, + "loss": 0.2411, "step": 17231 }, { - "epoch": 0.99, - "grad_norm": 0.3277072529659946, - "learning_rate": 5.181752358854874e-09, - "loss": 0.3007, + "epoch": 0.79, + "grad_norm": 1.5849246082045143, + "learning_rate": 2.1924316009991785e-06, + "loss": 0.2396, "step": 17232 }, { - "epoch": 0.99, - "grad_norm": 0.4392349799183583, - "learning_rate": 5.1220259417705806e-09, - "loss": 0.1281, + "epoch": 0.79, + "grad_norm": 0.7409587010773275, + "learning_rate": 2.191501983443247e-06, + "loss": 0.3488, "step": 17233 }, { - "epoch": 0.99, - "grad_norm": 0.3499626554649006, - "learning_rate": 5.0626456468805265e-09, - "loss": 0.2961, + "epoch": 0.79, + "grad_norm": 0.3320531174829815, + "learning_rate": 2.1905725387599355e-06, + "loss": 0.2898, "step": 17234 }, { - "epoch": 0.99, - "grad_norm": 0.28294895281739385, - "learning_rate": 5.003611476240844e-09, - "loss": 0.1918, + "epoch": 0.79, + "grad_norm": 0.4880637009581049, + "learning_rate": 2.1896432669698233e-06, + "loss": 0.2668, "step": 17235 }, { - "epoch": 0.99, - "grad_norm": 0.30877597909823423, - "learning_rate": 4.944923431896564e-09, - "loss": 0.218, + "epoch": 0.79, + "grad_norm": 0.5733790939152533, + "learning_rate": 2.1887141680934786e-06, + "loss": 0.2775, "step": 17236 }, { - "epoch": 0.99, - "grad_norm": 0.35228097598873376, - "learning_rate": 4.886581515880506e-09, - "loss": 0.2528, + "epoch": 0.79, + "grad_norm": 0.2676681366583005, + "learning_rate": 2.1877852421514767e-06, + "loss": 0.164, "step": 17237 }, { - "epoch": 0.99, - "grad_norm": 0.8101994452225071, - "learning_rate": 4.828585730211055e-09, - "loss": 0.3758, + "epoch": 0.79, + "grad_norm": 0.37885207014500677, + "learning_rate": 2.186856489164377e-06, + "loss": 0.2473, "step": 17238 }, { - "epoch": 0.99, - "grad_norm": 0.8340505753102018, - "learning_rate": 4.770936076898825e-09, - "loss": 0.2225, + "epoch": 0.79, + "grad_norm": 0.42500782696313366, + "learning_rate": 2.185927909152745e-06, + "loss": 0.2744, "step": 17239 }, { - "epoch": 0.99, - "grad_norm": 0.29161964930278367, - "learning_rate": 4.713632557938885e-09, - "loss": 0.2682, + "epoch": 0.79, + "grad_norm": 0.5438787531877634, + "learning_rate": 2.1849995021371405e-06, + "loss": 0.3417, "step": 17240 }, + { + "epoch": 0.79, + "grad_norm": 0.6333724182358521, + "learning_rate": 2.1840712681381116e-06, + "loss": 0.3434, + "step": 17241 + }, + { + "epoch": 0.79, + "grad_norm": 0.26762002505843974, + "learning_rate": 2.1831432071762117e-06, + "loss": 0.2289, + "step": 17242 + }, + { + "epoch": 0.79, + "grad_norm": 0.24573068064034145, + "learning_rate": 2.1822153192719876e-06, + "loss": 0.1728, + "step": 17243 + }, + { + "epoch": 0.79, + "grad_norm": 1.3793883546075032, + "learning_rate": 2.181287604445984e-06, + "loss": 0.4307, + "step": 17244 + }, + { + "epoch": 0.79, + "grad_norm": 0.31935228572104957, + "learning_rate": 2.180360062718734e-06, + "loss": 0.1139, + "step": 17245 + }, + { + "epoch": 0.79, + "grad_norm": 0.3335514511464505, + "learning_rate": 2.179432694110776e-06, + "loss": 0.2834, + "step": 17246 + }, + { + "epoch": 0.79, + "grad_norm": 0.5280062565778948, + "learning_rate": 2.1785054986426424e-06, + "loss": 0.3451, + "step": 17247 + }, + { + "epoch": 0.79, + "grad_norm": 0.30446234044287324, + "learning_rate": 2.1775784763348575e-06, + "loss": 0.1114, + "step": 17248 + }, + { + "epoch": 0.79, + "grad_norm": 0.27398955412267384, + "learning_rate": 2.1766516272079472e-06, + "loss": 0.186, + "step": 17249 + }, + { + "epoch": 0.79, + "grad_norm": 0.3619742392211366, + "learning_rate": 2.1757249512824276e-06, + "loss": 0.2803, + "step": 17250 + }, + { + "epoch": 0.79, + "grad_norm": 0.36101096734417387, + "learning_rate": 2.1747984485788155e-06, + "loss": 0.113, + "step": 17251 + }, + { + "epoch": 0.79, + "grad_norm": 0.5164446698839823, + "learning_rate": 2.1738721191176273e-06, + "loss": 0.2901, + "step": 17252 + }, + { + "epoch": 0.79, + "grad_norm": 0.9602371705017546, + "learning_rate": 2.1729459629193637e-06, + "loss": 0.4584, + "step": 17253 + }, + { + "epoch": 0.79, + "grad_norm": 0.391896258464414, + "learning_rate": 2.1720199800045373e-06, + "loss": 0.2914, + "step": 17254 + }, + { + "epoch": 0.79, + "grad_norm": 0.29647344408756765, + "learning_rate": 2.171094170393637e-06, + "loss": 0.1686, + "step": 17255 + }, + { + "epoch": 0.79, + "grad_norm": 0.6143951393809346, + "learning_rate": 2.170168534107172e-06, + "loss": 0.301, + "step": 17256 + }, + { + "epoch": 0.79, + "grad_norm": 0.7414084135711221, + "learning_rate": 2.169243071165629e-06, + "loss": 0.2686, + "step": 17257 + }, + { + "epoch": 0.79, + "grad_norm": 0.3192675350380784, + "learning_rate": 2.168317781589494e-06, + "loss": 0.2362, + "step": 17258 + }, + { + "epoch": 0.79, + "grad_norm": 0.5075742922603717, + "learning_rate": 2.167392665399256e-06, + "loss": 0.3606, + "step": 17259 + }, + { + "epoch": 0.79, + "grad_norm": 0.626177383963506, + "learning_rate": 2.166467722615394e-06, + "loss": 0.3506, + "step": 17260 + }, + { + "epoch": 0.79, + "grad_norm": 0.22050588551725933, + "learning_rate": 2.1655429532583905e-06, + "loss": 0.1484, + "step": 17261 + }, + { + "epoch": 0.79, + "grad_norm": 0.3888544501937455, + "learning_rate": 2.164618357348711e-06, + "loss": 0.2861, + "step": 17262 + }, + { + "epoch": 0.79, + "grad_norm": 0.8746329846516026, + "learning_rate": 2.163693934906831e-06, + "loss": 0.3618, + "step": 17263 + }, + { + "epoch": 0.79, + "grad_norm": 0.34879099767399935, + "learning_rate": 2.1627696859532156e-06, + "loss": 0.2188, + "step": 17264 + }, + { + "epoch": 0.79, + "grad_norm": 1.1975194116332641, + "learning_rate": 2.1618456105083242e-06, + "loss": 0.5909, + "step": 17265 + }, + { + "epoch": 0.79, + "grad_norm": 0.36712964440380685, + "learning_rate": 2.160921708592618e-06, + "loss": 0.3, + "step": 17266 + }, + { + "epoch": 0.79, + "grad_norm": 0.45488473971069426, + "learning_rate": 2.1599979802265482e-06, + "loss": 0.3152, + "step": 17267 + }, + { + "epoch": 0.79, + "grad_norm": 0.36838475416437205, + "learning_rate": 2.1590744254305664e-06, + "loss": 0.1185, + "step": 17268 + }, + { + "epoch": 0.79, + "grad_norm": 0.6698052348074903, + "learning_rate": 2.158151044225122e-06, + "loss": 0.3521, + "step": 17269 + }, + { + "epoch": 0.79, + "grad_norm": 0.25842929885787885, + "learning_rate": 2.1572278366306533e-06, + "loss": 0.2577, + "step": 17270 + }, + { + "epoch": 0.79, + "grad_norm": 1.021456009233583, + "learning_rate": 2.1563048026676037e-06, + "loss": 0.4566, + "step": 17271 + }, + { + "epoch": 0.79, + "grad_norm": 0.8742436816836272, + "learning_rate": 2.1553819423564006e-06, + "loss": 0.3677, + "step": 17272 + }, + { + "epoch": 0.79, + "grad_norm": 0.23091128634988273, + "learning_rate": 2.154459255717486e-06, + "loss": 0.1863, + "step": 17273 + }, + { + "epoch": 0.79, + "grad_norm": 0.35388327549937704, + "learning_rate": 2.1535367427712784e-06, + "loss": 0.2405, + "step": 17274 + }, + { + "epoch": 0.79, + "grad_norm": 0.6064043977184346, + "learning_rate": 2.152614403538209e-06, + "loss": 0.3318, + "step": 17275 + }, + { + "epoch": 0.79, + "grad_norm": 0.4268634805552709, + "learning_rate": 2.1516922380386896e-06, + "loss": 0.3133, + "step": 17276 + }, + { + "epoch": 0.79, + "grad_norm": 1.2198094642755963, + "learning_rate": 2.15077024629314e-06, + "loss": 0.4009, + "step": 17277 + }, + { + "epoch": 0.79, + "grad_norm": 0.3359046724505899, + "learning_rate": 2.1498484283219747e-06, + "loss": 0.2567, + "step": 17278 + }, + { + "epoch": 0.79, + "grad_norm": 0.47742485412065594, + "learning_rate": 2.148926784145596e-06, + "loss": 0.3181, + "step": 17279 + }, + { + "epoch": 0.79, + "grad_norm": 0.34615696417918396, + "learning_rate": 2.1480053137844115e-06, + "loss": 0.1963, + "step": 17280 + }, + { + "epoch": 0.79, + "grad_norm": 0.9039904632146665, + "learning_rate": 2.1470840172588246e-06, + "loss": 0.2932, + "step": 17281 + }, + { + "epoch": 0.79, + "grad_norm": 0.27634976831947505, + "learning_rate": 2.1461628945892255e-06, + "loss": 0.2302, + "step": 17282 + }, + { + "epoch": 0.79, + "grad_norm": 0.5008727349554704, + "learning_rate": 2.145241945796014e-06, + "loss": 0.3812, + "step": 17283 + }, + { + "epoch": 0.79, + "grad_norm": 2.57558298423319, + "learning_rate": 2.1443211708995713e-06, + "loss": 0.2064, + "step": 17284 + }, + { + "epoch": 0.79, + "grad_norm": 0.3467532279561505, + "learning_rate": 2.1434005699202877e-06, + "loss": 0.2464, + "step": 17285 + }, + { + "epoch": 0.79, + "grad_norm": 0.44929405051722027, + "learning_rate": 2.1424801428785447e-06, + "loss": 0.3041, + "step": 17286 + }, + { + "epoch": 0.79, + "grad_norm": 0.2765644815979597, + "learning_rate": 2.1415598897947164e-06, + "loss": 0.118, + "step": 17287 + }, + { + "epoch": 0.79, + "grad_norm": 0.4359138263904072, + "learning_rate": 2.14063981068918e-06, + "loss": 0.2993, + "step": 17288 + }, + { + "epoch": 0.79, + "grad_norm": 1.352131963319057, + "learning_rate": 2.139719905582298e-06, + "loss": 0.7674, + "step": 17289 + }, + { + "epoch": 0.79, + "grad_norm": 0.34526150294995045, + "learning_rate": 2.1388001744944476e-06, + "loss": 0.2139, + "step": 17290 + }, + { + "epoch": 0.79, + "grad_norm": 0.3555498688023054, + "learning_rate": 2.137880617445982e-06, + "loss": 0.2633, + "step": 17291 + }, + { + "epoch": 0.79, + "grad_norm": 0.6544705038833118, + "learning_rate": 2.136961234457264e-06, + "loss": 0.3704, + "step": 17292 + }, + { + "epoch": 0.79, + "grad_norm": 0.2703776554245429, + "learning_rate": 2.1360420255486426e-06, + "loss": 0.1846, + "step": 17293 + }, + { + "epoch": 0.79, + "grad_norm": 0.30865671460859356, + "learning_rate": 2.1351229907404727e-06, + "loss": 0.189, + "step": 17294 + }, + { + "epoch": 0.79, + "grad_norm": 0.5242197007365211, + "learning_rate": 2.1342041300531015e-06, + "loss": 0.3761, + "step": 17295 + }, + { + "epoch": 0.79, + "grad_norm": 1.0520448351560139, + "learning_rate": 2.133285443506866e-06, + "loss": 0.3963, + "step": 17296 + }, + { + "epoch": 0.79, + "grad_norm": 0.31075686426074234, + "learning_rate": 2.132366931122113e-06, + "loss": 0.1849, + "step": 17297 + }, + { + "epoch": 0.79, + "grad_norm": 0.37502535454705554, + "learning_rate": 2.1314485929191698e-06, + "loss": 0.295, + "step": 17298 + }, + { + "epoch": 0.79, + "grad_norm": 0.26864979036440173, + "learning_rate": 2.1305304289183714e-06, + "loss": 0.1796, + "step": 17299 + }, + { + "epoch": 0.79, + "grad_norm": 0.30988644192529574, + "learning_rate": 2.1296124391400466e-06, + "loss": 0.1978, + "step": 17300 + }, + { + "epoch": 0.79, + "grad_norm": 0.545476977420377, + "learning_rate": 2.128694623604515e-06, + "loss": 0.3468, + "step": 17301 + }, + { + "epoch": 0.79, + "grad_norm": 0.6007790819916371, + "learning_rate": 2.127776982332097e-06, + "loss": 0.3502, + "step": 17302 + }, + { + "epoch": 0.79, + "grad_norm": 0.3337522800038289, + "learning_rate": 2.126859515343113e-06, + "loss": 0.259, + "step": 17303 + }, + { + "epoch": 0.79, + "grad_norm": 0.6950721214055331, + "learning_rate": 2.1259422226578675e-06, + "loss": 0.2826, + "step": 17304 + }, + { + "epoch": 0.79, + "grad_norm": 0.25783983102580255, + "learning_rate": 2.1250251042966754e-06, + "loss": 0.1865, + "step": 17305 + }, + { + "epoch": 0.8, + "grad_norm": 0.29279831461136036, + "learning_rate": 2.124108160279832e-06, + "loss": 0.2455, + "step": 17306 + }, + { + "epoch": 0.8, + "grad_norm": 1.0705471809371274, + "learning_rate": 2.123191390627648e-06, + "loss": 0.5253, + "step": 17307 + }, + { + "epoch": 0.8, + "grad_norm": 0.7171836891691942, + "learning_rate": 2.122274795360412e-06, + "loss": 0.3772, + "step": 17308 + }, + { + "epoch": 0.8, + "grad_norm": 0.3436653191901018, + "learning_rate": 2.1213583744984223e-06, + "loss": 0.2426, + "step": 17309 + }, + { + "epoch": 0.8, + "grad_norm": 0.3615830002710391, + "learning_rate": 2.1204421280619626e-06, + "loss": 0.2467, + "step": 17310 + }, + { + "epoch": 0.8, + "grad_norm": 0.4515786219581236, + "learning_rate": 2.119526056071319e-06, + "loss": 0.2151, + "step": 17311 + }, + { + "epoch": 0.8, + "grad_norm": 0.35518488076041216, + "learning_rate": 2.118610158546777e-06, + "loss": 0.2577, + "step": 17312 + }, + { + "epoch": 0.8, + "grad_norm": 0.2947321801697784, + "learning_rate": 2.117694435508606e-06, + "loss": 0.1775, + "step": 17313 + }, + { + "epoch": 0.8, + "grad_norm": 0.4489838462552549, + "learning_rate": 2.116778886977087e-06, + "loss": 0.3228, + "step": 17314 + }, + { + "epoch": 0.8, + "grad_norm": 0.43106549031142327, + "learning_rate": 2.115863512972481e-06, + "loss": 0.2824, + "step": 17315 + }, + { + "epoch": 0.8, + "grad_norm": 0.5817303253146339, + "learning_rate": 2.1149483135150597e-06, + "loss": 0.3208, + "step": 17316 + }, + { + "epoch": 0.8, + "grad_norm": 0.39077009237253246, + "learning_rate": 2.1140332886250845e-06, + "loss": 0.2327, + "step": 17317 + }, + { + "epoch": 0.8, + "grad_norm": 0.43206135027873166, + "learning_rate": 2.1131184383228097e-06, + "loss": 0.2717, + "step": 17318 + }, + { + "epoch": 0.8, + "grad_norm": 0.33102179999394077, + "learning_rate": 2.112203762628491e-06, + "loss": 0.261, + "step": 17319 + }, + { + "epoch": 0.8, + "grad_norm": 0.692490159501316, + "learning_rate": 2.1112892615623794e-06, + "loss": 0.2744, + "step": 17320 + }, + { + "epoch": 0.8, + "grad_norm": 0.28547886872361417, + "learning_rate": 2.1103749351447223e-06, + "loss": 0.2288, + "step": 17321 + }, + { + "epoch": 0.8, + "grad_norm": 0.4224554267902391, + "learning_rate": 2.1094607833957592e-06, + "loss": 0.322, + "step": 17322 + }, + { + "epoch": 0.8, + "grad_norm": 1.0238128016070904, + "learning_rate": 2.108546806335725e-06, + "loss": 0.2367, + "step": 17323 + }, + { + "epoch": 0.8, + "grad_norm": 0.3226758423603047, + "learning_rate": 2.1076330039848638e-06, + "loss": 0.245, + "step": 17324 + }, + { + "epoch": 0.8, + "grad_norm": 0.9276839364685976, + "learning_rate": 2.106719376363399e-06, + "loss": 0.4944, + "step": 17325 + }, + { + "epoch": 0.8, + "grad_norm": 0.3059998765790841, + "learning_rate": 2.105805923491562e-06, + "loss": 0.2193, + "step": 17326 + }, + { + "epoch": 0.8, + "grad_norm": 0.26167768850355283, + "learning_rate": 2.10489264538957e-06, + "loss": 0.1922, + "step": 17327 + }, + { + "epoch": 0.8, + "grad_norm": 1.476242381854582, + "learning_rate": 2.1039795420776456e-06, + "loss": 0.5137, + "step": 17328 + }, + { + "epoch": 0.8, + "grad_norm": 0.3932570132212613, + "learning_rate": 2.103066613576007e-06, + "loss": 0.3042, + "step": 17329 + }, + { + "epoch": 0.8, + "grad_norm": 0.33706098740830437, + "learning_rate": 2.1021538599048594e-06, + "loss": 0.1914, + "step": 17330 + }, + { + "epoch": 0.8, + "grad_norm": 1.0459027514122232, + "learning_rate": 2.101241281084416e-06, + "loss": 0.5199, + "step": 17331 + }, + { + "epoch": 0.8, + "grad_norm": 0.44510510620090654, + "learning_rate": 2.1003288771348752e-06, + "loss": 0.2713, + "step": 17332 + }, + { + "epoch": 0.8, + "grad_norm": 0.25855313632729365, + "learning_rate": 2.099416648076439e-06, + "loss": 0.1381, + "step": 17333 + }, + { + "epoch": 0.8, + "grad_norm": 0.35798193479571144, + "learning_rate": 2.098504593929306e-06, + "loss": 0.2742, + "step": 17334 + }, + { + "epoch": 0.8, + "grad_norm": 1.4086137523918492, + "learning_rate": 2.097592714713663e-06, + "loss": 0.4637, + "step": 17335 + }, + { + "epoch": 0.8, + "grad_norm": 0.35853693881712917, + "learning_rate": 2.0966810104497013e-06, + "loss": 0.1791, + "step": 17336 + }, + { + "epoch": 0.8, + "grad_norm": 0.3810430625241888, + "learning_rate": 2.0957694811576058e-06, + "loss": 0.2917, + "step": 17337 + }, + { + "epoch": 0.8, + "grad_norm": 0.53963062082349, + "learning_rate": 2.0948581268575565e-06, + "loss": 0.3568, + "step": 17338 + }, + { + "epoch": 0.8, + "grad_norm": 0.18836258489777002, + "learning_rate": 2.093946947569727e-06, + "loss": 0.1191, + "step": 17339 + }, + { + "epoch": 0.8, + "grad_norm": 0.66899144139605, + "learning_rate": 2.0930359433142934e-06, + "loss": 0.3673, + "step": 17340 + }, + { + "epoch": 0.8, + "grad_norm": 0.3852583892701345, + "learning_rate": 2.092125114111425e-06, + "loss": 0.3037, + "step": 17341 + }, + { + "epoch": 0.8, + "grad_norm": 0.417185990965552, + "learning_rate": 2.091214459981282e-06, + "loss": 0.2665, + "step": 17342 + }, + { + "epoch": 0.8, + "grad_norm": 0.5041915982005274, + "learning_rate": 2.0903039809440307e-06, + "loss": 0.2554, + "step": 17343 + }, + { + "epoch": 0.8, + "grad_norm": 0.9557109828478421, + "learning_rate": 2.0893936770198232e-06, + "loss": 0.555, + "step": 17344 + }, + { + "epoch": 0.8, + "grad_norm": 0.24948702098933245, + "learning_rate": 2.088483548228816e-06, + "loss": 0.2168, + "step": 17345 + }, + { + "epoch": 0.8, + "grad_norm": 0.2859304880433394, + "learning_rate": 2.0875735945911602e-06, + "loss": 0.1917, + "step": 17346 + }, + { + "epoch": 0.8, + "grad_norm": 1.372868635654554, + "learning_rate": 2.086663816126996e-06, + "loss": 0.4619, + "step": 17347 + }, + { + "epoch": 0.8, + "grad_norm": 0.5881613621774432, + "learning_rate": 2.0857542128564714e-06, + "loss": 0.3262, + "step": 17348 + }, + { + "epoch": 0.8, + "grad_norm": 0.31741885742638487, + "learning_rate": 2.0848447847997145e-06, + "loss": 0.2464, + "step": 17349 + }, + { + "epoch": 0.8, + "grad_norm": 0.4918403554399696, + "learning_rate": 2.0839355319768707e-06, + "loss": 0.341, + "step": 17350 + }, + { + "epoch": 0.8, + "grad_norm": 0.6901650884608159, + "learning_rate": 2.0830264544080647e-06, + "loss": 0.3278, + "step": 17351 + }, + { + "epoch": 0.8, + "grad_norm": 0.22056904682657288, + "learning_rate": 2.0821175521134208e-06, + "loss": 0.1516, + "step": 17352 + }, + { + "epoch": 0.8, + "grad_norm": 0.3831559226883887, + "learning_rate": 2.0812088251130613e-06, + "loss": 0.3066, + "step": 17353 + }, + { + "epoch": 0.8, + "grad_norm": 0.6284276953454587, + "learning_rate": 2.0803002734271073e-06, + "loss": 0.3071, + "step": 17354 + }, + { + "epoch": 0.8, + "grad_norm": 0.4204141780757606, + "learning_rate": 2.0793918970756744e-06, + "loss": 0.2975, + "step": 17355 + }, + { + "epoch": 0.8, + "grad_norm": 1.4002266808187778, + "learning_rate": 2.078483696078869e-06, + "loss": 0.4217, + "step": 17356 + }, + { + "epoch": 0.8, + "grad_norm": 0.29855785191741985, + "learning_rate": 2.0775756704568018e-06, + "loss": 0.2532, + "step": 17357 + }, + { + "epoch": 0.8, + "grad_norm": 0.2390048461833621, + "learning_rate": 2.0766678202295698e-06, + "loss": 0.1962, + "step": 17358 + }, + { + "epoch": 0.8, + "grad_norm": 0.8597073465455748, + "learning_rate": 2.075760145417277e-06, + "loss": 0.2887, + "step": 17359 + }, + { + "epoch": 0.8, + "grad_norm": 0.5543420747943056, + "learning_rate": 2.07485264604002e-06, + "loss": 0.306, + "step": 17360 + }, + { + "epoch": 0.8, + "grad_norm": 0.31285872491835925, + "learning_rate": 2.073945322117884e-06, + "loss": 0.2829, + "step": 17361 + }, + { + "epoch": 0.8, + "grad_norm": 0.475938418923415, + "learning_rate": 2.0730381736709583e-06, + "loss": 0.2773, + "step": 17362 + }, + { + "epoch": 0.8, + "grad_norm": 0.46531018788621603, + "learning_rate": 2.072131200719332e-06, + "loss": 0.2707, + "step": 17363 + }, + { + "epoch": 0.8, + "grad_norm": 0.2679988474653125, + "learning_rate": 2.0712244032830753e-06, + "loss": 0.1984, + "step": 17364 + }, + { + "epoch": 0.8, + "grad_norm": 0.3740904092582829, + "learning_rate": 2.070317781382272e-06, + "loss": 0.2638, + "step": 17365 + }, + { + "epoch": 0.8, + "grad_norm": 0.5904689367034499, + "learning_rate": 2.069411335036985e-06, + "loss": 0.3044, + "step": 17366 + }, + { + "epoch": 0.8, + "grad_norm": 0.4334117765879798, + "learning_rate": 2.068505064267292e-06, + "loss": 0.2765, + "step": 17367 + }, + { + "epoch": 0.8, + "grad_norm": 1.2341946595608295, + "learning_rate": 2.067598969093254e-06, + "loss": 0.6385, + "step": 17368 + }, + { + "epoch": 0.8, + "grad_norm": 0.3162363582527675, + "learning_rate": 2.0666930495349256e-06, + "loss": 0.2198, + "step": 17369 + }, + { + "epoch": 0.8, + "grad_norm": 0.22186722137650894, + "learning_rate": 2.065787305612367e-06, + "loss": 0.1868, + "step": 17370 + }, + { + "epoch": 0.8, + "grad_norm": 0.7838027176673482, + "learning_rate": 2.064881737345631e-06, + "loss": 0.3543, + "step": 17371 + }, + { + "epoch": 0.8, + "grad_norm": 0.6665989802906895, + "learning_rate": 2.063976344754768e-06, + "loss": 0.12, + "step": 17372 + }, + { + "epoch": 0.8, + "grad_norm": 0.26990211701147754, + "learning_rate": 2.0630711278598157e-06, + "loss": 0.2515, + "step": 17373 + }, + { + "epoch": 0.8, + "grad_norm": 1.2288354676713495, + "learning_rate": 2.0621660866808234e-06, + "loss": 0.7528, + "step": 17374 + }, + { + "epoch": 0.8, + "grad_norm": 0.7783017086036176, + "learning_rate": 2.06126122123782e-06, + "loss": 0.1582, + "step": 17375 + }, + { + "epoch": 0.8, + "grad_norm": 0.3634845520035248, + "learning_rate": 2.060356531550841e-06, + "loss": 0.228, + "step": 17376 + }, + { + "epoch": 0.8, + "grad_norm": 0.36361766080818464, + "learning_rate": 2.05945201763992e-06, + "loss": 0.3022, + "step": 17377 + }, + { + "epoch": 0.8, + "grad_norm": 0.335279076976714, + "learning_rate": 2.0585476795250746e-06, + "loss": 0.1209, + "step": 17378 + }, + { + "epoch": 0.8, + "grad_norm": 0.43699484879874745, + "learning_rate": 2.05764351722633e-06, + "loss": 0.3317, + "step": 17379 + }, + { + "epoch": 0.8, + "grad_norm": 1.5145912152108578, + "learning_rate": 2.056739530763705e-06, + "loss": 0.7775, + "step": 17380 + }, + { + "epoch": 0.8, + "grad_norm": 0.37731700057212597, + "learning_rate": 2.0558357201572087e-06, + "loss": 0.2797, + "step": 17381 + }, + { + "epoch": 0.8, + "grad_norm": 0.373771349704236, + "learning_rate": 2.054932085426856e-06, + "loss": 0.2007, + "step": 17382 + }, + { + "epoch": 0.8, + "grad_norm": 0.5366801705462236, + "learning_rate": 2.0540286265926436e-06, + "loss": 0.2819, + "step": 17383 + }, + { + "epoch": 0.8, + "grad_norm": 0.347940533028942, + "learning_rate": 2.0531253436745847e-06, + "loss": 0.185, + "step": 17384 + }, + { + "epoch": 0.8, + "grad_norm": 0.2778848135405373, + "learning_rate": 2.052222236692668e-06, + "loss": 0.2276, + "step": 17385 + }, + { + "epoch": 0.8, + "grad_norm": 1.3247673049408129, + "learning_rate": 2.0513193056668934e-06, + "loss": 0.7172, + "step": 17386 + }, + { + "epoch": 0.8, + "grad_norm": 1.031726448291546, + "learning_rate": 2.050416550617249e-06, + "loss": 0.4367, + "step": 17387 + }, + { + "epoch": 0.8, + "grad_norm": 0.35984121014359366, + "learning_rate": 2.049513971563715e-06, + "loss": 0.2136, + "step": 17388 + }, + { + "epoch": 0.8, + "grad_norm": 0.2813935644392902, + "learning_rate": 2.0486115685262842e-06, + "loss": 0.2473, + "step": 17389 + }, + { + "epoch": 0.8, + "grad_norm": 0.4305641805375185, + "learning_rate": 2.047709341524926e-06, + "loss": 0.2664, + "step": 17390 + }, + { + "epoch": 0.8, + "grad_norm": 0.3108062215438227, + "learning_rate": 2.046807290579622e-06, + "loss": 0.2031, + "step": 17391 + }, + { + "epoch": 0.8, + "grad_norm": 1.2247628670865973, + "learning_rate": 2.0459054157103363e-06, + "loss": 0.5286, + "step": 17392 + }, + { + "epoch": 0.8, + "grad_norm": 0.33863638603051954, + "learning_rate": 2.0450037169370385e-06, + "loss": 0.2765, + "step": 17393 + }, + { + "epoch": 0.8, + "grad_norm": 0.392985576819022, + "learning_rate": 2.0441021942796947e-06, + "loss": 0.2709, + "step": 17394 + }, + { + "epoch": 0.8, + "grad_norm": 1.1381327449050558, + "learning_rate": 2.0432008477582567e-06, + "loss": 0.2797, + "step": 17395 + }, + { + "epoch": 0.8, + "grad_norm": 0.2652105388769732, + "learning_rate": 2.0422996773926827e-06, + "loss": 0.198, + "step": 17396 + }, + { + "epoch": 0.8, + "grad_norm": 0.3169588578398246, + "learning_rate": 2.0413986832029275e-06, + "loss": 0.2733, + "step": 17397 + }, + { + "epoch": 0.8, + "grad_norm": 1.4296217784095149, + "learning_rate": 2.0404978652089325e-06, + "loss": 0.3216, + "step": 17398 + }, + { + "epoch": 0.8, + "grad_norm": 0.6051495106446253, + "learning_rate": 2.039597223430645e-06, + "loss": 0.3606, + "step": 17399 + }, + { + "epoch": 0.8, + "grad_norm": 0.35464213854329785, + "learning_rate": 2.038696757887998e-06, + "loss": 0.2389, + "step": 17400 + }, + { + "epoch": 0.8, + "grad_norm": 0.37505675196446625, + "learning_rate": 2.0377964686009365e-06, + "loss": 0.2499, + "step": 17401 + }, + { + "epoch": 0.8, + "grad_norm": 0.24735264295867063, + "learning_rate": 2.036896355589385e-06, + "loss": 0.1626, + "step": 17402 + }, + { + "epoch": 0.8, + "grad_norm": 0.379914912674075, + "learning_rate": 2.035996418873275e-06, + "loss": 0.2727, + "step": 17403 + }, + { + "epoch": 0.8, + "grad_norm": 0.4808952807181051, + "learning_rate": 2.0350966584725264e-06, + "loss": 0.2907, + "step": 17404 + }, + { + "epoch": 0.8, + "grad_norm": 0.48827431644018576, + "learning_rate": 2.0341970744070617e-06, + "loss": 0.3197, + "step": 17405 + }, + { + "epoch": 0.8, + "grad_norm": 0.3322675775688114, + "learning_rate": 2.0332976666967976e-06, + "loss": 0.25, + "step": 17406 + }, + { + "epoch": 0.8, + "grad_norm": 1.2767194385236558, + "learning_rate": 2.0323984353616434e-06, + "loss": 0.3931, + "step": 17407 + }, + { + "epoch": 0.8, + "grad_norm": 0.27123829868822, + "learning_rate": 2.03149938042151e-06, + "loss": 0.2006, + "step": 17408 + }, + { + "epoch": 0.8, + "grad_norm": 0.36092390400886815, + "learning_rate": 2.030600501896298e-06, + "loss": 0.2653, + "step": 17409 + }, + { + "epoch": 0.8, + "grad_norm": 0.5127604782881632, + "learning_rate": 2.02970179980591e-06, + "loss": 0.2776, + "step": 17410 + }, + { + "epoch": 0.8, + "grad_norm": 0.8898886575590601, + "learning_rate": 2.0288032741702458e-06, + "loss": 0.2824, + "step": 17411 + }, + { + "epoch": 0.8, + "grad_norm": 0.3654653765481926, + "learning_rate": 2.027904925009191e-06, + "loss": 0.2556, + "step": 17412 + }, + { + "epoch": 0.8, + "grad_norm": 0.3699120191868951, + "learning_rate": 2.0270067523426373e-06, + "loss": 0.2808, + "step": 17413 + }, + { + "epoch": 0.8, + "grad_norm": 0.8827602258936752, + "learning_rate": 2.0261087561904693e-06, + "loss": 0.225, + "step": 17414 + }, + { + "epoch": 0.8, + "grad_norm": 0.3577938529300041, + "learning_rate": 2.0252109365725714e-06, + "loss": 0.2651, + "step": 17415 + }, + { + "epoch": 0.8, + "grad_norm": 0.4758614845370199, + "learning_rate": 2.024313293508817e-06, + "loss": 0.3013, + "step": 17416 + }, + { + "epoch": 0.8, + "grad_norm": 0.324313946411488, + "learning_rate": 2.0234158270190763e-06, + "loss": 0.2702, + "step": 17417 + }, + { + "epoch": 0.8, + "grad_norm": 0.331918932437627, + "learning_rate": 2.0225185371232216e-06, + "loss": 0.1809, + "step": 17418 + }, + { + "epoch": 0.8, + "grad_norm": 1.4443944649547569, + "learning_rate": 2.021621423841117e-06, + "loss": 0.3842, + "step": 17419 + }, + { + "epoch": 0.8, + "grad_norm": 0.360850154943085, + "learning_rate": 2.020724487192628e-06, + "loss": 0.3013, + "step": 17420 + }, + { + "epoch": 0.8, + "grad_norm": 0.32511069378390905, + "learning_rate": 2.019827727197605e-06, + "loss": 0.1821, + "step": 17421 + }, + { + "epoch": 0.8, + "grad_norm": 0.833136595530928, + "learning_rate": 2.018931143875905e-06, + "loss": 0.3602, + "step": 17422 + }, + { + "epoch": 0.8, + "grad_norm": 0.3331160603570943, + "learning_rate": 2.0180347372473807e-06, + "loss": 0.1818, + "step": 17423 + }, + { + "epoch": 0.8, + "grad_norm": 0.3258139671351841, + "learning_rate": 2.0171385073318706e-06, + "loss": 0.191, + "step": 17424 + }, + { + "epoch": 0.8, + "grad_norm": 0.38792618992702615, + "learning_rate": 2.016242454149223e-06, + "loss": 0.2833, + "step": 17425 + }, + { + "epoch": 0.8, + "grad_norm": 1.5232113291413734, + "learning_rate": 2.0153465777192693e-06, + "loss": 0.6892, + "step": 17426 + }, + { + "epoch": 0.8, + "grad_norm": 0.33815351975224206, + "learning_rate": 2.0144508780618476e-06, + "loss": 0.2171, + "step": 17427 + }, + { + "epoch": 0.8, + "grad_norm": 0.4855658940058251, + "learning_rate": 2.013555355196789e-06, + "loss": 0.3486, + "step": 17428 + }, + { + "epoch": 0.8, + "grad_norm": 0.35448757872947984, + "learning_rate": 2.012660009143914e-06, + "loss": 0.2705, + "step": 17429 + }, + { + "epoch": 0.8, + "grad_norm": 0.2574363949041688, + "learning_rate": 2.0117648399230495e-06, + "loss": 0.1952, + "step": 17430 + }, + { + "epoch": 0.8, + "grad_norm": 1.408941787360122, + "learning_rate": 2.0108698475540113e-06, + "loss": 0.1934, + "step": 17431 + }, + { + "epoch": 0.8, + "grad_norm": 0.3607938086379944, + "learning_rate": 2.009975032056618e-06, + "loss": 0.3028, + "step": 17432 + }, + { + "epoch": 0.8, + "grad_norm": 0.32996958182908287, + "learning_rate": 2.0090803934506765e-06, + "loss": 0.2378, + "step": 17433 + }, + { + "epoch": 0.8, + "grad_norm": 0.8021500559513837, + "learning_rate": 2.0081859317559905e-06, + "loss": 0.2919, + "step": 17434 + }, + { + "epoch": 0.8, + "grad_norm": 0.3003686828398488, + "learning_rate": 2.0072916469923654e-06, + "loss": 0.1751, + "step": 17435 + }, + { + "epoch": 0.8, + "grad_norm": 0.47067385584566673, + "learning_rate": 2.0063975391796e-06, + "loss": 0.2813, + "step": 17436 + }, + { + "epoch": 0.8, + "grad_norm": 0.31775988447841064, + "learning_rate": 2.00550360833749e-06, + "loss": 0.2356, + "step": 17437 + }, + { + "epoch": 0.8, + "grad_norm": 0.7170778500232188, + "learning_rate": 2.004609854485824e-06, + "loss": 0.4203, + "step": 17438 + }, + { + "epoch": 0.8, + "grad_norm": 0.4580394069391533, + "learning_rate": 2.0037162776443884e-06, + "loss": 0.289, + "step": 17439 + }, + { + "epoch": 0.8, + "grad_norm": 0.3278380748749609, + "learning_rate": 2.0028228778329718e-06, + "loss": 0.2535, + "step": 17440 + }, + { + "epoch": 0.8, + "grad_norm": 1.3783771167464154, + "learning_rate": 2.001929655071345e-06, + "loss": 0.6034, + "step": 17441 + }, + { + "epoch": 0.8, + "grad_norm": 0.2414993943618676, + "learning_rate": 2.00103660937929e-06, + "loss": 0.1652, + "step": 17442 + }, + { + "epoch": 0.8, + "grad_norm": 0.6017706833867376, + "learning_rate": 2.00014374077657e-06, + "loss": 0.2956, + "step": 17443 + }, + { + "epoch": 0.8, + "grad_norm": 0.34581204942754123, + "learning_rate": 1.999251049282962e-06, + "loss": 0.2326, + "step": 17444 + }, + { + "epoch": 0.8, + "grad_norm": 0.3279163157451626, + "learning_rate": 1.9983585349182243e-06, + "loss": 0.2521, + "step": 17445 + }, + { + "epoch": 0.8, + "grad_norm": 0.8872247240986167, + "learning_rate": 1.997466197702115e-06, + "loss": 0.4435, + "step": 17446 + }, + { + "epoch": 0.8, + "grad_norm": 0.47384088010556785, + "learning_rate": 1.996574037654393e-06, + "loss": 0.112, + "step": 17447 + }, + { + "epoch": 0.8, + "grad_norm": 0.24271663892932163, + "learning_rate": 1.995682054794803e-06, + "loss": 0.2005, + "step": 17448 + }, + { + "epoch": 0.8, + "grad_norm": 0.3782285680227093, + "learning_rate": 1.9947902491431024e-06, + "loss": 0.2967, + "step": 17449 + }, + { + "epoch": 0.8, + "grad_norm": 0.8288543411104783, + "learning_rate": 1.9938986207190282e-06, + "loss": 0.2872, + "step": 17450 + }, + { + "epoch": 0.8, + "grad_norm": 0.3614207369776317, + "learning_rate": 1.9930071695423246e-06, + "loss": 0.2559, + "step": 17451 + }, + { + "epoch": 0.8, + "grad_norm": 0.5737329133688324, + "learning_rate": 1.9921158956327214e-06, + "loss": 0.3997, + "step": 17452 + }, + { + "epoch": 0.8, + "grad_norm": 0.40917583698030513, + "learning_rate": 1.9912247990099556e-06, + "loss": 0.1884, + "step": 17453 + }, + { + "epoch": 0.8, + "grad_norm": 0.305413311915621, + "learning_rate": 1.9903338796937556e-06, + "loss": 0.1515, + "step": 17454 + }, + { + "epoch": 0.8, + "grad_norm": 0.4203046627360346, + "learning_rate": 1.9894431377038417e-06, + "loss": 0.3039, + "step": 17455 + }, + { + "epoch": 0.8, + "grad_norm": 0.3919339890330523, + "learning_rate": 1.9885525730599353e-06, + "loss": 0.2932, + "step": 17456 + }, + { + "epoch": 0.8, + "grad_norm": 0.40935336394126803, + "learning_rate": 1.9876621857817568e-06, + "loss": 0.1971, + "step": 17457 + }, + { + "epoch": 0.8, + "grad_norm": 0.5710417856378557, + "learning_rate": 1.9867719758890113e-06, + "loss": 0.3906, + "step": 17458 + }, + { + "epoch": 0.8, + "grad_norm": 1.2540797655344296, + "learning_rate": 1.9858819434014154e-06, + "loss": 0.4473, + "step": 17459 + }, + { + "epoch": 0.8, + "grad_norm": 0.24701172875408506, + "learning_rate": 1.984992088338663e-06, + "loss": 0.1634, + "step": 17460 + }, + { + "epoch": 0.8, + "grad_norm": 0.29464905353549137, + "learning_rate": 1.9841024107204653e-06, + "loss": 0.2416, + "step": 17461 + }, + { + "epoch": 0.8, + "grad_norm": 0.7213230863925465, + "learning_rate": 1.9832129105665155e-06, + "loss": 0.3825, + "step": 17462 + }, + { + "epoch": 0.8, + "grad_norm": 0.4734300424148013, + "learning_rate": 1.982323587896502e-06, + "loss": 0.2009, + "step": 17463 + }, + { + "epoch": 0.8, + "grad_norm": 0.3063852316533994, + "learning_rate": 1.981434442730119e-06, + "loss": 0.2682, + "step": 17464 + }, + { + "epoch": 0.8, + "grad_norm": 1.410067136578374, + "learning_rate": 1.9805454750870447e-06, + "loss": 0.5576, + "step": 17465 + }, + { + "epoch": 0.8, + "grad_norm": 0.25363496060548246, + "learning_rate": 1.979656684986969e-06, + "loss": 0.1579, + "step": 17466 + }, + { + "epoch": 0.8, + "grad_norm": 0.5247362310008205, + "learning_rate": 1.9787680724495617e-06, + "loss": 0.3061, + "step": 17467 + }, + { + "epoch": 0.8, + "grad_norm": 0.36013017745065956, + "learning_rate": 1.977879637494502e-06, + "loss": 0.3104, + "step": 17468 + }, + { + "epoch": 0.8, + "grad_norm": 0.486336595394454, + "learning_rate": 1.976991380141451e-06, + "loss": 0.3004, + "step": 17469 + }, + { + "epoch": 0.8, + "grad_norm": 0.48765146042070706, + "learning_rate": 1.9761033004100793e-06, + "loss": 0.2682, + "step": 17470 + }, + { + "epoch": 0.8, + "grad_norm": 1.6648495767312834, + "learning_rate": 1.9752153983200483e-06, + "loss": 0.7302, + "step": 17471 + }, + { + "epoch": 0.8, + "grad_norm": 0.32426312479842956, + "learning_rate": 1.9743276738910124e-06, + "loss": 0.2519, + "step": 17472 + }, + { + "epoch": 0.8, + "grad_norm": 0.3655750512152428, + "learning_rate": 1.9734401271426264e-06, + "loss": 0.1953, + "step": 17473 + }, + { + "epoch": 0.8, + "grad_norm": 0.4601058516110566, + "learning_rate": 1.9725527580945423e-06, + "loss": 0.2617, + "step": 17474 + }, + { + "epoch": 0.8, + "grad_norm": 1.0275764714853401, + "learning_rate": 1.971665566766401e-06, + "loss": 0.5164, + "step": 17475 + }, + { + "epoch": 0.8, + "grad_norm": 0.25298315728783144, + "learning_rate": 1.970778553177849e-06, + "loss": 0.2182, + "step": 17476 + }, + { + "epoch": 0.8, + "grad_norm": 1.334227765110419, + "learning_rate": 1.9698917173485175e-06, + "loss": 0.5366, + "step": 17477 + }, + { + "epoch": 0.8, + "grad_norm": 0.6222915606895938, + "learning_rate": 1.9690050592980446e-06, + "loss": 0.3011, + "step": 17478 + }, + { + "epoch": 0.8, + "grad_norm": 0.3847951488804628, + "learning_rate": 1.96811857904606e-06, + "loss": 0.258, + "step": 17479 + }, + { + "epoch": 0.8, + "grad_norm": 0.26787077752122934, + "learning_rate": 1.96723227661219e-06, + "loss": 0.2351, + "step": 17480 + }, + { + "epoch": 0.8, + "grad_norm": 0.26408275735407316, + "learning_rate": 1.9663461520160566e-06, + "loss": 0.1623, + "step": 17481 + }, + { + "epoch": 0.8, + "grad_norm": 0.41770340117450855, + "learning_rate": 1.9654602052772708e-06, + "loss": 0.2906, + "step": 17482 + }, + { + "epoch": 0.8, + "grad_norm": 1.3825284046774293, + "learning_rate": 1.964574436415457e-06, + "loss": 0.245, + "step": 17483 + }, + { + "epoch": 0.8, + "grad_norm": 0.32738337319671307, + "learning_rate": 1.963688845450218e-06, + "loss": 0.2802, + "step": 17484 + }, + { + "epoch": 0.8, + "grad_norm": 0.40984477353611803, + "learning_rate": 1.9628034324011656e-06, + "loss": 0.3245, + "step": 17485 + }, + { + "epoch": 0.8, + "grad_norm": 0.3783802727193963, + "learning_rate": 1.9619181972878955e-06, + "loss": 0.1714, + "step": 17486 + }, + { + "epoch": 0.8, + "grad_norm": 0.45054050363413284, + "learning_rate": 1.9610331401300097e-06, + "loss": 0.2694, + "step": 17487 + }, + { + "epoch": 0.8, + "grad_norm": 0.376053589384803, + "learning_rate": 1.9601482609471055e-06, + "loss": 0.2703, + "step": 17488 + }, + { + "epoch": 0.8, + "grad_norm": 0.515977393730018, + "learning_rate": 1.9592635597587663e-06, + "loss": 0.2439, + "step": 17489 + }, + { + "epoch": 0.8, + "grad_norm": 0.5442247071199551, + "learning_rate": 1.9583790365845823e-06, + "loss": 0.2811, + "step": 17490 + }, + { + "epoch": 0.8, + "grad_norm": 0.45430634565116684, + "learning_rate": 1.9574946914441386e-06, + "loss": 0.3195, + "step": 17491 + }, + { + "epoch": 0.8, + "grad_norm": 0.3176632687542709, + "learning_rate": 1.956610524357009e-06, + "loss": 0.2037, + "step": 17492 + }, + { + "epoch": 0.8, + "grad_norm": 0.8302930529551902, + "learning_rate": 1.9557265353427713e-06, + "loss": 0.5089, + "step": 17493 + }, + { + "epoch": 0.8, + "grad_norm": 0.28472935579914227, + "learning_rate": 1.9548427244209935e-06, + "loss": 0.2203, + "step": 17494 + }, + { + "epoch": 0.8, + "grad_norm": 0.5076610014203942, + "learning_rate": 1.953959091611243e-06, + "loss": 0.3163, + "step": 17495 + }, + { + "epoch": 0.8, + "grad_norm": 0.45042996515250217, + "learning_rate": 1.953075636933084e-06, + "loss": 0.2383, + "step": 17496 + }, + { + "epoch": 0.8, + "grad_norm": 0.3935109041073421, + "learning_rate": 1.9521923604060764e-06, + "loss": 0.2746, + "step": 17497 + }, + { + "epoch": 0.8, + "grad_norm": 0.6177315686089517, + "learning_rate": 1.9513092620497744e-06, + "loss": 0.2441, + "step": 17498 + }, + { + "epoch": 0.8, + "grad_norm": 0.42099789110376307, + "learning_rate": 1.950426341883721e-06, + "loss": 0.2268, + "step": 17499 + }, + { + "epoch": 0.8, + "grad_norm": 0.24177242806990507, + "learning_rate": 1.949543599927477e-06, + "loss": 0.2421, + "step": 17500 + }, + { + "epoch": 0.8, + "grad_norm": 1.12207951305231, + "learning_rate": 1.9486610362005755e-06, + "loss": 0.4064, + "step": 17501 + }, + { + "epoch": 0.8, + "grad_norm": 0.5349777401284257, + "learning_rate": 1.9477786507225615e-06, + "loss": 0.2325, + "step": 17502 + }, + { + "epoch": 0.8, + "grad_norm": 0.40638855020972686, + "learning_rate": 1.9468964435129643e-06, + "loss": 0.2663, + "step": 17503 + }, + { + "epoch": 0.8, + "grad_norm": 0.39493515937639534, + "learning_rate": 1.9460144145913184e-06, + "loss": 0.3103, + "step": 17504 + }, + { + "epoch": 0.8, + "grad_norm": 0.3806628556932682, + "learning_rate": 1.9451325639771536e-06, + "loss": 0.2172, + "step": 17505 + }, + { + "epoch": 0.8, + "grad_norm": 0.29098101249067376, + "learning_rate": 1.944250891689987e-06, + "loss": 0.2235, + "step": 17506 + }, + { + "epoch": 0.8, + "grad_norm": 0.7035975919880475, + "learning_rate": 1.9433693977493452e-06, + "loss": 0.2585, + "step": 17507 + }, + { + "epoch": 0.8, + "grad_norm": 0.45294384337207344, + "learning_rate": 1.942488082174734e-06, + "loss": 0.2724, + "step": 17508 + }, + { + "epoch": 0.8, + "grad_norm": 0.40266647084085155, + "learning_rate": 1.9416069449856757e-06, + "loss": 0.2155, + "step": 17509 + }, + { + "epoch": 0.8, + "grad_norm": 1.2128396971873856, + "learning_rate": 1.9407259862016725e-06, + "loss": 0.5199, + "step": 17510 + }, + { + "epoch": 0.8, + "grad_norm": 0.51488299829181, + "learning_rate": 1.939845205842227e-06, + "loss": 0.3482, + "step": 17511 + }, + { + "epoch": 0.8, + "grad_norm": 0.28071829781234403, + "learning_rate": 1.9389646039268396e-06, + "loss": 0.1924, + "step": 17512 + }, + { + "epoch": 0.8, + "grad_norm": 0.5193625505869319, + "learning_rate": 1.9380841804750063e-06, + "loss": 0.2952, + "step": 17513 + }, + { + "epoch": 0.8, + "grad_norm": 0.4496699728681639, + "learning_rate": 1.9372039355062223e-06, + "loss": 0.2016, + "step": 17514 + }, + { + "epoch": 0.8, + "grad_norm": 0.4245173445923174, + "learning_rate": 1.936323869039969e-06, + "loss": 0.1986, + "step": 17515 + }, + { + "epoch": 0.8, + "grad_norm": 0.3981202066122882, + "learning_rate": 1.9354439810957324e-06, + "loss": 0.3008, + "step": 17516 + }, + { + "epoch": 0.8, + "grad_norm": 1.162600591650766, + "learning_rate": 1.934564271692998e-06, + "loss": 0.7564, + "step": 17517 + }, + { + "epoch": 0.8, + "grad_norm": 0.3538151150066641, + "learning_rate": 1.933684740851233e-06, + "loss": 0.213, + "step": 17518 + }, + { + "epoch": 0.8, + "grad_norm": 0.5533524651474537, + "learning_rate": 1.9328053885899165e-06, + "loss": 0.3407, + "step": 17519 + }, + { + "epoch": 0.8, + "grad_norm": 0.2790711227466672, + "learning_rate": 1.9319262149285113e-06, + "loss": 0.1817, + "step": 17520 + }, + { + "epoch": 0.8, + "grad_norm": 0.3924844737130819, + "learning_rate": 1.9310472198864828e-06, + "loss": 0.2763, + "step": 17521 + }, + { + "epoch": 0.8, + "grad_norm": 1.075692128855626, + "learning_rate": 1.9301684034832946e-06, + "loss": 0.3564, + "step": 17522 + }, + { + "epoch": 0.8, + "grad_norm": 0.35324513382275907, + "learning_rate": 1.929289765738398e-06, + "loss": 0.3012, + "step": 17523 + }, + { + "epoch": 0.81, + "grad_norm": 0.4342231180145792, + "learning_rate": 1.9284113066712496e-06, + "loss": 0.2994, + "step": 17524 + }, + { + "epoch": 0.81, + "grad_norm": 0.7615587218670656, + "learning_rate": 1.9275330263012904e-06, + "loss": 0.2774, + "step": 17525 + }, + { + "epoch": 0.81, + "grad_norm": 0.390166261679381, + "learning_rate": 1.9266549246479748e-06, + "loss": 0.2073, + "step": 17526 + }, + { + "epoch": 0.81, + "grad_norm": 0.3502451486351875, + "learning_rate": 1.9257770017307376e-06, + "loss": 0.2613, + "step": 17527 + }, + { + "epoch": 0.81, + "grad_norm": 0.3449869514597588, + "learning_rate": 1.924899257569014e-06, + "loss": 0.2539, + "step": 17528 + }, + { + "epoch": 0.81, + "grad_norm": 0.8325282952862263, + "learning_rate": 1.9240216921822362e-06, + "loss": 0.4312, + "step": 17529 + }, + { + "epoch": 0.81, + "grad_norm": 0.32747449145830604, + "learning_rate": 1.9231443055898356e-06, + "loss": 0.2687, + "step": 17530 + }, + { + "epoch": 0.81, + "grad_norm": 0.3639249307050056, + "learning_rate": 1.922267097811238e-06, + "loss": 0.2618, + "step": 17531 + }, + { + "epoch": 0.81, + "grad_norm": 0.2998660442774641, + "learning_rate": 1.9213900688658594e-06, + "loss": 0.1679, + "step": 17532 + }, + { + "epoch": 0.81, + "grad_norm": 0.34227602463217105, + "learning_rate": 1.920513218773117e-06, + "loss": 0.2561, + "step": 17533 + }, + { + "epoch": 0.81, + "grad_norm": 1.358261670695232, + "learning_rate": 1.919636547552428e-06, + "loss": 0.5247, + "step": 17534 + }, + { + "epoch": 0.81, + "grad_norm": 0.3524418163740295, + "learning_rate": 1.9187600552231955e-06, + "loss": 0.2626, + "step": 17535 + }, + { + "epoch": 0.81, + "grad_norm": 0.3418491359690452, + "learning_rate": 1.917883741804829e-06, + "loss": 0.2628, + "step": 17536 + }, + { + "epoch": 0.81, + "grad_norm": 1.0174230888653388, + "learning_rate": 1.9170076073167245e-06, + "loss": 0.5034, + "step": 17537 + }, + { + "epoch": 0.81, + "grad_norm": 0.27207880604318396, + "learning_rate": 1.9161316517782813e-06, + "loss": 0.0752, + "step": 17538 + }, + { + "epoch": 0.81, + "grad_norm": 0.35416998769814395, + "learning_rate": 1.9152558752088947e-06, + "loss": 0.2722, + "step": 17539 + }, + { + "epoch": 0.81, + "grad_norm": 0.4158770867296729, + "learning_rate": 1.9143802776279476e-06, + "loss": 0.3069, + "step": 17540 + }, + { + "epoch": 0.81, + "grad_norm": 0.5285166259902728, + "learning_rate": 1.913504859054831e-06, + "loss": 0.2643, + "step": 17541 + }, + { + "epoch": 0.81, + "grad_norm": 0.36326564101602266, + "learning_rate": 1.9126296195089165e-06, + "loss": 0.2767, + "step": 17542 + }, + { + "epoch": 0.81, + "grad_norm": 0.4920461312583416, + "learning_rate": 1.9117545590095944e-06, + "loss": 0.357, + "step": 17543 + }, + { + "epoch": 0.81, + "grad_norm": 0.2610761530461123, + "learning_rate": 1.9108796775762285e-06, + "loss": 0.1854, + "step": 17544 + }, + { + "epoch": 0.81, + "grad_norm": 0.31279081033228084, + "learning_rate": 1.9100049752281914e-06, + "loss": 0.1837, + "step": 17545 + }, + { + "epoch": 0.81, + "grad_norm": 0.8053603385662662, + "learning_rate": 1.9091304519848453e-06, + "loss": 0.3883, + "step": 17546 + }, + { + "epoch": 0.81, + "grad_norm": 0.37222084607605005, + "learning_rate": 1.9082561078655513e-06, + "loss": 0.2812, + "step": 17547 + }, + { + "epoch": 0.81, + "grad_norm": 0.31919839224868163, + "learning_rate": 1.9073819428896722e-06, + "loss": 0.2156, + "step": 17548 + }, + { + "epoch": 0.81, + "grad_norm": 1.199841510371724, + "learning_rate": 1.9065079570765542e-06, + "loss": 0.5346, + "step": 17549 + }, + { + "epoch": 0.81, + "grad_norm": 0.26354155349447855, + "learning_rate": 1.90563415044555e-06, + "loss": 0.1439, + "step": 17550 + }, + { + "epoch": 0.81, + "grad_norm": 0.24554603082839396, + "learning_rate": 1.904760523016006e-06, + "loss": 0.1959, + "step": 17551 + }, + { + "epoch": 0.81, + "grad_norm": 0.7324894732626335, + "learning_rate": 1.903887074807259e-06, + "loss": 0.3096, + "step": 17552 + }, + { + "epoch": 0.81, + "grad_norm": 0.6413775070583715, + "learning_rate": 1.9030138058386526e-06, + "loss": 0.3716, + "step": 17553 + }, + { + "epoch": 0.81, + "grad_norm": 0.3089041575235801, + "learning_rate": 1.9021407161295135e-06, + "loss": 0.2039, + "step": 17554 + }, + { + "epoch": 0.81, + "grad_norm": 0.5485958338362673, + "learning_rate": 1.901267805699174e-06, + "loss": 0.3183, + "step": 17555 + }, + { + "epoch": 0.81, + "grad_norm": 0.4637767631811345, + "learning_rate": 1.900395074566962e-06, + "loss": 0.2561, + "step": 17556 + }, + { + "epoch": 0.81, + "grad_norm": 0.2489765597238874, + "learning_rate": 1.899522522752194e-06, + "loss": 0.2098, + "step": 17557 + }, + { + "epoch": 0.81, + "grad_norm": 0.6925201621952362, + "learning_rate": 1.8986501502741928e-06, + "loss": 0.2784, + "step": 17558 + }, + { + "epoch": 0.81, + "grad_norm": 0.3813530178695753, + "learning_rate": 1.8977779571522648e-06, + "loss": 0.2902, + "step": 17559 + }, + { + "epoch": 0.81, + "grad_norm": 0.6325370494304092, + "learning_rate": 1.896905943405728e-06, + "loss": 0.3512, + "step": 17560 + }, + { + "epoch": 0.81, + "grad_norm": 0.4301766405134946, + "learning_rate": 1.8960341090538813e-06, + "loss": 0.2409, + "step": 17561 + }, + { + "epoch": 0.81, + "grad_norm": 0.27525747680230406, + "learning_rate": 1.8951624541160306e-06, + "loss": 0.2114, + "step": 17562 + }, + { + "epoch": 0.81, + "grad_norm": 0.40946861749814945, + "learning_rate": 1.8942909786114704e-06, + "loss": 0.2265, + "step": 17563 + }, + { + "epoch": 0.81, + "grad_norm": 0.3832965487754755, + "learning_rate": 1.8934196825594943e-06, + "loss": 0.2658, + "step": 17564 + }, + { + "epoch": 0.81, + "grad_norm": 1.3318345898818984, + "learning_rate": 1.8925485659793962e-06, + "loss": 0.7466, + "step": 17565 + }, + { + "epoch": 0.81, + "grad_norm": 0.4120415828291118, + "learning_rate": 1.8916776288904571e-06, + "loss": 0.2684, + "step": 17566 + }, + { + "epoch": 0.81, + "grad_norm": 0.33026009982688637, + "learning_rate": 1.890806871311962e-06, + "loss": 0.2466, + "step": 17567 + }, + { + "epoch": 0.81, + "grad_norm": 1.6850958213452547, + "learning_rate": 1.889936293263185e-06, + "loss": 0.4766, + "step": 17568 + }, + { + "epoch": 0.81, + "grad_norm": 0.3117092275357189, + "learning_rate": 1.8890658947634011e-06, + "loss": 0.221, + "step": 17569 + }, + { + "epoch": 0.81, + "grad_norm": 0.49985617177540004, + "learning_rate": 1.8881956758318843e-06, + "loss": 0.3137, + "step": 17570 + }, + { + "epoch": 0.81, + "grad_norm": 0.2806918368869582, + "learning_rate": 1.8873256364878933e-06, + "loss": 0.202, + "step": 17571 + }, + { + "epoch": 0.81, + "grad_norm": 0.3564302915869451, + "learning_rate": 1.8864557767506952e-06, + "loss": 0.2565, + "step": 17572 + }, + { + "epoch": 0.81, + "grad_norm": 1.2460651118824557, + "learning_rate": 1.8855860966395446e-06, + "loss": 0.6823, + "step": 17573 + }, + { + "epoch": 0.81, + "grad_norm": 0.4614061719103919, + "learning_rate": 1.8847165961737013e-06, + "loss": 0.2423, + "step": 17574 + }, + { + "epoch": 0.81, + "grad_norm": 0.3242832224904078, + "learning_rate": 1.883847275372409e-06, + "loss": 0.2608, + "step": 17575 + }, + { + "epoch": 0.81, + "grad_norm": 0.509538939701538, + "learning_rate": 1.8829781342549126e-06, + "loss": 0.3456, + "step": 17576 + }, + { + "epoch": 0.81, + "grad_norm": 0.1856604763509889, + "learning_rate": 1.8821091728404606e-06, + "loss": 0.1071, + "step": 17577 + }, + { + "epoch": 0.81, + "grad_norm": 0.5731658151146986, + "learning_rate": 1.8812403911482858e-06, + "loss": 0.3192, + "step": 17578 + }, + { + "epoch": 0.81, + "grad_norm": 0.2729378209838868, + "learning_rate": 1.8803717891976258e-06, + "loss": 0.2697, + "step": 17579 + }, + { + "epoch": 0.81, + "grad_norm": 0.8600119752296859, + "learning_rate": 1.8795033670077057e-06, + "loss": 0.2674, + "step": 17580 + }, + { + "epoch": 0.81, + "grad_norm": 0.617089758974944, + "learning_rate": 1.8786351245977542e-06, + "loss": 0.3226, + "step": 17581 + }, + { + "epoch": 0.81, + "grad_norm": 0.3955971868870838, + "learning_rate": 1.877767061986997e-06, + "loss": 0.2409, + "step": 17582 + }, + { + "epoch": 0.81, + "grad_norm": 0.2772408848818341, + "learning_rate": 1.8768991791946455e-06, + "loss": 0.2425, + "step": 17583 + }, + { + "epoch": 0.81, + "grad_norm": 0.3581113003990737, + "learning_rate": 1.8760314762399201e-06, + "loss": 0.1867, + "step": 17584 + }, + { + "epoch": 0.81, + "grad_norm": 0.6032035538232708, + "learning_rate": 1.8751639531420253e-06, + "loss": 0.3695, + "step": 17585 + }, + { + "epoch": 0.81, + "grad_norm": 0.7395764529522229, + "learning_rate": 1.8742966099201699e-06, + "loss": 0.3641, + "step": 17586 + }, + { + "epoch": 0.81, + "grad_norm": 0.28398213665811234, + "learning_rate": 1.8734294465935577e-06, + "loss": 0.2238, + "step": 17587 + }, + { + "epoch": 0.81, + "grad_norm": 0.5730798864434758, + "learning_rate": 1.8725624631813832e-06, + "loss": 0.3429, + "step": 17588 + }, + { + "epoch": 0.81, + "grad_norm": 0.4748127894089518, + "learning_rate": 1.8716956597028424e-06, + "loss": 0.2316, + "step": 17589 + }, + { + "epoch": 0.81, + "grad_norm": 0.26270059850402633, + "learning_rate": 1.8708290361771252e-06, + "loss": 0.1537, + "step": 17590 + }, + { + "epoch": 0.81, + "grad_norm": 0.320854453730702, + "learning_rate": 1.869962592623421e-06, + "loss": 0.2716, + "step": 17591 + }, + { + "epoch": 0.81, + "grad_norm": 0.695734926807146, + "learning_rate": 1.8690963290609088e-06, + "loss": 0.3732, + "step": 17592 + }, + { + "epoch": 0.81, + "grad_norm": 0.38804762656312597, + "learning_rate": 1.868230245508762e-06, + "loss": 0.1967, + "step": 17593 + }, + { + "epoch": 0.81, + "grad_norm": 0.7283062085230795, + "learning_rate": 1.8673643419861664e-06, + "loss": 0.3575, + "step": 17594 + }, + { + "epoch": 0.81, + "grad_norm": 0.3021653377429519, + "learning_rate": 1.8664986185122825e-06, + "loss": 0.2559, + "step": 17595 + }, + { + "epoch": 0.81, + "grad_norm": 0.8650064720085949, + "learning_rate": 1.8656330751062823e-06, + "loss": 0.494, + "step": 17596 + }, + { + "epoch": 0.81, + "grad_norm": 0.28306597193737143, + "learning_rate": 1.864767711787323e-06, + "loss": 0.1834, + "step": 17597 + }, + { + "epoch": 0.81, + "grad_norm": 0.537904614011536, + "learning_rate": 1.863902528574566e-06, + "loss": 0.3352, + "step": 17598 + }, + { + "epoch": 0.81, + "grad_norm": 0.4699291854499806, + "learning_rate": 1.8630375254871679e-06, + "loss": 0.3007, + "step": 17599 + }, + { + "epoch": 0.81, + "grad_norm": 0.3759686947192036, + "learning_rate": 1.8621727025442748e-06, + "loss": 0.2047, + "step": 17600 + }, + { + "epoch": 0.81, + "grad_norm": 1.375949633678169, + "learning_rate": 1.8613080597650368e-06, + "loss": 0.6975, + "step": 17601 + }, + { + "epoch": 0.81, + "grad_norm": 0.32977058949190263, + "learning_rate": 1.8604435971685908e-06, + "loss": 0.2261, + "step": 17602 + }, + { + "epoch": 0.81, + "grad_norm": 0.24327225669155564, + "learning_rate": 1.8595793147740794e-06, + "loss": 0.1926, + "step": 17603 + }, + { + "epoch": 0.81, + "grad_norm": 0.6520274095589002, + "learning_rate": 1.8587152126006391e-06, + "loss": 0.3807, + "step": 17604 + }, + { + "epoch": 0.81, + "grad_norm": 0.8124243394893229, + "learning_rate": 1.857851290667394e-06, + "loss": 0.281, + "step": 17605 + }, + { + "epoch": 0.81, + "grad_norm": 0.3862330682042005, + "learning_rate": 1.856987548993474e-06, + "loss": 0.2092, + "step": 17606 + }, + { + "epoch": 0.81, + "grad_norm": 0.40002263327388365, + "learning_rate": 1.8561239875980008e-06, + "loss": 0.3314, + "step": 17607 + }, + { + "epoch": 0.81, + "grad_norm": 0.5813189470888308, + "learning_rate": 1.8552606065000966e-06, + "loss": 0.356, + "step": 17608 + }, + { + "epoch": 0.81, + "grad_norm": 0.4173438402334249, + "learning_rate": 1.8543974057188697e-06, + "loss": 0.3019, + "step": 17609 + }, + { + "epoch": 0.81, + "grad_norm": 0.30681560467746144, + "learning_rate": 1.8535343852734333e-06, + "loss": 0.1892, + "step": 17610 + }, + { + "epoch": 0.81, + "grad_norm": 0.4057348371811246, + "learning_rate": 1.852671545182897e-06, + "loss": 0.2977, + "step": 17611 + }, + { + "epoch": 0.81, + "grad_norm": 0.4447448855177972, + "learning_rate": 1.8518088854663574e-06, + "loss": 0.28, + "step": 17612 + }, + { + "epoch": 0.81, + "grad_norm": 1.1981182192921929, + "learning_rate": 1.8509464061429183e-06, + "loss": 0.3597, + "step": 17613 + }, + { + "epoch": 0.81, + "grad_norm": 0.6079404410432365, + "learning_rate": 1.850084107231669e-06, + "loss": 0.3618, + "step": 17614 + }, + { + "epoch": 0.81, + "grad_norm": 0.27985795859004264, + "learning_rate": 1.8492219887517027e-06, + "loss": 0.2536, + "step": 17615 + }, + { + "epoch": 0.81, + "grad_norm": 0.2781975370107386, + "learning_rate": 1.8483600507221077e-06, + "loss": 0.1385, + "step": 17616 + }, + { + "epoch": 0.81, + "grad_norm": 0.9998037920830309, + "learning_rate": 1.8474982931619622e-06, + "loss": 0.4669, + "step": 17617 + }, + { + "epoch": 0.81, + "grad_norm": 0.3767826015994102, + "learning_rate": 1.846636716090351e-06, + "loss": 0.2596, + "step": 17618 + }, + { + "epoch": 0.81, + "grad_norm": 0.33381330252185515, + "learning_rate": 1.8457753195263373e-06, + "loss": 0.2577, + "step": 17619 + }, + { + "epoch": 0.81, + "grad_norm": 0.6359837568264348, + "learning_rate": 1.844914103489005e-06, + "loss": 0.3563, + "step": 17620 + }, + { + "epoch": 0.81, + "grad_norm": 0.4094552905805184, + "learning_rate": 1.8440530679974145e-06, + "loss": 0.2718, + "step": 17621 + }, + { + "epoch": 0.81, + "grad_norm": 0.42374139769724245, + "learning_rate": 1.8431922130706258e-06, + "loss": 0.2056, + "step": 17622 + }, + { + "epoch": 0.81, + "grad_norm": 0.2664617510745276, + "learning_rate": 1.8423315387276997e-06, + "loss": 0.1902, + "step": 17623 + }, + { + "epoch": 0.81, + "grad_norm": 0.4051462342640805, + "learning_rate": 1.8414710449876915e-06, + "loss": 0.2704, + "step": 17624 + }, + { + "epoch": 0.81, + "grad_norm": 1.2899799686356217, + "learning_rate": 1.840610731869653e-06, + "loss": 0.7544, + "step": 17625 + }, + { + "epoch": 0.81, + "grad_norm": 0.43095812949829815, + "learning_rate": 1.8397505993926256e-06, + "loss": 0.2308, + "step": 17626 + }, + { + "epoch": 0.81, + "grad_norm": 0.332475645188263, + "learning_rate": 1.8388906475756586e-06, + "loss": 0.2745, + "step": 17627 + }, + { + "epoch": 0.81, + "grad_norm": 0.5786262372291936, + "learning_rate": 1.8380308764377841e-06, + "loss": 0.2808, + "step": 17628 + }, + { + "epoch": 0.81, + "grad_norm": 0.3113781234420406, + "learning_rate": 1.8371712859980395e-06, + "loss": 0.0813, + "step": 17629 + }, + { + "epoch": 0.81, + "grad_norm": 0.43830813118130924, + "learning_rate": 1.8363118762754572e-06, + "loss": 0.3035, + "step": 17630 + }, + { + "epoch": 0.81, + "grad_norm": 0.3770260451958353, + "learning_rate": 1.8354526472890588e-06, + "loss": 0.2968, + "step": 17631 + }, + { + "epoch": 0.81, + "grad_norm": 0.766503638281388, + "learning_rate": 1.8345935990578711e-06, + "loss": 0.2904, + "step": 17632 + }, + { + "epoch": 0.81, + "grad_norm": 0.34067391108770506, + "learning_rate": 1.8337347316009125e-06, + "loss": 0.2491, + "step": 17633 + }, + { + "epoch": 0.81, + "grad_norm": 0.38789054286669283, + "learning_rate": 1.832876044937194e-06, + "loss": 0.2791, + "step": 17634 + }, + { + "epoch": 0.81, + "grad_norm": 0.31904860438619576, + "learning_rate": 1.832017539085731e-06, + "loss": 0.1525, + "step": 17635 + }, + { + "epoch": 0.81, + "grad_norm": 0.39947217155886117, + "learning_rate": 1.831159214065522e-06, + "loss": 0.1945, + "step": 17636 + }, + { + "epoch": 0.81, + "grad_norm": 0.7922670148544848, + "learning_rate": 1.8303010698955803e-06, + "loss": 0.4028, + "step": 17637 + }, + { + "epoch": 0.81, + "grad_norm": 0.3877451575425467, + "learning_rate": 1.829443106594896e-06, + "loss": 0.3004, + "step": 17638 + }, + { + "epoch": 0.81, + "grad_norm": 0.3293443173643252, + "learning_rate": 1.8285853241824692e-06, + "loss": 0.2045, + "step": 17639 + }, + { + "epoch": 0.81, + "grad_norm": 1.4909580145794479, + "learning_rate": 1.8277277226772849e-06, + "loss": 0.6504, + "step": 17640 + }, + { + "epoch": 0.81, + "grad_norm": 0.3411677281238788, + "learning_rate": 1.8268703020983326e-06, + "loss": 0.1877, + "step": 17641 + }, + { + "epoch": 0.81, + "grad_norm": 0.2836506165340174, + "learning_rate": 1.8260130624645956e-06, + "loss": 0.2143, + "step": 17642 + }, + { + "epoch": 0.81, + "grad_norm": 0.4737190555143232, + "learning_rate": 1.82515600379505e-06, + "loss": 0.3337, + "step": 17643 + }, + { + "epoch": 0.81, + "grad_norm": 0.7701869782744053, + "learning_rate": 1.824299126108674e-06, + "loss": 0.4217, + "step": 17644 + }, + { + "epoch": 0.81, + "grad_norm": 0.3183819996858973, + "learning_rate": 1.8234424294244324e-06, + "loss": 0.1866, + "step": 17645 + }, + { + "epoch": 0.81, + "grad_norm": 0.3906354973821418, + "learning_rate": 1.8225859137612945e-06, + "loss": 0.2953, + "step": 17646 + }, + { + "epoch": 0.81, + "grad_norm": 0.2805326123236495, + "learning_rate": 1.8217295791382261e-06, + "loss": 0.1528, + "step": 17647 + }, + { + "epoch": 0.81, + "grad_norm": 0.4352743875620717, + "learning_rate": 1.82087342557418e-06, + "loss": 0.2808, + "step": 17648 + }, + { + "epoch": 0.81, + "grad_norm": 0.7790169323226602, + "learning_rate": 1.8200174530881133e-06, + "loss": 0.3017, + "step": 17649 + }, + { + "epoch": 0.81, + "grad_norm": 0.3379083772434371, + "learning_rate": 1.8191616616989782e-06, + "loss": 0.2896, + "step": 17650 + }, + { + "epoch": 0.81, + "grad_norm": 0.3211337965088575, + "learning_rate": 1.8183060514257167e-06, + "loss": 0.2487, + "step": 17651 + }, + { + "epoch": 0.81, + "grad_norm": 1.5420266416958888, + "learning_rate": 1.8174506222872767e-06, + "loss": 0.2489, + "step": 17652 + }, + { + "epoch": 0.81, + "grad_norm": 0.2655946380497727, + "learning_rate": 1.8165953743025878e-06, + "loss": 0.1493, + "step": 17653 + }, + { + "epoch": 0.81, + "grad_norm": 0.332874780506257, + "learning_rate": 1.8157403074905956e-06, + "loss": 0.2723, + "step": 17654 + }, + { + "epoch": 0.81, + "grad_norm": 0.362002043392259, + "learning_rate": 1.8148854218702217e-06, + "loss": 0.2535, + "step": 17655 + }, + { + "epoch": 0.81, + "grad_norm": 1.2143009580367556, + "learning_rate": 1.8140307174603989e-06, + "loss": 0.7832, + "step": 17656 + }, + { + "epoch": 0.81, + "grad_norm": 0.33763140540346065, + "learning_rate": 1.8131761942800453e-06, + "loss": 0.2583, + "step": 17657 + }, + { + "epoch": 0.81, + "grad_norm": 0.5831521119803689, + "learning_rate": 1.8123218523480758e-06, + "loss": 0.3685, + "step": 17658 + }, + { + "epoch": 0.81, + "grad_norm": 0.27092322695470505, + "learning_rate": 1.8114676916834139e-06, + "loss": 0.1438, + "step": 17659 + }, + { + "epoch": 0.81, + "grad_norm": 0.35463327345565193, + "learning_rate": 1.8106137123049628e-06, + "loss": 0.263, + "step": 17660 + }, + { + "epoch": 0.81, + "grad_norm": 0.8584246436877984, + "learning_rate": 1.8097599142316335e-06, + "loss": 0.388, + "step": 17661 + }, + { + "epoch": 0.81, + "grad_norm": 0.3678811728474368, + "learning_rate": 1.8089062974823235e-06, + "loss": 0.2694, + "step": 17662 + }, + { + "epoch": 0.81, + "grad_norm": 0.44958617200454826, + "learning_rate": 1.808052862075933e-06, + "loss": 0.2798, + "step": 17663 + }, + { + "epoch": 0.81, + "grad_norm": 0.6386959312097973, + "learning_rate": 1.8071996080313602e-06, + "loss": 0.37, + "step": 17664 + }, + { + "epoch": 0.81, + "grad_norm": 0.5092444346293096, + "learning_rate": 1.806346535367488e-06, + "loss": 0.2577, + "step": 17665 + }, + { + "epoch": 0.81, + "grad_norm": 0.41635235272326143, + "learning_rate": 1.8054936441032067e-06, + "loss": 0.2653, + "step": 17666 + }, + { + "epoch": 0.81, + "grad_norm": 0.2507749002943778, + "learning_rate": 1.8046409342574011e-06, + "loss": 0.2176, + "step": 17667 + }, + { + "epoch": 0.81, + "grad_norm": 0.9539739279532073, + "learning_rate": 1.803788405848944e-06, + "loss": 0.5225, + "step": 17668 + }, + { + "epoch": 0.81, + "grad_norm": 0.36349225309323085, + "learning_rate": 1.8029360588967138e-06, + "loss": 0.2642, + "step": 17669 + }, + { + "epoch": 0.81, + "grad_norm": 0.41191893278888075, + "learning_rate": 1.802083893419574e-06, + "loss": 0.2988, + "step": 17670 + }, + { + "epoch": 0.81, + "grad_norm": 1.0710021315237246, + "learning_rate": 1.8012319094364005e-06, + "loss": 0.4873, + "step": 17671 + }, + { + "epoch": 0.81, + "grad_norm": 0.3678836032807035, + "learning_rate": 1.8003801069660487e-06, + "loss": 0.2172, + "step": 17672 + }, + { + "epoch": 0.81, + "grad_norm": 0.49340961338601536, + "learning_rate": 1.7995284860273798e-06, + "loss": 0.2462, + "step": 17673 + }, + { + "epoch": 0.81, + "grad_norm": 0.3792381083854042, + "learning_rate": 1.7986770466392445e-06, + "loss": 0.2787, + "step": 17674 + }, + { + "epoch": 0.81, + "grad_norm": 0.2504452864294809, + "learning_rate": 1.7978257888204953e-06, + "loss": 0.1634, + "step": 17675 + }, + { + "epoch": 0.81, + "grad_norm": 1.3514811993508609, + "learning_rate": 1.7969747125899795e-06, + "loss": 0.5379, + "step": 17676 + }, + { + "epoch": 0.81, + "grad_norm": 0.7561772260861458, + "learning_rate": 1.7961238179665353e-06, + "loss": 0.3655, + "step": 17677 + }, + { + "epoch": 0.81, + "grad_norm": 0.2431029040003532, + "learning_rate": 1.7952731049690053e-06, + "loss": 0.2137, + "step": 17678 + }, + { + "epoch": 0.81, + "grad_norm": 0.46466035807893097, + "learning_rate": 1.7944225736162192e-06, + "loss": 0.241, + "step": 17679 + }, + { + "epoch": 0.81, + "grad_norm": 0.542753328383535, + "learning_rate": 1.793572223927007e-06, + "loss": 0.3495, + "step": 17680 + }, + { + "epoch": 0.81, + "grad_norm": 0.3700206169260214, + "learning_rate": 1.7927220559201997e-06, + "loss": 0.1599, + "step": 17681 + }, + { + "epoch": 0.81, + "grad_norm": 0.34075565819934583, + "learning_rate": 1.791872069614613e-06, + "loss": 0.2592, + "step": 17682 + }, + { + "epoch": 0.81, + "grad_norm": 0.7511123225351447, + "learning_rate": 1.7910222650290688e-06, + "loss": 0.3698, + "step": 17683 + }, + { + "epoch": 0.81, + "grad_norm": 0.6765649088301964, + "learning_rate": 1.7901726421823784e-06, + "loss": 0.2942, + "step": 17684 + }, + { + "epoch": 0.81, + "grad_norm": 0.38042974452598977, + "learning_rate": 1.789323201093356e-06, + "loss": 0.2388, + "step": 17685 + }, + { + "epoch": 0.81, + "grad_norm": 0.39006188683304643, + "learning_rate": 1.788473941780804e-06, + "loss": 0.3299, + "step": 17686 + }, + { + "epoch": 0.81, + "grad_norm": 0.2374283900611283, + "learning_rate": 1.787624864263522e-06, + "loss": 0.1398, + "step": 17687 + }, + { + "epoch": 0.81, + "grad_norm": 0.47006743557224434, + "learning_rate": 1.7867759685603115e-06, + "loss": 0.2423, + "step": 17688 + }, + { + "epoch": 0.81, + "grad_norm": 0.8669306379113703, + "learning_rate": 1.785927254689963e-06, + "loss": 0.3892, + "step": 17689 + }, + { + "epoch": 0.81, + "grad_norm": 0.29801453495126184, + "learning_rate": 1.785078722671273e-06, + "loss": 0.2685, + "step": 17690 + }, + { + "epoch": 0.81, + "grad_norm": 0.5950983629138429, + "learning_rate": 1.7842303725230181e-06, + "loss": 0.2448, + "step": 17691 + }, + { + "epoch": 0.81, + "grad_norm": 0.4139756080363069, + "learning_rate": 1.7833822042639848e-06, + "loss": 0.2329, + "step": 17692 + }, + { + "epoch": 0.81, + "grad_norm": 0.40393589143278463, + "learning_rate": 1.7825342179129535e-06, + "loss": 0.2232, + "step": 17693 + }, + { + "epoch": 0.81, + "grad_norm": 0.3186701093334031, + "learning_rate": 1.7816864134886914e-06, + "loss": 0.233, + "step": 17694 + }, + { + "epoch": 0.81, + "grad_norm": 0.822582450428089, + "learning_rate": 1.7808387910099733e-06, + "loss": 0.3864, + "step": 17695 + }, + { + "epoch": 0.81, + "grad_norm": 0.4742008085026481, + "learning_rate": 1.7799913504955614e-06, + "loss": 0.2775, + "step": 17696 + }, + { + "epoch": 0.81, + "grad_norm": 0.6102588593108411, + "learning_rate": 1.7791440919642178e-06, + "loss": 0.3671, + "step": 17697 + }, + { + "epoch": 0.81, + "grad_norm": 0.3570115560508427, + "learning_rate": 1.7782970154347025e-06, + "loss": 0.2452, + "step": 17698 + }, + { + "epoch": 0.81, + "grad_norm": 0.28491422470308564, + "learning_rate": 1.7774501209257655e-06, + "loss": 0.1967, + "step": 17699 + }, + { + "epoch": 0.81, + "grad_norm": 0.48524914918459794, + "learning_rate": 1.7766034084561568e-06, + "loss": 0.306, + "step": 17700 + }, + { + "epoch": 0.81, + "grad_norm": 0.2789205940870507, + "learning_rate": 1.7757568780446232e-06, + "loss": 0.198, + "step": 17701 + }, + { + "epoch": 0.81, + "grad_norm": 0.6781115126906824, + "learning_rate": 1.774910529709909e-06, + "loss": 0.2569, + "step": 17702 + }, + { + "epoch": 0.81, + "grad_norm": 0.41084639765848374, + "learning_rate": 1.7740643634707454e-06, + "loss": 0.294, + "step": 17703 + }, + { + "epoch": 0.81, + "grad_norm": 1.2267774066712085, + "learning_rate": 1.7732183793458701e-06, + "loss": 0.3534, + "step": 17704 + }, + { + "epoch": 0.81, + "grad_norm": 0.5808180902445617, + "learning_rate": 1.772372577354009e-06, + "loss": 0.3055, + "step": 17705 + }, + { + "epoch": 0.81, + "grad_norm": 0.31183530555021993, + "learning_rate": 1.7715269575138893e-06, + "loss": 0.2675, + "step": 17706 + }, + { + "epoch": 0.81, + "grad_norm": 0.26560221229782, + "learning_rate": 1.770681519844235e-06, + "loss": 0.118, + "step": 17707 + }, + { + "epoch": 0.81, + "grad_norm": 0.9536070241645741, + "learning_rate": 1.7698362643637568e-06, + "loss": 0.3295, + "step": 17708 + }, + { + "epoch": 0.81, + "grad_norm": 0.42477865285789024, + "learning_rate": 1.7689911910911717e-06, + "loss": 0.3309, + "step": 17709 + }, + { + "epoch": 0.81, + "grad_norm": 0.3701407932523433, + "learning_rate": 1.7681463000451914e-06, + "loss": 0.3182, + "step": 17710 + }, + { + "epoch": 0.81, + "grad_norm": 0.381768361429879, + "learning_rate": 1.7673015912445157e-06, + "loss": 0.2085, + "step": 17711 + }, + { + "epoch": 0.81, + "grad_norm": 0.4984345860922187, + "learning_rate": 1.7664570647078494e-06, + "loss": 0.3371, + "step": 17712 + }, + { + "epoch": 0.81, + "grad_norm": 0.30531401311383954, + "learning_rate": 1.7656127204538842e-06, + "loss": 0.1962, + "step": 17713 + }, + { + "epoch": 0.81, + "grad_norm": 0.31020131915483506, + "learning_rate": 1.7647685585013208e-06, + "loss": 0.1945, + "step": 17714 + }, + { + "epoch": 0.81, + "grad_norm": 0.5578501156965504, + "learning_rate": 1.7639245788688453e-06, + "loss": 0.3381, + "step": 17715 + }, + { + "epoch": 0.81, + "grad_norm": 0.8049611765017568, + "learning_rate": 1.7630807815751394e-06, + "loss": 0.448, + "step": 17716 + }, + { + "epoch": 0.81, + "grad_norm": 0.35950937631050794, + "learning_rate": 1.762237166638887e-06, + "loss": 0.2077, + "step": 17717 + }, + { + "epoch": 0.81, + "grad_norm": 0.3525961280225382, + "learning_rate": 1.7613937340787602e-06, + "loss": 0.2845, + "step": 17718 + }, + { + "epoch": 0.81, + "grad_norm": 0.2984720246151478, + "learning_rate": 1.7605504839134414e-06, + "loss": 0.1776, + "step": 17719 + }, + { + "epoch": 0.81, + "grad_norm": 0.6027460774310784, + "learning_rate": 1.75970741616159e-06, + "loss": 0.1002, + "step": 17720 + }, + { + "epoch": 0.81, + "grad_norm": 0.41835754601797753, + "learning_rate": 1.7588645308418771e-06, + "loss": 0.3284, + "step": 17721 + }, + { + "epoch": 0.81, + "grad_norm": 0.3765028793068553, + "learning_rate": 1.7580218279729578e-06, + "loss": 0.2721, + "step": 17722 + }, + { + "epoch": 0.81, + "grad_norm": 0.87360421727514, + "learning_rate": 1.7571793075734922e-06, + "loss": 0.3258, + "step": 17723 + }, + { + "epoch": 0.81, + "grad_norm": 0.42520324197174275, + "learning_rate": 1.7563369696621335e-06, + "loss": 0.2123, + "step": 17724 + }, + { + "epoch": 0.81, + "grad_norm": 0.26501758817724974, + "learning_rate": 1.7554948142575279e-06, + "loss": 0.1611, + "step": 17725 + }, + { + "epoch": 0.81, + "grad_norm": 0.36652803384918403, + "learning_rate": 1.7546528413783203e-06, + "loss": 0.3135, + "step": 17726 + }, + { + "epoch": 0.81, + "grad_norm": 0.331152527603275, + "learning_rate": 1.7538110510431538e-06, + "loss": 0.2282, + "step": 17727 + }, + { + "epoch": 0.81, + "grad_norm": 0.8301649562967964, + "learning_rate": 1.7529694432706611e-06, + "loss": 0.3804, + "step": 17728 + }, + { + "epoch": 0.81, + "grad_norm": 0.3442238028190519, + "learning_rate": 1.7521280180794787e-06, + "loss": 0.2934, + "step": 17729 + }, + { + "epoch": 0.81, + "grad_norm": 0.35564982321423017, + "learning_rate": 1.7512867754882269e-06, + "loss": 0.1959, + "step": 17730 + }, + { + "epoch": 0.81, + "grad_norm": 0.36965325054843795, + "learning_rate": 1.7504457155155419e-06, + "loss": 0.1923, + "step": 17731 + }, + { + "epoch": 0.81, + "grad_norm": 0.7467330382911693, + "learning_rate": 1.7496048381800347e-06, + "loss": 0.3111, + "step": 17732 + }, + { + "epoch": 0.81, + "grad_norm": 0.42375208239538686, + "learning_rate": 1.7487641435003266e-06, + "loss": 0.2467, + "step": 17733 + }, + { + "epoch": 0.81, + "grad_norm": 0.37151685221926545, + "learning_rate": 1.7479236314950275e-06, + "loss": 0.2957, + "step": 17734 + }, + { + "epoch": 0.81, + "grad_norm": 0.5784508107773185, + "learning_rate": 1.7470833021827416e-06, + "loss": 0.3641, + "step": 17735 + }, + { + "epoch": 0.81, + "grad_norm": 0.3970732111408475, + "learning_rate": 1.7462431555820824e-06, + "loss": 0.2713, + "step": 17736 + }, + { + "epoch": 0.81, + "grad_norm": 0.32116470295510374, + "learning_rate": 1.745403191711641e-06, + "loss": 0.1974, + "step": 17737 + }, + { + "epoch": 0.81, + "grad_norm": 0.4897202963031287, + "learning_rate": 1.7445634105900199e-06, + "loss": 0.2247, + "step": 17738 + }, + { + "epoch": 0.81, + "grad_norm": 0.33021291379656426, + "learning_rate": 1.7437238122358058e-06, + "loss": 0.258, + "step": 17739 + }, + { + "epoch": 0.81, + "grad_norm": 0.701785775494587, + "learning_rate": 1.742884396667589e-06, + "loss": 0.2929, + "step": 17740 + }, + { + "epoch": 0.82, + "grad_norm": 0.3327058984740834, + "learning_rate": 1.742045163903956e-06, + "loss": 0.285, + "step": 17741 + }, + { + "epoch": 0.82, + "grad_norm": 0.3956223045956301, + "learning_rate": 1.7412061139634818e-06, + "loss": 0.3056, + "step": 17742 + }, + { + "epoch": 0.82, + "grad_norm": 0.471995110324767, + "learning_rate": 1.7403672468647436e-06, + "loss": 0.087, + "step": 17743 + }, + { + "epoch": 0.82, + "grad_norm": 0.5372273395369969, + "learning_rate": 1.739528562626317e-06, + "loss": 0.2153, + "step": 17744 + }, + { + "epoch": 0.82, + "grad_norm": 0.3114627146732643, + "learning_rate": 1.7386900612667635e-06, + "loss": 0.2664, + "step": 17745 + }, + { + "epoch": 0.82, + "grad_norm": 0.48873881175607214, + "learning_rate": 1.7378517428046527e-06, + "loss": 0.2785, + "step": 17746 + }, + { + "epoch": 0.82, + "grad_norm": 0.7945965131010365, + "learning_rate": 1.7370136072585354e-06, + "loss": 0.4964, + "step": 17747 + }, + { + "epoch": 0.82, + "grad_norm": 0.3731840462490614, + "learning_rate": 1.7361756546469788e-06, + "loss": 0.2641, + "step": 17748 + }, + { + "epoch": 0.82, + "grad_norm": 0.5807694834893935, + "learning_rate": 1.7353378849885249e-06, + "loss": 0.3019, + "step": 17749 + }, + { + "epoch": 0.82, + "grad_norm": 0.2505972280302774, + "learning_rate": 1.7345002983017278e-06, + "loss": 0.1621, + "step": 17750 + }, + { + "epoch": 0.82, + "grad_norm": 0.37292998741116357, + "learning_rate": 1.733662894605127e-06, + "loss": 0.2549, + "step": 17751 + }, + { + "epoch": 0.82, + "grad_norm": 0.8601488311666952, + "learning_rate": 1.7328256739172577e-06, + "loss": 0.4516, + "step": 17752 + }, + { + "epoch": 0.82, + "grad_norm": 0.33941799053567356, + "learning_rate": 1.7319886362566662e-06, + "loss": 0.2413, + "step": 17753 + }, + { + "epoch": 0.82, + "grad_norm": 0.40243529887413604, + "learning_rate": 1.7311517816418732e-06, + "loss": 0.2761, + "step": 17754 + }, + { + "epoch": 0.82, + "grad_norm": 0.62470320810335, + "learning_rate": 1.7303151100914139e-06, + "loss": 0.221, + "step": 17755 + }, + { + "epoch": 0.82, + "grad_norm": 0.4145387171024202, + "learning_rate": 1.7294786216238046e-06, + "loss": 0.1958, + "step": 17756 + }, + { + "epoch": 0.82, + "grad_norm": 0.3133999132752345, + "learning_rate": 1.7286423162575684e-06, + "loss": 0.2616, + "step": 17757 + }, + { + "epoch": 0.82, + "grad_norm": 0.4741955775317056, + "learning_rate": 1.72780619401122e-06, + "loss": 0.3301, + "step": 17758 + }, + { + "epoch": 0.82, + "grad_norm": 1.6644666431323134, + "learning_rate": 1.7269702549032686e-06, + "loss": 0.3669, + "step": 17759 + }, + { + "epoch": 0.82, + "grad_norm": 0.33208576014068175, + "learning_rate": 1.7261344989522212e-06, + "loss": 0.2272, + "step": 17760 + }, + { + "epoch": 0.82, + "grad_norm": 0.5425651696241437, + "learning_rate": 1.725298926176584e-06, + "loss": 0.3627, + "step": 17761 + }, + { + "epoch": 0.82, + "grad_norm": 0.4447944214580623, + "learning_rate": 1.7244635365948514e-06, + "loss": 0.297, + "step": 17762 + }, + { + "epoch": 0.82, + "grad_norm": 0.26362233352828157, + "learning_rate": 1.723628330225523e-06, + "loss": 0.1692, + "step": 17763 + }, + { + "epoch": 0.82, + "grad_norm": 0.43735889080312873, + "learning_rate": 1.7227933070870828e-06, + "loss": 0.2835, + "step": 17764 + }, + { + "epoch": 0.82, + "grad_norm": 0.402055229788881, + "learning_rate": 1.7219584671980217e-06, + "loss": 0.3007, + "step": 17765 + }, + { + "epoch": 0.82, + "grad_norm": 0.301676342548234, + "learning_rate": 1.7211238105768213e-06, + "loss": 0.1775, + "step": 17766 + }, + { + "epoch": 0.82, + "grad_norm": 0.9555714589395403, + "learning_rate": 1.7202893372419637e-06, + "loss": 0.4004, + "step": 17767 + }, + { + "epoch": 0.82, + "grad_norm": 0.4346699915048842, + "learning_rate": 1.7194550472119165e-06, + "loss": 0.3271, + "step": 17768 + }, + { + "epoch": 0.82, + "grad_norm": 0.3212821604057173, + "learning_rate": 1.7186209405051547e-06, + "loss": 0.1764, + "step": 17769 + }, + { + "epoch": 0.82, + "grad_norm": 0.39396947312885783, + "learning_rate": 1.7177870171401455e-06, + "loss": 0.2937, + "step": 17770 + }, + { + "epoch": 0.82, + "grad_norm": 0.4598056144970062, + "learning_rate": 1.716953277135347e-06, + "loss": 0.2349, + "step": 17771 + }, + { + "epoch": 0.82, + "grad_norm": 0.32399864414097207, + "learning_rate": 1.7161197205092217e-06, + "loss": 0.1825, + "step": 17772 + }, + { + "epoch": 0.82, + "grad_norm": 0.34941272587109506, + "learning_rate": 1.7152863472802195e-06, + "loss": 0.2834, + "step": 17773 + }, + { + "epoch": 0.82, + "grad_norm": 0.7536597952490434, + "learning_rate": 1.7144531574667934e-06, + "loss": 0.3791, + "step": 17774 + }, + { + "epoch": 0.82, + "grad_norm": 0.3561009957030318, + "learning_rate": 1.7136201510873896e-06, + "loss": 0.2582, + "step": 17775 + }, + { + "epoch": 0.82, + "grad_norm": 0.2636845213632994, + "learning_rate": 1.7127873281604479e-06, + "loss": 0.1654, + "step": 17776 + }, + { + "epoch": 0.82, + "grad_norm": 0.26706442081592396, + "learning_rate": 1.711954688704407e-06, + "loss": 0.2247, + "step": 17777 + }, + { + "epoch": 0.82, + "grad_norm": 0.3627392289694131, + "learning_rate": 1.7111222327377009e-06, + "loss": 0.2465, + "step": 17778 + }, + { + "epoch": 0.82, + "grad_norm": 0.8064578675692119, + "learning_rate": 1.7102899602787625e-06, + "loss": 0.2554, + "step": 17779 + }, + { + "epoch": 0.82, + "grad_norm": 0.799560787314202, + "learning_rate": 1.7094578713460154e-06, + "loss": 0.4069, + "step": 17780 + }, + { + "epoch": 0.82, + "grad_norm": 0.30559273316010704, + "learning_rate": 1.7086259659578764e-06, + "loss": 0.2619, + "step": 17781 + }, + { + "epoch": 0.82, + "grad_norm": 0.4439632432382285, + "learning_rate": 1.7077942441327689e-06, + "loss": 0.2421, + "step": 17782 + }, + { + "epoch": 0.82, + "grad_norm": 0.26649800993200773, + "learning_rate": 1.7069627058891036e-06, + "loss": 0.1688, + "step": 17783 + }, + { + "epoch": 0.82, + "grad_norm": 0.36905973293316047, + "learning_rate": 1.7061313512452937e-06, + "loss": 0.265, + "step": 17784 + }, + { + "epoch": 0.82, + "grad_norm": 0.44073531968664775, + "learning_rate": 1.7053001802197388e-06, + "loss": 0.3191, + "step": 17785 + }, + { + "epoch": 0.82, + "grad_norm": 0.9134309249204701, + "learning_rate": 1.7044691928308442e-06, + "loss": 0.278, + "step": 17786 + }, + { + "epoch": 0.82, + "grad_norm": 0.4370964119941887, + "learning_rate": 1.7036383890970087e-06, + "loss": 0.2717, + "step": 17787 + }, + { + "epoch": 0.82, + "grad_norm": 0.576710861000084, + "learning_rate": 1.7028077690366208e-06, + "loss": 0.388, + "step": 17788 + }, + { + "epoch": 0.82, + "grad_norm": 0.2197662206213494, + "learning_rate": 1.7019773326680745e-06, + "loss": 0.1826, + "step": 17789 + }, + { + "epoch": 0.82, + "grad_norm": 0.40852248639628297, + "learning_rate": 1.7011470800097496e-06, + "loss": 0.2431, + "step": 17790 + }, + { + "epoch": 0.82, + "grad_norm": 0.5575406774301187, + "learning_rate": 1.7003170110800294e-06, + "loss": 0.3108, + "step": 17791 + }, + { + "epoch": 0.82, + "grad_norm": 1.5156084449621376, + "learning_rate": 1.6994871258972944e-06, + "loss": 0.2233, + "step": 17792 + }, + { + "epoch": 0.82, + "grad_norm": 0.2918184532838313, + "learning_rate": 1.6986574244799114e-06, + "loss": 0.2438, + "step": 17793 + }, + { + "epoch": 0.82, + "grad_norm": 0.47466923834266783, + "learning_rate": 1.6978279068462544e-06, + "loss": 0.3495, + "step": 17794 + }, + { + "epoch": 0.82, + "grad_norm": 0.4108204334536216, + "learning_rate": 1.696998573014682e-06, + "loss": 0.1442, + "step": 17795 + }, + { + "epoch": 0.82, + "grad_norm": 0.36090114187058847, + "learning_rate": 1.696169423003563e-06, + "loss": 0.2652, + "step": 17796 + }, + { + "epoch": 0.82, + "grad_norm": 0.37175798937258103, + "learning_rate": 1.6953404568312458e-06, + "loss": 0.2998, + "step": 17797 + }, + { + "epoch": 0.82, + "grad_norm": 0.5324461850004761, + "learning_rate": 1.6945116745160906e-06, + "loss": 0.2142, + "step": 17798 + }, + { + "epoch": 0.82, + "grad_norm": 0.35554553130911054, + "learning_rate": 1.69368307607644e-06, + "loss": 0.1887, + "step": 17799 + }, + { + "epoch": 0.82, + "grad_norm": 1.2674762491920029, + "learning_rate": 1.6928546615306396e-06, + "loss": 0.7728, + "step": 17800 + }, + { + "epoch": 0.82, + "grad_norm": 0.40724388384717003, + "learning_rate": 1.6920264308970325e-06, + "loss": 0.2916, + "step": 17801 + }, + { + "epoch": 0.82, + "grad_norm": 0.3825915363083655, + "learning_rate": 1.6911983841939516e-06, + "loss": 0.2198, + "step": 17802 + }, + { + "epoch": 0.82, + "grad_norm": 0.48858148806069507, + "learning_rate": 1.6903705214397292e-06, + "loss": 0.2055, + "step": 17803 + }, + { + "epoch": 0.82, + "grad_norm": 0.36413767013245807, + "learning_rate": 1.6895428426526972e-06, + "loss": 0.2113, + "step": 17804 + }, + { + "epoch": 0.82, + "grad_norm": 0.305373038986419, + "learning_rate": 1.6887153478511753e-06, + "loss": 0.1904, + "step": 17805 + }, + { + "epoch": 0.82, + "grad_norm": 0.5423464201197744, + "learning_rate": 1.6878880370534866e-06, + "loss": 0.3924, + "step": 17806 + }, + { + "epoch": 0.82, + "grad_norm": 0.650601090977567, + "learning_rate": 1.6870609102779411e-06, + "loss": 0.3759, + "step": 17807 + }, + { + "epoch": 0.82, + "grad_norm": 0.4105255267071267, + "learning_rate": 1.6862339675428595e-06, + "loss": 0.1963, + "step": 17808 + }, + { + "epoch": 0.82, + "grad_norm": 0.2741041889130225, + "learning_rate": 1.6854072088665453e-06, + "loss": 0.246, + "step": 17809 + }, + { + "epoch": 0.82, + "grad_norm": 0.6111350692786954, + "learning_rate": 1.6845806342672988e-06, + "loss": 0.2589, + "step": 17810 + }, + { + "epoch": 0.82, + "grad_norm": 1.545052669473046, + "learning_rate": 1.6837542437634257e-06, + "loss": 0.3738, + "step": 17811 + }, + { + "epoch": 0.82, + "grad_norm": 0.3231036589608299, + "learning_rate": 1.6829280373732126e-06, + "loss": 0.2486, + "step": 17812 + }, + { + "epoch": 0.82, + "grad_norm": 0.5226476089498232, + "learning_rate": 1.6821020151149624e-06, + "loss": 0.3186, + "step": 17813 + }, + { + "epoch": 0.82, + "grad_norm": 0.44556821877797, + "learning_rate": 1.6812761770069541e-06, + "loss": 0.2752, + "step": 17814 + }, + { + "epoch": 0.82, + "grad_norm": 0.403442173435759, + "learning_rate": 1.680450523067475e-06, + "loss": 0.1572, + "step": 17815 + }, + { + "epoch": 0.82, + "grad_norm": 0.3736816746893032, + "learning_rate": 1.6796250533148018e-06, + "loss": 0.2502, + "step": 17816 + }, + { + "epoch": 0.82, + "grad_norm": 0.33994970836641814, + "learning_rate": 1.6787997677672096e-06, + "loss": 0.2656, + "step": 17817 + }, + { + "epoch": 0.82, + "grad_norm": 0.4869682761866183, + "learning_rate": 1.6779746664429731e-06, + "loss": 0.2679, + "step": 17818 + }, + { + "epoch": 0.82, + "grad_norm": 0.5703996382305457, + "learning_rate": 1.677149749360355e-06, + "loss": 0.3701, + "step": 17819 + }, + { + "epoch": 0.82, + "grad_norm": 0.42672419321492977, + "learning_rate": 1.6763250165376189e-06, + "loss": 0.2422, + "step": 17820 + }, + { + "epoch": 0.82, + "grad_norm": 0.2800836265997891, + "learning_rate": 1.6755004679930275e-06, + "loss": 0.2236, + "step": 17821 + }, + { + "epoch": 0.82, + "grad_norm": 0.37172475002008415, + "learning_rate": 1.674676103744829e-06, + "loss": 0.1847, + "step": 17822 + }, + { + "epoch": 0.82, + "grad_norm": 0.8152765973584594, + "learning_rate": 1.6738519238112816e-06, + "loss": 0.469, + "step": 17823 + }, + { + "epoch": 0.82, + "grad_norm": 0.43457656158432384, + "learning_rate": 1.6730279282106243e-06, + "loss": 0.2772, + "step": 17824 + }, + { + "epoch": 0.82, + "grad_norm": 0.3272671552209532, + "learning_rate": 1.6722041169611026e-06, + "loss": 0.2461, + "step": 17825 + }, + { + "epoch": 0.82, + "grad_norm": 0.7240647603038967, + "learning_rate": 1.6713804900809583e-06, + "loss": 0.3525, + "step": 17826 + }, + { + "epoch": 0.82, + "grad_norm": 0.45592062277616324, + "learning_rate": 1.6705570475884203e-06, + "loss": 0.2847, + "step": 17827 + }, + { + "epoch": 0.82, + "grad_norm": 0.22970093704553485, + "learning_rate": 1.669733789501724e-06, + "loss": 0.1682, + "step": 17828 + }, + { + "epoch": 0.82, + "grad_norm": 0.6226835195021604, + "learning_rate": 1.6689107158390872e-06, + "loss": 0.3588, + "step": 17829 + }, + { + "epoch": 0.82, + "grad_norm": 0.41021067152568763, + "learning_rate": 1.6680878266187428e-06, + "loss": 0.3012, + "step": 17830 + }, + { + "epoch": 0.82, + "grad_norm": 0.7869382915379514, + "learning_rate": 1.6672651218589008e-06, + "loss": 0.315, + "step": 17831 + }, + { + "epoch": 0.82, + "grad_norm": 0.4323179329286414, + "learning_rate": 1.6664426015777801e-06, + "loss": 0.292, + "step": 17832 + }, + { + "epoch": 0.82, + "grad_norm": 0.33096288125583884, + "learning_rate": 1.6656202657935872e-06, + "loss": 0.2668, + "step": 17833 + }, + { + "epoch": 0.82, + "grad_norm": 0.2932062281169387, + "learning_rate": 1.6647981145245273e-06, + "loss": 0.087, + "step": 17834 + }, + { + "epoch": 0.82, + "grad_norm": 0.4590536239644486, + "learning_rate": 1.663976147788806e-06, + "loss": 0.2593, + "step": 17835 + }, + { + "epoch": 0.82, + "grad_norm": 0.5055173210434944, + "learning_rate": 1.6631543656046167e-06, + "loss": 0.3035, + "step": 17836 + }, + { + "epoch": 0.82, + "grad_norm": 0.37766134460218065, + "learning_rate": 1.6623327679901547e-06, + "loss": 0.2997, + "step": 17837 + }, + { + "epoch": 0.82, + "grad_norm": 0.38606130663615457, + "learning_rate": 1.661511354963612e-06, + "loss": 0.1785, + "step": 17838 + }, + { + "epoch": 0.82, + "grad_norm": 0.5937270701780326, + "learning_rate": 1.6606901265431675e-06, + "loss": 0.3357, + "step": 17839 + }, + { + "epoch": 0.82, + "grad_norm": 0.22344837592968383, + "learning_rate": 1.6598690827470088e-06, + "loss": 0.2021, + "step": 17840 + }, + { + "epoch": 0.82, + "grad_norm": 0.860713818733959, + "learning_rate": 1.659048223593308e-06, + "loss": 0.2428, + "step": 17841 + }, + { + "epoch": 0.82, + "grad_norm": 0.41166046914139925, + "learning_rate": 1.6582275491002408e-06, + "loss": 0.2868, + "step": 17842 + }, + { + "epoch": 0.82, + "grad_norm": 0.992824793993233, + "learning_rate": 1.657407059285976e-06, + "loss": 0.5139, + "step": 17843 + }, + { + "epoch": 0.82, + "grad_norm": 0.3412459915659717, + "learning_rate": 1.6565867541686798e-06, + "loss": 0.2212, + "step": 17844 + }, + { + "epoch": 0.82, + "grad_norm": 0.37897388800693155, + "learning_rate": 1.6557666337665124e-06, + "loss": 0.2815, + "step": 17845 + }, + { + "epoch": 0.82, + "grad_norm": 0.34853020672805135, + "learning_rate": 1.6549466980976237e-06, + "loss": 0.1765, + "step": 17846 + }, + { + "epoch": 0.82, + "grad_norm": 0.563761401117358, + "learning_rate": 1.6541269471801768e-06, + "loss": 0.2291, + "step": 17847 + }, + { + "epoch": 0.82, + "grad_norm": 0.33143211204399237, + "learning_rate": 1.6533073810323142e-06, + "loss": 0.289, + "step": 17848 + }, + { + "epoch": 0.82, + "grad_norm": 0.517674384679477, + "learning_rate": 1.652487999672182e-06, + "loss": 0.37, + "step": 17849 + }, + { + "epoch": 0.82, + "grad_norm": 0.8859703552231626, + "learning_rate": 1.6516688031179195e-06, + "loss": 0.4325, + "step": 17850 + }, + { + "epoch": 0.82, + "grad_norm": 0.3909297581171697, + "learning_rate": 1.650849791387662e-06, + "loss": 0.2075, + "step": 17851 + }, + { + "epoch": 0.82, + "grad_norm": 0.2902579496865952, + "learning_rate": 1.6500309644995472e-06, + "loss": 0.2392, + "step": 17852 + }, + { + "epoch": 0.82, + "grad_norm": 0.5754277019411785, + "learning_rate": 1.649212322471695e-06, + "loss": 0.3288, + "step": 17853 + }, + { + "epoch": 0.82, + "grad_norm": 0.26326085751444434, + "learning_rate": 1.6483938653222364e-06, + "loss": 0.1597, + "step": 17854 + }, + { + "epoch": 0.82, + "grad_norm": 1.255700334265664, + "learning_rate": 1.647575593069286e-06, + "loss": 0.7306, + "step": 17855 + }, + { + "epoch": 0.82, + "grad_norm": 0.31558366344555205, + "learning_rate": 1.6467575057309614e-06, + "loss": 0.2488, + "step": 17856 + }, + { + "epoch": 0.82, + "grad_norm": 0.42782537834153983, + "learning_rate": 1.6459396033253784e-06, + "loss": 0.216, + "step": 17857 + }, + { + "epoch": 0.82, + "grad_norm": 0.6761606614578485, + "learning_rate": 1.6451218858706374e-06, + "loss": 0.3944, + "step": 17858 + }, + { + "epoch": 0.82, + "grad_norm": 0.4598650116273263, + "learning_rate": 1.6443043533848446e-06, + "loss": 0.2522, + "step": 17859 + }, + { + "epoch": 0.82, + "grad_norm": 0.28346926439006015, + "learning_rate": 1.6434870058861009e-06, + "loss": 0.2183, + "step": 17860 + }, + { + "epoch": 0.82, + "grad_norm": 0.3349176063453951, + "learning_rate": 1.6426698433925038e-06, + "loss": 0.2566, + "step": 17861 + }, + { + "epoch": 0.82, + "grad_norm": 1.7624453742411625, + "learning_rate": 1.6418528659221378e-06, + "loss": 0.7206, + "step": 17862 + }, + { + "epoch": 0.82, + "grad_norm": 0.3992551923522384, + "learning_rate": 1.6410360734930942e-06, + "loss": 0.2487, + "step": 17863 + }, + { + "epoch": 0.82, + "grad_norm": 0.41513380094620994, + "learning_rate": 1.6402194661234583e-06, + "loss": 0.2543, + "step": 17864 + }, + { + "epoch": 0.82, + "grad_norm": 0.7469366810898178, + "learning_rate": 1.6394030438313025e-06, + "loss": 0.4091, + "step": 17865 + }, + { + "epoch": 0.82, + "grad_norm": 0.37338571349379807, + "learning_rate": 1.6385868066347088e-06, + "loss": 0.293, + "step": 17866 + }, + { + "epoch": 0.82, + "grad_norm": 0.2879213692108565, + "learning_rate": 1.6377707545517418e-06, + "loss": 0.1464, + "step": 17867 + }, + { + "epoch": 0.82, + "grad_norm": 0.28329991872660953, + "learning_rate": 1.6369548876004704e-06, + "loss": 0.2464, + "step": 17868 + }, + { + "epoch": 0.82, + "grad_norm": 0.40121960531534223, + "learning_rate": 1.63613920579896e-06, + "loss": 0.2621, + "step": 17869 + }, + { + "epoch": 0.82, + "grad_norm": 0.7076156079393486, + "learning_rate": 1.6353237091652641e-06, + "loss": 0.2909, + "step": 17870 + }, + { + "epoch": 0.82, + "grad_norm": 0.8118680037477919, + "learning_rate": 1.6345083977174414e-06, + "loss": 0.3401, + "step": 17871 + }, + { + "epoch": 0.82, + "grad_norm": 0.3655126024601235, + "learning_rate": 1.6336932714735354e-06, + "loss": 0.2501, + "step": 17872 + }, + { + "epoch": 0.82, + "grad_norm": 0.2679541697813635, + "learning_rate": 1.6328783304516016e-06, + "loss": 0.2059, + "step": 17873 + }, + { + "epoch": 0.82, + "grad_norm": 0.4775629973981503, + "learning_rate": 1.6320635746696768e-06, + "loss": 0.1873, + "step": 17874 + }, + { + "epoch": 0.82, + "grad_norm": 0.39955514583744095, + "learning_rate": 1.6312490041457973e-06, + "loss": 0.2956, + "step": 17875 + }, + { + "epoch": 0.82, + "grad_norm": 0.3997085373400413, + "learning_rate": 1.6304346188979992e-06, + "loss": 0.2877, + "step": 17876 + }, + { + "epoch": 0.82, + "grad_norm": 0.7344360028403284, + "learning_rate": 1.6296204189443121e-06, + "loss": 0.2303, + "step": 17877 + }, + { + "epoch": 0.82, + "grad_norm": 0.38044339631923324, + "learning_rate": 1.628806404302763e-06, + "loss": 0.2834, + "step": 17878 + }, + { + "epoch": 0.82, + "grad_norm": 0.613118892334737, + "learning_rate": 1.6279925749913693e-06, + "loss": 0.3369, + "step": 17879 + }, + { + "epoch": 0.82, + "grad_norm": 0.28436663447557947, + "learning_rate": 1.6271789310281515e-06, + "loss": 0.1805, + "step": 17880 + }, + { + "epoch": 0.82, + "grad_norm": 0.3611682736987512, + "learning_rate": 1.626365472431125e-06, + "loss": 0.2432, + "step": 17881 + }, + { + "epoch": 0.82, + "grad_norm": 0.7542239247415545, + "learning_rate": 1.6255521992182942e-06, + "loss": 0.3695, + "step": 17882 + }, + { + "epoch": 0.82, + "grad_norm": 0.4510411912447935, + "learning_rate": 1.6247391114076683e-06, + "loss": 0.2421, + "step": 17883 + }, + { + "epoch": 0.82, + "grad_norm": 0.2963030192975179, + "learning_rate": 1.6239262090172436e-06, + "loss": 0.2467, + "step": 17884 + }, + { + "epoch": 0.82, + "grad_norm": 1.19114947788652, + "learning_rate": 1.6231134920650193e-06, + "loss": 0.6559, + "step": 17885 + }, + { + "epoch": 0.82, + "grad_norm": 0.3267237945608854, + "learning_rate": 1.622300960568992e-06, + "loss": 0.1688, + "step": 17886 + }, + { + "epoch": 0.82, + "grad_norm": 0.3474136105216397, + "learning_rate": 1.6214886145471442e-06, + "loss": 0.2554, + "step": 17887 + }, + { + "epoch": 0.82, + "grad_norm": 0.40883381295824295, + "learning_rate": 1.6206764540174657e-06, + "loss": 0.2664, + "step": 17888 + }, + { + "epoch": 0.82, + "grad_norm": 1.3823921732088091, + "learning_rate": 1.619864478997929e-06, + "loss": 0.5244, + "step": 17889 + }, + { + "epoch": 0.82, + "grad_norm": 0.3028639446923094, + "learning_rate": 1.6190526895065205e-06, + "loss": 0.1794, + "step": 17890 + }, + { + "epoch": 0.82, + "grad_norm": 1.3006765277034078, + "learning_rate": 1.6182410855612085e-06, + "loss": 0.7605, + "step": 17891 + }, + { + "epoch": 0.82, + "grad_norm": 0.311274194836303, + "learning_rate": 1.6174296671799571e-06, + "loss": 0.2478, + "step": 17892 + }, + { + "epoch": 0.82, + "grad_norm": 0.3127615542952781, + "learning_rate": 1.6166184343807346e-06, + "loss": 0.1825, + "step": 17893 + }, + { + "epoch": 0.82, + "grad_norm": 0.49404070419757556, + "learning_rate": 1.6158073871814995e-06, + "loss": 0.2602, + "step": 17894 + }, + { + "epoch": 0.82, + "grad_norm": 0.40027603779981735, + "learning_rate": 1.614996525600211e-06, + "loss": 0.294, + "step": 17895 + }, + { + "epoch": 0.82, + "grad_norm": 0.3310231112688102, + "learning_rate": 1.6141858496548147e-06, + "loss": 0.1816, + "step": 17896 + }, + { + "epoch": 0.82, + "grad_norm": 1.1545558204261583, + "learning_rate": 1.6133753593632617e-06, + "loss": 0.5579, + "step": 17897 + }, + { + "epoch": 0.82, + "grad_norm": 0.7037685351075302, + "learning_rate": 1.6125650547434979e-06, + "loss": 0.3521, + "step": 17898 + }, + { + "epoch": 0.82, + "grad_norm": 0.3716704635905693, + "learning_rate": 1.6117549358134566e-06, + "loss": 0.2536, + "step": 17899 + }, + { + "epoch": 0.82, + "grad_norm": 0.2240276948034991, + "learning_rate": 1.61094500259108e-06, + "loss": 0.1668, + "step": 17900 + }, + { + "epoch": 0.82, + "grad_norm": 1.5080092059926637, + "learning_rate": 1.6101352550942916e-06, + "loss": 0.7888, + "step": 17901 + }, + { + "epoch": 0.82, + "grad_norm": 0.32581939872473537, + "learning_rate": 1.609325693341024e-06, + "loss": 0.2406, + "step": 17902 + }, + { + "epoch": 0.82, + "grad_norm": 0.9279723157649599, + "learning_rate": 1.6085163173492003e-06, + "loss": 0.2958, + "step": 17903 + }, + { + "epoch": 0.82, + "grad_norm": 0.3523750429180116, + "learning_rate": 1.607707127136734e-06, + "loss": 0.2902, + "step": 17904 + }, + { + "epoch": 0.82, + "grad_norm": 0.34463953375991896, + "learning_rate": 1.6068981227215475e-06, + "loss": 0.2261, + "step": 17905 + }, + { + "epoch": 0.82, + "grad_norm": 0.16085512172830718, + "learning_rate": 1.6060893041215409e-06, + "loss": 0.0695, + "step": 17906 + }, + { + "epoch": 0.82, + "grad_norm": 0.38735692700757773, + "learning_rate": 1.6052806713546321e-06, + "loss": 0.3113, + "step": 17907 + }, + { + "epoch": 0.82, + "grad_norm": 0.39905830246245694, + "learning_rate": 1.6044722244387168e-06, + "loss": 0.2612, + "step": 17908 + }, + { + "epoch": 0.82, + "grad_norm": 0.496069079335485, + "learning_rate": 1.6036639633916962e-06, + "loss": 0.2734, + "step": 17909 + }, + { + "epoch": 0.82, + "grad_norm": 0.6833809446227312, + "learning_rate": 1.6028558882314604e-06, + "loss": 0.3682, + "step": 17910 + }, + { + "epoch": 0.82, + "grad_norm": 0.41285248307055894, + "learning_rate": 1.602047998975903e-06, + "loss": 0.2364, + "step": 17911 + }, + { + "epoch": 0.82, + "grad_norm": 0.2767450941096929, + "learning_rate": 1.6012402956429107e-06, + "loss": 0.2414, + "step": 17912 + }, + { + "epoch": 0.82, + "grad_norm": 0.47754686609990277, + "learning_rate": 1.6004327782503603e-06, + "loss": 0.1269, + "step": 17913 + }, + { + "epoch": 0.82, + "grad_norm": 0.4124364330820471, + "learning_rate": 1.5996254468161364e-06, + "loss": 0.2695, + "step": 17914 + }, + { + "epoch": 0.82, + "grad_norm": 0.48550868702886113, + "learning_rate": 1.5988183013581048e-06, + "loss": 0.3566, + "step": 17915 + }, + { + "epoch": 0.82, + "grad_norm": 0.33709795998759534, + "learning_rate": 1.59801134189414e-06, + "loss": 0.2433, + "step": 17916 + }, + { + "epoch": 0.82, + "grad_norm": 0.45992753936688874, + "learning_rate": 1.5972045684421078e-06, + "loss": 0.2539, + "step": 17917 + }, + { + "epoch": 0.82, + "grad_norm": 0.2862187329897796, + "learning_rate": 1.596397981019866e-06, + "loss": 0.1682, + "step": 17918 + }, + { + "epoch": 0.82, + "grad_norm": 0.5779185592869104, + "learning_rate": 1.5955915796452736e-06, + "loss": 0.2747, + "step": 17919 + }, + { + "epoch": 0.82, + "grad_norm": 0.30298275328984553, + "learning_rate": 1.5947853643361844e-06, + "loss": 0.246, + "step": 17920 + }, + { + "epoch": 0.82, + "grad_norm": 1.0690317283049726, + "learning_rate": 1.5939793351104448e-06, + "loss": 0.4099, + "step": 17921 + }, + { + "epoch": 0.82, + "grad_norm": 0.7684602263393544, + "learning_rate": 1.5931734919859033e-06, + "loss": 0.2886, + "step": 17922 + }, + { + "epoch": 0.82, + "grad_norm": 0.34379290855285777, + "learning_rate": 1.5923678349803928e-06, + "loss": 0.2549, + "step": 17923 + }, + { + "epoch": 0.82, + "grad_norm": 0.3655213771276033, + "learning_rate": 1.5915623641117605e-06, + "loss": 0.2695, + "step": 17924 + }, + { + "epoch": 0.82, + "grad_norm": 0.3069460237471101, + "learning_rate": 1.5907570793978312e-06, + "loss": 0.1915, + "step": 17925 + }, + { + "epoch": 0.82, + "grad_norm": 0.4045490548556977, + "learning_rate": 1.5899519808564368e-06, + "loss": 0.2111, + "step": 17926 + }, + { + "epoch": 0.82, + "grad_norm": 0.5325397743430399, + "learning_rate": 1.589147068505398e-06, + "loss": 0.3416, + "step": 17927 + }, + { + "epoch": 0.82, + "grad_norm": 0.3751408190321858, + "learning_rate": 1.5883423423625356e-06, + "loss": 0.3053, + "step": 17928 + }, + { + "epoch": 0.82, + "grad_norm": 0.9431345751184442, + "learning_rate": 1.58753780244567e-06, + "loss": 0.1269, + "step": 17929 + }, + { + "epoch": 0.82, + "grad_norm": 0.31636453520879515, + "learning_rate": 1.586733448772606e-06, + "loss": 0.2335, + "step": 17930 + }, + { + "epoch": 0.82, + "grad_norm": 0.303927579614148, + "learning_rate": 1.5859292813611583e-06, + "loss": 0.2493, + "step": 17931 + }, + { + "epoch": 0.82, + "grad_norm": 0.3920819278512701, + "learning_rate": 1.5851253002291234e-06, + "loss": 0.197, + "step": 17932 + }, + { + "epoch": 0.82, + "grad_norm": 0.4697931796273447, + "learning_rate": 1.584321505394304e-06, + "loss": 0.3398, + "step": 17933 + }, + { + "epoch": 0.82, + "grad_norm": 1.325875353619506, + "learning_rate": 1.583517896874498e-06, + "loss": 0.5778, + "step": 17934 + }, + { + "epoch": 0.82, + "grad_norm": 0.3481727455095071, + "learning_rate": 1.5827144746874912e-06, + "loss": 0.2024, + "step": 17935 + }, + { + "epoch": 0.82, + "grad_norm": 0.31377243391835863, + "learning_rate": 1.5819112388510739e-06, + "loss": 0.2377, + "step": 17936 + }, + { + "epoch": 0.82, + "grad_norm": 0.5327879998446331, + "learning_rate": 1.5811081893830272e-06, + "loss": 0.279, + "step": 17937 + }, + { + "epoch": 0.82, + "grad_norm": 0.5507448121471459, + "learning_rate": 1.5803053263011348e-06, + "loss": 0.3464, + "step": 17938 + }, + { + "epoch": 0.82, + "grad_norm": 0.37863610365997197, + "learning_rate": 1.5795026496231658e-06, + "loss": 0.235, + "step": 17939 + }, + { + "epoch": 0.82, + "grad_norm": 0.36617158167496144, + "learning_rate": 1.5787001593668882e-06, + "loss": 0.3091, + "step": 17940 + }, + { + "epoch": 0.82, + "grad_norm": 0.48393546036733776, + "learning_rate": 1.577897855550078e-06, + "loss": 0.2663, + "step": 17941 + }, + { + "epoch": 0.82, + "grad_norm": 0.679040152182578, + "learning_rate": 1.5770957381904894e-06, + "loss": 0.2081, + "step": 17942 + }, + { + "epoch": 0.82, + "grad_norm": 0.23420657091476496, + "learning_rate": 1.5762938073058853e-06, + "loss": 0.2155, + "step": 17943 + }, + { + "epoch": 0.82, + "grad_norm": 0.4405390369853548, + "learning_rate": 1.5754920629140146e-06, + "loss": 0.2705, + "step": 17944 + }, + { + "epoch": 0.82, + "grad_norm": 0.5301000889683434, + "learning_rate": 1.57469050503263e-06, + "loss": 0.2776, + "step": 17945 + }, + { + "epoch": 0.82, + "grad_norm": 1.205504121421561, + "learning_rate": 1.5738891336794805e-06, + "loss": 0.5954, + "step": 17946 + }, + { + "epoch": 0.82, + "grad_norm": 0.4681851802628399, + "learning_rate": 1.5730879488723005e-06, + "loss": 0.333, + "step": 17947 + }, + { + "epoch": 0.82, + "grad_norm": 0.29761587772035697, + "learning_rate": 1.5722869506288352e-06, + "loss": 0.2243, + "step": 17948 + }, + { + "epoch": 0.82, + "grad_norm": 0.48632445179820405, + "learning_rate": 1.5714861389668113e-06, + "loss": 0.2964, + "step": 17949 + }, + { + "epoch": 0.82, + "grad_norm": 0.3787466845174593, + "learning_rate": 1.57068551390396e-06, + "loss": 0.2215, + "step": 17950 + }, + { + "epoch": 0.82, + "grad_norm": 0.31654961738205817, + "learning_rate": 1.5698850754580108e-06, + "loss": 0.256, + "step": 17951 + }, + { + "epoch": 0.82, + "grad_norm": 0.5546867662513121, + "learning_rate": 1.569084823646677e-06, + "loss": 0.2546, + "step": 17952 + }, + { + "epoch": 0.82, + "grad_norm": 0.8726533284776277, + "learning_rate": 1.5682847584876803e-06, + "loss": 0.2997, + "step": 17953 + }, + { + "epoch": 0.82, + "grad_norm": 0.4124420279017348, + "learning_rate": 1.567484879998733e-06, + "loss": 0.288, + "step": 17954 + }, + { + "epoch": 0.82, + "grad_norm": 0.3617669801682913, + "learning_rate": 1.5666851881975453e-06, + "loss": 0.246, + "step": 17955 + }, + { + "epoch": 0.82, + "grad_norm": 0.36132392601742513, + "learning_rate": 1.5658856831018188e-06, + "loss": 0.206, + "step": 17956 + }, + { + "epoch": 0.82, + "grad_norm": 0.4166474962705854, + "learning_rate": 1.5650863647292491e-06, + "loss": 0.2949, + "step": 17957 + }, + { + "epoch": 0.82, + "grad_norm": 0.4880196400892418, + "learning_rate": 1.5642872330975434e-06, + "loss": 0.1472, + "step": 17958 + }, + { + "epoch": 0.83, + "grad_norm": 0.3211861643900025, + "learning_rate": 1.5634882882243852e-06, + "loss": 0.265, + "step": 17959 + }, + { + "epoch": 0.83, + "grad_norm": 0.4263384774921326, + "learning_rate": 1.562689530127468e-06, + "loss": 0.306, + "step": 17960 + }, + { + "epoch": 0.83, + "grad_norm": 0.576198421213249, + "learning_rate": 1.561890958824469e-06, + "loss": 0.2882, + "step": 17961 + }, + { + "epoch": 0.83, + "grad_norm": 0.4535275798234041, + "learning_rate": 1.561092574333073e-06, + "loss": 0.3038, + "step": 17962 + }, + { + "epoch": 0.83, + "grad_norm": 0.37847337169368267, + "learning_rate": 1.5602943766709543e-06, + "loss": 0.3051, + "step": 17963 + }, + { + "epoch": 0.83, + "grad_norm": 0.25625915474582045, + "learning_rate": 1.5594963658557827e-06, + "loss": 0.202, + "step": 17964 + }, + { + "epoch": 0.83, + "grad_norm": 0.9363420086752337, + "learning_rate": 1.558698541905229e-06, + "loss": 0.1394, + "step": 17965 + }, + { + "epoch": 0.83, + "grad_norm": 0.37323717393903505, + "learning_rate": 1.5579009048369486e-06, + "loss": 0.2702, + "step": 17966 + }, + { + "epoch": 0.83, + "grad_norm": 0.3726239705999417, + "learning_rate": 1.5571034546686102e-06, + "loss": 0.3107, + "step": 17967 + }, + { + "epoch": 0.83, + "grad_norm": 0.4787336475324272, + "learning_rate": 1.5563061914178646e-06, + "loss": 0.133, + "step": 17968 + }, + { + "epoch": 0.83, + "grad_norm": 0.37590661824669397, + "learning_rate": 1.5555091151023594e-06, + "loss": 0.2732, + "step": 17969 + }, + { + "epoch": 0.83, + "grad_norm": 0.2979672090426112, + "learning_rate": 1.554712225739743e-06, + "loss": 0.1843, + "step": 17970 + }, + { + "epoch": 0.83, + "grad_norm": 0.3365451715714415, + "learning_rate": 1.5539155233476576e-06, + "loss": 0.2238, + "step": 17971 + }, + { + "epoch": 0.83, + "grad_norm": 0.3736541990261076, + "learning_rate": 1.5531190079437453e-06, + "loss": 0.2744, + "step": 17972 + }, + { + "epoch": 0.83, + "grad_norm": 1.0442771677003688, + "learning_rate": 1.5523226795456349e-06, + "loss": 0.4375, + "step": 17973 + }, + { + "epoch": 0.83, + "grad_norm": 0.6537473178825682, + "learning_rate": 1.5515265381709598e-06, + "loss": 0.1686, + "step": 17974 + }, + { + "epoch": 0.83, + "grad_norm": 0.3234888021798698, + "learning_rate": 1.5507305838373432e-06, + "loss": 0.2741, + "step": 17975 + }, + { + "epoch": 0.83, + "grad_norm": 0.3566459997427665, + "learning_rate": 1.5499348165624073e-06, + "loss": 0.2476, + "step": 17976 + }, + { + "epoch": 0.83, + "grad_norm": 0.5569656167947696, + "learning_rate": 1.5491392363637724e-06, + "loss": 0.244, + "step": 17977 + }, + { + "epoch": 0.83, + "grad_norm": 0.36361025316090034, + "learning_rate": 1.548343843259048e-06, + "loss": 0.217, + "step": 17978 + }, + { + "epoch": 0.83, + "grad_norm": 0.3673220953253236, + "learning_rate": 1.5475486372658444e-06, + "loss": 0.3047, + "step": 17979 + }, + { + "epoch": 0.83, + "grad_norm": 1.1491715145411694, + "learning_rate": 1.5467536184017696e-06, + "loss": 0.4374, + "step": 17980 + }, + { + "epoch": 0.83, + "grad_norm": 0.3494019559362568, + "learning_rate": 1.5459587866844205e-06, + "loss": 0.2069, + "step": 17981 + }, + { + "epoch": 0.83, + "grad_norm": 0.3439186344714828, + "learning_rate": 1.545164142131399e-06, + "loss": 0.2052, + "step": 17982 + }, + { + "epoch": 0.83, + "grad_norm": 0.35360798071161786, + "learning_rate": 1.5443696847602884e-06, + "loss": 0.2704, + "step": 17983 + }, + { + "epoch": 0.83, + "grad_norm": 0.35986972487194413, + "learning_rate": 1.5435754145886882e-06, + "loss": 0.2224, + "step": 17984 + }, + { + "epoch": 0.83, + "grad_norm": 1.4340539270322243, + "learning_rate": 1.5427813316341789e-06, + "loss": 0.535, + "step": 17985 + }, + { + "epoch": 0.83, + "grad_norm": 1.2279919830980677, + "learning_rate": 1.5419874359143361e-06, + "loss": 0.6776, + "step": 17986 + }, + { + "epoch": 0.83, + "grad_norm": 0.24583901102301525, + "learning_rate": 1.5411937274467404e-06, + "loss": 0.2037, + "step": 17987 + }, + { + "epoch": 0.83, + "grad_norm": 1.0286371821761173, + "learning_rate": 1.5404002062489631e-06, + "loss": 0.4332, + "step": 17988 + }, + { + "epoch": 0.83, + "grad_norm": 0.4459414528282251, + "learning_rate": 1.5396068723385737e-06, + "loss": 0.2678, + "step": 17989 + }, + { + "epoch": 0.83, + "grad_norm": 0.2494730044153878, + "learning_rate": 1.5388137257331315e-06, + "loss": 0.2031, + "step": 17990 + }, + { + "epoch": 0.83, + "grad_norm": 0.41660481137766436, + "learning_rate": 1.5380207664502e-06, + "loss": 0.2275, + "step": 17991 + }, + { + "epoch": 0.83, + "grad_norm": 1.314096869717473, + "learning_rate": 1.537227994507332e-06, + "loss": 0.7665, + "step": 17992 + }, + { + "epoch": 0.83, + "grad_norm": 0.3719516520908303, + "learning_rate": 1.536435409922079e-06, + "loss": 0.2661, + "step": 17993 + }, + { + "epoch": 0.83, + "grad_norm": 0.8486301665467313, + "learning_rate": 1.5356430127119915e-06, + "loss": 0.2801, + "step": 17994 + }, + { + "epoch": 0.83, + "grad_norm": 0.3566478625770529, + "learning_rate": 1.5348508028946063e-06, + "loss": 0.2793, + "step": 17995 + }, + { + "epoch": 0.83, + "grad_norm": 0.3905255356993447, + "learning_rate": 1.5340587804874662e-06, + "loss": 0.2722, + "step": 17996 + }, + { + "epoch": 0.83, + "grad_norm": 0.3409118201937726, + "learning_rate": 1.5332669455081074e-06, + "loss": 0.0953, + "step": 17997 + }, + { + "epoch": 0.83, + "grad_norm": 0.4478902739668914, + "learning_rate": 1.5324752979740566e-06, + "loss": 0.3501, + "step": 17998 + }, + { + "epoch": 0.83, + "grad_norm": 0.39586152571393807, + "learning_rate": 1.5316838379028431e-06, + "loss": 0.2712, + "step": 17999 + }, + { + "epoch": 0.83, + "grad_norm": 0.49855811886793444, + "learning_rate": 1.5308925653119822e-06, + "loss": 0.2667, + "step": 18000 + }, + { + "epoch": 0.83, + "grad_norm": 1.0649299231188127, + "learning_rate": 1.5301014802190027e-06, + "loss": 0.4559, + "step": 18001 + }, + { + "epoch": 0.83, + "grad_norm": 0.27059790379208454, + "learning_rate": 1.5293105826414112e-06, + "loss": 0.2079, + "step": 18002 + }, + { + "epoch": 0.83, + "grad_norm": 0.28536241244279165, + "learning_rate": 1.5285198725967209e-06, + "loss": 0.2355, + "step": 18003 + }, + { + "epoch": 0.83, + "grad_norm": 0.9910399167787206, + "learning_rate": 1.5277293501024359e-06, + "loss": 0.465, + "step": 18004 + }, + { + "epoch": 0.83, + "grad_norm": 0.3205322066269283, + "learning_rate": 1.526939015176052e-06, + "loss": 0.2508, + "step": 18005 + }, + { + "epoch": 0.83, + "grad_norm": 0.6658483011912187, + "learning_rate": 1.5261488678350777e-06, + "loss": 0.3527, + "step": 18006 + }, + { + "epoch": 0.83, + "grad_norm": 0.3445343693624867, + "learning_rate": 1.5253589080969976e-06, + "loss": 0.2354, + "step": 18007 + }, + { + "epoch": 0.83, + "grad_norm": 0.36352895318265754, + "learning_rate": 1.5245691359793058e-06, + "loss": 0.2554, + "step": 18008 + }, + { + "epoch": 0.83, + "grad_norm": 0.3055318767201801, + "learning_rate": 1.5237795514994813e-06, + "loss": 0.1422, + "step": 18009 + }, + { + "epoch": 0.83, + "grad_norm": 0.34368424022846283, + "learning_rate": 1.5229901546750092e-06, + "loss": 0.2529, + "step": 18010 + }, + { + "epoch": 0.83, + "grad_norm": 0.3982156341068539, + "learning_rate": 1.5222009455233666e-06, + "loss": 0.2449, + "step": 18011 + }, + { + "epoch": 0.83, + "grad_norm": 0.6770964671289202, + "learning_rate": 1.5214119240620217e-06, + "loss": 0.3806, + "step": 18012 + }, + { + "epoch": 0.83, + "grad_norm": 0.5238136152779747, + "learning_rate": 1.5206230903084451e-06, + "loss": 0.1592, + "step": 18013 + }, + { + "epoch": 0.83, + "grad_norm": 0.46736831158562875, + "learning_rate": 1.5198344442801028e-06, + "loss": 0.2597, + "step": 18014 + }, + { + "epoch": 0.83, + "grad_norm": 0.2558877036628446, + "learning_rate": 1.5190459859944506e-06, + "loss": 0.2408, + "step": 18015 + }, + { + "epoch": 0.83, + "grad_norm": 0.9354042653910232, + "learning_rate": 1.518257715468948e-06, + "loss": 0.4837, + "step": 18016 + }, + { + "epoch": 0.83, + "grad_norm": 0.30470435785872035, + "learning_rate": 1.5174696327210415e-06, + "loss": 0.1751, + "step": 18017 + }, + { + "epoch": 0.83, + "grad_norm": 0.452660651772885, + "learning_rate": 1.5166817377681854e-06, + "loss": 0.3215, + "step": 18018 + }, + { + "epoch": 0.83, + "grad_norm": 0.6388633506409068, + "learning_rate": 1.515894030627817e-06, + "loss": 0.3009, + "step": 18019 + }, + { + "epoch": 0.83, + "grad_norm": 0.32695794633277264, + "learning_rate": 1.5151065113173802e-06, + "loss": 0.1755, + "step": 18020 + }, + { + "epoch": 0.83, + "grad_norm": 0.3555260365897052, + "learning_rate": 1.5143191798543056e-06, + "loss": 0.1909, + "step": 18021 + }, + { + "epoch": 0.83, + "grad_norm": 0.5848815781353452, + "learning_rate": 1.5135320362560246e-06, + "loss": 0.3377, + "step": 18022 + }, + { + "epoch": 0.83, + "grad_norm": 0.285677614637501, + "learning_rate": 1.512745080539968e-06, + "loss": 0.2122, + "step": 18023 + }, + { + "epoch": 0.83, + "grad_norm": 0.7221017003516694, + "learning_rate": 1.5119583127235525e-06, + "loss": 0.4568, + "step": 18024 + }, + { + "epoch": 0.83, + "grad_norm": 1.559993018203462, + "learning_rate": 1.5111717328242016e-06, + "loss": 0.5341, + "step": 18025 + }, + { + "epoch": 0.83, + "grad_norm": 0.3520882283861581, + "learning_rate": 1.510385340859325e-06, + "loss": 0.2597, + "step": 18026 + }, + { + "epoch": 0.83, + "grad_norm": 0.27273425987896144, + "learning_rate": 1.5095991368463337e-06, + "loss": 0.1894, + "step": 18027 + }, + { + "epoch": 0.83, + "grad_norm": 0.4523832687249976, + "learning_rate": 1.5088131208026368e-06, + "loss": 0.2688, + "step": 18028 + }, + { + "epoch": 0.83, + "grad_norm": 0.4187177055394945, + "learning_rate": 1.5080272927456318e-06, + "loss": 0.2554, + "step": 18029 + }, + { + "epoch": 0.83, + "grad_norm": 1.0009966271589186, + "learning_rate": 1.507241652692718e-06, + "loss": 0.3331, + "step": 18030 + }, + { + "epoch": 0.83, + "grad_norm": 0.40522175018924755, + "learning_rate": 1.5064562006612882e-06, + "loss": 0.2717, + "step": 18031 + }, + { + "epoch": 0.83, + "grad_norm": 0.7981159163469557, + "learning_rate": 1.5056709366687339e-06, + "loss": 0.3055, + "step": 18032 + }, + { + "epoch": 0.83, + "grad_norm": 0.21443565623901853, + "learning_rate": 1.504885860732438e-06, + "loss": 0.1507, + "step": 18033 + }, + { + "epoch": 0.83, + "grad_norm": 0.37373059700156125, + "learning_rate": 1.5041009728697797e-06, + "loss": 0.3167, + "step": 18034 + }, + { + "epoch": 0.83, + "grad_norm": 0.5916156265659752, + "learning_rate": 1.5033162730981376e-06, + "loss": 0.3042, + "step": 18035 + }, + { + "epoch": 0.83, + "grad_norm": 0.3726830230623231, + "learning_rate": 1.5025317614348834e-06, + "loss": 0.245, + "step": 18036 + }, + { + "epoch": 0.83, + "grad_norm": 1.3153526595682334, + "learning_rate": 1.5017474378973884e-06, + "loss": 0.532, + "step": 18037 + }, + { + "epoch": 0.83, + "grad_norm": 0.42551576049934886, + "learning_rate": 1.5009633025030124e-06, + "loss": 0.2431, + "step": 18038 + }, + { + "epoch": 0.83, + "grad_norm": 0.26485758372611823, + "learning_rate": 1.5001793552691168e-06, + "loss": 0.2507, + "step": 18039 + }, + { + "epoch": 0.83, + "grad_norm": 0.7826060995368694, + "learning_rate": 1.4993955962130613e-06, + "loss": 0.2769, + "step": 18040 + }, + { + "epoch": 0.83, + "grad_norm": 0.40234363583005434, + "learning_rate": 1.4986120253521919e-06, + "loss": 0.2682, + "step": 18041 + }, + { + "epoch": 0.83, + "grad_norm": 0.2904331226856138, + "learning_rate": 1.4978286427038602e-06, + "loss": 0.2369, + "step": 18042 + }, + { + "epoch": 0.83, + "grad_norm": 0.5847555653994496, + "learning_rate": 1.4970454482854058e-06, + "loss": 0.2513, + "step": 18043 + }, + { + "epoch": 0.83, + "grad_norm": 0.5166471218360574, + "learning_rate": 1.4962624421141702e-06, + "loss": 0.2498, + "step": 18044 + }, + { + "epoch": 0.83, + "grad_norm": 0.3255879149508685, + "learning_rate": 1.4954796242074897e-06, + "loss": 0.2014, + "step": 18045 + }, + { + "epoch": 0.83, + "grad_norm": 0.3853528154455466, + "learning_rate": 1.4946969945826917e-06, + "loss": 0.2634, + "step": 18046 + }, + { + "epoch": 0.83, + "grad_norm": 0.35635141303676876, + "learning_rate": 1.4939145532571054e-06, + "loss": 0.2669, + "step": 18047 + }, + { + "epoch": 0.83, + "grad_norm": 1.2512292168062669, + "learning_rate": 1.4931323002480513e-06, + "loss": 0.7513, + "step": 18048 + }, + { + "epoch": 0.83, + "grad_norm": 0.6742257733064705, + "learning_rate": 1.4923502355728525e-06, + "loss": 0.1055, + "step": 18049 + }, + { + "epoch": 0.83, + "grad_norm": 0.35602087466771853, + "learning_rate": 1.4915683592488195e-06, + "loss": 0.2303, + "step": 18050 + }, + { + "epoch": 0.83, + "grad_norm": 0.35863239655841717, + "learning_rate": 1.4907866712932596e-06, + "loss": 0.2949, + "step": 18051 + }, + { + "epoch": 0.83, + "grad_norm": 0.7233417999120992, + "learning_rate": 1.4900051717234821e-06, + "loss": 0.4053, + "step": 18052 + }, + { + "epoch": 0.83, + "grad_norm": 0.400665555981816, + "learning_rate": 1.4892238605567876e-06, + "loss": 0.152, + "step": 18053 + }, + { + "epoch": 0.83, + "grad_norm": 0.270502520225025, + "learning_rate": 1.488442737810476e-06, + "loss": 0.2393, + "step": 18054 + }, + { + "epoch": 0.83, + "grad_norm": 0.4099639613242563, + "learning_rate": 1.4876618035018376e-06, + "loss": 0.2125, + "step": 18055 + }, + { + "epoch": 0.83, + "grad_norm": 0.4991277658667054, + "learning_rate": 1.4868810576481618e-06, + "loss": 0.1625, + "step": 18056 + }, + { + "epoch": 0.83, + "grad_norm": 0.37640319142899137, + "learning_rate": 1.4861005002667361e-06, + "loss": 0.2983, + "step": 18057 + }, + { + "epoch": 0.83, + "grad_norm": 0.5097789753440797, + "learning_rate": 1.4853201313748378e-06, + "loss": 0.2985, + "step": 18058 + }, + { + "epoch": 0.83, + "grad_norm": 0.3163494801727335, + "learning_rate": 1.4845399509897474e-06, + "loss": 0.2143, + "step": 18059 + }, + { + "epoch": 0.83, + "grad_norm": 0.3421998547284436, + "learning_rate": 1.4837599591287333e-06, + "loss": 0.2367, + "step": 18060 + }, + { + "epoch": 0.83, + "grad_norm": 0.4367527611450621, + "learning_rate": 1.482980155809065e-06, + "loss": 0.1856, + "step": 18061 + }, + { + "epoch": 0.83, + "grad_norm": 0.293045678911986, + "learning_rate": 1.48220054104801e-06, + "loss": 0.2084, + "step": 18062 + }, + { + "epoch": 0.83, + "grad_norm": 0.5322901345206392, + "learning_rate": 1.4814211148628232e-06, + "loss": 0.3239, + "step": 18063 + }, + { + "epoch": 0.83, + "grad_norm": 0.7937118042218305, + "learning_rate": 1.4806418772707643e-06, + "loss": 0.3923, + "step": 18064 + }, + { + "epoch": 0.83, + "grad_norm": 0.5655585247370369, + "learning_rate": 1.4798628282890793e-06, + "loss": 0.3356, + "step": 18065 + }, + { + "epoch": 0.83, + "grad_norm": 0.2876697829025247, + "learning_rate": 1.4790839679350243e-06, + "loss": 0.209, + "step": 18066 + }, + { + "epoch": 0.83, + "grad_norm": 0.27601628748425405, + "learning_rate": 1.478305296225835e-06, + "loss": 0.1973, + "step": 18067 + }, + { + "epoch": 0.83, + "grad_norm": 0.6251837183312858, + "learning_rate": 1.4775268131787547e-06, + "loss": 0.2853, + "step": 18068 + }, + { + "epoch": 0.83, + "grad_norm": 0.4030026236550037, + "learning_rate": 1.4767485188110154e-06, + "loss": 0.2367, + "step": 18069 + }, + { + "epoch": 0.83, + "grad_norm": 0.3917628035290433, + "learning_rate": 1.4759704131398488e-06, + "loss": 0.3003, + "step": 18070 + }, + { + "epoch": 0.83, + "grad_norm": 0.875164925102872, + "learning_rate": 1.4751924961824837e-06, + "loss": 0.504, + "step": 18071 + }, + { + "epoch": 0.83, + "grad_norm": 0.37961358627610026, + "learning_rate": 1.4744147679561383e-06, + "loss": 0.2134, + "step": 18072 + }, + { + "epoch": 0.83, + "grad_norm": 0.3641219544739669, + "learning_rate": 1.473637228478032e-06, + "loss": 0.2219, + "step": 18073 + }, + { + "epoch": 0.83, + "grad_norm": 0.333099635656096, + "learning_rate": 1.4728598777653836e-06, + "loss": 0.2645, + "step": 18074 + }, + { + "epoch": 0.83, + "grad_norm": 0.3955044838376945, + "learning_rate": 1.4720827158353957e-06, + "loss": 0.2212, + "step": 18075 + }, + { + "epoch": 0.83, + "grad_norm": 1.435336710258183, + "learning_rate": 1.471305742705279e-06, + "loss": 0.7051, + "step": 18076 + }, + { + "epoch": 0.83, + "grad_norm": 0.8789767992020964, + "learning_rate": 1.47052895839223e-06, + "loss": 0.4428, + "step": 18077 + }, + { + "epoch": 0.83, + "grad_norm": 0.26481279736533686, + "learning_rate": 1.4697523629134525e-06, + "loss": 0.2529, + "step": 18078 + }, + { + "epoch": 0.83, + "grad_norm": 0.37738612554590467, + "learning_rate": 1.468975956286136e-06, + "loss": 0.1205, + "step": 18079 + }, + { + "epoch": 0.83, + "grad_norm": 0.8914120619079094, + "learning_rate": 1.4681997385274683e-06, + "loss": 0.296, + "step": 18080 + }, + { + "epoch": 0.83, + "grad_norm": 0.4649643378005454, + "learning_rate": 1.4674237096546362e-06, + "loss": 0.3031, + "step": 18081 + }, + { + "epoch": 0.83, + "grad_norm": 0.3757900680460659, + "learning_rate": 1.4666478696848153e-06, + "loss": 0.2519, + "step": 18082 + }, + { + "epoch": 0.83, + "grad_norm": 0.6442614776195555, + "learning_rate": 1.4658722186351915e-06, + "loss": 0.3919, + "step": 18083 + }, + { + "epoch": 0.83, + "grad_norm": 0.4380821704380783, + "learning_rate": 1.465096756522928e-06, + "loss": 0.3013, + "step": 18084 + }, + { + "epoch": 0.83, + "grad_norm": 0.7296760125446096, + "learning_rate": 1.4643214833651997e-06, + "loss": 0.2855, + "step": 18085 + }, + { + "epoch": 0.83, + "grad_norm": 0.2856415638136337, + "learning_rate": 1.4635463991791633e-06, + "loss": 0.2304, + "step": 18086 + }, + { + "epoch": 0.83, + "grad_norm": 0.28391649823352666, + "learning_rate": 1.4627715039819834e-06, + "loss": 0.2252, + "step": 18087 + }, + { + "epoch": 0.83, + "grad_norm": 1.237433276976469, + "learning_rate": 1.4619967977908157e-06, + "loss": 0.316, + "step": 18088 + }, + { + "epoch": 0.83, + "grad_norm": 0.9321338248744325, + "learning_rate": 1.461222280622807e-06, + "loss": 0.4907, + "step": 18089 + }, + { + "epoch": 0.83, + "grad_norm": 0.2629161887373475, + "learning_rate": 1.4604479524951087e-06, + "loss": 0.2538, + "step": 18090 + }, + { + "epoch": 0.83, + "grad_norm": 0.6145896257695453, + "learning_rate": 1.4596738134248634e-06, + "loss": 0.3659, + "step": 18091 + }, + { + "epoch": 0.83, + "grad_norm": 0.22289655846947887, + "learning_rate": 1.4588998634292062e-06, + "loss": 0.0719, + "step": 18092 + }, + { + "epoch": 0.83, + "grad_norm": 0.30505171459499464, + "learning_rate": 1.4581261025252768e-06, + "loss": 0.2107, + "step": 18093 + }, + { + "epoch": 0.83, + "grad_norm": 0.446118266906929, + "learning_rate": 1.4573525307302006e-06, + "loss": 0.3178, + "step": 18094 + }, + { + "epoch": 0.83, + "grad_norm": 0.600178524504717, + "learning_rate": 1.4565791480611057e-06, + "loss": 0.2868, + "step": 18095 + }, + { + "epoch": 0.83, + "grad_norm": 0.4358986192548637, + "learning_rate": 1.4558059545351144e-06, + "loss": 0.2729, + "step": 18096 + }, + { + "epoch": 0.83, + "grad_norm": 0.6839326754043568, + "learning_rate": 1.4550329501693462e-06, + "loss": 0.3732, + "step": 18097 + }, + { + "epoch": 0.83, + "grad_norm": 0.27348523128154995, + "learning_rate": 1.4542601349809127e-06, + "loss": 0.1855, + "step": 18098 + }, + { + "epoch": 0.83, + "grad_norm": 0.3732775907676523, + "learning_rate": 1.4534875089869183e-06, + "loss": 0.2887, + "step": 18099 + }, + { + "epoch": 0.83, + "grad_norm": 0.651653375066412, + "learning_rate": 1.4527150722044781e-06, + "loss": 0.236, + "step": 18100 + }, + { + "epoch": 0.83, + "grad_norm": 0.41340286753494937, + "learning_rate": 1.4519428246506862e-06, + "loss": 0.2025, + "step": 18101 + }, + { + "epoch": 0.83, + "grad_norm": 0.31518018199604475, + "learning_rate": 1.4511707663426443e-06, + "loss": 0.2658, + "step": 18102 + }, + { + "epoch": 0.83, + "grad_norm": 0.599798533573514, + "learning_rate": 1.4503988972974393e-06, + "loss": 0.349, + "step": 18103 + }, + { + "epoch": 0.83, + "grad_norm": 0.5156347768948834, + "learning_rate": 1.4496272175321624e-06, + "loss": 0.1895, + "step": 18104 + }, + { + "epoch": 0.83, + "grad_norm": 0.2525570623700407, + "learning_rate": 1.4488557270639004e-06, + "loss": 0.162, + "step": 18105 + }, + { + "epoch": 0.83, + "grad_norm": 0.3940395636866371, + "learning_rate": 1.448084425909728e-06, + "loss": 0.3165, + "step": 18106 + }, + { + "epoch": 0.83, + "grad_norm": 0.9382063778504801, + "learning_rate": 1.4473133140867246e-06, + "loss": 0.5166, + "step": 18107 + }, + { + "epoch": 0.83, + "grad_norm": 0.3441388844486491, + "learning_rate": 1.4465423916119637e-06, + "loss": 0.2136, + "step": 18108 + }, + { + "epoch": 0.83, + "grad_norm": 0.5137079514396238, + "learning_rate": 1.4457716585025073e-06, + "loss": 0.3186, + "step": 18109 + }, + { + "epoch": 0.83, + "grad_norm": 0.5630580512911609, + "learning_rate": 1.445001114775425e-06, + "loss": 0.3059, + "step": 18110 + }, + { + "epoch": 0.83, + "grad_norm": 0.19213778733029777, + "learning_rate": 1.444230760447769e-06, + "loss": 0.1304, + "step": 18111 + }, + { + "epoch": 0.83, + "grad_norm": 1.403089291346263, + "learning_rate": 1.4434605955365989e-06, + "loss": 0.8025, + "step": 18112 + }, + { + "epoch": 0.83, + "grad_norm": 0.4783803605202183, + "learning_rate": 1.4426906200589641e-06, + "loss": 0.3468, + "step": 18113 + }, + { + "epoch": 0.83, + "grad_norm": 0.27485766458240046, + "learning_rate": 1.4419208340319135e-06, + "loss": 0.2218, + "step": 18114 + }, + { + "epoch": 0.83, + "grad_norm": 1.059629114851713, + "learning_rate": 1.4411512374724867e-06, + "loss": 0.4605, + "step": 18115 + }, + { + "epoch": 0.83, + "grad_norm": 0.6572806040901578, + "learning_rate": 1.440381830397719e-06, + "loss": 0.2446, + "step": 18116 + }, + { + "epoch": 0.83, + "grad_norm": 0.3364706895953859, + "learning_rate": 1.4396126128246513e-06, + "loss": 0.2572, + "step": 18117 + }, + { + "epoch": 0.83, + "grad_norm": 0.24749313718943855, + "learning_rate": 1.4388435847703074e-06, + "loss": 0.1898, + "step": 18118 + }, + { + "epoch": 0.83, + "grad_norm": 0.605769595142131, + "learning_rate": 1.4380747462517186e-06, + "loss": 0.3636, + "step": 18119 + }, + { + "epoch": 0.83, + "grad_norm": 0.4261243477600186, + "learning_rate": 1.4373060972858999e-06, + "loss": 0.2749, + "step": 18120 + }, + { + "epoch": 0.83, + "grad_norm": 0.44562528835396936, + "learning_rate": 1.436537637889871e-06, + "loss": 0.2448, + "step": 18121 + }, + { + "epoch": 0.83, + "grad_norm": 0.4576172087306867, + "learning_rate": 1.4357693680806485e-06, + "loss": 0.2642, + "step": 18122 + }, + { + "epoch": 0.83, + "grad_norm": 0.3676033939427828, + "learning_rate": 1.435001287875234e-06, + "loss": 0.2648, + "step": 18123 + }, + { + "epoch": 0.83, + "grad_norm": 0.40861945150740275, + "learning_rate": 1.4342333972906398e-06, + "loss": 0.1862, + "step": 18124 + }, + { + "epoch": 0.83, + "grad_norm": 0.45210078882973675, + "learning_rate": 1.4334656963438587e-06, + "loss": 0.3017, + "step": 18125 + }, + { + "epoch": 0.83, + "grad_norm": 0.28604394473350964, + "learning_rate": 1.4326981850518917e-06, + "loss": 0.2377, + "step": 18126 + }, + { + "epoch": 0.83, + "grad_norm": 0.527125257948193, + "learning_rate": 1.4319308634317308e-06, + "loss": 0.147, + "step": 18127 + }, + { + "epoch": 0.83, + "grad_norm": 1.6034416230158934, + "learning_rate": 1.4311637315003612e-06, + "loss": 0.5589, + "step": 18128 + }, + { + "epoch": 0.83, + "grad_norm": 0.23994646247124496, + "learning_rate": 1.4303967892747684e-06, + "loss": 0.1879, + "step": 18129 + }, + { + "epoch": 0.83, + "grad_norm": 0.3917087939770791, + "learning_rate": 1.4296300367719297e-06, + "loss": 0.2998, + "step": 18130 + }, + { + "epoch": 0.83, + "grad_norm": 0.6456164270902304, + "learning_rate": 1.4288634740088247e-06, + "loss": 0.2958, + "step": 18131 + }, + { + "epoch": 0.83, + "grad_norm": 0.3588479412569209, + "learning_rate": 1.4280971010024192e-06, + "loss": 0.2494, + "step": 18132 + }, + { + "epoch": 0.83, + "grad_norm": 0.5757243419839844, + "learning_rate": 1.4273309177696826e-06, + "loss": 0.2695, + "step": 18133 + }, + { + "epoch": 0.83, + "grad_norm": 0.3575877226696018, + "learning_rate": 1.4265649243275782e-06, + "loss": 0.2358, + "step": 18134 + }, + { + "epoch": 0.83, + "grad_norm": 0.3155418247159494, + "learning_rate": 1.4257991206930622e-06, + "loss": 0.2262, + "step": 18135 + }, + { + "epoch": 0.83, + "grad_norm": 0.7649864707231198, + "learning_rate": 1.4250335068830913e-06, + "loss": 0.444, + "step": 18136 + }, + { + "epoch": 0.83, + "grad_norm": 0.3333589848411431, + "learning_rate": 1.4242680829146117e-06, + "loss": 0.2635, + "step": 18137 + }, + { + "epoch": 0.83, + "grad_norm": 0.4197667395846607, + "learning_rate": 1.423502848804571e-06, + "loss": 0.2733, + "step": 18138 + }, + { + "epoch": 0.83, + "grad_norm": 0.2987100474482119, + "learning_rate": 1.4227378045699137e-06, + "loss": 0.2054, + "step": 18139 + }, + { + "epoch": 0.83, + "grad_norm": 1.7471159853752192, + "learning_rate": 1.4219729502275726e-06, + "loss": 0.2068, + "step": 18140 + }, + { + "epoch": 0.83, + "grad_norm": 0.4343917243897533, + "learning_rate": 1.4212082857944842e-06, + "loss": 0.2771, + "step": 18141 + }, + { + "epoch": 0.83, + "grad_norm": 0.3174130452203371, + "learning_rate": 1.420443811287572e-06, + "loss": 0.2663, + "step": 18142 + }, + { + "epoch": 0.83, + "grad_norm": 0.7838966221708615, + "learning_rate": 1.4196795267237695e-06, + "loss": 0.405, + "step": 18143 + }, + { + "epoch": 0.83, + "grad_norm": 0.3083155685183564, + "learning_rate": 1.4189154321199917e-06, + "loss": 0.2031, + "step": 18144 + }, + { + "epoch": 0.83, + "grad_norm": 0.2444757528386469, + "learning_rate": 1.4181515274931545e-06, + "loss": 0.2244, + "step": 18145 + }, + { + "epoch": 0.83, + "grad_norm": 1.6604570659717346, + "learning_rate": 1.4173878128601704e-06, + "loss": 0.7331, + "step": 18146 + }, + { + "epoch": 0.83, + "grad_norm": 0.3385861190731466, + "learning_rate": 1.4166242882379478e-06, + "loss": 0.1875, + "step": 18147 + }, + { + "epoch": 0.83, + "grad_norm": 0.6228128037246986, + "learning_rate": 1.4158609536433944e-06, + "loss": 0.3693, + "step": 18148 + }, + { + "epoch": 0.83, + "grad_norm": 0.4296787036514854, + "learning_rate": 1.415097809093402e-06, + "loss": 0.2949, + "step": 18149 + }, + { + "epoch": 0.83, + "grad_norm": 0.3258243293170314, + "learning_rate": 1.4143348546048706e-06, + "loss": 0.2118, + "step": 18150 + }, + { + "epoch": 0.83, + "grad_norm": 0.28464229829898596, + "learning_rate": 1.4135720901946936e-06, + "loss": 0.1836, + "step": 18151 + }, + { + "epoch": 0.83, + "grad_norm": 0.951724281644622, + "learning_rate": 1.4128095158797517e-06, + "loss": 0.3999, + "step": 18152 + }, + { + "epoch": 0.83, + "grad_norm": 0.34178031465150255, + "learning_rate": 1.4120471316769324e-06, + "loss": 0.2618, + "step": 18153 + }, + { + "epoch": 0.83, + "grad_norm": 0.3564088210299669, + "learning_rate": 1.4112849376031112e-06, + "loss": 0.2528, + "step": 18154 + }, + { + "epoch": 0.83, + "grad_norm": 1.1312117733385187, + "learning_rate": 1.4105229336751636e-06, + "loss": 0.6136, + "step": 18155 + }, + { + "epoch": 0.83, + "grad_norm": 0.7060319574947277, + "learning_rate": 1.4097611199099615e-06, + "loss": 0.3621, + "step": 18156 + }, + { + "epoch": 0.83, + "grad_norm": 0.23496095416094162, + "learning_rate": 1.4089994963243658e-06, + "loss": 0.1992, + "step": 18157 + }, + { + "epoch": 0.83, + "grad_norm": 0.5434280550063493, + "learning_rate": 1.4082380629352444e-06, + "loss": 0.2149, + "step": 18158 + }, + { + "epoch": 0.83, + "grad_norm": 0.6130912822260625, + "learning_rate": 1.407476819759447e-06, + "loss": 0.2963, + "step": 18159 + }, + { + "epoch": 0.83, + "grad_norm": 0.40356686162362815, + "learning_rate": 1.4067157668138354e-06, + "loss": 0.2322, + "step": 18160 + }, + { + "epoch": 0.83, + "grad_norm": 0.3671287068229323, + "learning_rate": 1.405954904115252e-06, + "loss": 0.2774, + "step": 18161 + }, + { + "epoch": 0.83, + "grad_norm": 0.4133176366336819, + "learning_rate": 1.4051942316805468e-06, + "loss": 0.2912, + "step": 18162 + }, + { + "epoch": 0.83, + "grad_norm": 0.28347598842355753, + "learning_rate": 1.4044337495265548e-06, + "loss": 0.1314, + "step": 18163 + }, + { + "epoch": 0.83, + "grad_norm": 0.8814561652303995, + "learning_rate": 1.403673457670115e-06, + "loss": 0.4168, + "step": 18164 + }, + { + "epoch": 0.83, + "grad_norm": 0.39075223866866426, + "learning_rate": 1.4029133561280618e-06, + "loss": 0.2564, + "step": 18165 + }, + { + "epoch": 0.83, + "grad_norm": 0.40799242059498464, + "learning_rate": 1.402153444917218e-06, + "loss": 0.2832, + "step": 18166 + }, + { + "epoch": 0.83, + "grad_norm": 1.1512475970083706, + "learning_rate": 1.4013937240544118e-06, + "loss": 0.4557, + "step": 18167 + }, + { + "epoch": 0.83, + "grad_norm": 0.43182953910728566, + "learning_rate": 1.4006341935564628e-06, + "loss": 0.2709, + "step": 18168 + }, + { + "epoch": 0.83, + "grad_norm": 0.3970760613398073, + "learning_rate": 1.3998748534401817e-06, + "loss": 0.3015, + "step": 18169 + }, + { + "epoch": 0.83, + "grad_norm": 0.2873751927451588, + "learning_rate": 1.3991157037223857e-06, + "loss": 0.1833, + "step": 18170 + }, + { + "epoch": 0.83, + "grad_norm": 0.4562357676599878, + "learning_rate": 1.3983567444198753e-06, + "loss": 0.2731, + "step": 18171 + }, + { + "epoch": 0.83, + "grad_norm": 0.6760248742599947, + "learning_rate": 1.3975979755494562e-06, + "loss": 0.3514, + "step": 18172 + }, + { + "epoch": 0.83, + "grad_norm": 0.3747274074919487, + "learning_rate": 1.3968393971279293e-06, + "loss": 0.2551, + "step": 18173 + }, + { + "epoch": 0.83, + "grad_norm": 0.546961268567488, + "learning_rate": 1.3960810091720844e-06, + "loss": 0.2915, + "step": 18174 + }, + { + "epoch": 0.83, + "grad_norm": 0.4519380026360848, + "learning_rate": 1.395322811698715e-06, + "loss": 0.2887, + "step": 18175 + }, + { + "epoch": 0.83, + "grad_norm": 0.3205983095772739, + "learning_rate": 1.3945648047246007e-06, + "loss": 0.2146, + "step": 18176 + }, + { + "epoch": 0.84, + "grad_norm": 0.38675624263691544, + "learning_rate": 1.3938069882665327e-06, + "loss": 0.2545, + "step": 18177 + }, + { + "epoch": 0.84, + "grad_norm": 0.32222129147567685, + "learning_rate": 1.3930493623412812e-06, + "loss": 0.23, + "step": 18178 + }, + { + "epoch": 0.84, + "grad_norm": 1.2987602195937102, + "learning_rate": 1.3922919269656232e-06, + "loss": 0.7506, + "step": 18179 + }, + { + "epoch": 0.84, + "grad_norm": 0.3554013338677597, + "learning_rate": 1.3915346821563235e-06, + "loss": 0.1111, + "step": 18180 + }, + { + "epoch": 0.84, + "grad_norm": 0.28835058650507406, + "learning_rate": 1.3907776279301488e-06, + "loss": 0.2735, + "step": 18181 + }, + { + "epoch": 0.84, + "grad_norm": 0.7427304773208124, + "learning_rate": 1.390020764303862e-06, + "loss": 0.3889, + "step": 18182 + }, + { + "epoch": 0.84, + "grad_norm": 0.22662791360786488, + "learning_rate": 1.3892640912942146e-06, + "loss": 0.1397, + "step": 18183 + }, + { + "epoch": 0.84, + "grad_norm": 0.6607942540776736, + "learning_rate": 1.388507608917964e-06, + "loss": 0.3414, + "step": 18184 + }, + { + "epoch": 0.84, + "grad_norm": 0.3838073883376752, + "learning_rate": 1.387751317191852e-06, + "loss": 0.3136, + "step": 18185 + }, + { + "epoch": 0.84, + "grad_norm": 0.4429494689130015, + "learning_rate": 1.3869952161326261e-06, + "loss": 0.1927, + "step": 18186 + }, + { + "epoch": 0.84, + "grad_norm": 0.6194883777514623, + "learning_rate": 1.3862393057570267e-06, + "loss": 0.3467, + "step": 18187 + }, + { + "epoch": 0.84, + "grad_norm": 0.8265362452908632, + "learning_rate": 1.385483586081785e-06, + "loss": 0.3535, + "step": 18188 + }, + { + "epoch": 0.84, + "grad_norm": 0.21992358605925325, + "learning_rate": 1.3847280571236332e-06, + "loss": 0.1835, + "step": 18189 + }, + { + "epoch": 0.84, + "grad_norm": 0.3469621347449507, + "learning_rate": 1.3839727188993014e-06, + "loss": 0.2266, + "step": 18190 + }, + { + "epoch": 0.84, + "grad_norm": 1.2293026612426468, + "learning_rate": 1.3832175714255068e-06, + "loss": 0.6376, + "step": 18191 + }, + { + "epoch": 0.84, + "grad_norm": 0.9990717145302259, + "learning_rate": 1.3824626147189734e-06, + "loss": 0.4017, + "step": 18192 + }, + { + "epoch": 0.84, + "grad_norm": 0.25546697148001346, + "learning_rate": 1.3817078487964063e-06, + "loss": 0.2133, + "step": 18193 + }, + { + "epoch": 0.84, + "grad_norm": 0.7205622639168112, + "learning_rate": 1.3809532736745269e-06, + "loss": 0.3943, + "step": 18194 + }, + { + "epoch": 0.84, + "grad_norm": 0.2421382207965262, + "learning_rate": 1.3801988893700312e-06, + "loss": 0.1338, + "step": 18195 + }, + { + "epoch": 0.84, + "grad_norm": 0.37414188194655623, + "learning_rate": 1.3794446958996277e-06, + "loss": 0.2089, + "step": 18196 + }, + { + "epoch": 0.84, + "grad_norm": 0.3529886736063192, + "learning_rate": 1.3786906932800071e-06, + "loss": 0.3069, + "step": 18197 + }, + { + "epoch": 0.84, + "grad_norm": 0.7977665578803115, + "learning_rate": 1.3779368815278648e-06, + "loss": 0.3434, + "step": 18198 + }, + { + "epoch": 0.84, + "grad_norm": 0.39558928471104043, + "learning_rate": 1.3771832606598935e-06, + "loss": 0.2478, + "step": 18199 + }, + { + "epoch": 0.84, + "grad_norm": 1.687070817392689, + "learning_rate": 1.3764298306927703e-06, + "loss": 0.693, + "step": 18200 + }, + { + "epoch": 0.84, + "grad_norm": 0.26921954219390976, + "learning_rate": 1.3756765916431825e-06, + "loss": 0.2195, + "step": 18201 + }, + { + "epoch": 0.84, + "grad_norm": 0.252331106557988, + "learning_rate": 1.3749235435277997e-06, + "loss": 0.1608, + "step": 18202 + }, + { + "epoch": 0.84, + "grad_norm": 1.3867589701883567, + "learning_rate": 1.3741706863632976e-06, + "loss": 0.7759, + "step": 18203 + }, + { + "epoch": 0.84, + "grad_norm": 0.5517134184872348, + "learning_rate": 1.373418020166344e-06, + "loss": 0.2868, + "step": 18204 + }, + { + "epoch": 0.84, + "grad_norm": 0.3155788234593152, + "learning_rate": 1.3726655449535998e-06, + "loss": 0.2704, + "step": 18205 + }, + { + "epoch": 0.84, + "grad_norm": 0.5725567306673096, + "learning_rate": 1.371913260741724e-06, + "loss": 0.2418, + "step": 18206 + }, + { + "epoch": 0.84, + "grad_norm": 0.37124600479101655, + "learning_rate": 1.3711611675473734e-06, + "loss": 0.2206, + "step": 18207 + }, + { + "epoch": 0.84, + "grad_norm": 0.31257267594450916, + "learning_rate": 1.3704092653872002e-06, + "loss": 0.24, + "step": 18208 + }, + { + "epoch": 0.84, + "grad_norm": 0.35362463421338136, + "learning_rate": 1.369657554277849e-06, + "loss": 0.275, + "step": 18209 + }, + { + "epoch": 0.84, + "grad_norm": 0.7638492512110073, + "learning_rate": 1.3689060342359573e-06, + "loss": 0.3737, + "step": 18210 + }, + { + "epoch": 0.84, + "grad_norm": 0.37406343765717726, + "learning_rate": 1.3681547052781707e-06, + "loss": 0.2575, + "step": 18211 + }, + { + "epoch": 0.84, + "grad_norm": 0.4005267723409763, + "learning_rate": 1.3674035674211183e-06, + "loss": 0.2284, + "step": 18212 + }, + { + "epoch": 0.84, + "grad_norm": 0.46746327980802027, + "learning_rate": 1.3666526206814323e-06, + "loss": 0.2459, + "step": 18213 + }, + { + "epoch": 0.84, + "grad_norm": 0.27692313763712756, + "learning_rate": 1.3659018650757339e-06, + "loss": 0.217, + "step": 18214 + }, + { + "epoch": 0.84, + "grad_norm": 0.7301021531223986, + "learning_rate": 1.3651513006206463e-06, + "loss": 0.3219, + "step": 18215 + }, + { + "epoch": 0.84, + "grad_norm": 0.4769601690616754, + "learning_rate": 1.3644009273327896e-06, + "loss": 0.2828, + "step": 18216 + }, + { + "epoch": 0.84, + "grad_norm": 0.31912991999539886, + "learning_rate": 1.3636507452287706e-06, + "loss": 0.2759, + "step": 18217 + }, + { + "epoch": 0.84, + "grad_norm": 1.703766412369911, + "learning_rate": 1.3629007543252027e-06, + "loss": 0.5994, + "step": 18218 + }, + { + "epoch": 0.84, + "grad_norm": 0.5271275157347987, + "learning_rate": 1.3621509546386847e-06, + "loss": 0.1165, + "step": 18219 + }, + { + "epoch": 0.84, + "grad_norm": 0.36199526360939055, + "learning_rate": 1.36140134618582e-06, + "loss": 0.2894, + "step": 18220 + }, + { + "epoch": 0.84, + "grad_norm": 0.3527667643222183, + "learning_rate": 1.3606519289832054e-06, + "loss": 0.2759, + "step": 18221 + }, + { + "epoch": 0.84, + "grad_norm": 0.7862873046353133, + "learning_rate": 1.3599027030474288e-06, + "loss": 0.2847, + "step": 18222 + }, + { + "epoch": 0.84, + "grad_norm": 0.3865830703537873, + "learning_rate": 1.3591536683950779e-06, + "loss": 0.256, + "step": 18223 + }, + { + "epoch": 0.84, + "grad_norm": 0.33921484896926096, + "learning_rate": 1.3584048250427373e-06, + "loss": 0.2456, + "step": 18224 + }, + { + "epoch": 0.84, + "grad_norm": 0.43069135953057586, + "learning_rate": 1.357656173006987e-06, + "loss": 0.1911, + "step": 18225 + }, + { + "epoch": 0.84, + "grad_norm": 0.3053350783472205, + "learning_rate": 1.3569077123043973e-06, + "loss": 0.2232, + "step": 18226 + }, + { + "epoch": 0.84, + "grad_norm": 0.7038154611902642, + "learning_rate": 1.3561594429515412e-06, + "loss": 0.3929, + "step": 18227 + }, + { + "epoch": 0.84, + "grad_norm": 0.3789085172540409, + "learning_rate": 1.3554113649649847e-06, + "loss": 0.2529, + "step": 18228 + }, + { + "epoch": 0.84, + "grad_norm": 0.3520233126767513, + "learning_rate": 1.3546634783612877e-06, + "loss": 0.2636, + "step": 18229 + }, + { + "epoch": 0.84, + "grad_norm": 0.6479573296094628, + "learning_rate": 1.3539157831570105e-06, + "loss": 0.2158, + "step": 18230 + }, + { + "epoch": 0.84, + "grad_norm": 1.6480518991265372, + "learning_rate": 1.353168279368703e-06, + "loss": 0.7475, + "step": 18231 + }, + { + "epoch": 0.84, + "grad_norm": 0.531421930917357, + "learning_rate": 1.3524209670129152e-06, + "loss": 0.2177, + "step": 18232 + }, + { + "epoch": 0.84, + "grad_norm": 0.3701187304070856, + "learning_rate": 1.3516738461061952e-06, + "loss": 0.2891, + "step": 18233 + }, + { + "epoch": 0.84, + "grad_norm": 0.9626978686329741, + "learning_rate": 1.3509269166650785e-06, + "loss": 0.5051, + "step": 18234 + }, + { + "epoch": 0.84, + "grad_norm": 0.18356981311313056, + "learning_rate": 1.3501801787061065e-06, + "loss": 0.1388, + "step": 18235 + }, + { + "epoch": 0.84, + "grad_norm": 1.599429372054145, + "learning_rate": 1.3494336322458034e-06, + "loss": 0.7666, + "step": 18236 + }, + { + "epoch": 0.84, + "grad_norm": 0.38936045921397605, + "learning_rate": 1.3486872773007064e-06, + "loss": 0.2811, + "step": 18237 + }, + { + "epoch": 0.84, + "grad_norm": 0.3244927225438379, + "learning_rate": 1.3479411138873354e-06, + "loss": 0.2103, + "step": 18238 + }, + { + "epoch": 0.84, + "grad_norm": 0.7711852978203485, + "learning_rate": 1.3471951420222075e-06, + "loss": 0.4033, + "step": 18239 + }, + { + "epoch": 0.84, + "grad_norm": 0.3710787579507663, + "learning_rate": 1.3464493617218389e-06, + "loss": 0.3277, + "step": 18240 + }, + { + "epoch": 0.84, + "grad_norm": 0.3189683918993497, + "learning_rate": 1.3457037730027411e-06, + "loss": 0.1873, + "step": 18241 + }, + { + "epoch": 0.84, + "grad_norm": 0.35512600113006515, + "learning_rate": 1.3449583758814222e-06, + "loss": 0.1853, + "step": 18242 + }, + { + "epoch": 0.84, + "grad_norm": 0.7719686941697731, + "learning_rate": 1.3442131703743821e-06, + "loss": 0.4014, + "step": 18243 + }, + { + "epoch": 0.84, + "grad_norm": 0.4041083189067108, + "learning_rate": 1.343468156498121e-06, + "loss": 0.2476, + "step": 18244 + }, + { + "epoch": 0.84, + "grad_norm": 0.34984070214113927, + "learning_rate": 1.3427233342691293e-06, + "loss": 0.2521, + "step": 18245 + }, + { + "epoch": 0.84, + "grad_norm": 1.4983053330361942, + "learning_rate": 1.3419787037039e-06, + "loss": 0.6536, + "step": 18246 + }, + { + "epoch": 0.84, + "grad_norm": 0.32960600849565425, + "learning_rate": 1.3412342648189192e-06, + "loss": 0.2389, + "step": 18247 + }, + { + "epoch": 0.84, + "grad_norm": 0.21041651600051087, + "learning_rate": 1.3404900176306635e-06, + "loss": 0.1605, + "step": 18248 + }, + { + "epoch": 0.84, + "grad_norm": 0.971889729259226, + "learning_rate": 1.339745962155613e-06, + "loss": 0.405, + "step": 18249 + }, + { + "epoch": 0.84, + "grad_norm": 0.3648475922367184, + "learning_rate": 1.3390020984102426e-06, + "loss": 0.266, + "step": 18250 + }, + { + "epoch": 0.84, + "grad_norm": 0.9237508915682208, + "learning_rate": 1.3382584264110165e-06, + "loss": 0.3108, + "step": 18251 + }, + { + "epoch": 0.84, + "grad_norm": 0.3507495396155283, + "learning_rate": 1.3375149461744019e-06, + "loss": 0.2928, + "step": 18252 + }, + { + "epoch": 0.84, + "grad_norm": 0.36671409344922407, + "learning_rate": 1.3367716577168531e-06, + "loss": 0.2423, + "step": 18253 + }, + { + "epoch": 0.84, + "grad_norm": 0.2963961798190344, + "learning_rate": 1.3360285610548341e-06, + "loss": 0.0779, + "step": 18254 + }, + { + "epoch": 0.84, + "grad_norm": 0.6487567304498819, + "learning_rate": 1.3352856562047912e-06, + "loss": 0.3594, + "step": 18255 + }, + { + "epoch": 0.84, + "grad_norm": 0.2911878953322162, + "learning_rate": 1.3345429431831735e-06, + "loss": 0.238, + "step": 18256 + }, + { + "epoch": 0.84, + "grad_norm": 0.5556880500075052, + "learning_rate": 1.3338004220064227e-06, + "loss": 0.3615, + "step": 18257 + }, + { + "epoch": 0.84, + "grad_norm": 1.2389806677470196, + "learning_rate": 1.3330580926909765e-06, + "loss": 0.3901, + "step": 18258 + }, + { + "epoch": 0.84, + "grad_norm": 0.4337604997757784, + "learning_rate": 1.3323159552532738e-06, + "loss": 0.2651, + "step": 18259 + }, + { + "epoch": 0.84, + "grad_norm": 0.2587123152428974, + "learning_rate": 1.3315740097097386e-06, + "loss": 0.2064, + "step": 18260 + }, + { + "epoch": 0.84, + "grad_norm": 0.46082695451759753, + "learning_rate": 1.3308322560768038e-06, + "loss": 0.2689, + "step": 18261 + }, + { + "epoch": 0.84, + "grad_norm": 0.42415794228714326, + "learning_rate": 1.3300906943708836e-06, + "loss": 0.2412, + "step": 18262 + }, + { + "epoch": 0.84, + "grad_norm": 0.5802999529238541, + "learning_rate": 1.329349324608401e-06, + "loss": 0.3678, + "step": 18263 + }, + { + "epoch": 0.84, + "grad_norm": 0.33773388141461513, + "learning_rate": 1.3286081468057689e-06, + "loss": 0.2429, + "step": 18264 + }, + { + "epoch": 0.84, + "grad_norm": 0.4996610573554146, + "learning_rate": 1.3278671609793915e-06, + "loss": 0.2491, + "step": 18265 + }, + { + "epoch": 0.84, + "grad_norm": 0.29291532265947956, + "learning_rate": 1.327126367145678e-06, + "loss": 0.2025, + "step": 18266 + }, + { + "epoch": 0.84, + "grad_norm": 0.7046132345240388, + "learning_rate": 1.32638576532103e-06, + "loss": 0.3679, + "step": 18267 + }, + { + "epoch": 0.84, + "grad_norm": 0.26820225104211287, + "learning_rate": 1.3256453555218385e-06, + "loss": 0.2006, + "step": 18268 + }, + { + "epoch": 0.84, + "grad_norm": 0.6488939108170363, + "learning_rate": 1.3249051377645018e-06, + "loss": 0.3285, + "step": 18269 + }, + { + "epoch": 0.84, + "grad_norm": 1.3610509276466798, + "learning_rate": 1.3241651120653986e-06, + "loss": 0.6752, + "step": 18270 + }, + { + "epoch": 0.84, + "grad_norm": 0.3166872234356702, + "learning_rate": 1.323425278440923e-06, + "loss": 0.1888, + "step": 18271 + }, + { + "epoch": 0.84, + "grad_norm": 0.49699065927766595, + "learning_rate": 1.322685636907447e-06, + "loss": 0.335, + "step": 18272 + }, + { + "epoch": 0.84, + "grad_norm": 0.34568880395028856, + "learning_rate": 1.3219461874813489e-06, + "loss": 0.2614, + "step": 18273 + }, + { + "epoch": 0.84, + "grad_norm": 0.27129210885590926, + "learning_rate": 1.3212069301789966e-06, + "loss": 0.1516, + "step": 18274 + }, + { + "epoch": 0.84, + "grad_norm": 1.341592904412576, + "learning_rate": 1.320467865016759e-06, + "loss": 0.6056, + "step": 18275 + }, + { + "epoch": 0.84, + "grad_norm": 0.3963328010129757, + "learning_rate": 1.3197289920109991e-06, + "loss": 0.3146, + "step": 18276 + }, + { + "epoch": 0.84, + "grad_norm": 0.35776597043376046, + "learning_rate": 1.31899031117807e-06, + "loss": 0.0731, + "step": 18277 + }, + { + "epoch": 0.84, + "grad_norm": 0.4458153789723602, + "learning_rate": 1.3182518225343322e-06, + "loss": 0.2877, + "step": 18278 + }, + { + "epoch": 0.84, + "grad_norm": 0.2992467474875995, + "learning_rate": 1.317513526096128e-06, + "loss": 0.2161, + "step": 18279 + }, + { + "epoch": 0.84, + "grad_norm": 0.37573354572968565, + "learning_rate": 1.3167754218798067e-06, + "loss": 0.2469, + "step": 18280 + }, + { + "epoch": 0.84, + "grad_norm": 0.45212814681935537, + "learning_rate": 1.3160375099017108e-06, + "loss": 0.2403, + "step": 18281 + }, + { + "epoch": 0.84, + "grad_norm": 1.173705423779554, + "learning_rate": 1.3152997901781717e-06, + "loss": 0.4724, + "step": 18282 + }, + { + "epoch": 0.84, + "grad_norm": 0.5982283479752917, + "learning_rate": 1.314562262725526e-06, + "loss": 0.2895, + "step": 18283 + }, + { + "epoch": 0.84, + "grad_norm": 0.2622329488110608, + "learning_rate": 1.3138249275601024e-06, + "loss": 0.2383, + "step": 18284 + }, + { + "epoch": 0.84, + "grad_norm": 0.9048839619647755, + "learning_rate": 1.3130877846982204e-06, + "loss": 0.4223, + "step": 18285 + }, + { + "epoch": 0.84, + "grad_norm": 0.22927404048248723, + "learning_rate": 1.3123508341562052e-06, + "loss": 0.1417, + "step": 18286 + }, + { + "epoch": 0.84, + "grad_norm": 0.38274260976284213, + "learning_rate": 1.3116140759503648e-06, + "loss": 0.2588, + "step": 18287 + }, + { + "epoch": 0.84, + "grad_norm": 0.3697216507581401, + "learning_rate": 1.3108775100970183e-06, + "loss": 0.2864, + "step": 18288 + }, + { + "epoch": 0.84, + "grad_norm": 0.4037774986981472, + "learning_rate": 1.3101411366124682e-06, + "loss": 0.2873, + "step": 18289 + }, + { + "epoch": 0.84, + "grad_norm": 0.535087843370923, + "learning_rate": 1.3094049555130195e-06, + "loss": 0.2584, + "step": 18290 + }, + { + "epoch": 0.84, + "grad_norm": 0.3860786489524906, + "learning_rate": 1.3086689668149665e-06, + "loss": 0.2024, + "step": 18291 + }, + { + "epoch": 0.84, + "grad_norm": 0.2748769232022338, + "learning_rate": 1.3079331705346055e-06, + "loss": 0.2421, + "step": 18292 + }, + { + "epoch": 0.84, + "grad_norm": 0.6193863712694652, + "learning_rate": 1.3071975666882297e-06, + "loss": 0.3448, + "step": 18293 + }, + { + "epoch": 0.84, + "grad_norm": 1.1877291402041013, + "learning_rate": 1.3064621552921185e-06, + "loss": 0.348, + "step": 18294 + }, + { + "epoch": 0.84, + "grad_norm": 0.5644565979741699, + "learning_rate": 1.305726936362559e-06, + "loss": 0.3117, + "step": 18295 + }, + { + "epoch": 0.84, + "grad_norm": 0.2795727052023342, + "learning_rate": 1.3049919099158236e-06, + "loss": 0.2676, + "step": 18296 + }, + { + "epoch": 0.84, + "grad_norm": 0.5399081304840454, + "learning_rate": 1.3042570759681862e-06, + "loss": 0.1559, + "step": 18297 + }, + { + "epoch": 0.84, + "grad_norm": 0.39383334333303627, + "learning_rate": 1.3035224345359188e-06, + "loss": 0.236, + "step": 18298 + }, + { + "epoch": 0.84, + "grad_norm": 0.35798682720556085, + "learning_rate": 1.3027879856352798e-06, + "loss": 0.2865, + "step": 18299 + }, + { + "epoch": 0.84, + "grad_norm": 0.3725868116203345, + "learning_rate": 1.302053729282533e-06, + "loss": 0.2502, + "step": 18300 + }, + { + "epoch": 0.84, + "grad_norm": 0.6089947177018004, + "learning_rate": 1.3013196654939341e-06, + "loss": 0.3097, + "step": 18301 + }, + { + "epoch": 0.84, + "grad_norm": 0.4538844863251005, + "learning_rate": 1.3005857942857358e-06, + "loss": 0.2789, + "step": 18302 + }, + { + "epoch": 0.84, + "grad_norm": 3.075166625570616, + "learning_rate": 1.2998521156741828e-06, + "loss": 0.1584, + "step": 18303 + }, + { + "epoch": 0.84, + "grad_norm": 0.2448033283016496, + "learning_rate": 1.2991186296755142e-06, + "loss": 0.2031, + "step": 18304 + }, + { + "epoch": 0.84, + "grad_norm": 0.4092702064317997, + "learning_rate": 1.2983853363059785e-06, + "loss": 0.3315, + "step": 18305 + }, + { + "epoch": 0.84, + "grad_norm": 0.83165876715656, + "learning_rate": 1.297652235581801e-06, + "loss": 0.4004, + "step": 18306 + }, + { + "epoch": 0.84, + "grad_norm": 0.2920359543645724, + "learning_rate": 1.2969193275192193e-06, + "loss": 0.1855, + "step": 18307 + }, + { + "epoch": 0.84, + "grad_norm": 0.3232254794346485, + "learning_rate": 1.2961866121344524e-06, + "loss": 0.2597, + "step": 18308 + }, + { + "epoch": 0.84, + "grad_norm": 1.7246716482091662, + "learning_rate": 1.2954540894437251e-06, + "loss": 0.4892, + "step": 18309 + }, + { + "epoch": 0.84, + "grad_norm": 0.2525896875108071, + "learning_rate": 1.2947217594632577e-06, + "loss": 0.1357, + "step": 18310 + }, + { + "epoch": 0.84, + "grad_norm": 0.592869668101887, + "learning_rate": 1.2939896222092574e-06, + "loss": 0.3979, + "step": 18311 + }, + { + "epoch": 0.84, + "grad_norm": 0.38455032480939805, + "learning_rate": 1.2932576776979379e-06, + "loss": 0.2811, + "step": 18312 + }, + { + "epoch": 0.84, + "grad_norm": 0.3803467492881652, + "learning_rate": 1.2925259259455004e-06, + "loss": 0.1617, + "step": 18313 + }, + { + "epoch": 0.84, + "grad_norm": 0.4728424813820251, + "learning_rate": 1.2917943669681455e-06, + "loss": 0.2913, + "step": 18314 + }, + { + "epoch": 0.84, + "grad_norm": 0.6000021143131452, + "learning_rate": 1.2910630007820734e-06, + "loss": 0.3354, + "step": 18315 + }, + { + "epoch": 0.84, + "grad_norm": 0.39788999903280187, + "learning_rate": 1.2903318274034692e-06, + "loss": 0.2078, + "step": 18316 + }, + { + "epoch": 0.84, + "grad_norm": 0.3584796959032887, + "learning_rate": 1.2896008468485254e-06, + "loss": 0.2783, + "step": 18317 + }, + { + "epoch": 0.84, + "grad_norm": 0.7011700070386414, + "learning_rate": 1.2888700591334225e-06, + "loss": 0.3873, + "step": 18318 + }, + { + "epoch": 0.84, + "grad_norm": 0.24871554069145616, + "learning_rate": 1.2881394642743438e-06, + "loss": 0.175, + "step": 18319 + }, + { + "epoch": 0.84, + "grad_norm": 0.2729341860865139, + "learning_rate": 1.287409062287458e-06, + "loss": 0.2104, + "step": 18320 + }, + { + "epoch": 0.84, + "grad_norm": 1.4375318188899784, + "learning_rate": 1.2866788531889406e-06, + "loss": 0.5366, + "step": 18321 + }, + { + "epoch": 0.84, + "grad_norm": 0.82105341222846, + "learning_rate": 1.2859488369949524e-06, + "loss": 0.4634, + "step": 18322 + }, + { + "epoch": 0.84, + "grad_norm": 0.29825471980416135, + "learning_rate": 1.2852190137216592e-06, + "loss": 0.2221, + "step": 18323 + }, + { + "epoch": 0.84, + "grad_norm": 0.5440056084717765, + "learning_rate": 1.28448938338522e-06, + "loss": 0.3199, + "step": 18324 + }, + { + "epoch": 0.84, + "grad_norm": 0.2779128093552639, + "learning_rate": 1.2837599460017824e-06, + "loss": 0.1844, + "step": 18325 + }, + { + "epoch": 0.84, + "grad_norm": 0.38617741696885105, + "learning_rate": 1.2830307015874999e-06, + "loss": 0.1997, + "step": 18326 + }, + { + "epoch": 0.84, + "grad_norm": 0.581865896020572, + "learning_rate": 1.2823016501585172e-06, + "loss": 0.2991, + "step": 18327 + }, + { + "epoch": 0.84, + "grad_norm": 0.464265025004806, + "learning_rate": 1.2815727917309727e-06, + "loss": 0.3087, + "step": 18328 + }, + { + "epoch": 0.84, + "grad_norm": 0.38639436173357034, + "learning_rate": 1.2808441263210059e-06, + "loss": 0.2169, + "step": 18329 + }, + { + "epoch": 0.84, + "grad_norm": 0.7372489939508887, + "learning_rate": 1.2801156539447413e-06, + "loss": 0.4114, + "step": 18330 + }, + { + "epoch": 0.84, + "grad_norm": 0.3046190780578103, + "learning_rate": 1.2793873746183162e-06, + "loss": 0.1886, + "step": 18331 + }, + { + "epoch": 0.84, + "grad_norm": 0.31389461261787965, + "learning_rate": 1.2786592883578497e-06, + "loss": 0.2736, + "step": 18332 + }, + { + "epoch": 0.84, + "grad_norm": 0.4342942100588863, + "learning_rate": 1.2779313951794591e-06, + "loss": 0.0837, + "step": 18333 + }, + { + "epoch": 0.84, + "grad_norm": 0.7217141381624553, + "learning_rate": 1.2772036950992606e-06, + "loss": 0.4407, + "step": 18334 + }, + { + "epoch": 0.84, + "grad_norm": 0.3358247453152375, + "learning_rate": 1.2764761881333653e-06, + "loss": 0.2589, + "step": 18335 + }, + { + "epoch": 0.84, + "grad_norm": 0.3263993836636742, + "learning_rate": 1.2757488742978818e-06, + "loss": 0.2615, + "step": 18336 + }, + { + "epoch": 0.84, + "grad_norm": 0.5077049445081057, + "learning_rate": 1.275021753608907e-06, + "loss": 0.1941, + "step": 18337 + }, + { + "epoch": 0.84, + "grad_norm": 0.2614110612438898, + "learning_rate": 1.2742948260825439e-06, + "loss": 0.1986, + "step": 18338 + }, + { + "epoch": 0.84, + "grad_norm": 0.5720392109357952, + "learning_rate": 1.2735680917348802e-06, + "loss": 0.2299, + "step": 18339 + }, + { + "epoch": 0.84, + "grad_norm": 0.5056769507258967, + "learning_rate": 1.2728415505820091e-06, + "loss": 0.32, + "step": 18340 + }, + { + "epoch": 0.84, + "grad_norm": 0.3825077598483137, + "learning_rate": 1.2721152026400174e-06, + "loss": 0.2643, + "step": 18341 + }, + { + "epoch": 0.84, + "grad_norm": 0.9133805992207336, + "learning_rate": 1.2713890479249803e-06, + "loss": 0.5113, + "step": 18342 + }, + { + "epoch": 0.84, + "grad_norm": 0.35374790470991097, + "learning_rate": 1.2706630864529768e-06, + "loss": 0.2657, + "step": 18343 + }, + { + "epoch": 0.84, + "grad_norm": 0.27519954123799933, + "learning_rate": 1.2699373182400821e-06, + "loss": 0.2188, + "step": 18344 + }, + { + "epoch": 0.84, + "grad_norm": 0.6159065762042707, + "learning_rate": 1.2692117433023577e-06, + "loss": 0.2876, + "step": 18345 + }, + { + "epoch": 0.84, + "grad_norm": 0.5287466580115844, + "learning_rate": 1.2684863616558728e-06, + "loss": 0.2646, + "step": 18346 + }, + { + "epoch": 0.84, + "grad_norm": 0.3716601014966549, + "learning_rate": 1.2677611733166807e-06, + "loss": 0.2721, + "step": 18347 + }, + { + "epoch": 0.84, + "grad_norm": 0.41349985685475216, + "learning_rate": 1.2670361783008446e-06, + "loss": 0.3039, + "step": 18348 + }, + { + "epoch": 0.84, + "grad_norm": 0.42237216986654635, + "learning_rate": 1.2663113766244094e-06, + "loss": 0.1289, + "step": 18349 + }, + { + "epoch": 0.84, + "grad_norm": 0.37579516416562303, + "learning_rate": 1.2655867683034217e-06, + "loss": 0.2729, + "step": 18350 + }, + { + "epoch": 0.84, + "grad_norm": 0.28142624528551063, + "learning_rate": 1.2648623533539262e-06, + "loss": 0.218, + "step": 18351 + }, + { + "epoch": 0.84, + "grad_norm": 0.6212086793523379, + "learning_rate": 1.2641381317919542e-06, + "loss": 0.2815, + "step": 18352 + }, + { + "epoch": 0.84, + "grad_norm": 0.3515714150728949, + "learning_rate": 1.263414103633549e-06, + "loss": 0.252, + "step": 18353 + }, + { + "epoch": 0.84, + "grad_norm": 1.1936057687721544, + "learning_rate": 1.262690268894734e-06, + "loss": 0.6659, + "step": 18354 + }, + { + "epoch": 0.84, + "grad_norm": 0.3774875132235774, + "learning_rate": 1.2619666275915366e-06, + "loss": 0.2317, + "step": 18355 + }, + { + "epoch": 0.84, + "grad_norm": 0.28050655471654656, + "learning_rate": 1.2612431797399738e-06, + "loss": 0.208, + "step": 18356 + }, + { + "epoch": 0.84, + "grad_norm": 0.5790296303316192, + "learning_rate": 1.260519925356064e-06, + "loss": 0.2843, + "step": 18357 + }, + { + "epoch": 0.84, + "grad_norm": 0.8369948805204668, + "learning_rate": 1.2597968644558234e-06, + "loss": 0.4782, + "step": 18358 + }, + { + "epoch": 0.84, + "grad_norm": 0.28237020385287975, + "learning_rate": 1.2590739970552534e-06, + "loss": 0.2009, + "step": 18359 + }, + { + "epoch": 0.84, + "grad_norm": 0.5793622160038471, + "learning_rate": 1.2583513231703604e-06, + "loss": 0.3698, + "step": 18360 + }, + { + "epoch": 0.84, + "grad_norm": 1.1874080773087419, + "learning_rate": 1.2576288428171467e-06, + "loss": 0.4375, + "step": 18361 + }, + { + "epoch": 0.84, + "grad_norm": 0.23865158244412135, + "learning_rate": 1.2569065560116012e-06, + "loss": 0.1534, + "step": 18362 + }, + { + "epoch": 0.84, + "grad_norm": 0.2872731439473153, + "learning_rate": 1.2561844627697205e-06, + "loss": 0.2461, + "step": 18363 + }, + { + "epoch": 0.84, + "grad_norm": 0.653137734886242, + "learning_rate": 1.2554625631074846e-06, + "loss": 0.3894, + "step": 18364 + }, + { + "epoch": 0.84, + "grad_norm": 0.2940214831802127, + "learning_rate": 1.2547408570408826e-06, + "loss": 0.1825, + "step": 18365 + }, + { + "epoch": 0.84, + "grad_norm": 1.2804171550079295, + "learning_rate": 1.2540193445858883e-06, + "loss": 0.7555, + "step": 18366 + }, + { + "epoch": 0.84, + "grad_norm": 0.3872638429881968, + "learning_rate": 1.253298025758477e-06, + "loss": 0.2691, + "step": 18367 + }, + { + "epoch": 0.84, + "grad_norm": 0.31454952072195236, + "learning_rate": 1.252576900574618e-06, + "loss": 0.1804, + "step": 18368 + }, + { + "epoch": 0.84, + "grad_norm": 0.6171198312176669, + "learning_rate": 1.251855969050272e-06, + "loss": 0.3252, + "step": 18369 + }, + { + "epoch": 0.84, + "grad_norm": 0.3328572402900212, + "learning_rate": 1.2511352312014068e-06, + "loss": 0.1682, + "step": 18370 + }, + { + "epoch": 0.84, + "grad_norm": 0.35593740927673934, + "learning_rate": 1.2504146870439726e-06, + "loss": 0.2678, + "step": 18371 + }, + { + "epoch": 0.84, + "grad_norm": 0.38895650973657475, + "learning_rate": 1.2496943365939273e-06, + "loss": 0.2735, + "step": 18372 + }, + { + "epoch": 0.84, + "grad_norm": 1.6649132214142823, + "learning_rate": 1.248974179867214e-06, + "loss": 0.7678, + "step": 18373 + }, + { + "epoch": 0.84, + "grad_norm": 0.34190150944456876, + "learning_rate": 1.2482542168797772e-06, + "loss": 0.2408, + "step": 18374 + }, + { + "epoch": 0.84, + "grad_norm": 0.34079964623374936, + "learning_rate": 1.2475344476475593e-06, + "loss": 0.2487, + "step": 18375 + }, + { + "epoch": 0.84, + "grad_norm": 0.35654738734408165, + "learning_rate": 1.2468148721864904e-06, + "loss": 0.1736, + "step": 18376 + }, + { + "epoch": 0.84, + "grad_norm": 0.36699083278516226, + "learning_rate": 1.2460954905125045e-06, + "loss": 0.2786, + "step": 18377 + }, + { + "epoch": 0.84, + "grad_norm": 1.127813046447214, + "learning_rate": 1.245376302641529e-06, + "loss": 0.5096, + "step": 18378 + }, + { + "epoch": 0.84, + "grad_norm": 0.43460507228742423, + "learning_rate": 1.244657308589482e-06, + "loss": 0.3015, + "step": 18379 + }, + { + "epoch": 0.84, + "grad_norm": 0.3490033485265696, + "learning_rate": 1.2439385083722866e-06, + "loss": 0.2437, + "step": 18380 + }, + { + "epoch": 0.84, + "grad_norm": 0.7161084328908159, + "learning_rate": 1.243219902005851e-06, + "loss": 0.2815, + "step": 18381 + }, + { + "epoch": 0.84, + "grad_norm": 0.2883059202100981, + "learning_rate": 1.2425014895060871e-06, + "loss": 0.1833, + "step": 18382 + }, + { + "epoch": 0.84, + "grad_norm": 0.5917663070907673, + "learning_rate": 1.2417832708888988e-06, + "loss": 0.2576, + "step": 18383 + }, + { + "epoch": 0.84, + "grad_norm": 0.39891384710206995, + "learning_rate": 1.2410652461701899e-06, + "loss": 0.3189, + "step": 18384 + }, + { + "epoch": 0.84, + "grad_norm": 0.7750257887709735, + "learning_rate": 1.2403474153658534e-06, + "loss": 0.291, + "step": 18385 + }, + { + "epoch": 0.84, + "grad_norm": 0.4226853018298562, + "learning_rate": 1.239629778491781e-06, + "loss": 0.2643, + "step": 18386 + }, + { + "epoch": 0.84, + "grad_norm": 0.30198170067587116, + "learning_rate": 1.2389123355638655e-06, + "loss": 0.275, + "step": 18387 + }, + { + "epoch": 0.84, + "grad_norm": 0.19785658811124057, + "learning_rate": 1.2381950865979841e-06, + "loss": 0.0726, + "step": 18388 + }, + { + "epoch": 0.84, + "grad_norm": 0.43604604000047653, + "learning_rate": 1.237478031610021e-06, + "loss": 0.2834, + "step": 18389 + }, + { + "epoch": 0.84, + "grad_norm": 0.5858892931742264, + "learning_rate": 1.2367611706158467e-06, + "loss": 0.4341, + "step": 18390 + }, + { + "epoch": 0.84, + "grad_norm": 0.34141748132359667, + "learning_rate": 1.236044503631333e-06, + "loss": 0.2531, + "step": 18391 + }, + { + "epoch": 0.84, + "grad_norm": 0.40263935702471865, + "learning_rate": 1.2353280306723503e-06, + "loss": 0.2555, + "step": 18392 + }, + { + "epoch": 0.84, + "grad_norm": 0.747953255506472, + "learning_rate": 1.234611751754755e-06, + "loss": 0.3262, + "step": 18393 + }, + { + "epoch": 0.85, + "grad_norm": 0.3850009499023764, + "learning_rate": 1.233895666894408e-06, + "loss": 0.193, + "step": 18394 + }, + { + "epoch": 0.85, + "grad_norm": 0.26063635051246775, + "learning_rate": 1.2331797761071618e-06, + "loss": 0.2105, + "step": 18395 + }, + { + "epoch": 0.85, + "grad_norm": 1.2837809213226825, + "learning_rate": 1.2324640794088671e-06, + "loss": 0.675, + "step": 18396 + }, + { + "epoch": 0.85, + "grad_norm": 0.6673461970980067, + "learning_rate": 1.2317485768153681e-06, + "loss": 0.3818, + "step": 18397 + }, + { + "epoch": 0.85, + "grad_norm": 0.3355873691697014, + "learning_rate": 1.231033268342503e-06, + "loss": 0.2015, + "step": 18398 + }, + { + "epoch": 0.85, + "grad_norm": 0.38211933202847975, + "learning_rate": 1.2303181540061083e-06, + "loss": 0.2927, + "step": 18399 + }, + { + "epoch": 0.85, + "grad_norm": 0.6422682583427372, + "learning_rate": 1.2296032338220188e-06, + "loss": 0.2135, + "step": 18400 + }, + { + "epoch": 0.85, + "grad_norm": 0.3774542282538439, + "learning_rate": 1.2288885078060619e-06, + "loss": 0.1593, + "step": 18401 + }, + { + "epoch": 0.85, + "grad_norm": 0.38730285842565143, + "learning_rate": 1.2281739759740575e-06, + "loss": 0.2825, + "step": 18402 + }, + { + "epoch": 0.85, + "grad_norm": 0.3656620418719611, + "learning_rate": 1.227459638341828e-06, + "loss": 0.3124, + "step": 18403 + }, + { + "epoch": 0.85, + "grad_norm": 0.36613403173972836, + "learning_rate": 1.2267454949251877e-06, + "loss": 0.0835, + "step": 18404 + }, + { + "epoch": 0.85, + "grad_norm": 0.48136195962327094, + "learning_rate": 1.2260315457399453e-06, + "loss": 0.2718, + "step": 18405 + }, + { + "epoch": 0.85, + "grad_norm": 0.3663269618951646, + "learning_rate": 1.22531779080191e-06, + "loss": 0.2322, + "step": 18406 + }, + { + "epoch": 0.85, + "grad_norm": 0.36647094127241453, + "learning_rate": 1.224604230126879e-06, + "loss": 0.2558, + "step": 18407 + }, + { + "epoch": 0.85, + "grad_norm": 0.5431615183956858, + "learning_rate": 1.2238908637306534e-06, + "loss": 0.2537, + "step": 18408 + }, + { + "epoch": 0.85, + "grad_norm": 0.8529799629959973, + "learning_rate": 1.2231776916290273e-06, + "loss": 0.4398, + "step": 18409 + }, + { + "epoch": 0.85, + "grad_norm": 0.3409475009270906, + "learning_rate": 1.2224647138377854e-06, + "loss": 0.1921, + "step": 18410 + }, + { + "epoch": 0.85, + "grad_norm": 0.25114416065541095, + "learning_rate": 1.2217519303727165e-06, + "loss": 0.2237, + "step": 18411 + }, + { + "epoch": 0.85, + "grad_norm": 1.4371853581217737, + "learning_rate": 1.2210393412495958e-06, + "loss": 0.439, + "step": 18412 + }, + { + "epoch": 0.85, + "grad_norm": 0.6216958555918276, + "learning_rate": 1.2203269464842071e-06, + "loss": 0.3551, + "step": 18413 + }, + { + "epoch": 0.85, + "grad_norm": 0.3996980767578418, + "learning_rate": 1.219614746092318e-06, + "loss": 0.2302, + "step": 18414 + }, + { + "epoch": 0.85, + "grad_norm": 0.3464015156164392, + "learning_rate": 1.2189027400896935e-06, + "loss": 0.2825, + "step": 18415 + }, + { + "epoch": 0.85, + "grad_norm": 0.26568681649238823, + "learning_rate": 1.218190928492099e-06, + "loss": 0.2043, + "step": 18416 + }, + { + "epoch": 0.85, + "grad_norm": 0.6667343485944556, + "learning_rate": 1.2174793113152928e-06, + "loss": 0.2094, + "step": 18417 + }, + { + "epoch": 0.85, + "grad_norm": 0.38563511906128456, + "learning_rate": 1.2167678885750322e-06, + "loss": 0.2642, + "step": 18418 + }, + { + "epoch": 0.85, + "grad_norm": 0.8779205264619279, + "learning_rate": 1.216056660287064e-06, + "loss": 0.4627, + "step": 18419 + }, + { + "epoch": 0.85, + "grad_norm": 0.3666020684536555, + "learning_rate": 1.2153456264671337e-06, + "loss": 0.2801, + "step": 18420 + }, + { + "epoch": 0.85, + "grad_norm": 0.3011981537559982, + "learning_rate": 1.2146347871309882e-06, + "loss": 0.1666, + "step": 18421 + }, + { + "epoch": 0.85, + "grad_norm": 0.4236458103987891, + "learning_rate": 1.2139241422943582e-06, + "loss": 0.2915, + "step": 18422 + }, + { + "epoch": 0.85, + "grad_norm": 0.3151596465324451, + "learning_rate": 1.213213691972981e-06, + "loss": 0.2702, + "step": 18423 + }, + { + "epoch": 0.85, + "grad_norm": 1.846776675659577, + "learning_rate": 1.2125034361825805e-06, + "loss": 0.2163, + "step": 18424 + }, + { + "epoch": 0.85, + "grad_norm": 0.6079796256047143, + "learning_rate": 1.2117933749388889e-06, + "loss": 0.3397, + "step": 18425 + }, + { + "epoch": 0.85, + "grad_norm": 0.3888254720613999, + "learning_rate": 1.211083508257621e-06, + "loss": 0.268, + "step": 18426 + }, + { + "epoch": 0.85, + "grad_norm": 0.3544895414106576, + "learning_rate": 1.2103738361544914e-06, + "loss": 0.2573, + "step": 18427 + }, + { + "epoch": 0.85, + "grad_norm": 0.24196975633846807, + "learning_rate": 1.209664358645216e-06, + "loss": 0.1442, + "step": 18428 + }, + { + "epoch": 0.85, + "grad_norm": 0.41865785788101456, + "learning_rate": 1.2089550757454948e-06, + "loss": 0.2709, + "step": 18429 + }, + { + "epoch": 0.85, + "grad_norm": 0.43089496441061653, + "learning_rate": 1.2082459874710405e-06, + "loss": 0.2354, + "step": 18430 + }, + { + "epoch": 0.85, + "grad_norm": 0.3945539937173343, + "learning_rate": 1.207537093837543e-06, + "loss": 0.2673, + "step": 18431 + }, + { + "epoch": 0.85, + "grad_norm": 0.4417466505795868, + "learning_rate": 1.206828394860703e-06, + "loss": 0.3009, + "step": 18432 + }, + { + "epoch": 0.85, + "grad_norm": 0.9144225172149618, + "learning_rate": 1.2061198905562043e-06, + "loss": 0.5229, + "step": 18433 + }, + { + "epoch": 0.85, + "grad_norm": 0.27560003886959195, + "learning_rate": 1.2054115809397371e-06, + "loss": 0.1815, + "step": 18434 + }, + { + "epoch": 0.85, + "grad_norm": 0.2983885965929589, + "learning_rate": 1.2047034660269818e-06, + "loss": 0.2222, + "step": 18435 + }, + { + "epoch": 0.85, + "grad_norm": 1.0792518302069471, + "learning_rate": 1.203995545833614e-06, + "loss": 0.4479, + "step": 18436 + }, + { + "epoch": 0.85, + "grad_norm": 0.5819984334549985, + "learning_rate": 1.2032878203753062e-06, + "loss": 0.2647, + "step": 18437 + }, + { + "epoch": 0.85, + "grad_norm": 0.34653831927836926, + "learning_rate": 1.2025802896677297e-06, + "loss": 0.2613, + "step": 18438 + }, + { + "epoch": 0.85, + "grad_norm": 0.408647250261592, + "learning_rate": 1.201872953726544e-06, + "loss": 0.3144, + "step": 18439 + }, + { + "epoch": 0.85, + "grad_norm": 0.2955337592511455, + "learning_rate": 1.2011658125674141e-06, + "loss": 0.078, + "step": 18440 + }, + { + "epoch": 0.85, + "grad_norm": 0.4266059742784034, + "learning_rate": 1.2004588662059913e-06, + "loss": 0.2686, + "step": 18441 + }, + { + "epoch": 0.85, + "grad_norm": 0.35470911800902205, + "learning_rate": 1.1997521146579272e-06, + "loss": 0.2912, + "step": 18442 + }, + { + "epoch": 0.85, + "grad_norm": 0.6660419688271784, + "learning_rate": 1.1990455579388715e-06, + "loss": 0.2425, + "step": 18443 + }, + { + "epoch": 0.85, + "grad_norm": 0.36518878123619075, + "learning_rate": 1.198339196064463e-06, + "loss": 0.2698, + "step": 18444 + }, + { + "epoch": 0.85, + "grad_norm": 1.3555596696696948, + "learning_rate": 1.1976330290503434e-06, + "loss": 0.7061, + "step": 18445 + }, + { + "epoch": 0.85, + "grad_norm": 0.33828814233024784, + "learning_rate": 1.1969270569121406e-06, + "loss": 0.2644, + "step": 18446 + }, + { + "epoch": 0.85, + "grad_norm": 0.313220307387397, + "learning_rate": 1.1962212796654927e-06, + "loss": 0.1659, + "step": 18447 + }, + { + "epoch": 0.85, + "grad_norm": 0.8882947173598601, + "learning_rate": 1.1955156973260184e-06, + "loss": 0.3488, + "step": 18448 + }, + { + "epoch": 0.85, + "grad_norm": 0.7134463739868675, + "learning_rate": 1.1948103099093422e-06, + "loss": 0.3772, + "step": 18449 + }, + { + "epoch": 0.85, + "grad_norm": 0.3530218438364439, + "learning_rate": 1.1941051174310768e-06, + "loss": 0.2048, + "step": 18450 + }, + { + "epoch": 0.85, + "grad_norm": 0.4085928759120029, + "learning_rate": 1.193400119906838e-06, + "loss": 0.3069, + "step": 18451 + }, + { + "epoch": 0.85, + "grad_norm": 0.23021038491267404, + "learning_rate": 1.1926953173522337e-06, + "loss": 0.1281, + "step": 18452 + }, + { + "epoch": 0.85, + "grad_norm": 0.34971224361402575, + "learning_rate": 1.1919907097828654e-06, + "loss": 0.1986, + "step": 18453 + }, + { + "epoch": 0.85, + "grad_norm": 0.37305292626635445, + "learning_rate": 1.1912862972143325e-06, + "loss": 0.2933, + "step": 18454 + }, + { + "epoch": 0.85, + "grad_norm": 1.1901580456930732, + "learning_rate": 1.1905820796622336e-06, + "loss": 0.4686, + "step": 18455 + }, + { + "epoch": 0.85, + "grad_norm": 0.2882285808753656, + "learning_rate": 1.1898780571421554e-06, + "loss": 0.1883, + "step": 18456 + }, + { + "epoch": 0.85, + "grad_norm": 1.4396089483610623, + "learning_rate": 1.1891742296696873e-06, + "loss": 0.6281, + "step": 18457 + }, + { + "epoch": 0.85, + "grad_norm": 0.42424674443129645, + "learning_rate": 1.188470597260407e-06, + "loss": 0.2773, + "step": 18458 + }, + { + "epoch": 0.85, + "grad_norm": 0.2570280054579128, + "learning_rate": 1.1877671599298957e-06, + "loss": 0.202, + "step": 18459 + }, + { + "epoch": 0.85, + "grad_norm": 0.46903292085253917, + "learning_rate": 1.187063917693726e-06, + "loss": 0.2065, + "step": 18460 + }, + { + "epoch": 0.85, + "grad_norm": 1.3451293997278146, + "learning_rate": 1.186360870567469e-06, + "loss": 0.7321, + "step": 18461 + }, + { + "epoch": 0.85, + "grad_norm": 0.2989824796331753, + "learning_rate": 1.1856580185666878e-06, + "loss": 0.2538, + "step": 18462 + }, + { + "epoch": 0.85, + "grad_norm": 0.46328141452638744, + "learning_rate": 1.1849553617069386e-06, + "loss": 0.2562, + "step": 18463 + }, + { + "epoch": 0.85, + "grad_norm": 1.0501533551359017, + "learning_rate": 1.184252900003786e-06, + "loss": 0.3843, + "step": 18464 + }, + { + "epoch": 0.85, + "grad_norm": 0.3474449293072234, + "learning_rate": 1.1835506334727754e-06, + "loss": 0.2698, + "step": 18465 + }, + { + "epoch": 0.85, + "grad_norm": 0.3659375824110046, + "learning_rate": 1.1828485621294583e-06, + "loss": 0.2465, + "step": 18466 + }, + { + "epoch": 0.85, + "grad_norm": 0.39388991680878294, + "learning_rate": 1.1821466859893738e-06, + "loss": 0.1863, + "step": 18467 + }, + { + "epoch": 0.85, + "grad_norm": 0.3827593734476382, + "learning_rate": 1.1814450050680626e-06, + "loss": 0.2705, + "step": 18468 + }, + { + "epoch": 0.85, + "grad_norm": 1.267487868262863, + "learning_rate": 1.1807435193810623e-06, + "loss": 0.3009, + "step": 18469 + }, + { + "epoch": 0.85, + "grad_norm": 0.35637710741488415, + "learning_rate": 1.1800422289438984e-06, + "loss": 0.2851, + "step": 18470 + }, + { + "epoch": 0.85, + "grad_norm": 0.3520532584793097, + "learning_rate": 1.1793411337720994e-06, + "loss": 0.2447, + "step": 18471 + }, + { + "epoch": 0.85, + "grad_norm": 0.49041255804908956, + "learning_rate": 1.1786402338811853e-06, + "loss": 0.2563, + "step": 18472 + }, + { + "epoch": 0.85, + "grad_norm": 0.2924966326105871, + "learning_rate": 1.1779395292866746e-06, + "loss": 0.1689, + "step": 18473 + }, + { + "epoch": 0.85, + "grad_norm": 0.35869819876629216, + "learning_rate": 1.1772390200040817e-06, + "loss": 0.2769, + "step": 18474 + }, + { + "epoch": 0.85, + "grad_norm": 0.5236479125020699, + "learning_rate": 1.176538706048911e-06, + "loss": 0.3476, + "step": 18475 + }, + { + "epoch": 0.85, + "grad_norm": 0.7615581066627264, + "learning_rate": 1.1758385874366696e-06, + "loss": 0.2677, + "step": 18476 + }, + { + "epoch": 0.85, + "grad_norm": 0.34906258249323385, + "learning_rate": 1.1751386641828567e-06, + "loss": 0.259, + "step": 18477 + }, + { + "epoch": 0.85, + "grad_norm": 0.4076652830941242, + "learning_rate": 1.1744389363029707e-06, + "loss": 0.2771, + "step": 18478 + }, + { + "epoch": 0.85, + "grad_norm": 0.1865067892014689, + "learning_rate": 1.1737394038124994e-06, + "loss": 0.099, + "step": 18479 + }, + { + "epoch": 0.85, + "grad_norm": 0.3854405676527759, + "learning_rate": 1.1730400667269282e-06, + "loss": 0.2569, + "step": 18480 + }, + { + "epoch": 0.85, + "grad_norm": 1.659534759891341, + "learning_rate": 1.1723409250617456e-06, + "loss": 0.5704, + "step": 18481 + }, + { + "epoch": 0.85, + "grad_norm": 0.3917114573579003, + "learning_rate": 1.1716419788324252e-06, + "loss": 0.251, + "step": 18482 + }, + { + "epoch": 0.85, + "grad_norm": 0.3418924829861694, + "learning_rate": 1.170943228054444e-06, + "loss": 0.2494, + "step": 18483 + }, + { + "epoch": 0.85, + "grad_norm": 1.69043375622048, + "learning_rate": 1.1702446727432681e-06, + "loss": 0.5016, + "step": 18484 + }, + { + "epoch": 0.85, + "grad_norm": 0.30023143315625417, + "learning_rate": 1.1695463129143647e-06, + "loss": 0.2242, + "step": 18485 + }, + { + "epoch": 0.85, + "grad_norm": 0.2694071417644677, + "learning_rate": 1.1688481485831982e-06, + "loss": 0.2039, + "step": 18486 + }, + { + "epoch": 0.85, + "grad_norm": 0.48177585407850515, + "learning_rate": 1.1681501797652194e-06, + "loss": 0.3259, + "step": 18487 + }, + { + "epoch": 0.85, + "grad_norm": 0.7888364205142923, + "learning_rate": 1.1674524064758851e-06, + "loss": 0.3538, + "step": 18488 + }, + { + "epoch": 0.85, + "grad_norm": 0.5074515066881172, + "learning_rate": 1.1667548287306373e-06, + "loss": 0.1677, + "step": 18489 + }, + { + "epoch": 0.85, + "grad_norm": 0.3374891337953125, + "learning_rate": 1.1660574465449293e-06, + "loss": 0.2741, + "step": 18490 + }, + { + "epoch": 0.85, + "grad_norm": 0.4321489689657549, + "learning_rate": 1.165360259934194e-06, + "loss": 0.3, + "step": 18491 + }, + { + "epoch": 0.85, + "grad_norm": 0.3102724751799246, + "learning_rate": 1.164663268913866e-06, + "loss": 0.18, + "step": 18492 + }, + { + "epoch": 0.85, + "grad_norm": 0.4619999679870109, + "learning_rate": 1.1639664734993783e-06, + "loss": 0.2922, + "step": 18493 + }, + { + "epoch": 0.85, + "grad_norm": 0.3656184367885025, + "learning_rate": 1.1632698737061553e-06, + "loss": 0.2843, + "step": 18494 + }, + { + "epoch": 0.85, + "grad_norm": 0.303024305594965, + "learning_rate": 1.162573469549624e-06, + "loss": 0.1751, + "step": 18495 + }, + { + "epoch": 0.85, + "grad_norm": 1.486096433897814, + "learning_rate": 1.1618772610451956e-06, + "loss": 0.4545, + "step": 18496 + }, + { + "epoch": 0.85, + "grad_norm": 1.2467100525061277, + "learning_rate": 1.1611812482082862e-06, + "loss": 0.7977, + "step": 18497 + }, + { + "epoch": 0.85, + "grad_norm": 0.27766831295425964, + "learning_rate": 1.1604854310543068e-06, + "loss": 0.242, + "step": 18498 + }, + { + "epoch": 0.85, + "grad_norm": 0.34487378670716096, + "learning_rate": 1.1597898095986582e-06, + "loss": 0.1768, + "step": 18499 + }, + { + "epoch": 0.85, + "grad_norm": 0.5153320010009277, + "learning_rate": 1.159094383856746e-06, + "loss": 0.291, + "step": 18500 + }, + { + "epoch": 0.85, + "grad_norm": 0.33930731204218867, + "learning_rate": 1.15839915384396e-06, + "loss": 0.2343, + "step": 18501 + }, + { + "epoch": 0.85, + "grad_norm": 0.37880821878352244, + "learning_rate": 1.1577041195756954e-06, + "loss": 0.2204, + "step": 18502 + }, + { + "epoch": 0.85, + "grad_norm": 1.1606668863354124, + "learning_rate": 1.1570092810673417e-06, + "loss": 0.5956, + "step": 18503 + }, + { + "epoch": 0.85, + "grad_norm": 0.4004540540598084, + "learning_rate": 1.156314638334277e-06, + "loss": 0.2616, + "step": 18504 + }, + { + "epoch": 0.85, + "grad_norm": 0.45435963373724036, + "learning_rate": 1.1556201913918852e-06, + "loss": 0.2604, + "step": 18505 + }, + { + "epoch": 0.85, + "grad_norm": 0.24849755773060228, + "learning_rate": 1.1549259402555336e-06, + "loss": 0.1974, + "step": 18506 + }, + { + "epoch": 0.85, + "grad_norm": 0.6358200294507124, + "learning_rate": 1.1542318849406008e-06, + "loss": 0.294, + "step": 18507 + }, + { + "epoch": 0.85, + "grad_norm": 0.4442642138296424, + "learning_rate": 1.1535380254624485e-06, + "loss": 0.3227, + "step": 18508 + }, + { + "epoch": 0.85, + "grad_norm": 0.3333682951005025, + "learning_rate": 1.152844361836435e-06, + "loss": 0.258, + "step": 18509 + }, + { + "epoch": 0.85, + "grad_norm": 0.43541507800716234, + "learning_rate": 1.1521508940779214e-06, + "loss": 0.2654, + "step": 18510 + }, + { + "epoch": 0.85, + "grad_norm": 0.45583122652456276, + "learning_rate": 1.1514576222022589e-06, + "loss": 0.3023, + "step": 18511 + }, + { + "epoch": 0.85, + "grad_norm": 0.25991239478952716, + "learning_rate": 1.1507645462247985e-06, + "loss": 0.0945, + "step": 18512 + }, + { + "epoch": 0.85, + "grad_norm": 0.4177212449984254, + "learning_rate": 1.15007166616088e-06, + "loss": 0.2316, + "step": 18513 + }, + { + "epoch": 0.85, + "grad_norm": 0.3317148581622481, + "learning_rate": 1.149378982025845e-06, + "loss": 0.291, + "step": 18514 + }, + { + "epoch": 0.85, + "grad_norm": 1.1109337001691912, + "learning_rate": 1.1486864938350317e-06, + "loss": 0.4646, + "step": 18515 + }, + { + "epoch": 0.85, + "grad_norm": 0.41470232740767804, + "learning_rate": 1.147994201603766e-06, + "loss": 0.305, + "step": 18516 + }, + { + "epoch": 0.85, + "grad_norm": 0.5199809022853263, + "learning_rate": 1.1473021053473787e-06, + "loss": 0.3363, + "step": 18517 + }, + { + "epoch": 0.85, + "grad_norm": 0.2259227779112923, + "learning_rate": 1.146610205081189e-06, + "loss": 0.1725, + "step": 18518 + }, + { + "epoch": 0.85, + "grad_norm": 0.519393208762347, + "learning_rate": 1.1459185008205154e-06, + "loss": 0.2577, + "step": 18519 + }, + { + "epoch": 0.85, + "grad_norm": 0.6156440578777506, + "learning_rate": 1.1452269925806757e-06, + "loss": 0.3412, + "step": 18520 + }, + { + "epoch": 0.85, + "grad_norm": 0.4908314101488441, + "learning_rate": 1.1445356803769736e-06, + "loss": 0.3236, + "step": 18521 + }, + { + "epoch": 0.85, + "grad_norm": 0.3056714739910209, + "learning_rate": 1.1438445642247177e-06, + "loss": 0.1986, + "step": 18522 + }, + { + "epoch": 0.85, + "grad_norm": 0.5222418531004269, + "learning_rate": 1.1431536441392045e-06, + "loss": 0.3577, + "step": 18523 + }, + { + "epoch": 0.85, + "grad_norm": 0.3275356557577568, + "learning_rate": 1.1424629201357373e-06, + "loss": 0.1731, + "step": 18524 + }, + { + "epoch": 0.85, + "grad_norm": 0.31921402795743786, + "learning_rate": 1.1417723922296008e-06, + "loss": 0.0656, + "step": 18525 + }, + { + "epoch": 0.85, + "grad_norm": 0.2915461046143253, + "learning_rate": 1.1410820604360895e-06, + "loss": 0.2662, + "step": 18526 + }, + { + "epoch": 0.85, + "grad_norm": 0.6702393546228886, + "learning_rate": 1.1403919247704799e-06, + "loss": 0.363, + "step": 18527 + }, + { + "epoch": 0.85, + "grad_norm": 0.7699350196582578, + "learning_rate": 1.139701985248055e-06, + "loss": 0.201, + "step": 18528 + }, + { + "epoch": 0.85, + "grad_norm": 0.3486030571751547, + "learning_rate": 1.1390122418840899e-06, + "loss": 0.3031, + "step": 18529 + }, + { + "epoch": 0.85, + "grad_norm": 0.5443888738535169, + "learning_rate": 1.1383226946938508e-06, + "loss": 0.3448, + "step": 18530 + }, + { + "epoch": 0.85, + "grad_norm": 0.23996665621455848, + "learning_rate": 1.137633343692609e-06, + "loss": 0.1162, + "step": 18531 + }, + { + "epoch": 0.85, + "grad_norm": 0.6117798410059235, + "learning_rate": 1.1369441888956212e-06, + "loss": 0.3571, + "step": 18532 + }, + { + "epoch": 0.85, + "grad_norm": 0.5651548999230909, + "learning_rate": 1.1362552303181473e-06, + "loss": 0.3046, + "step": 18533 + }, + { + "epoch": 0.85, + "grad_norm": 0.4433352793103084, + "learning_rate": 1.1355664679754408e-06, + "loss": 0.2676, + "step": 18534 + }, + { + "epoch": 0.85, + "grad_norm": 0.37247945709177277, + "learning_rate": 1.1348779018827472e-06, + "loss": 0.2424, + "step": 18535 + }, + { + "epoch": 0.85, + "grad_norm": 0.5865115031748358, + "learning_rate": 1.1341895320553132e-06, + "loss": 0.2615, + "step": 18536 + }, + { + "epoch": 0.85, + "grad_norm": 0.31227244792105735, + "learning_rate": 1.1335013585083797e-06, + "loss": 0.2259, + "step": 18537 + }, + { + "epoch": 0.85, + "grad_norm": 0.3225702320974326, + "learning_rate": 1.1328133812571784e-06, + "loss": 0.2181, + "step": 18538 + }, + { + "epoch": 0.85, + "grad_norm": 0.6715692697466935, + "learning_rate": 1.1321256003169445e-06, + "loss": 0.392, + "step": 18539 + }, + { + "epoch": 0.85, + "grad_norm": 0.575511639653504, + "learning_rate": 1.131438015702898e-06, + "loss": 0.316, + "step": 18540 + }, + { + "epoch": 0.85, + "grad_norm": 0.4264359841903588, + "learning_rate": 1.1307506274302715e-06, + "loss": 0.2303, + "step": 18541 + }, + { + "epoch": 0.85, + "grad_norm": 0.3889750839928973, + "learning_rate": 1.1300634355142748e-06, + "loss": 0.2894, + "step": 18542 + }, + { + "epoch": 0.85, + "grad_norm": 0.2564367414375854, + "learning_rate": 1.1293764399701269e-06, + "loss": 0.1589, + "step": 18543 + }, + { + "epoch": 0.85, + "grad_norm": 0.3522245713995217, + "learning_rate": 1.1286896408130333e-06, + "loss": 0.2197, + "step": 18544 + }, + { + "epoch": 0.85, + "grad_norm": 0.3890666027816536, + "learning_rate": 1.1280030380582007e-06, + "loss": 0.2945, + "step": 18545 + }, + { + "epoch": 0.85, + "grad_norm": 0.9348965771651383, + "learning_rate": 1.1273166317208317e-06, + "loss": 0.416, + "step": 18546 + }, + { + "epoch": 0.85, + "grad_norm": 0.36063396892950506, + "learning_rate": 1.1266304218161195e-06, + "loss": 0.2831, + "step": 18547 + }, + { + "epoch": 0.85, + "grad_norm": 0.9122101632942758, + "learning_rate": 1.1259444083592585e-06, + "loss": 0.2347, + "step": 18548 + }, + { + "epoch": 0.85, + "grad_norm": 0.21611097335453472, + "learning_rate": 1.1252585913654347e-06, + "loss": 0.1902, + "step": 18549 + }, + { + "epoch": 0.85, + "grad_norm": 0.4079413680629914, + "learning_rate": 1.124572970849831e-06, + "loss": 0.2586, + "step": 18550 + }, + { + "epoch": 0.85, + "grad_norm": 0.7992349647426591, + "learning_rate": 1.12388754682763e-06, + "loss": 0.2708, + "step": 18551 + }, + { + "epoch": 0.85, + "grad_norm": 0.6392882487613913, + "learning_rate": 1.1232023193140018e-06, + "loss": 0.364, + "step": 18552 + }, + { + "epoch": 0.85, + "grad_norm": 0.3373874855017208, + "learning_rate": 1.1225172883241187e-06, + "loss": 0.2665, + "step": 18553 + }, + { + "epoch": 0.85, + "grad_norm": 0.5934688049772899, + "learning_rate": 1.1218324538731462e-06, + "loss": 0.2202, + "step": 18554 + }, + { + "epoch": 0.85, + "grad_norm": 0.4488962022313426, + "learning_rate": 1.121147815976248e-06, + "loss": 0.2197, + "step": 18555 + }, + { + "epoch": 0.85, + "grad_norm": 0.4192671480643173, + "learning_rate": 1.1204633746485806e-06, + "loss": 0.3262, + "step": 18556 + }, + { + "epoch": 0.85, + "grad_norm": 0.2931286517846229, + "learning_rate": 1.1197791299052907e-06, + "loss": 0.2039, + "step": 18557 + }, + { + "epoch": 0.85, + "grad_norm": 0.7557320222532726, + "learning_rate": 1.1190950817615375e-06, + "loss": 0.3379, + "step": 18558 + }, + { + "epoch": 0.85, + "grad_norm": 0.41521819051202913, + "learning_rate": 1.1184112302324568e-06, + "loss": 0.2919, + "step": 18559 + }, + { + "epoch": 0.85, + "grad_norm": 1.415985480299404, + "learning_rate": 1.1177275753331928e-06, + "loss": 0.5429, + "step": 18560 + }, + { + "epoch": 0.85, + "grad_norm": 0.35862678103296897, + "learning_rate": 1.1170441170788782e-06, + "loss": 0.2417, + "step": 18561 + }, + { + "epoch": 0.85, + "grad_norm": 0.36005089519746364, + "learning_rate": 1.116360855484645e-06, + "loss": 0.2496, + "step": 18562 + }, + { + "epoch": 0.85, + "grad_norm": 0.3429337690560491, + "learning_rate": 1.1156777905656224e-06, + "loss": 0.2259, + "step": 18563 + }, + { + "epoch": 0.85, + "grad_norm": 0.5491816733173454, + "learning_rate": 1.1149949223369282e-06, + "loss": 0.2545, + "step": 18564 + }, + { + "epoch": 0.85, + "grad_norm": 0.29465767492852074, + "learning_rate": 1.1143122508136861e-06, + "loss": 0.2559, + "step": 18565 + }, + { + "epoch": 0.85, + "grad_norm": 0.5897542357407781, + "learning_rate": 1.1136297760110038e-06, + "loss": 0.3093, + "step": 18566 + }, + { + "epoch": 0.85, + "grad_norm": 0.6711588156960568, + "learning_rate": 1.1129474979439937e-06, + "loss": 0.2528, + "step": 18567 + }, + { + "epoch": 0.85, + "grad_norm": 0.3843502712966087, + "learning_rate": 1.1122654166277624e-06, + "loss": 0.2674, + "step": 18568 + }, + { + "epoch": 0.85, + "grad_norm": 0.2714603605856902, + "learning_rate": 1.111583532077407e-06, + "loss": 0.1984, + "step": 18569 + }, + { + "epoch": 0.85, + "grad_norm": 0.4865320618229059, + "learning_rate": 1.1109018443080256e-06, + "loss": 0.2508, + "step": 18570 + }, + { + "epoch": 0.85, + "grad_norm": 0.39454255962482615, + "learning_rate": 1.1102203533347089e-06, + "loss": 0.2685, + "step": 18571 + }, + { + "epoch": 0.85, + "grad_norm": 0.9876325015848909, + "learning_rate": 1.1095390591725485e-06, + "loss": 0.4191, + "step": 18572 + }, + { + "epoch": 0.85, + "grad_norm": 0.3488915733101975, + "learning_rate": 1.1088579618366235e-06, + "loss": 0.2918, + "step": 18573 + }, + { + "epoch": 0.85, + "grad_norm": 0.3382613753915631, + "learning_rate": 1.1081770613420107e-06, + "loss": 0.2211, + "step": 18574 + }, + { + "epoch": 0.85, + "grad_norm": 0.40911927874198656, + "learning_rate": 1.1074963577037912e-06, + "loss": 0.1648, + "step": 18575 + }, + { + "epoch": 0.85, + "grad_norm": 0.7227061940851612, + "learning_rate": 1.1068158509370309e-06, + "loss": 0.4664, + "step": 18576 + }, + { + "epoch": 0.85, + "grad_norm": 0.28891944316542145, + "learning_rate": 1.1061355410567965e-06, + "loss": 0.2013, + "step": 18577 + }, + { + "epoch": 0.85, + "grad_norm": 0.4777330129494525, + "learning_rate": 1.1054554280781483e-06, + "loss": 0.3262, + "step": 18578 + }, + { + "epoch": 0.85, + "grad_norm": 1.4894182154734759, + "learning_rate": 1.1047755120161441e-06, + "loss": 0.3644, + "step": 18579 + }, + { + "epoch": 0.85, + "grad_norm": 0.3323684411387164, + "learning_rate": 1.1040957928858386e-06, + "loss": 0.1797, + "step": 18580 + }, + { + "epoch": 0.85, + "grad_norm": 0.24338116117250558, + "learning_rate": 1.1034162707022765e-06, + "loss": 0.2107, + "step": 18581 + }, + { + "epoch": 0.85, + "grad_norm": 1.2970215337498758, + "learning_rate": 1.1027369454805058e-06, + "loss": 0.6948, + "step": 18582 + }, + { + "epoch": 0.85, + "grad_norm": 0.33714501958101595, + "learning_rate": 1.1020578172355611e-06, + "loss": 0.1935, + "step": 18583 + }, + { + "epoch": 0.85, + "grad_norm": 0.6550779644098745, + "learning_rate": 1.1013788859824804e-06, + "loss": 0.3527, + "step": 18584 + }, + { + "epoch": 0.85, + "grad_norm": 0.37598662860874915, + "learning_rate": 1.1007001517362969e-06, + "loss": 0.3118, + "step": 18585 + }, + { + "epoch": 0.85, + "grad_norm": 0.34706122862601296, + "learning_rate": 1.1000216145120324e-06, + "loss": 0.2601, + "step": 18586 + }, + { + "epoch": 0.85, + "grad_norm": 0.43078861772849436, + "learning_rate": 1.0993432743247123e-06, + "loss": 0.0844, + "step": 18587 + }, + { + "epoch": 0.85, + "grad_norm": 0.48965528302910083, + "learning_rate": 1.0986651311893525e-06, + "loss": 0.3587, + "step": 18588 + }, + { + "epoch": 0.85, + "grad_norm": 0.27665785871164866, + "learning_rate": 1.09798718512097e-06, + "loss": 0.2205, + "step": 18589 + }, + { + "epoch": 0.85, + "grad_norm": 0.5931830683625802, + "learning_rate": 1.0973094361345694e-06, + "loss": 0.2051, + "step": 18590 + }, + { + "epoch": 0.85, + "grad_norm": 0.9625867285124854, + "learning_rate": 1.0966318842451596e-06, + "loss": 0.3589, + "step": 18591 + }, + { + "epoch": 0.85, + "grad_norm": 0.42482197004674255, + "learning_rate": 1.0959545294677366e-06, + "loss": 0.2463, + "step": 18592 + }, + { + "epoch": 0.85, + "grad_norm": 0.29328785612137515, + "learning_rate": 1.0952773718172982e-06, + "loss": 0.2328, + "step": 18593 + }, + { + "epoch": 0.85, + "grad_norm": 0.8097767129126426, + "learning_rate": 1.0946004113088381e-06, + "loss": 0.515, + "step": 18594 + }, + { + "epoch": 0.85, + "grad_norm": 0.38620183909029965, + "learning_rate": 1.09392364795734e-06, + "loss": 0.246, + "step": 18595 + }, + { + "epoch": 0.85, + "grad_norm": 0.36582631139372745, + "learning_rate": 1.0932470817777884e-06, + "loss": 0.184, + "step": 18596 + }, + { + "epoch": 0.85, + "grad_norm": 0.3932779388703519, + "learning_rate": 1.0925707127851648e-06, + "loss": 0.2911, + "step": 18597 + }, + { + "epoch": 0.85, + "grad_norm": 0.34993401479634867, + "learning_rate": 1.0918945409944382e-06, + "loss": 0.2561, + "step": 18598 + }, + { + "epoch": 0.85, + "grad_norm": 1.3545386966927844, + "learning_rate": 1.0912185664205822e-06, + "loss": 0.4351, + "step": 18599 + }, + { + "epoch": 0.85, + "grad_norm": 0.4910395521720643, + "learning_rate": 1.0905427890785569e-06, + "loss": 0.2809, + "step": 18600 + }, + { + "epoch": 0.85, + "grad_norm": 0.2736428517049694, + "learning_rate": 1.0898672089833307e-06, + "loss": 0.2591, + "step": 18601 + }, + { + "epoch": 0.85, + "grad_norm": 0.45646161190044654, + "learning_rate": 1.089191826149858e-06, + "loss": 0.2671, + "step": 18602 + }, + { + "epoch": 0.85, + "grad_norm": 0.40313714418714025, + "learning_rate": 1.088516640593087e-06, + "loss": 0.0856, + "step": 18603 + }, + { + "epoch": 0.85, + "grad_norm": 0.37175668213173024, + "learning_rate": 1.087841652327969e-06, + "loss": 0.2435, + "step": 18604 + }, + { + "epoch": 0.85, + "grad_norm": 0.41340277511544443, + "learning_rate": 1.0871668613694465e-06, + "loss": 0.2999, + "step": 18605 + }, + { + "epoch": 0.85, + "grad_norm": 0.7825194010053912, + "learning_rate": 1.086492267732462e-06, + "loss": 0.3346, + "step": 18606 + }, + { + "epoch": 0.85, + "grad_norm": 0.3235395377144184, + "learning_rate": 1.0858178714319457e-06, + "loss": 0.2616, + "step": 18607 + }, + { + "epoch": 0.85, + "grad_norm": 0.9070543684926277, + "learning_rate": 1.0851436724828323e-06, + "loss": 0.5117, + "step": 18608 + }, + { + "epoch": 0.85, + "grad_norm": 0.22887653680775552, + "learning_rate": 1.0844696709000435e-06, + "loss": 0.1607, + "step": 18609 + }, + { + "epoch": 0.85, + "grad_norm": 0.4379293003026843, + "learning_rate": 1.0837958666985038e-06, + "loss": 0.2256, + "step": 18610 + }, + { + "epoch": 0.85, + "grad_norm": 0.6428478315502666, + "learning_rate": 1.0831222598931312e-06, + "loss": 0.3397, + "step": 18611 + }, + { + "epoch": 0.86, + "grad_norm": 0.3450514052541221, + "learning_rate": 1.082448850498836e-06, + "loss": 0.2865, + "step": 18612 + }, + { + "epoch": 0.86, + "grad_norm": 0.40001107987512025, + "learning_rate": 1.0817756385305278e-06, + "loss": 0.1969, + "step": 18613 + }, + { + "epoch": 0.86, + "grad_norm": 0.563998861088998, + "learning_rate": 1.0811026240031142e-06, + "loss": 0.3492, + "step": 18614 + }, + { + "epoch": 0.86, + "grad_norm": 0.38194266795406717, + "learning_rate": 1.08042980693149e-06, + "loss": 0.1906, + "step": 18615 + }, + { + "epoch": 0.86, + "grad_norm": 0.3059251536968794, + "learning_rate": 1.0797571873305557e-06, + "loss": 0.1833, + "step": 18616 + }, + { + "epoch": 0.86, + "grad_norm": 0.3729300571650092, + "learning_rate": 1.079084765215196e-06, + "loss": 0.3221, + "step": 18617 + }, + { + "epoch": 0.86, + "grad_norm": 0.7939708432623358, + "learning_rate": 1.0784125406003044e-06, + "loss": 0.3815, + "step": 18618 + }, + { + "epoch": 0.86, + "grad_norm": 0.3226179182364412, + "learning_rate": 1.077740513500759e-06, + "loss": 0.206, + "step": 18619 + }, + { + "epoch": 0.86, + "grad_norm": 0.494084465689211, + "learning_rate": 1.0770686839314415e-06, + "loss": 0.3778, + "step": 18620 + }, + { + "epoch": 0.86, + "grad_norm": 0.2610899797124783, + "learning_rate": 1.076397051907222e-06, + "loss": 0.2062, + "step": 18621 + }, + { + "epoch": 0.86, + "grad_norm": 0.36609197532263715, + "learning_rate": 1.0757256174429686e-06, + "loss": 0.1829, + "step": 18622 + }, + { + "epoch": 0.86, + "grad_norm": 1.2806371649262305, + "learning_rate": 1.075054380553552e-06, + "loss": 0.4958, + "step": 18623 + }, + { + "epoch": 0.86, + "grad_norm": 0.5224213459431699, + "learning_rate": 1.0743833412538275e-06, + "loss": 0.3136, + "step": 18624 + }, + { + "epoch": 0.86, + "grad_norm": 0.33866442939322916, + "learning_rate": 1.0737124995586556e-06, + "loss": 0.2536, + "step": 18625 + }, + { + "epoch": 0.86, + "grad_norm": 0.6136305449393494, + "learning_rate": 1.0730418554828836e-06, + "loss": 0.2376, + "step": 18626 + }, + { + "epoch": 0.86, + "grad_norm": 0.2755812819570444, + "learning_rate": 1.0723714090413607e-06, + "loss": 0.1539, + "step": 18627 + }, + { + "epoch": 0.86, + "grad_norm": 0.4479001748828852, + "learning_rate": 1.0717011602489324e-06, + "loss": 0.2721, + "step": 18628 + }, + { + "epoch": 0.86, + "grad_norm": 0.3321924008905497, + "learning_rate": 1.071031109120433e-06, + "loss": 0.2413, + "step": 18629 + }, + { + "epoch": 0.86, + "grad_norm": 0.6372198850325546, + "learning_rate": 1.0703612556706988e-06, + "loss": 0.4011, + "step": 18630 + }, + { + "epoch": 0.86, + "grad_norm": 0.588652120811114, + "learning_rate": 1.0696915999145629e-06, + "loss": 0.304, + "step": 18631 + }, + { + "epoch": 0.86, + "grad_norm": 0.3489001048349934, + "learning_rate": 1.0690221418668444e-06, + "loss": 0.2321, + "step": 18632 + }, + { + "epoch": 0.86, + "grad_norm": 0.26724561066474967, + "learning_rate": 1.0683528815423705e-06, + "loss": 0.1756, + "step": 18633 + }, + { + "epoch": 0.86, + "grad_norm": 0.6235706838075445, + "learning_rate": 1.0676838189559524e-06, + "loss": 0.3158, + "step": 18634 + }, + { + "epoch": 0.86, + "grad_norm": 0.46316771451345295, + "learning_rate": 1.0670149541224085e-06, + "loss": 0.2717, + "step": 18635 + }, + { + "epoch": 0.86, + "grad_norm": 0.4888647477409954, + "learning_rate": 1.066346287056541e-06, + "loss": 0.2817, + "step": 18636 + }, + { + "epoch": 0.86, + "grad_norm": 0.34003879596702186, + "learning_rate": 1.0656778177731597e-06, + "loss": 0.2727, + "step": 18637 + }, + { + "epoch": 0.86, + "grad_norm": 0.6221216111175275, + "learning_rate": 1.0650095462870602e-06, + "loss": 0.3265, + "step": 18638 + }, + { + "epoch": 0.86, + "grad_norm": 0.1710092640478974, + "learning_rate": 1.064341472613033e-06, + "loss": 0.0751, + "step": 18639 + }, + { + "epoch": 0.86, + "grad_norm": 0.3756051190254683, + "learning_rate": 1.0636735967658785e-06, + "loss": 0.2493, + "step": 18640 + }, + { + "epoch": 0.86, + "grad_norm": 0.4166444089144006, + "learning_rate": 1.0630059187603748e-06, + "loss": 0.306, + "step": 18641 + }, + { + "epoch": 0.86, + "grad_norm": 0.6917105693781808, + "learning_rate": 1.0623384386113088e-06, + "loss": 0.3175, + "step": 18642 + }, + { + "epoch": 0.86, + "grad_norm": 0.434312006734094, + "learning_rate": 1.0616711563334537e-06, + "loss": 0.2624, + "step": 18643 + }, + { + "epoch": 0.86, + "grad_norm": 0.5482271338488345, + "learning_rate": 1.0610040719415838e-06, + "loss": 0.3468, + "step": 18644 + }, + { + "epoch": 0.86, + "grad_norm": 0.2673379206838608, + "learning_rate": 1.0603371854504696e-06, + "loss": 0.1656, + "step": 18645 + }, + { + "epoch": 0.86, + "grad_norm": 0.7043577054954441, + "learning_rate": 1.0596704968748727e-06, + "loss": 0.2791, + "step": 18646 + }, + { + "epoch": 0.86, + "grad_norm": 0.41984829674476787, + "learning_rate": 1.059004006229555e-06, + "loss": 0.3107, + "step": 18647 + }, + { + "epoch": 0.86, + "grad_norm": 0.3777457374221175, + "learning_rate": 1.0583377135292728e-06, + "loss": 0.2592, + "step": 18648 + }, + { + "epoch": 0.86, + "grad_norm": 0.36622885453214377, + "learning_rate": 1.0576716187887726e-06, + "loss": 0.1565, + "step": 18649 + }, + { + "epoch": 0.86, + "grad_norm": 0.40042926644571364, + "learning_rate": 1.057005722022807e-06, + "loss": 0.2925, + "step": 18650 + }, + { + "epoch": 0.86, + "grad_norm": 0.51386520284655, + "learning_rate": 1.056340023246113e-06, + "loss": 0.3168, + "step": 18651 + }, + { + "epoch": 0.86, + "grad_norm": 0.3774279752829283, + "learning_rate": 1.055674522473431e-06, + "loss": 0.2032, + "step": 18652 + }, + { + "epoch": 0.86, + "grad_norm": 0.3111044716345392, + "learning_rate": 1.0550092197194939e-06, + "loss": 0.2788, + "step": 18653 + }, + { + "epoch": 0.86, + "grad_norm": 0.4923543900579858, + "learning_rate": 1.054344114999034e-06, + "loss": 0.2927, + "step": 18654 + }, + { + "epoch": 0.86, + "grad_norm": 0.7031399097394464, + "learning_rate": 1.053679208326771e-06, + "loss": 0.2116, + "step": 18655 + }, + { + "epoch": 0.86, + "grad_norm": 0.2990698927859586, + "learning_rate": 1.0530144997174275e-06, + "loss": 0.2706, + "step": 18656 + }, + { + "epoch": 0.86, + "grad_norm": 1.3921179864236217, + "learning_rate": 1.0523499891857226e-06, + "loss": 0.5001, + "step": 18657 + }, + { + "epoch": 0.86, + "grad_norm": 0.4204023472048564, + "learning_rate": 1.0516856767463624e-06, + "loss": 0.216, + "step": 18658 + }, + { + "epoch": 0.86, + "grad_norm": 0.5742226738943116, + "learning_rate": 1.0510215624140596e-06, + "loss": 0.3333, + "step": 18659 + }, + { + "epoch": 0.86, + "grad_norm": 0.23166906659247136, + "learning_rate": 1.0503576462035113e-06, + "loss": 0.2067, + "step": 18660 + }, + { + "epoch": 0.86, + "grad_norm": 0.5574536246054336, + "learning_rate": 1.0496939281294193e-06, + "loss": 0.3572, + "step": 18661 + }, + { + "epoch": 0.86, + "grad_norm": 0.4412555692493903, + "learning_rate": 1.0490304082064795e-06, + "loss": 0.2066, + "step": 18662 + }, + { + "epoch": 0.86, + "grad_norm": 0.6537779637516484, + "learning_rate": 1.0483670864493777e-06, + "loss": 0.3094, + "step": 18663 + }, + { + "epoch": 0.86, + "grad_norm": 0.35007260553789427, + "learning_rate": 1.0477039628728002e-06, + "loss": 0.2793, + "step": 18664 + }, + { + "epoch": 0.86, + "grad_norm": 0.3365208261101066, + "learning_rate": 1.0470410374914286e-06, + "loss": 0.1929, + "step": 18665 + }, + { + "epoch": 0.86, + "grad_norm": 0.28270174219138705, + "learning_rate": 1.046378310319942e-06, + "loss": 0.2013, + "step": 18666 + }, + { + "epoch": 0.86, + "grad_norm": 1.659825124549638, + "learning_rate": 1.0457157813730102e-06, + "loss": 0.6363, + "step": 18667 + }, + { + "epoch": 0.86, + "grad_norm": 0.24674942813648695, + "learning_rate": 1.0450534506652987e-06, + "loss": 0.2024, + "step": 18668 + }, + { + "epoch": 0.86, + "grad_norm": 0.7016017250668796, + "learning_rate": 1.0443913182114717e-06, + "loss": 0.3626, + "step": 18669 + }, + { + "epoch": 0.86, + "grad_norm": 0.7889914087559371, + "learning_rate": 1.0437293840261908e-06, + "loss": 0.3928, + "step": 18670 + }, + { + "epoch": 0.86, + "grad_norm": 0.23909162718263413, + "learning_rate": 1.0430676481241108e-06, + "loss": 0.1554, + "step": 18671 + }, + { + "epoch": 0.86, + "grad_norm": 0.3612949786482525, + "learning_rate": 1.042406110519878e-06, + "loss": 0.2979, + "step": 18672 + }, + { + "epoch": 0.86, + "grad_norm": 0.39391885551147027, + "learning_rate": 1.0417447712281403e-06, + "loss": 0.2506, + "step": 18673 + }, + { + "epoch": 0.86, + "grad_norm": 0.42370122899481627, + "learning_rate": 1.0410836302635418e-06, + "loss": 0.2788, + "step": 18674 + }, + { + "epoch": 0.86, + "grad_norm": 0.7576182433128071, + "learning_rate": 1.0404226876407142e-06, + "loss": 0.2873, + "step": 18675 + }, + { + "epoch": 0.86, + "grad_norm": 0.3436602279063442, + "learning_rate": 1.0397619433742955e-06, + "loss": 0.2553, + "step": 18676 + }, + { + "epoch": 0.86, + "grad_norm": 0.4316210695604483, + "learning_rate": 1.03910139747891e-06, + "loss": 0.2468, + "step": 18677 + }, + { + "epoch": 0.86, + "grad_norm": 0.49456526780372245, + "learning_rate": 1.0384410499691821e-06, + "loss": 0.156, + "step": 18678 + }, + { + "epoch": 0.86, + "grad_norm": 0.39545731892894964, + "learning_rate": 1.037780900859735e-06, + "loss": 0.1928, + "step": 18679 + }, + { + "epoch": 0.86, + "grad_norm": 0.40266402838437787, + "learning_rate": 1.037120950165178e-06, + "loss": 0.282, + "step": 18680 + }, + { + "epoch": 0.86, + "grad_norm": 0.34984594338121056, + "learning_rate": 1.036461197900126e-06, + "loss": 0.2542, + "step": 18681 + }, + { + "epoch": 0.86, + "grad_norm": 0.8954710541362723, + "learning_rate": 1.0358016440791818e-06, + "loss": 0.4169, + "step": 18682 + }, + { + "epoch": 0.86, + "grad_norm": 0.39294471400605707, + "learning_rate": 1.0351422887169515e-06, + "loss": 0.2771, + "step": 18683 + }, + { + "epoch": 0.86, + "grad_norm": 0.3648169587476222, + "learning_rate": 1.03448313182803e-06, + "loss": 0.2684, + "step": 18684 + }, + { + "epoch": 0.86, + "grad_norm": 0.3362345811443943, + "learning_rate": 1.0338241734270116e-06, + "loss": 0.1983, + "step": 18685 + }, + { + "epoch": 0.86, + "grad_norm": 0.4341480724136818, + "learning_rate": 1.033165413528483e-06, + "loss": 0.2504, + "step": 18686 + }, + { + "epoch": 0.86, + "grad_norm": 0.6709914646444647, + "learning_rate": 1.0325068521470294e-06, + "loss": 0.3518, + "step": 18687 + }, + { + "epoch": 0.86, + "grad_norm": 0.36518393518362713, + "learning_rate": 1.0318484892972336e-06, + "loss": 0.2271, + "step": 18688 + }, + { + "epoch": 0.86, + "grad_norm": 0.33176595550482674, + "learning_rate": 1.031190324993666e-06, + "loss": 0.2379, + "step": 18689 + }, + { + "epoch": 0.86, + "grad_norm": 1.3446727491137787, + "learning_rate": 1.030532359250901e-06, + "loss": 0.7188, + "step": 18690 + }, + { + "epoch": 0.86, + "grad_norm": 0.7046058476389304, + "learning_rate": 1.0298745920835073e-06, + "loss": 0.2483, + "step": 18691 + }, + { + "epoch": 0.86, + "grad_norm": 0.26533624323119187, + "learning_rate": 1.0292170235060417e-06, + "loss": 0.242, + "step": 18692 + }, + { + "epoch": 0.86, + "grad_norm": 0.44979835337151663, + "learning_rate": 1.0285596535330667e-06, + "loss": 0.2625, + "step": 18693 + }, + { + "epoch": 0.86, + "grad_norm": 0.4152663737001791, + "learning_rate": 1.0279024821791306e-06, + "loss": 0.1064, + "step": 18694 + }, + { + "epoch": 0.86, + "grad_norm": 0.4107879309756198, + "learning_rate": 1.0272455094587896e-06, + "loss": 0.2779, + "step": 18695 + }, + { + "epoch": 0.86, + "grad_norm": 0.3818477542731597, + "learning_rate": 1.0265887353865856e-06, + "loss": 0.3025, + "step": 18696 + }, + { + "epoch": 0.86, + "grad_norm": 0.5922981129737003, + "learning_rate": 1.0259321599770566e-06, + "loss": 0.2612, + "step": 18697 + }, + { + "epoch": 0.86, + "grad_norm": 0.37941957303468116, + "learning_rate": 1.0252757832447424e-06, + "loss": 0.2739, + "step": 18698 + }, + { + "epoch": 0.86, + "grad_norm": 0.31556211697239017, + "learning_rate": 1.024619605204168e-06, + "loss": 0.1895, + "step": 18699 + }, + { + "epoch": 0.86, + "grad_norm": 0.3344852281024109, + "learning_rate": 1.0239636258698683e-06, + "loss": 0.2608, + "step": 18700 + }, + { + "epoch": 0.86, + "grad_norm": 0.41119821361503944, + "learning_rate": 1.0233078452563617e-06, + "loss": 0.2057, + "step": 18701 + }, + { + "epoch": 0.86, + "grad_norm": 1.2572827350304026, + "learning_rate": 1.0226522633781688e-06, + "loss": 0.6748, + "step": 18702 + }, + { + "epoch": 0.86, + "grad_norm": 0.7157911739472715, + "learning_rate": 1.0219968802498004e-06, + "loss": 0.3409, + "step": 18703 + }, + { + "epoch": 0.86, + "grad_norm": 0.23008948592140596, + "learning_rate": 1.021341695885768e-06, + "loss": 0.2162, + "step": 18704 + }, + { + "epoch": 0.86, + "grad_norm": 0.3741882240694689, + "learning_rate": 1.020686710300579e-06, + "loss": 0.187, + "step": 18705 + }, + { + "epoch": 0.86, + "grad_norm": 1.5313109842837906, + "learning_rate": 1.0200319235087297e-06, + "loss": 0.7347, + "step": 18706 + }, + { + "epoch": 0.86, + "grad_norm": 0.34298063130928497, + "learning_rate": 1.0193773355247183e-06, + "loss": 0.2025, + "step": 18707 + }, + { + "epoch": 0.86, + "grad_norm": 0.37870020913210695, + "learning_rate": 1.01872294636304e-06, + "loss": 0.3148, + "step": 18708 + }, + { + "epoch": 0.86, + "grad_norm": 0.6182895505595318, + "learning_rate": 1.0180687560381764e-06, + "loss": 0.365, + "step": 18709 + }, + { + "epoch": 0.86, + "grad_norm": 0.34425286500000357, + "learning_rate": 1.017414764564616e-06, + "loss": 0.202, + "step": 18710 + }, + { + "epoch": 0.86, + "grad_norm": 0.32506380196461593, + "learning_rate": 1.016760971956834e-06, + "loss": 0.1814, + "step": 18711 + }, + { + "epoch": 0.86, + "grad_norm": 0.4006755358089029, + "learning_rate": 1.0161073782293051e-06, + "loss": 0.2951, + "step": 18712 + }, + { + "epoch": 0.86, + "grad_norm": 0.35815340975359067, + "learning_rate": 1.0154539833964994e-06, + "loss": 0.2658, + "step": 18713 + }, + { + "epoch": 0.86, + "grad_norm": 0.8393129519555721, + "learning_rate": 1.014800787472886e-06, + "loss": 0.3144, + "step": 18714 + }, + { + "epoch": 0.86, + "grad_norm": 0.3909485591427999, + "learning_rate": 1.0141477904729225e-06, + "loss": 0.3407, + "step": 18715 + }, + { + "epoch": 0.86, + "grad_norm": 0.4653553348789618, + "learning_rate": 1.0134949924110627e-06, + "loss": 0.3015, + "step": 18716 + }, + { + "epoch": 0.86, + "grad_norm": 0.36826369904815975, + "learning_rate": 1.0128423933017674e-06, + "loss": 0.1621, + "step": 18717 + }, + { + "epoch": 0.86, + "grad_norm": 0.5138433275092511, + "learning_rate": 1.0121899931594758e-06, + "loss": 0.2427, + "step": 18718 + }, + { + "epoch": 0.86, + "grad_norm": 0.4268475825697538, + "learning_rate": 1.011537791998638e-06, + "loss": 0.2418, + "step": 18719 + }, + { + "epoch": 0.86, + "grad_norm": 0.3213659931256797, + "learning_rate": 1.0108857898336887e-06, + "loss": 0.2468, + "step": 18720 + }, + { + "epoch": 0.86, + "grad_norm": 0.780571705896692, + "learning_rate": 1.0102339866790633e-06, + "loss": 0.4253, + "step": 18721 + }, + { + "epoch": 0.86, + "grad_norm": 0.33284583107544924, + "learning_rate": 1.0095823825491957e-06, + "loss": 0.2841, + "step": 18722 + }, + { + "epoch": 0.86, + "grad_norm": 0.39106314806121506, + "learning_rate": 1.0089309774585066e-06, + "loss": 0.2499, + "step": 18723 + }, + { + "epoch": 0.86, + "grad_norm": 0.3467714150334075, + "learning_rate": 1.008279771421421e-06, + "loss": 0.1785, + "step": 18724 + }, + { + "epoch": 0.86, + "grad_norm": 0.358761772292316, + "learning_rate": 1.0076287644523552e-06, + "loss": 0.2671, + "step": 18725 + }, + { + "epoch": 0.86, + "grad_norm": 0.7464391080990729, + "learning_rate": 1.0069779565657212e-06, + "loss": 0.3853, + "step": 18726 + }, + { + "epoch": 0.86, + "grad_norm": 0.37560015677486686, + "learning_rate": 1.0063273477759283e-06, + "loss": 0.2619, + "step": 18727 + }, + { + "epoch": 0.86, + "grad_norm": 0.35161126729687153, + "learning_rate": 1.0056769380973785e-06, + "loss": 0.2539, + "step": 18728 + }, + { + "epoch": 0.86, + "grad_norm": 1.4776399258691444, + "learning_rate": 1.0050267275444725e-06, + "loss": 0.4742, + "step": 18729 + }, + { + "epoch": 0.86, + "grad_norm": 0.29672215732189927, + "learning_rate": 1.0043767161316053e-06, + "loss": 0.115, + "step": 18730 + }, + { + "epoch": 0.86, + "grad_norm": 0.42685329150290496, + "learning_rate": 1.0037269038731689e-06, + "loss": 0.2683, + "step": 18731 + }, + { + "epoch": 0.86, + "grad_norm": 0.3035689156763195, + "learning_rate": 1.0030772907835484e-06, + "loss": 0.2751, + "step": 18732 + }, + { + "epoch": 0.86, + "grad_norm": 0.9528861895836189, + "learning_rate": 1.0024278768771223e-06, + "loss": 0.3518, + "step": 18733 + }, + { + "epoch": 0.86, + "grad_norm": 0.4244016067804892, + "learning_rate": 1.0017786621682734e-06, + "loss": 0.2457, + "step": 18734 + }, + { + "epoch": 0.86, + "grad_norm": 0.6498856869623918, + "learning_rate": 1.0011296466713717e-06, + "loss": 0.3071, + "step": 18735 + }, + { + "epoch": 0.86, + "grad_norm": 0.2180288729217266, + "learning_rate": 1.0004808304007873e-06, + "loss": 0.1778, + "step": 18736 + }, + { + "epoch": 0.86, + "grad_norm": 0.44904866034245694, + "learning_rate": 9.998322133708827e-07, + "loss": 0.2786, + "step": 18737 + }, + { + "epoch": 0.86, + "grad_norm": 0.561620195355768, + "learning_rate": 9.991837955960171e-07, + "loss": 0.331, + "step": 18738 + }, + { + "epoch": 0.86, + "grad_norm": 0.3642671711563693, + "learning_rate": 9.985355770905502e-07, + "loss": 0.3028, + "step": 18739 + }, + { + "epoch": 0.86, + "grad_norm": 0.3803745405446701, + "learning_rate": 9.978875578688274e-07, + "loss": 0.1789, + "step": 18740 + }, + { + "epoch": 0.86, + "grad_norm": 0.794093409662379, + "learning_rate": 9.972397379452003e-07, + "loss": 0.3393, + "step": 18741 + }, + { + "epoch": 0.86, + "grad_norm": 0.30784399288001274, + "learning_rate": 9.965921173340054e-07, + "loss": 0.1833, + "step": 18742 + }, + { + "epoch": 0.86, + "grad_norm": 0.38089923198040615, + "learning_rate": 9.959446960495845e-07, + "loss": 0.2232, + "step": 18743 + }, + { + "epoch": 0.86, + "grad_norm": 0.3727951098394824, + "learning_rate": 9.952974741062704e-07, + "loss": 0.2799, + "step": 18744 + }, + { + "epoch": 0.86, + "grad_norm": 1.1714135737909928, + "learning_rate": 9.946504515183909e-07, + "loss": 0.7299, + "step": 18745 + }, + { + "epoch": 0.86, + "grad_norm": 0.4089982340334295, + "learning_rate": 9.940036283002695e-07, + "loss": 0.1972, + "step": 18746 + }, + { + "epoch": 0.86, + "grad_norm": 1.677128203283916, + "learning_rate": 9.93357004466229e-07, + "loss": 0.5986, + "step": 18747 + }, + { + "epoch": 0.86, + "grad_norm": 0.3385923689132285, + "learning_rate": 9.927105800305858e-07, + "loss": 0.2825, + "step": 18748 + }, + { + "epoch": 0.86, + "grad_norm": 0.4541128933262122, + "learning_rate": 9.92064355007646e-07, + "loss": 0.2847, + "step": 18749 + }, + { + "epoch": 0.86, + "grad_norm": 0.25828813050281324, + "learning_rate": 9.914183294117197e-07, + "loss": 0.1232, + "step": 18750 + }, + { + "epoch": 0.86, + "grad_norm": 0.37063286003405876, + "learning_rate": 9.907725032571113e-07, + "loss": 0.3158, + "step": 18751 + }, + { + "epoch": 0.86, + "grad_norm": 0.8602429738185712, + "learning_rate": 9.90126876558114e-07, + "loss": 0.4475, + "step": 18752 + }, + { + "epoch": 0.86, + "grad_norm": 0.3524135840110389, + "learning_rate": 9.89481449329026e-07, + "loss": 0.1981, + "step": 18753 + }, + { + "epoch": 0.86, + "grad_norm": 0.6756599718420799, + "learning_rate": 9.88836221584133e-07, + "loss": 0.3721, + "step": 18754 + }, + { + "epoch": 0.86, + "grad_norm": 0.4053602365209885, + "learning_rate": 9.881911933377197e-07, + "loss": 0.2725, + "step": 18755 + }, + { + "epoch": 0.86, + "grad_norm": 0.24205991513098726, + "learning_rate": 9.875463646040706e-07, + "loss": 0.1691, + "step": 18756 + }, + { + "epoch": 0.86, + "grad_norm": 1.3178938304793124, + "learning_rate": 9.869017353974563e-07, + "loss": 0.7411, + "step": 18757 + }, + { + "epoch": 0.86, + "grad_norm": 0.6825938190038657, + "learning_rate": 9.862573057321535e-07, + "loss": 0.285, + "step": 18758 + }, + { + "epoch": 0.86, + "grad_norm": 0.28339053429332756, + "learning_rate": 9.856130756224214e-07, + "loss": 0.2393, + "step": 18759 + }, + { + "epoch": 0.86, + "grad_norm": 0.654179414906362, + "learning_rate": 9.84969045082531e-07, + "loss": 0.3828, + "step": 18760 + }, + { + "epoch": 0.86, + "grad_norm": 0.6054461044150481, + "learning_rate": 9.84325214126739e-07, + "loss": 0.349, + "step": 18761 + }, + { + "epoch": 0.86, + "grad_norm": 0.2407489920656504, + "learning_rate": 9.836815827692936e-07, + "loss": 0.2043, + "step": 18762 + }, + { + "epoch": 0.86, + "grad_norm": 0.35465522223014795, + "learning_rate": 9.830381510244491e-07, + "loss": 0.2598, + "step": 18763 + }, + { + "epoch": 0.86, + "grad_norm": 0.5081768619928726, + "learning_rate": 9.823949189064486e-07, + "loss": 0.2663, + "step": 18764 + }, + { + "epoch": 0.86, + "grad_norm": 0.7252849209384994, + "learning_rate": 9.817518864295362e-07, + "loss": 0.3483, + "step": 18765 + }, + { + "epoch": 0.86, + "grad_norm": 0.7247961035079349, + "learning_rate": 9.811090536079426e-07, + "loss": 0.2804, + "step": 18766 + }, + { + "epoch": 0.86, + "grad_norm": 0.2959093990059026, + "learning_rate": 9.804664204559012e-07, + "loss": 0.2483, + "step": 18767 + }, + { + "epoch": 0.86, + "grad_norm": 0.5427880511897198, + "learning_rate": 9.798239869876435e-07, + "loss": 0.3219, + "step": 18768 + }, + { + "epoch": 0.86, + "grad_norm": 0.1983111592098258, + "learning_rate": 9.791817532173864e-07, + "loss": 0.1259, + "step": 18769 + }, + { + "epoch": 0.86, + "grad_norm": 0.9812437413922588, + "learning_rate": 9.785397191593527e-07, + "loss": 0.4212, + "step": 18770 + }, + { + "epoch": 0.86, + "grad_norm": 0.30938243251541014, + "learning_rate": 9.77897884827752e-07, + "loss": 0.2794, + "step": 18771 + }, + { + "epoch": 0.86, + "grad_norm": 0.41971397759887613, + "learning_rate": 9.772562502367976e-07, + "loss": 0.2564, + "step": 18772 + }, + { + "epoch": 0.86, + "grad_norm": 0.970521899034133, + "learning_rate": 9.766148154006948e-07, + "loss": 0.4832, + "step": 18773 + }, + { + "epoch": 0.86, + "grad_norm": 0.3058992575342056, + "learning_rate": 9.759735803336424e-07, + "loss": 0.2197, + "step": 18774 + }, + { + "epoch": 0.86, + "grad_norm": 0.3813923248200112, + "learning_rate": 9.753325450498386e-07, + "loss": 0.297, + "step": 18775 + }, + { + "epoch": 0.86, + "grad_norm": 0.2935622369039107, + "learning_rate": 9.7469170956347e-07, + "loss": 0.1047, + "step": 18776 + }, + { + "epoch": 0.86, + "grad_norm": 0.37754125436616504, + "learning_rate": 9.740510738887322e-07, + "loss": 0.2866, + "step": 18777 + }, + { + "epoch": 0.86, + "grad_norm": 0.9682928029472468, + "learning_rate": 9.734106380398022e-07, + "loss": 0.4539, + "step": 18778 + }, + { + "epoch": 0.86, + "grad_norm": 0.3432623747260534, + "learning_rate": 9.727704020308638e-07, + "loss": 0.1923, + "step": 18779 + }, + { + "epoch": 0.86, + "grad_norm": 0.45355040898575505, + "learning_rate": 9.72130365876085e-07, + "loss": 0.2924, + "step": 18780 + }, + { + "epoch": 0.86, + "grad_norm": 1.2389473549267436, + "learning_rate": 9.714905295896393e-07, + "loss": 0.7132, + "step": 18781 + }, + { + "epoch": 0.86, + "grad_norm": 0.4757175651789911, + "learning_rate": 9.70850893185693e-07, + "loss": 0.1779, + "step": 18782 + }, + { + "epoch": 0.86, + "grad_norm": 0.34493713925595115, + "learning_rate": 9.702114566784049e-07, + "loss": 0.284, + "step": 18783 + }, + { + "epoch": 0.86, + "grad_norm": 0.36410437011753966, + "learning_rate": 9.695722200819301e-07, + "loss": 0.2522, + "step": 18784 + }, + { + "epoch": 0.86, + "grad_norm": 0.4552950027817988, + "learning_rate": 9.689331834104266e-07, + "loss": 0.1615, + "step": 18785 + }, + { + "epoch": 0.86, + "grad_norm": 0.574644251361816, + "learning_rate": 9.682943466780348e-07, + "loss": 0.357, + "step": 18786 + }, + { + "epoch": 0.86, + "grad_norm": 0.3806069756340322, + "learning_rate": 9.676557098989036e-07, + "loss": 0.3252, + "step": 18787 + }, + { + "epoch": 0.86, + "grad_norm": 0.6665581065850456, + "learning_rate": 9.670172730871674e-07, + "loss": 0.3325, + "step": 18788 + }, + { + "epoch": 0.86, + "grad_norm": 0.35336607403707654, + "learning_rate": 9.663790362569637e-07, + "loss": 0.2202, + "step": 18789 + }, + { + "epoch": 0.86, + "grad_norm": 0.3332132134959122, + "learning_rate": 9.65740999422422e-07, + "loss": 0.156, + "step": 18790 + }, + { + "epoch": 0.86, + "grad_norm": 0.3522909716667261, + "learning_rate": 9.65103162597666e-07, + "loss": 0.2661, + "step": 18791 + }, + { + "epoch": 0.86, + "grad_norm": 0.2976605399255977, + "learning_rate": 9.644655257968204e-07, + "loss": 0.1868, + "step": 18792 + }, + { + "epoch": 0.86, + "grad_norm": 1.2195101490674904, + "learning_rate": 9.638280890339945e-07, + "loss": 0.5236, + "step": 18793 + }, + { + "epoch": 0.86, + "grad_norm": 0.46289429444153046, + "learning_rate": 9.631908523233102e-07, + "loss": 0.3544, + "step": 18794 + }, + { + "epoch": 0.86, + "grad_norm": 0.28082337963282467, + "learning_rate": 9.625538156788683e-07, + "loss": 0.2128, + "step": 18795 + }, + { + "epoch": 0.86, + "grad_norm": 0.3681580811434375, + "learning_rate": 9.619169791147775e-07, + "loss": 0.1702, + "step": 18796 + }, + { + "epoch": 0.86, + "grad_norm": 0.9510674321589174, + "learning_rate": 9.61280342645131e-07, + "loss": 0.3396, + "step": 18797 + }, + { + "epoch": 0.86, + "grad_norm": 0.3352421777033682, + "learning_rate": 9.606439062840256e-07, + "loss": 0.2256, + "step": 18798 + }, + { + "epoch": 0.86, + "grad_norm": 0.3740895714491285, + "learning_rate": 9.60007670045554e-07, + "loss": 0.2849, + "step": 18799 + }, + { + "epoch": 0.86, + "grad_norm": 0.7074572732064504, + "learning_rate": 9.593716339437986e-07, + "loss": 0.3434, + "step": 18800 + }, + { + "epoch": 0.86, + "grad_norm": 0.3824209631427379, + "learning_rate": 9.587357979928414e-07, + "loss": 0.2675, + "step": 18801 + }, + { + "epoch": 0.86, + "grad_norm": 0.29972127657285114, + "learning_rate": 9.581001622067609e-07, + "loss": 0.0784, + "step": 18802 + }, + { + "epoch": 0.86, + "grad_norm": 0.34422641886103383, + "learning_rate": 9.574647265996272e-07, + "loss": 0.2749, + "step": 18803 + }, + { + "epoch": 0.86, + "grad_norm": 0.3940740043548812, + "learning_rate": 9.568294911855102e-07, + "loss": 0.2785, + "step": 18804 + }, + { + "epoch": 0.86, + "grad_norm": 0.6755693768932453, + "learning_rate": 9.561944559784708e-07, + "loss": 0.3163, + "step": 18805 + }, + { + "epoch": 0.86, + "grad_norm": 0.4513552119735532, + "learning_rate": 9.555596209925687e-07, + "loss": 0.3109, + "step": 18806 + }, + { + "epoch": 0.86, + "grad_norm": 0.298287075164543, + "learning_rate": 9.54924986241863e-07, + "loss": 0.2463, + "step": 18807 + }, + { + "epoch": 0.86, + "grad_norm": 0.3326941182035881, + "learning_rate": 9.542905517403977e-07, + "loss": 0.0807, + "step": 18808 + }, + { + "epoch": 0.86, + "grad_norm": 1.500076913625921, + "learning_rate": 9.536563175022229e-07, + "loss": 0.5835, + "step": 18809 + }, + { + "epoch": 0.86, + "grad_norm": 0.3236400834489422, + "learning_rate": 9.530222835413739e-07, + "loss": 0.2462, + "step": 18810 + }, + { + "epoch": 0.86, + "grad_norm": 0.3555063437134827, + "learning_rate": 9.523884498718972e-07, + "loss": 0.2483, + "step": 18811 + }, + { + "epoch": 0.86, + "grad_norm": 0.9665254133902232, + "learning_rate": 9.517548165078173e-07, + "loss": 0.4433, + "step": 18812 + }, + { + "epoch": 0.86, + "grad_norm": 0.4187777398010818, + "learning_rate": 9.51121383463166e-07, + "loss": 0.2498, + "step": 18813 + }, + { + "epoch": 0.86, + "grad_norm": 0.278632084100606, + "learning_rate": 9.504881507519658e-07, + "loss": 0.1549, + "step": 18814 + }, + { + "epoch": 0.86, + "grad_norm": 0.3888897105802697, + "learning_rate": 9.498551183882343e-07, + "loss": 0.2407, + "step": 18815 + }, + { + "epoch": 0.86, + "grad_norm": 0.33762901189885725, + "learning_rate": 9.492222863859912e-07, + "loss": 0.2557, + "step": 18816 + }, + { + "epoch": 0.86, + "grad_norm": 0.6936415221550374, + "learning_rate": 9.4858965475924e-07, + "loss": 0.36, + "step": 18817 + }, + { + "epoch": 0.86, + "grad_norm": 0.4046094084143305, + "learning_rate": 9.479572235219925e-07, + "loss": 0.2572, + "step": 18818 + }, + { + "epoch": 0.86, + "grad_norm": 0.3495032573166012, + "learning_rate": 9.473249926882466e-07, + "loss": 0.2336, + "step": 18819 + }, + { + "epoch": 0.86, + "grad_norm": 0.4100858800528111, + "learning_rate": 9.46692962271999e-07, + "loss": 0.2387, + "step": 18820 + }, + { + "epoch": 0.86, + "grad_norm": 0.4299534095699312, + "learning_rate": 9.46061132287246e-07, + "loss": 0.1493, + "step": 18821 + }, + { + "epoch": 0.86, + "grad_norm": 0.37438787638157117, + "learning_rate": 9.454295027479709e-07, + "loss": 0.26, + "step": 18822 + }, + { + "epoch": 0.86, + "grad_norm": 0.3893512213958239, + "learning_rate": 9.447980736681606e-07, + "loss": 0.3063, + "step": 18823 + }, + { + "epoch": 0.86, + "grad_norm": 1.7994330914516738, + "learning_rate": 9.441668450617924e-07, + "loss": 0.3876, + "step": 18824 + }, + { + "epoch": 0.86, + "grad_norm": 0.3486993986514949, + "learning_rate": 9.435358169428444e-07, + "loss": 0.2368, + "step": 18825 + }, + { + "epoch": 0.86, + "grad_norm": 0.26104371871676996, + "learning_rate": 9.429049893252851e-07, + "loss": 0.2094, + "step": 18826 + }, + { + "epoch": 0.86, + "grad_norm": 0.5398834996541773, + "learning_rate": 9.422743622230757e-07, + "loss": 0.2779, + "step": 18827 + }, + { + "epoch": 0.86, + "grad_norm": 0.3209426612002473, + "learning_rate": 9.416439356501861e-07, + "loss": 0.2043, + "step": 18828 + }, + { + "epoch": 0.86, + "grad_norm": 0.8734088595990264, + "learning_rate": 9.410137096205674e-07, + "loss": 0.4558, + "step": 18829 + }, + { + "epoch": 0.87, + "grad_norm": 0.5920457479640567, + "learning_rate": 9.40383684148175e-07, + "loss": 0.3282, + "step": 18830 + }, + { + "epoch": 0.87, + "grad_norm": 0.2923753763290717, + "learning_rate": 9.397538592469557e-07, + "loss": 0.2016, + "step": 18831 + }, + { + "epoch": 0.87, + "grad_norm": 1.6326765195036865, + "learning_rate": 9.391242349308527e-07, + "loss": 0.6477, + "step": 18832 + }, + { + "epoch": 0.87, + "grad_norm": 0.3504124623178383, + "learning_rate": 9.384948112138082e-07, + "loss": 0.2105, + "step": 18833 + }, + { + "epoch": 0.87, + "grad_norm": 0.29447065809239237, + "learning_rate": 9.37865588109752e-07, + "loss": 0.2246, + "step": 18834 + }, + { + "epoch": 0.87, + "grad_norm": 0.5704340579896158, + "learning_rate": 9.37236565632621e-07, + "loss": 0.3827, + "step": 18835 + }, + { + "epoch": 0.87, + "grad_norm": 1.363555127578698, + "learning_rate": 9.36607743796335e-07, + "loss": 0.6199, + "step": 18836 + }, + { + "epoch": 0.87, + "grad_norm": 0.3438149296795903, + "learning_rate": 9.359791226148185e-07, + "loss": 0.1512, + "step": 18837 + }, + { + "epoch": 0.87, + "grad_norm": 0.3397907413118579, + "learning_rate": 9.353507021019892e-07, + "loss": 0.2495, + "step": 18838 + }, + { + "epoch": 0.87, + "grad_norm": 0.368151597078527, + "learning_rate": 9.347224822717571e-07, + "loss": 0.2392, + "step": 18839 + }, + { + "epoch": 0.87, + "grad_norm": 0.4287633962978534, + "learning_rate": 9.340944631380333e-07, + "loss": 0.2851, + "step": 18840 + }, + { + "epoch": 0.87, + "grad_norm": 0.5944363968693355, + "learning_rate": 9.334666447147189e-07, + "loss": 0.2857, + "step": 18841 + }, + { + "epoch": 0.87, + "grad_norm": 0.3630627966509355, + "learning_rate": 9.328390270157172e-07, + "loss": 0.2856, + "step": 18842 + }, + { + "epoch": 0.87, + "grad_norm": 0.46366260384704483, + "learning_rate": 9.322116100549172e-07, + "loss": 0.2621, + "step": 18843 + }, + { + "epoch": 0.87, + "grad_norm": 0.6342174081457774, + "learning_rate": 9.315843938462143e-07, + "loss": 0.2254, + "step": 18844 + }, + { + "epoch": 0.87, + "grad_norm": 0.73671605280944, + "learning_rate": 9.309573784034931e-07, + "loss": 0.3659, + "step": 18845 + }, + { + "epoch": 0.87, + "grad_norm": 0.2194343119840035, + "learning_rate": 9.303305637406335e-07, + "loss": 0.1549, + "step": 18846 + }, + { + "epoch": 0.87, + "grad_norm": 0.29566095238270096, + "learning_rate": 9.297039498715155e-07, + "loss": 0.247, + "step": 18847 + }, + { + "epoch": 0.87, + "grad_norm": 1.6209792244684877, + "learning_rate": 9.29077536810008e-07, + "loss": 0.7435, + "step": 18848 + }, + { + "epoch": 0.87, + "grad_norm": 0.36110234246484674, + "learning_rate": 9.284513245699823e-07, + "loss": 0.2537, + "step": 18849 + }, + { + "epoch": 0.87, + "grad_norm": 0.5232560925517842, + "learning_rate": 9.278253131653014e-07, + "loss": 0.2357, + "step": 18850 + }, + { + "epoch": 0.87, + "grad_norm": 0.5192236217253473, + "learning_rate": 9.271995026098224e-07, + "loss": 0.292, + "step": 18851 + }, + { + "epoch": 0.87, + "grad_norm": 0.3830818663878956, + "learning_rate": 9.265738929174051e-07, + "loss": 0.2662, + "step": 18852 + }, + { + "epoch": 0.87, + "grad_norm": 0.4647701773957379, + "learning_rate": 9.259484841018917e-07, + "loss": 0.2933, + "step": 18853 + }, + { + "epoch": 0.87, + "grad_norm": 0.2918725182806727, + "learning_rate": 9.253232761771369e-07, + "loss": 0.2143, + "step": 18854 + }, + { + "epoch": 0.87, + "grad_norm": 0.5885689597253098, + "learning_rate": 9.246982691569794e-07, + "loss": 0.2528, + "step": 18855 + }, + { + "epoch": 0.87, + "grad_norm": 0.5247729722126491, + "learning_rate": 9.240734630552528e-07, + "loss": 0.3224, + "step": 18856 + }, + { + "epoch": 0.87, + "grad_norm": 0.4494132334443572, + "learning_rate": 9.234488578857925e-07, + "loss": 0.2013, + "step": 18857 + }, + { + "epoch": 0.87, + "grad_norm": 0.3930843042539551, + "learning_rate": 9.228244536624264e-07, + "loss": 0.2753, + "step": 18858 + }, + { + "epoch": 0.87, + "grad_norm": 0.36024233552190976, + "learning_rate": 9.222002503989803e-07, + "loss": 0.2915, + "step": 18859 + }, + { + "epoch": 0.87, + "grad_norm": 0.3598019090739894, + "learning_rate": 9.215762481092694e-07, + "loss": 0.1219, + "step": 18860 + }, + { + "epoch": 0.87, + "grad_norm": 0.4244392754975432, + "learning_rate": 9.209524468071096e-07, + "loss": 0.295, + "step": 18861 + }, + { + "epoch": 0.87, + "grad_norm": 0.3258787291325904, + "learning_rate": 9.203288465063143e-07, + "loss": 0.2679, + "step": 18862 + }, + { + "epoch": 0.87, + "grad_norm": 0.8217004797780705, + "learning_rate": 9.197054472206857e-07, + "loss": 0.262, + "step": 18863 + }, + { + "epoch": 0.87, + "grad_norm": 0.4887247900594674, + "learning_rate": 9.190822489640294e-07, + "loss": 0.2904, + "step": 18864 + }, + { + "epoch": 0.87, + "grad_norm": 0.8843274438861373, + "learning_rate": 9.184592517501367e-07, + "loss": 0.4211, + "step": 18865 + }, + { + "epoch": 0.87, + "grad_norm": 0.28823465783532826, + "learning_rate": 9.178364555928043e-07, + "loss": 0.2385, + "step": 18866 + }, + { + "epoch": 0.87, + "grad_norm": 0.320127196856453, + "learning_rate": 9.172138605058201e-07, + "loss": 0.1644, + "step": 18867 + }, + { + "epoch": 0.87, + "grad_norm": 0.5093834549822057, + "learning_rate": 9.165914665029663e-07, + "loss": 0.3173, + "step": 18868 + }, + { + "epoch": 0.87, + "grad_norm": 0.9993546129032954, + "learning_rate": 9.15969273598023e-07, + "loss": 0.3777, + "step": 18869 + }, + { + "epoch": 0.87, + "grad_norm": 0.2638497601368738, + "learning_rate": 9.153472818047627e-07, + "loss": 0.2049, + "step": 18870 + }, + { + "epoch": 0.87, + "grad_norm": 0.514858592531016, + "learning_rate": 9.147254911369597e-07, + "loss": 0.3542, + "step": 18871 + }, + { + "epoch": 0.87, + "grad_norm": 0.3087457247789488, + "learning_rate": 9.141039016083786e-07, + "loss": 0.1959, + "step": 18872 + }, + { + "epoch": 0.87, + "grad_norm": 0.4140340378500285, + "learning_rate": 9.134825132327784e-07, + "loss": 0.1971, + "step": 18873 + }, + { + "epoch": 0.87, + "grad_norm": 0.3772149688147411, + "learning_rate": 9.128613260239172e-07, + "loss": 0.2981, + "step": 18874 + }, + { + "epoch": 0.87, + "grad_norm": 0.5976708732402051, + "learning_rate": 9.122403399955493e-07, + "loss": 0.3239, + "step": 18875 + }, + { + "epoch": 0.87, + "grad_norm": 0.8570607138616448, + "learning_rate": 9.116195551614215e-07, + "loss": 0.4279, + "step": 18876 + }, + { + "epoch": 0.87, + "grad_norm": 0.3434920635418428, + "learning_rate": 9.109989715352762e-07, + "loss": 0.2283, + "step": 18877 + }, + { + "epoch": 0.87, + "grad_norm": 0.29188599507292157, + "learning_rate": 9.103785891308548e-07, + "loss": 0.2391, + "step": 18878 + }, + { + "epoch": 0.87, + "grad_norm": 0.5865646152486707, + "learning_rate": 9.097584079618893e-07, + "loss": 0.2927, + "step": 18879 + }, + { + "epoch": 0.87, + "grad_norm": 0.32190440293246897, + "learning_rate": 9.091384280421123e-07, + "loss": 0.2056, + "step": 18880 + }, + { + "epoch": 0.87, + "grad_norm": 1.4658020704934267, + "learning_rate": 9.085186493852494e-07, + "loss": 0.6687, + "step": 18881 + }, + { + "epoch": 0.87, + "grad_norm": 0.3196103736867, + "learning_rate": 9.078990720050196e-07, + "loss": 0.2435, + "step": 18882 + }, + { + "epoch": 0.87, + "grad_norm": 0.3886127094075236, + "learning_rate": 9.072796959151409e-07, + "loss": 0.2598, + "step": 18883 + }, + { + "epoch": 0.87, + "grad_norm": 0.7620096921376133, + "learning_rate": 9.066605211293278e-07, + "loss": 0.3855, + "step": 18884 + }, + { + "epoch": 0.87, + "grad_norm": 0.652874890761138, + "learning_rate": 9.060415476612849e-07, + "loss": 0.325, + "step": 18885 + }, + { + "epoch": 0.87, + "grad_norm": 0.3119538187146621, + "learning_rate": 9.054227755247191e-07, + "loss": 0.1916, + "step": 18886 + }, + { + "epoch": 0.87, + "grad_norm": 0.399123611326978, + "learning_rate": 9.048042047333239e-07, + "loss": 0.2448, + "step": 18887 + }, + { + "epoch": 0.87, + "grad_norm": 0.5641628087018107, + "learning_rate": 9.041858353008015e-07, + "loss": 0.3206, + "step": 18888 + }, + { + "epoch": 0.87, + "grad_norm": 0.43354807046476623, + "learning_rate": 9.035676672408367e-07, + "loss": 0.3033, + "step": 18889 + }, + { + "epoch": 0.87, + "grad_norm": 0.37617807192630454, + "learning_rate": 9.029497005671173e-07, + "loss": 0.2511, + "step": 18890 + }, + { + "epoch": 0.87, + "grad_norm": 0.4502097985776204, + "learning_rate": 9.023319352933225e-07, + "loss": 0.3049, + "step": 18891 + }, + { + "epoch": 0.87, + "grad_norm": 0.3356574198342961, + "learning_rate": 9.0171437143313e-07, + "loss": 0.1925, + "step": 18892 + }, + { + "epoch": 0.87, + "grad_norm": 0.41889309019794485, + "learning_rate": 9.010970090002135e-07, + "loss": 0.1645, + "step": 18893 + }, + { + "epoch": 0.87, + "grad_norm": 0.4428809998005926, + "learning_rate": 9.004798480082388e-07, + "loss": 0.2855, + "step": 18894 + }, + { + "epoch": 0.87, + "grad_norm": 0.3726263492083325, + "learning_rate": 8.998628884708705e-07, + "loss": 0.3003, + "step": 18895 + }, + { + "epoch": 0.87, + "grad_norm": 0.6474236434967158, + "learning_rate": 8.992461304017663e-07, + "loss": 0.2963, + "step": 18896 + }, + { + "epoch": 0.87, + "grad_norm": 0.9175221644915363, + "learning_rate": 8.986295738145812e-07, + "loss": 0.371, + "step": 18897 + }, + { + "epoch": 0.87, + "grad_norm": 0.2763343729730441, + "learning_rate": 8.980132187229673e-07, + "loss": 0.2467, + "step": 18898 + }, + { + "epoch": 0.87, + "grad_norm": 0.31078048715961265, + "learning_rate": 8.973970651405661e-07, + "loss": 0.0818, + "step": 18899 + }, + { + "epoch": 0.87, + "grad_norm": 0.7217040705181044, + "learning_rate": 8.9678111308102e-07, + "loss": 0.3481, + "step": 18900 + }, + { + "epoch": 0.87, + "grad_norm": 0.39691667455029683, + "learning_rate": 8.961653625579691e-07, + "loss": 0.3002, + "step": 18901 + }, + { + "epoch": 0.87, + "grad_norm": 0.40179015563465464, + "learning_rate": 8.955498135850405e-07, + "loss": 0.2882, + "step": 18902 + }, + { + "epoch": 0.87, + "grad_norm": 0.7449130602274193, + "learning_rate": 8.949344661758652e-07, + "loss": 0.1774, + "step": 18903 + }, + { + "epoch": 0.87, + "grad_norm": 0.29920893193675446, + "learning_rate": 8.943193203440625e-07, + "loss": 0.2331, + "step": 18904 + }, + { + "epoch": 0.87, + "grad_norm": 0.47564299098732626, + "learning_rate": 8.937043761032571e-07, + "loss": 0.2199, + "step": 18905 + }, + { + "epoch": 0.87, + "grad_norm": 0.29436078119338727, + "learning_rate": 8.93089633467058e-07, + "loss": 0.2204, + "step": 18906 + }, + { + "epoch": 0.87, + "grad_norm": 0.4103587457819994, + "learning_rate": 8.924750924490799e-07, + "loss": 0.2833, + "step": 18907 + }, + { + "epoch": 0.87, + "grad_norm": 0.5726936716283302, + "learning_rate": 8.91860753062923e-07, + "loss": 0.3424, + "step": 18908 + }, + { + "epoch": 0.87, + "grad_norm": 0.5670378023407201, + "learning_rate": 8.912466153221899e-07, + "loss": 0.2116, + "step": 18909 + }, + { + "epoch": 0.87, + "grad_norm": 0.2921827245634143, + "learning_rate": 8.906326792404796e-07, + "loss": 0.2404, + "step": 18910 + }, + { + "epoch": 0.87, + "grad_norm": 0.2855471693231724, + "learning_rate": 8.900189448313812e-07, + "loss": 0.1683, + "step": 18911 + }, + { + "epoch": 0.87, + "grad_norm": 0.7998165762495214, + "learning_rate": 8.894054121084839e-07, + "loss": 0.2552, + "step": 18912 + }, + { + "epoch": 0.87, + "grad_norm": 0.3732932526263299, + "learning_rate": 8.887920810853678e-07, + "loss": 0.2805, + "step": 18913 + }, + { + "epoch": 0.87, + "grad_norm": 0.3678071890655849, + "learning_rate": 8.881789517756145e-07, + "loss": 0.3198, + "step": 18914 + }, + { + "epoch": 0.87, + "grad_norm": 1.6643275548060616, + "learning_rate": 8.875660241927985e-07, + "loss": 0.7905, + "step": 18915 + }, + { + "epoch": 0.87, + "grad_norm": 0.3975593958959978, + "learning_rate": 8.869532983504859e-07, + "loss": 0.1977, + "step": 18916 + }, + { + "epoch": 0.87, + "grad_norm": 0.3665686178618616, + "learning_rate": 8.863407742622443e-07, + "loss": 0.1575, + "step": 18917 + }, + { + "epoch": 0.87, + "grad_norm": 0.46384417793004634, + "learning_rate": 8.857284519416343e-07, + "loss": 0.3051, + "step": 18918 + }, + { + "epoch": 0.87, + "grad_norm": 0.3618020837454022, + "learning_rate": 8.851163314022138e-07, + "loss": 0.2158, + "step": 18919 + }, + { + "epoch": 0.87, + "grad_norm": 1.2669080404141906, + "learning_rate": 8.84504412657532e-07, + "loss": 0.7385, + "step": 18920 + }, + { + "epoch": 0.87, + "grad_norm": 0.5050831190506294, + "learning_rate": 8.838926957211336e-07, + "loss": 0.3386, + "step": 18921 + }, + { + "epoch": 0.87, + "grad_norm": 0.2967969580370743, + "learning_rate": 8.832811806065689e-07, + "loss": 0.2076, + "step": 18922 + }, + { + "epoch": 0.87, + "grad_norm": 0.3390828264274052, + "learning_rate": 8.826698673273692e-07, + "loss": 0.2051, + "step": 18923 + }, + { + "epoch": 0.87, + "grad_norm": 0.5588968127604035, + "learning_rate": 8.820587558970739e-07, + "loss": 0.2998, + "step": 18924 + }, + { + "epoch": 0.87, + "grad_norm": 0.3684949562092011, + "learning_rate": 8.814478463292076e-07, + "loss": 0.2156, + "step": 18925 + }, + { + "epoch": 0.87, + "grad_norm": 0.35456679243741357, + "learning_rate": 8.808371386372971e-07, + "loss": 0.3077, + "step": 18926 + }, + { + "epoch": 0.87, + "grad_norm": 0.9361735552321727, + "learning_rate": 8.802266328348663e-07, + "loss": 0.4029, + "step": 18927 + }, + { + "epoch": 0.87, + "grad_norm": 0.3882030177387654, + "learning_rate": 8.796163289354253e-07, + "loss": 0.2702, + "step": 18928 + }, + { + "epoch": 0.87, + "grad_norm": 0.28570409058523233, + "learning_rate": 8.790062269524901e-07, + "loss": 0.1609, + "step": 18929 + }, + { + "epoch": 0.87, + "grad_norm": 0.5087992929157799, + "learning_rate": 8.783963268995643e-07, + "loss": 0.3295, + "step": 18930 + }, + { + "epoch": 0.87, + "grad_norm": 0.33976686319062094, + "learning_rate": 8.777866287901526e-07, + "loss": 0.253, + "step": 18931 + }, + { + "epoch": 0.87, + "grad_norm": 1.1794297448413247, + "learning_rate": 8.771771326377543e-07, + "loss": 0.3992, + "step": 18932 + }, + { + "epoch": 0.87, + "grad_norm": 0.39741501997140877, + "learning_rate": 8.765678384558607e-07, + "loss": 0.279, + "step": 18933 + }, + { + "epoch": 0.87, + "grad_norm": 0.3437074860414685, + "learning_rate": 8.75958746257961e-07, + "loss": 0.2574, + "step": 18934 + }, + { + "epoch": 0.87, + "grad_norm": 0.4171877711553692, + "learning_rate": 8.753498560575402e-07, + "loss": 0.1403, + "step": 18935 + }, + { + "epoch": 0.87, + "grad_norm": 0.4226782786099076, + "learning_rate": 8.747411678680817e-07, + "loss": 0.2618, + "step": 18936 + }, + { + "epoch": 0.87, + "grad_norm": 0.34152708157740014, + "learning_rate": 8.741326817030594e-07, + "loss": 0.2492, + "step": 18937 + }, + { + "epoch": 0.87, + "grad_norm": 0.3534839711812888, + "learning_rate": 8.735243975759411e-07, + "loss": 0.2496, + "step": 18938 + }, + { + "epoch": 0.87, + "grad_norm": 1.3868618994597817, + "learning_rate": 8.729163155001975e-07, + "loss": 0.4175, + "step": 18939 + }, + { + "epoch": 0.87, + "grad_norm": 0.36058788097455874, + "learning_rate": 8.723084354892886e-07, + "loss": 0.261, + "step": 18940 + }, + { + "epoch": 0.87, + "grad_norm": 1.1027058531891762, + "learning_rate": 8.71700757556676e-07, + "loss": 0.3553, + "step": 18941 + }, + { + "epoch": 0.87, + "grad_norm": 0.2745882122097378, + "learning_rate": 8.710932817158091e-07, + "loss": 0.2023, + "step": 18942 + }, + { + "epoch": 0.87, + "grad_norm": 0.380078601130948, + "learning_rate": 8.704860079801381e-07, + "loss": 0.257, + "step": 18943 + }, + { + "epoch": 0.87, + "grad_norm": 0.42378470403585644, + "learning_rate": 8.698789363631088e-07, + "loss": 0.2786, + "step": 18944 + }, + { + "epoch": 0.87, + "grad_norm": 0.37952895638988604, + "learning_rate": 8.692720668781596e-07, + "loss": 0.2299, + "step": 18945 + }, + { + "epoch": 0.87, + "grad_norm": 0.37010618611198703, + "learning_rate": 8.686653995387273e-07, + "loss": 0.2559, + "step": 18946 + }, + { + "epoch": 0.87, + "grad_norm": 0.6935291674141774, + "learning_rate": 8.68058934358239e-07, + "loss": 0.3777, + "step": 18947 + }, + { + "epoch": 0.87, + "grad_norm": 0.43240093940848606, + "learning_rate": 8.674526713501286e-07, + "loss": 0.1495, + "step": 18948 + }, + { + "epoch": 0.87, + "grad_norm": 0.3479697041473474, + "learning_rate": 8.668466105278128e-07, + "loss": 0.2694, + "step": 18949 + }, + { + "epoch": 0.87, + "grad_norm": 0.3601097343161797, + "learning_rate": 8.662407519047089e-07, + "loss": 0.3297, + "step": 18950 + }, + { + "epoch": 0.87, + "grad_norm": 0.4555246551112367, + "learning_rate": 8.656350954942328e-07, + "loss": 0.0826, + "step": 18951 + }, + { + "epoch": 0.87, + "grad_norm": 0.3215596246859388, + "learning_rate": 8.650296413097903e-07, + "loss": 0.2241, + "step": 18952 + }, + { + "epoch": 0.87, + "grad_norm": 0.4762956326888779, + "learning_rate": 8.644243893647897e-07, + "loss": 0.3393, + "step": 18953 + }, + { + "epoch": 0.87, + "grad_norm": 0.6275685048213777, + "learning_rate": 8.638193396726257e-07, + "loss": 0.308, + "step": 18954 + }, + { + "epoch": 0.87, + "grad_norm": 0.23710470467552497, + "learning_rate": 8.63214492246699e-07, + "loss": 0.1631, + "step": 18955 + }, + { + "epoch": 0.87, + "grad_norm": 1.3023343534560967, + "learning_rate": 8.626098471003941e-07, + "loss": 0.7332, + "step": 18956 + }, + { + "epoch": 0.87, + "grad_norm": 0.2913590717654896, + "learning_rate": 8.620054042471015e-07, + "loss": 0.2716, + "step": 18957 + }, + { + "epoch": 0.87, + "grad_norm": 0.3400575354588975, + "learning_rate": 8.614011637002029e-07, + "loss": 0.2093, + "step": 18958 + }, + { + "epoch": 0.87, + "grad_norm": 0.7299331363516635, + "learning_rate": 8.607971254730741e-07, + "loss": 0.3653, + "step": 18959 + }, + { + "epoch": 0.87, + "grad_norm": 1.4538820876940137, + "learning_rate": 8.601932895790877e-07, + "loss": 0.5566, + "step": 18960 + }, + { + "epoch": 0.87, + "grad_norm": 0.29500953961414933, + "learning_rate": 8.595896560316142e-07, + "loss": 0.1941, + "step": 18961 + }, + { + "epoch": 0.87, + "grad_norm": 0.3801070490627266, + "learning_rate": 8.58986224844014e-07, + "loss": 0.3098, + "step": 18962 + }, + { + "epoch": 0.87, + "grad_norm": 0.35607095004376244, + "learning_rate": 8.583829960296519e-07, + "loss": 0.214, + "step": 18963 + }, + { + "epoch": 0.87, + "grad_norm": 0.3198778201900577, + "learning_rate": 8.57779969601874e-07, + "loss": 0.188, + "step": 18964 + }, + { + "epoch": 0.87, + "grad_norm": 0.3814237324776373, + "learning_rate": 8.571771455740407e-07, + "loss": 0.2729, + "step": 18965 + }, + { + "epoch": 0.87, + "grad_norm": 1.9794993125079294, + "learning_rate": 8.565745239594936e-07, + "loss": 0.3982, + "step": 18966 + }, + { + "epoch": 0.87, + "grad_norm": 0.4327866528871953, + "learning_rate": 8.559721047715719e-07, + "loss": 0.2624, + "step": 18967 + }, + { + "epoch": 0.87, + "grad_norm": 0.21737636530942112, + "learning_rate": 8.553698880236172e-07, + "loss": 0.1504, + "step": 18968 + }, + { + "epoch": 0.87, + "grad_norm": 0.3866805763574534, + "learning_rate": 8.547678737289556e-07, + "loss": 0.3136, + "step": 18969 + }, + { + "epoch": 0.87, + "grad_norm": 0.41532138717673067, + "learning_rate": 8.541660619009217e-07, + "loss": 0.2742, + "step": 18970 + }, + { + "epoch": 0.87, + "grad_norm": 0.5814246492432071, + "learning_rate": 8.535644525528353e-07, + "loss": 0.2381, + "step": 18971 + }, + { + "epoch": 0.87, + "grad_norm": 1.4576454644419, + "learning_rate": 8.529630456980175e-07, + "loss": 0.5314, + "step": 18972 + }, + { + "epoch": 0.87, + "grad_norm": 0.2641624510333769, + "learning_rate": 8.523618413497814e-07, + "loss": 0.2404, + "step": 18973 + }, + { + "epoch": 0.87, + "grad_norm": 0.5452765595102245, + "learning_rate": 8.517608395214361e-07, + "loss": 0.2631, + "step": 18974 + }, + { + "epoch": 0.87, + "grad_norm": 0.47618578067920797, + "learning_rate": 8.51160040226291e-07, + "loss": 0.2403, + "step": 18975 + }, + { + "epoch": 0.87, + "grad_norm": 0.3312721818217621, + "learning_rate": 8.505594434776432e-07, + "loss": 0.2361, + "step": 18976 + }, + { + "epoch": 0.87, + "grad_norm": 0.2647445344264366, + "learning_rate": 8.49959049288791e-07, + "loss": 0.1885, + "step": 18977 + }, + { + "epoch": 0.87, + "grad_norm": 1.2865961274945428, + "learning_rate": 8.493588576730283e-07, + "loss": 0.4625, + "step": 18978 + }, + { + "epoch": 0.87, + "grad_norm": 0.576051959752146, + "learning_rate": 8.487588686436387e-07, + "loss": 0.3094, + "step": 18979 + }, + { + "epoch": 0.87, + "grad_norm": 0.43333879142018555, + "learning_rate": 8.481590822139108e-07, + "loss": 0.2912, + "step": 18980 + }, + { + "epoch": 0.87, + "grad_norm": 0.36261927183282977, + "learning_rate": 8.475594983971148e-07, + "loss": 0.2571, + "step": 18981 + }, + { + "epoch": 0.87, + "grad_norm": 0.6324646657873435, + "learning_rate": 8.469601172065356e-07, + "loss": 0.301, + "step": 18982 + }, + { + "epoch": 0.87, + "grad_norm": 0.2462914965218332, + "learning_rate": 8.463609386554339e-07, + "loss": 0.1967, + "step": 18983 + }, + { + "epoch": 0.87, + "grad_norm": 2.1128689960052904, + "learning_rate": 8.457619627570824e-07, + "loss": 0.2888, + "step": 18984 + }, + { + "epoch": 0.87, + "grad_norm": 0.34622813444542877, + "learning_rate": 8.45163189524737e-07, + "loss": 0.2902, + "step": 18985 + }, + { + "epoch": 0.87, + "grad_norm": 0.42748284655691493, + "learning_rate": 8.445646189716506e-07, + "loss": 0.3169, + "step": 18986 + }, + { + "epoch": 0.87, + "grad_norm": 0.8097034959160273, + "learning_rate": 8.439662511110846e-07, + "loss": 0.2681, + "step": 18987 + }, + { + "epoch": 0.87, + "grad_norm": 0.3341323654020718, + "learning_rate": 8.433680859562787e-07, + "loss": 0.2052, + "step": 18988 + }, + { + "epoch": 0.87, + "grad_norm": 0.2644793304128655, + "learning_rate": 8.4277012352048e-07, + "loss": 0.2358, + "step": 18989 + }, + { + "epoch": 0.87, + "grad_norm": 1.6192977165995233, + "learning_rate": 8.421723638169222e-07, + "loss": 0.7347, + "step": 18990 + }, + { + "epoch": 0.87, + "grad_norm": 0.37274164176763475, + "learning_rate": 8.415748068588425e-07, + "loss": 0.2117, + "step": 18991 + }, + { + "epoch": 0.87, + "grad_norm": 0.6525883417708729, + "learning_rate": 8.409774526594716e-07, + "loss": 0.3236, + "step": 18992 + }, + { + "epoch": 0.87, + "grad_norm": 0.3815008517674871, + "learning_rate": 8.403803012320311e-07, + "loss": 0.2876, + "step": 18993 + }, + { + "epoch": 0.87, + "grad_norm": 0.5238316435682695, + "learning_rate": 8.397833525897415e-07, + "loss": 0.172, + "step": 18994 + }, + { + "epoch": 0.87, + "grad_norm": 0.27184076687514025, + "learning_rate": 8.391866067458221e-07, + "loss": 0.2086, + "step": 18995 + }, + { + "epoch": 0.87, + "grad_norm": 1.3120938292607551, + "learning_rate": 8.385900637134792e-07, + "loss": 0.5248, + "step": 18996 + }, + { + "epoch": 0.87, + "grad_norm": 0.289955944085589, + "learning_rate": 8.379937235059254e-07, + "loss": 0.199, + "step": 18997 + }, + { + "epoch": 0.87, + "grad_norm": 0.41110906995783775, + "learning_rate": 8.373975861363582e-07, + "loss": 0.2882, + "step": 18998 + }, + { + "epoch": 0.87, + "grad_norm": 0.864466697011512, + "learning_rate": 8.368016516179766e-07, + "loss": 0.4581, + "step": 18999 + }, + { + "epoch": 0.87, + "grad_norm": 0.1891065185586739, + "learning_rate": 8.36205919963975e-07, + "loss": 0.0701, + "step": 19000 + }, + { + "epoch": 0.87, + "grad_norm": 0.3238247214464417, + "learning_rate": 8.356103911875446e-07, + "loss": 0.2423, + "step": 19001 + }, + { + "epoch": 0.87, + "grad_norm": 1.7312028826365304, + "learning_rate": 8.350150653018651e-07, + "loss": 0.6667, + "step": 19002 + }, + { + "epoch": 0.87, + "grad_norm": 0.5933598010889024, + "learning_rate": 8.344199423201194e-07, + "loss": 0.3277, + "step": 19003 + }, + { + "epoch": 0.87, + "grad_norm": 0.3640038421313204, + "learning_rate": 8.338250222554833e-07, + "loss": 0.2359, + "step": 19004 + }, + { + "epoch": 0.87, + "grad_norm": 0.391795445380038, + "learning_rate": 8.332303051211244e-07, + "loss": 0.2986, + "step": 19005 + }, + { + "epoch": 0.87, + "grad_norm": 0.9451842798290248, + "learning_rate": 8.326357909302141e-07, + "loss": 0.2717, + "step": 19006 + }, + { + "epoch": 0.87, + "grad_norm": 0.24797767512882987, + "learning_rate": 8.320414796959097e-07, + "loss": 0.1548, + "step": 19007 + }, + { + "epoch": 0.87, + "grad_norm": 1.5115961773945388, + "learning_rate": 8.31447371431372e-07, + "loss": 0.6177, + "step": 19008 + }, + { + "epoch": 0.87, + "grad_norm": 0.3026605604782784, + "learning_rate": 8.308534661497525e-07, + "loss": 0.2548, + "step": 19009 + }, + { + "epoch": 0.87, + "grad_norm": 0.35722717921630737, + "learning_rate": 8.302597638641996e-07, + "loss": 0.2369, + "step": 19010 + }, + { + "epoch": 0.87, + "grad_norm": 1.0598073565192736, + "learning_rate": 8.296662645878573e-07, + "loss": 0.4888, + "step": 19011 + }, + { + "epoch": 0.87, + "grad_norm": 0.6200732566681314, + "learning_rate": 8.290729683338649e-07, + "loss": 0.297, + "step": 19012 + }, + { + "epoch": 0.87, + "grad_norm": 0.2581691729174255, + "learning_rate": 8.2847987511536e-07, + "loss": 0.2309, + "step": 19013 + }, + { + "epoch": 0.87, + "grad_norm": 0.33419975573685173, + "learning_rate": 8.278869849454718e-07, + "loss": 0.202, + "step": 19014 + }, + { + "epoch": 0.87, + "grad_norm": 0.6462974547577311, + "learning_rate": 8.272942978373222e-07, + "loss": 0.3232, + "step": 19015 + }, + { + "epoch": 0.87, + "grad_norm": 0.3678025661468835, + "learning_rate": 8.267018138040372e-07, + "loss": 0.2827, + "step": 19016 + }, + { + "epoch": 0.87, + "grad_norm": 0.3920925839484375, + "learning_rate": 8.261095328587332e-07, + "loss": 0.2555, + "step": 19017 + }, + { + "epoch": 0.87, + "grad_norm": 0.9873372888059545, + "learning_rate": 8.255174550145229e-07, + "loss": 0.4414, + "step": 19018 + }, + { + "epoch": 0.87, + "grad_norm": 0.2738688936877376, + "learning_rate": 8.249255802845124e-07, + "loss": 0.2045, + "step": 19019 + }, + { + "epoch": 0.87, + "grad_norm": 0.35347059883707593, + "learning_rate": 8.243339086818059e-07, + "loss": 0.188, + "step": 19020 + }, + { + "epoch": 0.87, + "grad_norm": 0.4636443973610433, + "learning_rate": 8.23742440219506e-07, + "loss": 0.2911, + "step": 19021 + }, + { + "epoch": 0.87, + "grad_norm": 0.37960958301044795, + "learning_rate": 8.231511749107013e-07, + "loss": 0.3028, + "step": 19022 + }, + { + "epoch": 0.87, + "grad_norm": 0.9112937605775095, + "learning_rate": 8.225601127684867e-07, + "loss": 0.3699, + "step": 19023 + }, + { + "epoch": 0.87, + "grad_norm": 0.5030961517723759, + "learning_rate": 8.219692538059454e-07, + "loss": 0.2925, + "step": 19024 + }, + { + "epoch": 0.87, + "grad_norm": 0.30622602018718664, + "learning_rate": 8.213785980361577e-07, + "loss": 0.2637, + "step": 19025 + }, + { + "epoch": 0.87, + "grad_norm": 0.2952368102824688, + "learning_rate": 8.207881454722033e-07, + "loss": 0.1363, + "step": 19026 + }, + { + "epoch": 0.87, + "grad_norm": 0.6050218237378773, + "learning_rate": 8.201978961271506e-07, + "loss": 0.2908, + "step": 19027 + }, + { + "epoch": 0.87, + "grad_norm": 0.416794538914146, + "learning_rate": 8.196078500140703e-07, + "loss": 0.3089, + "step": 19028 + }, + { + "epoch": 0.87, + "grad_norm": 0.3752686152699415, + "learning_rate": 8.190180071460218e-07, + "loss": 0.2931, + "step": 19029 + }, + { + "epoch": 0.87, + "grad_norm": 0.2712976163975354, + "learning_rate": 8.184283675360683e-07, + "loss": 0.0679, + "step": 19030 + }, + { + "epoch": 0.87, + "grad_norm": 0.43859590879094684, + "learning_rate": 8.178389311972612e-07, + "loss": 0.2897, + "step": 19031 + }, + { + "epoch": 0.87, + "grad_norm": 0.40590827281764735, + "learning_rate": 8.172496981426492e-07, + "loss": 0.268, + "step": 19032 + }, + { + "epoch": 0.87, + "grad_norm": 0.2833302209088497, + "learning_rate": 8.166606683852784e-07, + "loss": 0.1813, + "step": 19033 + }, + { + "epoch": 0.87, + "grad_norm": 0.3514643407549719, + "learning_rate": 8.160718419381886e-07, + "loss": 0.2558, + "step": 19034 + }, + { + "epoch": 0.87, + "grad_norm": 1.4597336347787988, + "learning_rate": 8.154832188144191e-07, + "loss": 0.54, + "step": 19035 + }, + { + "epoch": 0.87, + "grad_norm": 0.3957739446574237, + "learning_rate": 8.148947990269973e-07, + "loss": 0.2269, + "step": 19036 + }, + { + "epoch": 0.87, + "grad_norm": 0.3325904970818053, + "learning_rate": 8.143065825889518e-07, + "loss": 0.2321, + "step": 19037 + }, + { + "epoch": 0.87, + "grad_norm": 0.6391397999000846, + "learning_rate": 8.137185695133076e-07, + "loss": 0.3442, + "step": 19038 + }, + { + "epoch": 0.87, + "grad_norm": 0.25491727382312285, + "learning_rate": 8.131307598130778e-07, + "loss": 0.1201, + "step": 19039 + }, + { + "epoch": 0.87, + "grad_norm": 0.3556588612588018, + "learning_rate": 8.125431535012807e-07, + "loss": 0.2827, + "step": 19040 + }, + { + "epoch": 0.87, + "grad_norm": 0.28030210751157425, + "learning_rate": 8.119557505909214e-07, + "loss": 0.2513, + "step": 19041 + }, + { + "epoch": 0.87, + "grad_norm": 1.2190992801998461, + "learning_rate": 8.113685510950054e-07, + "loss": 0.5076, + "step": 19042 + }, + { + "epoch": 0.87, + "grad_norm": 0.3416880376232932, + "learning_rate": 8.107815550265363e-07, + "loss": 0.2082, + "step": 19043 + }, + { + "epoch": 0.87, + "grad_norm": 0.47439585190525857, + "learning_rate": 8.101947623985051e-07, + "loss": 0.3297, + "step": 19044 + }, + { + "epoch": 0.87, + "grad_norm": 0.5334662355908194, + "learning_rate": 8.096081732239058e-07, + "loss": 0.3864, + "step": 19045 + }, + { + "epoch": 0.87, + "grad_norm": 0.300371633079931, + "learning_rate": 8.090217875157203e-07, + "loss": 0.1852, + "step": 19046 + }, + { + "epoch": 0.88, + "grad_norm": 0.2689298491021298, + "learning_rate": 8.08435605286938e-07, + "loss": 0.1825, + "step": 19047 + }, + { + "epoch": 0.88, + "grad_norm": 0.37326502490629226, + "learning_rate": 8.078496265505309e-07, + "loss": 0.2746, + "step": 19048 + }, + { + "epoch": 0.88, + "grad_norm": 0.35925652137268366, + "learning_rate": 8.072638513194752e-07, + "loss": 0.2097, + "step": 19049 + }, + { + "epoch": 0.88, + "grad_norm": 0.6736882157557524, + "learning_rate": 8.066782796067351e-07, + "loss": 0.3525, + "step": 19050 + }, + { + "epoch": 0.88, + "grad_norm": 0.9366757556141992, + "learning_rate": 8.06092911425278e-07, + "loss": 0.4013, + "step": 19051 + }, + { + "epoch": 0.88, + "grad_norm": 0.33630107910461077, + "learning_rate": 8.055077467880645e-07, + "loss": 0.2016, + "step": 19052 + }, + { + "epoch": 0.88, + "grad_norm": 0.25572830619763703, + "learning_rate": 8.049227857080455e-07, + "loss": 0.2175, + "step": 19053 + }, + { + "epoch": 0.88, + "grad_norm": 0.769528195731873, + "learning_rate": 8.043380281981739e-07, + "loss": 0.357, + "step": 19054 + }, + { + "epoch": 0.88, + "grad_norm": 0.3449461984693298, + "learning_rate": 8.03753474271397e-07, + "loss": 0.271, + "step": 19055 + }, + { + "epoch": 0.88, + "grad_norm": 0.3512712186541674, + "learning_rate": 8.031691239406536e-07, + "loss": 0.2402, + "step": 19056 + }, + { + "epoch": 0.88, + "grad_norm": 1.3853436884094832, + "learning_rate": 8.025849772188831e-07, + "loss": 0.5689, + "step": 19057 + }, + { + "epoch": 0.88, + "grad_norm": 0.35783712998611034, + "learning_rate": 8.020010341190154e-07, + "loss": 0.2815, + "step": 19058 + }, + { + "epoch": 0.88, + "grad_norm": 0.2390374312111081, + "learning_rate": 8.014172946539789e-07, + "loss": 0.0908, + "step": 19059 + }, + { + "epoch": 0.88, + "grad_norm": 0.38651309132575284, + "learning_rate": 8.008337588366999e-07, + "loss": 0.288, + "step": 19060 + }, + { + "epoch": 0.88, + "grad_norm": 0.3503191353526383, + "learning_rate": 8.002504266800937e-07, + "loss": 0.2696, + "step": 19061 + }, + { + "epoch": 0.88, + "grad_norm": 0.9987790710210278, + "learning_rate": 7.996672981970777e-07, + "loss": 0.2243, + "step": 19062 + }, + { + "epoch": 0.88, + "grad_norm": 1.1867048413839014, + "learning_rate": 7.99084373400556e-07, + "loss": 0.7392, + "step": 19063 + }, + { + "epoch": 0.88, + "grad_norm": 0.3513326562858509, + "learning_rate": 7.985016523034428e-07, + "loss": 0.2618, + "step": 19064 + }, + { + "epoch": 0.88, + "grad_norm": 0.2200918569227306, + "learning_rate": 7.979191349186322e-07, + "loss": 0.1663, + "step": 19065 + }, + { + "epoch": 0.88, + "grad_norm": 0.6783453206394917, + "learning_rate": 7.973368212590249e-07, + "loss": 0.3526, + "step": 19066 + }, + { + "epoch": 0.88, + "grad_norm": 0.4298862751791666, + "learning_rate": 7.967547113375096e-07, + "loss": 0.2798, + "step": 19067 + }, + { + "epoch": 0.88, + "grad_norm": 0.4282416692434011, + "learning_rate": 7.961728051669737e-07, + "loss": 0.3158, + "step": 19068 + }, + { + "epoch": 0.88, + "grad_norm": 0.424678224180368, + "learning_rate": 7.955911027603036e-07, + "loss": 0.2643, + "step": 19069 + }, + { + "epoch": 0.88, + "grad_norm": 0.3515285539289351, + "learning_rate": 7.950096041303734e-07, + "loss": 0.2379, + "step": 19070 + }, + { + "epoch": 0.88, + "grad_norm": 0.7662790755210265, + "learning_rate": 7.944283092900584e-07, + "loss": 0.4195, + "step": 19071 + }, + { + "epoch": 0.88, + "grad_norm": 0.3901816534392451, + "learning_rate": 7.938472182522305e-07, + "loss": 0.2518, + "step": 19072 + }, + { + "epoch": 0.88, + "grad_norm": 0.2593516000315393, + "learning_rate": 7.932663310297495e-07, + "loss": 0.2008, + "step": 19073 + }, + { + "epoch": 0.88, + "grad_norm": 0.6722491973801278, + "learning_rate": 7.926856476354805e-07, + "loss": 0.2264, + "step": 19074 + }, + { + "epoch": 0.88, + "grad_norm": 1.1161784194660844, + "learning_rate": 7.921051680822756e-07, + "loss": 0.5161, + "step": 19075 + }, + { + "epoch": 0.88, + "grad_norm": 0.30326251383523245, + "learning_rate": 7.915248923829877e-07, + "loss": 0.2346, + "step": 19076 + }, + { + "epoch": 0.88, + "grad_norm": 0.49811330525421477, + "learning_rate": 7.909448205504633e-07, + "loss": 0.3234, + "step": 19077 + }, + { + "epoch": 0.88, + "grad_norm": 0.3953999072122668, + "learning_rate": 7.903649525975465e-07, + "loss": 0.2044, + "step": 19078 + }, + { + "epoch": 0.88, + "grad_norm": 0.262267232759829, + "learning_rate": 7.897852885370727e-07, + "loss": 0.2048, + "step": 19079 + }, + { + "epoch": 0.88, + "grad_norm": 0.6713813033419213, + "learning_rate": 7.892058283818727e-07, + "loss": 0.3087, + "step": 19080 + }, + { + "epoch": 0.88, + "grad_norm": 0.5008968674257203, + "learning_rate": 7.886265721447816e-07, + "loss": 0.3197, + "step": 19081 + }, + { + "epoch": 0.88, + "grad_norm": 0.30133102887793933, + "learning_rate": 7.880475198386195e-07, + "loss": 0.1906, + "step": 19082 + }, + { + "epoch": 0.88, + "grad_norm": 0.6961966223697794, + "learning_rate": 7.874686714762069e-07, + "loss": 0.3919, + "step": 19083 + }, + { + "epoch": 0.88, + "grad_norm": 0.40254700995907683, + "learning_rate": 7.868900270703572e-07, + "loss": 0.2996, + "step": 19084 + }, + { + "epoch": 0.88, + "grad_norm": 0.26200285918769367, + "learning_rate": 7.863115866338833e-07, + "loss": 0.1305, + "step": 19085 + }, + { + "epoch": 0.88, + "grad_norm": 0.3750703519470904, + "learning_rate": 7.857333501795927e-07, + "loss": 0.2404, + "step": 19086 + }, + { + "epoch": 0.88, + "grad_norm": 0.46395877063394975, + "learning_rate": 7.85155317720282e-07, + "loss": 0.3159, + "step": 19087 + }, + { + "epoch": 0.88, + "grad_norm": 0.30236062210862735, + "learning_rate": 7.84577489268753e-07, + "loss": 0.2014, + "step": 19088 + }, + { + "epoch": 0.88, + "grad_norm": 0.5421736601551634, + "learning_rate": 7.839998648377956e-07, + "loss": 0.3032, + "step": 19089 + }, + { + "epoch": 0.88, + "grad_norm": 1.6894963417842135, + "learning_rate": 7.834224444401983e-07, + "loss": 0.6027, + "step": 19090 + }, + { + "epoch": 0.88, + "grad_norm": 0.2763263006832676, + "learning_rate": 7.828452280887466e-07, + "loss": 0.1637, + "step": 19091 + }, + { + "epoch": 0.88, + "grad_norm": 0.292350665131583, + "learning_rate": 7.822682157962159e-07, + "loss": 0.2323, + "step": 19092 + }, + { + "epoch": 0.88, + "grad_norm": 1.373045548217957, + "learning_rate": 7.816914075753834e-07, + "loss": 0.8395, + "step": 19093 + }, + { + "epoch": 0.88, + "grad_norm": 0.4083229408056449, + "learning_rate": 7.811148034390182e-07, + "loss": 0.2512, + "step": 19094 + }, + { + "epoch": 0.88, + "grad_norm": 0.5200554628936158, + "learning_rate": 7.805384033998875e-07, + "loss": 0.2578, + "step": 19095 + }, + { + "epoch": 0.88, + "grad_norm": 0.38494575216860555, + "learning_rate": 7.799622074707513e-07, + "loss": 0.2737, + "step": 19096 + }, + { + "epoch": 0.88, + "grad_norm": 0.32550826928639337, + "learning_rate": 7.793862156643617e-07, + "loss": 0.2042, + "step": 19097 + }, + { + "epoch": 0.88, + "grad_norm": 0.29811316451366265, + "learning_rate": 7.788104279934772e-07, + "loss": 0.1424, + "step": 19098 + }, + { + "epoch": 0.88, + "grad_norm": 0.4730283661181188, + "learning_rate": 7.782348444708409e-07, + "loss": 0.3404, + "step": 19099 + }, + { + "epoch": 0.88, + "grad_norm": 0.3330903895874114, + "learning_rate": 7.776594651091995e-07, + "loss": 0.2736, + "step": 19100 + }, + { + "epoch": 0.88, + "grad_norm": 0.491622494954163, + "learning_rate": 7.77084289921286e-07, + "loss": 0.256, + "step": 19101 + }, + { + "epoch": 0.88, + "grad_norm": 1.4193965905033123, + "learning_rate": 7.765093189198381e-07, + "loss": 0.5064, + "step": 19102 + }, + { + "epoch": 0.88, + "grad_norm": 0.31727248369951444, + "learning_rate": 7.759345521175854e-07, + "loss": 0.1464, + "step": 19103 + }, + { + "epoch": 0.88, + "grad_norm": 0.24008301562319506, + "learning_rate": 7.753599895272501e-07, + "loss": 0.2053, + "step": 19104 + }, + { + "epoch": 0.88, + "grad_norm": 0.6683857943026734, + "learning_rate": 7.747856311615554e-07, + "loss": 0.4289, + "step": 19105 + }, + { + "epoch": 0.88, + "grad_norm": 0.595364616127965, + "learning_rate": 7.742114770332132e-07, + "loss": 0.3227, + "step": 19106 + }, + { + "epoch": 0.88, + "grad_norm": 0.43414959253844804, + "learning_rate": 7.736375271549379e-07, + "loss": 0.31, + "step": 19107 + }, + { + "epoch": 0.88, + "grad_norm": 0.40293418441699713, + "learning_rate": 7.73063781539437e-07, + "loss": 0.234, + "step": 19108 + }, + { + "epoch": 0.88, + "grad_norm": 0.656618466884473, + "learning_rate": 7.724902401994084e-07, + "loss": 0.3287, + "step": 19109 + }, + { + "epoch": 0.88, + "grad_norm": 0.34254417746340204, + "learning_rate": 7.719169031475526e-07, + "loss": 0.2046, + "step": 19110 + }, + { + "epoch": 0.88, + "grad_norm": 0.47301847023541294, + "learning_rate": 7.713437703965621e-07, + "loss": 0.2615, + "step": 19111 + }, + { + "epoch": 0.88, + "grad_norm": 0.3570579264271897, + "learning_rate": 7.707708419591286e-07, + "loss": 0.2587, + "step": 19112 + }, + { + "epoch": 0.88, + "grad_norm": 0.5373763369602805, + "learning_rate": 7.701981178479312e-07, + "loss": 0.34, + "step": 19113 + }, + { + "epoch": 0.88, + "grad_norm": 1.9674568654069153, + "learning_rate": 7.696255980756506e-07, + "loss": 0.2194, + "step": 19114 + }, + { + "epoch": 0.88, + "grad_norm": 0.32195481722936126, + "learning_rate": 7.690532826549657e-07, + "loss": 0.2219, + "step": 19115 + }, + { + "epoch": 0.88, + "grad_norm": 0.32845690795284216, + "learning_rate": 7.684811715985429e-07, + "loss": 0.2865, + "step": 19116 + }, + { + "epoch": 0.88, + "grad_norm": 0.506812272437964, + "learning_rate": 7.6790926491905e-07, + "loss": 0.258, + "step": 19117 + }, + { + "epoch": 0.88, + "grad_norm": 0.35994821138178995, + "learning_rate": 7.673375626291468e-07, + "loss": 0.2171, + "step": 19118 + }, + { + "epoch": 0.88, + "grad_norm": 0.5741611775301507, + "learning_rate": 7.66766064741492e-07, + "loss": 0.3448, + "step": 19119 + }, + { + "epoch": 0.88, + "grad_norm": 0.40086705752628304, + "learning_rate": 7.661947712687389e-07, + "loss": 0.2922, + "step": 19120 + }, + { + "epoch": 0.88, + "grad_norm": 1.117984088311144, + "learning_rate": 7.656236822235318e-07, + "loss": 0.1306, + "step": 19121 + }, + { + "epoch": 0.88, + "grad_norm": 0.29075011889927244, + "learning_rate": 7.650527976185174e-07, + "loss": 0.219, + "step": 19122 + }, + { + "epoch": 0.88, + "grad_norm": 0.49223978356667186, + "learning_rate": 7.644821174663308e-07, + "loss": 0.3372, + "step": 19123 + }, + { + "epoch": 0.88, + "grad_norm": 0.3517379154038919, + "learning_rate": 7.639116417796122e-07, + "loss": 0.1986, + "step": 19124 + }, + { + "epoch": 0.88, + "grad_norm": 0.30887089658244643, + "learning_rate": 7.63341370570988e-07, + "loss": 0.224, + "step": 19125 + }, + { + "epoch": 0.88, + "grad_norm": 1.341481118710694, + "learning_rate": 7.627713038530815e-07, + "loss": 0.4765, + "step": 19126 + }, + { + "epoch": 0.88, + "grad_norm": 0.4436689636064535, + "learning_rate": 7.622014416385148e-07, + "loss": 0.1964, + "step": 19127 + }, + { + "epoch": 0.88, + "grad_norm": 0.26976445472079835, + "learning_rate": 7.616317839399057e-07, + "loss": 0.2256, + "step": 19128 + }, + { + "epoch": 0.88, + "grad_norm": 0.7121597900792968, + "learning_rate": 7.610623307698662e-07, + "loss": 0.3696, + "step": 19129 + }, + { + "epoch": 0.88, + "grad_norm": 0.7945609456718764, + "learning_rate": 7.604930821409995e-07, + "loss": 0.4382, + "step": 19130 + }, + { + "epoch": 0.88, + "grad_norm": 0.3089730277815874, + "learning_rate": 7.599240380659123e-07, + "loss": 0.164, + "step": 19131 + }, + { + "epoch": 0.88, + "grad_norm": 0.3758814774626434, + "learning_rate": 7.593551985572023e-07, + "loss": 0.2914, + "step": 19132 + }, + { + "epoch": 0.88, + "grad_norm": 0.6269428750560389, + "learning_rate": 7.587865636274594e-07, + "loss": 0.2706, + "step": 19133 + }, + { + "epoch": 0.88, + "grad_norm": 0.38462883468165826, + "learning_rate": 7.58218133289278e-07, + "loss": 0.232, + "step": 19134 + }, + { + "epoch": 0.88, + "grad_norm": 0.46555698105700166, + "learning_rate": 7.57649907555238e-07, + "loss": 0.3294, + "step": 19135 + }, + { + "epoch": 0.88, + "grad_norm": 0.39517994597928713, + "learning_rate": 7.570818864379203e-07, + "loss": 0.2622, + "step": 19136 + }, + { + "epoch": 0.88, + "grad_norm": 0.3052914867016372, + "learning_rate": 7.56514069949904e-07, + "loss": 0.1806, + "step": 19137 + }, + { + "epoch": 0.88, + "grad_norm": 0.6525958107606444, + "learning_rate": 7.559464581037546e-07, + "loss": 0.2122, + "step": 19138 + }, + { + "epoch": 0.88, + "grad_norm": 0.39448062940169276, + "learning_rate": 7.553790509120429e-07, + "loss": 0.2866, + "step": 19139 + }, + { + "epoch": 0.88, + "grad_norm": 0.31692685612009375, + "learning_rate": 7.548118483873257e-07, + "loss": 0.2167, + "step": 19140 + }, + { + "epoch": 0.88, + "grad_norm": 0.8733360849689117, + "learning_rate": 7.542448505421673e-07, + "loss": 0.5067, + "step": 19141 + }, + { + "epoch": 0.88, + "grad_norm": 1.5006315621657527, + "learning_rate": 7.536780573891144e-07, + "loss": 0.5767, + "step": 19142 + }, + { + "epoch": 0.88, + "grad_norm": 0.26424488135252666, + "learning_rate": 7.531114689407204e-07, + "loss": 0.2308, + "step": 19143 + }, + { + "epoch": 0.88, + "grad_norm": 0.2653388494462439, + "learning_rate": 7.525450852095229e-07, + "loss": 0.1864, + "step": 19144 + }, + { + "epoch": 0.88, + "grad_norm": 0.7011935675902602, + "learning_rate": 7.519789062080662e-07, + "loss": 0.2847, + "step": 19145 + }, + { + "epoch": 0.88, + "grad_norm": 0.37042884560913886, + "learning_rate": 7.514129319488839e-07, + "loss": 0.2707, + "step": 19146 + }, + { + "epoch": 0.88, + "grad_norm": 0.7069425804854719, + "learning_rate": 7.508471624445035e-07, + "loss": 0.2572, + "step": 19147 + }, + { + "epoch": 0.88, + "grad_norm": 0.43648140368588034, + "learning_rate": 7.50281597707454e-07, + "loss": 0.3182, + "step": 19148 + }, + { + "epoch": 0.88, + "grad_norm": 0.2865351862503098, + "learning_rate": 7.497162377502543e-07, + "loss": 0.2301, + "step": 19149 + }, + { + "epoch": 0.88, + "grad_norm": 0.49340578249555367, + "learning_rate": 7.491510825854198e-07, + "loss": 0.1372, + "step": 19150 + }, + { + "epoch": 0.88, + "grad_norm": 0.3367042040755629, + "learning_rate": 7.485861322254673e-07, + "loss": 0.2575, + "step": 19151 + }, + { + "epoch": 0.88, + "grad_norm": 0.38330447591012007, + "learning_rate": 7.480213866828989e-07, + "loss": 0.2554, + "step": 19152 + }, + { + "epoch": 0.88, + "grad_norm": 1.0667690114892183, + "learning_rate": 7.474568459702203e-07, + "loss": 0.2613, + "step": 19153 + }, + { + "epoch": 0.88, + "grad_norm": 0.5697743074330679, + "learning_rate": 7.468925100999314e-07, + "loss": 0.3186, + "step": 19154 + }, + { + "epoch": 0.88, + "grad_norm": 0.4389947799843306, + "learning_rate": 7.463283790845221e-07, + "loss": 0.3053, + "step": 19155 + }, + { + "epoch": 0.88, + "grad_norm": 0.237365384974626, + "learning_rate": 7.45764452936485e-07, + "loss": 0.2099, + "step": 19156 + }, + { + "epoch": 0.88, + "grad_norm": 0.7238410983707284, + "learning_rate": 7.452007316683007e-07, + "loss": 0.2882, + "step": 19157 + }, + { + "epoch": 0.88, + "grad_norm": 0.3427214504908176, + "learning_rate": 7.446372152924552e-07, + "loss": 0.2562, + "step": 19158 + }, + { + "epoch": 0.88, + "grad_norm": 0.4173291447341031, + "learning_rate": 7.440739038214195e-07, + "loss": 0.2863, + "step": 19159 + }, + { + "epoch": 0.88, + "grad_norm": 0.9628351843439291, + "learning_rate": 7.435107972676691e-07, + "loss": 0.5201, + "step": 19160 + }, + { + "epoch": 0.88, + "grad_norm": 0.33805056365634545, + "learning_rate": 7.429478956436653e-07, + "loss": 0.2429, + "step": 19161 + }, + { + "epoch": 0.88, + "grad_norm": 0.4819980918193347, + "learning_rate": 7.423851989618735e-07, + "loss": 0.193, + "step": 19162 + }, + { + "epoch": 0.88, + "grad_norm": 0.39664099158765115, + "learning_rate": 7.418227072347528e-07, + "loss": 0.2473, + "step": 19163 + }, + { + "epoch": 0.88, + "grad_norm": 0.387638212156763, + "learning_rate": 7.412604204747531e-07, + "loss": 0.2567, + "step": 19164 + }, + { + "epoch": 0.88, + "grad_norm": 1.651374651824193, + "learning_rate": 7.406983386943245e-07, + "loss": 0.6351, + "step": 19165 + }, + { + "epoch": 0.88, + "grad_norm": 0.5144010401186656, + "learning_rate": 7.401364619059093e-07, + "loss": 0.2868, + "step": 19166 + }, + { + "epoch": 0.88, + "grad_norm": 0.2696545386256489, + "learning_rate": 7.395747901219474e-07, + "loss": 0.2539, + "step": 19167 + }, + { + "epoch": 0.88, + "grad_norm": 0.9578600893319451, + "learning_rate": 7.390133233548768e-07, + "loss": 0.4334, + "step": 19168 + }, + { + "epoch": 0.88, + "grad_norm": 0.35420670559684964, + "learning_rate": 7.384520616171232e-07, + "loss": 0.2174, + "step": 19169 + }, + { + "epoch": 0.88, + "grad_norm": 0.31358738871384395, + "learning_rate": 7.378910049211152e-07, + "loss": 0.1814, + "step": 19170 + }, + { + "epoch": 0.88, + "grad_norm": 0.39855158148708747, + "learning_rate": 7.373301532792754e-07, + "loss": 0.2944, + "step": 19171 + }, + { + "epoch": 0.88, + "grad_norm": 1.2421401723655467, + "learning_rate": 7.367695067040159e-07, + "loss": 0.8148, + "step": 19172 + }, + { + "epoch": 0.88, + "grad_norm": 0.34370070717259343, + "learning_rate": 7.362090652077536e-07, + "loss": 0.1875, + "step": 19173 + }, + { + "epoch": 0.88, + "grad_norm": 0.9950254136008255, + "learning_rate": 7.356488288028907e-07, + "loss": 0.3521, + "step": 19174 + }, + { + "epoch": 0.88, + "grad_norm": 0.32937495265021244, + "learning_rate": 7.350887975018362e-07, + "loss": 0.2405, + "step": 19175 + }, + { + "epoch": 0.88, + "grad_norm": 0.23085615553001654, + "learning_rate": 7.345289713169856e-07, + "loss": 0.1515, + "step": 19176 + }, + { + "epoch": 0.88, + "grad_norm": 1.4666311198421587, + "learning_rate": 7.339693502607337e-07, + "loss": 0.6272, + "step": 19177 + }, + { + "epoch": 0.88, + "grad_norm": 1.4056212885044614, + "learning_rate": 7.334099343454692e-07, + "loss": 0.6426, + "step": 19178 + }, + { + "epoch": 0.88, + "grad_norm": 0.27892941341359917, + "learning_rate": 7.328507235835769e-07, + "loss": 0.203, + "step": 19179 + }, + { + "epoch": 0.88, + "grad_norm": 0.46047776713148203, + "learning_rate": 7.322917179874401e-07, + "loss": 0.3211, + "step": 19180 + }, + { + "epoch": 0.88, + "grad_norm": 0.2745424877795719, + "learning_rate": 7.317329175694299e-07, + "loss": 0.1669, + "step": 19181 + }, + { + "epoch": 0.88, + "grad_norm": 0.36522066395350244, + "learning_rate": 7.311743223419221e-07, + "loss": 0.2566, + "step": 19182 + }, + { + "epoch": 0.88, + "grad_norm": 0.36905851855195365, + "learning_rate": 7.306159323172801e-07, + "loss": 0.2219, + "step": 19183 + }, + { + "epoch": 0.88, + "grad_norm": 1.0051614637708495, + "learning_rate": 7.300577475078663e-07, + "loss": 0.4976, + "step": 19184 + }, + { + "epoch": 0.88, + "grad_norm": 0.3662537147358378, + "learning_rate": 7.294997679260418e-07, + "loss": 0.2646, + "step": 19185 + }, + { + "epoch": 0.88, + "grad_norm": 0.8873711417767954, + "learning_rate": 7.289419935841557e-07, + "loss": 0.278, + "step": 19186 + }, + { + "epoch": 0.88, + "grad_norm": 0.2774295308209739, + "learning_rate": 7.283844244945581e-07, + "loss": 0.241, + "step": 19187 + }, + { + "epoch": 0.88, + "grad_norm": 0.44803017485754965, + "learning_rate": 7.278270606695937e-07, + "loss": 0.2606, + "step": 19188 + }, + { + "epoch": 0.88, + "grad_norm": 0.32887153838662353, + "learning_rate": 7.272699021216034e-07, + "loss": 0.1788, + "step": 19189 + }, + { + "epoch": 0.88, + "grad_norm": 0.4550450680187026, + "learning_rate": 7.267129488629199e-07, + "loss": 0.2955, + "step": 19190 + }, + { + "epoch": 0.88, + "grad_norm": 0.4001647215177, + "learning_rate": 7.261562009058709e-07, + "loss": 0.2653, + "step": 19191 + }, + { + "epoch": 0.88, + "grad_norm": 0.39976034650706493, + "learning_rate": 7.255996582627878e-07, + "loss": 0.2528, + "step": 19192 + }, + { + "epoch": 0.88, + "grad_norm": 0.41842127896002995, + "learning_rate": 7.250433209459895e-07, + "loss": 0.16, + "step": 19193 + }, + { + "epoch": 0.88, + "grad_norm": 0.607069803871317, + "learning_rate": 7.244871889677929e-07, + "loss": 0.2473, + "step": 19194 + }, + { + "epoch": 0.88, + "grad_norm": 0.31951253536005747, + "learning_rate": 7.239312623405092e-07, + "loss": 0.2624, + "step": 19195 + }, + { + "epoch": 0.88, + "grad_norm": 0.767804889107416, + "learning_rate": 7.233755410764465e-07, + "loss": 0.3147, + "step": 19196 + }, + { + "epoch": 0.88, + "grad_norm": 0.3526254105148667, + "learning_rate": 7.228200251879102e-07, + "loss": 0.2664, + "step": 19197 + }, + { + "epoch": 0.88, + "grad_norm": 0.8594456190346461, + "learning_rate": 7.222647146871952e-07, + "loss": 0.4196, + "step": 19198 + }, + { + "epoch": 0.88, + "grad_norm": 0.38647757877160077, + "learning_rate": 7.217096095865995e-07, + "loss": 0.2292, + "step": 19199 + }, + { + "epoch": 0.88, + "grad_norm": 0.2576814779809863, + "learning_rate": 7.211547098984084e-07, + "loss": 0.203, + "step": 19200 + }, + { + "epoch": 0.88, + "grad_norm": 0.48300131056923046, + "learning_rate": 7.206000156349103e-07, + "loss": 0.2502, + "step": 19201 + }, + { + "epoch": 0.88, + "grad_norm": 0.42232358814305526, + "learning_rate": 7.20045526808384e-07, + "loss": 0.267, + "step": 19202 + }, + { + "epoch": 0.88, + "grad_norm": 0.2673786884096133, + "learning_rate": 7.194912434311052e-07, + "loss": 0.2327, + "step": 19203 + }, + { + "epoch": 0.88, + "grad_norm": 1.4068913733915827, + "learning_rate": 7.189371655153455e-07, + "loss": 0.6702, + "step": 19204 + }, + { + "epoch": 0.88, + "grad_norm": 0.5383284123558172, + "learning_rate": 7.183832930733714e-07, + "loss": 0.0973, + "step": 19205 + }, + { + "epoch": 0.88, + "grad_norm": 0.3076218016685364, + "learning_rate": 7.178296261174467e-07, + "loss": 0.1579, + "step": 19206 + }, + { + "epoch": 0.88, + "grad_norm": 0.27879660090390457, + "learning_rate": 7.17276164659827e-07, + "loss": 0.2565, + "step": 19207 + }, + { + "epoch": 0.88, + "grad_norm": 0.6366163414383302, + "learning_rate": 7.167229087127669e-07, + "loss": 0.366, + "step": 19208 + }, + { + "epoch": 0.88, + "grad_norm": 0.35744465355295635, + "learning_rate": 7.161698582885135e-07, + "loss": 0.1758, + "step": 19209 + }, + { + "epoch": 0.88, + "grad_norm": 0.5852466236818958, + "learning_rate": 7.156170133993112e-07, + "loss": 0.3306, + "step": 19210 + }, + { + "epoch": 0.88, + "grad_norm": 0.3895572733872164, + "learning_rate": 7.150643740574015e-07, + "loss": 0.3003, + "step": 19211 + }, + { + "epoch": 0.88, + "grad_norm": 0.27339503955572314, + "learning_rate": 7.145119402750167e-07, + "loss": 0.1307, + "step": 19212 + }, + { + "epoch": 0.88, + "grad_norm": 0.38054505213932543, + "learning_rate": 7.13959712064387e-07, + "loss": 0.2498, + "step": 19213 + }, + { + "epoch": 0.88, + "grad_norm": 0.7886480691958353, + "learning_rate": 7.134076894377407e-07, + "loss": 0.4153, + "step": 19214 + }, + { + "epoch": 0.88, + "grad_norm": 0.2597452948511349, + "learning_rate": 7.128558724072976e-07, + "loss": 0.2119, + "step": 19215 + }, + { + "epoch": 0.88, + "grad_norm": 0.7401159415146347, + "learning_rate": 7.123042609852748e-07, + "loss": 0.3857, + "step": 19216 + }, + { + "epoch": 0.88, + "grad_norm": 1.2879497620174907, + "learning_rate": 7.117528551838804e-07, + "loss": 0.4913, + "step": 19217 + }, + { + "epoch": 0.88, + "grad_norm": 0.22557738847132777, + "learning_rate": 7.1120165501533e-07, + "loss": 0.1511, + "step": 19218 + }, + { + "epoch": 0.88, + "grad_norm": 0.37139860122979707, + "learning_rate": 7.106506604918217e-07, + "loss": 0.2852, + "step": 19219 + }, + { + "epoch": 0.88, + "grad_norm": 0.6687166125061019, + "learning_rate": 7.100998716255536e-07, + "loss": 0.3651, + "step": 19220 + }, + { + "epoch": 0.88, + "grad_norm": 0.4159865348403842, + "learning_rate": 7.095492884287192e-07, + "loss": 0.2821, + "step": 19221 + }, + { + "epoch": 0.88, + "grad_norm": 0.2865968756505844, + "learning_rate": 7.089989109135109e-07, + "loss": 0.1349, + "step": 19222 + }, + { + "epoch": 0.88, + "grad_norm": 0.4241610673735297, + "learning_rate": 7.084487390921125e-07, + "loss": 0.2796, + "step": 19223 + }, + { + "epoch": 0.88, + "grad_norm": 0.5922881756689353, + "learning_rate": 7.078987729767028e-07, + "loss": 0.239, + "step": 19224 + }, + { + "epoch": 0.88, + "grad_norm": 0.40091204688977466, + "learning_rate": 7.073490125794591e-07, + "loss": 0.2471, + "step": 19225 + }, + { + "epoch": 0.88, + "grad_norm": 0.30835857867203303, + "learning_rate": 7.067994579125515e-07, + "loss": 0.2817, + "step": 19226 + }, + { + "epoch": 0.88, + "grad_norm": 0.8448069816073958, + "learning_rate": 7.062501089881458e-07, + "loss": 0.4297, + "step": 19227 + }, + { + "epoch": 0.88, + "grad_norm": 0.2823904993721932, + "learning_rate": 7.057009658184078e-07, + "loss": 0.1656, + "step": 19228 + }, + { + "epoch": 0.88, + "grad_norm": 1.7786339393312804, + "learning_rate": 7.051520284154911e-07, + "loss": 0.5132, + "step": 19229 + }, + { + "epoch": 0.88, + "grad_norm": 0.5486923571018723, + "learning_rate": 7.046032967915484e-07, + "loss": 0.2807, + "step": 19230 + }, + { + "epoch": 0.88, + "grad_norm": 0.27276447200254633, + "learning_rate": 7.040547709587331e-07, + "loss": 0.269, + "step": 19231 + }, + { + "epoch": 0.88, + "grad_norm": 1.0638934942008298, + "learning_rate": 7.035064509291833e-07, + "loss": 0.3118, + "step": 19232 + }, + { + "epoch": 0.88, + "grad_norm": 0.3076638423286795, + "learning_rate": 7.029583367150416e-07, + "loss": 0.1933, + "step": 19233 + }, + { + "epoch": 0.88, + "grad_norm": 0.34028037205160244, + "learning_rate": 7.024104283284394e-07, + "loss": 0.228, + "step": 19234 + }, + { + "epoch": 0.88, + "grad_norm": 0.3435567762262159, + "learning_rate": 7.018627257815113e-07, + "loss": 0.2148, + "step": 19235 + }, + { + "epoch": 0.88, + "grad_norm": 0.5297540285894589, + "learning_rate": 7.0131522908638e-07, + "loss": 0.3039, + "step": 19236 + }, + { + "epoch": 0.88, + "grad_norm": 0.4210898997043807, + "learning_rate": 7.007679382551691e-07, + "loss": 0.2801, + "step": 19237 + }, + { + "epoch": 0.88, + "grad_norm": 0.34473605633110976, + "learning_rate": 7.002208532999933e-07, + "loss": 0.2479, + "step": 19238 + }, + { + "epoch": 0.88, + "grad_norm": 0.41438206898807484, + "learning_rate": 6.996739742329606e-07, + "loss": 0.2883, + "step": 19239 + }, + { + "epoch": 0.88, + "grad_norm": 0.38184646594687427, + "learning_rate": 6.99127301066187e-07, + "loss": 0.2342, + "step": 19240 + }, + { + "epoch": 0.88, + "grad_norm": 0.4988983488168713, + "learning_rate": 6.985808338117673e-07, + "loss": 0.1964, + "step": 19241 + }, + { + "epoch": 0.88, + "grad_norm": 0.4503269553335103, + "learning_rate": 6.980345724818061e-07, + "loss": 0.3175, + "step": 19242 + }, + { + "epoch": 0.88, + "grad_norm": 0.3250307658892617, + "learning_rate": 6.974885170883916e-07, + "loss": 0.2619, + "step": 19243 + }, + { + "epoch": 0.88, + "grad_norm": 1.3439332542989475, + "learning_rate": 6.969426676436164e-07, + "loss": 0.676, + "step": 19244 + }, + { + "epoch": 0.88, + "grad_norm": 0.33792103847466226, + "learning_rate": 6.963970241595653e-07, + "loss": 0.1004, + "step": 19245 + }, + { + "epoch": 0.88, + "grad_norm": 0.2974932484397419, + "learning_rate": 6.958515866483151e-07, + "loss": 0.203, + "step": 19246 + }, + { + "epoch": 0.88, + "grad_norm": 0.38326174916754424, + "learning_rate": 6.95306355121943e-07, + "loss": 0.2854, + "step": 19247 + }, + { + "epoch": 0.88, + "grad_norm": 0.5652467202564389, + "learning_rate": 6.947613295925226e-07, + "loss": 0.2141, + "step": 19248 + }, + { + "epoch": 0.88, + "grad_norm": 0.518119486740566, + "learning_rate": 6.942165100721165e-07, + "loss": 0.2971, + "step": 19249 + }, + { + "epoch": 0.88, + "grad_norm": 1.4755982385474888, + "learning_rate": 6.936718965727884e-07, + "loss": 0.3463, + "step": 19250 + }, + { + "epoch": 0.88, + "grad_norm": 0.3470077405772606, + "learning_rate": 6.931274891065931e-07, + "loss": 0.1976, + "step": 19251 + }, + { + "epoch": 0.88, + "grad_norm": 0.2562906156583866, + "learning_rate": 6.925832876855876e-07, + "loss": 0.1906, + "step": 19252 + }, + { + "epoch": 0.88, + "grad_norm": 0.7807209972048277, + "learning_rate": 6.920392923218156e-07, + "loss": 0.3955, + "step": 19253 + }, + { + "epoch": 0.88, + "grad_norm": 0.3278206509188804, + "learning_rate": 6.914955030273251e-07, + "loss": 0.2133, + "step": 19254 + }, + { + "epoch": 0.88, + "grad_norm": 0.4536696196560434, + "learning_rate": 6.909519198141512e-07, + "loss": 0.2941, + "step": 19255 + }, + { + "epoch": 0.88, + "grad_norm": 1.1606015884512435, + "learning_rate": 6.904085426943275e-07, + "loss": 0.544, + "step": 19256 + }, + { + "epoch": 0.88, + "grad_norm": 0.5297817702240754, + "learning_rate": 6.898653716798887e-07, + "loss": 0.2283, + "step": 19257 + }, + { + "epoch": 0.88, + "grad_norm": 0.353494199275767, + "learning_rate": 6.893224067828552e-07, + "loss": 0.2115, + "step": 19258 + }, + { + "epoch": 0.88, + "grad_norm": 0.35156753718874845, + "learning_rate": 6.887796480152531e-07, + "loss": 0.3064, + "step": 19259 + }, + { + "epoch": 0.88, + "grad_norm": 0.32155043145552376, + "learning_rate": 6.882370953890927e-07, + "loss": 0.1678, + "step": 19260 + }, + { + "epoch": 0.88, + "grad_norm": 0.4134695919309794, + "learning_rate": 6.876947489163877e-07, + "loss": 0.2129, + "step": 19261 + }, + { + "epoch": 0.88, + "grad_norm": 0.3802575637156816, + "learning_rate": 6.871526086091473e-07, + "loss": 0.3363, + "step": 19262 + }, + { + "epoch": 0.88, + "grad_norm": 1.4299050991215738, + "learning_rate": 6.86610674479371e-07, + "loss": 0.5412, + "step": 19263 + }, + { + "epoch": 0.88, + "grad_norm": 0.31327250090320174, + "learning_rate": 6.860689465390591e-07, + "loss": 0.1975, + "step": 19264 + }, + { + "epoch": 0.89, + "grad_norm": 0.34505078454851085, + "learning_rate": 6.855274248002042e-07, + "loss": 0.2069, + "step": 19265 + }, + { + "epoch": 0.89, + "grad_norm": 0.3855991376548766, + "learning_rate": 6.849861092747934e-07, + "loss": 0.292, + "step": 19266 + }, + { + "epoch": 0.89, + "grad_norm": 0.2968159837478404, + "learning_rate": 6.844449999748137e-07, + "loss": 0.1831, + "step": 19267 + }, + { + "epoch": 0.89, + "grad_norm": 1.1974805473349812, + "learning_rate": 6.839040969122401e-07, + "loss": 0.7142, + "step": 19268 + }, + { + "epoch": 0.89, + "grad_norm": 0.7956044411792038, + "learning_rate": 6.833634000990541e-07, + "loss": 0.3191, + "step": 19269 + }, + { + "epoch": 0.89, + "grad_norm": 0.29852511430722, + "learning_rate": 6.828229095472217e-07, + "loss": 0.2676, + "step": 19270 + }, + { + "epoch": 0.89, + "grad_norm": 0.5373356124464185, + "learning_rate": 6.822826252687109e-07, + "loss": 0.2598, + "step": 19271 + }, + { + "epoch": 0.89, + "grad_norm": 0.23417208973854192, + "learning_rate": 6.817425472754813e-07, + "loss": 0.1447, + "step": 19272 + }, + { + "epoch": 0.89, + "grad_norm": 0.36149216458784794, + "learning_rate": 6.812026755794899e-07, + "loss": 0.2572, + "step": 19273 + }, + { + "epoch": 0.89, + "grad_norm": 0.3560949309202671, + "learning_rate": 6.806630101926926e-07, + "loss": 0.258, + "step": 19274 + }, + { + "epoch": 0.89, + "grad_norm": 0.7172271712062704, + "learning_rate": 6.80123551127031e-07, + "loss": 0.3788, + "step": 19275 + }, + { + "epoch": 0.89, + "grad_norm": 0.35134700708120375, + "learning_rate": 6.795842983944545e-07, + "loss": 0.2484, + "step": 19276 + }, + { + "epoch": 0.89, + "grad_norm": 0.5418956978905485, + "learning_rate": 6.790452520068957e-07, + "loss": 0.1753, + "step": 19277 + }, + { + "epoch": 0.89, + "grad_norm": 0.2971629890691787, + "learning_rate": 6.785064119762919e-07, + "loss": 0.2224, + "step": 19278 + }, + { + "epoch": 0.89, + "grad_norm": 0.3913571097387703, + "learning_rate": 6.779677783145732e-07, + "loss": 0.271, + "step": 19279 + }, + { + "epoch": 0.89, + "grad_norm": 1.8469547938644046, + "learning_rate": 6.774293510336615e-07, + "loss": 0.3485, + "step": 19280 + }, + { + "epoch": 0.89, + "grad_norm": 0.7234212342784379, + "learning_rate": 6.768911301454794e-07, + "loss": 0.3479, + "step": 19281 + }, + { + "epoch": 0.89, + "grad_norm": 0.2872656267700285, + "learning_rate": 6.763531156619418e-07, + "loss": 0.2371, + "step": 19282 + }, + { + "epoch": 0.89, + "grad_norm": 0.5619362059418193, + "learning_rate": 6.758153075949613e-07, + "loss": 0.3137, + "step": 19283 + }, + { + "epoch": 0.89, + "grad_norm": 0.24475089825449406, + "learning_rate": 6.752777059564431e-07, + "loss": 0.082, + "step": 19284 + }, + { + "epoch": 0.89, + "grad_norm": 0.33058254850905905, + "learning_rate": 6.747403107582884e-07, + "loss": 0.2497, + "step": 19285 + }, + { + "epoch": 0.89, + "grad_norm": 0.34372700418444846, + "learning_rate": 6.742031220123946e-07, + "loss": 0.2977, + "step": 19286 + }, + { + "epoch": 0.89, + "grad_norm": 0.6762834132783561, + "learning_rate": 6.736661397306554e-07, + "loss": 0.3026, + "step": 19287 + }, + { + "epoch": 0.89, + "grad_norm": 0.35300511203847806, + "learning_rate": 6.731293639249604e-07, + "loss": 0.2597, + "step": 19288 + }, + { + "epoch": 0.89, + "grad_norm": 1.294282014224038, + "learning_rate": 6.725927946071908e-07, + "loss": 0.4886, + "step": 19289 + }, + { + "epoch": 0.89, + "grad_norm": 0.2071939987812276, + "learning_rate": 6.720564317892275e-07, + "loss": 0.1682, + "step": 19290 + }, + { + "epoch": 0.89, + "grad_norm": 0.38599507229309754, + "learning_rate": 6.715202754829453e-07, + "loss": 0.28, + "step": 19291 + }, + { + "epoch": 0.89, + "grad_norm": 0.9158237135644358, + "learning_rate": 6.709843257002113e-07, + "loss": 0.4875, + "step": 19292 + }, + { + "epoch": 0.89, + "grad_norm": 0.41436712473059667, + "learning_rate": 6.70448582452895e-07, + "loss": 0.2551, + "step": 19293 + }, + { + "epoch": 0.89, + "grad_norm": 0.3720876754585612, + "learning_rate": 6.699130457528535e-07, + "loss": 0.2765, + "step": 19294 + }, + { + "epoch": 0.89, + "grad_norm": 0.5220026134805323, + "learning_rate": 6.693777156119441e-07, + "loss": 0.3011, + "step": 19295 + }, + { + "epoch": 0.89, + "grad_norm": 0.2976415256316706, + "learning_rate": 6.688425920420216e-07, + "loss": 0.1703, + "step": 19296 + }, + { + "epoch": 0.89, + "grad_norm": 0.31437248131197465, + "learning_rate": 6.683076750549288e-07, + "loss": 0.1831, + "step": 19297 + }, + { + "epoch": 0.89, + "grad_norm": 0.34576507846748544, + "learning_rate": 6.677729646625097e-07, + "loss": 0.2907, + "step": 19298 + }, + { + "epoch": 0.89, + "grad_norm": 0.6383618609597203, + "learning_rate": 6.672384608766025e-07, + "loss": 0.3769, + "step": 19299 + }, + { + "epoch": 0.89, + "grad_norm": 0.32125736650602976, + "learning_rate": 6.667041637090432e-07, + "loss": 0.1962, + "step": 19300 + }, + { + "epoch": 0.89, + "grad_norm": 1.4590306584945751, + "learning_rate": 6.661700731716558e-07, + "loss": 0.4416, + "step": 19301 + }, + { + "epoch": 0.89, + "grad_norm": 0.39300259439950214, + "learning_rate": 6.656361892762686e-07, + "loss": 0.3239, + "step": 19302 + }, + { + "epoch": 0.89, + "grad_norm": 0.2403431488918557, + "learning_rate": 6.651025120346988e-07, + "loss": 0.1607, + "step": 19303 + }, + { + "epoch": 0.89, + "grad_norm": 0.7266648657689263, + "learning_rate": 6.645690414587613e-07, + "loss": 0.3765, + "step": 19304 + }, + { + "epoch": 0.89, + "grad_norm": 0.5240077198371482, + "learning_rate": 6.640357775602701e-07, + "loss": 0.3791, + "step": 19305 + }, + { + "epoch": 0.89, + "grad_norm": 0.22311643999491565, + "learning_rate": 6.635027203510258e-07, + "loss": 0.1758, + "step": 19306 + }, + { + "epoch": 0.89, + "grad_norm": 1.7238022972978235, + "learning_rate": 6.629698698428333e-07, + "loss": 0.6118, + "step": 19307 + }, + { + "epoch": 0.89, + "grad_norm": 0.4614443227540382, + "learning_rate": 6.62437226047491e-07, + "loss": 0.2642, + "step": 19308 + }, + { + "epoch": 0.89, + "grad_norm": 0.3362431270685603, + "learning_rate": 6.619047889767871e-07, + "loss": 0.2429, + "step": 19309 + }, + { + "epoch": 0.89, + "grad_norm": 0.35626178826564253, + "learning_rate": 6.613725586425112e-07, + "loss": 0.2497, + "step": 19310 + }, + { + "epoch": 0.89, + "grad_norm": 0.44925093120466697, + "learning_rate": 6.608405350564451e-07, + "loss": 0.2624, + "step": 19311 + }, + { + "epoch": 0.89, + "grad_norm": 0.42915532983785387, + "learning_rate": 6.603087182303702e-07, + "loss": 0.2924, + "step": 19312 + }, + { + "epoch": 0.89, + "grad_norm": 0.7596291624774706, + "learning_rate": 6.597771081760584e-07, + "loss": 0.2097, + "step": 19313 + }, + { + "epoch": 0.89, + "grad_norm": 0.4377995005949378, + "learning_rate": 6.592457049052781e-07, + "loss": 0.3171, + "step": 19314 + }, + { + "epoch": 0.89, + "grad_norm": 0.403123035577979, + "learning_rate": 6.587145084297963e-07, + "loss": 0.2293, + "step": 19315 + }, + { + "epoch": 0.89, + "grad_norm": 0.47948769869408847, + "learning_rate": 6.581835187613695e-07, + "loss": 0.2674, + "step": 19316 + }, + { + "epoch": 0.89, + "grad_norm": 0.40265252583005723, + "learning_rate": 6.57652735911759e-07, + "loss": 0.2446, + "step": 19317 + }, + { + "epoch": 0.89, + "grad_norm": 0.27738093271911946, + "learning_rate": 6.571221598927102e-07, + "loss": 0.2526, + "step": 19318 + }, + { + "epoch": 0.89, + "grad_norm": 0.5152921785521215, + "learning_rate": 6.565917907159747e-07, + "loss": 0.1681, + "step": 19319 + }, + { + "epoch": 0.89, + "grad_norm": 0.953059922589591, + "learning_rate": 6.560616283932897e-07, + "loss": 0.4495, + "step": 19320 + }, + { + "epoch": 0.89, + "grad_norm": 0.4248977661221667, + "learning_rate": 6.555316729363937e-07, + "loss": 0.2725, + "step": 19321 + }, + { + "epoch": 0.89, + "grad_norm": 0.31371014057055197, + "learning_rate": 6.550019243570227e-07, + "loss": 0.2939, + "step": 19322 + }, + { + "epoch": 0.89, + "grad_norm": 0.2703229457972463, + "learning_rate": 6.544723826668998e-07, + "loss": 0.168, + "step": 19323 + }, + { + "epoch": 0.89, + "grad_norm": 0.2570232491692117, + "learning_rate": 6.53943047877752e-07, + "loss": 0.2044, + "step": 19324 + }, + { + "epoch": 0.89, + "grad_norm": 1.6515158642725487, + "learning_rate": 6.534139200012979e-07, + "loss": 0.5292, + "step": 19325 + }, + { + "epoch": 0.89, + "grad_norm": 0.37650659054123, + "learning_rate": 6.528849990492503e-07, + "loss": 0.2337, + "step": 19326 + }, + { + "epoch": 0.89, + "grad_norm": 0.3336105096393883, + "learning_rate": 6.523562850333221e-07, + "loss": 0.2403, + "step": 19327 + }, + { + "epoch": 0.89, + "grad_norm": 0.9125595063515183, + "learning_rate": 6.518277779652115e-07, + "loss": 0.5061, + "step": 19328 + }, + { + "epoch": 0.89, + "grad_norm": 0.3315793184602384, + "learning_rate": 6.512994778566284e-07, + "loss": 0.252, + "step": 19329 + }, + { + "epoch": 0.89, + "grad_norm": 0.3375279406468978, + "learning_rate": 6.507713847192643e-07, + "loss": 0.1987, + "step": 19330 + }, + { + "epoch": 0.89, + "grad_norm": 0.3146508357843586, + "learning_rate": 6.502434985648098e-07, + "loss": 0.24, + "step": 19331 + }, + { + "epoch": 0.89, + "grad_norm": 0.6461567353632968, + "learning_rate": 6.497158194049535e-07, + "loss": 0.278, + "step": 19332 + }, + { + "epoch": 0.89, + "grad_norm": 0.40320234901610724, + "learning_rate": 6.491883472513738e-07, + "loss": 0.2393, + "step": 19333 + }, + { + "epoch": 0.89, + "grad_norm": 0.3362351329666341, + "learning_rate": 6.486610821157557e-07, + "loss": 0.2852, + "step": 19334 + }, + { + "epoch": 0.89, + "grad_norm": 1.1789486724509293, + "learning_rate": 6.481340240097655e-07, + "loss": 0.6892, + "step": 19335 + }, + { + "epoch": 0.89, + "grad_norm": 0.2991461621643354, + "learning_rate": 6.476071729450772e-07, + "loss": 0.1375, + "step": 19336 + }, + { + "epoch": 0.89, + "grad_norm": 0.31554517965054824, + "learning_rate": 6.470805289333504e-07, + "loss": 0.2455, + "step": 19337 + }, + { + "epoch": 0.89, + "grad_norm": 0.4614306843287431, + "learning_rate": 6.465540919862457e-07, + "loss": 0.318, + "step": 19338 + }, + { + "epoch": 0.89, + "grad_norm": 0.4016540047400901, + "learning_rate": 6.460278621154203e-07, + "loss": 0.2016, + "step": 19339 + }, + { + "epoch": 0.89, + "grad_norm": 0.6110033753888543, + "learning_rate": 6.455018393325218e-07, + "loss": 0.3029, + "step": 19340 + }, + { + "epoch": 0.89, + "grad_norm": 0.5265732284067997, + "learning_rate": 6.449760236491953e-07, + "loss": 0.3752, + "step": 19341 + }, + { + "epoch": 0.89, + "grad_norm": 0.23708505985340458, + "learning_rate": 6.444504150770859e-07, + "loss": 0.1559, + "step": 19342 + }, + { + "epoch": 0.89, + "grad_norm": 0.3719736188749245, + "learning_rate": 6.439250136278253e-07, + "loss": 0.2355, + "step": 19343 + }, + { + "epoch": 0.89, + "grad_norm": 0.8177555115930268, + "learning_rate": 6.433998193130486e-07, + "loss": 0.3757, + "step": 19344 + }, + { + "epoch": 0.89, + "grad_norm": 0.3545133310917489, + "learning_rate": 6.4287483214438e-07, + "loss": 0.1831, + "step": 19345 + }, + { + "epoch": 0.89, + "grad_norm": 0.3559362173148364, + "learning_rate": 6.423500521334447e-07, + "loss": 0.2911, + "step": 19346 + }, + { + "epoch": 0.89, + "grad_norm": 1.483148668584176, + "learning_rate": 6.418254792918598e-07, + "loss": 0.5996, + "step": 19347 + }, + { + "epoch": 0.89, + "grad_norm": 0.43766951455509023, + "learning_rate": 6.413011136312419e-07, + "loss": 0.1943, + "step": 19348 + }, + { + "epoch": 0.89, + "grad_norm": 0.25030754781779846, + "learning_rate": 6.40776955163196e-07, + "loss": 0.1977, + "step": 19349 + }, + { + "epoch": 0.89, + "grad_norm": 0.4393473843311356, + "learning_rate": 6.402530038993249e-07, + "loss": 0.3295, + "step": 19350 + }, + { + "epoch": 0.89, + "grad_norm": 0.9106636249087902, + "learning_rate": 6.39729259851235e-07, + "loss": 0.4535, + "step": 19351 + }, + { + "epoch": 0.89, + "grad_norm": 0.3662780068001187, + "learning_rate": 6.39205723030516e-07, + "loss": 0.2119, + "step": 19352 + }, + { + "epoch": 0.89, + "grad_norm": 0.35018605809127135, + "learning_rate": 6.386823934487619e-07, + "loss": 0.2952, + "step": 19353 + }, + { + "epoch": 0.89, + "grad_norm": 0.59779877839737, + "learning_rate": 6.381592711175555e-07, + "loss": 0.2667, + "step": 19354 + }, + { + "epoch": 0.89, + "grad_norm": 0.2917772872243583, + "learning_rate": 6.376363560484789e-07, + "loss": 0.2056, + "step": 19355 + }, + { + "epoch": 0.89, + "grad_norm": 0.5095208133445415, + "learning_rate": 6.371136482531126e-07, + "loss": 0.247, + "step": 19356 + }, + { + "epoch": 0.89, + "grad_norm": 0.4573497922283493, + "learning_rate": 6.365911477430242e-07, + "loss": 0.2668, + "step": 19357 + }, + { + "epoch": 0.89, + "grad_norm": 0.327349984635034, + "learning_rate": 6.360688545297822e-07, + "loss": 0.2675, + "step": 19358 + }, + { + "epoch": 0.89, + "grad_norm": 1.7068460200662563, + "learning_rate": 6.355467686249528e-07, + "loss": 0.3789, + "step": 19359 + }, + { + "epoch": 0.89, + "grad_norm": 0.5624429786579328, + "learning_rate": 6.350248900400913e-07, + "loss": 0.3135, + "step": 19360 + }, + { + "epoch": 0.89, + "grad_norm": 0.40307942142253794, + "learning_rate": 6.345032187867539e-07, + "loss": 0.2946, + "step": 19361 + }, + { + "epoch": 0.89, + "grad_norm": 0.2378402495747571, + "learning_rate": 6.33981754876487e-07, + "loss": 0.1818, + "step": 19362 + }, + { + "epoch": 0.89, + "grad_norm": 0.6318358701936131, + "learning_rate": 6.33460498320837e-07, + "loss": 0.3239, + "step": 19363 + }, + { + "epoch": 0.89, + "grad_norm": 0.450940398282647, + "learning_rate": 6.329394491313445e-07, + "loss": 0.3545, + "step": 19364 + }, + { + "epoch": 0.89, + "grad_norm": 0.33735041889798073, + "learning_rate": 6.32418607319546e-07, + "loss": 0.2615, + "step": 19365 + }, + { + "epoch": 0.89, + "grad_norm": 0.7524162396387354, + "learning_rate": 6.318979728969687e-07, + "loss": 0.3209, + "step": 19366 + }, + { + "epoch": 0.89, + "grad_norm": 0.37244493696922476, + "learning_rate": 6.313775458751415e-07, + "loss": 0.2699, + "step": 19367 + }, + { + "epoch": 0.89, + "grad_norm": 0.43691738446713946, + "learning_rate": 6.30857326265587e-07, + "loss": 0.1348, + "step": 19368 + }, + { + "epoch": 0.89, + "grad_norm": 0.34571823744855684, + "learning_rate": 6.303373140798197e-07, + "loss": 0.2688, + "step": 19369 + }, + { + "epoch": 0.89, + "grad_norm": 0.37849016289173865, + "learning_rate": 6.298175093293557e-07, + "loss": 0.3017, + "step": 19370 + }, + { + "epoch": 0.89, + "grad_norm": 1.1541719297063717, + "learning_rate": 6.292979120256992e-07, + "loss": 0.4704, + "step": 19371 + }, + { + "epoch": 0.89, + "grad_norm": 0.6122205296979082, + "learning_rate": 6.287785221803555e-07, + "loss": 0.2616, + "step": 19372 + }, + { + "epoch": 0.89, + "grad_norm": 0.31170435388489137, + "learning_rate": 6.282593398048254e-07, + "loss": 0.2557, + "step": 19373 + }, + { + "epoch": 0.89, + "grad_norm": 0.5372987932338423, + "learning_rate": 6.277403649105985e-07, + "loss": 0.3492, + "step": 19374 + }, + { + "epoch": 0.89, + "grad_norm": 0.2861833641465051, + "learning_rate": 6.272215975091678e-07, + "loss": 0.0901, + "step": 19375 + }, + { + "epoch": 0.89, + "grad_norm": 0.407529043158273, + "learning_rate": 6.267030376120154e-07, + "loss": 0.341, + "step": 19376 + }, + { + "epoch": 0.89, + "grad_norm": 0.35819377738234265, + "learning_rate": 6.261846852306264e-07, + "loss": 0.2999, + "step": 19377 + }, + { + "epoch": 0.89, + "grad_norm": 0.49980554869766025, + "learning_rate": 6.256665403764739e-07, + "loss": 0.2489, + "step": 19378 + }, + { + "epoch": 0.89, + "grad_norm": 0.43380630071232396, + "learning_rate": 6.251486030610266e-07, + "loss": 0.2665, + "step": 19379 + }, + { + "epoch": 0.89, + "grad_norm": 0.49537820329052495, + "learning_rate": 6.246308732957551e-07, + "loss": 0.1923, + "step": 19380 + }, + { + "epoch": 0.89, + "grad_norm": 0.2496297213480803, + "learning_rate": 6.241133510921193e-07, + "loss": 0.1631, + "step": 19381 + }, + { + "epoch": 0.89, + "grad_norm": 0.44377976046645234, + "learning_rate": 6.235960364615779e-07, + "loss": 0.3154, + "step": 19382 + }, + { + "epoch": 0.89, + "grad_norm": 0.789491206584543, + "learning_rate": 6.230789294155826e-07, + "loss": 0.4228, + "step": 19383 + }, + { + "epoch": 0.89, + "grad_norm": 0.780913989629484, + "learning_rate": 6.225620299655821e-07, + "loss": 0.3835, + "step": 19384 + }, + { + "epoch": 0.89, + "grad_norm": 0.26451024679968543, + "learning_rate": 6.220453381230219e-07, + "loss": 0.1983, + "step": 19385 + }, + { + "epoch": 0.89, + "grad_norm": 0.3824376216333412, + "learning_rate": 6.21528853899338e-07, + "loss": 0.2814, + "step": 19386 + }, + { + "epoch": 0.89, + "grad_norm": 0.47654661551089184, + "learning_rate": 6.210125773059672e-07, + "loss": 0.262, + "step": 19387 + }, + { + "epoch": 0.89, + "grad_norm": 0.31173545206745584, + "learning_rate": 6.204965083543368e-07, + "loss": 0.2012, + "step": 19388 + }, + { + "epoch": 0.89, + "grad_norm": 0.3455479855856143, + "learning_rate": 6.199806470558744e-07, + "loss": 0.2842, + "step": 19389 + }, + { + "epoch": 0.89, + "grad_norm": 0.7721047252262552, + "learning_rate": 6.194649934220009e-07, + "loss": 0.4057, + "step": 19390 + }, + { + "epoch": 0.89, + "grad_norm": 0.3591928303074338, + "learning_rate": 6.189495474641293e-07, + "loss": 0.1845, + "step": 19391 + }, + { + "epoch": 0.89, + "grad_norm": 1.3833431954435864, + "learning_rate": 6.184343091936751e-07, + "loss": 0.4509, + "step": 19392 + }, + { + "epoch": 0.89, + "grad_norm": 0.25073552101392166, + "learning_rate": 6.1791927862204e-07, + "loss": 0.2202, + "step": 19393 + }, + { + "epoch": 0.89, + "grad_norm": 0.3277288603599387, + "learning_rate": 6.174044557606329e-07, + "loss": 0.2025, + "step": 19394 + }, + { + "epoch": 0.89, + "grad_norm": 0.717750845206327, + "learning_rate": 6.168898406208479e-07, + "loss": 0.3645, + "step": 19395 + }, + { + "epoch": 0.89, + "grad_norm": 0.8804302489269706, + "learning_rate": 6.16375433214077e-07, + "loss": 0.5006, + "step": 19396 + }, + { + "epoch": 0.89, + "grad_norm": 0.30523221088621844, + "learning_rate": 6.1586123355171e-07, + "loss": 0.2532, + "step": 19397 + }, + { + "epoch": 0.89, + "grad_norm": 0.6972045812168728, + "learning_rate": 6.153472416451301e-07, + "loss": 0.2386, + "step": 19398 + }, + { + "epoch": 0.89, + "grad_norm": 0.4821174894118999, + "learning_rate": 6.148334575057191e-07, + "loss": 0.2678, + "step": 19399 + }, + { + "epoch": 0.89, + "grad_norm": 0.35569729390183025, + "learning_rate": 6.14319881144848e-07, + "loss": 0.2592, + "step": 19400 + }, + { + "epoch": 0.89, + "grad_norm": 0.34377455265383444, + "learning_rate": 6.138065125738901e-07, + "loss": 0.2494, + "step": 19401 + }, + { + "epoch": 0.89, + "grad_norm": 0.4446249125903841, + "learning_rate": 6.132933518042094e-07, + "loss": 0.2829, + "step": 19402 + }, + { + "epoch": 0.89, + "grad_norm": 0.3735080765879937, + "learning_rate": 6.127803988471659e-07, + "loss": 0.2721, + "step": 19403 + }, + { + "epoch": 0.89, + "grad_norm": 0.7032681274098433, + "learning_rate": 6.122676537141182e-07, + "loss": 0.248, + "step": 19404 + }, + { + "epoch": 0.89, + "grad_norm": 0.36336825146632873, + "learning_rate": 6.117551164164159e-07, + "loss": 0.2751, + "step": 19405 + }, + { + "epoch": 0.89, + "grad_norm": 0.3379826316242414, + "learning_rate": 6.112427869654059e-07, + "loss": 0.2467, + "step": 19406 + }, + { + "epoch": 0.89, + "grad_norm": 0.7612297529412821, + "learning_rate": 6.107306653724332e-07, + "loss": 0.3003, + "step": 19407 + }, + { + "epoch": 0.89, + "grad_norm": 0.43851991376851945, + "learning_rate": 6.102187516488323e-07, + "loss": 0.2488, + "step": 19408 + }, + { + "epoch": 0.89, + "grad_norm": 0.2861592469040228, + "learning_rate": 6.097070458059406e-07, + "loss": 0.2417, + "step": 19409 + }, + { + "epoch": 0.89, + "grad_norm": 1.4134174752803659, + "learning_rate": 6.091955478550815e-07, + "loss": 0.4973, + "step": 19410 + }, + { + "epoch": 0.89, + "grad_norm": 0.6930038615247549, + "learning_rate": 6.086842578075835e-07, + "loss": 0.2633, + "step": 19411 + }, + { + "epoch": 0.89, + "grad_norm": 0.35406910311529566, + "learning_rate": 6.081731756747644e-07, + "loss": 0.2691, + "step": 19412 + }, + { + "epoch": 0.89, + "grad_norm": 0.36335280781815466, + "learning_rate": 6.076623014679406e-07, + "loss": 0.2958, + "step": 19413 + }, + { + "epoch": 0.89, + "grad_norm": 0.3194835933180014, + "learning_rate": 6.071516351984197e-07, + "loss": 0.1162, + "step": 19414 + }, + { + "epoch": 0.89, + "grad_norm": 0.4323786296577063, + "learning_rate": 6.066411768775083e-07, + "loss": 0.2579, + "step": 19415 + }, + { + "epoch": 0.89, + "grad_norm": 0.6848942729070734, + "learning_rate": 6.061309265165094e-07, + "loss": 0.3316, + "step": 19416 + }, + { + "epoch": 0.89, + "grad_norm": 0.35496582409054395, + "learning_rate": 6.056208841267153e-07, + "loss": 0.2356, + "step": 19417 + }, + { + "epoch": 0.89, + "grad_norm": 0.3822526091803811, + "learning_rate": 6.051110497194213e-07, + "loss": 0.2684, + "step": 19418 + }, + { + "epoch": 0.89, + "grad_norm": 0.7161795921314614, + "learning_rate": 6.046014233059161e-07, + "loss": 0.4655, + "step": 19419 + }, + { + "epoch": 0.89, + "grad_norm": 0.21624760364641082, + "learning_rate": 6.040920048974774e-07, + "loss": 0.1651, + "step": 19420 + }, + { + "epoch": 0.89, + "grad_norm": 0.352422385438307, + "learning_rate": 6.035827945053874e-07, + "loss": 0.2496, + "step": 19421 + }, + { + "epoch": 0.89, + "grad_norm": 1.4680557023536365, + "learning_rate": 6.030737921409169e-07, + "loss": 0.4356, + "step": 19422 + }, + { + "epoch": 0.89, + "grad_norm": 0.8506876538445198, + "learning_rate": 6.025649978153358e-07, + "loss": 0.3345, + "step": 19423 + }, + { + "epoch": 0.89, + "grad_norm": 0.36522173005242015, + "learning_rate": 6.020564115399085e-07, + "loss": 0.2078, + "step": 19424 + }, + { + "epoch": 0.89, + "grad_norm": 0.38610344761970394, + "learning_rate": 6.015480333258949e-07, + "loss": 0.3142, + "step": 19425 + }, + { + "epoch": 0.89, + "grad_norm": 0.4557996304800295, + "learning_rate": 6.010398631845493e-07, + "loss": 0.2487, + "step": 19426 + }, + { + "epoch": 0.89, + "grad_norm": 0.2901577459956041, + "learning_rate": 6.005319011271205e-07, + "loss": 0.161, + "step": 19427 + }, + { + "epoch": 0.89, + "grad_norm": 1.039855760616618, + "learning_rate": 6.000241471648582e-07, + "loss": 0.4405, + "step": 19428 + }, + { + "epoch": 0.89, + "grad_norm": 0.3615874166830275, + "learning_rate": 5.995166013090004e-07, + "loss": 0.3021, + "step": 19429 + }, + { + "epoch": 0.89, + "grad_norm": 0.35270930655655214, + "learning_rate": 5.990092635707856e-07, + "loss": 0.1814, + "step": 19430 + }, + { + "epoch": 0.89, + "grad_norm": 1.3321280171541616, + "learning_rate": 5.985021339614449e-07, + "loss": 0.7909, + "step": 19431 + }, + { + "epoch": 0.89, + "grad_norm": 0.411896462502767, + "learning_rate": 5.979952124922039e-07, + "loss": 0.3373, + "step": 19432 + }, + { + "epoch": 0.89, + "grad_norm": 0.25527619123574175, + "learning_rate": 5.974884991742902e-07, + "loss": 0.1162, + "step": 19433 + }, + { + "epoch": 0.89, + "grad_norm": 0.38655337960344727, + "learning_rate": 5.969819940189159e-07, + "loss": 0.2677, + "step": 19434 + }, + { + "epoch": 0.89, + "grad_norm": 0.7090671761199423, + "learning_rate": 5.964756970372998e-07, + "loss": 0.3575, + "step": 19435 + }, + { + "epoch": 0.89, + "grad_norm": 0.5974598634155753, + "learning_rate": 5.959696082406474e-07, + "loss": 0.3002, + "step": 19436 + }, + { + "epoch": 0.89, + "grad_norm": 0.2753184209205759, + "learning_rate": 5.954637276401643e-07, + "loss": 0.2489, + "step": 19437 + }, + { + "epoch": 0.89, + "grad_norm": 0.32556935244222474, + "learning_rate": 5.949580552470502e-07, + "loss": 0.1862, + "step": 19438 + }, + { + "epoch": 0.89, + "grad_norm": 0.40221940467920436, + "learning_rate": 5.944525910724996e-07, + "loss": 0.2335, + "step": 19439 + }, + { + "epoch": 0.89, + "grad_norm": 0.38726074289808626, + "learning_rate": 5.939473351277037e-07, + "loss": 0.251, + "step": 19440 + }, + { + "epoch": 0.89, + "grad_norm": 0.5446160385748545, + "learning_rate": 5.934422874238466e-07, + "loss": 0.3468, + "step": 19441 + }, + { + "epoch": 0.89, + "grad_norm": 0.4948161619657878, + "learning_rate": 5.92937447972114e-07, + "loss": 0.259, + "step": 19442 + }, + { + "epoch": 0.89, + "grad_norm": 0.5810466310252292, + "learning_rate": 5.924328167836791e-07, + "loss": 0.2771, + "step": 19443 + }, + { + "epoch": 0.89, + "grad_norm": 0.3472336298062181, + "learning_rate": 5.919283938697118e-07, + "loss": 0.2766, + "step": 19444 + }, + { + "epoch": 0.89, + "grad_norm": 0.288846263349043, + "learning_rate": 5.914241792413855e-07, + "loss": 0.1876, + "step": 19445 + }, + { + "epoch": 0.89, + "grad_norm": 0.4784640650315341, + "learning_rate": 5.90920172909858e-07, + "loss": 0.2564, + "step": 19446 + }, + { + "epoch": 0.89, + "grad_norm": 0.4818491873725768, + "learning_rate": 5.904163748862902e-07, + "loss": 0.2674, + "step": 19447 + }, + { + "epoch": 0.89, + "grad_norm": 0.3330787446198168, + "learning_rate": 5.899127851818342e-07, + "loss": 0.244, + "step": 19448 + }, + { + "epoch": 0.89, + "grad_norm": 0.4137655503127341, + "learning_rate": 5.894094038076392e-07, + "loss": 0.3159, + "step": 19449 + }, + { + "epoch": 0.89, + "grad_norm": 0.27598339298049307, + "learning_rate": 5.889062307748517e-07, + "loss": 0.119, + "step": 19450 + }, + { + "epoch": 0.89, + "grad_norm": 0.4113829426351915, + "learning_rate": 5.884032660946071e-07, + "loss": 0.2736, + "step": 19451 + }, + { + "epoch": 0.89, + "grad_norm": 0.5328368846844337, + "learning_rate": 5.879005097780455e-07, + "loss": 0.3099, + "step": 19452 + }, + { + "epoch": 0.89, + "grad_norm": 0.3331385255521013, + "learning_rate": 5.873979618362935e-07, + "loss": 0.1932, + "step": 19453 + }, + { + "epoch": 0.89, + "grad_norm": 0.6094260404893885, + "learning_rate": 5.868956222804789e-07, + "loss": 0.3552, + "step": 19454 + }, + { + "epoch": 0.89, + "grad_norm": 0.4504885068921421, + "learning_rate": 5.863934911217239e-07, + "loss": 0.353, + "step": 19455 + }, + { + "epoch": 0.89, + "grad_norm": 0.3445020439013414, + "learning_rate": 5.85891568371143e-07, + "loss": 0.2327, + "step": 19456 + }, + { + "epoch": 0.89, + "grad_norm": 0.6251606229293999, + "learning_rate": 5.853898540398495e-07, + "loss": 0.2943, + "step": 19457 + }, + { + "epoch": 0.89, + "grad_norm": 0.3992083676782403, + "learning_rate": 5.8488834813895e-07, + "loss": 0.2911, + "step": 19458 + }, + { + "epoch": 0.89, + "grad_norm": 0.2594681445495387, + "learning_rate": 5.843870506795502e-07, + "loss": 0.081, + "step": 19459 + }, + { + "epoch": 0.89, + "grad_norm": 0.32547519914632644, + "learning_rate": 5.838859616727455e-07, + "loss": 0.2581, + "step": 19460 + }, + { + "epoch": 0.89, + "grad_norm": 0.3793930579102918, + "learning_rate": 5.833850811296282e-07, + "loss": 0.3048, + "step": 19461 + }, + { + "epoch": 0.89, + "grad_norm": 1.1247583537727894, + "learning_rate": 5.828844090612918e-07, + "loss": 0.4597, + "step": 19462 + }, + { + "epoch": 0.89, + "grad_norm": 0.44324822110961787, + "learning_rate": 5.823839454788161e-07, + "loss": 0.2178, + "step": 19463 + }, + { + "epoch": 0.89, + "grad_norm": 0.47470864976821686, + "learning_rate": 5.818836903932857e-07, + "loss": 0.2956, + "step": 19464 + }, + { + "epoch": 0.89, + "grad_norm": 0.2501264945155644, + "learning_rate": 5.813836438157716e-07, + "loss": 0.183, + "step": 19465 + }, + { + "epoch": 0.89, + "grad_norm": 0.3505468328392777, + "learning_rate": 5.808838057573451e-07, + "loss": 0.1881, + "step": 19466 + }, + { + "epoch": 0.89, + "grad_norm": 0.6005927463443071, + "learning_rate": 5.803841762290741e-07, + "loss": 0.369, + "step": 19467 + }, + { + "epoch": 0.89, + "grad_norm": 0.38753030703860153, + "learning_rate": 5.798847552420184e-07, + "loss": 0.2825, + "step": 19468 + }, + { + "epoch": 0.89, + "grad_norm": 0.5075264765892931, + "learning_rate": 5.793855428072348e-07, + "loss": 0.2249, + "step": 19469 + }, + { + "epoch": 0.89, + "grad_norm": 0.4262243846034928, + "learning_rate": 5.788865389357745e-07, + "loss": 0.2834, + "step": 19470 + }, + { + "epoch": 0.89, + "grad_norm": 0.2703331153704299, + "learning_rate": 5.783877436386876e-07, + "loss": 0.1782, + "step": 19471 + }, + { + "epoch": 0.89, + "grad_norm": 0.3571432286423395, + "learning_rate": 5.778891569270162e-07, + "loss": 0.2699, + "step": 19472 + }, + { + "epoch": 0.89, + "grad_norm": 0.35315362334060674, + "learning_rate": 5.77390778811796e-07, + "loss": 0.2269, + "step": 19473 + }, + { + "epoch": 0.89, + "grad_norm": 0.6018882501296571, + "learning_rate": 5.768926093040617e-07, + "loss": 0.3412, + "step": 19474 + }, + { + "epoch": 0.89, + "grad_norm": 1.0671678500241466, + "learning_rate": 5.763946484148442e-07, + "loss": 0.4041, + "step": 19475 + }, + { + "epoch": 0.89, + "grad_norm": 0.27204079004696546, + "learning_rate": 5.758968961551669e-07, + "loss": 0.2246, + "step": 19476 + }, + { + "epoch": 0.89, + "grad_norm": 0.5493241201960452, + "learning_rate": 5.75399352536048e-07, + "loss": 0.1805, + "step": 19477 + }, + { + "epoch": 0.89, + "grad_norm": 0.4054301864489693, + "learning_rate": 5.749020175685038e-07, + "loss": 0.2738, + "step": 19478 + }, + { + "epoch": 0.89, + "grad_norm": 0.372262524237946, + "learning_rate": 5.744048912635469e-07, + "loss": 0.2336, + "step": 19479 + }, + { + "epoch": 0.89, + "grad_norm": 0.37738706359355795, + "learning_rate": 5.739079736321796e-07, + "loss": 0.2907, + "step": 19480 + }, + { + "epoch": 0.89, + "grad_norm": 0.6798143267531725, + "learning_rate": 5.734112646854062e-07, + "loss": 0.3343, + "step": 19481 + }, + { + "epoch": 0.89, + "grad_norm": 0.42945108752446853, + "learning_rate": 5.729147644342204e-07, + "loss": 0.2226, + "step": 19482 + }, + { + "epoch": 0.9, + "grad_norm": 0.25813971401887836, + "learning_rate": 5.724184728896165e-07, + "loss": 0.1431, + "step": 19483 + }, + { + "epoch": 0.9, + "grad_norm": 0.3254489910391765, + "learning_rate": 5.719223900625814e-07, + "loss": 0.27, + "step": 19484 + }, + { + "epoch": 0.9, + "grad_norm": 0.42677267634340726, + "learning_rate": 5.714265159640974e-07, + "loss": 0.2954, + "step": 19485 + }, + { + "epoch": 0.9, + "grad_norm": 0.6273777039549431, + "learning_rate": 5.709308506051436e-07, + "loss": 0.2848, + "step": 19486 + }, + { + "epoch": 0.9, + "grad_norm": 0.825461172667066, + "learning_rate": 5.7043539399669e-07, + "loss": 0.2632, + "step": 19487 + }, + { + "epoch": 0.9, + "grad_norm": 0.34733526711764046, + "learning_rate": 5.699401461497111e-07, + "loss": 0.2847, + "step": 19488 + }, + { + "epoch": 0.9, + "grad_norm": 0.33541865072786525, + "learning_rate": 5.694451070751695e-07, + "loss": 0.1647, + "step": 19489 + }, + { + "epoch": 0.9, + "grad_norm": 0.9548438339795196, + "learning_rate": 5.689502767840215e-07, + "loss": 0.4663, + "step": 19490 + }, + { + "epoch": 0.9, + "grad_norm": 0.3278747496853244, + "learning_rate": 5.684556552872256e-07, + "loss": 0.2401, + "step": 19491 + }, + { + "epoch": 0.9, + "grad_norm": 0.27429348212126975, + "learning_rate": 5.679612425957304e-07, + "loss": 0.2131, + "step": 19492 + }, + { + "epoch": 0.9, + "grad_norm": 1.0985252066559386, + "learning_rate": 5.67467038720485e-07, + "loss": 0.3806, + "step": 19493 + }, + { + "epoch": 0.9, + "grad_norm": 0.391041876063372, + "learning_rate": 5.669730436724263e-07, + "loss": 0.273, + "step": 19494 + }, + { + "epoch": 0.9, + "grad_norm": 1.0925805941316105, + "learning_rate": 5.664792574624934e-07, + "loss": 0.2394, + "step": 19495 + }, + { + "epoch": 0.9, + "grad_norm": 0.45368272529833187, + "learning_rate": 5.659856801016173e-07, + "loss": 0.2725, + "step": 19496 + }, + { + "epoch": 0.9, + "grad_norm": 0.34931069083216426, + "learning_rate": 5.65492311600725e-07, + "loss": 0.2764, + "step": 19497 + }, + { + "epoch": 0.9, + "grad_norm": 0.872270013248097, + "learning_rate": 5.649991519707409e-07, + "loss": 0.4919, + "step": 19498 + }, + { + "epoch": 0.9, + "grad_norm": 0.29982733126917793, + "learning_rate": 5.64506201222581e-07, + "loss": 0.1394, + "step": 19499 + }, + { + "epoch": 0.9, + "grad_norm": 0.3057956591155022, + "learning_rate": 5.640134593671598e-07, + "loss": 0.2516, + "step": 19500 + }, + { + "epoch": 0.9, + "grad_norm": 1.3232166164868833, + "learning_rate": 5.635209264153874e-07, + "loss": 0.3737, + "step": 19501 + }, + { + "epoch": 0.9, + "grad_norm": 0.576559561644349, + "learning_rate": 5.63028602378165e-07, + "loss": 0.2319, + "step": 19502 + }, + { + "epoch": 0.9, + "grad_norm": 0.4141251241639885, + "learning_rate": 5.625364872663963e-07, + "loss": 0.2795, + "step": 19503 + }, + { + "epoch": 0.9, + "grad_norm": 0.366525372927563, + "learning_rate": 5.620445810909703e-07, + "loss": 0.2981, + "step": 19504 + }, + { + "epoch": 0.9, + "grad_norm": 0.1374273000053429, + "learning_rate": 5.615528838627838e-07, + "loss": 0.0689, + "step": 19505 + }, + { + "epoch": 0.9, + "grad_norm": 0.4315390063896131, + "learning_rate": 5.61061395592718e-07, + "loss": 0.303, + "step": 19506 + }, + { + "epoch": 0.9, + "grad_norm": 1.0234380104797598, + "learning_rate": 5.605701162916566e-07, + "loss": 0.3933, + "step": 19507 + }, + { + "epoch": 0.9, + "grad_norm": 0.3383641370599889, + "learning_rate": 5.600790459704742e-07, + "loss": 0.2442, + "step": 19508 + }, + { + "epoch": 0.9, + "grad_norm": 0.3491526831085786, + "learning_rate": 5.59588184640043e-07, + "loss": 0.2414, + "step": 19509 + }, + { + "epoch": 0.9, + "grad_norm": 0.5977105187509787, + "learning_rate": 5.590975323112324e-07, + "loss": 0.2541, + "step": 19510 + }, + { + "epoch": 0.9, + "grad_norm": 0.613543446574011, + "learning_rate": 5.586070889949013e-07, + "loss": 0.2714, + "step": 19511 + }, + { + "epoch": 0.9, + "grad_norm": 0.2568584535482536, + "learning_rate": 5.581168547019112e-07, + "loss": 0.2164, + "step": 19512 + }, + { + "epoch": 0.9, + "grad_norm": 1.1696856517485377, + "learning_rate": 5.576268294431131e-07, + "loss": 0.4274, + "step": 19513 + }, + { + "epoch": 0.9, + "grad_norm": 0.7603651793144063, + "learning_rate": 5.571370132293552e-07, + "loss": 0.3984, + "step": 19514 + }, + { + "epoch": 0.9, + "grad_norm": 0.3066639817005141, + "learning_rate": 5.566474060714844e-07, + "loss": 0.1988, + "step": 19515 + }, + { + "epoch": 0.9, + "grad_norm": 0.37118270829059813, + "learning_rate": 5.561580079803375e-07, + "loss": 0.3167, + "step": 19516 + }, + { + "epoch": 0.9, + "grad_norm": 1.0306085896225783, + "learning_rate": 5.556688189667492e-07, + "loss": 0.1625, + "step": 19517 + }, + { + "epoch": 0.9, + "grad_norm": 0.3107723342864482, + "learning_rate": 5.55179839041553e-07, + "loss": 0.1779, + "step": 19518 + }, + { + "epoch": 0.9, + "grad_norm": 0.7372747698490006, + "learning_rate": 5.546910682155704e-07, + "loss": 0.4002, + "step": 19519 + }, + { + "epoch": 0.9, + "grad_norm": 0.3648138041339951, + "learning_rate": 5.542025064996248e-07, + "loss": 0.299, + "step": 19520 + }, + { + "epoch": 0.9, + "grad_norm": 0.29524586493765337, + "learning_rate": 5.537141539045298e-07, + "loss": 0.1882, + "step": 19521 + }, + { + "epoch": 0.9, + "grad_norm": 1.2376353837688123, + "learning_rate": 5.532260104411014e-07, + "loss": 0.53, + "step": 19522 + }, + { + "epoch": 0.9, + "grad_norm": 0.2669633203671443, + "learning_rate": 5.527380761201428e-07, + "loss": 0.1889, + "step": 19523 + }, + { + "epoch": 0.9, + "grad_norm": 0.3785260016416561, + "learning_rate": 5.522503509524591e-07, + "loss": 0.2678, + "step": 19524 + }, + { + "epoch": 0.9, + "grad_norm": 0.5454181293191016, + "learning_rate": 5.517628349488458e-07, + "loss": 0.2577, + "step": 19525 + }, + { + "epoch": 0.9, + "grad_norm": 0.8810513994462302, + "learning_rate": 5.512755281200965e-07, + "loss": 0.4511, + "step": 19526 + }, + { + "epoch": 0.9, + "grad_norm": 0.335831981770893, + "learning_rate": 5.507884304770028e-07, + "loss": 0.2592, + "step": 19527 + }, + { + "epoch": 0.9, + "grad_norm": 0.32256497286262864, + "learning_rate": 5.503015420303437e-07, + "loss": 0.2457, + "step": 19528 + }, + { + "epoch": 0.9, + "grad_norm": 0.26240729063673424, + "learning_rate": 5.498148627909017e-07, + "loss": 0.1583, + "step": 19529 + }, + { + "epoch": 0.9, + "grad_norm": 0.38437328275928395, + "learning_rate": 5.493283927694492e-07, + "loss": 0.2343, + "step": 19530 + }, + { + "epoch": 0.9, + "grad_norm": 0.4607063767153047, + "learning_rate": 5.488421319767578e-07, + "loss": 0.2561, + "step": 19531 + }, + { + "epoch": 0.9, + "grad_norm": 0.6374528843923447, + "learning_rate": 5.483560804235943e-07, + "loss": 0.3631, + "step": 19532 + }, + { + "epoch": 0.9, + "grad_norm": 0.3380508135557405, + "learning_rate": 5.478702381207146e-07, + "loss": 0.249, + "step": 19533 + }, + { + "epoch": 0.9, + "grad_norm": 1.472709549865522, + "learning_rate": 5.473846050788789e-07, + "loss": 0.2867, + "step": 19534 + }, + { + "epoch": 0.9, + "grad_norm": 0.24015571464238913, + "learning_rate": 5.468991813088375e-07, + "loss": 0.2188, + "step": 19535 + }, + { + "epoch": 0.9, + "grad_norm": 0.349468326815021, + "learning_rate": 5.464139668213386e-07, + "loss": 0.235, + "step": 19536 + }, + { + "epoch": 0.9, + "grad_norm": 0.6619302382591248, + "learning_rate": 5.459289616271224e-07, + "loss": 0.3563, + "step": 19537 + }, + { + "epoch": 0.9, + "grad_norm": 1.0513130135012814, + "learning_rate": 5.454441657369247e-07, + "loss": 0.3341, + "step": 19538 + }, + { + "epoch": 0.9, + "grad_norm": 0.42263858936616483, + "learning_rate": 5.449595791614836e-07, + "loss": 0.2698, + "step": 19539 + }, + { + "epoch": 0.9, + "grad_norm": 0.3101466776684008, + "learning_rate": 5.444752019115229e-07, + "loss": 0.2802, + "step": 19540 + }, + { + "epoch": 0.9, + "grad_norm": 0.5406091135683908, + "learning_rate": 5.439910339977694e-07, + "loss": 0.1888, + "step": 19541 + }, + { + "epoch": 0.9, + "grad_norm": 0.4549053927322506, + "learning_rate": 5.435070754309402e-07, + "loss": 0.2608, + "step": 19542 + }, + { + "epoch": 0.9, + "grad_norm": 0.2945722692526129, + "learning_rate": 5.430233262217488e-07, + "loss": 0.2622, + "step": 19543 + }, + { + "epoch": 0.9, + "grad_norm": 0.5130242383216363, + "learning_rate": 5.425397863809079e-07, + "loss": 0.2462, + "step": 19544 + }, + { + "epoch": 0.9, + "grad_norm": 0.3248577152922799, + "learning_rate": 5.420564559191188e-07, + "loss": 0.2335, + "step": 19545 + }, + { + "epoch": 0.9, + "grad_norm": 1.4070545486253954, + "learning_rate": 5.415733348470864e-07, + "loss": 0.4504, + "step": 19546 + }, + { + "epoch": 0.9, + "grad_norm": 0.3902777301198903, + "learning_rate": 5.410904231755032e-07, + "loss": 0.2412, + "step": 19547 + }, + { + "epoch": 0.9, + "grad_norm": 0.35341047684089694, + "learning_rate": 5.406077209150606e-07, + "loss": 0.2275, + "step": 19548 + }, + { + "epoch": 0.9, + "grad_norm": 0.3329012954175152, + "learning_rate": 5.401252280764469e-07, + "loss": 0.2141, + "step": 19549 + }, + { + "epoch": 0.9, + "grad_norm": 1.435140010245774, + "learning_rate": 5.396429446703433e-07, + "loss": 0.7832, + "step": 19550 + }, + { + "epoch": 0.9, + "grad_norm": 0.32700795106193686, + "learning_rate": 5.391608707074258e-07, + "loss": 0.2047, + "step": 19551 + }, + { + "epoch": 0.9, + "grad_norm": 0.36045084168404257, + "learning_rate": 5.386790061983682e-07, + "loss": 0.2898, + "step": 19552 + }, + { + "epoch": 0.9, + "grad_norm": 1.0226706262825154, + "learning_rate": 5.381973511538396e-07, + "loss": 0.3851, + "step": 19553 + }, + { + "epoch": 0.9, + "grad_norm": 0.3451572256093321, + "learning_rate": 5.377159055845028e-07, + "loss": 0.214, + "step": 19554 + }, + { + "epoch": 0.9, + "grad_norm": 0.42548728789439844, + "learning_rate": 5.372346695010145e-07, + "loss": 0.2532, + "step": 19555 + }, + { + "epoch": 0.9, + "grad_norm": 0.4041027884326333, + "learning_rate": 5.367536429140308e-07, + "loss": 0.2431, + "step": 19556 + }, + { + "epoch": 0.9, + "grad_norm": 0.35770669044320574, + "learning_rate": 5.362728258341998e-07, + "loss": 0.184, + "step": 19557 + }, + { + "epoch": 0.9, + "grad_norm": 0.6485639923932551, + "learning_rate": 5.357922182721687e-07, + "loss": 0.3874, + "step": 19558 + }, + { + "epoch": 0.9, + "grad_norm": 0.37034597076583015, + "learning_rate": 5.353118202385743e-07, + "loss": 0.2948, + "step": 19559 + }, + { + "epoch": 0.9, + "grad_norm": 0.369206817896097, + "learning_rate": 5.348316317440549e-07, + "loss": 0.2013, + "step": 19560 + }, + { + "epoch": 0.9, + "grad_norm": 0.3743217322839835, + "learning_rate": 5.34351652799242e-07, + "loss": 0.2441, + "step": 19561 + }, + { + "epoch": 0.9, + "grad_norm": 0.6082038549322307, + "learning_rate": 5.338718834147583e-07, + "loss": 0.2271, + "step": 19562 + }, + { + "epoch": 0.9, + "grad_norm": 0.38191081718532044, + "learning_rate": 5.333923236012295e-07, + "loss": 0.2748, + "step": 19563 + }, + { + "epoch": 0.9, + "grad_norm": 0.33149731099439955, + "learning_rate": 5.329129733692684e-07, + "loss": 0.2337, + "step": 19564 + }, + { + "epoch": 0.9, + "grad_norm": 0.7064794956872598, + "learning_rate": 5.324338327294909e-07, + "loss": 0.3646, + "step": 19565 + }, + { + "epoch": 0.9, + "grad_norm": 0.7050058186233589, + "learning_rate": 5.319549016925041e-07, + "loss": 0.2827, + "step": 19566 + }, + { + "epoch": 0.9, + "grad_norm": 0.3001333371978063, + "learning_rate": 5.314761802689083e-07, + "loss": 0.1732, + "step": 19567 + }, + { + "epoch": 0.9, + "grad_norm": 0.3713104733442832, + "learning_rate": 5.309976684693053e-07, + "loss": 0.2664, + "step": 19568 + }, + { + "epoch": 0.9, + "grad_norm": 0.3999708170510087, + "learning_rate": 5.305193663042862e-07, + "loss": 0.2731, + "step": 19569 + }, + { + "epoch": 0.9, + "grad_norm": 0.58872414662589, + "learning_rate": 5.30041273784444e-07, + "loss": 0.2406, + "step": 19570 + }, + { + "epoch": 0.9, + "grad_norm": 0.35200948140516203, + "learning_rate": 5.295633909203591e-07, + "loss": 0.2969, + "step": 19571 + }, + { + "epoch": 0.9, + "grad_norm": 0.40278998675373134, + "learning_rate": 5.290857177226139e-07, + "loss": 0.2411, + "step": 19572 + }, + { + "epoch": 0.9, + "grad_norm": 0.6211036948515942, + "learning_rate": 5.286082542017812e-07, + "loss": 0.2171, + "step": 19573 + }, + { + "epoch": 0.9, + "grad_norm": 0.25164053817988535, + "learning_rate": 5.281310003684337e-07, + "loss": 0.1785, + "step": 19574 + }, + { + "epoch": 0.9, + "grad_norm": 0.34537468029334767, + "learning_rate": 5.276539562331384e-07, + "loss": 0.2552, + "step": 19575 + }, + { + "epoch": 0.9, + "grad_norm": 0.38454438588556517, + "learning_rate": 5.271771218064526e-07, + "loss": 0.2776, + "step": 19576 + }, + { + "epoch": 0.9, + "grad_norm": 0.7080971729085463, + "learning_rate": 5.267004970989365e-07, + "loss": 0.2856, + "step": 19577 + }, + { + "epoch": 0.9, + "grad_norm": 0.710040166600337, + "learning_rate": 5.262240821211417e-07, + "loss": 0.2564, + "step": 19578 + }, + { + "epoch": 0.9, + "grad_norm": 0.39034852512430224, + "learning_rate": 5.257478768836133e-07, + "loss": 0.2655, + "step": 19579 + }, + { + "epoch": 0.9, + "grad_norm": 0.2370447145042257, + "learning_rate": 5.252718813968971e-07, + "loss": 0.1579, + "step": 19580 + }, + { + "epoch": 0.9, + "grad_norm": 0.6328402325884102, + "learning_rate": 5.247960956715259e-07, + "loss": 0.297, + "step": 19581 + }, + { + "epoch": 0.9, + "grad_norm": 0.38275854652265867, + "learning_rate": 5.243205197180412e-07, + "loss": 0.2823, + "step": 19582 + }, + { + "epoch": 0.9, + "grad_norm": 0.36522964623513204, + "learning_rate": 5.238451535469658e-07, + "loss": 0.2453, + "step": 19583 + }, + { + "epoch": 0.9, + "grad_norm": 0.5912301414077614, + "learning_rate": 5.233699971688256e-07, + "loss": 0.2455, + "step": 19584 + }, + { + "epoch": 0.9, + "grad_norm": 0.42143163426848074, + "learning_rate": 5.228950505941399e-07, + "loss": 0.2759, + "step": 19585 + }, + { + "epoch": 0.9, + "grad_norm": 0.2901665660874478, + "learning_rate": 5.224203138334216e-07, + "loss": 0.1464, + "step": 19586 + }, + { + "epoch": 0.9, + "grad_norm": 0.2885509936705279, + "learning_rate": 5.219457868971856e-07, + "loss": 0.2371, + "step": 19587 + }, + { + "epoch": 0.9, + "grad_norm": 0.5448218149539864, + "learning_rate": 5.214714697959333e-07, + "loss": 0.3377, + "step": 19588 + }, + { + "epoch": 0.9, + "grad_norm": 0.4960796515277802, + "learning_rate": 5.209973625401687e-07, + "loss": 0.3035, + "step": 19589 + }, + { + "epoch": 0.9, + "grad_norm": 0.5111457771830094, + "learning_rate": 5.205234651403857e-07, + "loss": 0.1661, + "step": 19590 + }, + { + "epoch": 0.9, + "grad_norm": 0.41976264360372134, + "learning_rate": 5.200497776070756e-07, + "loss": 0.3228, + "step": 19591 + }, + { + "epoch": 0.9, + "grad_norm": 0.49621808618065694, + "learning_rate": 5.195762999507292e-07, + "loss": 0.3253, + "step": 19592 + }, + { + "epoch": 0.9, + "grad_norm": 0.6315173936842243, + "learning_rate": 5.191030321818236e-07, + "loss": 0.2394, + "step": 19593 + }, + { + "epoch": 0.9, + "grad_norm": 0.4153674162110678, + "learning_rate": 5.186299743108392e-07, + "loss": 0.2827, + "step": 19594 + }, + { + "epoch": 0.9, + "grad_norm": 0.22381856968066582, + "learning_rate": 5.18157126348251e-07, + "loss": 0.2042, + "step": 19595 + }, + { + "epoch": 0.9, + "grad_norm": 0.9226079575953229, + "learning_rate": 5.17684488304524e-07, + "loss": 0.1162, + "step": 19596 + }, + { + "epoch": 0.9, + "grad_norm": 0.4939142537847988, + "learning_rate": 5.172120601901243e-07, + "loss": 0.3007, + "step": 19597 + }, + { + "epoch": 0.9, + "grad_norm": 0.46736350609043203, + "learning_rate": 5.167398420155068e-07, + "loss": 0.3297, + "step": 19598 + }, + { + "epoch": 0.9, + "grad_norm": 0.4424770792520786, + "learning_rate": 5.16267833791132e-07, + "loss": 0.3039, + "step": 19599 + }, + { + "epoch": 0.9, + "grad_norm": 0.371209593083219, + "learning_rate": 5.157960355274461e-07, + "loss": 0.2233, + "step": 19600 + }, + { + "epoch": 0.9, + "grad_norm": 0.2755786208095063, + "learning_rate": 5.153244472348951e-07, + "loss": 0.1517, + "step": 19601 + }, + { + "epoch": 0.9, + "grad_norm": 1.033116538517288, + "learning_rate": 5.148530689239206e-07, + "loss": 0.3489, + "step": 19602 + }, + { + "epoch": 0.9, + "grad_norm": 0.25746873352794136, + "learning_rate": 5.143819006049532e-07, + "loss": 0.2169, + "step": 19603 + }, + { + "epoch": 0.9, + "grad_norm": 1.0034686947591478, + "learning_rate": 5.139109422884326e-07, + "loss": 0.4384, + "step": 19604 + }, + { + "epoch": 0.9, + "grad_norm": 0.747423985984192, + "learning_rate": 5.134401939847789e-07, + "loss": 0.3747, + "step": 19605 + }, + { + "epoch": 0.9, + "grad_norm": 0.3610567336007929, + "learning_rate": 5.129696557044173e-07, + "loss": 0.2135, + "step": 19606 + }, + { + "epoch": 0.9, + "grad_norm": 0.27768318707035533, + "learning_rate": 5.124993274577617e-07, + "loss": 0.2255, + "step": 19607 + }, + { + "epoch": 0.9, + "grad_norm": 0.4091754429497875, + "learning_rate": 5.12029209255227e-07, + "loss": 0.1977, + "step": 19608 + }, + { + "epoch": 0.9, + "grad_norm": 0.4359770661904798, + "learning_rate": 5.115593011072229e-07, + "loss": 0.2174, + "step": 19609 + }, + { + "epoch": 0.9, + "grad_norm": 0.4314319137275342, + "learning_rate": 5.110896030241497e-07, + "loss": 0.3063, + "step": 19610 + }, + { + "epoch": 0.9, + "grad_norm": 0.4444521185235352, + "learning_rate": 5.10620115016407e-07, + "loss": 0.3027, + "step": 19611 + }, + { + "epoch": 0.9, + "grad_norm": 0.37832480380725725, + "learning_rate": 5.101508370943897e-07, + "loss": 0.3076, + "step": 19612 + }, + { + "epoch": 0.9, + "grad_norm": 0.2674426170772036, + "learning_rate": 5.096817692684864e-07, + "loss": 0.1038, + "step": 19613 + }, + { + "epoch": 0.9, + "grad_norm": 0.5502366182170125, + "learning_rate": 5.092129115490818e-07, + "loss": 0.3429, + "step": 19614 + }, + { + "epoch": 0.9, + "grad_norm": 0.26351502322059167, + "learning_rate": 5.087442639465557e-07, + "loss": 0.243, + "step": 19615 + }, + { + "epoch": 0.9, + "grad_norm": 0.653914918491935, + "learning_rate": 5.082758264712828e-07, + "loss": 0.2668, + "step": 19616 + }, + { + "epoch": 0.9, + "grad_norm": 0.8484333902960137, + "learning_rate": 5.078075991336351e-07, + "loss": 0.5113, + "step": 19617 + }, + { + "epoch": 0.9, + "grad_norm": 0.38693701897836535, + "learning_rate": 5.073395819439797e-07, + "loss": 0.2994, + "step": 19618 + }, + { + "epoch": 0.9, + "grad_norm": 0.4309157312124287, + "learning_rate": 5.068717749126772e-07, + "loss": 0.2315, + "step": 19619 + }, + { + "epoch": 0.9, + "grad_norm": 0.27435493336142197, + "learning_rate": 5.064041780500817e-07, + "loss": 0.1669, + "step": 19620 + }, + { + "epoch": 0.9, + "grad_norm": 0.3830696055662994, + "learning_rate": 5.059367913665503e-07, + "loss": 0.2719, + "step": 19621 + }, + { + "epoch": 0.9, + "grad_norm": 0.7556990647460939, + "learning_rate": 5.054696148724259e-07, + "loss": 0.2822, + "step": 19622 + }, + { + "epoch": 0.9, + "grad_norm": 0.34434066872051233, + "learning_rate": 5.050026485780546e-07, + "loss": 0.3016, + "step": 19623 + }, + { + "epoch": 0.9, + "grad_norm": 0.3693384659842256, + "learning_rate": 5.045358924937726e-07, + "loss": 0.2705, + "step": 19624 + }, + { + "epoch": 0.9, + "grad_norm": 1.5732034596227875, + "learning_rate": 5.040693466299129e-07, + "loss": 0.4483, + "step": 19625 + }, + { + "epoch": 0.9, + "grad_norm": 0.20061946145955564, + "learning_rate": 5.036030109968082e-07, + "loss": 0.152, + "step": 19626 + }, + { + "epoch": 0.9, + "grad_norm": 0.3655570234364517, + "learning_rate": 5.03136885604778e-07, + "loss": 0.2606, + "step": 19627 + }, + { + "epoch": 0.9, + "grad_norm": 0.7057237169058612, + "learning_rate": 5.026709704641441e-07, + "loss": 0.3454, + "step": 19628 + }, + { + "epoch": 0.9, + "grad_norm": 0.9295579075306751, + "learning_rate": 5.022052655852228e-07, + "loss": 0.2565, + "step": 19629 + }, + { + "epoch": 0.9, + "grad_norm": 0.4108744787879554, + "learning_rate": 5.017397709783212e-07, + "loss": 0.2931, + "step": 19630 + }, + { + "epoch": 0.9, + "grad_norm": 0.39780159591038405, + "learning_rate": 5.012744866537478e-07, + "loss": 0.2678, + "step": 19631 + }, + { + "epoch": 0.9, + "grad_norm": 0.40484375787503546, + "learning_rate": 5.00809412621801e-07, + "loss": 0.1349, + "step": 19632 + }, + { + "epoch": 0.9, + "grad_norm": 0.33348181540655064, + "learning_rate": 5.003445488927794e-07, + "loss": 0.2477, + "step": 19633 + }, + { + "epoch": 0.9, + "grad_norm": 0.5613506797260215, + "learning_rate": 4.998798954769724e-07, + "loss": 0.3497, + "step": 19634 + }, + { + "epoch": 0.9, + "grad_norm": 0.4712242288126376, + "learning_rate": 4.994154523846695e-07, + "loss": 0.2834, + "step": 19635 + }, + { + "epoch": 0.9, + "grad_norm": 0.3641231548796775, + "learning_rate": 4.989512196261503e-07, + "loss": 0.2565, + "step": 19636 + }, + { + "epoch": 0.9, + "grad_norm": 1.783803333416028, + "learning_rate": 4.984871972116945e-07, + "loss": 0.5295, + "step": 19637 + }, + { + "epoch": 0.9, + "grad_norm": 0.30109901561395735, + "learning_rate": 4.980233851515759e-07, + "loss": 0.2298, + "step": 19638 + }, + { + "epoch": 0.9, + "grad_norm": 0.342230552838601, + "learning_rate": 4.975597834560597e-07, + "loss": 0.217, + "step": 19639 + }, + { + "epoch": 0.9, + "grad_norm": 0.40426406321144526, + "learning_rate": 4.970963921354133e-07, + "loss": 0.2151, + "step": 19640 + }, + { + "epoch": 0.9, + "grad_norm": 1.2673502442551616, + "learning_rate": 4.966332111998918e-07, + "loss": 0.5978, + "step": 19641 + }, + { + "epoch": 0.9, + "grad_norm": 0.3573757646755341, + "learning_rate": 4.961702406597513e-07, + "loss": 0.1814, + "step": 19642 + }, + { + "epoch": 0.9, + "grad_norm": 0.4271281896969654, + "learning_rate": 4.957074805252438e-07, + "loss": 0.2692, + "step": 19643 + }, + { + "epoch": 0.9, + "grad_norm": 0.6822296158400306, + "learning_rate": 4.952449308066099e-07, + "loss": 0.3746, + "step": 19644 + }, + { + "epoch": 0.9, + "grad_norm": 0.3365251071173328, + "learning_rate": 4.947825915140946e-07, + "loss": 0.1904, + "step": 19645 + }, + { + "epoch": 0.9, + "grad_norm": 0.2686791125298311, + "learning_rate": 4.94320462657929e-07, + "loss": 0.181, + "step": 19646 + }, + { + "epoch": 0.9, + "grad_norm": 0.5117345591170361, + "learning_rate": 4.93858544248349e-07, + "loss": 0.3892, + "step": 19647 + }, + { + "epoch": 0.9, + "grad_norm": 0.3317222949272828, + "learning_rate": 4.933968362955788e-07, + "loss": 0.1821, + "step": 19648 + }, + { + "epoch": 0.9, + "grad_norm": 0.8268837516276252, + "learning_rate": 4.929353388098379e-07, + "loss": 0.3948, + "step": 19649 + }, + { + "epoch": 0.9, + "grad_norm": 0.47167547509246294, + "learning_rate": 4.924740518013471e-07, + "loss": 0.3324, + "step": 19650 + }, + { + "epoch": 0.9, + "grad_norm": 0.31109891637165965, + "learning_rate": 4.92012975280316e-07, + "loss": 0.267, + "step": 19651 + }, + { + "epoch": 0.9, + "grad_norm": 0.3357105022891711, + "learning_rate": 4.915521092569553e-07, + "loss": 0.1001, + "step": 19652 + }, + { + "epoch": 0.9, + "grad_norm": 1.428533001954699, + "learning_rate": 4.910914537414657e-07, + "loss": 0.7241, + "step": 19653 + }, + { + "epoch": 0.9, + "grad_norm": 0.3328088537488865, + "learning_rate": 4.906310087440469e-07, + "loss": 0.2212, + "step": 19654 + }, + { + "epoch": 0.9, + "grad_norm": 0.3509923634391299, + "learning_rate": 4.90170774274893e-07, + "loss": 0.2336, + "step": 19655 + }, + { + "epoch": 0.9, + "grad_norm": 0.6132607002630902, + "learning_rate": 4.897107503441912e-07, + "loss": 0.3547, + "step": 19656 + }, + { + "epoch": 0.9, + "grad_norm": 0.36333551498806105, + "learning_rate": 4.892509369621279e-07, + "loss": 0.2463, + "step": 19657 + }, + { + "epoch": 0.9, + "grad_norm": 0.21649006606653148, + "learning_rate": 4.887913341388817e-07, + "loss": 0.1334, + "step": 19658 + }, + { + "epoch": 0.9, + "grad_norm": 0.5126165050404505, + "learning_rate": 4.883319418846277e-07, + "loss": 0.3799, + "step": 19659 + }, + { + "epoch": 0.9, + "grad_norm": 0.3438260140668514, + "learning_rate": 4.87872760209539e-07, + "loss": 0.2564, + "step": 19660 + }, + { + "epoch": 0.9, + "grad_norm": 0.680784170345311, + "learning_rate": 4.874137891237784e-07, + "loss": 0.2659, + "step": 19661 + }, + { + "epoch": 0.9, + "grad_norm": 0.3765494729902323, + "learning_rate": 4.869550286375091e-07, + "loss": 0.2781, + "step": 19662 + }, + { + "epoch": 0.9, + "grad_norm": 0.3717314043152907, + "learning_rate": 4.864964787608839e-07, + "loss": 0.2225, + "step": 19663 + }, + { + "epoch": 0.9, + "grad_norm": 0.2949407503610009, + "learning_rate": 4.860381395040604e-07, + "loss": 0.1845, + "step": 19664 + }, + { + "epoch": 0.9, + "grad_norm": 0.5625720403647588, + "learning_rate": 4.855800108771814e-07, + "loss": 0.2902, + "step": 19665 + }, + { + "epoch": 0.9, + "grad_norm": 0.41834165866616, + "learning_rate": 4.851220928903922e-07, + "loss": 0.2857, + "step": 19666 + }, + { + "epoch": 0.9, + "grad_norm": 0.418505000138813, + "learning_rate": 4.84664385553828e-07, + "loss": 0.2908, + "step": 19667 + }, + { + "epoch": 0.9, + "grad_norm": 1.2778845138338732, + "learning_rate": 4.84206888877623e-07, + "loss": 0.2531, + "step": 19668 + }, + { + "epoch": 0.9, + "grad_norm": 0.43619572955125463, + "learning_rate": 4.837496028719079e-07, + "loss": 0.2762, + "step": 19669 + }, + { + "epoch": 0.9, + "grad_norm": 0.25350154502389105, + "learning_rate": 4.832925275468025e-07, + "loss": 0.2272, + "step": 19670 + }, + { + "epoch": 0.9, + "grad_norm": 1.0472404000705837, + "learning_rate": 4.828356629124287e-07, + "loss": 0.5238, + "step": 19671 + }, + { + "epoch": 0.9, + "grad_norm": 0.27181130801103, + "learning_rate": 4.823790089789026e-07, + "loss": 0.221, + "step": 19672 + }, + { + "epoch": 0.9, + "grad_norm": 0.6478343085224642, + "learning_rate": 4.819225657563298e-07, + "loss": 0.3844, + "step": 19673 + }, + { + "epoch": 0.9, + "grad_norm": 0.4159980831174123, + "learning_rate": 4.814663332548197e-07, + "loss": 0.2363, + "step": 19674 + }, + { + "epoch": 0.9, + "grad_norm": 0.3730436567155494, + "learning_rate": 4.810103114844688e-07, + "loss": 0.2535, + "step": 19675 + }, + { + "epoch": 0.9, + "grad_norm": 0.5432841945783066, + "learning_rate": 4.805545004553757e-07, + "loss": 0.2153, + "step": 19676 + }, + { + "epoch": 0.9, + "grad_norm": 0.9882795880700765, + "learning_rate": 4.800989001776323e-07, + "loss": 0.563, + "step": 19677 + }, + { + "epoch": 0.9, + "grad_norm": 0.30364035888077595, + "learning_rate": 4.796435106613217e-07, + "loss": 0.1981, + "step": 19678 + }, + { + "epoch": 0.9, + "grad_norm": 0.41098185686447475, + "learning_rate": 4.791883319165302e-07, + "loss": 0.2884, + "step": 19679 + }, + { + "epoch": 0.9, + "grad_norm": 0.5238633499587887, + "learning_rate": 4.787333639533298e-07, + "loss": 0.2322, + "step": 19680 + }, + { + "epoch": 0.9, + "grad_norm": 0.34938431977299, + "learning_rate": 4.782786067817991e-07, + "loss": 0.1717, + "step": 19681 + }, + { + "epoch": 0.9, + "grad_norm": 0.34274778254202876, + "learning_rate": 4.77824060412001e-07, + "loss": 0.281, + "step": 19682 + }, + { + "epoch": 0.9, + "grad_norm": 0.8241661078263844, + "learning_rate": 4.773697248540022e-07, + "loss": 0.4141, + "step": 19683 + }, + { + "epoch": 0.9, + "grad_norm": 0.4056326951656576, + "learning_rate": 4.769156001178576e-07, + "loss": 0.1959, + "step": 19684 + }, + { + "epoch": 0.9, + "grad_norm": 0.26316576888897975, + "learning_rate": 4.76461686213624e-07, + "loss": 0.1985, + "step": 19685 + }, + { + "epoch": 0.9, + "grad_norm": 0.4323788151244527, + "learning_rate": 4.760079831513509e-07, + "loss": 0.299, + "step": 19686 + }, + { + "epoch": 0.9, + "grad_norm": 0.4143349113728948, + "learning_rate": 4.755544909410803e-07, + "loss": 0.1577, + "step": 19687 + }, + { + "epoch": 0.9, + "grad_norm": 0.5748322040193202, + "learning_rate": 4.7510120959285313e-07, + "loss": 0.3446, + "step": 19688 + }, + { + "epoch": 0.9, + "grad_norm": 0.7227308648162886, + "learning_rate": 4.746481391167068e-07, + "loss": 0.3674, + "step": 19689 + }, + { + "epoch": 0.9, + "grad_norm": 0.3292687923279854, + "learning_rate": 4.7419527952266896e-07, + "loss": 0.2742, + "step": 19690 + }, + { + "epoch": 0.9, + "grad_norm": 0.4178898078263602, + "learning_rate": 4.737426308207671e-07, + "loss": 0.2352, + "step": 19691 + }, + { + "epoch": 0.9, + "grad_norm": 0.32260181426177587, + "learning_rate": 4.7329019302102096e-07, + "loss": 0.1753, + "step": 19692 + }, + { + "epoch": 0.9, + "grad_norm": 0.4129840386667242, + "learning_rate": 4.728379661334481e-07, + "loss": 0.2323, + "step": 19693 + }, + { + "epoch": 0.9, + "grad_norm": 0.32454101903148397, + "learning_rate": 4.723859501680594e-07, + "loss": 0.2443, + "step": 19694 + }, + { + "epoch": 0.9, + "grad_norm": 0.7199251552262581, + "learning_rate": 4.7193414513486577e-07, + "loss": 0.3886, + "step": 19695 + }, + { + "epoch": 0.9, + "grad_norm": 0.5069539180343947, + "learning_rate": 4.7148255104386585e-07, + "loss": 0.2775, + "step": 19696 + }, + { + "epoch": 0.9, + "grad_norm": 0.376878172241434, + "learning_rate": 4.710311679050561e-07, + "loss": 0.159, + "step": 19697 + }, + { + "epoch": 0.9, + "grad_norm": 0.34960728413117415, + "learning_rate": 4.7057999572843516e-07, + "loss": 0.2368, + "step": 19698 + }, + { + "epoch": 0.9, + "grad_norm": 1.0924672190479658, + "learning_rate": 4.7012903452398615e-07, + "loss": 0.3852, + "step": 19699 + }, + { + "epoch": 0.9, + "grad_norm": 0.36059689624808944, + "learning_rate": 4.696782843016978e-07, + "loss": 0.2156, + "step": 19700 + }, + { + "epoch": 0.91, + "grad_norm": 0.4933696973496774, + "learning_rate": 4.6922774507154543e-07, + "loss": 0.336, + "step": 19701 + }, + { + "epoch": 0.91, + "grad_norm": 0.3760125699511816, + "learning_rate": 4.687774168435044e-07, + "loss": 0.2564, + "step": 19702 + }, + { + "epoch": 0.91, + "grad_norm": 0.43444275631195906, + "learning_rate": 4.6832729962754676e-07, + "loss": 0.3085, + "step": 19703 + }, + { + "epoch": 0.91, + "grad_norm": 0.18826943294063364, + "learning_rate": 4.678773934336334e-07, + "loss": 0.0715, + "step": 19704 + }, + { + "epoch": 0.91, + "grad_norm": 1.0038717761266667, + "learning_rate": 4.674276982717307e-07, + "loss": 0.2989, + "step": 19705 + }, + { + "epoch": 0.91, + "grad_norm": 0.2789744000259988, + "learning_rate": 4.6697821415178867e-07, + "loss": 0.2799, + "step": 19706 + }, + { + "epoch": 0.91, + "grad_norm": 0.6135899969025143, + "learning_rate": 4.6652894108376034e-07, + "loss": 0.2823, + "step": 19707 + }, + { + "epoch": 0.91, + "grad_norm": 0.5726634170603374, + "learning_rate": 4.6607987907759556e-07, + "loss": 0.342, + "step": 19708 + }, + { + "epoch": 0.91, + "grad_norm": 0.3361356297417865, + "learning_rate": 4.656310281432308e-07, + "loss": 0.2179, + "step": 19709 + }, + { + "epoch": 0.91, + "grad_norm": 0.2837792841623217, + "learning_rate": 4.651823882906059e-07, + "loss": 0.1965, + "step": 19710 + }, + { + "epoch": 0.91, + "grad_norm": 0.803259917653567, + "learning_rate": 4.6473395952965406e-07, + "loss": 0.2846, + "step": 19711 + }, + { + "epoch": 0.91, + "grad_norm": 0.40945308889029214, + "learning_rate": 4.6428574187030284e-07, + "loss": 0.2931, + "step": 19712 + }, + { + "epoch": 0.91, + "grad_norm": 0.7242143497057505, + "learning_rate": 4.6383773532247433e-07, + "loss": 0.3799, + "step": 19713 + }, + { + "epoch": 0.91, + "grad_norm": 0.2507826002586037, + "learning_rate": 4.6338993989608506e-07, + "loss": 0.2089, + "step": 19714 + }, + { + "epoch": 0.91, + "grad_norm": 0.5764000414362074, + "learning_rate": 4.6294235560105374e-07, + "loss": 0.341, + "step": 19715 + }, + { + "epoch": 0.91, + "grad_norm": 0.39813677132321484, + "learning_rate": 4.6249498244728573e-07, + "loss": 0.1805, + "step": 19716 + }, + { + "epoch": 0.91, + "grad_norm": 0.6196067231669172, + "learning_rate": 4.6204782044468654e-07, + "loss": 0.2187, + "step": 19717 + }, + { + "epoch": 0.91, + "grad_norm": 0.25662900185794185, + "learning_rate": 4.61600869603156e-07, + "loss": 0.2589, + "step": 19718 + }, + { + "epoch": 0.91, + "grad_norm": 0.7283203335900867, + "learning_rate": 4.611541299325883e-07, + "loss": 0.3807, + "step": 19719 + }, + { + "epoch": 0.91, + "grad_norm": 1.092738342565134, + "learning_rate": 4.6070760144287576e-07, + "loss": 0.2109, + "step": 19720 + }, + { + "epoch": 0.91, + "grad_norm": 0.4111538269586557, + "learning_rate": 4.602612841439014e-07, + "loss": 0.2526, + "step": 19721 + }, + { + "epoch": 0.91, + "grad_norm": 0.30565356392077386, + "learning_rate": 4.5981517804554843e-07, + "loss": 0.2823, + "step": 19722 + }, + { + "epoch": 0.91, + "grad_norm": 0.2907672161228244, + "learning_rate": 4.5936928315769235e-07, + "loss": 0.1194, + "step": 19723 + }, + { + "epoch": 0.91, + "grad_norm": 0.400908450759209, + "learning_rate": 4.5892359949020413e-07, + "loss": 0.2797, + "step": 19724 + }, + { + "epoch": 0.91, + "grad_norm": 1.3536647085690066, + "learning_rate": 4.584781270529537e-07, + "loss": 0.7272, + "step": 19725 + }, + { + "epoch": 0.91, + "grad_norm": 0.37145407325281, + "learning_rate": 4.5803286585579973e-07, + "loss": 0.3058, + "step": 19726 + }, + { + "epoch": 0.91, + "grad_norm": 0.3402555374466048, + "learning_rate": 4.5758781590860115e-07, + "loss": 0.1883, + "step": 19727 + }, + { + "epoch": 0.91, + "grad_norm": 0.37104664438654733, + "learning_rate": 4.5714297722121105e-07, + "loss": 0.1623, + "step": 19728 + }, + { + "epoch": 0.91, + "grad_norm": 0.41852115508656573, + "learning_rate": 4.5669834980347936e-07, + "loss": 0.2822, + "step": 19729 + }, + { + "epoch": 0.91, + "grad_norm": 0.2911871814365297, + "learning_rate": 4.562539336652472e-07, + "loss": 0.2115, + "step": 19730 + }, + { + "epoch": 0.91, + "grad_norm": 0.875946276897322, + "learning_rate": 4.5580972881635434e-07, + "loss": 0.3933, + "step": 19731 + }, + { + "epoch": 0.91, + "grad_norm": 0.9689322472608545, + "learning_rate": 4.553657352666363e-07, + "loss": 0.4092, + "step": 19732 + }, + { + "epoch": 0.91, + "grad_norm": 0.3406543413540663, + "learning_rate": 4.549219530259208e-07, + "loss": 0.1958, + "step": 19733 + }, + { + "epoch": 0.91, + "grad_norm": 0.3451287375943511, + "learning_rate": 4.544783821040355e-07, + "loss": 0.2645, + "step": 19734 + }, + { + "epoch": 0.91, + "grad_norm": 0.7439674246675786, + "learning_rate": 4.540350225107959e-07, + "loss": 0.366, + "step": 19735 + }, + { + "epoch": 0.91, + "grad_norm": 0.2082561503270713, + "learning_rate": 4.535918742560219e-07, + "loss": 0.1429, + "step": 19736 + }, + { + "epoch": 0.91, + "grad_norm": 0.3788886713640703, + "learning_rate": 4.531489373495235e-07, + "loss": 0.3283, + "step": 19737 + }, + { + "epoch": 0.91, + "grad_norm": 1.293010485749024, + "learning_rate": 4.5270621180110497e-07, + "loss": 0.6909, + "step": 19738 + }, + { + "epoch": 0.91, + "grad_norm": 0.33674316864104575, + "learning_rate": 4.5226369762056965e-07, + "loss": 0.2447, + "step": 19739 + }, + { + "epoch": 0.91, + "grad_norm": 0.89930482753975, + "learning_rate": 4.518213948177119e-07, + "loss": 0.266, + "step": 19740 + }, + { + "epoch": 0.91, + "grad_norm": 0.37768048847922786, + "learning_rate": 4.513793034023295e-07, + "loss": 0.282, + "step": 19741 + }, + { + "epoch": 0.91, + "grad_norm": 0.26326430884617374, + "learning_rate": 4.509374233842045e-07, + "loss": 0.2179, + "step": 19742 + }, + { + "epoch": 0.91, + "grad_norm": 0.2936378141780968, + "learning_rate": 4.5049575477312145e-07, + "loss": 0.1209, + "step": 19743 + }, + { + "epoch": 0.91, + "grad_norm": 1.0414377939499828, + "learning_rate": 4.50054297578858e-07, + "loss": 0.3817, + "step": 19744 + }, + { + "epoch": 0.91, + "grad_norm": 0.39027183973080437, + "learning_rate": 4.4961305181118743e-07, + "loss": 0.2753, + "step": 19745 + }, + { + "epoch": 0.91, + "grad_norm": 0.33699468590754833, + "learning_rate": 4.491720174798808e-07, + "loss": 0.2453, + "step": 19746 + }, + { + "epoch": 0.91, + "grad_norm": 0.7103607597284152, + "learning_rate": 4.4873119459469925e-07, + "loss": 0.3486, + "step": 19747 + }, + { + "epoch": 0.91, + "grad_norm": 0.3762038229527056, + "learning_rate": 4.482905831654039e-07, + "loss": 0.276, + "step": 19748 + }, + { + "epoch": 0.91, + "grad_norm": 0.2219767020981018, + "learning_rate": 4.478501832017501e-07, + "loss": 0.1736, + "step": 19749 + }, + { + "epoch": 0.91, + "grad_norm": 0.8060964864426851, + "learning_rate": 4.474099947134847e-07, + "loss": 0.4676, + "step": 19750 + }, + { + "epoch": 0.91, + "grad_norm": 0.3561407094641517, + "learning_rate": 4.469700177103575e-07, + "loss": 0.2618, + "step": 19751 + }, + { + "epoch": 0.91, + "grad_norm": 0.664969371254339, + "learning_rate": 4.465302522021042e-07, + "loss": 0.3429, + "step": 19752 + }, + { + "epoch": 0.91, + "grad_norm": 0.3815591769767246, + "learning_rate": 4.4609069819846473e-07, + "loss": 0.2234, + "step": 19753 + }, + { + "epoch": 0.91, + "grad_norm": 0.3514167519748393, + "learning_rate": 4.456513557091691e-07, + "loss": 0.2281, + "step": 19754 + }, + { + "epoch": 0.91, + "grad_norm": 0.3723628282419125, + "learning_rate": 4.4521222474394276e-07, + "loss": 0.194, + "step": 19755 + }, + { + "epoch": 0.91, + "grad_norm": 1.0613067796026163, + "learning_rate": 4.447733053125114e-07, + "loss": 0.3707, + "step": 19756 + }, + { + "epoch": 0.91, + "grad_norm": 0.35486525465206176, + "learning_rate": 4.4433459742458496e-07, + "loss": 0.2693, + "step": 19757 + }, + { + "epoch": 0.91, + "grad_norm": 0.34877866663426244, + "learning_rate": 4.438961010898846e-07, + "loss": 0.2973, + "step": 19758 + }, + { + "epoch": 0.91, + "grad_norm": 2.0739547121001447, + "learning_rate": 4.434578163181125e-07, + "loss": 0.1492, + "step": 19759 + }, + { + "epoch": 0.91, + "grad_norm": 0.2663948095723982, + "learning_rate": 4.4301974311897533e-07, + "loss": 0.2034, + "step": 19760 + }, + { + "epoch": 0.91, + "grad_norm": 0.4255795553645608, + "learning_rate": 4.4258188150216875e-07, + "loss": 0.2592, + "step": 19761 + }, + { + "epoch": 0.91, + "grad_norm": 0.3797626386090198, + "learning_rate": 4.4214423147738714e-07, + "loss": 0.2533, + "step": 19762 + }, + { + "epoch": 0.91, + "grad_norm": 0.34444976853319015, + "learning_rate": 4.417067930543217e-07, + "loss": 0.2462, + "step": 19763 + }, + { + "epoch": 0.91, + "grad_norm": 1.2201148908971131, + "learning_rate": 4.412695662426547e-07, + "loss": 0.3439, + "step": 19764 + }, + { + "epoch": 0.91, + "grad_norm": 0.41309647063277094, + "learning_rate": 4.408325510520661e-07, + "loss": 0.2826, + "step": 19765 + }, + { + "epoch": 0.91, + "grad_norm": 0.3582109704642266, + "learning_rate": 4.403957474922327e-07, + "loss": 0.1942, + "step": 19766 + }, + { + "epoch": 0.91, + "grad_norm": 0.573767306999602, + "learning_rate": 4.399591555728233e-07, + "loss": 0.2585, + "step": 19767 + }, + { + "epoch": 0.91, + "grad_norm": 0.6807180208184898, + "learning_rate": 4.3952277530350473e-07, + "loss": 0.379, + "step": 19768 + }, + { + "epoch": 0.91, + "grad_norm": 0.3147821299585097, + "learning_rate": 4.390866066939359e-07, + "loss": 0.1857, + "step": 19769 + }, + { + "epoch": 0.91, + "grad_norm": 0.2960851281082761, + "learning_rate": 4.386506497537757e-07, + "loss": 0.2518, + "step": 19770 + }, + { + "epoch": 0.91, + "grad_norm": 1.7004741670427852, + "learning_rate": 4.3821490449267534e-07, + "loss": 0.5845, + "step": 19771 + }, + { + "epoch": 0.91, + "grad_norm": 0.2950058679593831, + "learning_rate": 4.377793709202804e-07, + "loss": 0.1837, + "step": 19772 + }, + { + "epoch": 0.91, + "grad_norm": 0.3021267494878759, + "learning_rate": 4.373440490462344e-07, + "loss": 0.2706, + "step": 19773 + }, + { + "epoch": 0.91, + "grad_norm": 0.9180732422579636, + "learning_rate": 4.369089388801728e-07, + "loss": 0.5291, + "step": 19774 + }, + { + "epoch": 0.91, + "grad_norm": 0.34708281508369293, + "learning_rate": 4.3647404043173246e-07, + "loss": 0.2086, + "step": 19775 + }, + { + "epoch": 0.91, + "grad_norm": 0.4858649183863681, + "learning_rate": 4.3603935371053784e-07, + "loss": 0.2698, + "step": 19776 + }, + { + "epoch": 0.91, + "grad_norm": 0.4000847379651149, + "learning_rate": 4.356048787262157e-07, + "loss": 0.2964, + "step": 19777 + }, + { + "epoch": 0.91, + "grad_norm": 0.34787394064988847, + "learning_rate": 4.351706154883828e-07, + "loss": 0.2469, + "step": 19778 + }, + { + "epoch": 0.91, + "grad_norm": 1.153077847298311, + "learning_rate": 4.3473656400665256e-07, + "loss": 0.3617, + "step": 19779 + }, + { + "epoch": 0.91, + "grad_norm": 0.604102738560976, + "learning_rate": 4.343027242906372e-07, + "loss": 0.3501, + "step": 19780 + }, + { + "epoch": 0.91, + "grad_norm": 0.2833022869709932, + "learning_rate": 4.3386909634993923e-07, + "loss": 0.2382, + "step": 19781 + }, + { + "epoch": 0.91, + "grad_norm": 0.23452828881729748, + "learning_rate": 4.334356801941597e-07, + "loss": 0.1442, + "step": 19782 + }, + { + "epoch": 0.91, + "grad_norm": 1.2747507230643942, + "learning_rate": 4.330024758328932e-07, + "loss": 0.4115, + "step": 19783 + }, + { + "epoch": 0.91, + "grad_norm": 0.4149898560806994, + "learning_rate": 4.3256948327573214e-07, + "loss": 0.2455, + "step": 19784 + }, + { + "epoch": 0.91, + "grad_norm": 0.2972455159974874, + "learning_rate": 4.321367025322609e-07, + "loss": 0.2335, + "step": 19785 + }, + { + "epoch": 0.91, + "grad_norm": 0.6136455630499438, + "learning_rate": 4.31704133612062e-07, + "loss": 0.4006, + "step": 19786 + }, + { + "epoch": 0.91, + "grad_norm": 0.4046642300211354, + "learning_rate": 4.3127177652470987e-07, + "loss": 0.2309, + "step": 19787 + }, + { + "epoch": 0.91, + "grad_norm": 0.2937780580837999, + "learning_rate": 4.308396312797802e-07, + "loss": 0.1419, + "step": 19788 + }, + { + "epoch": 0.91, + "grad_norm": 0.359238590438979, + "learning_rate": 4.304076978868377e-07, + "loss": 0.2902, + "step": 19789 + }, + { + "epoch": 0.91, + "grad_norm": 0.4486169797273892, + "learning_rate": 4.2997597635544563e-07, + "loss": 0.2806, + "step": 19790 + }, + { + "epoch": 0.91, + "grad_norm": 0.501753037111615, + "learning_rate": 4.295444666951598e-07, + "loss": 0.3068, + "step": 19791 + }, + { + "epoch": 0.91, + "grad_norm": 0.861883459541339, + "learning_rate": 4.29113168915537e-07, + "loss": 0.2967, + "step": 19792 + }, + { + "epoch": 0.91, + "grad_norm": 0.28982507873488367, + "learning_rate": 4.28682083026124e-07, + "loss": 0.2484, + "step": 19793 + }, + { + "epoch": 0.91, + "grad_norm": 0.3036750843834868, + "learning_rate": 4.2825120903646543e-07, + "loss": 0.2132, + "step": 19794 + }, + { + "epoch": 0.91, + "grad_norm": 2.304524203264433, + "learning_rate": 4.2782054695609807e-07, + "loss": 0.1853, + "step": 19795 + }, + { + "epoch": 0.91, + "grad_norm": 0.4092160182506747, + "learning_rate": 4.273900967945588e-07, + "loss": 0.2708, + "step": 19796 + }, + { + "epoch": 0.91, + "grad_norm": 0.3832262830059241, + "learning_rate": 4.2695985856137767e-07, + "loss": 0.2965, + "step": 19797 + }, + { + "epoch": 0.91, + "grad_norm": 0.45138579902102494, + "learning_rate": 4.2652983226607716e-07, + "loss": 0.2676, + "step": 19798 + }, + { + "epoch": 0.91, + "grad_norm": 0.3339018841304625, + "learning_rate": 4.261000179181807e-07, + "loss": 0.2492, + "step": 19799 + }, + { + "epoch": 0.91, + "grad_norm": 0.5659456858931539, + "learning_rate": 4.2567041552720066e-07, + "loss": 0.2265, + "step": 19800 + }, + { + "epoch": 0.91, + "grad_norm": 0.28436534322503054, + "learning_rate": 4.252410251026495e-07, + "loss": 0.1946, + "step": 19801 + }, + { + "epoch": 0.91, + "grad_norm": 0.46856686153181293, + "learning_rate": 4.248118466540352e-07, + "loss": 0.2606, + "step": 19802 + }, + { + "epoch": 0.91, + "grad_norm": 0.5102042285757433, + "learning_rate": 4.243828801908578e-07, + "loss": 0.3124, + "step": 19803 + }, + { + "epoch": 0.91, + "grad_norm": 0.4817684645473332, + "learning_rate": 4.239541257226132e-07, + "loss": 0.3589, + "step": 19804 + }, + { + "epoch": 0.91, + "grad_norm": 0.3377212346069086, + "learning_rate": 4.2352558325879477e-07, + "loss": 0.1892, + "step": 19805 + }, + { + "epoch": 0.91, + "grad_norm": 0.302474484727652, + "learning_rate": 4.230972528088917e-07, + "loss": 0.226, + "step": 19806 + }, + { + "epoch": 0.91, + "grad_norm": 0.5720702123811231, + "learning_rate": 4.2266913438238524e-07, + "loss": 0.2241, + "step": 19807 + }, + { + "epoch": 0.91, + "grad_norm": 0.36213143987426427, + "learning_rate": 4.2224122798875e-07, + "loss": 0.1763, + "step": 19808 + }, + { + "epoch": 0.91, + "grad_norm": 0.33069920549693566, + "learning_rate": 4.2181353363746624e-07, + "loss": 0.2846, + "step": 19809 + }, + { + "epoch": 0.91, + "grad_norm": 0.9110626429122548, + "learning_rate": 4.213860513379975e-07, + "loss": 0.4987, + "step": 19810 + }, + { + "epoch": 0.91, + "grad_norm": 0.339500681998412, + "learning_rate": 4.209587810998117e-07, + "loss": 0.1925, + "step": 19811 + }, + { + "epoch": 0.91, + "grad_norm": 0.2544588755551442, + "learning_rate": 4.2053172293236354e-07, + "loss": 0.1958, + "step": 19812 + }, + { + "epoch": 0.91, + "grad_norm": 0.4021873665829171, + "learning_rate": 4.2010487684511105e-07, + "loss": 0.2933, + "step": 19813 + }, + { + "epoch": 0.91, + "grad_norm": 0.42210001958679977, + "learning_rate": 4.1967824284750436e-07, + "loss": 0.1948, + "step": 19814 + }, + { + "epoch": 0.91, + "grad_norm": 0.48653784944104306, + "learning_rate": 4.192518209489871e-07, + "loss": 0.3606, + "step": 19815 + }, + { + "epoch": 0.91, + "grad_norm": 1.2554167127531906, + "learning_rate": 4.1882561115900054e-07, + "loss": 0.5862, + "step": 19816 + }, + { + "epoch": 0.91, + "grad_norm": 0.342675420844382, + "learning_rate": 4.1839961348698054e-07, + "loss": 0.2464, + "step": 19817 + }, + { + "epoch": 0.91, + "grad_norm": 0.4176102567013411, + "learning_rate": 4.179738279423573e-07, + "loss": 0.2232, + "step": 19818 + }, + { + "epoch": 0.91, + "grad_norm": 0.4769599159867558, + "learning_rate": 4.1754825453455995e-07, + "loss": 0.2129, + "step": 19819 + }, + { + "epoch": 0.91, + "grad_norm": 0.5556131275289173, + "learning_rate": 4.171228932730065e-07, + "loss": 0.3087, + "step": 19820 + }, + { + "epoch": 0.91, + "grad_norm": 0.2779744591972851, + "learning_rate": 4.1669774416711715e-07, + "loss": 0.2379, + "step": 19821 + }, + { + "epoch": 0.91, + "grad_norm": 1.1230407746773776, + "learning_rate": 4.1627280722630224e-07, + "loss": 0.7306, + "step": 19822 + }, + { + "epoch": 0.91, + "grad_norm": 1.0068110536288042, + "learning_rate": 4.1584808245997313e-07, + "loss": 0.323, + "step": 19823 + }, + { + "epoch": 0.91, + "grad_norm": 0.28430074361406493, + "learning_rate": 4.1542356987752775e-07, + "loss": 0.1561, + "step": 19824 + }, + { + "epoch": 0.91, + "grad_norm": 0.2809496820285546, + "learning_rate": 4.149992694883664e-07, + "loss": 0.2396, + "step": 19825 + }, + { + "epoch": 0.91, + "grad_norm": 0.7192541618546603, + "learning_rate": 4.1457518130188613e-07, + "loss": 0.3517, + "step": 19826 + }, + { + "epoch": 0.91, + "grad_norm": 0.40111991365850913, + "learning_rate": 4.141513053274704e-07, + "loss": 0.2193, + "step": 19827 + }, + { + "epoch": 0.91, + "grad_norm": 1.407630646117947, + "learning_rate": 4.137276415745062e-07, + "loss": 0.7636, + "step": 19828 + }, + { + "epoch": 0.91, + "grad_norm": 0.29834461721221045, + "learning_rate": 4.1330419005237266e-07, + "loss": 0.2311, + "step": 19829 + }, + { + "epoch": 0.91, + "grad_norm": 0.44983202754844104, + "learning_rate": 4.128809507704445e-07, + "loss": 0.3446, + "step": 19830 + }, + { + "epoch": 0.91, + "grad_norm": 0.8206746406892417, + "learning_rate": 4.1245792373809213e-07, + "loss": 0.2768, + "step": 19831 + }, + { + "epoch": 0.91, + "grad_norm": 0.3059814248251906, + "learning_rate": 4.1203510896468124e-07, + "loss": 0.234, + "step": 19832 + }, + { + "epoch": 0.91, + "grad_norm": 0.25980440023167445, + "learning_rate": 4.116125064595722e-07, + "loss": 0.2337, + "step": 19833 + }, + { + "epoch": 0.91, + "grad_norm": 1.1630961426503912, + "learning_rate": 4.111901162321175e-07, + "loss": 0.5136, + "step": 19834 + }, + { + "epoch": 0.91, + "grad_norm": 0.6564656270625742, + "learning_rate": 4.107679382916763e-07, + "loss": 0.3237, + "step": 19835 + }, + { + "epoch": 0.91, + "grad_norm": 0.43015717468762144, + "learning_rate": 4.103459726475889e-07, + "loss": 0.2954, + "step": 19836 + }, + { + "epoch": 0.91, + "grad_norm": 0.3782946871726423, + "learning_rate": 4.09924219309199e-07, + "loss": 0.265, + "step": 19837 + }, + { + "epoch": 0.91, + "grad_norm": 0.34221109465433935, + "learning_rate": 4.095026782858436e-07, + "loss": 0.1657, + "step": 19838 + }, + { + "epoch": 0.91, + "grad_norm": 0.4019943113102182, + "learning_rate": 4.0908134958685506e-07, + "loss": 0.2946, + "step": 19839 + }, + { + "epoch": 0.91, + "grad_norm": 0.7192032351701517, + "learning_rate": 4.086602332215628e-07, + "loss": 0.3832, + "step": 19840 + }, + { + "epoch": 0.91, + "grad_norm": 0.56774413506875, + "learning_rate": 4.082393291992881e-07, + "loss": 0.2147, + "step": 19841 + }, + { + "epoch": 0.91, + "grad_norm": 0.3993334499086172, + "learning_rate": 4.0781863752935026e-07, + "loss": 0.2656, + "step": 19842 + }, + { + "epoch": 0.91, + "grad_norm": 0.6146832923142538, + "learning_rate": 4.073981582210629e-07, + "loss": 0.3479, + "step": 19843 + }, + { + "epoch": 0.91, + "grad_norm": 0.2644310243021898, + "learning_rate": 4.06977891283733e-07, + "loss": 0.1496, + "step": 19844 + }, + { + "epoch": 0.91, + "grad_norm": 0.24713858658997417, + "learning_rate": 4.065578367266698e-07, + "loss": 0.2089, + "step": 19845 + }, + { + "epoch": 0.91, + "grad_norm": 1.2982743337991483, + "learning_rate": 4.0613799455916704e-07, + "loss": 0.7555, + "step": 19846 + }, + { + "epoch": 0.91, + "grad_norm": 0.7103384837908386, + "learning_rate": 4.0571836479052384e-07, + "loss": 0.1892, + "step": 19847 + }, + { + "epoch": 0.91, + "grad_norm": 0.3428313305646021, + "learning_rate": 4.052989474300295e-07, + "loss": 0.2492, + "step": 19848 + }, + { + "epoch": 0.91, + "grad_norm": 0.4855649379008054, + "learning_rate": 4.048797424869677e-07, + "loss": 0.3177, + "step": 19849 + }, + { + "epoch": 0.91, + "grad_norm": 0.7677384446727405, + "learning_rate": 4.04460749970621e-07, + "loss": 0.1094, + "step": 19850 + }, + { + "epoch": 0.91, + "grad_norm": 0.3160381517904302, + "learning_rate": 4.0404196989026313e-07, + "loss": 0.2319, + "step": 19851 + }, + { + "epoch": 0.91, + "grad_norm": 0.49577837297947086, + "learning_rate": 4.036234022551711e-07, + "loss": 0.3736, + "step": 19852 + }, + { + "epoch": 0.91, + "grad_norm": 0.3156342121526298, + "learning_rate": 4.032050470746063e-07, + "loss": 0.2291, + "step": 19853 + }, + { + "epoch": 0.91, + "grad_norm": 0.35343553640327374, + "learning_rate": 4.027869043578314e-07, + "loss": 0.213, + "step": 19854 + }, + { + "epoch": 0.91, + "grad_norm": 0.8590904377050814, + "learning_rate": 4.023689741141046e-07, + "loss": 0.3757, + "step": 19855 + }, + { + "epoch": 0.91, + "grad_norm": 0.35040191760095213, + "learning_rate": 4.019512563526784e-07, + "loss": 0.2573, + "step": 19856 + }, + { + "epoch": 0.91, + "grad_norm": 0.24742512969226757, + "learning_rate": 4.0153375108280104e-07, + "loss": 0.158, + "step": 19857 + }, + { + "epoch": 0.91, + "grad_norm": 0.41920099643243186, + "learning_rate": 4.0111645831371506e-07, + "loss": 0.2934, + "step": 19858 + }, + { + "epoch": 0.91, + "grad_norm": 0.7967682088857647, + "learning_rate": 4.0069937805466084e-07, + "loss": 0.3294, + "step": 19859 + }, + { + "epoch": 0.91, + "grad_norm": 0.3020426522857551, + "learning_rate": 4.0028251031486775e-07, + "loss": 0.2152, + "step": 19860 + }, + { + "epoch": 0.91, + "grad_norm": 0.5039197975907477, + "learning_rate": 3.9986585510356835e-07, + "loss": 0.2954, + "step": 19861 + }, + { + "epoch": 0.91, + "grad_norm": 3.8899929926924717, + "learning_rate": 3.9944941242998747e-07, + "loss": 0.5789, + "step": 19862 + }, + { + "epoch": 0.91, + "grad_norm": 0.3106495163642383, + "learning_rate": 3.990331823033422e-07, + "loss": 0.2034, + "step": 19863 + }, + { + "epoch": 0.91, + "grad_norm": 0.48808622301736526, + "learning_rate": 3.986171647328496e-07, + "loss": 0.3692, + "step": 19864 + }, + { + "epoch": 0.91, + "grad_norm": 0.3331986018138216, + "learning_rate": 3.98201359727719e-07, + "loss": 0.2435, + "step": 19865 + }, + { + "epoch": 0.91, + "grad_norm": 0.3477163095835327, + "learning_rate": 3.977857672971552e-07, + "loss": 0.2698, + "step": 19866 + }, + { + "epoch": 0.91, + "grad_norm": 0.4475541027831273, + "learning_rate": 3.9737038745036095e-07, + "loss": 0.0954, + "step": 19867 + }, + { + "epoch": 0.91, + "grad_norm": 0.3961288873255856, + "learning_rate": 3.9695522019652874e-07, + "loss": 0.2919, + "step": 19868 + }, + { + "epoch": 0.91, + "grad_norm": 0.31665232384926084, + "learning_rate": 3.965402655448547e-07, + "loss": 0.2511, + "step": 19869 + }, + { + "epoch": 0.91, + "grad_norm": 0.9095326132532182, + "learning_rate": 3.9612552350452247e-07, + "loss": 0.337, + "step": 19870 + }, + { + "epoch": 0.91, + "grad_norm": 0.42779134732612795, + "learning_rate": 3.9571099408471594e-07, + "loss": 0.2174, + "step": 19871 + }, + { + "epoch": 0.91, + "grad_norm": 0.37312409874350105, + "learning_rate": 3.9529667729461094e-07, + "loss": 0.2621, + "step": 19872 + }, + { + "epoch": 0.91, + "grad_norm": 0.31454070674186574, + "learning_rate": 3.948825731433781e-07, + "loss": 0.192, + "step": 19873 + }, + { + "epoch": 0.91, + "grad_norm": 0.9516343102382538, + "learning_rate": 3.9446868164018993e-07, + "loss": 0.3685, + "step": 19874 + }, + { + "epoch": 0.91, + "grad_norm": 0.350510324429391, + "learning_rate": 3.9405500279420583e-07, + "loss": 0.2711, + "step": 19875 + }, + { + "epoch": 0.91, + "grad_norm": 0.5877969612030379, + "learning_rate": 3.936415366145874e-07, + "loss": 0.2538, + "step": 19876 + }, + { + "epoch": 0.91, + "grad_norm": 0.7231314208777663, + "learning_rate": 3.93228283110485e-07, + "loss": 0.3876, + "step": 19877 + }, + { + "epoch": 0.91, + "grad_norm": 0.34470765887975996, + "learning_rate": 3.928152422910492e-07, + "loss": 0.2449, + "step": 19878 + }, + { + "epoch": 0.91, + "grad_norm": 0.28969117992946786, + "learning_rate": 3.924024141654259e-07, + "loss": 0.1546, + "step": 19879 + }, + { + "epoch": 0.91, + "grad_norm": 0.41522483250863024, + "learning_rate": 3.9198979874275235e-07, + "loss": 0.2313, + "step": 19880 + }, + { + "epoch": 0.91, + "grad_norm": 0.3476039626720442, + "learning_rate": 3.9157739603216337e-07, + "loss": 0.2621, + "step": 19881 + }, + { + "epoch": 0.91, + "grad_norm": 0.7238059127524109, + "learning_rate": 3.9116520604279285e-07, + "loss": 0.4561, + "step": 19882 + }, + { + "epoch": 0.91, + "grad_norm": 0.9597618642870126, + "learning_rate": 3.907532287837612e-07, + "loss": 0.2829, + "step": 19883 + }, + { + "epoch": 0.91, + "grad_norm": 0.2825385672652789, + "learning_rate": 3.903414642641923e-07, + "loss": 0.2507, + "step": 19884 + }, + { + "epoch": 0.91, + "grad_norm": 0.2630026951721663, + "learning_rate": 3.899299124931999e-07, + "loss": 0.1959, + "step": 19885 + }, + { + "epoch": 0.91, + "grad_norm": 1.5364991096522524, + "learning_rate": 3.89518573479899e-07, + "loss": 0.3017, + "step": 19886 + }, + { + "epoch": 0.91, + "grad_norm": 0.3425652702646359, + "learning_rate": 3.8910744723339334e-07, + "loss": 0.2404, + "step": 19887 + }, + { + "epoch": 0.91, + "grad_norm": 0.43316565911924715, + "learning_rate": 3.8869653376278684e-07, + "loss": 0.282, + "step": 19888 + }, + { + "epoch": 0.91, + "grad_norm": 0.7836103390204027, + "learning_rate": 3.8828583307717435e-07, + "loss": 0.3245, + "step": 19889 + }, + { + "epoch": 0.91, + "grad_norm": 0.41183017602888133, + "learning_rate": 3.878753451856487e-07, + "loss": 0.2661, + "step": 19890 + }, + { + "epoch": 0.91, + "grad_norm": 0.28630296931988164, + "learning_rate": 3.874650700973015e-07, + "loss": 0.218, + "step": 19891 + }, + { + "epoch": 0.91, + "grad_norm": 0.41275176075058767, + "learning_rate": 3.8705500782121096e-07, + "loss": 0.2958, + "step": 19892 + }, + { + "epoch": 0.91, + "grad_norm": 0.31717182080086875, + "learning_rate": 3.866451583664588e-07, + "loss": 0.1801, + "step": 19893 + }, + { + "epoch": 0.91, + "grad_norm": 0.6167852749254489, + "learning_rate": 3.862355217421154e-07, + "loss": 0.3549, + "step": 19894 + }, + { + "epoch": 0.91, + "grad_norm": 1.3392562587555885, + "learning_rate": 3.8582609795725256e-07, + "loss": 0.5543, + "step": 19895 + }, + { + "epoch": 0.91, + "grad_norm": 0.27867943987355087, + "learning_rate": 3.8541688702093405e-07, + "loss": 0.199, + "step": 19896 + }, + { + "epoch": 0.91, + "grad_norm": 0.2837445480732412, + "learning_rate": 3.850078889422182e-07, + "loss": 0.2144, + "step": 19897 + }, + { + "epoch": 0.91, + "grad_norm": 1.6390383584427333, + "learning_rate": 3.8459910373016105e-07, + "loss": 0.7468, + "step": 19898 + }, + { + "epoch": 0.91, + "grad_norm": 0.3334933277665799, + "learning_rate": 3.841905313938121e-07, + "loss": 0.2119, + "step": 19899 + }, + { + "epoch": 0.91, + "grad_norm": 0.3799398315581481, + "learning_rate": 3.8378217194221743e-07, + "loss": 0.3103, + "step": 19900 + }, + { + "epoch": 0.91, + "grad_norm": 1.3136629268529836, + "learning_rate": 3.833740253844187e-07, + "loss": 0.8052, + "step": 19901 + }, + { + "epoch": 0.91, + "grad_norm": 0.3145367882430307, + "learning_rate": 3.829660917294475e-07, + "loss": 0.1829, + "step": 19902 + }, + { + "epoch": 0.91, + "grad_norm": 0.3369501483409386, + "learning_rate": 3.8255837098633897e-07, + "loss": 0.159, + "step": 19903 + }, + { + "epoch": 0.91, + "grad_norm": 0.399800259503826, + "learning_rate": 3.821508631641191e-07, + "loss": 0.2778, + "step": 19904 + }, + { + "epoch": 0.91, + "grad_norm": 0.347008833254004, + "learning_rate": 3.817435682718096e-07, + "loss": 0.2636, + "step": 19905 + }, + { + "epoch": 0.91, + "grad_norm": 0.8975740526911679, + "learning_rate": 3.813364863184266e-07, + "loss": 0.3296, + "step": 19906 + }, + { + "epoch": 0.91, + "grad_norm": 0.5110464300875857, + "learning_rate": 3.8092961731298285e-07, + "loss": 0.3255, + "step": 19907 + }, + { + "epoch": 0.91, + "grad_norm": 0.40564447347518345, + "learning_rate": 3.8052296126448897e-07, + "loss": 0.2604, + "step": 19908 + }, + { + "epoch": 0.91, + "grad_norm": 0.2694780948973626, + "learning_rate": 3.8011651818194216e-07, + "loss": 0.151, + "step": 19909 + }, + { + "epoch": 0.91, + "grad_norm": 0.7635059801695246, + "learning_rate": 3.797102880743464e-07, + "loss": 0.3983, + "step": 19910 + }, + { + "epoch": 0.91, + "grad_norm": 0.7612143127809361, + "learning_rate": 3.793042709506911e-07, + "loss": 0.3141, + "step": 19911 + }, + { + "epoch": 0.91, + "grad_norm": 0.27084523528884896, + "learning_rate": 3.788984668199669e-07, + "loss": 0.2426, + "step": 19912 + }, + { + "epoch": 0.91, + "grad_norm": 1.3569523326147956, + "learning_rate": 3.7849287569115876e-07, + "loss": 0.5478, + "step": 19913 + }, + { + "epoch": 0.91, + "grad_norm": 0.45821735212188525, + "learning_rate": 3.7808749757324293e-07, + "loss": 0.2701, + "step": 19914 + }, + { + "epoch": 0.91, + "grad_norm": 0.22000263009414384, + "learning_rate": 3.776823324751977e-07, + "loss": 0.1565, + "step": 19915 + }, + { + "epoch": 0.91, + "grad_norm": 0.5172344714932559, + "learning_rate": 3.772773804059904e-07, + "loss": 0.3137, + "step": 19916 + }, + { + "epoch": 0.91, + "grad_norm": 0.4059399426169871, + "learning_rate": 3.768726413745893e-07, + "loss": 0.2852, + "step": 19917 + }, + { + "epoch": 0.92, + "grad_norm": 0.6134693094137341, + "learning_rate": 3.76468115389953e-07, + "loss": 0.3633, + "step": 19918 + }, + { + "epoch": 0.92, + "grad_norm": 0.5443067207022229, + "learning_rate": 3.7606380246103524e-07, + "loss": 0.2229, + "step": 19919 + }, + { + "epoch": 0.92, + "grad_norm": 0.3358937803822818, + "learning_rate": 3.756597025967901e-07, + "loss": 0.2417, + "step": 19920 + }, + { + "epoch": 0.92, + "grad_norm": 0.5816465794835072, + "learning_rate": 3.7525581580616255e-07, + "loss": 0.3236, + "step": 19921 + }, + { + "epoch": 0.92, + "grad_norm": 0.39927751934554173, + "learning_rate": 3.748521420980966e-07, + "loss": 0.1665, + "step": 19922 + }, + { + "epoch": 0.92, + "grad_norm": 0.4316333650601254, + "learning_rate": 3.7444868148152627e-07, + "loss": 0.2648, + "step": 19923 + }, + { + "epoch": 0.92, + "grad_norm": 0.3178169485301669, + "learning_rate": 3.740454339653843e-07, + "loss": 0.2805, + "step": 19924 + }, + { + "epoch": 0.92, + "grad_norm": 1.5372655801819484, + "learning_rate": 3.7364239955860025e-07, + "loss": 0.3753, + "step": 19925 + }, + { + "epoch": 0.92, + "grad_norm": 0.7422869460697129, + "learning_rate": 3.732395782700937e-07, + "loss": 0.3136, + "step": 19926 + }, + { + "epoch": 0.92, + "grad_norm": 0.4546342004984768, + "learning_rate": 3.7283697010878636e-07, + "loss": 0.2651, + "step": 19927 + }, + { + "epoch": 0.92, + "grad_norm": 0.28318483006238593, + "learning_rate": 3.7243457508358784e-07, + "loss": 0.2162, + "step": 19928 + }, + { + "epoch": 0.92, + "grad_norm": 0.3580493502161358, + "learning_rate": 3.720323932034098e-07, + "loss": 0.1958, + "step": 19929 + }, + { + "epoch": 0.92, + "grad_norm": 0.4147679704134558, + "learning_rate": 3.7163042447715627e-07, + "loss": 0.3199, + "step": 19930 + }, + { + "epoch": 0.92, + "grad_norm": 0.5587071754929154, + "learning_rate": 3.7122866891372346e-07, + "loss": 0.385, + "step": 19931 + }, + { + "epoch": 0.92, + "grad_norm": 0.3072101987997162, + "learning_rate": 3.708271265220087e-07, + "loss": 0.1734, + "step": 19932 + }, + { + "epoch": 0.92, + "grad_norm": 0.4185232243988091, + "learning_rate": 3.704257973108982e-07, + "loss": 0.2854, + "step": 19933 + }, + { + "epoch": 0.92, + "grad_norm": 0.8722022609450718, + "learning_rate": 3.700246812892816e-07, + "loss": 0.4461, + "step": 19934 + }, + { + "epoch": 0.92, + "grad_norm": 0.170759954143786, + "learning_rate": 3.6962377846603614e-07, + "loss": 0.1141, + "step": 19935 + }, + { + "epoch": 0.92, + "grad_norm": 0.31802085764689164, + "learning_rate": 3.692230888500392e-07, + "loss": 0.2947, + "step": 19936 + }, + { + "epoch": 0.92, + "grad_norm": 1.3710467997212215, + "learning_rate": 3.6882261245016036e-07, + "loss": 0.5155, + "step": 19937 + }, + { + "epoch": 0.92, + "grad_norm": 0.4232609728695872, + "learning_rate": 3.6842234927526587e-07, + "loss": 0.1792, + "step": 19938 + }, + { + "epoch": 0.92, + "grad_norm": 0.5122152006494809, + "learning_rate": 3.680222993342186e-07, + "loss": 0.3297, + "step": 19939 + }, + { + "epoch": 0.92, + "grad_norm": 0.3856510584094294, + "learning_rate": 3.6762246263587265e-07, + "loss": 0.2865, + "step": 19940 + }, + { + "epoch": 0.92, + "grad_norm": 0.3669151614645981, + "learning_rate": 3.6722283918908195e-07, + "loss": 0.1464, + "step": 19941 + }, + { + "epoch": 0.92, + "grad_norm": 0.6175031000521726, + "learning_rate": 3.668234290026951e-07, + "loss": 0.4005, + "step": 19942 + }, + { + "epoch": 0.92, + "grad_norm": 0.34574251370342707, + "learning_rate": 3.6642423208555157e-07, + "loss": 0.2821, + "step": 19943 + }, + { + "epoch": 0.92, + "grad_norm": 0.7628507861902257, + "learning_rate": 3.66025248446491e-07, + "loss": 0.3895, + "step": 19944 + }, + { + "epoch": 0.92, + "grad_norm": 0.36283444785113733, + "learning_rate": 3.656264780943441e-07, + "loss": 0.2262, + "step": 19945 + }, + { + "epoch": 0.92, + "grad_norm": 1.010684942178908, + "learning_rate": 3.652279210379439e-07, + "loss": 0.4187, + "step": 19946 + }, + { + "epoch": 0.92, + "grad_norm": 0.42642223186166195, + "learning_rate": 3.6482957728611213e-07, + "loss": 0.3192, + "step": 19947 + }, + { + "epoch": 0.92, + "grad_norm": 0.30212769143575047, + "learning_rate": 3.644314468476651e-07, + "loss": 0.2273, + "step": 19948 + }, + { + "epoch": 0.92, + "grad_norm": 0.29573742166870615, + "learning_rate": 3.6403352973141904e-07, + "loss": 0.1877, + "step": 19949 + }, + { + "epoch": 0.92, + "grad_norm": 0.8086591971582502, + "learning_rate": 3.6363582594618254e-07, + "loss": 0.3001, + "step": 19950 + }, + { + "epoch": 0.92, + "grad_norm": 0.3343840287563893, + "learning_rate": 3.632383355007629e-07, + "loss": 0.2168, + "step": 19951 + }, + { + "epoch": 0.92, + "grad_norm": 0.5495309458733093, + "learning_rate": 3.6284105840395765e-07, + "loss": 0.3031, + "step": 19952 + }, + { + "epoch": 0.92, + "grad_norm": 0.7845803857087339, + "learning_rate": 3.6244399466456415e-07, + "loss": 0.4598, + "step": 19953 + }, + { + "epoch": 0.92, + "grad_norm": 0.25964317178650065, + "learning_rate": 3.6204714429136976e-07, + "loss": 0.1783, + "step": 19954 + }, + { + "epoch": 0.92, + "grad_norm": 0.3148871484035735, + "learning_rate": 3.616505072931631e-07, + "loss": 0.2129, + "step": 19955 + }, + { + "epoch": 0.92, + "grad_norm": 0.4485615431833554, + "learning_rate": 3.6125408367872594e-07, + "loss": 0.2765, + "step": 19956 + }, + { + "epoch": 0.92, + "grad_norm": 0.4111445671733057, + "learning_rate": 3.6085787345683243e-07, + "loss": 0.2691, + "step": 19957 + }, + { + "epoch": 0.92, + "grad_norm": 1.0002068188321827, + "learning_rate": 3.604618766362544e-07, + "loss": 0.2423, + "step": 19958 + }, + { + "epoch": 0.92, + "grad_norm": 0.3517317134963925, + "learning_rate": 3.600660932257616e-07, + "loss": 0.2561, + "step": 19959 + }, + { + "epoch": 0.92, + "grad_norm": 0.4126417800009638, + "learning_rate": 3.5967052323411354e-07, + "loss": 0.2857, + "step": 19960 + }, + { + "epoch": 0.92, + "grad_norm": 0.4812825944824377, + "learning_rate": 3.592751666700689e-07, + "loss": 0.2111, + "step": 19961 + }, + { + "epoch": 0.92, + "grad_norm": 0.40297684145969614, + "learning_rate": 3.588800235423795e-07, + "loss": 0.2186, + "step": 19962 + }, + { + "epoch": 0.92, + "grad_norm": 0.4201351728328389, + "learning_rate": 3.584850938597939e-07, + "loss": 0.2848, + "step": 19963 + }, + { + "epoch": 0.92, + "grad_norm": 0.35639618481861624, + "learning_rate": 3.5809037763105626e-07, + "loss": 0.223, + "step": 19964 + }, + { + "epoch": 0.92, + "grad_norm": 0.8370608424754384, + "learning_rate": 3.5769587486490507e-07, + "loss": 0.429, + "step": 19965 + }, + { + "epoch": 0.92, + "grad_norm": 0.3897325122925885, + "learning_rate": 3.5730158557007454e-07, + "loss": 0.2921, + "step": 19966 + }, + { + "epoch": 0.92, + "grad_norm": 0.34398365335995246, + "learning_rate": 3.5690750975528986e-07, + "loss": 0.2717, + "step": 19967 + }, + { + "epoch": 0.92, + "grad_norm": 0.41074221218611123, + "learning_rate": 3.5651364742928186e-07, + "loss": 0.1565, + "step": 19968 + }, + { + "epoch": 0.92, + "grad_norm": 0.32279580429885923, + "learning_rate": 3.5611999860076683e-07, + "loss": 0.2269, + "step": 19969 + }, + { + "epoch": 0.92, + "grad_norm": 1.7469096812840725, + "learning_rate": 3.557265632784601e-07, + "loss": 0.6329, + "step": 19970 + }, + { + "epoch": 0.92, + "grad_norm": 0.35920937373869566, + "learning_rate": 3.5533334147107133e-07, + "loss": 0.2284, + "step": 19971 + }, + { + "epoch": 0.92, + "grad_norm": 0.36816665907220764, + "learning_rate": 3.549403331873058e-07, + "loss": 0.2724, + "step": 19972 + }, + { + "epoch": 0.92, + "grad_norm": 0.8298626092969419, + "learning_rate": 3.5454753843586767e-07, + "loss": 0.4264, + "step": 19973 + }, + { + "epoch": 0.92, + "grad_norm": 0.3157006719959381, + "learning_rate": 3.541549572254488e-07, + "loss": 0.1435, + "step": 19974 + }, + { + "epoch": 0.92, + "grad_norm": 0.36797205745718725, + "learning_rate": 3.537625895647423e-07, + "loss": 0.2852, + "step": 19975 + }, + { + "epoch": 0.92, + "grad_norm": 0.3677344099615494, + "learning_rate": 3.533704354624368e-07, + "loss": 0.267, + "step": 19976 + }, + { + "epoch": 0.92, + "grad_norm": 0.6598532283083989, + "learning_rate": 3.5297849492721083e-07, + "loss": 0.1672, + "step": 19977 + }, + { + "epoch": 0.92, + "grad_norm": 0.41842781174059906, + "learning_rate": 3.525867679677442e-07, + "loss": 0.3241, + "step": 19978 + }, + { + "epoch": 0.92, + "grad_norm": 0.36563236528870313, + "learning_rate": 3.521952545927065e-07, + "loss": 0.2908, + "step": 19979 + }, + { + "epoch": 0.92, + "grad_norm": 0.9384831011623197, + "learning_rate": 3.5180395481076767e-07, + "loss": 0.4577, + "step": 19980 + }, + { + "epoch": 0.92, + "grad_norm": 0.23270048304817367, + "learning_rate": 3.5141286863059064e-07, + "loss": 0.1523, + "step": 19981 + }, + { + "epoch": 0.92, + "grad_norm": 0.5898310376648472, + "learning_rate": 3.510219960608341e-07, + "loss": 0.2481, + "step": 19982 + }, + { + "epoch": 0.92, + "grad_norm": 0.37189018436359517, + "learning_rate": 3.5063133711014884e-07, + "loss": 0.3239, + "step": 19983 + }, + { + "epoch": 0.92, + "grad_norm": 0.3358643306727903, + "learning_rate": 3.502408917871869e-07, + "loss": 0.2018, + "step": 19984 + }, + { + "epoch": 0.92, + "grad_norm": 0.6938782594785033, + "learning_rate": 3.4985066010059134e-07, + "loss": 0.3891, + "step": 19985 + }, + { + "epoch": 0.92, + "grad_norm": 0.5868070852709735, + "learning_rate": 3.4946064205899966e-07, + "loss": 0.2347, + "step": 19986 + }, + { + "epoch": 0.92, + "grad_norm": 0.25124001526846423, + "learning_rate": 3.4907083767105053e-07, + "loss": 0.2182, + "step": 19987 + }, + { + "epoch": 0.92, + "grad_norm": 0.41446750003183525, + "learning_rate": 3.4868124694536933e-07, + "loss": 0.2173, + "step": 19988 + }, + { + "epoch": 0.92, + "grad_norm": 0.9879616607875817, + "learning_rate": 3.482918698905835e-07, + "loss": 0.4844, + "step": 19989 + }, + { + "epoch": 0.92, + "grad_norm": 0.4015938346324124, + "learning_rate": 3.47902706515314e-07, + "loss": 0.2064, + "step": 19990 + }, + { + "epoch": 0.92, + "grad_norm": 0.3694916549469895, + "learning_rate": 3.47513756828175e-07, + "loss": 0.2805, + "step": 19991 + }, + { + "epoch": 0.92, + "grad_norm": 1.476889715975262, + "learning_rate": 3.4712502083777964e-07, + "loss": 0.5958, + "step": 19992 + }, + { + "epoch": 0.92, + "grad_norm": 0.35226314732815156, + "learning_rate": 3.4673649855272997e-07, + "loss": 0.2409, + "step": 19993 + }, + { + "epoch": 0.92, + "grad_norm": 0.25530260015267053, + "learning_rate": 3.4634818998163233e-07, + "loss": 0.0776, + "step": 19994 + }, + { + "epoch": 0.92, + "grad_norm": 0.37727041974845216, + "learning_rate": 3.4596009513308215e-07, + "loss": 0.3275, + "step": 19995 + }, + { + "epoch": 0.92, + "grad_norm": 0.462614583444022, + "learning_rate": 3.455722140156692e-07, + "loss": 0.2968, + "step": 19996 + }, + { + "epoch": 0.92, + "grad_norm": 0.5230042679034724, + "learning_rate": 3.451845466379833e-07, + "loss": 0.273, + "step": 19997 + }, + { + "epoch": 0.92, + "grad_norm": 0.5999673071788141, + "learning_rate": 3.4479709300860533e-07, + "loss": 0.3514, + "step": 19998 + }, + { + "epoch": 0.92, + "grad_norm": 0.36288271706838937, + "learning_rate": 3.4440985313611507e-07, + "loss": 0.2698, + "step": 19999 + }, + { + "epoch": 0.92, + "grad_norm": 0.2656433833317198, + "learning_rate": 3.4402282702908353e-07, + "loss": 0.1374, + "step": 20000 + }, + { + "epoch": 0.92, + "grad_norm": 0.6620025047564988, + "learning_rate": 3.436360146960793e-07, + "loss": 0.3731, + "step": 20001 + }, + { + "epoch": 0.92, + "grad_norm": 0.34183171104713556, + "learning_rate": 3.4324941614566897e-07, + "loss": 0.2587, + "step": 20002 + }, + { + "epoch": 0.92, + "grad_norm": 0.3407070963775404, + "learning_rate": 3.428630313864079e-07, + "loss": 0.2635, + "step": 20003 + }, + { + "epoch": 0.92, + "grad_norm": 1.583030481820721, + "learning_rate": 3.424768604268525e-07, + "loss": 0.7012, + "step": 20004 + }, + { + "epoch": 0.92, + "grad_norm": 0.3496434753458176, + "learning_rate": 3.420909032755504e-07, + "loss": 0.2338, + "step": 20005 + }, + { + "epoch": 0.92, + "grad_norm": 0.3458974792920106, + "learning_rate": 3.41705159941047e-07, + "loss": 0.1615, + "step": 20006 + }, + { + "epoch": 0.92, + "grad_norm": 0.3559367010334245, + "learning_rate": 3.4131963043188333e-07, + "loss": 0.2468, + "step": 20007 + }, + { + "epoch": 0.92, + "grad_norm": 0.3602301381721926, + "learning_rate": 3.4093431475659355e-07, + "loss": 0.252, + "step": 20008 + }, + { + "epoch": 0.92, + "grad_norm": 1.1940564430583402, + "learning_rate": 3.4054921292370977e-07, + "loss": 0.6941, + "step": 20009 + }, + { + "epoch": 0.92, + "grad_norm": 0.3566038208566618, + "learning_rate": 3.40164324941753e-07, + "loss": 0.2368, + "step": 20010 + }, + { + "epoch": 0.92, + "grad_norm": 0.32749568002746926, + "learning_rate": 3.397796508192519e-07, + "loss": 0.2292, + "step": 20011 + }, + { + "epoch": 0.92, + "grad_norm": 0.2995468963678539, + "learning_rate": 3.393951905647175e-07, + "loss": 0.147, + "step": 20012 + }, + { + "epoch": 0.92, + "grad_norm": 0.6057201736308819, + "learning_rate": 3.390109441866618e-07, + "loss": 0.2911, + "step": 20013 + }, + { + "epoch": 0.92, + "grad_norm": 0.4181620933261763, + "learning_rate": 3.386269116935914e-07, + "loss": 0.2674, + "step": 20014 + }, + { + "epoch": 0.92, + "grad_norm": 0.386383044920115, + "learning_rate": 3.382430930940106e-07, + "loss": 0.2965, + "step": 20015 + }, + { + "epoch": 0.92, + "grad_norm": 1.8742038713011822, + "learning_rate": 3.37859488396417e-07, + "loss": 0.2827, + "step": 20016 + }, + { + "epoch": 0.92, + "grad_norm": 0.4473341454023465, + "learning_rate": 3.3747609760929944e-07, + "loss": 0.2601, + "step": 20017 + }, + { + "epoch": 0.92, + "grad_norm": 0.389867358198897, + "learning_rate": 3.3709292074114887e-07, + "loss": 0.278, + "step": 20018 + }, + { + "epoch": 0.92, + "grad_norm": 0.3608077281210852, + "learning_rate": 3.367099578004496e-07, + "loss": 0.291, + "step": 20019 + }, + { + "epoch": 0.92, + "grad_norm": 0.26456942102633285, + "learning_rate": 3.3632720879567594e-07, + "loss": 0.1577, + "step": 20020 + }, + { + "epoch": 0.92, + "grad_norm": 1.2381364092853324, + "learning_rate": 3.359446737353056e-07, + "loss": 0.7711, + "step": 20021 + }, + { + "epoch": 0.92, + "grad_norm": 1.5681495020986826, + "learning_rate": 3.3556235262780513e-07, + "loss": 0.5365, + "step": 20022 + }, + { + "epoch": 0.92, + "grad_norm": 0.23284479839944283, + "learning_rate": 3.3518024548163887e-07, + "loss": 0.2078, + "step": 20023 + }, + { + "epoch": 0.92, + "grad_norm": 0.9606229493679549, + "learning_rate": 3.3479835230526894e-07, + "loss": 0.3788, + "step": 20024 + }, + { + "epoch": 0.92, + "grad_norm": 0.3315819184633779, + "learning_rate": 3.344166731071452e-07, + "loss": 0.2296, + "step": 20025 + }, + { + "epoch": 0.92, + "grad_norm": 0.3076619777026424, + "learning_rate": 3.340352078957232e-07, + "loss": 0.2067, + "step": 20026 + }, + { + "epoch": 0.92, + "grad_norm": 0.371517835866063, + "learning_rate": 3.336539566794428e-07, + "loss": 0.3008, + "step": 20027 + }, + { + "epoch": 0.92, + "grad_norm": 1.4125771820426318, + "learning_rate": 3.332729194667494e-07, + "loss": 0.5225, + "step": 20028 + }, + { + "epoch": 0.92, + "grad_norm": 0.31079195207070665, + "learning_rate": 3.3289209626607533e-07, + "loss": 0.1852, + "step": 20029 + }, + { + "epoch": 0.92, + "grad_norm": 0.592868706130989, + "learning_rate": 3.325114870858548e-07, + "loss": 0.329, + "step": 20030 + }, + { + "epoch": 0.92, + "grad_norm": 0.22287237990105, + "learning_rate": 3.3213109193451e-07, + "loss": 0.2122, + "step": 20031 + }, + { + "epoch": 0.92, + "grad_norm": 0.6407406001049274, + "learning_rate": 3.317509108204664e-07, + "loss": 0.2673, + "step": 20032 + }, + { + "epoch": 0.92, + "grad_norm": 0.4227412089128269, + "learning_rate": 3.3137094375213843e-07, + "loss": 0.2401, + "step": 20033 + }, + { + "epoch": 0.92, + "grad_norm": 0.38654054939632837, + "learning_rate": 3.309911907379393e-07, + "loss": 0.2991, + "step": 20034 + }, + { + "epoch": 0.92, + "grad_norm": 0.6643718408789809, + "learning_rate": 3.306116517862756e-07, + "loss": 0.2982, + "step": 20035 + }, + { + "epoch": 0.92, + "grad_norm": 0.4591282467617554, + "learning_rate": 3.3023232690555184e-07, + "loss": 0.2495, + "step": 20036 + }, + { + "epoch": 0.92, + "grad_norm": 0.5484319092214814, + "learning_rate": 3.298532161041634e-07, + "loss": 0.2405, + "step": 20037 + }, + { + "epoch": 0.92, + "grad_norm": 0.40665761788732585, + "learning_rate": 3.294743193905059e-07, + "loss": 0.2847, + "step": 20038 + }, + { + "epoch": 0.92, + "grad_norm": 0.25786155293640856, + "learning_rate": 3.2909563677296473e-07, + "loss": 0.2078, + "step": 20039 + }, + { + "epoch": 0.92, + "grad_norm": 1.7432892091035728, + "learning_rate": 3.287171682599255e-07, + "loss": 0.5314, + "step": 20040 + }, + { + "epoch": 0.92, + "grad_norm": 0.4401901052827708, + "learning_rate": 3.283389138597681e-07, + "loss": 0.2674, + "step": 20041 + }, + { + "epoch": 0.92, + "grad_norm": 0.4766102437945921, + "learning_rate": 3.279608735808637e-07, + "loss": 0.2498, + "step": 20042 + }, + { + "epoch": 0.92, + "grad_norm": 0.40673897023428773, + "learning_rate": 3.2758304743158554e-07, + "loss": 0.2801, + "step": 20043 + }, + { + "epoch": 0.92, + "grad_norm": 0.43782070327692807, + "learning_rate": 3.272054354202936e-07, + "loss": 0.2986, + "step": 20044 + }, + { + "epoch": 0.92, + "grad_norm": 0.567137151346914, + "learning_rate": 3.2682803755535233e-07, + "loss": 0.384, + "step": 20045 + }, + { + "epoch": 0.92, + "grad_norm": 0.24823178067374493, + "learning_rate": 3.26450853845115e-07, + "loss": 0.1771, + "step": 20046 + }, + { + "epoch": 0.92, + "grad_norm": 0.5892734116705268, + "learning_rate": 3.2607388429793274e-07, + "loss": 0.3082, + "step": 20047 + }, + { + "epoch": 0.92, + "grad_norm": 0.4274727084090879, + "learning_rate": 3.2569712892215e-07, + "loss": 0.2892, + "step": 20048 + }, + { + "epoch": 0.92, + "grad_norm": 1.4988576645200309, + "learning_rate": 3.2532058772610895e-07, + "loss": 0.325, + "step": 20049 + }, + { + "epoch": 0.92, + "grad_norm": 0.5794215876193486, + "learning_rate": 3.2494426071814523e-07, + "loss": 0.3087, + "step": 20050 + }, + { + "epoch": 0.92, + "grad_norm": 0.29156370079823435, + "learning_rate": 3.2456814790659096e-07, + "loss": 0.2716, + "step": 20051 + }, + { + "epoch": 0.92, + "grad_norm": 0.3212997203048011, + "learning_rate": 3.241922492997729e-07, + "loss": 0.1846, + "step": 20052 + }, + { + "epoch": 0.92, + "grad_norm": 0.6155676286535304, + "learning_rate": 3.238165649060121e-07, + "loss": 0.2636, + "step": 20053 + }, + { + "epoch": 0.92, + "grad_norm": 0.3167114780956871, + "learning_rate": 3.234410947336264e-07, + "loss": 0.273, + "step": 20054 + }, + { + "epoch": 0.92, + "grad_norm": 0.5896699451238753, + "learning_rate": 3.2306583879093023e-07, + "loss": 0.2108, + "step": 20055 + }, + { + "epoch": 0.92, + "grad_norm": 0.6688405500362632, + "learning_rate": 3.226907970862281e-07, + "loss": 0.4424, + "step": 20056 + }, + { + "epoch": 0.92, + "grad_norm": 0.3046481110032741, + "learning_rate": 3.223159696278244e-07, + "loss": 0.2135, + "step": 20057 + }, + { + "epoch": 0.92, + "grad_norm": 0.44683056504965746, + "learning_rate": 3.2194135642401705e-07, + "loss": 0.3062, + "step": 20058 + }, + { + "epoch": 0.92, + "grad_norm": 0.2966384546749176, + "learning_rate": 3.215669574831026e-07, + "loss": 0.1804, + "step": 20059 + }, + { + "epoch": 0.92, + "grad_norm": 0.42658652461904467, + "learning_rate": 3.211927728133668e-07, + "loss": 0.298, + "step": 20060 + }, + { + "epoch": 0.92, + "grad_norm": 1.364608308356298, + "learning_rate": 3.208188024230918e-07, + "loss": 0.5944, + "step": 20061 + }, + { + "epoch": 0.92, + "grad_norm": 0.30449796114658834, + "learning_rate": 3.204450463205633e-07, + "loss": 0.2113, + "step": 20062 + }, + { + "epoch": 0.92, + "grad_norm": 0.4215930756244218, + "learning_rate": 3.200715045140501e-07, + "loss": 0.3093, + "step": 20063 + }, + { + "epoch": 0.92, + "grad_norm": 0.597943612119744, + "learning_rate": 3.196981770118246e-07, + "loss": 0.3336, + "step": 20064 + }, + { + "epoch": 0.92, + "grad_norm": 0.15710787491637743, + "learning_rate": 3.193250638221512e-07, + "loss": 0.0718, + "step": 20065 + }, + { + "epoch": 0.92, + "grad_norm": 0.3299102389667, + "learning_rate": 3.1895216495329116e-07, + "loss": 0.2789, + "step": 20066 + }, + { + "epoch": 0.92, + "grad_norm": 0.5785092486359147, + "learning_rate": 3.1857948041349894e-07, + "loss": 0.3252, + "step": 20067 + }, + { + "epoch": 0.92, + "grad_norm": 0.5510607015579685, + "learning_rate": 3.1820701021102576e-07, + "loss": 0.2596, + "step": 20068 + }, + { + "epoch": 0.92, + "grad_norm": 0.37921533474901487, + "learning_rate": 3.1783475435411935e-07, + "loss": 0.2765, + "step": 20069 + }, + { + "epoch": 0.92, + "grad_norm": 0.36553048481916767, + "learning_rate": 3.174627128510188e-07, + "loss": 0.301, + "step": 20070 + }, + { + "epoch": 0.92, + "grad_norm": 0.403351157126921, + "learning_rate": 3.170908857099608e-07, + "loss": 0.1883, + "step": 20071 + }, + { + "epoch": 0.92, + "grad_norm": 0.25823888554696645, + "learning_rate": 3.167192729391799e-07, + "loss": 0.1668, + "step": 20072 + }, + { + "epoch": 0.92, + "grad_norm": 1.3356014895391508, + "learning_rate": 3.1634787454689954e-07, + "loss": 0.7151, + "step": 20073 + }, + { + "epoch": 0.92, + "grad_norm": 0.3369234828242105, + "learning_rate": 3.1597669054134417e-07, + "loss": 0.2886, + "step": 20074 + }, + { + "epoch": 0.92, + "grad_norm": 0.3370695209270014, + "learning_rate": 3.156057209307317e-07, + "loss": 0.2257, + "step": 20075 + }, + { + "epoch": 0.92, + "grad_norm": 0.7828519389273079, + "learning_rate": 3.152349657232756e-07, + "loss": 0.3961, + "step": 20076 + }, + { + "epoch": 0.92, + "grad_norm": 0.3088735810118458, + "learning_rate": 3.148644249271826e-07, + "loss": 0.1776, + "step": 20077 + }, + { + "epoch": 0.92, + "grad_norm": 0.29949245571140787, + "learning_rate": 3.1449409855065506e-07, + "loss": 0.209, + "step": 20078 + }, + { + "epoch": 0.92, + "grad_norm": 0.5805740139720473, + "learning_rate": 3.141239866018952e-07, + "loss": 0.3308, + "step": 20079 + }, + { + "epoch": 0.92, + "grad_norm": 0.6469044786353962, + "learning_rate": 3.1375408908909333e-07, + "loss": 0.4029, + "step": 20080 + }, + { + "epoch": 0.92, + "grad_norm": 0.37057449673291254, + "learning_rate": 3.133844060204416e-07, + "loss": 0.25, + "step": 20081 + }, + { + "epoch": 0.92, + "grad_norm": 0.3499481414055778, + "learning_rate": 3.130149374041225e-07, + "loss": 0.2549, + "step": 20082 + }, + { + "epoch": 0.92, + "grad_norm": 0.39585581118871255, + "learning_rate": 3.12645683248316e-07, + "loss": 0.1561, + "step": 20083 + }, + { + "epoch": 0.92, + "grad_norm": 0.29746354023328586, + "learning_rate": 3.122766435611979e-07, + "loss": 0.2345, + "step": 20084 + }, + { + "epoch": 0.92, + "grad_norm": 1.0207877008422335, + "learning_rate": 3.119078183509372e-07, + "loss": 0.3247, + "step": 20085 + }, + { + "epoch": 0.92, + "grad_norm": 0.35295820352837787, + "learning_rate": 3.115392076257007e-07, + "loss": 0.2717, + "step": 20086 + }, + { + "epoch": 0.92, + "grad_norm": 0.33301019449562125, + "learning_rate": 3.1117081139364626e-07, + "loss": 0.2275, + "step": 20087 + }, + { + "epoch": 0.92, + "grad_norm": 1.1992313954871763, + "learning_rate": 3.1080262966293294e-07, + "loss": 0.3053, + "step": 20088 + }, + { + "epoch": 0.92, + "grad_norm": 0.28900644081914445, + "learning_rate": 3.1043466244171204e-07, + "loss": 0.1703, + "step": 20089 + }, + { + "epoch": 0.92, + "grad_norm": 0.2900488723534963, + "learning_rate": 3.1006690973812704e-07, + "loss": 0.2485, + "step": 20090 + }, + { + "epoch": 0.92, + "grad_norm": 0.4906743368051409, + "learning_rate": 3.096993715603225e-07, + "loss": 0.2334, + "step": 20091 + }, + { + "epoch": 0.92, + "grad_norm": 0.5468764569104397, + "learning_rate": 3.09332047916433e-07, + "loss": 0.3645, + "step": 20092 + }, + { + "epoch": 0.92, + "grad_norm": 0.37309010189270414, + "learning_rate": 3.0896493881459323e-07, + "loss": 0.2748, + "step": 20093 + }, + { + "epoch": 0.92, + "grad_norm": 0.36642005857148335, + "learning_rate": 3.085980442629288e-07, + "loss": 0.2908, + "step": 20094 + }, + { + "epoch": 0.92, + "grad_norm": 0.5883005099219749, + "learning_rate": 3.0823136426956334e-07, + "loss": 0.1184, + "step": 20095 + }, + { + "epoch": 0.92, + "grad_norm": 0.2872717748195638, + "learning_rate": 3.078648988426147e-07, + "loss": 0.2148, + "step": 20096 + }, + { + "epoch": 0.92, + "grad_norm": 0.9988833964207855, + "learning_rate": 3.0749864799019426e-07, + "loss": 0.4919, + "step": 20097 + }, + { + "epoch": 0.92, + "grad_norm": 0.37152959692920323, + "learning_rate": 3.071326117204143e-07, + "loss": 0.2448, + "step": 20098 + }, + { + "epoch": 0.92, + "grad_norm": 0.36289021047111236, + "learning_rate": 3.067667900413751e-07, + "loss": 0.2675, + "step": 20099 + }, + { + "epoch": 0.92, + "grad_norm": 1.2371040243122153, + "learning_rate": 3.064011829611757e-07, + "loss": 0.6413, + "step": 20100 + }, + { + "epoch": 0.92, + "grad_norm": 0.3280054118548916, + "learning_rate": 3.06035790487913e-07, + "loss": 0.1618, + "step": 20101 + }, + { + "epoch": 0.92, + "grad_norm": 0.3793311838822268, + "learning_rate": 3.0567061262967376e-07, + "loss": 0.2664, + "step": 20102 + }, + { + "epoch": 0.92, + "grad_norm": 0.3762066444416537, + "learning_rate": 3.053056493945439e-07, + "loss": 0.2352, + "step": 20103 + }, + { + "epoch": 0.92, + "grad_norm": 0.8260370499677854, + "learning_rate": 3.0494090079060235e-07, + "loss": 0.3199, + "step": 20104 + }, + { + "epoch": 0.92, + "grad_norm": 0.3414318808237356, + "learning_rate": 3.0457636682592604e-07, + "loss": 0.2578, + "step": 20105 + }, + { + "epoch": 0.92, + "grad_norm": 0.3798873991042024, + "learning_rate": 3.042120475085852e-07, + "loss": 0.3121, + "step": 20106 + }, + { + "epoch": 0.92, + "grad_norm": 1.4831672630835995, + "learning_rate": 3.038479428466423e-07, + "loss": 0.5124, + "step": 20107 + }, + { + "epoch": 0.92, + "grad_norm": 0.2315393388058869, + "learning_rate": 3.0348405284816193e-07, + "loss": 0.1496, + "step": 20108 + }, + { + "epoch": 0.92, + "grad_norm": 0.46951042864463327, + "learning_rate": 3.031203775211988e-07, + "loss": 0.2618, + "step": 20109 + }, + { + "epoch": 0.92, + "grad_norm": 0.3643427511132734, + "learning_rate": 3.0275691687380536e-07, + "loss": 0.3299, + "step": 20110 + }, + { + "epoch": 0.92, + "grad_norm": 0.3113673064075314, + "learning_rate": 3.023936709140263e-07, + "loss": 0.2078, + "step": 20111 + }, + { + "epoch": 0.92, + "grad_norm": 1.2082536996703304, + "learning_rate": 3.020306396499062e-07, + "loss": 0.3945, + "step": 20112 + }, + { + "epoch": 0.92, + "grad_norm": 0.6297023765287655, + "learning_rate": 3.016678230894787e-07, + "loss": 0.3316, + "step": 20113 + }, + { + "epoch": 0.92, + "grad_norm": 0.21233945043759542, + "learning_rate": 3.0130522124077967e-07, + "loss": 0.1692, + "step": 20114 + }, + { + "epoch": 0.92, + "grad_norm": 0.741102810374366, + "learning_rate": 3.009428341118359e-07, + "loss": 0.3744, + "step": 20115 + }, + { + "epoch": 0.92, + "grad_norm": 0.4656100806538979, + "learning_rate": 3.005806617106677e-07, + "loss": 0.2786, + "step": 20116 + }, + { + "epoch": 0.92, + "grad_norm": 0.31951203519703775, + "learning_rate": 3.002187040452964e-07, + "loss": 0.1799, + "step": 20117 + }, + { + "epoch": 0.92, + "grad_norm": 0.44964500414831005, + "learning_rate": 2.9985696112373455e-07, + "loss": 0.2944, + "step": 20118 + }, + { + "epoch": 0.92, + "grad_norm": 0.9393446868217752, + "learning_rate": 2.9949543295398896e-07, + "loss": 0.4099, + "step": 20119 + }, + { + "epoch": 0.92, + "grad_norm": 0.42207292789399414, + "learning_rate": 2.991341195440678e-07, + "loss": 0.2628, + "step": 20120 + }, + { + "epoch": 0.92, + "grad_norm": 0.4152277544593938, + "learning_rate": 2.987730209019635e-07, + "loss": 0.2573, + "step": 20121 + }, + { + "epoch": 0.92, + "grad_norm": 0.3616595768622279, + "learning_rate": 2.984121370356774e-07, + "loss": 0.2691, + "step": 20122 + }, + { + "epoch": 0.92, + "grad_norm": 0.4323388260288412, + "learning_rate": 2.9805146795319537e-07, + "loss": 0.2807, + "step": 20123 + }, + { + "epoch": 0.92, + "grad_norm": 0.4065079422113088, + "learning_rate": 2.976910136625033e-07, + "loss": 0.1617, + "step": 20124 + }, + { + "epoch": 0.92, + "grad_norm": 0.5771983329855844, + "learning_rate": 2.973307741715803e-07, + "loss": 0.334, + "step": 20125 + }, + { + "epoch": 0.92, + "grad_norm": 0.29140134517512134, + "learning_rate": 2.969707494884022e-07, + "loss": 0.2543, + "step": 20126 + }, + { + "epoch": 0.92, + "grad_norm": 0.6597708414289951, + "learning_rate": 2.9661093962094045e-07, + "loss": 0.2806, + "step": 20127 + }, + { + "epoch": 0.92, + "grad_norm": 0.44319121406711504, + "learning_rate": 2.9625134457715975e-07, + "loss": 0.2802, + "step": 20128 + }, + { + "epoch": 0.92, + "grad_norm": 0.35362216422282455, + "learning_rate": 2.9589196436502267e-07, + "loss": 0.2499, + "step": 20129 + }, + { + "epoch": 0.92, + "grad_norm": 0.25176602956947075, + "learning_rate": 2.955327989924839e-07, + "loss": 0.1723, + "step": 20130 + }, + { + "epoch": 0.92, + "grad_norm": 0.7211725899673292, + "learning_rate": 2.9517384846749485e-07, + "loss": 0.3827, + "step": 20131 + }, + { + "epoch": 0.92, + "grad_norm": 0.33066685937795576, + "learning_rate": 2.9481511279800477e-07, + "loss": 0.2522, + "step": 20132 + }, + { + "epoch": 0.92, + "grad_norm": 1.043378444324432, + "learning_rate": 2.9445659199195285e-07, + "loss": 0.4287, + "step": 20133 + }, + { + "epoch": 0.92, + "grad_norm": 0.36333313517049026, + "learning_rate": 2.940982860572772e-07, + "loss": 0.256, + "step": 20134 + }, + { + "epoch": 0.92, + "grad_norm": 0.4172372244432512, + "learning_rate": 2.9374019500191255e-07, + "loss": 0.2558, + "step": 20135 + }, + { + "epoch": 0.93, + "grad_norm": 0.29884128577672325, + "learning_rate": 2.9338231883378365e-07, + "loss": 0.1826, + "step": 20136 + }, + { + "epoch": 0.93, + "grad_norm": 0.37566362178976254, + "learning_rate": 2.9302465756081646e-07, + "loss": 0.2597, + "step": 20137 + }, + { + "epoch": 0.93, + "grad_norm": 0.4286135849902286, + "learning_rate": 2.9266721119092454e-07, + "loss": 0.2546, + "step": 20138 + }, + { + "epoch": 0.93, + "grad_norm": 0.5682725374234371, + "learning_rate": 2.9230997973202724e-07, + "loss": 0.3201, + "step": 20139 + }, + { + "epoch": 0.93, + "grad_norm": 1.1895890465416745, + "learning_rate": 2.9195296319202927e-07, + "loss": 0.4468, + "step": 20140 + }, + { + "epoch": 0.93, + "grad_norm": 0.4467348731807258, + "learning_rate": 2.9159616157883763e-07, + "loss": 0.2986, + "step": 20141 + }, + { + "epoch": 0.93, + "grad_norm": 0.21482362384647072, + "learning_rate": 2.912395749003494e-07, + "loss": 0.2054, + "step": 20142 + }, + { + "epoch": 0.93, + "grad_norm": 0.6986049620345507, + "learning_rate": 2.9088320316445705e-07, + "loss": 0.2676, + "step": 20143 + }, + { + "epoch": 0.93, + "grad_norm": 0.4140355007499485, + "learning_rate": 2.905270463790555e-07, + "loss": 0.268, + "step": 20144 + }, + { + "epoch": 0.93, + "grad_norm": 0.41713146953259794, + "learning_rate": 2.9017110455202613e-07, + "loss": 0.2667, + "step": 20145 + }, + { + "epoch": 0.93, + "grad_norm": 0.5145376808125004, + "learning_rate": 2.8981537769125046e-07, + "loss": 0.3782, + "step": 20146 + }, + { + "epoch": 0.93, + "grad_norm": 0.3236385114088312, + "learning_rate": 2.894598658046033e-07, + "loss": 0.2034, + "step": 20147 + }, + { + "epoch": 0.93, + "grad_norm": 0.3428730147587895, + "learning_rate": 2.89104568899955e-07, + "loss": 0.1651, + "step": 20148 + }, + { + "epoch": 0.93, + "grad_norm": 0.3867476671522409, + "learning_rate": 2.887494869851737e-07, + "loss": 0.3001, + "step": 20149 + }, + { + "epoch": 0.93, + "grad_norm": 0.37520127391224767, + "learning_rate": 2.883946200681176e-07, + "loss": 0.2112, + "step": 20150 + }, + { + "epoch": 0.93, + "grad_norm": 1.4413669975833727, + "learning_rate": 2.880399681566437e-07, + "loss": 0.5489, + "step": 20151 + }, + { + "epoch": 0.93, + "grad_norm": 1.1805838556041948, + "learning_rate": 2.8768553125860577e-07, + "loss": 0.6865, + "step": 20152 + }, + { + "epoch": 0.93, + "grad_norm": 0.35754755360634777, + "learning_rate": 2.873313093818486e-07, + "loss": 0.184, + "step": 20153 + }, + { + "epoch": 0.93, + "grad_norm": 0.34890149420800903, + "learning_rate": 2.8697730253421595e-07, + "loss": 0.2686, + "step": 20154 + }, + { + "epoch": 0.93, + "grad_norm": 0.36238774177898353, + "learning_rate": 2.8662351072354267e-07, + "loss": 0.2115, + "step": 20155 + }, + { + "epoch": 0.93, + "grad_norm": 0.4159066836281099, + "learning_rate": 2.8626993395766467e-07, + "loss": 0.1725, + "step": 20156 + }, + { + "epoch": 0.93, + "grad_norm": 0.41600941336656855, + "learning_rate": 2.859165722444068e-07, + "loss": 0.2851, + "step": 20157 + }, + { + "epoch": 0.93, + "grad_norm": 0.5413607402466021, + "learning_rate": 2.8556342559159513e-07, + "loss": 0.3957, + "step": 20158 + }, + { + "epoch": 0.93, + "grad_norm": 0.6078644735670684, + "learning_rate": 2.852104940070455e-07, + "loss": 0.3534, + "step": 20159 + }, + { + "epoch": 0.93, + "grad_norm": 0.38913923473040934, + "learning_rate": 2.848577774985717e-07, + "loss": 0.2347, + "step": 20160 + }, + { + "epoch": 0.93, + "grad_norm": 0.3664355380307951, + "learning_rate": 2.8450527607398416e-07, + "loss": 0.2675, + "step": 20161 + }, + { + "epoch": 0.93, + "grad_norm": 0.33061116795768813, + "learning_rate": 2.8415298974108443e-07, + "loss": 0.2325, + "step": 20162 + }, + { + "epoch": 0.93, + "grad_norm": 0.5090773637124499, + "learning_rate": 2.838009185076751e-07, + "loss": 0.2171, + "step": 20163 + }, + { + "epoch": 0.93, + "grad_norm": 1.2690319142176463, + "learning_rate": 2.834490623815478e-07, + "loss": 0.7513, + "step": 20164 + }, + { + "epoch": 0.93, + "grad_norm": 0.551514320981443, + "learning_rate": 2.830974213704929e-07, + "loss": 0.2514, + "step": 20165 + }, + { + "epoch": 0.93, + "grad_norm": 0.3736412005902798, + "learning_rate": 2.827459954822964e-07, + "loss": 0.2258, + "step": 20166 + }, + { + "epoch": 0.93, + "grad_norm": 0.4707505814274513, + "learning_rate": 2.823947847247377e-07, + "loss": 0.2555, + "step": 20167 + }, + { + "epoch": 0.93, + "grad_norm": 0.3038351998527382, + "learning_rate": 2.820437891055927e-07, + "loss": 0.2047, + "step": 20168 + }, + { + "epoch": 0.93, + "grad_norm": 0.3963720453849057, + "learning_rate": 2.8169300863263084e-07, + "loss": 0.2082, + "step": 20169 + }, + { + "epoch": 0.93, + "grad_norm": 0.5413860526503246, + "learning_rate": 2.813424433136214e-07, + "loss": 0.3642, + "step": 20170 + }, + { + "epoch": 0.93, + "grad_norm": 0.5995151851571557, + "learning_rate": 2.809920931563226e-07, + "loss": 0.3172, + "step": 20171 + }, + { + "epoch": 0.93, + "grad_norm": 0.4619368588640355, + "learning_rate": 2.806419581684905e-07, + "loss": 0.2902, + "step": 20172 + }, + { + "epoch": 0.93, + "grad_norm": 0.3488466308094244, + "learning_rate": 2.802920383578778e-07, + "loss": 0.2324, + "step": 20173 + }, + { + "epoch": 0.93, + "grad_norm": 0.27232748118659855, + "learning_rate": 2.7994233373223155e-07, + "loss": 0.1684, + "step": 20174 + }, + { + "epoch": 0.93, + "grad_norm": 0.40474158641460867, + "learning_rate": 2.7959284429929456e-07, + "loss": 0.2721, + "step": 20175 + }, + { + "epoch": 0.93, + "grad_norm": 0.9626553578901039, + "learning_rate": 2.792435700668028e-07, + "loss": 0.3241, + "step": 20176 + }, + { + "epoch": 0.93, + "grad_norm": 0.31722322062937275, + "learning_rate": 2.78894511042489e-07, + "loss": 0.2604, + "step": 20177 + }, + { + "epoch": 0.93, + "grad_norm": 0.3895468554815851, + "learning_rate": 2.785456672340825e-07, + "loss": 0.294, + "step": 20178 + }, + { + "epoch": 0.93, + "grad_norm": 1.4939854993319297, + "learning_rate": 2.781970386493049e-07, + "loss": 0.2432, + "step": 20179 + }, + { + "epoch": 0.93, + "grad_norm": 0.2915987221868721, + "learning_rate": 2.7784862529587565e-07, + "loss": 0.1529, + "step": 20180 + }, + { + "epoch": 0.93, + "grad_norm": 0.28184409448982767, + "learning_rate": 2.7750042718150514e-07, + "loss": 0.2585, + "step": 20181 + }, + { + "epoch": 0.93, + "grad_norm": 0.9899062014939967, + "learning_rate": 2.771524443139062e-07, + "loss": 0.2794, + "step": 20182 + }, + { + "epoch": 0.93, + "grad_norm": 0.5350820048618158, + "learning_rate": 2.768046767007815e-07, + "loss": 0.3114, + "step": 20183 + }, + { + "epoch": 0.93, + "grad_norm": 0.45072258099149337, + "learning_rate": 2.764571243498282e-07, + "loss": 0.3151, + "step": 20184 + }, + { + "epoch": 0.93, + "grad_norm": 0.38721633349725504, + "learning_rate": 2.761097872687435e-07, + "loss": 0.3144, + "step": 20185 + }, + { + "epoch": 0.93, + "grad_norm": 0.15391859611414427, + "learning_rate": 2.757626654652157e-07, + "loss": 0.0697, + "step": 20186 + }, + { + "epoch": 0.93, + "grad_norm": 0.4129131986024776, + "learning_rate": 2.7541575894693194e-07, + "loss": 0.2583, + "step": 20187 + }, + { + "epoch": 0.93, + "grad_norm": 0.6860461859416019, + "learning_rate": 2.750690677215684e-07, + "loss": 0.3578, + "step": 20188 + }, + { + "epoch": 0.93, + "grad_norm": 0.29277410857329794, + "learning_rate": 2.7472259179680436e-07, + "loss": 0.2134, + "step": 20189 + }, + { + "epoch": 0.93, + "grad_norm": 0.41219622653327004, + "learning_rate": 2.7437633118030714e-07, + "loss": 0.2876, + "step": 20190 + }, + { + "epoch": 0.93, + "grad_norm": 1.417225158143789, + "learning_rate": 2.74030285879745e-07, + "loss": 0.6476, + "step": 20191 + }, + { + "epoch": 0.93, + "grad_norm": 0.2301404947418482, + "learning_rate": 2.736844559027796e-07, + "loss": 0.0757, + "step": 20192 + }, + { + "epoch": 0.93, + "grad_norm": 0.261165745726926, + "learning_rate": 2.7333884125706366e-07, + "loss": 0.2584, + "step": 20193 + }, + { + "epoch": 0.93, + "grad_norm": 0.7182975732933482, + "learning_rate": 2.729934419502522e-07, + "loss": 0.3662, + "step": 20194 + }, + { + "epoch": 0.93, + "grad_norm": 0.7670753992433383, + "learning_rate": 2.726482579899914e-07, + "loss": 0.2481, + "step": 20195 + }, + { + "epoch": 0.93, + "grad_norm": 0.3555808371749825, + "learning_rate": 2.723032893839217e-07, + "loss": 0.2588, + "step": 20196 + }, + { + "epoch": 0.93, + "grad_norm": 0.36739059675262153, + "learning_rate": 2.719585361396837e-07, + "loss": 0.2841, + "step": 20197 + }, + { + "epoch": 0.93, + "grad_norm": 0.5240409322559296, + "learning_rate": 2.7161399826490466e-07, + "loss": 0.2057, + "step": 20198 + }, + { + "epoch": 0.93, + "grad_norm": 0.2661827074728033, + "learning_rate": 2.712696757672173e-07, + "loss": 0.1756, + "step": 20199 + }, + { + "epoch": 0.93, + "grad_norm": 0.7452292715715435, + "learning_rate": 2.7092556865424335e-07, + "loss": 0.3628, + "step": 20200 + }, + { + "epoch": 0.93, + "grad_norm": 0.3563883628963731, + "learning_rate": 2.7058167693359894e-07, + "loss": 0.2953, + "step": 20201 + }, + { + "epoch": 0.93, + "grad_norm": 0.3570397838882156, + "learning_rate": 2.702380006128991e-07, + "loss": 0.2106, + "step": 20202 + }, + { + "epoch": 0.93, + "grad_norm": 1.3362907943324822, + "learning_rate": 2.69894539699751e-07, + "loss": 0.5229, + "step": 20203 + }, + { + "epoch": 0.93, + "grad_norm": 0.3556723230714628, + "learning_rate": 2.6955129420176193e-07, + "loss": 0.2353, + "step": 20204 + }, + { + "epoch": 0.93, + "grad_norm": 0.3091573995403711, + "learning_rate": 2.692082641265281e-07, + "loss": 0.2184, + "step": 20205 + }, + { + "epoch": 0.93, + "grad_norm": 0.42567940828249273, + "learning_rate": 2.688654494816445e-07, + "loss": 0.2699, + "step": 20206 + }, + { + "epoch": 0.93, + "grad_norm": 0.8022598823523924, + "learning_rate": 2.685228502747006e-07, + "loss": 0.5746, + "step": 20207 + }, + { + "epoch": 0.93, + "grad_norm": 0.36396721921117126, + "learning_rate": 2.6818046651328143e-07, + "loss": 0.2803, + "step": 20208 + }, + { + "epoch": 0.93, + "grad_norm": 0.46629831862394444, + "learning_rate": 2.6783829820496875e-07, + "loss": 0.2347, + "step": 20209 + }, + { + "epoch": 0.93, + "grad_norm": 0.6155483486563386, + "learning_rate": 2.6749634535733425e-07, + "loss": 0.2873, + "step": 20210 + }, + { + "epoch": 0.93, + "grad_norm": 0.35869568701384413, + "learning_rate": 2.6715460797795077e-07, + "loss": 0.255, + "step": 20211 + }, + { + "epoch": 0.93, + "grad_norm": 0.39641371785188345, + "learning_rate": 2.668130860743845e-07, + "loss": 0.2253, + "step": 20212 + }, + { + "epoch": 0.93, + "grad_norm": 0.526819493104097, + "learning_rate": 2.664717796541949e-07, + "loss": 0.3509, + "step": 20213 + }, + { + "epoch": 0.93, + "grad_norm": 0.3299100849529229, + "learning_rate": 2.661306887249393e-07, + "loss": 0.2572, + "step": 20214 + }, + { + "epoch": 0.93, + "grad_norm": 1.738010369940992, + "learning_rate": 2.657898132941661e-07, + "loss": 0.1408, + "step": 20215 + }, + { + "epoch": 0.93, + "grad_norm": 0.5461471972474474, + "learning_rate": 2.65449153369427e-07, + "loss": 0.2994, + "step": 20216 + }, + { + "epoch": 0.93, + "grad_norm": 0.2888324069364487, + "learning_rate": 2.6510870895826044e-07, + "loss": 0.2387, + "step": 20217 + }, + { + "epoch": 0.93, + "grad_norm": 0.41673286033025686, + "learning_rate": 2.64768480068206e-07, + "loss": 0.1825, + "step": 20218 + }, + { + "epoch": 0.93, + "grad_norm": 0.7993532437251042, + "learning_rate": 2.6442846670679424e-07, + "loss": 0.4802, + "step": 20219 + }, + { + "epoch": 0.93, + "grad_norm": 0.28531139039427766, + "learning_rate": 2.6408866888155024e-07, + "loss": 0.2131, + "step": 20220 + }, + { + "epoch": 0.93, + "grad_norm": 0.4050006639587751, + "learning_rate": 2.6374908660000255e-07, + "loss": 0.2812, + "step": 20221 + }, + { + "epoch": 0.93, + "grad_norm": 0.6525850454659222, + "learning_rate": 2.634097198696639e-07, + "loss": 0.2532, + "step": 20222 + }, + { + "epoch": 0.93, + "grad_norm": 0.3810252928933606, + "learning_rate": 2.630705686980517e-07, + "loss": 0.2651, + "step": 20223 + }, + { + "epoch": 0.93, + "grad_norm": 0.5456574501643089, + "learning_rate": 2.6273163309267215e-07, + "loss": 0.2559, + "step": 20224 + }, + { + "epoch": 0.93, + "grad_norm": 0.33147757532260236, + "learning_rate": 2.623929130610281e-07, + "loss": 0.2553, + "step": 20225 + }, + { + "epoch": 0.93, + "grad_norm": 0.3694990538687372, + "learning_rate": 2.620544086106214e-07, + "loss": 0.2574, + "step": 20226 + }, + { + "epoch": 0.93, + "grad_norm": 0.5833577935788045, + "learning_rate": 2.617161197489426e-07, + "loss": 0.2971, + "step": 20227 + }, + { + "epoch": 0.93, + "grad_norm": 0.4601823133389908, + "learning_rate": 2.6137804648348475e-07, + "loss": 0.2521, + "step": 20228 + }, + { + "epoch": 0.93, + "grad_norm": 0.30097295006029495, + "learning_rate": 2.6104018882173065e-07, + "loss": 0.2559, + "step": 20229 + }, + { + "epoch": 0.93, + "grad_norm": 0.9482623066789242, + "learning_rate": 2.6070254677115883e-07, + "loss": 0.4963, + "step": 20230 + }, + { + "epoch": 0.93, + "grad_norm": 0.246937631188175, + "learning_rate": 2.603651203392477e-07, + "loss": 0.1434, + "step": 20231 + }, + { + "epoch": 0.93, + "grad_norm": 0.2745141671934632, + "learning_rate": 2.600279095334635e-07, + "loss": 0.2006, + "step": 20232 + }, + { + "epoch": 0.93, + "grad_norm": 0.3840054725868079, + "learning_rate": 2.596909143612747e-07, + "loss": 0.299, + "step": 20233 + }, + { + "epoch": 0.93, + "grad_norm": 0.6084770027547077, + "learning_rate": 2.5935413483014096e-07, + "loss": 0.3199, + "step": 20234 + }, + { + "epoch": 0.93, + "grad_norm": 0.436650169971636, + "learning_rate": 2.5901757094751956e-07, + "loss": 0.1849, + "step": 20235 + }, + { + "epoch": 0.93, + "grad_norm": 1.5088172664479822, + "learning_rate": 2.5868122272086127e-07, + "loss": 0.457, + "step": 20236 + }, + { + "epoch": 0.93, + "grad_norm": 0.35984480993276813, + "learning_rate": 2.58345090157609e-07, + "loss": 0.3084, + "step": 20237 + }, + { + "epoch": 0.93, + "grad_norm": 0.32307510187584904, + "learning_rate": 2.5800917326521013e-07, + "loss": 0.1801, + "step": 20238 + }, + { + "epoch": 0.93, + "grad_norm": 0.3737985025641793, + "learning_rate": 2.5767347205109763e-07, + "loss": 0.1828, + "step": 20239 + }, + { + "epoch": 0.93, + "grad_norm": 0.400257088207057, + "learning_rate": 2.5733798652270435e-07, + "loss": 0.2886, + "step": 20240 + }, + { + "epoch": 0.93, + "grad_norm": 0.31851108753004104, + "learning_rate": 2.570027166874578e-07, + "loss": 0.1835, + "step": 20241 + }, + { + "epoch": 0.93, + "grad_norm": 1.3338231288364275, + "learning_rate": 2.5666766255278087e-07, + "loss": 0.4715, + "step": 20242 + }, + { + "epoch": 0.93, + "grad_norm": 0.47592708100632153, + "learning_rate": 2.5633282412609207e-07, + "loss": 0.2862, + "step": 20243 + }, + { + "epoch": 0.93, + "grad_norm": 0.3154445641298411, + "learning_rate": 2.5599820141480326e-07, + "loss": 0.1858, + "step": 20244 + }, + { + "epoch": 0.93, + "grad_norm": 0.30290634283070234, + "learning_rate": 2.5566379442632185e-07, + "loss": 0.2411, + "step": 20245 + }, + { + "epoch": 0.93, + "grad_norm": 0.8617087596788787, + "learning_rate": 2.55329603168053e-07, + "loss": 0.3666, + "step": 20246 + }, + { + "epoch": 0.93, + "grad_norm": 0.42513575359285893, + "learning_rate": 2.549956276473953e-07, + "loss": 0.2486, + "step": 20247 + }, + { + "epoch": 0.93, + "grad_norm": 0.32321571423805295, + "learning_rate": 2.546618678717416e-07, + "loss": 0.2313, + "step": 20248 + }, + { + "epoch": 0.93, + "grad_norm": 1.277329839984253, + "learning_rate": 2.543283238484806e-07, + "loss": 0.6595, + "step": 20249 + }, + { + "epoch": 0.93, + "grad_norm": 0.4980861580041878, + "learning_rate": 2.539949955849985e-07, + "loss": 0.2724, + "step": 20250 + }, + { + "epoch": 0.93, + "grad_norm": 0.4478850520930908, + "learning_rate": 2.5366188308867277e-07, + "loss": 0.2443, + "step": 20251 + }, + { + "epoch": 0.93, + "grad_norm": 0.24753836591677975, + "learning_rate": 2.5332898636688087e-07, + "loss": 0.2269, + "step": 20252 + }, + { + "epoch": 0.93, + "grad_norm": 0.35801743365078215, + "learning_rate": 2.5299630542699015e-07, + "loss": 0.2607, + "step": 20253 + }, + { + "epoch": 0.93, + "grad_norm": 2.044771878249905, + "learning_rate": 2.5266384027636705e-07, + "loss": 0.2272, + "step": 20254 + }, + { + "epoch": 0.93, + "grad_norm": 0.8769861360240941, + "learning_rate": 2.523315909223723e-07, + "loss": 0.5199, + "step": 20255 + }, + { + "epoch": 0.93, + "grad_norm": 0.3867640874302784, + "learning_rate": 2.5199955737236104e-07, + "loss": 0.252, + "step": 20256 + }, + { + "epoch": 0.93, + "grad_norm": 0.3430958669740715, + "learning_rate": 2.516677396336842e-07, + "loss": 0.2535, + "step": 20257 + }, + { + "epoch": 0.93, + "grad_norm": 0.39511471586655034, + "learning_rate": 2.5133613771368803e-07, + "loss": 0.1536, + "step": 20258 + }, + { + "epoch": 0.93, + "grad_norm": 0.739350940546067, + "learning_rate": 2.510047516197134e-07, + "loss": 0.3264, + "step": 20259 + }, + { + "epoch": 0.93, + "grad_norm": 0.32475921453100937, + "learning_rate": 2.506735813590988e-07, + "loss": 0.2792, + "step": 20260 + }, + { + "epoch": 0.93, + "grad_norm": 0.4252590542063217, + "learning_rate": 2.50342626939174e-07, + "loss": 0.2705, + "step": 20261 + }, + { + "epoch": 0.93, + "grad_norm": 0.4370952043631025, + "learning_rate": 2.500118883672653e-07, + "loss": 0.2475, + "step": 20262 + }, + { + "epoch": 0.93, + "grad_norm": 0.5073994312791925, + "learning_rate": 2.496813656506969e-07, + "loss": 0.3274, + "step": 20263 + }, + { + "epoch": 0.93, + "grad_norm": 0.2713759143800125, + "learning_rate": 2.4935105879678734e-07, + "loss": 0.1917, + "step": 20264 + }, + { + "epoch": 0.93, + "grad_norm": 0.2963030583515216, + "learning_rate": 2.4902096781284633e-07, + "loss": 0.1758, + "step": 20265 + }, + { + "epoch": 0.93, + "grad_norm": 0.6415973249817809, + "learning_rate": 2.486910927061825e-07, + "loss": 0.3741, + "step": 20266 + }, + { + "epoch": 0.93, + "grad_norm": 0.8155998931265895, + "learning_rate": 2.483614334841e-07, + "loss": 0.2965, + "step": 20267 + }, + { + "epoch": 0.93, + "grad_norm": 0.3350300342590162, + "learning_rate": 2.4803199015389524e-07, + "loss": 0.2624, + "step": 20268 + }, + { + "epoch": 0.93, + "grad_norm": 0.4055295121337285, + "learning_rate": 2.4770276272286563e-07, + "loss": 0.3003, + "step": 20269 + }, + { + "epoch": 0.93, + "grad_norm": 0.30571723614964197, + "learning_rate": 2.473737511982954e-07, + "loss": 0.0832, + "step": 20270 + }, + { + "epoch": 0.93, + "grad_norm": 0.43670735212686623, + "learning_rate": 2.4704495558747097e-07, + "loss": 0.2584, + "step": 20271 + }, + { + "epoch": 0.93, + "grad_norm": 0.6380377944041562, + "learning_rate": 2.467163758976721e-07, + "loss": 0.369, + "step": 20272 + }, + { + "epoch": 0.93, + "grad_norm": 0.35140796119154283, + "learning_rate": 2.463880121361717e-07, + "loss": 0.2948, + "step": 20273 + }, + { + "epoch": 0.93, + "grad_norm": 0.3804209734815149, + "learning_rate": 2.4605986431024075e-07, + "loss": 0.2192, + "step": 20274 + }, + { + "epoch": 0.93, + "grad_norm": 1.4749005472439265, + "learning_rate": 2.4573193242714234e-07, + "loss": 0.5619, + "step": 20275 + }, + { + "epoch": 0.93, + "grad_norm": 0.36173073449424503, + "learning_rate": 2.454042164941384e-07, + "loss": 0.2341, + "step": 20276 + }, + { + "epoch": 0.93, + "grad_norm": 0.3229009453146505, + "learning_rate": 2.450767165184831e-07, + "loss": 0.0794, + "step": 20277 + }, + { + "epoch": 0.93, + "grad_norm": 0.42162576358308523, + "learning_rate": 2.4474943250742734e-07, + "loss": 0.3738, + "step": 20278 + }, + { + "epoch": 0.93, + "grad_norm": 0.6112125037337589, + "learning_rate": 2.4442236446821754e-07, + "loss": 0.3663, + "step": 20279 + }, + { + "epoch": 0.93, + "grad_norm": 0.29490886378306325, + "learning_rate": 2.4409551240809237e-07, + "loss": 0.2105, + "step": 20280 + }, + { + "epoch": 0.93, + "grad_norm": 0.452786444533277, + "learning_rate": 2.437688763342916e-07, + "loss": 0.3006, + "step": 20281 + }, + { + "epoch": 0.93, + "grad_norm": 0.45123505309602124, + "learning_rate": 2.4344245625404385e-07, + "loss": 0.204, + "step": 20282 + }, + { + "epoch": 0.93, + "grad_norm": 0.250896575916827, + "learning_rate": 2.431162521745778e-07, + "loss": 0.1253, + "step": 20283 + }, + { + "epoch": 0.93, + "grad_norm": 0.31551979535233116, + "learning_rate": 2.4279026410311326e-07, + "loss": 0.2764, + "step": 20284 + }, + { + "epoch": 0.93, + "grad_norm": 0.8200551498706821, + "learning_rate": 2.424644920468677e-07, + "loss": 0.3629, + "step": 20285 + }, + { + "epoch": 0.93, + "grad_norm": 0.5641014690955516, + "learning_rate": 2.421389360130544e-07, + "loss": 0.338, + "step": 20286 + }, + { + "epoch": 0.93, + "grad_norm": 0.4785305909289923, + "learning_rate": 2.4181359600887965e-07, + "loss": 0.2125, + "step": 20287 + }, + { + "epoch": 0.93, + "grad_norm": 0.4061423224059129, + "learning_rate": 2.414884720415467e-07, + "loss": 0.274, + "step": 20288 + }, + { + "epoch": 0.93, + "grad_norm": 0.30088749255608993, + "learning_rate": 2.4116356411825526e-07, + "loss": 0.1783, + "step": 20289 + }, + { + "epoch": 0.93, + "grad_norm": 0.4105270021940145, + "learning_rate": 2.4083887224619517e-07, + "loss": 0.2309, + "step": 20290 + }, + { + "epoch": 0.93, + "grad_norm": 0.842164330084923, + "learning_rate": 2.4051439643255737e-07, + "loss": 0.4385, + "step": 20291 + }, + { + "epoch": 0.93, + "grad_norm": 0.302655314777836, + "learning_rate": 2.4019013668452385e-07, + "loss": 0.2484, + "step": 20292 + }, + { + "epoch": 0.93, + "grad_norm": 0.5277050958918428, + "learning_rate": 2.3986609300927443e-07, + "loss": 0.2296, + "step": 20293 + }, + { + "epoch": 0.93, + "grad_norm": 0.3968767535723013, + "learning_rate": 2.395422654139834e-07, + "loss": 0.1911, + "step": 20294 + }, + { + "epoch": 0.93, + "grad_norm": 0.39800450306383267, + "learning_rate": 2.3921865390581834e-07, + "loss": 0.2442, + "step": 20295 + }, + { + "epoch": 0.93, + "grad_norm": 0.2530313238900536, + "learning_rate": 2.3889525849194573e-07, + "loss": 0.2302, + "step": 20296 + }, + { + "epoch": 0.93, + "grad_norm": 0.6657040919509456, + "learning_rate": 2.385720791795221e-07, + "loss": 0.3626, + "step": 20297 + }, + { + "epoch": 0.93, + "grad_norm": 0.9596038743778246, + "learning_rate": 2.382491159757072e-07, + "loss": 0.5399, + "step": 20298 + }, + { + "epoch": 0.93, + "grad_norm": 0.41118919885413835, + "learning_rate": 2.3792636888764653e-07, + "loss": 0.2798, + "step": 20299 + }, + { + "epoch": 0.93, + "grad_norm": 0.46458227413717607, + "learning_rate": 2.3760383792248877e-07, + "loss": 0.2362, + "step": 20300 + }, + { + "epoch": 0.93, + "grad_norm": 0.4206024100970382, + "learning_rate": 2.372815230873715e-07, + "loss": 0.2384, + "step": 20301 + }, + { + "epoch": 0.93, + "grad_norm": 0.39675777516914296, + "learning_rate": 2.3695942438943242e-07, + "loss": 0.2776, + "step": 20302 + }, + { + "epoch": 0.93, + "grad_norm": 0.9036010448243601, + "learning_rate": 2.3663754183580246e-07, + "loss": 0.3673, + "step": 20303 + }, + { + "epoch": 0.93, + "grad_norm": 0.3099949562224228, + "learning_rate": 2.363158754336059e-07, + "loss": 0.2501, + "step": 20304 + }, + { + "epoch": 0.93, + "grad_norm": 0.3270929693647476, + "learning_rate": 2.3599442518996595e-07, + "loss": 0.2273, + "step": 20305 + }, + { + "epoch": 0.93, + "grad_norm": 1.8138154553927484, + "learning_rate": 2.3567319111200026e-07, + "loss": 0.1846, + "step": 20306 + }, + { + "epoch": 0.93, + "grad_norm": 0.5444685251749236, + "learning_rate": 2.3535217320681757e-07, + "loss": 0.2963, + "step": 20307 + }, + { + "epoch": 0.93, + "grad_norm": 0.32499922480771637, + "learning_rate": 2.3503137148152667e-07, + "loss": 0.2809, + "step": 20308 + }, + { + "epoch": 0.93, + "grad_norm": 0.3244533675020009, + "learning_rate": 2.347107859432296e-07, + "loss": 0.216, + "step": 20309 + }, + { + "epoch": 0.93, + "grad_norm": 0.38251829464169473, + "learning_rate": 2.3439041659902405e-07, + "loss": 0.1854, + "step": 20310 + }, + { + "epoch": 0.93, + "grad_norm": 0.6514739418433115, + "learning_rate": 2.3407026345600326e-07, + "loss": 0.3373, + "step": 20311 + }, + { + "epoch": 0.93, + "grad_norm": 0.46085583374294253, + "learning_rate": 2.3375032652125262e-07, + "loss": 0.3232, + "step": 20312 + }, + { + "epoch": 0.93, + "grad_norm": 0.7056120353029429, + "learning_rate": 2.334306058018587e-07, + "loss": 0.2697, + "step": 20313 + }, + { + "epoch": 0.93, + "grad_norm": 0.3627885529221782, + "learning_rate": 2.3311110130489589e-07, + "loss": 0.2501, + "step": 20314 + }, + { + "epoch": 0.93, + "grad_norm": 0.5040363119911151, + "learning_rate": 2.3279181303744182e-07, + "loss": 0.3626, + "step": 20315 + }, + { + "epoch": 0.93, + "grad_norm": 0.2784540163617053, + "learning_rate": 2.3247274100656192e-07, + "loss": 0.1625, + "step": 20316 + }, + { + "epoch": 0.93, + "grad_norm": 0.28793214363624575, + "learning_rate": 2.321538852193228e-07, + "loss": 0.2236, + "step": 20317 + }, + { + "epoch": 0.93, + "grad_norm": 1.0457888910222868, + "learning_rate": 2.3183524568278103e-07, + "loss": 0.4959, + "step": 20318 + }, + { + "epoch": 0.93, + "grad_norm": 0.4212363905585375, + "learning_rate": 2.315168224039932e-07, + "loss": 0.2373, + "step": 20319 + }, + { + "epoch": 0.93, + "grad_norm": 0.32649739895160507, + "learning_rate": 2.311986153900081e-07, + "loss": 0.2568, + "step": 20320 + }, + { + "epoch": 0.93, + "grad_norm": 1.334089284694028, + "learning_rate": 2.3088062464786898e-07, + "loss": 0.6777, + "step": 20321 + }, + { + "epoch": 0.93, + "grad_norm": 0.366063592687289, + "learning_rate": 2.30562850184618e-07, + "loss": 0.1818, + "step": 20322 + }, + { + "epoch": 0.93, + "grad_norm": 0.3955331787358139, + "learning_rate": 2.3024529200728952e-07, + "loss": 0.187, + "step": 20323 + }, + { + "epoch": 0.93, + "grad_norm": 0.3861155678140227, + "learning_rate": 2.299279501229146e-07, + "loss": 0.2874, + "step": 20324 + }, + { + "epoch": 0.93, + "grad_norm": 0.6414790965607733, + "learning_rate": 2.296108245385187e-07, + "loss": 0.3351, + "step": 20325 + }, + { + "epoch": 0.93, + "grad_norm": 0.3242079721436717, + "learning_rate": 2.2929391526112067e-07, + "loss": 0.1969, + "step": 20326 + }, + { + "epoch": 0.93, + "grad_norm": 1.270508341889147, + "learning_rate": 2.2897722229773934e-07, + "loss": 0.8165, + "step": 20327 + }, + { + "epoch": 0.93, + "grad_norm": 0.27067775847068815, + "learning_rate": 2.2866074565538355e-07, + "loss": 0.2275, + "step": 20328 + }, + { + "epoch": 0.93, + "grad_norm": 0.2784601732216756, + "learning_rate": 2.2834448534106322e-07, + "loss": 0.1578, + "step": 20329 + }, + { + "epoch": 0.93, + "grad_norm": 0.6654540134533653, + "learning_rate": 2.2802844136177727e-07, + "loss": 0.4029, + "step": 20330 + }, + { + "epoch": 0.93, + "grad_norm": 0.36523997297511285, + "learning_rate": 2.2771261372452225e-07, + "loss": 0.2815, + "step": 20331 + }, + { + "epoch": 0.93, + "grad_norm": 0.3874379345787041, + "learning_rate": 2.2739700243629258e-07, + "loss": 0.1865, + "step": 20332 + }, + { + "epoch": 0.93, + "grad_norm": 1.2624394491082165, + "learning_rate": 2.2708160750407272e-07, + "loss": 0.7589, + "step": 20333 + }, + { + "epoch": 0.93, + "grad_norm": 0.4753703274883254, + "learning_rate": 2.2676642893484924e-07, + "loss": 0.2531, + "step": 20334 + }, + { + "epoch": 0.93, + "grad_norm": 0.29742470537833593, + "learning_rate": 2.2645146673559548e-07, + "loss": 0.222, + "step": 20335 + }, + { + "epoch": 0.93, + "grad_norm": 0.37956837268024224, + "learning_rate": 2.26136720913287e-07, + "loss": 0.2525, + "step": 20336 + }, + { + "epoch": 0.93, + "grad_norm": 1.0212594133599044, + "learning_rate": 2.2582219147489148e-07, + "loss": 0.3834, + "step": 20337 + }, + { + "epoch": 0.93, + "grad_norm": 0.3218352834113083, + "learning_rate": 2.255078784273712e-07, + "loss": 0.2429, + "step": 20338 + }, + { + "epoch": 0.93, + "grad_norm": 1.2726911456318988, + "learning_rate": 2.2519378177768726e-07, + "loss": 0.5171, + "step": 20339 + }, + { + "epoch": 0.93, + "grad_norm": 0.3019517703327587, + "learning_rate": 2.248799015327907e-07, + "loss": 0.2344, + "step": 20340 + }, + { + "epoch": 0.93, + "grad_norm": 0.3631963923240562, + "learning_rate": 2.245662376996316e-07, + "loss": 0.2641, + "step": 20341 + }, + { + "epoch": 0.93, + "grad_norm": 0.47189501742338735, + "learning_rate": 2.2425279028515658e-07, + "loss": 0.2008, + "step": 20342 + }, + { + "epoch": 0.93, + "grad_norm": 0.39569661663070327, + "learning_rate": 2.2393955929630006e-07, + "loss": 0.2883, + "step": 20343 + }, + { + "epoch": 0.93, + "grad_norm": 0.38570537432736174, + "learning_rate": 2.236265447399999e-07, + "loss": 0.2255, + "step": 20344 + }, + { + "epoch": 0.93, + "grad_norm": 0.5477151484180477, + "learning_rate": 2.2331374662318606e-07, + "loss": 0.2596, + "step": 20345 + }, + { + "epoch": 0.93, + "grad_norm": 0.81591999427598, + "learning_rate": 2.23001164952783e-07, + "loss": 0.3123, + "step": 20346 + }, + { + "epoch": 0.93, + "grad_norm": 0.38507966022435786, + "learning_rate": 2.2268879973571077e-07, + "loss": 0.2834, + "step": 20347 + }, + { + "epoch": 0.93, + "grad_norm": 0.407515416413074, + "learning_rate": 2.2237665097888494e-07, + "loss": 0.3007, + "step": 20348 + }, + { + "epoch": 0.93, + "grad_norm": 0.1556462289393866, + "learning_rate": 2.2206471868921775e-07, + "loss": 0.069, + "step": 20349 + }, + { + "epoch": 0.93, + "grad_norm": 0.3638398600921348, + "learning_rate": 2.2175300287361146e-07, + "loss": 0.2723, + "step": 20350 + }, + { + "epoch": 0.93, + "grad_norm": 0.5305481133851238, + "learning_rate": 2.2144150353897053e-07, + "loss": 0.4006, + "step": 20351 + }, + { + "epoch": 0.93, + "grad_norm": 0.5128084600900923, + "learning_rate": 2.2113022069218947e-07, + "loss": 0.2433, + "step": 20352 + }, + { + "epoch": 0.93, + "grad_norm": 0.35666864695916084, + "learning_rate": 2.2081915434016053e-07, + "loss": 0.252, + "step": 20353 + }, + { + "epoch": 0.94, + "grad_norm": 0.3513070545024936, + "learning_rate": 2.2050830448977046e-07, + "loss": 0.2015, + "step": 20354 + }, + { + "epoch": 0.94, + "grad_norm": 0.4797732697225549, + "learning_rate": 2.2019767114790037e-07, + "loss": 0.2237, + "step": 20355 + }, + { + "epoch": 0.94, + "grad_norm": 0.4320824685646187, + "learning_rate": 2.1988725432142921e-07, + "loss": 0.2603, + "step": 20356 + }, + { + "epoch": 0.94, + "grad_norm": 1.219671450713135, + "learning_rate": 2.1957705401722486e-07, + "loss": 0.7308, + "step": 20357 + }, + { + "epoch": 0.94, + "grad_norm": 0.5907908938222902, + "learning_rate": 2.1926707024216065e-07, + "loss": 0.29, + "step": 20358 + }, + { + "epoch": 0.94, + "grad_norm": 0.2980162293910632, + "learning_rate": 2.1895730300309782e-07, + "loss": 0.2547, + "step": 20359 + }, + { + "epoch": 0.94, + "grad_norm": 0.5376113299389748, + "learning_rate": 2.1864775230689082e-07, + "loss": 0.3275, + "step": 20360 + }, + { + "epoch": 0.94, + "grad_norm": 0.3447872746986093, + "learning_rate": 2.1833841816039536e-07, + "loss": 0.1724, + "step": 20361 + }, + { + "epoch": 0.94, + "grad_norm": 0.5777005569542517, + "learning_rate": 2.1802930057046033e-07, + "loss": 0.1709, + "step": 20362 + }, + { + "epoch": 0.94, + "grad_norm": 0.36678873634119313, + "learning_rate": 2.177203995439292e-07, + "loss": 0.2942, + "step": 20363 + }, + { + "epoch": 0.94, + "grad_norm": 0.453671282522417, + "learning_rate": 2.174117150876398e-07, + "loss": 0.3196, + "step": 20364 + }, + { + "epoch": 0.94, + "grad_norm": 0.4022097635019398, + "learning_rate": 2.1710324720842556e-07, + "loss": 0.2013, + "step": 20365 + }, + { + "epoch": 0.94, + "grad_norm": 0.610924593817953, + "learning_rate": 2.167949959131177e-07, + "loss": 0.3092, + "step": 20366 + }, + { + "epoch": 0.94, + "grad_norm": 0.24023013007139188, + "learning_rate": 2.1648696120853852e-07, + "loss": 0.1977, + "step": 20367 + }, + { + "epoch": 0.94, + "grad_norm": 0.30836719366336796, + "learning_rate": 2.1617914310150923e-07, + "loss": 0.1762, + "step": 20368 + }, + { + "epoch": 0.94, + "grad_norm": 0.8934089653922884, + "learning_rate": 2.1587154159884326e-07, + "loss": 0.5387, + "step": 20369 + }, + { + "epoch": 0.94, + "grad_norm": 0.7326153035333582, + "learning_rate": 2.1556415670735186e-07, + "loss": 0.3405, + "step": 20370 + }, + { + "epoch": 0.94, + "grad_norm": 0.2687671004486713, + "learning_rate": 2.1525698843383957e-07, + "loss": 0.2047, + "step": 20371 + }, + { + "epoch": 0.94, + "grad_norm": 0.6407793777763497, + "learning_rate": 2.149500367851065e-07, + "loss": 0.3433, + "step": 20372 + }, + { + "epoch": 0.94, + "grad_norm": 0.24486951353909198, + "learning_rate": 2.1464330176795057e-07, + "loss": 0.1356, + "step": 20373 + }, + { + "epoch": 0.94, + "grad_norm": 0.43065552540764385, + "learning_rate": 2.1433678338915743e-07, + "loss": 0.2612, + "step": 20374 + }, + { + "epoch": 0.94, + "grad_norm": 0.3642075185994928, + "learning_rate": 2.140304816555183e-07, + "loss": 0.2396, + "step": 20375 + }, + { + "epoch": 0.94, + "grad_norm": 0.6808316565385003, + "learning_rate": 2.1372439657381339e-07, + "loss": 0.3066, + "step": 20376 + }, + { + "epoch": 0.94, + "grad_norm": 0.3551770542473789, + "learning_rate": 2.134185281508161e-07, + "loss": 0.2614, + "step": 20377 + }, + { + "epoch": 0.94, + "grad_norm": 1.4415529381269057, + "learning_rate": 2.1311287639330102e-07, + "loss": 0.1986, + "step": 20378 + }, + { + "epoch": 0.94, + "grad_norm": 0.23632459020531968, + "learning_rate": 2.1280744130803387e-07, + "loss": 0.2085, + "step": 20379 + }, + { + "epoch": 0.94, + "grad_norm": 0.39953285597734156, + "learning_rate": 2.1250222290177813e-07, + "loss": 0.241, + "step": 20380 + }, + { + "epoch": 0.94, + "grad_norm": 0.48470030142612236, + "learning_rate": 2.1219722118128838e-07, + "loss": 0.2571, + "step": 20381 + }, + { + "epoch": 0.94, + "grad_norm": 0.706063137222035, + "learning_rate": 2.1189243615331923e-07, + "loss": 0.3918, + "step": 20382 + }, + { + "epoch": 0.94, + "grad_norm": 0.3872402879199893, + "learning_rate": 2.1158786782461749e-07, + "loss": 0.2777, + "step": 20383 + }, + { + "epoch": 0.94, + "grad_norm": 0.3272993457786425, + "learning_rate": 2.1128351620192666e-07, + "loss": 0.2237, + "step": 20384 + }, + { + "epoch": 0.94, + "grad_norm": 0.3221901484105169, + "learning_rate": 2.109793812919847e-07, + "loss": 0.1791, + "step": 20385 + }, + { + "epoch": 0.94, + "grad_norm": 0.6111052974105129, + "learning_rate": 2.1067546310152287e-07, + "loss": 0.3089, + "step": 20386 + }, + { + "epoch": 0.94, + "grad_norm": 0.27871068665779475, + "learning_rate": 2.1037176163727136e-07, + "loss": 0.2717, + "step": 20387 + }, + { + "epoch": 0.94, + "grad_norm": 0.7988840093341497, + "learning_rate": 2.1006827690595478e-07, + "loss": 0.3091, + "step": 20388 + }, + { + "epoch": 0.94, + "grad_norm": 0.4735241670265819, + "learning_rate": 2.0976500891429107e-07, + "loss": 0.2695, + "step": 20389 + }, + { + "epoch": 0.94, + "grad_norm": 0.6613151062401929, + "learning_rate": 2.094619576689938e-07, + "loss": 0.3487, + "step": 20390 + }, + { + "epoch": 0.94, + "grad_norm": 0.2598688411503686, + "learning_rate": 2.091591231767709e-07, + "loss": 0.1915, + "step": 20391 + }, + { + "epoch": 0.94, + "grad_norm": 0.3280152449456758, + "learning_rate": 2.0885650544433033e-07, + "loss": 0.2131, + "step": 20392 + }, + { + "epoch": 0.94, + "grad_norm": 0.48272306235511014, + "learning_rate": 2.0855410447836899e-07, + "loss": 0.3169, + "step": 20393 + }, + { + "epoch": 0.94, + "grad_norm": 1.0209784982127665, + "learning_rate": 2.0825192028558373e-07, + "loss": 0.4927, + "step": 20394 + }, + { + "epoch": 0.94, + "grad_norm": 0.28851719200288073, + "learning_rate": 2.0794995287266251e-07, + "loss": 0.2671, + "step": 20395 + }, + { + "epoch": 0.94, + "grad_norm": 0.933235717363819, + "learning_rate": 2.0764820224629222e-07, + "loss": 0.3166, + "step": 20396 + }, + { + "epoch": 0.94, + "grad_norm": 0.5017730251616959, + "learning_rate": 2.073466684131531e-07, + "loss": 0.1522, + "step": 20397 + }, + { + "epoch": 0.94, + "grad_norm": 0.43156852936186113, + "learning_rate": 2.0704535137991867e-07, + "loss": 0.2671, + "step": 20398 + }, + { + "epoch": 0.94, + "grad_norm": 0.32873267894150576, + "learning_rate": 2.0674425115326357e-07, + "loss": 0.2725, + "step": 20399 + }, + { + "epoch": 0.94, + "grad_norm": 0.9888763073706089, + "learning_rate": 2.064433677398514e-07, + "loss": 0.5853, + "step": 20400 + }, + { + "epoch": 0.94, + "grad_norm": 0.4128605356417361, + "learning_rate": 2.0614270114634238e-07, + "loss": 0.0844, + "step": 20401 + }, + { + "epoch": 0.94, + "grad_norm": 0.4461166208872364, + "learning_rate": 2.0584225137939673e-07, + "loss": 0.2971, + "step": 20402 + }, + { + "epoch": 0.94, + "grad_norm": 0.36416643002486687, + "learning_rate": 2.0554201844566246e-07, + "loss": 0.2931, + "step": 20403 + }, + { + "epoch": 0.94, + "grad_norm": 0.6022783532858119, + "learning_rate": 2.052420023517887e-07, + "loss": 0.1899, + "step": 20404 + }, + { + "epoch": 0.94, + "grad_norm": 0.3200812607866496, + "learning_rate": 2.0494220310441683e-07, + "loss": 0.2569, + "step": 20405 + }, + { + "epoch": 0.94, + "grad_norm": 1.1696846086953894, + "learning_rate": 2.0464262071018258e-07, + "loss": 0.7248, + "step": 20406 + }, + { + "epoch": 0.94, + "grad_norm": 0.2392843467032607, + "learning_rate": 2.0434325517572185e-07, + "loss": 0.1645, + "step": 20407 + }, + { + "epoch": 0.94, + "grad_norm": 0.5100837631449398, + "learning_rate": 2.0404410650765817e-07, + "loss": 0.2815, + "step": 20408 + }, + { + "epoch": 0.94, + "grad_norm": 0.8881724471217609, + "learning_rate": 2.0374517471261734e-07, + "loss": 0.3767, + "step": 20409 + }, + { + "epoch": 0.94, + "grad_norm": 0.47059438844974977, + "learning_rate": 2.0344645979721632e-07, + "loss": 0.2169, + "step": 20410 + }, + { + "epoch": 0.94, + "grad_norm": 0.3945350936540093, + "learning_rate": 2.0314796176806984e-07, + "loss": 0.2953, + "step": 20411 + }, + { + "epoch": 0.94, + "grad_norm": 0.5164100091668331, + "learning_rate": 2.0284968063178477e-07, + "loss": 0.3876, + "step": 20412 + }, + { + "epoch": 0.94, + "grad_norm": 0.23973671206336145, + "learning_rate": 2.025516163949637e-07, + "loss": 0.1405, + "step": 20413 + }, + { + "epoch": 0.94, + "grad_norm": 0.39926256369913904, + "learning_rate": 2.02253769064209e-07, + "loss": 0.2164, + "step": 20414 + }, + { + "epoch": 0.94, + "grad_norm": 0.4069668460569309, + "learning_rate": 2.0195613864611108e-07, + "loss": 0.2847, + "step": 20415 + }, + { + "epoch": 0.94, + "grad_norm": 0.6034305812371357, + "learning_rate": 2.0165872514726237e-07, + "loss": 0.3068, + "step": 20416 + }, + { + "epoch": 0.94, + "grad_norm": 0.4245702671823467, + "learning_rate": 2.013615285742443e-07, + "loss": 0.2192, + "step": 20417 + }, + { + "epoch": 0.94, + "grad_norm": 0.33197027042437155, + "learning_rate": 2.0106454893363824e-07, + "loss": 0.2693, + "step": 20418 + }, + { + "epoch": 0.94, + "grad_norm": 0.3099281917365217, + "learning_rate": 2.0076778623201898e-07, + "loss": 0.2169, + "step": 20419 + }, + { + "epoch": 0.94, + "grad_norm": 0.36704963176160615, + "learning_rate": 2.0047124047595567e-07, + "loss": 0.1908, + "step": 20420 + }, + { + "epoch": 0.94, + "grad_norm": 0.7703200634484844, + "learning_rate": 2.001749116720153e-07, + "loss": 0.3597, + "step": 20421 + }, + { + "epoch": 0.94, + "grad_norm": 1.2707164665385928, + "learning_rate": 1.9987879982675596e-07, + "loss": 0.329, + "step": 20422 + }, + { + "epoch": 0.94, + "grad_norm": 0.25195888092046703, + "learning_rate": 1.995829049467357e-07, + "loss": 0.2317, + "step": 20423 + }, + { + "epoch": 0.94, + "grad_norm": 1.5811561566242762, + "learning_rate": 1.992872270385038e-07, + "loss": 0.724, + "step": 20424 + }, + { + "epoch": 0.94, + "grad_norm": 0.3043885414567743, + "learning_rate": 1.9899176610860605e-07, + "loss": 0.1614, + "step": 20425 + }, + { + "epoch": 0.94, + "grad_norm": 0.39372796653292413, + "learning_rate": 1.9869652216358505e-07, + "loss": 0.2715, + "step": 20426 + }, + { + "epoch": 0.94, + "grad_norm": 0.35851093330294664, + "learning_rate": 1.9840149520997552e-07, + "loss": 0.2358, + "step": 20427 + }, + { + "epoch": 0.94, + "grad_norm": 0.957601474558348, + "learning_rate": 1.981066852543112e-07, + "loss": 0.3854, + "step": 20428 + }, + { + "epoch": 0.94, + "grad_norm": 0.3932757507110718, + "learning_rate": 1.9781209230311682e-07, + "loss": 0.2677, + "step": 20429 + }, + { + "epoch": 0.94, + "grad_norm": 0.5287798956466142, + "learning_rate": 1.9751771636291496e-07, + "loss": 0.2612, + "step": 20430 + }, + { + "epoch": 0.94, + "grad_norm": 0.3418548491039063, + "learning_rate": 1.972235574402237e-07, + "loss": 0.2239, + "step": 20431 + }, + { + "epoch": 0.94, + "grad_norm": 0.2845129583643551, + "learning_rate": 1.9692961554155455e-07, + "loss": 0.2113, + "step": 20432 + }, + { + "epoch": 0.94, + "grad_norm": 0.6648581863837347, + "learning_rate": 1.966358906734167e-07, + "loss": 0.284, + "step": 20433 + }, + { + "epoch": 0.94, + "grad_norm": 0.43754425350539133, + "learning_rate": 1.9634238284230945e-07, + "loss": 0.2885, + "step": 20434 + }, + { + "epoch": 0.94, + "grad_norm": 0.3433553621456016, + "learning_rate": 1.960490920547342e-07, + "loss": 0.2884, + "step": 20435 + }, + { + "epoch": 0.94, + "grad_norm": 1.10506298031863, + "learning_rate": 1.957560183171825e-07, + "loss": 0.5028, + "step": 20436 + }, + { + "epoch": 0.94, + "grad_norm": 0.5508027036185996, + "learning_rate": 1.9546316163614354e-07, + "loss": 0.2761, + "step": 20437 + }, + { + "epoch": 0.94, + "grad_norm": 0.35337702173589947, + "learning_rate": 1.9517052201809994e-07, + "loss": 0.2557, + "step": 20438 + }, + { + "epoch": 0.94, + "grad_norm": 0.31641671055389003, + "learning_rate": 1.9487809946953095e-07, + "loss": 0.2466, + "step": 20439 + }, + { + "epoch": 0.94, + "grad_norm": 1.0594179808228281, + "learning_rate": 1.945858939969114e-07, + "loss": 0.1026, + "step": 20440 + }, + { + "epoch": 0.94, + "grad_norm": 0.3712443940130986, + "learning_rate": 1.9429390560670946e-07, + "loss": 0.2781, + "step": 20441 + }, + { + "epoch": 0.94, + "grad_norm": 0.5788349920310496, + "learning_rate": 1.9400213430538773e-07, + "loss": 0.3295, + "step": 20442 + }, + { + "epoch": 0.94, + "grad_norm": 0.44125149317889895, + "learning_rate": 1.937105800994099e-07, + "loss": 0.2368, + "step": 20443 + }, + { + "epoch": 0.94, + "grad_norm": 0.38456601402433777, + "learning_rate": 1.9341924299522641e-07, + "loss": 0.2561, + "step": 20444 + }, + { + "epoch": 0.94, + "grad_norm": 0.2952617166111256, + "learning_rate": 1.9312812299929096e-07, + "loss": 0.1808, + "step": 20445 + }, + { + "epoch": 0.94, + "grad_norm": 0.3828287251667097, + "learning_rate": 1.9283722011804616e-07, + "loss": 0.2246, + "step": 20446 + }, + { + "epoch": 0.94, + "grad_norm": 0.3572317357069083, + "learning_rate": 1.9254653435793247e-07, + "loss": 0.2674, + "step": 20447 + }, + { + "epoch": 0.94, + "grad_norm": 1.2261124422778444, + "learning_rate": 1.9225606572538691e-07, + "loss": 0.6444, + "step": 20448 + }, + { + "epoch": 0.94, + "grad_norm": 0.641795778479192, + "learning_rate": 1.9196581422683879e-07, + "loss": 0.3487, + "step": 20449 + }, + { + "epoch": 0.94, + "grad_norm": 0.3160819442364377, + "learning_rate": 1.916757798687152e-07, + "loss": 0.184, + "step": 20450 + }, + { + "epoch": 0.94, + "grad_norm": 0.2666931696691146, + "learning_rate": 1.9138596265743437e-07, + "loss": 0.2108, + "step": 20451 + }, + { + "epoch": 0.94, + "grad_norm": 0.9001285574396501, + "learning_rate": 1.9109636259941665e-07, + "loss": 0.4365, + "step": 20452 + }, + { + "epoch": 0.94, + "grad_norm": 0.3354497451641189, + "learning_rate": 1.9080697970107143e-07, + "loss": 0.1997, + "step": 20453 + }, + { + "epoch": 0.94, + "grad_norm": 0.3562307992058924, + "learning_rate": 1.9051781396880465e-07, + "loss": 0.2761, + "step": 20454 + }, + { + "epoch": 0.94, + "grad_norm": 0.7190937300166942, + "learning_rate": 1.9022886540901896e-07, + "loss": 0.3587, + "step": 20455 + }, + { + "epoch": 0.94, + "grad_norm": 0.33736248086153364, + "learning_rate": 1.8994013402811152e-07, + "loss": 0.1967, + "step": 20456 + }, + { + "epoch": 0.94, + "grad_norm": 0.38291055506058985, + "learning_rate": 1.8965161983247494e-07, + "loss": 0.1826, + "step": 20457 + }, + { + "epoch": 0.94, + "grad_norm": 0.3634390794393071, + "learning_rate": 1.8936332282849524e-07, + "loss": 0.2942, + "step": 20458 + }, + { + "epoch": 0.94, + "grad_norm": 0.3134220166612323, + "learning_rate": 1.890752430225573e-07, + "loss": 0.1992, + "step": 20459 + }, + { + "epoch": 0.94, + "grad_norm": 0.7358527077854286, + "learning_rate": 1.8878738042103717e-07, + "loss": 0.4177, + "step": 20460 + }, + { + "epoch": 0.94, + "grad_norm": 0.8232860449028477, + "learning_rate": 1.884997350303075e-07, + "loss": 0.4002, + "step": 20461 + }, + { + "epoch": 0.94, + "grad_norm": 0.34073203080387404, + "learning_rate": 1.8821230685673763e-07, + "loss": 0.2539, + "step": 20462 + }, + { + "epoch": 0.94, + "grad_norm": 0.3773305505852029, + "learning_rate": 1.8792509590669028e-07, + "loss": 0.2166, + "step": 20463 + }, + { + "epoch": 0.94, + "grad_norm": 0.3947361355671965, + "learning_rate": 1.8763810218652478e-07, + "loss": 0.2126, + "step": 20464 + }, + { + "epoch": 0.94, + "grad_norm": 0.32581076371949746, + "learning_rate": 1.8735132570259497e-07, + "loss": 0.259, + "step": 20465 + }, + { + "epoch": 0.94, + "grad_norm": 0.45798812597979743, + "learning_rate": 1.8706476646124794e-07, + "loss": 0.2512, + "step": 20466 + }, + { + "epoch": 0.94, + "grad_norm": 0.5369868188462893, + "learning_rate": 1.8677842446883087e-07, + "loss": 0.3506, + "step": 20467 + }, + { + "epoch": 0.94, + "grad_norm": 0.4337113445391564, + "learning_rate": 1.864922997316787e-07, + "loss": 0.255, + "step": 20468 + }, + { + "epoch": 0.94, + "grad_norm": 0.3409451569285432, + "learning_rate": 1.8620639225613078e-07, + "loss": 0.1286, + "step": 20469 + }, + { + "epoch": 0.94, + "grad_norm": 0.3700127269553743, + "learning_rate": 1.8592070204851542e-07, + "loss": 0.2946, + "step": 20470 + }, + { + "epoch": 0.94, + "grad_norm": 0.4262934698041841, + "learning_rate": 1.856352291151553e-07, + "loss": 0.2502, + "step": 20471 + }, + { + "epoch": 0.94, + "grad_norm": 0.5591357314816566, + "learning_rate": 1.8534997346237094e-07, + "loss": 0.2649, + "step": 20472 + }, + { + "epoch": 0.94, + "grad_norm": 1.3081355861364123, + "learning_rate": 1.850649350964806e-07, + "loss": 0.6253, + "step": 20473 + }, + { + "epoch": 0.94, + "grad_norm": 0.34105478965163877, + "learning_rate": 1.8478011402379258e-07, + "loss": 0.2518, + "step": 20474 + }, + { + "epoch": 0.94, + "grad_norm": 0.4159186954680078, + "learning_rate": 1.8449551025061186e-07, + "loss": 0.2761, + "step": 20475 + }, + { + "epoch": 0.94, + "grad_norm": 0.1739731628125064, + "learning_rate": 1.8421112378324113e-07, + "loss": 0.0918, + "step": 20476 + }, + { + "epoch": 0.94, + "grad_norm": 0.3551346296474776, + "learning_rate": 1.8392695462797537e-07, + "loss": 0.2786, + "step": 20477 + }, + { + "epoch": 0.94, + "grad_norm": 0.4849899641672736, + "learning_rate": 1.8364300279110514e-07, + "loss": 0.3308, + "step": 20478 + }, + { + "epoch": 0.94, + "grad_norm": 0.5578742994492052, + "learning_rate": 1.833592682789187e-07, + "loss": 0.2628, + "step": 20479 + }, + { + "epoch": 0.94, + "grad_norm": 0.41016988027942997, + "learning_rate": 1.830757510976966e-07, + "loss": 0.2546, + "step": 20480 + }, + { + "epoch": 0.94, + "grad_norm": 1.9021470652297021, + "learning_rate": 1.827924512537149e-07, + "loss": 0.5034, + "step": 20481 + }, + { + "epoch": 0.94, + "grad_norm": 0.2110853031075331, + "learning_rate": 1.8250936875324755e-07, + "loss": 0.1657, + "step": 20482 + }, + { + "epoch": 0.94, + "grad_norm": 0.3452904691240392, + "learning_rate": 1.8222650360255944e-07, + "loss": 0.2515, + "step": 20483 + }, + { + "epoch": 0.94, + "grad_norm": 0.6430167247263239, + "learning_rate": 1.8194385580791562e-07, + "loss": 0.3735, + "step": 20484 + }, + { + "epoch": 0.94, + "grad_norm": 0.5487244406749696, + "learning_rate": 1.8166142537556997e-07, + "loss": 0.2702, + "step": 20485 + }, + { + "epoch": 0.94, + "grad_norm": 0.43554094012383743, + "learning_rate": 1.8137921231177856e-07, + "loss": 0.2786, + "step": 20486 + }, + { + "epoch": 0.94, + "grad_norm": 0.44008215794976246, + "learning_rate": 1.8109721662278755e-07, + "loss": 0.2807, + "step": 20487 + }, + { + "epoch": 0.94, + "grad_norm": 0.4872874386164632, + "learning_rate": 1.8081543831484082e-07, + "loss": 0.2695, + "step": 20488 + }, + { + "epoch": 0.94, + "grad_norm": 0.48281113191000186, + "learning_rate": 1.8053387739417782e-07, + "loss": 0.1935, + "step": 20489 + }, + { + "epoch": 0.94, + "grad_norm": 0.3061508092708022, + "learning_rate": 1.80252533867028e-07, + "loss": 0.2823, + "step": 20490 + }, + { + "epoch": 0.94, + "grad_norm": 0.4860581173143456, + "learning_rate": 1.7997140773962418e-07, + "loss": 0.3248, + "step": 20491 + }, + { + "epoch": 0.94, + "grad_norm": 0.37810209978247084, + "learning_rate": 1.7969049901818913e-07, + "loss": 0.1651, + "step": 20492 + }, + { + "epoch": 0.94, + "grad_norm": 0.648351473682077, + "learning_rate": 1.7940980770894122e-07, + "loss": 0.3368, + "step": 20493 + }, + { + "epoch": 0.94, + "grad_norm": 0.38140417822583, + "learning_rate": 1.791293338180944e-07, + "loss": 0.2965, + "step": 20494 + }, + { + "epoch": 0.94, + "grad_norm": 0.3228661667115769, + "learning_rate": 1.7884907735185807e-07, + "loss": 0.2171, + "step": 20495 + }, + { + "epoch": 0.94, + "grad_norm": 0.8766083083615784, + "learning_rate": 1.7856903831643957e-07, + "loss": 0.5052, + "step": 20496 + }, + { + "epoch": 0.94, + "grad_norm": 0.3164946816302706, + "learning_rate": 1.78289216718035e-07, + "loss": 0.1667, + "step": 20497 + }, + { + "epoch": 0.94, + "grad_norm": 0.26628472415444915, + "learning_rate": 1.7800961256284054e-07, + "loss": 0.2126, + "step": 20498 + }, + { + "epoch": 0.94, + "grad_norm": 1.6943038042317498, + "learning_rate": 1.777302258570479e-07, + "loss": 0.4154, + "step": 20499 + }, + { + "epoch": 0.94, + "grad_norm": 0.6635018843264386, + "learning_rate": 1.7745105660683993e-07, + "loss": 0.3697, + "step": 20500 + }, + { + "epoch": 0.94, + "grad_norm": 0.4245086142165088, + "learning_rate": 1.7717210481839942e-07, + "loss": 0.2907, + "step": 20501 + }, + { + "epoch": 0.94, + "grad_norm": 0.30697746514597013, + "learning_rate": 1.7689337049790035e-07, + "loss": 0.2381, + "step": 20502 + }, + { + "epoch": 0.94, + "grad_norm": 0.26114647542628233, + "learning_rate": 1.7661485365151553e-07, + "loss": 0.1643, + "step": 20503 + }, + { + "epoch": 0.94, + "grad_norm": 0.6409470330149446, + "learning_rate": 1.7633655428540897e-07, + "loss": 0.3168, + "step": 20504 + }, + { + "epoch": 0.94, + "grad_norm": 0.4231173436237854, + "learning_rate": 1.7605847240574346e-07, + "loss": 0.2184, + "step": 20505 + }, + { + "epoch": 0.94, + "grad_norm": 0.38606656324697736, + "learning_rate": 1.7578060801867524e-07, + "loss": 0.307, + "step": 20506 + }, + { + "epoch": 0.94, + "grad_norm": 0.6954259545403917, + "learning_rate": 1.7550296113035493e-07, + "loss": 0.3573, + "step": 20507 + }, + { + "epoch": 0.94, + "grad_norm": 0.39662291682772427, + "learning_rate": 1.7522553174693091e-07, + "loss": 0.2485, + "step": 20508 + }, + { + "epoch": 0.94, + "grad_norm": 0.24883322856511075, + "learning_rate": 1.7494831987454276e-07, + "loss": 0.1795, + "step": 20509 + }, + { + "epoch": 0.94, + "grad_norm": 0.4238716187523525, + "learning_rate": 1.7467132551933107e-07, + "loss": 0.2928, + "step": 20510 + }, + { + "epoch": 0.94, + "grad_norm": 0.4501244271365935, + "learning_rate": 1.7439454868742544e-07, + "loss": 0.222, + "step": 20511 + }, + { + "epoch": 0.94, + "grad_norm": 0.9020058530950563, + "learning_rate": 1.741179893849554e-07, + "loss": 0.4087, + "step": 20512 + }, + { + "epoch": 0.94, + "grad_norm": 0.6367162663365894, + "learning_rate": 1.7384164761804266e-07, + "loss": 0.368, + "step": 20513 + }, + { + "epoch": 0.94, + "grad_norm": 0.2709768499484475, + "learning_rate": 1.7356552339280353e-07, + "loss": 0.266, + "step": 20514 + }, + { + "epoch": 0.94, + "grad_norm": 0.3725812762441721, + "learning_rate": 1.7328961671535415e-07, + "loss": 0.1006, + "step": 20515 + }, + { + "epoch": 0.94, + "grad_norm": 0.40369228025531934, + "learning_rate": 1.730139275918019e-07, + "loss": 0.2427, + "step": 20516 + }, + { + "epoch": 0.94, + "grad_norm": 0.6043181322799305, + "learning_rate": 1.7273845602824967e-07, + "loss": 0.3097, + "step": 20517 + }, + { + "epoch": 0.94, + "grad_norm": 0.3852209302611917, + "learning_rate": 1.7246320203079702e-07, + "loss": 0.2527, + "step": 20518 + }, + { + "epoch": 0.94, + "grad_norm": 0.4912208421506404, + "learning_rate": 1.7218816560553575e-07, + "loss": 0.2826, + "step": 20519 + }, + { + "epoch": 0.94, + "grad_norm": 0.5695267983866613, + "learning_rate": 1.7191334675855654e-07, + "loss": 0.3805, + "step": 20520 + }, + { + "epoch": 0.94, + "grad_norm": 0.2362732118700907, + "learning_rate": 1.716387454959434e-07, + "loss": 0.1397, + "step": 20521 + }, + { + "epoch": 0.94, + "grad_norm": 0.49685712246660824, + "learning_rate": 1.7136436182377703e-07, + "loss": 0.2723, + "step": 20522 + }, + { + "epoch": 0.94, + "grad_norm": 0.371983856161379, + "learning_rate": 1.7109019574812925e-07, + "loss": 0.2828, + "step": 20523 + }, + { + "epoch": 0.94, + "grad_norm": 0.8721997845298104, + "learning_rate": 1.7081624727507184e-07, + "loss": 0.2965, + "step": 20524 + }, + { + "epoch": 0.94, + "grad_norm": 0.6810482582357328, + "learning_rate": 1.7054251641066999e-07, + "loss": 0.4024, + "step": 20525 + }, + { + "epoch": 0.94, + "grad_norm": 0.2666340310350867, + "learning_rate": 1.7026900316098217e-07, + "loss": 0.2765, + "step": 20526 + }, + { + "epoch": 0.94, + "grad_norm": 0.6172578138634932, + "learning_rate": 1.6999570753206574e-07, + "loss": 0.2732, + "step": 20527 + }, + { + "epoch": 0.94, + "grad_norm": 0.5385625859111121, + "learning_rate": 1.6972262952996921e-07, + "loss": 0.1998, + "step": 20528 + }, + { + "epoch": 0.94, + "grad_norm": 0.4552642022317783, + "learning_rate": 1.6944976916073774e-07, + "loss": 0.2271, + "step": 20529 + }, + { + "epoch": 0.94, + "grad_norm": 0.380535445678306, + "learning_rate": 1.6917712643041539e-07, + "loss": 0.2745, + "step": 20530 + }, + { + "epoch": 0.94, + "grad_norm": 0.5232590610561363, + "learning_rate": 1.6890470134503621e-07, + "loss": 0.1596, + "step": 20531 + }, + { + "epoch": 0.94, + "grad_norm": 0.4115750124789743, + "learning_rate": 1.6863249391063097e-07, + "loss": 0.3071, + "step": 20532 + }, + { + "epoch": 0.94, + "grad_norm": 0.698334630770339, + "learning_rate": 1.6836050413322702e-07, + "loss": 0.227, + "step": 20533 + }, + { + "epoch": 0.94, + "grad_norm": 0.30837970071372506, + "learning_rate": 1.6808873201884624e-07, + "loss": 0.217, + "step": 20534 + }, + { + "epoch": 0.94, + "grad_norm": 0.4275719880438468, + "learning_rate": 1.6781717757350492e-07, + "loss": 0.2927, + "step": 20535 + }, + { + "epoch": 0.94, + "grad_norm": 0.5217407989372366, + "learning_rate": 1.675458408032138e-07, + "loss": 0.2472, + "step": 20536 + }, + { + "epoch": 0.94, + "grad_norm": 0.30058818967363676, + "learning_rate": 1.672747217139814e-07, + "loss": 0.2146, + "step": 20537 + }, + { + "epoch": 0.94, + "grad_norm": 0.42278227502314064, + "learning_rate": 1.6700382031180962e-07, + "loss": 0.348, + "step": 20538 + }, + { + "epoch": 0.94, + "grad_norm": 1.0815819135430929, + "learning_rate": 1.6673313660269695e-07, + "loss": 0.4834, + "step": 20539 + }, + { + "epoch": 0.94, + "grad_norm": 0.6030929184570383, + "learning_rate": 1.6646267059263422e-07, + "loss": 0.3081, + "step": 20540 + }, + { + "epoch": 0.94, + "grad_norm": 0.2591730134648022, + "learning_rate": 1.661924222876099e-07, + "loss": 0.1626, + "step": 20541 + }, + { + "epoch": 0.94, + "grad_norm": 0.2859069549167505, + "learning_rate": 1.6592239169360924e-07, + "loss": 0.2332, + "step": 20542 + }, + { + "epoch": 0.94, + "grad_norm": 1.299930481311688, + "learning_rate": 1.6565257881660746e-07, + "loss": 0.7919, + "step": 20543 + }, + { + "epoch": 0.94, + "grad_norm": 0.35311503431033286, + "learning_rate": 1.6538298366257975e-07, + "loss": 0.1959, + "step": 20544 + }, + { + "epoch": 0.94, + "grad_norm": 0.48688598421538504, + "learning_rate": 1.6511360623749362e-07, + "loss": 0.3158, + "step": 20545 + }, + { + "epoch": 0.94, + "grad_norm": 0.43124338825691466, + "learning_rate": 1.6484444654731203e-07, + "loss": 0.2964, + "step": 20546 + }, + { + "epoch": 0.94, + "grad_norm": 0.2523231195951359, + "learning_rate": 1.6457550459799687e-07, + "loss": 0.1541, + "step": 20547 + }, + { + "epoch": 0.94, + "grad_norm": 0.45320137850318426, + "learning_rate": 1.6430678039550008e-07, + "loss": 0.2094, + "step": 20548 + }, + { + "epoch": 0.94, + "grad_norm": 0.4281758057601902, + "learning_rate": 1.6403827394577244e-07, + "loss": 0.2951, + "step": 20549 + }, + { + "epoch": 0.94, + "grad_norm": 0.309488784806853, + "learning_rate": 1.6376998525475473e-07, + "loss": 0.2224, + "step": 20550 + }, + { + "epoch": 0.94, + "grad_norm": 0.7905387940721017, + "learning_rate": 1.6350191432839114e-07, + "loss": 0.3674, + "step": 20551 + }, + { + "epoch": 0.94, + "grad_norm": 0.663994398977876, + "learning_rate": 1.6323406117261465e-07, + "loss": 0.3309, + "step": 20552 + }, + { + "epoch": 0.94, + "grad_norm": 0.37766137762842583, + "learning_rate": 1.6296642579335497e-07, + "loss": 0.2775, + "step": 20553 + }, + { + "epoch": 0.94, + "grad_norm": 0.2269788111243829, + "learning_rate": 1.626990081965374e-07, + "loss": 0.1611, + "step": 20554 + }, + { + "epoch": 0.94, + "grad_norm": 0.852158449880713, + "learning_rate": 1.624318083880827e-07, + "loss": 0.5495, + "step": 20555 + }, + { + "epoch": 0.94, + "grad_norm": 0.41235650281134567, + "learning_rate": 1.6216482637390618e-07, + "loss": 0.291, + "step": 20556 + }, + { + "epoch": 0.94, + "grad_norm": 0.3851375127426412, + "learning_rate": 1.6189806215991865e-07, + "loss": 0.2466, + "step": 20557 + }, + { + "epoch": 0.94, + "grad_norm": 0.7487880501146439, + "learning_rate": 1.616315157520254e-07, + "loss": 0.4068, + "step": 20558 + }, + { + "epoch": 0.94, + "grad_norm": 0.3755536542120205, + "learning_rate": 1.6136518715612837e-07, + "loss": 0.2644, + "step": 20559 + }, + { + "epoch": 0.94, + "grad_norm": 0.28798944690432743, + "learning_rate": 1.6109907637812283e-07, + "loss": 0.0902, + "step": 20560 + }, + { + "epoch": 0.94, + "grad_norm": 0.38107512472268873, + "learning_rate": 1.6083318342390298e-07, + "loss": 0.2967, + "step": 20561 + }, + { + "epoch": 0.94, + "grad_norm": 0.3529802466242184, + "learning_rate": 1.6056750829935076e-07, + "loss": 0.2581, + "step": 20562 + }, + { + "epoch": 0.94, + "grad_norm": 0.6626733160826997, + "learning_rate": 1.6030205101035146e-07, + "loss": 0.3622, + "step": 20563 + }, + { + "epoch": 0.94, + "grad_norm": 2.0577084282109124, + "learning_rate": 1.600368115627826e-07, + "loss": 0.1926, + "step": 20564 + }, + { + "epoch": 0.94, + "grad_norm": 0.3023344382331097, + "learning_rate": 1.5977178996251285e-07, + "loss": 0.2512, + "step": 20565 + }, + { + "epoch": 0.94, + "grad_norm": 0.3615232549958181, + "learning_rate": 1.5950698621541305e-07, + "loss": 0.2347, + "step": 20566 + }, + { + "epoch": 0.94, + "grad_norm": 0.49178067681717913, + "learning_rate": 1.5924240032734296e-07, + "loss": 0.1325, + "step": 20567 + }, + { + "epoch": 0.94, + "grad_norm": 0.4025581359812394, + "learning_rate": 1.589780323041623e-07, + "loss": 0.2734, + "step": 20568 + }, + { + "epoch": 0.94, + "grad_norm": 0.4933478701809438, + "learning_rate": 1.5871388215172202e-07, + "loss": 0.3347, + "step": 20569 + }, + { + "epoch": 0.94, + "grad_norm": 0.4630282346162931, + "learning_rate": 1.5844994987587293e-07, + "loss": 0.2065, + "step": 20570 + }, + { + "epoch": 0.95, + "grad_norm": 0.40562905503907637, + "learning_rate": 1.5818623548245482e-07, + "loss": 0.2678, + "step": 20571 + }, + { + "epoch": 0.95, + "grad_norm": 0.2776803886630753, + "learning_rate": 1.5792273897730858e-07, + "loss": 0.1698, + "step": 20572 + }, + { + "epoch": 0.95, + "grad_norm": 0.36350197279380075, + "learning_rate": 1.5765946036626734e-07, + "loss": 0.2484, + "step": 20573 + }, + { + "epoch": 0.95, + "grad_norm": 0.38383679749533844, + "learning_rate": 1.5739639965515863e-07, + "loss": 0.2664, + "step": 20574 + }, + { + "epoch": 0.95, + "grad_norm": 0.7057024799790953, + "learning_rate": 1.571335568498078e-07, + "loss": 0.3588, + "step": 20575 + }, + { + "epoch": 0.95, + "grad_norm": 1.753413046148868, + "learning_rate": 1.5687093195603353e-07, + "loss": 0.4749, + "step": 20576 + }, + { + "epoch": 0.95, + "grad_norm": 0.3064426030160529, + "learning_rate": 1.5660852497965008e-07, + "loss": 0.1812, + "step": 20577 + }, + { + "epoch": 0.95, + "grad_norm": 0.23913216714039906, + "learning_rate": 1.563463359264661e-07, + "loss": 0.205, + "step": 20578 + }, + { + "epoch": 0.95, + "grad_norm": 0.8204179766454661, + "learning_rate": 1.5608436480228696e-07, + "loss": 0.3715, + "step": 20579 + }, + { + "epoch": 0.95, + "grad_norm": 0.3577960960543862, + "learning_rate": 1.5582261161291246e-07, + "loss": 0.2183, + "step": 20580 + }, + { + "epoch": 0.95, + "grad_norm": 0.3716783122079666, + "learning_rate": 1.5556107636413685e-07, + "loss": 0.3227, + "step": 20581 + }, + { + "epoch": 0.95, + "grad_norm": 1.5209756670900243, + "learning_rate": 1.552997590617511e-07, + "loss": 0.6691, + "step": 20582 + }, + { + "epoch": 0.95, + "grad_norm": 0.3473830868594975, + "learning_rate": 1.5503865971154052e-07, + "loss": 0.1888, + "step": 20583 + }, + { + "epoch": 0.95, + "grad_norm": 0.6784745063417846, + "learning_rate": 1.5477777831928497e-07, + "loss": 0.2514, + "step": 20584 + }, + { + "epoch": 0.95, + "grad_norm": 0.3620669425077828, + "learning_rate": 1.5451711489076094e-07, + "loss": 0.2963, + "step": 20585 + }, + { + "epoch": 0.95, + "grad_norm": 0.33849070350144034, + "learning_rate": 1.5425666943173822e-07, + "loss": 0.2119, + "step": 20586 + }, + { + "epoch": 0.95, + "grad_norm": 1.397768748868032, + "learning_rate": 1.5399644194798335e-07, + "loss": 0.7126, + "step": 20587 + }, + { + "epoch": 0.95, + "grad_norm": 0.47104127197102663, + "learning_rate": 1.5373643244525838e-07, + "loss": 0.2108, + "step": 20588 + }, + { + "epoch": 0.95, + "grad_norm": 0.44504718512374813, + "learning_rate": 1.5347664092931758e-07, + "loss": 0.2428, + "step": 20589 + }, + { + "epoch": 0.95, + "grad_norm": 0.3614696211523254, + "learning_rate": 1.5321706740591525e-07, + "loss": 0.2223, + "step": 20590 + }, + { + "epoch": 0.95, + "grad_norm": 0.6075341928077955, + "learning_rate": 1.5295771188079568e-07, + "loss": 0.3516, + "step": 20591 + }, + { + "epoch": 0.95, + "grad_norm": 0.4267960389882433, + "learning_rate": 1.5269857435970093e-07, + "loss": 0.2854, + "step": 20592 + }, + { + "epoch": 0.95, + "grad_norm": 0.2935933744480745, + "learning_rate": 1.5243965484837086e-07, + "loss": 0.2481, + "step": 20593 + }, + { + "epoch": 0.95, + "grad_norm": 0.31129415850020214, + "learning_rate": 1.5218095335253423e-07, + "loss": 0.1579, + "step": 20594 + }, + { + "epoch": 0.95, + "grad_norm": 0.4256009005006297, + "learning_rate": 1.519224698779198e-07, + "loss": 0.2764, + "step": 20595 + }, + { + "epoch": 0.95, + "grad_norm": 0.5257367849838214, + "learning_rate": 1.5166420443025076e-07, + "loss": 0.2458, + "step": 20596 + }, + { + "epoch": 0.95, + "grad_norm": 0.3465059048756922, + "learning_rate": 1.5140615701524364e-07, + "loss": 0.2761, + "step": 20597 + }, + { + "epoch": 0.95, + "grad_norm": 0.537112871215346, + "learning_rate": 1.5114832763861164e-07, + "loss": 0.2656, + "step": 20598 + }, + { + "epoch": 0.95, + "grad_norm": 0.22901007055540692, + "learning_rate": 1.5089071630606466e-07, + "loss": 0.1469, + "step": 20599 + }, + { + "epoch": 0.95, + "grad_norm": 1.6622188424882447, + "learning_rate": 1.5063332302330368e-07, + "loss": 0.5666, + "step": 20600 + }, + { + "epoch": 0.95, + "grad_norm": 0.2876046519019182, + "learning_rate": 1.5037614779602748e-07, + "loss": 0.2321, + "step": 20601 + }, + { + "epoch": 0.95, + "grad_norm": 0.5065872005160751, + "learning_rate": 1.5011919062993152e-07, + "loss": 0.3089, + "step": 20602 + }, + { + "epoch": 0.95, + "grad_norm": 0.6682465626191572, + "learning_rate": 1.4986245153070234e-07, + "loss": 0.275, + "step": 20603 + }, + { + "epoch": 0.95, + "grad_norm": 0.32859676915869407, + "learning_rate": 1.4960593050402537e-07, + "loss": 0.2864, + "step": 20604 + }, + { + "epoch": 0.95, + "grad_norm": 0.5342556839934085, + "learning_rate": 1.4934962755557835e-07, + "loss": 0.363, + "step": 20605 + }, + { + "epoch": 0.95, + "grad_norm": 0.3278569763690953, + "learning_rate": 1.4909354269103672e-07, + "loss": 0.1507, + "step": 20606 + }, + { + "epoch": 0.95, + "grad_norm": 0.446644126715135, + "learning_rate": 1.4883767591606924e-07, + "loss": 0.244, + "step": 20607 + }, + { + "epoch": 0.95, + "grad_norm": 0.544534397646364, + "learning_rate": 1.485820272363414e-07, + "loss": 0.2862, + "step": 20608 + }, + { + "epoch": 0.95, + "grad_norm": 0.37654454237145846, + "learning_rate": 1.4832659665751316e-07, + "loss": 0.258, + "step": 20609 + }, + { + "epoch": 0.95, + "grad_norm": 0.45323773736872014, + "learning_rate": 1.480713841852377e-07, + "loss": 0.2686, + "step": 20610 + }, + { + "epoch": 0.95, + "grad_norm": 0.5941862624360262, + "learning_rate": 1.4781638982516723e-07, + "loss": 0.3929, + "step": 20611 + }, + { + "epoch": 0.95, + "grad_norm": 0.2155394146196827, + "learning_rate": 1.4756161358294608e-07, + "loss": 0.1583, + "step": 20612 + }, + { + "epoch": 0.95, + "grad_norm": 0.4154579073962875, + "learning_rate": 1.4730705546421532e-07, + "loss": 0.2533, + "step": 20613 + }, + { + "epoch": 0.95, + "grad_norm": 0.5089990303570201, + "learning_rate": 1.470527154746093e-07, + "loss": 0.3154, + "step": 20614 + }, + { + "epoch": 0.95, + "grad_norm": 1.49968892730712, + "learning_rate": 1.4679859361975913e-07, + "loss": 0.4487, + "step": 20615 + }, + { + "epoch": 0.95, + "grad_norm": 0.37013722902406837, + "learning_rate": 1.4654468990529357e-07, + "loss": 0.21, + "step": 20616 + }, + { + "epoch": 0.95, + "grad_norm": 0.3397656695057043, + "learning_rate": 1.4629100433683042e-07, + "loss": 0.2776, + "step": 20617 + }, + { + "epoch": 0.95, + "grad_norm": 0.3819614158423416, + "learning_rate": 1.4603753691998735e-07, + "loss": 0.1724, + "step": 20618 + }, + { + "epoch": 0.95, + "grad_norm": 0.3867415984829305, + "learning_rate": 1.4578428766037654e-07, + "loss": 0.1878, + "step": 20619 + }, + { + "epoch": 0.95, + "grad_norm": 0.5483608516327109, + "learning_rate": 1.4553125656360245e-07, + "loss": 0.3424, + "step": 20620 + }, + { + "epoch": 0.95, + "grad_norm": 0.4296049875352283, + "learning_rate": 1.4527844363527056e-07, + "loss": 0.3065, + "step": 20621 + }, + { + "epoch": 0.95, + "grad_norm": 0.3240333291506084, + "learning_rate": 1.4502584888097416e-07, + "loss": 0.208, + "step": 20622 + }, + { + "epoch": 0.95, + "grad_norm": 1.4150929266549446, + "learning_rate": 1.4477347230630767e-07, + "loss": 0.7787, + "step": 20623 + }, + { + "epoch": 0.95, + "grad_norm": 0.3838270226282027, + "learning_rate": 1.4452131391685776e-07, + "loss": 0.2724, + "step": 20624 + }, + { + "epoch": 0.95, + "grad_norm": 0.3372114949609897, + "learning_rate": 1.4426937371820772e-07, + "loss": 0.2132, + "step": 20625 + }, + { + "epoch": 0.95, + "grad_norm": 0.3686484191851059, + "learning_rate": 1.4401765171593418e-07, + "loss": 0.2695, + "step": 20626 + }, + { + "epoch": 0.95, + "grad_norm": 1.3768605775936185, + "learning_rate": 1.437661479156094e-07, + "loss": 0.5532, + "step": 20627 + }, + { + "epoch": 0.95, + "grad_norm": 0.8457937336742294, + "learning_rate": 1.4351486232280442e-07, + "loss": 0.4219, + "step": 20628 + }, + { + "epoch": 0.95, + "grad_norm": 0.2697674485887778, + "learning_rate": 1.432637949430804e-07, + "loss": 0.2113, + "step": 20629 + }, + { + "epoch": 0.95, + "grad_norm": 0.6791989645071949, + "learning_rate": 1.4301294578199508e-07, + "loss": 0.3447, + "step": 20630 + }, + { + "epoch": 0.95, + "grad_norm": 0.5945166450743877, + "learning_rate": 1.4276231484510295e-07, + "loss": 0.3097, + "step": 20631 + }, + { + "epoch": 0.95, + "grad_norm": 0.23192793431062594, + "learning_rate": 1.4251190213795286e-07, + "loss": 0.1518, + "step": 20632 + }, + { + "epoch": 0.95, + "grad_norm": 0.3688595092365043, + "learning_rate": 1.4226170766608927e-07, + "loss": 0.2828, + "step": 20633 + }, + { + "epoch": 0.95, + "grad_norm": 0.606395385121721, + "learning_rate": 1.4201173143504887e-07, + "loss": 0.3729, + "step": 20634 + }, + { + "epoch": 0.95, + "grad_norm": 0.42974434574059384, + "learning_rate": 1.4176197345036835e-07, + "loss": 0.2249, + "step": 20635 + }, + { + "epoch": 0.95, + "grad_norm": 0.509120063420832, + "learning_rate": 1.4151243371757663e-07, + "loss": 0.3111, + "step": 20636 + }, + { + "epoch": 0.95, + "grad_norm": 0.42286792034774806, + "learning_rate": 1.4126311224219702e-07, + "loss": 0.2814, + "step": 20637 + }, + { + "epoch": 0.95, + "grad_norm": 0.30655206484798125, + "learning_rate": 1.410140090297507e-07, + "loss": 0.172, + "step": 20638 + }, + { + "epoch": 0.95, + "grad_norm": 0.5224652183597427, + "learning_rate": 1.4076512408575216e-07, + "loss": 0.2152, + "step": 20639 + }, + { + "epoch": 0.95, + "grad_norm": 0.4291925973995371, + "learning_rate": 1.405164574157103e-07, + "loss": 0.302, + "step": 20640 + }, + { + "epoch": 0.95, + "grad_norm": 0.35032888082878083, + "learning_rate": 1.4026800902513293e-07, + "loss": 0.2827, + "step": 20641 + }, + { + "epoch": 0.95, + "grad_norm": 0.6445050792114264, + "learning_rate": 1.400197789195179e-07, + "loss": 0.2656, + "step": 20642 + }, + { + "epoch": 0.95, + "grad_norm": 0.4564393594952866, + "learning_rate": 1.3977176710436191e-07, + "loss": 0.3001, + "step": 20643 + }, + { + "epoch": 0.95, + "grad_norm": 0.27278780212702575, + "learning_rate": 1.39523973585155e-07, + "loss": 0.1968, + "step": 20644 + }, + { + "epoch": 0.95, + "grad_norm": 0.3955634121940631, + "learning_rate": 1.3927639836738505e-07, + "loss": 0.2632, + "step": 20645 + }, + { + "epoch": 0.95, + "grad_norm": 0.9021953705898903, + "learning_rate": 1.3902904145653094e-07, + "loss": 0.417, + "step": 20646 + }, + { + "epoch": 0.95, + "grad_norm": 0.35996958783814886, + "learning_rate": 1.3878190285807057e-07, + "loss": 0.2474, + "step": 20647 + }, + { + "epoch": 0.95, + "grad_norm": 0.338763963002527, + "learning_rate": 1.385349825774729e-07, + "loss": 0.2542, + "step": 20648 + }, + { + "epoch": 0.95, + "grad_norm": 0.9139617686952466, + "learning_rate": 1.3828828062020683e-07, + "loss": 0.418, + "step": 20649 + }, + { + "epoch": 0.95, + "grad_norm": 0.2717189498012119, + "learning_rate": 1.3804179699173358e-07, + "loss": 0.203, + "step": 20650 + }, + { + "epoch": 0.95, + "grad_norm": 0.2615893599229896, + "learning_rate": 1.3779553169750992e-07, + "loss": 0.1196, + "step": 20651 + }, + { + "epoch": 0.95, + "grad_norm": 0.38169336698347794, + "learning_rate": 1.37549484742987e-07, + "loss": 0.2764, + "step": 20652 + }, + { + "epoch": 0.95, + "grad_norm": 0.4473010113881805, + "learning_rate": 1.3730365613361497e-07, + "loss": 0.2667, + "step": 20653 + }, + { + "epoch": 0.95, + "grad_norm": 0.7559415895602231, + "learning_rate": 1.3705804587483274e-07, + "loss": 0.3831, + "step": 20654 + }, + { + "epoch": 0.95, + "grad_norm": 0.760355300796119, + "learning_rate": 1.3681265397207932e-07, + "loss": 0.1242, + "step": 20655 + }, + { + "epoch": 0.95, + "grad_norm": 0.33682353287116307, + "learning_rate": 1.3656748043078815e-07, + "loss": 0.2314, + "step": 20656 + }, + { + "epoch": 0.95, + "grad_norm": 0.2804567454971787, + "learning_rate": 1.363225252563849e-07, + "loss": 0.2373, + "step": 20657 + }, + { + "epoch": 0.95, + "grad_norm": 0.47407169529877385, + "learning_rate": 1.360777884542963e-07, + "loss": 0.2181, + "step": 20658 + }, + { + "epoch": 0.95, + "grad_norm": 0.3998187148872344, + "learning_rate": 1.3583327002993695e-07, + "loss": 0.3059, + "step": 20659 + }, + { + "epoch": 0.95, + "grad_norm": 0.6215313915293608, + "learning_rate": 1.355889699887225e-07, + "loss": 0.3201, + "step": 20660 + }, + { + "epoch": 0.95, + "grad_norm": 0.4748953965664612, + "learning_rate": 1.3534488833605976e-07, + "loss": 0.1897, + "step": 20661 + }, + { + "epoch": 0.95, + "grad_norm": 0.42257722539914655, + "learning_rate": 1.3510102507735544e-07, + "loss": 0.2805, + "step": 20662 + }, + { + "epoch": 0.95, + "grad_norm": 0.3065216146144009, + "learning_rate": 1.3485738021800532e-07, + "loss": 0.1956, + "step": 20663 + }, + { + "epoch": 0.95, + "grad_norm": 0.31780619837123913, + "learning_rate": 1.3461395376340502e-07, + "loss": 0.2208, + "step": 20664 + }, + { + "epoch": 0.95, + "grad_norm": 0.3641136963474888, + "learning_rate": 1.343707457189425e-07, + "loss": 0.2654, + "step": 20665 + }, + { + "epoch": 0.95, + "grad_norm": 1.0031694703250642, + "learning_rate": 1.341277560900034e-07, + "loss": 0.4591, + "step": 20666 + }, + { + "epoch": 0.95, + "grad_norm": 1.71204442386652, + "learning_rate": 1.3388498488196787e-07, + "loss": 0.7008, + "step": 20667 + }, + { + "epoch": 0.95, + "grad_norm": 0.2532703919088304, + "learning_rate": 1.3364243210020943e-07, + "loss": 0.2021, + "step": 20668 + }, + { + "epoch": 0.95, + "grad_norm": 0.33679116476752935, + "learning_rate": 1.334000977500982e-07, + "loss": 0.2414, + "step": 20669 + }, + { + "epoch": 0.95, + "grad_norm": 0.6846364611720372, + "learning_rate": 1.331579818369988e-07, + "loss": 0.3841, + "step": 20670 + }, + { + "epoch": 0.95, + "grad_norm": 0.3552760556877613, + "learning_rate": 1.3291608436627135e-07, + "loss": 0.2167, + "step": 20671 + }, + { + "epoch": 0.95, + "grad_norm": 0.3897932103621604, + "learning_rate": 1.3267440534327381e-07, + "loss": 0.2763, + "step": 20672 + }, + { + "epoch": 0.95, + "grad_norm": 0.5019402997556139, + "learning_rate": 1.32432944773353e-07, + "loss": 0.288, + "step": 20673 + }, + { + "epoch": 0.95, + "grad_norm": 0.3753268856216738, + "learning_rate": 1.3219170266185577e-07, + "loss": 0.1982, + "step": 20674 + }, + { + "epoch": 0.95, + "grad_norm": 0.460201394051659, + "learning_rate": 1.319506790141245e-07, + "loss": 0.2653, + "step": 20675 + }, + { + "epoch": 0.95, + "grad_norm": 0.32518520618545643, + "learning_rate": 1.3170987383549495e-07, + "loss": 0.269, + "step": 20676 + }, + { + "epoch": 0.95, + "grad_norm": 0.34316726746303916, + "learning_rate": 1.3146928713129726e-07, + "loss": 0.2182, + "step": 20677 + }, + { + "epoch": 0.95, + "grad_norm": 0.9130698899227945, + "learning_rate": 1.3122891890685606e-07, + "loss": 0.2913, + "step": 20678 + }, + { + "epoch": 0.95, + "grad_norm": 1.0363730179381838, + "learning_rate": 1.3098876916749713e-07, + "loss": 0.324, + "step": 20679 + }, + { + "epoch": 0.95, + "grad_norm": 0.3781339361250915, + "learning_rate": 1.3074883791853398e-07, + "loss": 0.2611, + "step": 20680 + }, + { + "epoch": 0.95, + "grad_norm": 0.3518694956616934, + "learning_rate": 1.3050912516528125e-07, + "loss": 0.2612, + "step": 20681 + }, + { + "epoch": 0.95, + "grad_norm": 0.7204022986248192, + "learning_rate": 1.3026963091304246e-07, + "loss": 0.4051, + "step": 20682 + }, + { + "epoch": 0.95, + "grad_norm": 0.38637551150969396, + "learning_rate": 1.3003035516712116e-07, + "loss": 0.2781, + "step": 20683 + }, + { + "epoch": 0.95, + "grad_norm": 0.2462810600346777, + "learning_rate": 1.2979129793281641e-07, + "loss": 0.1754, + "step": 20684 + }, + { + "epoch": 0.95, + "grad_norm": 1.2602664983780982, + "learning_rate": 1.2955245921541847e-07, + "loss": 0.4663, + "step": 20685 + }, + { + "epoch": 0.95, + "grad_norm": 0.5554679570428661, + "learning_rate": 1.2931383902021533e-07, + "loss": 0.2438, + "step": 20686 + }, + { + "epoch": 0.95, + "grad_norm": 0.6487358845247734, + "learning_rate": 1.2907543735249163e-07, + "loss": 0.2884, + "step": 20687 + }, + { + "epoch": 0.95, + "grad_norm": 0.3920177312651138, + "learning_rate": 1.2883725421752203e-07, + "loss": 0.3114, + "step": 20688 + }, + { + "epoch": 0.95, + "grad_norm": 0.3368389466333891, + "learning_rate": 1.2859928962058344e-07, + "loss": 0.256, + "step": 20689 + }, + { + "epoch": 0.95, + "grad_norm": 0.41373404903617544, + "learning_rate": 1.2836154356694163e-07, + "loss": 0.2117, + "step": 20690 + }, + { + "epoch": 0.95, + "grad_norm": 0.38659062207455097, + "learning_rate": 1.281240160618613e-07, + "loss": 0.1759, + "step": 20691 + }, + { + "epoch": 0.95, + "grad_norm": 0.27482368293027104, + "learning_rate": 1.2788670711060046e-07, + "loss": 0.2337, + "step": 20692 + }, + { + "epoch": 0.95, + "grad_norm": 0.9042109860427079, + "learning_rate": 1.276496167184127e-07, + "loss": 0.3832, + "step": 20693 + }, + { + "epoch": 0.95, + "grad_norm": 0.7685272778489982, + "learning_rate": 1.2741274489054823e-07, + "loss": 0.294, + "step": 20694 + }, + { + "epoch": 0.95, + "grad_norm": 0.3663933729094589, + "learning_rate": 1.2717609163224843e-07, + "loss": 0.2583, + "step": 20695 + }, + { + "epoch": 0.95, + "grad_norm": 0.30759146560388095, + "learning_rate": 1.2693965694875689e-07, + "loss": 0.2567, + "step": 20696 + }, + { + "epoch": 0.95, + "grad_norm": 0.381156015514647, + "learning_rate": 1.2670344084530384e-07, + "loss": 0.0775, + "step": 20697 + }, + { + "epoch": 0.95, + "grad_norm": 0.37905845851795905, + "learning_rate": 1.264674433271218e-07, + "loss": 0.2822, + "step": 20698 + }, + { + "epoch": 0.95, + "grad_norm": 0.6889517287067817, + "learning_rate": 1.2623166439943325e-07, + "loss": 0.3555, + "step": 20699 + }, + { + "epoch": 0.95, + "grad_norm": 0.3563619249963613, + "learning_rate": 1.2599610406745844e-07, + "loss": 0.2458, + "step": 20700 + }, + { + "epoch": 0.95, + "grad_norm": 0.35179627987335754, + "learning_rate": 1.2576076233641543e-07, + "loss": 0.2707, + "step": 20701 + }, + { + "epoch": 0.95, + "grad_norm": 0.3352054568594747, + "learning_rate": 1.2552563921151116e-07, + "loss": 0.1673, + "step": 20702 + }, + { + "epoch": 0.95, + "grad_norm": 1.8334751571614538, + "learning_rate": 1.252907346979515e-07, + "loss": 0.4468, + "step": 20703 + }, + { + "epoch": 0.95, + "grad_norm": 0.24162683751234798, + "learning_rate": 1.2505604880093892e-07, + "loss": 0.218, + "step": 20704 + }, + { + "epoch": 0.95, + "grad_norm": 0.6351750612395176, + "learning_rate": 1.2482158152566591e-07, + "loss": 0.3568, + "step": 20705 + }, + { + "epoch": 0.95, + "grad_norm": 1.3546354890731713, + "learning_rate": 1.2458733287732728e-07, + "loss": 0.6961, + "step": 20706 + }, + { + "epoch": 0.95, + "grad_norm": 0.3052501612605196, + "learning_rate": 1.2435330286110549e-07, + "loss": 0.1915, + "step": 20707 + }, + { + "epoch": 0.95, + "grad_norm": 0.368190222128102, + "learning_rate": 1.2411949148218415e-07, + "loss": 0.278, + "step": 20708 + }, + { + "epoch": 0.95, + "grad_norm": 0.3489554686883393, + "learning_rate": 1.2388589874573808e-07, + "loss": 0.1947, + "step": 20709 + }, + { + "epoch": 0.95, + "grad_norm": 0.37506395325684777, + "learning_rate": 1.2365252465694088e-07, + "loss": 0.2263, + "step": 20710 + }, + { + "epoch": 0.95, + "grad_norm": 1.384312316971383, + "learning_rate": 1.234193692209562e-07, + "loss": 0.4955, + "step": 20711 + }, + { + "epoch": 0.95, + "grad_norm": 0.35621244535515667, + "learning_rate": 1.2318643244294882e-07, + "loss": 0.311, + "step": 20712 + }, + { + "epoch": 0.95, + "grad_norm": 0.3651315739647565, + "learning_rate": 1.2295371432807346e-07, + "loss": 0.1753, + "step": 20713 + }, + { + "epoch": 0.95, + "grad_norm": 0.39302018827370017, + "learning_rate": 1.227212148814838e-07, + "loss": 0.212, + "step": 20714 + }, + { + "epoch": 0.95, + "grad_norm": 0.3522928584184849, + "learning_rate": 1.2248893410832686e-07, + "loss": 0.2516, + "step": 20715 + }, + { + "epoch": 0.95, + "grad_norm": 0.4660371806244114, + "learning_rate": 1.2225687201374403e-07, + "loss": 0.2265, + "step": 20716 + }, + { + "epoch": 0.95, + "grad_norm": 0.39455263510759686, + "learning_rate": 1.2202502860287457e-07, + "loss": 0.2294, + "step": 20717 + }, + { + "epoch": 0.95, + "grad_norm": 1.2189134934369297, + "learning_rate": 1.2179340388084993e-07, + "loss": 0.7798, + "step": 20718 + }, + { + "epoch": 0.95, + "grad_norm": 0.5190433423006692, + "learning_rate": 1.215619978527993e-07, + "loss": 0.3028, + "step": 20719 + }, + { + "epoch": 0.95, + "grad_norm": 0.3132570596805649, + "learning_rate": 1.213308105238442e-07, + "loss": 0.2226, + "step": 20720 + }, + { + "epoch": 0.95, + "grad_norm": 0.5332478859318329, + "learning_rate": 1.2109984189910385e-07, + "loss": 0.2989, + "step": 20721 + }, + { + "epoch": 0.95, + "grad_norm": 0.4423235642743931, + "learning_rate": 1.208690919836919e-07, + "loss": 0.2554, + "step": 20722 + }, + { + "epoch": 0.95, + "grad_norm": 0.3155619412854641, + "learning_rate": 1.2063856078271762e-07, + "loss": 0.1457, + "step": 20723 + }, + { + "epoch": 0.95, + "grad_norm": 0.34451037738490725, + "learning_rate": 1.204082483012825e-07, + "loss": 0.2968, + "step": 20724 + }, + { + "epoch": 0.95, + "grad_norm": 0.3284787047881008, + "learning_rate": 1.2017815454448578e-07, + "loss": 0.2401, + "step": 20725 + }, + { + "epoch": 0.95, + "grad_norm": 1.5195555770389837, + "learning_rate": 1.1994827951742338e-07, + "loss": 0.1955, + "step": 20726 + }, + { + "epoch": 0.95, + "grad_norm": 0.4820150201165967, + "learning_rate": 1.1971862322518458e-07, + "loss": 0.3291, + "step": 20727 + }, + { + "epoch": 0.95, + "grad_norm": 0.27260686122269695, + "learning_rate": 1.1948918567285195e-07, + "loss": 0.2434, + "step": 20728 + }, + { + "epoch": 0.95, + "grad_norm": 0.27323742305774895, + "learning_rate": 1.192599668655059e-07, + "loss": 0.1387, + "step": 20729 + }, + { + "epoch": 0.95, + "grad_norm": 0.9526749972963651, + "learning_rate": 1.1903096680822012e-07, + "loss": 0.5031, + "step": 20730 + }, + { + "epoch": 0.95, + "grad_norm": 0.7747997667842748, + "learning_rate": 1.1880218550606615e-07, + "loss": 0.3078, + "step": 20731 + }, + { + "epoch": 0.95, + "grad_norm": 0.258047288689007, + "learning_rate": 1.1857362296410879e-07, + "loss": 0.2493, + "step": 20732 + }, + { + "epoch": 0.95, + "grad_norm": 0.6623880705877354, + "learning_rate": 1.1834527918740624e-07, + "loss": 0.2828, + "step": 20733 + }, + { + "epoch": 0.95, + "grad_norm": 0.34876351700589153, + "learning_rate": 1.1811715418101555e-07, + "loss": 0.1744, + "step": 20734 + }, + { + "epoch": 0.95, + "grad_norm": 0.3213230210993715, + "learning_rate": 1.1788924794998713e-07, + "loss": 0.2311, + "step": 20735 + }, + { + "epoch": 0.95, + "grad_norm": 0.3290569298079769, + "learning_rate": 1.1766156049936583e-07, + "loss": 0.2474, + "step": 20736 + }, + { + "epoch": 0.95, + "grad_norm": 0.5332233490166133, + "learning_rate": 1.1743409183419319e-07, + "loss": 0.2679, + "step": 20737 + }, + { + "epoch": 0.95, + "grad_norm": 0.5146897110260323, + "learning_rate": 1.1720684195950405e-07, + "loss": 0.3003, + "step": 20738 + }, + { + "epoch": 0.95, + "grad_norm": 0.4627086697692388, + "learning_rate": 1.1697981088033106e-07, + "loss": 0.2535, + "step": 20739 + }, + { + "epoch": 0.95, + "grad_norm": 0.3682057158955699, + "learning_rate": 1.167529986017002e-07, + "loss": 0.2568, + "step": 20740 + }, + { + "epoch": 0.95, + "grad_norm": 0.30254631513094815, + "learning_rate": 1.1652640512863189e-07, + "loss": 0.1902, + "step": 20741 + }, + { + "epoch": 0.95, + "grad_norm": 1.3528080011684007, + "learning_rate": 1.1630003046614324e-07, + "loss": 0.7938, + "step": 20742 + }, + { + "epoch": 0.95, + "grad_norm": 0.4497056766571873, + "learning_rate": 1.1607387461924468e-07, + "loss": 0.1895, + "step": 20743 + }, + { + "epoch": 0.95, + "grad_norm": 0.30125257439753106, + "learning_rate": 1.1584793759294555e-07, + "loss": 0.2652, + "step": 20744 + }, + { + "epoch": 0.95, + "grad_norm": 0.7438882343291316, + "learning_rate": 1.1562221939224627e-07, + "loss": 0.3452, + "step": 20745 + }, + { + "epoch": 0.95, + "grad_norm": 0.33698653173336357, + "learning_rate": 1.1539672002214508e-07, + "loss": 0.191, + "step": 20746 + }, + { + "epoch": 0.95, + "grad_norm": 0.2946056486933315, + "learning_rate": 1.1517143948763243e-07, + "loss": 0.1996, + "step": 20747 + }, + { + "epoch": 0.95, + "grad_norm": 0.39341762678260567, + "learning_rate": 1.1494637779369766e-07, + "loss": 0.329, + "step": 20748 + }, + { + "epoch": 0.95, + "grad_norm": 0.6238143961986106, + "learning_rate": 1.1472153494532235e-07, + "loss": 0.1598, + "step": 20749 + }, + { + "epoch": 0.95, + "grad_norm": 0.40356620534238624, + "learning_rate": 1.1449691094748472e-07, + "loss": 0.3182, + "step": 20750 + }, + { + "epoch": 0.95, + "grad_norm": 0.391425105791511, + "learning_rate": 1.1427250580515859e-07, + "loss": 0.2922, + "step": 20751 + }, + { + "epoch": 0.95, + "grad_norm": 0.8669023789594598, + "learning_rate": 1.1404831952330997e-07, + "loss": 0.096, + "step": 20752 + }, + { + "epoch": 0.95, + "grad_norm": 0.2996275123671956, + "learning_rate": 1.138243521069038e-07, + "loss": 0.2367, + "step": 20753 + }, + { + "epoch": 0.95, + "grad_norm": 0.4650952847327551, + "learning_rate": 1.1360060356089941e-07, + "loss": 0.3078, + "step": 20754 + }, + { + "epoch": 0.95, + "grad_norm": 0.44300937727253387, + "learning_rate": 1.1337707389024621e-07, + "loss": 0.2636, + "step": 20755 + }, + { + "epoch": 0.95, + "grad_norm": 0.29016510058034867, + "learning_rate": 1.13153763099898e-07, + "loss": 0.2406, + "step": 20756 + }, + { + "epoch": 0.95, + "grad_norm": 1.663982122903921, + "learning_rate": 1.1293067119479528e-07, + "loss": 0.4673, + "step": 20757 + }, + { + "epoch": 0.95, + "grad_norm": 0.8334899444616646, + "learning_rate": 1.1270779817987965e-07, + "loss": 0.306, + "step": 20758 + }, + { + "epoch": 0.95, + "grad_norm": 0.22990419405453819, + "learning_rate": 1.124851440600827e-07, + "loss": 0.2129, + "step": 20759 + }, + { + "epoch": 0.95, + "grad_norm": 1.2773472435999411, + "learning_rate": 1.1226270884033386e-07, + "loss": 0.7469, + "step": 20760 + }, + { + "epoch": 0.95, + "grad_norm": 0.4839664252259881, + "learning_rate": 1.1204049252556138e-07, + "loss": 0.2829, + "step": 20761 + }, + { + "epoch": 0.95, + "grad_norm": 0.2987469381954329, + "learning_rate": 1.1181849512068021e-07, + "loss": 0.1826, + "step": 20762 + }, + { + "epoch": 0.95, + "grad_norm": 0.41178376215444074, + "learning_rate": 1.1159671663060868e-07, + "loss": 0.3073, + "step": 20763 + }, + { + "epoch": 0.95, + "grad_norm": 0.7568904686394, + "learning_rate": 1.1137515706025393e-07, + "loss": 0.2657, + "step": 20764 + }, + { + "epoch": 0.95, + "grad_norm": 0.2816664680916136, + "learning_rate": 1.1115381641452205e-07, + "loss": 0.1679, + "step": 20765 + }, + { + "epoch": 0.95, + "grad_norm": 0.6387373290028822, + "learning_rate": 1.109326946983158e-07, + "loss": 0.3826, + "step": 20766 + }, + { + "epoch": 0.95, + "grad_norm": 0.4362002869971888, + "learning_rate": 1.1071179191652681e-07, + "loss": 0.2812, + "step": 20767 + }, + { + "epoch": 0.95, + "grad_norm": 0.3125475131392368, + "learning_rate": 1.1049110807404783e-07, + "loss": 0.2836, + "step": 20768 + }, + { + "epoch": 0.95, + "grad_norm": 0.48275375279537397, + "learning_rate": 1.1027064317576386e-07, + "loss": 0.1138, + "step": 20769 + }, + { + "epoch": 0.95, + "grad_norm": 1.0128665941014303, + "learning_rate": 1.1005039722655653e-07, + "loss": 0.3423, + "step": 20770 + }, + { + "epoch": 0.95, + "grad_norm": 0.29100319660013674, + "learning_rate": 1.0983037023130083e-07, + "loss": 0.2213, + "step": 20771 + }, + { + "epoch": 0.95, + "grad_norm": 0.35666283230782536, + "learning_rate": 1.0961056219486843e-07, + "loss": 0.2592, + "step": 20772 + }, + { + "epoch": 0.95, + "grad_norm": 0.8541162473652517, + "learning_rate": 1.0939097312212543e-07, + "loss": 0.3468, + "step": 20773 + }, + { + "epoch": 0.95, + "grad_norm": 0.278406707094813, + "learning_rate": 1.0917160301793461e-07, + "loss": 0.2203, + "step": 20774 + }, + { + "epoch": 0.95, + "grad_norm": 0.39257826060095213, + "learning_rate": 1.0895245188715097e-07, + "loss": 0.2324, + "step": 20775 + }, + { + "epoch": 0.95, + "grad_norm": 0.9375911849386959, + "learning_rate": 1.0873351973462731e-07, + "loss": 0.4127, + "step": 20776 + }, + { + "epoch": 0.95, + "grad_norm": 0.38924173456233513, + "learning_rate": 1.0851480656520975e-07, + "loss": 0.2777, + "step": 20777 + }, + { + "epoch": 0.95, + "grad_norm": 0.6350143675389002, + "learning_rate": 1.0829631238374105e-07, + "loss": 0.2744, + "step": 20778 + }, + { + "epoch": 0.95, + "grad_norm": 0.31948055623560906, + "learning_rate": 1.0807803719505849e-07, + "loss": 0.2694, + "step": 20779 + }, + { + "epoch": 0.95, + "grad_norm": 0.4011095697978131, + "learning_rate": 1.0785998100399376e-07, + "loss": 0.2975, + "step": 20780 + }, + { + "epoch": 0.95, + "grad_norm": 0.3844968039755211, + "learning_rate": 1.0764214381537519e-07, + "loss": 0.1367, + "step": 20781 + }, + { + "epoch": 0.95, + "grad_norm": 0.7991233497783274, + "learning_rate": 1.074245256340245e-07, + "loss": 0.0955, + "step": 20782 + }, + { + "epoch": 0.95, + "grad_norm": 0.38456615385273496, + "learning_rate": 1.0720712646476116e-07, + "loss": 0.2783, + "step": 20783 + }, + { + "epoch": 0.95, + "grad_norm": 0.3715078813488768, + "learning_rate": 1.0698994631239689e-07, + "loss": 0.281, + "step": 20784 + }, + { + "epoch": 0.95, + "grad_norm": 0.5839737382227904, + "learning_rate": 1.0677298518174006e-07, + "loss": 0.287, + "step": 20785 + }, + { + "epoch": 0.95, + "grad_norm": 0.3911856436521603, + "learning_rate": 1.0655624307759348e-07, + "loss": 0.2949, + "step": 20786 + }, + { + "epoch": 0.95, + "grad_norm": 0.2487312606204923, + "learning_rate": 1.0633972000475779e-07, + "loss": 0.2065, + "step": 20787 + }, + { + "epoch": 0.95, + "grad_norm": 0.8784199628099109, + "learning_rate": 1.0612341596802467e-07, + "loss": 0.1081, + "step": 20788 + }, + { + "epoch": 0.96, + "grad_norm": 0.38829483837473877, + "learning_rate": 1.0590733097218142e-07, + "loss": 0.2664, + "step": 20789 + }, + { + "epoch": 0.96, + "grad_norm": 0.6531115786074333, + "learning_rate": 1.0569146502201643e-07, + "loss": 0.3687, + "step": 20790 + }, + { + "epoch": 0.96, + "grad_norm": 0.3283151652790656, + "learning_rate": 1.0547581812230478e-07, + "loss": 0.2367, + "step": 20791 + }, + { + "epoch": 0.96, + "grad_norm": 0.40992427640878837, + "learning_rate": 1.0526039027782264e-07, + "loss": 0.2769, + "step": 20792 + }, + { + "epoch": 0.96, + "grad_norm": 0.3026814798780175, + "learning_rate": 1.0504518149333731e-07, + "loss": 0.1992, + "step": 20793 + }, + { + "epoch": 0.96, + "grad_norm": 0.8052698058055426, + "learning_rate": 1.0483019177361609e-07, + "loss": 0.3831, + "step": 20794 + }, + { + "epoch": 0.96, + "grad_norm": 0.23937265382546366, + "learning_rate": 1.0461542112341738e-07, + "loss": 0.2034, + "step": 20795 + }, + { + "epoch": 0.96, + "grad_norm": 0.8101610964808613, + "learning_rate": 1.0440086954749517e-07, + "loss": 0.3845, + "step": 20796 + }, + { + "epoch": 0.96, + "grad_norm": 1.3046895822101234, + "learning_rate": 1.0418653705060123e-07, + "loss": 0.8179, + "step": 20797 + }, + { + "epoch": 0.96, + "grad_norm": 0.3128319050566063, + "learning_rate": 1.039724236374795e-07, + "loss": 0.1795, + "step": 20798 + }, + { + "epoch": 0.96, + "grad_norm": 0.2403984993030371, + "learning_rate": 1.0375852931286956e-07, + "loss": 0.2099, + "step": 20799 + }, + { + "epoch": 0.96, + "grad_norm": 0.660756532109065, + "learning_rate": 1.0354485408150871e-07, + "loss": 0.3272, + "step": 20800 + }, + { + "epoch": 0.96, + "grad_norm": 0.35127447118304683, + "learning_rate": 1.0333139794812541e-07, + "loss": 0.2106, + "step": 20801 + }, + { + "epoch": 0.96, + "grad_norm": 1.0386561962292427, + "learning_rate": 1.0311816091744698e-07, + "loss": 0.472, + "step": 20802 + }, + { + "epoch": 0.96, + "grad_norm": 0.3419692615268063, + "learning_rate": 1.0290514299419296e-07, + "loss": 0.2951, + "step": 20803 + }, + { + "epoch": 0.96, + "grad_norm": 0.38702540430725063, + "learning_rate": 1.0269234418308183e-07, + "loss": 0.2813, + "step": 20804 + }, + { + "epoch": 0.96, + "grad_norm": 0.4417887629067861, + "learning_rate": 1.0247976448882202e-07, + "loss": 0.0976, + "step": 20805 + }, + { + "epoch": 0.96, + "grad_norm": 0.7157111569903305, + "learning_rate": 1.0226740391612089e-07, + "loss": 0.3648, + "step": 20806 + }, + { + "epoch": 0.96, + "grad_norm": 0.2839922185727559, + "learning_rate": 1.0205526246968023e-07, + "loss": 0.2547, + "step": 20807 + }, + { + "epoch": 0.96, + "grad_norm": 0.5059263714278324, + "learning_rate": 1.0184334015419517e-07, + "loss": 0.2713, + "step": 20808 + }, + { + "epoch": 0.96, + "grad_norm": 1.2462544463711431, + "learning_rate": 1.0163163697435974e-07, + "loss": 0.7168, + "step": 20809 + }, + { + "epoch": 0.96, + "grad_norm": 0.3382274794775425, + "learning_rate": 1.0142015293485907e-07, + "loss": 0.2555, + "step": 20810 + }, + { + "epoch": 0.96, + "grad_norm": 0.27431032947997974, + "learning_rate": 1.012088880403761e-07, + "loss": 0.1942, + "step": 20811 + }, + { + "epoch": 0.96, + "grad_norm": 0.44125605991899713, + "learning_rate": 1.0099784229558817e-07, + "loss": 0.2572, + "step": 20812 + }, + { + "epoch": 0.96, + "grad_norm": 0.3561812911181639, + "learning_rate": 1.0078701570516602e-07, + "loss": 0.2589, + "step": 20813 + }, + { + "epoch": 0.96, + "grad_norm": 1.892282693443322, + "learning_rate": 1.0057640827378034e-07, + "loss": 0.327, + "step": 20814 + }, + { + "epoch": 0.96, + "grad_norm": 0.3736953952079247, + "learning_rate": 1.0036602000608963e-07, + "loss": 0.3028, + "step": 20815 + }, + { + "epoch": 0.96, + "grad_norm": 0.3324474242000121, + "learning_rate": 1.0015585090675573e-07, + "loss": 0.2596, + "step": 20816 + }, + { + "epoch": 0.96, + "grad_norm": 1.1745604159450236, + "learning_rate": 9.994590098042822e-08, + "loss": 0.4094, + "step": 20817 + }, + { + "epoch": 0.96, + "grad_norm": 0.2889475477218363, + "learning_rate": 9.973617023175786e-08, + "loss": 0.2239, + "step": 20818 + }, + { + "epoch": 0.96, + "grad_norm": 0.4002042585059943, + "learning_rate": 9.952665866538536e-08, + "loss": 0.2482, + "step": 20819 + }, + { + "epoch": 0.96, + "grad_norm": 0.3491096101982642, + "learning_rate": 9.931736628595146e-08, + "loss": 0.2508, + "step": 20820 + }, + { + "epoch": 0.96, + "grad_norm": 0.5914407775014046, + "learning_rate": 9.910829309808801e-08, + "loss": 0.2446, + "step": 20821 + }, + { + "epoch": 0.96, + "grad_norm": 0.3465350598076389, + "learning_rate": 9.889943910642463e-08, + "loss": 0.2569, + "step": 20822 + }, + { + "epoch": 0.96, + "grad_norm": 0.38730402185230806, + "learning_rate": 9.869080431558542e-08, + "loss": 0.2963, + "step": 20823 + }, + { + "epoch": 0.96, + "grad_norm": 0.4967044547694842, + "learning_rate": 9.84823887301889e-08, + "loss": 0.2124, + "step": 20824 + }, + { + "epoch": 0.96, + "grad_norm": 0.2610611634859002, + "learning_rate": 9.827419235484803e-08, + "loss": 0.2047, + "step": 20825 + }, + { + "epoch": 0.96, + "grad_norm": 1.2164033249489292, + "learning_rate": 9.806621519417358e-08, + "loss": 0.6175, + "step": 20826 + }, + { + "epoch": 0.96, + "grad_norm": 0.4213754891471513, + "learning_rate": 9.785845725276966e-08, + "loss": 0.2495, + "step": 20827 + }, + { + "epoch": 0.96, + "grad_norm": 0.3426560797761467, + "learning_rate": 9.765091853523478e-08, + "loss": 0.2342, + "step": 20828 + }, + { + "epoch": 0.96, + "grad_norm": 0.6673574349711081, + "learning_rate": 9.74435990461664e-08, + "loss": 0.3752, + "step": 20829 + }, + { + "epoch": 0.96, + "grad_norm": 0.5110573737562784, + "learning_rate": 9.723649879015085e-08, + "loss": 0.3179, + "step": 20830 + }, + { + "epoch": 0.96, + "grad_norm": 0.22375368787094405, + "learning_rate": 9.702961777177556e-08, + "loss": 0.1764, + "step": 20831 + }, + { + "epoch": 0.96, + "grad_norm": 0.40435991570706886, + "learning_rate": 9.682295599561908e-08, + "loss": 0.2027, + "step": 20832 + }, + { + "epoch": 0.96, + "grad_norm": 1.3809643848379378, + "learning_rate": 9.661651346625889e-08, + "loss": 0.6955, + "step": 20833 + }, + { + "epoch": 0.96, + "grad_norm": 0.3317144817301387, + "learning_rate": 9.641029018826465e-08, + "loss": 0.1959, + "step": 20834 + }, + { + "epoch": 0.96, + "grad_norm": 0.36537049709495073, + "learning_rate": 9.620428616619936e-08, + "loss": 0.288, + "step": 20835 + }, + { + "epoch": 0.96, + "grad_norm": 0.4829106079737012, + "learning_rate": 9.599850140462719e-08, + "loss": 0.206, + "step": 20836 + }, + { + "epoch": 0.96, + "grad_norm": 0.2148894402796256, + "learning_rate": 9.579293590810113e-08, + "loss": 0.1141, + "step": 20837 + }, + { + "epoch": 0.96, + "grad_norm": 0.5748870926814279, + "learning_rate": 9.558758968117532e-08, + "loss": 0.3588, + "step": 20838 + }, + { + "epoch": 0.96, + "grad_norm": 0.40419198079435953, + "learning_rate": 9.53824627283928e-08, + "loss": 0.3119, + "step": 20839 + }, + { + "epoch": 0.96, + "grad_norm": 0.4924648615857512, + "learning_rate": 9.51775550542977e-08, + "loss": 0.1824, + "step": 20840 + }, + { + "epoch": 0.96, + "grad_norm": 0.4732904819517592, + "learning_rate": 9.49728666634242e-08, + "loss": 0.3231, + "step": 20841 + }, + { + "epoch": 0.96, + "grad_norm": 0.5271751715870728, + "learning_rate": 9.476839756030531e-08, + "loss": 0.3093, + "step": 20842 + }, + { + "epoch": 0.96, + "grad_norm": 0.3076982898455123, + "learning_rate": 9.456414774946743e-08, + "loss": 0.2363, + "step": 20843 + }, + { + "epoch": 0.96, + "grad_norm": 0.22349633058371374, + "learning_rate": 9.436011723543137e-08, + "loss": 0.1431, + "step": 20844 + }, + { + "epoch": 0.96, + "grad_norm": 0.9140091510322358, + "learning_rate": 9.415630602271575e-08, + "loss": 0.4798, + "step": 20845 + }, + { + "epoch": 0.96, + "grad_norm": 0.4201375788544067, + "learning_rate": 9.395271411583251e-08, + "loss": 0.2567, + "step": 20846 + }, + { + "epoch": 0.96, + "grad_norm": 0.34397009759387076, + "learning_rate": 9.374934151928916e-08, + "loss": 0.2571, + "step": 20847 + }, + { + "epoch": 0.96, + "grad_norm": 1.4089349732548058, + "learning_rate": 9.354618823758654e-08, + "loss": 0.4506, + "step": 20848 + }, + { + "epoch": 0.96, + "grad_norm": 0.3242831348415367, + "learning_rate": 9.334325427522328e-08, + "loss": 0.2095, + "step": 20849 + }, + { + "epoch": 0.96, + "grad_norm": 0.5165063125170287, + "learning_rate": 9.314053963669245e-08, + "loss": 0.2734, + "step": 20850 + }, + { + "epoch": 0.96, + "grad_norm": 0.3510599195698513, + "learning_rate": 9.293804432648157e-08, + "loss": 0.2783, + "step": 20851 + }, + { + "epoch": 0.96, + "grad_norm": 0.33263484052898906, + "learning_rate": 9.273576834907483e-08, + "loss": 0.2441, + "step": 20852 + }, + { + "epoch": 0.96, + "grad_norm": 0.5669293073206008, + "learning_rate": 9.253371170894865e-08, + "loss": 0.1489, + "step": 20853 + }, + { + "epoch": 0.96, + "grad_norm": 0.36630369353747055, + "learning_rate": 9.233187441057612e-08, + "loss": 0.2916, + "step": 20854 + }, + { + "epoch": 0.96, + "grad_norm": 0.6741567523970805, + "learning_rate": 9.213025645842921e-08, + "loss": 0.3701, + "step": 20855 + }, + { + "epoch": 0.96, + "grad_norm": 0.3204710034985468, + "learning_rate": 9.192885785696659e-08, + "loss": 0.262, + "step": 20856 + }, + { + "epoch": 0.96, + "grad_norm": 0.5836075680518281, + "learning_rate": 9.172767861065135e-08, + "loss": 0.2983, + "step": 20857 + }, + { + "epoch": 0.96, + "grad_norm": 0.37710069569883, + "learning_rate": 9.152671872393437e-08, + "loss": 0.2376, + "step": 20858 + }, + { + "epoch": 0.96, + "grad_norm": 0.25585110812094386, + "learning_rate": 9.132597820126654e-08, + "loss": 0.2245, + "step": 20859 + }, + { + "epoch": 0.96, + "grad_norm": 1.4007490278216879, + "learning_rate": 9.112545704709207e-08, + "loss": 0.1976, + "step": 20860 + }, + { + "epoch": 0.96, + "grad_norm": 0.7442109115197545, + "learning_rate": 9.092515526584855e-08, + "loss": 0.3505, + "step": 20861 + }, + { + "epoch": 0.96, + "grad_norm": 0.32618250766267587, + "learning_rate": 9.07250728619713e-08, + "loss": 0.2814, + "step": 20862 + }, + { + "epoch": 0.96, + "grad_norm": 0.44855672147709247, + "learning_rate": 9.052520983989233e-08, + "loss": 0.2624, + "step": 20863 + }, + { + "epoch": 0.96, + "grad_norm": 0.4629268515198908, + "learning_rate": 9.032556620403254e-08, + "loss": 0.2764, + "step": 20864 + }, + { + "epoch": 0.96, + "grad_norm": 0.27158603257987185, + "learning_rate": 9.012614195881397e-08, + "loss": 0.2022, + "step": 20865 + }, + { + "epoch": 0.96, + "grad_norm": 0.42642081157990547, + "learning_rate": 8.992693710865197e-08, + "loss": 0.2493, + "step": 20866 + }, + { + "epoch": 0.96, + "grad_norm": 0.40966032182455425, + "learning_rate": 8.972795165795522e-08, + "loss": 0.2198, + "step": 20867 + }, + { + "epoch": 0.96, + "grad_norm": 0.5555061616885009, + "learning_rate": 8.952918561113022e-08, + "loss": 0.3601, + "step": 20868 + }, + { + "epoch": 0.96, + "grad_norm": 0.5813969184202027, + "learning_rate": 8.933063897257787e-08, + "loss": 0.3631, + "step": 20869 + }, + { + "epoch": 0.96, + "grad_norm": 0.3395446133581907, + "learning_rate": 8.913231174669246e-08, + "loss": 0.2117, + "step": 20870 + }, + { + "epoch": 0.96, + "grad_norm": 0.2633476817248766, + "learning_rate": 8.89342039378649e-08, + "loss": 0.1805, + "step": 20871 + }, + { + "epoch": 0.96, + "grad_norm": 1.3745869516244957, + "learning_rate": 8.87363155504828e-08, + "loss": 0.4074, + "step": 20872 + }, + { + "epoch": 0.96, + "grad_norm": 0.3423359018775441, + "learning_rate": 8.853864658892596e-08, + "loss": 0.1136, + "step": 20873 + }, + { + "epoch": 0.96, + "grad_norm": 0.3623496574207589, + "learning_rate": 8.834119705757093e-08, + "loss": 0.2881, + "step": 20874 + }, + { + "epoch": 0.96, + "grad_norm": 0.4420455861019985, + "learning_rate": 8.814396696078753e-08, + "loss": 0.332, + "step": 20875 + }, + { + "epoch": 0.96, + "grad_norm": 0.2906953844396678, + "learning_rate": 8.794695630294447e-08, + "loss": 0.1133, + "step": 20876 + }, + { + "epoch": 0.96, + "grad_norm": 0.26415644697070917, + "learning_rate": 8.775016508840273e-08, + "loss": 0.1818, + "step": 20877 + }, + { + "epoch": 0.96, + "grad_norm": 0.38169615738706814, + "learning_rate": 8.755359332151769e-08, + "loss": 0.2701, + "step": 20878 + }, + { + "epoch": 0.96, + "grad_norm": 0.3019191263952281, + "learning_rate": 8.735724100664256e-08, + "loss": 0.1098, + "step": 20879 + }, + { + "epoch": 0.96, + "grad_norm": 0.3837706603241768, + "learning_rate": 8.716110814812496e-08, + "loss": 0.2912, + "step": 20880 + }, + { + "epoch": 0.96, + "grad_norm": 1.0753798470204377, + "learning_rate": 8.696519475030585e-08, + "loss": 0.4949, + "step": 20881 + }, + { + "epoch": 0.96, + "grad_norm": 0.3706112023524697, + "learning_rate": 8.6769500817524e-08, + "loss": 0.2763, + "step": 20882 + }, + { + "epoch": 0.96, + "grad_norm": 0.27039971150375186, + "learning_rate": 8.657402635410928e-08, + "loss": 0.1572, + "step": 20883 + }, + { + "epoch": 0.96, + "grad_norm": 0.6440284424813976, + "learning_rate": 8.637877136439155e-08, + "loss": 0.2392, + "step": 20884 + }, + { + "epoch": 0.96, + "grad_norm": 0.5770686648673842, + "learning_rate": 8.61837358526929e-08, + "loss": 0.2822, + "step": 20885 + }, + { + "epoch": 0.96, + "grad_norm": 0.33295461153943073, + "learning_rate": 8.598891982333213e-08, + "loss": 0.2292, + "step": 20886 + }, + { + "epoch": 0.96, + "grad_norm": 0.495381146477583, + "learning_rate": 8.57943232806202e-08, + "loss": 0.3657, + "step": 20887 + }, + { + "epoch": 0.96, + "grad_norm": 0.653089012636174, + "learning_rate": 8.559994622886702e-08, + "loss": 0.3456, + "step": 20888 + }, + { + "epoch": 0.96, + "grad_norm": 0.2269785772390465, + "learning_rate": 8.540578867237581e-08, + "loss": 0.1496, + "step": 20889 + }, + { + "epoch": 0.96, + "grad_norm": 0.39192652770169045, + "learning_rate": 8.521185061544423e-08, + "loss": 0.2598, + "step": 20890 + }, + { + "epoch": 0.96, + "grad_norm": 0.8021736812475867, + "learning_rate": 8.501813206236664e-08, + "loss": 0.4142, + "step": 20891 + }, + { + "epoch": 0.96, + "grad_norm": 0.3219627113294337, + "learning_rate": 8.482463301743182e-08, + "loss": 0.2113, + "step": 20892 + }, + { + "epoch": 0.96, + "grad_norm": 1.1239801730082162, + "learning_rate": 8.463135348492191e-08, + "loss": 0.5786, + "step": 20893 + }, + { + "epoch": 0.96, + "grad_norm": 0.37202503324251085, + "learning_rate": 8.443829346911792e-08, + "loss": 0.2836, + "step": 20894 + }, + { + "epoch": 0.96, + "grad_norm": 0.41039813633639977, + "learning_rate": 8.424545297429309e-08, + "loss": 0.2654, + "step": 20895 + }, + { + "epoch": 0.96, + "grad_norm": 0.25241410114457336, + "learning_rate": 8.405283200471848e-08, + "loss": 0.1054, + "step": 20896 + }, + { + "epoch": 0.96, + "grad_norm": 0.5797586963597887, + "learning_rate": 8.38604305646551e-08, + "loss": 0.3377, + "step": 20897 + }, + { + "epoch": 0.96, + "grad_norm": 0.26450182210355294, + "learning_rate": 8.36682486583651e-08, + "loss": 0.2482, + "step": 20898 + }, + { + "epoch": 0.96, + "grad_norm": 1.16640056548404, + "learning_rate": 8.347628629010285e-08, + "loss": 0.462, + "step": 20899 + }, + { + "epoch": 0.96, + "grad_norm": 1.1183143143031313, + "learning_rate": 8.32845434641183e-08, + "loss": 0.3473, + "step": 20900 + }, + { + "epoch": 0.96, + "grad_norm": 0.21857714365090183, + "learning_rate": 8.309302018465581e-08, + "loss": 0.1625, + "step": 20901 + }, + { + "epoch": 0.96, + "grad_norm": 0.35085293253043426, + "learning_rate": 8.290171645595535e-08, + "loss": 0.2342, + "step": 20902 + }, + { + "epoch": 0.96, + "grad_norm": 0.6299361488360684, + "learning_rate": 8.271063228225351e-08, + "loss": 0.3315, + "step": 20903 + }, + { + "epoch": 0.96, + "grad_norm": 0.34729952627141764, + "learning_rate": 8.251976766777914e-08, + "loss": 0.2494, + "step": 20904 + }, + { + "epoch": 0.96, + "grad_norm": 1.3255736442213075, + "learning_rate": 8.232912261675774e-08, + "loss": 0.3862, + "step": 20905 + }, + { + "epoch": 0.96, + "grad_norm": 0.3732522797955375, + "learning_rate": 8.21386971334126e-08, + "loss": 0.2767, + "step": 20906 + }, + { + "epoch": 0.96, + "grad_norm": 0.4627466230901579, + "learning_rate": 8.194849122195702e-08, + "loss": 0.2982, + "step": 20907 + }, + { + "epoch": 0.96, + "grad_norm": 0.34163293298774766, + "learning_rate": 8.175850488660209e-08, + "loss": 0.1892, + "step": 20908 + }, + { + "epoch": 0.96, + "grad_norm": 0.7493727448231007, + "learning_rate": 8.156873813155442e-08, + "loss": 0.2529, + "step": 20909 + }, + { + "epoch": 0.96, + "grad_norm": 0.2811481335486801, + "learning_rate": 8.13791909610162e-08, + "loss": 0.2457, + "step": 20910 + }, + { + "epoch": 0.96, + "grad_norm": 0.543120884948702, + "learning_rate": 8.118986337918411e-08, + "loss": 0.3767, + "step": 20911 + }, + { + "epoch": 0.96, + "grad_norm": 3.4170232680063313, + "learning_rate": 8.100075539024699e-08, + "loss": 0.1796, + "step": 20912 + }, + { + "epoch": 0.96, + "grad_norm": 0.3444819797008657, + "learning_rate": 8.081186699839371e-08, + "loss": 0.2479, + "step": 20913 + }, + { + "epoch": 0.96, + "grad_norm": 0.3698564434755987, + "learning_rate": 8.062319820780428e-08, + "loss": 0.2945, + "step": 20914 + }, + { + "epoch": 0.96, + "grad_norm": 0.2489373354363003, + "learning_rate": 8.043474902265757e-08, + "loss": 0.1121, + "step": 20915 + }, + { + "epoch": 0.96, + "grad_norm": 0.3856458572983423, + "learning_rate": 8.024651944712469e-08, + "loss": 0.2704, + "step": 20916 + }, + { + "epoch": 0.96, + "grad_norm": 1.336237141679605, + "learning_rate": 8.005850948537453e-08, + "loss": 0.7046, + "step": 20917 + }, + { + "epoch": 0.96, + "grad_norm": 0.3311376014040576, + "learning_rate": 7.987071914156597e-08, + "loss": 0.2184, + "step": 20918 + }, + { + "epoch": 0.96, + "grad_norm": 0.3848520750916892, + "learning_rate": 7.968314841985902e-08, + "loss": 0.2871, + "step": 20919 + }, + { + "epoch": 0.96, + "grad_norm": 0.6290212964259561, + "learning_rate": 7.949579732440705e-08, + "loss": 0.3579, + "step": 20920 + }, + { + "epoch": 0.96, + "grad_norm": 0.2941406420185858, + "learning_rate": 7.93086658593556e-08, + "loss": 0.1795, + "step": 20921 + }, + { + "epoch": 0.96, + "grad_norm": 0.32304831792081323, + "learning_rate": 7.912175402884914e-08, + "loss": 0.1806, + "step": 20922 + }, + { + "epoch": 0.96, + "grad_norm": 0.49024716811534175, + "learning_rate": 7.893506183702437e-08, + "loss": 0.3805, + "step": 20923 + }, + { + "epoch": 0.96, + "grad_norm": 0.9384423046934137, + "learning_rate": 7.874858928801577e-08, + "loss": 0.4119, + "step": 20924 + }, + { + "epoch": 0.96, + "grad_norm": 0.29944458278838615, + "learning_rate": 7.856233638595223e-08, + "loss": 0.1807, + "step": 20925 + }, + { + "epoch": 0.96, + "grad_norm": 0.37669615605650464, + "learning_rate": 7.837630313495493e-08, + "loss": 0.3007, + "step": 20926 + }, + { + "epoch": 0.96, + "grad_norm": 0.2827886269204321, + "learning_rate": 7.819048953914387e-08, + "loss": 0.1674, + "step": 20927 + }, + { + "epoch": 0.96, + "grad_norm": 0.3272916859500001, + "learning_rate": 7.800489560263247e-08, + "loss": 0.2014, + "step": 20928 + }, + { + "epoch": 0.96, + "grad_norm": 0.5426117183216848, + "learning_rate": 7.781952132952963e-08, + "loss": 0.3921, + "step": 20929 + }, + { + "epoch": 0.96, + "grad_norm": 0.5478441375290847, + "learning_rate": 7.763436672393987e-08, + "loss": 0.3465, + "step": 20930 + }, + { + "epoch": 0.96, + "grad_norm": 0.328964867641083, + "learning_rate": 7.7449431789961e-08, + "loss": 0.2485, + "step": 20931 + }, + { + "epoch": 0.96, + "grad_norm": 0.6791546981515926, + "learning_rate": 7.726471653168977e-08, + "loss": 0.2773, + "step": 20932 + }, + { + "epoch": 0.96, + "grad_norm": 0.23668818074695283, + "learning_rate": 7.708022095321288e-08, + "loss": 0.1772, + "step": 20933 + }, + { + "epoch": 0.96, + "grad_norm": 0.27782492865400477, + "learning_rate": 7.689594505861708e-08, + "loss": 0.2328, + "step": 20934 + }, + { + "epoch": 0.96, + "grad_norm": 1.0227714225666613, + "learning_rate": 7.671188885198022e-08, + "loss": 0.5186, + "step": 20935 + }, + { + "epoch": 0.96, + "grad_norm": 0.6833071725458699, + "learning_rate": 7.652805233737792e-08, + "loss": 0.362, + "step": 20936 + }, + { + "epoch": 0.96, + "grad_norm": 0.3487903309815111, + "learning_rate": 7.634443551888137e-08, + "loss": 0.25, + "step": 20937 + }, + { + "epoch": 0.96, + "grad_norm": 0.3568585370008684, + "learning_rate": 7.616103840055289e-08, + "loss": 0.2357, + "step": 20938 + }, + { + "epoch": 0.96, + "grad_norm": 0.5692708035998152, + "learning_rate": 7.597786098645477e-08, + "loss": 0.2935, + "step": 20939 + }, + { + "epoch": 0.96, + "grad_norm": 0.352655623743412, + "learning_rate": 7.579490328064265e-08, + "loss": 0.2406, + "step": 20940 + }, + { + "epoch": 0.96, + "grad_norm": 0.32831862225873626, + "learning_rate": 7.561216528716552e-08, + "loss": 0.1976, + "step": 20941 + }, + { + "epoch": 0.96, + "grad_norm": 0.45159428126693574, + "learning_rate": 7.542964701007016e-08, + "loss": 0.3274, + "step": 20942 + }, + { + "epoch": 0.96, + "grad_norm": 0.43178168512532805, + "learning_rate": 7.524734845339665e-08, + "loss": 0.2711, + "step": 20943 + }, + { + "epoch": 0.96, + "grad_norm": 0.5852231991827269, + "learning_rate": 7.506526962118176e-08, + "loss": 0.3279, + "step": 20944 + }, + { + "epoch": 0.96, + "grad_norm": 0.4507357410168006, + "learning_rate": 7.488341051745562e-08, + "loss": 0.2432, + "step": 20945 + }, + { + "epoch": 0.96, + "grad_norm": 0.4184224805417246, + "learning_rate": 7.4701771146245e-08, + "loss": 0.2667, + "step": 20946 + }, + { + "epoch": 0.96, + "grad_norm": 0.3766177038063524, + "learning_rate": 7.45203515115711e-08, + "loss": 0.2642, + "step": 20947 + }, + { + "epoch": 0.96, + "grad_norm": 0.6011207543980479, + "learning_rate": 7.433915161744965e-08, + "loss": 0.2736, + "step": 20948 + }, + { + "epoch": 0.96, + "grad_norm": 0.2577723024016978, + "learning_rate": 7.415817146789406e-08, + "loss": 0.1898, + "step": 20949 + }, + { + "epoch": 0.96, + "grad_norm": 0.42736346259781277, + "learning_rate": 7.397741106690892e-08, + "loss": 0.2865, + "step": 20950 + }, + { + "epoch": 0.96, + "grad_norm": 1.247096702231234, + "learning_rate": 7.37968704184977e-08, + "loss": 0.2411, + "step": 20951 + }, + { + "epoch": 0.96, + "grad_norm": 0.337915982871162, + "learning_rate": 7.361654952665608e-08, + "loss": 0.2453, + "step": 20952 + }, + { + "epoch": 0.96, + "grad_norm": 0.8992801223892667, + "learning_rate": 7.343644839537756e-08, + "loss": 0.5287, + "step": 20953 + }, + { + "epoch": 0.96, + "grad_norm": 0.2781801729042686, + "learning_rate": 7.325656702864891e-08, + "loss": 0.2157, + "step": 20954 + }, + { + "epoch": 0.96, + "grad_norm": 0.2656787111445052, + "learning_rate": 7.307690543045142e-08, + "loss": 0.2038, + "step": 20955 + }, + { + "epoch": 0.96, + "grad_norm": 1.5594629924756103, + "learning_rate": 7.289746360476524e-08, + "loss": 0.5222, + "step": 20956 + }, + { + "epoch": 0.96, + "grad_norm": 0.40075593671668047, + "learning_rate": 7.27182415555594e-08, + "loss": 0.2695, + "step": 20957 + }, + { + "epoch": 0.96, + "grad_norm": 0.31043194877996777, + "learning_rate": 7.253923928680406e-08, + "loss": 0.1916, + "step": 20958 + }, + { + "epoch": 0.96, + "grad_norm": 0.9304813055901388, + "learning_rate": 7.236045680246273e-08, + "loss": 0.4644, + "step": 20959 + }, + { + "epoch": 0.96, + "grad_norm": 0.4475450213943127, + "learning_rate": 7.218189410649113e-08, + "loss": 0.2743, + "step": 20960 + }, + { + "epoch": 0.96, + "grad_norm": 0.248892337187839, + "learning_rate": 7.200355120284496e-08, + "loss": 0.1286, + "step": 20961 + }, + { + "epoch": 0.96, + "grad_norm": 0.3290205961407414, + "learning_rate": 7.182542809547111e-08, + "loss": 0.2765, + "step": 20962 + }, + { + "epoch": 0.96, + "grad_norm": 1.4834556045454674, + "learning_rate": 7.164752478831305e-08, + "loss": 0.4459, + "step": 20963 + }, + { + "epoch": 0.96, + "grad_norm": 0.36448259271028577, + "learning_rate": 7.146984128530988e-08, + "loss": 0.1746, + "step": 20964 + }, + { + "epoch": 0.96, + "grad_norm": 0.3546452947096683, + "learning_rate": 7.129237759039509e-08, + "loss": 0.2771, + "step": 20965 + }, + { + "epoch": 0.96, + "grad_norm": 0.4902707925376102, + "learning_rate": 7.11151337074989e-08, + "loss": 0.3311, + "step": 20966 + }, + { + "epoch": 0.96, + "grad_norm": 0.18624699967589212, + "learning_rate": 7.09381096405426e-08, + "loss": 0.1202, + "step": 20967 + }, + { + "epoch": 0.96, + "grad_norm": 0.7822412910398101, + "learning_rate": 7.07613053934475e-08, + "loss": 0.3361, + "step": 20968 + }, + { + "epoch": 0.96, + "grad_norm": 0.38006838628332645, + "learning_rate": 7.058472097012715e-08, + "loss": 0.295, + "step": 20969 + }, + { + "epoch": 0.96, + "grad_norm": 0.44913289921966887, + "learning_rate": 7.040835637449062e-08, + "loss": 0.2884, + "step": 20970 + }, + { + "epoch": 0.96, + "grad_norm": 0.4890102212725251, + "learning_rate": 7.023221161044258e-08, + "loss": 0.2486, + "step": 20971 + }, + { + "epoch": 0.96, + "grad_norm": 0.8703076335245795, + "learning_rate": 7.005628668188325e-08, + "loss": 0.4948, + "step": 20972 + }, + { + "epoch": 0.96, + "grad_norm": 0.2659242523458978, + "learning_rate": 6.988058159270727e-08, + "loss": 0.2025, + "step": 20973 + }, + { + "epoch": 0.96, + "grad_norm": 0.280910965525807, + "learning_rate": 6.970509634680378e-08, + "loss": 0.1779, + "step": 20974 + }, + { + "epoch": 0.96, + "grad_norm": 1.2799547202824724, + "learning_rate": 6.952983094805965e-08, + "loss": 0.4506, + "step": 20975 + }, + { + "epoch": 0.96, + "grad_norm": 0.5272729290816845, + "learning_rate": 6.9354785400354e-08, + "loss": 0.2912, + "step": 20976 + }, + { + "epoch": 0.96, + "grad_norm": 0.32819716796095183, + "learning_rate": 6.917995970756153e-08, + "loss": 0.2413, + "step": 20977 + }, + { + "epoch": 0.96, + "grad_norm": 0.4979588026306972, + "learning_rate": 6.900535387355245e-08, + "loss": 0.3412, + "step": 20978 + }, + { + "epoch": 0.96, + "grad_norm": 0.7149091934708959, + "learning_rate": 6.883096790219479e-08, + "loss": 0.297, + "step": 20979 + }, + { + "epoch": 0.96, + "grad_norm": 0.24804894744224407, + "learning_rate": 6.865680179734657e-08, + "loss": 0.1513, + "step": 20980 + }, + { + "epoch": 0.96, + "grad_norm": 0.3804917484073137, + "learning_rate": 6.848285556286583e-08, + "loss": 0.2787, + "step": 20981 + }, + { + "epoch": 0.96, + "grad_norm": 0.6138913461661227, + "learning_rate": 6.830912920260169e-08, + "loss": 0.3419, + "step": 20982 + }, + { + "epoch": 0.96, + "grad_norm": 0.4368497983663204, + "learning_rate": 6.813562272040109e-08, + "loss": 0.3011, + "step": 20983 + }, + { + "epoch": 0.96, + "grad_norm": 1.6674525531911986, + "learning_rate": 6.796233612010539e-08, + "loss": 0.3966, + "step": 20984 + }, + { + "epoch": 0.96, + "grad_norm": 0.29825239211327864, + "learning_rate": 6.778926940555152e-08, + "loss": 0.2255, + "step": 20985 + }, + { + "epoch": 0.96, + "grad_norm": 0.25089060112955863, + "learning_rate": 6.761642258056977e-08, + "loss": 0.1946, + "step": 20986 + }, + { + "epoch": 0.96, + "grad_norm": 0.7977865478637034, + "learning_rate": 6.744379564898818e-08, + "loss": 0.241, + "step": 20987 + }, + { + "epoch": 0.96, + "grad_norm": 0.5537487225069052, + "learning_rate": 6.727138861462812e-08, + "loss": 0.2903, + "step": 20988 + }, + { + "epoch": 0.96, + "grad_norm": 0.33226320495919426, + "learning_rate": 6.709920148130544e-08, + "loss": 0.2975, + "step": 20989 + }, + { + "epoch": 0.96, + "grad_norm": 0.4723067324242132, + "learning_rate": 6.692723425283265e-08, + "loss": 0.272, + "step": 20990 + }, + { + "epoch": 0.96, + "grad_norm": 0.4785276005933155, + "learning_rate": 6.675548693301781e-08, + "loss": 0.2775, + "step": 20991 + }, + { + "epoch": 0.96, + "grad_norm": 0.24439470465992322, + "learning_rate": 6.658395952566233e-08, + "loss": 0.1536, + "step": 20992 + }, + { + "epoch": 0.96, + "grad_norm": 0.35119293766639925, + "learning_rate": 6.641265203456537e-08, + "loss": 0.2416, + "step": 20993 + }, + { + "epoch": 0.96, + "grad_norm": 0.5679376007196011, + "learning_rate": 6.624156446351615e-08, + "loss": 0.2873, + "step": 20994 + }, + { + "epoch": 0.96, + "grad_norm": 0.4345223423672163, + "learning_rate": 6.607069681630606e-08, + "loss": 0.316, + "step": 20995 + }, + { + "epoch": 0.96, + "grad_norm": 1.4845227169624138, + "learning_rate": 6.590004909671543e-08, + "loss": 0.5733, + "step": 20996 + }, + { + "epoch": 0.96, + "grad_norm": 0.3168726207894635, + "learning_rate": 6.572962130852345e-08, + "loss": 0.2189, + "step": 20997 + }, + { + "epoch": 0.96, + "grad_norm": 0.24651593406613984, + "learning_rate": 6.555941345550265e-08, + "loss": 0.1982, + "step": 20998 + }, + { + "epoch": 0.96, + "grad_norm": 0.6599167224065907, + "learning_rate": 6.538942554142114e-08, + "loss": 0.3512, + "step": 20999 + }, + { + "epoch": 0.96, + "grad_norm": 0.6367149066849355, + "learning_rate": 6.521965757004367e-08, + "loss": 0.1027, + "step": 21000 + }, + { + "epoch": 0.96, + "grad_norm": 0.2635131481922569, + "learning_rate": 6.505010954512725e-08, + "loss": 0.2701, + "step": 21001 + }, + { + "epoch": 0.96, + "grad_norm": 1.3268981156118795, + "learning_rate": 6.488078147042554e-08, + "loss": 0.7576, + "step": 21002 + }, + { + "epoch": 0.96, + "grad_norm": 0.7807833963395009, + "learning_rate": 6.471167334968887e-08, + "loss": 0.1341, + "step": 21003 + }, + { + "epoch": 0.96, + "grad_norm": 0.3098628571783303, + "learning_rate": 6.454278518665869e-08, + "loss": 0.2544, + "step": 21004 + }, + { + "epoch": 0.96, + "grad_norm": 0.35385541120747066, + "learning_rate": 6.437411698507645e-08, + "loss": 0.2901, + "step": 21005 + }, + { + "epoch": 0.96, + "grad_norm": 0.33579937195910586, + "learning_rate": 6.420566874867363e-08, + "loss": 0.1197, + "step": 21006 + }, + { + "epoch": 0.97, + "grad_norm": 0.4216155896744583, + "learning_rate": 6.403744048118277e-08, + "loss": 0.3068, + "step": 21007 + }, + { + "epoch": 0.97, + "grad_norm": 1.374009464580873, + "learning_rate": 6.386943218632535e-08, + "loss": 0.717, + "step": 21008 + }, + { + "epoch": 0.97, + "grad_norm": 0.341526128496463, + "learning_rate": 6.370164386782285e-08, + "loss": 0.2525, + "step": 21009 + }, + { + "epoch": 0.97, + "grad_norm": 0.3493405122156894, + "learning_rate": 6.353407552938895e-08, + "loss": 0.1892, + "step": 21010 + }, + { + "epoch": 0.97, + "grad_norm": 0.44740823313160427, + "learning_rate": 6.336672717473402e-08, + "loss": 0.2717, + "step": 21011 + }, + { + "epoch": 0.97, + "grad_norm": 0.3375390911257085, + "learning_rate": 6.319959880756176e-08, + "loss": 0.1778, + "step": 21012 + }, + { + "epoch": 0.97, + "grad_norm": 0.25874487195947016, + "learning_rate": 6.303269043157367e-08, + "loss": 0.2233, + "step": 21013 + }, + { + "epoch": 0.97, + "grad_norm": 1.3159685635081946, + "learning_rate": 6.286600205046566e-08, + "loss": 0.7655, + "step": 21014 + }, + { + "epoch": 0.97, + "grad_norm": 0.9693206785714636, + "learning_rate": 6.269953366792481e-08, + "loss": 0.4191, + "step": 21015 + }, + { + "epoch": 0.97, + "grad_norm": 0.40390567134190547, + "learning_rate": 6.253328528764035e-08, + "loss": 0.2137, + "step": 21016 + }, + { + "epoch": 0.97, + "grad_norm": 0.2918628400038654, + "learning_rate": 6.236725691329049e-08, + "loss": 0.2478, + "step": 21017 + }, + { + "epoch": 0.97, + "grad_norm": 0.5166313739900902, + "learning_rate": 6.220144854855115e-08, + "loss": 0.2892, + "step": 21018 + }, + { + "epoch": 0.97, + "grad_norm": 0.3111552586826631, + "learning_rate": 6.203586019709384e-08, + "loss": 0.2033, + "step": 21019 + }, + { + "epoch": 0.97, + "grad_norm": 1.5071738096138139, + "learning_rate": 6.187049186258453e-08, + "loss": 0.5232, + "step": 21020 + }, + { + "epoch": 0.97, + "grad_norm": 0.3414389570798249, + "learning_rate": 6.170534354868251e-08, + "loss": 0.2651, + "step": 21021 + }, + { + "epoch": 0.97, + "grad_norm": 0.34882043652653827, + "learning_rate": 6.154041525904708e-08, + "loss": 0.2677, + "step": 21022 + }, + { + "epoch": 0.97, + "grad_norm": 1.0199661157709752, + "learning_rate": 6.137570699732753e-08, + "loss": 0.2477, + "step": 21023 + }, + { + "epoch": 0.97, + "grad_norm": 0.24240787157595187, + "learning_rate": 6.121121876717206e-08, + "loss": 0.1939, + "step": 21024 + }, + { + "epoch": 0.97, + "grad_norm": 0.2972542099972891, + "learning_rate": 6.104695057221887e-08, + "loss": 0.2688, + "step": 21025 + }, + { + "epoch": 0.97, + "grad_norm": 2.0054562355267063, + "learning_rate": 6.08829024161084e-08, + "loss": 0.3024, + "step": 21026 + }, + { + "epoch": 0.97, + "grad_norm": 0.5329130865929097, + "learning_rate": 6.071907430247104e-08, + "loss": 0.3458, + "step": 21027 + }, + { + "epoch": 0.97, + "grad_norm": 0.3941837523435308, + "learning_rate": 6.055546623493392e-08, + "loss": 0.2537, + "step": 21028 + }, + { + "epoch": 0.97, + "grad_norm": 0.39080321529263906, + "learning_rate": 6.039207821711856e-08, + "loss": 0.2499, + "step": 21029 + }, + { + "epoch": 0.97, + "grad_norm": 0.2668253796144861, + "learning_rate": 6.02289102526421e-08, + "loss": 0.1661, + "step": 21030 + }, + { + "epoch": 0.97, + "grad_norm": 0.40433685724306456, + "learning_rate": 6.00659623451183e-08, + "loss": 0.2613, + "step": 21031 + }, + { + "epoch": 0.97, + "grad_norm": 0.5072479338361667, + "learning_rate": 5.990323449815316e-08, + "loss": 0.2696, + "step": 21032 + }, + { + "epoch": 0.97, + "grad_norm": 0.4795865715266076, + "learning_rate": 5.974072671535047e-08, + "loss": 0.2961, + "step": 21033 + }, + { + "epoch": 0.97, + "grad_norm": 0.34712187506857195, + "learning_rate": 5.957843900030735e-08, + "loss": 0.2529, + "step": 21034 + }, + { + "epoch": 0.97, + "grad_norm": 1.1667171278496133, + "learning_rate": 5.9416371356617596e-08, + "loss": 0.3876, + "step": 21035 + }, + { + "epoch": 0.97, + "grad_norm": 0.28197657818009786, + "learning_rate": 5.925452378786833e-08, + "loss": 0.1996, + "step": 21036 + }, + { + "epoch": 0.97, + "grad_norm": 0.3411686794312948, + "learning_rate": 5.9092896297642254e-08, + "loss": 0.2629, + "step": 21037 + }, + { + "epoch": 0.97, + "grad_norm": 0.4868318614874262, + "learning_rate": 5.893148888951872e-08, + "loss": 0.2403, + "step": 21038 + }, + { + "epoch": 0.97, + "grad_norm": 0.7566832572607907, + "learning_rate": 5.877030156707042e-08, + "loss": 0.2779, + "step": 21039 + }, + { + "epoch": 0.97, + "grad_norm": 0.3273158831965462, + "learning_rate": 5.860933433386673e-08, + "loss": 0.237, + "step": 21040 + }, + { + "epoch": 0.97, + "grad_norm": 0.3665577847433682, + "learning_rate": 5.844858719347035e-08, + "loss": 0.2869, + "step": 21041 + }, + { + "epoch": 0.97, + "grad_norm": 0.9720960958138958, + "learning_rate": 5.828806014943955e-08, + "loss": 0.2239, + "step": 21042 + }, + { + "epoch": 0.97, + "grad_norm": 0.37827517985385767, + "learning_rate": 5.8127753205330375e-08, + "loss": 0.2607, + "step": 21043 + }, + { + "epoch": 0.97, + "grad_norm": 0.48589045401312925, + "learning_rate": 5.7967666364689975e-08, + "loss": 0.306, + "step": 21044 + }, + { + "epoch": 0.97, + "grad_norm": 0.2989415029962626, + "learning_rate": 5.7807799631064423e-08, + "loss": 0.2698, + "step": 21045 + }, + { + "epoch": 0.97, + "grad_norm": 0.3265293889674919, + "learning_rate": 5.764815300798976e-08, + "loss": 0.1909, + "step": 21046 + }, + { + "epoch": 0.97, + "grad_norm": 1.400948657107216, + "learning_rate": 5.748872649900428e-08, + "loss": 0.4168, + "step": 21047 + }, + { + "epoch": 0.97, + "grad_norm": 0.3708611335060279, + "learning_rate": 5.732952010763515e-08, + "loss": 0.3267, + "step": 21048 + }, + { + "epoch": 0.97, + "grad_norm": 0.3068513815828977, + "learning_rate": 5.717053383740734e-08, + "loss": 0.1803, + "step": 21049 + }, + { + "epoch": 0.97, + "grad_norm": 0.7669352683266277, + "learning_rate": 5.701176769184025e-08, + "loss": 0.4029, + "step": 21050 + }, + { + "epoch": 0.97, + "grad_norm": 0.33158510907614885, + "learning_rate": 5.685322167444995e-08, + "loss": 0.2158, + "step": 21051 + }, + { + "epoch": 0.97, + "grad_norm": 0.28984673113048603, + "learning_rate": 5.6694895788746984e-08, + "loss": 0.1793, + "step": 21052 + }, + { + "epoch": 0.97, + "grad_norm": 0.3820019687630575, + "learning_rate": 5.653679003823409e-08, + "loss": 0.2729, + "step": 21053 + }, + { + "epoch": 0.97, + "grad_norm": 1.313660678012032, + "learning_rate": 5.637890442641403e-08, + "loss": 0.5951, + "step": 21054 + }, + { + "epoch": 0.97, + "grad_norm": 0.34318564712964045, + "learning_rate": 5.6221238956780664e-08, + "loss": 0.2093, + "step": 21055 + }, + { + "epoch": 0.97, + "grad_norm": 0.4321675398302973, + "learning_rate": 5.6063793632825655e-08, + "loss": 0.3034, + "step": 21056 + }, + { + "epoch": 0.97, + "grad_norm": 0.3628331983910679, + "learning_rate": 5.590656845803399e-08, + "loss": 0.2551, + "step": 21057 + }, + { + "epoch": 0.97, + "grad_norm": 0.28415210544096003, + "learning_rate": 5.574956343588622e-08, + "loss": 0.2061, + "step": 21058 + }, + { + "epoch": 0.97, + "grad_norm": 1.6048733845177607, + "learning_rate": 5.559277856985845e-08, + "loss": 0.1641, + "step": 21059 + }, + { + "epoch": 0.97, + "grad_norm": 0.37301445085082735, + "learning_rate": 5.543621386342346e-08, + "loss": 0.327, + "step": 21060 + }, + { + "epoch": 0.97, + "grad_norm": 0.3589962709287775, + "learning_rate": 5.527986932004403e-08, + "loss": 0.2463, + "step": 21061 + }, + { + "epoch": 0.97, + "grad_norm": 0.6080953459712184, + "learning_rate": 5.512374494318518e-08, + "loss": 0.2731, + "step": 21062 + }, + { + "epoch": 0.97, + "grad_norm": 0.27833664189840807, + "learning_rate": 5.4967840736300795e-08, + "loss": 0.1709, + "step": 21063 + }, + { + "epoch": 0.97, + "grad_norm": 0.42373087746989385, + "learning_rate": 5.481215670284368e-08, + "loss": 0.2677, + "step": 21064 + }, + { + "epoch": 0.97, + "grad_norm": 0.3402161081578707, + "learning_rate": 5.465669284625996e-08, + "loss": 0.2266, + "step": 21065 + }, + { + "epoch": 0.97, + "grad_norm": 0.8005976894015006, + "learning_rate": 5.450144916999134e-08, + "loss": 0.4147, + "step": 21066 + }, + { + "epoch": 0.97, + "grad_norm": 0.4197180773698471, + "learning_rate": 5.434642567747506e-08, + "loss": 0.2587, + "step": 21067 + }, + { + "epoch": 0.97, + "grad_norm": 0.2897076669049023, + "learning_rate": 5.4191622372143924e-08, + "loss": 0.2419, + "step": 21068 + }, + { + "epoch": 0.97, + "grad_norm": 1.464334737225091, + "learning_rate": 5.4037039257422986e-08, + "loss": 0.5636, + "step": 21069 + }, + { + "epoch": 0.97, + "grad_norm": 0.22004676736744955, + "learning_rate": 5.388267633673727e-08, + "loss": 0.1504, + "step": 21070 + }, + { + "epoch": 0.97, + "grad_norm": 0.6864883978978314, + "learning_rate": 5.3728533613502944e-08, + "loss": 0.3482, + "step": 21071 + }, + { + "epoch": 0.97, + "grad_norm": 0.3435431529233863, + "learning_rate": 5.357461109113171e-08, + "loss": 0.2479, + "step": 21072 + }, + { + "epoch": 0.97, + "grad_norm": 0.3503505072199705, + "learning_rate": 5.342090877303196e-08, + "loss": 0.2685, + "step": 21073 + }, + { + "epoch": 0.97, + "grad_norm": 0.7985746599691683, + "learning_rate": 5.326742666260765e-08, + "loss": 0.4346, + "step": 21074 + }, + { + "epoch": 0.97, + "grad_norm": 0.6667805766036662, + "learning_rate": 5.3114164763254925e-08, + "loss": 0.1023, + "step": 21075 + }, + { + "epoch": 0.97, + "grad_norm": 0.2719418761550178, + "learning_rate": 5.296112307836776e-08, + "loss": 0.2028, + "step": 21076 + }, + { + "epoch": 0.97, + "grad_norm": 0.36642683951161004, + "learning_rate": 5.280830161133455e-08, + "loss": 0.2793, + "step": 21077 + }, + { + "epoch": 0.97, + "grad_norm": 0.7236212952436392, + "learning_rate": 5.265570036553813e-08, + "loss": 0.271, + "step": 21078 + }, + { + "epoch": 0.97, + "grad_norm": 0.3584295670886505, + "learning_rate": 5.2503319344356926e-08, + "loss": 0.2666, + "step": 21079 + }, + { + "epoch": 0.97, + "grad_norm": 0.5130197882624794, + "learning_rate": 5.23511585511649e-08, + "loss": 0.3477, + "step": 21080 + }, + { + "epoch": 0.97, + "grad_norm": 0.46114344980404376, + "learning_rate": 5.2199217989330475e-08, + "loss": 0.1741, + "step": 21081 + }, + { + "epoch": 0.97, + "grad_norm": 0.37840433132689727, + "learning_rate": 5.2047497662217617e-08, + "loss": 0.1985, + "step": 21082 + }, + { + "epoch": 0.97, + "grad_norm": 0.43870601246326696, + "learning_rate": 5.1895997573185865e-08, + "loss": 0.3148, + "step": 21083 + }, + { + "epoch": 0.97, + "grad_norm": 0.35122471261549876, + "learning_rate": 5.1744717725588087e-08, + "loss": 0.2862, + "step": 21084 + }, + { + "epoch": 0.97, + "grad_norm": 0.4105564272989338, + "learning_rate": 5.1593658122773835e-08, + "loss": 0.1968, + "step": 21085 + }, + { + "epoch": 0.97, + "grad_norm": 0.5400570320109478, + "learning_rate": 5.14428187680871e-08, + "loss": 0.3592, + "step": 21086 + }, + { + "epoch": 0.97, + "grad_norm": 1.5205512720960765, + "learning_rate": 5.1292199664868534e-08, + "loss": 0.4631, + "step": 21087 + }, + { + "epoch": 0.97, + "grad_norm": 0.2523982014319497, + "learning_rate": 5.1141800816452144e-08, + "loss": 0.1766, + "step": 21088 + }, + { + "epoch": 0.97, + "grad_norm": 0.3003613142168817, + "learning_rate": 5.0991622226167494e-08, + "loss": 0.2387, + "step": 21089 + }, + { + "epoch": 0.97, + "grad_norm": 0.637288497901785, + "learning_rate": 5.084166389733858e-08, + "loss": 0.3764, + "step": 21090 + }, + { + "epoch": 0.97, + "grad_norm": 0.5670730657497081, + "learning_rate": 5.069192583328719e-08, + "loss": 0.1959, + "step": 21091 + }, + { + "epoch": 0.97, + "grad_norm": 0.310162056162934, + "learning_rate": 5.054240803732624e-08, + "loss": 0.2871, + "step": 21092 + }, + { + "epoch": 0.97, + "grad_norm": 1.6757636151313895, + "learning_rate": 5.039311051276752e-08, + "loss": 0.5598, + "step": 21093 + }, + { + "epoch": 0.97, + "grad_norm": 0.24018036370079948, + "learning_rate": 5.024403326291505e-08, + "loss": 0.1567, + "step": 21094 + }, + { + "epoch": 0.97, + "grad_norm": 0.4163847925276713, + "learning_rate": 5.009517629107063e-08, + "loss": 0.2125, + "step": 21095 + }, + { + "epoch": 0.97, + "grad_norm": 0.33646559433904505, + "learning_rate": 4.994653960052942e-08, + "loss": 0.2993, + "step": 21096 + }, + { + "epoch": 0.97, + "grad_norm": 0.4266638964812246, + "learning_rate": 4.9798123194580994e-08, + "loss": 0.232, + "step": 21097 + }, + { + "epoch": 0.97, + "grad_norm": 0.49100924805239876, + "learning_rate": 4.964992707651273e-08, + "loss": 0.2565, + "step": 21098 + }, + { + "epoch": 0.97, + "grad_norm": 1.5217569695239344, + "learning_rate": 4.9501951249605326e-08, + "loss": 0.5678, + "step": 21099 + }, + { + "epoch": 0.97, + "grad_norm": 0.3495297658278421, + "learning_rate": 4.935419571713285e-08, + "loss": 0.2794, + "step": 21100 + }, + { + "epoch": 0.97, + "grad_norm": 0.35212144113344696, + "learning_rate": 4.920666048236933e-08, + "loss": 0.1941, + "step": 21101 + }, + { + "epoch": 0.97, + "grad_norm": 0.5149780279089359, + "learning_rate": 4.905934554857772e-08, + "loss": 0.2497, + "step": 21102 + }, + { + "epoch": 0.97, + "grad_norm": 0.8744784321572094, + "learning_rate": 4.8912250919023186e-08, + "loss": 0.4955, + "step": 21103 + }, + { + "epoch": 0.97, + "grad_norm": 0.2528673517505503, + "learning_rate": 4.876537659695979e-08, + "loss": 0.223, + "step": 21104 + }, + { + "epoch": 0.97, + "grad_norm": 1.3046576319012568, + "learning_rate": 4.861872258564049e-08, + "loss": 0.4883, + "step": 21105 + }, + { + "epoch": 0.97, + "grad_norm": 0.6646035749145004, + "learning_rate": 4.847228888831046e-08, + "loss": 0.3031, + "step": 21106 + }, + { + "epoch": 0.97, + "grad_norm": 0.41056298233446153, + "learning_rate": 4.832607550821267e-08, + "loss": 0.2456, + "step": 21107 + }, + { + "epoch": 0.97, + "grad_norm": 0.29751743505254297, + "learning_rate": 4.818008244858452e-08, + "loss": 0.2363, + "step": 21108 + }, + { + "epoch": 0.97, + "grad_norm": 0.3575051636913753, + "learning_rate": 4.803430971265677e-08, + "loss": 0.2404, + "step": 21109 + }, + { + "epoch": 0.97, + "grad_norm": 0.39003777610185875, + "learning_rate": 4.788875730365905e-08, + "loss": 0.3075, + "step": 21110 + }, + { + "epoch": 0.97, + "grad_norm": 1.6747443999028828, + "learning_rate": 4.7743425224811014e-08, + "loss": 0.2265, + "step": 21111 + }, + { + "epoch": 0.97, + "grad_norm": 0.33630030558678903, + "learning_rate": 4.759831347933119e-08, + "loss": 0.2692, + "step": 21112 + }, + { + "epoch": 0.97, + "grad_norm": 0.42058105460767614, + "learning_rate": 4.7453422070433685e-08, + "loss": 0.3031, + "step": 21113 + }, + { + "epoch": 0.97, + "grad_norm": 0.3516580337872266, + "learning_rate": 4.730875100132481e-08, + "loss": 0.1551, + "step": 21114 + }, + { + "epoch": 0.97, + "grad_norm": 0.39433957713335116, + "learning_rate": 4.7164300275206465e-08, + "loss": 0.2714, + "step": 21115 + }, + { + "epoch": 0.97, + "grad_norm": 0.372107570831673, + "learning_rate": 4.702006989527941e-08, + "loss": 0.2982, + "step": 21116 + }, + { + "epoch": 0.97, + "grad_norm": 0.4974362338379551, + "learning_rate": 4.6876059864734425e-08, + "loss": 0.2352, + "step": 21117 + }, + { + "epoch": 0.97, + "grad_norm": 0.5936583397743813, + "learning_rate": 4.673227018676119e-08, + "loss": 0.3264, + "step": 21118 + }, + { + "epoch": 0.97, + "grad_norm": 0.44054090911972454, + "learning_rate": 4.658870086454048e-08, + "loss": 0.2843, + "step": 21119 + }, + { + "epoch": 0.97, + "grad_norm": 0.300082661884542, + "learning_rate": 4.644535190125421e-08, + "loss": 0.2005, + "step": 21120 + }, + { + "epoch": 0.97, + "grad_norm": 0.8772850497603905, + "learning_rate": 4.630222330007428e-08, + "loss": 0.5063, + "step": 21121 + }, + { + "epoch": 0.97, + "grad_norm": 0.2724254784174644, + "learning_rate": 4.615931506417038e-08, + "loss": 0.211, + "step": 21122 + }, + { + "epoch": 0.97, + "grad_norm": 0.5464751653214102, + "learning_rate": 4.601662719670441e-08, + "loss": 0.2861, + "step": 21123 + }, + { + "epoch": 0.97, + "grad_norm": 0.4067677371090619, + "learning_rate": 4.58741597008372e-08, + "loss": 0.2482, + "step": 21124 + }, + { + "epoch": 0.97, + "grad_norm": 0.37787775723618844, + "learning_rate": 4.573191257972176e-08, + "loss": 0.2652, + "step": 21125 + }, + { + "epoch": 0.97, + "grad_norm": 0.5389939986745881, + "learning_rate": 4.558988583650781e-08, + "loss": 0.2313, + "step": 21126 + }, + { + "epoch": 0.97, + "grad_norm": 0.4176353016003204, + "learning_rate": 4.54480794743406e-08, + "loss": 0.2142, + "step": 21127 + }, + { + "epoch": 0.97, + "grad_norm": 0.2341460087765664, + "learning_rate": 4.530649349635763e-08, + "loss": 0.2295, + "step": 21128 + }, + { + "epoch": 0.97, + "grad_norm": 0.9263619864837682, + "learning_rate": 4.516512790569416e-08, + "loss": 0.4546, + "step": 21129 + }, + { + "epoch": 0.97, + "grad_norm": 0.5625177551348991, + "learning_rate": 4.502398270548103e-08, + "loss": 0.2255, + "step": 21130 + }, + { + "epoch": 0.97, + "grad_norm": 0.4962413215534032, + "learning_rate": 4.488305789884129e-08, + "loss": 0.2871, + "step": 21131 + }, + { + "epoch": 0.97, + "grad_norm": 0.36205985313862665, + "learning_rate": 4.474235348889577e-08, + "loss": 0.2843, + "step": 21132 + }, + { + "epoch": 0.97, + "grad_norm": 0.36900435914783886, + "learning_rate": 4.460186947876088e-08, + "loss": 0.2187, + "step": 21133 + }, + { + "epoch": 0.97, + "grad_norm": 0.3125631010406034, + "learning_rate": 4.4461605871544136e-08, + "loss": 0.2238, + "step": 21134 + }, + { + "epoch": 0.97, + "grad_norm": 0.33763838686029524, + "learning_rate": 4.4321562670353036e-08, + "loss": 0.2512, + "step": 21135 + }, + { + "epoch": 0.97, + "grad_norm": 0.44480054999027174, + "learning_rate": 4.4181739878286224e-08, + "loss": 0.278, + "step": 21136 + }, + { + "epoch": 0.97, + "grad_norm": 0.4207981674636647, + "learning_rate": 4.404213749844011e-08, + "loss": 0.1966, + "step": 21137 + }, + { + "epoch": 0.97, + "grad_norm": 1.390059346075542, + "learning_rate": 4.390275553390555e-08, + "loss": 0.5865, + "step": 21138 + }, + { + "epoch": 0.97, + "grad_norm": 0.5447445076689306, + "learning_rate": 4.3763593987768974e-08, + "loss": 0.4026, + "step": 21139 + }, + { + "epoch": 0.97, + "grad_norm": 0.2568867802394545, + "learning_rate": 4.3624652863110126e-08, + "loss": 0.2052, + "step": 21140 + }, + { + "epoch": 0.97, + "grad_norm": 0.4258187132900209, + "learning_rate": 4.348593216300545e-08, + "loss": 0.2674, + "step": 21141 + }, + { + "epoch": 0.97, + "grad_norm": 0.6210853711131512, + "learning_rate": 4.334743189052581e-08, + "loss": 0.2759, + "step": 21142 + }, + { + "epoch": 0.97, + "grad_norm": 0.48781223201105295, + "learning_rate": 4.3209152048737656e-08, + "loss": 0.1972, + "step": 21143 + }, + { + "epoch": 0.97, + "grad_norm": 0.37373762865719584, + "learning_rate": 4.307109264070297e-08, + "loss": 0.2859, + "step": 21144 + }, + { + "epoch": 0.97, + "grad_norm": 1.2006803936275638, + "learning_rate": 4.2933253669477096e-08, + "loss": 0.7454, + "step": 21145 + }, + { + "epoch": 0.97, + "grad_norm": 0.3392050791420737, + "learning_rate": 4.2795635138112025e-08, + "loss": 0.2111, + "step": 21146 + }, + { + "epoch": 0.97, + "grad_norm": 0.45404522575058237, + "learning_rate": 4.2658237049655325e-08, + "loss": 0.326, + "step": 21147 + }, + { + "epoch": 0.97, + "grad_norm": 0.25112403325781696, + "learning_rate": 4.25210594071479e-08, + "loss": 0.1764, + "step": 21148 + }, + { + "epoch": 0.97, + "grad_norm": 0.3820738855619782, + "learning_rate": 4.238410221362621e-08, + "loss": 0.293, + "step": 21149 + }, + { + "epoch": 0.97, + "grad_norm": 1.2899239708395975, + "learning_rate": 4.224736547212449e-08, + "loss": 0.3577, + "step": 21150 + }, + { + "epoch": 0.97, + "grad_norm": 0.33520807674430275, + "learning_rate": 4.21108491856681e-08, + "loss": 0.2906, + "step": 21151 + }, + { + "epoch": 0.97, + "grad_norm": 0.7828227675197496, + "learning_rate": 4.1974553357281287e-08, + "loss": 0.2488, + "step": 21152 + }, + { + "epoch": 0.97, + "grad_norm": 0.6763536946890087, + "learning_rate": 4.183847798997831e-08, + "loss": 0.2712, + "step": 21153 + }, + { + "epoch": 0.97, + "grad_norm": 0.3755502869864424, + "learning_rate": 4.170262308677453e-08, + "loss": 0.1857, + "step": 21154 + }, + { + "epoch": 0.97, + "grad_norm": 0.36775123688393413, + "learning_rate": 4.156698865067643e-08, + "loss": 0.2712, + "step": 21155 + }, + { + "epoch": 0.97, + "grad_norm": 0.35837980960294924, + "learning_rate": 4.143157468468717e-08, + "loss": 0.2368, + "step": 21156 + }, + { + "epoch": 0.97, + "grad_norm": 0.7352457177494476, + "learning_rate": 4.1296381191805456e-08, + "loss": 0.3569, + "step": 21157 + }, + { + "epoch": 0.97, + "grad_norm": 0.3447528485542165, + "learning_rate": 4.116140817502223e-08, + "loss": 0.2508, + "step": 21158 + }, + { + "epoch": 0.97, + "grad_norm": 0.4069240183607295, + "learning_rate": 4.102665563732844e-08, + "loss": 0.2374, + "step": 21159 + }, + { + "epoch": 0.97, + "grad_norm": 0.28272824598592494, + "learning_rate": 4.089212358170502e-08, + "loss": 0.1264, + "step": 21160 + }, + { + "epoch": 0.97, + "grad_norm": 0.3336567824170479, + "learning_rate": 4.0757812011131826e-08, + "loss": 0.2527, + "step": 21161 + }, + { + "epoch": 0.97, + "grad_norm": 1.3354131508591367, + "learning_rate": 4.062372092858091e-08, + "loss": 0.5973, + "step": 21162 + }, + { + "epoch": 0.97, + "grad_norm": 0.3500555340706462, + "learning_rate": 4.048985033702213e-08, + "loss": 0.2553, + "step": 21163 + }, + { + "epoch": 0.97, + "grad_norm": 0.3649962646381245, + "learning_rate": 4.035620023941978e-08, + "loss": 0.2658, + "step": 21164 + }, + { + "epoch": 0.97, + "grad_norm": 1.0019637059957476, + "learning_rate": 4.022277063873037e-08, + "loss": 0.5109, + "step": 21165 + }, + { + "epoch": 0.97, + "grad_norm": 0.2827467731650862, + "learning_rate": 4.0089561537910436e-08, + "loss": 0.0766, + "step": 21166 + }, + { + "epoch": 0.97, + "grad_norm": 0.33517833406336506, + "learning_rate": 3.995657293990762e-08, + "loss": 0.2441, + "step": 21167 + }, + { + "epoch": 0.97, + "grad_norm": 0.43954007001851886, + "learning_rate": 3.9823804847667345e-08, + "loss": 0.3241, + "step": 21168 + }, + { + "epoch": 0.97, + "grad_norm": 0.6139278212217922, + "learning_rate": 3.969125726412837e-08, + "loss": 0.2553, + "step": 21169 + }, + { + "epoch": 0.97, + "grad_norm": 0.3895024509949226, + "learning_rate": 3.955893019222501e-08, + "loss": 0.267, + "step": 21170 + }, + { + "epoch": 0.97, + "grad_norm": 0.5150541960225514, + "learning_rate": 3.9426823634887146e-08, + "loss": 0.3739, + "step": 21171 + }, + { + "epoch": 0.97, + "grad_norm": 0.2733817085484941, + "learning_rate": 3.9294937595038e-08, + "loss": 0.1971, + "step": 21172 + }, + { + "epoch": 0.97, + "grad_norm": 0.3278815056491724, + "learning_rate": 3.916327207559967e-08, + "loss": 0.1849, + "step": 21173 + }, + { + "epoch": 0.97, + "grad_norm": 0.7441019052134975, + "learning_rate": 3.9031827079486494e-08, + "loss": 0.3174, + "step": 21174 + }, + { + "epoch": 0.97, + "grad_norm": 0.38421909331348925, + "learning_rate": 3.890060260960726e-08, + "loss": 0.2779, + "step": 21175 + }, + { + "epoch": 0.97, + "grad_norm": 0.3377984463415079, + "learning_rate": 3.8769598668868533e-08, + "loss": 0.2178, + "step": 21176 + }, + { + "epoch": 0.97, + "grad_norm": 1.3234114712323035, + "learning_rate": 3.8638815260170216e-08, + "loss": 0.5345, + "step": 21177 + }, + { + "epoch": 0.97, + "grad_norm": 0.3171097486719009, + "learning_rate": 3.8508252386407766e-08, + "loss": 0.2007, + "step": 21178 + }, + { + "epoch": 0.97, + "grad_norm": 0.2586488596845841, + "learning_rate": 3.837791005047109e-08, + "loss": 0.1942, + "step": 21179 + }, + { + "epoch": 0.97, + "grad_norm": 0.5537220405789994, + "learning_rate": 3.824778825524678e-08, + "loss": 0.3234, + "step": 21180 + }, + { + "epoch": 0.97, + "grad_norm": 0.6891975327188988, + "learning_rate": 3.811788700361474e-08, + "loss": 0.376, + "step": 21181 + }, + { + "epoch": 0.97, + "grad_norm": 0.3167959515814778, + "learning_rate": 3.798820629845157e-08, + "loss": 0.2016, + "step": 21182 + }, + { + "epoch": 0.97, + "grad_norm": 0.49363761044188875, + "learning_rate": 3.78587461426283e-08, + "loss": 0.3204, + "step": 21183 + }, + { + "epoch": 0.97, + "grad_norm": 0.4076611109048864, + "learning_rate": 3.7729506539009306e-08, + "loss": 0.2507, + "step": 21184 + }, + { + "epoch": 0.97, + "grad_norm": 0.300317878697749, + "learning_rate": 3.760048749045897e-08, + "loss": 0.2218, + "step": 21185 + }, + { + "epoch": 0.97, + "grad_norm": 0.6442524984432371, + "learning_rate": 3.747168899983167e-08, + "loss": 0.2419, + "step": 21186 + }, + { + "epoch": 0.97, + "grad_norm": 0.36976103359497225, + "learning_rate": 3.734311106997845e-08, + "loss": 0.3158, + "step": 21187 + }, + { + "epoch": 0.97, + "grad_norm": 0.7121765990205521, + "learning_rate": 3.721475370374705e-08, + "loss": 0.3614, + "step": 21188 + }, + { + "epoch": 0.97, + "grad_norm": 0.41234097706890954, + "learning_rate": 3.7086616903978525e-08, + "loss": 0.2316, + "step": 21189 + }, + { + "epoch": 0.97, + "grad_norm": 0.3098329020265625, + "learning_rate": 3.6958700673510596e-08, + "loss": 0.2373, + "step": 21190 + }, + { + "epoch": 0.97, + "grad_norm": 0.4378150333579222, + "learning_rate": 3.6831005015173224e-08, + "loss": 0.2596, + "step": 21191 + }, + { + "epoch": 0.97, + "grad_norm": 0.36293096165001487, + "learning_rate": 3.6703529931796376e-08, + "loss": 0.2564, + "step": 21192 + }, + { + "epoch": 0.97, + "grad_norm": 1.8038479033501196, + "learning_rate": 3.6576275426200014e-08, + "loss": 0.6379, + "step": 21193 + }, + { + "epoch": 0.97, + "grad_norm": 0.4256762242256026, + "learning_rate": 3.644924150120188e-08, + "loss": 0.2625, + "step": 21194 + }, + { + "epoch": 0.97, + "grad_norm": 0.32622133401488496, + "learning_rate": 3.632242815961418e-08, + "loss": 0.2301, + "step": 21195 + }, + { + "epoch": 0.97, + "grad_norm": 2.1332287721141627, + "learning_rate": 3.619583540424465e-08, + "loss": 0.4592, + "step": 21196 + }, + { + "epoch": 0.97, + "grad_norm": 0.29067740770146955, + "learning_rate": 3.606946323789662e-08, + "loss": 0.2099, + "step": 21197 + }, + { + "epoch": 0.97, + "grad_norm": 0.5377624650099606, + "learning_rate": 3.594331166336784e-08, + "loss": 0.3049, + "step": 21198 + }, + { + "epoch": 0.97, + "grad_norm": 0.3454137926577501, + "learning_rate": 3.5817380683450534e-08, + "loss": 0.2017, + "step": 21199 + }, + { + "epoch": 0.97, + "grad_norm": 0.35488055391906154, + "learning_rate": 3.5691670300932456e-08, + "loss": 0.282, + "step": 21200 + }, + { + "epoch": 0.97, + "grad_norm": 1.3948825295376965, + "learning_rate": 3.5566180518595836e-08, + "loss": 0.6282, + "step": 21201 + }, + { + "epoch": 0.97, + "grad_norm": 0.46387491837264816, + "learning_rate": 3.544091133922179e-08, + "loss": 0.2331, + "step": 21202 + }, + { + "epoch": 0.97, + "grad_norm": 0.3284127838592469, + "learning_rate": 3.531586276558141e-08, + "loss": 0.2464, + "step": 21203 + }, + { + "epoch": 0.97, + "grad_norm": 0.47830274508794735, + "learning_rate": 3.5191034800444724e-08, + "loss": 0.3185, + "step": 21204 + }, + { + "epoch": 0.97, + "grad_norm": 0.1926771375287304, + "learning_rate": 3.506642744657285e-08, + "loss": 0.1135, + "step": 21205 + }, + { + "epoch": 0.97, + "grad_norm": 0.6683990935179004, + "learning_rate": 3.494204070672691e-08, + "loss": 0.3456, + "step": 21206 + }, + { + "epoch": 0.97, + "grad_norm": 0.27475559463820753, + "learning_rate": 3.481787458365915e-08, + "loss": 0.2658, + "step": 21207 + }, + { + "epoch": 0.97, + "grad_norm": 0.7368775766741292, + "learning_rate": 3.4693929080119596e-08, + "loss": 0.2639, + "step": 21208 + }, + { + "epoch": 0.97, + "grad_norm": 0.6157885787519518, + "learning_rate": 3.457020419885049e-08, + "loss": 0.2942, + "step": 21209 + }, + { + "epoch": 0.97, + "grad_norm": 0.3236562004900343, + "learning_rate": 3.4446699942594083e-08, + "loss": 0.208, + "step": 21210 + }, + { + "epoch": 0.97, + "grad_norm": 0.286559086850683, + "learning_rate": 3.432341631408154e-08, + "loss": 0.2536, + "step": 21211 + }, + { + "epoch": 0.97, + "grad_norm": 0.3407040510631902, + "learning_rate": 3.4200353316043986e-08, + "loss": 0.1861, + "step": 21212 + }, + { + "epoch": 0.97, + "grad_norm": 0.6476043268069748, + "learning_rate": 3.407751095120593e-08, + "loss": 0.3231, + "step": 21213 + }, + { + "epoch": 0.97, + "grad_norm": 1.2731813404021057, + "learning_rate": 3.395488922228518e-08, + "loss": 0.3844, + "step": 21214 + }, + { + "epoch": 0.97, + "grad_norm": 0.2839452090941616, + "learning_rate": 3.383248813199846e-08, + "loss": 0.2135, + "step": 21215 + }, + { + "epoch": 0.97, + "grad_norm": 0.5440037389550196, + "learning_rate": 3.371030768305583e-08, + "loss": 0.3183, + "step": 21216 + }, + { + "epoch": 0.97, + "grad_norm": 0.4545933964056159, + "learning_rate": 3.358834787816068e-08, + "loss": 0.2573, + "step": 21217 + }, + { + "epoch": 0.97, + "grad_norm": 0.24017430964761644, + "learning_rate": 3.346660872001306e-08, + "loss": 0.1449, + "step": 21218 + }, + { + "epoch": 0.97, + "grad_norm": 0.31539219481897435, + "learning_rate": 3.3345090211309714e-08, + "loss": 0.275, + "step": 21219 + }, + { + "epoch": 0.97, + "grad_norm": 0.8003366768286112, + "learning_rate": 3.32237923547396e-08, + "loss": 0.3542, + "step": 21220 + }, + { + "epoch": 0.97, + "grad_norm": 0.4013846819438035, + "learning_rate": 3.3102715152989464e-08, + "loss": 0.1926, + "step": 21221 + }, + { + "epoch": 0.97, + "grad_norm": 0.7209293263188188, + "learning_rate": 3.298185860873826e-08, + "loss": 0.3139, + "step": 21222 + }, + { + "epoch": 0.97, + "grad_norm": 0.2787475070774912, + "learning_rate": 3.286122272466164e-08, + "loss": 0.2492, + "step": 21223 + }, + { + "epoch": 0.98, + "grad_norm": 0.9342032863628137, + "learning_rate": 3.2740807503433e-08, + "loss": 0.5369, + "step": 21224 + }, + { + "epoch": 0.98, + "grad_norm": 0.29178973945427933, + "learning_rate": 3.262061294771468e-08, + "loss": 0.1788, + "step": 21225 + }, + { + "epoch": 0.98, + "grad_norm": 0.4512308396879576, + "learning_rate": 3.250063906017009e-08, + "loss": 0.3065, + "step": 21226 + }, + { + "epoch": 0.98, + "grad_norm": 0.43534664975249715, + "learning_rate": 3.238088584345489e-08, + "loss": 0.2576, + "step": 21227 + }, + { + "epoch": 0.98, + "grad_norm": 0.4004113421969863, + "learning_rate": 3.226135330021918e-08, + "loss": 0.2008, + "step": 21228 + }, + { + "epoch": 0.98, + "grad_norm": 1.4152930772629513, + "learning_rate": 3.2142041433109725e-08, + "loss": 0.6204, + "step": 21229 + }, + { + "epoch": 0.98, + "grad_norm": 0.32284866501451115, + "learning_rate": 3.202295024476887e-08, + "loss": 0.2474, + "step": 21230 + }, + { + "epoch": 0.98, + "grad_norm": 0.25706249011883076, + "learning_rate": 3.190407973783338e-08, + "loss": 0.195, + "step": 21231 + }, + { + "epoch": 0.98, + "grad_norm": 0.6353603765712579, + "learning_rate": 3.178542991493339e-08, + "loss": 0.3619, + "step": 21232 + }, + { + "epoch": 0.98, + "grad_norm": 0.8455389317000258, + "learning_rate": 3.166700077869678e-08, + "loss": 0.2639, + "step": 21233 + }, + { + "epoch": 0.98, + "grad_norm": 0.37674966122253767, + "learning_rate": 3.1548792331744795e-08, + "loss": 0.2167, + "step": 21234 + }, + { + "epoch": 0.98, + "grad_norm": 0.35140975981692507, + "learning_rate": 3.143080457669423e-08, + "loss": 0.2987, + "step": 21235 + }, + { + "epoch": 0.98, + "grad_norm": 0.5871168381309729, + "learning_rate": 3.131303751615855e-08, + "loss": 0.3149, + "step": 21236 + }, + { + "epoch": 0.98, + "grad_norm": 0.4197770304680542, + "learning_rate": 3.119549115274456e-08, + "loss": 0.2847, + "step": 21237 + }, + { + "epoch": 0.98, + "grad_norm": 0.29249004434999737, + "learning_rate": 3.107816548905462e-08, + "loss": 0.1914, + "step": 21238 + }, + { + "epoch": 0.98, + "grad_norm": 0.3872772296648083, + "learning_rate": 3.0961060527685546e-08, + "loss": 0.2602, + "step": 21239 + }, + { + "epoch": 0.98, + "grad_norm": 0.5092018362602075, + "learning_rate": 3.084417627122971e-08, + "loss": 0.2948, + "step": 21240 + }, + { + "epoch": 0.98, + "grad_norm": 1.433082363358442, + "learning_rate": 3.0727512722276143e-08, + "loss": 0.3518, + "step": 21241 + }, + { + "epoch": 0.98, + "grad_norm": 0.6410559524884702, + "learning_rate": 3.061106988340612e-08, + "loss": 0.3204, + "step": 21242 + }, + { + "epoch": 0.98, + "grad_norm": 0.24684011441846793, + "learning_rate": 3.04948477571998e-08, + "loss": 0.258, + "step": 21243 + }, + { + "epoch": 0.98, + "grad_norm": 0.33928708251979944, + "learning_rate": 3.0378846346227345e-08, + "loss": 0.142, + "step": 21244 + }, + { + "epoch": 0.98, + "grad_norm": 1.0384358074695823, + "learning_rate": 3.0263065653058923e-08, + "loss": 0.329, + "step": 21245 + }, + { + "epoch": 0.98, + "grad_norm": 0.42327810529431803, + "learning_rate": 3.014750568025804e-08, + "loss": 0.2726, + "step": 21246 + }, + { + "epoch": 0.98, + "grad_norm": 0.3616440827123659, + "learning_rate": 3.003216643038154e-08, + "loss": 0.2597, + "step": 21247 + }, + { + "epoch": 0.98, + "grad_norm": 0.6133712102811347, + "learning_rate": 2.9917047905982934e-08, + "loss": 0.3668, + "step": 21248 + }, + { + "epoch": 0.98, + "grad_norm": 0.3686513266249708, + "learning_rate": 2.9802150109612405e-08, + "loss": 0.2865, + "step": 21249 + }, + { + "epoch": 0.98, + "grad_norm": 0.5102102563509107, + "learning_rate": 2.9687473043813476e-08, + "loss": 0.2812, + "step": 21250 + }, + { + "epoch": 0.98, + "grad_norm": 0.24414369689729915, + "learning_rate": 2.9573016711124113e-08, + "loss": 0.1685, + "step": 21251 + }, + { + "epoch": 0.98, + "grad_norm": 0.39799343110575863, + "learning_rate": 2.945878111407785e-08, + "loss": 0.292, + "step": 21252 + }, + { + "epoch": 0.98, + "grad_norm": 1.368877773107355, + "learning_rate": 2.9344766255204883e-08, + "loss": 0.8098, + "step": 21253 + }, + { + "epoch": 0.98, + "grad_norm": 0.37144858207335923, + "learning_rate": 2.9230972137028745e-08, + "loss": 0.232, + "step": 21254 + }, + { + "epoch": 0.98, + "grad_norm": 0.33987127005080076, + "learning_rate": 2.911739876206965e-08, + "loss": 0.2904, + "step": 21255 + }, + { + "epoch": 0.98, + "grad_norm": 0.5599640184414972, + "learning_rate": 2.9004046132840026e-08, + "loss": 0.2653, + "step": 21256 + }, + { + "epoch": 0.98, + "grad_norm": 0.345666156640323, + "learning_rate": 2.8890914251851198e-08, + "loss": 0.0796, + "step": 21257 + }, + { + "epoch": 0.98, + "grad_norm": 0.4331338850598414, + "learning_rate": 2.8778003121607834e-08, + "loss": 0.2831, + "step": 21258 + }, + { + "epoch": 0.98, + "grad_norm": 0.352709555514618, + "learning_rate": 2.866531274460904e-08, + "loss": 0.2728, + "step": 21259 + }, + { + "epoch": 0.98, + "grad_norm": 0.7054333240007781, + "learning_rate": 2.8552843123349494e-08, + "loss": 0.2726, + "step": 21260 + }, + { + "epoch": 0.98, + "grad_norm": 0.3412544342856643, + "learning_rate": 2.844059426031831e-08, + "loss": 0.2668, + "step": 21261 + }, + { + "epoch": 0.98, + "grad_norm": 0.369696372883487, + "learning_rate": 2.8328566158002392e-08, + "loss": 0.2716, + "step": 21262 + }, + { + "epoch": 0.98, + "grad_norm": 0.3521884000407015, + "learning_rate": 2.8216758818881972e-08, + "loss": 0.1607, + "step": 21263 + }, + { + "epoch": 0.98, + "grad_norm": 0.3961871718288436, + "learning_rate": 2.8105172245430633e-08, + "loss": 0.1935, + "step": 21264 + }, + { + "epoch": 0.98, + "grad_norm": 0.7101404435381233, + "learning_rate": 2.799380644012084e-08, + "loss": 0.4301, + "step": 21265 + }, + { + "epoch": 0.98, + "grad_norm": 0.36668904113978323, + "learning_rate": 2.7882661405416177e-08, + "loss": 0.3166, + "step": 21266 + }, + { + "epoch": 0.98, + "grad_norm": 0.31766907604596556, + "learning_rate": 2.777173714377801e-08, + "loss": 0.2089, + "step": 21267 + }, + { + "epoch": 0.98, + "grad_norm": 1.6691790317194544, + "learning_rate": 2.766103365766215e-08, + "loss": 0.5932, + "step": 21268 + }, + { + "epoch": 0.98, + "grad_norm": 0.34161704165660167, + "learning_rate": 2.7550550949519972e-08, + "loss": 0.1624, + "step": 21269 + }, + { + "epoch": 0.98, + "grad_norm": 0.26600573706564357, + "learning_rate": 2.7440289021797293e-08, + "loss": 0.2051, + "step": 21270 + }, + { + "epoch": 0.98, + "grad_norm": 0.5092789110746891, + "learning_rate": 2.733024787693439e-08, + "loss": 0.3228, + "step": 21271 + }, + { + "epoch": 0.98, + "grad_norm": 0.8702057125583568, + "learning_rate": 2.7220427517368196e-08, + "loss": 0.4356, + "step": 21272 + }, + { + "epoch": 0.98, + "grad_norm": 0.30987222007073606, + "learning_rate": 2.7110827945530106e-08, + "loss": 0.1815, + "step": 21273 + }, + { + "epoch": 0.98, + "grad_norm": 0.4007343467849004, + "learning_rate": 2.700144916384595e-08, + "loss": 0.3157, + "step": 21274 + }, + { + "epoch": 0.98, + "grad_norm": 0.3431377247540255, + "learning_rate": 2.6892291174737127e-08, + "loss": 0.2036, + "step": 21275 + }, + { + "epoch": 0.98, + "grad_norm": 0.3866486044637885, + "learning_rate": 2.6783353980621705e-08, + "loss": 0.3035, + "step": 21276 + }, + { + "epoch": 0.98, + "grad_norm": 0.660129926345083, + "learning_rate": 2.667463758390998e-08, + "loss": 0.2863, + "step": 21277 + }, + { + "epoch": 0.98, + "grad_norm": 0.386075677318688, + "learning_rate": 2.656614198701002e-08, + "loss": 0.3326, + "step": 21278 + }, + { + "epoch": 0.98, + "grad_norm": 0.36646764704006635, + "learning_rate": 2.6457867192322128e-08, + "loss": 0.2509, + "step": 21279 + }, + { + "epoch": 0.98, + "grad_norm": 1.576757556302514, + "learning_rate": 2.63498132022455e-08, + "loss": 0.2143, + "step": 21280 + }, + { + "epoch": 0.98, + "grad_norm": 0.2645309400044281, + "learning_rate": 2.6241980019170445e-08, + "loss": 0.1436, + "step": 21281 + }, + { + "epoch": 0.98, + "grad_norm": 0.33933324455144787, + "learning_rate": 2.613436764548505e-08, + "loss": 0.2398, + "step": 21282 + }, + { + "epoch": 0.98, + "grad_norm": 0.3655010655720267, + "learning_rate": 2.6026976083572963e-08, + "loss": 0.2619, + "step": 21283 + }, + { + "epoch": 0.98, + "grad_norm": 1.2599414169956256, + "learning_rate": 2.5919805335810067e-08, + "loss": 0.7693, + "step": 21284 + }, + { + "epoch": 0.98, + "grad_norm": 0.34157161480711806, + "learning_rate": 2.5812855404568903e-08, + "loss": 0.2623, + "step": 21285 + }, + { + "epoch": 0.98, + "grad_norm": 0.5565150788690916, + "learning_rate": 2.570612629221758e-08, + "loss": 0.3704, + "step": 21286 + }, + { + "epoch": 0.98, + "grad_norm": 0.30421787885274804, + "learning_rate": 2.5599618001120874e-08, + "loss": 0.1587, + "step": 21287 + }, + { + "epoch": 0.98, + "grad_norm": 0.35501860426510085, + "learning_rate": 2.5493330533633564e-08, + "loss": 0.2582, + "step": 21288 + }, + { + "epoch": 0.98, + "grad_norm": 0.8412439799572042, + "learning_rate": 2.5387263892111546e-08, + "loss": 0.3905, + "step": 21289 + }, + { + "epoch": 0.98, + "grad_norm": 0.33771670088479894, + "learning_rate": 2.5281418078900723e-08, + "loss": 0.2689, + "step": 21290 + }, + { + "epoch": 0.98, + "grad_norm": 0.4398475008834995, + "learning_rate": 2.517579309634588e-08, + "loss": 0.2757, + "step": 21291 + }, + { + "epoch": 0.98, + "grad_norm": 0.6290943742878116, + "learning_rate": 2.507038894678626e-08, + "loss": 0.3374, + "step": 21292 + }, + { + "epoch": 0.98, + "grad_norm": 0.4795959606786148, + "learning_rate": 2.4965205632553334e-08, + "loss": 0.2541, + "step": 21293 + }, + { + "epoch": 0.98, + "grad_norm": 0.42918830067401353, + "learning_rate": 2.486024315597635e-08, + "loss": 0.2524, + "step": 21294 + }, + { + "epoch": 0.98, + "grad_norm": 0.25327089427839544, + "learning_rate": 2.475550151938011e-08, + "loss": 0.2173, + "step": 21295 + }, + { + "epoch": 0.98, + "grad_norm": 1.080946765224079, + "learning_rate": 2.4650980725082762e-08, + "loss": 0.5248, + "step": 21296 + }, + { + "epoch": 0.98, + "grad_norm": 0.35778627677436703, + "learning_rate": 2.4546680775398013e-08, + "loss": 0.2706, + "step": 21297 + }, + { + "epoch": 0.98, + "grad_norm": 0.41466058702632164, + "learning_rate": 2.4442601672635125e-08, + "loss": 0.2884, + "step": 21298 + }, + { + "epoch": 0.98, + "grad_norm": 0.9727301430201135, + "learning_rate": 2.433874341909892e-08, + "loss": 0.4358, + "step": 21299 + }, + { + "epoch": 0.98, + "grad_norm": 0.33762389645408836, + "learning_rate": 2.4235106017087562e-08, + "loss": 0.2125, + "step": 21300 + }, + { + "epoch": 0.98, + "grad_norm": 0.5579403047485948, + "learning_rate": 2.413168946889699e-08, + "loss": 0.3124, + "step": 21301 + }, + { + "epoch": 0.98, + "grad_norm": 0.3902427560940118, + "learning_rate": 2.4028493776815375e-08, + "loss": 0.3282, + "step": 21302 + }, + { + "epoch": 0.98, + "grad_norm": 0.23493729017604723, + "learning_rate": 2.3925518943128667e-08, + "loss": 0.1577, + "step": 21303 + }, + { + "epoch": 0.98, + "grad_norm": 1.6799045193712885, + "learning_rate": 2.3822764970115042e-08, + "loss": 0.5659, + "step": 21304 + }, + { + "epoch": 0.98, + "grad_norm": 0.6026877435219661, + "learning_rate": 2.3720231860051567e-08, + "loss": 0.3542, + "step": 21305 + }, + { + "epoch": 0.98, + "grad_norm": 0.25011324802411417, + "learning_rate": 2.361791961520532e-08, + "loss": 0.2201, + "step": 21306 + }, + { + "epoch": 0.98, + "grad_norm": 0.39371531913698155, + "learning_rate": 2.3515828237843376e-08, + "loss": 0.1782, + "step": 21307 + }, + { + "epoch": 0.98, + "grad_norm": 0.4849964366831416, + "learning_rate": 2.3413957730226144e-08, + "loss": 0.3395, + "step": 21308 + }, + { + "epoch": 0.98, + "grad_norm": 0.35801688402775805, + "learning_rate": 2.3312308094607382e-08, + "loss": 0.1732, + "step": 21309 + }, + { + "epoch": 0.98, + "grad_norm": 0.34919518761617685, + "learning_rate": 2.321087933323973e-08, + "loss": 0.2932, + "step": 21310 + }, + { + "epoch": 0.98, + "grad_norm": 0.5854497949209628, + "learning_rate": 2.3109671448366955e-08, + "loss": 0.3206, + "step": 21311 + }, + { + "epoch": 0.98, + "grad_norm": 0.5375385914320893, + "learning_rate": 2.300868444222948e-08, + "loss": 0.2343, + "step": 21312 + }, + { + "epoch": 0.98, + "grad_norm": 0.38273048188306785, + "learning_rate": 2.2907918317064403e-08, + "loss": 0.2294, + "step": 21313 + }, + { + "epoch": 0.98, + "grad_norm": 0.3647377467137911, + "learning_rate": 2.280737307510217e-08, + "loss": 0.2905, + "step": 21314 + }, + { + "epoch": 0.98, + "grad_norm": 0.23397169590163214, + "learning_rate": 2.270704871856877e-08, + "loss": 0.1551, + "step": 21315 + }, + { + "epoch": 0.98, + "grad_norm": 0.4052270864039506, + "learning_rate": 2.2606945249684654e-08, + "loss": 0.2434, + "step": 21316 + }, + { + "epoch": 0.98, + "grad_norm": 0.7502293236598188, + "learning_rate": 2.2507062670665826e-08, + "loss": 0.4108, + "step": 21317 + }, + { + "epoch": 0.98, + "grad_norm": 0.29471859520888355, + "learning_rate": 2.240740098372496e-08, + "loss": 0.2658, + "step": 21318 + }, + { + "epoch": 0.98, + "grad_norm": 0.6622238365736859, + "learning_rate": 2.2307960191066956e-08, + "loss": 0.262, + "step": 21319 + }, + { + "epoch": 0.98, + "grad_norm": 0.45151838436898933, + "learning_rate": 2.2208740294895613e-08, + "loss": 0.2679, + "step": 21320 + }, + { + "epoch": 0.98, + "grad_norm": 0.3652121867557897, + "learning_rate": 2.210974129740473e-08, + "loss": 0.226, + "step": 21321 + }, + { + "epoch": 0.98, + "grad_norm": 0.2983816854341923, + "learning_rate": 2.2010963200786995e-08, + "loss": 0.2409, + "step": 21322 + }, + { + "epoch": 0.98, + "grad_norm": 0.6172571065155857, + "learning_rate": 2.191240600723066e-08, + "loss": 0.3492, + "step": 21323 + }, + { + "epoch": 0.98, + "grad_norm": 0.5041019598930978, + "learning_rate": 2.1814069718916198e-08, + "loss": 0.2589, + "step": 21324 + }, + { + "epoch": 0.98, + "grad_norm": 0.5411510835184253, + "learning_rate": 2.171595433802187e-08, + "loss": 0.3529, + "step": 21325 + }, + { + "epoch": 0.98, + "grad_norm": 0.3723726555736365, + "learning_rate": 2.1618059866718166e-08, + "loss": 0.2617, + "step": 21326 + }, + { + "epoch": 0.98, + "grad_norm": 0.29151194509133554, + "learning_rate": 2.1520386307173346e-08, + "loss": 0.2004, + "step": 21327 + }, + { + "epoch": 0.98, + "grad_norm": 0.47669243734525635, + "learning_rate": 2.1422933661550127e-08, + "loss": 0.2957, + "step": 21328 + }, + { + "epoch": 0.98, + "grad_norm": 0.2995718036469097, + "learning_rate": 2.132570193200567e-08, + "loss": 0.201, + "step": 21329 + }, + { + "epoch": 0.98, + "grad_norm": 0.693968674488601, + "learning_rate": 2.12286911206927e-08, + "loss": 0.3785, + "step": 21330 + }, + { + "epoch": 0.98, + "grad_norm": 0.4376728374311516, + "learning_rate": 2.113190122975839e-08, + "loss": 0.2915, + "step": 21331 + }, + { + "epoch": 0.98, + "grad_norm": 1.5141101124695313, + "learning_rate": 2.1035332261346576e-08, + "loss": 0.3555, + "step": 21332 + }, + { + "epoch": 0.98, + "grad_norm": 0.6060788249286279, + "learning_rate": 2.0938984217594437e-08, + "loss": 0.2501, + "step": 21333 + }, + { + "epoch": 0.98, + "grad_norm": 0.2679824033350691, + "learning_rate": 2.0842857100635826e-08, + "loss": 0.2615, + "step": 21334 + }, + { + "epoch": 0.98, + "grad_norm": 0.25513368066484793, + "learning_rate": 2.074695091259793e-08, + "loss": 0.1117, + "step": 21335 + }, + { + "epoch": 0.98, + "grad_norm": 0.7425642097530736, + "learning_rate": 2.065126565560349e-08, + "loss": 0.3308, + "step": 21336 + }, + { + "epoch": 0.98, + "grad_norm": 0.4401652797310039, + "learning_rate": 2.055580133177304e-08, + "loss": 0.3431, + "step": 21337 + }, + { + "epoch": 0.98, + "grad_norm": 0.37529100286738093, + "learning_rate": 2.046055794321822e-08, + "loss": 0.2973, + "step": 21338 + }, + { + "epoch": 0.98, + "grad_norm": 0.4132268392744198, + "learning_rate": 2.036553549204845e-08, + "loss": 0.1987, + "step": 21339 + }, + { + "epoch": 0.98, + "grad_norm": 0.48773075506266494, + "learning_rate": 2.0270733980366496e-08, + "loss": 0.3223, + "step": 21340 + }, + { + "epoch": 0.98, + "grad_norm": 0.31417343644405044, + "learning_rate": 2.0176153410272902e-08, + "loss": 0.2068, + "step": 21341 + }, + { + "epoch": 0.98, + "grad_norm": 0.3143277721599033, + "learning_rate": 2.0081793783860437e-08, + "loss": 0.1904, + "step": 21342 + }, + { + "epoch": 0.98, + "grad_norm": 0.6360589443059183, + "learning_rate": 1.998765510321743e-08, + "loss": 0.3334, + "step": 21343 + }, + { + "epoch": 0.98, + "grad_norm": 0.7081486553529889, + "learning_rate": 1.989373737042999e-08, + "loss": 0.4229, + "step": 21344 + }, + { + "epoch": 0.98, + "grad_norm": 0.4160069480029801, + "learning_rate": 1.9800040587575342e-08, + "loss": 0.2287, + "step": 21345 + }, + { + "epoch": 0.98, + "grad_norm": 0.3482785764092505, + "learning_rate": 1.9706564756729606e-08, + "loss": 0.2749, + "step": 21346 + }, + { + "epoch": 0.98, + "grad_norm": 0.2824763869286202, + "learning_rate": 1.961330987996113e-08, + "loss": 0.1727, + "step": 21347 + }, + { + "epoch": 0.98, + "grad_norm": 0.6970283086935132, + "learning_rate": 1.9520275959334922e-08, + "loss": 0.0961, + "step": 21348 + }, + { + "epoch": 0.98, + "grad_norm": 0.3944109513640567, + "learning_rate": 1.9427462996910452e-08, + "loss": 0.3157, + "step": 21349 + }, + { + "epoch": 0.98, + "grad_norm": 0.38949912529158587, + "learning_rate": 1.933487099474163e-08, + "loss": 0.299, + "step": 21350 + }, + { + "epoch": 0.98, + "grad_norm": 0.6082916501084299, + "learning_rate": 1.9242499954880145e-08, + "loss": 0.3167, + "step": 21351 + }, + { + "epoch": 0.98, + "grad_norm": 0.48454505816411697, + "learning_rate": 1.9150349879369924e-08, + "loss": 0.2059, + "step": 21352 + }, + { + "epoch": 0.98, + "grad_norm": 0.29144940894457944, + "learning_rate": 1.905842077025155e-08, + "loss": 0.1739, + "step": 21353 + }, + { + "epoch": 0.98, + "grad_norm": 0.34396326961043594, + "learning_rate": 1.896671262955896e-08, + "loss": 0.2637, + "step": 21354 + }, + { + "epoch": 0.98, + "grad_norm": 0.34922781290675475, + "learning_rate": 1.8875225459323853e-08, + "loss": 0.2214, + "step": 21355 + }, + { + "epoch": 0.98, + "grad_norm": 0.6407810511251222, + "learning_rate": 1.878395926157239e-08, + "loss": 0.3867, + "step": 21356 + }, + { + "epoch": 0.98, + "grad_norm": 0.33454042850774107, + "learning_rate": 1.869291403832407e-08, + "loss": 0.283, + "step": 21357 + }, + { + "epoch": 0.98, + "grad_norm": 0.3473279800208587, + "learning_rate": 1.8602089791592836e-08, + "loss": 0.2078, + "step": 21358 + }, + { + "epoch": 0.98, + "grad_norm": 0.38243798884197344, + "learning_rate": 1.851148652339263e-08, + "loss": 0.1612, + "step": 21359 + }, + { + "epoch": 0.98, + "grad_norm": 0.7445926408566705, + "learning_rate": 1.8421104235727406e-08, + "loss": 0.3188, + "step": 21360 + }, + { + "epoch": 0.98, + "grad_norm": 0.39312976958267143, + "learning_rate": 1.8330942930598894e-08, + "loss": 0.236, + "step": 21361 + }, + { + "epoch": 0.98, + "grad_norm": 0.3290475215844878, + "learning_rate": 1.8241002610002167e-08, + "loss": 0.2869, + "step": 21362 + }, + { + "epoch": 0.98, + "grad_norm": 0.5714816575137822, + "learning_rate": 1.8151283275928966e-08, + "loss": 0.3136, + "step": 21363 + }, + { + "epoch": 0.98, + "grad_norm": 0.43796923289986767, + "learning_rate": 1.8061784930366587e-08, + "loss": 0.3078, + "step": 21364 + }, + { + "epoch": 0.98, + "grad_norm": 0.3846413864364781, + "learning_rate": 1.7972507575294564e-08, + "loss": 0.2088, + "step": 21365 + }, + { + "epoch": 0.98, + "grad_norm": 0.47570284698234705, + "learning_rate": 1.7883451212691307e-08, + "loss": 0.2092, + "step": 21366 + }, + { + "epoch": 0.98, + "grad_norm": 0.34079647160835025, + "learning_rate": 1.779461584452746e-08, + "loss": 0.2509, + "step": 21367 + }, + { + "epoch": 0.98, + "grad_norm": 0.5943221061808651, + "learning_rate": 1.770600147276924e-08, + "loss": 0.2921, + "step": 21368 + }, + { + "epoch": 0.98, + "grad_norm": 0.3490005787653116, + "learning_rate": 1.7617608099379514e-08, + "loss": 0.2619, + "step": 21369 + }, + { + "epoch": 0.98, + "grad_norm": 0.37053821908122403, + "learning_rate": 1.7529435726315602e-08, + "loss": 0.2889, + "step": 21370 + }, + { + "epoch": 0.98, + "grad_norm": 0.5093449288495477, + "learning_rate": 1.744148435552706e-08, + "loss": 0.0847, + "step": 21371 + }, + { + "epoch": 0.98, + "grad_norm": 0.486502360872377, + "learning_rate": 1.735375398896344e-08, + "loss": 0.231, + "step": 21372 + }, + { + "epoch": 0.98, + "grad_norm": 0.3036772053791677, + "learning_rate": 1.726624462856652e-08, + "loss": 0.2491, + "step": 21373 + }, + { + "epoch": 0.98, + "grad_norm": 0.4851969622693287, + "learning_rate": 1.717895627627364e-08, + "loss": 0.2716, + "step": 21374 + }, + { + "epoch": 0.98, + "grad_norm": 0.7859446706697011, + "learning_rate": 1.7091888934016586e-08, + "loss": 0.4836, + "step": 21375 + }, + { + "epoch": 0.98, + "grad_norm": 0.37740454108999655, + "learning_rate": 1.700504260372382e-08, + "loss": 0.2426, + "step": 21376 + }, + { + "epoch": 0.98, + "grad_norm": 0.5894058889389044, + "learning_rate": 1.6918417287318245e-08, + "loss": 0.3395, + "step": 21377 + }, + { + "epoch": 0.98, + "grad_norm": 0.2459647796658079, + "learning_rate": 1.6832012986716107e-08, + "loss": 0.1589, + "step": 21378 + }, + { + "epoch": 0.98, + "grad_norm": 0.371035775795198, + "learning_rate": 1.6745829703831428e-08, + "loss": 0.252, + "step": 21379 + }, + { + "epoch": 0.98, + "grad_norm": 0.793475639671936, + "learning_rate": 1.6659867440572685e-08, + "loss": 0.4412, + "step": 21380 + }, + { + "epoch": 0.98, + "grad_norm": 0.3010231957959712, + "learning_rate": 1.657412619884169e-08, + "loss": 0.2326, + "step": 21381 + }, + { + "epoch": 0.98, + "grad_norm": 0.38469722305541704, + "learning_rate": 1.648860598053803e-08, + "loss": 0.2659, + "step": 21382 + }, + { + "epoch": 0.98, + "grad_norm": 0.6916044277506731, + "learning_rate": 1.640330678755464e-08, + "loss": 0.2333, + "step": 21383 + }, + { + "epoch": 0.98, + "grad_norm": 0.46728579785011565, + "learning_rate": 1.63182286217789e-08, + "loss": 0.2, + "step": 21384 + }, + { + "epoch": 0.98, + "grad_norm": 0.27332715095222404, + "learning_rate": 1.6233371485094852e-08, + "loss": 0.2584, + "step": 21385 + }, + { + "epoch": 0.98, + "grad_norm": 0.4842132021336142, + "learning_rate": 1.614873537938211e-08, + "loss": 0.3485, + "step": 21386 + }, + { + "epoch": 0.98, + "grad_norm": 1.7473790827464588, + "learning_rate": 1.6064320306513616e-08, + "loss": 0.3474, + "step": 21387 + }, + { + "epoch": 0.98, + "grad_norm": 0.3254348512201076, + "learning_rate": 1.5980126268358985e-08, + "loss": 0.2546, + "step": 21388 + }, + { + "epoch": 0.98, + "grad_norm": 0.5258563295234254, + "learning_rate": 1.5896153266781177e-08, + "loss": 0.3133, + "step": 21389 + }, + { + "epoch": 0.98, + "grad_norm": 0.4458996195858037, + "learning_rate": 1.5812401303639813e-08, + "loss": 0.3132, + "step": 21390 + }, + { + "epoch": 0.98, + "grad_norm": 0.26577259944799275, + "learning_rate": 1.5728870380788962e-08, + "loss": 0.1733, + "step": 21391 + }, + { + "epoch": 0.98, + "grad_norm": 0.43053350481897545, + "learning_rate": 1.5645560500078262e-08, + "loss": 0.2519, + "step": 21392 + }, + { + "epoch": 0.98, + "grad_norm": 0.3925129341697558, + "learning_rate": 1.5562471663351787e-08, + "loss": 0.3136, + "step": 21393 + }, + { + "epoch": 0.98, + "grad_norm": 0.3201601637457968, + "learning_rate": 1.5479603872448068e-08, + "loss": 0.178, + "step": 21394 + }, + { + "epoch": 0.98, + "grad_norm": 0.9346199355606215, + "learning_rate": 1.5396957129203416e-08, + "loss": 0.4417, + "step": 21395 + }, + { + "epoch": 0.98, + "grad_norm": 0.4884474059913416, + "learning_rate": 1.5314531435447477e-08, + "loss": 0.3097, + "step": 21396 + }, + { + "epoch": 0.98, + "grad_norm": 0.3044605236767771, + "learning_rate": 1.523232679300324e-08, + "loss": 0.1816, + "step": 21397 + }, + { + "epoch": 0.98, + "grad_norm": 0.336287815575612, + "learning_rate": 1.5150343203692574e-08, + "loss": 0.2229, + "step": 21398 + }, + { + "epoch": 0.98, + "grad_norm": 0.5316219945471311, + "learning_rate": 1.506858066932959e-08, + "loss": 0.2703, + "step": 21399 + }, + { + "epoch": 0.98, + "grad_norm": 0.3183849877125431, + "learning_rate": 1.498703919172506e-08, + "loss": 0.1817, + "step": 21400 + }, + { + "epoch": 0.98, + "grad_norm": 0.3691607112251648, + "learning_rate": 1.49057187726831e-08, + "loss": 0.2758, + "step": 21401 + }, + { + "epoch": 0.98, + "grad_norm": 0.7430593630501673, + "learning_rate": 1.4824619414005592e-08, + "loss": 0.3655, + "step": 21402 + }, + { + "epoch": 0.98, + "grad_norm": 0.3206136959706364, + "learning_rate": 1.4743741117486666e-08, + "loss": 0.2218, + "step": 21403 + }, + { + "epoch": 0.98, + "grad_norm": 0.2972440402852377, + "learning_rate": 1.4663083884917107e-08, + "loss": 0.1671, + "step": 21404 + }, + { + "epoch": 0.98, + "grad_norm": 0.2659607812568832, + "learning_rate": 1.4582647718083264e-08, + "loss": 0.2268, + "step": 21405 + }, + { + "epoch": 0.98, + "grad_norm": 0.3621797524273526, + "learning_rate": 1.4502432618765938e-08, + "loss": 0.2392, + "step": 21406 + }, + { + "epoch": 0.98, + "grad_norm": 0.9116437916921559, + "learning_rate": 1.4422438588740372e-08, + "loss": 0.2356, + "step": 21407 + }, + { + "epoch": 0.98, + "grad_norm": 0.8701648671885002, + "learning_rate": 1.4342665629777375e-08, + "loss": 0.3714, + "step": 21408 + }, + { + "epoch": 0.98, + "grad_norm": 0.2885970029428302, + "learning_rate": 1.4263113743643308e-08, + "loss": 0.2598, + "step": 21409 + }, + { + "epoch": 0.98, + "grad_norm": 0.47969700176231017, + "learning_rate": 1.4183782932100099e-08, + "loss": 0.2451, + "step": 21410 + }, + { + "epoch": 0.98, + "grad_norm": 0.29292654065527207, + "learning_rate": 1.4104673196903007e-08, + "loss": 0.183, + "step": 21411 + }, + { + "epoch": 0.98, + "grad_norm": 0.3320630433037302, + "learning_rate": 1.4025784539803966e-08, + "loss": 0.2691, + "step": 21412 + }, + { + "epoch": 0.98, + "grad_norm": 0.36344725918382764, + "learning_rate": 1.3947116962549356e-08, + "loss": 0.2915, + "step": 21413 + }, + { + "epoch": 0.98, + "grad_norm": 0.8431029062533919, + "learning_rate": 1.3868670466882229e-08, + "loss": 0.2808, + "step": 21414 + }, + { + "epoch": 0.98, + "grad_norm": 0.43288094741148736, + "learning_rate": 1.379044505453675e-08, + "loss": 0.2651, + "step": 21415 + }, + { + "epoch": 0.98, + "grad_norm": 0.6125725358015277, + "learning_rate": 1.371244072724598e-08, + "loss": 0.407, + "step": 21416 + }, + { + "epoch": 0.98, + "grad_norm": 0.2243136041331308, + "learning_rate": 1.3634657486737424e-08, + "loss": 0.1797, + "step": 21417 + }, + { + "epoch": 0.98, + "grad_norm": 0.435768294323353, + "learning_rate": 1.3557095334731928e-08, + "loss": 0.2798, + "step": 21418 + }, + { + "epoch": 0.98, + "grad_norm": 0.44151959667755075, + "learning_rate": 1.347975427294701e-08, + "loss": 0.323, + "step": 21419 + }, + { + "epoch": 0.98, + "grad_norm": 1.4135068496302063, + "learning_rate": 1.340263430309574e-08, + "loss": 0.1857, + "step": 21420 + }, + { + "epoch": 0.98, + "grad_norm": 0.28211412407628406, + "learning_rate": 1.3325735426885644e-08, + "loss": 0.2308, + "step": 21421 + }, + { + "epoch": 0.98, + "grad_norm": 0.5166156718762321, + "learning_rate": 1.3249057646017583e-08, + "loss": 0.3569, + "step": 21422 + }, + { + "epoch": 0.98, + "grad_norm": 0.41855496783663165, + "learning_rate": 1.3172600962190196e-08, + "loss": 0.1448, + "step": 21423 + }, + { + "epoch": 0.98, + "grad_norm": 0.34279074320989944, + "learning_rate": 1.3096365377095466e-08, + "loss": 0.2573, + "step": 21424 + }, + { + "epoch": 0.98, + "grad_norm": 0.3612364547001016, + "learning_rate": 1.302035089242204e-08, + "loss": 0.3015, + "step": 21425 + }, + { + "epoch": 0.98, + "grad_norm": 0.41494882316468046, + "learning_rate": 1.2944557509853017e-08, + "loss": 0.2168, + "step": 21426 + }, + { + "epoch": 0.98, + "grad_norm": 0.36323578903913956, + "learning_rate": 1.2868985231065945e-08, + "loss": 0.1842, + "step": 21427 + }, + { + "epoch": 0.98, + "grad_norm": 1.1827290624131916, + "learning_rate": 1.2793634057732817e-08, + "loss": 0.6482, + "step": 21428 + }, + { + "epoch": 0.98, + "grad_norm": 0.39474622838025364, + "learning_rate": 1.2718503991523412e-08, + "loss": 0.2954, + "step": 21429 + }, + { + "epoch": 0.98, + "grad_norm": 0.3509030599713413, + "learning_rate": 1.2643595034100842e-08, + "loss": 0.2057, + "step": 21430 + }, + { + "epoch": 0.98, + "grad_norm": 0.5418254391523893, + "learning_rate": 1.2568907187122669e-08, + "loss": 0.271, + "step": 21431 + }, + { + "epoch": 0.98, + "grad_norm": 0.41381462209647335, + "learning_rate": 1.2494440452243128e-08, + "loss": 0.206, + "step": 21432 + }, + { + "epoch": 0.98, + "grad_norm": 0.30476195298695946, + "learning_rate": 1.2420194831110899e-08, + "loss": 0.1999, + "step": 21433 + }, + { + "epoch": 0.98, + "grad_norm": 0.5972368383314923, + "learning_rate": 1.2346170325368001e-08, + "loss": 0.4318, + "step": 21434 + }, + { + "epoch": 0.98, + "grad_norm": 0.694370966541876, + "learning_rate": 1.2272366936656454e-08, + "loss": 0.3913, + "step": 21435 + }, + { + "epoch": 0.98, + "grad_norm": 0.3865850516238881, + "learning_rate": 1.2198784666607177e-08, + "loss": 0.1931, + "step": 21436 + }, + { + "epoch": 0.98, + "grad_norm": 0.26412259132432797, + "learning_rate": 1.2125423516851087e-08, + "loss": 0.2315, + "step": 21437 + }, + { + "epoch": 0.98, + "grad_norm": 0.640891446762223, + "learning_rate": 1.205228348901133e-08, + "loss": 0.2151, + "step": 21438 + }, + { + "epoch": 0.98, + "grad_norm": 0.6677313329424343, + "learning_rate": 1.197936458470772e-08, + "loss": 0.3154, + "step": 21439 + }, + { + "epoch": 0.98, + "grad_norm": 0.3170431112606332, + "learning_rate": 1.1906666805554523e-08, + "loss": 0.2562, + "step": 21440 + }, + { + "epoch": 0.98, + "grad_norm": 0.466802882961251, + "learning_rate": 1.1834190153160452e-08, + "loss": 0.3261, + "step": 21441 + }, + { + "epoch": 0.99, + "grad_norm": 0.4338549633435439, + "learning_rate": 1.176193462913089e-08, + "loss": 0.2742, + "step": 21442 + }, + { + "epoch": 0.99, + "grad_norm": 0.39209135191580274, + "learning_rate": 1.1689900235065666e-08, + "loss": 0.1453, + "step": 21443 + }, + { + "epoch": 0.99, + "grad_norm": 0.36593671518857845, + "learning_rate": 1.1618086972559062e-08, + "loss": 0.24, + "step": 21444 + }, + { + "epoch": 0.99, + "grad_norm": 0.34187874634592147, + "learning_rate": 1.1546494843200917e-08, + "loss": 0.2653, + "step": 21445 + }, + { + "epoch": 0.99, + "grad_norm": 0.5127058783860863, + "learning_rate": 1.147512384857663e-08, + "loss": 0.2753, + "step": 21446 + }, + { + "epoch": 0.99, + "grad_norm": 0.583544291950978, + "learning_rate": 1.1403973990266049e-08, + "loss": 0.3324, + "step": 21447 + }, + { + "epoch": 0.99, + "grad_norm": 0.46383763193807964, + "learning_rate": 1.1333045269843467e-08, + "loss": 0.2646, + "step": 21448 + }, + { + "epoch": 0.99, + "grad_norm": 0.31313801674014163, + "learning_rate": 1.1262337688880964e-08, + "loss": 0.2334, + "step": 21449 + }, + { + "epoch": 0.99, + "grad_norm": 0.35724312795616076, + "learning_rate": 1.1191851248942842e-08, + "loss": 0.1528, + "step": 21450 + }, + { + "epoch": 0.99, + "grad_norm": 0.8234689721889634, + "learning_rate": 1.1121585951590074e-08, + "loss": 0.4019, + "step": 21451 + }, + { + "epoch": 0.99, + "grad_norm": 0.36334023472000265, + "learning_rate": 1.1051541798378086e-08, + "loss": 0.2677, + "step": 21452 + }, + { + "epoch": 0.99, + "grad_norm": 0.35157013984139307, + "learning_rate": 1.0981718790856744e-08, + "loss": 0.2595, + "step": 21453 + }, + { + "epoch": 0.99, + "grad_norm": 0.7744710429800236, + "learning_rate": 1.0912116930572592e-08, + "loss": 0.2674, + "step": 21454 + }, + { + "epoch": 0.99, + "grad_norm": 0.4121275045507686, + "learning_rate": 1.084273621906773e-08, + "loss": 0.2726, + "step": 21455 + }, + { + "epoch": 0.99, + "grad_norm": 0.22254162498513125, + "learning_rate": 1.0773576657875372e-08, + "loss": 0.1614, + "step": 21456 + }, + { + "epoch": 0.99, + "grad_norm": 0.6856359784415711, + "learning_rate": 1.0704638248529852e-08, + "loss": 0.3856, + "step": 21457 + }, + { + "epoch": 0.99, + "grad_norm": 0.38923153033621244, + "learning_rate": 1.0635920992554393e-08, + "loss": 0.302, + "step": 21458 + }, + { + "epoch": 0.99, + "grad_norm": 0.7749417265166105, + "learning_rate": 1.0567424891473332e-08, + "loss": 0.3157, + "step": 21459 + }, + { + "epoch": 0.99, + "grad_norm": 0.40492033841371855, + "learning_rate": 1.0499149946801012e-08, + "loss": 0.2578, + "step": 21460 + }, + { + "epoch": 0.99, + "grad_norm": 0.36771311176778, + "learning_rate": 1.0431096160048449e-08, + "loss": 0.2836, + "step": 21461 + }, + { + "epoch": 0.99, + "grad_norm": 0.35313126036104814, + "learning_rate": 1.0363263532724433e-08, + "loss": 0.0852, + "step": 21462 + }, + { + "epoch": 0.99, + "grad_norm": 0.44407847151800756, + "learning_rate": 1.0295652066328877e-08, + "loss": 0.2766, + "step": 21463 + }, + { + "epoch": 0.99, + "grad_norm": 0.5294988892810603, + "learning_rate": 1.022826176236058e-08, + "loss": 0.2988, + "step": 21464 + }, + { + "epoch": 0.99, + "grad_norm": 0.35221151794253763, + "learning_rate": 1.0161092622309465e-08, + "loss": 0.2929, + "step": 21465 + }, + { + "epoch": 0.99, + "grad_norm": 0.3582539767322185, + "learning_rate": 1.009414464766323e-08, + "loss": 0.1653, + "step": 21466 + }, + { + "epoch": 0.99, + "grad_norm": 0.6277326791983803, + "learning_rate": 1.0027417839904019e-08, + "loss": 0.3302, + "step": 21467 + }, + { + "epoch": 0.99, + "grad_norm": 0.23630472445010045, + "learning_rate": 9.960912200510652e-09, + "loss": 0.2118, + "step": 21468 + }, + { + "epoch": 0.99, + "grad_norm": 0.7891375730060838, + "learning_rate": 9.894627730953066e-09, + "loss": 0.2411, + "step": 21469 + }, + { + "epoch": 0.99, + "grad_norm": 0.3667227319722737, + "learning_rate": 9.828564432700082e-09, + "loss": 0.2638, + "step": 21470 + }, + { + "epoch": 0.99, + "grad_norm": 0.9885794648831816, + "learning_rate": 9.762722307213867e-09, + "loss": 0.5114, + "step": 21471 + }, + { + "epoch": 0.99, + "grad_norm": 0.3194600554178285, + "learning_rate": 9.697101355952143e-09, + "loss": 0.2245, + "step": 21472 + }, + { + "epoch": 0.99, + "grad_norm": 0.3994135132179661, + "learning_rate": 9.63170158036819e-09, + "loss": 0.2796, + "step": 21473 + }, + { + "epoch": 0.99, + "grad_norm": 0.3496199808465557, + "learning_rate": 9.566522981909743e-09, + "loss": 0.1525, + "step": 21474 + }, + { + "epoch": 0.99, + "grad_norm": 0.5022273922304606, + "learning_rate": 9.501565562018977e-09, + "loss": 0.2201, + "step": 21475 + }, + { + "epoch": 0.99, + "grad_norm": 0.31959411721682784, + "learning_rate": 9.436829322134743e-09, + "loss": 0.2661, + "step": 21476 + }, + { + "epoch": 0.99, + "grad_norm": 0.5379090744882348, + "learning_rate": 9.372314263690342e-09, + "loss": 0.3833, + "step": 21477 + }, + { + "epoch": 0.99, + "grad_norm": 0.8755134587203777, + "learning_rate": 9.308020388113515e-09, + "loss": 0.3975, + "step": 21478 + }, + { + "epoch": 0.99, + "grad_norm": 0.365790249320034, + "learning_rate": 9.243947696828682e-09, + "loss": 0.1951, + "step": 21479 + }, + { + "epoch": 0.99, + "grad_norm": 0.28762831275755524, + "learning_rate": 9.180096191253595e-09, + "loss": 0.2371, + "step": 21480 + }, + { + "epoch": 0.99, + "grad_norm": 0.5390754470619237, + "learning_rate": 9.116465872800462e-09, + "loss": 0.2958, + "step": 21481 + }, + { + "epoch": 0.99, + "grad_norm": 0.28065442167481003, + "learning_rate": 9.053056742880373e-09, + "loss": 0.1627, + "step": 21482 + }, + { + "epoch": 0.99, + "grad_norm": 1.2738478937073574, + "learning_rate": 8.98986880289665e-09, + "loss": 0.7054, + "step": 21483 + }, + { + "epoch": 0.99, + "grad_norm": 0.33266014770109253, + "learning_rate": 8.926902054247067e-09, + "loss": 0.2774, + "step": 21484 + }, + { + "epoch": 0.99, + "grad_norm": 0.5164999263111189, + "learning_rate": 8.86415649832717e-09, + "loss": 0.2116, + "step": 21485 + }, + { + "epoch": 0.99, + "grad_norm": 0.8015601157427507, + "learning_rate": 8.80163213652474e-09, + "loss": 0.3166, + "step": 21486 + }, + { + "epoch": 0.99, + "grad_norm": 0.38568232967970706, + "learning_rate": 8.739328970224226e-09, + "loss": 0.2058, + "step": 21487 + }, + { + "epoch": 0.99, + "grad_norm": 0.2799198887920922, + "learning_rate": 8.677247000805632e-09, + "loss": 0.2126, + "step": 21488 + }, + { + "epoch": 0.99, + "grad_norm": 0.34370398996394924, + "learning_rate": 8.615386229642309e-09, + "loss": 0.2579, + "step": 21489 + }, + { + "epoch": 0.99, + "grad_norm": 1.650960231161021, + "learning_rate": 8.553746658105377e-09, + "loss": 0.6912, + "step": 21490 + }, + { + "epoch": 0.99, + "grad_norm": 0.35387691082339146, + "learning_rate": 8.492328287558194e-09, + "loss": 0.2459, + "step": 21491 + }, + { + "epoch": 0.99, + "grad_norm": 0.3886156113408288, + "learning_rate": 8.431131119361891e-09, + "loss": 0.2543, + "step": 21492 + }, + { + "epoch": 0.99, + "grad_norm": 0.8117505151600072, + "learning_rate": 8.37015515486872e-09, + "loss": 0.3802, + "step": 21493 + }, + { + "epoch": 0.99, + "grad_norm": 0.34974377887697355, + "learning_rate": 8.309400395432043e-09, + "loss": 0.2714, + "step": 21494 + }, + { + "epoch": 0.99, + "grad_norm": 0.2812790479749563, + "learning_rate": 8.24886684239412e-09, + "loss": 0.1373, + "step": 21495 + }, + { + "epoch": 0.99, + "grad_norm": 0.25470779800657534, + "learning_rate": 8.188554497096101e-09, + "loss": 0.205, + "step": 21496 + }, + { + "epoch": 0.99, + "grad_norm": 0.4186588818496496, + "learning_rate": 8.128463360872473e-09, + "loss": 0.2792, + "step": 21497 + }, + { + "epoch": 0.99, + "grad_norm": 0.5996969658273908, + "learning_rate": 8.068593435055504e-09, + "loss": 0.2718, + "step": 21498 + }, + { + "epoch": 0.99, + "grad_norm": 0.8765596812821451, + "learning_rate": 8.008944720969692e-09, + "loss": 0.4364, + "step": 21499 + }, + { + "epoch": 0.99, + "grad_norm": 0.3781751287092715, + "learning_rate": 7.949517219935088e-09, + "loss": 0.2664, + "step": 21500 + }, + { + "epoch": 0.99, + "grad_norm": 0.25806078207905836, + "learning_rate": 7.89031093326731e-09, + "loss": 0.2079, + "step": 21501 + }, + { + "epoch": 0.99, + "grad_norm": 0.5353428668317922, + "learning_rate": 7.831325862277527e-09, + "loss": 0.1914, + "step": 21502 + }, + { + "epoch": 0.99, + "grad_norm": 0.37773229060676133, + "learning_rate": 7.772562008272477e-09, + "loss": 0.2397, + "step": 21503 + }, + { + "epoch": 0.99, + "grad_norm": 0.34732635752874064, + "learning_rate": 7.714019372551118e-09, + "loss": 0.2758, + "step": 21504 + }, + { + "epoch": 0.99, + "grad_norm": 1.0754734057994448, + "learning_rate": 7.655697956411301e-09, + "loss": 0.23, + "step": 21505 + }, + { + "epoch": 0.99, + "grad_norm": 0.3580077234594924, + "learning_rate": 7.597597761144215e-09, + "loss": 0.2878, + "step": 21506 + }, + { + "epoch": 0.99, + "grad_norm": 0.5444363463080844, + "learning_rate": 7.539718788034389e-09, + "loss": 0.2639, + "step": 21507 + }, + { + "epoch": 0.99, + "grad_norm": 0.30472176311330224, + "learning_rate": 7.48206103836524e-09, + "loss": 0.1884, + "step": 21508 + }, + { + "epoch": 0.99, + "grad_norm": 0.33138166734152413, + "learning_rate": 7.424624513411305e-09, + "loss": 0.2473, + "step": 21509 + }, + { + "epoch": 0.99, + "grad_norm": 0.7757974803418083, + "learning_rate": 7.3674092144460084e-09, + "loss": 0.3701, + "step": 21510 + }, + { + "epoch": 0.99, + "grad_norm": 0.4527654640556501, + "learning_rate": 7.310415142735006e-09, + "loss": 0.2301, + "step": 21511 + }, + { + "epoch": 0.99, + "grad_norm": 0.275330227219164, + "learning_rate": 7.2536422995406196e-09, + "loss": 0.2419, + "step": 21512 + }, + { + "epoch": 0.99, + "grad_norm": 1.2544213035942349, + "learning_rate": 7.197090686119623e-09, + "loss": 0.7327, + "step": 21513 + }, + { + "epoch": 0.99, + "grad_norm": 0.3134565393270457, + "learning_rate": 7.140760303723237e-09, + "loss": 0.1596, + "step": 21514 + }, + { + "epoch": 0.99, + "grad_norm": 0.34427015539532196, + "learning_rate": 7.084651153599353e-09, + "loss": 0.2382, + "step": 21515 + }, + { + "epoch": 0.99, + "grad_norm": 0.4130954961021895, + "learning_rate": 7.02876323699031e-09, + "loss": 0.2623, + "step": 21516 + }, + { + "epoch": 0.99, + "grad_norm": 1.3692640506794043, + "learning_rate": 6.973096555132896e-09, + "loss": 0.4834, + "step": 21517 + }, + { + "epoch": 0.99, + "grad_norm": 0.310703230365906, + "learning_rate": 6.91765110925946e-09, + "loss": 0.1786, + "step": 21518 + }, + { + "epoch": 0.99, + "grad_norm": 1.5202894869163943, + "learning_rate": 6.862426900597907e-09, + "loss": 0.7263, + "step": 21519 + }, + { + "epoch": 0.99, + "grad_norm": 0.28627431757707894, + "learning_rate": 6.8074239303705934e-09, + "loss": 0.2317, + "step": 21520 + }, + { + "epoch": 0.99, + "grad_norm": 0.31074701785982906, + "learning_rate": 6.7526421997954335e-09, + "loss": 0.182, + "step": 21521 + }, + { + "epoch": 0.99, + "grad_norm": 0.4553187842930531, + "learning_rate": 6.698081710084792e-09, + "loss": 0.2523, + "step": 21522 + }, + { + "epoch": 0.99, + "grad_norm": 0.36486479948261447, + "learning_rate": 6.6437424624477e-09, + "loss": 0.2848, + "step": 21523 + }, + { + "epoch": 0.99, + "grad_norm": 0.3093268905736733, + "learning_rate": 6.58962445808653e-09, + "loss": 0.1836, + "step": 21524 + }, + { + "epoch": 0.99, + "grad_norm": 1.1686933530312147, + "learning_rate": 6.535727698199213e-09, + "loss": 0.5545, + "step": 21525 + }, + { + "epoch": 0.99, + "grad_norm": 0.838983469809667, + "learning_rate": 6.482052183978127e-09, + "loss": 0.3639, + "step": 21526 + }, + { + "epoch": 0.99, + "grad_norm": 0.34065939524257677, + "learning_rate": 6.428597916613433e-09, + "loss": 0.2356, + "step": 21527 + }, + { + "epoch": 0.99, + "grad_norm": 0.2330062967001011, + "learning_rate": 6.375364897287517e-09, + "loss": 0.1666, + "step": 21528 + }, + { + "epoch": 0.99, + "grad_norm": 1.3909180525133036, + "learning_rate": 6.322353127178326e-09, + "loss": 0.6702, + "step": 21529 + }, + { + "epoch": 0.99, + "grad_norm": 0.36392344916243163, + "learning_rate": 6.269562607461588e-09, + "loss": 0.2784, + "step": 21530 + }, + { + "epoch": 0.99, + "grad_norm": 1.2204739891367613, + "learning_rate": 6.216993339303034e-09, + "loss": 0.2814, + "step": 21531 + }, + { + "epoch": 0.99, + "grad_norm": 0.3580824049823755, + "learning_rate": 6.164645323869511e-09, + "loss": 0.3015, + "step": 21532 + }, + { + "epoch": 0.99, + "grad_norm": 0.38677085565020597, + "learning_rate": 6.112518562317871e-09, + "loss": 0.2418, + "step": 21533 + }, + { + "epoch": 0.99, + "grad_norm": 0.16722957125355134, + "learning_rate": 6.060613055802744e-09, + "loss": 0.0704, + "step": 21534 + }, + { + "epoch": 0.99, + "grad_norm": 0.3704672482723745, + "learning_rate": 6.008928805473213e-09, + "loss": 0.2902, + "step": 21535 + }, + { + "epoch": 0.99, + "grad_norm": 0.44685106206105607, + "learning_rate": 5.957465812473917e-09, + "loss": 0.2899, + "step": 21536 + }, + { + "epoch": 0.99, + "grad_norm": 0.5071838821214395, + "learning_rate": 5.906224077943945e-09, + "loss": 0.2621, + "step": 21537 + }, + { + "epoch": 0.99, + "grad_norm": 0.7673621789069253, + "learning_rate": 5.855203603017945e-09, + "loss": 0.3381, + "step": 21538 + }, + { + "epoch": 0.99, + "grad_norm": 0.43794477105658364, + "learning_rate": 5.804404388825013e-09, + "loss": 0.2671, + "step": 21539 + }, + { + "epoch": 0.99, + "grad_norm": 0.2696375840193878, + "learning_rate": 5.753826436490917e-09, + "loss": 0.2196, + "step": 21540 + }, + { + "epoch": 0.99, + "grad_norm": 0.3569859371725361, + "learning_rate": 5.7034697471336495e-09, + "loss": 0.1197, + "step": 21541 + }, + { + "epoch": 0.99, + "grad_norm": 0.44894690529766557, + "learning_rate": 5.653334321868986e-09, + "loss": 0.28, + "step": 21542 + }, + { + "epoch": 0.99, + "grad_norm": 0.49035358332072015, + "learning_rate": 5.603420161807149e-09, + "loss": 0.324, + "step": 21543 + }, + { + "epoch": 0.99, + "grad_norm": 0.3508351822123145, + "learning_rate": 5.553727268051701e-09, + "loss": 0.2386, + "step": 21544 + }, + { + "epoch": 0.99, + "grad_norm": 0.4361404344993259, + "learning_rate": 5.504255641705092e-09, + "loss": 0.2635, + "step": 21545 + }, + { + "epoch": 0.99, + "grad_norm": 0.274315518625277, + "learning_rate": 5.455005283862002e-09, + "loss": 0.1641, + "step": 21546 + }, + { + "epoch": 0.99, + "grad_norm": 0.5219978532385382, + "learning_rate": 5.405976195611562e-09, + "loss": 0.271, + "step": 21547 + }, + { + "epoch": 0.99, + "grad_norm": 0.2777436173040839, + "learning_rate": 5.357168378039568e-09, + "loss": 0.2315, + "step": 21548 + }, + { + "epoch": 0.99, + "grad_norm": 0.7901886025911812, + "learning_rate": 5.308581832226267e-09, + "loss": 0.507, + "step": 21549 + }, + { + "epoch": 0.99, + "grad_norm": 0.743255209664263, + "learning_rate": 5.260216559249687e-09, + "loss": 0.2775, + "step": 21550 + }, + { + "epoch": 0.99, + "grad_norm": 0.3219815475448342, + "learning_rate": 5.212072560177861e-09, + "loss": 0.2252, + "step": 21551 + }, + { + "epoch": 0.99, + "grad_norm": 0.37816095996664706, + "learning_rate": 5.1641498360777146e-09, + "loss": 0.2865, + "step": 21552 + }, + { + "epoch": 0.99, + "grad_norm": 0.3594253891255938, + "learning_rate": 5.116448388009509e-09, + "loss": 0.2036, + "step": 21553 + }, + { + "epoch": 0.99, + "grad_norm": 0.39636592024092515, + "learning_rate": 5.068968217031289e-09, + "loss": 0.1958, + "step": 21554 + }, + { + "epoch": 0.99, + "grad_norm": 0.5575924883322314, + "learning_rate": 5.021709324192214e-09, + "loss": 0.3227, + "step": 21555 + }, + { + "epoch": 0.99, + "grad_norm": 0.3723546818495945, + "learning_rate": 4.974671710539225e-09, + "loss": 0.311, + "step": 21556 + }, + { + "epoch": 0.99, + "grad_norm": 0.8473967610236064, + "learning_rate": 4.9278553771137105e-09, + "loss": 0.1106, + "step": 21557 + }, + { + "epoch": 0.99, + "grad_norm": 0.3478989295637733, + "learning_rate": 4.881260324951509e-09, + "loss": 0.2208, + "step": 21558 + }, + { + "epoch": 0.99, + "grad_norm": 0.29964458311911185, + "learning_rate": 4.834886555085128e-09, + "loss": 0.2615, + "step": 21559 + }, + { + "epoch": 0.99, + "grad_norm": 0.42164599629970967, + "learning_rate": 4.788734068541523e-09, + "loss": 0.1961, + "step": 21560 + }, + { + "epoch": 0.99, + "grad_norm": 0.4923722461072486, + "learning_rate": 4.74280286634099e-09, + "loss": 0.3216, + "step": 21561 + }, + { + "epoch": 0.99, + "grad_norm": 1.6467087831318428, + "learning_rate": 4.697092949501603e-09, + "loss": 0.5366, + "step": 21562 + }, + { + "epoch": 0.99, + "grad_norm": 0.36283506164023727, + "learning_rate": 4.651604319035885e-09, + "loss": 0.2164, + "step": 21563 + }, + { + "epoch": 0.99, + "grad_norm": 0.29628124220668456, + "learning_rate": 4.606336975948589e-09, + "loss": 0.229, + "step": 21564 + }, + { + "epoch": 0.99, + "grad_norm": 0.509862357281291, + "learning_rate": 4.561290921243355e-09, + "loss": 0.2557, + "step": 21565 + }, + { + "epoch": 0.99, + "grad_norm": 0.5249777831643306, + "learning_rate": 4.516466155918276e-09, + "loss": 0.301, + "step": 21566 + }, + { + "epoch": 0.99, + "grad_norm": 0.3660895893222812, + "learning_rate": 4.471862680964778e-09, + "loss": 0.2303, + "step": 21567 + }, + { + "epoch": 0.99, + "grad_norm": 0.36306604770389955, + "learning_rate": 4.427480497369852e-09, + "loss": 0.2895, + "step": 21568 + }, + { + "epoch": 0.99, + "grad_norm": 0.45055986115795227, + "learning_rate": 4.383319606117153e-09, + "loss": 0.2642, + "step": 21569 + }, + { + "epoch": 0.99, + "grad_norm": 0.7234005571377594, + "learning_rate": 4.33938000818368e-09, + "loss": 0.2131, + "step": 21570 + }, + { + "epoch": 0.99, + "grad_norm": 0.2567419376302108, + "learning_rate": 4.2956617045419865e-09, + "loss": 0.2408, + "step": 21571 + }, + { + "epoch": 0.99, + "grad_norm": 0.42850693458520744, + "learning_rate": 4.252164696161298e-09, + "loss": 0.2665, + "step": 21572 + }, + { + "epoch": 0.99, + "grad_norm": 0.4938350227972083, + "learning_rate": 4.208888984003068e-09, + "loss": 0.2695, + "step": 21573 + }, + { + "epoch": 0.99, + "grad_norm": 1.6190327224947545, + "learning_rate": 4.165834569026528e-09, + "loss": 0.5385, + "step": 21574 + }, + { + "epoch": 0.99, + "grad_norm": 0.4928285194951692, + "learning_rate": 4.123001452183139e-09, + "loss": 0.3384, + "step": 21575 + }, + { + "epoch": 0.99, + "grad_norm": 0.2874082242426235, + "learning_rate": 4.0803896344232545e-09, + "loss": 0.2138, + "step": 21576 + }, + { + "epoch": 0.99, + "grad_norm": 0.4060456345182681, + "learning_rate": 4.037999116689451e-09, + "loss": 0.254, + "step": 21577 + }, + { + "epoch": 0.99, + "grad_norm": 0.371309652995185, + "learning_rate": 3.995829899918757e-09, + "loss": 0.2061, + "step": 21578 + }, + { + "epoch": 0.99, + "grad_norm": 0.32730304052825715, + "learning_rate": 3.953881985047092e-09, + "loss": 0.2616, + "step": 21579 + }, + { + "epoch": 0.99, + "grad_norm": 0.5294288356817469, + "learning_rate": 3.912155373002602e-09, + "loss": 0.2636, + "step": 21580 + }, + { + "epoch": 0.99, + "grad_norm": 0.8019539014481959, + "learning_rate": 3.8706500647078814e-09, + "loss": 0.2732, + "step": 21581 + }, + { + "epoch": 0.99, + "grad_norm": 0.40061723361616414, + "learning_rate": 3.829366061083306e-09, + "loss": 0.2636, + "step": 21582 + }, + { + "epoch": 0.99, + "grad_norm": 0.35489475647924795, + "learning_rate": 3.788303363041479e-09, + "loss": 0.241, + "step": 21583 + }, + { + "epoch": 0.99, + "grad_norm": 0.4261941466040029, + "learning_rate": 3.747461971492783e-09, + "loss": 0.2365, + "step": 21584 + }, + { + "epoch": 0.99, + "grad_norm": 0.4259577061041173, + "learning_rate": 3.7068418873398293e-09, + "loss": 0.3338, + "step": 21585 + }, + { + "epoch": 0.99, + "grad_norm": 0.5368930353343816, + "learning_rate": 3.666443111484119e-09, + "loss": 0.1535, + "step": 21586 + }, + { + "epoch": 0.99, + "grad_norm": 0.30414809369924783, + "learning_rate": 3.6262656448182722e-09, + "loss": 0.2404, + "step": 21587 + }, + { + "epoch": 0.99, + "grad_norm": 0.4065528642939639, + "learning_rate": 3.586309488231576e-09, + "loss": 0.2684, + "step": 21588 + }, + { + "epoch": 0.99, + "grad_norm": 0.5523473273685721, + "learning_rate": 3.5465746426099902e-09, + "loss": 0.267, + "step": 21589 + }, + { + "epoch": 0.99, + "grad_norm": 0.4143058538191836, + "learning_rate": 3.5070611088317e-09, + "loss": 0.2534, + "step": 21590 + }, + { + "epoch": 0.99, + "grad_norm": 0.4093892735673874, + "learning_rate": 3.4677688877737812e-09, + "loss": 0.3019, + "step": 21591 + }, + { + "epoch": 0.99, + "grad_norm": 0.24394009469562805, + "learning_rate": 3.4286979803033193e-09, + "loss": 0.1983, + "step": 21592 + }, + { + "epoch": 0.99, + "grad_norm": 1.0644738864111938, + "learning_rate": 3.3898483872873976e-09, + "loss": 0.1322, + "step": 21593 + }, + { + "epoch": 0.99, + "grad_norm": 0.3557023150661273, + "learning_rate": 3.351220109585329e-09, + "loss": 0.2762, + "step": 21594 + }, + { + "epoch": 0.99, + "grad_norm": 0.40290461880223555, + "learning_rate": 3.3128131480519856e-09, + "loss": 0.2969, + "step": 21595 + }, + { + "epoch": 0.99, + "grad_norm": 0.5063733834187837, + "learning_rate": 3.2746275035377972e-09, + "loss": 0.1325, + "step": 21596 + }, + { + "epoch": 0.99, + "grad_norm": 0.37482864600818017, + "learning_rate": 3.236663176889865e-09, + "loss": 0.294, + "step": 21597 + }, + { + "epoch": 0.99, + "grad_norm": 0.2842164375955541, + "learning_rate": 3.1989201689452964e-09, + "loss": 0.1542, + "step": 21598 + }, + { + "epoch": 0.99, + "grad_norm": 0.35699383052726724, + "learning_rate": 3.1613984805423105e-09, + "loss": 0.2346, + "step": 21599 + }, + { + "epoch": 0.99, + "grad_norm": 0.3581259809965335, + "learning_rate": 3.1240981125113535e-09, + "loss": 0.2795, + "step": 21600 + }, + { + "epoch": 0.99, + "grad_norm": 0.9199713566271871, + "learning_rate": 3.0870190656773214e-09, + "loss": 0.4416, + "step": 21601 + }, { "epoch": 0.99, - "grad_norm": 0.3629011642961109, - "learning_rate": 4.6566751753163166e-09, - "loss": 0.2429, - "step": 17241 + "grad_norm": 0.7487161676811744, + "learning_rate": 3.050161340861779e-09, + "loss": 0.1641, + "step": 21602 }, { "epoch": 0.99, - "grad_norm": 0.455365093725818, - "learning_rate": 4.600063931002874e-09, - "loss": 0.2451, - "step": 17242 + "grad_norm": 0.3206002966688541, + "learning_rate": 3.013524938880741e-09, + "loss": 0.2771, + "step": 21603 }, { "epoch": 0.99, - "grad_norm": 0.336152325585062, - "learning_rate": 4.543798826959211e-09, - "loss": 0.2254, - "step": 17243 + "grad_norm": 0.3326284789686114, + "learning_rate": 2.977109860544669e-09, + "loss": 0.2299, + "step": 21604 }, { "epoch": 0.99, - "grad_norm": 0.3599515795554048, - "learning_rate": 4.4878798651337705e-09, - "loss": 0.2767, - "step": 17244 + "grad_norm": 0.5048964944722827, + "learning_rate": 2.940916106659586e-09, + "loss": 0.2576, + "step": 21605 }, { "epoch": 0.99, - "grad_norm": 0.9079207839441679, - "learning_rate": 4.4323070474638906e-09, - "loss": 0.4227, - "step": 17245 + "grad_norm": 0.36812986538371545, + "learning_rate": 2.9049436780281825e-09, + "loss": 0.214, + "step": 21606 }, { "epoch": 0.99, - "grad_norm": 0.3120814844657969, - "learning_rate": 4.377080375873588e-09, - "loss": 0.2139, - "step": 17246 + "grad_norm": 0.3816476173484007, + "learning_rate": 2.8691925754453785e-09, + "loss": 0.2951, + "step": 21607 }, { "epoch": 0.99, - "grad_norm": 0.2816458631063591, - "learning_rate": 4.322199852274667e-09, - "loss": 0.2005, - "step": 17247 + "grad_norm": 0.9341432930965753, + "learning_rate": 2.8336627997038735e-09, + "loss": 0.3964, + "step": 21608 }, { "epoch": 0.99, - "grad_norm": 0.34555457410158374, - "learning_rate": 4.267665478567829e-09, - "loss": 0.2776, - "step": 17248 + "grad_norm": 0.33102507118690233, + "learning_rate": 2.7983543515897048e-09, + "loss": 0.2021, + "step": 21609 }, { "epoch": 0.99, - "grad_norm": 0.3508270629408411, - "learning_rate": 4.213477256642673e-09, - "loss": 0.22, - "step": 17249 + "grad_norm": 0.3180966371696221, + "learning_rate": 2.76326723188447e-09, + "loss": 0.2016, + "step": 21610 }, { "epoch": 0.99, - "grad_norm": 0.8936437076660343, - "learning_rate": 4.159635188375477e-09, - "loss": 0.3986, - "step": 17250 + "grad_norm": 0.37614340133159213, + "learning_rate": 2.7284014413642144e-09, + "loss": 0.2891, + "step": 21611 }, { "epoch": 0.99, - "grad_norm": 1.2272218348029869, - "learning_rate": 4.106139275629195e-09, - "loss": 0.6651, - "step": 17251 + "grad_norm": 0.31408486227283594, + "learning_rate": 2.693756980802764e-09, + "loss": 0.2105, + "step": 21612 }, { "epoch": 0.99, - "grad_norm": 0.22314666519834417, - "learning_rate": 4.0529895202579e-09, - "loss": 0.2085, - "step": 17252 + "grad_norm": 1.5666803004603664, + "learning_rate": 2.6593338509650623e-09, + "loss": 0.4198, + "step": 21613 }, { "epoch": 0.99, - "grad_norm": 1.4471815041901033, - "learning_rate": 4.00018592410123e-09, - "loss": 0.6434, - "step": 17253 + "grad_norm": 1.30172625271836, + "learning_rate": 2.6251320526149427e-09, + "loss": 0.8052, + "step": 21614 }, { "epoch": 0.99, - "grad_norm": 0.3906800807951317, - "learning_rate": 3.947728488988833e-09, - "loss": 0.2578, - "step": 17254 + "grad_norm": 0.25023752532296084, + "learning_rate": 2.591151586508467e-09, + "loss": 0.2135, + "step": 21615 }, { "epoch": 0.99, - "grad_norm": 0.2559969138747373, - "learning_rate": 3.895617216735925e-09, - "loss": 0.199, - "step": 17255 + "grad_norm": 0.982252797759143, + "learning_rate": 2.5573924533983664e-09, + "loss": 0.4262, + "step": 21616 }, { "epoch": 0.99, - "grad_norm": 0.3663610961053545, - "learning_rate": 3.843852109148838e-09, - "loss": 0.2337, - "step": 17256 + "grad_norm": 0.4040939107815881, + "learning_rate": 2.523854654031821e-09, + "loss": 0.246, + "step": 21617 }, { "epoch": 0.99, - "grad_norm": 1.1749116983977523, - "learning_rate": 3.792433168019471e-09, - "loss": 0.7044, - "step": 17257 + "grad_norm": 0.24627529001706758, + "learning_rate": 2.49053818915157e-09, + "loss": 0.2031, + "step": 21618 }, { "epoch": 0.99, - "grad_norm": 0.32863069988556126, - "learning_rate": 3.741360395127513e-09, - "loss": 0.2506, - "step": 17258 + "grad_norm": 0.3905765089336634, + "learning_rate": 2.4574430594948016e-09, + "loss": 0.2399, + "step": 21619 }, { "epoch": 0.99, - "grad_norm": 0.7208685948356803, - "learning_rate": 3.6906337922426593e-09, - "loss": 0.2665, - "step": 17259 + "grad_norm": 1.1977269485675561, + "learning_rate": 2.4245692657942633e-09, + "loss": 0.6318, + "step": 21620 }, { "epoch": 0.99, - "grad_norm": 0.3628388138696765, - "learning_rate": 3.640253361121282e-09, - "loss": 0.309, - "step": 17260 + "grad_norm": 0.33922133935384097, + "learning_rate": 2.3919168087782607e-09, + "loss": 0.2594, + "step": 21621 }, { "epoch": 0.99, - "grad_norm": 0.36290377979163024, - "learning_rate": 3.590219103508652e-09, - "loss": 0.2359, - "step": 17261 + "grad_norm": 0.8376659865420893, + "learning_rate": 2.3594856891695493e-09, + "loss": 0.2451, + "step": 21622 }, { "epoch": 0.99, - "grad_norm": 0.3023237550567913, - "learning_rate": 3.540531021135607e-09, - "loss": 0.1374, - "step": 17262 + "grad_norm": 0.33234190090878546, + "learning_rate": 2.3272759076864437e-09, + "loss": 0.2819, + "step": 21623 }, { "epoch": 0.99, - "grad_norm": 0.3912028186810661, - "learning_rate": 3.491189115725213e-09, - "loss": 0.3087, - "step": 17263 + "grad_norm": 0.38347436208434454, + "learning_rate": 2.2952874650405964e-09, + "loss": 0.2674, + "step": 21624 }, { "epoch": 0.99, - "grad_norm": 0.3489042396908763, - "learning_rate": 3.4421933889849936e-09, - "loss": 0.2758, - "step": 17264 + "grad_norm": 0.39807186990993204, + "learning_rate": 2.2635203619414405e-09, + "loss": 0.0992, + "step": 21625 }, { "epoch": 0.99, - "grad_norm": 0.3906870321219293, - "learning_rate": 3.3935438426113687e-09, - "loss": 0.262, - "step": 17265 + "grad_norm": 0.4417410141708921, + "learning_rate": 2.2319745990928566e-09, + "loss": 0.3349, + "step": 21626 }, { "epoch": 0.99, - "grad_norm": 0.9233473327277593, - "learning_rate": 3.3452404782896577e-09, - "loss": 0.4265, - "step": 17266 + "grad_norm": 0.35693221833620314, + "learning_rate": 2.200650177190955e-09, + "loss": 0.2532, + "step": 21627 }, { "epoch": 0.99, - "grad_norm": 0.2371891988216668, - "learning_rate": 3.2972832976918557e-09, - "loss": 0.1978, - "step": 17267 + "grad_norm": 0.4528213277687044, + "learning_rate": 2.1695470969318454e-09, + "loss": 0.2608, + "step": 21628 }, { "epoch": 0.99, - "grad_norm": 0.26164169308525725, - "learning_rate": 3.2496723024799672e-09, - "loss": 0.2508, - "step": 17268 + "grad_norm": 0.9343348367527564, + "learning_rate": 2.138665359002756e-09, + "loss": 0.4589, + "step": 21629 }, { "epoch": 0.99, - "grad_norm": 1.0699215886321223, - "learning_rate": 3.2024074943015626e-09, - "loss": 0.4488, - "step": 17269 + "grad_norm": 0.25642444842145096, + "learning_rate": 2.108004964086474e-09, + "loss": 0.1975, + "step": 21630 }, { "epoch": 0.99, - "grad_norm": 0.3099846689515529, - "learning_rate": 3.1554888747942213e-09, - "loss": 0.2469, - "step": 17270 + "grad_norm": 0.2790452139464055, + "learning_rate": 2.077565912863566e-09, + "loss": 0.2272, + "step": 21631 }, { "epoch": 0.99, - "grad_norm": 0.6191979829959496, - "learning_rate": 3.10891644558331e-09, - "loss": 0.3591, - "step": 17271 + "grad_norm": 1.0958564138246014, + "learning_rate": 2.0473482060079375e-09, + "loss": 0.4654, + "step": 21632 }, { "epoch": 0.99, - "grad_norm": 0.34374932351710824, - "learning_rate": 3.0626902082797615e-09, - "loss": 0.2373, - "step": 17272 + "grad_norm": 0.3397165909891794, + "learning_rate": 2.0173518441868324e-09, + "loss": 0.2546, + "step": 21633 }, { "epoch": 0.99, - "grad_norm": 0.34319801121283044, - "learning_rate": 3.0168101644845183e-09, - "loss": 0.2517, - "step": 17273 + "grad_norm": 0.6078773316892453, + "learning_rate": 1.9875768280663843e-09, + "loss": 0.3436, + "step": 21634 }, { "epoch": 0.99, - "grad_norm": 0.38067607576018514, - "learning_rate": 2.9712763157885293e-09, - "loss": 0.1608, - "step": 17274 + "grad_norm": 0.3808509143965247, + "learning_rate": 1.9580231583038457e-09, + "loss": 0.24, + "step": 21635 }, { "epoch": 0.99, - "grad_norm": 0.3449780354209424, - "learning_rate": 2.9260886637672014e-09, - "loss": 0.2681, - "step": 17275 + "grad_norm": 0.35517559597608905, + "learning_rate": 1.928690835555358e-09, + "loss": 0.2388, + "step": 21636 }, { "epoch": 0.99, - "grad_norm": 0.30939155012806313, - "learning_rate": 2.881247209984839e-09, - "loss": 0.2485, - "step": 17276 + "grad_norm": 0.3595316387492313, + "learning_rate": 1.8995798604681813e-09, + "loss": 0.1653, + "step": 21637 }, { "epoch": 0.99, - "grad_norm": 0.6205897551993651, - "learning_rate": 2.8367519559957537e-09, - "loss": 0.3882, - "step": 17277 + "grad_norm": 0.349162835841552, + "learning_rate": 1.8706902336884656e-09, + "loss": 0.2648, + "step": 21638 }, { "epoch": 0.99, - "grad_norm": 1.1477515882330112, - "learning_rate": 2.792602903339825e-09, - "loss": 0.2529, - "step": 17278 + "grad_norm": 0.33474948582430675, + "learning_rate": 1.8420219558556995e-09, + "loss": 0.2512, + "step": 21639 }, { "epoch": 0.99, - "grad_norm": 0.38265407718892425, - "learning_rate": 2.7488000535458303e-09, - "loss": 0.2418, - "step": 17279 + "grad_norm": 0.6328446175820863, + "learning_rate": 1.81357502760382e-09, + "loss": 0.3557, + "step": 21640 }, { "epoch": 0.99, - "grad_norm": 0.19879501472534586, - "learning_rate": 2.7053434081314447e-09, - "loss": 0.2037, - "step": 17280 + "grad_norm": 0.5537088921641299, + "learning_rate": 1.7853494495634338e-09, + "loss": 0.1492, + "step": 21641 }, { "epoch": 0.99, - "grad_norm": 0.9142794037777149, - "learning_rate": 2.6622329686010196e-09, - "loss": 0.4647, - "step": 17281 + "grad_norm": 0.47202978151328984, + "learning_rate": 1.7573452223584865e-09, + "loss": 0.285, + "step": 21642 }, { "epoch": 0.99, - "grad_norm": 0.28941532117602325, - "learning_rate": 2.619468736446695e-09, - "loss": 0.1886, - "step": 17282 + "grad_norm": 0.2605883340992969, + "learning_rate": 1.7295623466107026e-09, + "loss": 0.2321, + "step": 21643 }, { "epoch": 0.99, - "grad_norm": 0.6324264222405012, - "learning_rate": 2.5770507131517297e-09, - "loss": 0.4013, - "step": 17283 + "grad_norm": 0.9993597437921435, + "learning_rate": 1.7020008229329255e-09, + "loss": 0.4501, + "step": 21644 }, { "epoch": 0.99, - "grad_norm": 0.328680654110541, - "learning_rate": 2.5349789001827274e-09, - "loss": 0.2716, - "step": 17284 + "grad_norm": 0.306825167228871, + "learning_rate": 1.6746606519357777e-09, + "loss": 0.1865, + "step": 21645 }, { "epoch": 0.99, - "grad_norm": 0.2842382251427557, - "learning_rate": 2.4932532989974113e-09, - "loss": 0.1838, - "step": 17285 + "grad_norm": 0.4456645673173325, + "learning_rate": 1.6475418342265515e-09, + "loss": 0.328, + "step": 21646 }, { "epoch": 0.99, - "grad_norm": 0.32087338368593543, - "learning_rate": 2.4518739110412913e-09, - "loss": 0.1578, - "step": 17286 + "grad_norm": 0.5685285193969339, + "learning_rate": 1.6206443704036568e-09, + "loss": 0.3143, + "step": 21647 }, { "epoch": 0.99, - "grad_norm": 0.46121708923217086, - "learning_rate": 2.410840737746556e-09, - "loss": 0.3881, - "step": 17287 + "grad_norm": 0.3350194777285541, + "learning_rate": 1.5939682610621732e-09, + "loss": 0.1778, + "step": 21648 }, { "epoch": 0.99, - "grad_norm": 0.25180801069509645, - "learning_rate": 2.37015378053429e-09, - "loss": 0.2123, - "step": 17288 + "grad_norm": 0.38565327715846165, + "learning_rate": 1.56751350679496e-09, + "loss": 0.2076, + "step": 21649 }, { "epoch": 0.99, - "grad_norm": 0.6193436792258021, - "learning_rate": 2.329813040814477e-09, - "loss": 0.3735, - "step": 17289 + "grad_norm": 0.49259567506199814, + "learning_rate": 1.5412801081859941e-09, + "loss": 0.3374, + "step": 21650 }, { "epoch": 0.99, - "grad_norm": 1.283115078097724, - "learning_rate": 2.289818519982667e-09, - "loss": 0.4848, - "step": 17290 + "grad_norm": 0.2822211063300327, + "learning_rate": 1.5152680658159225e-09, + "loss": 0.2118, + "step": 21651 }, { "epoch": 0.99, - "grad_norm": 0.2806743140172014, - "learning_rate": 2.2501702194244192e-09, - "loss": 0.1837, - "step": 17291 + "grad_norm": 0.7308099899489343, + "learning_rate": 1.489477380262061e-09, + "loss": 0.4148, + "step": 21652 }, { "epoch": 0.99, - "grad_norm": 0.28516428759808665, - "learning_rate": 2.2108681405141885e-09, - "loss": 0.2424, - "step": 17292 + "grad_norm": 1.615600908726532, + "learning_rate": 1.4639080520939541e-09, + "loss": 0.6796, + "step": 21653 }, { "epoch": 0.99, - "grad_norm": 0.38473609798441183, - "learning_rate": 2.1719122846097783e-09, - "loss": 0.2753, - "step": 17293 + "grad_norm": 0.3510468935890721, + "learning_rate": 1.4385600818778156e-09, + "loss": 0.2857, + "step": 21654 }, { "epoch": 0.99, - "grad_norm": 0.31851548707689087, - "learning_rate": 2.1333026530634403e-09, - "loss": 0.238, - "step": 17294 + "grad_norm": 0.2883813521152375, + "learning_rate": 1.4134334701754182e-09, + "loss": 0.2042, + "step": 21655 }, { "epoch": 0.99, - "grad_norm": 1.181637073888693, - "learning_rate": 2.0950392472107726e-09, - "loss": 0.3125, - "step": 17295 + "grad_norm": 0.480955536926555, + "learning_rate": 1.388528217544094e-09, + "loss": 0.2676, + "step": 21656 }, { "epoch": 0.99, - "grad_norm": 0.3795736909128083, - "learning_rate": 2.0571220683762717e-09, - "loss": 0.2708, - "step": 17296 + "grad_norm": 0.330343129049868, + "learning_rate": 1.363844324532293e-09, + "loss": 0.2492, + "step": 21657 }, { "epoch": 0.99, - "grad_norm": 0.6764029907443159, - "learning_rate": 2.019551117874441e-09, - "loss": 0.3151, - "step": 17297 + "grad_norm": 0.9936849173522314, + "learning_rate": 1.3393817916895756e-09, + "loss": 0.3374, + "step": 21658 }, { "epoch": 0.99, - "grad_norm": 0.200451458051388, - "learning_rate": 1.9823263970042416e-09, - "loss": 0.1499, - "step": 17298 + "grad_norm": 0.4541444252575229, + "learning_rate": 1.3151406195544002e-09, + "loss": 0.3139, + "step": 21659 }, { - "epoch": 0.99, - "grad_norm": 0.34620613205315875, - "learning_rate": 1.9454479070579735e-09, - "loss": 0.2849, - "step": 17299 + "epoch": 1.0, + "grad_norm": 0.7146052675798962, + "learning_rate": 1.2911208086663351e-09, + "loss": 0.3306, + "step": 21660 }, { - "epoch": 0.99, - "grad_norm": 0.5328903980935618, - "learning_rate": 1.9089156493101722e-09, - "loss": 0.3001, - "step": 17300 + "epoch": 1.0, + "grad_norm": 0.22350510364993992, + "learning_rate": 1.267322359556067e-09, + "loss": 0.1526, + "step": 21661 }, { - "epoch": 0.99, - "grad_norm": 0.342820967041687, - "learning_rate": 1.8727296250264924e-09, - "loss": 0.2557, - "step": 17301 + "epoch": 1.0, + "grad_norm": 0.3449199083898967, + "learning_rate": 1.2437452727498412e-09, + "loss": 0.3104, + "step": 21662 }, { - "epoch": 0.99, - "grad_norm": 1.1405798965958176, - "learning_rate": 1.8368898354603759e-09, - "loss": 0.4979, - "step": 17302 + "epoch": 1.0, + "grad_norm": 0.6527753256633516, + "learning_rate": 1.220389548770573e-09, + "loss": 0.3164, + "step": 21663 }, { - "epoch": 0.99, - "grad_norm": 0.3883494449017486, - "learning_rate": 1.8013962818530516e-09, - "loss": 0.273, - "step": 17303 + "epoch": 1.0, + "grad_norm": 0.40491874997345795, + "learning_rate": 1.1972551881345162e-09, + "loss": 0.2483, + "step": 21664 }, { - "epoch": 0.99, - "grad_norm": 0.21653975646538426, - "learning_rate": 1.7662489654324267e-09, - "loss": 0.206, - "step": 17304 + "epoch": 1.0, + "grad_norm": 1.886718547954293, + "learning_rate": 1.1743421913545938e-09, + "loss": 0.5242, + "step": 21665 }, { - "epoch": 0.99, - "grad_norm": 0.7855936171737142, - "learning_rate": 1.7314478874175255e-09, - "loss": 0.406, - "step": 17305 + "epoch": 1.0, + "grad_norm": 0.42985751505636405, + "learning_rate": 1.1516505589381777e-09, + "loss": 0.2723, + "step": 21666 }, { - "epoch": 0.99, - "grad_norm": 0.43869514092740386, - "learning_rate": 1.6969930490129406e-09, - "loss": 0.2855, - "step": 17306 + "epoch": 1.0, + "grad_norm": 0.23350890288194356, + "learning_rate": 1.1291802913859784e-09, + "loss": 0.2226, + "step": 21667 }, { - "epoch": 0.99, - "grad_norm": 0.2802090575204732, - "learning_rate": 1.662884451411051e-09, - "loss": 0.2516, - "step": 17307 + "epoch": 1.0, + "grad_norm": 0.7039442467593681, + "learning_rate": 1.1069313891975964e-09, + "loss": 0.2848, + "step": 21668 }, { - "epoch": 0.99, - "grad_norm": 0.5347954446359646, - "learning_rate": 1.6291220957942443e-09, - "loss": 0.23, - "step": 17308 + "epoch": 1.0, + "grad_norm": 0.43966574829754296, + "learning_rate": 1.0849038528648603e-09, + "loss": 0.2676, + "step": 21669 }, { - "epoch": 0.99, - "grad_norm": 0.43073846683388817, - "learning_rate": 1.5957059833293653e-09, - "loss": 0.2637, - "step": 17309 + "epoch": 1.0, + "grad_norm": 0.2870216514598382, + "learning_rate": 1.0630976828740481e-09, + "loss": 0.2509, + "step": 21670 }, { - "epoch": 0.99, - "grad_norm": 0.5787939035509605, - "learning_rate": 1.5626361151765967e-09, - "loss": 0.3245, - "step": 17310 + "epoch": 1.0, + "grad_norm": 0.5169265317743014, + "learning_rate": 1.0415128797103268e-09, + "loss": 0.1967, + "step": 21671 }, { - "epoch": 0.99, - "grad_norm": 0.30066704859583493, - "learning_rate": 1.5299124924794684e-09, - "loss": 0.2534, - "step": 17311 + "epoch": 1.0, + "grad_norm": 0.4754034371688888, + "learning_rate": 1.0201494438499825e-09, + "loss": 0.2529, + "step": 21672 }, { - "epoch": 0.99, - "grad_norm": 0.3953423827246001, - "learning_rate": 1.497535116371518e-09, - "loss": 0.2949, - "step": 17312 + "epoch": 1.0, + "grad_norm": 0.3493277251201268, + "learning_rate": 9.990073757670804e-10, + "loss": 0.2248, + "step": 21673 }, { - "epoch": 0.99, - "grad_norm": 0.5339570681109478, - "learning_rate": 1.4655039879740706e-09, - "loss": 0.3517, - "step": 17313 + "epoch": 1.0, + "grad_norm": 0.33316429168516826, + "learning_rate": 9.78086675927914e-10, + "loss": 0.2603, + "step": 21674 }, { - "epoch": 0.99, - "grad_norm": 0.2465958574137639, - "learning_rate": 1.4338191083962394e-09, - "loss": 0.0919, - "step": 17314 + "epoch": 1.0, + "grad_norm": 0.3820858136520356, + "learning_rate": 9.573873447976667e-10, + "loss": 0.2625, + "step": 21675 }, { - "epoch": 0.99, - "grad_norm": 0.3375883743148337, - "learning_rate": 1.4024804787349244e-09, - "loss": 0.2508, - "step": 17315 + "epoch": 1.0, + "grad_norm": 1.360922936304401, + "learning_rate": 9.369093828326403e-10, + "loss": 0.7765, + "step": 21676 }, { - "epoch": 0.99, - "grad_norm": 0.34858518129319876, - "learning_rate": 1.371488100075924e-09, - "loss": 0.2834, - "step": 17316 + "epoch": 1.0, + "grad_norm": 0.5293869206914182, + "learning_rate": 9.166527904880263e-10, + "loss": 0.096, + "step": 21677 }, { - "epoch": 0.99, - "grad_norm": 0.5732830362171144, - "learning_rate": 1.3408419734928235e-09, - "loss": 0.2936, - "step": 17317 + "epoch": 1.0, + "grad_norm": 0.3845172999466061, + "learning_rate": 8.966175682112443e-10, + "loss": 0.2632, + "step": 21678 }, { "epoch": 1.0, - "grad_norm": 0.5490046678633443, - "learning_rate": 1.3105421000458861e-09, - "loss": 0.3244, - "step": 17318 + "grad_norm": 0.353691667508306, + "learning_rate": 8.768037164463838e-10, + "loss": 0.2678, + "step": 21679 }, { "epoch": 1.0, - "grad_norm": 0.25973593846651, - "learning_rate": 1.280588480785383e-09, - "loss": 0.2386, - "step": 17319 + "grad_norm": 0.6699491706264941, + "learning_rate": 8.572112356308726e-10, + "loss": 0.3688, + "step": 21680 }, { "epoch": 1.0, - "grad_norm": 0.40068654789339847, - "learning_rate": 1.2509811167482622e-09, - "loss": 0.2308, - "step": 17320 + "grad_norm": 0.35179555512076083, + "learning_rate": 8.37840126199918e-10, + "loss": 0.1502, + "step": 21681 }, { "epoch": 1.0, - "grad_norm": 0.27862506869394177, - "learning_rate": 1.2217200089592596e-09, - "loss": 0.0688, - "step": 17321 + "grad_norm": 0.2659674413129796, + "learning_rate": 8.186903885820663e-10, + "loss": 0.2309, + "step": 21682 }, { "epoch": 1.0, - "grad_norm": 0.3646002178721917, - "learning_rate": 1.192805158432009e-09, - "loss": 0.2923, - "step": 17322 + "grad_norm": 0.4283630816635962, + "learning_rate": 7.997620232014225e-10, + "loss": 0.2375, + "step": 21683 }, { "epoch": 1.0, - "grad_norm": 0.4507387100999731, - "learning_rate": 1.164236566167931e-09, - "loss": 0.3267, - "step": 17323 + "grad_norm": 0.6819758872490643, + "learning_rate": 7.810550304754305e-10, + "loss": 0.15, + "step": 21684 }, { "epoch": 1.0, - "grad_norm": 0.2687264725141766, - "learning_rate": 1.1360142331562351e-09, - "loss": 0.2057, - "step": 17324 + "grad_norm": 0.3794621562298256, + "learning_rate": 7.62569410820424e-10, + "loss": 0.298, + "step": 21685 }, { "epoch": 1.0, - "grad_norm": 0.35005883485614064, - "learning_rate": 1.1081381603750275e-09, - "loss": 0.2811, - "step": 17325 + "grad_norm": 0.5583109562539721, + "learning_rate": 7.44305164644965e-10, + "loss": 0.3448, + "step": 21686 }, { "epoch": 1.0, - "grad_norm": 0.47338451847770335, - "learning_rate": 1.0806083487890917e-09, - "loss": 0.2107, - "step": 17326 + "grad_norm": 0.3184933885882249, + "learning_rate": 7.262622923531748e-10, + "loss": 0.2265, + "step": 21687 }, { "epoch": 1.0, - "grad_norm": 0.25788576820285475, - "learning_rate": 1.0534247993509994e-09, - "loss": 0.2088, - "step": 17327 + "grad_norm": 0.34309846354109635, + "learning_rate": 7.084407943436233e-10, + "loss": 0.26, + "step": 21688 }, { "epoch": 1.0, - "grad_norm": 0.5201529560961416, - "learning_rate": 1.0265875130033298e-09, - "loss": 0.3249, - "step": 17328 + "grad_norm": 0.4512031508504746, + "learning_rate": 6.908406710126603e-10, + "loss": 0.1894, + "step": 21689 }, { "epoch": 1.0, - "grad_norm": 0.7291318527937328, - "learning_rate": 1.0000964906753396e-09, - "loss": 0.3989, - "step": 17329 + "grad_norm": 0.2727539675440686, + "learning_rate": 6.734619227488637e-10, + "loss": 0.1983, + "step": 21690 }, { "epoch": 1.0, - "grad_norm": 0.6160151881883443, - "learning_rate": 9.739517332829628e-10, - "loss": 0.3685, - "step": 17330 + "grad_norm": 0.5111901520870467, + "learning_rate": 6.563045499363707e-10, + "loss": 0.3125, + "step": 21691 }, { "epoch": 1.0, - "grad_norm": 0.2861802045422825, - "learning_rate": 9.481532417332518e-10, - "loss": 0.2201, - "step": 17331 + "grad_norm": 0.7090908417222125, + "learning_rate": 6.393685529570981e-10, + "loss": 0.3828, + "step": 21692 }, { "epoch": 1.0, - "grad_norm": 0.33069292196442907, - "learning_rate": 9.227010169188256e-10, - "loss": 0.1932, - "step": 17332 + "grad_norm": 0.5656748785435641, + "learning_rate": 6.226539321840808e-10, + "loss": 0.2984, + "step": 21693 }, { "epoch": 1.0, - "grad_norm": 0.5335904809825592, - "learning_rate": 8.975950597212014e-10, - "loss": 0.318, - "step": 17333 + "grad_norm": 0.31245024392137855, + "learning_rate": 6.061606879881333e-10, + "loss": 0.2294, + "step": 21694 }, { "epoch": 1.0, - "grad_norm": 0.34095909495518917, - "learning_rate": 8.728353710107939e-10, - "loss": 0.217, - "step": 17334 + "grad_norm": 0.2848229747875986, + "learning_rate": 5.898888207334086e-10, + "loss": 0.2099, + "step": 21695 }, { "epoch": 1.0, - "grad_norm": 0.3489415469794577, - "learning_rate": 8.484219516435854e-10, - "loss": 0.3089, - "step": 17335 + "grad_norm": 0.5948077828725327, + "learning_rate": 5.738383307818396e-10, + "loss": 0.3165, + "step": 21696 }, { "epoch": 1.0, - "grad_norm": 0.77950741238855, - "learning_rate": 8.243548024655656e-10, - "loss": 0.4937, - "step": 17336 + "grad_norm": 0.36176047065746203, + "learning_rate": 5.580092184875874e-10, + "loss": 0.2427, + "step": 21697 }, { "epoch": 1.0, - "grad_norm": 0.3601096759079263, - "learning_rate": 8.006339243094019e-10, - "loss": 0.2173, - "step": 17337 + "grad_norm": 0.3716996756971005, + "learning_rate": 5.424014842014824e-10, + "loss": 0.3039, + "step": 21698 }, { "epoch": 1.0, - "grad_norm": 0.295337610488898, - "learning_rate": 7.772593179977694e-10, - "loss": 0.1954, - "step": 17338 + "grad_norm": 0.8645350010365502, + "learning_rate": 5.270151282688041e-10, + "loss": 0.4891, + "step": 21699 }, { "epoch": 1.0, - "grad_norm": 0.30542651027800183, - "learning_rate": 7.542309843400209e-10, - "loss": 0.2761, - "step": 17339 + "grad_norm": 0.3654841280977617, + "learning_rate": 5.118501510303909e-10, + "loss": 0.203, + "step": 21700 }, { "epoch": 1.0, - "grad_norm": 0.31871131623653226, - "learning_rate": 7.315489241332963e-10, - "loss": 0.2258, - "step": 17340 + "grad_norm": 0.30528617381241235, + "learning_rate": 4.969065528226402e-10, + "loss": 0.159, + "step": 21701 }, { "epoch": 1.0, - "grad_norm": 1.4402612913254227, - "learning_rate": 7.092131381625233e-10, - "loss": 0.71, - "step": 17341 + "grad_norm": 0.34838076773590904, + "learning_rate": 4.821843339752885e-10, + "loss": 0.2594, + "step": 21702 }, { "epoch": 1.0, - "grad_norm": 0.8675495422620723, - "learning_rate": 6.872236272026378e-10, - "loss": 0.4088, - "step": 17342 + "grad_norm": 0.3750308099579273, + "learning_rate": 4.676834948147413e-10, + "loss": 0.2256, + "step": 21703 }, { "epoch": 1.0, - "grad_norm": 0.2562096236525775, - "learning_rate": 6.655803920130322e-10, - "loss": 0.2502, - "step": 17343 + "grad_norm": 1.5477096133337906, + "learning_rate": 4.53404035661853e-10, + "loss": 0.7436, + "step": 21704 }, { "epoch": 1.0, - "grad_norm": 0.3887523928164326, - "learning_rate": 6.442834333453274e-10, - "loss": 0.1314, - "step": 17344 + "grad_norm": 0.869875546062393, + "learning_rate": 4.3934595683303716e-10, + "loss": 0.4606, + "step": 21705 }, { "epoch": 1.0, - "grad_norm": 0.49486610459972546, - "learning_rate": 6.233327519356014e-10, - "loss": 0.3164, - "step": 17345 + "grad_norm": 0.2545067793607434, + "learning_rate": 4.2550925863915624e-10, + "loss": 0.2543, + "step": 21706 }, { "epoch": 1.0, - "grad_norm": 0.2790524317225934, - "learning_rate": 6.0272834850994e-10, - "loss": 0.2222, - "step": 17346 + "grad_norm": 0.3452805033008394, + "learning_rate": 4.118939413877421e-10, + "loss": 0.1098, + "step": 21707 }, { "epoch": 1.0, - "grad_norm": 0.3245395596263069, - "learning_rate": 5.824702237822167e-10, - "loss": 0.2424, - "step": 17347 + "grad_norm": 0.5360738819964151, + "learning_rate": 3.985000053785548e-10, + "loss": 0.3373, + "step": 21708 }, { "epoch": 1.0, - "grad_norm": 0.550353076174964, - "learning_rate": 5.62558378452982e-10, - "loss": 0.3918, - "step": 17348 + "grad_norm": 0.425466283467058, + "learning_rate": 3.8532745090913427e-10, + "loss": 0.3243, + "step": 21709 }, { "epoch": 1.0, - "grad_norm": 0.3946960811855935, - "learning_rate": 5.429928132127948e-10, - "loss": 0.2718, - "step": 17349 + "grad_norm": 0.3559831514711698, + "learning_rate": 3.723762782703588e-10, + "loss": 0.2533, + "step": 21710 }, { "epoch": 1.0, - "grad_norm": 0.577554881905842, - "learning_rate": 5.23773528737781e-10, - "loss": 0.289, - "step": 17350 + "grad_norm": 0.6124996975356838, + "learning_rate": 3.596464877497763e-10, + "loss": 0.385, + "step": 21711 }, { "epoch": 1.0, - "grad_norm": 0.26363339789068374, - "learning_rate": 5.049005256951845e-10, - "loss": 0.2336, - "step": 17351 + "grad_norm": 0.44268259548629474, + "learning_rate": 3.471380796282731e-10, + "loss": 0.2783, + "step": 21712 }, { "epoch": 1.0, - "grad_norm": 0.2628831020493132, - "learning_rate": 4.863738047378164e-10, - "loss": 0.2008, - "step": 17352 + "grad_norm": 0.7561702378340494, + "learning_rate": 3.348510541834049e-10, + "loss": 0.2787, + "step": 21713 }, { "epoch": 1.0, - "grad_norm": 1.316507968789768, - "learning_rate": 4.681933665084959e-10, - "loss": 0.3222, - "step": 17353 + "grad_norm": 0.2915018395642266, + "learning_rate": 3.2278541168717647e-10, + "loss": 0.2503, + "step": 21714 }, { "epoch": 1.0, - "grad_norm": 1.1554124886092956, - "learning_rate": 4.5035921163449905e-10, - "loss": 0.4016, - "step": 17354 + "grad_norm": 0.2772427119246304, + "learning_rate": 3.109411524071515e-10, + "loss": 0.1941, + "step": 21715 }, { "epoch": 1.0, - "grad_norm": 0.24058819285647187, - "learning_rate": 4.3287134073422e-10, - "loss": 0.2455, - "step": 17355 + "grad_norm": 1.4683710504748486, + "learning_rate": 2.9931827660534263e-10, + "loss": 0.3138, + "step": 21716 }, { "epoch": 1.0, - "grad_norm": 0.6174960471662688, - "learning_rate": 4.1572975441384055e-10, - "loss": 0.4182, - "step": 17356 + "grad_norm": 0.8713967156654168, + "learning_rate": 2.8791678453821135e-10, + "loss": 0.4454, + "step": 21717 }, { "epoch": 1.0, - "grad_norm": 0.3324218340581116, - "learning_rate": 3.9893445326733003e-10, - "loss": 0.1355, - "step": 17357 + "grad_norm": 0.253944387044597, + "learning_rate": 2.7673667645888856e-10, + "loss": 0.2463, + "step": 21718 }, { "epoch": 1.0, - "grad_norm": 0.26942764406218134, - "learning_rate": 3.824854378753351e-10, - "loss": 0.2026, - "step": 17358 + "grad_norm": 0.6042439103711952, + "learning_rate": 2.65777952614954e-10, + "loss": 0.3501, + "step": 21719 }, { "epoch": 1.0, - "grad_norm": 0.35914740702980735, - "learning_rate": 3.6638270880851034e-10, - "loss": 0.2896, - "step": 17359 + "grad_norm": 0.20040722879285555, + "learning_rate": 2.550406132484362e-10, + "loss": 0.0674, + "step": 21720 }, { "epoch": 1.0, - "grad_norm": 0.5455096931025434, - "learning_rate": 3.5062626662307753e-10, - "loss": 0.2895, - "step": 17360 + "grad_norm": 0.29838045121578627, + "learning_rate": 2.44524658596923e-10, + "loss": 0.1909, + "step": 21721 }, { "epoch": 1.0, - "grad_norm": 0.3443424930766011, - "learning_rate": 3.352161118652664e-10, - "loss": 0.2766, - "step": 17361 + "grad_norm": 0.39325338079862293, + "learning_rate": 2.3423008889467134e-10, + "loss": 0.3057, + "step": 21722 }, { "epoch": 1.0, - "grad_norm": 0.6169748491571364, - "learning_rate": 3.2015224506909414e-10, - "loss": 0.3999, - "step": 17362 + "grad_norm": 0.49580547990507134, + "learning_rate": 2.2415690436816685e-10, + "loss": 0.2815, + "step": 21723 }, { "epoch": 1.0, - "grad_norm": 0.22643565427717527, - "learning_rate": 3.0543466675636567e-10, - "loss": 0.1819, - "step": 17363 + "grad_norm": 0.3694042826272208, + "learning_rate": 2.1430510524167448e-10, + "loss": 0.2979, + "step": 21724 }, { "epoch": 1.0, - "grad_norm": 0.33718256571038757, - "learning_rate": 2.9106337743667336e-10, - "loss": 0.2753, - "step": 17364 + "grad_norm": 0.6444201871641582, + "learning_rate": 2.0467469173168776e-10, + "loss": 0.3699, + "step": 21725 }, { "epoch": 1.0, - "grad_norm": 0.5574590750528694, - "learning_rate": 2.7703837760739706e-10, - "loss": 0.2991, - "step": 17365 + "grad_norm": 0.24061020415216497, + "learning_rate": 1.9526566405247972e-10, + "loss": 0.1728, + "step": 21726 }, { "epoch": 1.0, - "grad_norm": 0.36791725805618314, - "learning_rate": 2.6335966775370423e-10, - "loss": 0.2042, - "step": 17366 + "grad_norm": 0.38192158712139984, + "learning_rate": 1.8607802241277228e-10, + "loss": 0.2643, + "step": 21727 }, { "epoch": 1.0, - "grad_norm": 0.2976665692719068, - "learning_rate": 2.500272483496602e-10, - "loss": 0.2583, - "step": 17367 + "grad_norm": 0.6006917995005504, + "learning_rate": 1.7711176701462607e-10, + "loss": 0.2377, + "step": 21728 }, { "epoch": 1.0, - "grad_norm": 0.563196407125086, - "learning_rate": 2.370411198582279e-10, - "loss": 0.3444, - "step": 17368 + "grad_norm": 0.37807502779184093, + "learning_rate": 1.683668980578812e-10, + "loss": 0.2036, + "step": 21729 }, { "epoch": 1.0, - "grad_norm": 0.4542464555776609, - "learning_rate": 2.2440128272682716e-10, - "loss": 0.2276, - "step": 17369 + "grad_norm": 0.3356607290175542, + "learning_rate": 1.5984341573460626e-10, + "loss": 0.28, + "step": 21730 }, { "epoch": 1.0, - "grad_norm": 0.2292789630209967, - "learning_rate": 2.1210773739510637e-10, - "loss": 0.1631, - "step": 17370 + "grad_norm": 0.6545291338281968, + "learning_rate": 1.5154132023575962e-10, + "loss": 0.3493, + "step": 21731 }, { "epoch": 1.0, - "grad_norm": 0.3201790858158781, - "learning_rate": 2.0016048428828095e-10, - "loss": 0.2865, - "step": 17371 + "grad_norm": 0.4970172752431013, + "learning_rate": 1.4346061174230763e-10, + "loss": 0.2129, + "step": 21732 }, { "epoch": 1.0, - "grad_norm": 1.1917003624322648, - "learning_rate": 1.8855952381935384e-10, - "loss": 0.6258, - "step": 17372 + "grad_norm": 0.25582173781224604, + "learning_rate": 1.3560129043632685e-10, + "loss": 0.1557, + "step": 21733 }, { "epoch": 1.0, - "grad_norm": 0.29913451282453496, - "learning_rate": 1.7730485639133598e-10, - "loss": 0.2036, - "step": 17373 + "grad_norm": 0.3790901229948752, + "learning_rate": 1.2796335648879166e-10, + "loss": 0.3095, + "step": 21734 }, { "epoch": 1.0, - "grad_norm": 0.4936270409112451, - "learning_rate": 1.6639648239280547e-10, - "loss": 0.3253, - "step": 17374 + "grad_norm": 0.9558574864114417, + "learning_rate": 1.2054681007067637e-10, + "loss": 0.5092, + "step": 21735 }, { "epoch": 1.0, - "grad_norm": 0.5255633349642953, - "learning_rate": 1.5583440220234835e-10, - "loss": 0.3583, - "step": 17375 + "grad_norm": 0.32310943257825164, + "learning_rate": 1.1335165134518378e-10, + "loss": 0.2164, + "step": 21736 }, { "epoch": 1.0, - "grad_norm": 0.21257105141332128, - "learning_rate": 1.4561861618411778e-10, - "loss": 0.1564, - "step": 17376 + "grad_norm": 0.5413391920236553, + "learning_rate": 1.06377880472186e-10, + "loss": 0.3188, + "step": 21737 }, { "epoch": 1.0, - "grad_norm": 0.4397563294608192, - "learning_rate": 1.357491246944953e-10, - "loss": 0.3039, - "step": 17377 + "grad_norm": 0.5614760330936077, + "learning_rate": 9.962549760711427e-11, + "loss": 0.2876, + "step": 21738 }, { "epoch": 1.0, - "grad_norm": 0.43366888050053, - "learning_rate": 1.2622592807320922e-10, - "loss": 0.3299, - "step": 17378 + "grad_norm": 0.18302935147811014, + "learning_rate": 9.3094502896518e-11, + "loss": 0.1326, + "step": 21739 }, { "epoch": 1.0, - "grad_norm": 0.25224273533898794, - "learning_rate": 1.1704902665110596e-10, - "loss": 0.2109, - "step": 17379 + "grad_norm": 1.4654245355543103, + "learning_rate": 8.678489648805688e-11, + "loss": 0.6957, + "step": 21740 }, { "epoch": 1.0, - "grad_norm": 0.876683409290362, - "learning_rate": 1.082184207445991e-10, - "loss": 0.5038, - "step": 17380 + "grad_norm": 0.49381659135447514, + "learning_rate": 8.069667851939856e-11, + "loss": 0.3231, + "step": 21741 }, { "epoch": 1.0, - "grad_norm": 0.5709334603042204, - "learning_rate": 9.97341106612204e-11, - "loss": 0.2467, - "step": 17381 + "grad_norm": 0.27289146691544053, + "learning_rate": 7.482984912710045e-11, + "loss": 0.2141, + "step": 21742 }, { "epoch": 1.0, - "grad_norm": 0.3252570517790537, - "learning_rate": 9.159609669406876e-11, - "loss": 0.2698, - "step": 17382 + "grad_norm": 1.1015648932902677, + "learning_rate": 6.918440843883822e-11, + "loss": 0.4007, + "step": 21743 }, { "epoch": 1.0, - "grad_norm": 0.2536394980429637, - "learning_rate": 8.380437912514083e-11, - "loss": 0.2018, - "step": 17383 + "grad_norm": 0.593323061455853, + "learning_rate": 6.37603565811773e-11, + "loss": 0.1885, + "step": 21744 }, { "epoch": 1.0, - "grad_norm": 0.5485757717623354, - "learning_rate": 7.635895822311057e-11, - "loss": 0.3742, - "step": 17384 + "grad_norm": 0.3330743867635877, + "learning_rate": 5.855769367402176e-11, + "loss": 0.2572, + "step": 21745 }, { "epoch": 1.0, - "grad_norm": 0.3684524592933877, - "learning_rate": 6.925983424777016e-11, - "loss": 0.2536, - "step": 17385 + "grad_norm": 0.2589686848628386, + "learning_rate": 5.3576419831724566e-11, + "loss": 0.2062, + "step": 21746 }, { "epoch": 1.0, - "grad_norm": 0.3760098681531618, - "learning_rate": 6.250700744336869e-11, - "loss": 0.2418, - "step": 17386 + "grad_norm": 0.6611910289929411, + "learning_rate": 4.8816535166418266e-11, + "loss": 0.3423, + "step": 21747 }, { "epoch": 1.0, - "grad_norm": 0.44201853065463803, - "learning_rate": 5.610047804527341e-11, - "loss": 0.2955, - "step": 17387 + "grad_norm": 0.42254838139413986, + "learning_rate": 4.4278039781353585e-11, + "loss": 0.2764, + "step": 21748 }, { "epoch": 1.0, - "grad_norm": 0.3492915963630672, - "learning_rate": 5.00402462733085e-11, - "loss": 0.2622, - "step": 17388 + "grad_norm": 0.41744818058299255, + "learning_rate": 3.9960933777560826e-11, + "loss": 0.2352, + "step": 21749 }, { "epoch": 1.0, - "grad_norm": 0.40702154184454287, - "learning_rate": 4.4326312338416333e-11, - "loss": 0.1927, - "step": 17389 + "grad_norm": 0.4781134275376225, + "learning_rate": 3.586521725162939e-11, + "loss": 0.2867, + "step": 21750 }, { "epoch": 1.0, - "grad_norm": 0.4248298337535507, - "learning_rate": 3.895867643932683e-11, - "loss": 0.3135, - "step": 17390 + "grad_norm": 0.3936997308993916, + "learning_rate": 3.199089029348734e-11, + "loss": 0.2829, + "step": 21751 }, { "epoch": 1.0, - "grad_norm": 0.26443247178119667, - "learning_rate": 3.3937338760337e-11, - "loss": 0.2393, - "step": 17391 + "grad_norm": 0.45433621665121454, + "learning_rate": 2.8337952988621853e-11, + "loss": 0.1875, + "step": 21752 }, { "epoch": 1.0, - "grad_norm": 0.47992056831554575, - "learning_rate": 2.92622994768621e-11, - "loss": 0.1771, - "step": 17392 + "grad_norm": 0.4558330327323344, + "learning_rate": 2.4906405418079206e-11, + "loss": 0.3096, + "step": 21753 }, { "epoch": 1.0, - "grad_norm": 1.7800715125968598, - "learning_rate": 2.4933558749884456e-11, - "loss": 0.4894, - "step": 17393 + "grad_norm": 0.27946005568873983, + "learning_rate": 2.1696247658464785e-11, + "loss": 0.2418, + "step": 21754 }, { "epoch": 1.0, - "grad_norm": 0.2614907187670138, - "learning_rate": 2.0951116729284182e-11, - "loss": 0.2234, - "step": 17394 + "grad_norm": 0.6073615387903163, + "learning_rate": 1.8707479779722648e-11, + "loss": 0.1463, + "step": 21755 }, { "epoch": 1.0, - "grad_norm": 0.33747834456865705, - "learning_rate": 1.731497355272893e-11, - "loss": 0.319, - "step": 17395 + "grad_norm": 1.9979528859376219, + "learning_rate": 1.5940101849576394e-11, + "loss": 0.6006, + "step": 21756 + }, + { + "epoch": 1.0, + "grad_norm": 0.2787063265436414, + "learning_rate": 1.3394113927978069e-11, + "loss": 0.2166, + "step": 21757 }, { "epoch": 1.0, - "grad_norm": 0.5460667339487827, - "learning_rate": 1.4025129346784127e-11, + "grad_norm": 0.37708467033115073, + "learning_rate": 1.1069516071549047e-11, "loss": 0.3004, - "step": 17396 + "step": 21758 }, { "epoch": 1.0, - "grad_norm": 0.32489280345231525, - "learning_rate": 1.1081584224692521e-11, - "loss": 0.2555, - "step": 17397 + "grad_norm": 0.6805469461554808, + "learning_rate": 8.966308332469809e-12, + "loss": 0.2847, + "step": 21759 }, { "epoch": 1.0, - "grad_norm": 0.5915103642932011, - "learning_rate": 8.484338289704852e-12, - "loss": 0.3031, - "step": 17398 + "grad_norm": 0.33815053488109426, + "learning_rate": 7.0844907562594985e-12, + "loss": 0.2659, + "step": 21760 }, { "epoch": 1.0, - "grad_norm": 0.37556640988554935, - "learning_rate": 6.233391630638963e-12, - "loss": 0.2357, - "step": 17399 + "grad_norm": 0.5455686230231608, + "learning_rate": 5.424063385106593e-12, + "loss": 0.2454, + "step": 21761 }, { "epoch": 1.0, - "grad_norm": 0.29506593921851954, - "learning_rate": 4.328744325210466e-12, - "loss": 0.2447, - "step": 17400 + "grad_norm": 0.40450404973701837, + "learning_rate": 3.9850262567586726e-12, + "loss": 0.2388, + "step": 21762 }, { "epoch": 1.0, - "grad_norm": 0.7211757381438109, - "learning_rate": 2.770396440032741e-12, - "loss": 0.4654, - "step": 17401 + "grad_norm": 0.3417118882130641, + "learning_rate": 2.7673794011917608e-12, + "loss": 0.2397, + "step": 21763 }, { "epoch": 1.0, - "grad_norm": 0.316378855890181, - "learning_rate": 1.5583480295067177e-12, - "loss": 0.278, - "step": 17402 + "grad_norm": 0.7317025291557016, + "learning_rate": 1.7711228461614327e-12, + "loss": 0.4163, + "step": 21764 }, { "epoch": 1.0, - "grad_norm": 0.38606123702259176, - "learning_rate": 6.925991336004245e-13, - "loss": 0.2463, - "step": 17403 + "grad_norm": 0.6761798824984974, + "learning_rate": 9.962566138721485e-13, + "loss": 0.2715, + "step": 21765 }, { "epoch": 1.0, - "grad_norm": 0.26007754178125586, - "learning_rate": 1.7314978451032915e-13, - "loss": 0.1748, - "step": 17404 + "grad_norm": 0.41496727159002544, + "learning_rate": 4.427807209772539e-13, + "loss": 0.2676, + "step": 21766 + }, + { + "epoch": 1.0, + "grad_norm": 0.25374487131977586, + "learning_rate": 1.1069518079942498e-13, + "loss": 0.1654, + "step": 21767 }, { "epoch": 1.0, - "grad_norm": 1.5025672310396203, + "grad_norm": 2.187948659713511, "learning_rate": 0.0, - "loss": 0.1774, - "step": 17405 + "loss": 0.1929, + "step": 21768 }, { "epoch": 1.0, - "step": 17405, + "step": 21768, "total_flos": 0.0, - "train_loss": 0.3205628498764781, - "train_runtime": 138539.6755, - "train_samples_per_second": 60.365, - "train_steps_per_second": 0.126 + "train_loss": 0.3187218575373166, + "train_runtime": 134905.7372, + "train_samples_per_second": 62.001, + "train_steps_per_second": 0.161 } ], "logging_steps": 1.0, - "max_steps": 17405, + "max_steps": 21768, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300,