{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 17405, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.0507381496559225, "learning_rate": 3.824091778202677e-08, "loss": 0.2942, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.8070182416007133, "learning_rate": 7.648183556405354e-08, "loss": 0.3872, "step": 2 }, { "epoch": 0.0, "grad_norm": 1.80041143859621, "learning_rate": 1.1472275334608032e-07, "loss": 0.5905, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.8120071191832767, "learning_rate": 1.5296367112810708e-07, "loss": 0.2618, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.5949392601041631, "learning_rate": 1.9120458891013387e-07, "loss": 0.2642, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.055790990377875, "learning_rate": 2.2944550669216063e-07, "loss": 0.7541, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.7613072929404066, "learning_rate": 2.676864244741874e-07, "loss": 0.2649, "step": 7 }, { "epoch": 0.0, "grad_norm": 1.304037252213996, "learning_rate": 3.0592734225621416e-07, "loss": 0.5504, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.9141654125014961, "learning_rate": 3.441682600382409e-07, "loss": 0.3869, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.7609269558117047, "learning_rate": 3.8240917782026774e-07, "loss": 0.2751, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.7955995450796811, "learning_rate": 4.206500956022945e-07, "loss": 0.1885, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.8612661473785725, "learning_rate": 4.5889101338432127e-07, "loss": 0.585, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.7748274547715579, "learning_rate": 4.97131931166348e-07, "loss": 0.3195, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.8639653689032786, "learning_rate": 5.353728489483748e-07, "loss": 0.3883, "step": 14 }, { "epoch": 0.0, "grad_norm": 2.5884579720855796, "learning_rate": 5.736137667304016e-07, "loss": 0.785, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.8703771449042337, "learning_rate": 6.118546845124283e-07, "loss": 0.3993, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.6961613428356354, "learning_rate": 6.500956022944552e-07, "loss": 0.2686, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.971276714643732, "learning_rate": 6.883365200764818e-07, "loss": 0.3745, "step": 18 }, { "epoch": 0.0, "grad_norm": 1.4283398141820636, "learning_rate": 7.265774378585087e-07, "loss": 0.4011, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.9170881209785016, "learning_rate": 7.648183556405355e-07, "loss": 0.3542, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.9135336035316859, "learning_rate": 8.030592734225621e-07, "loss": 0.3697, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.0476506879512788, "learning_rate": 8.41300191204589e-07, "loss": 0.3371, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.6115452493182032, "learning_rate": 8.795411089866157e-07, "loss": 0.2105, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.67676855881491, "learning_rate": 9.177820267686425e-07, "loss": 0.5603, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.7202407335460682, "learning_rate": 9.560229445506693e-07, "loss": 0.3293, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.9865131037247048, "learning_rate": 9.94263862332696e-07, "loss": 0.41, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.4535881440886347, "learning_rate": 1.0325047801147228e-06, "loss": 0.5634, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.0947257185323476, "learning_rate": 1.0707456978967496e-06, "loss": 0.3149, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.0129147168499806, "learning_rate": 1.1089866156787763e-06, "loss": 0.3681, "step": 29 }, { "epoch": 0.0, "grad_norm": 0.6576020756810448, "learning_rate": 1.1472275334608031e-06, "loss": 0.2731, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.9047392383713737, "learning_rate": 1.1854684512428299e-06, "loss": 0.3647, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.4541026438021523, "learning_rate": 1.2237093690248566e-06, "loss": 0.4479, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.8653286599987811, "learning_rate": 1.2619502868068834e-06, "loss": 0.3584, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.121457616803094, "learning_rate": 1.3001912045889104e-06, "loss": 0.3729, "step": 34 }, { "epoch": 0.0, "grad_norm": 0.8277941941005295, "learning_rate": 1.3384321223709371e-06, "loss": 0.3191, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.7443085270016412, "learning_rate": 1.3766730401529637e-06, "loss": 0.3493, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.1585945317118969, "learning_rate": 1.4149139579349905e-06, "loss": 0.3186, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.5269750513842424, "learning_rate": 1.4531548757170174e-06, "loss": 0.3761, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.837589017463728, "learning_rate": 1.4913957934990442e-06, "loss": 0.7611, "step": 39 }, { "epoch": 0.0, "grad_norm": 1.1361050627404332, "learning_rate": 1.529636711281071e-06, "loss": 0.1478, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.9717312245736287, "learning_rate": 1.5678776290630975e-06, "loss": 0.3778, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.6502427723791129, "learning_rate": 1.6061185468451243e-06, "loss": 0.5467, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.6014184946161949, "learning_rate": 1.6443594646271512e-06, "loss": 0.1958, "step": 43 }, { "epoch": 0.0, "grad_norm": 1.7347266365889769, "learning_rate": 1.682600382409178e-06, "loss": 0.4705, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.9784708341611575, "learning_rate": 1.7208413001912048e-06, "loss": 0.3925, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.8155632192584802, "learning_rate": 1.7590822179732313e-06, "loss": 0.2767, "step": 46 }, { "epoch": 0.0, "grad_norm": 1.6297697128120328, "learning_rate": 1.7973231357552585e-06, "loss": 0.4902, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.0192981344155907, "learning_rate": 1.835564053537285e-06, "loss": 0.5705, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.6176671121632008, "learning_rate": 1.8738049713193118e-06, "loss": 0.2417, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.715246719483139, "learning_rate": 1.9120458891013386e-06, "loss": 0.2814, "step": 50 }, { "epoch": 0.0, "grad_norm": 3.0156791314075595, "learning_rate": 1.950286806883365e-06, "loss": 0.8249, "step": 51 }, { "epoch": 0.0, "grad_norm": 1.656560840965785, "learning_rate": 1.988527724665392e-06, "loss": 0.5013, "step": 52 }, { "epoch": 0.0, "grad_norm": 1.1577287645486485, "learning_rate": 2.026768642447419e-06, "loss": 0.2761, "step": 53 }, { "epoch": 0.0, "grad_norm": 3.0509099785445395, "learning_rate": 2.0650095602294456e-06, "loss": 0.6112, "step": 54 }, { "epoch": 0.0, "grad_norm": 0.9025595249195338, "learning_rate": 2.103250478011472e-06, "loss": 0.1648, "step": 55 }, { "epoch": 0.0, "grad_norm": 0.9417125486809438, "learning_rate": 2.141491395793499e-06, "loss": 0.325, "step": 56 }, { "epoch": 0.0, "grad_norm": 0.9816339235493463, "learning_rate": 2.179732313575526e-06, "loss": 0.39, "step": 57 }, { "epoch": 0.0, "grad_norm": 1.7398045274330625, "learning_rate": 2.2179732313575527e-06, "loss": 0.495, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.1507513410092913, "learning_rate": 2.2562141491395797e-06, "loss": 0.3634, "step": 59 }, { "epoch": 0.0, "grad_norm": 4.505952386635836, "learning_rate": 2.2944550669216062e-06, "loss": 0.7398, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.706667753638737, "learning_rate": 2.332695984703633e-06, "loss": 0.2919, "step": 61 }, { "epoch": 0.0, "grad_norm": 0.702597844899516, "learning_rate": 2.3709369024856597e-06, "loss": 0.2103, "step": 62 }, { "epoch": 0.0, "grad_norm": 7.783725003259625, "learning_rate": 2.4091778202676867e-06, "loss": 0.853, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.3779057437038236, "learning_rate": 2.4474187380497133e-06, "loss": 0.4548, "step": 64 }, { "epoch": 0.0, "grad_norm": 2.4971913730088304, "learning_rate": 2.4856596558317402e-06, "loss": 0.3769, "step": 65 }, { "epoch": 0.0, "grad_norm": 2.077225117758281, "learning_rate": 2.523900573613767e-06, "loss": 0.4513, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.7469593011509847, "learning_rate": 2.5621414913957938e-06, "loss": 0.1754, "step": 67 }, { "epoch": 0.0, "grad_norm": 3.1701958149020584, "learning_rate": 2.6003824091778207e-06, "loss": 0.381, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.0667732033492074, "learning_rate": 2.6386233269598473e-06, "loss": 0.365, "step": 69 }, { "epoch": 0.0, "grad_norm": 2.805103712753199, "learning_rate": 2.6768642447418743e-06, "loss": 0.5996, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.1103519370245432, "learning_rate": 2.7151051625239004e-06, "loss": 0.2977, "step": 71 }, { "epoch": 0.0, "grad_norm": 0.9741205038133375, "learning_rate": 2.7533460803059274e-06, "loss": 0.3441, "step": 72 }, { "epoch": 0.0, "grad_norm": 3.3389588104138266, "learning_rate": 2.7915869980879544e-06, "loss": 0.4686, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.1622466550114607, "learning_rate": 2.829827915869981e-06, "loss": 0.2293, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.8095204360677337, "learning_rate": 2.868068833652008e-06, "loss": 0.4982, "step": 75 }, { "epoch": 0.0, "grad_norm": 2.216426651097327, "learning_rate": 2.906309751434035e-06, "loss": 0.4559, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.7137062684636206, "learning_rate": 2.9445506692160614e-06, "loss": 0.3426, "step": 77 }, { "epoch": 0.0, "grad_norm": 3.186090472631493, "learning_rate": 2.9827915869980884e-06, "loss": 0.6149, "step": 78 }, { "epoch": 0.0, "grad_norm": 1.2457594513024053, "learning_rate": 3.021032504780115e-06, "loss": 0.2545, "step": 79 }, { "epoch": 0.0, "grad_norm": 1.3233143755012595, "learning_rate": 3.059273422562142e-06, "loss": 0.3427, "step": 80 }, { "epoch": 0.0, "grad_norm": 1.9962990513631782, "learning_rate": 3.097514340344169e-06, "loss": 0.4061, "step": 81 }, { "epoch": 0.0, "grad_norm": 2.1748881777328464, "learning_rate": 3.135755258126195e-06, "loss": 0.4804, "step": 82 }, { "epoch": 0.0, "grad_norm": 1.1349511100419203, "learning_rate": 3.173996175908222e-06, "loss": 0.3407, "step": 83 }, { "epoch": 0.0, "grad_norm": 0.8325230383368618, "learning_rate": 3.2122370936902485e-06, "loss": 0.2679, "step": 84 }, { "epoch": 0.0, "grad_norm": 1.4409698819778003, "learning_rate": 3.2504780114722755e-06, "loss": 0.2756, "step": 85 }, { "epoch": 0.0, "grad_norm": 0.7717083869066103, "learning_rate": 3.2887189292543025e-06, "loss": 0.2771, "step": 86 }, { "epoch": 0.0, "grad_norm": 3.0396106154008105, "learning_rate": 3.326959847036329e-06, "loss": 0.5461, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.8349352537444616, "learning_rate": 3.365200764818356e-06, "loss": 0.3778, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.8634730104834349, "learning_rate": 3.4034416826003826e-06, "loss": 0.26, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.8073245544233498, "learning_rate": 3.4416826003824095e-06, "loss": 0.3411, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.7074868343906857, "learning_rate": 3.4799235181644365e-06, "loss": 0.7505, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.4009581352720348, "learning_rate": 3.5181644359464626e-06, "loss": 0.3037, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.7581392814988335, "learning_rate": 3.5564053537284896e-06, "loss": 0.4342, "step": 93 }, { "epoch": 0.01, "grad_norm": 4.586880035707701, "learning_rate": 3.594646271510517e-06, "loss": 0.95, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.9134436895106094, "learning_rate": 3.632887189292543e-06, "loss": 0.1859, "step": 95 }, { "epoch": 0.01, "grad_norm": 3.5368674428633557, "learning_rate": 3.67112810707457e-06, "loss": 0.4042, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.695940105136588, "learning_rate": 3.7093690248565967e-06, "loss": 0.4439, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.4933772907830718, "learning_rate": 3.7476099426386236e-06, "loss": 0.3166, "step": 98 }, { "epoch": 0.01, "grad_norm": 4.929994478426711, "learning_rate": 3.7858508604206506e-06, "loss": 0.6366, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.0588657191516118, "learning_rate": 3.824091778202677e-06, "loss": 0.3998, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.8299018542448147, "learning_rate": 3.862332695984704e-06, "loss": 0.3014, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.894972655187633, "learning_rate": 3.90057361376673e-06, "loss": 0.152, "step": 102 }, { "epoch": 0.01, "grad_norm": 3.0056612950240744, "learning_rate": 3.938814531548758e-06, "loss": 0.5743, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.9439871853559727, "learning_rate": 3.977055449330784e-06, "loss": 0.292, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.0959590718704717, "learning_rate": 4.015296367112811e-06, "loss": 0.3727, "step": 105 }, { "epoch": 0.01, "grad_norm": 2.2912607966735963, "learning_rate": 4.053537284894838e-06, "loss": 0.7559, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.9171050201317897, "learning_rate": 4.091778202676865e-06, "loss": 0.314, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.6284229206544971, "learning_rate": 4.130019120458891e-06, "loss": 0.2165, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.212927916169031, "learning_rate": 4.168260038240919e-06, "loss": 0.6106, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.8099374882393748, "learning_rate": 4.206500956022944e-06, "loss": 0.3248, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.655364301227268, "learning_rate": 4.244741873804972e-06, "loss": 0.5664, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.4636213484496585, "learning_rate": 4.282982791586998e-06, "loss": 0.393, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.0063990268097915, "learning_rate": 4.321223709369025e-06, "loss": 0.3236, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.6371885352644112, "learning_rate": 4.359464627151052e-06, "loss": 0.1923, "step": 114 }, { "epoch": 0.01, "grad_norm": 3.490580634292843, "learning_rate": 4.397705544933079e-06, "loss": 0.5172, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.8466201174343196, "learning_rate": 4.435946462715105e-06, "loss": 0.3246, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.5069924896763471, "learning_rate": 4.474187380497133e-06, "loss": 0.3987, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.72546443790111, "learning_rate": 4.512428298279159e-06, "loss": 0.6617, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.2270235639211564, "learning_rate": 4.550669216061186e-06, "loss": 0.3173, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.611271995853063, "learning_rate": 4.5889101338432124e-06, "loss": 0.2566, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.1591179991848053, "learning_rate": 4.627151051625239e-06, "loss": 0.4692, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.8409326010763974, "learning_rate": 4.665391969407266e-06, "loss": 0.2925, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.6635869940410941, "learning_rate": 4.703632887189293e-06, "loss": 0.4966, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.8489652670343713, "learning_rate": 4.7418738049713195e-06, "loss": 0.3289, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.7948026475678698, "learning_rate": 4.780114722753346e-06, "loss": 0.3079, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.62879378549951, "learning_rate": 4.8183556405353734e-06, "loss": 0.2514, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.9267329931460127, "learning_rate": 4.8565965583174e-06, "loss": 0.575, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.6930803429078386, "learning_rate": 4.8948374760994265e-06, "loss": 0.2828, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.567288278653447, "learning_rate": 4.933078393881454e-06, "loss": 0.5037, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.4508660819457764, "learning_rate": 4.9713193116634805e-06, "loss": 0.8513, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.7264682519312051, "learning_rate": 5.009560229445507e-06, "loss": 0.2465, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.826742636113808, "learning_rate": 5.047801147227534e-06, "loss": 0.3116, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.1392276722179755, "learning_rate": 5.086042065009561e-06, "loss": 0.3726, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.0376511756469218, "learning_rate": 5.1242829827915875e-06, "loss": 0.2853, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.1718548855836737, "learning_rate": 5.162523900573614e-06, "loss": 0.8045, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.8519765854258867, "learning_rate": 5.2007648183556415e-06, "loss": 0.4227, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.8055693275531942, "learning_rate": 5.239005736137668e-06, "loss": 0.2194, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.7267804162988287, "learning_rate": 5.277246653919695e-06, "loss": 0.4669, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.5457146032547153, "learning_rate": 5.315487571701722e-06, "loss": 0.2702, "step": 139 }, { "epoch": 0.01, "grad_norm": 0.8774674725605524, "learning_rate": 5.3537284894837486e-06, "loss": 0.3322, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.150336747102144, "learning_rate": 5.391969407265774e-06, "loss": 0.3492, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.1991018274354253, "learning_rate": 5.430210325047801e-06, "loss": 0.7824, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.2228654899985612, "learning_rate": 5.468451242829829e-06, "loss": 0.4072, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.7038931034059297, "learning_rate": 5.506692160611855e-06, "loss": 0.3396, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.745883442870367, "learning_rate": 5.544933078393881e-06, "loss": 0.6713, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.5065291039901288, "learning_rate": 5.583173996175909e-06, "loss": 0.2111, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.89516023920091, "learning_rate": 5.621414913957935e-06, "loss": 0.3344, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.5215200346193276, "learning_rate": 5.659655831739962e-06, "loss": 0.4512, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.0561066764454057, "learning_rate": 5.697896749521989e-06, "loss": 0.348, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.7077571805921623, "learning_rate": 5.736137667304016e-06, "loss": 0.44, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.7032590857528236, "learning_rate": 5.774378585086042e-06, "loss": 0.3231, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.6939036942549888, "learning_rate": 5.81261950286807e-06, "loss": 0.2651, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.23769484190234, "learning_rate": 5.850860420650096e-06, "loss": 0.4505, "step": 153 }, { "epoch": 0.01, "grad_norm": 3.57505503947449, "learning_rate": 5.889101338432123e-06, "loss": 0.5387, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.4496022682391447, "learning_rate": 5.927342256214149e-06, "loss": 0.4709, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.9171040848212857, "learning_rate": 5.965583173996177e-06, "loss": 0.3718, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.3430140069069527, "learning_rate": 6.003824091778203e-06, "loss": 0.2232, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.7080554277096157, "learning_rate": 6.04206500956023e-06, "loss": 0.2103, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.2894404724722592, "learning_rate": 6.080305927342257e-06, "loss": 0.3918, "step": 159 }, { "epoch": 0.01, "grad_norm": 3.1163562975594825, "learning_rate": 6.118546845124284e-06, "loss": 0.4489, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.8601545568123237, "learning_rate": 6.15678776290631e-06, "loss": 0.427, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.9819232651964342, "learning_rate": 6.195028680688338e-06, "loss": 0.4107, "step": 162 }, { "epoch": 0.01, "grad_norm": 1.0574104719168467, "learning_rate": 6.233269598470364e-06, "loss": 0.3337, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.7953279060143272, "learning_rate": 6.27151051625239e-06, "loss": 0.2119, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.0482843536337256, "learning_rate": 6.3097514340344166e-06, "loss": 0.3817, "step": 165 }, { "epoch": 0.01, "grad_norm": 2.1370186037298056, "learning_rate": 6.347992351816444e-06, "loss": 0.6306, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.6915570774478592, "learning_rate": 6.3862332695984705e-06, "loss": 0.3499, "step": 167 }, { "epoch": 0.01, "grad_norm": 0.9773007045626713, "learning_rate": 6.424474187380497e-06, "loss": 0.369, "step": 168 }, { "epoch": 0.01, "grad_norm": 2.6951767139469633, "learning_rate": 6.4627151051625245e-06, "loss": 0.7238, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.6679450780611562, "learning_rate": 6.500956022944551e-06, "loss": 0.1775, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.2440364112811422, "learning_rate": 6.539196940726578e-06, "loss": 0.4651, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.4532282158517489, "learning_rate": 6.577437858508605e-06, "loss": 0.4368, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.0484087595525342, "learning_rate": 6.6156787762906315e-06, "loss": 0.2313, "step": 173 }, { "epoch": 0.01, "grad_norm": 1.2959855414896784, "learning_rate": 6.653919694072658e-06, "loss": 0.3893, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.149727668633913, "learning_rate": 6.6921606118546855e-06, "loss": 0.4034, "step": 175 }, { "epoch": 0.01, "grad_norm": 2.3990532753289666, "learning_rate": 6.730401529636712e-06, "loss": 0.7575, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.7216334237931397, "learning_rate": 6.768642447418739e-06, "loss": 0.2945, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.6587270290307061, "learning_rate": 6.806883365200765e-06, "loss": 0.5882, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.7186727610904583, "learning_rate": 6.8451242829827925e-06, "loss": 0.2788, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.7580278664452965, "learning_rate": 6.883365200764819e-06, "loss": 0.2313, "step": 180 }, { "epoch": 0.01, "grad_norm": 3.3484131561448582, "learning_rate": 6.921606118546846e-06, "loss": 0.7148, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.2480514471359154, "learning_rate": 6.959847036328873e-06, "loss": 0.5018, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.853569911541321, "learning_rate": 6.9980879541109e-06, "loss": 0.334, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.5935129539425128, "learning_rate": 7.036328871892925e-06, "loss": 0.484, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.8264510735195749, "learning_rate": 7.0745697896749535e-06, "loss": 0.2185, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.9399844092870507, "learning_rate": 7.112810707456979e-06, "loss": 0.3235, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.7297204562340768, "learning_rate": 7.151051625239006e-06, "loss": 0.3819, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.2104810033539928, "learning_rate": 7.189292543021034e-06, "loss": 0.4596, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.931742528076528, "learning_rate": 7.22753346080306e-06, "loss": 0.4061, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.490075769645383, "learning_rate": 7.265774378585086e-06, "loss": 0.5611, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.8152941819336589, "learning_rate": 7.304015296367113e-06, "loss": 0.3674, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.710166833510888, "learning_rate": 7.34225621414914e-06, "loss": 0.2668, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.568404819733602, "learning_rate": 7.380497131931167e-06, "loss": 0.1746, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.2671387794567825, "learning_rate": 7.418738049713193e-06, "loss": 0.519, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.7997462545674416, "learning_rate": 7.456978967495221e-06, "loss": 0.3353, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.9409506841315287, "learning_rate": 7.495219885277247e-06, "loss": 0.3744, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.0262626181568872, "learning_rate": 7.533460803059274e-06, "loss": 0.1996, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.6961696439810705, "learning_rate": 7.571701720841301e-06, "loss": 0.3293, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.805884804921434, "learning_rate": 7.609942638623328e-06, "loss": 0.3308, "step": 199 }, { "epoch": 0.01, "grad_norm": 0.94997387801345, "learning_rate": 7.648183556405354e-06, "loss": 0.3929, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.7291239386927754, "learning_rate": 7.686424474187381e-06, "loss": 0.3934, "step": 201 }, { "epoch": 0.01, "grad_norm": 2.004028030890788, "learning_rate": 7.724665391969407e-06, "loss": 0.8334, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.7999130816704142, "learning_rate": 7.762906309751434e-06, "loss": 0.3051, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.5985667826912873, "learning_rate": 7.80114722753346e-06, "loss": 0.2605, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.1179195289862098, "learning_rate": 7.839388145315489e-06, "loss": 0.2994, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.0573596138813663, "learning_rate": 7.877629063097515e-06, "loss": 0.4686, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.024477703497844, "learning_rate": 7.915869980879542e-06, "loss": 0.3344, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.8805912272772863, "learning_rate": 7.954110898661568e-06, "loss": 0.4153, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.8453276392969327, "learning_rate": 7.992351816443595e-06, "loss": 0.3134, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.6630727990199321, "learning_rate": 8.030592734225622e-06, "loss": 0.278, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.593524418186559, "learning_rate": 8.06883365200765e-06, "loss": 0.3308, "step": 211 }, { "epoch": 0.01, "grad_norm": 1.5867712659122044, "learning_rate": 8.107074569789676e-06, "loss": 0.5022, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.858433894348274, "learning_rate": 8.145315487571703e-06, "loss": 0.3147, "step": 213 }, { "epoch": 0.01, "grad_norm": 1.9048694714090977, "learning_rate": 8.18355640535373e-06, "loss": 0.8704, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.795882586881069, "learning_rate": 8.221797323135756e-06, "loss": 0.3508, "step": 215 }, { "epoch": 0.01, "grad_norm": 1.2363337807852626, "learning_rate": 8.260038240917783e-06, "loss": 0.2234, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.2961495565006664, "learning_rate": 8.298279158699809e-06, "loss": 0.3844, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.7121762384248471, "learning_rate": 8.336520076481837e-06, "loss": 0.6158, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.7273021014242879, "learning_rate": 8.374760994263862e-06, "loss": 0.2802, "step": 219 }, { "epoch": 0.01, "grad_norm": 2.6273244509030462, "learning_rate": 8.413001912045889e-06, "loss": 0.7777, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.4955213875868227, "learning_rate": 8.451242829827917e-06, "loss": 0.7094, "step": 221 }, { "epoch": 0.01, "grad_norm": 1.0844734774581914, "learning_rate": 8.489483747609944e-06, "loss": 0.2128, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.7637627517102057, "learning_rate": 8.52772466539197e-06, "loss": 0.416, "step": 223 }, { "epoch": 0.01, "grad_norm": 1.5085068537732527, "learning_rate": 8.565965583173997e-06, "loss": 0.4051, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.6779193752161682, "learning_rate": 8.604206500956023e-06, "loss": 0.248, "step": 225 }, { "epoch": 0.01, "grad_norm": 3.324560462924011, "learning_rate": 8.64244741873805e-06, "loss": 0.823, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.7247870815802524, "learning_rate": 8.680688336520076e-06, "loss": 0.3926, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.874007650243524, "learning_rate": 8.718929254302105e-06, "loss": 0.3379, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.4143629669283038, "learning_rate": 8.757170172084131e-06, "loss": 0.3663, "step": 229 }, { "epoch": 0.01, "grad_norm": 1.164683195417105, "learning_rate": 8.795411089866158e-06, "loss": 0.2845, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.5604708590835067, "learning_rate": 8.833652007648184e-06, "loss": 0.3083, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.196489898146485, "learning_rate": 8.87189292543021e-06, "loss": 0.4396, "step": 232 }, { "epoch": 0.01, "grad_norm": 2.3549774616852077, "learning_rate": 8.910133843212237e-06, "loss": 0.7806, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.8069917514152943, "learning_rate": 8.948374760994266e-06, "loss": 0.3283, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.8060819660517314, "learning_rate": 8.986615678776292e-06, "loss": 0.4036, "step": 235 }, { "epoch": 0.01, "grad_norm": 1.6268257383806288, "learning_rate": 9.024856596558319e-06, "loss": 0.2297, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.8055104715664961, "learning_rate": 9.063097514340345e-06, "loss": 0.325, "step": 237 }, { "epoch": 0.01, "grad_norm": 1.922017240792222, "learning_rate": 9.101338432122372e-06, "loss": 0.6324, "step": 238 }, { "epoch": 0.01, "grad_norm": 1.1570165829521175, "learning_rate": 9.139579349904398e-06, "loss": 0.4164, "step": 239 }, { "epoch": 0.01, "grad_norm": 1.0882648871617964, "learning_rate": 9.177820267686425e-06, "loss": 0.3779, "step": 240 }, { "epoch": 0.01, "grad_norm": 2.0650365945242295, "learning_rate": 9.216061185468453e-06, "loss": 0.6302, "step": 241 }, { "epoch": 0.01, "grad_norm": 0.7876721236295158, "learning_rate": 9.254302103250478e-06, "loss": 0.2161, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.877348325878266, "learning_rate": 9.292543021032505e-06, "loss": 0.3256, "step": 243 }, { "epoch": 0.01, "grad_norm": 1.2294123749330266, "learning_rate": 9.330783938814533e-06, "loss": 0.4789, "step": 244 }, { "epoch": 0.01, "grad_norm": 2.0693063329445263, "learning_rate": 9.36902485659656e-06, "loss": 0.5469, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.8369746366963294, "learning_rate": 9.407265774378586e-06, "loss": 0.342, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.9630683957243528, "learning_rate": 9.445506692160612e-06, "loss": 0.3704, "step": 247 }, { "epoch": 0.01, "grad_norm": 0.5500498119249109, "learning_rate": 9.483747609942639e-06, "loss": 0.0887, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.7378523176182469, "learning_rate": 9.521988527724666e-06, "loss": 0.3357, "step": 249 }, { "epoch": 0.01, "grad_norm": 1.2327162595508403, "learning_rate": 9.560229445506692e-06, "loss": 0.4636, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.9886430366490976, "learning_rate": 9.59847036328872e-06, "loss": 0.416, "step": 251 }, { "epoch": 0.01, "grad_norm": 0.9459014328163804, "learning_rate": 9.636711281070747e-06, "loss": 0.3347, "step": 252 }, { "epoch": 0.01, "grad_norm": 1.4714918256305243, "learning_rate": 9.674952198852773e-06, "loss": 0.6434, "step": 253 }, { "epoch": 0.01, "grad_norm": 0.6094277496484368, "learning_rate": 9.7131931166348e-06, "loss": 0.2764, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.6710565760041877, "learning_rate": 9.751434034416827e-06, "loss": 0.242, "step": 255 }, { "epoch": 0.01, "grad_norm": 2.07823406563584, "learning_rate": 9.789674952198853e-06, "loss": 0.8126, "step": 256 }, { "epoch": 0.01, "grad_norm": 1.7712955553118728, "learning_rate": 9.827915869980881e-06, "loss": 0.5811, "step": 257 }, { "epoch": 0.01, "grad_norm": 0.8150028793715761, "learning_rate": 9.866156787762908e-06, "loss": 0.2496, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.7771432411433113, "learning_rate": 9.904397705544934e-06, "loss": 0.3765, "step": 259 }, { "epoch": 0.01, "grad_norm": 0.6160523614998065, "learning_rate": 9.942638623326961e-06, "loss": 0.2061, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.7046157767914728, "learning_rate": 9.980879541108988e-06, "loss": 0.232, "step": 261 }, { "epoch": 0.02, "grad_norm": 2.149024983560915, "learning_rate": 1.0019120458891014e-05, "loss": 0.4943, "step": 262 }, { "epoch": 0.02, "grad_norm": 1.2286307024685774, "learning_rate": 1.005736137667304e-05, "loss": 0.4477, "step": 263 }, { "epoch": 0.02, "grad_norm": 1.097112552920304, "learning_rate": 1.0095602294455067e-05, "loss": 0.3751, "step": 264 }, { "epoch": 0.02, "grad_norm": 0.7614533976101915, "learning_rate": 1.0133843212237095e-05, "loss": 0.3311, "step": 265 }, { "epoch": 0.02, "grad_norm": 0.7984309540612244, "learning_rate": 1.0172084130019122e-05, "loss": 0.3827, "step": 266 }, { "epoch": 0.02, "grad_norm": 0.564765501949692, "learning_rate": 1.0210325047801149e-05, "loss": 0.213, "step": 267 }, { "epoch": 0.02, "grad_norm": 1.75929373969915, "learning_rate": 1.0248565965583175e-05, "loss": 0.3951, "step": 268 }, { "epoch": 0.02, "grad_norm": 2.166353414260506, "learning_rate": 1.0286806883365202e-05, "loss": 0.6514, "step": 269 }, { "epoch": 0.02, "grad_norm": 0.49891390595289997, "learning_rate": 1.0325047801147228e-05, "loss": 0.2669, "step": 270 }, { "epoch": 0.02, "grad_norm": 0.7155232121133612, "learning_rate": 1.0363288718929255e-05, "loss": 0.3295, "step": 271 }, { "epoch": 0.02, "grad_norm": 2.4276693546072683, "learning_rate": 1.0401529636711283e-05, "loss": 0.8387, "step": 272 }, { "epoch": 0.02, "grad_norm": 0.7231598135654922, "learning_rate": 1.043977055449331e-05, "loss": 0.3431, "step": 273 }, { "epoch": 0.02, "grad_norm": 1.7399633483574997, "learning_rate": 1.0478011472275336e-05, "loss": 0.4369, "step": 274 }, { "epoch": 0.02, "grad_norm": 1.0433473187882643, "learning_rate": 1.0516252390057363e-05, "loss": 0.4318, "step": 275 }, { "epoch": 0.02, "grad_norm": 0.4793959174200006, "learning_rate": 1.055449330783939e-05, "loss": 0.2016, "step": 276 }, { "epoch": 0.02, "grad_norm": 1.1899034899997953, "learning_rate": 1.0592734225621416e-05, "loss": 0.4827, "step": 277 }, { "epoch": 0.02, "grad_norm": 0.7622063676502636, "learning_rate": 1.0630975143403444e-05, "loss": 0.3748, "step": 278 }, { "epoch": 0.02, "grad_norm": 1.0017165788816116, "learning_rate": 1.066921606118547e-05, "loss": 0.4185, "step": 279 }, { "epoch": 0.02, "grad_norm": 0.8309753387936472, "learning_rate": 1.0707456978967497e-05, "loss": 0.4142, "step": 280 }, { "epoch": 0.02, "grad_norm": 0.6895111232491048, "learning_rate": 1.0745697896749524e-05, "loss": 0.211, "step": 281 }, { "epoch": 0.02, "grad_norm": 0.6991545205239938, "learning_rate": 1.0783938814531549e-05, "loss": 0.3342, "step": 282 }, { "epoch": 0.02, "grad_norm": 0.8430434837260139, "learning_rate": 1.0822179732313575e-05, "loss": 0.3487, "step": 283 }, { "epoch": 0.02, "grad_norm": 2.4751180199401044, "learning_rate": 1.0860420650095602e-05, "loss": 0.5592, "step": 284 }, { "epoch": 0.02, "grad_norm": 1.1958526321793117, "learning_rate": 1.0898661567877632e-05, "loss": 0.4458, "step": 285 }, { "epoch": 0.02, "grad_norm": 0.893083908512126, "learning_rate": 1.0936902485659658e-05, "loss": 0.3897, "step": 286 }, { "epoch": 0.02, "grad_norm": 1.1128175053171323, "learning_rate": 1.0975143403441683e-05, "loss": 0.3729, "step": 287 }, { "epoch": 0.02, "grad_norm": 0.7310765740074888, "learning_rate": 1.101338432122371e-05, "loss": 0.1865, "step": 288 }, { "epoch": 0.02, "grad_norm": 1.146099557909946, "learning_rate": 1.1051625239005736e-05, "loss": 0.3986, "step": 289 }, { "epoch": 0.02, "grad_norm": 0.8860784783630814, "learning_rate": 1.1089866156787763e-05, "loss": 0.4463, "step": 290 }, { "epoch": 0.02, "grad_norm": 0.7447706908798476, "learning_rate": 1.1128107074569791e-05, "loss": 0.3275, "step": 291 }, { "epoch": 0.02, "grad_norm": 1.202436710568476, "learning_rate": 1.1166347992351817e-05, "loss": 0.5282, "step": 292 }, { "epoch": 0.02, "grad_norm": 1.7152994728714328, "learning_rate": 1.1204588910133844e-05, "loss": 0.7841, "step": 293 }, { "epoch": 0.02, "grad_norm": 0.5435181164029416, "learning_rate": 1.124282982791587e-05, "loss": 0.2422, "step": 294 }, { "epoch": 0.02, "grad_norm": 0.6497594633770308, "learning_rate": 1.1281070745697897e-05, "loss": 0.315, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.4737099800720082, "learning_rate": 1.1319311663479924e-05, "loss": 0.6034, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.60322244237985, "learning_rate": 1.135755258126195e-05, "loss": 0.3813, "step": 297 }, { "epoch": 0.02, "grad_norm": 0.7002085464698281, "learning_rate": 1.1395793499043978e-05, "loss": 0.3664, "step": 298 }, { "epoch": 0.02, "grad_norm": 0.951906463609564, "learning_rate": 1.1434034416826005e-05, "loss": 0.4852, "step": 299 }, { "epoch": 0.02, "grad_norm": 0.44217514977603756, "learning_rate": 1.1472275334608032e-05, "loss": 0.1206, "step": 300 }, { "epoch": 0.02, "grad_norm": 0.6930363078552553, "learning_rate": 1.1510516252390058e-05, "loss": 0.3479, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.5346406725473178, "learning_rate": 1.1548757170172085e-05, "loss": 0.4308, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.83215837021332, "learning_rate": 1.1586998087954111e-05, "loss": 0.5532, "step": 303 }, { "epoch": 0.02, "grad_norm": 0.6617004606774994, "learning_rate": 1.162523900573614e-05, "loss": 0.2918, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.7033213061876957, "learning_rate": 1.1663479923518166e-05, "loss": 0.8174, "step": 305 }, { "epoch": 0.02, "grad_norm": 0.5124769881897817, "learning_rate": 1.1701720841300193e-05, "loss": 0.2797, "step": 306 }, { "epoch": 0.02, "grad_norm": 0.5573421129903319, "learning_rate": 1.1739961759082219e-05, "loss": 0.2435, "step": 307 }, { "epoch": 0.02, "grad_norm": 1.936707015756441, "learning_rate": 1.1778202676864246e-05, "loss": 0.587, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.101229402299148, "learning_rate": 1.1816443594646272e-05, "loss": 0.5648, "step": 309 }, { "epoch": 0.02, "grad_norm": 0.5718313616620087, "learning_rate": 1.1854684512428299e-05, "loss": 0.2873, "step": 310 }, { "epoch": 0.02, "grad_norm": 0.9736432054583405, "learning_rate": 1.1892925430210327e-05, "loss": 0.4525, "step": 311 }, { "epoch": 0.02, "grad_norm": 0.5300437727042384, "learning_rate": 1.1931166347992354e-05, "loss": 0.1757, "step": 312 }, { "epoch": 0.02, "grad_norm": 0.6818941883625804, "learning_rate": 1.196940726577438e-05, "loss": 0.2652, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.0449214685551322, "learning_rate": 1.2007648183556407e-05, "loss": 0.4432, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.3218968880735233, "learning_rate": 1.2045889101338433e-05, "loss": 0.5656, "step": 315 }, { "epoch": 0.02, "grad_norm": 0.675172558207587, "learning_rate": 1.208413001912046e-05, "loss": 0.3515, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.9376915018087, "learning_rate": 1.2122370936902486e-05, "loss": 0.6026, "step": 317 }, { "epoch": 0.02, "grad_norm": 0.7527951764348736, "learning_rate": 1.2160611854684515e-05, "loss": 0.39, "step": 318 }, { "epoch": 0.02, "grad_norm": 0.6271551344942625, "learning_rate": 1.2198852772466541e-05, "loss": 0.3249, "step": 319 }, { "epoch": 0.02, "grad_norm": 0.9520118512963162, "learning_rate": 1.2237093690248568e-05, "loss": 0.2872, "step": 320 }, { "epoch": 0.02, "grad_norm": 0.9489835344276659, "learning_rate": 1.2275334608030594e-05, "loss": 0.4268, "step": 321 }, { "epoch": 0.02, "grad_norm": 0.7608542124414743, "learning_rate": 1.231357552581262e-05, "loss": 0.3252, "step": 322 }, { "epoch": 0.02, "grad_norm": 4.06054704906878, "learning_rate": 1.2351816443594646e-05, "loss": 0.692, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.0376904064881367, "learning_rate": 1.2390057361376676e-05, "loss": 0.4878, "step": 324 }, { "epoch": 0.02, "grad_norm": 0.7936001862726354, "learning_rate": 1.2428298279158702e-05, "loss": 0.3846, "step": 325 }, { "epoch": 0.02, "grad_norm": 0.7655996437533465, "learning_rate": 1.2466539196940729e-05, "loss": 0.2969, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.6075196309967965, "learning_rate": 1.2504780114722753e-05, "loss": 0.322, "step": 327 }, { "epoch": 0.02, "grad_norm": 0.8333976096028658, "learning_rate": 1.254302103250478e-05, "loss": 0.3605, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.395249474296435, "learning_rate": 1.2581261950286807e-05, "loss": 0.8674, "step": 329 }, { "epoch": 0.02, "grad_norm": 0.9426907172159265, "learning_rate": 1.2619502868068833e-05, "loss": 0.3545, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.6597426619503529, "learning_rate": 1.2657743785850863e-05, "loss": 0.3462, "step": 331 }, { "epoch": 0.02, "grad_norm": 0.9277589366404783, "learning_rate": 1.2695984703632888e-05, "loss": 0.4374, "step": 332 }, { "epoch": 0.02, "grad_norm": 0.49886983704319643, "learning_rate": 1.2734225621414914e-05, "loss": 0.2108, "step": 333 }, { "epoch": 0.02, "grad_norm": 0.6541306009678697, "learning_rate": 1.2772466539196941e-05, "loss": 0.3265, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.7330589458418397, "learning_rate": 1.2810707456978968e-05, "loss": 0.765, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.9292808339282705, "learning_rate": 1.2848948374760994e-05, "loss": 0.5198, "step": 336 }, { "epoch": 0.02, "grad_norm": 0.8238173474277146, "learning_rate": 1.2887189292543022e-05, "loss": 0.313, "step": 337 }, { "epoch": 0.02, "grad_norm": 0.7391319260856145, "learning_rate": 1.2925430210325049e-05, "loss": 0.3138, "step": 338 }, { "epoch": 0.02, "grad_norm": 0.8080684350177465, "learning_rate": 1.2963671128107076e-05, "loss": 0.3086, "step": 339 }, { "epoch": 0.02, "grad_norm": 0.7345914153185755, "learning_rate": 1.3001912045889102e-05, "loss": 0.2706, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.9255928748149223, "learning_rate": 1.3040152963671129e-05, "loss": 0.7527, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.2808433824506942, "learning_rate": 1.3078393881453155e-05, "loss": 0.4384, "step": 342 }, { "epoch": 0.02, "grad_norm": 0.7058183166356123, "learning_rate": 1.3116634799235182e-05, "loss": 0.3182, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.226462284510022, "learning_rate": 1.315487571701721e-05, "loss": 0.7395, "step": 344 }, { "epoch": 0.02, "grad_norm": 0.6742925329478396, "learning_rate": 1.3193116634799237e-05, "loss": 0.2795, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.7439587116812453, "learning_rate": 1.3231357552581263e-05, "loss": 0.2749, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.4361449975745808, "learning_rate": 1.326959847036329e-05, "loss": 0.7106, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.2439492090019573, "learning_rate": 1.3307839388145316e-05, "loss": 0.589, "step": 348 }, { "epoch": 0.02, "grad_norm": 0.7435609340200998, "learning_rate": 1.3346080305927343e-05, "loss": 0.2519, "step": 349 }, { "epoch": 0.02, "grad_norm": 0.7689761834441273, "learning_rate": 1.3384321223709371e-05, "loss": 0.3878, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.604279488549852, "learning_rate": 1.3422562141491398e-05, "loss": 0.2728, "step": 351 }, { "epoch": 0.02, "grad_norm": 0.6789890550766914, "learning_rate": 1.3460803059273424e-05, "loss": 0.3179, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.115636152118617, "learning_rate": 1.349904397705545e-05, "loss": 0.4138, "step": 353 }, { "epoch": 0.02, "grad_norm": 0.6232254061695315, "learning_rate": 1.3537284894837477e-05, "loss": 0.3321, "step": 354 }, { "epoch": 0.02, "grad_norm": 0.6478378710624862, "learning_rate": 1.3575525812619504e-05, "loss": 0.2918, "step": 355 }, { "epoch": 0.02, "grad_norm": 0.6760264548742129, "learning_rate": 1.361376673040153e-05, "loss": 0.2258, "step": 356 }, { "epoch": 0.02, "grad_norm": 0.8021332779117495, "learning_rate": 1.3652007648183559e-05, "loss": 0.4408, "step": 357 }, { "epoch": 0.02, "grad_norm": 0.6232210579121872, "learning_rate": 1.3690248565965585e-05, "loss": 0.3153, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.737205410962497, "learning_rate": 1.3728489483747612e-05, "loss": 0.5192, "step": 359 }, { "epoch": 0.02, "grad_norm": 0.882470126979341, "learning_rate": 1.3766730401529638e-05, "loss": 0.428, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.6041133303431246, "learning_rate": 1.3804971319311665e-05, "loss": 0.3065, "step": 361 }, { "epoch": 0.02, "grad_norm": 0.8041421990760149, "learning_rate": 1.3843212237093691e-05, "loss": 0.3698, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.7265607031963566, "learning_rate": 1.388145315487572e-05, "loss": 0.7857, "step": 363 }, { "epoch": 0.02, "grad_norm": 0.8204545366840523, "learning_rate": 1.3919694072657746e-05, "loss": 0.3723, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.8724560011222986, "learning_rate": 1.3957934990439773e-05, "loss": 0.579, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.49392242007190124, "learning_rate": 1.39961759082218e-05, "loss": 0.2412, "step": 366 }, { "epoch": 0.02, "grad_norm": 0.6500233155836509, "learning_rate": 1.4034416826003826e-05, "loss": 0.3158, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.0068899962526328, "learning_rate": 1.407265774378585e-05, "loss": 0.3929, "step": 368 }, { "epoch": 0.02, "grad_norm": 1.1908103503569025, "learning_rate": 1.4110898661567877e-05, "loss": 0.3623, "step": 369 }, { "epoch": 0.02, "grad_norm": 0.6938542623801782, "learning_rate": 1.4149139579349907e-05, "loss": 0.3063, "step": 370 }, { "epoch": 0.02, "grad_norm": 2.1507116005912006, "learning_rate": 1.4187380497131934e-05, "loss": 0.5147, "step": 371 }, { "epoch": 0.02, "grad_norm": 0.7129151557497264, "learning_rate": 1.4225621414913958e-05, "loss": 0.1442, "step": 372 }, { "epoch": 0.02, "grad_norm": 0.5561459063539664, "learning_rate": 1.4263862332695985e-05, "loss": 0.3316, "step": 373 }, { "epoch": 0.02, "grad_norm": 0.7795311581768043, "learning_rate": 1.4302103250478012e-05, "loss": 0.3971, "step": 374 }, { "epoch": 0.02, "grad_norm": 2.1573702444835594, "learning_rate": 1.4340344168260038e-05, "loss": 0.5285, "step": 375 }, { "epoch": 0.02, "grad_norm": 0.8271672714492758, "learning_rate": 1.4378585086042068e-05, "loss": 0.346, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.22345199872424, "learning_rate": 1.4416826003824093e-05, "loss": 0.4845, "step": 377 }, { "epoch": 0.02, "grad_norm": 0.6311654742572094, "learning_rate": 1.445506692160612e-05, "loss": 0.3056, "step": 378 }, { "epoch": 0.02, "grad_norm": 0.48625752015949253, "learning_rate": 1.4493307839388146e-05, "loss": 0.2126, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.9799198948538195, "learning_rate": 1.4531548757170173e-05, "loss": 0.7895, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.8516318849459286, "learning_rate": 1.4569789674952199e-05, "loss": 0.4181, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.014567135790311, "learning_rate": 1.4608030592734226e-05, "loss": 0.3468, "step": 382 }, { "epoch": 0.02, "grad_norm": 0.8579154771118827, "learning_rate": 1.4646271510516254e-05, "loss": 0.4599, "step": 383 }, { "epoch": 0.02, "grad_norm": 0.6148793445042039, "learning_rate": 1.468451242829828e-05, "loss": 0.234, "step": 384 }, { "epoch": 0.02, "grad_norm": 0.5488181146615484, "learning_rate": 1.4722753346080307e-05, "loss": 0.2141, "step": 385 }, { "epoch": 0.02, "grad_norm": 0.6799560781486946, "learning_rate": 1.4760994263862334e-05, "loss": 0.3486, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.3813615427326338, "learning_rate": 1.479923518164436e-05, "loss": 0.5956, "step": 387 }, { "epoch": 0.02, "grad_norm": 0.6414359509532044, "learning_rate": 1.4837476099426387e-05, "loss": 0.2277, "step": 388 }, { "epoch": 0.02, "grad_norm": 0.586932100761519, "learning_rate": 1.4875717017208415e-05, "loss": 0.3545, "step": 389 }, { "epoch": 0.02, "grad_norm": 0.7838674501697059, "learning_rate": 1.4913957934990441e-05, "loss": 0.2907, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.5658830728766918, "learning_rate": 1.4952198852772468e-05, "loss": 0.2235, "step": 391 }, { "epoch": 0.02, "grad_norm": 0.8068619501663652, "learning_rate": 1.4990439770554495e-05, "loss": 0.344, "step": 392 }, { "epoch": 0.02, "grad_norm": 0.6596708569982442, "learning_rate": 1.5028680688336521e-05, "loss": 0.3774, "step": 393 }, { "epoch": 0.02, "grad_norm": 0.8793793597200517, "learning_rate": 1.5066921606118548e-05, "loss": 0.4466, "step": 394 }, { "epoch": 0.02, "grad_norm": 1.0600744438699607, "learning_rate": 1.5105162523900574e-05, "loss": 0.3287, "step": 395 }, { "epoch": 0.02, "grad_norm": 0.7919519835895109, "learning_rate": 1.5143403441682602e-05, "loss": 0.3511, "step": 396 }, { "epoch": 0.02, "grad_norm": 0.5578491432818139, "learning_rate": 1.5181644359464629e-05, "loss": 0.2575, "step": 397 }, { "epoch": 0.02, "grad_norm": 0.8105475118217892, "learning_rate": 1.5219885277246656e-05, "loss": 0.3245, "step": 398 }, { "epoch": 0.02, "grad_norm": 0.7809252479896669, "learning_rate": 1.5258126195028682e-05, "loss": 0.5919, "step": 399 }, { "epoch": 0.02, "grad_norm": 0.6766528930681027, "learning_rate": 1.529636711281071e-05, "loss": 0.3405, "step": 400 }, { "epoch": 0.02, "grad_norm": 0.6551308795259978, "learning_rate": 1.5334608030592735e-05, "loss": 0.3199, "step": 401 }, { "epoch": 0.02, "grad_norm": 0.6205927231666513, "learning_rate": 1.5372848948374762e-05, "loss": 0.2926, "step": 402 }, { "epoch": 0.02, "grad_norm": 0.6434102165393524, "learning_rate": 1.541108986615679e-05, "loss": 0.2168, "step": 403 }, { "epoch": 0.02, "grad_norm": 0.7035727497301221, "learning_rate": 1.5449330783938815e-05, "loss": 0.3766, "step": 404 }, { "epoch": 0.02, "grad_norm": 0.6351981901932477, "learning_rate": 1.548757170172084e-05, "loss": 0.4042, "step": 405 }, { "epoch": 0.02, "grad_norm": 1.0733492412828631, "learning_rate": 1.5525812619502868e-05, "loss": 0.3453, "step": 406 }, { "epoch": 0.02, "grad_norm": 0.6931124633556552, "learning_rate": 1.5564053537284895e-05, "loss": 0.4334, "step": 407 }, { "epoch": 0.02, "grad_norm": 1.344580603509804, "learning_rate": 1.560229445506692e-05, "loss": 0.3942, "step": 408 }, { "epoch": 0.02, "grad_norm": 0.48351949036648106, "learning_rate": 1.564053537284895e-05, "loss": 0.2785, "step": 409 }, { "epoch": 0.02, "grad_norm": 0.5724541069062071, "learning_rate": 1.5678776290630978e-05, "loss": 0.3334, "step": 410 }, { "epoch": 0.02, "grad_norm": 1.0690880110758512, "learning_rate": 1.5717017208413004e-05, "loss": 0.5032, "step": 411 }, { "epoch": 0.02, "grad_norm": 0.8955745333838802, "learning_rate": 1.575525812619503e-05, "loss": 0.4174, "step": 412 }, { "epoch": 0.02, "grad_norm": 0.6489343273710856, "learning_rate": 1.5793499043977057e-05, "loss": 0.354, "step": 413 }, { "epoch": 0.02, "grad_norm": 2.00140719177433, "learning_rate": 1.5831739961759084e-05, "loss": 0.6871, "step": 414 }, { "epoch": 0.02, "grad_norm": 0.9468441119751057, "learning_rate": 1.586998087954111e-05, "loss": 0.3439, "step": 415 }, { "epoch": 0.02, "grad_norm": 0.6811167520988824, "learning_rate": 1.5908221797323137e-05, "loss": 0.4035, "step": 416 }, { "epoch": 0.02, "grad_norm": 0.5067638312443203, "learning_rate": 1.5946462715105163e-05, "loss": 0.2935, "step": 417 }, { "epoch": 0.02, "grad_norm": 0.7214352543145547, "learning_rate": 1.598470363288719e-05, "loss": 0.2881, "step": 418 }, { "epoch": 0.02, "grad_norm": 1.0508862135463248, "learning_rate": 1.6022944550669217e-05, "loss": 0.4995, "step": 419 }, { "epoch": 0.02, "grad_norm": 1.8285100746936782, "learning_rate": 1.6061185468451243e-05, "loss": 0.6615, "step": 420 }, { "epoch": 0.02, "grad_norm": 0.5708789432768756, "learning_rate": 1.609942638623327e-05, "loss": 0.3237, "step": 421 }, { "epoch": 0.02, "grad_norm": 0.7903225965702565, "learning_rate": 1.61376673040153e-05, "loss": 0.3891, "step": 422 }, { "epoch": 0.02, "grad_norm": 0.5582795431672651, "learning_rate": 1.6175908221797326e-05, "loss": 0.2004, "step": 423 }, { "epoch": 0.02, "grad_norm": 0.8407191661050721, "learning_rate": 1.6214149139579353e-05, "loss": 0.312, "step": 424 }, { "epoch": 0.02, "grad_norm": 0.6198087613324328, "learning_rate": 1.625239005736138e-05, "loss": 0.3276, "step": 425 }, { "epoch": 0.02, "grad_norm": 1.8207898222982781, "learning_rate": 1.6290630975143406e-05, "loss": 0.7767, "step": 426 }, { "epoch": 0.02, "grad_norm": 1.330115693764209, "learning_rate": 1.6328871892925432e-05, "loss": 0.5159, "step": 427 }, { "epoch": 0.02, "grad_norm": 0.6387056248865568, "learning_rate": 1.636711281070746e-05, "loss": 0.3419, "step": 428 }, { "epoch": 0.02, "grad_norm": 0.6215273109764146, "learning_rate": 1.6405353728489485e-05, "loss": 0.3152, "step": 429 }, { "epoch": 0.02, "grad_norm": 0.9514248403782344, "learning_rate": 1.6443594646271512e-05, "loss": 0.2815, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.9310589290777825, "learning_rate": 1.648183556405354e-05, "loss": 0.3134, "step": 431 }, { "epoch": 0.02, "grad_norm": 1.858149168694772, "learning_rate": 1.6520076481835565e-05, "loss": 0.5817, "step": 432 }, { "epoch": 0.02, "grad_norm": 0.7691230463430312, "learning_rate": 1.655831739961759e-05, "loss": 0.3825, "step": 433 }, { "epoch": 0.02, "grad_norm": 0.7411479882760116, "learning_rate": 1.6596558317399618e-05, "loss": 0.2704, "step": 434 }, { "epoch": 0.02, "grad_norm": 1.1171597084891796, "learning_rate": 1.6634799235181648e-05, "loss": 0.1792, "step": 435 }, { "epoch": 0.03, "grad_norm": 1.0198428305122083, "learning_rate": 1.6673040152963675e-05, "loss": 0.4587, "step": 436 }, { "epoch": 0.03, "grad_norm": 0.8646954669117713, "learning_rate": 1.67112810707457e-05, "loss": 0.3177, "step": 437 }, { "epoch": 0.03, "grad_norm": 2.2919448984679605, "learning_rate": 1.6749521988527724e-05, "loss": 0.6591, "step": 438 }, { "epoch": 0.03, "grad_norm": 1.385420612343266, "learning_rate": 1.678776290630975e-05, "loss": 0.5364, "step": 439 }, { "epoch": 0.03, "grad_norm": 0.8656076895746075, "learning_rate": 1.6826003824091778e-05, "loss": 0.3426, "step": 440 }, { "epoch": 0.03, "grad_norm": 0.8693095612262008, "learning_rate": 1.6864244741873804e-05, "loss": 0.2902, "step": 441 }, { "epoch": 0.03, "grad_norm": 2.203916729584415, "learning_rate": 1.6902485659655834e-05, "loss": 0.9356, "step": 442 }, { "epoch": 0.03, "grad_norm": 0.6401954440906067, "learning_rate": 1.694072657743786e-05, "loss": 0.2328, "step": 443 }, { "epoch": 0.03, "grad_norm": 1.2732766268188778, "learning_rate": 1.6978967495219887e-05, "loss": 0.4489, "step": 444 }, { "epoch": 0.03, "grad_norm": 1.0743702431986437, "learning_rate": 1.7017208413001914e-05, "loss": 0.4417, "step": 445 }, { "epoch": 0.03, "grad_norm": 0.7493088642138847, "learning_rate": 1.705544933078394e-05, "loss": 0.3438, "step": 446 }, { "epoch": 0.03, "grad_norm": 1.872577130088869, "learning_rate": 1.7093690248565967e-05, "loss": 0.5092, "step": 447 }, { "epoch": 0.03, "grad_norm": 1.6800381906510835, "learning_rate": 1.7131931166347993e-05, "loss": 0.3587, "step": 448 }, { "epoch": 0.03, "grad_norm": 0.6966844101543027, "learning_rate": 1.717017208413002e-05, "loss": 0.3088, "step": 449 }, { "epoch": 0.03, "grad_norm": 1.2041520310637208, "learning_rate": 1.7208413001912046e-05, "loss": 0.5373, "step": 450 }, { "epoch": 0.03, "grad_norm": 1.1907325057349623, "learning_rate": 1.7246653919694073e-05, "loss": 0.3502, "step": 451 }, { "epoch": 0.03, "grad_norm": 0.9197893826285918, "learning_rate": 1.72848948374761e-05, "loss": 0.3349, "step": 452 }, { "epoch": 0.03, "grad_norm": 0.8202204456029969, "learning_rate": 1.7323135755258126e-05, "loss": 0.391, "step": 453 }, { "epoch": 0.03, "grad_norm": 1.0635885255387458, "learning_rate": 1.7361376673040153e-05, "loss": 0.4402, "step": 454 }, { "epoch": 0.03, "grad_norm": 0.7312803167255396, "learning_rate": 1.7399617590822183e-05, "loss": 0.341, "step": 455 }, { "epoch": 0.03, "grad_norm": 0.7672739780107467, "learning_rate": 1.743785850860421e-05, "loss": 0.3814, "step": 456 }, { "epoch": 0.03, "grad_norm": 0.7107829663539708, "learning_rate": 1.7476099426386236e-05, "loss": 0.2131, "step": 457 }, { "epoch": 0.03, "grad_norm": 0.7640513015983967, "learning_rate": 1.7514340344168262e-05, "loss": 0.3358, "step": 458 }, { "epoch": 0.03, "grad_norm": 1.4799590833631586, "learning_rate": 1.755258126195029e-05, "loss": 0.7856, "step": 459 }, { "epoch": 0.03, "grad_norm": 0.6436605240448509, "learning_rate": 1.7590822179732315e-05, "loss": 0.3497, "step": 460 }, { "epoch": 0.03, "grad_norm": 0.7464987117639431, "learning_rate": 1.7629063097514342e-05, "loss": 0.3435, "step": 461 }, { "epoch": 0.03, "grad_norm": 0.7696930422997337, "learning_rate": 1.766730401529637e-05, "loss": 0.4322, "step": 462 }, { "epoch": 0.03, "grad_norm": 0.6025265599658696, "learning_rate": 1.7705544933078395e-05, "loss": 0.1584, "step": 463 }, { "epoch": 0.03, "grad_norm": 0.6378754667711887, "learning_rate": 1.774378585086042e-05, "loss": 0.3131, "step": 464 }, { "epoch": 0.03, "grad_norm": 0.6729866894407964, "learning_rate": 1.7782026768642448e-05, "loss": 0.3856, "step": 465 }, { "epoch": 0.03, "grad_norm": 0.935070447059843, "learning_rate": 1.7820267686424475e-05, "loss": 0.6001, "step": 466 }, { "epoch": 0.03, "grad_norm": 0.5333343068447588, "learning_rate": 1.78585086042065e-05, "loss": 0.3228, "step": 467 }, { "epoch": 0.03, "grad_norm": 0.9091879424576893, "learning_rate": 1.789674952198853e-05, "loss": 0.4292, "step": 468 }, { "epoch": 0.03, "grad_norm": 0.5387821924240094, "learning_rate": 1.7934990439770558e-05, "loss": 0.2639, "step": 469 }, { "epoch": 0.03, "grad_norm": 0.5311447953582863, "learning_rate": 1.7973231357552584e-05, "loss": 0.2444, "step": 470 }, { "epoch": 0.03, "grad_norm": 1.395307363682624, "learning_rate": 1.801147227533461e-05, "loss": 0.7412, "step": 471 }, { "epoch": 0.03, "grad_norm": 0.6504582290964441, "learning_rate": 1.8049713193116637e-05, "loss": 0.4336, "step": 472 }, { "epoch": 0.03, "grad_norm": 0.5715139902093684, "learning_rate": 1.8087954110898664e-05, "loss": 0.2398, "step": 473 }, { "epoch": 0.03, "grad_norm": 1.125542602171137, "learning_rate": 1.812619502868069e-05, "loss": 0.6667, "step": 474 }, { "epoch": 0.03, "grad_norm": 0.4982371633361562, "learning_rate": 1.8164435946462717e-05, "loss": 0.237, "step": 475 }, { "epoch": 0.03, "grad_norm": 0.5691266935810649, "learning_rate": 1.8202676864244744e-05, "loss": 0.2451, "step": 476 }, { "epoch": 0.03, "grad_norm": 0.7030820342200949, "learning_rate": 1.824091778202677e-05, "loss": 0.4042, "step": 477 }, { "epoch": 0.03, "grad_norm": 1.3941896853612525, "learning_rate": 1.8279158699808797e-05, "loss": 0.5841, "step": 478 }, { "epoch": 0.03, "grad_norm": 0.6743046891263709, "learning_rate": 1.8317399617590823e-05, "loss": 0.3182, "step": 479 }, { "epoch": 0.03, "grad_norm": 0.750355710951829, "learning_rate": 1.835564053537285e-05, "loss": 0.3391, "step": 480 }, { "epoch": 0.03, "grad_norm": 0.46562694341915173, "learning_rate": 1.839388145315488e-05, "loss": 0.2027, "step": 481 }, { "epoch": 0.03, "grad_norm": 0.6383543781218285, "learning_rate": 1.8432122370936906e-05, "loss": 0.3138, "step": 482 }, { "epoch": 0.03, "grad_norm": 2.128120166212137, "learning_rate": 1.847036328871893e-05, "loss": 0.5683, "step": 483 }, { "epoch": 0.03, "grad_norm": 0.7605870557660157, "learning_rate": 1.8508604206500956e-05, "loss": 0.4408, "step": 484 }, { "epoch": 0.03, "grad_norm": 0.6354556583007445, "learning_rate": 1.8546845124282983e-05, "loss": 0.3147, "step": 485 }, { "epoch": 0.03, "grad_norm": 2.9816014322679436, "learning_rate": 1.858508604206501e-05, "loss": 0.5888, "step": 486 }, { "epoch": 0.03, "grad_norm": 0.5289682974012722, "learning_rate": 1.8623326959847036e-05, "loss": 0.2015, "step": 487 }, { "epoch": 0.03, "grad_norm": 0.6236586560186679, "learning_rate": 1.8661567877629066e-05, "loss": 0.3256, "step": 488 }, { "epoch": 0.03, "grad_norm": 0.6494916866609775, "learning_rate": 1.8699808795411092e-05, "loss": 0.3929, "step": 489 }, { "epoch": 0.03, "grad_norm": 1.1871808128906545, "learning_rate": 1.873804971319312e-05, "loss": 0.6403, "step": 490 }, { "epoch": 0.03, "grad_norm": 0.7214128916285852, "learning_rate": 1.8776290630975145e-05, "loss": 0.3347, "step": 491 }, { "epoch": 0.03, "grad_norm": 0.6602266191900171, "learning_rate": 1.8814531548757172e-05, "loss": 0.3601, "step": 492 }, { "epoch": 0.03, "grad_norm": 0.694604817799548, "learning_rate": 1.88527724665392e-05, "loss": 0.1621, "step": 493 }, { "epoch": 0.03, "grad_norm": 0.7076032930955318, "learning_rate": 1.8891013384321225e-05, "loss": 0.3279, "step": 494 }, { "epoch": 0.03, "grad_norm": 0.8666075990621492, "learning_rate": 1.892925430210325e-05, "loss": 0.4989, "step": 495 }, { "epoch": 0.03, "grad_norm": 0.7678193345063739, "learning_rate": 1.8967495219885278e-05, "loss": 0.3314, "step": 496 }, { "epoch": 0.03, "grad_norm": 0.555716909316726, "learning_rate": 1.9005736137667305e-05, "loss": 0.3502, "step": 497 }, { "epoch": 0.03, "grad_norm": 0.8106958346923805, "learning_rate": 1.904397705544933e-05, "loss": 0.4847, "step": 498 }, { "epoch": 0.03, "grad_norm": 0.5347451642599353, "learning_rate": 1.9082217973231358e-05, "loss": 0.1286, "step": 499 }, { "epoch": 0.03, "grad_norm": 0.6613152718234083, "learning_rate": 1.9120458891013384e-05, "loss": 0.3141, "step": 500 }, { "epoch": 0.03, "grad_norm": 0.9116473642098698, "learning_rate": 1.9158699808795414e-05, "loss": 0.4658, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.199682601869777, "learning_rate": 1.919694072657744e-05, "loss": 0.6147, "step": 502 }, { "epoch": 0.03, "grad_norm": 0.7314781117930744, "learning_rate": 1.9235181644359467e-05, "loss": 0.2652, "step": 503 }, { "epoch": 0.03, "grad_norm": 0.8325395903217065, "learning_rate": 1.9273422562141494e-05, "loss": 0.3995, "step": 504 }, { "epoch": 0.03, "grad_norm": 0.5062483839186255, "learning_rate": 1.931166347992352e-05, "loss": 0.3077, "step": 505 }, { "epoch": 0.03, "grad_norm": 0.5967051191367326, "learning_rate": 1.9349904397705547e-05, "loss": 0.3258, "step": 506 }, { "epoch": 0.03, "grad_norm": 0.8272368056777742, "learning_rate": 1.9388145315487573e-05, "loss": 0.3995, "step": 507 }, { "epoch": 0.03, "grad_norm": 0.9304286527087062, "learning_rate": 1.94263862332696e-05, "loss": 0.3921, "step": 508 }, { "epoch": 0.03, "grad_norm": 0.5272587543969978, "learning_rate": 1.9464627151051627e-05, "loss": 0.2309, "step": 509 }, { "epoch": 0.03, "grad_norm": 0.8486834700401805, "learning_rate": 1.9502868068833653e-05, "loss": 0.5258, "step": 510 }, { "epoch": 0.03, "grad_norm": 0.6631668030453906, "learning_rate": 1.954110898661568e-05, "loss": 0.4544, "step": 511 }, { "epoch": 0.03, "grad_norm": 0.763621465095935, "learning_rate": 1.9579349904397706e-05, "loss": 0.2702, "step": 512 }, { "epoch": 0.03, "grad_norm": 0.5529369486422537, "learning_rate": 1.9617590822179733e-05, "loss": 0.3651, "step": 513 }, { "epoch": 0.03, "grad_norm": 0.648527669233175, "learning_rate": 1.9655831739961763e-05, "loss": 0.3392, "step": 514 }, { "epoch": 0.03, "grad_norm": 0.8333363603111317, "learning_rate": 1.969407265774379e-05, "loss": 0.4271, "step": 515 }, { "epoch": 0.03, "grad_norm": 0.9578624303870017, "learning_rate": 1.9732313575525816e-05, "loss": 0.3442, "step": 516 }, { "epoch": 0.03, "grad_norm": 0.9338089452068773, "learning_rate": 1.9770554493307842e-05, "loss": 0.5805, "step": 517 }, { "epoch": 0.03, "grad_norm": 0.7261028256374711, "learning_rate": 1.980879541108987e-05, "loss": 0.4576, "step": 518 }, { "epoch": 0.03, "grad_norm": 0.6206482079476885, "learning_rate": 1.9847036328871892e-05, "loss": 0.2308, "step": 519 }, { "epoch": 0.03, "grad_norm": 0.5731488669023147, "learning_rate": 1.9885277246653922e-05, "loss": 0.3147, "step": 520 }, { "epoch": 0.03, "grad_norm": 0.5715431597204902, "learning_rate": 1.992351816443595e-05, "loss": 0.3604, "step": 521 }, { "epoch": 0.03, "grad_norm": 0.7894908009717267, "learning_rate": 1.9961759082217975e-05, "loss": 0.3063, "step": 522 }, { "epoch": 0.03, "grad_norm": 0.8229915510438305, "learning_rate": 2e-05, "loss": 0.4539, "step": 523 }, { "epoch": 0.03, "grad_norm": 0.6027096957912251, "learning_rate": 1.9999999826850218e-05, "loss": 0.3497, "step": 524 }, { "epoch": 0.03, "grad_norm": 0.9160865998931952, "learning_rate": 1.999999930740087e-05, "loss": 0.2652, "step": 525 }, { "epoch": 0.03, "grad_norm": 0.4656131298936089, "learning_rate": 1.9999998441651974e-05, "loss": 0.224, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.0868401681216302, "learning_rate": 1.999999722960356e-05, "loss": 0.5024, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.0309672847824662, "learning_rate": 1.9999995671255675e-05, "loss": 0.3227, "step": 528 }, { "epoch": 0.03, "grad_norm": 0.9537024074650399, "learning_rate": 1.999999376660837e-05, "loss": 0.5217, "step": 529 }, { "epoch": 0.03, "grad_norm": 0.7989227071893219, "learning_rate": 1.9999991515661712e-05, "loss": 0.3248, "step": 530 }, { "epoch": 0.03, "grad_norm": 0.5587548459372644, "learning_rate": 1.9999988918415777e-05, "loss": 0.2575, "step": 531 }, { "epoch": 0.03, "grad_norm": 0.6038947163659312, "learning_rate": 1.9999985974870653e-05, "loss": 0.2639, "step": 532 }, { "epoch": 0.03, "grad_norm": 0.9005065511749109, "learning_rate": 1.999998268502645e-05, "loss": 0.424, "step": 533 }, { "epoch": 0.03, "grad_norm": 0.7791223727687504, "learning_rate": 1.9999979048883275e-05, "loss": 0.3973, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.049952558614156, "learning_rate": 1.999997506644125e-05, "loss": 0.402, "step": 535 }, { "epoch": 0.03, "grad_norm": 0.654247607069547, "learning_rate": 1.9999970737700526e-05, "loss": 0.3599, "step": 536 }, { "epoch": 0.03, "grad_norm": 0.794701933156306, "learning_rate": 1.999996606266124e-05, "loss": 0.3839, "step": 537 }, { "epoch": 0.03, "grad_norm": 0.7277135121566177, "learning_rate": 1.999996104132356e-05, "loss": 0.235, "step": 538 }, { "epoch": 0.03, "grad_norm": 0.4981718868298558, "learning_rate": 1.9999955673687663e-05, "loss": 0.2761, "step": 539 }, { "epoch": 0.03, "grad_norm": 0.7402595774332386, "learning_rate": 1.999994995975373e-05, "loss": 0.3425, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.5576818896204434, "learning_rate": 1.9999943899521955e-05, "loss": 0.587, "step": 541 }, { "epoch": 0.03, "grad_norm": 0.5357370858034539, "learning_rate": 1.9999937492992558e-05, "loss": 0.2118, "step": 542 }, { "epoch": 0.03, "grad_norm": 0.8512678621652553, "learning_rate": 1.9999930740165755e-05, "loss": 0.3902, "step": 543 }, { "epoch": 0.03, "grad_norm": 0.8553424623445812, "learning_rate": 1.999992364104178e-05, "loss": 0.3927, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.5979085965888817, "learning_rate": 1.9999916195620875e-05, "loss": 0.3931, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.7901713482230103, "learning_rate": 1.9999908403903307e-05, "loss": 0.3896, "step": 546 }, { "epoch": 0.03, "grad_norm": 0.394859846631288, "learning_rate": 1.999990026588934e-05, "loss": 0.2883, "step": 547 }, { "epoch": 0.03, "grad_norm": 0.7640357579682913, "learning_rate": 1.9999891781579256e-05, "loss": 0.3455, "step": 548 }, { "epoch": 0.03, "grad_norm": 0.6769055305092657, "learning_rate": 1.9999882950973352e-05, "loss": 0.3481, "step": 549 }, { "epoch": 0.03, "grad_norm": 1.9577307589064314, "learning_rate": 1.999987377407193e-05, "loss": 0.7917, "step": 550 }, { "epoch": 0.03, "grad_norm": 0.9439456813870318, "learning_rate": 1.9999864250875305e-05, "loss": 0.2947, "step": 551 }, { "epoch": 0.03, "grad_norm": 0.8175729706645952, "learning_rate": 1.9999854381383817e-05, "loss": 0.3907, "step": 552 }, { "epoch": 0.03, "grad_norm": 0.4732231542439032, "learning_rate": 1.99998441655978e-05, "loss": 0.2284, "step": 553 }, { "epoch": 0.03, "grad_norm": 1.238787974808822, "learning_rate": 1.999983360351761e-05, "loss": 0.5136, "step": 554 }, { "epoch": 0.03, "grad_norm": 0.7050079922045303, "learning_rate": 1.999982269514361e-05, "loss": 0.3216, "step": 555 }, { "epoch": 0.03, "grad_norm": 0.8573120183532486, "learning_rate": 1.9999811440476182e-05, "loss": 0.3806, "step": 556 }, { "epoch": 0.03, "grad_norm": 1.2215743690492187, "learning_rate": 1.999979983951571e-05, "loss": 0.5398, "step": 557 }, { "epoch": 0.03, "grad_norm": 0.6446022868528241, "learning_rate": 1.9999787892262605e-05, "loss": 0.3098, "step": 558 }, { "epoch": 0.03, "grad_norm": 0.5002653329984211, "learning_rate": 1.9999775598717276e-05, "loss": 0.2903, "step": 559 }, { "epoch": 0.03, "grad_norm": 0.5895344073161108, "learning_rate": 1.9999762958880145e-05, "loss": 0.2155, "step": 560 }, { "epoch": 0.03, "grad_norm": 0.8267429050787638, "learning_rate": 1.9999749972751653e-05, "loss": 0.3351, "step": 561 }, { "epoch": 0.03, "grad_norm": 1.353467415655348, "learning_rate": 1.999973664033225e-05, "loss": 0.5793, "step": 562 }, { "epoch": 0.03, "grad_norm": 0.7361197465689773, "learning_rate": 1.9999722961622395e-05, "loss": 0.3886, "step": 563 }, { "epoch": 0.03, "grad_norm": 0.5447246086308706, "learning_rate": 1.9999708936622564e-05, "loss": 0.2858, "step": 564 }, { "epoch": 0.03, "grad_norm": 0.5984252294712458, "learning_rate": 1.9999694565333246e-05, "loss": 0.1902, "step": 565 }, { "epoch": 0.03, "grad_norm": 1.482940990942458, "learning_rate": 1.999967984775493e-05, "loss": 0.724, "step": 566 }, { "epoch": 0.03, "grad_norm": 0.6741795289929788, "learning_rate": 1.9999664783888138e-05, "loss": 0.3405, "step": 567 }, { "epoch": 0.03, "grad_norm": 0.9182428191621349, "learning_rate": 1.9999649373733377e-05, "loss": 0.3617, "step": 568 }, { "epoch": 0.03, "grad_norm": 1.6389573056553957, "learning_rate": 1.9999633617291196e-05, "loss": 0.6102, "step": 569 }, { "epoch": 0.03, "grad_norm": 0.6475024165740241, "learning_rate": 1.9999617514562125e-05, "loss": 0.3249, "step": 570 }, { "epoch": 0.03, "grad_norm": 0.7182517478278182, "learning_rate": 1.9999601065546733e-05, "loss": 0.3586, "step": 571 }, { "epoch": 0.03, "grad_norm": 0.6687765532978037, "learning_rate": 1.9999584270245588e-05, "loss": 0.206, "step": 572 }, { "epoch": 0.03, "grad_norm": 0.7089634046898335, "learning_rate": 1.9999567128659267e-05, "loss": 0.3192, "step": 573 }, { "epoch": 0.03, "grad_norm": 2.260767215457327, "learning_rate": 1.9999549640788368e-05, "loss": 0.5278, "step": 574 }, { "epoch": 0.03, "grad_norm": 0.6480748864054804, "learning_rate": 1.9999531806633493e-05, "loss": 0.3919, "step": 575 }, { "epoch": 0.03, "grad_norm": 0.675673550540879, "learning_rate": 1.9999513626195265e-05, "loss": 0.3158, "step": 576 }, { "epoch": 0.03, "grad_norm": 1.6538901519919962, "learning_rate": 1.9999495099474306e-05, "loss": 0.831, "step": 577 }, { "epoch": 0.03, "grad_norm": 0.5293004891675508, "learning_rate": 1.9999476226471265e-05, "loss": 0.1482, "step": 578 }, { "epoch": 0.03, "grad_norm": 0.9542678819942969, "learning_rate": 1.999945700718679e-05, "loss": 0.3594, "step": 579 }, { "epoch": 0.03, "grad_norm": 1.3111617705092098, "learning_rate": 1.9999437441621547e-05, "loss": 0.4239, "step": 580 }, { "epoch": 0.03, "grad_norm": 2.0926709671371593, "learning_rate": 1.9999417529776218e-05, "loss": 0.5737, "step": 581 }, { "epoch": 0.03, "grad_norm": 0.6708490869863095, "learning_rate": 1.999939727165149e-05, "loss": 0.3331, "step": 582 }, { "epoch": 0.03, "grad_norm": 0.7622221883483998, "learning_rate": 1.9999376667248068e-05, "loss": 0.3782, "step": 583 }, { "epoch": 0.03, "grad_norm": 0.7441804908580606, "learning_rate": 1.9999355716566655e-05, "loss": 0.1226, "step": 584 }, { "epoch": 0.03, "grad_norm": 0.899774384946839, "learning_rate": 1.999933441960799e-05, "loss": 0.3422, "step": 585 }, { "epoch": 0.03, "grad_norm": 3.4082682727776334, "learning_rate": 1.9999312776372798e-05, "loss": 0.6145, "step": 586 }, { "epoch": 0.03, "grad_norm": 0.9534436469990148, "learning_rate": 1.9999290786861837e-05, "loss": 0.3507, "step": 587 }, { "epoch": 0.03, "grad_norm": 0.7050693202744416, "learning_rate": 1.999926845107587e-05, "loss": 0.3421, "step": 588 }, { "epoch": 0.03, "grad_norm": 1.7025023466205644, "learning_rate": 1.9999245769015663e-05, "loss": 0.7674, "step": 589 }, { "epoch": 0.03, "grad_norm": 0.7588867665045335, "learning_rate": 1.9999222740682004e-05, "loss": 0.2988, "step": 590 }, { "epoch": 0.03, "grad_norm": 0.8053056561388221, "learning_rate": 1.9999199366075694e-05, "loss": 0.279, "step": 591 }, { "epoch": 0.03, "grad_norm": 2.7201821145047025, "learning_rate": 1.9999175645197537e-05, "loss": 0.6306, "step": 592 }, { "epoch": 0.03, "grad_norm": 1.6922097540301058, "learning_rate": 1.9999151578048357e-05, "loss": 0.806, "step": 593 }, { "epoch": 0.03, "grad_norm": 0.5154629765050703, "learning_rate": 1.9999127164628992e-05, "loss": 0.2397, "step": 594 }, { "epoch": 0.03, "grad_norm": 0.6208505950553166, "learning_rate": 1.999910240494028e-05, "loss": 0.3727, "step": 595 }, { "epoch": 0.03, "grad_norm": 0.6884024496186021, "learning_rate": 1.9999077298983084e-05, "loss": 0.2778, "step": 596 }, { "epoch": 0.03, "grad_norm": 0.7202762821630867, "learning_rate": 1.9999051846758267e-05, "loss": 0.2434, "step": 597 }, { "epoch": 0.03, "grad_norm": 2.2942921881997282, "learning_rate": 1.999902604826672e-05, "loss": 0.7034, "step": 598 }, { "epoch": 0.03, "grad_norm": 0.772582124815898, "learning_rate": 1.9998999903509326e-05, "loss": 0.4107, "step": 599 }, { "epoch": 0.03, "grad_norm": 0.6564284208707677, "learning_rate": 1.9998973412487e-05, "loss": 0.2485, "step": 600 }, { "epoch": 0.03, "grad_norm": 4.5218916723217415, "learning_rate": 1.9998946575200652e-05, "loss": 0.6996, "step": 601 }, { "epoch": 0.03, "grad_norm": 1.2246507040544916, "learning_rate": 1.9998919391651214e-05, "loss": 0.5521, "step": 602 }, { "epoch": 0.03, "grad_norm": 0.6937961374682375, "learning_rate": 1.9998891861839627e-05, "loss": 0.3197, "step": 603 }, { "epoch": 0.03, "grad_norm": 1.0318409532385129, "learning_rate": 1.9998863985766845e-05, "loss": 0.2606, "step": 604 }, { "epoch": 0.03, "grad_norm": 2.722572887274754, "learning_rate": 1.9998835763433836e-05, "loss": 0.8884, "step": 605 }, { "epoch": 0.03, "grad_norm": 0.6300069061706091, "learning_rate": 1.999880719484157e-05, "loss": 0.3101, "step": 606 }, { "epoch": 0.03, "grad_norm": 0.6983482017923085, "learning_rate": 1.9998778279991042e-05, "loss": 0.3086, "step": 607 }, { "epoch": 0.03, "grad_norm": 2.0707704364945454, "learning_rate": 1.999874901888325e-05, "loss": 0.6135, "step": 608 }, { "epoch": 0.03, "grad_norm": 0.5749567826655014, "learning_rate": 1.9998719411519215e-05, "loss": 0.3335, "step": 609 }, { "epoch": 0.04, "grad_norm": 0.5182988783531453, "learning_rate": 1.9998689457899955e-05, "loss": 0.1555, "step": 610 }, { "epoch": 0.04, "grad_norm": 0.7617833054443183, "learning_rate": 1.999865915802651e-05, "loss": 0.4108, "step": 611 }, { "epoch": 0.04, "grad_norm": 1.1526576959541612, "learning_rate": 1.9998628511899925e-05, "loss": 0.4166, "step": 612 }, { "epoch": 0.04, "grad_norm": 0.6571422467528628, "learning_rate": 1.999859751952127e-05, "loss": 0.3332, "step": 613 }, { "epoch": 0.04, "grad_norm": 0.7513551490147077, "learning_rate": 1.9998566180891606e-05, "loss": 0.4341, "step": 614 }, { "epoch": 0.04, "grad_norm": 0.5806459633657569, "learning_rate": 1.9998534496012026e-05, "loss": 0.3158, "step": 615 }, { "epoch": 0.04, "grad_norm": 1.6232848400792341, "learning_rate": 1.9998502464883632e-05, "loss": 0.2476, "step": 616 }, { "epoch": 0.04, "grad_norm": 2.5384024602749533, "learning_rate": 1.9998470087507522e-05, "loss": 0.6709, "step": 617 }, { "epoch": 0.04, "grad_norm": 0.6685859795334497, "learning_rate": 1.9998437363884825e-05, "loss": 0.3515, "step": 618 }, { "epoch": 0.04, "grad_norm": 0.5741696913240106, "learning_rate": 1.999840429401667e-05, "loss": 0.3636, "step": 619 }, { "epoch": 0.04, "grad_norm": 1.5049238219312997, "learning_rate": 1.9998370877904208e-05, "loss": 0.5154, "step": 620 }, { "epoch": 0.04, "grad_norm": 0.672421234183969, "learning_rate": 1.9998337115548588e-05, "loss": 0.2922, "step": 621 }, { "epoch": 0.04, "grad_norm": 2.2798101602612184, "learning_rate": 1.999830300695099e-05, "loss": 0.3201, "step": 622 }, { "epoch": 0.04, "grad_norm": 0.6970423165330458, "learning_rate": 1.9998268552112586e-05, "loss": 0.3156, "step": 623 }, { "epoch": 0.04, "grad_norm": 0.4610523510184624, "learning_rate": 1.999823375103457e-05, "loss": 0.2628, "step": 624 }, { "epoch": 0.04, "grad_norm": 2.7809585438545255, "learning_rate": 1.9998198603718148e-05, "loss": 0.6929, "step": 625 }, { "epoch": 0.04, "grad_norm": 0.7009702901425052, "learning_rate": 1.9998163110164543e-05, "loss": 0.4038, "step": 626 }, { "epoch": 0.04, "grad_norm": 0.7154276273247365, "learning_rate": 1.9998127270374975e-05, "loss": 0.3343, "step": 627 }, { "epoch": 0.04, "grad_norm": 0.6401189026507259, "learning_rate": 1.999809108435069e-05, "loss": 0.2824, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.1197570166445445, "learning_rate": 1.9998054552092943e-05, "loss": 0.3704, "step": 629 }, { "epoch": 0.04, "grad_norm": 0.6645133821988366, "learning_rate": 1.9998017673602996e-05, "loss": 0.2726, "step": 630 }, { "epoch": 0.04, "grad_norm": 0.6906799629741556, "learning_rate": 1.9997980448882125e-05, "loss": 0.3588, "step": 631 }, { "epoch": 0.04, "grad_norm": 1.227238559775126, "learning_rate": 1.9997942877931624e-05, "loss": 0.6336, "step": 632 }, { "epoch": 0.04, "grad_norm": 0.7603717426281402, "learning_rate": 1.9997904960752794e-05, "loss": 0.2539, "step": 633 }, { "epoch": 0.04, "grad_norm": 0.7780626216278012, "learning_rate": 1.999786669734694e-05, "loss": 0.4715, "step": 634 }, { "epoch": 0.04, "grad_norm": 0.5482206816162322, "learning_rate": 1.999782808771539e-05, "loss": 0.3319, "step": 635 }, { "epoch": 0.04, "grad_norm": 0.7702010294483638, "learning_rate": 1.999778913185949e-05, "loss": 0.2772, "step": 636 }, { "epoch": 0.04, "grad_norm": 0.5466923355560228, "learning_rate": 1.9997749829780577e-05, "loss": 0.3879, "step": 637 }, { "epoch": 0.04, "grad_norm": 0.7200638224194983, "learning_rate": 1.9997710181480018e-05, "loss": 0.381, "step": 638 }, { "epoch": 0.04, "grad_norm": 0.47588589352893357, "learning_rate": 1.9997670186959187e-05, "loss": 0.288, "step": 639 }, { "epoch": 0.04, "grad_norm": 0.8159078143196873, "learning_rate": 1.999762984621947e-05, "loss": 0.4299, "step": 640 }, { "epoch": 0.04, "grad_norm": 0.764774529343326, "learning_rate": 1.9997589159262255e-05, "loss": 0.3122, "step": 641 }, { "epoch": 0.04, "grad_norm": 0.5095595388066965, "learning_rate": 1.9997548126088963e-05, "loss": 0.3581, "step": 642 }, { "epoch": 0.04, "grad_norm": 0.5936175913635908, "learning_rate": 1.9997506746701006e-05, "loss": 0.3294, "step": 643 }, { "epoch": 0.04, "grad_norm": 0.5773573972412904, "learning_rate": 1.9997465021099818e-05, "loss": 0.4026, "step": 644 }, { "epoch": 0.04, "grad_norm": 0.5790983443211878, "learning_rate": 1.9997422949286852e-05, "loss": 0.3016, "step": 645 }, { "epoch": 0.04, "grad_norm": 1.5202482266217976, "learning_rate": 1.9997380531263555e-05, "loss": 0.3345, "step": 646 }, { "epoch": 0.04, "grad_norm": 0.5326923128194373, "learning_rate": 1.99973377670314e-05, "loss": 0.4049, "step": 647 }, { "epoch": 0.04, "grad_norm": 0.6871631996017432, "learning_rate": 1.999729465659187e-05, "loss": 0.4462, "step": 648 }, { "epoch": 0.04, "grad_norm": 0.5156786584531907, "learning_rate": 1.9997251199946456e-05, "loss": 0.2645, "step": 649 }, { "epoch": 0.04, "grad_norm": 0.523957794870545, "learning_rate": 1.999720739709666e-05, "loss": 0.3247, "step": 650 }, { "epoch": 0.04, "grad_norm": 1.1519934314835216, "learning_rate": 1.9997163248044008e-05, "loss": 0.4499, "step": 651 }, { "epoch": 0.04, "grad_norm": 0.714868485827706, "learning_rate": 1.9997118752790016e-05, "loss": 0.2821, "step": 652 }, { "epoch": 0.04, "grad_norm": 0.732881875664929, "learning_rate": 1.9997073911336234e-05, "loss": 0.5607, "step": 653 }, { "epoch": 0.04, "grad_norm": 0.6503986849828309, "learning_rate": 1.9997028723684213e-05, "loss": 0.3507, "step": 654 }, { "epoch": 0.04, "grad_norm": 0.5726787275180806, "learning_rate": 1.999698318983552e-05, "loss": 0.3602, "step": 655 }, { "epoch": 0.04, "grad_norm": 0.5050293657924345, "learning_rate": 1.9996937309791722e-05, "loss": 0.1513, "step": 656 }, { "epoch": 0.04, "grad_norm": 0.8105239826073464, "learning_rate": 1.999689108355442e-05, "loss": 0.4691, "step": 657 }, { "epoch": 0.04, "grad_norm": 0.5513704345716917, "learning_rate": 1.9996844511125205e-05, "loss": 0.3814, "step": 658 }, { "epoch": 0.04, "grad_norm": 0.6100721289861497, "learning_rate": 1.9996797592505703e-05, "loss": 0.4041, "step": 659 }, { "epoch": 0.04, "grad_norm": 0.6755649661583865, "learning_rate": 1.9996750327697523e-05, "loss": 0.4135, "step": 660 }, { "epoch": 0.04, "grad_norm": 0.5909747328001143, "learning_rate": 1.999670271670231e-05, "loss": 0.3755, "step": 661 }, { "epoch": 0.04, "grad_norm": 0.5530198259517983, "learning_rate": 1.9996654759521713e-05, "loss": 0.2282, "step": 662 }, { "epoch": 0.04, "grad_norm": 1.0659525428191399, "learning_rate": 1.999660645615739e-05, "loss": 0.2963, "step": 663 }, { "epoch": 0.04, "grad_norm": 0.6884011137041578, "learning_rate": 1.9996557806611017e-05, "loss": 0.375, "step": 664 }, { "epoch": 0.04, "grad_norm": 1.2005303148264228, "learning_rate": 1.9996508810884277e-05, "loss": 0.5657, "step": 665 }, { "epoch": 0.04, "grad_norm": 0.7004073880089609, "learning_rate": 1.9996459468978865e-05, "loss": 0.2783, "step": 666 }, { "epoch": 0.04, "grad_norm": 0.6849488869877197, "learning_rate": 1.9996409780896495e-05, "loss": 0.3791, "step": 667 }, { "epoch": 0.04, "grad_norm": 0.9692201762247729, "learning_rate": 1.999635974663888e-05, "loss": 0.2391, "step": 668 }, { "epoch": 0.04, "grad_norm": 1.3591328381927894, "learning_rate": 1.999630936620776e-05, "loss": 0.3121, "step": 669 }, { "epoch": 0.04, "grad_norm": 0.6988627891771934, "learning_rate": 1.9996258639604874e-05, "loss": 0.3539, "step": 670 }, { "epoch": 0.04, "grad_norm": 0.9556786113936547, "learning_rate": 1.999620756683198e-05, "loss": 0.4806, "step": 671 }, { "epoch": 0.04, "grad_norm": 1.4342440011179771, "learning_rate": 1.999615614789085e-05, "loss": 0.3508, "step": 672 }, { "epoch": 0.04, "grad_norm": 0.6267491039851717, "learning_rate": 1.9996104382783266e-05, "loss": 0.3627, "step": 673 }, { "epoch": 0.04, "grad_norm": 0.6388301817445797, "learning_rate": 1.9996052271511012e-05, "loss": 0.2443, "step": 674 }, { "epoch": 0.04, "grad_norm": 0.6126860481430475, "learning_rate": 1.99959998140759e-05, "loss": 0.2574, "step": 675 }, { "epoch": 0.04, "grad_norm": 0.6335639107595146, "learning_rate": 1.9995947010479744e-05, "loss": 0.4149, "step": 676 }, { "epoch": 0.04, "grad_norm": 1.009173507203124, "learning_rate": 1.999589386072437e-05, "loss": 0.6275, "step": 677 }, { "epoch": 0.04, "grad_norm": 0.5334355696559099, "learning_rate": 1.9995840364811627e-05, "loss": 0.3687, "step": 678 }, { "epoch": 0.04, "grad_norm": 0.5264094885639136, "learning_rate": 1.999578652274336e-05, "loss": 0.2631, "step": 679 }, { "epoch": 0.04, "grad_norm": 0.7406091669303169, "learning_rate": 1.9995732334521432e-05, "loss": 0.2581, "step": 680 }, { "epoch": 0.04, "grad_norm": 0.9294914520950267, "learning_rate": 1.9995677800147728e-05, "loss": 0.5048, "step": 681 }, { "epoch": 0.04, "grad_norm": 0.5268582006547304, "learning_rate": 1.9995622919624127e-05, "loss": 0.2644, "step": 682 }, { "epoch": 0.04, "grad_norm": 0.848890819064537, "learning_rate": 1.9995567692952537e-05, "loss": 0.4564, "step": 683 }, { "epoch": 0.04, "grad_norm": 0.9587797278208916, "learning_rate": 1.9995512120134867e-05, "loss": 0.5373, "step": 684 }, { "epoch": 0.04, "grad_norm": 0.5326515080037058, "learning_rate": 1.9995456201173044e-05, "loss": 0.2536, "step": 685 }, { "epoch": 0.04, "grad_norm": 0.4332630514103188, "learning_rate": 1.9995399936069e-05, "loss": 0.2911, "step": 686 }, { "epoch": 0.04, "grad_norm": 0.8241672572613798, "learning_rate": 1.9995343324824686e-05, "loss": 0.3667, "step": 687 }, { "epoch": 0.04, "grad_norm": 0.5755331586063459, "learning_rate": 1.9995286367442062e-05, "loss": 0.3133, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.406266965308756, "learning_rate": 1.9995229063923104e-05, "loss": 0.8357, "step": 689 }, { "epoch": 0.04, "grad_norm": 0.5461542233249763, "learning_rate": 1.9995171414269793e-05, "loss": 0.3926, "step": 690 }, { "epoch": 0.04, "grad_norm": 0.5332452855766027, "learning_rate": 1.999511341848412e-05, "loss": 0.3142, "step": 691 }, { "epoch": 0.04, "grad_norm": 0.5558375093163075, "learning_rate": 1.9995055076568107e-05, "loss": 0.1537, "step": 692 }, { "epoch": 0.04, "grad_norm": 0.5096980092738956, "learning_rate": 1.999499638852376e-05, "loss": 0.3855, "step": 693 }, { "epoch": 0.04, "grad_norm": 0.585061570533692, "learning_rate": 1.999493735435312e-05, "loss": 0.3167, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.3571760544795732, "learning_rate": 1.999487797405823e-05, "loss": 0.5282, "step": 695 }, { "epoch": 0.04, "grad_norm": 1.1375980474442728, "learning_rate": 1.9994818247641147e-05, "loss": 0.5273, "step": 696 }, { "epoch": 0.04, "grad_norm": 0.6087244752271485, "learning_rate": 1.9994758175103935e-05, "loss": 0.3229, "step": 697 }, { "epoch": 0.04, "grad_norm": 0.6091437037486682, "learning_rate": 1.999469775644868e-05, "loss": 0.316, "step": 698 }, { "epoch": 0.04, "grad_norm": 1.0768666202960535, "learning_rate": 1.999463699167747e-05, "loss": 0.5091, "step": 699 }, { "epoch": 0.04, "grad_norm": 0.6652825079939793, "learning_rate": 1.999457588079241e-05, "loss": 0.3182, "step": 700 }, { "epoch": 0.04, "grad_norm": 0.5327514124177343, "learning_rate": 1.9994514423795616e-05, "loss": 0.1642, "step": 701 }, { "epoch": 0.04, "grad_norm": 0.6094186828307505, "learning_rate": 1.9994452620689218e-05, "loss": 0.3472, "step": 702 }, { "epoch": 0.04, "grad_norm": 0.5620751646923052, "learning_rate": 1.999439047147536e-05, "loss": 0.3312, "step": 703 }, { "epoch": 0.04, "grad_norm": 0.9069035528958392, "learning_rate": 1.9994327976156184e-05, "loss": 0.6049, "step": 704 }, { "epoch": 0.04, "grad_norm": 0.6506383954312657, "learning_rate": 1.9994265134733862e-05, "loss": 0.397, "step": 705 }, { "epoch": 0.04, "grad_norm": 0.45557859200370937, "learning_rate": 1.999420194721057e-05, "loss": 0.3243, "step": 706 }, { "epoch": 0.04, "grad_norm": 0.48790557123093586, "learning_rate": 1.999413841358849e-05, "loss": 0.2508, "step": 707 }, { "epoch": 0.04, "grad_norm": 1.776312246851432, "learning_rate": 1.999407453386983e-05, "loss": 0.3304, "step": 708 }, { "epoch": 0.04, "grad_norm": 0.4977790934365835, "learning_rate": 1.99940103080568e-05, "loss": 0.3172, "step": 709 }, { "epoch": 0.04, "grad_norm": 0.6252880128089249, "learning_rate": 1.999394573615162e-05, "loss": 0.3972, "step": 710 }, { "epoch": 0.04, "grad_norm": 1.1498258098687408, "learning_rate": 1.999388081815653e-05, "loss": 0.5164, "step": 711 }, { "epoch": 0.04, "grad_norm": 0.892056556618054, "learning_rate": 1.9993815554073778e-05, "loss": 0.2629, "step": 712 }, { "epoch": 0.04, "grad_norm": 1.6940538088084591, "learning_rate": 1.999374994390562e-05, "loss": 0.7976, "step": 713 }, { "epoch": 0.04, "grad_norm": 0.500147764955215, "learning_rate": 1.9993683987654333e-05, "loss": 0.2667, "step": 714 }, { "epoch": 0.04, "grad_norm": 0.560974892337304, "learning_rate": 1.99936176853222e-05, "loss": 0.3075, "step": 715 }, { "epoch": 0.04, "grad_norm": 1.183879772416323, "learning_rate": 1.9993551036911514e-05, "loss": 0.5875, "step": 716 }, { "epoch": 0.04, "grad_norm": 0.5553161207454865, "learning_rate": 1.9993484042424588e-05, "loss": 0.3873, "step": 717 }, { "epoch": 0.04, "grad_norm": 0.4812560176886251, "learning_rate": 1.9993416701863736e-05, "loss": 0.2393, "step": 718 }, { "epoch": 0.04, "grad_norm": 1.3210916333620273, "learning_rate": 1.9993349015231297e-05, "loss": 0.8372, "step": 719 }, { "epoch": 0.04, "grad_norm": 0.5269059279825339, "learning_rate": 1.999328098252961e-05, "loss": 0.2579, "step": 720 }, { "epoch": 0.04, "grad_norm": 0.6285141235409536, "learning_rate": 1.9993212603761032e-05, "loss": 0.2367, "step": 721 }, { "epoch": 0.04, "grad_norm": 0.564077425147305, "learning_rate": 1.9993143878927933e-05, "loss": 0.413, "step": 722 }, { "epoch": 0.04, "grad_norm": 1.5407667855885592, "learning_rate": 1.999307480803269e-05, "loss": 0.6998, "step": 723 }, { "epoch": 0.04, "grad_norm": 0.44099260765567994, "learning_rate": 1.9993005391077694e-05, "loss": 0.2509, "step": 724 }, { "epoch": 0.04, "grad_norm": 0.6939970930509446, "learning_rate": 1.999293562806535e-05, "loss": 0.4579, "step": 725 }, { "epoch": 0.04, "grad_norm": 0.5590999653786924, "learning_rate": 1.999286551899808e-05, "loss": 0.2503, "step": 726 }, { "epoch": 0.04, "grad_norm": 0.4594339329962618, "learning_rate": 1.9992795063878304e-05, "loss": 0.2918, "step": 727 }, { "epoch": 0.04, "grad_norm": 0.6897597721436182, "learning_rate": 1.9992724262708466e-05, "loss": 0.5618, "step": 728 }, { "epoch": 0.04, "grad_norm": 0.6934232532251842, "learning_rate": 1.9992653115491015e-05, "loss": 0.4563, "step": 729 }, { "epoch": 0.04, "grad_norm": 0.457017970343513, "learning_rate": 1.9992581622228418e-05, "loss": 0.3187, "step": 730 }, { "epoch": 0.04, "grad_norm": 1.331623318724993, "learning_rate": 1.9992509782923148e-05, "loss": 0.5542, "step": 731 }, { "epoch": 0.04, "grad_norm": 0.5444148013267605, "learning_rate": 1.9992437597577692e-05, "loss": 0.3955, "step": 732 }, { "epoch": 0.04, "grad_norm": 0.4182804082754334, "learning_rate": 1.9992365066194554e-05, "loss": 0.3364, "step": 733 }, { "epoch": 0.04, "grad_norm": 0.4630481339095031, "learning_rate": 1.9992292188776244e-05, "loss": 0.2866, "step": 734 }, { "epoch": 0.04, "grad_norm": 1.576845422602978, "learning_rate": 1.9992218965325283e-05, "loss": 0.6871, "step": 735 }, { "epoch": 0.04, "grad_norm": 0.8118373704300686, "learning_rate": 1.999214539584421e-05, "loss": 0.369, "step": 736 }, { "epoch": 0.04, "grad_norm": 0.5701701832074956, "learning_rate": 1.9992071480335574e-05, "loss": 0.3343, "step": 737 }, { "epoch": 0.04, "grad_norm": 0.592621937478156, "learning_rate": 1.9991997218801925e-05, "loss": 0.4622, "step": 738 }, { "epoch": 0.04, "grad_norm": 0.5980367247552338, "learning_rate": 1.999192261124585e-05, "loss": 0.3063, "step": 739 }, { "epoch": 0.04, "grad_norm": 0.40888204345849005, "learning_rate": 1.9991847657669922e-05, "loss": 0.2738, "step": 740 }, { "epoch": 0.04, "grad_norm": 0.6537608743254626, "learning_rate": 1.9991772358076738e-05, "loss": 0.3047, "step": 741 }, { "epoch": 0.04, "grad_norm": 0.5674094793046328, "learning_rate": 1.999169671246891e-05, "loss": 0.3443, "step": 742 }, { "epoch": 0.04, "grad_norm": 0.6513678869487316, "learning_rate": 1.999162072084905e-05, "loss": 0.4706, "step": 743 }, { "epoch": 0.04, "grad_norm": 0.7395606121601905, "learning_rate": 1.9991544383219796e-05, "loss": 0.4724, "step": 744 }, { "epoch": 0.04, "grad_norm": 0.4922112233983982, "learning_rate": 1.999146769958379e-05, "loss": 0.312, "step": 745 }, { "epoch": 0.04, "grad_norm": 0.4493305840997312, "learning_rate": 1.999139066994369e-05, "loss": 0.2616, "step": 746 }, { "epoch": 0.04, "grad_norm": 1.197209300931008, "learning_rate": 1.9991313294302155e-05, "loss": 0.4788, "step": 747 }, { "epoch": 0.04, "grad_norm": 0.4994103566217086, "learning_rate": 1.9991235572661874e-05, "loss": 0.3094, "step": 748 }, { "epoch": 0.04, "grad_norm": 1.6447067429291433, "learning_rate": 1.9991157505025534e-05, "loss": 0.9033, "step": 749 }, { "epoch": 0.04, "grad_norm": 0.5606319354875383, "learning_rate": 1.999107909139584e-05, "loss": 0.3895, "step": 750 }, { "epoch": 0.04, "grad_norm": 0.5734727183073857, "learning_rate": 1.9991000331775506e-05, "loss": 0.3423, "step": 751 }, { "epoch": 0.04, "grad_norm": 0.569467318382504, "learning_rate": 1.999092122616726e-05, "loss": 0.2506, "step": 752 }, { "epoch": 0.04, "grad_norm": 0.5087244938226237, "learning_rate": 1.9990841774573843e-05, "loss": 0.331, "step": 753 }, { "epoch": 0.04, "grad_norm": 0.34419700613207677, "learning_rate": 1.9990761976998e-05, "loss": 0.0829, "step": 754 }, { "epoch": 0.04, "grad_norm": 0.5802236112684132, "learning_rate": 1.9990681833442503e-05, "loss": 0.3881, "step": 755 }, { "epoch": 0.04, "grad_norm": 0.9103632693548557, "learning_rate": 1.9990601343910126e-05, "loss": 0.6007, "step": 756 }, { "epoch": 0.04, "grad_norm": 0.45808385012447494, "learning_rate": 1.999052050840365e-05, "loss": 0.2621, "step": 757 }, { "epoch": 0.04, "grad_norm": 0.4230696341008969, "learning_rate": 1.9990439326925882e-05, "loss": 0.2886, "step": 758 }, { "epoch": 0.04, "grad_norm": 0.78737798009791, "learning_rate": 1.9990357799479626e-05, "loss": 0.342, "step": 759 }, { "epoch": 0.04, "grad_norm": 0.5573592556824364, "learning_rate": 1.9990275926067713e-05, "loss": 0.2124, "step": 760 }, { "epoch": 0.04, "grad_norm": 0.6546682929758058, "learning_rate": 1.9990193706692972e-05, "loss": 0.3749, "step": 761 }, { "epoch": 0.04, "grad_norm": 1.533380714290272, "learning_rate": 1.9990111141358252e-05, "loss": 0.6221, "step": 762 }, { "epoch": 0.04, "grad_norm": 0.5379176222315797, "learning_rate": 1.9990028230066413e-05, "loss": 0.2695, "step": 763 }, { "epoch": 0.04, "grad_norm": 0.4715225267416928, "learning_rate": 1.998994497282033e-05, "loss": 0.252, "step": 764 }, { "epoch": 0.04, "grad_norm": 0.4811990499682349, "learning_rate": 1.9989861369622877e-05, "loss": 0.2896, "step": 765 }, { "epoch": 0.04, "grad_norm": 0.886555411853447, "learning_rate": 1.9989777420476954e-05, "loss": 0.4155, "step": 766 }, { "epoch": 0.04, "grad_norm": 0.9790462773253676, "learning_rate": 1.998969312538547e-05, "loss": 0.3598, "step": 767 }, { "epoch": 0.04, "grad_norm": 2.1859233395253255, "learning_rate": 1.9989608484351343e-05, "loss": 0.676, "step": 768 }, { "epoch": 0.04, "grad_norm": 0.5154902003353229, "learning_rate": 1.9989523497377505e-05, "loss": 0.3098, "step": 769 }, { "epoch": 0.04, "grad_norm": 0.6483670299556568, "learning_rate": 1.9989438164466896e-05, "loss": 0.3439, "step": 770 }, { "epoch": 0.04, "grad_norm": 0.39362482228028733, "learning_rate": 1.9989352485622472e-05, "loss": 0.1467, "step": 771 }, { "epoch": 0.04, "grad_norm": 1.74006668147056, "learning_rate": 1.9989266460847207e-05, "loss": 0.4542, "step": 772 }, { "epoch": 0.04, "grad_norm": 0.8813615902844071, "learning_rate": 1.998918009014407e-05, "loss": 0.3356, "step": 773 }, { "epoch": 0.04, "grad_norm": 1.841448242464731, "learning_rate": 1.9989093373516053e-05, "loss": 0.7304, "step": 774 }, { "epoch": 0.04, "grad_norm": 1.0736502504903374, "learning_rate": 1.9989006310966162e-05, "loss": 0.4557, "step": 775 }, { "epoch": 0.04, "grad_norm": 0.5013227982869481, "learning_rate": 1.9988918902497417e-05, "loss": 0.2518, "step": 776 }, { "epoch": 0.04, "grad_norm": 0.5610738984393089, "learning_rate": 1.998883114811284e-05, "loss": 0.3094, "step": 777 }, { "epoch": 0.04, "grad_norm": 1.8664247677141819, "learning_rate": 1.9988743047815465e-05, "loss": 0.4851, "step": 778 }, { "epoch": 0.04, "grad_norm": 1.1921957110147678, "learning_rate": 1.9988654601608354e-05, "loss": 0.4159, "step": 779 }, { "epoch": 0.04, "grad_norm": 1.8570412985712648, "learning_rate": 1.998856580949456e-05, "loss": 0.6539, "step": 780 }, { "epoch": 0.04, "grad_norm": 0.5304787388263474, "learning_rate": 1.998847667147716e-05, "loss": 0.3169, "step": 781 }, { "epoch": 0.04, "grad_norm": 0.6695906793491527, "learning_rate": 1.9988387187559245e-05, "loss": 0.3891, "step": 782 }, { "epoch": 0.04, "grad_norm": 0.4991033760441603, "learning_rate": 1.9988297357743906e-05, "loss": 0.1626, "step": 783 }, { "epoch": 0.05, "grad_norm": 1.6294509961056738, "learning_rate": 1.9988207182034264e-05, "loss": 0.4898, "step": 784 }, { "epoch": 0.05, "grad_norm": 0.7317674592716401, "learning_rate": 1.9988116660433435e-05, "loss": 0.3378, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.1238616873997904, "learning_rate": 1.9988025792944558e-05, "loss": 0.3753, "step": 786 }, { "epoch": 0.05, "grad_norm": 0.7119062202083866, "learning_rate": 1.9987934579570776e-05, "loss": 0.3211, "step": 787 }, { "epoch": 0.05, "grad_norm": 0.7562474683264186, "learning_rate": 1.9987843020315248e-05, "loss": 0.3854, "step": 788 }, { "epoch": 0.05, "grad_norm": 0.5043603296184589, "learning_rate": 1.9987751115181147e-05, "loss": 0.3109, "step": 789 }, { "epoch": 0.05, "grad_norm": 1.0146693352319747, "learning_rate": 1.9987658864171653e-05, "loss": 0.5063, "step": 790 }, { "epoch": 0.05, "grad_norm": 0.42337892738407523, "learning_rate": 1.9987566267289963e-05, "loss": 0.2544, "step": 791 }, { "epoch": 0.05, "grad_norm": 1.2475416323106774, "learning_rate": 1.9987473324539285e-05, "loss": 0.7866, "step": 792 }, { "epoch": 0.05, "grad_norm": 0.7025050670031587, "learning_rate": 1.998738003592283e-05, "loss": 0.3059, "step": 793 }, { "epoch": 0.05, "grad_norm": 0.5384616703375238, "learning_rate": 1.9987286401443838e-05, "loss": 0.3314, "step": 794 }, { "epoch": 0.05, "grad_norm": 1.0796972648685363, "learning_rate": 1.9987192421105546e-05, "loss": 0.5532, "step": 795 }, { "epoch": 0.05, "grad_norm": 0.34313687983856905, "learning_rate": 1.998709809491121e-05, "loss": 0.1962, "step": 796 }, { "epoch": 0.05, "grad_norm": 0.3985139635769342, "learning_rate": 1.9987003422864094e-05, "loss": 0.2577, "step": 797 }, { "epoch": 0.05, "grad_norm": 1.6243134259537804, "learning_rate": 1.998690840496748e-05, "loss": 0.8693, "step": 798 }, { "epoch": 0.05, "grad_norm": 0.7373614038171744, "learning_rate": 1.9986813041224662e-05, "loss": 0.2976, "step": 799 }, { "epoch": 0.05, "grad_norm": 0.6195132227874984, "learning_rate": 1.9986717331638935e-05, "loss": 0.3861, "step": 800 }, { "epoch": 0.05, "grad_norm": 0.5595292667728948, "learning_rate": 1.9986621276213616e-05, "loss": 0.3883, "step": 801 }, { "epoch": 0.05, "grad_norm": 0.5434227894915622, "learning_rate": 1.998652487495203e-05, "loss": 0.2684, "step": 802 }, { "epoch": 0.05, "grad_norm": 0.4591481879778902, "learning_rate": 1.998642812785752e-05, "loss": 0.2694, "step": 803 }, { "epoch": 0.05, "grad_norm": 1.6614380862805616, "learning_rate": 1.998633103493343e-05, "loss": 0.8139, "step": 804 }, { "epoch": 0.05, "grad_norm": 0.4670262324947827, "learning_rate": 1.998623359618313e-05, "loss": 0.3484, "step": 805 }, { "epoch": 0.05, "grad_norm": 0.564486768316231, "learning_rate": 1.9986135811609983e-05, "loss": 0.2648, "step": 806 }, { "epoch": 0.05, "grad_norm": 1.053932627292119, "learning_rate": 1.998603768121739e-05, "loss": 0.5622, "step": 807 }, { "epoch": 0.05, "grad_norm": 0.4672012049877789, "learning_rate": 1.9985939205008734e-05, "loss": 0.3168, "step": 808 }, { "epoch": 0.05, "grad_norm": 0.4293014248611022, "learning_rate": 1.998584038298744e-05, "loss": 0.2512, "step": 809 }, { "epoch": 0.05, "grad_norm": 0.6436970530457229, "learning_rate": 1.998574121515692e-05, "loss": 0.3514, "step": 810 }, { "epoch": 0.05, "grad_norm": 1.0177245118126792, "learning_rate": 1.998564170152061e-05, "loss": 0.5856, "step": 811 }, { "epoch": 0.05, "grad_norm": 0.45703131510777767, "learning_rate": 1.9985541842081957e-05, "loss": 0.3059, "step": 812 }, { "epoch": 0.05, "grad_norm": 0.48741084543212293, "learning_rate": 1.9985441636844424e-05, "loss": 0.413, "step": 813 }, { "epoch": 0.05, "grad_norm": 1.4244781136880331, "learning_rate": 1.998534108581147e-05, "loss": 0.6553, "step": 814 }, { "epoch": 0.05, "grad_norm": 0.46494993418916686, "learning_rate": 1.998524018898659e-05, "loss": 0.2524, "step": 815 }, { "epoch": 0.05, "grad_norm": 0.5302682621937632, "learning_rate": 1.9985138946373266e-05, "loss": 0.3199, "step": 816 }, { "epoch": 0.05, "grad_norm": 0.43980037601885463, "learning_rate": 1.9985037357975013e-05, "loss": 0.3068, "step": 817 }, { "epoch": 0.05, "grad_norm": 0.504431850220822, "learning_rate": 1.9984935423795345e-05, "loss": 0.3338, "step": 818 }, { "epoch": 0.05, "grad_norm": 0.8249104939952835, "learning_rate": 1.998483314383779e-05, "loss": 0.4644, "step": 819 }, { "epoch": 0.05, "grad_norm": 0.5695000365147458, "learning_rate": 1.9984730518105897e-05, "loss": 0.3879, "step": 820 }, { "epoch": 0.05, "grad_norm": 0.4473843111395704, "learning_rate": 1.9984627546603214e-05, "loss": 0.3239, "step": 821 }, { "epoch": 0.05, "grad_norm": 0.3833543869741045, "learning_rate": 1.9984524229333307e-05, "loss": 0.1974, "step": 822 }, { "epoch": 0.05, "grad_norm": 0.4942323075738801, "learning_rate": 1.9984420566299756e-05, "loss": 0.347, "step": 823 }, { "epoch": 0.05, "grad_norm": 0.5670416194035222, "learning_rate": 1.998431655750615e-05, "loss": 0.3483, "step": 824 }, { "epoch": 0.05, "grad_norm": 0.5051832663125587, "learning_rate": 1.998421220295609e-05, "loss": 0.3617, "step": 825 }, { "epoch": 0.05, "grad_norm": 1.6905171954763243, "learning_rate": 1.9984107502653193e-05, "loss": 0.7626, "step": 826 }, { "epoch": 0.05, "grad_norm": 0.4875542594639041, "learning_rate": 1.9984002456601082e-05, "loss": 0.2914, "step": 827 }, { "epoch": 0.05, "grad_norm": 0.6393314261217024, "learning_rate": 1.9983897064803396e-05, "loss": 0.4494, "step": 828 }, { "epoch": 0.05, "grad_norm": 0.5626873081314722, "learning_rate": 1.9983791327263782e-05, "loss": 0.4125, "step": 829 }, { "epoch": 0.05, "grad_norm": 0.4740622156552829, "learning_rate": 1.9983685243985905e-05, "loss": 0.3176, "step": 830 }, { "epoch": 0.05, "grad_norm": 0.45140349812898456, "learning_rate": 1.9983578814973437e-05, "loss": 0.2087, "step": 831 }, { "epoch": 0.05, "grad_norm": 0.582391697868439, "learning_rate": 1.9983472040230063e-05, "loss": 0.3153, "step": 832 }, { "epoch": 0.05, "grad_norm": 0.504699200239562, "learning_rate": 1.998336491975948e-05, "loss": 0.3052, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.6280795918613231, "learning_rate": 1.9983257453565402e-05, "loss": 0.8253, "step": 834 }, { "epoch": 0.05, "grad_norm": 0.538834623854235, "learning_rate": 1.9983149641651546e-05, "loss": 0.3547, "step": 835 }, { "epoch": 0.05, "grad_norm": 0.661409036567593, "learning_rate": 1.998304148402165e-05, "loss": 0.3122, "step": 836 }, { "epoch": 0.05, "grad_norm": 0.48427368476034816, "learning_rate": 1.9982932980679455e-05, "loss": 0.2972, "step": 837 }, { "epoch": 0.05, "grad_norm": 1.8611669536442395, "learning_rate": 1.998282413162872e-05, "loss": 0.5054, "step": 838 }, { "epoch": 0.05, "grad_norm": 0.5038129864771659, "learning_rate": 1.9982714936873215e-05, "loss": 0.3187, "step": 839 }, { "epoch": 0.05, "grad_norm": 0.6294109039118858, "learning_rate": 1.998260539641672e-05, "loss": 0.4348, "step": 840 }, { "epoch": 0.05, "grad_norm": 0.582952653039707, "learning_rate": 1.998249551026303e-05, "loss": 0.4443, "step": 841 }, { "epoch": 0.05, "grad_norm": 0.5631283283567661, "learning_rate": 1.998238527841595e-05, "loss": 0.2381, "step": 842 }, { "epoch": 0.05, "grad_norm": 0.40488497045596294, "learning_rate": 1.9982274700879295e-05, "loss": 0.1833, "step": 843 }, { "epoch": 0.05, "grad_norm": 0.6296325473623197, "learning_rate": 1.9982163777656902e-05, "loss": 0.3783, "step": 844 }, { "epoch": 0.05, "grad_norm": 0.5638688872776851, "learning_rate": 1.9982052508752605e-05, "loss": 0.2327, "step": 845 }, { "epoch": 0.05, "grad_norm": 0.9837522407153001, "learning_rate": 1.998194089417025e-05, "loss": 0.5804, "step": 846 }, { "epoch": 0.05, "grad_norm": 0.8229817506353978, "learning_rate": 1.9981828933913722e-05, "loss": 0.5947, "step": 847 }, { "epoch": 0.05, "grad_norm": 0.5743285905374217, "learning_rate": 1.9981716627986882e-05, "loss": 0.2498, "step": 848 }, { "epoch": 0.05, "grad_norm": 0.440183500729955, "learning_rate": 1.9981603976393625e-05, "loss": 0.2566, "step": 849 }, { "epoch": 0.05, "grad_norm": 1.769652766589286, "learning_rate": 1.9981490979137853e-05, "loss": 0.8859, "step": 850 }, { "epoch": 0.05, "grad_norm": 0.5989281680159326, "learning_rate": 1.9981377636223477e-05, "loss": 0.2521, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.0097503020108294, "learning_rate": 1.998126394765442e-05, "loss": 0.4525, "step": 852 }, { "epoch": 0.05, "grad_norm": 0.7673720570657228, "learning_rate": 1.9981149913434626e-05, "loss": 0.4724, "step": 853 }, { "epoch": 0.05, "grad_norm": 0.4981975468603891, "learning_rate": 1.9981035533568035e-05, "loss": 0.3007, "step": 854 }, { "epoch": 0.05, "grad_norm": 0.340649236428933, "learning_rate": 1.998092080805862e-05, "loss": 0.1132, "step": 855 }, { "epoch": 0.05, "grad_norm": 0.652425187067407, "learning_rate": 1.9980805736910337e-05, "loss": 0.4076, "step": 856 }, { "epoch": 0.05, "grad_norm": 0.5959681924395248, "learning_rate": 1.9980690320127188e-05, "loss": 0.3472, "step": 857 }, { "epoch": 0.05, "grad_norm": 1.4708566945755925, "learning_rate": 1.998057455771316e-05, "loss": 0.4343, "step": 858 }, { "epoch": 0.05, "grad_norm": 1.6946441906172074, "learning_rate": 1.9980458449672263e-05, "loss": 0.7589, "step": 859 }, { "epoch": 0.05, "grad_norm": 0.546655269731215, "learning_rate": 1.998034199600852e-05, "loss": 0.3238, "step": 860 }, { "epoch": 0.05, "grad_norm": 0.4237690098340433, "learning_rate": 1.9980225196725964e-05, "loss": 0.2079, "step": 861 }, { "epoch": 0.05, "grad_norm": 1.1400927331869855, "learning_rate": 1.998010805182864e-05, "loss": 0.5551, "step": 862 }, { "epoch": 0.05, "grad_norm": 0.5594124097189067, "learning_rate": 1.9979990561320597e-05, "loss": 0.3234, "step": 863 }, { "epoch": 0.05, "grad_norm": 0.8241251910035258, "learning_rate": 1.9979872725205915e-05, "loss": 0.3545, "step": 864 }, { "epoch": 0.05, "grad_norm": 1.6342272397960347, "learning_rate": 1.997975454348867e-05, "loss": 0.7562, "step": 865 }, { "epoch": 0.05, "grad_norm": 0.5442992828218223, "learning_rate": 1.9979636016172952e-05, "loss": 0.3479, "step": 866 }, { "epoch": 0.05, "grad_norm": 1.0426475164986881, "learning_rate": 1.9979517143262867e-05, "loss": 0.2534, "step": 867 }, { "epoch": 0.05, "grad_norm": 0.8241038141038709, "learning_rate": 1.9979397924762537e-05, "loss": 0.3189, "step": 868 }, { "epoch": 0.05, "grad_norm": 0.8868676701867368, "learning_rate": 1.9979278360676082e-05, "loss": 0.3811, "step": 869 }, { "epoch": 0.05, "grad_norm": 0.8775413046921993, "learning_rate": 1.9979158451007648e-05, "loss": 0.4496, "step": 870 }, { "epoch": 0.05, "grad_norm": 1.841188215520176, "learning_rate": 1.9979038195761386e-05, "loss": 0.5085, "step": 871 }, { "epoch": 0.05, "grad_norm": 0.46194012023312764, "learning_rate": 1.997891759494146e-05, "loss": 0.2981, "step": 872 }, { "epoch": 0.05, "grad_norm": 0.7829547374288088, "learning_rate": 1.9978796648552045e-05, "loss": 0.415, "step": 873 }, { "epoch": 0.05, "grad_norm": 0.7663806220882737, "learning_rate": 1.9978675356597334e-05, "loss": 0.2796, "step": 874 }, { "epoch": 0.05, "grad_norm": 0.6505647168387442, "learning_rate": 1.9978553719081523e-05, "loss": 0.3577, "step": 875 }, { "epoch": 0.05, "grad_norm": 0.6693024697212153, "learning_rate": 1.997843173600883e-05, "loss": 0.3511, "step": 876 }, { "epoch": 0.05, "grad_norm": 1.4101677187766488, "learning_rate": 1.997830940738347e-05, "loss": 0.5603, "step": 877 }, { "epoch": 0.05, "grad_norm": 0.8793601862533259, "learning_rate": 1.9978186733209686e-05, "loss": 0.3397, "step": 878 }, { "epoch": 0.05, "grad_norm": 0.587777319443702, "learning_rate": 1.997806371349172e-05, "loss": 0.2368, "step": 879 }, { "epoch": 0.05, "grad_norm": 0.5639544073283919, "learning_rate": 1.9977940348233845e-05, "loss": 0.4186, "step": 880 }, { "epoch": 0.05, "grad_norm": 0.7457185631167834, "learning_rate": 1.997781663744032e-05, "loss": 0.2677, "step": 881 }, { "epoch": 0.05, "grad_norm": 0.6447444340438591, "learning_rate": 1.9977692581115436e-05, "loss": 0.3951, "step": 882 }, { "epoch": 0.05, "grad_norm": 1.6502688235687761, "learning_rate": 1.9977568179263484e-05, "loss": 0.8296, "step": 883 }, { "epoch": 0.05, "grad_norm": 0.4514393605711949, "learning_rate": 1.9977443431888778e-05, "loss": 0.2656, "step": 884 }, { "epoch": 0.05, "grad_norm": 1.0553305711117273, "learning_rate": 1.9977318338995632e-05, "loss": 0.4572, "step": 885 }, { "epoch": 0.05, "grad_norm": 0.8407200121591809, "learning_rate": 1.9977192900588385e-05, "loss": 0.5764, "step": 886 }, { "epoch": 0.05, "grad_norm": 0.47881812578845484, "learning_rate": 1.9977067116671374e-05, "loss": 0.1567, "step": 887 }, { "epoch": 0.05, "grad_norm": 0.4873118349825849, "learning_rate": 1.9976940987248956e-05, "loss": 0.3673, "step": 888 }, { "epoch": 0.05, "grad_norm": 1.4079005001201457, "learning_rate": 1.9976814512325503e-05, "loss": 0.8054, "step": 889 }, { "epoch": 0.05, "grad_norm": 0.5093928927192002, "learning_rate": 1.9976687691905394e-05, "loss": 0.2287, "step": 890 }, { "epoch": 0.05, "grad_norm": 0.7518705232066628, "learning_rate": 1.9976560525993015e-05, "loss": 0.4441, "step": 891 }, { "epoch": 0.05, "grad_norm": 0.6356126660781017, "learning_rate": 1.9976433014592776e-05, "loss": 0.4113, "step": 892 }, { "epoch": 0.05, "grad_norm": 0.52789164129456, "learning_rate": 1.9976305157709092e-05, "loss": 0.2405, "step": 893 }, { "epoch": 0.05, "grad_norm": 0.6451124335109072, "learning_rate": 1.9976176955346392e-05, "loss": 0.3365, "step": 894 }, { "epoch": 0.05, "grad_norm": 0.4228472921247635, "learning_rate": 1.9976048407509107e-05, "loss": 0.3027, "step": 895 }, { "epoch": 0.05, "grad_norm": 0.5781676443680558, "learning_rate": 1.99759195142017e-05, "loss": 0.3938, "step": 896 }, { "epoch": 0.05, "grad_norm": 0.5641365163135668, "learning_rate": 1.9975790275428625e-05, "loss": 0.3254, "step": 897 }, { "epoch": 0.05, "grad_norm": 0.5717025160344602, "learning_rate": 1.9975660691194365e-05, "loss": 0.4194, "step": 898 }, { "epoch": 0.05, "grad_norm": 1.0582116646632875, "learning_rate": 1.99755307615034e-05, "loss": 0.5222, "step": 899 }, { "epoch": 0.05, "grad_norm": 0.4722208730822841, "learning_rate": 1.997540048636024e-05, "loss": 0.2897, "step": 900 }, { "epoch": 0.05, "grad_norm": 0.6038363034287668, "learning_rate": 1.997526986576938e-05, "loss": 0.3378, "step": 901 }, { "epoch": 0.05, "grad_norm": 0.5519184370509896, "learning_rate": 1.9975138899735366e-05, "loss": 0.3994, "step": 902 }, { "epoch": 0.05, "grad_norm": 0.4720499415776914, "learning_rate": 1.9975007588262715e-05, "loss": 0.3905, "step": 903 }, { "epoch": 0.05, "grad_norm": 0.5569224560379596, "learning_rate": 1.9974875931355977e-05, "loss": 0.3047, "step": 904 }, { "epoch": 0.05, "grad_norm": 0.7713492034663683, "learning_rate": 1.9974743929019717e-05, "loss": 0.3841, "step": 905 }, { "epoch": 0.05, "grad_norm": 0.5967554524752199, "learning_rate": 1.99746115812585e-05, "loss": 0.3936, "step": 906 }, { "epoch": 0.05, "grad_norm": 0.4020352157362454, "learning_rate": 1.997447888807692e-05, "loss": 0.1904, "step": 907 }, { "epoch": 0.05, "grad_norm": 0.4793739647988955, "learning_rate": 1.997434584947956e-05, "loss": 0.3606, "step": 908 }, { "epoch": 0.05, "grad_norm": 0.5324891885968183, "learning_rate": 1.9974212465471037e-05, "loss": 0.3638, "step": 909 }, { "epoch": 0.05, "grad_norm": 0.711921743120288, "learning_rate": 1.9974078736055963e-05, "loss": 0.4732, "step": 910 }, { "epoch": 0.05, "grad_norm": 0.4754960806126236, "learning_rate": 1.997394466123897e-05, "loss": 0.3441, "step": 911 }, { "epoch": 0.05, "grad_norm": 0.4828296923573561, "learning_rate": 1.99738102410247e-05, "loss": 0.3559, "step": 912 }, { "epoch": 0.05, "grad_norm": 0.4723973954273907, "learning_rate": 1.9973675475417814e-05, "loss": 0.1828, "step": 913 }, { "epoch": 0.05, "grad_norm": 0.9490921519164291, "learning_rate": 1.9973540364422973e-05, "loss": 0.4723, "step": 914 }, { "epoch": 0.05, "grad_norm": 0.5314162635369581, "learning_rate": 1.997340490804486e-05, "loss": 0.3647, "step": 915 }, { "epoch": 0.05, "grad_norm": 0.48861096170235063, "learning_rate": 1.9973269106288163e-05, "loss": 0.387, "step": 916 }, { "epoch": 0.05, "grad_norm": 0.635391914689005, "learning_rate": 1.997313295915759e-05, "loss": 0.3044, "step": 917 }, { "epoch": 0.05, "grad_norm": 0.46713708953393285, "learning_rate": 1.9972996466657846e-05, "loss": 0.3306, "step": 918 }, { "epoch": 0.05, "grad_norm": 0.478474730302629, "learning_rate": 1.9972859628793663e-05, "loss": 0.3477, "step": 919 }, { "epoch": 0.05, "grad_norm": 0.8346975964677359, "learning_rate": 1.9972722445569782e-05, "loss": 0.3668, "step": 920 }, { "epoch": 0.05, "grad_norm": 0.44496567184720826, "learning_rate": 1.997258491699095e-05, "loss": 0.2714, "step": 921 }, { "epoch": 0.05, "grad_norm": 1.387562812852257, "learning_rate": 1.9972447043061933e-05, "loss": 0.7089, "step": 922 }, { "epoch": 0.05, "grad_norm": 0.4650130501105177, "learning_rate": 1.9972308823787504e-05, "loss": 0.2542, "step": 923 }, { "epoch": 0.05, "grad_norm": 0.49044841015158935, "learning_rate": 1.9972170259172444e-05, "loss": 0.3412, "step": 924 }, { "epoch": 0.05, "grad_norm": 0.9751351988159547, "learning_rate": 1.9972031349221563e-05, "loss": 0.585, "step": 925 }, { "epoch": 0.05, "grad_norm": 0.4007795809187631, "learning_rate": 1.9971892093939663e-05, "loss": 0.2822, "step": 926 }, { "epoch": 0.05, "grad_norm": 0.4016861009078226, "learning_rate": 1.9971752493331568e-05, "loss": 0.2736, "step": 927 }, { "epoch": 0.05, "grad_norm": 0.5568432011062416, "learning_rate": 1.9971612547402116e-05, "loss": 0.3634, "step": 928 }, { "epoch": 0.05, "grad_norm": 0.8775709744925506, "learning_rate": 1.9971472256156147e-05, "loss": 0.507, "step": 929 }, { "epoch": 0.05, "grad_norm": 0.5513163113977712, "learning_rate": 1.997133161959852e-05, "loss": 0.2609, "step": 930 }, { "epoch": 0.05, "grad_norm": 0.5854383825688924, "learning_rate": 1.9971190637734113e-05, "loss": 0.3781, "step": 931 }, { "epoch": 0.05, "grad_norm": 0.5414854941618819, "learning_rate": 1.99710493105678e-05, "loss": 0.3819, "step": 932 }, { "epoch": 0.05, "grad_norm": 0.36193619268358984, "learning_rate": 1.9970907638104483e-05, "loss": 0.1874, "step": 933 }, { "epoch": 0.05, "grad_norm": 1.0396706344808258, "learning_rate": 1.9970765620349058e-05, "loss": 0.6366, "step": 934 }, { "epoch": 0.05, "grad_norm": 0.5805743491039917, "learning_rate": 1.997062325730645e-05, "loss": 0.36, "step": 935 }, { "epoch": 0.05, "grad_norm": 0.4610257475867486, "learning_rate": 1.997048054898159e-05, "loss": 0.2515, "step": 936 }, { "epoch": 0.05, "grad_norm": 1.3399968664903208, "learning_rate": 1.997033749537941e-05, "loss": 0.5433, "step": 937 }, { "epoch": 0.05, "grad_norm": 0.5558017309777695, "learning_rate": 1.9970194096504877e-05, "loss": 0.2575, "step": 938 }, { "epoch": 0.05, "grad_norm": 0.47350587576542036, "learning_rate": 1.9970050352362952e-05, "loss": 0.2149, "step": 939 }, { "epoch": 0.05, "grad_norm": 0.8210880489033746, "learning_rate": 1.996990626295861e-05, "loss": 0.4412, "step": 940 }, { "epoch": 0.05, "grad_norm": 1.5633944668244224, "learning_rate": 1.9969761828296843e-05, "loss": 0.8621, "step": 941 }, { "epoch": 0.05, "grad_norm": 0.5279281352946498, "learning_rate": 1.9969617048382653e-05, "loss": 0.3227, "step": 942 }, { "epoch": 0.05, "grad_norm": 0.5300481438173281, "learning_rate": 1.996947192322105e-05, "loss": 0.3458, "step": 943 }, { "epoch": 0.05, "grad_norm": 0.7143815231199013, "learning_rate": 1.9969326452817068e-05, "loss": 0.4565, "step": 944 }, { "epoch": 0.05, "grad_norm": 0.5568644549729681, "learning_rate": 1.9969180637175737e-05, "loss": 0.3141, "step": 945 }, { "epoch": 0.05, "grad_norm": 0.4280076221992522, "learning_rate": 1.9969034476302108e-05, "loss": 0.0969, "step": 946 }, { "epoch": 0.05, "grad_norm": 0.5319356976602545, "learning_rate": 1.996888797020125e-05, "loss": 0.3553, "step": 947 }, { "epoch": 0.05, "grad_norm": 0.5872685383034628, "learning_rate": 1.9968741118878224e-05, "loss": 0.3226, "step": 948 }, { "epoch": 0.05, "grad_norm": 1.0190650467630764, "learning_rate": 1.9968593922338125e-05, "loss": 0.4937, "step": 949 }, { "epoch": 0.05, "grad_norm": 0.6087067671856272, "learning_rate": 1.9968446380586045e-05, "loss": 0.3468, "step": 950 }, { "epoch": 0.05, "grad_norm": 0.5114568446529008, "learning_rate": 1.9968298493627096e-05, "loss": 0.2784, "step": 951 }, { "epoch": 0.05, "grad_norm": 0.40876696190949857, "learning_rate": 1.99681502614664e-05, "loss": 0.2605, "step": 952 }, { "epoch": 0.05, "grad_norm": 1.1545115092345783, "learning_rate": 1.9968001684109086e-05, "loss": 0.6098, "step": 953 }, { "epoch": 0.05, "grad_norm": 0.48476631935801145, "learning_rate": 1.9967852761560304e-05, "loss": 0.3156, "step": 954 }, { "epoch": 0.05, "grad_norm": 0.4489912788177858, "learning_rate": 1.996770349382521e-05, "loss": 0.3845, "step": 955 }, { "epoch": 0.05, "grad_norm": 0.8817666801720644, "learning_rate": 1.9967553880908973e-05, "loss": 0.3831, "step": 956 }, { "epoch": 0.05, "grad_norm": 0.5005095482186253, "learning_rate": 1.996740392281677e-05, "loss": 0.3326, "step": 957 }, { "epoch": 0.06, "grad_norm": 0.41208570683327056, "learning_rate": 1.9967253619553805e-05, "loss": 0.1964, "step": 958 }, { "epoch": 0.06, "grad_norm": 0.9145023410100701, "learning_rate": 1.996710297112527e-05, "loss": 0.3778, "step": 959 }, { "epoch": 0.06, "grad_norm": 0.5094709013843017, "learning_rate": 1.9966951977536387e-05, "loss": 0.3345, "step": 960 }, { "epoch": 0.06, "grad_norm": 1.1238884116633348, "learning_rate": 1.996680063879239e-05, "loss": 0.6331, "step": 961 }, { "epoch": 0.06, "grad_norm": 0.6022319418093043, "learning_rate": 1.9966648954898515e-05, "loss": 0.3348, "step": 962 }, { "epoch": 0.06, "grad_norm": 0.5012059527869338, "learning_rate": 1.9966496925860014e-05, "loss": 0.2811, "step": 963 }, { "epoch": 0.06, "grad_norm": 0.34219376584073213, "learning_rate": 1.996634455168215e-05, "loss": 0.2451, "step": 964 }, { "epoch": 0.06, "grad_norm": 1.2382160129106405, "learning_rate": 1.9966191832370208e-05, "loss": 0.4533, "step": 965 }, { "epoch": 0.06, "grad_norm": 0.5785626838524301, "learning_rate": 1.9966038767929468e-05, "loss": 0.3016, "step": 966 }, { "epoch": 0.06, "grad_norm": 0.5612011179574358, "learning_rate": 1.9965885358365234e-05, "loss": 0.3872, "step": 967 }, { "epoch": 0.06, "grad_norm": 1.4325677209096839, "learning_rate": 1.996573160368282e-05, "loss": 0.6171, "step": 968 }, { "epoch": 0.06, "grad_norm": 0.4714099776950298, "learning_rate": 1.996557750388755e-05, "loss": 0.2267, "step": 969 }, { "epoch": 0.06, "grad_norm": 0.45563509639747796, "learning_rate": 1.996542305898476e-05, "loss": 0.2878, "step": 970 }, { "epoch": 0.06, "grad_norm": 0.5373298863023417, "learning_rate": 1.9965268268979794e-05, "loss": 0.357, "step": 971 }, { "epoch": 0.06, "grad_norm": 0.588654271043847, "learning_rate": 1.996511313387802e-05, "loss": 0.2835, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.4807502575156875, "learning_rate": 1.9964957653684804e-05, "loss": 0.8575, "step": 973 }, { "epoch": 0.06, "grad_norm": 1.4442004066106364, "learning_rate": 1.9964801828405536e-05, "loss": 0.6195, "step": 974 }, { "epoch": 0.06, "grad_norm": 0.4943797589189964, "learning_rate": 1.9964645658045607e-05, "loss": 0.2719, "step": 975 }, { "epoch": 0.06, "grad_norm": 0.39577055533039807, "learning_rate": 1.9964489142610426e-05, "loss": 0.1685, "step": 976 }, { "epoch": 0.06, "grad_norm": 1.1935907452878245, "learning_rate": 1.996433228210542e-05, "loss": 0.5387, "step": 977 }, { "epoch": 0.06, "grad_norm": 0.5879709174870033, "learning_rate": 1.996417507653601e-05, "loss": 0.2531, "step": 978 }, { "epoch": 0.06, "grad_norm": 0.5808913193155929, "learning_rate": 1.9964017525907646e-05, "loss": 0.365, "step": 979 }, { "epoch": 0.06, "grad_norm": 1.8732982117118, "learning_rate": 1.9963859630225786e-05, "loss": 0.7228, "step": 980 }, { "epoch": 0.06, "grad_norm": 0.5626726887011435, "learning_rate": 1.9963701389495896e-05, "loss": 0.3331, "step": 981 }, { "epoch": 0.06, "grad_norm": 0.77938332796842, "learning_rate": 1.9963542803723452e-05, "loss": 0.4066, "step": 982 }, { "epoch": 0.06, "grad_norm": 0.42358202474178924, "learning_rate": 1.996338387291395e-05, "loss": 0.2655, "step": 983 }, { "epoch": 0.06, "grad_norm": 0.7147571092797101, "learning_rate": 1.9963224597072896e-05, "loss": 0.3894, "step": 984 }, { "epoch": 0.06, "grad_norm": 0.6968938961710522, "learning_rate": 1.99630649762058e-05, "loss": 0.3112, "step": 985 }, { "epoch": 0.06, "grad_norm": 0.6853819380607388, "learning_rate": 1.996290501031819e-05, "loss": 0.4494, "step": 986 }, { "epoch": 0.06, "grad_norm": 0.4657733992528829, "learning_rate": 1.996274469941561e-05, "loss": 0.317, "step": 987 }, { "epoch": 0.06, "grad_norm": 0.9112005971743489, "learning_rate": 1.9962584043503616e-05, "loss": 0.4292, "step": 988 }, { "epoch": 0.06, "grad_norm": 0.49945616956948385, "learning_rate": 1.9962423042587756e-05, "loss": 0.2444, "step": 989 }, { "epoch": 0.06, "grad_norm": 0.5901638109218509, "learning_rate": 1.996226169667362e-05, "loss": 0.3631, "step": 990 }, { "epoch": 0.06, "grad_norm": 0.5898119279938763, "learning_rate": 1.9962100005766783e-05, "loss": 0.3696, "step": 991 }, { "epoch": 0.06, "grad_norm": 0.37546046574179853, "learning_rate": 1.9961937969872858e-05, "loss": 0.0755, "step": 992 }, { "epoch": 0.06, "grad_norm": 0.538046268884345, "learning_rate": 1.996177558899745e-05, "loss": 0.3463, "step": 993 }, { "epoch": 0.06, "grad_norm": 0.7582586586626521, "learning_rate": 1.9961612863146175e-05, "loss": 0.4278, "step": 994 }, { "epoch": 0.06, "grad_norm": 0.4647624246924399, "learning_rate": 1.9961449792324677e-05, "loss": 0.2752, "step": 995 }, { "epoch": 0.06, "grad_norm": 0.46627636277037593, "learning_rate": 1.9961286376538607e-05, "loss": 0.2918, "step": 996 }, { "epoch": 0.06, "grad_norm": 1.4470087570646943, "learning_rate": 1.996112261579361e-05, "loss": 0.8131, "step": 997 }, { "epoch": 0.06, "grad_norm": 0.40123913886301504, "learning_rate": 1.9960958510095373e-05, "loss": 0.248, "step": 998 }, { "epoch": 0.06, "grad_norm": 0.48846965331054437, "learning_rate": 1.9960794059449564e-05, "loss": 0.3432, "step": 999 }, { "epoch": 0.06, "grad_norm": 0.6562528678683638, "learning_rate": 1.996062926386189e-05, "loss": 0.4336, "step": 1000 }, { "epoch": 0.06, "grad_norm": 0.9824380086974319, "learning_rate": 1.996046412333805e-05, "loss": 0.472, "step": 1001 }, { "epoch": 0.06, "grad_norm": 0.70434564261008, "learning_rate": 1.996029863788377e-05, "loss": 0.335, "step": 1002 }, { "epoch": 0.06, "grad_norm": 0.47245421383507813, "learning_rate": 1.9960132807504772e-05, "loss": 0.3284, "step": 1003 }, { "epoch": 0.06, "grad_norm": 0.39877422949668473, "learning_rate": 1.9959966632206804e-05, "loss": 0.2873, "step": 1004 }, { "epoch": 0.06, "grad_norm": 0.5732104169024447, "learning_rate": 1.995980011199562e-05, "loss": 0.3251, "step": 1005 }, { "epoch": 0.06, "grad_norm": 0.5576190638879068, "learning_rate": 1.9959633246876987e-05, "loss": 0.3643, "step": 1006 }, { "epoch": 0.06, "grad_norm": 0.48385803317161563, "learning_rate": 1.995946603685668e-05, "loss": 0.3651, "step": 1007 }, { "epoch": 0.06, "grad_norm": 0.46973803118307117, "learning_rate": 1.99592984819405e-05, "loss": 0.2235, "step": 1008 }, { "epoch": 0.06, "grad_norm": 0.31711744184779644, "learning_rate": 1.9959130582134234e-05, "loss": 0.2126, "step": 1009 }, { "epoch": 0.06, "grad_norm": 0.6060670327723078, "learning_rate": 1.995896233744371e-05, "loss": 0.4179, "step": 1010 }, { "epoch": 0.06, "grad_norm": 0.39326789263335804, "learning_rate": 1.9958793747874744e-05, "loss": 0.2994, "step": 1011 }, { "epoch": 0.06, "grad_norm": 0.6352828886575169, "learning_rate": 1.995862481343318e-05, "loss": 0.4093, "step": 1012 }, { "epoch": 0.06, "grad_norm": 1.117282848504116, "learning_rate": 1.9958455534124867e-05, "loss": 0.6806, "step": 1013 }, { "epoch": 0.06, "grad_norm": 0.38513507477952136, "learning_rate": 1.9958285909955668e-05, "loss": 0.2518, "step": 1014 }, { "epoch": 0.06, "grad_norm": 0.49782104906396, "learning_rate": 1.9958115940931454e-05, "loss": 0.3526, "step": 1015 }, { "epoch": 0.06, "grad_norm": 0.5610251919492635, "learning_rate": 1.9957945627058115e-05, "loss": 0.3765, "step": 1016 }, { "epoch": 0.06, "grad_norm": 0.349097678917595, "learning_rate": 1.995777496834155e-05, "loss": 0.2702, "step": 1017 }, { "epoch": 0.06, "grad_norm": 0.6490771901337339, "learning_rate": 1.9957603964787662e-05, "loss": 0.2902, "step": 1018 }, { "epoch": 0.06, "grad_norm": 0.5038053547197864, "learning_rate": 1.9957432616402377e-05, "loss": 0.4018, "step": 1019 }, { "epoch": 0.06, "grad_norm": 1.0330461815808312, "learning_rate": 1.995726092319163e-05, "loss": 0.4524, "step": 1020 }, { "epoch": 0.06, "grad_norm": 0.4900679690379944, "learning_rate": 1.9957088885161366e-05, "loss": 0.3125, "step": 1021 }, { "epoch": 0.06, "grad_norm": 0.46117236213253127, "learning_rate": 1.9956916502317537e-05, "loss": 0.395, "step": 1022 }, { "epoch": 0.06, "grad_norm": 0.4574938839171714, "learning_rate": 1.9956743774666124e-05, "loss": 0.2827, "step": 1023 }, { "epoch": 0.06, "grad_norm": 0.36527000688924915, "learning_rate": 1.99565707022131e-05, "loss": 0.2012, "step": 1024 }, { "epoch": 0.06, "grad_norm": 1.771745956925933, "learning_rate": 1.995639728496446e-05, "loss": 0.859, "step": 1025 }, { "epoch": 0.06, "grad_norm": 0.5436455796015267, "learning_rate": 1.9956223522926212e-05, "loss": 0.3379, "step": 1026 }, { "epoch": 0.06, "grad_norm": 0.5224236889719983, "learning_rate": 1.995604941610437e-05, "loss": 0.3158, "step": 1027 }, { "epoch": 0.06, "grad_norm": 0.8151837330528591, "learning_rate": 1.9955874964504964e-05, "loss": 0.5744, "step": 1028 }, { "epoch": 0.06, "grad_norm": 0.31072521368728423, "learning_rate": 1.995570016813404e-05, "loss": 0.158, "step": 1029 }, { "epoch": 0.06, "grad_norm": 0.6735818235853589, "learning_rate": 1.995552502699764e-05, "loss": 0.3841, "step": 1030 }, { "epoch": 0.06, "grad_norm": 0.6394142288022795, "learning_rate": 1.9955349541101844e-05, "loss": 0.3293, "step": 1031 }, { "epoch": 0.06, "grad_norm": 0.7060434285177977, "learning_rate": 1.995517371045272e-05, "loss": 0.4121, "step": 1032 }, { "epoch": 0.06, "grad_norm": 0.7891476065439303, "learning_rate": 1.9954997535056354e-05, "loss": 0.361, "step": 1033 }, { "epoch": 0.06, "grad_norm": 0.6378419743565197, "learning_rate": 1.9954821014918857e-05, "loss": 0.3563, "step": 1034 }, { "epoch": 0.06, "grad_norm": 0.48004431946828835, "learning_rate": 1.995464415004633e-05, "loss": 0.2107, "step": 1035 }, { "epoch": 0.06, "grad_norm": 0.3995032608655963, "learning_rate": 1.9954466940444913e-05, "loss": 0.2438, "step": 1036 }, { "epoch": 0.06, "grad_norm": 1.1159782193087138, "learning_rate": 1.9954289386120728e-05, "loss": 0.5159, "step": 1037 }, { "epoch": 0.06, "grad_norm": 0.5618332940945832, "learning_rate": 1.995411148707993e-05, "loss": 0.3351, "step": 1038 }, { "epoch": 0.06, "grad_norm": 0.5215455455748942, "learning_rate": 1.995393324332868e-05, "loss": 0.3607, "step": 1039 }, { "epoch": 0.06, "grad_norm": 0.8208653345275203, "learning_rate": 1.9953754654873148e-05, "loss": 0.5031, "step": 1040 }, { "epoch": 0.06, "grad_norm": 0.3994022765006455, "learning_rate": 1.995357572171952e-05, "loss": 0.1645, "step": 1041 }, { "epoch": 0.06, "grad_norm": 0.49644502143567504, "learning_rate": 1.9953396443873996e-05, "loss": 0.322, "step": 1042 }, { "epoch": 0.06, "grad_norm": 0.7034835367330016, "learning_rate": 1.995321682134278e-05, "loss": 0.433, "step": 1043 }, { "epoch": 0.06, "grad_norm": 0.7908560910972711, "learning_rate": 1.995303685413209e-05, "loss": 0.4042, "step": 1044 }, { "epoch": 0.06, "grad_norm": 0.49210532169863536, "learning_rate": 1.9952856542248168e-05, "loss": 0.3592, "step": 1045 }, { "epoch": 0.06, "grad_norm": 0.646689735534446, "learning_rate": 1.995267588569725e-05, "loss": 0.4115, "step": 1046 }, { "epoch": 0.06, "grad_norm": 0.4048654510347636, "learning_rate": 1.9952494884485593e-05, "loss": 0.2289, "step": 1047 }, { "epoch": 0.06, "grad_norm": 0.4411817060201455, "learning_rate": 1.9952313538619467e-05, "loss": 0.2335, "step": 1048 }, { "epoch": 0.06, "grad_norm": 1.2620590338316189, "learning_rate": 1.995213184810515e-05, "loss": 0.7683, "step": 1049 }, { "epoch": 0.06, "grad_norm": 0.5329777387061266, "learning_rate": 1.9951949812948933e-05, "loss": 0.3357, "step": 1050 }, { "epoch": 0.06, "grad_norm": 0.5525441866753129, "learning_rate": 1.9951767433157126e-05, "loss": 0.3914, "step": 1051 }, { "epoch": 0.06, "grad_norm": 1.0809939876830499, "learning_rate": 1.9951584708736038e-05, "loss": 0.7235, "step": 1052 }, { "epoch": 0.06, "grad_norm": 0.3968852627552618, "learning_rate": 1.9951401639692e-05, "loss": 0.1826, "step": 1053 }, { "epoch": 0.06, "grad_norm": 0.4945020554203922, "learning_rate": 1.9951218226031354e-05, "loss": 0.3181, "step": 1054 }, { "epoch": 0.06, "grad_norm": 0.42878235628024847, "learning_rate": 1.9951034467760446e-05, "loss": 0.3214, "step": 1055 }, { "epoch": 0.06, "grad_norm": 0.904246221547212, "learning_rate": 1.995085036488564e-05, "loss": 0.5193, "step": 1056 }, { "epoch": 0.06, "grad_norm": 0.4788553603759133, "learning_rate": 1.9950665917413318e-05, "loss": 0.2582, "step": 1057 }, { "epoch": 0.06, "grad_norm": 0.497482325015419, "learning_rate": 1.995048112534986e-05, "loss": 0.3624, "step": 1058 }, { "epoch": 0.06, "grad_norm": 1.1997009804190655, "learning_rate": 1.995029598870167e-05, "loss": 0.6325, "step": 1059 }, { "epoch": 0.06, "grad_norm": 0.3760169516809768, "learning_rate": 1.995011050747516e-05, "loss": 0.1641, "step": 1060 }, { "epoch": 0.06, "grad_norm": 1.0218299290087096, "learning_rate": 1.994992468167675e-05, "loss": 0.5896, "step": 1061 }, { "epoch": 0.06, "grad_norm": 0.5325934080764194, "learning_rate": 1.9949738511312872e-05, "loss": 0.4082, "step": 1062 }, { "epoch": 0.06, "grad_norm": 0.5189018963919375, "learning_rate": 1.994955199638998e-05, "loss": 0.2595, "step": 1063 }, { "epoch": 0.06, "grad_norm": 1.8985392734797513, "learning_rate": 1.994936513691453e-05, "loss": 0.8639, "step": 1064 }, { "epoch": 0.06, "grad_norm": 0.616301320543746, "learning_rate": 1.9949177932892997e-05, "loss": 0.4081, "step": 1065 }, { "epoch": 0.06, "grad_norm": 0.43255047491592685, "learning_rate": 1.9948990384331853e-05, "loss": 0.2952, "step": 1066 }, { "epoch": 0.06, "grad_norm": 0.4972852098105318, "learning_rate": 1.9948802491237608e-05, "loss": 0.2134, "step": 1067 }, { "epoch": 0.06, "grad_norm": 0.7883153357673944, "learning_rate": 1.994861425361675e-05, "loss": 0.525, "step": 1068 }, { "epoch": 0.06, "grad_norm": 0.6785269587723624, "learning_rate": 1.9948425671475816e-05, "loss": 0.3279, "step": 1069 }, { "epoch": 0.06, "grad_norm": 0.5178554482526371, "learning_rate": 1.9948236744821327e-05, "loss": 0.2993, "step": 1070 }, { "epoch": 0.06, "grad_norm": 0.9167086626332699, "learning_rate": 1.994804747365983e-05, "loss": 0.4278, "step": 1071 }, { "epoch": 0.06, "grad_norm": 0.5356860464606096, "learning_rate": 1.994785785799787e-05, "loss": 0.3144, "step": 1072 }, { "epoch": 0.06, "grad_norm": 0.4967397434179642, "learning_rate": 1.9947667897842027e-05, "loss": 0.2778, "step": 1073 }, { "epoch": 0.06, "grad_norm": 0.6967124996922901, "learning_rate": 1.994747759319887e-05, "loss": 0.3812, "step": 1074 }, { "epoch": 0.06, "grad_norm": 0.48475894899954397, "learning_rate": 1.994728694407499e-05, "loss": 0.2961, "step": 1075 }, { "epoch": 0.06, "grad_norm": 0.9083581496874299, "learning_rate": 1.9947095950476992e-05, "loss": 0.5082, "step": 1076 }, { "epoch": 0.06, "grad_norm": 0.7444906820012983, "learning_rate": 1.994690461241149e-05, "loss": 0.3969, "step": 1077 }, { "epoch": 0.06, "grad_norm": 0.46270242209941526, "learning_rate": 1.994671292988511e-05, "loss": 0.3147, "step": 1078 }, { "epoch": 0.06, "grad_norm": 0.7264460965731782, "learning_rate": 1.9946520902904485e-05, "loss": 0.5275, "step": 1079 }, { "epoch": 0.06, "grad_norm": 0.43704367289000573, "learning_rate": 1.994632853147627e-05, "loss": 0.1736, "step": 1080 }, { "epoch": 0.06, "grad_norm": 0.5335354016333684, "learning_rate": 1.9946135815607128e-05, "loss": 0.3071, "step": 1081 }, { "epoch": 0.06, "grad_norm": 0.4954050549137195, "learning_rate": 1.9945942755303727e-05, "loss": 0.2834, "step": 1082 }, { "epoch": 0.06, "grad_norm": 1.0935397207678366, "learning_rate": 1.994574935057276e-05, "loss": 0.4421, "step": 1083 }, { "epoch": 0.06, "grad_norm": 0.438437072971696, "learning_rate": 1.994555560142092e-05, "loss": 0.3371, "step": 1084 }, { "epoch": 0.06, "grad_norm": 0.8748799090587484, "learning_rate": 1.9945361507854914e-05, "loss": 0.5721, "step": 1085 }, { "epoch": 0.06, "grad_norm": 0.49367308799806964, "learning_rate": 1.9945167069881468e-05, "loss": 0.3301, "step": 1086 }, { "epoch": 0.06, "grad_norm": 0.5471671685840018, "learning_rate": 1.9944972287507316e-05, "loss": 0.2885, "step": 1087 }, { "epoch": 0.06, "grad_norm": 0.44722860542267706, "learning_rate": 1.99447771607392e-05, "loss": 0.2388, "step": 1088 }, { "epoch": 0.06, "grad_norm": 0.4399451723523636, "learning_rate": 1.9944581689583878e-05, "loss": 0.3063, "step": 1089 }, { "epoch": 0.06, "grad_norm": 0.49743000266769444, "learning_rate": 1.994438587404812e-05, "loss": 0.3283, "step": 1090 }, { "epoch": 0.06, "grad_norm": 1.2104431749854316, "learning_rate": 1.994418971413871e-05, "loss": 0.5652, "step": 1091 }, { "epoch": 0.06, "grad_norm": 1.2263233603048742, "learning_rate": 1.994399320986243e-05, "loss": 0.7256, "step": 1092 }, { "epoch": 0.06, "grad_norm": 0.5740931856541661, "learning_rate": 1.99437963612261e-05, "loss": 0.2438, "step": 1093 }, { "epoch": 0.06, "grad_norm": 0.39197406731229295, "learning_rate": 1.9943599168236526e-05, "loss": 0.2557, "step": 1094 }, { "epoch": 0.06, "grad_norm": 1.245863493813506, "learning_rate": 1.9943401630900543e-05, "loss": 0.5469, "step": 1095 }, { "epoch": 0.06, "grad_norm": 0.5865781893361174, "learning_rate": 1.9943203749224986e-05, "loss": 0.3007, "step": 1096 }, { "epoch": 0.06, "grad_norm": 1.3374424837824597, "learning_rate": 1.9943005523216713e-05, "loss": 0.581, "step": 1097 }, { "epoch": 0.06, "grad_norm": 0.5545592888985182, "learning_rate": 1.9942806952882587e-05, "loss": 0.3663, "step": 1098 }, { "epoch": 0.06, "grad_norm": 0.4807505633477306, "learning_rate": 1.994260803822948e-05, "loss": 0.2451, "step": 1099 }, { "epoch": 0.06, "grad_norm": 0.3769247168053469, "learning_rate": 1.994240877926429e-05, "loss": 0.2144, "step": 1100 }, { "epoch": 0.06, "grad_norm": 0.7380917848688296, "learning_rate": 1.994220917599391e-05, "loss": 0.4029, "step": 1101 }, { "epoch": 0.06, "grad_norm": 0.5737493430661074, "learning_rate": 1.994200922842525e-05, "loss": 0.3119, "step": 1102 }, { "epoch": 0.06, "grad_norm": 2.063385576846166, "learning_rate": 1.994180893656524e-05, "loss": 0.8398, "step": 1103 }, { "epoch": 0.06, "grad_norm": 1.3920732373420577, "learning_rate": 1.9941608300420815e-05, "loss": 0.7834, "step": 1104 }, { "epoch": 0.06, "grad_norm": 0.7518563438767537, "learning_rate": 1.9941407319998918e-05, "loss": 0.3335, "step": 1105 }, { "epoch": 0.06, "grad_norm": 0.45728680330538174, "learning_rate": 1.9941205995306517e-05, "loss": 0.2271, "step": 1106 }, { "epoch": 0.06, "grad_norm": 0.7047347617250996, "learning_rate": 1.994100432635058e-05, "loss": 0.5542, "step": 1107 }, { "epoch": 0.06, "grad_norm": 0.7164373543084449, "learning_rate": 1.9940802313138092e-05, "loss": 0.3288, "step": 1108 }, { "epoch": 0.06, "grad_norm": 0.4610040084120822, "learning_rate": 1.994059995567604e-05, "loss": 0.2943, "step": 1109 }, { "epoch": 0.06, "grad_norm": 0.6608778183845305, "learning_rate": 1.9940397253971447e-05, "loss": 0.3997, "step": 1110 }, { "epoch": 0.06, "grad_norm": 0.6010430641256433, "learning_rate": 1.9940194208031322e-05, "loss": 0.3541, "step": 1111 }, { "epoch": 0.06, "grad_norm": 0.5892069559400017, "learning_rate": 1.9939990817862696e-05, "loss": 0.3829, "step": 1112 }, { "epoch": 0.06, "grad_norm": 0.39423776153888224, "learning_rate": 1.9939787083472616e-05, "loss": 0.3134, "step": 1113 }, { "epoch": 0.06, "grad_norm": 0.3783860241639249, "learning_rate": 1.993958300486814e-05, "loss": 0.2465, "step": 1114 }, { "epoch": 0.06, "grad_norm": 0.5857598729183383, "learning_rate": 1.9939378582056332e-05, "loss": 0.3123, "step": 1115 }, { "epoch": 0.06, "grad_norm": 1.451790292271993, "learning_rate": 1.993917381504427e-05, "loss": 0.8534, "step": 1116 }, { "epoch": 0.06, "grad_norm": 0.43312088945702426, "learning_rate": 1.9938968703839045e-05, "loss": 0.3252, "step": 1117 }, { "epoch": 0.06, "grad_norm": 0.5774867354453822, "learning_rate": 1.9938763248447762e-05, "loss": 0.3247, "step": 1118 }, { "epoch": 0.06, "grad_norm": 0.5606158673409164, "learning_rate": 1.9938557448877536e-05, "loss": 0.3491, "step": 1119 }, { "epoch": 0.06, "grad_norm": 0.3791222671011608, "learning_rate": 1.9938351305135492e-05, "loss": 0.235, "step": 1120 }, { "epoch": 0.06, "grad_norm": 1.9555671938828716, "learning_rate": 1.993814481722877e-05, "loss": 0.8035, "step": 1121 }, { "epoch": 0.06, "grad_norm": 0.5317081120210266, "learning_rate": 1.9937937985164518e-05, "loss": 0.3285, "step": 1122 }, { "epoch": 0.06, "grad_norm": 0.7753229601610528, "learning_rate": 1.9937730808949905e-05, "loss": 0.3667, "step": 1123 }, { "epoch": 0.06, "grad_norm": 0.5828111764073042, "learning_rate": 1.99375232885921e-05, "loss": 0.4137, "step": 1124 }, { "epoch": 0.06, "grad_norm": 0.39500188850271334, "learning_rate": 1.9937315424098288e-05, "loss": 0.291, "step": 1125 }, { "epoch": 0.06, "grad_norm": 0.41533959678148713, "learning_rate": 1.9937107215475673e-05, "loss": 0.2114, "step": 1126 }, { "epoch": 0.06, "grad_norm": 0.5307863364980268, "learning_rate": 1.9936898662731463e-05, "loss": 0.3774, "step": 1127 }, { "epoch": 0.06, "grad_norm": 1.1478798383437205, "learning_rate": 1.9936689765872878e-05, "loss": 0.5261, "step": 1128 }, { "epoch": 0.06, "grad_norm": 0.46423088674225926, "learning_rate": 1.9936480524907154e-05, "loss": 0.3142, "step": 1129 }, { "epoch": 0.06, "grad_norm": 0.4766471909599253, "learning_rate": 1.9936270939841536e-05, "loss": 0.3808, "step": 1130 }, { "epoch": 0.06, "grad_norm": 1.155229588534361, "learning_rate": 1.9936061010683285e-05, "loss": 0.5931, "step": 1131 }, { "epoch": 0.07, "grad_norm": 0.3543521865584727, "learning_rate": 1.9935850737439667e-05, "loss": 0.134, "step": 1132 }, { "epoch": 0.07, "grad_norm": 0.4931860315991214, "learning_rate": 1.9935640120117965e-05, "loss": 0.3337, "step": 1133 }, { "epoch": 0.07, "grad_norm": 1.390135619937858, "learning_rate": 1.9935429158725475e-05, "loss": 0.8683, "step": 1134 }, { "epoch": 0.07, "grad_norm": 0.5975249328681871, "learning_rate": 1.9935217853269497e-05, "loss": 0.3176, "step": 1135 }, { "epoch": 0.07, "grad_norm": 0.625255175254, "learning_rate": 1.9935006203757354e-05, "loss": 0.4517, "step": 1136 }, { "epoch": 0.07, "grad_norm": 0.5236283609437692, "learning_rate": 1.9934794210196374e-05, "loss": 0.3806, "step": 1137 }, { "epoch": 0.07, "grad_norm": 0.25258242792276325, "learning_rate": 1.9934581872593893e-05, "loss": 0.1451, "step": 1138 }, { "epoch": 0.07, "grad_norm": 0.6981887311269585, "learning_rate": 1.9934369190957275e-05, "loss": 0.4007, "step": 1139 }, { "epoch": 0.07, "grad_norm": 0.8433075775942684, "learning_rate": 1.9934156165293878e-05, "loss": 0.6068, "step": 1140 }, { "epoch": 0.07, "grad_norm": 0.4589248024137913, "learning_rate": 1.9933942795611075e-05, "loss": 0.2664, "step": 1141 }, { "epoch": 0.07, "grad_norm": 0.5854364839569616, "learning_rate": 1.9933729081916266e-05, "loss": 0.4127, "step": 1142 }, { "epoch": 0.07, "grad_norm": 1.1202946340701216, "learning_rate": 1.9933515024216844e-05, "loss": 0.6557, "step": 1143 }, { "epoch": 0.07, "grad_norm": 0.32203550195598013, "learning_rate": 1.9933300622520225e-05, "loss": 0.1755, "step": 1144 }, { "epoch": 0.07, "grad_norm": 0.47969552743697114, "learning_rate": 1.9933085876833833e-05, "loss": 0.2807, "step": 1145 }, { "epoch": 0.07, "grad_norm": 1.0268504627365178, "learning_rate": 1.99328707871651e-05, "loss": 0.6002, "step": 1146 }, { "epoch": 0.07, "grad_norm": 0.6890581144690542, "learning_rate": 1.9932655353521483e-05, "loss": 0.4666, "step": 1147 }, { "epoch": 0.07, "grad_norm": 0.5003936995109405, "learning_rate": 1.9932439575910436e-05, "loss": 0.2906, "step": 1148 }, { "epoch": 0.07, "grad_norm": 0.5008695250916106, "learning_rate": 1.9932223454339435e-05, "loss": 0.3429, "step": 1149 }, { "epoch": 0.07, "grad_norm": 0.43940406343985966, "learning_rate": 1.993200698881596e-05, "loss": 0.2448, "step": 1150 }, { "epoch": 0.07, "grad_norm": 0.37846887106776983, "learning_rate": 1.9931790179347514e-05, "loss": 0.2156, "step": 1151 }, { "epoch": 0.07, "grad_norm": 1.9187364850397228, "learning_rate": 1.99315730259416e-05, "loss": 0.6074, "step": 1152 }, { "epoch": 0.07, "grad_norm": 0.4227557946650834, "learning_rate": 1.9931355528605738e-05, "loss": 0.3318, "step": 1153 }, { "epoch": 0.07, "grad_norm": 0.6158280113417645, "learning_rate": 1.993113768734746e-05, "loss": 0.3654, "step": 1154 }, { "epoch": 0.07, "grad_norm": 1.1925768363660976, "learning_rate": 1.9930919502174312e-05, "loss": 0.4372, "step": 1155 }, { "epoch": 0.07, "grad_norm": 0.2978244213918772, "learning_rate": 1.993070097309385e-05, "loss": 0.1655, "step": 1156 }, { "epoch": 0.07, "grad_norm": 0.4938521213731747, "learning_rate": 1.9930482100113642e-05, "loss": 0.3306, "step": 1157 }, { "epoch": 0.07, "grad_norm": 1.026122384333359, "learning_rate": 1.9930262883241265e-05, "loss": 0.4183, "step": 1158 }, { "epoch": 0.07, "grad_norm": 0.9371568067117475, "learning_rate": 1.9930043322484306e-05, "loss": 0.552, "step": 1159 }, { "epoch": 0.07, "grad_norm": 0.5631135620050378, "learning_rate": 1.992982341785038e-05, "loss": 0.3326, "step": 1160 }, { "epoch": 0.07, "grad_norm": 0.5588638465425702, "learning_rate": 1.9929603169347095e-05, "loss": 0.3178, "step": 1161 }, { "epoch": 0.07, "grad_norm": 1.096666482734466, "learning_rate": 1.9929382576982076e-05, "loss": 0.4737, "step": 1162 }, { "epoch": 0.07, "grad_norm": 0.3927720395948921, "learning_rate": 1.9929161640762968e-05, "loss": 0.281, "step": 1163 }, { "epoch": 0.07, "grad_norm": 0.7510863349671362, "learning_rate": 1.992894036069742e-05, "loss": 0.5086, "step": 1164 }, { "epoch": 0.07, "grad_norm": 0.6070472615776253, "learning_rate": 1.992871873679309e-05, "loss": 0.3358, "step": 1165 }, { "epoch": 0.07, "grad_norm": 0.4624380279404961, "learning_rate": 1.9928496769057662e-05, "loss": 0.2936, "step": 1166 }, { "epoch": 0.07, "grad_norm": 2.0190631314339442, "learning_rate": 1.9928274457498818e-05, "loss": 0.7872, "step": 1167 }, { "epoch": 0.07, "grad_norm": 0.4957965334493548, "learning_rate": 1.9928051802124252e-05, "loss": 0.3326, "step": 1168 }, { "epoch": 0.07, "grad_norm": 0.47560370552216386, "learning_rate": 1.9927828802941683e-05, "loss": 0.3471, "step": 1169 }, { "epoch": 0.07, "grad_norm": 0.5550644991870759, "learning_rate": 1.9927605459958825e-05, "loss": 0.3929, "step": 1170 }, { "epoch": 0.07, "grad_norm": 0.4000080669193282, "learning_rate": 1.992738177318342e-05, "loss": 0.1703, "step": 1171 }, { "epoch": 0.07, "grad_norm": 0.6353595781264442, "learning_rate": 1.992715774262321e-05, "loss": 0.3254, "step": 1172 }, { "epoch": 0.07, "grad_norm": 0.5679976742869749, "learning_rate": 1.992693336828596e-05, "loss": 0.3704, "step": 1173 }, { "epoch": 0.07, "grad_norm": 0.5135060035524516, "learning_rate": 1.9926708650179426e-05, "loss": 0.2752, "step": 1174 }, { "epoch": 0.07, "grad_norm": 0.5082286635920554, "learning_rate": 1.9926483588311402e-05, "loss": 0.3722, "step": 1175 }, { "epoch": 0.07, "grad_norm": 0.7864428636362328, "learning_rate": 1.9926258182689677e-05, "loss": 0.5978, "step": 1176 }, { "epoch": 0.07, "grad_norm": 0.4977399864736148, "learning_rate": 1.992603243332206e-05, "loss": 0.2923, "step": 1177 }, { "epoch": 0.07, "grad_norm": 0.3823089546041058, "learning_rate": 1.9925806340216365e-05, "loss": 0.2497, "step": 1178 }, { "epoch": 0.07, "grad_norm": 0.5603124203509675, "learning_rate": 1.9925579903380425e-05, "loss": 0.3242, "step": 1179 }, { "epoch": 0.07, "grad_norm": 0.4985678408576438, "learning_rate": 1.9925353122822077e-05, "loss": 0.4399, "step": 1180 }, { "epoch": 0.07, "grad_norm": 0.37733182664093273, "learning_rate": 1.992512599854918e-05, "loss": 0.269, "step": 1181 }, { "epoch": 0.07, "grad_norm": 1.1088873935769934, "learning_rate": 1.9924898530569594e-05, "loss": 0.6713, "step": 1182 }, { "epoch": 0.07, "grad_norm": 0.5461847080526667, "learning_rate": 1.99246707188912e-05, "loss": 0.3439, "step": 1183 }, { "epoch": 0.07, "grad_norm": 0.3354047490782291, "learning_rate": 1.9924442563521885e-05, "loss": 0.1924, "step": 1184 }, { "epoch": 0.07, "grad_norm": 0.5354214457370254, "learning_rate": 1.992421406446955e-05, "loss": 0.3413, "step": 1185 }, { "epoch": 0.07, "grad_norm": 0.9941015836479209, "learning_rate": 1.9923985221742112e-05, "loss": 0.5636, "step": 1186 }, { "epoch": 0.07, "grad_norm": 0.4010314605169855, "learning_rate": 1.992375603534749e-05, "loss": 0.2394, "step": 1187 }, { "epoch": 0.07, "grad_norm": 1.578704678511741, "learning_rate": 1.9923526505293623e-05, "loss": 0.844, "step": 1188 }, { "epoch": 0.07, "grad_norm": 0.5145164289755612, "learning_rate": 1.9923296631588462e-05, "loss": 0.3961, "step": 1189 }, { "epoch": 0.07, "grad_norm": 0.42736151507104164, "learning_rate": 1.9923066414239965e-05, "loss": 0.2347, "step": 1190 }, { "epoch": 0.07, "grad_norm": 0.39926855460341876, "learning_rate": 1.9922835853256103e-05, "loss": 0.2363, "step": 1191 }, { "epoch": 0.07, "grad_norm": 0.6873331025968474, "learning_rate": 1.9922604948644865e-05, "loss": 0.4264, "step": 1192 }, { "epoch": 0.07, "grad_norm": 0.5172055478516355, "learning_rate": 1.992237370041424e-05, "loss": 0.3225, "step": 1193 }, { "epoch": 0.07, "grad_norm": 0.5461826946326311, "learning_rate": 1.9922142108572245e-05, "loss": 0.3776, "step": 1194 }, { "epoch": 0.07, "grad_norm": 0.8171086797305573, "learning_rate": 1.9921910173126894e-05, "loss": 0.545, "step": 1195 }, { "epoch": 0.07, "grad_norm": 0.545195871554347, "learning_rate": 1.9921677894086217e-05, "loss": 0.3055, "step": 1196 }, { "epoch": 0.07, "grad_norm": 0.296707240427443, "learning_rate": 1.9921445271458263e-05, "loss": 0.2576, "step": 1197 }, { "epoch": 0.07, "grad_norm": 1.0695270171865263, "learning_rate": 1.992121230525109e-05, "loss": 0.5985, "step": 1198 }, { "epoch": 0.07, "grad_norm": 0.5373484107110249, "learning_rate": 1.992097899547276e-05, "loss": 0.3029, "step": 1199 }, { "epoch": 0.07, "grad_norm": 0.521812372352844, "learning_rate": 1.992074534213135e-05, "loss": 0.3079, "step": 1200 }, { "epoch": 0.07, "grad_norm": 1.3838297456653332, "learning_rate": 1.9920511345234956e-05, "loss": 0.7985, "step": 1201 }, { "epoch": 0.07, "grad_norm": 0.48375303096121197, "learning_rate": 1.9920277004791682e-05, "loss": 0.3086, "step": 1202 }, { "epoch": 0.07, "grad_norm": 0.552106262015009, "learning_rate": 1.992004232080964e-05, "loss": 0.3058, "step": 1203 }, { "epoch": 0.07, "grad_norm": 0.5531874559226082, "learning_rate": 1.9919807293296963e-05, "loss": 0.3231, "step": 1204 }, { "epoch": 0.07, "grad_norm": 0.5455190994044464, "learning_rate": 1.9919571922261784e-05, "loss": 0.3043, "step": 1205 }, { "epoch": 0.07, "grad_norm": 1.2944476032556527, "learning_rate": 1.9919336207712258e-05, "loss": 0.7407, "step": 1206 }, { "epoch": 0.07, "grad_norm": 0.9246456693780484, "learning_rate": 1.991910014965654e-05, "loss": 0.5139, "step": 1207 }, { "epoch": 0.07, "grad_norm": 0.5754654948616781, "learning_rate": 1.9918863748102818e-05, "loss": 0.2991, "step": 1208 }, { "epoch": 0.07, "grad_norm": 0.5043664025461552, "learning_rate": 1.9918627003059266e-05, "loss": 0.3907, "step": 1209 }, { "epoch": 0.07, "grad_norm": 0.3706019964753908, "learning_rate": 1.9918389914534086e-05, "loss": 0.2143, "step": 1210 }, { "epoch": 0.07, "grad_norm": 0.5300152129471692, "learning_rate": 1.9918152482535494e-05, "loss": 0.3016, "step": 1211 }, { "epoch": 0.07, "grad_norm": 0.5248817577305702, "learning_rate": 1.9917914707071703e-05, "loss": 0.4038, "step": 1212 }, { "epoch": 0.07, "grad_norm": 0.617387402551657, "learning_rate": 1.991767658815096e-05, "loss": 0.3454, "step": 1213 }, { "epoch": 0.07, "grad_norm": 0.4336728307512205, "learning_rate": 1.9917438125781497e-05, "loss": 0.2924, "step": 1214 }, { "epoch": 0.07, "grad_norm": 0.72894687779715, "learning_rate": 1.991719931997158e-05, "loss": 0.5171, "step": 1215 }, { "epoch": 0.07, "grad_norm": 0.3394246258776419, "learning_rate": 1.9916960170729475e-05, "loss": 0.2009, "step": 1216 }, { "epoch": 0.07, "grad_norm": 0.44296100608782896, "learning_rate": 1.9916720678063467e-05, "loss": 0.3119, "step": 1217 }, { "epoch": 0.07, "grad_norm": 1.2136487371261702, "learning_rate": 1.9916480841981853e-05, "loss": 0.6634, "step": 1218 }, { "epoch": 0.07, "grad_norm": 0.9793411528882585, "learning_rate": 1.991624066249293e-05, "loss": 0.6157, "step": 1219 }, { "epoch": 0.07, "grad_norm": 0.3788574100278588, "learning_rate": 1.9916000139605013e-05, "loss": 0.2773, "step": 1220 }, { "epoch": 0.07, "grad_norm": 0.5509266148859141, "learning_rate": 1.9915759273326447e-05, "loss": 0.4402, "step": 1221 }, { "epoch": 0.07, "grad_norm": 0.35834376031323334, "learning_rate": 1.9915518063665556e-05, "loss": 0.1633, "step": 1222 }, { "epoch": 0.07, "grad_norm": 0.4119437399996039, "learning_rate": 1.991527651063071e-05, "loss": 0.2417, "step": 1223 }, { "epoch": 0.07, "grad_norm": 0.6280447549338211, "learning_rate": 1.9915034614230256e-05, "loss": 0.4287, "step": 1224 }, { "epoch": 0.07, "grad_norm": 0.5800209981404687, "learning_rate": 1.9914792374472584e-05, "loss": 0.4533, "step": 1225 }, { "epoch": 0.07, "grad_norm": 0.43318599109249784, "learning_rate": 1.991454979136608e-05, "loss": 0.262, "step": 1226 }, { "epoch": 0.07, "grad_norm": 0.8602780715330212, "learning_rate": 1.991430686491914e-05, "loss": 0.5644, "step": 1227 }, { "epoch": 0.07, "grad_norm": 0.37876834389922714, "learning_rate": 1.9914063595140184e-05, "loss": 0.3045, "step": 1228 }, { "epoch": 0.07, "grad_norm": 0.41026526676860525, "learning_rate": 1.9913819982037627e-05, "loss": 0.2101, "step": 1229 }, { "epoch": 0.07, "grad_norm": 0.5216085347059072, "learning_rate": 1.991357602561991e-05, "loss": 0.2894, "step": 1230 }, { "epoch": 0.07, "grad_norm": 0.9685343917475587, "learning_rate": 1.9913331725895485e-05, "loss": 0.5665, "step": 1231 }, { "epoch": 0.07, "grad_norm": 0.49616526614313705, "learning_rate": 1.9913087082872808e-05, "loss": 0.3046, "step": 1232 }, { "epoch": 0.07, "grad_norm": 0.40009900721312874, "learning_rate": 1.9912842096560348e-05, "loss": 0.2986, "step": 1233 }, { "epoch": 0.07, "grad_norm": 0.6011999434725922, "learning_rate": 1.9912596766966598e-05, "loss": 0.3332, "step": 1234 }, { "epoch": 0.07, "grad_norm": 0.3754282911061259, "learning_rate": 1.9912351094100043e-05, "loss": 0.2408, "step": 1235 }, { "epoch": 0.07, "grad_norm": 0.4790995862974184, "learning_rate": 1.99121050779692e-05, "loss": 0.313, "step": 1236 }, { "epoch": 0.07, "grad_norm": 0.9519799944694786, "learning_rate": 1.9911858718582583e-05, "loss": 0.5728, "step": 1237 }, { "epoch": 0.07, "grad_norm": 0.48524535965068866, "learning_rate": 1.9911612015948726e-05, "loss": 0.3642, "step": 1238 }, { "epoch": 0.07, "grad_norm": 0.6463784806450834, "learning_rate": 1.9911364970076167e-05, "loss": 0.2903, "step": 1239 }, { "epoch": 0.07, "grad_norm": 0.5487653396668991, "learning_rate": 1.9911117580973468e-05, "loss": 0.3872, "step": 1240 }, { "epoch": 0.07, "grad_norm": 0.28116441246936236, "learning_rate": 1.9910869848649192e-05, "loss": 0.1866, "step": 1241 }, { "epoch": 0.07, "grad_norm": 0.7251262440666021, "learning_rate": 1.991062177311192e-05, "loss": 0.4662, "step": 1242 }, { "epoch": 0.07, "grad_norm": 1.4629712657903426, "learning_rate": 1.9910373354370245e-05, "loss": 0.504, "step": 1243 }, { "epoch": 0.07, "grad_norm": 0.4424282521343179, "learning_rate": 1.9910124592432763e-05, "loss": 0.3082, "step": 1244 }, { "epoch": 0.07, "grad_norm": 0.7088073521939233, "learning_rate": 1.9909875487308096e-05, "loss": 0.4297, "step": 1245 }, { "epoch": 0.07, "grad_norm": 0.45330422783598706, "learning_rate": 1.9909626039004862e-05, "loss": 0.1286, "step": 1246 }, { "epoch": 0.07, "grad_norm": 0.6475879211850505, "learning_rate": 1.990937624753171e-05, "loss": 0.3348, "step": 1247 }, { "epoch": 0.07, "grad_norm": 0.5626112796455328, "learning_rate": 1.990912611289728e-05, "loss": 0.3295, "step": 1248 }, { "epoch": 0.07, "grad_norm": 2.169279671276457, "learning_rate": 1.9908875635110244e-05, "loss": 0.5194, "step": 1249 }, { "epoch": 0.07, "grad_norm": 0.7257332887197935, "learning_rate": 1.990862481417927e-05, "loss": 0.4001, "step": 1250 }, { "epoch": 0.07, "grad_norm": 0.6408089696432202, "learning_rate": 1.990837365011304e-05, "loss": 0.3905, "step": 1251 }, { "epoch": 0.07, "grad_norm": 0.628381425763534, "learning_rate": 1.9908122142920262e-05, "loss": 0.2872, "step": 1252 }, { "epoch": 0.07, "grad_norm": 0.393935167434499, "learning_rate": 1.9907870292609633e-05, "loss": 0.1497, "step": 1253 }, { "epoch": 0.07, "grad_norm": 1.3577178025888306, "learning_rate": 1.990761809918989e-05, "loss": 0.4176, "step": 1254 }, { "epoch": 0.07, "grad_norm": 2.0334997177234264, "learning_rate": 1.9907365562669753e-05, "loss": 0.6454, "step": 1255 }, { "epoch": 0.07, "grad_norm": 0.4657792094873402, "learning_rate": 1.9907112683057974e-05, "loss": 0.2523, "step": 1256 }, { "epoch": 0.07, "grad_norm": 0.6638005020345322, "learning_rate": 1.9906859460363307e-05, "loss": 0.4164, "step": 1257 }, { "epoch": 0.07, "grad_norm": 1.2400648252392563, "learning_rate": 1.9906605894594525e-05, "loss": 0.6687, "step": 1258 }, { "epoch": 0.07, "grad_norm": 0.5219896184442447, "learning_rate": 1.990635198576041e-05, "loss": 0.2149, "step": 1259 }, { "epoch": 0.07, "grad_norm": 0.7292828300759986, "learning_rate": 1.9906097733869746e-05, "loss": 0.3419, "step": 1260 }, { "epoch": 0.07, "grad_norm": 2.141928613905644, "learning_rate": 1.990584313893135e-05, "loss": 0.5934, "step": 1261 }, { "epoch": 0.07, "grad_norm": 0.352641454123304, "learning_rate": 1.9905588200954025e-05, "loss": 0.1808, "step": 1262 }, { "epoch": 0.07, "grad_norm": 0.743654610148312, "learning_rate": 1.990533291994661e-05, "loss": 0.4418, "step": 1263 }, { "epoch": 0.07, "grad_norm": 0.5783868326461529, "learning_rate": 1.9905077295917944e-05, "loss": 0.3728, "step": 1264 }, { "epoch": 0.07, "grad_norm": 0.6927545700509946, "learning_rate": 1.9904821328876873e-05, "loss": 0.2807, "step": 1265 }, { "epoch": 0.07, "grad_norm": 0.9029660267620941, "learning_rate": 1.9904565018832267e-05, "loss": 0.4061, "step": 1266 }, { "epoch": 0.07, "grad_norm": 0.48832856958609416, "learning_rate": 1.9904308365792998e-05, "loss": 0.3221, "step": 1267 }, { "epoch": 0.07, "grad_norm": 0.6071012810126546, "learning_rate": 1.9904051369767958e-05, "loss": 0.3727, "step": 1268 }, { "epoch": 0.07, "grad_norm": 0.4333485493904293, "learning_rate": 1.9903794030766047e-05, "loss": 0.216, "step": 1269 }, { "epoch": 0.07, "grad_norm": 0.8550028384302811, "learning_rate": 1.9903536348796172e-05, "loss": 0.5802, "step": 1270 }, { "epoch": 0.07, "grad_norm": 0.5444160978702124, "learning_rate": 1.9903278323867262e-05, "loss": 0.502, "step": 1271 }, { "epoch": 0.07, "grad_norm": 0.49056768853210037, "learning_rate": 1.9903019955988246e-05, "loss": 0.2712, "step": 1272 }, { "epoch": 0.07, "grad_norm": 1.1139660490114864, "learning_rate": 1.9902761245168078e-05, "loss": 0.659, "step": 1273 }, { "epoch": 0.07, "grad_norm": 0.36944374947304287, "learning_rate": 1.990250219141571e-05, "loss": 0.1944, "step": 1274 }, { "epoch": 0.07, "grad_norm": 0.4744817910147028, "learning_rate": 1.990224279474012e-05, "loss": 0.2431, "step": 1275 }, { "epoch": 0.07, "grad_norm": 0.5063465239101601, "learning_rate": 1.990198305515029e-05, "loss": 0.3569, "step": 1276 }, { "epoch": 0.07, "grad_norm": 0.8148200431071357, "learning_rate": 1.9901722972655207e-05, "loss": 0.4337, "step": 1277 }, { "epoch": 0.07, "grad_norm": 0.4741481971958385, "learning_rate": 1.990146254726389e-05, "loss": 0.3101, "step": 1278 }, { "epoch": 0.07, "grad_norm": 0.5962509814169538, "learning_rate": 1.9901201778985344e-05, "loss": 0.4426, "step": 1279 }, { "epoch": 0.07, "grad_norm": 0.38573360540688933, "learning_rate": 1.9900940667828606e-05, "loss": 0.259, "step": 1280 }, { "epoch": 0.07, "grad_norm": 0.40658105084541624, "learning_rate": 1.9900679213802724e-05, "loss": 0.2474, "step": 1281 }, { "epoch": 0.07, "grad_norm": 0.7175034272163154, "learning_rate": 1.9900417416916742e-05, "loss": 0.4537, "step": 1282 }, { "epoch": 0.07, "grad_norm": 0.5669166964944146, "learning_rate": 1.9900155277179734e-05, "loss": 0.3968, "step": 1283 }, { "epoch": 0.07, "grad_norm": 0.7815627889065767, "learning_rate": 1.989989279460077e-05, "loss": 0.313, "step": 1284 }, { "epoch": 0.07, "grad_norm": 0.9383698484651365, "learning_rate": 1.989962996918895e-05, "loss": 0.5734, "step": 1285 }, { "epoch": 0.07, "grad_norm": 0.4976846772814327, "learning_rate": 1.9899366800953367e-05, "loss": 0.2426, "step": 1286 }, { "epoch": 0.07, "grad_norm": 0.38644317778329124, "learning_rate": 1.9899103289903137e-05, "loss": 0.2545, "step": 1287 }, { "epoch": 0.07, "grad_norm": 0.545966418703407, "learning_rate": 1.9898839436047384e-05, "loss": 0.3516, "step": 1288 }, { "epoch": 0.07, "grad_norm": 0.8579484617086817, "learning_rate": 1.989857523939525e-05, "loss": 0.4972, "step": 1289 }, { "epoch": 0.07, "grad_norm": 0.455372961518016, "learning_rate": 1.9898310699955884e-05, "loss": 0.3282, "step": 1290 }, { "epoch": 0.07, "grad_norm": 0.6335961270391565, "learning_rate": 1.989804581773844e-05, "loss": 0.3331, "step": 1291 }, { "epoch": 0.07, "grad_norm": 0.50902091626044, "learning_rate": 1.989778059275209e-05, "loss": 0.3618, "step": 1292 }, { "epoch": 0.07, "grad_norm": 0.30150229710949894, "learning_rate": 1.989751502500603e-05, "loss": 0.2083, "step": 1293 }, { "epoch": 0.07, "grad_norm": 0.9675402387132627, "learning_rate": 1.9897249114509453e-05, "loss": 0.533, "step": 1294 }, { "epoch": 0.07, "grad_norm": 0.5243771162575476, "learning_rate": 1.9896982861271558e-05, "loss": 0.3046, "step": 1295 }, { "epoch": 0.07, "grad_norm": 0.5198603235923646, "learning_rate": 1.9896716265301577e-05, "loss": 0.3362, "step": 1296 }, { "epoch": 0.07, "grad_norm": 1.1509175904091664, "learning_rate": 1.9896449326608734e-05, "loss": 0.8126, "step": 1297 }, { "epoch": 0.07, "grad_norm": 0.23527713331733763, "learning_rate": 1.9896182045202278e-05, "loss": 0.1337, "step": 1298 }, { "epoch": 0.07, "grad_norm": 0.5617916958589548, "learning_rate": 1.989591442109146e-05, "loss": 0.3786, "step": 1299 }, { "epoch": 0.07, "grad_norm": 0.5253914577139904, "learning_rate": 1.9895646454285558e-05, "loss": 0.4054, "step": 1300 }, { "epoch": 0.07, "grad_norm": 0.3003271411328984, "learning_rate": 1.989537814479384e-05, "loss": 0.1277, "step": 1301 }, { "epoch": 0.07, "grad_norm": 0.4684808438512099, "learning_rate": 1.9895109492625604e-05, "loss": 0.2995, "step": 1302 }, { "epoch": 0.07, "grad_norm": 0.5034967279795296, "learning_rate": 1.9894840497790154e-05, "loss": 0.3855, "step": 1303 }, { "epoch": 0.07, "grad_norm": 0.6015096789873883, "learning_rate": 1.98945711602968e-05, "loss": 0.4516, "step": 1304 }, { "epoch": 0.07, "grad_norm": 0.43202696608905033, "learning_rate": 1.9894301480154873e-05, "loss": 0.2953, "step": 1305 }, { "epoch": 0.08, "grad_norm": 0.39747979018767193, "learning_rate": 1.989403145737371e-05, "loss": 0.2653, "step": 1306 }, { "epoch": 0.08, "grad_norm": 0.5408971085490359, "learning_rate": 1.989376109196266e-05, "loss": 0.3496, "step": 1307 }, { "epoch": 0.08, "grad_norm": 0.4058095462488843, "learning_rate": 1.9893490383931095e-05, "loss": 0.2257, "step": 1308 }, { "epoch": 0.08, "grad_norm": 1.6788930294125746, "learning_rate": 1.989321933328838e-05, "loss": 0.8471, "step": 1309 }, { "epoch": 0.08, "grad_norm": 0.7626511562337264, "learning_rate": 1.9892947940043906e-05, "loss": 0.5442, "step": 1310 }, { "epoch": 0.08, "grad_norm": 0.5049748058118498, "learning_rate": 1.989267620420707e-05, "loss": 0.2766, "step": 1311 }, { "epoch": 0.08, "grad_norm": 0.7666799027992355, "learning_rate": 1.9892404125787283e-05, "loss": 0.438, "step": 1312 }, { "epoch": 0.08, "grad_norm": 0.3823035156163941, "learning_rate": 1.9892131704793968e-05, "loss": 0.2047, "step": 1313 }, { "epoch": 0.08, "grad_norm": 0.47813218026484006, "learning_rate": 1.9891858941236554e-05, "loss": 0.2231, "step": 1314 }, { "epoch": 0.08, "grad_norm": 0.573911958015997, "learning_rate": 1.989158583512449e-05, "loss": 0.377, "step": 1315 }, { "epoch": 0.08, "grad_norm": 0.7540145368248665, "learning_rate": 1.989131238646724e-05, "loss": 0.4964, "step": 1316 }, { "epoch": 0.08, "grad_norm": 0.534381050112668, "learning_rate": 1.989103859527426e-05, "loss": 0.3059, "step": 1317 }, { "epoch": 0.08, "grad_norm": 0.4482636946041032, "learning_rate": 1.9890764461555044e-05, "loss": 0.2821, "step": 1318 }, { "epoch": 0.08, "grad_norm": 0.43344662744253243, "learning_rate": 1.9890489985319077e-05, "loss": 0.268, "step": 1319 }, { "epoch": 0.08, "grad_norm": 0.5202040997266357, "learning_rate": 1.989021516657587e-05, "loss": 0.2804, "step": 1320 }, { "epoch": 0.08, "grad_norm": 1.2201274898081262, "learning_rate": 1.9889940005334932e-05, "loss": 0.5302, "step": 1321 }, { "epoch": 0.08, "grad_norm": 0.8568685374679005, "learning_rate": 1.98896645016058e-05, "loss": 0.5544, "step": 1322 }, { "epoch": 0.08, "grad_norm": 0.45238693312134165, "learning_rate": 1.9889388655398015e-05, "loss": 0.3015, "step": 1323 }, { "epoch": 0.08, "grad_norm": 0.45007359982134754, "learning_rate": 1.9889112466721122e-05, "loss": 0.3122, "step": 1324 }, { "epoch": 0.08, "grad_norm": 0.3702787792276908, "learning_rate": 1.9888835935584686e-05, "loss": 0.2158, "step": 1325 }, { "epoch": 0.08, "grad_norm": 0.44701134918728447, "learning_rate": 1.9888559061998294e-05, "loss": 0.2895, "step": 1326 }, { "epoch": 0.08, "grad_norm": 0.5825053647444507, "learning_rate": 1.9888281845971522e-05, "loss": 0.3207, "step": 1327 }, { "epoch": 0.08, "grad_norm": 1.9820095173697092, "learning_rate": 1.988800428751398e-05, "loss": 0.6004, "step": 1328 }, { "epoch": 0.08, "grad_norm": 0.46305327622259507, "learning_rate": 1.988772638663527e-05, "loss": 0.3056, "step": 1329 }, { "epoch": 0.08, "grad_norm": 1.3268004204954384, "learning_rate": 1.9887448143345022e-05, "loss": 0.6153, "step": 1330 }, { "epoch": 0.08, "grad_norm": 0.3606881840853597, "learning_rate": 1.988716955765287e-05, "loss": 0.2031, "step": 1331 }, { "epoch": 0.08, "grad_norm": 0.45448857925172526, "learning_rate": 1.988689062956846e-05, "loss": 0.2976, "step": 1332 }, { "epoch": 0.08, "grad_norm": 0.8464056685167116, "learning_rate": 1.9886611359101455e-05, "loss": 0.4598, "step": 1333 }, { "epoch": 0.08, "grad_norm": 0.8824103472910778, "learning_rate": 1.9886331746261523e-05, "loss": 0.4065, "step": 1334 }, { "epoch": 0.08, "grad_norm": 0.4968644088356621, "learning_rate": 1.988605179105835e-05, "loss": 0.3155, "step": 1335 }, { "epoch": 0.08, "grad_norm": 0.5104609846598314, "learning_rate": 1.9885771493501625e-05, "loss": 0.3783, "step": 1336 }, { "epoch": 0.08, "grad_norm": 0.28315552118650017, "learning_rate": 1.9885490853601058e-05, "loss": 0.1439, "step": 1337 }, { "epoch": 0.08, "grad_norm": 0.441602883979966, "learning_rate": 1.988520987136637e-05, "loss": 0.3015, "step": 1338 }, { "epoch": 0.08, "grad_norm": 0.4708608501945979, "learning_rate": 1.9884928546807286e-05, "loss": 0.3942, "step": 1339 }, { "epoch": 0.08, "grad_norm": 0.7510346237626402, "learning_rate": 1.9884646879933555e-05, "loss": 0.5117, "step": 1340 }, { "epoch": 0.08, "grad_norm": 0.48347633854447297, "learning_rate": 1.9884364870754925e-05, "loss": 0.2751, "step": 1341 }, { "epoch": 0.08, "grad_norm": 1.5360278255553699, "learning_rate": 1.988408251928117e-05, "loss": 0.6755, "step": 1342 }, { "epoch": 0.08, "grad_norm": 0.38517016065839627, "learning_rate": 1.9883799825522056e-05, "loss": 0.2931, "step": 1343 }, { "epoch": 0.08, "grad_norm": 0.36798370244603723, "learning_rate": 1.988351678948738e-05, "loss": 0.1964, "step": 1344 }, { "epoch": 0.08, "grad_norm": 0.8098718422245624, "learning_rate": 1.9883233411186947e-05, "loss": 0.5179, "step": 1345 }, { "epoch": 0.08, "grad_norm": 1.1491988511215931, "learning_rate": 1.9882949690630563e-05, "loss": 0.634, "step": 1346 }, { "epoch": 0.08, "grad_norm": 0.38455018173049227, "learning_rate": 1.9882665627828054e-05, "loss": 0.2628, "step": 1347 }, { "epoch": 0.08, "grad_norm": 2.134503438141174, "learning_rate": 1.988238122278926e-05, "loss": 0.6759, "step": 1348 }, { "epoch": 0.08, "grad_norm": 0.45620155579332167, "learning_rate": 1.9882096475524032e-05, "loss": 0.3142, "step": 1349 }, { "epoch": 0.08, "grad_norm": 0.6149984382187618, "learning_rate": 1.988181138604223e-05, "loss": 0.2522, "step": 1350 }, { "epoch": 0.08, "grad_norm": 0.534257884253937, "learning_rate": 1.988152595435372e-05, "loss": 0.3617, "step": 1351 }, { "epoch": 0.08, "grad_norm": 0.6039783722918978, "learning_rate": 1.9881240180468394e-05, "loss": 0.3739, "step": 1352 }, { "epoch": 0.08, "grad_norm": 0.5317008736940727, "learning_rate": 1.988095406439614e-05, "loss": 0.1945, "step": 1353 }, { "epoch": 0.08, "grad_norm": 0.7544906573407119, "learning_rate": 1.9880667606146878e-05, "loss": 0.4314, "step": 1354 }, { "epoch": 0.08, "grad_norm": 0.461252463212955, "learning_rate": 1.988038080573052e-05, "loss": 0.3904, "step": 1355 }, { "epoch": 0.08, "grad_norm": 0.6937852377504348, "learning_rate": 1.9880093663157e-05, "loss": 0.3911, "step": 1356 }, { "epoch": 0.08, "grad_norm": 0.5038983872790718, "learning_rate": 1.9879806178436258e-05, "loss": 0.355, "step": 1357 }, { "epoch": 0.08, "grad_norm": 0.6390378111434681, "learning_rate": 1.9879518351578254e-05, "loss": 0.2838, "step": 1358 }, { "epoch": 0.08, "grad_norm": 0.38546418075325506, "learning_rate": 1.9879230182592958e-05, "loss": 0.3129, "step": 1359 }, { "epoch": 0.08, "grad_norm": 0.49492171454772504, "learning_rate": 1.9878941671490342e-05, "loss": 0.2142, "step": 1360 }, { "epoch": 0.08, "grad_norm": 0.8425537889343386, "learning_rate": 1.9878652818280402e-05, "loss": 0.5642, "step": 1361 }, { "epoch": 0.08, "grad_norm": 0.4003643201454913, "learning_rate": 1.9878363622973137e-05, "loss": 0.3595, "step": 1362 }, { "epoch": 0.08, "grad_norm": 0.43574709207441126, "learning_rate": 1.987807408557857e-05, "loss": 0.2916, "step": 1363 }, { "epoch": 0.08, "grad_norm": 0.517603021457836, "learning_rate": 1.987778420610672e-05, "loss": 0.3182, "step": 1364 }, { "epoch": 0.08, "grad_norm": 0.40541367749597956, "learning_rate": 1.9877493984567623e-05, "loss": 0.2377, "step": 1365 }, { "epoch": 0.08, "grad_norm": 0.595146514308601, "learning_rate": 1.9877203420971338e-05, "loss": 0.3073, "step": 1366 }, { "epoch": 0.08, "grad_norm": 0.3996742522387982, "learning_rate": 1.9876912515327925e-05, "loss": 0.3877, "step": 1367 }, { "epoch": 0.08, "grad_norm": 0.6372451758270241, "learning_rate": 1.9876621267647452e-05, "loss": 0.4087, "step": 1368 }, { "epoch": 0.08, "grad_norm": 0.5014611845347008, "learning_rate": 1.9876329677940015e-05, "loss": 0.3651, "step": 1369 }, { "epoch": 0.08, "grad_norm": 0.4638886549372142, "learning_rate": 1.9876037746215703e-05, "loss": 0.3335, "step": 1370 }, { "epoch": 0.08, "grad_norm": 0.5265298020277424, "learning_rate": 1.9875745472484627e-05, "loss": 0.2185, "step": 1371 }, { "epoch": 0.08, "grad_norm": 0.38274955304444164, "learning_rate": 1.987545285675691e-05, "loss": 0.261, "step": 1372 }, { "epoch": 0.08, "grad_norm": 0.7594647538424038, "learning_rate": 1.9875159899042685e-05, "loss": 0.4655, "step": 1373 }, { "epoch": 0.08, "grad_norm": 0.653896701468227, "learning_rate": 1.98748665993521e-05, "loss": 0.3104, "step": 1374 }, { "epoch": 0.08, "grad_norm": 0.39040508633732796, "learning_rate": 1.987457295769531e-05, "loss": 0.3225, "step": 1375 }, { "epoch": 0.08, "grad_norm": 1.0234810635667073, "learning_rate": 1.9874278974082482e-05, "loss": 0.5349, "step": 1376 }, { "epoch": 0.08, "grad_norm": 0.34396246269087427, "learning_rate": 1.9873984648523796e-05, "loss": 0.1595, "step": 1377 }, { "epoch": 0.08, "grad_norm": 0.5609481420866197, "learning_rate": 1.9873689981029445e-05, "loss": 0.3817, "step": 1378 }, { "epoch": 0.08, "grad_norm": 0.5179111274026793, "learning_rate": 1.9873394971609636e-05, "loss": 0.3453, "step": 1379 }, { "epoch": 0.08, "grad_norm": 0.46525695462885736, "learning_rate": 1.9873099620274585e-05, "loss": 0.327, "step": 1380 }, { "epoch": 0.08, "grad_norm": 0.6728477544118068, "learning_rate": 1.987280392703452e-05, "loss": 0.433, "step": 1381 }, { "epoch": 0.08, "grad_norm": 0.6129052674102026, "learning_rate": 1.987250789189968e-05, "loss": 0.437, "step": 1382 }, { "epoch": 0.08, "grad_norm": 0.33943298098310193, "learning_rate": 1.987221151488031e-05, "loss": 0.215, "step": 1383 }, { "epoch": 0.08, "grad_norm": 0.4695780101175386, "learning_rate": 1.9871914795986683e-05, "loss": 0.2768, "step": 1384 }, { "epoch": 0.08, "grad_norm": 0.9439529862803306, "learning_rate": 1.987161773522907e-05, "loss": 0.5373, "step": 1385 }, { "epoch": 0.08, "grad_norm": 0.4313605518517955, "learning_rate": 1.9871320332617762e-05, "loss": 0.2314, "step": 1386 }, { "epoch": 0.08, "grad_norm": 0.4968256366599863, "learning_rate": 1.9871022588163057e-05, "loss": 0.3409, "step": 1387 }, { "epoch": 0.08, "grad_norm": 1.4379547696004578, "learning_rate": 1.987072450187526e-05, "loss": 0.8263, "step": 1388 }, { "epoch": 0.08, "grad_norm": 0.3114063006115645, "learning_rate": 1.98704260737647e-05, "loss": 0.117, "step": 1389 }, { "epoch": 0.08, "grad_norm": 0.5394045691022546, "learning_rate": 1.9870127303841708e-05, "loss": 0.393, "step": 1390 }, { "epoch": 0.08, "grad_norm": 0.6326035735789947, "learning_rate": 1.9869828192116634e-05, "loss": 0.3016, "step": 1391 }, { "epoch": 0.08, "grad_norm": 0.73994607381943, "learning_rate": 1.986952873859983e-05, "loss": 0.4724, "step": 1392 }, { "epoch": 0.08, "grad_norm": 0.43056343619915194, "learning_rate": 1.9869228943301677e-05, "loss": 0.2817, "step": 1393 }, { "epoch": 0.08, "grad_norm": 0.6526090883250378, "learning_rate": 1.9868928806232545e-05, "loss": 0.4252, "step": 1394 }, { "epoch": 0.08, "grad_norm": 0.5255751811418452, "learning_rate": 1.9868628327402833e-05, "loss": 0.3748, "step": 1395 }, { "epoch": 0.08, "grad_norm": 0.3704563536878913, "learning_rate": 1.9868327506822948e-05, "loss": 0.3383, "step": 1396 }, { "epoch": 0.08, "grad_norm": 0.4162983542173049, "learning_rate": 1.9868026344503307e-05, "loss": 0.1772, "step": 1397 }, { "epoch": 0.08, "grad_norm": 0.5122113190467039, "learning_rate": 1.9867724840454336e-05, "loss": 0.3366, "step": 1398 }, { "epoch": 0.08, "grad_norm": 0.46115541335438065, "learning_rate": 1.986742299468648e-05, "loss": 0.2477, "step": 1399 }, { "epoch": 0.08, "grad_norm": 3.5151180809976266, "learning_rate": 1.9867120807210188e-05, "loss": 0.7334, "step": 1400 }, { "epoch": 0.08, "grad_norm": 2.158542189747334, "learning_rate": 1.9866818278035926e-05, "loss": 0.4279, "step": 1401 }, { "epoch": 0.08, "grad_norm": 0.6994009052744022, "learning_rate": 1.9866515407174174e-05, "loss": 0.3324, "step": 1402 }, { "epoch": 0.08, "grad_norm": 1.0880865146791605, "learning_rate": 1.9866212194635414e-05, "loss": 0.2424, "step": 1403 }, { "epoch": 0.08, "grad_norm": 1.9025391903348476, "learning_rate": 1.986590864043015e-05, "loss": 0.4065, "step": 1404 }, { "epoch": 0.08, "grad_norm": 1.2725994136331205, "learning_rate": 1.98656047445689e-05, "loss": 0.3837, "step": 1405 }, { "epoch": 0.08, "grad_norm": 1.1275735184080333, "learning_rate": 1.9865300507062177e-05, "loss": 0.3384, "step": 1406 }, { "epoch": 0.08, "grad_norm": 0.6833289165964801, "learning_rate": 1.986499592792052e-05, "loss": 0.501, "step": 1407 }, { "epoch": 0.08, "grad_norm": 1.7700107274879846, "learning_rate": 1.9864691007154486e-05, "loss": 0.3731, "step": 1408 }, { "epoch": 0.08, "grad_norm": 2.4943906235284774, "learning_rate": 1.9864385744774618e-05, "loss": 0.1505, "step": 1409 }, { "epoch": 0.08, "grad_norm": 0.7558856050651058, "learning_rate": 1.9864080140791503e-05, "loss": 0.3196, "step": 1410 }, { "epoch": 0.08, "grad_norm": 0.9313219061687138, "learning_rate": 1.9863774195215714e-05, "loss": 0.359, "step": 1411 }, { "epoch": 0.08, "grad_norm": 1.319474351714657, "learning_rate": 1.9863467908057846e-05, "loss": 0.4746, "step": 1412 }, { "epoch": 0.08, "grad_norm": 0.7970602847741426, "learning_rate": 1.986316127932851e-05, "loss": 0.4742, "step": 1413 }, { "epoch": 0.08, "grad_norm": 2.602170573188123, "learning_rate": 1.9862854309038324e-05, "loss": 0.3773, "step": 1414 }, { "epoch": 0.08, "grad_norm": 0.9078365075980418, "learning_rate": 1.9862546997197917e-05, "loss": 0.2455, "step": 1415 }, { "epoch": 0.08, "grad_norm": 0.6767700019137007, "learning_rate": 1.9862239343817932e-05, "loss": 0.2878, "step": 1416 }, { "epoch": 0.08, "grad_norm": 3.0559677425033875, "learning_rate": 1.9861931348909024e-05, "loss": 0.3493, "step": 1417 }, { "epoch": 0.08, "grad_norm": 1.0023429161123776, "learning_rate": 1.9861623012481853e-05, "loss": 0.4367, "step": 1418 }, { "epoch": 0.08, "grad_norm": 0.9368686272084427, "learning_rate": 1.9861314334547105e-05, "loss": 0.3789, "step": 1419 }, { "epoch": 0.08, "grad_norm": 0.9117183827974491, "learning_rate": 1.9861005315115466e-05, "loss": 0.3442, "step": 1420 }, { "epoch": 0.08, "grad_norm": 1.3748896935498363, "learning_rate": 1.9860695954197635e-05, "loss": 0.3071, "step": 1421 }, { "epoch": 0.08, "grad_norm": 0.6790630927567458, "learning_rate": 1.9860386251804327e-05, "loss": 0.2388, "step": 1422 }, { "epoch": 0.08, "grad_norm": 0.8602801769926192, "learning_rate": 1.9860076207946268e-05, "loss": 0.352, "step": 1423 }, { "epoch": 0.08, "grad_norm": 1.4437574717264712, "learning_rate": 1.9859765822634194e-05, "loss": 0.5806, "step": 1424 }, { "epoch": 0.08, "grad_norm": 0.8911016493029239, "learning_rate": 1.9859455095878853e-05, "loss": 0.4489, "step": 1425 }, { "epoch": 0.08, "grad_norm": 0.8138852682470921, "learning_rate": 1.9859144027691006e-05, "loss": 0.3231, "step": 1426 }, { "epoch": 0.08, "grad_norm": 1.3279680757064087, "learning_rate": 1.9858832618081427e-05, "loss": 0.2975, "step": 1427 }, { "epoch": 0.08, "grad_norm": 0.8824804769850708, "learning_rate": 1.9858520867060897e-05, "loss": 0.1917, "step": 1428 }, { "epoch": 0.08, "grad_norm": 0.7736102829235555, "learning_rate": 1.9858208774640213e-05, "loss": 0.3659, "step": 1429 }, { "epoch": 0.08, "grad_norm": 1.003301610323338, "learning_rate": 1.9857896340830182e-05, "loss": 0.4005, "step": 1430 }, { "epoch": 0.08, "grad_norm": 1.6699557262792868, "learning_rate": 1.9857583565641627e-05, "loss": 0.6112, "step": 1431 }, { "epoch": 0.08, "grad_norm": 0.7784077780986679, "learning_rate": 1.9857270449085378e-05, "loss": 0.2576, "step": 1432 }, { "epoch": 0.08, "grad_norm": 1.7558353459824572, "learning_rate": 1.9856956991172272e-05, "loss": 0.7202, "step": 1433 }, { "epoch": 0.08, "grad_norm": 0.8726908894911565, "learning_rate": 1.9856643191913173e-05, "loss": 0.3263, "step": 1434 }, { "epoch": 0.08, "grad_norm": 0.6394845806469756, "learning_rate": 1.9856329051318942e-05, "loss": 0.2556, "step": 1435 }, { "epoch": 0.08, "grad_norm": 2.364157046676915, "learning_rate": 1.9856014569400463e-05, "loss": 0.5657, "step": 1436 }, { "epoch": 0.08, "grad_norm": 0.6999329889210596, "learning_rate": 1.985569974616862e-05, "loss": 0.3046, "step": 1437 }, { "epoch": 0.08, "grad_norm": 0.8304526967619219, "learning_rate": 1.9855384581634322e-05, "loss": 0.2348, "step": 1438 }, { "epoch": 0.08, "grad_norm": 0.6726255986180282, "learning_rate": 1.985506907580848e-05, "loss": 0.4075, "step": 1439 }, { "epoch": 0.08, "grad_norm": 0.6573471554110082, "learning_rate": 1.9854753228702016e-05, "loss": 0.3843, "step": 1440 }, { "epoch": 0.08, "grad_norm": 0.5886305659040076, "learning_rate": 1.9854437040325872e-05, "loss": 0.234, "step": 1441 }, { "epoch": 0.08, "grad_norm": 0.7112981770432031, "learning_rate": 1.9854120510691e-05, "loss": 0.4008, "step": 1442 }, { "epoch": 0.08, "grad_norm": 0.6036621461010034, "learning_rate": 1.9853803639808357e-05, "loss": 0.2906, "step": 1443 }, { "epoch": 0.08, "grad_norm": 0.5678057149957291, "learning_rate": 1.9853486427688918e-05, "loss": 0.3099, "step": 1444 }, { "epoch": 0.08, "grad_norm": 0.5707284738168328, "learning_rate": 1.9853168874343665e-05, "loss": 0.3151, "step": 1445 }, { "epoch": 0.08, "grad_norm": 0.6543721185475424, "learning_rate": 1.98528509797836e-05, "loss": 0.4543, "step": 1446 }, { "epoch": 0.08, "grad_norm": 0.5152518450197626, "learning_rate": 1.985253274401973e-05, "loss": 0.3259, "step": 1447 }, { "epoch": 0.08, "grad_norm": 0.7630087118038243, "learning_rate": 1.985221416706307e-05, "loss": 0.5161, "step": 1448 }, { "epoch": 0.08, "grad_norm": 0.6039997332315664, "learning_rate": 1.9851895248924662e-05, "loss": 0.2395, "step": 1449 }, { "epoch": 0.08, "grad_norm": 0.5091314902898457, "learning_rate": 1.9851575989615545e-05, "loss": 0.3048, "step": 1450 }, { "epoch": 0.08, "grad_norm": 1.4961746423041666, "learning_rate": 1.9851256389146774e-05, "loss": 0.089, "step": 1451 }, { "epoch": 0.08, "grad_norm": 0.8407751460559815, "learning_rate": 1.985093644752942e-05, "loss": 0.5311, "step": 1452 }, { "epoch": 0.08, "grad_norm": 0.5369866218744758, "learning_rate": 1.9850616164774556e-05, "loss": 0.3195, "step": 1453 }, { "epoch": 0.08, "grad_norm": 0.6396346126247083, "learning_rate": 1.985029554089328e-05, "loss": 0.3142, "step": 1454 }, { "epoch": 0.08, "grad_norm": 0.6021130089990159, "learning_rate": 1.9849974575896695e-05, "loss": 0.2219, "step": 1455 }, { "epoch": 0.08, "grad_norm": 0.8167722928601668, "learning_rate": 1.984965326979591e-05, "loss": 0.3154, "step": 1456 }, { "epoch": 0.08, "grad_norm": 0.6078781381764322, "learning_rate": 1.984933162260206e-05, "loss": 0.4259, "step": 1457 }, { "epoch": 0.08, "grad_norm": 0.5111068434766219, "learning_rate": 1.9849009634326275e-05, "loss": 0.3663, "step": 1458 }, { "epoch": 0.08, "grad_norm": 0.491679245588098, "learning_rate": 1.984868730497971e-05, "loss": 0.3022, "step": 1459 }, { "epoch": 0.08, "grad_norm": 0.6657778643771437, "learning_rate": 1.9848364634573533e-05, "loss": 0.461, "step": 1460 }, { "epoch": 0.08, "grad_norm": 0.4499622319663323, "learning_rate": 1.984804162311891e-05, "loss": 0.1674, "step": 1461 }, { "epoch": 0.08, "grad_norm": 0.43775680064572114, "learning_rate": 1.9847718270627022e-05, "loss": 0.2935, "step": 1462 }, { "epoch": 0.08, "grad_norm": 0.9724835720183442, "learning_rate": 1.9847394577109083e-05, "loss": 0.6095, "step": 1463 }, { "epoch": 0.08, "grad_norm": 0.6832214021917391, "learning_rate": 1.984707054257629e-05, "loss": 0.4568, "step": 1464 }, { "epoch": 0.08, "grad_norm": 0.553754646433227, "learning_rate": 1.9846746167039864e-05, "loss": 0.2975, "step": 1465 }, { "epoch": 0.08, "grad_norm": 0.4793911200155322, "learning_rate": 1.9846421450511045e-05, "loss": 0.3857, "step": 1466 }, { "epoch": 0.08, "grad_norm": 0.553399104439865, "learning_rate": 1.9846096393001074e-05, "loss": 0.1008, "step": 1467 }, { "epoch": 0.08, "grad_norm": 0.4279760960702405, "learning_rate": 1.984577099452121e-05, "loss": 0.2942, "step": 1468 }, { "epoch": 0.08, "grad_norm": 1.0356861473365662, "learning_rate": 1.984544525508272e-05, "loss": 0.5771, "step": 1469 }, { "epoch": 0.08, "grad_norm": 0.4577243781894562, "learning_rate": 1.9845119174696882e-05, "loss": 0.4015, "step": 1470 }, { "epoch": 0.08, "grad_norm": 0.42424685772485127, "learning_rate": 1.984479275337499e-05, "loss": 0.2457, "step": 1471 }, { "epoch": 0.08, "grad_norm": 1.4301253843484638, "learning_rate": 1.984446599112835e-05, "loss": 0.838, "step": 1472 }, { "epoch": 0.08, "grad_norm": 0.3919599300947234, "learning_rate": 1.9844138887968273e-05, "loss": 0.304, "step": 1473 }, { "epoch": 0.08, "grad_norm": 0.3932132520222729, "learning_rate": 1.9843811443906093e-05, "loss": 0.1543, "step": 1474 }, { "epoch": 0.08, "grad_norm": 0.6901366164674826, "learning_rate": 1.9843483658953148e-05, "loss": 0.4529, "step": 1475 }, { "epoch": 0.08, "grad_norm": 0.7955472019255666, "learning_rate": 1.9843155533120782e-05, "loss": 0.5426, "step": 1476 }, { "epoch": 0.08, "grad_norm": 0.47244127096043176, "learning_rate": 1.9842827066420366e-05, "loss": 0.2129, "step": 1477 }, { "epoch": 0.08, "grad_norm": 0.4441680857479455, "learning_rate": 1.9842498258863274e-05, "loss": 0.362, "step": 1478 }, { "epoch": 0.08, "grad_norm": 0.5322606042879695, "learning_rate": 1.9842169110460885e-05, "loss": 0.295, "step": 1479 }, { "epoch": 0.09, "grad_norm": 0.404128094447175, "learning_rate": 1.9841839621224606e-05, "loss": 0.2315, "step": 1480 }, { "epoch": 0.09, "grad_norm": 0.5781529263016978, "learning_rate": 1.9841509791165847e-05, "loss": 0.3367, "step": 1481 }, { "epoch": 0.09, "grad_norm": 0.6389770846973686, "learning_rate": 1.9841179620296022e-05, "loss": 0.4023, "step": 1482 }, { "epoch": 0.09, "grad_norm": 0.5240438297258404, "learning_rate": 1.9840849108626574e-05, "loss": 0.3152, "step": 1483 }, { "epoch": 0.09, "grad_norm": 0.6238085399437978, "learning_rate": 1.984051825616894e-05, "loss": 0.34, "step": 1484 }, { "epoch": 0.09, "grad_norm": 0.5489870116923643, "learning_rate": 1.9840187062934583e-05, "loss": 0.3452, "step": 1485 }, { "epoch": 0.09, "grad_norm": 0.3663527210417489, "learning_rate": 1.9839855528934972e-05, "loss": 0.2432, "step": 1486 }, { "epoch": 0.09, "grad_norm": 0.6521194126080185, "learning_rate": 1.983952365418159e-05, "loss": 0.3821, "step": 1487 }, { "epoch": 0.09, "grad_norm": 0.48007821958494157, "learning_rate": 1.9839191438685922e-05, "loss": 0.3019, "step": 1488 }, { "epoch": 0.09, "grad_norm": 0.5161949916231795, "learning_rate": 1.9838858882459483e-05, "loss": 0.3056, "step": 1489 }, { "epoch": 0.09, "grad_norm": 0.5390200772732368, "learning_rate": 1.9838525985513783e-05, "loss": 0.2989, "step": 1490 }, { "epoch": 0.09, "grad_norm": 0.8864658034838391, "learning_rate": 1.9838192747860345e-05, "loss": 0.6144, "step": 1491 }, { "epoch": 0.09, "grad_norm": 0.5180367210317303, "learning_rate": 1.9837859169510723e-05, "loss": 0.4018, "step": 1492 }, { "epoch": 0.09, "grad_norm": 0.4120010395143323, "learning_rate": 1.9837525250476454e-05, "loss": 0.2938, "step": 1493 }, { "epoch": 0.09, "grad_norm": 0.40632841819630106, "learning_rate": 1.9837190990769115e-05, "loss": 0.2472, "step": 1494 }, { "epoch": 0.09, "grad_norm": 0.7107951744200354, "learning_rate": 1.9836856390400273e-05, "loss": 0.3808, "step": 1495 }, { "epoch": 0.09, "grad_norm": 0.5256133147742968, "learning_rate": 1.9836521449381515e-05, "loss": 0.3597, "step": 1496 }, { "epoch": 0.09, "grad_norm": 0.43642958130027093, "learning_rate": 1.9836186167724443e-05, "loss": 0.3331, "step": 1497 }, { "epoch": 0.09, "grad_norm": 0.6079517801611382, "learning_rate": 1.983585054544067e-05, "loss": 0.4103, "step": 1498 }, { "epoch": 0.09, "grad_norm": 0.46354758872951146, "learning_rate": 1.9835514582541812e-05, "loss": 0.3661, "step": 1499 }, { "epoch": 0.09, "grad_norm": 0.2972644570786185, "learning_rate": 1.983517827903951e-05, "loss": 0.1053, "step": 1500 }, { "epoch": 0.09, "grad_norm": 0.3795868350126114, "learning_rate": 1.9834841634945402e-05, "loss": 0.2982, "step": 1501 }, { "epoch": 0.09, "grad_norm": 0.5592247863268277, "learning_rate": 1.9834504650271157e-05, "loss": 0.3494, "step": 1502 }, { "epoch": 0.09, "grad_norm": 0.6832464565355145, "learning_rate": 1.9834167325028436e-05, "loss": 0.4326, "step": 1503 }, { "epoch": 0.09, "grad_norm": 0.45112271937913245, "learning_rate": 1.9833829659228923e-05, "loss": 0.3454, "step": 1504 }, { "epoch": 0.09, "grad_norm": 0.6484508195836393, "learning_rate": 1.983349165288431e-05, "loss": 0.3971, "step": 1505 }, { "epoch": 0.09, "grad_norm": 0.3254094644109379, "learning_rate": 1.983315330600631e-05, "loss": 0.2295, "step": 1506 }, { "epoch": 0.09, "grad_norm": 0.45949992766591424, "learning_rate": 1.983281461860663e-05, "loss": 0.2378, "step": 1507 }, { "epoch": 0.09, "grad_norm": 0.6729297248346193, "learning_rate": 1.9832475590697e-05, "loss": 0.4185, "step": 1508 }, { "epoch": 0.09, "grad_norm": 0.5529243330352177, "learning_rate": 1.9832136222289168e-05, "loss": 0.3886, "step": 1509 }, { "epoch": 0.09, "grad_norm": 0.6263171618684181, "learning_rate": 1.983179651339488e-05, "loss": 0.3395, "step": 1510 }, { "epoch": 0.09, "grad_norm": 0.4995660700715559, "learning_rate": 1.9831456464025897e-05, "loss": 0.3677, "step": 1511 }, { "epoch": 0.09, "grad_norm": 0.32612760199585944, "learning_rate": 1.9831116074194006e-05, "loss": 0.1966, "step": 1512 }, { "epoch": 0.09, "grad_norm": 0.507043580890754, "learning_rate": 1.9830775343910984e-05, "loss": 0.2792, "step": 1513 }, { "epoch": 0.09, "grad_norm": 0.5548136660748703, "learning_rate": 1.9830434273188636e-05, "loss": 0.3315, "step": 1514 }, { "epoch": 0.09, "grad_norm": 1.1903386244763454, "learning_rate": 1.9830092862038773e-05, "loss": 0.5327, "step": 1515 }, { "epoch": 0.09, "grad_norm": 0.35362906803195593, "learning_rate": 1.9829751110473215e-05, "loss": 0.0803, "step": 1516 }, { "epoch": 0.09, "grad_norm": 0.38747733634272685, "learning_rate": 1.98294090185038e-05, "loss": 0.3157, "step": 1517 }, { "epoch": 0.09, "grad_norm": 0.3734505271613158, "learning_rate": 1.9829066586142375e-05, "loss": 0.2002, "step": 1518 }, { "epoch": 0.09, "grad_norm": 0.9309793973511313, "learning_rate": 1.982872381340079e-05, "loss": 0.5253, "step": 1519 }, { "epoch": 0.09, "grad_norm": 0.8832068734492745, "learning_rate": 1.982838070029093e-05, "loss": 0.3373, "step": 1520 }, { "epoch": 0.09, "grad_norm": 0.5060812038856458, "learning_rate": 1.9828037246824664e-05, "loss": 0.3777, "step": 1521 }, { "epoch": 0.09, "grad_norm": 0.7545887148384214, "learning_rate": 1.9827693453013892e-05, "loss": 0.5325, "step": 1522 }, { "epoch": 0.09, "grad_norm": 0.3855633161515275, "learning_rate": 1.982734931887052e-05, "loss": 0.2315, "step": 1523 }, { "epoch": 0.09, "grad_norm": 0.3295751571087598, "learning_rate": 1.982700484440646e-05, "loss": 0.2219, "step": 1524 }, { "epoch": 0.09, "grad_norm": 0.4242518325493269, "learning_rate": 1.982666002963365e-05, "loss": 0.3222, "step": 1525 }, { "epoch": 0.09, "grad_norm": 0.4857458074762703, "learning_rate": 1.982631487456402e-05, "loss": 0.3091, "step": 1526 }, { "epoch": 0.09, "grad_norm": 0.8730410681906142, "learning_rate": 1.9825969379209533e-05, "loss": 0.5321, "step": 1527 }, { "epoch": 0.09, "grad_norm": 0.8632641640956932, "learning_rate": 1.9825623543582145e-05, "loss": 0.4488, "step": 1528 }, { "epoch": 0.09, "grad_norm": 0.38414737870140997, "learning_rate": 1.982527736769384e-05, "loss": 0.2726, "step": 1529 }, { "epoch": 0.09, "grad_norm": 0.4745151571052424, "learning_rate": 1.9824930851556604e-05, "loss": 0.3034, "step": 1530 }, { "epoch": 0.09, "grad_norm": 0.9141713621897183, "learning_rate": 1.982458399518243e-05, "loss": 0.5761, "step": 1531 }, { "epoch": 0.09, "grad_norm": 0.4592062472149273, "learning_rate": 1.9824236798583338e-05, "loss": 0.3236, "step": 1532 }, { "epoch": 0.09, "grad_norm": 0.32912152990907256, "learning_rate": 1.9823889261771346e-05, "loss": 0.2727, "step": 1533 }, { "epoch": 0.09, "grad_norm": 0.9264614208288615, "learning_rate": 1.9823541384758492e-05, "loss": 0.4646, "step": 1534 }, { "epoch": 0.09, "grad_norm": 0.43380933167627445, "learning_rate": 1.982319316755682e-05, "loss": 0.3267, "step": 1535 }, { "epoch": 0.09, "grad_norm": 0.3434369284109887, "learning_rate": 1.9822844610178394e-05, "loss": 0.1541, "step": 1536 }, { "epoch": 0.09, "grad_norm": 0.42805356727710603, "learning_rate": 1.982249571263528e-05, "loss": 0.35, "step": 1537 }, { "epoch": 0.09, "grad_norm": 0.46768921780410705, "learning_rate": 1.9822146474939563e-05, "loss": 0.3597, "step": 1538 }, { "epoch": 0.09, "grad_norm": 0.3205738141307784, "learning_rate": 1.9821796897103334e-05, "loss": 0.171, "step": 1539 }, { "epoch": 0.09, "grad_norm": 0.4918143379538728, "learning_rate": 1.98214469791387e-05, "loss": 0.3352, "step": 1540 }, { "epoch": 0.09, "grad_norm": 0.38168005214386574, "learning_rate": 1.9821096721057787e-05, "loss": 0.3347, "step": 1541 }, { "epoch": 0.09, "grad_norm": 0.9996924570187796, "learning_rate": 1.982074612287271e-05, "loss": 0.4523, "step": 1542 }, { "epoch": 0.09, "grad_norm": 0.532892984371382, "learning_rate": 1.982039518459562e-05, "loss": 0.4597, "step": 1543 }, { "epoch": 0.09, "grad_norm": 0.4556488663844575, "learning_rate": 1.9820043906238667e-05, "loss": 0.3112, "step": 1544 }, { "epoch": 0.09, "grad_norm": 0.5333818960745033, "learning_rate": 1.9819692287814014e-05, "loss": 0.3715, "step": 1545 }, { "epoch": 0.09, "grad_norm": 0.3205794074373295, "learning_rate": 1.981934032933384e-05, "loss": 0.1031, "step": 1546 }, { "epoch": 0.09, "grad_norm": 0.5161864800464732, "learning_rate": 1.981898803081033e-05, "loss": 0.3052, "step": 1547 }, { "epoch": 0.09, "grad_norm": 0.7795833825785495, "learning_rate": 1.981863539225569e-05, "loss": 0.5345, "step": 1548 }, { "epoch": 0.09, "grad_norm": 0.4615357704091355, "learning_rate": 1.9818282413682127e-05, "loss": 0.3508, "step": 1549 }, { "epoch": 0.09, "grad_norm": 0.3946497104947286, "learning_rate": 1.981792909510187e-05, "loss": 0.2904, "step": 1550 }, { "epoch": 0.09, "grad_norm": 0.3454321623744058, "learning_rate": 1.9817575436527147e-05, "loss": 0.2079, "step": 1551 }, { "epoch": 0.09, "grad_norm": 0.4495745099321259, "learning_rate": 1.981722143797021e-05, "loss": 0.2907, "step": 1552 }, { "epoch": 0.09, "grad_norm": 0.45876952116799197, "learning_rate": 1.9816867099443314e-05, "loss": 0.3153, "step": 1553 }, { "epoch": 0.09, "grad_norm": 1.1485942385955623, "learning_rate": 1.9816512420958734e-05, "loss": 0.5895, "step": 1554 }, { "epoch": 0.09, "grad_norm": 0.9054974881219597, "learning_rate": 1.9816157402528753e-05, "loss": 0.5669, "step": 1555 }, { "epoch": 0.09, "grad_norm": 0.4243340339417857, "learning_rate": 1.9815802044165663e-05, "loss": 0.2367, "step": 1556 }, { "epoch": 0.09, "grad_norm": 0.5642073908469096, "learning_rate": 1.981544634588177e-05, "loss": 0.3691, "step": 1557 }, { "epoch": 0.09, "grad_norm": 0.46111573851136795, "learning_rate": 1.9815090307689392e-05, "loss": 0.2229, "step": 1558 }, { "epoch": 0.09, "grad_norm": 0.42590791206403383, "learning_rate": 1.9814733929600857e-05, "loss": 0.2272, "step": 1559 }, { "epoch": 0.09, "grad_norm": 1.9421334937751409, "learning_rate": 1.981437721162851e-05, "loss": 0.5379, "step": 1560 }, { "epoch": 0.09, "grad_norm": 0.561819349052626, "learning_rate": 1.98140201537847e-05, "loss": 0.3477, "step": 1561 }, { "epoch": 0.09, "grad_norm": 0.4240617607652223, "learning_rate": 1.9813662756081794e-05, "loss": 0.2214, "step": 1562 }, { "epoch": 0.09, "grad_norm": 1.0596622839127672, "learning_rate": 1.9813305018532172e-05, "loss": 0.7146, "step": 1563 }, { "epoch": 0.09, "grad_norm": 0.3903654621157037, "learning_rate": 1.981294694114822e-05, "loss": 0.2457, "step": 1564 }, { "epoch": 0.09, "grad_norm": 0.4339515383696826, "learning_rate": 1.9812588523942334e-05, "loss": 0.2166, "step": 1565 }, { "epoch": 0.09, "grad_norm": 1.650174074607824, "learning_rate": 1.981222976692693e-05, "loss": 0.5498, "step": 1566 }, { "epoch": 0.09, "grad_norm": 1.2077256521150712, "learning_rate": 1.981187067011443e-05, "loss": 0.6577, "step": 1567 }, { "epoch": 0.09, "grad_norm": 0.6294600660337998, "learning_rate": 1.9811511233517275e-05, "loss": 0.313, "step": 1568 }, { "epoch": 0.09, "grad_norm": 0.560575934466919, "learning_rate": 1.9811151457147904e-05, "loss": 0.29, "step": 1569 }, { "epoch": 0.09, "grad_norm": 0.3676919839173703, "learning_rate": 1.981079134101878e-05, "loss": 0.214, "step": 1570 }, { "epoch": 0.09, "grad_norm": 0.5136664835618114, "learning_rate": 1.9810430885142377e-05, "loss": 0.312, "step": 1571 }, { "epoch": 0.09, "grad_norm": 1.4254358022411515, "learning_rate": 1.981007008953117e-05, "loss": 0.4594, "step": 1572 }, { "epoch": 0.09, "grad_norm": 0.7586444608485484, "learning_rate": 1.9809708954197658e-05, "loss": 0.3544, "step": 1573 }, { "epoch": 0.09, "grad_norm": 0.5827592667394669, "learning_rate": 1.980934747915435e-05, "loss": 0.2916, "step": 1574 }, { "epoch": 0.09, "grad_norm": 1.6997799572006387, "learning_rate": 1.9808985664413757e-05, "loss": 0.4934, "step": 1575 }, { "epoch": 0.09, "grad_norm": 0.34869623686568857, "learning_rate": 1.9808623509988415e-05, "loss": 0.2492, "step": 1576 }, { "epoch": 0.09, "grad_norm": 0.7051869049693643, "learning_rate": 1.980826101589086e-05, "loss": 0.3112, "step": 1577 }, { "epoch": 0.09, "grad_norm": 2.375367808092143, "learning_rate": 1.980789818213365e-05, "loss": 0.4905, "step": 1578 }, { "epoch": 0.09, "grad_norm": 1.6868012547392646, "learning_rate": 1.9807535008729347e-05, "loss": 0.8162, "step": 1579 }, { "epoch": 0.09, "grad_norm": 0.5252690640932575, "learning_rate": 1.980717149569053e-05, "loss": 0.3133, "step": 1580 }, { "epoch": 0.09, "grad_norm": 0.467302492898827, "learning_rate": 1.9806807643029786e-05, "loss": 0.3452, "step": 1581 }, { "epoch": 0.09, "grad_norm": 1.0265829484598235, "learning_rate": 1.9806443450759715e-05, "loss": 0.3143, "step": 1582 }, { "epoch": 0.09, "grad_norm": 0.5886112657839901, "learning_rate": 1.9806078918892925e-05, "loss": 0.34, "step": 1583 }, { "epoch": 0.09, "grad_norm": 0.5920413529763409, "learning_rate": 1.9805714047442045e-05, "loss": 0.3122, "step": 1584 }, { "epoch": 0.09, "grad_norm": 0.6764117396792537, "learning_rate": 1.9805348836419712e-05, "loss": 0.3036, "step": 1585 }, { "epoch": 0.09, "grad_norm": 0.5594852265265499, "learning_rate": 1.9804983285838567e-05, "loss": 0.3569, "step": 1586 }, { "epoch": 0.09, "grad_norm": 0.6844695489408091, "learning_rate": 1.9804617395711275e-05, "loss": 0.4128, "step": 1587 }, { "epoch": 0.09, "grad_norm": 0.6307190369431338, "learning_rate": 1.9804251166050505e-05, "loss": 0.3458, "step": 1588 }, { "epoch": 0.09, "grad_norm": 0.41952195036620993, "learning_rate": 1.9803884596868937e-05, "loss": 0.3168, "step": 1589 }, { "epoch": 0.09, "grad_norm": 0.3453562469345302, "learning_rate": 1.9803517688179264e-05, "loss": 0.2204, "step": 1590 }, { "epoch": 0.09, "grad_norm": 1.0262728387774869, "learning_rate": 1.9803150439994202e-05, "loss": 0.5397, "step": 1591 }, { "epoch": 0.09, "grad_norm": 0.5369925676511108, "learning_rate": 1.9802782852326456e-05, "loss": 0.3018, "step": 1592 }, { "epoch": 0.09, "grad_norm": 0.7179121095737457, "learning_rate": 1.9802414925188766e-05, "loss": 0.3874, "step": 1593 }, { "epoch": 0.09, "grad_norm": 0.7147170188690286, "learning_rate": 1.9802046658593867e-05, "loss": 0.5446, "step": 1594 }, { "epoch": 0.09, "grad_norm": 0.4854192777883944, "learning_rate": 1.9801678052554512e-05, "loss": 0.2607, "step": 1595 }, { "epoch": 0.09, "grad_norm": 0.3895561134200103, "learning_rate": 1.9801309107083465e-05, "loss": 0.2836, "step": 1596 }, { "epoch": 0.09, "grad_norm": 0.3859364875104156, "learning_rate": 1.9800939822193512e-05, "loss": 0.2896, "step": 1597 }, { "epoch": 0.09, "grad_norm": 0.4179295750829518, "learning_rate": 1.980057019789743e-05, "loss": 0.2141, "step": 1598 }, { "epoch": 0.09, "grad_norm": 0.5931917930493955, "learning_rate": 1.9800200234208022e-05, "loss": 0.3975, "step": 1599 }, { "epoch": 0.09, "grad_norm": 0.5242128316638529, "learning_rate": 1.9799829931138107e-05, "loss": 0.4053, "step": 1600 }, { "epoch": 0.09, "grad_norm": 0.4126511092046684, "learning_rate": 1.9799459288700498e-05, "loss": 0.2093, "step": 1601 }, { "epoch": 0.09, "grad_norm": 0.3701533373340219, "learning_rate": 1.9799088306908035e-05, "loss": 0.2427, "step": 1602 }, { "epoch": 0.09, "grad_norm": 1.1308744292919486, "learning_rate": 1.9798716985773567e-05, "loss": 0.6998, "step": 1603 }, { "epoch": 0.09, "grad_norm": 0.3758134428810142, "learning_rate": 1.9798345325309952e-05, "loss": 0.2527, "step": 1604 }, { "epoch": 0.09, "grad_norm": 0.5785694558525246, "learning_rate": 1.9797973325530058e-05, "loss": 0.4437, "step": 1605 }, { "epoch": 0.09, "grad_norm": 0.7455283306651675, "learning_rate": 1.9797600986446773e-05, "loss": 0.5741, "step": 1606 }, { "epoch": 0.09, "grad_norm": 0.49548617080975527, "learning_rate": 1.9797228308072982e-05, "loss": 0.3097, "step": 1607 }, { "epoch": 0.09, "grad_norm": 0.399945434741605, "learning_rate": 1.97968552904216e-05, "loss": 0.2833, "step": 1608 }, { "epoch": 0.09, "grad_norm": 0.3260983798124931, "learning_rate": 1.9796481933505535e-05, "loss": 0.1926, "step": 1609 }, { "epoch": 0.09, "grad_norm": 0.5046829217920206, "learning_rate": 1.9796108237337724e-05, "loss": 0.3538, "step": 1610 }, { "epoch": 0.09, "grad_norm": 0.588275165004359, "learning_rate": 1.979573420193111e-05, "loss": 0.3805, "step": 1611 }, { "epoch": 0.09, "grad_norm": 0.45169272979663194, "learning_rate": 1.9795359827298643e-05, "loss": 0.3902, "step": 1612 }, { "epoch": 0.09, "grad_norm": 0.45282344751971243, "learning_rate": 1.979498511345328e-05, "loss": 0.3399, "step": 1613 }, { "epoch": 0.09, "grad_norm": 0.3358407517147635, "learning_rate": 1.9794610060408007e-05, "loss": 0.1913, "step": 1614 }, { "epoch": 0.09, "grad_norm": 0.468574580020193, "learning_rate": 1.979423466817581e-05, "loss": 0.3122, "step": 1615 }, { "epoch": 0.09, "grad_norm": 0.49239486490812656, "learning_rate": 1.9793858936769683e-05, "loss": 0.3336, "step": 1616 }, { "epoch": 0.09, "grad_norm": 0.4719865931595738, "learning_rate": 1.9793482866202645e-05, "loss": 0.3269, "step": 1617 }, { "epoch": 0.09, "grad_norm": 0.8462916216082507, "learning_rate": 1.9793106456487717e-05, "loss": 0.5964, "step": 1618 }, { "epoch": 0.09, "grad_norm": 0.7204368957434376, "learning_rate": 1.9792729707637935e-05, "loss": 0.3242, "step": 1619 }, { "epoch": 0.09, "grad_norm": 0.36900163439375716, "learning_rate": 1.979235261966634e-05, "loss": 0.323, "step": 1620 }, { "epoch": 0.09, "grad_norm": 0.3250448460505923, "learning_rate": 1.9791975192586e-05, "loss": 0.1878, "step": 1621 }, { "epoch": 0.09, "grad_norm": 0.7114004359000305, "learning_rate": 1.9791597426409973e-05, "loss": 0.4203, "step": 1622 }, { "epoch": 0.09, "grad_norm": 0.48779469367220873, "learning_rate": 1.9791219321151356e-05, "loss": 0.4049, "step": 1623 }, { "epoch": 0.09, "grad_norm": 0.33688741860596855, "learning_rate": 1.979084087682323e-05, "loss": 0.2401, "step": 1624 }, { "epoch": 0.09, "grad_norm": 0.6457978863500597, "learning_rate": 1.9790462093438707e-05, "loss": 0.3773, "step": 1625 }, { "epoch": 0.09, "grad_norm": 0.43997114105386825, "learning_rate": 1.9790082971010903e-05, "loss": 0.3352, "step": 1626 }, { "epoch": 0.09, "grad_norm": 0.8578347622033227, "learning_rate": 1.9789703509552947e-05, "loss": 0.4459, "step": 1627 }, { "epoch": 0.09, "grad_norm": 0.3320817142902381, "learning_rate": 1.978932370907798e-05, "loss": 0.3051, "step": 1628 }, { "epoch": 0.09, "grad_norm": 0.46309576333499364, "learning_rate": 1.978894356959915e-05, "loss": 0.4221, "step": 1629 }, { "epoch": 0.09, "grad_norm": 0.23451118651819808, "learning_rate": 1.978856309112963e-05, "loss": 0.1448, "step": 1630 }, { "epoch": 0.09, "grad_norm": 0.4679183307690478, "learning_rate": 1.978818227368259e-05, "loss": 0.2926, "step": 1631 }, { "epoch": 0.09, "grad_norm": 0.4429604060875303, "learning_rate": 1.9787801117271213e-05, "loss": 0.3402, "step": 1632 }, { "epoch": 0.09, "grad_norm": 0.8594224955091179, "learning_rate": 1.978741962190871e-05, "loss": 0.6281, "step": 1633 }, { "epoch": 0.09, "grad_norm": 0.6065069575331038, "learning_rate": 1.9787037787608287e-05, "loss": 0.3928, "step": 1634 }, { "epoch": 0.09, "grad_norm": 0.42824444304880876, "learning_rate": 1.9786655614383163e-05, "loss": 0.3295, "step": 1635 }, { "epoch": 0.09, "grad_norm": 0.30050373194288177, "learning_rate": 1.978627310224658e-05, "loss": 0.2354, "step": 1636 }, { "epoch": 0.09, "grad_norm": 0.5963019421892746, "learning_rate": 1.9785890251211777e-05, "loss": 0.1868, "step": 1637 }, { "epoch": 0.09, "grad_norm": 0.400518818725669, "learning_rate": 1.9785507061292017e-05, "loss": 0.2862, "step": 1638 }, { "epoch": 0.09, "grad_norm": 0.9336374686602577, "learning_rate": 1.978512353250057e-05, "loss": 0.5577, "step": 1639 }, { "epoch": 0.09, "grad_norm": 0.35757051249823835, "learning_rate": 1.978473966485071e-05, "loss": 0.2641, "step": 1640 }, { "epoch": 0.09, "grad_norm": 0.4439429484546262, "learning_rate": 1.978435545835574e-05, "loss": 0.3602, "step": 1641 }, { "epoch": 0.09, "grad_norm": 0.3921207587665929, "learning_rate": 1.978397091302896e-05, "loss": 0.1743, "step": 1642 }, { "epoch": 0.09, "grad_norm": 0.7618586521311463, "learning_rate": 1.9783586028883688e-05, "loss": 0.4513, "step": 1643 }, { "epoch": 0.09, "grad_norm": 0.3751913533863891, "learning_rate": 1.9783200805933252e-05, "loss": 0.2761, "step": 1644 }, { "epoch": 0.09, "grad_norm": 0.7805939473702508, "learning_rate": 1.9782815244190997e-05, "loss": 0.5491, "step": 1645 }, { "epoch": 0.09, "grad_norm": 0.6195714209412733, "learning_rate": 1.9782429343670267e-05, "loss": 0.5169, "step": 1646 }, { "epoch": 0.09, "grad_norm": 0.4918147444122574, "learning_rate": 1.978204310438443e-05, "loss": 0.2578, "step": 1647 }, { "epoch": 0.09, "grad_norm": 0.3435562889326771, "learning_rate": 1.9781656526346863e-05, "loss": 0.2275, "step": 1648 }, { "epoch": 0.09, "grad_norm": 0.7237826939442035, "learning_rate": 1.9781269609570945e-05, "loss": 0.3185, "step": 1649 }, { "epoch": 0.09, "grad_norm": 0.5178707759432076, "learning_rate": 1.978088235407009e-05, "loss": 0.2934, "step": 1650 }, { "epoch": 0.09, "grad_norm": 0.7923454047135651, "learning_rate": 1.978049475985769e-05, "loss": 0.4562, "step": 1651 }, { "epoch": 0.09, "grad_norm": 0.5428509403531181, "learning_rate": 1.9780106826947184e-05, "loss": 0.3805, "step": 1652 }, { "epoch": 0.09, "grad_norm": 0.45702446485133047, "learning_rate": 1.9779718555351997e-05, "loss": 0.2591, "step": 1653 }, { "epoch": 0.1, "grad_norm": 0.36296431359883, "learning_rate": 1.9779329945085578e-05, "loss": 0.2195, "step": 1654 }, { "epoch": 0.1, "grad_norm": 0.6704714680728022, "learning_rate": 1.9778940996161382e-05, "loss": 0.4751, "step": 1655 }, { "epoch": 0.1, "grad_norm": 0.43102537372482086, "learning_rate": 1.9778551708592883e-05, "loss": 0.2816, "step": 1656 }, { "epoch": 0.1, "grad_norm": 0.7987627103272095, "learning_rate": 1.9778162082393554e-05, "loss": 0.4378, "step": 1657 }, { "epoch": 0.1, "grad_norm": 1.4586157829032407, "learning_rate": 1.9777772117576893e-05, "loss": 0.8086, "step": 1658 }, { "epoch": 0.1, "grad_norm": 0.5097364905331209, "learning_rate": 1.977738181415641e-05, "loss": 0.2658, "step": 1659 }, { "epoch": 0.1, "grad_norm": 0.5945672810230304, "learning_rate": 1.977699117214561e-05, "loss": 0.2941, "step": 1660 }, { "epoch": 0.1, "grad_norm": 0.3399558203328304, "learning_rate": 1.9776600191558025e-05, "loss": 0.2018, "step": 1661 }, { "epoch": 0.1, "grad_norm": 0.6445674144621936, "learning_rate": 1.9776208872407202e-05, "loss": 0.3421, "step": 1662 }, { "epoch": 0.1, "grad_norm": 0.7888347334580998, "learning_rate": 1.9775817214706682e-05, "loss": 0.3533, "step": 1663 }, { "epoch": 0.1, "grad_norm": 0.7188794356920819, "learning_rate": 1.977542521847003e-05, "loss": 0.3961, "step": 1664 }, { "epoch": 0.1, "grad_norm": 0.5233002822851152, "learning_rate": 1.9775032883710826e-05, "loss": 0.3212, "step": 1665 }, { "epoch": 0.1, "grad_norm": 0.22069561834964382, "learning_rate": 1.9774640210442654e-05, "loss": 0.1167, "step": 1666 }, { "epoch": 0.1, "grad_norm": 0.4538299309152852, "learning_rate": 1.977424719867911e-05, "loss": 0.3369, "step": 1667 }, { "epoch": 0.1, "grad_norm": 0.5837962589537384, "learning_rate": 1.9773853848433806e-05, "loss": 0.3332, "step": 1668 }, { "epoch": 0.1, "grad_norm": 1.2822390523629652, "learning_rate": 1.9773460159720365e-05, "loss": 0.499, "step": 1669 }, { "epoch": 0.1, "grad_norm": 0.6098981703977231, "learning_rate": 1.977306613255242e-05, "loss": 0.3464, "step": 1670 }, { "epoch": 0.1, "grad_norm": 0.5010711098035411, "learning_rate": 1.977267176694361e-05, "loss": 0.3058, "step": 1671 }, { "epoch": 0.1, "grad_norm": 0.4940495865152775, "learning_rate": 1.97722770629076e-05, "loss": 0.3481, "step": 1672 }, { "epoch": 0.1, "grad_norm": 0.6839400527952039, "learning_rate": 1.9771882020458055e-05, "loss": 0.3003, "step": 1673 }, { "epoch": 0.1, "grad_norm": 0.4901359588398224, "learning_rate": 1.9771486639608657e-05, "loss": 0.3097, "step": 1674 }, { "epoch": 0.1, "grad_norm": 0.38335500215663265, "learning_rate": 1.9771090920373096e-05, "loss": 0.2784, "step": 1675 }, { "epoch": 0.1, "grad_norm": 1.0457668872874353, "learning_rate": 1.9770694862765077e-05, "loss": 0.6059, "step": 1676 }, { "epoch": 0.1, "grad_norm": 0.4529475843166634, "learning_rate": 1.9770298466798316e-05, "loss": 0.2996, "step": 1677 }, { "epoch": 0.1, "grad_norm": 0.9647389731875299, "learning_rate": 1.9769901732486542e-05, "loss": 0.545, "step": 1678 }, { "epoch": 0.1, "grad_norm": 0.4138833322271508, "learning_rate": 1.9769504659843486e-05, "loss": 0.3303, "step": 1679 }, { "epoch": 0.1, "grad_norm": 0.37714688186515205, "learning_rate": 1.9769107248882904e-05, "loss": 0.2794, "step": 1680 }, { "epoch": 0.1, "grad_norm": 0.539646531653099, "learning_rate": 1.976870949961856e-05, "loss": 0.2455, "step": 1681 }, { "epoch": 0.1, "grad_norm": 1.9706385236557056, "learning_rate": 1.9768311412064224e-05, "loss": 0.8027, "step": 1682 }, { "epoch": 0.1, "grad_norm": 0.4506680655535206, "learning_rate": 1.9767912986233685e-05, "loss": 0.2213, "step": 1683 }, { "epoch": 0.1, "grad_norm": 0.4818504309643764, "learning_rate": 1.976751422214074e-05, "loss": 0.3729, "step": 1684 }, { "epoch": 0.1, "grad_norm": 0.6867809896350154, "learning_rate": 1.9767115119799197e-05, "loss": 0.5068, "step": 1685 }, { "epoch": 0.1, "grad_norm": 0.4081668174851612, "learning_rate": 1.9766715679222875e-05, "loss": 0.2312, "step": 1686 }, { "epoch": 0.1, "grad_norm": 0.3395615177013707, "learning_rate": 1.9766315900425613e-05, "loss": 0.2423, "step": 1687 }, { "epoch": 0.1, "grad_norm": 1.7966060680798426, "learning_rate": 1.976591578342125e-05, "loss": 0.8855, "step": 1688 }, { "epoch": 0.1, "grad_norm": 0.380188460807513, "learning_rate": 1.9765515328223644e-05, "loss": 0.2262, "step": 1689 }, { "epoch": 0.1, "grad_norm": 0.7703079045710871, "learning_rate": 1.9765114534846662e-05, "loss": 0.5256, "step": 1690 }, { "epoch": 0.1, "grad_norm": 0.5143745240081559, "learning_rate": 1.9764713403304183e-05, "loss": 0.3954, "step": 1691 }, { "epoch": 0.1, "grad_norm": 0.46836814781278047, "learning_rate": 1.97643119336101e-05, "loss": 0.2407, "step": 1692 }, { "epoch": 0.1, "grad_norm": 0.3690528313572445, "learning_rate": 1.976391012577831e-05, "loss": 0.1872, "step": 1693 }, { "epoch": 0.1, "grad_norm": 1.2780455908200286, "learning_rate": 1.9763507979822737e-05, "loss": 0.7456, "step": 1694 }, { "epoch": 0.1, "grad_norm": 0.4214444867559315, "learning_rate": 1.97631054957573e-05, "loss": 0.2931, "step": 1695 }, { "epoch": 0.1, "grad_norm": 0.5189345658634478, "learning_rate": 1.9762702673595943e-05, "loss": 0.3177, "step": 1696 }, { "epoch": 0.1, "grad_norm": 0.880757909211329, "learning_rate": 1.9762299513352604e-05, "loss": 0.5314, "step": 1697 }, { "epoch": 0.1, "grad_norm": 0.4845554317665074, "learning_rate": 1.976189601504126e-05, "loss": 0.2937, "step": 1698 }, { "epoch": 0.1, "grad_norm": 0.30040774511408, "learning_rate": 1.9761492178675876e-05, "loss": 0.1939, "step": 1699 }, { "epoch": 0.1, "grad_norm": 1.1642293277701257, "learning_rate": 1.9761088004270435e-05, "loss": 0.7888, "step": 1700 }, { "epoch": 0.1, "grad_norm": 0.43107411175540156, "learning_rate": 1.976068349183894e-05, "loss": 0.2908, "step": 1701 }, { "epoch": 0.1, "grad_norm": 0.7767979561549465, "learning_rate": 1.9760278641395395e-05, "loss": 0.447, "step": 1702 }, { "epoch": 0.1, "grad_norm": 0.562402325785457, "learning_rate": 1.9759873452953816e-05, "loss": 0.3703, "step": 1703 }, { "epoch": 0.1, "grad_norm": 0.4995224833626151, "learning_rate": 1.9759467926528242e-05, "loss": 0.3042, "step": 1704 }, { "epoch": 0.1, "grad_norm": 0.27285547576376873, "learning_rate": 1.975906206213271e-05, "loss": 0.1633, "step": 1705 }, { "epoch": 0.1, "grad_norm": 0.9079324937335543, "learning_rate": 1.9758655859781282e-05, "loss": 0.6932, "step": 1706 }, { "epoch": 0.1, "grad_norm": 0.40677426359666474, "learning_rate": 1.975824931948802e-05, "loss": 0.2871, "step": 1707 }, { "epoch": 0.1, "grad_norm": 0.45580786137028356, "learning_rate": 1.975784244126701e-05, "loss": 0.3893, "step": 1708 }, { "epoch": 0.1, "grad_norm": 0.9194512800758327, "learning_rate": 1.9757435225132325e-05, "loss": 0.3548, "step": 1709 }, { "epoch": 0.1, "grad_norm": 0.44115743053683315, "learning_rate": 1.9757027671098083e-05, "loss": 0.3031, "step": 1710 }, { "epoch": 0.1, "grad_norm": 0.2825713405110046, "learning_rate": 1.9756619779178393e-05, "loss": 0.2427, "step": 1711 }, { "epoch": 0.1, "grad_norm": 1.1849133538579537, "learning_rate": 1.9756211549387378e-05, "loss": 0.4944, "step": 1712 }, { "epoch": 0.1, "grad_norm": 0.36690700948942095, "learning_rate": 1.975580298173918e-05, "loss": 0.3057, "step": 1713 }, { "epoch": 0.1, "grad_norm": 0.7245805282332856, "learning_rate": 1.975539407624794e-05, "loss": 0.5556, "step": 1714 }, { "epoch": 0.1, "grad_norm": 0.4612175857613643, "learning_rate": 1.9754984832927825e-05, "loss": 0.3085, "step": 1715 }, { "epoch": 0.1, "grad_norm": 0.44090499684018897, "learning_rate": 1.9754575251793006e-05, "loss": 0.3089, "step": 1716 }, { "epoch": 0.1, "grad_norm": 0.5288877897785201, "learning_rate": 1.975416533285766e-05, "loss": 0.3071, "step": 1717 }, { "epoch": 0.1, "grad_norm": 1.3106501640407626, "learning_rate": 1.9753755076135994e-05, "loss": 0.587, "step": 1718 }, { "epoch": 0.1, "grad_norm": 0.3528842608221892, "learning_rate": 1.9753344481642205e-05, "loss": 0.2396, "step": 1719 }, { "epoch": 0.1, "grad_norm": 0.6296848852224964, "learning_rate": 1.975293354939052e-05, "loss": 0.4465, "step": 1720 }, { "epoch": 0.1, "grad_norm": 0.5365509567706639, "learning_rate": 1.975252227939516e-05, "loss": 0.2626, "step": 1721 }, { "epoch": 0.1, "grad_norm": 0.3821923286062335, "learning_rate": 1.9752110671670375e-05, "loss": 0.1844, "step": 1722 }, { "epoch": 0.1, "grad_norm": 0.40328934838267844, "learning_rate": 1.975169872623042e-05, "loss": 0.3276, "step": 1723 }, { "epoch": 0.1, "grad_norm": 0.9932472840383428, "learning_rate": 1.9751286443089555e-05, "loss": 0.5172, "step": 1724 }, { "epoch": 0.1, "grad_norm": 0.39449755648213397, "learning_rate": 1.975087382226206e-05, "loss": 0.2579, "step": 1725 }, { "epoch": 0.1, "grad_norm": 0.34791684251849897, "learning_rate": 1.9750460863762225e-05, "loss": 0.2135, "step": 1726 }, { "epoch": 0.1, "grad_norm": 0.4824628574743985, "learning_rate": 1.9750047567604348e-05, "loss": 0.3734, "step": 1727 }, { "epoch": 0.1, "grad_norm": 0.45556148548025616, "learning_rate": 1.9749633933802743e-05, "loss": 0.22, "step": 1728 }, { "epoch": 0.1, "grad_norm": 0.6722527683545421, "learning_rate": 1.9749219962371736e-05, "loss": 0.4172, "step": 1729 }, { "epoch": 0.1, "grad_norm": 1.3213518952540688, "learning_rate": 1.9748805653325657e-05, "loss": 0.5264, "step": 1730 }, { "epoch": 0.1, "grad_norm": 0.43129412639939235, "learning_rate": 1.974839100667886e-05, "loss": 0.3006, "step": 1731 }, { "epoch": 0.1, "grad_norm": 0.5744145659725003, "learning_rate": 1.97479760224457e-05, "loss": 0.3189, "step": 1732 }, { "epoch": 0.1, "grad_norm": 0.28489126228750195, "learning_rate": 1.9747560700640552e-05, "loss": 0.1805, "step": 1733 }, { "epoch": 0.1, "grad_norm": 0.4691600450533372, "learning_rate": 1.97471450412778e-05, "loss": 0.2848, "step": 1734 }, { "epoch": 0.1, "grad_norm": 0.48444296804977377, "learning_rate": 1.9746729044371826e-05, "loss": 0.2677, "step": 1735 }, { "epoch": 0.1, "grad_norm": 1.6659386268209997, "learning_rate": 1.9746312709937047e-05, "loss": 0.5739, "step": 1736 }, { "epoch": 0.1, "grad_norm": 0.4936719526225396, "learning_rate": 1.974589603798788e-05, "loss": 0.3041, "step": 1737 }, { "epoch": 0.1, "grad_norm": 0.34597533311036405, "learning_rate": 1.974547902853875e-05, "loss": 0.2043, "step": 1738 }, { "epoch": 0.1, "grad_norm": 0.38018595031990254, "learning_rate": 1.9745061681604104e-05, "loss": 0.3154, "step": 1739 }, { "epoch": 0.1, "grad_norm": 0.8001416151657222, "learning_rate": 1.974464399719839e-05, "loss": 0.461, "step": 1740 }, { "epoch": 0.1, "grad_norm": 0.5315768234902671, "learning_rate": 1.974422597533607e-05, "loss": 0.3053, "step": 1741 }, { "epoch": 0.1, "grad_norm": 0.5469926220469847, "learning_rate": 1.9743807616031624e-05, "loss": 0.3805, "step": 1742 }, { "epoch": 0.1, "grad_norm": 0.7185184120264385, "learning_rate": 1.974338891929954e-05, "loss": 0.3472, "step": 1743 }, { "epoch": 0.1, "grad_norm": 0.5697328626613448, "learning_rate": 1.9742969885154318e-05, "loss": 0.3864, "step": 1744 }, { "epoch": 0.1, "grad_norm": 0.2519361023605971, "learning_rate": 1.974255051361047e-05, "loss": 0.076, "step": 1745 }, { "epoch": 0.1, "grad_norm": 0.5995345198599, "learning_rate": 1.974213080468251e-05, "loss": 0.3818, "step": 1746 }, { "epoch": 0.1, "grad_norm": 0.40605771267747187, "learning_rate": 1.9741710758384985e-05, "loss": 0.3281, "step": 1747 }, { "epoch": 0.1, "grad_norm": 0.852458852226233, "learning_rate": 1.9741290374732434e-05, "loss": 0.4804, "step": 1748 }, { "epoch": 0.1, "grad_norm": 0.6862694898087204, "learning_rate": 1.9740869653739413e-05, "loss": 0.4156, "step": 1749 }, { "epoch": 0.1, "grad_norm": 0.5487997218358596, "learning_rate": 1.97404485954205e-05, "loss": 0.3663, "step": 1750 }, { "epoch": 0.1, "grad_norm": 0.303352834458015, "learning_rate": 1.974002719979027e-05, "loss": 0.1821, "step": 1751 }, { "epoch": 0.1, "grad_norm": 0.5481110458066554, "learning_rate": 1.973960546686331e-05, "loss": 0.3175, "step": 1752 }, { "epoch": 0.1, "grad_norm": 0.6217773627623907, "learning_rate": 1.973918339665424e-05, "loss": 0.4053, "step": 1753 }, { "epoch": 0.1, "grad_norm": 0.4157060592887071, "learning_rate": 1.9738760989177665e-05, "loss": 0.3342, "step": 1754 }, { "epoch": 0.1, "grad_norm": 0.48771030560903766, "learning_rate": 1.9738338244448214e-05, "loss": 0.3222, "step": 1755 }, { "epoch": 0.1, "grad_norm": 0.5874946088172913, "learning_rate": 1.9737915162480527e-05, "loss": 0.4358, "step": 1756 }, { "epoch": 0.1, "grad_norm": 0.37947933594438027, "learning_rate": 1.973749174328926e-05, "loss": 0.2179, "step": 1757 }, { "epoch": 0.1, "grad_norm": 0.4789152218360017, "learning_rate": 1.9737067986889072e-05, "loss": 0.3133, "step": 1758 }, { "epoch": 0.1, "grad_norm": 0.45954057561314693, "learning_rate": 1.973664389329464e-05, "loss": 0.3215, "step": 1759 }, { "epoch": 0.1, "grad_norm": 1.0575391871829922, "learning_rate": 1.9736219462520645e-05, "loss": 0.6064, "step": 1760 }, { "epoch": 0.1, "grad_norm": 0.5316241369605608, "learning_rate": 1.973579469458179e-05, "loss": 0.2216, "step": 1761 }, { "epoch": 0.1, "grad_norm": 0.5200064785449022, "learning_rate": 1.9735369589492786e-05, "loss": 0.3547, "step": 1762 }, { "epoch": 0.1, "grad_norm": 0.3661703504092863, "learning_rate": 1.973494414726835e-05, "loss": 0.2464, "step": 1763 }, { "epoch": 0.1, "grad_norm": 0.6885337064692816, "learning_rate": 1.9734518367923216e-05, "loss": 0.3472, "step": 1764 }, { "epoch": 0.1, "grad_norm": 0.44627825575802316, "learning_rate": 1.973409225147213e-05, "loss": 0.3789, "step": 1765 }, { "epoch": 0.1, "grad_norm": 0.5653737927716992, "learning_rate": 1.973366579792985e-05, "loss": 0.3915, "step": 1766 }, { "epoch": 0.1, "grad_norm": 0.47244377328827575, "learning_rate": 1.9733239007311137e-05, "loss": 0.2816, "step": 1767 }, { "epoch": 0.1, "grad_norm": 0.5118989988502627, "learning_rate": 1.973281187963078e-05, "loss": 0.3149, "step": 1768 }, { "epoch": 0.1, "grad_norm": 0.8961397366295146, "learning_rate": 1.9732384414903562e-05, "loss": 0.5801, "step": 1769 }, { "epoch": 0.1, "grad_norm": 0.40915889685555634, "learning_rate": 1.9731956613144297e-05, "loss": 0.3385, "step": 1770 }, { "epoch": 0.1, "grad_norm": 0.36489091879616176, "learning_rate": 1.9731528474367787e-05, "loss": 0.1888, "step": 1771 }, { "epoch": 0.1, "grad_norm": 0.4653678958356583, "learning_rate": 1.9731099998588865e-05, "loss": 0.3244, "step": 1772 }, { "epoch": 0.1, "grad_norm": 0.7889078900037176, "learning_rate": 1.973067118582237e-05, "loss": 0.4141, "step": 1773 }, { "epoch": 0.1, "grad_norm": 0.48457358345367246, "learning_rate": 1.973024203608315e-05, "loss": 0.2676, "step": 1774 }, { "epoch": 0.1, "grad_norm": 0.5849351746372535, "learning_rate": 1.9729812549386066e-05, "loss": 0.3811, "step": 1775 }, { "epoch": 0.1, "grad_norm": 0.6100840543617714, "learning_rate": 1.9729382725745997e-05, "loss": 0.3586, "step": 1776 }, { "epoch": 0.1, "grad_norm": 0.32169239000575955, "learning_rate": 1.9728952565177817e-05, "loss": 0.1859, "step": 1777 }, { "epoch": 0.1, "grad_norm": 0.4926537571689009, "learning_rate": 1.972852206769643e-05, "loss": 0.3794, "step": 1778 }, { "epoch": 0.1, "grad_norm": 1.4956203678206292, "learning_rate": 1.972809123331674e-05, "loss": 0.8704, "step": 1779 }, { "epoch": 0.1, "grad_norm": 0.432781530735122, "learning_rate": 1.972766006205367e-05, "loss": 0.2187, "step": 1780 }, { "epoch": 0.1, "grad_norm": 0.8836133604422183, "learning_rate": 1.9727228553922152e-05, "loss": 0.5072, "step": 1781 }, { "epoch": 0.1, "grad_norm": 0.6000939456264764, "learning_rate": 1.9726796708937125e-05, "loss": 0.3967, "step": 1782 }, { "epoch": 0.1, "grad_norm": 0.36428401062594407, "learning_rate": 1.972636452711355e-05, "loss": 0.2402, "step": 1783 }, { "epoch": 0.1, "grad_norm": 0.35233509057631474, "learning_rate": 1.9725932008466383e-05, "loss": 0.1392, "step": 1784 }, { "epoch": 0.1, "grad_norm": 0.7541938557788923, "learning_rate": 1.9725499153010613e-05, "loss": 0.4919, "step": 1785 }, { "epoch": 0.1, "grad_norm": 0.4648704692571199, "learning_rate": 1.9725065960761225e-05, "loss": 0.3193, "step": 1786 }, { "epoch": 0.1, "grad_norm": 0.7643519431832295, "learning_rate": 1.9724632431733223e-05, "loss": 0.3782, "step": 1787 }, { "epoch": 0.1, "grad_norm": 0.4383417741200626, "learning_rate": 1.9724198565941616e-05, "loss": 0.2711, "step": 1788 }, { "epoch": 0.1, "grad_norm": 0.5752027956523929, "learning_rate": 1.972376436340143e-05, "loss": 0.3578, "step": 1789 }, { "epoch": 0.1, "grad_norm": 0.3491638947718094, "learning_rate": 1.9723329824127703e-05, "loss": 0.2435, "step": 1790 }, { "epoch": 0.1, "grad_norm": 0.9210790563154199, "learning_rate": 1.9722894948135485e-05, "loss": 0.5047, "step": 1791 }, { "epoch": 0.1, "grad_norm": 0.4745370515957307, "learning_rate": 1.972245973543983e-05, "loss": 0.3136, "step": 1792 }, { "epoch": 0.1, "grad_norm": 0.8457754702291259, "learning_rate": 1.9722024186055812e-05, "loss": 0.4412, "step": 1793 }, { "epoch": 0.1, "grad_norm": 0.5232986705641869, "learning_rate": 1.9721588299998516e-05, "loss": 0.3358, "step": 1794 }, { "epoch": 0.1, "grad_norm": 0.43026069963986585, "learning_rate": 1.9721152077283038e-05, "loss": 0.2955, "step": 1795 }, { "epoch": 0.1, "grad_norm": 0.3170651313977208, "learning_rate": 1.9720715517924476e-05, "loss": 0.2067, "step": 1796 }, { "epoch": 0.1, "grad_norm": 1.055213009657008, "learning_rate": 1.9720278621937954e-05, "loss": 0.4355, "step": 1797 }, { "epoch": 0.1, "grad_norm": 0.39566676103877046, "learning_rate": 1.9719841389338605e-05, "loss": 0.2776, "step": 1798 }, { "epoch": 0.1, "grad_norm": 0.45559232573659136, "learning_rate": 1.9719403820141563e-05, "loss": 0.4138, "step": 1799 }, { "epoch": 0.1, "grad_norm": 0.9887763688604866, "learning_rate": 1.971896591436199e-05, "loss": 0.2925, "step": 1800 }, { "epoch": 0.1, "grad_norm": 0.3638885992702867, "learning_rate": 1.971852767201504e-05, "loss": 0.2546, "step": 1801 }, { "epoch": 0.1, "grad_norm": 0.6125066940881474, "learning_rate": 1.97180890931159e-05, "loss": 0.4316, "step": 1802 }, { "epoch": 0.1, "grad_norm": 0.645523555658483, "learning_rate": 1.971765017767975e-05, "loss": 0.3526, "step": 1803 }, { "epoch": 0.1, "grad_norm": 0.48292600807911545, "learning_rate": 1.9717210925721792e-05, "loss": 0.3628, "step": 1804 }, { "epoch": 0.1, "grad_norm": 0.510701994227429, "learning_rate": 1.9716771337257235e-05, "loss": 0.2944, "step": 1805 }, { "epoch": 0.1, "grad_norm": 0.512257770671276, "learning_rate": 1.9716331412301304e-05, "loss": 0.3456, "step": 1806 }, { "epoch": 0.1, "grad_norm": 0.3723557207434571, "learning_rate": 1.971589115086924e-05, "loss": 0.2188, "step": 1807 }, { "epoch": 0.1, "grad_norm": 0.4776975335292782, "learning_rate": 1.971545055297628e-05, "loss": 0.3271, "step": 1808 }, { "epoch": 0.1, "grad_norm": 0.4671253964539537, "learning_rate": 1.971500961863768e-05, "loss": 0.4177, "step": 1809 }, { "epoch": 0.1, "grad_norm": 0.4226489406088548, "learning_rate": 1.9714568347868722e-05, "loss": 0.2196, "step": 1810 }, { "epoch": 0.1, "grad_norm": 0.36354839297692376, "learning_rate": 1.9714126740684677e-05, "loss": 0.3186, "step": 1811 }, { "epoch": 0.1, "grad_norm": 1.4618017892562303, "learning_rate": 1.9713684797100843e-05, "loss": 0.7304, "step": 1812 }, { "epoch": 0.1, "grad_norm": 0.35343600195296193, "learning_rate": 1.9713242517132518e-05, "loss": 0.2205, "step": 1813 }, { "epoch": 0.1, "grad_norm": 0.4373763331330048, "learning_rate": 1.9712799900795026e-05, "loss": 0.3891, "step": 1814 }, { "epoch": 0.1, "grad_norm": 0.7587560756410427, "learning_rate": 1.971235694810369e-05, "loss": 0.5362, "step": 1815 }, { "epoch": 0.1, "grad_norm": 0.38486193172658234, "learning_rate": 1.971191365907385e-05, "loss": 0.2663, "step": 1816 }, { "epoch": 0.1, "grad_norm": 0.3515214902888066, "learning_rate": 1.9711470033720856e-05, "loss": 0.168, "step": 1817 }, { "epoch": 0.1, "grad_norm": 0.48100670484562935, "learning_rate": 1.9711026072060077e-05, "loss": 0.3562, "step": 1818 }, { "epoch": 0.1, "grad_norm": 0.39630337446911107, "learning_rate": 1.971058177410688e-05, "loss": 0.2998, "step": 1819 }, { "epoch": 0.1, "grad_norm": 0.8004872736078943, "learning_rate": 1.9710137139876653e-05, "loss": 0.4614, "step": 1820 }, { "epoch": 0.1, "grad_norm": 0.49674968425439975, "learning_rate": 1.9709692169384794e-05, "loss": 0.4202, "step": 1821 }, { "epoch": 0.1, "grad_norm": 0.3863426434060569, "learning_rate": 1.9709246862646712e-05, "loss": 0.2811, "step": 1822 }, { "epoch": 0.1, "grad_norm": 0.22159405934145307, "learning_rate": 1.9708801219677832e-05, "loss": 0.0938, "step": 1823 }, { "epoch": 0.1, "grad_norm": 1.116757279703663, "learning_rate": 1.970835524049358e-05, "loss": 0.6516, "step": 1824 }, { "epoch": 0.1, "grad_norm": 0.4811222290577782, "learning_rate": 1.9707908925109405e-05, "loss": 0.3042, "step": 1825 }, { "epoch": 0.1, "grad_norm": 0.4106501961078986, "learning_rate": 1.9707462273540762e-05, "loss": 0.3154, "step": 1826 }, { "epoch": 0.1, "grad_norm": 0.8876173475980776, "learning_rate": 1.9707015285803117e-05, "loss": 0.5606, "step": 1827 }, { "epoch": 0.11, "grad_norm": 0.4763770328566096, "learning_rate": 1.9706567961911952e-05, "loss": 0.3034, "step": 1828 }, { "epoch": 0.11, "grad_norm": 0.2731838409066813, "learning_rate": 1.9706120301882755e-05, "loss": 0.1925, "step": 1829 }, { "epoch": 0.11, "grad_norm": 0.6050226311696162, "learning_rate": 1.9705672305731027e-05, "loss": 0.3795, "step": 1830 }, { "epoch": 0.11, "grad_norm": 0.5254253951284472, "learning_rate": 1.9705223973472288e-05, "loss": 0.3244, "step": 1831 }, { "epoch": 0.11, "grad_norm": 0.7309290009909968, "learning_rate": 1.9704775305122057e-05, "loss": 0.4473, "step": 1832 }, { "epoch": 0.11, "grad_norm": 0.5142034091004006, "learning_rate": 1.9704326300695874e-05, "loss": 0.3584, "step": 1833 }, { "epoch": 0.11, "grad_norm": 0.41208408396675505, "learning_rate": 1.9703876960209292e-05, "loss": 0.2997, "step": 1834 }, { "epoch": 0.11, "grad_norm": 0.4215025386501301, "learning_rate": 1.970342728367787e-05, "loss": 0.207, "step": 1835 }, { "epoch": 0.11, "grad_norm": 1.1215430532038166, "learning_rate": 1.9702977271117172e-05, "loss": 0.3586, "step": 1836 }, { "epoch": 0.11, "grad_norm": 0.40290356090073365, "learning_rate": 1.9702526922542788e-05, "loss": 0.3035, "step": 1837 }, { "epoch": 0.11, "grad_norm": 0.4444886784774347, "learning_rate": 1.970207623797032e-05, "loss": 0.367, "step": 1838 }, { "epoch": 0.11, "grad_norm": 0.7960973226307128, "learning_rate": 1.9701625217415366e-05, "loss": 0.4945, "step": 1839 }, { "epoch": 0.11, "grad_norm": 0.40419674822028395, "learning_rate": 1.9701173860893547e-05, "loss": 0.2819, "step": 1840 }, { "epoch": 0.11, "grad_norm": 0.3928574273644804, "learning_rate": 1.9700722168420493e-05, "loss": 0.1876, "step": 1841 }, { "epoch": 0.11, "grad_norm": 0.6832258064420349, "learning_rate": 1.9700270140011852e-05, "loss": 0.3169, "step": 1842 }, { "epoch": 0.11, "grad_norm": 0.5857771512420725, "learning_rate": 1.969981777568327e-05, "loss": 0.3352, "step": 1843 }, { "epoch": 0.11, "grad_norm": 0.4742736573868894, "learning_rate": 1.969936507545042e-05, "loss": 0.3776, "step": 1844 }, { "epoch": 0.11, "grad_norm": 0.448882200382126, "learning_rate": 1.969891203932897e-05, "loss": 0.3702, "step": 1845 }, { "epoch": 0.11, "grad_norm": 0.3997430046247253, "learning_rate": 1.9698458667334616e-05, "loss": 0.2028, "step": 1846 }, { "epoch": 0.11, "grad_norm": 0.42185324820247727, "learning_rate": 1.969800495948305e-05, "loss": 0.2546, "step": 1847 }, { "epoch": 0.11, "grad_norm": 0.581614305117135, "learning_rate": 1.9697550915789992e-05, "loss": 0.3695, "step": 1848 }, { "epoch": 0.11, "grad_norm": 0.46363413685206445, "learning_rate": 1.9697096536271166e-05, "loss": 0.2585, "step": 1849 }, { "epoch": 0.11, "grad_norm": 0.42718596651710894, "learning_rate": 1.96966418209423e-05, "loss": 0.3225, "step": 1850 }, { "epoch": 0.11, "grad_norm": 1.4101683380916672, "learning_rate": 1.9696186769819146e-05, "loss": 0.8429, "step": 1851 }, { "epoch": 0.11, "grad_norm": 0.406650262457706, "learning_rate": 1.9695731382917463e-05, "loss": 0.2515, "step": 1852 }, { "epoch": 0.11, "grad_norm": 0.2883048378866706, "learning_rate": 1.9695275660253017e-05, "loss": 0.236, "step": 1853 }, { "epoch": 0.11, "grad_norm": 0.5570129462774673, "learning_rate": 1.9694819601841593e-05, "loss": 0.4075, "step": 1854 }, { "epoch": 0.11, "grad_norm": 0.6608487821497272, "learning_rate": 1.969436320769898e-05, "loss": 0.324, "step": 1855 }, { "epoch": 0.11, "grad_norm": 0.695684745170112, "learning_rate": 1.969390647784099e-05, "loss": 0.4257, "step": 1856 }, { "epoch": 0.11, "grad_norm": 0.42950676535452104, "learning_rate": 1.9693449412283435e-05, "loss": 0.3572, "step": 1857 }, { "epoch": 0.11, "grad_norm": 0.422128890021055, "learning_rate": 1.9692992011042143e-05, "loss": 0.2884, "step": 1858 }, { "epoch": 0.11, "grad_norm": 0.4976931054505805, "learning_rate": 1.969253427413295e-05, "loss": 0.3108, "step": 1859 }, { "epoch": 0.11, "grad_norm": 0.42506504553438845, "learning_rate": 1.969207620157172e-05, "loss": 0.2968, "step": 1860 }, { "epoch": 0.11, "grad_norm": 0.44845845302893456, "learning_rate": 1.9691617793374304e-05, "loss": 0.2973, "step": 1861 }, { "epoch": 0.11, "grad_norm": 0.463892625466207, "learning_rate": 1.969115904955658e-05, "loss": 0.2725, "step": 1862 }, { "epoch": 0.11, "grad_norm": 1.2968732447910472, "learning_rate": 1.9690699970134436e-05, "loss": 0.7922, "step": 1863 }, { "epoch": 0.11, "grad_norm": 0.7591209376006144, "learning_rate": 1.9690240555123767e-05, "loss": 0.4119, "step": 1864 }, { "epoch": 0.11, "grad_norm": 0.37942803845107786, "learning_rate": 1.9689780804540487e-05, "loss": 0.2586, "step": 1865 }, { "epoch": 0.11, "grad_norm": 0.5185336534105881, "learning_rate": 1.968932071840051e-05, "loss": 0.4203, "step": 1866 }, { "epoch": 0.11, "grad_norm": 0.41377909230155135, "learning_rate": 1.968886029671978e-05, "loss": 0.2385, "step": 1867 }, { "epoch": 0.11, "grad_norm": 0.3525014148154284, "learning_rate": 1.968839953951423e-05, "loss": 0.2215, "step": 1868 }, { "epoch": 0.11, "grad_norm": 0.4768967276948403, "learning_rate": 1.968793844679982e-05, "loss": 0.3546, "step": 1869 }, { "epoch": 0.11, "grad_norm": 0.5977294660884478, "learning_rate": 1.9687477018592517e-05, "loss": 0.3633, "step": 1870 }, { "epoch": 0.11, "grad_norm": 0.4148338563966621, "learning_rate": 1.9687015254908305e-05, "loss": 0.375, "step": 1871 }, { "epoch": 0.11, "grad_norm": 0.5783402829003099, "learning_rate": 1.968655315576317e-05, "loss": 0.4202, "step": 1872 }, { "epoch": 0.11, "grad_norm": 0.3409097705300037, "learning_rate": 1.9686090721173118e-05, "loss": 0.2646, "step": 1873 }, { "epoch": 0.11, "grad_norm": 0.3731720064413754, "learning_rate": 1.9685627951154154e-05, "loss": 0.259, "step": 1874 }, { "epoch": 0.11, "grad_norm": 1.056227121717924, "learning_rate": 1.9685164845722317e-05, "loss": 0.6084, "step": 1875 }, { "epoch": 0.11, "grad_norm": 0.47496672745326424, "learning_rate": 1.9684701404893635e-05, "loss": 0.331, "step": 1876 }, { "epoch": 0.11, "grad_norm": 0.6892217331540745, "learning_rate": 1.9684237628684162e-05, "loss": 0.4158, "step": 1877 }, { "epoch": 0.11, "grad_norm": 0.5381495786374161, "learning_rate": 1.9683773517109954e-05, "loss": 0.3442, "step": 1878 }, { "epoch": 0.11, "grad_norm": 0.4442211799150528, "learning_rate": 1.9683309070187088e-05, "loss": 0.2373, "step": 1879 }, { "epoch": 0.11, "grad_norm": 0.4516614938674143, "learning_rate": 1.9682844287931643e-05, "loss": 0.2681, "step": 1880 }, { "epoch": 0.11, "grad_norm": 0.46034638626235896, "learning_rate": 1.9682379170359717e-05, "loss": 0.379, "step": 1881 }, { "epoch": 0.11, "grad_norm": 0.7113109005722336, "learning_rate": 1.9681913717487418e-05, "loss": 0.2923, "step": 1882 }, { "epoch": 0.11, "grad_norm": 0.46398582128192567, "learning_rate": 1.9681447929330864e-05, "loss": 0.343, "step": 1883 }, { "epoch": 0.11, "grad_norm": 0.8005165026287019, "learning_rate": 1.9680981805906182e-05, "loss": 0.5295, "step": 1884 }, { "epoch": 0.11, "grad_norm": 0.5318756913666476, "learning_rate": 1.968051534722952e-05, "loss": 0.222, "step": 1885 }, { "epoch": 0.11, "grad_norm": 0.36430519799466327, "learning_rate": 1.968004855331702e-05, "loss": 0.2523, "step": 1886 }, { "epoch": 0.11, "grad_norm": 1.379727729095134, "learning_rate": 1.9679581424184862e-05, "loss": 0.803, "step": 1887 }, { "epoch": 0.11, "grad_norm": 0.6338746679719071, "learning_rate": 1.9679113959849213e-05, "loss": 0.3052, "step": 1888 }, { "epoch": 0.11, "grad_norm": 0.436401101542336, "learning_rate": 1.9678646160326268e-05, "loss": 0.3473, "step": 1889 }, { "epoch": 0.11, "grad_norm": 0.9931177098972608, "learning_rate": 1.9678178025632217e-05, "loss": 0.5409, "step": 1890 }, { "epoch": 0.11, "grad_norm": 0.4048200278519992, "learning_rate": 1.9677709555783278e-05, "loss": 0.1736, "step": 1891 }, { "epoch": 0.11, "grad_norm": 0.5199305893202164, "learning_rate": 1.9677240750795677e-05, "loss": 0.3528, "step": 1892 }, { "epoch": 0.11, "grad_norm": 0.45682355339831787, "learning_rate": 1.967677161068564e-05, "loss": 0.3718, "step": 1893 }, { "epoch": 0.11, "grad_norm": 0.6801158394462933, "learning_rate": 1.9676302135469424e-05, "loss": 0.3543, "step": 1894 }, { "epoch": 0.11, "grad_norm": 0.4675467552847163, "learning_rate": 1.9675832325163277e-05, "loss": 0.2873, "step": 1895 }, { "epoch": 0.11, "grad_norm": 1.21108403861959, "learning_rate": 1.9675362179783472e-05, "loss": 0.6525, "step": 1896 }, { "epoch": 0.11, "grad_norm": 0.4128238832269528, "learning_rate": 1.967489169934629e-05, "loss": 0.3157, "step": 1897 }, { "epoch": 0.11, "grad_norm": 0.4572471040151339, "learning_rate": 1.9674420883868032e-05, "loss": 0.2685, "step": 1898 }, { "epoch": 0.11, "grad_norm": 0.5102926952782346, "learning_rate": 1.9673949733364987e-05, "loss": 0.3365, "step": 1899 }, { "epoch": 0.11, "grad_norm": 0.801037699421989, "learning_rate": 1.9673478247853482e-05, "loss": 0.5427, "step": 1900 }, { "epoch": 0.11, "grad_norm": 0.40519304528153915, "learning_rate": 1.967300642734984e-05, "loss": 0.2522, "step": 1901 }, { "epoch": 0.11, "grad_norm": 0.43817917000824186, "learning_rate": 1.96725342718704e-05, "loss": 0.2323, "step": 1902 }, { "epoch": 0.11, "grad_norm": 1.0546181162648953, "learning_rate": 1.967206178143152e-05, "loss": 0.6465, "step": 1903 }, { "epoch": 0.11, "grad_norm": 0.40293228795698843, "learning_rate": 1.967158895604955e-05, "loss": 0.2422, "step": 1904 }, { "epoch": 0.11, "grad_norm": 0.4840118868245216, "learning_rate": 1.967111579574087e-05, "loss": 0.3647, "step": 1905 }, { "epoch": 0.11, "grad_norm": 0.856390248834586, "learning_rate": 1.967064230052187e-05, "loss": 0.3756, "step": 1906 }, { "epoch": 0.11, "grad_norm": 0.3372824122208772, "learning_rate": 1.9670168470408942e-05, "loss": 0.2382, "step": 1907 }, { "epoch": 0.11, "grad_norm": 1.0445851645817175, "learning_rate": 1.9669694305418498e-05, "loss": 0.3723, "step": 1908 }, { "epoch": 0.11, "grad_norm": 0.5236158385845334, "learning_rate": 1.9669219805566954e-05, "loss": 0.3219, "step": 1909 }, { "epoch": 0.11, "grad_norm": 0.489704534862564, "learning_rate": 1.966874497087074e-05, "loss": 0.3341, "step": 1910 }, { "epoch": 0.11, "grad_norm": 0.8448033082087018, "learning_rate": 1.9668269801346305e-05, "loss": 0.4509, "step": 1911 }, { "epoch": 0.11, "grad_norm": 0.440510759931443, "learning_rate": 1.966779429701011e-05, "loss": 0.3606, "step": 1912 }, { "epoch": 0.11, "grad_norm": 0.49307606252366426, "learning_rate": 1.9667318457878604e-05, "loss": 0.3268, "step": 1913 }, { "epoch": 0.11, "grad_norm": 0.4100642263944072, "learning_rate": 1.966684228396828e-05, "loss": 0.2066, "step": 1914 }, { "epoch": 0.11, "grad_norm": 0.9405707203282181, "learning_rate": 1.9666365775295622e-05, "loss": 0.4985, "step": 1915 }, { "epoch": 0.11, "grad_norm": 0.46413787240405263, "learning_rate": 1.966588893187714e-05, "loss": 0.2986, "step": 1916 }, { "epoch": 0.11, "grad_norm": 0.532938541056413, "learning_rate": 1.966541175372933e-05, "loss": 0.3551, "step": 1917 }, { "epoch": 0.11, "grad_norm": 0.8985917488823684, "learning_rate": 1.966493424086873e-05, "loss": 0.6217, "step": 1918 }, { "epoch": 0.11, "grad_norm": 0.33445042184657847, "learning_rate": 1.9664456393311876e-05, "loss": 0.2323, "step": 1919 }, { "epoch": 0.11, "grad_norm": 0.5402494374756727, "learning_rate": 1.966397821107531e-05, "loss": 0.2774, "step": 1920 }, { "epoch": 0.11, "grad_norm": 0.45417449458805104, "learning_rate": 1.9663499694175596e-05, "loss": 0.2966, "step": 1921 }, { "epoch": 0.11, "grad_norm": 0.41078506159188094, "learning_rate": 1.96630208426293e-05, "loss": 0.3234, "step": 1922 }, { "epoch": 0.11, "grad_norm": 0.880978732867112, "learning_rate": 1.966254165645301e-05, "loss": 0.5256, "step": 1923 }, { "epoch": 0.11, "grad_norm": 0.397876864335666, "learning_rate": 1.9662062135663316e-05, "loss": 0.2994, "step": 1924 }, { "epoch": 0.11, "grad_norm": 0.36149216851821453, "learning_rate": 1.9661582280276828e-05, "loss": 0.2885, "step": 1925 }, { "epoch": 0.11, "grad_norm": 0.32020921506739036, "learning_rate": 1.9661102090310157e-05, "loss": 0.1698, "step": 1926 }, { "epoch": 0.11, "grad_norm": 1.0554723084064312, "learning_rate": 1.9660621565779943e-05, "loss": 0.4549, "step": 1927 }, { "epoch": 0.11, "grad_norm": 0.39709968758603115, "learning_rate": 1.966014070670281e-05, "loss": 0.2861, "step": 1928 }, { "epoch": 0.11, "grad_norm": 0.4627118390227328, "learning_rate": 1.965965951309543e-05, "loss": 0.37, "step": 1929 }, { "epoch": 0.11, "grad_norm": 0.5652502961754505, "learning_rate": 1.965917798497445e-05, "loss": 0.3301, "step": 1930 }, { "epoch": 0.11, "grad_norm": 0.5014863993079642, "learning_rate": 1.9658696122356556e-05, "loss": 0.3489, "step": 1931 }, { "epoch": 0.11, "grad_norm": 0.3653922709051474, "learning_rate": 1.965821392525843e-05, "loss": 0.2062, "step": 1932 }, { "epoch": 0.11, "grad_norm": 0.5667706777693249, "learning_rate": 1.9657731393696768e-05, "loss": 0.3548, "step": 1933 }, { "epoch": 0.11, "grad_norm": 0.36616733693133574, "learning_rate": 1.9657248527688285e-05, "loss": 0.2127, "step": 1934 }, { "epoch": 0.11, "grad_norm": 1.0432472752070607, "learning_rate": 1.9656765327249697e-05, "loss": 0.5552, "step": 1935 }, { "epoch": 0.11, "grad_norm": 0.5719621038132259, "learning_rate": 1.9656281792397745e-05, "loss": 0.4127, "step": 1936 }, { "epoch": 0.11, "grad_norm": 0.37457017917808055, "learning_rate": 1.965579792314917e-05, "loss": 0.2565, "step": 1937 }, { "epoch": 0.11, "grad_norm": 0.30681311507200504, "learning_rate": 1.9655313719520726e-05, "loss": 0.2221, "step": 1938 }, { "epoch": 0.11, "grad_norm": 0.8535758857629228, "learning_rate": 1.9654829181529186e-05, "loss": 0.5047, "step": 1939 }, { "epoch": 0.11, "grad_norm": 0.5719271930360567, "learning_rate": 1.965434430919132e-05, "loss": 0.286, "step": 1940 }, { "epoch": 0.11, "grad_norm": 0.5563675709499672, "learning_rate": 1.9653859102523936e-05, "loss": 0.364, "step": 1941 }, { "epoch": 0.11, "grad_norm": 1.3137731119716958, "learning_rate": 1.965337356154382e-05, "loss": 0.8021, "step": 1942 }, { "epoch": 0.11, "grad_norm": 0.38001594373431286, "learning_rate": 1.9652887686267795e-05, "loss": 0.2393, "step": 1943 }, { "epoch": 0.11, "grad_norm": 0.2948874349792371, "learning_rate": 1.965240147671268e-05, "loss": 0.1511, "step": 1944 }, { "epoch": 0.11, "grad_norm": 0.6219444278818353, "learning_rate": 1.965191493289532e-05, "loss": 0.4022, "step": 1945 }, { "epoch": 0.11, "grad_norm": 0.48882053571073614, "learning_rate": 1.9651428054832562e-05, "loss": 0.3009, "step": 1946 }, { "epoch": 0.11, "grad_norm": 1.0667319837664195, "learning_rate": 1.9650940842541265e-05, "loss": 0.4638, "step": 1947 }, { "epoch": 0.11, "grad_norm": 0.4139384433063878, "learning_rate": 1.9650453296038302e-05, "loss": 0.3544, "step": 1948 }, { "epoch": 0.11, "grad_norm": 0.5010220701794562, "learning_rate": 1.9649965415340553e-05, "loss": 0.3336, "step": 1949 }, { "epoch": 0.11, "grad_norm": 0.28320481878320053, "learning_rate": 1.964947720046492e-05, "loss": 0.1546, "step": 1950 }, { "epoch": 0.11, "grad_norm": 1.2928573922057744, "learning_rate": 1.9648988651428308e-05, "loss": 0.5374, "step": 1951 }, { "epoch": 0.11, "grad_norm": 0.48585857871631216, "learning_rate": 1.964849976824763e-05, "loss": 0.2929, "step": 1952 }, { "epoch": 0.11, "grad_norm": 0.4246659305786616, "learning_rate": 1.964801055093982e-05, "loss": 0.3102, "step": 1953 }, { "epoch": 0.11, "grad_norm": 1.487500029193504, "learning_rate": 1.964752099952182e-05, "loss": 0.8572, "step": 1954 }, { "epoch": 0.11, "grad_norm": 0.4105755914914621, "learning_rate": 1.9647031114010585e-05, "loss": 0.3072, "step": 1955 }, { "epoch": 0.11, "grad_norm": 0.3978007522631092, "learning_rate": 1.9646540894423074e-05, "loss": 0.2497, "step": 1956 }, { "epoch": 0.11, "grad_norm": 1.6591002883094217, "learning_rate": 1.964605034077627e-05, "loss": 0.5882, "step": 1957 }, { "epoch": 0.11, "grad_norm": 0.34811743917000226, "learning_rate": 1.9645559453087158e-05, "loss": 0.235, "step": 1958 }, { "epoch": 0.11, "grad_norm": 0.6295042608541986, "learning_rate": 1.9645068231372733e-05, "loss": 0.426, "step": 1959 }, { "epoch": 0.11, "grad_norm": 0.3943625001062919, "learning_rate": 1.9644576675650012e-05, "loss": 0.2832, "step": 1960 }, { "epoch": 0.11, "grad_norm": 0.4604908836391139, "learning_rate": 1.9644084785936014e-05, "loss": 0.3313, "step": 1961 }, { "epoch": 0.11, "grad_norm": 1.0978664581234787, "learning_rate": 1.9643592562247776e-05, "loss": 0.5663, "step": 1962 }, { "epoch": 0.11, "grad_norm": 0.5053998885488958, "learning_rate": 1.964310000460234e-05, "loss": 0.2447, "step": 1963 }, { "epoch": 0.11, "grad_norm": 0.4433806067056563, "learning_rate": 1.964260711301677e-05, "loss": 0.2844, "step": 1964 }, { "epoch": 0.11, "grad_norm": 0.5933816056127612, "learning_rate": 1.9642113887508127e-05, "loss": 0.4007, "step": 1965 }, { "epoch": 0.11, "grad_norm": 0.9395519950185571, "learning_rate": 1.9641620328093496e-05, "loss": 0.5341, "step": 1966 }, { "epoch": 0.11, "grad_norm": 0.6664142487597251, "learning_rate": 1.964112643478997e-05, "loss": 0.3412, "step": 1967 }, { "epoch": 0.11, "grad_norm": 0.5458830028835255, "learning_rate": 1.9640632207614647e-05, "loss": 0.3502, "step": 1968 }, { "epoch": 0.11, "grad_norm": 0.4650976263027919, "learning_rate": 1.9640137646584646e-05, "loss": 0.3399, "step": 1969 }, { "epoch": 0.11, "grad_norm": 0.2908370498165278, "learning_rate": 1.963964275171709e-05, "loss": 0.1562, "step": 1970 }, { "epoch": 0.11, "grad_norm": 0.7007220925956545, "learning_rate": 1.9639147523029125e-05, "loss": 0.4742, "step": 1971 }, { "epoch": 0.11, "grad_norm": 0.5483497042322073, "learning_rate": 1.963865196053789e-05, "loss": 0.3736, "step": 1972 }, { "epoch": 0.11, "grad_norm": 0.42343628698039426, "learning_rate": 1.9638156064260555e-05, "loss": 0.1913, "step": 1973 }, { "epoch": 0.11, "grad_norm": 0.4601079222814512, "learning_rate": 1.9637659834214294e-05, "loss": 0.4321, "step": 1974 }, { "epoch": 0.11, "grad_norm": 1.3014578591026609, "learning_rate": 1.9637163270416283e-05, "loss": 0.7311, "step": 1975 }, { "epoch": 0.11, "grad_norm": 0.31662376400744924, "learning_rate": 1.9636666372883722e-05, "loss": 0.2132, "step": 1976 }, { "epoch": 0.11, "grad_norm": 0.423523284773505, "learning_rate": 1.963616914163382e-05, "loss": 0.2908, "step": 1977 }, { "epoch": 0.11, "grad_norm": 1.229874934088239, "learning_rate": 1.9635671576683798e-05, "loss": 0.7617, "step": 1978 }, { "epoch": 0.11, "grad_norm": 0.4814629313067314, "learning_rate": 1.9635173678050878e-05, "loss": 0.3127, "step": 1979 }, { "epoch": 0.11, "grad_norm": 0.5001839315639627, "learning_rate": 1.963467544575231e-05, "loss": 0.3492, "step": 1980 }, { "epoch": 0.11, "grad_norm": 0.5230200200146033, "learning_rate": 1.9634176879805347e-05, "loss": 0.3975, "step": 1981 }, { "epoch": 0.11, "grad_norm": 0.31796381358669257, "learning_rate": 1.9633677980227254e-05, "loss": 0.2032, "step": 1982 }, { "epoch": 0.11, "grad_norm": 0.562655816237142, "learning_rate": 1.96331787470353e-05, "loss": 0.3209, "step": 1983 }, { "epoch": 0.11, "grad_norm": 0.3877665388095057, "learning_rate": 1.9632679180246787e-05, "loss": 0.3566, "step": 1984 }, { "epoch": 0.11, "grad_norm": 0.5555037398364667, "learning_rate": 1.9632179279879006e-05, "loss": 0.4005, "step": 1985 }, { "epoch": 0.11, "grad_norm": 0.4124910862080048, "learning_rate": 1.963167904594927e-05, "loss": 0.3273, "step": 1986 }, { "epoch": 0.11, "grad_norm": 0.6000884096195452, "learning_rate": 1.9631178478474905e-05, "loss": 0.3107, "step": 1987 }, { "epoch": 0.11, "grad_norm": 0.4283814322598163, "learning_rate": 1.9630677577473242e-05, "loss": 0.3005, "step": 1988 }, { "epoch": 0.11, "grad_norm": 0.4370673442250718, "learning_rate": 1.963017634296163e-05, "loss": 0.2721, "step": 1989 }, { "epoch": 0.11, "grad_norm": 0.4521900634802621, "learning_rate": 1.9629674774957425e-05, "loss": 0.3517, "step": 1990 }, { "epoch": 0.11, "grad_norm": 0.5563225794926104, "learning_rate": 1.9629172873477995e-05, "loss": 0.3998, "step": 1991 }, { "epoch": 0.11, "grad_norm": 0.4409425742430659, "learning_rate": 1.9628670638540722e-05, "loss": 0.2862, "step": 1992 }, { "epoch": 0.11, "grad_norm": 1.3474166819860802, "learning_rate": 1.9628168070163e-05, "loss": 0.6459, "step": 1993 }, { "epoch": 0.11, "grad_norm": 0.6413772554148557, "learning_rate": 1.9627665168362234e-05, "loss": 0.4017, "step": 1994 }, { "epoch": 0.11, "grad_norm": 0.4979080455824859, "learning_rate": 1.9627161933155833e-05, "loss": 0.3773, "step": 1995 }, { "epoch": 0.11, "grad_norm": 0.47734635309643114, "learning_rate": 1.962665836456123e-05, "loss": 0.3279, "step": 1996 }, { "epoch": 0.11, "grad_norm": 0.4702257531545852, "learning_rate": 1.9626154462595863e-05, "loss": 0.2722, "step": 1997 }, { "epoch": 0.11, "grad_norm": 0.36757418615763193, "learning_rate": 1.9625650227277182e-05, "loss": 0.2572, "step": 1998 }, { "epoch": 0.11, "grad_norm": 2.0698780573684648, "learning_rate": 1.9625145658622644e-05, "loss": 0.3931, "step": 1999 }, { "epoch": 0.11, "grad_norm": 0.4586305864242774, "learning_rate": 1.962464075664973e-05, "loss": 0.2985, "step": 2000 }, { "epoch": 0.11, "grad_norm": 0.4799034083760339, "learning_rate": 1.9624135521375914e-05, "loss": 0.345, "step": 2001 }, { "epoch": 0.12, "grad_norm": 0.8087456132963655, "learning_rate": 1.9623629952818705e-05, "loss": 0.4573, "step": 2002 }, { "epoch": 0.12, "grad_norm": 0.273169211369182, "learning_rate": 1.9623124050995603e-05, "loss": 0.1335, "step": 2003 }, { "epoch": 0.12, "grad_norm": 0.44311381479955264, "learning_rate": 1.9622617815924125e-05, "loss": 0.3134, "step": 2004 }, { "epoch": 0.12, "grad_norm": 4.874639231040764, "learning_rate": 1.962211124762181e-05, "loss": 0.5349, "step": 2005 }, { "epoch": 0.12, "grad_norm": 0.910928440837772, "learning_rate": 1.9621604346106197e-05, "loss": 0.5488, "step": 2006 }, { "epoch": 0.12, "grad_norm": 0.730448021870774, "learning_rate": 1.9621097111394837e-05, "loss": 0.334, "step": 2007 }, { "epoch": 0.12, "grad_norm": 0.8847582506626019, "learning_rate": 1.9620589543505297e-05, "loss": 0.4115, "step": 2008 }, { "epoch": 0.12, "grad_norm": 0.26085846379198124, "learning_rate": 1.9620081642455155e-05, "loss": 0.109, "step": 2009 }, { "epoch": 0.12, "grad_norm": 0.37726397681575125, "learning_rate": 1.9619573408262004e-05, "loss": 0.2366, "step": 2010 }, { "epoch": 0.12, "grad_norm": 8.216495069611042, "learning_rate": 1.9619064840943432e-05, "loss": 0.8289, "step": 2011 }, { "epoch": 0.12, "grad_norm": 0.4878648634362295, "learning_rate": 1.9618555940517062e-05, "loss": 0.2654, "step": 2012 }, { "epoch": 0.12, "grad_norm": 8.093933887287843, "learning_rate": 1.9618046707000515e-05, "loss": 0.4438, "step": 2013 }, { "epoch": 0.12, "grad_norm": 7.040222786715266, "learning_rate": 1.9617537140411423e-05, "loss": 0.6686, "step": 2014 }, { "epoch": 0.12, "grad_norm": 1.4288877331493517, "learning_rate": 1.961702724076743e-05, "loss": 0.2278, "step": 2015 }, { "epoch": 0.12, "grad_norm": 0.609857521709017, "learning_rate": 1.96165170080862e-05, "loss": 0.3412, "step": 2016 }, { "epoch": 0.12, "grad_norm": 2.3849639020833395, "learning_rate": 1.9616006442385403e-05, "loss": 0.5256, "step": 2017 }, { "epoch": 0.12, "grad_norm": 1.0769342424347998, "learning_rate": 1.9615495543682712e-05, "loss": 0.3283, "step": 2018 }, { "epoch": 0.12, "grad_norm": 1.679860697691885, "learning_rate": 1.9614984311995825e-05, "loss": 0.4145, "step": 2019 }, { "epoch": 0.12, "grad_norm": 0.7572448145592617, "learning_rate": 1.9614472747342445e-05, "loss": 0.4164, "step": 2020 }, { "epoch": 0.12, "grad_norm": 1.0369229362139345, "learning_rate": 1.9613960849740284e-05, "loss": 0.4782, "step": 2021 }, { "epoch": 0.12, "grad_norm": 0.43789872014124714, "learning_rate": 1.9613448619207077e-05, "loss": 0.172, "step": 2022 }, { "epoch": 0.12, "grad_norm": 0.8217791474880007, "learning_rate": 1.9612936055760557e-05, "loss": 0.4245, "step": 2023 }, { "epoch": 0.12, "grad_norm": 0.7758649033699128, "learning_rate": 1.9612423159418474e-05, "loss": 0.4332, "step": 2024 }, { "epoch": 0.12, "grad_norm": 0.5558536412840944, "learning_rate": 1.9611909930198588e-05, "loss": 0.305, "step": 2025 }, { "epoch": 0.12, "grad_norm": 2.763245662074697, "learning_rate": 1.9611396368118675e-05, "loss": 0.68, "step": 2026 }, { "epoch": 0.12, "grad_norm": 0.6710808942620035, "learning_rate": 1.961088247319652e-05, "loss": 0.4107, "step": 2027 }, { "epoch": 0.12, "grad_norm": 0.36096294284561037, "learning_rate": 1.961036824544992e-05, "loss": 0.2201, "step": 2028 }, { "epoch": 0.12, "grad_norm": 0.5810650256593232, "learning_rate": 1.960985368489668e-05, "loss": 0.2909, "step": 2029 }, { "epoch": 0.12, "grad_norm": 1.5690149812729766, "learning_rate": 1.9609338791554623e-05, "loss": 0.5314, "step": 2030 }, { "epoch": 0.12, "grad_norm": 0.6393362197055744, "learning_rate": 1.960882356544157e-05, "loss": 0.2609, "step": 2031 }, { "epoch": 0.12, "grad_norm": 0.6976100461836577, "learning_rate": 1.9608308006575373e-05, "loss": 0.4226, "step": 2032 }, { "epoch": 0.12, "grad_norm": 1.479240310964179, "learning_rate": 1.9607792114973884e-05, "loss": 0.5768, "step": 2033 }, { "epoch": 0.12, "grad_norm": 0.3668310724900021, "learning_rate": 1.9607275890654967e-05, "loss": 0.2626, "step": 2034 }, { "epoch": 0.12, "grad_norm": 2.857454535850804, "learning_rate": 1.9606759333636498e-05, "loss": 0.2436, "step": 2035 }, { "epoch": 0.12, "grad_norm": 0.9569480593524808, "learning_rate": 1.9606242443936368e-05, "loss": 0.4232, "step": 2036 }, { "epoch": 0.12, "grad_norm": 0.5653570617669198, "learning_rate": 1.9605725221572475e-05, "loss": 0.3075, "step": 2037 }, { "epoch": 0.12, "grad_norm": 1.2376985969066299, "learning_rate": 1.960520766656273e-05, "loss": 0.594, "step": 2038 }, { "epoch": 0.12, "grad_norm": 0.6424647493461252, "learning_rate": 1.9604689778925056e-05, "loss": 0.3995, "step": 2039 }, { "epoch": 0.12, "grad_norm": 0.4235494564972714, "learning_rate": 1.960417155867739e-05, "loss": 0.3047, "step": 2040 }, { "epoch": 0.12, "grad_norm": 0.5912471531591597, "learning_rate": 1.960365300583767e-05, "loss": 0.2393, "step": 2041 }, { "epoch": 0.12, "grad_norm": 1.029688638479728, "learning_rate": 1.960313412042387e-05, "loss": 0.5656, "step": 2042 }, { "epoch": 0.12, "grad_norm": 0.4732960711478903, "learning_rate": 1.960261490245394e-05, "loss": 0.3001, "step": 2043 }, { "epoch": 0.12, "grad_norm": 0.5220022647134588, "learning_rate": 1.9602095351945872e-05, "loss": 0.3535, "step": 2044 }, { "epoch": 0.12, "grad_norm": 1.5883266373730363, "learning_rate": 1.9601575468917654e-05, "loss": 0.4899, "step": 2045 }, { "epoch": 0.12, "grad_norm": 0.3387809210650048, "learning_rate": 1.9601055253387292e-05, "loss": 0.2403, "step": 2046 }, { "epoch": 0.12, "grad_norm": 0.5623556957444193, "learning_rate": 1.9600534705372795e-05, "loss": 0.3546, "step": 2047 }, { "epoch": 0.12, "grad_norm": 0.5773352363021086, "learning_rate": 1.9600013824892198e-05, "loss": 0.3738, "step": 2048 }, { "epoch": 0.12, "grad_norm": 0.4232267894991952, "learning_rate": 1.9599492611963533e-05, "loss": 0.29, "step": 2049 }, { "epoch": 0.12, "grad_norm": 1.5190140255118185, "learning_rate": 1.9598971066604854e-05, "loss": 0.818, "step": 2050 }, { "epoch": 0.12, "grad_norm": 0.6714832479028133, "learning_rate": 1.9598449188834218e-05, "loss": 0.314, "step": 2051 }, { "epoch": 0.12, "grad_norm": 0.44430442128339687, "learning_rate": 1.95979269786697e-05, "loss": 0.2978, "step": 2052 }, { "epoch": 0.12, "grad_norm": 0.9537546657131378, "learning_rate": 1.959740443612938e-05, "loss": 0.5501, "step": 2053 }, { "epoch": 0.12, "grad_norm": 0.2242161983506829, "learning_rate": 1.9596881561231363e-05, "loss": 0.1592, "step": 2054 }, { "epoch": 0.12, "grad_norm": 0.48220806957042844, "learning_rate": 1.9596358353993747e-05, "loss": 0.3105, "step": 2055 }, { "epoch": 0.12, "grad_norm": 0.6976369230595237, "learning_rate": 1.959583481443465e-05, "loss": 0.4052, "step": 2056 }, { "epoch": 0.12, "grad_norm": 1.8327429933334323, "learning_rate": 1.9595310942572212e-05, "loss": 0.768, "step": 2057 }, { "epoch": 0.12, "grad_norm": 0.44394301554498045, "learning_rate": 1.9594786738424566e-05, "loss": 0.2232, "step": 2058 }, { "epoch": 0.12, "grad_norm": 0.7342598439395362, "learning_rate": 1.959426220200987e-05, "loss": 0.4006, "step": 2059 }, { "epoch": 0.12, "grad_norm": 0.427795716449992, "learning_rate": 1.9593737333346286e-05, "loss": 0.2239, "step": 2060 }, { "epoch": 0.12, "grad_norm": 0.4343683489018406, "learning_rate": 1.9593212132451992e-05, "loss": 0.2375, "step": 2061 }, { "epoch": 0.12, "grad_norm": 1.6866265621347991, "learning_rate": 1.959268659934517e-05, "loss": 0.7592, "step": 2062 }, { "epoch": 0.12, "grad_norm": 0.46897540943464006, "learning_rate": 1.9592160734044027e-05, "loss": 0.3492, "step": 2063 }, { "epoch": 0.12, "grad_norm": 0.46889413154603504, "learning_rate": 1.9591634536566766e-05, "loss": 0.271, "step": 2064 }, { "epoch": 0.12, "grad_norm": 1.0019504066133773, "learning_rate": 1.9591108006931618e-05, "loss": 0.5227, "step": 2065 }, { "epoch": 0.12, "grad_norm": 0.4826247658886829, "learning_rate": 1.9590581145156812e-05, "loss": 0.2342, "step": 2066 }, { "epoch": 0.12, "grad_norm": 0.46222434250869515, "learning_rate": 1.959005395126059e-05, "loss": 0.2521, "step": 2067 }, { "epoch": 0.12, "grad_norm": 0.5843926885025996, "learning_rate": 1.9589526425261213e-05, "loss": 0.4092, "step": 2068 }, { "epoch": 0.12, "grad_norm": 0.8389100016932158, "learning_rate": 1.958899856717695e-05, "loss": 0.5575, "step": 2069 }, { "epoch": 0.12, "grad_norm": 0.4238709325497757, "learning_rate": 1.958847037702608e-05, "loss": 0.334, "step": 2070 }, { "epoch": 0.12, "grad_norm": 0.4507205040580907, "learning_rate": 1.9587941854826892e-05, "loss": 0.3119, "step": 2071 }, { "epoch": 0.12, "grad_norm": 0.47985631218897673, "learning_rate": 1.9587413000597687e-05, "loss": 0.3043, "step": 2072 }, { "epoch": 0.12, "grad_norm": 0.7661228750951127, "learning_rate": 1.9586883814356785e-05, "loss": 0.4541, "step": 2073 }, { "epoch": 0.12, "grad_norm": 0.3672906869250385, "learning_rate": 1.958635429612251e-05, "loss": 0.2207, "step": 2074 }, { "epoch": 0.12, "grad_norm": 0.4746221052006005, "learning_rate": 1.9585824445913194e-05, "loss": 0.3761, "step": 2075 }, { "epoch": 0.12, "grad_norm": 0.4915279326174938, "learning_rate": 1.9585294263747192e-05, "loss": 0.3161, "step": 2076 }, { "epoch": 0.12, "grad_norm": 0.49898984543053004, "learning_rate": 1.9584763749642862e-05, "loss": 0.3595, "step": 2077 }, { "epoch": 0.12, "grad_norm": 0.5341824181711823, "learning_rate": 1.9584232903618576e-05, "loss": 0.3403, "step": 2078 }, { "epoch": 0.12, "grad_norm": 0.38017236195959025, "learning_rate": 1.9583701725692716e-05, "loss": 0.3137, "step": 2079 }, { "epoch": 0.12, "grad_norm": 0.32745929971134763, "learning_rate": 1.9583170215883677e-05, "loss": 0.2298, "step": 2080 }, { "epoch": 0.12, "grad_norm": 0.8625293763295988, "learning_rate": 1.9582638374209864e-05, "loss": 0.5644, "step": 2081 }, { "epoch": 0.12, "grad_norm": 0.4296796221626104, "learning_rate": 1.9582106200689698e-05, "loss": 0.3539, "step": 2082 }, { "epoch": 0.12, "grad_norm": 0.43436138803461416, "learning_rate": 1.9581573695341607e-05, "loss": 0.3427, "step": 2083 }, { "epoch": 0.12, "grad_norm": 0.9284517598696714, "learning_rate": 1.9581040858184028e-05, "loss": 0.3858, "step": 2084 }, { "epoch": 0.12, "grad_norm": 0.38693079051285023, "learning_rate": 1.958050768923542e-05, "loss": 0.3074, "step": 2085 }, { "epoch": 0.12, "grad_norm": 0.4582866470000151, "learning_rate": 1.957997418851424e-05, "loss": 0.2932, "step": 2086 }, { "epoch": 0.12, "grad_norm": 0.7347478591787949, "learning_rate": 1.9579440356038966e-05, "loss": 0.3689, "step": 2087 }, { "epoch": 0.12, "grad_norm": 0.3424729743541756, "learning_rate": 1.9578906191828086e-05, "loss": 0.2313, "step": 2088 }, { "epoch": 0.12, "grad_norm": 0.6525889969280737, "learning_rate": 1.9578371695900097e-05, "loss": 0.4339, "step": 2089 }, { "epoch": 0.12, "grad_norm": 0.49133362050474455, "learning_rate": 1.9577836868273504e-05, "loss": 0.3199, "step": 2090 }, { "epoch": 0.12, "grad_norm": 0.46103788217869224, "learning_rate": 1.9577301708966837e-05, "loss": 0.3133, "step": 2091 }, { "epoch": 0.12, "grad_norm": 0.3968124504129748, "learning_rate": 1.9576766217998622e-05, "loss": 0.2731, "step": 2092 }, { "epoch": 0.12, "grad_norm": 0.5598574134391687, "learning_rate": 1.9576230395387403e-05, "loss": 0.3231, "step": 2093 }, { "epoch": 0.12, "grad_norm": 0.507072225088975, "learning_rate": 1.9575694241151737e-05, "loss": 0.3042, "step": 2094 }, { "epoch": 0.12, "grad_norm": 0.4229706741547905, "learning_rate": 1.9575157755310193e-05, "loss": 0.3297, "step": 2095 }, { "epoch": 0.12, "grad_norm": 1.2787555528563692, "learning_rate": 1.957462093788135e-05, "loss": 0.677, "step": 2096 }, { "epoch": 0.12, "grad_norm": 0.40537782087377044, "learning_rate": 1.957408378888379e-05, "loss": 0.245, "step": 2097 }, { "epoch": 0.12, "grad_norm": 0.6392618964501624, "learning_rate": 1.957354630833612e-05, "loss": 0.4378, "step": 2098 }, { "epoch": 0.12, "grad_norm": 0.44083595616310706, "learning_rate": 1.957300849625696e-05, "loss": 0.3578, "step": 2099 }, { "epoch": 0.12, "grad_norm": 0.277591819404161, "learning_rate": 1.9572470352664923e-05, "loss": 0.1437, "step": 2100 }, { "epoch": 0.12, "grad_norm": 0.6445522078777529, "learning_rate": 1.957193187757865e-05, "loss": 0.4184, "step": 2101 }, { "epoch": 0.12, "grad_norm": 1.06003956945609, "learning_rate": 1.9571393071016785e-05, "loss": 0.7141, "step": 2102 }, { "epoch": 0.12, "grad_norm": 0.3327901938604386, "learning_rate": 1.9570853932997993e-05, "loss": 0.2564, "step": 2103 }, { "epoch": 0.12, "grad_norm": 0.5349142642984711, "learning_rate": 1.957031446354094e-05, "loss": 0.4241, "step": 2104 }, { "epoch": 0.12, "grad_norm": 0.7450362254049, "learning_rate": 1.9569774662664306e-05, "loss": 0.5647, "step": 2105 }, { "epoch": 0.12, "grad_norm": 0.2616609861296739, "learning_rate": 1.9569234530386792e-05, "loss": 0.1297, "step": 2106 }, { "epoch": 0.12, "grad_norm": 0.4211284765636636, "learning_rate": 1.9568694066727095e-05, "loss": 0.311, "step": 2107 }, { "epoch": 0.12, "grad_norm": 1.4618973142603389, "learning_rate": 1.9568153271703932e-05, "loss": 0.8132, "step": 2108 }, { "epoch": 0.12, "grad_norm": 0.738279974487151, "learning_rate": 1.9567612145336036e-05, "loss": 0.4929, "step": 2109 }, { "epoch": 0.12, "grad_norm": 0.44960921087855577, "learning_rate": 1.9567070687642142e-05, "loss": 0.317, "step": 2110 }, { "epoch": 0.12, "grad_norm": 0.4821347287488108, "learning_rate": 1.9566528898641e-05, "loss": 0.3665, "step": 2111 }, { "epoch": 0.12, "grad_norm": 0.32569543755682334, "learning_rate": 1.9565986778351376e-05, "loss": 0.1666, "step": 2112 }, { "epoch": 0.12, "grad_norm": 0.48347428505913215, "learning_rate": 1.9565444326792038e-05, "loss": 0.2783, "step": 2113 }, { "epoch": 0.12, "grad_norm": 1.5037634230328805, "learning_rate": 1.9564901543981776e-05, "loss": 0.7917, "step": 2114 }, { "epoch": 0.12, "grad_norm": 0.3697719835295979, "learning_rate": 1.9564358429939386e-05, "loss": 0.3053, "step": 2115 }, { "epoch": 0.12, "grad_norm": 0.5447348890954887, "learning_rate": 1.9563814984683674e-05, "loss": 0.3506, "step": 2116 }, { "epoch": 0.12, "grad_norm": 1.304263404947526, "learning_rate": 1.9563271208233462e-05, "loss": 0.8144, "step": 2117 }, { "epoch": 0.12, "grad_norm": 0.3159438724391187, "learning_rate": 1.9562727100607577e-05, "loss": 0.1621, "step": 2118 }, { "epoch": 0.12, "grad_norm": 0.41587057997986904, "learning_rate": 1.956218266182486e-05, "loss": 0.2664, "step": 2119 }, { "epoch": 0.12, "grad_norm": 1.0253769563866983, "learning_rate": 1.9561637891904176e-05, "loss": 0.568, "step": 2120 }, { "epoch": 0.12, "grad_norm": 0.6434762446373379, "learning_rate": 1.9561092790864376e-05, "loss": 0.4318, "step": 2121 }, { "epoch": 0.12, "grad_norm": 0.5164150783625712, "learning_rate": 1.956054735872435e-05, "loss": 0.3499, "step": 2122 }, { "epoch": 0.12, "grad_norm": 0.437135421408849, "learning_rate": 1.9560001595502978e-05, "loss": 0.3035, "step": 2123 }, { "epoch": 0.12, "grad_norm": 0.3042150377766891, "learning_rate": 1.955945550121916e-05, "loss": 0.1825, "step": 2124 }, { "epoch": 0.12, "grad_norm": 0.506806034858422, "learning_rate": 1.9558909075891812e-05, "loss": 0.3272, "step": 2125 }, { "epoch": 0.12, "grad_norm": 0.47732056135622875, "learning_rate": 1.955836231953985e-05, "loss": 0.3269, "step": 2126 }, { "epoch": 0.12, "grad_norm": 0.968804033210661, "learning_rate": 1.9557815232182216e-05, "loss": 0.4565, "step": 2127 }, { "epoch": 0.12, "grad_norm": 0.3925570608975105, "learning_rate": 1.9557267813837848e-05, "loss": 0.3436, "step": 2128 }, { "epoch": 0.12, "grad_norm": 0.9226390952793319, "learning_rate": 1.955672006452571e-05, "loss": 0.4685, "step": 2129 }, { "epoch": 0.12, "grad_norm": 0.4344185349778874, "learning_rate": 1.9556171984264765e-05, "loss": 0.2778, "step": 2130 }, { "epoch": 0.12, "grad_norm": 0.3822100632725823, "learning_rate": 1.9555623573073994e-05, "loss": 0.2504, "step": 2131 }, { "epoch": 0.12, "grad_norm": 1.166974059789117, "learning_rate": 1.955507483097239e-05, "loss": 0.548, "step": 2132 }, { "epoch": 0.12, "grad_norm": 0.7183519526044224, "learning_rate": 1.9554525757978958e-05, "loss": 0.4007, "step": 2133 }, { "epoch": 0.12, "grad_norm": 0.40625089003812276, "learning_rate": 1.955397635411271e-05, "loss": 0.3013, "step": 2134 }, { "epoch": 0.12, "grad_norm": 0.6427276061652427, "learning_rate": 1.955342661939267e-05, "loss": 0.4044, "step": 2135 }, { "epoch": 0.12, "grad_norm": 0.7481016203756692, "learning_rate": 1.9552876553837878e-05, "loss": 0.2465, "step": 2136 }, { "epoch": 0.12, "grad_norm": 0.4078942359314576, "learning_rate": 1.955232615746738e-05, "loss": 0.2344, "step": 2137 }, { "epoch": 0.12, "grad_norm": 0.9841112838622158, "learning_rate": 1.9551775430300238e-05, "loss": 0.4383, "step": 2138 }, { "epoch": 0.12, "grad_norm": 0.5783608548387257, "learning_rate": 1.9551224372355523e-05, "loss": 0.2947, "step": 2139 }, { "epoch": 0.12, "grad_norm": 0.513156088021875, "learning_rate": 1.955067298365232e-05, "loss": 0.3523, "step": 2140 }, { "epoch": 0.12, "grad_norm": 1.4460222044906592, "learning_rate": 1.9550121264209724e-05, "loss": 0.8275, "step": 2141 }, { "epoch": 0.12, "grad_norm": 0.47796263241095566, "learning_rate": 1.9549569214046837e-05, "loss": 0.261, "step": 2142 }, { "epoch": 0.12, "grad_norm": 0.5068551991890565, "learning_rate": 1.954901683318278e-05, "loss": 0.2742, "step": 2143 }, { "epoch": 0.12, "grad_norm": 0.7854309469545647, "learning_rate": 1.9548464121636678e-05, "loss": 0.39, "step": 2144 }, { "epoch": 0.12, "grad_norm": 0.9562992698117687, "learning_rate": 1.954791107942768e-05, "loss": 0.4997, "step": 2145 }, { "epoch": 0.12, "grad_norm": 0.49884170414180384, "learning_rate": 1.9547357706574926e-05, "loss": 0.2523, "step": 2146 }, { "epoch": 0.12, "grad_norm": 0.43289131925381763, "learning_rate": 1.9546804003097588e-05, "loss": 0.3589, "step": 2147 }, { "epoch": 0.12, "grad_norm": 1.1386748667131321, "learning_rate": 1.9546249969014836e-05, "loss": 0.673, "step": 2148 }, { "epoch": 0.12, "grad_norm": 0.478383355031457, "learning_rate": 1.954569560434586e-05, "loss": 0.283, "step": 2149 }, { "epoch": 0.12, "grad_norm": 0.5539994701491447, "learning_rate": 1.9545140909109854e-05, "loss": 0.3762, "step": 2150 }, { "epoch": 0.12, "grad_norm": 0.35583342252654265, "learning_rate": 1.954458588332603e-05, "loss": 0.168, "step": 2151 }, { "epoch": 0.12, "grad_norm": 0.47482346170901524, "learning_rate": 1.9544030527013603e-05, "loss": 0.236, "step": 2152 }, { "epoch": 0.12, "grad_norm": 1.488695547220567, "learning_rate": 1.9543474840191817e-05, "loss": 0.7431, "step": 2153 }, { "epoch": 0.12, "grad_norm": 0.4557534247583365, "learning_rate": 1.9542918822879902e-05, "loss": 0.3779, "step": 2154 }, { "epoch": 0.12, "grad_norm": 0.33288832250828193, "learning_rate": 1.954236247509712e-05, "loss": 0.2592, "step": 2155 }, { "epoch": 0.12, "grad_norm": 0.4796173843723215, "learning_rate": 1.954180579686274e-05, "loss": 0.3337, "step": 2156 }, { "epoch": 0.12, "grad_norm": 0.4089067855760762, "learning_rate": 1.954124878819603e-05, "loss": 0.2759, "step": 2157 }, { "epoch": 0.12, "grad_norm": 0.4690219866747868, "learning_rate": 1.9540691449116286e-05, "loss": 0.2872, "step": 2158 }, { "epoch": 0.12, "grad_norm": 0.445945068109367, "learning_rate": 1.9540133779642806e-05, "loss": 0.3255, "step": 2159 }, { "epoch": 0.12, "grad_norm": 0.620237870223999, "learning_rate": 1.9539575779794906e-05, "loss": 0.4513, "step": 2160 }, { "epoch": 0.12, "grad_norm": 0.4622798158558348, "learning_rate": 1.9539017449591905e-05, "loss": 0.3245, "step": 2161 }, { "epoch": 0.12, "grad_norm": 0.40301614161542826, "learning_rate": 1.9538458789053143e-05, "loss": 0.3361, "step": 2162 }, { "epoch": 0.12, "grad_norm": 0.4533281560660749, "learning_rate": 1.9537899798197963e-05, "loss": 0.2963, "step": 2163 }, { "epoch": 0.12, "grad_norm": 0.4209930222368525, "learning_rate": 1.953734047704572e-05, "loss": 0.281, "step": 2164 }, { "epoch": 0.12, "grad_norm": 0.36565295658400504, "learning_rate": 1.9536780825615788e-05, "loss": 0.2151, "step": 2165 }, { "epoch": 0.12, "grad_norm": 0.4422205941031057, "learning_rate": 1.9536220843927544e-05, "loss": 0.3669, "step": 2166 }, { "epoch": 0.12, "grad_norm": 0.4385665259665185, "learning_rate": 1.953566053200039e-05, "loss": 0.3155, "step": 2167 }, { "epoch": 0.12, "grad_norm": 0.6424769906790054, "learning_rate": 1.953509988985371e-05, "loss": 0.4169, "step": 2168 }, { "epoch": 0.12, "grad_norm": 1.194460975030824, "learning_rate": 1.953453891750694e-05, "loss": 0.7403, "step": 2169 }, { "epoch": 0.12, "grad_norm": 0.3717051558823366, "learning_rate": 1.9533977614979493e-05, "loss": 0.2938, "step": 2170 }, { "epoch": 0.12, "grad_norm": 0.3201587539393018, "learning_rate": 1.9533415982290813e-05, "loss": 0.2192, "step": 2171 }, { "epoch": 0.12, "grad_norm": 0.7368872184262916, "learning_rate": 1.9532854019460346e-05, "loss": 0.4229, "step": 2172 }, { "epoch": 0.12, "grad_norm": 0.44181930693740384, "learning_rate": 1.9532291726507557e-05, "loss": 0.2848, "step": 2173 }, { "epoch": 0.12, "grad_norm": 0.4664201908407062, "learning_rate": 1.9531729103451912e-05, "loss": 0.35, "step": 2174 }, { "epoch": 0.12, "grad_norm": 1.454446688689338, "learning_rate": 1.9531166150312902e-05, "loss": 0.605, "step": 2175 }, { "epoch": 0.13, "grad_norm": 0.46776768368543914, "learning_rate": 1.9530602867110015e-05, "loss": 0.2971, "step": 2176 }, { "epoch": 0.13, "grad_norm": 0.3477231628548521, "learning_rate": 1.953003925386276e-05, "loss": 0.2378, "step": 2177 }, { "epoch": 0.13, "grad_norm": 0.5094617271180392, "learning_rate": 1.9529475310590656e-05, "loss": 0.3366, "step": 2178 }, { "epoch": 0.13, "grad_norm": 0.46931079454798147, "learning_rate": 1.9528911037313233e-05, "loss": 0.3177, "step": 2179 }, { "epoch": 0.13, "grad_norm": 1.178746644251515, "learning_rate": 1.952834643405003e-05, "loss": 0.6243, "step": 2180 }, { "epoch": 0.13, "grad_norm": 1.9884192839837473, "learning_rate": 1.95277815008206e-05, "loss": 0.5839, "step": 2181 }, { "epoch": 0.13, "grad_norm": 0.4751766716695899, "learning_rate": 1.9527216237644508e-05, "loss": 0.2844, "step": 2182 }, { "epoch": 0.13, "grad_norm": 0.587711364919638, "learning_rate": 1.9526650644541326e-05, "loss": 0.3622, "step": 2183 }, { "epoch": 0.13, "grad_norm": 0.8459515983868039, "learning_rate": 1.952608472153064e-05, "loss": 0.3906, "step": 2184 }, { "epoch": 0.13, "grad_norm": 0.4486221294280846, "learning_rate": 1.952551846863205e-05, "loss": 0.2203, "step": 2185 }, { "epoch": 0.13, "grad_norm": 0.6553354179919787, "learning_rate": 1.9524951885865165e-05, "loss": 0.3764, "step": 2186 }, { "epoch": 0.13, "grad_norm": 1.830584238414141, "learning_rate": 1.952438497324961e-05, "loss": 0.7863, "step": 2187 }, { "epoch": 0.13, "grad_norm": 0.5252954020431466, "learning_rate": 1.9523817730805008e-05, "loss": 0.2538, "step": 2188 }, { "epoch": 0.13, "grad_norm": 0.8584255441697742, "learning_rate": 1.952325015855101e-05, "loss": 0.5336, "step": 2189 }, { "epoch": 0.13, "grad_norm": 0.3471580515229295, "learning_rate": 1.9522682256507268e-05, "loss": 0.2511, "step": 2190 }, { "epoch": 0.13, "grad_norm": 0.4088705601951199, "learning_rate": 1.952211402469345e-05, "loss": 0.2464, "step": 2191 }, { "epoch": 0.13, "grad_norm": 1.2705970535049596, "learning_rate": 1.952154546312923e-05, "loss": 0.6611, "step": 2192 }, { "epoch": 0.13, "grad_norm": 0.6454425353434776, "learning_rate": 1.9520976571834304e-05, "loss": 0.4209, "step": 2193 }, { "epoch": 0.13, "grad_norm": 0.44634869722763765, "learning_rate": 1.9520407350828364e-05, "loss": 0.2355, "step": 2194 }, { "epoch": 0.13, "grad_norm": 0.7939220131831405, "learning_rate": 1.9519837800131127e-05, "loss": 0.4377, "step": 2195 }, { "epoch": 0.13, "grad_norm": 0.3147220383048209, "learning_rate": 1.9519267919762318e-05, "loss": 0.1702, "step": 2196 }, { "epoch": 0.13, "grad_norm": 0.703105533799731, "learning_rate": 1.951869770974167e-05, "loss": 0.3809, "step": 2197 }, { "epoch": 0.13, "grad_norm": 0.47335849847144024, "learning_rate": 1.951812717008893e-05, "loss": 0.2881, "step": 2198 }, { "epoch": 0.13, "grad_norm": 1.1606156210211465, "learning_rate": 1.951755630082386e-05, "loss": 0.7156, "step": 2199 }, { "epoch": 0.13, "grad_norm": 0.45947436250339385, "learning_rate": 1.9516985101966218e-05, "loss": 0.3269, "step": 2200 }, { "epoch": 0.13, "grad_norm": 0.43358484416349463, "learning_rate": 1.9516413573535794e-05, "loss": 0.3523, "step": 2201 }, { "epoch": 0.13, "grad_norm": 0.5472429483088781, "learning_rate": 1.9515841715552376e-05, "loss": 0.3946, "step": 2202 }, { "epoch": 0.13, "grad_norm": 0.34276882389222835, "learning_rate": 1.951526952803577e-05, "loss": 0.1918, "step": 2203 }, { "epoch": 0.13, "grad_norm": 0.5604731992436495, "learning_rate": 1.951469701100579e-05, "loss": 0.3188, "step": 2204 }, { "epoch": 0.13, "grad_norm": 0.5485943753095354, "learning_rate": 1.951412416448226e-05, "loss": 0.4226, "step": 2205 }, { "epoch": 0.13, "grad_norm": 0.38949595158304373, "learning_rate": 1.951355098848502e-05, "loss": 0.3067, "step": 2206 }, { "epoch": 0.13, "grad_norm": 0.5935212725749861, "learning_rate": 1.9512977483033916e-05, "loss": 0.4256, "step": 2207 }, { "epoch": 0.13, "grad_norm": 0.3030094098212264, "learning_rate": 1.9512403648148813e-05, "loss": 0.1428, "step": 2208 }, { "epoch": 0.13, "grad_norm": 0.3956227850830546, "learning_rate": 1.951182948384958e-05, "loss": 0.2861, "step": 2209 }, { "epoch": 0.13, "grad_norm": 0.4697965880003851, "learning_rate": 1.9511254990156103e-05, "loss": 0.3211, "step": 2210 }, { "epoch": 0.13, "grad_norm": 0.9172394638687885, "learning_rate": 1.9510680167088275e-05, "loss": 0.4232, "step": 2211 }, { "epoch": 0.13, "grad_norm": 0.43535146422603604, "learning_rate": 1.9510105014665998e-05, "loss": 0.3297, "step": 2212 }, { "epoch": 0.13, "grad_norm": 0.5773882269858571, "learning_rate": 1.9509529532909196e-05, "loss": 0.4388, "step": 2213 }, { "epoch": 0.13, "grad_norm": 0.3137998718551292, "learning_rate": 1.9508953721837795e-05, "loss": 0.2522, "step": 2214 }, { "epoch": 0.13, "grad_norm": 0.35939192870248576, "learning_rate": 1.9508377581471735e-05, "loss": 0.1858, "step": 2215 }, { "epoch": 0.13, "grad_norm": 0.5116306737278765, "learning_rate": 1.9507801111830967e-05, "loss": 0.3576, "step": 2216 }, { "epoch": 0.13, "grad_norm": 0.5731809416440271, "learning_rate": 1.950722431293546e-05, "loss": 0.3353, "step": 2217 }, { "epoch": 0.13, "grad_norm": 0.47304259161573736, "learning_rate": 1.950664718480518e-05, "loss": 0.3236, "step": 2218 }, { "epoch": 0.13, "grad_norm": 0.6276525740396246, "learning_rate": 1.9506069727460116e-05, "loss": 0.4444, "step": 2219 }, { "epoch": 0.13, "grad_norm": 1.090832100391312, "learning_rate": 1.9505491940920268e-05, "loss": 0.6109, "step": 2220 }, { "epoch": 0.13, "grad_norm": 0.2481407623113843, "learning_rate": 1.9504913825205643e-05, "loss": 0.1352, "step": 2221 }, { "epoch": 0.13, "grad_norm": 0.4448099721555093, "learning_rate": 1.950433538033626e-05, "loss": 0.3269, "step": 2222 }, { "epoch": 0.13, "grad_norm": 0.866009184797271, "learning_rate": 1.950375660633215e-05, "loss": 0.55, "step": 2223 }, { "epoch": 0.13, "grad_norm": 0.4384851116001498, "learning_rate": 1.950317750321336e-05, "loss": 0.2842, "step": 2224 }, { "epoch": 0.13, "grad_norm": 0.6313666828316659, "learning_rate": 1.950259807099994e-05, "loss": 0.3842, "step": 2225 }, { "epoch": 0.13, "grad_norm": 0.4331107293530284, "learning_rate": 1.950201830971196e-05, "loss": 0.3443, "step": 2226 }, { "epoch": 0.13, "grad_norm": 0.23533665688629704, "learning_rate": 1.9501438219369492e-05, "loss": 0.1392, "step": 2227 }, { "epoch": 0.13, "grad_norm": 0.6139970636009742, "learning_rate": 1.9500857799992628e-05, "loss": 0.4041, "step": 2228 }, { "epoch": 0.13, "grad_norm": 0.48155996698312703, "learning_rate": 1.9500277051601465e-05, "loss": 0.3663, "step": 2229 }, { "epoch": 0.13, "grad_norm": 0.4152605894643503, "learning_rate": 1.949969597421612e-05, "loss": 0.2154, "step": 2230 }, { "epoch": 0.13, "grad_norm": 0.6361750555191724, "learning_rate": 1.9499114567856708e-05, "loss": 0.4438, "step": 2231 }, { "epoch": 0.13, "grad_norm": 1.2016891543780488, "learning_rate": 1.9498532832543372e-05, "loss": 0.6434, "step": 2232 }, { "epoch": 0.13, "grad_norm": 0.39561891747852557, "learning_rate": 1.9497950768296246e-05, "loss": 0.2549, "step": 2233 }, { "epoch": 0.13, "grad_norm": 0.31869872802169985, "learning_rate": 1.9497368375135497e-05, "loss": 0.2302, "step": 2234 }, { "epoch": 0.13, "grad_norm": 0.8584671892353136, "learning_rate": 1.949678565308129e-05, "loss": 0.4976, "step": 2235 }, { "epoch": 0.13, "grad_norm": 0.8802500200900603, "learning_rate": 1.9496202602153805e-05, "loss": 0.4712, "step": 2236 }, { "epoch": 0.13, "grad_norm": 0.35855578518631354, "learning_rate": 1.949561922237323e-05, "loss": 0.2521, "step": 2237 }, { "epoch": 0.13, "grad_norm": 0.5343010087481445, "learning_rate": 1.9495035513759772e-05, "loss": 0.386, "step": 2238 }, { "epoch": 0.13, "grad_norm": 0.3763359873365377, "learning_rate": 1.9494451476333637e-05, "loss": 0.2417, "step": 2239 }, { "epoch": 0.13, "grad_norm": 0.7266720426527591, "learning_rate": 1.949386711011506e-05, "loss": 0.3591, "step": 2240 }, { "epoch": 0.13, "grad_norm": 0.3434275838027053, "learning_rate": 1.9493282415124274e-05, "loss": 0.279, "step": 2241 }, { "epoch": 0.13, "grad_norm": 0.793014395205871, "learning_rate": 1.9492697391381523e-05, "loss": 0.3409, "step": 2242 }, { "epoch": 0.13, "grad_norm": 0.4304945592429074, "learning_rate": 1.949211203890707e-05, "loss": 0.2789, "step": 2243 }, { "epoch": 0.13, "grad_norm": 0.8892252947519116, "learning_rate": 1.949152635772119e-05, "loss": 0.617, "step": 2244 }, { "epoch": 0.13, "grad_norm": 0.362436878968347, "learning_rate": 1.9490940347844156e-05, "loss": 0.3207, "step": 2245 }, { "epoch": 0.13, "grad_norm": 0.44349164878713104, "learning_rate": 1.9490354009296268e-05, "loss": 0.3616, "step": 2246 }, { "epoch": 0.13, "grad_norm": 0.3274107448446957, "learning_rate": 1.9489767342097824e-05, "loss": 0.2196, "step": 2247 }, { "epoch": 0.13, "grad_norm": 0.8783237744052049, "learning_rate": 1.948918034626915e-05, "loss": 0.5678, "step": 2248 }, { "epoch": 0.13, "grad_norm": 0.43709607755935653, "learning_rate": 1.948859302183057e-05, "loss": 0.329, "step": 2249 }, { "epoch": 0.13, "grad_norm": 0.4099301199962942, "learning_rate": 1.9488005368802415e-05, "loss": 0.2837, "step": 2250 }, { "epoch": 0.13, "grad_norm": 0.5420335066249002, "learning_rate": 1.948741738720505e-05, "loss": 0.4656, "step": 2251 }, { "epoch": 0.13, "grad_norm": 0.3731959010848403, "learning_rate": 1.9486829077058823e-05, "loss": 0.3333, "step": 2252 }, { "epoch": 0.13, "grad_norm": 0.2855765128398837, "learning_rate": 1.9486240438384114e-05, "loss": 0.2129, "step": 2253 }, { "epoch": 0.13, "grad_norm": 0.8666950180014855, "learning_rate": 1.9485651471201306e-05, "loss": 0.4876, "step": 2254 }, { "epoch": 0.13, "grad_norm": 0.46097536285247287, "learning_rate": 1.94850621755308e-05, "loss": 0.3323, "step": 2255 }, { "epoch": 0.13, "grad_norm": 0.7361756550595048, "learning_rate": 1.9484472551392993e-05, "loss": 0.4123, "step": 2256 }, { "epoch": 0.13, "grad_norm": 0.3658556105623814, "learning_rate": 1.9483882598808315e-05, "loss": 0.3458, "step": 2257 }, { "epoch": 0.13, "grad_norm": 0.44969033758873034, "learning_rate": 1.948329231779719e-05, "loss": 0.3461, "step": 2258 }, { "epoch": 0.13, "grad_norm": 0.5560341222882007, "learning_rate": 1.9482701708380056e-05, "loss": 0.2802, "step": 2259 }, { "epoch": 0.13, "grad_norm": 0.4553291061872389, "learning_rate": 1.948211077057737e-05, "loss": 0.2485, "step": 2260 }, { "epoch": 0.13, "grad_norm": 0.4033237221641209, "learning_rate": 1.9481519504409596e-05, "loss": 0.2866, "step": 2261 }, { "epoch": 0.13, "grad_norm": 0.5367463676843832, "learning_rate": 1.948092790989721e-05, "loss": 0.3968, "step": 2262 }, { "epoch": 0.13, "grad_norm": 0.5626236268127532, "learning_rate": 1.94803359870607e-05, "loss": 0.3514, "step": 2263 }, { "epoch": 0.13, "grad_norm": 0.4338608687064876, "learning_rate": 1.947974373592056e-05, "loss": 0.3034, "step": 2264 }, { "epoch": 0.13, "grad_norm": 0.4131007253237356, "learning_rate": 1.9479151156497303e-05, "loss": 0.3456, "step": 2265 }, { "epoch": 0.13, "grad_norm": 0.7775496063551903, "learning_rate": 1.9478558248811448e-05, "loss": 0.3321, "step": 2266 }, { "epoch": 0.13, "grad_norm": 0.35258832797577216, "learning_rate": 1.947796501288353e-05, "loss": 0.2487, "step": 2267 }, { "epoch": 0.13, "grad_norm": 0.6701083819158502, "learning_rate": 1.947737144873409e-05, "loss": 0.3394, "step": 2268 }, { "epoch": 0.13, "grad_norm": 0.43898287408668396, "learning_rate": 1.9476777556383685e-05, "loss": 0.3219, "step": 2269 }, { "epoch": 0.13, "grad_norm": 0.3596482929485019, "learning_rate": 1.947618333585288e-05, "loss": 0.2842, "step": 2270 }, { "epoch": 0.13, "grad_norm": 1.1019436057037184, "learning_rate": 1.947558878716225e-05, "loss": 0.7142, "step": 2271 }, { "epoch": 0.13, "grad_norm": 0.5411126947410136, "learning_rate": 1.9474993910332394e-05, "loss": 0.3461, "step": 2272 }, { "epoch": 0.13, "grad_norm": 0.40898556290402976, "learning_rate": 1.9474398705383904e-05, "loss": 0.2542, "step": 2273 }, { "epoch": 0.13, "grad_norm": 0.6471286583545248, "learning_rate": 1.9473803172337396e-05, "loss": 0.3206, "step": 2274 }, { "epoch": 0.13, "grad_norm": 0.8779107686075143, "learning_rate": 1.947320731121349e-05, "loss": 0.5398, "step": 2275 }, { "epoch": 0.13, "grad_norm": 0.3532457572292047, "learning_rate": 1.947261112203282e-05, "loss": 0.2389, "step": 2276 }, { "epoch": 0.13, "grad_norm": 0.4493598183529648, "learning_rate": 1.947201460481604e-05, "loss": 0.3389, "step": 2277 }, { "epoch": 0.13, "grad_norm": 1.2898301269699424, "learning_rate": 1.9471417759583796e-05, "loss": 0.7665, "step": 2278 }, { "epoch": 0.13, "grad_norm": 0.465301976628202, "learning_rate": 1.9470820586356763e-05, "loss": 0.2486, "step": 2279 }, { "epoch": 0.13, "grad_norm": 0.8359470240166336, "learning_rate": 1.9470223085155622e-05, "loss": 0.3916, "step": 2280 }, { "epoch": 0.13, "grad_norm": 0.36417049272575086, "learning_rate": 1.946962525600106e-05, "loss": 0.2749, "step": 2281 }, { "epoch": 0.13, "grad_norm": 0.39093612328850025, "learning_rate": 1.9469027098913787e-05, "loss": 0.2384, "step": 2282 }, { "epoch": 0.13, "grad_norm": 1.4785923875213445, "learning_rate": 1.946842861391451e-05, "loss": 0.876, "step": 2283 }, { "epoch": 0.13, "grad_norm": 0.49453376260267884, "learning_rate": 1.9467829801023957e-05, "loss": 0.3659, "step": 2284 }, { "epoch": 0.13, "grad_norm": 0.5000531898361844, "learning_rate": 1.9467230660262864e-05, "loss": 0.3059, "step": 2285 }, { "epoch": 0.13, "grad_norm": 0.5643620845088193, "learning_rate": 1.9466631191651984e-05, "loss": 0.305, "step": 2286 }, { "epoch": 0.13, "grad_norm": 0.4382366231139346, "learning_rate": 1.9466031395212073e-05, "loss": 0.3067, "step": 2287 }, { "epoch": 0.13, "grad_norm": 0.40747576339356095, "learning_rate": 1.9465431270963898e-05, "loss": 0.2737, "step": 2288 }, { "epoch": 0.13, "grad_norm": 0.4389132877484424, "learning_rate": 1.9464830818928247e-05, "loss": 0.2912, "step": 2289 }, { "epoch": 0.13, "grad_norm": 0.8213346014038311, "learning_rate": 1.946423003912591e-05, "loss": 0.648, "step": 2290 }, { "epoch": 0.13, "grad_norm": 0.3995884995275768, "learning_rate": 1.94636289315777e-05, "loss": 0.3271, "step": 2291 }, { "epoch": 0.13, "grad_norm": 0.8771463518079066, "learning_rate": 1.946302749630442e-05, "loss": 0.4139, "step": 2292 }, { "epoch": 0.13, "grad_norm": 0.3162617732642643, "learning_rate": 1.9462425733326906e-05, "loss": 0.2405, "step": 2293 }, { "epoch": 0.13, "grad_norm": 0.40209065551847967, "learning_rate": 1.9461823642666e-05, "loss": 0.2874, "step": 2294 }, { "epoch": 0.13, "grad_norm": 0.9720138998527125, "learning_rate": 1.9461221224342544e-05, "loss": 0.5947, "step": 2295 }, { "epoch": 0.13, "grad_norm": 0.46874994093569994, "learning_rate": 1.9460618478377406e-05, "loss": 0.342, "step": 2296 }, { "epoch": 0.13, "grad_norm": 0.4541039308298201, "learning_rate": 1.9460015404791456e-05, "loss": 0.3137, "step": 2297 }, { "epoch": 0.13, "grad_norm": 0.9474289796905376, "learning_rate": 1.9459412003605577e-05, "loss": 0.595, "step": 2298 }, { "epoch": 0.13, "grad_norm": 0.3041258639698369, "learning_rate": 1.945880827484067e-05, "loss": 0.1296, "step": 2299 }, { "epoch": 0.13, "grad_norm": 0.44826921009046905, "learning_rate": 1.9458204218517638e-05, "loss": 0.2813, "step": 2300 }, { "epoch": 0.13, "grad_norm": 0.4353294044673283, "learning_rate": 1.94575998346574e-05, "loss": 0.3258, "step": 2301 }, { "epoch": 0.13, "grad_norm": 0.9102026658374495, "learning_rate": 1.945699512328089e-05, "loss": 0.4023, "step": 2302 }, { "epoch": 0.13, "grad_norm": 0.4436564510925583, "learning_rate": 1.9456390084409044e-05, "loss": 0.3012, "step": 2303 }, { "epoch": 0.13, "grad_norm": 0.8056583922169369, "learning_rate": 1.9455784718062813e-05, "loss": 0.5757, "step": 2304 }, { "epoch": 0.13, "grad_norm": 0.2843505676702236, "learning_rate": 1.9455179024263166e-05, "loss": 0.1952, "step": 2305 }, { "epoch": 0.13, "grad_norm": 0.42052209498560444, "learning_rate": 1.9454573003031078e-05, "loss": 0.2669, "step": 2306 }, { "epoch": 0.13, "grad_norm": 0.9245941501277933, "learning_rate": 1.945396665438753e-05, "loss": 0.6041, "step": 2307 }, { "epoch": 0.13, "grad_norm": 0.5978445007136682, "learning_rate": 1.9453359978353524e-05, "loss": 0.4349, "step": 2308 }, { "epoch": 0.13, "grad_norm": 0.32742532663412943, "learning_rate": 1.945275297495007e-05, "loss": 0.2599, "step": 2309 }, { "epoch": 0.13, "grad_norm": 0.9431148146992495, "learning_rate": 1.9452145644198185e-05, "loss": 0.6554, "step": 2310 }, { "epoch": 0.13, "grad_norm": 0.3051067841482634, "learning_rate": 1.9451537986118904e-05, "loss": 0.1965, "step": 2311 }, { "epoch": 0.13, "grad_norm": 0.4158374417954412, "learning_rate": 1.945093000073327e-05, "loss": 0.2121, "step": 2312 }, { "epoch": 0.13, "grad_norm": 0.4787539021841934, "learning_rate": 1.9450321688062336e-05, "loss": 0.3388, "step": 2313 }, { "epoch": 0.13, "grad_norm": 1.1867093489703773, "learning_rate": 1.944971304812717e-05, "loss": 0.5359, "step": 2314 }, { "epoch": 0.13, "grad_norm": 0.31635180255683376, "learning_rate": 1.9449104080948842e-05, "loss": 0.2358, "step": 2315 }, { "epoch": 0.13, "grad_norm": 1.9261529169569618, "learning_rate": 1.9448494786548448e-05, "loss": 0.8326, "step": 2316 }, { "epoch": 0.13, "grad_norm": 0.34949286273506625, "learning_rate": 1.944788516494709e-05, "loss": 0.2497, "step": 2317 }, { "epoch": 0.13, "grad_norm": 0.4379583841036261, "learning_rate": 1.944727521616587e-05, "loss": 0.2183, "step": 2318 }, { "epoch": 0.13, "grad_norm": 0.8861251508067872, "learning_rate": 1.9446664940225917e-05, "loss": 0.4202, "step": 2319 }, { "epoch": 0.13, "grad_norm": 0.5479306060921134, "learning_rate": 1.9446054337148364e-05, "loss": 0.3957, "step": 2320 }, { "epoch": 0.13, "grad_norm": 0.8068260676833023, "learning_rate": 1.9445443406954357e-05, "loss": 0.4384, "step": 2321 }, { "epoch": 0.13, "grad_norm": 0.4521357323513071, "learning_rate": 1.9444832149665048e-05, "loss": 0.285, "step": 2322 }, { "epoch": 0.13, "grad_norm": 0.3373624792377293, "learning_rate": 1.944422056530161e-05, "loss": 0.1889, "step": 2323 }, { "epoch": 0.13, "grad_norm": 0.5131559776901216, "learning_rate": 1.944360865388522e-05, "loss": 0.3261, "step": 2324 }, { "epoch": 0.13, "grad_norm": 0.4789087105248987, "learning_rate": 1.9442996415437066e-05, "loss": 0.3175, "step": 2325 }, { "epoch": 0.13, "grad_norm": 1.0279162829610848, "learning_rate": 1.9442383849978354e-05, "loss": 0.6445, "step": 2326 }, { "epoch": 0.13, "grad_norm": 0.5037670907171229, "learning_rate": 1.9441770957530295e-05, "loss": 0.3263, "step": 2327 }, { "epoch": 0.13, "grad_norm": 0.4267882635388313, "learning_rate": 1.9441157738114114e-05, "loss": 0.2697, "step": 2328 }, { "epoch": 0.13, "grad_norm": 0.3900503798338192, "learning_rate": 1.9440544191751046e-05, "loss": 0.2927, "step": 2329 }, { "epoch": 0.13, "grad_norm": 0.3245214667402623, "learning_rate": 1.943993031846234e-05, "loss": 0.2375, "step": 2330 }, { "epoch": 0.13, "grad_norm": 0.7409088978080296, "learning_rate": 1.9439316118269248e-05, "loss": 0.3682, "step": 2331 }, { "epoch": 0.13, "grad_norm": 0.5008460271734648, "learning_rate": 1.943870159119305e-05, "loss": 0.3801, "step": 2332 }, { "epoch": 0.13, "grad_norm": 0.41324478033839196, "learning_rate": 1.9438086737255022e-05, "loss": 0.2974, "step": 2333 }, { "epoch": 0.13, "grad_norm": 1.0615661938603904, "learning_rate": 1.9437471556476454e-05, "loss": 0.6605, "step": 2334 }, { "epoch": 0.13, "grad_norm": 0.7578715878324432, "learning_rate": 1.9436856048878653e-05, "loss": 0.4057, "step": 2335 }, { "epoch": 0.13, "grad_norm": 0.4578545746760707, "learning_rate": 1.943624021448293e-05, "loss": 0.2886, "step": 2336 }, { "epoch": 0.13, "grad_norm": 0.46684580870173853, "learning_rate": 1.9435624053310617e-05, "loss": 0.335, "step": 2337 }, { "epoch": 0.13, "grad_norm": 0.2697068340642194, "learning_rate": 1.943500756538305e-05, "loss": 0.1427, "step": 2338 }, { "epoch": 0.13, "grad_norm": 0.620415357649466, "learning_rate": 1.943439075072157e-05, "loss": 0.2752, "step": 2339 }, { "epoch": 0.13, "grad_norm": 0.4502906967970935, "learning_rate": 1.9433773609347553e-05, "loss": 0.3357, "step": 2340 }, { "epoch": 0.13, "grad_norm": 0.5019698091806195, "learning_rate": 1.9433156141282356e-05, "loss": 0.3581, "step": 2341 }, { "epoch": 0.13, "grad_norm": 0.6175844702796243, "learning_rate": 1.943253834654737e-05, "loss": 0.4041, "step": 2342 }, { "epoch": 0.13, "grad_norm": 0.365708234558843, "learning_rate": 1.9431920225163984e-05, "loss": 0.2691, "step": 2343 }, { "epoch": 0.13, "grad_norm": 0.45935395249114086, "learning_rate": 1.9431301777153607e-05, "loss": 0.2848, "step": 2344 }, { "epoch": 0.13, "grad_norm": 0.44585991482447607, "learning_rate": 1.9430683002537657e-05, "loss": 0.327, "step": 2345 }, { "epoch": 0.13, "grad_norm": 0.3728807303254334, "learning_rate": 1.9430063901337562e-05, "loss": 0.3105, "step": 2346 }, { "epoch": 0.13, "grad_norm": 0.7637717215205987, "learning_rate": 1.9429444473574753e-05, "loss": 0.5145, "step": 2347 }, { "epoch": 0.13, "grad_norm": 0.4234374308721751, "learning_rate": 1.942882471927069e-05, "loss": 0.2724, "step": 2348 }, { "epoch": 0.13, "grad_norm": 0.569665903816151, "learning_rate": 1.9428204638446834e-05, "loss": 0.412, "step": 2349 }, { "epoch": 0.14, "grad_norm": 0.4707793907074113, "learning_rate": 1.9427584231124656e-05, "loss": 0.3092, "step": 2350 }, { "epoch": 0.14, "grad_norm": 0.2978071148204672, "learning_rate": 1.942696349732564e-05, "loss": 0.1615, "step": 2351 }, { "epoch": 0.14, "grad_norm": 0.4934235765164325, "learning_rate": 1.9426342437071287e-05, "loss": 0.3496, "step": 2352 }, { "epoch": 0.14, "grad_norm": 0.5296265448238974, "learning_rate": 1.94257210503831e-05, "loss": 0.4341, "step": 2353 }, { "epoch": 0.14, "grad_norm": 0.4039824220929225, "learning_rate": 1.9425099337282596e-05, "loss": 0.2677, "step": 2354 }, { "epoch": 0.14, "grad_norm": 0.46538885633625465, "learning_rate": 1.942447729779131e-05, "loss": 0.3583, "step": 2355 }, { "epoch": 0.14, "grad_norm": 0.5022552896111364, "learning_rate": 1.9423854931930778e-05, "loss": 0.3459, "step": 2356 }, { "epoch": 0.14, "grad_norm": 0.2656556336693924, "learning_rate": 1.9423232239722557e-05, "loss": 0.1004, "step": 2357 }, { "epoch": 0.14, "grad_norm": 0.3751636957128666, "learning_rate": 1.9422609221188208e-05, "loss": 0.2678, "step": 2358 }, { "epoch": 0.14, "grad_norm": 0.7635549619993852, "learning_rate": 1.9421985876349307e-05, "loss": 0.5, "step": 2359 }, { "epoch": 0.14, "grad_norm": 0.4246343907147413, "learning_rate": 1.942136220522744e-05, "loss": 0.3152, "step": 2360 }, { "epoch": 0.14, "grad_norm": 0.40637458685864913, "learning_rate": 1.9420738207844202e-05, "loss": 0.2499, "step": 2361 }, { "epoch": 0.14, "grad_norm": 1.2699260617905255, "learning_rate": 1.9420113884221207e-05, "loss": 0.7759, "step": 2362 }, { "epoch": 0.14, "grad_norm": 0.2895707578621004, "learning_rate": 1.9419489234380077e-05, "loss": 0.1907, "step": 2363 }, { "epoch": 0.14, "grad_norm": 0.42114540707427844, "learning_rate": 1.9418864258342433e-05, "loss": 0.3008, "step": 2364 }, { "epoch": 0.14, "grad_norm": 0.823988721160775, "learning_rate": 1.941823895612993e-05, "loss": 0.5296, "step": 2365 }, { "epoch": 0.14, "grad_norm": 0.5848088756752329, "learning_rate": 1.9417613327764214e-05, "loss": 0.3968, "step": 2366 }, { "epoch": 0.14, "grad_norm": 0.4707688843312788, "learning_rate": 1.9416987373266957e-05, "loss": 0.2714, "step": 2367 }, { "epoch": 0.14, "grad_norm": 0.4300757765992352, "learning_rate": 1.941636109265983e-05, "loss": 0.3517, "step": 2368 }, { "epoch": 0.14, "grad_norm": 0.3547634392299474, "learning_rate": 1.941573448596452e-05, "loss": 0.2276, "step": 2369 }, { "epoch": 0.14, "grad_norm": 0.4239024769417483, "learning_rate": 1.9415107553202736e-05, "loss": 0.3535, "step": 2370 }, { "epoch": 0.14, "grad_norm": 0.5859513694469434, "learning_rate": 1.9414480294396178e-05, "loss": 0.3019, "step": 2371 }, { "epoch": 0.14, "grad_norm": 0.41543686750719744, "learning_rate": 1.9413852709566574e-05, "loss": 0.3323, "step": 2372 }, { "epoch": 0.14, "grad_norm": 0.4297104608283397, "learning_rate": 1.9413224798735655e-05, "loss": 0.3159, "step": 2373 }, { "epoch": 0.14, "grad_norm": 0.9676280782400583, "learning_rate": 1.9412596561925164e-05, "loss": 0.5264, "step": 2374 }, { "epoch": 0.14, "grad_norm": 0.7162830264380677, "learning_rate": 1.9411967999156866e-05, "loss": 0.5078, "step": 2375 }, { "epoch": 0.14, "grad_norm": 0.3800369126423308, "learning_rate": 1.9411339110452512e-05, "loss": 0.3029, "step": 2376 }, { "epoch": 0.14, "grad_norm": 0.3623660175814287, "learning_rate": 1.9410709895833895e-05, "loss": 0.2252, "step": 2377 }, { "epoch": 0.14, "grad_norm": 0.7698720451832731, "learning_rate": 1.9410080355322797e-05, "loss": 0.4136, "step": 2378 }, { "epoch": 0.14, "grad_norm": 0.5038883234863918, "learning_rate": 1.9409450488941018e-05, "loss": 0.3308, "step": 2379 }, { "epoch": 0.14, "grad_norm": 0.523722918460601, "learning_rate": 1.9408820296710377e-05, "loss": 0.3026, "step": 2380 }, { "epoch": 0.14, "grad_norm": 0.6180731282065387, "learning_rate": 1.9408189778652694e-05, "loss": 0.473, "step": 2381 }, { "epoch": 0.14, "grad_norm": 0.4264018524998352, "learning_rate": 1.94075589347898e-05, "loss": 0.3437, "step": 2382 }, { "epoch": 0.14, "grad_norm": 0.34952776740085484, "learning_rate": 1.940692776514355e-05, "loss": 0.1839, "step": 2383 }, { "epoch": 0.14, "grad_norm": 0.3725548545644662, "learning_rate": 1.9406296269735792e-05, "loss": 0.2575, "step": 2384 }, { "epoch": 0.14, "grad_norm": 0.5378553612475551, "learning_rate": 1.94056644485884e-05, "loss": 0.3269, "step": 2385 }, { "epoch": 0.14, "grad_norm": 1.312515301867269, "learning_rate": 1.940503230172325e-05, "loss": 0.8482, "step": 2386 }, { "epoch": 0.14, "grad_norm": 0.6711137585190222, "learning_rate": 1.940439982916224e-05, "loss": 0.3149, "step": 2387 }, { "epoch": 0.14, "grad_norm": 0.39804796254758124, "learning_rate": 1.9403767030927265e-05, "loss": 0.3114, "step": 2388 }, { "epoch": 0.14, "grad_norm": 0.49038512112971977, "learning_rate": 1.9403133907040245e-05, "loss": 0.3422, "step": 2389 }, { "epoch": 0.14, "grad_norm": 0.2498696865928933, "learning_rate": 1.94025004575231e-05, "loss": 0.1121, "step": 2390 }, { "epoch": 0.14, "grad_norm": 0.4388817507166522, "learning_rate": 1.940186668239777e-05, "loss": 0.3111, "step": 2391 }, { "epoch": 0.14, "grad_norm": 0.5706651361437006, "learning_rate": 1.94012325816862e-05, "loss": 0.3399, "step": 2392 }, { "epoch": 0.14, "grad_norm": 1.1738840800470578, "learning_rate": 1.9400598155410352e-05, "loss": 0.4457, "step": 2393 }, { "epoch": 0.14, "grad_norm": 0.4022071782421249, "learning_rate": 1.939996340359219e-05, "loss": 0.2773, "step": 2394 }, { "epoch": 0.14, "grad_norm": 0.3049991474196785, "learning_rate": 1.9399328326253702e-05, "loss": 0.2383, "step": 2395 }, { "epoch": 0.14, "grad_norm": 0.5410903011939309, "learning_rate": 1.9398692923416877e-05, "loss": 0.3912, "step": 2396 }, { "epoch": 0.14, "grad_norm": 0.38073227562966827, "learning_rate": 1.9398057195103725e-05, "loss": 0.2435, "step": 2397 }, { "epoch": 0.14, "grad_norm": 1.3803216316707985, "learning_rate": 1.9397421141336252e-05, "loss": 0.6396, "step": 2398 }, { "epoch": 0.14, "grad_norm": 0.5643971893815023, "learning_rate": 1.9396784762136488e-05, "loss": 0.3818, "step": 2399 }, { "epoch": 0.14, "grad_norm": 0.31425978785959, "learning_rate": 1.9396148057526473e-05, "loss": 0.2359, "step": 2400 }, { "epoch": 0.14, "grad_norm": 1.311778884841662, "learning_rate": 1.9395511027528257e-05, "loss": 0.653, "step": 2401 }, { "epoch": 0.14, "grad_norm": 0.3134910613406851, "learning_rate": 1.9394873672163896e-05, "loss": 0.1867, "step": 2402 }, { "epoch": 0.14, "grad_norm": 0.45026996141169107, "learning_rate": 1.9394235991455464e-05, "loss": 0.2147, "step": 2403 }, { "epoch": 0.14, "grad_norm": 0.6240035351919085, "learning_rate": 1.9393597985425044e-05, "loss": 0.3687, "step": 2404 }, { "epoch": 0.14, "grad_norm": 1.0016231929095378, "learning_rate": 1.939295965409473e-05, "loss": 0.4781, "step": 2405 }, { "epoch": 0.14, "grad_norm": 0.39450446781921794, "learning_rate": 1.9392320997486624e-05, "loss": 0.234, "step": 2406 }, { "epoch": 0.14, "grad_norm": 0.6254131653942641, "learning_rate": 1.939168201562285e-05, "loss": 0.3663, "step": 2407 }, { "epoch": 0.14, "grad_norm": 0.3195029509674968, "learning_rate": 1.939104270852553e-05, "loss": 0.2335, "step": 2408 }, { "epoch": 0.14, "grad_norm": 0.46483875879314784, "learning_rate": 1.9390403076216805e-05, "loss": 0.3006, "step": 2409 }, { "epoch": 0.14, "grad_norm": 1.1589225599424164, "learning_rate": 1.9389763118718824e-05, "loss": 0.4651, "step": 2410 }, { "epoch": 0.14, "grad_norm": 0.6169719370131748, "learning_rate": 1.938912283605375e-05, "loss": 0.3751, "step": 2411 }, { "epoch": 0.14, "grad_norm": 0.3554253129647085, "learning_rate": 1.9388482228243758e-05, "loss": 0.3024, "step": 2412 }, { "epoch": 0.14, "grad_norm": 0.4486534251987144, "learning_rate": 1.938784129531103e-05, "loss": 0.2002, "step": 2413 }, { "epoch": 0.14, "grad_norm": 0.47231607456050406, "learning_rate": 1.938720003727776e-05, "loss": 0.2697, "step": 2414 }, { "epoch": 0.14, "grad_norm": 0.4328718171018966, "learning_rate": 1.9386558454166158e-05, "loss": 0.3028, "step": 2415 }, { "epoch": 0.14, "grad_norm": 0.9168616970784886, "learning_rate": 1.938591654599844e-05, "loss": 0.324, "step": 2416 }, { "epoch": 0.14, "grad_norm": 1.0797620646008737, "learning_rate": 1.9385274312796834e-05, "loss": 0.6565, "step": 2417 }, { "epoch": 0.14, "grad_norm": 0.400563368589376, "learning_rate": 1.9384631754583586e-05, "loss": 0.2876, "step": 2418 }, { "epoch": 0.14, "grad_norm": 0.7285786800582772, "learning_rate": 1.938398887138094e-05, "loss": 0.2817, "step": 2419 }, { "epoch": 0.14, "grad_norm": 0.2902928252182725, "learning_rate": 1.9383345663211162e-05, "loss": 0.2576, "step": 2420 }, { "epoch": 0.14, "grad_norm": 0.3972358838624784, "learning_rate": 1.938270213009653e-05, "loss": 0.2703, "step": 2421 }, { "epoch": 0.14, "grad_norm": 0.8769939771723178, "learning_rate": 1.9382058272059326e-05, "loss": 0.5173, "step": 2422 }, { "epoch": 0.14, "grad_norm": 0.5224940687326848, "learning_rate": 1.9381414089121848e-05, "loss": 0.3176, "step": 2423 }, { "epoch": 0.14, "grad_norm": 0.4746503525476009, "learning_rate": 1.9380769581306404e-05, "loss": 0.2982, "step": 2424 }, { "epoch": 0.14, "grad_norm": 0.5547203830668347, "learning_rate": 1.9380124748635312e-05, "loss": 0.4058, "step": 2425 }, { "epoch": 0.14, "grad_norm": 0.8275991864380573, "learning_rate": 1.9379479591130903e-05, "loss": 0.197, "step": 2426 }, { "epoch": 0.14, "grad_norm": 0.395529200272553, "learning_rate": 1.937883410881552e-05, "loss": 0.2868, "step": 2427 }, { "epoch": 0.14, "grad_norm": 0.5053460012049817, "learning_rate": 1.937818830171151e-05, "loss": 0.3728, "step": 2428 }, { "epoch": 0.14, "grad_norm": 1.0284565975396147, "learning_rate": 1.937754216984125e-05, "loss": 0.5913, "step": 2429 }, { "epoch": 0.14, "grad_norm": 0.41865327079166226, "learning_rate": 1.9376895713227106e-05, "loss": 0.2927, "step": 2430 }, { "epoch": 0.14, "grad_norm": 0.5272358314902269, "learning_rate": 1.9376248931891463e-05, "loss": 0.3642, "step": 2431 }, { "epoch": 0.14, "grad_norm": 0.7547226733060595, "learning_rate": 1.9375601825856724e-05, "loss": 0.4149, "step": 2432 }, { "epoch": 0.14, "grad_norm": 0.37814727594751524, "learning_rate": 1.93749543951453e-05, "loss": 0.2974, "step": 2433 }, { "epoch": 0.14, "grad_norm": 0.4377703315425029, "learning_rate": 1.9374306639779606e-05, "loss": 0.2508, "step": 2434 }, { "epoch": 0.14, "grad_norm": 0.46933076479616304, "learning_rate": 1.9373658559782075e-05, "loss": 0.3411, "step": 2435 }, { "epoch": 0.14, "grad_norm": 0.38251871677416266, "learning_rate": 1.9373010155175155e-05, "loss": 0.2416, "step": 2436 }, { "epoch": 0.14, "grad_norm": 1.4471766073972423, "learning_rate": 1.9372361425981293e-05, "loss": 0.7063, "step": 2437 }, { "epoch": 0.14, "grad_norm": 0.7185286092475877, "learning_rate": 1.937171237222296e-05, "loss": 0.4922, "step": 2438 }, { "epoch": 0.14, "grad_norm": 0.3228268085409524, "learning_rate": 1.9371062993922627e-05, "loss": 0.238, "step": 2439 }, { "epoch": 0.14, "grad_norm": 0.3947789797377527, "learning_rate": 1.937041329110279e-05, "loss": 0.2659, "step": 2440 }, { "epoch": 0.14, "grad_norm": 0.4828332809183167, "learning_rate": 1.9369763263785942e-05, "loss": 0.2961, "step": 2441 }, { "epoch": 0.14, "grad_norm": 0.42032450024642715, "learning_rate": 1.936911291199459e-05, "loss": 0.1853, "step": 2442 }, { "epoch": 0.14, "grad_norm": 0.42092663555747223, "learning_rate": 1.9368462235751267e-05, "loss": 0.3511, "step": 2443 }, { "epoch": 0.14, "grad_norm": 0.8694030752192309, "learning_rate": 1.93678112350785e-05, "loss": 0.5347, "step": 2444 }, { "epoch": 0.14, "grad_norm": 0.3919234530205258, "learning_rate": 1.936715990999883e-05, "loss": 0.1909, "step": 2445 }, { "epoch": 0.14, "grad_norm": 0.43121382968926086, "learning_rate": 1.9366508260534816e-05, "loss": 0.2989, "step": 2446 }, { "epoch": 0.14, "grad_norm": 0.41099307028572735, "learning_rate": 1.936585628670902e-05, "loss": 0.3394, "step": 2447 }, { "epoch": 0.14, "grad_norm": 0.3230654577529209, "learning_rate": 1.936520398854403e-05, "loss": 0.2249, "step": 2448 }, { "epoch": 0.14, "grad_norm": 0.5695656142368685, "learning_rate": 1.9364551366062426e-05, "loss": 0.3446, "step": 2449 }, { "epoch": 0.14, "grad_norm": 0.7429372957735979, "learning_rate": 1.9363898419286812e-05, "loss": 0.5617, "step": 2450 }, { "epoch": 0.14, "grad_norm": 0.5023685924135727, "learning_rate": 1.9363245148239796e-05, "loss": 0.3299, "step": 2451 }, { "epoch": 0.14, "grad_norm": 0.45158885365379514, "learning_rate": 1.9362591552944004e-05, "loss": 0.2682, "step": 2452 }, { "epoch": 0.14, "grad_norm": 0.4618495334104154, "learning_rate": 1.9361937633422066e-05, "loss": 0.3204, "step": 2453 }, { "epoch": 0.14, "grad_norm": 0.28793626450866544, "learning_rate": 1.9361283389696637e-05, "loss": 0.2239, "step": 2454 }, { "epoch": 0.14, "grad_norm": 0.4599655334941602, "learning_rate": 1.9360628821790365e-05, "loss": 0.3066, "step": 2455 }, { "epoch": 0.14, "grad_norm": 1.1546352730802452, "learning_rate": 1.9359973929725915e-05, "loss": 0.5382, "step": 2456 }, { "epoch": 0.14, "grad_norm": 0.41592337368392673, "learning_rate": 1.9359318713525974e-05, "loss": 0.303, "step": 2457 }, { "epoch": 0.14, "grad_norm": 1.5380319613336388, "learning_rate": 1.9358663173213227e-05, "loss": 0.8324, "step": 2458 }, { "epoch": 0.14, "grad_norm": 0.2911517817560607, "learning_rate": 1.9358007308810377e-05, "loss": 0.23, "step": 2459 }, { "epoch": 0.14, "grad_norm": 0.32365945054881184, "learning_rate": 1.9357351120340137e-05, "loss": 0.2067, "step": 2460 }, { "epoch": 0.14, "grad_norm": 0.5107713270649263, "learning_rate": 1.935669460782523e-05, "loss": 0.3869, "step": 2461 }, { "epoch": 0.14, "grad_norm": 0.5950877636646806, "learning_rate": 1.935603777128839e-05, "loss": 0.3651, "step": 2462 }, { "epoch": 0.14, "grad_norm": 0.4329840077090914, "learning_rate": 1.9355380610752364e-05, "loss": 0.2897, "step": 2463 }, { "epoch": 0.14, "grad_norm": 0.5263592276254527, "learning_rate": 1.9354723126239913e-05, "loss": 0.368, "step": 2464 }, { "epoch": 0.14, "grad_norm": 0.8971518502199158, "learning_rate": 1.93540653177738e-05, "loss": 0.5344, "step": 2465 }, { "epoch": 0.14, "grad_norm": 0.3520567138494093, "learning_rate": 1.9353407185376805e-05, "loss": 0.2685, "step": 2466 }, { "epoch": 0.14, "grad_norm": 0.4554545967398719, "learning_rate": 1.9352748729071727e-05, "loss": 0.3259, "step": 2467 }, { "epoch": 0.14, "grad_norm": 0.4117401158899818, "learning_rate": 1.9352089948881358e-05, "loss": 0.2224, "step": 2468 }, { "epoch": 0.14, "grad_norm": 0.4564482028613299, "learning_rate": 1.935143084482852e-05, "loss": 0.2908, "step": 2469 }, { "epoch": 0.14, "grad_norm": 0.6895418863474747, "learning_rate": 1.935077141693603e-05, "loss": 0.447, "step": 2470 }, { "epoch": 0.14, "grad_norm": 0.5274251237370065, "learning_rate": 1.935011166522673e-05, "loss": 0.3838, "step": 2471 }, { "epoch": 0.14, "grad_norm": 0.4138804782363925, "learning_rate": 1.9349451589723465e-05, "loss": 0.2777, "step": 2472 }, { "epoch": 0.14, "grad_norm": 0.5436885501881874, "learning_rate": 1.9348791190449092e-05, "loss": 0.4258, "step": 2473 }, { "epoch": 0.14, "grad_norm": 0.3883095750427234, "learning_rate": 1.9348130467426486e-05, "loss": 0.1774, "step": 2474 }, { "epoch": 0.14, "grad_norm": 0.3857708543855049, "learning_rate": 1.934746942067852e-05, "loss": 0.2547, "step": 2475 }, { "epoch": 0.14, "grad_norm": 0.5736300598247335, "learning_rate": 1.9346808050228093e-05, "loss": 0.4197, "step": 2476 }, { "epoch": 0.14, "grad_norm": 0.7074214232035989, "learning_rate": 1.9346146356098103e-05, "loss": 0.5278, "step": 2477 }, { "epoch": 0.14, "grad_norm": 0.442403841592901, "learning_rate": 1.9345484338311467e-05, "loss": 0.2977, "step": 2478 }, { "epoch": 0.14, "grad_norm": 0.4645088493036005, "learning_rate": 1.9344821996891106e-05, "loss": 0.3155, "step": 2479 }, { "epoch": 0.14, "grad_norm": 0.4166778230104967, "learning_rate": 1.9344159331859965e-05, "loss": 0.1801, "step": 2480 }, { "epoch": 0.14, "grad_norm": 0.4899829826259577, "learning_rate": 1.9343496343240994e-05, "loss": 0.1517, "step": 2481 }, { "epoch": 0.14, "grad_norm": 0.5196827332828534, "learning_rate": 1.9342833031057138e-05, "loss": 0.3627, "step": 2482 }, { "epoch": 0.14, "grad_norm": 0.46032261971842325, "learning_rate": 1.934216939533138e-05, "loss": 0.367, "step": 2483 }, { "epoch": 0.14, "grad_norm": 0.8366442088496777, "learning_rate": 1.9341505436086695e-05, "loss": 0.441, "step": 2484 }, { "epoch": 0.14, "grad_norm": 0.48325853141372255, "learning_rate": 1.934084115334608e-05, "loss": 0.263, "step": 2485 }, { "epoch": 0.14, "grad_norm": 0.38690498341366925, "learning_rate": 1.9340176547132536e-05, "loss": 0.2036, "step": 2486 }, { "epoch": 0.14, "grad_norm": 0.41703692608099363, "learning_rate": 1.9339511617469082e-05, "loss": 0.3184, "step": 2487 }, { "epoch": 0.14, "grad_norm": 0.47705445064380225, "learning_rate": 1.9338846364378742e-05, "loss": 0.2599, "step": 2488 }, { "epoch": 0.14, "grad_norm": 1.186205154115369, "learning_rate": 1.9338180787884554e-05, "loss": 0.5164, "step": 2489 }, { "epoch": 0.14, "grad_norm": 0.5865526165668318, "learning_rate": 1.9337514888009566e-05, "loss": 0.4188, "step": 2490 }, { "epoch": 0.14, "grad_norm": 0.3801379438763642, "learning_rate": 1.933684866477684e-05, "loss": 0.264, "step": 2491 }, { "epoch": 0.14, "grad_norm": 0.2829332968836524, "learning_rate": 1.9336182118209442e-05, "loss": 0.192, "step": 2492 }, { "epoch": 0.14, "grad_norm": 0.5463688788581329, "learning_rate": 1.9335515248330462e-05, "loss": 0.3456, "step": 2493 }, { "epoch": 0.14, "grad_norm": 0.719184449716863, "learning_rate": 1.933484805516299e-05, "loss": 0.3116, "step": 2494 }, { "epoch": 0.14, "grad_norm": 0.8530292958738765, "learning_rate": 1.9334180538730133e-05, "loss": 0.3951, "step": 2495 }, { "epoch": 0.14, "grad_norm": 1.5021582635497452, "learning_rate": 1.9333512699055004e-05, "loss": 0.7942, "step": 2496 }, { "epoch": 0.14, "grad_norm": 0.37553192886357234, "learning_rate": 1.933284453616073e-05, "loss": 0.3011, "step": 2497 }, { "epoch": 0.14, "grad_norm": 0.27112912159156816, "learning_rate": 1.9332176050070454e-05, "loss": 0.2007, "step": 2498 }, { "epoch": 0.14, "grad_norm": 0.763728328487966, "learning_rate": 1.9331507240807322e-05, "loss": 0.4586, "step": 2499 }, { "epoch": 0.14, "grad_norm": 0.6138837661580483, "learning_rate": 1.9330838108394497e-05, "loss": 0.327, "step": 2500 }, { "epoch": 0.14, "grad_norm": 1.051207841064682, "learning_rate": 1.933016865285515e-05, "loss": 0.438, "step": 2501 }, { "epoch": 0.14, "grad_norm": 0.5530078844473687, "learning_rate": 1.932949887421246e-05, "loss": 0.3198, "step": 2502 }, { "epoch": 0.14, "grad_norm": 0.4045931786615891, "learning_rate": 1.9328828772489623e-05, "loss": 0.293, "step": 2503 }, { "epoch": 0.14, "grad_norm": 0.3240936130174602, "learning_rate": 1.932815834770985e-05, "loss": 0.1162, "step": 2504 }, { "epoch": 0.14, "grad_norm": 0.37395828600806214, "learning_rate": 1.9327487599896355e-05, "loss": 0.2758, "step": 2505 }, { "epoch": 0.14, "grad_norm": 0.4175861001760937, "learning_rate": 1.9326816529072364e-05, "loss": 0.3197, "step": 2506 }, { "epoch": 0.14, "grad_norm": 0.7256771592836744, "learning_rate": 1.932614513526112e-05, "loss": 0.3772, "step": 2507 }, { "epoch": 0.14, "grad_norm": 0.6586431832270313, "learning_rate": 1.932547341848587e-05, "loss": 0.4013, "step": 2508 }, { "epoch": 0.14, "grad_norm": 0.5348147230679424, "learning_rate": 1.932480137876988e-05, "loss": 0.3208, "step": 2509 }, { "epoch": 0.14, "grad_norm": 0.39809772127850734, "learning_rate": 1.9324129016136414e-05, "loss": 0.265, "step": 2510 }, { "epoch": 0.14, "grad_norm": 0.29378693207938744, "learning_rate": 1.9323456330608762e-05, "loss": 0.2083, "step": 2511 }, { "epoch": 0.14, "grad_norm": 0.41811342753632974, "learning_rate": 1.932278332221022e-05, "loss": 0.3463, "step": 2512 }, { "epoch": 0.14, "grad_norm": 0.7113376176543778, "learning_rate": 1.932210999096409e-05, "loss": 0.5525, "step": 2513 }, { "epoch": 0.14, "grad_norm": 0.4728687902245757, "learning_rate": 1.9321436336893693e-05, "loss": 0.2821, "step": 2514 }, { "epoch": 0.14, "grad_norm": 0.3785533771206013, "learning_rate": 1.9320762360022354e-05, "loss": 0.2939, "step": 2515 }, { "epoch": 0.14, "grad_norm": 0.4423604779271077, "learning_rate": 1.932008806037342e-05, "loss": 0.2852, "step": 2516 }, { "epoch": 0.14, "grad_norm": 0.34758961266810223, "learning_rate": 1.9319413437970236e-05, "loss": 0.2658, "step": 2517 }, { "epoch": 0.14, "grad_norm": 0.40220948422232616, "learning_rate": 1.9318738492836165e-05, "loss": 0.2915, "step": 2518 }, { "epoch": 0.14, "grad_norm": 0.515572698759552, "learning_rate": 1.931806322499458e-05, "loss": 0.4523, "step": 2519 }, { "epoch": 0.14, "grad_norm": 0.5901676572616871, "learning_rate": 1.9317387634468867e-05, "loss": 0.2373, "step": 2520 }, { "epoch": 0.14, "grad_norm": 0.42464998973281115, "learning_rate": 1.9316711721282423e-05, "loss": 0.3158, "step": 2521 }, { "epoch": 0.14, "grad_norm": 0.5342049535459824, "learning_rate": 1.931603548545865e-05, "loss": 0.3712, "step": 2522 }, { "epoch": 0.14, "grad_norm": 0.3737853946547916, "learning_rate": 1.9315358927020965e-05, "loss": 0.3404, "step": 2523 }, { "epoch": 0.15, "grad_norm": 0.28710083849023404, "learning_rate": 1.931468204599281e-05, "loss": 0.2055, "step": 2524 }, { "epoch": 0.15, "grad_norm": 1.221810801453039, "learning_rate": 1.931400484239761e-05, "loss": 0.8419, "step": 2525 }, { "epoch": 0.15, "grad_norm": 0.4907234180429648, "learning_rate": 1.931332731625882e-05, "loss": 0.3614, "step": 2526 }, { "epoch": 0.15, "grad_norm": 0.37691010597331226, "learning_rate": 1.931264946759991e-05, "loss": 0.2088, "step": 2527 }, { "epoch": 0.15, "grad_norm": 0.7873124730767418, "learning_rate": 1.931197129644435e-05, "loss": 0.5543, "step": 2528 }, { "epoch": 0.15, "grad_norm": 0.5526920490936016, "learning_rate": 1.9311292802815626e-05, "loss": 0.4587, "step": 2529 }, { "epoch": 0.15, "grad_norm": 0.28601007260420575, "learning_rate": 1.931061398673723e-05, "loss": 0.1858, "step": 2530 }, { "epoch": 0.15, "grad_norm": 0.4887404163838093, "learning_rate": 1.930993484823267e-05, "loss": 0.3604, "step": 2531 }, { "epoch": 0.15, "grad_norm": 0.5338039650083546, "learning_rate": 1.9309255387325468e-05, "loss": 0.3452, "step": 2532 }, { "epoch": 0.15, "grad_norm": 0.34880351685054517, "learning_rate": 1.9308575604039156e-05, "loss": 0.2113, "step": 2533 }, { "epoch": 0.15, "grad_norm": 0.50223678765854, "learning_rate": 1.9307895498397268e-05, "loss": 0.3496, "step": 2534 }, { "epoch": 0.15, "grad_norm": 1.0383332882780774, "learning_rate": 1.9307215070423354e-05, "loss": 0.5223, "step": 2535 }, { "epoch": 0.15, "grad_norm": 0.40725782340222744, "learning_rate": 1.930653432014099e-05, "loss": 0.3043, "step": 2536 }, { "epoch": 0.15, "grad_norm": 1.0892283717210414, "learning_rate": 1.930585324757374e-05, "loss": 0.5579, "step": 2537 }, { "epoch": 0.15, "grad_norm": 0.3285256564934091, "learning_rate": 1.9305171852745192e-05, "loss": 0.2542, "step": 2538 }, { "epoch": 0.15, "grad_norm": 0.3941296085844982, "learning_rate": 1.9304490135678944e-05, "loss": 0.2776, "step": 2539 }, { "epoch": 0.15, "grad_norm": 0.8550640205170302, "learning_rate": 1.9303808096398602e-05, "loss": 0.4352, "step": 2540 }, { "epoch": 0.15, "grad_norm": 0.7635534341325637, "learning_rate": 1.9303125734927784e-05, "loss": 0.5426, "step": 2541 }, { "epoch": 0.15, "grad_norm": 0.3402636225143411, "learning_rate": 1.9302443051290126e-05, "loss": 0.2776, "step": 2542 }, { "epoch": 0.15, "grad_norm": 0.6774987084654007, "learning_rate": 1.930176004550926e-05, "loss": 0.3408, "step": 2543 }, { "epoch": 0.15, "grad_norm": 0.33787908931664956, "learning_rate": 1.930107671760885e-05, "loss": 0.2045, "step": 2544 }, { "epoch": 0.15, "grad_norm": 0.47972956125767385, "learning_rate": 1.9300393067612545e-05, "loss": 0.2875, "step": 2545 }, { "epoch": 0.15, "grad_norm": 0.4447447023685629, "learning_rate": 1.9299709095544035e-05, "loss": 0.3332, "step": 2546 }, { "epoch": 0.15, "grad_norm": 0.9395633232005106, "learning_rate": 1.9299024801426994e-05, "loss": 0.4455, "step": 2547 }, { "epoch": 0.15, "grad_norm": 0.4374552033580974, "learning_rate": 1.9298340185285127e-05, "loss": 0.2683, "step": 2548 }, { "epoch": 0.15, "grad_norm": 0.6582182316770087, "learning_rate": 1.929765524714214e-05, "loss": 0.4531, "step": 2549 }, { "epoch": 0.15, "grad_norm": 0.3174736595811298, "learning_rate": 1.929696998702175e-05, "loss": 0.1964, "step": 2550 }, { "epoch": 0.15, "grad_norm": 0.40811572548117436, "learning_rate": 1.929628440494769e-05, "loss": 0.2882, "step": 2551 }, { "epoch": 0.15, "grad_norm": 0.8862066628895517, "learning_rate": 1.92955985009437e-05, "loss": 0.4916, "step": 2552 }, { "epoch": 0.15, "grad_norm": 1.0030933658275236, "learning_rate": 1.9294912275033533e-05, "loss": 0.3916, "step": 2553 }, { "epoch": 0.15, "grad_norm": 0.4245887044072022, "learning_rate": 1.9294225727240958e-05, "loss": 0.2804, "step": 2554 }, { "epoch": 0.15, "grad_norm": 0.5604093210603397, "learning_rate": 1.9293538857589737e-05, "loss": 0.4478, "step": 2555 }, { "epoch": 0.15, "grad_norm": 0.22753638244231286, "learning_rate": 1.9292851666103673e-05, "loss": 0.1424, "step": 2556 }, { "epoch": 0.15, "grad_norm": 0.393215750130867, "learning_rate": 1.9292164152806553e-05, "loss": 0.321, "step": 2557 }, { "epoch": 0.15, "grad_norm": 0.42867114089154623, "learning_rate": 1.9291476317722185e-05, "loss": 0.3651, "step": 2558 }, { "epoch": 0.15, "grad_norm": 1.0719463340093578, "learning_rate": 1.9290788160874395e-05, "loss": 0.5735, "step": 2559 }, { "epoch": 0.15, "grad_norm": 0.42211388783214765, "learning_rate": 1.9290099682287012e-05, "loss": 0.2557, "step": 2560 }, { "epoch": 0.15, "grad_norm": 1.346065145365312, "learning_rate": 1.928941088198387e-05, "loss": 0.877, "step": 2561 }, { "epoch": 0.15, "grad_norm": 0.4098509316745687, "learning_rate": 1.9288721759988834e-05, "loss": 0.372, "step": 2562 }, { "epoch": 0.15, "grad_norm": 0.42484258362884286, "learning_rate": 1.9288032316325762e-05, "loss": 0.2418, "step": 2563 }, { "epoch": 0.15, "grad_norm": 0.35294191462631713, "learning_rate": 1.928734255101853e-05, "loss": 0.2576, "step": 2564 }, { "epoch": 0.15, "grad_norm": 0.5300039653477053, "learning_rate": 1.9286652464091024e-05, "loss": 0.4365, "step": 2565 }, { "epoch": 0.15, "grad_norm": 0.36024518354461466, "learning_rate": 1.9285962055567144e-05, "loss": 0.2375, "step": 2566 }, { "epoch": 0.15, "grad_norm": 0.676422785771166, "learning_rate": 1.9285271325470794e-05, "loss": 0.4694, "step": 2567 }, { "epoch": 0.15, "grad_norm": 0.8126513636201301, "learning_rate": 1.92845802738259e-05, "loss": 0.5042, "step": 2568 }, { "epoch": 0.15, "grad_norm": 0.4114429506728939, "learning_rate": 1.928388890065639e-05, "loss": 0.2532, "step": 2569 }, { "epoch": 0.15, "grad_norm": 0.3213342473563277, "learning_rate": 1.928319720598621e-05, "loss": 0.27, "step": 2570 }, { "epoch": 0.15, "grad_norm": 0.40903951950701906, "learning_rate": 1.9282505189839305e-05, "loss": 0.2594, "step": 2571 }, { "epoch": 0.15, "grad_norm": 0.44270353797659523, "learning_rate": 1.9281812852239647e-05, "loss": 0.3207, "step": 2572 }, { "epoch": 0.15, "grad_norm": 0.43969504549066846, "learning_rate": 1.928112019321121e-05, "loss": 0.297, "step": 2573 }, { "epoch": 0.15, "grad_norm": 0.6514459661898435, "learning_rate": 1.9280427212777976e-05, "loss": 0.4354, "step": 2574 }, { "epoch": 0.15, "grad_norm": 0.5278709615714426, "learning_rate": 1.927973391096395e-05, "loss": 0.334, "step": 2575 }, { "epoch": 0.15, "grad_norm": 0.3225729087069408, "learning_rate": 1.9279040287793138e-05, "loss": 0.1843, "step": 2576 }, { "epoch": 0.15, "grad_norm": 0.38192438331171524, "learning_rate": 1.9278346343289555e-05, "loss": 0.285, "step": 2577 }, { "epoch": 0.15, "grad_norm": 0.4238320027450154, "learning_rate": 1.9277652077477242e-05, "loss": 0.3222, "step": 2578 }, { "epoch": 0.15, "grad_norm": 0.5692015716239939, "learning_rate": 1.9276957490380236e-05, "loss": 0.3179, "step": 2579 }, { "epoch": 0.15, "grad_norm": 0.6989023862803406, "learning_rate": 1.927626258202259e-05, "loss": 0.4818, "step": 2580 }, { "epoch": 0.15, "grad_norm": 0.47851777806709617, "learning_rate": 1.927556735242837e-05, "loss": 0.3246, "step": 2581 }, { "epoch": 0.15, "grad_norm": 0.37801509190886534, "learning_rate": 1.9274871801621652e-05, "loss": 0.2653, "step": 2582 }, { "epoch": 0.15, "grad_norm": 0.31902918198499913, "learning_rate": 1.927417592962652e-05, "loss": 0.2124, "step": 2583 }, { "epoch": 0.15, "grad_norm": 0.5711538165034585, "learning_rate": 1.9273479736467077e-05, "loss": 0.3241, "step": 2584 }, { "epoch": 0.15, "grad_norm": 0.48424088195332443, "learning_rate": 1.9272783222167424e-05, "loss": 0.3557, "step": 2585 }, { "epoch": 0.15, "grad_norm": 0.5128003089466692, "learning_rate": 1.9272086386751693e-05, "loss": 0.3375, "step": 2586 }, { "epoch": 0.15, "grad_norm": 0.5824582826960971, "learning_rate": 1.927138923024401e-05, "loss": 0.3402, "step": 2587 }, { "epoch": 0.15, "grad_norm": 0.3477545206134362, "learning_rate": 1.927069175266851e-05, "loss": 0.2719, "step": 2588 }, { "epoch": 0.15, "grad_norm": 0.35480404863309206, "learning_rate": 1.926999395404936e-05, "loss": 0.1912, "step": 2589 }, { "epoch": 0.15, "grad_norm": 0.4088392855070457, "learning_rate": 1.9269295834410715e-05, "loss": 0.3091, "step": 2590 }, { "epoch": 0.15, "grad_norm": 0.4128220609130996, "learning_rate": 1.9268597393776753e-05, "loss": 0.3596, "step": 2591 }, { "epoch": 0.15, "grad_norm": 0.6523103375473858, "learning_rate": 1.9267898632171663e-05, "loss": 0.4379, "step": 2592 }, { "epoch": 0.15, "grad_norm": 0.42876866445114575, "learning_rate": 1.9267199549619643e-05, "loss": 0.2967, "step": 2593 }, { "epoch": 0.15, "grad_norm": 0.4201511676991754, "learning_rate": 1.92665001461449e-05, "loss": 0.3146, "step": 2594 }, { "epoch": 0.15, "grad_norm": 0.23291446601377253, "learning_rate": 1.9265800421771655e-05, "loss": 0.1366, "step": 2595 }, { "epoch": 0.15, "grad_norm": 0.43171545122840194, "learning_rate": 1.926510037652414e-05, "loss": 0.3457, "step": 2596 }, { "epoch": 0.15, "grad_norm": 0.5129067503078448, "learning_rate": 1.9264400010426598e-05, "loss": 0.4012, "step": 2597 }, { "epoch": 0.15, "grad_norm": 0.41831673800447344, "learning_rate": 1.926369932350328e-05, "loss": 0.3836, "step": 2598 }, { "epoch": 0.15, "grad_norm": 0.30934776416133264, "learning_rate": 1.9262998315778453e-05, "loss": 0.1444, "step": 2599 }, { "epoch": 0.15, "grad_norm": 0.46632659562332673, "learning_rate": 1.9262296987276395e-05, "loss": 0.348, "step": 2600 }, { "epoch": 0.15, "grad_norm": 0.27229649864445243, "learning_rate": 1.9261595338021388e-05, "loss": 0.2331, "step": 2601 }, { "epoch": 0.15, "grad_norm": 0.40189885147596294, "learning_rate": 1.9260893368037735e-05, "loss": 0.1689, "step": 2602 }, { "epoch": 0.15, "grad_norm": 0.41329355608342555, "learning_rate": 1.9260191077349742e-05, "loss": 0.3672, "step": 2603 }, { "epoch": 0.15, "grad_norm": 0.6931020159640395, "learning_rate": 1.925948846598173e-05, "loss": 0.5568, "step": 2604 }, { "epoch": 0.15, "grad_norm": 0.3821818839566917, "learning_rate": 1.925878553395803e-05, "loss": 0.2077, "step": 2605 }, { "epoch": 0.15, "grad_norm": 0.36378068853839796, "learning_rate": 1.9258082281302988e-05, "loss": 0.3239, "step": 2606 }, { "epoch": 0.15, "grad_norm": 0.3489085591030622, "learning_rate": 1.925737870804095e-05, "loss": 0.1991, "step": 2607 }, { "epoch": 0.15, "grad_norm": 0.4365373846944763, "learning_rate": 1.925667481419629e-05, "loss": 0.3204, "step": 2608 }, { "epoch": 0.15, "grad_norm": 0.37061176255326356, "learning_rate": 1.9255970599793376e-05, "loss": 0.3358, "step": 2609 }, { "epoch": 0.15, "grad_norm": 0.5448112276722097, "learning_rate": 1.92552660648566e-05, "loss": 0.4135, "step": 2610 }, { "epoch": 0.15, "grad_norm": 0.7625349028762688, "learning_rate": 1.925456120941036e-05, "loss": 0.4468, "step": 2611 }, { "epoch": 0.15, "grad_norm": 0.3891806726343858, "learning_rate": 1.925385603347906e-05, "loss": 0.2244, "step": 2612 }, { "epoch": 0.15, "grad_norm": 0.3134500837415368, "learning_rate": 1.9253150537087123e-05, "loss": 0.2471, "step": 2613 }, { "epoch": 0.15, "grad_norm": 0.6358650633518437, "learning_rate": 1.9252444720258982e-05, "loss": 0.3955, "step": 2614 }, { "epoch": 0.15, "grad_norm": 0.4146219135777843, "learning_rate": 1.925173858301908e-05, "loss": 0.3018, "step": 2615 }, { "epoch": 0.15, "grad_norm": 0.5250021533099688, "learning_rate": 1.9251032125391867e-05, "loss": 0.3568, "step": 2616 }, { "epoch": 0.15, "grad_norm": 0.3748774238993723, "learning_rate": 1.925032534740181e-05, "loss": 0.3043, "step": 2617 }, { "epoch": 0.15, "grad_norm": 0.4535619682866479, "learning_rate": 1.9249618249073384e-05, "loss": 0.2748, "step": 2618 }, { "epoch": 0.15, "grad_norm": 0.7356712302689734, "learning_rate": 1.9248910830431073e-05, "loss": 0.5015, "step": 2619 }, { "epoch": 0.15, "grad_norm": 0.4561318815496416, "learning_rate": 1.924820309149938e-05, "loss": 0.3636, "step": 2620 }, { "epoch": 0.15, "grad_norm": 0.40170842706925475, "learning_rate": 1.9247495032302812e-05, "loss": 0.317, "step": 2621 }, { "epoch": 0.15, "grad_norm": 0.3116553045717942, "learning_rate": 1.924678665286589e-05, "loss": 0.2193, "step": 2622 }, { "epoch": 0.15, "grad_norm": 1.0170395298052468, "learning_rate": 1.924607795321314e-05, "loss": 0.4508, "step": 2623 }, { "epoch": 0.15, "grad_norm": 0.4078089046880881, "learning_rate": 1.924536893336911e-05, "loss": 0.3254, "step": 2624 }, { "epoch": 0.15, "grad_norm": 0.43203476595198353, "learning_rate": 1.924465959335835e-05, "loss": 0.3156, "step": 2625 }, { "epoch": 0.15, "grad_norm": 0.8382291099953593, "learning_rate": 1.924394993320543e-05, "loss": 0.4348, "step": 2626 }, { "epoch": 0.15, "grad_norm": 0.4875964972050568, "learning_rate": 1.9243239952934918e-05, "loss": 0.3129, "step": 2627 }, { "epoch": 0.15, "grad_norm": 0.3377201613499536, "learning_rate": 1.9242529652571405e-05, "loss": 0.1133, "step": 2628 }, { "epoch": 0.15, "grad_norm": 0.4149281039334817, "learning_rate": 1.9241819032139487e-05, "loss": 0.3227, "step": 2629 }, { "epoch": 0.15, "grad_norm": 0.48560499229576615, "learning_rate": 1.9241108091663774e-05, "loss": 0.3227, "step": 2630 }, { "epoch": 0.15, "grad_norm": 1.3978598633144965, "learning_rate": 1.9240396831168883e-05, "loss": 0.4226, "step": 2631 }, { "epoch": 0.15, "grad_norm": 0.5892322349385228, "learning_rate": 1.923968525067945e-05, "loss": 0.3879, "step": 2632 }, { "epoch": 0.15, "grad_norm": 0.5121453834736848, "learning_rate": 1.923897335022011e-05, "loss": 0.3623, "step": 2633 }, { "epoch": 0.15, "grad_norm": 0.4052392757884597, "learning_rate": 1.9238261129815526e-05, "loss": 0.2655, "step": 2634 }, { "epoch": 0.15, "grad_norm": 0.3905272412860965, "learning_rate": 1.9237548589490355e-05, "loss": 0.14, "step": 2635 }, { "epoch": 0.15, "grad_norm": 0.7394376527945423, "learning_rate": 1.923683572926927e-05, "loss": 0.3439, "step": 2636 }, { "epoch": 0.15, "grad_norm": 0.8275157121617628, "learning_rate": 1.9236122549176963e-05, "loss": 0.3755, "step": 2637 }, { "epoch": 0.15, "grad_norm": 0.9844646238383457, "learning_rate": 1.923540904923813e-05, "loss": 0.3717, "step": 2638 }, { "epoch": 0.15, "grad_norm": 0.3459704137199816, "learning_rate": 1.9234695229477475e-05, "loss": 0.3101, "step": 2639 }, { "epoch": 0.15, "grad_norm": 0.31297296064647323, "learning_rate": 1.9233981089919727e-05, "loss": 0.1909, "step": 2640 }, { "epoch": 0.15, "grad_norm": 0.6016717619270547, "learning_rate": 1.9233266630589607e-05, "loss": 0.2894, "step": 2641 }, { "epoch": 0.15, "grad_norm": 0.5144862160701495, "learning_rate": 1.923255185151186e-05, "loss": 0.3252, "step": 2642 }, { "epoch": 0.15, "grad_norm": 1.0734016508045525, "learning_rate": 1.923183675271124e-05, "loss": 0.4787, "step": 2643 }, { "epoch": 0.15, "grad_norm": 0.6162045563854098, "learning_rate": 1.9231121334212513e-05, "loss": 0.2236, "step": 2644 }, { "epoch": 0.15, "grad_norm": 0.45479225577934984, "learning_rate": 1.9230405596040448e-05, "loss": 0.2862, "step": 2645 }, { "epoch": 0.15, "grad_norm": 1.5405861989390848, "learning_rate": 1.922968953821984e-05, "loss": 0.8193, "step": 2646 }, { "epoch": 0.15, "grad_norm": 0.3822968065664251, "learning_rate": 1.9228973160775474e-05, "loss": 0.2538, "step": 2647 }, { "epoch": 0.15, "grad_norm": 0.4312183020139809, "learning_rate": 1.9228256463732165e-05, "loss": 0.2563, "step": 2648 }, { "epoch": 0.15, "grad_norm": 0.614927999518781, "learning_rate": 1.9227539447114732e-05, "loss": 0.3677, "step": 2649 }, { "epoch": 0.15, "grad_norm": 1.1379887071076775, "learning_rate": 1.9226822110948005e-05, "loss": 0.6745, "step": 2650 }, { "epoch": 0.15, "grad_norm": 0.37894151707122936, "learning_rate": 1.9226104455256827e-05, "loss": 0.2201, "step": 2651 }, { "epoch": 0.15, "grad_norm": 0.4781041246729594, "learning_rate": 1.9225386480066046e-05, "loss": 0.3187, "step": 2652 }, { "epoch": 0.15, "grad_norm": 0.4630776461667929, "learning_rate": 1.9224668185400528e-05, "loss": 0.3508, "step": 2653 }, { "epoch": 0.15, "grad_norm": 0.38185126415206494, "learning_rate": 1.922394957128515e-05, "loss": 0.2651, "step": 2654 }, { "epoch": 0.15, "grad_norm": 0.4365198237742083, "learning_rate": 1.9223230637744792e-05, "loss": 0.2976, "step": 2655 }, { "epoch": 0.15, "grad_norm": 1.6072299775806103, "learning_rate": 1.9222511384804355e-05, "loss": 0.8611, "step": 2656 }, { "epoch": 0.15, "grad_norm": 0.35629856336310844, "learning_rate": 1.9221791812488746e-05, "loss": 0.2504, "step": 2657 }, { "epoch": 0.15, "grad_norm": 0.9970693080774518, "learning_rate": 1.9221071920822882e-05, "loss": 0.6513, "step": 2658 }, { "epoch": 0.15, "grad_norm": 0.7995130307427668, "learning_rate": 1.922035170983169e-05, "loss": 0.5226, "step": 2659 }, { "epoch": 0.15, "grad_norm": 0.3808491244278673, "learning_rate": 1.9219631179540125e-05, "loss": 0.3165, "step": 2660 }, { "epoch": 0.15, "grad_norm": 0.3197238539222895, "learning_rate": 1.921891032997312e-05, "loss": 0.1838, "step": 2661 }, { "epoch": 0.15, "grad_norm": 1.503646870246312, "learning_rate": 1.921818916115565e-05, "loss": 0.794, "step": 2662 }, { "epoch": 0.15, "grad_norm": 0.39543891144707394, "learning_rate": 1.9217467673112685e-05, "loss": 0.2965, "step": 2663 }, { "epoch": 0.15, "grad_norm": 0.9269117583869534, "learning_rate": 1.921674586586921e-05, "loss": 0.4454, "step": 2664 }, { "epoch": 0.15, "grad_norm": 0.4249499526884838, "learning_rate": 1.9216023739450222e-05, "loss": 0.3679, "step": 2665 }, { "epoch": 0.15, "grad_norm": 0.4217638497070136, "learning_rate": 1.921530129388073e-05, "loss": 0.2539, "step": 2666 }, { "epoch": 0.15, "grad_norm": 0.277953569574096, "learning_rate": 1.921457852918575e-05, "loss": 0.1021, "step": 2667 }, { "epoch": 0.15, "grad_norm": 0.44369267599406603, "learning_rate": 1.921385544539031e-05, "loss": 0.3468, "step": 2668 }, { "epoch": 0.15, "grad_norm": 0.4842457706418174, "learning_rate": 1.9213132042519453e-05, "loss": 0.2911, "step": 2669 }, { "epoch": 0.15, "grad_norm": 0.836503549357935, "learning_rate": 1.921240832059823e-05, "loss": 0.376, "step": 2670 }, { "epoch": 0.15, "grad_norm": 0.8464103679581624, "learning_rate": 1.9211684279651703e-05, "loss": 0.5637, "step": 2671 }, { "epoch": 0.15, "grad_norm": 0.4795410800772796, "learning_rate": 1.9210959919704946e-05, "loss": 0.2707, "step": 2672 }, { "epoch": 0.15, "grad_norm": 0.3464321091289899, "learning_rate": 1.9210235240783044e-05, "loss": 0.2455, "step": 2673 }, { "epoch": 0.15, "grad_norm": 1.184053359160218, "learning_rate": 1.920951024291109e-05, "loss": 0.4233, "step": 2674 }, { "epoch": 0.15, "grad_norm": 0.4197906790697942, "learning_rate": 1.9208784926114194e-05, "loss": 0.3018, "step": 2675 }, { "epoch": 0.15, "grad_norm": 0.9864362176828683, "learning_rate": 1.9208059290417468e-05, "loss": 0.5272, "step": 2676 }, { "epoch": 0.15, "grad_norm": 0.42562445797911436, "learning_rate": 1.9207333335846048e-05, "loss": 0.311, "step": 2677 }, { "epoch": 0.15, "grad_norm": 0.4449285311849742, "learning_rate": 1.920660706242507e-05, "loss": 0.2907, "step": 2678 }, { "epoch": 0.15, "grad_norm": 0.4022883840813125, "learning_rate": 1.9205880470179682e-05, "loss": 0.2319, "step": 2679 }, { "epoch": 0.15, "grad_norm": 0.45241222820165394, "learning_rate": 1.9205153559135056e-05, "loss": 0.2982, "step": 2680 }, { "epoch": 0.15, "grad_norm": 0.3837648287989111, "learning_rate": 1.9204426329316354e-05, "loss": 0.28, "step": 2681 }, { "epoch": 0.15, "grad_norm": 1.1774690784929287, "learning_rate": 1.9203698780748765e-05, "loss": 0.5684, "step": 2682 }, { "epoch": 0.15, "grad_norm": 0.9706697136795795, "learning_rate": 1.920297091345748e-05, "loss": 0.4298, "step": 2683 }, { "epoch": 0.15, "grad_norm": 0.44832358661289956, "learning_rate": 1.9202242727467713e-05, "loss": 0.2841, "step": 2684 }, { "epoch": 0.15, "grad_norm": 0.40119273159907204, "learning_rate": 1.9201514222804672e-05, "loss": 0.2904, "step": 2685 }, { "epoch": 0.15, "grad_norm": 0.5340909714198232, "learning_rate": 1.9200785399493592e-05, "loss": 0.3694, "step": 2686 }, { "epoch": 0.15, "grad_norm": 0.6551279965817546, "learning_rate": 1.9200056257559706e-05, "loss": 0.2458, "step": 2687 }, { "epoch": 0.15, "grad_norm": 1.2765480970724306, "learning_rate": 1.9199326797028268e-05, "loss": 0.4481, "step": 2688 }, { "epoch": 0.15, "grad_norm": 0.4896591885875324, "learning_rate": 1.9198597017924543e-05, "loss": 0.3536, "step": 2689 }, { "epoch": 0.15, "grad_norm": 0.3569245432977872, "learning_rate": 1.9197866920273794e-05, "loss": 0.2064, "step": 2690 }, { "epoch": 0.15, "grad_norm": 0.33094841706483585, "learning_rate": 1.919713650410131e-05, "loss": 0.2783, "step": 2691 }, { "epoch": 0.15, "grad_norm": 0.46907427883634445, "learning_rate": 1.9196405769432385e-05, "loss": 0.3683, "step": 2692 }, { "epoch": 0.15, "grad_norm": 0.540396516824427, "learning_rate": 1.9195674716292326e-05, "loss": 0.2609, "step": 2693 }, { "epoch": 0.15, "grad_norm": 0.7820904049432057, "learning_rate": 1.9194943344706444e-05, "loss": 0.423, "step": 2694 }, { "epoch": 0.15, "grad_norm": 1.4341198750989717, "learning_rate": 1.919421165470007e-05, "loss": 0.7056, "step": 2695 }, { "epoch": 0.15, "grad_norm": 0.41243600637594274, "learning_rate": 1.919347964629854e-05, "loss": 0.217, "step": 2696 }, { "epoch": 0.15, "grad_norm": 0.29327859156718405, "learning_rate": 1.919274731952721e-05, "loss": 0.2332, "step": 2697 }, { "epoch": 0.16, "grad_norm": 0.9189693213979191, "learning_rate": 1.919201467441143e-05, "loss": 0.629, "step": 2698 }, { "epoch": 0.16, "grad_norm": 0.46368652568716096, "learning_rate": 1.919128171097658e-05, "loss": 0.3495, "step": 2699 }, { "epoch": 0.16, "grad_norm": 0.594946940268688, "learning_rate": 1.919054842924804e-05, "loss": 0.341, "step": 2700 }, { "epoch": 0.16, "grad_norm": 0.4819190307628047, "learning_rate": 1.9189814829251204e-05, "loss": 0.3548, "step": 2701 }, { "epoch": 0.16, "grad_norm": 0.48033771181806284, "learning_rate": 1.9189080911011474e-05, "loss": 0.3313, "step": 2702 }, { "epoch": 0.16, "grad_norm": 0.38210522382790274, "learning_rate": 1.9188346674554267e-05, "loss": 0.1974, "step": 2703 }, { "epoch": 0.16, "grad_norm": 0.36886884191893365, "learning_rate": 1.918761211990501e-05, "loss": 0.2936, "step": 2704 }, { "epoch": 0.16, "grad_norm": 0.47789844802984893, "learning_rate": 1.918687724708914e-05, "loss": 0.3213, "step": 2705 }, { "epoch": 0.16, "grad_norm": 0.5360226368299564, "learning_rate": 1.918614205613211e-05, "loss": 0.3334, "step": 2706 }, { "epoch": 0.16, "grad_norm": 0.878108287963956, "learning_rate": 1.9185406547059367e-05, "loss": 0.5888, "step": 2707 }, { "epoch": 0.16, "grad_norm": 0.48964078753235296, "learning_rate": 1.91846707198964e-05, "loss": 0.3056, "step": 2708 }, { "epoch": 0.16, "grad_norm": 0.4418702371752559, "learning_rate": 1.9183934574668674e-05, "loss": 0.3089, "step": 2709 }, { "epoch": 0.16, "grad_norm": 0.4469509833893016, "learning_rate": 1.918319811140169e-05, "loss": 0.3066, "step": 2710 }, { "epoch": 0.16, "grad_norm": 0.5447582634426448, "learning_rate": 1.9182461330120952e-05, "loss": 0.3778, "step": 2711 }, { "epoch": 0.16, "grad_norm": 0.38486688340331743, "learning_rate": 1.918172423085197e-05, "loss": 0.3022, "step": 2712 }, { "epoch": 0.16, "grad_norm": 0.3383846109182444, "learning_rate": 1.9180986813620276e-05, "loss": 0.2254, "step": 2713 }, { "epoch": 0.16, "grad_norm": 0.5701648234097344, "learning_rate": 1.9180249078451406e-05, "loss": 0.3499, "step": 2714 }, { "epoch": 0.16, "grad_norm": 0.4083474638888638, "learning_rate": 1.9179511025370902e-05, "loss": 0.3619, "step": 2715 }, { "epoch": 0.16, "grad_norm": 0.41177540040311006, "learning_rate": 1.9178772654404323e-05, "loss": 0.3057, "step": 2716 }, { "epoch": 0.16, "grad_norm": 0.3629090355785191, "learning_rate": 1.9178033965577243e-05, "loss": 0.2618, "step": 2717 }, { "epoch": 0.16, "grad_norm": 0.5828011824693402, "learning_rate": 1.9177294958915246e-05, "loss": 0.4277, "step": 2718 }, { "epoch": 0.16, "grad_norm": 0.44338515108899634, "learning_rate": 1.9176555634443912e-05, "loss": 0.1719, "step": 2719 }, { "epoch": 0.16, "grad_norm": 0.3385222291686445, "learning_rate": 1.9175815992188856e-05, "loss": 0.2838, "step": 2720 }, { "epoch": 0.16, "grad_norm": 0.5432606412549738, "learning_rate": 1.9175076032175685e-05, "loss": 0.3732, "step": 2721 }, { "epoch": 0.16, "grad_norm": 0.5378990298129169, "learning_rate": 1.9174335754430026e-05, "loss": 0.4871, "step": 2722 }, { "epoch": 0.16, "grad_norm": 0.544793784552123, "learning_rate": 1.9173595158977515e-05, "loss": 0.2266, "step": 2723 }, { "epoch": 0.16, "grad_norm": 0.34300168100501205, "learning_rate": 1.9172854245843796e-05, "loss": 0.3132, "step": 2724 }, { "epoch": 0.16, "grad_norm": 0.35438645648218475, "learning_rate": 1.917211301505453e-05, "loss": 0.2455, "step": 2725 }, { "epoch": 0.16, "grad_norm": 0.37041691780405817, "learning_rate": 1.9171371466635385e-05, "loss": 0.1947, "step": 2726 }, { "epoch": 0.16, "grad_norm": 0.41229526848301834, "learning_rate": 1.9170629600612044e-05, "loss": 0.3676, "step": 2727 }, { "epoch": 0.16, "grad_norm": 0.3968209388434555, "learning_rate": 1.916988741701019e-05, "loss": 0.3847, "step": 2728 }, { "epoch": 0.16, "grad_norm": 0.45513126229457, "learning_rate": 1.9169144915855532e-05, "loss": 0.1219, "step": 2729 }, { "epoch": 0.16, "grad_norm": 0.4630940115074412, "learning_rate": 1.9168402097173774e-05, "loss": 0.3857, "step": 2730 }, { "epoch": 0.16, "grad_norm": 0.31886087385947326, "learning_rate": 1.916765896099065e-05, "loss": 0.1737, "step": 2731 }, { "epoch": 0.16, "grad_norm": 0.38274972925488704, "learning_rate": 1.916691550733189e-05, "loss": 0.2351, "step": 2732 }, { "epoch": 0.16, "grad_norm": 0.3942346069361589, "learning_rate": 1.9166171736223244e-05, "loss": 0.3809, "step": 2733 }, { "epoch": 0.16, "grad_norm": 0.7779362864741345, "learning_rate": 1.9165427647690457e-05, "loss": 0.5735, "step": 2734 }, { "epoch": 0.16, "grad_norm": 0.7486116425794241, "learning_rate": 1.916468324175931e-05, "loss": 0.5144, "step": 2735 }, { "epoch": 0.16, "grad_norm": 0.4035043380202906, "learning_rate": 1.9163938518455577e-05, "loss": 0.2557, "step": 2736 }, { "epoch": 0.16, "grad_norm": 0.2841072985025231, "learning_rate": 1.9163193477805042e-05, "loss": 0.2075, "step": 2737 }, { "epoch": 0.16, "grad_norm": 0.4792294242182115, "learning_rate": 1.9162448119833515e-05, "loss": 0.4132, "step": 2738 }, { "epoch": 0.16, "grad_norm": 0.440562055294282, "learning_rate": 1.9161702444566803e-05, "loss": 0.3153, "step": 2739 }, { "epoch": 0.16, "grad_norm": 0.43331696516541285, "learning_rate": 1.9160956452030728e-05, "loss": 0.3573, "step": 2740 }, { "epoch": 0.16, "grad_norm": 0.7305226944821447, "learning_rate": 1.9160210142251127e-05, "loss": 0.4819, "step": 2741 }, { "epoch": 0.16, "grad_norm": 0.39263699843117555, "learning_rate": 1.9159463515253842e-05, "loss": 0.2561, "step": 2742 }, { "epoch": 0.16, "grad_norm": 0.28498177963286886, "learning_rate": 1.9158716571064728e-05, "loss": 0.2061, "step": 2743 }, { "epoch": 0.16, "grad_norm": 0.509629190991603, "learning_rate": 1.9157969309709656e-05, "loss": 0.4165, "step": 2744 }, { "epoch": 0.16, "grad_norm": 0.343301559511347, "learning_rate": 1.9157221731214498e-05, "loss": 0.2715, "step": 2745 }, { "epoch": 0.16, "grad_norm": 0.9309225184678585, "learning_rate": 1.9156473835605146e-05, "loss": 0.6432, "step": 2746 }, { "epoch": 0.16, "grad_norm": 0.8504928341821835, "learning_rate": 1.9155725622907496e-05, "loss": 0.5317, "step": 2747 }, { "epoch": 0.16, "grad_norm": 0.3641216283918297, "learning_rate": 1.9154977093147467e-05, "loss": 0.2938, "step": 2748 }, { "epoch": 0.16, "grad_norm": 0.36303716960804644, "learning_rate": 1.915422824635097e-05, "loss": 0.148, "step": 2749 }, { "epoch": 0.16, "grad_norm": 0.6009779568927565, "learning_rate": 1.9153479082543945e-05, "loss": 0.4414, "step": 2750 }, { "epoch": 0.16, "grad_norm": 0.3125761277927421, "learning_rate": 1.9152729601752334e-05, "loss": 0.2688, "step": 2751 }, { "epoch": 0.16, "grad_norm": 0.4369004478121061, "learning_rate": 1.9151979804002086e-05, "loss": 0.2762, "step": 2752 }, { "epoch": 0.16, "grad_norm": 0.8335443101192821, "learning_rate": 1.9151229689319177e-05, "loss": 0.5237, "step": 2753 }, { "epoch": 0.16, "grad_norm": 0.41561726711084734, "learning_rate": 1.9150479257729576e-05, "loss": 0.3092, "step": 2754 }, { "epoch": 0.16, "grad_norm": 0.9114026808149253, "learning_rate": 1.9149728509259268e-05, "loss": 0.3958, "step": 2755 }, { "epoch": 0.16, "grad_norm": 0.37242842792677777, "learning_rate": 1.9148977443934257e-05, "loss": 0.313, "step": 2756 }, { "epoch": 0.16, "grad_norm": 0.45442449929965356, "learning_rate": 1.914822606178055e-05, "loss": 0.328, "step": 2757 }, { "epoch": 0.16, "grad_norm": 0.29522579252993075, "learning_rate": 1.914747436282417e-05, "loss": 0.1609, "step": 2758 }, { "epoch": 0.16, "grad_norm": 0.5405467495766618, "learning_rate": 1.9146722347091145e-05, "loss": 0.385, "step": 2759 }, { "epoch": 0.16, "grad_norm": 0.3952204533027063, "learning_rate": 1.9145970014607517e-05, "loss": 0.297, "step": 2760 }, { "epoch": 0.16, "grad_norm": 1.4252943635395237, "learning_rate": 1.914521736539934e-05, "loss": 0.5132, "step": 2761 }, { "epoch": 0.16, "grad_norm": 0.42261706849570274, "learning_rate": 1.9144464399492682e-05, "loss": 0.2417, "step": 2762 }, { "epoch": 0.16, "grad_norm": 0.3112984251374848, "learning_rate": 1.9143711116913614e-05, "loss": 0.2201, "step": 2763 }, { "epoch": 0.16, "grad_norm": 0.5503114825951091, "learning_rate": 1.9142957517688226e-05, "loss": 0.3508, "step": 2764 }, { "epoch": 0.16, "grad_norm": 2.290525038080281, "learning_rate": 1.9142203601842607e-05, "loss": 0.626, "step": 2765 }, { "epoch": 0.16, "grad_norm": 0.4340454110617708, "learning_rate": 1.9141449369402873e-05, "loss": 0.305, "step": 2766 }, { "epoch": 0.16, "grad_norm": 1.0459985522737103, "learning_rate": 1.914069482039514e-05, "loss": 0.5296, "step": 2767 }, { "epoch": 0.16, "grad_norm": 0.4969749645117052, "learning_rate": 1.913993995484554e-05, "loss": 0.2863, "step": 2768 }, { "epoch": 0.16, "grad_norm": 0.40985606509878225, "learning_rate": 1.913918477278021e-05, "loss": 0.2643, "step": 2769 }, { "epoch": 0.16, "grad_norm": 0.6034420793845431, "learning_rate": 1.9138429274225306e-05, "loss": 0.3003, "step": 2770 }, { "epoch": 0.16, "grad_norm": 0.9476623773086797, "learning_rate": 1.913767345920699e-05, "loss": 0.2913, "step": 2771 }, { "epoch": 0.16, "grad_norm": 0.7160270392478728, "learning_rate": 1.9136917327751433e-05, "loss": 0.3226, "step": 2772 }, { "epoch": 0.16, "grad_norm": 1.8619252386421343, "learning_rate": 1.913616087988482e-05, "loss": 0.4995, "step": 2773 }, { "epoch": 0.16, "grad_norm": 1.1457060818764169, "learning_rate": 1.9135404115633354e-05, "loss": 0.6252, "step": 2774 }, { "epoch": 0.16, "grad_norm": 0.4366886822707938, "learning_rate": 1.9134647035023233e-05, "loss": 0.2177, "step": 2775 }, { "epoch": 0.16, "grad_norm": 0.37860818327377915, "learning_rate": 1.913388963808068e-05, "loss": 0.2423, "step": 2776 }, { "epoch": 0.16, "grad_norm": 0.9684838256494043, "learning_rate": 1.9133131924831917e-05, "loss": 0.4987, "step": 2777 }, { "epoch": 0.16, "grad_norm": 0.8367814228740066, "learning_rate": 1.9132373895303193e-05, "loss": 0.2736, "step": 2778 }, { "epoch": 0.16, "grad_norm": 1.2098389822749138, "learning_rate": 1.9131615549520752e-05, "loss": 0.4306, "step": 2779 }, { "epoch": 0.16, "grad_norm": 0.8197725081398024, "learning_rate": 1.913085688751086e-05, "loss": 0.3871, "step": 2780 }, { "epoch": 0.16, "grad_norm": 0.49208863169300127, "learning_rate": 1.913009790929978e-05, "loss": 0.2306, "step": 2781 }, { "epoch": 0.16, "grad_norm": 0.3526381294259932, "learning_rate": 1.9129338614913808e-05, "loss": 0.2522, "step": 2782 }, { "epoch": 0.16, "grad_norm": 0.4756058297463948, "learning_rate": 1.912857900437923e-05, "loss": 0.326, "step": 2783 }, { "epoch": 0.16, "grad_norm": 0.45837754491678995, "learning_rate": 1.9127819077722353e-05, "loss": 0.3238, "step": 2784 }, { "epoch": 0.16, "grad_norm": 1.4324033883404848, "learning_rate": 1.9127058834969494e-05, "loss": 0.4619, "step": 2785 }, { "epoch": 0.16, "grad_norm": 1.0480337876957362, "learning_rate": 1.9126298276146982e-05, "loss": 0.5592, "step": 2786 }, { "epoch": 0.16, "grad_norm": 0.41564243767044307, "learning_rate": 1.912553740128115e-05, "loss": 0.2959, "step": 2787 }, { "epoch": 0.16, "grad_norm": 0.2892471983222418, "learning_rate": 1.9124776210398354e-05, "loss": 0.1853, "step": 2788 }, { "epoch": 0.16, "grad_norm": 0.7875199409762493, "learning_rate": 1.9124014703524946e-05, "loss": 0.5366, "step": 2789 }, { "epoch": 0.16, "grad_norm": 0.4263502403630036, "learning_rate": 1.9123252880687303e-05, "loss": 0.3377, "step": 2790 }, { "epoch": 0.16, "grad_norm": 0.5112805395238533, "learning_rate": 1.9122490741911806e-05, "loss": 0.2663, "step": 2791 }, { "epoch": 0.16, "grad_norm": 0.525359587134657, "learning_rate": 1.9121728287224844e-05, "loss": 0.4025, "step": 2792 }, { "epoch": 0.16, "grad_norm": 0.4425050759597999, "learning_rate": 1.9120965516652828e-05, "loss": 0.2785, "step": 2793 }, { "epoch": 0.16, "grad_norm": 0.2788256255576857, "learning_rate": 1.912020243022217e-05, "loss": 0.1893, "step": 2794 }, { "epoch": 0.16, "grad_norm": 0.47566417935386807, "learning_rate": 1.911943902795929e-05, "loss": 0.3708, "step": 2795 }, { "epoch": 0.16, "grad_norm": 0.47443133106229063, "learning_rate": 1.9118675309890628e-05, "loss": 0.3175, "step": 2796 }, { "epoch": 0.16, "grad_norm": 0.5872484241415183, "learning_rate": 1.911791127604263e-05, "loss": 0.4337, "step": 2797 }, { "epoch": 0.16, "grad_norm": 0.9705192208886395, "learning_rate": 1.9117146926441757e-05, "loss": 0.3478, "step": 2798 }, { "epoch": 0.16, "grad_norm": 0.3903267981435643, "learning_rate": 1.9116382261114484e-05, "loss": 0.2852, "step": 2799 }, { "epoch": 0.16, "grad_norm": 0.3265402016275641, "learning_rate": 1.911561728008728e-05, "loss": 0.2788, "step": 2800 }, { "epoch": 0.16, "grad_norm": 0.7350955291555213, "learning_rate": 1.9114851983386646e-05, "loss": 0.4241, "step": 2801 }, { "epoch": 0.16, "grad_norm": 0.4414743371523098, "learning_rate": 1.9114086371039078e-05, "loss": 0.3136, "step": 2802 }, { "epoch": 0.16, "grad_norm": 0.4236622091499315, "learning_rate": 1.911332044307109e-05, "loss": 0.3239, "step": 2803 }, { "epoch": 0.16, "grad_norm": 0.5945493542557609, "learning_rate": 1.9112554199509207e-05, "loss": 0.3062, "step": 2804 }, { "epoch": 0.16, "grad_norm": 0.4080833519256838, "learning_rate": 1.911178764037996e-05, "loss": 0.2928, "step": 2805 }, { "epoch": 0.16, "grad_norm": 0.803133176210868, "learning_rate": 1.9111020765709905e-05, "loss": 0.5323, "step": 2806 }, { "epoch": 0.16, "grad_norm": 0.4639683487323455, "learning_rate": 1.9110253575525593e-05, "loss": 0.3419, "step": 2807 }, { "epoch": 0.16, "grad_norm": 0.48221297823220266, "learning_rate": 1.910948606985359e-05, "loss": 0.3114, "step": 2808 }, { "epoch": 0.16, "grad_norm": 0.4188949921186524, "learning_rate": 1.9108718248720472e-05, "loss": 0.2776, "step": 2809 }, { "epoch": 0.16, "grad_norm": 0.4180698257150617, "learning_rate": 1.9107950112152838e-05, "loss": 0.2458, "step": 2810 }, { "epoch": 0.16, "grad_norm": 0.33142474909538355, "learning_rate": 1.910718166017728e-05, "loss": 0.2246, "step": 2811 }, { "epoch": 0.16, "grad_norm": 0.3976551718650196, "learning_rate": 1.910641289282041e-05, "loss": 0.3871, "step": 2812 }, { "epoch": 0.16, "grad_norm": 0.754813347285477, "learning_rate": 1.910564381010886e-05, "loss": 0.5863, "step": 2813 }, { "epoch": 0.16, "grad_norm": 0.4022736855798634, "learning_rate": 1.9104874412069253e-05, "loss": 0.2171, "step": 2814 }, { "epoch": 0.16, "grad_norm": 0.32538048947625997, "learning_rate": 1.9104104698728235e-05, "loss": 0.2844, "step": 2815 }, { "epoch": 0.16, "grad_norm": 0.4742319547411235, "learning_rate": 1.9103334670112468e-05, "loss": 0.3209, "step": 2816 }, { "epoch": 0.16, "grad_norm": 0.3539327872330429, "learning_rate": 1.9102564326248608e-05, "loss": 0.1927, "step": 2817 }, { "epoch": 0.16, "grad_norm": 0.5182586066974086, "learning_rate": 1.910179366716334e-05, "loss": 0.4189, "step": 2818 }, { "epoch": 0.16, "grad_norm": 0.46291557914978526, "learning_rate": 1.9101022692883348e-05, "loss": 0.3427, "step": 2819 }, { "epoch": 0.16, "grad_norm": 0.3223453767011489, "learning_rate": 1.910025140343533e-05, "loss": 0.2285, "step": 2820 }, { "epoch": 0.16, "grad_norm": 0.4671981056244132, "learning_rate": 1.9099479798845997e-05, "loss": 0.2945, "step": 2821 }, { "epoch": 0.16, "grad_norm": 0.48019689418661415, "learning_rate": 1.9098707879142072e-05, "loss": 0.3136, "step": 2822 }, { "epoch": 0.16, "grad_norm": 0.4030849015440731, "learning_rate": 1.9097935644350284e-05, "loss": 0.2743, "step": 2823 }, { "epoch": 0.16, "grad_norm": 0.5656401007116433, "learning_rate": 1.9097163094497374e-05, "loss": 0.349, "step": 2824 }, { "epoch": 0.16, "grad_norm": 0.6376831416506765, "learning_rate": 1.9096390229610095e-05, "loss": 0.557, "step": 2825 }, { "epoch": 0.16, "grad_norm": 0.4153453663473684, "learning_rate": 1.9095617049715217e-05, "loss": 0.3185, "step": 2826 }, { "epoch": 0.16, "grad_norm": 0.34493890499970015, "learning_rate": 1.9094843554839513e-05, "loss": 0.2722, "step": 2827 }, { "epoch": 0.16, "grad_norm": 0.3024656467389478, "learning_rate": 1.9094069745009766e-05, "loss": 0.2226, "step": 2828 }, { "epoch": 0.16, "grad_norm": 0.7295724146360978, "learning_rate": 1.9093295620252776e-05, "loss": 0.4354, "step": 2829 }, { "epoch": 0.16, "grad_norm": 0.3640806422459361, "learning_rate": 1.9092521180595347e-05, "loss": 0.2594, "step": 2830 }, { "epoch": 0.16, "grad_norm": 0.3788662395591776, "learning_rate": 1.9091746426064303e-05, "loss": 0.3483, "step": 2831 }, { "epoch": 0.16, "grad_norm": 0.5866543462727462, "learning_rate": 1.9090971356686473e-05, "loss": 0.3439, "step": 2832 }, { "epoch": 0.16, "grad_norm": 0.2907543494961357, "learning_rate": 1.909019597248869e-05, "loss": 0.2154, "step": 2833 }, { "epoch": 0.16, "grad_norm": 0.440487120983383, "learning_rate": 1.9089420273497813e-05, "loss": 0.2879, "step": 2834 }, { "epoch": 0.16, "grad_norm": 0.3480069803830963, "learning_rate": 1.9088644259740708e-05, "loss": 0.3217, "step": 2835 }, { "epoch": 0.16, "grad_norm": 0.5148676360714839, "learning_rate": 1.9087867931244238e-05, "loss": 0.364, "step": 2836 }, { "epoch": 0.16, "grad_norm": 0.9445440875281329, "learning_rate": 1.9087091288035293e-05, "loss": 0.4219, "step": 2837 }, { "epoch": 0.16, "grad_norm": 0.6342925351854132, "learning_rate": 1.908631433014077e-05, "loss": 0.4129, "step": 2838 }, { "epoch": 0.16, "grad_norm": 0.32986836933112085, "learning_rate": 1.9085537057587568e-05, "loss": 0.3206, "step": 2839 }, { "epoch": 0.16, "grad_norm": 0.5347354807326525, "learning_rate": 1.9084759470402612e-05, "loss": 0.247, "step": 2840 }, { "epoch": 0.16, "grad_norm": 0.4500849622065172, "learning_rate": 1.9083981568612828e-05, "loss": 0.3415, "step": 2841 }, { "epoch": 0.16, "grad_norm": 0.4047582580502225, "learning_rate": 1.9083203352245148e-05, "loss": 0.255, "step": 2842 }, { "epoch": 0.16, "grad_norm": 0.4042265707783651, "learning_rate": 1.9082424821326532e-05, "loss": 0.3031, "step": 2843 }, { "epoch": 0.16, "grad_norm": 0.4400449057120318, "learning_rate": 1.9081645975883928e-05, "loss": 0.325, "step": 2844 }, { "epoch": 0.16, "grad_norm": 0.5280907990199353, "learning_rate": 1.908086681594432e-05, "loss": 0.3971, "step": 2845 }, { "epoch": 0.16, "grad_norm": 0.4402381574442051, "learning_rate": 1.908008734153468e-05, "loss": 0.3617, "step": 2846 }, { "epoch": 0.16, "grad_norm": 0.34365348876661916, "learning_rate": 1.9079307552682013e-05, "loss": 0.2332, "step": 2847 }, { "epoch": 0.16, "grad_norm": 0.3630634485960283, "learning_rate": 1.907852744941331e-05, "loss": 0.2653, "step": 2848 }, { "epoch": 0.16, "grad_norm": 0.9110965112820536, "learning_rate": 1.9077747031755594e-05, "loss": 0.6379, "step": 2849 }, { "epoch": 0.16, "grad_norm": 0.4468249874232147, "learning_rate": 1.9076966299735887e-05, "loss": 0.1208, "step": 2850 }, { "epoch": 0.16, "grad_norm": 0.3606662643476382, "learning_rate": 1.9076185253381227e-05, "loss": 0.2985, "step": 2851 }, { "epoch": 0.16, "grad_norm": 0.6445782349707652, "learning_rate": 1.9075403892718664e-05, "loss": 0.533, "step": 2852 }, { "epoch": 0.16, "grad_norm": 0.3657364361197772, "learning_rate": 1.9074622217775253e-05, "loss": 0.1565, "step": 2853 }, { "epoch": 0.16, "grad_norm": 0.352593659165976, "learning_rate": 1.9073840228578068e-05, "loss": 0.2859, "step": 2854 }, { "epoch": 0.16, "grad_norm": 0.4152158108112304, "learning_rate": 1.9073057925154184e-05, "loss": 0.3108, "step": 2855 }, { "epoch": 0.16, "grad_norm": 0.5767800434899424, "learning_rate": 1.9072275307530692e-05, "loss": 0.2169, "step": 2856 }, { "epoch": 0.16, "grad_norm": 0.406189229853736, "learning_rate": 1.9071492375734698e-05, "loss": 0.3657, "step": 2857 }, { "epoch": 0.16, "grad_norm": 0.7434134856812202, "learning_rate": 1.9070709129793313e-05, "loss": 0.536, "step": 2858 }, { "epoch": 0.16, "grad_norm": 0.30162267512857593, "learning_rate": 1.906992556973366e-05, "loss": 0.1912, "step": 2859 }, { "epoch": 0.16, "grad_norm": 0.360668147936046, "learning_rate": 1.906914169558288e-05, "loss": 0.2589, "step": 2860 }, { "epoch": 0.16, "grad_norm": 0.8302107796578109, "learning_rate": 1.9068357507368108e-05, "loss": 0.6726, "step": 2861 }, { "epoch": 0.16, "grad_norm": 0.43014673007102716, "learning_rate": 1.9067573005116506e-05, "loss": 0.3113, "step": 2862 }, { "epoch": 0.16, "grad_norm": 0.351210921130384, "learning_rate": 1.9066788188855237e-05, "loss": 0.2871, "step": 2863 }, { "epoch": 0.16, "grad_norm": 0.8047851678299188, "learning_rate": 1.906600305861149e-05, "loss": 0.4959, "step": 2864 }, { "epoch": 0.16, "grad_norm": 0.47323207583686905, "learning_rate": 1.906521761441244e-05, "loss": 0.2408, "step": 2865 }, { "epoch": 0.16, "grad_norm": 0.3269727310027329, "learning_rate": 1.90644318562853e-05, "loss": 0.1871, "step": 2866 }, { "epoch": 0.16, "grad_norm": 0.4722268465786524, "learning_rate": 1.9063645784257274e-05, "loss": 0.3434, "step": 2867 }, { "epoch": 0.16, "grad_norm": 0.8141264243435448, "learning_rate": 1.906285939835558e-05, "loss": 0.5588, "step": 2868 }, { "epoch": 0.16, "grad_norm": 0.3784784877528985, "learning_rate": 1.9062072698607457e-05, "loss": 0.2705, "step": 2869 }, { "epoch": 0.16, "grad_norm": 0.6574080394539428, "learning_rate": 1.9061285685040148e-05, "loss": 0.3812, "step": 2870 }, { "epoch": 0.16, "grad_norm": 0.4694759377072518, "learning_rate": 1.9060498357680905e-05, "loss": 0.3247, "step": 2871 }, { "epoch": 0.17, "grad_norm": 0.3590925137545595, "learning_rate": 1.905971071655699e-05, "loss": 0.243, "step": 2872 }, { "epoch": 0.17, "grad_norm": 0.43092525195270276, "learning_rate": 1.9058922761695684e-05, "loss": 0.1675, "step": 2873 }, { "epoch": 0.17, "grad_norm": 0.509367055075115, "learning_rate": 1.9058134493124275e-05, "loss": 0.3637, "step": 2874 }, { "epoch": 0.17, "grad_norm": 0.3456206104797771, "learning_rate": 1.9057345910870054e-05, "loss": 0.3076, "step": 2875 }, { "epoch": 0.17, "grad_norm": 1.0697084543942041, "learning_rate": 1.905655701496034e-05, "loss": 0.3674, "step": 2876 }, { "epoch": 0.17, "grad_norm": 0.40182912681545313, "learning_rate": 1.9055767805422438e-05, "loss": 0.2836, "step": 2877 }, { "epoch": 0.17, "grad_norm": 0.473138542860798, "learning_rate": 1.905497828228369e-05, "loss": 0.3122, "step": 2878 }, { "epoch": 0.17, "grad_norm": 0.3813765291907576, "learning_rate": 1.9054188445571435e-05, "loss": 0.2404, "step": 2879 }, { "epoch": 0.17, "grad_norm": 0.7796531744569375, "learning_rate": 1.905339829531302e-05, "loss": 0.448, "step": 2880 }, { "epoch": 0.17, "grad_norm": 0.42795114627019815, "learning_rate": 1.9052607831535812e-05, "loss": 0.2856, "step": 2881 }, { "epoch": 0.17, "grad_norm": 0.4012186112644199, "learning_rate": 1.9051817054267184e-05, "loss": 0.3072, "step": 2882 }, { "epoch": 0.17, "grad_norm": 0.748819417416899, "learning_rate": 1.9051025963534526e-05, "loss": 0.4127, "step": 2883 }, { "epoch": 0.17, "grad_norm": 0.41468273696256386, "learning_rate": 1.9050234559365223e-05, "loss": 0.3054, "step": 2884 }, { "epoch": 0.17, "grad_norm": 0.5002904393428474, "learning_rate": 1.904944284178669e-05, "loss": 0.3061, "step": 2885 }, { "epoch": 0.17, "grad_norm": 0.4156710785103392, "learning_rate": 1.9048650810826333e-05, "loss": 0.3083, "step": 2886 }, { "epoch": 0.17, "grad_norm": 0.40806478413793273, "learning_rate": 1.9047858466511594e-05, "loss": 0.3049, "step": 2887 }, { "epoch": 0.17, "grad_norm": 0.4128113081838106, "learning_rate": 1.9047065808869902e-05, "loss": 0.2899, "step": 2888 }, { "epoch": 0.17, "grad_norm": 0.3442654178253383, "learning_rate": 1.9046272837928713e-05, "loss": 0.1134, "step": 2889 }, { "epoch": 0.17, "grad_norm": 0.3907830303341359, "learning_rate": 1.9045479553715482e-05, "loss": 0.2846, "step": 2890 }, { "epoch": 0.17, "grad_norm": 0.5828855253121485, "learning_rate": 1.9044685956257686e-05, "loss": 0.4289, "step": 2891 }, { "epoch": 0.17, "grad_norm": 0.514484579636209, "learning_rate": 1.9043892045582804e-05, "loss": 0.3811, "step": 2892 }, { "epoch": 0.17, "grad_norm": 0.3266743076958912, "learning_rate": 1.9043097821718327e-05, "loss": 0.2815, "step": 2893 }, { "epoch": 0.17, "grad_norm": 0.5032500743515141, "learning_rate": 1.9042303284691762e-05, "loss": 0.3833, "step": 2894 }, { "epoch": 0.17, "grad_norm": 0.4345386460910853, "learning_rate": 1.9041508434530622e-05, "loss": 0.2607, "step": 2895 }, { "epoch": 0.17, "grad_norm": 0.41878196971218673, "learning_rate": 1.9040713271262438e-05, "loss": 0.2993, "step": 2896 }, { "epoch": 0.17, "grad_norm": 0.8099840083560745, "learning_rate": 1.9039917794914736e-05, "loss": 0.5032, "step": 2897 }, { "epoch": 0.17, "grad_norm": 0.42788121734475587, "learning_rate": 1.9039122005515074e-05, "loss": 0.3714, "step": 2898 }, { "epoch": 0.17, "grad_norm": 0.3654518107930367, "learning_rate": 1.9038325903091003e-05, "loss": 0.218, "step": 2899 }, { "epoch": 0.17, "grad_norm": 0.37284915313610173, "learning_rate": 1.90375294876701e-05, "loss": 0.2112, "step": 2900 }, { "epoch": 0.17, "grad_norm": 1.25242807424076, "learning_rate": 1.9036732759279935e-05, "loss": 0.6335, "step": 2901 }, { "epoch": 0.17, "grad_norm": 0.3850326090104052, "learning_rate": 1.9035935717948102e-05, "loss": 0.2102, "step": 2902 }, { "epoch": 0.17, "grad_norm": 0.6094638643634038, "learning_rate": 1.9035138363702206e-05, "loss": 0.392, "step": 2903 }, { "epoch": 0.17, "grad_norm": 0.9291079961641789, "learning_rate": 1.9034340696569858e-05, "loss": 0.635, "step": 2904 }, { "epoch": 0.17, "grad_norm": 0.6338307726280604, "learning_rate": 1.9033542716578677e-05, "loss": 0.248, "step": 2905 }, { "epoch": 0.17, "grad_norm": 0.3460124838063629, "learning_rate": 1.90327444237563e-05, "loss": 0.2289, "step": 2906 }, { "epoch": 0.17, "grad_norm": 1.568956704643555, "learning_rate": 1.9031945818130373e-05, "loss": 0.7279, "step": 2907 }, { "epoch": 0.17, "grad_norm": 0.4521370738515677, "learning_rate": 1.9031146899728555e-05, "loss": 0.2614, "step": 2908 }, { "epoch": 0.17, "grad_norm": 0.9225487734393911, "learning_rate": 1.9030347668578506e-05, "loss": 0.5389, "step": 2909 }, { "epoch": 0.17, "grad_norm": 0.5007109970727628, "learning_rate": 1.90295481247079e-05, "loss": 0.3537, "step": 2910 }, { "epoch": 0.17, "grad_norm": 0.3697140213680634, "learning_rate": 1.902874826814444e-05, "loss": 0.2837, "step": 2911 }, { "epoch": 0.17, "grad_norm": 0.25441038379523395, "learning_rate": 1.902794809891581e-05, "loss": 0.1146, "step": 2912 }, { "epoch": 0.17, "grad_norm": 0.9960926038338246, "learning_rate": 1.9027147617049727e-05, "loss": 0.5207, "step": 2913 }, { "epoch": 0.17, "grad_norm": 0.49115998471316646, "learning_rate": 1.9026346822573906e-05, "loss": 0.2785, "step": 2914 }, { "epoch": 0.17, "grad_norm": 0.6090023738796487, "learning_rate": 1.902554571551609e-05, "loss": 0.3221, "step": 2915 }, { "epoch": 0.17, "grad_norm": 1.1039710139860757, "learning_rate": 1.902474429590401e-05, "loss": 0.735, "step": 2916 }, { "epoch": 0.17, "grad_norm": 0.49261792202989546, "learning_rate": 1.9023942563765422e-05, "loss": 0.2849, "step": 2917 }, { "epoch": 0.17, "grad_norm": 0.28069746124257844, "learning_rate": 1.9023140519128093e-05, "loss": 0.1992, "step": 2918 }, { "epoch": 0.17, "grad_norm": 1.3144734644202736, "learning_rate": 1.9022338162019794e-05, "loss": 0.5199, "step": 2919 }, { "epoch": 0.17, "grad_norm": 0.5300530829580868, "learning_rate": 1.9021535492468313e-05, "loss": 0.3645, "step": 2920 }, { "epoch": 0.17, "grad_norm": 0.5599830716860857, "learning_rate": 1.9020732510501445e-05, "loss": 0.3213, "step": 2921 }, { "epoch": 0.17, "grad_norm": 0.4318688829311013, "learning_rate": 1.9019929216147002e-05, "loss": 0.3321, "step": 2922 }, { "epoch": 0.17, "grad_norm": 0.40200132881097284, "learning_rate": 1.9019125609432793e-05, "loss": 0.2933, "step": 2923 }, { "epoch": 0.17, "grad_norm": 0.35823983826684375, "learning_rate": 1.9018321690386656e-05, "loss": 0.2411, "step": 2924 }, { "epoch": 0.17, "grad_norm": 1.2091907994532067, "learning_rate": 1.9017517459036426e-05, "loss": 0.4403, "step": 2925 }, { "epoch": 0.17, "grad_norm": 0.44885633440024797, "learning_rate": 1.9016712915409953e-05, "loss": 0.2987, "step": 2926 }, { "epoch": 0.17, "grad_norm": 0.6097786376505031, "learning_rate": 1.90159080595351e-05, "loss": 0.3783, "step": 2927 }, { "epoch": 0.17, "grad_norm": 1.4166969282231294, "learning_rate": 1.901510289143974e-05, "loss": 0.5974, "step": 2928 }, { "epoch": 0.17, "grad_norm": 0.39087853057358346, "learning_rate": 1.901429741115175e-05, "loss": 0.2789, "step": 2929 }, { "epoch": 0.17, "grad_norm": 0.4707325846249934, "learning_rate": 1.901349161869903e-05, "loss": 0.3342, "step": 2930 }, { "epoch": 0.17, "grad_norm": 0.5044461165624786, "learning_rate": 1.9012685514109487e-05, "loss": 0.2705, "step": 2931 }, { "epoch": 0.17, "grad_norm": 0.446221372568942, "learning_rate": 1.9011879097411028e-05, "loss": 0.2971, "step": 2932 }, { "epoch": 0.17, "grad_norm": 0.7000113910533794, "learning_rate": 1.9011072368631586e-05, "loss": 0.4128, "step": 2933 }, { "epoch": 0.17, "grad_norm": 0.6515818280851625, "learning_rate": 1.9010265327799092e-05, "loss": 0.3242, "step": 2934 }, { "epoch": 0.17, "grad_norm": 0.4527641519593625, "learning_rate": 1.90094579749415e-05, "loss": 0.271, "step": 2935 }, { "epoch": 0.17, "grad_norm": 0.34571287724653443, "learning_rate": 1.9008650310086768e-05, "loss": 0.3034, "step": 2936 }, { "epoch": 0.17, "grad_norm": 0.3554877938716127, "learning_rate": 1.900784233326286e-05, "loss": 0.3289, "step": 2937 }, { "epoch": 0.17, "grad_norm": 0.4412511425860835, "learning_rate": 1.9007034044497757e-05, "loss": 0.2159, "step": 2938 }, { "epoch": 0.17, "grad_norm": 0.7452586967068775, "learning_rate": 1.9006225443819456e-05, "loss": 0.4174, "step": 2939 }, { "epoch": 0.17, "grad_norm": 1.353204527219416, "learning_rate": 1.900541653125595e-05, "loss": 0.8481, "step": 2940 }, { "epoch": 0.17, "grad_norm": 0.4632062818338658, "learning_rate": 1.9004607306835263e-05, "loss": 0.218, "step": 2941 }, { "epoch": 0.17, "grad_norm": 0.5341120108017874, "learning_rate": 1.900379777058541e-05, "loss": 0.3449, "step": 2942 }, { "epoch": 0.17, "grad_norm": 0.9366471498287889, "learning_rate": 1.9002987922534427e-05, "loss": 0.5459, "step": 2943 }, { "epoch": 0.17, "grad_norm": 0.27741904876939394, "learning_rate": 1.900217776271036e-05, "loss": 0.1587, "step": 2944 }, { "epoch": 0.17, "grad_norm": 1.3941750294590425, "learning_rate": 1.9001367291141264e-05, "loss": 0.6409, "step": 2945 }, { "epoch": 0.17, "grad_norm": 0.48903414699985137, "learning_rate": 1.9000556507855204e-05, "loss": 0.353, "step": 2946 }, { "epoch": 0.17, "grad_norm": 0.4693605982996768, "learning_rate": 1.8999745412880264e-05, "loss": 0.3303, "step": 2947 }, { "epoch": 0.17, "grad_norm": 0.5832080658668408, "learning_rate": 1.8998934006244522e-05, "loss": 0.3351, "step": 2948 }, { "epoch": 0.17, "grad_norm": 0.5745640754829965, "learning_rate": 1.8998122287976085e-05, "loss": 0.4208, "step": 2949 }, { "epoch": 0.17, "grad_norm": 0.3367288088164626, "learning_rate": 1.899731025810306e-05, "loss": 0.2534, "step": 2950 }, { "epoch": 0.17, "grad_norm": 0.38374292249387476, "learning_rate": 1.8996497916653565e-05, "loss": 0.1868, "step": 2951 }, { "epoch": 0.17, "grad_norm": 1.4710186990718137, "learning_rate": 1.899568526365574e-05, "loss": 0.8454, "step": 2952 }, { "epoch": 0.17, "grad_norm": 0.6019236351354754, "learning_rate": 1.8994872299137715e-05, "loss": 0.462, "step": 2953 }, { "epoch": 0.17, "grad_norm": 0.3551118807958904, "learning_rate": 1.8994059023127655e-05, "loss": 0.2787, "step": 2954 }, { "epoch": 0.17, "grad_norm": 0.4704132793170269, "learning_rate": 1.899324543565371e-05, "loss": 0.3093, "step": 2955 }, { "epoch": 0.17, "grad_norm": 0.37749498137552984, "learning_rate": 1.899243153674407e-05, "loss": 0.1934, "step": 2956 }, { "epoch": 0.17, "grad_norm": 0.4254951358246995, "learning_rate": 1.8991617326426907e-05, "loss": 0.2868, "step": 2957 }, { "epoch": 0.17, "grad_norm": 0.6753680434296535, "learning_rate": 1.8990802804730424e-05, "loss": 0.4295, "step": 2958 }, { "epoch": 0.17, "grad_norm": 0.7358847969565454, "learning_rate": 1.8989987971682828e-05, "loss": 0.3888, "step": 2959 }, { "epoch": 0.17, "grad_norm": 0.40847603026118584, "learning_rate": 1.8989172827312337e-05, "loss": 0.3701, "step": 2960 }, { "epoch": 0.17, "grad_norm": 0.5456246498843511, "learning_rate": 1.8988357371647173e-05, "loss": 0.2735, "step": 2961 }, { "epoch": 0.17, "grad_norm": 0.31106052444760784, "learning_rate": 1.8987541604715584e-05, "loss": 0.2288, "step": 2962 }, { "epoch": 0.17, "grad_norm": 0.36068213668776067, "learning_rate": 1.898672552654581e-05, "loss": 0.2461, "step": 2963 }, { "epoch": 0.17, "grad_norm": 0.9711275231098498, "learning_rate": 1.8985909137166122e-05, "loss": 0.4776, "step": 2964 }, { "epoch": 0.17, "grad_norm": 0.3911974616653507, "learning_rate": 1.8985092436604783e-05, "loss": 0.329, "step": 2965 }, { "epoch": 0.17, "grad_norm": 0.4923858502324332, "learning_rate": 1.8984275424890085e-05, "loss": 0.3115, "step": 2966 }, { "epoch": 0.17, "grad_norm": 0.5945187050292371, "learning_rate": 1.8983458102050313e-05, "loss": 0.4092, "step": 2967 }, { "epoch": 0.17, "grad_norm": 0.24507124970481212, "learning_rate": 1.8982640468113774e-05, "loss": 0.197, "step": 2968 }, { "epoch": 0.17, "grad_norm": 0.5955934559945066, "learning_rate": 1.898182252310878e-05, "loss": 0.4072, "step": 2969 }, { "epoch": 0.17, "grad_norm": 0.429200057143055, "learning_rate": 1.8981004267063658e-05, "loss": 0.3242, "step": 2970 }, { "epoch": 0.17, "grad_norm": 0.5983194080034205, "learning_rate": 1.8980185700006744e-05, "loss": 0.4249, "step": 2971 }, { "epoch": 0.17, "grad_norm": 0.42932383596148743, "learning_rate": 1.8979366821966386e-05, "loss": 0.3293, "step": 2972 }, { "epoch": 0.17, "grad_norm": 0.3954252758600323, "learning_rate": 1.8978547632970943e-05, "loss": 0.3225, "step": 2973 }, { "epoch": 0.17, "grad_norm": 0.30943424037499556, "learning_rate": 1.897772813304878e-05, "loss": 0.0774, "step": 2974 }, { "epoch": 0.17, "grad_norm": 0.2955268319884107, "learning_rate": 1.8976908322228277e-05, "loss": 0.2291, "step": 2975 }, { "epoch": 0.17, "grad_norm": 0.8219325687989213, "learning_rate": 1.897608820053783e-05, "loss": 0.5307, "step": 2976 }, { "epoch": 0.17, "grad_norm": 0.3811432509694952, "learning_rate": 1.8975267768005828e-05, "loss": 0.2864, "step": 2977 }, { "epoch": 0.17, "grad_norm": 0.4285338176452864, "learning_rate": 1.897444702466069e-05, "loss": 0.3331, "step": 2978 }, { "epoch": 0.17, "grad_norm": 0.9754019324573616, "learning_rate": 1.897362597053084e-05, "loss": 0.5513, "step": 2979 }, { "epoch": 0.17, "grad_norm": 0.28062601130507825, "learning_rate": 1.897280460564471e-05, "loss": 0.1539, "step": 2980 }, { "epoch": 0.17, "grad_norm": 0.5016667633376, "learning_rate": 1.897198293003074e-05, "loss": 0.3324, "step": 2981 }, { "epoch": 0.17, "grad_norm": 0.5883370722958858, "learning_rate": 1.8971160943717387e-05, "loss": 0.3618, "step": 2982 }, { "epoch": 0.17, "grad_norm": 0.7596055078716403, "learning_rate": 1.8970338646733112e-05, "loss": 0.3371, "step": 2983 }, { "epoch": 0.17, "grad_norm": 0.34230411328604804, "learning_rate": 1.8969516039106402e-05, "loss": 0.2392, "step": 2984 }, { "epoch": 0.17, "grad_norm": 0.42128346482721546, "learning_rate": 1.8968693120865734e-05, "loss": 0.3327, "step": 2985 }, { "epoch": 0.17, "grad_norm": 0.6798764575206757, "learning_rate": 1.896786989203961e-05, "loss": 0.3974, "step": 2986 }, { "epoch": 0.17, "grad_norm": 0.3784241264767709, "learning_rate": 1.896704635265654e-05, "loss": 0.2136, "step": 2987 }, { "epoch": 0.17, "grad_norm": 1.276853121304933, "learning_rate": 1.8966222502745034e-05, "loss": 0.5163, "step": 2988 }, { "epoch": 0.17, "grad_norm": 0.44923140900964686, "learning_rate": 1.8965398342333632e-05, "loss": 0.3497, "step": 2989 }, { "epoch": 0.17, "grad_norm": 0.2639236438544276, "learning_rate": 1.896457387145087e-05, "loss": 0.1798, "step": 2990 }, { "epoch": 0.17, "grad_norm": 1.1097809048080438, "learning_rate": 1.8963749090125302e-05, "loss": 0.6302, "step": 2991 }, { "epoch": 0.17, "grad_norm": 1.234947244764597, "learning_rate": 1.8962923998385487e-05, "loss": 0.6959, "step": 2992 }, { "epoch": 0.17, "grad_norm": 0.4410351618966857, "learning_rate": 1.896209859626e-05, "loss": 0.258, "step": 2993 }, { "epoch": 0.17, "grad_norm": 0.9236199272405345, "learning_rate": 1.8961272883777424e-05, "loss": 0.4043, "step": 2994 }, { "epoch": 0.17, "grad_norm": 0.8780417499501578, "learning_rate": 1.8960446860966353e-05, "loss": 0.5626, "step": 2995 }, { "epoch": 0.17, "grad_norm": 0.2345376241037861, "learning_rate": 1.895962052785539e-05, "loss": 0.1591, "step": 2996 }, { "epoch": 0.17, "grad_norm": 0.4550241241419985, "learning_rate": 1.895879388447316e-05, "loss": 0.347, "step": 2997 }, { "epoch": 0.17, "grad_norm": 1.0166192508391756, "learning_rate": 1.8957966930848278e-05, "loss": 0.4882, "step": 2998 }, { "epoch": 0.17, "grad_norm": 0.4244725281572897, "learning_rate": 1.8957139667009388e-05, "loss": 0.3237, "step": 2999 }, { "epoch": 0.17, "grad_norm": 0.9454011346453592, "learning_rate": 1.8956312092985135e-05, "loss": 0.453, "step": 3000 }, { "epoch": 0.17, "grad_norm": 0.4287534901474559, "learning_rate": 1.895548420880418e-05, "loss": 0.3222, "step": 3001 }, { "epoch": 0.17, "grad_norm": 0.3976559492680062, "learning_rate": 1.8954656014495193e-05, "loss": 0.2943, "step": 3002 }, { "epoch": 0.17, "grad_norm": 0.256110448009788, "learning_rate": 1.8953827510086855e-05, "loss": 0.128, "step": 3003 }, { "epoch": 0.17, "grad_norm": 0.6654967054817726, "learning_rate": 1.8952998695607848e-05, "loss": 0.4478, "step": 3004 }, { "epoch": 0.17, "grad_norm": 0.4407137936298601, "learning_rate": 1.895216957108689e-05, "loss": 0.2961, "step": 3005 }, { "epoch": 0.17, "grad_norm": 0.4160984346255178, "learning_rate": 1.8951340136552677e-05, "loss": 0.3153, "step": 3006 }, { "epoch": 0.17, "grad_norm": 1.078401739989723, "learning_rate": 1.8950510392033945e-05, "loss": 0.669, "step": 3007 }, { "epoch": 0.17, "grad_norm": 0.33084119777448523, "learning_rate": 1.8949680337559422e-05, "loss": 0.2457, "step": 3008 }, { "epoch": 0.17, "grad_norm": 0.3027583624026009, "learning_rate": 1.8948849973157855e-05, "loss": 0.2139, "step": 3009 }, { "epoch": 0.17, "grad_norm": 0.9818615820212763, "learning_rate": 1.8948019298858e-05, "loss": 0.4861, "step": 3010 }, { "epoch": 0.17, "grad_norm": 0.4073751640836383, "learning_rate": 1.8947188314688614e-05, "loss": 0.3296, "step": 3011 }, { "epoch": 0.17, "grad_norm": 0.7477288026893142, "learning_rate": 1.8946357020678484e-05, "loss": 0.5517, "step": 3012 }, { "epoch": 0.17, "grad_norm": 0.38402672958445916, "learning_rate": 1.89455254168564e-05, "loss": 0.2941, "step": 3013 }, { "epoch": 0.17, "grad_norm": 0.378465393496418, "learning_rate": 1.8944693503251154e-05, "loss": 0.2712, "step": 3014 }, { "epoch": 0.17, "grad_norm": 0.38536621633444945, "learning_rate": 1.8943861279891555e-05, "loss": 0.19, "step": 3015 }, { "epoch": 0.17, "grad_norm": 1.112414636701315, "learning_rate": 1.8943028746806423e-05, "loss": 0.3941, "step": 3016 }, { "epoch": 0.17, "grad_norm": 0.4202034683893572, "learning_rate": 1.8942195904024593e-05, "loss": 0.315, "step": 3017 }, { "epoch": 0.17, "grad_norm": 1.5026074471147242, "learning_rate": 1.89413627515749e-05, "loss": 0.8241, "step": 3018 }, { "epoch": 0.17, "grad_norm": 0.6070421657926213, "learning_rate": 1.89405292894862e-05, "loss": 0.3541, "step": 3019 }, { "epoch": 0.17, "grad_norm": 0.41453425610717626, "learning_rate": 1.8939695517787355e-05, "loss": 0.3023, "step": 3020 }, { "epoch": 0.17, "grad_norm": 0.29993170844835293, "learning_rate": 1.893886143650724e-05, "loss": 0.2491, "step": 3021 }, { "epoch": 0.17, "grad_norm": 0.6600485195325879, "learning_rate": 1.893802704567474e-05, "loss": 0.3919, "step": 3022 }, { "epoch": 0.17, "grad_norm": 0.4849563665909965, "learning_rate": 1.8937192345318745e-05, "loss": 0.2887, "step": 3023 }, { "epoch": 0.17, "grad_norm": 1.121191174841304, "learning_rate": 1.8936357335468164e-05, "loss": 0.7787, "step": 3024 }, { "epoch": 0.17, "grad_norm": 0.4336597655448138, "learning_rate": 1.8935522016151914e-05, "loss": 0.3233, "step": 3025 }, { "epoch": 0.17, "grad_norm": 0.30783811468715805, "learning_rate": 1.8934686387398916e-05, "loss": 0.2047, "step": 3026 }, { "epoch": 0.17, "grad_norm": 0.31539444070821426, "learning_rate": 1.8933850449238118e-05, "loss": 0.2582, "step": 3027 }, { "epoch": 0.17, "grad_norm": 0.7055654305578103, "learning_rate": 1.893301420169846e-05, "loss": 0.4926, "step": 3028 }, { "epoch": 0.17, "grad_norm": 0.3686311554656913, "learning_rate": 1.893217764480891e-05, "loss": 0.2407, "step": 3029 }, { "epoch": 0.17, "grad_norm": 1.1909355193895523, "learning_rate": 1.8931340778598427e-05, "loss": 0.6177, "step": 3030 }, { "epoch": 0.17, "grad_norm": 1.4663985038033136, "learning_rate": 1.8930503603095996e-05, "loss": 0.8403, "step": 3031 }, { "epoch": 0.17, "grad_norm": 0.3190431419463848, "learning_rate": 1.892966611833061e-05, "loss": 0.1833, "step": 3032 }, { "epoch": 0.17, "grad_norm": 0.4511305241542145, "learning_rate": 1.892882832433127e-05, "loss": 0.3602, "step": 3033 }, { "epoch": 0.17, "grad_norm": 0.7068053529043494, "learning_rate": 1.8927990221126992e-05, "loss": 0.3597, "step": 3034 }, { "epoch": 0.17, "grad_norm": 0.42401792713410896, "learning_rate": 1.8927151808746794e-05, "loss": 0.284, "step": 3035 }, { "epoch": 0.17, "grad_norm": 1.0660654062947625, "learning_rate": 1.8926313087219715e-05, "loss": 0.4866, "step": 3036 }, { "epoch": 0.17, "grad_norm": 0.4213915786757873, "learning_rate": 1.8925474056574797e-05, "loss": 0.3367, "step": 3037 }, { "epoch": 0.17, "grad_norm": 0.5059010362430902, "learning_rate": 1.8924634716841095e-05, "loss": 0.3145, "step": 3038 }, { "epoch": 0.17, "grad_norm": 0.6904293276334135, "learning_rate": 1.8923795068047676e-05, "loss": 0.3638, "step": 3039 }, { "epoch": 0.17, "grad_norm": 0.32126069141038616, "learning_rate": 1.892295511022362e-05, "loss": 0.2206, "step": 3040 }, { "epoch": 0.17, "grad_norm": 0.49072173398664753, "learning_rate": 1.8922114843398008e-05, "loss": 0.2853, "step": 3041 }, { "epoch": 0.17, "grad_norm": 0.6178054617716159, "learning_rate": 1.8921274267599948e-05, "loss": 0.3293, "step": 3042 }, { "epoch": 0.17, "grad_norm": 1.3910184013031428, "learning_rate": 1.8920433382858543e-05, "loss": 0.6896, "step": 3043 }, { "epoch": 0.17, "grad_norm": 0.43153226746326395, "learning_rate": 1.8919592189202907e-05, "loss": 0.3513, "step": 3044 }, { "epoch": 0.17, "grad_norm": 0.4102488838420799, "learning_rate": 1.8918750686662182e-05, "loss": 0.3131, "step": 3045 }, { "epoch": 0.18, "grad_norm": 0.47947338773237685, "learning_rate": 1.8917908875265507e-05, "loss": 0.3053, "step": 3046 }, { "epoch": 0.18, "grad_norm": 0.3460541287559192, "learning_rate": 1.8917066755042028e-05, "loss": 0.2311, "step": 3047 }, { "epoch": 0.18, "grad_norm": 0.5716077686683415, "learning_rate": 1.891622432602091e-05, "loss": 0.4332, "step": 3048 }, { "epoch": 0.18, "grad_norm": 0.5029071813624045, "learning_rate": 1.8915381588231327e-05, "loss": 0.2939, "step": 3049 }, { "epoch": 0.18, "grad_norm": 0.44046657183956867, "learning_rate": 1.8914538541702466e-05, "loss": 0.2922, "step": 3050 }, { "epoch": 0.18, "grad_norm": 0.7995531866883153, "learning_rate": 1.8913695186463517e-05, "loss": 0.4911, "step": 3051 }, { "epoch": 0.18, "grad_norm": 0.36615694629870316, "learning_rate": 1.8912851522543687e-05, "loss": 0.2274, "step": 3052 }, { "epoch": 0.18, "grad_norm": 0.32321900527474984, "learning_rate": 1.891200754997219e-05, "loss": 0.2136, "step": 3053 }, { "epoch": 0.18, "grad_norm": 1.2596204295464923, "learning_rate": 1.8911163268778257e-05, "loss": 0.8114, "step": 3054 }, { "epoch": 0.18, "grad_norm": 0.7144167478169268, "learning_rate": 1.891031867899112e-05, "loss": 0.4086, "step": 3055 }, { "epoch": 0.18, "grad_norm": 0.6253671899765448, "learning_rate": 1.8909473780640037e-05, "loss": 0.3703, "step": 3056 }, { "epoch": 0.18, "grad_norm": 0.45134821280279186, "learning_rate": 1.8908628573754254e-05, "loss": 0.3021, "step": 3057 }, { "epoch": 0.18, "grad_norm": 0.22005853180138574, "learning_rate": 1.890778305836305e-05, "loss": 0.1269, "step": 3058 }, { "epoch": 0.18, "grad_norm": 0.4524272668639035, "learning_rate": 1.89069372344957e-05, "loss": 0.264, "step": 3059 }, { "epoch": 0.18, "grad_norm": 0.6209278718230915, "learning_rate": 1.8906091102181495e-05, "loss": 0.4388, "step": 3060 }, { "epoch": 0.18, "grad_norm": 0.4937884114799369, "learning_rate": 1.890524466144974e-05, "loss": 0.3877, "step": 3061 }, { "epoch": 0.18, "grad_norm": 0.41369254008640416, "learning_rate": 1.8904397912329745e-05, "loss": 0.2448, "step": 3062 }, { "epoch": 0.18, "grad_norm": 0.5973610783320861, "learning_rate": 1.8903550854850834e-05, "loss": 0.3712, "step": 3063 }, { "epoch": 0.18, "grad_norm": 0.656951303682737, "learning_rate": 1.890270348904234e-05, "loss": 0.4271, "step": 3064 }, { "epoch": 0.18, "grad_norm": 0.2452544715442981, "learning_rate": 1.8901855814933607e-05, "loss": 0.1632, "step": 3065 }, { "epoch": 0.18, "grad_norm": 0.5677559684846096, "learning_rate": 1.890100783255399e-05, "loss": 0.4578, "step": 3066 }, { "epoch": 0.18, "grad_norm": 0.5891915481483365, "learning_rate": 1.890015954193285e-05, "loss": 0.4746, "step": 3067 }, { "epoch": 0.18, "grad_norm": 0.35663869657398584, "learning_rate": 1.8899310943099573e-05, "loss": 0.2468, "step": 3068 }, { "epoch": 0.18, "grad_norm": 0.5247041604668781, "learning_rate": 1.8898462036083537e-05, "loss": 0.3401, "step": 3069 }, { "epoch": 0.18, "grad_norm": 1.0380567894925528, "learning_rate": 1.8897612820914147e-05, "loss": 0.5755, "step": 3070 }, { "epoch": 0.18, "grad_norm": 0.2670561595530922, "learning_rate": 1.8896763297620805e-05, "loss": 0.1841, "step": 3071 }, { "epoch": 0.18, "grad_norm": 0.9455444266024311, "learning_rate": 1.8895913466232937e-05, "loss": 0.6592, "step": 3072 }, { "epoch": 0.18, "grad_norm": 0.41053313749568826, "learning_rate": 1.8895063326779965e-05, "loss": 0.3663, "step": 3073 }, { "epoch": 0.18, "grad_norm": 0.36267065242506263, "learning_rate": 1.8894212879291332e-05, "loss": 0.2047, "step": 3074 }, { "epoch": 0.18, "grad_norm": 0.4786448101222816, "learning_rate": 1.8893362123796488e-05, "loss": 0.2759, "step": 3075 }, { "epoch": 0.18, "grad_norm": 0.4093384984549816, "learning_rate": 1.88925110603249e-05, "loss": 0.3397, "step": 3076 }, { "epoch": 0.18, "grad_norm": 0.845745579057613, "learning_rate": 1.8891659688906033e-05, "loss": 0.4151, "step": 3077 }, { "epoch": 0.18, "grad_norm": 0.47292178771932475, "learning_rate": 1.8890808009569376e-05, "loss": 0.3051, "step": 3078 }, { "epoch": 0.18, "grad_norm": 0.49182209863150383, "learning_rate": 1.8889956022344414e-05, "loss": 0.3479, "step": 3079 }, { "epoch": 0.18, "grad_norm": 0.46806648165181836, "learning_rate": 1.8889103727260666e-05, "loss": 0.2757, "step": 3080 }, { "epoch": 0.18, "grad_norm": 0.3120414790673563, "learning_rate": 1.888825112434763e-05, "loss": 0.2244, "step": 3081 }, { "epoch": 0.18, "grad_norm": 1.2679215891920346, "learning_rate": 1.8887398213634848e-05, "loss": 0.7125, "step": 3082 }, { "epoch": 0.18, "grad_norm": 0.6403408615039261, "learning_rate": 1.8886544995151844e-05, "loss": 0.38, "step": 3083 }, { "epoch": 0.18, "grad_norm": 0.33151253151833543, "learning_rate": 1.8885691468928166e-05, "loss": 0.2932, "step": 3084 }, { "epoch": 0.18, "grad_norm": 0.7277961331311545, "learning_rate": 1.8884837634993377e-05, "loss": 0.5574, "step": 3085 }, { "epoch": 0.18, "grad_norm": 0.340274101058813, "learning_rate": 1.8883983493377045e-05, "loss": 0.1544, "step": 3086 }, { "epoch": 0.18, "grad_norm": 0.4429892097106967, "learning_rate": 1.8883129044108744e-05, "loss": 0.3517, "step": 3087 }, { "epoch": 0.18, "grad_norm": 0.3928554684234481, "learning_rate": 1.8882274287218067e-05, "loss": 0.2741, "step": 3088 }, { "epoch": 0.18, "grad_norm": 0.4743146801940653, "learning_rate": 1.8881419222734615e-05, "loss": 0.3134, "step": 3089 }, { "epoch": 0.18, "grad_norm": 0.5239268770428587, "learning_rate": 1.8880563850687995e-05, "loss": 0.4167, "step": 3090 }, { "epoch": 0.18, "grad_norm": 0.8884943070873123, "learning_rate": 1.8879708171107828e-05, "loss": 0.4566, "step": 3091 }, { "epoch": 0.18, "grad_norm": 0.5371132744319287, "learning_rate": 1.8878852184023754e-05, "loss": 0.3002, "step": 3092 }, { "epoch": 0.18, "grad_norm": 0.32469681817681595, "learning_rate": 1.887799588946541e-05, "loss": 0.2159, "step": 3093 }, { "epoch": 0.18, "grad_norm": 1.2594353839536387, "learning_rate": 1.8877139287462446e-05, "loss": 0.3695, "step": 3094 }, { "epoch": 0.18, "grad_norm": 0.6393985107547493, "learning_rate": 1.8876282378044535e-05, "loss": 0.4849, "step": 3095 }, { "epoch": 0.18, "grad_norm": 0.3707643080188264, "learning_rate": 1.8875425161241345e-05, "loss": 0.3116, "step": 3096 }, { "epoch": 0.18, "grad_norm": 0.5094016079937749, "learning_rate": 1.887456763708256e-05, "loss": 0.3134, "step": 3097 }, { "epoch": 0.18, "grad_norm": 0.2347285327171995, "learning_rate": 1.8873709805597884e-05, "loss": 0.1598, "step": 3098 }, { "epoch": 0.18, "grad_norm": 0.40752913954631803, "learning_rate": 1.8872851666817017e-05, "loss": 0.3338, "step": 3099 }, { "epoch": 0.18, "grad_norm": 0.5350281103112107, "learning_rate": 1.887199322076968e-05, "loss": 0.3887, "step": 3100 }, { "epoch": 0.18, "grad_norm": 0.6124219414870147, "learning_rate": 1.8871134467485597e-05, "loss": 0.2902, "step": 3101 }, { "epoch": 0.18, "grad_norm": 0.5018307054344358, "learning_rate": 1.8870275406994513e-05, "loss": 0.3797, "step": 3102 }, { "epoch": 0.18, "grad_norm": 1.1961520940052548, "learning_rate": 1.886941603932617e-05, "loss": 0.7538, "step": 3103 }, { "epoch": 0.18, "grad_norm": 0.32363681224514806, "learning_rate": 1.886855636451033e-05, "loss": 0.2232, "step": 3104 }, { "epoch": 0.18, "grad_norm": 0.4023577478846728, "learning_rate": 1.8867696382576767e-05, "loss": 0.2839, "step": 3105 }, { "epoch": 0.18, "grad_norm": 0.437139177965491, "learning_rate": 1.886683609355526e-05, "loss": 0.3293, "step": 3106 }, { "epoch": 0.18, "grad_norm": 0.6121045236750057, "learning_rate": 1.8865975497475596e-05, "loss": 0.333, "step": 3107 }, { "epoch": 0.18, "grad_norm": 0.4427691977419533, "learning_rate": 1.8865114594367585e-05, "loss": 0.3435, "step": 3108 }, { "epoch": 0.18, "grad_norm": 0.6110743595027336, "learning_rate": 1.8864253384261036e-05, "loss": 0.4338, "step": 3109 }, { "epoch": 0.18, "grad_norm": 0.580897320895251, "learning_rate": 1.8863391867185774e-05, "loss": 0.3527, "step": 3110 }, { "epoch": 0.18, "grad_norm": 0.29260609606365173, "learning_rate": 1.8862530043171633e-05, "loss": 0.1696, "step": 3111 }, { "epoch": 0.18, "grad_norm": 0.4524691334794982, "learning_rate": 1.8861667912248456e-05, "loss": 0.3644, "step": 3112 }, { "epoch": 0.18, "grad_norm": 0.8502053958943329, "learning_rate": 1.8860805474446103e-05, "loss": 0.5187, "step": 3113 }, { "epoch": 0.18, "grad_norm": 0.34026675743767276, "learning_rate": 1.8859942729794433e-05, "loss": 0.2207, "step": 3114 }, { "epoch": 0.18, "grad_norm": 0.5803398218349507, "learning_rate": 1.885907967832333e-05, "loss": 0.4215, "step": 3115 }, { "epoch": 0.18, "grad_norm": 0.5413184538995245, "learning_rate": 1.885821632006268e-05, "loss": 0.385, "step": 3116 }, { "epoch": 0.18, "grad_norm": 0.267538412442085, "learning_rate": 1.8857352655042378e-05, "loss": 0.1605, "step": 3117 }, { "epoch": 0.18, "grad_norm": 0.9481877105032955, "learning_rate": 1.885648868329234e-05, "loss": 0.5323, "step": 3118 }, { "epoch": 0.18, "grad_norm": 0.7094437605080949, "learning_rate": 1.8855624404842472e-05, "loss": 0.4595, "step": 3119 }, { "epoch": 0.18, "grad_norm": 0.3795999302636725, "learning_rate": 1.8854759819722713e-05, "loss": 0.2596, "step": 3120 }, { "epoch": 0.18, "grad_norm": 1.3263785752350479, "learning_rate": 1.8853894927963004e-05, "loss": 0.8709, "step": 3121 }, { "epoch": 0.18, "grad_norm": 0.6358746810174966, "learning_rate": 1.8853029729593296e-05, "loss": 0.4608, "step": 3122 }, { "epoch": 0.18, "grad_norm": 0.3726386152293299, "learning_rate": 1.8852164224643546e-05, "loss": 0.2343, "step": 3123 }, { "epoch": 0.18, "grad_norm": 0.5836516260644941, "learning_rate": 1.885129841314373e-05, "loss": 0.3314, "step": 3124 }, { "epoch": 0.18, "grad_norm": 0.49963353752080175, "learning_rate": 1.8850432295123832e-05, "loss": 0.2861, "step": 3125 }, { "epoch": 0.18, "grad_norm": 0.4523725849030945, "learning_rate": 1.8849565870613844e-05, "loss": 0.3031, "step": 3126 }, { "epoch": 0.18, "grad_norm": 0.47633458502229503, "learning_rate": 1.8848699139643768e-05, "loss": 0.3484, "step": 3127 }, { "epoch": 0.18, "grad_norm": 0.4572869976239571, "learning_rate": 1.8847832102243626e-05, "loss": 0.3506, "step": 3128 }, { "epoch": 0.18, "grad_norm": 0.4738886849479047, "learning_rate": 1.8846964758443434e-05, "loss": 0.3156, "step": 3129 }, { "epoch": 0.18, "grad_norm": 0.3158973478786652, "learning_rate": 1.8846097108273234e-05, "loss": 0.2454, "step": 3130 }, { "epoch": 0.18, "grad_norm": 0.6088936520152631, "learning_rate": 1.8845229151763072e-05, "loss": 0.3999, "step": 3131 }, { "epoch": 0.18, "grad_norm": 0.341198313608363, "learning_rate": 1.884436088894301e-05, "loss": 0.2854, "step": 3132 }, { "epoch": 0.18, "grad_norm": 0.8632109606054379, "learning_rate": 1.8843492319843105e-05, "loss": 0.5362, "step": 3133 }, { "epoch": 0.18, "grad_norm": 0.8698131777727494, "learning_rate": 1.884262344449344e-05, "loss": 0.5899, "step": 3134 }, { "epoch": 0.18, "grad_norm": 0.40345140665949575, "learning_rate": 1.8841754262924106e-05, "loss": 0.2877, "step": 3135 }, { "epoch": 0.18, "grad_norm": 0.4323619276330218, "learning_rate": 1.8840884775165204e-05, "loss": 0.3597, "step": 3136 }, { "epoch": 0.18, "grad_norm": 0.23494410269272176, "learning_rate": 1.8840014981246843e-05, "loss": 0.121, "step": 3137 }, { "epoch": 0.18, "grad_norm": 0.39915844033516507, "learning_rate": 1.8839144881199144e-05, "loss": 0.2998, "step": 3138 }, { "epoch": 0.18, "grad_norm": 1.3686412428770773, "learning_rate": 1.8838274475052233e-05, "loss": 0.8266, "step": 3139 }, { "epoch": 0.18, "grad_norm": 0.3427600739038595, "learning_rate": 1.883740376283626e-05, "loss": 0.2858, "step": 3140 }, { "epoch": 0.18, "grad_norm": 0.4077783868641141, "learning_rate": 1.8836532744581377e-05, "loss": 0.3271, "step": 3141 }, { "epoch": 0.18, "grad_norm": 0.7926122519324589, "learning_rate": 1.8835661420317745e-05, "loss": 0.4978, "step": 3142 }, { "epoch": 0.18, "grad_norm": 0.25284561606242717, "learning_rate": 1.8834789790075536e-05, "loss": 0.1756, "step": 3143 }, { "epoch": 0.18, "grad_norm": 0.38142146209876004, "learning_rate": 1.8833917853884935e-05, "loss": 0.2853, "step": 3144 }, { "epoch": 0.18, "grad_norm": 1.088916732216867, "learning_rate": 1.8833045611776143e-05, "loss": 0.801, "step": 3145 }, { "epoch": 0.18, "grad_norm": 0.618587140267945, "learning_rate": 1.8832173063779357e-05, "loss": 0.3862, "step": 3146 }, { "epoch": 0.18, "grad_norm": 0.4643314856293626, "learning_rate": 1.8831300209924797e-05, "loss": 0.3202, "step": 3147 }, { "epoch": 0.18, "grad_norm": 0.40377489613397566, "learning_rate": 1.8830427050242693e-05, "loss": 0.3142, "step": 3148 }, { "epoch": 0.18, "grad_norm": 0.2564840205627374, "learning_rate": 1.8829553584763278e-05, "loss": 0.1487, "step": 3149 }, { "epoch": 0.18, "grad_norm": 0.3232933389811095, "learning_rate": 1.8828679813516806e-05, "loss": 0.2236, "step": 3150 }, { "epoch": 0.18, "grad_norm": 0.5490085608060375, "learning_rate": 1.8827805736533528e-05, "loss": 0.4398, "step": 3151 }, { "epoch": 0.18, "grad_norm": 0.5491388527656633, "learning_rate": 1.8826931353843717e-05, "loss": 0.4112, "step": 3152 }, { "epoch": 0.18, "grad_norm": 0.39943577631082894, "learning_rate": 1.8826056665477654e-05, "loss": 0.2803, "step": 3153 }, { "epoch": 0.18, "grad_norm": 0.773866438726851, "learning_rate": 1.8825181671465628e-05, "loss": 0.5776, "step": 3154 }, { "epoch": 0.18, "grad_norm": 0.3216913090997973, "learning_rate": 1.882430637183794e-05, "loss": 0.2737, "step": 3155 }, { "epoch": 0.18, "grad_norm": 0.290138086584041, "learning_rate": 1.8823430766624905e-05, "loss": 0.1886, "step": 3156 }, { "epoch": 0.18, "grad_norm": 1.4431178972907859, "learning_rate": 1.8822554855856838e-05, "loss": 0.8918, "step": 3157 }, { "epoch": 0.18, "grad_norm": 0.5797258534093347, "learning_rate": 1.8821678639564075e-05, "loss": 0.4781, "step": 3158 }, { "epoch": 0.18, "grad_norm": 0.3837048306336846, "learning_rate": 1.8820802117776963e-05, "loss": 0.2224, "step": 3159 }, { "epoch": 0.18, "grad_norm": 0.42205950077117843, "learning_rate": 1.8819925290525854e-05, "loss": 0.3351, "step": 3160 }, { "epoch": 0.18, "grad_norm": 0.3076485977032022, "learning_rate": 1.8819048157841105e-05, "loss": 0.2032, "step": 3161 }, { "epoch": 0.18, "grad_norm": 0.5041804621749775, "learning_rate": 1.8818170719753104e-05, "loss": 0.3487, "step": 3162 }, { "epoch": 0.18, "grad_norm": 0.38071028970387893, "learning_rate": 1.8817292976292227e-05, "loss": 0.33, "step": 3163 }, { "epoch": 0.18, "grad_norm": 0.4558400988040032, "learning_rate": 1.8816414927488877e-05, "loss": 0.3912, "step": 3164 }, { "epoch": 0.18, "grad_norm": 0.45130705580419683, "learning_rate": 1.8815536573373453e-05, "loss": 0.2836, "step": 3165 }, { "epoch": 0.18, "grad_norm": 0.8510761019378265, "learning_rate": 1.8814657913976377e-05, "loss": 0.302, "step": 3166 }, { "epoch": 0.18, "grad_norm": 0.429963063044294, "learning_rate": 1.881377894932808e-05, "loss": 0.3323, "step": 3167 }, { "epoch": 0.18, "grad_norm": 0.29400249907992115, "learning_rate": 1.8812899679458993e-05, "loss": 0.23, "step": 3168 }, { "epoch": 0.18, "grad_norm": 0.464996279016933, "learning_rate": 1.8812020104399572e-05, "loss": 0.3656, "step": 3169 }, { "epoch": 0.18, "grad_norm": 0.46758247447598833, "learning_rate": 1.8811140224180273e-05, "loss": 0.3112, "step": 3170 }, { "epoch": 0.18, "grad_norm": 0.41433320208215574, "learning_rate": 1.8810260038831564e-05, "loss": 0.291, "step": 3171 }, { "epoch": 0.18, "grad_norm": 0.452144073174074, "learning_rate": 1.8809379548383932e-05, "loss": 0.2709, "step": 3172 }, { "epoch": 0.18, "grad_norm": 1.178725602164265, "learning_rate": 1.8808498752867863e-05, "loss": 0.7155, "step": 3173 }, { "epoch": 0.18, "grad_norm": 0.34409535391847335, "learning_rate": 1.880761765231386e-05, "loss": 0.2906, "step": 3174 }, { "epoch": 0.18, "grad_norm": 0.7192857758620904, "learning_rate": 1.8806736246752443e-05, "loss": 0.4855, "step": 3175 }, { "epoch": 0.18, "grad_norm": 0.3361678418408267, "learning_rate": 1.8805854536214122e-05, "loss": 0.2721, "step": 3176 }, { "epoch": 0.18, "grad_norm": 0.37055400507860653, "learning_rate": 1.8804972520729443e-05, "loss": 0.262, "step": 3177 }, { "epoch": 0.18, "grad_norm": 0.9664731315219579, "learning_rate": 1.8804090200328938e-05, "loss": 0.5504, "step": 3178 }, { "epoch": 0.18, "grad_norm": 0.42261767384952514, "learning_rate": 1.880320757504317e-05, "loss": 0.2837, "step": 3179 }, { "epoch": 0.18, "grad_norm": 0.614889244744422, "learning_rate": 1.8802324644902704e-05, "loss": 0.3954, "step": 3180 }, { "epoch": 0.18, "grad_norm": 0.5047054055116674, "learning_rate": 1.880144140993811e-05, "loss": 0.365, "step": 3181 }, { "epoch": 0.18, "grad_norm": 0.2809330896067468, "learning_rate": 1.880055787017998e-05, "loss": 0.1917, "step": 3182 }, { "epoch": 0.18, "grad_norm": 0.4424624416740233, "learning_rate": 1.8799674025658913e-05, "loss": 0.3117, "step": 3183 }, { "epoch": 0.18, "grad_norm": 0.4259910851470908, "learning_rate": 1.879878987640551e-05, "loss": 0.3169, "step": 3184 }, { "epoch": 0.18, "grad_norm": 1.5477900879594761, "learning_rate": 1.879790542245039e-05, "loss": 0.4208, "step": 3185 }, { "epoch": 0.18, "grad_norm": 0.5038440134908405, "learning_rate": 1.8797020663824187e-05, "loss": 0.3235, "step": 3186 }, { "epoch": 0.18, "grad_norm": 0.4738666383574769, "learning_rate": 1.8796135600557534e-05, "loss": 0.3355, "step": 3187 }, { "epoch": 0.18, "grad_norm": 1.4251583580601876, "learning_rate": 1.8795250232681085e-05, "loss": 0.9205, "step": 3188 }, { "epoch": 0.18, "grad_norm": 0.253214970145997, "learning_rate": 1.8794364560225496e-05, "loss": 0.1462, "step": 3189 }, { "epoch": 0.18, "grad_norm": 0.6313789827965512, "learning_rate": 1.8793478583221448e-05, "loss": 0.3916, "step": 3190 }, { "epoch": 0.18, "grad_norm": 0.4616837157366747, "learning_rate": 1.879259230169961e-05, "loss": 0.3263, "step": 3191 }, { "epoch": 0.18, "grad_norm": 0.3453392906380078, "learning_rate": 1.8791705715690675e-05, "loss": 0.2672, "step": 3192 }, { "epoch": 0.18, "grad_norm": 0.7657077369940369, "learning_rate": 1.8790818825225355e-05, "loss": 0.5951, "step": 3193 }, { "epoch": 0.18, "grad_norm": 0.4873348104716959, "learning_rate": 1.8789931630334353e-05, "loss": 0.3264, "step": 3194 }, { "epoch": 0.18, "grad_norm": 0.3471370066679422, "learning_rate": 1.8789044131048397e-05, "loss": 0.2468, "step": 3195 }, { "epoch": 0.18, "grad_norm": 0.5258610482459171, "learning_rate": 1.8788156327398225e-05, "loss": 0.2788, "step": 3196 }, { "epoch": 0.18, "grad_norm": 0.5999552621196282, "learning_rate": 1.8787268219414572e-05, "loss": 0.4748, "step": 3197 }, { "epoch": 0.18, "grad_norm": 0.5376375761332872, "learning_rate": 1.87863798071282e-05, "loss": 0.3582, "step": 3198 }, { "epoch": 0.18, "grad_norm": 0.42167059924717126, "learning_rate": 1.8785491090569876e-05, "loss": 0.2779, "step": 3199 }, { "epoch": 0.18, "grad_norm": 1.048591073850127, "learning_rate": 1.878460206977037e-05, "loss": 0.7897, "step": 3200 }, { "epoch": 0.18, "grad_norm": 0.3659218143881057, "learning_rate": 1.8783712744760475e-05, "loss": 0.1953, "step": 3201 }, { "epoch": 0.18, "grad_norm": 0.4075025882352195, "learning_rate": 1.878282311557098e-05, "loss": 0.2572, "step": 3202 }, { "epoch": 0.18, "grad_norm": 0.43005347413976514, "learning_rate": 1.8781933182232702e-05, "loss": 0.3816, "step": 3203 }, { "epoch": 0.18, "grad_norm": 0.5294171894124521, "learning_rate": 1.8781042944776457e-05, "loss": 0.3735, "step": 3204 }, { "epoch": 0.18, "grad_norm": 0.4682006514675936, "learning_rate": 1.8780152403233073e-05, "loss": 0.29, "step": 3205 }, { "epoch": 0.18, "grad_norm": 1.4391156031332297, "learning_rate": 1.8779261557633385e-05, "loss": 0.696, "step": 3206 }, { "epoch": 0.18, "grad_norm": 0.34352071114583893, "learning_rate": 1.8778370408008247e-05, "loss": 0.256, "step": 3207 }, { "epoch": 0.18, "grad_norm": 0.31215051226110274, "learning_rate": 1.877747895438852e-05, "loss": 0.2139, "step": 3208 }, { "epoch": 0.18, "grad_norm": 0.7384179652760221, "learning_rate": 1.8776587196805077e-05, "loss": 0.4823, "step": 3209 }, { "epoch": 0.18, "grad_norm": 0.6360730312478228, "learning_rate": 1.8775695135288794e-05, "loss": 0.4391, "step": 3210 }, { "epoch": 0.18, "grad_norm": 0.46584518363056193, "learning_rate": 1.8774802769870564e-05, "loss": 0.3308, "step": 3211 }, { "epoch": 0.18, "grad_norm": 0.4706672925631658, "learning_rate": 1.8773910100581294e-05, "loss": 0.3272, "step": 3212 }, { "epoch": 0.18, "grad_norm": 0.2877101741955395, "learning_rate": 1.8773017127451893e-05, "loss": 0.2086, "step": 3213 }, { "epoch": 0.18, "grad_norm": 0.4410049272946392, "learning_rate": 1.877212385051329e-05, "loss": 0.2814, "step": 3214 }, { "epoch": 0.18, "grad_norm": 0.4041381893357965, "learning_rate": 1.8771230269796412e-05, "loss": 0.3087, "step": 3215 }, { "epoch": 0.18, "grad_norm": 0.7639969423662106, "learning_rate": 1.877033638533221e-05, "loss": 0.4319, "step": 3216 }, { "epoch": 0.18, "grad_norm": 0.4278602034149037, "learning_rate": 1.876944219715163e-05, "loss": 0.3137, "step": 3217 }, { "epoch": 0.18, "grad_norm": 0.48961249607176893, "learning_rate": 1.876854770528565e-05, "loss": 0.3156, "step": 3218 }, { "epoch": 0.18, "grad_norm": 0.44431744410002555, "learning_rate": 1.8767652909765236e-05, "loss": 0.3015, "step": 3219 }, { "epoch": 0.19, "grad_norm": 0.33106297024120673, "learning_rate": 1.8766757810621383e-05, "loss": 0.3407, "step": 3220 }, { "epoch": 0.19, "grad_norm": 0.28688345276164595, "learning_rate": 1.876586240788508e-05, "loss": 0.1918, "step": 3221 }, { "epoch": 0.19, "grad_norm": 0.5071273372638695, "learning_rate": 1.876496670158734e-05, "loss": 0.3566, "step": 3222 }, { "epoch": 0.19, "grad_norm": 0.4110348765229278, "learning_rate": 1.876407069175918e-05, "loss": 0.316, "step": 3223 }, { "epoch": 0.19, "grad_norm": 0.9657130710337607, "learning_rate": 1.876317437843163e-05, "loss": 0.6621, "step": 3224 }, { "epoch": 0.19, "grad_norm": 0.4092046974694814, "learning_rate": 1.8762277761635725e-05, "loss": 0.2433, "step": 3225 }, { "epoch": 0.19, "grad_norm": 0.39059696176709385, "learning_rate": 1.876138084140252e-05, "loss": 0.3564, "step": 3226 }, { "epoch": 0.19, "grad_norm": 0.3427885145541495, "learning_rate": 1.876048361776307e-05, "loss": 0.2631, "step": 3227 }, { "epoch": 0.19, "grad_norm": 0.2848821101035255, "learning_rate": 1.8759586090748454e-05, "loss": 0.1501, "step": 3228 }, { "epoch": 0.19, "grad_norm": 0.4979226409712117, "learning_rate": 1.8758688260389747e-05, "loss": 0.3348, "step": 3229 }, { "epoch": 0.19, "grad_norm": 1.7883755482032335, "learning_rate": 1.8757790126718044e-05, "loss": 0.849, "step": 3230 }, { "epoch": 0.19, "grad_norm": 0.3038487339560472, "learning_rate": 1.8756891689764444e-05, "loss": 0.2654, "step": 3231 }, { "epoch": 0.19, "grad_norm": 0.426868655627405, "learning_rate": 1.8755992949560058e-05, "loss": 0.3428, "step": 3232 }, { "epoch": 0.19, "grad_norm": 0.34227594083645335, "learning_rate": 1.8755093906136016e-05, "loss": 0.2369, "step": 3233 }, { "epoch": 0.19, "grad_norm": 0.4159884317750057, "learning_rate": 1.8754194559523447e-05, "loss": 0.2368, "step": 3234 }, { "epoch": 0.19, "grad_norm": 0.4452450173245431, "learning_rate": 1.87532949097535e-05, "loss": 0.3102, "step": 3235 }, { "epoch": 0.19, "grad_norm": 0.8178285752035969, "learning_rate": 1.8752394956857322e-05, "loss": 0.5632, "step": 3236 }, { "epoch": 0.19, "grad_norm": 0.6512942111877557, "learning_rate": 1.8751494700866088e-05, "loss": 0.491, "step": 3237 }, { "epoch": 0.19, "grad_norm": 0.36820713984463505, "learning_rate": 1.8750594141810964e-05, "loss": 0.205, "step": 3238 }, { "epoch": 0.19, "grad_norm": 0.32327909485014955, "learning_rate": 1.8749693279723146e-05, "loss": 0.241, "step": 3239 }, { "epoch": 0.19, "grad_norm": 1.3048291367110827, "learning_rate": 1.874879211463382e-05, "loss": 0.6865, "step": 3240 }, { "epoch": 0.19, "grad_norm": 0.38934075541803975, "learning_rate": 1.8747890646574204e-05, "loss": 0.2433, "step": 3241 }, { "epoch": 0.19, "grad_norm": 0.8793842949876021, "learning_rate": 1.874698887557551e-05, "loss": 0.5371, "step": 3242 }, { "epoch": 0.19, "grad_norm": 0.4606832588802145, "learning_rate": 1.8746086801668964e-05, "loss": 0.3555, "step": 3243 }, { "epoch": 0.19, "grad_norm": 0.34513483530045763, "learning_rate": 1.8745184424885815e-05, "loss": 0.2315, "step": 3244 }, { "epoch": 0.19, "grad_norm": 0.6050070424752414, "learning_rate": 1.87442817452573e-05, "loss": 0.2856, "step": 3245 }, { "epoch": 0.19, "grad_norm": 0.30067116685715717, "learning_rate": 1.8743378762814685e-05, "loss": 0.2406, "step": 3246 }, { "epoch": 0.19, "grad_norm": 0.3879704127020492, "learning_rate": 1.874247547758924e-05, "loss": 0.2495, "step": 3247 }, { "epoch": 0.19, "grad_norm": 1.0069201260432241, "learning_rate": 1.8741571889612248e-05, "loss": 0.5666, "step": 3248 }, { "epoch": 0.19, "grad_norm": 0.6375161367998676, "learning_rate": 1.8740667998914996e-05, "loss": 0.4336, "step": 3249 }, { "epoch": 0.19, "grad_norm": 0.42327093852418024, "learning_rate": 1.8739763805528782e-05, "loss": 0.306, "step": 3250 }, { "epoch": 0.19, "grad_norm": 0.41315398403240317, "learning_rate": 1.8738859309484926e-05, "loss": 0.2775, "step": 3251 }, { "epoch": 0.19, "grad_norm": 0.4775464807987565, "learning_rate": 1.8737954510814752e-05, "loss": 0.306, "step": 3252 }, { "epoch": 0.19, "grad_norm": 0.3845598831291664, "learning_rate": 1.8737049409549586e-05, "loss": 0.2899, "step": 3253 }, { "epoch": 0.19, "grad_norm": 0.7332887356293013, "learning_rate": 1.8736144005720775e-05, "loss": 0.3581, "step": 3254 }, { "epoch": 0.19, "grad_norm": 0.5740626229995096, "learning_rate": 1.8735238299359672e-05, "loss": 0.3243, "step": 3255 }, { "epoch": 0.19, "grad_norm": 0.37707225799385136, "learning_rate": 1.8734332290497642e-05, "loss": 0.285, "step": 3256 }, { "epoch": 0.19, "grad_norm": 0.2926649412762437, "learning_rate": 1.8733425979166063e-05, "loss": 0.1384, "step": 3257 }, { "epoch": 0.19, "grad_norm": 0.4234930784962659, "learning_rate": 1.8732519365396314e-05, "loss": 0.3293, "step": 3258 }, { "epoch": 0.19, "grad_norm": 0.44694857769056656, "learning_rate": 1.87316124492198e-05, "loss": 0.3063, "step": 3259 }, { "epoch": 0.19, "grad_norm": 0.881998655230814, "learning_rate": 1.8730705230667916e-05, "loss": 0.4143, "step": 3260 }, { "epoch": 0.19, "grad_norm": 0.47126328441633636, "learning_rate": 1.8729797709772088e-05, "loss": 0.3326, "step": 3261 }, { "epoch": 0.19, "grad_norm": 0.3424134567721488, "learning_rate": 1.872888988656374e-05, "loss": 0.309, "step": 3262 }, { "epoch": 0.19, "grad_norm": 0.5270321114224565, "learning_rate": 1.8727981761074315e-05, "loss": 0.375, "step": 3263 }, { "epoch": 0.19, "grad_norm": 0.6391706970436589, "learning_rate": 1.872707333333525e-05, "loss": 0.3429, "step": 3264 }, { "epoch": 0.19, "grad_norm": 0.4914387054979702, "learning_rate": 1.8726164603378016e-05, "loss": 0.3219, "step": 3265 }, { "epoch": 0.19, "grad_norm": 0.6902772708493562, "learning_rate": 1.8725255571234075e-05, "loss": 0.4276, "step": 3266 }, { "epoch": 0.19, "grad_norm": 0.252273761014582, "learning_rate": 1.872434623693491e-05, "loss": 0.1688, "step": 3267 }, { "epoch": 0.19, "grad_norm": 0.43328993906220703, "learning_rate": 1.8723436600512007e-05, "loss": 0.3001, "step": 3268 }, { "epoch": 0.19, "grad_norm": 1.5775971207484492, "learning_rate": 1.8722526661996872e-05, "loss": 0.6708, "step": 3269 }, { "epoch": 0.19, "grad_norm": 0.4552562263007585, "learning_rate": 1.8721616421421017e-05, "loss": 0.3101, "step": 3270 }, { "epoch": 0.19, "grad_norm": 0.3964430436328616, "learning_rate": 1.8720705878815953e-05, "loss": 0.2978, "step": 3271 }, { "epoch": 0.19, "grad_norm": 0.9240033860694061, "learning_rate": 1.8719795034213226e-05, "loss": 0.5123, "step": 3272 }, { "epoch": 0.19, "grad_norm": 0.35059453995859596, "learning_rate": 1.871888388764437e-05, "loss": 0.1863, "step": 3273 }, { "epoch": 0.19, "grad_norm": 0.32757580915834644, "learning_rate": 1.8717972439140938e-05, "loss": 0.23, "step": 3274 }, { "epoch": 0.19, "grad_norm": 0.5944268003456217, "learning_rate": 1.8717060688734495e-05, "loss": 0.41, "step": 3275 }, { "epoch": 0.19, "grad_norm": 1.1405676959172855, "learning_rate": 1.871614863645662e-05, "loss": 0.5975, "step": 3276 }, { "epoch": 0.19, "grad_norm": 0.4561238387683592, "learning_rate": 1.871523628233889e-05, "loss": 0.267, "step": 3277 }, { "epoch": 0.19, "grad_norm": 0.5709846215706098, "learning_rate": 1.87143236264129e-05, "loss": 0.4121, "step": 3278 }, { "epoch": 0.19, "grad_norm": 0.32475528845468604, "learning_rate": 1.871341066871026e-05, "loss": 0.2167, "step": 3279 }, { "epoch": 0.19, "grad_norm": 0.35223528951570476, "learning_rate": 1.8712497409262582e-05, "loss": 0.2124, "step": 3280 }, { "epoch": 0.19, "grad_norm": 1.107902808950662, "learning_rate": 1.8711583848101492e-05, "loss": 0.6202, "step": 3281 }, { "epoch": 0.19, "grad_norm": 0.5448763076692719, "learning_rate": 1.871066998525863e-05, "loss": 0.3702, "step": 3282 }, { "epoch": 0.19, "grad_norm": 0.43760624896546474, "learning_rate": 1.870975582076564e-05, "loss": 0.2446, "step": 3283 }, { "epoch": 0.19, "grad_norm": 0.49297898663282697, "learning_rate": 1.8708841354654184e-05, "loss": 0.4166, "step": 3284 }, { "epoch": 0.19, "grad_norm": 0.3323663731490816, "learning_rate": 1.870792658695592e-05, "loss": 0.2341, "step": 3285 }, { "epoch": 0.19, "grad_norm": 0.49578418381735967, "learning_rate": 1.870701151770254e-05, "loss": 0.2838, "step": 3286 }, { "epoch": 0.19, "grad_norm": 0.4174917171345786, "learning_rate": 1.870609614692572e-05, "loss": 0.2761, "step": 3287 }, { "epoch": 0.19, "grad_norm": 0.750839879220727, "learning_rate": 1.8705180474657166e-05, "loss": 0.4965, "step": 3288 }, { "epoch": 0.19, "grad_norm": 0.5129470258434858, "learning_rate": 1.8704264500928588e-05, "loss": 0.2986, "step": 3289 }, { "epoch": 0.19, "grad_norm": 0.42347571147632856, "learning_rate": 1.87033482257717e-05, "loss": 0.2931, "step": 3290 }, { "epoch": 0.19, "grad_norm": 0.481272218463836, "learning_rate": 1.8702431649218245e-05, "loss": 0.2205, "step": 3291 }, { "epoch": 0.19, "grad_norm": 0.30216937260105364, "learning_rate": 1.870151477129995e-05, "loss": 0.2286, "step": 3292 }, { "epoch": 0.19, "grad_norm": 0.5601484432369667, "learning_rate": 1.8700597592048576e-05, "loss": 0.3279, "step": 3293 }, { "epoch": 0.19, "grad_norm": 0.4983673447612351, "learning_rate": 1.869968011149588e-05, "loss": 0.3449, "step": 3294 }, { "epoch": 0.19, "grad_norm": 0.48122935402031364, "learning_rate": 1.8698762329673636e-05, "loss": 0.3335, "step": 3295 }, { "epoch": 0.19, "grad_norm": 0.5877751356413182, "learning_rate": 1.869784424661363e-05, "loss": 0.3133, "step": 3296 }, { "epoch": 0.19, "grad_norm": 0.3422421447813925, "learning_rate": 1.8696925862347647e-05, "loss": 0.2158, "step": 3297 }, { "epoch": 0.19, "grad_norm": 0.3768728050060615, "learning_rate": 1.8696007176907494e-05, "loss": 0.2928, "step": 3298 }, { "epoch": 0.19, "grad_norm": 0.6163884429509207, "learning_rate": 1.869508819032499e-05, "loss": 0.3752, "step": 3299 }, { "epoch": 0.19, "grad_norm": 0.8648706566024917, "learning_rate": 1.8694168902631957e-05, "loss": 0.4133, "step": 3300 }, { "epoch": 0.19, "grad_norm": 0.4157955496291116, "learning_rate": 1.8693249313860225e-05, "loss": 0.3008, "step": 3301 }, { "epoch": 0.19, "grad_norm": 0.3660210547536385, "learning_rate": 1.8692329424041648e-05, "loss": 0.3064, "step": 3302 }, { "epoch": 0.19, "grad_norm": 0.267248482819533, "learning_rate": 1.8691409233208072e-05, "loss": 0.1172, "step": 3303 }, { "epoch": 0.19, "grad_norm": 0.6430966014648096, "learning_rate": 1.869048874139137e-05, "loss": 0.3144, "step": 3304 }, { "epoch": 0.19, "grad_norm": 0.666856224468795, "learning_rate": 1.8689567948623417e-05, "loss": 0.3695, "step": 3305 }, { "epoch": 0.19, "grad_norm": 0.5142155690233231, "learning_rate": 1.86886468549361e-05, "loss": 0.3119, "step": 3306 }, { "epoch": 0.19, "grad_norm": 0.4921167365321923, "learning_rate": 1.8687725460361315e-05, "loss": 0.3192, "step": 3307 }, { "epoch": 0.19, "grad_norm": 0.6095335014762302, "learning_rate": 1.868680376493097e-05, "loss": 0.4577, "step": 3308 }, { "epoch": 0.19, "grad_norm": 0.3542443554161993, "learning_rate": 1.8685881768676983e-05, "loss": 0.1199, "step": 3309 }, { "epoch": 0.19, "grad_norm": 0.3551171004629968, "learning_rate": 1.868495947163129e-05, "loss": 0.2994, "step": 3310 }, { "epoch": 0.19, "grad_norm": 0.4032175463163124, "learning_rate": 1.8684036873825817e-05, "loss": 0.3191, "step": 3311 }, { "epoch": 0.19, "grad_norm": 0.9000679546075138, "learning_rate": 1.8683113975292522e-05, "loss": 0.5656, "step": 3312 }, { "epoch": 0.19, "grad_norm": 0.4240286543723701, "learning_rate": 1.8682190776063368e-05, "loss": 0.2259, "step": 3313 }, { "epoch": 0.19, "grad_norm": 0.4201800746491544, "learning_rate": 1.8681267276170315e-05, "loss": 0.3116, "step": 3314 }, { "epoch": 0.19, "grad_norm": 0.8506988193417204, "learning_rate": 1.8680343475645354e-05, "loss": 0.5609, "step": 3315 }, { "epoch": 0.19, "grad_norm": 0.3969631858789526, "learning_rate": 1.8679419374520467e-05, "loss": 0.2653, "step": 3316 }, { "epoch": 0.19, "grad_norm": 0.6065235075604374, "learning_rate": 1.8678494972827665e-05, "loss": 0.419, "step": 3317 }, { "epoch": 0.19, "grad_norm": 0.32114184153525965, "learning_rate": 1.8677570270598956e-05, "loss": 0.2346, "step": 3318 }, { "epoch": 0.19, "grad_norm": 0.40387175409091025, "learning_rate": 1.8676645267866356e-05, "loss": 0.2204, "step": 3319 }, { "epoch": 0.19, "grad_norm": 0.6801443170904025, "learning_rate": 1.867571996466191e-05, "loss": 0.4311, "step": 3320 }, { "epoch": 0.19, "grad_norm": 0.5159272006258127, "learning_rate": 1.867479436101765e-05, "loss": 0.3767, "step": 3321 }, { "epoch": 0.19, "grad_norm": 0.4089571870928136, "learning_rate": 1.8673868456965635e-05, "loss": 0.2527, "step": 3322 }, { "epoch": 0.19, "grad_norm": 0.48397213976412556, "learning_rate": 1.867294225253793e-05, "loss": 0.3643, "step": 3323 }, { "epoch": 0.19, "grad_norm": 0.3446079467709578, "learning_rate": 1.8672015747766606e-05, "loss": 0.205, "step": 3324 }, { "epoch": 0.19, "grad_norm": 0.8503150555575159, "learning_rate": 1.8671088942683752e-05, "loss": 0.5678, "step": 3325 }, { "epoch": 0.19, "grad_norm": 0.39654527040197246, "learning_rate": 1.8670161837321457e-05, "loss": 0.2591, "step": 3326 }, { "epoch": 0.19, "grad_norm": 1.0220278362714632, "learning_rate": 1.8669234431711833e-05, "loss": 0.5651, "step": 3327 }, { "epoch": 0.19, "grad_norm": 0.5649493509801116, "learning_rate": 1.866830672588699e-05, "loss": 0.3735, "step": 3328 }, { "epoch": 0.19, "grad_norm": 0.3635380013575951, "learning_rate": 1.866737871987906e-05, "loss": 0.2953, "step": 3329 }, { "epoch": 0.19, "grad_norm": 0.3433034425476781, "learning_rate": 1.866645041372018e-05, "loss": 0.2176, "step": 3330 }, { "epoch": 0.19, "grad_norm": 0.6755984958812598, "learning_rate": 1.8665521807442495e-05, "loss": 0.4254, "step": 3331 }, { "epoch": 0.19, "grad_norm": 0.41328940725588204, "learning_rate": 1.866459290107816e-05, "loss": 0.2699, "step": 3332 }, { "epoch": 0.19, "grad_norm": 0.5305543815291648, "learning_rate": 1.8663663694659348e-05, "loss": 0.3819, "step": 3333 }, { "epoch": 0.19, "grad_norm": 0.42991083738123465, "learning_rate": 1.866273418821823e-05, "loss": 0.3441, "step": 3334 }, { "epoch": 0.19, "grad_norm": 0.6544073955837417, "learning_rate": 1.8661804381787e-05, "loss": 0.2438, "step": 3335 }, { "epoch": 0.19, "grad_norm": 0.3474028790555719, "learning_rate": 1.8660874275397864e-05, "loss": 0.2269, "step": 3336 }, { "epoch": 0.19, "grad_norm": 0.44660721208933224, "learning_rate": 1.8659943869083016e-05, "loss": 0.3194, "step": 3337 }, { "epoch": 0.19, "grad_norm": 0.36479851194376395, "learning_rate": 1.865901316287469e-05, "loss": 0.3026, "step": 3338 }, { "epoch": 0.19, "grad_norm": 0.7251793067592559, "learning_rate": 1.8658082156805105e-05, "loss": 0.4025, "step": 3339 }, { "epoch": 0.19, "grad_norm": 1.1762657605614177, "learning_rate": 1.8657150850906515e-05, "loss": 0.4898, "step": 3340 }, { "epoch": 0.19, "grad_norm": 0.44822935372518946, "learning_rate": 1.8656219245211157e-05, "loss": 0.3101, "step": 3341 }, { "epoch": 0.19, "grad_norm": 0.3095470714000383, "learning_rate": 1.86552873397513e-05, "loss": 0.2329, "step": 3342 }, { "epoch": 0.19, "grad_norm": 0.8687453149208805, "learning_rate": 1.8654355134559216e-05, "loss": 0.5129, "step": 3343 }, { "epoch": 0.19, "grad_norm": 0.3021584131565959, "learning_rate": 1.8653422629667183e-05, "loss": 0.2725, "step": 3344 }, { "epoch": 0.19, "grad_norm": 0.5873873067313393, "learning_rate": 1.8652489825107497e-05, "loss": 0.358, "step": 3345 }, { "epoch": 0.19, "grad_norm": 0.46313571761420985, "learning_rate": 1.865155672091246e-05, "loss": 0.3272, "step": 3346 }, { "epoch": 0.19, "grad_norm": 0.42763704537288455, "learning_rate": 1.865062331711439e-05, "loss": 0.2979, "step": 3347 }, { "epoch": 0.19, "grad_norm": 0.8943885070194173, "learning_rate": 1.8649689613745605e-05, "loss": 0.4268, "step": 3348 }, { "epoch": 0.19, "grad_norm": 0.4519340597314842, "learning_rate": 1.864875561083844e-05, "loss": 0.3254, "step": 3349 }, { "epoch": 0.19, "grad_norm": 0.34699922308137227, "learning_rate": 1.864782130842524e-05, "loss": 0.2848, "step": 3350 }, { "epoch": 0.19, "grad_norm": 0.6473095340940566, "learning_rate": 1.8646886706538358e-05, "loss": 0.471, "step": 3351 }, { "epoch": 0.19, "grad_norm": 0.337720021022626, "learning_rate": 1.8645951805210164e-05, "loss": 0.1635, "step": 3352 }, { "epoch": 0.19, "grad_norm": 0.515168665181641, "learning_rate": 1.864501660447303e-05, "loss": 0.363, "step": 3353 }, { "epoch": 0.19, "grad_norm": 0.45899933383969893, "learning_rate": 1.8644081104359343e-05, "loss": 0.3239, "step": 3354 }, { "epoch": 0.19, "grad_norm": 0.4341098182538872, "learning_rate": 1.8643145304901497e-05, "loss": 0.2779, "step": 3355 }, { "epoch": 0.19, "grad_norm": 0.43247406577763553, "learning_rate": 1.8642209206131902e-05, "loss": 0.3562, "step": 3356 }, { "epoch": 0.19, "grad_norm": 0.315059317047122, "learning_rate": 1.8641272808082975e-05, "loss": 0.3091, "step": 3357 }, { "epoch": 0.19, "grad_norm": 0.24709989347560327, "learning_rate": 1.864033611078714e-05, "loss": 0.0748, "step": 3358 }, { "epoch": 0.19, "grad_norm": 0.4234610157900327, "learning_rate": 1.863939911427684e-05, "loss": 0.3157, "step": 3359 }, { "epoch": 0.19, "grad_norm": 1.0117601153578217, "learning_rate": 1.8638461818584517e-05, "loss": 0.672, "step": 3360 }, { "epoch": 0.19, "grad_norm": 0.4147187717202964, "learning_rate": 1.8637524223742636e-05, "loss": 0.3426, "step": 3361 }, { "epoch": 0.19, "grad_norm": 0.35245150890474214, "learning_rate": 1.863658632978366e-05, "loss": 0.2728, "step": 3362 }, { "epoch": 0.19, "grad_norm": 0.4678697988608274, "learning_rate": 1.8635648136740072e-05, "loss": 0.3828, "step": 3363 }, { "epoch": 0.19, "grad_norm": 0.4667194994023077, "learning_rate": 1.863470964464436e-05, "loss": 0.2872, "step": 3364 }, { "epoch": 0.19, "grad_norm": 0.32564335685434603, "learning_rate": 1.8633770853529025e-05, "loss": 0.236, "step": 3365 }, { "epoch": 0.19, "grad_norm": 0.5608166440455324, "learning_rate": 1.8632831763426574e-05, "loss": 0.3899, "step": 3366 }, { "epoch": 0.19, "grad_norm": 0.6682512540559473, "learning_rate": 1.863189237436953e-05, "loss": 0.4906, "step": 3367 }, { "epoch": 0.19, "grad_norm": 0.35298988092787115, "learning_rate": 1.863095268639043e-05, "loss": 0.2482, "step": 3368 }, { "epoch": 0.19, "grad_norm": 0.42340797367221217, "learning_rate": 1.8630012699521806e-05, "loss": 0.3356, "step": 3369 }, { "epoch": 0.19, "grad_norm": 0.2767430952636719, "learning_rate": 1.8629072413796213e-05, "loss": 0.1825, "step": 3370 }, { "epoch": 0.19, "grad_norm": 0.3605012936700501, "learning_rate": 1.862813182924621e-05, "loss": 0.2341, "step": 3371 }, { "epoch": 0.19, "grad_norm": 1.153440050494576, "learning_rate": 1.8627190945904382e-05, "loss": 0.5661, "step": 3372 }, { "epoch": 0.19, "grad_norm": 0.48897754156527273, "learning_rate": 1.8626249763803295e-05, "loss": 0.3302, "step": 3373 }, { "epoch": 0.19, "grad_norm": 0.46612356766072266, "learning_rate": 1.8625308282975552e-05, "loss": 0.3312, "step": 3374 }, { "epoch": 0.19, "grad_norm": 0.3181013883121376, "learning_rate": 1.8624366503453752e-05, "loss": 0.2266, "step": 3375 }, { "epoch": 0.19, "grad_norm": 0.4652146982144647, "learning_rate": 1.8623424425270514e-05, "loss": 0.2808, "step": 3376 }, { "epoch": 0.19, "grad_norm": 0.39602293510398723, "learning_rate": 1.8622482048458454e-05, "loss": 0.3131, "step": 3377 }, { "epoch": 0.19, "grad_norm": 0.44220445658577895, "learning_rate": 1.8621539373050218e-05, "loss": 0.3288, "step": 3378 }, { "epoch": 0.19, "grad_norm": 0.7299981826510856, "learning_rate": 1.862059639907844e-05, "loss": 0.569, "step": 3379 }, { "epoch": 0.19, "grad_norm": 0.39958092415064894, "learning_rate": 1.861965312657578e-05, "loss": 0.2999, "step": 3380 }, { "epoch": 0.19, "grad_norm": 0.3869027931462021, "learning_rate": 1.8618709555574903e-05, "loss": 0.277, "step": 3381 }, { "epoch": 0.19, "grad_norm": 0.3501211008611354, "learning_rate": 1.8617765686108486e-05, "loss": 0.2184, "step": 3382 }, { "epoch": 0.19, "grad_norm": 0.3614144002216407, "learning_rate": 1.8616821518209213e-05, "loss": 0.2989, "step": 3383 }, { "epoch": 0.19, "grad_norm": 0.8955308958623391, "learning_rate": 1.8615877051909783e-05, "loss": 0.4199, "step": 3384 }, { "epoch": 0.19, "grad_norm": 0.420971422861682, "learning_rate": 1.8614932287242897e-05, "loss": 0.3649, "step": 3385 }, { "epoch": 0.19, "grad_norm": 0.39696752016884945, "learning_rate": 1.8613987224241283e-05, "loss": 0.2856, "step": 3386 }, { "epoch": 0.19, "grad_norm": 1.1534536864598737, "learning_rate": 1.8613041862937656e-05, "loss": 0.6503, "step": 3387 }, { "epoch": 0.19, "grad_norm": 0.26372459169274415, "learning_rate": 1.861209620336476e-05, "loss": 0.0746, "step": 3388 }, { "epoch": 0.19, "grad_norm": 0.5163757820951047, "learning_rate": 1.8611150245555345e-05, "loss": 0.3125, "step": 3389 }, { "epoch": 0.19, "grad_norm": 0.5196639264351134, "learning_rate": 1.861020398954217e-05, "loss": 0.3546, "step": 3390 }, { "epoch": 0.19, "grad_norm": 0.8214877192095381, "learning_rate": 1.8609257435357995e-05, "loss": 0.387, "step": 3391 }, { "epoch": 0.19, "grad_norm": 0.45946932605048707, "learning_rate": 1.8608310583035607e-05, "loss": 0.3055, "step": 3392 }, { "epoch": 0.19, "grad_norm": 0.452824130541817, "learning_rate": 1.8607363432607793e-05, "loss": 0.3342, "step": 3393 }, { "epoch": 0.2, "grad_norm": 0.33934441895880785, "learning_rate": 1.8606415984107357e-05, "loss": 0.1221, "step": 3394 }, { "epoch": 0.2, "grad_norm": 0.4336853637931559, "learning_rate": 1.8605468237567103e-05, "loss": 0.2819, "step": 3395 }, { "epoch": 0.2, "grad_norm": 1.0742880039688079, "learning_rate": 1.8604520193019855e-05, "loss": 0.5185, "step": 3396 }, { "epoch": 0.2, "grad_norm": 0.39427788076090214, "learning_rate": 1.860357185049844e-05, "loss": 0.2925, "step": 3397 }, { "epoch": 0.2, "grad_norm": 0.39690461047371556, "learning_rate": 1.8602623210035707e-05, "loss": 0.307, "step": 3398 }, { "epoch": 0.2, "grad_norm": 1.1140528804897003, "learning_rate": 1.8601674271664497e-05, "loss": 0.7088, "step": 3399 }, { "epoch": 0.2, "grad_norm": 0.2886567836009131, "learning_rate": 1.8600725035417678e-05, "loss": 0.1846, "step": 3400 }, { "epoch": 0.2, "grad_norm": 0.3227616596528429, "learning_rate": 1.8599775501328125e-05, "loss": 0.2425, "step": 3401 }, { "epoch": 0.2, "grad_norm": 0.848213881735556, "learning_rate": 1.8598825669428713e-05, "loss": 0.473, "step": 3402 }, { "epoch": 0.2, "grad_norm": 0.837716610668294, "learning_rate": 1.8597875539752337e-05, "loss": 0.562, "step": 3403 }, { "epoch": 0.2, "grad_norm": 0.33372430563348676, "learning_rate": 1.85969251123319e-05, "loss": 0.2439, "step": 3404 }, { "epoch": 0.2, "grad_norm": 0.49703820168497725, "learning_rate": 1.859597438720032e-05, "loss": 0.3647, "step": 3405 }, { "epoch": 0.2, "grad_norm": 0.4523814047602288, "learning_rate": 1.8595023364390515e-05, "loss": 0.3094, "step": 3406 }, { "epoch": 0.2, "grad_norm": 0.3932121628983667, "learning_rate": 1.8594072043935418e-05, "loss": 0.241, "step": 3407 }, { "epoch": 0.2, "grad_norm": 0.530507341323851, "learning_rate": 1.8593120425867977e-05, "loss": 0.3333, "step": 3408 }, { "epoch": 0.2, "grad_norm": 0.45189749977195065, "learning_rate": 1.859216851022115e-05, "loss": 0.3763, "step": 3409 }, { "epoch": 0.2, "grad_norm": 0.31896053252137, "learning_rate": 1.859121629702789e-05, "loss": 0.2049, "step": 3410 }, { "epoch": 0.2, "grad_norm": 1.4510347587273766, "learning_rate": 1.8590263786321182e-05, "loss": 0.7991, "step": 3411 }, { "epoch": 0.2, "grad_norm": 0.5061445451316303, "learning_rate": 1.858931097813401e-05, "loss": 0.4186, "step": 3412 }, { "epoch": 0.2, "grad_norm": 0.35254918453174994, "learning_rate": 1.8588357872499364e-05, "loss": 0.2737, "step": 3413 }, { "epoch": 0.2, "grad_norm": 0.30901119829012413, "learning_rate": 1.8587404469450256e-05, "loss": 0.1988, "step": 3414 }, { "epoch": 0.2, "grad_norm": 1.1748732561056525, "learning_rate": 1.85864507690197e-05, "loss": 0.7539, "step": 3415 }, { "epoch": 0.2, "grad_norm": 0.4472144634212105, "learning_rate": 1.8585496771240726e-05, "loss": 0.3451, "step": 3416 }, { "epoch": 0.2, "grad_norm": 0.3288085382989217, "learning_rate": 1.8584542476146364e-05, "loss": 0.2777, "step": 3417 }, { "epoch": 0.2, "grad_norm": 0.701787075717411, "learning_rate": 1.8583587883769668e-05, "loss": 0.5152, "step": 3418 }, { "epoch": 0.2, "grad_norm": 0.4124720953337286, "learning_rate": 1.8582632994143693e-05, "loss": 0.3517, "step": 3419 }, { "epoch": 0.2, "grad_norm": 0.26448985422784416, "learning_rate": 1.8581677807301507e-05, "loss": 0.1267, "step": 3420 }, { "epoch": 0.2, "grad_norm": 0.4416407925509442, "learning_rate": 1.8580722323276186e-05, "loss": 0.35, "step": 3421 }, { "epoch": 0.2, "grad_norm": 0.36003377851432783, "learning_rate": 1.857976654210082e-05, "loss": 0.2706, "step": 3422 }, { "epoch": 0.2, "grad_norm": 1.0498458699375062, "learning_rate": 1.857881046380851e-05, "loss": 0.3784, "step": 3423 }, { "epoch": 0.2, "grad_norm": 0.3688550683980165, "learning_rate": 1.8577854088432355e-05, "loss": 0.3505, "step": 3424 }, { "epoch": 0.2, "grad_norm": 0.44942001956730154, "learning_rate": 1.8576897416005487e-05, "loss": 0.3227, "step": 3425 }, { "epoch": 0.2, "grad_norm": 0.6148292590435895, "learning_rate": 1.857594044656103e-05, "loss": 0.3691, "step": 3426 }, { "epoch": 0.2, "grad_norm": 0.26088148061349903, "learning_rate": 1.8574983180132128e-05, "loss": 0.139, "step": 3427 }, { "epoch": 0.2, "grad_norm": 0.6143931862134248, "learning_rate": 1.8574025616751923e-05, "loss": 0.2772, "step": 3428 }, { "epoch": 0.2, "grad_norm": 0.40106184091905833, "learning_rate": 1.8573067756453578e-05, "loss": 0.312, "step": 3429 }, { "epoch": 0.2, "grad_norm": 0.6215169188275277, "learning_rate": 1.8572109599270266e-05, "loss": 0.4106, "step": 3430 }, { "epoch": 0.2, "grad_norm": 0.4565998521916527, "learning_rate": 1.857115114523517e-05, "loss": 0.3022, "step": 3431 }, { "epoch": 0.2, "grad_norm": 0.3754630031717685, "learning_rate": 1.857019239438148e-05, "loss": 0.2459, "step": 3432 }, { "epoch": 0.2, "grad_norm": 0.3584316941904439, "learning_rate": 1.8569233346742392e-05, "loss": 0.2165, "step": 3433 }, { "epoch": 0.2, "grad_norm": 0.4805254322830583, "learning_rate": 1.856827400235112e-05, "loss": 0.3607, "step": 3434 }, { "epoch": 0.2, "grad_norm": 0.5382648282007101, "learning_rate": 1.8567314361240893e-05, "loss": 0.3951, "step": 3435 }, { "epoch": 0.2, "grad_norm": 0.4865226514408291, "learning_rate": 1.8566354423444933e-05, "loss": 0.3781, "step": 3436 }, { "epoch": 0.2, "grad_norm": 0.358374646552267, "learning_rate": 1.856539418899649e-05, "loss": 0.2488, "step": 3437 }, { "epoch": 0.2, "grad_norm": 0.5303949361998881, "learning_rate": 1.8564433657928815e-05, "loss": 0.3902, "step": 3438 }, { "epoch": 0.2, "grad_norm": 0.4547723658493619, "learning_rate": 1.8563472830275172e-05, "loss": 0.3408, "step": 3439 }, { "epoch": 0.2, "grad_norm": 0.270919077914169, "learning_rate": 1.856251170606883e-05, "loss": 0.1925, "step": 3440 }, { "epoch": 0.2, "grad_norm": 0.38320462625120033, "learning_rate": 1.8561550285343077e-05, "loss": 0.3415, "step": 3441 }, { "epoch": 0.2, "grad_norm": 0.7710149460976287, "learning_rate": 1.8560588568131205e-05, "loss": 0.5157, "step": 3442 }, { "epoch": 0.2, "grad_norm": 0.45201487807738494, "learning_rate": 1.8559626554466523e-05, "loss": 0.1742, "step": 3443 }, { "epoch": 0.2, "grad_norm": 0.6001960036849652, "learning_rate": 1.8558664244382338e-05, "loss": 0.3938, "step": 3444 }, { "epoch": 0.2, "grad_norm": 0.4399877397096689, "learning_rate": 1.8557701637911978e-05, "loss": 0.3467, "step": 3445 }, { "epoch": 0.2, "grad_norm": 0.6694405301341718, "learning_rate": 1.855673873508878e-05, "loss": 0.2563, "step": 3446 }, { "epoch": 0.2, "grad_norm": 0.5429342439640811, "learning_rate": 1.855577553594609e-05, "loss": 0.3684, "step": 3447 }, { "epoch": 0.2, "grad_norm": 0.3009787386986737, "learning_rate": 1.8554812040517255e-05, "loss": 0.2322, "step": 3448 }, { "epoch": 0.2, "grad_norm": 0.782021777816558, "learning_rate": 1.855384824883565e-05, "loss": 0.4354, "step": 3449 }, { "epoch": 0.2, "grad_norm": 0.3773608366203985, "learning_rate": 1.8552884160934647e-05, "loss": 0.2431, "step": 3450 }, { "epoch": 0.2, "grad_norm": 1.2925400215861826, "learning_rate": 1.8551919776847634e-05, "loss": 0.837, "step": 3451 }, { "epoch": 0.2, "grad_norm": 0.43774430556639365, "learning_rate": 1.8550955096608007e-05, "loss": 0.3146, "step": 3452 }, { "epoch": 0.2, "grad_norm": 0.4075921947601884, "learning_rate": 1.8549990120249174e-05, "loss": 0.3066, "step": 3453 }, { "epoch": 0.2, "grad_norm": 0.3309237068426338, "learning_rate": 1.8549024847804547e-05, "loss": 0.1845, "step": 3454 }, { "epoch": 0.2, "grad_norm": 0.5641842287581473, "learning_rate": 1.854805927930756e-05, "loss": 0.3114, "step": 3455 }, { "epoch": 0.2, "grad_norm": 0.4169525329222009, "learning_rate": 1.854709341479165e-05, "loss": 0.2763, "step": 3456 }, { "epoch": 0.2, "grad_norm": 0.49289840010942465, "learning_rate": 1.8546127254290257e-05, "loss": 0.3636, "step": 3457 }, { "epoch": 0.2, "grad_norm": 0.5541013067086684, "learning_rate": 1.8545160797836847e-05, "loss": 0.4148, "step": 3458 }, { "epoch": 0.2, "grad_norm": 0.4141408240338715, "learning_rate": 1.8544194045464888e-05, "loss": 0.2903, "step": 3459 }, { "epoch": 0.2, "grad_norm": 0.28105441189183067, "learning_rate": 1.8543226997207854e-05, "loss": 0.2312, "step": 3460 }, { "epoch": 0.2, "grad_norm": 0.6534352410994891, "learning_rate": 1.8542259653099236e-05, "loss": 0.2852, "step": 3461 }, { "epoch": 0.2, "grad_norm": 0.4909083697020356, "learning_rate": 1.8541292013172538e-05, "loss": 0.3315, "step": 3462 }, { "epoch": 0.2, "grad_norm": 0.924048730821875, "learning_rate": 1.854032407746126e-05, "loss": 0.4172, "step": 3463 }, { "epoch": 0.2, "grad_norm": 0.36777627302291016, "learning_rate": 1.853935584599893e-05, "loss": 0.2993, "step": 3464 }, { "epoch": 0.2, "grad_norm": 0.4041852347714912, "learning_rate": 1.8538387318819074e-05, "loss": 0.3167, "step": 3465 }, { "epoch": 0.2, "grad_norm": 0.2281237706488959, "learning_rate": 1.853741849595523e-05, "loss": 0.0881, "step": 3466 }, { "epoch": 0.2, "grad_norm": 0.8221586813021399, "learning_rate": 1.853644937744095e-05, "loss": 0.486, "step": 3467 }, { "epoch": 0.2, "grad_norm": 0.3859475003282859, "learning_rate": 1.8535479963309796e-05, "loss": 0.313, "step": 3468 }, { "epoch": 0.2, "grad_norm": 0.6530897529329045, "learning_rate": 1.853451025359534e-05, "loss": 0.3369, "step": 3469 }, { "epoch": 0.2, "grad_norm": 1.042977278524951, "learning_rate": 1.8533540248331162e-05, "loss": 0.4573, "step": 3470 }, { "epoch": 0.2, "grad_norm": 0.41146027977536204, "learning_rate": 1.8532569947550846e-05, "loss": 0.3255, "step": 3471 }, { "epoch": 0.2, "grad_norm": 0.35765902247522974, "learning_rate": 1.8531599351288007e-05, "loss": 0.2252, "step": 3472 }, { "epoch": 0.2, "grad_norm": 0.3855611438390921, "learning_rate": 1.8530628459576243e-05, "loss": 0.2276, "step": 3473 }, { "epoch": 0.2, "grad_norm": 0.5204397928422537, "learning_rate": 1.8529657272449186e-05, "loss": 0.3064, "step": 3474 }, { "epoch": 0.2, "grad_norm": 1.810437510701928, "learning_rate": 1.8528685789940463e-05, "loss": 0.5076, "step": 3475 }, { "epoch": 0.2, "grad_norm": 0.40869087933670045, "learning_rate": 1.8527714012083718e-05, "loss": 0.2685, "step": 3476 }, { "epoch": 0.2, "grad_norm": 0.43618505181314465, "learning_rate": 1.8526741938912605e-05, "loss": 0.3277, "step": 3477 }, { "epoch": 0.2, "grad_norm": 0.42187847427639297, "learning_rate": 1.8525769570460783e-05, "loss": 0.2454, "step": 3478 }, { "epoch": 0.2, "grad_norm": 0.3165643524146173, "learning_rate": 1.8524796906761928e-05, "loss": 0.1485, "step": 3479 }, { "epoch": 0.2, "grad_norm": 0.46732317429125, "learning_rate": 1.8523823947849722e-05, "loss": 0.2923, "step": 3480 }, { "epoch": 0.2, "grad_norm": 1.1275700154976764, "learning_rate": 1.8522850693757865e-05, "loss": 0.4158, "step": 3481 }, { "epoch": 0.2, "grad_norm": 0.7907367906006263, "learning_rate": 1.8521877144520047e-05, "loss": 0.2819, "step": 3482 }, { "epoch": 0.2, "grad_norm": 0.44558259936120503, "learning_rate": 1.8520903300169993e-05, "loss": 0.3128, "step": 3483 }, { "epoch": 0.2, "grad_norm": 0.31241687566734866, "learning_rate": 1.8519929160741427e-05, "loss": 0.2827, "step": 3484 }, { "epoch": 0.2, "grad_norm": 0.31618469870226495, "learning_rate": 1.8518954726268076e-05, "loss": 0.1135, "step": 3485 }, { "epoch": 0.2, "grad_norm": 0.3964958438859382, "learning_rate": 1.851797999678369e-05, "loss": 0.3282, "step": 3486 }, { "epoch": 0.2, "grad_norm": 0.8788401444161869, "learning_rate": 1.8517004972322022e-05, "loss": 0.5349, "step": 3487 }, { "epoch": 0.2, "grad_norm": 0.5842037117974624, "learning_rate": 1.851602965291684e-05, "loss": 0.3523, "step": 3488 }, { "epoch": 0.2, "grad_norm": 0.4250093402378405, "learning_rate": 1.851505403860192e-05, "loss": 0.2307, "step": 3489 }, { "epoch": 0.2, "grad_norm": 1.1648122285456606, "learning_rate": 1.8514078129411045e-05, "loss": 0.5946, "step": 3490 }, { "epoch": 0.2, "grad_norm": 0.33870419487266124, "learning_rate": 1.8513101925378006e-05, "loss": 0.2547, "step": 3491 }, { "epoch": 0.2, "grad_norm": 0.3289610462955457, "learning_rate": 1.8512125426536617e-05, "loss": 0.2312, "step": 3492 }, { "epoch": 0.2, "grad_norm": 0.7856786137489506, "learning_rate": 1.8511148632920685e-05, "loss": 0.4921, "step": 3493 }, { "epoch": 0.2, "grad_norm": 0.9243725044925123, "learning_rate": 1.851017154456405e-05, "loss": 0.5751, "step": 3494 }, { "epoch": 0.2, "grad_norm": 0.38206441570861877, "learning_rate": 1.8509194161500536e-05, "loss": 0.24, "step": 3495 }, { "epoch": 0.2, "grad_norm": 0.4583245897267825, "learning_rate": 1.8508216483763993e-05, "loss": 0.3524, "step": 3496 }, { "epoch": 0.2, "grad_norm": 0.2942176231528043, "learning_rate": 1.850723851138828e-05, "loss": 0.2291, "step": 3497 }, { "epoch": 0.2, "grad_norm": 0.3675286195522216, "learning_rate": 1.850626024440726e-05, "loss": 0.2646, "step": 3498 }, { "epoch": 0.2, "grad_norm": 0.4881052772922558, "learning_rate": 1.850528168285482e-05, "loss": 0.4122, "step": 3499 }, { "epoch": 0.2, "grad_norm": 0.5091923599798945, "learning_rate": 1.8504302826764835e-05, "loss": 0.3891, "step": 3500 }, { "epoch": 0.2, "grad_norm": 0.3633056005786531, "learning_rate": 1.8503323676171212e-05, "loss": 0.2949, "step": 3501 }, { "epoch": 0.2, "grad_norm": 0.5991201857527949, "learning_rate": 1.8502344231107855e-05, "loss": 0.3974, "step": 3502 }, { "epoch": 0.2, "grad_norm": 0.40415730403043115, "learning_rate": 1.8501364491608683e-05, "loss": 0.3875, "step": 3503 }, { "epoch": 0.2, "grad_norm": 0.34569731298708684, "learning_rate": 1.8500384457707625e-05, "loss": 0.285, "step": 3504 }, { "epoch": 0.2, "grad_norm": 0.2568026864658739, "learning_rate": 1.8499404129438617e-05, "loss": 0.1563, "step": 3505 }, { "epoch": 0.2, "grad_norm": 1.1707986100418522, "learning_rate": 1.8498423506835613e-05, "loss": 0.7772, "step": 3506 }, { "epoch": 0.2, "grad_norm": 0.38602275849489903, "learning_rate": 1.8497442589932568e-05, "loss": 0.2805, "step": 3507 }, { "epoch": 0.2, "grad_norm": 0.4079578462025017, "learning_rate": 1.8496461378763445e-05, "loss": 0.2396, "step": 3508 }, { "epoch": 0.2, "grad_norm": 0.5885481273942008, "learning_rate": 1.8495479873362237e-05, "loss": 0.4813, "step": 3509 }, { "epoch": 0.2, "grad_norm": 0.28505728074008135, "learning_rate": 1.8494498073762924e-05, "loss": 0.2308, "step": 3510 }, { "epoch": 0.2, "grad_norm": 0.3259515702055285, "learning_rate": 1.8493515979999508e-05, "loss": 0.2147, "step": 3511 }, { "epoch": 0.2, "grad_norm": 0.5081829044842984, "learning_rate": 1.8492533592105998e-05, "loss": 0.3786, "step": 3512 }, { "epoch": 0.2, "grad_norm": 0.40211123448914077, "learning_rate": 1.8491550910116415e-05, "loss": 0.2937, "step": 3513 }, { "epoch": 0.2, "grad_norm": 0.5797803004312481, "learning_rate": 1.8490567934064788e-05, "loss": 0.3893, "step": 3514 }, { "epoch": 0.2, "grad_norm": 0.35474516854979343, "learning_rate": 1.848958466398516e-05, "loss": 0.311, "step": 3515 }, { "epoch": 0.2, "grad_norm": 0.3743499556350666, "learning_rate": 1.8488601099911582e-05, "loss": 0.2879, "step": 3516 }, { "epoch": 0.2, "grad_norm": 0.34838342234863295, "learning_rate": 1.8487617241878114e-05, "loss": 0.2501, "step": 3517 }, { "epoch": 0.2, "grad_norm": 0.9330455844434704, "learning_rate": 1.8486633089918823e-05, "loss": 0.5301, "step": 3518 }, { "epoch": 0.2, "grad_norm": 0.3876398473412494, "learning_rate": 1.848564864406779e-05, "loss": 0.2736, "step": 3519 }, { "epoch": 0.2, "grad_norm": 0.38224800897615363, "learning_rate": 1.8484663904359112e-05, "loss": 0.3232, "step": 3520 }, { "epoch": 0.2, "grad_norm": 0.7342180011930438, "learning_rate": 1.848367887082689e-05, "loss": 0.4027, "step": 3521 }, { "epoch": 0.2, "grad_norm": 0.37253887582059153, "learning_rate": 1.848269354350523e-05, "loss": 0.2922, "step": 3522 }, { "epoch": 0.2, "grad_norm": 0.4486634748559754, "learning_rate": 1.848170792242826e-05, "loss": 0.3188, "step": 3523 }, { "epoch": 0.2, "grad_norm": 0.41124068257992336, "learning_rate": 1.848072200763011e-05, "loss": 0.3629, "step": 3524 }, { "epoch": 0.2, "grad_norm": 0.252569055610881, "learning_rate": 1.8479735799144917e-05, "loss": 0.1877, "step": 3525 }, { "epoch": 0.2, "grad_norm": 0.6926210684337959, "learning_rate": 1.847874929700684e-05, "loss": 0.5067, "step": 3526 }, { "epoch": 0.2, "grad_norm": 0.5730030339821481, "learning_rate": 1.8477762501250046e-05, "loss": 0.3562, "step": 3527 }, { "epoch": 0.2, "grad_norm": 0.36594434314465024, "learning_rate": 1.847677541190869e-05, "loss": 0.2255, "step": 3528 }, { "epoch": 0.2, "grad_norm": 0.5439482028879972, "learning_rate": 1.8475788029016974e-05, "loss": 0.3023, "step": 3529 }, { "epoch": 0.2, "grad_norm": 0.5147312308129324, "learning_rate": 1.847480035260908e-05, "loss": 0.3325, "step": 3530 }, { "epoch": 0.2, "grad_norm": 0.396855562434019, "learning_rate": 1.8473812382719215e-05, "loss": 0.2331, "step": 3531 }, { "epoch": 0.2, "grad_norm": 0.3650937303546053, "learning_rate": 1.8472824119381592e-05, "loss": 0.3324, "step": 3532 }, { "epoch": 0.2, "grad_norm": 0.8069519781469283, "learning_rate": 1.8471835562630435e-05, "loss": 0.5369, "step": 3533 }, { "epoch": 0.2, "grad_norm": 0.3224016264255935, "learning_rate": 1.8470846712499977e-05, "loss": 0.2151, "step": 3534 }, { "epoch": 0.2, "grad_norm": 0.49887651442034286, "learning_rate": 1.846985756902446e-05, "loss": 0.4021, "step": 3535 }, { "epoch": 0.2, "grad_norm": 0.372999385227418, "learning_rate": 1.8468868132238138e-05, "loss": 0.3151, "step": 3536 }, { "epoch": 0.2, "grad_norm": 0.4155513944432335, "learning_rate": 1.8467878402175278e-05, "loss": 0.3341, "step": 3537 }, { "epoch": 0.2, "grad_norm": 0.38115414788893204, "learning_rate": 1.8466888378870155e-05, "loss": 0.2816, "step": 3538 }, { "epoch": 0.2, "grad_norm": 0.3971013745780681, "learning_rate": 1.8465898062357048e-05, "loss": 0.3233, "step": 3539 }, { "epoch": 0.2, "grad_norm": 0.405786263334606, "learning_rate": 1.846490745267026e-05, "loss": 0.3123, "step": 3540 }, { "epoch": 0.2, "grad_norm": 0.524142882921503, "learning_rate": 1.8463916549844084e-05, "loss": 0.2972, "step": 3541 }, { "epoch": 0.2, "grad_norm": 0.43131354308913267, "learning_rate": 1.846292535391285e-05, "loss": 0.3471, "step": 3542 }, { "epoch": 0.2, "grad_norm": 0.3522374006752674, "learning_rate": 1.846193386491087e-05, "loss": 0.316, "step": 3543 }, { "epoch": 0.2, "grad_norm": 0.4367897212599001, "learning_rate": 1.846094208287248e-05, "loss": 0.3275, "step": 3544 }, { "epoch": 0.2, "grad_norm": 0.4318912036953102, "learning_rate": 1.845995000783204e-05, "loss": 0.2809, "step": 3545 }, { "epoch": 0.2, "grad_norm": 0.37576132450278826, "learning_rate": 1.8458957639823887e-05, "loss": 0.271, "step": 3546 }, { "epoch": 0.2, "grad_norm": 0.44073497833225533, "learning_rate": 1.8457964978882397e-05, "loss": 0.2809, "step": 3547 }, { "epoch": 0.2, "grad_norm": 0.4402566585814481, "learning_rate": 1.8456972025041943e-05, "loss": 0.411, "step": 3548 }, { "epoch": 0.2, "grad_norm": 0.5817384512608036, "learning_rate": 1.845597877833691e-05, "loss": 0.3939, "step": 3549 }, { "epoch": 0.2, "grad_norm": 0.347563565667777, "learning_rate": 1.84549852388017e-05, "loss": 0.2921, "step": 3550 }, { "epoch": 0.2, "grad_norm": 0.3335895526495127, "learning_rate": 1.845399140647071e-05, "loss": 0.2274, "step": 3551 }, { "epoch": 0.2, "grad_norm": 0.7074274468152657, "learning_rate": 1.8452997281378364e-05, "loss": 0.3816, "step": 3552 }, { "epoch": 0.2, "grad_norm": 0.4984396584595269, "learning_rate": 1.8452002863559086e-05, "loss": 0.3568, "step": 3553 }, { "epoch": 0.2, "grad_norm": 0.6284443435298703, "learning_rate": 1.845100815304731e-05, "loss": 0.4117, "step": 3554 }, { "epoch": 0.2, "grad_norm": 0.3490991839784079, "learning_rate": 1.845001314987749e-05, "loss": 0.28, "step": 3555 }, { "epoch": 0.2, "grad_norm": 0.4250866389209264, "learning_rate": 1.8449017854084072e-05, "loss": 0.3298, "step": 3556 }, { "epoch": 0.2, "grad_norm": 0.3043618837957513, "learning_rate": 1.844802226570153e-05, "loss": 0.1234, "step": 3557 }, { "epoch": 0.2, "grad_norm": 0.4379559170905638, "learning_rate": 1.8447026384764343e-05, "loss": 0.3207, "step": 3558 }, { "epoch": 0.2, "grad_norm": 0.3965722955626275, "learning_rate": 1.8446030211306993e-05, "loss": 0.3391, "step": 3559 }, { "epoch": 0.2, "grad_norm": 1.024955890124149, "learning_rate": 1.844503374536398e-05, "loss": 0.427, "step": 3560 }, { "epoch": 0.2, "grad_norm": 0.41957172094011896, "learning_rate": 1.8444036986969814e-05, "loss": 0.336, "step": 3561 }, { "epoch": 0.2, "grad_norm": 0.5542877434696505, "learning_rate": 1.8443039936159007e-05, "loss": 0.3461, "step": 3562 }, { "epoch": 0.2, "grad_norm": 0.2729193047980405, "learning_rate": 1.8442042592966095e-05, "loss": 0.2327, "step": 3563 }, { "epoch": 0.2, "grad_norm": 0.3549744193912546, "learning_rate": 1.8441044957425608e-05, "loss": 0.1783, "step": 3564 }, { "epoch": 0.2, "grad_norm": 0.5966312774507957, "learning_rate": 1.8440047029572094e-05, "loss": 0.41, "step": 3565 }, { "epoch": 0.2, "grad_norm": 1.0779161799481367, "learning_rate": 1.843904880944012e-05, "loss": 0.5309, "step": 3566 }, { "epoch": 0.2, "grad_norm": 0.3425507945907625, "learning_rate": 1.843805029706425e-05, "loss": 0.2269, "step": 3567 }, { "epoch": 0.2, "grad_norm": 0.43024803622830704, "learning_rate": 1.8437051492479053e-05, "loss": 0.3461, "step": 3568 }, { "epoch": 0.21, "grad_norm": 0.32182440028828346, "learning_rate": 1.843605239571913e-05, "loss": 0.1903, "step": 3569 }, { "epoch": 0.21, "grad_norm": 0.43595402051191434, "learning_rate": 1.8435053006819073e-05, "loss": 0.1819, "step": 3570 }, { "epoch": 0.21, "grad_norm": 0.4289935360872228, "learning_rate": 1.8434053325813495e-05, "loss": 0.3026, "step": 3571 }, { "epoch": 0.21, "grad_norm": 1.259600352356555, "learning_rate": 1.8433053352737014e-05, "loss": 0.5352, "step": 3572 }, { "epoch": 0.21, "grad_norm": 0.46454768580118233, "learning_rate": 1.8432053087624258e-05, "loss": 0.2123, "step": 3573 }, { "epoch": 0.21, "grad_norm": 0.3683549795511345, "learning_rate": 1.8431052530509866e-05, "loss": 0.2625, "step": 3574 }, { "epoch": 0.21, "grad_norm": 0.3434082020790198, "learning_rate": 1.8430051681428486e-05, "loss": 0.2504, "step": 3575 }, { "epoch": 0.21, "grad_norm": 0.6314261221401793, "learning_rate": 1.842905054041478e-05, "loss": 0.3683, "step": 3576 }, { "epoch": 0.21, "grad_norm": 0.7178093309482125, "learning_rate": 1.8428049107503417e-05, "loss": 0.3267, "step": 3577 }, { "epoch": 0.21, "grad_norm": 1.1845693433728908, "learning_rate": 1.8427047382729073e-05, "loss": 0.606, "step": 3578 }, { "epoch": 0.21, "grad_norm": 0.3857144192125777, "learning_rate": 1.842604536612644e-05, "loss": 0.3092, "step": 3579 }, { "epoch": 0.21, "grad_norm": 0.422890021608481, "learning_rate": 1.842504305773022e-05, "loss": 0.2626, "step": 3580 }, { "epoch": 0.21, "grad_norm": 0.319692052216907, "learning_rate": 1.8424040457575124e-05, "loss": 0.1854, "step": 3581 }, { "epoch": 0.21, "grad_norm": 0.609801137363293, "learning_rate": 1.8423037565695864e-05, "loss": 0.3804, "step": 3582 }, { "epoch": 0.21, "grad_norm": 0.3952230768238028, "learning_rate": 1.842203438212718e-05, "loss": 0.2583, "step": 3583 }, { "epoch": 0.21, "grad_norm": 0.9995316109812836, "learning_rate": 1.8421030906903805e-05, "loss": 0.5573, "step": 3584 }, { "epoch": 0.21, "grad_norm": 0.8080682129475155, "learning_rate": 1.8420027140060493e-05, "loss": 0.464, "step": 3585 }, { "epoch": 0.21, "grad_norm": 0.38502970294226097, "learning_rate": 1.8419023081632e-05, "loss": 0.2237, "step": 3586 }, { "epoch": 0.21, "grad_norm": 0.3003903655050074, "learning_rate": 1.8418018731653106e-05, "loss": 0.2376, "step": 3587 }, { "epoch": 0.21, "grad_norm": 0.8598542283516147, "learning_rate": 1.841701409015858e-05, "loss": 0.4417, "step": 3588 }, { "epoch": 0.21, "grad_norm": 0.4722555501097505, "learning_rate": 1.841600915718322e-05, "loss": 0.3618, "step": 3589 }, { "epoch": 0.21, "grad_norm": 0.8780361475667137, "learning_rate": 1.8415003932761823e-05, "loss": 0.5438, "step": 3590 }, { "epoch": 0.21, "grad_norm": 0.36609700316137517, "learning_rate": 1.8413998416929205e-05, "loss": 0.3317, "step": 3591 }, { "epoch": 0.21, "grad_norm": 0.4232364971031735, "learning_rate": 1.8412992609720183e-05, "loss": 0.3091, "step": 3592 }, { "epoch": 0.21, "grad_norm": 0.2582320073159369, "learning_rate": 1.8411986511169585e-05, "loss": 0.0721, "step": 3593 }, { "epoch": 0.21, "grad_norm": 0.615145234861627, "learning_rate": 1.8410980121312258e-05, "loss": 0.41, "step": 3594 }, { "epoch": 0.21, "grad_norm": 0.3981913852643038, "learning_rate": 1.8409973440183054e-05, "loss": 0.3132, "step": 3595 }, { "epoch": 0.21, "grad_norm": 1.2713381305155262, "learning_rate": 1.840896646781683e-05, "loss": 0.5107, "step": 3596 }, { "epoch": 0.21, "grad_norm": 0.5843553386069017, "learning_rate": 1.8407959204248455e-05, "loss": 0.3627, "step": 3597 }, { "epoch": 0.21, "grad_norm": 0.46489119063537054, "learning_rate": 1.8406951649512817e-05, "loss": 0.3248, "step": 3598 }, { "epoch": 0.21, "grad_norm": 0.47103184457362807, "learning_rate": 1.8405943803644803e-05, "loss": 0.3158, "step": 3599 }, { "epoch": 0.21, "grad_norm": 0.7760715327257453, "learning_rate": 1.840493566667932e-05, "loss": 0.3387, "step": 3600 }, { "epoch": 0.21, "grad_norm": 0.3663667134586227, "learning_rate": 1.8403927238651274e-05, "loss": 0.2674, "step": 3601 }, { "epoch": 0.21, "grad_norm": 0.5303169306539498, "learning_rate": 1.8402918519595592e-05, "loss": 0.3238, "step": 3602 }, { "epoch": 0.21, "grad_norm": 0.47049291683057637, "learning_rate": 1.8401909509547196e-05, "loss": 0.2613, "step": 3603 }, { "epoch": 0.21, "grad_norm": 0.4636995547912467, "learning_rate": 1.8400900208541045e-05, "loss": 0.3415, "step": 3604 }, { "epoch": 0.21, "grad_norm": 0.610816875773527, "learning_rate": 1.8399890616612073e-05, "loss": 0.4726, "step": 3605 }, { "epoch": 0.21, "grad_norm": 0.3932357164783332, "learning_rate": 1.8398880733795253e-05, "loss": 0.2688, "step": 3606 }, { "epoch": 0.21, "grad_norm": 0.33115183414057575, "learning_rate": 1.8397870560125554e-05, "loss": 0.2524, "step": 3607 }, { "epoch": 0.21, "grad_norm": 0.5466588959281407, "learning_rate": 1.839686009563796e-05, "loss": 0.368, "step": 3608 }, { "epoch": 0.21, "grad_norm": 0.3873845994650159, "learning_rate": 1.839584934036746e-05, "loss": 0.0684, "step": 3609 }, { "epoch": 0.21, "grad_norm": 0.5054505521879232, "learning_rate": 1.8394838294349058e-05, "loss": 0.3073, "step": 3610 }, { "epoch": 0.21, "grad_norm": 0.595052581816832, "learning_rate": 1.839382695761777e-05, "loss": 0.3465, "step": 3611 }, { "epoch": 0.21, "grad_norm": 1.1129720778207792, "learning_rate": 1.839281533020861e-05, "loss": 0.4943, "step": 3612 }, { "epoch": 0.21, "grad_norm": 0.33785668181825734, "learning_rate": 1.839180341215662e-05, "loss": 0.1835, "step": 3613 }, { "epoch": 0.21, "grad_norm": 1.3938903417041624, "learning_rate": 1.8390791203496842e-05, "loss": 0.8968, "step": 3614 }, { "epoch": 0.21, "grad_norm": 0.3179001508479327, "learning_rate": 1.838977870426432e-05, "loss": 0.2777, "step": 3615 }, { "epoch": 0.21, "grad_norm": 0.38387065537886916, "learning_rate": 1.8388765914494124e-05, "loss": 0.1989, "step": 3616 }, { "epoch": 0.21, "grad_norm": 0.9955530352807715, "learning_rate": 1.8387752834221326e-05, "loss": 0.4931, "step": 3617 }, { "epoch": 0.21, "grad_norm": 0.45334877775759963, "learning_rate": 1.8386739463481004e-05, "loss": 0.2833, "step": 3618 }, { "epoch": 0.21, "grad_norm": 0.4419384557222577, "learning_rate": 1.838572580230826e-05, "loss": 0.2239, "step": 3619 }, { "epoch": 0.21, "grad_norm": 1.2042963328857403, "learning_rate": 1.838471185073819e-05, "loss": 0.7506, "step": 3620 }, { "epoch": 0.21, "grad_norm": 0.28333197613712585, "learning_rate": 1.8383697608805907e-05, "loss": 0.2277, "step": 3621 }, { "epoch": 0.21, "grad_norm": 0.35663776056728297, "learning_rate": 1.838268307654654e-05, "loss": 0.2561, "step": 3622 }, { "epoch": 0.21, "grad_norm": 0.38137141453365087, "learning_rate": 1.8381668253995216e-05, "loss": 0.3414, "step": 3623 }, { "epoch": 0.21, "grad_norm": 1.3007390949271715, "learning_rate": 1.8380653141187084e-05, "loss": 0.5905, "step": 3624 }, { "epoch": 0.21, "grad_norm": 0.3586717879521889, "learning_rate": 1.837963773815729e-05, "loss": 0.2828, "step": 3625 }, { "epoch": 0.21, "grad_norm": 0.4624648099073319, "learning_rate": 1.8378622044941007e-05, "loss": 0.2992, "step": 3626 }, { "epoch": 0.21, "grad_norm": 0.28370620018557996, "learning_rate": 1.8377606061573398e-05, "loss": 0.1702, "step": 3627 }, { "epoch": 0.21, "grad_norm": 0.4113606179544061, "learning_rate": 1.8376589788089655e-05, "loss": 0.2951, "step": 3628 }, { "epoch": 0.21, "grad_norm": 0.771704015761994, "learning_rate": 1.837557322452496e-05, "loss": 0.4089, "step": 3629 }, { "epoch": 0.21, "grad_norm": 0.4695330219200323, "learning_rate": 1.8374556370914533e-05, "loss": 0.3328, "step": 3630 }, { "epoch": 0.21, "grad_norm": 0.3891106792486114, "learning_rate": 1.8373539227293576e-05, "loss": 0.2798, "step": 3631 }, { "epoch": 0.21, "grad_norm": 0.9231594968954966, "learning_rate": 1.8372521793697317e-05, "loss": 0.5186, "step": 3632 }, { "epoch": 0.21, "grad_norm": 0.5334306794574006, "learning_rate": 1.8371504070160985e-05, "loss": 0.3294, "step": 3633 }, { "epoch": 0.21, "grad_norm": 0.4208129570730506, "learning_rate": 1.8370486056719828e-05, "loss": 0.2981, "step": 3634 }, { "epoch": 0.21, "grad_norm": 0.3440651749124253, "learning_rate": 1.8369467753409102e-05, "loss": 0.2406, "step": 3635 }, { "epoch": 0.21, "grad_norm": 1.0039452445703967, "learning_rate": 1.8368449160264064e-05, "loss": 0.5453, "step": 3636 }, { "epoch": 0.21, "grad_norm": 0.44073239184364055, "learning_rate": 1.836743027731999e-05, "loss": 0.3189, "step": 3637 }, { "epoch": 0.21, "grad_norm": 0.40024632916236896, "learning_rate": 1.8366411104612168e-05, "loss": 0.3527, "step": 3638 }, { "epoch": 0.21, "grad_norm": 0.4302314709355559, "learning_rate": 1.8365391642175892e-05, "loss": 0.3616, "step": 3639 }, { "epoch": 0.21, "grad_norm": 0.3902773433049301, "learning_rate": 1.836437189004646e-05, "loss": 0.2706, "step": 3640 }, { "epoch": 0.21, "grad_norm": 0.3034132980546468, "learning_rate": 1.836335184825919e-05, "loss": 0.2261, "step": 3641 }, { "epoch": 0.21, "grad_norm": 0.4224483560026623, "learning_rate": 1.8362331516849405e-05, "loss": 0.2859, "step": 3642 }, { "epoch": 0.21, "grad_norm": 0.3997927453552849, "learning_rate": 1.8361310895852437e-05, "loss": 0.2909, "step": 3643 }, { "epoch": 0.21, "grad_norm": 0.8073075120292302, "learning_rate": 1.8360289985303637e-05, "loss": 0.5745, "step": 3644 }, { "epoch": 0.21, "grad_norm": 0.5995548870672561, "learning_rate": 1.8359268785238348e-05, "loss": 0.4058, "step": 3645 }, { "epoch": 0.21, "grad_norm": 0.4135190586223736, "learning_rate": 1.8358247295691946e-05, "loss": 0.2841, "step": 3646 }, { "epoch": 0.21, "grad_norm": 0.2933792066466321, "learning_rate": 1.8357225516699797e-05, "loss": 0.2371, "step": 3647 }, { "epoch": 0.21, "grad_norm": 0.8306758061657067, "learning_rate": 1.835620344829729e-05, "loss": 0.3256, "step": 3648 }, { "epoch": 0.21, "grad_norm": 0.40272226138516737, "learning_rate": 1.8355181090519814e-05, "loss": 0.2961, "step": 3649 }, { "epoch": 0.21, "grad_norm": 0.7342811633805951, "learning_rate": 1.8354158443402777e-05, "loss": 0.4417, "step": 3650 }, { "epoch": 0.21, "grad_norm": 0.5508644077439324, "learning_rate": 1.835313550698159e-05, "loss": 0.3978, "step": 3651 }, { "epoch": 0.21, "grad_norm": 0.3936970414946991, "learning_rate": 1.8352112281291683e-05, "loss": 0.2251, "step": 3652 }, { "epoch": 0.21, "grad_norm": 0.30868325393237483, "learning_rate": 1.8351088766368487e-05, "loss": 0.1808, "step": 3653 }, { "epoch": 0.21, "grad_norm": 0.4717999561118713, "learning_rate": 1.8350064962247443e-05, "loss": 0.3688, "step": 3654 }, { "epoch": 0.21, "grad_norm": 0.3690124510455886, "learning_rate": 1.8349040868964012e-05, "loss": 0.1872, "step": 3655 }, { "epoch": 0.21, "grad_norm": 0.516836148527607, "learning_rate": 1.8348016486553653e-05, "loss": 0.4212, "step": 3656 }, { "epoch": 0.21, "grad_norm": 0.4174942641382965, "learning_rate": 1.8346991815051844e-05, "loss": 0.4136, "step": 3657 }, { "epoch": 0.21, "grad_norm": 0.34246783846879647, "learning_rate": 1.8345966854494065e-05, "loss": 0.2218, "step": 3658 }, { "epoch": 0.21, "grad_norm": 0.29464001657216854, "learning_rate": 1.8344941604915813e-05, "loss": 0.2341, "step": 3659 }, { "epoch": 0.21, "grad_norm": 1.0963469580919971, "learning_rate": 1.8343916066352593e-05, "loss": 0.5888, "step": 3660 }, { "epoch": 0.21, "grad_norm": 0.39006202105175725, "learning_rate": 1.834289023883992e-05, "loss": 0.2244, "step": 3661 }, { "epoch": 0.21, "grad_norm": 0.45351816412520496, "learning_rate": 1.8341864122413313e-05, "loss": 0.3389, "step": 3662 }, { "epoch": 0.21, "grad_norm": 1.013846492560218, "learning_rate": 1.8340837717108312e-05, "loss": 0.6265, "step": 3663 }, { "epoch": 0.21, "grad_norm": 0.44933729211335915, "learning_rate": 1.8339811022960458e-05, "loss": 0.3057, "step": 3664 }, { "epoch": 0.21, "grad_norm": 0.31780800408881466, "learning_rate": 1.833878404000531e-05, "loss": 0.1626, "step": 3665 }, { "epoch": 0.21, "grad_norm": 0.3529354991105561, "learning_rate": 1.8337756768278425e-05, "loss": 0.2648, "step": 3666 }, { "epoch": 0.21, "grad_norm": 0.4312121030833976, "learning_rate": 1.8336729207815386e-05, "loss": 0.2812, "step": 3667 }, { "epoch": 0.21, "grad_norm": 0.7354723969211455, "learning_rate": 1.833570135865177e-05, "loss": 0.3566, "step": 3668 }, { "epoch": 0.21, "grad_norm": 1.0099131930682428, "learning_rate": 1.8334673220823175e-05, "loss": 0.6111, "step": 3669 }, { "epoch": 0.21, "grad_norm": 0.31425998270147343, "learning_rate": 1.8333644794365205e-05, "loss": 0.27, "step": 3670 }, { "epoch": 0.21, "grad_norm": 0.3286391464636836, "learning_rate": 1.8332616079313473e-05, "loss": 0.2264, "step": 3671 }, { "epoch": 0.21, "grad_norm": 0.47239060853182213, "learning_rate": 1.8331587075703607e-05, "loss": 0.302, "step": 3672 }, { "epoch": 0.21, "grad_norm": 0.5911532765330798, "learning_rate": 1.833055778357124e-05, "loss": 0.3735, "step": 3673 }, { "epoch": 0.21, "grad_norm": 0.3816260176156264, "learning_rate": 1.8329528202952013e-05, "loss": 0.2915, "step": 3674 }, { "epoch": 0.21, "grad_norm": 1.2602046875475506, "learning_rate": 1.832849833388158e-05, "loss": 0.8317, "step": 3675 }, { "epoch": 0.21, "grad_norm": 0.5802384065602598, "learning_rate": 1.8327468176395614e-05, "loss": 0.378, "step": 3676 }, { "epoch": 0.21, "grad_norm": 0.3233690864917701, "learning_rate": 1.8326437730529778e-05, "loss": 0.2625, "step": 3677 }, { "epoch": 0.21, "grad_norm": 0.30650130488543587, "learning_rate": 1.8325406996319762e-05, "loss": 0.2177, "step": 3678 }, { "epoch": 0.21, "grad_norm": 0.4361961196440014, "learning_rate": 1.8324375973801262e-05, "loss": 0.3167, "step": 3679 }, { "epoch": 0.21, "grad_norm": 0.540582742403159, "learning_rate": 1.8323344663009976e-05, "loss": 0.4255, "step": 3680 }, { "epoch": 0.21, "grad_norm": 1.26530181020778, "learning_rate": 1.8322313063981628e-05, "loss": 0.5116, "step": 3681 }, { "epoch": 0.21, "grad_norm": 0.3019632406735647, "learning_rate": 1.8321281176751932e-05, "loss": 0.2601, "step": 3682 }, { "epoch": 0.21, "grad_norm": 0.5169685558732896, "learning_rate": 1.8320249001356627e-05, "loss": 0.3703, "step": 3683 }, { "epoch": 0.21, "grad_norm": 0.40742847831985723, "learning_rate": 1.831921653783146e-05, "loss": 0.1924, "step": 3684 }, { "epoch": 0.21, "grad_norm": 0.5064412702318805, "learning_rate": 1.8318183786212177e-05, "loss": 0.3476, "step": 3685 }, { "epoch": 0.21, "grad_norm": 0.39189664074458824, "learning_rate": 1.8317150746534553e-05, "loss": 0.3303, "step": 3686 }, { "epoch": 0.21, "grad_norm": 1.386780392774275, "learning_rate": 1.8316117418834352e-05, "loss": 0.8824, "step": 3687 }, { "epoch": 0.21, "grad_norm": 0.3476158374729174, "learning_rate": 1.831508380314736e-05, "loss": 0.1713, "step": 3688 }, { "epoch": 0.21, "grad_norm": 0.5407610308721493, "learning_rate": 1.831404989950938e-05, "loss": 0.4049, "step": 3689 }, { "epoch": 0.21, "grad_norm": 0.34332848216555034, "learning_rate": 1.8313015707956205e-05, "loss": 0.2995, "step": 3690 }, { "epoch": 0.21, "grad_norm": 0.4909483803893434, "learning_rate": 1.831198122852366e-05, "loss": 0.2905, "step": 3691 }, { "epoch": 0.21, "grad_norm": 0.34866275708306377, "learning_rate": 1.8310946461247553e-05, "loss": 0.2568, "step": 3692 }, { "epoch": 0.21, "grad_norm": 0.5144534607864611, "learning_rate": 1.8309911406163736e-05, "loss": 0.3944, "step": 3693 }, { "epoch": 0.21, "grad_norm": 0.4451881757530133, "learning_rate": 1.830887606330804e-05, "loss": 0.2252, "step": 3694 }, { "epoch": 0.21, "grad_norm": 0.46182446045835396, "learning_rate": 1.8307840432716323e-05, "loss": 0.3298, "step": 3695 }, { "epoch": 0.21, "grad_norm": 0.7256437850629539, "learning_rate": 1.830680451442445e-05, "loss": 0.4658, "step": 3696 }, { "epoch": 0.21, "grad_norm": 0.31246873377998513, "learning_rate": 1.8305768308468294e-05, "loss": 0.1741, "step": 3697 }, { "epoch": 0.21, "grad_norm": 0.3792276788301899, "learning_rate": 1.830473181488374e-05, "loss": 0.2723, "step": 3698 }, { "epoch": 0.21, "grad_norm": 1.4475052756468612, "learning_rate": 1.8303695033706675e-05, "loss": 0.8954, "step": 3699 }, { "epoch": 0.21, "grad_norm": 0.4832360785231713, "learning_rate": 1.8302657964973014e-05, "loss": 0.3362, "step": 3700 }, { "epoch": 0.21, "grad_norm": 0.38449186292934234, "learning_rate": 1.830162060871866e-05, "loss": 0.2838, "step": 3701 }, { "epoch": 0.21, "grad_norm": 0.4419538620503951, "learning_rate": 1.8300582964979544e-05, "loss": 0.3564, "step": 3702 }, { "epoch": 0.21, "grad_norm": 0.36778425880138244, "learning_rate": 1.8299545033791596e-05, "loss": 0.2228, "step": 3703 }, { "epoch": 0.21, "grad_norm": 0.30802703243687346, "learning_rate": 1.829850681519076e-05, "loss": 0.1985, "step": 3704 }, { "epoch": 0.21, "grad_norm": 0.6655387140633007, "learning_rate": 1.8297468309212994e-05, "loss": 0.4198, "step": 3705 }, { "epoch": 0.21, "grad_norm": 0.45041933695514075, "learning_rate": 1.8296429515894255e-05, "loss": 0.3048, "step": 3706 }, { "epoch": 0.21, "grad_norm": 0.41420386663343106, "learning_rate": 1.8295390435270516e-05, "loss": 0.2809, "step": 3707 }, { "epoch": 0.21, "grad_norm": 0.6850026949422853, "learning_rate": 1.8294351067377762e-05, "loss": 0.4826, "step": 3708 }, { "epoch": 0.21, "grad_norm": 0.3475258356965287, "learning_rate": 1.829331141225199e-05, "loss": 0.2416, "step": 3709 }, { "epoch": 0.21, "grad_norm": 0.39414644413001365, "learning_rate": 1.8292271469929202e-05, "loss": 0.2489, "step": 3710 }, { "epoch": 0.21, "grad_norm": 0.46620981413281065, "learning_rate": 1.829123124044541e-05, "loss": 0.3112, "step": 3711 }, { "epoch": 0.21, "grad_norm": 0.8393024501634222, "learning_rate": 1.8290190723836632e-05, "loss": 0.4927, "step": 3712 }, { "epoch": 0.21, "grad_norm": 0.5442988866000577, "learning_rate": 1.828914992013891e-05, "loss": 0.3368, "step": 3713 }, { "epoch": 0.21, "grad_norm": 0.5338386638537052, "learning_rate": 1.828810882938828e-05, "loss": 0.2955, "step": 3714 }, { "epoch": 0.21, "grad_norm": 0.7721046804040257, "learning_rate": 1.8287067451620796e-05, "loss": 0.4498, "step": 3715 }, { "epoch": 0.21, "grad_norm": 0.39359198704409026, "learning_rate": 1.8286025786872526e-05, "loss": 0.3215, "step": 3716 }, { "epoch": 0.21, "grad_norm": 0.309872202825924, "learning_rate": 1.8284983835179536e-05, "loss": 0.2202, "step": 3717 }, { "epoch": 0.21, "grad_norm": 0.3891445865569338, "learning_rate": 1.8283941596577917e-05, "loss": 0.2658, "step": 3718 }, { "epoch": 0.21, "grad_norm": 0.5257520788875862, "learning_rate": 1.8282899071103755e-05, "loss": 0.3476, "step": 3719 }, { "epoch": 0.21, "grad_norm": 0.9016773666981592, "learning_rate": 1.8281856258793155e-05, "loss": 0.364, "step": 3720 }, { "epoch": 0.21, "grad_norm": 0.40787992390494676, "learning_rate": 1.8280813159682226e-05, "loss": 0.3125, "step": 3721 }, { "epoch": 0.21, "grad_norm": 0.39215624227848944, "learning_rate": 1.8279769773807094e-05, "loss": 0.295, "step": 3722 }, { "epoch": 0.21, "grad_norm": 0.19942256276475304, "learning_rate": 1.8278726101203892e-05, "loss": 0.1365, "step": 3723 }, { "epoch": 0.21, "grad_norm": 1.0897560592528546, "learning_rate": 1.8277682141908763e-05, "loss": 0.4017, "step": 3724 }, { "epoch": 0.21, "grad_norm": 0.45932270538036235, "learning_rate": 1.8276637895957853e-05, "loss": 0.331, "step": 3725 }, { "epoch": 0.21, "grad_norm": 0.38873202435458054, "learning_rate": 1.827559336338733e-05, "loss": 0.3411, "step": 3726 }, { "epoch": 0.21, "grad_norm": 0.7523879935802436, "learning_rate": 1.8274548544233367e-05, "loss": 0.2851, "step": 3727 }, { "epoch": 0.21, "grad_norm": 0.3509208008137019, "learning_rate": 1.827350343853214e-05, "loss": 0.2867, "step": 3728 }, { "epoch": 0.21, "grad_norm": 0.4536301444246971, "learning_rate": 1.8272458046319848e-05, "loss": 0.3437, "step": 3729 }, { "epoch": 0.21, "grad_norm": 1.733104281686962, "learning_rate": 1.8271412367632688e-05, "loss": 0.4302, "step": 3730 }, { "epoch": 0.21, "grad_norm": 0.294900982042248, "learning_rate": 1.8270366402506872e-05, "loss": 0.223, "step": 3731 }, { "epoch": 0.21, "grad_norm": 0.45382345157960424, "learning_rate": 1.8269320150978625e-05, "loss": 0.312, "step": 3732 }, { "epoch": 0.21, "grad_norm": 0.4332203720271532, "learning_rate": 1.8268273613084177e-05, "loss": 0.2502, "step": 3733 }, { "epoch": 0.21, "grad_norm": 0.4650150296518048, "learning_rate": 1.826722678885977e-05, "loss": 0.3157, "step": 3734 }, { "epoch": 0.21, "grad_norm": 1.571029287761417, "learning_rate": 1.8266179678341654e-05, "loss": 0.7587, "step": 3735 }, { "epoch": 0.21, "grad_norm": 1.048302589229643, "learning_rate": 1.826513228156609e-05, "loss": 0.3022, "step": 3736 }, { "epoch": 0.21, "grad_norm": 0.3862146200874309, "learning_rate": 1.826408459856935e-05, "loss": 0.2914, "step": 3737 }, { "epoch": 0.21, "grad_norm": 0.5222607559671937, "learning_rate": 1.826303662938772e-05, "loss": 0.2394, "step": 3738 }, { "epoch": 0.21, "grad_norm": 1.3267918828905096, "learning_rate": 1.826198837405748e-05, "loss": 0.709, "step": 3739 }, { "epoch": 0.21, "grad_norm": 0.39077333658956925, "learning_rate": 1.8260939832614942e-05, "loss": 0.2071, "step": 3740 }, { "epoch": 0.21, "grad_norm": 0.44790985884301965, "learning_rate": 1.8259891005096414e-05, "loss": 0.3655, "step": 3741 }, { "epoch": 0.21, "grad_norm": 0.667719602409911, "learning_rate": 1.8258841891538214e-05, "loss": 0.4282, "step": 3742 }, { "epoch": 0.22, "grad_norm": 0.516883514093155, "learning_rate": 1.8257792491976676e-05, "loss": 0.2252, "step": 3743 }, { "epoch": 0.22, "grad_norm": 0.42556191862763, "learning_rate": 1.825674280644814e-05, "loss": 0.192, "step": 3744 }, { "epoch": 0.22, "grad_norm": 0.42746714546213527, "learning_rate": 1.8255692834988952e-05, "loss": 0.3717, "step": 3745 }, { "epoch": 0.22, "grad_norm": 0.3455175925005218, "learning_rate": 1.8254642577635478e-05, "loss": 0.2232, "step": 3746 }, { "epoch": 0.22, "grad_norm": 0.5540250899869666, "learning_rate": 1.8253592034424085e-05, "loss": 0.5053, "step": 3747 }, { "epoch": 0.22, "grad_norm": 1.0421298320869656, "learning_rate": 1.8252541205391155e-05, "loss": 0.6174, "step": 3748 }, { "epoch": 0.22, "grad_norm": 0.44154945291697445, "learning_rate": 1.825149009057308e-05, "loss": 0.2326, "step": 3749 }, { "epoch": 0.22, "grad_norm": 0.3709396033941532, "learning_rate": 1.8250438690006257e-05, "loss": 0.2199, "step": 3750 }, { "epoch": 0.22, "grad_norm": 1.3866050733970163, "learning_rate": 1.8249387003727097e-05, "loss": 0.7559, "step": 3751 }, { "epoch": 0.22, "grad_norm": 0.384792848713734, "learning_rate": 1.824833503177202e-05, "loss": 0.3101, "step": 3752 }, { "epoch": 0.22, "grad_norm": 0.3965591271182984, "learning_rate": 1.8247282774177456e-05, "loss": 0.3009, "step": 3753 }, { "epoch": 0.22, "grad_norm": 0.7462894283297303, "learning_rate": 1.824623023097984e-05, "loss": 0.4997, "step": 3754 }, { "epoch": 0.22, "grad_norm": 0.3943149843852122, "learning_rate": 1.824517740221563e-05, "loss": 0.2825, "step": 3755 }, { "epoch": 0.22, "grad_norm": 0.2177870980472017, "learning_rate": 1.824412428792128e-05, "loss": 0.1226, "step": 3756 }, { "epoch": 0.22, "grad_norm": 0.499144612415864, "learning_rate": 1.8243070888133262e-05, "loss": 0.361, "step": 3757 }, { "epoch": 0.22, "grad_norm": 0.5001785109282243, "learning_rate": 1.8242017202888053e-05, "loss": 0.3159, "step": 3758 }, { "epoch": 0.22, "grad_norm": 0.7388789713211021, "learning_rate": 1.824096323222214e-05, "loss": 0.3556, "step": 3759 }, { "epoch": 0.22, "grad_norm": 0.534150495177783, "learning_rate": 1.8239908976172027e-05, "loss": 0.4201, "step": 3760 }, { "epoch": 0.22, "grad_norm": 0.4950814070548895, "learning_rate": 1.823885443477422e-05, "loss": 0.2938, "step": 3761 }, { "epoch": 0.22, "grad_norm": 0.28446685490046, "learning_rate": 1.8237799608065238e-05, "loss": 0.2129, "step": 3762 }, { "epoch": 0.22, "grad_norm": 0.6083754544167314, "learning_rate": 1.823674449608161e-05, "loss": 0.405, "step": 3763 }, { "epoch": 0.22, "grad_norm": 0.3692492992721875, "learning_rate": 1.8235689098859874e-05, "loss": 0.2551, "step": 3764 }, { "epoch": 0.22, "grad_norm": 0.39185364731901257, "learning_rate": 1.823463341643658e-05, "loss": 0.3614, "step": 3765 }, { "epoch": 0.22, "grad_norm": 0.8683819485532979, "learning_rate": 1.8233577448848283e-05, "loss": 0.4439, "step": 3766 }, { "epoch": 0.22, "grad_norm": 0.3771140986653881, "learning_rate": 1.8232521196131552e-05, "loss": 0.3055, "step": 3767 }, { "epoch": 0.22, "grad_norm": 0.35751520286149197, "learning_rate": 1.823146465832297e-05, "loss": 0.2238, "step": 3768 }, { "epoch": 0.22, "grad_norm": 0.4560483343446017, "learning_rate": 1.823040783545912e-05, "loss": 0.309, "step": 3769 }, { "epoch": 0.22, "grad_norm": 0.35578254331441966, "learning_rate": 1.8229350727576597e-05, "loss": 0.2929, "step": 3770 }, { "epoch": 0.22, "grad_norm": 0.7910099204556784, "learning_rate": 1.8228293334712015e-05, "loss": 0.5764, "step": 3771 }, { "epoch": 0.22, "grad_norm": 0.47524677083124306, "learning_rate": 1.822723565690199e-05, "loss": 0.3022, "step": 3772 }, { "epoch": 0.22, "grad_norm": 0.3292122348325114, "learning_rate": 1.8226177694183144e-05, "loss": 0.2735, "step": 3773 }, { "epoch": 0.22, "grad_norm": 0.5184964716025334, "learning_rate": 1.8225119446592122e-05, "loss": 0.2828, "step": 3774 }, { "epoch": 0.22, "grad_norm": 0.3960302570284166, "learning_rate": 1.8224060914165564e-05, "loss": 0.3376, "step": 3775 }, { "epoch": 0.22, "grad_norm": 0.3777909633108905, "learning_rate": 1.8223002096940133e-05, "loss": 0.2277, "step": 3776 }, { "epoch": 0.22, "grad_norm": 0.3777145561989262, "learning_rate": 1.822194299495249e-05, "loss": 0.334, "step": 3777 }, { "epoch": 0.22, "grad_norm": 0.9906118795976717, "learning_rate": 1.8220883608239317e-05, "loss": 0.6105, "step": 3778 }, { "epoch": 0.22, "grad_norm": 0.3680252029129673, "learning_rate": 1.82198239368373e-05, "loss": 0.2007, "step": 3779 }, { "epoch": 0.22, "grad_norm": 0.29715685119899815, "learning_rate": 1.8218763980783127e-05, "loss": 0.2653, "step": 3780 }, { "epoch": 0.22, "grad_norm": 0.3725279137918228, "learning_rate": 1.8217703740113518e-05, "loss": 0.3547, "step": 3781 }, { "epoch": 0.22, "grad_norm": 0.36171919213090536, "learning_rate": 1.8216643214865176e-05, "loss": 0.1849, "step": 3782 }, { "epoch": 0.22, "grad_norm": 0.5560972723081516, "learning_rate": 1.8215582405074838e-05, "loss": 0.4323, "step": 3783 }, { "epoch": 0.22, "grad_norm": 1.6986156985423673, "learning_rate": 1.821452131077923e-05, "loss": 0.7669, "step": 3784 }, { "epoch": 0.22, "grad_norm": 0.2939380110603323, "learning_rate": 1.8213459932015104e-05, "loss": 0.2367, "step": 3785 }, { "epoch": 0.22, "grad_norm": 0.5918622124773595, "learning_rate": 1.8212398268819214e-05, "loss": 0.4855, "step": 3786 }, { "epoch": 0.22, "grad_norm": 0.4214369828365935, "learning_rate": 1.8211336321228326e-05, "loss": 0.3198, "step": 3787 }, { "epoch": 0.22, "grad_norm": 0.3423353082604522, "learning_rate": 1.8210274089279214e-05, "loss": 0.2273, "step": 3788 }, { "epoch": 0.22, "grad_norm": 0.48273982176060215, "learning_rate": 1.8209211573008663e-05, "loss": 0.2885, "step": 3789 }, { "epoch": 0.22, "grad_norm": 1.081969336795844, "learning_rate": 1.8208148772453466e-05, "loss": 0.6395, "step": 3790 }, { "epoch": 0.22, "grad_norm": 0.4540363009130433, "learning_rate": 1.8207085687650433e-05, "loss": 0.3198, "step": 3791 }, { "epoch": 0.22, "grad_norm": 0.6965030051365356, "learning_rate": 1.8206022318636375e-05, "loss": 0.3283, "step": 3792 }, { "epoch": 0.22, "grad_norm": 0.36981946890013234, "learning_rate": 1.8204958665448116e-05, "loss": 0.3409, "step": 3793 }, { "epoch": 0.22, "grad_norm": 0.30072224494974714, "learning_rate": 1.8203894728122492e-05, "loss": 0.2236, "step": 3794 }, { "epoch": 0.22, "grad_norm": 0.489290813598683, "learning_rate": 1.8202830506696346e-05, "loss": 0.3333, "step": 3795 }, { "epoch": 0.22, "grad_norm": 0.3308604611705892, "learning_rate": 1.8201766001206533e-05, "loss": 0.2731, "step": 3796 }, { "epoch": 0.22, "grad_norm": 0.5543044671117653, "learning_rate": 1.8200701211689915e-05, "loss": 0.3272, "step": 3797 }, { "epoch": 0.22, "grad_norm": 0.49671661520822036, "learning_rate": 1.819963613818337e-05, "loss": 0.3211, "step": 3798 }, { "epoch": 0.22, "grad_norm": 0.39700198461844927, "learning_rate": 1.8198570780723773e-05, "loss": 0.2987, "step": 3799 }, { "epoch": 0.22, "grad_norm": 0.6450937703071342, "learning_rate": 1.8197505139348023e-05, "loss": 0.3396, "step": 3800 }, { "epoch": 0.22, "grad_norm": 0.34925975121207203, "learning_rate": 1.8196439214093023e-05, "loss": 0.2853, "step": 3801 }, { "epoch": 0.22, "grad_norm": 0.3075989600836906, "learning_rate": 1.819537300499569e-05, "loss": 0.1727, "step": 3802 }, { "epoch": 0.22, "grad_norm": 0.40942585125745, "learning_rate": 1.8194306512092938e-05, "loss": 0.3184, "step": 3803 }, { "epoch": 0.22, "grad_norm": 0.3781360749319683, "learning_rate": 1.8193239735421703e-05, "loss": 0.3548, "step": 3804 }, { "epoch": 0.22, "grad_norm": 0.5636306621999936, "learning_rate": 1.819217267501893e-05, "loss": 0.2625, "step": 3805 }, { "epoch": 0.22, "grad_norm": 0.44361177405533797, "learning_rate": 1.819110533092157e-05, "loss": 0.3215, "step": 3806 }, { "epoch": 0.22, "grad_norm": 0.6164138406659124, "learning_rate": 1.8190037703166585e-05, "loss": 0.4649, "step": 3807 }, { "epoch": 0.22, "grad_norm": 0.25182141423743754, "learning_rate": 1.8188969791790946e-05, "loss": 0.1842, "step": 3808 }, { "epoch": 0.22, "grad_norm": 0.4578954704514088, "learning_rate": 1.8187901596831638e-05, "loss": 0.3233, "step": 3809 }, { "epoch": 0.22, "grad_norm": 0.5217483050301579, "learning_rate": 1.8186833118325645e-05, "loss": 0.4206, "step": 3810 }, { "epoch": 0.22, "grad_norm": 0.5431293205707014, "learning_rate": 1.8185764356309975e-05, "loss": 0.4136, "step": 3811 }, { "epoch": 0.22, "grad_norm": 0.45279199338795334, "learning_rate": 1.8184695310821635e-05, "loss": 0.2989, "step": 3812 }, { "epoch": 0.22, "grad_norm": 0.5020020584142632, "learning_rate": 1.8183625981897653e-05, "loss": 0.3023, "step": 3813 }, { "epoch": 0.22, "grad_norm": 0.34007283638326713, "learning_rate": 1.818255636957505e-05, "loss": 0.2279, "step": 3814 }, { "epoch": 0.22, "grad_norm": 0.5940995910555453, "learning_rate": 1.818148647389088e-05, "loss": 0.2751, "step": 3815 }, { "epoch": 0.22, "grad_norm": 0.3728257688155502, "learning_rate": 1.8180416294882178e-05, "loss": 0.3428, "step": 3816 }, { "epoch": 0.22, "grad_norm": 0.540902906588058, "learning_rate": 1.817934583258601e-05, "loss": 0.413, "step": 3817 }, { "epoch": 0.22, "grad_norm": 0.32082684762622404, "learning_rate": 1.8178275087039452e-05, "loss": 0.1668, "step": 3818 }, { "epoch": 0.22, "grad_norm": 0.4394101437520785, "learning_rate": 1.8177204058279577e-05, "loss": 0.3368, "step": 3819 }, { "epoch": 0.22, "grad_norm": 0.41746611985218063, "learning_rate": 1.817613274634348e-05, "loss": 0.2987, "step": 3820 }, { "epoch": 0.22, "grad_norm": 0.5320382789127321, "learning_rate": 1.8175061151268255e-05, "loss": 0.3093, "step": 3821 }, { "epoch": 0.22, "grad_norm": 0.2989541511419468, "learning_rate": 1.8173989273091014e-05, "loss": 0.2661, "step": 3822 }, { "epoch": 0.22, "grad_norm": 1.724527721383952, "learning_rate": 1.8172917111848878e-05, "loss": 0.8488, "step": 3823 }, { "epoch": 0.22, "grad_norm": 0.3618052569029262, "learning_rate": 1.817184466757897e-05, "loss": 0.2378, "step": 3824 }, { "epoch": 0.22, "grad_norm": 0.4104423447288719, "learning_rate": 1.8170771940318437e-05, "loss": 0.3514, "step": 3825 }, { "epoch": 0.22, "grad_norm": 0.600402119776531, "learning_rate": 1.816969893010442e-05, "loss": 0.496, "step": 3826 }, { "epoch": 0.22, "grad_norm": 0.3670048381276231, "learning_rate": 1.8168625636974085e-05, "loss": 0.2748, "step": 3827 }, { "epoch": 0.22, "grad_norm": 0.3009020200838833, "learning_rate": 1.816755206096459e-05, "loss": 0.1996, "step": 3828 }, { "epoch": 0.22, "grad_norm": 0.504312784568991, "learning_rate": 1.816647820211312e-05, "loss": 0.38, "step": 3829 }, { "epoch": 0.22, "grad_norm": 0.6567393314755094, "learning_rate": 1.8165404060456863e-05, "loss": 0.3679, "step": 3830 }, { "epoch": 0.22, "grad_norm": 0.3984924468921483, "learning_rate": 1.8164329636033012e-05, "loss": 0.252, "step": 3831 }, { "epoch": 0.22, "grad_norm": 0.4239600262528248, "learning_rate": 1.8163254928878777e-05, "loss": 0.3663, "step": 3832 }, { "epoch": 0.22, "grad_norm": 0.4164722216338981, "learning_rate": 1.8162179939031377e-05, "loss": 0.299, "step": 3833 }, { "epoch": 0.22, "grad_norm": 0.35787438325680954, "learning_rate": 1.8161104666528033e-05, "loss": 0.1849, "step": 3834 }, { "epoch": 0.22, "grad_norm": 0.982908517879726, "learning_rate": 1.8160029111405986e-05, "loss": 0.6916, "step": 3835 }, { "epoch": 0.22, "grad_norm": 0.46631107207101113, "learning_rate": 1.8158953273702486e-05, "loss": 0.3176, "step": 3836 }, { "epoch": 0.22, "grad_norm": 0.3537925294977268, "learning_rate": 1.8157877153454785e-05, "loss": 0.273, "step": 3837 }, { "epoch": 0.22, "grad_norm": 0.6615628600913611, "learning_rate": 1.8156800750700143e-05, "loss": 0.4955, "step": 3838 }, { "epoch": 0.22, "grad_norm": 0.41214597949670184, "learning_rate": 1.8155724065475845e-05, "loss": 0.2243, "step": 3839 }, { "epoch": 0.22, "grad_norm": 0.3755937652787498, "learning_rate": 1.8154647097819172e-05, "loss": 0.2564, "step": 3840 }, { "epoch": 0.22, "grad_norm": 0.48876484681974797, "learning_rate": 1.8153569847767423e-05, "loss": 0.2991, "step": 3841 }, { "epoch": 0.22, "grad_norm": 0.7458949562080135, "learning_rate": 1.8152492315357902e-05, "loss": 0.5104, "step": 3842 }, { "epoch": 0.22, "grad_norm": 0.3735505680224295, "learning_rate": 1.815141450062792e-05, "loss": 0.282, "step": 3843 }, { "epoch": 0.22, "grad_norm": 0.41540487618060473, "learning_rate": 1.8150336403614804e-05, "loss": 0.3188, "step": 3844 }, { "epoch": 0.22, "grad_norm": 0.5712946769237918, "learning_rate": 1.814925802435589e-05, "loss": 0.3764, "step": 3845 }, { "epoch": 0.22, "grad_norm": 0.3572232718406907, "learning_rate": 1.814817936288852e-05, "loss": 0.2627, "step": 3846 }, { "epoch": 0.22, "grad_norm": 0.3055017102946289, "learning_rate": 1.814710041925005e-05, "loss": 0.1336, "step": 3847 }, { "epoch": 0.22, "grad_norm": 0.3538207884429109, "learning_rate": 1.8146021193477846e-05, "loss": 0.3007, "step": 3848 }, { "epoch": 0.22, "grad_norm": 0.4314766190553378, "learning_rate": 1.8144941685609273e-05, "loss": 0.3486, "step": 3849 }, { "epoch": 0.22, "grad_norm": 0.6441772312208831, "learning_rate": 1.8143861895681723e-05, "loss": 0.4833, "step": 3850 }, { "epoch": 0.22, "grad_norm": 0.30729483425364906, "learning_rate": 1.8142781823732582e-05, "loss": 0.1791, "step": 3851 }, { "epoch": 0.22, "grad_norm": 0.3808537248820102, "learning_rate": 1.814170146979926e-05, "loss": 0.2803, "step": 3852 }, { "epoch": 0.22, "grad_norm": 0.37558207046729103, "learning_rate": 1.8140620833919165e-05, "loss": 0.2885, "step": 3853 }, { "epoch": 0.22, "grad_norm": 0.6154231825728195, "learning_rate": 1.813953991612972e-05, "loss": 0.4008, "step": 3854 }, { "epoch": 0.22, "grad_norm": 0.35044717507297185, "learning_rate": 1.813845871646836e-05, "loss": 0.2794, "step": 3855 }, { "epoch": 0.22, "grad_norm": 0.412504774677019, "learning_rate": 1.813737723497252e-05, "loss": 0.3406, "step": 3856 }, { "epoch": 0.22, "grad_norm": 0.5714736784254149, "learning_rate": 1.8136295471679662e-05, "loss": 0.2334, "step": 3857 }, { "epoch": 0.22, "grad_norm": 0.3209375714349231, "learning_rate": 1.8135213426627237e-05, "loss": 0.2582, "step": 3858 }, { "epoch": 0.22, "grad_norm": 0.6265957239815656, "learning_rate": 1.8134131099852724e-05, "loss": 0.2952, "step": 3859 }, { "epoch": 0.22, "grad_norm": 0.4414280891315469, "learning_rate": 1.81330484913936e-05, "loss": 0.284, "step": 3860 }, { "epoch": 0.22, "grad_norm": 0.41773899450768254, "learning_rate": 1.8131965601287357e-05, "loss": 0.3228, "step": 3861 }, { "epoch": 0.22, "grad_norm": 0.8234390961731449, "learning_rate": 1.8130882429571496e-05, "loss": 0.5967, "step": 3862 }, { "epoch": 0.22, "grad_norm": 0.5027296598506996, "learning_rate": 1.8129798976283522e-05, "loss": 0.4022, "step": 3863 }, { "epoch": 0.22, "grad_norm": 0.4194025271798038, "learning_rate": 1.812871524146096e-05, "loss": 0.2134, "step": 3864 }, { "epoch": 0.22, "grad_norm": 0.3110021600253895, "learning_rate": 1.812763122514134e-05, "loss": 0.2123, "step": 3865 }, { "epoch": 0.22, "grad_norm": 0.7499695380435911, "learning_rate": 1.8126546927362204e-05, "loss": 0.4432, "step": 3866 }, { "epoch": 0.22, "grad_norm": 0.35946689971793444, "learning_rate": 1.8125462348161093e-05, "loss": 0.2403, "step": 3867 }, { "epoch": 0.22, "grad_norm": 0.4073612075479078, "learning_rate": 1.812437748757557e-05, "loss": 0.3583, "step": 3868 }, { "epoch": 0.22, "grad_norm": 1.1775040168059026, "learning_rate": 1.8123292345643203e-05, "loss": 0.7311, "step": 3869 }, { "epoch": 0.22, "grad_norm": 0.3359065562504141, "learning_rate": 1.8122206922401573e-05, "loss": 0.2049, "step": 3870 }, { "epoch": 0.22, "grad_norm": 0.3035022075371208, "learning_rate": 1.8121121217888268e-05, "loss": 0.2616, "step": 3871 }, { "epoch": 0.22, "grad_norm": 0.4404755357892392, "learning_rate": 1.8120035232140884e-05, "loss": 0.3895, "step": 3872 }, { "epoch": 0.22, "grad_norm": 0.3304185548692813, "learning_rate": 1.8118948965197027e-05, "loss": 0.2378, "step": 3873 }, { "epoch": 0.22, "grad_norm": 1.1038158285453556, "learning_rate": 1.8117862417094318e-05, "loss": 0.8049, "step": 3874 }, { "epoch": 0.22, "grad_norm": 0.47637000688014897, "learning_rate": 1.811677558787038e-05, "loss": 0.4025, "step": 3875 }, { "epoch": 0.22, "grad_norm": 0.3109465314784538, "learning_rate": 1.8115688477562855e-05, "loss": 0.27, "step": 3876 }, { "epoch": 0.22, "grad_norm": 0.2238118146029211, "learning_rate": 1.8114601086209387e-05, "loss": 0.137, "step": 3877 }, { "epoch": 0.22, "grad_norm": 0.6389128213112062, "learning_rate": 1.8113513413847634e-05, "loss": 0.4794, "step": 3878 }, { "epoch": 0.22, "grad_norm": 0.9375943093100196, "learning_rate": 1.811242546051526e-05, "loss": 0.2699, "step": 3879 }, { "epoch": 0.22, "grad_norm": 0.4320199364520138, "learning_rate": 1.811133722624994e-05, "loss": 0.2924, "step": 3880 }, { "epoch": 0.22, "grad_norm": 1.0738585193981018, "learning_rate": 1.811024871108936e-05, "loss": 0.5673, "step": 3881 }, { "epoch": 0.22, "grad_norm": 0.4571929492755686, "learning_rate": 1.8109159915071215e-05, "loss": 0.3033, "step": 3882 }, { "epoch": 0.22, "grad_norm": 0.40435453226191476, "learning_rate": 1.810807083823321e-05, "loss": 0.3142, "step": 3883 }, { "epoch": 0.22, "grad_norm": 0.3231601205057292, "learning_rate": 1.8106981480613063e-05, "loss": 0.2424, "step": 3884 }, { "epoch": 0.22, "grad_norm": 0.47123062190681675, "learning_rate": 1.8105891842248496e-05, "loss": 0.3023, "step": 3885 }, { "epoch": 0.22, "grad_norm": 0.5011503701732182, "learning_rate": 1.810480192317724e-05, "loss": 0.3053, "step": 3886 }, { "epoch": 0.22, "grad_norm": 0.5743534405718205, "learning_rate": 1.8103711723437048e-05, "loss": 0.4042, "step": 3887 }, { "epoch": 0.22, "grad_norm": 0.3408418367503883, "learning_rate": 1.8102621243065665e-05, "loss": 0.2877, "step": 3888 }, { "epoch": 0.22, "grad_norm": 0.6381779781359501, "learning_rate": 1.8101530482100855e-05, "loss": 0.4795, "step": 3889 }, { "epoch": 0.22, "grad_norm": 0.2908090921032573, "learning_rate": 1.8100439440580393e-05, "loss": 0.1469, "step": 3890 }, { "epoch": 0.22, "grad_norm": 0.4231337495748093, "learning_rate": 1.809934811854206e-05, "loss": 0.3074, "step": 3891 }, { "epoch": 0.22, "grad_norm": 0.3200805794748924, "learning_rate": 1.8098256516023654e-05, "loss": 0.2839, "step": 3892 }, { "epoch": 0.22, "grad_norm": 1.011602052709592, "learning_rate": 1.809716463306297e-05, "loss": 0.2713, "step": 3893 }, { "epoch": 0.22, "grad_norm": 0.3933605424410423, "learning_rate": 1.8096072469697822e-05, "loss": 0.324, "step": 3894 }, { "epoch": 0.22, "grad_norm": 0.7690843943555367, "learning_rate": 1.8094980025966036e-05, "loss": 0.4747, "step": 3895 }, { "epoch": 0.22, "grad_norm": 0.3765206416318077, "learning_rate": 1.809388730190544e-05, "loss": 0.2389, "step": 3896 }, { "epoch": 0.22, "grad_norm": 0.3835611886266919, "learning_rate": 1.8092794297553873e-05, "loss": 0.28, "step": 3897 }, { "epoch": 0.22, "grad_norm": 0.4180873590762182, "learning_rate": 1.8091701012949187e-05, "loss": 0.2725, "step": 3898 }, { "epoch": 0.22, "grad_norm": 0.44449937407460755, "learning_rate": 1.8090607448129244e-05, "loss": 0.3152, "step": 3899 }, { "epoch": 0.22, "grad_norm": 0.43700527552620777, "learning_rate": 1.8089513603131913e-05, "loss": 0.301, "step": 3900 }, { "epoch": 0.22, "grad_norm": 0.6587341669479327, "learning_rate": 1.808841947799507e-05, "loss": 0.4034, "step": 3901 }, { "epoch": 0.22, "grad_norm": 1.2622215351655142, "learning_rate": 1.808732507275661e-05, "loss": 0.7012, "step": 3902 }, { "epoch": 0.22, "grad_norm": 0.35778822861958404, "learning_rate": 1.8086230387454434e-05, "loss": 0.2038, "step": 3903 }, { "epoch": 0.22, "grad_norm": 0.41095801057868664, "learning_rate": 1.8085135422126448e-05, "loss": 0.2571, "step": 3904 }, { "epoch": 0.22, "grad_norm": 0.5132522096870761, "learning_rate": 1.8084040176810567e-05, "loss": 0.349, "step": 3905 }, { "epoch": 0.22, "grad_norm": 0.4841833694973949, "learning_rate": 1.808294465154472e-05, "loss": 0.2723, "step": 3906 }, { "epoch": 0.22, "grad_norm": 0.4857845170840667, "learning_rate": 1.8081848846366852e-05, "loss": 0.3875, "step": 3907 }, { "epoch": 0.22, "grad_norm": 0.5247363261757332, "learning_rate": 1.8080752761314904e-05, "loss": 0.3969, "step": 3908 }, { "epoch": 0.22, "grad_norm": 0.34761712484213264, "learning_rate": 1.8079656396426834e-05, "loss": 0.2346, "step": 3909 }, { "epoch": 0.22, "grad_norm": 0.2940198797850371, "learning_rate": 1.807855975174061e-05, "loss": 0.1858, "step": 3910 }, { "epoch": 0.22, "grad_norm": 0.40981413692161606, "learning_rate": 1.8077462827294214e-05, "loss": 0.3515, "step": 3911 }, { "epoch": 0.22, "grad_norm": 0.3987787782958305, "learning_rate": 1.8076365623125625e-05, "loss": 0.2753, "step": 3912 }, { "epoch": 0.22, "grad_norm": 0.8541449017563671, "learning_rate": 1.8075268139272842e-05, "loss": 0.5343, "step": 3913 }, { "epoch": 0.22, "grad_norm": 1.122921649883041, "learning_rate": 1.8074170375773867e-05, "loss": 0.7047, "step": 3914 }, { "epoch": 0.22, "grad_norm": 0.337497593773467, "learning_rate": 1.8073072332666723e-05, "loss": 0.2768, "step": 3915 }, { "epoch": 0.22, "grad_norm": 0.4214408990448417, "learning_rate": 1.807197400998943e-05, "loss": 0.257, "step": 3916 }, { "epoch": 0.23, "grad_norm": 0.6049790373030124, "learning_rate": 1.8070875407780026e-05, "loss": 0.355, "step": 3917 }, { "epoch": 0.23, "grad_norm": 0.30708101652710945, "learning_rate": 1.806977652607655e-05, "loss": 0.1988, "step": 3918 }, { "epoch": 0.23, "grad_norm": 0.4084061861810741, "learning_rate": 1.8068677364917063e-05, "loss": 0.283, "step": 3919 }, { "epoch": 0.23, "grad_norm": 0.626383104086622, "learning_rate": 1.806757792433962e-05, "loss": 0.4173, "step": 3920 }, { "epoch": 0.23, "grad_norm": 0.6969864507969111, "learning_rate": 1.806647820438231e-05, "loss": 0.4274, "step": 3921 }, { "epoch": 0.23, "grad_norm": 0.3530384228236583, "learning_rate": 1.8065378205083202e-05, "loss": 0.316, "step": 3922 }, { "epoch": 0.23, "grad_norm": 0.4005606977157132, "learning_rate": 1.8064277926480392e-05, "loss": 0.3583, "step": 3923 }, { "epoch": 0.23, "grad_norm": 0.31165153005202517, "learning_rate": 1.8063177368611988e-05, "loss": 0.214, "step": 3924 }, { "epoch": 0.23, "grad_norm": 0.33150213382715915, "learning_rate": 1.8062076531516094e-05, "loss": 0.2387, "step": 3925 }, { "epoch": 0.23, "grad_norm": 0.8408216515743814, "learning_rate": 1.806097541523084e-05, "loss": 0.4223, "step": 3926 }, { "epoch": 0.23, "grad_norm": 0.43962821527072266, "learning_rate": 1.8059874019794352e-05, "loss": 0.3006, "step": 3927 }, { "epoch": 0.23, "grad_norm": 0.4049589074514208, "learning_rate": 1.8058772345244775e-05, "loss": 0.3666, "step": 3928 }, { "epoch": 0.23, "grad_norm": 0.6436948491887067, "learning_rate": 1.8057670391620258e-05, "loss": 0.3893, "step": 3929 }, { "epoch": 0.23, "grad_norm": 0.26980658996097057, "learning_rate": 1.8056568158958958e-05, "loss": 0.194, "step": 3930 }, { "epoch": 0.23, "grad_norm": 0.5857657539673209, "learning_rate": 1.8055465647299052e-05, "loss": 0.4342, "step": 3931 }, { "epoch": 0.23, "grad_norm": 0.3769739875049705, "learning_rate": 1.805436285667872e-05, "loss": 0.2869, "step": 3932 }, { "epoch": 0.23, "grad_norm": 0.44323285132468504, "learning_rate": 1.8053259787136144e-05, "loss": 0.3503, "step": 3933 }, { "epoch": 0.23, "grad_norm": 0.5264863084482408, "learning_rate": 1.8052156438709527e-05, "loss": 0.3917, "step": 3934 }, { "epoch": 0.23, "grad_norm": 0.3824530834229984, "learning_rate": 1.805105281143708e-05, "loss": 0.3065, "step": 3935 }, { "epoch": 0.23, "grad_norm": 0.39071495256736205, "learning_rate": 1.8049948905357023e-05, "loss": 0.1898, "step": 3936 }, { "epoch": 0.23, "grad_norm": 0.3091528912278944, "learning_rate": 1.804884472050758e-05, "loss": 0.2312, "step": 3937 }, { "epoch": 0.23, "grad_norm": 0.9481919338379736, "learning_rate": 1.8047740256926993e-05, "loss": 0.6748, "step": 3938 }, { "epoch": 0.23, "grad_norm": 0.35422805559962023, "learning_rate": 1.8046635514653505e-05, "loss": 0.2324, "step": 3939 }, { "epoch": 0.23, "grad_norm": 0.412781947395747, "learning_rate": 1.8045530493725375e-05, "loss": 0.3477, "step": 3940 }, { "epoch": 0.23, "grad_norm": 0.7610604686105258, "learning_rate": 1.8044425194180868e-05, "loss": 0.5374, "step": 3941 }, { "epoch": 0.23, "grad_norm": 0.23494787925537458, "learning_rate": 1.8043319616058266e-05, "loss": 0.0743, "step": 3942 }, { "epoch": 0.23, "grad_norm": 0.32367312358901884, "learning_rate": 1.804221375939585e-05, "loss": 0.2955, "step": 3943 }, { "epoch": 0.23, "grad_norm": 0.9361586658175016, "learning_rate": 1.8041107624231916e-05, "loss": 0.6321, "step": 3944 }, { "epoch": 0.23, "grad_norm": 0.4736112307448519, "learning_rate": 1.804000121060477e-05, "loss": 0.3214, "step": 3945 }, { "epoch": 0.23, "grad_norm": 0.4152969262120841, "learning_rate": 1.803889451855273e-05, "loss": 0.346, "step": 3946 }, { "epoch": 0.23, "grad_norm": 0.425406663947165, "learning_rate": 1.8037787548114122e-05, "loss": 0.3407, "step": 3947 }, { "epoch": 0.23, "grad_norm": 0.3436781987502028, "learning_rate": 1.8036680299327273e-05, "loss": 0.1941, "step": 3948 }, { "epoch": 0.23, "grad_norm": 0.2759805308240463, "learning_rate": 1.8035572772230526e-05, "loss": 0.1915, "step": 3949 }, { "epoch": 0.23, "grad_norm": 0.7343999781835354, "learning_rate": 1.8034464966862247e-05, "loss": 0.5131, "step": 3950 }, { "epoch": 0.23, "grad_norm": 0.3709341545619263, "learning_rate": 1.8033356883260786e-05, "loss": 0.3157, "step": 3951 }, { "epoch": 0.23, "grad_norm": 0.3961664624622881, "learning_rate": 1.8032248521464526e-05, "loss": 0.301, "step": 3952 }, { "epoch": 0.23, "grad_norm": 0.9898774268867264, "learning_rate": 1.8031139881511844e-05, "loss": 0.6261, "step": 3953 }, { "epoch": 0.23, "grad_norm": 0.39230522301924536, "learning_rate": 1.8030030963441133e-05, "loss": 0.2662, "step": 3954 }, { "epoch": 0.23, "grad_norm": 0.28249473186971336, "learning_rate": 1.8028921767290796e-05, "loss": 0.2525, "step": 3955 }, { "epoch": 0.23, "grad_norm": 0.5699095328371475, "learning_rate": 1.802781229309924e-05, "loss": 0.3755, "step": 3956 }, { "epoch": 0.23, "grad_norm": 0.7814378890464383, "learning_rate": 1.8026702540904893e-05, "loss": 0.4934, "step": 3957 }, { "epoch": 0.23, "grad_norm": 0.31906069339089543, "learning_rate": 1.802559251074618e-05, "loss": 0.2279, "step": 3958 }, { "epoch": 0.23, "grad_norm": 0.4300544687060639, "learning_rate": 1.8024482202661544e-05, "loss": 0.3191, "step": 3959 }, { "epoch": 0.23, "grad_norm": 0.7882393206382694, "learning_rate": 1.8023371616689437e-05, "loss": 0.4986, "step": 3960 }, { "epoch": 0.23, "grad_norm": 0.2288574775693125, "learning_rate": 1.8022260752868314e-05, "loss": 0.1694, "step": 3961 }, { "epoch": 0.23, "grad_norm": 1.129884549557266, "learning_rate": 1.8021149611236644e-05, "loss": 0.5047, "step": 3962 }, { "epoch": 0.23, "grad_norm": 0.3673895981661131, "learning_rate": 1.8020038191832912e-05, "loss": 0.3243, "step": 3963 }, { "epoch": 0.23, "grad_norm": 0.3960014025969493, "learning_rate": 1.80189264946956e-05, "loss": 0.3041, "step": 3964 }, { "epoch": 0.23, "grad_norm": 0.7265493417525126, "learning_rate": 1.8017814519863206e-05, "loss": 0.2853, "step": 3965 }, { "epoch": 0.23, "grad_norm": 0.4288534438453486, "learning_rate": 1.8016702267374243e-05, "loss": 0.3599, "step": 3966 }, { "epoch": 0.23, "grad_norm": 0.3331264382746232, "learning_rate": 1.8015589737267226e-05, "loss": 0.2876, "step": 3967 }, { "epoch": 0.23, "grad_norm": 0.43022925309376164, "learning_rate": 1.801447692958068e-05, "loss": 0.2281, "step": 3968 }, { "epoch": 0.23, "grad_norm": 0.5957577630302175, "learning_rate": 1.801336384435314e-05, "loss": 0.3927, "step": 3969 }, { "epoch": 0.23, "grad_norm": 0.4640169255196903, "learning_rate": 1.8012250481623158e-05, "loss": 0.3406, "step": 3970 }, { "epoch": 0.23, "grad_norm": 0.39945224996755907, "learning_rate": 1.8011136841429285e-05, "loss": 0.2894, "step": 3971 }, { "epoch": 0.23, "grad_norm": 0.8005686154322458, "learning_rate": 1.801002292381009e-05, "loss": 0.4909, "step": 3972 }, { "epoch": 0.23, "grad_norm": 0.33287331011769583, "learning_rate": 1.800890872880414e-05, "loss": 0.2381, "step": 3973 }, { "epoch": 0.23, "grad_norm": 0.36297847553286217, "learning_rate": 1.8007794256450027e-05, "loss": 0.3012, "step": 3974 }, { "epoch": 0.23, "grad_norm": 0.5849863340592387, "learning_rate": 1.800667950678635e-05, "loss": 0.4239, "step": 3975 }, { "epoch": 0.23, "grad_norm": 0.378342194413626, "learning_rate": 1.8005564479851697e-05, "loss": 0.2682, "step": 3976 }, { "epoch": 0.23, "grad_norm": 0.7952623423604379, "learning_rate": 1.8004449175684697e-05, "loss": 0.6006, "step": 3977 }, { "epoch": 0.23, "grad_norm": 0.4097070482739936, "learning_rate": 1.8003333594323962e-05, "loss": 0.2736, "step": 3978 }, { "epoch": 0.23, "grad_norm": 0.3728346283618812, "learning_rate": 1.800221773580813e-05, "loss": 0.2866, "step": 3979 }, { "epoch": 0.23, "grad_norm": 0.5766114897185256, "learning_rate": 1.8001101600175843e-05, "loss": 0.4659, "step": 3980 }, { "epoch": 0.23, "grad_norm": 0.26613403625490034, "learning_rate": 1.799998518746575e-05, "loss": 0.1474, "step": 3981 }, { "epoch": 0.23, "grad_norm": 0.31031290429059133, "learning_rate": 1.7998868497716516e-05, "loss": 0.2421, "step": 3982 }, { "epoch": 0.23, "grad_norm": 0.4392924725322208, "learning_rate": 1.7997751530966806e-05, "loss": 0.3419, "step": 3983 }, { "epoch": 0.23, "grad_norm": 0.5462229687452343, "learning_rate": 1.799663428725531e-05, "loss": 0.2927, "step": 3984 }, { "epoch": 0.23, "grad_norm": 0.39202509019051524, "learning_rate": 1.7995516766620706e-05, "loss": 0.3135, "step": 3985 }, { "epoch": 0.23, "grad_norm": 0.7239484639028205, "learning_rate": 1.7994398969101704e-05, "loss": 0.5246, "step": 3986 }, { "epoch": 0.23, "grad_norm": 0.3566107377592294, "learning_rate": 1.799328089473701e-05, "loss": 0.2878, "step": 3987 }, { "epoch": 0.23, "grad_norm": 0.4206515109068827, "learning_rate": 1.799216254356534e-05, "loss": 0.3109, "step": 3988 }, { "epoch": 0.23, "grad_norm": 0.337123993090019, "learning_rate": 1.7991043915625427e-05, "loss": 0.2135, "step": 3989 }, { "epoch": 0.23, "grad_norm": 0.34315829448210505, "learning_rate": 1.798992501095601e-05, "loss": 0.3353, "step": 3990 }, { "epoch": 0.23, "grad_norm": 0.36382392161356036, "learning_rate": 1.7988805829595825e-05, "loss": 0.2506, "step": 3991 }, { "epoch": 0.23, "grad_norm": 0.6713822788963748, "learning_rate": 1.7987686371583643e-05, "loss": 0.501, "step": 3992 }, { "epoch": 0.23, "grad_norm": 1.326313126631407, "learning_rate": 1.7986566636958228e-05, "loss": 0.8042, "step": 3993 }, { "epoch": 0.23, "grad_norm": 0.27371657393729876, "learning_rate": 1.798544662575835e-05, "loss": 0.188, "step": 3994 }, { "epoch": 0.23, "grad_norm": 0.31879060982136537, "learning_rate": 1.7984326338022797e-05, "loss": 0.2921, "step": 3995 }, { "epoch": 0.23, "grad_norm": 0.6619585978080218, "learning_rate": 1.798320577379037e-05, "loss": 0.4928, "step": 3996 }, { "epoch": 0.23, "grad_norm": 0.353273027323354, "learning_rate": 1.7982084933099868e-05, "loss": 0.2369, "step": 3997 }, { "epoch": 0.23, "grad_norm": 1.1077945691153839, "learning_rate": 1.798096381599011e-05, "loss": 0.6902, "step": 3998 }, { "epoch": 0.23, "grad_norm": 0.3950848276115181, "learning_rate": 1.7979842422499917e-05, "loss": 0.3657, "step": 3999 }, { "epoch": 0.23, "grad_norm": 0.298536345873338, "learning_rate": 1.7978720752668123e-05, "loss": 0.2263, "step": 4000 }, { "epoch": 0.23, "grad_norm": 0.3103745986561787, "learning_rate": 1.7977598806533575e-05, "loss": 0.1444, "step": 4001 }, { "epoch": 0.23, "grad_norm": 0.46143426008863875, "learning_rate": 1.797647658413512e-05, "loss": 0.3617, "step": 4002 }, { "epoch": 0.23, "grad_norm": 0.35964308521923766, "learning_rate": 1.7975354085511627e-05, "loss": 0.3244, "step": 4003 }, { "epoch": 0.23, "grad_norm": 1.0071718247568406, "learning_rate": 1.7974231310701964e-05, "loss": 0.0483, "step": 4004 }, { "epoch": 0.23, "grad_norm": 0.8956530240852543, "learning_rate": 1.7973108259745012e-05, "loss": 0.7035, "step": 4005 }, { "epoch": 0.23, "grad_norm": 0.4631104114709448, "learning_rate": 1.7971984932679663e-05, "loss": 0.2886, "step": 4006 }, { "epoch": 0.23, "grad_norm": 0.25131716487061634, "learning_rate": 1.7970861329544823e-05, "loss": 0.2199, "step": 4007 }, { "epoch": 0.23, "grad_norm": 0.6088567020101945, "learning_rate": 1.7969737450379395e-05, "loss": 0.4963, "step": 4008 }, { "epoch": 0.23, "grad_norm": 0.6053261675860214, "learning_rate": 1.7968613295222304e-05, "loss": 0.2756, "step": 4009 }, { "epoch": 0.23, "grad_norm": 0.3335128304816215, "learning_rate": 1.7967488864112473e-05, "loss": 0.2663, "step": 4010 }, { "epoch": 0.23, "grad_norm": 1.4790286393120566, "learning_rate": 1.7966364157088853e-05, "loss": 0.8433, "step": 4011 }, { "epoch": 0.23, "grad_norm": 0.3681446596417048, "learning_rate": 1.7965239174190376e-05, "loss": 0.2871, "step": 4012 }, { "epoch": 0.23, "grad_norm": 0.6855409947925746, "learning_rate": 1.7964113915456013e-05, "loss": 0.5071, "step": 4013 }, { "epoch": 0.23, "grad_norm": 0.3131770028920056, "learning_rate": 1.7962988380924727e-05, "loss": 0.2426, "step": 4014 }, { "epoch": 0.23, "grad_norm": 0.39953225148360916, "learning_rate": 1.7961862570635496e-05, "loss": 0.2161, "step": 4015 }, { "epoch": 0.23, "grad_norm": 1.0905675166530646, "learning_rate": 1.7960736484627306e-05, "loss": 0.5986, "step": 4016 }, { "epoch": 0.23, "grad_norm": 0.8487839708425405, "learning_rate": 1.7959610122939155e-05, "loss": 0.521, "step": 4017 }, { "epoch": 0.23, "grad_norm": 0.35305893350636597, "learning_rate": 1.7958483485610048e-05, "loss": 0.275, "step": 4018 }, { "epoch": 0.23, "grad_norm": 0.5051109731237906, "learning_rate": 1.7957356572678998e-05, "loss": 0.389, "step": 4019 }, { "epoch": 0.23, "grad_norm": 0.37882249996544415, "learning_rate": 1.7956229384185036e-05, "loss": 0.217, "step": 4020 }, { "epoch": 0.23, "grad_norm": 0.43823490781549507, "learning_rate": 1.7955101920167188e-05, "loss": 0.2772, "step": 4021 }, { "epoch": 0.23, "grad_norm": 0.5312447099067517, "learning_rate": 1.7953974180664504e-05, "loss": 0.3589, "step": 4022 }, { "epoch": 0.23, "grad_norm": 0.5289342208062883, "learning_rate": 1.7952846165716038e-05, "loss": 0.3424, "step": 4023 }, { "epoch": 0.23, "grad_norm": 0.44434750769308423, "learning_rate": 1.795171787536085e-05, "loss": 0.2691, "step": 4024 }, { "epoch": 0.23, "grad_norm": 0.5755076448856171, "learning_rate": 1.7950589309638014e-05, "loss": 0.4052, "step": 4025 }, { "epoch": 0.23, "grad_norm": 0.30817817030080696, "learning_rate": 1.794946046858661e-05, "loss": 0.2699, "step": 4026 }, { "epoch": 0.23, "grad_norm": 0.3306458946911386, "learning_rate": 1.7948331352245736e-05, "loss": 0.1414, "step": 4027 }, { "epoch": 0.23, "grad_norm": 0.509175609376385, "learning_rate": 1.7947201960654488e-05, "loss": 0.3813, "step": 4028 }, { "epoch": 0.23, "grad_norm": 0.8703972842173666, "learning_rate": 1.7946072293851976e-05, "loss": 0.6002, "step": 4029 }, { "epoch": 0.23, "grad_norm": 0.343542528326212, "learning_rate": 1.794494235187732e-05, "loss": 0.2338, "step": 4030 }, { "epoch": 0.23, "grad_norm": 0.4128671296922721, "learning_rate": 1.7943812134769656e-05, "loss": 0.4134, "step": 4031 }, { "epoch": 0.23, "grad_norm": 1.041845445196055, "learning_rate": 1.794268164256812e-05, "loss": 0.628, "step": 4032 }, { "epoch": 0.23, "grad_norm": 0.23769051134991365, "learning_rate": 1.7941550875311858e-05, "loss": 0.1505, "step": 4033 }, { "epoch": 0.23, "grad_norm": 0.45355094491900144, "learning_rate": 1.794041983304003e-05, "loss": 0.3421, "step": 4034 }, { "epoch": 0.23, "grad_norm": 1.1868507744408294, "learning_rate": 1.7939288515791806e-05, "loss": 0.7895, "step": 4035 }, { "epoch": 0.23, "grad_norm": 0.395618032596114, "learning_rate": 1.7938156923606362e-05, "loss": 0.2637, "step": 4036 }, { "epoch": 0.23, "grad_norm": 0.7611232233618405, "learning_rate": 1.7937025056522884e-05, "loss": 0.5185, "step": 4037 }, { "epoch": 0.23, "grad_norm": 0.4522028536625917, "learning_rate": 1.7935892914580572e-05, "loss": 0.3422, "step": 4038 }, { "epoch": 0.23, "grad_norm": 0.32600638350104655, "learning_rate": 1.7934760497818628e-05, "loss": 0.1793, "step": 4039 }, { "epoch": 0.23, "grad_norm": 0.5263969279202462, "learning_rate": 1.7933627806276267e-05, "loss": 0.2869, "step": 4040 }, { "epoch": 0.23, "grad_norm": 0.5985454643455146, "learning_rate": 1.7932494839992723e-05, "loss": 0.4168, "step": 4041 }, { "epoch": 0.23, "grad_norm": 0.40800954663605743, "learning_rate": 1.793136159900722e-05, "loss": 0.286, "step": 4042 }, { "epoch": 0.23, "grad_norm": 0.4796678193727095, "learning_rate": 1.7930228083359002e-05, "loss": 0.3264, "step": 4043 }, { "epoch": 0.23, "grad_norm": 0.4774420141970719, "learning_rate": 1.792909429308733e-05, "loss": 0.3023, "step": 4044 }, { "epoch": 0.23, "grad_norm": 0.33552706676028116, "learning_rate": 1.792796022823146e-05, "loss": 0.1879, "step": 4045 }, { "epoch": 0.23, "grad_norm": 0.34466688409543883, "learning_rate": 1.7926825888830673e-05, "loss": 0.2726, "step": 4046 }, { "epoch": 0.23, "grad_norm": 0.6822063282086891, "learning_rate": 1.7925691274924247e-05, "loss": 0.5477, "step": 4047 }, { "epoch": 0.23, "grad_norm": 0.4588359590822482, "learning_rate": 1.7924556386551472e-05, "loss": 0.4077, "step": 4048 }, { "epoch": 0.23, "grad_norm": 0.39472767234202216, "learning_rate": 1.792342122375165e-05, "loss": 0.289, "step": 4049 }, { "epoch": 0.23, "grad_norm": 0.4249909103766952, "learning_rate": 1.792228578656409e-05, "loss": 0.3337, "step": 4050 }, { "epoch": 0.23, "grad_norm": 0.40114167914557236, "learning_rate": 1.7921150075028112e-05, "loss": 0.2137, "step": 4051 }, { "epoch": 0.23, "grad_norm": 0.3296340901766964, "learning_rate": 1.792001408918305e-05, "loss": 0.2957, "step": 4052 }, { "epoch": 0.23, "grad_norm": 0.8150910975190974, "learning_rate": 1.791887782906824e-05, "loss": 0.4098, "step": 4053 }, { "epoch": 0.23, "grad_norm": 0.3098451298308945, "learning_rate": 1.7917741294723035e-05, "loss": 0.2909, "step": 4054 }, { "epoch": 0.23, "grad_norm": 0.6086232025078487, "learning_rate": 1.7916604486186786e-05, "loss": 0.4586, "step": 4055 }, { "epoch": 0.23, "grad_norm": 0.7056291363756321, "learning_rate": 1.7915467403498864e-05, "loss": 0.3649, "step": 4056 }, { "epoch": 0.23, "grad_norm": 0.232611976797289, "learning_rate": 1.791433004669865e-05, "loss": 0.1749, "step": 4057 }, { "epoch": 0.23, "grad_norm": 0.3738788987291126, "learning_rate": 1.791319241582552e-05, "loss": 0.3219, "step": 4058 }, { "epoch": 0.23, "grad_norm": 1.0108750504208646, "learning_rate": 1.791205451091888e-05, "loss": 0.4338, "step": 4059 }, { "epoch": 0.23, "grad_norm": 0.8253458918236195, "learning_rate": 1.7910916332018137e-05, "loss": 0.4663, "step": 4060 }, { "epoch": 0.23, "grad_norm": 0.4535609620382996, "learning_rate": 1.7909777879162695e-05, "loss": 0.2864, "step": 4061 }, { "epoch": 0.23, "grad_norm": 0.5264572392672097, "learning_rate": 1.7908639152391988e-05, "loss": 0.3096, "step": 4062 }, { "epoch": 0.23, "grad_norm": 0.6313130758765294, "learning_rate": 1.790750015174545e-05, "loss": 0.3589, "step": 4063 }, { "epoch": 0.23, "grad_norm": 0.4027259179702243, "learning_rate": 1.7906360877262515e-05, "loss": 0.3104, "step": 4064 }, { "epoch": 0.23, "grad_norm": 0.8796907969909685, "learning_rate": 1.7905221328982647e-05, "loss": 0.5063, "step": 4065 }, { "epoch": 0.23, "grad_norm": 0.3771326162949683, "learning_rate": 1.7904081506945304e-05, "loss": 0.25, "step": 4066 }, { "epoch": 0.23, "grad_norm": 0.37190974661172604, "learning_rate": 1.790294141118996e-05, "loss": 0.2481, "step": 4067 }, { "epoch": 0.23, "grad_norm": 1.5489614559790572, "learning_rate": 1.790180104175609e-05, "loss": 0.7663, "step": 4068 }, { "epoch": 0.23, "grad_norm": 0.5644846432613142, "learning_rate": 1.7900660398683192e-05, "loss": 0.2647, "step": 4069 }, { "epoch": 0.23, "grad_norm": 0.5649799604812463, "learning_rate": 1.789951948201077e-05, "loss": 0.3422, "step": 4070 }, { "epoch": 0.23, "grad_norm": 0.5752127497152826, "learning_rate": 1.789837829177832e-05, "loss": 0.3297, "step": 4071 }, { "epoch": 0.23, "grad_norm": 0.2515960625460765, "learning_rate": 1.7897236828025373e-05, "loss": 0.13, "step": 4072 }, { "epoch": 0.23, "grad_norm": 0.454993611244226, "learning_rate": 1.7896095090791452e-05, "loss": 0.3405, "step": 4073 }, { "epoch": 0.23, "grad_norm": 0.46244441347114756, "learning_rate": 1.7894953080116102e-05, "loss": 0.3233, "step": 4074 }, { "epoch": 0.23, "grad_norm": 0.46040709971171495, "learning_rate": 1.7893810796038862e-05, "loss": 0.2484, "step": 4075 }, { "epoch": 0.23, "grad_norm": 0.7576076875799639, "learning_rate": 1.7892668238599293e-05, "loss": 0.4109, "step": 4076 }, { "epoch": 0.23, "grad_norm": 0.6283333218980784, "learning_rate": 1.7891525407836967e-05, "loss": 0.4061, "step": 4077 }, { "epoch": 0.23, "grad_norm": 0.49411712691782517, "learning_rate": 1.789038230379145e-05, "loss": 0.3519, "step": 4078 }, { "epoch": 0.23, "grad_norm": 0.25958003257867746, "learning_rate": 1.7889238926502336e-05, "loss": 0.1599, "step": 4079 }, { "epoch": 0.23, "grad_norm": 0.6904502163139321, "learning_rate": 1.788809527600922e-05, "loss": 0.5345, "step": 4080 }, { "epoch": 0.23, "grad_norm": 0.42273087648124946, "learning_rate": 1.78869513523517e-05, "loss": 0.3553, "step": 4081 }, { "epoch": 0.23, "grad_norm": 0.41321642610780296, "learning_rate": 1.7885807155569395e-05, "loss": 0.2611, "step": 4082 }, { "epoch": 0.23, "grad_norm": 1.29593229800086, "learning_rate": 1.7884662685701927e-05, "loss": 0.797, "step": 4083 }, { "epoch": 0.23, "grad_norm": 0.4185943883142081, "learning_rate": 1.788351794278893e-05, "loss": 0.2627, "step": 4084 }, { "epoch": 0.23, "grad_norm": 0.30586527586360296, "learning_rate": 1.7882372926870045e-05, "loss": 0.2179, "step": 4085 }, { "epoch": 0.23, "grad_norm": 0.5378611831339616, "learning_rate": 1.7881227637984922e-05, "loss": 0.391, "step": 4086 }, { "epoch": 0.23, "grad_norm": 0.6274700701055269, "learning_rate": 1.788008207617323e-05, "loss": 0.4243, "step": 4087 }, { "epoch": 0.23, "grad_norm": 0.3917913457312723, "learning_rate": 1.787893624147463e-05, "loss": 0.3136, "step": 4088 }, { "epoch": 0.23, "grad_norm": 0.4596357049274881, "learning_rate": 1.7877790133928807e-05, "loss": 0.2903, "step": 4089 }, { "epoch": 0.23, "grad_norm": 0.4524210604969984, "learning_rate": 1.7876643753575457e-05, "loss": 0.3614, "step": 4090 }, { "epoch": 0.24, "grad_norm": 0.382573208203039, "learning_rate": 1.7875497100454266e-05, "loss": 0.3108, "step": 4091 }, { "epoch": 0.24, "grad_norm": 0.3418759654466254, "learning_rate": 1.787435017460495e-05, "loss": 0.1814, "step": 4092 }, { "epoch": 0.24, "grad_norm": 0.48679353512366635, "learning_rate": 1.7873202976067225e-05, "loss": 0.3678, "step": 4093 }, { "epoch": 0.24, "grad_norm": 0.3769792537925658, "learning_rate": 1.787205550488082e-05, "loss": 0.3096, "step": 4094 }, { "epoch": 0.24, "grad_norm": 0.744428708424359, "learning_rate": 1.7870907761085474e-05, "loss": 0.4643, "step": 4095 }, { "epoch": 0.24, "grad_norm": 0.6447660292616412, "learning_rate": 1.786975974472093e-05, "loss": 0.4391, "step": 4096 }, { "epoch": 0.24, "grad_norm": 0.3339067669230319, "learning_rate": 1.7868611455826942e-05, "loss": 0.2451, "step": 4097 }, { "epoch": 0.24, "grad_norm": 0.3580989985318797, "learning_rate": 1.7867462894443283e-05, "loss": 0.2714, "step": 4098 }, { "epoch": 0.24, "grad_norm": 0.9081337673125298, "learning_rate": 1.7866314060609714e-05, "loss": 0.6394, "step": 4099 }, { "epoch": 0.24, "grad_norm": 0.4002506801084894, "learning_rate": 1.7865164954366033e-05, "loss": 0.2894, "step": 4100 }, { "epoch": 0.24, "grad_norm": 0.4251875388623584, "learning_rate": 1.7864015575752026e-05, "loss": 0.353, "step": 4101 }, { "epoch": 0.24, "grad_norm": 1.063061765139635, "learning_rate": 1.78628659248075e-05, "loss": 0.4411, "step": 4102 }, { "epoch": 0.24, "grad_norm": 0.29534226202391967, "learning_rate": 1.7861716001572262e-05, "loss": 0.2349, "step": 4103 }, { "epoch": 0.24, "grad_norm": 0.6987250149273035, "learning_rate": 1.7860565806086142e-05, "loss": 0.4517, "step": 4104 }, { "epoch": 0.24, "grad_norm": 0.31222378317392696, "learning_rate": 1.7859415338388963e-05, "loss": 0.228, "step": 4105 }, { "epoch": 0.24, "grad_norm": 0.4311047635232939, "learning_rate": 1.7858264598520568e-05, "loss": 0.3151, "step": 4106 }, { "epoch": 0.24, "grad_norm": 1.4992911334787213, "learning_rate": 1.7857113586520806e-05, "loss": 0.7821, "step": 4107 }, { "epoch": 0.24, "grad_norm": 0.5395303710203192, "learning_rate": 1.7855962302429542e-05, "loss": 0.2653, "step": 4108 }, { "epoch": 0.24, "grad_norm": 0.40037400934122036, "learning_rate": 1.785481074628664e-05, "loss": 0.3089, "step": 4109 }, { "epoch": 0.24, "grad_norm": 0.4222237834089705, "learning_rate": 1.785365891813198e-05, "loss": 0.3479, "step": 4110 }, { "epoch": 0.24, "grad_norm": 0.26589948850227196, "learning_rate": 1.785250681800545e-05, "loss": 0.1399, "step": 4111 }, { "epoch": 0.24, "grad_norm": 0.41721052652204943, "learning_rate": 1.7851354445946944e-05, "loss": 0.3075, "step": 4112 }, { "epoch": 0.24, "grad_norm": 0.4215018631260107, "learning_rate": 1.785020180199637e-05, "loss": 0.3456, "step": 4113 }, { "epoch": 0.24, "grad_norm": 1.3790817111716487, "learning_rate": 1.7849048886193648e-05, "loss": 0.8667, "step": 4114 }, { "epoch": 0.24, "grad_norm": 0.354931172130831, "learning_rate": 1.7847895698578702e-05, "loss": 0.2496, "step": 4115 }, { "epoch": 0.24, "grad_norm": 0.423749864426112, "learning_rate": 1.7846742239191464e-05, "loss": 0.3579, "step": 4116 }, { "epoch": 0.24, "grad_norm": 0.3605983029128115, "learning_rate": 1.784558850807188e-05, "loss": 0.2686, "step": 4117 }, { "epoch": 0.24, "grad_norm": 0.3667287214717069, "learning_rate": 1.7844434505259904e-05, "loss": 0.2296, "step": 4118 }, { "epoch": 0.24, "grad_norm": 0.9766307941377927, "learning_rate": 1.7843280230795496e-05, "loss": 0.6913, "step": 4119 }, { "epoch": 0.24, "grad_norm": 0.7464374748135114, "learning_rate": 1.784212568471863e-05, "loss": 0.5132, "step": 4120 }, { "epoch": 0.24, "grad_norm": 0.34975514825247495, "learning_rate": 1.7840970867069293e-05, "loss": 0.2485, "step": 4121 }, { "epoch": 0.24, "grad_norm": 0.7385557641533902, "learning_rate": 1.7839815777887472e-05, "loss": 0.511, "step": 4122 }, { "epoch": 0.24, "grad_norm": 0.31425441408801236, "learning_rate": 1.7838660417213166e-05, "loss": 0.179, "step": 4123 }, { "epoch": 0.24, "grad_norm": 0.3292178319221814, "learning_rate": 1.7837504785086386e-05, "loss": 0.2168, "step": 4124 }, { "epoch": 0.24, "grad_norm": 0.36915751123007257, "learning_rate": 1.7836348881547153e-05, "loss": 0.3212, "step": 4125 }, { "epoch": 0.24, "grad_norm": 0.9386073864728668, "learning_rate": 1.7835192706635494e-05, "loss": 0.5632, "step": 4126 }, { "epoch": 0.24, "grad_norm": 0.38900805335201366, "learning_rate": 1.783403626039145e-05, "loss": 0.2911, "step": 4127 }, { "epoch": 0.24, "grad_norm": 0.7257634073735822, "learning_rate": 1.7832879542855067e-05, "loss": 0.4026, "step": 4128 }, { "epoch": 0.24, "grad_norm": 0.260931509801551, "learning_rate": 1.7831722554066403e-05, "loss": 0.2349, "step": 4129 }, { "epoch": 0.24, "grad_norm": 0.41780863397346774, "learning_rate": 1.7830565294065522e-05, "loss": 0.2964, "step": 4130 }, { "epoch": 0.24, "grad_norm": 0.5120862661410127, "learning_rate": 1.7829407762892504e-05, "loss": 0.2983, "step": 4131 }, { "epoch": 0.24, "grad_norm": 0.9249334484384528, "learning_rate": 1.7828249960587428e-05, "loss": 0.4811, "step": 4132 }, { "epoch": 0.24, "grad_norm": 0.36244641485753243, "learning_rate": 1.7827091887190396e-05, "loss": 0.3057, "step": 4133 }, { "epoch": 0.24, "grad_norm": 0.36789794513151913, "learning_rate": 1.7825933542741506e-05, "loss": 0.2761, "step": 4134 }, { "epoch": 0.24, "grad_norm": 0.2799037207296845, "learning_rate": 1.7824774927280877e-05, "loss": 0.1626, "step": 4135 }, { "epoch": 0.24, "grad_norm": 0.3573932183986537, "learning_rate": 1.7823616040848625e-05, "loss": 0.3016, "step": 4136 }, { "epoch": 0.24, "grad_norm": 0.4498035506977487, "learning_rate": 1.782245688348489e-05, "loss": 0.3051, "step": 4137 }, { "epoch": 0.24, "grad_norm": 1.1538000821845913, "learning_rate": 1.7821297455229807e-05, "loss": 0.4959, "step": 4138 }, { "epoch": 0.24, "grad_norm": 0.3316262304163246, "learning_rate": 1.7820137756123527e-05, "loss": 0.2842, "step": 4139 }, { "epoch": 0.24, "grad_norm": 1.53751881341363, "learning_rate": 1.7818977786206217e-05, "loss": 0.8152, "step": 4140 }, { "epoch": 0.24, "grad_norm": 0.32222675266258977, "learning_rate": 1.7817817545518045e-05, "loss": 0.2308, "step": 4141 }, { "epoch": 0.24, "grad_norm": 0.3286628540441941, "learning_rate": 1.7816657034099182e-05, "loss": 0.2449, "step": 4142 }, { "epoch": 0.24, "grad_norm": 1.4467305285964456, "learning_rate": 1.781549625198982e-05, "loss": 0.8061, "step": 4143 }, { "epoch": 0.24, "grad_norm": 0.870823301196831, "learning_rate": 1.7814335199230164e-05, "loss": 0.3788, "step": 4144 }, { "epoch": 0.24, "grad_norm": 0.336748819198341, "learning_rate": 1.7813173875860416e-05, "loss": 0.2872, "step": 4145 }, { "epoch": 0.24, "grad_norm": 0.6111350426253821, "learning_rate": 1.781201228192079e-05, "loss": 0.4169, "step": 4146 }, { "epoch": 0.24, "grad_norm": 0.21527619658183722, "learning_rate": 1.7810850417451517e-05, "loss": 0.1193, "step": 4147 }, { "epoch": 0.24, "grad_norm": 0.4550362260441464, "learning_rate": 1.780968828249283e-05, "loss": 0.2929, "step": 4148 }, { "epoch": 0.24, "grad_norm": 0.49114266037537563, "learning_rate": 1.780852587708497e-05, "loss": 0.3229, "step": 4149 }, { "epoch": 0.24, "grad_norm": 1.1197232619356512, "learning_rate": 1.78073632012682e-05, "loss": 0.4345, "step": 4150 }, { "epoch": 0.24, "grad_norm": 0.4730494255334301, "learning_rate": 1.780620025508277e-05, "loss": 0.3158, "step": 4151 }, { "epoch": 0.24, "grad_norm": 0.5063609235250967, "learning_rate": 1.7805037038568972e-05, "loss": 0.4172, "step": 4152 }, { "epoch": 0.24, "grad_norm": 0.41048674198046375, "learning_rate": 1.780387355176707e-05, "loss": 0.2985, "step": 4153 }, { "epoch": 0.24, "grad_norm": 0.21889835566971555, "learning_rate": 1.7802709794717363e-05, "loss": 0.1212, "step": 4154 }, { "epoch": 0.24, "grad_norm": 1.0661163910225424, "learning_rate": 1.780154576746015e-05, "loss": 0.4338, "step": 4155 }, { "epoch": 0.24, "grad_norm": 1.3205369104367537, "learning_rate": 1.7800381470035745e-05, "loss": 0.7398, "step": 4156 }, { "epoch": 0.24, "grad_norm": 0.36004560177244327, "learning_rate": 1.7799216902484465e-05, "loss": 0.2448, "step": 4157 }, { "epoch": 0.24, "grad_norm": 1.496192252384227, "learning_rate": 1.7798052064846637e-05, "loss": 0.805, "step": 4158 }, { "epoch": 0.24, "grad_norm": 0.8488803818598749, "learning_rate": 1.7796886957162603e-05, "loss": 0.5812, "step": 4159 }, { "epoch": 0.24, "grad_norm": 0.2631792065123535, "learning_rate": 1.7795721579472712e-05, "loss": 0.2027, "step": 4160 }, { "epoch": 0.24, "grad_norm": 0.39486478303427447, "learning_rate": 1.7794555931817314e-05, "loss": 0.3416, "step": 4161 }, { "epoch": 0.24, "grad_norm": 0.7292174481348954, "learning_rate": 1.779339001423678e-05, "loss": 0.5763, "step": 4162 }, { "epoch": 0.24, "grad_norm": 0.277092082065919, "learning_rate": 1.7792223826771484e-05, "loss": 0.1583, "step": 4163 }, { "epoch": 0.24, "grad_norm": 0.648421763994709, "learning_rate": 1.779105736946181e-05, "loss": 0.4098, "step": 4164 }, { "epoch": 0.24, "grad_norm": 0.4148689107380746, "learning_rate": 1.778989064234816e-05, "loss": 0.3432, "step": 4165 }, { "epoch": 0.24, "grad_norm": 0.5232083805274033, "learning_rate": 1.7788723645470928e-05, "loss": 0.3272, "step": 4166 }, { "epoch": 0.24, "grad_norm": 0.2818994881344073, "learning_rate": 1.7787556378870534e-05, "loss": 0.2582, "step": 4167 }, { "epoch": 0.24, "grad_norm": 0.457924102148931, "learning_rate": 1.7786388842587397e-05, "loss": 0.377, "step": 4168 }, { "epoch": 0.24, "grad_norm": 0.5522352742313344, "learning_rate": 1.7785221036661945e-05, "loss": 0.3301, "step": 4169 }, { "epoch": 0.24, "grad_norm": 0.32880274033610146, "learning_rate": 1.778405296113463e-05, "loss": 0.2008, "step": 4170 }, { "epoch": 0.24, "grad_norm": 0.7913590840489569, "learning_rate": 1.7782884616045892e-05, "loss": 0.5754, "step": 4171 }, { "epoch": 0.24, "grad_norm": 0.5348610835612185, "learning_rate": 1.7781716001436192e-05, "loss": 0.3692, "step": 4172 }, { "epoch": 0.24, "grad_norm": 0.3764624857271168, "learning_rate": 1.7780547117346005e-05, "loss": 0.2682, "step": 4173 }, { "epoch": 0.24, "grad_norm": 1.2520872298678765, "learning_rate": 1.7779377963815804e-05, "loss": 0.7315, "step": 4174 }, { "epoch": 0.24, "grad_norm": 0.3392114778810658, "learning_rate": 1.7778208540886082e-05, "loss": 0.1731, "step": 4175 }, { "epoch": 0.24, "grad_norm": 0.5068710149305249, "learning_rate": 1.777703884859733e-05, "loss": 0.3636, "step": 4176 }, { "epoch": 0.24, "grad_norm": 0.393948244622996, "learning_rate": 1.7775868886990056e-05, "loss": 0.3052, "step": 4177 }, { "epoch": 0.24, "grad_norm": 0.4345252768576852, "learning_rate": 1.7774698656104778e-05, "loss": 0.343, "step": 4178 }, { "epoch": 0.24, "grad_norm": 0.5250465194272104, "learning_rate": 1.777352815598202e-05, "loss": 0.3784, "step": 4179 }, { "epoch": 0.24, "grad_norm": 0.34170118548309036, "learning_rate": 1.7772357386662316e-05, "loss": 0.2921, "step": 4180 }, { "epoch": 0.24, "grad_norm": 0.3091716890840008, "learning_rate": 1.777118634818621e-05, "loss": 0.2455, "step": 4181 }, { "epoch": 0.24, "grad_norm": 0.32875149163461864, "learning_rate": 1.7770015040594256e-05, "loss": 0.2709, "step": 4182 }, { "epoch": 0.24, "grad_norm": 0.5657387770353896, "learning_rate": 1.7768843463927012e-05, "loss": 0.4015, "step": 4183 }, { "epoch": 0.24, "grad_norm": 0.4578594861031997, "learning_rate": 1.776767161822506e-05, "loss": 0.3508, "step": 4184 }, { "epoch": 0.24, "grad_norm": 0.3722925933401782, "learning_rate": 1.7766499503528965e-05, "loss": 0.293, "step": 4185 }, { "epoch": 0.24, "grad_norm": 0.8542895200701599, "learning_rate": 1.776532711987933e-05, "loss": 0.561, "step": 4186 }, { "epoch": 0.24, "grad_norm": 0.3903147749744142, "learning_rate": 1.7764154467316753e-05, "loss": 0.206, "step": 4187 }, { "epoch": 0.24, "grad_norm": 0.3327376950139663, "learning_rate": 1.776298154588184e-05, "loss": 0.2483, "step": 4188 }, { "epoch": 0.24, "grad_norm": 0.6250386359397111, "learning_rate": 1.7761808355615207e-05, "loss": 0.4006, "step": 4189 }, { "epoch": 0.24, "grad_norm": 0.49450962581882085, "learning_rate": 1.7760634896557483e-05, "loss": 0.2286, "step": 4190 }, { "epoch": 0.24, "grad_norm": 0.42356405355596793, "learning_rate": 1.775946116874931e-05, "loss": 0.3589, "step": 4191 }, { "epoch": 0.24, "grad_norm": 0.5552779771392584, "learning_rate": 1.7758287172231333e-05, "loss": 0.4024, "step": 4192 }, { "epoch": 0.24, "grad_norm": 0.37558968188355335, "learning_rate": 1.77571129070442e-05, "loss": 0.2241, "step": 4193 }, { "epoch": 0.24, "grad_norm": 0.25518944264314564, "learning_rate": 1.775593837322858e-05, "loss": 0.2089, "step": 4194 }, { "epoch": 0.24, "grad_norm": 1.00903247293503, "learning_rate": 1.775476357082515e-05, "loss": 0.4944, "step": 4195 }, { "epoch": 0.24, "grad_norm": 0.2975944794423369, "learning_rate": 1.7753588499874592e-05, "loss": 0.2273, "step": 4196 }, { "epoch": 0.24, "grad_norm": 0.44397822897049244, "learning_rate": 1.7752413160417597e-05, "loss": 0.357, "step": 4197 }, { "epoch": 0.24, "grad_norm": 1.1675312656602383, "learning_rate": 1.7751237552494867e-05, "loss": 0.862, "step": 4198 }, { "epoch": 0.24, "grad_norm": 0.2532797489976462, "learning_rate": 1.7750061676147114e-05, "loss": 0.108, "step": 4199 }, { "epoch": 0.24, "grad_norm": 0.4934846294571999, "learning_rate": 1.774888553141506e-05, "loss": 0.3664, "step": 4200 }, { "epoch": 0.24, "grad_norm": 0.3683129833102259, "learning_rate": 1.7747709118339428e-05, "loss": 0.2941, "step": 4201 }, { "epoch": 0.24, "grad_norm": 1.1727462583908863, "learning_rate": 1.7746532436960965e-05, "loss": 0.653, "step": 4202 }, { "epoch": 0.24, "grad_norm": 0.3065183783693479, "learning_rate": 1.7745355487320418e-05, "loss": 0.2344, "step": 4203 }, { "epoch": 0.24, "grad_norm": 0.41090849776968874, "learning_rate": 1.7744178269458547e-05, "loss": 0.3612, "step": 4204 }, { "epoch": 0.24, "grad_norm": 0.6804604082844093, "learning_rate": 1.774300078341611e-05, "loss": 0.5032, "step": 4205 }, { "epoch": 0.24, "grad_norm": 0.3564856518489956, "learning_rate": 1.7741823029233892e-05, "loss": 0.2573, "step": 4206 }, { "epoch": 0.24, "grad_norm": 0.3123261819071861, "learning_rate": 1.7740645006952674e-05, "loss": 0.2185, "step": 4207 }, { "epoch": 0.24, "grad_norm": 0.36540726586784045, "learning_rate": 1.773946671661325e-05, "loss": 0.2873, "step": 4208 }, { "epoch": 0.24, "grad_norm": 0.3851981339848704, "learning_rate": 1.773828815825643e-05, "loss": 0.2368, "step": 4209 }, { "epoch": 0.24, "grad_norm": 1.403401900227148, "learning_rate": 1.773710933192302e-05, "loss": 0.8898, "step": 4210 }, { "epoch": 0.24, "grad_norm": 0.8506443084440741, "learning_rate": 1.7735930237653853e-05, "loss": 0.4175, "step": 4211 }, { "epoch": 0.24, "grad_norm": 0.3401308629226619, "learning_rate": 1.773475087548975e-05, "loss": 0.2571, "step": 4212 }, { "epoch": 0.24, "grad_norm": 0.3457458099433843, "learning_rate": 1.7733571245471557e-05, "loss": 0.2495, "step": 4213 }, { "epoch": 0.24, "grad_norm": 0.3963265595086062, "learning_rate": 1.7732391347640125e-05, "loss": 0.2667, "step": 4214 }, { "epoch": 0.24, "grad_norm": 0.41455830175953723, "learning_rate": 1.7731211182036312e-05, "loss": 0.3137, "step": 4215 }, { "epoch": 0.24, "grad_norm": 0.40500875371408623, "learning_rate": 1.773003074870099e-05, "loss": 0.299, "step": 4216 }, { "epoch": 0.24, "grad_norm": 0.5403576844166071, "learning_rate": 1.7728850047675035e-05, "loss": 0.35, "step": 4217 }, { "epoch": 0.24, "grad_norm": 0.4087263684779261, "learning_rate": 1.7727669078999336e-05, "loss": 0.3484, "step": 4218 }, { "epoch": 0.24, "grad_norm": 0.2268995265041368, "learning_rate": 1.772648784271479e-05, "loss": 0.1586, "step": 4219 }, { "epoch": 0.24, "grad_norm": 0.38627405176100876, "learning_rate": 1.7725306338862298e-05, "loss": 0.3399, "step": 4220 }, { "epoch": 0.24, "grad_norm": 0.34508423192036813, "learning_rate": 1.7724124567482782e-05, "loss": 0.2773, "step": 4221 }, { "epoch": 0.24, "grad_norm": 0.5635387172753441, "learning_rate": 1.7722942528617163e-05, "loss": 0.443, "step": 4222 }, { "epoch": 0.24, "grad_norm": 0.6278867989446979, "learning_rate": 1.772176022230638e-05, "loss": 0.4841, "step": 4223 }, { "epoch": 0.24, "grad_norm": 0.3090845892934344, "learning_rate": 1.7720577648591368e-05, "loss": 0.2715, "step": 4224 }, { "epoch": 0.24, "grad_norm": 0.43203694883147464, "learning_rate": 1.771939480751309e-05, "loss": 0.2774, "step": 4225 }, { "epoch": 0.24, "grad_norm": 0.28886362206526994, "learning_rate": 1.7718211699112496e-05, "loss": 0.1862, "step": 4226 }, { "epoch": 0.24, "grad_norm": 0.35675105970492776, "learning_rate": 1.7717028323430562e-05, "loss": 0.2986, "step": 4227 }, { "epoch": 0.24, "grad_norm": 0.4192480731244511, "learning_rate": 1.7715844680508273e-05, "loss": 0.3561, "step": 4228 }, { "epoch": 0.24, "grad_norm": 0.954576314104161, "learning_rate": 1.7714660770386615e-05, "loss": 0.3737, "step": 4229 }, { "epoch": 0.24, "grad_norm": 0.37465111339648155, "learning_rate": 1.771347659310658e-05, "loss": 0.3111, "step": 4230 }, { "epoch": 0.24, "grad_norm": 1.154325732369229, "learning_rate": 1.7712292148709188e-05, "loss": 0.667, "step": 4231 }, { "epoch": 0.24, "grad_norm": 0.23526929972174873, "learning_rate": 1.7711107437235453e-05, "loss": 0.1821, "step": 4232 }, { "epoch": 0.24, "grad_norm": 0.4190335124027483, "learning_rate": 1.7709922458726395e-05, "loss": 0.3299, "step": 4233 }, { "epoch": 0.24, "grad_norm": 0.8046301903471953, "learning_rate": 1.770873721322305e-05, "loss": 0.5668, "step": 4234 }, { "epoch": 0.24, "grad_norm": 0.3958648304902932, "learning_rate": 1.7707551700766474e-05, "loss": 0.2981, "step": 4235 }, { "epoch": 0.24, "grad_norm": 0.4148428047558691, "learning_rate": 1.770636592139771e-05, "loss": 0.3152, "step": 4236 }, { "epoch": 0.24, "grad_norm": 0.4855183491267279, "learning_rate": 1.7705179875157826e-05, "loss": 0.3586, "step": 4237 }, { "epoch": 0.24, "grad_norm": 0.24397917134812547, "learning_rate": 1.7703993562087895e-05, "loss": 0.1169, "step": 4238 }, { "epoch": 0.24, "grad_norm": 0.3776877212817177, "learning_rate": 1.7702806982229e-05, "loss": 0.2973, "step": 4239 }, { "epoch": 0.24, "grad_norm": 0.5154970926323544, "learning_rate": 1.7701620135622228e-05, "loss": 0.3764, "step": 4240 }, { "epoch": 0.24, "grad_norm": 1.042901070896944, "learning_rate": 1.7700433022308684e-05, "loss": 0.635, "step": 4241 }, { "epoch": 0.24, "grad_norm": 0.3494430310945817, "learning_rate": 1.7699245642329473e-05, "loss": 0.2064, "step": 4242 }, { "epoch": 0.24, "grad_norm": 1.1972989287454718, "learning_rate": 1.7698057995725717e-05, "loss": 0.6586, "step": 4243 }, { "epoch": 0.24, "grad_norm": 0.3358791750280596, "learning_rate": 1.7696870082538544e-05, "loss": 0.2379, "step": 4244 }, { "epoch": 0.24, "grad_norm": 0.4175936922148545, "learning_rate": 1.769568190280909e-05, "loss": 0.2467, "step": 4245 }, { "epoch": 0.24, "grad_norm": 1.2074876580005733, "learning_rate": 1.7694493456578503e-05, "loss": 0.4767, "step": 4246 }, { "epoch": 0.24, "grad_norm": 0.4768857304128634, "learning_rate": 1.769330474388794e-05, "loss": 0.3806, "step": 4247 }, { "epoch": 0.24, "grad_norm": 0.3347435221937712, "learning_rate": 1.7692115764778564e-05, "loss": 0.2254, "step": 4248 }, { "epoch": 0.24, "grad_norm": 1.2119371591283326, "learning_rate": 1.7690926519291548e-05, "loss": 0.6777, "step": 4249 }, { "epoch": 0.24, "grad_norm": 0.5732297049816935, "learning_rate": 1.7689737007468082e-05, "loss": 0.3127, "step": 4250 }, { "epoch": 0.24, "grad_norm": 0.4110880892865378, "learning_rate": 1.768854722934935e-05, "loss": 0.2711, "step": 4251 }, { "epoch": 0.24, "grad_norm": 0.5346347752222697, "learning_rate": 1.7687357184976558e-05, "loss": 0.304, "step": 4252 }, { "epoch": 0.24, "grad_norm": 0.4209249612683654, "learning_rate": 1.7686166874390916e-05, "loss": 0.3113, "step": 4253 }, { "epoch": 0.24, "grad_norm": 0.4029524455386135, "learning_rate": 1.768497629763365e-05, "loss": 0.284, "step": 4254 }, { "epoch": 0.24, "grad_norm": 0.39416868752265927, "learning_rate": 1.7683785454745983e-05, "loss": 0.267, "step": 4255 }, { "epoch": 0.24, "grad_norm": 0.4629564240237894, "learning_rate": 1.768259434576916e-05, "loss": 0.3873, "step": 4256 }, { "epoch": 0.24, "grad_norm": 0.39823641313634994, "learning_rate": 1.768140297074442e-05, "loss": 0.3366, "step": 4257 }, { "epoch": 0.24, "grad_norm": 0.44095663559634946, "learning_rate": 1.7680211329713027e-05, "loss": 0.3567, "step": 4258 }, { "epoch": 0.24, "grad_norm": 0.3111575942167663, "learning_rate": 1.7679019422716244e-05, "loss": 0.2723, "step": 4259 }, { "epoch": 0.24, "grad_norm": 0.32667092720747753, "learning_rate": 1.767782724979535e-05, "loss": 0.2803, "step": 4260 }, { "epoch": 0.24, "grad_norm": 0.3594877209168396, "learning_rate": 1.767663481099163e-05, "loss": 0.15, "step": 4261 }, { "epoch": 0.24, "grad_norm": 0.5838500192099852, "learning_rate": 1.7675442106346377e-05, "loss": 0.4736, "step": 4262 }, { "epoch": 0.24, "grad_norm": 0.34204796905833723, "learning_rate": 1.7674249135900892e-05, "loss": 0.3, "step": 4263 }, { "epoch": 0.24, "grad_norm": 0.4358076489958484, "learning_rate": 1.767305589969649e-05, "loss": 0.3492, "step": 4264 }, { "epoch": 0.25, "grad_norm": 0.21427413315468624, "learning_rate": 1.7671862397774494e-05, "loss": 0.1329, "step": 4265 }, { "epoch": 0.25, "grad_norm": 0.35988453024304295, "learning_rate": 1.767066863017623e-05, "loss": 0.2818, "step": 4266 }, { "epoch": 0.25, "grad_norm": 0.9682883111015376, "learning_rate": 1.766947459694304e-05, "loss": 0.5893, "step": 4267 }, { "epoch": 0.25, "grad_norm": 0.3709235509398682, "learning_rate": 1.766828029811628e-05, "loss": 0.3169, "step": 4268 }, { "epoch": 0.25, "grad_norm": 0.5406561017554826, "learning_rate": 1.7667085733737298e-05, "loss": 0.3701, "step": 4269 }, { "epoch": 0.25, "grad_norm": 0.4233765597253248, "learning_rate": 1.7665890903847468e-05, "loss": 0.3483, "step": 4270 }, { "epoch": 0.25, "grad_norm": 0.2603354062606371, "learning_rate": 1.7664695808488164e-05, "loss": 0.2296, "step": 4271 }, { "epoch": 0.25, "grad_norm": 0.4590253911487976, "learning_rate": 1.766350044770078e-05, "loss": 0.2895, "step": 4272 }, { "epoch": 0.25, "grad_norm": 0.3741916057341281, "learning_rate": 1.76623048215267e-05, "loss": 0.3108, "step": 4273 }, { "epoch": 0.25, "grad_norm": 0.7421170186002759, "learning_rate": 1.7661108930007334e-05, "loss": 0.3889, "step": 4274 }, { "epoch": 0.25, "grad_norm": 0.45537247750720905, "learning_rate": 1.7659912773184095e-05, "loss": 0.2839, "step": 4275 }, { "epoch": 0.25, "grad_norm": 0.3989938266714445, "learning_rate": 1.7658716351098407e-05, "loss": 0.3258, "step": 4276 }, { "epoch": 0.25, "grad_norm": 1.318451272676308, "learning_rate": 1.76575196637917e-05, "loss": 0.7469, "step": 4277 }, { "epoch": 0.25, "grad_norm": 0.22641040750112476, "learning_rate": 1.7656322711305417e-05, "loss": 0.1216, "step": 4278 }, { "epoch": 0.25, "grad_norm": 0.3801159653441589, "learning_rate": 1.7655125493681012e-05, "loss": 0.3474, "step": 4279 }, { "epoch": 0.25, "grad_norm": 0.5049163226978106, "learning_rate": 1.7653928010959936e-05, "loss": 0.3951, "step": 4280 }, { "epoch": 0.25, "grad_norm": 0.3293722513867542, "learning_rate": 1.765273026318366e-05, "loss": 0.2225, "step": 4281 }, { "epoch": 0.25, "grad_norm": 1.349210674604087, "learning_rate": 1.7651532250393666e-05, "loss": 0.7544, "step": 4282 }, { "epoch": 0.25, "grad_norm": 0.3886610404525146, "learning_rate": 1.7650333972631443e-05, "loss": 0.3501, "step": 4283 }, { "epoch": 0.25, "grad_norm": 0.24857630478813894, "learning_rate": 1.7649135429938477e-05, "loss": 0.1807, "step": 4284 }, { "epoch": 0.25, "grad_norm": 0.4471098422892603, "learning_rate": 1.764793662235628e-05, "loss": 0.3165, "step": 4285 }, { "epoch": 0.25, "grad_norm": 0.6148974597430013, "learning_rate": 1.7646737549926376e-05, "loss": 0.4766, "step": 4286 }, { "epoch": 0.25, "grad_norm": 0.37646323777327495, "learning_rate": 1.764553821269027e-05, "loss": 0.216, "step": 4287 }, { "epoch": 0.25, "grad_norm": 0.39924535706099146, "learning_rate": 1.764433861068951e-05, "loss": 0.3246, "step": 4288 }, { "epoch": 0.25, "grad_norm": 1.0115253786277327, "learning_rate": 1.764313874396563e-05, "loss": 0.6713, "step": 4289 }, { "epoch": 0.25, "grad_norm": 0.37546513680467763, "learning_rate": 1.7641938612560182e-05, "loss": 0.2153, "step": 4290 }, { "epoch": 0.25, "grad_norm": 0.31499422778096564, "learning_rate": 1.7640738216514733e-05, "loss": 0.293, "step": 4291 }, { "epoch": 0.25, "grad_norm": 0.341004193409986, "learning_rate": 1.7639537555870844e-05, "loss": 0.2931, "step": 4292 }, { "epoch": 0.25, "grad_norm": 0.5975754148604239, "learning_rate": 1.7638336630670102e-05, "loss": 0.3508, "step": 4293 }, { "epoch": 0.25, "grad_norm": 0.4147059353112628, "learning_rate": 1.763713544095409e-05, "loss": 0.299, "step": 4294 }, { "epoch": 0.25, "grad_norm": 0.3928079898041313, "learning_rate": 1.7635933986764403e-05, "loss": 0.3508, "step": 4295 }, { "epoch": 0.25, "grad_norm": 0.559386737857805, "learning_rate": 1.7634732268142652e-05, "loss": 0.3621, "step": 4296 }, { "epoch": 0.25, "grad_norm": 0.2935297955911469, "learning_rate": 1.7633530285130452e-05, "loss": 0.2541, "step": 4297 }, { "epoch": 0.25, "grad_norm": 0.4767343833347403, "learning_rate": 1.7632328037769423e-05, "loss": 0.2457, "step": 4298 }, { "epoch": 0.25, "grad_norm": 0.3746108706148988, "learning_rate": 1.7631125526101206e-05, "loss": 0.3049, "step": 4299 }, { "epoch": 0.25, "grad_norm": 0.4020633558516471, "learning_rate": 1.7629922750167437e-05, "loss": 0.2688, "step": 4300 }, { "epoch": 0.25, "grad_norm": 0.8783609175595337, "learning_rate": 1.7628719710009777e-05, "loss": 0.6493, "step": 4301 }, { "epoch": 0.25, "grad_norm": 0.3907321374100405, "learning_rate": 1.7627516405669876e-05, "loss": 0.3237, "step": 4302 }, { "epoch": 0.25, "grad_norm": 0.49895888305836794, "learning_rate": 1.7626312837189412e-05, "loss": 0.4026, "step": 4303 }, { "epoch": 0.25, "grad_norm": 0.25989485619288133, "learning_rate": 1.7625109004610065e-05, "loss": 0.1937, "step": 4304 }, { "epoch": 0.25, "grad_norm": 0.7321965943129197, "learning_rate": 1.7623904907973515e-05, "loss": 0.395, "step": 4305 }, { "epoch": 0.25, "grad_norm": 0.43686726649567725, "learning_rate": 1.762270054732147e-05, "loss": 0.3508, "step": 4306 }, { "epoch": 0.25, "grad_norm": 0.37833068558361926, "learning_rate": 1.7621495922695633e-05, "loss": 0.307, "step": 4307 }, { "epoch": 0.25, "grad_norm": 0.6682080526735688, "learning_rate": 1.7620291034137718e-05, "loss": 0.4271, "step": 4308 }, { "epoch": 0.25, "grad_norm": 0.3964426570352359, "learning_rate": 1.7619085881689454e-05, "loss": 0.3013, "step": 4309 }, { "epoch": 0.25, "grad_norm": 0.23222205472935048, "learning_rate": 1.761788046539257e-05, "loss": 0.1052, "step": 4310 }, { "epoch": 0.25, "grad_norm": 0.3912585076504096, "learning_rate": 1.7616674785288815e-05, "loss": 0.301, "step": 4311 }, { "epoch": 0.25, "grad_norm": 0.47490678407484405, "learning_rate": 1.761546884141994e-05, "loss": 0.3829, "step": 4312 }, { "epoch": 0.25, "grad_norm": 0.7637011922535032, "learning_rate": 1.761426263382771e-05, "loss": 0.3716, "step": 4313 }, { "epoch": 0.25, "grad_norm": 0.506704534728562, "learning_rate": 1.761305616255389e-05, "loss": 0.3931, "step": 4314 }, { "epoch": 0.25, "grad_norm": 0.36459501700346764, "learning_rate": 1.761184942764026e-05, "loss": 0.3025, "step": 4315 }, { "epoch": 0.25, "grad_norm": 0.38492087075628456, "learning_rate": 1.761064242912861e-05, "loss": 0.2247, "step": 4316 }, { "epoch": 0.25, "grad_norm": 0.2590479572690357, "learning_rate": 1.7609435167060745e-05, "loss": 0.1378, "step": 4317 }, { "epoch": 0.25, "grad_norm": 0.505463936850372, "learning_rate": 1.7608227641478467e-05, "loss": 0.3552, "step": 4318 }, { "epoch": 0.25, "grad_norm": 0.4637726016282319, "learning_rate": 1.760701985242359e-05, "loss": 0.3487, "step": 4319 }, { "epoch": 0.25, "grad_norm": 0.9181606038737803, "learning_rate": 1.7605811799937946e-05, "loss": 0.3159, "step": 4320 }, { "epoch": 0.25, "grad_norm": 0.4582402750612981, "learning_rate": 1.7604603484063363e-05, "loss": 0.325, "step": 4321 }, { "epoch": 0.25, "grad_norm": 0.3085161695929028, "learning_rate": 1.760339490484169e-05, "loss": 0.2247, "step": 4322 }, { "epoch": 0.25, "grad_norm": 0.3913027721852565, "learning_rate": 1.760218606231478e-05, "loss": 0.2511, "step": 4323 }, { "epoch": 0.25, "grad_norm": 0.5046921571413169, "learning_rate": 1.7600976956524493e-05, "loss": 0.3392, "step": 4324 }, { "epoch": 0.25, "grad_norm": 1.4373319914356162, "learning_rate": 1.7599767587512698e-05, "loss": 0.4993, "step": 4325 }, { "epoch": 0.25, "grad_norm": 0.5257980257453084, "learning_rate": 1.7598557955321282e-05, "loss": 0.2709, "step": 4326 }, { "epoch": 0.25, "grad_norm": 0.32672331423731715, "learning_rate": 1.7597348059992128e-05, "loss": 0.3039, "step": 4327 }, { "epoch": 0.25, "grad_norm": 0.2592941345566562, "learning_rate": 1.7596137901567138e-05, "loss": 0.1403, "step": 4328 }, { "epoch": 0.25, "grad_norm": 1.4062233302214164, "learning_rate": 1.759492748008822e-05, "loss": 0.8632, "step": 4329 }, { "epoch": 0.25, "grad_norm": 0.6436373842109813, "learning_rate": 1.759371679559729e-05, "loss": 0.2666, "step": 4330 }, { "epoch": 0.25, "grad_norm": 0.5413600815168292, "learning_rate": 1.759250584813627e-05, "loss": 0.3359, "step": 4331 }, { "epoch": 0.25, "grad_norm": 0.6795446930850362, "learning_rate": 1.7591294637747104e-05, "loss": 0.4545, "step": 4332 }, { "epoch": 0.25, "grad_norm": 0.34797795924646585, "learning_rate": 1.7590083164471728e-05, "loss": 0.2473, "step": 4333 }, { "epoch": 0.25, "grad_norm": 0.4751834716100019, "learning_rate": 1.75888714283521e-05, "loss": 0.2814, "step": 4334 }, { "epoch": 0.25, "grad_norm": 0.4792771622907987, "learning_rate": 1.758765942943018e-05, "loss": 0.3511, "step": 4335 }, { "epoch": 0.25, "grad_norm": 0.4409034335516681, "learning_rate": 1.7586447167747943e-05, "loss": 0.2339, "step": 4336 }, { "epoch": 0.25, "grad_norm": 1.0216680780688296, "learning_rate": 1.7585234643347363e-05, "loss": 0.4759, "step": 4337 }, { "epoch": 0.25, "grad_norm": 0.3495006405637813, "learning_rate": 1.7584021856270435e-05, "loss": 0.2516, "step": 4338 }, { "epoch": 0.25, "grad_norm": 0.399030800710068, "learning_rate": 1.7582808806559154e-05, "loss": 0.3012, "step": 4339 }, { "epoch": 0.25, "grad_norm": 0.33002710223861065, "learning_rate": 1.7581595494255533e-05, "loss": 0.2138, "step": 4340 }, { "epoch": 0.25, "grad_norm": 0.6027956584907409, "learning_rate": 1.7580381919401586e-05, "loss": 0.4314, "step": 4341 }, { "epoch": 0.25, "grad_norm": 0.3981500370131176, "learning_rate": 1.757916808203934e-05, "loss": 0.3312, "step": 4342 }, { "epoch": 0.25, "grad_norm": 0.36799863739960603, "learning_rate": 1.757795398221083e-05, "loss": 0.3211, "step": 4343 }, { "epoch": 0.25, "grad_norm": 0.44567616701273444, "learning_rate": 1.7576739619958096e-05, "loss": 0.2847, "step": 4344 }, { "epoch": 0.25, "grad_norm": 0.37317146489865405, "learning_rate": 1.7575524995323195e-05, "loss": 0.2739, "step": 4345 }, { "epoch": 0.25, "grad_norm": 0.38858376631551034, "learning_rate": 1.757431010834819e-05, "loss": 0.2789, "step": 4346 }, { "epoch": 0.25, "grad_norm": 0.7434345776855413, "learning_rate": 1.7573094959075148e-05, "loss": 0.5022, "step": 4347 }, { "epoch": 0.25, "grad_norm": 0.3514566700372044, "learning_rate": 1.757187954754616e-05, "loss": 0.2869, "step": 4348 }, { "epoch": 0.25, "grad_norm": 0.7291519504367259, "learning_rate": 1.7570663873803305e-05, "loss": 0.4462, "step": 4349 }, { "epoch": 0.25, "grad_norm": 0.2717965523574744, "learning_rate": 1.7569447937888686e-05, "loss": 0.2532, "step": 4350 }, { "epoch": 0.25, "grad_norm": 0.35466411632539085, "learning_rate": 1.756823173984441e-05, "loss": 0.29, "step": 4351 }, { "epoch": 0.25, "grad_norm": 1.4631381574190476, "learning_rate": 1.7567015279712598e-05, "loss": 0.7348, "step": 4352 }, { "epoch": 0.25, "grad_norm": 0.8172752249272174, "learning_rate": 1.756579855753537e-05, "loss": 0.3552, "step": 4353 }, { "epoch": 0.25, "grad_norm": 0.3753765285233513, "learning_rate": 1.756458157335486e-05, "loss": 0.3274, "step": 4354 }, { "epoch": 0.25, "grad_norm": 0.39089010889022724, "learning_rate": 1.756336432721322e-05, "loss": 0.3154, "step": 4355 }, { "epoch": 0.25, "grad_norm": 0.23367638912187802, "learning_rate": 1.7562146819152595e-05, "loss": 0.1237, "step": 4356 }, { "epoch": 0.25, "grad_norm": 0.4181181779260803, "learning_rate": 1.7560929049215155e-05, "loss": 0.3227, "step": 4357 }, { "epoch": 0.25, "grad_norm": 0.8631313679687664, "learning_rate": 1.7559711017443062e-05, "loss": 0.4784, "step": 4358 }, { "epoch": 0.25, "grad_norm": 0.41323073775920643, "learning_rate": 1.7558492723878507e-05, "loss": 0.3094, "step": 4359 }, { "epoch": 0.25, "grad_norm": 0.3782931761755461, "learning_rate": 1.755727416856367e-05, "loss": 0.3099, "step": 4360 }, { "epoch": 0.25, "grad_norm": 1.2000243266515298, "learning_rate": 1.7556055351540757e-05, "loss": 0.8228, "step": 4361 }, { "epoch": 0.25, "grad_norm": 0.2778527231030494, "learning_rate": 1.7554836272851967e-05, "loss": 0.1797, "step": 4362 }, { "epoch": 0.25, "grad_norm": 0.34868729040736435, "learning_rate": 1.7553616932539522e-05, "loss": 0.2861, "step": 4363 }, { "epoch": 0.25, "grad_norm": 1.0895145164950082, "learning_rate": 1.7552397330645654e-05, "loss": 0.5217, "step": 4364 }, { "epoch": 0.25, "grad_norm": 0.6910131523731271, "learning_rate": 1.7551177467212585e-05, "loss": 0.508, "step": 4365 }, { "epoch": 0.25, "grad_norm": 0.3367083352550415, "learning_rate": 1.7549957342282567e-05, "loss": 0.2234, "step": 4366 }, { "epoch": 0.25, "grad_norm": 0.4083921331597858, "learning_rate": 1.7548736955897852e-05, "loss": 0.3233, "step": 4367 }, { "epoch": 0.25, "grad_norm": 0.310682312374531, "learning_rate": 1.75475163081007e-05, "loss": 0.1883, "step": 4368 }, { "epoch": 0.25, "grad_norm": 0.35150586064186157, "learning_rate": 1.7546295398933383e-05, "loss": 0.2042, "step": 4369 }, { "epoch": 0.25, "grad_norm": 1.1970685231744054, "learning_rate": 1.754507422843818e-05, "loss": 0.5545, "step": 4370 }, { "epoch": 0.25, "grad_norm": 0.390038478824965, "learning_rate": 1.7543852796657382e-05, "loss": 0.3434, "step": 4371 }, { "epoch": 0.25, "grad_norm": 0.3332006509863933, "learning_rate": 1.7542631103633284e-05, "loss": 0.1989, "step": 4372 }, { "epoch": 0.25, "grad_norm": 1.2245310547498793, "learning_rate": 1.7541409149408198e-05, "loss": 0.8196, "step": 4373 }, { "epoch": 0.25, "grad_norm": 0.2594242254872551, "learning_rate": 1.7540186934024434e-05, "loss": 0.2391, "step": 4374 }, { "epoch": 0.25, "grad_norm": 0.3663500648240596, "learning_rate": 1.7538964457524326e-05, "loss": 0.1984, "step": 4375 }, { "epoch": 0.25, "grad_norm": 1.3797280623086665, "learning_rate": 1.7537741719950197e-05, "loss": 0.4802, "step": 4376 }, { "epoch": 0.25, "grad_norm": 0.9288280071064241, "learning_rate": 1.75365187213444e-05, "loss": 0.5739, "step": 4377 }, { "epoch": 0.25, "grad_norm": 0.4592788301354964, "learning_rate": 1.7535295461749285e-05, "loss": 0.3199, "step": 4378 }, { "epoch": 0.25, "grad_norm": 0.371263615108485, "learning_rate": 1.753407194120721e-05, "loss": 0.2718, "step": 4379 }, { "epoch": 0.25, "grad_norm": 0.4478970966584819, "learning_rate": 1.753284815976055e-05, "loss": 0.2777, "step": 4380 }, { "epoch": 0.25, "grad_norm": 0.43708047320580123, "learning_rate": 1.7531624117451678e-05, "loss": 0.263, "step": 4381 }, { "epoch": 0.25, "grad_norm": 0.7647331876742203, "learning_rate": 1.753039981432299e-05, "loss": 0.3161, "step": 4382 }, { "epoch": 0.25, "grad_norm": 1.2246597599600573, "learning_rate": 1.7529175250416878e-05, "loss": 0.5649, "step": 4383 }, { "epoch": 0.25, "grad_norm": 0.4019441310451681, "learning_rate": 1.7527950425775753e-05, "loss": 0.2567, "step": 4384 }, { "epoch": 0.25, "grad_norm": 0.932022146410723, "learning_rate": 1.7526725340442028e-05, "loss": 0.5759, "step": 4385 }, { "epoch": 0.25, "grad_norm": 0.31841220554720956, "learning_rate": 1.7525499994458124e-05, "loss": 0.2613, "step": 4386 }, { "epoch": 0.25, "grad_norm": 0.37423694836917065, "learning_rate": 1.7524274387866483e-05, "loss": 0.2822, "step": 4387 }, { "epoch": 0.25, "grad_norm": 0.5350548955670364, "learning_rate": 1.7523048520709543e-05, "loss": 0.2867, "step": 4388 }, { "epoch": 0.25, "grad_norm": 1.8947754710817244, "learning_rate": 1.7521822393029758e-05, "loss": 0.5852, "step": 4389 }, { "epoch": 0.25, "grad_norm": 0.3998881128220424, "learning_rate": 1.7520596004869584e-05, "loss": 0.2702, "step": 4390 }, { "epoch": 0.25, "grad_norm": 0.5361072708652229, "learning_rate": 1.7519369356271492e-05, "loss": 0.3904, "step": 4391 }, { "epoch": 0.25, "grad_norm": 0.8123518823570961, "learning_rate": 1.751814244727797e-05, "loss": 0.3895, "step": 4392 }, { "epoch": 0.25, "grad_norm": 0.6059125687840098, "learning_rate": 1.751691527793149e-05, "loss": 0.339, "step": 4393 }, { "epoch": 0.25, "grad_norm": 0.3313177646320267, "learning_rate": 1.7515687848274562e-05, "loss": 0.2728, "step": 4394 }, { "epoch": 0.25, "grad_norm": 0.35552239325197443, "learning_rate": 1.7514460158349686e-05, "loss": 0.2089, "step": 4395 }, { "epoch": 0.25, "grad_norm": 0.4505421213963791, "learning_rate": 1.7513232208199378e-05, "loss": 0.3099, "step": 4396 }, { "epoch": 0.25, "grad_norm": 0.6644199425595185, "learning_rate": 1.751200399786616e-05, "loss": 0.4381, "step": 4397 }, { "epoch": 0.25, "grad_norm": 0.4423052272917962, "learning_rate": 1.7510775527392566e-05, "loss": 0.2984, "step": 4398 }, { "epoch": 0.25, "grad_norm": 0.6068964287449121, "learning_rate": 1.7509546796821144e-05, "loss": 0.3403, "step": 4399 }, { "epoch": 0.25, "grad_norm": 0.44069931145334174, "learning_rate": 1.7508317806194436e-05, "loss": 0.316, "step": 4400 }, { "epoch": 0.25, "grad_norm": 0.30685445519676646, "learning_rate": 1.7507088555555003e-05, "loss": 0.1484, "step": 4401 }, { "epoch": 0.25, "grad_norm": 0.319991819668123, "learning_rate": 1.750585904494542e-05, "loss": 0.2779, "step": 4402 }, { "epoch": 0.25, "grad_norm": 0.5975686561638618, "learning_rate": 1.7504629274408257e-05, "loss": 0.4789, "step": 4403 }, { "epoch": 0.25, "grad_norm": 0.8063943453403252, "learning_rate": 1.750339924398611e-05, "loss": 0.4771, "step": 4404 }, { "epoch": 0.25, "grad_norm": 0.3169912413687381, "learning_rate": 1.7502168953721564e-05, "loss": 0.2506, "step": 4405 }, { "epoch": 0.25, "grad_norm": 0.5907726571115687, "learning_rate": 1.7500938403657235e-05, "loss": 0.4054, "step": 4406 }, { "epoch": 0.25, "grad_norm": 0.28948065757597125, "learning_rate": 1.7499707593835728e-05, "loss": 0.1955, "step": 4407 }, { "epoch": 0.25, "grad_norm": 0.346293298851469, "learning_rate": 1.749847652429967e-05, "loss": 0.2249, "step": 4408 }, { "epoch": 0.25, "grad_norm": 1.1109798245103102, "learning_rate": 1.7497245195091694e-05, "loss": 0.6656, "step": 4409 }, { "epoch": 0.25, "grad_norm": 0.49764755117641957, "learning_rate": 1.749601360625444e-05, "loss": 0.3518, "step": 4410 }, { "epoch": 0.25, "grad_norm": 0.6586021135670197, "learning_rate": 1.7494781757830554e-05, "loss": 0.2586, "step": 4411 }, { "epoch": 0.25, "grad_norm": 0.415911984220887, "learning_rate": 1.74935496498627e-05, "loss": 0.3387, "step": 4412 }, { "epoch": 0.25, "grad_norm": 0.3528941062652166, "learning_rate": 1.7492317282393543e-05, "loss": 0.1973, "step": 4413 }, { "epoch": 0.25, "grad_norm": 0.5379242200023283, "learning_rate": 1.7491084655465762e-05, "loss": 0.3619, "step": 4414 }, { "epoch": 0.25, "grad_norm": 0.4547219643409678, "learning_rate": 1.748985176912204e-05, "loss": 0.2714, "step": 4415 }, { "epoch": 0.25, "grad_norm": 0.9862381216448781, "learning_rate": 1.7488618623405075e-05, "loss": 0.5093, "step": 4416 }, { "epoch": 0.25, "grad_norm": 0.6014511226082759, "learning_rate": 1.748738521835757e-05, "loss": 0.3081, "step": 4417 }, { "epoch": 0.25, "grad_norm": 0.42678893832728415, "learning_rate": 1.7486151554022233e-05, "loss": 0.266, "step": 4418 }, { "epoch": 0.25, "grad_norm": 0.29200160850606793, "learning_rate": 1.748491763044179e-05, "loss": 0.1875, "step": 4419 }, { "epoch": 0.25, "grad_norm": 0.609705572125106, "learning_rate": 1.7483683447658976e-05, "loss": 0.3825, "step": 4420 }, { "epoch": 0.25, "grad_norm": 0.6734514732700916, "learning_rate": 1.748244900571652e-05, "loss": 0.3149, "step": 4421 }, { "epoch": 0.25, "grad_norm": 0.5674551833098238, "learning_rate": 1.748121430465718e-05, "loss": 0.3457, "step": 4422 }, { "epoch": 0.25, "grad_norm": 0.5692437298539499, "learning_rate": 1.747997934452371e-05, "loss": 0.3981, "step": 4423 }, { "epoch": 0.25, "grad_norm": 0.40695746244632547, "learning_rate": 1.7478744125358877e-05, "loss": 0.2523, "step": 4424 }, { "epoch": 0.25, "grad_norm": 0.26111116306772175, "learning_rate": 1.7477508647205456e-05, "loss": 0.2255, "step": 4425 }, { "epoch": 0.25, "grad_norm": 0.3739424274051894, "learning_rate": 1.7476272910106233e-05, "loss": 0.2915, "step": 4426 }, { "epoch": 0.25, "grad_norm": 0.5242746749994995, "learning_rate": 1.7475036914104e-05, "loss": 0.4205, "step": 4427 }, { "epoch": 0.25, "grad_norm": 0.5970732747562592, "learning_rate": 1.747380065924156e-05, "loss": 0.409, "step": 4428 }, { "epoch": 0.25, "grad_norm": 0.7671707463296635, "learning_rate": 1.7472564145561725e-05, "loss": 0.4088, "step": 4429 }, { "epoch": 0.25, "grad_norm": 0.3501468041889073, "learning_rate": 1.7471327373107317e-05, "loss": 0.2827, "step": 4430 }, { "epoch": 0.25, "grad_norm": 0.3703347723536773, "learning_rate": 1.747009034192116e-05, "loss": 0.1713, "step": 4431 }, { "epoch": 0.25, "grad_norm": 0.6832880354899038, "learning_rate": 1.7468853052046095e-05, "loss": 0.4288, "step": 4432 }, { "epoch": 0.25, "grad_norm": 0.385508552267273, "learning_rate": 1.7467615503524973e-05, "loss": 0.3287, "step": 4433 }, { "epoch": 0.25, "grad_norm": 0.2565484503974381, "learning_rate": 1.7466377696400646e-05, "loss": 0.2168, "step": 4434 }, { "epoch": 0.25, "grad_norm": 0.6083624161878634, "learning_rate": 1.746513963071598e-05, "loss": 0.374, "step": 4435 }, { "epoch": 0.25, "grad_norm": 0.4290866249248875, "learning_rate": 1.746390130651385e-05, "loss": 0.3343, "step": 4436 }, { "epoch": 0.25, "grad_norm": 0.4210646856691826, "learning_rate": 1.746266272383714e-05, "loss": 0.1923, "step": 4437 }, { "epoch": 0.25, "grad_norm": 0.31382665898340373, "learning_rate": 1.746142388272874e-05, "loss": 0.2997, "step": 4438 }, { "epoch": 0.26, "grad_norm": 0.40780261576434734, "learning_rate": 1.746018478323155e-05, "loss": 0.3683, "step": 4439 }, { "epoch": 0.26, "grad_norm": 0.42413178269441665, "learning_rate": 1.7458945425388484e-05, "loss": 0.2996, "step": 4440 }, { "epoch": 0.26, "grad_norm": 0.3334174550196039, "learning_rate": 1.7457705809242455e-05, "loss": 0.1964, "step": 4441 }, { "epoch": 0.26, "grad_norm": 0.3571659636822857, "learning_rate": 1.74564659348364e-05, "loss": 0.3006, "step": 4442 }, { "epoch": 0.26, "grad_norm": 0.8945360836656251, "learning_rate": 1.7455225802213246e-05, "loss": 0.5754, "step": 4443 }, { "epoch": 0.26, "grad_norm": 0.5110983783626839, "learning_rate": 1.7453985411415945e-05, "loss": 0.3305, "step": 4444 }, { "epoch": 0.26, "grad_norm": 0.40930073398439676, "learning_rate": 1.745274476248745e-05, "loss": 0.3161, "step": 4445 }, { "epoch": 0.26, "grad_norm": 0.2968822772265499, "learning_rate": 1.7451503855470722e-05, "loss": 0.2828, "step": 4446 }, { "epoch": 0.26, "grad_norm": 0.27604539059250005, "learning_rate": 1.745026269040874e-05, "loss": 0.1201, "step": 4447 }, { "epoch": 0.26, "grad_norm": 0.4091950250734807, "learning_rate": 1.744902126734448e-05, "loss": 0.305, "step": 4448 }, { "epoch": 0.26, "grad_norm": 0.4688130768266721, "learning_rate": 1.744777958632093e-05, "loss": 0.3995, "step": 4449 }, { "epoch": 0.26, "grad_norm": 0.4199210326535034, "learning_rate": 1.74465376473811e-05, "loss": 0.2857, "step": 4450 }, { "epoch": 0.26, "grad_norm": 0.36946539820255947, "learning_rate": 1.7445295450567985e-05, "loss": 0.3241, "step": 4451 }, { "epoch": 0.26, "grad_norm": 0.30417635766535905, "learning_rate": 1.7444052995924612e-05, "loss": 0.2137, "step": 4452 }, { "epoch": 0.26, "grad_norm": 0.388316362774309, "learning_rate": 1.7442810283494002e-05, "loss": 0.3331, "step": 4453 }, { "epoch": 0.26, "grad_norm": 0.33429814252358525, "learning_rate": 1.7441567313319194e-05, "loss": 0.2181, "step": 4454 }, { "epoch": 0.26, "grad_norm": 0.7626510299733518, "learning_rate": 1.7440324085443227e-05, "loss": 0.4604, "step": 4455 }, { "epoch": 0.26, "grad_norm": 0.5831532138070068, "learning_rate": 1.7439080599909163e-05, "loss": 0.4221, "step": 4456 }, { "epoch": 0.26, "grad_norm": 0.29553524419263844, "learning_rate": 1.743783685676005e-05, "loss": 0.2498, "step": 4457 }, { "epoch": 0.26, "grad_norm": 0.49518512762179856, "learning_rate": 1.743659285603897e-05, "loss": 0.3983, "step": 4458 }, { "epoch": 0.26, "grad_norm": 0.25390198843159745, "learning_rate": 1.7435348597789e-05, "loss": 0.1813, "step": 4459 }, { "epoch": 0.26, "grad_norm": 0.36903051665820324, "learning_rate": 1.7434104082053227e-05, "loss": 0.2264, "step": 4460 }, { "epoch": 0.26, "grad_norm": 0.848917048411256, "learning_rate": 1.743285930887475e-05, "loss": 0.4192, "step": 4461 }, { "epoch": 0.26, "grad_norm": 0.5239600697847725, "learning_rate": 1.7431614278296672e-05, "loss": 0.3618, "step": 4462 }, { "epoch": 0.26, "grad_norm": 0.39962990775437407, "learning_rate": 1.7430368990362114e-05, "loss": 0.2368, "step": 4463 }, { "epoch": 0.26, "grad_norm": 0.41735657916516267, "learning_rate": 1.7429123445114196e-05, "loss": 0.2857, "step": 4464 }, { "epoch": 0.26, "grad_norm": 0.32698671191140255, "learning_rate": 1.7427877642596053e-05, "loss": 0.2875, "step": 4465 }, { "epoch": 0.26, "grad_norm": 0.3624786033802996, "learning_rate": 1.7426631582850827e-05, "loss": 0.2762, "step": 4466 }, { "epoch": 0.26, "grad_norm": 0.5845800939907507, "learning_rate": 1.742538526592167e-05, "loss": 0.4098, "step": 4467 }, { "epoch": 0.26, "grad_norm": 0.9870741959193776, "learning_rate": 1.742413869185174e-05, "loss": 0.6744, "step": 4468 }, { "epoch": 0.26, "grad_norm": 0.42257891785206114, "learning_rate": 1.7422891860684202e-05, "loss": 0.2697, "step": 4469 }, { "epoch": 0.26, "grad_norm": 0.32188754467568104, "learning_rate": 1.7421644772462247e-05, "loss": 0.2059, "step": 4470 }, { "epoch": 0.26, "grad_norm": 0.4304055386861345, "learning_rate": 1.7420397427229045e-05, "loss": 0.2905, "step": 4471 }, { "epoch": 0.26, "grad_norm": 0.33978838217773394, "learning_rate": 1.7419149825027802e-05, "loss": 0.3002, "step": 4472 }, { "epoch": 0.26, "grad_norm": 0.3788118884308564, "learning_rate": 1.7417901965901717e-05, "loss": 0.3153, "step": 4473 }, { "epoch": 0.26, "grad_norm": 0.521900293581829, "learning_rate": 1.7416653849894008e-05, "loss": 0.3964, "step": 4474 }, { "epoch": 0.26, "grad_norm": 0.3665116506708937, "learning_rate": 1.7415405477047895e-05, "loss": 0.2777, "step": 4475 }, { "epoch": 0.26, "grad_norm": 0.9883665529741489, "learning_rate": 1.741415684740661e-05, "loss": 0.4042, "step": 4476 }, { "epoch": 0.26, "grad_norm": 0.24629002200117167, "learning_rate": 1.741290796101339e-05, "loss": 0.2254, "step": 4477 }, { "epoch": 0.26, "grad_norm": 0.3537787580716675, "learning_rate": 1.7411658817911487e-05, "loss": 0.3092, "step": 4478 }, { "epoch": 0.26, "grad_norm": 0.6388605821733183, "learning_rate": 1.741040941814416e-05, "loss": 0.4883, "step": 4479 }, { "epoch": 0.26, "grad_norm": 0.6188260255813043, "learning_rate": 1.740915976175467e-05, "loss": 0.3133, "step": 4480 }, { "epoch": 0.26, "grad_norm": 0.4176908201497853, "learning_rate": 1.74079098487863e-05, "loss": 0.3111, "step": 4481 }, { "epoch": 0.26, "grad_norm": 0.45129474320870905, "learning_rate": 1.7406659679282326e-05, "loss": 0.3424, "step": 4482 }, { "epoch": 0.26, "grad_norm": 0.4174295494281297, "learning_rate": 1.740540925328605e-05, "loss": 0.2623, "step": 4483 }, { "epoch": 0.26, "grad_norm": 0.3767565914818703, "learning_rate": 1.7404158570840765e-05, "loss": 0.282, "step": 4484 }, { "epoch": 0.26, "grad_norm": 0.2783113500296502, "learning_rate": 1.7402907631989793e-05, "loss": 0.2917, "step": 4485 }, { "epoch": 0.26, "grad_norm": 0.9873955581325996, "learning_rate": 1.7401656436776445e-05, "loss": 0.4259, "step": 4486 }, { "epoch": 0.26, "grad_norm": 0.3845423744262796, "learning_rate": 1.740040498524405e-05, "loss": 0.2788, "step": 4487 }, { "epoch": 0.26, "grad_norm": 0.7272016165043925, "learning_rate": 1.7399153277435954e-05, "loss": 0.5691, "step": 4488 }, { "epoch": 0.26, "grad_norm": 0.40153871463464896, "learning_rate": 1.7397901313395497e-05, "loss": 0.2958, "step": 4489 }, { "epoch": 0.26, "grad_norm": 0.39987319741953187, "learning_rate": 1.7396649093166034e-05, "loss": 0.3051, "step": 4490 }, { "epoch": 0.26, "grad_norm": 0.35067447545845665, "learning_rate": 1.7395396616790932e-05, "loss": 0.1451, "step": 4491 }, { "epoch": 0.26, "grad_norm": 0.9550529008649429, "learning_rate": 1.7394143884313562e-05, "loss": 0.7368, "step": 4492 }, { "epoch": 0.26, "grad_norm": 0.30621419928245114, "learning_rate": 1.7392890895777305e-05, "loss": 0.2344, "step": 4493 }, { "epoch": 0.26, "grad_norm": 1.4312493233871788, "learning_rate": 1.7391637651225556e-05, "loss": 0.7849, "step": 4494 }, { "epoch": 0.26, "grad_norm": 0.6076999334668871, "learning_rate": 1.7390384150701715e-05, "loss": 0.4489, "step": 4495 }, { "epoch": 0.26, "grad_norm": 0.38957330875145113, "learning_rate": 1.738913039424919e-05, "loss": 0.1873, "step": 4496 }, { "epoch": 0.26, "grad_norm": 0.34020086053360765, "learning_rate": 1.7387876381911395e-05, "loss": 0.2616, "step": 4497 }, { "epoch": 0.26, "grad_norm": 0.43665721845984257, "learning_rate": 1.7386622113731758e-05, "loss": 0.3332, "step": 4498 }, { "epoch": 0.26, "grad_norm": 0.35317580614898686, "learning_rate": 1.7385367589753714e-05, "loss": 0.2111, "step": 4499 }, { "epoch": 0.26, "grad_norm": 0.8179148641200064, "learning_rate": 1.738411281002071e-05, "loss": 0.4621, "step": 4500 }, { "epoch": 0.26, "grad_norm": 0.37262485183921623, "learning_rate": 1.7382857774576195e-05, "loss": 0.3425, "step": 4501 }, { "epoch": 0.26, "grad_norm": 0.4434924061456271, "learning_rate": 1.7381602483463637e-05, "loss": 0.2924, "step": 4502 }, { "epoch": 0.26, "grad_norm": 0.23825968934381375, "learning_rate": 1.73803469367265e-05, "loss": 0.1482, "step": 4503 }, { "epoch": 0.26, "grad_norm": 1.3993363887214298, "learning_rate": 1.7379091134408265e-05, "loss": 0.9296, "step": 4504 }, { "epoch": 0.26, "grad_norm": 0.3323388490174505, "learning_rate": 1.737783507655242e-05, "loss": 0.2951, "step": 4505 }, { "epoch": 0.26, "grad_norm": 0.596493427646773, "learning_rate": 1.7376578763202465e-05, "loss": 0.3465, "step": 4506 }, { "epoch": 0.26, "grad_norm": 0.85841984621207, "learning_rate": 1.7375322194401905e-05, "loss": 0.5163, "step": 4507 }, { "epoch": 0.26, "grad_norm": 0.3979185219008631, "learning_rate": 1.7374065370194253e-05, "loss": 0.3023, "step": 4508 }, { "epoch": 0.26, "grad_norm": 0.3792909090305572, "learning_rate": 1.7372808290623034e-05, "loss": 0.2717, "step": 4509 }, { "epoch": 0.26, "grad_norm": 0.3288322471843398, "learning_rate": 1.7371550955731786e-05, "loss": 0.2334, "step": 4510 }, { "epoch": 0.26, "grad_norm": 0.3603072373462248, "learning_rate": 1.737029336556404e-05, "loss": 0.2721, "step": 4511 }, { "epoch": 0.26, "grad_norm": 1.0258413937489468, "learning_rate": 1.7369035520163355e-05, "loss": 0.4195, "step": 4512 }, { "epoch": 0.26, "grad_norm": 0.38788937506634363, "learning_rate": 1.7367777419573285e-05, "loss": 0.3458, "step": 4513 }, { "epoch": 0.26, "grad_norm": 0.3663495306117589, "learning_rate": 1.73665190638374e-05, "loss": 0.2827, "step": 4514 }, { "epoch": 0.26, "grad_norm": 0.35904037745450235, "learning_rate": 1.7365260452999277e-05, "loss": 0.239, "step": 4515 }, { "epoch": 0.26, "grad_norm": 0.368230106992383, "learning_rate": 1.73640015871025e-05, "loss": 0.2299, "step": 4516 }, { "epoch": 0.26, "grad_norm": 0.4124820648794057, "learning_rate": 1.7362742466190668e-05, "loss": 0.2929, "step": 4517 }, { "epoch": 0.26, "grad_norm": 0.6313760443488636, "learning_rate": 1.736148309030738e-05, "loss": 0.3801, "step": 4518 }, { "epoch": 0.26, "grad_norm": 0.8649835515367721, "learning_rate": 1.7360223459496244e-05, "loss": 0.3975, "step": 4519 }, { "epoch": 0.26, "grad_norm": 0.3995323752107863, "learning_rate": 1.735896357380089e-05, "loss": 0.2723, "step": 4520 }, { "epoch": 0.26, "grad_norm": 0.34544221499812, "learning_rate": 1.7357703433264945e-05, "loss": 0.3229, "step": 4521 }, { "epoch": 0.26, "grad_norm": 0.2902986551221321, "learning_rate": 1.7356443037932046e-05, "loss": 0.1643, "step": 4522 }, { "epoch": 0.26, "grad_norm": 0.4838021066394585, "learning_rate": 1.7355182387845843e-05, "loss": 0.3209, "step": 4523 }, { "epoch": 0.26, "grad_norm": 0.5004591776306092, "learning_rate": 1.7353921483049985e-05, "loss": 0.3296, "step": 4524 }, { "epoch": 0.26, "grad_norm": 0.4258313917846841, "learning_rate": 1.7352660323588146e-05, "loss": 0.2727, "step": 4525 }, { "epoch": 0.26, "grad_norm": 0.4033155476729041, "learning_rate": 1.7351398909503995e-05, "loss": 0.3081, "step": 4526 }, { "epoch": 0.26, "grad_norm": 0.5300104630284196, "learning_rate": 1.7350137240841218e-05, "loss": 0.4131, "step": 4527 }, { "epoch": 0.26, "grad_norm": 0.6412206824998394, "learning_rate": 1.73488753176435e-05, "loss": 0.5222, "step": 4528 }, { "epoch": 0.26, "grad_norm": 0.3077706381580906, "learning_rate": 1.7347613139954548e-05, "loss": 0.2402, "step": 4529 }, { "epoch": 0.26, "grad_norm": 0.6688065829605588, "learning_rate": 1.734635070781807e-05, "loss": 0.538, "step": 4530 }, { "epoch": 0.26, "grad_norm": 0.35909638887144074, "learning_rate": 1.734508802127778e-05, "loss": 0.2608, "step": 4531 }, { "epoch": 0.26, "grad_norm": 0.41579088397011704, "learning_rate": 1.734382508037741e-05, "loss": 0.1842, "step": 4532 }, { "epoch": 0.26, "grad_norm": 0.4211404512665711, "learning_rate": 1.7342561885160694e-05, "loss": 0.3147, "step": 4533 }, { "epoch": 0.26, "grad_norm": 0.8256111760795807, "learning_rate": 1.7341298435671373e-05, "loss": 0.5434, "step": 4534 }, { "epoch": 0.26, "grad_norm": 0.5292156928559316, "learning_rate": 1.7340034731953204e-05, "loss": 0.2707, "step": 4535 }, { "epoch": 0.26, "grad_norm": 0.3626707079391625, "learning_rate": 1.7338770774049948e-05, "loss": 0.2969, "step": 4536 }, { "epoch": 0.26, "grad_norm": 0.2987530938150695, "learning_rate": 1.733750656200538e-05, "loss": 0.2727, "step": 4537 }, { "epoch": 0.26, "grad_norm": 0.41984643592449417, "learning_rate": 1.733624209586327e-05, "loss": 0.0769, "step": 4538 }, { "epoch": 0.26, "grad_norm": 0.4498582260094257, "learning_rate": 1.733497737566741e-05, "loss": 0.3627, "step": 4539 }, { "epoch": 0.26, "grad_norm": 0.7896624131435914, "learning_rate": 1.7333712401461602e-05, "loss": 0.5321, "step": 4540 }, { "epoch": 0.26, "grad_norm": 0.33277012471811757, "learning_rate": 1.7332447173289648e-05, "loss": 0.3177, "step": 4541 }, { "epoch": 0.26, "grad_norm": 0.4780594140246693, "learning_rate": 1.7331181691195364e-05, "loss": 0.2849, "step": 4542 }, { "epoch": 0.26, "grad_norm": 0.35576731933438077, "learning_rate": 1.7329915955222578e-05, "loss": 0.2063, "step": 4543 }, { "epoch": 0.26, "grad_norm": 0.5907181564894486, "learning_rate": 1.7328649965415108e-05, "loss": 0.358, "step": 4544 }, { "epoch": 0.26, "grad_norm": 0.31900993570485137, "learning_rate": 1.732738372181681e-05, "loss": 0.2571, "step": 4545 }, { "epoch": 0.26, "grad_norm": 0.8407790010179014, "learning_rate": 1.7326117224471534e-05, "loss": 0.4832, "step": 4546 }, { "epoch": 0.26, "grad_norm": 0.6475543922825446, "learning_rate": 1.7324850473423124e-05, "loss": 0.4714, "step": 4547 }, { "epoch": 0.26, "grad_norm": 0.4103670098764781, "learning_rate": 1.7323583468715464e-05, "loss": 0.2612, "step": 4548 }, { "epoch": 0.26, "grad_norm": 0.25746760920249057, "learning_rate": 1.732231621039242e-05, "loss": 0.2225, "step": 4549 }, { "epoch": 0.26, "grad_norm": 0.45385734178505405, "learning_rate": 1.732104869849788e-05, "loss": 0.3283, "step": 4550 }, { "epoch": 0.26, "grad_norm": 0.5715738690411651, "learning_rate": 1.731978093307574e-05, "loss": 0.3138, "step": 4551 }, { "epoch": 0.26, "grad_norm": 0.47645140136250524, "learning_rate": 1.7318512914169903e-05, "loss": 0.3329, "step": 4552 }, { "epoch": 0.26, "grad_norm": 0.8370401653202683, "learning_rate": 1.7317244641824275e-05, "loss": 0.5012, "step": 4553 }, { "epoch": 0.26, "grad_norm": 0.40217735496250034, "learning_rate": 1.731597611608278e-05, "loss": 0.319, "step": 4554 }, { "epoch": 0.26, "grad_norm": 0.22407629826832084, "learning_rate": 1.731470733698935e-05, "loss": 0.137, "step": 4555 }, { "epoch": 0.26, "grad_norm": 0.43215683904816754, "learning_rate": 1.7313438304587918e-05, "loss": 0.3071, "step": 4556 }, { "epoch": 0.26, "grad_norm": 0.318869649065026, "learning_rate": 1.731216901892243e-05, "loss": 0.2901, "step": 4557 }, { "epoch": 0.26, "grad_norm": 0.8355303688845711, "learning_rate": 1.7310899480036845e-05, "loss": 0.3845, "step": 4558 }, { "epoch": 0.26, "grad_norm": 0.8346211453513296, "learning_rate": 1.7309629687975126e-05, "loss": 0.5678, "step": 4559 }, { "epoch": 0.26, "grad_norm": 0.3490837116866384, "learning_rate": 1.730835964278124e-05, "loss": 0.2892, "step": 4560 }, { "epoch": 0.26, "grad_norm": 0.32214562875779884, "learning_rate": 1.7307089344499178e-05, "loss": 0.2011, "step": 4561 }, { "epoch": 0.26, "grad_norm": 0.4094468270950029, "learning_rate": 1.730581879317293e-05, "loss": 0.2334, "step": 4562 }, { "epoch": 0.26, "grad_norm": 0.6224042650550641, "learning_rate": 1.730454798884649e-05, "loss": 0.3563, "step": 4563 }, { "epoch": 0.26, "grad_norm": 0.379128591299934, "learning_rate": 1.7303276931563862e-05, "loss": 0.2706, "step": 4564 }, { "epoch": 0.26, "grad_norm": 0.6789909142350193, "learning_rate": 1.730200562136907e-05, "loss": 0.4433, "step": 4565 }, { "epoch": 0.26, "grad_norm": 0.4124711177122947, "learning_rate": 1.7300734058306138e-05, "loss": 0.335, "step": 4566 }, { "epoch": 0.26, "grad_norm": 0.2858262013205675, "learning_rate": 1.72994622424191e-05, "loss": 0.2076, "step": 4567 }, { "epoch": 0.26, "grad_norm": 0.30957498941420897, "learning_rate": 1.7298190173751996e-05, "loss": 0.2559, "step": 4568 }, { "epoch": 0.26, "grad_norm": 0.45178314832303773, "learning_rate": 1.7296917852348882e-05, "loss": 0.342, "step": 4569 }, { "epoch": 0.26, "grad_norm": 0.6733044811452652, "learning_rate": 1.7295645278253817e-05, "loss": 0.5276, "step": 4570 }, { "epoch": 0.26, "grad_norm": 0.49577571076452776, "learning_rate": 1.729437245151087e-05, "loss": 0.2828, "step": 4571 }, { "epoch": 0.26, "grad_norm": 0.3413429533842971, "learning_rate": 1.7293099372164118e-05, "loss": 0.2894, "step": 4572 }, { "epoch": 0.26, "grad_norm": 0.3407676552622252, "learning_rate": 1.729182604025765e-05, "loss": 0.2752, "step": 4573 }, { "epoch": 0.26, "grad_norm": 0.561291828414926, "learning_rate": 1.729055245583556e-05, "loss": 0.3268, "step": 4574 }, { "epoch": 0.26, "grad_norm": 0.29530320708055313, "learning_rate": 1.728927861894195e-05, "loss": 0.2669, "step": 4575 }, { "epoch": 0.26, "grad_norm": 0.3881094912526957, "learning_rate": 1.7288004529620935e-05, "loss": 0.3326, "step": 4576 }, { "epoch": 0.26, "grad_norm": 1.1701078373770246, "learning_rate": 1.7286730187916635e-05, "loss": 0.8335, "step": 4577 }, { "epoch": 0.26, "grad_norm": 0.31692692063010275, "learning_rate": 1.7285455593873183e-05, "loss": 0.2234, "step": 4578 }, { "epoch": 0.26, "grad_norm": 0.7275364835658681, "learning_rate": 1.728418074753472e-05, "loss": 0.5326, "step": 4579 }, { "epoch": 0.26, "grad_norm": 0.3451992181340388, "learning_rate": 1.7282905648945386e-05, "loss": 0.342, "step": 4580 }, { "epoch": 0.26, "grad_norm": 0.2693554960138304, "learning_rate": 1.7281630298149346e-05, "loss": 0.2137, "step": 4581 }, { "epoch": 0.26, "grad_norm": 0.4373966659922116, "learning_rate": 1.728035469519076e-05, "loss": 0.2423, "step": 4582 }, { "epoch": 0.26, "grad_norm": 0.8356130794756954, "learning_rate": 1.7279078840113805e-05, "loss": 0.4754, "step": 4583 }, { "epoch": 0.26, "grad_norm": 0.3556758936236917, "learning_rate": 1.7277802732962662e-05, "loss": 0.2365, "step": 4584 }, { "epoch": 0.26, "grad_norm": 0.3887816940254315, "learning_rate": 1.7276526373781525e-05, "loss": 0.3398, "step": 4585 }, { "epoch": 0.26, "grad_norm": 0.6248091341949249, "learning_rate": 1.7275249762614592e-05, "loss": 0.4088, "step": 4586 }, { "epoch": 0.26, "grad_norm": 0.2780554084812607, "learning_rate": 1.727397289950607e-05, "loss": 0.1789, "step": 4587 }, { "epoch": 0.26, "grad_norm": 0.35914471939866316, "learning_rate": 1.7272695784500185e-05, "loss": 0.2958, "step": 4588 }, { "epoch": 0.26, "grad_norm": 1.1927746460401998, "learning_rate": 1.7271418417641153e-05, "loss": 0.7851, "step": 4589 }, { "epoch": 0.26, "grad_norm": 0.3336997350745605, "learning_rate": 1.7270140798973215e-05, "loss": 0.259, "step": 4590 }, { "epoch": 0.26, "grad_norm": 0.7934746268294893, "learning_rate": 1.7268862928540616e-05, "loss": 0.3916, "step": 4591 }, { "epoch": 0.26, "grad_norm": 0.440992452385509, "learning_rate": 1.7267584806387604e-05, "loss": 0.3606, "step": 4592 }, { "epoch": 0.26, "grad_norm": 0.3643981449840152, "learning_rate": 1.726630643255844e-05, "loss": 0.2713, "step": 4593 }, { "epoch": 0.26, "grad_norm": 0.23831222638239447, "learning_rate": 1.7265027807097402e-05, "loss": 0.1099, "step": 4594 }, { "epoch": 0.26, "grad_norm": 0.9794922046894605, "learning_rate": 1.726374893004876e-05, "loss": 0.641, "step": 4595 }, { "epoch": 0.26, "grad_norm": 0.3302361301129314, "learning_rate": 1.7262469801456806e-05, "loss": 0.2753, "step": 4596 }, { "epoch": 0.26, "grad_norm": 0.6913170287228856, "learning_rate": 1.7261190421365836e-05, "loss": 0.3405, "step": 4597 }, { "epoch": 0.26, "grad_norm": 0.7638555754563725, "learning_rate": 1.7259910789820152e-05, "loss": 0.5113, "step": 4598 }, { "epoch": 0.26, "grad_norm": 0.30601965328502595, "learning_rate": 1.7258630906864068e-05, "loss": 0.203, "step": 4599 }, { "epoch": 0.26, "grad_norm": 0.35651461621894115, "learning_rate": 1.7257350772541914e-05, "loss": 0.2329, "step": 4600 }, { "epoch": 0.26, "grad_norm": 0.8782920891777145, "learning_rate": 1.725607038689801e-05, "loss": 0.6243, "step": 4601 }, { "epoch": 0.26, "grad_norm": 0.42622194472487185, "learning_rate": 1.7254789749976703e-05, "loss": 0.2775, "step": 4602 }, { "epoch": 0.26, "grad_norm": 1.1987544832356745, "learning_rate": 1.7253508861822338e-05, "loss": 0.4799, "step": 4603 }, { "epoch": 0.26, "grad_norm": 0.37401071978627676, "learning_rate": 1.725222772247927e-05, "loss": 0.2316, "step": 4604 }, { "epoch": 0.26, "grad_norm": 0.4714683560992018, "learning_rate": 1.725094633199187e-05, "loss": 0.279, "step": 4605 }, { "epoch": 0.26, "grad_norm": 0.3252260992389096, "learning_rate": 1.7249664690404514e-05, "loss": 0.1989, "step": 4606 }, { "epoch": 0.26, "grad_norm": 1.2681138371986997, "learning_rate": 1.7248382797761576e-05, "loss": 0.4433, "step": 4607 }, { "epoch": 0.26, "grad_norm": 0.38382751971823414, "learning_rate": 1.7247100654107458e-05, "loss": 0.2707, "step": 4608 }, { "epoch": 0.26, "grad_norm": 0.5398894230168891, "learning_rate": 1.724581825948655e-05, "loss": 0.3814, "step": 4609 }, { "epoch": 0.26, "grad_norm": 0.7840013858936152, "learning_rate": 1.7244535613943273e-05, "loss": 0.3339, "step": 4610 }, { "epoch": 0.26, "grad_norm": 0.4177543238883827, "learning_rate": 1.7243252717522037e-05, "loss": 0.2759, "step": 4611 }, { "epoch": 0.26, "grad_norm": 0.2814456169503759, "learning_rate": 1.724196957026727e-05, "loss": 0.2205, "step": 4612 }, { "epoch": 0.27, "grad_norm": 1.286573728865289, "learning_rate": 1.724068617222341e-05, "loss": 0.4222, "step": 4613 }, { "epoch": 0.27, "grad_norm": 0.3835480722641368, "learning_rate": 1.7239402523434898e-05, "loss": 0.2947, "step": 4614 }, { "epoch": 0.27, "grad_norm": 0.9076811236266914, "learning_rate": 1.723811862394619e-05, "loss": 0.5795, "step": 4615 }, { "epoch": 0.27, "grad_norm": 0.4121511463726725, "learning_rate": 1.7236834473801744e-05, "loss": 0.344, "step": 4616 }, { "epoch": 0.27, "grad_norm": 0.33727558721149664, "learning_rate": 1.723555007304603e-05, "loss": 0.2264, "step": 4617 }, { "epoch": 0.27, "grad_norm": 0.49247949403688007, "learning_rate": 1.7234265421723528e-05, "loss": 0.2619, "step": 4618 }, { "epoch": 0.27, "grad_norm": 0.7509746626101724, "learning_rate": 1.7232980519878727e-05, "loss": 0.4011, "step": 4619 }, { "epoch": 0.27, "grad_norm": 0.3204159675138603, "learning_rate": 1.723169536755612e-05, "loss": 0.2003, "step": 4620 }, { "epoch": 0.27, "grad_norm": 0.41050620070576016, "learning_rate": 1.7230409964800215e-05, "loss": 0.3201, "step": 4621 }, { "epoch": 0.27, "grad_norm": 1.120953694972054, "learning_rate": 1.7229124311655524e-05, "loss": 0.5876, "step": 4622 }, { "epoch": 0.27, "grad_norm": 0.34506829258410193, "learning_rate": 1.722783840816657e-05, "loss": 0.2215, "step": 4623 }, { "epoch": 0.27, "grad_norm": 0.39872366387719055, "learning_rate": 1.7226552254377883e-05, "loss": 0.3462, "step": 4624 }, { "epoch": 0.27, "grad_norm": 0.5939456678866099, "learning_rate": 1.7225265850333997e-05, "loss": 0.4869, "step": 4625 }, { "epoch": 0.27, "grad_norm": 0.3497056047001658, "learning_rate": 1.7223979196079466e-05, "loss": 0.2402, "step": 4626 }, { "epoch": 0.27, "grad_norm": 0.31503886763827527, "learning_rate": 1.7222692291658853e-05, "loss": 0.2371, "step": 4627 }, { "epoch": 0.27, "grad_norm": 0.5476162155781016, "learning_rate": 1.7221405137116712e-05, "loss": 0.3956, "step": 4628 }, { "epoch": 0.27, "grad_norm": 0.4289308831084935, "learning_rate": 1.722011773249762e-05, "loss": 0.2724, "step": 4629 }, { "epoch": 0.27, "grad_norm": 0.45957358140657234, "learning_rate": 1.7218830077846164e-05, "loss": 0.329, "step": 4630 }, { "epoch": 0.27, "grad_norm": 0.4495152254009832, "learning_rate": 1.7217542173206932e-05, "loss": 0.3848, "step": 4631 }, { "epoch": 0.27, "grad_norm": 0.3251634536660676, "learning_rate": 1.7216254018624524e-05, "loss": 0.2706, "step": 4632 }, { "epoch": 0.27, "grad_norm": 0.36095585230014704, "learning_rate": 1.7214965614143554e-05, "loss": 0.1692, "step": 4633 }, { "epoch": 0.27, "grad_norm": 0.42598816677891027, "learning_rate": 1.721367695980863e-05, "loss": 0.2535, "step": 4634 }, { "epoch": 0.27, "grad_norm": 0.37269976982163927, "learning_rate": 1.7212388055664385e-05, "loss": 0.2948, "step": 4635 }, { "epoch": 0.27, "grad_norm": 0.3768645284351786, "learning_rate": 1.7211098901755453e-05, "loss": 0.2551, "step": 4636 }, { "epoch": 0.27, "grad_norm": 0.7238591197734653, "learning_rate": 1.7209809498126473e-05, "loss": 0.4616, "step": 4637 }, { "epoch": 0.27, "grad_norm": 0.46787631269308716, "learning_rate": 1.72085198448221e-05, "loss": 0.327, "step": 4638 }, { "epoch": 0.27, "grad_norm": 0.28015550313024745, "learning_rate": 1.7207229941887e-05, "loss": 0.2313, "step": 4639 }, { "epoch": 0.27, "grad_norm": 0.32694743935340276, "learning_rate": 1.7205939789365834e-05, "loss": 0.2899, "step": 4640 }, { "epoch": 0.27, "grad_norm": 0.57415873988465, "learning_rate": 1.720464938730328e-05, "loss": 0.3279, "step": 4641 }, { "epoch": 0.27, "grad_norm": 0.4253211319708633, "learning_rate": 1.720335873574403e-05, "loss": 0.3365, "step": 4642 }, { "epoch": 0.27, "grad_norm": 0.4051995236013618, "learning_rate": 1.7202067834732778e-05, "loss": 0.3115, "step": 4643 }, { "epoch": 0.27, "grad_norm": 0.4540323810623343, "learning_rate": 1.7200776684314226e-05, "loss": 0.3126, "step": 4644 }, { "epoch": 0.27, "grad_norm": 0.3539122334412321, "learning_rate": 1.7199485284533088e-05, "loss": 0.2621, "step": 4645 }, { "epoch": 0.27, "grad_norm": 0.26353112650494664, "learning_rate": 1.7198193635434083e-05, "loss": 0.0729, "step": 4646 }, { "epoch": 0.27, "grad_norm": 0.40396435081770415, "learning_rate": 1.719690173706194e-05, "loss": 0.3095, "step": 4647 }, { "epoch": 0.27, "grad_norm": 0.33135791809707316, "learning_rate": 1.71956095894614e-05, "loss": 0.3202, "step": 4648 }, { "epoch": 0.27, "grad_norm": 0.8182839067779433, "learning_rate": 1.719431719267721e-05, "loss": 0.4072, "step": 4649 }, { "epoch": 0.27, "grad_norm": 0.4000599760224488, "learning_rate": 1.7193024546754125e-05, "loss": 0.3128, "step": 4650 }, { "epoch": 0.27, "grad_norm": 0.5156932368052041, "learning_rate": 1.719173165173691e-05, "loss": 0.406, "step": 4651 }, { "epoch": 0.27, "grad_norm": 0.2688587493026527, "learning_rate": 1.7190438507670337e-05, "loss": 0.1869, "step": 4652 }, { "epoch": 0.27, "grad_norm": 0.36163058971972184, "learning_rate": 1.7189145114599188e-05, "loss": 0.3013, "step": 4653 }, { "epoch": 0.27, "grad_norm": 0.787712368896139, "learning_rate": 1.718785147256825e-05, "loss": 0.4917, "step": 4654 }, { "epoch": 0.27, "grad_norm": 0.3618209143003872, "learning_rate": 1.7186557581622327e-05, "loss": 0.3338, "step": 4655 }, { "epoch": 0.27, "grad_norm": 0.32715965622544163, "learning_rate": 1.7185263441806227e-05, "loss": 0.2166, "step": 4656 }, { "epoch": 0.27, "grad_norm": 0.5405807570579455, "learning_rate": 1.7183969053164757e-05, "loss": 0.3873, "step": 4657 }, { "epoch": 0.27, "grad_norm": 0.33734823448871526, "learning_rate": 1.718267441574275e-05, "loss": 0.2136, "step": 4658 }, { "epoch": 0.27, "grad_norm": 0.41124631225255276, "learning_rate": 1.718137952958504e-05, "loss": 0.2576, "step": 4659 }, { "epoch": 0.27, "grad_norm": 0.372867877130881, "learning_rate": 1.7180084394736464e-05, "loss": 0.3317, "step": 4660 }, { "epoch": 0.27, "grad_norm": 1.2657038079052518, "learning_rate": 1.717878901124187e-05, "loss": 0.8484, "step": 4661 }, { "epoch": 0.27, "grad_norm": 0.30325582500593035, "learning_rate": 1.7177493379146123e-05, "loss": 0.1239, "step": 4662 }, { "epoch": 0.27, "grad_norm": 0.32165604472772885, "learning_rate": 1.717619749849409e-05, "loss": 0.296, "step": 4663 }, { "epoch": 0.27, "grad_norm": 0.30992102992511755, "learning_rate": 1.7174901369330648e-05, "loss": 0.2242, "step": 4664 }, { "epoch": 0.27, "grad_norm": 0.48324249109187434, "learning_rate": 1.7173604991700678e-05, "loss": 0.363, "step": 4665 }, { "epoch": 0.27, "grad_norm": 0.403222549687474, "learning_rate": 1.7172308365649077e-05, "loss": 0.2954, "step": 4666 }, { "epoch": 0.27, "grad_norm": 0.45450122410769483, "learning_rate": 1.7171011491220744e-05, "loss": 0.3464, "step": 4667 }, { "epoch": 0.27, "grad_norm": 0.6262524047569294, "learning_rate": 1.7169714368460593e-05, "loss": 0.3987, "step": 4668 }, { "epoch": 0.27, "grad_norm": 0.40588895977274775, "learning_rate": 1.716841699741354e-05, "loss": 0.241, "step": 4669 }, { "epoch": 0.27, "grad_norm": 0.7130249951084894, "learning_rate": 1.7167119378124516e-05, "loss": 0.4351, "step": 4670 }, { "epoch": 0.27, "grad_norm": 0.3442235779286332, "learning_rate": 1.7165821510638456e-05, "loss": 0.2933, "step": 4671 }, { "epoch": 0.27, "grad_norm": 0.23626658265469405, "learning_rate": 1.7164523395000304e-05, "loss": 0.1589, "step": 4672 }, { "epoch": 0.27, "grad_norm": 0.8478012547554327, "learning_rate": 1.7163225031255018e-05, "loss": 0.6467, "step": 4673 }, { "epoch": 0.27, "grad_norm": 0.5128732762763075, "learning_rate": 1.7161926419447555e-05, "loss": 0.3303, "step": 4674 }, { "epoch": 0.27, "grad_norm": 0.3949461464032854, "learning_rate": 1.7160627559622888e-05, "loss": 0.2547, "step": 4675 }, { "epoch": 0.27, "grad_norm": 0.5768859794534354, "learning_rate": 1.7159328451825995e-05, "loss": 0.3725, "step": 4676 }, { "epoch": 0.27, "grad_norm": 0.4045747712340561, "learning_rate": 1.7158029096101868e-05, "loss": 0.2743, "step": 4677 }, { "epoch": 0.27, "grad_norm": 0.31346151516334575, "learning_rate": 1.71567294924955e-05, "loss": 0.2506, "step": 4678 }, { "epoch": 0.27, "grad_norm": 0.38541238161229885, "learning_rate": 1.71554296410519e-05, "loss": 0.281, "step": 4679 }, { "epoch": 0.27, "grad_norm": 0.8739729047054263, "learning_rate": 1.7154129541816078e-05, "loss": 0.509, "step": 4680 }, { "epoch": 0.27, "grad_norm": 0.38755430202848945, "learning_rate": 1.7152829194833054e-05, "loss": 0.2878, "step": 4681 }, { "epoch": 0.27, "grad_norm": 0.7597051840219803, "learning_rate": 1.7151528600147868e-05, "loss": 0.3923, "step": 4682 }, { "epoch": 0.27, "grad_norm": 0.3355241879573539, "learning_rate": 1.7150227757805552e-05, "loss": 0.3151, "step": 4683 }, { "epoch": 0.27, "grad_norm": 0.25231565932966504, "learning_rate": 1.7148926667851156e-05, "loss": 0.1974, "step": 4684 }, { "epoch": 0.27, "grad_norm": 0.9743567662925537, "learning_rate": 1.7147625330329734e-05, "loss": 0.5156, "step": 4685 }, { "epoch": 0.27, "grad_norm": 0.5801427580892613, "learning_rate": 1.714632374528636e-05, "loss": 0.4265, "step": 4686 }, { "epoch": 0.27, "grad_norm": 0.38310225366593875, "learning_rate": 1.7145021912766096e-05, "loss": 0.3218, "step": 4687 }, { "epoch": 0.27, "grad_norm": 0.5825425991209423, "learning_rate": 1.7143719832814034e-05, "loss": 0.3381, "step": 4688 }, { "epoch": 0.27, "grad_norm": 0.3340408898626788, "learning_rate": 1.714241750547526e-05, "loss": 0.239, "step": 4689 }, { "epoch": 0.27, "grad_norm": 0.3168336149753657, "learning_rate": 1.7141114930794876e-05, "loss": 0.2373, "step": 4690 }, { "epoch": 0.27, "grad_norm": 0.40724845843177176, "learning_rate": 1.7139812108817988e-05, "loss": 0.3455, "step": 4691 }, { "epoch": 0.27, "grad_norm": 0.5094991595317109, "learning_rate": 1.7138509039589713e-05, "loss": 0.2222, "step": 4692 }, { "epoch": 0.27, "grad_norm": 0.4787797053735826, "learning_rate": 1.7137205723155178e-05, "loss": 0.2925, "step": 4693 }, { "epoch": 0.27, "grad_norm": 0.7871752853141931, "learning_rate": 1.7135902159559518e-05, "loss": 0.4657, "step": 4694 }, { "epoch": 0.27, "grad_norm": 0.3929226671181522, "learning_rate": 1.713459834884787e-05, "loss": 0.2591, "step": 4695 }, { "epoch": 0.27, "grad_norm": 0.31143988193266453, "learning_rate": 1.7133294291065387e-05, "loss": 0.2568, "step": 4696 }, { "epoch": 0.27, "grad_norm": 0.5147095929397764, "learning_rate": 1.7131989986257233e-05, "loss": 0.3835, "step": 4697 }, { "epoch": 0.27, "grad_norm": 0.8803237290957555, "learning_rate": 1.713068543446857e-05, "loss": 0.337, "step": 4698 }, { "epoch": 0.27, "grad_norm": 0.32761454354627956, "learning_rate": 1.7129380635744578e-05, "loss": 0.2859, "step": 4699 }, { "epoch": 0.27, "grad_norm": 0.6789050208924245, "learning_rate": 1.712807559013044e-05, "loss": 0.5107, "step": 4700 }, { "epoch": 0.27, "grad_norm": 0.580704421074495, "learning_rate": 1.7126770297671353e-05, "loss": 0.1709, "step": 4701 }, { "epoch": 0.27, "grad_norm": 0.3050190429982249, "learning_rate": 1.7125464758412517e-05, "loss": 0.2517, "step": 4702 }, { "epoch": 0.27, "grad_norm": 0.4061107982630693, "learning_rate": 1.7124158972399142e-05, "loss": 0.3583, "step": 4703 }, { "epoch": 0.27, "grad_norm": 1.1006064066544996, "learning_rate": 1.7122852939676448e-05, "loss": 0.4624, "step": 4704 }, { "epoch": 0.27, "grad_norm": 0.41879601303777364, "learning_rate": 1.712154666028966e-05, "loss": 0.2789, "step": 4705 }, { "epoch": 0.27, "grad_norm": 0.4464716584986142, "learning_rate": 1.712024013428402e-05, "loss": 0.2636, "step": 4706 }, { "epoch": 0.27, "grad_norm": 0.3530264257968865, "learning_rate": 1.7118933361704773e-05, "loss": 0.297, "step": 4707 }, { "epoch": 0.27, "grad_norm": 0.4282023284180023, "learning_rate": 1.7117626342597168e-05, "loss": 0.2344, "step": 4708 }, { "epoch": 0.27, "grad_norm": 0.4580585344409273, "learning_rate": 1.711631907700647e-05, "loss": 0.3537, "step": 4709 }, { "epoch": 0.27, "grad_norm": 0.7468136572361314, "learning_rate": 1.711501156497794e-05, "loss": 0.3764, "step": 4710 }, { "epoch": 0.27, "grad_norm": 0.37408046619978336, "learning_rate": 1.7113703806556875e-05, "loss": 0.2427, "step": 4711 }, { "epoch": 0.27, "grad_norm": 0.49604510274681346, "learning_rate": 1.711239580178855e-05, "loss": 0.3652, "step": 4712 }, { "epoch": 0.27, "grad_norm": 1.1377881037810844, "learning_rate": 1.7111087550718265e-05, "loss": 0.6517, "step": 4713 }, { "epoch": 0.27, "grad_norm": 0.3778912354875147, "learning_rate": 1.7109779053391322e-05, "loss": 0.2191, "step": 4714 }, { "epoch": 0.27, "grad_norm": 0.3796078306581816, "learning_rate": 1.710847030985304e-05, "loss": 0.3385, "step": 4715 }, { "epoch": 0.27, "grad_norm": 0.6783617967841638, "learning_rate": 1.710716132014873e-05, "loss": 0.4694, "step": 4716 }, { "epoch": 0.27, "grad_norm": 0.3984794551445479, "learning_rate": 1.7105852084323736e-05, "loss": 0.2969, "step": 4717 }, { "epoch": 0.27, "grad_norm": 0.28023283128766, "learning_rate": 1.7104542602423385e-05, "loss": 0.0739, "step": 4718 }, { "epoch": 0.27, "grad_norm": 0.365961154130174, "learning_rate": 1.710323287449303e-05, "loss": 0.3137, "step": 4719 }, { "epoch": 0.27, "grad_norm": 0.3846054233012314, "learning_rate": 1.710192290057803e-05, "loss": 0.2914, "step": 4720 }, { "epoch": 0.27, "grad_norm": 0.7279304764082829, "learning_rate": 1.7100612680723744e-05, "loss": 0.4087, "step": 4721 }, { "epoch": 0.27, "grad_norm": 0.35474730728888954, "learning_rate": 1.7099302214975545e-05, "loss": 0.3523, "step": 4722 }, { "epoch": 0.27, "grad_norm": 0.35652743317062197, "learning_rate": 1.7097991503378812e-05, "loss": 0.2756, "step": 4723 }, { "epoch": 0.27, "grad_norm": 0.17111219321848872, "learning_rate": 1.7096680545978946e-05, "loss": 0.0714, "step": 4724 }, { "epoch": 0.27, "grad_norm": 0.7528476749497117, "learning_rate": 1.709536934282133e-05, "loss": 0.4266, "step": 4725 }, { "epoch": 0.27, "grad_norm": 0.4257309461122103, "learning_rate": 1.7094057893951385e-05, "loss": 0.306, "step": 4726 }, { "epoch": 0.27, "grad_norm": 0.40249958959659726, "learning_rate": 1.709274619941452e-05, "loss": 0.3084, "step": 4727 }, { "epoch": 0.27, "grad_norm": 0.6821814944692812, "learning_rate": 1.7091434259256155e-05, "loss": 0.4543, "step": 4728 }, { "epoch": 0.27, "grad_norm": 0.4068026712533653, "learning_rate": 1.7090122073521726e-05, "loss": 0.2961, "step": 4729 }, { "epoch": 0.27, "grad_norm": 0.30780879761690466, "learning_rate": 1.7088809642256677e-05, "loss": 0.2363, "step": 4730 }, { "epoch": 0.27, "grad_norm": 0.4450334908612994, "learning_rate": 1.7087496965506457e-05, "loss": 0.2771, "step": 4731 }, { "epoch": 0.27, "grad_norm": 0.3757471843854514, "learning_rate": 1.7086184043316518e-05, "loss": 0.2816, "step": 4732 }, { "epoch": 0.27, "grad_norm": 0.718898235374059, "learning_rate": 1.7084870875732332e-05, "loss": 0.4816, "step": 4733 }, { "epoch": 0.27, "grad_norm": 0.35620390057471335, "learning_rate": 1.708355746279937e-05, "loss": 0.2784, "step": 4734 }, { "epoch": 0.27, "grad_norm": 0.3257456059274286, "learning_rate": 1.7082243804563123e-05, "loss": 0.2776, "step": 4735 }, { "epoch": 0.27, "grad_norm": 0.2670452463661605, "learning_rate": 1.7080929901069076e-05, "loss": 0.163, "step": 4736 }, { "epoch": 0.27, "grad_norm": 0.6344362497021302, "learning_rate": 1.7079615752362727e-05, "loss": 0.3661, "step": 4737 }, { "epoch": 0.27, "grad_norm": 0.35318412416104933, "learning_rate": 1.707830135848959e-05, "loss": 0.2906, "step": 4738 }, { "epoch": 0.27, "grad_norm": 0.415746007004765, "learning_rate": 1.7076986719495184e-05, "loss": 0.3621, "step": 4739 }, { "epoch": 0.27, "grad_norm": 0.9097433398595413, "learning_rate": 1.7075671835425032e-05, "loss": 0.6311, "step": 4740 }, { "epoch": 0.27, "grad_norm": 0.30654017642837456, "learning_rate": 1.7074356706324668e-05, "loss": 0.22, "step": 4741 }, { "epoch": 0.27, "grad_norm": 0.2861658906239352, "learning_rate": 1.7073041332239634e-05, "loss": 0.1787, "step": 4742 }, { "epoch": 0.27, "grad_norm": 0.38805970077000235, "learning_rate": 1.7071725713215483e-05, "loss": 0.3573, "step": 4743 }, { "epoch": 0.27, "grad_norm": 0.35965465826234444, "learning_rate": 1.7070409849297774e-05, "loss": 0.216, "step": 4744 }, { "epoch": 0.27, "grad_norm": 0.7013740357821047, "learning_rate": 1.7069093740532083e-05, "loss": 0.4085, "step": 4745 }, { "epoch": 0.27, "grad_norm": 0.3788364821404023, "learning_rate": 1.706777738696397e-05, "loss": 0.3364, "step": 4746 }, { "epoch": 0.27, "grad_norm": 0.33608417673074076, "learning_rate": 1.7066460788639035e-05, "loss": 0.2014, "step": 4747 }, { "epoch": 0.27, "grad_norm": 0.2977740305003241, "learning_rate": 1.7065143945602867e-05, "loss": 0.1881, "step": 4748 }, { "epoch": 0.27, "grad_norm": 0.6092323032794157, "learning_rate": 1.7063826857901066e-05, "loss": 0.4919, "step": 4749 }, { "epoch": 0.27, "grad_norm": 0.35754157093924055, "learning_rate": 1.7062509525579244e-05, "loss": 0.2416, "step": 4750 }, { "epoch": 0.27, "grad_norm": 0.3915972606090248, "learning_rate": 1.7061191948683024e-05, "loss": 0.3557, "step": 4751 }, { "epoch": 0.27, "grad_norm": 1.2231230334847767, "learning_rate": 1.7059874127258028e-05, "loss": 0.7646, "step": 4752 }, { "epoch": 0.27, "grad_norm": 0.3921839884714695, "learning_rate": 1.7058556061349894e-05, "loss": 0.3111, "step": 4753 }, { "epoch": 0.27, "grad_norm": 0.2249213774799527, "learning_rate": 1.705723775100427e-05, "loss": 0.1823, "step": 4754 }, { "epoch": 0.27, "grad_norm": 0.46286782233072077, "learning_rate": 1.7055919196266806e-05, "loss": 0.3736, "step": 4755 }, { "epoch": 0.27, "grad_norm": 0.441673470781393, "learning_rate": 1.7054600397183162e-05, "loss": 0.3233, "step": 4756 }, { "epoch": 0.27, "grad_norm": 0.5357999621963078, "learning_rate": 1.705328135379901e-05, "loss": 0.2646, "step": 4757 }, { "epoch": 0.27, "grad_norm": 0.4294620287968328, "learning_rate": 1.7051962066160027e-05, "loss": 0.3657, "step": 4758 }, { "epoch": 0.27, "grad_norm": 0.36051927885640783, "learning_rate": 1.7050642534311904e-05, "loss": 0.2862, "step": 4759 }, { "epoch": 0.27, "grad_norm": 0.3010743262062142, "learning_rate": 1.704932275830033e-05, "loss": 0.1729, "step": 4760 }, { "epoch": 0.27, "grad_norm": 0.4334278974070254, "learning_rate": 1.704800273817101e-05, "loss": 0.3478, "step": 4761 }, { "epoch": 0.27, "grad_norm": 0.4264062843363303, "learning_rate": 1.7046682473969664e-05, "loss": 0.2949, "step": 4762 }, { "epoch": 0.27, "grad_norm": 0.4010993186213262, "learning_rate": 1.7045361965742004e-05, "loss": 0.2649, "step": 4763 }, { "epoch": 0.27, "grad_norm": 1.2493476218292963, "learning_rate": 1.704404121353376e-05, "loss": 0.7307, "step": 4764 }, { "epoch": 0.27, "grad_norm": 0.5374611777732092, "learning_rate": 1.7042720217390677e-05, "loss": 0.3441, "step": 4765 }, { "epoch": 0.27, "grad_norm": 0.35668982665157084, "learning_rate": 1.7041398977358494e-05, "loss": 0.2937, "step": 4766 }, { "epoch": 0.27, "grad_norm": 0.5150162023118233, "learning_rate": 1.7040077493482964e-05, "loss": 0.3482, "step": 4767 }, { "epoch": 0.27, "grad_norm": 0.3524313571951868, "learning_rate": 1.7038755765809857e-05, "loss": 0.2136, "step": 4768 }, { "epoch": 0.27, "grad_norm": 0.3518307329267539, "learning_rate": 1.7037433794384938e-05, "loss": 0.2492, "step": 4769 }, { "epoch": 0.27, "grad_norm": 0.37417425107718427, "learning_rate": 1.7036111579253992e-05, "loss": 0.2956, "step": 4770 }, { "epoch": 0.27, "grad_norm": 0.3943110391556108, "learning_rate": 1.70347891204628e-05, "loss": 0.3086, "step": 4771 }, { "epoch": 0.27, "grad_norm": 0.4517027039596723, "learning_rate": 1.7033466418057166e-05, "loss": 0.3716, "step": 4772 }, { "epoch": 0.27, "grad_norm": 0.44044704690746816, "learning_rate": 1.7032143472082893e-05, "loss": 0.2414, "step": 4773 }, { "epoch": 0.27, "grad_norm": 0.4506278934272334, "learning_rate": 1.7030820282585795e-05, "loss": 0.256, "step": 4774 }, { "epoch": 0.27, "grad_norm": 0.39868821005548993, "learning_rate": 1.7029496849611687e-05, "loss": 0.2626, "step": 4775 }, { "epoch": 0.27, "grad_norm": 1.3150474383898678, "learning_rate": 1.702817317320641e-05, "loss": 0.5298, "step": 4776 }, { "epoch": 0.27, "grad_norm": 0.3613155978762499, "learning_rate": 1.70268492534158e-05, "loss": 0.2521, "step": 4777 }, { "epoch": 0.27, "grad_norm": 0.460323545686999, "learning_rate": 1.70255250902857e-05, "loss": 0.3628, "step": 4778 }, { "epoch": 0.27, "grad_norm": 0.5444376532180347, "learning_rate": 1.702420068386197e-05, "loss": 0.3987, "step": 4779 }, { "epoch": 0.27, "grad_norm": 0.26635586243885684, "learning_rate": 1.7022876034190468e-05, "loss": 0.0774, "step": 4780 }, { "epoch": 0.27, "grad_norm": 0.3994512255967334, "learning_rate": 1.7021551141317075e-05, "loss": 0.2613, "step": 4781 }, { "epoch": 0.27, "grad_norm": 0.5373762784077355, "learning_rate": 1.7020226005287665e-05, "loss": 0.3608, "step": 4782 }, { "epoch": 0.27, "grad_norm": 0.5289576791227809, "learning_rate": 1.701890062614813e-05, "loss": 0.19, "step": 4783 }, { "epoch": 0.27, "grad_norm": 0.40705610275602827, "learning_rate": 1.7017575003944374e-05, "loss": 0.3555, "step": 4784 }, { "epoch": 0.27, "grad_norm": 0.8059748782716618, "learning_rate": 1.7016249138722295e-05, "loss": 0.5116, "step": 4785 }, { "epoch": 0.27, "grad_norm": 0.3090383888410909, "learning_rate": 1.7014923030527808e-05, "loss": 0.1888, "step": 4786 }, { "epoch": 0.28, "grad_norm": 0.3782301763328678, "learning_rate": 1.701359667940684e-05, "loss": 0.284, "step": 4787 }, { "epoch": 0.28, "grad_norm": 1.3901516550893385, "learning_rate": 1.7012270085405317e-05, "loss": 0.8495, "step": 4788 }, { "epoch": 0.28, "grad_norm": 0.4533944763836812, "learning_rate": 1.7010943248569185e-05, "loss": 0.2694, "step": 4789 }, { "epoch": 0.28, "grad_norm": 0.4358691412921533, "learning_rate": 1.700961616894439e-05, "loss": 0.3246, "step": 4790 }, { "epoch": 0.28, "grad_norm": 1.2521917339608342, "learning_rate": 1.7008288846576886e-05, "loss": 0.5989, "step": 4791 }, { "epoch": 0.28, "grad_norm": 0.3808308096968665, "learning_rate": 1.7006961281512645e-05, "loss": 0.251, "step": 4792 }, { "epoch": 0.28, "grad_norm": 0.46592574712611845, "learning_rate": 1.7005633473797632e-05, "loss": 0.2673, "step": 4793 }, { "epoch": 0.28, "grad_norm": 0.3932673619699627, "learning_rate": 1.7004305423477835e-05, "loss": 0.3008, "step": 4794 }, { "epoch": 0.28, "grad_norm": 0.6720359832254608, "learning_rate": 1.700297713059924e-05, "loss": 0.374, "step": 4795 }, { "epoch": 0.28, "grad_norm": 0.38595536528371815, "learning_rate": 1.700164859520785e-05, "loss": 0.2642, "step": 4796 }, { "epoch": 0.28, "grad_norm": 0.525812046298573, "learning_rate": 1.7000319817349673e-05, "loss": 0.3478, "step": 4797 }, { "epoch": 0.28, "grad_norm": 0.39726507148120443, "learning_rate": 1.699899079707072e-05, "loss": 0.3034, "step": 4798 }, { "epoch": 0.28, "grad_norm": 0.38302101200138, "learning_rate": 1.6997661534417015e-05, "loss": 0.2645, "step": 4799 }, { "epoch": 0.28, "grad_norm": 0.45339021595960577, "learning_rate": 1.699633202943459e-05, "loss": 0.3409, "step": 4800 }, { "epoch": 0.28, "grad_norm": 0.6208504710150762, "learning_rate": 1.699500228216949e-05, "loss": 0.4191, "step": 4801 }, { "epoch": 0.28, "grad_norm": 0.3185182893708112, "learning_rate": 1.6993672292667766e-05, "loss": 0.2207, "step": 4802 }, { "epoch": 0.28, "grad_norm": 1.197840212134981, "learning_rate": 1.6992342060975467e-05, "loss": 0.6374, "step": 4803 }, { "epoch": 0.28, "grad_norm": 0.7354342772061968, "learning_rate": 1.6991011587138665e-05, "loss": 0.4787, "step": 4804 }, { "epoch": 0.28, "grad_norm": 0.4419399786931295, "learning_rate": 1.698968087120343e-05, "loss": 0.3406, "step": 4805 }, { "epoch": 0.28, "grad_norm": 0.5150312372588404, "learning_rate": 1.6988349913215848e-05, "loss": 0.2817, "step": 4806 }, { "epoch": 0.28, "grad_norm": 0.6398701761513857, "learning_rate": 1.698701871322201e-05, "loss": 0.3152, "step": 4807 }, { "epoch": 0.28, "grad_norm": 0.2889587602038658, "learning_rate": 1.698568727126801e-05, "loss": 0.2323, "step": 4808 }, { "epoch": 0.28, "grad_norm": 0.46663731677113146, "learning_rate": 1.6984355587399964e-05, "loss": 0.284, "step": 4809 }, { "epoch": 0.28, "grad_norm": 0.44856779416870474, "learning_rate": 1.6983023661663987e-05, "loss": 0.3398, "step": 4810 }, { "epoch": 0.28, "grad_norm": 0.5213901240015865, "learning_rate": 1.6981691494106196e-05, "loss": 0.3272, "step": 4811 }, { "epoch": 0.28, "grad_norm": 1.055086036269047, "learning_rate": 1.698035908477273e-05, "loss": 0.4127, "step": 4812 }, { "epoch": 0.28, "grad_norm": 0.4546873145751855, "learning_rate": 1.697902643370973e-05, "loss": 0.3177, "step": 4813 }, { "epoch": 0.28, "grad_norm": 0.4027812289256814, "learning_rate": 1.6977693540963347e-05, "loss": 0.3157, "step": 4814 }, { "epoch": 0.28, "grad_norm": 0.2686362672041927, "learning_rate": 1.6976360406579734e-05, "loss": 0.1518, "step": 4815 }, { "epoch": 0.28, "grad_norm": 0.7597687415584575, "learning_rate": 1.697502703060506e-05, "loss": 0.4421, "step": 4816 }, { "epoch": 0.28, "grad_norm": 0.44251032893440007, "learning_rate": 1.69736934130855e-05, "loss": 0.2877, "step": 4817 }, { "epoch": 0.28, "grad_norm": 0.5865777905310624, "learning_rate": 1.6972359554067237e-05, "loss": 0.3693, "step": 4818 }, { "epoch": 0.28, "grad_norm": 0.8136374248949291, "learning_rate": 1.6971025453596463e-05, "loss": 0.3066, "step": 4819 }, { "epoch": 0.28, "grad_norm": 0.2770444473086494, "learning_rate": 1.6969691111719377e-05, "loss": 0.2053, "step": 4820 }, { "epoch": 0.28, "grad_norm": 0.5373847607276323, "learning_rate": 1.6968356528482187e-05, "loss": 0.399, "step": 4821 }, { "epoch": 0.28, "grad_norm": 0.4172327112811106, "learning_rate": 1.696702170393111e-05, "loss": 0.225, "step": 4822 }, { "epoch": 0.28, "grad_norm": 0.4781253488053807, "learning_rate": 1.6965686638112373e-05, "loss": 0.308, "step": 4823 }, { "epoch": 0.28, "grad_norm": 1.2234511064967395, "learning_rate": 1.6964351331072205e-05, "loss": 0.4584, "step": 4824 }, { "epoch": 0.28, "grad_norm": 0.4180514861148252, "learning_rate": 1.696301578285685e-05, "loss": 0.2807, "step": 4825 }, { "epoch": 0.28, "grad_norm": 0.2841258483758155, "learning_rate": 1.696167999351256e-05, "loss": 0.2448, "step": 4826 }, { "epoch": 0.28, "grad_norm": 0.459516630611072, "learning_rate": 1.6960343963085587e-05, "loss": 0.2917, "step": 4827 }, { "epoch": 0.28, "grad_norm": 1.0542058454902612, "learning_rate": 1.6959007691622206e-05, "loss": 0.5435, "step": 4828 }, { "epoch": 0.28, "grad_norm": 0.3935580375131141, "learning_rate": 1.6957671179168687e-05, "loss": 0.2066, "step": 4829 }, { "epoch": 0.28, "grad_norm": 0.5260916371309337, "learning_rate": 1.695633442577131e-05, "loss": 0.349, "step": 4830 }, { "epoch": 0.28, "grad_norm": 0.6822341191380212, "learning_rate": 1.6954997431476376e-05, "loss": 0.4439, "step": 4831 }, { "epoch": 0.28, "grad_norm": 0.34931542652822906, "learning_rate": 1.695366019633018e-05, "loss": 0.2223, "step": 4832 }, { "epoch": 0.28, "grad_norm": 0.28818157760600444, "learning_rate": 1.695232272037903e-05, "loss": 0.2245, "step": 4833 }, { "epoch": 0.28, "grad_norm": 0.44886495313474906, "learning_rate": 1.695098500366924e-05, "loss": 0.3747, "step": 4834 }, { "epoch": 0.28, "grad_norm": 0.376489095537162, "learning_rate": 1.694964704624714e-05, "loss": 0.257, "step": 4835 }, { "epoch": 0.28, "grad_norm": 0.6917611206680756, "learning_rate": 1.6948308848159064e-05, "loss": 0.5613, "step": 4836 }, { "epoch": 0.28, "grad_norm": 0.41105937337798043, "learning_rate": 1.694697040945135e-05, "loss": 0.3446, "step": 4837 }, { "epoch": 0.28, "grad_norm": 0.3334884198947588, "learning_rate": 1.694563173017035e-05, "loss": 0.225, "step": 4838 }, { "epoch": 0.28, "grad_norm": 0.3046497158528754, "learning_rate": 1.694429281036242e-05, "loss": 0.1881, "step": 4839 }, { "epoch": 0.28, "grad_norm": 0.6787872188789502, "learning_rate": 1.6942953650073926e-05, "loss": 0.4572, "step": 4840 }, { "epoch": 0.28, "grad_norm": 0.3605281610226177, "learning_rate": 1.6941614249351252e-05, "loss": 0.3004, "step": 4841 }, { "epoch": 0.28, "grad_norm": 0.4345068829504192, "learning_rate": 1.6940274608240773e-05, "loss": 0.2796, "step": 4842 }, { "epoch": 0.28, "grad_norm": 1.0070853290732158, "learning_rate": 1.693893472678888e-05, "loss": 0.6626, "step": 4843 }, { "epoch": 0.28, "grad_norm": 0.3629286253161658, "learning_rate": 1.693759460504198e-05, "loss": 0.2631, "step": 4844 }, { "epoch": 0.28, "grad_norm": 0.2677448034312148, "learning_rate": 1.6936254243046472e-05, "loss": 0.152, "step": 4845 }, { "epoch": 0.28, "grad_norm": 0.6535965345558263, "learning_rate": 1.6934913640848782e-05, "loss": 0.3904, "step": 4846 }, { "epoch": 0.28, "grad_norm": 0.3450106487612924, "learning_rate": 1.6933572798495327e-05, "loss": 0.2696, "step": 4847 }, { "epoch": 0.28, "grad_norm": 0.8901525449936295, "learning_rate": 1.6932231716032548e-05, "loss": 0.463, "step": 4848 }, { "epoch": 0.28, "grad_norm": 0.35316268141975193, "learning_rate": 1.6930890393506882e-05, "loss": 0.3056, "step": 4849 }, { "epoch": 0.28, "grad_norm": 0.37333332234721345, "learning_rate": 1.692954883096478e-05, "loss": 0.314, "step": 4850 }, { "epoch": 0.28, "grad_norm": 0.26962638573625075, "learning_rate": 1.6928207028452698e-05, "loss": 0.1461, "step": 4851 }, { "epoch": 0.28, "grad_norm": 0.7124643097673313, "learning_rate": 1.6926864986017105e-05, "loss": 0.4647, "step": 4852 }, { "epoch": 0.28, "grad_norm": 0.3938996265220362, "learning_rate": 1.6925522703704475e-05, "loss": 0.3013, "step": 4853 }, { "epoch": 0.28, "grad_norm": 0.4226078356649217, "learning_rate": 1.6924180181561297e-05, "loss": 0.3757, "step": 4854 }, { "epoch": 0.28, "grad_norm": 0.993544391815923, "learning_rate": 1.6922837419634052e-05, "loss": 0.4262, "step": 4855 }, { "epoch": 0.28, "grad_norm": 0.37675799254882103, "learning_rate": 1.6921494417969245e-05, "loss": 0.2918, "step": 4856 }, { "epoch": 0.28, "grad_norm": 0.3003610120862018, "learning_rate": 1.6920151176613383e-05, "loss": 0.2703, "step": 4857 }, { "epoch": 0.28, "grad_norm": 0.5360807413545341, "learning_rate": 1.6918807695612984e-05, "loss": 0.2786, "step": 4858 }, { "epoch": 0.28, "grad_norm": 0.37804769698458474, "learning_rate": 1.6917463975014575e-05, "loss": 0.2754, "step": 4859 }, { "epoch": 0.28, "grad_norm": 1.1356673660352166, "learning_rate": 1.691612001486468e-05, "loss": 0.6455, "step": 4860 }, { "epoch": 0.28, "grad_norm": 0.3493864639681745, "learning_rate": 1.6914775815209853e-05, "loss": 0.2859, "step": 4861 }, { "epoch": 0.28, "grad_norm": 0.37809936495877855, "learning_rate": 1.6913431376096633e-05, "loss": 0.2852, "step": 4862 }, { "epoch": 0.28, "grad_norm": 0.7970041802176591, "learning_rate": 1.6912086697571584e-05, "loss": 0.5109, "step": 4863 }, { "epoch": 0.28, "grad_norm": 0.2716805871139541, "learning_rate": 1.6910741779681264e-05, "loss": 0.2215, "step": 4864 }, { "epoch": 0.28, "grad_norm": 0.4244076834143325, "learning_rate": 1.690939662247226e-05, "loss": 0.2942, "step": 4865 }, { "epoch": 0.28, "grad_norm": 0.5023134654075716, "learning_rate": 1.6908051225991146e-05, "loss": 0.3721, "step": 4866 }, { "epoch": 0.28, "grad_norm": 1.2878202571798292, "learning_rate": 1.6906705590284517e-05, "loss": 0.8234, "step": 4867 }, { "epoch": 0.28, "grad_norm": 0.37248189574940166, "learning_rate": 1.690535971539897e-05, "loss": 0.1709, "step": 4868 }, { "epoch": 0.28, "grad_norm": 0.4067382861706183, "learning_rate": 1.690401360138111e-05, "loss": 0.3374, "step": 4869 }, { "epoch": 0.28, "grad_norm": 0.316105557965605, "learning_rate": 1.6902667248277557e-05, "loss": 0.2383, "step": 4870 }, { "epoch": 0.28, "grad_norm": 0.38753037141289065, "learning_rate": 1.6901320656134935e-05, "loss": 0.2109, "step": 4871 }, { "epoch": 0.28, "grad_norm": 0.9724087834349608, "learning_rate": 1.6899973824999872e-05, "loss": 0.7267, "step": 4872 }, { "epoch": 0.28, "grad_norm": 0.3812345170205305, "learning_rate": 1.6898626754919018e-05, "loss": 0.3212, "step": 4873 }, { "epoch": 0.28, "grad_norm": 0.3219187996146285, "learning_rate": 1.6897279445939012e-05, "loss": 0.1968, "step": 4874 }, { "epoch": 0.28, "grad_norm": 0.6624262246155297, "learning_rate": 1.6895931898106517e-05, "loss": 0.4522, "step": 4875 }, { "epoch": 0.28, "grad_norm": 0.47937324578610235, "learning_rate": 1.6894584111468196e-05, "loss": 0.2651, "step": 4876 }, { "epoch": 0.28, "grad_norm": 0.2998688492477607, "learning_rate": 1.6893236086070722e-05, "loss": 0.2302, "step": 4877 }, { "epoch": 0.28, "grad_norm": 0.4740529224435299, "learning_rate": 1.6891887821960783e-05, "loss": 0.299, "step": 4878 }, { "epoch": 0.28, "grad_norm": 0.7311170213555515, "learning_rate": 1.689053931918506e-05, "loss": 0.5443, "step": 4879 }, { "epoch": 0.28, "grad_norm": 0.41467295139387256, "learning_rate": 1.688919057779026e-05, "loss": 0.3303, "step": 4880 }, { "epoch": 0.28, "grad_norm": 0.3324366508784158, "learning_rate": 1.6887841597823088e-05, "loss": 0.2817, "step": 4881 }, { "epoch": 0.28, "grad_norm": 0.4272022615661138, "learning_rate": 1.6886492379330254e-05, "loss": 0.2808, "step": 4882 }, { "epoch": 0.28, "grad_norm": 0.29255807880365126, "learning_rate": 1.6885142922358486e-05, "loss": 0.2237, "step": 4883 }, { "epoch": 0.28, "grad_norm": 0.8529213592258498, "learning_rate": 1.6883793226954516e-05, "loss": 0.5751, "step": 4884 }, { "epoch": 0.28, "grad_norm": 0.5232124412222744, "learning_rate": 1.6882443293165083e-05, "loss": 0.3541, "step": 4885 }, { "epoch": 0.28, "grad_norm": 0.513732509312485, "learning_rate": 1.6881093121036933e-05, "loss": 0.3429, "step": 4886 }, { "epoch": 0.28, "grad_norm": 0.43596299045855724, "learning_rate": 1.6879742710616826e-05, "loss": 0.3057, "step": 4887 }, { "epoch": 0.28, "grad_norm": 0.33911296956631076, "learning_rate": 1.6878392061951525e-05, "loss": 0.2385, "step": 4888 }, { "epoch": 0.28, "grad_norm": 0.40006287993866735, "learning_rate": 1.6877041175087802e-05, "loss": 0.2909, "step": 4889 }, { "epoch": 0.28, "grad_norm": 0.43421895746675027, "learning_rate": 1.6875690050072435e-05, "loss": 0.3087, "step": 4890 }, { "epoch": 0.28, "grad_norm": 0.4299156983242811, "learning_rate": 1.687433868695222e-05, "loss": 0.3169, "step": 4891 }, { "epoch": 0.28, "grad_norm": 0.38431969838964786, "learning_rate": 1.687298708577395e-05, "loss": 0.3265, "step": 4892 }, { "epoch": 0.28, "grad_norm": 0.4932553865657155, "learning_rate": 1.687163524658444e-05, "loss": 0.3143, "step": 4893 }, { "epoch": 0.28, "grad_norm": 0.6671294183699192, "learning_rate": 1.687028316943049e-05, "loss": 0.3774, "step": 4894 }, { "epoch": 0.28, "grad_norm": 0.5398718612910458, "learning_rate": 1.686893085435893e-05, "loss": 0.2871, "step": 4895 }, { "epoch": 0.28, "grad_norm": 0.5618030739271445, "learning_rate": 1.686757830141659e-05, "loss": 0.435, "step": 4896 }, { "epoch": 0.28, "grad_norm": 0.4098895772036004, "learning_rate": 1.6866225510650312e-05, "loss": 0.3044, "step": 4897 }, { "epoch": 0.28, "grad_norm": 0.3133046926958272, "learning_rate": 1.686487248210694e-05, "loss": 0.2384, "step": 4898 }, { "epoch": 0.28, "grad_norm": 0.38071433829603696, "learning_rate": 1.6863519215833327e-05, "loss": 0.2787, "step": 4899 }, { "epoch": 0.28, "grad_norm": 0.4427576937958323, "learning_rate": 1.686216571187634e-05, "loss": 0.2594, "step": 4900 }, { "epoch": 0.28, "grad_norm": 0.37213118554343044, "learning_rate": 1.6860811970282844e-05, "loss": 0.2937, "step": 4901 }, { "epoch": 0.28, "grad_norm": 0.5833133836609231, "learning_rate": 1.685945799109973e-05, "loss": 0.3999, "step": 4902 }, { "epoch": 0.28, "grad_norm": 0.6780494801302973, "learning_rate": 1.6858103774373877e-05, "loss": 0.5117, "step": 4903 }, { "epoch": 0.28, "grad_norm": 0.24541000634477722, "learning_rate": 1.685674932015219e-05, "loss": 0.0744, "step": 4904 }, { "epoch": 0.28, "grad_norm": 0.3375440873846795, "learning_rate": 1.6855394628481565e-05, "loss": 0.3181, "step": 4905 }, { "epoch": 0.28, "grad_norm": 0.8924703502539542, "learning_rate": 1.6854039699408923e-05, "loss": 0.5844, "step": 4906 }, { "epoch": 0.28, "grad_norm": 0.4013162255792, "learning_rate": 1.6852684532981176e-05, "loss": 0.1698, "step": 4907 }, { "epoch": 0.28, "grad_norm": 0.442763641896375, "learning_rate": 1.685132912924526e-05, "loss": 0.328, "step": 4908 }, { "epoch": 0.28, "grad_norm": 0.48415946618162076, "learning_rate": 1.684997348824811e-05, "loss": 0.3534, "step": 4909 }, { "epoch": 0.28, "grad_norm": 0.27868722935079016, "learning_rate": 1.6848617610036676e-05, "loss": 0.0971, "step": 4910 }, { "epoch": 0.28, "grad_norm": 0.32980783132441815, "learning_rate": 1.6847261494657904e-05, "loss": 0.2478, "step": 4911 }, { "epoch": 0.28, "grad_norm": 1.1178058794694627, "learning_rate": 1.6845905142158764e-05, "loss": 0.6226, "step": 4912 }, { "epoch": 0.28, "grad_norm": 0.2863922274104645, "learning_rate": 1.6844548552586225e-05, "loss": 0.2348, "step": 4913 }, { "epoch": 0.28, "grad_norm": 0.49945881324068725, "learning_rate": 1.684319172598726e-05, "loss": 0.3969, "step": 4914 }, { "epoch": 0.28, "grad_norm": 0.7076134237344921, "learning_rate": 1.684183466240886e-05, "loss": 0.5169, "step": 4915 }, { "epoch": 0.28, "grad_norm": 0.3167088144334443, "learning_rate": 1.684047736189802e-05, "loss": 0.2335, "step": 4916 }, { "epoch": 0.28, "grad_norm": 0.3104910368581827, "learning_rate": 1.6839119824501747e-05, "loss": 0.2116, "step": 4917 }, { "epoch": 0.28, "grad_norm": 0.9284231793627837, "learning_rate": 1.6837762050267044e-05, "loss": 0.567, "step": 4918 }, { "epoch": 0.28, "grad_norm": 0.596611993917205, "learning_rate": 1.683640403924094e-05, "loss": 0.3658, "step": 4919 }, { "epoch": 0.28, "grad_norm": 0.3944841548350714, "learning_rate": 1.6835045791470453e-05, "loss": 0.2971, "step": 4920 }, { "epoch": 0.28, "grad_norm": 0.3928615059353819, "learning_rate": 1.683368730700263e-05, "loss": 0.3528, "step": 4921 }, { "epoch": 0.28, "grad_norm": 0.8844377675062224, "learning_rate": 1.6832328585884505e-05, "loss": 0.4563, "step": 4922 }, { "epoch": 0.28, "grad_norm": 0.24744733076868877, "learning_rate": 1.6830969628163134e-05, "loss": 0.1611, "step": 4923 }, { "epoch": 0.28, "grad_norm": 0.37799077238329093, "learning_rate": 1.6829610433885583e-05, "loss": 0.308, "step": 4924 }, { "epoch": 0.28, "grad_norm": 0.7391018917542396, "learning_rate": 1.6828251003098913e-05, "loss": 0.448, "step": 4925 }, { "epoch": 0.28, "grad_norm": 0.4267263829375562, "learning_rate": 1.6826891335850205e-05, "loss": 0.2828, "step": 4926 }, { "epoch": 0.28, "grad_norm": 0.9901949114209159, "learning_rate": 1.6825531432186545e-05, "loss": 0.6693, "step": 4927 }, { "epoch": 0.28, "grad_norm": 0.4338305088608627, "learning_rate": 1.682417129215502e-05, "loss": 0.3244, "step": 4928 }, { "epoch": 0.28, "grad_norm": 0.3122226230150543, "learning_rate": 1.682281091580274e-05, "loss": 0.2397, "step": 4929 }, { "epoch": 0.28, "grad_norm": 0.2536689856422566, "learning_rate": 1.682145030317681e-05, "loss": 0.1118, "step": 4930 }, { "epoch": 0.28, "grad_norm": 1.2644511808907366, "learning_rate": 1.6820089454324355e-05, "loss": 0.4325, "step": 4931 }, { "epoch": 0.28, "grad_norm": 0.3830284678075912, "learning_rate": 1.681872836929249e-05, "loss": 0.297, "step": 4932 }, { "epoch": 0.28, "grad_norm": 0.44403371374329265, "learning_rate": 1.6817367048128357e-05, "loss": 0.2896, "step": 4933 }, { "epoch": 0.28, "grad_norm": 0.6483549831795611, "learning_rate": 1.6816005490879096e-05, "loss": 0.3426, "step": 4934 }, { "epoch": 0.28, "grad_norm": 0.32440225504314685, "learning_rate": 1.6814643697591857e-05, "loss": 0.2452, "step": 4935 }, { "epoch": 0.28, "grad_norm": 0.39596665611969095, "learning_rate": 1.68132816683138e-05, "loss": 0.2387, "step": 4936 }, { "epoch": 0.28, "grad_norm": 0.9595030167316971, "learning_rate": 1.6811919403092093e-05, "loss": 0.3894, "step": 4937 }, { "epoch": 0.28, "grad_norm": 0.4468030719392794, "learning_rate": 1.6810556901973907e-05, "loss": 0.3355, "step": 4938 }, { "epoch": 0.28, "grad_norm": 0.8717707766987532, "learning_rate": 1.680919416500643e-05, "loss": 0.5147, "step": 4939 }, { "epoch": 0.28, "grad_norm": 0.38028104901258003, "learning_rate": 1.6807831192236855e-05, "loss": 0.3398, "step": 4940 }, { "epoch": 0.28, "grad_norm": 0.3862054255947419, "learning_rate": 1.6806467983712375e-05, "loss": 0.2686, "step": 4941 }, { "epoch": 0.28, "grad_norm": 0.40855243616372805, "learning_rate": 1.68051045394802e-05, "loss": 0.2499, "step": 4942 }, { "epoch": 0.28, "grad_norm": 0.7503261455917112, "learning_rate": 1.680374085958755e-05, "loss": 0.3199, "step": 4943 }, { "epoch": 0.28, "grad_norm": 0.47228792715288803, "learning_rate": 1.680237694408165e-05, "loss": 0.3086, "step": 4944 }, { "epoch": 0.28, "grad_norm": 0.5675653321231017, "learning_rate": 1.6801012793009725e-05, "loss": 0.4409, "step": 4945 }, { "epoch": 0.28, "grad_norm": 0.657462471357182, "learning_rate": 1.6799648406419018e-05, "loss": 0.2353, "step": 4946 }, { "epoch": 0.28, "grad_norm": 0.2388900030606124, "learning_rate": 1.679828378435678e-05, "loss": 0.1982, "step": 4947 }, { "epoch": 0.28, "grad_norm": 0.4841571640687985, "learning_rate": 1.6796918926870266e-05, "loss": 0.3666, "step": 4948 }, { "epoch": 0.28, "grad_norm": 0.5190146104941877, "learning_rate": 1.6795553834006746e-05, "loss": 0.2917, "step": 4949 }, { "epoch": 0.28, "grad_norm": 0.44852090196045463, "learning_rate": 1.6794188505813486e-05, "loss": 0.3222, "step": 4950 }, { "epoch": 0.28, "grad_norm": 0.985136552799657, "learning_rate": 1.679282294233777e-05, "loss": 0.622, "step": 4951 }, { "epoch": 0.28, "grad_norm": 0.32555809213851533, "learning_rate": 1.6791457143626887e-05, "loss": 0.2532, "step": 4952 }, { "epoch": 0.28, "grad_norm": 0.3561321537682771, "learning_rate": 1.679009110972814e-05, "loss": 0.2804, "step": 4953 }, { "epoch": 0.28, "grad_norm": 0.6560341934071064, "learning_rate": 1.6788724840688823e-05, "loss": 0.4473, "step": 4954 }, { "epoch": 0.28, "grad_norm": 0.31007613366907594, "learning_rate": 1.678735833655626e-05, "loss": 0.1873, "step": 4955 }, { "epoch": 0.28, "grad_norm": 0.326241133662786, "learning_rate": 1.6785991597377772e-05, "loss": 0.243, "step": 4956 }, { "epoch": 0.28, "grad_norm": 0.5177649120868529, "learning_rate": 1.6784624623200684e-05, "loss": 0.4381, "step": 4957 }, { "epoch": 0.28, "grad_norm": 1.240446473986299, "learning_rate": 1.6783257414072336e-05, "loss": 0.6064, "step": 4958 }, { "epoch": 0.28, "grad_norm": 0.33109360065083926, "learning_rate": 1.678188997004008e-05, "loss": 0.2334, "step": 4959 }, { "epoch": 0.28, "grad_norm": 0.39985346698702146, "learning_rate": 1.678052229115126e-05, "loss": 0.3504, "step": 4960 }, { "epoch": 0.29, "grad_norm": 0.5592780579901474, "learning_rate": 1.6779154377453248e-05, "loss": 0.1822, "step": 4961 }, { "epoch": 0.29, "grad_norm": 0.3496638175036046, "learning_rate": 1.677778622899341e-05, "loss": 0.2214, "step": 4962 }, { "epoch": 0.29, "grad_norm": 1.0124778899205107, "learning_rate": 1.677641784581913e-05, "loss": 0.6795, "step": 4963 }, { "epoch": 0.29, "grad_norm": 0.41286864504725357, "learning_rate": 1.6775049227977788e-05, "loss": 0.3245, "step": 4964 }, { "epoch": 0.29, "grad_norm": 0.37057078830092144, "learning_rate": 1.6773680375516784e-05, "loss": 0.2586, "step": 4965 }, { "epoch": 0.29, "grad_norm": 0.6333735841850971, "learning_rate": 1.6772311288483517e-05, "loss": 0.4853, "step": 4966 }, { "epoch": 0.29, "grad_norm": 0.4697963107259023, "learning_rate": 1.6770941966925404e-05, "loss": 0.2266, "step": 4967 }, { "epoch": 0.29, "grad_norm": 0.33424175052387356, "learning_rate": 1.676957241088986e-05, "loss": 0.2609, "step": 4968 }, { "epoch": 0.29, "grad_norm": 0.485965282810177, "learning_rate": 1.6768202620424317e-05, "loss": 0.2656, "step": 4969 }, { "epoch": 0.29, "grad_norm": 0.6859894051180101, "learning_rate": 1.6766832595576205e-05, "loss": 0.4445, "step": 4970 }, { "epoch": 0.29, "grad_norm": 0.41946713395899093, "learning_rate": 1.6765462336392975e-05, "loss": 0.2915, "step": 4971 }, { "epoch": 0.29, "grad_norm": 0.29292304755912535, "learning_rate": 1.6764091842922076e-05, "loss": 0.2806, "step": 4972 }, { "epoch": 0.29, "grad_norm": 0.41222303207216854, "learning_rate": 1.6762721115210964e-05, "loss": 0.3055, "step": 4973 }, { "epoch": 0.29, "grad_norm": 0.420655109877888, "learning_rate": 1.676135015330711e-05, "loss": 0.308, "step": 4974 }, { "epoch": 0.29, "grad_norm": 0.29319502490689703, "learning_rate": 1.675997895725799e-05, "loss": 0.1811, "step": 4975 }, { "epoch": 0.29, "grad_norm": 0.4219624190782678, "learning_rate": 1.675860752711109e-05, "loss": 0.33, "step": 4976 }, { "epoch": 0.29, "grad_norm": 0.3887698623250924, "learning_rate": 1.675723586291391e-05, "loss": 0.2888, "step": 4977 }, { "epoch": 0.29, "grad_norm": 0.6488987519940098, "learning_rate": 1.6755863964713934e-05, "loss": 0.3712, "step": 4978 }, { "epoch": 0.29, "grad_norm": 0.42002949206495394, "learning_rate": 1.675449183255868e-05, "loss": 0.3128, "step": 4979 }, { "epoch": 0.29, "grad_norm": 0.33873593445127587, "learning_rate": 1.6753119466495667e-05, "loss": 0.2737, "step": 4980 }, { "epoch": 0.29, "grad_norm": 0.334940371790023, "learning_rate": 1.6751746866572415e-05, "loss": 0.2823, "step": 4981 }, { "epoch": 0.29, "grad_norm": 0.9608719470015646, "learning_rate": 1.6750374032836462e-05, "loss": 0.3832, "step": 4982 }, { "epoch": 0.29, "grad_norm": 0.33594948667433394, "learning_rate": 1.6749000965335344e-05, "loss": 0.2858, "step": 4983 }, { "epoch": 0.29, "grad_norm": 0.3677272022726247, "learning_rate": 1.6747627664116615e-05, "loss": 0.3131, "step": 4984 }, { "epoch": 0.29, "grad_norm": 0.8046638072008366, "learning_rate": 1.674625412922783e-05, "loss": 0.4111, "step": 4985 }, { "epoch": 0.29, "grad_norm": 0.32686647655558276, "learning_rate": 1.6744880360716555e-05, "loss": 0.2732, "step": 4986 }, { "epoch": 0.29, "grad_norm": 0.41757090613796255, "learning_rate": 1.6743506358630358e-05, "loss": 0.2837, "step": 4987 }, { "epoch": 0.29, "grad_norm": 0.35935865239680986, "learning_rate": 1.674213212301683e-05, "loss": 0.268, "step": 4988 }, { "epoch": 0.29, "grad_norm": 0.4316263565610041, "learning_rate": 1.6740757653923557e-05, "loss": 0.2988, "step": 4989 }, { "epoch": 0.29, "grad_norm": 0.5887593298772523, "learning_rate": 1.673938295139814e-05, "loss": 0.3779, "step": 4990 }, { "epoch": 0.29, "grad_norm": 0.5006344489216323, "learning_rate": 1.6738008015488174e-05, "loss": 0.3939, "step": 4991 }, { "epoch": 0.29, "grad_norm": 0.32402544400838923, "learning_rate": 1.6736632846241282e-05, "loss": 0.2105, "step": 4992 }, { "epoch": 0.29, "grad_norm": 0.3172635891197995, "learning_rate": 1.6735257443705085e-05, "loss": 0.2832, "step": 4993 }, { "epoch": 0.29, "grad_norm": 0.3766031153954571, "learning_rate": 1.6733881807927212e-05, "loss": 0.3244, "step": 4994 }, { "epoch": 0.29, "grad_norm": 0.33773156689449546, "learning_rate": 1.67325059389553e-05, "loss": 0.1948, "step": 4995 }, { "epoch": 0.29, "grad_norm": 0.4008554402803368, "learning_rate": 1.6731129836837e-05, "loss": 0.3248, "step": 4996 }, { "epoch": 0.29, "grad_norm": 1.4279087807348623, "learning_rate": 1.672975350161996e-05, "loss": 0.7799, "step": 4997 }, { "epoch": 0.29, "grad_norm": 0.32856664750277276, "learning_rate": 1.672837693335185e-05, "loss": 0.2259, "step": 4998 }, { "epoch": 0.29, "grad_norm": 0.3994700065203357, "learning_rate": 1.6727000132080332e-05, "loss": 0.3668, "step": 4999 }, { "epoch": 0.29, "grad_norm": 0.4195276798735785, "learning_rate": 1.6725623097853092e-05, "loss": 0.3758, "step": 5000 }, { "epoch": 0.29, "grad_norm": 0.19978298158962446, "learning_rate": 1.672424583071781e-05, "loss": 0.1467, "step": 5001 }, { "epoch": 0.29, "grad_norm": 1.198150816066066, "learning_rate": 1.672286833072219e-05, "loss": 0.6234, "step": 5002 }, { "epoch": 0.29, "grad_norm": 0.45414769727547355, "learning_rate": 1.6721490597913922e-05, "loss": 0.3633, "step": 5003 }, { "epoch": 0.29, "grad_norm": 0.38110366694566206, "learning_rate": 1.672011263234073e-05, "loss": 0.2942, "step": 5004 }, { "epoch": 0.29, "grad_norm": 0.43945156180002193, "learning_rate": 1.671873443405032e-05, "loss": 0.3225, "step": 5005 }, { "epoch": 0.29, "grad_norm": 0.44775285754769784, "learning_rate": 1.671735600309043e-05, "loss": 0.3277, "step": 5006 }, { "epoch": 0.29, "grad_norm": 0.31152176309096835, "learning_rate": 1.6715977339508787e-05, "loss": 0.2134, "step": 5007 }, { "epoch": 0.29, "grad_norm": 0.335916270130865, "learning_rate": 1.6714598443353137e-05, "loss": 0.2523, "step": 5008 }, { "epoch": 0.29, "grad_norm": 1.2214895870371527, "learning_rate": 1.6713219314671235e-05, "loss": 0.7403, "step": 5009 }, { "epoch": 0.29, "grad_norm": 0.5000389590232583, "learning_rate": 1.6711839953510835e-05, "loss": 0.3313, "step": 5010 }, { "epoch": 0.29, "grad_norm": 0.371776258828232, "learning_rate": 1.6710460359919708e-05, "loss": 0.2937, "step": 5011 }, { "epoch": 0.29, "grad_norm": 0.3811309362080807, "learning_rate": 1.6709080533945624e-05, "loss": 0.3059, "step": 5012 }, { "epoch": 0.29, "grad_norm": 0.3476381176969048, "learning_rate": 1.6707700475636372e-05, "loss": 0.1721, "step": 5013 }, { "epoch": 0.29, "grad_norm": 0.41573768957075496, "learning_rate": 1.6706320185039742e-05, "loss": 0.2663, "step": 5014 }, { "epoch": 0.29, "grad_norm": 0.5511130497075707, "learning_rate": 1.670493966220353e-05, "loss": 0.3951, "step": 5015 }, { "epoch": 0.29, "grad_norm": 0.34083421472885944, "learning_rate": 1.6703558907175548e-05, "loss": 0.3184, "step": 5016 }, { "epoch": 0.29, "grad_norm": 0.47507592384082353, "learning_rate": 1.6702177920003607e-05, "loss": 0.4127, "step": 5017 }, { "epoch": 0.29, "grad_norm": 0.8809556008590094, "learning_rate": 1.6700796700735534e-05, "loss": 0.3604, "step": 5018 }, { "epoch": 0.29, "grad_norm": 0.3058900980432858, "learning_rate": 1.669941524941916e-05, "loss": 0.1977, "step": 5019 }, { "epoch": 0.29, "grad_norm": 0.4092102952384807, "learning_rate": 1.6698033566102322e-05, "loss": 0.2895, "step": 5020 }, { "epoch": 0.29, "grad_norm": 0.7002227774841857, "learning_rate": 1.6696651650832874e-05, "loss": 0.3808, "step": 5021 }, { "epoch": 0.29, "grad_norm": 0.39936961207401184, "learning_rate": 1.6695269503658663e-05, "loss": 0.339, "step": 5022 }, { "epoch": 0.29, "grad_norm": 0.5260095728189574, "learning_rate": 1.6693887124627556e-05, "loss": 0.3984, "step": 5023 }, { "epoch": 0.29, "grad_norm": 0.41866801996607933, "learning_rate": 1.6692504513787432e-05, "loss": 0.2768, "step": 5024 }, { "epoch": 0.29, "grad_norm": 0.2600843617318522, "learning_rate": 1.669112167118616e-05, "loss": 0.1399, "step": 5025 }, { "epoch": 0.29, "grad_norm": 0.44756851536480885, "learning_rate": 1.668973859687163e-05, "loss": 0.3325, "step": 5026 }, { "epoch": 0.29, "grad_norm": 0.42835429389794066, "learning_rate": 1.6688355290891746e-05, "loss": 0.308, "step": 5027 }, { "epoch": 0.29, "grad_norm": 0.5913982247983119, "learning_rate": 1.66869717532944e-05, "loss": 0.3365, "step": 5028 }, { "epoch": 0.29, "grad_norm": 0.40328321757099445, "learning_rate": 1.6685587984127513e-05, "loss": 0.3689, "step": 5029 }, { "epoch": 0.29, "grad_norm": 1.0759732739079926, "learning_rate": 1.6684203983439e-05, "loss": 0.606, "step": 5030 }, { "epoch": 0.29, "grad_norm": 0.2783514820703303, "learning_rate": 1.668281975127679e-05, "loss": 0.1537, "step": 5031 }, { "epoch": 0.29, "grad_norm": 0.33181140217007127, "learning_rate": 1.6681435287688823e-05, "loss": 0.2664, "step": 5032 }, { "epoch": 0.29, "grad_norm": 0.8302507762797569, "learning_rate": 1.6680050592723038e-05, "loss": 0.5131, "step": 5033 }, { "epoch": 0.29, "grad_norm": 0.6098011206817671, "learning_rate": 1.6678665666427387e-05, "loss": 0.3049, "step": 5034 }, { "epoch": 0.29, "grad_norm": 0.35164039268183567, "learning_rate": 1.6677280508849828e-05, "loss": 0.3084, "step": 5035 }, { "epoch": 0.29, "grad_norm": 0.951993401806805, "learning_rate": 1.667589512003834e-05, "loss": 0.6228, "step": 5036 }, { "epoch": 0.29, "grad_norm": 0.34895682350666707, "learning_rate": 1.6674509500040885e-05, "loss": 0.2188, "step": 5037 }, { "epoch": 0.29, "grad_norm": 0.3204560482309398, "learning_rate": 1.6673123648905454e-05, "loss": 0.2321, "step": 5038 }, { "epoch": 0.29, "grad_norm": 0.4577995928154056, "learning_rate": 1.667173756668004e-05, "loss": 0.337, "step": 5039 }, { "epoch": 0.29, "grad_norm": 0.3938689527075941, "learning_rate": 1.667035125341264e-05, "loss": 0.2516, "step": 5040 }, { "epoch": 0.29, "grad_norm": 0.572570413679517, "learning_rate": 1.6668964709151265e-05, "loss": 0.4199, "step": 5041 }, { "epoch": 0.29, "grad_norm": 1.4299646855354586, "learning_rate": 1.6667577933943925e-05, "loss": 0.7393, "step": 5042 }, { "epoch": 0.29, "grad_norm": 0.33121075066613553, "learning_rate": 1.666619092783865e-05, "loss": 0.3047, "step": 5043 }, { "epoch": 0.29, "grad_norm": 0.24035738339207335, "learning_rate": 1.6664803690883465e-05, "loss": 0.1529, "step": 5044 }, { "epoch": 0.29, "grad_norm": 0.5652684265469679, "learning_rate": 1.666341622312642e-05, "loss": 0.4583, "step": 5045 }, { "epoch": 0.29, "grad_norm": 0.8551662769154373, "learning_rate": 1.666202852461556e-05, "loss": 0.5423, "step": 5046 }, { "epoch": 0.29, "grad_norm": 0.3637742889707305, "learning_rate": 1.6660640595398934e-05, "loss": 0.2563, "step": 5047 }, { "epoch": 0.29, "grad_norm": 0.49084959630933184, "learning_rate": 1.6659252435524613e-05, "loss": 0.3588, "step": 5048 }, { "epoch": 0.29, "grad_norm": 0.9077022712537721, "learning_rate": 1.6657864045040665e-05, "loss": 0.4459, "step": 5049 }, { "epoch": 0.29, "grad_norm": 0.44606011433230913, "learning_rate": 1.665647542399517e-05, "loss": 0.2837, "step": 5050 }, { "epoch": 0.29, "grad_norm": 0.40943914248299257, "learning_rate": 1.665508657243622e-05, "loss": 0.3512, "step": 5051 }, { "epoch": 0.29, "grad_norm": 0.30297531819075785, "learning_rate": 1.6653697490411905e-05, "loss": 0.187, "step": 5052 }, { "epoch": 0.29, "grad_norm": 0.3876810435050498, "learning_rate": 1.6652308177970335e-05, "loss": 0.2396, "step": 5053 }, { "epoch": 0.29, "grad_norm": 1.1355168231759345, "learning_rate": 1.665091863515962e-05, "loss": 0.6173, "step": 5054 }, { "epoch": 0.29, "grad_norm": 0.3430108180818135, "learning_rate": 1.6649528862027877e-05, "loss": 0.3208, "step": 5055 }, { "epoch": 0.29, "grad_norm": 0.3775947002380297, "learning_rate": 1.6648138858623236e-05, "loss": 0.3046, "step": 5056 }, { "epoch": 0.29, "grad_norm": 0.3376050104963141, "learning_rate": 1.664674862499383e-05, "loss": 0.1998, "step": 5057 }, { "epoch": 0.29, "grad_norm": 0.5965276651128877, "learning_rate": 1.664535816118781e-05, "loss": 0.4102, "step": 5058 }, { "epoch": 0.29, "grad_norm": 0.4126818965088, "learning_rate": 1.6643967467253316e-05, "loss": 0.3416, "step": 5059 }, { "epoch": 0.29, "grad_norm": 0.35265629901301665, "learning_rate": 1.664257654323852e-05, "loss": 0.282, "step": 5060 }, { "epoch": 0.29, "grad_norm": 0.46529035889049486, "learning_rate": 1.664118538919158e-05, "loss": 0.3418, "step": 5061 }, { "epoch": 0.29, "grad_norm": 0.386042563376412, "learning_rate": 1.6639794005160677e-05, "loss": 0.3206, "step": 5062 }, { "epoch": 0.29, "grad_norm": 0.347541240252572, "learning_rate": 1.6638402391193993e-05, "loss": 0.234, "step": 5063 }, { "epoch": 0.29, "grad_norm": 1.3657479293256798, "learning_rate": 1.663701054733972e-05, "loss": 0.8305, "step": 5064 }, { "epoch": 0.29, "grad_norm": 0.35412391052650544, "learning_rate": 1.6635618473646058e-05, "loss": 0.2778, "step": 5065 }, { "epoch": 0.29, "grad_norm": 0.32667171769057474, "learning_rate": 1.6634226170161213e-05, "loss": 0.2391, "step": 5066 }, { "epoch": 0.29, "grad_norm": 0.43478785542869364, "learning_rate": 1.66328336369334e-05, "loss": 0.3431, "step": 5067 }, { "epoch": 0.29, "grad_norm": 0.33734429475445615, "learning_rate": 1.6631440874010842e-05, "loss": 0.3147, "step": 5068 }, { "epoch": 0.29, "grad_norm": 0.8787246640463405, "learning_rate": 1.6630047881441774e-05, "loss": 0.6012, "step": 5069 }, { "epoch": 0.29, "grad_norm": 0.49363055525613264, "learning_rate": 1.6628654659274433e-05, "loss": 0.3192, "step": 5070 }, { "epoch": 0.29, "grad_norm": 0.30082111207502216, "learning_rate": 1.6627261207557068e-05, "loss": 0.2643, "step": 5071 }, { "epoch": 0.29, "grad_norm": 0.26296980966438765, "learning_rate": 1.6625867526337928e-05, "loss": 0.1485, "step": 5072 }, { "epoch": 0.29, "grad_norm": 0.6053081587769884, "learning_rate": 1.662447361566528e-05, "loss": 0.3424, "step": 5073 }, { "epoch": 0.29, "grad_norm": 0.41307754279842807, "learning_rate": 1.6623079475587403e-05, "loss": 0.3124, "step": 5074 }, { "epoch": 0.29, "grad_norm": 0.40972224319212935, "learning_rate": 1.6621685106152564e-05, "loss": 0.3288, "step": 5075 }, { "epoch": 0.29, "grad_norm": 0.49432112174734544, "learning_rate": 1.6620290507409053e-05, "loss": 0.3193, "step": 5076 }, { "epoch": 0.29, "grad_norm": 0.380041557575946, "learning_rate": 1.6618895679405165e-05, "loss": 0.283, "step": 5077 }, { "epoch": 0.29, "grad_norm": 0.35160063555106125, "learning_rate": 1.6617500622189208e-05, "loss": 0.246, "step": 5078 }, { "epoch": 0.29, "grad_norm": 0.3811836486280641, "learning_rate": 1.6616105335809487e-05, "loss": 0.3454, "step": 5079 }, { "epoch": 0.29, "grad_norm": 0.33824258296520193, "learning_rate": 1.6614709820314323e-05, "loss": 0.2238, "step": 5080 }, { "epoch": 0.29, "grad_norm": 0.8228245625272075, "learning_rate": 1.6613314075752044e-05, "loss": 0.5268, "step": 5081 }, { "epoch": 0.29, "grad_norm": 1.1414700434465859, "learning_rate": 1.661191810217098e-05, "loss": 0.7977, "step": 5082 }, { "epoch": 0.29, "grad_norm": 0.294198134495597, "learning_rate": 1.661052189961948e-05, "loss": 0.2376, "step": 5083 }, { "epoch": 0.29, "grad_norm": 0.46694318435537846, "learning_rate": 1.660912546814589e-05, "loss": 0.3321, "step": 5084 }, { "epoch": 0.29, "grad_norm": 0.40253889624722783, "learning_rate": 1.6607728807798568e-05, "loss": 0.2855, "step": 5085 }, { "epoch": 0.29, "grad_norm": 0.3838630594646114, "learning_rate": 1.660633191862588e-05, "loss": 0.2163, "step": 5086 }, { "epoch": 0.29, "grad_norm": 0.4452069568209437, "learning_rate": 1.6604934800676207e-05, "loss": 0.3252, "step": 5087 }, { "epoch": 0.29, "grad_norm": 1.205226316107024, "learning_rate": 1.660353745399792e-05, "loss": 0.7247, "step": 5088 }, { "epoch": 0.29, "grad_norm": 0.32386036990488415, "learning_rate": 1.6602139878639417e-05, "loss": 0.2436, "step": 5089 }, { "epoch": 0.29, "grad_norm": 0.7146860073301247, "learning_rate": 1.6600742074649095e-05, "loss": 0.4543, "step": 5090 }, { "epoch": 0.29, "grad_norm": 0.2521102701136926, "learning_rate": 1.659934404207536e-05, "loss": 0.2253, "step": 5091 }, { "epoch": 0.29, "grad_norm": 0.3929148102982469, "learning_rate": 1.6597945780966626e-05, "loss": 0.2923, "step": 5092 }, { "epoch": 0.29, "grad_norm": 0.7744747631107772, "learning_rate": 1.659654729137131e-05, "loss": 0.3298, "step": 5093 }, { "epoch": 0.29, "grad_norm": 0.385373842207475, "learning_rate": 1.6595148573337843e-05, "loss": 0.3322, "step": 5094 }, { "epoch": 0.29, "grad_norm": 0.44188848071352976, "learning_rate": 1.6593749626914665e-05, "loss": 0.3031, "step": 5095 }, { "epoch": 0.29, "grad_norm": 0.596587404876109, "learning_rate": 1.6592350452150223e-05, "loss": 0.34, "step": 5096 }, { "epoch": 0.29, "grad_norm": 0.2930106307014018, "learning_rate": 1.6590951049092966e-05, "loss": 0.1554, "step": 5097 }, { "epoch": 0.29, "grad_norm": 0.45593236462612596, "learning_rate": 1.658955141779136e-05, "loss": 0.3053, "step": 5098 }, { "epoch": 0.29, "grad_norm": 0.5457068512880814, "learning_rate": 1.6588151558293874e-05, "loss": 0.2644, "step": 5099 }, { "epoch": 0.29, "grad_norm": 1.2365592496960516, "learning_rate": 1.658675147064898e-05, "loss": 0.8412, "step": 5100 }, { "epoch": 0.29, "grad_norm": 0.4162610077770308, "learning_rate": 1.6585351154905163e-05, "loss": 0.2924, "step": 5101 }, { "epoch": 0.29, "grad_norm": 0.4220963462240051, "learning_rate": 1.6583950611110923e-05, "loss": 0.3159, "step": 5102 }, { "epoch": 0.29, "grad_norm": 0.44314877537383723, "learning_rate": 1.6582549839314756e-05, "loss": 0.2598, "step": 5103 }, { "epoch": 0.29, "grad_norm": 0.2803168276990925, "learning_rate": 1.658114883956517e-05, "loss": 0.2255, "step": 5104 }, { "epoch": 0.29, "grad_norm": 0.9679407388981311, "learning_rate": 1.6579747611910684e-05, "loss": 0.557, "step": 5105 }, { "epoch": 0.29, "grad_norm": 0.7293098670982834, "learning_rate": 1.657834615639982e-05, "loss": 0.2901, "step": 5106 }, { "epoch": 0.29, "grad_norm": 0.3672744421536344, "learning_rate": 1.6576944473081112e-05, "loss": 0.2756, "step": 5107 }, { "epoch": 0.29, "grad_norm": 0.8130875641546872, "learning_rate": 1.6575542562003097e-05, "loss": 0.5067, "step": 5108 }, { "epoch": 0.29, "grad_norm": 0.2011438979448081, "learning_rate": 1.6574140423214327e-05, "loss": 0.1124, "step": 5109 }, { "epoch": 0.29, "grad_norm": 0.38553771059667385, "learning_rate": 1.657273805676336e-05, "loss": 0.2765, "step": 5110 }, { "epoch": 0.29, "grad_norm": 0.4494145707717537, "learning_rate": 1.6571335462698755e-05, "loss": 0.3405, "step": 5111 }, { "epoch": 0.29, "grad_norm": 1.1128740224578688, "learning_rate": 1.6569932641069083e-05, "loss": 0.4035, "step": 5112 }, { "epoch": 0.29, "grad_norm": 0.40172625854201377, "learning_rate": 1.656852959192293e-05, "loss": 0.325, "step": 5113 }, { "epoch": 0.29, "grad_norm": 0.4042048518785761, "learning_rate": 1.656712631530888e-05, "loss": 0.3685, "step": 5114 }, { "epoch": 0.29, "grad_norm": 0.25272424394886184, "learning_rate": 1.6565722811275526e-05, "loss": 0.1761, "step": 5115 }, { "epoch": 0.29, "grad_norm": 0.3961321724738942, "learning_rate": 1.6564319079871472e-05, "loss": 0.2789, "step": 5116 }, { "epoch": 0.29, "grad_norm": 0.5680165426058105, "learning_rate": 1.656291512114533e-05, "loss": 0.3786, "step": 5117 }, { "epoch": 0.29, "grad_norm": 0.5206323825700394, "learning_rate": 1.656151093514572e-05, "loss": 0.3638, "step": 5118 }, { "epoch": 0.29, "grad_norm": 0.3543282139228944, "learning_rate": 1.6560106521921272e-05, "loss": 0.248, "step": 5119 }, { "epoch": 0.29, "grad_norm": 0.9717910872721999, "learning_rate": 1.6558701881520616e-05, "loss": 0.6354, "step": 5120 }, { "epoch": 0.29, "grad_norm": 0.4011467519529105, "learning_rate": 1.6557297013992395e-05, "loss": 0.2849, "step": 5121 }, { "epoch": 0.29, "grad_norm": 0.2840900794225836, "learning_rate": 1.6555891919385262e-05, "loss": 0.1773, "step": 5122 }, { "epoch": 0.29, "grad_norm": 0.4765438374827972, "learning_rate": 1.655448659774787e-05, "loss": 0.3306, "step": 5123 }, { "epoch": 0.29, "grad_norm": 1.0038008012167339, "learning_rate": 1.6553081049128894e-05, "loss": 0.4335, "step": 5124 }, { "epoch": 0.29, "grad_norm": 0.3737795700734047, "learning_rate": 1.6551675273577e-05, "loss": 0.2289, "step": 5125 }, { "epoch": 0.29, "grad_norm": 1.0662857043698486, "learning_rate": 1.6550269271140872e-05, "loss": 0.6902, "step": 5126 }, { "epoch": 0.29, "grad_norm": 0.3832966114148078, "learning_rate": 1.6548863041869203e-05, "loss": 0.3521, "step": 5127 }, { "epoch": 0.29, "grad_norm": 0.2462215752704896, "learning_rate": 1.6547456585810687e-05, "loss": 0.1567, "step": 5128 }, { "epoch": 0.29, "grad_norm": 0.9104250748758096, "learning_rate": 1.6546049903014034e-05, "loss": 0.5161, "step": 5129 }, { "epoch": 0.29, "grad_norm": 0.39445391596511015, "learning_rate": 1.6544642993527952e-05, "loss": 0.3519, "step": 5130 }, { "epoch": 0.29, "grad_norm": 0.4248670156930363, "learning_rate": 1.6543235857401163e-05, "loss": 0.3151, "step": 5131 }, { "epoch": 0.29, "grad_norm": 0.5492970992711453, "learning_rate": 1.6541828494682398e-05, "loss": 0.3257, "step": 5132 }, { "epoch": 0.29, "grad_norm": 0.4673011597855598, "learning_rate": 1.6540420905420395e-05, "loss": 0.2742, "step": 5133 }, { "epoch": 0.29, "grad_norm": 0.39281271146739444, "learning_rate": 1.6539013089663897e-05, "loss": 0.3005, "step": 5134 }, { "epoch": 0.3, "grad_norm": 0.4087028084684856, "learning_rate": 1.6537605047461654e-05, "loss": 0.2509, "step": 5135 }, { "epoch": 0.3, "grad_norm": 0.8586628812462002, "learning_rate": 1.6536196778862433e-05, "loss": 0.5085, "step": 5136 }, { "epoch": 0.3, "grad_norm": 0.5709002766581852, "learning_rate": 1.6534788283915e-05, "loss": 0.3931, "step": 5137 }, { "epoch": 0.3, "grad_norm": 0.3001544493179158, "learning_rate": 1.6533379562668126e-05, "loss": 0.261, "step": 5138 }, { "epoch": 0.3, "grad_norm": 1.2209468608505851, "learning_rate": 1.6531970615170602e-05, "loss": 0.7915, "step": 5139 }, { "epoch": 0.3, "grad_norm": 0.29863282667681906, "learning_rate": 1.6530561441471215e-05, "loss": 0.2372, "step": 5140 }, { "epoch": 0.3, "grad_norm": 0.6536704998804858, "learning_rate": 1.6529152041618767e-05, "loss": 0.3358, "step": 5141 }, { "epoch": 0.3, "grad_norm": 0.2971516691014197, "learning_rate": 1.652774241566206e-05, "loss": 0.2563, "step": 5142 }, { "epoch": 0.3, "grad_norm": 0.4049693944771069, "learning_rate": 1.652633256364992e-05, "loss": 0.3354, "step": 5143 }, { "epoch": 0.3, "grad_norm": 0.569984221282444, "learning_rate": 1.652492248563116e-05, "loss": 0.452, "step": 5144 }, { "epoch": 0.3, "grad_norm": 0.6023169787669885, "learning_rate": 1.6523512181654616e-05, "loss": 0.379, "step": 5145 }, { "epoch": 0.3, "grad_norm": 0.3194650614374405, "learning_rate": 1.6522101651769124e-05, "loss": 0.2582, "step": 5146 }, { "epoch": 0.3, "grad_norm": 0.6151729058432179, "learning_rate": 1.6520690896023536e-05, "loss": 0.3939, "step": 5147 }, { "epoch": 0.3, "grad_norm": 0.2655700172516779, "learning_rate": 1.6519279914466703e-05, "loss": 0.1816, "step": 5148 }, { "epoch": 0.3, "grad_norm": 0.8313908290203407, "learning_rate": 1.6517868707147484e-05, "loss": 0.461, "step": 5149 }, { "epoch": 0.3, "grad_norm": 0.40243904423512905, "learning_rate": 1.651645727411475e-05, "loss": 0.3019, "step": 5150 }, { "epoch": 0.3, "grad_norm": 0.48595523304443944, "learning_rate": 1.6515045615417385e-05, "loss": 0.2973, "step": 5151 }, { "epoch": 0.3, "grad_norm": 0.6055444378343406, "learning_rate": 1.6513633731104268e-05, "loss": 0.3708, "step": 5152 }, { "epoch": 0.3, "grad_norm": 0.4178836687928149, "learning_rate": 1.6512221621224296e-05, "loss": 0.336, "step": 5153 }, { "epoch": 0.3, "grad_norm": 0.2420107469251138, "learning_rate": 1.651080928582637e-05, "loss": 0.2302, "step": 5154 }, { "epoch": 0.3, "grad_norm": 0.2784278043315081, "learning_rate": 1.6509396724959396e-05, "loss": 0.1629, "step": 5155 }, { "epoch": 0.3, "grad_norm": 0.4202735086060693, "learning_rate": 1.6507983938672295e-05, "loss": 0.3557, "step": 5156 }, { "epoch": 0.3, "grad_norm": 0.7828694849798539, "learning_rate": 1.650657092701399e-05, "loss": 0.5474, "step": 5157 }, { "epoch": 0.3, "grad_norm": 0.30049067721571665, "learning_rate": 1.6505157690033417e-05, "loss": 0.2597, "step": 5158 }, { "epoch": 0.3, "grad_norm": 0.3824140483311469, "learning_rate": 1.650374422777951e-05, "loss": 0.3437, "step": 5159 }, { "epoch": 0.3, "grad_norm": 0.42131426536676336, "learning_rate": 1.6502330540301217e-05, "loss": 0.266, "step": 5160 }, { "epoch": 0.3, "grad_norm": 0.337804821268556, "learning_rate": 1.6500916627647498e-05, "loss": 0.224, "step": 5161 }, { "epoch": 0.3, "grad_norm": 0.30612728307066595, "learning_rate": 1.6499502489867318e-05, "loss": 0.2456, "step": 5162 }, { "epoch": 0.3, "grad_norm": 0.5060885778707491, "learning_rate": 1.6498088127009647e-05, "loss": 0.4094, "step": 5163 }, { "epoch": 0.3, "grad_norm": 0.34297861726048556, "learning_rate": 1.649667353912346e-05, "loss": 0.2678, "step": 5164 }, { "epoch": 0.3, "grad_norm": 0.5259607161237364, "learning_rate": 1.649525872625775e-05, "loss": 0.3798, "step": 5165 }, { "epoch": 0.3, "grad_norm": 0.3577087368747908, "learning_rate": 1.649384368846151e-05, "loss": 0.3183, "step": 5166 }, { "epoch": 0.3, "grad_norm": 0.8135960641640962, "learning_rate": 1.649242842578374e-05, "loss": 0.504, "step": 5167 }, { "epoch": 0.3, "grad_norm": 0.22855591149860482, "learning_rate": 1.6491012938273457e-05, "loss": 0.1531, "step": 5168 }, { "epoch": 0.3, "grad_norm": 0.3865190212108506, "learning_rate": 1.6489597225979673e-05, "loss": 0.3684, "step": 5169 }, { "epoch": 0.3, "grad_norm": 0.45872722550496825, "learning_rate": 1.6488181288951416e-05, "loss": 0.4088, "step": 5170 }, { "epoch": 0.3, "grad_norm": 0.2849367931072605, "learning_rate": 1.648676512723772e-05, "loss": 0.2268, "step": 5171 }, { "epoch": 0.3, "grad_norm": 1.246603368276133, "learning_rate": 1.6485348740887624e-05, "loss": 0.7727, "step": 5172 }, { "epoch": 0.3, "grad_norm": 0.3887406473005966, "learning_rate": 1.6483932129950183e-05, "loss": 0.2651, "step": 5173 }, { "epoch": 0.3, "grad_norm": 0.24155302974837958, "learning_rate": 1.648251529447445e-05, "loss": 0.2227, "step": 5174 }, { "epoch": 0.3, "grad_norm": 0.5747540021102897, "learning_rate": 1.6481098234509493e-05, "loss": 0.4604, "step": 5175 }, { "epoch": 0.3, "grad_norm": 0.6065999774862946, "learning_rate": 1.647968095010438e-05, "loss": 0.4313, "step": 5176 }, { "epoch": 0.3, "grad_norm": 0.3560299438233281, "learning_rate": 1.6478263441308197e-05, "loss": 0.2519, "step": 5177 }, { "epoch": 0.3, "grad_norm": 0.4235898813948726, "learning_rate": 1.6476845708170025e-05, "loss": 0.3201, "step": 5178 }, { "epoch": 0.3, "grad_norm": 0.7045373601265343, "learning_rate": 1.647542775073897e-05, "loss": 0.45, "step": 5179 }, { "epoch": 0.3, "grad_norm": 0.41738227489163404, "learning_rate": 1.647400956906413e-05, "loss": 0.3245, "step": 5180 }, { "epoch": 0.3, "grad_norm": 0.3797306952733853, "learning_rate": 1.6472591163194613e-05, "loss": 0.2625, "step": 5181 }, { "epoch": 0.3, "grad_norm": 0.2729805774158222, "learning_rate": 1.6471172533179545e-05, "loss": 0.2373, "step": 5182 }, { "epoch": 0.3, "grad_norm": 0.42957414092760055, "learning_rate": 1.646975367906805e-05, "loss": 0.3196, "step": 5183 }, { "epoch": 0.3, "grad_norm": 1.040323100353576, "learning_rate": 1.6468334600909265e-05, "loss": 0.4741, "step": 5184 }, { "epoch": 0.3, "grad_norm": 0.4346814146354387, "learning_rate": 1.646691529875233e-05, "loss": 0.3474, "step": 5185 }, { "epoch": 0.3, "grad_norm": 0.36473624376665237, "learning_rate": 1.6465495772646395e-05, "loss": 0.3155, "step": 5186 }, { "epoch": 0.3, "grad_norm": 0.3374164626344427, "learning_rate": 1.646407602264062e-05, "loss": 0.2212, "step": 5187 }, { "epoch": 0.3, "grad_norm": 0.9076271617776354, "learning_rate": 1.646265604878417e-05, "loss": 0.5641, "step": 5188 }, { "epoch": 0.3, "grad_norm": 0.34881563109518027, "learning_rate": 1.6461235851126217e-05, "loss": 0.2696, "step": 5189 }, { "epoch": 0.3, "grad_norm": 0.4391047335953409, "learning_rate": 1.6459815429715947e-05, "loss": 0.2938, "step": 5190 }, { "epoch": 0.3, "grad_norm": 0.6558038337089468, "learning_rate": 1.6458394784602548e-05, "loss": 0.4387, "step": 5191 }, { "epoch": 0.3, "grad_norm": 0.3632607082060281, "learning_rate": 1.6456973915835216e-05, "loss": 0.3324, "step": 5192 }, { "epoch": 0.3, "grad_norm": 0.8758092746992635, "learning_rate": 1.645555282346315e-05, "loss": 0.5511, "step": 5193 }, { "epoch": 0.3, "grad_norm": 0.25760201740560185, "learning_rate": 1.645413150753557e-05, "loss": 0.1809, "step": 5194 }, { "epoch": 0.3, "grad_norm": 0.37353304142667226, "learning_rate": 1.645270996810169e-05, "loss": 0.2963, "step": 5195 }, { "epoch": 0.3, "grad_norm": 1.087573636596399, "learning_rate": 1.645128820521075e-05, "loss": 0.7903, "step": 5196 }, { "epoch": 0.3, "grad_norm": 0.33864893891719766, "learning_rate": 1.644986621891197e-05, "loss": 0.281, "step": 5197 }, { "epoch": 0.3, "grad_norm": 0.39698780680277895, "learning_rate": 1.64484440092546e-05, "loss": 0.2983, "step": 5198 }, { "epoch": 0.3, "grad_norm": 0.8776485906722473, "learning_rate": 1.6447021576287893e-05, "loss": 0.5644, "step": 5199 }, { "epoch": 0.3, "grad_norm": 0.18605182562974185, "learning_rate": 1.6445598920061104e-05, "loss": 0.1143, "step": 5200 }, { "epoch": 0.3, "grad_norm": 0.3799839090356079, "learning_rate": 1.6444176040623506e-05, "loss": 0.2902, "step": 5201 }, { "epoch": 0.3, "grad_norm": 0.3898545043893843, "learning_rate": 1.6442752938024367e-05, "loss": 0.3586, "step": 5202 }, { "epoch": 0.3, "grad_norm": 0.7637000427048503, "learning_rate": 1.644132961231297e-05, "loss": 0.3395, "step": 5203 }, { "epoch": 0.3, "grad_norm": 0.41850232975506924, "learning_rate": 1.6439906063538602e-05, "loss": 0.32, "step": 5204 }, { "epoch": 0.3, "grad_norm": 0.5203465807048746, "learning_rate": 1.6438482291750567e-05, "loss": 0.4057, "step": 5205 }, { "epoch": 0.3, "grad_norm": 0.2428540144139758, "learning_rate": 1.6437058296998168e-05, "loss": 0.1995, "step": 5206 }, { "epoch": 0.3, "grad_norm": 0.3433948371999074, "learning_rate": 1.643563407933072e-05, "loss": 0.2046, "step": 5207 }, { "epoch": 0.3, "grad_norm": 0.7815039800380226, "learning_rate": 1.6434209638797535e-05, "loss": 0.5257, "step": 5208 }, { "epoch": 0.3, "grad_norm": 0.41070430570121524, "learning_rate": 1.643278497544795e-05, "loss": 0.3437, "step": 5209 }, { "epoch": 0.3, "grad_norm": 0.3379383058006753, "learning_rate": 1.6431360089331297e-05, "loss": 0.2553, "step": 5210 }, { "epoch": 0.3, "grad_norm": 0.943370279969184, "learning_rate": 1.642993498049692e-05, "loss": 0.6497, "step": 5211 }, { "epoch": 0.3, "grad_norm": 0.2744195839372174, "learning_rate": 1.6428509648994172e-05, "loss": 0.1808, "step": 5212 }, { "epoch": 0.3, "grad_norm": 0.2973940784991242, "learning_rate": 1.6427084094872413e-05, "loss": 0.2217, "step": 5213 }, { "epoch": 0.3, "grad_norm": 0.5296153676122131, "learning_rate": 1.6425658318181007e-05, "loss": 0.4247, "step": 5214 }, { "epoch": 0.3, "grad_norm": 0.7738880370457544, "learning_rate": 1.6424232318969327e-05, "loss": 0.4883, "step": 5215 }, { "epoch": 0.3, "grad_norm": 0.3653662339834992, "learning_rate": 1.642280609728676e-05, "loss": 0.2146, "step": 5216 }, { "epoch": 0.3, "grad_norm": 0.532355787670923, "learning_rate": 1.6421379653182695e-05, "loss": 0.3656, "step": 5217 }, { "epoch": 0.3, "grad_norm": 0.2788563700674229, "learning_rate": 1.6419952986706523e-05, "loss": 0.2249, "step": 5218 }, { "epoch": 0.3, "grad_norm": 0.42198391043135125, "learning_rate": 1.641852609790766e-05, "loss": 0.3224, "step": 5219 }, { "epoch": 0.3, "grad_norm": 0.8746102140737043, "learning_rate": 1.641709898683552e-05, "loss": 0.3473, "step": 5220 }, { "epoch": 0.3, "grad_norm": 0.4154761127237117, "learning_rate": 1.641567165353951e-05, "loss": 0.3684, "step": 5221 }, { "epoch": 0.3, "grad_norm": 0.41962129121867997, "learning_rate": 1.6414244098069068e-05, "loss": 0.2641, "step": 5222 }, { "epoch": 0.3, "grad_norm": 0.46239702845240743, "learning_rate": 1.641281632047363e-05, "loss": 0.2797, "step": 5223 }, { "epoch": 0.3, "grad_norm": 0.34177914074807497, "learning_rate": 1.6411388320802637e-05, "loss": 0.2041, "step": 5224 }, { "epoch": 0.3, "grad_norm": 0.3992637612794155, "learning_rate": 1.6409960099105543e-05, "loss": 0.2882, "step": 5225 }, { "epoch": 0.3, "grad_norm": 0.7002086710277914, "learning_rate": 1.6408531655431806e-05, "loss": 0.3284, "step": 5226 }, { "epoch": 0.3, "grad_norm": 1.3006242151336662, "learning_rate": 1.6407102989830894e-05, "loss": 0.7902, "step": 5227 }, { "epoch": 0.3, "grad_norm": 0.4334420186583708, "learning_rate": 1.640567410235228e-05, "loss": 0.2615, "step": 5228 }, { "epoch": 0.3, "grad_norm": 0.6111767264288636, "learning_rate": 1.6404244993045447e-05, "loss": 0.2422, "step": 5229 }, { "epoch": 0.3, "grad_norm": 0.30863707616793146, "learning_rate": 1.6402815661959886e-05, "loss": 0.227, "step": 5230 }, { "epoch": 0.3, "grad_norm": 0.38996995551842206, "learning_rate": 1.6401386109145098e-05, "loss": 0.291, "step": 5231 }, { "epoch": 0.3, "grad_norm": 0.9864271562835062, "learning_rate": 1.639995633465058e-05, "loss": 0.4252, "step": 5232 }, { "epoch": 0.3, "grad_norm": 0.5006342644476464, "learning_rate": 1.6398526338525852e-05, "loss": 0.3058, "step": 5233 }, { "epoch": 0.3, "grad_norm": 0.3515919149907774, "learning_rate": 1.639709612082043e-05, "loss": 0.2666, "step": 5234 }, { "epoch": 0.3, "grad_norm": 0.9812781991271189, "learning_rate": 1.6395665681583842e-05, "loss": 0.5665, "step": 5235 }, { "epoch": 0.3, "grad_norm": 0.5594165984678707, "learning_rate": 1.639423502086563e-05, "loss": 0.3173, "step": 5236 }, { "epoch": 0.3, "grad_norm": 0.4214927341840395, "learning_rate": 1.6392804138715334e-05, "loss": 0.28, "step": 5237 }, { "epoch": 0.3, "grad_norm": 0.368125224933628, "learning_rate": 1.6391373035182506e-05, "loss": 0.296, "step": 5238 }, { "epoch": 0.3, "grad_norm": 0.29769042288686703, "learning_rate": 1.6389941710316703e-05, "loss": 0.183, "step": 5239 }, { "epoch": 0.3, "grad_norm": 0.42317068403499525, "learning_rate": 1.6388510164167492e-05, "loss": 0.2838, "step": 5240 }, { "epoch": 0.3, "grad_norm": 0.38688120862100955, "learning_rate": 1.6387078396784447e-05, "loss": 0.317, "step": 5241 }, { "epoch": 0.3, "grad_norm": 0.9506995811934351, "learning_rate": 1.6385646408217158e-05, "loss": 0.492, "step": 5242 }, { "epoch": 0.3, "grad_norm": 0.3817544458955061, "learning_rate": 1.638421419851521e-05, "loss": 0.261, "step": 5243 }, { "epoch": 0.3, "grad_norm": 0.3240206473375033, "learning_rate": 1.638278176772819e-05, "loss": 0.2811, "step": 5244 }, { "epoch": 0.3, "grad_norm": 0.3847891589500509, "learning_rate": 1.6381349115905718e-05, "loss": 0.3365, "step": 5245 }, { "epoch": 0.3, "grad_norm": 0.3515846322925854, "learning_rate": 1.6379916243097398e-05, "loss": 0.2199, "step": 5246 }, { "epoch": 0.3, "grad_norm": 0.4147502570911691, "learning_rate": 1.6378483149352857e-05, "loss": 0.2787, "step": 5247 }, { "epoch": 0.3, "grad_norm": 0.7085441353078938, "learning_rate": 1.6377049834721713e-05, "loss": 0.4585, "step": 5248 }, { "epoch": 0.3, "grad_norm": 0.3256721748852568, "learning_rate": 1.637561629925361e-05, "loss": 0.2384, "step": 5249 }, { "epoch": 0.3, "grad_norm": 0.850566415646217, "learning_rate": 1.637418254299819e-05, "loss": 0.5317, "step": 5250 }, { "epoch": 0.3, "grad_norm": 0.4374892514913494, "learning_rate": 1.63727485660051e-05, "loss": 0.3344, "step": 5251 }, { "epoch": 0.3, "grad_norm": 0.24873000470380066, "learning_rate": 1.6371314368324002e-05, "loss": 0.136, "step": 5252 }, { "epoch": 0.3, "grad_norm": 0.3694156067809314, "learning_rate": 1.6369879950004564e-05, "loss": 0.3051, "step": 5253 }, { "epoch": 0.3, "grad_norm": 0.8041414855390386, "learning_rate": 1.6368445311096452e-05, "loss": 0.4433, "step": 5254 }, { "epoch": 0.3, "grad_norm": 0.5618816209689073, "learning_rate": 1.6367010451649357e-05, "loss": 0.3546, "step": 5255 }, { "epoch": 0.3, "grad_norm": 0.39348874710374027, "learning_rate": 1.636557537171296e-05, "loss": 0.2843, "step": 5256 }, { "epoch": 0.3, "grad_norm": 0.3829165507517021, "learning_rate": 1.6364140071336967e-05, "loss": 0.3449, "step": 5257 }, { "epoch": 0.3, "grad_norm": 0.3896253368807964, "learning_rate": 1.6362704550571073e-05, "loss": 0.2402, "step": 5258 }, { "epoch": 0.3, "grad_norm": 0.2660992170071291, "learning_rate": 1.6361268809464998e-05, "loss": 0.2279, "step": 5259 }, { "epoch": 0.3, "grad_norm": 0.7432002175469005, "learning_rate": 1.6359832848068455e-05, "loss": 0.4667, "step": 5260 }, { "epoch": 0.3, "grad_norm": 0.33013913089948044, "learning_rate": 1.6358396666431176e-05, "loss": 0.269, "step": 5261 }, { "epoch": 0.3, "grad_norm": 0.39504619567062776, "learning_rate": 1.635696026460289e-05, "loss": 0.2618, "step": 5262 }, { "epoch": 0.3, "grad_norm": 1.2763020879980969, "learning_rate": 1.6355523642633346e-05, "loss": 0.832, "step": 5263 }, { "epoch": 0.3, "grad_norm": 0.23495958054537083, "learning_rate": 1.6354086800572287e-05, "loss": 0.1672, "step": 5264 }, { "epoch": 0.3, "grad_norm": 0.28905131340359264, "learning_rate": 1.635264973846948e-05, "loss": 0.2705, "step": 5265 }, { "epoch": 0.3, "grad_norm": 0.7167395323464834, "learning_rate": 1.6351212456374684e-05, "loss": 0.4445, "step": 5266 }, { "epoch": 0.3, "grad_norm": 0.4054608197750515, "learning_rate": 1.6349774954337676e-05, "loss": 0.3239, "step": 5267 }, { "epoch": 0.3, "grad_norm": 0.5343281535109453, "learning_rate": 1.6348337232408235e-05, "loss": 0.3897, "step": 5268 }, { "epoch": 0.3, "grad_norm": 0.3590528787248728, "learning_rate": 1.6346899290636145e-05, "loss": 0.288, "step": 5269 }, { "epoch": 0.3, "grad_norm": 0.3249108619545788, "learning_rate": 1.6345461129071207e-05, "loss": 0.2352, "step": 5270 }, { "epoch": 0.3, "grad_norm": 0.5694464956960685, "learning_rate": 1.6344022747763225e-05, "loss": 0.3867, "step": 5271 }, { "epoch": 0.3, "grad_norm": 0.3305233715465694, "learning_rate": 1.6342584146762005e-05, "loss": 0.2424, "step": 5272 }, { "epoch": 0.3, "grad_norm": 0.5162607918437077, "learning_rate": 1.634114532611737e-05, "loss": 0.3408, "step": 5273 }, { "epoch": 0.3, "grad_norm": 0.40391491962918796, "learning_rate": 1.6339706285879144e-05, "loss": 0.3143, "step": 5274 }, { "epoch": 0.3, "grad_norm": 0.9206819584778668, "learning_rate": 1.6338267026097162e-05, "loss": 0.5015, "step": 5275 }, { "epoch": 0.3, "grad_norm": 0.4979518942637146, "learning_rate": 1.633682754682127e-05, "loss": 0.3239, "step": 5276 }, { "epoch": 0.3, "grad_norm": 0.3491657958787403, "learning_rate": 1.6335387848101307e-05, "loss": 0.3165, "step": 5277 }, { "epoch": 0.3, "grad_norm": 0.28912270831094544, "learning_rate": 1.6333947929987137e-05, "loss": 0.177, "step": 5278 }, { "epoch": 0.3, "grad_norm": 0.5085041190375486, "learning_rate": 1.6332507792528626e-05, "loss": 0.3688, "step": 5279 }, { "epoch": 0.3, "grad_norm": 0.5023828501731868, "learning_rate": 1.633106743577564e-05, "loss": 0.3515, "step": 5280 }, { "epoch": 0.3, "grad_norm": 0.4313172948281927, "learning_rate": 1.6329626859778057e-05, "loss": 0.3235, "step": 5281 }, { "epoch": 0.3, "grad_norm": 0.4878188145202696, "learning_rate": 1.632818606458577e-05, "loss": 0.2731, "step": 5282 }, { "epoch": 0.3, "grad_norm": 0.39398917324063887, "learning_rate": 1.6326745050248675e-05, "loss": 0.3514, "step": 5283 }, { "epoch": 0.3, "grad_norm": 0.3186584290148543, "learning_rate": 1.632530381681667e-05, "loss": 0.1892, "step": 5284 }, { "epoch": 0.3, "grad_norm": 0.2990432265997818, "learning_rate": 1.6323862364339663e-05, "loss": 0.2251, "step": 5285 }, { "epoch": 0.3, "grad_norm": 0.5905438488593204, "learning_rate": 1.6322420692867577e-05, "loss": 0.3983, "step": 5286 }, { "epoch": 0.3, "grad_norm": 0.7228437691044046, "learning_rate": 1.632097880245033e-05, "loss": 0.585, "step": 5287 }, { "epoch": 0.3, "grad_norm": 0.3268212611120316, "learning_rate": 1.6319536693137862e-05, "loss": 0.2669, "step": 5288 }, { "epoch": 0.3, "grad_norm": 0.40593698432909653, "learning_rate": 1.631809436498011e-05, "loss": 0.3245, "step": 5289 }, { "epoch": 0.3, "grad_norm": 0.2983396151975038, "learning_rate": 1.6316651818027024e-05, "loss": 0.1933, "step": 5290 }, { "epoch": 0.3, "grad_norm": 0.47509655229133435, "learning_rate": 1.6315209052328554e-05, "loss": 0.1699, "step": 5291 }, { "epoch": 0.3, "grad_norm": 0.46728137226170324, "learning_rate": 1.6313766067934668e-05, "loss": 0.3261, "step": 5292 }, { "epoch": 0.3, "grad_norm": 0.3831760523182167, "learning_rate": 1.6312322864895334e-05, "loss": 0.3321, "step": 5293 }, { "epoch": 0.3, "grad_norm": 0.70358765871688, "learning_rate": 1.631087944326053e-05, "loss": 0.4898, "step": 5294 }, { "epoch": 0.3, "grad_norm": 0.33376431508160115, "learning_rate": 1.6309435803080244e-05, "loss": 0.2051, "step": 5295 }, { "epoch": 0.3, "grad_norm": 0.2822733219097903, "learning_rate": 1.6307991944404466e-05, "loss": 0.2165, "step": 5296 }, { "epoch": 0.3, "grad_norm": 0.4344437630599208, "learning_rate": 1.6306547867283197e-05, "loss": 0.3403, "step": 5297 }, { "epoch": 0.3, "grad_norm": 0.3531370933554759, "learning_rate": 1.630510357176645e-05, "loss": 0.2542, "step": 5298 }, { "epoch": 0.3, "grad_norm": 0.8157810100988, "learning_rate": 1.6303659057904232e-05, "loss": 0.4894, "step": 5299 }, { "epoch": 0.3, "grad_norm": 0.33867142000505207, "learning_rate": 1.6302214325746577e-05, "loss": 0.336, "step": 5300 }, { "epoch": 0.3, "grad_norm": 0.3516250686888519, "learning_rate": 1.6300769375343508e-05, "loss": 0.2335, "step": 5301 }, { "epoch": 0.3, "grad_norm": 0.4404752724035265, "learning_rate": 1.6299324206745066e-05, "loss": 0.2584, "step": 5302 }, { "epoch": 0.3, "grad_norm": 0.3540326459594793, "learning_rate": 1.6297878820001302e-05, "loss": 0.2618, "step": 5303 }, { "epoch": 0.3, "grad_norm": 0.3477074044921296, "learning_rate": 1.6296433215162258e-05, "loss": 0.2255, "step": 5304 }, { "epoch": 0.3, "grad_norm": 0.444796893845932, "learning_rate": 1.629498739227801e-05, "loss": 0.3526, "step": 5305 }, { "epoch": 0.3, "grad_norm": 0.5521271885013249, "learning_rate": 1.6293541351398616e-05, "loss": 0.3678, "step": 5306 }, { "epoch": 0.3, "grad_norm": 0.43164109413325197, "learning_rate": 1.6292095092574154e-05, "loss": 0.3293, "step": 5307 }, { "epoch": 0.3, "grad_norm": 0.37843319285317356, "learning_rate": 1.6290648615854712e-05, "loss": 0.249, "step": 5308 }, { "epoch": 0.31, "grad_norm": 0.25288135816628304, "learning_rate": 1.6289201921290377e-05, "loss": 0.1774, "step": 5309 }, { "epoch": 0.31, "grad_norm": 0.335350277217877, "learning_rate": 1.6287755008931255e-05, "loss": 0.2844, "step": 5310 }, { "epoch": 0.31, "grad_norm": 0.7224538732310122, "learning_rate": 1.6286307878827443e-05, "loss": 0.4002, "step": 5311 }, { "epoch": 0.31, "grad_norm": 0.4249497381815193, "learning_rate": 1.6284860531029062e-05, "loss": 0.342, "step": 5312 }, { "epoch": 0.31, "grad_norm": 0.37589361249803893, "learning_rate": 1.6283412965586227e-05, "loss": 0.2856, "step": 5313 }, { "epoch": 0.31, "grad_norm": 0.32777900712701974, "learning_rate": 1.6281965182549077e-05, "loss": 0.1539, "step": 5314 }, { "epoch": 0.31, "grad_norm": 0.46467288740435403, "learning_rate": 1.6280517181967733e-05, "loss": 0.3337, "step": 5315 }, { "epoch": 0.31, "grad_norm": 0.302574483899151, "learning_rate": 1.6279068963892358e-05, "loss": 0.2726, "step": 5316 }, { "epoch": 0.31, "grad_norm": 0.5464579445728996, "learning_rate": 1.6277620528373094e-05, "loss": 0.3922, "step": 5317 }, { "epoch": 0.31, "grad_norm": 0.6586890435544829, "learning_rate": 1.6276171875460097e-05, "loss": 0.3371, "step": 5318 }, { "epoch": 0.31, "grad_norm": 0.4057018760179316, "learning_rate": 1.6274723005203542e-05, "loss": 0.2928, "step": 5319 }, { "epoch": 0.31, "grad_norm": 0.5354467630028948, "learning_rate": 1.6273273917653596e-05, "loss": 0.3619, "step": 5320 }, { "epoch": 0.31, "grad_norm": 0.24525178033362732, "learning_rate": 1.6271824612860445e-05, "loss": 0.1796, "step": 5321 }, { "epoch": 0.31, "grad_norm": 0.3806740733460614, "learning_rate": 1.6270375090874276e-05, "loss": 0.2955, "step": 5322 }, { "epoch": 0.31, "grad_norm": 1.016911246302538, "learning_rate": 1.626892535174529e-05, "loss": 0.4905, "step": 5323 }, { "epoch": 0.31, "grad_norm": 0.39313652227601653, "learning_rate": 1.6267475395523686e-05, "loss": 0.265, "step": 5324 }, { "epoch": 0.31, "grad_norm": 0.42444131470135105, "learning_rate": 1.626602522225968e-05, "loss": 0.31, "step": 5325 }, { "epoch": 0.31, "grad_norm": 1.1439901110836537, "learning_rate": 1.6264574832003492e-05, "loss": 0.704, "step": 5326 }, { "epoch": 0.31, "grad_norm": 0.3857249796155631, "learning_rate": 1.6263124224805345e-05, "loss": 0.1889, "step": 5327 }, { "epoch": 0.31, "grad_norm": 0.3412589948287796, "learning_rate": 1.6261673400715475e-05, "loss": 0.2426, "step": 5328 }, { "epoch": 0.31, "grad_norm": 0.5851866759927743, "learning_rate": 1.6260222359784123e-05, "loss": 0.4064, "step": 5329 }, { "epoch": 0.31, "grad_norm": 1.3336728934135882, "learning_rate": 1.6258771102061543e-05, "loss": 0.8571, "step": 5330 }, { "epoch": 0.31, "grad_norm": 0.3092169640199647, "learning_rate": 1.6257319627597986e-05, "loss": 0.2259, "step": 5331 }, { "epoch": 0.31, "grad_norm": 0.4063809902015279, "learning_rate": 1.6255867936443724e-05, "loss": 0.3177, "step": 5332 }, { "epoch": 0.31, "grad_norm": 0.7712406027659143, "learning_rate": 1.625441602864902e-05, "loss": 0.485, "step": 5333 }, { "epoch": 0.31, "grad_norm": 0.3238377010953509, "learning_rate": 1.625296390426416e-05, "loss": 0.2591, "step": 5334 }, { "epoch": 0.31, "grad_norm": 0.3062860851213545, "learning_rate": 1.6251511563339426e-05, "loss": 0.1903, "step": 5335 }, { "epoch": 0.31, "grad_norm": 0.42276890338671497, "learning_rate": 1.6250059005925117e-05, "loss": 0.3612, "step": 5336 }, { "epoch": 0.31, "grad_norm": 0.3259165375372027, "learning_rate": 1.6248606232071536e-05, "loss": 0.2178, "step": 5337 }, { "epoch": 0.31, "grad_norm": 1.165783767802092, "learning_rate": 1.6247153241828985e-05, "loss": 0.6111, "step": 5338 }, { "epoch": 0.31, "grad_norm": 0.71001471508266, "learning_rate": 1.624570003524779e-05, "loss": 0.3921, "step": 5339 }, { "epoch": 0.31, "grad_norm": 0.3474997200766379, "learning_rate": 1.624424661237827e-05, "loss": 0.2082, "step": 5340 }, { "epoch": 0.31, "grad_norm": 0.3418873789831343, "learning_rate": 1.6242792973270758e-05, "loss": 0.2739, "step": 5341 }, { "epoch": 0.31, "grad_norm": 0.4231357230178703, "learning_rate": 1.6241339117975596e-05, "loss": 0.2998, "step": 5342 }, { "epoch": 0.31, "grad_norm": 0.4355566229176757, "learning_rate": 1.6239885046543125e-05, "loss": 0.2823, "step": 5343 }, { "epoch": 0.31, "grad_norm": 0.35899156253158837, "learning_rate": 1.6238430759023706e-05, "loss": 0.27, "step": 5344 }, { "epoch": 0.31, "grad_norm": 0.8135718113775179, "learning_rate": 1.6236976255467697e-05, "loss": 0.4483, "step": 5345 }, { "epoch": 0.31, "grad_norm": 0.4186232071036985, "learning_rate": 1.623552153592547e-05, "loss": 0.3089, "step": 5346 }, { "epoch": 0.31, "grad_norm": 0.26769754516544547, "learning_rate": 1.6234066600447397e-05, "loss": 0.1907, "step": 5347 }, { "epoch": 0.31, "grad_norm": 0.37312629158031374, "learning_rate": 1.6232611449083866e-05, "loss": 0.3283, "step": 5348 }, { "epoch": 0.31, "grad_norm": 0.3136542119516781, "learning_rate": 1.623115608188527e-05, "loss": 0.2588, "step": 5349 }, { "epoch": 0.31, "grad_norm": 1.0894221985880963, "learning_rate": 1.6229700498902008e-05, "loss": 0.3676, "step": 5350 }, { "epoch": 0.31, "grad_norm": 0.7124718935045236, "learning_rate": 1.6228244700184484e-05, "loss": 0.5309, "step": 5351 }, { "epoch": 0.31, "grad_norm": 0.3407741653174107, "learning_rate": 1.622678868578311e-05, "loss": 0.2742, "step": 5352 }, { "epoch": 0.31, "grad_norm": 0.47774823506024416, "learning_rate": 1.622533245574832e-05, "loss": 0.2726, "step": 5353 }, { "epoch": 0.31, "grad_norm": 0.33453155351886066, "learning_rate": 1.622387601013053e-05, "loss": 0.2116, "step": 5354 }, { "epoch": 0.31, "grad_norm": 0.3884129554118678, "learning_rate": 1.622241934898018e-05, "loss": 0.2891, "step": 5355 }, { "epoch": 0.31, "grad_norm": 0.5698439071171101, "learning_rate": 1.622096247234772e-05, "loss": 0.3661, "step": 5356 }, { "epoch": 0.31, "grad_norm": 1.6227867700244734, "learning_rate": 1.6219505380283593e-05, "loss": 0.4115, "step": 5357 }, { "epoch": 0.31, "grad_norm": 0.35548790471839764, "learning_rate": 1.6218048072838265e-05, "loss": 0.2962, "step": 5358 }, { "epoch": 0.31, "grad_norm": 1.3251708651923235, "learning_rate": 1.62165905500622e-05, "loss": 0.7052, "step": 5359 }, { "epoch": 0.31, "grad_norm": 0.29768315161623826, "learning_rate": 1.621513281200587e-05, "loss": 0.2041, "step": 5360 }, { "epoch": 0.31, "grad_norm": 0.432601921603546, "learning_rate": 1.6213674858719758e-05, "loss": 0.3035, "step": 5361 }, { "epoch": 0.31, "grad_norm": 1.1288404236236713, "learning_rate": 1.6212216690254353e-05, "loss": 0.4222, "step": 5362 }, { "epoch": 0.31, "grad_norm": 0.8101163644596073, "learning_rate": 1.621075830666015e-05, "loss": 0.3231, "step": 5363 }, { "epoch": 0.31, "grad_norm": 0.3462029633898307, "learning_rate": 1.6209299707987656e-05, "loss": 0.2848, "step": 5364 }, { "epoch": 0.31, "grad_norm": 0.5494931342333284, "learning_rate": 1.6207840894287377e-05, "loss": 0.42, "step": 5365 }, { "epoch": 0.31, "grad_norm": 0.31147969450902874, "learning_rate": 1.6206381865609836e-05, "loss": 0.1297, "step": 5366 }, { "epoch": 0.31, "grad_norm": 0.42917601362153945, "learning_rate": 1.620492262200556e-05, "loss": 0.3263, "step": 5367 }, { "epoch": 0.31, "grad_norm": 0.43424402968714376, "learning_rate": 1.620346316352508e-05, "loss": 0.3371, "step": 5368 }, { "epoch": 0.31, "grad_norm": 0.43098949036670525, "learning_rate": 1.6202003490218932e-05, "loss": 0.2333, "step": 5369 }, { "epoch": 0.31, "grad_norm": 0.4145635482591804, "learning_rate": 1.6200543602137676e-05, "loss": 0.2221, "step": 5370 }, { "epoch": 0.31, "grad_norm": 1.537768224298209, "learning_rate": 1.619908349933186e-05, "loss": 0.851, "step": 5371 }, { "epoch": 0.31, "grad_norm": 0.513208048312179, "learning_rate": 1.619762318185205e-05, "loss": 0.3555, "step": 5372 }, { "epoch": 0.31, "grad_norm": 0.32926945386236683, "learning_rate": 1.6196162649748815e-05, "loss": 0.255, "step": 5373 }, { "epoch": 0.31, "grad_norm": 0.6119467962093083, "learning_rate": 1.6194701903072734e-05, "loss": 0.4787, "step": 5374 }, { "epoch": 0.31, "grad_norm": 0.2881545608075555, "learning_rate": 1.619324094187439e-05, "loss": 0.2352, "step": 5375 }, { "epoch": 0.31, "grad_norm": 0.3695005519980028, "learning_rate": 1.619177976620438e-05, "loss": 0.2304, "step": 5376 }, { "epoch": 0.31, "grad_norm": 0.6251592955848971, "learning_rate": 1.6190318376113307e-05, "loss": 0.4482, "step": 5377 }, { "epoch": 0.31, "grad_norm": 0.7395901583447987, "learning_rate": 1.618885677165177e-05, "loss": 0.4733, "step": 5378 }, { "epoch": 0.31, "grad_norm": 0.39642035979972523, "learning_rate": 1.6187394952870392e-05, "loss": 0.2308, "step": 5379 }, { "epoch": 0.31, "grad_norm": 0.3336838668804227, "learning_rate": 1.618593291981979e-05, "loss": 0.305, "step": 5380 }, { "epoch": 0.31, "grad_norm": 0.28950738150393335, "learning_rate": 1.61844706725506e-05, "loss": 0.1669, "step": 5381 }, { "epoch": 0.31, "grad_norm": 0.5750376618606018, "learning_rate": 1.6183008211113454e-05, "loss": 0.3297, "step": 5382 }, { "epoch": 0.31, "grad_norm": 0.3405128455992142, "learning_rate": 1.6181545535559e-05, "loss": 0.277, "step": 5383 }, { "epoch": 0.31, "grad_norm": 0.5570942141284251, "learning_rate": 1.6180082645937888e-05, "loss": 0.3617, "step": 5384 }, { "epoch": 0.31, "grad_norm": 0.6443057721616354, "learning_rate": 1.6178619542300783e-05, "loss": 0.3859, "step": 5385 }, { "epoch": 0.31, "grad_norm": 0.3067421355198817, "learning_rate": 1.617715622469835e-05, "loss": 0.2076, "step": 5386 }, { "epoch": 0.31, "grad_norm": 0.341538422997227, "learning_rate": 1.617569269318126e-05, "loss": 0.2534, "step": 5387 }, { "epoch": 0.31, "grad_norm": 0.3996699166427557, "learning_rate": 1.61742289478002e-05, "loss": 0.2901, "step": 5388 }, { "epoch": 0.31, "grad_norm": 0.35294345317967296, "learning_rate": 1.6172764988605855e-05, "loss": 0.2503, "step": 5389 }, { "epoch": 0.31, "grad_norm": 0.679404700705096, "learning_rate": 1.6171300815648922e-05, "loss": 0.4566, "step": 5390 }, { "epoch": 0.31, "grad_norm": 0.4050109468817518, "learning_rate": 1.6169836428980108e-05, "loss": 0.3135, "step": 5391 }, { "epoch": 0.31, "grad_norm": 0.33445459512149867, "learning_rate": 1.6168371828650123e-05, "loss": 0.2532, "step": 5392 }, { "epoch": 0.31, "grad_norm": 0.29912038151863696, "learning_rate": 1.616690701470969e-05, "loss": 0.2024, "step": 5393 }, { "epoch": 0.31, "grad_norm": 0.4029994690890542, "learning_rate": 1.6165441987209532e-05, "loss": 0.2963, "step": 5394 }, { "epoch": 0.31, "grad_norm": 0.5210104635097429, "learning_rate": 1.6163976746200384e-05, "loss": 0.3754, "step": 5395 }, { "epoch": 0.31, "grad_norm": 0.41946498662119747, "learning_rate": 1.6162511291732984e-05, "loss": 0.3246, "step": 5396 }, { "epoch": 0.31, "grad_norm": 0.8088520261509311, "learning_rate": 1.616104562385808e-05, "loss": 0.4104, "step": 5397 }, { "epoch": 0.31, "grad_norm": 0.41364569032392223, "learning_rate": 1.615957974262644e-05, "loss": 0.3169, "step": 5398 }, { "epoch": 0.31, "grad_norm": 0.25882567121199573, "learning_rate": 1.615811364808881e-05, "loss": 0.1889, "step": 5399 }, { "epoch": 0.31, "grad_norm": 0.5591145105552626, "learning_rate": 1.6156647340295973e-05, "loss": 0.3205, "step": 5400 }, { "epoch": 0.31, "grad_norm": 0.39999487861467214, "learning_rate": 1.6155180819298703e-05, "loss": 0.3648, "step": 5401 }, { "epoch": 0.31, "grad_norm": 0.669508277334371, "learning_rate": 1.6153714085147783e-05, "loss": 0.3816, "step": 5402 }, { "epoch": 0.31, "grad_norm": 0.397360661949208, "learning_rate": 1.6152247137894012e-05, "loss": 0.2861, "step": 5403 }, { "epoch": 0.31, "grad_norm": 0.3426434244744426, "learning_rate": 1.6150779977588186e-05, "loss": 0.2873, "step": 5404 }, { "epoch": 0.31, "grad_norm": 0.40844847649718674, "learning_rate": 1.6149312604281115e-05, "loss": 0.2428, "step": 5405 }, { "epoch": 0.31, "grad_norm": 0.332255987493901, "learning_rate": 1.6147845018023612e-05, "loss": 0.1821, "step": 5406 }, { "epoch": 0.31, "grad_norm": 0.35299100766743446, "learning_rate": 1.61463772188665e-05, "loss": 0.3115, "step": 5407 }, { "epoch": 0.31, "grad_norm": 0.46449691352497247, "learning_rate": 1.6144909206860607e-05, "loss": 0.3955, "step": 5408 }, { "epoch": 0.31, "grad_norm": 0.3484300824139732, "learning_rate": 1.6143440982056777e-05, "loss": 0.2251, "step": 5409 }, { "epoch": 0.31, "grad_norm": 0.6109240447729549, "learning_rate": 1.614197254450585e-05, "loss": 0.378, "step": 5410 }, { "epoch": 0.31, "grad_norm": 0.29480715347424497, "learning_rate": 1.6140503894258674e-05, "loss": 0.2724, "step": 5411 }, { "epoch": 0.31, "grad_norm": 0.3103918092644329, "learning_rate": 1.6139035031366116e-05, "loss": 0.169, "step": 5412 }, { "epoch": 0.31, "grad_norm": 0.3729779854233855, "learning_rate": 1.6137565955879036e-05, "loss": 0.3555, "step": 5413 }, { "epoch": 0.31, "grad_norm": 0.7389313540620621, "learning_rate": 1.6136096667848313e-05, "loss": 0.5823, "step": 5414 }, { "epoch": 0.31, "grad_norm": 0.3544691682936791, "learning_rate": 1.6134627167324827e-05, "loss": 0.2543, "step": 5415 }, { "epoch": 0.31, "grad_norm": 0.3960851188225142, "learning_rate": 1.613315745435946e-05, "loss": 0.2833, "step": 5416 }, { "epoch": 0.31, "grad_norm": 0.2765012263636344, "learning_rate": 1.613168752900312e-05, "loss": 0.1917, "step": 5417 }, { "epoch": 0.31, "grad_norm": 0.6375209628011956, "learning_rate": 1.61302173913067e-05, "loss": 0.4634, "step": 5418 }, { "epoch": 0.31, "grad_norm": 0.2715428312700595, "learning_rate": 1.612874704132112e-05, "loss": 0.2351, "step": 5419 }, { "epoch": 0.31, "grad_norm": 0.7184165179152175, "learning_rate": 1.612727647909729e-05, "loss": 0.5226, "step": 5420 }, { "epoch": 0.31, "grad_norm": 0.7367803499422515, "learning_rate": 1.612580570468614e-05, "loss": 0.481, "step": 5421 }, { "epoch": 0.31, "grad_norm": 0.40185485568900076, "learning_rate": 1.6124334718138602e-05, "loss": 0.2303, "step": 5422 }, { "epoch": 0.31, "grad_norm": 0.4198601147325941, "learning_rate": 1.6122863519505618e-05, "loss": 0.3464, "step": 5423 }, { "epoch": 0.31, "grad_norm": 0.4956845797294979, "learning_rate": 1.6121392108838132e-05, "loss": 0.3011, "step": 5424 }, { "epoch": 0.31, "grad_norm": 0.26916066161630525, "learning_rate": 1.6119920486187102e-05, "loss": 0.2054, "step": 5425 }, { "epoch": 0.31, "grad_norm": 1.1799061771549846, "learning_rate": 1.611844865160349e-05, "loss": 0.9037, "step": 5426 }, { "epoch": 0.31, "grad_norm": 0.34547055929760423, "learning_rate": 1.611697660513826e-05, "loss": 0.3116, "step": 5427 }, { "epoch": 0.31, "grad_norm": 0.3723557442890717, "learning_rate": 1.6115504346842393e-05, "loss": 0.215, "step": 5428 }, { "epoch": 0.31, "grad_norm": 0.7354679497674211, "learning_rate": 1.6114031876766877e-05, "loss": 0.4834, "step": 5429 }, { "epoch": 0.31, "grad_norm": 0.5397253363814973, "learning_rate": 1.61125591949627e-05, "loss": 0.385, "step": 5430 }, { "epoch": 0.31, "grad_norm": 0.31820906837570323, "learning_rate": 1.611108630148086e-05, "loss": 0.2479, "step": 5431 }, { "epoch": 0.31, "grad_norm": 0.43775281349809764, "learning_rate": 1.610961319637236e-05, "loss": 0.3013, "step": 5432 }, { "epoch": 0.31, "grad_norm": 0.48523621853194515, "learning_rate": 1.610813987968822e-05, "loss": 0.3288, "step": 5433 }, { "epoch": 0.31, "grad_norm": 0.3286365653300218, "learning_rate": 1.6106666351479462e-05, "loss": 0.2691, "step": 5434 }, { "epoch": 0.31, "grad_norm": 0.4073669030943272, "learning_rate": 1.610519261179711e-05, "loss": 0.2907, "step": 5435 }, { "epoch": 0.31, "grad_norm": 0.86365452708418, "learning_rate": 1.61037186606922e-05, "loss": 0.5174, "step": 5436 }, { "epoch": 0.31, "grad_norm": 0.34139315066637166, "learning_rate": 1.610224449821577e-05, "loss": 0.2648, "step": 5437 }, { "epoch": 0.31, "grad_norm": 0.781900670758003, "learning_rate": 1.6100770124418882e-05, "loss": 0.5627, "step": 5438 }, { "epoch": 0.31, "grad_norm": 0.2584851055369354, "learning_rate": 1.6099295539352583e-05, "loss": 0.2251, "step": 5439 }, { "epoch": 0.31, "grad_norm": 0.32668801819835613, "learning_rate": 1.6097820743067945e-05, "loss": 0.2867, "step": 5440 }, { "epoch": 0.31, "grad_norm": 0.8179062477321604, "learning_rate": 1.6096345735616036e-05, "loss": 0.3766, "step": 5441 }, { "epoch": 0.31, "grad_norm": 0.5923473411548568, "learning_rate": 1.6094870517047937e-05, "loss": 0.4198, "step": 5442 }, { "epoch": 0.31, "grad_norm": 0.31872599364875503, "learning_rate": 1.609339508741473e-05, "loss": 0.2893, "step": 5443 }, { "epoch": 0.31, "grad_norm": 1.379638334873006, "learning_rate": 1.6091919446767517e-05, "loss": 0.8334, "step": 5444 }, { "epoch": 0.31, "grad_norm": 0.2200588702556372, "learning_rate": 1.6090443595157396e-05, "loss": 0.0865, "step": 5445 }, { "epoch": 0.31, "grad_norm": 0.3663693385434264, "learning_rate": 1.608896753263547e-05, "loss": 0.3005, "step": 5446 }, { "epoch": 0.31, "grad_norm": 0.4523250755868021, "learning_rate": 1.6087491259252865e-05, "loss": 0.3534, "step": 5447 }, { "epoch": 0.31, "grad_norm": 0.6353127882984126, "learning_rate": 1.60860147750607e-05, "loss": 0.316, "step": 5448 }, { "epoch": 0.31, "grad_norm": 0.4157340148684122, "learning_rate": 1.60845380801101e-05, "loss": 0.316, "step": 5449 }, { "epoch": 0.31, "grad_norm": 1.2008936520366085, "learning_rate": 1.6083061174452214e-05, "loss": 0.8211, "step": 5450 }, { "epoch": 0.31, "grad_norm": 0.22059778641398187, "learning_rate": 1.6081584058138178e-05, "loss": 0.1828, "step": 5451 }, { "epoch": 0.31, "grad_norm": 0.38232047263168106, "learning_rate": 1.6080106731219147e-05, "loss": 0.2847, "step": 5452 }, { "epoch": 0.31, "grad_norm": 0.7296256845720059, "learning_rate": 1.6078629193746283e-05, "loss": 0.4682, "step": 5453 }, { "epoch": 0.31, "grad_norm": 0.49677717222315976, "learning_rate": 1.607715144577075e-05, "loss": 0.2671, "step": 5454 }, { "epoch": 0.31, "grad_norm": 0.3413141139616929, "learning_rate": 1.6075673487343725e-05, "loss": 0.2721, "step": 5455 }, { "epoch": 0.31, "grad_norm": 1.2043097896012789, "learning_rate": 1.6074195318516385e-05, "loss": 0.8261, "step": 5456 }, { "epoch": 0.31, "grad_norm": 0.3009263002823461, "learning_rate": 1.6072716939339924e-05, "loss": 0.227, "step": 5457 }, { "epoch": 0.31, "grad_norm": 0.40159765325055435, "learning_rate": 1.607123834986554e-05, "loss": 0.2515, "step": 5458 }, { "epoch": 0.31, "grad_norm": 0.4338965338706003, "learning_rate": 1.606975955014443e-05, "loss": 0.3357, "step": 5459 }, { "epoch": 0.31, "grad_norm": 1.1490956748629666, "learning_rate": 1.6068280540227807e-05, "loss": 0.6926, "step": 5460 }, { "epoch": 0.31, "grad_norm": 0.36110726958810213, "learning_rate": 1.6066801320166897e-05, "loss": 0.2308, "step": 5461 }, { "epoch": 0.31, "grad_norm": 0.998317680190995, "learning_rate": 1.606532189001291e-05, "loss": 0.6979, "step": 5462 }, { "epoch": 0.31, "grad_norm": 0.32793332659947005, "learning_rate": 1.606384224981709e-05, "loss": 0.3374, "step": 5463 }, { "epoch": 0.31, "grad_norm": 0.3159672370415879, "learning_rate": 1.6062362399630673e-05, "loss": 0.2052, "step": 5464 }, { "epoch": 0.31, "grad_norm": 0.28198983815444506, "learning_rate": 1.6060882339504905e-05, "loss": 0.2491, "step": 5465 }, { "epoch": 0.31, "grad_norm": 0.3914189720919406, "learning_rate": 1.6059402069491047e-05, "loss": 0.3358, "step": 5466 }, { "epoch": 0.31, "grad_norm": 0.31173129053212056, "learning_rate": 1.605792158964035e-05, "loss": 0.2001, "step": 5467 }, { "epoch": 0.31, "grad_norm": 1.0947564593865875, "learning_rate": 1.6056440900004094e-05, "loss": 0.7421, "step": 5468 }, { "epoch": 0.31, "grad_norm": 0.5952027400139973, "learning_rate": 1.6054960000633545e-05, "loss": 0.4776, "step": 5469 }, { "epoch": 0.31, "grad_norm": 0.420934632151069, "learning_rate": 1.6053478891579993e-05, "loss": 0.3136, "step": 5470 }, { "epoch": 0.31, "grad_norm": 0.24668825200323996, "learning_rate": 1.605199757289473e-05, "loss": 0.1935, "step": 5471 }, { "epoch": 0.31, "grad_norm": 1.1178765441595595, "learning_rate": 1.605051604462905e-05, "loss": 0.646, "step": 5472 }, { "epoch": 0.31, "grad_norm": 0.38129148872885377, "learning_rate": 1.6049034306834258e-05, "loss": 0.275, "step": 5473 }, { "epoch": 0.31, "grad_norm": 0.4828124601149025, "learning_rate": 1.6047552359561672e-05, "loss": 0.3161, "step": 5474 }, { "epoch": 0.31, "grad_norm": 0.6728617355740534, "learning_rate": 1.6046070202862605e-05, "loss": 0.378, "step": 5475 }, { "epoch": 0.31, "grad_norm": 0.37208712058090737, "learning_rate": 1.6044587836788385e-05, "loss": 0.2971, "step": 5476 }, { "epoch": 0.31, "grad_norm": 0.2293316968318131, "learning_rate": 1.6043105261390352e-05, "loss": 0.0965, "step": 5477 }, { "epoch": 0.31, "grad_norm": 0.4546947048442612, "learning_rate": 1.6041622476719842e-05, "loss": 0.3595, "step": 5478 }, { "epoch": 0.31, "grad_norm": 0.3668420835351272, "learning_rate": 1.6040139482828207e-05, "loss": 0.2734, "step": 5479 }, { "epoch": 0.31, "grad_norm": 1.312312858344449, "learning_rate": 1.60386562797668e-05, "loss": 0.7963, "step": 5480 }, { "epoch": 0.31, "grad_norm": 0.6925249119934285, "learning_rate": 1.6037172867586984e-05, "loss": 0.3647, "step": 5481 }, { "epoch": 0.31, "grad_norm": 0.4077235005317406, "learning_rate": 1.6035689246340132e-05, "loss": 0.2688, "step": 5482 }, { "epoch": 0.32, "grad_norm": 0.34113054517286356, "learning_rate": 1.603420541607762e-05, "loss": 0.291, "step": 5483 }, { "epoch": 0.32, "grad_norm": 0.23609225173467213, "learning_rate": 1.6032721376850837e-05, "loss": 0.1449, "step": 5484 }, { "epoch": 0.32, "grad_norm": 0.4239941906638344, "learning_rate": 1.603123712871117e-05, "loss": 0.2859, "step": 5485 }, { "epoch": 0.32, "grad_norm": 0.42335183714015223, "learning_rate": 1.602975267171002e-05, "loss": 0.3501, "step": 5486 }, { "epoch": 0.32, "grad_norm": 0.5602162541545916, "learning_rate": 1.6028268005898798e-05, "loss": 0.3304, "step": 5487 }, { "epoch": 0.32, "grad_norm": 0.4283245085092912, "learning_rate": 1.6026783131328915e-05, "loss": 0.292, "step": 5488 }, { "epoch": 0.32, "grad_norm": 0.27342802197048033, "learning_rate": 1.6025298048051784e-05, "loss": 0.229, "step": 5489 }, { "epoch": 0.32, "grad_norm": 0.34821181236786064, "learning_rate": 1.6023812756118845e-05, "loss": 0.2565, "step": 5490 }, { "epoch": 0.32, "grad_norm": 0.4254165247173863, "learning_rate": 1.602232725558153e-05, "loss": 0.3036, "step": 5491 }, { "epoch": 0.32, "grad_norm": 0.4819874115221115, "learning_rate": 1.6020841546491278e-05, "loss": 0.3834, "step": 5492 }, { "epoch": 0.32, "grad_norm": 0.8487213879795744, "learning_rate": 1.6019355628899545e-05, "loss": 0.5686, "step": 5493 }, { "epoch": 0.32, "grad_norm": 0.33225687121251357, "learning_rate": 1.6017869502857785e-05, "loss": 0.2232, "step": 5494 }, { "epoch": 0.32, "grad_norm": 0.3973590452255858, "learning_rate": 1.6016383168417463e-05, "loss": 0.3365, "step": 5495 }, { "epoch": 0.32, "grad_norm": 0.29339420160733515, "learning_rate": 1.601489662563005e-05, "loss": 0.1986, "step": 5496 }, { "epoch": 0.32, "grad_norm": 0.3666754590084301, "learning_rate": 1.6013409874547026e-05, "loss": 0.2422, "step": 5497 }, { "epoch": 0.32, "grad_norm": 0.8964912741710412, "learning_rate": 1.6011922915219877e-05, "loss": 0.4598, "step": 5498 }, { "epoch": 0.32, "grad_norm": 0.38686744503564313, "learning_rate": 1.6010435747700097e-05, "loss": 0.3271, "step": 5499 }, { "epoch": 0.32, "grad_norm": 0.36402267171787295, "learning_rate": 1.600894837203918e-05, "loss": 0.1334, "step": 5500 }, { "epoch": 0.32, "grad_norm": 0.39810639722148555, "learning_rate": 1.6007460788288644e-05, "loss": 0.3626, "step": 5501 }, { "epoch": 0.32, "grad_norm": 0.26937219839700005, "learning_rate": 1.60059729965e-05, "loss": 0.237, "step": 5502 }, { "epoch": 0.32, "grad_norm": 0.3572405700250666, "learning_rate": 1.600448499672477e-05, "loss": 0.2023, "step": 5503 }, { "epoch": 0.32, "grad_norm": 0.7519951161732132, "learning_rate": 1.600299678901448e-05, "loss": 0.4123, "step": 5504 }, { "epoch": 0.32, "grad_norm": 0.8481384242186841, "learning_rate": 1.6001508373420666e-05, "loss": 0.609, "step": 5505 }, { "epoch": 0.32, "grad_norm": 0.38602966781075637, "learning_rate": 1.6000019749994882e-05, "loss": 0.3056, "step": 5506 }, { "epoch": 0.32, "grad_norm": 0.3067806475149186, "learning_rate": 1.5998530918788667e-05, "loss": 0.254, "step": 5507 }, { "epoch": 0.32, "grad_norm": 0.28563113629324893, "learning_rate": 1.5997041879853585e-05, "loss": 0.1853, "step": 5508 }, { "epoch": 0.32, "grad_norm": 0.560627051750689, "learning_rate": 1.5995552633241206e-05, "loss": 0.3737, "step": 5509 }, { "epoch": 0.32, "grad_norm": 0.31902956156250195, "learning_rate": 1.599406317900309e-05, "loss": 0.2693, "step": 5510 }, { "epoch": 0.32, "grad_norm": 1.0324425132045223, "learning_rate": 1.5992573517190826e-05, "loss": 0.7321, "step": 5511 }, { "epoch": 0.32, "grad_norm": 0.5816127897195071, "learning_rate": 1.5991083647856003e-05, "loss": 0.3399, "step": 5512 }, { "epoch": 0.32, "grad_norm": 0.3638464303346846, "learning_rate": 1.5989593571050207e-05, "loss": 0.2275, "step": 5513 }, { "epoch": 0.32, "grad_norm": 0.2997715451537564, "learning_rate": 1.5988103286825043e-05, "loss": 0.2977, "step": 5514 }, { "epoch": 0.32, "grad_norm": 0.3882699857377122, "learning_rate": 1.5986612795232122e-05, "loss": 0.3314, "step": 5515 }, { "epoch": 0.32, "grad_norm": 0.4607329976592203, "learning_rate": 1.5985122096323053e-05, "loss": 0.3253, "step": 5516 }, { "epoch": 0.32, "grad_norm": 0.42979471017625737, "learning_rate": 1.5983631190149466e-05, "loss": 0.3066, "step": 5517 }, { "epoch": 0.32, "grad_norm": 0.3052137957577575, "learning_rate": 1.5982140076762988e-05, "loss": 0.2657, "step": 5518 }, { "epoch": 0.32, "grad_norm": 0.4134905162494446, "learning_rate": 1.5980648756215256e-05, "loss": 0.3369, "step": 5519 }, { "epoch": 0.32, "grad_norm": 0.5637451037693646, "learning_rate": 1.597915722855792e-05, "loss": 0.3639, "step": 5520 }, { "epoch": 0.32, "grad_norm": 0.37421259673932755, "learning_rate": 1.597766549384262e-05, "loss": 0.2469, "step": 5521 }, { "epoch": 0.32, "grad_norm": 0.3058915736850958, "learning_rate": 1.5976173552121023e-05, "loss": 0.2922, "step": 5522 }, { "epoch": 0.32, "grad_norm": 0.2841295684463912, "learning_rate": 1.597468140344479e-05, "loss": 0.1977, "step": 5523 }, { "epoch": 0.32, "grad_norm": 0.6162024062628615, "learning_rate": 1.59731890478656e-05, "loss": 0.2815, "step": 5524 }, { "epoch": 0.32, "grad_norm": 0.4094893668831604, "learning_rate": 1.5971696485435128e-05, "loss": 0.3378, "step": 5525 }, { "epoch": 0.32, "grad_norm": 0.40588421773351036, "learning_rate": 1.5970203716205066e-05, "loss": 0.2895, "step": 5526 }, { "epoch": 0.32, "grad_norm": 0.6938594055918771, "learning_rate": 1.5968710740227106e-05, "loss": 0.3855, "step": 5527 }, { "epoch": 0.32, "grad_norm": 0.4116993177994943, "learning_rate": 1.5967217557552944e-05, "loss": 0.329, "step": 5528 }, { "epoch": 0.32, "grad_norm": 0.22779602155734383, "learning_rate": 1.5965724168234304e-05, "loss": 0.086, "step": 5529 }, { "epoch": 0.32, "grad_norm": 0.3500048899822258, "learning_rate": 1.5964230572322884e-05, "loss": 0.2697, "step": 5530 }, { "epoch": 0.32, "grad_norm": 0.4157625173818804, "learning_rate": 1.596273676987042e-05, "loss": 0.3202, "step": 5531 }, { "epoch": 0.32, "grad_norm": 0.8107650750102577, "learning_rate": 1.596124276092864e-05, "loss": 0.4456, "step": 5532 }, { "epoch": 0.32, "grad_norm": 0.35678553696656035, "learning_rate": 1.595974854554928e-05, "loss": 0.2202, "step": 5533 }, { "epoch": 0.32, "grad_norm": 0.40454325538013913, "learning_rate": 1.5958254123784077e-05, "loss": 0.3206, "step": 5534 }, { "epoch": 0.32, "grad_norm": 0.2564749702657718, "learning_rate": 1.5956759495684796e-05, "loss": 0.2145, "step": 5535 }, { "epoch": 0.32, "grad_norm": 0.37351136197316315, "learning_rate": 1.595526466130319e-05, "loss": 0.1787, "step": 5536 }, { "epoch": 0.32, "grad_norm": 0.5263171270492955, "learning_rate": 1.595376962069102e-05, "loss": 0.3519, "step": 5537 }, { "epoch": 0.32, "grad_norm": 0.42044904764267466, "learning_rate": 1.5952274373900067e-05, "loss": 0.3532, "step": 5538 }, { "epoch": 0.32, "grad_norm": 0.45297528673077875, "learning_rate": 1.5950778920982108e-05, "loss": 0.2163, "step": 5539 }, { "epoch": 0.32, "grad_norm": 0.3791943745784225, "learning_rate": 1.5949283261988934e-05, "loss": 0.3221, "step": 5540 }, { "epoch": 0.32, "grad_norm": 0.2635293913337198, "learning_rate": 1.5947787396972332e-05, "loss": 0.2126, "step": 5541 }, { "epoch": 0.32, "grad_norm": 0.39755428740194615, "learning_rate": 1.5946291325984108e-05, "loss": 0.2607, "step": 5542 }, { "epoch": 0.32, "grad_norm": 0.3970521445893526, "learning_rate": 1.5944795049076072e-05, "loss": 0.3215, "step": 5543 }, { "epoch": 0.32, "grad_norm": 0.6915226148979662, "learning_rate": 1.594329856630004e-05, "loss": 0.5286, "step": 5544 }, { "epoch": 0.32, "grad_norm": 1.0272727002837614, "learning_rate": 1.5941801877707835e-05, "loss": 0.5231, "step": 5545 }, { "epoch": 0.32, "grad_norm": 0.2711694648544462, "learning_rate": 1.594030498335129e-05, "loss": 0.2551, "step": 5546 }, { "epoch": 0.32, "grad_norm": 1.0522809594213387, "learning_rate": 1.5938807883282234e-05, "loss": 0.6938, "step": 5547 }, { "epoch": 0.32, "grad_norm": 0.2780677764777073, "learning_rate": 1.5937310577552517e-05, "loss": 0.2152, "step": 5548 }, { "epoch": 0.32, "grad_norm": 0.3764746606296057, "learning_rate": 1.5935813066213988e-05, "loss": 0.254, "step": 5549 }, { "epoch": 0.32, "grad_norm": 0.39611418844585977, "learning_rate": 1.593431534931851e-05, "loss": 0.345, "step": 5550 }, { "epoch": 0.32, "grad_norm": 0.6565438887638069, "learning_rate": 1.5932817426917945e-05, "loss": 0.4974, "step": 5551 }, { "epoch": 0.32, "grad_norm": 0.3633914159676232, "learning_rate": 1.593131929906417e-05, "loss": 0.2314, "step": 5552 }, { "epoch": 0.32, "grad_norm": 0.31526102547006846, "learning_rate": 1.5929820965809063e-05, "loss": 0.2752, "step": 5553 }, { "epoch": 0.32, "grad_norm": 0.3736946671960914, "learning_rate": 1.592832242720451e-05, "loss": 0.3219, "step": 5554 }, { "epoch": 0.32, "grad_norm": 0.3615427063133629, "learning_rate": 1.5926823683302404e-05, "loss": 0.2956, "step": 5555 }, { "epoch": 0.32, "grad_norm": 0.40873885213362576, "learning_rate": 1.5925324734154654e-05, "loss": 0.2416, "step": 5556 }, { "epoch": 0.32, "grad_norm": 0.5019426658331753, "learning_rate": 1.5923825579813158e-05, "loss": 0.4355, "step": 5557 }, { "epoch": 0.32, "grad_norm": 0.3081784728369049, "learning_rate": 1.592232622032984e-05, "loss": 0.2679, "step": 5558 }, { "epoch": 0.32, "grad_norm": 0.581079170192961, "learning_rate": 1.5920826655756617e-05, "loss": 0.4, "step": 5559 }, { "epoch": 0.32, "grad_norm": 0.5650872586904916, "learning_rate": 1.591932688614542e-05, "loss": 0.4542, "step": 5560 }, { "epoch": 0.32, "grad_norm": 0.27258563527685675, "learning_rate": 1.5917826911548194e-05, "loss": 0.2233, "step": 5561 }, { "epoch": 0.32, "grad_norm": 0.2959203113731466, "learning_rate": 1.591632673201687e-05, "loss": 0.2158, "step": 5562 }, { "epoch": 0.32, "grad_norm": 1.2341214992158127, "learning_rate": 1.5914826347603412e-05, "loss": 0.7517, "step": 5563 }, { "epoch": 0.32, "grad_norm": 0.33028417983241837, "learning_rate": 1.591332575835977e-05, "loss": 0.2764, "step": 5564 }, { "epoch": 0.32, "grad_norm": 0.46893872523644764, "learning_rate": 1.591182496433791e-05, "loss": 0.3126, "step": 5565 }, { "epoch": 0.32, "grad_norm": 0.5536360678259141, "learning_rate": 1.5910323965589803e-05, "loss": 0.3726, "step": 5566 }, { "epoch": 0.32, "grad_norm": 0.3735817111568126, "learning_rate": 1.5908822762167435e-05, "loss": 0.3044, "step": 5567 }, { "epoch": 0.32, "grad_norm": 0.35011633102273504, "learning_rate": 1.5907321354122788e-05, "loss": 0.2051, "step": 5568 }, { "epoch": 0.32, "grad_norm": 0.3548591682635962, "learning_rate": 1.5905819741507856e-05, "loss": 0.2592, "step": 5569 }, { "epoch": 0.32, "grad_norm": 0.3581690176525001, "learning_rate": 1.590431792437464e-05, "loss": 0.3012, "step": 5570 }, { "epoch": 0.32, "grad_norm": 0.7579819660734486, "learning_rate": 1.590281590277515e-05, "loss": 0.5101, "step": 5571 }, { "epoch": 0.32, "grad_norm": 0.4774255317314944, "learning_rate": 1.5901313676761397e-05, "loss": 0.3312, "step": 5572 }, { "epoch": 0.32, "grad_norm": 0.3935810128176364, "learning_rate": 1.5899811246385404e-05, "loss": 0.2783, "step": 5573 }, { "epoch": 0.32, "grad_norm": 0.2434832696741011, "learning_rate": 1.5898308611699204e-05, "loss": 0.2066, "step": 5574 }, { "epoch": 0.32, "grad_norm": 0.838098712521921, "learning_rate": 1.589680577275483e-05, "loss": 0.4197, "step": 5575 }, { "epoch": 0.32, "grad_norm": 0.38018436720857546, "learning_rate": 1.5895302729604323e-05, "loss": 0.274, "step": 5576 }, { "epoch": 0.32, "grad_norm": 0.5879533204990789, "learning_rate": 1.589379948229974e-05, "loss": 0.4019, "step": 5577 }, { "epoch": 0.32, "grad_norm": 0.469626009070046, "learning_rate": 1.5892296030893134e-05, "loss": 0.2904, "step": 5578 }, { "epoch": 0.32, "grad_norm": 0.3569116943337263, "learning_rate": 1.5890792375436568e-05, "loss": 0.2826, "step": 5579 }, { "epoch": 0.32, "grad_norm": 0.30781010648834556, "learning_rate": 1.5889288515982118e-05, "loss": 0.214, "step": 5580 }, { "epoch": 0.32, "grad_norm": 0.4452871745168325, "learning_rate": 1.5887784452581857e-05, "loss": 0.3477, "step": 5581 }, { "epoch": 0.32, "grad_norm": 0.3423890676840111, "learning_rate": 1.5886280185287874e-05, "loss": 0.2258, "step": 5582 }, { "epoch": 0.32, "grad_norm": 0.9397225493977781, "learning_rate": 1.588477571415226e-05, "loss": 0.4489, "step": 5583 }, { "epoch": 0.32, "grad_norm": 0.8992191750862868, "learning_rate": 1.588327103922712e-05, "loss": 0.5669, "step": 5584 }, { "epoch": 0.32, "grad_norm": 0.34519578977057513, "learning_rate": 1.5881766160564553e-05, "loss": 0.2008, "step": 5585 }, { "epoch": 0.32, "grad_norm": 0.2613946128658462, "learning_rate": 1.588026107821668e-05, "loss": 0.2106, "step": 5586 }, { "epoch": 0.32, "grad_norm": 1.1043781598317808, "learning_rate": 1.5878755792235616e-05, "loss": 0.5675, "step": 5587 }, { "epoch": 0.32, "grad_norm": 0.3429172657775994, "learning_rate": 1.5877250302673493e-05, "loss": 0.2068, "step": 5588 }, { "epoch": 0.32, "grad_norm": 0.6522423001767897, "learning_rate": 1.5875744609582444e-05, "loss": 0.3912, "step": 5589 }, { "epoch": 0.32, "grad_norm": 0.4731252524113648, "learning_rate": 1.587423871301461e-05, "loss": 0.3802, "step": 5590 }, { "epoch": 0.32, "grad_norm": 0.33128379858329554, "learning_rate": 1.5872732613022147e-05, "loss": 0.2088, "step": 5591 }, { "epoch": 0.32, "grad_norm": 0.2749688226198136, "learning_rate": 1.5871226309657203e-05, "loss": 0.1898, "step": 5592 }, { "epoch": 0.32, "grad_norm": 0.449249676522092, "learning_rate": 1.5869719802971947e-05, "loss": 0.3547, "step": 5593 }, { "epoch": 0.32, "grad_norm": 0.545477058578212, "learning_rate": 1.5868213093018543e-05, "loss": 0.3069, "step": 5594 }, { "epoch": 0.32, "grad_norm": 0.7375781226958218, "learning_rate": 1.5866706179849172e-05, "loss": 0.3483, "step": 5595 }, { "epoch": 0.32, "grad_norm": 1.0617075868345798, "learning_rate": 1.586519906351602e-05, "loss": 0.6041, "step": 5596 }, { "epoch": 0.32, "grad_norm": 0.3294184144488115, "learning_rate": 1.586369174407128e-05, "loss": 0.2779, "step": 5597 }, { "epoch": 0.32, "grad_norm": 0.2594062832915638, "learning_rate": 1.5862184221567144e-05, "loss": 0.1825, "step": 5598 }, { "epoch": 0.32, "grad_norm": 0.9733876205705159, "learning_rate": 1.586067649605582e-05, "loss": 0.5383, "step": 5599 }, { "epoch": 0.32, "grad_norm": 0.4270131446780353, "learning_rate": 1.585916856758952e-05, "loss": 0.2909, "step": 5600 }, { "epoch": 0.32, "grad_norm": 0.5168643875206522, "learning_rate": 1.5857660436220466e-05, "loss": 0.315, "step": 5601 }, { "epoch": 0.32, "grad_norm": 0.5136780415745466, "learning_rate": 1.5856152102000878e-05, "loss": 0.3597, "step": 5602 }, { "epoch": 0.32, "grad_norm": 0.3472687358366042, "learning_rate": 1.5854643564983e-05, "loss": 0.2772, "step": 5603 }, { "epoch": 0.32, "grad_norm": 0.21984666591055935, "learning_rate": 1.5853134825219066e-05, "loss": 0.1203, "step": 5604 }, { "epoch": 0.32, "grad_norm": 0.5955025245870108, "learning_rate": 1.5851625882761326e-05, "loss": 0.3752, "step": 5605 }, { "epoch": 0.32, "grad_norm": 0.3950271910878111, "learning_rate": 1.585011673766203e-05, "loss": 0.2838, "step": 5606 }, { "epoch": 0.32, "grad_norm": 0.6397240293769488, "learning_rate": 1.5848607389973446e-05, "loss": 0.4141, "step": 5607 }, { "epoch": 0.32, "grad_norm": 0.4432763685637882, "learning_rate": 1.584709783974784e-05, "loss": 0.2897, "step": 5608 }, { "epoch": 0.32, "grad_norm": 0.4240833841777455, "learning_rate": 1.5845588087037484e-05, "loss": 0.2904, "step": 5609 }, { "epoch": 0.32, "grad_norm": 0.43798524592763793, "learning_rate": 1.5844078131894668e-05, "loss": 0.3254, "step": 5610 }, { "epoch": 0.32, "grad_norm": 0.44403383059135365, "learning_rate": 1.5842567974371672e-05, "loss": 0.2548, "step": 5611 }, { "epoch": 0.32, "grad_norm": 0.37943493120580635, "learning_rate": 1.5841057614520803e-05, "loss": 0.3146, "step": 5612 }, { "epoch": 0.32, "grad_norm": 0.3330746925906659, "learning_rate": 1.5839547052394356e-05, "loss": 0.3125, "step": 5613 }, { "epoch": 0.32, "grad_norm": 0.36957538435382176, "learning_rate": 1.583803628804465e-05, "loss": 0.1807, "step": 5614 }, { "epoch": 0.32, "grad_norm": 0.3799639377524021, "learning_rate": 1.5836525321523998e-05, "loss": 0.2976, "step": 5615 }, { "epoch": 0.32, "grad_norm": 0.5407995328754999, "learning_rate": 1.5835014152884722e-05, "loss": 0.371, "step": 5616 }, { "epoch": 0.32, "grad_norm": 0.4602781375274827, "learning_rate": 1.5833502782179157e-05, "loss": 0.3056, "step": 5617 }, { "epoch": 0.32, "grad_norm": 0.2703581818132286, "learning_rate": 1.5831991209459646e-05, "loss": 0.2294, "step": 5618 }, { "epoch": 0.32, "grad_norm": 1.3941351735792076, "learning_rate": 1.583047943477853e-05, "loss": 0.8033, "step": 5619 }, { "epoch": 0.32, "grad_norm": 0.4269407961412312, "learning_rate": 1.5828967458188155e-05, "loss": 0.2612, "step": 5620 }, { "epoch": 0.32, "grad_norm": 0.32749877211677897, "learning_rate": 1.5827455279740892e-05, "loss": 0.2406, "step": 5621 }, { "epoch": 0.32, "grad_norm": 0.7597682432080118, "learning_rate": 1.5825942899489103e-05, "loss": 0.4945, "step": 5622 }, { "epoch": 0.32, "grad_norm": 0.6648356899162475, "learning_rate": 1.5824430317485163e-05, "loss": 0.4611, "step": 5623 }, { "epoch": 0.32, "grad_norm": 0.3576858149221899, "learning_rate": 1.582291753378145e-05, "loss": 0.1644, "step": 5624 }, { "epoch": 0.32, "grad_norm": 0.30960874021564383, "learning_rate": 1.5821404548430352e-05, "loss": 0.2993, "step": 5625 }, { "epoch": 0.32, "grad_norm": 0.2692899639231665, "learning_rate": 1.5819891361484266e-05, "loss": 0.2074, "step": 5626 }, { "epoch": 0.32, "grad_norm": 0.362044309838756, "learning_rate": 1.5818377972995594e-05, "loss": 0.2007, "step": 5627 }, { "epoch": 0.32, "grad_norm": 0.5932912412651169, "learning_rate": 1.581686438301674e-05, "loss": 0.3933, "step": 5628 }, { "epoch": 0.32, "grad_norm": 0.37069755753292, "learning_rate": 1.5815350591600124e-05, "loss": 0.3382, "step": 5629 }, { "epoch": 0.32, "grad_norm": 0.30830834981343785, "learning_rate": 1.5813836598798168e-05, "loss": 0.1911, "step": 5630 }, { "epoch": 0.32, "grad_norm": 0.36059298843687776, "learning_rate": 1.5812322404663304e-05, "loss": 0.2967, "step": 5631 }, { "epoch": 0.32, "grad_norm": 0.385845649979882, "learning_rate": 1.581080800924796e-05, "loss": 0.1999, "step": 5632 }, { "epoch": 0.32, "grad_norm": 0.33126056841829, "learning_rate": 1.5809293412604584e-05, "loss": 0.299, "step": 5633 }, { "epoch": 0.32, "grad_norm": 0.386058427132325, "learning_rate": 1.580777861478563e-05, "loss": 0.3071, "step": 5634 }, { "epoch": 0.32, "grad_norm": 0.7101238664226756, "learning_rate": 1.580626361584355e-05, "loss": 0.5124, "step": 5635 }, { "epoch": 0.32, "grad_norm": 0.589818500555523, "learning_rate": 1.5804748415830814e-05, "loss": 0.4467, "step": 5636 }, { "epoch": 0.32, "grad_norm": 0.2932950338270886, "learning_rate": 1.5803233014799887e-05, "loss": 0.2563, "step": 5637 }, { "epoch": 0.32, "grad_norm": 0.2636118456350962, "learning_rate": 1.5801717412803246e-05, "loss": 0.2042, "step": 5638 }, { "epoch": 0.32, "grad_norm": 0.3969761989987564, "learning_rate": 1.580020160989339e-05, "loss": 0.3166, "step": 5639 }, { "epoch": 0.32, "grad_norm": 0.5113430169982263, "learning_rate": 1.5798685606122795e-05, "loss": 0.3068, "step": 5640 }, { "epoch": 0.32, "grad_norm": 0.40055815261009947, "learning_rate": 1.579716940154397e-05, "loss": 0.3314, "step": 5641 }, { "epoch": 0.32, "grad_norm": 0.7927953903429585, "learning_rate": 1.5795652996209416e-05, "loss": 0.5219, "step": 5642 }, { "epoch": 0.32, "grad_norm": 0.37086620699465855, "learning_rate": 1.579413639017165e-05, "loss": 0.3161, "step": 5643 }, { "epoch": 0.32, "grad_norm": 0.24236166483538968, "learning_rate": 1.5792619583483183e-05, "loss": 0.1854, "step": 5644 }, { "epoch": 0.32, "grad_norm": 0.6014945831118405, "learning_rate": 1.5791102576196555e-05, "loss": 0.4099, "step": 5645 }, { "epoch": 0.32, "grad_norm": 0.3698987883509682, "learning_rate": 1.5789585368364296e-05, "loss": 0.3163, "step": 5646 }, { "epoch": 0.32, "grad_norm": 0.7593189596779604, "learning_rate": 1.5788067960038942e-05, "loss": 0.3718, "step": 5647 }, { "epoch": 0.32, "grad_norm": 0.5227360365319026, "learning_rate": 1.5786550351273043e-05, "loss": 0.3476, "step": 5648 }, { "epoch": 0.32, "grad_norm": 0.34840131645436817, "learning_rate": 1.5785032542119155e-05, "loss": 0.3044, "step": 5649 }, { "epoch": 0.32, "grad_norm": 0.3537908681190076, "learning_rate": 1.578351453262984e-05, "loss": 0.1915, "step": 5650 }, { "epoch": 0.32, "grad_norm": 0.5534665540651362, "learning_rate": 1.578199632285766e-05, "loss": 0.3911, "step": 5651 }, { "epoch": 0.32, "grad_norm": 0.3296603796034221, "learning_rate": 1.57804779128552e-05, "loss": 0.2536, "step": 5652 }, { "epoch": 0.32, "grad_norm": 0.36719045075931117, "learning_rate": 1.577895930267504e-05, "loss": 0.255, "step": 5653 }, { "epoch": 0.32, "grad_norm": 0.5549103218709931, "learning_rate": 1.5777440492369764e-05, "loss": 0.4179, "step": 5654 }, { "epoch": 0.32, "grad_norm": 0.4099773567180406, "learning_rate": 1.5775921481991976e-05, "loss": 0.3489, "step": 5655 }, { "epoch": 0.32, "grad_norm": 0.9487980274852443, "learning_rate": 1.5774402271594272e-05, "loss": 0.4496, "step": 5656 }, { "epoch": 0.33, "grad_norm": 0.30146272872090246, "learning_rate": 1.577288286122927e-05, "loss": 0.2396, "step": 5657 }, { "epoch": 0.33, "grad_norm": 0.32480569594008457, "learning_rate": 1.5771363250949582e-05, "loss": 0.246, "step": 5658 }, { "epoch": 0.33, "grad_norm": 0.3711344965945374, "learning_rate": 1.5769843440807828e-05, "loss": 0.2793, "step": 5659 }, { "epoch": 0.33, "grad_norm": 0.3992119007745314, "learning_rate": 1.5768323430856647e-05, "loss": 0.2608, "step": 5660 }, { "epoch": 0.33, "grad_norm": 0.335697923487834, "learning_rate": 1.5766803221148676e-05, "loss": 0.2743, "step": 5661 }, { "epoch": 0.33, "grad_norm": 1.1159080031165294, "learning_rate": 1.576528281173655e-05, "loss": 0.4803, "step": 5662 }, { "epoch": 0.33, "grad_norm": 0.24838037661375026, "learning_rate": 1.5763762202672933e-05, "loss": 0.1415, "step": 5663 }, { "epoch": 0.33, "grad_norm": 0.30420554106490244, "learning_rate": 1.576224139401048e-05, "loss": 0.2233, "step": 5664 }, { "epoch": 0.33, "grad_norm": 0.422273654707413, "learning_rate": 1.5760720385801855e-05, "loss": 0.3285, "step": 5665 }, { "epoch": 0.33, "grad_norm": 0.6169292780868955, "learning_rate": 1.575919917809973e-05, "loss": 0.2722, "step": 5666 }, { "epoch": 0.33, "grad_norm": 0.4049489150331608, "learning_rate": 1.5757677770956785e-05, "loss": 0.2992, "step": 5667 }, { "epoch": 0.33, "grad_norm": 0.8471162417110218, "learning_rate": 1.5756156164425703e-05, "loss": 0.5039, "step": 5668 }, { "epoch": 0.33, "grad_norm": 0.4114900323796081, "learning_rate": 1.5754634358559187e-05, "loss": 0.2948, "step": 5669 }, { "epoch": 0.33, "grad_norm": 0.24786075406450575, "learning_rate": 1.5753112353409928e-05, "loss": 0.1681, "step": 5670 }, { "epoch": 0.33, "grad_norm": 1.1452188003836672, "learning_rate": 1.5751590149030632e-05, "loss": 0.7431, "step": 5671 }, { "epoch": 0.33, "grad_norm": 0.4562773003590891, "learning_rate": 1.575006774547402e-05, "loss": 0.3109, "step": 5672 }, { "epoch": 0.33, "grad_norm": 0.434282672437878, "learning_rate": 1.5748545142792807e-05, "loss": 0.2306, "step": 5673 }, { "epoch": 0.33, "grad_norm": 1.0737371125385688, "learning_rate": 1.5747022341039727e-05, "loss": 0.4466, "step": 5674 }, { "epoch": 0.33, "grad_norm": 0.719146996338594, "learning_rate": 1.5745499340267508e-05, "loss": 0.4678, "step": 5675 }, { "epoch": 0.33, "grad_norm": 0.314472948884982, "learning_rate": 1.5743976140528893e-05, "loss": 0.1734, "step": 5676 }, { "epoch": 0.33, "grad_norm": 0.40527113405825066, "learning_rate": 1.5742452741876632e-05, "loss": 0.2722, "step": 5677 }, { "epoch": 0.33, "grad_norm": 0.7245521666416738, "learning_rate": 1.574092914436348e-05, "loss": 0.4212, "step": 5678 }, { "epoch": 0.33, "grad_norm": 0.4596505412358331, "learning_rate": 1.5739405348042197e-05, "loss": 0.2702, "step": 5679 }, { "epoch": 0.33, "grad_norm": 0.42284574176881146, "learning_rate": 1.5737881352965556e-05, "loss": 0.34, "step": 5680 }, { "epoch": 0.33, "grad_norm": 1.7840092327565245, "learning_rate": 1.573635715918633e-05, "loss": 0.7355, "step": 5681 }, { "epoch": 0.33, "grad_norm": 0.3697786735178063, "learning_rate": 1.5734832766757302e-05, "loss": 0.2946, "step": 5682 }, { "epoch": 0.33, "grad_norm": 0.26905521183914954, "learning_rate": 1.573330817573126e-05, "loss": 0.1296, "step": 5683 }, { "epoch": 0.33, "grad_norm": 0.3968443457830917, "learning_rate": 1.5731783386161007e-05, "loss": 0.3475, "step": 5684 }, { "epoch": 0.33, "grad_norm": 0.3421390360738499, "learning_rate": 1.5730258398099335e-05, "loss": 0.2744, "step": 5685 }, { "epoch": 0.33, "grad_norm": 0.7376461566219767, "learning_rate": 1.5728733211599067e-05, "loss": 0.3775, "step": 5686 }, { "epoch": 0.33, "grad_norm": 1.2203592124029954, "learning_rate": 1.572720782671301e-05, "loss": 0.6442, "step": 5687 }, { "epoch": 0.33, "grad_norm": 0.3257697053227165, "learning_rate": 1.5725682243493995e-05, "loss": 0.2935, "step": 5688 }, { "epoch": 0.33, "grad_norm": 0.24273051299883028, "learning_rate": 1.572415646199485e-05, "loss": 0.1582, "step": 5689 }, { "epoch": 0.33, "grad_norm": 0.7445388041307233, "learning_rate": 1.5722630482268413e-05, "loss": 0.4049, "step": 5690 }, { "epoch": 0.33, "grad_norm": 0.3916307018462784, "learning_rate": 1.5721104304367526e-05, "loss": 0.318, "step": 5691 }, { "epoch": 0.33, "grad_norm": 0.3538495633272354, "learning_rate": 1.5719577928345045e-05, "loss": 0.2772, "step": 5692 }, { "epoch": 0.33, "grad_norm": 0.7364894330489864, "learning_rate": 1.5718051354253828e-05, "loss": 0.4585, "step": 5693 }, { "epoch": 0.33, "grad_norm": 0.4082152277198964, "learning_rate": 1.5716524582146734e-05, "loss": 0.304, "step": 5694 }, { "epoch": 0.33, "grad_norm": 0.31484704632365135, "learning_rate": 1.5714997612076643e-05, "loss": 0.232, "step": 5695 }, { "epoch": 0.33, "grad_norm": 0.39016338699647013, "learning_rate": 1.571347044409643e-05, "loss": 0.3046, "step": 5696 }, { "epoch": 0.33, "grad_norm": 0.3233481171244161, "learning_rate": 1.571194307825898e-05, "loss": 0.2492, "step": 5697 }, { "epoch": 0.33, "grad_norm": 1.092219467523616, "learning_rate": 1.5710415514617187e-05, "loss": 0.7269, "step": 5698 }, { "epoch": 0.33, "grad_norm": 1.0398976979337753, "learning_rate": 1.5708887753223953e-05, "loss": 0.2765, "step": 5699 }, { "epoch": 0.33, "grad_norm": 0.29904995978400906, "learning_rate": 1.5707359794132178e-05, "loss": 0.2535, "step": 5700 }, { "epoch": 0.33, "grad_norm": 0.36950212436020996, "learning_rate": 1.5705831637394783e-05, "loss": 0.3008, "step": 5701 }, { "epoch": 0.33, "grad_norm": 0.5786942167807272, "learning_rate": 1.570430328306468e-05, "loss": 0.3812, "step": 5702 }, { "epoch": 0.33, "grad_norm": 0.39500817970292046, "learning_rate": 1.5702774731194802e-05, "loss": 0.3126, "step": 5703 }, { "epoch": 0.33, "grad_norm": 0.3453869948145563, "learning_rate": 1.570124598183808e-05, "loss": 0.2725, "step": 5704 }, { "epoch": 0.33, "grad_norm": 0.4214946705124231, "learning_rate": 1.569971703504745e-05, "loss": 0.2872, "step": 5705 }, { "epoch": 0.33, "grad_norm": 0.3257990453502148, "learning_rate": 1.5698187890875867e-05, "loss": 0.2885, "step": 5706 }, { "epoch": 0.33, "grad_norm": 0.6989314635364635, "learning_rate": 1.5696658549376286e-05, "loss": 0.5029, "step": 5707 }, { "epoch": 0.33, "grad_norm": 0.34822630078350825, "learning_rate": 1.569512901060166e-05, "loss": 0.3442, "step": 5708 }, { "epoch": 0.33, "grad_norm": 0.34505897851320166, "learning_rate": 1.569359927460496e-05, "loss": 0.1921, "step": 5709 }, { "epoch": 0.33, "grad_norm": 0.2695284101966276, "learning_rate": 1.5692069341439164e-05, "loss": 0.2228, "step": 5710 }, { "epoch": 0.33, "grad_norm": 0.5436723132485777, "learning_rate": 1.5690539211157255e-05, "loss": 0.3901, "step": 5711 }, { "epoch": 0.33, "grad_norm": 0.33269868149267295, "learning_rate": 1.5689008883812212e-05, "loss": 0.2106, "step": 5712 }, { "epoch": 0.33, "grad_norm": 0.5032767085391571, "learning_rate": 1.568747835945704e-05, "loss": 0.3967, "step": 5713 }, { "epoch": 0.33, "grad_norm": 0.7674497920759249, "learning_rate": 1.5685947638144736e-05, "loss": 0.6305, "step": 5714 }, { "epoch": 0.33, "grad_norm": 0.33746113836252484, "learning_rate": 1.5684416719928314e-05, "loss": 0.2059, "step": 5715 }, { "epoch": 0.33, "grad_norm": 0.27730558315725434, "learning_rate": 1.568288560486078e-05, "loss": 0.2342, "step": 5716 }, { "epoch": 0.33, "grad_norm": 0.8937613239506347, "learning_rate": 1.5681354292995164e-05, "loss": 0.5943, "step": 5717 }, { "epoch": 0.33, "grad_norm": 0.32282729775940155, "learning_rate": 1.5679822784384492e-05, "loss": 0.213, "step": 5718 }, { "epoch": 0.33, "grad_norm": 1.0217359415141611, "learning_rate": 1.56782910790818e-05, "loss": 0.4414, "step": 5719 }, { "epoch": 0.33, "grad_norm": 0.37906399149967873, "learning_rate": 1.5676759177140132e-05, "loss": 0.3148, "step": 5720 }, { "epoch": 0.33, "grad_norm": 0.32548021526366716, "learning_rate": 1.567522707861254e-05, "loss": 0.2696, "step": 5721 }, { "epoch": 0.33, "grad_norm": 0.27672622569811445, "learning_rate": 1.567369478355208e-05, "loss": 0.1349, "step": 5722 }, { "epoch": 0.33, "grad_norm": 0.34813866318867676, "learning_rate": 1.567216229201181e-05, "loss": 0.2762, "step": 5723 }, { "epoch": 0.33, "grad_norm": 0.31379746680097137, "learning_rate": 1.5670629604044804e-05, "loss": 0.2616, "step": 5724 }, { "epoch": 0.33, "grad_norm": 0.8743307806116454, "learning_rate": 1.566909671970414e-05, "loss": 0.3819, "step": 5725 }, { "epoch": 0.33, "grad_norm": 0.9740313278256991, "learning_rate": 1.5667563639042904e-05, "loss": 0.6429, "step": 5726 }, { "epoch": 0.33, "grad_norm": 0.3849691561398392, "learning_rate": 1.5666030362114175e-05, "loss": 0.2787, "step": 5727 }, { "epoch": 0.33, "grad_norm": 0.29480543187124647, "learning_rate": 1.566449688897106e-05, "loss": 0.2065, "step": 5728 }, { "epoch": 0.33, "grad_norm": 0.5564381113672089, "learning_rate": 1.5662963219666666e-05, "loss": 0.3209, "step": 5729 }, { "epoch": 0.33, "grad_norm": 0.6185522440055002, "learning_rate": 1.5661429354254096e-05, "loss": 0.3824, "step": 5730 }, { "epoch": 0.33, "grad_norm": 0.40049158281522773, "learning_rate": 1.5659895292786474e-05, "loss": 0.3183, "step": 5731 }, { "epoch": 0.33, "grad_norm": 0.4038008265047527, "learning_rate": 1.565836103531692e-05, "loss": 0.3016, "step": 5732 }, { "epoch": 0.33, "grad_norm": 0.3987251335173393, "learning_rate": 1.5656826581898563e-05, "loss": 0.2744, "step": 5733 }, { "epoch": 0.33, "grad_norm": 0.34573611183952435, "learning_rate": 1.565529193258455e-05, "loss": 0.2572, "step": 5734 }, { "epoch": 0.33, "grad_norm": 0.392359831338795, "learning_rate": 1.5653757087428015e-05, "loss": 0.2155, "step": 5735 }, { "epoch": 0.33, "grad_norm": 0.29979714182243516, "learning_rate": 1.5652222046482118e-05, "loss": 0.288, "step": 5736 }, { "epoch": 0.33, "grad_norm": 0.5553731791527236, "learning_rate": 1.5650686809800016e-05, "loss": 0.388, "step": 5737 }, { "epoch": 0.33, "grad_norm": 0.935348590466938, "learning_rate": 1.564915137743487e-05, "loss": 0.5044, "step": 5738 }, { "epoch": 0.33, "grad_norm": 0.42901141158263, "learning_rate": 1.5647615749439858e-05, "loss": 0.302, "step": 5739 }, { "epoch": 0.33, "grad_norm": 0.3289608155747018, "learning_rate": 1.5646079925868152e-05, "loss": 0.2922, "step": 5740 }, { "epoch": 0.33, "grad_norm": 0.5271177700015318, "learning_rate": 1.564454390677294e-05, "loss": 0.2541, "step": 5741 }, { "epoch": 0.33, "grad_norm": 0.3476763157178258, "learning_rate": 1.5643007692207422e-05, "loss": 0.3031, "step": 5742 }, { "epoch": 0.33, "grad_norm": 0.4244417596608532, "learning_rate": 1.5641471282224788e-05, "loss": 0.2396, "step": 5743 }, { "epoch": 0.33, "grad_norm": 0.4413347170413946, "learning_rate": 1.563993467687824e-05, "loss": 0.3494, "step": 5744 }, { "epoch": 0.33, "grad_norm": 0.38877102993582696, "learning_rate": 1.5638397876221002e-05, "loss": 0.2112, "step": 5745 }, { "epoch": 0.33, "grad_norm": 0.4975149211929953, "learning_rate": 1.563686088030629e-05, "loss": 0.4052, "step": 5746 }, { "epoch": 0.33, "grad_norm": 0.5640690106411652, "learning_rate": 1.5635323689187323e-05, "loss": 0.3971, "step": 5747 }, { "epoch": 0.33, "grad_norm": 0.2221851855752157, "learning_rate": 1.5633786302917343e-05, "loss": 0.1517, "step": 5748 }, { "epoch": 0.33, "grad_norm": 0.2994501028669219, "learning_rate": 1.5632248721549584e-05, "loss": 0.2548, "step": 5749 }, { "epoch": 0.33, "grad_norm": 0.8917217167943163, "learning_rate": 1.5630710945137293e-05, "loss": 0.6478, "step": 5750 }, { "epoch": 0.33, "grad_norm": 0.31361359549888296, "learning_rate": 1.5629172973733724e-05, "loss": 0.1688, "step": 5751 }, { "epoch": 0.33, "grad_norm": 0.35912544773740485, "learning_rate": 1.562763480739214e-05, "loss": 0.2893, "step": 5752 }, { "epoch": 0.33, "grad_norm": 0.711575311545721, "learning_rate": 1.56260964461658e-05, "loss": 0.4265, "step": 5753 }, { "epoch": 0.33, "grad_norm": 0.2564745105098827, "learning_rate": 1.5624557890107983e-05, "loss": 0.1733, "step": 5754 }, { "epoch": 0.33, "grad_norm": 0.3070550130073271, "learning_rate": 1.5623019139271967e-05, "loss": 0.2762, "step": 5755 }, { "epoch": 0.33, "grad_norm": 0.5205475343496394, "learning_rate": 1.5621480193711046e-05, "loss": 0.427, "step": 5756 }, { "epoch": 0.33, "grad_norm": 0.6040346459958775, "learning_rate": 1.56199410534785e-05, "loss": 0.3935, "step": 5757 }, { "epoch": 0.33, "grad_norm": 0.5393463384567777, "learning_rate": 1.5618401718627644e-05, "loss": 0.2879, "step": 5758 }, { "epoch": 0.33, "grad_norm": 0.4889068289885966, "learning_rate": 1.5616862189211774e-05, "loss": 0.3763, "step": 5759 }, { "epoch": 0.33, "grad_norm": 0.27513668919227896, "learning_rate": 1.561532246528421e-05, "loss": 0.2058, "step": 5760 }, { "epoch": 0.33, "grad_norm": 0.2966718812667392, "learning_rate": 1.5613782546898268e-05, "loss": 0.2096, "step": 5761 }, { "epoch": 0.33, "grad_norm": 0.9302097875569516, "learning_rate": 1.561224243410728e-05, "loss": 0.6853, "step": 5762 }, { "epoch": 0.33, "grad_norm": 0.3034217660889585, "learning_rate": 1.561070212696458e-05, "loss": 0.2851, "step": 5763 }, { "epoch": 0.33, "grad_norm": 0.3943008222386303, "learning_rate": 1.56091616255235e-05, "loss": 0.2914, "step": 5764 }, { "epoch": 0.33, "grad_norm": 0.7197890030209596, "learning_rate": 1.5607620929837398e-05, "loss": 0.4985, "step": 5765 }, { "epoch": 0.33, "grad_norm": 0.3846549791318257, "learning_rate": 1.5606080039959624e-05, "loss": 0.2175, "step": 5766 }, { "epoch": 0.33, "grad_norm": 0.2644764393345778, "learning_rate": 1.5604538955943542e-05, "loss": 0.2057, "step": 5767 }, { "epoch": 0.33, "grad_norm": 0.5406969227154103, "learning_rate": 1.5602997677842515e-05, "loss": 0.4251, "step": 5768 }, { "epoch": 0.33, "grad_norm": 0.6047624630317593, "learning_rate": 1.560145620570992e-05, "loss": 0.4523, "step": 5769 }, { "epoch": 0.33, "grad_norm": 0.355995131291561, "learning_rate": 1.5599914539599135e-05, "loss": 0.314, "step": 5770 }, { "epoch": 0.33, "grad_norm": 0.4253729585784995, "learning_rate": 1.559837267956355e-05, "loss": 0.2929, "step": 5771 }, { "epoch": 0.33, "grad_norm": 0.42743246173578286, "learning_rate": 1.559683062565656e-05, "loss": 0.3047, "step": 5772 }, { "epoch": 0.33, "grad_norm": 0.2515830454917624, "learning_rate": 1.559528837793157e-05, "loss": 0.2118, "step": 5773 }, { "epoch": 0.33, "grad_norm": 0.7515736027063699, "learning_rate": 1.559374593644198e-05, "loss": 0.5555, "step": 5774 }, { "epoch": 0.33, "grad_norm": 0.4013581312855905, "learning_rate": 1.559220330124121e-05, "loss": 0.3164, "step": 5775 }, { "epoch": 0.33, "grad_norm": 0.32182922729328917, "learning_rate": 1.5590660472382682e-05, "loss": 0.3081, "step": 5776 }, { "epoch": 0.33, "grad_norm": 0.853128767784892, "learning_rate": 1.558911744991982e-05, "loss": 0.2577, "step": 5777 }, { "epoch": 0.33, "grad_norm": 0.3250498927602875, "learning_rate": 1.5587574233906063e-05, "loss": 0.2265, "step": 5778 }, { "epoch": 0.33, "grad_norm": 0.30334383260105524, "learning_rate": 1.5586030824394848e-05, "loss": 0.2567, "step": 5779 }, { "epoch": 0.33, "grad_norm": 0.36184984465163766, "learning_rate": 1.5584487221439628e-05, "loss": 0.3016, "step": 5780 }, { "epoch": 0.33, "grad_norm": 0.46282632685077807, "learning_rate": 1.5582943425093856e-05, "loss": 0.3936, "step": 5781 }, { "epoch": 0.33, "grad_norm": 0.40893305897006493, "learning_rate": 1.558139943541099e-05, "loss": 0.3362, "step": 5782 }, { "epoch": 0.33, "grad_norm": 0.3743712544882061, "learning_rate": 1.5579855252444506e-05, "loss": 0.3249, "step": 5783 }, { "epoch": 0.33, "grad_norm": 0.46996244742050114, "learning_rate": 1.557831087624787e-05, "loss": 0.1574, "step": 5784 }, { "epoch": 0.33, "grad_norm": 0.2683925413029673, "learning_rate": 1.5576766306874572e-05, "loss": 0.2282, "step": 5785 }, { "epoch": 0.33, "grad_norm": 0.6926340821108188, "learning_rate": 1.5575221544378094e-05, "loss": 0.4933, "step": 5786 }, { "epoch": 0.33, "grad_norm": 0.31226964320871636, "learning_rate": 1.5573676588811935e-05, "loss": 0.2602, "step": 5787 }, { "epoch": 0.33, "grad_norm": 0.39932555162861216, "learning_rate": 1.5572131440229593e-05, "loss": 0.3255, "step": 5788 }, { "epoch": 0.33, "grad_norm": 0.3892684727048228, "learning_rate": 1.557058609868458e-05, "loss": 0.2558, "step": 5789 }, { "epoch": 0.33, "grad_norm": 0.2385948593781187, "learning_rate": 1.5569040564230414e-05, "loss": 0.1092, "step": 5790 }, { "epoch": 0.33, "grad_norm": 0.27404269814150173, "learning_rate": 1.556749483692061e-05, "loss": 0.2733, "step": 5791 }, { "epoch": 0.33, "grad_norm": 0.8492486232920454, "learning_rate": 1.5565948916808697e-05, "loss": 0.5681, "step": 5792 }, { "epoch": 0.33, "grad_norm": 0.6782578354702179, "learning_rate": 1.5564402803948215e-05, "loss": 0.2971, "step": 5793 }, { "epoch": 0.33, "grad_norm": 0.3093663841738642, "learning_rate": 1.55628564983927e-05, "loss": 0.2356, "step": 5794 }, { "epoch": 0.33, "grad_norm": 0.3650624325489127, "learning_rate": 1.556131000019571e-05, "loss": 0.3337, "step": 5795 }, { "epoch": 0.33, "grad_norm": 0.760194714593758, "learning_rate": 1.5559763309410787e-05, "loss": 0.4862, "step": 5796 }, { "epoch": 0.33, "grad_norm": 0.3294635430927283, "learning_rate": 1.5558216426091505e-05, "loss": 0.2005, "step": 5797 }, { "epoch": 0.33, "grad_norm": 1.077418090885813, "learning_rate": 1.5556669350291422e-05, "loss": 0.4722, "step": 5798 }, { "epoch": 0.33, "grad_norm": 0.3504419610826631, "learning_rate": 1.5555122082064123e-05, "loss": 0.323, "step": 5799 }, { "epoch": 0.33, "grad_norm": 0.3475869790586581, "learning_rate": 1.5553574621463183e-05, "loss": 0.2275, "step": 5800 }, { "epoch": 0.33, "grad_norm": 0.3020170894774215, "learning_rate": 1.5552026968542192e-05, "loss": 0.224, "step": 5801 }, { "epoch": 0.33, "grad_norm": 1.697556497115101, "learning_rate": 1.555047912335475e-05, "loss": 0.6149, "step": 5802 }, { "epoch": 0.33, "grad_norm": 0.29416917374336826, "learning_rate": 1.5548931085954448e-05, "loss": 0.2219, "step": 5803 }, { "epoch": 0.33, "grad_norm": 0.6232842819271314, "learning_rate": 1.5547382856394905e-05, "loss": 0.3939, "step": 5804 }, { "epoch": 0.33, "grad_norm": 1.1892346356469627, "learning_rate": 1.5545834434729732e-05, "loss": 0.8569, "step": 5805 }, { "epoch": 0.33, "grad_norm": 0.3967731998369141, "learning_rate": 1.554428582101255e-05, "loss": 0.2805, "step": 5806 }, { "epoch": 0.33, "grad_norm": 0.34087023673325834, "learning_rate": 1.554273701529699e-05, "loss": 0.1824, "step": 5807 }, { "epoch": 0.33, "grad_norm": 1.374281261563989, "learning_rate": 1.5541188017636683e-05, "loss": 0.678, "step": 5808 }, { "epoch": 0.33, "grad_norm": 0.3489684607123253, "learning_rate": 1.5539638828085278e-05, "loss": 0.2842, "step": 5809 }, { "epoch": 0.33, "grad_norm": 0.6789014020469017, "learning_rate": 1.5538089446696414e-05, "loss": 0.3743, "step": 5810 }, { "epoch": 0.33, "grad_norm": 0.4519718080022967, "learning_rate": 1.553653987352375e-05, "loss": 0.3394, "step": 5811 }, { "epoch": 0.33, "grad_norm": 0.4552790837774947, "learning_rate": 1.553499010862095e-05, "loss": 0.299, "step": 5812 }, { "epoch": 0.33, "grad_norm": 0.4965276597889446, "learning_rate": 1.553344015204168e-05, "loss": 0.1166, "step": 5813 }, { "epoch": 0.33, "grad_norm": 0.5101673833679304, "learning_rate": 1.553189000383962e-05, "loss": 0.394, "step": 5814 }, { "epoch": 0.33, "grad_norm": 0.42668233072512435, "learning_rate": 1.553033966406844e-05, "loss": 0.2904, "step": 5815 }, { "epoch": 0.33, "grad_norm": 0.3709815294186113, "learning_rate": 1.552878913278184e-05, "loss": 0.3063, "step": 5816 }, { "epoch": 0.33, "grad_norm": 0.7836478889317833, "learning_rate": 1.5527238410033508e-05, "loss": 0.4855, "step": 5817 }, { "epoch": 0.33, "grad_norm": 0.43253924637788094, "learning_rate": 1.552568749587715e-05, "loss": 0.2718, "step": 5818 }, { "epoch": 0.33, "grad_norm": 0.287688911198083, "learning_rate": 1.5524136390366468e-05, "loss": 0.2535, "step": 5819 }, { "epoch": 0.33, "grad_norm": 0.723004816360338, "learning_rate": 1.5522585093555184e-05, "loss": 0.3463, "step": 5820 }, { "epoch": 0.33, "grad_norm": 0.3808008491185971, "learning_rate": 1.5521033605497013e-05, "loss": 0.2936, "step": 5821 }, { "epoch": 0.33, "grad_norm": 0.8200187852102342, "learning_rate": 1.5519481926245687e-05, "loss": 0.5662, "step": 5822 }, { "epoch": 0.33, "grad_norm": 0.36108551149142204, "learning_rate": 1.551793005585494e-05, "loss": 0.2783, "step": 5823 }, { "epoch": 0.33, "grad_norm": 0.3836885248758884, "learning_rate": 1.5516377994378513e-05, "loss": 0.288, "step": 5824 }, { "epoch": 0.33, "grad_norm": 0.32215157055186644, "learning_rate": 1.551482574187015e-05, "loss": 0.234, "step": 5825 }, { "epoch": 0.33, "grad_norm": 0.4798535959282858, "learning_rate": 1.5513273298383607e-05, "loss": 0.3197, "step": 5826 }, { "epoch": 0.33, "grad_norm": 0.32604749167703956, "learning_rate": 1.551172066397265e-05, "loss": 0.2737, "step": 5827 }, { "epoch": 0.33, "grad_norm": 0.9180829156266932, "learning_rate": 1.5510167838691047e-05, "loss": 0.5507, "step": 5828 }, { "epoch": 0.33, "grad_norm": 1.2475558469396781, "learning_rate": 1.550861482259256e-05, "loss": 0.5742, "step": 5829 }, { "epoch": 0.33, "grad_norm": 0.37812167267697283, "learning_rate": 1.5507061615730986e-05, "loss": 0.2834, "step": 5830 }, { "epoch": 0.34, "grad_norm": 0.31942624860729857, "learning_rate": 1.5505508218160103e-05, "loss": 0.2459, "step": 5831 }, { "epoch": 0.34, "grad_norm": 0.5164865863370692, "learning_rate": 1.5503954629933707e-05, "loss": 0.3329, "step": 5832 }, { "epoch": 0.34, "grad_norm": 0.4604448154652938, "learning_rate": 1.5502400851105603e-05, "loss": 0.1969, "step": 5833 }, { "epoch": 0.34, "grad_norm": 0.57149449313321, "learning_rate": 1.5500846881729587e-05, "loss": 0.4065, "step": 5834 }, { "epoch": 0.34, "grad_norm": 0.4424655995824884, "learning_rate": 1.5499292721859483e-05, "loss": 0.3512, "step": 5835 }, { "epoch": 0.34, "grad_norm": 0.3409734791754797, "learning_rate": 1.5497738371549108e-05, "loss": 0.2024, "step": 5836 }, { "epoch": 0.34, "grad_norm": 0.44336289462689166, "learning_rate": 1.549618383085229e-05, "loss": 0.3014, "step": 5837 }, { "epoch": 0.34, "grad_norm": 0.3706424088281798, "learning_rate": 1.549462909982286e-05, "loss": 0.2969, "step": 5838 }, { "epoch": 0.34, "grad_norm": 0.3273551180286123, "learning_rate": 1.5493074178514665e-05, "loss": 0.2201, "step": 5839 }, { "epoch": 0.34, "grad_norm": 1.4602400150554773, "learning_rate": 1.5491519066981547e-05, "loss": 0.7239, "step": 5840 }, { "epoch": 0.34, "grad_norm": 1.2934303316229814, "learning_rate": 1.5489963765277356e-05, "loss": 0.8344, "step": 5841 }, { "epoch": 0.34, "grad_norm": 0.37170829852311305, "learning_rate": 1.548840827345596e-05, "loss": 0.2075, "step": 5842 }, { "epoch": 0.34, "grad_norm": 0.3690170784055627, "learning_rate": 1.5486852591571217e-05, "loss": 0.3312, "step": 5843 }, { "epoch": 0.34, "grad_norm": 0.42868695087431713, "learning_rate": 1.5485296719677005e-05, "loss": 0.2912, "step": 5844 }, { "epoch": 0.34, "grad_norm": 0.32516213319606574, "learning_rate": 1.5483740657827205e-05, "loss": 0.2423, "step": 5845 }, { "epoch": 0.34, "grad_norm": 0.8624921342883979, "learning_rate": 1.5482184406075705e-05, "loss": 0.3653, "step": 5846 }, { "epoch": 0.34, "grad_norm": 0.43311550871258736, "learning_rate": 1.5480627964476392e-05, "loss": 0.3782, "step": 5847 }, { "epoch": 0.34, "grad_norm": 0.3466225539727474, "learning_rate": 1.547907133308317e-05, "loss": 0.2827, "step": 5848 }, { "epoch": 0.34, "grad_norm": 0.6526142864376929, "learning_rate": 1.547751451194994e-05, "loss": 0.3642, "step": 5849 }, { "epoch": 0.34, "grad_norm": 0.3194788764576168, "learning_rate": 1.5475957501130622e-05, "loss": 0.3314, "step": 5850 }, { "epoch": 0.34, "grad_norm": 0.33247814679611337, "learning_rate": 1.5474400300679128e-05, "loss": 0.2097, "step": 5851 }, { "epoch": 0.34, "grad_norm": 0.34086337965321845, "learning_rate": 1.5472842910649387e-05, "loss": 0.2012, "step": 5852 }, { "epoch": 0.34, "grad_norm": 0.8793044262444761, "learning_rate": 1.5471285331095334e-05, "loss": 0.6676, "step": 5853 }, { "epoch": 0.34, "grad_norm": 0.5278240555166073, "learning_rate": 1.5469727562070904e-05, "loss": 0.3511, "step": 5854 }, { "epoch": 0.34, "grad_norm": 0.3723179875730455, "learning_rate": 1.5468169603630045e-05, "loss": 0.2741, "step": 5855 }, { "epoch": 0.34, "grad_norm": 0.4558277232143511, "learning_rate": 1.5466611455826703e-05, "loss": 0.2971, "step": 5856 }, { "epoch": 0.34, "grad_norm": 0.3657369354981838, "learning_rate": 1.5465053118714846e-05, "loss": 0.2267, "step": 5857 }, { "epoch": 0.34, "grad_norm": 0.40254068196433923, "learning_rate": 1.5463494592348435e-05, "loss": 0.3381, "step": 5858 }, { "epoch": 0.34, "grad_norm": 0.4389789142234693, "learning_rate": 1.5461935876781436e-05, "loss": 0.3111, "step": 5859 }, { "epoch": 0.34, "grad_norm": 0.42792874304101514, "learning_rate": 1.5460376972067837e-05, "loss": 0.3259, "step": 5860 }, { "epoch": 0.34, "grad_norm": 0.5828730662973869, "learning_rate": 1.5458817878261617e-05, "loss": 0.3779, "step": 5861 }, { "epoch": 0.34, "grad_norm": 0.35919316933562373, "learning_rate": 1.5457258595416766e-05, "loss": 0.2507, "step": 5862 }, { "epoch": 0.34, "grad_norm": 0.33609490374293677, "learning_rate": 1.5455699123587286e-05, "loss": 0.225, "step": 5863 }, { "epoch": 0.34, "grad_norm": 0.34888688858881917, "learning_rate": 1.5454139462827183e-05, "loss": 0.2434, "step": 5864 }, { "epoch": 0.34, "grad_norm": 0.743990986427087, "learning_rate": 1.5452579613190462e-05, "loss": 0.4317, "step": 5865 }, { "epoch": 0.34, "grad_norm": 0.3496844176665509, "learning_rate": 1.5451019574731147e-05, "loss": 0.3032, "step": 5866 }, { "epoch": 0.34, "grad_norm": 0.39178507409648256, "learning_rate": 1.5449459347503255e-05, "loss": 0.3358, "step": 5867 }, { "epoch": 0.34, "grad_norm": 0.30121681347934715, "learning_rate": 1.5447898931560824e-05, "loss": 0.2011, "step": 5868 }, { "epoch": 0.34, "grad_norm": 0.2932285924621865, "learning_rate": 1.544633832695788e-05, "loss": 0.205, "step": 5869 }, { "epoch": 0.34, "grad_norm": 0.3773063886258503, "learning_rate": 1.544477753374848e-05, "loss": 0.3348, "step": 5870 }, { "epoch": 0.34, "grad_norm": 0.4860936216259758, "learning_rate": 1.5443216551986667e-05, "loss": 0.4027, "step": 5871 }, { "epoch": 0.34, "grad_norm": 0.5365140122310895, "learning_rate": 1.5441655381726496e-05, "loss": 0.2675, "step": 5872 }, { "epoch": 0.34, "grad_norm": 0.5126169170705192, "learning_rate": 1.5440094023022035e-05, "loss": 0.347, "step": 5873 }, { "epoch": 0.34, "grad_norm": 0.3880198754870328, "learning_rate": 1.5438532475927354e-05, "loss": 0.3277, "step": 5874 }, { "epoch": 0.34, "grad_norm": 0.3398774666350326, "learning_rate": 1.5436970740496527e-05, "loss": 0.1709, "step": 5875 }, { "epoch": 0.34, "grad_norm": 0.2934353428437412, "learning_rate": 1.5435408816783635e-05, "loss": 0.2122, "step": 5876 }, { "epoch": 0.34, "grad_norm": 1.1948470060235745, "learning_rate": 1.543384670484277e-05, "loss": 0.5015, "step": 5877 }, { "epoch": 0.34, "grad_norm": 0.37606014000496846, "learning_rate": 1.5432284404728027e-05, "loss": 0.256, "step": 5878 }, { "epoch": 0.34, "grad_norm": 0.4058687646461349, "learning_rate": 1.5430721916493507e-05, "loss": 0.3533, "step": 5879 }, { "epoch": 0.34, "grad_norm": 0.8713355870009596, "learning_rate": 1.542915924019332e-05, "loss": 0.6042, "step": 5880 }, { "epoch": 0.34, "grad_norm": 0.2912589294582795, "learning_rate": 1.5427596375881587e-05, "loss": 0.1711, "step": 5881 }, { "epoch": 0.34, "grad_norm": 0.6110141021008527, "learning_rate": 1.5426033323612425e-05, "loss": 0.3838, "step": 5882 }, { "epoch": 0.34, "grad_norm": 0.8743221394886532, "learning_rate": 1.5424470083439958e-05, "loss": 0.3455, "step": 5883 }, { "epoch": 0.34, "grad_norm": 0.6854045258290827, "learning_rate": 1.5422906655418327e-05, "loss": 0.4075, "step": 5884 }, { "epoch": 0.34, "grad_norm": 0.28645214108364625, "learning_rate": 1.5421343039601672e-05, "loss": 0.185, "step": 5885 }, { "epoch": 0.34, "grad_norm": 0.39523662011429733, "learning_rate": 1.5419779236044142e-05, "loss": 0.3153, "step": 5886 }, { "epoch": 0.34, "grad_norm": 0.6709944581548378, "learning_rate": 1.541821524479989e-05, "loss": 0.4063, "step": 5887 }, { "epoch": 0.34, "grad_norm": 0.3576383977318532, "learning_rate": 1.541665106592307e-05, "loss": 0.2235, "step": 5888 }, { "epoch": 0.34, "grad_norm": 1.0212539106154426, "learning_rate": 1.5415086699467864e-05, "loss": 0.4622, "step": 5889 }, { "epoch": 0.34, "grad_norm": 0.39618828216583873, "learning_rate": 1.5413522145488437e-05, "loss": 0.3115, "step": 5890 }, { "epoch": 0.34, "grad_norm": 0.27461183094712965, "learning_rate": 1.541195740403897e-05, "loss": 0.1857, "step": 5891 }, { "epoch": 0.34, "grad_norm": 0.9085111519204856, "learning_rate": 1.541039247517365e-05, "loss": 0.5692, "step": 5892 }, { "epoch": 0.34, "grad_norm": 0.8533930219299408, "learning_rate": 1.5408827358946675e-05, "loss": 0.5139, "step": 5893 }, { "epoch": 0.34, "grad_norm": 0.4079774306429693, "learning_rate": 1.5407262055412238e-05, "loss": 0.2896, "step": 5894 }, { "epoch": 0.34, "grad_norm": 0.7506257968661608, "learning_rate": 1.540569656462455e-05, "loss": 0.3278, "step": 5895 }, { "epoch": 0.34, "grad_norm": 0.6944121449841651, "learning_rate": 1.5404130886637822e-05, "loss": 0.4847, "step": 5896 }, { "epoch": 0.34, "grad_norm": 0.289549678372288, "learning_rate": 1.5402565021506273e-05, "loss": 0.2154, "step": 5897 }, { "epoch": 0.34, "grad_norm": 0.3543644317502926, "learning_rate": 1.540099896928413e-05, "loss": 0.237, "step": 5898 }, { "epoch": 0.34, "grad_norm": 0.5313370994029551, "learning_rate": 1.5399432730025626e-05, "loss": 0.3474, "step": 5899 }, { "epoch": 0.34, "grad_norm": 0.3858996506274038, "learning_rate": 1.5397866303784996e-05, "loss": 0.3342, "step": 5900 }, { "epoch": 0.34, "grad_norm": 0.8419789834381768, "learning_rate": 1.539629969061649e-05, "loss": 0.428, "step": 5901 }, { "epoch": 0.34, "grad_norm": 0.32299250679010993, "learning_rate": 1.539473289057436e-05, "loss": 0.3036, "step": 5902 }, { "epoch": 0.34, "grad_norm": 0.27648283554967706, "learning_rate": 1.5393165903712856e-05, "loss": 0.2395, "step": 5903 }, { "epoch": 0.34, "grad_norm": 0.2843375577344682, "learning_rate": 1.5391598730086254e-05, "loss": 0.1435, "step": 5904 }, { "epoch": 0.34, "grad_norm": 0.5555925119853822, "learning_rate": 1.539003136974882e-05, "loss": 0.3774, "step": 5905 }, { "epoch": 0.34, "grad_norm": 0.3144089559292263, "learning_rate": 1.5388463822754827e-05, "loss": 0.2653, "step": 5906 }, { "epoch": 0.34, "grad_norm": 0.4289801488875986, "learning_rate": 1.538689608915857e-05, "loss": 0.3952, "step": 5907 }, { "epoch": 0.34, "grad_norm": 0.4080185198275796, "learning_rate": 1.5385328169014325e-05, "loss": 0.2654, "step": 5908 }, { "epoch": 0.34, "grad_norm": 0.23617679924423607, "learning_rate": 1.53837600623764e-05, "loss": 0.1897, "step": 5909 }, { "epoch": 0.34, "grad_norm": 0.3640319175940855, "learning_rate": 1.5382191769299096e-05, "loss": 0.3338, "step": 5910 }, { "epoch": 0.34, "grad_norm": 0.5949894824883931, "learning_rate": 1.5380623289836724e-05, "loss": 0.2869, "step": 5911 }, { "epoch": 0.34, "grad_norm": 0.33508321042850364, "learning_rate": 1.5379054624043596e-05, "loss": 0.3238, "step": 5912 }, { "epoch": 0.34, "grad_norm": 1.042458284916835, "learning_rate": 1.537748577197404e-05, "loss": 0.7016, "step": 5913 }, { "epoch": 0.34, "grad_norm": 0.28987327834740845, "learning_rate": 1.537591673368238e-05, "loss": 0.2563, "step": 5914 }, { "epoch": 0.34, "grad_norm": 0.25989501170956886, "learning_rate": 1.5374347509222962e-05, "loss": 0.2252, "step": 5915 }, { "epoch": 0.34, "grad_norm": 0.4262012511260465, "learning_rate": 1.5372778098650115e-05, "loss": 0.2876, "step": 5916 }, { "epoch": 0.34, "grad_norm": 0.6347305763733735, "learning_rate": 1.5371208502018194e-05, "loss": 0.3217, "step": 5917 }, { "epoch": 0.34, "grad_norm": 0.312424850972814, "learning_rate": 1.5369638719381555e-05, "loss": 0.2914, "step": 5918 }, { "epoch": 0.34, "grad_norm": 1.4014464340135386, "learning_rate": 1.5368068750794557e-05, "loss": 0.7689, "step": 5919 }, { "epoch": 0.34, "grad_norm": 0.6480541705283047, "learning_rate": 1.5366498596311568e-05, "loss": 0.4657, "step": 5920 }, { "epoch": 0.34, "grad_norm": 0.2557476620889882, "learning_rate": 1.5364928255986966e-05, "loss": 0.1722, "step": 5921 }, { "epoch": 0.34, "grad_norm": 0.3457499150185082, "learning_rate": 1.5363357729875126e-05, "loss": 0.2854, "step": 5922 }, { "epoch": 0.34, "grad_norm": 0.7202941326070144, "learning_rate": 1.536178701803044e-05, "loss": 0.392, "step": 5923 }, { "epoch": 0.34, "grad_norm": 0.34428868696575277, "learning_rate": 1.5360216120507302e-05, "loss": 0.2324, "step": 5924 }, { "epoch": 0.34, "grad_norm": 0.5147196984487438, "learning_rate": 1.5358645037360108e-05, "loss": 0.4216, "step": 5925 }, { "epoch": 0.34, "grad_norm": 0.537087396199377, "learning_rate": 1.535707376864327e-05, "loss": 0.377, "step": 5926 }, { "epoch": 0.34, "grad_norm": 0.34006389687556765, "learning_rate": 1.5355502314411194e-05, "loss": 0.1906, "step": 5927 }, { "epoch": 0.34, "grad_norm": 0.3373315399765145, "learning_rate": 1.5353930674718305e-05, "loss": 0.2455, "step": 5928 }, { "epoch": 0.34, "grad_norm": 0.46153518536615695, "learning_rate": 1.5352358849619024e-05, "loss": 0.3767, "step": 5929 }, { "epoch": 0.34, "grad_norm": 0.3368768244126122, "learning_rate": 1.535078683916779e-05, "loss": 0.2229, "step": 5930 }, { "epoch": 0.34, "grad_norm": 1.1003227430828704, "learning_rate": 1.5349214643419034e-05, "loss": 0.6771, "step": 5931 }, { "epoch": 0.34, "grad_norm": 1.4907212686940303, "learning_rate": 1.5347642262427206e-05, "loss": 0.8189, "step": 5932 }, { "epoch": 0.34, "grad_norm": 0.3184937333207399, "learning_rate": 1.5346069696246758e-05, "loss": 0.2215, "step": 5933 }, { "epoch": 0.34, "grad_norm": 0.41845407671672097, "learning_rate": 1.534449694493215e-05, "loss": 0.2991, "step": 5934 }, { "epoch": 0.34, "grad_norm": 0.4774275005408674, "learning_rate": 1.534292400853784e-05, "loss": 0.3517, "step": 5935 }, { "epoch": 0.34, "grad_norm": 0.379325144992996, "learning_rate": 1.53413508871183e-05, "loss": 0.2905, "step": 5936 }, { "epoch": 0.34, "grad_norm": 0.563366841637593, "learning_rate": 1.5339777580728003e-05, "loss": 0.3085, "step": 5937 }, { "epoch": 0.34, "grad_norm": 0.463202999933601, "learning_rate": 1.5338204089421447e-05, "loss": 0.3625, "step": 5938 }, { "epoch": 0.34, "grad_norm": 0.3695255382677873, "learning_rate": 1.5336630413253108e-05, "loss": 0.2843, "step": 5939 }, { "epoch": 0.34, "grad_norm": 0.6537639861062193, "learning_rate": 1.533505655227749e-05, "loss": 0.3645, "step": 5940 }, { "epoch": 0.34, "grad_norm": 0.255986281866402, "learning_rate": 1.533348250654909e-05, "loss": 0.2218, "step": 5941 }, { "epoch": 0.34, "grad_norm": 0.40879555334744677, "learning_rate": 1.5331908276122424e-05, "loss": 0.3137, "step": 5942 }, { "epoch": 0.34, "grad_norm": 0.9387421903416497, "learning_rate": 1.5330333861051998e-05, "loss": 0.5082, "step": 5943 }, { "epoch": 0.34, "grad_norm": 1.137603306784779, "learning_rate": 1.5328759261392344e-05, "loss": 0.5119, "step": 5944 }, { "epoch": 0.34, "grad_norm": 0.3617661594873792, "learning_rate": 1.5327184477197984e-05, "loss": 0.301, "step": 5945 }, { "epoch": 0.34, "grad_norm": 0.443054484749188, "learning_rate": 1.5325609508523456e-05, "loss": 0.3465, "step": 5946 }, { "epoch": 0.34, "grad_norm": 0.2663146536046802, "learning_rate": 1.53240343554233e-05, "loss": 0.1144, "step": 5947 }, { "epoch": 0.34, "grad_norm": 0.38139187527879836, "learning_rate": 1.532245901795206e-05, "loss": 0.2786, "step": 5948 }, { "epoch": 0.34, "grad_norm": 0.5305598314897465, "learning_rate": 1.5320883496164295e-05, "loss": 0.3807, "step": 5949 }, { "epoch": 0.34, "grad_norm": 0.4413082084005958, "learning_rate": 1.5319307790114563e-05, "loss": 0.3002, "step": 5950 }, { "epoch": 0.34, "grad_norm": 0.3583027015117161, "learning_rate": 1.5317731899857434e-05, "loss": 0.3078, "step": 5951 }, { "epoch": 0.34, "grad_norm": 0.7092834562172043, "learning_rate": 1.5316155825447476e-05, "loss": 0.4817, "step": 5952 }, { "epoch": 0.34, "grad_norm": 0.2955321017741597, "learning_rate": 1.5314579566939274e-05, "loss": 0.1988, "step": 5953 }, { "epoch": 0.34, "grad_norm": 0.37715745593890787, "learning_rate": 1.5313003124387404e-05, "loss": 0.2891, "step": 5954 }, { "epoch": 0.34, "grad_norm": 1.3060160054166214, "learning_rate": 1.5311426497846466e-05, "loss": 0.7819, "step": 5955 }, { "epoch": 0.34, "grad_norm": 0.7017722643430202, "learning_rate": 1.530984968737106e-05, "loss": 0.3755, "step": 5956 }, { "epoch": 0.34, "grad_norm": 0.44304273216374057, "learning_rate": 1.5308272693015785e-05, "loss": 0.2946, "step": 5957 }, { "epoch": 0.34, "grad_norm": 0.3809476824053261, "learning_rate": 1.530669551483525e-05, "loss": 0.2984, "step": 5958 }, { "epoch": 0.34, "grad_norm": 0.32731689524967394, "learning_rate": 1.5305118152884086e-05, "loss": 0.2025, "step": 5959 }, { "epoch": 0.34, "grad_norm": 0.32662163125723054, "learning_rate": 1.5303540607216906e-05, "loss": 0.2074, "step": 5960 }, { "epoch": 0.34, "grad_norm": 0.6125753143165558, "learning_rate": 1.5301962877888338e-05, "loss": 0.4477, "step": 5961 }, { "epoch": 0.34, "grad_norm": 0.7210324834024456, "learning_rate": 1.5300384964953028e-05, "loss": 0.3894, "step": 5962 }, { "epoch": 0.34, "grad_norm": 0.3512008862927995, "learning_rate": 1.5298806868465615e-05, "loss": 0.2395, "step": 5963 }, { "epoch": 0.34, "grad_norm": 1.203964316197915, "learning_rate": 1.5297228588480744e-05, "loss": 0.6873, "step": 5964 }, { "epoch": 0.34, "grad_norm": 0.4470807698277849, "learning_rate": 1.5295650125053078e-05, "loss": 0.344, "step": 5965 }, { "epoch": 0.34, "grad_norm": 0.27900170832730514, "learning_rate": 1.529407147823728e-05, "loss": 0.1535, "step": 5966 }, { "epoch": 0.34, "grad_norm": 1.2564959101088011, "learning_rate": 1.529249264808801e-05, "loss": 0.9462, "step": 5967 }, { "epoch": 0.34, "grad_norm": 0.8273397489953469, "learning_rate": 1.5290913634659946e-05, "loss": 0.4594, "step": 5968 }, { "epoch": 0.34, "grad_norm": 0.38156537104710225, "learning_rate": 1.528933443800777e-05, "loss": 0.2717, "step": 5969 }, { "epoch": 0.34, "grad_norm": 0.40525374010366605, "learning_rate": 1.5287755058186173e-05, "loss": 0.2764, "step": 5970 }, { "epoch": 0.34, "grad_norm": 0.44559989652126264, "learning_rate": 1.5286175495249845e-05, "loss": 0.2679, "step": 5971 }, { "epoch": 0.34, "grad_norm": 0.3800803496165543, "learning_rate": 1.5284595749253486e-05, "loss": 0.2887, "step": 5972 }, { "epoch": 0.34, "grad_norm": 0.5854777998684502, "learning_rate": 1.5283015820251802e-05, "loss": 0.3128, "step": 5973 }, { "epoch": 0.34, "grad_norm": 0.6490823546446826, "learning_rate": 1.528143570829951e-05, "loss": 0.3814, "step": 5974 }, { "epoch": 0.34, "grad_norm": 0.37018842276377245, "learning_rate": 1.5279855413451323e-05, "loss": 0.223, "step": 5975 }, { "epoch": 0.34, "grad_norm": 0.4312382176580631, "learning_rate": 1.527827493576197e-05, "loss": 0.284, "step": 5976 }, { "epoch": 0.34, "grad_norm": 0.40708070399001484, "learning_rate": 1.5276694275286188e-05, "loss": 0.3473, "step": 5977 }, { "epoch": 0.34, "grad_norm": 0.5786187802356816, "learning_rate": 1.5275113432078707e-05, "loss": 0.3675, "step": 5978 }, { "epoch": 0.34, "grad_norm": 0.5832872436341768, "learning_rate": 1.5273532406194273e-05, "loss": 0.3081, "step": 5979 }, { "epoch": 0.34, "grad_norm": 0.48552801852112154, "learning_rate": 1.5271951197687642e-05, "loss": 0.3343, "step": 5980 }, { "epoch": 0.34, "grad_norm": 0.3366837647895135, "learning_rate": 1.5270369806613566e-05, "loss": 0.2745, "step": 5981 }, { "epoch": 0.34, "grad_norm": 0.30439148010092015, "learning_rate": 1.5268788233026813e-05, "loss": 0.2576, "step": 5982 }, { "epoch": 0.34, "grad_norm": 0.8410593472550371, "learning_rate": 1.5267206476982143e-05, "loss": 0.2501, "step": 5983 }, { "epoch": 0.34, "grad_norm": 0.41983616658716516, "learning_rate": 1.5265624538534346e-05, "loss": 0.335, "step": 5984 }, { "epoch": 0.34, "grad_norm": 0.29918174520908947, "learning_rate": 1.5264042417738198e-05, "loss": 0.3013, "step": 5985 }, { "epoch": 0.34, "grad_norm": 0.9234263832577031, "learning_rate": 1.5262460114648487e-05, "loss": 0.4014, "step": 5986 }, { "epoch": 0.34, "grad_norm": 0.23127103984282238, "learning_rate": 1.5260877629320003e-05, "loss": 0.1819, "step": 5987 }, { "epoch": 0.34, "grad_norm": 0.6068087085849305, "learning_rate": 1.5259294961807557e-05, "loss": 0.3792, "step": 5988 }, { "epoch": 0.34, "grad_norm": 0.4333690325500259, "learning_rate": 1.5257712112165952e-05, "loss": 0.2803, "step": 5989 }, { "epoch": 0.34, "grad_norm": 0.3537720037573216, "learning_rate": 1.5256129080450004e-05, "loss": 0.2856, "step": 5990 }, { "epoch": 0.34, "grad_norm": 1.1232073792582922, "learning_rate": 1.5254545866714531e-05, "loss": 0.5189, "step": 5991 }, { "epoch": 0.34, "grad_norm": 0.4744517484846774, "learning_rate": 1.5252962471014358e-05, "loss": 0.3488, "step": 5992 }, { "epoch": 0.34, "grad_norm": 0.3529505990489683, "learning_rate": 1.5251378893404324e-05, "loss": 0.2746, "step": 5993 }, { "epoch": 0.34, "grad_norm": 0.2696964638067003, "learning_rate": 1.5249795133939262e-05, "loss": 0.2123, "step": 5994 }, { "epoch": 0.34, "grad_norm": 0.98685682080623, "learning_rate": 1.5248211192674023e-05, "loss": 0.5288, "step": 5995 }, { "epoch": 0.34, "grad_norm": 0.6945777224570654, "learning_rate": 1.5246627069663453e-05, "loss": 0.2634, "step": 5996 }, { "epoch": 0.34, "grad_norm": 0.3390663328402181, "learning_rate": 1.5245042764962416e-05, "loss": 0.3044, "step": 5997 }, { "epoch": 0.34, "grad_norm": 1.1668116852346306, "learning_rate": 1.5243458278625771e-05, "loss": 0.8487, "step": 5998 }, { "epoch": 0.34, "grad_norm": 0.20443862509215088, "learning_rate": 1.5241873610708395e-05, "loss": 0.0946, "step": 5999 }, { "epoch": 0.34, "grad_norm": 0.4356866107324079, "learning_rate": 1.5240288761265158e-05, "loss": 0.3402, "step": 6000 }, { "epoch": 0.34, "grad_norm": 0.36544328154024314, "learning_rate": 1.523870373035095e-05, "loss": 0.3262, "step": 6001 }, { "epoch": 0.34, "grad_norm": 0.684350221463673, "learning_rate": 1.5237118518020656e-05, "loss": 0.2627, "step": 6002 }, { "epoch": 0.34, "grad_norm": 0.7652539514029776, "learning_rate": 1.5235533124329172e-05, "loss": 0.4287, "step": 6003 }, { "epoch": 0.34, "grad_norm": 1.4259890730487947, "learning_rate": 1.5233947549331399e-05, "loss": 0.8299, "step": 6004 }, { "epoch": 0.35, "grad_norm": 0.2552789675583203, "learning_rate": 1.5232361793082251e-05, "loss": 0.2188, "step": 6005 }, { "epoch": 0.35, "grad_norm": 0.26345923306917046, "learning_rate": 1.5230775855636635e-05, "loss": 0.1973, "step": 6006 }, { "epoch": 0.35, "grad_norm": 0.7307326078246826, "learning_rate": 1.522918973704948e-05, "loss": 0.4981, "step": 6007 }, { "epoch": 0.35, "grad_norm": 0.622572484624821, "learning_rate": 1.5227603437375704e-05, "loss": 0.3457, "step": 6008 }, { "epoch": 0.35, "grad_norm": 0.3376871552073923, "learning_rate": 1.5226016956670251e-05, "loss": 0.2555, "step": 6009 }, { "epoch": 0.35, "grad_norm": 1.1334926262165705, "learning_rate": 1.5224430294988055e-05, "loss": 0.7165, "step": 6010 }, { "epoch": 0.35, "grad_norm": 0.5732166382836338, "learning_rate": 1.522284345238406e-05, "loss": 0.3788, "step": 6011 }, { "epoch": 0.35, "grad_norm": 0.2403970752189537, "learning_rate": 1.5221256428913225e-05, "loss": 0.1541, "step": 6012 }, { "epoch": 0.35, "grad_norm": 0.46260415675738403, "learning_rate": 1.52196692246305e-05, "loss": 0.3579, "step": 6013 }, { "epoch": 0.35, "grad_norm": 0.7041434257174486, "learning_rate": 1.5218081839590855e-05, "loss": 0.4429, "step": 6014 }, { "epoch": 0.35, "grad_norm": 0.33938295637228305, "learning_rate": 1.5216494273849261e-05, "loss": 0.243, "step": 6015 }, { "epoch": 0.35, "grad_norm": 0.5118168354209909, "learning_rate": 1.5214906527460695e-05, "loss": 0.3826, "step": 6016 }, { "epoch": 0.35, "grad_norm": 0.4851060809319056, "learning_rate": 1.5213318600480138e-05, "loss": 0.3629, "step": 6017 }, { "epoch": 0.35, "grad_norm": 0.22921600058430264, "learning_rate": 1.5211730492962587e-05, "loss": 0.161, "step": 6018 }, { "epoch": 0.35, "grad_norm": 0.675451182016171, "learning_rate": 1.521014220496303e-05, "loss": 0.4479, "step": 6019 }, { "epoch": 0.35, "grad_norm": 0.5422555876026078, "learning_rate": 1.5208553736536473e-05, "loss": 0.3674, "step": 6020 }, { "epoch": 0.35, "grad_norm": 0.34458266162583756, "learning_rate": 1.5206965087737922e-05, "loss": 0.2923, "step": 6021 }, { "epoch": 0.35, "grad_norm": 0.9441003102508054, "learning_rate": 1.5205376258622397e-05, "loss": 0.4712, "step": 6022 }, { "epoch": 0.35, "grad_norm": 0.5721243311796721, "learning_rate": 1.5203787249244914e-05, "loss": 0.3698, "step": 6023 }, { "epoch": 0.35, "grad_norm": 0.32675837351870496, "learning_rate": 1.5202198059660504e-05, "loss": 0.2448, "step": 6024 }, { "epoch": 0.35, "grad_norm": 0.43602889305513376, "learning_rate": 1.5200608689924197e-05, "loss": 0.3166, "step": 6025 }, { "epoch": 0.35, "grad_norm": 0.3492478650448725, "learning_rate": 1.5199019140091037e-05, "loss": 0.1646, "step": 6026 }, { "epoch": 0.35, "grad_norm": 0.3940936399332753, "learning_rate": 1.5197429410216065e-05, "loss": 0.3279, "step": 6027 }, { "epoch": 0.35, "grad_norm": 0.3892155238830433, "learning_rate": 1.5195839500354337e-05, "loss": 0.2918, "step": 6028 }, { "epoch": 0.35, "grad_norm": 0.6679840842709365, "learning_rate": 1.5194249410560913e-05, "loss": 0.4226, "step": 6029 }, { "epoch": 0.35, "grad_norm": 0.3529717737800829, "learning_rate": 1.5192659140890851e-05, "loss": 0.3048, "step": 6030 }, { "epoch": 0.35, "grad_norm": 0.3234022745735802, "learning_rate": 1.5191068691399229e-05, "loss": 0.2089, "step": 6031 }, { "epoch": 0.35, "grad_norm": 0.4406803384243917, "learning_rate": 1.518947806214112e-05, "loss": 0.356, "step": 6032 }, { "epoch": 0.35, "grad_norm": 0.3316857147599567, "learning_rate": 1.5187887253171609e-05, "loss": 0.3087, "step": 6033 }, { "epoch": 0.35, "grad_norm": 0.9447485585303956, "learning_rate": 1.5186296264545787e-05, "loss": 0.6407, "step": 6034 }, { "epoch": 0.35, "grad_norm": 0.507392674710051, "learning_rate": 1.5184705096318748e-05, "loss": 0.2305, "step": 6035 }, { "epoch": 0.35, "grad_norm": 0.31821764679036, "learning_rate": 1.5183113748545595e-05, "loss": 0.2953, "step": 6036 }, { "epoch": 0.35, "grad_norm": 0.37936593474211566, "learning_rate": 1.5181522221281435e-05, "loss": 0.335, "step": 6037 }, { "epoch": 0.35, "grad_norm": 0.1908639292571459, "learning_rate": 1.5179930514581383e-05, "loss": 0.1056, "step": 6038 }, { "epoch": 0.35, "grad_norm": 0.39556981816109443, "learning_rate": 1.517833862850056e-05, "loss": 0.3185, "step": 6039 }, { "epoch": 0.35, "grad_norm": 1.0851322992534065, "learning_rate": 1.5176746563094092e-05, "loss": 0.6819, "step": 6040 }, { "epoch": 0.35, "grad_norm": 0.32365313141441393, "learning_rate": 1.5175154318417116e-05, "loss": 0.2474, "step": 6041 }, { "epoch": 0.35, "grad_norm": 0.44971760340828426, "learning_rate": 1.5173561894524765e-05, "loss": 0.3509, "step": 6042 }, { "epoch": 0.35, "grad_norm": 0.6905509263632318, "learning_rate": 1.517196929147219e-05, "loss": 0.4647, "step": 6043 }, { "epoch": 0.35, "grad_norm": 0.24909171253007986, "learning_rate": 1.5170376509314539e-05, "loss": 0.1757, "step": 6044 }, { "epoch": 0.35, "grad_norm": 0.39274007428317864, "learning_rate": 1.5168783548106976e-05, "loss": 0.3238, "step": 6045 }, { "epoch": 0.35, "grad_norm": 0.9155198249464075, "learning_rate": 1.5167190407904656e-05, "loss": 0.6702, "step": 6046 }, { "epoch": 0.35, "grad_norm": 0.573228632743676, "learning_rate": 1.5165597088762757e-05, "loss": 0.3907, "step": 6047 }, { "epoch": 0.35, "grad_norm": 0.37380539990802764, "learning_rate": 1.5164003590736452e-05, "loss": 0.2131, "step": 6048 }, { "epoch": 0.35, "grad_norm": 0.43565239129066136, "learning_rate": 1.5162409913880927e-05, "loss": 0.3494, "step": 6049 }, { "epoch": 0.35, "grad_norm": 0.31162452510656125, "learning_rate": 1.5160816058251367e-05, "loss": 0.1867, "step": 6050 }, { "epoch": 0.35, "grad_norm": 0.33539240652800983, "learning_rate": 1.5159222023902969e-05, "loss": 0.2238, "step": 6051 }, { "epoch": 0.35, "grad_norm": 0.3631277574565755, "learning_rate": 1.5157627810890937e-05, "loss": 0.3246, "step": 6052 }, { "epoch": 0.35, "grad_norm": 0.744527008947136, "learning_rate": 1.5156033419270472e-05, "loss": 0.4741, "step": 6053 }, { "epoch": 0.35, "grad_norm": 0.3080561599256677, "learning_rate": 1.5154438849096791e-05, "loss": 0.2386, "step": 6054 }, { "epoch": 0.35, "grad_norm": 0.9356964028488047, "learning_rate": 1.5152844100425114e-05, "loss": 0.5416, "step": 6055 }, { "epoch": 0.35, "grad_norm": 0.24395016905721084, "learning_rate": 1.5151249173310672e-05, "loss": 0.2228, "step": 6056 }, { "epoch": 0.35, "grad_norm": 0.36970463621043104, "learning_rate": 1.5149654067808688e-05, "loss": 0.2883, "step": 6057 }, { "epoch": 0.35, "grad_norm": 0.9048703602569318, "learning_rate": 1.5148058783974407e-05, "loss": 0.4467, "step": 6058 }, { "epoch": 0.35, "grad_norm": 0.695872288096352, "learning_rate": 1.5146463321863069e-05, "loss": 0.4602, "step": 6059 }, { "epoch": 0.35, "grad_norm": 0.37486593573199756, "learning_rate": 1.514486768152993e-05, "loss": 0.2843, "step": 6060 }, { "epoch": 0.35, "grad_norm": 0.3653532580952253, "learning_rate": 1.5143271863030244e-05, "loss": 0.2839, "step": 6061 }, { "epoch": 0.35, "grad_norm": 0.2517009406199723, "learning_rate": 1.5141675866419276e-05, "loss": 0.1826, "step": 6062 }, { "epoch": 0.35, "grad_norm": 0.3704254478483168, "learning_rate": 1.5140079691752293e-05, "loss": 0.2775, "step": 6063 }, { "epoch": 0.35, "grad_norm": 0.46425654567960384, "learning_rate": 1.5138483339084571e-05, "loss": 0.3136, "step": 6064 }, { "epoch": 0.35, "grad_norm": 0.959500141667516, "learning_rate": 1.5136886808471389e-05, "loss": 0.5221, "step": 6065 }, { "epoch": 0.35, "grad_norm": 0.3435644123187082, "learning_rate": 1.5135290099968043e-05, "loss": 0.2827, "step": 6066 }, { "epoch": 0.35, "grad_norm": 1.0712411877235641, "learning_rate": 1.5133693213629818e-05, "loss": 0.3403, "step": 6067 }, { "epoch": 0.35, "grad_norm": 0.2858791739075835, "learning_rate": 1.513209614951202e-05, "loss": 0.2423, "step": 6068 }, { "epoch": 0.35, "grad_norm": 0.4780296493408544, "learning_rate": 1.5130498907669952e-05, "loss": 0.2785, "step": 6069 }, { "epoch": 0.35, "grad_norm": 0.8319222954221501, "learning_rate": 1.512890148815893e-05, "loss": 0.479, "step": 6070 }, { "epoch": 0.35, "grad_norm": 0.32882328520167026, "learning_rate": 1.5127303891034264e-05, "loss": 0.183, "step": 6071 }, { "epoch": 0.35, "grad_norm": 0.33276901027696554, "learning_rate": 1.5125706116351291e-05, "loss": 0.279, "step": 6072 }, { "epoch": 0.35, "grad_norm": 0.5268756881469756, "learning_rate": 1.5124108164165333e-05, "loss": 0.3849, "step": 6073 }, { "epoch": 0.35, "grad_norm": 1.2489127782935394, "learning_rate": 1.512251003453173e-05, "loss": 0.4358, "step": 6074 }, { "epoch": 0.35, "grad_norm": 0.34742346899867804, "learning_rate": 1.5120911727505822e-05, "loss": 0.2864, "step": 6075 }, { "epoch": 0.35, "grad_norm": 0.537616793518178, "learning_rate": 1.5119313243142964e-05, "loss": 0.3793, "step": 6076 }, { "epoch": 0.35, "grad_norm": 0.4013084106067741, "learning_rate": 1.5117714581498509e-05, "loss": 0.1941, "step": 6077 }, { "epoch": 0.35, "grad_norm": 0.38191970049336904, "learning_rate": 1.5116115742627815e-05, "loss": 0.2683, "step": 6078 }, { "epoch": 0.35, "grad_norm": 0.8920627144260248, "learning_rate": 1.5114516726586254e-05, "loss": 0.5596, "step": 6079 }, { "epoch": 0.35, "grad_norm": 0.4618446581767463, "learning_rate": 1.51129175334292e-05, "loss": 0.2894, "step": 6080 }, { "epoch": 0.35, "grad_norm": 0.43028981687553913, "learning_rate": 1.5111318163212032e-05, "loss": 0.2758, "step": 6081 }, { "epoch": 0.35, "grad_norm": 0.6246636674476441, "learning_rate": 1.5109718615990135e-05, "loss": 0.397, "step": 6082 }, { "epoch": 0.35, "grad_norm": 0.2595488255827491, "learning_rate": 1.51081188918189e-05, "loss": 0.2166, "step": 6083 }, { "epoch": 0.35, "grad_norm": 0.3366273152459292, "learning_rate": 1.5106518990753731e-05, "loss": 0.2066, "step": 6084 }, { "epoch": 0.35, "grad_norm": 0.5509473415305383, "learning_rate": 1.5104918912850029e-05, "loss": 0.3952, "step": 6085 }, { "epoch": 0.35, "grad_norm": 1.168537237871508, "learning_rate": 1.5103318658163202e-05, "loss": 0.6159, "step": 6086 }, { "epoch": 0.35, "grad_norm": 0.44113838145483497, "learning_rate": 1.5101718226748673e-05, "loss": 0.2563, "step": 6087 }, { "epoch": 0.35, "grad_norm": 0.42233647100665583, "learning_rate": 1.5100117618661856e-05, "loss": 0.3352, "step": 6088 }, { "epoch": 0.35, "grad_norm": 1.153551365592648, "learning_rate": 1.5098516833958187e-05, "loss": 0.7705, "step": 6089 }, { "epoch": 0.35, "grad_norm": 0.21605941110243826, "learning_rate": 1.50969158726931e-05, "loss": 0.1453, "step": 6090 }, { "epoch": 0.35, "grad_norm": 1.1214770160197514, "learning_rate": 1.5095314734922037e-05, "loss": 0.6139, "step": 6091 }, { "epoch": 0.35, "grad_norm": 0.4166865421164272, "learning_rate": 1.509371342070044e-05, "loss": 0.323, "step": 6092 }, { "epoch": 0.35, "grad_norm": 0.3283893206173644, "learning_rate": 1.509211193008377e-05, "loss": 0.2456, "step": 6093 }, { "epoch": 0.35, "grad_norm": 1.0555578416538822, "learning_rate": 1.509051026312748e-05, "loss": 0.6061, "step": 6094 }, { "epoch": 0.35, "grad_norm": 0.29921174955999685, "learning_rate": 1.508890841988704e-05, "loss": 0.2598, "step": 6095 }, { "epoch": 0.35, "grad_norm": 0.3149749288748337, "learning_rate": 1.5087306400417921e-05, "loss": 0.2747, "step": 6096 }, { "epoch": 0.35, "grad_norm": 0.31212173256557457, "learning_rate": 1.5085704204775598e-05, "loss": 0.1728, "step": 6097 }, { "epoch": 0.35, "grad_norm": 0.5652954029006605, "learning_rate": 1.508410183301556e-05, "loss": 0.4581, "step": 6098 }, { "epoch": 0.35, "grad_norm": 0.6856421881804132, "learning_rate": 1.508249928519329e-05, "loss": 0.3919, "step": 6099 }, { "epoch": 0.35, "grad_norm": 0.30656764100593287, "learning_rate": 1.5080896561364293e-05, "loss": 0.2557, "step": 6100 }, { "epoch": 0.35, "grad_norm": 0.4687395321208568, "learning_rate": 1.5079293661584063e-05, "loss": 0.3314, "step": 6101 }, { "epoch": 0.35, "grad_norm": 0.3349070250202825, "learning_rate": 1.5077690585908113e-05, "loss": 0.2066, "step": 6102 }, { "epoch": 0.35, "grad_norm": 0.3640182696226341, "learning_rate": 1.5076087334391957e-05, "loss": 0.2836, "step": 6103 }, { "epoch": 0.35, "grad_norm": 0.34746788590963384, "learning_rate": 1.5074483907091115e-05, "loss": 0.3276, "step": 6104 }, { "epoch": 0.35, "grad_norm": 0.3712053354906611, "learning_rate": 1.5072880304061112e-05, "loss": 0.3489, "step": 6105 }, { "epoch": 0.35, "grad_norm": 0.4718096602897961, "learning_rate": 1.5071276525357486e-05, "loss": 0.3113, "step": 6106 }, { "epoch": 0.35, "grad_norm": 0.5120999080003616, "learning_rate": 1.5069672571035766e-05, "loss": 0.3747, "step": 6107 }, { "epoch": 0.35, "grad_norm": 0.28354797155566364, "learning_rate": 1.506806844115151e-05, "loss": 0.2414, "step": 6108 }, { "epoch": 0.35, "grad_norm": 0.37166878535602804, "learning_rate": 1.5066464135760254e-05, "loss": 0.2915, "step": 6109 }, { "epoch": 0.35, "grad_norm": 0.5029551308263018, "learning_rate": 1.506485965491757e-05, "loss": 0.3747, "step": 6110 }, { "epoch": 0.35, "grad_norm": 0.4002329105834612, "learning_rate": 1.5063254998679009e-05, "loss": 0.2825, "step": 6111 }, { "epoch": 0.35, "grad_norm": 0.3814977933139438, "learning_rate": 1.5061650167100146e-05, "loss": 0.2968, "step": 6112 }, { "epoch": 0.35, "grad_norm": 0.829273859116276, "learning_rate": 1.5060045160236556e-05, "loss": 0.452, "step": 6113 }, { "epoch": 0.35, "grad_norm": 0.27937980686236547, "learning_rate": 1.505843997814382e-05, "loss": 0.2292, "step": 6114 }, { "epoch": 0.35, "grad_norm": 0.47676045935760386, "learning_rate": 1.5056834620877525e-05, "loss": 0.312, "step": 6115 }, { "epoch": 0.35, "grad_norm": 0.3451101796926662, "learning_rate": 1.5055229088493264e-05, "loss": 0.2801, "step": 6116 }, { "epoch": 0.35, "grad_norm": 0.579516426649944, "learning_rate": 1.5053623381046639e-05, "loss": 0.394, "step": 6117 }, { "epoch": 0.35, "grad_norm": 0.44553554966049524, "learning_rate": 1.505201749859325e-05, "loss": 0.3536, "step": 6118 }, { "epoch": 0.35, "grad_norm": 0.3590680813627751, "learning_rate": 1.5050411441188714e-05, "loss": 0.2677, "step": 6119 }, { "epoch": 0.35, "grad_norm": 0.5367161392325959, "learning_rate": 1.5048805208888651e-05, "loss": 0.3061, "step": 6120 }, { "epoch": 0.35, "grad_norm": 0.4162731031659161, "learning_rate": 1.5047198801748677e-05, "loss": 0.3569, "step": 6121 }, { "epoch": 0.35, "grad_norm": 0.2970681347926114, "learning_rate": 1.5045592219824423e-05, "loss": 0.2119, "step": 6122 }, { "epoch": 0.35, "grad_norm": 0.35046893311451455, "learning_rate": 1.5043985463171532e-05, "loss": 0.2086, "step": 6123 }, { "epoch": 0.35, "grad_norm": 0.313277427937686, "learning_rate": 1.5042378531845638e-05, "loss": 0.3118, "step": 6124 }, { "epoch": 0.35, "grad_norm": 1.2384222887646135, "learning_rate": 1.5040771425902393e-05, "loss": 0.744, "step": 6125 }, { "epoch": 0.35, "grad_norm": 0.3171223595122585, "learning_rate": 1.503916414539745e-05, "loss": 0.2281, "step": 6126 }, { "epoch": 0.35, "grad_norm": 0.413448931832896, "learning_rate": 1.5037556690386472e-05, "loss": 0.389, "step": 6127 }, { "epoch": 0.35, "grad_norm": 0.23777876075584098, "learning_rate": 1.5035949060925118e-05, "loss": 0.2254, "step": 6128 }, { "epoch": 0.35, "grad_norm": 0.3183163904203893, "learning_rate": 1.5034341257069072e-05, "loss": 0.2173, "step": 6129 }, { "epoch": 0.35, "grad_norm": 0.5650146581955648, "learning_rate": 1.5032733278873996e-05, "loss": 0.4094, "step": 6130 }, { "epoch": 0.35, "grad_norm": 0.513930176388067, "learning_rate": 1.5031125126395589e-05, "loss": 0.4267, "step": 6131 }, { "epoch": 0.35, "grad_norm": 0.3492885607799953, "learning_rate": 1.5029516799689533e-05, "loss": 0.3188, "step": 6132 }, { "epoch": 0.35, "grad_norm": 0.3884188797338796, "learning_rate": 1.5027908298811527e-05, "loss": 0.2838, "step": 6133 }, { "epoch": 0.35, "grad_norm": 0.28370779822743003, "learning_rate": 1.5026299623817273e-05, "loss": 0.2077, "step": 6134 }, { "epoch": 0.35, "grad_norm": 0.4015496523870909, "learning_rate": 1.5024690774762478e-05, "loss": 0.3009, "step": 6135 }, { "epoch": 0.35, "grad_norm": 0.3578652014760878, "learning_rate": 1.5023081751702857e-05, "loss": 0.2493, "step": 6136 }, { "epoch": 0.35, "grad_norm": 0.7011799483411165, "learning_rate": 1.5021472554694134e-05, "loss": 0.46, "step": 6137 }, { "epoch": 0.35, "grad_norm": 0.5617022132112179, "learning_rate": 1.501986318379203e-05, "loss": 0.4276, "step": 6138 }, { "epoch": 0.35, "grad_norm": 0.3212717857556724, "learning_rate": 1.501825363905228e-05, "loss": 0.2388, "step": 6139 }, { "epoch": 0.35, "grad_norm": 0.25259321483356195, "learning_rate": 1.5016643920530625e-05, "loss": 0.2136, "step": 6140 }, { "epoch": 0.35, "grad_norm": 0.7643284203192063, "learning_rate": 1.5015034028282802e-05, "loss": 0.4888, "step": 6141 }, { "epoch": 0.35, "grad_norm": 0.3205694041270008, "learning_rate": 1.5013423962364571e-05, "loss": 0.2469, "step": 6142 }, { "epoch": 0.35, "grad_norm": 0.47535488032774514, "learning_rate": 1.501181372283168e-05, "loss": 0.3845, "step": 6143 }, { "epoch": 0.35, "grad_norm": 0.5102639689359593, "learning_rate": 1.5010203309739897e-05, "loss": 0.3412, "step": 6144 }, { "epoch": 0.35, "grad_norm": 0.34843918812053276, "learning_rate": 1.5008592723144987e-05, "loss": 0.3098, "step": 6145 }, { "epoch": 0.35, "grad_norm": 0.23040537290013943, "learning_rate": 1.500698196310273e-05, "loss": 0.0713, "step": 6146 }, { "epoch": 0.35, "grad_norm": 0.4026091520264639, "learning_rate": 1.5005371029668899e-05, "loss": 0.2918, "step": 6147 }, { "epoch": 0.35, "grad_norm": 0.3332787714379259, "learning_rate": 1.5003759922899286e-05, "loss": 0.2908, "step": 6148 }, { "epoch": 0.35, "grad_norm": 0.8116793651226156, "learning_rate": 1.5002148642849683e-05, "loss": 0.3831, "step": 6149 }, { "epoch": 0.35, "grad_norm": 0.4649050578936953, "learning_rate": 1.5000537189575885e-05, "loss": 0.3492, "step": 6150 }, { "epoch": 0.35, "grad_norm": 0.398384632333775, "learning_rate": 1.4998925563133702e-05, "loss": 0.3362, "step": 6151 }, { "epoch": 0.35, "grad_norm": 0.2859884604619067, "learning_rate": 1.499731376357894e-05, "loss": 0.1982, "step": 6152 }, { "epoch": 0.35, "grad_norm": 1.2499071609751304, "learning_rate": 1.499570179096742e-05, "loss": 0.8175, "step": 6153 }, { "epoch": 0.35, "grad_norm": 0.35604390551676335, "learning_rate": 1.499408964535496e-05, "loss": 0.2641, "step": 6154 }, { "epoch": 0.35, "grad_norm": 0.6096935274842166, "learning_rate": 1.499247732679739e-05, "loss": 0.3215, "step": 6155 }, { "epoch": 0.35, "grad_norm": 0.4061485551230446, "learning_rate": 1.4990864835350544e-05, "loss": 0.1853, "step": 6156 }, { "epoch": 0.35, "grad_norm": 0.3753388063151038, "learning_rate": 1.4989252171070265e-05, "loss": 0.2943, "step": 6157 }, { "epoch": 0.35, "grad_norm": 0.43330743666285093, "learning_rate": 1.4987639334012398e-05, "loss": 0.2916, "step": 6158 }, { "epoch": 0.35, "grad_norm": 0.3607792270908736, "learning_rate": 1.4986026324232796e-05, "loss": 0.2567, "step": 6159 }, { "epoch": 0.35, "grad_norm": 0.41735006093173743, "learning_rate": 1.4984413141787312e-05, "loss": 0.3247, "step": 6160 }, { "epoch": 0.35, "grad_norm": 0.6882683097516364, "learning_rate": 1.498279978673182e-05, "loss": 0.4989, "step": 6161 }, { "epoch": 0.35, "grad_norm": 0.33450054220257863, "learning_rate": 1.4981186259122185e-05, "loss": 0.176, "step": 6162 }, { "epoch": 0.35, "grad_norm": 0.3138479440645983, "learning_rate": 1.4979572559014284e-05, "loss": 0.2784, "step": 6163 }, { "epoch": 0.35, "grad_norm": 0.9177725613411829, "learning_rate": 1.4977958686463998e-05, "loss": 0.6058, "step": 6164 }, { "epoch": 0.35, "grad_norm": 0.43862792944574786, "learning_rate": 1.497634464152722e-05, "loss": 0.2266, "step": 6165 }, { "epoch": 0.35, "grad_norm": 0.4316088985564484, "learning_rate": 1.4974730424259836e-05, "loss": 0.3325, "step": 6166 }, { "epoch": 0.35, "grad_norm": 0.4754951018407942, "learning_rate": 1.4973116034717754e-05, "loss": 0.3411, "step": 6167 }, { "epoch": 0.35, "grad_norm": 0.25015470689457353, "learning_rate": 1.4971501472956875e-05, "loss": 0.1052, "step": 6168 }, { "epoch": 0.35, "grad_norm": 0.46799978705707523, "learning_rate": 1.4969886739033116e-05, "loss": 0.3381, "step": 6169 }, { "epoch": 0.35, "grad_norm": 1.5456881995311484, "learning_rate": 1.4968271833002393e-05, "loss": 0.8595, "step": 6170 }, { "epoch": 0.35, "grad_norm": 0.35870092364375566, "learning_rate": 1.4966656754920635e-05, "loss": 0.3482, "step": 6171 }, { "epoch": 0.35, "grad_norm": 0.32167903780767804, "learning_rate": 1.496504150484376e-05, "loss": 0.2257, "step": 6172 }, { "epoch": 0.35, "grad_norm": 0.4229197497644949, "learning_rate": 1.4963426082827714e-05, "loss": 0.3368, "step": 6173 }, { "epoch": 0.35, "grad_norm": 0.47531000042653976, "learning_rate": 1.4961810488928434e-05, "loss": 0.2762, "step": 6174 }, { "epoch": 0.35, "grad_norm": 0.3133918190580812, "learning_rate": 1.4960194723201873e-05, "loss": 0.2267, "step": 6175 }, { "epoch": 0.35, "grad_norm": 1.041910669070392, "learning_rate": 1.4958578785703982e-05, "loss": 0.6656, "step": 6176 }, { "epoch": 0.35, "grad_norm": 0.827017268485664, "learning_rate": 1.4956962676490719e-05, "loss": 0.5432, "step": 6177 }, { "epoch": 0.35, "grad_norm": 0.3650626346399905, "learning_rate": 1.495534639561805e-05, "loss": 0.2469, "step": 6178 }, { "epoch": 0.36, "grad_norm": 0.3928206475372948, "learning_rate": 1.4953729943141952e-05, "loss": 0.3242, "step": 6179 }, { "epoch": 0.36, "grad_norm": 0.284766364444701, "learning_rate": 1.49521133191184e-05, "loss": 0.1743, "step": 6180 }, { "epoch": 0.36, "grad_norm": 0.3653187691143623, "learning_rate": 1.4950496523603373e-05, "loss": 0.2148, "step": 6181 }, { "epoch": 0.36, "grad_norm": 0.9229835999005866, "learning_rate": 1.4948879556652866e-05, "loss": 0.5495, "step": 6182 }, { "epoch": 0.36, "grad_norm": 0.4713117458300445, "learning_rate": 1.4947262418322872e-05, "loss": 0.3238, "step": 6183 }, { "epoch": 0.36, "grad_norm": 0.4509127635088528, "learning_rate": 1.4945645108669395e-05, "loss": 0.2793, "step": 6184 }, { "epoch": 0.36, "grad_norm": 0.4986348814970563, "learning_rate": 1.4944027627748438e-05, "loss": 0.3296, "step": 6185 }, { "epoch": 0.36, "grad_norm": 0.2946512585424218, "learning_rate": 1.4942409975616019e-05, "loss": 0.1613, "step": 6186 }, { "epoch": 0.36, "grad_norm": 0.3968498037859521, "learning_rate": 1.4940792152328156e-05, "loss": 0.2815, "step": 6187 }, { "epoch": 0.36, "grad_norm": 0.3634508359888088, "learning_rate": 1.4939174157940872e-05, "loss": 0.2695, "step": 6188 }, { "epoch": 0.36, "grad_norm": 0.6800060796414303, "learning_rate": 1.4937555992510198e-05, "loss": 0.4762, "step": 6189 }, { "epoch": 0.36, "grad_norm": 0.34781887939904654, "learning_rate": 1.4935937656092175e-05, "loss": 0.2857, "step": 6190 }, { "epoch": 0.36, "grad_norm": 0.3356382407547884, "learning_rate": 1.493431914874284e-05, "loss": 0.2678, "step": 6191 }, { "epoch": 0.36, "grad_norm": 0.2680360999255305, "learning_rate": 1.4932700470518247e-05, "loss": 0.1794, "step": 6192 }, { "epoch": 0.36, "grad_norm": 0.36281121646648407, "learning_rate": 1.4931081621474448e-05, "loss": 0.2694, "step": 6193 }, { "epoch": 0.36, "grad_norm": 0.7202442627048605, "learning_rate": 1.4929462601667504e-05, "loss": 0.3639, "step": 6194 }, { "epoch": 0.36, "grad_norm": 0.38416910639035173, "learning_rate": 1.4927843411153481e-05, "loss": 0.3403, "step": 6195 }, { "epoch": 0.36, "grad_norm": 0.33023100678471856, "learning_rate": 1.4926224049988456e-05, "loss": 0.2986, "step": 6196 }, { "epoch": 0.36, "grad_norm": 1.2815573161505327, "learning_rate": 1.4924604518228503e-05, "loss": 0.8653, "step": 6197 }, { "epoch": 0.36, "grad_norm": 0.23076577952264316, "learning_rate": 1.4922984815929707e-05, "loss": 0.1842, "step": 6198 }, { "epoch": 0.36, "grad_norm": 0.2980217129945579, "learning_rate": 1.4921364943148158e-05, "loss": 0.2783, "step": 6199 }, { "epoch": 0.36, "grad_norm": 0.8018563432059227, "learning_rate": 1.4919744899939952e-05, "loss": 0.4694, "step": 6200 }, { "epoch": 0.36, "grad_norm": 0.7658024271159686, "learning_rate": 1.4918124686361193e-05, "loss": 0.3918, "step": 6201 }, { "epoch": 0.36, "grad_norm": 0.40099316107463984, "learning_rate": 1.4916504302467987e-05, "loss": 0.2832, "step": 6202 }, { "epoch": 0.36, "grad_norm": 0.3396109439104663, "learning_rate": 1.4914883748316448e-05, "loss": 0.3053, "step": 6203 }, { "epoch": 0.36, "grad_norm": 0.17946159090846295, "learning_rate": 1.4913263023962698e-05, "loss": 0.0944, "step": 6204 }, { "epoch": 0.36, "grad_norm": 0.42650868983363494, "learning_rate": 1.491164212946286e-05, "loss": 0.2872, "step": 6205 }, { "epoch": 0.36, "grad_norm": 0.7748315569899414, "learning_rate": 1.4910021064873066e-05, "loss": 0.4013, "step": 6206 }, { "epoch": 0.36, "grad_norm": 0.33464339880757243, "learning_rate": 1.4908399830249454e-05, "loss": 0.2786, "step": 6207 }, { "epoch": 0.36, "grad_norm": 0.32860205040007584, "learning_rate": 1.4906778425648165e-05, "loss": 0.2614, "step": 6208 }, { "epoch": 0.36, "grad_norm": 1.1586441525453701, "learning_rate": 1.4905156851125354e-05, "loss": 0.7866, "step": 6209 }, { "epoch": 0.36, "grad_norm": 0.46748475889837865, "learning_rate": 1.4903535106737166e-05, "loss": 0.2579, "step": 6210 }, { "epoch": 0.36, "grad_norm": 0.26132463339166095, "learning_rate": 1.4901913192539773e-05, "loss": 0.2383, "step": 6211 }, { "epoch": 0.36, "grad_norm": 0.46282006638803863, "learning_rate": 1.4900291108589335e-05, "loss": 0.3461, "step": 6212 }, { "epoch": 0.36, "grad_norm": 1.4299229249539138, "learning_rate": 1.4898668854942029e-05, "loss": 0.8093, "step": 6213 }, { "epoch": 0.36, "grad_norm": 0.3654395694243854, "learning_rate": 1.4897046431654028e-05, "loss": 0.2173, "step": 6214 }, { "epoch": 0.36, "grad_norm": 0.40808297868803545, "learning_rate": 1.4895423838781523e-05, "loss": 0.3156, "step": 6215 }, { "epoch": 0.36, "grad_norm": 0.8572774804511525, "learning_rate": 1.4893801076380697e-05, "loss": 0.6124, "step": 6216 }, { "epoch": 0.36, "grad_norm": 0.3163807117672398, "learning_rate": 1.4892178144507754e-05, "loss": 0.2505, "step": 6217 }, { "epoch": 0.36, "grad_norm": 0.39502399231456026, "learning_rate": 1.4890555043218888e-05, "loss": 0.3094, "step": 6218 }, { "epoch": 0.36, "grad_norm": 0.305393097707522, "learning_rate": 1.4888931772570314e-05, "loss": 0.26, "step": 6219 }, { "epoch": 0.36, "grad_norm": 0.4573475138697167, "learning_rate": 1.4887308332618245e-05, "loss": 0.2775, "step": 6220 }, { "epoch": 0.36, "grad_norm": 0.5099326579508142, "learning_rate": 1.4885684723418897e-05, "loss": 0.2951, "step": 6221 }, { "epoch": 0.36, "grad_norm": 0.3622752534920921, "learning_rate": 1.4884060945028495e-05, "loss": 0.3238, "step": 6222 }, { "epoch": 0.36, "grad_norm": 0.4769109545150248, "learning_rate": 1.4882436997503273e-05, "loss": 0.3067, "step": 6223 }, { "epoch": 0.36, "grad_norm": 0.31122680816110204, "learning_rate": 1.4880812880899472e-05, "loss": 0.2261, "step": 6224 }, { "epoch": 0.36, "grad_norm": 0.44219289230671227, "learning_rate": 1.4879188595273326e-05, "loss": 0.3029, "step": 6225 }, { "epoch": 0.36, "grad_norm": 0.566041714859357, "learning_rate": 1.487756414068109e-05, "loss": 0.334, "step": 6226 }, { "epoch": 0.36, "grad_norm": 0.3654004821252147, "learning_rate": 1.4875939517179016e-05, "loss": 0.2639, "step": 6227 }, { "epoch": 0.36, "grad_norm": 0.755425769942834, "learning_rate": 1.4874314724823368e-05, "loss": 0.5034, "step": 6228 }, { "epoch": 0.36, "grad_norm": 0.3716823951023819, "learning_rate": 1.487268976367041e-05, "loss": 0.3237, "step": 6229 }, { "epoch": 0.36, "grad_norm": 0.2941511105541551, "learning_rate": 1.4871064633776418e-05, "loss": 0.2806, "step": 6230 }, { "epoch": 0.36, "grad_norm": 0.31363827129766925, "learning_rate": 1.4869439335197661e-05, "loss": 0.1919, "step": 6231 }, { "epoch": 0.36, "grad_norm": 0.38920044645943885, "learning_rate": 1.4867813867990435e-05, "loss": 0.2882, "step": 6232 }, { "epoch": 0.36, "grad_norm": 0.5297592578525103, "learning_rate": 1.486618823221102e-05, "loss": 0.4331, "step": 6233 }, { "epoch": 0.36, "grad_norm": 0.34389016761519975, "learning_rate": 1.4864562427915722e-05, "loss": 0.2951, "step": 6234 }, { "epoch": 0.36, "grad_norm": 0.4098129452025419, "learning_rate": 1.486293645516083e-05, "loss": 0.3112, "step": 6235 }, { "epoch": 0.36, "grad_norm": 0.5460341175814037, "learning_rate": 1.4861310314002659e-05, "loss": 0.339, "step": 6236 }, { "epoch": 0.36, "grad_norm": 0.24296990938486865, "learning_rate": 1.485968400449752e-05, "loss": 0.1272, "step": 6237 }, { "epoch": 0.36, "grad_norm": 0.4041372596227764, "learning_rate": 1.485805752670174e-05, "loss": 0.2752, "step": 6238 }, { "epoch": 0.36, "grad_norm": 0.31120053524746066, "learning_rate": 1.4856430880671628e-05, "loss": 0.3153, "step": 6239 }, { "epoch": 0.36, "grad_norm": 0.5829267478852111, "learning_rate": 1.485480406646353e-05, "loss": 0.3675, "step": 6240 }, { "epoch": 0.36, "grad_norm": 0.5853392794597301, "learning_rate": 1.485317708413377e-05, "loss": 0.2736, "step": 6241 }, { "epoch": 0.36, "grad_norm": 0.3633480158282146, "learning_rate": 1.48515499337387e-05, "loss": 0.2897, "step": 6242 }, { "epoch": 0.36, "grad_norm": 0.3294481476064836, "learning_rate": 1.4849922615334662e-05, "loss": 0.2107, "step": 6243 }, { "epoch": 0.36, "grad_norm": 0.596863758511093, "learning_rate": 1.4848295128978016e-05, "loss": 0.3861, "step": 6244 }, { "epoch": 0.36, "grad_norm": 0.30007973784979886, "learning_rate": 1.4846667474725115e-05, "loss": 0.2746, "step": 6245 }, { "epoch": 0.36, "grad_norm": 0.4615792309021516, "learning_rate": 1.484503965263233e-05, "loss": 0.3483, "step": 6246 }, { "epoch": 0.36, "grad_norm": 0.35022336479364224, "learning_rate": 1.4843411662756028e-05, "loss": 0.2043, "step": 6247 }, { "epoch": 0.36, "grad_norm": 0.4208558050427091, "learning_rate": 1.484178350515259e-05, "loss": 0.3449, "step": 6248 }, { "epoch": 0.36, "grad_norm": 1.038883513719484, "learning_rate": 1.4840155179878398e-05, "loss": 0.7463, "step": 6249 }, { "epoch": 0.36, "grad_norm": 0.2896134927667209, "learning_rate": 1.4838526686989836e-05, "loss": 0.2265, "step": 6250 }, { "epoch": 0.36, "grad_norm": 0.2974585469109081, "learning_rate": 1.4836898026543307e-05, "loss": 0.2787, "step": 6251 }, { "epoch": 0.36, "grad_norm": 0.4006467030665213, "learning_rate": 1.4835269198595206e-05, "loss": 0.3279, "step": 6252 }, { "epoch": 0.36, "grad_norm": 0.33309182783458663, "learning_rate": 1.483364020320194e-05, "loss": 0.2246, "step": 6253 }, { "epoch": 0.36, "grad_norm": 0.36433501957512965, "learning_rate": 1.4832011040419922e-05, "loss": 0.3392, "step": 6254 }, { "epoch": 0.36, "grad_norm": 0.5059114706018922, "learning_rate": 1.4830381710305572e-05, "loss": 0.4148, "step": 6255 }, { "epoch": 0.36, "grad_norm": 0.4326822693331583, "learning_rate": 1.4828752212915309e-05, "loss": 0.2464, "step": 6256 }, { "epoch": 0.36, "grad_norm": 0.346803304093782, "learning_rate": 1.4827122548305566e-05, "loss": 0.3429, "step": 6257 }, { "epoch": 0.36, "grad_norm": 0.24676986871220055, "learning_rate": 1.4825492716532772e-05, "loss": 0.2387, "step": 6258 }, { "epoch": 0.36, "grad_norm": 0.8936383019306559, "learning_rate": 1.4823862717653377e-05, "loss": 0.4183, "step": 6259 }, { "epoch": 0.36, "grad_norm": 0.3613517629923259, "learning_rate": 1.4822232551723824e-05, "loss": 0.2395, "step": 6260 }, { "epoch": 0.36, "grad_norm": 0.7576742857770312, "learning_rate": 1.4820602218800562e-05, "loss": 0.604, "step": 6261 }, { "epoch": 0.36, "grad_norm": 0.36124256710193003, "learning_rate": 1.4818971718940053e-05, "loss": 0.282, "step": 6262 }, { "epoch": 0.36, "grad_norm": 0.3117081152835165, "learning_rate": 1.4817341052198763e-05, "loss": 0.2732, "step": 6263 }, { "epoch": 0.36, "grad_norm": 0.4077993695451762, "learning_rate": 1.481571021863316e-05, "loss": 0.2519, "step": 6264 }, { "epoch": 0.36, "grad_norm": 0.36274755930196695, "learning_rate": 1.4814079218299715e-05, "loss": 0.2588, "step": 6265 }, { "epoch": 0.36, "grad_norm": 0.31755716594371836, "learning_rate": 1.4812448051254914e-05, "loss": 0.2515, "step": 6266 }, { "epoch": 0.36, "grad_norm": 1.2379686903604816, "learning_rate": 1.4810816717555248e-05, "loss": 0.8207, "step": 6267 }, { "epoch": 0.36, "grad_norm": 0.6200789147881792, "learning_rate": 1.4809185217257205e-05, "loss": 0.4401, "step": 6268 }, { "epoch": 0.36, "grad_norm": 0.3576042390007011, "learning_rate": 1.4807553550417281e-05, "loss": 0.236, "step": 6269 }, { "epoch": 0.36, "grad_norm": 0.3135412551606865, "learning_rate": 1.4805921717091989e-05, "loss": 0.2926, "step": 6270 }, { "epoch": 0.36, "grad_norm": 0.33504011374279746, "learning_rate": 1.480428971733783e-05, "loss": 0.2396, "step": 6271 }, { "epoch": 0.36, "grad_norm": 0.395484364592832, "learning_rate": 1.4802657551211331e-05, "loss": 0.2966, "step": 6272 }, { "epoch": 0.36, "grad_norm": 0.7334203945138051, "learning_rate": 1.4801025218769001e-05, "loss": 0.3673, "step": 6273 }, { "epoch": 0.36, "grad_norm": 0.3095981068865218, "learning_rate": 1.4799392720067378e-05, "loss": 0.2805, "step": 6274 }, { "epoch": 0.36, "grad_norm": 0.37500869393675135, "learning_rate": 1.4797760055162988e-05, "loss": 0.3432, "step": 6275 }, { "epoch": 0.36, "grad_norm": 0.21332335884529122, "learning_rate": 1.4796127224112378e-05, "loss": 0.1254, "step": 6276 }, { "epoch": 0.36, "grad_norm": 1.1738522008194672, "learning_rate": 1.4794494226972088e-05, "loss": 0.7157, "step": 6277 }, { "epoch": 0.36, "grad_norm": 0.2622768681089619, "learning_rate": 1.4792861063798664e-05, "loss": 0.2618, "step": 6278 }, { "epoch": 0.36, "grad_norm": 0.5248605026141624, "learning_rate": 1.4791227734648672e-05, "loss": 0.3278, "step": 6279 }, { "epoch": 0.36, "grad_norm": 0.5791380522386198, "learning_rate": 1.4789594239578668e-05, "loss": 0.3969, "step": 6280 }, { "epoch": 0.36, "grad_norm": 0.40689169491954263, "learning_rate": 1.4787960578645222e-05, "loss": 0.2991, "step": 6281 }, { "epoch": 0.36, "grad_norm": 0.35430059420501625, "learning_rate": 1.4786326751904907e-05, "loss": 0.2538, "step": 6282 }, { "epoch": 0.36, "grad_norm": 0.25326613938208253, "learning_rate": 1.4784692759414303e-05, "loss": 0.1784, "step": 6283 }, { "epoch": 0.36, "grad_norm": 0.3879921231259588, "learning_rate": 1.4783058601229994e-05, "loss": 0.3079, "step": 6284 }, { "epoch": 0.36, "grad_norm": 0.7226592658190863, "learning_rate": 1.4781424277408572e-05, "loss": 0.4558, "step": 6285 }, { "epoch": 0.36, "grad_norm": 0.30693828632115694, "learning_rate": 1.4779789788006632e-05, "loss": 0.2734, "step": 6286 }, { "epoch": 0.36, "grad_norm": 0.3880005622421795, "learning_rate": 1.4778155133080776e-05, "loss": 0.2958, "step": 6287 }, { "epoch": 0.36, "grad_norm": 1.1057367174861266, "learning_rate": 1.4776520312687614e-05, "loss": 0.6752, "step": 6288 }, { "epoch": 0.36, "grad_norm": 0.21447372968479209, "learning_rate": 1.477488532688376e-05, "loss": 0.0726, "step": 6289 }, { "epoch": 0.36, "grad_norm": 0.35395443621688044, "learning_rate": 1.4773250175725833e-05, "loss": 0.3048, "step": 6290 }, { "epoch": 0.36, "grad_norm": 0.5289778545984578, "learning_rate": 1.4771614859270458e-05, "loss": 0.3525, "step": 6291 }, { "epoch": 0.36, "grad_norm": 0.48440354407701286, "learning_rate": 1.4769979377574264e-05, "loss": 0.2645, "step": 6292 }, { "epoch": 0.36, "grad_norm": 0.4060500797659063, "learning_rate": 1.4768343730693888e-05, "loss": 0.3174, "step": 6293 }, { "epoch": 0.36, "grad_norm": 0.3819313122256466, "learning_rate": 1.4766707918685974e-05, "loss": 0.3538, "step": 6294 }, { "epoch": 0.36, "grad_norm": 0.3470332126651621, "learning_rate": 1.4765071941607172e-05, "loss": 0.2331, "step": 6295 }, { "epoch": 0.36, "grad_norm": 0.306715059456966, "learning_rate": 1.4763435799514132e-05, "loss": 0.2315, "step": 6296 }, { "epoch": 0.36, "grad_norm": 0.5344701290774023, "learning_rate": 1.4761799492463516e-05, "loss": 0.381, "step": 6297 }, { "epoch": 0.36, "grad_norm": 0.45486931737761455, "learning_rate": 1.4760163020511986e-05, "loss": 0.3881, "step": 6298 }, { "epoch": 0.36, "grad_norm": 0.31770712221775865, "learning_rate": 1.4758526383716219e-05, "loss": 0.2241, "step": 6299 }, { "epoch": 0.36, "grad_norm": 1.1779532369599066, "learning_rate": 1.4756889582132886e-05, "loss": 0.7267, "step": 6300 }, { "epoch": 0.36, "grad_norm": 0.25416641771855647, "learning_rate": 1.4755252615818671e-05, "loss": 0.2218, "step": 6301 }, { "epoch": 0.36, "grad_norm": 0.28196065175567037, "learning_rate": 1.4753615484830261e-05, "loss": 0.2354, "step": 6302 }, { "epoch": 0.36, "grad_norm": 0.6902396292135428, "learning_rate": 1.4751978189224354e-05, "loss": 0.4771, "step": 6303 }, { "epoch": 0.36, "grad_norm": 1.0221054595371608, "learning_rate": 1.4750340729057646e-05, "loss": 0.728, "step": 6304 }, { "epoch": 0.36, "grad_norm": 0.3009926847405351, "learning_rate": 1.4748703104386843e-05, "loss": 0.215, "step": 6305 }, { "epoch": 0.36, "grad_norm": 0.32641194854409294, "learning_rate": 1.4747065315268655e-05, "loss": 0.2941, "step": 6306 }, { "epoch": 0.36, "grad_norm": 0.4241970288360463, "learning_rate": 1.4745427361759801e-05, "loss": 0.3376, "step": 6307 }, { "epoch": 0.36, "grad_norm": 0.45701611260847547, "learning_rate": 1.4743789243916999e-05, "loss": 0.2903, "step": 6308 }, { "epoch": 0.36, "grad_norm": 0.2999184950849396, "learning_rate": 1.4742150961796981e-05, "loss": 0.2455, "step": 6309 }, { "epoch": 0.36, "grad_norm": 0.45425353186377854, "learning_rate": 1.4740512515456479e-05, "loss": 0.3645, "step": 6310 }, { "epoch": 0.36, "grad_norm": 0.374441697495103, "learning_rate": 1.4738873904952232e-05, "loss": 0.2927, "step": 6311 }, { "epoch": 0.36, "grad_norm": 0.8017812489117565, "learning_rate": 1.4737235130340985e-05, "loss": 0.4057, "step": 6312 }, { "epoch": 0.36, "grad_norm": 0.4483513029265171, "learning_rate": 1.473559619167949e-05, "loss": 0.3609, "step": 6313 }, { "epoch": 0.36, "grad_norm": 0.30337256483409936, "learning_rate": 1.4733957089024502e-05, "loss": 0.2775, "step": 6314 }, { "epoch": 0.36, "grad_norm": 0.27788517511323146, "learning_rate": 1.4732317822432782e-05, "loss": 0.1849, "step": 6315 }, { "epoch": 0.36, "grad_norm": 1.2877189024995006, "learning_rate": 1.4730678391961102e-05, "loss": 0.7865, "step": 6316 }, { "epoch": 0.36, "grad_norm": 0.3273282971439404, "learning_rate": 1.472903879766623e-05, "loss": 0.2822, "step": 6317 }, { "epoch": 0.36, "grad_norm": 0.36865587604384104, "learning_rate": 1.4727399039604951e-05, "loss": 0.2766, "step": 6318 }, { "epoch": 0.36, "grad_norm": 0.6217638120111219, "learning_rate": 1.4725759117834045e-05, "loss": 0.4275, "step": 6319 }, { "epoch": 0.36, "grad_norm": 0.34169170317511, "learning_rate": 1.4724119032410305e-05, "loss": 0.3136, "step": 6320 }, { "epoch": 0.36, "grad_norm": 0.30920422479643384, "learning_rate": 1.4722478783390522e-05, "loss": 0.1771, "step": 6321 }, { "epoch": 0.36, "grad_norm": 0.34775308402691785, "learning_rate": 1.472083837083151e-05, "loss": 0.2888, "step": 6322 }, { "epoch": 0.36, "grad_norm": 0.3193141427715202, "learning_rate": 1.471919779479006e-05, "loss": 0.256, "step": 6323 }, { "epoch": 0.36, "grad_norm": 1.1400092958513408, "learning_rate": 1.4717557055322997e-05, "loss": 0.7106, "step": 6324 }, { "epoch": 0.36, "grad_norm": 0.40323223147712445, "learning_rate": 1.4715916152487135e-05, "loss": 0.3131, "step": 6325 }, { "epoch": 0.36, "grad_norm": 0.4250005070452697, "learning_rate": 1.47142750863393e-05, "loss": 0.2852, "step": 6326 }, { "epoch": 0.36, "grad_norm": 0.3343011248837186, "learning_rate": 1.471263385693632e-05, "loss": 0.2307, "step": 6327 }, { "epoch": 0.36, "grad_norm": 0.3055552288905154, "learning_rate": 1.4710992464335034e-05, "loss": 0.1793, "step": 6328 }, { "epoch": 0.36, "grad_norm": 0.40306747549933275, "learning_rate": 1.4709350908592281e-05, "loss": 0.2806, "step": 6329 }, { "epoch": 0.36, "grad_norm": 0.32815408215415465, "learning_rate": 1.4707709189764909e-05, "loss": 0.3094, "step": 6330 }, { "epoch": 0.36, "grad_norm": 0.6956160911594513, "learning_rate": 1.470606730790977e-05, "loss": 0.356, "step": 6331 }, { "epoch": 0.36, "grad_norm": 0.3454136921848711, "learning_rate": 1.4704425263083722e-05, "loss": 0.2861, "step": 6332 }, { "epoch": 0.36, "grad_norm": 0.30035263482807145, "learning_rate": 1.470278305534363e-05, "loss": 0.2579, "step": 6333 }, { "epoch": 0.36, "grad_norm": 0.4375086593799831, "learning_rate": 1.4701140684746363e-05, "loss": 0.3229, "step": 6334 }, { "epoch": 0.36, "grad_norm": 0.32172857440332125, "learning_rate": 1.4699498151348797e-05, "loss": 0.2142, "step": 6335 }, { "epoch": 0.36, "grad_norm": 0.6809092715571192, "learning_rate": 1.469785545520781e-05, "loss": 0.4272, "step": 6336 }, { "epoch": 0.36, "grad_norm": 0.3487777939185845, "learning_rate": 1.469621259638029e-05, "loss": 0.3407, "step": 6337 }, { "epoch": 0.36, "grad_norm": 0.29453832274472436, "learning_rate": 1.4694569574923132e-05, "loss": 0.2134, "step": 6338 }, { "epoch": 0.36, "grad_norm": 0.8308626976224194, "learning_rate": 1.4692926390893234e-05, "loss": 0.5887, "step": 6339 }, { "epoch": 0.36, "grad_norm": 0.39159026271164604, "learning_rate": 1.469128304434749e-05, "loss": 0.3049, "step": 6340 }, { "epoch": 0.36, "grad_norm": 0.24883858751617285, "learning_rate": 1.4689639535342823e-05, "loss": 0.1906, "step": 6341 }, { "epoch": 0.36, "grad_norm": 0.5107631135808215, "learning_rate": 1.4687995863936135e-05, "loss": 0.3797, "step": 6342 }, { "epoch": 0.36, "grad_norm": 0.6558962140201424, "learning_rate": 1.4686352030184354e-05, "loss": 0.4598, "step": 6343 }, { "epoch": 0.36, "grad_norm": 0.32642046172243194, "learning_rate": 1.4684708034144403e-05, "loss": 0.174, "step": 6344 }, { "epoch": 0.36, "grad_norm": 0.3823814532754202, "learning_rate": 1.4683063875873215e-05, "loss": 0.3208, "step": 6345 }, { "epoch": 0.36, "grad_norm": 0.4802433585379294, "learning_rate": 1.4681419555427727e-05, "loss": 0.4175, "step": 6346 }, { "epoch": 0.36, "grad_norm": 0.521892647783289, "learning_rate": 1.467977507286488e-05, "loss": 0.3415, "step": 6347 }, { "epoch": 0.36, "grad_norm": 0.3458481345611792, "learning_rate": 1.4678130428241623e-05, "loss": 0.2438, "step": 6348 }, { "epoch": 0.36, "grad_norm": 0.3032108143688598, "learning_rate": 1.4676485621614913e-05, "loss": 0.2495, "step": 6349 }, { "epoch": 0.36, "grad_norm": 0.5573975802181028, "learning_rate": 1.4674840653041706e-05, "loss": 0.3531, "step": 6350 }, { "epoch": 0.36, "grad_norm": 0.37910874435426417, "learning_rate": 1.4673195522578967e-05, "loss": 0.2586, "step": 6351 }, { "epoch": 0.36, "grad_norm": 1.1453819020562075, "learning_rate": 1.467155023028367e-05, "loss": 0.8029, "step": 6352 }, { "epoch": 0.37, "grad_norm": 0.3225394768457753, "learning_rate": 1.4669904776212786e-05, "loss": 0.301, "step": 6353 }, { "epoch": 0.37, "grad_norm": 0.4135206494047781, "learning_rate": 1.4668259160423305e-05, "loss": 0.2995, "step": 6354 }, { "epoch": 0.37, "grad_norm": 0.30632042423602535, "learning_rate": 1.4666613382972205e-05, "loss": 0.2069, "step": 6355 }, { "epoch": 0.37, "grad_norm": 0.38142377744009864, "learning_rate": 1.4664967443916489e-05, "loss": 0.3089, "step": 6356 }, { "epoch": 0.37, "grad_norm": 0.3386038426663132, "learning_rate": 1.4663321343313148e-05, "loss": 0.2622, "step": 6357 }, { "epoch": 0.37, "grad_norm": 0.45717911991857507, "learning_rate": 1.4661675081219191e-05, "loss": 0.3994, "step": 6358 }, { "epoch": 0.37, "grad_norm": 0.39907584698503695, "learning_rate": 1.4660028657691626e-05, "loss": 0.3111, "step": 6359 }, { "epoch": 0.37, "grad_norm": 0.4939593127745104, "learning_rate": 1.465838207278747e-05, "loss": 0.3689, "step": 6360 }, { "epoch": 0.37, "grad_norm": 0.2352579898444018, "learning_rate": 1.4656735326563738e-05, "loss": 0.1747, "step": 6361 }, { "epoch": 0.37, "grad_norm": 0.59732590291894, "learning_rate": 1.4655088419077466e-05, "loss": 0.3185, "step": 6362 }, { "epoch": 0.37, "grad_norm": 0.39752606248880057, "learning_rate": 1.4653441350385682e-05, "loss": 0.3149, "step": 6363 }, { "epoch": 0.37, "grad_norm": 0.4368499204854432, "learning_rate": 1.4651794120545424e-05, "loss": 0.3275, "step": 6364 }, { "epoch": 0.37, "grad_norm": 0.3946428941104599, "learning_rate": 1.4650146729613735e-05, "loss": 0.311, "step": 6365 }, { "epoch": 0.37, "grad_norm": 0.4052584218015384, "learning_rate": 1.4648499177647665e-05, "loss": 0.3519, "step": 6366 }, { "epoch": 0.37, "grad_norm": 0.2228152253683647, "learning_rate": 1.4646851464704269e-05, "loss": 0.0727, "step": 6367 }, { "epoch": 0.37, "grad_norm": 0.55650450715017, "learning_rate": 1.4645203590840607e-05, "loss": 0.3699, "step": 6368 }, { "epoch": 0.37, "grad_norm": 0.2782351877624767, "learning_rate": 1.4643555556113742e-05, "loss": 0.2925, "step": 6369 }, { "epoch": 0.37, "grad_norm": 0.6856577889773536, "learning_rate": 1.4641907360580749e-05, "loss": 0.3796, "step": 6370 }, { "epoch": 0.37, "grad_norm": 0.49911463412678736, "learning_rate": 1.4640259004298706e-05, "loss": 0.3838, "step": 6371 }, { "epoch": 0.37, "grad_norm": 0.3727450596138779, "learning_rate": 1.4638610487324688e-05, "loss": 0.333, "step": 6372 }, { "epoch": 0.37, "grad_norm": 0.2839276525223571, "learning_rate": 1.4636961809715793e-05, "loss": 0.2624, "step": 6373 }, { "epoch": 0.37, "grad_norm": 0.24988639619999695, "learning_rate": 1.463531297152911e-05, "loss": 0.1668, "step": 6374 }, { "epoch": 0.37, "grad_norm": 0.5883872688338556, "learning_rate": 1.4633663972821737e-05, "loss": 0.4016, "step": 6375 }, { "epoch": 0.37, "grad_norm": 0.969636159928934, "learning_rate": 1.4632014813650779e-05, "loss": 0.4825, "step": 6376 }, { "epoch": 0.37, "grad_norm": 0.27400554273030164, "learning_rate": 1.4630365494073348e-05, "loss": 0.2349, "step": 6377 }, { "epoch": 0.37, "grad_norm": 0.5706865974074818, "learning_rate": 1.4628716014146558e-05, "loss": 0.4436, "step": 6378 }, { "epoch": 0.37, "grad_norm": 0.28185065914961815, "learning_rate": 1.4627066373927534e-05, "loss": 0.1703, "step": 6379 }, { "epoch": 0.37, "grad_norm": 0.5918352536787485, "learning_rate": 1.4625416573473397e-05, "loss": 0.2241, "step": 6380 }, { "epoch": 0.37, "grad_norm": 0.2945602690656717, "learning_rate": 1.4623766612841286e-05, "loss": 0.2792, "step": 6381 }, { "epoch": 0.37, "grad_norm": 1.245141698601791, "learning_rate": 1.4622116492088335e-05, "loss": 0.4757, "step": 6382 }, { "epoch": 0.37, "grad_norm": 0.7575767671387406, "learning_rate": 1.4620466211271686e-05, "loss": 0.4512, "step": 6383 }, { "epoch": 0.37, "grad_norm": 0.42239162484195003, "learning_rate": 1.4618815770448493e-05, "loss": 0.2349, "step": 6384 }, { "epoch": 0.37, "grad_norm": 0.28014002852446207, "learning_rate": 1.461716516967591e-05, "loss": 0.2347, "step": 6385 }, { "epoch": 0.37, "grad_norm": 0.792038715756322, "learning_rate": 1.4615514409011093e-05, "loss": 0.4452, "step": 6386 }, { "epoch": 0.37, "grad_norm": 0.7042697472461529, "learning_rate": 1.4613863488511214e-05, "loss": 0.2951, "step": 6387 }, { "epoch": 0.37, "grad_norm": 1.090984118378016, "learning_rate": 1.4612212408233438e-05, "loss": 0.5841, "step": 6388 }, { "epoch": 0.37, "grad_norm": 0.3318163237944138, "learning_rate": 1.4610561168234942e-05, "loss": 0.3015, "step": 6389 }, { "epoch": 0.37, "grad_norm": 0.36470561968264775, "learning_rate": 1.4608909768572917e-05, "loss": 0.2485, "step": 6390 }, { "epoch": 0.37, "grad_norm": 0.5483411965792786, "learning_rate": 1.4607258209304542e-05, "loss": 0.2347, "step": 6391 }, { "epoch": 0.37, "grad_norm": 0.4024074470010744, "learning_rate": 1.4605606490487013e-05, "loss": 0.2603, "step": 6392 }, { "epoch": 0.37, "grad_norm": 0.3917554003411267, "learning_rate": 1.4603954612177532e-05, "loss": 0.231, "step": 6393 }, { "epoch": 0.37, "grad_norm": 1.2772938766587616, "learning_rate": 1.4602302574433297e-05, "loss": 0.486, "step": 6394 }, { "epoch": 0.37, "grad_norm": 0.6668301917465416, "learning_rate": 1.4600650377311523e-05, "loss": 0.4474, "step": 6395 }, { "epoch": 0.37, "grad_norm": 0.39712967225015333, "learning_rate": 1.4598998020869426e-05, "loss": 0.3148, "step": 6396 }, { "epoch": 0.37, "grad_norm": 0.28444972237384936, "learning_rate": 1.4597345505164222e-05, "loss": 0.218, "step": 6397 }, { "epoch": 0.37, "grad_norm": 0.5753183483183651, "learning_rate": 1.4595692830253143e-05, "loss": 0.2875, "step": 6398 }, { "epoch": 0.37, "grad_norm": 0.5209561480298429, "learning_rate": 1.459403999619342e-05, "loss": 0.3033, "step": 6399 }, { "epoch": 0.37, "grad_norm": 0.47041709352127614, "learning_rate": 1.4592387003042287e-05, "loss": 0.2843, "step": 6400 }, { "epoch": 0.37, "grad_norm": 1.4217366843048336, "learning_rate": 1.4590733850856989e-05, "loss": 0.7602, "step": 6401 }, { "epoch": 0.37, "grad_norm": 0.3419329387584038, "learning_rate": 1.4589080539694778e-05, "loss": 0.2929, "step": 6402 }, { "epoch": 0.37, "grad_norm": 0.6029781463091936, "learning_rate": 1.4587427069612902e-05, "loss": 0.3747, "step": 6403 }, { "epoch": 0.37, "grad_norm": 0.3860877146598658, "learning_rate": 1.4585773440668626e-05, "loss": 0.3044, "step": 6404 }, { "epoch": 0.37, "grad_norm": 0.3332149808196481, "learning_rate": 1.4584119652919213e-05, "loss": 0.2862, "step": 6405 }, { "epoch": 0.37, "grad_norm": 0.2517107564009192, "learning_rate": 1.4582465706421935e-05, "loss": 0.1132, "step": 6406 }, { "epoch": 0.37, "grad_norm": 0.9180317484308079, "learning_rate": 1.4580811601234067e-05, "loss": 0.55, "step": 6407 }, { "epoch": 0.37, "grad_norm": 0.4199511381911743, "learning_rate": 1.4579157337412886e-05, "loss": 0.2734, "step": 6408 }, { "epoch": 0.37, "grad_norm": 0.40497014891679506, "learning_rate": 1.4577502915015687e-05, "loss": 0.3179, "step": 6409 }, { "epoch": 0.37, "grad_norm": 0.7870272373752119, "learning_rate": 1.4575848334099756e-05, "loss": 0.3261, "step": 6410 }, { "epoch": 0.37, "grad_norm": 0.30582763377378736, "learning_rate": 1.4574193594722394e-05, "loss": 0.256, "step": 6411 }, { "epoch": 0.37, "grad_norm": 0.3369040954406996, "learning_rate": 1.4572538696940908e-05, "loss": 0.2748, "step": 6412 }, { "epoch": 0.37, "grad_norm": 0.36361392670595094, "learning_rate": 1.4570883640812602e-05, "loss": 0.2604, "step": 6413 }, { "epoch": 0.37, "grad_norm": 0.3875128108510935, "learning_rate": 1.456922842639479e-05, "loss": 0.2852, "step": 6414 }, { "epoch": 0.37, "grad_norm": 0.8988967262988671, "learning_rate": 1.4567573053744797e-05, "loss": 0.4492, "step": 6415 }, { "epoch": 0.37, "grad_norm": 0.40165688387675075, "learning_rate": 1.4565917522919944e-05, "loss": 0.2684, "step": 6416 }, { "epoch": 0.37, "grad_norm": 0.38594474836512377, "learning_rate": 1.4564261833977563e-05, "loss": 0.3048, "step": 6417 }, { "epoch": 0.37, "grad_norm": 0.2507980036522095, "learning_rate": 1.4562605986974991e-05, "loss": 0.194, "step": 6418 }, { "epoch": 0.37, "grad_norm": 0.872875765589793, "learning_rate": 1.456094998196957e-05, "loss": 0.4096, "step": 6419 }, { "epoch": 0.37, "grad_norm": 0.358181410493378, "learning_rate": 1.4559293819018648e-05, "loss": 0.2836, "step": 6420 }, { "epoch": 0.37, "grad_norm": 0.45407158678411264, "learning_rate": 1.4557637498179577e-05, "loss": 0.346, "step": 6421 }, { "epoch": 0.37, "grad_norm": 0.9255722275983588, "learning_rate": 1.4555981019509716e-05, "loss": 0.5238, "step": 6422 }, { "epoch": 0.37, "grad_norm": 0.29691564293618045, "learning_rate": 1.4554324383066427e-05, "loss": 0.2093, "step": 6423 }, { "epoch": 0.37, "grad_norm": 0.4785704057795955, "learning_rate": 1.4552667588907082e-05, "loss": 0.3371, "step": 6424 }, { "epoch": 0.37, "grad_norm": 0.2952211128804953, "learning_rate": 1.4551010637089054e-05, "loss": 0.2773, "step": 6425 }, { "epoch": 0.37, "grad_norm": 0.36035741477242134, "learning_rate": 1.454935352766972e-05, "loss": 0.2268, "step": 6426 }, { "epoch": 0.37, "grad_norm": 0.8603431400721523, "learning_rate": 1.4547696260706474e-05, "loss": 0.4749, "step": 6427 }, { "epoch": 0.37, "grad_norm": 0.45518616624458275, "learning_rate": 1.4546038836256698e-05, "loss": 0.3162, "step": 6428 }, { "epoch": 0.37, "grad_norm": 0.32397378870781374, "learning_rate": 1.4544381254377794e-05, "loss": 0.2011, "step": 6429 }, { "epoch": 0.37, "grad_norm": 0.48798619337408167, "learning_rate": 1.4542723515127161e-05, "loss": 0.2987, "step": 6430 }, { "epoch": 0.37, "grad_norm": 0.46757065365840805, "learning_rate": 1.4541065618562211e-05, "loss": 0.301, "step": 6431 }, { "epoch": 0.37, "grad_norm": 0.39111537126054996, "learning_rate": 1.4539407564740353e-05, "loss": 0.2013, "step": 6432 }, { "epoch": 0.37, "grad_norm": 0.4772229613888993, "learning_rate": 1.4537749353719006e-05, "loss": 0.3447, "step": 6433 }, { "epoch": 0.37, "grad_norm": 0.7128445538916925, "learning_rate": 1.4536090985555595e-05, "loss": 0.4374, "step": 6434 }, { "epoch": 0.37, "grad_norm": 0.37996740838238047, "learning_rate": 1.4534432460307546e-05, "loss": 0.284, "step": 6435 }, { "epoch": 0.37, "grad_norm": 0.3521223528144348, "learning_rate": 1.4532773778032297e-05, "loss": 0.2757, "step": 6436 }, { "epoch": 0.37, "grad_norm": 0.4727501869932283, "learning_rate": 1.4531114938787285e-05, "loss": 0.3093, "step": 6437 }, { "epoch": 0.37, "grad_norm": 0.34669175481499864, "learning_rate": 1.452945594262996e-05, "loss": 0.2832, "step": 6438 }, { "epoch": 0.37, "grad_norm": 0.3986170161984597, "learning_rate": 1.452779678961777e-05, "loss": 0.2385, "step": 6439 }, { "epoch": 0.37, "grad_norm": 0.37509714723728876, "learning_rate": 1.4526137479808173e-05, "loss": 0.3058, "step": 6440 }, { "epoch": 0.37, "grad_norm": 0.3784935823336974, "learning_rate": 1.452447801325863e-05, "loss": 0.2805, "step": 6441 }, { "epoch": 0.37, "grad_norm": 0.9648337594394374, "learning_rate": 1.4522818390026605e-05, "loss": 0.4174, "step": 6442 }, { "epoch": 0.37, "grad_norm": 0.36332205508671817, "learning_rate": 1.4521158610169575e-05, "loss": 0.3118, "step": 6443 }, { "epoch": 0.37, "grad_norm": 0.3043988410895945, "learning_rate": 1.4519498673745019e-05, "loss": 0.2756, "step": 6444 }, { "epoch": 0.37, "grad_norm": 0.3661883748618258, "learning_rate": 1.4517838580810413e-05, "loss": 0.2444, "step": 6445 }, { "epoch": 0.37, "grad_norm": 0.8097337394192086, "learning_rate": 1.4516178331423257e-05, "loss": 0.576, "step": 6446 }, { "epoch": 0.37, "grad_norm": 0.33163644933954645, "learning_rate": 1.4514517925641035e-05, "loss": 0.3033, "step": 6447 }, { "epoch": 0.37, "grad_norm": 0.5218645713887373, "learning_rate": 1.4512857363521254e-05, "loss": 0.393, "step": 6448 }, { "epoch": 0.37, "grad_norm": 0.3793598909180065, "learning_rate": 1.4511196645121414e-05, "loss": 0.319, "step": 6449 }, { "epoch": 0.37, "grad_norm": 0.3896431397236735, "learning_rate": 1.450953577049903e-05, "loss": 0.2771, "step": 6450 }, { "epoch": 0.37, "grad_norm": 0.2705214298693227, "learning_rate": 1.4507874739711616e-05, "loss": 0.2298, "step": 6451 }, { "epoch": 0.37, "grad_norm": 0.34851249948794466, "learning_rate": 1.4506213552816694e-05, "loss": 0.2555, "step": 6452 }, { "epoch": 0.37, "grad_norm": 0.39110676315148374, "learning_rate": 1.4504552209871791e-05, "loss": 0.2659, "step": 6453 }, { "epoch": 0.37, "grad_norm": 0.5909154208429732, "learning_rate": 1.4502890710934438e-05, "loss": 0.4545, "step": 6454 }, { "epoch": 0.37, "grad_norm": 0.8661006074737951, "learning_rate": 1.4501229056062174e-05, "loss": 0.36, "step": 6455 }, { "epoch": 0.37, "grad_norm": 0.2980737565737271, "learning_rate": 1.449956724531254e-05, "loss": 0.2735, "step": 6456 }, { "epoch": 0.37, "grad_norm": 0.37860627191593116, "learning_rate": 1.4497905278743086e-05, "loss": 0.3064, "step": 6457 }, { "epoch": 0.37, "grad_norm": 0.4986638746583317, "learning_rate": 1.4496243156411367e-05, "loss": 0.3235, "step": 6458 }, { "epoch": 0.37, "grad_norm": 0.36881140079123487, "learning_rate": 1.4494580878374942e-05, "loss": 0.2306, "step": 6459 }, { "epoch": 0.37, "grad_norm": 0.4396370739515429, "learning_rate": 1.4492918444691371e-05, "loss": 0.3544, "step": 6460 }, { "epoch": 0.37, "grad_norm": 1.4881242453846941, "learning_rate": 1.4491255855418228e-05, "loss": 0.4526, "step": 6461 }, { "epoch": 0.37, "grad_norm": 0.31509315415532924, "learning_rate": 1.4489593110613087e-05, "loss": 0.2344, "step": 6462 }, { "epoch": 0.37, "grad_norm": 0.4766555392210357, "learning_rate": 1.4487930210333532e-05, "loss": 0.29, "step": 6463 }, { "epoch": 0.37, "grad_norm": 0.3242238172436043, "learning_rate": 1.4486267154637146e-05, "loss": 0.2684, "step": 6464 }, { "epoch": 0.37, "grad_norm": 0.38454949832911445, "learning_rate": 1.4484603943581523e-05, "loss": 0.1761, "step": 6465 }, { "epoch": 0.37, "grad_norm": 0.9162557910018109, "learning_rate": 1.4482940577224254e-05, "loss": 0.4149, "step": 6466 }, { "epoch": 0.37, "grad_norm": 0.9944619174618472, "learning_rate": 1.4481277055622948e-05, "loss": 0.3897, "step": 6467 }, { "epoch": 0.37, "grad_norm": 0.3133210801429484, "learning_rate": 1.4479613378835211e-05, "loss": 0.2176, "step": 6468 }, { "epoch": 0.37, "grad_norm": 0.49623665028789815, "learning_rate": 1.4477949546918655e-05, "loss": 0.4053, "step": 6469 }, { "epoch": 0.37, "grad_norm": 0.28227034323549527, "learning_rate": 1.4476285559930899e-05, "loss": 0.1809, "step": 6470 }, { "epoch": 0.37, "grad_norm": 0.5943690582874419, "learning_rate": 1.4474621417929566e-05, "loss": 0.2999, "step": 6471 }, { "epoch": 0.37, "grad_norm": 0.4368804350447025, "learning_rate": 1.4472957120972284e-05, "loss": 0.2854, "step": 6472 }, { "epoch": 0.37, "grad_norm": 1.0839653605743804, "learning_rate": 1.4471292669116692e-05, "loss": 0.4932, "step": 6473 }, { "epoch": 0.37, "grad_norm": 0.6103092418262205, "learning_rate": 1.4469628062420427e-05, "loss": 0.3787, "step": 6474 }, { "epoch": 0.37, "grad_norm": 0.2815049852060284, "learning_rate": 1.4467963300941135e-05, "loss": 0.1699, "step": 6475 }, { "epoch": 0.37, "grad_norm": 0.42956074118826143, "learning_rate": 1.4466298384736463e-05, "loss": 0.32, "step": 6476 }, { "epoch": 0.37, "grad_norm": 0.42963144754338745, "learning_rate": 1.4464633313864073e-05, "loss": 0.2869, "step": 6477 }, { "epoch": 0.37, "grad_norm": 0.5424138447706254, "learning_rate": 1.4462968088381621e-05, "loss": 0.327, "step": 6478 }, { "epoch": 0.37, "grad_norm": 0.5458547157279315, "learning_rate": 1.4461302708346778e-05, "loss": 0.3884, "step": 6479 }, { "epoch": 0.37, "grad_norm": 0.32194605311238567, "learning_rate": 1.4459637173817214e-05, "loss": 0.2684, "step": 6480 }, { "epoch": 0.37, "grad_norm": 0.4947001893698327, "learning_rate": 1.4457971484850604e-05, "loss": 0.2686, "step": 6481 }, { "epoch": 0.37, "grad_norm": 0.30792978417290967, "learning_rate": 1.4456305641504636e-05, "loss": 0.1929, "step": 6482 }, { "epoch": 0.37, "grad_norm": 0.39304218999952134, "learning_rate": 1.4454639643836993e-05, "loss": 0.3088, "step": 6483 }, { "epoch": 0.37, "grad_norm": 0.350822152919814, "learning_rate": 1.4452973491905372e-05, "loss": 0.3195, "step": 6484 }, { "epoch": 0.37, "grad_norm": 0.7457583251157986, "learning_rate": 1.4451307185767469e-05, "loss": 0.4278, "step": 6485 }, { "epoch": 0.37, "grad_norm": 0.9576714232110599, "learning_rate": 1.4449640725480991e-05, "loss": 0.471, "step": 6486 }, { "epoch": 0.37, "grad_norm": 0.38426895057800664, "learning_rate": 1.4447974111103645e-05, "loss": 0.285, "step": 6487 }, { "epoch": 0.37, "grad_norm": 0.22401458190615092, "learning_rate": 1.4446307342693149e-05, "loss": 0.2138, "step": 6488 }, { "epoch": 0.37, "grad_norm": 0.5444407977701945, "learning_rate": 1.4444640420307217e-05, "loss": 0.319, "step": 6489 }, { "epoch": 0.37, "grad_norm": 0.4500081741351894, "learning_rate": 1.4442973344003582e-05, "loss": 0.3505, "step": 6490 }, { "epoch": 0.37, "grad_norm": 1.0057233925466538, "learning_rate": 1.4441306113839971e-05, "loss": 0.5737, "step": 6491 }, { "epoch": 0.37, "grad_norm": 0.34070622377314463, "learning_rate": 1.4439638729874119e-05, "loss": 0.2789, "step": 6492 }, { "epoch": 0.37, "grad_norm": 0.39274503048589104, "learning_rate": 1.4437971192163768e-05, "loss": 0.3363, "step": 6493 }, { "epoch": 0.37, "grad_norm": 0.260970503088254, "learning_rate": 1.4436303500766667e-05, "loss": 0.139, "step": 6494 }, { "epoch": 0.37, "grad_norm": 0.4353457494525623, "learning_rate": 1.4434635655740566e-05, "loss": 0.3152, "step": 6495 }, { "epoch": 0.37, "grad_norm": 0.36338683442474146, "learning_rate": 1.4432967657143223e-05, "loss": 0.3243, "step": 6496 }, { "epoch": 0.37, "grad_norm": 1.4910761065746299, "learning_rate": 1.44312995050324e-05, "loss": 0.8249, "step": 6497 }, { "epoch": 0.37, "grad_norm": 0.3636983831741691, "learning_rate": 1.4429631199465866e-05, "loss": 0.2165, "step": 6498 }, { "epoch": 0.37, "grad_norm": 0.6184276802434224, "learning_rate": 1.4427962740501396e-05, "loss": 0.4225, "step": 6499 }, { "epoch": 0.37, "grad_norm": 0.4139216454007609, "learning_rate": 1.4426294128196763e-05, "loss": 0.3305, "step": 6500 }, { "epoch": 0.37, "grad_norm": 0.49001496573354486, "learning_rate": 1.4424625362609757e-05, "loss": 0.2682, "step": 6501 }, { "epoch": 0.37, "grad_norm": 0.27947362207932114, "learning_rate": 1.4422956443798165e-05, "loss": 0.2087, "step": 6502 }, { "epoch": 0.37, "grad_norm": 0.5741699302860873, "learning_rate": 1.4421287371819781e-05, "loss": 0.4186, "step": 6503 }, { "epoch": 0.37, "grad_norm": 0.7860385506372445, "learning_rate": 1.4419618146732404e-05, "loss": 0.2432, "step": 6504 }, { "epoch": 0.37, "grad_norm": 0.4018869658660575, "learning_rate": 1.4417948768593842e-05, "loss": 0.3231, "step": 6505 }, { "epoch": 0.37, "grad_norm": 0.5974194600768572, "learning_rate": 1.4416279237461903e-05, "loss": 0.4459, "step": 6506 }, { "epoch": 0.37, "grad_norm": 0.5043837478835471, "learning_rate": 1.4414609553394408e-05, "loss": 0.2003, "step": 6507 }, { "epoch": 0.37, "grad_norm": 0.3785285725739497, "learning_rate": 1.441293971644917e-05, "loss": 0.2199, "step": 6508 }, { "epoch": 0.37, "grad_norm": 1.1012730369361647, "learning_rate": 1.4411269726684022e-05, "loss": 0.7594, "step": 6509 }, { "epoch": 0.37, "grad_norm": 0.8161801633283232, "learning_rate": 1.4409599584156791e-05, "loss": 0.4433, "step": 6510 }, { "epoch": 0.37, "grad_norm": 0.3898556198374485, "learning_rate": 1.4407929288925316e-05, "loss": 0.2459, "step": 6511 }, { "epoch": 0.37, "grad_norm": 0.5468015768810729, "learning_rate": 1.440625884104744e-05, "loss": 0.3783, "step": 6512 }, { "epoch": 0.37, "grad_norm": 0.734511890686694, "learning_rate": 1.440458824058101e-05, "loss": 0.3978, "step": 6513 }, { "epoch": 0.37, "grad_norm": 0.2729669946052368, "learning_rate": 1.4402917487583876e-05, "loss": 0.1975, "step": 6514 }, { "epoch": 0.37, "grad_norm": 0.5058855067236363, "learning_rate": 1.4401246582113904e-05, "loss": 0.4243, "step": 6515 }, { "epoch": 0.37, "grad_norm": 0.2797481012857069, "learning_rate": 1.4399575524228949e-05, "loss": 0.237, "step": 6516 }, { "epoch": 0.37, "grad_norm": 0.36853983359838033, "learning_rate": 1.4397904313986881e-05, "loss": 0.2095, "step": 6517 }, { "epoch": 0.37, "grad_norm": 0.8808245431323464, "learning_rate": 1.439623295144558e-05, "loss": 0.4242, "step": 6518 }, { "epoch": 0.37, "grad_norm": 0.32676114481551416, "learning_rate": 1.4394561436662917e-05, "loss": 0.282, "step": 6519 }, { "epoch": 0.37, "grad_norm": 0.32006809977392964, "learning_rate": 1.4392889769696783e-05, "loss": 0.2367, "step": 6520 }, { "epoch": 0.37, "grad_norm": 0.4047428854960159, "learning_rate": 1.4391217950605063e-05, "loss": 0.3227, "step": 6521 }, { "epoch": 0.37, "grad_norm": 0.36465854255387276, "learning_rate": 1.4389545979445655e-05, "loss": 0.245, "step": 6522 }, { "epoch": 0.37, "grad_norm": 0.35563849101387063, "learning_rate": 1.438787385627646e-05, "loss": 0.3014, "step": 6523 }, { "epoch": 0.37, "grad_norm": 0.3814440985840305, "learning_rate": 1.438620158115538e-05, "loss": 0.2927, "step": 6524 }, { "epoch": 0.37, "grad_norm": 1.1983742937891095, "learning_rate": 1.438452915414033e-05, "loss": 0.7023, "step": 6525 }, { "epoch": 0.37, "grad_norm": 0.34701945637472476, "learning_rate": 1.4382856575289223e-05, "loss": 0.2919, "step": 6526 }, { "epoch": 0.38, "grad_norm": 0.35292559953145647, "learning_rate": 1.438118384465998e-05, "loss": 0.2924, "step": 6527 }, { "epoch": 0.38, "grad_norm": 0.26027699865809883, "learning_rate": 1.4379510962310532e-05, "loss": 0.1848, "step": 6528 }, { "epoch": 0.38, "grad_norm": 0.35662767213458074, "learning_rate": 1.4377837928298804e-05, "loss": 0.3079, "step": 6529 }, { "epoch": 0.38, "grad_norm": 0.6596498854621378, "learning_rate": 1.4376164742682738e-05, "loss": 0.3599, "step": 6530 }, { "epoch": 0.38, "grad_norm": 0.36191791630131526, "learning_rate": 1.4374491405520274e-05, "loss": 0.2928, "step": 6531 }, { "epoch": 0.38, "grad_norm": 0.3860443428448384, "learning_rate": 1.4372817916869364e-05, "loss": 0.3045, "step": 6532 }, { "epoch": 0.38, "grad_norm": 0.9970264359008559, "learning_rate": 1.4371144276787954e-05, "loss": 0.5673, "step": 6533 }, { "epoch": 0.38, "grad_norm": 0.4185340140823584, "learning_rate": 1.436947048533401e-05, "loss": 0.2696, "step": 6534 }, { "epoch": 0.38, "grad_norm": 0.37333837957749794, "learning_rate": 1.4367796542565486e-05, "loss": 0.2877, "step": 6535 }, { "epoch": 0.38, "grad_norm": 0.38025057980172355, "learning_rate": 1.4366122448540361e-05, "loss": 0.266, "step": 6536 }, { "epoch": 0.38, "grad_norm": 0.6973816542088787, "learning_rate": 1.4364448203316599e-05, "loss": 0.2136, "step": 6537 }, { "epoch": 0.38, "grad_norm": 0.354013344957813, "learning_rate": 1.4362773806952184e-05, "loss": 0.2847, "step": 6538 }, { "epoch": 0.38, "grad_norm": 0.42870882657679155, "learning_rate": 1.4361099259505102e-05, "loss": 0.3461, "step": 6539 }, { "epoch": 0.38, "grad_norm": 0.7161912133243448, "learning_rate": 1.4359424561033337e-05, "loss": 0.3663, "step": 6540 }, { "epoch": 0.38, "grad_norm": 0.32702988657639787, "learning_rate": 1.435774971159489e-05, "loss": 0.2704, "step": 6541 }, { "epoch": 0.38, "grad_norm": 0.2874840199994187, "learning_rate": 1.4356074711247759e-05, "loss": 0.1933, "step": 6542 }, { "epoch": 0.38, "grad_norm": 0.3422143523196144, "learning_rate": 1.4354399560049943e-05, "loss": 0.2543, "step": 6543 }, { "epoch": 0.38, "grad_norm": 0.3620610486020329, "learning_rate": 1.4352724258059461e-05, "loss": 0.2882, "step": 6544 }, { "epoch": 0.38, "grad_norm": 0.6999998230635959, "learning_rate": 1.4351048805334325e-05, "loss": 0.5029, "step": 6545 }, { "epoch": 0.38, "grad_norm": 0.7040615510823431, "learning_rate": 1.4349373201932553e-05, "loss": 0.4828, "step": 6546 }, { "epoch": 0.38, "grad_norm": 0.34328707423243926, "learning_rate": 1.4347697447912176e-05, "loss": 0.2311, "step": 6547 }, { "epoch": 0.38, "grad_norm": 0.34255042274766273, "learning_rate": 1.4346021543331224e-05, "loss": 0.2109, "step": 6548 }, { "epoch": 0.38, "grad_norm": 0.9416350640193081, "learning_rate": 1.4344345488247733e-05, "loss": 0.5087, "step": 6549 }, { "epoch": 0.38, "grad_norm": 0.3524115680822469, "learning_rate": 1.4342669282719741e-05, "loss": 0.2236, "step": 6550 }, { "epoch": 0.38, "grad_norm": 0.34939936511288033, "learning_rate": 1.4340992926805304e-05, "loss": 0.3067, "step": 6551 }, { "epoch": 0.38, "grad_norm": 0.9389882276144642, "learning_rate": 1.4339316420562464e-05, "loss": 0.4627, "step": 6552 }, { "epoch": 0.38, "grad_norm": 0.3572146276189607, "learning_rate": 1.4337639764049285e-05, "loss": 0.2025, "step": 6553 }, { "epoch": 0.38, "grad_norm": 0.3440940221274594, "learning_rate": 1.4335962957323827e-05, "loss": 0.2088, "step": 6554 }, { "epoch": 0.38, "grad_norm": 0.43469991652072393, "learning_rate": 1.433428600044416e-05, "loss": 0.3331, "step": 6555 }, { "epoch": 0.38, "grad_norm": 0.30077868298122107, "learning_rate": 1.4332608893468351e-05, "loss": 0.1945, "step": 6556 }, { "epoch": 0.38, "grad_norm": 0.6253365617274365, "learning_rate": 1.4330931636454489e-05, "loss": 0.4415, "step": 6557 }, { "epoch": 0.38, "grad_norm": 0.47640301786281314, "learning_rate": 1.4329254229460645e-05, "loss": 0.3846, "step": 6558 }, { "epoch": 0.38, "grad_norm": 0.30769115910174644, "learning_rate": 1.4327576672544917e-05, "loss": 0.2608, "step": 6559 }, { "epoch": 0.38, "grad_norm": 0.24923379904346613, "learning_rate": 1.432589896576539e-05, "loss": 0.152, "step": 6560 }, { "epoch": 0.38, "grad_norm": 1.1160575763268, "learning_rate": 1.4324221109180173e-05, "loss": 0.6775, "step": 6561 }, { "epoch": 0.38, "grad_norm": 0.39827529165073156, "learning_rate": 1.4322543102847362e-05, "loss": 0.2889, "step": 6562 }, { "epoch": 0.38, "grad_norm": 0.4097091571777183, "learning_rate": 1.432086494682507e-05, "loss": 0.3128, "step": 6563 }, { "epoch": 0.38, "grad_norm": 1.4122864900557979, "learning_rate": 1.4319186641171412e-05, "loss": 0.7525, "step": 6564 }, { "epoch": 0.38, "grad_norm": 0.345257798485318, "learning_rate": 1.4317508185944504e-05, "loss": 0.2947, "step": 6565 }, { "epoch": 0.38, "grad_norm": 0.18780929074416594, "learning_rate": 1.4315829581202474e-05, "loss": 0.0706, "step": 6566 }, { "epoch": 0.38, "grad_norm": 0.37982000063976756, "learning_rate": 1.431415082700345e-05, "loss": 0.326, "step": 6567 }, { "epoch": 0.38, "grad_norm": 0.37529555056231606, "learning_rate": 1.4312471923405571e-05, "loss": 0.2817, "step": 6568 }, { "epoch": 0.38, "grad_norm": 0.7558881857203945, "learning_rate": 1.4310792870466973e-05, "loss": 0.3752, "step": 6569 }, { "epoch": 0.38, "grad_norm": 0.4981673910095424, "learning_rate": 1.4309113668245804e-05, "loss": 0.4285, "step": 6570 }, { "epoch": 0.38, "grad_norm": 0.32064816753904357, "learning_rate": 1.4307434316800213e-05, "loss": 0.2705, "step": 6571 }, { "epoch": 0.38, "grad_norm": 0.30413949280789165, "learning_rate": 1.4305754816188358e-05, "loss": 0.2559, "step": 6572 }, { "epoch": 0.38, "grad_norm": 0.4309987650378867, "learning_rate": 1.4304075166468396e-05, "loss": 0.2168, "step": 6573 }, { "epoch": 0.38, "grad_norm": 0.40311342743064493, "learning_rate": 1.43023953676985e-05, "loss": 0.2705, "step": 6574 }, { "epoch": 0.38, "grad_norm": 0.2954206348253972, "learning_rate": 1.4300715419936834e-05, "loss": 0.3028, "step": 6575 }, { "epoch": 0.38, "grad_norm": 0.9026406553036119, "learning_rate": 1.4299035323241583e-05, "loss": 0.5688, "step": 6576 }, { "epoch": 0.38, "grad_norm": 0.3587211236933703, "learning_rate": 1.429735507767092e-05, "loss": 0.2561, "step": 6577 }, { "epoch": 0.38, "grad_norm": 0.26006833779910876, "learning_rate": 1.4295674683283037e-05, "loss": 0.2293, "step": 6578 }, { "epoch": 0.38, "grad_norm": 0.3906207143635448, "learning_rate": 1.4293994140136123e-05, "loss": 0.2902, "step": 6579 }, { "epoch": 0.38, "grad_norm": 0.3336786346711805, "learning_rate": 1.4292313448288377e-05, "loss": 0.2849, "step": 6580 }, { "epoch": 0.38, "grad_norm": 0.7731830224890437, "learning_rate": 1.4290632607797998e-05, "loss": 0.4842, "step": 6581 }, { "epoch": 0.38, "grad_norm": 0.46191721868987656, "learning_rate": 1.4288951618723201e-05, "loss": 0.2651, "step": 6582 }, { "epoch": 0.38, "grad_norm": 0.3088949080198425, "learning_rate": 1.428727048112219e-05, "loss": 0.2683, "step": 6583 }, { "epoch": 0.38, "grad_norm": 1.0472390701706424, "learning_rate": 1.4285589195053191e-05, "loss": 0.6867, "step": 6584 }, { "epoch": 0.38, "grad_norm": 0.3409265332982283, "learning_rate": 1.428390776057442e-05, "loss": 0.2472, "step": 6585 }, { "epoch": 0.38, "grad_norm": 0.35059501414858735, "learning_rate": 1.4282226177744107e-05, "loss": 0.2534, "step": 6586 }, { "epoch": 0.38, "grad_norm": 0.3968681655453618, "learning_rate": 1.4280544446620485e-05, "loss": 0.3418, "step": 6587 }, { "epoch": 0.38, "grad_norm": 1.3699405066952637, "learning_rate": 1.4278862567261796e-05, "loss": 0.8169, "step": 6588 }, { "epoch": 0.38, "grad_norm": 0.36354057094968956, "learning_rate": 1.4277180539726278e-05, "loss": 0.1807, "step": 6589 }, { "epoch": 0.38, "grad_norm": 0.37436367140991333, "learning_rate": 1.427549836407218e-05, "loss": 0.274, "step": 6590 }, { "epoch": 0.38, "grad_norm": 0.4031137876109123, "learning_rate": 1.4273816040357762e-05, "loss": 0.3387, "step": 6591 }, { "epoch": 0.38, "grad_norm": 0.39913767829709507, "learning_rate": 1.4272133568641273e-05, "loss": 0.2186, "step": 6592 }, { "epoch": 0.38, "grad_norm": 0.34901425704231603, "learning_rate": 1.4270450948980989e-05, "loss": 0.2849, "step": 6593 }, { "epoch": 0.38, "grad_norm": 0.43807677930615996, "learning_rate": 1.4268768181435166e-05, "loss": 0.3457, "step": 6594 }, { "epoch": 0.38, "grad_norm": 0.33170476466487625, "learning_rate": 1.4267085266062088e-05, "loss": 0.1751, "step": 6595 }, { "epoch": 0.38, "grad_norm": 0.47900650476435713, "learning_rate": 1.4265402202920029e-05, "loss": 0.3583, "step": 6596 }, { "epoch": 0.38, "grad_norm": 0.6101026898693607, "learning_rate": 1.4263718992067276e-05, "loss": 0.44, "step": 6597 }, { "epoch": 0.38, "grad_norm": 0.3100847262447763, "learning_rate": 1.4262035633562117e-05, "loss": 0.2415, "step": 6598 }, { "epoch": 0.38, "grad_norm": 0.29612737366854475, "learning_rate": 1.4260352127462848e-05, "loss": 0.2147, "step": 6599 }, { "epoch": 0.38, "grad_norm": 1.0381415145756596, "learning_rate": 1.4258668473827766e-05, "loss": 0.7037, "step": 6600 }, { "epoch": 0.38, "grad_norm": 0.40518343031761545, "learning_rate": 1.4256984672715182e-05, "loss": 0.2977, "step": 6601 }, { "epoch": 0.38, "grad_norm": 0.4991942601955228, "learning_rate": 1.4255300724183396e-05, "loss": 0.2844, "step": 6602 }, { "epoch": 0.38, "grad_norm": 0.43905524147113645, "learning_rate": 1.4253616628290735e-05, "loss": 0.3412, "step": 6603 }, { "epoch": 0.38, "grad_norm": 0.32293505302934256, "learning_rate": 1.425193238509551e-05, "loss": 0.2129, "step": 6604 }, { "epoch": 0.38, "grad_norm": 0.30523910076856414, "learning_rate": 1.425024799465605e-05, "loss": 0.1843, "step": 6605 }, { "epoch": 0.38, "grad_norm": 0.7134300357593555, "learning_rate": 1.4248563457030684e-05, "loss": 0.3323, "step": 6606 }, { "epoch": 0.38, "grad_norm": 0.3821121733080358, "learning_rate": 1.4246878772277748e-05, "loss": 0.2799, "step": 6607 }, { "epoch": 0.38, "grad_norm": 0.5362957512770763, "learning_rate": 1.4245193940455583e-05, "loss": 0.2907, "step": 6608 }, { "epoch": 0.38, "grad_norm": 0.7015232324733971, "learning_rate": 1.4243508961622536e-05, "loss": 0.4711, "step": 6609 }, { "epoch": 0.38, "grad_norm": 0.36283666869718023, "learning_rate": 1.4241823835836957e-05, "loss": 0.231, "step": 6610 }, { "epoch": 0.38, "grad_norm": 0.3676225906926481, "learning_rate": 1.4240138563157197e-05, "loss": 0.2852, "step": 6611 }, { "epoch": 0.38, "grad_norm": 0.3206621409131723, "learning_rate": 1.4238453143641623e-05, "loss": 0.1736, "step": 6612 }, { "epoch": 0.38, "grad_norm": 0.6024596594608613, "learning_rate": 1.4236767577348597e-05, "loss": 0.3085, "step": 6613 }, { "epoch": 0.38, "grad_norm": 0.3601264112579988, "learning_rate": 1.4235081864336495e-05, "loss": 0.3118, "step": 6614 }, { "epoch": 0.38, "grad_norm": 0.4754139334883257, "learning_rate": 1.4233396004663686e-05, "loss": 0.2908, "step": 6615 }, { "epoch": 0.38, "grad_norm": 0.6780714577012267, "learning_rate": 1.423170999838856e-05, "loss": 0.3664, "step": 6616 }, { "epoch": 0.38, "grad_norm": 0.4271908689523186, "learning_rate": 1.4230023845569497e-05, "loss": 0.375, "step": 6617 }, { "epoch": 0.38, "grad_norm": 0.2646098949172002, "learning_rate": 1.422833754626489e-05, "loss": 0.2093, "step": 6618 }, { "epoch": 0.38, "grad_norm": 0.36022570135160514, "learning_rate": 1.4226651100533136e-05, "loss": 0.2529, "step": 6619 }, { "epoch": 0.38, "grad_norm": 0.4342374594084135, "learning_rate": 1.4224964508432635e-05, "loss": 0.3454, "step": 6620 }, { "epoch": 0.38, "grad_norm": 1.0431596454133143, "learning_rate": 1.4223277770021794e-05, "loss": 0.4782, "step": 6621 }, { "epoch": 0.38, "grad_norm": 0.2630385189545252, "learning_rate": 1.4221590885359029e-05, "loss": 0.2208, "step": 6622 }, { "epoch": 0.38, "grad_norm": 0.37912554241453955, "learning_rate": 1.421990385450275e-05, "loss": 0.3119, "step": 6623 }, { "epoch": 0.38, "grad_norm": 0.31620577959594415, "learning_rate": 1.4218216677511383e-05, "loss": 0.2133, "step": 6624 }, { "epoch": 0.38, "grad_norm": 0.7817419011776064, "learning_rate": 1.4216529354443355e-05, "loss": 0.2797, "step": 6625 }, { "epoch": 0.38, "grad_norm": 0.3580806273138164, "learning_rate": 1.4214841885357096e-05, "loss": 0.3249, "step": 6626 }, { "epoch": 0.38, "grad_norm": 0.47803700129219767, "learning_rate": 1.4213154270311043e-05, "loss": 0.4245, "step": 6627 }, { "epoch": 0.38, "grad_norm": 0.29809480038275465, "learning_rate": 1.421146650936364e-05, "loss": 0.1387, "step": 6628 }, { "epoch": 0.38, "grad_norm": 0.3552015102693456, "learning_rate": 1.4209778602573332e-05, "loss": 0.2911, "step": 6629 }, { "epoch": 0.38, "grad_norm": 0.371671833280386, "learning_rate": 1.4208090549998572e-05, "loss": 0.3407, "step": 6630 }, { "epoch": 0.38, "grad_norm": 0.774988864764028, "learning_rate": 1.420640235169782e-05, "loss": 0.3303, "step": 6631 }, { "epoch": 0.38, "grad_norm": 0.2656032968656245, "learning_rate": 1.420471400772953e-05, "loss": 0.2185, "step": 6632 }, { "epoch": 0.38, "grad_norm": 0.7574900916209983, "learning_rate": 1.4203025518152178e-05, "loss": 0.4876, "step": 6633 }, { "epoch": 0.38, "grad_norm": 0.3571697183187376, "learning_rate": 1.420133688302423e-05, "loss": 0.3068, "step": 6634 }, { "epoch": 0.38, "grad_norm": 0.3627025977269262, "learning_rate": 1.419964810240417e-05, "loss": 0.2454, "step": 6635 }, { "epoch": 0.38, "grad_norm": 0.9722718639281581, "learning_rate": 1.4197959176350476e-05, "loss": 0.5431, "step": 6636 }, { "epoch": 0.38, "grad_norm": 0.45319845739485237, "learning_rate": 1.4196270104921637e-05, "loss": 0.2968, "step": 6637 }, { "epoch": 0.38, "grad_norm": 0.3168864543772265, "learning_rate": 1.4194580888176141e-05, "loss": 0.2416, "step": 6638 }, { "epoch": 0.38, "grad_norm": 0.4402300020745205, "learning_rate": 1.4192891526172494e-05, "loss": 0.2527, "step": 6639 }, { "epoch": 0.38, "grad_norm": 0.7619509636775692, "learning_rate": 1.419120201896919e-05, "loss": 0.4639, "step": 6640 }, { "epoch": 0.38, "grad_norm": 0.4094808720491614, "learning_rate": 1.4189512366624745e-05, "loss": 0.2223, "step": 6641 }, { "epoch": 0.38, "grad_norm": 0.49203402514236594, "learning_rate": 1.4187822569197662e-05, "loss": 0.3586, "step": 6642 }, { "epoch": 0.38, "grad_norm": 0.6042575546599518, "learning_rate": 1.4186132626746466e-05, "loss": 0.3776, "step": 6643 }, { "epoch": 0.38, "grad_norm": 0.2850850597785273, "learning_rate": 1.4184442539329677e-05, "loss": 0.1775, "step": 6644 }, { "epoch": 0.38, "grad_norm": 0.41097360765078583, "learning_rate": 1.4182752307005822e-05, "loss": 0.2656, "step": 6645 }, { "epoch": 0.38, "grad_norm": 0.3749280168749956, "learning_rate": 1.4181061929833435e-05, "loss": 0.3093, "step": 6646 }, { "epoch": 0.38, "grad_norm": 0.36949049484555013, "learning_rate": 1.4179371407871054e-05, "loss": 0.2755, "step": 6647 }, { "epoch": 0.38, "grad_norm": 0.8305477195457657, "learning_rate": 1.4177680741177217e-05, "loss": 0.3878, "step": 6648 }, { "epoch": 0.38, "grad_norm": 0.9615488498518213, "learning_rate": 1.4175989929810481e-05, "loss": 0.5805, "step": 6649 }, { "epoch": 0.38, "grad_norm": 0.2774945282091035, "learning_rate": 1.417429897382939e-05, "loss": 0.2652, "step": 6650 }, { "epoch": 0.38, "grad_norm": 0.21826789390351636, "learning_rate": 1.4172607873292505e-05, "loss": 0.1444, "step": 6651 }, { "epoch": 0.38, "grad_norm": 0.7917466348827511, "learning_rate": 1.4170916628258392e-05, "loss": 0.5094, "step": 6652 }, { "epoch": 0.38, "grad_norm": 0.4402092475426011, "learning_rate": 1.4169225238785611e-05, "loss": 0.3204, "step": 6653 }, { "epoch": 0.38, "grad_norm": 0.48816511867608736, "learning_rate": 1.4167533704932743e-05, "loss": 0.2935, "step": 6654 }, { "epoch": 0.38, "grad_norm": 0.7306057653718944, "learning_rate": 1.416584202675836e-05, "loss": 0.44, "step": 6655 }, { "epoch": 0.38, "grad_norm": 0.32525920456793944, "learning_rate": 1.4164150204321046e-05, "loss": 0.2374, "step": 6656 }, { "epoch": 0.38, "grad_norm": 0.3296501402587944, "learning_rate": 1.4162458237679389e-05, "loss": 0.1485, "step": 6657 }, { "epoch": 0.38, "grad_norm": 0.36981045940986673, "learning_rate": 1.4160766126891985e-05, "loss": 0.3043, "step": 6658 }, { "epoch": 0.38, "grad_norm": 0.3883473725235902, "learning_rate": 1.4159073872017427e-05, "loss": 0.2873, "step": 6659 }, { "epoch": 0.38, "grad_norm": 0.8316799239970233, "learning_rate": 1.4157381473114323e-05, "loss": 0.4332, "step": 6660 }, { "epoch": 0.38, "grad_norm": 0.40166375360596235, "learning_rate": 1.4155688930241274e-05, "loss": 0.218, "step": 6661 }, { "epoch": 0.38, "grad_norm": 0.375701225006493, "learning_rate": 1.4153996243456898e-05, "loss": 0.2991, "step": 6662 }, { "epoch": 0.38, "grad_norm": 0.2775566069575302, "learning_rate": 1.4152303412819808e-05, "loss": 0.2017, "step": 6663 }, { "epoch": 0.38, "grad_norm": 0.7846062358543829, "learning_rate": 1.4150610438388633e-05, "loss": 0.3898, "step": 6664 }, { "epoch": 0.38, "grad_norm": 0.35540285474583694, "learning_rate": 1.4148917320221992e-05, "loss": 0.2879, "step": 6665 }, { "epoch": 0.38, "grad_norm": 0.36580621119695167, "learning_rate": 1.4147224058378525e-05, "loss": 0.3285, "step": 6666 }, { "epoch": 0.38, "grad_norm": 0.8255865521291464, "learning_rate": 1.4145530652916868e-05, "loss": 0.3617, "step": 6667 }, { "epoch": 0.38, "grad_norm": 0.35219097025016854, "learning_rate": 1.4143837103895663e-05, "loss": 0.278, "step": 6668 }, { "epoch": 0.38, "grad_norm": 0.29669356912673234, "learning_rate": 1.4142143411373559e-05, "loss": 0.2179, "step": 6669 }, { "epoch": 0.38, "grad_norm": 0.4085564007164582, "learning_rate": 1.4140449575409203e-05, "loss": 0.2935, "step": 6670 }, { "epoch": 0.38, "grad_norm": 0.3408342696053366, "learning_rate": 1.4138755596061257e-05, "loss": 0.2819, "step": 6671 }, { "epoch": 0.38, "grad_norm": 0.8311186813808776, "learning_rate": 1.4137061473388383e-05, "loss": 0.617, "step": 6672 }, { "epoch": 0.38, "grad_norm": 0.5219159840804157, "learning_rate": 1.4135367207449248e-05, "loss": 0.3674, "step": 6673 }, { "epoch": 0.38, "grad_norm": 0.29763937422590947, "learning_rate": 1.4133672798302525e-05, "loss": 0.2223, "step": 6674 }, { "epoch": 0.38, "grad_norm": 0.25470244060159813, "learning_rate": 1.4131978246006892e-05, "loss": 0.1708, "step": 6675 }, { "epoch": 0.38, "grad_norm": 0.6082879025289717, "learning_rate": 1.4130283550621027e-05, "loss": 0.4386, "step": 6676 }, { "epoch": 0.38, "grad_norm": 0.3082473816953717, "learning_rate": 1.4128588712203626e-05, "loss": 0.1922, "step": 6677 }, { "epoch": 0.38, "grad_norm": 0.40828261098657215, "learning_rate": 1.4126893730813369e-05, "loss": 0.3571, "step": 6678 }, { "epoch": 0.38, "grad_norm": 1.284813336317904, "learning_rate": 1.4125198606508963e-05, "loss": 0.587, "step": 6679 }, { "epoch": 0.38, "grad_norm": 0.3249047265979789, "learning_rate": 1.4123503339349105e-05, "loss": 0.2049, "step": 6680 }, { "epoch": 0.38, "grad_norm": 0.27026754771556555, "learning_rate": 1.4121807929392505e-05, "loss": 0.2416, "step": 6681 }, { "epoch": 0.38, "grad_norm": 0.4587471969379784, "learning_rate": 1.4120112376697873e-05, "loss": 0.3916, "step": 6682 }, { "epoch": 0.38, "grad_norm": 0.30805966674421015, "learning_rate": 1.4118416681323925e-05, "loss": 0.1992, "step": 6683 }, { "epoch": 0.38, "grad_norm": 1.2854672987600315, "learning_rate": 1.4116720843329385e-05, "loss": 0.8468, "step": 6684 }, { "epoch": 0.38, "grad_norm": 0.5225364984100592, "learning_rate": 1.4115024862772981e-05, "loss": 0.3391, "step": 6685 }, { "epoch": 0.38, "grad_norm": 0.3050026501666236, "learning_rate": 1.4113328739713442e-05, "loss": 0.2727, "step": 6686 }, { "epoch": 0.38, "grad_norm": 0.7048532439613371, "learning_rate": 1.4111632474209506e-05, "loss": 0.3857, "step": 6687 }, { "epoch": 0.38, "grad_norm": 0.2856578199252998, "learning_rate": 1.4109936066319915e-05, "loss": 0.2344, "step": 6688 }, { "epoch": 0.38, "grad_norm": 0.38790911949598295, "learning_rate": 1.4108239516103412e-05, "loss": 0.2793, "step": 6689 }, { "epoch": 0.38, "grad_norm": 0.42389402150691363, "learning_rate": 1.4106542823618754e-05, "loss": 0.2845, "step": 6690 }, { "epoch": 0.38, "grad_norm": 1.1114232450461785, "learning_rate": 1.4104845988924694e-05, "loss": 0.6795, "step": 6691 }, { "epoch": 0.38, "grad_norm": 0.31314498273119074, "learning_rate": 1.4103149012079994e-05, "loss": 0.2629, "step": 6692 }, { "epoch": 0.38, "grad_norm": 0.4220924174883716, "learning_rate": 1.4101451893143418e-05, "loss": 0.2873, "step": 6693 }, { "epoch": 0.38, "grad_norm": 0.34138109317922455, "learning_rate": 1.4099754632173744e-05, "loss": 0.2751, "step": 6694 }, { "epoch": 0.38, "grad_norm": 0.404482535836102, "learning_rate": 1.409805722922974e-05, "loss": 0.2508, "step": 6695 }, { "epoch": 0.38, "grad_norm": 0.33534481802465277, "learning_rate": 1.409635968437019e-05, "loss": 0.2233, "step": 6696 }, { "epoch": 0.38, "grad_norm": 0.3605268365663909, "learning_rate": 1.409466199765388e-05, "loss": 0.3063, "step": 6697 }, { "epoch": 0.38, "grad_norm": 0.39065776412591735, "learning_rate": 1.4092964169139603e-05, "loss": 0.2807, "step": 6698 }, { "epoch": 0.38, "grad_norm": 0.44905297298310376, "learning_rate": 1.409126619888615e-05, "loss": 0.3731, "step": 6699 }, { "epoch": 0.38, "grad_norm": 0.34506324518643255, "learning_rate": 1.4089568086952327e-05, "loss": 0.2082, "step": 6700 }, { "epoch": 0.39, "grad_norm": 0.39996919181229135, "learning_rate": 1.4087869833396936e-05, "loss": 0.2956, "step": 6701 }, { "epoch": 0.39, "grad_norm": 0.3419182623503886, "learning_rate": 1.408617143827879e-05, "loss": 0.2966, "step": 6702 }, { "epoch": 0.39, "grad_norm": 0.35602932018929834, "learning_rate": 1.40844729016567e-05, "loss": 0.1709, "step": 6703 }, { "epoch": 0.39, "grad_norm": 0.33780685153209516, "learning_rate": 1.4082774223589492e-05, "loss": 0.2794, "step": 6704 }, { "epoch": 0.39, "grad_norm": 0.3433742555355464, "learning_rate": 1.4081075404135987e-05, "loss": 0.3471, "step": 6705 }, { "epoch": 0.39, "grad_norm": 0.698436131545571, "learning_rate": 1.4079376443355016e-05, "loss": 0.0464, "step": 6706 }, { "epoch": 0.39, "grad_norm": 0.3513574365202577, "learning_rate": 1.4077677341305414e-05, "loss": 0.2612, "step": 6707 }, { "epoch": 0.39, "grad_norm": 1.17360579573699, "learning_rate": 1.4075978098046022e-05, "loss": 0.7839, "step": 6708 }, { "epoch": 0.39, "grad_norm": 0.26317161420211993, "learning_rate": 1.4074278713635683e-05, "loss": 0.2323, "step": 6709 }, { "epoch": 0.39, "grad_norm": 0.2971917186585945, "learning_rate": 1.4072579188133247e-05, "loss": 0.2247, "step": 6710 }, { "epoch": 0.39, "grad_norm": 0.6271548854008792, "learning_rate": 1.407087952159757e-05, "loss": 0.4772, "step": 6711 }, { "epoch": 0.39, "grad_norm": 0.871101430569621, "learning_rate": 1.406917971408751e-05, "loss": 0.6613, "step": 6712 }, { "epoch": 0.39, "grad_norm": 0.3915942662772745, "learning_rate": 1.4067479765661929e-05, "loss": 0.2208, "step": 6713 }, { "epoch": 0.39, "grad_norm": 0.33841239049328914, "learning_rate": 1.4065779676379702e-05, "loss": 0.3083, "step": 6714 }, { "epoch": 0.39, "grad_norm": 0.30049951964028676, "learning_rate": 1.4064079446299699e-05, "loss": 0.2178, "step": 6715 }, { "epoch": 0.39, "grad_norm": 0.38831455943753335, "learning_rate": 1.4062379075480799e-05, "loss": 0.2218, "step": 6716 }, { "epoch": 0.39, "grad_norm": 0.291218423496063, "learning_rate": 1.4060678563981886e-05, "loss": 0.317, "step": 6717 }, { "epoch": 0.39, "grad_norm": 1.0342262669644686, "learning_rate": 1.4058977911861846e-05, "loss": 0.6031, "step": 6718 }, { "epoch": 0.39, "grad_norm": 0.35540629091607656, "learning_rate": 1.405727711917958e-05, "loss": 0.2125, "step": 6719 }, { "epoch": 0.39, "grad_norm": 0.3154034414211785, "learning_rate": 1.405557618599398e-05, "loss": 0.2661, "step": 6720 }, { "epoch": 0.39, "grad_norm": 0.3914215878276067, "learning_rate": 1.4053875112363953e-05, "loss": 0.3172, "step": 6721 }, { "epoch": 0.39, "grad_norm": 0.5473122810280361, "learning_rate": 1.40521738983484e-05, "loss": 0.3438, "step": 6722 }, { "epoch": 0.39, "grad_norm": 0.2795370643280423, "learning_rate": 1.4050472544006243e-05, "loss": 0.2083, "step": 6723 }, { "epoch": 0.39, "grad_norm": 1.4035101146683626, "learning_rate": 1.4048771049396397e-05, "loss": 0.7028, "step": 6724 }, { "epoch": 0.39, "grad_norm": 0.2976029002338224, "learning_rate": 1.4047069414577782e-05, "loss": 0.2697, "step": 6725 }, { "epoch": 0.39, "grad_norm": 0.35788245762048565, "learning_rate": 1.4045367639609326e-05, "loss": 0.265, "step": 6726 }, { "epoch": 0.39, "grad_norm": 0.8534396855898208, "learning_rate": 1.4043665724549967e-05, "loss": 0.5141, "step": 6727 }, { "epoch": 0.39, "grad_norm": 0.2832789254792039, "learning_rate": 1.4041963669458633e-05, "loss": 0.2318, "step": 6728 }, { "epoch": 0.39, "grad_norm": 0.3004873857950981, "learning_rate": 1.4040261474394275e-05, "loss": 0.1911, "step": 6729 }, { "epoch": 0.39, "grad_norm": 0.48748591817012543, "learning_rate": 1.4038559139415832e-05, "loss": 0.3871, "step": 6730 }, { "epoch": 0.39, "grad_norm": 0.66305837104567, "learning_rate": 1.4036856664582263e-05, "loss": 0.4026, "step": 6731 }, { "epoch": 0.39, "grad_norm": 0.4050811425920618, "learning_rate": 1.403515404995252e-05, "loss": 0.2587, "step": 6732 }, { "epoch": 0.39, "grad_norm": 0.41824055428183066, "learning_rate": 1.4033451295585565e-05, "loss": 0.3413, "step": 6733 }, { "epoch": 0.39, "grad_norm": 0.37066586491062015, "learning_rate": 1.4031748401540366e-05, "loss": 0.2305, "step": 6734 }, { "epoch": 0.39, "grad_norm": 0.29315654585139084, "learning_rate": 1.4030045367875893e-05, "loss": 0.2388, "step": 6735 }, { "epoch": 0.39, "grad_norm": 0.8745220798442661, "learning_rate": 1.4028342194651123e-05, "loss": 0.4191, "step": 6736 }, { "epoch": 0.39, "grad_norm": 0.30931597379685477, "learning_rate": 1.4026638881925032e-05, "loss": 0.2733, "step": 6737 }, { "epoch": 0.39, "grad_norm": 0.37488368146363077, "learning_rate": 1.4024935429756614e-05, "loss": 0.3517, "step": 6738 }, { "epoch": 0.39, "grad_norm": 0.548881557197987, "learning_rate": 1.4023231838204854e-05, "loss": 0.372, "step": 6739 }, { "epoch": 0.39, "grad_norm": 0.31575575686443624, "learning_rate": 1.4021528107328749e-05, "loss": 0.1803, "step": 6740 }, { "epoch": 0.39, "grad_norm": 0.2777086707952842, "learning_rate": 1.4019824237187296e-05, "loss": 0.2545, "step": 6741 }, { "epoch": 0.39, "grad_norm": 1.014625121565217, "learning_rate": 1.4018120227839505e-05, "loss": 0.3794, "step": 6742 }, { "epoch": 0.39, "grad_norm": 0.5247262023251844, "learning_rate": 1.4016416079344382e-05, "loss": 0.373, "step": 6743 }, { "epoch": 0.39, "grad_norm": 0.3867050547624269, "learning_rate": 1.4014711791760944e-05, "loss": 0.3268, "step": 6744 }, { "epoch": 0.39, "grad_norm": 0.35604241188905456, "learning_rate": 1.401300736514821e-05, "loss": 0.2967, "step": 6745 }, { "epoch": 0.39, "grad_norm": 0.4154383606703178, "learning_rate": 1.4011302799565205e-05, "loss": 0.3106, "step": 6746 }, { "epoch": 0.39, "grad_norm": 0.250345897544341, "learning_rate": 1.4009598095070951e-05, "loss": 0.1927, "step": 6747 }, { "epoch": 0.39, "grad_norm": 1.2188115276047022, "learning_rate": 1.4007893251724491e-05, "loss": 0.7797, "step": 6748 }, { "epoch": 0.39, "grad_norm": 0.29690370719853887, "learning_rate": 1.400618826958486e-05, "loss": 0.2267, "step": 6749 }, { "epoch": 0.39, "grad_norm": 0.40303575141635606, "learning_rate": 1.4004483148711101e-05, "loss": 0.3435, "step": 6750 }, { "epoch": 0.39, "grad_norm": 0.9008436992328396, "learning_rate": 1.4002777889162262e-05, "loss": 0.4848, "step": 6751 }, { "epoch": 0.39, "grad_norm": 0.2933102711442764, "learning_rate": 1.4001072490997399e-05, "loss": 0.1314, "step": 6752 }, { "epoch": 0.39, "grad_norm": 0.2828190568585241, "learning_rate": 1.3999366954275566e-05, "loss": 0.2767, "step": 6753 }, { "epoch": 0.39, "grad_norm": 0.362682035058518, "learning_rate": 1.3997661279055826e-05, "loss": 0.247, "step": 6754 }, { "epoch": 0.39, "grad_norm": 0.6611501611701132, "learning_rate": 1.399595546539725e-05, "loss": 0.3083, "step": 6755 }, { "epoch": 0.39, "grad_norm": 0.3474578684143908, "learning_rate": 1.3994249513358907e-05, "loss": 0.3028, "step": 6756 }, { "epoch": 0.39, "grad_norm": 0.38528825517022264, "learning_rate": 1.3992543422999876e-05, "loss": 0.3243, "step": 6757 }, { "epoch": 0.39, "grad_norm": 0.3992245764777032, "learning_rate": 1.3990837194379236e-05, "loss": 0.1723, "step": 6758 }, { "epoch": 0.39, "grad_norm": 0.24633718428561147, "learning_rate": 1.3989130827556077e-05, "loss": 0.2043, "step": 6759 }, { "epoch": 0.39, "grad_norm": 1.7078159518251719, "learning_rate": 1.398742432258949e-05, "loss": 0.8635, "step": 6760 }, { "epoch": 0.39, "grad_norm": 0.386580417191464, "learning_rate": 1.398571767953857e-05, "loss": 0.3186, "step": 6761 }, { "epoch": 0.39, "grad_norm": 0.4046253648674059, "learning_rate": 1.3984010898462417e-05, "loss": 0.2438, "step": 6762 }, { "epoch": 0.39, "grad_norm": 0.8689183740842735, "learning_rate": 1.398230397942014e-05, "loss": 0.5434, "step": 6763 }, { "epoch": 0.39, "grad_norm": 0.4089398395770051, "learning_rate": 1.3980596922470844e-05, "loss": 0.3337, "step": 6764 }, { "epoch": 0.39, "grad_norm": 0.29438732424321856, "learning_rate": 1.397888972767365e-05, "loss": 0.2378, "step": 6765 }, { "epoch": 0.39, "grad_norm": 0.3592290968908871, "learning_rate": 1.3977182395087674e-05, "loss": 0.2221, "step": 6766 }, { "epoch": 0.39, "grad_norm": 0.6970432495304754, "learning_rate": 1.3975474924772043e-05, "loss": 0.3725, "step": 6767 }, { "epoch": 0.39, "grad_norm": 0.4009812575724056, "learning_rate": 1.3973767316785887e-05, "loss": 0.2633, "step": 6768 }, { "epoch": 0.39, "grad_norm": 0.37223344888144816, "learning_rate": 1.397205957118834e-05, "loss": 0.3468, "step": 6769 }, { "epoch": 0.39, "grad_norm": 1.57109691824902, "learning_rate": 1.397035168803854e-05, "loss": 0.762, "step": 6770 }, { "epoch": 0.39, "grad_norm": 0.2339637760874292, "learning_rate": 1.3968643667395634e-05, "loss": 0.1738, "step": 6771 }, { "epoch": 0.39, "grad_norm": 0.32562666939438184, "learning_rate": 1.3966935509318766e-05, "loss": 0.2632, "step": 6772 }, { "epoch": 0.39, "grad_norm": 0.7221752928764033, "learning_rate": 1.3965227213867093e-05, "loss": 0.4831, "step": 6773 }, { "epoch": 0.39, "grad_norm": 0.33874134003021095, "learning_rate": 1.3963518781099774e-05, "loss": 0.2641, "step": 6774 }, { "epoch": 0.39, "grad_norm": 0.9198892609799831, "learning_rate": 1.3961810211075965e-05, "loss": 0.4748, "step": 6775 }, { "epoch": 0.39, "grad_norm": 0.41627147942332504, "learning_rate": 1.3960101503854843e-05, "loss": 0.314, "step": 6776 }, { "epoch": 0.39, "grad_norm": 0.32450432247327654, "learning_rate": 1.3958392659495575e-05, "loss": 0.2792, "step": 6777 }, { "epoch": 0.39, "grad_norm": 0.3360326187425709, "learning_rate": 1.3956683678057342e-05, "loss": 0.1615, "step": 6778 }, { "epoch": 0.39, "grad_norm": 0.7677364221803948, "learning_rate": 1.395497455959932e-05, "loss": 0.3848, "step": 6779 }, { "epoch": 0.39, "grad_norm": 0.3772113181888643, "learning_rate": 1.39532653041807e-05, "loss": 0.2806, "step": 6780 }, { "epoch": 0.39, "grad_norm": 0.6590453255458756, "learning_rate": 1.3951555911860672e-05, "loss": 0.2858, "step": 6781 }, { "epoch": 0.39, "grad_norm": 0.8705894982837394, "learning_rate": 1.3949846382698433e-05, "loss": 0.4125, "step": 6782 }, { "epoch": 0.39, "grad_norm": 0.3819991956597844, "learning_rate": 1.3948136716753183e-05, "loss": 0.2831, "step": 6783 }, { "epoch": 0.39, "grad_norm": 0.4511179604074903, "learning_rate": 1.394642691408413e-05, "loss": 0.3855, "step": 6784 }, { "epoch": 0.39, "grad_norm": 0.23375528534045187, "learning_rate": 1.394471697475048e-05, "loss": 0.1753, "step": 6785 }, { "epoch": 0.39, "grad_norm": 0.3867215053995416, "learning_rate": 1.3943006898811453e-05, "loss": 0.2849, "step": 6786 }, { "epoch": 0.39, "grad_norm": 1.1350554649970255, "learning_rate": 1.3941296686326266e-05, "loss": 0.8263, "step": 6787 }, { "epoch": 0.39, "grad_norm": 0.3652869785590026, "learning_rate": 1.3939586337354146e-05, "loss": 0.2795, "step": 6788 }, { "epoch": 0.39, "grad_norm": 0.3622856983259643, "learning_rate": 1.3937875851954316e-05, "loss": 0.2834, "step": 6789 }, { "epoch": 0.39, "grad_norm": 0.5852464787006104, "learning_rate": 1.3936165230186018e-05, "loss": 0.4318, "step": 6790 }, { "epoch": 0.39, "grad_norm": 0.2725903077636448, "learning_rate": 1.3934454472108488e-05, "loss": 0.1255, "step": 6791 }, { "epoch": 0.39, "grad_norm": 0.3542987477803968, "learning_rate": 1.3932743577780967e-05, "loss": 0.2905, "step": 6792 }, { "epoch": 0.39, "grad_norm": 0.3643810295443851, "learning_rate": 1.3931032547262707e-05, "loss": 0.2526, "step": 6793 }, { "epoch": 0.39, "grad_norm": 0.6951650698020321, "learning_rate": 1.3929321380612955e-05, "loss": 0.3213, "step": 6794 }, { "epoch": 0.39, "grad_norm": 0.3221414003316139, "learning_rate": 1.3927610077890976e-05, "loss": 0.2688, "step": 6795 }, { "epoch": 0.39, "grad_norm": 0.6841427532021138, "learning_rate": 1.3925898639156028e-05, "loss": 0.4703, "step": 6796 }, { "epoch": 0.39, "grad_norm": 0.2893061685049914, "learning_rate": 1.3924187064467378e-05, "loss": 0.2869, "step": 6797 }, { "epoch": 0.39, "grad_norm": 0.3150811635114615, "learning_rate": 1.3922475353884302e-05, "loss": 0.2133, "step": 6798 }, { "epoch": 0.39, "grad_norm": 0.39697628073105073, "learning_rate": 1.3920763507466071e-05, "loss": 0.2859, "step": 6799 }, { "epoch": 0.39, "grad_norm": 0.3824519346405898, "learning_rate": 1.3919051525271968e-05, "loss": 0.3316, "step": 6800 }, { "epoch": 0.39, "grad_norm": 0.3120471764511407, "learning_rate": 1.3917339407361278e-05, "loss": 0.1981, "step": 6801 }, { "epoch": 0.39, "grad_norm": 0.7719291568422046, "learning_rate": 1.3915627153793294e-05, "loss": 0.4703, "step": 6802 }, { "epoch": 0.39, "grad_norm": 1.0406178609357577, "learning_rate": 1.3913914764627311e-05, "loss": 0.7213, "step": 6803 }, { "epoch": 0.39, "grad_norm": 0.3082881655989037, "learning_rate": 1.3912202239922627e-05, "loss": 0.1947, "step": 6804 }, { "epoch": 0.39, "grad_norm": 0.269808735942457, "learning_rate": 1.3910489579738548e-05, "loss": 0.2328, "step": 6805 }, { "epoch": 0.39, "grad_norm": 0.8449664005507034, "learning_rate": 1.3908776784134382e-05, "loss": 0.4975, "step": 6806 }, { "epoch": 0.39, "grad_norm": 0.3668433609014333, "learning_rate": 1.3907063853169445e-05, "loss": 0.2513, "step": 6807 }, { "epoch": 0.39, "grad_norm": 0.3828472985391112, "learning_rate": 1.3905350786903053e-05, "loss": 0.3159, "step": 6808 }, { "epoch": 0.39, "grad_norm": 0.9917346778146909, "learning_rate": 1.3903637585394534e-05, "loss": 0.6777, "step": 6809 }, { "epoch": 0.39, "grad_norm": 0.34320658920451136, "learning_rate": 1.390192424870321e-05, "loss": 0.2877, "step": 6810 }, { "epoch": 0.39, "grad_norm": 0.22123271814015424, "learning_rate": 1.3900210776888421e-05, "loss": 0.1037, "step": 6811 }, { "epoch": 0.39, "grad_norm": 0.43597104711910406, "learning_rate": 1.38984971700095e-05, "loss": 0.3482, "step": 6812 }, { "epoch": 0.39, "grad_norm": 0.3876067777475436, "learning_rate": 1.3896783428125789e-05, "loss": 0.2848, "step": 6813 }, { "epoch": 0.39, "grad_norm": 0.814104832790337, "learning_rate": 1.3895069551296634e-05, "loss": 0.3313, "step": 6814 }, { "epoch": 0.39, "grad_norm": 1.0191123045537875, "learning_rate": 1.389335553958139e-05, "loss": 0.7098, "step": 6815 }, { "epoch": 0.39, "grad_norm": 0.3189777727484337, "learning_rate": 1.3891641393039408e-05, "loss": 0.2773, "step": 6816 }, { "epoch": 0.39, "grad_norm": 0.3103356437264256, "learning_rate": 1.3889927111730056e-05, "loss": 0.1763, "step": 6817 }, { "epoch": 0.39, "grad_norm": 0.6834230208026675, "learning_rate": 1.3888212695712693e-05, "loss": 0.3407, "step": 6818 }, { "epoch": 0.39, "grad_norm": 0.4373386910283233, "learning_rate": 1.3886498145046689e-05, "loss": 0.2706, "step": 6819 }, { "epoch": 0.39, "grad_norm": 0.33425168275995765, "learning_rate": 1.3884783459791427e-05, "loss": 0.2554, "step": 6820 }, { "epoch": 0.39, "grad_norm": 0.9990408288161097, "learning_rate": 1.3883068640006277e-05, "loss": 0.6442, "step": 6821 }, { "epoch": 0.39, "grad_norm": 0.3974999866738344, "learning_rate": 1.3881353685750627e-05, "loss": 0.2808, "step": 6822 }, { "epoch": 0.39, "grad_norm": 0.48249560737044717, "learning_rate": 1.3879638597083864e-05, "loss": 0.3902, "step": 6823 }, { "epoch": 0.39, "grad_norm": 0.406224053807835, "learning_rate": 1.387792337406539e-05, "loss": 0.2945, "step": 6824 }, { "epoch": 0.39, "grad_norm": 0.2699756257137786, "learning_rate": 1.3876208016754589e-05, "loss": 0.2, "step": 6825 }, { "epoch": 0.39, "grad_norm": 0.35033622402874065, "learning_rate": 1.3874492525210874e-05, "loss": 0.2657, "step": 6826 }, { "epoch": 0.39, "grad_norm": 0.923795988793285, "learning_rate": 1.387277689949365e-05, "loss": 0.4945, "step": 6827 }, { "epoch": 0.39, "grad_norm": 0.29261064970858314, "learning_rate": 1.3871061139662328e-05, "loss": 0.2662, "step": 6828 }, { "epoch": 0.39, "grad_norm": 0.5097583064881249, "learning_rate": 1.3869345245776326e-05, "loss": 0.3505, "step": 6829 }, { "epoch": 0.39, "grad_norm": 0.3765657158030522, "learning_rate": 1.3867629217895067e-05, "loss": 0.2203, "step": 6830 }, { "epoch": 0.39, "grad_norm": 0.3066487278928788, "learning_rate": 1.3865913056077968e-05, "loss": 0.2238, "step": 6831 }, { "epoch": 0.39, "grad_norm": 0.49241965675255883, "learning_rate": 1.3864196760384471e-05, "loss": 0.3577, "step": 6832 }, { "epoch": 0.39, "grad_norm": 0.47556465523233393, "learning_rate": 1.3862480330874004e-05, "loss": 0.2933, "step": 6833 }, { "epoch": 0.39, "grad_norm": 0.3757822961632396, "learning_rate": 1.3860763767606012e-05, "loss": 0.2787, "step": 6834 }, { "epoch": 0.39, "grad_norm": 0.7440805037762387, "learning_rate": 1.3859047070639933e-05, "loss": 0.4623, "step": 6835 }, { "epoch": 0.39, "grad_norm": 0.33732921252747394, "learning_rate": 1.3857330240035223e-05, "loss": 0.3338, "step": 6836 }, { "epoch": 0.39, "grad_norm": 0.18536598268984897, "learning_rate": 1.385561327585133e-05, "loss": 0.0748, "step": 6837 }, { "epoch": 0.39, "grad_norm": 0.30528617287475135, "learning_rate": 1.3853896178147717e-05, "loss": 0.2542, "step": 6838 }, { "epoch": 0.39, "grad_norm": 0.48011544732265776, "learning_rate": 1.3852178946983845e-05, "loss": 0.4014, "step": 6839 }, { "epoch": 0.39, "grad_norm": 0.32213207603181976, "learning_rate": 1.385046158241918e-05, "loss": 0.2102, "step": 6840 }, { "epoch": 0.39, "grad_norm": 0.6276796859084134, "learning_rate": 1.3848744084513197e-05, "loss": 0.3767, "step": 6841 }, { "epoch": 0.39, "grad_norm": 0.9645205479290853, "learning_rate": 1.3847026453325371e-05, "loss": 0.6006, "step": 6842 }, { "epoch": 0.39, "grad_norm": 0.18739949526105895, "learning_rate": 1.3845308688915187e-05, "loss": 0.1148, "step": 6843 }, { "epoch": 0.39, "grad_norm": 0.29294272479257805, "learning_rate": 1.3843590791342125e-05, "loss": 0.2939, "step": 6844 }, { "epoch": 0.39, "grad_norm": 1.0560695569212466, "learning_rate": 1.3841872760665682e-05, "loss": 0.7409, "step": 6845 }, { "epoch": 0.39, "grad_norm": 0.40634206076714197, "learning_rate": 1.3840154596945352e-05, "loss": 0.2672, "step": 6846 }, { "epoch": 0.39, "grad_norm": 0.5534173099885904, "learning_rate": 1.383843630024063e-05, "loss": 0.4431, "step": 6847 }, { "epoch": 0.39, "grad_norm": 0.35250494584408637, "learning_rate": 1.3836717870611025e-05, "loss": 0.3131, "step": 6848 }, { "epoch": 0.39, "grad_norm": 0.3773994520684638, "learning_rate": 1.3834999308116045e-05, "loss": 0.2855, "step": 6849 }, { "epoch": 0.39, "grad_norm": 0.22077046425320704, "learning_rate": 1.3833280612815204e-05, "loss": 0.1251, "step": 6850 }, { "epoch": 0.39, "grad_norm": 0.4475654638391578, "learning_rate": 1.383156178476802e-05, "loss": 0.42, "step": 6851 }, { "epoch": 0.39, "grad_norm": 0.3264987173804153, "learning_rate": 1.3829842824034016e-05, "loss": 0.3119, "step": 6852 }, { "epoch": 0.39, "grad_norm": 0.4714716770949217, "learning_rate": 1.382812373067272e-05, "loss": 0.3138, "step": 6853 }, { "epoch": 0.39, "grad_norm": 0.8911497620521466, "learning_rate": 1.3826404504743662e-05, "loss": 0.57, "step": 6854 }, { "epoch": 0.39, "grad_norm": 0.33170515753882024, "learning_rate": 1.3824685146306385e-05, "loss": 0.2292, "step": 6855 }, { "epoch": 0.39, "grad_norm": 0.28216547828961547, "learning_rate": 1.3822965655420422e-05, "loss": 0.2139, "step": 6856 }, { "epoch": 0.39, "grad_norm": 0.735537001474033, "learning_rate": 1.3821246032145324e-05, "loss": 0.4583, "step": 6857 }, { "epoch": 0.39, "grad_norm": 0.5961310337286932, "learning_rate": 1.381952627654064e-05, "loss": 0.4259, "step": 6858 }, { "epoch": 0.39, "grad_norm": 0.3900981399016567, "learning_rate": 1.3817806388665926e-05, "loss": 0.2397, "step": 6859 }, { "epoch": 0.39, "grad_norm": 0.37331750009126125, "learning_rate": 1.381608636858074e-05, "loss": 0.3157, "step": 6860 }, { "epoch": 0.39, "grad_norm": 0.5407319177531821, "learning_rate": 1.3814366216344647e-05, "loss": 0.3703, "step": 6861 }, { "epoch": 0.39, "grad_norm": 0.22703111036296625, "learning_rate": 1.3812645932017217e-05, "loss": 0.1943, "step": 6862 }, { "epoch": 0.39, "grad_norm": 0.653693685219927, "learning_rate": 1.3810925515658022e-05, "loss": 0.3258, "step": 6863 }, { "epoch": 0.39, "grad_norm": 0.4038868654554686, "learning_rate": 1.3809204967326641e-05, "loss": 0.3235, "step": 6864 }, { "epoch": 0.39, "grad_norm": 0.40131278450352875, "learning_rate": 1.3807484287082655e-05, "loss": 0.3271, "step": 6865 }, { "epoch": 0.39, "grad_norm": 0.8587126265813383, "learning_rate": 1.3805763474985651e-05, "loss": 0.2415, "step": 6866 }, { "epoch": 0.39, "grad_norm": 0.3657681750663915, "learning_rate": 1.3804042531095223e-05, "loss": 0.3043, "step": 6867 }, { "epoch": 0.39, "grad_norm": 0.31855912993435875, "learning_rate": 1.3802321455470967e-05, "loss": 0.2917, "step": 6868 }, { "epoch": 0.39, "grad_norm": 0.4853379729560478, "learning_rate": 1.3800600248172478e-05, "loss": 0.2178, "step": 6869 }, { "epoch": 0.39, "grad_norm": 0.39579207655536114, "learning_rate": 1.3798878909259368e-05, "loss": 0.3008, "step": 6870 }, { "epoch": 0.39, "grad_norm": 0.5117750783727549, "learning_rate": 1.3797157438791244e-05, "loss": 0.4075, "step": 6871 }, { "epoch": 0.39, "grad_norm": 0.38032603698465695, "learning_rate": 1.3795435836827724e-05, "loss": 0.3377, "step": 6872 }, { "epoch": 0.39, "grad_norm": 0.5204418610641315, "learning_rate": 1.3793714103428421e-05, "loss": 0.1617, "step": 6873 }, { "epoch": 0.39, "grad_norm": 0.2955007971124637, "learning_rate": 1.3791992238652965e-05, "loss": 0.2467, "step": 6874 }, { "epoch": 0.4, "grad_norm": 0.3308877907838149, "learning_rate": 1.3790270242560977e-05, "loss": 0.2977, "step": 6875 }, { "epoch": 0.4, "grad_norm": 0.38992600679999917, "learning_rate": 1.3788548115212095e-05, "loss": 0.2571, "step": 6876 }, { "epoch": 0.4, "grad_norm": 0.38401415930593574, "learning_rate": 1.3786825856665958e-05, "loss": 0.3058, "step": 6877 }, { "epoch": 0.4, "grad_norm": 0.7709559475093668, "learning_rate": 1.3785103466982199e-05, "loss": 0.5874, "step": 6878 }, { "epoch": 0.4, "grad_norm": 0.3114444710448705, "learning_rate": 1.3783380946220474e-05, "loss": 0.2394, "step": 6879 }, { "epoch": 0.4, "grad_norm": 0.3432166855056662, "learning_rate": 1.3781658294440427e-05, "loss": 0.3115, "step": 6880 }, { "epoch": 0.4, "grad_norm": 0.6522356404592244, "learning_rate": 1.3779935511701717e-05, "loss": 0.4908, "step": 6881 }, { "epoch": 0.4, "grad_norm": 0.21070036258642488, "learning_rate": 1.3778212598064002e-05, "loss": 0.1286, "step": 6882 }, { "epoch": 0.4, "grad_norm": 0.27838016788335157, "learning_rate": 1.3776489553586949e-05, "loss": 0.2634, "step": 6883 }, { "epoch": 0.4, "grad_norm": 0.5184621555490639, "learning_rate": 1.3774766378330221e-05, "loss": 0.4013, "step": 6884 }, { "epoch": 0.4, "grad_norm": 0.615158987776567, "learning_rate": 1.3773043072353503e-05, "loss": 0.4176, "step": 6885 }, { "epoch": 0.4, "grad_norm": 0.32917103198871883, "learning_rate": 1.3771319635716459e-05, "loss": 0.2412, "step": 6886 }, { "epoch": 0.4, "grad_norm": 0.48537442050696666, "learning_rate": 1.376959606847878e-05, "loss": 0.3794, "step": 6887 }, { "epoch": 0.4, "grad_norm": 0.437233197013368, "learning_rate": 1.376787237070015e-05, "loss": 0.3628, "step": 6888 }, { "epoch": 0.4, "grad_norm": 0.3363888104947281, "learning_rate": 1.3766148542440265e-05, "loss": 0.2289, "step": 6889 }, { "epoch": 0.4, "grad_norm": 0.2641410487640712, "learning_rate": 1.3764424583758816e-05, "loss": 0.1768, "step": 6890 }, { "epoch": 0.4, "grad_norm": 0.3430245182460297, "learning_rate": 1.3762700494715506e-05, "loss": 0.3178, "step": 6891 }, { "epoch": 0.4, "grad_norm": 0.3694883870067206, "learning_rate": 1.3760976275370039e-05, "loss": 0.2562, "step": 6892 }, { "epoch": 0.4, "grad_norm": 0.6870317668549498, "learning_rate": 1.3759251925782127e-05, "loss": 0.4852, "step": 6893 }, { "epoch": 0.4, "grad_norm": 1.2038599651172182, "learning_rate": 1.3757527446011479e-05, "loss": 0.8427, "step": 6894 }, { "epoch": 0.4, "grad_norm": 0.2434935746268074, "learning_rate": 1.375580283611782e-05, "loss": 0.1944, "step": 6895 }, { "epoch": 0.4, "grad_norm": 0.3256197247270627, "learning_rate": 1.3754078096160871e-05, "loss": 0.2661, "step": 6896 }, { "epoch": 0.4, "grad_norm": 0.7596028889004636, "learning_rate": 1.3752353226200359e-05, "loss": 0.4223, "step": 6897 }, { "epoch": 0.4, "grad_norm": 0.3579206665965782, "learning_rate": 1.3750628226296012e-05, "loss": 0.2608, "step": 6898 }, { "epoch": 0.4, "grad_norm": 0.42991083239765604, "learning_rate": 1.3748903096507576e-05, "loss": 0.2457, "step": 6899 }, { "epoch": 0.4, "grad_norm": 0.46276695668103407, "learning_rate": 1.3747177836894783e-05, "loss": 0.4311, "step": 6900 }, { "epoch": 0.4, "grad_norm": 0.33938255692060043, "learning_rate": 1.3745452447517384e-05, "loss": 0.253, "step": 6901 }, { "epoch": 0.4, "grad_norm": 0.33146128208950326, "learning_rate": 1.3743726928435129e-05, "loss": 0.1271, "step": 6902 }, { "epoch": 0.4, "grad_norm": 0.4516192355561343, "learning_rate": 1.3742001279707771e-05, "loss": 0.3194, "step": 6903 }, { "epoch": 0.4, "grad_norm": 0.3831757519966381, "learning_rate": 1.3740275501395068e-05, "loss": 0.2835, "step": 6904 }, { "epoch": 0.4, "grad_norm": 0.7703354652013342, "learning_rate": 1.3738549593556787e-05, "loss": 0.255, "step": 6905 }, { "epoch": 0.4, "grad_norm": 0.6825758957008587, "learning_rate": 1.3736823556252694e-05, "loss": 0.5165, "step": 6906 }, { "epoch": 0.4, "grad_norm": 0.3866563201141378, "learning_rate": 1.373509738954256e-05, "loss": 0.3017, "step": 6907 }, { "epoch": 0.4, "grad_norm": 0.2428939493291775, "learning_rate": 1.3733371093486168e-05, "loss": 0.2285, "step": 6908 }, { "epoch": 0.4, "grad_norm": 0.6721461211190297, "learning_rate": 1.3731644668143291e-05, "loss": 0.4576, "step": 6909 }, { "epoch": 0.4, "grad_norm": 0.3443745352273169, "learning_rate": 1.3729918113573723e-05, "loss": 0.2499, "step": 6910 }, { "epoch": 0.4, "grad_norm": 0.3699209177126759, "learning_rate": 1.3728191429837247e-05, "loss": 0.3297, "step": 6911 }, { "epoch": 0.4, "grad_norm": 0.9218896511237294, "learning_rate": 1.3726464616993667e-05, "loss": 0.5562, "step": 6912 }, { "epoch": 0.4, "grad_norm": 0.36061835491190963, "learning_rate": 1.3724737675102774e-05, "loss": 0.251, "step": 6913 }, { "epoch": 0.4, "grad_norm": 0.6697499130698259, "learning_rate": 1.3723010604224381e-05, "loss": 0.4177, "step": 6914 }, { "epoch": 0.4, "grad_norm": 0.22123181145234125, "learning_rate": 1.3721283404418283e-05, "loss": 0.1735, "step": 6915 }, { "epoch": 0.4, "grad_norm": 0.31925352597321077, "learning_rate": 1.3719556075744306e-05, "loss": 0.2455, "step": 6916 }, { "epoch": 0.4, "grad_norm": 0.9239303856324638, "learning_rate": 1.3717828618262261e-05, "loss": 0.5894, "step": 6917 }, { "epoch": 0.4, "grad_norm": 0.5162709979789786, "learning_rate": 1.3716101032031972e-05, "loss": 0.3261, "step": 6918 }, { "epoch": 0.4, "grad_norm": 0.29582580195856606, "learning_rate": 1.371437331711326e-05, "loss": 0.2552, "step": 6919 }, { "epoch": 0.4, "grad_norm": 0.8585276824390256, "learning_rate": 1.3712645473565964e-05, "loss": 0.4624, "step": 6920 }, { "epoch": 0.4, "grad_norm": 0.3036535597617637, "learning_rate": 1.3710917501449911e-05, "loss": 0.1622, "step": 6921 }, { "epoch": 0.4, "grad_norm": 0.375439624708647, "learning_rate": 1.3709189400824948e-05, "loss": 0.2807, "step": 6922 }, { "epoch": 0.4, "grad_norm": 0.38903684691791973, "learning_rate": 1.3707461171750916e-05, "loss": 0.2963, "step": 6923 }, { "epoch": 0.4, "grad_norm": 1.3030138881612745, "learning_rate": 1.370573281428766e-05, "loss": 0.8033, "step": 6924 }, { "epoch": 0.4, "grad_norm": 0.3366170348258912, "learning_rate": 1.3704004328495037e-05, "loss": 0.2032, "step": 6925 }, { "epoch": 0.4, "grad_norm": 0.8395341835724895, "learning_rate": 1.3702275714432905e-05, "loss": 0.4634, "step": 6926 }, { "epoch": 0.4, "grad_norm": 0.22712153378717959, "learning_rate": 1.3700546972161121e-05, "loss": 0.2303, "step": 6927 }, { "epoch": 0.4, "grad_norm": 0.3062410578990784, "learning_rate": 1.3698818101739554e-05, "loss": 0.1977, "step": 6928 }, { "epoch": 0.4, "grad_norm": 1.0418163317880769, "learning_rate": 1.3697089103228081e-05, "loss": 0.7044, "step": 6929 }, { "epoch": 0.4, "grad_norm": 0.7665050260008015, "learning_rate": 1.3695359976686568e-05, "loss": 0.52, "step": 6930 }, { "epoch": 0.4, "grad_norm": 0.2718208821347409, "learning_rate": 1.3693630722174898e-05, "loss": 0.225, "step": 6931 }, { "epoch": 0.4, "grad_norm": 0.5994959857163412, "learning_rate": 1.3691901339752955e-05, "loss": 0.48, "step": 6932 }, { "epoch": 0.4, "grad_norm": 0.42763359444722393, "learning_rate": 1.3690171829480628e-05, "loss": 0.279, "step": 6933 }, { "epoch": 0.4, "grad_norm": 0.2819399462682956, "learning_rate": 1.3688442191417805e-05, "loss": 0.169, "step": 6934 }, { "epoch": 0.4, "grad_norm": 0.3855701660647398, "learning_rate": 1.3686712425624393e-05, "loss": 0.3225, "step": 6935 }, { "epoch": 0.4, "grad_norm": 0.9462468602813544, "learning_rate": 1.3684982532160285e-05, "loss": 0.5966, "step": 6936 }, { "epoch": 0.4, "grad_norm": 0.34783075864199253, "learning_rate": 1.3683252511085391e-05, "loss": 0.2796, "step": 6937 }, { "epoch": 0.4, "grad_norm": 0.702993674672384, "learning_rate": 1.3681522362459623e-05, "loss": 0.3554, "step": 6938 }, { "epoch": 0.4, "grad_norm": 0.2971323853404743, "learning_rate": 1.3679792086342892e-05, "loss": 0.2779, "step": 6939 }, { "epoch": 0.4, "grad_norm": 0.2884143111686183, "learning_rate": 1.367806168279512e-05, "loss": 0.2304, "step": 6940 }, { "epoch": 0.4, "grad_norm": 0.5212153825592891, "learning_rate": 1.3676331151876227e-05, "loss": 0.2851, "step": 6941 }, { "epoch": 0.4, "grad_norm": 0.4431896490235908, "learning_rate": 1.3674600493646146e-05, "loss": 0.3398, "step": 6942 }, { "epoch": 0.4, "grad_norm": 0.4475954113556971, "learning_rate": 1.367286970816481e-05, "loss": 0.2763, "step": 6943 }, { "epoch": 0.4, "grad_norm": 0.669305965541524, "learning_rate": 1.3671138795492155e-05, "loss": 0.3324, "step": 6944 }, { "epoch": 0.4, "grad_norm": 0.3080889359061354, "learning_rate": 1.3669407755688117e-05, "loss": 0.1614, "step": 6945 }, { "epoch": 0.4, "grad_norm": 0.40932396074492605, "learning_rate": 1.366767658881265e-05, "loss": 0.2977, "step": 6946 }, { "epoch": 0.4, "grad_norm": 0.32701274366526195, "learning_rate": 1.36659452949257e-05, "loss": 0.2876, "step": 6947 }, { "epoch": 0.4, "grad_norm": 0.7057822249995881, "learning_rate": 1.3664213874087223e-05, "loss": 0.3661, "step": 6948 }, { "epoch": 0.4, "grad_norm": 0.3797999918281963, "learning_rate": 1.3662482326357172e-05, "loss": 0.28, "step": 6949 }, { "epoch": 0.4, "grad_norm": 0.5054705360107299, "learning_rate": 1.366075065179552e-05, "loss": 0.373, "step": 6950 }, { "epoch": 0.4, "grad_norm": 0.4181068108631131, "learning_rate": 1.3659018850462226e-05, "loss": 0.2382, "step": 6951 }, { "epoch": 0.4, "grad_norm": 0.2658521324344886, "learning_rate": 1.3657286922417272e-05, "loss": 0.1952, "step": 6952 }, { "epoch": 0.4, "grad_norm": 0.33355700539346833, "learning_rate": 1.3655554867720623e-05, "loss": 0.2598, "step": 6953 }, { "epoch": 0.4, "grad_norm": 0.7434397980880477, "learning_rate": 1.3653822686432271e-05, "loss": 0.3192, "step": 6954 }, { "epoch": 0.4, "grad_norm": 0.30312485770426095, "learning_rate": 1.3652090378612198e-05, "loss": 0.2706, "step": 6955 }, { "epoch": 0.4, "grad_norm": 1.0086654469423801, "learning_rate": 1.3650357944320387e-05, "loss": 0.6798, "step": 6956 }, { "epoch": 0.4, "grad_norm": 0.2880087205351191, "learning_rate": 1.3648625383616841e-05, "loss": 0.1437, "step": 6957 }, { "epoch": 0.4, "grad_norm": 0.29183807559357594, "learning_rate": 1.3646892696561554e-05, "loss": 0.2153, "step": 6958 }, { "epoch": 0.4, "grad_norm": 0.36485493480482983, "learning_rate": 1.3645159883214528e-05, "loss": 0.3043, "step": 6959 }, { "epoch": 0.4, "grad_norm": 0.6233292098008584, "learning_rate": 1.3643426943635774e-05, "loss": 0.4441, "step": 6960 }, { "epoch": 0.4, "grad_norm": 0.35202902887011656, "learning_rate": 1.36416938778853e-05, "loss": 0.1655, "step": 6961 }, { "epoch": 0.4, "grad_norm": 0.4126226172553717, "learning_rate": 1.3639960686023126e-05, "loss": 0.3344, "step": 6962 }, { "epoch": 0.4, "grad_norm": 0.37428085092700025, "learning_rate": 1.3638227368109268e-05, "loss": 0.3312, "step": 6963 }, { "epoch": 0.4, "grad_norm": 0.30610032926280945, "learning_rate": 1.3636493924203756e-05, "loss": 0.1934, "step": 6964 }, { "epoch": 0.4, "grad_norm": 0.31909149409640514, "learning_rate": 1.3634760354366612e-05, "loss": 0.2953, "step": 6965 }, { "epoch": 0.4, "grad_norm": 0.816373087029119, "learning_rate": 1.3633026658657872e-05, "loss": 0.6193, "step": 6966 }, { "epoch": 0.4, "grad_norm": 0.2950274786523385, "learning_rate": 1.3631292837137577e-05, "loss": 0.2384, "step": 6967 }, { "epoch": 0.4, "grad_norm": 0.34463380268308075, "learning_rate": 1.3629558889865768e-05, "loss": 0.2693, "step": 6968 }, { "epoch": 0.4, "grad_norm": 1.445343970358979, "learning_rate": 1.3627824816902494e-05, "loss": 0.7602, "step": 6969 }, { "epoch": 0.4, "grad_norm": 0.2910854511165072, "learning_rate": 1.3626090618307796e-05, "loss": 0.2366, "step": 6970 }, { "epoch": 0.4, "grad_norm": 0.3727574615944739, "learning_rate": 1.3624356294141738e-05, "loss": 0.347, "step": 6971 }, { "epoch": 0.4, "grad_norm": 0.4251921909257409, "learning_rate": 1.3622621844464379e-05, "loss": 0.2888, "step": 6972 }, { "epoch": 0.4, "grad_norm": 0.3357843692589245, "learning_rate": 1.362088726933578e-05, "loss": 0.2384, "step": 6973 }, { "epoch": 0.4, "grad_norm": 0.36993298536045216, "learning_rate": 1.361915256881601e-05, "loss": 0.2396, "step": 6974 }, { "epoch": 0.4, "grad_norm": 0.3614041412393671, "learning_rate": 1.3617417742965144e-05, "loss": 0.3096, "step": 6975 }, { "epoch": 0.4, "grad_norm": 0.37952388985458196, "learning_rate": 1.3615682791843257e-05, "loss": 0.3231, "step": 6976 }, { "epoch": 0.4, "grad_norm": 0.3901412474468786, "learning_rate": 1.3613947715510429e-05, "loss": 0.3072, "step": 6977 }, { "epoch": 0.4, "grad_norm": 0.36218398221854425, "learning_rate": 1.3612212514026746e-05, "loss": 0.3417, "step": 6978 }, { "epoch": 0.4, "grad_norm": 0.8055590911647899, "learning_rate": 1.3610477187452303e-05, "loss": 0.4594, "step": 6979 }, { "epoch": 0.4, "grad_norm": 0.2289632841507798, "learning_rate": 1.3608741735847186e-05, "loss": 0.1503, "step": 6980 }, { "epoch": 0.4, "grad_norm": 0.6391920773085977, "learning_rate": 1.3607006159271503e-05, "loss": 0.4503, "step": 6981 }, { "epoch": 0.4, "grad_norm": 0.3301091566046304, "learning_rate": 1.3605270457785346e-05, "loss": 0.3016, "step": 6982 }, { "epoch": 0.4, "grad_norm": 0.3309863876921998, "learning_rate": 1.3603534631448831e-05, "loss": 0.2646, "step": 6983 }, { "epoch": 0.4, "grad_norm": 1.2898443402936977, "learning_rate": 1.3601798680322068e-05, "loss": 0.7883, "step": 6984 }, { "epoch": 0.4, "grad_norm": 0.34812557437044317, "learning_rate": 1.3600062604465168e-05, "loss": 0.2385, "step": 6985 }, { "epoch": 0.4, "grad_norm": 0.277167302217143, "learning_rate": 1.3598326403938255e-05, "loss": 0.2632, "step": 6986 }, { "epoch": 0.4, "grad_norm": 0.5174657148756132, "learning_rate": 1.3596590078801458e-05, "loss": 0.3016, "step": 6987 }, { "epoch": 0.4, "grad_norm": 0.4792618762975345, "learning_rate": 1.3594853629114896e-05, "loss": 0.3499, "step": 6988 }, { "epoch": 0.4, "grad_norm": 0.41025013782946984, "learning_rate": 1.359311705493871e-05, "loss": 0.312, "step": 6989 }, { "epoch": 0.4, "grad_norm": 0.3738632934541031, "learning_rate": 1.3591380356333038e-05, "loss": 0.2848, "step": 6990 }, { "epoch": 0.4, "grad_norm": 0.5999221057686619, "learning_rate": 1.3589643533358013e-05, "loss": 0.3353, "step": 6991 }, { "epoch": 0.4, "grad_norm": 0.32699926243927424, "learning_rate": 1.358790658607379e-05, "loss": 0.2355, "step": 6992 }, { "epoch": 0.4, "grad_norm": 0.43293676610358867, "learning_rate": 1.3586169514540512e-05, "loss": 0.2488, "step": 6993 }, { "epoch": 0.4, "grad_norm": 0.32886181991925645, "learning_rate": 1.3584432318818344e-05, "loss": 0.3064, "step": 6994 }, { "epoch": 0.4, "grad_norm": 0.35377350978253397, "learning_rate": 1.3582694998967434e-05, "loss": 0.316, "step": 6995 }, { "epoch": 0.4, "grad_norm": 0.685901668579314, "learning_rate": 1.3580957555047953e-05, "loss": 0.4758, "step": 6996 }, { "epoch": 0.4, "grad_norm": 0.6877362501402533, "learning_rate": 1.3579219987120065e-05, "loss": 0.4549, "step": 6997 }, { "epoch": 0.4, "grad_norm": 0.27094743803612736, "learning_rate": 1.3577482295243944e-05, "loss": 0.2349, "step": 6998 }, { "epoch": 0.4, "grad_norm": 0.3247502461542125, "learning_rate": 1.3575744479479764e-05, "loss": 0.2856, "step": 6999 }, { "epoch": 0.4, "grad_norm": 0.6147448714871042, "learning_rate": 1.3574006539887707e-05, "loss": 0.2349, "step": 7000 }, { "epoch": 0.4, "grad_norm": 0.38562569593393564, "learning_rate": 1.3572268476527954e-05, "loss": 0.2908, "step": 7001 }, { "epoch": 0.4, "grad_norm": 0.3642951593240954, "learning_rate": 1.3570530289460701e-05, "loss": 0.3264, "step": 7002 }, { "epoch": 0.4, "grad_norm": 0.5235074663626574, "learning_rate": 1.3568791978746137e-05, "loss": 0.2494, "step": 7003 }, { "epoch": 0.4, "grad_norm": 0.2952802191250271, "learning_rate": 1.356705354444446e-05, "loss": 0.2524, "step": 7004 }, { "epoch": 0.4, "grad_norm": 0.946358029879268, "learning_rate": 1.3565314986615871e-05, "loss": 0.4903, "step": 7005 }, { "epoch": 0.4, "grad_norm": 0.23563139165705427, "learning_rate": 1.3563576305320579e-05, "loss": 0.1791, "step": 7006 }, { "epoch": 0.4, "grad_norm": 0.40333290157473234, "learning_rate": 1.356183750061879e-05, "loss": 0.3251, "step": 7007 }, { "epoch": 0.4, "grad_norm": 1.2051917632433689, "learning_rate": 1.3560098572570725e-05, "loss": 0.821, "step": 7008 }, { "epoch": 0.4, "grad_norm": 0.37974187511583635, "learning_rate": 1.35583595212366e-05, "loss": 0.1655, "step": 7009 }, { "epoch": 0.4, "grad_norm": 0.3522486679741345, "learning_rate": 1.3556620346676633e-05, "loss": 0.2931, "step": 7010 }, { "epoch": 0.4, "grad_norm": 0.6771158949431482, "learning_rate": 1.355488104895106e-05, "loss": 0.3763, "step": 7011 }, { "epoch": 0.4, "grad_norm": 0.2299304342135175, "learning_rate": 1.3553141628120107e-05, "loss": 0.1646, "step": 7012 }, { "epoch": 0.4, "grad_norm": 0.33815698261225796, "learning_rate": 1.3551402084244014e-05, "loss": 0.2277, "step": 7013 }, { "epoch": 0.4, "grad_norm": 0.3550244974142543, "learning_rate": 1.3549662417383018e-05, "loss": 0.3468, "step": 7014 }, { "epoch": 0.4, "grad_norm": 1.499478939324147, "learning_rate": 1.354792262759737e-05, "loss": 0.7654, "step": 7015 }, { "epoch": 0.4, "grad_norm": 0.33123396540469313, "learning_rate": 1.3546182714947309e-05, "loss": 0.2452, "step": 7016 }, { "epoch": 0.4, "grad_norm": 0.30626982106831946, "learning_rate": 1.3544442679493095e-05, "loss": 0.2432, "step": 7017 }, { "epoch": 0.4, "grad_norm": 0.36381971525818, "learning_rate": 1.3542702521294981e-05, "loss": 0.3203, "step": 7018 }, { "epoch": 0.4, "grad_norm": 0.2968935067872526, "learning_rate": 1.3540962240413233e-05, "loss": 0.2144, "step": 7019 }, { "epoch": 0.4, "grad_norm": 0.9758630593144416, "learning_rate": 1.3539221836908113e-05, "loss": 0.6957, "step": 7020 }, { "epoch": 0.4, "grad_norm": 0.47204156166840916, "learning_rate": 1.3537481310839897e-05, "loss": 0.351, "step": 7021 }, { "epoch": 0.4, "grad_norm": 0.31181724975707065, "learning_rate": 1.353574066226885e-05, "loss": 0.2339, "step": 7022 }, { "epoch": 0.4, "grad_norm": 0.8749376420778082, "learning_rate": 1.353399989125526e-05, "loss": 0.4849, "step": 7023 }, { "epoch": 0.4, "grad_norm": 0.2859353718730429, "learning_rate": 1.3532258997859404e-05, "loss": 0.197, "step": 7024 }, { "epoch": 0.4, "grad_norm": 0.3325129256222981, "learning_rate": 1.3530517982141574e-05, "loss": 0.2746, "step": 7025 }, { "epoch": 0.4, "grad_norm": 0.3507133854637492, "learning_rate": 1.3528776844162052e-05, "loss": 0.277, "step": 7026 }, { "epoch": 0.4, "grad_norm": 0.5776205911314057, "learning_rate": 1.3527035583981145e-05, "loss": 0.4077, "step": 7027 }, { "epoch": 0.4, "grad_norm": 0.3720017793660261, "learning_rate": 1.3525294201659145e-05, "loss": 0.2941, "step": 7028 }, { "epoch": 0.4, "grad_norm": 0.6266844281338557, "learning_rate": 1.3523552697256359e-05, "loss": 0.3631, "step": 7029 }, { "epoch": 0.4, "grad_norm": 0.3606261379793363, "learning_rate": 1.3521811070833095e-05, "loss": 0.2194, "step": 7030 }, { "epoch": 0.4, "grad_norm": 0.37657437220486956, "learning_rate": 1.3520069322449663e-05, "loss": 0.2835, "step": 7031 }, { "epoch": 0.4, "grad_norm": 0.8620238092878418, "learning_rate": 1.3518327452166385e-05, "loss": 0.4009, "step": 7032 }, { "epoch": 0.4, "grad_norm": 0.654820668093201, "learning_rate": 1.3516585460043576e-05, "loss": 0.4591, "step": 7033 }, { "epoch": 0.4, "grad_norm": 0.3013622890908199, "learning_rate": 1.3514843346141566e-05, "loss": 0.2671, "step": 7034 }, { "epoch": 0.4, "grad_norm": 0.4458870442813115, "learning_rate": 1.3513101110520678e-05, "loss": 0.3693, "step": 7035 }, { "epoch": 0.4, "grad_norm": 0.19906848799136267, "learning_rate": 1.3511358753241254e-05, "loss": 0.1152, "step": 7036 }, { "epoch": 0.4, "grad_norm": 0.32092656053178614, "learning_rate": 1.3509616274363623e-05, "loss": 0.2576, "step": 7037 }, { "epoch": 0.4, "grad_norm": 0.3677600497999778, "learning_rate": 1.3507873673948137e-05, "loss": 0.3478, "step": 7038 }, { "epoch": 0.4, "grad_norm": 0.9170207065182442, "learning_rate": 1.3506130952055132e-05, "loss": 0.4109, "step": 7039 }, { "epoch": 0.4, "grad_norm": 0.35086745820449605, "learning_rate": 1.3504388108744966e-05, "loss": 0.3033, "step": 7040 }, { "epoch": 0.4, "grad_norm": 1.099592946999764, "learning_rate": 1.3502645144077987e-05, "loss": 0.5843, "step": 7041 }, { "epoch": 0.4, "grad_norm": 0.212123843237808, "learning_rate": 1.350090205811456e-05, "loss": 0.1806, "step": 7042 }, { "epoch": 0.4, "grad_norm": 0.33900519569136367, "learning_rate": 1.3499158850915044e-05, "loss": 0.2861, "step": 7043 }, { "epoch": 0.4, "grad_norm": 0.8621314063592519, "learning_rate": 1.3497415522539807e-05, "loss": 0.6942, "step": 7044 }, { "epoch": 0.4, "grad_norm": 0.3746493330911956, "learning_rate": 1.3495672073049221e-05, "loss": 0.3091, "step": 7045 }, { "epoch": 0.4, "grad_norm": 0.3969087626583301, "learning_rate": 1.3493928502503664e-05, "loss": 0.2964, "step": 7046 }, { "epoch": 0.4, "grad_norm": 0.48381748985794387, "learning_rate": 1.3492184810963512e-05, "loss": 0.3625, "step": 7047 }, { "epoch": 0.4, "grad_norm": 0.3345842253885811, "learning_rate": 1.349044099848915e-05, "loss": 0.2105, "step": 7048 }, { "epoch": 0.4, "grad_norm": 0.30381301666422506, "learning_rate": 1.3488697065140964e-05, "loss": 0.1968, "step": 7049 }, { "epoch": 0.41, "grad_norm": 0.41334840598392936, "learning_rate": 1.348695301097935e-05, "loss": 0.3586, "step": 7050 }, { "epoch": 0.41, "grad_norm": 0.7832888653278328, "learning_rate": 1.3485208836064705e-05, "loss": 0.4857, "step": 7051 }, { "epoch": 0.41, "grad_norm": 0.33241008219443263, "learning_rate": 1.3483464540457428e-05, "loss": 0.2076, "step": 7052 }, { "epoch": 0.41, "grad_norm": 0.49777611048133946, "learning_rate": 1.348172012421792e-05, "loss": 0.3706, "step": 7053 }, { "epoch": 0.41, "grad_norm": 0.5163034090447539, "learning_rate": 1.3479975587406595e-05, "loss": 0.3745, "step": 7054 }, { "epoch": 0.41, "grad_norm": 0.22940568788016039, "learning_rate": 1.3478230930083868e-05, "loss": 0.1524, "step": 7055 }, { "epoch": 0.41, "grad_norm": 0.9077703091017053, "learning_rate": 1.3476486152310152e-05, "loss": 0.444, "step": 7056 }, { "epoch": 0.41, "grad_norm": 0.5325747150064066, "learning_rate": 1.3474741254145868e-05, "loss": 0.397, "step": 7057 }, { "epoch": 0.41, "grad_norm": 0.2938576127412327, "learning_rate": 1.3472996235651446e-05, "loss": 0.227, "step": 7058 }, { "epoch": 0.41, "grad_norm": 1.2376508865292748, "learning_rate": 1.3471251096887312e-05, "loss": 0.6284, "step": 7059 }, { "epoch": 0.41, "grad_norm": 0.400031588718824, "learning_rate": 1.3469505837913903e-05, "loss": 0.2609, "step": 7060 }, { "epoch": 0.41, "grad_norm": 0.3362257964775687, "learning_rate": 1.3467760458791656e-05, "loss": 0.2845, "step": 7061 }, { "epoch": 0.41, "grad_norm": 0.4483199711195374, "learning_rate": 1.3466014959581013e-05, "loss": 0.2888, "step": 7062 }, { "epoch": 0.41, "grad_norm": 1.0500941292938073, "learning_rate": 1.3464269340342422e-05, "loss": 0.6551, "step": 7063 }, { "epoch": 0.41, "grad_norm": 0.35210336824936767, "learning_rate": 1.346252360113633e-05, "loss": 0.2358, "step": 7064 }, { "epoch": 0.41, "grad_norm": 0.3657147809379971, "learning_rate": 1.3460777742023202e-05, "loss": 0.2663, "step": 7065 }, { "epoch": 0.41, "grad_norm": 0.503373542099447, "learning_rate": 1.3459031763063482e-05, "loss": 0.365, "step": 7066 }, { "epoch": 0.41, "grad_norm": 0.42739082768414866, "learning_rate": 1.3457285664317645e-05, "loss": 0.3174, "step": 7067 }, { "epoch": 0.41, "grad_norm": 0.358482347181096, "learning_rate": 1.3455539445846151e-05, "loss": 0.2484, "step": 7068 }, { "epoch": 0.41, "grad_norm": 0.3661152740979383, "learning_rate": 1.3453793107709476e-05, "loss": 0.3302, "step": 7069 }, { "epoch": 0.41, "grad_norm": 0.3814990470872192, "learning_rate": 1.3452046649968091e-05, "loss": 0.2572, "step": 7070 }, { "epoch": 0.41, "grad_norm": 0.3260559821888633, "learning_rate": 1.3450300072682485e-05, "loss": 0.1877, "step": 7071 }, { "epoch": 0.41, "grad_norm": 0.823857743699866, "learning_rate": 1.3448553375913132e-05, "loss": 0.4651, "step": 7072 }, { "epoch": 0.41, "grad_norm": 0.35011088470733437, "learning_rate": 1.3446806559720525e-05, "loss": 0.3223, "step": 7073 }, { "epoch": 0.41, "grad_norm": 0.35841773511333275, "learning_rate": 1.3445059624165156e-05, "loss": 0.3149, "step": 7074 }, { "epoch": 0.41, "grad_norm": 0.9612427340225015, "learning_rate": 1.3443312569307517e-05, "loss": 0.5688, "step": 7075 }, { "epoch": 0.41, "grad_norm": 0.25064310434852977, "learning_rate": 1.3441565395208114e-05, "loss": 0.1798, "step": 7076 }, { "epoch": 0.41, "grad_norm": 0.5838118579171294, "learning_rate": 1.343981810192745e-05, "loss": 0.3781, "step": 7077 }, { "epoch": 0.41, "grad_norm": 0.40143952342120387, "learning_rate": 1.3438070689526033e-05, "loss": 0.2896, "step": 7078 }, { "epoch": 0.41, "grad_norm": 0.36313108466885224, "learning_rate": 1.3436323158064373e-05, "loss": 0.2885, "step": 7079 }, { "epoch": 0.41, "grad_norm": 0.7995956838032602, "learning_rate": 1.3434575507602991e-05, "loss": 0.5193, "step": 7080 }, { "epoch": 0.41, "grad_norm": 0.4270076800549399, "learning_rate": 1.3432827738202407e-05, "loss": 0.2954, "step": 7081 }, { "epoch": 0.41, "grad_norm": 0.3050621499863082, "learning_rate": 1.3431079849923153e-05, "loss": 0.1848, "step": 7082 }, { "epoch": 0.41, "grad_norm": 0.3510960749297572, "learning_rate": 1.3429331842825742e-05, "loss": 0.2506, "step": 7083 }, { "epoch": 0.41, "grad_norm": 0.6715341783485381, "learning_rate": 1.342758371697072e-05, "loss": 0.3628, "step": 7084 }, { "epoch": 0.41, "grad_norm": 0.4085613591910861, "learning_rate": 1.342583547241862e-05, "loss": 0.2762, "step": 7085 }, { "epoch": 0.41, "grad_norm": 0.32885154493809926, "learning_rate": 1.3424087109229986e-05, "loss": 0.3057, "step": 7086 }, { "epoch": 0.41, "grad_norm": 1.1248597962403253, "learning_rate": 1.3422338627465364e-05, "loss": 0.7771, "step": 7087 }, { "epoch": 0.41, "grad_norm": 0.2362658079430318, "learning_rate": 1.3420590027185301e-05, "loss": 0.1333, "step": 7088 }, { "epoch": 0.41, "grad_norm": 0.27743124046471523, "learning_rate": 1.3418841308450353e-05, "loss": 0.2448, "step": 7089 }, { "epoch": 0.41, "grad_norm": 1.0089479383803033, "learning_rate": 1.3417092471321076e-05, "loss": 0.439, "step": 7090 }, { "epoch": 0.41, "grad_norm": 0.34819709868420223, "learning_rate": 1.3415343515858035e-05, "loss": 0.2192, "step": 7091 }, { "epoch": 0.41, "grad_norm": 0.5070973840014404, "learning_rate": 1.3413594442121796e-05, "loss": 0.3956, "step": 7092 }, { "epoch": 0.41, "grad_norm": 0.3532321667025554, "learning_rate": 1.3411845250172928e-05, "loss": 0.2936, "step": 7093 }, { "epoch": 0.41, "grad_norm": 0.3256958107973817, "learning_rate": 1.3410095940072004e-05, "loss": 0.1915, "step": 7094 }, { "epoch": 0.41, "grad_norm": 0.26218402620283915, "learning_rate": 1.3408346511879604e-05, "loss": 0.1904, "step": 7095 }, { "epoch": 0.41, "grad_norm": 0.9728861537090818, "learning_rate": 1.340659696565631e-05, "loss": 0.461, "step": 7096 }, { "epoch": 0.41, "grad_norm": 0.29866630802365746, "learning_rate": 1.3404847301462713e-05, "loss": 0.2247, "step": 7097 }, { "epoch": 0.41, "grad_norm": 0.5127063422711399, "learning_rate": 1.3403097519359397e-05, "loss": 0.3778, "step": 7098 }, { "epoch": 0.41, "grad_norm": 1.0938596974308756, "learning_rate": 1.3401347619406966e-05, "loss": 0.8696, "step": 7099 }, { "epoch": 0.41, "grad_norm": 0.3440710027431961, "learning_rate": 1.3399597601666008e-05, "loss": 0.214, "step": 7100 }, { "epoch": 0.41, "grad_norm": 0.33249458020431705, "learning_rate": 1.3397847466197133e-05, "loss": 0.2699, "step": 7101 }, { "epoch": 0.41, "grad_norm": 0.3150673143037075, "learning_rate": 1.3396097213060943e-05, "loss": 0.3037, "step": 7102 }, { "epoch": 0.41, "grad_norm": 0.9226442021708258, "learning_rate": 1.3394346842318058e-05, "loss": 0.5105, "step": 7103 }, { "epoch": 0.41, "grad_norm": 0.3788209793940605, "learning_rate": 1.3392596354029084e-05, "loss": 0.2471, "step": 7104 }, { "epoch": 0.41, "grad_norm": 0.36634771446717296, "learning_rate": 1.3390845748254645e-05, "loss": 0.3481, "step": 7105 }, { "epoch": 0.41, "grad_norm": 0.6158781133726381, "learning_rate": 1.3389095025055363e-05, "loss": 0.4241, "step": 7106 }, { "epoch": 0.41, "grad_norm": 0.3631589753231446, "learning_rate": 1.3387344184491869e-05, "loss": 0.2813, "step": 7107 }, { "epoch": 0.41, "grad_norm": 0.27955123400877324, "learning_rate": 1.3385593226624787e-05, "loss": 0.1621, "step": 7108 }, { "epoch": 0.41, "grad_norm": 0.3783695100880318, "learning_rate": 1.338384215151476e-05, "loss": 0.2813, "step": 7109 }, { "epoch": 0.41, "grad_norm": 0.6041099103124075, "learning_rate": 1.3382090959222425e-05, "loss": 0.3282, "step": 7110 }, { "epoch": 0.41, "grad_norm": 1.1035000418322451, "learning_rate": 1.3380339649808425e-05, "loss": 0.4968, "step": 7111 }, { "epoch": 0.41, "grad_norm": 0.4884716979614059, "learning_rate": 1.337858822333341e-05, "loss": 0.3715, "step": 7112 }, { "epoch": 0.41, "grad_norm": 0.3107210491119011, "learning_rate": 1.3376836679858026e-05, "loss": 0.2974, "step": 7113 }, { "epoch": 0.41, "grad_norm": 0.22911849054150063, "learning_rate": 1.3375085019442937e-05, "loss": 0.1643, "step": 7114 }, { "epoch": 0.41, "grad_norm": 0.5691552869077963, "learning_rate": 1.3373333242148796e-05, "loss": 0.3392, "step": 7115 }, { "epoch": 0.41, "grad_norm": 0.49816342959997756, "learning_rate": 1.337158134803627e-05, "loss": 0.3428, "step": 7116 }, { "epoch": 0.41, "grad_norm": 0.4375022231297627, "learning_rate": 1.3369829337166031e-05, "loss": 0.3051, "step": 7117 }, { "epoch": 0.41, "grad_norm": 0.5267758958092926, "learning_rate": 1.3368077209598744e-05, "loss": 0.3613, "step": 7118 }, { "epoch": 0.41, "grad_norm": 0.3807706588481771, "learning_rate": 1.3366324965395088e-05, "loss": 0.3324, "step": 7119 }, { "epoch": 0.41, "grad_norm": 0.23055407507354644, "learning_rate": 1.3364572604615744e-05, "loss": 0.156, "step": 7120 }, { "epoch": 0.41, "grad_norm": 0.47250351493960807, "learning_rate": 1.3362820127321391e-05, "loss": 0.3263, "step": 7121 }, { "epoch": 0.41, "grad_norm": 0.40387974042821045, "learning_rate": 1.3361067533572726e-05, "loss": 0.2879, "step": 7122 }, { "epoch": 0.41, "grad_norm": 0.6065089402375893, "learning_rate": 1.3359314823430436e-05, "loss": 0.5049, "step": 7123 }, { "epoch": 0.41, "grad_norm": 0.48384521210853454, "learning_rate": 1.335756199695522e-05, "loss": 0.3052, "step": 7124 }, { "epoch": 0.41, "grad_norm": 0.30084930743731136, "learning_rate": 1.3355809054207774e-05, "loss": 0.2649, "step": 7125 }, { "epoch": 0.41, "grad_norm": 0.9024344911983133, "learning_rate": 1.3354055995248805e-05, "loss": 0.6037, "step": 7126 }, { "epoch": 0.41, "grad_norm": 0.19286218457362345, "learning_rate": 1.335230282013902e-05, "loss": 0.1028, "step": 7127 }, { "epoch": 0.41, "grad_norm": 0.3553929943930889, "learning_rate": 1.3350549528939135e-05, "loss": 0.3059, "step": 7128 }, { "epoch": 0.41, "grad_norm": 0.3556388786278789, "learning_rate": 1.3348796121709862e-05, "loss": 0.3285, "step": 7129 }, { "epoch": 0.41, "grad_norm": 0.6547730919119816, "learning_rate": 1.3347042598511926e-05, "loss": 0.2819, "step": 7130 }, { "epoch": 0.41, "grad_norm": 0.36796600659289685, "learning_rate": 1.3345288959406045e-05, "loss": 0.2986, "step": 7131 }, { "epoch": 0.41, "grad_norm": 0.37729407521418595, "learning_rate": 1.3343535204452953e-05, "loss": 0.224, "step": 7132 }, { "epoch": 0.41, "grad_norm": 0.2750629669350354, "learning_rate": 1.3341781333713381e-05, "loss": 0.1908, "step": 7133 }, { "epoch": 0.41, "grad_norm": 0.44690214839091336, "learning_rate": 1.3340027347248068e-05, "loss": 0.3296, "step": 7134 }, { "epoch": 0.41, "grad_norm": 1.1545043346285377, "learning_rate": 1.3338273245117745e-05, "loss": 0.4603, "step": 7135 }, { "epoch": 0.41, "grad_norm": 0.40566944235161556, "learning_rate": 1.3336519027383168e-05, "loss": 0.3405, "step": 7136 }, { "epoch": 0.41, "grad_norm": 0.31670328181132457, "learning_rate": 1.3334764694105079e-05, "loss": 0.2335, "step": 7137 }, { "epoch": 0.41, "grad_norm": 0.8579819449562148, "learning_rate": 1.3333010245344232e-05, "loss": 0.5027, "step": 7138 }, { "epoch": 0.41, "grad_norm": 0.30341933853948494, "learning_rate": 1.3331255681161386e-05, "loss": 0.17, "step": 7139 }, { "epoch": 0.41, "grad_norm": 0.38527026955972754, "learning_rate": 1.3329501001617294e-05, "loss": 0.2346, "step": 7140 }, { "epoch": 0.41, "grad_norm": 0.5092949576398142, "learning_rate": 1.332774620677273e-05, "loss": 0.3329, "step": 7141 }, { "epoch": 0.41, "grad_norm": 1.7811634793100883, "learning_rate": 1.3325991296688455e-05, "loss": 0.7817, "step": 7142 }, { "epoch": 0.41, "grad_norm": 0.3345933775300051, "learning_rate": 1.3324236271425245e-05, "loss": 0.2055, "step": 7143 }, { "epoch": 0.41, "grad_norm": 1.1826649808614909, "learning_rate": 1.3322481131043876e-05, "loss": 0.6385, "step": 7144 }, { "epoch": 0.41, "grad_norm": 0.25436360208374936, "learning_rate": 1.332072587560513e-05, "loss": 0.2162, "step": 7145 }, { "epoch": 0.41, "grad_norm": 0.3245351746881674, "learning_rate": 1.3318970505169786e-05, "loss": 0.2445, "step": 7146 }, { "epoch": 0.41, "grad_norm": 0.7222227073899474, "learning_rate": 1.3317215019798639e-05, "loss": 0.497, "step": 7147 }, { "epoch": 0.41, "grad_norm": 0.3898074555572452, "learning_rate": 1.3315459419552477e-05, "loss": 0.3174, "step": 7148 }, { "epoch": 0.41, "grad_norm": 0.357549111885812, "learning_rate": 1.33137037044921e-05, "loss": 0.2622, "step": 7149 }, { "epoch": 0.41, "grad_norm": 0.8507678942147663, "learning_rate": 1.3311947874678306e-05, "loss": 0.2878, "step": 7150 }, { "epoch": 0.41, "grad_norm": 0.33806956641060104, "learning_rate": 1.3310191930171898e-05, "loss": 0.2648, "step": 7151 }, { "epoch": 0.41, "grad_norm": 0.41027889521502914, "learning_rate": 1.3308435871033687e-05, "loss": 0.2778, "step": 7152 }, { "epoch": 0.41, "grad_norm": 0.3561677104384658, "learning_rate": 1.3306679697324485e-05, "loss": 0.3, "step": 7153 }, { "epoch": 0.41, "grad_norm": 0.4096178462824716, "learning_rate": 1.3304923409105104e-05, "loss": 0.2723, "step": 7154 }, { "epoch": 0.41, "grad_norm": 0.38934669547306056, "learning_rate": 1.3303167006436371e-05, "loss": 0.278, "step": 7155 }, { "epoch": 0.41, "grad_norm": 0.37412796658975134, "learning_rate": 1.3301410489379103e-05, "loss": 0.2778, "step": 7156 }, { "epoch": 0.41, "grad_norm": 0.7818448980631783, "learning_rate": 1.3299653857994135e-05, "loss": 0.4927, "step": 7157 }, { "epoch": 0.41, "grad_norm": 0.3268093806978687, "learning_rate": 1.3297897112342294e-05, "loss": 0.2912, "step": 7158 }, { "epoch": 0.41, "grad_norm": 0.686057582809523, "learning_rate": 1.3296140252484417e-05, "loss": 0.419, "step": 7159 }, { "epoch": 0.41, "grad_norm": 0.2794205927146474, "learning_rate": 1.3294383278481346e-05, "loss": 0.2731, "step": 7160 }, { "epoch": 0.41, "grad_norm": 0.306089163013698, "learning_rate": 1.3292626190393923e-05, "loss": 0.2232, "step": 7161 }, { "epoch": 0.41, "grad_norm": 1.0595231500511526, "learning_rate": 1.3290868988282999e-05, "loss": 0.5485, "step": 7162 }, { "epoch": 0.41, "grad_norm": 0.7733042625073977, "learning_rate": 1.328911167220942e-05, "loss": 0.3675, "step": 7163 }, { "epoch": 0.41, "grad_norm": 0.3460305444373183, "learning_rate": 1.3287354242234047e-05, "loss": 0.2629, "step": 7164 }, { "epoch": 0.41, "grad_norm": 0.3792220485449821, "learning_rate": 1.3285596698417738e-05, "loss": 0.3372, "step": 7165 }, { "epoch": 0.41, "grad_norm": 0.2128910770084968, "learning_rate": 1.3283839040821355e-05, "loss": 0.1166, "step": 7166 }, { "epoch": 0.41, "grad_norm": 0.35463706912168896, "learning_rate": 1.3282081269505771e-05, "loss": 0.2832, "step": 7167 }, { "epoch": 0.41, "grad_norm": 0.5153840336512678, "learning_rate": 1.3280323384531852e-05, "loss": 0.3804, "step": 7168 }, { "epoch": 0.41, "grad_norm": 0.5089755863319722, "learning_rate": 1.3278565385960476e-05, "loss": 0.3391, "step": 7169 }, { "epoch": 0.41, "grad_norm": 0.4031836019221641, "learning_rate": 1.3276807273852522e-05, "loss": 0.2773, "step": 7170 }, { "epoch": 0.41, "grad_norm": 0.48049808170008806, "learning_rate": 1.3275049048268869e-05, "loss": 0.4095, "step": 7171 }, { "epoch": 0.41, "grad_norm": 0.2558281539455274, "learning_rate": 1.327329070927041e-05, "loss": 0.1998, "step": 7172 }, { "epoch": 0.41, "grad_norm": 0.38630900592037487, "learning_rate": 1.3271532256918036e-05, "loss": 0.295, "step": 7173 }, { "epoch": 0.41, "grad_norm": 0.39008618040812376, "learning_rate": 1.326977369127264e-05, "loss": 0.2581, "step": 7174 }, { "epoch": 0.41, "grad_norm": 0.6205933584816866, "learning_rate": 1.326801501239512e-05, "loss": 0.4402, "step": 7175 }, { "epoch": 0.41, "grad_norm": 0.31882272613528434, "learning_rate": 1.3266256220346383e-05, "loss": 0.1977, "step": 7176 }, { "epoch": 0.41, "grad_norm": 0.4407270029368251, "learning_rate": 1.3264497315187334e-05, "loss": 0.3436, "step": 7177 }, { "epoch": 0.41, "grad_norm": 0.38673637353069634, "learning_rate": 1.326273829697888e-05, "loss": 0.244, "step": 7178 }, { "epoch": 0.41, "grad_norm": 0.27113858546133, "learning_rate": 1.3260979165781942e-05, "loss": 0.1703, "step": 7179 }, { "epoch": 0.41, "grad_norm": 0.4419932070737161, "learning_rate": 1.3259219921657436e-05, "loss": 0.3672, "step": 7180 }, { "epoch": 0.41, "grad_norm": 0.45245896039209665, "learning_rate": 1.3257460564666283e-05, "loss": 0.3697, "step": 7181 }, { "epoch": 0.41, "grad_norm": 0.32808870623561165, "learning_rate": 1.3255701094869408e-05, "loss": 0.1927, "step": 7182 }, { "epoch": 0.41, "grad_norm": 1.1927858978015542, "learning_rate": 1.325394151232775e-05, "loss": 0.7927, "step": 7183 }, { "epoch": 0.41, "grad_norm": 0.4014879560091093, "learning_rate": 1.3252181817102235e-05, "loss": 0.3279, "step": 7184 }, { "epoch": 0.41, "grad_norm": 0.23244466150471954, "learning_rate": 1.3250422009253802e-05, "loss": 0.1628, "step": 7185 }, { "epoch": 0.41, "grad_norm": 0.4222245281834194, "learning_rate": 1.3248662088843395e-05, "loss": 0.2875, "step": 7186 }, { "epoch": 0.41, "grad_norm": 0.6467322169281083, "learning_rate": 1.3246902055931961e-05, "loss": 0.4428, "step": 7187 }, { "epoch": 0.41, "grad_norm": 0.38186519736687974, "learning_rate": 1.3245141910580446e-05, "loss": 0.3217, "step": 7188 }, { "epoch": 0.41, "grad_norm": 0.3362029065075712, "learning_rate": 1.324338165284981e-05, "loss": 0.2656, "step": 7189 }, { "epoch": 0.41, "grad_norm": 0.40943953574665665, "learning_rate": 1.3241621282801002e-05, "loss": 0.267, "step": 7190 }, { "epoch": 0.41, "grad_norm": 0.4200553653950379, "learning_rate": 1.3239860800494993e-05, "loss": 0.3042, "step": 7191 }, { "epoch": 0.41, "grad_norm": 0.4031697492165725, "learning_rate": 1.3238100205992739e-05, "loss": 0.286, "step": 7192 }, { "epoch": 0.41, "grad_norm": 0.4600727603852913, "learning_rate": 1.3236339499355217e-05, "loss": 0.3152, "step": 7193 }, { "epoch": 0.41, "grad_norm": 0.3986539572582897, "learning_rate": 1.3234578680643394e-05, "loss": 0.3215, "step": 7194 }, { "epoch": 0.41, "grad_norm": 0.4169919148480683, "learning_rate": 1.3232817749918256e-05, "loss": 0.3092, "step": 7195 }, { "epoch": 0.41, "grad_norm": 0.3654175654707768, "learning_rate": 1.3231056707240775e-05, "loss": 0.3141, "step": 7196 }, { "epoch": 0.41, "grad_norm": 0.36406119272639237, "learning_rate": 1.322929555267194e-05, "loss": 0.3191, "step": 7197 }, { "epoch": 0.41, "grad_norm": 0.31941887730802226, "learning_rate": 1.3227534286272741e-05, "loss": 0.2926, "step": 7198 }, { "epoch": 0.41, "grad_norm": 0.2620408151493418, "learning_rate": 1.3225772908104165e-05, "loss": 0.0687, "step": 7199 }, { "epoch": 0.41, "grad_norm": 0.30135243734827055, "learning_rate": 1.3224011418227215e-05, "loss": 0.268, "step": 7200 }, { "epoch": 0.41, "grad_norm": 0.5366585174326642, "learning_rate": 1.3222249816702885e-05, "loss": 0.3695, "step": 7201 }, { "epoch": 0.41, "grad_norm": 0.590033821849479, "learning_rate": 1.3220488103592184e-05, "loss": 0.381, "step": 7202 }, { "epoch": 0.41, "grad_norm": 0.3385399492527422, "learning_rate": 1.3218726278956117e-05, "loss": 0.2911, "step": 7203 }, { "epoch": 0.41, "grad_norm": 0.39860811495487514, "learning_rate": 1.32169643428557e-05, "loss": 0.393, "step": 7204 }, { "epoch": 0.41, "grad_norm": 0.25190821405668745, "learning_rate": 1.3215202295351946e-05, "loss": 0.1631, "step": 7205 }, { "epoch": 0.41, "grad_norm": 0.5825783438242356, "learning_rate": 1.3213440136505872e-05, "loss": 0.3373, "step": 7206 }, { "epoch": 0.41, "grad_norm": 0.46201854347371046, "learning_rate": 1.3211677866378505e-05, "loss": 0.3525, "step": 7207 }, { "epoch": 0.41, "grad_norm": 0.33494447559061813, "learning_rate": 1.3209915485030872e-05, "loss": 0.2768, "step": 7208 }, { "epoch": 0.41, "grad_norm": 0.5209714849109807, "learning_rate": 1.3208152992524004e-05, "loss": 0.334, "step": 7209 }, { "epoch": 0.41, "grad_norm": 0.37421682429920466, "learning_rate": 1.3206390388918937e-05, "loss": 0.3097, "step": 7210 }, { "epoch": 0.41, "grad_norm": 0.30114301634692187, "learning_rate": 1.3204627674276706e-05, "loss": 0.2041, "step": 7211 }, { "epoch": 0.41, "grad_norm": 0.30929089763537454, "learning_rate": 1.320286484865836e-05, "loss": 0.2373, "step": 7212 }, { "epoch": 0.41, "grad_norm": 0.5349211331103826, "learning_rate": 1.3201101912124938e-05, "loss": 0.4244, "step": 7213 }, { "epoch": 0.41, "grad_norm": 0.7905658849528135, "learning_rate": 1.31993388647375e-05, "loss": 0.4311, "step": 7214 }, { "epoch": 0.41, "grad_norm": 0.36725135920394, "learning_rate": 1.3197575706557089e-05, "loss": 0.2345, "step": 7215 }, { "epoch": 0.41, "grad_norm": 0.32845897394315215, "learning_rate": 1.3195812437644771e-05, "loss": 0.3105, "step": 7216 }, { "epoch": 0.41, "grad_norm": 0.29800837631567817, "learning_rate": 1.3194049058061606e-05, "loss": 0.1937, "step": 7217 }, { "epoch": 0.41, "grad_norm": 0.3439065437611227, "learning_rate": 1.3192285567868662e-05, "loss": 0.2248, "step": 7218 }, { "epoch": 0.41, "grad_norm": 0.5481495321388145, "learning_rate": 1.3190521967127e-05, "loss": 0.425, "step": 7219 }, { "epoch": 0.41, "grad_norm": 0.5796551641203623, "learning_rate": 1.3188758255897705e-05, "loss": 0.3267, "step": 7220 }, { "epoch": 0.41, "grad_norm": 0.5415045151729231, "learning_rate": 1.3186994434241845e-05, "loss": 0.2433, "step": 7221 }, { "epoch": 0.41, "grad_norm": 0.5321969246900784, "learning_rate": 1.3185230502220508e-05, "loss": 0.3406, "step": 7222 }, { "epoch": 0.41, "grad_norm": 0.2422636781051529, "learning_rate": 1.3183466459894774e-05, "loss": 0.2162, "step": 7223 }, { "epoch": 0.42, "grad_norm": 0.4012142713831807, "learning_rate": 1.3181702307325732e-05, "loss": 0.3343, "step": 7224 }, { "epoch": 0.42, "grad_norm": 0.4794094927724806, "learning_rate": 1.3179938044574478e-05, "loss": 0.2723, "step": 7225 }, { "epoch": 0.42, "grad_norm": 0.9303568882746546, "learning_rate": 1.3178173671702106e-05, "loss": 0.4344, "step": 7226 }, { "epoch": 0.42, "grad_norm": 0.6859661533697369, "learning_rate": 1.3176409188769715e-05, "loss": 0.4222, "step": 7227 }, { "epoch": 0.42, "grad_norm": 0.27217942492278485, "learning_rate": 1.3174644595838411e-05, "loss": 0.2338, "step": 7228 }, { "epoch": 0.42, "grad_norm": 0.3033095269716523, "learning_rate": 1.3172879892969302e-05, "loss": 0.1795, "step": 7229 }, { "epoch": 0.42, "grad_norm": 0.7835735876374561, "learning_rate": 1.3171115080223498e-05, "loss": 0.4627, "step": 7230 }, { "epoch": 0.42, "grad_norm": 0.6595555477893355, "learning_rate": 1.3169350157662115e-05, "loss": 0.2827, "step": 7231 }, { "epoch": 0.42, "grad_norm": 0.45468179409236015, "learning_rate": 1.3167585125346271e-05, "loss": 0.3494, "step": 7232 }, { "epoch": 0.42, "grad_norm": 0.6287105703435774, "learning_rate": 1.3165819983337093e-05, "loss": 0.3983, "step": 7233 }, { "epoch": 0.42, "grad_norm": 0.4100430193967899, "learning_rate": 1.3164054731695706e-05, "loss": 0.2633, "step": 7234 }, { "epoch": 0.42, "grad_norm": 0.3711852345127576, "learning_rate": 1.3162289370483239e-05, "loss": 0.2647, "step": 7235 }, { "epoch": 0.42, "grad_norm": 0.37139285226115515, "learning_rate": 1.3160523899760824e-05, "loss": 0.3054, "step": 7236 }, { "epoch": 0.42, "grad_norm": 0.4224536498097396, "learning_rate": 1.3158758319589604e-05, "loss": 0.3383, "step": 7237 }, { "epoch": 0.42, "grad_norm": 0.5765797692400455, "learning_rate": 1.3156992630030719e-05, "loss": 0.2837, "step": 7238 }, { "epoch": 0.42, "grad_norm": 0.38557712218290724, "learning_rate": 1.3155226831145316e-05, "loss": 0.2873, "step": 7239 }, { "epoch": 0.42, "grad_norm": 0.31621584525691937, "learning_rate": 1.3153460922994543e-05, "loss": 0.2885, "step": 7240 }, { "epoch": 0.42, "grad_norm": 0.29877105273955934, "learning_rate": 1.3151694905639553e-05, "loss": 0.164, "step": 7241 }, { "epoch": 0.42, "grad_norm": 0.5334188626320423, "learning_rate": 1.3149928779141506e-05, "loss": 0.4122, "step": 7242 }, { "epoch": 0.42, "grad_norm": 0.3828319018088226, "learning_rate": 1.3148162543561557e-05, "loss": 0.3108, "step": 7243 }, { "epoch": 0.42, "grad_norm": 0.27405350590100247, "learning_rate": 1.3146396198960881e-05, "loss": 0.2322, "step": 7244 }, { "epoch": 0.42, "grad_norm": 0.7653235271564569, "learning_rate": 1.3144629745400632e-05, "loss": 0.3892, "step": 7245 }, { "epoch": 0.42, "grad_norm": 0.40187932943275734, "learning_rate": 1.3142863182941996e-05, "loss": 0.2975, "step": 7246 }, { "epoch": 0.42, "grad_norm": 0.3764866541385056, "learning_rate": 1.3141096511646141e-05, "loss": 0.256, "step": 7247 }, { "epoch": 0.42, "grad_norm": 0.5491287247419514, "learning_rate": 1.3139329731574248e-05, "loss": 0.402, "step": 7248 }, { "epoch": 0.42, "grad_norm": 0.33817869550264207, "learning_rate": 1.3137562842787502e-05, "loss": 0.314, "step": 7249 }, { "epoch": 0.42, "grad_norm": 1.1455881396931549, "learning_rate": 1.3135795845347091e-05, "loss": 0.786, "step": 7250 }, { "epoch": 0.42, "grad_norm": 0.260305077900469, "learning_rate": 1.3134028739314204e-05, "loss": 0.1759, "step": 7251 }, { "epoch": 0.42, "grad_norm": 0.3841956692601113, "learning_rate": 1.3132261524750038e-05, "loss": 0.2806, "step": 7252 }, { "epoch": 0.42, "grad_norm": 1.0728971327464343, "learning_rate": 1.3130494201715786e-05, "loss": 0.5737, "step": 7253 }, { "epoch": 0.42, "grad_norm": 0.4996657504738954, "learning_rate": 1.312872677027266e-05, "loss": 0.2811, "step": 7254 }, { "epoch": 0.42, "grad_norm": 0.4259459115097978, "learning_rate": 1.3126959230481855e-05, "loss": 0.3429, "step": 7255 }, { "epoch": 0.42, "grad_norm": 0.3836148834285065, "learning_rate": 1.312519158240459e-05, "loss": 0.3439, "step": 7256 }, { "epoch": 0.42, "grad_norm": 0.195603144277449, "learning_rate": 1.3123423826102074e-05, "loss": 0.1038, "step": 7257 }, { "epoch": 0.42, "grad_norm": 0.3946116029677441, "learning_rate": 1.3121655961635523e-05, "loss": 0.3305, "step": 7258 }, { "epoch": 0.42, "grad_norm": 0.429020225728886, "learning_rate": 1.311988798906616e-05, "loss": 0.3417, "step": 7259 }, { "epoch": 0.42, "grad_norm": 0.43981050593951315, "learning_rate": 1.3118119908455214e-05, "loss": 0.2962, "step": 7260 }, { "epoch": 0.42, "grad_norm": 0.36302632134687873, "learning_rate": 1.3116351719863906e-05, "loss": 0.297, "step": 7261 }, { "epoch": 0.42, "grad_norm": 0.44066367931176303, "learning_rate": 1.3114583423353476e-05, "loss": 0.3472, "step": 7262 }, { "epoch": 0.42, "grad_norm": 0.301064575674817, "learning_rate": 1.3112815018985154e-05, "loss": 0.2503, "step": 7263 }, { "epoch": 0.42, "grad_norm": 0.31585777990855474, "learning_rate": 1.311104650682018e-05, "loss": 0.2119, "step": 7264 }, { "epoch": 0.42, "grad_norm": 0.7170533675281239, "learning_rate": 1.3109277886919802e-05, "loss": 0.4184, "step": 7265 }, { "epoch": 0.42, "grad_norm": 0.7914186842307775, "learning_rate": 1.3107509159345262e-05, "loss": 0.5291, "step": 7266 }, { "epoch": 0.42, "grad_norm": 0.267323184964963, "learning_rate": 1.3105740324157817e-05, "loss": 0.2167, "step": 7267 }, { "epoch": 0.42, "grad_norm": 0.4627191275190162, "learning_rate": 1.3103971381418713e-05, "loss": 0.3843, "step": 7268 }, { "epoch": 0.42, "grad_norm": 0.2892535892426708, "learning_rate": 1.310220233118922e-05, "loss": 0.1798, "step": 7269 }, { "epoch": 0.42, "grad_norm": 0.42638022005244247, "learning_rate": 1.3100433173530589e-05, "loss": 0.222, "step": 7270 }, { "epoch": 0.42, "grad_norm": 0.6288156366173488, "learning_rate": 1.3098663908504091e-05, "loss": 0.3793, "step": 7271 }, { "epoch": 0.42, "grad_norm": 0.5008266918389817, "learning_rate": 1.3096894536170994e-05, "loss": 0.3658, "step": 7272 }, { "epoch": 0.42, "grad_norm": 0.34497131150322446, "learning_rate": 1.3095125056592575e-05, "loss": 0.2732, "step": 7273 }, { "epoch": 0.42, "grad_norm": 0.9667430455753272, "learning_rate": 1.3093355469830107e-05, "loss": 0.5544, "step": 7274 }, { "epoch": 0.42, "grad_norm": 0.2635365400899789, "learning_rate": 1.3091585775944873e-05, "loss": 0.2102, "step": 7275 }, { "epoch": 0.42, "grad_norm": 0.3788616946807546, "learning_rate": 1.3089815974998154e-05, "loss": 0.275, "step": 7276 }, { "epoch": 0.42, "grad_norm": 0.897185868184525, "learning_rate": 1.3088046067051243e-05, "loss": 0.3707, "step": 7277 }, { "epoch": 0.42, "grad_norm": 0.9592855330664415, "learning_rate": 1.308627605216543e-05, "loss": 0.624, "step": 7278 }, { "epoch": 0.42, "grad_norm": 0.347671499106493, "learning_rate": 1.308450593040201e-05, "loss": 0.2543, "step": 7279 }, { "epoch": 0.42, "grad_norm": 0.4070033059546702, "learning_rate": 1.3082735701822281e-05, "loss": 0.2806, "step": 7280 }, { "epoch": 0.42, "grad_norm": 0.30304605216105446, "learning_rate": 1.3080965366487548e-05, "loss": 0.1902, "step": 7281 }, { "epoch": 0.42, "grad_norm": 0.45872537263094, "learning_rate": 1.3079194924459118e-05, "loss": 0.2992, "step": 7282 }, { "epoch": 0.42, "grad_norm": 0.5951235637057674, "learning_rate": 1.3077424375798295e-05, "loss": 0.2988, "step": 7283 }, { "epoch": 0.42, "grad_norm": 1.182568248414101, "learning_rate": 1.3075653720566404e-05, "loss": 0.7021, "step": 7284 }, { "epoch": 0.42, "grad_norm": 0.34373368560518014, "learning_rate": 1.3073882958824755e-05, "loss": 0.2923, "step": 7285 }, { "epoch": 0.42, "grad_norm": 1.157375136004901, "learning_rate": 1.307211209063467e-05, "loss": 0.7641, "step": 7286 }, { "epoch": 0.42, "grad_norm": 0.2623129816600877, "learning_rate": 1.3070341116057476e-05, "loss": 0.2098, "step": 7287 }, { "epoch": 0.42, "grad_norm": 0.3824276930333361, "learning_rate": 1.3068570035154503e-05, "loss": 0.2981, "step": 7288 }, { "epoch": 0.42, "grad_norm": 0.4109022486432033, "learning_rate": 1.306679884798708e-05, "loss": 0.2912, "step": 7289 }, { "epoch": 0.42, "grad_norm": 0.5582932916423002, "learning_rate": 1.3065027554616547e-05, "loss": 0.302, "step": 7290 }, { "epoch": 0.42, "grad_norm": 0.34401297473123493, "learning_rate": 1.3063256155104239e-05, "loss": 0.2613, "step": 7291 }, { "epoch": 0.42, "grad_norm": 0.5353972926544106, "learning_rate": 1.3061484649511503e-05, "loss": 0.4175, "step": 7292 }, { "epoch": 0.42, "grad_norm": 0.5389049209563237, "learning_rate": 1.3059713037899683e-05, "loss": 0.3414, "step": 7293 }, { "epoch": 0.42, "grad_norm": 0.40682939769048926, "learning_rate": 1.3057941320330134e-05, "loss": 0.2576, "step": 7294 }, { "epoch": 0.42, "grad_norm": 0.270271026585561, "learning_rate": 1.3056169496864208e-05, "loss": 0.2742, "step": 7295 }, { "epoch": 0.42, "grad_norm": 0.28083438824338586, "learning_rate": 1.3054397567563266e-05, "loss": 0.134, "step": 7296 }, { "epoch": 0.42, "grad_norm": 0.3797228091663637, "learning_rate": 1.3052625532488663e-05, "loss": 0.2747, "step": 7297 }, { "epoch": 0.42, "grad_norm": 0.8666369485609857, "learning_rate": 1.3050853391701774e-05, "loss": 0.6257, "step": 7298 }, { "epoch": 0.42, "grad_norm": 0.3821053180321841, "learning_rate": 1.304908114526396e-05, "loss": 0.3131, "step": 7299 }, { "epoch": 0.42, "grad_norm": 0.327705833281437, "learning_rate": 1.3047308793236599e-05, "loss": 0.2423, "step": 7300 }, { "epoch": 0.42, "grad_norm": 0.32553038184470945, "learning_rate": 1.3045536335681064e-05, "loss": 0.2019, "step": 7301 }, { "epoch": 0.42, "grad_norm": 1.0963164215457855, "learning_rate": 1.3043763772658739e-05, "loss": 0.7163, "step": 7302 }, { "epoch": 0.42, "grad_norm": 0.2779405820004573, "learning_rate": 1.3041991104231004e-05, "loss": 0.2391, "step": 7303 }, { "epoch": 0.42, "grad_norm": 1.1690442501518032, "learning_rate": 1.3040218330459249e-05, "loss": 0.795, "step": 7304 }, { "epoch": 0.42, "grad_norm": 0.6931092185091348, "learning_rate": 1.3038445451404862e-05, "loss": 0.4273, "step": 7305 }, { "epoch": 0.42, "grad_norm": 0.33132146745020336, "learning_rate": 1.3036672467129241e-05, "loss": 0.2127, "step": 7306 }, { "epoch": 0.42, "grad_norm": 0.3629514950779212, "learning_rate": 1.3034899377693782e-05, "loss": 0.313, "step": 7307 }, { "epoch": 0.42, "grad_norm": 0.297556760270983, "learning_rate": 1.3033126183159887e-05, "loss": 0.2063, "step": 7308 }, { "epoch": 0.42, "grad_norm": 0.2941708532876371, "learning_rate": 1.3031352883588965e-05, "loss": 0.2085, "step": 7309 }, { "epoch": 0.42, "grad_norm": 0.7852519937723851, "learning_rate": 1.3029579479042423e-05, "loss": 0.5261, "step": 7310 }, { "epoch": 0.42, "grad_norm": 0.4725984968530518, "learning_rate": 1.3027805969581674e-05, "loss": 0.327, "step": 7311 }, { "epoch": 0.42, "grad_norm": 0.758050405985813, "learning_rate": 1.3026032355268132e-05, "loss": 0.3737, "step": 7312 }, { "epoch": 0.42, "grad_norm": 0.27727090117219044, "learning_rate": 1.3024258636163221e-05, "loss": 0.2148, "step": 7313 }, { "epoch": 0.42, "grad_norm": 0.27413178777534775, "learning_rate": 1.3022484812328365e-05, "loss": 0.2706, "step": 7314 }, { "epoch": 0.42, "grad_norm": 0.5412617681591296, "learning_rate": 1.3020710883824987e-05, "loss": 0.3523, "step": 7315 }, { "epoch": 0.42, "grad_norm": 0.45275149176644774, "learning_rate": 1.3018936850714524e-05, "loss": 0.2881, "step": 7316 }, { "epoch": 0.42, "grad_norm": 0.7923988739792034, "learning_rate": 1.3017162713058404e-05, "loss": 0.4677, "step": 7317 }, { "epoch": 0.42, "grad_norm": 0.39895495917725043, "learning_rate": 1.3015388470918072e-05, "loss": 0.2999, "step": 7318 }, { "epoch": 0.42, "grad_norm": 0.23533187055257196, "learning_rate": 1.3013614124354969e-05, "loss": 0.201, "step": 7319 }, { "epoch": 0.42, "grad_norm": 0.41061255560760335, "learning_rate": 1.3011839673430536e-05, "loss": 0.2942, "step": 7320 }, { "epoch": 0.42, "grad_norm": 0.3836948488403221, "learning_rate": 1.3010065118206223e-05, "loss": 0.3036, "step": 7321 }, { "epoch": 0.42, "grad_norm": 0.8039043999491806, "learning_rate": 1.3008290458743486e-05, "loss": 0.3215, "step": 7322 }, { "epoch": 0.42, "grad_norm": 0.3526267061237842, "learning_rate": 1.3006515695103779e-05, "loss": 0.3402, "step": 7323 }, { "epoch": 0.42, "grad_norm": 0.3874565419841915, "learning_rate": 1.3004740827348563e-05, "loss": 0.2971, "step": 7324 }, { "epoch": 0.42, "grad_norm": 0.4916981175451748, "learning_rate": 1.3002965855539303e-05, "loss": 0.3836, "step": 7325 }, { "epoch": 0.42, "grad_norm": 0.1963129197249394, "learning_rate": 1.300119077973746e-05, "loss": 0.1841, "step": 7326 }, { "epoch": 0.42, "grad_norm": 0.3374193053149697, "learning_rate": 1.2999415600004515e-05, "loss": 0.2758, "step": 7327 }, { "epoch": 0.42, "grad_norm": 0.9488092760590442, "learning_rate": 1.2997640316401934e-05, "loss": 0.4981, "step": 7328 }, { "epoch": 0.42, "grad_norm": 0.6829150533818018, "learning_rate": 1.2995864928991198e-05, "loss": 0.3648, "step": 7329 }, { "epoch": 0.42, "grad_norm": 0.5376235231735698, "learning_rate": 1.2994089437833788e-05, "loss": 0.3335, "step": 7330 }, { "epoch": 0.42, "grad_norm": 0.30612015196427483, "learning_rate": 1.2992313842991189e-05, "loss": 0.2881, "step": 7331 }, { "epoch": 0.42, "grad_norm": 0.35369907883631363, "learning_rate": 1.2990538144524894e-05, "loss": 0.2176, "step": 7332 }, { "epoch": 0.42, "grad_norm": 0.6509986938076566, "learning_rate": 1.2988762342496386e-05, "loss": 0.366, "step": 7333 }, { "epoch": 0.42, "grad_norm": 0.4988119608715147, "learning_rate": 1.298698643696717e-05, "loss": 0.3416, "step": 7334 }, { "epoch": 0.42, "grad_norm": 0.2772638591631809, "learning_rate": 1.2985210427998743e-05, "loss": 0.2226, "step": 7335 }, { "epoch": 0.42, "grad_norm": 0.3951269297273973, "learning_rate": 1.2983434315652606e-05, "loss": 0.2974, "step": 7336 }, { "epoch": 0.42, "grad_norm": 0.5102487142923199, "learning_rate": 1.2981658099990266e-05, "loss": 0.3945, "step": 7337 }, { "epoch": 0.42, "grad_norm": 0.4414157270716968, "learning_rate": 1.2979881781073235e-05, "loss": 0.3039, "step": 7338 }, { "epoch": 0.42, "grad_norm": 0.2909371552105041, "learning_rate": 1.2978105358963026e-05, "loss": 0.2469, "step": 7339 }, { "epoch": 0.42, "grad_norm": 0.5649634546078088, "learning_rate": 1.2976328833721157e-05, "loss": 0.3579, "step": 7340 }, { "epoch": 0.42, "grad_norm": 0.3848501314342395, "learning_rate": 1.2974552205409147e-05, "loss": 0.2769, "step": 7341 }, { "epoch": 0.42, "grad_norm": 0.3049961532656385, "learning_rate": 1.2972775474088524e-05, "loss": 0.2088, "step": 7342 }, { "epoch": 0.42, "grad_norm": 0.34617722088590974, "learning_rate": 1.297099863982081e-05, "loss": 0.3243, "step": 7343 }, { "epoch": 0.42, "grad_norm": 0.7259079907052219, "learning_rate": 1.2969221702667547e-05, "loss": 0.5144, "step": 7344 }, { "epoch": 0.42, "grad_norm": 0.38372525596694496, "learning_rate": 1.2967444662690261e-05, "loss": 0.2622, "step": 7345 }, { "epoch": 0.42, "grad_norm": 0.5209288626419487, "learning_rate": 1.2965667519950494e-05, "loss": 0.3831, "step": 7346 }, { "epoch": 0.42, "grad_norm": 0.23884674114338472, "learning_rate": 1.2963890274509789e-05, "loss": 0.2358, "step": 7347 }, { "epoch": 0.42, "grad_norm": 0.38743989121030176, "learning_rate": 1.2962112926429691e-05, "loss": 0.1331, "step": 7348 }, { "epoch": 0.42, "grad_norm": 0.43142029339243576, "learning_rate": 1.2960335475771748e-05, "loss": 0.3201, "step": 7349 }, { "epoch": 0.42, "grad_norm": 0.3556038132379808, "learning_rate": 1.2958557922597516e-05, "loss": 0.34, "step": 7350 }, { "epoch": 0.42, "grad_norm": 0.5629136553826217, "learning_rate": 1.2956780266968552e-05, "loss": 0.409, "step": 7351 }, { "epoch": 0.42, "grad_norm": 0.3375499747099566, "learning_rate": 1.2955002508946413e-05, "loss": 0.2461, "step": 7352 }, { "epoch": 0.42, "grad_norm": 0.26063028491401785, "learning_rate": 1.2953224648592664e-05, "loss": 0.1594, "step": 7353 }, { "epoch": 0.42, "grad_norm": 0.3286104317910206, "learning_rate": 1.2951446685968874e-05, "loss": 0.2815, "step": 7354 }, { "epoch": 0.42, "grad_norm": 0.33363688183935714, "learning_rate": 1.294966862113661e-05, "loss": 0.2217, "step": 7355 }, { "epoch": 0.42, "grad_norm": 0.6185356603885251, "learning_rate": 1.2947890454157448e-05, "loss": 0.4454, "step": 7356 }, { "epoch": 0.42, "grad_norm": 0.5052412376086751, "learning_rate": 1.294611218509297e-05, "loss": 0.3758, "step": 7357 }, { "epoch": 0.42, "grad_norm": 0.31425887157261073, "learning_rate": 1.2944333814004748e-05, "loss": 0.2567, "step": 7358 }, { "epoch": 0.42, "grad_norm": 0.3024510934501898, "learning_rate": 1.2942555340954377e-05, "loss": 0.2408, "step": 7359 }, { "epoch": 0.42, "grad_norm": 0.2721480122115268, "learning_rate": 1.294077676600344e-05, "loss": 0.1685, "step": 7360 }, { "epoch": 0.42, "grad_norm": 0.4334281254358977, "learning_rate": 1.293899808921353e-05, "loss": 0.3266, "step": 7361 }, { "epoch": 0.42, "grad_norm": 0.33199319188044935, "learning_rate": 1.2937219310646242e-05, "loss": 0.2896, "step": 7362 }, { "epoch": 0.42, "grad_norm": 0.5113244424261909, "learning_rate": 1.2935440430363177e-05, "loss": 0.3779, "step": 7363 }, { "epoch": 0.42, "grad_norm": 0.4415669571896982, "learning_rate": 1.2933661448425933e-05, "loss": 0.3187, "step": 7364 }, { "epoch": 0.42, "grad_norm": 0.21104509788054693, "learning_rate": 1.2931882364896125e-05, "loss": 0.1266, "step": 7365 }, { "epoch": 0.42, "grad_norm": 0.39429657285673275, "learning_rate": 1.2930103179835352e-05, "loss": 0.3014, "step": 7366 }, { "epoch": 0.42, "grad_norm": 0.34096887677103693, "learning_rate": 1.2928323893305233e-05, "loss": 0.2779, "step": 7367 }, { "epoch": 0.42, "grad_norm": 0.5398849722107039, "learning_rate": 1.2926544505367384e-05, "loss": 0.3547, "step": 7368 }, { "epoch": 0.42, "grad_norm": 0.7583480793502161, "learning_rate": 1.2924765016083427e-05, "loss": 0.5297, "step": 7369 }, { "epoch": 0.42, "grad_norm": 0.30692372439687576, "learning_rate": 1.2922985425514977e-05, "loss": 0.279, "step": 7370 }, { "epoch": 0.42, "grad_norm": 0.270890793342731, "learning_rate": 1.2921205733723672e-05, "loss": 0.1777, "step": 7371 }, { "epoch": 0.42, "grad_norm": 0.33009884553113095, "learning_rate": 1.2919425940771138e-05, "loss": 0.2584, "step": 7372 }, { "epoch": 0.42, "grad_norm": 0.3529582845492279, "learning_rate": 1.2917646046719007e-05, "loss": 0.304, "step": 7373 }, { "epoch": 0.42, "grad_norm": 0.4422830853360155, "learning_rate": 1.2915866051628923e-05, "loss": 0.3514, "step": 7374 }, { "epoch": 0.42, "grad_norm": 0.3936738090536721, "learning_rate": 1.291408595556252e-05, "loss": 0.258, "step": 7375 }, { "epoch": 0.42, "grad_norm": 0.3645622895731119, "learning_rate": 1.2912305758581444e-05, "loss": 0.3064, "step": 7376 }, { "epoch": 0.42, "grad_norm": 0.8760977353616266, "learning_rate": 1.2910525460747346e-05, "loss": 0.4878, "step": 7377 }, { "epoch": 0.42, "grad_norm": 0.1923060008162484, "learning_rate": 1.290874506212188e-05, "loss": 0.1669, "step": 7378 }, { "epoch": 0.42, "grad_norm": 0.402353938670631, "learning_rate": 1.2906964562766691e-05, "loss": 0.3496, "step": 7379 }, { "epoch": 0.42, "grad_norm": 0.7286714606166612, "learning_rate": 1.290518396274345e-05, "loss": 0.437, "step": 7380 }, { "epoch": 0.42, "grad_norm": 0.35226470412228356, "learning_rate": 1.290340326211381e-05, "loss": 0.2338, "step": 7381 }, { "epoch": 0.42, "grad_norm": 0.3621217534989928, "learning_rate": 1.290162246093944e-05, "loss": 0.2912, "step": 7382 }, { "epoch": 0.42, "grad_norm": 0.4861495391034015, "learning_rate": 1.289984155928201e-05, "loss": 0.3546, "step": 7383 }, { "epoch": 0.42, "grad_norm": 0.64343988125234, "learning_rate": 1.289806055720319e-05, "loss": 0.2483, "step": 7384 }, { "epoch": 0.42, "grad_norm": 0.3655898241983234, "learning_rate": 1.2896279454764659e-05, "loss": 0.2909, "step": 7385 }, { "epoch": 0.42, "grad_norm": 0.29398028827370154, "learning_rate": 1.28944982520281e-05, "loss": 0.2718, "step": 7386 }, { "epoch": 0.42, "grad_norm": 1.1800139635163578, "learning_rate": 1.2892716949055184e-05, "loss": 0.7545, "step": 7387 }, { "epoch": 0.42, "grad_norm": 0.3023709880759031, "learning_rate": 1.2890935545907608e-05, "loss": 0.2228, "step": 7388 }, { "epoch": 0.42, "grad_norm": 0.6965321600545177, "learning_rate": 1.2889154042647056e-05, "loss": 0.4642, "step": 7389 }, { "epoch": 0.42, "grad_norm": 0.35911214934551505, "learning_rate": 1.2887372439335224e-05, "loss": 0.3357, "step": 7390 }, { "epoch": 0.42, "grad_norm": 0.29662533744407216, "learning_rate": 1.2885590736033808e-05, "loss": 0.2187, "step": 7391 }, { "epoch": 0.42, "grad_norm": 0.27718955489176644, "learning_rate": 1.2883808932804512e-05, "loss": 0.1913, "step": 7392 }, { "epoch": 0.42, "grad_norm": 0.4086851848096156, "learning_rate": 1.2882027029709034e-05, "loss": 0.3497, "step": 7393 }, { "epoch": 0.42, "grad_norm": 0.3038815146416857, "learning_rate": 1.2880245026809085e-05, "loss": 0.2307, "step": 7394 }, { "epoch": 0.42, "grad_norm": 1.500569565176652, "learning_rate": 1.2878462924166374e-05, "loss": 0.661, "step": 7395 }, { "epoch": 0.42, "grad_norm": 0.6963263635658477, "learning_rate": 1.2876680721842616e-05, "loss": 0.4307, "step": 7396 }, { "epoch": 0.42, "grad_norm": 0.333813168785862, "learning_rate": 1.2874898419899528e-05, "loss": 0.2022, "step": 7397 }, { "epoch": 0.43, "grad_norm": 0.24787304670295343, "learning_rate": 1.287311601839883e-05, "loss": 0.2155, "step": 7398 }, { "epoch": 0.43, "grad_norm": 0.7433576356848729, "learning_rate": 1.2871333517402251e-05, "loss": 0.481, "step": 7399 }, { "epoch": 0.43, "grad_norm": 0.3939320064161549, "learning_rate": 1.2869550916971512e-05, "loss": 0.2881, "step": 7400 }, { "epoch": 0.43, "grad_norm": 0.8308971616875137, "learning_rate": 1.2867768217168353e-05, "loss": 0.3781, "step": 7401 }, { "epoch": 0.43, "grad_norm": 0.3497647575246814, "learning_rate": 1.28659854180545e-05, "loss": 0.3359, "step": 7402 }, { "epoch": 0.43, "grad_norm": 0.36233416373040706, "learning_rate": 1.2864202519691698e-05, "loss": 0.2455, "step": 7403 }, { "epoch": 0.43, "grad_norm": 0.21250382304746632, "learning_rate": 1.2862419522141684e-05, "loss": 0.1056, "step": 7404 }, { "epoch": 0.43, "grad_norm": 0.48055525319680503, "learning_rate": 1.2860636425466207e-05, "loss": 0.353, "step": 7405 }, { "epoch": 0.43, "grad_norm": 0.2929548307001326, "learning_rate": 1.285885322972701e-05, "loss": 0.267, "step": 7406 }, { "epoch": 0.43, "grad_norm": 0.9742707277009065, "learning_rate": 1.2857069934985851e-05, "loss": 0.3784, "step": 7407 }, { "epoch": 0.43, "grad_norm": 0.7845705775706029, "learning_rate": 1.2855286541304481e-05, "loss": 0.4938, "step": 7408 }, { "epoch": 0.43, "grad_norm": 0.3324160422402967, "learning_rate": 1.2853503048744664e-05, "loss": 0.2625, "step": 7409 }, { "epoch": 0.43, "grad_norm": 0.2891167340341053, "learning_rate": 1.2851719457368157e-05, "loss": 0.2068, "step": 7410 }, { "epoch": 0.43, "grad_norm": 0.42210199453028185, "learning_rate": 1.2849935767236729e-05, "loss": 0.3184, "step": 7411 }, { "epoch": 0.43, "grad_norm": 0.36308547586520445, "learning_rate": 1.284815197841215e-05, "loss": 0.3046, "step": 7412 }, { "epoch": 0.43, "grad_norm": 1.1643204041845043, "learning_rate": 1.2846368090956185e-05, "loss": 0.4728, "step": 7413 }, { "epoch": 0.43, "grad_norm": 0.31753775412680657, "learning_rate": 1.284458410493062e-05, "loss": 0.2662, "step": 7414 }, { "epoch": 0.43, "grad_norm": 0.38438890919644864, "learning_rate": 1.2842800020397226e-05, "loss": 0.2863, "step": 7415 }, { "epoch": 0.43, "grad_norm": 0.23075903001714582, "learning_rate": 1.2841015837417792e-05, "loss": 0.1904, "step": 7416 }, { "epoch": 0.43, "grad_norm": 0.3437999763100494, "learning_rate": 1.2839231556054101e-05, "loss": 0.2819, "step": 7417 }, { "epoch": 0.43, "grad_norm": 0.3856629904474758, "learning_rate": 1.2837447176367944e-05, "loss": 0.2964, "step": 7418 }, { "epoch": 0.43, "grad_norm": 0.6560835963344139, "learning_rate": 1.2835662698421112e-05, "loss": 0.3934, "step": 7419 }, { "epoch": 0.43, "grad_norm": 0.7348691624408498, "learning_rate": 1.2833878122275407e-05, "loss": 0.2447, "step": 7420 }, { "epoch": 0.43, "grad_norm": 0.34732153973656654, "learning_rate": 1.283209344799262e-05, "loss": 0.2776, "step": 7421 }, { "epoch": 0.43, "grad_norm": 0.29962271123904893, "learning_rate": 1.283030867563456e-05, "loss": 0.2691, "step": 7422 }, { "epoch": 0.43, "grad_norm": 0.7215084346836231, "learning_rate": 1.282852380526303e-05, "loss": 0.4093, "step": 7423 }, { "epoch": 0.43, "grad_norm": 0.3187780573476562, "learning_rate": 1.2826738836939844e-05, "loss": 0.2666, "step": 7424 }, { "epoch": 0.43, "grad_norm": 0.3051997295007057, "learning_rate": 1.2824953770726813e-05, "loss": 0.2971, "step": 7425 }, { "epoch": 0.43, "grad_norm": 0.5136628961032486, "learning_rate": 1.2823168606685756e-05, "loss": 0.3615, "step": 7426 }, { "epoch": 0.43, "grad_norm": 0.3726773739401942, "learning_rate": 1.2821383344878491e-05, "loss": 0.2014, "step": 7427 }, { "epoch": 0.43, "grad_norm": 1.1260492763383276, "learning_rate": 1.2819597985366843e-05, "loss": 0.5823, "step": 7428 }, { "epoch": 0.43, "grad_norm": 0.49804903503850456, "learning_rate": 1.2817812528212635e-05, "loss": 0.3953, "step": 7429 }, { "epoch": 0.43, "grad_norm": 0.278617679213449, "learning_rate": 1.2816026973477702e-05, "loss": 0.2334, "step": 7430 }, { "epoch": 0.43, "grad_norm": 0.8077408746603993, "learning_rate": 1.2814241321223876e-05, "loss": 0.5714, "step": 7431 }, { "epoch": 0.43, "grad_norm": 0.2938940132824784, "learning_rate": 1.2812455571512996e-05, "loss": 0.178, "step": 7432 }, { "epoch": 0.43, "grad_norm": 0.31693402945068383, "learning_rate": 1.28106697244069e-05, "loss": 0.1713, "step": 7433 }, { "epoch": 0.43, "grad_norm": 0.39056029624267147, "learning_rate": 1.2808883779967429e-05, "loss": 0.2973, "step": 7434 }, { "epoch": 0.43, "grad_norm": 0.8784300285705219, "learning_rate": 1.2807097738256436e-05, "loss": 0.4679, "step": 7435 }, { "epoch": 0.43, "grad_norm": 0.6096405649192743, "learning_rate": 1.2805311599335768e-05, "loss": 0.3627, "step": 7436 }, { "epoch": 0.43, "grad_norm": 0.280902071047559, "learning_rate": 1.2803525363267281e-05, "loss": 0.2223, "step": 7437 }, { "epoch": 0.43, "grad_norm": 0.33005696907181864, "learning_rate": 1.2801739030112833e-05, "loss": 0.2775, "step": 7438 }, { "epoch": 0.43, "grad_norm": 0.5559933672937305, "learning_rate": 1.279995259993428e-05, "loss": 0.3064, "step": 7439 }, { "epoch": 0.43, "grad_norm": 0.39420878524487984, "learning_rate": 1.279816607279349e-05, "loss": 0.2677, "step": 7440 }, { "epoch": 0.43, "grad_norm": 0.5102506758920938, "learning_rate": 1.279637944875233e-05, "loss": 0.3898, "step": 7441 }, { "epoch": 0.43, "grad_norm": 0.3100867254059578, "learning_rate": 1.2794592727872665e-05, "loss": 0.2904, "step": 7442 }, { "epoch": 0.43, "grad_norm": 0.2778272955079186, "learning_rate": 1.279280591021638e-05, "loss": 0.1926, "step": 7443 }, { "epoch": 0.43, "grad_norm": 0.3652081080576812, "learning_rate": 1.2791018995845343e-05, "loss": 0.2066, "step": 7444 }, { "epoch": 0.43, "grad_norm": 0.4003295351901793, "learning_rate": 1.278923198482144e-05, "loss": 0.2906, "step": 7445 }, { "epoch": 0.43, "grad_norm": 0.31584749571079734, "learning_rate": 1.2787444877206552e-05, "loss": 0.2526, "step": 7446 }, { "epoch": 0.43, "grad_norm": 0.6723729764280729, "learning_rate": 1.2785657673062567e-05, "loss": 0.4328, "step": 7447 }, { "epoch": 0.43, "grad_norm": 0.4069163041642835, "learning_rate": 1.2783870372451377e-05, "loss": 0.3099, "step": 7448 }, { "epoch": 0.43, "grad_norm": 0.5471645897491344, "learning_rate": 1.278208297543488e-05, "loss": 0.3764, "step": 7449 }, { "epoch": 0.43, "grad_norm": 0.22582651123557201, "learning_rate": 1.2780295482074965e-05, "loss": 0.1677, "step": 7450 }, { "epoch": 0.43, "grad_norm": 0.38504551814048305, "learning_rate": 1.2778507892433538e-05, "loss": 0.2699, "step": 7451 }, { "epoch": 0.43, "grad_norm": 0.5300421426970018, "learning_rate": 1.2776720206572502e-05, "loss": 0.3693, "step": 7452 }, { "epoch": 0.43, "grad_norm": 0.4316438122749434, "learning_rate": 1.2774932424553763e-05, "loss": 0.3068, "step": 7453 }, { "epoch": 0.43, "grad_norm": 0.6039224321850404, "learning_rate": 1.2773144546439235e-05, "loss": 0.3833, "step": 7454 }, { "epoch": 0.43, "grad_norm": 0.41397690083601735, "learning_rate": 1.2771356572290834e-05, "loss": 0.3263, "step": 7455 }, { "epoch": 0.43, "grad_norm": 0.22875703408665155, "learning_rate": 1.2769568502170469e-05, "loss": 0.1126, "step": 7456 }, { "epoch": 0.43, "grad_norm": 0.38290759108779704, "learning_rate": 1.276778033614007e-05, "loss": 0.2816, "step": 7457 }, { "epoch": 0.43, "grad_norm": 0.3645825413303027, "learning_rate": 1.2765992074261555e-05, "loss": 0.3286, "step": 7458 }, { "epoch": 0.43, "grad_norm": 0.7066287348903324, "learning_rate": 1.276420371659685e-05, "loss": 0.3722, "step": 7459 }, { "epoch": 0.43, "grad_norm": 0.6244111465506637, "learning_rate": 1.2762415263207895e-05, "loss": 0.3676, "step": 7460 }, { "epoch": 0.43, "grad_norm": 0.2867319890884178, "learning_rate": 1.2760626714156614e-05, "loss": 0.2733, "step": 7461 }, { "epoch": 0.43, "grad_norm": 0.37856860799168346, "learning_rate": 1.2758838069504952e-05, "loss": 0.2388, "step": 7462 }, { "epoch": 0.43, "grad_norm": 0.27983356612817223, "learning_rate": 1.2757049329314842e-05, "loss": 0.1823, "step": 7463 }, { "epoch": 0.43, "grad_norm": 0.675249528883432, "learning_rate": 1.2755260493648235e-05, "loss": 0.3907, "step": 7464 }, { "epoch": 0.43, "grad_norm": 0.3990296921748976, "learning_rate": 1.2753471562567074e-05, "loss": 0.3226, "step": 7465 }, { "epoch": 0.43, "grad_norm": 0.3446096915891851, "learning_rate": 1.2751682536133313e-05, "loss": 0.2236, "step": 7466 }, { "epoch": 0.43, "grad_norm": 0.5313072118472417, "learning_rate": 1.2749893414408903e-05, "loss": 0.3751, "step": 7467 }, { "epoch": 0.43, "grad_norm": 0.265255518955803, "learning_rate": 1.2748104197455804e-05, "loss": 0.1889, "step": 7468 }, { "epoch": 0.43, "grad_norm": 0.3086628494538964, "learning_rate": 1.274631488533597e-05, "loss": 0.235, "step": 7469 }, { "epoch": 0.43, "grad_norm": 0.3955694497758381, "learning_rate": 1.2744525478111379e-05, "loss": 0.3382, "step": 7470 }, { "epoch": 0.43, "grad_norm": 0.7593369964854633, "learning_rate": 1.274273597584398e-05, "loss": 0.5741, "step": 7471 }, { "epoch": 0.43, "grad_norm": 0.38222913352863014, "learning_rate": 1.2740946378595758e-05, "loss": 0.1657, "step": 7472 }, { "epoch": 0.43, "grad_norm": 0.3301246287937124, "learning_rate": 1.2739156686428675e-05, "loss": 0.2843, "step": 7473 }, { "epoch": 0.43, "grad_norm": 0.47079369177307934, "learning_rate": 1.2737366899404718e-05, "loss": 0.309, "step": 7474 }, { "epoch": 0.43, "grad_norm": 0.6375823621608645, "learning_rate": 1.273557701758586e-05, "loss": 0.4102, "step": 7475 }, { "epoch": 0.43, "grad_norm": 0.3015373234296537, "learning_rate": 1.2733787041034092e-05, "loss": 0.2052, "step": 7476 }, { "epoch": 0.43, "grad_norm": 0.3550705089146975, "learning_rate": 1.2731996969811393e-05, "loss": 0.2863, "step": 7477 }, { "epoch": 0.43, "grad_norm": 0.7378788393900801, "learning_rate": 1.2730206803979754e-05, "loss": 0.5696, "step": 7478 }, { "epoch": 0.43, "grad_norm": 0.3263003907992609, "learning_rate": 1.2728416543601177e-05, "loss": 0.2251, "step": 7479 }, { "epoch": 0.43, "grad_norm": 0.775695398769665, "learning_rate": 1.2726626188737647e-05, "loss": 0.4716, "step": 7480 }, { "epoch": 0.43, "grad_norm": 0.33048671823166975, "learning_rate": 1.2724835739451172e-05, "loss": 0.2793, "step": 7481 }, { "epoch": 0.43, "grad_norm": 0.2443173066345539, "learning_rate": 1.272304519580375e-05, "loss": 0.1975, "step": 7482 }, { "epoch": 0.43, "grad_norm": 0.37993338584071, "learning_rate": 1.2721254557857394e-05, "loss": 0.255, "step": 7483 }, { "epoch": 0.43, "grad_norm": 0.5994907663851344, "learning_rate": 1.2719463825674105e-05, "loss": 0.3759, "step": 7484 }, { "epoch": 0.43, "grad_norm": 0.35639175499384124, "learning_rate": 1.2717672999315904e-05, "loss": 0.2441, "step": 7485 }, { "epoch": 0.43, "grad_norm": 0.5549223599700548, "learning_rate": 1.2715882078844804e-05, "loss": 0.3907, "step": 7486 }, { "epoch": 0.43, "grad_norm": 0.5760563913885721, "learning_rate": 1.2714091064322824e-05, "loss": 0.3488, "step": 7487 }, { "epoch": 0.43, "grad_norm": 0.30595159376311326, "learning_rate": 1.2712299955811987e-05, "loss": 0.2508, "step": 7488 }, { "epoch": 0.43, "grad_norm": 0.2638652449536984, "learning_rate": 1.2710508753374317e-05, "loss": 0.2126, "step": 7489 }, { "epoch": 0.43, "grad_norm": 1.237358245192641, "learning_rate": 1.2708717457071849e-05, "loss": 0.7199, "step": 7490 }, { "epoch": 0.43, "grad_norm": 0.36818599307685185, "learning_rate": 1.270692606696661e-05, "loss": 0.2631, "step": 7491 }, { "epoch": 0.43, "grad_norm": 0.471986003217928, "learning_rate": 1.2705134583120638e-05, "loss": 0.3152, "step": 7492 }, { "epoch": 0.43, "grad_norm": 0.42344872331296374, "learning_rate": 1.2703343005595972e-05, "loss": 0.3291, "step": 7493 }, { "epoch": 0.43, "grad_norm": 0.36850684943447043, "learning_rate": 1.2701551334454652e-05, "loss": 0.2939, "step": 7494 }, { "epoch": 0.43, "grad_norm": 0.21811476150078793, "learning_rate": 1.2699759569758727e-05, "loss": 0.1028, "step": 7495 }, { "epoch": 0.43, "grad_norm": 0.6464980054789515, "learning_rate": 1.2697967711570243e-05, "loss": 0.4877, "step": 7496 }, { "epoch": 0.43, "grad_norm": 0.28044990754809157, "learning_rate": 1.2696175759951254e-05, "loss": 0.2786, "step": 7497 }, { "epoch": 0.43, "grad_norm": 0.6001584093412066, "learning_rate": 1.2694383714963818e-05, "loss": 0.3558, "step": 7498 }, { "epoch": 0.43, "grad_norm": 0.6795937624798115, "learning_rate": 1.2692591576669983e-05, "loss": 0.443, "step": 7499 }, { "epoch": 0.43, "grad_norm": 0.2836693554883349, "learning_rate": 1.2690799345131824e-05, "loss": 0.2405, "step": 7500 }, { "epoch": 0.43, "grad_norm": 0.3201123731313662, "learning_rate": 1.2689007020411394e-05, "loss": 0.2822, "step": 7501 }, { "epoch": 0.43, "grad_norm": 0.36880094607275504, "learning_rate": 1.268721460257077e-05, "loss": 0.2344, "step": 7502 }, { "epoch": 0.43, "grad_norm": 0.42415078679604923, "learning_rate": 1.2685422091672017e-05, "loss": 0.3176, "step": 7503 }, { "epoch": 0.43, "grad_norm": 1.0388651965272275, "learning_rate": 1.2683629487777219e-05, "loss": 0.4227, "step": 7504 }, { "epoch": 0.43, "grad_norm": 0.26138461355474946, "learning_rate": 1.268183679094844e-05, "loss": 0.2142, "step": 7505 }, { "epoch": 0.43, "grad_norm": 0.38880693292751656, "learning_rate": 1.2680044001247774e-05, "loss": 0.3062, "step": 7506 }, { "epoch": 0.43, "grad_norm": 0.28854957691060595, "learning_rate": 1.2678251118737293e-05, "loss": 0.2059, "step": 7507 }, { "epoch": 0.43, "grad_norm": 0.6409188766262047, "learning_rate": 1.2676458143479095e-05, "loss": 0.306, "step": 7508 }, { "epoch": 0.43, "grad_norm": 0.3106921649626693, "learning_rate": 1.2674665075535264e-05, "loss": 0.2657, "step": 7509 }, { "epoch": 0.43, "grad_norm": 0.9982593919162711, "learning_rate": 1.26728719149679e-05, "loss": 0.4393, "step": 7510 }, { "epoch": 0.43, "grad_norm": 1.0040110349618188, "learning_rate": 1.2671078661839093e-05, "loss": 0.2504, "step": 7511 }, { "epoch": 0.43, "grad_norm": 0.27760287035235526, "learning_rate": 1.2669285316210948e-05, "loss": 0.2207, "step": 7512 }, { "epoch": 0.43, "grad_norm": 0.33738457628735213, "learning_rate": 1.2667491878145568e-05, "loss": 0.2674, "step": 7513 }, { "epoch": 0.43, "grad_norm": 0.8728551700618518, "learning_rate": 1.266569834770506e-05, "loss": 0.4935, "step": 7514 }, { "epoch": 0.43, "grad_norm": 0.5237880393460295, "learning_rate": 1.266390472495153e-05, "loss": 0.2501, "step": 7515 }, { "epoch": 0.43, "grad_norm": 1.2321177960808543, "learning_rate": 1.2662111009947096e-05, "loss": 0.7564, "step": 7516 }, { "epoch": 0.43, "grad_norm": 0.3852958785860179, "learning_rate": 1.2660317202753873e-05, "loss": 0.308, "step": 7517 }, { "epoch": 0.43, "grad_norm": 0.2878776513060847, "learning_rate": 1.2658523303433979e-05, "loss": 0.2114, "step": 7518 }, { "epoch": 0.43, "grad_norm": 0.3866565714290425, "learning_rate": 1.2656729312049536e-05, "loss": 0.2787, "step": 7519 }, { "epoch": 0.43, "grad_norm": 0.44929076242454746, "learning_rate": 1.265493522866267e-05, "loss": 0.3211, "step": 7520 }, { "epoch": 0.43, "grad_norm": 0.31019924286079437, "learning_rate": 1.2653141053335513e-05, "loss": 0.2021, "step": 7521 }, { "epoch": 0.43, "grad_norm": 0.5257400746389835, "learning_rate": 1.2651346786130199e-05, "loss": 0.3338, "step": 7522 }, { "epoch": 0.43, "grad_norm": 0.7700473026585529, "learning_rate": 1.2649552427108856e-05, "loss": 0.4551, "step": 7523 }, { "epoch": 0.43, "grad_norm": 0.37836722990428473, "learning_rate": 1.2647757976333626e-05, "loss": 0.2697, "step": 7524 }, { "epoch": 0.43, "grad_norm": 0.40109303353556514, "learning_rate": 1.2645963433866653e-05, "loss": 0.2891, "step": 7525 }, { "epoch": 0.43, "grad_norm": 0.5801053269128108, "learning_rate": 1.2644168799770075e-05, "loss": 0.4479, "step": 7526 }, { "epoch": 0.43, "grad_norm": 0.3038245233952448, "learning_rate": 1.2642374074106052e-05, "loss": 0.2673, "step": 7527 }, { "epoch": 0.43, "grad_norm": 0.23955365444396562, "learning_rate": 1.2640579256936723e-05, "loss": 0.195, "step": 7528 }, { "epoch": 0.43, "grad_norm": 0.9894905671520695, "learning_rate": 1.263878434832425e-05, "loss": 0.5351, "step": 7529 }, { "epoch": 0.43, "grad_norm": 0.33128274197373925, "learning_rate": 1.2636989348330791e-05, "loss": 0.2585, "step": 7530 }, { "epoch": 0.43, "grad_norm": 0.7002081987136007, "learning_rate": 1.2635194257018501e-05, "loss": 0.3638, "step": 7531 }, { "epoch": 0.43, "grad_norm": 0.3775323790192974, "learning_rate": 1.2633399074449548e-05, "loss": 0.3146, "step": 7532 }, { "epoch": 0.43, "grad_norm": 0.33698150077075945, "learning_rate": 1.2631603800686099e-05, "loss": 0.279, "step": 7533 }, { "epoch": 0.43, "grad_norm": 0.18218076324549323, "learning_rate": 1.2629808435790322e-05, "loss": 0.1041, "step": 7534 }, { "epoch": 0.43, "grad_norm": 1.185980333284646, "learning_rate": 1.2628012979824394e-05, "loss": 0.7281, "step": 7535 }, { "epoch": 0.43, "grad_norm": 0.34323992321440966, "learning_rate": 1.262621743285049e-05, "loss": 0.262, "step": 7536 }, { "epoch": 0.43, "grad_norm": 0.37375107807679175, "learning_rate": 1.2624421794930785e-05, "loss": 0.3399, "step": 7537 }, { "epoch": 0.43, "grad_norm": 0.9873288871171293, "learning_rate": 1.262262606612747e-05, "loss": 0.3846, "step": 7538 }, { "epoch": 0.43, "grad_norm": 0.4163728309097746, "learning_rate": 1.2620830246502729e-05, "loss": 0.2865, "step": 7539 }, { "epoch": 0.43, "grad_norm": 0.29402576393144825, "learning_rate": 1.2619034336118742e-05, "loss": 0.2692, "step": 7540 }, { "epoch": 0.43, "grad_norm": 0.3024954021730123, "learning_rate": 1.2617238335037717e-05, "loss": 0.222, "step": 7541 }, { "epoch": 0.43, "grad_norm": 0.4399043946526552, "learning_rate": 1.2615442243321837e-05, "loss": 0.2605, "step": 7542 }, { "epoch": 0.43, "grad_norm": 0.6267856143786402, "learning_rate": 1.2613646061033303e-05, "loss": 0.3875, "step": 7543 }, { "epoch": 0.43, "grad_norm": 0.383120587325467, "learning_rate": 1.261184978823432e-05, "loss": 0.2875, "step": 7544 }, { "epoch": 0.43, "grad_norm": 0.3409458484018371, "learning_rate": 1.261005342498709e-05, "loss": 0.277, "step": 7545 }, { "epoch": 0.43, "grad_norm": 0.25057745497441336, "learning_rate": 1.2608256971353825e-05, "loss": 0.1904, "step": 7546 }, { "epoch": 0.43, "grad_norm": 0.9892133398630651, "learning_rate": 1.2606460427396729e-05, "loss": 0.0705, "step": 7547 }, { "epoch": 0.43, "grad_norm": 0.3220890480663572, "learning_rate": 1.2604663793178025e-05, "loss": 0.2779, "step": 7548 }, { "epoch": 0.43, "grad_norm": 0.3346067280468399, "learning_rate": 1.2602867068759921e-05, "loss": 0.3242, "step": 7549 }, { "epoch": 0.43, "grad_norm": 0.9802497675330882, "learning_rate": 1.2601070254204645e-05, "loss": 0.7108, "step": 7550 }, { "epoch": 0.43, "grad_norm": 0.3310026474654711, "learning_rate": 1.2599273349574417e-05, "loss": 0.2248, "step": 7551 }, { "epoch": 0.43, "grad_norm": 0.4692480509388019, "learning_rate": 1.2597476354931466e-05, "loss": 0.3678, "step": 7552 }, { "epoch": 0.43, "grad_norm": 0.26325901123873297, "learning_rate": 1.2595679270338019e-05, "loss": 0.2001, "step": 7553 }, { "epoch": 0.43, "grad_norm": 0.3018764077905086, "learning_rate": 1.2593882095856314e-05, "loss": 0.2367, "step": 7554 }, { "epoch": 0.43, "grad_norm": 0.6035545300935379, "learning_rate": 1.259208483154858e-05, "loss": 0.4614, "step": 7555 }, { "epoch": 0.43, "grad_norm": 0.3817271648715765, "learning_rate": 1.2590287477477063e-05, "loss": 0.3635, "step": 7556 }, { "epoch": 0.43, "grad_norm": 0.3606104677787802, "learning_rate": 1.2588490033703999e-05, "loss": 0.1954, "step": 7557 }, { "epoch": 0.43, "grad_norm": 0.33357966342409084, "learning_rate": 1.2586692500291639e-05, "loss": 0.2763, "step": 7558 }, { "epoch": 0.43, "grad_norm": 0.4054777299779856, "learning_rate": 1.2584894877302224e-05, "loss": 0.3124, "step": 7559 }, { "epoch": 0.43, "grad_norm": 0.2770112585553754, "learning_rate": 1.2583097164798015e-05, "loss": 0.2086, "step": 7560 }, { "epoch": 0.43, "grad_norm": 0.33701165254254206, "learning_rate": 1.2581299362841262e-05, "loss": 0.3406, "step": 7561 }, { "epoch": 0.43, "grad_norm": 1.5593734746120151, "learning_rate": 1.2579501471494222e-05, "loss": 0.7475, "step": 7562 }, { "epoch": 0.43, "grad_norm": 0.6377691899908655, "learning_rate": 1.2577703490819157e-05, "loss": 0.3114, "step": 7563 }, { "epoch": 0.43, "grad_norm": 0.27445686268636504, "learning_rate": 1.2575905420878332e-05, "loss": 0.2575, "step": 7564 }, { "epoch": 0.43, "grad_norm": 0.26622696284658587, "learning_rate": 1.2574107261734012e-05, "loss": 0.2002, "step": 7565 }, { "epoch": 0.43, "grad_norm": 0.37561943751948673, "learning_rate": 1.2572309013448467e-05, "loss": 0.2893, "step": 7566 }, { "epoch": 0.43, "grad_norm": 0.4144161359543641, "learning_rate": 1.2570510676083974e-05, "loss": 0.2954, "step": 7567 }, { "epoch": 0.43, "grad_norm": 0.36156343975898747, "learning_rate": 1.2568712249702806e-05, "loss": 0.3277, "step": 7568 }, { "epoch": 0.43, "grad_norm": 0.41364106807789164, "learning_rate": 1.2566913734367244e-05, "loss": 0.2982, "step": 7569 }, { "epoch": 0.43, "grad_norm": 0.4642819832894536, "learning_rate": 1.2565115130139565e-05, "loss": 0.2253, "step": 7570 }, { "epoch": 0.43, "grad_norm": 0.5140464591085009, "learning_rate": 1.2563316437082065e-05, "loss": 0.4326, "step": 7571 }, { "epoch": 0.44, "grad_norm": 0.26369296742189074, "learning_rate": 1.2561517655257025e-05, "loss": 0.2555, "step": 7572 }, { "epoch": 0.44, "grad_norm": 0.25556862333416436, "learning_rate": 1.255971878472674e-05, "loss": 0.1509, "step": 7573 }, { "epoch": 0.44, "grad_norm": 1.0276481868351715, "learning_rate": 1.2557919825553502e-05, "loss": 0.6818, "step": 7574 }, { "epoch": 0.44, "grad_norm": 0.6329669628544323, "learning_rate": 1.255612077779961e-05, "loss": 0.3306, "step": 7575 }, { "epoch": 0.44, "grad_norm": 0.32794636640693425, "learning_rate": 1.2554321641527367e-05, "loss": 0.2998, "step": 7576 }, { "epoch": 0.44, "grad_norm": 0.5110473328508144, "learning_rate": 1.2552522416799073e-05, "loss": 0.3151, "step": 7577 }, { "epoch": 0.44, "grad_norm": 0.41058569117740423, "learning_rate": 1.2550723103677039e-05, "loss": 0.2154, "step": 7578 }, { "epoch": 0.44, "grad_norm": 0.32708584999844575, "learning_rate": 1.2548923702223574e-05, "loss": 0.251, "step": 7579 }, { "epoch": 0.44, "grad_norm": 0.3670176202785912, "learning_rate": 1.254712421250099e-05, "loss": 0.2829, "step": 7580 }, { "epoch": 0.44, "grad_norm": 0.5745557867723556, "learning_rate": 1.2545324634571604e-05, "loss": 0.3194, "step": 7581 }, { "epoch": 0.44, "grad_norm": 0.40854419253185176, "learning_rate": 1.2543524968497738e-05, "loss": 0.3315, "step": 7582 }, { "epoch": 0.44, "grad_norm": 0.6312594387495574, "learning_rate": 1.2541725214341708e-05, "loss": 0.3503, "step": 7583 }, { "epoch": 0.44, "grad_norm": 0.3346389065280756, "learning_rate": 1.2539925372165847e-05, "loss": 0.2932, "step": 7584 }, { "epoch": 0.44, "grad_norm": 0.23633536868367974, "learning_rate": 1.2538125442032474e-05, "loss": 0.2035, "step": 7585 }, { "epoch": 0.44, "grad_norm": 0.9129265657545105, "learning_rate": 1.2536325424003932e-05, "loss": 0.5529, "step": 7586 }, { "epoch": 0.44, "grad_norm": 0.38335161706444687, "learning_rate": 1.2534525318142546e-05, "loss": 0.3163, "step": 7587 }, { "epoch": 0.44, "grad_norm": 0.37815106189321185, "learning_rate": 1.2532725124510658e-05, "loss": 0.3199, "step": 7588 }, { "epoch": 0.44, "grad_norm": 0.45945715714105756, "learning_rate": 1.2530924843170609e-05, "loss": 0.3619, "step": 7589 }, { "epoch": 0.44, "grad_norm": 0.25719376284057816, "learning_rate": 1.2529124474184742e-05, "loss": 0.154, "step": 7590 }, { "epoch": 0.44, "grad_norm": 0.352646974082044, "learning_rate": 1.25273240176154e-05, "loss": 0.2434, "step": 7591 }, { "epoch": 0.44, "grad_norm": 0.40108702245255196, "learning_rate": 1.2525523473524938e-05, "loss": 0.3153, "step": 7592 }, { "epoch": 0.44, "grad_norm": 0.5606921794556875, "learning_rate": 1.2523722841975707e-05, "loss": 0.153, "step": 7593 }, { "epoch": 0.44, "grad_norm": 0.35881739246190164, "learning_rate": 1.2521922123030061e-05, "loss": 0.3164, "step": 7594 }, { "epoch": 0.44, "grad_norm": 0.4775366714403366, "learning_rate": 1.252012131675036e-05, "loss": 0.3781, "step": 7595 }, { "epoch": 0.44, "grad_norm": 0.3417720512306077, "learning_rate": 1.2518320423198968e-05, "loss": 0.2006, "step": 7596 }, { "epoch": 0.44, "grad_norm": 0.2217803683706333, "learning_rate": 1.251651944243825e-05, "loss": 0.207, "step": 7597 }, { "epoch": 0.44, "grad_norm": 0.9289910609233664, "learning_rate": 1.2514718374530568e-05, "loss": 0.6753, "step": 7598 }, { "epoch": 0.44, "grad_norm": 0.5061395116029092, "learning_rate": 1.2512917219538297e-05, "loss": 0.2249, "step": 7599 }, { "epoch": 0.44, "grad_norm": 0.3169813552029995, "learning_rate": 1.2511115977523813e-05, "loss": 0.3035, "step": 7600 }, { "epoch": 0.44, "grad_norm": 0.7382839722814659, "learning_rate": 1.2509314648549491e-05, "loss": 0.5035, "step": 7601 }, { "epoch": 0.44, "grad_norm": 0.7547998463825323, "learning_rate": 1.2507513232677707e-05, "loss": 0.397, "step": 7602 }, { "epoch": 0.44, "grad_norm": 0.2598908072442766, "learning_rate": 1.250571172997085e-05, "loss": 0.1986, "step": 7603 }, { "epoch": 0.44, "grad_norm": 0.40470480560147853, "learning_rate": 1.2503910140491305e-05, "loss": 0.3349, "step": 7604 }, { "epoch": 0.44, "grad_norm": 0.5543636608248991, "learning_rate": 1.2502108464301456e-05, "loss": 0.3531, "step": 7605 }, { "epoch": 0.44, "grad_norm": 0.4062889895351509, "learning_rate": 1.2500306701463702e-05, "loss": 0.2834, "step": 7606 }, { "epoch": 0.44, "grad_norm": 0.3447667781654027, "learning_rate": 1.2498504852040433e-05, "loss": 0.2808, "step": 7607 }, { "epoch": 0.44, "grad_norm": 0.363996062685284, "learning_rate": 1.2496702916094048e-05, "loss": 0.2969, "step": 7608 }, { "epoch": 0.44, "grad_norm": 0.26788663410836405, "learning_rate": 1.2494900893686949e-05, "loss": 0.1817, "step": 7609 }, { "epoch": 0.44, "grad_norm": 0.9228599503181674, "learning_rate": 1.2493098784881539e-05, "loss": 0.5611, "step": 7610 }, { "epoch": 0.44, "grad_norm": 0.3804507345579722, "learning_rate": 1.2491296589740224e-05, "loss": 0.3399, "step": 7611 }, { "epoch": 0.44, "grad_norm": 0.2948051354691912, "learning_rate": 1.2489494308325415e-05, "loss": 0.2742, "step": 7612 }, { "epoch": 0.44, "grad_norm": 0.3363350955735608, "learning_rate": 1.2487691940699529e-05, "loss": 0.2011, "step": 7613 }, { "epoch": 0.44, "grad_norm": 1.1140463461241292, "learning_rate": 1.2485889486924974e-05, "loss": 0.6561, "step": 7614 }, { "epoch": 0.44, "grad_norm": 0.3030326221824844, "learning_rate": 1.2484086947064175e-05, "loss": 0.2679, "step": 7615 }, { "epoch": 0.44, "grad_norm": 0.36992495041043094, "learning_rate": 1.2482284321179552e-05, "loss": 0.2854, "step": 7616 }, { "epoch": 0.44, "grad_norm": 0.7049813104276725, "learning_rate": 1.248048160933353e-05, "loss": 0.3893, "step": 7617 }, { "epoch": 0.44, "grad_norm": 0.2756630402796699, "learning_rate": 1.2478678811588535e-05, "loss": 0.2303, "step": 7618 }, { "epoch": 0.44, "grad_norm": 0.26760176350237963, "learning_rate": 1.2476875928006999e-05, "loss": 0.1611, "step": 7619 }, { "epoch": 0.44, "grad_norm": 0.41829783407336724, "learning_rate": 1.2475072958651358e-05, "loss": 0.3304, "step": 7620 }, { "epoch": 0.44, "grad_norm": 0.35645273997227866, "learning_rate": 1.2473269903584046e-05, "loss": 0.2964, "step": 7621 }, { "epoch": 0.44, "grad_norm": 0.8604531292814376, "learning_rate": 1.2471466762867506e-05, "loss": 0.3573, "step": 7622 }, { "epoch": 0.44, "grad_norm": 0.3153244327743291, "learning_rate": 1.2469663536564177e-05, "loss": 0.3042, "step": 7623 }, { "epoch": 0.44, "grad_norm": 0.3374073034408949, "learning_rate": 1.2467860224736501e-05, "loss": 0.2689, "step": 7624 }, { "epoch": 0.44, "grad_norm": 0.2715934504776164, "learning_rate": 1.2466056827446937e-05, "loss": 0.1519, "step": 7625 }, { "epoch": 0.44, "grad_norm": 0.4920404791571963, "learning_rate": 1.2464253344757929e-05, "loss": 0.1767, "step": 7626 }, { "epoch": 0.44, "grad_norm": 0.40765441951874637, "learning_rate": 1.2462449776731935e-05, "loss": 0.3151, "step": 7627 }, { "epoch": 0.44, "grad_norm": 0.4929507662219086, "learning_rate": 1.246064612343141e-05, "loss": 0.3614, "step": 7628 }, { "epoch": 0.44, "grad_norm": 0.8465290944998883, "learning_rate": 1.2458842384918815e-05, "loss": 0.3429, "step": 7629 }, { "epoch": 0.44, "grad_norm": 0.37771971215198696, "learning_rate": 1.2457038561256616e-05, "loss": 0.2933, "step": 7630 }, { "epoch": 0.44, "grad_norm": 0.25817417503857, "learning_rate": 1.2455234652507276e-05, "loss": 0.2179, "step": 7631 }, { "epoch": 0.44, "grad_norm": 1.003852559033286, "learning_rate": 1.2453430658733265e-05, "loss": 0.2929, "step": 7632 }, { "epoch": 0.44, "grad_norm": 0.33167390172321803, "learning_rate": 1.2451626579997056e-05, "loss": 0.2602, "step": 7633 }, { "epoch": 0.44, "grad_norm": 0.8273407599930446, "learning_rate": 1.2449822416361123e-05, "loss": 0.4349, "step": 7634 }, { "epoch": 0.44, "grad_norm": 0.3353811089623452, "learning_rate": 1.2448018167887947e-05, "loss": 0.264, "step": 7635 }, { "epoch": 0.44, "grad_norm": 0.34648324200161335, "learning_rate": 1.2446213834640007e-05, "loss": 0.2492, "step": 7636 }, { "epoch": 0.44, "grad_norm": 0.30946096151194946, "learning_rate": 1.2444409416679786e-05, "loss": 0.2142, "step": 7637 }, { "epoch": 0.44, "grad_norm": 0.813049180954248, "learning_rate": 1.2442604914069773e-05, "loss": 0.4697, "step": 7638 }, { "epoch": 0.44, "grad_norm": 0.3757559066901632, "learning_rate": 1.2440800326872457e-05, "loss": 0.2301, "step": 7639 }, { "epoch": 0.44, "grad_norm": 0.4310357553833105, "learning_rate": 1.2438995655150332e-05, "loss": 0.3578, "step": 7640 }, { "epoch": 0.44, "grad_norm": 1.0897373835543203, "learning_rate": 1.243719089896589e-05, "loss": 0.7668, "step": 7641 }, { "epoch": 0.44, "grad_norm": 0.2947300212873115, "learning_rate": 1.2435386058381634e-05, "loss": 0.2155, "step": 7642 }, { "epoch": 0.44, "grad_norm": 0.2607968228356934, "learning_rate": 1.243358113346006e-05, "loss": 0.2048, "step": 7643 }, { "epoch": 0.44, "grad_norm": 0.444883989342041, "learning_rate": 1.243177612426368e-05, "loss": 0.368, "step": 7644 }, { "epoch": 0.44, "grad_norm": 0.3069080743699087, "learning_rate": 1.2429971030854993e-05, "loss": 0.1943, "step": 7645 }, { "epoch": 0.44, "grad_norm": 0.8069208624859093, "learning_rate": 1.2428165853296517e-05, "loss": 0.4731, "step": 7646 }, { "epoch": 0.44, "grad_norm": 0.3425899314113828, "learning_rate": 1.2426360591650761e-05, "loss": 0.2992, "step": 7647 }, { "epoch": 0.44, "grad_norm": 0.3365953563747308, "learning_rate": 1.2424555245980241e-05, "loss": 0.2127, "step": 7648 }, { "epoch": 0.44, "grad_norm": 0.281929789738832, "learning_rate": 1.2422749816347479e-05, "loss": 0.1837, "step": 7649 }, { "epoch": 0.44, "grad_norm": 0.8104273831878697, "learning_rate": 1.2420944302814992e-05, "loss": 0.4816, "step": 7650 }, { "epoch": 0.44, "grad_norm": 0.34569478851091506, "learning_rate": 1.2419138705445314e-05, "loss": 0.2962, "step": 7651 }, { "epoch": 0.44, "grad_norm": 0.3634484166137741, "learning_rate": 1.241733302430096e-05, "loss": 0.2862, "step": 7652 }, { "epoch": 0.44, "grad_norm": 1.0261468850569297, "learning_rate": 1.2415527259444471e-05, "loss": 0.6782, "step": 7653 }, { "epoch": 0.44, "grad_norm": 0.3215611242030219, "learning_rate": 1.2413721410938373e-05, "loss": 0.2798, "step": 7654 }, { "epoch": 0.44, "grad_norm": 0.2449067046131677, "learning_rate": 1.2411915478845211e-05, "loss": 0.1634, "step": 7655 }, { "epoch": 0.44, "grad_norm": 0.4588366813097026, "learning_rate": 1.2410109463227519e-05, "loss": 0.3376, "step": 7656 }, { "epoch": 0.44, "grad_norm": 0.3169675064491585, "learning_rate": 1.240830336414784e-05, "loss": 0.3055, "step": 7657 }, { "epoch": 0.44, "grad_norm": 0.8641165712233401, "learning_rate": 1.2406497181668717e-05, "loss": 0.4004, "step": 7658 }, { "epoch": 0.44, "grad_norm": 0.34837061522076307, "learning_rate": 1.2404690915852701e-05, "loss": 0.3421, "step": 7659 }, { "epoch": 0.44, "grad_norm": 0.3212739319107255, "learning_rate": 1.2402884566762341e-05, "loss": 0.2886, "step": 7660 }, { "epoch": 0.44, "grad_norm": 0.2776234930677816, "learning_rate": 1.2401078134460194e-05, "loss": 0.1348, "step": 7661 }, { "epoch": 0.44, "grad_norm": 0.41273755963460756, "learning_rate": 1.2399271619008812e-05, "loss": 0.3599, "step": 7662 }, { "epoch": 0.44, "grad_norm": 0.42377632401126736, "learning_rate": 1.2397465020470757e-05, "loss": 0.2785, "step": 7663 }, { "epoch": 0.44, "grad_norm": 0.3625811131545241, "learning_rate": 1.2395658338908594e-05, "loss": 0.3151, "step": 7664 }, { "epoch": 0.44, "grad_norm": 0.7998829300948974, "learning_rate": 1.2393851574384886e-05, "loss": 0.4532, "step": 7665 }, { "epoch": 0.44, "grad_norm": 0.40151280132662404, "learning_rate": 1.23920447269622e-05, "loss": 0.2937, "step": 7666 }, { "epoch": 0.44, "grad_norm": 0.3260653072198325, "learning_rate": 1.2390237796703107e-05, "loss": 0.3084, "step": 7667 }, { "epoch": 0.44, "grad_norm": 0.5939761261701787, "learning_rate": 1.238843078367018e-05, "loss": 0.3467, "step": 7668 }, { "epoch": 0.44, "grad_norm": 0.2672933275218854, "learning_rate": 1.2386623687926001e-05, "loss": 0.2261, "step": 7669 }, { "epoch": 0.44, "grad_norm": 0.4603784285403028, "learning_rate": 1.2384816509533145e-05, "loss": 0.3053, "step": 7670 }, { "epoch": 0.44, "grad_norm": 0.32124928699513156, "learning_rate": 1.2383009248554195e-05, "loss": 0.245, "step": 7671 }, { "epoch": 0.44, "grad_norm": 0.34815018711320994, "learning_rate": 1.2381201905051738e-05, "loss": 0.2903, "step": 7672 }, { "epoch": 0.44, "grad_norm": 0.630884248029316, "learning_rate": 1.2379394479088363e-05, "loss": 0.4409, "step": 7673 }, { "epoch": 0.44, "grad_norm": 0.37651909244520654, "learning_rate": 1.2377586970726658e-05, "loss": 0.2451, "step": 7674 }, { "epoch": 0.44, "grad_norm": 0.282839422926257, "learning_rate": 1.2375779380029218e-05, "loss": 0.2584, "step": 7675 }, { "epoch": 0.44, "grad_norm": 0.4063717632353682, "learning_rate": 1.2373971707058643e-05, "loss": 0.2808, "step": 7676 }, { "epoch": 0.44, "grad_norm": 0.9496003441841634, "learning_rate": 1.2372163951877525e-05, "loss": 0.6778, "step": 7677 }, { "epoch": 0.44, "grad_norm": 0.2875371706876149, "learning_rate": 1.2370356114548476e-05, "loss": 0.2067, "step": 7678 }, { "epoch": 0.44, "grad_norm": 0.3650829049658475, "learning_rate": 1.2368548195134094e-05, "loss": 0.3043, "step": 7679 }, { "epoch": 0.44, "grad_norm": 0.7485902503018145, "learning_rate": 1.2366740193696991e-05, "loss": 0.4163, "step": 7680 }, { "epoch": 0.44, "grad_norm": 0.23644972909627532, "learning_rate": 1.2364932110299775e-05, "loss": 0.1315, "step": 7681 }, { "epoch": 0.44, "grad_norm": 0.3908523069948069, "learning_rate": 1.2363123945005064e-05, "loss": 0.2984, "step": 7682 }, { "epoch": 0.44, "grad_norm": 0.36885901856842995, "learning_rate": 1.2361315697875469e-05, "loss": 0.3257, "step": 7683 }, { "epoch": 0.44, "grad_norm": 0.3138719168813281, "learning_rate": 1.2359507368973618e-05, "loss": 0.1951, "step": 7684 }, { "epoch": 0.44, "grad_norm": 0.6338918277920743, "learning_rate": 1.2357698958362123e-05, "loss": 0.3682, "step": 7685 }, { "epoch": 0.44, "grad_norm": 1.075166134031894, "learning_rate": 1.2355890466103619e-05, "loss": 0.6696, "step": 7686 }, { "epoch": 0.44, "grad_norm": 0.2538342119211804, "learning_rate": 1.2354081892260728e-05, "loss": 0.2455, "step": 7687 }, { "epoch": 0.44, "grad_norm": 0.2697614085126694, "learning_rate": 1.2352273236896082e-05, "loss": 0.2123, "step": 7688 }, { "epoch": 0.44, "grad_norm": 1.115602045075998, "learning_rate": 1.2350464500072314e-05, "loss": 0.5415, "step": 7689 }, { "epoch": 0.44, "grad_norm": 0.40017360263197127, "learning_rate": 1.2348655681852064e-05, "loss": 0.299, "step": 7690 }, { "epoch": 0.44, "grad_norm": 0.27748444108314924, "learning_rate": 1.2346846782297966e-05, "loss": 0.2675, "step": 7691 }, { "epoch": 0.44, "grad_norm": 1.024308978329414, "learning_rate": 1.2345037801472669e-05, "loss": 0.5259, "step": 7692 }, { "epoch": 0.44, "grad_norm": 0.2658736169881998, "learning_rate": 1.2343228739438811e-05, "loss": 0.2082, "step": 7693 }, { "epoch": 0.44, "grad_norm": 0.5155626562375746, "learning_rate": 1.2341419596259038e-05, "loss": 0.295, "step": 7694 }, { "epoch": 0.44, "grad_norm": 0.26905222497297376, "learning_rate": 1.2339610371996012e-05, "loss": 0.2577, "step": 7695 }, { "epoch": 0.44, "grad_norm": 0.3606434958043496, "learning_rate": 1.2337801066712376e-05, "loss": 0.3027, "step": 7696 }, { "epoch": 0.44, "grad_norm": 0.49890333651717567, "learning_rate": 1.2335991680470792e-05, "loss": 0.2738, "step": 7697 }, { "epoch": 0.44, "grad_norm": 0.39467269562385565, "learning_rate": 1.2334182213333912e-05, "loss": 0.3395, "step": 7698 }, { "epoch": 0.44, "grad_norm": 0.5731151803650315, "learning_rate": 1.2332372665364406e-05, "loss": 0.3394, "step": 7699 }, { "epoch": 0.44, "grad_norm": 0.4065624798269097, "learning_rate": 1.2330563036624934e-05, "loss": 0.3214, "step": 7700 }, { "epoch": 0.44, "grad_norm": 0.36053450120448083, "learning_rate": 1.2328753327178164e-05, "loss": 0.2515, "step": 7701 }, { "epoch": 0.44, "grad_norm": 0.5689075010121288, "learning_rate": 1.2326943537086766e-05, "loss": 0.3741, "step": 7702 }, { "epoch": 0.44, "grad_norm": 0.2783654959597047, "learning_rate": 1.2325133666413414e-05, "loss": 0.2591, "step": 7703 }, { "epoch": 0.44, "grad_norm": 1.3908455924860992, "learning_rate": 1.2323323715220783e-05, "loss": 0.2327, "step": 7704 }, { "epoch": 0.44, "grad_norm": 0.47826736293204375, "learning_rate": 1.2321513683571553e-05, "loss": 0.3268, "step": 7705 }, { "epoch": 0.44, "grad_norm": 0.4121896236281999, "learning_rate": 1.2319703571528403e-05, "loss": 0.3567, "step": 7706 }, { "epoch": 0.44, "grad_norm": 0.35279691238740696, "learning_rate": 1.231789337915402e-05, "loss": 0.2763, "step": 7707 }, { "epoch": 0.44, "grad_norm": 0.37557769110490014, "learning_rate": 1.2316083106511085e-05, "loss": 0.2277, "step": 7708 }, { "epoch": 0.44, "grad_norm": 0.3360999778532407, "learning_rate": 1.2314272753662295e-05, "loss": 0.2438, "step": 7709 }, { "epoch": 0.44, "grad_norm": 0.46270153126000013, "learning_rate": 1.2312462320670335e-05, "loss": 0.2576, "step": 7710 }, { "epoch": 0.44, "grad_norm": 0.3572732447396155, "learning_rate": 1.2310651807597909e-05, "loss": 0.233, "step": 7711 }, { "epoch": 0.44, "grad_norm": 0.4912302945710772, "learning_rate": 1.2308841214507708e-05, "loss": 0.3823, "step": 7712 }, { "epoch": 0.44, "grad_norm": 0.5918930747562702, "learning_rate": 1.2307030541462435e-05, "loss": 0.4092, "step": 7713 }, { "epoch": 0.44, "grad_norm": 0.27316585495061246, "learning_rate": 1.2305219788524794e-05, "loss": 0.224, "step": 7714 }, { "epoch": 0.44, "grad_norm": 0.2925029836392248, "learning_rate": 1.230340895575749e-05, "loss": 0.248, "step": 7715 }, { "epoch": 0.44, "grad_norm": 0.5948366275202696, "learning_rate": 1.2301598043223233e-05, "loss": 0.2956, "step": 7716 }, { "epoch": 0.44, "grad_norm": 0.5585403674560185, "learning_rate": 1.2299787050984736e-05, "loss": 0.2098, "step": 7717 }, { "epoch": 0.44, "grad_norm": 0.357036950905096, "learning_rate": 1.2297975979104711e-05, "loss": 0.3173, "step": 7718 }, { "epoch": 0.44, "grad_norm": 0.3489053320187153, "learning_rate": 1.2296164827645875e-05, "loss": 0.3219, "step": 7719 }, { "epoch": 0.44, "grad_norm": 0.26655390566547604, "learning_rate": 1.2294353596670954e-05, "loss": 0.1146, "step": 7720 }, { "epoch": 0.44, "grad_norm": 0.22260566173399035, "learning_rate": 1.2292542286242663e-05, "loss": 0.1856, "step": 7721 }, { "epoch": 0.44, "grad_norm": 0.5210406943968808, "learning_rate": 1.2290730896423733e-05, "loss": 0.3649, "step": 7722 }, { "epoch": 0.44, "grad_norm": 0.30909289929601924, "learning_rate": 1.2288919427276889e-05, "loss": 0.2107, "step": 7723 }, { "epoch": 0.44, "grad_norm": 0.3394742788735173, "learning_rate": 1.2287107878864868e-05, "loss": 0.3283, "step": 7724 }, { "epoch": 0.44, "grad_norm": 0.6441934367210518, "learning_rate": 1.2285296251250396e-05, "loss": 0.4857, "step": 7725 }, { "epoch": 0.44, "grad_norm": 0.33547352374751016, "learning_rate": 1.2283484544496214e-05, "loss": 0.3212, "step": 7726 }, { "epoch": 0.44, "grad_norm": 0.22535303095029596, "learning_rate": 1.2281672758665057e-05, "loss": 0.1664, "step": 7727 }, { "epoch": 0.44, "grad_norm": 1.1198897473384355, "learning_rate": 1.2279860893819677e-05, "loss": 0.5423, "step": 7728 }, { "epoch": 0.44, "grad_norm": 0.5315956158941079, "learning_rate": 1.2278048950022807e-05, "loss": 0.4195, "step": 7729 }, { "epoch": 0.44, "grad_norm": 0.3316998991025966, "learning_rate": 1.2276236927337201e-05, "loss": 0.2396, "step": 7730 }, { "epoch": 0.44, "grad_norm": 0.3949812567558907, "learning_rate": 1.2274424825825608e-05, "loss": 0.3378, "step": 7731 }, { "epoch": 0.44, "grad_norm": 0.5902242463857145, "learning_rate": 1.2272612645550783e-05, "loss": 0.4106, "step": 7732 }, { "epoch": 0.44, "grad_norm": 0.27268811226890377, "learning_rate": 1.2270800386575479e-05, "loss": 0.182, "step": 7733 }, { "epoch": 0.44, "grad_norm": 0.30230494310639255, "learning_rate": 1.2268988048962454e-05, "loss": 0.2506, "step": 7734 }, { "epoch": 0.44, "grad_norm": 0.7007259407472975, "learning_rate": 1.2267175632774472e-05, "loss": 0.4328, "step": 7735 }, { "epoch": 0.44, "grad_norm": 0.3078098222416652, "learning_rate": 1.2265363138074294e-05, "loss": 0.2572, "step": 7736 }, { "epoch": 0.44, "grad_norm": 0.7233622730142071, "learning_rate": 1.226355056492469e-05, "loss": 0.5583, "step": 7737 }, { "epoch": 0.44, "grad_norm": 0.3706652593809387, "learning_rate": 1.2261737913388424e-05, "loss": 0.3142, "step": 7738 }, { "epoch": 0.44, "grad_norm": 0.3325676696521458, "learning_rate": 1.2259925183528275e-05, "loss": 0.2802, "step": 7739 }, { "epoch": 0.44, "grad_norm": 0.285868913478341, "learning_rate": 1.225811237540701e-05, "loss": 0.1341, "step": 7740 }, { "epoch": 0.44, "grad_norm": 0.6615062653955808, "learning_rate": 1.2256299489087416e-05, "loss": 0.3796, "step": 7741 }, { "epoch": 0.44, "grad_norm": 0.29847265124901856, "learning_rate": 1.2254486524632263e-05, "loss": 0.2744, "step": 7742 }, { "epoch": 0.44, "grad_norm": 0.39855067333182725, "learning_rate": 1.225267348210434e-05, "loss": 0.2633, "step": 7743 }, { "epoch": 0.44, "grad_norm": 0.6883013273514436, "learning_rate": 1.225086036156643e-05, "loss": 0.475, "step": 7744 }, { "epoch": 0.44, "grad_norm": 0.3010695239911448, "learning_rate": 1.2249047163081325e-05, "loss": 0.2446, "step": 7745 }, { "epoch": 0.45, "grad_norm": 0.265481799290773, "learning_rate": 1.2247233886711811e-05, "loss": 0.1961, "step": 7746 }, { "epoch": 0.45, "grad_norm": 0.6405863195516796, "learning_rate": 1.2245420532520687e-05, "loss": 0.3769, "step": 7747 }, { "epoch": 0.45, "grad_norm": 0.3782124832205112, "learning_rate": 1.2243607100570743e-05, "loss": 0.3033, "step": 7748 }, { "epoch": 0.45, "grad_norm": 0.8096337104484459, "learning_rate": 1.2241793590924785e-05, "loss": 0.4681, "step": 7749 }, { "epoch": 0.45, "grad_norm": 0.2962372025335575, "learning_rate": 1.2239980003645606e-05, "loss": 0.2728, "step": 7750 }, { "epoch": 0.45, "grad_norm": 0.4048492625045128, "learning_rate": 1.2238166338796021e-05, "loss": 0.3013, "step": 7751 }, { "epoch": 0.45, "grad_norm": 0.307624574114885, "learning_rate": 1.2236352596438832e-05, "loss": 0.1997, "step": 7752 }, { "epoch": 0.45, "grad_norm": 1.1069574333300105, "learning_rate": 1.2234538776636846e-05, "loss": 0.3514, "step": 7753 }, { "epoch": 0.45, "grad_norm": 0.31546488061455996, "learning_rate": 1.223272487945288e-05, "loss": 0.2837, "step": 7754 }, { "epoch": 0.45, "grad_norm": 0.4835867800375805, "learning_rate": 1.2230910904949747e-05, "loss": 0.3979, "step": 7755 }, { "epoch": 0.45, "grad_norm": 0.959949705436744, "learning_rate": 1.2229096853190265e-05, "loss": 0.3485, "step": 7756 }, { "epoch": 0.45, "grad_norm": 0.3530160623767107, "learning_rate": 1.2227282724237254e-05, "loss": 0.2498, "step": 7757 }, { "epoch": 0.45, "grad_norm": 0.5273841705816225, "learning_rate": 1.2225468518153543e-05, "loss": 0.2861, "step": 7758 }, { "epoch": 0.45, "grad_norm": 0.9535026076828771, "learning_rate": 1.2223654235001948e-05, "loss": 0.2452, "step": 7759 }, { "epoch": 0.45, "grad_norm": 0.36487906220355076, "learning_rate": 1.2221839874845303e-05, "loss": 0.2854, "step": 7760 }, { "epoch": 0.45, "grad_norm": 1.1723647175488505, "learning_rate": 1.2220025437746437e-05, "loss": 0.8119, "step": 7761 }, { "epoch": 0.45, "grad_norm": 0.395196663415754, "learning_rate": 1.221821092376819e-05, "loss": 0.2566, "step": 7762 }, { "epoch": 0.45, "grad_norm": 0.43346883361783417, "learning_rate": 1.2216396332973391e-05, "loss": 0.3076, "step": 7763 }, { "epoch": 0.45, "grad_norm": 0.8082014848351015, "learning_rate": 1.2214581665424883e-05, "loss": 0.442, "step": 7764 }, { "epoch": 0.45, "grad_norm": 0.2674446403474515, "learning_rate": 1.2212766921185506e-05, "loss": 0.2374, "step": 7765 }, { "epoch": 0.45, "grad_norm": 0.28619470020545024, "learning_rate": 1.2210952100318107e-05, "loss": 0.1977, "step": 7766 }, { "epoch": 0.45, "grad_norm": 0.5161481861718609, "learning_rate": 1.220913720288553e-05, "loss": 0.4137, "step": 7767 }, { "epoch": 0.45, "grad_norm": 1.4577512973298372, "learning_rate": 1.2207322228950628e-05, "loss": 0.7202, "step": 7768 }, { "epoch": 0.45, "grad_norm": 0.3080743765473305, "learning_rate": 1.2205507178576252e-05, "loss": 0.1984, "step": 7769 }, { "epoch": 0.45, "grad_norm": 0.439692529348634, "learning_rate": 1.220369205182526e-05, "loss": 0.3539, "step": 7770 }, { "epoch": 0.45, "grad_norm": 0.2810154705395384, "learning_rate": 1.2201876848760505e-05, "loss": 0.2121, "step": 7771 }, { "epoch": 0.45, "grad_norm": 0.34147306795497445, "learning_rate": 1.2200061569444848e-05, "loss": 0.2114, "step": 7772 }, { "epoch": 0.45, "grad_norm": 1.1078075037797748, "learning_rate": 1.2198246213941156e-05, "loss": 0.6738, "step": 7773 }, { "epoch": 0.45, "grad_norm": 0.43075098792032634, "learning_rate": 1.2196430782312292e-05, "loss": 0.3043, "step": 7774 }, { "epoch": 0.45, "grad_norm": 0.3794600600488902, "learning_rate": 1.2194615274621122e-05, "loss": 0.2685, "step": 7775 }, { "epoch": 0.45, "grad_norm": 0.5827109851181835, "learning_rate": 1.2192799690930521e-05, "loss": 0.3391, "step": 7776 }, { "epoch": 0.45, "grad_norm": 0.35902548591666494, "learning_rate": 1.2190984031303361e-05, "loss": 0.2472, "step": 7777 }, { "epoch": 0.45, "grad_norm": 0.3640745890902366, "learning_rate": 1.2189168295802519e-05, "loss": 0.2622, "step": 7778 }, { "epoch": 0.45, "grad_norm": 0.34088465488055364, "learning_rate": 1.218735248449087e-05, "loss": 0.1616, "step": 7779 }, { "epoch": 0.45, "grad_norm": 0.7991062158846808, "learning_rate": 1.21855365974313e-05, "loss": 0.4441, "step": 7780 }, { "epoch": 0.45, "grad_norm": 0.3664535149552145, "learning_rate": 1.2183720634686693e-05, "loss": 0.2753, "step": 7781 }, { "epoch": 0.45, "grad_norm": 0.37060343525461587, "learning_rate": 1.2181904596319933e-05, "loss": 0.2746, "step": 7782 }, { "epoch": 0.45, "grad_norm": 0.26177238030517913, "learning_rate": 1.2180088482393911e-05, "loss": 0.1789, "step": 7783 }, { "epoch": 0.45, "grad_norm": 0.3335241264892765, "learning_rate": 1.2178272292971519e-05, "loss": 0.2824, "step": 7784 }, { "epoch": 0.45, "grad_norm": 0.7427978808671635, "learning_rate": 1.217645602811565e-05, "loss": 0.484, "step": 7785 }, { "epoch": 0.45, "grad_norm": 0.4443305120052916, "learning_rate": 1.2174639687889202e-05, "loss": 0.3144, "step": 7786 }, { "epoch": 0.45, "grad_norm": 0.39557505313196073, "learning_rate": 1.2172823272355077e-05, "loss": 0.2631, "step": 7787 }, { "epoch": 0.45, "grad_norm": 0.6536016348918546, "learning_rate": 1.2171006781576173e-05, "loss": 0.3575, "step": 7788 }, { "epoch": 0.45, "grad_norm": 0.2895298768999007, "learning_rate": 1.2169190215615401e-05, "loss": 0.1717, "step": 7789 }, { "epoch": 0.45, "grad_norm": 0.3057102143177872, "learning_rate": 1.2167373574535661e-05, "loss": 0.258, "step": 7790 }, { "epoch": 0.45, "grad_norm": 0.5624184239293547, "learning_rate": 1.2165556858399874e-05, "loss": 0.443, "step": 7791 }, { "epoch": 0.45, "grad_norm": 0.4411597730654119, "learning_rate": 1.2163740067270941e-05, "loss": 0.2398, "step": 7792 }, { "epoch": 0.45, "grad_norm": 0.33775600014775914, "learning_rate": 1.2161923201211783e-05, "loss": 0.273, "step": 7793 }, { "epoch": 0.45, "grad_norm": 0.354861755129926, "learning_rate": 1.2160106260285316e-05, "loss": 0.293, "step": 7794 }, { "epoch": 0.45, "grad_norm": 0.7224381640648487, "learning_rate": 1.2158289244554467e-05, "loss": 0.3158, "step": 7795 }, { "epoch": 0.45, "grad_norm": 0.34561253546241905, "learning_rate": 1.2156472154082149e-05, "loss": 0.255, "step": 7796 }, { "epoch": 0.45, "grad_norm": 1.1561107168749716, "learning_rate": 1.2154654988931296e-05, "loss": 0.7711, "step": 7797 }, { "epoch": 0.45, "grad_norm": 0.37902420717857044, "learning_rate": 1.2152837749164834e-05, "loss": 0.2881, "step": 7798 }, { "epoch": 0.45, "grad_norm": 0.3027625498266799, "learning_rate": 1.215102043484569e-05, "loss": 0.224, "step": 7799 }, { "epoch": 0.45, "grad_norm": 0.4722136725028722, "learning_rate": 1.2149203046036803e-05, "loss": 0.2767, "step": 7800 }, { "epoch": 0.45, "grad_norm": 0.47601793386394814, "learning_rate": 1.2147385582801106e-05, "loss": 0.3417, "step": 7801 }, { "epoch": 0.45, "grad_norm": 0.30416584956647924, "learning_rate": 1.214556804520154e-05, "loss": 0.2099, "step": 7802 }, { "epoch": 0.45, "grad_norm": 0.5322689707569284, "learning_rate": 1.2143750433301043e-05, "loss": 0.3731, "step": 7803 }, { "epoch": 0.45, "grad_norm": 0.48119429572557804, "learning_rate": 1.2141932747162564e-05, "loss": 0.3049, "step": 7804 }, { "epoch": 0.45, "grad_norm": 0.2579226364490064, "learning_rate": 1.2140114986849043e-05, "loss": 0.138, "step": 7805 }, { "epoch": 0.45, "grad_norm": 0.32117319040447845, "learning_rate": 1.2138297152423432e-05, "loss": 0.2956, "step": 7806 }, { "epoch": 0.45, "grad_norm": 0.9370941699181503, "learning_rate": 1.2136479243948683e-05, "loss": 0.6731, "step": 7807 }, { "epoch": 0.45, "grad_norm": 0.3129022751134737, "learning_rate": 1.2134661261487752e-05, "loss": 0.1771, "step": 7808 }, { "epoch": 0.45, "grad_norm": 0.3969024582867962, "learning_rate": 1.2132843205103591e-05, "loss": 0.322, "step": 7809 }, { "epoch": 0.45, "grad_norm": 0.4938649637259429, "learning_rate": 1.2131025074859164e-05, "loss": 0.3403, "step": 7810 }, { "epoch": 0.45, "grad_norm": 0.21944148398620011, "learning_rate": 1.2129206870817428e-05, "loss": 0.1346, "step": 7811 }, { "epoch": 0.45, "grad_norm": 0.3246560530948743, "learning_rate": 1.2127388593041348e-05, "loss": 0.24, "step": 7812 }, { "epoch": 0.45, "grad_norm": 0.48767280290232373, "learning_rate": 1.2125570241593894e-05, "loss": 0.3602, "step": 7813 }, { "epoch": 0.45, "grad_norm": 0.33893538071778284, "learning_rate": 1.2123751816538037e-05, "loss": 0.2849, "step": 7814 }, { "epoch": 0.45, "grad_norm": 0.49358569403480806, "learning_rate": 1.2121933317936741e-05, "loss": 0.3024, "step": 7815 }, { "epoch": 0.45, "grad_norm": 0.6051859788689352, "learning_rate": 1.2120114745852989e-05, "loss": 0.4997, "step": 7816 }, { "epoch": 0.45, "grad_norm": 0.2764055452780025, "learning_rate": 1.2118296100349753e-05, "loss": 0.2308, "step": 7817 }, { "epoch": 0.45, "grad_norm": 0.25970750334520953, "learning_rate": 1.2116477381490013e-05, "loss": 0.1922, "step": 7818 }, { "epoch": 0.45, "grad_norm": 0.8639977301196575, "learning_rate": 1.2114658589336754e-05, "loss": 0.5905, "step": 7819 }, { "epoch": 0.45, "grad_norm": 0.4017731721362114, "learning_rate": 1.2112839723952958e-05, "loss": 0.2922, "step": 7820 }, { "epoch": 0.45, "grad_norm": 0.3604215580984441, "learning_rate": 1.2111020785401615e-05, "loss": 0.2731, "step": 7821 }, { "epoch": 0.45, "grad_norm": 0.6402061764469947, "learning_rate": 1.210920177374571e-05, "loss": 0.3718, "step": 7822 }, { "epoch": 0.45, "grad_norm": 0.6287622775741888, "learning_rate": 1.210738268904824e-05, "loss": 0.3071, "step": 7823 }, { "epoch": 0.45, "grad_norm": 0.23030235373629632, "learning_rate": 1.21055635313722e-05, "loss": 0.1565, "step": 7824 }, { "epoch": 0.45, "grad_norm": 0.36849606576333455, "learning_rate": 1.2103744300780586e-05, "loss": 0.3311, "step": 7825 }, { "epoch": 0.45, "grad_norm": 0.5539057972494684, "learning_rate": 1.2101924997336393e-05, "loss": 0.3156, "step": 7826 }, { "epoch": 0.45, "grad_norm": 0.39605459800732945, "learning_rate": 1.2100105621102631e-05, "loss": 0.3243, "step": 7827 }, { "epoch": 0.45, "grad_norm": 0.8298682413471936, "learning_rate": 1.20982861721423e-05, "loss": 0.462, "step": 7828 }, { "epoch": 0.45, "grad_norm": 0.38887716172679526, "learning_rate": 1.209646665051841e-05, "loss": 0.2785, "step": 7829 }, { "epoch": 0.45, "grad_norm": 0.22922781355910388, "learning_rate": 1.2094647056293969e-05, "loss": 0.2256, "step": 7830 }, { "epoch": 0.45, "grad_norm": 0.6853732992858654, "learning_rate": 1.2092827389531992e-05, "loss": 0.3065, "step": 7831 }, { "epoch": 0.45, "grad_norm": 0.5288262475482187, "learning_rate": 1.209100765029549e-05, "loss": 0.3367, "step": 7832 }, { "epoch": 0.45, "grad_norm": 0.3683993478374688, "learning_rate": 1.2089187838647484e-05, "loss": 0.2997, "step": 7833 }, { "epoch": 0.45, "grad_norm": 0.46618559440736795, "learning_rate": 1.2087367954650992e-05, "loss": 0.3075, "step": 7834 }, { "epoch": 0.45, "grad_norm": 0.3954069825574937, "learning_rate": 1.208554799836904e-05, "loss": 0.2891, "step": 7835 }, { "epoch": 0.45, "grad_norm": 0.2476792511905212, "learning_rate": 1.2083727969864652e-05, "loss": 0.2011, "step": 7836 }, { "epoch": 0.45, "grad_norm": 0.4296079969449888, "learning_rate": 1.208190786920085e-05, "loss": 0.2942, "step": 7837 }, { "epoch": 0.45, "grad_norm": 0.6835585045500028, "learning_rate": 1.208008769644067e-05, "loss": 0.3775, "step": 7838 }, { "epoch": 0.45, "grad_norm": 0.429431380987189, "learning_rate": 1.2078267451647141e-05, "loss": 0.319, "step": 7839 }, { "epoch": 0.45, "grad_norm": 0.9934331425210245, "learning_rate": 1.20764471348833e-05, "loss": 0.6671, "step": 7840 }, { "epoch": 0.45, "grad_norm": 0.35648383354079055, "learning_rate": 1.2074626746212183e-05, "loss": 0.2447, "step": 7841 }, { "epoch": 0.45, "grad_norm": 0.2711537520251157, "learning_rate": 1.2072806285696836e-05, "loss": 0.2366, "step": 7842 }, { "epoch": 0.45, "grad_norm": 0.5954785393208696, "learning_rate": 1.2070985753400293e-05, "loss": 0.3112, "step": 7843 }, { "epoch": 0.45, "grad_norm": 0.506182932788288, "learning_rate": 1.2069165149385603e-05, "loss": 0.1937, "step": 7844 }, { "epoch": 0.45, "grad_norm": 0.3082872471946073, "learning_rate": 1.2067344473715813e-05, "loss": 0.2891, "step": 7845 }, { "epoch": 0.45, "grad_norm": 1.3423903304220317, "learning_rate": 1.2065523726453971e-05, "loss": 0.7992, "step": 7846 }, { "epoch": 0.45, "grad_norm": 0.4917154874110692, "learning_rate": 1.206370290766313e-05, "loss": 0.1749, "step": 7847 }, { "epoch": 0.45, "grad_norm": 0.241763552867306, "learning_rate": 1.206188201740635e-05, "loss": 0.2102, "step": 7848 }, { "epoch": 0.45, "grad_norm": 0.3783400126203032, "learning_rate": 1.206006105574668e-05, "loss": 0.3344, "step": 7849 }, { "epoch": 0.45, "grad_norm": 0.73194324677599, "learning_rate": 1.205824002274719e-05, "loss": 0.4001, "step": 7850 }, { "epoch": 0.45, "grad_norm": 0.31024938969861154, "learning_rate": 1.2056418918470931e-05, "loss": 0.2304, "step": 7851 }, { "epoch": 0.45, "grad_norm": 0.9587907228244877, "learning_rate": 1.2054597742980975e-05, "loss": 0.6535, "step": 7852 }, { "epoch": 0.45, "grad_norm": 0.353162787883959, "learning_rate": 1.2052776496340389e-05, "loss": 0.2805, "step": 7853 }, { "epoch": 0.45, "grad_norm": 0.32028660740744386, "learning_rate": 1.205095517861224e-05, "loss": 0.2061, "step": 7854 }, { "epoch": 0.45, "grad_norm": 0.43666200782343506, "learning_rate": 1.2049133789859602e-05, "loss": 0.3048, "step": 7855 }, { "epoch": 0.45, "grad_norm": 0.364837631487842, "learning_rate": 1.2047312330145549e-05, "loss": 0.2357, "step": 7856 }, { "epoch": 0.45, "grad_norm": 0.2910287527496673, "learning_rate": 1.204549079953316e-05, "loss": 0.253, "step": 7857 }, { "epoch": 0.45, "grad_norm": 0.8730968420317119, "learning_rate": 1.2043669198085509e-05, "loss": 0.6054, "step": 7858 }, { "epoch": 0.45, "grad_norm": 1.4669922543739415, "learning_rate": 1.2041847525865681e-05, "loss": 0.7951, "step": 7859 }, { "epoch": 0.45, "grad_norm": 0.3773913635551674, "learning_rate": 1.2040025782936766e-05, "loss": 0.2398, "step": 7860 }, { "epoch": 0.45, "grad_norm": 0.2937030728988133, "learning_rate": 1.2038203969361841e-05, "loss": 0.2758, "step": 7861 }, { "epoch": 0.45, "grad_norm": 0.39829030321756786, "learning_rate": 1.2036382085204004e-05, "loss": 0.2639, "step": 7862 }, { "epoch": 0.45, "grad_norm": 0.35718100232276506, "learning_rate": 1.2034560130526341e-05, "loss": 0.2594, "step": 7863 }, { "epoch": 0.45, "grad_norm": 0.8819076750989475, "learning_rate": 1.2032738105391945e-05, "loss": 0.4058, "step": 7864 }, { "epoch": 0.45, "grad_norm": 0.37511149307403135, "learning_rate": 1.2030916009863921e-05, "loss": 0.3146, "step": 7865 }, { "epoch": 0.45, "grad_norm": 0.3427182179012295, "learning_rate": 1.2029093844005359e-05, "loss": 0.2962, "step": 7866 }, { "epoch": 0.45, "grad_norm": 0.6080154357699522, "learning_rate": 1.2027271607879368e-05, "loss": 0.362, "step": 7867 }, { "epoch": 0.45, "grad_norm": 0.24903988506822783, "learning_rate": 1.2025449301549046e-05, "loss": 0.1698, "step": 7868 }, { "epoch": 0.45, "grad_norm": 0.3080660901489215, "learning_rate": 1.2023626925077503e-05, "loss": 0.272, "step": 7869 }, { "epoch": 0.45, "grad_norm": 1.1014977729702664, "learning_rate": 1.2021804478527845e-05, "loss": 0.401, "step": 7870 }, { "epoch": 0.45, "grad_norm": 0.6438849569428409, "learning_rate": 1.2019981961963185e-05, "loss": 0.4141, "step": 7871 }, { "epoch": 0.45, "grad_norm": 0.3717529729312076, "learning_rate": 1.2018159375446636e-05, "loss": 0.2579, "step": 7872 }, { "epoch": 0.45, "grad_norm": 0.3212381811770286, "learning_rate": 1.2016336719041318e-05, "loss": 0.2701, "step": 7873 }, { "epoch": 0.45, "grad_norm": 0.38203566172353803, "learning_rate": 1.2014513992810344e-05, "loss": 0.2637, "step": 7874 }, { "epoch": 0.45, "grad_norm": 0.3642964971611716, "learning_rate": 1.2012691196816836e-05, "loss": 0.275, "step": 7875 }, { "epoch": 0.45, "grad_norm": 0.37088618583675204, "learning_rate": 1.2010868331123922e-05, "loss": 0.2343, "step": 7876 }, { "epoch": 0.45, "grad_norm": 0.40984124709798897, "learning_rate": 1.2009045395794723e-05, "loss": 0.283, "step": 7877 }, { "epoch": 0.45, "grad_norm": 0.31850183923540143, "learning_rate": 1.2007222390892365e-05, "loss": 0.2847, "step": 7878 }, { "epoch": 0.45, "grad_norm": 0.6904602282111612, "learning_rate": 1.2005399316479984e-05, "loss": 0.4745, "step": 7879 }, { "epoch": 0.45, "grad_norm": 0.28231886869155365, "learning_rate": 1.200357617262071e-05, "loss": 0.1993, "step": 7880 }, { "epoch": 0.45, "grad_norm": 0.2864075909628802, "learning_rate": 1.2001752959377681e-05, "loss": 0.2689, "step": 7881 }, { "epoch": 0.45, "grad_norm": 0.5250714104982417, "learning_rate": 1.1999929676814036e-05, "loss": 0.2763, "step": 7882 }, { "epoch": 0.45, "grad_norm": 0.6084203947302514, "learning_rate": 1.1998106324992906e-05, "loss": 0.3492, "step": 7883 }, { "epoch": 0.45, "grad_norm": 0.3261697352200898, "learning_rate": 1.1996282903977442e-05, "loss": 0.3021, "step": 7884 }, { "epoch": 0.45, "grad_norm": 0.3600944615199071, "learning_rate": 1.1994459413830785e-05, "loss": 0.3117, "step": 7885 }, { "epoch": 0.45, "grad_norm": 0.43975626688178493, "learning_rate": 1.1992635854616088e-05, "loss": 0.2537, "step": 7886 }, { "epoch": 0.45, "grad_norm": 0.3502728085639639, "learning_rate": 1.1990812226396496e-05, "loss": 0.3062, "step": 7887 }, { "epoch": 0.45, "grad_norm": 0.43356883571119836, "learning_rate": 1.1988988529235161e-05, "loss": 0.305, "step": 7888 }, { "epoch": 0.45, "grad_norm": 0.29485545554777076, "learning_rate": 1.198716476319524e-05, "loss": 0.2916, "step": 7889 }, { "epoch": 0.45, "grad_norm": 0.31064543092327446, "learning_rate": 1.198534092833989e-05, "loss": 0.2028, "step": 7890 }, { "epoch": 0.45, "grad_norm": 0.8897180222566816, "learning_rate": 1.1983517024732266e-05, "loss": 0.59, "step": 7891 }, { "epoch": 0.45, "grad_norm": 0.3955907200246761, "learning_rate": 1.1981693052435537e-05, "loss": 0.3412, "step": 7892 }, { "epoch": 0.45, "grad_norm": 0.29039852394862853, "learning_rate": 1.1979869011512859e-05, "loss": 0.2017, "step": 7893 }, { "epoch": 0.45, "grad_norm": 0.39928201842865546, "learning_rate": 1.1978044902027403e-05, "loss": 0.3085, "step": 7894 }, { "epoch": 0.45, "grad_norm": 0.4353633841389933, "learning_rate": 1.1976220724042336e-05, "loss": 0.2926, "step": 7895 }, { "epoch": 0.45, "grad_norm": 0.32887704414837243, "learning_rate": 1.1974396477620833e-05, "loss": 0.1961, "step": 7896 }, { "epoch": 0.45, "grad_norm": 0.34286785641908873, "learning_rate": 1.1972572162826061e-05, "loss": 0.314, "step": 7897 }, { "epoch": 0.45, "grad_norm": 1.3682061627663276, "learning_rate": 1.1970747779721203e-05, "loss": 0.7447, "step": 7898 }, { "epoch": 0.45, "grad_norm": 0.29070012715612625, "learning_rate": 1.1968923328369433e-05, "loss": 0.219, "step": 7899 }, { "epoch": 0.45, "grad_norm": 0.4741431492632248, "learning_rate": 1.1967098808833935e-05, "loss": 0.3583, "step": 7900 }, { "epoch": 0.45, "grad_norm": 0.298824920933953, "learning_rate": 1.196527422117789e-05, "loss": 0.2645, "step": 7901 }, { "epoch": 0.45, "grad_norm": 0.24902107345337846, "learning_rate": 1.1963449565464478e-05, "loss": 0.2042, "step": 7902 }, { "epoch": 0.45, "grad_norm": 1.0625977661145665, "learning_rate": 1.1961624841756896e-05, "loss": 0.3038, "step": 7903 }, { "epoch": 0.45, "grad_norm": 0.42317283623319885, "learning_rate": 1.1959800050118328e-05, "loss": 0.3523, "step": 7904 }, { "epoch": 0.45, "grad_norm": 0.3517066275803351, "learning_rate": 1.195797519061197e-05, "loss": 0.2577, "step": 7905 }, { "epoch": 0.45, "grad_norm": 0.7735868151817158, "learning_rate": 1.1956150263301014e-05, "loss": 0.3493, "step": 7906 }, { "epoch": 0.45, "grad_norm": 0.310163938160124, "learning_rate": 1.1954325268248662e-05, "loss": 0.1994, "step": 7907 }, { "epoch": 0.45, "grad_norm": 0.397983349336781, "learning_rate": 1.1952500205518107e-05, "loss": 0.241, "step": 7908 }, { "epoch": 0.45, "grad_norm": 0.3896181687123652, "learning_rate": 1.1950675075172557e-05, "loss": 0.2484, "step": 7909 }, { "epoch": 0.45, "grad_norm": 0.9368903933280383, "learning_rate": 1.1948849877275209e-05, "loss": 0.5781, "step": 7910 }, { "epoch": 0.45, "grad_norm": 0.39187076583274866, "learning_rate": 1.1947024611889276e-05, "loss": 0.2909, "step": 7911 }, { "epoch": 0.45, "grad_norm": 0.3288277529256562, "learning_rate": 1.1945199279077962e-05, "loss": 0.2785, "step": 7912 }, { "epoch": 0.45, "grad_norm": 0.5272823310712186, "learning_rate": 1.1943373878904482e-05, "loss": 0.2873, "step": 7913 }, { "epoch": 0.45, "grad_norm": 0.29573508428328504, "learning_rate": 1.194154841143205e-05, "loss": 0.1921, "step": 7914 }, { "epoch": 0.45, "grad_norm": 0.6374701531641034, "learning_rate": 1.1939722876723878e-05, "loss": 0.4224, "step": 7915 }, { "epoch": 0.45, "grad_norm": 0.3905831304108459, "learning_rate": 1.1937897274843184e-05, "loss": 0.2766, "step": 7916 }, { "epoch": 0.45, "grad_norm": 0.41233010441397044, "learning_rate": 1.1936071605853195e-05, "loss": 0.286, "step": 7917 }, { "epoch": 0.45, "grad_norm": 1.0236772511840988, "learning_rate": 1.1934245869817127e-05, "loss": 0.4651, "step": 7918 }, { "epoch": 0.45, "grad_norm": 0.27799086007368734, "learning_rate": 1.193242006679821e-05, "loss": 0.1277, "step": 7919 }, { "epoch": 0.46, "grad_norm": 0.2699895568941243, "learning_rate": 1.1930594196859667e-05, "loss": 0.2208, "step": 7920 }, { "epoch": 0.46, "grad_norm": 0.4589470961563092, "learning_rate": 1.1928768260064732e-05, "loss": 0.3091, "step": 7921 }, { "epoch": 0.46, "grad_norm": 1.4691715710811628, "learning_rate": 1.1926942256476632e-05, "loss": 0.3769, "step": 7922 }, { "epoch": 0.46, "grad_norm": 0.37149958982181935, "learning_rate": 1.1925116186158605e-05, "loss": 0.2761, "step": 7923 }, { "epoch": 0.46, "grad_norm": 0.5020463855050367, "learning_rate": 1.192329004917389e-05, "loss": 0.3984, "step": 7924 }, { "epoch": 0.46, "grad_norm": 0.31379760183411154, "learning_rate": 1.1921463845585722e-05, "loss": 0.2095, "step": 7925 }, { "epoch": 0.46, "grad_norm": 0.304322085714914, "learning_rate": 1.1919637575457343e-05, "loss": 0.1815, "step": 7926 }, { "epoch": 0.46, "grad_norm": 0.4280709827379945, "learning_rate": 1.1917811238852e-05, "loss": 0.3252, "step": 7927 }, { "epoch": 0.46, "grad_norm": 0.5484844773978607, "learning_rate": 1.1915984835832934e-05, "loss": 0.3202, "step": 7928 }, { "epoch": 0.46, "grad_norm": 0.49450017929156964, "learning_rate": 1.1914158366463392e-05, "loss": 0.2197, "step": 7929 }, { "epoch": 0.46, "grad_norm": 0.6972758363104524, "learning_rate": 1.1912331830806634e-05, "loss": 0.4078, "step": 7930 }, { "epoch": 0.46, "grad_norm": 1.163113774171106, "learning_rate": 1.1910505228925903e-05, "loss": 0.6485, "step": 7931 }, { "epoch": 0.46, "grad_norm": 0.23145347746277942, "learning_rate": 1.1908678560884462e-05, "loss": 0.1715, "step": 7932 }, { "epoch": 0.46, "grad_norm": 0.30815546200945876, "learning_rate": 1.190685182674556e-05, "loss": 0.2405, "step": 7933 }, { "epoch": 0.46, "grad_norm": 0.6610420498319607, "learning_rate": 1.1905025026572466e-05, "loss": 0.4497, "step": 7934 }, { "epoch": 0.46, "grad_norm": 0.3484729311489025, "learning_rate": 1.1903198160428433e-05, "loss": 0.2436, "step": 7935 }, { "epoch": 0.46, "grad_norm": 0.37479821138076064, "learning_rate": 1.190137122837673e-05, "loss": 0.3104, "step": 7936 }, { "epoch": 0.46, "grad_norm": 0.903404747222253, "learning_rate": 1.1899544230480623e-05, "loss": 0.5389, "step": 7937 }, { "epoch": 0.46, "grad_norm": 0.3793020472891658, "learning_rate": 1.1897717166803384e-05, "loss": 0.2626, "step": 7938 }, { "epoch": 0.46, "grad_norm": 0.21536537499616923, "learning_rate": 1.189589003740828e-05, "loss": 0.158, "step": 7939 }, { "epoch": 0.46, "grad_norm": 0.37013074965928505, "learning_rate": 1.1894062842358585e-05, "loss": 0.3346, "step": 7940 }, { "epoch": 0.46, "grad_norm": 0.4442486156485244, "learning_rate": 1.1892235581717575e-05, "loss": 0.2893, "step": 7941 }, { "epoch": 0.46, "grad_norm": 0.5788706585727145, "learning_rate": 1.1890408255548526e-05, "loss": 0.2968, "step": 7942 }, { "epoch": 0.46, "grad_norm": 1.205983665744992, "learning_rate": 1.1888580863914724e-05, "loss": 0.613, "step": 7943 }, { "epoch": 0.46, "grad_norm": 0.3300548857885787, "learning_rate": 1.188675340687945e-05, "loss": 0.2639, "step": 7944 }, { "epoch": 0.46, "grad_norm": 0.23304407517691592, "learning_rate": 1.188492588450598e-05, "loss": 0.1743, "step": 7945 }, { "epoch": 0.46, "grad_norm": 0.6386065237872193, "learning_rate": 1.1883098296857614e-05, "loss": 0.444, "step": 7946 }, { "epoch": 0.46, "grad_norm": 0.5258883988322739, "learning_rate": 1.188127064399763e-05, "loss": 0.2976, "step": 7947 }, { "epoch": 0.46, "grad_norm": 0.30515301220401475, "learning_rate": 1.1879442925989327e-05, "loss": 0.2492, "step": 7948 }, { "epoch": 0.46, "grad_norm": 0.9557336485183242, "learning_rate": 1.1877615142895995e-05, "loss": 0.5884, "step": 7949 }, { "epoch": 0.46, "grad_norm": 0.58178474444118, "learning_rate": 1.1875787294780932e-05, "loss": 0.324, "step": 7950 }, { "epoch": 0.46, "grad_norm": 0.4263233009317428, "learning_rate": 1.1873959381707437e-05, "loss": 0.323, "step": 7951 }, { "epoch": 0.46, "grad_norm": 0.2837105656508334, "learning_rate": 1.1872131403738807e-05, "loss": 0.2144, "step": 7952 }, { "epoch": 0.46, "grad_norm": 0.3091377428079506, "learning_rate": 1.1870303360938349e-05, "loss": 0.2136, "step": 7953 }, { "epoch": 0.46, "grad_norm": 0.38310899231449197, "learning_rate": 1.1868475253369362e-05, "loss": 0.313, "step": 7954 }, { "epoch": 0.46, "grad_norm": 0.9306600839745718, "learning_rate": 1.1866647081095162e-05, "loss": 0.2579, "step": 7955 }, { "epoch": 0.46, "grad_norm": 0.3050132731937429, "learning_rate": 1.186481884417905e-05, "loss": 0.2768, "step": 7956 }, { "epoch": 0.46, "grad_norm": 0.42166795211866515, "learning_rate": 1.1862990542684345e-05, "loss": 0.3322, "step": 7957 }, { "epoch": 0.46, "grad_norm": 0.2862765123153871, "learning_rate": 1.1861162176674354e-05, "loss": 0.1944, "step": 7958 }, { "epoch": 0.46, "grad_norm": 0.3983928765816261, "learning_rate": 1.1859333746212403e-05, "loss": 0.3066, "step": 7959 }, { "epoch": 0.46, "grad_norm": 0.37464310126796313, "learning_rate": 1.1857505251361801e-05, "loss": 0.3424, "step": 7960 }, { "epoch": 0.46, "grad_norm": 0.4032873786677103, "learning_rate": 1.1855676692185872e-05, "loss": 0.2819, "step": 7961 }, { "epoch": 0.46, "grad_norm": 0.4679357898041959, "learning_rate": 1.1853848068747938e-05, "loss": 0.3131, "step": 7962 }, { "epoch": 0.46, "grad_norm": 0.3412178021471056, "learning_rate": 1.1852019381111326e-05, "loss": 0.3276, "step": 7963 }, { "epoch": 0.46, "grad_norm": 0.2875821893013411, "learning_rate": 1.185019062933936e-05, "loss": 0.2642, "step": 7964 }, { "epoch": 0.46, "grad_norm": 0.4411680471167332, "learning_rate": 1.1848361813495376e-05, "loss": 0.2624, "step": 7965 }, { "epoch": 0.46, "grad_norm": 0.2880036099235008, "learning_rate": 1.18465329336427e-05, "loss": 0.2542, "step": 7966 }, { "epoch": 0.46, "grad_norm": 0.4021660522898735, "learning_rate": 1.1844703989844666e-05, "loss": 0.3471, "step": 7967 }, { "epoch": 0.46, "grad_norm": 0.33634227392857685, "learning_rate": 1.1842874982164616e-05, "loss": 0.2271, "step": 7968 }, { "epoch": 0.46, "grad_norm": 0.3303900188393209, "learning_rate": 1.1841045910665881e-05, "loss": 0.3177, "step": 7969 }, { "epoch": 0.46, "grad_norm": 0.9671616145587876, "learning_rate": 1.1839216775411808e-05, "loss": 0.609, "step": 7970 }, { "epoch": 0.46, "grad_norm": 0.35308979972001275, "learning_rate": 1.1837387576465735e-05, "loss": 0.2297, "step": 7971 }, { "epoch": 0.46, "grad_norm": 0.24389530242129417, "learning_rate": 1.183555831389101e-05, "loss": 0.2464, "step": 7972 }, { "epoch": 0.46, "grad_norm": 0.4290048907771581, "learning_rate": 1.183372898775098e-05, "loss": 0.2936, "step": 7973 }, { "epoch": 0.46, "grad_norm": 0.4132601964727188, "learning_rate": 1.1831899598108993e-05, "loss": 0.2794, "step": 7974 }, { "epoch": 0.46, "grad_norm": 0.4167507268336926, "learning_rate": 1.1830070145028404e-05, "loss": 0.3419, "step": 7975 }, { "epoch": 0.46, "grad_norm": 0.4073307537683485, "learning_rate": 1.1828240628572563e-05, "loss": 0.3289, "step": 7976 }, { "epoch": 0.46, "grad_norm": 0.7557884514763259, "learning_rate": 1.182641104880483e-05, "loss": 0.4952, "step": 7977 }, { "epoch": 0.46, "grad_norm": 0.26140284114058343, "learning_rate": 1.1824581405788558e-05, "loss": 0.1731, "step": 7978 }, { "epoch": 0.46, "grad_norm": 0.39551359172857126, "learning_rate": 1.1822751699587109e-05, "loss": 0.2648, "step": 7979 }, { "epoch": 0.46, "grad_norm": 0.3736897265914453, "learning_rate": 1.1820921930263851e-05, "loss": 0.2989, "step": 7980 }, { "epoch": 0.46, "grad_norm": 0.38382012833460544, "learning_rate": 1.1819092097882141e-05, "loss": 0.231, "step": 7981 }, { "epoch": 0.46, "grad_norm": 1.296301622420184, "learning_rate": 1.1817262202505353e-05, "loss": 0.4792, "step": 7982 }, { "epoch": 0.46, "grad_norm": 0.5060071372814816, "learning_rate": 1.1815432244196849e-05, "loss": 0.3495, "step": 7983 }, { "epoch": 0.46, "grad_norm": 0.285491659038382, "learning_rate": 1.181360222302001e-05, "loss": 0.2228, "step": 7984 }, { "epoch": 0.46, "grad_norm": 0.43293902333173934, "learning_rate": 1.18117721390382e-05, "loss": 0.2953, "step": 7985 }, { "epoch": 0.46, "grad_norm": 0.42922801670662475, "learning_rate": 1.1809941992314799e-05, "loss": 0.2395, "step": 7986 }, { "epoch": 0.46, "grad_norm": 0.33588685351110065, "learning_rate": 1.1808111782913188e-05, "loss": 0.2126, "step": 7987 }, { "epoch": 0.46, "grad_norm": 0.40812800461735654, "learning_rate": 1.180628151089674e-05, "loss": 0.3165, "step": 7988 }, { "epoch": 0.46, "grad_norm": 0.6953194615167572, "learning_rate": 1.1804451176328844e-05, "loss": 0.524, "step": 7989 }, { "epoch": 0.46, "grad_norm": 0.38056672727143354, "learning_rate": 1.1802620779272877e-05, "loss": 0.3416, "step": 7990 }, { "epoch": 0.46, "grad_norm": 0.4374708121864055, "learning_rate": 1.1800790319792234e-05, "loss": 0.3152, "step": 7991 }, { "epoch": 0.46, "grad_norm": 0.24034260281344746, "learning_rate": 1.1798959797950298e-05, "loss": 0.1912, "step": 7992 }, { "epoch": 0.46, "grad_norm": 0.3737299347380316, "learning_rate": 1.1797129213810462e-05, "loss": 0.3117, "step": 7993 }, { "epoch": 0.46, "grad_norm": 0.8087123591015803, "learning_rate": 1.179529856743612e-05, "loss": 0.3435, "step": 7994 }, { "epoch": 0.46, "grad_norm": 0.3312821317125005, "learning_rate": 1.1793467858890666e-05, "loss": 0.2908, "step": 7995 }, { "epoch": 0.46, "grad_norm": 0.37260236653991463, "learning_rate": 1.1791637088237493e-05, "loss": 0.2907, "step": 7996 }, { "epoch": 0.46, "grad_norm": 0.5993123611192235, "learning_rate": 1.1789806255540008e-05, "loss": 0.3494, "step": 7997 }, { "epoch": 0.46, "grad_norm": 0.23486965767951468, "learning_rate": 1.1787975360861607e-05, "loss": 0.1612, "step": 7998 }, { "epoch": 0.46, "grad_norm": 0.36941228758433226, "learning_rate": 1.1786144404265701e-05, "loss": 0.3084, "step": 7999 }, { "epoch": 0.46, "grad_norm": 0.38811622550655456, "learning_rate": 1.1784313385815685e-05, "loss": 0.2744, "step": 8000 }, { "epoch": 0.46, "grad_norm": 0.8777252991056873, "learning_rate": 1.1782482305574976e-05, "loss": 0.5783, "step": 8001 }, { "epoch": 0.46, "grad_norm": 0.33300708206449414, "learning_rate": 1.178065116360698e-05, "loss": 0.2683, "step": 8002 }, { "epoch": 0.46, "grad_norm": 0.3506847663910355, "learning_rate": 1.1778819959975114e-05, "loss": 0.3212, "step": 8003 }, { "epoch": 0.46, "grad_norm": 0.2039203231034237, "learning_rate": 1.1776988694742786e-05, "loss": 0.0902, "step": 8004 }, { "epoch": 0.46, "grad_norm": 0.32754667156190614, "learning_rate": 1.1775157367973417e-05, "loss": 0.2659, "step": 8005 }, { "epoch": 0.46, "grad_norm": 0.9840863478213498, "learning_rate": 1.1773325979730428e-05, "loss": 0.6268, "step": 8006 }, { "epoch": 0.46, "grad_norm": 0.4432248748342312, "learning_rate": 1.1771494530077233e-05, "loss": 0.3049, "step": 8007 }, { "epoch": 0.46, "grad_norm": 0.3207501681884594, "learning_rate": 1.1769663019077258e-05, "loss": 0.2882, "step": 8008 }, { "epoch": 0.46, "grad_norm": 0.8120058617367432, "learning_rate": 1.176783144679393e-05, "loss": 0.5619, "step": 8009 }, { "epoch": 0.46, "grad_norm": 0.20825762019102417, "learning_rate": 1.1765999813290674e-05, "loss": 0.0947, "step": 8010 }, { "epoch": 0.46, "grad_norm": 0.4053812780070775, "learning_rate": 1.1764168118630922e-05, "loss": 0.277, "step": 8011 }, { "epoch": 0.46, "grad_norm": 0.3988311894219567, "learning_rate": 1.1762336362878104e-05, "loss": 0.3246, "step": 8012 }, { "epoch": 0.46, "grad_norm": 0.6500249623410269, "learning_rate": 1.1760504546095653e-05, "loss": 0.4227, "step": 8013 }, { "epoch": 0.46, "grad_norm": 0.29550813912741236, "learning_rate": 1.1758672668347005e-05, "loss": 0.19, "step": 8014 }, { "epoch": 0.46, "grad_norm": 0.34503398766813, "learning_rate": 1.1756840729695598e-05, "loss": 0.342, "step": 8015 }, { "epoch": 0.46, "grad_norm": 0.28919837746058225, "learning_rate": 1.1755008730204873e-05, "loss": 0.2108, "step": 8016 }, { "epoch": 0.46, "grad_norm": 0.31556082209556957, "learning_rate": 1.1753176669938269e-05, "loss": 0.1865, "step": 8017 }, { "epoch": 0.46, "grad_norm": 0.8571833591537861, "learning_rate": 1.1751344548959233e-05, "loss": 0.5349, "step": 8018 }, { "epoch": 0.46, "grad_norm": 0.3899301745997571, "learning_rate": 1.174951236733121e-05, "loss": 0.3314, "step": 8019 }, { "epoch": 0.46, "grad_norm": 0.2910575101476483, "learning_rate": 1.1747680125117654e-05, "loss": 0.2382, "step": 8020 }, { "epoch": 0.46, "grad_norm": 0.9408216363736895, "learning_rate": 1.1745847822382004e-05, "loss": 0.6722, "step": 8021 }, { "epoch": 0.46, "grad_norm": 0.3182946462400599, "learning_rate": 1.174401545918772e-05, "loss": 0.2214, "step": 8022 }, { "epoch": 0.46, "grad_norm": 0.2663466323044907, "learning_rate": 1.1742183035598258e-05, "loss": 0.2274, "step": 8023 }, { "epoch": 0.46, "grad_norm": 0.49731607824725593, "learning_rate": 1.1740350551677073e-05, "loss": 0.3698, "step": 8024 }, { "epoch": 0.46, "grad_norm": 0.9450188765195661, "learning_rate": 1.1738518007487621e-05, "loss": 0.4426, "step": 8025 }, { "epoch": 0.46, "grad_norm": 0.31989191955177276, "learning_rate": 1.1736685403093367e-05, "loss": 0.2602, "step": 8026 }, { "epoch": 0.46, "grad_norm": 0.4274492080687954, "learning_rate": 1.1734852738557772e-05, "loss": 0.2698, "step": 8027 }, { "epoch": 0.46, "grad_norm": 0.32934834987845174, "learning_rate": 1.1733020013944301e-05, "loss": 0.2708, "step": 8028 }, { "epoch": 0.46, "grad_norm": 0.2622235332469892, "learning_rate": 1.1731187229316418e-05, "loss": 0.2088, "step": 8029 }, { "epoch": 0.46, "grad_norm": 1.6319701577728358, "learning_rate": 1.1729354384737602e-05, "loss": 0.3641, "step": 8030 }, { "epoch": 0.46, "grad_norm": 0.39856243611000813, "learning_rate": 1.1727521480271315e-05, "loss": 0.3438, "step": 8031 }, { "epoch": 0.46, "grad_norm": 0.3740268094501319, "learning_rate": 1.172568851598103e-05, "loss": 0.2601, "step": 8032 }, { "epoch": 0.46, "grad_norm": 0.49836553108072196, "learning_rate": 1.1723855491930232e-05, "loss": 0.3013, "step": 8033 }, { "epoch": 0.46, "grad_norm": 0.3562893968298132, "learning_rate": 1.1722022408182388e-05, "loss": 0.288, "step": 8034 }, { "epoch": 0.46, "grad_norm": 0.3640195155365308, "learning_rate": 1.1720189264800983e-05, "loss": 0.2428, "step": 8035 }, { "epoch": 0.46, "grad_norm": 0.706087521346157, "learning_rate": 1.1718356061849496e-05, "loss": 0.2559, "step": 8036 }, { "epoch": 0.46, "grad_norm": 1.2503475403720288, "learning_rate": 1.1716522799391417e-05, "loss": 0.5743, "step": 8037 }, { "epoch": 0.46, "grad_norm": 0.4138155307392456, "learning_rate": 1.1714689477490224e-05, "loss": 0.3201, "step": 8038 }, { "epoch": 0.46, "grad_norm": 0.3644446915489398, "learning_rate": 1.1712856096209411e-05, "loss": 0.292, "step": 8039 }, { "epoch": 0.46, "grad_norm": 0.7135645309500714, "learning_rate": 1.1711022655612461e-05, "loss": 0.2894, "step": 8040 }, { "epoch": 0.46, "grad_norm": 0.28773646997277114, "learning_rate": 1.1709189155762872e-05, "loss": 0.2276, "step": 8041 }, { "epoch": 0.46, "grad_norm": 0.8314554766196716, "learning_rate": 1.1707355596724135e-05, "loss": 0.4062, "step": 8042 }, { "epoch": 0.46, "grad_norm": 0.34693532859102244, "learning_rate": 1.1705521978559748e-05, "loss": 0.215, "step": 8043 }, { "epoch": 0.46, "grad_norm": 0.3694538811668248, "learning_rate": 1.1703688301333211e-05, "loss": 0.2752, "step": 8044 }, { "epoch": 0.46, "grad_norm": 1.349991276656821, "learning_rate": 1.1701854565108019e-05, "loss": 0.7906, "step": 8045 }, { "epoch": 0.46, "grad_norm": 0.4942671800850725, "learning_rate": 1.1700020769947675e-05, "loss": 0.3041, "step": 8046 }, { "epoch": 0.46, "grad_norm": 0.28376124490302723, "learning_rate": 1.1698186915915689e-05, "loss": 0.258, "step": 8047 }, { "epoch": 0.46, "grad_norm": 0.5974320339927423, "learning_rate": 1.1696353003075558e-05, "loss": 0.4175, "step": 8048 }, { "epoch": 0.46, "grad_norm": 0.25196002764368464, "learning_rate": 1.16945190314908e-05, "loss": 0.1402, "step": 8049 }, { "epoch": 0.46, "grad_norm": 0.5777653906969942, "learning_rate": 1.1692685001224918e-05, "loss": 0.335, "step": 8050 }, { "epoch": 0.46, "grad_norm": 0.45541649563077186, "learning_rate": 1.1690850912341427e-05, "loss": 0.2842, "step": 8051 }, { "epoch": 0.46, "grad_norm": 0.9243817653395691, "learning_rate": 1.1689016764903841e-05, "loss": 0.4757, "step": 8052 }, { "epoch": 0.46, "grad_norm": 0.4541786271595083, "learning_rate": 1.168718255897568e-05, "loss": 0.2181, "step": 8053 }, { "epoch": 0.46, "grad_norm": 0.3405747715638682, "learning_rate": 1.1685348294620457e-05, "loss": 0.2851, "step": 8054 }, { "epoch": 0.46, "grad_norm": 0.31051270814141485, "learning_rate": 1.1683513971901697e-05, "loss": 0.2689, "step": 8055 }, { "epoch": 0.46, "grad_norm": 0.3512555459275891, "learning_rate": 1.168167959088292e-05, "loss": 0.2056, "step": 8056 }, { "epoch": 0.46, "grad_norm": 0.5055296842698704, "learning_rate": 1.1679845151627648e-05, "loss": 0.3748, "step": 8057 }, { "epoch": 0.46, "grad_norm": 0.7602648728429585, "learning_rate": 1.1678010654199417e-05, "loss": 0.4803, "step": 8058 }, { "epoch": 0.46, "grad_norm": 0.41065482545930426, "learning_rate": 1.1676176098661742e-05, "loss": 0.2424, "step": 8059 }, { "epoch": 0.46, "grad_norm": 0.47665228757374284, "learning_rate": 1.1674341485078167e-05, "loss": 0.3772, "step": 8060 }, { "epoch": 0.46, "grad_norm": 0.4010258063879982, "learning_rate": 1.1672506813512217e-05, "loss": 0.3169, "step": 8061 }, { "epoch": 0.46, "grad_norm": 0.3090499814345548, "learning_rate": 1.1670672084027425e-05, "loss": 0.2161, "step": 8062 }, { "epoch": 0.46, "grad_norm": 0.32229611953247095, "learning_rate": 1.1668837296687332e-05, "loss": 0.2474, "step": 8063 }, { "epoch": 0.46, "grad_norm": 0.723866973557168, "learning_rate": 1.1667002451555476e-05, "loss": 0.4764, "step": 8064 }, { "epoch": 0.46, "grad_norm": 0.38413806263337497, "learning_rate": 1.1665167548695395e-05, "loss": 0.2978, "step": 8065 }, { "epoch": 0.46, "grad_norm": 0.443860016995288, "learning_rate": 1.1663332588170637e-05, "loss": 0.2366, "step": 8066 }, { "epoch": 0.46, "grad_norm": 0.34763785680591497, "learning_rate": 1.1661497570044737e-05, "loss": 0.3424, "step": 8067 }, { "epoch": 0.46, "grad_norm": 0.8125107733973791, "learning_rate": 1.1659662494381255e-05, "loss": 0.4655, "step": 8068 }, { "epoch": 0.46, "grad_norm": 0.23319806165726573, "learning_rate": 1.1657827361243725e-05, "loss": 0.1526, "step": 8069 }, { "epoch": 0.46, "grad_norm": 0.38548295708245645, "learning_rate": 1.1655992170695709e-05, "loss": 0.3449, "step": 8070 }, { "epoch": 0.46, "grad_norm": 0.6652817103965797, "learning_rate": 1.1654156922800757e-05, "loss": 0.4387, "step": 8071 }, { "epoch": 0.46, "grad_norm": 0.40606341336922763, "learning_rate": 1.1652321617622418e-05, "loss": 0.2247, "step": 8072 }, { "epoch": 0.46, "grad_norm": 1.2211351878797165, "learning_rate": 1.1650486255224254e-05, "loss": 0.7863, "step": 8073 }, { "epoch": 0.46, "grad_norm": 0.2775457316895789, "learning_rate": 1.1648650835669821e-05, "loss": 0.1874, "step": 8074 }, { "epoch": 0.46, "grad_norm": 0.23958314554752064, "learning_rate": 1.1646815359022683e-05, "loss": 0.2295, "step": 8075 }, { "epoch": 0.46, "grad_norm": 0.5807486031966581, "learning_rate": 1.1644979825346397e-05, "loss": 0.4551, "step": 8076 }, { "epoch": 0.46, "grad_norm": 0.5713200590272652, "learning_rate": 1.1643144234704531e-05, "loss": 0.3726, "step": 8077 }, { "epoch": 0.46, "grad_norm": 0.4064945951489773, "learning_rate": 1.1641308587160654e-05, "loss": 0.3286, "step": 8078 }, { "epoch": 0.46, "grad_norm": 0.3521036176468557, "learning_rate": 1.1639472882778328e-05, "loss": 0.2556, "step": 8079 }, { "epoch": 0.46, "grad_norm": 0.563728121201371, "learning_rate": 1.1637637121621126e-05, "loss": 0.3345, "step": 8080 }, { "epoch": 0.46, "grad_norm": 0.38309492806397877, "learning_rate": 1.1635801303752622e-05, "loss": 0.3351, "step": 8081 }, { "epoch": 0.46, "grad_norm": 0.3018626635818792, "learning_rate": 1.1633965429236389e-05, "loss": 0.2433, "step": 8082 }, { "epoch": 0.46, "grad_norm": 0.27316504593409413, "learning_rate": 1.1632129498136005e-05, "loss": 0.2163, "step": 8083 }, { "epoch": 0.46, "grad_norm": 0.5375141536539753, "learning_rate": 1.1630293510515043e-05, "loss": 0.3382, "step": 8084 }, { "epoch": 0.46, "grad_norm": 0.9905869281952622, "learning_rate": 1.1628457466437091e-05, "loss": 0.5418, "step": 8085 }, { "epoch": 0.46, "grad_norm": 0.43572613249512804, "learning_rate": 1.1626621365965725e-05, "loss": 0.2925, "step": 8086 }, { "epoch": 0.46, "grad_norm": 0.29459633182802186, "learning_rate": 1.162478520916453e-05, "loss": 0.267, "step": 8087 }, { "epoch": 0.46, "grad_norm": 0.35473443242054237, "learning_rate": 1.1622948996097095e-05, "loss": 0.2073, "step": 8088 }, { "epoch": 0.46, "grad_norm": 0.6581069323859764, "learning_rate": 1.1621112726827004e-05, "loss": 0.4389, "step": 8089 }, { "epoch": 0.46, "grad_norm": 0.36928184336908865, "learning_rate": 1.161927640141785e-05, "loss": 0.306, "step": 8090 }, { "epoch": 0.46, "grad_norm": 0.3721782809106861, "learning_rate": 1.1617440019933226e-05, "loss": 0.3317, "step": 8091 }, { "epoch": 0.46, "grad_norm": 0.4796932591821559, "learning_rate": 1.1615603582436723e-05, "loss": 0.2115, "step": 8092 }, { "epoch": 0.46, "grad_norm": 0.3904149073219585, "learning_rate": 1.1613767088991935e-05, "loss": 0.3234, "step": 8093 }, { "epoch": 0.47, "grad_norm": 0.36772963613224796, "learning_rate": 1.1611930539662463e-05, "loss": 0.2398, "step": 8094 }, { "epoch": 0.47, "grad_norm": 0.23813307940661996, "learning_rate": 1.1610093934511908e-05, "loss": 0.1875, "step": 8095 }, { "epoch": 0.47, "grad_norm": 0.4083627297184952, "learning_rate": 1.1608257273603864e-05, "loss": 0.3244, "step": 8096 }, { "epoch": 0.47, "grad_norm": 1.3618253703159187, "learning_rate": 1.1606420557001945e-05, "loss": 0.7872, "step": 8097 }, { "epoch": 0.47, "grad_norm": 0.282233577421707, "learning_rate": 1.160458378476975e-05, "loss": 0.2427, "step": 8098 }, { "epoch": 0.47, "grad_norm": 0.3926305634323061, "learning_rate": 1.1602746956970886e-05, "loss": 0.3108, "step": 8099 }, { "epoch": 0.47, "grad_norm": 0.8545494870759214, "learning_rate": 1.1600910073668964e-05, "loss": 0.5018, "step": 8100 }, { "epoch": 0.47, "grad_norm": 0.2593570986614807, "learning_rate": 1.1599073134927597e-05, "loss": 0.1651, "step": 8101 }, { "epoch": 0.47, "grad_norm": 0.34342933446892515, "learning_rate": 1.1597236140810394e-05, "loss": 0.217, "step": 8102 }, { "epoch": 0.47, "grad_norm": 0.3786898079471971, "learning_rate": 1.1595399091380972e-05, "loss": 0.3624, "step": 8103 }, { "epoch": 0.47, "grad_norm": 0.661006336761479, "learning_rate": 1.159356198670295e-05, "loss": 0.4046, "step": 8104 }, { "epoch": 0.47, "grad_norm": 0.33321983617894935, "learning_rate": 1.1591724826839943e-05, "loss": 0.2469, "step": 8105 }, { "epoch": 0.47, "grad_norm": 0.3513617843380767, "learning_rate": 1.1589887611855574e-05, "loss": 0.3298, "step": 8106 }, { "epoch": 0.47, "grad_norm": 0.2512576604987261, "learning_rate": 1.1588050341813466e-05, "loss": 0.1484, "step": 8107 }, { "epoch": 0.47, "grad_norm": 0.2924953972160349, "learning_rate": 1.1586213016777244e-05, "loss": 0.2089, "step": 8108 }, { "epoch": 0.47, "grad_norm": 0.7746893833833915, "learning_rate": 1.158437563681053e-05, "loss": 0.4779, "step": 8109 }, { "epoch": 0.47, "grad_norm": 0.3601070577324971, "learning_rate": 1.1582538201976958e-05, "loss": 0.3198, "step": 8110 }, { "epoch": 0.47, "grad_norm": 0.27886876485206846, "learning_rate": 1.1580700712340159e-05, "loss": 0.2167, "step": 8111 }, { "epoch": 0.47, "grad_norm": 0.8927279441564796, "learning_rate": 1.1578863167963761e-05, "loss": 0.58, "step": 8112 }, { "epoch": 0.47, "grad_norm": 0.25440229767955796, "learning_rate": 1.1577025568911395e-05, "loss": 0.1733, "step": 8113 }, { "epoch": 0.47, "grad_norm": 0.28882003772125364, "learning_rate": 1.1575187915246706e-05, "loss": 0.262, "step": 8114 }, { "epoch": 0.47, "grad_norm": 0.47526159258141804, "learning_rate": 1.1573350207033324e-05, "loss": 0.3023, "step": 8115 }, { "epoch": 0.47, "grad_norm": 0.7238904758177274, "learning_rate": 1.1571512444334894e-05, "loss": 0.4253, "step": 8116 }, { "epoch": 0.47, "grad_norm": 0.3652372666944264, "learning_rate": 1.1569674627215057e-05, "loss": 0.2946, "step": 8117 }, { "epoch": 0.47, "grad_norm": 0.3398527200099904, "learning_rate": 1.1567836755737452e-05, "loss": 0.2535, "step": 8118 }, { "epoch": 0.47, "grad_norm": 0.25940567321868835, "learning_rate": 1.156599882996573e-05, "loss": 0.1895, "step": 8119 }, { "epoch": 0.47, "grad_norm": 0.41358852547742536, "learning_rate": 1.1564160849963533e-05, "loss": 0.2941, "step": 8120 }, { "epoch": 0.47, "grad_norm": 0.6890446909789003, "learning_rate": 1.1562322815794516e-05, "loss": 0.3646, "step": 8121 }, { "epoch": 0.47, "grad_norm": 0.39128113875580217, "learning_rate": 1.1560484727522323e-05, "loss": 0.3401, "step": 8122 }, { "epoch": 0.47, "grad_norm": 0.3501552171817226, "learning_rate": 1.1558646585210615e-05, "loss": 0.2678, "step": 8123 }, { "epoch": 0.47, "grad_norm": 0.8310288981112209, "learning_rate": 1.1556808388923043e-05, "loss": 0.3499, "step": 8124 }, { "epoch": 0.47, "grad_norm": 0.32194066075690564, "learning_rate": 1.155497013872326e-05, "loss": 0.2148, "step": 8125 }, { "epoch": 0.47, "grad_norm": 0.3097208230540672, "learning_rate": 1.1553131834674929e-05, "loss": 0.2589, "step": 8126 }, { "epoch": 0.47, "grad_norm": 0.5418465886898102, "learning_rate": 1.1551293476841712e-05, "loss": 0.3533, "step": 8127 }, { "epoch": 0.47, "grad_norm": 0.9933329488460488, "learning_rate": 1.1549455065287267e-05, "loss": 0.5601, "step": 8128 }, { "epoch": 0.47, "grad_norm": 0.33251164007014095, "learning_rate": 1.1547616600075262e-05, "loss": 0.2702, "step": 8129 }, { "epoch": 0.47, "grad_norm": 0.48264580396110907, "learning_rate": 1.1545778081269356e-05, "loss": 0.3385, "step": 8130 }, { "epoch": 0.47, "grad_norm": 0.26561831151431287, "learning_rate": 1.1543939508933226e-05, "loss": 0.1731, "step": 8131 }, { "epoch": 0.47, "grad_norm": 0.38986731532730423, "learning_rate": 1.1542100883130534e-05, "loss": 0.2678, "step": 8132 }, { "epoch": 0.47, "grad_norm": 0.8541658768168554, "learning_rate": 1.1540262203924957e-05, "loss": 0.4704, "step": 8133 }, { "epoch": 0.47, "grad_norm": 0.37102575891994377, "learning_rate": 1.1538423471380162e-05, "loss": 0.2628, "step": 8134 }, { "epoch": 0.47, "grad_norm": 0.4084559161761699, "learning_rate": 1.1536584685559833e-05, "loss": 0.2971, "step": 8135 }, { "epoch": 0.47, "grad_norm": 1.090623885447246, "learning_rate": 1.1534745846527643e-05, "loss": 0.5709, "step": 8136 }, { "epoch": 0.47, "grad_norm": 0.5691462758797022, "learning_rate": 1.1532906954347265e-05, "loss": 0.2653, "step": 8137 }, { "epoch": 0.47, "grad_norm": 0.3616131380469849, "learning_rate": 1.1531068009082388e-05, "loss": 0.2771, "step": 8138 }, { "epoch": 0.47, "grad_norm": 0.35732383039321136, "learning_rate": 1.1529229010796693e-05, "loss": 0.2351, "step": 8139 }, { "epoch": 0.47, "grad_norm": 1.2853466987734752, "learning_rate": 1.152738995955386e-05, "loss": 0.8433, "step": 8140 }, { "epoch": 0.47, "grad_norm": 0.2917511597672303, "learning_rate": 1.1525550855417579e-05, "loss": 0.2001, "step": 8141 }, { "epoch": 0.47, "grad_norm": 0.41454996938168326, "learning_rate": 1.152371169845154e-05, "loss": 0.322, "step": 8142 }, { "epoch": 0.47, "grad_norm": 0.7889175233489341, "learning_rate": 1.152187248871943e-05, "loss": 0.4598, "step": 8143 }, { "epoch": 0.47, "grad_norm": 0.39376249011371567, "learning_rate": 1.1520033226284942e-05, "loss": 0.2423, "step": 8144 }, { "epoch": 0.47, "grad_norm": 0.42249860389750094, "learning_rate": 1.1518193911211763e-05, "loss": 0.3088, "step": 8145 }, { "epoch": 0.47, "grad_norm": 0.3832570482048342, "learning_rate": 1.1516354543563603e-05, "loss": 0.3023, "step": 8146 }, { "epoch": 0.47, "grad_norm": 0.27028430753402555, "learning_rate": 1.1514515123404144e-05, "loss": 0.1744, "step": 8147 }, { "epoch": 0.47, "grad_norm": 0.9814466672326471, "learning_rate": 1.1512675650797093e-05, "loss": 0.5945, "step": 8148 }, { "epoch": 0.47, "grad_norm": 0.5236065950219225, "learning_rate": 1.1510836125806148e-05, "loss": 0.3632, "step": 8149 }, { "epoch": 0.47, "grad_norm": 0.2567177557850539, "learning_rate": 1.1508996548495015e-05, "loss": 0.2272, "step": 8150 }, { "epoch": 0.47, "grad_norm": 0.48664804419956864, "learning_rate": 1.1507156918927396e-05, "loss": 0.2979, "step": 8151 }, { "epoch": 0.47, "grad_norm": 0.4403235305691835, "learning_rate": 1.1505317237166997e-05, "loss": 0.342, "step": 8152 }, { "epoch": 0.47, "grad_norm": 0.4455779532667353, "learning_rate": 1.1503477503277526e-05, "loss": 0.304, "step": 8153 }, { "epoch": 0.47, "grad_norm": 0.3233973459258135, "learning_rate": 1.1501637717322695e-05, "loss": 0.2459, "step": 8154 }, { "epoch": 0.47, "grad_norm": 0.6969251101798604, "learning_rate": 1.1499797879366214e-05, "loss": 0.4513, "step": 8155 }, { "epoch": 0.47, "grad_norm": 0.4039155950028945, "learning_rate": 1.1497957989471798e-05, "loss": 0.2988, "step": 8156 }, { "epoch": 0.47, "grad_norm": 0.5008602945483102, "learning_rate": 1.1496118047703162e-05, "loss": 0.2993, "step": 8157 }, { "epoch": 0.47, "grad_norm": 0.29040380927590353, "learning_rate": 1.1494278054124019e-05, "loss": 0.2655, "step": 8158 }, { "epoch": 0.47, "grad_norm": 0.39909885373138854, "learning_rate": 1.1492438008798093e-05, "loss": 0.264, "step": 8159 }, { "epoch": 0.47, "grad_norm": 0.4596488600608055, "learning_rate": 1.1490597911789104e-05, "loss": 0.2365, "step": 8160 }, { "epoch": 0.47, "grad_norm": 0.5358031775312709, "learning_rate": 1.1488757763160771e-05, "loss": 0.3619, "step": 8161 }, { "epoch": 0.47, "grad_norm": 0.289051100704963, "learning_rate": 1.148691756297682e-05, "loss": 0.2643, "step": 8162 }, { "epoch": 0.47, "grad_norm": 0.46616362480908935, "learning_rate": 1.1485077311300983e-05, "loss": 0.2833, "step": 8163 }, { "epoch": 0.47, "grad_norm": 0.43853018014503176, "learning_rate": 1.1483237008196978e-05, "loss": 0.3051, "step": 8164 }, { "epoch": 0.47, "grad_norm": 0.27216529737512757, "learning_rate": 1.1481396653728542e-05, "loss": 0.2282, "step": 8165 }, { "epoch": 0.47, "grad_norm": 0.3954700014749519, "learning_rate": 1.14795562479594e-05, "loss": 0.3425, "step": 8166 }, { "epoch": 0.47, "grad_norm": 0.639378666232857, "learning_rate": 1.1477715790953293e-05, "loss": 0.3417, "step": 8167 }, { "epoch": 0.47, "grad_norm": 0.3230244326253486, "learning_rate": 1.1475875282773948e-05, "loss": 0.2642, "step": 8168 }, { "epoch": 0.47, "grad_norm": 1.0780614602536482, "learning_rate": 1.1474034723485108e-05, "loss": 0.6736, "step": 8169 }, { "epoch": 0.47, "grad_norm": 0.299867058226962, "learning_rate": 1.1472194113150507e-05, "loss": 0.2598, "step": 8170 }, { "epoch": 0.47, "grad_norm": 0.2996405103989052, "learning_rate": 1.1470353451833889e-05, "loss": 0.2208, "step": 8171 }, { "epoch": 0.47, "grad_norm": 0.62197502201416, "learning_rate": 1.1468512739598991e-05, "loss": 0.3585, "step": 8172 }, { "epoch": 0.47, "grad_norm": 0.28867732255449474, "learning_rate": 1.1466671976509564e-05, "loss": 0.2173, "step": 8173 }, { "epoch": 0.47, "grad_norm": 0.613279875657845, "learning_rate": 1.1464831162629346e-05, "loss": 0.3476, "step": 8174 }, { "epoch": 0.47, "grad_norm": 0.3803647956262368, "learning_rate": 1.146299029802209e-05, "loss": 0.3359, "step": 8175 }, { "epoch": 0.47, "grad_norm": 1.0181850562146808, "learning_rate": 1.1461149382751544e-05, "loss": 0.6673, "step": 8176 }, { "epoch": 0.47, "grad_norm": 0.7603144529101236, "learning_rate": 1.1459308416881454e-05, "loss": 0.2102, "step": 8177 }, { "epoch": 0.47, "grad_norm": 0.30168260451446044, "learning_rate": 1.145746740047558e-05, "loss": 0.2995, "step": 8178 }, { "epoch": 0.47, "grad_norm": 0.3321460276508209, "learning_rate": 1.1455626333597672e-05, "loss": 0.2085, "step": 8179 }, { "epoch": 0.47, "grad_norm": 0.34455866728257245, "learning_rate": 1.1453785216311484e-05, "loss": 0.2043, "step": 8180 }, { "epoch": 0.47, "grad_norm": 0.3685518357521418, "learning_rate": 1.1451944048680779e-05, "loss": 0.3475, "step": 8181 }, { "epoch": 0.47, "grad_norm": 0.4831933139625984, "learning_rate": 1.1450102830769314e-05, "loss": 0.3685, "step": 8182 }, { "epoch": 0.47, "grad_norm": 0.3531600940862889, "learning_rate": 1.1448261562640848e-05, "loss": 0.2355, "step": 8183 }, { "epoch": 0.47, "grad_norm": 0.439610405654752, "learning_rate": 1.1446420244359148e-05, "loss": 0.3821, "step": 8184 }, { "epoch": 0.47, "grad_norm": 0.26939221681386455, "learning_rate": 1.1444578875987978e-05, "loss": 0.2208, "step": 8185 }, { "epoch": 0.47, "grad_norm": 0.27650453372703404, "learning_rate": 1.1442737457591102e-05, "loss": 0.2099, "step": 8186 }, { "epoch": 0.47, "grad_norm": 1.3629929252444442, "learning_rate": 1.144089598923229e-05, "loss": 0.7209, "step": 8187 }, { "epoch": 0.47, "grad_norm": 0.6214268287048519, "learning_rate": 1.1439054470975312e-05, "loss": 0.506, "step": 8188 }, { "epoch": 0.47, "grad_norm": 0.3119158539908204, "learning_rate": 1.143721290288394e-05, "loss": 0.2878, "step": 8189 }, { "epoch": 0.47, "grad_norm": 0.3318970742581742, "learning_rate": 1.1435371285021948e-05, "loss": 0.262, "step": 8190 }, { "epoch": 0.47, "grad_norm": 0.2773017863670644, "learning_rate": 1.1433529617453108e-05, "loss": 0.1932, "step": 8191 }, { "epoch": 0.47, "grad_norm": 0.8760997978912737, "learning_rate": 1.1431687900241201e-05, "loss": 0.4663, "step": 8192 }, { "epoch": 0.47, "grad_norm": 0.3367584318928723, "learning_rate": 1.142984613345e-05, "loss": 0.2346, "step": 8193 }, { "epoch": 0.47, "grad_norm": 0.4579062321186222, "learning_rate": 1.1428004317143293e-05, "loss": 0.3515, "step": 8194 }, { "epoch": 0.47, "grad_norm": 0.6150169647018473, "learning_rate": 1.1426162451384857e-05, "loss": 0.3871, "step": 8195 }, { "epoch": 0.47, "grad_norm": 0.3216805849443447, "learning_rate": 1.1424320536238478e-05, "loss": 0.2268, "step": 8196 }, { "epoch": 0.47, "grad_norm": 0.25155788365831927, "learning_rate": 1.1422478571767937e-05, "loss": 0.206, "step": 8197 }, { "epoch": 0.47, "grad_norm": 0.34325115785592575, "learning_rate": 1.1420636558037026e-05, "loss": 0.2802, "step": 8198 }, { "epoch": 0.47, "grad_norm": 0.3558996480535589, "learning_rate": 1.1418794495109528e-05, "loss": 0.2655, "step": 8199 }, { "epoch": 0.47, "grad_norm": 0.7393643667231588, "learning_rate": 1.1416952383049244e-05, "loss": 0.4348, "step": 8200 }, { "epoch": 0.47, "grad_norm": 0.3308899478444616, "learning_rate": 1.1415110221919958e-05, "loss": 0.2869, "step": 8201 }, { "epoch": 0.47, "grad_norm": 0.4330551569974486, "learning_rate": 1.1413268011785463e-05, "loss": 0.3299, "step": 8202 }, { "epoch": 0.47, "grad_norm": 0.16466774797088232, "learning_rate": 1.1411425752709561e-05, "loss": 0.0933, "step": 8203 }, { "epoch": 0.47, "grad_norm": 0.5658043381450948, "learning_rate": 1.1409583444756043e-05, "loss": 0.3813, "step": 8204 }, { "epoch": 0.47, "grad_norm": 0.39733186363612516, "learning_rate": 1.1407741087988713e-05, "loss": 0.311, "step": 8205 }, { "epoch": 0.47, "grad_norm": 0.5293473050356182, "learning_rate": 1.1405898682471367e-05, "loss": 0.3052, "step": 8206 }, { "epoch": 0.47, "grad_norm": 0.3863878784564842, "learning_rate": 1.1404056228267813e-05, "loss": 0.3073, "step": 8207 }, { "epoch": 0.47, "grad_norm": 0.5500023359847825, "learning_rate": 1.140221372544185e-05, "loss": 0.3302, "step": 8208 }, { "epoch": 0.47, "grad_norm": 0.26832299732179304, "learning_rate": 1.1400371174057287e-05, "loss": 0.2081, "step": 8209 }, { "epoch": 0.47, "grad_norm": 0.3489058273162678, "learning_rate": 1.139852857417793e-05, "loss": 0.2296, "step": 8210 }, { "epoch": 0.47, "grad_norm": 0.35245208526279237, "learning_rate": 1.139668592586759e-05, "loss": 0.2888, "step": 8211 }, { "epoch": 0.47, "grad_norm": 0.6348054017588185, "learning_rate": 1.1394843229190076e-05, "loss": 0.3672, "step": 8212 }, { "epoch": 0.47, "grad_norm": 0.3565702444562063, "learning_rate": 1.1393000484209202e-05, "loss": 0.2839, "step": 8213 }, { "epoch": 0.47, "grad_norm": 0.35892910995714983, "learning_rate": 1.139115769098878e-05, "loss": 0.2883, "step": 8214 }, { "epoch": 0.47, "grad_norm": 0.4652118043069549, "learning_rate": 1.1389314849592626e-05, "loss": 0.2623, "step": 8215 }, { "epoch": 0.47, "grad_norm": 0.31534488534631117, "learning_rate": 1.1387471960084557e-05, "loss": 0.1603, "step": 8216 }, { "epoch": 0.47, "grad_norm": 0.3043071870765772, "learning_rate": 1.1385629022528397e-05, "loss": 0.2729, "step": 8217 }, { "epoch": 0.47, "grad_norm": 0.791220490302, "learning_rate": 1.1383786036987963e-05, "loss": 0.433, "step": 8218 }, { "epoch": 0.47, "grad_norm": 0.3690113603730788, "learning_rate": 1.1381943003527077e-05, "loss": 0.1939, "step": 8219 }, { "epoch": 0.47, "grad_norm": 0.3844076124295093, "learning_rate": 1.1380099922209564e-05, "loss": 0.3177, "step": 8220 }, { "epoch": 0.47, "grad_norm": 0.3525552292700522, "learning_rate": 1.1378256793099251e-05, "loss": 0.3186, "step": 8221 }, { "epoch": 0.47, "grad_norm": 0.2606543369638635, "learning_rate": 1.1376413616259965e-05, "loss": 0.1299, "step": 8222 }, { "epoch": 0.47, "grad_norm": 0.45789735825217354, "learning_rate": 1.1374570391755532e-05, "loss": 0.3216, "step": 8223 }, { "epoch": 0.47, "grad_norm": 0.7984697434570868, "learning_rate": 1.137272711964979e-05, "loss": 0.4888, "step": 8224 }, { "epoch": 0.47, "grad_norm": 0.31774601929632706, "learning_rate": 1.1370883800006562e-05, "loss": 0.2252, "step": 8225 }, { "epoch": 0.47, "grad_norm": 0.42364764216329703, "learning_rate": 1.1369040432889691e-05, "loss": 0.3239, "step": 8226 }, { "epoch": 0.47, "grad_norm": 0.43654263319431763, "learning_rate": 1.1367197018363005e-05, "loss": 0.2615, "step": 8227 }, { "epoch": 0.47, "grad_norm": 0.5638770827996676, "learning_rate": 1.1365353556490348e-05, "loss": 0.3034, "step": 8228 }, { "epoch": 0.47, "grad_norm": 0.26140699917356136, "learning_rate": 1.1363510047335553e-05, "loss": 0.2156, "step": 8229 }, { "epoch": 0.47, "grad_norm": 0.8208422504947541, "learning_rate": 1.1361666490962468e-05, "loss": 0.4557, "step": 8230 }, { "epoch": 0.47, "grad_norm": 0.9822644350984603, "learning_rate": 1.1359822887434927e-05, "loss": 0.7397, "step": 8231 }, { "epoch": 0.47, "grad_norm": 0.3314402613856236, "learning_rate": 1.1357979236816781e-05, "loss": 0.2076, "step": 8232 }, { "epoch": 0.47, "grad_norm": 0.4487027140708006, "learning_rate": 1.135613553917187e-05, "loss": 0.3122, "step": 8233 }, { "epoch": 0.47, "grad_norm": 0.9682417335085425, "learning_rate": 1.1354291794564045e-05, "loss": 0.4434, "step": 8234 }, { "epoch": 0.47, "grad_norm": 0.2746870381544497, "learning_rate": 1.1352448003057153e-05, "loss": 0.1845, "step": 8235 }, { "epoch": 0.47, "grad_norm": 0.3943326196154475, "learning_rate": 1.1350604164715044e-05, "loss": 0.237, "step": 8236 }, { "epoch": 0.47, "grad_norm": 0.3423516469836019, "learning_rate": 1.1348760279601572e-05, "loss": 0.3115, "step": 8237 }, { "epoch": 0.47, "grad_norm": 0.3458819009956223, "learning_rate": 1.134691634778059e-05, "loss": 0.219, "step": 8238 }, { "epoch": 0.47, "grad_norm": 0.8749680292286744, "learning_rate": 1.1345072369315951e-05, "loss": 0.4917, "step": 8239 }, { "epoch": 0.47, "grad_norm": 0.444178078876986, "learning_rate": 1.1343228344271515e-05, "loss": 0.352, "step": 8240 }, { "epoch": 0.47, "grad_norm": 0.7944684734767636, "learning_rate": 1.1341384272711138e-05, "loss": 0.2822, "step": 8241 }, { "epoch": 0.47, "grad_norm": 0.28858447543694066, "learning_rate": 1.1339540154698682e-05, "loss": 0.1913, "step": 8242 }, { "epoch": 0.47, "grad_norm": 0.33636234607029175, "learning_rate": 1.133769599029801e-05, "loss": 0.2435, "step": 8243 }, { "epoch": 0.47, "grad_norm": 0.37425119175237026, "learning_rate": 1.1335851779572979e-05, "loss": 0.2879, "step": 8244 }, { "epoch": 0.47, "grad_norm": 0.3420784443581969, "learning_rate": 1.1334007522587462e-05, "loss": 0.2764, "step": 8245 }, { "epoch": 0.47, "grad_norm": 0.626268923474981, "learning_rate": 1.1332163219405318e-05, "loss": 0.4184, "step": 8246 }, { "epoch": 0.47, "grad_norm": 0.33327889942284983, "learning_rate": 1.1330318870090427e-05, "loss": 0.2542, "step": 8247 }, { "epoch": 0.47, "grad_norm": 0.3209884215108911, "learning_rate": 1.1328474474706643e-05, "loss": 0.224, "step": 8248 }, { "epoch": 0.47, "grad_norm": 0.38396143689121437, "learning_rate": 1.132663003331785e-05, "loss": 0.2768, "step": 8249 }, { "epoch": 0.47, "grad_norm": 0.34285888950900517, "learning_rate": 1.1324785545987911e-05, "loss": 0.2586, "step": 8250 }, { "epoch": 0.47, "grad_norm": 0.7016183186742044, "learning_rate": 1.1322941012780707e-05, "loss": 0.3433, "step": 8251 }, { "epoch": 0.47, "grad_norm": 0.4224569861018966, "learning_rate": 1.1321096433760116e-05, "loss": 0.3503, "step": 8252 }, { "epoch": 0.47, "grad_norm": 0.2995614695059995, "learning_rate": 1.1319251808990009e-05, "loss": 0.2572, "step": 8253 }, { "epoch": 0.47, "grad_norm": 1.2672865639780329, "learning_rate": 1.1317407138534268e-05, "loss": 0.714, "step": 8254 }, { "epoch": 0.47, "grad_norm": 0.20313439742018613, "learning_rate": 1.1315562422456776e-05, "loss": 0.1281, "step": 8255 }, { "epoch": 0.47, "grad_norm": 0.37114924445860703, "learning_rate": 1.1313717660821413e-05, "loss": 0.2819, "step": 8256 }, { "epoch": 0.47, "grad_norm": 0.4130806594357466, "learning_rate": 1.1311872853692065e-05, "loss": 0.3215, "step": 8257 }, { "epoch": 0.47, "grad_norm": 0.7315074555515565, "learning_rate": 1.1310028001132615e-05, "loss": 0.2889, "step": 8258 }, { "epoch": 0.47, "grad_norm": 0.3518162924006683, "learning_rate": 1.1308183103206956e-05, "loss": 0.2678, "step": 8259 }, { "epoch": 0.47, "grad_norm": 1.263822710183851, "learning_rate": 1.1306338159978968e-05, "loss": 0.8082, "step": 8260 }, { "epoch": 0.47, "grad_norm": 0.2290140963302349, "learning_rate": 1.1304493171512548e-05, "loss": 0.1771, "step": 8261 }, { "epoch": 0.47, "grad_norm": 0.3609070041446921, "learning_rate": 1.1302648137871584e-05, "loss": 0.2591, "step": 8262 }, { "epoch": 0.47, "grad_norm": 0.7500955160282625, "learning_rate": 1.1300803059119969e-05, "loss": 0.4461, "step": 8263 }, { "epoch": 0.47, "grad_norm": 0.47247522252554397, "learning_rate": 1.1298957935321604e-05, "loss": 0.4008, "step": 8264 }, { "epoch": 0.47, "grad_norm": 0.29187071697633193, "learning_rate": 1.129711276654038e-05, "loss": 0.2102, "step": 8265 }, { "epoch": 0.47, "grad_norm": 0.9689601512300138, "learning_rate": 1.1295267552840198e-05, "loss": 0.7327, "step": 8266 }, { "epoch": 0.47, "grad_norm": 0.39459373406513565, "learning_rate": 1.1293422294284955e-05, "loss": 0.255, "step": 8267 }, { "epoch": 0.48, "grad_norm": 0.2841527542011416, "learning_rate": 1.1291576990938556e-05, "loss": 0.234, "step": 8268 }, { "epoch": 0.48, "grad_norm": 0.26846990348811833, "learning_rate": 1.1289731642864896e-05, "loss": 0.2712, "step": 8269 }, { "epoch": 0.48, "grad_norm": 1.2160805062145421, "learning_rate": 1.1287886250127888e-05, "loss": 0.7619, "step": 8270 }, { "epoch": 0.48, "grad_norm": 0.3180662479564137, "learning_rate": 1.1286040812791431e-05, "loss": 0.2088, "step": 8271 }, { "epoch": 0.48, "grad_norm": 1.2780419499498312, "learning_rate": 1.1284195330919443e-05, "loss": 0.763, "step": 8272 }, { "epoch": 0.48, "grad_norm": 0.34751151421346865, "learning_rate": 1.128234980457582e-05, "loss": 0.3257, "step": 8273 }, { "epoch": 0.48, "grad_norm": 0.29037446054088023, "learning_rate": 1.1280504233824481e-05, "loss": 0.2382, "step": 8274 }, { "epoch": 0.48, "grad_norm": 0.400642610476773, "learning_rate": 1.1278658618729334e-05, "loss": 0.2889, "step": 8275 }, { "epoch": 0.48, "grad_norm": 0.28690418079254904, "learning_rate": 1.1276812959354295e-05, "loss": 0.2466, "step": 8276 }, { "epoch": 0.48, "grad_norm": 0.3976401918567556, "learning_rate": 1.1274967255763278e-05, "loss": 0.2747, "step": 8277 }, { "epoch": 0.48, "grad_norm": 0.4454863521161837, "learning_rate": 1.1273121508020202e-05, "loss": 0.2982, "step": 8278 }, { "epoch": 0.48, "grad_norm": 0.5729809893476822, "learning_rate": 1.1271275716188978e-05, "loss": 0.4256, "step": 8279 }, { "epoch": 0.48, "grad_norm": 0.410761763093016, "learning_rate": 1.1269429880333533e-05, "loss": 0.2653, "step": 8280 }, { "epoch": 0.48, "grad_norm": 0.2517747570353783, "learning_rate": 1.1267584000517788e-05, "loss": 0.2289, "step": 8281 }, { "epoch": 0.48, "grad_norm": 0.4175701025964965, "learning_rate": 1.1265738076805663e-05, "loss": 0.2465, "step": 8282 }, { "epoch": 0.48, "grad_norm": 0.4040133712318601, "learning_rate": 1.1263892109261081e-05, "loss": 0.2884, "step": 8283 }, { "epoch": 0.48, "grad_norm": 0.3315685666860372, "learning_rate": 1.126204609794797e-05, "loss": 0.2601, "step": 8284 }, { "epoch": 0.48, "grad_norm": 0.6738959505588366, "learning_rate": 1.1260200042930257e-05, "loss": 0.4439, "step": 8285 }, { "epoch": 0.48, "grad_norm": 0.3713996287944544, "learning_rate": 1.125835394427187e-05, "loss": 0.3126, "step": 8286 }, { "epoch": 0.48, "grad_norm": 0.26116662030586907, "learning_rate": 1.1256507802036742e-05, "loss": 0.1798, "step": 8287 }, { "epoch": 0.48, "grad_norm": 0.2923099580818043, "learning_rate": 1.12546616162888e-05, "loss": 0.241, "step": 8288 }, { "epoch": 0.48, "grad_norm": 0.4591105534812718, "learning_rate": 1.1252815387091984e-05, "loss": 0.293, "step": 8289 }, { "epoch": 0.48, "grad_norm": 0.5414758905754559, "learning_rate": 1.1250969114510221e-05, "loss": 0.4012, "step": 8290 }, { "epoch": 0.48, "grad_norm": 0.8942911584626979, "learning_rate": 1.1249122798607454e-05, "loss": 0.3676, "step": 8291 }, { "epoch": 0.48, "grad_norm": 0.35940592339450533, "learning_rate": 1.1247276439447616e-05, "loss": 0.2682, "step": 8292 }, { "epoch": 0.48, "grad_norm": 0.3883606131416445, "learning_rate": 1.124543003709465e-05, "loss": 0.3171, "step": 8293 }, { "epoch": 0.48, "grad_norm": 0.2069420790155224, "learning_rate": 1.1243583591612495e-05, "loss": 0.1109, "step": 8294 }, { "epoch": 0.48, "grad_norm": 0.3893463873924719, "learning_rate": 1.1241737103065096e-05, "loss": 0.2684, "step": 8295 }, { "epoch": 0.48, "grad_norm": 0.44638528446469994, "learning_rate": 1.1239890571516389e-05, "loss": 0.3573, "step": 8296 }, { "epoch": 0.48, "grad_norm": 0.9358041698450383, "learning_rate": 1.123804399703033e-05, "loss": 0.3285, "step": 8297 }, { "epoch": 0.48, "grad_norm": 0.6012185686365062, "learning_rate": 1.1236197379670861e-05, "loss": 0.3471, "step": 8298 }, { "epoch": 0.48, "grad_norm": 0.36799061504790626, "learning_rate": 1.1234350719501927e-05, "loss": 0.3477, "step": 8299 }, { "epoch": 0.48, "grad_norm": 0.22412798408524393, "learning_rate": 1.1232504016587482e-05, "loss": 0.1747, "step": 8300 }, { "epoch": 0.48, "grad_norm": 0.4149487453115047, "learning_rate": 1.1230657270991476e-05, "loss": 0.304, "step": 8301 }, { "epoch": 0.48, "grad_norm": 0.5459475622031438, "learning_rate": 1.1228810482777859e-05, "loss": 0.3991, "step": 8302 }, { "epoch": 0.48, "grad_norm": 0.9562637175288264, "learning_rate": 1.1226963652010592e-05, "loss": 0.5005, "step": 8303 }, { "epoch": 0.48, "grad_norm": 0.27131366109251476, "learning_rate": 1.1225116778753622e-05, "loss": 0.2124, "step": 8304 }, { "epoch": 0.48, "grad_norm": 0.37477020048094534, "learning_rate": 1.1223269863070913e-05, "loss": 0.3101, "step": 8305 }, { "epoch": 0.48, "grad_norm": 0.37990887770967674, "learning_rate": 1.1221422905026424e-05, "loss": 0.1822, "step": 8306 }, { "epoch": 0.48, "grad_norm": 0.4007847446197605, "learning_rate": 1.1219575904684109e-05, "loss": 0.2274, "step": 8307 }, { "epoch": 0.48, "grad_norm": 0.3859141568843464, "learning_rate": 1.1217728862107932e-05, "loss": 0.3088, "step": 8308 }, { "epoch": 0.48, "grad_norm": 0.5814554045581455, "learning_rate": 1.1215881777361858e-05, "loss": 0.3877, "step": 8309 }, { "epoch": 0.48, "grad_norm": 0.31317402052160326, "learning_rate": 1.1214034650509853e-05, "loss": 0.2069, "step": 8310 }, { "epoch": 0.48, "grad_norm": 1.3864302814986327, "learning_rate": 1.1212187481615875e-05, "loss": 0.6831, "step": 8311 }, { "epoch": 0.48, "grad_norm": 0.24158373982842526, "learning_rate": 1.1210340270743903e-05, "loss": 0.2158, "step": 8312 }, { "epoch": 0.48, "grad_norm": 0.38673341680171747, "learning_rate": 1.1208493017957893e-05, "loss": 0.2194, "step": 8313 }, { "epoch": 0.48, "grad_norm": 0.3480815756689843, "learning_rate": 1.1206645723321825e-05, "loss": 0.3104, "step": 8314 }, { "epoch": 0.48, "grad_norm": 1.142989272685945, "learning_rate": 1.1204798386899669e-05, "loss": 0.8243, "step": 8315 }, { "epoch": 0.48, "grad_norm": 0.334355108496965, "learning_rate": 1.1202951008755395e-05, "loss": 0.2723, "step": 8316 }, { "epoch": 0.48, "grad_norm": 0.342405895746166, "learning_rate": 1.1201103588952979e-05, "loss": 0.2292, "step": 8317 }, { "epoch": 0.48, "grad_norm": 0.35075106621619223, "learning_rate": 1.11992561275564e-05, "loss": 0.2068, "step": 8318 }, { "epoch": 0.48, "grad_norm": 0.5953512589583299, "learning_rate": 1.1197408624629626e-05, "loss": 0.4117, "step": 8319 }, { "epoch": 0.48, "grad_norm": 0.27076852878493896, "learning_rate": 1.119556108023665e-05, "loss": 0.2509, "step": 8320 }, { "epoch": 0.48, "grad_norm": 1.2379250749740365, "learning_rate": 1.119371349444144e-05, "loss": 0.8403, "step": 8321 }, { "epoch": 0.48, "grad_norm": 0.6147005884618181, "learning_rate": 1.1191865867307987e-05, "loss": 0.4054, "step": 8322 }, { "epoch": 0.48, "grad_norm": 0.3673949040504266, "learning_rate": 1.1190018198900267e-05, "loss": 0.2359, "step": 8323 }, { "epoch": 0.48, "grad_norm": 0.2579786089458528, "learning_rate": 1.118817048928227e-05, "loss": 0.2306, "step": 8324 }, { "epoch": 0.48, "grad_norm": 0.5153506800417209, "learning_rate": 1.1186322738517983e-05, "loss": 0.3424, "step": 8325 }, { "epoch": 0.48, "grad_norm": 0.2880843628827029, "learning_rate": 1.1184474946671384e-05, "loss": 0.2012, "step": 8326 }, { "epoch": 0.48, "grad_norm": 1.1330484123308262, "learning_rate": 1.1182627113806475e-05, "loss": 0.7822, "step": 8327 }, { "epoch": 0.48, "grad_norm": 0.29700312651214944, "learning_rate": 1.1180779239987233e-05, "loss": 0.2688, "step": 8328 }, { "epoch": 0.48, "grad_norm": 0.3926248997185353, "learning_rate": 1.1178931325277662e-05, "loss": 0.3018, "step": 8329 }, { "epoch": 0.48, "grad_norm": 0.4880065387650843, "learning_rate": 1.1177083369741749e-05, "loss": 0.3457, "step": 8330 }, { "epoch": 0.48, "grad_norm": 0.4622472597385853, "learning_rate": 1.117523537344349e-05, "loss": 0.3208, "step": 8331 }, { "epoch": 0.48, "grad_norm": 0.26773122651731995, "learning_rate": 1.1173387336446879e-05, "loss": 0.246, "step": 8332 }, { "epoch": 0.48, "grad_norm": 0.27469813985436253, "learning_rate": 1.1171539258815916e-05, "loss": 0.2141, "step": 8333 }, { "epoch": 0.48, "grad_norm": 0.6729771240210307, "learning_rate": 1.1169691140614597e-05, "loss": 0.4272, "step": 8334 }, { "epoch": 0.48, "grad_norm": 0.40650217240748127, "learning_rate": 1.1167842981906927e-05, "loss": 0.3098, "step": 8335 }, { "epoch": 0.48, "grad_norm": 0.33788023906972464, "learning_rate": 1.1165994782756902e-05, "loss": 0.2863, "step": 8336 }, { "epoch": 0.48, "grad_norm": 0.8965463163501143, "learning_rate": 1.1164146543228529e-05, "loss": 0.4183, "step": 8337 }, { "epoch": 0.48, "grad_norm": 0.37608376944922794, "learning_rate": 1.116229826338581e-05, "loss": 0.2967, "step": 8338 }, { "epoch": 0.48, "grad_norm": 1.1709144692691702, "learning_rate": 1.1160449943292754e-05, "loss": 0.7408, "step": 8339 }, { "epoch": 0.48, "grad_norm": 0.20606643679960182, "learning_rate": 1.1158601583013365e-05, "loss": 0.1803, "step": 8340 }, { "epoch": 0.48, "grad_norm": 0.3825272882807248, "learning_rate": 1.1156753182611655e-05, "loss": 0.2924, "step": 8341 }, { "epoch": 0.48, "grad_norm": 0.757439344922993, "learning_rate": 1.1154904742151628e-05, "loss": 0.408, "step": 8342 }, { "epoch": 0.48, "grad_norm": 0.47983847004271224, "learning_rate": 1.1153056261697303e-05, "loss": 0.2154, "step": 8343 }, { "epoch": 0.48, "grad_norm": 0.31408770009618836, "learning_rate": 1.1151207741312688e-05, "loss": 0.2766, "step": 8344 }, { "epoch": 0.48, "grad_norm": 1.1777256065853905, "learning_rate": 1.11493591810618e-05, "loss": 0.8841, "step": 8345 }, { "epoch": 0.48, "grad_norm": 0.14795722092036293, "learning_rate": 1.1147510581008654e-05, "loss": 0.0727, "step": 8346 }, { "epoch": 0.48, "grad_norm": 0.44407525535508613, "learning_rate": 1.114566194121726e-05, "loss": 0.3331, "step": 8347 }, { "epoch": 0.48, "grad_norm": 0.4429171903471908, "learning_rate": 1.1143813261751648e-05, "loss": 0.3368, "step": 8348 }, { "epoch": 0.48, "grad_norm": 0.5326441778644128, "learning_rate": 1.1141964542675831e-05, "loss": 0.2415, "step": 8349 }, { "epoch": 0.48, "grad_norm": 0.35629373156998423, "learning_rate": 1.1140115784053828e-05, "loss": 0.2704, "step": 8350 }, { "epoch": 0.48, "grad_norm": 0.473832740368251, "learning_rate": 1.1138266985949668e-05, "loss": 0.3667, "step": 8351 }, { "epoch": 0.48, "grad_norm": 0.24716345117047317, "learning_rate": 1.113641814842737e-05, "loss": 0.1892, "step": 8352 }, { "epoch": 0.48, "grad_norm": 0.37177419017363733, "learning_rate": 1.1134569271550959e-05, "loss": 0.2241, "step": 8353 }, { "epoch": 0.48, "grad_norm": 0.6795655987417271, "learning_rate": 1.1132720355384466e-05, "loss": 0.433, "step": 8354 }, { "epoch": 0.48, "grad_norm": 0.4546558880720396, "learning_rate": 1.1130871399991912e-05, "loss": 0.3476, "step": 8355 }, { "epoch": 0.48, "grad_norm": 0.2735908648714051, "learning_rate": 1.1129022405437333e-05, "loss": 0.2299, "step": 8356 }, { "epoch": 0.48, "grad_norm": 1.1504777351766975, "learning_rate": 1.1127173371784755e-05, "loss": 0.7199, "step": 8357 }, { "epoch": 0.48, "grad_norm": 0.320853590136227, "learning_rate": 1.112532429909821e-05, "loss": 0.22, "step": 8358 }, { "epoch": 0.48, "grad_norm": 0.2922420371940266, "learning_rate": 1.1123475187441735e-05, "loss": 0.2365, "step": 8359 }, { "epoch": 0.48, "grad_norm": 0.4621379502333083, "learning_rate": 1.1121626036879362e-05, "loss": 0.3615, "step": 8360 }, { "epoch": 0.48, "grad_norm": 0.7091643725104544, "learning_rate": 1.1119776847475128e-05, "loss": 0.3798, "step": 8361 }, { "epoch": 0.48, "grad_norm": 0.32042337282411815, "learning_rate": 1.1117927619293072e-05, "loss": 0.2344, "step": 8362 }, { "epoch": 0.48, "grad_norm": 0.46225350017426375, "learning_rate": 1.1116078352397226e-05, "loss": 0.3815, "step": 8363 }, { "epoch": 0.48, "grad_norm": 0.44794763153718986, "learning_rate": 1.1114229046851639e-05, "loss": 0.3283, "step": 8364 }, { "epoch": 0.48, "grad_norm": 0.35863928811423473, "learning_rate": 1.1112379702720346e-05, "loss": 0.2993, "step": 8365 }, { "epoch": 0.48, "grad_norm": 0.2997559968892406, "learning_rate": 1.1110530320067395e-05, "loss": 0.1826, "step": 8366 }, { "epoch": 0.48, "grad_norm": 0.31177442430328983, "learning_rate": 1.110868089895682e-05, "loss": 0.2818, "step": 8367 }, { "epoch": 0.48, "grad_norm": 0.34426675271911766, "learning_rate": 1.1106831439452678e-05, "loss": 0.2813, "step": 8368 }, { "epoch": 0.48, "grad_norm": 1.159122972842692, "learning_rate": 1.1104981941619008e-05, "loss": 0.5404, "step": 8369 }, { "epoch": 0.48, "grad_norm": 0.6308084552291687, "learning_rate": 1.1103132405519866e-05, "loss": 0.4178, "step": 8370 }, { "epoch": 0.48, "grad_norm": 0.3508909818733613, "learning_rate": 1.1101282831219292e-05, "loss": 0.2844, "step": 8371 }, { "epoch": 0.48, "grad_norm": 0.24925963458866746, "learning_rate": 1.1099433218781342e-05, "loss": 0.1824, "step": 8372 }, { "epoch": 0.48, "grad_norm": 1.0146895770631508, "learning_rate": 1.1097583568270068e-05, "loss": 0.5618, "step": 8373 }, { "epoch": 0.48, "grad_norm": 0.3402111836733542, "learning_rate": 1.1095733879749517e-05, "loss": 0.2561, "step": 8374 }, { "epoch": 0.48, "grad_norm": 0.44394453768906555, "learning_rate": 1.1093884153283755e-05, "loss": 0.2666, "step": 8375 }, { "epoch": 0.48, "grad_norm": 0.5030724555789422, "learning_rate": 1.1092034388936827e-05, "loss": 0.3637, "step": 8376 }, { "epoch": 0.48, "grad_norm": 0.3053604077245766, "learning_rate": 1.1090184586772798e-05, "loss": 0.2541, "step": 8377 }, { "epoch": 0.48, "grad_norm": 0.26072680670778176, "learning_rate": 1.1088334746855724e-05, "loss": 0.1699, "step": 8378 }, { "epoch": 0.48, "grad_norm": 0.33213697117973734, "learning_rate": 1.1086484869249664e-05, "loss": 0.2501, "step": 8379 }, { "epoch": 0.48, "grad_norm": 0.3411697209711852, "learning_rate": 1.1084634954018679e-05, "loss": 0.2749, "step": 8380 }, { "epoch": 0.48, "grad_norm": 0.7937308936897097, "learning_rate": 1.1082785001226833e-05, "loss": 0.527, "step": 8381 }, { "epoch": 0.48, "grad_norm": 0.5427995751624755, "learning_rate": 1.108093501093819e-05, "loss": 0.2829, "step": 8382 }, { "epoch": 0.48, "grad_norm": 0.37423001686239854, "learning_rate": 1.1079084983216812e-05, "loss": 0.294, "step": 8383 }, { "epoch": 0.48, "grad_norm": 0.2610551272973318, "learning_rate": 1.107723491812677e-05, "loss": 0.2371, "step": 8384 }, { "epoch": 0.48, "grad_norm": 0.2848863442961672, "learning_rate": 1.1075384815732126e-05, "loss": 0.1655, "step": 8385 }, { "epoch": 0.48, "grad_norm": 0.36620040221470535, "learning_rate": 1.1073534676096953e-05, "loss": 0.3269, "step": 8386 }, { "epoch": 0.48, "grad_norm": 0.40895567597138205, "learning_rate": 1.107168449928532e-05, "loss": 0.3493, "step": 8387 }, { "epoch": 0.48, "grad_norm": 0.9694638436873771, "learning_rate": 1.1069834285361299e-05, "loss": 0.3581, "step": 8388 }, { "epoch": 0.48, "grad_norm": 0.3307306363709223, "learning_rate": 1.1067984034388963e-05, "loss": 0.2843, "step": 8389 }, { "epoch": 0.48, "grad_norm": 0.2628285063571431, "learning_rate": 1.1066133746432388e-05, "loss": 0.1729, "step": 8390 }, { "epoch": 0.48, "grad_norm": 0.39208782700167216, "learning_rate": 1.1064283421555643e-05, "loss": 0.2981, "step": 8391 }, { "epoch": 0.48, "grad_norm": 0.32337473433052144, "learning_rate": 1.1062433059822813e-05, "loss": 0.2115, "step": 8392 }, { "epoch": 0.48, "grad_norm": 0.9751414436431275, "learning_rate": 1.106058266129797e-05, "loss": 0.4711, "step": 8393 }, { "epoch": 0.48, "grad_norm": 0.917279257161967, "learning_rate": 1.1058732226045195e-05, "loss": 0.504, "step": 8394 }, { "epoch": 0.48, "grad_norm": 0.27816877474586016, "learning_rate": 1.1056881754128568e-05, "loss": 0.2211, "step": 8395 }, { "epoch": 0.48, "grad_norm": 0.4602594483651638, "learning_rate": 1.1055031245612172e-05, "loss": 0.3818, "step": 8396 }, { "epoch": 0.48, "grad_norm": 0.2664228010575755, "learning_rate": 1.1053180700560086e-05, "loss": 0.1863, "step": 8397 }, { "epoch": 0.48, "grad_norm": 0.36439137476338745, "learning_rate": 1.1051330119036404e-05, "loss": 0.2241, "step": 8398 }, { "epoch": 0.48, "grad_norm": 0.638885939720972, "learning_rate": 1.1049479501105202e-05, "loss": 0.3925, "step": 8399 }, { "epoch": 0.48, "grad_norm": 0.4978465949232517, "learning_rate": 1.1047628846830571e-05, "loss": 0.3931, "step": 8400 }, { "epoch": 0.48, "grad_norm": 0.33079743434334197, "learning_rate": 1.1045778156276596e-05, "loss": 0.1901, "step": 8401 }, { "epoch": 0.48, "grad_norm": 0.3101247786660534, "learning_rate": 1.104392742950737e-05, "loss": 0.2588, "step": 8402 }, { "epoch": 0.48, "grad_norm": 0.2805944667132338, "learning_rate": 1.104207666658698e-05, "loss": 0.2896, "step": 8403 }, { "epoch": 0.48, "grad_norm": 0.3730808898739765, "learning_rate": 1.1040225867579522e-05, "loss": 0.2604, "step": 8404 }, { "epoch": 0.48, "grad_norm": 0.5112786993131957, "learning_rate": 1.1038375032549085e-05, "loss": 0.33, "step": 8405 }, { "epoch": 0.48, "grad_norm": 0.9322563183427266, "learning_rate": 1.1036524161559767e-05, "loss": 0.5786, "step": 8406 }, { "epoch": 0.48, "grad_norm": 0.355904824217904, "learning_rate": 1.103467325467566e-05, "loss": 0.2834, "step": 8407 }, { "epoch": 0.48, "grad_norm": 0.2743782333531795, "learning_rate": 1.1032822311960866e-05, "loss": 0.2112, "step": 8408 }, { "epoch": 0.48, "grad_norm": 0.4259052635545149, "learning_rate": 1.1030971333479477e-05, "loss": 0.3138, "step": 8409 }, { "epoch": 0.48, "grad_norm": 0.37998773225911436, "learning_rate": 1.1029120319295597e-05, "loss": 0.3443, "step": 8410 }, { "epoch": 0.48, "grad_norm": 0.34994216114806836, "learning_rate": 1.1027269269473324e-05, "loss": 0.2692, "step": 8411 }, { "epoch": 0.48, "grad_norm": 1.0286726329274982, "learning_rate": 1.102541818407676e-05, "loss": 0.6472, "step": 8412 }, { "epoch": 0.48, "grad_norm": 0.39969223773471535, "learning_rate": 1.1023567063170008e-05, "loss": 0.3095, "step": 8413 }, { "epoch": 0.48, "grad_norm": 0.4792364386637519, "learning_rate": 1.1021715906817172e-05, "loss": 0.2102, "step": 8414 }, { "epoch": 0.48, "grad_norm": 0.24998225105266594, "learning_rate": 1.101986471508236e-05, "loss": 0.2621, "step": 8415 }, { "epoch": 0.48, "grad_norm": 0.3296303617196106, "learning_rate": 1.1018013488029675e-05, "loss": 0.272, "step": 8416 }, { "epoch": 0.48, "grad_norm": 0.831873250616347, "learning_rate": 1.1016162225723227e-05, "loss": 0.5823, "step": 8417 }, { "epoch": 0.48, "grad_norm": 0.3007402335020122, "learning_rate": 1.1014310928227125e-05, "loss": 0.2131, "step": 8418 }, { "epoch": 0.48, "grad_norm": 0.364565455538296, "learning_rate": 1.101245959560548e-05, "loss": 0.2672, "step": 8419 }, { "epoch": 0.48, "grad_norm": 0.3912456702815622, "learning_rate": 1.1010608227922401e-05, "loss": 0.3275, "step": 8420 }, { "epoch": 0.48, "grad_norm": 0.4532113401173461, "learning_rate": 1.1008756825242007e-05, "loss": 0.2709, "step": 8421 }, { "epoch": 0.48, "grad_norm": 0.5767832010251636, "learning_rate": 1.10069053876284e-05, "loss": 0.3767, "step": 8422 }, { "epoch": 0.48, "grad_norm": 0.3190711161352703, "learning_rate": 1.100505391514571e-05, "loss": 0.309, "step": 8423 }, { "epoch": 0.48, "grad_norm": 0.2566852014475039, "learning_rate": 1.1003202407858042e-05, "loss": 0.1297, "step": 8424 }, { "epoch": 0.48, "grad_norm": 0.3887013888366776, "learning_rate": 1.1001350865829519e-05, "loss": 0.2731, "step": 8425 }, { "epoch": 0.48, "grad_norm": 0.49546719830955543, "learning_rate": 1.0999499289124259e-05, "loss": 0.371, "step": 8426 }, { "epoch": 0.48, "grad_norm": 0.3685020261407154, "learning_rate": 1.0997647677806381e-05, "loss": 0.318, "step": 8427 }, { "epoch": 0.48, "grad_norm": 0.323905474118738, "learning_rate": 1.0995796031940004e-05, "loss": 0.2349, "step": 8428 }, { "epoch": 0.48, "grad_norm": 0.5429412404412678, "learning_rate": 1.0993944351589257e-05, "loss": 0.4474, "step": 8429 }, { "epoch": 0.48, "grad_norm": 0.2698732310913593, "learning_rate": 1.0992092636818261e-05, "loss": 0.1615, "step": 8430 }, { "epoch": 0.48, "grad_norm": 0.2698012591809379, "learning_rate": 1.0990240887691135e-05, "loss": 0.2257, "step": 8431 }, { "epoch": 0.48, "grad_norm": 0.5174891868486703, "learning_rate": 1.0988389104272012e-05, "loss": 0.3601, "step": 8432 }, { "epoch": 0.48, "grad_norm": 0.6007606961336437, "learning_rate": 1.098653728662502e-05, "loss": 0.4257, "step": 8433 }, { "epoch": 0.48, "grad_norm": 0.32474766627803614, "learning_rate": 1.098468543481428e-05, "loss": 0.2144, "step": 8434 }, { "epoch": 0.48, "grad_norm": 0.39732993885930923, "learning_rate": 1.0982833548903926e-05, "loss": 0.3183, "step": 8435 }, { "epoch": 0.48, "grad_norm": 0.26185582493114695, "learning_rate": 1.0980981628958091e-05, "loss": 0.2002, "step": 8436 }, { "epoch": 0.48, "grad_norm": 0.3630987442855988, "learning_rate": 1.0979129675040902e-05, "loss": 0.1708, "step": 8437 }, { "epoch": 0.48, "grad_norm": 0.4030675818976453, "learning_rate": 1.0977277687216497e-05, "loss": 0.3411, "step": 8438 }, { "epoch": 0.48, "grad_norm": 0.37339136570184556, "learning_rate": 1.0975425665549005e-05, "loss": 0.3173, "step": 8439 }, { "epoch": 0.48, "grad_norm": 0.8845059457123733, "learning_rate": 1.0973573610102566e-05, "loss": 0.4562, "step": 8440 }, { "epoch": 0.48, "grad_norm": 0.31660898433428125, "learning_rate": 1.0971721520941312e-05, "loss": 0.239, "step": 8441 }, { "epoch": 0.49, "grad_norm": 0.23668371578828956, "learning_rate": 1.0969869398129385e-05, "loss": 0.1924, "step": 8442 }, { "epoch": 0.49, "grad_norm": 0.40438627675862854, "learning_rate": 1.0968017241730922e-05, "loss": 0.3176, "step": 8443 }, { "epoch": 0.49, "grad_norm": 0.4396742144328353, "learning_rate": 1.0966165051810066e-05, "loss": 0.2638, "step": 8444 }, { "epoch": 0.49, "grad_norm": 0.8565682090328727, "learning_rate": 1.0964312828430952e-05, "loss": 0.4941, "step": 8445 }, { "epoch": 0.49, "grad_norm": 0.644386355055355, "learning_rate": 1.096246057165773e-05, "loss": 0.3571, "step": 8446 }, { "epoch": 0.49, "grad_norm": 0.2632623269668236, "learning_rate": 1.0960608281554536e-05, "loss": 0.258, "step": 8447 }, { "epoch": 0.49, "grad_norm": 0.3898501466449026, "learning_rate": 1.0958755958185521e-05, "loss": 0.2782, "step": 8448 }, { "epoch": 0.49, "grad_norm": 0.39076503964584275, "learning_rate": 1.0956903601614827e-05, "loss": 0.2519, "step": 8449 }, { "epoch": 0.49, "grad_norm": 0.34799939748395714, "learning_rate": 1.0955051211906607e-05, "loss": 0.2699, "step": 8450 }, { "epoch": 0.49, "grad_norm": 0.34692010363267356, "learning_rate": 1.0953198789125e-05, "loss": 0.3284, "step": 8451 }, { "epoch": 0.49, "grad_norm": 0.5508850692361121, "learning_rate": 1.095134633333416e-05, "loss": 0.3315, "step": 8452 }, { "epoch": 0.49, "grad_norm": 0.38972264224855135, "learning_rate": 1.0949493844598237e-05, "loss": 0.3421, "step": 8453 }, { "epoch": 0.49, "grad_norm": 0.2861316486348512, "learning_rate": 1.0947641322981387e-05, "loss": 0.2046, "step": 8454 }, { "epoch": 0.49, "grad_norm": 0.4901872853125752, "learning_rate": 1.0945788768547754e-05, "loss": 0.3408, "step": 8455 }, { "epoch": 0.49, "grad_norm": 0.34229442973648827, "learning_rate": 1.0943936181361501e-05, "loss": 0.3144, "step": 8456 }, { "epoch": 0.49, "grad_norm": 0.30739139755930395, "learning_rate": 1.0942083561486775e-05, "loss": 0.1327, "step": 8457 }, { "epoch": 0.49, "grad_norm": 0.3992928835707834, "learning_rate": 1.0940230908987737e-05, "loss": 0.3175, "step": 8458 }, { "epoch": 0.49, "grad_norm": 0.2975848893390521, "learning_rate": 1.0938378223928545e-05, "loss": 0.2855, "step": 8459 }, { "epoch": 0.49, "grad_norm": 0.6888931226475903, "learning_rate": 1.0936525506373353e-05, "loss": 0.3482, "step": 8460 }, { "epoch": 0.49, "grad_norm": 0.5397706730536465, "learning_rate": 1.0934672756386324e-05, "loss": 0.4013, "step": 8461 }, { "epoch": 0.49, "grad_norm": 0.2018669105288497, "learning_rate": 1.0932819974031616e-05, "loss": 0.1768, "step": 8462 }, { "epoch": 0.49, "grad_norm": 0.3994888465967652, "learning_rate": 1.0930967159373393e-05, "loss": 0.2612, "step": 8463 }, { "epoch": 0.49, "grad_norm": 0.7874372028522038, "learning_rate": 1.0929114312475818e-05, "loss": 0.4882, "step": 8464 }, { "epoch": 0.49, "grad_norm": 0.3514672262598415, "learning_rate": 1.0927261433403055e-05, "loss": 0.3112, "step": 8465 }, { "epoch": 0.49, "grad_norm": 0.5423505007021057, "learning_rate": 1.092540852221927e-05, "loss": 0.3962, "step": 8466 }, { "epoch": 0.49, "grad_norm": 0.47049811026281263, "learning_rate": 1.0923555578988624e-05, "loss": 0.2611, "step": 8467 }, { "epoch": 0.49, "grad_norm": 0.39710438531386655, "learning_rate": 1.0921702603775288e-05, "loss": 0.2879, "step": 8468 }, { "epoch": 0.49, "grad_norm": 0.2792052805726842, "learning_rate": 1.0919849596643434e-05, "loss": 0.1829, "step": 8469 }, { "epoch": 0.49, "grad_norm": 0.3047677379880632, "learning_rate": 1.0917996557657224e-05, "loss": 0.2355, "step": 8470 }, { "epoch": 0.49, "grad_norm": 0.40934080266730394, "learning_rate": 1.0916143486880836e-05, "loss": 0.3056, "step": 8471 }, { "epoch": 0.49, "grad_norm": 0.8182633785891162, "learning_rate": 1.0914290384378436e-05, "loss": 0.4636, "step": 8472 }, { "epoch": 0.49, "grad_norm": 0.5738428159353489, "learning_rate": 1.09124372502142e-05, "loss": 0.284, "step": 8473 }, { "epoch": 0.49, "grad_norm": 0.2757029445511652, "learning_rate": 1.09105840844523e-05, "loss": 0.2102, "step": 8474 }, { "epoch": 0.49, "grad_norm": 0.24592269658330476, "learning_rate": 1.0908730887156915e-05, "loss": 0.2472, "step": 8475 }, { "epoch": 0.49, "grad_norm": 0.4877436876419471, "learning_rate": 1.090687765839222e-05, "loss": 0.2014, "step": 8476 }, { "epoch": 0.49, "grad_norm": 0.3949928498732002, "learning_rate": 1.0905024398222386e-05, "loss": 0.3212, "step": 8477 }, { "epoch": 0.49, "grad_norm": 0.5094201877628184, "learning_rate": 1.09031711067116e-05, "loss": 0.3381, "step": 8478 }, { "epoch": 0.49, "grad_norm": 0.9709515948124112, "learning_rate": 1.0901317783924032e-05, "loss": 0.4758, "step": 8479 }, { "epoch": 0.49, "grad_norm": 0.3008208066982179, "learning_rate": 1.0899464429923874e-05, "loss": 0.2053, "step": 8480 }, { "epoch": 0.49, "grad_norm": 0.2425174984408598, "learning_rate": 1.0897611044775299e-05, "loss": 0.1676, "step": 8481 }, { "epoch": 0.49, "grad_norm": 0.3567217475863464, "learning_rate": 1.0895757628542492e-05, "loss": 0.3243, "step": 8482 }, { "epoch": 0.49, "grad_norm": 0.3865743507662977, "learning_rate": 1.0893904181289637e-05, "loss": 0.2332, "step": 8483 }, { "epoch": 0.49, "grad_norm": 0.9081131636400291, "learning_rate": 1.0892050703080918e-05, "loss": 0.4564, "step": 8484 }, { "epoch": 0.49, "grad_norm": 1.337498254558959, "learning_rate": 1.0890197193980523e-05, "loss": 0.8662, "step": 8485 }, { "epoch": 0.49, "grad_norm": 0.3029460382510471, "learning_rate": 1.0888343654052636e-05, "loss": 0.2082, "step": 8486 }, { "epoch": 0.49, "grad_norm": 0.22595061674438283, "learning_rate": 1.0886490083361445e-05, "loss": 0.2207, "step": 8487 }, { "epoch": 0.49, "grad_norm": 0.6513624736080968, "learning_rate": 1.0884636481971145e-05, "loss": 0.4554, "step": 8488 }, { "epoch": 0.49, "grad_norm": 0.3546626256247301, "learning_rate": 1.0882782849945917e-05, "loss": 0.2338, "step": 8489 }, { "epoch": 0.49, "grad_norm": 0.4004379725262802, "learning_rate": 1.088092918734996e-05, "loss": 0.3369, "step": 8490 }, { "epoch": 0.49, "grad_norm": 1.06392676133309, "learning_rate": 1.0879075494247459e-05, "loss": 0.6164, "step": 8491 }, { "epoch": 0.49, "grad_norm": 0.33643344970466654, "learning_rate": 1.0877221770702618e-05, "loss": 0.2818, "step": 8492 }, { "epoch": 0.49, "grad_norm": 0.20948852930825718, "learning_rate": 1.087536801677962e-05, "loss": 0.1037, "step": 8493 }, { "epoch": 0.49, "grad_norm": 0.3585673923696074, "learning_rate": 1.0873514232542665e-05, "loss": 0.3189, "step": 8494 }, { "epoch": 0.49, "grad_norm": 0.35729435806128756, "learning_rate": 1.0871660418055954e-05, "loss": 0.2793, "step": 8495 }, { "epoch": 0.49, "grad_norm": 0.616608207430578, "learning_rate": 1.0869806573383675e-05, "loss": 0.362, "step": 8496 }, { "epoch": 0.49, "grad_norm": 1.3825893173323816, "learning_rate": 1.0867952698590036e-05, "loss": 0.6839, "step": 8497 }, { "epoch": 0.49, "grad_norm": 0.27447229366512443, "learning_rate": 1.0866098793739229e-05, "loss": 0.2518, "step": 8498 }, { "epoch": 0.49, "grad_norm": 0.21800662896904763, "learning_rate": 1.0864244858895461e-05, "loss": 0.1464, "step": 8499 }, { "epoch": 0.49, "grad_norm": 0.5799123938435271, "learning_rate": 1.086239089412293e-05, "loss": 0.4081, "step": 8500 }, { "epoch": 0.49, "grad_norm": 0.3538200919550498, "learning_rate": 1.086053689948584e-05, "loss": 0.2998, "step": 8501 }, { "epoch": 0.49, "grad_norm": 0.441401374507026, "learning_rate": 1.085868287504839e-05, "loss": 0.3884, "step": 8502 }, { "epoch": 0.49, "grad_norm": 0.4445663999073777, "learning_rate": 1.0856828820874794e-05, "loss": 0.2894, "step": 8503 }, { "epoch": 0.49, "grad_norm": 0.3221303501831563, "learning_rate": 1.0854974737029248e-05, "loss": 0.2564, "step": 8504 }, { "epoch": 0.49, "grad_norm": 0.2526415990634436, "learning_rate": 1.0853120623575968e-05, "loss": 0.1755, "step": 8505 }, { "epoch": 0.49, "grad_norm": 0.3396586056153518, "learning_rate": 1.0851266480579155e-05, "loss": 0.2631, "step": 8506 }, { "epoch": 0.49, "grad_norm": 0.32619397347087004, "learning_rate": 1.0849412308103023e-05, "loss": 0.2644, "step": 8507 }, { "epoch": 0.49, "grad_norm": 0.8049881277903056, "learning_rate": 1.0847558106211775e-05, "loss": 0.6005, "step": 8508 }, { "epoch": 0.49, "grad_norm": 0.39477084899592646, "learning_rate": 1.0845703874969629e-05, "loss": 0.2584, "step": 8509 }, { "epoch": 0.49, "grad_norm": 0.3284641100010038, "learning_rate": 1.0843849614440793e-05, "loss": 0.2518, "step": 8510 }, { "epoch": 0.49, "grad_norm": 0.3164534348135442, "learning_rate": 1.0841995324689482e-05, "loss": 0.2347, "step": 8511 }, { "epoch": 0.49, "grad_norm": 0.6746338727776406, "learning_rate": 1.0840141005779907e-05, "loss": 0.3469, "step": 8512 }, { "epoch": 0.49, "grad_norm": 0.3406299935814668, "learning_rate": 1.0838286657776289e-05, "loss": 0.2758, "step": 8513 }, { "epoch": 0.49, "grad_norm": 0.368507523241613, "learning_rate": 1.0836432280742837e-05, "loss": 0.3614, "step": 8514 }, { "epoch": 0.49, "grad_norm": 0.4196989763584233, "learning_rate": 1.0834577874743772e-05, "loss": 0.2835, "step": 8515 }, { "epoch": 0.49, "grad_norm": 0.286344189141085, "learning_rate": 1.0832723439843313e-05, "loss": 0.2135, "step": 8516 }, { "epoch": 0.49, "grad_norm": 0.8103735048771367, "learning_rate": 1.0830868976105677e-05, "loss": 0.4709, "step": 8517 }, { "epoch": 0.49, "grad_norm": 0.3806559083984567, "learning_rate": 1.0829014483595081e-05, "loss": 0.3363, "step": 8518 }, { "epoch": 0.49, "grad_norm": 0.22981873993975727, "learning_rate": 1.0827159962375753e-05, "loss": 0.1717, "step": 8519 }, { "epoch": 0.49, "grad_norm": 1.1201661013138515, "learning_rate": 1.0825305412511906e-05, "loss": 0.7243, "step": 8520 }, { "epoch": 0.49, "grad_norm": 0.29503616397144117, "learning_rate": 1.0823450834067772e-05, "loss": 0.2378, "step": 8521 }, { "epoch": 0.49, "grad_norm": 0.2551033595031021, "learning_rate": 1.0821596227107572e-05, "loss": 0.2024, "step": 8522 }, { "epoch": 0.49, "grad_norm": 0.7899047872546144, "learning_rate": 1.0819741591695526e-05, "loss": 0.4307, "step": 8523 }, { "epoch": 0.49, "grad_norm": 0.6356314127947628, "learning_rate": 1.0817886927895866e-05, "loss": 0.4684, "step": 8524 }, { "epoch": 0.49, "grad_norm": 0.344138473561077, "learning_rate": 1.0816032235772816e-05, "loss": 0.194, "step": 8525 }, { "epoch": 0.49, "grad_norm": 0.3089788067804815, "learning_rate": 1.0814177515390605e-05, "loss": 0.297, "step": 8526 }, { "epoch": 0.49, "grad_norm": 0.32417727591946566, "learning_rate": 1.081232276681346e-05, "loss": 0.2092, "step": 8527 }, { "epoch": 0.49, "grad_norm": 0.4153972292522417, "learning_rate": 1.0810467990105617e-05, "loss": 0.2856, "step": 8528 }, { "epoch": 0.49, "grad_norm": 0.655625681624118, "learning_rate": 1.0808613185331297e-05, "loss": 0.3203, "step": 8529 }, { "epoch": 0.49, "grad_norm": 0.3785813411354443, "learning_rate": 1.0806758352554743e-05, "loss": 0.3061, "step": 8530 }, { "epoch": 0.49, "grad_norm": 0.37824841761056816, "learning_rate": 1.0804903491840178e-05, "loss": 0.2755, "step": 8531 }, { "epoch": 0.49, "grad_norm": 0.21373532506861836, "learning_rate": 1.080304860325184e-05, "loss": 0.1536, "step": 8532 }, { "epoch": 0.49, "grad_norm": 0.4860820363746777, "learning_rate": 1.0801193686853964e-05, "loss": 0.3688, "step": 8533 }, { "epoch": 0.49, "grad_norm": 0.2978394874490292, "learning_rate": 1.0799338742710788e-05, "loss": 0.24, "step": 8534 }, { "epoch": 0.49, "grad_norm": 0.48456413182199815, "learning_rate": 1.0797483770886542e-05, "loss": 0.2991, "step": 8535 }, { "epoch": 0.49, "grad_norm": 0.6425265613421929, "learning_rate": 1.0795628771445467e-05, "loss": 0.4898, "step": 8536 }, { "epoch": 0.49, "grad_norm": 0.3882124012170663, "learning_rate": 1.0793773744451804e-05, "loss": 0.2956, "step": 8537 }, { "epoch": 0.49, "grad_norm": 0.29591018567833727, "learning_rate": 1.079191868996979e-05, "loss": 0.2537, "step": 8538 }, { "epoch": 0.49, "grad_norm": 0.275873528349889, "learning_rate": 1.0790063608063664e-05, "loss": 0.2111, "step": 8539 }, { "epoch": 0.49, "grad_norm": 0.309022281433369, "learning_rate": 1.078820849879767e-05, "loss": 0.2702, "step": 8540 }, { "epoch": 0.49, "grad_norm": 0.7002771887809298, "learning_rate": 1.0786353362236051e-05, "loss": 0.4943, "step": 8541 }, { "epoch": 0.49, "grad_norm": 0.3367810286858073, "learning_rate": 1.0784498198443048e-05, "loss": 0.267, "step": 8542 }, { "epoch": 0.49, "grad_norm": 0.6251041919918563, "learning_rate": 1.0782643007482908e-05, "loss": 0.3424, "step": 8543 }, { "epoch": 0.49, "grad_norm": 0.4196247509404695, "learning_rate": 1.0780787789419868e-05, "loss": 0.3126, "step": 8544 }, { "epoch": 0.49, "grad_norm": 0.222604196010212, "learning_rate": 1.0778932544318185e-05, "loss": 0.1972, "step": 8545 }, { "epoch": 0.49, "grad_norm": 0.5323880097633098, "learning_rate": 1.0777077272242103e-05, "loss": 0.3519, "step": 8546 }, { "epoch": 0.49, "grad_norm": 0.3522018391320144, "learning_rate": 1.0775221973255866e-05, "loss": 0.3304, "step": 8547 }, { "epoch": 0.49, "grad_norm": 0.7935920052225027, "learning_rate": 1.0773366647423724e-05, "loss": 0.3471, "step": 8548 }, { "epoch": 0.49, "grad_norm": 0.3614599034269359, "learning_rate": 1.0771511294809933e-05, "loss": 0.2682, "step": 8549 }, { "epoch": 0.49, "grad_norm": 0.30816488025273653, "learning_rate": 1.0769655915478734e-05, "loss": 0.2914, "step": 8550 }, { "epoch": 0.49, "grad_norm": 0.41552829457555635, "learning_rate": 1.076780050949439e-05, "loss": 0.2073, "step": 8551 }, { "epoch": 0.49, "grad_norm": 0.3957754702487912, "learning_rate": 1.0765945076921143e-05, "loss": 0.3081, "step": 8552 }, { "epoch": 0.49, "grad_norm": 0.3438857205809205, "learning_rate": 1.0764089617823252e-05, "loss": 0.2495, "step": 8553 }, { "epoch": 0.49, "grad_norm": 0.3916725126911459, "learning_rate": 1.0762234132264969e-05, "loss": 0.3022, "step": 8554 }, { "epoch": 0.49, "grad_norm": 0.3472478565503047, "learning_rate": 1.0760378620310551e-05, "loss": 0.2252, "step": 8555 }, { "epoch": 0.49, "grad_norm": 0.5140747501812202, "learning_rate": 1.0758523082024255e-05, "loss": 0.39, "step": 8556 }, { "epoch": 0.49, "grad_norm": 0.46369297394140513, "learning_rate": 1.0756667517470337e-05, "loss": 0.3532, "step": 8557 }, { "epoch": 0.49, "grad_norm": 0.2763335707004524, "learning_rate": 1.0754811926713053e-05, "loss": 0.233, "step": 8558 }, { "epoch": 0.49, "grad_norm": 0.2642442656817401, "learning_rate": 1.075295630981667e-05, "loss": 0.1955, "step": 8559 }, { "epoch": 0.49, "grad_norm": 0.8901340050690897, "learning_rate": 1.0751100666845437e-05, "loss": 0.5739, "step": 8560 }, { "epoch": 0.49, "grad_norm": 0.33780611056603577, "learning_rate": 1.0749244997863624e-05, "loss": 0.1916, "step": 8561 }, { "epoch": 0.49, "grad_norm": 0.314626187389568, "learning_rate": 1.0747389302935487e-05, "loss": 0.2898, "step": 8562 }, { "epoch": 0.49, "grad_norm": 0.5661987660912312, "learning_rate": 1.074553358212529e-05, "loss": 0.4068, "step": 8563 }, { "epoch": 0.49, "grad_norm": 0.22613700731661518, "learning_rate": 1.07436778354973e-05, "loss": 0.1042, "step": 8564 }, { "epoch": 0.49, "grad_norm": 0.26126861145961344, "learning_rate": 1.0741822063115774e-05, "loss": 0.2493, "step": 8565 }, { "epoch": 0.49, "grad_norm": 0.48082663751471855, "learning_rate": 1.0739966265044985e-05, "loss": 0.3952, "step": 8566 }, { "epoch": 0.49, "grad_norm": 0.776697669372256, "learning_rate": 1.0738110441349194e-05, "loss": 0.568, "step": 8567 }, { "epoch": 0.49, "grad_norm": 0.35344836698827203, "learning_rate": 1.0736254592092674e-05, "loss": 0.2445, "step": 8568 }, { "epoch": 0.49, "grad_norm": 0.41759160329707223, "learning_rate": 1.0734398717339687e-05, "loss": 0.3361, "step": 8569 }, { "epoch": 0.49, "grad_norm": 0.36406742647874546, "learning_rate": 1.0732542817154505e-05, "loss": 0.268, "step": 8570 }, { "epoch": 0.49, "grad_norm": 0.21027776814527477, "learning_rate": 1.0730686891601394e-05, "loss": 0.1567, "step": 8571 }, { "epoch": 0.49, "grad_norm": 1.0486613084863996, "learning_rate": 1.072883094074463e-05, "loss": 0.66, "step": 8572 }, { "epoch": 0.49, "grad_norm": 0.30409813856147094, "learning_rate": 1.0726974964648478e-05, "loss": 0.2823, "step": 8573 }, { "epoch": 0.49, "grad_norm": 0.3632538787528237, "learning_rate": 1.072511896337722e-05, "loss": 0.2477, "step": 8574 }, { "epoch": 0.49, "grad_norm": 0.6178813686029212, "learning_rate": 1.0723262936995118e-05, "loss": 0.4058, "step": 8575 }, { "epoch": 0.49, "grad_norm": 0.5408942484954933, "learning_rate": 1.0721406885566455e-05, "loss": 0.3104, "step": 8576 }, { "epoch": 0.49, "grad_norm": 0.22758907901569544, "learning_rate": 1.07195508091555e-05, "loss": 0.1969, "step": 8577 }, { "epoch": 0.49, "grad_norm": 0.29891880470997145, "learning_rate": 1.0717694707826534e-05, "loss": 0.2564, "step": 8578 }, { "epoch": 0.49, "grad_norm": 0.6764541746498948, "learning_rate": 1.0715838581643829e-05, "loss": 0.4602, "step": 8579 }, { "epoch": 0.49, "grad_norm": 0.3694126858365215, "learning_rate": 1.0713982430671668e-05, "loss": 0.3039, "step": 8580 }, { "epoch": 0.49, "grad_norm": 0.3388532266528095, "learning_rate": 1.0712126254974325e-05, "loss": 0.2766, "step": 8581 }, { "epoch": 0.49, "grad_norm": 0.8196371361869945, "learning_rate": 1.0710270054616077e-05, "loss": 0.3736, "step": 8582 }, { "epoch": 0.49, "grad_norm": 0.2575004355415229, "learning_rate": 1.070841382966121e-05, "loss": 0.2168, "step": 8583 }, { "epoch": 0.49, "grad_norm": 0.3064963483696533, "learning_rate": 1.0706557580174002e-05, "loss": 0.1623, "step": 8584 }, { "epoch": 0.49, "grad_norm": 0.30059761729633716, "learning_rate": 1.0704701306218737e-05, "loss": 0.2986, "step": 8585 }, { "epoch": 0.49, "grad_norm": 0.3834769871882269, "learning_rate": 1.0702845007859697e-05, "loss": 0.2874, "step": 8586 }, { "epoch": 0.49, "grad_norm": 0.7050746353337347, "learning_rate": 1.0700988685161162e-05, "loss": 0.3589, "step": 8587 }, { "epoch": 0.49, "grad_norm": 0.8554268457849118, "learning_rate": 1.069913233818742e-05, "loss": 0.5188, "step": 8588 }, { "epoch": 0.49, "grad_norm": 0.2600277824616293, "learning_rate": 1.0697275967002754e-05, "loss": 0.26, "step": 8589 }, { "epoch": 0.49, "grad_norm": 0.2661738871990709, "learning_rate": 1.069541957167145e-05, "loss": 0.1824, "step": 8590 }, { "epoch": 0.49, "grad_norm": 0.5728659494558793, "learning_rate": 1.06935631522578e-05, "loss": 0.2859, "step": 8591 }, { "epoch": 0.49, "grad_norm": 0.36724855285541735, "learning_rate": 1.0691706708826084e-05, "loss": 0.3184, "step": 8592 }, { "epoch": 0.49, "grad_norm": 0.36684883389905065, "learning_rate": 1.0689850241440598e-05, "loss": 0.3254, "step": 8593 }, { "epoch": 0.49, "grad_norm": 0.448707775246444, "learning_rate": 1.0687993750165623e-05, "loss": 0.2209, "step": 8594 }, { "epoch": 0.49, "grad_norm": 0.40597125420971497, "learning_rate": 1.0686137235065458e-05, "loss": 0.3169, "step": 8595 }, { "epoch": 0.49, "grad_norm": 0.34023890211065355, "learning_rate": 1.068428069620439e-05, "loss": 0.2522, "step": 8596 }, { "epoch": 0.49, "grad_norm": 0.41372972524648555, "learning_rate": 1.0682424133646712e-05, "loss": 0.275, "step": 8597 }, { "epoch": 0.49, "grad_norm": 0.3490980753166338, "learning_rate": 1.068056754745671e-05, "loss": 0.2738, "step": 8598 }, { "epoch": 0.49, "grad_norm": 0.9919442403924039, "learning_rate": 1.0678710937698689e-05, "loss": 0.7263, "step": 8599 }, { "epoch": 0.49, "grad_norm": 0.477348484888539, "learning_rate": 1.0676854304436936e-05, "loss": 0.1544, "step": 8600 }, { "epoch": 0.49, "grad_norm": 0.29238634288399545, "learning_rate": 1.0674997647735745e-05, "loss": 0.2659, "step": 8601 }, { "epoch": 0.49, "grad_norm": 0.4137595889493302, "learning_rate": 1.0673140967659418e-05, "loss": 0.2705, "step": 8602 }, { "epoch": 0.49, "grad_norm": 0.7976162801001311, "learning_rate": 1.0671284264272249e-05, "loss": 0.4373, "step": 8603 }, { "epoch": 0.49, "grad_norm": 0.35688291511303527, "learning_rate": 1.066942753763853e-05, "loss": 0.2226, "step": 8604 }, { "epoch": 0.49, "grad_norm": 0.28563980083841095, "learning_rate": 1.0667570787822568e-05, "loss": 0.2589, "step": 8605 }, { "epoch": 0.49, "grad_norm": 0.8815030483266307, "learning_rate": 1.0665714014888657e-05, "loss": 0.5098, "step": 8606 }, { "epoch": 0.49, "grad_norm": 0.3215761513411512, "learning_rate": 1.0663857218901097e-05, "loss": 0.2103, "step": 8607 }, { "epoch": 0.49, "grad_norm": 0.9842923959569092, "learning_rate": 1.0662000399924193e-05, "loss": 0.4303, "step": 8608 }, { "epoch": 0.49, "grad_norm": 0.34745505755938316, "learning_rate": 1.066014355802224e-05, "loss": 0.3268, "step": 8609 }, { "epoch": 0.49, "grad_norm": 0.31255754616013104, "learning_rate": 1.0658286693259544e-05, "loss": 0.1985, "step": 8610 }, { "epoch": 0.49, "grad_norm": 0.273327258396647, "learning_rate": 1.065642980570041e-05, "loss": 0.1981, "step": 8611 }, { "epoch": 0.49, "grad_norm": 0.4237158009498341, "learning_rate": 1.0654572895409142e-05, "loss": 0.3071, "step": 8612 }, { "epoch": 0.49, "grad_norm": 0.3323909110807824, "learning_rate": 1.065271596245004e-05, "loss": 0.2247, "step": 8613 }, { "epoch": 0.49, "grad_norm": 1.197242401674588, "learning_rate": 1.0650859006887412e-05, "loss": 0.4439, "step": 8614 }, { "epoch": 0.49, "grad_norm": 1.1780449474286145, "learning_rate": 1.0649002028785564e-05, "loss": 0.7566, "step": 8615 }, { "epoch": 0.5, "grad_norm": 0.3025044117036119, "learning_rate": 1.0647145028208808e-05, "loss": 0.2614, "step": 8616 }, { "epoch": 0.5, "grad_norm": 0.21862780541713553, "learning_rate": 1.0645288005221443e-05, "loss": 0.1848, "step": 8617 }, { "epoch": 0.5, "grad_norm": 0.9973641936045772, "learning_rate": 1.0643430959887786e-05, "loss": 0.5432, "step": 8618 }, { "epoch": 0.5, "grad_norm": 0.3686939049542852, "learning_rate": 1.064157389227214e-05, "loss": 0.2805, "step": 8619 }, { "epoch": 0.5, "grad_norm": 1.3729969071937, "learning_rate": 1.063971680243882e-05, "loss": 0.3485, "step": 8620 }, { "epoch": 0.5, "grad_norm": 0.41373947900301006, "learning_rate": 1.063785969045213e-05, "loss": 0.3296, "step": 8621 }, { "epoch": 0.5, "grad_norm": 0.3333660307427822, "learning_rate": 1.063600255637639e-05, "loss": 0.2753, "step": 8622 }, { "epoch": 0.5, "grad_norm": 0.1846235788447922, "learning_rate": 1.0634145400275906e-05, "loss": 0.0857, "step": 8623 }, { "epoch": 0.5, "grad_norm": 0.4198005388095872, "learning_rate": 1.0632288222214998e-05, "loss": 0.333, "step": 8624 }, { "epoch": 0.5, "grad_norm": 0.4751118320166682, "learning_rate": 1.0630431022257975e-05, "loss": 0.2883, "step": 8625 }, { "epoch": 0.5, "grad_norm": 1.5403740119919342, "learning_rate": 1.062857380046915e-05, "loss": 0.3654, "step": 8626 }, { "epoch": 0.5, "grad_norm": 0.906474785955322, "learning_rate": 1.0626716556912845e-05, "loss": 0.656, "step": 8627 }, { "epoch": 0.5, "grad_norm": 0.40934538461942965, "learning_rate": 1.062485929165337e-05, "loss": 0.3045, "step": 8628 }, { "epoch": 0.5, "grad_norm": 0.2483934265695432, "learning_rate": 1.0623002004755045e-05, "loss": 0.2398, "step": 8629 }, { "epoch": 0.5, "grad_norm": 0.8469478281869853, "learning_rate": 1.0621144696282187e-05, "loss": 0.2506, "step": 8630 }, { "epoch": 0.5, "grad_norm": 0.41460984148063096, "learning_rate": 1.0619287366299116e-05, "loss": 0.2685, "step": 8631 }, { "epoch": 0.5, "grad_norm": 0.7057658416397729, "learning_rate": 1.061743001487015e-05, "loss": 0.3861, "step": 8632 }, { "epoch": 0.5, "grad_norm": 0.44006599321635453, "learning_rate": 1.0615572642059608e-05, "loss": 0.2791, "step": 8633 }, { "epoch": 0.5, "grad_norm": 0.35366409363198736, "learning_rate": 1.0613715247931811e-05, "loss": 0.2718, "step": 8634 }, { "epoch": 0.5, "grad_norm": 0.28522502179300874, "learning_rate": 1.0611857832551088e-05, "loss": 0.1987, "step": 8635 }, { "epoch": 0.5, "grad_norm": 0.4444119647481306, "learning_rate": 1.0610000395981748e-05, "loss": 0.2836, "step": 8636 }, { "epoch": 0.5, "grad_norm": 0.29795136052669713, "learning_rate": 1.0608142938288122e-05, "loss": 0.2719, "step": 8637 }, { "epoch": 0.5, "grad_norm": 0.7921438236185548, "learning_rate": 1.0606285459534531e-05, "loss": 0.4997, "step": 8638 }, { "epoch": 0.5, "grad_norm": 1.113545353925166, "learning_rate": 1.0604427959785305e-05, "loss": 0.5553, "step": 8639 }, { "epoch": 0.5, "grad_norm": 0.35533823163207906, "learning_rate": 1.0602570439104758e-05, "loss": 0.2753, "step": 8640 }, { "epoch": 0.5, "grad_norm": 0.4131396122160858, "learning_rate": 1.0600712897557229e-05, "loss": 0.3235, "step": 8641 }, { "epoch": 0.5, "grad_norm": 0.578787262125609, "learning_rate": 1.0598855335207032e-05, "loss": 0.3251, "step": 8642 }, { "epoch": 0.5, "grad_norm": 0.2622329097119207, "learning_rate": 1.0596997752118505e-05, "loss": 0.1882, "step": 8643 }, { "epoch": 0.5, "grad_norm": 1.6319084606170982, "learning_rate": 1.0595140148355971e-05, "loss": 0.7732, "step": 8644 }, { "epoch": 0.5, "grad_norm": 0.38998737359035557, "learning_rate": 1.059328252398376e-05, "loss": 0.329, "step": 8645 }, { "epoch": 0.5, "grad_norm": 0.3543340025838131, "learning_rate": 1.0591424879066199e-05, "loss": 0.1859, "step": 8646 }, { "epoch": 0.5, "grad_norm": 0.4751509222241394, "learning_rate": 1.058956721366762e-05, "loss": 0.4029, "step": 8647 }, { "epoch": 0.5, "grad_norm": 0.40849080293214024, "learning_rate": 1.0587709527852354e-05, "loss": 0.3217, "step": 8648 }, { "epoch": 0.5, "grad_norm": 0.2225141351938953, "learning_rate": 1.0585851821684731e-05, "loss": 0.1289, "step": 8649 }, { "epoch": 0.5, "grad_norm": 0.35872861250269855, "learning_rate": 1.0583994095229086e-05, "loss": 0.2722, "step": 8650 }, { "epoch": 0.5, "grad_norm": 1.3321392581467473, "learning_rate": 1.0582136348549751e-05, "loss": 0.7329, "step": 8651 }, { "epoch": 0.5, "grad_norm": 0.2864610099756312, "learning_rate": 1.0580278581711062e-05, "loss": 0.1966, "step": 8652 }, { "epoch": 0.5, "grad_norm": 0.36678325852272914, "learning_rate": 1.0578420794777347e-05, "loss": 0.3385, "step": 8653 }, { "epoch": 0.5, "grad_norm": 0.6607280902420078, "learning_rate": 1.0576562987812946e-05, "loss": 0.4297, "step": 8654 }, { "epoch": 0.5, "grad_norm": 0.26838225983595704, "learning_rate": 1.057470516088219e-05, "loss": 0.2015, "step": 8655 }, { "epoch": 0.5, "grad_norm": 0.23625021150066397, "learning_rate": 1.0572847314049424e-05, "loss": 0.2171, "step": 8656 }, { "epoch": 0.5, "grad_norm": 1.3904254222964338, "learning_rate": 1.0570989447378977e-05, "loss": 0.8084, "step": 8657 }, { "epoch": 0.5, "grad_norm": 0.3996383469966337, "learning_rate": 1.056913156093519e-05, "loss": 0.3012, "step": 8658 }, { "epoch": 0.5, "grad_norm": 0.47744810436214125, "learning_rate": 1.0567273654782402e-05, "loss": 0.2646, "step": 8659 }, { "epoch": 0.5, "grad_norm": 0.37398280158270103, "learning_rate": 1.0565415728984954e-05, "loss": 0.3085, "step": 8660 }, { "epoch": 0.5, "grad_norm": 0.2956477490756631, "learning_rate": 1.0563557783607182e-05, "loss": 0.173, "step": 8661 }, { "epoch": 0.5, "grad_norm": 0.26479803730153023, "learning_rate": 1.0561699818713427e-05, "loss": 0.1764, "step": 8662 }, { "epoch": 0.5, "grad_norm": 1.1463907876064328, "learning_rate": 1.0559841834368032e-05, "loss": 0.6209, "step": 8663 }, { "epoch": 0.5, "grad_norm": 0.3129762774498092, "learning_rate": 1.055798383063534e-05, "loss": 0.2687, "step": 8664 }, { "epoch": 0.5, "grad_norm": 0.3635218135502235, "learning_rate": 1.0556125807579691e-05, "loss": 0.3221, "step": 8665 }, { "epoch": 0.5, "grad_norm": 0.692553553878542, "learning_rate": 1.0554267765265428e-05, "loss": 0.3318, "step": 8666 }, { "epoch": 0.5, "grad_norm": 0.3232964697528742, "learning_rate": 1.0552409703756896e-05, "loss": 0.2057, "step": 8667 }, { "epoch": 0.5, "grad_norm": 0.2696835374164365, "learning_rate": 1.0550551623118442e-05, "loss": 0.2448, "step": 8668 }, { "epoch": 0.5, "grad_norm": 0.4353801277831827, "learning_rate": 1.0548693523414408e-05, "loss": 0.3043, "step": 8669 }, { "epoch": 0.5, "grad_norm": 0.6110278457480871, "learning_rate": 1.0546835404709142e-05, "loss": 0.3705, "step": 8670 }, { "epoch": 0.5, "grad_norm": 0.4375873666361446, "learning_rate": 1.0544977267066986e-05, "loss": 0.3336, "step": 8671 }, { "epoch": 0.5, "grad_norm": 0.3254332303228954, "learning_rate": 1.0543119110552293e-05, "loss": 0.2477, "step": 8672 }, { "epoch": 0.5, "grad_norm": 0.4354120294911844, "learning_rate": 1.054126093522941e-05, "loss": 0.3037, "step": 8673 }, { "epoch": 0.5, "grad_norm": 0.2439818838843636, "learning_rate": 1.053940274116268e-05, "loss": 0.1891, "step": 8674 }, { "epoch": 0.5, "grad_norm": 1.0463292921514271, "learning_rate": 1.0537544528416462e-05, "loss": 0.396, "step": 8675 }, { "epoch": 0.5, "grad_norm": 0.2922783263343222, "learning_rate": 1.0535686297055095e-05, "loss": 0.2758, "step": 8676 }, { "epoch": 0.5, "grad_norm": 0.33915434052209564, "learning_rate": 1.0533828047142936e-05, "loss": 0.313, "step": 8677 }, { "epoch": 0.5, "grad_norm": 1.2238073735137502, "learning_rate": 1.0531969778744333e-05, "loss": 0.7319, "step": 8678 }, { "epoch": 0.5, "grad_norm": 0.24234025299640638, "learning_rate": 1.0530111491923642e-05, "loss": 0.1626, "step": 8679 }, { "epoch": 0.5, "grad_norm": 0.2732911508199734, "learning_rate": 1.0528253186745212e-05, "loss": 0.2467, "step": 8680 }, { "epoch": 0.5, "grad_norm": 0.5100215847515718, "learning_rate": 1.05263948632734e-05, "loss": 0.4121, "step": 8681 }, { "epoch": 0.5, "grad_norm": 0.7570537100397893, "learning_rate": 1.052453652157255e-05, "loss": 0.2958, "step": 8682 }, { "epoch": 0.5, "grad_norm": 0.40521850584754937, "learning_rate": 1.0522678161707028e-05, "loss": 0.3425, "step": 8683 }, { "epoch": 0.5, "grad_norm": 0.35722274671332144, "learning_rate": 1.0520819783741183e-05, "loss": 0.3157, "step": 8684 }, { "epoch": 0.5, "grad_norm": 0.378465025509005, "learning_rate": 1.0518961387739371e-05, "loss": 0.1266, "step": 8685 }, { "epoch": 0.5, "grad_norm": 0.28385498048625774, "learning_rate": 1.0517102973765947e-05, "loss": 0.2468, "step": 8686 }, { "epoch": 0.5, "grad_norm": 0.6575638629333442, "learning_rate": 1.0515244541885272e-05, "loss": 0.4461, "step": 8687 }, { "epoch": 0.5, "grad_norm": 0.2883829375610862, "learning_rate": 1.0513386092161698e-05, "loss": 0.2233, "step": 8688 }, { "epoch": 0.5, "grad_norm": 0.2855510500547569, "learning_rate": 1.0511527624659585e-05, "loss": 0.2701, "step": 8689 }, { "epoch": 0.5, "grad_norm": 0.9124086442455803, "learning_rate": 1.0509669139443298e-05, "loss": 0.5854, "step": 8690 }, { "epoch": 0.5, "grad_norm": 0.3336901059066564, "learning_rate": 1.0507810636577183e-05, "loss": 0.1928, "step": 8691 }, { "epoch": 0.5, "grad_norm": 0.263736365371567, "learning_rate": 1.0505952116125613e-05, "loss": 0.2334, "step": 8692 }, { "epoch": 0.5, "grad_norm": 0.731716892313344, "learning_rate": 1.0504093578152939e-05, "loss": 0.4945, "step": 8693 }, { "epoch": 0.5, "grad_norm": 0.5263262316670798, "learning_rate": 1.050223502272353e-05, "loss": 0.3654, "step": 8694 }, { "epoch": 0.5, "grad_norm": 0.2641345245374191, "learning_rate": 1.050037644990174e-05, "loss": 0.1886, "step": 8695 }, { "epoch": 0.5, "grad_norm": 0.36928392656217834, "learning_rate": 1.0498517859751937e-05, "loss": 0.3137, "step": 8696 }, { "epoch": 0.5, "grad_norm": 0.609889087343759, "learning_rate": 1.0496659252338481e-05, "loss": 0.3196, "step": 8697 }, { "epoch": 0.5, "grad_norm": 0.32459381957992633, "learning_rate": 1.049480062772574e-05, "loss": 0.2378, "step": 8698 }, { "epoch": 0.5, "grad_norm": 0.7666747207357101, "learning_rate": 1.0492941985978068e-05, "loss": 0.4289, "step": 8699 }, { "epoch": 0.5, "grad_norm": 0.3039731492970164, "learning_rate": 1.049108332715984e-05, "loss": 0.2918, "step": 8700 }, { "epoch": 0.5, "grad_norm": 0.30935907905723153, "learning_rate": 1.048922465133542e-05, "loss": 0.1984, "step": 8701 }, { "epoch": 0.5, "grad_norm": 0.3842240130182527, "learning_rate": 1.0487365958569168e-05, "loss": 0.2887, "step": 8702 }, { "epoch": 0.5, "grad_norm": 1.4564138910898665, "learning_rate": 1.0485507248925455e-05, "loss": 0.7804, "step": 8703 }, { "epoch": 0.5, "grad_norm": 0.2959826982997899, "learning_rate": 1.0483648522468648e-05, "loss": 0.2633, "step": 8704 }, { "epoch": 0.5, "grad_norm": 0.6727655285294992, "learning_rate": 1.0481789779263112e-05, "loss": 0.3068, "step": 8705 }, { "epoch": 0.5, "grad_norm": 0.8354526542235158, "learning_rate": 1.0479931019373218e-05, "loss": 0.5327, "step": 8706 }, { "epoch": 0.5, "grad_norm": 0.28734045291129795, "learning_rate": 1.0478072242863329e-05, "loss": 0.2392, "step": 8707 }, { "epoch": 0.5, "grad_norm": 0.2747471101459275, "learning_rate": 1.0476213449797823e-05, "loss": 0.2075, "step": 8708 }, { "epoch": 0.5, "grad_norm": 0.7019374127703849, "learning_rate": 1.0474354640241065e-05, "loss": 0.4068, "step": 8709 }, { "epoch": 0.5, "grad_norm": 0.36357077422014367, "learning_rate": 1.0472495814257426e-05, "loss": 0.294, "step": 8710 }, { "epoch": 0.5, "grad_norm": 0.6829568268139392, "learning_rate": 1.0470636971911277e-05, "loss": 0.3449, "step": 8711 }, { "epoch": 0.5, "grad_norm": 0.3321892388960226, "learning_rate": 1.046877811326699e-05, "loss": 0.3047, "step": 8712 }, { "epoch": 0.5, "grad_norm": 0.3844356675302627, "learning_rate": 1.0466919238388937e-05, "loss": 0.2823, "step": 8713 }, { "epoch": 0.5, "grad_norm": 0.2569912841925734, "learning_rate": 1.046506034734149e-05, "loss": 0.1119, "step": 8714 }, { "epoch": 0.5, "grad_norm": 0.5010661824434921, "learning_rate": 1.0463201440189026e-05, "loss": 0.3222, "step": 8715 }, { "epoch": 0.5, "grad_norm": 0.3171331861477249, "learning_rate": 1.0461342516995911e-05, "loss": 0.2934, "step": 8716 }, { "epoch": 0.5, "grad_norm": 0.43958228142956374, "learning_rate": 1.0459483577826531e-05, "loss": 0.3712, "step": 8717 }, { "epoch": 0.5, "grad_norm": 0.3325947509858215, "learning_rate": 1.0457624622745249e-05, "loss": 0.2327, "step": 8718 }, { "epoch": 0.5, "grad_norm": 0.4079029698952522, "learning_rate": 1.0455765651816447e-05, "loss": 0.2972, "step": 8719 }, { "epoch": 0.5, "grad_norm": 0.23506748013692239, "learning_rate": 1.0453906665104503e-05, "loss": 0.2195, "step": 8720 }, { "epoch": 0.5, "grad_norm": 0.5733380652274306, "learning_rate": 1.045204766267379e-05, "loss": 0.3302, "step": 8721 }, { "epoch": 0.5, "grad_norm": 0.3160060820791429, "learning_rate": 1.0450188644588684e-05, "loss": 0.2767, "step": 8722 }, { "epoch": 0.5, "grad_norm": 0.4478524014302271, "learning_rate": 1.0448329610913566e-05, "loss": 0.3633, "step": 8723 }, { "epoch": 0.5, "grad_norm": 0.3882152266604375, "learning_rate": 1.0446470561712811e-05, "loss": 0.2507, "step": 8724 }, { "epoch": 0.5, "grad_norm": 0.34054329567829944, "learning_rate": 1.0444611497050802e-05, "loss": 0.2896, "step": 8725 }, { "epoch": 0.5, "grad_norm": 0.2992192854650521, "learning_rate": 1.0442752416991912e-05, "loss": 0.1979, "step": 8726 }, { "epoch": 0.5, "grad_norm": 0.45370200964194424, "learning_rate": 1.0440893321600529e-05, "loss": 0.2756, "step": 8727 }, { "epoch": 0.5, "grad_norm": 0.32295994917909365, "learning_rate": 1.0439034210941029e-05, "loss": 0.2951, "step": 8728 }, { "epoch": 0.5, "grad_norm": 1.5271479543748887, "learning_rate": 1.043717508507779e-05, "loss": 0.6307, "step": 8729 }, { "epoch": 0.5, "grad_norm": 1.2596569844559131, "learning_rate": 1.0435315944075202e-05, "loss": 0.8572, "step": 8730 }, { "epoch": 0.5, "grad_norm": 0.3131215205253095, "learning_rate": 1.0433456787997636e-05, "loss": 0.198, "step": 8731 }, { "epoch": 0.5, "grad_norm": 0.24968167949562506, "learning_rate": 1.0431597616909483e-05, "loss": 0.2025, "step": 8732 }, { "epoch": 0.5, "grad_norm": 0.48335571779497233, "learning_rate": 1.0429738430875123e-05, "loss": 0.4133, "step": 8733 }, { "epoch": 0.5, "grad_norm": 0.31200844208398004, "learning_rate": 1.042787922995894e-05, "loss": 0.1918, "step": 8734 }, { "epoch": 0.5, "grad_norm": 0.45930456874730524, "learning_rate": 1.0426020014225313e-05, "loss": 0.3733, "step": 8735 }, { "epoch": 0.5, "grad_norm": 0.40373907057363134, "learning_rate": 1.0424160783738637e-05, "loss": 0.342, "step": 8736 }, { "epoch": 0.5, "grad_norm": 0.3350587314333596, "learning_rate": 1.042230153856329e-05, "loss": 0.2144, "step": 8737 }, { "epoch": 0.5, "grad_norm": 0.3563876657519294, "learning_rate": 1.0420442278763658e-05, "loss": 0.2922, "step": 8738 }, { "epoch": 0.5, "grad_norm": 0.31504314484019597, "learning_rate": 1.0418583004404128e-05, "loss": 0.2687, "step": 8739 }, { "epoch": 0.5, "grad_norm": 0.30162193745736154, "learning_rate": 1.0416723715549086e-05, "loss": 0.2186, "step": 8740 }, { "epoch": 0.5, "grad_norm": 1.3170050829277982, "learning_rate": 1.041486441226292e-05, "loss": 0.8277, "step": 8741 }, { "epoch": 0.5, "grad_norm": 1.3552064424071086, "learning_rate": 1.0413005094610018e-05, "loss": 0.8414, "step": 8742 }, { "epoch": 0.5, "grad_norm": 0.380534543132684, "learning_rate": 1.0411145762654767e-05, "loss": 0.2638, "step": 8743 }, { "epoch": 0.5, "grad_norm": 0.36920878357651454, "learning_rate": 1.0409286416461557e-05, "loss": 0.2848, "step": 8744 }, { "epoch": 0.5, "grad_norm": 0.2959147152051227, "learning_rate": 1.0407427056094772e-05, "loss": 0.2261, "step": 8745 }, { "epoch": 0.5, "grad_norm": 0.34193258538459975, "learning_rate": 1.040556768161881e-05, "loss": 0.2593, "step": 8746 }, { "epoch": 0.5, "grad_norm": 0.45975870258676144, "learning_rate": 1.0403708293098054e-05, "loss": 0.2501, "step": 8747 }, { "epoch": 0.5, "grad_norm": 0.4738679517364789, "learning_rate": 1.04018488905969e-05, "loss": 0.3933, "step": 8748 }, { "epoch": 0.5, "grad_norm": 0.3316696976128548, "learning_rate": 1.0399989474179735e-05, "loss": 0.2681, "step": 8749 }, { "epoch": 0.5, "grad_norm": 0.567887887250414, "learning_rate": 1.0398130043910949e-05, "loss": 0.3258, "step": 8750 }, { "epoch": 0.5, "grad_norm": 0.2892458714833695, "learning_rate": 1.0396270599854939e-05, "loss": 0.2585, "step": 8751 }, { "epoch": 0.5, "grad_norm": 0.4389296101630068, "learning_rate": 1.0394411142076092e-05, "loss": 0.2814, "step": 8752 }, { "epoch": 0.5, "grad_norm": 0.35415454086871245, "learning_rate": 1.039255167063881e-05, "loss": 0.2628, "step": 8753 }, { "epoch": 0.5, "grad_norm": 0.6841238312775954, "learning_rate": 1.0390692185607479e-05, "loss": 0.3674, "step": 8754 }, { "epoch": 0.5, "grad_norm": 0.41943936748101157, "learning_rate": 1.0388832687046493e-05, "loss": 0.3115, "step": 8755 }, { "epoch": 0.5, "grad_norm": 0.3136131249500698, "learning_rate": 1.0386973175020248e-05, "loss": 0.3109, "step": 8756 }, { "epoch": 0.5, "grad_norm": 0.2241031296136957, "learning_rate": 1.0385113649593137e-05, "loss": 0.1017, "step": 8757 }, { "epoch": 0.5, "grad_norm": 0.25406672869397945, "learning_rate": 1.0383254110829557e-05, "loss": 0.2081, "step": 8758 }, { "epoch": 0.5, "grad_norm": 0.3567819115485671, "learning_rate": 1.0381394558793907e-05, "loss": 0.3352, "step": 8759 }, { "epoch": 0.5, "grad_norm": 0.8288749721657916, "learning_rate": 1.0379534993550574e-05, "loss": 0.4522, "step": 8760 }, { "epoch": 0.5, "grad_norm": 0.35424045018318084, "learning_rate": 1.0377675415163965e-05, "loss": 0.2828, "step": 8761 }, { "epoch": 0.5, "grad_norm": 0.7610154560364215, "learning_rate": 1.0375815823698471e-05, "loss": 0.4219, "step": 8762 }, { "epoch": 0.5, "grad_norm": 0.3571386552082526, "learning_rate": 1.0373956219218495e-05, "loss": 0.2569, "step": 8763 }, { "epoch": 0.5, "grad_norm": 0.27089130035377157, "learning_rate": 1.0372096601788426e-05, "loss": 0.2144, "step": 8764 }, { "epoch": 0.5, "grad_norm": 0.4068884998575498, "learning_rate": 1.0370236971472671e-05, "loss": 0.251, "step": 8765 }, { "epoch": 0.5, "grad_norm": 0.7412488012692768, "learning_rate": 1.0368377328335623e-05, "loss": 0.5212, "step": 8766 }, { "epoch": 0.5, "grad_norm": 0.27216313191409564, "learning_rate": 1.0366517672441687e-05, "loss": 0.2245, "step": 8767 }, { "epoch": 0.5, "grad_norm": 0.429758312937244, "learning_rate": 1.0364658003855256e-05, "loss": 0.3467, "step": 8768 }, { "epoch": 0.5, "grad_norm": 0.4300319006050432, "learning_rate": 1.0362798322640736e-05, "loss": 0.2656, "step": 8769 }, { "epoch": 0.5, "grad_norm": 0.2356666590382897, "learning_rate": 1.0360938628862527e-05, "loss": 0.1391, "step": 8770 }, { "epoch": 0.5, "grad_norm": 0.3233756808129512, "learning_rate": 1.0359078922585029e-05, "loss": 0.2775, "step": 8771 }, { "epoch": 0.5, "grad_norm": 0.8330387344459389, "learning_rate": 1.0357219203872641e-05, "loss": 0.4525, "step": 8772 }, { "epoch": 0.5, "grad_norm": 0.34761547025612005, "learning_rate": 1.035535947278977e-05, "loss": 0.2182, "step": 8773 }, { "epoch": 0.5, "grad_norm": 0.4854843573284205, "learning_rate": 1.035349972940081e-05, "loss": 0.3956, "step": 8774 }, { "epoch": 0.5, "grad_norm": 0.3489720860066471, "learning_rate": 1.0351639973770175e-05, "loss": 0.2953, "step": 8775 }, { "epoch": 0.5, "grad_norm": 0.29659453460115837, "learning_rate": 1.0349780205962264e-05, "loss": 0.1995, "step": 8776 }, { "epoch": 0.5, "grad_norm": 0.2555721583072774, "learning_rate": 1.0347920426041475e-05, "loss": 0.188, "step": 8777 }, { "epoch": 0.5, "grad_norm": 1.1484304454055863, "learning_rate": 1.034606063407222e-05, "loss": 0.4487, "step": 8778 }, { "epoch": 0.5, "grad_norm": 0.3225354838444197, "learning_rate": 1.0344200830118899e-05, "loss": 0.2934, "step": 8779 }, { "epoch": 0.5, "grad_norm": 0.36739556613949326, "learning_rate": 1.0342341014245918e-05, "loss": 0.2392, "step": 8780 }, { "epoch": 0.5, "grad_norm": 0.7747310744381001, "learning_rate": 1.0340481186517678e-05, "loss": 0.4761, "step": 8781 }, { "epoch": 0.5, "grad_norm": 0.27809320794294334, "learning_rate": 1.0338621346998596e-05, "loss": 0.2168, "step": 8782 }, { "epoch": 0.5, "grad_norm": 0.399339632887001, "learning_rate": 1.0336761495753067e-05, "loss": 0.2646, "step": 8783 }, { "epoch": 0.5, "grad_norm": 0.7950306926717371, "learning_rate": 1.0334901632845504e-05, "loss": 0.3684, "step": 8784 }, { "epoch": 0.5, "grad_norm": 0.37529210841211885, "learning_rate": 1.0333041758340312e-05, "loss": 0.2767, "step": 8785 }, { "epoch": 0.5, "grad_norm": 0.2740845981263997, "learning_rate": 1.0331181872301898e-05, "loss": 0.1405, "step": 8786 }, { "epoch": 0.5, "grad_norm": 0.3420145569049249, "learning_rate": 1.0329321974794671e-05, "loss": 0.2961, "step": 8787 }, { "epoch": 0.5, "grad_norm": 0.5831138037987301, "learning_rate": 1.0327462065883036e-05, "loss": 0.3274, "step": 8788 }, { "epoch": 0.5, "grad_norm": 0.2896592046347293, "learning_rate": 1.0325602145631403e-05, "loss": 0.223, "step": 8789 }, { "epoch": 0.51, "grad_norm": 0.43954585956748027, "learning_rate": 1.0323742214104185e-05, "loss": 0.3921, "step": 8790 }, { "epoch": 0.51, "grad_norm": 0.3625988463122244, "learning_rate": 1.0321882271365786e-05, "loss": 0.2806, "step": 8791 }, { "epoch": 0.51, "grad_norm": 0.3160804484447476, "learning_rate": 1.0320022317480618e-05, "loss": 0.2677, "step": 8792 }, { "epoch": 0.51, "grad_norm": 1.0366060736983413, "learning_rate": 1.031816235251309e-05, "loss": 0.3159, "step": 8793 }, { "epoch": 0.51, "grad_norm": 0.5889250828589246, "learning_rate": 1.0316302376527616e-05, "loss": 0.3359, "step": 8794 }, { "epoch": 0.51, "grad_norm": 0.270751161601078, "learning_rate": 1.0314442389588603e-05, "loss": 0.2913, "step": 8795 }, { "epoch": 0.51, "grad_norm": 0.6323219923443495, "learning_rate": 1.0312582391760462e-05, "loss": 0.3475, "step": 8796 }, { "epoch": 0.51, "grad_norm": 0.4125990956262591, "learning_rate": 1.0310722383107608e-05, "loss": 0.2992, "step": 8797 }, { "epoch": 0.51, "grad_norm": 0.23697489736616656, "learning_rate": 1.030886236369445e-05, "loss": 0.1779, "step": 8798 }, { "epoch": 0.51, "grad_norm": 0.3704019970521971, "learning_rate": 1.0307002333585404e-05, "loss": 0.2598, "step": 8799 }, { "epoch": 0.51, "grad_norm": 0.44374597809610156, "learning_rate": 1.0305142292844876e-05, "loss": 0.2887, "step": 8800 }, { "epoch": 0.51, "grad_norm": 0.4554383476417022, "learning_rate": 1.0303282241537287e-05, "loss": 0.3395, "step": 8801 }, { "epoch": 0.51, "grad_norm": 0.3874203617171426, "learning_rate": 1.0301422179727045e-05, "loss": 0.3243, "step": 8802 }, { "epoch": 0.51, "grad_norm": 0.3125693395295528, "learning_rate": 1.0299562107478569e-05, "loss": 0.2679, "step": 8803 }, { "epoch": 0.51, "grad_norm": 0.25728668270074057, "learning_rate": 1.0297702024856268e-05, "loss": 0.1966, "step": 8804 }, { "epoch": 0.51, "grad_norm": 1.0480113904388246, "learning_rate": 1.0295841931924559e-05, "loss": 0.5405, "step": 8805 }, { "epoch": 0.51, "grad_norm": 0.663364129540765, "learning_rate": 1.0293981828747857e-05, "loss": 0.2988, "step": 8806 }, { "epoch": 0.51, "grad_norm": 0.2966797163973741, "learning_rate": 1.0292121715390576e-05, "loss": 0.2727, "step": 8807 }, { "epoch": 0.51, "grad_norm": 0.6970721071112972, "learning_rate": 1.0290261591917132e-05, "loss": 0.5367, "step": 8808 }, { "epoch": 0.51, "grad_norm": 0.38124847024879227, "learning_rate": 1.0288401458391943e-05, "loss": 0.1685, "step": 8809 }, { "epoch": 0.51, "grad_norm": 0.2504095296687366, "learning_rate": 1.0286541314879424e-05, "loss": 0.2031, "step": 8810 }, { "epoch": 0.51, "grad_norm": 0.3809688844646517, "learning_rate": 1.028468116144399e-05, "loss": 0.3369, "step": 8811 }, { "epoch": 0.51, "grad_norm": 0.4955121367154073, "learning_rate": 1.028282099815006e-05, "loss": 0.2312, "step": 8812 }, { "epoch": 0.51, "grad_norm": 0.4093725872251935, "learning_rate": 1.0280960825062054e-05, "loss": 0.3284, "step": 8813 }, { "epoch": 0.51, "grad_norm": 1.1636539511340667, "learning_rate": 1.0279100642244382e-05, "loss": 0.8014, "step": 8814 }, { "epoch": 0.51, "grad_norm": 0.28726866422333247, "learning_rate": 1.027724044976147e-05, "loss": 0.2295, "step": 8815 }, { "epoch": 0.51, "grad_norm": 0.24419854470114155, "learning_rate": 1.0275380247677733e-05, "loss": 0.1984, "step": 8816 }, { "epoch": 0.51, "grad_norm": 0.9213705414253189, "learning_rate": 1.0273520036057587e-05, "loss": 0.517, "step": 8817 }, { "epoch": 0.51, "grad_norm": 0.4880627076951752, "learning_rate": 1.0271659814965457e-05, "loss": 0.3437, "step": 8818 }, { "epoch": 0.51, "grad_norm": 0.25429892494500267, "learning_rate": 1.0269799584465758e-05, "loss": 0.2382, "step": 8819 }, { "epoch": 0.51, "grad_norm": 1.1443560930208179, "learning_rate": 1.0267939344622912e-05, "loss": 0.7465, "step": 8820 }, { "epoch": 0.51, "grad_norm": 0.47916608663820603, "learning_rate": 1.0266079095501338e-05, "loss": 0.3078, "step": 8821 }, { "epoch": 0.51, "grad_norm": 0.22200518903738178, "learning_rate": 1.0264218837165459e-05, "loss": 0.1626, "step": 8822 }, { "epoch": 0.51, "grad_norm": 0.38383767303997884, "learning_rate": 1.0262358569679686e-05, "loss": 0.3217, "step": 8823 }, { "epoch": 0.51, "grad_norm": 0.6558207841443644, "learning_rate": 1.0260498293108452e-05, "loss": 0.4036, "step": 8824 }, { "epoch": 0.51, "grad_norm": 0.3591505119739801, "learning_rate": 1.025863800751617e-05, "loss": 0.2283, "step": 8825 }, { "epoch": 0.51, "grad_norm": 0.36957746518241613, "learning_rate": 1.025677771296727e-05, "loss": 0.3354, "step": 8826 }, { "epoch": 0.51, "grad_norm": 1.4188426062019395, "learning_rate": 1.0254917409526163e-05, "loss": 0.7654, "step": 8827 }, { "epoch": 0.51, "grad_norm": 0.30075617474235133, "learning_rate": 1.0253057097257281e-05, "loss": 0.2479, "step": 8828 }, { "epoch": 0.51, "grad_norm": 0.3521081179987658, "learning_rate": 1.025119677622504e-05, "loss": 0.2155, "step": 8829 }, { "epoch": 0.51, "grad_norm": 0.4290538039923925, "learning_rate": 1.0249336446493869e-05, "loss": 0.3082, "step": 8830 }, { "epoch": 0.51, "grad_norm": 0.28813142501039557, "learning_rate": 1.0247476108128183e-05, "loss": 0.2763, "step": 8831 }, { "epoch": 0.51, "grad_norm": 0.7845090507131877, "learning_rate": 1.0245615761192414e-05, "loss": 0.4415, "step": 8832 }, { "epoch": 0.51, "grad_norm": 0.8072316896704939, "learning_rate": 1.024375540575098e-05, "loss": 0.4035, "step": 8833 }, { "epoch": 0.51, "grad_norm": 0.2726303021214843, "learning_rate": 1.0241895041868306e-05, "loss": 0.2369, "step": 8834 }, { "epoch": 0.51, "grad_norm": 0.36492778874634096, "learning_rate": 1.024003466960882e-05, "loss": 0.2715, "step": 8835 }, { "epoch": 0.51, "grad_norm": 0.360313855746173, "learning_rate": 1.0238174289036942e-05, "loss": 0.2589, "step": 8836 }, { "epoch": 0.51, "grad_norm": 0.36305815876103154, "learning_rate": 1.0236313900217099e-05, "loss": 0.2774, "step": 8837 }, { "epoch": 0.51, "grad_norm": 0.35402837649284924, "learning_rate": 1.0234453503213715e-05, "loss": 0.286, "step": 8838 }, { "epoch": 0.51, "grad_norm": 0.6168437114744936, "learning_rate": 1.0232593098091215e-05, "loss": 0.3838, "step": 8839 }, { "epoch": 0.51, "grad_norm": 0.4155028386232177, "learning_rate": 1.0230732684914029e-05, "loss": 0.3072, "step": 8840 }, { "epoch": 0.51, "grad_norm": 0.5305176733751163, "learning_rate": 1.022887226374658e-05, "loss": 0.4033, "step": 8841 }, { "epoch": 0.51, "grad_norm": 0.20583299188599177, "learning_rate": 1.022701183465329e-05, "loss": 0.1745, "step": 8842 }, { "epoch": 0.51, "grad_norm": 0.3890161245544743, "learning_rate": 1.0225151397698597e-05, "loss": 0.3028, "step": 8843 }, { "epoch": 0.51, "grad_norm": 0.9808698140562502, "learning_rate": 1.0223290952946914e-05, "loss": 0.6994, "step": 8844 }, { "epoch": 0.51, "grad_norm": 0.6121490823974144, "learning_rate": 1.0221430500462677e-05, "loss": 0.3077, "step": 8845 }, { "epoch": 0.51, "grad_norm": 0.3066633720162846, "learning_rate": 1.0219570040310312e-05, "loss": 0.2655, "step": 8846 }, { "epoch": 0.51, "grad_norm": 0.34538918095313825, "learning_rate": 1.0217709572554247e-05, "loss": 0.3273, "step": 8847 }, { "epoch": 0.51, "grad_norm": 0.1859650911764527, "learning_rate": 1.0215849097258905e-05, "loss": 0.0884, "step": 8848 }, { "epoch": 0.51, "grad_norm": 0.3703599398367777, "learning_rate": 1.0213988614488721e-05, "loss": 0.289, "step": 8849 }, { "epoch": 0.51, "grad_norm": 0.4604316756843436, "learning_rate": 1.0212128124308121e-05, "loss": 0.3816, "step": 8850 }, { "epoch": 0.51, "grad_norm": 0.38250000158150516, "learning_rate": 1.0210267626781532e-05, "loss": 0.2585, "step": 8851 }, { "epoch": 0.51, "grad_norm": 0.350693722826386, "learning_rate": 1.0208407121973383e-05, "loss": 0.3005, "step": 8852 }, { "epoch": 0.51, "grad_norm": 0.7957509715262783, "learning_rate": 1.0206546609948107e-05, "loss": 0.4827, "step": 8853 }, { "epoch": 0.51, "grad_norm": 0.235842723340732, "learning_rate": 1.020468609077013e-05, "loss": 0.216, "step": 8854 }, { "epoch": 0.51, "grad_norm": 0.30607668526376075, "learning_rate": 1.0202825564503885e-05, "loss": 0.2021, "step": 8855 }, { "epoch": 0.51, "grad_norm": 1.1314724300561396, "learning_rate": 1.0200965031213795e-05, "loss": 0.7834, "step": 8856 }, { "epoch": 0.51, "grad_norm": 0.7721586850426669, "learning_rate": 1.0199104490964296e-05, "loss": 0.4185, "step": 8857 }, { "epoch": 0.51, "grad_norm": 0.3153541338301299, "learning_rate": 1.0197243943819816e-05, "loss": 0.2, "step": 8858 }, { "epoch": 0.51, "grad_norm": 0.3830402764051056, "learning_rate": 1.0195383389844789e-05, "loss": 0.3396, "step": 8859 }, { "epoch": 0.51, "grad_norm": 0.2716412665480148, "learning_rate": 1.0193522829103643e-05, "loss": 0.183, "step": 8860 }, { "epoch": 0.51, "grad_norm": 0.3956289122519081, "learning_rate": 1.0191662261660809e-05, "loss": 0.2267, "step": 8861 }, { "epoch": 0.51, "grad_norm": 0.3719950768589626, "learning_rate": 1.018980168758072e-05, "loss": 0.3198, "step": 8862 }, { "epoch": 0.51, "grad_norm": 1.073061393618599, "learning_rate": 1.0187941106927803e-05, "loss": 0.4103, "step": 8863 }, { "epoch": 0.51, "grad_norm": 0.3268498389256042, "learning_rate": 1.0186080519766499e-05, "loss": 0.2171, "step": 8864 }, { "epoch": 0.51, "grad_norm": 0.932570603873562, "learning_rate": 1.0184219926161229e-05, "loss": 0.5862, "step": 8865 }, { "epoch": 0.51, "grad_norm": 0.2876069287491476, "learning_rate": 1.0182359326176437e-05, "loss": 0.2418, "step": 8866 }, { "epoch": 0.51, "grad_norm": 0.2760938844417518, "learning_rate": 1.0180498719876546e-05, "loss": 0.2243, "step": 8867 }, { "epoch": 0.51, "grad_norm": 0.9738896002035565, "learning_rate": 1.0178638107325993e-05, "loss": 0.5697, "step": 8868 }, { "epoch": 0.51, "grad_norm": 0.9192592820798557, "learning_rate": 1.0176777488589206e-05, "loss": 0.4044, "step": 8869 }, { "epoch": 0.51, "grad_norm": 0.2846380947105442, "learning_rate": 1.0174916863730628e-05, "loss": 0.244, "step": 8870 }, { "epoch": 0.51, "grad_norm": 0.4158621955491559, "learning_rate": 1.0173056232814684e-05, "loss": 0.2808, "step": 8871 }, { "epoch": 0.51, "grad_norm": 0.4635268387198839, "learning_rate": 1.0171195595905811e-05, "loss": 0.2996, "step": 8872 }, { "epoch": 0.51, "grad_norm": 0.33203805455122837, "learning_rate": 1.0169334953068442e-05, "loss": 0.2639, "step": 8873 }, { "epoch": 0.51, "grad_norm": 0.3697048151949442, "learning_rate": 1.0167474304367011e-05, "loss": 0.2824, "step": 8874 }, { "epoch": 0.51, "grad_norm": 0.4844233725817165, "learning_rate": 1.0165613649865951e-05, "loss": 0.3137, "step": 8875 }, { "epoch": 0.51, "grad_norm": 0.3984242851912228, "learning_rate": 1.0163752989629698e-05, "loss": 0.2471, "step": 8876 }, { "epoch": 0.51, "grad_norm": 0.47180498037132856, "learning_rate": 1.0161892323722684e-05, "loss": 0.274, "step": 8877 }, { "epoch": 0.51, "grad_norm": 0.3669597020882024, "learning_rate": 1.0160031652209348e-05, "loss": 0.2947, "step": 8878 }, { "epoch": 0.51, "grad_norm": 0.386735586286777, "learning_rate": 1.0158170975154121e-05, "loss": 0.2804, "step": 8879 }, { "epoch": 0.51, "grad_norm": 0.472778223664258, "learning_rate": 1.015631029262144e-05, "loss": 0.3634, "step": 8880 }, { "epoch": 0.51, "grad_norm": 0.31498560183851326, "learning_rate": 1.0154449604675745e-05, "loss": 0.2056, "step": 8881 }, { "epoch": 0.51, "grad_norm": 0.31106123240309985, "learning_rate": 1.015258891138146e-05, "loss": 0.2597, "step": 8882 }, { "epoch": 0.51, "grad_norm": 0.32163140709549864, "learning_rate": 1.0150728212803034e-05, "loss": 0.248, "step": 8883 }, { "epoch": 0.51, "grad_norm": 1.0163468760891674, "learning_rate": 1.0148867509004892e-05, "loss": 0.3417, "step": 8884 }, { "epoch": 0.51, "grad_norm": 0.3535521516167228, "learning_rate": 1.0147006800051475e-05, "loss": 0.273, "step": 8885 }, { "epoch": 0.51, "grad_norm": 0.32974085632081673, "learning_rate": 1.0145146086007219e-05, "loss": 0.3063, "step": 8886 }, { "epoch": 0.51, "grad_norm": 0.37848673394257054, "learning_rate": 1.0143285366936562e-05, "loss": 0.2336, "step": 8887 }, { "epoch": 0.51, "grad_norm": 0.27078939239282207, "learning_rate": 1.0141424642903936e-05, "loss": 0.208, "step": 8888 }, { "epoch": 0.51, "grad_norm": 1.0221622920030797, "learning_rate": 1.0139563913973787e-05, "loss": 0.5142, "step": 8889 }, { "epoch": 0.51, "grad_norm": 0.34970550608599016, "learning_rate": 1.0137703180210538e-05, "loss": 0.2834, "step": 8890 }, { "epoch": 0.51, "grad_norm": 0.35189717797611997, "learning_rate": 1.0135842441678639e-05, "loss": 0.2754, "step": 8891 }, { "epoch": 0.51, "grad_norm": 0.6613352038736564, "learning_rate": 1.0133981698442519e-05, "loss": 0.411, "step": 8892 }, { "epoch": 0.51, "grad_norm": 0.49333120019293597, "learning_rate": 1.013212095056662e-05, "loss": 0.4239, "step": 8893 }, { "epoch": 0.51, "grad_norm": 0.2691281560347184, "learning_rate": 1.0130260198115376e-05, "loss": 0.2087, "step": 8894 }, { "epoch": 0.51, "grad_norm": 0.25133156816965974, "learning_rate": 1.012839944115323e-05, "loss": 0.1896, "step": 8895 }, { "epoch": 0.51, "grad_norm": 0.644446483143422, "learning_rate": 1.0126538679744615e-05, "loss": 0.4346, "step": 8896 }, { "epoch": 0.51, "grad_norm": 0.31080535051286085, "learning_rate": 1.0124677913953971e-05, "loss": 0.2094, "step": 8897 }, { "epoch": 0.51, "grad_norm": 0.2925056576358622, "learning_rate": 1.0122817143845736e-05, "loss": 0.29, "step": 8898 }, { "epoch": 0.51, "grad_norm": 1.1700861136940273, "learning_rate": 1.0120956369484352e-05, "loss": 0.8421, "step": 8899 }, { "epoch": 0.51, "grad_norm": 0.18646929013358124, "learning_rate": 1.011909559093425e-05, "loss": 0.1259, "step": 8900 }, { "epoch": 0.51, "grad_norm": 0.5031315658177309, "learning_rate": 1.0117234808259875e-05, "loss": 0.3329, "step": 8901 }, { "epoch": 0.51, "grad_norm": 0.3837696227726515, "learning_rate": 1.0115374021525664e-05, "loss": 0.2908, "step": 8902 }, { "epoch": 0.51, "grad_norm": 0.3224777264779916, "learning_rate": 1.0113513230796052e-05, "loss": 0.2378, "step": 8903 }, { "epoch": 0.51, "grad_norm": 0.7486941413631881, "learning_rate": 1.0111652436135486e-05, "loss": 0.4879, "step": 8904 }, { "epoch": 0.51, "grad_norm": 0.4577781436154072, "learning_rate": 1.01097916376084e-05, "loss": 0.4012, "step": 8905 }, { "epoch": 0.51, "grad_norm": 0.3227090641491666, "learning_rate": 1.0107930835279234e-05, "loss": 0.2838, "step": 8906 }, { "epoch": 0.51, "grad_norm": 0.25374936217959587, "learning_rate": 1.0106070029212424e-05, "loss": 0.1262, "step": 8907 }, { "epoch": 0.51, "grad_norm": 0.6524870029201337, "learning_rate": 1.0104209219472418e-05, "loss": 0.3958, "step": 8908 }, { "epoch": 0.51, "grad_norm": 0.37586082968520385, "learning_rate": 1.010234840612365e-05, "loss": 0.2897, "step": 8909 }, { "epoch": 0.51, "grad_norm": 0.3307571873747576, "learning_rate": 1.010048758923056e-05, "loss": 0.2648, "step": 8910 }, { "epoch": 0.51, "grad_norm": 0.8419980738165997, "learning_rate": 1.0098626768857591e-05, "loss": 0.6101, "step": 8911 }, { "epoch": 0.51, "grad_norm": 0.6204223015862753, "learning_rate": 1.009676594506918e-05, "loss": 0.3407, "step": 8912 }, { "epoch": 0.51, "grad_norm": 0.28043986557667766, "learning_rate": 1.0094905117929767e-05, "loss": 0.1755, "step": 8913 }, { "epoch": 0.51, "grad_norm": 0.35706107090418515, "learning_rate": 1.0093044287503797e-05, "loss": 0.3173, "step": 8914 }, { "epoch": 0.51, "grad_norm": 0.564795786408195, "learning_rate": 1.0091183453855706e-05, "loss": 0.2933, "step": 8915 }, { "epoch": 0.51, "grad_norm": 0.37689587316033274, "learning_rate": 1.0089322617049936e-05, "loss": 0.3088, "step": 8916 }, { "epoch": 0.51, "grad_norm": 0.4513654962243863, "learning_rate": 1.0087461777150926e-05, "loss": 0.2704, "step": 8917 }, { "epoch": 0.51, "grad_norm": 0.3927024710302205, "learning_rate": 1.0085600934223121e-05, "loss": 0.2996, "step": 8918 }, { "epoch": 0.51, "grad_norm": 0.26729943984490195, "learning_rate": 1.008374008833096e-05, "loss": 0.2113, "step": 8919 }, { "epoch": 0.51, "grad_norm": 0.6478463082866031, "learning_rate": 1.0081879239538881e-05, "loss": 0.332, "step": 8920 }, { "epoch": 0.51, "grad_norm": 0.3996713800799195, "learning_rate": 1.0080018387911328e-05, "loss": 0.2707, "step": 8921 }, { "epoch": 0.51, "grad_norm": 0.3221994879121444, "learning_rate": 1.0078157533512742e-05, "loss": 0.3001, "step": 8922 }, { "epoch": 0.51, "grad_norm": 0.8538514540552568, "learning_rate": 1.0076296676407565e-05, "loss": 0.404, "step": 8923 }, { "epoch": 0.51, "grad_norm": 0.41161279723696287, "learning_rate": 1.0074435816660235e-05, "loss": 0.2951, "step": 8924 }, { "epoch": 0.51, "grad_norm": 0.35715732608291045, "learning_rate": 1.00725749543352e-05, "loss": 0.2687, "step": 8925 }, { "epoch": 0.51, "grad_norm": 0.3368658460212253, "learning_rate": 1.0070714089496891e-05, "loss": 0.2479, "step": 8926 }, { "epoch": 0.51, "grad_norm": 0.44449844022617024, "learning_rate": 1.006885322220976e-05, "loss": 0.3089, "step": 8927 }, { "epoch": 0.51, "grad_norm": 0.5794718664509232, "learning_rate": 1.0066992352538245e-05, "loss": 0.3989, "step": 8928 }, { "epoch": 0.51, "grad_norm": 0.40439067942242224, "learning_rate": 1.0065131480546788e-05, "loss": 0.327, "step": 8929 }, { "epoch": 0.51, "grad_norm": 0.7094782461959981, "learning_rate": 1.006327060629983e-05, "loss": 0.2071, "step": 8930 }, { "epoch": 0.51, "grad_norm": 0.32681962706815443, "learning_rate": 1.0061409729861814e-05, "loss": 0.3041, "step": 8931 }, { "epoch": 0.51, "grad_norm": 0.29148296068996754, "learning_rate": 1.0059548851297178e-05, "loss": 0.2023, "step": 8932 }, { "epoch": 0.51, "grad_norm": 0.3429142652730741, "learning_rate": 1.0057687970670372e-05, "loss": 0.2153, "step": 8933 }, { "epoch": 0.51, "grad_norm": 0.34103484654619365, "learning_rate": 1.005582708804583e-05, "loss": 0.2975, "step": 8934 }, { "epoch": 0.51, "grad_norm": 1.0197996496950272, "learning_rate": 1.0053966203488003e-05, "loss": 0.677, "step": 8935 }, { "epoch": 0.51, "grad_norm": 0.36527832406679867, "learning_rate": 1.0052105317061327e-05, "loss": 0.182, "step": 8936 }, { "epoch": 0.51, "grad_norm": 0.29033341390121514, "learning_rate": 1.0050244428830246e-05, "loss": 0.2933, "step": 8937 }, { "epoch": 0.51, "grad_norm": 0.42647799922546814, "learning_rate": 1.0048383538859202e-05, "loss": 0.354, "step": 8938 }, { "epoch": 0.51, "grad_norm": 0.1574000968867463, "learning_rate": 1.0046522647212642e-05, "loss": 0.087, "step": 8939 }, { "epoch": 0.51, "grad_norm": 0.41188216839380715, "learning_rate": 1.0044661753955001e-05, "loss": 0.3561, "step": 8940 }, { "epoch": 0.51, "grad_norm": 0.4381041332600563, "learning_rate": 1.0042800859150726e-05, "loss": 0.3384, "step": 8941 }, { "epoch": 0.51, "grad_norm": 0.3689628610234497, "learning_rate": 1.0040939962864258e-05, "loss": 0.311, "step": 8942 }, { "epoch": 0.51, "grad_norm": 0.3731679048957679, "learning_rate": 1.0039079065160042e-05, "loss": 0.2616, "step": 8943 }, { "epoch": 0.51, "grad_norm": 0.41815692396401, "learning_rate": 1.0037218166102518e-05, "loss": 0.2991, "step": 8944 }, { "epoch": 0.51, "grad_norm": 0.2680388741950821, "learning_rate": 1.0035357265756134e-05, "loss": 0.2294, "step": 8945 }, { "epoch": 0.51, "grad_norm": 0.3967633759634121, "learning_rate": 1.003349636418533e-05, "loss": 0.2559, "step": 8946 }, { "epoch": 0.51, "grad_norm": 0.7089779370160822, "learning_rate": 1.0031635461454544e-05, "loss": 0.5292, "step": 8947 }, { "epoch": 0.51, "grad_norm": 0.8142086961246247, "learning_rate": 1.0029774557628224e-05, "loss": 0.3636, "step": 8948 }, { "epoch": 0.51, "grad_norm": 0.3104295700200426, "learning_rate": 1.0027913652770813e-05, "loss": 0.2351, "step": 8949 }, { "epoch": 0.51, "grad_norm": 0.3667967386304453, "learning_rate": 1.0026052746946756e-05, "loss": 0.2733, "step": 8950 }, { "epoch": 0.51, "grad_norm": 0.45760292541987996, "learning_rate": 1.002419184022049e-05, "loss": 0.282, "step": 8951 }, { "epoch": 0.51, "grad_norm": 0.2975732477040278, "learning_rate": 1.0022330932656463e-05, "loss": 0.2097, "step": 8952 }, { "epoch": 0.51, "grad_norm": 0.36123061495278136, "learning_rate": 1.0020470024319115e-05, "loss": 0.2913, "step": 8953 }, { "epoch": 0.51, "grad_norm": 0.6903017856807934, "learning_rate": 1.0018609115272896e-05, "loss": 0.3646, "step": 8954 }, { "epoch": 0.51, "grad_norm": 0.34900373266645707, "learning_rate": 1.0016748205582238e-05, "loss": 0.2853, "step": 8955 }, { "epoch": 0.51, "grad_norm": 0.6639895995416654, "learning_rate": 1.0014887295311595e-05, "loss": 0.3339, "step": 8956 }, { "epoch": 0.51, "grad_norm": 0.21779948373374317, "learning_rate": 1.0013026384525404e-05, "loss": 0.2023, "step": 8957 }, { "epoch": 0.51, "grad_norm": 0.3989690662409945, "learning_rate": 1.0011165473288108e-05, "loss": 0.3183, "step": 8958 }, { "epoch": 0.51, "grad_norm": 0.9027589397240701, "learning_rate": 1.0009304561664154e-05, "loss": 0.359, "step": 8959 }, { "epoch": 0.51, "grad_norm": 0.6025413935849674, "learning_rate": 1.0007443649717985e-05, "loss": 0.3867, "step": 8960 }, { "epoch": 0.51, "grad_norm": 0.3535480063313833, "learning_rate": 1.0005582737514039e-05, "loss": 0.2922, "step": 8961 }, { "epoch": 0.51, "grad_norm": 0.34940809513436644, "learning_rate": 1.0003721825116766e-05, "loss": 0.2565, "step": 8962 }, { "epoch": 0.51, "grad_norm": 0.23572550353217794, "learning_rate": 1.0001860912590604e-05, "loss": 0.1681, "step": 8963 }, { "epoch": 0.52, "grad_norm": 0.4371986239915237, "learning_rate": 1e-05, "loss": 0.3286, "step": 8964 }, { "epoch": 0.52, "grad_norm": 0.5882681268821435, "learning_rate": 9.998139087409399e-06, "loss": 0.3003, "step": 8965 }, { "epoch": 0.52, "grad_norm": 0.998562898170738, "learning_rate": 9.996278174883236e-06, "loss": 0.4324, "step": 8966 }, { "epoch": 0.52, "grad_norm": 0.32535718394732693, "learning_rate": 9.994417262485963e-06, "loss": 0.2808, "step": 8967 }, { "epoch": 0.52, "grad_norm": 0.8845442812310932, "learning_rate": 9.992556350282018e-06, "loss": 0.5168, "step": 8968 }, { "epoch": 0.52, "grad_norm": 0.2605945618913011, "learning_rate": 9.990695438335847e-06, "loss": 0.2, "step": 8969 }, { "epoch": 0.52, "grad_norm": 0.3912375752348602, "learning_rate": 9.988834526711893e-06, "loss": 0.267, "step": 8970 }, { "epoch": 0.52, "grad_norm": 0.8202549742402676, "learning_rate": 9.9869736154746e-06, "loss": 0.4242, "step": 8971 }, { "epoch": 0.52, "grad_norm": 0.2838554077787522, "learning_rate": 9.985112704688406e-06, "loss": 0.1891, "step": 8972 }, { "epoch": 0.52, "grad_norm": 0.2958047596139349, "learning_rate": 9.983251794417763e-06, "loss": 0.2796, "step": 8973 }, { "epoch": 0.52, "grad_norm": 1.146356366991916, "learning_rate": 9.981390884727106e-06, "loss": 0.6581, "step": 8974 }, { "epoch": 0.52, "grad_norm": 0.4452802708748933, "learning_rate": 9.979529975680885e-06, "loss": 0.2432, "step": 8975 }, { "epoch": 0.52, "grad_norm": 0.38232368072779344, "learning_rate": 9.977669067343537e-06, "loss": 0.3037, "step": 8976 }, { "epoch": 0.52, "grad_norm": 0.3709480557645404, "learning_rate": 9.975808159779512e-06, "loss": 0.2857, "step": 8977 }, { "epoch": 0.52, "grad_norm": 0.2848080628979088, "learning_rate": 9.973947253053248e-06, "loss": 0.1488, "step": 8978 }, { "epoch": 0.52, "grad_norm": 0.36194839241988114, "learning_rate": 9.972086347229187e-06, "loss": 0.3028, "step": 8979 }, { "epoch": 0.52, "grad_norm": 0.9172902205994092, "learning_rate": 9.970225442371778e-06, "loss": 0.5909, "step": 8980 }, { "epoch": 0.52, "grad_norm": 0.3406603301566798, "learning_rate": 9.968364538545461e-06, "loss": 0.3361, "step": 8981 }, { "epoch": 0.52, "grad_norm": 0.3044827668809311, "learning_rate": 9.966503635814677e-06, "loss": 0.2007, "step": 8982 }, { "epoch": 0.52, "grad_norm": 0.5498909403562776, "learning_rate": 9.96464273424387e-06, "loss": 0.4395, "step": 8983 }, { "epoch": 0.52, "grad_norm": 0.2707344192696009, "learning_rate": 9.962781833897484e-06, "loss": 0.2111, "step": 8984 }, { "epoch": 0.52, "grad_norm": 0.27766021070239455, "learning_rate": 9.960920934839963e-06, "loss": 0.2124, "step": 8985 }, { "epoch": 0.52, "grad_norm": 1.0646904612166812, "learning_rate": 9.959060037135745e-06, "loss": 0.5268, "step": 8986 }, { "epoch": 0.52, "grad_norm": 0.7117116899008378, "learning_rate": 9.95719914084928e-06, "loss": 0.4817, "step": 8987 }, { "epoch": 0.52, "grad_norm": 0.2762098719031825, "learning_rate": 9.955338246045004e-06, "loss": 0.2282, "step": 8988 }, { "epoch": 0.52, "grad_norm": 0.3723392795742885, "learning_rate": 9.953477352787363e-06, "loss": 0.3297, "step": 8989 }, { "epoch": 0.52, "grad_norm": 0.4044795205958676, "learning_rate": 9.9516164611408e-06, "loss": 0.2681, "step": 8990 }, { "epoch": 0.52, "grad_norm": 0.2391132673895818, "learning_rate": 9.949755571169757e-06, "loss": 0.2128, "step": 8991 }, { "epoch": 0.52, "grad_norm": 0.9771484815283424, "learning_rate": 9.947894682938676e-06, "loss": 0.2187, "step": 8992 }, { "epoch": 0.52, "grad_norm": 0.3742803679785533, "learning_rate": 9.946033796511999e-06, "loss": 0.3154, "step": 8993 }, { "epoch": 0.52, "grad_norm": 0.36770528833525723, "learning_rate": 9.944172911954173e-06, "loss": 0.2913, "step": 8994 }, { "epoch": 0.52, "grad_norm": 0.7614209857492386, "learning_rate": 9.942312029329631e-06, "loss": 0.4018, "step": 8995 }, { "epoch": 0.52, "grad_norm": 0.27484815842745103, "learning_rate": 9.940451148702826e-06, "loss": 0.2612, "step": 8996 }, { "epoch": 0.52, "grad_norm": 0.31502147368921235, "learning_rate": 9.938590270138191e-06, "loss": 0.2551, "step": 8997 }, { "epoch": 0.52, "grad_norm": 0.43320342033214754, "learning_rate": 9.936729393700176e-06, "loss": 0.1657, "step": 8998 }, { "epoch": 0.52, "grad_norm": 0.7310020498893697, "learning_rate": 9.934868519453215e-06, "loss": 0.4141, "step": 8999 }, { "epoch": 0.52, "grad_norm": 0.3868343055255914, "learning_rate": 9.933007647461758e-06, "loss": 0.2496, "step": 9000 }, { "epoch": 0.52, "grad_norm": 0.304909472398119, "learning_rate": 9.931146777790241e-06, "loss": 0.2584, "step": 9001 }, { "epoch": 0.52, "grad_norm": 0.43745718846557247, "learning_rate": 9.929285910503112e-06, "loss": 0.2569, "step": 9002 }, { "epoch": 0.52, "grad_norm": 0.2749356993094986, "learning_rate": 9.927425045664804e-06, "loss": 0.202, "step": 9003 }, { "epoch": 0.52, "grad_norm": 0.44467210424965653, "learning_rate": 9.925564183339768e-06, "loss": 0.3516, "step": 9004 }, { "epoch": 0.52, "grad_norm": 0.3609803021729023, "learning_rate": 9.92370332359244e-06, "loss": 0.2799, "step": 9005 }, { "epoch": 0.52, "grad_norm": 0.32493117789295467, "learning_rate": 9.92184246648726e-06, "loss": 0.2776, "step": 9006 }, { "epoch": 0.52, "grad_norm": 1.0347469550216204, "learning_rate": 9.919981612088676e-06, "loss": 0.7681, "step": 9007 }, { "epoch": 0.52, "grad_norm": 0.33748772354766315, "learning_rate": 9.91812076046112e-06, "loss": 0.2435, "step": 9008 }, { "epoch": 0.52, "grad_norm": 0.26513883317931297, "learning_rate": 9.916259911669044e-06, "loss": 0.2091, "step": 9009 }, { "epoch": 0.52, "grad_norm": 0.46014096183328146, "learning_rate": 9.914399065776879e-06, "loss": 0.2877, "step": 9010 }, { "epoch": 0.52, "grad_norm": 0.6082292573499609, "learning_rate": 9.912538222849074e-06, "loss": 0.3486, "step": 9011 }, { "epoch": 0.52, "grad_norm": 0.34178859785901966, "learning_rate": 9.910677382950064e-06, "loss": 0.2791, "step": 9012 }, { "epoch": 0.52, "grad_norm": 0.3524107406208595, "learning_rate": 9.908816546144296e-06, "loss": 0.3128, "step": 9013 }, { "epoch": 0.52, "grad_norm": 0.2773592503100069, "learning_rate": 9.906955712496203e-06, "loss": 0.1512, "step": 9014 }, { "epoch": 0.52, "grad_norm": 0.3265634728938896, "learning_rate": 9.905094882070234e-06, "loss": 0.2592, "step": 9015 }, { "epoch": 0.52, "grad_norm": 0.40937969659931533, "learning_rate": 9.903234054930824e-06, "loss": 0.3167, "step": 9016 }, { "epoch": 0.52, "grad_norm": 0.33132514739426283, "learning_rate": 9.901373231142416e-06, "loss": 0.3214, "step": 9017 }, { "epoch": 0.52, "grad_norm": 0.35046526567785413, "learning_rate": 9.899512410769443e-06, "loss": 0.1871, "step": 9018 }, { "epoch": 0.52, "grad_norm": 0.4758272095091834, "learning_rate": 9.897651593876356e-06, "loss": 0.4102, "step": 9019 }, { "epoch": 0.52, "grad_norm": 0.32645946398502895, "learning_rate": 9.895790780527585e-06, "loss": 0.2765, "step": 9020 }, { "epoch": 0.52, "grad_norm": 0.33879706747924987, "learning_rate": 9.89392997078758e-06, "loss": 0.1954, "step": 9021 }, { "epoch": 0.52, "grad_norm": 0.420195059981251, "learning_rate": 9.892069164720771e-06, "loss": 0.3489, "step": 9022 }, { "epoch": 0.52, "grad_norm": 0.27953102565373356, "learning_rate": 9.890208362391606e-06, "loss": 0.1685, "step": 9023 }, { "epoch": 0.52, "grad_norm": 0.33324986313949784, "learning_rate": 9.888347563864517e-06, "loss": 0.2261, "step": 9024 }, { "epoch": 0.52, "grad_norm": 0.4158775305414562, "learning_rate": 9.886486769203951e-06, "loss": 0.3704, "step": 9025 }, { "epoch": 0.52, "grad_norm": 1.1118021729267935, "learning_rate": 9.884625978474341e-06, "loss": 0.6197, "step": 9026 }, { "epoch": 0.52, "grad_norm": 0.2940205641682392, "learning_rate": 9.88276519174013e-06, "loss": 0.2177, "step": 9027 }, { "epoch": 0.52, "grad_norm": 0.632974645298959, "learning_rate": 9.880904409065753e-06, "loss": 0.4462, "step": 9028 }, { "epoch": 0.52, "grad_norm": 0.21338348316802783, "learning_rate": 9.879043630515651e-06, "loss": 0.2196, "step": 9029 }, { "epoch": 0.52, "grad_norm": 0.4146681584696092, "learning_rate": 9.877182856154267e-06, "loss": 0.3047, "step": 9030 }, { "epoch": 0.52, "grad_norm": 0.4489759474178878, "learning_rate": 9.87532208604603e-06, "loss": 0.2851, "step": 9031 }, { "epoch": 0.52, "grad_norm": 0.36796592998808814, "learning_rate": 9.873461320255388e-06, "loss": 0.3119, "step": 9032 }, { "epoch": 0.52, "grad_norm": 0.3727674532757231, "learning_rate": 9.871600558846772e-06, "loss": 0.2888, "step": 9033 }, { "epoch": 0.52, "grad_norm": 0.4707617476263641, "learning_rate": 9.869739801884627e-06, "loss": 0.2876, "step": 9034 }, { "epoch": 0.52, "grad_norm": 0.2443750988765055, "learning_rate": 9.867879049433383e-06, "loss": 0.1874, "step": 9035 }, { "epoch": 0.52, "grad_norm": 0.39422198057692154, "learning_rate": 9.866018301557484e-06, "loss": 0.2668, "step": 9036 }, { "epoch": 0.52, "grad_norm": 0.29803699521115307, "learning_rate": 9.864157558321364e-06, "loss": 0.2592, "step": 9037 }, { "epoch": 0.52, "grad_norm": 0.4831922205500076, "learning_rate": 9.862296819789464e-06, "loss": 0.3992, "step": 9038 }, { "epoch": 0.52, "grad_norm": 0.5305541289799832, "learning_rate": 9.860436086026218e-06, "loss": 0.3633, "step": 9039 }, { "epoch": 0.52, "grad_norm": 0.2684294559842977, "learning_rate": 9.858575357096064e-06, "loss": 0.2333, "step": 9040 }, { "epoch": 0.52, "grad_norm": 0.25245525380867007, "learning_rate": 9.85671463306344e-06, "loss": 0.1646, "step": 9041 }, { "epoch": 0.52, "grad_norm": 0.5212038708909201, "learning_rate": 9.854853913992783e-06, "loss": 0.3353, "step": 9042 }, { "epoch": 0.52, "grad_norm": 0.3956254025587982, "learning_rate": 9.852993199948527e-06, "loss": 0.3331, "step": 9043 }, { "epoch": 0.52, "grad_norm": 0.3427049716407958, "learning_rate": 9.85113249099511e-06, "loss": 0.276, "step": 9044 }, { "epoch": 0.52, "grad_norm": 0.46855115446728846, "learning_rate": 9.849271787196971e-06, "loss": 0.3368, "step": 9045 }, { "epoch": 0.52, "grad_norm": 0.3783074851487932, "learning_rate": 9.847411088618539e-06, "loss": 0.3037, "step": 9046 }, { "epoch": 0.52, "grad_norm": 0.18974821301931535, "learning_rate": 9.845550395324259e-06, "loss": 0.0865, "step": 9047 }, { "epoch": 0.52, "grad_norm": 0.2976246395263267, "learning_rate": 9.843689707378558e-06, "loss": 0.2641, "step": 9048 }, { "epoch": 0.52, "grad_norm": 0.3856321462242667, "learning_rate": 9.841829024845882e-06, "loss": 0.3264, "step": 9049 }, { "epoch": 0.52, "grad_norm": 0.8229565405098527, "learning_rate": 9.839968347790657e-06, "loss": 0.3381, "step": 9050 }, { "epoch": 0.52, "grad_norm": 0.5464022009127502, "learning_rate": 9.83810767627732e-06, "loss": 0.3216, "step": 9051 }, { "epoch": 0.52, "grad_norm": 0.3146404356039652, "learning_rate": 9.836247010370308e-06, "loss": 0.2941, "step": 9052 }, { "epoch": 0.52, "grad_norm": 0.29591631958317766, "learning_rate": 9.834386350134052e-06, "loss": 0.1869, "step": 9053 }, { "epoch": 0.52, "grad_norm": 0.7751683065263755, "learning_rate": 9.832525695632994e-06, "loss": 0.4217, "step": 9054 }, { "epoch": 0.52, "grad_norm": 0.4249413501512791, "learning_rate": 9.830665046931563e-06, "loss": 0.2905, "step": 9055 }, { "epoch": 0.52, "grad_norm": 0.5985386409545421, "learning_rate": 9.828804404094192e-06, "loss": 0.3324, "step": 9056 }, { "epoch": 0.52, "grad_norm": 0.20868793648929493, "learning_rate": 9.82694376718532e-06, "loss": 0.0741, "step": 9057 }, { "epoch": 0.52, "grad_norm": 0.3726465468468453, "learning_rate": 9.825083136269375e-06, "loss": 0.3055, "step": 9058 }, { "epoch": 0.52, "grad_norm": 0.4228458238529731, "learning_rate": 9.823222511410795e-06, "loss": 0.2778, "step": 9059 }, { "epoch": 0.52, "grad_norm": 0.29394146644376107, "learning_rate": 9.82136189267401e-06, "loss": 0.2364, "step": 9060 }, { "epoch": 0.52, "grad_norm": 0.43066936972759234, "learning_rate": 9.819501280123458e-06, "loss": 0.3554, "step": 9061 }, { "epoch": 0.52, "grad_norm": 0.6720219973395819, "learning_rate": 9.817640673823566e-06, "loss": 0.4414, "step": 9062 }, { "epoch": 0.52, "grad_norm": 0.22057283222630394, "learning_rate": 9.815780073838773e-06, "loss": 0.0968, "step": 9063 }, { "epoch": 0.52, "grad_norm": 0.27013861542171497, "learning_rate": 9.813919480233503e-06, "loss": 0.283, "step": 9064 }, { "epoch": 0.52, "grad_norm": 0.7716994426637789, "learning_rate": 9.812058893072199e-06, "loss": 0.5575, "step": 9065 }, { "epoch": 0.52, "grad_norm": 0.34090727687569566, "learning_rate": 9.810198312419284e-06, "loss": 0.2088, "step": 9066 }, { "epoch": 0.52, "grad_norm": 0.4536499925740262, "learning_rate": 9.808337738339194e-06, "loss": 0.355, "step": 9067 }, { "epoch": 0.52, "grad_norm": 0.3564000526722853, "learning_rate": 9.80647717089636e-06, "loss": 0.3234, "step": 9068 }, { "epoch": 0.52, "grad_norm": 0.23213456762043275, "learning_rate": 9.804616610155215e-06, "loss": 0.1778, "step": 9069 }, { "epoch": 0.52, "grad_norm": 0.3640646952779975, "learning_rate": 9.802756056180187e-06, "loss": 0.2301, "step": 9070 }, { "epoch": 0.52, "grad_norm": 1.2155181820109497, "learning_rate": 9.800895509035708e-06, "loss": 0.8132, "step": 9071 }, { "epoch": 0.52, "grad_norm": 0.3652622733588144, "learning_rate": 9.799034968786209e-06, "loss": 0.2981, "step": 9072 }, { "epoch": 0.52, "grad_norm": 0.3160744756567268, "learning_rate": 9.797174435496119e-06, "loss": 0.2346, "step": 9073 }, { "epoch": 0.52, "grad_norm": 0.3198722182667888, "learning_rate": 9.795313909229872e-06, "loss": 0.2408, "step": 9074 }, { "epoch": 0.52, "grad_norm": 0.43873354265547654, "learning_rate": 9.793453390051894e-06, "loss": 0.3458, "step": 9075 }, { "epoch": 0.52, "grad_norm": 0.27745929926602364, "learning_rate": 9.791592878026617e-06, "loss": 0.2203, "step": 9076 }, { "epoch": 0.52, "grad_norm": 0.962358758398474, "learning_rate": 9.789732373218468e-06, "loss": 0.5281, "step": 9077 }, { "epoch": 0.52, "grad_norm": 0.5943049810697584, "learning_rate": 9.78787187569188e-06, "loss": 0.3826, "step": 9078 }, { "epoch": 0.52, "grad_norm": 0.3278974230441489, "learning_rate": 9.786011385511279e-06, "loss": 0.2963, "step": 9079 }, { "epoch": 0.52, "grad_norm": 0.3273067224989936, "learning_rate": 9.784150902741095e-06, "loss": 0.2385, "step": 9080 }, { "epoch": 0.52, "grad_norm": 0.27902999532971506, "learning_rate": 9.782290427445755e-06, "loss": 0.1677, "step": 9081 }, { "epoch": 0.52, "grad_norm": 0.3192097580526225, "learning_rate": 9.78042995968969e-06, "loss": 0.2732, "step": 9082 }, { "epoch": 0.52, "grad_norm": 0.6567345097224239, "learning_rate": 9.778569499537327e-06, "loss": 0.3247, "step": 9083 }, { "epoch": 0.52, "grad_norm": 0.30540297396342236, "learning_rate": 9.77670904705309e-06, "loss": 0.2962, "step": 9084 }, { "epoch": 0.52, "grad_norm": 0.368732504569253, "learning_rate": 9.77484860230141e-06, "loss": 0.292, "step": 9085 }, { "epoch": 0.52, "grad_norm": 0.8805145259459297, "learning_rate": 9.772988165346715e-06, "loss": 0.4137, "step": 9086 }, { "epoch": 0.52, "grad_norm": 0.22124704588757194, "learning_rate": 9.771127736253426e-06, "loss": 0.1537, "step": 9087 }, { "epoch": 0.52, "grad_norm": 0.3510303095693387, "learning_rate": 9.769267315085976e-06, "loss": 0.2932, "step": 9088 }, { "epoch": 0.52, "grad_norm": 0.4614132599295984, "learning_rate": 9.767406901908787e-06, "loss": 0.2882, "step": 9089 }, { "epoch": 0.52, "grad_norm": 0.8664585977799852, "learning_rate": 9.76554649678629e-06, "loss": 0.4554, "step": 9090 }, { "epoch": 0.52, "grad_norm": 0.3287018378571944, "learning_rate": 9.763686099782905e-06, "loss": 0.2582, "step": 9091 }, { "epoch": 0.52, "grad_norm": 0.31568488663477967, "learning_rate": 9.761825710963063e-06, "loss": 0.2942, "step": 9092 }, { "epoch": 0.52, "grad_norm": 0.20206609002791392, "learning_rate": 9.759965330391182e-06, "loss": 0.0898, "step": 9093 }, { "epoch": 0.52, "grad_norm": 0.3151578729799421, "learning_rate": 9.758104958131696e-06, "loss": 0.2367, "step": 9094 }, { "epoch": 0.52, "grad_norm": 0.741103555846677, "learning_rate": 9.756244594249024e-06, "loss": 0.4884, "step": 9095 }, { "epoch": 0.52, "grad_norm": 0.368566194853679, "learning_rate": 9.754384238807589e-06, "loss": 0.2917, "step": 9096 }, { "epoch": 0.52, "grad_norm": 0.32908075448316854, "learning_rate": 9.752523891871819e-06, "loss": 0.2796, "step": 9097 }, { "epoch": 0.52, "grad_norm": 0.9739561754616147, "learning_rate": 9.750663553506134e-06, "loss": 0.6682, "step": 9098 }, { "epoch": 0.52, "grad_norm": 0.19140722525879253, "learning_rate": 9.748803223774962e-06, "loss": 0.1553, "step": 9099 }, { "epoch": 0.52, "grad_norm": 0.29655050143646866, "learning_rate": 9.746942902742722e-06, "loss": 0.2873, "step": 9100 }, { "epoch": 0.52, "grad_norm": 0.8291151161681248, "learning_rate": 9.745082590473839e-06, "loss": 0.4544, "step": 9101 }, { "epoch": 0.52, "grad_norm": 0.6396141518635055, "learning_rate": 9.743222287032734e-06, "loss": 0.3046, "step": 9102 }, { "epoch": 0.52, "grad_norm": 0.3562190167610645, "learning_rate": 9.741361992483832e-06, "loss": 0.2791, "step": 9103 }, { "epoch": 0.52, "grad_norm": 0.3584912974204283, "learning_rate": 9.739501706891551e-06, "loss": 0.3235, "step": 9104 }, { "epoch": 0.52, "grad_norm": 0.28189812469588293, "learning_rate": 9.737641430320315e-06, "loss": 0.1743, "step": 9105 }, { "epoch": 0.52, "grad_norm": 0.30844703586005295, "learning_rate": 9.735781162834546e-06, "loss": 0.1798, "step": 9106 }, { "epoch": 0.52, "grad_norm": 0.8857786094208216, "learning_rate": 9.733920904498664e-06, "loss": 0.3613, "step": 9107 }, { "epoch": 0.52, "grad_norm": 0.4731519913769104, "learning_rate": 9.73206065537709e-06, "loss": 0.3541, "step": 9108 }, { "epoch": 0.52, "grad_norm": 0.291075703344724, "learning_rate": 9.730200415534242e-06, "loss": 0.1994, "step": 9109 }, { "epoch": 0.52, "grad_norm": 1.2261999869951392, "learning_rate": 9.728340185034545e-06, "loss": 0.7143, "step": 9110 }, { "epoch": 0.52, "grad_norm": 0.31050079628050553, "learning_rate": 9.726479963942412e-06, "loss": 0.2554, "step": 9111 }, { "epoch": 0.52, "grad_norm": 0.26466322118910507, "learning_rate": 9.72461975232227e-06, "loss": 0.1923, "step": 9112 }, { "epoch": 0.52, "grad_norm": 0.6613580619617312, "learning_rate": 9.72275955023853e-06, "loss": 0.4344, "step": 9113 }, { "epoch": 0.52, "grad_norm": 1.3485587659150748, "learning_rate": 9.720899357755618e-06, "loss": 0.8047, "step": 9114 }, { "epoch": 0.52, "grad_norm": 0.29397938026410353, "learning_rate": 9.719039174937948e-06, "loss": 0.2338, "step": 9115 }, { "epoch": 0.52, "grad_norm": 0.4540959149910563, "learning_rate": 9.717179001849942e-06, "loss": 0.3446, "step": 9116 }, { "epoch": 0.52, "grad_norm": 0.7506127708683249, "learning_rate": 9.715318838556014e-06, "loss": 0.4407, "step": 9117 }, { "epoch": 0.52, "grad_norm": 0.3762547929412095, "learning_rate": 9.71345868512058e-06, "loss": 0.2911, "step": 9118 }, { "epoch": 0.52, "grad_norm": 0.32214352086262227, "learning_rate": 9.711598541608062e-06, "loss": 0.2218, "step": 9119 }, { "epoch": 0.52, "grad_norm": 0.3327082599332806, "learning_rate": 9.709738408082873e-06, "loss": 0.2906, "step": 9120 }, { "epoch": 0.52, "grad_norm": 0.3958060769664337, "learning_rate": 9.707878284609429e-06, "loss": 0.2693, "step": 9121 }, { "epoch": 0.52, "grad_norm": 0.4592079526507075, "learning_rate": 9.706018171252148e-06, "loss": 0.2672, "step": 9122 }, { "epoch": 0.52, "grad_norm": 0.34486135538857204, "learning_rate": 9.704158068075445e-06, "loss": 0.314, "step": 9123 }, { "epoch": 0.52, "grad_norm": 0.3742732158714571, "learning_rate": 9.702297975143737e-06, "loss": 0.2434, "step": 9124 }, { "epoch": 0.52, "grad_norm": 0.2477396220146489, "learning_rate": 9.700437892521434e-06, "loss": 0.1747, "step": 9125 }, { "epoch": 0.52, "grad_norm": 1.2186332134145217, "learning_rate": 9.698577820272958e-06, "loss": 0.81, "step": 9126 }, { "epoch": 0.52, "grad_norm": 0.4225976525036132, "learning_rate": 9.696717758462716e-06, "loss": 0.2788, "step": 9127 }, { "epoch": 0.52, "grad_norm": 0.2970628514194815, "learning_rate": 9.694857707155126e-06, "loss": 0.2496, "step": 9128 }, { "epoch": 0.52, "grad_norm": 0.8264539934908051, "learning_rate": 9.6929976664146e-06, "loss": 0.4638, "step": 9129 }, { "epoch": 0.52, "grad_norm": 0.33276779372939486, "learning_rate": 9.691137636305554e-06, "loss": 0.2866, "step": 9130 }, { "epoch": 0.52, "grad_norm": 0.3552299923460887, "learning_rate": 9.689277616892396e-06, "loss": 0.3332, "step": 9131 }, { "epoch": 0.52, "grad_norm": 0.21249879155980383, "learning_rate": 9.687417608239541e-06, "loss": 0.1038, "step": 9132 }, { "epoch": 0.52, "grad_norm": 0.3190251853080398, "learning_rate": 9.6855576104114e-06, "loss": 0.2811, "step": 9133 }, { "epoch": 0.52, "grad_norm": 1.1275659444191837, "learning_rate": 9.683697623472387e-06, "loss": 0.6108, "step": 9134 }, { "epoch": 0.52, "grad_norm": 0.4722264674934552, "learning_rate": 9.681837647486912e-06, "loss": 0.2835, "step": 9135 }, { "epoch": 0.52, "grad_norm": 0.3269246466042898, "learning_rate": 9.679977682519385e-06, "loss": 0.2699, "step": 9136 }, { "epoch": 0.52, "grad_norm": 1.6250715059284526, "learning_rate": 9.678117728634217e-06, "loss": 0.6513, "step": 9137 }, { "epoch": 0.53, "grad_norm": 0.23143067024166109, "learning_rate": 9.676257785895817e-06, "loss": 0.134, "step": 9138 }, { "epoch": 0.53, "grad_norm": 0.39965635338532934, "learning_rate": 9.674397854368598e-06, "loss": 0.2694, "step": 9139 }, { "epoch": 0.53, "grad_norm": 0.3096227242107919, "learning_rate": 9.672537934116966e-06, "loss": 0.3045, "step": 9140 }, { "epoch": 0.53, "grad_norm": 0.5551639828567845, "learning_rate": 9.670678025205332e-06, "loss": 0.3214, "step": 9141 }, { "epoch": 0.53, "grad_norm": 0.3793778776155998, "learning_rate": 9.668818127698103e-06, "loss": 0.2593, "step": 9142 }, { "epoch": 0.53, "grad_norm": 0.3052842446630093, "learning_rate": 9.66695824165969e-06, "loss": 0.2919, "step": 9143 }, { "epoch": 0.53, "grad_norm": 0.3974048517058976, "learning_rate": 9.665098367154496e-06, "loss": 0.2662, "step": 9144 }, { "epoch": 0.53, "grad_norm": 0.30419717373936883, "learning_rate": 9.663238504246933e-06, "loss": 0.2007, "step": 9145 }, { "epoch": 0.53, "grad_norm": 0.3827763477642118, "learning_rate": 9.661378653001404e-06, "loss": 0.2769, "step": 9146 }, { "epoch": 0.53, "grad_norm": 0.3473256096785234, "learning_rate": 9.65951881348232e-06, "loss": 0.3079, "step": 9147 }, { "epoch": 0.53, "grad_norm": 0.3339712098319444, "learning_rate": 9.657658985754085e-06, "loss": 0.162, "step": 9148 }, { "epoch": 0.53, "grad_norm": 0.507041220843939, "learning_rate": 9.655799169881103e-06, "loss": 0.3754, "step": 9149 }, { "epoch": 0.53, "grad_norm": 1.3073406626204165, "learning_rate": 9.653939365927785e-06, "loss": 0.7838, "step": 9150 }, { "epoch": 0.53, "grad_norm": 0.28633539371312033, "learning_rate": 9.652079573958529e-06, "loss": 0.2202, "step": 9151 }, { "epoch": 0.53, "grad_norm": 0.3691162201113786, "learning_rate": 9.650219794037741e-06, "loss": 0.238, "step": 9152 }, { "epoch": 0.53, "grad_norm": 0.41623336946895095, "learning_rate": 9.648360026229828e-06, "loss": 0.3067, "step": 9153 }, { "epoch": 0.53, "grad_norm": 0.35646845738442257, "learning_rate": 9.646500270599191e-06, "loss": 0.2647, "step": 9154 }, { "epoch": 0.53, "grad_norm": 0.28283345328135584, "learning_rate": 9.644640527210235e-06, "loss": 0.2443, "step": 9155 }, { "epoch": 0.53, "grad_norm": 1.1947336670116657, "learning_rate": 9.642780796127362e-06, "loss": 0.6928, "step": 9156 }, { "epoch": 0.53, "grad_norm": 0.5466204845370135, "learning_rate": 9.640921077414975e-06, "loss": 0.3285, "step": 9157 }, { "epoch": 0.53, "grad_norm": 0.35102520321136116, "learning_rate": 9.639061371137475e-06, "loss": 0.2716, "step": 9158 }, { "epoch": 0.53, "grad_norm": 0.24634222677994205, "learning_rate": 9.637201677359266e-06, "loss": 0.2251, "step": 9159 }, { "epoch": 0.53, "grad_norm": 0.5712291239043696, "learning_rate": 9.635341996144747e-06, "loss": 0.3271, "step": 9160 }, { "epoch": 0.53, "grad_norm": 0.34496339742721027, "learning_rate": 9.633482327558316e-06, "loss": 0.2331, "step": 9161 }, { "epoch": 0.53, "grad_norm": 0.8177903308392893, "learning_rate": 9.63162267166438e-06, "loss": 0.6087, "step": 9162 }, { "epoch": 0.53, "grad_norm": 0.3266062325787849, "learning_rate": 9.629763028527332e-06, "loss": 0.2651, "step": 9163 }, { "epoch": 0.53, "grad_norm": 0.42567311427029086, "learning_rate": 9.627903398211577e-06, "loss": 0.2801, "step": 9164 }, { "epoch": 0.53, "grad_norm": 0.3037908672972681, "learning_rate": 9.626043780781508e-06, "loss": 0.1905, "step": 9165 }, { "epoch": 0.53, "grad_norm": 0.623542999401843, "learning_rate": 9.62418417630153e-06, "loss": 0.3826, "step": 9166 }, { "epoch": 0.53, "grad_norm": 0.31378311285481736, "learning_rate": 9.622324584836036e-06, "loss": 0.2816, "step": 9167 }, { "epoch": 0.53, "grad_norm": 1.04329856978667, "learning_rate": 9.620465006449427e-06, "loss": 0.4929, "step": 9168 }, { "epoch": 0.53, "grad_norm": 0.6656212816613781, "learning_rate": 9.618605441206098e-06, "loss": 0.3594, "step": 9169 }, { "epoch": 0.53, "grad_norm": 0.3644528569994132, "learning_rate": 9.616745889170446e-06, "loss": 0.3054, "step": 9170 }, { "epoch": 0.53, "grad_norm": 0.1996589631527687, "learning_rate": 9.614886350406865e-06, "loss": 0.1666, "step": 9171 }, { "epoch": 0.53, "grad_norm": 0.3726302590229509, "learning_rate": 9.613026824979757e-06, "loss": 0.2799, "step": 9172 }, { "epoch": 0.53, "grad_norm": 0.5874746934630642, "learning_rate": 9.61116731295351e-06, "loss": 0.3598, "step": 9173 }, { "epoch": 0.53, "grad_norm": 0.4754181343382663, "learning_rate": 9.609307814392525e-06, "loss": 0.3157, "step": 9174 }, { "epoch": 0.53, "grad_norm": 0.3358430754062846, "learning_rate": 9.607448329361193e-06, "loss": 0.2765, "step": 9175 }, { "epoch": 0.53, "grad_norm": 0.368547482274709, "learning_rate": 9.605588857923906e-06, "loss": 0.3226, "step": 9176 }, { "epoch": 0.53, "grad_norm": 0.24855846818078145, "learning_rate": 9.603729400145063e-06, "loss": 0.1304, "step": 9177 }, { "epoch": 0.53, "grad_norm": 0.8221257736419765, "learning_rate": 9.601869956089051e-06, "loss": 0.3965, "step": 9178 }, { "epoch": 0.53, "grad_norm": 0.2844103771443083, "learning_rate": 9.60001052582027e-06, "loss": 0.2756, "step": 9179 }, { "epoch": 0.53, "grad_norm": 0.5660900591073189, "learning_rate": 9.598151109403102e-06, "loss": 0.4276, "step": 9180 }, { "epoch": 0.53, "grad_norm": 0.4704157923383228, "learning_rate": 9.596291706901946e-06, "loss": 0.2179, "step": 9181 }, { "epoch": 0.53, "grad_norm": 0.4820397687697669, "learning_rate": 9.59443231838119e-06, "loss": 0.3491, "step": 9182 }, { "epoch": 0.53, "grad_norm": 0.38983163889496963, "learning_rate": 9.59257294390523e-06, "loss": 0.3285, "step": 9183 }, { "epoch": 0.53, "grad_norm": 0.2131714527848812, "learning_rate": 9.59071358353845e-06, "loss": 0.1013, "step": 9184 }, { "epoch": 0.53, "grad_norm": 0.3957734107129791, "learning_rate": 9.588854237345238e-06, "loss": 0.3476, "step": 9185 }, { "epoch": 0.53, "grad_norm": 0.9320135709989978, "learning_rate": 9.586994905389985e-06, "loss": 0.4693, "step": 9186 }, { "epoch": 0.53, "grad_norm": 0.3124984898779434, "learning_rate": 9.585135587737085e-06, "loss": 0.2266, "step": 9187 }, { "epoch": 0.53, "grad_norm": 0.45270701652538803, "learning_rate": 9.583276284450917e-06, "loss": 0.3355, "step": 9188 }, { "epoch": 0.53, "grad_norm": 0.44931577222450564, "learning_rate": 9.581416995595877e-06, "loss": 0.2828, "step": 9189 }, { "epoch": 0.53, "grad_norm": 0.23532191378371217, "learning_rate": 9.579557721236345e-06, "loss": 0.1269, "step": 9190 }, { "epoch": 0.53, "grad_norm": 0.34700214444362804, "learning_rate": 9.577698461436715e-06, "loss": 0.2821, "step": 9191 }, { "epoch": 0.53, "grad_norm": 1.1069957482466108, "learning_rate": 9.575839216261366e-06, "loss": 0.4473, "step": 9192 }, { "epoch": 0.53, "grad_norm": 0.539025147105325, "learning_rate": 9.573979985774689e-06, "loss": 0.3448, "step": 9193 }, { "epoch": 0.53, "grad_norm": 0.4325392202671178, "learning_rate": 9.572120770041065e-06, "loss": 0.2551, "step": 9194 }, { "epoch": 0.53, "grad_norm": 0.37278555312821754, "learning_rate": 9.570261569124882e-06, "loss": 0.3115, "step": 9195 }, { "epoch": 0.53, "grad_norm": 0.31715591922240705, "learning_rate": 9.568402383090519e-06, "loss": 0.1692, "step": 9196 }, { "epoch": 0.53, "grad_norm": 0.3931850717706773, "learning_rate": 9.566543212002365e-06, "loss": 0.2381, "step": 9197 }, { "epoch": 0.53, "grad_norm": 0.4006785161291414, "learning_rate": 9.564684055924801e-06, "loss": 0.3082, "step": 9198 }, { "epoch": 0.53, "grad_norm": 0.9563471812307568, "learning_rate": 9.562824914922211e-06, "loss": 0.6652, "step": 9199 }, { "epoch": 0.53, "grad_norm": 0.29294573478695535, "learning_rate": 9.560965789058975e-06, "loss": 0.2166, "step": 9200 }, { "epoch": 0.53, "grad_norm": 1.056246726061743, "learning_rate": 9.559106678399473e-06, "loss": 0.5694, "step": 9201 }, { "epoch": 0.53, "grad_norm": 0.27936225502157397, "learning_rate": 9.55724758300809e-06, "loss": 0.1959, "step": 9202 }, { "epoch": 0.53, "grad_norm": 0.3162580378410517, "learning_rate": 9.555388502949201e-06, "loss": 0.2596, "step": 9203 }, { "epoch": 0.53, "grad_norm": 0.6572106044140896, "learning_rate": 9.553529438287192e-06, "loss": 0.4197, "step": 9204 }, { "epoch": 0.53, "grad_norm": 0.8055700596054751, "learning_rate": 9.551670389086438e-06, "loss": 0.5843, "step": 9205 }, { "epoch": 0.53, "grad_norm": 0.372228470556821, "learning_rate": 9.54981135541132e-06, "loss": 0.2797, "step": 9206 }, { "epoch": 0.53, "grad_norm": 0.3561420751933189, "learning_rate": 9.547952337326214e-06, "loss": 0.2531, "step": 9207 }, { "epoch": 0.53, "grad_norm": 0.37148428650642595, "learning_rate": 9.546093334895498e-06, "loss": 0.2567, "step": 9208 }, { "epoch": 0.53, "grad_norm": 0.36363796098136053, "learning_rate": 9.544234348183553e-06, "loss": 0.279, "step": 9209 }, { "epoch": 0.53, "grad_norm": 0.2959650585604881, "learning_rate": 9.542375377254753e-06, "loss": 0.238, "step": 9210 }, { "epoch": 0.53, "grad_norm": 0.6060688718356312, "learning_rate": 9.54051642217347e-06, "loss": 0.399, "step": 9211 }, { "epoch": 0.53, "grad_norm": 0.3963007962854568, "learning_rate": 9.538657483004088e-06, "loss": 0.3126, "step": 9212 }, { "epoch": 0.53, "grad_norm": 0.7595275160558781, "learning_rate": 9.536798559810978e-06, "loss": 0.3205, "step": 9213 }, { "epoch": 0.53, "grad_norm": 0.3413631598251066, "learning_rate": 9.53493965265851e-06, "loss": 0.3021, "step": 9214 }, { "epoch": 0.53, "grad_norm": 0.327046762074509, "learning_rate": 9.533080761611066e-06, "loss": 0.2814, "step": 9215 }, { "epoch": 0.53, "grad_norm": 0.21901892281945962, "learning_rate": 9.53122188673301e-06, "loss": 0.1508, "step": 9216 }, { "epoch": 0.53, "grad_norm": 1.0974836030044757, "learning_rate": 9.529363028088725e-06, "loss": 0.7183, "step": 9217 }, { "epoch": 0.53, "grad_norm": 0.34926035459810145, "learning_rate": 9.52750418574258e-06, "loss": 0.265, "step": 9218 }, { "epoch": 0.53, "grad_norm": 0.3801483295500776, "learning_rate": 9.525645359758939e-06, "loss": 0.3289, "step": 9219 }, { "epoch": 0.53, "grad_norm": 1.1846574314137546, "learning_rate": 9.523786550202182e-06, "loss": 0.3465, "step": 9220 }, { "epoch": 0.53, "grad_norm": 0.35928822346783335, "learning_rate": 9.521927757136673e-06, "loss": 0.2593, "step": 9221 }, { "epoch": 0.53, "grad_norm": 0.25623013450271037, "learning_rate": 9.520068980626789e-06, "loss": 0.1978, "step": 9222 }, { "epoch": 0.53, "grad_norm": 0.37505431562660235, "learning_rate": 9.518210220736892e-06, "loss": 0.2664, "step": 9223 }, { "epoch": 0.53, "grad_norm": 0.37891600500710415, "learning_rate": 9.516351477531357e-06, "loss": 0.2994, "step": 9224 }, { "epoch": 0.53, "grad_norm": 0.9228496278274424, "learning_rate": 9.51449275107455e-06, "loss": 0.4677, "step": 9225 }, { "epoch": 0.53, "grad_norm": 0.3824480382733829, "learning_rate": 9.512634041430835e-06, "loss": 0.2635, "step": 9226 }, { "epoch": 0.53, "grad_norm": 0.41175544519260576, "learning_rate": 9.510775348664584e-06, "loss": 0.3034, "step": 9227 }, { "epoch": 0.53, "grad_norm": 0.26939742805578254, "learning_rate": 9.508916672840161e-06, "loss": 0.1751, "step": 9228 }, { "epoch": 0.53, "grad_norm": 1.172159570979836, "learning_rate": 9.507058014021933e-06, "loss": 0.5465, "step": 9229 }, { "epoch": 0.53, "grad_norm": 0.35814475799141565, "learning_rate": 9.505199372274264e-06, "loss": 0.27, "step": 9230 }, { "epoch": 0.53, "grad_norm": 0.43551068922587277, "learning_rate": 9.50334074766152e-06, "loss": 0.3128, "step": 9231 }, { "epoch": 0.53, "grad_norm": 0.7961712293430698, "learning_rate": 9.501482140248064e-06, "loss": 0.4733, "step": 9232 }, { "epoch": 0.53, "grad_norm": 0.3121898207774114, "learning_rate": 9.499623550098262e-06, "loss": 0.1957, "step": 9233 }, { "epoch": 0.53, "grad_norm": 0.2227896224663921, "learning_rate": 9.497764977276473e-06, "loss": 0.2213, "step": 9234 }, { "epoch": 0.53, "grad_norm": 1.5631757758484857, "learning_rate": 9.495906421847063e-06, "loss": 0.792, "step": 9235 }, { "epoch": 0.53, "grad_norm": 0.3107139886169333, "learning_rate": 9.49404788387439e-06, "loss": 0.2195, "step": 9236 }, { "epoch": 0.53, "grad_norm": 0.7244261519601902, "learning_rate": 9.492189363422819e-06, "loss": 0.4441, "step": 9237 }, { "epoch": 0.53, "grad_norm": 0.3605582089400721, "learning_rate": 9.490330860556707e-06, "loss": 0.3125, "step": 9238 }, { "epoch": 0.53, "grad_norm": 0.2873919445850608, "learning_rate": 9.488472375340417e-06, "loss": 0.2106, "step": 9239 }, { "epoch": 0.53, "grad_norm": 0.39888390668387463, "learning_rate": 9.486613907838306e-06, "loss": 0.2769, "step": 9240 }, { "epoch": 0.53, "grad_norm": 0.4867865905958677, "learning_rate": 9.484755458114732e-06, "loss": 0.3455, "step": 9241 }, { "epoch": 0.53, "grad_norm": 0.310940342891282, "learning_rate": 9.482897026234056e-06, "loss": 0.2665, "step": 9242 }, { "epoch": 0.53, "grad_norm": 0.501526397698931, "learning_rate": 9.48103861226063e-06, "loss": 0.3035, "step": 9243 }, { "epoch": 0.53, "grad_norm": 0.7521124857164626, "learning_rate": 9.47918021625882e-06, "loss": 0.5031, "step": 9244 }, { "epoch": 0.53, "grad_norm": 0.393541576127973, "learning_rate": 9.477321838292972e-06, "loss": 0.279, "step": 9245 }, { "epoch": 0.53, "grad_norm": 0.3212033534001573, "learning_rate": 9.475463478427451e-06, "loss": 0.2417, "step": 9246 }, { "epoch": 0.53, "grad_norm": 0.7874596505559587, "learning_rate": 9.473605136726602e-06, "loss": 0.6043, "step": 9247 }, { "epoch": 0.53, "grad_norm": 0.3961777779705941, "learning_rate": 9.471746813254788e-06, "loss": 0.2912, "step": 9248 }, { "epoch": 0.53, "grad_norm": 0.27725278619850396, "learning_rate": 9.469888508076357e-06, "loss": 0.1882, "step": 9249 }, { "epoch": 0.53, "grad_norm": 0.3362006708506701, "learning_rate": 9.468030221255667e-06, "loss": 0.3169, "step": 9250 }, { "epoch": 0.53, "grad_norm": 0.38625457503857924, "learning_rate": 9.46617195285707e-06, "loss": 0.2824, "step": 9251 }, { "epoch": 0.53, "grad_norm": 0.4559670161202051, "learning_rate": 9.464313702944912e-06, "loss": 0.272, "step": 9252 }, { "epoch": 0.53, "grad_norm": 0.8027237790991724, "learning_rate": 9.462455471583545e-06, "loss": 0.5867, "step": 9253 }, { "epoch": 0.53, "grad_norm": 0.2779288614551335, "learning_rate": 9.460597258837325e-06, "loss": 0.2458, "step": 9254 }, { "epoch": 0.53, "grad_norm": 0.43085408967342126, "learning_rate": 9.458739064770595e-06, "loss": 0.351, "step": 9255 }, { "epoch": 0.53, "grad_norm": 0.22237328741189238, "learning_rate": 9.456880889447712e-06, "loss": 0.093, "step": 9256 }, { "epoch": 0.53, "grad_norm": 0.3228147343712859, "learning_rate": 9.455022732933017e-06, "loss": 0.2727, "step": 9257 }, { "epoch": 0.53, "grad_norm": 0.4869726882315915, "learning_rate": 9.453164595290865e-06, "loss": 0.3442, "step": 9258 }, { "epoch": 0.53, "grad_norm": 0.4765999794871249, "learning_rate": 9.451306476585595e-06, "loss": 0.3115, "step": 9259 }, { "epoch": 0.53, "grad_norm": 0.37056859041713597, "learning_rate": 9.449448376881563e-06, "loss": 0.2723, "step": 9260 }, { "epoch": 0.53, "grad_norm": 0.9036552153671678, "learning_rate": 9.447590296243106e-06, "loss": 0.4958, "step": 9261 }, { "epoch": 0.53, "grad_norm": 0.21923658796039047, "learning_rate": 9.445732234734576e-06, "loss": 0.1774, "step": 9262 }, { "epoch": 0.53, "grad_norm": 0.36316978936914124, "learning_rate": 9.443874192420312e-06, "loss": 0.2585, "step": 9263 }, { "epoch": 0.53, "grad_norm": 0.5077816681609485, "learning_rate": 9.442016169364664e-06, "loss": 0.3649, "step": 9264 }, { "epoch": 0.53, "grad_norm": 0.33319325267717365, "learning_rate": 9.440158165631972e-06, "loss": 0.2898, "step": 9265 }, { "epoch": 0.53, "grad_norm": 0.4992737656729488, "learning_rate": 9.438300181286576e-06, "loss": 0.284, "step": 9266 }, { "epoch": 0.53, "grad_norm": 0.4975012780414002, "learning_rate": 9.436442216392823e-06, "loss": 0.4145, "step": 9267 }, { "epoch": 0.53, "grad_norm": 0.2694784425314511, "learning_rate": 9.43458427101505e-06, "loss": 0.1441, "step": 9268 }, { "epoch": 0.53, "grad_norm": 0.3240022249400918, "learning_rate": 9.4327263452176e-06, "loss": 0.1771, "step": 9269 }, { "epoch": 0.53, "grad_norm": 0.3343235297742924, "learning_rate": 9.430868439064813e-06, "loss": 0.3089, "step": 9270 }, { "epoch": 0.53, "grad_norm": 0.6910139457529847, "learning_rate": 9.429010552621027e-06, "loss": 0.4781, "step": 9271 }, { "epoch": 0.53, "grad_norm": 0.35862362126432507, "learning_rate": 9.42715268595058e-06, "loss": 0.243, "step": 9272 }, { "epoch": 0.53, "grad_norm": 0.49851971643361387, "learning_rate": 9.425294839117812e-06, "loss": 0.391, "step": 9273 }, { "epoch": 0.53, "grad_norm": 0.23422338164763837, "learning_rate": 9.423437012187057e-06, "loss": 0.2075, "step": 9274 }, { "epoch": 0.53, "grad_norm": 0.3086926262711173, "learning_rate": 9.421579205222657e-06, "loss": 0.2067, "step": 9275 }, { "epoch": 0.53, "grad_norm": 1.310073728398527, "learning_rate": 9.41972141828894e-06, "loss": 0.6654, "step": 9276 }, { "epoch": 0.53, "grad_norm": 0.5841986899196692, "learning_rate": 9.41786365145025e-06, "loss": 0.3337, "step": 9277 }, { "epoch": 0.53, "grad_norm": 0.2536795044961282, "learning_rate": 9.416005904770916e-06, "loss": 0.2111, "step": 9278 }, { "epoch": 0.53, "grad_norm": 1.2215967277636537, "learning_rate": 9.414148178315268e-06, "loss": 0.8136, "step": 9279 }, { "epoch": 0.53, "grad_norm": 0.3058690669119812, "learning_rate": 9.412290472147648e-06, "loss": 0.1969, "step": 9280 }, { "epoch": 0.53, "grad_norm": 0.7589007743382404, "learning_rate": 9.41043278633238e-06, "loss": 0.409, "step": 9281 }, { "epoch": 0.53, "grad_norm": 0.3057591447527336, "learning_rate": 9.408575120933804e-06, "loss": 0.2498, "step": 9282 }, { "epoch": 0.53, "grad_norm": 0.6995045368657311, "learning_rate": 9.406717476016242e-06, "loss": 0.4688, "step": 9283 }, { "epoch": 0.53, "grad_norm": 0.5984000260809333, "learning_rate": 9.40485985164403e-06, "loss": 0.3472, "step": 9284 }, { "epoch": 0.53, "grad_norm": 0.375960869641853, "learning_rate": 9.403002247881499e-06, "loss": 0.2511, "step": 9285 }, { "epoch": 0.53, "grad_norm": 0.2200191456962238, "learning_rate": 9.40114466479297e-06, "loss": 0.209, "step": 9286 }, { "epoch": 0.53, "grad_norm": 0.5939482634092973, "learning_rate": 9.399287102442776e-06, "loss": 0.333, "step": 9287 }, { "epoch": 0.53, "grad_norm": 0.3786700230040074, "learning_rate": 9.397429560895243e-06, "loss": 0.2799, "step": 9288 }, { "epoch": 0.53, "grad_norm": 0.47843379765938265, "learning_rate": 9.395572040214702e-06, "loss": 0.3526, "step": 9289 }, { "epoch": 0.53, "grad_norm": 0.3725937972585066, "learning_rate": 9.393714540465474e-06, "loss": 0.2929, "step": 9290 }, { "epoch": 0.53, "grad_norm": 0.34822543851312543, "learning_rate": 9.391857061711883e-06, "loss": 0.2587, "step": 9291 }, { "epoch": 0.53, "grad_norm": 0.2866067925550406, "learning_rate": 9.389999604018258e-06, "loss": 0.1637, "step": 9292 }, { "epoch": 0.53, "grad_norm": 0.3738816323207938, "learning_rate": 9.388142167448917e-06, "loss": 0.3084, "step": 9293 }, { "epoch": 0.53, "grad_norm": 0.29667286372725454, "learning_rate": 9.38628475206819e-06, "loss": 0.2788, "step": 9294 }, { "epoch": 0.53, "grad_norm": 0.6007497819778793, "learning_rate": 9.384427357940394e-06, "loss": 0.3506, "step": 9295 }, { "epoch": 0.53, "grad_norm": 0.41917867119031543, "learning_rate": 9.382569985129854e-06, "loss": 0.3215, "step": 9296 }, { "epoch": 0.53, "grad_norm": 0.5345523972264065, "learning_rate": 9.380712633700887e-06, "loss": 0.37, "step": 9297 }, { "epoch": 0.53, "grad_norm": 0.20961728479650352, "learning_rate": 9.378855303717817e-06, "loss": 0.1741, "step": 9298 }, { "epoch": 0.53, "grad_norm": 0.5007677585180572, "learning_rate": 9.376997995244957e-06, "loss": 0.3596, "step": 9299 }, { "epoch": 0.53, "grad_norm": 0.30960347623118034, "learning_rate": 9.375140708346634e-06, "loss": 0.2864, "step": 9300 }, { "epoch": 0.53, "grad_norm": 0.3443777763500569, "learning_rate": 9.373283443087159e-06, "loss": 0.2645, "step": 9301 }, { "epoch": 0.53, "grad_norm": 0.7207968905840805, "learning_rate": 9.371426199530853e-06, "loss": 0.4504, "step": 9302 }, { "epoch": 0.53, "grad_norm": 0.3588460744152287, "learning_rate": 9.369568977742028e-06, "loss": 0.3213, "step": 9303 }, { "epoch": 0.53, "grad_norm": 0.6060702782939552, "learning_rate": 9.367711777785004e-06, "loss": 0.324, "step": 9304 }, { "epoch": 0.53, "grad_norm": 0.3480442813272276, "learning_rate": 9.365854599724096e-06, "loss": 0.2939, "step": 9305 }, { "epoch": 0.53, "grad_norm": 0.23986228711116675, "learning_rate": 9.363997443623612e-06, "loss": 0.253, "step": 9306 }, { "epoch": 0.53, "grad_norm": 0.4254434470268579, "learning_rate": 9.362140309547873e-06, "loss": 0.2891, "step": 9307 }, { "epoch": 0.53, "grad_norm": 0.4305528719251428, "learning_rate": 9.360283197561185e-06, "loss": 0.1963, "step": 9308 }, { "epoch": 0.53, "grad_norm": 0.32013724856043474, "learning_rate": 9.358426107727862e-06, "loss": 0.2812, "step": 9309 }, { "epoch": 0.53, "grad_norm": 0.49958189678242726, "learning_rate": 9.356569040112216e-06, "loss": 0.351, "step": 9310 }, { "epoch": 0.53, "grad_norm": 0.4889885463858359, "learning_rate": 9.354711994778558e-06, "loss": 0.2716, "step": 9311 }, { "epoch": 0.54, "grad_norm": 0.2855203325007413, "learning_rate": 9.352854971791192e-06, "loss": 0.2248, "step": 9312 }, { "epoch": 0.54, "grad_norm": 0.27880863601709716, "learning_rate": 9.350997971214434e-06, "loss": 0.2507, "step": 9313 }, { "epoch": 0.54, "grad_norm": 0.3792599482042661, "learning_rate": 9.349140993112588e-06, "loss": 0.2169, "step": 9314 }, { "epoch": 0.54, "grad_norm": 0.38684532344974176, "learning_rate": 9.347284037549962e-06, "loss": 0.3065, "step": 9315 }, { "epoch": 0.54, "grad_norm": 0.8003084968892227, "learning_rate": 9.34542710459086e-06, "loss": 0.4174, "step": 9316 }, { "epoch": 0.54, "grad_norm": 0.3193010117283392, "learning_rate": 9.343570194299591e-06, "loss": 0.2854, "step": 9317 }, { "epoch": 0.54, "grad_norm": 0.32379332391684124, "learning_rate": 9.341713306740457e-06, "loss": 0.2349, "step": 9318 }, { "epoch": 0.54, "grad_norm": 0.30827057912219974, "learning_rate": 9.339856441977767e-06, "loss": 0.2344, "step": 9319 }, { "epoch": 0.54, "grad_norm": 1.1331119563905843, "learning_rate": 9.337999600075814e-06, "loss": 0.6408, "step": 9320 }, { "epoch": 0.54, "grad_norm": 0.2831832360300413, "learning_rate": 9.336142781098908e-06, "loss": 0.2151, "step": 9321 }, { "epoch": 0.54, "grad_norm": 0.5405839352824342, "learning_rate": 9.33428598511135e-06, "loss": 0.3459, "step": 9322 }, { "epoch": 0.54, "grad_norm": 0.9654227292116724, "learning_rate": 9.332429212177438e-06, "loss": 0.4725, "step": 9323 }, { "epoch": 0.54, "grad_norm": 0.23482472356001713, "learning_rate": 9.330572462361474e-06, "loss": 0.1729, "step": 9324 }, { "epoch": 0.54, "grad_norm": 0.4684689393595989, "learning_rate": 9.328715735727758e-06, "loss": 0.3859, "step": 9325 }, { "epoch": 0.54, "grad_norm": 0.297606052083825, "learning_rate": 9.326859032340585e-06, "loss": 0.2425, "step": 9326 }, { "epoch": 0.54, "grad_norm": 0.4108345649916392, "learning_rate": 9.325002352264257e-06, "loss": 0.2403, "step": 9327 }, { "epoch": 0.54, "grad_norm": 0.9941058792175076, "learning_rate": 9.323145695563067e-06, "loss": 0.4418, "step": 9328 }, { "epoch": 0.54, "grad_norm": 0.32359946359922775, "learning_rate": 9.321289062301313e-06, "loss": 0.2928, "step": 9329 }, { "epoch": 0.54, "grad_norm": 0.3409617393200946, "learning_rate": 9.319432452543292e-06, "loss": 0.2698, "step": 9330 }, { "epoch": 0.54, "grad_norm": 0.24943573775102337, "learning_rate": 9.317575866353293e-06, "loss": 0.151, "step": 9331 }, { "epoch": 0.54, "grad_norm": 0.47506315731861976, "learning_rate": 9.315719303795614e-06, "loss": 0.2959, "step": 9332 }, { "epoch": 0.54, "grad_norm": 0.36402547396764845, "learning_rate": 9.313862764934543e-06, "loss": 0.2759, "step": 9333 }, { "epoch": 0.54, "grad_norm": 0.5439363586977312, "learning_rate": 9.312006249834378e-06, "loss": 0.2909, "step": 9334 }, { "epoch": 0.54, "grad_norm": 0.7236335627092366, "learning_rate": 9.310149758559405e-06, "loss": 0.3895, "step": 9335 }, { "epoch": 0.54, "grad_norm": 0.36068005871087383, "learning_rate": 9.30829329117392e-06, "loss": 0.2747, "step": 9336 }, { "epoch": 0.54, "grad_norm": 0.3165484712620228, "learning_rate": 9.306436847742203e-06, "loss": 0.2673, "step": 9337 }, { "epoch": 0.54, "grad_norm": 0.38534131183340886, "learning_rate": 9.304580428328552e-06, "loss": 0.2414, "step": 9338 }, { "epoch": 0.54, "grad_norm": 0.3115474788513869, "learning_rate": 9.30272403299725e-06, "loss": 0.2647, "step": 9339 }, { "epoch": 0.54, "grad_norm": 0.3285219921891418, "learning_rate": 9.300867661812585e-06, "loss": 0.1915, "step": 9340 }, { "epoch": 0.54, "grad_norm": 0.40017987691315415, "learning_rate": 9.29901131483884e-06, "loss": 0.3231, "step": 9341 }, { "epoch": 0.54, "grad_norm": 0.34327243727753615, "learning_rate": 9.297154992140307e-06, "loss": 0.2567, "step": 9342 }, { "epoch": 0.54, "grad_norm": 1.2467554100617364, "learning_rate": 9.295298693781267e-06, "loss": 0.7486, "step": 9343 }, { "epoch": 0.54, "grad_norm": 0.3513490054263849, "learning_rate": 9.293442419825998e-06, "loss": 0.2287, "step": 9344 }, { "epoch": 0.54, "grad_norm": 0.29446477079644806, "learning_rate": 9.291586170338793e-06, "loss": 0.2604, "step": 9345 }, { "epoch": 0.54, "grad_norm": 0.3906785495034913, "learning_rate": 9.289729945383924e-06, "loss": 0.2681, "step": 9346 }, { "epoch": 0.54, "grad_norm": 0.693760446359131, "learning_rate": 9.28787374502568e-06, "loss": 0.2316, "step": 9347 }, { "epoch": 0.54, "grad_norm": 0.34998798570881384, "learning_rate": 9.286017569328334e-06, "loss": 0.2665, "step": 9348 }, { "epoch": 0.54, "grad_norm": 0.3792652287586318, "learning_rate": 9.284161418356171e-06, "loss": 0.3214, "step": 9349 }, { "epoch": 0.54, "grad_norm": 0.6013268840193028, "learning_rate": 9.282305292173467e-06, "loss": 0.3168, "step": 9350 }, { "epoch": 0.54, "grad_norm": 0.40681134523342405, "learning_rate": 9.280449190844501e-06, "loss": 0.2882, "step": 9351 }, { "epoch": 0.54, "grad_norm": 0.32759946772814447, "learning_rate": 9.278593114433547e-06, "loss": 0.2228, "step": 9352 }, { "epoch": 0.54, "grad_norm": 0.38246028250584957, "learning_rate": 9.276737063004884e-06, "loss": 0.2469, "step": 9353 }, { "epoch": 0.54, "grad_norm": 0.3570238331886624, "learning_rate": 9.274881036622785e-06, "loss": 0.2766, "step": 9354 }, { "epoch": 0.54, "grad_norm": 0.8536521626653547, "learning_rate": 9.273025035351526e-06, "loss": 0.5547, "step": 9355 }, { "epoch": 0.54, "grad_norm": 0.6642206879063963, "learning_rate": 9.271169059255376e-06, "loss": 0.4194, "step": 9356 }, { "epoch": 0.54, "grad_norm": 0.2544235207525964, "learning_rate": 9.269313108398611e-06, "loss": 0.2227, "step": 9357 }, { "epoch": 0.54, "grad_norm": 0.27520770254521076, "learning_rate": 9.2674571828455e-06, "loss": 0.1867, "step": 9358 }, { "epoch": 0.54, "grad_norm": 0.9026816205634214, "learning_rate": 9.265601282660318e-06, "loss": 0.5432, "step": 9359 }, { "epoch": 0.54, "grad_norm": 0.3427639614610134, "learning_rate": 9.263745407907329e-06, "loss": 0.2059, "step": 9360 }, { "epoch": 0.54, "grad_norm": 0.4234555347458187, "learning_rate": 9.261889558650809e-06, "loss": 0.3324, "step": 9361 }, { "epoch": 0.54, "grad_norm": 1.0330872273748664, "learning_rate": 9.260033734955018e-06, "loss": 0.421, "step": 9362 }, { "epoch": 0.54, "grad_norm": 0.3320606941363923, "learning_rate": 9.25817793688423e-06, "loss": 0.2124, "step": 9363 }, { "epoch": 0.54, "grad_norm": 0.32162030886140774, "learning_rate": 9.256322164502704e-06, "loss": 0.1961, "step": 9364 }, { "epoch": 0.54, "grad_norm": 0.3775542765593719, "learning_rate": 9.254466417874714e-06, "loss": 0.3093, "step": 9365 }, { "epoch": 0.54, "grad_norm": 0.3245176121995681, "learning_rate": 9.252610697064516e-06, "loss": 0.1544, "step": 9366 }, { "epoch": 0.54, "grad_norm": 0.6532298265771281, "learning_rate": 9.25075500213638e-06, "loss": 0.3814, "step": 9367 }, { "epoch": 0.54, "grad_norm": 0.39797220000426015, "learning_rate": 9.248899333154565e-06, "loss": 0.3394, "step": 9368 }, { "epoch": 0.54, "grad_norm": 0.3901046536766521, "learning_rate": 9.247043690183334e-06, "loss": 0.2712, "step": 9369 }, { "epoch": 0.54, "grad_norm": 0.2983187384453381, "learning_rate": 9.245188073286949e-06, "loss": 0.1997, "step": 9370 }, { "epoch": 0.54, "grad_norm": 0.4306936639740022, "learning_rate": 9.243332482529665e-06, "loss": 0.3333, "step": 9371 }, { "epoch": 0.54, "grad_norm": 0.4124403701710802, "learning_rate": 9.241476917975748e-06, "loss": 0.2959, "step": 9372 }, { "epoch": 0.54, "grad_norm": 0.3857979000578478, "learning_rate": 9.239621379689452e-06, "loss": 0.2773, "step": 9373 }, { "epoch": 0.54, "grad_norm": 0.7877835736477719, "learning_rate": 9.237765867735035e-06, "loss": 0.5544, "step": 9374 }, { "epoch": 0.54, "grad_norm": 0.3805108940267185, "learning_rate": 9.235910382176751e-06, "loss": 0.2811, "step": 9375 }, { "epoch": 0.54, "grad_norm": 0.2183372803331831, "learning_rate": 9.234054923078862e-06, "loss": 0.1758, "step": 9376 }, { "epoch": 0.54, "grad_norm": 0.44649442283170376, "learning_rate": 9.232199490505613e-06, "loss": 0.3603, "step": 9377 }, { "epoch": 0.54, "grad_norm": 0.3408016275041556, "learning_rate": 9.230344084521266e-06, "loss": 0.2609, "step": 9378 }, { "epoch": 0.54, "grad_norm": 0.8404217815307761, "learning_rate": 9.228488705190069e-06, "loss": 0.3583, "step": 9379 }, { "epoch": 0.54, "grad_norm": 0.34473996252645167, "learning_rate": 9.226633352576276e-06, "loss": 0.3451, "step": 9380 }, { "epoch": 0.54, "grad_norm": 0.3742015521440866, "learning_rate": 9.224778026744135e-06, "loss": 0.2612, "step": 9381 }, { "epoch": 0.54, "grad_norm": 0.9236809927403062, "learning_rate": 9.222922727757899e-06, "loss": 0.5384, "step": 9382 }, { "epoch": 0.54, "grad_norm": 0.2849500682429431, "learning_rate": 9.221067455681817e-06, "loss": 0.1571, "step": 9383 }, { "epoch": 0.54, "grad_norm": 0.4049939669342111, "learning_rate": 9.219212210580132e-06, "loss": 0.318, "step": 9384 }, { "epoch": 0.54, "grad_norm": 0.29913254424780833, "learning_rate": 9.217356992517097e-06, "loss": 0.2886, "step": 9385 }, { "epoch": 0.54, "grad_norm": 1.0583760086865348, "learning_rate": 9.215501801556954e-06, "loss": 0.4293, "step": 9386 }, { "epoch": 0.54, "grad_norm": 0.61751445391918, "learning_rate": 9.213646637763954e-06, "loss": 0.3031, "step": 9387 }, { "epoch": 0.54, "grad_norm": 0.4187489970847992, "learning_rate": 9.211791501202333e-06, "loss": 0.3314, "step": 9388 }, { "epoch": 0.54, "grad_norm": 0.2328530107323132, "learning_rate": 9.209936391936339e-06, "loss": 0.2078, "step": 9389 }, { "epoch": 0.54, "grad_norm": 0.42827604942936515, "learning_rate": 9.208081310030216e-06, "loss": 0.273, "step": 9390 }, { "epoch": 0.54, "grad_norm": 0.5354159507363013, "learning_rate": 9.2062262555482e-06, "loss": 0.3933, "step": 9391 }, { "epoch": 0.54, "grad_norm": 0.4463089490948037, "learning_rate": 9.204371228554538e-06, "loss": 0.2974, "step": 9392 }, { "epoch": 0.54, "grad_norm": 0.3010100233617082, "learning_rate": 9.202516229113462e-06, "loss": 0.2662, "step": 9393 }, { "epoch": 0.54, "grad_norm": 0.508972715684767, "learning_rate": 9.200661257289217e-06, "loss": 0.3506, "step": 9394 }, { "epoch": 0.54, "grad_norm": 0.3217573305101514, "learning_rate": 9.19880631314604e-06, "loss": 0.2201, "step": 9395 }, { "epoch": 0.54, "grad_norm": 0.29336545819378834, "learning_rate": 9.196951396748164e-06, "loss": 0.2371, "step": 9396 }, { "epoch": 0.54, "grad_norm": 0.4756760206599183, "learning_rate": 9.195096508159826e-06, "loss": 0.323, "step": 9397 }, { "epoch": 0.54, "grad_norm": 1.0804796916074983, "learning_rate": 9.193241647445262e-06, "loss": 0.77, "step": 9398 }, { "epoch": 0.54, "grad_norm": 0.3241278568868132, "learning_rate": 9.191386814668704e-06, "loss": 0.201, "step": 9399 }, { "epoch": 0.54, "grad_norm": 1.8403532514129441, "learning_rate": 9.189532009894387e-06, "loss": 0.7033, "step": 9400 }, { "epoch": 0.54, "grad_norm": 0.32997349588142116, "learning_rate": 9.187677233186541e-06, "loss": 0.318, "step": 9401 }, { "epoch": 0.54, "grad_norm": 0.37909775087026976, "learning_rate": 9.185822484609397e-06, "loss": 0.2387, "step": 9402 }, { "epoch": 0.54, "grad_norm": 0.26048756876240237, "learning_rate": 9.183967764227188e-06, "loss": 0.2072, "step": 9403 }, { "epoch": 0.54, "grad_norm": 0.3638139283443146, "learning_rate": 9.182113072104137e-06, "loss": 0.3321, "step": 9404 }, { "epoch": 0.54, "grad_norm": 0.7221700317212014, "learning_rate": 9.180258408304478e-06, "loss": 0.3916, "step": 9405 }, { "epoch": 0.54, "grad_norm": 0.3277302216404148, "learning_rate": 9.178403772892433e-06, "loss": 0.2227, "step": 9406 }, { "epoch": 0.54, "grad_norm": 0.6393616769102479, "learning_rate": 9.176549165932231e-06, "loss": 0.3885, "step": 9407 }, { "epoch": 0.54, "grad_norm": 0.3951954935016409, "learning_rate": 9.174694587488097e-06, "loss": 0.2902, "step": 9408 }, { "epoch": 0.54, "grad_norm": 0.22636610131498916, "learning_rate": 9.17284003762425e-06, "loss": 0.1794, "step": 9409 }, { "epoch": 0.54, "grad_norm": 1.2675540511328809, "learning_rate": 9.170985516404922e-06, "loss": 0.7407, "step": 9410 }, { "epoch": 0.54, "grad_norm": 0.59893272808341, "learning_rate": 9.169131023894325e-06, "loss": 0.2856, "step": 9411 }, { "epoch": 0.54, "grad_norm": 0.28230999589030675, "learning_rate": 9.16727656015669e-06, "loss": 0.2509, "step": 9412 }, { "epoch": 0.54, "grad_norm": 0.8586813691539498, "learning_rate": 9.165422125256228e-06, "loss": 0.4289, "step": 9413 }, { "epoch": 0.54, "grad_norm": 0.5314017571523516, "learning_rate": 9.163567719257164e-06, "loss": 0.3422, "step": 9414 }, { "epoch": 0.54, "grad_norm": 0.31946600380526713, "learning_rate": 9.161713342223711e-06, "loss": 0.2, "step": 9415 }, { "epoch": 0.54, "grad_norm": 0.3639625388147878, "learning_rate": 9.159858994220092e-06, "loss": 0.3099, "step": 9416 }, { "epoch": 0.54, "grad_norm": 0.2937050738295473, "learning_rate": 9.15800467531052e-06, "loss": 0.1719, "step": 9417 }, { "epoch": 0.54, "grad_norm": 0.42166472914570036, "learning_rate": 9.156150385559208e-06, "loss": 0.3034, "step": 9418 }, { "epoch": 0.54, "grad_norm": 1.4713264661584862, "learning_rate": 9.154296125030371e-06, "loss": 0.3491, "step": 9419 }, { "epoch": 0.54, "grad_norm": 0.28848784892017665, "learning_rate": 9.15244189378823e-06, "loss": 0.2629, "step": 9420 }, { "epoch": 0.54, "grad_norm": 0.5160241916906537, "learning_rate": 9.150587691896984e-06, "loss": 0.344, "step": 9421 }, { "epoch": 0.54, "grad_norm": 0.183426975433477, "learning_rate": 9.14873351942085e-06, "loss": 0.1326, "step": 9422 }, { "epoch": 0.54, "grad_norm": 0.7740940117037703, "learning_rate": 9.146879376424037e-06, "loss": 0.3699, "step": 9423 }, { "epoch": 0.54, "grad_norm": 0.36265406185090515, "learning_rate": 9.145025262970757e-06, "loss": 0.2904, "step": 9424 }, { "epoch": 0.54, "grad_norm": 0.7197643231905059, "learning_rate": 9.143171179125212e-06, "loss": 0.2937, "step": 9425 }, { "epoch": 0.54, "grad_norm": 0.6953661847377259, "learning_rate": 9.141317124951613e-06, "loss": 0.349, "step": 9426 }, { "epoch": 0.54, "grad_norm": 0.32798387026370746, "learning_rate": 9.139463100514166e-06, "loss": 0.2851, "step": 9427 }, { "epoch": 0.54, "grad_norm": 0.3583724552430915, "learning_rate": 9.137609105877075e-06, "loss": 0.2518, "step": 9428 }, { "epoch": 0.54, "grad_norm": 0.21343072518400563, "learning_rate": 9.135755141104544e-06, "loss": 0.1528, "step": 9429 }, { "epoch": 0.54, "grad_norm": 0.4644618331947836, "learning_rate": 9.133901206260773e-06, "loss": 0.3436, "step": 9430 }, { "epoch": 0.54, "grad_norm": 0.8170445704875847, "learning_rate": 9.132047301409968e-06, "loss": 0.4853, "step": 9431 }, { "epoch": 0.54, "grad_norm": 0.27928105311252444, "learning_rate": 9.130193426616327e-06, "loss": 0.2082, "step": 9432 }, { "epoch": 0.54, "grad_norm": 0.41359155985409485, "learning_rate": 9.12833958194405e-06, "loss": 0.31, "step": 9433 }, { "epoch": 0.54, "grad_norm": 1.1546078883310094, "learning_rate": 9.126485767457336e-06, "loss": 0.7811, "step": 9434 }, { "epoch": 0.54, "grad_norm": 0.3413134026821165, "learning_rate": 9.124631983220384e-06, "loss": 0.1997, "step": 9435 }, { "epoch": 0.54, "grad_norm": 0.3295985332780704, "learning_rate": 9.122778229297387e-06, "loss": 0.2877, "step": 9436 }, { "epoch": 0.54, "grad_norm": 0.3604904526688763, "learning_rate": 9.120924505752543e-06, "loss": 0.2817, "step": 9437 }, { "epoch": 0.54, "grad_norm": 0.45457565674240885, "learning_rate": 9.119070812650044e-06, "loss": 0.155, "step": 9438 }, { "epoch": 0.54, "grad_norm": 0.4349575002134113, "learning_rate": 9.117217150054087e-06, "loss": 0.3293, "step": 9439 }, { "epoch": 0.54, "grad_norm": 0.40328109153502684, "learning_rate": 9.115363518028858e-06, "loss": 0.3267, "step": 9440 }, { "epoch": 0.54, "grad_norm": 0.7613078562304955, "learning_rate": 9.113509916638557e-06, "loss": 0.2908, "step": 9441 }, { "epoch": 0.54, "grad_norm": 0.27332115307343685, "learning_rate": 9.111656345947367e-06, "loss": 0.2487, "step": 9442 }, { "epoch": 0.54, "grad_norm": 0.4121158185394958, "learning_rate": 9.10980280601948e-06, "loss": 0.2577, "step": 9443 }, { "epoch": 0.54, "grad_norm": 0.34272848135119255, "learning_rate": 9.107949296919084e-06, "loss": 0.3144, "step": 9444 }, { "epoch": 0.54, "grad_norm": 0.3178424358331697, "learning_rate": 9.106095818710367e-06, "loss": 0.2214, "step": 9445 }, { "epoch": 0.54, "grad_norm": 0.8274851482459838, "learning_rate": 9.10424237145751e-06, "loss": 0.4761, "step": 9446 }, { "epoch": 0.54, "grad_norm": 0.4605336475712775, "learning_rate": 9.102388955224703e-06, "loss": 0.3781, "step": 9447 }, { "epoch": 0.54, "grad_norm": 0.2601899726482571, "learning_rate": 9.10053557007613e-06, "loss": 0.2162, "step": 9448 }, { "epoch": 0.54, "grad_norm": 0.2737288914687979, "learning_rate": 9.098682216075968e-06, "loss": 0.1548, "step": 9449 }, { "epoch": 0.54, "grad_norm": 0.7389906969176646, "learning_rate": 9.096828893288404e-06, "loss": 0.3916, "step": 9450 }, { "epoch": 0.54, "grad_norm": 0.35483877986484735, "learning_rate": 9.094975601777615e-06, "loss": 0.231, "step": 9451 }, { "epoch": 0.54, "grad_norm": 0.38272983487780293, "learning_rate": 9.093122341607782e-06, "loss": 0.3315, "step": 9452 }, { "epoch": 0.54, "grad_norm": 0.7223595246219131, "learning_rate": 9.091269112843084e-06, "loss": 0.4254, "step": 9453 }, { "epoch": 0.54, "grad_norm": 0.34796801665638943, "learning_rate": 9.089415915547702e-06, "loss": 0.2188, "step": 9454 }, { "epoch": 0.54, "grad_norm": 0.30394563246761563, "learning_rate": 9.087562749785805e-06, "loss": 0.1796, "step": 9455 }, { "epoch": 0.54, "grad_norm": 0.3333343056858731, "learning_rate": 9.085709615621567e-06, "loss": 0.2878, "step": 9456 }, { "epoch": 0.54, "grad_norm": 0.3613631132845819, "learning_rate": 9.083856513119169e-06, "loss": 0.2873, "step": 9457 }, { "epoch": 0.54, "grad_norm": 0.6852597899585396, "learning_rate": 9.082003442342779e-06, "loss": 0.3644, "step": 9458 }, { "epoch": 0.54, "grad_norm": 0.4615449689732116, "learning_rate": 9.080150403356571e-06, "loss": 0.3572, "step": 9459 }, { "epoch": 0.54, "grad_norm": 0.27980817096660626, "learning_rate": 9.078297396224716e-06, "loss": 0.2563, "step": 9460 }, { "epoch": 0.54, "grad_norm": 0.25145383400299104, "learning_rate": 9.07644442101138e-06, "loss": 0.1177, "step": 9461 }, { "epoch": 0.54, "grad_norm": 1.4001380538324593, "learning_rate": 9.074591477780736e-06, "loss": 0.7624, "step": 9462 }, { "epoch": 0.54, "grad_norm": 0.322723513333794, "learning_rate": 9.072738566596948e-06, "loss": 0.2826, "step": 9463 }, { "epoch": 0.54, "grad_norm": 0.3481210569513449, "learning_rate": 9.070885687524184e-06, "loss": 0.2795, "step": 9464 }, { "epoch": 0.54, "grad_norm": 1.013606524113796, "learning_rate": 9.069032840626608e-06, "loss": 0.6801, "step": 9465 }, { "epoch": 0.54, "grad_norm": 0.3272263147205583, "learning_rate": 9.067180025968387e-06, "loss": 0.2762, "step": 9466 }, { "epoch": 0.54, "grad_norm": 0.20755738352285572, "learning_rate": 9.065327243613679e-06, "loss": 0.0841, "step": 9467 }, { "epoch": 0.54, "grad_norm": 0.35855047633435044, "learning_rate": 9.06347449362665e-06, "loss": 0.3212, "step": 9468 }, { "epoch": 0.54, "grad_norm": 0.3261481520927009, "learning_rate": 9.061621776071458e-06, "loss": 0.2756, "step": 9469 }, { "epoch": 0.54, "grad_norm": 0.6254025257211152, "learning_rate": 9.059769091012265e-06, "loss": 0.3968, "step": 9470 }, { "epoch": 0.54, "grad_norm": 0.3530171497321099, "learning_rate": 9.057916438513226e-06, "loss": 0.2633, "step": 9471 }, { "epoch": 0.54, "grad_norm": 0.36080066395287175, "learning_rate": 9.056063818638502e-06, "loss": 0.3188, "step": 9472 }, { "epoch": 0.54, "grad_norm": 0.2664798556440693, "learning_rate": 9.054211231452248e-06, "loss": 0.2171, "step": 9473 }, { "epoch": 0.54, "grad_norm": 0.5965216255280918, "learning_rate": 9.052358677018615e-06, "loss": 0.2632, "step": 9474 }, { "epoch": 0.54, "grad_norm": 0.376392912726472, "learning_rate": 9.050506155401764e-06, "loss": 0.2729, "step": 9475 }, { "epoch": 0.54, "grad_norm": 0.3404934744042724, "learning_rate": 9.048653666665841e-06, "loss": 0.3248, "step": 9476 }, { "epoch": 0.54, "grad_norm": 1.040043861261599, "learning_rate": 9.046801210875002e-06, "loss": 0.4977, "step": 9477 }, { "epoch": 0.54, "grad_norm": 0.3204967432613155, "learning_rate": 9.044948788093396e-06, "loss": 0.2685, "step": 9478 }, { "epoch": 0.54, "grad_norm": 0.22589665188179028, "learning_rate": 9.043096398385174e-06, "loss": 0.208, "step": 9479 }, { "epoch": 0.54, "grad_norm": 0.4867779438407952, "learning_rate": 9.041244041814479e-06, "loss": 0.3523, "step": 9480 }, { "epoch": 0.54, "grad_norm": 0.3340647249545658, "learning_rate": 9.039391718445466e-06, "loss": 0.2391, "step": 9481 }, { "epoch": 0.54, "grad_norm": 0.8060812933217341, "learning_rate": 9.03753942834227e-06, "loss": 0.5286, "step": 9482 }, { "epoch": 0.54, "grad_norm": 0.518908014472298, "learning_rate": 9.03568717156905e-06, "loss": 0.3698, "step": 9483 }, { "epoch": 0.54, "grad_norm": 0.28165729999453204, "learning_rate": 9.033834948189936e-06, "loss": 0.2253, "step": 9484 }, { "epoch": 0.54, "grad_norm": 1.099977564464965, "learning_rate": 9.031982758269078e-06, "loss": 0.626, "step": 9485 }, { "epoch": 0.55, "grad_norm": 0.40518128341349524, "learning_rate": 9.030130601870615e-06, "loss": 0.2398, "step": 9486 }, { "epoch": 0.55, "grad_norm": 0.27175186242628224, "learning_rate": 9.02827847905869e-06, "loss": 0.2198, "step": 9487 }, { "epoch": 0.55, "grad_norm": 0.47784160961695693, "learning_rate": 9.02642638989744e-06, "loss": 0.3479, "step": 9488 }, { "epoch": 0.55, "grad_norm": 1.1214209878128412, "learning_rate": 9.024574334451002e-06, "loss": 0.7194, "step": 9489 }, { "epoch": 0.55, "grad_norm": 0.32215965041453914, "learning_rate": 9.02272231278351e-06, "loss": 0.1616, "step": 9490 }, { "epoch": 0.55, "grad_norm": 0.28186208190846546, "learning_rate": 9.020870324959103e-06, "loss": 0.2528, "step": 9491 }, { "epoch": 0.55, "grad_norm": 0.4286246139050462, "learning_rate": 9.019018371041914e-06, "loss": 0.3669, "step": 9492 }, { "epoch": 0.55, "grad_norm": 0.433043538846795, "learning_rate": 9.017166451096077e-06, "loss": 0.2856, "step": 9493 }, { "epoch": 0.55, "grad_norm": 0.252036692216471, "learning_rate": 9.015314565185724e-06, "loss": 0.1777, "step": 9494 }, { "epoch": 0.55, "grad_norm": 0.3567792880638821, "learning_rate": 9.013462713374986e-06, "loss": 0.3031, "step": 9495 }, { "epoch": 0.55, "grad_norm": 0.4525301068269673, "learning_rate": 9.01161089572799e-06, "loss": 0.3151, "step": 9496 }, { "epoch": 0.55, "grad_norm": 0.4602195249201141, "learning_rate": 9.009759112308867e-06, "loss": 0.2832, "step": 9497 }, { "epoch": 0.55, "grad_norm": 0.6256387629393889, "learning_rate": 9.007907363181742e-06, "loss": 0.4464, "step": 9498 }, { "epoch": 0.55, "grad_norm": 0.2897790404691268, "learning_rate": 9.006055648410745e-06, "loss": 0.2242, "step": 9499 }, { "epoch": 0.55, "grad_norm": 0.2947409062044821, "learning_rate": 9.004203968059997e-06, "loss": 0.203, "step": 9500 }, { "epoch": 0.55, "grad_norm": 0.8779701484144261, "learning_rate": 9.002352322193622e-06, "loss": 0.5903, "step": 9501 }, { "epoch": 0.55, "grad_norm": 0.31908124321909886, "learning_rate": 9.000500710875746e-06, "loss": 0.2542, "step": 9502 }, { "epoch": 0.55, "grad_norm": 0.450366955751652, "learning_rate": 8.998649134170484e-06, "loss": 0.2701, "step": 9503 }, { "epoch": 0.55, "grad_norm": 0.5082350399387127, "learning_rate": 8.996797592141962e-06, "loss": 0.337, "step": 9504 }, { "epoch": 0.55, "grad_norm": 0.2484863804487214, "learning_rate": 8.994946084854294e-06, "loss": 0.2122, "step": 9505 }, { "epoch": 0.55, "grad_norm": 0.3488933962122594, "learning_rate": 8.9930946123716e-06, "loss": 0.2352, "step": 9506 }, { "epoch": 0.55, "grad_norm": 0.3152421095593874, "learning_rate": 8.991243174757997e-06, "loss": 0.2676, "step": 9507 }, { "epoch": 0.55, "grad_norm": 0.4266176579389591, "learning_rate": 8.9893917720776e-06, "loss": 0.3114, "step": 9508 }, { "epoch": 0.55, "grad_norm": 0.4938255856927612, "learning_rate": 8.987540404394521e-06, "loss": 0.3598, "step": 9509 }, { "epoch": 0.55, "grad_norm": 0.3394737874270989, "learning_rate": 8.985689071772877e-06, "loss": 0.2153, "step": 9510 }, { "epoch": 0.55, "grad_norm": 0.37853025059595635, "learning_rate": 8.983837774276774e-06, "loss": 0.2724, "step": 9511 }, { "epoch": 0.55, "grad_norm": 0.2888364011937344, "learning_rate": 8.981986511970327e-06, "loss": 0.2864, "step": 9512 }, { "epoch": 0.55, "grad_norm": 0.28390144484902724, "learning_rate": 8.980135284917644e-06, "loss": 0.1234, "step": 9513 }, { "epoch": 0.55, "grad_norm": 0.35464412918329336, "learning_rate": 8.97828409318283e-06, "loss": 0.2683, "step": 9514 }, { "epoch": 0.55, "grad_norm": 0.286350130274255, "learning_rate": 8.976432936829995e-06, "loss": 0.2999, "step": 9515 }, { "epoch": 0.55, "grad_norm": 0.6943883642781716, "learning_rate": 8.974581815923242e-06, "loss": 0.3359, "step": 9516 }, { "epoch": 0.55, "grad_norm": 0.40022381769441084, "learning_rate": 8.972730730526679e-06, "loss": 0.243, "step": 9517 }, { "epoch": 0.55, "grad_norm": 0.5276954077075993, "learning_rate": 8.970879680704404e-06, "loss": 0.4043, "step": 9518 }, { "epoch": 0.55, "grad_norm": 0.28500783585719586, "learning_rate": 8.969028666520524e-06, "loss": 0.2542, "step": 9519 }, { "epoch": 0.55, "grad_norm": 0.24384593736610496, "learning_rate": 8.967177688039135e-06, "loss": 0.1619, "step": 9520 }, { "epoch": 0.55, "grad_norm": 0.48283497815577264, "learning_rate": 8.96532674532434e-06, "loss": 0.3448, "step": 9521 }, { "epoch": 0.55, "grad_norm": 0.8189859030284328, "learning_rate": 8.963475838440237e-06, "loss": 0.4574, "step": 9522 }, { "epoch": 0.55, "grad_norm": 0.2617952258223295, "learning_rate": 8.961624967450917e-06, "loss": 0.2188, "step": 9523 }, { "epoch": 0.55, "grad_norm": 0.48019246869526944, "learning_rate": 8.959774132420481e-06, "loss": 0.3712, "step": 9524 }, { "epoch": 0.55, "grad_norm": 0.29000128064258673, "learning_rate": 8.957923333413024e-06, "loss": 0.2129, "step": 9525 }, { "epoch": 0.55, "grad_norm": 0.526363917290331, "learning_rate": 8.956072570492635e-06, "loss": 0.2352, "step": 9526 }, { "epoch": 0.55, "grad_norm": 0.2704870327838035, "learning_rate": 8.954221843723409e-06, "loss": 0.2755, "step": 9527 }, { "epoch": 0.55, "grad_norm": 1.0257404142025073, "learning_rate": 8.952371153169435e-06, "loss": 0.4987, "step": 9528 }, { "epoch": 0.55, "grad_norm": 0.2940447139600711, "learning_rate": 8.950520498894803e-06, "loss": 0.1177, "step": 9529 }, { "epoch": 0.55, "grad_norm": 0.38604536877936113, "learning_rate": 8.9486698809636e-06, "loss": 0.3443, "step": 9530 }, { "epoch": 0.55, "grad_norm": 0.33418039366402746, "learning_rate": 8.946819299439915e-06, "loss": 0.3424, "step": 9531 }, { "epoch": 0.55, "grad_norm": 0.5842996172246837, "learning_rate": 8.944968754387832e-06, "loss": 0.3918, "step": 9532 }, { "epoch": 0.55, "grad_norm": 0.20671343129546188, "learning_rate": 8.943118245871437e-06, "loss": 0.1549, "step": 9533 }, { "epoch": 0.55, "grad_norm": 0.8778299994060808, "learning_rate": 8.941267773954809e-06, "loss": 0.475, "step": 9534 }, { "epoch": 0.55, "grad_norm": 0.3016216513617832, "learning_rate": 8.939417338702034e-06, "loss": 0.285, "step": 9535 }, { "epoch": 0.55, "grad_norm": 0.3695515460906184, "learning_rate": 8.93756694017719e-06, "loss": 0.2673, "step": 9536 }, { "epoch": 0.55, "grad_norm": 0.7614491699387651, "learning_rate": 8.935716578444358e-06, "loss": 0.4398, "step": 9537 }, { "epoch": 0.55, "grad_norm": 0.341722836767795, "learning_rate": 8.933866253567615e-06, "loss": 0.2352, "step": 9538 }, { "epoch": 0.55, "grad_norm": 0.25011501068791103, "learning_rate": 8.932015965611039e-06, "loss": 0.2378, "step": 9539 }, { "epoch": 0.55, "grad_norm": 0.39852031588177406, "learning_rate": 8.930165714638705e-06, "loss": 0.1926, "step": 9540 }, { "epoch": 0.55, "grad_norm": 0.5183265685725699, "learning_rate": 8.928315500714682e-06, "loss": 0.3365, "step": 9541 }, { "epoch": 0.55, "grad_norm": 0.3523020413766487, "learning_rate": 8.92646532390305e-06, "loss": 0.2559, "step": 9542 }, { "epoch": 0.55, "grad_norm": 0.4905252780482375, "learning_rate": 8.924615184267876e-06, "loss": 0.3216, "step": 9543 }, { "epoch": 0.55, "grad_norm": 0.5270482680361739, "learning_rate": 8.922765081873235e-06, "loss": 0.3081, "step": 9544 }, { "epoch": 0.55, "grad_norm": 0.25970806818561254, "learning_rate": 8.92091501678319e-06, "loss": 0.2239, "step": 9545 }, { "epoch": 0.55, "grad_norm": 0.3026984576541297, "learning_rate": 8.919064989061813e-06, "loss": 0.194, "step": 9546 }, { "epoch": 0.55, "grad_norm": 0.4164454229618692, "learning_rate": 8.917214998773169e-06, "loss": 0.3015, "step": 9547 }, { "epoch": 0.55, "grad_norm": 0.32582574801627273, "learning_rate": 8.915365045981323e-06, "loss": 0.285, "step": 9548 }, { "epoch": 0.55, "grad_norm": 0.7217319400615111, "learning_rate": 8.913515130750336e-06, "loss": 0.3338, "step": 9549 }, { "epoch": 0.55, "grad_norm": 0.7820623728911443, "learning_rate": 8.911665253144277e-06, "loss": 0.4671, "step": 9550 }, { "epoch": 0.55, "grad_norm": 0.24558093961398544, "learning_rate": 8.9098154132272e-06, "loss": 0.2646, "step": 9551 }, { "epoch": 0.55, "grad_norm": 0.19719586673338096, "learning_rate": 8.907965611063173e-06, "loss": 0.073, "step": 9552 }, { "epoch": 0.55, "grad_norm": 0.5547086944121592, "learning_rate": 8.906115846716247e-06, "loss": 0.3461, "step": 9553 }, { "epoch": 0.55, "grad_norm": 0.3694099603084277, "learning_rate": 8.904266120250483e-06, "loss": 0.3258, "step": 9554 }, { "epoch": 0.55, "grad_norm": 0.4425574975466733, "learning_rate": 8.902416431729939e-06, "loss": 0.2769, "step": 9555 }, { "epoch": 0.55, "grad_norm": 0.5256141648470348, "learning_rate": 8.900566781218665e-06, "loss": 0.3479, "step": 9556 }, { "epoch": 0.55, "grad_norm": 0.3022772910244815, "learning_rate": 8.898717168780713e-06, "loss": 0.2375, "step": 9557 }, { "epoch": 0.55, "grad_norm": 0.409281807309569, "learning_rate": 8.896867594480141e-06, "loss": 0.2499, "step": 9558 }, { "epoch": 0.55, "grad_norm": 0.3148261067696356, "learning_rate": 8.895018058380995e-06, "loss": 0.2314, "step": 9559 }, { "epoch": 0.55, "grad_norm": 0.3935927137427299, "learning_rate": 8.893168560547327e-06, "loss": 0.3369, "step": 9560 }, { "epoch": 0.55, "grad_norm": 0.6398746892434829, "learning_rate": 8.891319101043181e-06, "loss": 0.4199, "step": 9561 }, { "epoch": 0.55, "grad_norm": 0.39721004210862404, "learning_rate": 8.889469679932612e-06, "loss": 0.2113, "step": 9562 }, { "epoch": 0.55, "grad_norm": 0.28574456478066207, "learning_rate": 8.887620297279656e-06, "loss": 0.2957, "step": 9563 }, { "epoch": 0.55, "grad_norm": 0.26193897143332595, "learning_rate": 8.885770953148364e-06, "loss": 0.1744, "step": 9564 }, { "epoch": 0.55, "grad_norm": 0.6740258025951248, "learning_rate": 8.883921647602777e-06, "loss": 0.2764, "step": 9565 }, { "epoch": 0.55, "grad_norm": 0.3760220070940269, "learning_rate": 8.882072380706931e-06, "loss": 0.3045, "step": 9566 }, { "epoch": 0.55, "grad_norm": 0.3810517214167689, "learning_rate": 8.880223152524875e-06, "loss": 0.3385, "step": 9567 }, { "epoch": 0.55, "grad_norm": 0.9536008985955754, "learning_rate": 8.87837396312064e-06, "loss": 0.5612, "step": 9568 }, { "epoch": 0.55, "grad_norm": 0.22293813348609617, "learning_rate": 8.876524812558269e-06, "loss": 0.1594, "step": 9569 }, { "epoch": 0.55, "grad_norm": 0.3957805630650527, "learning_rate": 8.874675700901791e-06, "loss": 0.2501, "step": 9570 }, { "epoch": 0.55, "grad_norm": 0.38770512159826404, "learning_rate": 8.87282662821525e-06, "loss": 0.3198, "step": 9571 }, { "epoch": 0.55, "grad_norm": 0.3391420199028762, "learning_rate": 8.87097759456267e-06, "loss": 0.2365, "step": 9572 }, { "epoch": 0.55, "grad_norm": 1.202499855422108, "learning_rate": 8.869128600008092e-06, "loss": 0.818, "step": 9573 }, { "epoch": 0.55, "grad_norm": 0.45885122466342415, "learning_rate": 8.867279644615537e-06, "loss": 0.3281, "step": 9574 }, { "epoch": 0.55, "grad_norm": 0.31119385570500957, "learning_rate": 8.865430728449043e-06, "loss": 0.2231, "step": 9575 }, { "epoch": 0.55, "grad_norm": 0.2595652472913027, "learning_rate": 8.863581851572633e-06, "loss": 0.1539, "step": 9576 }, { "epoch": 0.55, "grad_norm": 0.4554085077648678, "learning_rate": 8.861733014050334e-06, "loss": 0.3657, "step": 9577 }, { "epoch": 0.55, "grad_norm": 0.3500096854930678, "learning_rate": 8.859884215946174e-06, "loss": 0.2237, "step": 9578 }, { "epoch": 0.55, "grad_norm": 0.3774828819594551, "learning_rate": 8.858035457324172e-06, "loss": 0.3163, "step": 9579 }, { "epoch": 0.55, "grad_norm": 0.7309407493660592, "learning_rate": 8.856186738248355e-06, "loss": 0.4135, "step": 9580 }, { "epoch": 0.55, "grad_norm": 0.3706488154985834, "learning_rate": 8.85433805878274e-06, "loss": 0.288, "step": 9581 }, { "epoch": 0.55, "grad_norm": 0.21088115991413472, "learning_rate": 8.85248941899135e-06, "loss": 0.1959, "step": 9582 }, { "epoch": 0.55, "grad_norm": 0.850973894592171, "learning_rate": 8.850640818938202e-06, "loss": 0.4281, "step": 9583 }, { "epoch": 0.55, "grad_norm": 0.31743512024875603, "learning_rate": 8.848792258687312e-06, "loss": 0.2678, "step": 9584 }, { "epoch": 0.55, "grad_norm": 0.773444547415871, "learning_rate": 8.846943738302697e-06, "loss": 0.5385, "step": 9585 }, { "epoch": 0.55, "grad_norm": 0.40078285238849626, "learning_rate": 8.845095257848372e-06, "loss": 0.3058, "step": 9586 }, { "epoch": 0.55, "grad_norm": 0.364521054432038, "learning_rate": 8.843246817388345e-06, "loss": 0.2559, "step": 9587 }, { "epoch": 0.55, "grad_norm": 1.3184618070839194, "learning_rate": 8.841398416986635e-06, "loss": 0.3191, "step": 9588 }, { "epoch": 0.55, "grad_norm": 0.3073635659674038, "learning_rate": 8.83955005670725e-06, "loss": 0.2267, "step": 9589 }, { "epoch": 0.55, "grad_norm": 0.2940086234938814, "learning_rate": 8.837701736614194e-06, "loss": 0.2482, "step": 9590 }, { "epoch": 0.55, "grad_norm": 0.29719764315455904, "learning_rate": 8.835853456771476e-06, "loss": 0.2412, "step": 9591 }, { "epoch": 0.55, "grad_norm": 0.9759795203201609, "learning_rate": 8.834005217243103e-06, "loss": 0.593, "step": 9592 }, { "epoch": 0.55, "grad_norm": 0.3481287823907185, "learning_rate": 8.832157018093078e-06, "loss": 0.2459, "step": 9593 }, { "epoch": 0.55, "grad_norm": 0.5531869767322063, "learning_rate": 8.830308859385408e-06, "loss": 0.3371, "step": 9594 }, { "epoch": 0.55, "grad_norm": 0.34635142391335155, "learning_rate": 8.828460741184089e-06, "loss": 0.2135, "step": 9595 }, { "epoch": 0.55, "grad_norm": 0.4122181775243568, "learning_rate": 8.826612663553126e-06, "loss": 0.2867, "step": 9596 }, { "epoch": 0.55, "grad_norm": 0.4546733222643512, "learning_rate": 8.824764626556514e-06, "loss": 0.3519, "step": 9597 }, { "epoch": 0.55, "grad_norm": 0.30491457595218185, "learning_rate": 8.822916630258255e-06, "loss": 0.2441, "step": 9598 }, { "epoch": 0.55, "grad_norm": 0.35806364278988395, "learning_rate": 8.82106867472234e-06, "loss": 0.2882, "step": 9599 }, { "epoch": 0.55, "grad_norm": 0.6824198286469831, "learning_rate": 8.819220760012768e-06, "loss": 0.4241, "step": 9600 }, { "epoch": 0.55, "grad_norm": 0.2923506374089054, "learning_rate": 8.81737288619353e-06, "loss": 0.1254, "step": 9601 }, { "epoch": 0.55, "grad_norm": 0.3244416849157942, "learning_rate": 8.815525053328617e-06, "loss": 0.2596, "step": 9602 }, { "epoch": 0.55, "grad_norm": 0.28496715222960844, "learning_rate": 8.81367726148202e-06, "loss": 0.2617, "step": 9603 }, { "epoch": 0.55, "grad_norm": 0.7812979157651535, "learning_rate": 8.811829510717731e-06, "loss": 0.3444, "step": 9604 }, { "epoch": 0.55, "grad_norm": 0.31375336364159706, "learning_rate": 8.809981801099735e-06, "loss": 0.2695, "step": 9605 }, { "epoch": 0.55, "grad_norm": 0.392233640141547, "learning_rate": 8.808134132692015e-06, "loss": 0.3176, "step": 9606 }, { "epoch": 0.55, "grad_norm": 1.3762085157918527, "learning_rate": 8.806286505558563e-06, "loss": 0.6777, "step": 9607 }, { "epoch": 0.55, "grad_norm": 0.3082493565251616, "learning_rate": 8.804438919763352e-06, "loss": 0.219, "step": 9608 }, { "epoch": 0.55, "grad_norm": 0.4539330693613155, "learning_rate": 8.802591375370375e-06, "loss": 0.2814, "step": 9609 }, { "epoch": 0.55, "grad_norm": 0.30749822562633533, "learning_rate": 8.800743872443605e-06, "loss": 0.2762, "step": 9610 }, { "epoch": 0.55, "grad_norm": 0.3004335057158179, "learning_rate": 8.798896411047024e-06, "loss": 0.2093, "step": 9611 }, { "epoch": 0.55, "grad_norm": 0.651005317404619, "learning_rate": 8.797048991244606e-06, "loss": 0.4288, "step": 9612 }, { "epoch": 0.55, "grad_norm": 0.9232587293760873, "learning_rate": 8.795201613100334e-06, "loss": 0.627, "step": 9613 }, { "epoch": 0.55, "grad_norm": 0.2907613834716408, "learning_rate": 8.793354276678176e-06, "loss": 0.1997, "step": 9614 }, { "epoch": 0.55, "grad_norm": 0.3186326974408788, "learning_rate": 8.791506982042107e-06, "loss": 0.2948, "step": 9615 }, { "epoch": 0.55, "grad_norm": 0.5159427141535858, "learning_rate": 8.789659729256099e-06, "loss": 0.2764, "step": 9616 }, { "epoch": 0.55, "grad_norm": 0.3412013433538205, "learning_rate": 8.787812518384125e-06, "loss": 0.1942, "step": 9617 }, { "epoch": 0.55, "grad_norm": 0.35941120379477665, "learning_rate": 8.78596534949015e-06, "loss": 0.283, "step": 9618 }, { "epoch": 0.55, "grad_norm": 1.0007832706509505, "learning_rate": 8.784118222638142e-06, "loss": 0.6069, "step": 9619 }, { "epoch": 0.55, "grad_norm": 0.4190917204278424, "learning_rate": 8.78227113789207e-06, "loss": 0.3474, "step": 9620 }, { "epoch": 0.55, "grad_norm": 0.25232330543617304, "learning_rate": 8.780424095315893e-06, "loss": 0.2104, "step": 9621 }, { "epoch": 0.55, "grad_norm": 0.3665822398797408, "learning_rate": 8.778577094973579e-06, "loss": 0.3238, "step": 9622 }, { "epoch": 0.55, "grad_norm": 0.41883098987828316, "learning_rate": 8.77673013692909e-06, "loss": 0.3045, "step": 9623 }, { "epoch": 0.55, "grad_norm": 0.3007795637019158, "learning_rate": 8.77488322124638e-06, "loss": 0.1753, "step": 9624 }, { "epoch": 0.55, "grad_norm": 1.344525945362686, "learning_rate": 8.773036347989413e-06, "loss": 0.7523, "step": 9625 }, { "epoch": 0.55, "grad_norm": 0.26911175627901446, "learning_rate": 8.771189517222143e-06, "loss": 0.265, "step": 9626 }, { "epoch": 0.55, "grad_norm": 0.42709579508130063, "learning_rate": 8.769342729008529e-06, "loss": 0.2923, "step": 9627 }, { "epoch": 0.55, "grad_norm": 0.6199825882275222, "learning_rate": 8.767495983412521e-06, "loss": 0.3981, "step": 9628 }, { "epoch": 0.55, "grad_norm": 0.25178532363408446, "learning_rate": 8.765649280498076e-06, "loss": 0.197, "step": 9629 }, { "epoch": 0.55, "grad_norm": 0.2845849191707805, "learning_rate": 8.763802620329146e-06, "loss": 0.2088, "step": 9630 }, { "epoch": 0.55, "grad_norm": 1.0896261704932444, "learning_rate": 8.761956002969672e-06, "loss": 0.7473, "step": 9631 }, { "epoch": 0.55, "grad_norm": 0.5818223951584665, "learning_rate": 8.760109428483613e-06, "loss": 0.3236, "step": 9632 }, { "epoch": 0.55, "grad_norm": 0.41990213718374075, "learning_rate": 8.758262896934909e-06, "loss": 0.3098, "step": 9633 }, { "epoch": 0.55, "grad_norm": 0.399969974428762, "learning_rate": 8.756416408387507e-06, "loss": 0.2806, "step": 9634 }, { "epoch": 0.55, "grad_norm": 0.3299745775632892, "learning_rate": 8.754569962905351e-06, "loss": 0.2081, "step": 9635 }, { "epoch": 0.55, "grad_norm": 0.3008769243224494, "learning_rate": 8.752723560552386e-06, "loss": 0.2482, "step": 9636 }, { "epoch": 0.55, "grad_norm": 0.9013635666683019, "learning_rate": 8.750877201392547e-06, "loss": 0.3913, "step": 9637 }, { "epoch": 0.55, "grad_norm": 0.28342819302573247, "learning_rate": 8.749030885489782e-06, "loss": 0.2537, "step": 9638 }, { "epoch": 0.55, "grad_norm": 0.5810023181066697, "learning_rate": 8.747184612908019e-06, "loss": 0.3466, "step": 9639 }, { "epoch": 0.55, "grad_norm": 0.6679312576871723, "learning_rate": 8.745338383711202e-06, "loss": 0.3194, "step": 9640 }, { "epoch": 0.55, "grad_norm": 0.29402336429450904, "learning_rate": 8.74349219796326e-06, "loss": 0.2023, "step": 9641 }, { "epoch": 0.55, "grad_norm": 0.26832438600589964, "learning_rate": 8.741646055728133e-06, "loss": 0.2494, "step": 9642 }, { "epoch": 0.55, "grad_norm": 1.104583917262513, "learning_rate": 8.739799957069747e-06, "loss": 0.681, "step": 9643 }, { "epoch": 0.55, "grad_norm": 0.35403449470833537, "learning_rate": 8.737953902052031e-06, "loss": 0.2109, "step": 9644 }, { "epoch": 0.55, "grad_norm": 0.6163331604318542, "learning_rate": 8.736107890738922e-06, "loss": 0.3423, "step": 9645 }, { "epoch": 0.55, "grad_norm": 0.44648031150704204, "learning_rate": 8.73426192319434e-06, "loss": 0.3083, "step": 9646 }, { "epoch": 0.55, "grad_norm": 0.32973426498978503, "learning_rate": 8.732415999482214e-06, "loss": 0.2018, "step": 9647 }, { "epoch": 0.55, "grad_norm": 0.29871322069317574, "learning_rate": 8.730570119666465e-06, "loss": 0.18, "step": 9648 }, { "epoch": 0.55, "grad_norm": 0.5177502319297812, "learning_rate": 8.728724283811024e-06, "loss": 0.3988, "step": 9649 }, { "epoch": 0.55, "grad_norm": 0.3241226683385959, "learning_rate": 8.7268784919798e-06, "loss": 0.2099, "step": 9650 }, { "epoch": 0.55, "grad_norm": 0.45021331398071146, "learning_rate": 8.725032744236723e-06, "loss": 0.3137, "step": 9651 }, { "epoch": 0.55, "grad_norm": 0.7969625967830009, "learning_rate": 8.723187040645704e-06, "loss": 0.5277, "step": 9652 }, { "epoch": 0.55, "grad_norm": 0.27981093470443125, "learning_rate": 8.721341381270668e-06, "loss": 0.0984, "step": 9653 }, { "epoch": 0.55, "grad_norm": 0.2597047101805497, "learning_rate": 8.719495766175519e-06, "loss": 0.2336, "step": 9654 }, { "epoch": 0.55, "grad_norm": 1.1135473319190738, "learning_rate": 8.717650195424182e-06, "loss": 0.6095, "step": 9655 }, { "epoch": 0.55, "grad_norm": 0.6191392989069763, "learning_rate": 8.715804669080559e-06, "loss": 0.3826, "step": 9656 }, { "epoch": 0.55, "grad_norm": 0.3043730092485221, "learning_rate": 8.713959187208572e-06, "loss": 0.2419, "step": 9657 }, { "epoch": 0.55, "grad_norm": 0.4603338423923025, "learning_rate": 8.712113749872117e-06, "loss": 0.3772, "step": 9658 }, { "epoch": 0.55, "grad_norm": 0.5274180041754517, "learning_rate": 8.710268357135109e-06, "loss": 0.3372, "step": 9659 }, { "epoch": 0.56, "grad_norm": 0.20931527382171677, "learning_rate": 8.70842300906145e-06, "loss": 0.1499, "step": 9660 }, { "epoch": 0.56, "grad_norm": 0.45032579061938405, "learning_rate": 8.70657770571505e-06, "loss": 0.3645, "step": 9661 }, { "epoch": 0.56, "grad_norm": 0.4065200216698055, "learning_rate": 8.704732447159807e-06, "loss": 0.2926, "step": 9662 }, { "epoch": 0.56, "grad_norm": 0.41762152764098875, "learning_rate": 8.702887233459625e-06, "loss": 0.2756, "step": 9663 }, { "epoch": 0.56, "grad_norm": 1.3246494443880283, "learning_rate": 8.7010420646784e-06, "loss": 0.8345, "step": 9664 }, { "epoch": 0.56, "grad_norm": 0.34809237419193545, "learning_rate": 8.699196940880032e-06, "loss": 0.2729, "step": 9665 }, { "epoch": 0.56, "grad_norm": 0.2492864391185059, "learning_rate": 8.69735186212842e-06, "loss": 0.2019, "step": 9666 }, { "epoch": 0.56, "grad_norm": 0.5684745090978655, "learning_rate": 8.695506828487457e-06, "loss": 0.285, "step": 9667 }, { "epoch": 0.56, "grad_norm": 0.4835198325712554, "learning_rate": 8.693661840021035e-06, "loss": 0.3404, "step": 9668 }, { "epoch": 0.56, "grad_norm": 0.41002756921375183, "learning_rate": 8.691816896793049e-06, "loss": 0.3262, "step": 9669 }, { "epoch": 0.56, "grad_norm": 0.3301324479001951, "learning_rate": 8.689971998867386e-06, "loss": 0.271, "step": 9670 }, { "epoch": 0.56, "grad_norm": 0.8899810603812406, "learning_rate": 8.688127146307938e-06, "loss": 0.4589, "step": 9671 }, { "epoch": 0.56, "grad_norm": 0.26078813677723967, "learning_rate": 8.68628233917859e-06, "loss": 0.2169, "step": 9672 }, { "epoch": 0.56, "grad_norm": 0.2721884189437094, "learning_rate": 8.684437577543227e-06, "loss": 0.2406, "step": 9673 }, { "epoch": 0.56, "grad_norm": 0.5468489073374028, "learning_rate": 8.682592861465735e-06, "loss": 0.3952, "step": 9674 }, { "epoch": 0.56, "grad_norm": 0.3647467307258367, "learning_rate": 8.680748191009995e-06, "loss": 0.295, "step": 9675 }, { "epoch": 0.56, "grad_norm": 0.7706378980657368, "learning_rate": 8.67890356623989e-06, "loss": 0.3905, "step": 9676 }, { "epoch": 0.56, "grad_norm": 0.3748336812202746, "learning_rate": 8.677058987219294e-06, "loss": 0.3049, "step": 9677 }, { "epoch": 0.56, "grad_norm": 0.2454214308007065, "learning_rate": 8.675214454012092e-06, "loss": 0.233, "step": 9678 }, { "epoch": 0.56, "grad_norm": 0.45322202546424467, "learning_rate": 8.673369966682154e-06, "loss": 0.2341, "step": 9679 }, { "epoch": 0.56, "grad_norm": 0.5365199502435255, "learning_rate": 8.67152552529336e-06, "loss": 0.3428, "step": 9680 }, { "epoch": 0.56, "grad_norm": 0.4117158142211994, "learning_rate": 8.669681129909578e-06, "loss": 0.3221, "step": 9681 }, { "epoch": 0.56, "grad_norm": 0.34998858461194654, "learning_rate": 8.667836780594682e-06, "loss": 0.336, "step": 9682 }, { "epoch": 0.56, "grad_norm": 0.3850112115715607, "learning_rate": 8.665992477412541e-06, "loss": 0.1246, "step": 9683 }, { "epoch": 0.56, "grad_norm": 0.3888304992876215, "learning_rate": 8.664148220427023e-06, "loss": 0.3117, "step": 9684 }, { "epoch": 0.56, "grad_norm": 0.38035497224060644, "learning_rate": 8.662304009701994e-06, "loss": 0.3066, "step": 9685 }, { "epoch": 0.56, "grad_norm": 0.2546173089481329, "learning_rate": 8.66045984530132e-06, "loss": 0.1335, "step": 9686 }, { "epoch": 0.56, "grad_norm": 0.35621827719168836, "learning_rate": 8.658615727288863e-06, "loss": 0.2854, "step": 9687 }, { "epoch": 0.56, "grad_norm": 1.0338373334263178, "learning_rate": 8.656771655728487e-06, "loss": 0.6266, "step": 9688 }, { "epoch": 0.56, "grad_norm": 0.3064225676834638, "learning_rate": 8.65492763068405e-06, "loss": 0.2283, "step": 9689 }, { "epoch": 0.56, "grad_norm": 0.33605010056283907, "learning_rate": 8.653083652219417e-06, "loss": 0.302, "step": 9690 }, { "epoch": 0.56, "grad_norm": 0.6969521748952202, "learning_rate": 8.651239720398433e-06, "loss": 0.4025, "step": 9691 }, { "epoch": 0.56, "grad_norm": 0.22137321403803517, "learning_rate": 8.64939583528496e-06, "loss": 0.114, "step": 9692 }, { "epoch": 0.56, "grad_norm": 0.31067351486742123, "learning_rate": 8.647551996942852e-06, "loss": 0.2666, "step": 9693 }, { "epoch": 0.56, "grad_norm": 0.3017237884993278, "learning_rate": 8.645708205435959e-06, "loss": 0.237, "step": 9694 }, { "epoch": 0.56, "grad_norm": 0.7947918989546467, "learning_rate": 8.643864460828135e-06, "loss": 0.4867, "step": 9695 }, { "epoch": 0.56, "grad_norm": 0.30815653925994546, "learning_rate": 8.642020763183224e-06, "loss": 0.2316, "step": 9696 }, { "epoch": 0.56, "grad_norm": 0.4541266781213096, "learning_rate": 8.640177112565078e-06, "loss": 0.3681, "step": 9697 }, { "epoch": 0.56, "grad_norm": 0.2920545878428287, "learning_rate": 8.638333509037537e-06, "loss": 0.2506, "step": 9698 }, { "epoch": 0.56, "grad_norm": 0.3173736734585809, "learning_rate": 8.63648995266445e-06, "loss": 0.211, "step": 9699 }, { "epoch": 0.56, "grad_norm": 0.38440130858759364, "learning_rate": 8.634646443509656e-06, "loss": 0.2679, "step": 9700 }, { "epoch": 0.56, "grad_norm": 0.32061793907154607, "learning_rate": 8.632802981636998e-06, "loss": 0.3013, "step": 9701 }, { "epoch": 0.56, "grad_norm": 0.31661079459501085, "learning_rate": 8.630959567110314e-06, "loss": 0.2308, "step": 9702 }, { "epoch": 0.56, "grad_norm": 0.839441115520653, "learning_rate": 8.629116199993441e-06, "loss": 0.4207, "step": 9703 }, { "epoch": 0.56, "grad_norm": 1.1032675562865573, "learning_rate": 8.627272880350214e-06, "loss": 0.7347, "step": 9704 }, { "epoch": 0.56, "grad_norm": 0.26791299634997684, "learning_rate": 8.62542960824447e-06, "loss": 0.212, "step": 9705 }, { "epoch": 0.56, "grad_norm": 0.2568316260810858, "learning_rate": 8.623586383740037e-06, "loss": 0.2, "step": 9706 }, { "epoch": 0.56, "grad_norm": 1.0746648875443938, "learning_rate": 8.621743206900752e-06, "loss": 0.407, "step": 9707 }, { "epoch": 0.56, "grad_norm": 0.3060183253023324, "learning_rate": 8.619900077790439e-06, "loss": 0.2603, "step": 9708 }, { "epoch": 0.56, "grad_norm": 0.3354086747479937, "learning_rate": 8.618056996472925e-06, "loss": 0.2808, "step": 9709 }, { "epoch": 0.56, "grad_norm": 1.25565098457294, "learning_rate": 8.616213963012042e-06, "loss": 0.7637, "step": 9710 }, { "epoch": 0.56, "grad_norm": 0.34005069327044857, "learning_rate": 8.614370977471604e-06, "loss": 0.27, "step": 9711 }, { "epoch": 0.56, "grad_norm": 0.18000557563902286, "learning_rate": 8.612528039915444e-06, "loss": 0.0875, "step": 9712 }, { "epoch": 0.56, "grad_norm": 0.5228609692059021, "learning_rate": 8.610685150407376e-06, "loss": 0.3247, "step": 9713 }, { "epoch": 0.56, "grad_norm": 0.3874828243771713, "learning_rate": 8.608842309011224e-06, "loss": 0.2822, "step": 9714 }, { "epoch": 0.56, "grad_norm": 0.9765194619225318, "learning_rate": 8.606999515790801e-06, "loss": 0.2146, "step": 9715 }, { "epoch": 0.56, "grad_norm": 0.4766873680274542, "learning_rate": 8.605156770809926e-06, "loss": 0.3851, "step": 9716 }, { "epoch": 0.56, "grad_norm": 0.33428061403293535, "learning_rate": 8.603314074132411e-06, "loss": 0.2666, "step": 9717 }, { "epoch": 0.56, "grad_norm": 0.29411656533246155, "learning_rate": 8.60147142582207e-06, "loss": 0.2137, "step": 9718 }, { "epoch": 0.56, "grad_norm": 0.45824528014285415, "learning_rate": 8.599628825942713e-06, "loss": 0.2705, "step": 9719 }, { "epoch": 0.56, "grad_norm": 0.37961707152946494, "learning_rate": 8.597786274558152e-06, "loss": 0.2672, "step": 9720 }, { "epoch": 0.56, "grad_norm": 0.375183535077221, "learning_rate": 8.595943771732187e-06, "loss": 0.3041, "step": 9721 }, { "epoch": 0.56, "grad_norm": 1.2665256641960105, "learning_rate": 8.594101317528634e-06, "loss": 0.4257, "step": 9722 }, { "epoch": 0.56, "grad_norm": 0.3198305090286384, "learning_rate": 8.59225891201129e-06, "loss": 0.227, "step": 9723 }, { "epoch": 0.56, "grad_norm": 0.5603332070743713, "learning_rate": 8.590416555243962e-06, "loss": 0.4177, "step": 9724 }, { "epoch": 0.56, "grad_norm": 0.34251976719886146, "learning_rate": 8.588574247290444e-06, "loss": 0.2623, "step": 9725 }, { "epoch": 0.56, "grad_norm": 0.26791062951199285, "learning_rate": 8.586731988214542e-06, "loss": 0.21, "step": 9726 }, { "epoch": 0.56, "grad_norm": 0.43863608264963566, "learning_rate": 8.584889778080049e-06, "loss": 0.2461, "step": 9727 }, { "epoch": 0.56, "grad_norm": 0.4854419789559304, "learning_rate": 8.583047616950761e-06, "loss": 0.3134, "step": 9728 }, { "epoch": 0.56, "grad_norm": 0.32119730040485317, "learning_rate": 8.581205504890474e-06, "loss": 0.2772, "step": 9729 }, { "epoch": 0.56, "grad_norm": 0.8707909117456779, "learning_rate": 8.57936344196298e-06, "loss": 0.4635, "step": 9730 }, { "epoch": 0.56, "grad_norm": 0.49345414113964153, "learning_rate": 8.577521428232067e-06, "loss": 0.2946, "step": 9731 }, { "epoch": 0.56, "grad_norm": 0.23250652886101789, "learning_rate": 8.575679463761527e-06, "loss": 0.1565, "step": 9732 }, { "epoch": 0.56, "grad_norm": 0.3752357585942652, "learning_rate": 8.573837548615144e-06, "loss": 0.3268, "step": 9733 }, { "epoch": 0.56, "grad_norm": 1.1076609438672396, "learning_rate": 8.57199568285671e-06, "loss": 0.6822, "step": 9734 }, { "epoch": 0.56, "grad_norm": 0.31491087231924286, "learning_rate": 8.570153866550002e-06, "loss": 0.2089, "step": 9735 }, { "epoch": 0.56, "grad_norm": 1.3889797767245986, "learning_rate": 8.568312099758802e-06, "loss": 0.4492, "step": 9736 }, { "epoch": 0.56, "grad_norm": 0.36769358781100087, "learning_rate": 8.566470382546896e-06, "loss": 0.3141, "step": 9737 }, { "epoch": 0.56, "grad_norm": 0.21757133357345376, "learning_rate": 8.564628714978055e-06, "loss": 0.1293, "step": 9738 }, { "epoch": 0.56, "grad_norm": 0.34371264630242915, "learning_rate": 8.562787097116063e-06, "loss": 0.2301, "step": 9739 }, { "epoch": 0.56, "grad_norm": 0.36254630132426113, "learning_rate": 8.56094552902469e-06, "loss": 0.354, "step": 9740 }, { "epoch": 0.56, "grad_norm": 0.321207211418575, "learning_rate": 8.559104010767713e-06, "loss": 0.1934, "step": 9741 }, { "epoch": 0.56, "grad_norm": 1.018284315811994, "learning_rate": 8.5572625424089e-06, "loss": 0.4464, "step": 9742 }, { "epoch": 0.56, "grad_norm": 1.050265768211607, "learning_rate": 8.555421124012026e-06, "loss": 0.5515, "step": 9743 }, { "epoch": 0.56, "grad_norm": 0.23753029210938723, "learning_rate": 8.553579755640853e-06, "loss": 0.1735, "step": 9744 }, { "epoch": 0.56, "grad_norm": 0.3188849467880673, "learning_rate": 8.551738437359154e-06, "loss": 0.2574, "step": 9745 }, { "epoch": 0.56, "grad_norm": 0.7259221869316915, "learning_rate": 8.549897169230689e-06, "loss": 0.5058, "step": 9746 }, { "epoch": 0.56, "grad_norm": 0.4847398425340026, "learning_rate": 8.548055951319223e-06, "loss": 0.3058, "step": 9747 }, { "epoch": 0.56, "grad_norm": 0.4390574813917582, "learning_rate": 8.546214783688518e-06, "loss": 0.2942, "step": 9748 }, { "epoch": 0.56, "grad_norm": 0.3452402290361021, "learning_rate": 8.544373666402331e-06, "loss": 0.3128, "step": 9749 }, { "epoch": 0.56, "grad_norm": 0.3458191816422412, "learning_rate": 8.542532599524422e-06, "loss": 0.2582, "step": 9750 }, { "epoch": 0.56, "grad_norm": 0.18158561259795064, "learning_rate": 8.540691583118545e-06, "loss": 0.0899, "step": 9751 }, { "epoch": 0.56, "grad_norm": 0.4406492459517275, "learning_rate": 8.53885061724846e-06, "loss": 0.3035, "step": 9752 }, { "epoch": 0.56, "grad_norm": 0.3702396463379801, "learning_rate": 8.537009701977909e-06, "loss": 0.276, "step": 9753 }, { "epoch": 0.56, "grad_norm": 0.8471022600906709, "learning_rate": 8.535168837370656e-06, "loss": 0.357, "step": 9754 }, { "epoch": 0.56, "grad_norm": 0.8915584935820783, "learning_rate": 8.533328023490438e-06, "loss": 0.5318, "step": 9755 }, { "epoch": 0.56, "grad_norm": 0.3460314169075071, "learning_rate": 8.531487260401009e-06, "loss": 0.2315, "step": 9756 }, { "epoch": 0.56, "grad_norm": 0.2540429650530294, "learning_rate": 8.529646548166113e-06, "loss": 0.2456, "step": 9757 }, { "epoch": 0.56, "grad_norm": 0.8162276591758156, "learning_rate": 8.527805886849496e-06, "loss": 0.3404, "step": 9758 }, { "epoch": 0.56, "grad_norm": 0.6224711881741437, "learning_rate": 8.525965276514897e-06, "loss": 0.4223, "step": 9759 }, { "epoch": 0.56, "grad_norm": 0.41310833028155763, "learning_rate": 8.524124717226057e-06, "loss": 0.3289, "step": 9760 }, { "epoch": 0.56, "grad_norm": 0.3572582038454073, "learning_rate": 8.522284209046713e-06, "loss": 0.2494, "step": 9761 }, { "epoch": 0.56, "grad_norm": 0.3742903886740477, "learning_rate": 8.520443752040604e-06, "loss": 0.2831, "step": 9762 }, { "epoch": 0.56, "grad_norm": 0.2706597483671145, "learning_rate": 8.518603346271463e-06, "loss": 0.2184, "step": 9763 }, { "epoch": 0.56, "grad_norm": 0.38218243499476895, "learning_rate": 8.516762991803027e-06, "loss": 0.2738, "step": 9764 }, { "epoch": 0.56, "grad_norm": 0.4708346428419067, "learning_rate": 8.51492268869902e-06, "loss": 0.2916, "step": 9765 }, { "epoch": 0.56, "grad_norm": 0.5177037495364069, "learning_rate": 8.513082437023182e-06, "loss": 0.4067, "step": 9766 }, { "epoch": 0.56, "grad_norm": 0.8805166112100425, "learning_rate": 8.511242236839232e-06, "loss": 0.2842, "step": 9767 }, { "epoch": 0.56, "grad_norm": 0.32887364777368694, "learning_rate": 8.509402088210901e-06, "loss": 0.2746, "step": 9768 }, { "epoch": 0.56, "grad_norm": 0.26738679639436275, "learning_rate": 8.507561991201908e-06, "loss": 0.2453, "step": 9769 }, { "epoch": 0.56, "grad_norm": 0.41725219260940183, "learning_rate": 8.505721945875985e-06, "loss": 0.3137, "step": 9770 }, { "epoch": 0.56, "grad_norm": 0.3031029677592879, "learning_rate": 8.503881952296842e-06, "loss": 0.2186, "step": 9771 }, { "epoch": 0.56, "grad_norm": 1.254047894329755, "learning_rate": 8.502042010528205e-06, "loss": 0.7768, "step": 9772 }, { "epoch": 0.56, "grad_norm": 0.3559282773742175, "learning_rate": 8.50020212063379e-06, "loss": 0.3021, "step": 9773 }, { "epoch": 0.56, "grad_norm": 0.3623989422829272, "learning_rate": 8.498362282677308e-06, "loss": 0.1237, "step": 9774 }, { "epoch": 0.56, "grad_norm": 0.33483177445849416, "learning_rate": 8.496522496722476e-06, "loss": 0.2634, "step": 9775 }, { "epoch": 0.56, "grad_norm": 0.32082119699761735, "learning_rate": 8.494682762833004e-06, "loss": 0.2741, "step": 9776 }, { "epoch": 0.56, "grad_norm": 0.3887285850178219, "learning_rate": 8.492843081072609e-06, "loss": 0.1544, "step": 9777 }, { "epoch": 0.56, "grad_norm": 0.4023507557929457, "learning_rate": 8.491003451504987e-06, "loss": 0.3336, "step": 9778 }, { "epoch": 0.56, "grad_norm": 0.8923515179116659, "learning_rate": 8.489163874193854e-06, "loss": 0.5183, "step": 9779 }, { "epoch": 0.56, "grad_norm": 0.32433786243686613, "learning_rate": 8.487324349202909e-06, "loss": 0.2238, "step": 9780 }, { "epoch": 0.56, "grad_norm": 0.3837847982567723, "learning_rate": 8.485484876595859e-06, "loss": 0.3084, "step": 9781 }, { "epoch": 0.56, "grad_norm": 0.45428916728106616, "learning_rate": 8.4836454564364e-06, "loss": 0.2587, "step": 9782 }, { "epoch": 0.56, "grad_norm": 0.5838701470386773, "learning_rate": 8.481806088788235e-06, "loss": 0.3887, "step": 9783 }, { "epoch": 0.56, "grad_norm": 0.23669142326985326, "learning_rate": 8.47996677371506e-06, "loss": 0.2022, "step": 9784 }, { "epoch": 0.56, "grad_norm": 0.4869737162516183, "learning_rate": 8.478127511280571e-06, "loss": 0.399, "step": 9785 }, { "epoch": 0.56, "grad_norm": 0.5174517432230594, "learning_rate": 8.476288301548458e-06, "loss": 0.3913, "step": 9786 }, { "epoch": 0.56, "grad_norm": 0.36426826902545983, "learning_rate": 8.474449144582419e-06, "loss": 0.2611, "step": 9787 }, { "epoch": 0.56, "grad_norm": 0.339033501439424, "learning_rate": 8.472610040446142e-06, "loss": 0.3378, "step": 9788 }, { "epoch": 0.56, "grad_norm": 0.6482947546336639, "learning_rate": 8.470770989203309e-06, "loss": 0.4807, "step": 9789 }, { "epoch": 0.56, "grad_norm": 0.2700529166944222, "learning_rate": 8.468931990917613e-06, "loss": 0.1869, "step": 9790 }, { "epoch": 0.56, "grad_norm": 0.38726787939089174, "learning_rate": 8.467093045652736e-06, "loss": 0.2809, "step": 9791 }, { "epoch": 0.56, "grad_norm": 0.3210957803126272, "learning_rate": 8.465254153472362e-06, "loss": 0.2913, "step": 9792 }, { "epoch": 0.56, "grad_norm": 0.3114223586414909, "learning_rate": 8.463415314440172e-06, "loss": 0.2626, "step": 9793 }, { "epoch": 0.56, "grad_norm": 0.9172670409997502, "learning_rate": 8.46157652861984e-06, "loss": 0.6035, "step": 9794 }, { "epoch": 0.56, "grad_norm": 0.7608940459805851, "learning_rate": 8.45973779607505e-06, "loss": 0.4936, "step": 9795 }, { "epoch": 0.56, "grad_norm": 0.2258225225155695, "learning_rate": 8.457899116869469e-06, "loss": 0.2257, "step": 9796 }, { "epoch": 0.56, "grad_norm": 0.2627940231472568, "learning_rate": 8.45606049106678e-06, "loss": 0.1897, "step": 9797 }, { "epoch": 0.56, "grad_norm": 0.5623646815898292, "learning_rate": 8.454221918730646e-06, "loss": 0.3763, "step": 9798 }, { "epoch": 0.56, "grad_norm": 0.3464750026436955, "learning_rate": 8.452383399924743e-06, "loss": 0.2948, "step": 9799 }, { "epoch": 0.56, "grad_norm": 0.3381683606193286, "learning_rate": 8.450544934712736e-06, "loss": 0.2535, "step": 9800 }, { "epoch": 0.56, "grad_norm": 0.6811041565579721, "learning_rate": 8.44870652315829e-06, "loss": 0.4924, "step": 9801 }, { "epoch": 0.56, "grad_norm": 0.2585541466426452, "learning_rate": 8.446868165325073e-06, "loss": 0.2242, "step": 9802 }, { "epoch": 0.56, "grad_norm": 0.4408860747267185, "learning_rate": 8.445029861276742e-06, "loss": 0.1794, "step": 9803 }, { "epoch": 0.56, "grad_norm": 0.30415946757221135, "learning_rate": 8.443191611076962e-06, "loss": 0.2894, "step": 9804 }, { "epoch": 0.56, "grad_norm": 0.3788792995472617, "learning_rate": 8.441353414789386e-06, "loss": 0.2975, "step": 9805 }, { "epoch": 0.56, "grad_norm": 1.3016544580089682, "learning_rate": 8.439515272477679e-06, "loss": 0.6672, "step": 9806 }, { "epoch": 0.56, "grad_norm": 0.4190179098500511, "learning_rate": 8.437677184205488e-06, "loss": 0.2239, "step": 9807 }, { "epoch": 0.56, "grad_norm": 0.3089991674098943, "learning_rate": 8.43583915003647e-06, "loss": 0.2844, "step": 9808 }, { "epoch": 0.56, "grad_norm": 0.2639149079109884, "learning_rate": 8.434001170034273e-06, "loss": 0.2243, "step": 9809 }, { "epoch": 0.56, "grad_norm": 0.556166511583863, "learning_rate": 8.432163244262551e-06, "loss": 0.3236, "step": 9810 }, { "epoch": 0.56, "grad_norm": 0.32418630784592684, "learning_rate": 8.430325372784946e-06, "loss": 0.2629, "step": 9811 }, { "epoch": 0.56, "grad_norm": 0.3574565696512591, "learning_rate": 8.428487555665108e-06, "loss": 0.3277, "step": 9812 }, { "epoch": 0.56, "grad_norm": 0.43378655974248065, "learning_rate": 8.426649792966679e-06, "loss": 0.2564, "step": 9813 }, { "epoch": 0.56, "grad_norm": 0.3671253402319884, "learning_rate": 8.424812084753297e-06, "loss": 0.2683, "step": 9814 }, { "epoch": 0.56, "grad_norm": 0.6749395956942922, "learning_rate": 8.422974431088607e-06, "loss": 0.3903, "step": 9815 }, { "epoch": 0.56, "grad_norm": 0.2000129236440116, "learning_rate": 8.421136832036242e-06, "loss": 0.1704, "step": 9816 }, { "epoch": 0.56, "grad_norm": 0.3516750238446244, "learning_rate": 8.419299287659844e-06, "loss": 0.2996, "step": 9817 }, { "epoch": 0.56, "grad_norm": 1.055794532902847, "learning_rate": 8.417461798023042e-06, "loss": 0.6188, "step": 9818 }, { "epoch": 0.56, "grad_norm": 0.4547799461366989, "learning_rate": 8.41562436318947e-06, "loss": 0.3617, "step": 9819 }, { "epoch": 0.56, "grad_norm": 0.28796199443351, "learning_rate": 8.413786983222758e-06, "loss": 0.2079, "step": 9820 }, { "epoch": 0.56, "grad_norm": 0.828814734820839, "learning_rate": 8.411949658186536e-06, "loss": 0.3814, "step": 9821 }, { "epoch": 0.56, "grad_norm": 0.29849571473835407, "learning_rate": 8.410112388144426e-06, "loss": 0.2056, "step": 9822 }, { "epoch": 0.56, "grad_norm": 0.3716093461533886, "learning_rate": 8.408275173160059e-06, "loss": 0.207, "step": 9823 }, { "epoch": 0.56, "grad_norm": 0.3931711629255428, "learning_rate": 8.406438013297052e-06, "loss": 0.3198, "step": 9824 }, { "epoch": 0.56, "grad_norm": 0.8020438520926569, "learning_rate": 8.404600908619033e-06, "loss": 0.5594, "step": 9825 }, { "epoch": 0.56, "grad_norm": 0.39734590436170697, "learning_rate": 8.40276385918961e-06, "loss": 0.2346, "step": 9826 }, { "epoch": 0.56, "grad_norm": 0.8506087767154734, "learning_rate": 8.40092686507241e-06, "loss": 0.4187, "step": 9827 }, { "epoch": 0.56, "grad_norm": 0.255074633086988, "learning_rate": 8.39908992633104e-06, "loss": 0.2158, "step": 9828 }, { "epoch": 0.56, "grad_norm": 0.31639947374690586, "learning_rate": 8.39725304302912e-06, "loss": 0.1998, "step": 9829 }, { "epoch": 0.56, "grad_norm": 1.2054591386012041, "learning_rate": 8.395416215230255e-06, "loss": 0.7528, "step": 9830 }, { "epoch": 0.56, "grad_norm": 0.4489425221494689, "learning_rate": 8.39357944299806e-06, "loss": 0.3473, "step": 9831 }, { "epoch": 0.56, "grad_norm": 0.3019501530801851, "learning_rate": 8.391742726396138e-06, "loss": 0.2471, "step": 9832 }, { "epoch": 0.56, "grad_norm": 0.6973811792398468, "learning_rate": 8.389906065488099e-06, "loss": 0.3255, "step": 9833 }, { "epoch": 0.57, "grad_norm": 0.28463618303054367, "learning_rate": 8.38806946033754e-06, "loss": 0.1894, "step": 9834 }, { "epoch": 0.57, "grad_norm": 0.35151178079415824, "learning_rate": 8.386232911008069e-06, "loss": 0.2611, "step": 9835 }, { "epoch": 0.57, "grad_norm": 0.40041939562222845, "learning_rate": 8.38439641756328e-06, "loss": 0.2496, "step": 9836 }, { "epoch": 0.57, "grad_norm": 0.876402219608769, "learning_rate": 8.382559980066778e-06, "loss": 0.4565, "step": 9837 }, { "epoch": 0.57, "grad_norm": 0.31643695032595764, "learning_rate": 8.380723598582152e-06, "loss": 0.2451, "step": 9838 }, { "epoch": 0.57, "grad_norm": 0.9533380440004469, "learning_rate": 8.378887273172997e-06, "loss": 0.3012, "step": 9839 }, { "epoch": 0.57, "grad_norm": 0.20603783465622966, "learning_rate": 8.37705100390291e-06, "loss": 0.2236, "step": 9840 }, { "epoch": 0.57, "grad_norm": 0.37732886480516004, "learning_rate": 8.375214790835471e-06, "loss": 0.2641, "step": 9841 }, { "epoch": 0.57, "grad_norm": 0.48964154358706147, "learning_rate": 8.37337863403428e-06, "loss": 0.2918, "step": 9842 }, { "epoch": 0.57, "grad_norm": 0.4250539613089976, "learning_rate": 8.371542533562912e-06, "loss": 0.3089, "step": 9843 }, { "epoch": 0.57, "grad_norm": 0.35552109627010425, "learning_rate": 8.369706489484958e-06, "loss": 0.2745, "step": 9844 }, { "epoch": 0.57, "grad_norm": 0.9900011631599458, "learning_rate": 8.367870501863999e-06, "loss": 0.4337, "step": 9845 }, { "epoch": 0.57, "grad_norm": 0.18633542192754032, "learning_rate": 8.366034570763614e-06, "loss": 0.0689, "step": 9846 }, { "epoch": 0.57, "grad_norm": 0.3414535184442479, "learning_rate": 8.36419869624738e-06, "loss": 0.2813, "step": 9847 }, { "epoch": 0.57, "grad_norm": 0.37358328219071146, "learning_rate": 8.362362878378876e-06, "loss": 0.329, "step": 9848 }, { "epoch": 0.57, "grad_norm": 0.8248357384042281, "learning_rate": 8.360527117221675e-06, "loss": 0.3291, "step": 9849 }, { "epoch": 0.57, "grad_norm": 0.36147984520054255, "learning_rate": 8.358691412839351e-06, "loss": 0.3034, "step": 9850 }, { "epoch": 0.57, "grad_norm": 0.5104748583363516, "learning_rate": 8.35685576529547e-06, "loss": 0.3727, "step": 9851 }, { "epoch": 0.57, "grad_norm": 0.40198951064850447, "learning_rate": 8.355020174653605e-06, "loss": 0.2165, "step": 9852 }, { "epoch": 0.57, "grad_norm": 0.24647184833766747, "learning_rate": 8.35318464097732e-06, "loss": 0.2172, "step": 9853 }, { "epoch": 0.57, "grad_norm": 0.3810295500607417, "learning_rate": 8.35134916433018e-06, "loss": 0.2782, "step": 9854 }, { "epoch": 0.57, "grad_norm": 0.3857340949697676, "learning_rate": 8.349513744775748e-06, "loss": 0.289, "step": 9855 }, { "epoch": 0.57, "grad_norm": 0.34685648534680974, "learning_rate": 8.347678382377584e-06, "loss": 0.2889, "step": 9856 }, { "epoch": 0.57, "grad_norm": 1.0213706600766048, "learning_rate": 8.345843077199247e-06, "loss": 0.604, "step": 9857 }, { "epoch": 0.57, "grad_norm": 0.43460712379136257, "learning_rate": 8.344007829304291e-06, "loss": 0.2663, "step": 9858 }, { "epoch": 0.57, "grad_norm": 0.2396752936639507, "learning_rate": 8.342172638756276e-06, "loss": 0.1397, "step": 9859 }, { "epoch": 0.57, "grad_norm": 0.3169017785415661, "learning_rate": 8.34033750561875e-06, "loss": 0.29, "step": 9860 }, { "epoch": 0.57, "grad_norm": 0.9931863567969424, "learning_rate": 8.338502429955264e-06, "loss": 0.4568, "step": 9861 }, { "epoch": 0.57, "grad_norm": 0.3480195681104575, "learning_rate": 8.33666741182937e-06, "loss": 0.1948, "step": 9862 }, { "epoch": 0.57, "grad_norm": 0.37565505477248806, "learning_rate": 8.334832451304607e-06, "loss": 0.3106, "step": 9863 }, { "epoch": 0.57, "grad_norm": 0.45130905555980977, "learning_rate": 8.332997548444528e-06, "loss": 0.3433, "step": 9864 }, { "epoch": 0.57, "grad_norm": 0.3033398640505855, "learning_rate": 8.331162703312671e-06, "loss": 0.1934, "step": 9865 }, { "epoch": 0.57, "grad_norm": 0.5363775633765079, "learning_rate": 8.329327915972578e-06, "loss": 0.2919, "step": 9866 }, { "epoch": 0.57, "grad_norm": 0.747351620662827, "learning_rate": 8.32749318648779e-06, "loss": 0.3933, "step": 9867 }, { "epoch": 0.57, "grad_norm": 0.26553280669644175, "learning_rate": 8.325658514921838e-06, "loss": 0.211, "step": 9868 }, { "epoch": 0.57, "grad_norm": 0.4145040507342814, "learning_rate": 8.32382390133826e-06, "loss": 0.227, "step": 9869 }, { "epoch": 0.57, "grad_norm": 1.1950814437419346, "learning_rate": 8.321989345800587e-06, "loss": 0.6061, "step": 9870 }, { "epoch": 0.57, "grad_norm": 0.3002830485967934, "learning_rate": 8.320154848372353e-06, "loss": 0.2718, "step": 9871 }, { "epoch": 0.57, "grad_norm": 0.3226666488160495, "learning_rate": 8.318320409117082e-06, "loss": 0.2688, "step": 9872 }, { "epoch": 0.57, "grad_norm": 0.37089315357089786, "learning_rate": 8.316486028098306e-06, "loss": 0.2992, "step": 9873 }, { "epoch": 0.57, "grad_norm": 0.29002209727779943, "learning_rate": 8.314651705379544e-06, "loss": 0.2231, "step": 9874 }, { "epoch": 0.57, "grad_norm": 0.3455087689241484, "learning_rate": 8.312817441024324e-06, "loss": 0.2336, "step": 9875 }, { "epoch": 0.57, "grad_norm": 0.5069761719351095, "learning_rate": 8.31098323509616e-06, "loss": 0.3668, "step": 9876 }, { "epoch": 0.57, "grad_norm": 0.38171523506523025, "learning_rate": 8.309149087658576e-06, "loss": 0.278, "step": 9877 }, { "epoch": 0.57, "grad_norm": 0.37069169080183434, "learning_rate": 8.307314998775087e-06, "loss": 0.2917, "step": 9878 }, { "epoch": 0.57, "grad_norm": 0.3392341659426202, "learning_rate": 8.305480968509204e-06, "loss": 0.3168, "step": 9879 }, { "epoch": 0.57, "grad_norm": 0.5341042702280032, "learning_rate": 8.303646996924445e-06, "loss": 0.3165, "step": 9880 }, { "epoch": 0.57, "grad_norm": 0.2169102274872214, "learning_rate": 8.301813084084315e-06, "loss": 0.1475, "step": 9881 }, { "epoch": 0.57, "grad_norm": 0.5998300732121216, "learning_rate": 8.299979230052327e-06, "loss": 0.4361, "step": 9882 }, { "epoch": 0.57, "grad_norm": 0.31913217855811254, "learning_rate": 8.298145434891983e-06, "loss": 0.2759, "step": 9883 }, { "epoch": 0.57, "grad_norm": 0.3384502713432209, "learning_rate": 8.296311698666792e-06, "loss": 0.3219, "step": 9884 }, { "epoch": 0.57, "grad_norm": 0.7524840483991162, "learning_rate": 8.29447802144025e-06, "loss": 0.4665, "step": 9885 }, { "epoch": 0.57, "grad_norm": 0.2888469947900723, "learning_rate": 8.292644403275865e-06, "loss": 0.2242, "step": 9886 }, { "epoch": 0.57, "grad_norm": 0.2532786012933536, "learning_rate": 8.290810844237128e-06, "loss": 0.2485, "step": 9887 }, { "epoch": 0.57, "grad_norm": 0.7263770853364647, "learning_rate": 8.28897734438754e-06, "loss": 0.2903, "step": 9888 }, { "epoch": 0.57, "grad_norm": 0.3729535266775858, "learning_rate": 8.28714390379059e-06, "loss": 0.2678, "step": 9889 }, { "epoch": 0.57, "grad_norm": 0.4971089032472461, "learning_rate": 8.285310522509777e-06, "loss": 0.3529, "step": 9890 }, { "epoch": 0.57, "grad_norm": 0.32657011764466537, "learning_rate": 8.283477200608585e-06, "loss": 0.2734, "step": 9891 }, { "epoch": 0.57, "grad_norm": 0.4085045169813695, "learning_rate": 8.281643938150504e-06, "loss": 0.2973, "step": 9892 }, { "epoch": 0.57, "grad_norm": 0.25379677627879427, "learning_rate": 8.27981073519902e-06, "loss": 0.1814, "step": 9893 }, { "epoch": 0.57, "grad_norm": 0.8729493209016004, "learning_rate": 8.277977591817617e-06, "loss": 0.3863, "step": 9894 }, { "epoch": 0.57, "grad_norm": 0.30599910372112143, "learning_rate": 8.276144508069775e-06, "loss": 0.2213, "step": 9895 }, { "epoch": 0.57, "grad_norm": 0.38495626365299385, "learning_rate": 8.274311484018975e-06, "loss": 0.3128, "step": 9896 }, { "epoch": 0.57, "grad_norm": 0.9544093017904444, "learning_rate": 8.27247851972869e-06, "loss": 0.6904, "step": 9897 }, { "epoch": 0.57, "grad_norm": 0.4332396370100671, "learning_rate": 8.270645615262405e-06, "loss": 0.1635, "step": 9898 }, { "epoch": 0.57, "grad_norm": 0.21958238359771295, "learning_rate": 8.268812770683583e-06, "loss": 0.2311, "step": 9899 }, { "epoch": 0.57, "grad_norm": 0.7528076032824318, "learning_rate": 8.266979986055704e-06, "loss": 0.3034, "step": 9900 }, { "epoch": 0.57, "grad_norm": 0.2277184270609133, "learning_rate": 8.265147261442232e-06, "loss": 0.0727, "step": 9901 }, { "epoch": 0.57, "grad_norm": 0.3900048604849335, "learning_rate": 8.263314596906636e-06, "loss": 0.3282, "step": 9902 }, { "epoch": 0.57, "grad_norm": 0.3516000182623754, "learning_rate": 8.261481992512382e-06, "loss": 0.3266, "step": 9903 }, { "epoch": 0.57, "grad_norm": 0.41670733313075314, "learning_rate": 8.25964944832293e-06, "loss": 0.1948, "step": 9904 }, { "epoch": 0.57, "grad_norm": 0.309400465234977, "learning_rate": 8.257816964401745e-06, "loss": 0.2583, "step": 9905 }, { "epoch": 0.57, "grad_norm": 0.4465142891914701, "learning_rate": 8.255984540812281e-06, "loss": 0.3008, "step": 9906 }, { "epoch": 0.57, "grad_norm": 0.28221141190811483, "learning_rate": 8.254152177618e-06, "loss": 0.2664, "step": 9907 }, { "epoch": 0.57, "grad_norm": 0.3505287455300988, "learning_rate": 8.252319874882351e-06, "loss": 0.2389, "step": 9908 }, { "epoch": 0.57, "grad_norm": 1.0994942283600821, "learning_rate": 8.250487632668793e-06, "loss": 0.7445, "step": 9909 }, { "epoch": 0.57, "grad_norm": 0.522218227682, "learning_rate": 8.248655451040768e-06, "loss": 0.2764, "step": 9910 }, { "epoch": 0.57, "grad_norm": 0.2631576808456909, "learning_rate": 8.246823330061734e-06, "loss": 0.2347, "step": 9911 }, { "epoch": 0.57, "grad_norm": 0.40714845276580147, "learning_rate": 8.24499126979513e-06, "loss": 0.281, "step": 9912 }, { "epoch": 0.57, "grad_norm": 0.28989171280742454, "learning_rate": 8.243159270304406e-06, "loss": 0.1528, "step": 9913 }, { "epoch": 0.57, "grad_norm": 0.3758477950586018, "learning_rate": 8.241327331652997e-06, "loss": 0.2369, "step": 9914 }, { "epoch": 0.57, "grad_norm": 0.35371479814125156, "learning_rate": 8.23949545390435e-06, "loss": 0.3125, "step": 9915 }, { "epoch": 0.57, "grad_norm": 0.7429248777607208, "learning_rate": 8.237663637121897e-06, "loss": 0.4045, "step": 9916 }, { "epoch": 0.57, "grad_norm": 0.32476985877232545, "learning_rate": 8.23583188136908e-06, "loss": 0.2445, "step": 9917 }, { "epoch": 0.57, "grad_norm": 0.2535536568991639, "learning_rate": 8.234000186709327e-06, "loss": 0.2115, "step": 9918 }, { "epoch": 0.57, "grad_norm": 0.424295206181053, "learning_rate": 8.232168553206072e-06, "loss": 0.3238, "step": 9919 }, { "epoch": 0.57, "grad_norm": 0.32505346775808885, "learning_rate": 8.230336980922744e-06, "loss": 0.2858, "step": 9920 }, { "epoch": 0.57, "grad_norm": 0.9237249641595617, "learning_rate": 8.228505469922769e-06, "loss": 0.4477, "step": 9921 }, { "epoch": 0.57, "grad_norm": 0.4630004104417319, "learning_rate": 8.226674020269576e-06, "loss": 0.3362, "step": 9922 }, { "epoch": 0.57, "grad_norm": 0.2790719952889474, "learning_rate": 8.224842632026583e-06, "loss": 0.2751, "step": 9923 }, { "epoch": 0.57, "grad_norm": 0.7399588708936765, "learning_rate": 8.223011305257214e-06, "loss": 0.2648, "step": 9924 }, { "epoch": 0.57, "grad_norm": 0.24700257914947743, "learning_rate": 8.221180040024887e-06, "loss": 0.1378, "step": 9925 }, { "epoch": 0.57, "grad_norm": 0.36109041110206347, "learning_rate": 8.21934883639302e-06, "loss": 0.2974, "step": 9926 }, { "epoch": 0.57, "grad_norm": 0.36150839242761174, "learning_rate": 8.217517694425027e-06, "loss": 0.2756, "step": 9927 }, { "epoch": 0.57, "grad_norm": 0.5338739079601947, "learning_rate": 8.215686614184317e-06, "loss": 0.3363, "step": 9928 }, { "epoch": 0.57, "grad_norm": 0.3856577519384225, "learning_rate": 8.213855595734306e-06, "loss": 0.3088, "step": 9929 }, { "epoch": 0.57, "grad_norm": 0.4388607395567644, "learning_rate": 8.212024639138398e-06, "loss": 0.2918, "step": 9930 }, { "epoch": 0.57, "grad_norm": 0.22893949698281893, "learning_rate": 8.210193744459997e-06, "loss": 0.1762, "step": 9931 }, { "epoch": 0.57, "grad_norm": 0.38411850632766775, "learning_rate": 8.20836291176251e-06, "loss": 0.3018, "step": 9932 }, { "epoch": 0.57, "grad_norm": 0.7225551464888974, "learning_rate": 8.20653214110934e-06, "loss": 0.5503, "step": 9933 }, { "epoch": 0.57, "grad_norm": 0.4470092332721713, "learning_rate": 8.204701432563886e-06, "loss": 0.2851, "step": 9934 }, { "epoch": 0.57, "grad_norm": 0.28828857751457004, "learning_rate": 8.202870786189541e-06, "loss": 0.2635, "step": 9935 }, { "epoch": 0.57, "grad_norm": 0.9999090217246076, "learning_rate": 8.201040202049705e-06, "loss": 0.6211, "step": 9936 }, { "epoch": 0.57, "grad_norm": 0.1682762823248837, "learning_rate": 8.199209680207768e-06, "loss": 0.0922, "step": 9937 }, { "epoch": 0.57, "grad_norm": 0.35653206640661295, "learning_rate": 8.197379220727124e-06, "loss": 0.2672, "step": 9938 }, { "epoch": 0.57, "grad_norm": 0.34611855850166906, "learning_rate": 8.19554882367116e-06, "loss": 0.3211, "step": 9939 }, { "epoch": 0.57, "grad_norm": 0.7223852385709048, "learning_rate": 8.193718489103261e-06, "loss": 0.3111, "step": 9940 }, { "epoch": 0.57, "grad_norm": 0.3878265662993998, "learning_rate": 8.191888217086813e-06, "loss": 0.3133, "step": 9941 }, { "epoch": 0.57, "grad_norm": 0.9687515986579172, "learning_rate": 8.190058007685203e-06, "loss": 0.5916, "step": 9942 }, { "epoch": 0.57, "grad_norm": 0.21520660321755714, "learning_rate": 8.188227860961804e-06, "loss": 0.1676, "step": 9943 }, { "epoch": 0.57, "grad_norm": 0.343814550581402, "learning_rate": 8.186397776979992e-06, "loss": 0.2711, "step": 9944 }, { "epoch": 0.57, "grad_norm": 0.9159296933741887, "learning_rate": 8.184567755803153e-06, "loss": 0.4973, "step": 9945 }, { "epoch": 0.57, "grad_norm": 0.38092338926104646, "learning_rate": 8.18273779749465e-06, "loss": 0.3144, "step": 9946 }, { "epoch": 0.57, "grad_norm": 0.29189600354117673, "learning_rate": 8.180907902117862e-06, "loss": 0.1929, "step": 9947 }, { "epoch": 0.57, "grad_norm": 1.1424900738622954, "learning_rate": 8.179078069736152e-06, "loss": 0.6561, "step": 9948 }, { "epoch": 0.57, "grad_norm": 0.22268099279160083, "learning_rate": 8.177248300412893e-06, "loss": 0.1552, "step": 9949 }, { "epoch": 0.57, "grad_norm": 0.3583785102766591, "learning_rate": 8.175418594211445e-06, "loss": 0.2161, "step": 9950 }, { "epoch": 0.57, "grad_norm": 0.5628579774742724, "learning_rate": 8.173588951195175e-06, "loss": 0.3217, "step": 9951 }, { "epoch": 0.57, "grad_norm": 0.9119904572848944, "learning_rate": 8.171759371427439e-06, "loss": 0.492, "step": 9952 }, { "epoch": 0.57, "grad_norm": 0.2712431380235841, "learning_rate": 8.169929854971598e-06, "loss": 0.2144, "step": 9953 }, { "epoch": 0.57, "grad_norm": 0.41873206115862327, "learning_rate": 8.168100401891007e-06, "loss": 0.3233, "step": 9954 }, { "epoch": 0.57, "grad_norm": 0.41447916444623933, "learning_rate": 8.166271012249022e-06, "loss": 0.26, "step": 9955 }, { "epoch": 0.57, "grad_norm": 0.2745882143555028, "learning_rate": 8.164441686108991e-06, "loss": 0.1833, "step": 9956 }, { "epoch": 0.57, "grad_norm": 0.8934275316001319, "learning_rate": 8.162612423534266e-06, "loss": 0.4585, "step": 9957 }, { "epoch": 0.57, "grad_norm": 0.37576440877617384, "learning_rate": 8.160783224588196e-06, "loss": 0.3174, "step": 9958 }, { "epoch": 0.57, "grad_norm": 0.34228438562146457, "learning_rate": 8.15895408933412e-06, "loss": 0.2592, "step": 9959 }, { "epoch": 0.57, "grad_norm": 1.0015700431321963, "learning_rate": 8.157125017835389e-06, "loss": 0.2788, "step": 9960 }, { "epoch": 0.57, "grad_norm": 0.3365784712512407, "learning_rate": 8.15529601015534e-06, "loss": 0.2207, "step": 9961 }, { "epoch": 0.57, "grad_norm": 0.3051365494564319, "learning_rate": 8.153467066357305e-06, "loss": 0.2439, "step": 9962 }, { "epoch": 0.57, "grad_norm": 0.4885135862243723, "learning_rate": 8.15163818650463e-06, "loss": 0.3013, "step": 9963 }, { "epoch": 0.57, "grad_norm": 0.38036239490366125, "learning_rate": 8.149809370660643e-06, "loss": 0.2774, "step": 9964 }, { "epoch": 0.57, "grad_norm": 0.39821866202603395, "learning_rate": 8.14798061888868e-06, "loss": 0.2925, "step": 9965 }, { "epoch": 0.57, "grad_norm": 0.3459147936489671, "learning_rate": 8.146151931252067e-06, "loss": 0.2577, "step": 9966 }, { "epoch": 0.57, "grad_norm": 0.590203539009197, "learning_rate": 8.144323307814133e-06, "loss": 0.3359, "step": 9967 }, { "epoch": 0.57, "grad_norm": 0.3644950630409327, "learning_rate": 8.142494748638204e-06, "loss": 0.2875, "step": 9968 }, { "epoch": 0.57, "grad_norm": 0.38221492079631614, "learning_rate": 8.140666253787602e-06, "loss": 0.2538, "step": 9969 }, { "epoch": 0.57, "grad_norm": 0.399983798988496, "learning_rate": 8.138837823325647e-06, "loss": 0.3374, "step": 9970 }, { "epoch": 0.57, "grad_norm": 0.35302886052202936, "learning_rate": 8.137009457315658e-06, "loss": 0.2862, "step": 9971 }, { "epoch": 0.57, "grad_norm": 0.40507971925164943, "learning_rate": 8.135181155820953e-06, "loss": 0.2151, "step": 9972 }, { "epoch": 0.57, "grad_norm": 0.5666935537735343, "learning_rate": 8.13335291890484e-06, "loss": 0.3137, "step": 9973 }, { "epoch": 0.57, "grad_norm": 0.32067561902428615, "learning_rate": 8.13152474663064e-06, "loss": 0.2686, "step": 9974 }, { "epoch": 0.57, "grad_norm": 0.33814247044172163, "learning_rate": 8.129696639061654e-06, "loss": 0.286, "step": 9975 }, { "epoch": 0.57, "grad_norm": 0.3246915939454487, "learning_rate": 8.127868596261198e-06, "loss": 0.1764, "step": 9976 }, { "epoch": 0.57, "grad_norm": 0.26615674398341954, "learning_rate": 8.126040618292566e-06, "loss": 0.2069, "step": 9977 }, { "epoch": 0.57, "grad_norm": 0.4585736080937011, "learning_rate": 8.124212705219071e-06, "loss": 0.3394, "step": 9978 }, { "epoch": 0.57, "grad_norm": 0.4703515008785008, "learning_rate": 8.122384857104006e-06, "loss": 0.2791, "step": 9979 }, { "epoch": 0.57, "grad_norm": 0.3081159193556577, "learning_rate": 8.120557074010677e-06, "loss": 0.251, "step": 9980 }, { "epoch": 0.57, "grad_norm": 1.0902723825131568, "learning_rate": 8.118729356002371e-06, "loss": 0.6268, "step": 9981 }, { "epoch": 0.57, "grad_norm": 0.3609668953923149, "learning_rate": 8.11690170314239e-06, "loss": 0.3442, "step": 9982 }, { "epoch": 0.57, "grad_norm": 0.23808650024305908, "learning_rate": 8.115074115494022e-06, "loss": 0.1346, "step": 9983 }, { "epoch": 0.57, "grad_norm": 0.34721727640723965, "learning_rate": 8.113246593120554e-06, "loss": 0.2742, "step": 9984 }, { "epoch": 0.57, "grad_norm": 0.8939576818059674, "learning_rate": 8.111419136085278e-06, "loss": 0.4012, "step": 9985 }, { "epoch": 0.57, "grad_norm": 0.37324261637531364, "learning_rate": 8.109591744451472e-06, "loss": 0.2016, "step": 9986 }, { "epoch": 0.57, "grad_norm": 0.407045014028079, "learning_rate": 8.107764418282427e-06, "loss": 0.3083, "step": 9987 }, { "epoch": 0.57, "grad_norm": 1.0490869448387106, "learning_rate": 8.105937157641416e-06, "loss": 0.6916, "step": 9988 }, { "epoch": 0.57, "grad_norm": 0.2294748663492301, "learning_rate": 8.104109962591722e-06, "loss": 0.1641, "step": 9989 }, { "epoch": 0.57, "grad_norm": 0.28437388048864876, "learning_rate": 8.102282833196616e-06, "loss": 0.2555, "step": 9990 }, { "epoch": 0.57, "grad_norm": 0.774892354817154, "learning_rate": 8.100455769519377e-06, "loss": 0.4188, "step": 9991 }, { "epoch": 0.57, "grad_norm": 0.29569982792830685, "learning_rate": 8.09862877162327e-06, "loss": 0.1961, "step": 9992 }, { "epoch": 0.57, "grad_norm": 1.0072369091205406, "learning_rate": 8.096801839571569e-06, "loss": 0.6177, "step": 9993 }, { "epoch": 0.57, "grad_norm": 0.3855864777216197, "learning_rate": 8.094974973427541e-06, "loss": 0.3251, "step": 9994 }, { "epoch": 0.57, "grad_norm": 0.39142238746136504, "learning_rate": 8.093148173254445e-06, "loss": 0.2776, "step": 9995 }, { "epoch": 0.57, "grad_norm": 0.23644514454316207, "learning_rate": 8.091321439115543e-06, "loss": 0.1532, "step": 9996 }, { "epoch": 0.57, "grad_norm": 0.7087803679347151, "learning_rate": 8.089494771074102e-06, "loss": 0.4177, "step": 9997 }, { "epoch": 0.57, "grad_norm": 0.2797651307049413, "learning_rate": 8.08766816919337e-06, "loss": 0.2555, "step": 9998 }, { "epoch": 0.57, "grad_norm": 0.4959220267266461, "learning_rate": 8.085841633536611e-06, "loss": 0.2582, "step": 9999 }, { "epoch": 0.57, "grad_norm": 1.0009644158738145, "learning_rate": 8.084015164167071e-06, "loss": 0.6752, "step": 10000 }, { "epoch": 0.57, "grad_norm": 0.32313335118258313, "learning_rate": 8.082188761148007e-06, "loss": 0.1926, "step": 10001 }, { "epoch": 0.57, "grad_norm": 0.2669066233121326, "learning_rate": 8.08036242454266e-06, "loss": 0.2554, "step": 10002 }, { "epoch": 0.57, "grad_norm": 0.4574088631849031, "learning_rate": 8.078536154414283e-06, "loss": 0.2827, "step": 10003 }, { "epoch": 0.57, "grad_norm": 0.507204777496856, "learning_rate": 8.076709950826113e-06, "loss": 0.2304, "step": 10004 }, { "epoch": 0.57, "grad_norm": 0.38781786620956266, "learning_rate": 8.074883813841397e-06, "loss": 0.2686, "step": 10005 }, { "epoch": 0.57, "grad_norm": 0.36301702077513615, "learning_rate": 8.073057743523371e-06, "loss": 0.3049, "step": 10006 }, { "epoch": 0.57, "grad_norm": 0.4861354405825026, "learning_rate": 8.071231739935272e-06, "loss": 0.3207, "step": 10007 }, { "epoch": 0.58, "grad_norm": 0.35373086697907874, "learning_rate": 8.069405803140338e-06, "loss": 0.3251, "step": 10008 }, { "epoch": 0.58, "grad_norm": 0.2171049695260447, "learning_rate": 8.067579933201793e-06, "loss": 0.0853, "step": 10009 }, { "epoch": 0.58, "grad_norm": 0.26086832974147256, "learning_rate": 8.065754130182876e-06, "loss": 0.2698, "step": 10010 }, { "epoch": 0.58, "grad_norm": 0.5370388860704075, "learning_rate": 8.063928394146806e-06, "loss": 0.4125, "step": 10011 }, { "epoch": 0.58, "grad_norm": 0.7740071781472809, "learning_rate": 8.062102725156818e-06, "loss": 0.3091, "step": 10012 }, { "epoch": 0.58, "grad_norm": 0.36948235193836254, "learning_rate": 8.060277123276125e-06, "loss": 0.3018, "step": 10013 }, { "epoch": 0.58, "grad_norm": 0.40449012511592974, "learning_rate": 8.058451588567954e-06, "loss": 0.3211, "step": 10014 }, { "epoch": 0.58, "grad_norm": 0.23658106450923316, "learning_rate": 8.05662612109552e-06, "loss": 0.1578, "step": 10015 }, { "epoch": 0.58, "grad_norm": 0.3824604885675158, "learning_rate": 8.05480072092204e-06, "loss": 0.2864, "step": 10016 }, { "epoch": 0.58, "grad_norm": 0.5705382545976666, "learning_rate": 8.052975388110726e-06, "loss": 0.3922, "step": 10017 }, { "epoch": 0.58, "grad_norm": 0.3746308523465573, "learning_rate": 8.051150122724793e-06, "loss": 0.2776, "step": 10018 }, { "epoch": 0.58, "grad_norm": 0.6099067258468585, "learning_rate": 8.049324924827447e-06, "loss": 0.3158, "step": 10019 }, { "epoch": 0.58, "grad_norm": 0.38826376456495426, "learning_rate": 8.047499794481894e-06, "loss": 0.3084, "step": 10020 }, { "epoch": 0.58, "grad_norm": 0.24314199497277397, "learning_rate": 8.045674731751338e-06, "loss": 0.2092, "step": 10021 }, { "epoch": 0.58, "grad_norm": 0.31113245462098016, "learning_rate": 8.043849736698986e-06, "loss": 0.1859, "step": 10022 }, { "epoch": 0.58, "grad_norm": 0.37576660120750494, "learning_rate": 8.04202480938803e-06, "loss": 0.3239, "step": 10023 }, { "epoch": 0.58, "grad_norm": 0.7344082199207784, "learning_rate": 8.040199949881672e-06, "loss": 0.4354, "step": 10024 }, { "epoch": 0.58, "grad_norm": 0.45688911337935884, "learning_rate": 8.038375158243108e-06, "loss": 0.2071, "step": 10025 }, { "epoch": 0.58, "grad_norm": 0.2761160102991935, "learning_rate": 8.036550434535522e-06, "loss": 0.2761, "step": 10026 }, { "epoch": 0.58, "grad_norm": 1.061528670972983, "learning_rate": 8.034725778822114e-06, "loss": 0.5102, "step": 10027 }, { "epoch": 0.58, "grad_norm": 0.16482060521116781, "learning_rate": 8.032901191166071e-06, "loss": 0.0969, "step": 10028 }, { "epoch": 0.58, "grad_norm": 0.30696431779330907, "learning_rate": 8.03107667163057e-06, "loss": 0.2962, "step": 10029 }, { "epoch": 0.58, "grad_norm": 0.5561323310489525, "learning_rate": 8.029252220278802e-06, "loss": 0.3496, "step": 10030 }, { "epoch": 0.58, "grad_norm": 0.45238876227242075, "learning_rate": 8.02742783717394e-06, "loss": 0.2456, "step": 10031 }, { "epoch": 0.58, "grad_norm": 0.5396238256737506, "learning_rate": 8.025603522379172e-06, "loss": 0.3749, "step": 10032 }, { "epoch": 0.58, "grad_norm": 0.3195175238113092, "learning_rate": 8.023779275957668e-06, "loss": 0.2389, "step": 10033 }, { "epoch": 0.58, "grad_norm": 0.24479568402304927, "learning_rate": 8.021955097972602e-06, "loss": 0.2131, "step": 10034 }, { "epoch": 0.58, "grad_norm": 0.3826033703959546, "learning_rate": 8.020130988487146e-06, "loss": 0.2464, "step": 10035 }, { "epoch": 0.58, "grad_norm": 0.7693896830289918, "learning_rate": 8.01830694756447e-06, "loss": 0.4276, "step": 10036 }, { "epoch": 0.58, "grad_norm": 0.3963773983506641, "learning_rate": 8.016482975267738e-06, "loss": 0.2943, "step": 10037 }, { "epoch": 0.58, "grad_norm": 0.28363379615809237, "learning_rate": 8.014659071660113e-06, "loss": 0.2321, "step": 10038 }, { "epoch": 0.58, "grad_norm": 0.41409455127608996, "learning_rate": 8.012835236804764e-06, "loss": 0.2602, "step": 10039 }, { "epoch": 0.58, "grad_norm": 0.33374553909505866, "learning_rate": 8.01101147076484e-06, "loss": 0.2047, "step": 10040 }, { "epoch": 0.58, "grad_norm": 0.3514808777272365, "learning_rate": 8.009187773603508e-06, "loss": 0.246, "step": 10041 }, { "epoch": 0.58, "grad_norm": 0.3632666739681491, "learning_rate": 8.007364145383914e-06, "loss": 0.3166, "step": 10042 }, { "epoch": 0.58, "grad_norm": 0.809247352433276, "learning_rate": 8.005540586169216e-06, "loss": 0.4513, "step": 10043 }, { "epoch": 0.58, "grad_norm": 0.31999804489355055, "learning_rate": 8.003717096022561e-06, "loss": 0.2042, "step": 10044 }, { "epoch": 0.58, "grad_norm": 0.45816885307178257, "learning_rate": 8.001893675007098e-06, "loss": 0.3312, "step": 10045 }, { "epoch": 0.58, "grad_norm": 0.218345571007771, "learning_rate": 8.00007032318597e-06, "loss": 0.1845, "step": 10046 }, { "epoch": 0.58, "grad_norm": 0.337060392878494, "learning_rate": 7.99824704062232e-06, "loss": 0.2972, "step": 10047 }, { "epoch": 0.58, "grad_norm": 0.7301838529307065, "learning_rate": 7.996423827379292e-06, "loss": 0.3609, "step": 10048 }, { "epoch": 0.58, "grad_norm": 0.32799631732078843, "learning_rate": 7.994600683520018e-06, "loss": 0.3072, "step": 10049 }, { "epoch": 0.58, "grad_norm": 0.4005912109187204, "learning_rate": 7.992777609107638e-06, "loss": 0.3005, "step": 10050 }, { "epoch": 0.58, "grad_norm": 0.8157284221590041, "learning_rate": 7.99095460420528e-06, "loss": 0.3047, "step": 10051 }, { "epoch": 0.58, "grad_norm": 0.3615354345631635, "learning_rate": 7.989131668876081e-06, "loss": 0.2347, "step": 10052 }, { "epoch": 0.58, "grad_norm": 0.3831280211895221, "learning_rate": 7.987308803183164e-06, "loss": 0.3028, "step": 10053 }, { "epoch": 0.58, "grad_norm": 0.3977051857632209, "learning_rate": 7.985486007189658e-06, "loss": 0.2917, "step": 10054 }, { "epoch": 0.58, "grad_norm": 0.3577210765059678, "learning_rate": 7.983663280958682e-06, "loss": 0.2307, "step": 10055 }, { "epoch": 0.58, "grad_norm": 0.37861640099556787, "learning_rate": 7.981840624553364e-06, "loss": 0.2822, "step": 10056 }, { "epoch": 0.58, "grad_norm": 0.3719306516421558, "learning_rate": 7.980018038036815e-06, "loss": 0.313, "step": 10057 }, { "epoch": 0.58, "grad_norm": 0.7053848501492725, "learning_rate": 7.978195521472157e-06, "loss": 0.3032, "step": 10058 }, { "epoch": 0.58, "grad_norm": 0.3060816409264136, "learning_rate": 7.976373074922498e-06, "loss": 0.2589, "step": 10059 }, { "epoch": 0.58, "grad_norm": 0.7008923801011527, "learning_rate": 7.974550698450956e-06, "loss": 0.5585, "step": 10060 }, { "epoch": 0.58, "grad_norm": 0.22270529904066977, "learning_rate": 7.972728392120634e-06, "loss": 0.1837, "step": 10061 }, { "epoch": 0.58, "grad_norm": 0.2558814298202647, "learning_rate": 7.970906155994646e-06, "loss": 0.2174, "step": 10062 }, { "epoch": 0.58, "grad_norm": 1.0344165023559295, "learning_rate": 7.969083990136084e-06, "loss": 0.5085, "step": 10063 }, { "epoch": 0.58, "grad_norm": 0.7397533513729492, "learning_rate": 7.967261894608058e-06, "loss": 0.3023, "step": 10064 }, { "epoch": 0.58, "grad_norm": 0.298771462575836, "learning_rate": 7.965439869473664e-06, "loss": 0.2718, "step": 10065 }, { "epoch": 0.58, "grad_norm": 0.492680151076164, "learning_rate": 7.963617914796002e-06, "loss": 0.3987, "step": 10066 }, { "epoch": 0.58, "grad_norm": 0.17351851611022887, "learning_rate": 7.961796030638162e-06, "loss": 0.096, "step": 10067 }, { "epoch": 0.58, "grad_norm": 0.34316673447226265, "learning_rate": 7.95997421706324e-06, "loss": 0.2893, "step": 10068 }, { "epoch": 0.58, "grad_norm": 0.3809433763662325, "learning_rate": 7.95815247413432e-06, "loss": 0.3244, "step": 10069 }, { "epoch": 0.58, "grad_norm": 0.8279519992417633, "learning_rate": 7.956330801914495e-06, "loss": 0.3853, "step": 10070 }, { "epoch": 0.58, "grad_norm": 0.3236230187941723, "learning_rate": 7.954509200466845e-06, "loss": 0.2006, "step": 10071 }, { "epoch": 0.58, "grad_norm": 0.9845789891659482, "learning_rate": 7.952687669854453e-06, "loss": 0.7532, "step": 10072 }, { "epoch": 0.58, "grad_norm": 0.2719432805518241, "learning_rate": 7.950866210140401e-06, "loss": 0.2321, "step": 10073 }, { "epoch": 0.58, "grad_norm": 0.25669260749844697, "learning_rate": 7.949044821387761e-06, "loss": 0.1774, "step": 10074 }, { "epoch": 0.58, "grad_norm": 0.8589351191506319, "learning_rate": 7.947223503659613e-06, "loss": 0.4398, "step": 10075 }, { "epoch": 0.58, "grad_norm": 0.8104627329921519, "learning_rate": 7.945402257019026e-06, "loss": 0.4334, "step": 10076 }, { "epoch": 0.58, "grad_norm": 0.28954849191593923, "learning_rate": 7.943581081529072e-06, "loss": 0.2135, "step": 10077 }, { "epoch": 0.58, "grad_norm": 0.5205399924008268, "learning_rate": 7.941759977252815e-06, "loss": 0.4581, "step": 10078 }, { "epoch": 0.58, "grad_norm": 0.28954303976259166, "learning_rate": 7.939938944253321e-06, "loss": 0.1576, "step": 10079 }, { "epoch": 0.58, "grad_norm": 0.3201876263622221, "learning_rate": 7.938117982593653e-06, "loss": 0.203, "step": 10080 }, { "epoch": 0.58, "grad_norm": 0.6596627329890012, "learning_rate": 7.936297092336872e-06, "loss": 0.3682, "step": 10081 }, { "epoch": 0.58, "grad_norm": 0.5906861427138822, "learning_rate": 7.934476273546032e-06, "loss": 0.3605, "step": 10082 }, { "epoch": 0.58, "grad_norm": 0.30195190043595455, "learning_rate": 7.932655526284192e-06, "loss": 0.2666, "step": 10083 }, { "epoch": 0.58, "grad_norm": 0.992932228624963, "learning_rate": 7.930834850614399e-06, "loss": 0.5499, "step": 10084 }, { "epoch": 0.58, "grad_norm": 0.28475196579428813, "learning_rate": 7.92901424659971e-06, "loss": 0.2582, "step": 10085 }, { "epoch": 0.58, "grad_norm": 0.32554733501197314, "learning_rate": 7.927193714303166e-06, "loss": 0.2448, "step": 10086 }, { "epoch": 0.58, "grad_norm": 0.38443162661917063, "learning_rate": 7.925373253787817e-06, "loss": 0.2281, "step": 10087 }, { "epoch": 0.58, "grad_norm": 0.4907863733563514, "learning_rate": 7.923552865116701e-06, "loss": 0.3353, "step": 10088 }, { "epoch": 0.58, "grad_norm": 0.39379032559924637, "learning_rate": 7.92173254835286e-06, "loss": 0.2464, "step": 10089 }, { "epoch": 0.58, "grad_norm": 0.37205057590132345, "learning_rate": 7.919912303559334e-06, "loss": 0.2769, "step": 10090 }, { "epoch": 0.58, "grad_norm": 0.37416042075941736, "learning_rate": 7.91809213079915e-06, "loss": 0.2306, "step": 10091 }, { "epoch": 0.58, "grad_norm": 0.36138527919034713, "learning_rate": 7.916272030135353e-06, "loss": 0.2915, "step": 10092 }, { "epoch": 0.58, "grad_norm": 0.3876317520648567, "learning_rate": 7.91445200163096e-06, "loss": 0.2792, "step": 10093 }, { "epoch": 0.58, "grad_norm": 0.3951992348618812, "learning_rate": 7.912632045349008e-06, "loss": 0.2647, "step": 10094 }, { "epoch": 0.58, "grad_norm": 0.33494661203789694, "learning_rate": 7.910812161352517e-06, "loss": 0.2683, "step": 10095 }, { "epoch": 0.58, "grad_norm": 0.9173367452830046, "learning_rate": 7.908992349704515e-06, "loss": 0.7132, "step": 10096 }, { "epoch": 0.58, "grad_norm": 0.3487243075292584, "learning_rate": 7.907172610468015e-06, "loss": 0.2215, "step": 10097 }, { "epoch": 0.58, "grad_norm": 0.32608359551160127, "learning_rate": 7.905352943706035e-06, "loss": 0.2646, "step": 10098 }, { "epoch": 0.58, "grad_norm": 0.42778870571825733, "learning_rate": 7.903533349481596e-06, "loss": 0.2771, "step": 10099 }, { "epoch": 0.58, "grad_norm": 0.29251505997739846, "learning_rate": 7.901713827857705e-06, "loss": 0.1692, "step": 10100 }, { "epoch": 0.58, "grad_norm": 0.28561704547379524, "learning_rate": 7.899894378897374e-06, "loss": 0.275, "step": 10101 }, { "epoch": 0.58, "grad_norm": 1.1525092467625975, "learning_rate": 7.898075002663612e-06, "loss": 0.7976, "step": 10102 }, { "epoch": 0.58, "grad_norm": 0.6225456716829496, "learning_rate": 7.89625569921942e-06, "loss": 0.335, "step": 10103 }, { "epoch": 0.58, "grad_norm": 0.3281355566608314, "learning_rate": 7.894436468627804e-06, "loss": 0.2564, "step": 10104 }, { "epoch": 0.58, "grad_norm": 0.36381880629843233, "learning_rate": 7.892617310951761e-06, "loss": 0.3175, "step": 10105 }, { "epoch": 0.58, "grad_norm": 0.2159222699318665, "learning_rate": 7.890798226254291e-06, "loss": 0.1054, "step": 10106 }, { "epoch": 0.58, "grad_norm": 0.35946385596297437, "learning_rate": 7.888979214598387e-06, "loss": 0.2295, "step": 10107 }, { "epoch": 0.58, "grad_norm": 0.4936241577077283, "learning_rate": 7.887160276047045e-06, "loss": 0.376, "step": 10108 }, { "epoch": 0.58, "grad_norm": 0.33073585211025036, "learning_rate": 7.885341410663248e-06, "loss": 0.2954, "step": 10109 }, { "epoch": 0.58, "grad_norm": 0.34079160331467057, "learning_rate": 7.883522618509989e-06, "loss": 0.2056, "step": 10110 }, { "epoch": 0.58, "grad_norm": 0.351615537794108, "learning_rate": 7.881703899650249e-06, "loss": 0.2591, "step": 10111 }, { "epoch": 0.58, "grad_norm": 0.39871038149815147, "learning_rate": 7.879885254147014e-06, "loss": 0.2566, "step": 10112 }, { "epoch": 0.58, "grad_norm": 0.24185991194383902, "learning_rate": 7.878066682063262e-06, "loss": 0.2185, "step": 10113 }, { "epoch": 0.58, "grad_norm": 1.558532170715619, "learning_rate": 7.876248183461967e-06, "loss": 0.7215, "step": 10114 }, { "epoch": 0.58, "grad_norm": 0.6384019101854085, "learning_rate": 7.874429758406108e-06, "loss": 0.3869, "step": 10115 }, { "epoch": 0.58, "grad_norm": 0.3028402041371424, "learning_rate": 7.872611406958653e-06, "loss": 0.2154, "step": 10116 }, { "epoch": 0.58, "grad_norm": 0.3622538842476559, "learning_rate": 7.870793129182577e-06, "loss": 0.2996, "step": 10117 }, { "epoch": 0.58, "grad_norm": 0.27173806317093663, "learning_rate": 7.86897492514084e-06, "loss": 0.1832, "step": 10118 }, { "epoch": 0.58, "grad_norm": 0.30861485287327556, "learning_rate": 7.86715679489641e-06, "loss": 0.2098, "step": 10119 }, { "epoch": 0.58, "grad_norm": 1.2068126960800758, "learning_rate": 7.86533873851225e-06, "loss": 0.6609, "step": 10120 }, { "epoch": 0.58, "grad_norm": 0.36384977954229525, "learning_rate": 7.863520756051317e-06, "loss": 0.3154, "step": 10121 }, { "epoch": 0.58, "grad_norm": 0.4049015882904989, "learning_rate": 7.861702847576568e-06, "loss": 0.293, "step": 10122 }, { "epoch": 0.58, "grad_norm": 0.5157148425875564, "learning_rate": 7.859885013150959e-06, "loss": 0.2408, "step": 10123 }, { "epoch": 0.58, "grad_norm": 0.23292577160493733, "learning_rate": 7.858067252837437e-06, "loss": 0.2067, "step": 10124 }, { "epoch": 0.58, "grad_norm": 0.5576874400588665, "learning_rate": 7.856249566698957e-06, "loss": 0.3294, "step": 10125 }, { "epoch": 0.58, "grad_norm": 0.331359061847375, "learning_rate": 7.854431954798463e-06, "loss": 0.2648, "step": 10126 }, { "epoch": 0.58, "grad_norm": 0.5790983417813569, "learning_rate": 7.852614417198894e-06, "loss": 0.3896, "step": 10127 }, { "epoch": 0.58, "grad_norm": 0.5886527390523273, "learning_rate": 7.850796953963198e-06, "loss": 0.3371, "step": 10128 }, { "epoch": 0.58, "grad_norm": 0.2781699195992228, "learning_rate": 7.848979565154314e-06, "loss": 0.2399, "step": 10129 }, { "epoch": 0.58, "grad_norm": 0.25233740774202973, "learning_rate": 7.847162250835171e-06, "loss": 0.1594, "step": 10130 }, { "epoch": 0.58, "grad_norm": 0.5636084765264904, "learning_rate": 7.845345011068709e-06, "loss": 0.4195, "step": 10131 }, { "epoch": 0.58, "grad_norm": 0.29482534737792404, "learning_rate": 7.843527845917855e-06, "loss": 0.2604, "step": 10132 }, { "epoch": 0.58, "grad_norm": 0.44948437825709303, "learning_rate": 7.84171075544554e-06, "loss": 0.3688, "step": 10133 }, { "epoch": 0.58, "grad_norm": 0.386973790913395, "learning_rate": 7.839893739714686e-06, "loss": 0.2615, "step": 10134 }, { "epoch": 0.58, "grad_norm": 0.4697085406586142, "learning_rate": 7.83807679878822e-06, "loss": 0.3673, "step": 10135 }, { "epoch": 0.58, "grad_norm": 0.2578535107262174, "learning_rate": 7.836259932729062e-06, "loss": 0.203, "step": 10136 }, { "epoch": 0.58, "grad_norm": 0.3996683546043715, "learning_rate": 7.834443141600131e-06, "loss": 0.3033, "step": 10137 }, { "epoch": 0.58, "grad_norm": 0.5165287449268846, "learning_rate": 7.83262642546434e-06, "loss": 0.3747, "step": 10138 }, { "epoch": 0.58, "grad_norm": 0.4618223329699041, "learning_rate": 7.830809784384602e-06, "loss": 0.2441, "step": 10139 }, { "epoch": 0.58, "grad_norm": 0.30623960016940566, "learning_rate": 7.828993218423829e-06, "loss": 0.2678, "step": 10140 }, { "epoch": 0.58, "grad_norm": 0.35976134707155416, "learning_rate": 7.827176727644925e-06, "loss": 0.3079, "step": 10141 }, { "epoch": 0.58, "grad_norm": 0.28324400036157693, "learning_rate": 7.825360312110801e-06, "loss": 0.1523, "step": 10142 }, { "epoch": 0.58, "grad_norm": 0.7363848108703631, "learning_rate": 7.823543971884353e-06, "loss": 0.3966, "step": 10143 }, { "epoch": 0.58, "grad_norm": 0.3260077080948101, "learning_rate": 7.821727707028486e-06, "loss": 0.2802, "step": 10144 }, { "epoch": 0.58, "grad_norm": 0.27632507657679467, "learning_rate": 7.81991151760609e-06, "loss": 0.2405, "step": 10145 }, { "epoch": 0.58, "grad_norm": 0.3488908220712134, "learning_rate": 7.81809540368007e-06, "loss": 0.1139, "step": 10146 }, { "epoch": 0.58, "grad_norm": 0.37529873675347386, "learning_rate": 7.81627936531331e-06, "loss": 0.2968, "step": 10147 }, { "epoch": 0.58, "grad_norm": 0.3596295347448901, "learning_rate": 7.814463402568703e-06, "loss": 0.3217, "step": 10148 }, { "epoch": 0.58, "grad_norm": 0.6919007612501898, "learning_rate": 7.812647515509131e-06, "loss": 0.245, "step": 10149 }, { "epoch": 0.58, "grad_norm": 0.36005726352566714, "learning_rate": 7.810831704197486e-06, "loss": 0.3193, "step": 10150 }, { "epoch": 0.58, "grad_norm": 0.3847714209623518, "learning_rate": 7.80901596869664e-06, "loss": 0.2601, "step": 10151 }, { "epoch": 0.58, "grad_norm": 0.21052130026797297, "learning_rate": 7.807200309069482e-06, "loss": 0.1744, "step": 10152 }, { "epoch": 0.58, "grad_norm": 0.40305583102744774, "learning_rate": 7.805384725378881e-06, "loss": 0.3051, "step": 10153 }, { "epoch": 0.58, "grad_norm": 1.0107321479306213, "learning_rate": 7.803569217687711e-06, "loss": 0.5662, "step": 10154 }, { "epoch": 0.58, "grad_norm": 0.6122037063934481, "learning_rate": 7.801753786058847e-06, "loss": 0.2446, "step": 10155 }, { "epoch": 0.58, "grad_norm": 0.5214675627145766, "learning_rate": 7.799938430555152e-06, "loss": 0.3822, "step": 10156 }, { "epoch": 0.58, "grad_norm": 0.35482096271578345, "learning_rate": 7.798123151239497e-06, "loss": 0.3462, "step": 10157 }, { "epoch": 0.58, "grad_norm": 0.24059969586075008, "learning_rate": 7.79630794817474e-06, "loss": 0.1636, "step": 10158 }, { "epoch": 0.58, "grad_norm": 0.39992023315441017, "learning_rate": 7.794492821423747e-06, "loss": 0.2323, "step": 10159 }, { "epoch": 0.58, "grad_norm": 0.36592149517228917, "learning_rate": 7.79267777104937e-06, "loss": 0.3129, "step": 10160 }, { "epoch": 0.58, "grad_norm": 0.6464934747226485, "learning_rate": 7.79086279711447e-06, "loss": 0.3873, "step": 10161 }, { "epoch": 0.58, "grad_norm": 0.32063480202839684, "learning_rate": 7.789047899681893e-06, "loss": 0.2247, "step": 10162 }, { "epoch": 0.58, "grad_norm": 0.2778589463458745, "learning_rate": 7.787233078814497e-06, "loss": 0.2119, "step": 10163 }, { "epoch": 0.58, "grad_norm": 0.3474062922703908, "learning_rate": 7.785418334575122e-06, "loss": 0.3213, "step": 10164 }, { "epoch": 0.58, "grad_norm": 0.3227767768744291, "learning_rate": 7.783603667026616e-06, "loss": 0.2222, "step": 10165 }, { "epoch": 0.58, "grad_norm": 0.7112062417756925, "learning_rate": 7.781789076231815e-06, "loss": 0.387, "step": 10166 }, { "epoch": 0.58, "grad_norm": 0.5940613791092809, "learning_rate": 7.779974562253568e-06, "loss": 0.3781, "step": 10167 }, { "epoch": 0.58, "grad_norm": 0.24669557310007448, "learning_rate": 7.778160125154702e-06, "loss": 0.2217, "step": 10168 }, { "epoch": 0.58, "grad_norm": 1.3234491850879115, "learning_rate": 7.776345764998059e-06, "loss": 0.8241, "step": 10169 }, { "epoch": 0.58, "grad_norm": 0.2135022809142091, "learning_rate": 7.774531481846464e-06, "loss": 0.1409, "step": 10170 }, { "epoch": 0.58, "grad_norm": 0.33929896012752037, "learning_rate": 7.77271727576275e-06, "loss": 0.2906, "step": 10171 }, { "epoch": 0.58, "grad_norm": 0.423664471917811, "learning_rate": 7.770903146809738e-06, "loss": 0.2725, "step": 10172 }, { "epoch": 0.58, "grad_norm": 0.6369728062982355, "learning_rate": 7.769089095050258e-06, "loss": 0.3813, "step": 10173 }, { "epoch": 0.58, "grad_norm": 0.3777810720627077, "learning_rate": 7.767275120547123e-06, "loss": 0.2917, "step": 10174 }, { "epoch": 0.58, "grad_norm": 0.24151161950766775, "learning_rate": 7.765461223363158e-06, "loss": 0.1624, "step": 10175 }, { "epoch": 0.58, "grad_norm": 0.26233751290851975, "learning_rate": 7.76364740356117e-06, "loss": 0.2287, "step": 10176 }, { "epoch": 0.58, "grad_norm": 0.4027707555176561, "learning_rate": 7.76183366120398e-06, "loss": 0.333, "step": 10177 }, { "epoch": 0.58, "grad_norm": 0.9316383400032427, "learning_rate": 7.760019996354396e-06, "loss": 0.3163, "step": 10178 }, { "epoch": 0.58, "grad_norm": 0.900838231219768, "learning_rate": 7.75820640907522e-06, "loss": 0.6042, "step": 10179 }, { "epoch": 0.58, "grad_norm": 0.26204651119783984, "learning_rate": 7.75639289942926e-06, "loss": 0.2604, "step": 10180 }, { "epoch": 0.58, "grad_norm": 0.4479463747107403, "learning_rate": 7.754579467479318e-06, "loss": 0.2993, "step": 10181 }, { "epoch": 0.59, "grad_norm": 0.33894552794765487, "learning_rate": 7.752766113288192e-06, "loss": 0.1901, "step": 10182 }, { "epoch": 0.59, "grad_norm": 0.3762907960074976, "learning_rate": 7.750952836918679e-06, "loss": 0.2689, "step": 10183 }, { "epoch": 0.59, "grad_norm": 0.414454215601207, "learning_rate": 7.749139638433573e-06, "loss": 0.3139, "step": 10184 }, { "epoch": 0.59, "grad_norm": 0.586844077895301, "learning_rate": 7.747326517895662e-06, "loss": 0.135, "step": 10185 }, { "epoch": 0.59, "grad_norm": 0.32530862088865625, "learning_rate": 7.74551347536774e-06, "loss": 0.2699, "step": 10186 }, { "epoch": 0.59, "grad_norm": 1.1672584420814525, "learning_rate": 7.743700510912588e-06, "loss": 0.6975, "step": 10187 }, { "epoch": 0.59, "grad_norm": 0.24495780308060752, "learning_rate": 7.741887624592992e-06, "loss": 0.2082, "step": 10188 }, { "epoch": 0.59, "grad_norm": 0.36084956108999233, "learning_rate": 7.740074816471727e-06, "loss": 0.277, "step": 10189 }, { "epoch": 0.59, "grad_norm": 0.40954337704045063, "learning_rate": 7.738262086611578e-06, "loss": 0.2866, "step": 10190 }, { "epoch": 0.59, "grad_norm": 0.36418034114752734, "learning_rate": 7.736449435075314e-06, "loss": 0.2365, "step": 10191 }, { "epoch": 0.59, "grad_norm": 0.35156018638606407, "learning_rate": 7.734636861925706e-06, "loss": 0.2822, "step": 10192 }, { "epoch": 0.59, "grad_norm": 0.4751770010443096, "learning_rate": 7.732824367225531e-06, "loss": 0.3951, "step": 10193 }, { "epoch": 0.59, "grad_norm": 0.6841245197795643, "learning_rate": 7.731011951037547e-06, "loss": 0.2865, "step": 10194 }, { "epoch": 0.59, "grad_norm": 0.3483252023779255, "learning_rate": 7.729199613424523e-06, "loss": 0.2767, "step": 10195 }, { "epoch": 0.59, "grad_norm": 0.24313842780450307, "learning_rate": 7.727387354449217e-06, "loss": 0.2301, "step": 10196 }, { "epoch": 0.59, "grad_norm": 1.0930305495887416, "learning_rate": 7.725575174174395e-06, "loss": 0.6972, "step": 10197 }, { "epoch": 0.59, "grad_norm": 0.28099806807697314, "learning_rate": 7.723763072662804e-06, "loss": 0.2012, "step": 10198 }, { "epoch": 0.59, "grad_norm": 0.8655932878922294, "learning_rate": 7.721951049977196e-06, "loss": 0.4398, "step": 10199 }, { "epoch": 0.59, "grad_norm": 0.36319675934158235, "learning_rate": 7.72013910618033e-06, "loss": 0.3273, "step": 10200 }, { "epoch": 0.59, "grad_norm": 0.31370298470256497, "learning_rate": 7.718327241334944e-06, "loss": 0.2333, "step": 10201 }, { "epoch": 0.59, "grad_norm": 0.2954558356084548, "learning_rate": 7.716515455503791e-06, "loss": 0.201, "step": 10202 }, { "epoch": 0.59, "grad_norm": 0.4505771810527777, "learning_rate": 7.71470374874961e-06, "loss": 0.322, "step": 10203 }, { "epoch": 0.59, "grad_norm": 0.27661425856954885, "learning_rate": 7.712892121135136e-06, "loss": 0.2182, "step": 10204 }, { "epoch": 0.59, "grad_norm": 1.2487777153308748, "learning_rate": 7.711080572723113e-06, "loss": 0.7325, "step": 10205 }, { "epoch": 0.59, "grad_norm": 0.7749108515324832, "learning_rate": 7.709269103576269e-06, "loss": 0.3924, "step": 10206 }, { "epoch": 0.59, "grad_norm": 0.31900576203015873, "learning_rate": 7.70745771375734e-06, "loss": 0.1904, "step": 10207 }, { "epoch": 0.59, "grad_norm": 0.3050746601489578, "learning_rate": 7.70564640332905e-06, "loss": 0.2411, "step": 10208 }, { "epoch": 0.59, "grad_norm": 0.4378828524958338, "learning_rate": 7.703835172354127e-06, "loss": 0.251, "step": 10209 }, { "epoch": 0.59, "grad_norm": 0.32933250069623154, "learning_rate": 7.702024020895292e-06, "loss": 0.241, "step": 10210 }, { "epoch": 0.59, "grad_norm": 0.8241027953814687, "learning_rate": 7.70021294901527e-06, "loss": 0.3369, "step": 10211 }, { "epoch": 0.59, "grad_norm": 0.3543263899051379, "learning_rate": 7.69840195677677e-06, "loss": 0.3201, "step": 10212 }, { "epoch": 0.59, "grad_norm": 0.4099252481168575, "learning_rate": 7.696591044242513e-06, "loss": 0.3004, "step": 10213 }, { "epoch": 0.59, "grad_norm": 0.2820915527848439, "learning_rate": 7.694780211475209e-06, "loss": 0.166, "step": 10214 }, { "epoch": 0.59, "grad_norm": 0.3524674573849591, "learning_rate": 7.692969458537568e-06, "loss": 0.3248, "step": 10215 }, { "epoch": 0.59, "grad_norm": 0.43080167749690207, "learning_rate": 7.691158785492294e-06, "loss": 0.3089, "step": 10216 }, { "epoch": 0.59, "grad_norm": 0.6922394291237297, "learning_rate": 7.689348192402095e-06, "loss": 0.2959, "step": 10217 }, { "epoch": 0.59, "grad_norm": 0.9169189348266817, "learning_rate": 7.687537679329668e-06, "loss": 0.507, "step": 10218 }, { "epoch": 0.59, "grad_norm": 0.3090444070036527, "learning_rate": 7.685727246337709e-06, "loss": 0.2482, "step": 10219 }, { "epoch": 0.59, "grad_norm": 0.26794700844517866, "learning_rate": 7.683916893488918e-06, "loss": 0.2302, "step": 10220 }, { "epoch": 0.59, "grad_norm": 0.29467839115121064, "learning_rate": 7.682106620845984e-06, "loss": 0.1529, "step": 10221 }, { "epoch": 0.59, "grad_norm": 0.35073917502149066, "learning_rate": 7.6802964284716e-06, "loss": 0.2598, "step": 10222 }, { "epoch": 0.59, "grad_norm": 1.202371112662339, "learning_rate": 7.678486316428449e-06, "loss": 0.4024, "step": 10223 }, { "epoch": 0.59, "grad_norm": 0.3608808093489747, "learning_rate": 7.676676284779217e-06, "loss": 0.276, "step": 10224 }, { "epoch": 0.59, "grad_norm": 0.32842709343076865, "learning_rate": 7.674866333586586e-06, "loss": 0.2837, "step": 10225 }, { "epoch": 0.59, "grad_norm": 0.9377059450455212, "learning_rate": 7.673056462913235e-06, "loss": 0.5578, "step": 10226 }, { "epoch": 0.59, "grad_norm": 0.22635475620796228, "learning_rate": 7.671246672821837e-06, "loss": 0.1744, "step": 10227 }, { "epoch": 0.59, "grad_norm": 0.34285947837407843, "learning_rate": 7.669436963375067e-06, "loss": 0.2717, "step": 10228 }, { "epoch": 0.59, "grad_norm": 1.1313131442190563, "learning_rate": 7.667627334635595e-06, "loss": 0.46, "step": 10229 }, { "epoch": 0.59, "grad_norm": 0.8438766470568757, "learning_rate": 7.665817786666088e-06, "loss": 0.3498, "step": 10230 }, { "epoch": 0.59, "grad_norm": 0.4222669207775579, "learning_rate": 7.664008319529215e-06, "loss": 0.3328, "step": 10231 }, { "epoch": 0.59, "grad_norm": 0.3097461985519247, "learning_rate": 7.66219893328763e-06, "loss": 0.2693, "step": 10232 }, { "epoch": 0.59, "grad_norm": 0.4686433548216791, "learning_rate": 7.660389628003993e-06, "loss": 0.3129, "step": 10233 }, { "epoch": 0.59, "grad_norm": 0.42214270318871294, "learning_rate": 7.658580403740965e-06, "loss": 0.2034, "step": 10234 }, { "epoch": 0.59, "grad_norm": 0.5099855157917578, "learning_rate": 7.656771260561195e-06, "loss": 0.3126, "step": 10235 }, { "epoch": 0.59, "grad_norm": 0.2866231632582778, "learning_rate": 7.654962198527338e-06, "loss": 0.2458, "step": 10236 }, { "epoch": 0.59, "grad_norm": 0.2955888837904505, "learning_rate": 7.653153217702036e-06, "loss": 0.1982, "step": 10237 }, { "epoch": 0.59, "grad_norm": 0.38594128734646455, "learning_rate": 7.651344318147941e-06, "loss": 0.24, "step": 10238 }, { "epoch": 0.59, "grad_norm": 0.493756437636117, "learning_rate": 7.649535499927688e-06, "loss": 0.3752, "step": 10239 }, { "epoch": 0.59, "grad_norm": 0.25207716691183124, "learning_rate": 7.647726763103923e-06, "loss": 0.2299, "step": 10240 }, { "epoch": 0.59, "grad_norm": 0.7780560932689861, "learning_rate": 7.645918107739274e-06, "loss": 0.4704, "step": 10241 }, { "epoch": 0.59, "grad_norm": 0.3710784879455275, "learning_rate": 7.644109533896384e-06, "loss": 0.2544, "step": 10242 }, { "epoch": 0.59, "grad_norm": 0.28552083656844823, "learning_rate": 7.642301041637879e-06, "loss": 0.2177, "step": 10243 }, { "epoch": 0.59, "grad_norm": 0.47267057079111247, "learning_rate": 7.640492631026387e-06, "loss": 0.3323, "step": 10244 }, { "epoch": 0.59, "grad_norm": 0.7320353696301287, "learning_rate": 7.638684302124533e-06, "loss": 0.4111, "step": 10245 }, { "epoch": 0.59, "grad_norm": 0.4219758171502595, "learning_rate": 7.63687605499494e-06, "loss": 0.2955, "step": 10246 }, { "epoch": 0.59, "grad_norm": 0.36371007108240666, "learning_rate": 7.635067889700228e-06, "loss": 0.2585, "step": 10247 }, { "epoch": 0.59, "grad_norm": 0.24237484085577543, "learning_rate": 7.633259806303012e-06, "loss": 0.2022, "step": 10248 }, { "epoch": 0.59, "grad_norm": 0.5130982347891825, "learning_rate": 7.63145180486591e-06, "loss": 0.2591, "step": 10249 }, { "epoch": 0.59, "grad_norm": 0.3943639466503955, "learning_rate": 7.629643885451527e-06, "loss": 0.212, "step": 10250 }, { "epoch": 0.59, "grad_norm": 0.42780601563784476, "learning_rate": 7.627836048122477e-06, "loss": 0.3231, "step": 10251 }, { "epoch": 0.59, "grad_norm": 0.5485089590488623, "learning_rate": 7.626028292941361e-06, "loss": 0.3429, "step": 10252 }, { "epoch": 0.59, "grad_norm": 0.38528395368808604, "learning_rate": 7.624220619970784e-06, "loss": 0.2701, "step": 10253 }, { "epoch": 0.59, "grad_norm": 0.27098911193826375, "learning_rate": 7.622413029273343e-06, "loss": 0.1638, "step": 10254 }, { "epoch": 0.59, "grad_norm": 0.301421858486742, "learning_rate": 7.62060552091164e-06, "loss": 0.2811, "step": 10255 }, { "epoch": 0.59, "grad_norm": 0.3772924937493043, "learning_rate": 7.618798094948262e-06, "loss": 0.2382, "step": 10256 }, { "epoch": 0.59, "grad_norm": 0.6013924440847807, "learning_rate": 7.616990751445806e-06, "loss": 0.4171, "step": 10257 }, { "epoch": 0.59, "grad_norm": 0.39066029970772426, "learning_rate": 7.615183490466858e-06, "loss": 0.2633, "step": 10258 }, { "epoch": 0.59, "grad_norm": 0.36730340180357374, "learning_rate": 7.613376312074001e-06, "loss": 0.3297, "step": 10259 }, { "epoch": 0.59, "grad_norm": 0.2195929862317336, "learning_rate": 7.611569216329821e-06, "loss": 0.1535, "step": 10260 }, { "epoch": 0.59, "grad_norm": 0.40569141532500297, "learning_rate": 7.609762203296896e-06, "loss": 0.2855, "step": 10261 }, { "epoch": 0.59, "grad_norm": 0.566404154555934, "learning_rate": 7.607955273037804e-06, "loss": 0.4164, "step": 10262 }, { "epoch": 0.59, "grad_norm": 0.3857204802136643, "learning_rate": 7.606148425615117e-06, "loss": 0.2772, "step": 10263 }, { "epoch": 0.59, "grad_norm": 0.608997704118891, "learning_rate": 7.604341661091409e-06, "loss": 0.3627, "step": 10264 }, { "epoch": 0.59, "grad_norm": 0.4045959883896538, "learning_rate": 7.602534979529246e-06, "loss": 0.3597, "step": 10265 }, { "epoch": 0.59, "grad_norm": 0.20847234205880777, "learning_rate": 7.600728380991191e-06, "loss": 0.1583, "step": 10266 }, { "epoch": 0.59, "grad_norm": 0.39509917853189525, "learning_rate": 7.598921865539811e-06, "loss": 0.2954, "step": 10267 }, { "epoch": 0.59, "grad_norm": 0.33854641736778623, "learning_rate": 7.597115433237664e-06, "loss": 0.2913, "step": 10268 }, { "epoch": 0.59, "grad_norm": 0.5218405160890431, "learning_rate": 7.5953090841473035e-06, "loss": 0.3258, "step": 10269 }, { "epoch": 0.59, "grad_norm": 0.5757741501759367, "learning_rate": 7.593502818331289e-06, "loss": 0.3423, "step": 10270 }, { "epoch": 0.59, "grad_norm": 0.2601837400754058, "learning_rate": 7.5916966358521645e-06, "loss": 0.2893, "step": 10271 }, { "epoch": 0.59, "grad_norm": 0.43141156338626285, "learning_rate": 7.589890536772486e-06, "loss": 0.2517, "step": 10272 }, { "epoch": 0.59, "grad_norm": 0.2912064861117238, "learning_rate": 7.588084521154791e-06, "loss": 0.1262, "step": 10273 }, { "epoch": 0.59, "grad_norm": 0.39090096704594307, "learning_rate": 7.586278589061628e-06, "loss": 0.3112, "step": 10274 }, { "epoch": 0.59, "grad_norm": 0.50511253032244, "learning_rate": 7.584472740555533e-06, "loss": 0.3057, "step": 10275 }, { "epoch": 0.59, "grad_norm": 0.3452619407210834, "learning_rate": 7.582666975699043e-06, "loss": 0.1574, "step": 10276 }, { "epoch": 0.59, "grad_norm": 0.37024431661053503, "learning_rate": 7.5808612945546915e-06, "loss": 0.3077, "step": 10277 }, { "epoch": 0.59, "grad_norm": 0.44955413310834985, "learning_rate": 7.5790556971850095e-06, "loss": 0.2907, "step": 10278 }, { "epoch": 0.59, "grad_norm": 0.22616688185308217, "learning_rate": 7.577250183652523e-06, "loss": 0.1954, "step": 10279 }, { "epoch": 0.59, "grad_norm": 0.4488134009283226, "learning_rate": 7.575444754019762e-06, "loss": 0.3517, "step": 10280 }, { "epoch": 0.59, "grad_norm": 0.6806739153782752, "learning_rate": 7.5736394083492414e-06, "loss": 0.4589, "step": 10281 }, { "epoch": 0.59, "grad_norm": 0.38788290607606046, "learning_rate": 7.571834146703486e-06, "loss": 0.1904, "step": 10282 }, { "epoch": 0.59, "grad_norm": 0.3014316610003963, "learning_rate": 7.57002896914501e-06, "loss": 0.2887, "step": 10283 }, { "epoch": 0.59, "grad_norm": 0.4034575913681455, "learning_rate": 7.568223875736325e-06, "loss": 0.2604, "step": 10284 }, { "epoch": 0.59, "grad_norm": 0.6416524913164146, "learning_rate": 7.566418866539944e-06, "loss": 0.3879, "step": 10285 }, { "epoch": 0.59, "grad_norm": 0.23814333645697722, "learning_rate": 7.5646139416183705e-06, "loss": 0.1818, "step": 10286 }, { "epoch": 0.59, "grad_norm": 0.35517257625022275, "learning_rate": 7.562809101034114e-06, "loss": 0.3117, "step": 10287 }, { "epoch": 0.59, "grad_norm": 1.2153446890355475, "learning_rate": 7.56100434484967e-06, "loss": 0.7712, "step": 10288 }, { "epoch": 0.59, "grad_norm": 0.30753262429141587, "learning_rate": 7.559199673127545e-06, "loss": 0.1986, "step": 10289 }, { "epoch": 0.59, "grad_norm": 0.7300295591145592, "learning_rate": 7.557395085930227e-06, "loss": 0.4217, "step": 10290 }, { "epoch": 0.59, "grad_norm": 0.32301624520605615, "learning_rate": 7.555590583320214e-06, "loss": 0.2986, "step": 10291 }, { "epoch": 0.59, "grad_norm": 0.255707287279116, "learning_rate": 7.553786165359993e-06, "loss": 0.1879, "step": 10292 }, { "epoch": 0.59, "grad_norm": 0.41385565707671423, "learning_rate": 7.551981832112054e-06, "loss": 0.2826, "step": 10293 }, { "epoch": 0.59, "grad_norm": 0.38640746910827395, "learning_rate": 7.550177583638876e-06, "loss": 0.3198, "step": 10294 }, { "epoch": 0.59, "grad_norm": 0.2565067070850015, "learning_rate": 7.548373420002945e-06, "loss": 0.2217, "step": 10295 }, { "epoch": 0.59, "grad_norm": 0.7864258593626743, "learning_rate": 7.546569341266737e-06, "loss": 0.4573, "step": 10296 }, { "epoch": 0.59, "grad_norm": 0.5094781014286576, "learning_rate": 7.544765347492727e-06, "loss": 0.3626, "step": 10297 }, { "epoch": 0.59, "grad_norm": 0.3531764204079477, "learning_rate": 7.542961438743389e-06, "loss": 0.2905, "step": 10298 }, { "epoch": 0.59, "grad_norm": 0.22608108952594327, "learning_rate": 7.54115761508119e-06, "loss": 0.1933, "step": 10299 }, { "epoch": 0.59, "grad_norm": 0.5899693029472266, "learning_rate": 7.539353876568594e-06, "loss": 0.4156, "step": 10300 }, { "epoch": 0.59, "grad_norm": 0.3815342442871858, "learning_rate": 7.537550223268071e-06, "loss": 0.3133, "step": 10301 }, { "epoch": 0.59, "grad_norm": 0.7859397443567046, "learning_rate": 7.5357466552420745e-06, "loss": 0.2873, "step": 10302 }, { "epoch": 0.59, "grad_norm": 0.46196064535471065, "learning_rate": 7.533943172553068e-06, "loss": 0.336, "step": 10303 }, { "epoch": 0.59, "grad_norm": 0.3143589100536943, "learning_rate": 7.5321397752635e-06, "loss": 0.2493, "step": 10304 }, { "epoch": 0.59, "grad_norm": 0.1956301211002063, "learning_rate": 7.53033646343583e-06, "loss": 0.116, "step": 10305 }, { "epoch": 0.59, "grad_norm": 0.33209653400560535, "learning_rate": 7.528533237132498e-06, "loss": 0.3106, "step": 10306 }, { "epoch": 0.59, "grad_norm": 0.324432066107435, "learning_rate": 7.526730096415957e-06, "loss": 0.2727, "step": 10307 }, { "epoch": 0.59, "grad_norm": 0.6904170847854164, "learning_rate": 7.524927041348646e-06, "loss": 0.4153, "step": 10308 }, { "epoch": 0.59, "grad_norm": 0.5469584121601104, "learning_rate": 7.523124071993004e-06, "loss": 0.2054, "step": 10309 }, { "epoch": 0.59, "grad_norm": 0.34453117937491873, "learning_rate": 7.521321188411469e-06, "loss": 0.2789, "step": 10310 }, { "epoch": 0.59, "grad_norm": 0.2578278431274199, "learning_rate": 7.519518390666474e-06, "loss": 0.237, "step": 10311 }, { "epoch": 0.59, "grad_norm": 0.22170453038599644, "learning_rate": 7.517715678820452e-06, "loss": 0.1511, "step": 10312 }, { "epoch": 0.59, "grad_norm": 0.3583110798163475, "learning_rate": 7.515913052935827e-06, "loss": 0.2837, "step": 10313 }, { "epoch": 0.59, "grad_norm": 0.7549106305530019, "learning_rate": 7.514110513075028e-06, "loss": 0.4292, "step": 10314 }, { "epoch": 0.59, "grad_norm": 0.291936518934735, "learning_rate": 7.512308059300474e-06, "loss": 0.2353, "step": 10315 }, { "epoch": 0.59, "grad_norm": 0.3369054517703914, "learning_rate": 7.510505691674586e-06, "loss": 0.2834, "step": 10316 }, { "epoch": 0.59, "grad_norm": 0.2868584847338976, "learning_rate": 7.5087034102597775e-06, "loss": 0.1644, "step": 10317 }, { "epoch": 0.59, "grad_norm": 0.2998619310436137, "learning_rate": 7.506901215118465e-06, "loss": 0.2527, "step": 10318 }, { "epoch": 0.59, "grad_norm": 0.36454420205043037, "learning_rate": 7.505099106313053e-06, "loss": 0.2659, "step": 10319 }, { "epoch": 0.59, "grad_norm": 1.0795147065212092, "learning_rate": 7.503297083905955e-06, "loss": 0.3916, "step": 10320 }, { "epoch": 0.59, "grad_norm": 1.1310884841179982, "learning_rate": 7.5014951479595684e-06, "loss": 0.5808, "step": 10321 }, { "epoch": 0.59, "grad_norm": 0.2958718654952075, "learning_rate": 7.499693298536301e-06, "loss": 0.1898, "step": 10322 }, { "epoch": 0.59, "grad_norm": 0.28992196328142583, "learning_rate": 7.497891535698546e-06, "loss": 0.2596, "step": 10323 }, { "epoch": 0.59, "grad_norm": 1.1602792649842537, "learning_rate": 7.496089859508697e-06, "loss": 0.5329, "step": 10324 }, { "epoch": 0.59, "grad_norm": 0.3745377074556588, "learning_rate": 7.494288270029152e-06, "loss": 0.2368, "step": 10325 }, { "epoch": 0.59, "grad_norm": 0.31340930333139033, "learning_rate": 7.492486767322293e-06, "loss": 0.2712, "step": 10326 }, { "epoch": 0.59, "grad_norm": 0.46084638494659036, "learning_rate": 7.490685351450513e-06, "loss": 0.3427, "step": 10327 }, { "epoch": 0.59, "grad_norm": 0.3007749876753642, "learning_rate": 7.488884022476189e-06, "loss": 0.1969, "step": 10328 }, { "epoch": 0.59, "grad_norm": 1.0453565752421365, "learning_rate": 7.487082780461704e-06, "loss": 0.5567, "step": 10329 }, { "epoch": 0.59, "grad_norm": 0.40266828324910653, "learning_rate": 7.485281625469432e-06, "loss": 0.3253, "step": 10330 }, { "epoch": 0.59, "grad_norm": 0.33743676652775795, "learning_rate": 7.483480557561753e-06, "loss": 0.2332, "step": 10331 }, { "epoch": 0.59, "grad_norm": 0.4111867303618263, "learning_rate": 7.481679576801035e-06, "loss": 0.2775, "step": 10332 }, { "epoch": 0.59, "grad_norm": 0.4693763211900533, "learning_rate": 7.479878683249642e-06, "loss": 0.2668, "step": 10333 }, { "epoch": 0.59, "grad_norm": 0.503106972794314, "learning_rate": 7.478077876969943e-06, "loss": 0.2492, "step": 10334 }, { "epoch": 0.59, "grad_norm": 0.32620964008429154, "learning_rate": 7.476277158024299e-06, "loss": 0.2576, "step": 10335 }, { "epoch": 0.59, "grad_norm": 0.6618248472899589, "learning_rate": 7.474476526475066e-06, "loss": 0.4087, "step": 10336 }, { "epoch": 0.59, "grad_norm": 0.42152985701301704, "learning_rate": 7.4726759823846054e-06, "loss": 0.2839, "step": 10337 }, { "epoch": 0.59, "grad_norm": 0.2510070165287194, "learning_rate": 7.470875525815263e-06, "loss": 0.2232, "step": 10338 }, { "epoch": 0.59, "grad_norm": 0.42724280486977506, "learning_rate": 7.4690751568293955e-06, "loss": 0.2797, "step": 10339 }, { "epoch": 0.59, "grad_norm": 0.37480903963553386, "learning_rate": 7.467274875489345e-06, "loss": 0.2427, "step": 10340 }, { "epoch": 0.59, "grad_norm": 0.41897775646501684, "learning_rate": 7.465474681857459e-06, "loss": 0.2865, "step": 10341 }, { "epoch": 0.59, "grad_norm": 0.3516394732341963, "learning_rate": 7.463674575996072e-06, "loss": 0.3245, "step": 10342 }, { "epoch": 0.59, "grad_norm": 0.3376477205658888, "learning_rate": 7.461874557967528e-06, "loss": 0.2722, "step": 10343 }, { "epoch": 0.59, "grad_norm": 0.2938787637187035, "learning_rate": 7.4600746278341575e-06, "loss": 0.1601, "step": 10344 }, { "epoch": 0.59, "grad_norm": 0.4272505094482967, "learning_rate": 7.458274785658295e-06, "loss": 0.2518, "step": 10345 }, { "epoch": 0.59, "grad_norm": 0.32516909890328166, "learning_rate": 7.4564750315022645e-06, "loss": 0.2598, "step": 10346 }, { "epoch": 0.59, "grad_norm": 0.36114353203460814, "learning_rate": 7.454675365428397e-06, "loss": 0.305, "step": 10347 }, { "epoch": 0.59, "grad_norm": 0.5091804731487372, "learning_rate": 7.452875787499012e-06, "loss": 0.3558, "step": 10348 }, { "epoch": 0.59, "grad_norm": 0.3474769886170156, "learning_rate": 7.451076297776427e-06, "loss": 0.2804, "step": 10349 }, { "epoch": 0.59, "grad_norm": 0.45404733187295016, "learning_rate": 7.4492768963229635e-06, "loss": 0.3461, "step": 10350 }, { "epoch": 0.59, "grad_norm": 0.22564809690050838, "learning_rate": 7.447477583200928e-06, "loss": 0.1469, "step": 10351 }, { "epoch": 0.59, "grad_norm": 0.41250335520063286, "learning_rate": 7.445678358472637e-06, "loss": 0.2752, "step": 10352 }, { "epoch": 0.59, "grad_norm": 0.43262651395981655, "learning_rate": 7.443879222200392e-06, "loss": 0.3522, "step": 10353 }, { "epoch": 0.59, "grad_norm": 0.3365479924421112, "learning_rate": 7.442080174446502e-06, "loss": 0.2761, "step": 10354 }, { "epoch": 0.59, "grad_norm": 0.39423987473222083, "learning_rate": 7.440281215273262e-06, "loss": 0.2763, "step": 10355 }, { "epoch": 0.6, "grad_norm": 0.4763702671153097, "learning_rate": 7.438482344742977e-06, "loss": 0.3736, "step": 10356 }, { "epoch": 0.6, "grad_norm": 0.24311212151397302, "learning_rate": 7.436683562917937e-06, "loss": 0.1081, "step": 10357 }, { "epoch": 0.6, "grad_norm": 0.31815626651918666, "learning_rate": 7.4348848698604345e-06, "loss": 0.2684, "step": 10358 }, { "epoch": 0.6, "grad_norm": 0.3554260506520077, "learning_rate": 7.433086265632759e-06, "loss": 0.3245, "step": 10359 }, { "epoch": 0.6, "grad_norm": 0.7899975431739951, "learning_rate": 7.431287750297196e-06, "loss": 0.5236, "step": 10360 }, { "epoch": 0.6, "grad_norm": 0.324058350565275, "learning_rate": 7.429489323916028e-06, "loss": 0.2124, "step": 10361 }, { "epoch": 0.6, "grad_norm": 0.30049014342614966, "learning_rate": 7.427690986551534e-06, "loss": 0.2998, "step": 10362 }, { "epoch": 0.6, "grad_norm": 0.2735467911648441, "learning_rate": 7.42589273826599e-06, "loss": 0.1753, "step": 10363 }, { "epoch": 0.6, "grad_norm": 0.3070856253154252, "learning_rate": 7.42409457912167e-06, "loss": 0.214, "step": 10364 }, { "epoch": 0.6, "grad_norm": 0.7289098597123872, "learning_rate": 7.422296509180844e-06, "loss": 0.3994, "step": 10365 }, { "epoch": 0.6, "grad_norm": 0.3466921745387132, "learning_rate": 7.420498528505783e-06, "loss": 0.3249, "step": 10366 }, { "epoch": 0.6, "grad_norm": 0.30005732279309993, "learning_rate": 7.418700637158742e-06, "loss": 0.2126, "step": 10367 }, { "epoch": 0.6, "grad_norm": 1.4382862328192985, "learning_rate": 7.416902835201989e-06, "loss": 0.7743, "step": 10368 }, { "epoch": 0.6, "grad_norm": 0.3143363107928643, "learning_rate": 7.415105122697777e-06, "loss": 0.2053, "step": 10369 }, { "epoch": 0.6, "grad_norm": 0.27691108069319204, "learning_rate": 7.413307499708367e-06, "loss": 0.2016, "step": 10370 }, { "epoch": 0.6, "grad_norm": 0.47985595670757103, "learning_rate": 7.411509966296004e-06, "loss": 0.297, "step": 10371 }, { "epoch": 0.6, "grad_norm": 0.9858514760671856, "learning_rate": 7.409712522522942e-06, "loss": 0.7043, "step": 10372 }, { "epoch": 0.6, "grad_norm": 0.6016526088054295, "learning_rate": 7.407915168451423e-06, "loss": 0.3614, "step": 10373 }, { "epoch": 0.6, "grad_norm": 0.29457207851404604, "learning_rate": 7.40611790414369e-06, "loss": 0.24, "step": 10374 }, { "epoch": 0.6, "grad_norm": 0.5271657134892388, "learning_rate": 7.404320729661982e-06, "loss": 0.2722, "step": 10375 }, { "epoch": 0.6, "grad_norm": 0.4621286015570544, "learning_rate": 7.402523645068536e-06, "loss": 0.3243, "step": 10376 }, { "epoch": 0.6, "grad_norm": 0.26337030099005204, "learning_rate": 7.400726650425585e-06, "loss": 0.2056, "step": 10377 }, { "epoch": 0.6, "grad_norm": 0.337834575079801, "learning_rate": 7.3989297457953565e-06, "loss": 0.3024, "step": 10378 }, { "epoch": 0.6, "grad_norm": 0.5600185258598154, "learning_rate": 7.3971329312400805e-06, "loss": 0.4083, "step": 10379 }, { "epoch": 0.6, "grad_norm": 0.4112617760836703, "learning_rate": 7.395336206821979e-06, "loss": 0.2426, "step": 10380 }, { "epoch": 0.6, "grad_norm": 0.615001228721209, "learning_rate": 7.393539572603274e-06, "loss": 0.3951, "step": 10381 }, { "epoch": 0.6, "grad_norm": 0.2863126488000933, "learning_rate": 7.391743028646179e-06, "loss": 0.2512, "step": 10382 }, { "epoch": 0.6, "grad_norm": 0.31806544952047155, "learning_rate": 7.3899465750129116e-06, "loss": 0.2348, "step": 10383 }, { "epoch": 0.6, "grad_norm": 0.27767063661119495, "learning_rate": 7.388150211765682e-06, "loss": 0.1522, "step": 10384 }, { "epoch": 0.6, "grad_norm": 0.40574913552979075, "learning_rate": 7.3863539389667e-06, "loss": 0.2952, "step": 10385 }, { "epoch": 0.6, "grad_norm": 0.3449818810488741, "learning_rate": 7.384557756678166e-06, "loss": 0.2971, "step": 10386 }, { "epoch": 0.6, "grad_norm": 0.4392748651396246, "learning_rate": 7.382761664962287e-06, "loss": 0.2963, "step": 10387 }, { "epoch": 0.6, "grad_norm": 0.5556219375952675, "learning_rate": 7.380965663881259e-06, "loss": 0.3425, "step": 10388 }, { "epoch": 0.6, "grad_norm": 0.2330582030181791, "learning_rate": 7.379169753497275e-06, "loss": 0.2051, "step": 10389 }, { "epoch": 0.6, "grad_norm": 0.3359612146899458, "learning_rate": 7.377373933872531e-06, "loss": 0.2831, "step": 10390 }, { "epoch": 0.6, "grad_norm": 0.7637438385972759, "learning_rate": 7.375578205069213e-06, "loss": 0.3904, "step": 10391 }, { "epoch": 0.6, "grad_norm": 0.3545649951392472, "learning_rate": 7.373782567149514e-06, "loss": 0.2763, "step": 10392 }, { "epoch": 0.6, "grad_norm": 0.6164382213937144, "learning_rate": 7.371987020175606e-06, "loss": 0.3046, "step": 10393 }, { "epoch": 0.6, "grad_norm": 0.3547602337809164, "learning_rate": 7.370191564209679e-06, "loss": 0.2542, "step": 10394 }, { "epoch": 0.6, "grad_norm": 0.2775262885159086, "learning_rate": 7.368396199313901e-06, "loss": 0.2352, "step": 10395 }, { "epoch": 0.6, "grad_norm": 0.4603446729991688, "learning_rate": 7.3666009255504534e-06, "loss": 0.2823, "step": 10396 }, { "epoch": 0.6, "grad_norm": 0.41311951116192697, "learning_rate": 7.364805742981499e-06, "loss": 0.2084, "step": 10397 }, { "epoch": 0.6, "grad_norm": 0.26981663565088704, "learning_rate": 7.363010651669211e-06, "loss": 0.2726, "step": 10398 }, { "epoch": 0.6, "grad_norm": 0.7117455054970516, "learning_rate": 7.361215651675753e-06, "loss": 0.398, "step": 10399 }, { "epoch": 0.6, "grad_norm": 0.4345400774009875, "learning_rate": 7.359420743063282e-06, "loss": 0.1569, "step": 10400 }, { "epoch": 0.6, "grad_norm": 0.3219544373698642, "learning_rate": 7.357625925893954e-06, "loss": 0.2259, "step": 10401 }, { "epoch": 0.6, "grad_norm": 0.2688178658293408, "learning_rate": 7.355831200229928e-06, "loss": 0.2649, "step": 10402 }, { "epoch": 0.6, "grad_norm": 0.3136728201989251, "learning_rate": 7.354036566133354e-06, "loss": 0.2091, "step": 10403 }, { "epoch": 0.6, "grad_norm": 0.4715407097290975, "learning_rate": 7.3522420236663805e-06, "loss": 0.3276, "step": 10404 }, { "epoch": 0.6, "grad_norm": 0.4956804964246323, "learning_rate": 7.350447572891148e-06, "loss": 0.3307, "step": 10405 }, { "epoch": 0.6, "grad_norm": 0.32953209301390884, "learning_rate": 7.348653213869807e-06, "loss": 0.1928, "step": 10406 }, { "epoch": 0.6, "grad_norm": 0.40875599533641027, "learning_rate": 7.346858946664488e-06, "loss": 0.3065, "step": 10407 }, { "epoch": 0.6, "grad_norm": 0.2591224485745581, "learning_rate": 7.345064771337332e-06, "loss": 0.1801, "step": 10408 }, { "epoch": 0.6, "grad_norm": 0.4714728778705214, "learning_rate": 7.343270687950468e-06, "loss": 0.36, "step": 10409 }, { "epoch": 0.6, "grad_norm": 0.28898646326817645, "learning_rate": 7.341476696566026e-06, "loss": 0.2228, "step": 10410 }, { "epoch": 0.6, "grad_norm": 0.7426477752866582, "learning_rate": 7.33968279724613e-06, "loss": 0.4396, "step": 10411 }, { "epoch": 0.6, "grad_norm": 1.1024879194552417, "learning_rate": 7.337888990052906e-06, "loss": 0.5185, "step": 10412 }, { "epoch": 0.6, "grad_norm": 0.23906818831035612, "learning_rate": 7.336095275048474e-06, "loss": 0.1846, "step": 10413 }, { "epoch": 0.6, "grad_norm": 0.3227646272577517, "learning_rate": 7.334301652294944e-06, "loss": 0.2678, "step": 10414 }, { "epoch": 0.6, "grad_norm": 0.5806326984574692, "learning_rate": 7.332508121854435e-06, "loss": 0.4073, "step": 10415 }, { "epoch": 0.6, "grad_norm": 0.34300519661195633, "learning_rate": 7.330714683789053e-06, "loss": 0.2575, "step": 10416 }, { "epoch": 0.6, "grad_norm": 0.5015264244296673, "learning_rate": 7.32892133816091e-06, "loss": 0.3357, "step": 10417 }, { "epoch": 0.6, "grad_norm": 0.42152132629199246, "learning_rate": 7.327128085032103e-06, "loss": 0.3183, "step": 10418 }, { "epoch": 0.6, "grad_norm": 0.3411341644741504, "learning_rate": 7.325334924464737e-06, "loss": 0.2233, "step": 10419 }, { "epoch": 0.6, "grad_norm": 0.4057527596698589, "learning_rate": 7.323541856520908e-06, "loss": 0.3004, "step": 10420 }, { "epoch": 0.6, "grad_norm": 0.3473167164819613, "learning_rate": 7.32174888126271e-06, "loss": 0.2923, "step": 10421 }, { "epoch": 0.6, "grad_norm": 0.36276706144912185, "learning_rate": 7.3199559987522305e-06, "loss": 0.288, "step": 10422 }, { "epoch": 0.6, "grad_norm": 0.41822234452984713, "learning_rate": 7.3181632090515635e-06, "loss": 0.2161, "step": 10423 }, { "epoch": 0.6, "grad_norm": 0.5414425489075652, "learning_rate": 7.316370512222785e-06, "loss": 0.3239, "step": 10424 }, { "epoch": 0.6, "grad_norm": 0.378916321206655, "learning_rate": 7.314577908327982e-06, "loss": 0.2869, "step": 10425 }, { "epoch": 0.6, "grad_norm": 0.33481596176071216, "learning_rate": 7.312785397429231e-06, "loss": 0.2687, "step": 10426 }, { "epoch": 0.6, "grad_norm": 0.5608026074266522, "learning_rate": 7.310992979588607e-06, "loss": 0.3707, "step": 10427 }, { "epoch": 0.6, "grad_norm": 0.319216032928723, "learning_rate": 7.30920065486818e-06, "loss": 0.2487, "step": 10428 }, { "epoch": 0.6, "grad_norm": 0.23822935077111004, "learning_rate": 7.307408423330016e-06, "loss": 0.1923, "step": 10429 }, { "epoch": 0.6, "grad_norm": 1.1734266520628216, "learning_rate": 7.305616285036186e-06, "loss": 0.5906, "step": 10430 }, { "epoch": 0.6, "grad_norm": 0.306891867740223, "learning_rate": 7.303824240048744e-06, "loss": 0.2423, "step": 10431 }, { "epoch": 0.6, "grad_norm": 0.6941208094628137, "learning_rate": 7.3020322884297565e-06, "loss": 0.3179, "step": 10432 }, { "epoch": 0.6, "grad_norm": 0.31808162658254446, "learning_rate": 7.300240430241278e-06, "loss": 0.2968, "step": 10433 }, { "epoch": 0.6, "grad_norm": 0.4021974465762369, "learning_rate": 7.298448665545352e-06, "loss": 0.2697, "step": 10434 }, { "epoch": 0.6, "grad_norm": 0.2515530206184095, "learning_rate": 7.296656994404034e-06, "loss": 0.1669, "step": 10435 }, { "epoch": 0.6, "grad_norm": 0.5861578574807791, "learning_rate": 7.294865416879366e-06, "loss": 0.1531, "step": 10436 }, { "epoch": 0.6, "grad_norm": 0.34651374064157503, "learning_rate": 7.293073933033394e-06, "loss": 0.2677, "step": 10437 }, { "epoch": 0.6, "grad_norm": 0.37290784368695534, "learning_rate": 7.291282542928158e-06, "loss": 0.3219, "step": 10438 }, { "epoch": 0.6, "grad_norm": 0.8520457116127256, "learning_rate": 7.289491246625686e-06, "loss": 0.3167, "step": 10439 }, { "epoch": 0.6, "grad_norm": 0.3610971456454583, "learning_rate": 7.287700044188019e-06, "loss": 0.2736, "step": 10440 }, { "epoch": 0.6, "grad_norm": 0.27075648165360194, "learning_rate": 7.28590893567718e-06, "loss": 0.2578, "step": 10441 }, { "epoch": 0.6, "grad_norm": 0.467437287940649, "learning_rate": 7.2841179211552005e-06, "loss": 0.1746, "step": 10442 }, { "epoch": 0.6, "grad_norm": 0.3203387589486802, "learning_rate": 7.282327000684099e-06, "loss": 0.2521, "step": 10443 }, { "epoch": 0.6, "grad_norm": 1.0366281064937823, "learning_rate": 7.280536174325897e-06, "loss": 0.4184, "step": 10444 }, { "epoch": 0.6, "grad_norm": 0.32022139792761206, "learning_rate": 7.27874544214261e-06, "loss": 0.2512, "step": 10445 }, { "epoch": 0.6, "grad_norm": 0.31124170915094695, "learning_rate": 7.276954804196252e-06, "loss": 0.2607, "step": 10446 }, { "epoch": 0.6, "grad_norm": 0.27313298584475565, "learning_rate": 7.2751642605488305e-06, "loss": 0.1748, "step": 10447 }, { "epoch": 0.6, "grad_norm": 1.4671493965457703, "learning_rate": 7.273373811262356e-06, "loss": 0.7225, "step": 10448 }, { "epoch": 0.6, "grad_norm": 0.30145172575689483, "learning_rate": 7.271583456398827e-06, "loss": 0.2257, "step": 10449 }, { "epoch": 0.6, "grad_norm": 0.7093270985040872, "learning_rate": 7.269793196020247e-06, "loss": 0.3663, "step": 10450 }, { "epoch": 0.6, "grad_norm": 1.0567717494520565, "learning_rate": 7.26800303018861e-06, "loss": 0.7185, "step": 10451 }, { "epoch": 0.6, "grad_norm": 0.3041414305207806, "learning_rate": 7.266212958965912e-06, "loss": 0.2066, "step": 10452 }, { "epoch": 0.6, "grad_norm": 0.37389481781956213, "learning_rate": 7.264422982414143e-06, "loss": 0.3267, "step": 10453 }, { "epoch": 0.6, "grad_norm": 0.25075241124657155, "learning_rate": 7.2626331005952845e-06, "loss": 0.1912, "step": 10454 }, { "epoch": 0.6, "grad_norm": 0.3700868988947285, "learning_rate": 7.260843313571328e-06, "loss": 0.2145, "step": 10455 }, { "epoch": 0.6, "grad_norm": 0.9142862153982831, "learning_rate": 7.259053621404246e-06, "loss": 0.4353, "step": 10456 }, { "epoch": 0.6, "grad_norm": 0.3873001010445203, "learning_rate": 7.2572640241560225e-06, "loss": 0.3062, "step": 10457 }, { "epoch": 0.6, "grad_norm": 0.3067948858367847, "learning_rate": 7.255474521888624e-06, "loss": 0.1958, "step": 10458 }, { "epoch": 0.6, "grad_norm": 0.3374418148179951, "learning_rate": 7.253685114664029e-06, "loss": 0.232, "step": 10459 }, { "epoch": 0.6, "grad_norm": 0.45806778271637055, "learning_rate": 7.251895802544197e-06, "loss": 0.3377, "step": 10460 }, { "epoch": 0.6, "grad_norm": 0.4224168796644813, "learning_rate": 7.250106585591098e-06, "loss": 0.2808, "step": 10461 }, { "epoch": 0.6, "grad_norm": 0.4315638186581728, "learning_rate": 7.2483174638666876e-06, "loss": 0.2695, "step": 10462 }, { "epoch": 0.6, "grad_norm": 1.1670725205025345, "learning_rate": 7.246528437432927e-06, "loss": 0.7757, "step": 10463 }, { "epoch": 0.6, "grad_norm": 0.3893678908800406, "learning_rate": 7.244739506351765e-06, "loss": 0.2822, "step": 10464 }, { "epoch": 0.6, "grad_norm": 0.27797550100587226, "learning_rate": 7.242950670685159e-06, "loss": 0.222, "step": 10465 }, { "epoch": 0.6, "grad_norm": 0.4390055988054794, "learning_rate": 7.2411619304950535e-06, "loss": 0.26, "step": 10466 }, { "epoch": 0.6, "grad_norm": 0.32031797829067743, "learning_rate": 7.239373285843392e-06, "loss": 0.2501, "step": 10467 }, { "epoch": 0.6, "grad_norm": 0.6696812537335045, "learning_rate": 7.237584736792112e-06, "loss": 0.3122, "step": 10468 }, { "epoch": 0.6, "grad_norm": 0.34998214322910576, "learning_rate": 7.235796283403153e-06, "loss": 0.3351, "step": 10469 }, { "epoch": 0.6, "grad_norm": 0.3469237876049126, "learning_rate": 7.234007925738451e-06, "loss": 0.2644, "step": 10470 }, { "epoch": 0.6, "grad_norm": 1.4179928240007145, "learning_rate": 7.2322196638599365e-06, "loss": 0.5999, "step": 10471 }, { "epoch": 0.6, "grad_norm": 0.4999890912597717, "learning_rate": 7.230431497829533e-06, "loss": 0.2942, "step": 10472 }, { "epoch": 0.6, "grad_norm": 0.2984927447908153, "learning_rate": 7.228643427709172e-06, "loss": 0.2701, "step": 10473 }, { "epoch": 0.6, "grad_norm": 0.28578890500066373, "learning_rate": 7.226855453560766e-06, "loss": 0.1861, "step": 10474 }, { "epoch": 0.6, "grad_norm": 0.8567557080685262, "learning_rate": 7.2250675754462384e-06, "loss": 0.4243, "step": 10475 }, { "epoch": 0.6, "grad_norm": 0.38207266662063444, "learning_rate": 7.2232797934275e-06, "loss": 0.2831, "step": 10476 }, { "epoch": 0.6, "grad_norm": 0.32210840344326147, "learning_rate": 7.221492107566466e-06, "loss": 0.285, "step": 10477 }, { "epoch": 0.6, "grad_norm": 0.527301928692191, "learning_rate": 7.2197045179250395e-06, "loss": 0.2243, "step": 10478 }, { "epoch": 0.6, "grad_norm": 0.3561268181099593, "learning_rate": 7.217917024565124e-06, "loss": 0.2425, "step": 10479 }, { "epoch": 0.6, "grad_norm": 0.38431117081131044, "learning_rate": 7.216129627548625e-06, "loss": 0.271, "step": 10480 }, { "epoch": 0.6, "grad_norm": 0.35747867342515993, "learning_rate": 7.214342326937434e-06, "loss": 0.2597, "step": 10481 }, { "epoch": 0.6, "grad_norm": 0.3926495922665949, "learning_rate": 7.212555122793452e-06, "loss": 0.2489, "step": 10482 }, { "epoch": 0.6, "grad_norm": 0.4978920915555641, "learning_rate": 7.210768015178563e-06, "loss": 0.3426, "step": 10483 }, { "epoch": 0.6, "grad_norm": 0.7036657391861827, "learning_rate": 7.208981004154661e-06, "loss": 0.4456, "step": 10484 }, { "epoch": 0.6, "grad_norm": 0.26724477915832684, "learning_rate": 7.2071940897836235e-06, "loss": 0.2103, "step": 10485 }, { "epoch": 0.6, "grad_norm": 0.24639680661311067, "learning_rate": 7.205407272127336e-06, "loss": 0.181, "step": 10486 }, { "epoch": 0.6, "grad_norm": 1.1345625130187684, "learning_rate": 7.203620551247675e-06, "loss": 0.7537, "step": 10487 }, { "epoch": 0.6, "grad_norm": 0.32662252851637746, "learning_rate": 7.201833927206514e-06, "loss": 0.206, "step": 10488 }, { "epoch": 0.6, "grad_norm": 0.3779858157814377, "learning_rate": 7.200047400065722e-06, "loss": 0.2848, "step": 10489 }, { "epoch": 0.6, "grad_norm": 0.7855434602278417, "learning_rate": 7.198260969887171e-06, "loss": 0.4231, "step": 10490 }, { "epoch": 0.6, "grad_norm": 0.22071700621294713, "learning_rate": 7.196474636732722e-06, "loss": 0.1673, "step": 10491 }, { "epoch": 0.6, "grad_norm": 0.41724723056633095, "learning_rate": 7.194688400664232e-06, "loss": 0.2648, "step": 10492 }, { "epoch": 0.6, "grad_norm": 0.33254747854591676, "learning_rate": 7.192902261743566e-06, "loss": 0.3185, "step": 10493 }, { "epoch": 0.6, "grad_norm": 0.23978557568988323, "learning_rate": 7.191116220032572e-06, "loss": 0.0635, "step": 10494 }, { "epoch": 0.6, "grad_norm": 0.45877974911373315, "learning_rate": 7.189330275593104e-06, "loss": 0.2984, "step": 10495 }, { "epoch": 0.6, "grad_norm": 0.36561107076275134, "learning_rate": 7.187544428487006e-06, "loss": 0.2965, "step": 10496 }, { "epoch": 0.6, "grad_norm": 0.6271544762259579, "learning_rate": 7.1857586787761246e-06, "loss": 0.3096, "step": 10497 }, { "epoch": 0.6, "grad_norm": 0.22321373342280323, "learning_rate": 7.183973026522297e-06, "loss": 0.1597, "step": 10498 }, { "epoch": 0.6, "grad_norm": 1.1306791168483041, "learning_rate": 7.182187471787365e-06, "loss": 0.7098, "step": 10499 }, { "epoch": 0.6, "grad_norm": 0.5794004017247366, "learning_rate": 7.180402014633159e-06, "loss": 0.2908, "step": 10500 }, { "epoch": 0.6, "grad_norm": 0.27354208770268323, "learning_rate": 7.178616655121513e-06, "loss": 0.2454, "step": 10501 }, { "epoch": 0.6, "grad_norm": 1.158877404681521, "learning_rate": 7.176831393314248e-06, "loss": 0.6185, "step": 10502 }, { "epoch": 0.6, "grad_norm": 0.5729698953409218, "learning_rate": 7.175046229273191e-06, "loss": 0.3404, "step": 10503 }, { "epoch": 0.6, "grad_norm": 0.2731478718166219, "learning_rate": 7.17326116306016e-06, "loss": 0.196, "step": 10504 }, { "epoch": 0.6, "grad_norm": 0.34421103264516534, "learning_rate": 7.171476194736975e-06, "loss": 0.3172, "step": 10505 }, { "epoch": 0.6, "grad_norm": 0.4021245776566278, "learning_rate": 7.169691324365447e-06, "loss": 0.2735, "step": 10506 }, { "epoch": 0.6, "grad_norm": 0.4057698150407605, "learning_rate": 7.167906552007387e-06, "loss": 0.2176, "step": 10507 }, { "epoch": 0.6, "grad_norm": 0.39037278889321536, "learning_rate": 7.166121877724599e-06, "loss": 0.3269, "step": 10508 }, { "epoch": 0.6, "grad_norm": 0.42136169762460707, "learning_rate": 7.164337301578892e-06, "loss": 0.2538, "step": 10509 }, { "epoch": 0.6, "grad_norm": 0.3061269281320481, "learning_rate": 7.162552823632059e-06, "loss": 0.2145, "step": 10510 }, { "epoch": 0.6, "grad_norm": 0.6243819662075308, "learning_rate": 7.160768443945902e-06, "loss": 0.3622, "step": 10511 }, { "epoch": 0.6, "grad_norm": 0.39212060454543934, "learning_rate": 7.15898416258221e-06, "loss": 0.3269, "step": 10512 }, { "epoch": 0.6, "grad_norm": 0.29126800036976597, "learning_rate": 7.157199979602777e-06, "loss": 0.2787, "step": 10513 }, { "epoch": 0.6, "grad_norm": 0.4578357233133374, "learning_rate": 7.155415895069385e-06, "loss": 0.1526, "step": 10514 }, { "epoch": 0.6, "grad_norm": 0.7504728210353762, "learning_rate": 7.153631909043818e-06, "loss": 0.3463, "step": 10515 }, { "epoch": 0.6, "grad_norm": 0.39710388219731657, "learning_rate": 7.151848021587855e-06, "loss": 0.2964, "step": 10516 }, { "epoch": 0.6, "grad_norm": 0.3785129422319976, "learning_rate": 7.150064232763274e-06, "loss": 0.2846, "step": 10517 }, { "epoch": 0.6, "grad_norm": 0.5141174513877448, "learning_rate": 7.1482805426318465e-06, "loss": 0.314, "step": 10518 }, { "epoch": 0.6, "grad_norm": 0.2443119966552638, "learning_rate": 7.146496951255339e-06, "loss": 0.2207, "step": 10519 }, { "epoch": 0.6, "grad_norm": 0.30675794437787113, "learning_rate": 7.144713458695521e-06, "loss": 0.2244, "step": 10520 }, { "epoch": 0.6, "grad_norm": 0.5894451000930164, "learning_rate": 7.1429300650141505e-06, "loss": 0.3308, "step": 10521 }, { "epoch": 0.6, "grad_norm": 0.3686690194975854, "learning_rate": 7.141146770272993e-06, "loss": 0.3187, "step": 10522 }, { "epoch": 0.6, "grad_norm": 0.6833298869205651, "learning_rate": 7.139363574533797e-06, "loss": 0.3821, "step": 10523 }, { "epoch": 0.6, "grad_norm": 0.28139648241122606, "learning_rate": 7.137580477858319e-06, "loss": 0.231, "step": 10524 }, { "epoch": 0.6, "grad_norm": 0.41488090474727446, "learning_rate": 7.1357974803083044e-06, "loss": 0.3398, "step": 10525 }, { "epoch": 0.6, "grad_norm": 0.30323806929317104, "learning_rate": 7.134014581945501e-06, "loss": 0.1871, "step": 10526 }, { "epoch": 0.6, "grad_norm": 0.3600629089927518, "learning_rate": 7.132231782831649e-06, "loss": 0.1966, "step": 10527 }, { "epoch": 0.6, "grad_norm": 0.50493454952615, "learning_rate": 7.130449083028488e-06, "loss": 0.3759, "step": 10528 }, { "epoch": 0.6, "grad_norm": 0.4093641362730516, "learning_rate": 7.1286664825977505e-06, "loss": 0.3145, "step": 10529 }, { "epoch": 0.6, "grad_norm": 0.6043612819812036, "learning_rate": 7.1268839816011695e-06, "loss": 0.2636, "step": 10530 }, { "epoch": 0.61, "grad_norm": 0.24807553607158486, "learning_rate": 7.125101580100474e-06, "loss": 0.2093, "step": 10531 }, { "epoch": 0.61, "grad_norm": 0.2676598452867699, "learning_rate": 7.123319278157385e-06, "loss": 0.2648, "step": 10532 }, { "epoch": 0.61, "grad_norm": 0.5550732423895852, "learning_rate": 7.121537075833629e-06, "loss": 0.1338, "step": 10533 }, { "epoch": 0.61, "grad_norm": 0.36036269402932336, "learning_rate": 7.119754973190915e-06, "loss": 0.2997, "step": 10534 }, { "epoch": 0.61, "grad_norm": 0.9122378994044027, "learning_rate": 7.11797297029097e-06, "loss": 0.3795, "step": 10535 }, { "epoch": 0.61, "grad_norm": 0.3485600601021822, "learning_rate": 7.116191067195494e-06, "loss": 0.3105, "step": 10536 }, { "epoch": 0.61, "grad_norm": 0.30727339279065635, "learning_rate": 7.114409263966195e-06, "loss": 0.1943, "step": 10537 }, { "epoch": 0.61, "grad_norm": 0.2655701703598823, "learning_rate": 7.11262756066478e-06, "loss": 0.1737, "step": 10538 }, { "epoch": 0.61, "grad_norm": 0.6735221155033528, "learning_rate": 7.110845957352948e-06, "loss": 0.3591, "step": 10539 }, { "epoch": 0.61, "grad_norm": 0.27521311307189994, "learning_rate": 7.109064454092398e-06, "loss": 0.2416, "step": 10540 }, { "epoch": 0.61, "grad_norm": 0.854134216843187, "learning_rate": 7.1072830509448185e-06, "loss": 0.4508, "step": 10541 }, { "epoch": 0.61, "grad_norm": 0.6999782759176724, "learning_rate": 7.105501747971906e-06, "loss": 0.4854, "step": 10542 }, { "epoch": 0.61, "grad_norm": 0.3321506116907094, "learning_rate": 7.103720545235342e-06, "loss": 0.2268, "step": 10543 }, { "epoch": 0.61, "grad_norm": 0.233533515493403, "learning_rate": 7.10193944279681e-06, "loss": 0.2295, "step": 10544 }, { "epoch": 0.61, "grad_norm": 0.7011227141792372, "learning_rate": 7.100158440717993e-06, "loss": 0.3943, "step": 10545 }, { "epoch": 0.61, "grad_norm": 0.3676451737884998, "learning_rate": 7.098377539060562e-06, "loss": 0.273, "step": 10546 }, { "epoch": 0.61, "grad_norm": 0.7447326581606627, "learning_rate": 7.096596737886194e-06, "loss": 0.3583, "step": 10547 }, { "epoch": 0.61, "grad_norm": 0.32197795833723225, "learning_rate": 7.0948160372565534e-06, "loss": 0.2779, "step": 10548 }, { "epoch": 0.61, "grad_norm": 0.3593098276921128, "learning_rate": 7.093035437233311e-06, "loss": 0.2799, "step": 10549 }, { "epoch": 0.61, "grad_norm": 0.1792310565543837, "learning_rate": 7.091254937878125e-06, "loss": 0.084, "step": 10550 }, { "epoch": 0.61, "grad_norm": 1.0643531416224592, "learning_rate": 7.089474539252656e-06, "loss": 0.3914, "step": 10551 }, { "epoch": 0.61, "grad_norm": 0.250677757331347, "learning_rate": 7.087694241418558e-06, "loss": 0.2396, "step": 10552 }, { "epoch": 0.61, "grad_norm": 0.3975599441810498, "learning_rate": 7.085914044437485e-06, "loss": 0.2871, "step": 10553 }, { "epoch": 0.61, "grad_norm": 0.9224701306896524, "learning_rate": 7.084133948371081e-06, "loss": 0.6096, "step": 10554 }, { "epoch": 0.61, "grad_norm": 0.3374955882849087, "learning_rate": 7.082353953280995e-06, "loss": 0.2439, "step": 10555 }, { "epoch": 0.61, "grad_norm": 0.21337914040021594, "learning_rate": 7.080574059228866e-06, "loss": 0.1718, "step": 10556 }, { "epoch": 0.61, "grad_norm": 1.2038444194000044, "learning_rate": 7.07879426627633e-06, "loss": 0.3988, "step": 10557 }, { "epoch": 0.61, "grad_norm": 0.30269484417471876, "learning_rate": 7.077014574485025e-06, "loss": 0.24, "step": 10558 }, { "epoch": 0.61, "grad_norm": 1.2401651526461381, "learning_rate": 7.075234983916577e-06, "loss": 0.7724, "step": 10559 }, { "epoch": 0.61, "grad_norm": 0.30485211566640963, "learning_rate": 7.073455494632618e-06, "loss": 0.2483, "step": 10560 }, { "epoch": 0.61, "grad_norm": 0.46309273195641676, "learning_rate": 7.071676106694767e-06, "loss": 0.2658, "step": 10561 }, { "epoch": 0.61, "grad_norm": 0.32683809943240216, "learning_rate": 7.06989682016465e-06, "loss": 0.2232, "step": 10562 }, { "epoch": 0.61, "grad_norm": 0.7879418679957149, "learning_rate": 7.068117635103877e-06, "loss": 0.2834, "step": 10563 }, { "epoch": 0.61, "grad_norm": 0.3973588991375235, "learning_rate": 7.066338551574066e-06, "loss": 0.29, "step": 10564 }, { "epoch": 0.61, "grad_norm": 0.4464487646966575, "learning_rate": 7.064559569636824e-06, "loss": 0.3641, "step": 10565 }, { "epoch": 0.61, "grad_norm": 0.885248157283508, "learning_rate": 7.062780689353758e-06, "loss": 0.3694, "step": 10566 }, { "epoch": 0.61, "grad_norm": 0.3276928966908981, "learning_rate": 7.06100191078647e-06, "loss": 0.2578, "step": 10567 }, { "epoch": 0.61, "grad_norm": 0.35618267701930156, "learning_rate": 7.0592232339965664e-06, "loss": 0.3178, "step": 10568 }, { "epoch": 0.61, "grad_norm": 0.3768634547209685, "learning_rate": 7.057444659045627e-06, "loss": 0.2148, "step": 10569 }, { "epoch": 0.61, "grad_norm": 0.25590767906812234, "learning_rate": 7.055666185995256e-06, "loss": 0.2129, "step": 10570 }, { "epoch": 0.61, "grad_norm": 0.5079144727780354, "learning_rate": 7.053887814907036e-06, "loss": 0.3775, "step": 10571 }, { "epoch": 0.61, "grad_norm": 0.47995334935170597, "learning_rate": 7.0521095458425555e-06, "loss": 0.3491, "step": 10572 }, { "epoch": 0.61, "grad_norm": 0.3018031483350368, "learning_rate": 7.050331378863395e-06, "loss": 0.1862, "step": 10573 }, { "epoch": 0.61, "grad_norm": 0.7799879504359073, "learning_rate": 7.048553314031132e-06, "loss": 0.4413, "step": 10574 }, { "epoch": 0.61, "grad_norm": 0.34979493541325096, "learning_rate": 7.04677535140734e-06, "loss": 0.2612, "step": 10575 }, { "epoch": 0.61, "grad_norm": 0.2285568510508859, "learning_rate": 7.0449974910535916e-06, "loss": 0.1851, "step": 10576 }, { "epoch": 0.61, "grad_norm": 1.0304051087015562, "learning_rate": 7.043219733031452e-06, "loss": 0.627, "step": 10577 }, { "epoch": 0.61, "grad_norm": 1.0139546657916503, "learning_rate": 7.041442077402487e-06, "loss": 0.6937, "step": 10578 }, { "epoch": 0.61, "grad_norm": 0.30342367295446254, "learning_rate": 7.0396645242282535e-06, "loss": 0.2022, "step": 10579 }, { "epoch": 0.61, "grad_norm": 0.3986109508000254, "learning_rate": 7.037887073570313e-06, "loss": 0.3156, "step": 10580 }, { "epoch": 0.61, "grad_norm": 0.4877704840722932, "learning_rate": 7.036109725490214e-06, "loss": 0.2859, "step": 10581 }, { "epoch": 0.61, "grad_norm": 0.30519393626632985, "learning_rate": 7.03433248004951e-06, "loss": 0.1873, "step": 10582 }, { "epoch": 0.61, "grad_norm": 0.41057454427454104, "learning_rate": 7.032555337309743e-06, "loss": 0.3229, "step": 10583 }, { "epoch": 0.61, "grad_norm": 0.3539347623622069, "learning_rate": 7.030778297332457e-06, "loss": 0.3001, "step": 10584 }, { "epoch": 0.61, "grad_norm": 0.3887406352092294, "learning_rate": 7.0290013601791905e-06, "loss": 0.2601, "step": 10585 }, { "epoch": 0.61, "grad_norm": 0.47380536198285, "learning_rate": 7.027224525911479e-06, "loss": 0.2907, "step": 10586 }, { "epoch": 0.61, "grad_norm": 0.4561919825253446, "learning_rate": 7.025447794590856e-06, "loss": 0.3529, "step": 10587 }, { "epoch": 0.61, "grad_norm": 0.24317420310642943, "learning_rate": 7.023671166278845e-06, "loss": 0.2204, "step": 10588 }, { "epoch": 0.61, "grad_norm": 0.30300578946259576, "learning_rate": 7.021894641036977e-06, "loss": 0.1961, "step": 10589 }, { "epoch": 0.61, "grad_norm": 0.7928562012811823, "learning_rate": 7.020118218926767e-06, "loss": 0.5509, "step": 10590 }, { "epoch": 0.61, "grad_norm": 0.31281127151054094, "learning_rate": 7.018341900009738e-06, "loss": 0.2595, "step": 10591 }, { "epoch": 0.61, "grad_norm": 0.378034705279621, "learning_rate": 7.0165656843473965e-06, "loss": 0.2803, "step": 10592 }, { "epoch": 0.61, "grad_norm": 0.9236198996788825, "learning_rate": 7.0147895720012596e-06, "loss": 0.5131, "step": 10593 }, { "epoch": 0.61, "grad_norm": 0.24966395258526514, "learning_rate": 7.01301356303283e-06, "loss": 0.1917, "step": 10594 }, { "epoch": 0.61, "grad_norm": 0.9200006479669353, "learning_rate": 7.011237657503615e-06, "loss": 0.5364, "step": 10595 }, { "epoch": 0.61, "grad_norm": 0.2708229012641202, "learning_rate": 7.009461855475111e-06, "loss": 0.2601, "step": 10596 }, { "epoch": 0.61, "grad_norm": 0.42431345222551275, "learning_rate": 7.00768615700881e-06, "loss": 0.3102, "step": 10597 }, { "epoch": 0.61, "grad_norm": 0.49933535085134173, "learning_rate": 7.005910562166213e-06, "loss": 0.3538, "step": 10598 }, { "epoch": 0.61, "grad_norm": 0.32711074269410184, "learning_rate": 7.004135071008803e-06, "loss": 0.244, "step": 10599 }, { "epoch": 0.61, "grad_norm": 0.3911638518785674, "learning_rate": 7.0023596835980676e-06, "loss": 0.2621, "step": 10600 }, { "epoch": 0.61, "grad_norm": 0.549319709524697, "learning_rate": 7.000584399995486e-06, "loss": 0.4431, "step": 10601 }, { "epoch": 0.61, "grad_norm": 0.4383392566950631, "learning_rate": 6.998809220262541e-06, "loss": 0.2549, "step": 10602 }, { "epoch": 0.61, "grad_norm": 0.39079099029956776, "learning_rate": 6.997034144460702e-06, "loss": 0.2778, "step": 10603 }, { "epoch": 0.61, "grad_norm": 0.24881046372954682, "learning_rate": 6.995259172651441e-06, "loss": 0.2452, "step": 10604 }, { "epoch": 0.61, "grad_norm": 1.0911990822931978, "learning_rate": 6.993484304896225e-06, "loss": 0.2585, "step": 10605 }, { "epoch": 0.61, "grad_norm": 0.3859135519020631, "learning_rate": 6.991709541256517e-06, "loss": 0.271, "step": 10606 }, { "epoch": 0.61, "grad_norm": 0.38547240651856923, "learning_rate": 6.98993488179378e-06, "loss": 0.328, "step": 10607 }, { "epoch": 0.61, "grad_norm": 0.496427375438408, "learning_rate": 6.988160326569471e-06, "loss": 0.2881, "step": 10608 }, { "epoch": 0.61, "grad_norm": 0.2807034932115483, "learning_rate": 6.986385875645036e-06, "loss": 0.2477, "step": 10609 }, { "epoch": 0.61, "grad_norm": 0.36666726706200375, "learning_rate": 6.984611529081931e-06, "loss": 0.2671, "step": 10610 }, { "epoch": 0.61, "grad_norm": 0.3784004456555454, "learning_rate": 6.982837286941598e-06, "loss": 0.2992, "step": 10611 }, { "epoch": 0.61, "grad_norm": 0.2947344402549206, "learning_rate": 6.981063149285481e-06, "loss": 0.1862, "step": 10612 }, { "epoch": 0.61, "grad_norm": 1.2011332586742978, "learning_rate": 6.979289116175014e-06, "loss": 0.7797, "step": 10613 }, { "epoch": 0.61, "grad_norm": 0.6269447977839719, "learning_rate": 6.977515187671639e-06, "loss": 0.3863, "step": 10614 }, { "epoch": 0.61, "grad_norm": 0.28525954215061056, "learning_rate": 6.975741363836781e-06, "loss": 0.2034, "step": 10615 }, { "epoch": 0.61, "grad_norm": 0.23251089488001686, "learning_rate": 6.973967644731872e-06, "loss": 0.2167, "step": 10616 }, { "epoch": 0.61, "grad_norm": 1.136268898483301, "learning_rate": 6.972194030418329e-06, "loss": 0.4865, "step": 10617 }, { "epoch": 0.61, "grad_norm": 0.35214424867594674, "learning_rate": 6.97042052095758e-06, "loss": 0.1561, "step": 10618 }, { "epoch": 0.61, "grad_norm": 0.3186430841898647, "learning_rate": 6.968647116411036e-06, "loss": 0.2863, "step": 10619 }, { "epoch": 0.61, "grad_norm": 0.5067307018719389, "learning_rate": 6.966873816840114e-06, "loss": 0.3595, "step": 10620 }, { "epoch": 0.61, "grad_norm": 0.33263369625653055, "learning_rate": 6.96510062230622e-06, "loss": 0.1733, "step": 10621 }, { "epoch": 0.61, "grad_norm": 0.23286758847398575, "learning_rate": 6.963327532870763e-06, "loss": 0.1742, "step": 10622 }, { "epoch": 0.61, "grad_norm": 0.4730282585689458, "learning_rate": 6.961554548595142e-06, "loss": 0.326, "step": 10623 }, { "epoch": 0.61, "grad_norm": 0.3619899467460046, "learning_rate": 6.959781669540754e-06, "loss": 0.2461, "step": 10624 }, { "epoch": 0.61, "grad_norm": 0.3841805210107079, "learning_rate": 6.958008895769e-06, "loss": 0.2792, "step": 10625 }, { "epoch": 0.61, "grad_norm": 0.7631851504994283, "learning_rate": 6.956236227341262e-06, "loss": 0.5202, "step": 10626 }, { "epoch": 0.61, "grad_norm": 0.2999124611334779, "learning_rate": 6.954463664318937e-06, "loss": 0.2546, "step": 10627 }, { "epoch": 0.61, "grad_norm": 0.24820162596488865, "learning_rate": 6.952691206763402e-06, "loss": 0.1693, "step": 10628 }, { "epoch": 0.61, "grad_norm": 1.2347189617039587, "learning_rate": 6.950918854736041e-06, "loss": 0.5083, "step": 10629 }, { "epoch": 0.61, "grad_norm": 0.5522730367103457, "learning_rate": 6.949146608298227e-06, "loss": 0.3568, "step": 10630 }, { "epoch": 0.61, "grad_norm": 0.3104954162518697, "learning_rate": 6.947374467511336e-06, "loss": 0.2486, "step": 10631 }, { "epoch": 0.61, "grad_norm": 0.4520113873626869, "learning_rate": 6.945602432436736e-06, "loss": 0.3982, "step": 10632 }, { "epoch": 0.61, "grad_norm": 0.4093916598564017, "learning_rate": 6.9438305031357935e-06, "loss": 0.2927, "step": 10633 }, { "epoch": 0.61, "grad_norm": 0.3338504922517524, "learning_rate": 6.9420586796698655e-06, "loss": 0.219, "step": 10634 }, { "epoch": 0.61, "grad_norm": 0.25665018793378996, "learning_rate": 6.940286962100318e-06, "loss": 0.1841, "step": 10635 }, { "epoch": 0.61, "grad_norm": 0.6598078007493531, "learning_rate": 6.938515350488503e-06, "loss": 0.4409, "step": 10636 }, { "epoch": 0.61, "grad_norm": 0.3300845858986062, "learning_rate": 6.936743844895768e-06, "loss": 0.2797, "step": 10637 }, { "epoch": 0.61, "grad_norm": 0.4680762857712185, "learning_rate": 6.934972445383459e-06, "loss": 0.2779, "step": 10638 }, { "epoch": 0.61, "grad_norm": 0.4200145663298372, "learning_rate": 6.933201152012925e-06, "loss": 0.3078, "step": 10639 }, { "epoch": 0.61, "grad_norm": 0.27962340097798727, "learning_rate": 6.931429964845501e-06, "loss": 0.2241, "step": 10640 }, { "epoch": 0.61, "grad_norm": 0.53746389745485, "learning_rate": 6.929658883942527e-06, "loss": 0.1577, "step": 10641 }, { "epoch": 0.61, "grad_norm": 0.4959989835213774, "learning_rate": 6.927887909365333e-06, "loss": 0.3364, "step": 10642 }, { "epoch": 0.61, "grad_norm": 0.29723094294413305, "learning_rate": 6.92611704117525e-06, "loss": 0.2759, "step": 10643 }, { "epoch": 0.61, "grad_norm": 0.8817731319299101, "learning_rate": 6.924346279433599e-06, "loss": 0.4836, "step": 10644 }, { "epoch": 0.61, "grad_norm": 0.603693846570787, "learning_rate": 6.922575624201706e-06, "loss": 0.379, "step": 10645 }, { "epoch": 0.61, "grad_norm": 0.31016181140642807, "learning_rate": 6.920805075540886e-06, "loss": 0.2332, "step": 10646 }, { "epoch": 0.61, "grad_norm": 0.27221079855815977, "learning_rate": 6.919034633512456e-06, "loss": 0.2433, "step": 10647 }, { "epoch": 0.61, "grad_norm": 0.6010295063712859, "learning_rate": 6.917264298177724e-06, "loss": 0.267, "step": 10648 }, { "epoch": 0.61, "grad_norm": 0.4000427804407078, "learning_rate": 6.915494069597993e-06, "loss": 0.3225, "step": 10649 }, { "epoch": 0.61, "grad_norm": 1.0407707069428398, "learning_rate": 6.913723947834574e-06, "loss": 0.6438, "step": 10650 }, { "epoch": 0.61, "grad_norm": 0.2765627603584837, "learning_rate": 6.9119539329487585e-06, "loss": 0.2274, "step": 10651 }, { "epoch": 0.61, "grad_norm": 0.5224307700746306, "learning_rate": 6.9101840250018485e-06, "loss": 0.3428, "step": 10652 }, { "epoch": 0.61, "grad_norm": 0.35898777209765914, "learning_rate": 6.908414224055129e-06, "loss": 0.2134, "step": 10653 }, { "epoch": 0.61, "grad_norm": 0.7316390060143079, "learning_rate": 6.906644530169896e-06, "loss": 0.2693, "step": 10654 }, { "epoch": 0.61, "grad_norm": 0.2562741289721085, "learning_rate": 6.904874943407427e-06, "loss": 0.2569, "step": 10655 }, { "epoch": 0.61, "grad_norm": 1.057082723143132, "learning_rate": 6.903105463829007e-06, "loss": 0.7312, "step": 10656 }, { "epoch": 0.61, "grad_norm": 0.6832390986635581, "learning_rate": 6.901336091495912e-06, "loss": 0.159, "step": 10657 }, { "epoch": 0.61, "grad_norm": 0.2674851553546089, "learning_rate": 6.899566826469415e-06, "loss": 0.2218, "step": 10658 }, { "epoch": 0.61, "grad_norm": 0.3579968138322811, "learning_rate": 6.897797668810784e-06, "loss": 0.2911, "step": 10659 }, { "epoch": 0.61, "grad_norm": 0.5464163869475679, "learning_rate": 6.896028618581287e-06, "loss": 0.3132, "step": 10660 }, { "epoch": 0.61, "grad_norm": 0.3000321311735532, "learning_rate": 6.894259675842188e-06, "loss": 0.2106, "step": 10661 }, { "epoch": 0.61, "grad_norm": 1.2299271449853884, "learning_rate": 6.892490840654739e-06, "loss": 0.8311, "step": 10662 }, { "epoch": 0.61, "grad_norm": 0.30179860641260736, "learning_rate": 6.890722113080201e-06, "loss": 0.2821, "step": 10663 }, { "epoch": 0.61, "grad_norm": 0.34845039622272467, "learning_rate": 6.888953493179819e-06, "loss": 0.2284, "step": 10664 }, { "epoch": 0.61, "grad_norm": 0.5995945108100336, "learning_rate": 6.88718498101485e-06, "loss": 0.3831, "step": 10665 }, { "epoch": 0.61, "grad_norm": 0.25283872062952506, "learning_rate": 6.885416576646525e-06, "loss": 0.1791, "step": 10666 }, { "epoch": 0.61, "grad_norm": 0.2785774684194344, "learning_rate": 6.883648280136094e-06, "loss": 0.223, "step": 10667 }, { "epoch": 0.61, "grad_norm": 1.1132804366543598, "learning_rate": 6.881880091544786e-06, "loss": 0.7453, "step": 10668 }, { "epoch": 0.61, "grad_norm": 0.8623000767888052, "learning_rate": 6.880112010933839e-06, "loss": 0.5088, "step": 10669 }, { "epoch": 0.61, "grad_norm": 0.3251227489998159, "learning_rate": 6.878344038364481e-06, "loss": 0.2274, "step": 10670 }, { "epoch": 0.61, "grad_norm": 0.36645085483371054, "learning_rate": 6.8765761738979305e-06, "loss": 0.3253, "step": 10671 }, { "epoch": 0.61, "grad_norm": 0.26849276242470155, "learning_rate": 6.874808417595415e-06, "loss": 0.1771, "step": 10672 }, { "epoch": 0.61, "grad_norm": 0.32862638825590396, "learning_rate": 6.87304076951815e-06, "loss": 0.2591, "step": 10673 }, { "epoch": 0.61, "grad_norm": 0.4298652986509392, "learning_rate": 6.871273229727346e-06, "loss": 0.2987, "step": 10674 }, { "epoch": 0.61, "grad_norm": 0.5045099892284276, "learning_rate": 6.869505798284217e-06, "loss": 0.3762, "step": 10675 }, { "epoch": 0.61, "grad_norm": 0.32738198225492715, "learning_rate": 6.867738475249967e-06, "loss": 0.28, "step": 10676 }, { "epoch": 0.61, "grad_norm": 0.6053803182315074, "learning_rate": 6.8659712606858e-06, "loss": 0.292, "step": 10677 }, { "epoch": 0.61, "grad_norm": 0.2784146187816311, "learning_rate": 6.8642041546529115e-06, "loss": 0.2397, "step": 10678 }, { "epoch": 0.61, "grad_norm": 0.25915374026684634, "learning_rate": 6.8624371572125e-06, "loss": 0.1908, "step": 10679 }, { "epoch": 0.61, "grad_norm": 1.1034569399626564, "learning_rate": 6.860670268425754e-06, "loss": 0.5397, "step": 10680 }, { "epoch": 0.61, "grad_norm": 0.6580196847714953, "learning_rate": 6.858903488353863e-06, "loss": 0.3914, "step": 10681 }, { "epoch": 0.61, "grad_norm": 0.3087985735861544, "learning_rate": 6.857136817058007e-06, "loss": 0.2731, "step": 10682 }, { "epoch": 0.61, "grad_norm": 0.30721390171367746, "learning_rate": 6.855370254599369e-06, "loss": 0.2289, "step": 10683 }, { "epoch": 0.61, "grad_norm": 0.22683232782377963, "learning_rate": 6.853603801039124e-06, "loss": 0.156, "step": 10684 }, { "epoch": 0.61, "grad_norm": 0.3619861963806639, "learning_rate": 6.8518374564384434e-06, "loss": 0.2869, "step": 10685 }, { "epoch": 0.61, "grad_norm": 1.2769919962248444, "learning_rate": 6.850071220858496e-06, "loss": 0.6362, "step": 10686 }, { "epoch": 0.61, "grad_norm": 0.36211924335233864, "learning_rate": 6.84830509436045e-06, "loss": 0.2483, "step": 10687 }, { "epoch": 0.61, "grad_norm": 0.372473918353896, "learning_rate": 6.846539077005461e-06, "loss": 0.2733, "step": 10688 }, { "epoch": 0.61, "grad_norm": 0.7227237641851568, "learning_rate": 6.844773168854686e-06, "loss": 0.4258, "step": 10689 }, { "epoch": 0.61, "grad_norm": 0.25478526632383197, "learning_rate": 6.843007369969283e-06, "loss": 0.1721, "step": 10690 }, { "epoch": 0.61, "grad_norm": 0.2592583923802607, "learning_rate": 6.841241680410398e-06, "loss": 0.2386, "step": 10691 }, { "epoch": 0.61, "grad_norm": 0.417897430422118, "learning_rate": 6.83947610023918e-06, "loss": 0.3193, "step": 10692 }, { "epoch": 0.61, "grad_norm": 0.8573806813691548, "learning_rate": 6.837710629516765e-06, "loss": 0.3226, "step": 10693 }, { "epoch": 0.61, "grad_norm": 0.32550275471629125, "learning_rate": 6.835945268304298e-06, "loss": 0.2556, "step": 10694 }, { "epoch": 0.61, "grad_norm": 0.3535437138265614, "learning_rate": 6.834180016662908e-06, "loss": 0.3155, "step": 10695 }, { "epoch": 0.61, "grad_norm": 0.9949706011291491, "learning_rate": 6.8324148746537286e-06, "loss": 0.3106, "step": 10696 }, { "epoch": 0.61, "grad_norm": 0.31547765324720395, "learning_rate": 6.830649842337885e-06, "loss": 0.2539, "step": 10697 }, { "epoch": 0.61, "grad_norm": 1.0524343916133838, "learning_rate": 6.828884919776504e-06, "loss": 0.5325, "step": 10698 }, { "epoch": 0.61, "grad_norm": 0.44576274356069756, "learning_rate": 6.827120107030698e-06, "loss": 0.3259, "step": 10699 }, { "epoch": 0.61, "grad_norm": 0.22639385024816566, "learning_rate": 6.82535540416159e-06, "loss": 0.1372, "step": 10700 }, { "epoch": 0.61, "grad_norm": 1.1446799361802587, "learning_rate": 6.823590811230287e-06, "loss": 0.6339, "step": 10701 }, { "epoch": 0.61, "grad_norm": 0.3551463442681128, "learning_rate": 6.821826328297896e-06, "loss": 0.3097, "step": 10702 }, { "epoch": 0.61, "grad_norm": 0.34226727600361334, "learning_rate": 6.820061955425527e-06, "loss": 0.1847, "step": 10703 }, { "epoch": 0.61, "grad_norm": 0.5462335453113937, "learning_rate": 6.818297692674273e-06, "loss": 0.3826, "step": 10704 }, { "epoch": 0.62, "grad_norm": 0.41512218730788686, "learning_rate": 6.81653354010523e-06, "loss": 0.2327, "step": 10705 }, { "epoch": 0.62, "grad_norm": 0.2849103841623909, "learning_rate": 6.8147694977794975e-06, "loss": 0.1901, "step": 10706 }, { "epoch": 0.62, "grad_norm": 0.35804096430942806, "learning_rate": 6.813005565758158e-06, "loss": 0.3045, "step": 10707 }, { "epoch": 0.62, "grad_norm": 0.959727916273778, "learning_rate": 6.8112417441022995e-06, "loss": 0.5161, "step": 10708 }, { "epoch": 0.62, "grad_norm": 0.41141512882499015, "learning_rate": 6.809478032873002e-06, "loss": 0.3103, "step": 10709 }, { "epoch": 0.62, "grad_norm": 0.28673660446727955, "learning_rate": 6.807714432131343e-06, "loss": 0.2599, "step": 10710 }, { "epoch": 0.62, "grad_norm": 0.6706818173645254, "learning_rate": 6.805950941938395e-06, "loss": 0.4362, "step": 10711 }, { "epoch": 0.62, "grad_norm": 0.2853954047662548, "learning_rate": 6.804187562355231e-06, "loss": 0.2041, "step": 10712 }, { "epoch": 0.62, "grad_norm": 0.3040463174783864, "learning_rate": 6.802424293442914e-06, "loss": 0.1679, "step": 10713 }, { "epoch": 0.62, "grad_norm": 0.3824192547178403, "learning_rate": 6.800661135262505e-06, "loss": 0.3014, "step": 10714 }, { "epoch": 0.62, "grad_norm": 0.3310073163699963, "learning_rate": 6.7988980878750636e-06, "loss": 0.2803, "step": 10715 }, { "epoch": 0.62, "grad_norm": 0.900982088435711, "learning_rate": 6.797135151341643e-06, "loss": 0.3219, "step": 10716 }, { "epoch": 0.62, "grad_norm": 0.42156896906947466, "learning_rate": 6.7953723257232955e-06, "loss": 0.3128, "step": 10717 }, { "epoch": 0.62, "grad_norm": 0.2688101831184305, "learning_rate": 6.793609611081064e-06, "loss": 0.2541, "step": 10718 }, { "epoch": 0.62, "grad_norm": 0.3140732036560362, "learning_rate": 6.791847007475998e-06, "loss": 0.1823, "step": 10719 }, { "epoch": 0.62, "grad_norm": 1.0008202280028524, "learning_rate": 6.7900845149691285e-06, "loss": 0.631, "step": 10720 }, { "epoch": 0.62, "grad_norm": 0.42576299687385705, "learning_rate": 6.7883221336214965e-06, "loss": 0.2948, "step": 10721 }, { "epoch": 0.62, "grad_norm": 0.42623039582871874, "learning_rate": 6.7865598634941295e-06, "loss": 0.3172, "step": 10722 }, { "epoch": 0.62, "grad_norm": 0.45379008885941363, "learning_rate": 6.784797704648058e-06, "loss": 0.2807, "step": 10723 }, { "epoch": 0.62, "grad_norm": 0.3814091792954231, "learning_rate": 6.7830356571443016e-06, "loss": 0.2453, "step": 10724 }, { "epoch": 0.62, "grad_norm": 0.2705471683167067, "learning_rate": 6.7812737210438836e-06, "loss": 0.1876, "step": 10725 }, { "epoch": 0.62, "grad_norm": 0.3377882881037869, "learning_rate": 6.77951189640782e-06, "loss": 0.2592, "step": 10726 }, { "epoch": 0.62, "grad_norm": 0.3912153744933908, "learning_rate": 6.777750183297117e-06, "loss": 0.26, "step": 10727 }, { "epoch": 0.62, "grad_norm": 0.43342392314247236, "learning_rate": 6.77598858177279e-06, "loss": 0.3417, "step": 10728 }, { "epoch": 0.62, "grad_norm": 0.9386708011142606, "learning_rate": 6.774227091895835e-06, "loss": 0.3769, "step": 10729 }, { "epoch": 0.62, "grad_norm": 0.3215869104097005, "learning_rate": 6.772465713727262e-06, "loss": 0.2572, "step": 10730 }, { "epoch": 0.62, "grad_norm": 0.21444487048175678, "learning_rate": 6.77070444732806e-06, "loss": 0.1924, "step": 10731 }, { "epoch": 0.62, "grad_norm": 0.7105654768008676, "learning_rate": 6.768943292759226e-06, "loss": 0.3145, "step": 10732 }, { "epoch": 0.62, "grad_norm": 0.8871355072259255, "learning_rate": 6.767182250081744e-06, "loss": 0.2807, "step": 10733 }, { "epoch": 0.62, "grad_norm": 0.36136097231280245, "learning_rate": 6.765421319356605e-06, "loss": 0.3122, "step": 10734 }, { "epoch": 0.62, "grad_norm": 0.45744692480747945, "learning_rate": 6.763660500644783e-06, "loss": 0.3692, "step": 10735 }, { "epoch": 0.62, "grad_norm": 0.31629793854850224, "learning_rate": 6.761899794007262e-06, "loss": 0.2086, "step": 10736 }, { "epoch": 0.62, "grad_norm": 0.25178578119165373, "learning_rate": 6.760139199505014e-06, "loss": 0.1469, "step": 10737 }, { "epoch": 0.62, "grad_norm": 0.3273390465451645, "learning_rate": 6.758378717199004e-06, "loss": 0.2983, "step": 10738 }, { "epoch": 0.62, "grad_norm": 0.58071175528015, "learning_rate": 6.756618347150196e-06, "loss": 0.2238, "step": 10739 }, { "epoch": 0.62, "grad_norm": 0.3900166059812892, "learning_rate": 6.7548580894195585e-06, "loss": 0.3206, "step": 10740 }, { "epoch": 0.62, "grad_norm": 2.5014095471100632, "learning_rate": 6.753097944068043e-06, "loss": 0.6091, "step": 10741 }, { "epoch": 0.62, "grad_norm": 0.27374873890951196, "learning_rate": 6.7513379111566105e-06, "loss": 0.2153, "step": 10742 }, { "epoch": 0.62, "grad_norm": 0.2331727972943417, "learning_rate": 6.749577990746202e-06, "loss": 0.2058, "step": 10743 }, { "epoch": 0.62, "grad_norm": 0.6106539407307946, "learning_rate": 6.74781818289777e-06, "loss": 0.3983, "step": 10744 }, { "epoch": 0.62, "grad_norm": 0.3191253890892391, "learning_rate": 6.746058487672253e-06, "loss": 0.1622, "step": 10745 }, { "epoch": 0.62, "grad_norm": 0.2832733787647948, "learning_rate": 6.744298905130593e-06, "loss": 0.2955, "step": 10746 }, { "epoch": 0.62, "grad_norm": 1.2000591687611528, "learning_rate": 6.74253943533372e-06, "loss": 0.7825, "step": 10747 }, { "epoch": 0.62, "grad_norm": 0.5827279940418081, "learning_rate": 6.740780078342568e-06, "loss": 0.304, "step": 10748 }, { "epoch": 0.62, "grad_norm": 0.2421944177584202, "learning_rate": 6.7390208342180595e-06, "loss": 0.1781, "step": 10749 }, { "epoch": 0.62, "grad_norm": 0.3432118231078927, "learning_rate": 6.737261703021123e-06, "loss": 0.3119, "step": 10750 }, { "epoch": 0.62, "grad_norm": 0.6051458957393929, "learning_rate": 6.735502684812669e-06, "loss": 0.371, "step": 10751 }, { "epoch": 0.62, "grad_norm": 0.4037052100199354, "learning_rate": 6.73374377965362e-06, "loss": 0.2492, "step": 10752 }, { "epoch": 0.62, "grad_norm": 0.9394090165245456, "learning_rate": 6.731984987604882e-06, "loss": 0.5348, "step": 10753 }, { "epoch": 0.62, "grad_norm": 0.29799092639937913, "learning_rate": 6.730226308727363e-06, "loss": 0.2664, "step": 10754 }, { "epoch": 0.62, "grad_norm": 0.44911316464676126, "learning_rate": 6.728467743081968e-06, "loss": 0.2381, "step": 10755 }, { "epoch": 0.62, "grad_norm": 0.3233394168263766, "learning_rate": 6.726709290729592e-06, "loss": 0.2382, "step": 10756 }, { "epoch": 0.62, "grad_norm": 0.5916566364028347, "learning_rate": 6.724950951731135e-06, "loss": 0.3323, "step": 10757 }, { "epoch": 0.62, "grad_norm": 0.2714572194543101, "learning_rate": 6.723192726147482e-06, "loss": 0.2394, "step": 10758 }, { "epoch": 0.62, "grad_norm": 1.0438216384131889, "learning_rate": 6.721434614039528e-06, "loss": 0.6214, "step": 10759 }, { "epoch": 0.62, "grad_norm": 0.8915962859834544, "learning_rate": 6.719676615468149e-06, "loss": 0.4534, "step": 10760 }, { "epoch": 0.62, "grad_norm": 0.3335692130553307, "learning_rate": 6.717918730494231e-06, "loss": 0.2893, "step": 10761 }, { "epoch": 0.62, "grad_norm": 0.23904715452470016, "learning_rate": 6.716160959178644e-06, "loss": 0.2044, "step": 10762 }, { "epoch": 0.62, "grad_norm": 0.77472616741585, "learning_rate": 6.714403301582263e-06, "loss": 0.4212, "step": 10763 }, { "epoch": 0.62, "grad_norm": 0.3341935232637106, "learning_rate": 6.712645757765952e-06, "loss": 0.272, "step": 10764 }, { "epoch": 0.62, "grad_norm": 0.8951143831388909, "learning_rate": 6.710888327790581e-06, "loss": 0.3213, "step": 10765 }, { "epoch": 0.62, "grad_norm": 0.29522917110829133, "learning_rate": 6.709131011717005e-06, "loss": 0.2728, "step": 10766 }, { "epoch": 0.62, "grad_norm": 0.3995050553221213, "learning_rate": 6.707373809606077e-06, "loss": 0.3016, "step": 10767 }, { "epoch": 0.62, "grad_norm": 0.33594474411352, "learning_rate": 6.705616721518655e-06, "loss": 0.1888, "step": 10768 }, { "epoch": 0.62, "grad_norm": 0.2893697612282927, "learning_rate": 6.703859747515584e-06, "loss": 0.1952, "step": 10769 }, { "epoch": 0.62, "grad_norm": 0.27035597075734846, "learning_rate": 6.702102887657709e-06, "loss": 0.2757, "step": 10770 }, { "epoch": 0.62, "grad_norm": 0.8535729454967663, "learning_rate": 6.7003461420058715e-06, "loss": 0.3506, "step": 10771 }, { "epoch": 0.62, "grad_norm": 0.5549308852351893, "learning_rate": 6.6985895106209005e-06, "loss": 0.3496, "step": 10772 }, { "epoch": 0.62, "grad_norm": 0.36265593609148106, "learning_rate": 6.696832993563636e-06, "loss": 0.2992, "step": 10773 }, { "epoch": 0.62, "grad_norm": 0.3523666206537711, "learning_rate": 6.695076590894899e-06, "loss": 0.2899, "step": 10774 }, { "epoch": 0.62, "grad_norm": 0.2912704248423908, "learning_rate": 6.693320302675521e-06, "loss": 0.1405, "step": 10775 }, { "epoch": 0.62, "grad_norm": 0.3451346608345089, "learning_rate": 6.6915641289663154e-06, "loss": 0.2796, "step": 10776 }, { "epoch": 0.62, "grad_norm": 0.3192817030035515, "learning_rate": 6.689808069828105e-06, "loss": 0.2486, "step": 10777 }, { "epoch": 0.62, "grad_norm": 0.38933740283548857, "learning_rate": 6.688052125321698e-06, "loss": 0.2439, "step": 10778 }, { "epoch": 0.62, "grad_norm": 0.3694365237432283, "learning_rate": 6.686296295507903e-06, "loss": 0.2851, "step": 10779 }, { "epoch": 0.62, "grad_norm": 0.4782734090964818, "learning_rate": 6.684540580447525e-06, "loss": 0.2538, "step": 10780 }, { "epoch": 0.62, "grad_norm": 0.4149927816314323, "learning_rate": 6.682784980201363e-06, "loss": 0.2624, "step": 10781 }, { "epoch": 0.62, "grad_norm": 0.2933724186560608, "learning_rate": 6.6810294948302165e-06, "loss": 0.2648, "step": 10782 }, { "epoch": 0.62, "grad_norm": 0.46882515485188336, "learning_rate": 6.679274124394874e-06, "loss": 0.2259, "step": 10783 }, { "epoch": 0.62, "grad_norm": 0.5844321532824087, "learning_rate": 6.677518868956128e-06, "loss": 0.2892, "step": 10784 }, { "epoch": 0.62, "grad_norm": 0.29395991528266874, "learning_rate": 6.675763728574758e-06, "loss": 0.2712, "step": 10785 }, { "epoch": 0.62, "grad_norm": 0.47841665979336606, "learning_rate": 6.67400870331155e-06, "loss": 0.3708, "step": 10786 }, { "epoch": 0.62, "grad_norm": 0.7244194775477389, "learning_rate": 6.672253793227273e-06, "loss": 0.4866, "step": 10787 }, { "epoch": 0.62, "grad_norm": 0.3039119401991573, "learning_rate": 6.670498998382708e-06, "loss": 0.2247, "step": 10788 }, { "epoch": 0.62, "grad_norm": 0.35386920706771574, "learning_rate": 6.668744318838618e-06, "loss": 0.2584, "step": 10789 }, { "epoch": 0.62, "grad_norm": 0.31571310808178393, "learning_rate": 6.66698975465577e-06, "loss": 0.2504, "step": 10790 }, { "epoch": 0.62, "grad_norm": 0.3214123531686496, "learning_rate": 6.665235305894925e-06, "loss": 0.2037, "step": 10791 }, { "epoch": 0.62, "grad_norm": 0.902925238719976, "learning_rate": 6.663480972616835e-06, "loss": 0.4962, "step": 10792 }, { "epoch": 0.62, "grad_norm": 0.3389188225905232, "learning_rate": 6.661726754882256e-06, "loss": 0.2932, "step": 10793 }, { "epoch": 0.62, "grad_norm": 0.3137494677922765, "learning_rate": 6.659972652751936e-06, "loss": 0.2132, "step": 10794 }, { "epoch": 0.62, "grad_norm": 0.39839423898137716, "learning_rate": 6.658218666286621e-06, "loss": 0.3217, "step": 10795 }, { "epoch": 0.62, "grad_norm": 0.40810260556052064, "learning_rate": 6.656464795547048e-06, "loss": 0.2755, "step": 10796 }, { "epoch": 0.62, "grad_norm": 0.34635085809256516, "learning_rate": 6.654711040593957e-06, "loss": 0.2657, "step": 10797 }, { "epoch": 0.62, "grad_norm": 0.3303025437976195, "learning_rate": 6.652957401488076e-06, "loss": 0.2489, "step": 10798 }, { "epoch": 0.62, "grad_norm": 1.1771107826528375, "learning_rate": 6.651203878290139e-06, "loss": 0.7729, "step": 10799 }, { "epoch": 0.62, "grad_norm": 0.3086966114240212, "learning_rate": 6.649450471060865e-06, "loss": 0.245, "step": 10800 }, { "epoch": 0.62, "grad_norm": 0.42800613630194834, "learning_rate": 6.64769717986098e-06, "loss": 0.2697, "step": 10801 }, { "epoch": 0.62, "grad_norm": 0.26776956632235843, "learning_rate": 6.6459440047511955e-06, "loss": 0.2168, "step": 10802 }, { "epoch": 0.62, "grad_norm": 0.3370355453206775, "learning_rate": 6.6441909457922286e-06, "loss": 0.2665, "step": 10803 }, { "epoch": 0.62, "grad_norm": 1.0228746361906365, "learning_rate": 6.642438003044781e-06, "loss": 0.3131, "step": 10804 }, { "epoch": 0.62, "grad_norm": 0.35770512175069535, "learning_rate": 6.640685176569568e-06, "loss": 0.324, "step": 10805 }, { "epoch": 0.62, "grad_norm": 0.31252442730763696, "learning_rate": 6.638932466427277e-06, "loss": 0.248, "step": 10806 }, { "epoch": 0.62, "grad_norm": 0.7175209362515863, "learning_rate": 6.637179872678612e-06, "loss": 0.3296, "step": 10807 }, { "epoch": 0.62, "grad_norm": 0.23272726866411692, "learning_rate": 6.635427395384262e-06, "loss": 0.1369, "step": 10808 }, { "epoch": 0.62, "grad_norm": 0.4007091085654962, "learning_rate": 6.633675034604918e-06, "loss": 0.2966, "step": 10809 }, { "epoch": 0.62, "grad_norm": 0.348395608259044, "learning_rate": 6.6319227904012605e-06, "loss": 0.2952, "step": 10810 }, { "epoch": 0.62, "grad_norm": 0.7075777072812005, "learning_rate": 6.630170662833974e-06, "loss": 0.2966, "step": 10811 }, { "epoch": 0.62, "grad_norm": 0.359803224019601, "learning_rate": 6.62841865196373e-06, "loss": 0.2858, "step": 10812 }, { "epoch": 0.62, "grad_norm": 0.3924621489778972, "learning_rate": 6.626666757851208e-06, "loss": 0.3285, "step": 10813 }, { "epoch": 0.62, "grad_norm": 0.3379307119515423, "learning_rate": 6.624914980557067e-06, "loss": 0.1459, "step": 10814 }, { "epoch": 0.62, "grad_norm": 0.25824750797348944, "learning_rate": 6.623163320141977e-06, "loss": 0.2055, "step": 10815 }, { "epoch": 0.62, "grad_norm": 0.9254204327897717, "learning_rate": 6.621411776666593e-06, "loss": 0.5132, "step": 10816 }, { "epoch": 0.62, "grad_norm": 0.3290210750926486, "learning_rate": 6.619660350191577e-06, "loss": 0.2528, "step": 10817 }, { "epoch": 0.62, "grad_norm": 0.3258960929973952, "learning_rate": 6.617909040777578e-06, "loss": 0.2723, "step": 10818 }, { "epoch": 0.62, "grad_norm": 0.6898961033010748, "learning_rate": 6.6161578484852405e-06, "loss": 0.4741, "step": 10819 }, { "epoch": 0.62, "grad_norm": 0.1955724790230162, "learning_rate": 6.614406773375215e-06, "loss": 0.0904, "step": 10820 }, { "epoch": 0.62, "grad_norm": 0.31062886283654306, "learning_rate": 6.612655815508135e-06, "loss": 0.2714, "step": 10821 }, { "epoch": 0.62, "grad_norm": 0.5003777175917457, "learning_rate": 6.610904974944638e-06, "loss": 0.3493, "step": 10822 }, { "epoch": 0.62, "grad_norm": 0.6498721179269221, "learning_rate": 6.609154251745356e-06, "loss": 0.4102, "step": 10823 }, { "epoch": 0.62, "grad_norm": 0.31094275540668453, "learning_rate": 6.607403645970919e-06, "loss": 0.2138, "step": 10824 }, { "epoch": 0.62, "grad_norm": 0.35222845924002294, "learning_rate": 6.605653157681945e-06, "loss": 0.308, "step": 10825 }, { "epoch": 0.62, "grad_norm": 0.5014401149672492, "learning_rate": 6.603902786939058e-06, "loss": 0.3462, "step": 10826 }, { "epoch": 0.62, "grad_norm": 0.23552220497581033, "learning_rate": 6.60215253380287e-06, "loss": 0.1319, "step": 10827 }, { "epoch": 0.62, "grad_norm": 0.7787992405401837, "learning_rate": 6.600402398333995e-06, "loss": 0.3407, "step": 10828 }, { "epoch": 0.62, "grad_norm": 0.36508668218733975, "learning_rate": 6.598652380593037e-06, "loss": 0.3031, "step": 10829 }, { "epoch": 0.62, "grad_norm": 0.3085726407226889, "learning_rate": 6.596902480640603e-06, "loss": 0.1881, "step": 10830 }, { "epoch": 0.62, "grad_norm": 0.7405743438567102, "learning_rate": 6.595152698537289e-06, "loss": 0.5434, "step": 10831 }, { "epoch": 0.62, "grad_norm": 1.4452690524635368, "learning_rate": 6.59340303434369e-06, "loss": 0.7422, "step": 10832 }, { "epoch": 0.62, "grad_norm": 0.21649306649578795, "learning_rate": 6.591653488120398e-06, "loss": 0.1843, "step": 10833 }, { "epoch": 0.62, "grad_norm": 0.34346378810527417, "learning_rate": 6.589904059927998e-06, "loss": 0.2565, "step": 10834 }, { "epoch": 0.62, "grad_norm": 0.692637088445479, "learning_rate": 6.588154749827076e-06, "loss": 0.4296, "step": 10835 }, { "epoch": 0.62, "grad_norm": 0.41281261424206633, "learning_rate": 6.586405557878206e-06, "loss": 0.287, "step": 10836 }, { "epoch": 0.62, "grad_norm": 0.32134511517364006, "learning_rate": 6.584656484141967e-06, "loss": 0.2603, "step": 10837 }, { "epoch": 0.62, "grad_norm": 0.80282010542682, "learning_rate": 6.582907528678928e-06, "loss": 0.4662, "step": 10838 }, { "epoch": 0.62, "grad_norm": 0.33148783656373965, "learning_rate": 6.5811586915496515e-06, "loss": 0.2478, "step": 10839 }, { "epoch": 0.62, "grad_norm": 0.37101099026547674, "learning_rate": 6.579409972814703e-06, "loss": 0.1527, "step": 10840 }, { "epoch": 0.62, "grad_norm": 0.3358065314278554, "learning_rate": 6.577661372534639e-06, "loss": 0.3153, "step": 10841 }, { "epoch": 0.62, "grad_norm": 0.3190398698999832, "learning_rate": 6.575912890770017e-06, "loss": 0.2436, "step": 10842 }, { "epoch": 0.62, "grad_norm": 0.8837230043317458, "learning_rate": 6.574164527581383e-06, "loss": 0.4759, "step": 10843 }, { "epoch": 0.62, "grad_norm": 0.47649999430816803, "learning_rate": 6.5724162830292835e-06, "loss": 0.3134, "step": 10844 }, { "epoch": 0.62, "grad_norm": 0.3922580154206632, "learning_rate": 6.570668157174263e-06, "loss": 0.2656, "step": 10845 }, { "epoch": 0.62, "grad_norm": 0.23499303324710186, "learning_rate": 6.568920150076854e-06, "loss": 0.1829, "step": 10846 }, { "epoch": 0.62, "grad_norm": 0.6347531042335608, "learning_rate": 6.567172261797594e-06, "loss": 0.417, "step": 10847 }, { "epoch": 0.62, "grad_norm": 0.39361758639994104, "learning_rate": 6.5654244923970105e-06, "loss": 0.2663, "step": 10848 }, { "epoch": 0.62, "grad_norm": 0.3087842118794902, "learning_rate": 6.56367684193563e-06, "loss": 0.2918, "step": 10849 }, { "epoch": 0.62, "grad_norm": 1.0242043905970053, "learning_rate": 6.561929310473971e-06, "loss": 0.2512, "step": 10850 }, { "epoch": 0.62, "grad_norm": 0.4242460543027238, "learning_rate": 6.560181898072554e-06, "loss": 0.3042, "step": 10851 }, { "epoch": 0.62, "grad_norm": 0.5240664744635831, "learning_rate": 6.558434604791888e-06, "loss": 0.3468, "step": 10852 }, { "epoch": 0.62, "grad_norm": 0.22341814224458637, "learning_rate": 6.556687430692486e-06, "loss": 0.1698, "step": 10853 }, { "epoch": 0.62, "grad_norm": 0.39096154916868614, "learning_rate": 6.5549403758348485e-06, "loss": 0.2807, "step": 10854 }, { "epoch": 0.62, "grad_norm": 0.5309980404520158, "learning_rate": 6.553193440279479e-06, "loss": 0.3881, "step": 10855 }, { "epoch": 0.62, "grad_norm": 0.4725538900448848, "learning_rate": 6.551446624086873e-06, "loss": 0.2522, "step": 10856 }, { "epoch": 0.62, "grad_norm": 0.3080050015821684, "learning_rate": 6.549699927317519e-06, "loss": 0.2818, "step": 10857 }, { "epoch": 0.62, "grad_norm": 0.5715657627569951, "learning_rate": 6.5479533500319105e-06, "loss": 0.35, "step": 10858 }, { "epoch": 0.62, "grad_norm": 0.2846118138159484, "learning_rate": 6.546206892290527e-06, "loss": 0.1529, "step": 10859 }, { "epoch": 0.62, "grad_norm": 0.3320757102260107, "learning_rate": 6.544460554153853e-06, "loss": 0.2813, "step": 10860 }, { "epoch": 0.62, "grad_norm": 0.36196437928371844, "learning_rate": 6.542714335682359e-06, "loss": 0.2973, "step": 10861 }, { "epoch": 0.62, "grad_norm": 1.141389722693649, "learning_rate": 6.54096823693652e-06, "loss": 0.4734, "step": 10862 }, { "epoch": 0.62, "grad_norm": 0.6271265112294695, "learning_rate": 6.5392222579768015e-06, "loss": 0.2178, "step": 10863 }, { "epoch": 0.62, "grad_norm": 0.3753408561849779, "learning_rate": 6.537476398863669e-06, "loss": 0.313, "step": 10864 }, { "epoch": 0.62, "grad_norm": 0.33153725793470623, "learning_rate": 6.535730659657577e-06, "loss": 0.2618, "step": 10865 }, { "epoch": 0.62, "grad_norm": 0.32391570737586556, "learning_rate": 6.533985040418988e-06, "loss": 0.1546, "step": 10866 }, { "epoch": 0.62, "grad_norm": 0.3128620256105588, "learning_rate": 6.532239541208343e-06, "loss": 0.2564, "step": 10867 }, { "epoch": 0.62, "grad_norm": 0.4533892575766774, "learning_rate": 6.530494162086098e-06, "loss": 0.3441, "step": 10868 }, { "epoch": 0.62, "grad_norm": 0.28938108521272216, "learning_rate": 6.5287489031126875e-06, "loss": 0.2152, "step": 10869 }, { "epoch": 0.62, "grad_norm": 0.46521166367836747, "learning_rate": 6.527003764348555e-06, "loss": 0.3301, "step": 10870 }, { "epoch": 0.62, "grad_norm": 1.1228207484521606, "learning_rate": 6.5252587458541325e-06, "loss": 0.5712, "step": 10871 }, { "epoch": 0.62, "grad_norm": 0.5512502762042788, "learning_rate": 6.523513847689854e-06, "loss": 0.3708, "step": 10872 }, { "epoch": 0.62, "grad_norm": 0.19166979867318232, "learning_rate": 6.521769069916136e-06, "loss": 0.1914, "step": 10873 }, { "epoch": 0.62, "grad_norm": 0.9560087983933109, "learning_rate": 6.520024412593409e-06, "loss": 0.4243, "step": 10874 }, { "epoch": 0.62, "grad_norm": 0.5336182223969617, "learning_rate": 6.518279875782083e-06, "loss": 0.3538, "step": 10875 }, { "epoch": 0.62, "grad_norm": 0.3886374618315691, "learning_rate": 6.516535459542579e-06, "loss": 0.2293, "step": 10876 }, { "epoch": 0.62, "grad_norm": 0.40788284876238157, "learning_rate": 6.514791163935299e-06, "loss": 0.3177, "step": 10877 }, { "epoch": 0.62, "grad_norm": 0.5755561972390177, "learning_rate": 6.513046989020653e-06, "loss": 0.3556, "step": 10878 }, { "epoch": 0.63, "grad_norm": 0.23691729198980177, "learning_rate": 6.5113029348590384e-06, "loss": 0.152, "step": 10879 }, { "epoch": 0.63, "grad_norm": 0.4475169294764376, "learning_rate": 6.509559001510854e-06, "loss": 0.2962, "step": 10880 }, { "epoch": 0.63, "grad_norm": 0.7213839327976953, "learning_rate": 6.5078151890364916e-06, "loss": 0.3129, "step": 10881 }, { "epoch": 0.63, "grad_norm": 0.3567362019743413, "learning_rate": 6.50607149749634e-06, "loss": 0.2346, "step": 10882 }, { "epoch": 0.63, "grad_norm": 0.8685527444154926, "learning_rate": 6.504327926950782e-06, "loss": 0.5479, "step": 10883 }, { "epoch": 0.63, "grad_norm": 0.42927802250755426, "learning_rate": 6.502584477460195e-06, "loss": 0.3064, "step": 10884 }, { "epoch": 0.63, "grad_norm": 0.2919765365381944, "learning_rate": 6.50084114908496e-06, "loss": 0.2674, "step": 10885 }, { "epoch": 0.63, "grad_norm": 0.5838456550127608, "learning_rate": 6.4990979418854436e-06, "loss": 0.2418, "step": 10886 }, { "epoch": 0.63, "grad_norm": 0.33539401157915516, "learning_rate": 6.497354855922016e-06, "loss": 0.2074, "step": 10887 }, { "epoch": 0.63, "grad_norm": 0.36912521934440096, "learning_rate": 6.495611891255038e-06, "loss": 0.2864, "step": 10888 }, { "epoch": 0.63, "grad_norm": 0.332611184205583, "learning_rate": 6.493869047944872e-06, "loss": 0.2492, "step": 10889 }, { "epoch": 0.63, "grad_norm": 0.5711716515617774, "learning_rate": 6.4921263260518664e-06, "loss": 0.413, "step": 10890 }, { "epoch": 0.63, "grad_norm": 0.34918005553161635, "learning_rate": 6.490383725636377e-06, "loss": 0.2892, "step": 10891 }, { "epoch": 0.63, "grad_norm": 0.321025479817607, "learning_rate": 6.488641246758749e-06, "loss": 0.2723, "step": 10892 }, { "epoch": 0.63, "grad_norm": 0.2324885317833231, "learning_rate": 6.486898889479323e-06, "loss": 0.1738, "step": 10893 }, { "epoch": 0.63, "grad_norm": 0.38333854647224463, "learning_rate": 6.485156653858438e-06, "loss": 0.3174, "step": 10894 }, { "epoch": 0.63, "grad_norm": 1.008010824991491, "learning_rate": 6.483414539956426e-06, "loss": 0.3672, "step": 10895 }, { "epoch": 0.63, "grad_norm": 0.27193926530077744, "learning_rate": 6.48167254783362e-06, "loss": 0.2538, "step": 10896 }, { "epoch": 0.63, "grad_norm": 0.41061131285869107, "learning_rate": 6.479930677550338e-06, "loss": 0.3058, "step": 10897 }, { "epoch": 0.63, "grad_norm": 0.6100862999061782, "learning_rate": 6.478188929166909e-06, "loss": 0.3987, "step": 10898 }, { "epoch": 0.63, "grad_norm": 0.15737671574549414, "learning_rate": 6.476447302743643e-06, "loss": 0.0739, "step": 10899 }, { "epoch": 0.63, "grad_norm": 0.3672614455951174, "learning_rate": 6.474705798340857e-06, "loss": 0.3108, "step": 10900 }, { "epoch": 0.63, "grad_norm": 0.37121567073539297, "learning_rate": 6.472964416018857e-06, "loss": 0.3178, "step": 10901 }, { "epoch": 0.63, "grad_norm": 0.5386380238554062, "learning_rate": 6.471223155837949e-06, "loss": 0.2583, "step": 10902 }, { "epoch": 0.63, "grad_norm": 0.3702006272772363, "learning_rate": 6.469482017858428e-06, "loss": 0.2944, "step": 10903 }, { "epoch": 0.63, "grad_norm": 0.3343193983572874, "learning_rate": 6.4677410021405975e-06, "loss": 0.3281, "step": 10904 }, { "epoch": 0.63, "grad_norm": 0.15994451590807493, "learning_rate": 6.46600010874474e-06, "loss": 0.0712, "step": 10905 }, { "epoch": 0.63, "grad_norm": 0.35321148685659404, "learning_rate": 6.4642593377311515e-06, "loss": 0.2867, "step": 10906 }, { "epoch": 0.63, "grad_norm": 1.1587678178893002, "learning_rate": 6.462518689160109e-06, "loss": 0.568, "step": 10907 }, { "epoch": 0.63, "grad_norm": 0.30479496232056075, "learning_rate": 6.460778163091891e-06, "loss": 0.2539, "step": 10908 }, { "epoch": 0.63, "grad_norm": 0.35727687245385015, "learning_rate": 6.45903775958677e-06, "loss": 0.2846, "step": 10909 }, { "epoch": 0.63, "grad_norm": 2.4649919245936944, "learning_rate": 6.457297478705023e-06, "loss": 0.7402, "step": 10910 }, { "epoch": 0.63, "grad_norm": 0.2619218553658152, "learning_rate": 6.45555732050691e-06, "loss": 0.1705, "step": 10911 }, { "epoch": 0.63, "grad_norm": 0.33591317094101153, "learning_rate": 6.4538172850526955e-06, "loss": 0.2024, "step": 10912 }, { "epoch": 0.63, "grad_norm": 0.341833146387076, "learning_rate": 6.452077372402634e-06, "loss": 0.3078, "step": 10913 }, { "epoch": 0.63, "grad_norm": 0.5891021497920335, "learning_rate": 6.450337582616983e-06, "loss": 0.3681, "step": 10914 }, { "epoch": 0.63, "grad_norm": 0.34289810526304676, "learning_rate": 6.448597915755988e-06, "loss": 0.2137, "step": 10915 }, { "epoch": 0.63, "grad_norm": 0.359263118619319, "learning_rate": 6.446858371879896e-06, "loss": 0.312, "step": 10916 }, { "epoch": 0.63, "grad_norm": 0.28334773046172257, "learning_rate": 6.445118951048942e-06, "loss": 0.173, "step": 10917 }, { "epoch": 0.63, "grad_norm": 0.30036622389977835, "learning_rate": 6.44337965332337e-06, "loss": 0.19, "step": 10918 }, { "epoch": 0.63, "grad_norm": 0.749494957815434, "learning_rate": 6.4416404787634045e-06, "loss": 0.4418, "step": 10919 }, { "epoch": 0.63, "grad_norm": 0.3391009390947701, "learning_rate": 6.439901427429278e-06, "loss": 0.3278, "step": 10920 }, { "epoch": 0.63, "grad_norm": 0.4741224416315272, "learning_rate": 6.438162499381212e-06, "loss": 0.2293, "step": 10921 }, { "epoch": 0.63, "grad_norm": 0.9750360327136708, "learning_rate": 6.4364236946794234e-06, "loss": 0.6323, "step": 10922 }, { "epoch": 0.63, "grad_norm": 0.24827351464326222, "learning_rate": 6.434685013384132e-06, "loss": 0.1511, "step": 10923 }, { "epoch": 0.63, "grad_norm": 0.288203382381012, "learning_rate": 6.432946455555542e-06, "loss": 0.2516, "step": 10924 }, { "epoch": 0.63, "grad_norm": 0.463203852610262, "learning_rate": 6.4312080212538665e-06, "loss": 0.2672, "step": 10925 }, { "epoch": 0.63, "grad_norm": 0.7318010513536188, "learning_rate": 6.4294697105393e-06, "loss": 0.4274, "step": 10926 }, { "epoch": 0.63, "grad_norm": 0.3059976691995668, "learning_rate": 6.427731523472047e-06, "loss": 0.2783, "step": 10927 }, { "epoch": 0.63, "grad_norm": 0.3061553802177348, "learning_rate": 6.425993460112297e-06, "loss": 0.2512, "step": 10928 }, { "epoch": 0.63, "grad_norm": 0.2644528421377624, "learning_rate": 6.424255520520239e-06, "loss": 0.1606, "step": 10929 }, { "epoch": 0.63, "grad_norm": 0.3216069885908651, "learning_rate": 6.422517704756057e-06, "loss": 0.2645, "step": 10930 }, { "epoch": 0.63, "grad_norm": 1.0873383787909738, "learning_rate": 6.420780012879937e-06, "loss": 0.3225, "step": 10931 }, { "epoch": 0.63, "grad_norm": 0.32131116306748664, "learning_rate": 6.419042444952048e-06, "loss": 0.2816, "step": 10932 }, { "epoch": 0.63, "grad_norm": 0.3490888556275218, "learning_rate": 6.417305001032567e-06, "loss": 0.2824, "step": 10933 }, { "epoch": 0.63, "grad_norm": 1.0053545569436766, "learning_rate": 6.415567681181658e-06, "loss": 0.4126, "step": 10934 }, { "epoch": 0.63, "grad_norm": 0.3229247233746252, "learning_rate": 6.413830485459488e-06, "loss": 0.2473, "step": 10935 }, { "epoch": 0.63, "grad_norm": 0.3380905219858116, "learning_rate": 6.412093413926213e-06, "loss": 0.2688, "step": 10936 }, { "epoch": 0.63, "grad_norm": 0.554045753814118, "learning_rate": 6.410356466641989e-06, "loss": 0.298, "step": 10937 }, { "epoch": 0.63, "grad_norm": 0.9802965063335598, "learning_rate": 6.408619643666967e-06, "loss": 0.3922, "step": 10938 }, { "epoch": 0.63, "grad_norm": 0.31719321797306044, "learning_rate": 6.40688294506129e-06, "loss": 0.2581, "step": 10939 }, { "epoch": 0.63, "grad_norm": 0.3305320623675954, "learning_rate": 6.405146370885107e-06, "loss": 0.2959, "step": 10940 }, { "epoch": 0.63, "grad_norm": 0.2577489342671599, "learning_rate": 6.403409921198548e-06, "loss": 0.1013, "step": 10941 }, { "epoch": 0.63, "grad_norm": 0.3603206692947732, "learning_rate": 6.401673596061747e-06, "loss": 0.2713, "step": 10942 }, { "epoch": 0.63, "grad_norm": 1.004731951323438, "learning_rate": 6.399937395534837e-06, "loss": 0.4131, "step": 10943 }, { "epoch": 0.63, "grad_norm": 0.26455269733315795, "learning_rate": 6.398201319677937e-06, "loss": 0.2145, "step": 10944 }, { "epoch": 0.63, "grad_norm": 0.31962065915692844, "learning_rate": 6.396465368551172e-06, "loss": 0.2746, "step": 10945 }, { "epoch": 0.63, "grad_norm": 1.1313165310387425, "learning_rate": 6.394729542214657e-06, "loss": 0.606, "step": 10946 }, { "epoch": 0.63, "grad_norm": 0.33812898844419714, "learning_rate": 6.392993840728503e-06, "loss": 0.2592, "step": 10947 }, { "epoch": 0.63, "grad_norm": 0.3277772485530417, "learning_rate": 6.391258264152818e-06, "loss": 0.2676, "step": 10948 }, { "epoch": 0.63, "grad_norm": 0.7622917250054192, "learning_rate": 6.389522812547701e-06, "loss": 0.4147, "step": 10949 }, { "epoch": 0.63, "grad_norm": 0.2837893256968949, "learning_rate": 6.3877874859732556e-06, "loss": 0.1997, "step": 10950 }, { "epoch": 0.63, "grad_norm": 0.3097014346057528, "learning_rate": 6.386052284489575e-06, "loss": 0.1805, "step": 10951 }, { "epoch": 0.63, "grad_norm": 0.3016138484836072, "learning_rate": 6.3843172081567474e-06, "loss": 0.2933, "step": 10952 }, { "epoch": 0.63, "grad_norm": 0.7273665127231179, "learning_rate": 6.382582257034858e-06, "loss": 0.4313, "step": 10953 }, { "epoch": 0.63, "grad_norm": 0.3243297248965466, "learning_rate": 6.380847431183992e-06, "loss": 0.188, "step": 10954 }, { "epoch": 0.63, "grad_norm": 0.3351138381356627, "learning_rate": 6.379112730664222e-06, "loss": 0.2968, "step": 10955 }, { "epoch": 0.63, "grad_norm": 0.3233328725092727, "learning_rate": 6.377378155535625e-06, "loss": 0.2423, "step": 10956 }, { "epoch": 0.63, "grad_norm": 0.2918832243355452, "learning_rate": 6.375643705858263e-06, "loss": 0.2093, "step": 10957 }, { "epoch": 0.63, "grad_norm": 1.0029877128585176, "learning_rate": 6.373909381692207e-06, "loss": 0.5905, "step": 10958 }, { "epoch": 0.63, "grad_norm": 0.5027694083886098, "learning_rate": 6.372175183097511e-06, "loss": 0.3369, "step": 10959 }, { "epoch": 0.63, "grad_norm": 0.3078451694449207, "learning_rate": 6.370441110134233e-06, "loss": 0.2701, "step": 10960 }, { "epoch": 0.63, "grad_norm": 0.41308557058341894, "learning_rate": 6.3687071628624244e-06, "loss": 0.2483, "step": 10961 }, { "epoch": 0.63, "grad_norm": 0.45906823035018285, "learning_rate": 6.366973341342128e-06, "loss": 0.3574, "step": 10962 }, { "epoch": 0.63, "grad_norm": 0.34703098652586084, "learning_rate": 6.365239645633392e-06, "loss": 0.2705, "step": 10963 }, { "epoch": 0.63, "grad_norm": 0.26359333923079414, "learning_rate": 6.3635060757962485e-06, "loss": 0.1914, "step": 10964 }, { "epoch": 0.63, "grad_norm": 0.5789237996497145, "learning_rate": 6.361772631890735e-06, "loss": 0.3915, "step": 10965 }, { "epoch": 0.63, "grad_norm": 0.3167200194383066, "learning_rate": 6.360039313976875e-06, "loss": 0.2645, "step": 10966 }, { "epoch": 0.63, "grad_norm": 1.3052671121934587, "learning_rate": 6.3583061221147015e-06, "loss": 0.2044, "step": 10967 }, { "epoch": 0.63, "grad_norm": 0.3444639163727557, "learning_rate": 6.356573056364227e-06, "loss": 0.3192, "step": 10968 }, { "epoch": 0.63, "grad_norm": 0.5307240095449588, "learning_rate": 6.354840116785473e-06, "loss": 0.3267, "step": 10969 }, { "epoch": 0.63, "grad_norm": 0.25389779069515805, "learning_rate": 6.353107303438447e-06, "loss": 0.1643, "step": 10970 }, { "epoch": 0.63, "grad_norm": 0.4045083838798069, "learning_rate": 6.351374616383161e-06, "loss": 0.3156, "step": 10971 }, { "epoch": 0.63, "grad_norm": 0.6028023578671253, "learning_rate": 6.349642055679613e-06, "loss": 0.3478, "step": 10972 }, { "epoch": 0.63, "grad_norm": 0.4182115635796257, "learning_rate": 6.347909621387809e-06, "loss": 0.2903, "step": 10973 }, { "epoch": 0.63, "grad_norm": 1.0543676184663318, "learning_rate": 6.346177313567732e-06, "loss": 0.4733, "step": 10974 }, { "epoch": 0.63, "grad_norm": 0.2926592072070201, "learning_rate": 6.34444513227938e-06, "loss": 0.2497, "step": 10975 }, { "epoch": 0.63, "grad_norm": 0.24753098131598456, "learning_rate": 6.342713077582733e-06, "loss": 0.2406, "step": 10976 }, { "epoch": 0.63, "grad_norm": 0.5753767227553651, "learning_rate": 6.340981149537777e-06, "loss": 0.3157, "step": 10977 }, { "epoch": 0.63, "grad_norm": 0.3953220853323611, "learning_rate": 6.339249348204485e-06, "loss": 0.2584, "step": 10978 }, { "epoch": 0.63, "grad_norm": 0.5473609875755407, "learning_rate": 6.337517673642833e-06, "loss": 0.3299, "step": 10979 }, { "epoch": 0.63, "grad_norm": 0.3745830109479958, "learning_rate": 6.335786125912784e-06, "loss": 0.2747, "step": 10980 }, { "epoch": 0.63, "grad_norm": 0.40675845180689163, "learning_rate": 6.3340547050743055e-06, "loss": 0.3007, "step": 10981 }, { "epoch": 0.63, "grad_norm": 0.690730580115114, "learning_rate": 6.332323411187353e-06, "loss": 0.3359, "step": 10982 }, { "epoch": 0.63, "grad_norm": 0.2285847486188215, "learning_rate": 6.330592244311885e-06, "loss": 0.1904, "step": 10983 }, { "epoch": 0.63, "grad_norm": 0.3287690816760705, "learning_rate": 6.328861204507848e-06, "loss": 0.2549, "step": 10984 }, { "epoch": 0.63, "grad_norm": 1.4816162350887843, "learning_rate": 6.327130291835192e-06, "loss": 0.8568, "step": 10985 }, { "epoch": 0.63, "grad_norm": 1.1107702877033343, "learning_rate": 6.325399506353855e-06, "loss": 0.8058, "step": 10986 }, { "epoch": 0.63, "grad_norm": 0.32706815193793987, "learning_rate": 6.323668848123774e-06, "loss": 0.19, "step": 10987 }, { "epoch": 0.63, "grad_norm": 0.3039169646645536, "learning_rate": 6.321938317204886e-06, "loss": 0.2826, "step": 10988 }, { "epoch": 0.63, "grad_norm": 0.34350298686577224, "learning_rate": 6.320207913657111e-06, "loss": 0.223, "step": 10989 }, { "epoch": 0.63, "grad_norm": 0.43173203039681673, "learning_rate": 6.3184776375403814e-06, "loss": 0.1458, "step": 10990 }, { "epoch": 0.63, "grad_norm": 0.32354358590641585, "learning_rate": 6.3167474889146096e-06, "loss": 0.292, "step": 10991 }, { "epoch": 0.63, "grad_norm": 0.4753435688955507, "learning_rate": 6.315017467839717e-06, "loss": 0.3936, "step": 10992 }, { "epoch": 0.63, "grad_norm": 0.37915678416069587, "learning_rate": 6.313287574375609e-06, "loss": 0.2156, "step": 10993 }, { "epoch": 0.63, "grad_norm": 0.4090258131741896, "learning_rate": 6.311557808582196e-06, "loss": 0.3467, "step": 10994 }, { "epoch": 0.63, "grad_norm": 0.3414960965925519, "learning_rate": 6.309828170519376e-06, "loss": 0.2537, "step": 10995 }, { "epoch": 0.63, "grad_norm": 0.2555111542785431, "learning_rate": 6.308098660247049e-06, "loss": 0.1574, "step": 10996 }, { "epoch": 0.63, "grad_norm": 0.53907874280059, "learning_rate": 6.306369277825104e-06, "loss": 0.3552, "step": 10997 }, { "epoch": 0.63, "grad_norm": 0.8653136044186649, "learning_rate": 6.304640023313435e-06, "loss": 0.5482, "step": 10998 }, { "epoch": 0.63, "grad_norm": 0.2936106670828241, "learning_rate": 6.302910896771921e-06, "loss": 0.2673, "step": 10999 }, { "epoch": 0.63, "grad_norm": 0.3666631543606978, "learning_rate": 6.301181898260444e-06, "loss": 0.2542, "step": 11000 }, { "epoch": 0.63, "grad_norm": 0.42116700454476547, "learning_rate": 6.299453027838881e-06, "loss": 0.2626, "step": 11001 }, { "epoch": 0.63, "grad_norm": 0.33832310295296864, "learning_rate": 6.297724285567098e-06, "loss": 0.2159, "step": 11002 }, { "epoch": 0.63, "grad_norm": 0.3384603298577127, "learning_rate": 6.295995671504965e-06, "loss": 0.2478, "step": 11003 }, { "epoch": 0.63, "grad_norm": 0.4787703629140393, "learning_rate": 6.294267185712342e-06, "loss": 0.361, "step": 11004 }, { "epoch": 0.63, "grad_norm": 0.588450481399667, "learning_rate": 6.292538828249087e-06, "loss": 0.355, "step": 11005 }, { "epoch": 0.63, "grad_norm": 0.32064689647937217, "learning_rate": 6.290810599175052e-06, "loss": 0.234, "step": 11006 }, { "epoch": 0.63, "grad_norm": 0.3023253847261538, "learning_rate": 6.289082498550091e-06, "loss": 0.2633, "step": 11007 }, { "epoch": 0.63, "grad_norm": 0.33301628269246675, "learning_rate": 6.287354526434042e-06, "loss": 0.1976, "step": 11008 }, { "epoch": 0.63, "grad_norm": 0.3279184303807924, "learning_rate": 6.285626682886743e-06, "loss": 0.2236, "step": 11009 }, { "epoch": 0.63, "grad_norm": 0.7138001020794481, "learning_rate": 6.283898967968034e-06, "loss": 0.4225, "step": 11010 }, { "epoch": 0.63, "grad_norm": 0.35085811541230716, "learning_rate": 6.282171381737742e-06, "loss": 0.2892, "step": 11011 }, { "epoch": 0.63, "grad_norm": 0.35646638300198263, "learning_rate": 6.280443924255697e-06, "loss": 0.3308, "step": 11012 }, { "epoch": 0.63, "grad_norm": 0.9594251689271127, "learning_rate": 6.27871659558172e-06, "loss": 0.2799, "step": 11013 }, { "epoch": 0.63, "grad_norm": 0.2198364833708466, "learning_rate": 6.276989395775625e-06, "loss": 0.1496, "step": 11014 }, { "epoch": 0.63, "grad_norm": 0.28285740443505136, "learning_rate": 6.275262324897229e-06, "loss": 0.2867, "step": 11015 }, { "epoch": 0.63, "grad_norm": 0.7428097831158089, "learning_rate": 6.273535383006336e-06, "loss": 0.319, "step": 11016 }, { "epoch": 0.63, "grad_norm": 0.5076884926327849, "learning_rate": 6.271808570162754e-06, "loss": 0.3514, "step": 11017 }, { "epoch": 0.63, "grad_norm": 0.3783077303287608, "learning_rate": 6.27008188642628e-06, "loss": 0.3001, "step": 11018 }, { "epoch": 0.63, "grad_norm": 0.3307956499614347, "learning_rate": 6.268355331856713e-06, "loss": 0.2285, "step": 11019 }, { "epoch": 0.63, "grad_norm": 0.2362548513026544, "learning_rate": 6.266628906513836e-06, "loss": 0.1491, "step": 11020 }, { "epoch": 0.63, "grad_norm": 0.42279259430228205, "learning_rate": 6.264902610457442e-06, "loss": 0.316, "step": 11021 }, { "epoch": 0.63, "grad_norm": 0.575956482245312, "learning_rate": 6.263176443747309e-06, "loss": 0.3045, "step": 11022 }, { "epoch": 0.63, "grad_norm": 0.48818995665095016, "learning_rate": 6.261450406443217e-06, "loss": 0.3887, "step": 11023 }, { "epoch": 0.63, "grad_norm": 0.3348148241071795, "learning_rate": 6.259724498604933e-06, "loss": 0.2703, "step": 11024 }, { "epoch": 0.63, "grad_norm": 1.1273760167300912, "learning_rate": 6.257998720292233e-06, "loss": 0.5308, "step": 11025 }, { "epoch": 0.63, "grad_norm": 0.1848169085706313, "learning_rate": 6.256273071564874e-06, "loss": 0.088, "step": 11026 }, { "epoch": 0.63, "grad_norm": 0.25545609687182125, "learning_rate": 6.254547552482617e-06, "loss": 0.2522, "step": 11027 }, { "epoch": 0.63, "grad_norm": 0.7623754548706887, "learning_rate": 6.25282216310522e-06, "loss": 0.368, "step": 11028 }, { "epoch": 0.63, "grad_norm": 0.4726985840995557, "learning_rate": 6.2510969034924265e-06, "loss": 0.3078, "step": 11029 }, { "epoch": 0.63, "grad_norm": 0.3572847810692785, "learning_rate": 6.249371773703989e-06, "loss": 0.276, "step": 11030 }, { "epoch": 0.63, "grad_norm": 0.3708677390096252, "learning_rate": 6.247646773799645e-06, "loss": 0.3205, "step": 11031 }, { "epoch": 0.63, "grad_norm": 0.25304006716718513, "learning_rate": 6.245921903839132e-06, "loss": 0.123, "step": 11032 }, { "epoch": 0.63, "grad_norm": 0.3925028301065731, "learning_rate": 6.24419716388218e-06, "loss": 0.2976, "step": 11033 }, { "epoch": 0.63, "grad_norm": 0.8941776485349109, "learning_rate": 6.242472553988521e-06, "loss": 0.4912, "step": 11034 }, { "epoch": 0.63, "grad_norm": 0.3501794684964001, "learning_rate": 6.240748074217875e-06, "loss": 0.3189, "step": 11035 }, { "epoch": 0.63, "grad_norm": 0.3330008477936822, "learning_rate": 6.239023724629962e-06, "loss": 0.217, "step": 11036 }, { "epoch": 0.63, "grad_norm": 1.1036007794647276, "learning_rate": 6.237299505284495e-06, "loss": 0.616, "step": 11037 }, { "epoch": 0.63, "grad_norm": 0.3554645171715663, "learning_rate": 6.235575416241185e-06, "loss": 0.3099, "step": 11038 }, { "epoch": 0.63, "grad_norm": 0.26021400716218845, "learning_rate": 6.233851457559736e-06, "loss": 0.2198, "step": 11039 }, { "epoch": 0.63, "grad_norm": 0.35160884390360947, "learning_rate": 6.232127629299849e-06, "loss": 0.2377, "step": 11040 }, { "epoch": 0.63, "grad_norm": 1.0184502874359644, "learning_rate": 6.230403931521224e-06, "loss": 0.7252, "step": 11041 }, { "epoch": 0.63, "grad_norm": 0.27161208909389467, "learning_rate": 6.228680364283546e-06, "loss": 0.1799, "step": 11042 }, { "epoch": 0.63, "grad_norm": 0.3497995921510243, "learning_rate": 6.226956927646504e-06, "loss": 0.3126, "step": 11043 }, { "epoch": 0.63, "grad_norm": 0.6248331696318262, "learning_rate": 6.225233621669782e-06, "loss": 0.4057, "step": 11044 }, { "epoch": 0.63, "grad_norm": 0.3119749624889606, "learning_rate": 6.2235104464130545e-06, "loss": 0.2302, "step": 11045 }, { "epoch": 0.63, "grad_norm": 0.3756060506238427, "learning_rate": 6.221787401936002e-06, "loss": 0.2444, "step": 11046 }, { "epoch": 0.63, "grad_norm": 0.34670055217013945, "learning_rate": 6.220064488298285e-06, "loss": 0.3051, "step": 11047 }, { "epoch": 0.63, "grad_norm": 0.249174577275586, "learning_rate": 6.2183417055595765e-06, "loss": 0.2063, "step": 11048 }, { "epoch": 0.63, "grad_norm": 1.0123588001422943, "learning_rate": 6.216619053779529e-06, "loss": 0.2817, "step": 11049 }, { "epoch": 0.63, "grad_norm": 0.36445402991388354, "learning_rate": 6.214896533017803e-06, "loss": 0.3022, "step": 11050 }, { "epoch": 0.63, "grad_norm": 0.3092688596231202, "learning_rate": 6.213174143334046e-06, "loss": 0.2614, "step": 11051 }, { "epoch": 0.63, "grad_norm": 0.25530111498148117, "learning_rate": 6.211451884787907e-06, "loss": 0.1012, "step": 11052 }, { "epoch": 0.64, "grad_norm": 0.4294403584004139, "learning_rate": 6.209729757439026e-06, "loss": 0.3003, "step": 11053 }, { "epoch": 0.64, "grad_norm": 0.4008704554780664, "learning_rate": 6.208007761347039e-06, "loss": 0.2886, "step": 11054 }, { "epoch": 0.64, "grad_norm": 0.27196718819293497, "learning_rate": 6.206285896571582e-06, "loss": 0.24, "step": 11055 }, { "epoch": 0.64, "grad_norm": 0.5460900718518568, "learning_rate": 6.20456416317228e-06, "loss": 0.332, "step": 11056 }, { "epoch": 0.64, "grad_norm": 0.369685863562834, "learning_rate": 6.202842561208759e-06, "loss": 0.2815, "step": 11057 }, { "epoch": 0.64, "grad_norm": 0.5100637075537648, "learning_rate": 6.201121090740634e-06, "loss": 0.2929, "step": 11058 }, { "epoch": 0.64, "grad_norm": 0.316403402827174, "learning_rate": 6.199399751827525e-06, "loss": 0.2596, "step": 11059 }, { "epoch": 0.64, "grad_norm": 0.3212154547207368, "learning_rate": 6.197678544529037e-06, "loss": 0.2387, "step": 11060 }, { "epoch": 0.64, "grad_norm": 0.5344441277836296, "learning_rate": 6.195957468904781e-06, "loss": 0.3082, "step": 11061 }, { "epoch": 0.64, "grad_norm": 0.3320258149067593, "learning_rate": 6.19423652501435e-06, "loss": 0.2633, "step": 11062 }, { "epoch": 0.64, "grad_norm": 0.3367919993055683, "learning_rate": 6.192515712917348e-06, "loss": 0.2523, "step": 11063 }, { "epoch": 0.64, "grad_norm": 1.0809686276937551, "learning_rate": 6.19079503267336e-06, "loss": 0.6319, "step": 11064 }, { "epoch": 0.64, "grad_norm": 0.19407826999737932, "learning_rate": 6.189074484341979e-06, "loss": 0.098, "step": 11065 }, { "epoch": 0.64, "grad_norm": 0.31577201724467685, "learning_rate": 6.187354067982785e-06, "loss": 0.2449, "step": 11066 }, { "epoch": 0.64, "grad_norm": 0.3403924479356509, "learning_rate": 6.185633783655354e-06, "loss": 0.302, "step": 11067 }, { "epoch": 0.64, "grad_norm": 0.6402462185739675, "learning_rate": 6.183913631419263e-06, "loss": 0.3312, "step": 11068 }, { "epoch": 0.64, "grad_norm": 0.3382422319781781, "learning_rate": 6.182193611334075e-06, "loss": 0.258, "step": 11069 }, { "epoch": 0.64, "grad_norm": 1.2736662713881572, "learning_rate": 6.180473723459361e-06, "loss": 0.7406, "step": 11070 }, { "epoch": 0.64, "grad_norm": 0.25041480842921693, "learning_rate": 6.178753967854677e-06, "loss": 0.2126, "step": 11071 }, { "epoch": 0.64, "grad_norm": 0.4063472874123882, "learning_rate": 6.17703434457958e-06, "loss": 0.2927, "step": 11072 }, { "epoch": 0.64, "grad_norm": 0.586628542560072, "learning_rate": 6.175314853693617e-06, "loss": 0.3441, "step": 11073 }, { "epoch": 0.64, "grad_norm": 0.27702603918539737, "learning_rate": 6.173595495256338e-06, "loss": 0.2467, "step": 11074 }, { "epoch": 0.64, "grad_norm": 0.35689733543792085, "learning_rate": 6.1718762693272846e-06, "loss": 0.1613, "step": 11075 }, { "epoch": 0.64, "grad_norm": 0.46985107766144624, "learning_rate": 6.170157175965988e-06, "loss": 0.3795, "step": 11076 }, { "epoch": 0.64, "grad_norm": 0.7798022987856814, "learning_rate": 6.168438215231984e-06, "loss": 0.499, "step": 11077 }, { "epoch": 0.64, "grad_norm": 0.2888764256268748, "learning_rate": 6.166719387184802e-06, "loss": 0.1906, "step": 11078 }, { "epoch": 0.64, "grad_norm": 0.32604639861053947, "learning_rate": 6.16500069188396e-06, "loss": 0.3296, "step": 11079 }, { "epoch": 0.64, "grad_norm": 0.2507700628659636, "learning_rate": 6.163282129388981e-06, "loss": 0.1362, "step": 11080 }, { "epoch": 0.64, "grad_norm": 0.31590120501365204, "learning_rate": 6.1615636997593745e-06, "loss": 0.2005, "step": 11081 }, { "epoch": 0.64, "grad_norm": 0.3610564542876327, "learning_rate": 6.159845403054654e-06, "loss": 0.332, "step": 11082 }, { "epoch": 0.64, "grad_norm": 1.2608533543659348, "learning_rate": 6.15812723933432e-06, "loss": 0.7569, "step": 11083 }, { "epoch": 0.64, "grad_norm": 0.32773053299135346, "learning_rate": 6.1564092086578765e-06, "loss": 0.2223, "step": 11084 }, { "epoch": 0.64, "grad_norm": 0.8451695808115459, "learning_rate": 6.154691311084816e-06, "loss": 0.4394, "step": 11085 }, { "epoch": 0.64, "grad_norm": 0.21285824327119043, "learning_rate": 6.152973546674631e-06, "loss": 0.2111, "step": 11086 }, { "epoch": 0.64, "grad_norm": 0.3162598405639108, "learning_rate": 6.151255915486804e-06, "loss": 0.2425, "step": 11087 }, { "epoch": 0.64, "grad_norm": 0.9304264514798442, "learning_rate": 6.1495384175808224e-06, "loss": 0.3778, "step": 11088 }, { "epoch": 0.64, "grad_norm": 0.7140149435298279, "learning_rate": 6.147821053016159e-06, "loss": 0.4178, "step": 11089 }, { "epoch": 0.64, "grad_norm": 0.3320040093447702, "learning_rate": 6.146103821852286e-06, "loss": 0.245, "step": 11090 }, { "epoch": 0.64, "grad_norm": 0.35658129823456214, "learning_rate": 6.144386724148674e-06, "loss": 0.2434, "step": 11091 }, { "epoch": 0.64, "grad_norm": 0.27249855103507575, "learning_rate": 6.142669759964781e-06, "loss": 0.1735, "step": 11092 }, { "epoch": 0.64, "grad_norm": 0.5892126845666474, "learning_rate": 6.140952929360071e-06, "loss": 0.3012, "step": 11093 }, { "epoch": 0.64, "grad_norm": 0.24465733364449624, "learning_rate": 6.139236232393993e-06, "loss": 0.2379, "step": 11094 }, { "epoch": 0.64, "grad_norm": 0.9972780676501973, "learning_rate": 6.137519669126e-06, "loss": 0.4295, "step": 11095 }, { "epoch": 0.64, "grad_norm": 0.4816808218850385, "learning_rate": 6.135803239615532e-06, "loss": 0.337, "step": 11096 }, { "epoch": 0.64, "grad_norm": 0.2635477301210972, "learning_rate": 6.134086943922034e-06, "loss": 0.183, "step": 11097 }, { "epoch": 0.64, "grad_norm": 0.2615090099141512, "learning_rate": 6.132370782104937e-06, "loss": 0.2292, "step": 11098 }, { "epoch": 0.64, "grad_norm": 0.38708784016928843, "learning_rate": 6.130654754223676e-06, "loss": 0.2688, "step": 11099 }, { "epoch": 0.64, "grad_norm": 0.5112058574827448, "learning_rate": 6.128938860337672e-06, "loss": 0.3646, "step": 11100 }, { "epoch": 0.64, "grad_norm": 0.7461043466785471, "learning_rate": 6.127223100506351e-06, "loss": 0.3124, "step": 11101 }, { "epoch": 0.64, "grad_norm": 0.27170610461361683, "learning_rate": 6.125507474789125e-06, "loss": 0.2624, "step": 11102 }, { "epoch": 0.64, "grad_norm": 0.5161349562940573, "learning_rate": 6.123791983245411e-06, "loss": 0.3863, "step": 11103 }, { "epoch": 0.64, "grad_norm": 0.21589302078484102, "learning_rate": 6.122076625934612e-06, "loss": 0.1027, "step": 11104 }, { "epoch": 0.64, "grad_norm": 0.3764534243966187, "learning_rate": 6.120361402916135e-06, "loss": 0.2836, "step": 11105 }, { "epoch": 0.64, "grad_norm": 0.37975502883, "learning_rate": 6.118646314249376e-06, "loss": 0.3184, "step": 11106 }, { "epoch": 0.64, "grad_norm": 0.6739943039680442, "learning_rate": 6.116931359993725e-06, "loss": 0.3041, "step": 11107 }, { "epoch": 0.64, "grad_norm": 0.37209910595351725, "learning_rate": 6.115216540208577e-06, "loss": 0.2577, "step": 11108 }, { "epoch": 0.64, "grad_norm": 0.5528497756841371, "learning_rate": 6.1135018549533146e-06, "loss": 0.3213, "step": 11109 }, { "epoch": 0.64, "grad_norm": 0.21630491935510923, "learning_rate": 6.111787304287312e-06, "loss": 0.1668, "step": 11110 }, { "epoch": 0.64, "grad_norm": 0.6091154648621666, "learning_rate": 6.11007288826995e-06, "loss": 0.3458, "step": 11111 }, { "epoch": 0.64, "grad_norm": 0.35893455503756927, "learning_rate": 6.108358606960595e-06, "loss": 0.31, "step": 11112 }, { "epoch": 0.64, "grad_norm": 0.7157112727794527, "learning_rate": 6.1066444604186156e-06, "loss": 0.4223, "step": 11113 }, { "epoch": 0.64, "grad_norm": 0.26562757365475465, "learning_rate": 6.104930448703369e-06, "loss": 0.2132, "step": 11114 }, { "epoch": 0.64, "grad_norm": 0.3765400989158581, "learning_rate": 6.1032165718742154e-06, "loss": 0.296, "step": 11115 }, { "epoch": 0.64, "grad_norm": 0.4711689009780174, "learning_rate": 6.1015028299905025e-06, "loss": 0.2469, "step": 11116 }, { "epoch": 0.64, "grad_norm": 0.27771652468962005, "learning_rate": 6.0997892231115805e-06, "loss": 0.1337, "step": 11117 }, { "epoch": 0.64, "grad_norm": 0.2506367775544241, "learning_rate": 6.098075751296792e-06, "loss": 0.272, "step": 11118 }, { "epoch": 0.64, "grad_norm": 0.7113697594382398, "learning_rate": 6.096362414605468e-06, "loss": 0.4545, "step": 11119 }, { "epoch": 0.64, "grad_norm": 0.31511420097201853, "learning_rate": 6.0946492130969494e-06, "loss": 0.1979, "step": 11120 }, { "epoch": 0.64, "grad_norm": 0.48430785170819585, "learning_rate": 6.092936146830557e-06, "loss": 0.3365, "step": 11121 }, { "epoch": 0.64, "grad_norm": 0.34387547840728383, "learning_rate": 6.091223215865621e-06, "loss": 0.2893, "step": 11122 }, { "epoch": 0.64, "grad_norm": 0.23731111105713615, "learning_rate": 6.089510420261455e-06, "loss": 0.1881, "step": 11123 }, { "epoch": 0.64, "grad_norm": 0.3714432234815818, "learning_rate": 6.087797760077376e-06, "loss": 0.265, "step": 11124 }, { "epoch": 0.64, "grad_norm": 0.7696874757906164, "learning_rate": 6.086085235372692e-06, "loss": 0.488, "step": 11125 }, { "epoch": 0.64, "grad_norm": 0.29488211422426086, "learning_rate": 6.084372846206709e-06, "loss": 0.2643, "step": 11126 }, { "epoch": 0.64, "grad_norm": 0.3843671048783373, "learning_rate": 6.0826605926387226e-06, "loss": 0.2295, "step": 11127 }, { "epoch": 0.64, "grad_norm": 0.28084906133884785, "learning_rate": 6.080948474728036e-06, "loss": 0.1574, "step": 11128 }, { "epoch": 0.64, "grad_norm": 0.8237786686226553, "learning_rate": 6.079236492533931e-06, "loss": 0.3501, "step": 11129 }, { "epoch": 0.64, "grad_norm": 0.2653302823814511, "learning_rate": 6.077524646115701e-06, "loss": 0.2377, "step": 11130 }, { "epoch": 0.64, "grad_norm": 0.790921659400872, "learning_rate": 6.075812935532623e-06, "loss": 0.5031, "step": 11131 }, { "epoch": 0.64, "grad_norm": 0.732432702456682, "learning_rate": 6.074101360843973e-06, "loss": 0.4053, "step": 11132 }, { "epoch": 0.64, "grad_norm": 0.3450032202174695, "learning_rate": 6.072389922109027e-06, "loss": 0.2106, "step": 11133 }, { "epoch": 0.64, "grad_norm": 0.3755738306929151, "learning_rate": 6.070678619387045e-06, "loss": 0.2833, "step": 11134 }, { "epoch": 0.64, "grad_norm": 0.923733611804049, "learning_rate": 6.068967452737296e-06, "loss": 0.3767, "step": 11135 }, { "epoch": 0.64, "grad_norm": 0.2933452155571611, "learning_rate": 6.067256422219034e-06, "loss": 0.2224, "step": 11136 }, { "epoch": 0.64, "grad_norm": 0.29408326213010355, "learning_rate": 6.065545527891514e-06, "loss": 0.1626, "step": 11137 }, { "epoch": 0.64, "grad_norm": 0.3024053064991005, "learning_rate": 6.063834769813982e-06, "loss": 0.2829, "step": 11138 }, { "epoch": 0.64, "grad_norm": 0.441073941280975, "learning_rate": 6.062124148045685e-06, "loss": 0.3102, "step": 11139 }, { "epoch": 0.64, "grad_norm": 1.033460670183201, "learning_rate": 6.060413662645856e-06, "loss": 0.3068, "step": 11140 }, { "epoch": 0.64, "grad_norm": 0.3800496285695335, "learning_rate": 6.058703313673735e-06, "loss": 0.3042, "step": 11141 }, { "epoch": 0.64, "grad_norm": 0.3069885259774946, "learning_rate": 6.0569931011885504e-06, "loss": 0.2701, "step": 11142 }, { "epoch": 0.64, "grad_norm": 0.261025683645621, "learning_rate": 6.055283025249526e-06, "loss": 0.1452, "step": 11143 }, { "epoch": 0.64, "grad_norm": 0.30579045165578644, "learning_rate": 6.053573085915875e-06, "loss": 0.1788, "step": 11144 }, { "epoch": 0.64, "grad_norm": 0.41347157047586297, "learning_rate": 6.0518632832468215e-06, "loss": 0.3075, "step": 11145 }, { "epoch": 0.64, "grad_norm": 0.509072555332188, "learning_rate": 6.050153617301571e-06, "loss": 0.2722, "step": 11146 }, { "epoch": 0.64, "grad_norm": 1.0137824837962006, "learning_rate": 6.048444088139334e-06, "loss": 0.4353, "step": 11147 }, { "epoch": 0.64, "grad_norm": 0.32813531727045664, "learning_rate": 6.0467346958193056e-06, "loss": 0.253, "step": 11148 }, { "epoch": 0.64, "grad_norm": 0.3297149666866975, "learning_rate": 6.045025440400684e-06, "loss": 0.2803, "step": 11149 }, { "epoch": 0.64, "grad_norm": 0.2517094708835535, "learning_rate": 6.043316321942663e-06, "loss": 0.1736, "step": 11150 }, { "epoch": 0.64, "grad_norm": 0.36579356484526454, "learning_rate": 6.0416073405044274e-06, "loss": 0.2721, "step": 11151 }, { "epoch": 0.64, "grad_norm": 1.009514354771467, "learning_rate": 6.039898496145159e-06, "loss": 0.4483, "step": 11152 }, { "epoch": 0.64, "grad_norm": 0.46789848161172504, "learning_rate": 6.038189788924036e-06, "loss": 0.2603, "step": 11153 }, { "epoch": 0.64, "grad_norm": 0.28716040798789005, "learning_rate": 6.03648121890023e-06, "loss": 0.2665, "step": 11154 }, { "epoch": 0.64, "grad_norm": 1.2217392161585139, "learning_rate": 6.03477278613291e-06, "loss": 0.7532, "step": 11155 }, { "epoch": 0.64, "grad_norm": 0.20730714099122807, "learning_rate": 6.033064490681238e-06, "loss": 0.1146, "step": 11156 }, { "epoch": 0.64, "grad_norm": 0.4212369317329507, "learning_rate": 6.031356332604369e-06, "loss": 0.2847, "step": 11157 }, { "epoch": 0.64, "grad_norm": 0.3950415550882442, "learning_rate": 6.029648311961462e-06, "loss": 0.3112, "step": 11158 }, { "epoch": 0.64, "grad_norm": 0.5599324152074959, "learning_rate": 6.027940428811662e-06, "loss": 0.1964, "step": 11159 }, { "epoch": 0.64, "grad_norm": 0.3724627817967568, "learning_rate": 6.026232683214115e-06, "loss": 0.2767, "step": 11160 }, { "epoch": 0.64, "grad_norm": 0.4752167622283187, "learning_rate": 6.024525075227959e-06, "loss": 0.3687, "step": 11161 }, { "epoch": 0.64, "grad_norm": 0.21954940331324588, "learning_rate": 6.02281760491233e-06, "loss": 0.1958, "step": 11162 }, { "epoch": 0.64, "grad_norm": 0.33151403738859675, "learning_rate": 6.021110272326354e-06, "loss": 0.2124, "step": 11163 }, { "epoch": 0.64, "grad_norm": 0.515384369202005, "learning_rate": 6.0194030775291605e-06, "loss": 0.3731, "step": 11164 }, { "epoch": 0.64, "grad_norm": 0.4626758506358267, "learning_rate": 6.017696020579864e-06, "loss": 0.3026, "step": 11165 }, { "epoch": 0.64, "grad_norm": 0.26893646286578476, "learning_rate": 6.015989101537586e-06, "loss": 0.2204, "step": 11166 }, { "epoch": 0.64, "grad_norm": 1.1058572603376675, "learning_rate": 6.0142823204614335e-06, "loss": 0.703, "step": 11167 }, { "epoch": 0.64, "grad_norm": 0.3964445682878911, "learning_rate": 6.012575677410512e-06, "loss": 0.2333, "step": 11168 }, { "epoch": 0.64, "grad_norm": 0.27704603202525324, "learning_rate": 6.010869172443923e-06, "loss": 0.2202, "step": 11169 }, { "epoch": 0.64, "grad_norm": 0.3433573699670649, "learning_rate": 6.0091628056207655e-06, "loss": 0.2727, "step": 11170 }, { "epoch": 0.64, "grad_norm": 0.9905163871846923, "learning_rate": 6.007456577000128e-06, "loss": 0.4821, "step": 11171 }, { "epoch": 0.64, "grad_norm": 0.35401947805745243, "learning_rate": 6.005750486641095e-06, "loss": 0.2056, "step": 11172 }, { "epoch": 0.64, "grad_norm": 0.5022193592434819, "learning_rate": 6.004044534602753e-06, "loss": 0.3671, "step": 11173 }, { "epoch": 0.64, "grad_norm": 0.3889968137655978, "learning_rate": 6.002338720944174e-06, "loss": 0.3389, "step": 11174 }, { "epoch": 0.64, "grad_norm": 0.32816842990029704, "learning_rate": 6.000633045724438e-06, "loss": 0.2712, "step": 11175 }, { "epoch": 0.64, "grad_norm": 0.2643109087774337, "learning_rate": 5.998927509002608e-06, "loss": 0.136, "step": 11176 }, { "epoch": 0.64, "grad_norm": 0.34977099868176076, "learning_rate": 5.997222110837742e-06, "loss": 0.2911, "step": 11177 }, { "epoch": 0.64, "grad_norm": 0.41325953968084655, "learning_rate": 5.995516851288904e-06, "loss": 0.2771, "step": 11178 }, { "epoch": 0.64, "grad_norm": 0.5048678658640868, "learning_rate": 5.9938117304151445e-06, "loss": 0.3181, "step": 11179 }, { "epoch": 0.64, "grad_norm": 0.5855639790646341, "learning_rate": 5.992106748275513e-06, "loss": 0.3861, "step": 11180 }, { "epoch": 0.64, "grad_norm": 0.3731060722176844, "learning_rate": 5.990401904929051e-06, "loss": 0.2846, "step": 11181 }, { "epoch": 0.64, "grad_norm": 0.2450716350813472, "learning_rate": 5.988697200434801e-06, "loss": 0.1784, "step": 11182 }, { "epoch": 0.64, "grad_norm": 1.588488452674224, "learning_rate": 5.986992634851794e-06, "loss": 0.7367, "step": 11183 }, { "epoch": 0.64, "grad_norm": 0.33778700717578847, "learning_rate": 5.985288208239057e-06, "loss": 0.2592, "step": 11184 }, { "epoch": 0.64, "grad_norm": 0.38914509602391384, "learning_rate": 5.98358392065562e-06, "loss": 0.2963, "step": 11185 }, { "epoch": 0.64, "grad_norm": 0.780635173262054, "learning_rate": 5.981879772160497e-06, "loss": 0.3924, "step": 11186 }, { "epoch": 0.64, "grad_norm": 0.31361656933869875, "learning_rate": 5.980175762812705e-06, "loss": 0.2679, "step": 11187 }, { "epoch": 0.64, "grad_norm": 0.4583142651536605, "learning_rate": 5.978471892671254e-06, "loss": 0.2751, "step": 11188 }, { "epoch": 0.64, "grad_norm": 0.2585065185246987, "learning_rate": 5.976768161795149e-06, "loss": 0.1919, "step": 11189 }, { "epoch": 0.64, "grad_norm": 0.3744518103024148, "learning_rate": 5.975064570243387e-06, "loss": 0.2544, "step": 11190 }, { "epoch": 0.64, "grad_norm": 1.2344220965569719, "learning_rate": 5.973361118074969e-06, "loss": 0.8546, "step": 11191 }, { "epoch": 0.64, "grad_norm": 0.9938392858124002, "learning_rate": 5.97165780534888e-06, "loss": 0.3239, "step": 11192 }, { "epoch": 0.64, "grad_norm": 0.2816535477742484, "learning_rate": 5.969954632124111e-06, "loss": 0.248, "step": 11193 }, { "epoch": 0.64, "grad_norm": 0.5002961541221576, "learning_rate": 5.968251598459636e-06, "loss": 0.3412, "step": 11194 }, { "epoch": 0.64, "grad_norm": 0.2849071332633207, "learning_rate": 5.966548704414436e-06, "loss": 0.1259, "step": 11195 }, { "epoch": 0.64, "grad_norm": 0.38572866452620297, "learning_rate": 5.964845950047484e-06, "loss": 0.2577, "step": 11196 }, { "epoch": 0.64, "grad_norm": 0.3144011371244218, "learning_rate": 5.96314333541774e-06, "loss": 0.2866, "step": 11197 }, { "epoch": 0.64, "grad_norm": 1.0859733305344872, "learning_rate": 5.961440860584169e-06, "loss": 0.4253, "step": 11198 }, { "epoch": 0.64, "grad_norm": 0.31667468432575874, "learning_rate": 5.959738525605727e-06, "loss": 0.1599, "step": 11199 }, { "epoch": 0.64, "grad_norm": 0.3265305092885602, "learning_rate": 5.958036330541368e-06, "loss": 0.2316, "step": 11200 }, { "epoch": 0.64, "grad_norm": 0.2730721162752112, "learning_rate": 5.956334275450035e-06, "loss": 0.2342, "step": 11201 }, { "epoch": 0.64, "grad_norm": 0.315786099286474, "learning_rate": 5.954632360390673e-06, "loss": 0.1946, "step": 11202 }, { "epoch": 0.64, "grad_norm": 0.8586307279331272, "learning_rate": 5.9529305854222185e-06, "loss": 0.4156, "step": 11203 }, { "epoch": 0.64, "grad_norm": 0.8222346678946091, "learning_rate": 5.951228950603605e-06, "loss": 0.5293, "step": 11204 }, { "epoch": 0.64, "grad_norm": 0.26079077888105623, "learning_rate": 5.949527455993756e-06, "loss": 0.2104, "step": 11205 }, { "epoch": 0.64, "grad_norm": 0.4512259533268354, "learning_rate": 5.947826101651599e-06, "loss": 0.3257, "step": 11206 }, { "epoch": 0.64, "grad_norm": 0.32723592880119196, "learning_rate": 5.946124887636049e-06, "loss": 0.2213, "step": 11207 }, { "epoch": 0.64, "grad_norm": 0.29865161019025654, "learning_rate": 5.944423814006022e-06, "loss": 0.2226, "step": 11208 }, { "epoch": 0.64, "grad_norm": 0.35149010732275404, "learning_rate": 5.9427228808204216e-06, "loss": 0.3059, "step": 11209 }, { "epoch": 0.64, "grad_norm": 0.7682225597339026, "learning_rate": 5.941022088138158e-06, "loss": 0.4489, "step": 11210 }, { "epoch": 0.64, "grad_norm": 0.30994659665733854, "learning_rate": 5.939321436018119e-06, "loss": 0.2518, "step": 11211 }, { "epoch": 0.64, "grad_norm": 0.5923893515591114, "learning_rate": 5.937620924519207e-06, "loss": 0.0215, "step": 11212 }, { "epoch": 0.64, "grad_norm": 0.22704968985868573, "learning_rate": 5.935920553700305e-06, "loss": 0.216, "step": 11213 }, { "epoch": 0.64, "grad_norm": 0.5706346113528733, "learning_rate": 5.934220323620303e-06, "loss": 0.3533, "step": 11214 }, { "epoch": 0.64, "grad_norm": 0.37401496655785443, "learning_rate": 5.932520234338073e-06, "loss": 0.2737, "step": 11215 }, { "epoch": 0.64, "grad_norm": 0.4734541768133188, "learning_rate": 5.930820285912495e-06, "loss": 0.4071, "step": 11216 }, { "epoch": 0.64, "grad_norm": 0.3539907526938965, "learning_rate": 5.9291204784024335e-06, "loss": 0.2553, "step": 11217 }, { "epoch": 0.64, "grad_norm": 0.3659412287344784, "learning_rate": 5.9274208118667565e-06, "loss": 0.2484, "step": 11218 }, { "epoch": 0.64, "grad_norm": 0.3113800892023193, "learning_rate": 5.92572128636432e-06, "loss": 0.1939, "step": 11219 }, { "epoch": 0.64, "grad_norm": 0.5020546282550093, "learning_rate": 5.924021901953983e-06, "loss": 0.2901, "step": 11220 }, { "epoch": 0.64, "grad_norm": 0.286313888428949, "learning_rate": 5.922322658694591e-06, "loss": 0.2489, "step": 11221 }, { "epoch": 0.64, "grad_norm": 1.2063295574915158, "learning_rate": 5.920623556644987e-06, "loss": 0.7685, "step": 11222 }, { "epoch": 0.64, "grad_norm": 0.422377259307182, "learning_rate": 5.918924595864017e-06, "loss": 0.3199, "step": 11223 }, { "epoch": 0.64, "grad_norm": 0.5829496855100268, "learning_rate": 5.917225776410511e-06, "loss": 0.3525, "step": 11224 }, { "epoch": 0.64, "grad_norm": 0.2512331675050159, "learning_rate": 5.915527098343302e-06, "loss": 0.206, "step": 11225 }, { "epoch": 0.64, "grad_norm": 0.3745193433663806, "learning_rate": 5.913828561721214e-06, "loss": 0.2792, "step": 11226 }, { "epoch": 0.65, "grad_norm": 0.3169721307067816, "learning_rate": 5.912130166603066e-06, "loss": 0.2322, "step": 11227 }, { "epoch": 0.65, "grad_norm": 0.4240458248846178, "learning_rate": 5.910431913047674e-06, "loss": 0.2787, "step": 11228 }, { "epoch": 0.65, "grad_norm": 0.30928589966688147, "learning_rate": 5.908733801113851e-06, "loss": 0.2619, "step": 11229 }, { "epoch": 0.65, "grad_norm": 0.48813601012960506, "learning_rate": 5.907035830860399e-06, "loss": 0.3456, "step": 11230 }, { "epoch": 0.65, "grad_norm": 0.5823485197085368, "learning_rate": 5.905338002346122e-06, "loss": 0.314, "step": 11231 }, { "epoch": 0.65, "grad_norm": 0.5900398729201102, "learning_rate": 5.9036403156298125e-06, "loss": 0.2937, "step": 11232 }, { "epoch": 0.65, "grad_norm": 0.23746694604233817, "learning_rate": 5.901942770770264e-06, "loss": 0.2338, "step": 11233 }, { "epoch": 0.65, "grad_norm": 0.30105969621280293, "learning_rate": 5.900245367826258e-06, "loss": 0.1899, "step": 11234 }, { "epoch": 0.65, "grad_norm": 0.6304257335183989, "learning_rate": 5.898548106856583e-06, "loss": 0.3072, "step": 11235 }, { "epoch": 0.65, "grad_norm": 0.4284540690270265, "learning_rate": 5.896850987920009e-06, "loss": 0.2833, "step": 11236 }, { "epoch": 0.65, "grad_norm": 0.38906981985827127, "learning_rate": 5.895154011075308e-06, "loss": 0.3207, "step": 11237 }, { "epoch": 0.65, "grad_norm": 0.5208798117829698, "learning_rate": 5.893457176381248e-06, "loss": 0.1669, "step": 11238 }, { "epoch": 0.65, "grad_norm": 0.39622311730384707, "learning_rate": 5.891760483896587e-06, "loss": 0.3249, "step": 11239 }, { "epoch": 0.65, "grad_norm": 0.38496094157001054, "learning_rate": 5.890063933680087e-06, "loss": 0.2616, "step": 11240 }, { "epoch": 0.65, "grad_norm": 0.22651439846232713, "learning_rate": 5.8883675257904936e-06, "loss": 0.1797, "step": 11241 }, { "epoch": 0.65, "grad_norm": 0.38679464880782893, "learning_rate": 5.886671260286558e-06, "loss": 0.3031, "step": 11242 }, { "epoch": 0.65, "grad_norm": 0.7959092880908866, "learning_rate": 5.884975137227018e-06, "loss": 0.3909, "step": 11243 }, { "epoch": 0.65, "grad_norm": 0.3346253622241024, "learning_rate": 5.883279156670616e-06, "loss": 0.1724, "step": 11244 }, { "epoch": 0.65, "grad_norm": 0.30811296607418726, "learning_rate": 5.881583318676078e-06, "loss": 0.2709, "step": 11245 }, { "epoch": 0.65, "grad_norm": 0.3766558287583714, "learning_rate": 5.879887623302131e-06, "loss": 0.3055, "step": 11246 }, { "epoch": 0.65, "grad_norm": 0.23333225379841346, "learning_rate": 5.8781920706075e-06, "loss": 0.1275, "step": 11247 }, { "epoch": 0.65, "grad_norm": 0.5988756712228822, "learning_rate": 5.876496660650899e-06, "loss": 0.3427, "step": 11248 }, { "epoch": 0.65, "grad_norm": 0.37202182636931236, "learning_rate": 5.874801393491041e-06, "loss": 0.301, "step": 11249 }, { "epoch": 0.65, "grad_norm": 0.705704673592431, "learning_rate": 5.873106269186635e-06, "loss": 0.3855, "step": 11250 }, { "epoch": 0.65, "grad_norm": 0.3430225343190707, "learning_rate": 5.871411287796379e-06, "loss": 0.229, "step": 11251 }, { "epoch": 0.65, "grad_norm": 0.3532476705395196, "learning_rate": 5.869716449378975e-06, "loss": 0.3249, "step": 11252 }, { "epoch": 0.65, "grad_norm": 0.21846891869803547, "learning_rate": 5.8680217539931106e-06, "loss": 0.147, "step": 11253 }, { "epoch": 0.65, "grad_norm": 0.3593853724638047, "learning_rate": 5.866327201697477e-06, "loss": 0.2326, "step": 11254 }, { "epoch": 0.65, "grad_norm": 0.7948326466630459, "learning_rate": 5.864632792550753e-06, "loss": 0.4388, "step": 11255 }, { "epoch": 0.65, "grad_norm": 0.4301961477693214, "learning_rate": 5.862938526611619e-06, "loss": 0.2876, "step": 11256 }, { "epoch": 0.65, "grad_norm": 0.27854050842859307, "learning_rate": 5.861244403938744e-06, "loss": 0.232, "step": 11257 }, { "epoch": 0.65, "grad_norm": 1.2624624061673055, "learning_rate": 5.859550424590801e-06, "loss": 0.7603, "step": 11258 }, { "epoch": 0.65, "grad_norm": 0.3089760684352078, "learning_rate": 5.857856588626445e-06, "loss": 0.182, "step": 11259 }, { "epoch": 0.65, "grad_norm": 0.23799079783274543, "learning_rate": 5.856162896104339e-06, "loss": 0.2428, "step": 11260 }, { "epoch": 0.65, "grad_norm": 0.874747782088808, "learning_rate": 5.854469347083134e-06, "loss": 0.5215, "step": 11261 }, { "epoch": 0.65, "grad_norm": 0.5987070815263701, "learning_rate": 5.852775941621476e-06, "loss": 0.2821, "step": 11262 }, { "epoch": 0.65, "grad_norm": 0.3813507046247867, "learning_rate": 5.851082679778011e-06, "loss": 0.2997, "step": 11263 }, { "epoch": 0.65, "grad_norm": 0.3432165598746957, "learning_rate": 5.8493895616113714e-06, "loss": 0.2648, "step": 11264 }, { "epoch": 0.65, "grad_norm": 0.49326946299327873, "learning_rate": 5.847696587180195e-06, "loss": 0.3269, "step": 11265 }, { "epoch": 0.65, "grad_norm": 0.3997606558563779, "learning_rate": 5.846003756543106e-06, "loss": 0.309, "step": 11266 }, { "epoch": 0.65, "grad_norm": 0.2854817732641904, "learning_rate": 5.844311069758729e-06, "loss": 0.138, "step": 11267 }, { "epoch": 0.65, "grad_norm": 0.359787125128394, "learning_rate": 5.842618526885679e-06, "loss": 0.2986, "step": 11268 }, { "epoch": 0.65, "grad_norm": 0.34338489317305765, "learning_rate": 5.840926127982573e-06, "loss": 0.2825, "step": 11269 }, { "epoch": 0.65, "grad_norm": 0.7416336040716597, "learning_rate": 5.839233873108016e-06, "loss": 0.3722, "step": 11270 }, { "epoch": 0.65, "grad_norm": 0.6592265725324533, "learning_rate": 5.837541762320609e-06, "loss": 0.4155, "step": 11271 }, { "epoch": 0.65, "grad_norm": 0.3190564215746574, "learning_rate": 5.835849795678954e-06, "loss": 0.2622, "step": 11272 }, { "epoch": 0.65, "grad_norm": 0.19298277235341782, "learning_rate": 5.834157973241643e-06, "loss": 0.1614, "step": 11273 }, { "epoch": 0.65, "grad_norm": 0.7677838234055707, "learning_rate": 5.83246629506726e-06, "loss": 0.4286, "step": 11274 }, { "epoch": 0.65, "grad_norm": 0.3608993586517985, "learning_rate": 5.830774761214392e-06, "loss": 0.303, "step": 11275 }, { "epoch": 0.65, "grad_norm": 0.5166381381403379, "learning_rate": 5.829083371741609e-06, "loss": 0.3991, "step": 11276 }, { "epoch": 0.65, "grad_norm": 0.5044253316790134, "learning_rate": 5.827392126707499e-06, "loss": 0.2718, "step": 11277 }, { "epoch": 0.65, "grad_norm": 0.3718745113889626, "learning_rate": 5.825701026170616e-06, "loss": 0.3091, "step": 11278 }, { "epoch": 0.65, "grad_norm": 0.28428241777519586, "learning_rate": 5.824010070189523e-06, "loss": 0.2016, "step": 11279 }, { "epoch": 0.65, "grad_norm": 0.30819648744024025, "learning_rate": 5.8223192588227836e-06, "loss": 0.2175, "step": 11280 }, { "epoch": 0.65, "grad_norm": 0.3475980694959061, "learning_rate": 5.820628592128952e-06, "loss": 0.275, "step": 11281 }, { "epoch": 0.65, "grad_norm": 0.711060174788503, "learning_rate": 5.81893807016657e-06, "loss": 0.4232, "step": 11282 }, { "epoch": 0.65, "grad_norm": 0.5564311587922617, "learning_rate": 5.817247692994179e-06, "loss": 0.2324, "step": 11283 }, { "epoch": 0.65, "grad_norm": 0.39698374618945803, "learning_rate": 5.815557460670326e-06, "loss": 0.2794, "step": 11284 }, { "epoch": 0.65, "grad_norm": 0.24435255765312222, "learning_rate": 5.813867373253537e-06, "loss": 0.2001, "step": 11285 }, { "epoch": 0.65, "grad_norm": 0.7686243817440529, "learning_rate": 5.8121774308023415e-06, "loss": 0.4895, "step": 11286 }, { "epoch": 0.65, "grad_norm": 0.31839227350161425, "learning_rate": 5.810487633375261e-06, "loss": 0.2133, "step": 11287 }, { "epoch": 0.65, "grad_norm": 0.4042563230540896, "learning_rate": 5.80879798103081e-06, "loss": 0.3192, "step": 11288 }, { "epoch": 0.65, "grad_norm": 0.76019850506445, "learning_rate": 5.807108473827508e-06, "loss": 0.4039, "step": 11289 }, { "epoch": 0.65, "grad_norm": 0.30882169987209035, "learning_rate": 5.80541911182386e-06, "loss": 0.2067, "step": 11290 }, { "epoch": 0.65, "grad_norm": 0.2653860564848561, "learning_rate": 5.803729895078368e-06, "loss": 0.1633, "step": 11291 }, { "epoch": 0.65, "grad_norm": 0.37243490114112704, "learning_rate": 5.802040823649524e-06, "loss": 0.3116, "step": 11292 }, { "epoch": 0.65, "grad_norm": 0.2908543475298292, "learning_rate": 5.800351897595832e-06, "loss": 0.2062, "step": 11293 }, { "epoch": 0.65, "grad_norm": 0.7543086085745012, "learning_rate": 5.7986631169757715e-06, "loss": 0.4082, "step": 11294 }, { "epoch": 0.65, "grad_norm": 0.7486544921576171, "learning_rate": 5.796974481847827e-06, "loss": 0.4599, "step": 11295 }, { "epoch": 0.65, "grad_norm": 0.2232167232788598, "learning_rate": 5.795285992270472e-06, "loss": 0.216, "step": 11296 }, { "epoch": 0.65, "grad_norm": 0.47861387869738203, "learning_rate": 5.793597648302185e-06, "loss": 0.258, "step": 11297 }, { "epoch": 0.65, "grad_norm": 0.4226253650595357, "learning_rate": 5.791909450001432e-06, "loss": 0.3018, "step": 11298 }, { "epoch": 0.65, "grad_norm": 0.32740022436539834, "learning_rate": 5.790221397426672e-06, "loss": 0.2581, "step": 11299 }, { "epoch": 0.65, "grad_norm": 0.37377043137715343, "learning_rate": 5.7885334906363656e-06, "loss": 0.2573, "step": 11300 }, { "epoch": 0.65, "grad_norm": 1.0508988726333814, "learning_rate": 5.786845729688958e-06, "loss": 0.5739, "step": 11301 }, { "epoch": 0.65, "grad_norm": 0.39097791401524284, "learning_rate": 5.785158114642906e-06, "loss": 0.2412, "step": 11302 }, { "epoch": 0.65, "grad_norm": 0.2146535619035754, "learning_rate": 5.783470645556648e-06, "loss": 0.1288, "step": 11303 }, { "epoch": 0.65, "grad_norm": 0.30364120884793583, "learning_rate": 5.781783322488619e-06, "loss": 0.2825, "step": 11304 }, { "epoch": 0.65, "grad_norm": 0.35964780048498673, "learning_rate": 5.78009614549725e-06, "loss": 0.2912, "step": 11305 }, { "epoch": 0.65, "grad_norm": 0.7982606825690333, "learning_rate": 5.778409114640973e-06, "loss": 0.3289, "step": 11306 }, { "epoch": 0.65, "grad_norm": 1.2002096946727734, "learning_rate": 5.776722229978206e-06, "loss": 0.5363, "step": 11307 }, { "epoch": 0.65, "grad_norm": 0.26494917021712633, "learning_rate": 5.775035491567367e-06, "loss": 0.2465, "step": 11308 }, { "epoch": 0.65, "grad_norm": 0.2207873813549719, "learning_rate": 5.773348899466864e-06, "loss": 0.1466, "step": 11309 }, { "epoch": 0.65, "grad_norm": 0.8187241696416505, "learning_rate": 5.7716624537351105e-06, "loss": 0.4156, "step": 11310 }, { "epoch": 0.65, "grad_norm": 0.3221642458006116, "learning_rate": 5.769976154430507e-06, "loss": 0.2634, "step": 11311 }, { "epoch": 0.65, "grad_norm": 0.3845684244489153, "learning_rate": 5.768290001611446e-06, "loss": 0.2996, "step": 11312 }, { "epoch": 0.65, "grad_norm": 0.9866515649350825, "learning_rate": 5.7666039953363155e-06, "loss": 0.3581, "step": 11313 }, { "epoch": 0.65, "grad_norm": 0.3284899038074513, "learning_rate": 5.76491813566351e-06, "loss": 0.2465, "step": 11314 }, { "epoch": 0.65, "grad_norm": 1.3074311625669295, "learning_rate": 5.763232422651407e-06, "loss": 0.5833, "step": 11315 }, { "epoch": 0.65, "grad_norm": 0.2768885144280665, "learning_rate": 5.761546856358384e-06, "loss": 0.2403, "step": 11316 }, { "epoch": 0.65, "grad_norm": 0.34299468161640334, "learning_rate": 5.759861436842806e-06, "loss": 0.2699, "step": 11317 }, { "epoch": 0.65, "grad_norm": 0.846850459365096, "learning_rate": 5.7581761641630485e-06, "loss": 0.5027, "step": 11318 }, { "epoch": 0.65, "grad_norm": 0.25050625975393576, "learning_rate": 5.756491038377469e-06, "loss": 0.211, "step": 11319 }, { "epoch": 0.65, "grad_norm": 0.39637983568228946, "learning_rate": 5.754806059544421e-06, "loss": 0.2666, "step": 11320 }, { "epoch": 0.65, "grad_norm": 0.5585156954349844, "learning_rate": 5.753121227722254e-06, "loss": 0.3317, "step": 11321 }, { "epoch": 0.65, "grad_norm": 0.4894209756295622, "learning_rate": 5.7514365429693186e-06, "loss": 0.2484, "step": 11322 }, { "epoch": 0.65, "grad_norm": 0.39527617870681714, "learning_rate": 5.749752005343954e-06, "loss": 0.2635, "step": 11323 }, { "epoch": 0.65, "grad_norm": 0.33485572046557144, "learning_rate": 5.7480676149044945e-06, "loss": 0.2856, "step": 11324 }, { "epoch": 0.65, "grad_norm": 0.4181430086739952, "learning_rate": 5.746383371709267e-06, "loss": 0.2432, "step": 11325 }, { "epoch": 0.65, "grad_norm": 0.28865695519714385, "learning_rate": 5.7446992758166035e-06, "loss": 0.1886, "step": 11326 }, { "epoch": 0.65, "grad_norm": 0.43098022642011, "learning_rate": 5.743015327284822e-06, "loss": 0.332, "step": 11327 }, { "epoch": 0.65, "grad_norm": 0.46720475592837557, "learning_rate": 5.7413315261722355e-06, "loss": 0.3363, "step": 11328 }, { "epoch": 0.65, "grad_norm": 0.3041795818931702, "learning_rate": 5.739647872537157e-06, "loss": 0.2173, "step": 11329 }, { "epoch": 0.65, "grad_norm": 1.087779922116115, "learning_rate": 5.737964366437885e-06, "loss": 0.8105, "step": 11330 }, { "epoch": 0.65, "grad_norm": 0.23944258282874695, "learning_rate": 5.736281007932727e-06, "loss": 0.181, "step": 11331 }, { "epoch": 0.65, "grad_norm": 0.27234929845391487, "learning_rate": 5.734597797079974e-06, "loss": 0.2099, "step": 11332 }, { "epoch": 0.65, "grad_norm": 0.7745522496107576, "learning_rate": 5.732914733937917e-06, "loss": 0.4524, "step": 11333 }, { "epoch": 0.65, "grad_norm": 0.7024537449114042, "learning_rate": 5.731231818564834e-06, "loss": 0.4207, "step": 11334 }, { "epoch": 0.65, "grad_norm": 0.3230484251573659, "learning_rate": 5.729549051019014e-06, "loss": 0.2026, "step": 11335 }, { "epoch": 0.65, "grad_norm": 0.3556886103604453, "learning_rate": 5.7278664313587275e-06, "loss": 0.3461, "step": 11336 }, { "epoch": 0.65, "grad_norm": 0.30108299641494074, "learning_rate": 5.726183959642242e-06, "loss": 0.1966, "step": 11337 }, { "epoch": 0.65, "grad_norm": 0.6097747958982636, "learning_rate": 5.724501635927818e-06, "loss": 0.3249, "step": 11338 }, { "epoch": 0.65, "grad_norm": 0.37914027796049266, "learning_rate": 5.722819460273723e-06, "loss": 0.2742, "step": 11339 }, { "epoch": 0.65, "grad_norm": 0.35501758807427714, "learning_rate": 5.7211374327382066e-06, "loss": 0.2916, "step": 11340 }, { "epoch": 0.65, "grad_norm": 0.5940137811131664, "learning_rate": 5.719455553379516e-06, "loss": 0.3368, "step": 11341 }, { "epoch": 0.65, "grad_norm": 0.36694725493194763, "learning_rate": 5.717773822255896e-06, "loss": 0.2532, "step": 11342 }, { "epoch": 0.65, "grad_norm": 0.23800875625112516, "learning_rate": 5.71609223942558e-06, "loss": 0.2069, "step": 11343 }, { "epoch": 0.65, "grad_norm": 0.37304610142960154, "learning_rate": 5.7144108049468106e-06, "loss": 0.2867, "step": 11344 }, { "epoch": 0.65, "grad_norm": 0.36018737364414755, "learning_rate": 5.712729518877813e-06, "loss": 0.2725, "step": 11345 }, { "epoch": 0.65, "grad_norm": 1.454786122363037, "learning_rate": 5.711048381276801e-06, "loss": 0.838, "step": 11346 }, { "epoch": 0.65, "grad_norm": 0.4161241033055131, "learning_rate": 5.709367392202003e-06, "loss": 0.2913, "step": 11347 }, { "epoch": 0.65, "grad_norm": 0.28370028246480244, "learning_rate": 5.707686551711628e-06, "loss": 0.2552, "step": 11348 }, { "epoch": 0.65, "grad_norm": 0.2784506630346675, "learning_rate": 5.706005859863883e-06, "loss": 0.1635, "step": 11349 }, { "epoch": 0.65, "grad_norm": 0.36016396685166263, "learning_rate": 5.704325316716966e-06, "loss": 0.3082, "step": 11350 }, { "epoch": 0.65, "grad_norm": 0.4467132067514661, "learning_rate": 5.702644922329083e-06, "loss": 0.3476, "step": 11351 }, { "epoch": 0.65, "grad_norm": 0.327846838982423, "learning_rate": 5.70096467675842e-06, "loss": 0.2569, "step": 11352 }, { "epoch": 0.65, "grad_norm": 0.367416999026494, "learning_rate": 5.699284580063167e-06, "loss": 0.2946, "step": 11353 }, { "epoch": 0.65, "grad_norm": 0.5459229122762558, "learning_rate": 5.697604632301504e-06, "loss": 0.4332, "step": 11354 }, { "epoch": 0.65, "grad_norm": 0.25668900132390404, "learning_rate": 5.695924833531603e-06, "loss": 0.2039, "step": 11355 }, { "epoch": 0.65, "grad_norm": 0.4629903377649034, "learning_rate": 5.6942451838116445e-06, "loss": 0.2769, "step": 11356 }, { "epoch": 0.65, "grad_norm": 0.2880039976122195, "learning_rate": 5.69256568319979e-06, "loss": 0.2513, "step": 11357 }, { "epoch": 0.65, "grad_norm": 1.0645379082384252, "learning_rate": 5.6908863317542e-06, "loss": 0.3649, "step": 11358 }, { "epoch": 0.65, "grad_norm": 0.3809085828908202, "learning_rate": 5.689207129533027e-06, "loss": 0.3278, "step": 11359 }, { "epoch": 0.65, "grad_norm": 0.33329868938159685, "learning_rate": 5.687528076594432e-06, "loss": 0.3125, "step": 11360 }, { "epoch": 0.65, "grad_norm": 0.7300849087139614, "learning_rate": 5.685849172996551e-06, "loss": 0.411, "step": 11361 }, { "epoch": 0.65, "grad_norm": 0.4960419001943459, "learning_rate": 5.6841704187975296e-06, "loss": 0.2599, "step": 11362 }, { "epoch": 0.65, "grad_norm": 0.21882316289564832, "learning_rate": 5.682491814055497e-06, "loss": 0.2088, "step": 11363 }, { "epoch": 0.65, "grad_norm": 0.4726847855526382, "learning_rate": 5.680813358828592e-06, "loss": 0.3169, "step": 11364 }, { "epoch": 0.65, "grad_norm": 0.30338445447650797, "learning_rate": 5.679135053174932e-06, "loss": 0.1485, "step": 11365 }, { "epoch": 0.65, "grad_norm": 0.4310514017451592, "learning_rate": 5.677456897152641e-06, "loss": 0.3493, "step": 11366 }, { "epoch": 0.65, "grad_norm": 0.33338075483482793, "learning_rate": 5.6757788908198316e-06, "loss": 0.2944, "step": 11367 }, { "epoch": 0.65, "grad_norm": 0.4127582369100299, "learning_rate": 5.674101034234609e-06, "loss": 0.239, "step": 11368 }, { "epoch": 0.65, "grad_norm": 0.3351444220704729, "learning_rate": 5.672423327455085e-06, "loss": 0.2188, "step": 11369 }, { "epoch": 0.65, "grad_norm": 0.4345390366631468, "learning_rate": 5.670745770539356e-06, "loss": 0.2443, "step": 11370 }, { "epoch": 0.65, "grad_norm": 0.24478960709092923, "learning_rate": 5.669068363545516e-06, "loss": 0.2213, "step": 11371 }, { "epoch": 0.65, "grad_norm": 0.4821758480038891, "learning_rate": 5.667391106531647e-06, "loss": 0.3277, "step": 11372 }, { "epoch": 0.65, "grad_norm": 0.7428114884812085, "learning_rate": 5.665713999555842e-06, "loss": 0.3687, "step": 11373 }, { "epoch": 0.65, "grad_norm": 0.7485401252645069, "learning_rate": 5.6640370426761735e-06, "loss": 0.4149, "step": 11374 }, { "epoch": 0.65, "grad_norm": 0.23688874268890042, "learning_rate": 5.662360235950717e-06, "loss": 0.1964, "step": 11375 }, { "epoch": 0.65, "grad_norm": 0.31576975671951585, "learning_rate": 5.6606835794375346e-06, "loss": 0.254, "step": 11376 }, { "epoch": 0.65, "grad_norm": 0.7108945036865616, "learning_rate": 5.659007073194697e-06, "loss": 0.4271, "step": 11377 }, { "epoch": 0.65, "grad_norm": 0.33881004573315643, "learning_rate": 5.657330717280258e-06, "loss": 0.2427, "step": 11378 }, { "epoch": 0.65, "grad_norm": 0.35024537485757967, "learning_rate": 5.655654511752274e-06, "loss": 0.2942, "step": 11379 }, { "epoch": 0.65, "grad_norm": 0.6233162286325228, "learning_rate": 5.653978456668779e-06, "loss": 0.3727, "step": 11380 }, { "epoch": 0.65, "grad_norm": 0.23743888501996108, "learning_rate": 5.652302552087827e-06, "loss": 0.1682, "step": 11381 }, { "epoch": 0.65, "grad_norm": 0.42937799121144055, "learning_rate": 5.6506267980674515e-06, "loss": 0.2655, "step": 11382 }, { "epoch": 0.65, "grad_norm": 0.3103419789202479, "learning_rate": 5.648951194665683e-06, "loss": 0.294, "step": 11383 }, { "epoch": 0.65, "grad_norm": 0.32653676277691335, "learning_rate": 5.647275741940543e-06, "loss": 0.2316, "step": 11384 }, { "epoch": 0.65, "grad_norm": 0.6254068439076284, "learning_rate": 5.645600439950061e-06, "loss": 0.4076, "step": 11385 }, { "epoch": 0.65, "grad_norm": 0.8407360463120334, "learning_rate": 5.643925288752248e-06, "loss": 0.4822, "step": 11386 }, { "epoch": 0.65, "grad_norm": 0.35286540111542963, "learning_rate": 5.642250288405116e-06, "loss": 0.2868, "step": 11387 }, { "epoch": 0.65, "grad_norm": 0.2163730802370492, "learning_rate": 5.6405754389666635e-06, "loss": 0.1894, "step": 11388 }, { "epoch": 0.65, "grad_norm": 0.5102026088195443, "learning_rate": 5.638900740494901e-06, "loss": 0.3567, "step": 11389 }, { "epoch": 0.65, "grad_norm": 0.39499275164180747, "learning_rate": 5.637226193047818e-06, "loss": 0.293, "step": 11390 }, { "epoch": 0.65, "grad_norm": 0.36184693966404863, "learning_rate": 5.635551796683405e-06, "loss": 0.2894, "step": 11391 }, { "epoch": 0.65, "grad_norm": 1.1693194311109523, "learning_rate": 5.633877551459646e-06, "loss": 0.5964, "step": 11392 }, { "epoch": 0.65, "grad_norm": 0.3155464664914508, "learning_rate": 5.6322034574345145e-06, "loss": 0.2347, "step": 11393 }, { "epoch": 0.65, "grad_norm": 0.20528795990945872, "learning_rate": 5.630529514665993e-06, "loss": 0.1466, "step": 11394 }, { "epoch": 0.65, "grad_norm": 0.4960690183431173, "learning_rate": 5.628855723212048e-06, "loss": 0.3488, "step": 11395 }, { "epoch": 0.65, "grad_norm": 0.32121279561481897, "learning_rate": 5.62718208313064e-06, "loss": 0.2645, "step": 11396 }, { "epoch": 0.65, "grad_norm": 0.7035742236195769, "learning_rate": 5.625508594479725e-06, "loss": 0.3335, "step": 11397 }, { "epoch": 0.65, "grad_norm": 0.7735193256482944, "learning_rate": 5.6238352573172635e-06, "loss": 0.4442, "step": 11398 }, { "epoch": 0.65, "grad_norm": 0.26150894556463467, "learning_rate": 5.622162071701198e-06, "loss": 0.2663, "step": 11399 }, { "epoch": 0.65, "grad_norm": 0.27248367312206967, "learning_rate": 5.6204890376894735e-06, "loss": 0.1802, "step": 11400 }, { "epoch": 0.66, "grad_norm": 0.6700607475194417, "learning_rate": 5.61881615534002e-06, "loss": 0.2683, "step": 11401 }, { "epoch": 0.66, "grad_norm": 0.3534029225730532, "learning_rate": 5.617143424710778e-06, "loss": 0.3028, "step": 11402 }, { "epoch": 0.66, "grad_norm": 0.3681851782516326, "learning_rate": 5.615470845859672e-06, "loss": 0.2829, "step": 11403 }, { "epoch": 0.66, "grad_norm": 0.5710242786946949, "learning_rate": 5.613798418844623e-06, "loss": 0.2027, "step": 11404 }, { "epoch": 0.66, "grad_norm": 0.37252093370364825, "learning_rate": 5.6121261437235445e-06, "loss": 0.2886, "step": 11405 }, { "epoch": 0.66, "grad_norm": 0.30991829143518573, "learning_rate": 5.6104540205543445e-06, "loss": 0.1872, "step": 11406 }, { "epoch": 0.66, "grad_norm": 0.3134833617394962, "learning_rate": 5.608782049394938e-06, "loss": 0.2361, "step": 11407 }, { "epoch": 0.66, "grad_norm": 0.3926136787003498, "learning_rate": 5.60711023030322e-06, "loss": 0.3061, "step": 11408 }, { "epoch": 0.66, "grad_norm": 0.9496778327004386, "learning_rate": 5.605438563337087e-06, "loss": 0.7436, "step": 11409 }, { "epoch": 0.66, "grad_norm": 0.4551020413608209, "learning_rate": 5.6037670485544215e-06, "loss": 0.2394, "step": 11410 }, { "epoch": 0.66, "grad_norm": 0.2614793357489382, "learning_rate": 5.60209568601312e-06, "loss": 0.2574, "step": 11411 }, { "epoch": 0.66, "grad_norm": 0.4548863557191528, "learning_rate": 5.600424475771058e-06, "loss": 0.2345, "step": 11412 }, { "epoch": 0.66, "grad_norm": 0.6171749330064369, "learning_rate": 5.5987534178861e-06, "loss": 0.3804, "step": 11413 }, { "epoch": 0.66, "grad_norm": 0.310695729368847, "learning_rate": 5.5970825124161255e-06, "loss": 0.2084, "step": 11414 }, { "epoch": 0.66, "grad_norm": 0.2857621372988274, "learning_rate": 5.595411759418995e-06, "loss": 0.2615, "step": 11415 }, { "epoch": 0.66, "grad_norm": 0.9880804735860319, "learning_rate": 5.5937411589525655e-06, "loss": 0.5304, "step": 11416 }, { "epoch": 0.66, "grad_norm": 0.29446071991324213, "learning_rate": 5.592070711074691e-06, "loss": 0.1841, "step": 11417 }, { "epoch": 0.66, "grad_norm": 0.7950217222504905, "learning_rate": 5.590400415843214e-06, "loss": 0.4106, "step": 11418 }, { "epoch": 0.66, "grad_norm": 0.2692860474214037, "learning_rate": 5.5887302733159835e-06, "loss": 0.2516, "step": 11419 }, { "epoch": 0.66, "grad_norm": 0.28812730174273965, "learning_rate": 5.587060283550835e-06, "loss": 0.2093, "step": 11420 }, { "epoch": 0.66, "grad_norm": 0.4135254933127519, "learning_rate": 5.585390446605598e-06, "loss": 0.2756, "step": 11421 }, { "epoch": 0.66, "grad_norm": 0.3391811932650513, "learning_rate": 5.583720762538097e-06, "loss": 0.302, "step": 11422 }, { "epoch": 0.66, "grad_norm": 0.5581481167123312, "learning_rate": 5.58205123140616e-06, "loss": 0.1832, "step": 11423 }, { "epoch": 0.66, "grad_norm": 0.8472758677182067, "learning_rate": 5.5803818532676e-06, "loss": 0.402, "step": 11424 }, { "epoch": 0.66, "grad_norm": 0.8200728581102539, "learning_rate": 5.578712628180225e-06, "loss": 0.4662, "step": 11425 }, { "epoch": 0.66, "grad_norm": 0.3875008712683409, "learning_rate": 5.577043556201838e-06, "loss": 0.2686, "step": 11426 }, { "epoch": 0.66, "grad_norm": 0.2416269514785201, "learning_rate": 5.575374637390246e-06, "loss": 0.2104, "step": 11427 }, { "epoch": 0.66, "grad_norm": 0.4898967331695952, "learning_rate": 5.573705871803241e-06, "loss": 0.2438, "step": 11428 }, { "epoch": 0.66, "grad_norm": 0.3172534747512469, "learning_rate": 5.57203725949861e-06, "loss": 0.2663, "step": 11429 }, { "epoch": 0.66, "grad_norm": 0.9321900723666333, "learning_rate": 5.570368800534139e-06, "loss": 0.3019, "step": 11430 }, { "epoch": 0.66, "grad_norm": 0.4761349234786449, "learning_rate": 5.568700494967603e-06, "loss": 0.3314, "step": 11431 }, { "epoch": 0.66, "grad_norm": 0.32465852015725005, "learning_rate": 5.567032342856781e-06, "loss": 0.2739, "step": 11432 }, { "epoch": 0.66, "grad_norm": 0.1793227954302825, "learning_rate": 5.565364344259438e-06, "loss": 0.1204, "step": 11433 }, { "epoch": 0.66, "grad_norm": 0.3672817087183113, "learning_rate": 5.563696499233337e-06, "loss": 0.3124, "step": 11434 }, { "epoch": 0.66, "grad_norm": 0.33119602560147793, "learning_rate": 5.562028807836233e-06, "loss": 0.2328, "step": 11435 }, { "epoch": 0.66, "grad_norm": 0.7227754858322918, "learning_rate": 5.560361270125884e-06, "loss": 0.3104, "step": 11436 }, { "epoch": 0.66, "grad_norm": 1.0485837440591883, "learning_rate": 5.558693886160032e-06, "loss": 0.5809, "step": 11437 }, { "epoch": 0.66, "grad_norm": 0.32761246481860695, "learning_rate": 5.557026655996422e-06, "loss": 0.2612, "step": 11438 }, { "epoch": 0.66, "grad_norm": 0.37388839229218224, "learning_rate": 5.555359579692782e-06, "loss": 0.3131, "step": 11439 }, { "epoch": 0.66, "grad_norm": 0.339682737117197, "learning_rate": 5.553692657306853e-06, "loss": 0.1579, "step": 11440 }, { "epoch": 0.66, "grad_norm": 0.33306031343411013, "learning_rate": 5.552025888896356e-06, "loss": 0.2583, "step": 11441 }, { "epoch": 0.66, "grad_norm": 0.9423663180233574, "learning_rate": 5.550359274519012e-06, "loss": 0.4231, "step": 11442 }, { "epoch": 0.66, "grad_norm": 0.3801574172959991, "learning_rate": 5.54869281423253e-06, "loss": 0.2682, "step": 11443 }, { "epoch": 0.66, "grad_norm": 0.37924059353551987, "learning_rate": 5.547026508094629e-06, "loss": 0.2664, "step": 11444 }, { "epoch": 0.66, "grad_norm": 0.5090379498547105, "learning_rate": 5.545360356163009e-06, "loss": 0.3892, "step": 11445 }, { "epoch": 0.66, "grad_norm": 0.22891711398429887, "learning_rate": 5.54369435849537e-06, "loss": 0.182, "step": 11446 }, { "epoch": 0.66, "grad_norm": 0.47266754852632326, "learning_rate": 5.5420285151493995e-06, "loss": 0.3223, "step": 11447 }, { "epoch": 0.66, "grad_norm": 0.6746597708337676, "learning_rate": 5.540362826182791e-06, "loss": 0.3292, "step": 11448 }, { "epoch": 0.66, "grad_norm": 1.2132277880660471, "learning_rate": 5.538697291653228e-06, "loss": 0.7311, "step": 11449 }, { "epoch": 0.66, "grad_norm": 0.3082460870951119, "learning_rate": 5.537031911618385e-06, "loss": 0.2031, "step": 11450 }, { "epoch": 0.66, "grad_norm": 0.3597103355681424, "learning_rate": 5.53536668613593e-06, "loss": 0.3081, "step": 11451 }, { "epoch": 0.66, "grad_norm": 0.41272000121983643, "learning_rate": 5.5337016152635396e-06, "loss": 0.2683, "step": 11452 }, { "epoch": 0.66, "grad_norm": 0.3017020610938978, "learning_rate": 5.53203669905887e-06, "loss": 0.2304, "step": 11453 }, { "epoch": 0.66, "grad_norm": 0.43437243190707603, "learning_rate": 5.530371937579577e-06, "loss": 0.2326, "step": 11454 }, { "epoch": 0.66, "grad_norm": 0.3317204639922343, "learning_rate": 5.528707330883308e-06, "loss": 0.2883, "step": 11455 }, { "epoch": 0.66, "grad_norm": 0.3327247393119727, "learning_rate": 5.527042879027715e-06, "loss": 0.2176, "step": 11456 }, { "epoch": 0.66, "grad_norm": 1.3365246378891962, "learning_rate": 5.525378582070438e-06, "loss": 0.7339, "step": 11457 }, { "epoch": 0.66, "grad_norm": 0.3413124951106821, "learning_rate": 5.523714440069104e-06, "loss": 0.3221, "step": 11458 }, { "epoch": 0.66, "grad_norm": 0.3312639650942997, "learning_rate": 5.522050453081349e-06, "loss": 0.2081, "step": 11459 }, { "epoch": 0.66, "grad_norm": 0.25778157585553163, "learning_rate": 5.5203866211647904e-06, "loss": 0.1958, "step": 11460 }, { "epoch": 0.66, "grad_norm": 0.9463391804603914, "learning_rate": 5.518722944377053e-06, "loss": 0.507, "step": 11461 }, { "epoch": 0.66, "grad_norm": 0.37823758384435996, "learning_rate": 5.517059422775748e-06, "loss": 0.3032, "step": 11462 }, { "epoch": 0.66, "grad_norm": 0.2921793595846805, "learning_rate": 5.515396056418482e-06, "loss": 0.2408, "step": 11463 }, { "epoch": 0.66, "grad_norm": 0.5883154221839674, "learning_rate": 5.513732845362856e-06, "loss": 0.4082, "step": 11464 }, { "epoch": 0.66, "grad_norm": 0.2867425278106484, "learning_rate": 5.51206978966647e-06, "loss": 0.1848, "step": 11465 }, { "epoch": 0.66, "grad_norm": 0.24205597069931772, "learning_rate": 5.510406889386914e-06, "loss": 0.2085, "step": 11466 }, { "epoch": 0.66, "grad_norm": 1.2740952812449393, "learning_rate": 5.5087441445817765e-06, "loss": 0.7298, "step": 11467 }, { "epoch": 0.66, "grad_norm": 0.5389920094979618, "learning_rate": 5.507081555308631e-06, "loss": 0.3645, "step": 11468 }, { "epoch": 0.66, "grad_norm": 0.3339186256662309, "learning_rate": 5.505419121625062e-06, "loss": 0.2649, "step": 11469 }, { "epoch": 0.66, "grad_norm": 0.3448488174392606, "learning_rate": 5.503756843588635e-06, "loss": 0.3087, "step": 11470 }, { "epoch": 0.66, "grad_norm": 0.3839012951403879, "learning_rate": 5.502094721256916e-06, "loss": 0.2503, "step": 11471 }, { "epoch": 0.66, "grad_norm": 0.24422334705090812, "learning_rate": 5.500432754687464e-06, "loss": 0.1323, "step": 11472 }, { "epoch": 0.66, "grad_norm": 1.0341671776790258, "learning_rate": 5.498770943937828e-06, "loss": 0.6798, "step": 11473 }, { "epoch": 0.66, "grad_norm": 0.2849180917522527, "learning_rate": 5.497109289065563e-06, "loss": 0.2485, "step": 11474 }, { "epoch": 0.66, "grad_norm": 0.5072468134724325, "learning_rate": 5.495447790128211e-06, "loss": 0.3332, "step": 11475 }, { "epoch": 0.66, "grad_norm": 0.6364321717273036, "learning_rate": 5.493786447183308e-06, "loss": 0.3118, "step": 11476 }, { "epoch": 0.66, "grad_norm": 0.603457958394203, "learning_rate": 5.4921252602883834e-06, "loss": 0.2669, "step": 11477 }, { "epoch": 0.66, "grad_norm": 0.24790100570896947, "learning_rate": 5.490464229500969e-06, "loss": 0.2388, "step": 11478 }, { "epoch": 0.66, "grad_norm": 0.26144460223892885, "learning_rate": 5.488803354878587e-06, "loss": 0.2021, "step": 11479 }, { "epoch": 0.66, "grad_norm": 0.5581526805294116, "learning_rate": 5.487142636478749e-06, "loss": 0.3137, "step": 11480 }, { "epoch": 0.66, "grad_norm": 0.46154960985058663, "learning_rate": 5.485482074358968e-06, "loss": 0.3153, "step": 11481 }, { "epoch": 0.66, "grad_norm": 0.34216719846377985, "learning_rate": 5.48382166857675e-06, "loss": 0.2507, "step": 11482 }, { "epoch": 0.66, "grad_norm": 0.5615865278512538, "learning_rate": 5.482161419189591e-06, "loss": 0.2432, "step": 11483 }, { "epoch": 0.66, "grad_norm": 0.23731768796120098, "learning_rate": 5.4805013262549885e-06, "loss": 0.1988, "step": 11484 }, { "epoch": 0.66, "grad_norm": 1.286312030117045, "learning_rate": 5.478841389830427e-06, "loss": 0.4339, "step": 11485 }, { "epoch": 0.66, "grad_norm": 0.30921205394689105, "learning_rate": 5.477181609973399e-06, "loss": 0.2737, "step": 11486 }, { "epoch": 0.66, "grad_norm": 0.38646319079933794, "learning_rate": 5.475521986741377e-06, "loss": 0.2982, "step": 11487 }, { "epoch": 0.66, "grad_norm": 1.0393640176508394, "learning_rate": 5.4738625201918324e-06, "loss": 0.5382, "step": 11488 }, { "epoch": 0.66, "grad_norm": 0.2587507359924883, "learning_rate": 5.472203210382231e-06, "loss": 0.0732, "step": 11489 }, { "epoch": 0.66, "grad_norm": 0.25179914104749707, "learning_rate": 5.470544057370042e-06, "loss": 0.2673, "step": 11490 }, { "epoch": 0.66, "grad_norm": 0.26847000724967895, "learning_rate": 5.468885061212716e-06, "loss": 0.1861, "step": 11491 }, { "epoch": 0.66, "grad_norm": 0.33831228269888364, "learning_rate": 5.467226221967707e-06, "loss": 0.2408, "step": 11492 }, { "epoch": 0.66, "grad_norm": 0.49982395528851653, "learning_rate": 5.465567539692455e-06, "loss": 0.3434, "step": 11493 }, { "epoch": 0.66, "grad_norm": 0.3667710672250758, "learning_rate": 5.463909014444409e-06, "loss": 0.316, "step": 11494 }, { "epoch": 0.66, "grad_norm": 0.5919667616125639, "learning_rate": 5.462250646280997e-06, "loss": 0.174, "step": 11495 }, { "epoch": 0.66, "grad_norm": 0.2919001787559771, "learning_rate": 5.460592435259651e-06, "loss": 0.2295, "step": 11496 }, { "epoch": 0.66, "grad_norm": 0.45198607078143405, "learning_rate": 5.458934381437793e-06, "loss": 0.3138, "step": 11497 }, { "epoch": 0.66, "grad_norm": 0.29738566743482925, "learning_rate": 5.457276484872839e-06, "loss": 0.2406, "step": 11498 }, { "epoch": 0.66, "grad_norm": 0.34804859280565137, "learning_rate": 5.455618745622209e-06, "loss": 0.2933, "step": 11499 }, { "epoch": 0.66, "grad_norm": 1.1076657921418147, "learning_rate": 5.453961163743304e-06, "loss": 0.6148, "step": 11500 }, { "epoch": 0.66, "grad_norm": 0.7959871319468619, "learning_rate": 5.452303739293532e-06, "loss": 0.3982, "step": 11501 }, { "epoch": 0.66, "grad_norm": 0.2500213796943707, "learning_rate": 5.4506464723302784e-06, "loss": 0.2233, "step": 11502 }, { "epoch": 0.66, "grad_norm": 0.39779781556021504, "learning_rate": 5.448989362910949e-06, "loss": 0.2594, "step": 11503 }, { "epoch": 0.66, "grad_norm": 0.5295136834773769, "learning_rate": 5.447332411092921e-06, "loss": 0.3786, "step": 11504 }, { "epoch": 0.66, "grad_norm": 0.23936075071175678, "learning_rate": 5.445675616933576e-06, "loss": 0.1819, "step": 11505 }, { "epoch": 0.66, "grad_norm": 0.34567033680043985, "learning_rate": 5.444018980490284e-06, "loss": 0.2925, "step": 11506 }, { "epoch": 0.66, "grad_norm": 0.7141315652138195, "learning_rate": 5.4423625018204226e-06, "loss": 0.3823, "step": 11507 }, { "epoch": 0.66, "grad_norm": 0.32578121816347494, "learning_rate": 5.440706180981352e-06, "loss": 0.2104, "step": 11508 }, { "epoch": 0.66, "grad_norm": 0.633359243897076, "learning_rate": 5.439050018030432e-06, "loss": 0.368, "step": 11509 }, { "epoch": 0.66, "grad_norm": 0.3036594850395294, "learning_rate": 5.437394013025012e-06, "loss": 0.2951, "step": 11510 }, { "epoch": 0.66, "grad_norm": 0.3218116260567253, "learning_rate": 5.435738166022437e-06, "loss": 0.2107, "step": 11511 }, { "epoch": 0.66, "grad_norm": 0.27971976076190586, "learning_rate": 5.434082477080058e-06, "loss": 0.1691, "step": 11512 }, { "epoch": 0.66, "grad_norm": 0.37628575012172416, "learning_rate": 5.432426946255206e-06, "loss": 0.2813, "step": 11513 }, { "epoch": 0.66, "grad_norm": 0.29629151440877577, "learning_rate": 5.4307715736052125e-06, "loss": 0.2711, "step": 11514 }, { "epoch": 0.66, "grad_norm": 0.7761394695030567, "learning_rate": 5.429116359187403e-06, "loss": 0.3176, "step": 11515 }, { "epoch": 0.66, "grad_norm": 1.1914877581468113, "learning_rate": 5.427461303059096e-06, "loss": 0.8457, "step": 11516 }, { "epoch": 0.66, "grad_norm": 0.3173092424468873, "learning_rate": 5.425806405277609e-06, "loss": 0.2569, "step": 11517 }, { "epoch": 0.66, "grad_norm": 0.24185940664168293, "learning_rate": 5.424151665900246e-06, "loss": 0.1908, "step": 11518 }, { "epoch": 0.66, "grad_norm": 1.2671168707668683, "learning_rate": 5.422497084984317e-06, "loss": 0.5888, "step": 11519 }, { "epoch": 0.66, "grad_norm": 0.3204691857610319, "learning_rate": 5.420842662587118e-06, "loss": 0.2625, "step": 11520 }, { "epoch": 0.66, "grad_norm": 0.837128435683929, "learning_rate": 5.41918839876594e-06, "loss": 0.3109, "step": 11521 }, { "epoch": 0.66, "grad_norm": 0.28996702322416396, "learning_rate": 5.41753429357807e-06, "loss": 0.2688, "step": 11522 }, { "epoch": 0.66, "grad_norm": 0.3468731094237857, "learning_rate": 5.4158803470807875e-06, "loss": 0.2796, "step": 11523 }, { "epoch": 0.66, "grad_norm": 0.25427661796028395, "learning_rate": 5.414226559331375e-06, "loss": 0.1582, "step": 11524 }, { "epoch": 0.66, "grad_norm": 0.28924380253126614, "learning_rate": 5.4125729303871e-06, "loss": 0.229, "step": 11525 }, { "epoch": 0.66, "grad_norm": 0.371977089760644, "learning_rate": 5.410919460305226e-06, "loss": 0.2852, "step": 11526 }, { "epoch": 0.66, "grad_norm": 0.7166682064653169, "learning_rate": 5.409266149143011e-06, "loss": 0.4062, "step": 11527 }, { "epoch": 0.66, "grad_norm": 0.5786444654582379, "learning_rate": 5.407612996957716e-06, "loss": 0.3038, "step": 11528 }, { "epoch": 0.66, "grad_norm": 0.34804236614724643, "learning_rate": 5.405960003806585e-06, "loss": 0.2652, "step": 11529 }, { "epoch": 0.66, "grad_norm": 0.22885662878947813, "learning_rate": 5.4043071697468604e-06, "loss": 0.2186, "step": 11530 }, { "epoch": 0.66, "grad_norm": 0.6317423866420934, "learning_rate": 5.4026544948357795e-06, "loss": 0.2647, "step": 11531 }, { "epoch": 0.66, "grad_norm": 0.3832186602134028, "learning_rate": 5.401001979130578e-06, "loss": 0.273, "step": 11532 }, { "epoch": 0.66, "grad_norm": 0.46543784414296346, "learning_rate": 5.399349622688479e-06, "loss": 0.3423, "step": 11533 }, { "epoch": 0.66, "grad_norm": 0.444348249465614, "learning_rate": 5.397697425566707e-06, "loss": 0.2682, "step": 11534 }, { "epoch": 0.66, "grad_norm": 0.32841907606679893, "learning_rate": 5.396045387822474e-06, "loss": 0.2644, "step": 11535 }, { "epoch": 0.66, "grad_norm": 0.29513111111681184, "learning_rate": 5.394393509512987e-06, "loss": 0.1942, "step": 11536 }, { "epoch": 0.66, "grad_norm": 0.4622638865061429, "learning_rate": 5.392741790695459e-06, "loss": 0.346, "step": 11537 }, { "epoch": 0.66, "grad_norm": 0.2652974325826972, "learning_rate": 5.391090231427086e-06, "loss": 0.2254, "step": 11538 }, { "epoch": 0.66, "grad_norm": 0.8429674842171873, "learning_rate": 5.389438831765059e-06, "loss": 0.4517, "step": 11539 }, { "epoch": 0.66, "grad_norm": 1.2820194430463543, "learning_rate": 5.387787591766562e-06, "loss": 0.8453, "step": 11540 }, { "epoch": 0.66, "grad_norm": 0.25818239711603863, "learning_rate": 5.386136511488789e-06, "loss": 0.211, "step": 11541 }, { "epoch": 0.66, "grad_norm": 0.4809465676464306, "learning_rate": 5.384485590988908e-06, "loss": 0.3567, "step": 11542 }, { "epoch": 0.66, "grad_norm": 0.5760262213588997, "learning_rate": 5.382834830324093e-06, "loss": 0.2892, "step": 11543 }, { "epoch": 0.66, "grad_norm": 0.2266235624622428, "learning_rate": 5.381184229551506e-06, "loss": 0.1549, "step": 11544 }, { "epoch": 0.66, "grad_norm": 0.5227017467399918, "learning_rate": 5.379533788728313e-06, "loss": 0.3697, "step": 11545 }, { "epoch": 0.66, "grad_norm": 0.48189898826149535, "learning_rate": 5.377883507911668e-06, "loss": 0.4144, "step": 11546 }, { "epoch": 0.66, "grad_norm": 0.2966848658482328, "learning_rate": 5.376233387158722e-06, "loss": 0.1897, "step": 11547 }, { "epoch": 0.66, "grad_norm": 0.6944979318698845, "learning_rate": 5.3745834265266054e-06, "loss": 0.4308, "step": 11548 }, { "epoch": 0.66, "grad_norm": 0.2628428496235638, "learning_rate": 5.372933626072472e-06, "loss": 0.2565, "step": 11549 }, { "epoch": 0.66, "grad_norm": 0.3982433001251939, "learning_rate": 5.371283985853446e-06, "loss": 0.2565, "step": 11550 }, { "epoch": 0.66, "grad_norm": 0.31675267485780206, "learning_rate": 5.369634505926658e-06, "loss": 0.1933, "step": 11551 }, { "epoch": 0.66, "grad_norm": 1.1224053933698808, "learning_rate": 5.367985186349223e-06, "loss": 0.7319, "step": 11552 }, { "epoch": 0.66, "grad_norm": 0.3152515760305445, "learning_rate": 5.3663360271782675e-06, "loss": 0.2678, "step": 11553 }, { "epoch": 0.66, "grad_norm": 0.3356411560582424, "learning_rate": 5.364687028470894e-06, "loss": 0.2476, "step": 11554 }, { "epoch": 0.66, "grad_norm": 0.6719810096510042, "learning_rate": 5.363038190284211e-06, "loss": 0.4357, "step": 11555 }, { "epoch": 0.66, "grad_norm": 0.2516818101597573, "learning_rate": 5.36138951267531e-06, "loss": 0.2007, "step": 11556 }, { "epoch": 0.66, "grad_norm": 0.2570646063899158, "learning_rate": 5.359740995701297e-06, "loss": 0.1896, "step": 11557 }, { "epoch": 0.66, "grad_norm": 1.0684404877013456, "learning_rate": 5.358092639419252e-06, "loss": 0.7644, "step": 11558 }, { "epoch": 0.66, "grad_norm": 0.313824020741263, "learning_rate": 5.356444443886262e-06, "loss": 0.2709, "step": 11559 }, { "epoch": 0.66, "grad_norm": 0.7877441786641255, "learning_rate": 5.3547964091593955e-06, "loss": 0.3024, "step": 11560 }, { "epoch": 0.66, "grad_norm": 0.3567833071601899, "learning_rate": 5.353148535295733e-06, "loss": 0.3206, "step": 11561 }, { "epoch": 0.66, "grad_norm": 0.24311132372342875, "learning_rate": 5.351500822352338e-06, "loss": 0.1998, "step": 11562 }, { "epoch": 0.66, "grad_norm": 0.4527933293358373, "learning_rate": 5.3498532703862685e-06, "loss": 0.2622, "step": 11563 }, { "epoch": 0.66, "grad_norm": 0.4517449777092792, "learning_rate": 5.34820587945458e-06, "loss": 0.2963, "step": 11564 }, { "epoch": 0.66, "grad_norm": 0.4205026001874601, "learning_rate": 5.34655864961432e-06, "loss": 0.2904, "step": 11565 }, { "epoch": 0.66, "grad_norm": 0.5018741672324546, "learning_rate": 5.344911580922536e-06, "loss": 0.3114, "step": 11566 }, { "epoch": 0.66, "grad_norm": 0.3871254278340785, "learning_rate": 5.343264673436264e-06, "loss": 0.1549, "step": 11567 }, { "epoch": 0.66, "grad_norm": 0.392704311459362, "learning_rate": 5.341617927212537e-06, "loss": 0.2629, "step": 11568 }, { "epoch": 0.66, "grad_norm": 0.2654887678301144, "learning_rate": 5.339971342308377e-06, "loss": 0.2367, "step": 11569 }, { "epoch": 0.66, "grad_norm": 0.9922960293520513, "learning_rate": 5.33832491878081e-06, "loss": 0.5467, "step": 11570 }, { "epoch": 0.66, "grad_norm": 0.41226520432516683, "learning_rate": 5.3366786566868545e-06, "loss": 0.2878, "step": 11571 }, { "epoch": 0.66, "grad_norm": 0.4855856005335587, "learning_rate": 5.335032556083515e-06, "loss": 0.3625, "step": 11572 }, { "epoch": 0.66, "grad_norm": 0.37447574962277336, "learning_rate": 5.333386617027793e-06, "loss": 0.2506, "step": 11573 }, { "epoch": 0.66, "grad_norm": 0.32366398711854405, "learning_rate": 5.331740839576697e-06, "loss": 0.2546, "step": 11574 }, { "epoch": 0.67, "grad_norm": 0.2625217824484109, "learning_rate": 5.330095223787214e-06, "loss": 0.1951, "step": 11575 }, { "epoch": 0.67, "grad_norm": 0.779074955761385, "learning_rate": 5.3284497697163325e-06, "loss": 0.5442, "step": 11576 }, { "epoch": 0.67, "grad_norm": 0.27009579975714293, "learning_rate": 5.326804477421035e-06, "loss": 0.2266, "step": 11577 }, { "epoch": 0.67, "grad_norm": 0.42870519581357974, "learning_rate": 5.325159346958293e-06, "loss": 0.3498, "step": 11578 }, { "epoch": 0.67, "grad_norm": 1.0770530022845466, "learning_rate": 5.323514378385086e-06, "loss": 0.5653, "step": 11579 }, { "epoch": 0.67, "grad_norm": 0.25141483196515735, "learning_rate": 5.321869571758375e-06, "loss": 0.1638, "step": 11580 }, { "epoch": 0.67, "grad_norm": 0.31187570399237746, "learning_rate": 5.32022492713512e-06, "loss": 0.2445, "step": 11581 }, { "epoch": 0.67, "grad_norm": 0.7460901109990338, "learning_rate": 5.318580444572276e-06, "loss": 0.4426, "step": 11582 }, { "epoch": 0.67, "grad_norm": 0.43048688929882034, "learning_rate": 5.316936124126788e-06, "loss": 0.2262, "step": 11583 }, { "epoch": 0.67, "grad_norm": 0.40015207220498034, "learning_rate": 5.3152919658556e-06, "loss": 0.3021, "step": 11584 }, { "epoch": 0.67, "grad_norm": 0.3664291203534989, "learning_rate": 5.313647969815647e-06, "loss": 0.2887, "step": 11585 }, { "epoch": 0.67, "grad_norm": 0.351670785679463, "learning_rate": 5.312004136063866e-06, "loss": 0.184, "step": 11586 }, { "epoch": 0.67, "grad_norm": 0.31282841073476086, "learning_rate": 5.310360464657183e-06, "loss": 0.2668, "step": 11587 }, { "epoch": 0.67, "grad_norm": 0.9626155482266147, "learning_rate": 5.308716955652513e-06, "loss": 0.4174, "step": 11588 }, { "epoch": 0.67, "grad_norm": 0.2985410671209383, "learning_rate": 5.3070736091067734e-06, "loss": 0.267, "step": 11589 }, { "epoch": 0.67, "grad_norm": 0.30113331654362685, "learning_rate": 5.30543042507687e-06, "loss": 0.2067, "step": 11590 }, { "epoch": 0.67, "grad_norm": 1.0546116846708513, "learning_rate": 5.303787403619711e-06, "loss": 0.4606, "step": 11591 }, { "epoch": 0.67, "grad_norm": 0.3379662424665475, "learning_rate": 5.302144544792194e-06, "loss": 0.2209, "step": 11592 }, { "epoch": 0.67, "grad_norm": 0.2875890580534507, "learning_rate": 5.300501848651209e-06, "loss": 0.244, "step": 11593 }, { "epoch": 0.67, "grad_norm": 1.0134444255063182, "learning_rate": 5.298859315253639e-06, "loss": 0.3835, "step": 11594 }, { "epoch": 0.67, "grad_norm": 0.41250760331534936, "learning_rate": 5.297216944656371e-06, "loss": 0.3064, "step": 11595 }, { "epoch": 0.67, "grad_norm": 0.30618938735708207, "learning_rate": 5.29557473691628e-06, "loss": 0.1658, "step": 11596 }, { "epoch": 0.67, "grad_norm": 0.3628548944099711, "learning_rate": 5.293932692090233e-06, "loss": 0.3127, "step": 11597 }, { "epoch": 0.67, "grad_norm": 0.3740872155732483, "learning_rate": 5.292290810235092e-06, "loss": 0.2613, "step": 11598 }, { "epoch": 0.67, "grad_norm": 0.5642476827593211, "learning_rate": 5.29064909140772e-06, "loss": 0.2667, "step": 11599 }, { "epoch": 0.67, "grad_norm": 0.5899163594087162, "learning_rate": 5.289007535664967e-06, "loss": 0.3414, "step": 11600 }, { "epoch": 0.67, "grad_norm": 0.40825253228739095, "learning_rate": 5.287366143063682e-06, "loss": 0.3243, "step": 11601 }, { "epoch": 0.67, "grad_norm": 0.2748846662378343, "learning_rate": 5.285724913660704e-06, "loss": 0.2326, "step": 11602 }, { "epoch": 0.67, "grad_norm": 0.43119599601784436, "learning_rate": 5.284083847512866e-06, "loss": 0.1601, "step": 11603 }, { "epoch": 0.67, "grad_norm": 0.8477040777808765, "learning_rate": 5.282442944677005e-06, "loss": 0.4426, "step": 11604 }, { "epoch": 0.67, "grad_norm": 0.27256531636671766, "learning_rate": 5.280802205209943e-06, "loss": 0.2666, "step": 11605 }, { "epoch": 0.67, "grad_norm": 0.5546628272888353, "learning_rate": 5.279161629168497e-06, "loss": 0.3214, "step": 11606 }, { "epoch": 0.67, "grad_norm": 0.5307811121875399, "learning_rate": 5.2775212166094755e-06, "loss": 0.3207, "step": 11607 }, { "epoch": 0.67, "grad_norm": 0.2547089293698558, "learning_rate": 5.275880967589697e-06, "loss": 0.2159, "step": 11608 }, { "epoch": 0.67, "grad_norm": 0.3482597708001019, "learning_rate": 5.274240882165958e-06, "loss": 0.2253, "step": 11609 }, { "epoch": 0.67, "grad_norm": 0.5292646057639918, "learning_rate": 5.272600960395051e-06, "loss": 0.283, "step": 11610 }, { "epoch": 0.67, "grad_norm": 0.3723792760127392, "learning_rate": 5.270961202333769e-06, "loss": 0.292, "step": 11611 }, { "epoch": 0.67, "grad_norm": 0.7733747358805955, "learning_rate": 5.2693216080388984e-06, "loss": 0.4955, "step": 11612 }, { "epoch": 0.67, "grad_norm": 0.29065692377208907, "learning_rate": 5.267682177567219e-06, "loss": 0.2262, "step": 11613 }, { "epoch": 0.67, "grad_norm": 0.37946091941589666, "learning_rate": 5.266042910975501e-06, "loss": 0.3012, "step": 11614 }, { "epoch": 0.67, "grad_norm": 0.28954371908158133, "learning_rate": 5.264403808320514e-06, "loss": 0.1924, "step": 11615 }, { "epoch": 0.67, "grad_norm": 0.36618728513841026, "learning_rate": 5.26276486965902e-06, "loss": 0.2213, "step": 11616 }, { "epoch": 0.67, "grad_norm": 0.3013556099336767, "learning_rate": 5.261126095047774e-06, "loss": 0.2854, "step": 11617 }, { "epoch": 0.67, "grad_norm": 0.6303827060308433, "learning_rate": 5.259487484543528e-06, "loss": 0.4274, "step": 11618 }, { "epoch": 0.67, "grad_norm": 0.3363842194928553, "learning_rate": 5.257849038203022e-06, "loss": 0.1905, "step": 11619 }, { "epoch": 0.67, "grad_norm": 0.2881946093992692, "learning_rate": 5.256210756083004e-06, "loss": 0.2326, "step": 11620 }, { "epoch": 0.67, "grad_norm": 0.2732752542283715, "learning_rate": 5.254572638240204e-06, "loss": 0.2327, "step": 11621 }, { "epoch": 0.67, "grad_norm": 0.6126870326894789, "learning_rate": 5.252934684731349e-06, "loss": 0.2707, "step": 11622 }, { "epoch": 0.67, "grad_norm": 0.38004426464239194, "learning_rate": 5.251296895613158e-06, "loss": 0.2675, "step": 11623 }, { "epoch": 0.67, "grad_norm": 0.3487591686090327, "learning_rate": 5.249659270942355e-06, "loss": 0.3091, "step": 11624 }, { "epoch": 0.67, "grad_norm": 0.809507380773592, "learning_rate": 5.248021810775647e-06, "loss": 0.4803, "step": 11625 }, { "epoch": 0.67, "grad_norm": 0.24443885048236402, "learning_rate": 5.2463845151697404e-06, "loss": 0.1772, "step": 11626 }, { "epoch": 0.67, "grad_norm": 0.4368490630538348, "learning_rate": 5.2447473841813335e-06, "loss": 0.2457, "step": 11627 }, { "epoch": 0.67, "grad_norm": 0.4995512757478243, "learning_rate": 5.243110417867117e-06, "loss": 0.3012, "step": 11628 }, { "epoch": 0.67, "grad_norm": 0.29444216072671153, "learning_rate": 5.241473616283783e-06, "loss": 0.2354, "step": 11629 }, { "epoch": 0.67, "grad_norm": 1.4348647745786949, "learning_rate": 5.239836979488015e-06, "loss": 0.7923, "step": 11630 }, { "epoch": 0.67, "grad_norm": 0.7504570213240344, "learning_rate": 5.238200507536488e-06, "loss": 0.4943, "step": 11631 }, { "epoch": 0.67, "grad_norm": 0.31611857707224705, "learning_rate": 5.23656420048587e-06, "loss": 0.2007, "step": 11632 }, { "epoch": 0.67, "grad_norm": 0.2280572315656847, "learning_rate": 5.23492805839283e-06, "loss": 0.2107, "step": 11633 }, { "epoch": 0.67, "grad_norm": 0.5290475222067625, "learning_rate": 5.233292081314027e-06, "loss": 0.3361, "step": 11634 }, { "epoch": 0.67, "grad_norm": 0.33595680323497873, "learning_rate": 5.231656269306116e-06, "loss": 0.2121, "step": 11635 }, { "epoch": 0.67, "grad_norm": 0.34149735897754147, "learning_rate": 5.230020622425738e-06, "loss": 0.2989, "step": 11636 }, { "epoch": 0.67, "grad_norm": 0.5854348227990583, "learning_rate": 5.228385140729545e-06, "loss": 0.4182, "step": 11637 }, { "epoch": 0.67, "grad_norm": 0.37979442690169474, "learning_rate": 5.226749824274169e-06, "loss": 0.2815, "step": 11638 }, { "epoch": 0.67, "grad_norm": 0.4566569153966813, "learning_rate": 5.225114673116243e-06, "loss": 0.239, "step": 11639 }, { "epoch": 0.67, "grad_norm": 0.2744528609459148, "learning_rate": 5.223479687312388e-06, "loss": 0.2327, "step": 11640 }, { "epoch": 0.67, "grad_norm": 0.36138822589171504, "learning_rate": 5.2218448669192235e-06, "loss": 0.2638, "step": 11641 }, { "epoch": 0.67, "grad_norm": 0.8235685157243273, "learning_rate": 5.220210211993371e-06, "loss": 0.457, "step": 11642 }, { "epoch": 0.67, "grad_norm": 1.145506050031028, "learning_rate": 5.21857572259143e-06, "loss": 0.6852, "step": 11643 }, { "epoch": 0.67, "grad_norm": 0.32843776235279604, "learning_rate": 5.216941398770009e-06, "loss": 0.2588, "step": 11644 }, { "epoch": 0.67, "grad_norm": 0.33328103317540847, "learning_rate": 5.215307240585696e-06, "loss": 0.2655, "step": 11645 }, { "epoch": 0.67, "grad_norm": 0.34161222318740736, "learning_rate": 5.213673248095092e-06, "loss": 0.2008, "step": 11646 }, { "epoch": 0.67, "grad_norm": 0.34405362854914645, "learning_rate": 5.212039421354779e-06, "loss": 0.2812, "step": 11647 }, { "epoch": 0.67, "grad_norm": 0.3367695864863821, "learning_rate": 5.2104057604213335e-06, "loss": 0.2562, "step": 11648 }, { "epoch": 0.67, "grad_norm": 0.7079692534150634, "learning_rate": 5.208772265351332e-06, "loss": 0.426, "step": 11649 }, { "epoch": 0.67, "grad_norm": 0.35211947295926666, "learning_rate": 5.207138936201339e-06, "loss": 0.2587, "step": 11650 }, { "epoch": 0.67, "grad_norm": 0.6453440156091795, "learning_rate": 5.205505773027919e-06, "loss": 0.3904, "step": 11651 }, { "epoch": 0.67, "grad_norm": 0.2491559057765381, "learning_rate": 5.203872775887628e-06, "loss": 0.1939, "step": 11652 }, { "epoch": 0.67, "grad_norm": 0.362328629160573, "learning_rate": 5.202239944837013e-06, "loss": 0.283, "step": 11653 }, { "epoch": 0.67, "grad_norm": 0.4889478459733575, "learning_rate": 5.200607279932626e-06, "loss": 0.3449, "step": 11654 }, { "epoch": 0.67, "grad_norm": 0.6228592787338032, "learning_rate": 5.198974781231003e-06, "loss": 0.308, "step": 11655 }, { "epoch": 0.67, "grad_norm": 0.30679920540472044, "learning_rate": 5.197342448788676e-06, "loss": 0.2397, "step": 11656 }, { "epoch": 0.67, "grad_norm": 0.3359198585706779, "learning_rate": 5.19571028266217e-06, "loss": 0.3035, "step": 11657 }, { "epoch": 0.67, "grad_norm": 0.22988331423007832, "learning_rate": 5.194078282908015e-06, "loss": 0.0879, "step": 11658 }, { "epoch": 0.67, "grad_norm": 0.32083574318893576, "learning_rate": 5.192446449582722e-06, "loss": 0.2683, "step": 11659 }, { "epoch": 0.67, "grad_norm": 0.37311945503442323, "learning_rate": 5.190814782742801e-06, "loss": 0.3295, "step": 11660 }, { "epoch": 0.67, "grad_norm": 0.9174082847992806, "learning_rate": 5.1891832824447545e-06, "loss": 0.3407, "step": 11661 }, { "epoch": 0.67, "grad_norm": 0.311522227954505, "learning_rate": 5.1875519487450865e-06, "loss": 0.2712, "step": 11662 }, { "epoch": 0.67, "grad_norm": 0.7068267725620065, "learning_rate": 5.185920781700288e-06, "loss": 0.4194, "step": 11663 }, { "epoch": 0.67, "grad_norm": 0.3690138293865834, "learning_rate": 5.184289781366847e-06, "loss": 0.2892, "step": 11664 }, { "epoch": 0.67, "grad_norm": 0.2054627742441291, "learning_rate": 5.182658947801242e-06, "loss": 0.1386, "step": 11665 }, { "epoch": 0.67, "grad_norm": 1.2891895203008483, "learning_rate": 5.1810282810599475e-06, "loss": 0.6783, "step": 11666 }, { "epoch": 0.67, "grad_norm": 0.8395194551653319, "learning_rate": 5.17939778119944e-06, "loss": 0.412, "step": 11667 }, { "epoch": 0.67, "grad_norm": 0.2798409401034943, "learning_rate": 5.1777674482761805e-06, "loss": 0.2069, "step": 11668 }, { "epoch": 0.67, "grad_norm": 0.44920481661776707, "learning_rate": 5.176137282346627e-06, "loss": 0.3641, "step": 11669 }, { "epoch": 0.67, "grad_norm": 0.29918314203834345, "learning_rate": 5.174507283467228e-06, "loss": 0.1879, "step": 11670 }, { "epoch": 0.67, "grad_norm": 0.32543040157224057, "learning_rate": 5.172877451694438e-06, "loss": 0.197, "step": 11671 }, { "epoch": 0.67, "grad_norm": 0.36804171214608106, "learning_rate": 5.171247787084694e-06, "loss": 0.3286, "step": 11672 }, { "epoch": 0.67, "grad_norm": 0.9862885552626823, "learning_rate": 5.169618289694432e-06, "loss": 0.3905, "step": 11673 }, { "epoch": 0.67, "grad_norm": 0.2984786136741482, "learning_rate": 5.167988959580077e-06, "loss": 0.1931, "step": 11674 }, { "epoch": 0.67, "grad_norm": 1.0670030322829498, "learning_rate": 5.16635979679806e-06, "loss": 0.5376, "step": 11675 }, { "epoch": 0.67, "grad_norm": 0.34866352029633485, "learning_rate": 5.1647308014047955e-06, "loss": 0.2992, "step": 11676 }, { "epoch": 0.67, "grad_norm": 0.33441485196590653, "learning_rate": 5.163101973456696e-06, "loss": 0.2794, "step": 11677 }, { "epoch": 0.67, "grad_norm": 0.2906375868270081, "learning_rate": 5.161473313010162e-06, "loss": 0.15, "step": 11678 }, { "epoch": 0.67, "grad_norm": 0.9677833792997231, "learning_rate": 5.159844820121605e-06, "loss": 0.4, "step": 11679 }, { "epoch": 0.67, "grad_norm": 0.2778530544704572, "learning_rate": 5.158216494847412e-06, "loss": 0.2421, "step": 11680 }, { "epoch": 0.67, "grad_norm": 0.44108767628976453, "learning_rate": 5.156588337243974e-06, "loss": 0.2371, "step": 11681 }, { "epoch": 0.67, "grad_norm": 0.9779485327404348, "learning_rate": 5.154960347367675e-06, "loss": 0.4578, "step": 11682 }, { "epoch": 0.67, "grad_norm": 0.27024184345027874, "learning_rate": 5.153332525274888e-06, "loss": 0.225, "step": 11683 }, { "epoch": 0.67, "grad_norm": 0.3015393644257497, "learning_rate": 5.1517048710219895e-06, "loss": 0.2489, "step": 11684 }, { "epoch": 0.67, "grad_norm": 0.8142713116682633, "learning_rate": 5.150077384665342e-06, "loss": 0.397, "step": 11685 }, { "epoch": 0.67, "grad_norm": 0.333100189552544, "learning_rate": 5.148450066261303e-06, "loss": 0.2676, "step": 11686 }, { "epoch": 0.67, "grad_norm": 0.4376687450485571, "learning_rate": 5.146822915866232e-06, "loss": 0.24, "step": 11687 }, { "epoch": 0.67, "grad_norm": 0.3483112994923384, "learning_rate": 5.145195933536476e-06, "loss": 0.2511, "step": 11688 }, { "epoch": 0.67, "grad_norm": 0.4132548196316583, "learning_rate": 5.143569119328376e-06, "loss": 0.2765, "step": 11689 }, { "epoch": 0.67, "grad_norm": 0.7860273994083556, "learning_rate": 5.141942473298264e-06, "loss": 0.2614, "step": 11690 }, { "epoch": 0.67, "grad_norm": 0.3396154895768527, "learning_rate": 5.140315995502478e-06, "loss": 0.2806, "step": 11691 }, { "epoch": 0.67, "grad_norm": 0.4005484919825302, "learning_rate": 5.1386896859973425e-06, "loss": 0.2488, "step": 11692 }, { "epoch": 0.67, "grad_norm": 0.33537309338762605, "learning_rate": 5.1370635448391736e-06, "loss": 0.2494, "step": 11693 }, { "epoch": 0.67, "grad_norm": 1.2551963101728314, "learning_rate": 5.135437572084284e-06, "loss": 0.2223, "step": 11694 }, { "epoch": 0.67, "grad_norm": 0.39248745343059227, "learning_rate": 5.133811767788979e-06, "loss": 0.2952, "step": 11695 }, { "epoch": 0.67, "grad_norm": 0.29648525882978316, "learning_rate": 5.132186132009567e-06, "loss": 0.2726, "step": 11696 }, { "epoch": 0.67, "grad_norm": 0.9013794775813733, "learning_rate": 5.13056066480234e-06, "loss": 0.357, "step": 11697 }, { "epoch": 0.67, "grad_norm": 0.34343934566226364, "learning_rate": 5.128935366223588e-06, "loss": 0.2625, "step": 11698 }, { "epoch": 0.67, "grad_norm": 0.2714716915290179, "learning_rate": 5.12731023632959e-06, "loss": 0.1737, "step": 11699 }, { "epoch": 0.67, "grad_norm": 0.3910270132526932, "learning_rate": 5.125685275176633e-06, "loss": 0.3165, "step": 11700 }, { "epoch": 0.67, "grad_norm": 0.35071460165870216, "learning_rate": 5.124060482820986e-06, "loss": 0.1882, "step": 11701 }, { "epoch": 0.67, "grad_norm": 0.9215729327737139, "learning_rate": 5.122435859318915e-06, "loss": 0.4102, "step": 11702 }, { "epoch": 0.67, "grad_norm": 0.45771396649945395, "learning_rate": 5.120811404726675e-06, "loss": 0.3709, "step": 11703 }, { "epoch": 0.67, "grad_norm": 0.27670028601173474, "learning_rate": 5.119187119100533e-06, "loss": 0.2096, "step": 11704 }, { "epoch": 0.67, "grad_norm": 0.26230396682039764, "learning_rate": 5.117563002496728e-06, "loss": 0.168, "step": 11705 }, { "epoch": 0.67, "grad_norm": 1.0274803578898541, "learning_rate": 5.115939054971508e-06, "loss": 0.4535, "step": 11706 }, { "epoch": 0.67, "grad_norm": 0.6142974067442903, "learning_rate": 5.114315276581108e-06, "loss": 0.2165, "step": 11707 }, { "epoch": 0.67, "grad_norm": 0.2637060378536606, "learning_rate": 5.1126916673817575e-06, "loss": 0.2753, "step": 11708 }, { "epoch": 0.67, "grad_norm": 1.119474613305846, "learning_rate": 5.111068227429686e-06, "loss": 0.6991, "step": 11709 }, { "epoch": 0.67, "grad_norm": 0.3746375472716624, "learning_rate": 5.109444956781113e-06, "loss": 0.1687, "step": 11710 }, { "epoch": 0.67, "grad_norm": 0.26177824937585215, "learning_rate": 5.10782185549225e-06, "loss": 0.2256, "step": 11711 }, { "epoch": 0.67, "grad_norm": 0.48526546688989247, "learning_rate": 5.106198923619302e-06, "loss": 0.3127, "step": 11712 }, { "epoch": 0.67, "grad_norm": 0.6618222141302967, "learning_rate": 5.10457616121848e-06, "loss": 0.2913, "step": 11713 }, { "epoch": 0.67, "grad_norm": 0.45600963586374926, "learning_rate": 5.102953568345973e-06, "loss": 0.2749, "step": 11714 }, { "epoch": 0.67, "grad_norm": 0.4655809092972486, "learning_rate": 5.101331145057975e-06, "loss": 0.3785, "step": 11715 }, { "epoch": 0.67, "grad_norm": 0.36798216280033647, "learning_rate": 5.0997088914106685e-06, "loss": 0.2713, "step": 11716 }, { "epoch": 0.67, "grad_norm": 0.21140015509665294, "learning_rate": 5.098086807460232e-06, "loss": 0.1602, "step": 11717 }, { "epoch": 0.67, "grad_norm": 0.7371500660216942, "learning_rate": 5.096464893262838e-06, "loss": 0.3948, "step": 11718 }, { "epoch": 0.67, "grad_norm": 0.3635716936363465, "learning_rate": 5.094843148874654e-06, "loss": 0.2994, "step": 11719 }, { "epoch": 0.67, "grad_norm": 0.3033816677570989, "learning_rate": 5.0932215743518375e-06, "loss": 0.2358, "step": 11720 }, { "epoch": 0.67, "grad_norm": 1.0560147653471157, "learning_rate": 5.0916001697505506e-06, "loss": 0.6911, "step": 11721 }, { "epoch": 0.67, "grad_norm": 0.3864890302897584, "learning_rate": 5.089978935126939e-06, "loss": 0.2657, "step": 11722 }, { "epoch": 0.67, "grad_norm": 0.22419003265245574, "learning_rate": 5.088357870537146e-06, "loss": 0.1348, "step": 11723 }, { "epoch": 0.67, "grad_norm": 0.3287953682638831, "learning_rate": 5.086736976037304e-06, "loss": 0.3103, "step": 11724 }, { "epoch": 0.67, "grad_norm": 0.7126230773256188, "learning_rate": 5.085116251683554e-06, "loss": 0.3805, "step": 11725 }, { "epoch": 0.67, "grad_norm": 0.34080945340313146, "learning_rate": 5.083495697532016e-06, "loss": 0.2898, "step": 11726 }, { "epoch": 0.67, "grad_norm": 0.32289359281894736, "learning_rate": 5.081875313638811e-06, "loss": 0.2766, "step": 11727 }, { "epoch": 0.67, "grad_norm": 0.7495934803198708, "learning_rate": 5.080255100060048e-06, "loss": 0.3571, "step": 11728 }, { "epoch": 0.67, "grad_norm": 0.2572948506856289, "learning_rate": 5.078635056851844e-06, "loss": 0.2007, "step": 11729 }, { "epoch": 0.67, "grad_norm": 0.5697537663129776, "learning_rate": 5.077015184070296e-06, "loss": 0.2481, "step": 11730 }, { "epoch": 0.67, "grad_norm": 0.33024815771221205, "learning_rate": 5.075395481771501e-06, "loss": 0.2487, "step": 11731 }, { "epoch": 0.67, "grad_norm": 0.29132435493008624, "learning_rate": 5.073775950011548e-06, "loss": 0.2737, "step": 11732 }, { "epoch": 0.67, "grad_norm": 1.1874298610861767, "learning_rate": 5.072156588846519e-06, "loss": 0.5421, "step": 11733 }, { "epoch": 0.67, "grad_norm": 0.5883251543734425, "learning_rate": 5.070537398332498e-06, "loss": 0.3345, "step": 11734 }, { "epoch": 0.67, "grad_norm": 0.27343202258408916, "learning_rate": 5.068918378525555e-06, "loss": 0.2571, "step": 11735 }, { "epoch": 0.67, "grad_norm": 0.6437468881803801, "learning_rate": 5.067299529481758e-06, "loss": 0.2749, "step": 11736 }, { "epoch": 0.67, "grad_norm": 0.34159851863097196, "learning_rate": 5.065680851257162e-06, "loss": 0.1786, "step": 11737 }, { "epoch": 0.67, "grad_norm": 0.37613981223297227, "learning_rate": 5.0640623439078285e-06, "loss": 0.2851, "step": 11738 }, { "epoch": 0.67, "grad_norm": 0.365571260834238, "learning_rate": 5.062444007489804e-06, "loss": 0.2933, "step": 11739 }, { "epoch": 0.67, "grad_norm": 0.346912717482984, "learning_rate": 5.060825842059132e-06, "loss": 0.1916, "step": 11740 }, { "epoch": 0.67, "grad_norm": 0.47141640995945183, "learning_rate": 5.059207847671845e-06, "loss": 0.3235, "step": 11741 }, { "epoch": 0.67, "grad_norm": 0.45849898304823045, "learning_rate": 5.05759002438398e-06, "loss": 0.2979, "step": 11742 }, { "epoch": 0.67, "grad_norm": 0.25376962264594627, "learning_rate": 5.055972372251562e-06, "loss": 0.1864, "step": 11743 }, { "epoch": 0.67, "grad_norm": 0.3635867761463181, "learning_rate": 5.054354891330607e-06, "loss": 0.2763, "step": 11744 }, { "epoch": 0.67, "grad_norm": 1.209100047476115, "learning_rate": 5.05273758167713e-06, "loss": 0.7096, "step": 11745 }, { "epoch": 0.67, "grad_norm": 0.5569931214330035, "learning_rate": 5.051120443347134e-06, "loss": 0.2018, "step": 11746 }, { "epoch": 0.67, "grad_norm": 0.2892336497399014, "learning_rate": 5.049503476396627e-06, "loss": 0.2789, "step": 11747 }, { "epoch": 0.67, "grad_norm": 0.43334647063447024, "learning_rate": 5.047886680881603e-06, "loss": 0.3457, "step": 11748 }, { "epoch": 0.68, "grad_norm": 0.165429228208656, "learning_rate": 5.0462700568580495e-06, "loss": 0.0847, "step": 11749 }, { "epoch": 0.68, "grad_norm": 0.4192615834692852, "learning_rate": 5.044653604381952e-06, "loss": 0.3126, "step": 11750 }, { "epoch": 0.68, "grad_norm": 0.3438659632511523, "learning_rate": 5.043037323509285e-06, "loss": 0.3159, "step": 11751 }, { "epoch": 0.68, "grad_norm": 0.6009103614653322, "learning_rate": 5.041421214296025e-06, "loss": 0.375, "step": 11752 }, { "epoch": 0.68, "grad_norm": 0.32373127606272445, "learning_rate": 5.039805276798128e-06, "loss": 0.2451, "step": 11753 }, { "epoch": 0.68, "grad_norm": 1.0671526753829008, "learning_rate": 5.0381895110715676e-06, "loss": 0.5263, "step": 11754 }, { "epoch": 0.68, "grad_norm": 0.23259453773066574, "learning_rate": 5.03657391717229e-06, "loss": 0.1934, "step": 11755 }, { "epoch": 0.68, "grad_norm": 0.3365964910715124, "learning_rate": 5.0349584951562445e-06, "loss": 0.2178, "step": 11756 }, { "epoch": 0.68, "grad_norm": 1.0692943385938543, "learning_rate": 5.033343245079373e-06, "loss": 0.7361, "step": 11757 }, { "epoch": 0.68, "grad_norm": 0.5576953322811902, "learning_rate": 5.031728166997607e-06, "loss": 0.3471, "step": 11758 }, { "epoch": 0.68, "grad_norm": 0.2808777177692286, "learning_rate": 5.0301132609668845e-06, "loss": 0.2165, "step": 11759 }, { "epoch": 0.68, "grad_norm": 0.5072273706254729, "learning_rate": 5.028498527043126e-06, "loss": 0.3641, "step": 11760 }, { "epoch": 0.68, "grad_norm": 0.2782055835137195, "learning_rate": 5.026883965282252e-06, "loss": 0.1837, "step": 11761 }, { "epoch": 0.68, "grad_norm": 0.322179200138884, "learning_rate": 5.025269575740166e-06, "loss": 0.222, "step": 11762 }, { "epoch": 0.68, "grad_norm": 0.3592214488272502, "learning_rate": 5.023655358472786e-06, "loss": 0.3299, "step": 11763 }, { "epoch": 0.68, "grad_norm": 0.6839481155341293, "learning_rate": 5.022041313536006e-06, "loss": 0.4145, "step": 11764 }, { "epoch": 0.68, "grad_norm": 0.302911265362752, "learning_rate": 5.020427440985721e-06, "loss": 0.2343, "step": 11765 }, { "epoch": 0.68, "grad_norm": 1.2681543614082758, "learning_rate": 5.018813740877817e-06, "loss": 0.234, "step": 11766 }, { "epoch": 0.68, "grad_norm": 0.23444856503788336, "learning_rate": 5.0172002132681815e-06, "loss": 0.2076, "step": 11767 }, { "epoch": 0.68, "grad_norm": 0.32344719208450956, "learning_rate": 5.0155868582126886e-06, "loss": 0.262, "step": 11768 }, { "epoch": 0.68, "grad_norm": 0.8788977231247462, "learning_rate": 5.0139736757672095e-06, "loss": 0.3568, "step": 11769 }, { "epoch": 0.68, "grad_norm": 0.5589684828125013, "learning_rate": 5.012360665987607e-06, "loss": 0.3362, "step": 11770 }, { "epoch": 0.68, "grad_norm": 0.2517561774348396, "learning_rate": 5.010747828929736e-06, "loss": 0.2556, "step": 11771 }, { "epoch": 0.68, "grad_norm": 1.1552259575630777, "learning_rate": 5.009135164649457e-06, "loss": 0.2947, "step": 11772 }, { "epoch": 0.68, "grad_norm": 0.35356777969622866, "learning_rate": 5.007522673202613e-06, "loss": 0.2245, "step": 11773 }, { "epoch": 0.68, "grad_norm": 0.33910329035869946, "learning_rate": 5.005910354645043e-06, "loss": 0.2669, "step": 11774 }, { "epoch": 0.68, "grad_norm": 0.3495175522409411, "learning_rate": 5.0042982090325805e-06, "loss": 0.3104, "step": 11775 }, { "epoch": 0.68, "grad_norm": 0.622377722237779, "learning_rate": 5.002686236421059e-06, "loss": 0.2137, "step": 11776 }, { "epoch": 0.68, "grad_norm": 0.34094064564462573, "learning_rate": 5.0010744368663e-06, "loss": 0.2648, "step": 11777 }, { "epoch": 0.68, "grad_norm": 1.1026202721985212, "learning_rate": 4.999462810424116e-06, "loss": 0.4879, "step": 11778 }, { "epoch": 0.68, "grad_norm": 0.3465454930026479, "learning_rate": 4.9978513571503175e-06, "loss": 0.2476, "step": 11779 }, { "epoch": 0.68, "grad_norm": 0.295672774198805, "learning_rate": 4.996240077100713e-06, "loss": 0.242, "step": 11780 }, { "epoch": 0.68, "grad_norm": 0.6049472071811616, "learning_rate": 4.994628970331102e-06, "loss": 0.3999, "step": 11781 }, { "epoch": 0.68, "grad_norm": 0.29679226403336495, "learning_rate": 4.993018036897274e-06, "loss": 0.1961, "step": 11782 }, { "epoch": 0.68, "grad_norm": 0.26024884929211345, "learning_rate": 4.991407276855016e-06, "loss": 0.2608, "step": 11783 }, { "epoch": 0.68, "grad_norm": 0.41637954941656796, "learning_rate": 4.989796690260108e-06, "loss": 0.2302, "step": 11784 }, { "epoch": 0.68, "grad_norm": 1.0573213732138746, "learning_rate": 4.988186277168325e-06, "loss": 0.3642, "step": 11785 }, { "epoch": 0.68, "grad_norm": 0.32321682893392756, "learning_rate": 4.9865760376354365e-06, "loss": 0.2565, "step": 11786 }, { "epoch": 0.68, "grad_norm": 0.3490489850595347, "learning_rate": 4.9849659717172e-06, "loss": 0.2924, "step": 11787 }, { "epoch": 0.68, "grad_norm": 0.4404451474161239, "learning_rate": 4.98335607946938e-06, "loss": 0.3545, "step": 11788 }, { "epoch": 0.68, "grad_norm": 0.23778656116027025, "learning_rate": 4.981746360947724e-06, "loss": 0.1595, "step": 11789 }, { "epoch": 0.68, "grad_norm": 1.3534561202280206, "learning_rate": 4.980136816207974e-06, "loss": 0.6599, "step": 11790 }, { "epoch": 0.68, "grad_norm": 0.3459827769144183, "learning_rate": 4.978527445305869e-06, "loss": 0.3084, "step": 11791 }, { "epoch": 0.68, "grad_norm": 0.2924247907379295, "learning_rate": 4.976918248297145e-06, "loss": 0.1902, "step": 11792 }, { "epoch": 0.68, "grad_norm": 0.524691825002447, "learning_rate": 4.9753092252375245e-06, "loss": 0.396, "step": 11793 }, { "epoch": 0.68, "grad_norm": 0.36268677838139596, "learning_rate": 4.973700376182732e-06, "loss": 0.3425, "step": 11794 }, { "epoch": 0.68, "grad_norm": 0.2732204910654022, "learning_rate": 4.972091701188478e-06, "loss": 0.1814, "step": 11795 }, { "epoch": 0.68, "grad_norm": 0.2662254490724855, "learning_rate": 4.970483200310468e-06, "loss": 0.1567, "step": 11796 }, { "epoch": 0.68, "grad_norm": 0.6628178672031443, "learning_rate": 4.968874873604414e-06, "loss": 0.4117, "step": 11797 }, { "epoch": 0.68, "grad_norm": 0.31714829171919484, "learning_rate": 4.967266721126005e-06, "loss": 0.1911, "step": 11798 }, { "epoch": 0.68, "grad_norm": 0.3405285791476989, "learning_rate": 4.965658742930934e-06, "loss": 0.3089, "step": 11799 }, { "epoch": 0.68, "grad_norm": 1.0963642128915825, "learning_rate": 4.964050939074881e-06, "loss": 0.6414, "step": 11800 }, { "epoch": 0.68, "grad_norm": 0.2131071516902079, "learning_rate": 4.962443309613529e-06, "loss": 0.1658, "step": 11801 }, { "epoch": 0.68, "grad_norm": 0.3574377659432091, "learning_rate": 4.96083585460255e-06, "loss": 0.2421, "step": 11802 }, { "epoch": 0.68, "grad_norm": 0.4570436088730508, "learning_rate": 4.95922857409761e-06, "loss": 0.3248, "step": 11803 }, { "epoch": 0.68, "grad_norm": 0.3235139120968666, "learning_rate": 4.9576214681543626e-06, "loss": 0.2579, "step": 11804 }, { "epoch": 0.68, "grad_norm": 0.8548199042981579, "learning_rate": 4.956014536828471e-06, "loss": 0.3425, "step": 11805 }, { "epoch": 0.68, "grad_norm": 0.3368131841542381, "learning_rate": 4.954407780175578e-06, "loss": 0.3074, "step": 11806 }, { "epoch": 0.68, "grad_norm": 0.3103485250750534, "learning_rate": 4.952801198251328e-06, "loss": 0.2388, "step": 11807 }, { "epoch": 0.68, "grad_norm": 0.2644081194182972, "learning_rate": 4.95119479111135e-06, "loss": 0.091, "step": 11808 }, { "epoch": 0.68, "grad_norm": 0.811557495156051, "learning_rate": 4.949588558811285e-06, "loss": 0.3806, "step": 11809 }, { "epoch": 0.68, "grad_norm": 0.4076560589755643, "learning_rate": 4.947982501406749e-06, "loss": 0.2817, "step": 11810 }, { "epoch": 0.68, "grad_norm": 0.29270911302935926, "learning_rate": 4.946376618953364e-06, "loss": 0.2591, "step": 11811 }, { "epoch": 0.68, "grad_norm": 0.9784414407077597, "learning_rate": 4.944770911506739e-06, "loss": 0.536, "step": 11812 }, { "epoch": 0.68, "grad_norm": 0.3713203487743198, "learning_rate": 4.9431653791224744e-06, "loss": 0.2449, "step": 11813 }, { "epoch": 0.68, "grad_norm": 0.26768673340053445, "learning_rate": 4.941560021856181e-06, "loss": 0.2029, "step": 11814 }, { "epoch": 0.68, "grad_norm": 0.4008652783655156, "learning_rate": 4.9399548397634455e-06, "loss": 0.2715, "step": 11815 }, { "epoch": 0.68, "grad_norm": 0.3935508821109515, "learning_rate": 4.938349832899856e-06, "loss": 0.2394, "step": 11816 }, { "epoch": 0.68, "grad_norm": 0.49750616813772636, "learning_rate": 4.9367450013209905e-06, "loss": 0.3878, "step": 11817 }, { "epoch": 0.68, "grad_norm": 0.3181744075806371, "learning_rate": 4.935140345082436e-06, "loss": 0.2385, "step": 11818 }, { "epoch": 0.68, "grad_norm": 0.42534101653733136, "learning_rate": 4.93353586423975e-06, "loss": 0.3122, "step": 11819 }, { "epoch": 0.68, "grad_norm": 0.27271478591220527, "learning_rate": 4.9319315588484954e-06, "loss": 0.2234, "step": 11820 }, { "epoch": 0.68, "grad_norm": 0.6360729424545637, "learning_rate": 4.930327428964235e-06, "loss": 0.3097, "step": 11821 }, { "epoch": 0.68, "grad_norm": 0.32253078555839093, "learning_rate": 4.9287234746425195e-06, "loss": 0.2473, "step": 11822 }, { "epoch": 0.68, "grad_norm": 0.34936474152769675, "learning_rate": 4.927119695938891e-06, "loss": 0.3168, "step": 11823 }, { "epoch": 0.68, "grad_norm": 0.9379810581274167, "learning_rate": 4.925516092908891e-06, "loss": 0.3485, "step": 11824 }, { "epoch": 0.68, "grad_norm": 0.31432849488872344, "learning_rate": 4.923912665608045e-06, "loss": 0.2603, "step": 11825 }, { "epoch": 0.68, "grad_norm": 0.4927456068221821, "learning_rate": 4.9223094140918894e-06, "loss": 0.2566, "step": 11826 }, { "epoch": 0.68, "grad_norm": 0.31740940794607303, "learning_rate": 4.920706338415941e-06, "loss": 0.2725, "step": 11827 }, { "epoch": 0.68, "grad_norm": 0.35754575878697037, "learning_rate": 4.919103438635713e-06, "loss": 0.1741, "step": 11828 }, { "epoch": 0.68, "grad_norm": 0.4985427045345408, "learning_rate": 4.91750071480671e-06, "loss": 0.3825, "step": 11829 }, { "epoch": 0.68, "grad_norm": 0.33708352874029796, "learning_rate": 4.915898166984443e-06, "loss": 0.2834, "step": 11830 }, { "epoch": 0.68, "grad_norm": 0.5475661290159132, "learning_rate": 4.914295795224404e-06, "loss": 0.1531, "step": 11831 }, { "epoch": 0.68, "grad_norm": 0.38665851593133643, "learning_rate": 4.912693599582083e-06, "loss": 0.3186, "step": 11832 }, { "epoch": 0.68, "grad_norm": 0.2821804515532111, "learning_rate": 4.91109158011296e-06, "loss": 0.191, "step": 11833 }, { "epoch": 0.68, "grad_norm": 0.2936069143811596, "learning_rate": 4.909489736872521e-06, "loss": 0.1984, "step": 11834 }, { "epoch": 0.68, "grad_norm": 0.5737718239366725, "learning_rate": 4.907888069916234e-06, "loss": 0.317, "step": 11835 }, { "epoch": 0.68, "grad_norm": 1.2137512039405558, "learning_rate": 4.906286579299563e-06, "loss": 0.6646, "step": 11836 }, { "epoch": 0.68, "grad_norm": 0.36728729290760725, "learning_rate": 4.904685265077969e-06, "loss": 0.19, "step": 11837 }, { "epoch": 0.68, "grad_norm": 0.30145925210114205, "learning_rate": 4.903084127306901e-06, "loss": 0.2932, "step": 11838 }, { "epoch": 0.68, "grad_norm": 0.44813104620717403, "learning_rate": 4.901483166041815e-06, "loss": 0.272, "step": 11839 }, { "epoch": 0.68, "grad_norm": 0.3188875124250454, "learning_rate": 4.899882381338147e-06, "loss": 0.2182, "step": 11840 }, { "epoch": 0.68, "grad_norm": 0.3501168527999765, "learning_rate": 4.898281773251333e-06, "loss": 0.254, "step": 11841 }, { "epoch": 0.68, "grad_norm": 0.3725052385902562, "learning_rate": 4.896681341836798e-06, "loss": 0.3149, "step": 11842 }, { "epoch": 0.68, "grad_norm": 0.3919431187346392, "learning_rate": 4.895081087149974e-06, "loss": 0.2764, "step": 11843 }, { "epoch": 0.68, "grad_norm": 0.447679970754077, "learning_rate": 4.8934810092462705e-06, "loss": 0.283, "step": 11844 }, { "epoch": 0.68, "grad_norm": 0.27528776173796865, "learning_rate": 4.891881108181101e-06, "loss": 0.1694, "step": 11845 }, { "epoch": 0.68, "grad_norm": 0.2515033888633789, "learning_rate": 4.890281384009865e-06, "loss": 0.2506, "step": 11846 }, { "epoch": 0.68, "grad_norm": 0.4647181444523978, "learning_rate": 4.8886818367879686e-06, "loss": 0.2796, "step": 11847 }, { "epoch": 0.68, "grad_norm": 0.6205120870967442, "learning_rate": 4.8870824665708e-06, "loss": 0.4248, "step": 11848 }, { "epoch": 0.68, "grad_norm": 0.6572729053267969, "learning_rate": 4.885483273413747e-06, "loss": 0.3524, "step": 11849 }, { "epoch": 0.68, "grad_norm": 0.26634984300357595, "learning_rate": 4.883884257372188e-06, "loss": 0.2635, "step": 11850 }, { "epoch": 0.68, "grad_norm": 0.21217690977582984, "learning_rate": 4.882285418501497e-06, "loss": 0.1465, "step": 11851 }, { "epoch": 0.68, "grad_norm": 0.823238938356525, "learning_rate": 4.88068675685704e-06, "loss": 0.5252, "step": 11852 }, { "epoch": 0.68, "grad_norm": 0.346625383613797, "learning_rate": 4.879088272494184e-06, "loss": 0.2784, "step": 11853 }, { "epoch": 0.68, "grad_norm": 0.3474056837835713, "learning_rate": 4.877489965468274e-06, "loss": 0.2554, "step": 11854 }, { "epoch": 0.68, "grad_norm": 0.6079999000177458, "learning_rate": 4.875891835834672e-06, "loss": 0.3113, "step": 11855 }, { "epoch": 0.68, "grad_norm": 0.37799226612572256, "learning_rate": 4.874293883648714e-06, "loss": 0.3038, "step": 11856 }, { "epoch": 0.68, "grad_norm": 0.43480049777111607, "learning_rate": 4.8726961089657385e-06, "loss": 0.1556, "step": 11857 }, { "epoch": 0.68, "grad_norm": 0.255060595881029, "learning_rate": 4.871098511841073e-06, "loss": 0.2094, "step": 11858 }, { "epoch": 0.68, "grad_norm": 0.37794696995047256, "learning_rate": 4.8695010923300505e-06, "loss": 0.2933, "step": 11859 }, { "epoch": 0.68, "grad_norm": 0.703634639952293, "learning_rate": 4.867903850487983e-06, "loss": 0.3383, "step": 11860 }, { "epoch": 0.68, "grad_norm": 0.5711648018980142, "learning_rate": 4.866306786370184e-06, "loss": 0.3568, "step": 11861 }, { "epoch": 0.68, "grad_norm": 0.3153893572774383, "learning_rate": 4.864709900031961e-06, "loss": 0.3094, "step": 11862 }, { "epoch": 0.68, "grad_norm": 0.46738359154640297, "learning_rate": 4.86311319152861e-06, "loss": 0.3345, "step": 11863 }, { "epoch": 0.68, "grad_norm": 0.174118788912665, "learning_rate": 4.8615166609154315e-06, "loss": 0.1225, "step": 11864 }, { "epoch": 0.68, "grad_norm": 0.5155715979717324, "learning_rate": 4.85992030824771e-06, "loss": 0.3517, "step": 11865 }, { "epoch": 0.68, "grad_norm": 0.39699707131238365, "learning_rate": 4.858324133580727e-06, "loss": 0.3108, "step": 11866 }, { "epoch": 0.68, "grad_norm": 0.723771118865917, "learning_rate": 4.856728136969755e-06, "loss": 0.265, "step": 11867 }, { "epoch": 0.68, "grad_norm": 0.34812703384012267, "learning_rate": 4.85513231847007e-06, "loss": 0.258, "step": 11868 }, { "epoch": 0.68, "grad_norm": 0.46778392750169046, "learning_rate": 4.853536678136932e-06, "loss": 0.3343, "step": 11869 }, { "epoch": 0.68, "grad_norm": 0.26524386390299387, "learning_rate": 4.851941216025597e-06, "loss": 0.1666, "step": 11870 }, { "epoch": 0.68, "grad_norm": 0.3529335727394295, "learning_rate": 4.850345932191313e-06, "loss": 0.2873, "step": 11871 }, { "epoch": 0.68, "grad_norm": 0.6736113996854561, "learning_rate": 4.848750826689332e-06, "loss": 0.373, "step": 11872 }, { "epoch": 0.68, "grad_norm": 0.2454207027025233, "learning_rate": 4.8471558995748865e-06, "loss": 0.1576, "step": 11873 }, { "epoch": 0.68, "grad_norm": 0.29709806190197335, "learning_rate": 4.845561150903212e-06, "loss": 0.2612, "step": 11874 }, { "epoch": 0.68, "grad_norm": 1.1809594524008666, "learning_rate": 4.843966580729533e-06, "loss": 0.6127, "step": 11875 }, { "epoch": 0.68, "grad_norm": 0.6862481533740115, "learning_rate": 4.842372189109066e-06, "loss": 0.4077, "step": 11876 }, { "epoch": 0.68, "grad_norm": 0.6072253682769295, "learning_rate": 4.840777976097032e-06, "loss": 0.2227, "step": 11877 }, { "epoch": 0.68, "grad_norm": 0.35301852333241995, "learning_rate": 4.839183941748635e-06, "loss": 0.3078, "step": 11878 }, { "epoch": 0.68, "grad_norm": 0.2620722298074846, "learning_rate": 4.837590086119076e-06, "loss": 0.1697, "step": 11879 }, { "epoch": 0.68, "grad_norm": 0.32669040181183934, "learning_rate": 4.835996409263546e-06, "loss": 0.2134, "step": 11880 }, { "epoch": 0.68, "grad_norm": 1.089646351240924, "learning_rate": 4.834402911237243e-06, "loss": 0.4845, "step": 11881 }, { "epoch": 0.68, "grad_norm": 0.29818673852673244, "learning_rate": 4.832809592095344e-06, "loss": 0.2822, "step": 11882 }, { "epoch": 0.68, "grad_norm": 0.329051440364525, "learning_rate": 4.831216451893027e-06, "loss": 0.2178, "step": 11883 }, { "epoch": 0.68, "grad_norm": 0.6968295293375228, "learning_rate": 4.829623490685459e-06, "loss": 0.4373, "step": 11884 }, { "epoch": 0.68, "grad_norm": 0.23540721441096973, "learning_rate": 4.828030708527814e-06, "loss": 0.1897, "step": 11885 }, { "epoch": 0.68, "grad_norm": 0.34736558596523004, "learning_rate": 4.826438105475239e-06, "loss": 0.2225, "step": 11886 }, { "epoch": 0.68, "grad_norm": 1.0944632104089678, "learning_rate": 4.824845681582892e-06, "loss": 0.5581, "step": 11887 }, { "epoch": 0.68, "grad_norm": 0.8324240460651077, "learning_rate": 4.82325343690591e-06, "loss": 0.414, "step": 11888 }, { "epoch": 0.68, "grad_norm": 0.32586291593223726, "learning_rate": 4.821661371499444e-06, "loss": 0.2691, "step": 11889 }, { "epoch": 0.68, "grad_norm": 0.33201111450208354, "learning_rate": 4.820069485418622e-06, "loss": 0.259, "step": 11890 }, { "epoch": 0.68, "grad_norm": 0.2894218031621524, "learning_rate": 4.818477778718571e-06, "loss": 0.1965, "step": 11891 }, { "epoch": 0.68, "grad_norm": 0.34646058218222653, "learning_rate": 4.8168862514544075e-06, "loss": 0.2612, "step": 11892 }, { "epoch": 0.68, "grad_norm": 0.9011059324528964, "learning_rate": 4.815294903681254e-06, "loss": 0.0465, "step": 11893 }, { "epoch": 0.68, "grad_norm": 0.38189133211873966, "learning_rate": 4.813703735454216e-06, "loss": 0.2818, "step": 11894 }, { "epoch": 0.68, "grad_norm": 0.37774095648872624, "learning_rate": 4.812112746828394e-06, "loss": 0.2809, "step": 11895 }, { "epoch": 0.68, "grad_norm": 0.9196950014411767, "learning_rate": 4.810521937858881e-06, "loss": 0.5367, "step": 11896 }, { "epoch": 0.68, "grad_norm": 0.25821957577363874, "learning_rate": 4.808931308600774e-06, "loss": 0.2529, "step": 11897 }, { "epoch": 0.68, "grad_norm": 0.33995182083802594, "learning_rate": 4.807340859109152e-06, "loss": 0.2719, "step": 11898 }, { "epoch": 0.68, "grad_norm": 0.408344759125669, "learning_rate": 4.805750589439092e-06, "loss": 0.1797, "step": 11899 }, { "epoch": 0.68, "grad_norm": 0.577276240726866, "learning_rate": 4.804160499645667e-06, "loss": 0.3866, "step": 11900 }, { "epoch": 0.68, "grad_norm": 0.33678995159423364, "learning_rate": 4.802570589783937e-06, "loss": 0.2425, "step": 11901 }, { "epoch": 0.68, "grad_norm": 0.3749231267150535, "learning_rate": 4.800980859908967e-06, "loss": 0.3353, "step": 11902 }, { "epoch": 0.68, "grad_norm": 0.39713015624552944, "learning_rate": 4.799391310075806e-06, "loss": 0.1311, "step": 11903 }, { "epoch": 0.68, "grad_norm": 0.2695977078762357, "learning_rate": 4.7978019403395e-06, "loss": 0.2272, "step": 11904 }, { "epoch": 0.68, "grad_norm": 0.5527853027413822, "learning_rate": 4.796212750755087e-06, "loss": 0.3386, "step": 11905 }, { "epoch": 0.68, "grad_norm": 0.5264787695999982, "learning_rate": 4.794623741377605e-06, "loss": 0.2782, "step": 11906 }, { "epoch": 0.68, "grad_norm": 0.3201557404278092, "learning_rate": 4.79303491226208e-06, "loss": 0.2416, "step": 11907 }, { "epoch": 0.68, "grad_norm": 1.1192785579323365, "learning_rate": 4.791446263463531e-06, "loss": 0.7994, "step": 11908 }, { "epoch": 0.68, "grad_norm": 0.26545596616417333, "learning_rate": 4.7898577950369704e-06, "loss": 0.2056, "step": 11909 }, { "epoch": 0.68, "grad_norm": 0.24110527748871047, "learning_rate": 4.788269507037415e-06, "loss": 0.1904, "step": 11910 }, { "epoch": 0.68, "grad_norm": 0.8106310137327224, "learning_rate": 4.786681399519862e-06, "loss": 0.416, "step": 11911 }, { "epoch": 0.68, "grad_norm": 0.7495867803826378, "learning_rate": 4.785093472539307e-06, "loss": 0.3306, "step": 11912 }, { "epoch": 0.68, "grad_norm": 0.2882924469905718, "learning_rate": 4.783505726150738e-06, "loss": 0.2652, "step": 11913 }, { "epoch": 0.68, "grad_norm": 0.4408666504381776, "learning_rate": 4.781918160409145e-06, "loss": 0.3201, "step": 11914 }, { "epoch": 0.68, "grad_norm": 0.4572909264973247, "learning_rate": 4.780330775369501e-06, "loss": 0.2594, "step": 11915 }, { "epoch": 0.68, "grad_norm": 0.2702213205694998, "learning_rate": 4.778743571086779e-06, "loss": 0.1873, "step": 11916 }, { "epoch": 0.68, "grad_norm": 0.5390198271391285, "learning_rate": 4.777156547615942e-06, "loss": 0.2586, "step": 11917 }, { "epoch": 0.68, "grad_norm": 0.5337637998050275, "learning_rate": 4.775569705011945e-06, "loss": 0.361, "step": 11918 }, { "epoch": 0.68, "grad_norm": 0.30304924911307957, "learning_rate": 4.773983043329753e-06, "loss": 0.1909, "step": 11919 }, { "epoch": 0.68, "grad_norm": 1.1874428884612134, "learning_rate": 4.7723965626243e-06, "loss": 0.7611, "step": 11920 }, { "epoch": 0.68, "grad_norm": 0.33172829141311116, "learning_rate": 4.770810262950524e-06, "loss": 0.2931, "step": 11921 }, { "epoch": 0.68, "grad_norm": 0.31447195769036773, "learning_rate": 4.769224144363368e-06, "loss": 0.2002, "step": 11922 }, { "epoch": 0.69, "grad_norm": 0.49881291519374765, "learning_rate": 4.767638206917755e-06, "loss": 0.2958, "step": 11923 }, { "epoch": 0.69, "grad_norm": 0.3918596183860913, "learning_rate": 4.766052450668606e-06, "loss": 0.2039, "step": 11924 }, { "epoch": 0.69, "grad_norm": 0.27808781938656596, "learning_rate": 4.764466875670836e-06, "loss": 0.2249, "step": 11925 }, { "epoch": 0.69, "grad_norm": 0.4956137686581436, "learning_rate": 4.762881481979349e-06, "loss": 0.3887, "step": 11926 }, { "epoch": 0.69, "grad_norm": 1.111755849842336, "learning_rate": 4.761296269649054e-06, "loss": 0.4972, "step": 11927 }, { "epoch": 0.69, "grad_norm": 0.30684184917618834, "learning_rate": 4.759711238734844e-06, "loss": 0.2521, "step": 11928 }, { "epoch": 0.69, "grad_norm": 0.4735692273389909, "learning_rate": 4.75812638929161e-06, "loss": 0.2749, "step": 11929 }, { "epoch": 0.69, "grad_norm": 0.2613868405056514, "learning_rate": 4.756541721374228e-06, "loss": 0.2068, "step": 11930 }, { "epoch": 0.69, "grad_norm": 0.33203601499476443, "learning_rate": 4.7549572350375864e-06, "loss": 0.248, "step": 11931 }, { "epoch": 0.69, "grad_norm": 0.9132904905924238, "learning_rate": 4.753372930336548e-06, "loss": 0.4699, "step": 11932 }, { "epoch": 0.69, "grad_norm": 0.32872137130223583, "learning_rate": 4.751788807325981e-06, "loss": 0.2959, "step": 11933 }, { "epoch": 0.69, "grad_norm": 0.396983128779614, "learning_rate": 4.750204866060738e-06, "loss": 0.2726, "step": 11934 }, { "epoch": 0.69, "grad_norm": 0.5185433454178232, "learning_rate": 4.748621106595679e-06, "loss": 0.2699, "step": 11935 }, { "epoch": 0.69, "grad_norm": 0.2455213486640146, "learning_rate": 4.747037528985644e-06, "loss": 0.1981, "step": 11936 }, { "epoch": 0.69, "grad_norm": 0.4123647936906814, "learning_rate": 4.745454133285474e-06, "loss": 0.2777, "step": 11937 }, { "epoch": 0.69, "grad_norm": 0.3633484612973121, "learning_rate": 4.743870919549998e-06, "loss": 0.3307, "step": 11938 }, { "epoch": 0.69, "grad_norm": 0.6143787094869488, "learning_rate": 4.74228788783405e-06, "loss": 0.3097, "step": 11939 }, { "epoch": 0.69, "grad_norm": 0.3848915895137807, "learning_rate": 4.740705038192444e-06, "loss": 0.2994, "step": 11940 }, { "epoch": 0.69, "grad_norm": 0.303569188063324, "learning_rate": 4.7391223706799994e-06, "loss": 0.2996, "step": 11941 }, { "epoch": 0.69, "grad_norm": 0.22025983968170426, "learning_rate": 4.73753988535152e-06, "loss": 0.0816, "step": 11942 }, { "epoch": 0.69, "grad_norm": 0.3947879607288585, "learning_rate": 4.735957582261803e-06, "loss": 0.2905, "step": 11943 }, { "epoch": 0.69, "grad_norm": 0.5295208092043296, "learning_rate": 4.7343754614656536e-06, "loss": 0.3748, "step": 11944 }, { "epoch": 0.69, "grad_norm": 0.3434557916596737, "learning_rate": 4.732793523017856e-06, "loss": 0.2484, "step": 11945 }, { "epoch": 0.69, "grad_norm": 0.4718786351336028, "learning_rate": 4.73121176697319e-06, "loss": 0.3152, "step": 11946 }, { "epoch": 0.69, "grad_norm": 0.5218283957356089, "learning_rate": 4.729630193386433e-06, "loss": 0.3238, "step": 11947 }, { "epoch": 0.69, "grad_norm": 0.158404783519397, "learning_rate": 4.728048802312358e-06, "loss": 0.0912, "step": 11948 }, { "epoch": 0.69, "grad_norm": 0.27999339984469956, "learning_rate": 4.726467593805726e-06, "loss": 0.2589, "step": 11949 }, { "epoch": 0.69, "grad_norm": 0.5390317862693036, "learning_rate": 4.724886567921295e-06, "loss": 0.4137, "step": 11950 }, { "epoch": 0.69, "grad_norm": 0.5653285205882527, "learning_rate": 4.723305724713812e-06, "loss": 0.3685, "step": 11951 }, { "epoch": 0.69, "grad_norm": 0.4366394060085917, "learning_rate": 4.721725064238028e-06, "loss": 0.1037, "step": 11952 }, { "epoch": 0.69, "grad_norm": 0.28421369940574165, "learning_rate": 4.720144586548681e-06, "loss": 0.2764, "step": 11953 }, { "epoch": 0.69, "grad_norm": 0.3944676448912269, "learning_rate": 4.718564291700497e-06, "loss": 0.2295, "step": 11954 }, { "epoch": 0.69, "grad_norm": 0.34010771117451527, "learning_rate": 4.7169841797482005e-06, "loss": 0.1541, "step": 11955 }, { "epoch": 0.69, "grad_norm": 0.3757324807553204, "learning_rate": 4.7154042507465195e-06, "loss": 0.2925, "step": 11956 }, { "epoch": 0.69, "grad_norm": 0.2737937072179737, "learning_rate": 4.713824504750161e-06, "loss": 0.2567, "step": 11957 }, { "epoch": 0.69, "grad_norm": 0.33085688299480903, "learning_rate": 4.7122449418138325e-06, "loss": 0.1568, "step": 11958 }, { "epoch": 0.69, "grad_norm": 0.5549992341677822, "learning_rate": 4.710665561992232e-06, "loss": 0.3311, "step": 11959 }, { "epoch": 0.69, "grad_norm": 0.43458610126481007, "learning_rate": 4.709086365340057e-06, "loss": 0.2455, "step": 11960 }, { "epoch": 0.69, "grad_norm": 0.24458519823897504, "learning_rate": 4.707507351911995e-06, "loss": 0.2202, "step": 11961 }, { "epoch": 0.69, "grad_norm": 0.4809860354275576, "learning_rate": 4.705928521762726e-06, "loss": 0.3509, "step": 11962 }, { "epoch": 0.69, "grad_norm": 0.6523562611627876, "learning_rate": 4.7043498749469204e-06, "loss": 0.2662, "step": 11963 }, { "epoch": 0.69, "grad_norm": 0.6412526764903204, "learning_rate": 4.702771411519256e-06, "loss": 0.3287, "step": 11964 }, { "epoch": 0.69, "grad_norm": 0.2694814585384461, "learning_rate": 4.701193131534389e-06, "loss": 0.2403, "step": 11965 }, { "epoch": 0.69, "grad_norm": 0.9540802506457705, "learning_rate": 4.699615035046975e-06, "loss": 0.5154, "step": 11966 }, { "epoch": 0.69, "grad_norm": 0.3832799734593423, "learning_rate": 4.698037122111665e-06, "loss": 0.3095, "step": 11967 }, { "epoch": 0.69, "grad_norm": 0.5151002559842178, "learning_rate": 4.696459392783098e-06, "loss": 0.2608, "step": 11968 }, { "epoch": 0.69, "grad_norm": 0.2970906609548814, "learning_rate": 4.694881847115918e-06, "loss": 0.2769, "step": 11969 }, { "epoch": 0.69, "grad_norm": 0.3673952699013813, "learning_rate": 4.69330448516475e-06, "loss": 0.2208, "step": 11970 }, { "epoch": 0.69, "grad_norm": 0.3823198217572113, "learning_rate": 4.691727306984222e-06, "loss": 0.2333, "step": 11971 }, { "epoch": 0.69, "grad_norm": 0.4797977748585798, "learning_rate": 4.690150312628944e-06, "loss": 0.3882, "step": 11972 }, { "epoch": 0.69, "grad_norm": 0.42047099420085404, "learning_rate": 4.688573502153536e-06, "loss": 0.3205, "step": 11973 }, { "epoch": 0.69, "grad_norm": 0.35808423918623256, "learning_rate": 4.6869968756126e-06, "loss": 0.2417, "step": 11974 }, { "epoch": 0.69, "grad_norm": 0.2642813721183281, "learning_rate": 4.685420433060732e-06, "loss": 0.1715, "step": 11975 }, { "epoch": 0.69, "grad_norm": 0.42438116021427397, "learning_rate": 4.683844174552523e-06, "loss": 0.2682, "step": 11976 }, { "epoch": 0.69, "grad_norm": 0.2952126454257907, "learning_rate": 4.682268100142567e-06, "loss": 0.2697, "step": 11977 }, { "epoch": 0.69, "grad_norm": 1.3042987725864317, "learning_rate": 4.680692209885436e-06, "loss": 0.3308, "step": 11978 }, { "epoch": 0.69, "grad_norm": 0.50954991915773, "learning_rate": 4.679116503835706e-06, "loss": 0.3273, "step": 11979 }, { "epoch": 0.69, "grad_norm": 0.35979928673775013, "learning_rate": 4.6775409820479415e-06, "loss": 0.311, "step": 11980 }, { "epoch": 0.69, "grad_norm": 0.34865340009068274, "learning_rate": 4.675965644576701e-06, "loss": 0.2457, "step": 11981 }, { "epoch": 0.69, "grad_norm": 0.2343614065070297, "learning_rate": 4.674390491476545e-06, "loss": 0.161, "step": 11982 }, { "epoch": 0.69, "grad_norm": 0.39514869929001734, "learning_rate": 4.672815522802018e-06, "loss": 0.3022, "step": 11983 }, { "epoch": 0.69, "grad_norm": 0.42193017601516436, "learning_rate": 4.671240738607659e-06, "loss": 0.2649, "step": 11984 }, { "epoch": 0.69, "grad_norm": 0.3575772895676162, "learning_rate": 4.669666138948001e-06, "loss": 0.3064, "step": 11985 }, { "epoch": 0.69, "grad_norm": 0.37733439670751967, "learning_rate": 4.668091723877584e-06, "loss": 0.2944, "step": 11986 }, { "epoch": 0.69, "grad_norm": 0.363482865984895, "learning_rate": 4.666517493450916e-06, "loss": 0.1889, "step": 11987 }, { "epoch": 0.69, "grad_norm": 0.2920184473283872, "learning_rate": 4.664943447722514e-06, "loss": 0.1943, "step": 11988 }, { "epoch": 0.69, "grad_norm": 0.268324034496146, "learning_rate": 4.6633695867468955e-06, "loss": 0.2651, "step": 11989 }, { "epoch": 0.69, "grad_norm": 0.6601723005250368, "learning_rate": 4.661795910578558e-06, "loss": 0.382, "step": 11990 }, { "epoch": 0.69, "grad_norm": 0.6064327864030062, "learning_rate": 4.660222419271999e-06, "loss": 0.2746, "step": 11991 }, { "epoch": 0.69, "grad_norm": 0.3675959255251272, "learning_rate": 4.658649112881709e-06, "loss": 0.3073, "step": 11992 }, { "epoch": 0.69, "grad_norm": 0.36307823733203226, "learning_rate": 4.657075991462165e-06, "loss": 0.3006, "step": 11993 }, { "epoch": 0.69, "grad_norm": 0.2183623643397165, "learning_rate": 4.6555030550678544e-06, "loss": 0.0842, "step": 11994 }, { "epoch": 0.69, "grad_norm": 0.35620400135270036, "learning_rate": 4.6539303037532435e-06, "loss": 0.2911, "step": 11995 }, { "epoch": 0.69, "grad_norm": 0.9075387144091024, "learning_rate": 4.652357737572796e-06, "loss": 0.392, "step": 11996 }, { "epoch": 0.69, "grad_norm": 0.30833975615137077, "learning_rate": 4.650785356580967e-06, "loss": 0.2674, "step": 11997 }, { "epoch": 0.69, "grad_norm": 0.36252082852686, "learning_rate": 4.649213160832213e-06, "loss": 0.3089, "step": 11998 }, { "epoch": 0.69, "grad_norm": 1.0088006244221261, "learning_rate": 4.647641150380978e-06, "loss": 0.562, "step": 11999 }, { "epoch": 0.69, "grad_norm": 0.17551431395096048, "learning_rate": 4.6460693252817e-06, "loss": 0.1311, "step": 12000 }, { "epoch": 0.69, "grad_norm": 0.32796221667677156, "learning_rate": 4.644497685588808e-06, "loss": 0.2919, "step": 12001 }, { "epoch": 0.69, "grad_norm": 1.211749796355846, "learning_rate": 4.642926231356734e-06, "loss": 0.3921, "step": 12002 }, { "epoch": 0.69, "grad_norm": 0.6840428587949849, "learning_rate": 4.641354962639894e-06, "loss": 0.4082, "step": 12003 }, { "epoch": 0.69, "grad_norm": 0.3276160490754549, "learning_rate": 4.639783879492701e-06, "loss": 0.2112, "step": 12004 }, { "epoch": 0.69, "grad_norm": 0.3450703208904191, "learning_rate": 4.638212981969562e-06, "loss": 0.2976, "step": 12005 }, { "epoch": 0.69, "grad_norm": 0.22299481634481944, "learning_rate": 4.636642270124874e-06, "loss": 0.1401, "step": 12006 }, { "epoch": 0.69, "grad_norm": 0.4736605076802758, "learning_rate": 4.6350717440130366e-06, "loss": 0.2279, "step": 12007 }, { "epoch": 0.69, "grad_norm": 0.6337811756742268, "learning_rate": 4.633501403688434e-06, "loss": 0.3114, "step": 12008 }, { "epoch": 0.69, "grad_norm": 1.0475717388652215, "learning_rate": 4.631931249205447e-06, "loss": 0.5844, "step": 12009 }, { "epoch": 0.69, "grad_norm": 0.29755432704987433, "learning_rate": 4.630361280618446e-06, "loss": 0.2081, "step": 12010 }, { "epoch": 0.69, "grad_norm": 1.1889278166919943, "learning_rate": 4.628791497981807e-06, "loss": 0.6442, "step": 12011 }, { "epoch": 0.69, "grad_norm": 0.2764738843094473, "learning_rate": 4.627221901349887e-06, "loss": 0.2578, "step": 12012 }, { "epoch": 0.69, "grad_norm": 0.24776928880783564, "learning_rate": 4.625652490777042e-06, "loss": 0.1999, "step": 12013 }, { "epoch": 0.69, "grad_norm": 0.7327164556440673, "learning_rate": 4.624083266317616e-06, "loss": 0.3085, "step": 12014 }, { "epoch": 0.69, "grad_norm": 1.1927932070668756, "learning_rate": 4.62251422802596e-06, "loss": 0.7523, "step": 12015 }, { "epoch": 0.69, "grad_norm": 0.3228198567523802, "learning_rate": 4.620945375956404e-06, "loss": 0.2437, "step": 12016 }, { "epoch": 0.69, "grad_norm": 0.5191991671756184, "learning_rate": 4.619376710163279e-06, "loss": 0.2589, "step": 12017 }, { "epoch": 0.69, "grad_norm": 0.7583758410617182, "learning_rate": 4.617808230700907e-06, "loss": 0.42, "step": 12018 }, { "epoch": 0.69, "grad_norm": 0.33941374151587894, "learning_rate": 4.6162399376236e-06, "loss": 0.2747, "step": 12019 }, { "epoch": 0.69, "grad_norm": 0.2758703659931883, "learning_rate": 4.614671830985681e-06, "loss": 0.2094, "step": 12020 }, { "epoch": 0.69, "grad_norm": 0.354847088036182, "learning_rate": 4.613103910841441e-06, "loss": 0.21, "step": 12021 }, { "epoch": 0.69, "grad_norm": 0.3544508126717817, "learning_rate": 4.611536177245176e-06, "loss": 0.2592, "step": 12022 }, { "epoch": 0.69, "grad_norm": 1.0195307111162017, "learning_rate": 4.609968630251187e-06, "loss": 0.3661, "step": 12023 }, { "epoch": 0.69, "grad_norm": 0.3860554430872647, "learning_rate": 4.608401269913751e-06, "loss": 0.2795, "step": 12024 }, { "epoch": 0.69, "grad_norm": 0.39062012651266825, "learning_rate": 4.606834096287148e-06, "loss": 0.2738, "step": 12025 }, { "epoch": 0.69, "grad_norm": 0.3576834430909374, "learning_rate": 4.605267109425645e-06, "loss": 0.2074, "step": 12026 }, { "epoch": 0.69, "grad_norm": 1.050741505415178, "learning_rate": 4.6037003093835135e-06, "loss": 0.4651, "step": 12027 }, { "epoch": 0.69, "grad_norm": 0.3340140311716859, "learning_rate": 4.602133696215007e-06, "loss": 0.2685, "step": 12028 }, { "epoch": 0.69, "grad_norm": 0.3868998595979161, "learning_rate": 4.6005672699743795e-06, "loss": 0.3129, "step": 12029 }, { "epoch": 0.69, "grad_norm": 0.6958039333913612, "learning_rate": 4.599001030715876e-06, "loss": 0.2917, "step": 12030 }, { "epoch": 0.69, "grad_norm": 0.3372161814167908, "learning_rate": 4.59743497849373e-06, "loss": 0.274, "step": 12031 }, { "epoch": 0.69, "grad_norm": 0.3879938736639436, "learning_rate": 4.5958691133621815e-06, "loss": 0.3127, "step": 12032 }, { "epoch": 0.69, "grad_norm": 0.14895293830728926, "learning_rate": 4.594303435375454e-06, "loss": 0.1142, "step": 12033 }, { "epoch": 0.69, "grad_norm": 0.322727915954783, "learning_rate": 4.592737944587766e-06, "loss": 0.2661, "step": 12034 }, { "epoch": 0.69, "grad_norm": 1.1372554738979654, "learning_rate": 4.591172641053326e-06, "loss": 0.5845, "step": 12035 }, { "epoch": 0.69, "grad_norm": 0.42201688444677404, "learning_rate": 4.589607524826351e-06, "loss": 0.2639, "step": 12036 }, { "epoch": 0.69, "grad_norm": 0.3165929336205737, "learning_rate": 4.588042595961032e-06, "loss": 0.2747, "step": 12037 }, { "epoch": 0.69, "grad_norm": 0.40563172992912694, "learning_rate": 4.586477854511566e-06, "loss": 0.2555, "step": 12038 }, { "epoch": 0.69, "grad_norm": 0.3947859210351605, "learning_rate": 4.584913300532135e-06, "loss": 0.2764, "step": 12039 }, { "epoch": 0.69, "grad_norm": 0.28751873344914913, "learning_rate": 4.583348934076929e-06, "loss": 0.1917, "step": 12040 }, { "epoch": 0.69, "grad_norm": 0.3649164575475577, "learning_rate": 4.581784755200115e-06, "loss": 0.2989, "step": 12041 }, { "epoch": 0.69, "grad_norm": 0.7260664375548328, "learning_rate": 4.580220763955863e-06, "loss": 0.3636, "step": 12042 }, { "epoch": 0.69, "grad_norm": 0.29090161691547395, "learning_rate": 4.578656960398328e-06, "loss": 0.1897, "step": 12043 }, { "epoch": 0.69, "grad_norm": 0.3837560896918546, "learning_rate": 4.577093344581674e-06, "loss": 0.316, "step": 12044 }, { "epoch": 0.69, "grad_norm": 0.3944502241833138, "learning_rate": 4.575529916560043e-06, "loss": 0.2643, "step": 12045 }, { "epoch": 0.69, "grad_norm": 0.28381455429202385, "learning_rate": 4.573966676387579e-06, "loss": 0.2047, "step": 12046 }, { "epoch": 0.69, "grad_norm": 0.5857048817229604, "learning_rate": 4.5724036241184144e-06, "loss": 0.2819, "step": 12047 }, { "epoch": 0.69, "grad_norm": 0.3373748374080497, "learning_rate": 4.5708407598066766e-06, "loss": 0.2939, "step": 12048 }, { "epoch": 0.69, "grad_norm": 0.285983173917611, "learning_rate": 4.569278083506492e-06, "loss": 0.1824, "step": 12049 }, { "epoch": 0.69, "grad_norm": 1.1284028289918933, "learning_rate": 4.567715595271976e-06, "loss": 0.5352, "step": 12050 }, { "epoch": 0.69, "grad_norm": 1.1099867271284871, "learning_rate": 4.566153295157233e-06, "loss": 0.7988, "step": 12051 }, { "epoch": 0.69, "grad_norm": 0.2915399836846607, "learning_rate": 4.5645911832163654e-06, "loss": 0.2437, "step": 12052 }, { "epoch": 0.69, "grad_norm": 0.35039432313772084, "learning_rate": 4.563029259503474e-06, "loss": 0.2095, "step": 12053 }, { "epoch": 0.69, "grad_norm": 0.41625909045888376, "learning_rate": 4.561467524072651e-06, "loss": 0.2736, "step": 12054 }, { "epoch": 0.69, "grad_norm": 0.308228128875875, "learning_rate": 4.5599059769779654e-06, "loss": 0.2516, "step": 12055 }, { "epoch": 0.69, "grad_norm": 0.34633389816855337, "learning_rate": 4.558344618273506e-06, "loss": 0.2532, "step": 12056 }, { "epoch": 0.69, "grad_norm": 1.1119503132303514, "learning_rate": 4.556783448013338e-06, "loss": 0.7745, "step": 12057 }, { "epoch": 0.69, "grad_norm": 0.505912833656344, "learning_rate": 4.555222466251525e-06, "loss": 0.3091, "step": 12058 }, { "epoch": 0.69, "grad_norm": 0.30427640882311047, "learning_rate": 4.553661673042123e-06, "loss": 0.2245, "step": 12059 }, { "epoch": 0.69, "grad_norm": 0.2822146065378598, "learning_rate": 4.552101068439181e-06, "loss": 0.2482, "step": 12060 }, { "epoch": 0.69, "grad_norm": 0.3780466110606745, "learning_rate": 4.550540652496748e-06, "loss": 0.2474, "step": 12061 }, { "epoch": 0.69, "grad_norm": 0.5094716111081256, "learning_rate": 4.548980425268857e-06, "loss": 0.2451, "step": 12062 }, { "epoch": 0.69, "grad_norm": 1.2888871968821543, "learning_rate": 4.5474203868095415e-06, "loss": 0.4701, "step": 12063 }, { "epoch": 0.69, "grad_norm": 0.2587492497087311, "learning_rate": 4.545860537172818e-06, "loss": 0.2568, "step": 12064 }, { "epoch": 0.69, "grad_norm": 0.46174224389574064, "learning_rate": 4.5443008764127135e-06, "loss": 0.3415, "step": 12065 }, { "epoch": 0.69, "grad_norm": 0.24079285479608747, "learning_rate": 4.542741404583235e-06, "loss": 0.1051, "step": 12066 }, { "epoch": 0.69, "grad_norm": 0.42779305694074776, "learning_rate": 4.541182121738388e-06, "loss": 0.2387, "step": 12067 }, { "epoch": 0.69, "grad_norm": 0.28243846220049185, "learning_rate": 4.539623027932165e-06, "loss": 0.2727, "step": 12068 }, { "epoch": 0.69, "grad_norm": 0.7645806896247914, "learning_rate": 4.538064123218565e-06, "loss": 0.34, "step": 12069 }, { "epoch": 0.69, "grad_norm": 0.34980810066617957, "learning_rate": 4.53650540765157e-06, "loss": 0.2661, "step": 12070 }, { "epoch": 0.69, "grad_norm": 0.5862188882028873, "learning_rate": 4.534946881285158e-06, "loss": 0.3972, "step": 12071 }, { "epoch": 0.69, "grad_norm": 0.21256587958188933, "learning_rate": 4.533388544173301e-06, "loss": 0.169, "step": 12072 }, { "epoch": 0.69, "grad_norm": 0.3133687014658715, "learning_rate": 4.531830396369959e-06, "loss": 0.2401, "step": 12073 }, { "epoch": 0.69, "grad_norm": 1.1903642275444457, "learning_rate": 4.530272437929099e-06, "loss": 0.6457, "step": 12074 }, { "epoch": 0.69, "grad_norm": 0.3660557403373204, "learning_rate": 4.528714668904669e-06, "loss": 0.2687, "step": 12075 }, { "epoch": 0.69, "grad_norm": 0.5739283505931541, "learning_rate": 4.527157089350616e-06, "loss": 0.3292, "step": 12076 }, { "epoch": 0.69, "grad_norm": 0.4039211294031608, "learning_rate": 4.525599699320873e-06, "loss": 0.3117, "step": 12077 }, { "epoch": 0.69, "grad_norm": 0.30685207485541593, "learning_rate": 4.52404249886938e-06, "loss": 0.1924, "step": 12078 }, { "epoch": 0.69, "grad_norm": 0.287329467106963, "learning_rate": 4.5224854880500615e-06, "loss": 0.0706, "step": 12079 }, { "epoch": 0.69, "grad_norm": 0.27929117817412435, "learning_rate": 4.520928666916834e-06, "loss": 0.2665, "step": 12080 }, { "epoch": 0.69, "grad_norm": 0.7506304080374441, "learning_rate": 4.519372035523607e-06, "loss": 0.3991, "step": 12081 }, { "epoch": 0.69, "grad_norm": 0.4259104980903786, "learning_rate": 4.517815593924295e-06, "loss": 0.2043, "step": 12082 }, { "epoch": 0.69, "grad_norm": 0.3686775706552464, "learning_rate": 4.5162593421727926e-06, "loss": 0.3217, "step": 12083 }, { "epoch": 0.69, "grad_norm": 0.4937075304767455, "learning_rate": 4.514703280322995e-06, "loss": 0.3287, "step": 12084 }, { "epoch": 0.69, "grad_norm": 0.2030857822006487, "learning_rate": 4.513147408428786e-06, "loss": 0.1249, "step": 12085 }, { "epoch": 0.69, "grad_norm": 0.5124655651149372, "learning_rate": 4.5115917265440425e-06, "loss": 0.3465, "step": 12086 }, { "epoch": 0.69, "grad_norm": 0.4821076032344685, "learning_rate": 4.510036234722645e-06, "loss": 0.3284, "step": 12087 }, { "epoch": 0.69, "grad_norm": 0.2927910612808501, "learning_rate": 4.5084809330184605e-06, "loss": 0.2206, "step": 12088 }, { "epoch": 0.69, "grad_norm": 0.5136650655164572, "learning_rate": 4.506925821485338e-06, "loss": 0.3624, "step": 12089 }, { "epoch": 0.69, "grad_norm": 0.2936563962334066, "learning_rate": 4.505370900177142e-06, "loss": 0.1928, "step": 12090 }, { "epoch": 0.69, "grad_norm": 0.3934100319552002, "learning_rate": 4.503816169147715e-06, "loss": 0.2727, "step": 12091 }, { "epoch": 0.69, "grad_norm": 0.28160661063648673, "learning_rate": 4.502261628450898e-06, "loss": 0.2303, "step": 12092 }, { "epoch": 0.69, "grad_norm": 0.5788519867248023, "learning_rate": 4.5007072781405205e-06, "loss": 0.3512, "step": 12093 }, { "epoch": 0.69, "grad_norm": 0.6096906100436412, "learning_rate": 4.4991531182704166e-06, "loss": 0.3835, "step": 12094 }, { "epoch": 0.69, "grad_norm": 0.28201262052309894, "learning_rate": 4.497599148894404e-06, "loss": 0.2467, "step": 12095 }, { "epoch": 0.69, "grad_norm": 0.33592745821692854, "learning_rate": 4.496045370066296e-06, "loss": 0.2624, "step": 12096 }, { "epoch": 0.7, "grad_norm": 0.3753798828666515, "learning_rate": 4.494491781839901e-06, "loss": 0.2396, "step": 12097 }, { "epoch": 0.7, "grad_norm": 0.3387213970720124, "learning_rate": 4.492938384269015e-06, "loss": 0.249, "step": 12098 }, { "epoch": 0.7, "grad_norm": 0.33277551615459394, "learning_rate": 4.491385177407439e-06, "loss": 0.293, "step": 12099 }, { "epoch": 0.7, "grad_norm": 0.7391401050418194, "learning_rate": 4.489832161308958e-06, "loss": 0.4181, "step": 12100 }, { "epoch": 0.7, "grad_norm": 0.3603244564137188, "learning_rate": 4.488279336027353e-06, "loss": 0.3054, "step": 12101 }, { "epoch": 0.7, "grad_norm": 0.3353997447238762, "learning_rate": 4.486726701616393e-06, "loss": 0.0981, "step": 12102 }, { "epoch": 0.7, "grad_norm": 0.2523472651585536, "learning_rate": 4.485174258129854e-06, "loss": 0.2128, "step": 12103 }, { "epoch": 0.7, "grad_norm": 0.29579376641255173, "learning_rate": 4.483622005621493e-06, "loss": 0.2761, "step": 12104 }, { "epoch": 0.7, "grad_norm": 0.67653447878386, "learning_rate": 4.4820699441450655e-06, "loss": 0.3091, "step": 12105 }, { "epoch": 0.7, "grad_norm": 0.6401876821418024, "learning_rate": 4.4805180737543145e-06, "loss": 0.4376, "step": 12106 }, { "epoch": 0.7, "grad_norm": 0.3921815615134285, "learning_rate": 4.478966394502988e-06, "loss": 0.3295, "step": 12107 }, { "epoch": 0.7, "grad_norm": 0.3475348263674097, "learning_rate": 4.4774149064448195e-06, "loss": 0.2545, "step": 12108 }, { "epoch": 0.7, "grad_norm": 0.36534583192677034, "learning_rate": 4.475863609633534e-06, "loss": 0.2564, "step": 12109 }, { "epoch": 0.7, "grad_norm": 0.3668623445667178, "learning_rate": 4.474312504122854e-06, "loss": 0.2889, "step": 12110 }, { "epoch": 0.7, "grad_norm": 0.25610872600300666, "learning_rate": 4.472761589966493e-06, "loss": 0.2137, "step": 12111 }, { "epoch": 0.7, "grad_norm": 0.561497219037444, "learning_rate": 4.471210867218161e-06, "loss": 0.3434, "step": 12112 }, { "epoch": 0.7, "grad_norm": 0.37026942810244867, "learning_rate": 4.4696603359315604e-06, "loss": 0.3116, "step": 12113 }, { "epoch": 0.7, "grad_norm": 0.7497924302268585, "learning_rate": 4.468109996160385e-06, "loss": 0.4706, "step": 12114 }, { "epoch": 0.7, "grad_norm": 0.3076550596172886, "learning_rate": 4.466559847958318e-06, "loss": 0.2392, "step": 12115 }, { "epoch": 0.7, "grad_norm": 0.3388503851948695, "learning_rate": 4.46500989137905e-06, "loss": 0.2912, "step": 12116 }, { "epoch": 0.7, "grad_norm": 0.3243942487719419, "learning_rate": 4.463460126476251e-06, "loss": 0.2058, "step": 12117 }, { "epoch": 0.7, "grad_norm": 0.46445889232908727, "learning_rate": 4.46191055330359e-06, "loss": 0.2573, "step": 12118 }, { "epoch": 0.7, "grad_norm": 0.26405584070347937, "learning_rate": 4.460361171914724e-06, "loss": 0.2552, "step": 12119 }, { "epoch": 0.7, "grad_norm": 0.48086867532454386, "learning_rate": 4.458811982363317e-06, "loss": 0.2995, "step": 12120 }, { "epoch": 0.7, "grad_norm": 0.6352621011962262, "learning_rate": 4.457262984703015e-06, "loss": 0.2686, "step": 12121 }, { "epoch": 0.7, "grad_norm": 0.2669809765909583, "learning_rate": 4.455714178987456e-06, "loss": 0.2236, "step": 12122 }, { "epoch": 0.7, "grad_norm": 0.26373342863482124, "learning_rate": 4.454165565270272e-06, "loss": 0.2267, "step": 12123 }, { "epoch": 0.7, "grad_norm": 0.47415110258209775, "learning_rate": 4.452617143605099e-06, "loss": 0.2042, "step": 12124 }, { "epoch": 0.7, "grad_norm": 0.37038910052018265, "learning_rate": 4.451068914045556e-06, "loss": 0.3001, "step": 12125 }, { "epoch": 0.7, "grad_norm": 0.7943156231991123, "learning_rate": 4.449520876645258e-06, "loss": 0.372, "step": 12126 }, { "epoch": 0.7, "grad_norm": 0.30751059252070245, "learning_rate": 4.447973031457809e-06, "loss": 0.2835, "step": 12127 }, { "epoch": 0.7, "grad_norm": 0.31206151442044566, "learning_rate": 4.4464253785368205e-06, "loss": 0.1877, "step": 12128 }, { "epoch": 0.7, "grad_norm": 0.2591005552248067, "learning_rate": 4.4448779179358815e-06, "loss": 0.1728, "step": 12129 }, { "epoch": 0.7, "grad_norm": 1.248204418833787, "learning_rate": 4.443330649708581e-06, "loss": 0.7986, "step": 12130 }, { "epoch": 0.7, "grad_norm": 0.2736973391515441, "learning_rate": 4.441783573908498e-06, "loss": 0.2117, "step": 12131 }, { "epoch": 0.7, "grad_norm": 0.5426384659039937, "learning_rate": 4.440236690589215e-06, "loss": 0.3368, "step": 12132 }, { "epoch": 0.7, "grad_norm": 0.9915066429385122, "learning_rate": 4.438689999804295e-06, "loss": 0.4082, "step": 12133 }, { "epoch": 0.7, "grad_norm": 0.2815498231479696, "learning_rate": 4.437143501607302e-06, "loss": 0.1841, "step": 12134 }, { "epoch": 0.7, "grad_norm": 0.21873970118674596, "learning_rate": 4.435597196051789e-06, "loss": 0.2132, "step": 12135 }, { "epoch": 0.7, "grad_norm": 1.2154872539731227, "learning_rate": 4.434051083191304e-06, "loss": 0.742, "step": 12136 }, { "epoch": 0.7, "grad_norm": 0.3401125230433799, "learning_rate": 4.432505163079394e-06, "loss": 0.1953, "step": 12137 }, { "epoch": 0.7, "grad_norm": 1.0219315897519718, "learning_rate": 4.4309594357695895e-06, "loss": 0.4175, "step": 12138 }, { "epoch": 0.7, "grad_norm": 0.37693401492937234, "learning_rate": 4.429413901315421e-06, "loss": 0.3112, "step": 12139 }, { "epoch": 0.7, "grad_norm": 0.3591687661022238, "learning_rate": 4.4278685597704065e-06, "loss": 0.2951, "step": 12140 }, { "epoch": 0.7, "grad_norm": 0.434384821341122, "learning_rate": 4.426323411188067e-06, "loss": 0.1519, "step": 12141 }, { "epoch": 0.7, "grad_norm": 0.2933368604620994, "learning_rate": 4.424778455621908e-06, "loss": 0.25, "step": 12142 }, { "epoch": 0.7, "grad_norm": 0.32729484470318493, "learning_rate": 4.4232336931254324e-06, "loss": 0.2727, "step": 12143 }, { "epoch": 0.7, "grad_norm": 0.7742472208938666, "learning_rate": 4.42168912375213e-06, "loss": 0.2935, "step": 12144 }, { "epoch": 0.7, "grad_norm": 0.790094629215079, "learning_rate": 4.420144747555497e-06, "loss": 0.3164, "step": 12145 }, { "epoch": 0.7, "grad_norm": 0.355497378756577, "learning_rate": 4.418600564589012e-06, "loss": 0.273, "step": 12146 }, { "epoch": 0.7, "grad_norm": 0.33562205802520717, "learning_rate": 4.417056574906148e-06, "loss": 0.2513, "step": 12147 }, { "epoch": 0.7, "grad_norm": 0.9439148205438117, "learning_rate": 4.415512778560376e-06, "loss": 0.5202, "step": 12148 }, { "epoch": 0.7, "grad_norm": 0.31573271535824526, "learning_rate": 4.413969175605152e-06, "loss": 0.2504, "step": 12149 }, { "epoch": 0.7, "grad_norm": 0.30110495517556174, "learning_rate": 4.412425766093939e-06, "loss": 0.1944, "step": 12150 }, { "epoch": 0.7, "grad_norm": 0.5043638552372725, "learning_rate": 4.410882550080182e-06, "loss": 0.3179, "step": 12151 }, { "epoch": 0.7, "grad_norm": 0.33032605242595947, "learning_rate": 4.409339527617321e-06, "loss": 0.2619, "step": 12152 }, { "epoch": 0.7, "grad_norm": 1.2303501312225467, "learning_rate": 4.407796698758788e-06, "loss": 0.6729, "step": 12153 }, { "epoch": 0.7, "grad_norm": 0.46335850324152866, "learning_rate": 4.40625406355802e-06, "loss": 0.2766, "step": 12154 }, { "epoch": 0.7, "grad_norm": 0.2846449360546112, "learning_rate": 4.404711622068436e-06, "loss": 0.2599, "step": 12155 }, { "epoch": 0.7, "grad_norm": 0.7503421917007584, "learning_rate": 4.40316937434344e-06, "loss": 0.4376, "step": 12156 }, { "epoch": 0.7, "grad_norm": 0.2820811862011241, "learning_rate": 4.401627320436453e-06, "loss": 0.1031, "step": 12157 }, { "epoch": 0.7, "grad_norm": 0.33872988149067856, "learning_rate": 4.40008546040087e-06, "loss": 0.2569, "step": 12158 }, { "epoch": 0.7, "grad_norm": 0.3510877902653597, "learning_rate": 4.3985437942900865e-06, "loss": 0.3007, "step": 12159 }, { "epoch": 0.7, "grad_norm": 0.8283159971853021, "learning_rate": 4.397002322157492e-06, "loss": 0.3384, "step": 12160 }, { "epoch": 0.7, "grad_norm": 0.3278628379074249, "learning_rate": 4.395461044056462e-06, "loss": 0.2431, "step": 12161 }, { "epoch": 0.7, "grad_norm": 0.4197960471554202, "learning_rate": 4.393919960040377e-06, "loss": 0.2814, "step": 12162 }, { "epoch": 0.7, "grad_norm": 0.2417208244068282, "learning_rate": 4.392379070162604e-06, "loss": 0.1892, "step": 12163 }, { "epoch": 0.7, "grad_norm": 0.3290819278571764, "learning_rate": 4.390838374476503e-06, "loss": 0.2373, "step": 12164 }, { "epoch": 0.7, "grad_norm": 0.9639347889069623, "learning_rate": 4.3892978730354245e-06, "loss": 0.5774, "step": 12165 }, { "epoch": 0.7, "grad_norm": 0.32514156426728147, "learning_rate": 4.387757565892722e-06, "loss": 0.3137, "step": 12166 }, { "epoch": 0.7, "grad_norm": 0.2997269568806797, "learning_rate": 4.386217453101735e-06, "loss": 0.1819, "step": 12167 }, { "epoch": 0.7, "grad_norm": 0.44106553840301743, "learning_rate": 4.384677534715794e-06, "loss": 0.2737, "step": 12168 }, { "epoch": 0.7, "grad_norm": 0.40065246533447957, "learning_rate": 4.383137810788226e-06, "loss": 0.2289, "step": 12169 }, { "epoch": 0.7, "grad_norm": 0.3319847681859942, "learning_rate": 4.381598281372358e-06, "loss": 0.1977, "step": 12170 }, { "epoch": 0.7, "grad_norm": 0.3532188786608427, "learning_rate": 4.3800589465215e-06, "loss": 0.3221, "step": 12171 }, { "epoch": 0.7, "grad_norm": 0.8397267713928125, "learning_rate": 4.378519806288959e-06, "loss": 0.3961, "step": 12172 }, { "epoch": 0.7, "grad_norm": 0.32192933397243806, "learning_rate": 4.376980860728031e-06, "loss": 0.2236, "step": 12173 }, { "epoch": 0.7, "grad_norm": 1.0490275553766555, "learning_rate": 4.375442109892019e-06, "loss": 0.527, "step": 12174 }, { "epoch": 0.7, "grad_norm": 0.22301783924245172, "learning_rate": 4.373903553834203e-06, "loss": 0.2092, "step": 12175 }, { "epoch": 0.7, "grad_norm": 0.3305834602905949, "learning_rate": 4.372365192607866e-06, "loss": 0.2539, "step": 12176 }, { "epoch": 0.7, "grad_norm": 1.0340970094704132, "learning_rate": 4.370827026266281e-06, "loss": 0.2975, "step": 12177 }, { "epoch": 0.7, "grad_norm": 0.37116083450859394, "learning_rate": 4.36928905486271e-06, "loss": 0.299, "step": 12178 }, { "epoch": 0.7, "grad_norm": 0.29651541573485524, "learning_rate": 4.3677512784504195e-06, "loss": 0.2427, "step": 12179 }, { "epoch": 0.7, "grad_norm": 1.0371072230107874, "learning_rate": 4.366213697082661e-06, "loss": 0.3959, "step": 12180 }, { "epoch": 0.7, "grad_norm": 0.3476298224017206, "learning_rate": 4.3646763108126796e-06, "loss": 0.1951, "step": 12181 }, { "epoch": 0.7, "grad_norm": 0.651854837110252, "learning_rate": 4.363139119693712e-06, "loss": 0.3743, "step": 12182 }, { "epoch": 0.7, "grad_norm": 0.3424674523302693, "learning_rate": 4.361602123778998e-06, "loss": 0.2393, "step": 12183 }, { "epoch": 0.7, "grad_norm": 1.009983692526136, "learning_rate": 4.360065323121759e-06, "loss": 0.4591, "step": 12184 }, { "epoch": 0.7, "grad_norm": 0.4010361679084553, "learning_rate": 4.358528717775217e-06, "loss": 0.2785, "step": 12185 }, { "epoch": 0.7, "grad_norm": 0.35234124130496747, "learning_rate": 4.356992307792578e-06, "loss": 0.2476, "step": 12186 }, { "epoch": 0.7, "grad_norm": 0.23746666293127477, "learning_rate": 4.355456093227056e-06, "loss": 0.1907, "step": 12187 }, { "epoch": 0.7, "grad_norm": 0.39902282031332437, "learning_rate": 4.353920074131848e-06, "loss": 0.2616, "step": 12188 }, { "epoch": 0.7, "grad_norm": 0.437743857169606, "learning_rate": 4.352384250560147e-06, "loss": 0.3208, "step": 12189 }, { "epoch": 0.7, "grad_norm": 0.3195775949375206, "learning_rate": 4.350848622565131e-06, "loss": 0.239, "step": 12190 }, { "epoch": 0.7, "grad_norm": 0.3694916234759852, "learning_rate": 4.349313190199988e-06, "loss": 0.28, "step": 12191 }, { "epoch": 0.7, "grad_norm": 0.5313708496767161, "learning_rate": 4.347777953517885e-06, "loss": 0.3363, "step": 12192 }, { "epoch": 0.7, "grad_norm": 0.19723128611524043, "learning_rate": 4.3462429125719884e-06, "loss": 0.0851, "step": 12193 }, { "epoch": 0.7, "grad_norm": 0.3500548154240149, "learning_rate": 4.344708067415454e-06, "loss": 0.263, "step": 12194 }, { "epoch": 0.7, "grad_norm": 0.38467142893601863, "learning_rate": 4.34317341810144e-06, "loss": 0.2988, "step": 12195 }, { "epoch": 0.7, "grad_norm": 0.6932149267264256, "learning_rate": 4.341638964683086e-06, "loss": 0.3442, "step": 12196 }, { "epoch": 0.7, "grad_norm": 0.3163438844619936, "learning_rate": 4.3401047072135315e-06, "loss": 0.2652, "step": 12197 }, { "epoch": 0.7, "grad_norm": 1.2479930111845223, "learning_rate": 4.338570645745904e-06, "loss": 0.5147, "step": 12198 }, { "epoch": 0.7, "grad_norm": 0.21571739388095768, "learning_rate": 4.337036780333336e-06, "loss": 0.1895, "step": 12199 }, { "epoch": 0.7, "grad_norm": 0.5681859271955713, "learning_rate": 4.33550311102894e-06, "loss": 0.3091, "step": 12200 }, { "epoch": 0.7, "grad_norm": 0.4052305180015126, "learning_rate": 4.333969637885827e-06, "loss": 0.3169, "step": 12201 }, { "epoch": 0.7, "grad_norm": 0.33747729303466445, "learning_rate": 4.332436360957104e-06, "loss": 0.3031, "step": 12202 }, { "epoch": 0.7, "grad_norm": 0.34873869056387957, "learning_rate": 4.3309032802958605e-06, "loss": 0.1527, "step": 12203 }, { "epoch": 0.7, "grad_norm": 0.4009960812786949, "learning_rate": 4.329370395955198e-06, "loss": 0.301, "step": 12204 }, { "epoch": 0.7, "grad_norm": 0.6642800794115589, "learning_rate": 4.3278377079881935e-06, "loss": 0.3961, "step": 12205 }, { "epoch": 0.7, "grad_norm": 0.30282343673125794, "learning_rate": 4.326305216447926e-06, "loss": 0.2192, "step": 12206 }, { "epoch": 0.7, "grad_norm": 0.23522905895128113, "learning_rate": 4.32477292138746e-06, "loss": 0.2117, "step": 12207 }, { "epoch": 0.7, "grad_norm": 1.2941628507913063, "learning_rate": 4.3232408228598685e-06, "loss": 0.6561, "step": 12208 }, { "epoch": 0.7, "grad_norm": 0.35384968435253333, "learning_rate": 4.321708920918203e-06, "loss": 0.2012, "step": 12209 }, { "epoch": 0.7, "grad_norm": 0.3530687951144725, "learning_rate": 4.320177215615513e-06, "loss": 0.2766, "step": 12210 }, { "epoch": 0.7, "grad_norm": 0.5241546712916147, "learning_rate": 4.318645707004839e-06, "loss": 0.3251, "step": 12211 }, { "epoch": 0.7, "grad_norm": 0.36823411666661837, "learning_rate": 4.317114395139222e-06, "loss": 0.2413, "step": 12212 }, { "epoch": 0.7, "grad_norm": 0.2259296402410964, "learning_rate": 4.3155832800716905e-06, "loss": 0.1746, "step": 12213 }, { "epoch": 0.7, "grad_norm": 0.35860128935561664, "learning_rate": 4.314052361855265e-06, "loss": 0.3109, "step": 12214 }, { "epoch": 0.7, "grad_norm": 0.5859399504943746, "learning_rate": 4.312521640542961e-06, "loss": 0.4153, "step": 12215 }, { "epoch": 0.7, "grad_norm": 0.35181741535289196, "learning_rate": 4.310991116187786e-06, "loss": 0.2138, "step": 12216 }, { "epoch": 0.7, "grad_norm": 0.5705186817412079, "learning_rate": 4.309460788842747e-06, "loss": 0.3709, "step": 12217 }, { "epoch": 0.7, "grad_norm": 0.3280362062848473, "learning_rate": 4.307930658560836e-06, "loss": 0.2838, "step": 12218 }, { "epoch": 0.7, "grad_norm": 0.25938678514674396, "learning_rate": 4.306400725395041e-06, "loss": 0.1739, "step": 12219 }, { "epoch": 0.7, "grad_norm": 0.3705889257477343, "learning_rate": 4.304870989398341e-06, "loss": 0.2753, "step": 12220 }, { "epoch": 0.7, "grad_norm": 0.7826792297259539, "learning_rate": 4.303341450623717e-06, "loss": 0.3297, "step": 12221 }, { "epoch": 0.7, "grad_norm": 0.25656625473960787, "learning_rate": 4.301812109124134e-06, "loss": 0.2215, "step": 12222 }, { "epoch": 0.7, "grad_norm": 0.7552748960134755, "learning_rate": 4.300282964952553e-06, "loss": 0.3884, "step": 12223 }, { "epoch": 0.7, "grad_norm": 0.6690995984223479, "learning_rate": 4.2987540181619265e-06, "loss": 0.3853, "step": 12224 }, { "epoch": 0.7, "grad_norm": 0.2465990758728293, "learning_rate": 4.2972252688052055e-06, "loss": 0.1742, "step": 12225 }, { "epoch": 0.7, "grad_norm": 0.3394607609597814, "learning_rate": 4.295696716935326e-06, "loss": 0.2924, "step": 12226 }, { "epoch": 0.7, "grad_norm": 0.3112479870105338, "learning_rate": 4.294168362605224e-06, "loss": 0.2147, "step": 12227 }, { "epoch": 0.7, "grad_norm": 0.3997767864323401, "learning_rate": 4.292640205867824e-06, "loss": 0.2892, "step": 12228 }, { "epoch": 0.7, "grad_norm": 1.072222696374089, "learning_rate": 4.291112246776052e-06, "loss": 0.2978, "step": 12229 }, { "epoch": 0.7, "grad_norm": 0.2996227549076089, "learning_rate": 4.2895844853828165e-06, "loss": 0.2741, "step": 12230 }, { "epoch": 0.7, "grad_norm": 0.40168932524681566, "learning_rate": 4.288056921741024e-06, "loss": 0.3001, "step": 12231 }, { "epoch": 0.7, "grad_norm": 0.2768315986265087, "learning_rate": 4.286529555903572e-06, "loss": 0.1643, "step": 12232 }, { "epoch": 0.7, "grad_norm": 0.41305074420474364, "learning_rate": 4.285002387923359e-06, "loss": 0.2157, "step": 12233 }, { "epoch": 0.7, "grad_norm": 0.3232120840888913, "learning_rate": 4.283475417853268e-06, "loss": 0.2766, "step": 12234 }, { "epoch": 0.7, "grad_norm": 0.7148956900870683, "learning_rate": 4.2819486457461765e-06, "loss": 0.2733, "step": 12235 }, { "epoch": 0.7, "grad_norm": 0.8019061600739739, "learning_rate": 4.280422071654955e-06, "loss": 0.4053, "step": 12236 }, { "epoch": 0.7, "grad_norm": 0.3394041675400556, "learning_rate": 4.278895695632474e-06, "loss": 0.2784, "step": 12237 }, { "epoch": 0.7, "grad_norm": 0.3341011586741274, "learning_rate": 4.27736951773159e-06, "loss": 0.2744, "step": 12238 }, { "epoch": 0.7, "grad_norm": 0.35152470282886145, "learning_rate": 4.275843538005153e-06, "loss": 0.2208, "step": 12239 }, { "epoch": 0.7, "grad_norm": 0.370400992897311, "learning_rate": 4.274317756506008e-06, "loss": 0.2785, "step": 12240 }, { "epoch": 0.7, "grad_norm": 0.4693231268558691, "learning_rate": 4.2727921732869894e-06, "loss": 0.2495, "step": 12241 }, { "epoch": 0.7, "grad_norm": 0.3241862213090096, "learning_rate": 4.271266788400935e-06, "loss": 0.2379, "step": 12242 }, { "epoch": 0.7, "grad_norm": 0.3449514730274378, "learning_rate": 4.269741601900667e-06, "loss": 0.2681, "step": 12243 }, { "epoch": 0.7, "grad_norm": 1.1807650540650891, "learning_rate": 4.268216613838998e-06, "loss": 0.6882, "step": 12244 }, { "epoch": 0.7, "grad_norm": 0.36468184086103056, "learning_rate": 4.266691824268739e-06, "loss": 0.2161, "step": 12245 }, { "epoch": 0.7, "grad_norm": 0.29744735921142973, "learning_rate": 4.2651672332427e-06, "loss": 0.264, "step": 12246 }, { "epoch": 0.7, "grad_norm": 0.476773513987374, "learning_rate": 4.263642840813672e-06, "loss": 0.2558, "step": 12247 }, { "epoch": 0.7, "grad_norm": 0.5281422302262536, "learning_rate": 4.262118647034447e-06, "loss": 0.132, "step": 12248 }, { "epoch": 0.7, "grad_norm": 0.3918244540090564, "learning_rate": 4.260594651957801e-06, "loss": 0.3159, "step": 12249 }, { "epoch": 0.7, "grad_norm": 0.3831510721886654, "learning_rate": 4.25907085563652e-06, "loss": 0.3368, "step": 12250 }, { "epoch": 0.7, "grad_norm": 0.518415226887272, "learning_rate": 4.257547258123369e-06, "loss": 0.2439, "step": 12251 }, { "epoch": 0.7, "grad_norm": 0.3912430449227991, "learning_rate": 4.256023859471109e-06, "loss": 0.2949, "step": 12252 }, { "epoch": 0.7, "grad_norm": 0.31137797694610614, "learning_rate": 4.254500659732496e-06, "loss": 0.1849, "step": 12253 }, { "epoch": 0.7, "grad_norm": 0.36367056958104427, "learning_rate": 4.2529776589602735e-06, "loss": 0.3097, "step": 12254 }, { "epoch": 0.7, "grad_norm": 0.28821543574715613, "learning_rate": 4.251454857207193e-06, "loss": 0.1862, "step": 12255 }, { "epoch": 0.7, "grad_norm": 0.7304920210259426, "learning_rate": 4.249932254525985e-06, "loss": 0.4968, "step": 12256 }, { "epoch": 0.7, "grad_norm": 0.4805174118924019, "learning_rate": 4.24840985096937e-06, "loss": 0.3333, "step": 12257 }, { "epoch": 0.7, "grad_norm": 0.24509749223680857, "learning_rate": 4.246887646590077e-06, "loss": 0.2109, "step": 12258 }, { "epoch": 0.7, "grad_norm": 0.30668412572681175, "learning_rate": 4.245365641440818e-06, "loss": 0.2188, "step": 12259 }, { "epoch": 0.7, "grad_norm": 1.0420482711700545, "learning_rate": 4.243843835574299e-06, "loss": 0.4492, "step": 12260 }, { "epoch": 0.7, "grad_norm": 0.29947922607947813, "learning_rate": 4.242322229043218e-06, "loss": 0.2126, "step": 12261 }, { "epoch": 0.7, "grad_norm": 0.34042241000669604, "learning_rate": 4.240800821900274e-06, "loss": 0.2878, "step": 12262 }, { "epoch": 0.7, "grad_norm": 0.8679278842448671, "learning_rate": 4.23927961419815e-06, "loss": 0.4032, "step": 12263 }, { "epoch": 0.7, "grad_norm": 0.3370249737144881, "learning_rate": 4.237758605989523e-06, "loss": 0.247, "step": 12264 }, { "epoch": 0.7, "grad_norm": 0.15104224638072863, "learning_rate": 4.236237797327071e-06, "loss": 0.071, "step": 12265 }, { "epoch": 0.7, "grad_norm": 0.37245443877274914, "learning_rate": 4.2347171882634505e-06, "loss": 0.3221, "step": 12266 }, { "epoch": 0.7, "grad_norm": 0.3957859482382533, "learning_rate": 4.2331967788513295e-06, "loss": 0.2676, "step": 12267 }, { "epoch": 0.7, "grad_norm": 0.4817458267951734, "learning_rate": 4.231676569143357e-06, "loss": 0.2875, "step": 12268 }, { "epoch": 0.7, "grad_norm": 0.33385576628286906, "learning_rate": 4.230156559192177e-06, "loss": 0.3, "step": 12269 }, { "epoch": 0.7, "grad_norm": 0.3606609119285642, "learning_rate": 4.228636749050422e-06, "loss": 0.2815, "step": 12270 }, { "epoch": 0.71, "grad_norm": 0.28662475275203025, "learning_rate": 4.227117138770733e-06, "loss": 0.1133, "step": 12271 }, { "epoch": 0.71, "grad_norm": 1.1924756559482446, "learning_rate": 4.225597728405729e-06, "loss": 0.5055, "step": 12272 }, { "epoch": 0.71, "grad_norm": 0.33127274692223163, "learning_rate": 4.224078518008028e-06, "loss": 0.2566, "step": 12273 }, { "epoch": 0.71, "grad_norm": 0.39603641748968726, "learning_rate": 4.222559507630235e-06, "loss": 0.2764, "step": 12274 }, { "epoch": 0.71, "grad_norm": 1.1371302801557202, "learning_rate": 4.221040697324962e-06, "loss": 0.8109, "step": 12275 }, { "epoch": 0.71, "grad_norm": 0.35190049784212624, "learning_rate": 4.2195220871448005e-06, "loss": 0.2424, "step": 12276 }, { "epoch": 0.71, "grad_norm": 0.27023187100195273, "learning_rate": 4.218003677142342e-06, "loss": 0.2132, "step": 12277 }, { "epoch": 0.71, "grad_norm": 0.48459025177133447, "learning_rate": 4.216485467370163e-06, "loss": 0.2421, "step": 12278 }, { "epoch": 0.71, "grad_norm": 0.30829697404074013, "learning_rate": 4.214967457880846e-06, "loss": 0.2431, "step": 12279 }, { "epoch": 0.71, "grad_norm": 0.6722535491136014, "learning_rate": 4.213449648726958e-06, "loss": 0.4095, "step": 12280 }, { "epoch": 0.71, "grad_norm": 0.30788458676053476, "learning_rate": 4.211932039961061e-06, "loss": 0.2747, "step": 12281 }, { "epoch": 0.71, "grad_norm": 0.3495341382200821, "learning_rate": 4.210414631635707e-06, "loss": 0.2409, "step": 12282 }, { "epoch": 0.71, "grad_norm": 1.1231503038430486, "learning_rate": 4.208897423803443e-06, "loss": 0.5617, "step": 12283 }, { "epoch": 0.71, "grad_norm": 0.3074058591745467, "learning_rate": 4.207380416516815e-06, "loss": 0.162, "step": 12284 }, { "epoch": 0.71, "grad_norm": 0.4244766221986541, "learning_rate": 4.2058636098283545e-06, "loss": 0.294, "step": 12285 }, { "epoch": 0.71, "grad_norm": 0.31602488202105283, "learning_rate": 4.204347003790588e-06, "loss": 0.3086, "step": 12286 }, { "epoch": 0.71, "grad_norm": 0.9753506360078198, "learning_rate": 4.202830598456032e-06, "loss": 0.5152, "step": 12287 }, { "epoch": 0.71, "grad_norm": 0.4134773260026491, "learning_rate": 4.201314393877206e-06, "loss": 0.2844, "step": 12288 }, { "epoch": 0.71, "grad_norm": 0.3680992182220188, "learning_rate": 4.199798390106613e-06, "loss": 0.2835, "step": 12289 }, { "epoch": 0.71, "grad_norm": 0.29708444907868753, "learning_rate": 4.198282587196757e-06, "loss": 0.2302, "step": 12290 }, { "epoch": 0.71, "grad_norm": 0.2937281945675394, "learning_rate": 4.196766985200118e-06, "loss": 0.1866, "step": 12291 }, { "epoch": 0.71, "grad_norm": 0.6844793590759544, "learning_rate": 4.195251584169192e-06, "loss": 0.4549, "step": 12292 }, { "epoch": 0.71, "grad_norm": 0.5098854869023844, "learning_rate": 4.193736384156455e-06, "loss": 0.3973, "step": 12293 }, { "epoch": 0.71, "grad_norm": 0.2564448270712793, "learning_rate": 4.192221385214377e-06, "loss": 0.2072, "step": 12294 }, { "epoch": 0.71, "grad_norm": 0.5101193979062211, "learning_rate": 4.190706587395418e-06, "loss": 0.2752, "step": 12295 }, { "epoch": 0.71, "grad_norm": 0.4351332109330603, "learning_rate": 4.189191990752044e-06, "loss": 0.271, "step": 12296 }, { "epoch": 0.71, "grad_norm": 0.26640761033236676, "learning_rate": 4.187677595336702e-06, "loss": 0.2164, "step": 12297 }, { "epoch": 0.71, "grad_norm": 0.7739642089022813, "learning_rate": 4.186163401201835e-06, "loss": 0.3333, "step": 12298 }, { "epoch": 0.71, "grad_norm": 0.9586181627625986, "learning_rate": 4.184649408399876e-06, "loss": 0.6132, "step": 12299 }, { "epoch": 0.71, "grad_norm": 0.3301622040533963, "learning_rate": 4.183135616983261e-06, "loss": 0.1892, "step": 12300 }, { "epoch": 0.71, "grad_norm": 0.4567133495984465, "learning_rate": 4.181622027004409e-06, "loss": 0.3046, "step": 12301 }, { "epoch": 0.71, "grad_norm": 0.46370032678492656, "learning_rate": 4.1801086385157366e-06, "loss": 0.3539, "step": 12302 }, { "epoch": 0.71, "grad_norm": 0.4066621177522798, "learning_rate": 4.178595451569648e-06, "loss": 0.2692, "step": 12303 }, { "epoch": 0.71, "grad_norm": 0.20560129040918335, "learning_rate": 4.177082466218553e-06, "loss": 0.1409, "step": 12304 }, { "epoch": 0.71, "grad_norm": 0.35498442687133175, "learning_rate": 4.17556968251484e-06, "loss": 0.3217, "step": 12305 }, { "epoch": 0.71, "grad_norm": 0.6429202460112974, "learning_rate": 4.1740571005109e-06, "loss": 0.3263, "step": 12306 }, { "epoch": 0.71, "grad_norm": 0.35829501157032695, "learning_rate": 4.1725447202591115e-06, "loss": 0.2361, "step": 12307 }, { "epoch": 0.71, "grad_norm": 0.6702766382148628, "learning_rate": 4.171032541811846e-06, "loss": 0.3579, "step": 12308 }, { "epoch": 0.71, "grad_norm": 0.302468040201284, "learning_rate": 4.169520565221476e-06, "loss": 0.2649, "step": 12309 }, { "epoch": 0.71, "grad_norm": 0.19513857944378782, "learning_rate": 4.1680087905403575e-06, "loss": 0.1667, "step": 12310 }, { "epoch": 0.71, "grad_norm": 1.3169755916645531, "learning_rate": 4.166497217820844e-06, "loss": 0.7212, "step": 12311 }, { "epoch": 0.71, "grad_norm": 0.3903916545542141, "learning_rate": 4.164985847115279e-06, "loss": 0.2538, "step": 12312 }, { "epoch": 0.71, "grad_norm": 0.3307900193032403, "learning_rate": 4.163474678476004e-06, "loss": 0.2559, "step": 12313 }, { "epoch": 0.71, "grad_norm": 0.7210475091920264, "learning_rate": 4.161963711955351e-06, "loss": 0.3969, "step": 12314 }, { "epoch": 0.71, "grad_norm": 0.4130701912582865, "learning_rate": 4.1604529476056446e-06, "loss": 0.2791, "step": 12315 }, { "epoch": 0.71, "grad_norm": 0.33158894769484887, "learning_rate": 4.158942385479198e-06, "loss": 0.2499, "step": 12316 }, { "epoch": 0.71, "grad_norm": 0.26527730099168056, "learning_rate": 4.157432025628327e-06, "loss": 0.2226, "step": 12317 }, { "epoch": 0.71, "grad_norm": 0.4024841563367093, "learning_rate": 4.155921868105336e-06, "loss": 0.2536, "step": 12318 }, { "epoch": 0.71, "grad_norm": 0.4715939185767074, "learning_rate": 4.154411912962518e-06, "loss": 0.3245, "step": 12319 }, { "epoch": 0.71, "grad_norm": 0.49784484653712924, "learning_rate": 4.152902160252165e-06, "loss": 0.2754, "step": 12320 }, { "epoch": 0.71, "grad_norm": 0.3167715593401071, "learning_rate": 4.151392610026554e-06, "loss": 0.2556, "step": 12321 }, { "epoch": 0.71, "grad_norm": 0.5362943498624293, "learning_rate": 4.149883262337969e-06, "loss": 0.361, "step": 12322 }, { "epoch": 0.71, "grad_norm": 0.2270767119225587, "learning_rate": 4.148374117238676e-06, "loss": 0.1251, "step": 12323 }, { "epoch": 0.71, "grad_norm": 0.5482866300378484, "learning_rate": 4.1468651747809366e-06, "loss": 0.2773, "step": 12324 }, { "epoch": 0.71, "grad_norm": 0.26991337180134006, "learning_rate": 4.145356435017003e-06, "loss": 0.262, "step": 12325 }, { "epoch": 0.71, "grad_norm": 0.8129780685542745, "learning_rate": 4.143847897999124e-06, "loss": 0.3138, "step": 12326 }, { "epoch": 0.71, "grad_norm": 0.6230381356562327, "learning_rate": 4.142339563779542e-06, "loss": 0.3043, "step": 12327 }, { "epoch": 0.71, "grad_norm": 0.4264045585072286, "learning_rate": 4.140831432410484e-06, "loss": 0.3266, "step": 12328 }, { "epoch": 0.71, "grad_norm": 0.3738143640469887, "learning_rate": 4.139323503944186e-06, "loss": 0.3328, "step": 12329 }, { "epoch": 0.71, "grad_norm": 0.2431647354371285, "learning_rate": 4.1378157784328625e-06, "loss": 0.1254, "step": 12330 }, { "epoch": 0.71, "grad_norm": 0.5894238839413188, "learning_rate": 4.136308255928726e-06, "loss": 0.3434, "step": 12331 }, { "epoch": 0.71, "grad_norm": 1.2000110180565327, "learning_rate": 4.134800936483983e-06, "loss": 0.6542, "step": 12332 }, { "epoch": 0.71, "grad_norm": 0.267646159596712, "learning_rate": 4.1332938201508285e-06, "loss": 0.2183, "step": 12333 }, { "epoch": 0.71, "grad_norm": 0.5490669982217542, "learning_rate": 4.13178690698146e-06, "loss": 0.3078, "step": 12334 }, { "epoch": 0.71, "grad_norm": 0.40224921477044767, "learning_rate": 4.130280197028058e-06, "loss": 0.2917, "step": 12335 }, { "epoch": 0.71, "grad_norm": 0.398436666474072, "learning_rate": 4.128773690342801e-06, "loss": 0.2357, "step": 12336 }, { "epoch": 0.71, "grad_norm": 0.3085871901913578, "learning_rate": 4.127267386977854e-06, "loss": 0.2875, "step": 12337 }, { "epoch": 0.71, "grad_norm": 0.41757727862480615, "learning_rate": 4.125761286985389e-06, "loss": 0.2504, "step": 12338 }, { "epoch": 0.71, "grad_norm": 0.5953337494643126, "learning_rate": 4.124255390417558e-06, "loss": 0.2831, "step": 12339 }, { "epoch": 0.71, "grad_norm": 0.3973615565133707, "learning_rate": 4.122749697326511e-06, "loss": 0.2354, "step": 12340 }, { "epoch": 0.71, "grad_norm": 0.3576969019418588, "learning_rate": 4.121244207764384e-06, "loss": 0.3025, "step": 12341 }, { "epoch": 0.71, "grad_norm": 0.6161027575033992, "learning_rate": 4.119738921783323e-06, "loss": 0.3604, "step": 12342 }, { "epoch": 0.71, "grad_norm": 0.22322602570107805, "learning_rate": 4.118233839435449e-06, "loss": 0.1689, "step": 12343 }, { "epoch": 0.71, "grad_norm": 0.5065158568778143, "learning_rate": 4.1167289607728845e-06, "loss": 0.3368, "step": 12344 }, { "epoch": 0.71, "grad_norm": 0.4057898445010308, "learning_rate": 4.1152242858477435e-06, "loss": 0.2837, "step": 12345 }, { "epoch": 0.71, "grad_norm": 0.31512309786509607, "learning_rate": 4.113719814712127e-06, "loss": 0.2346, "step": 12346 }, { "epoch": 0.71, "grad_norm": 0.6702265380981722, "learning_rate": 4.112215547418145e-06, "loss": 0.4688, "step": 12347 }, { "epoch": 0.71, "grad_norm": 0.38723962767239795, "learning_rate": 4.110711484017886e-06, "loss": 0.3277, "step": 12348 }, { "epoch": 0.71, "grad_norm": 0.238084010943153, "learning_rate": 4.1092076245634346e-06, "loss": 0.1993, "step": 12349 }, { "epoch": 0.71, "grad_norm": 0.4857992938940668, "learning_rate": 4.107703969106867e-06, "loss": 0.2588, "step": 12350 }, { "epoch": 0.71, "grad_norm": 0.617699439344278, "learning_rate": 4.10620051770026e-06, "loss": 0.3235, "step": 12351 }, { "epoch": 0.71, "grad_norm": 0.4087980308933262, "learning_rate": 4.104697270395676e-06, "loss": 0.3254, "step": 12352 }, { "epoch": 0.71, "grad_norm": 0.34914826637736407, "learning_rate": 4.103194227245172e-06, "loss": 0.2729, "step": 12353 }, { "epoch": 0.71, "grad_norm": 0.5393994201005855, "learning_rate": 4.101691388300795e-06, "loss": 0.3091, "step": 12354 }, { "epoch": 0.71, "grad_norm": 0.42117791947230276, "learning_rate": 4.100188753614595e-06, "loss": 0.2889, "step": 12355 }, { "epoch": 0.71, "grad_norm": 0.2679960829980229, "learning_rate": 4.098686323238604e-06, "loss": 0.1591, "step": 12356 }, { "epoch": 0.71, "grad_norm": 0.3767455945961782, "learning_rate": 4.097184097224853e-06, "loss": 0.263, "step": 12357 }, { "epoch": 0.71, "grad_norm": 0.3979800959449096, "learning_rate": 4.095682075625363e-06, "loss": 0.2972, "step": 12358 }, { "epoch": 0.71, "grad_norm": 0.7153844837257711, "learning_rate": 4.094180258492147e-06, "loss": 0.3261, "step": 12359 }, { "epoch": 0.71, "grad_norm": 0.33698757282514463, "learning_rate": 4.092678645877217e-06, "loss": 0.3312, "step": 12360 }, { "epoch": 0.71, "grad_norm": 0.32736161830074756, "learning_rate": 4.09117723783257e-06, "loss": 0.2544, "step": 12361 }, { "epoch": 0.71, "grad_norm": 0.29749907521333085, "learning_rate": 4.089676034410198e-06, "loss": 0.1178, "step": 12362 }, { "epoch": 0.71, "grad_norm": 0.7603494708438945, "learning_rate": 4.088175035662095e-06, "loss": 0.4403, "step": 12363 }, { "epoch": 0.71, "grad_norm": 0.3803334125043377, "learning_rate": 4.086674241640235e-06, "loss": 0.2635, "step": 12364 }, { "epoch": 0.71, "grad_norm": 0.3682851850725568, "learning_rate": 4.085173652396593e-06, "loss": 0.3203, "step": 12365 }, { "epoch": 0.71, "grad_norm": 0.5811058200485942, "learning_rate": 4.083673267983128e-06, "loss": 0.2581, "step": 12366 }, { "epoch": 0.71, "grad_norm": 0.34836499493050266, "learning_rate": 4.0821730884518085e-06, "loss": 0.2758, "step": 12367 }, { "epoch": 0.71, "grad_norm": 0.2767104843102272, "learning_rate": 4.08067311385458e-06, "loss": 0.1553, "step": 12368 }, { "epoch": 0.71, "grad_norm": 0.30352089800280146, "learning_rate": 4.079173344243387e-06, "loss": 0.2286, "step": 12369 }, { "epoch": 0.71, "grad_norm": 0.32423430858686303, "learning_rate": 4.077673779670166e-06, "loss": 0.2643, "step": 12370 }, { "epoch": 0.71, "grad_norm": 0.6190946400648354, "learning_rate": 4.076174420186844e-06, "loss": 0.4023, "step": 12371 }, { "epoch": 0.71, "grad_norm": 0.3159074215735569, "learning_rate": 4.07467526584535e-06, "loss": 0.2145, "step": 12372 }, { "epoch": 0.71, "grad_norm": 0.30664118077870395, "learning_rate": 4.073176316697598e-06, "loss": 0.2746, "step": 12373 }, { "epoch": 0.71, "grad_norm": 0.3017502296489269, "learning_rate": 4.071677572795495e-06, "loss": 0.1723, "step": 12374 }, { "epoch": 0.71, "grad_norm": 0.4615780722507267, "learning_rate": 4.0701790341909386e-06, "loss": 0.2075, "step": 12375 }, { "epoch": 0.71, "grad_norm": 0.3855982088594073, "learning_rate": 4.068680700935831e-06, "loss": 0.3036, "step": 12376 }, { "epoch": 0.71, "grad_norm": 0.36671811376397273, "learning_rate": 4.0671825730820555e-06, "loss": 0.3023, "step": 12377 }, { "epoch": 0.71, "grad_norm": 1.2575854114457314, "learning_rate": 4.065684650681493e-06, "loss": 0.75, "step": 12378 }, { "epoch": 0.71, "grad_norm": 0.34502650981498256, "learning_rate": 4.064186933786012e-06, "loss": 0.2024, "step": 12379 }, { "epoch": 0.71, "grad_norm": 0.22705977337178385, "learning_rate": 4.062689422447487e-06, "loss": 0.2112, "step": 12380 }, { "epoch": 0.71, "grad_norm": 0.640804257150291, "learning_rate": 4.061192116717771e-06, "loss": 0.4038, "step": 12381 }, { "epoch": 0.71, "grad_norm": 0.33188078306411595, "learning_rate": 4.0596950166487146e-06, "loss": 0.2323, "step": 12382 }, { "epoch": 0.71, "grad_norm": 1.2416745783711352, "learning_rate": 4.058198122292167e-06, "loss": 0.6851, "step": 12383 }, { "epoch": 0.71, "grad_norm": 0.33509544879795256, "learning_rate": 4.0567014336999584e-06, "loss": 0.2977, "step": 12384 }, { "epoch": 0.71, "grad_norm": 0.35947747921768164, "learning_rate": 4.055204950923927e-06, "loss": 0.205, "step": 12385 }, { "epoch": 0.71, "grad_norm": 0.32272880351486805, "learning_rate": 4.053708674015893e-06, "loss": 0.1713, "step": 12386 }, { "epoch": 0.71, "grad_norm": 0.5147264850139122, "learning_rate": 4.052212603027672e-06, "loss": 0.3099, "step": 12387 }, { "epoch": 0.71, "grad_norm": 0.2537057854401847, "learning_rate": 4.050716738011068e-06, "loss": 0.1977, "step": 12388 }, { "epoch": 0.71, "grad_norm": 0.5015595237780548, "learning_rate": 4.049221079017892e-06, "loss": 0.3651, "step": 12389 }, { "epoch": 0.71, "grad_norm": 1.2806204433781172, "learning_rate": 4.0477256260999344e-06, "loss": 0.6888, "step": 12390 }, { "epoch": 0.71, "grad_norm": 0.3961070936444837, "learning_rate": 4.046230379308982e-06, "loss": 0.2544, "step": 12391 }, { "epoch": 0.71, "grad_norm": 0.24351026173987872, "learning_rate": 4.0447353386968155e-06, "loss": 0.2007, "step": 12392 }, { "epoch": 0.71, "grad_norm": 0.5317384655177135, "learning_rate": 4.043240504315209e-06, "loss": 0.3628, "step": 12393 }, { "epoch": 0.71, "grad_norm": 0.36782911635286475, "learning_rate": 4.041745876215927e-06, "loss": 0.3094, "step": 12394 }, { "epoch": 0.71, "grad_norm": 0.26601632760932586, "learning_rate": 4.040251454450729e-06, "loss": 0.1666, "step": 12395 }, { "epoch": 0.71, "grad_norm": 0.36052225817436, "learning_rate": 4.038757239071364e-06, "loss": 0.3037, "step": 12396 }, { "epoch": 0.71, "grad_norm": 0.3159879747015827, "learning_rate": 4.037263230129583e-06, "loss": 0.2501, "step": 12397 }, { "epoch": 0.71, "grad_norm": 0.6144153528165102, "learning_rate": 4.035769427677118e-06, "loss": 0.2974, "step": 12398 }, { "epoch": 0.71, "grad_norm": 0.4335431228327503, "learning_rate": 4.034275831765702e-06, "loss": 0.2698, "step": 12399 }, { "epoch": 0.71, "grad_norm": 0.2705356897806853, "learning_rate": 4.032782442447055e-06, "loss": 0.249, "step": 12400 }, { "epoch": 0.71, "grad_norm": 0.27198063920594956, "learning_rate": 4.031289259772898e-06, "loss": 0.1899, "step": 12401 }, { "epoch": 0.71, "grad_norm": 0.9726204979369423, "learning_rate": 4.029796283794938e-06, "loss": 0.5632, "step": 12402 }, { "epoch": 0.71, "grad_norm": 0.3511151019796598, "learning_rate": 4.028303514564876e-06, "loss": 0.2668, "step": 12403 }, { "epoch": 0.71, "grad_norm": 0.46440585552108493, "learning_rate": 4.026810952134402e-06, "loss": 0.2985, "step": 12404 }, { "epoch": 0.71, "grad_norm": 0.3997705620102345, "learning_rate": 4.025318596555212e-06, "loss": 0.2284, "step": 12405 }, { "epoch": 0.71, "grad_norm": 0.24602939683462624, "learning_rate": 4.023826447878982e-06, "loss": 0.2192, "step": 12406 }, { "epoch": 0.71, "grad_norm": 1.1948345929606112, "learning_rate": 4.022334506157386e-06, "loss": 0.8185, "step": 12407 }, { "epoch": 0.71, "grad_norm": 0.33598971886925083, "learning_rate": 4.020842771442085e-06, "loss": 0.2542, "step": 12408 }, { "epoch": 0.71, "grad_norm": 0.42240133665361956, "learning_rate": 4.019351243784745e-06, "loss": 0.2873, "step": 12409 }, { "epoch": 0.71, "grad_norm": 0.5062951214860231, "learning_rate": 4.017859923237014e-06, "loss": 0.3348, "step": 12410 }, { "epoch": 0.71, "grad_norm": 0.34173636455748163, "learning_rate": 4.016368809850537e-06, "loss": 0.2076, "step": 12411 }, { "epoch": 0.71, "grad_norm": 0.3865772364660087, "learning_rate": 4.01487790367695e-06, "loss": 0.2696, "step": 12412 }, { "epoch": 0.71, "grad_norm": 0.3557344917335959, "learning_rate": 4.013387204767881e-06, "loss": 0.3212, "step": 12413 }, { "epoch": 0.71, "grad_norm": 0.23541794800684201, "learning_rate": 4.01189671317496e-06, "loss": 0.0956, "step": 12414 }, { "epoch": 0.71, "grad_norm": 0.2951339678252151, "learning_rate": 4.0104064289497965e-06, "loss": 0.2571, "step": 12415 }, { "epoch": 0.71, "grad_norm": 0.33535513867717087, "learning_rate": 4.008916352144002e-06, "loss": 0.302, "step": 12416 }, { "epoch": 0.71, "grad_norm": 1.12245964652745, "learning_rate": 4.007426482809172e-06, "loss": 0.5287, "step": 12417 }, { "epoch": 0.71, "grad_norm": 0.29781734362720846, "learning_rate": 4.0059368209969106e-06, "loss": 0.186, "step": 12418 }, { "epoch": 0.71, "grad_norm": 1.0779840391036732, "learning_rate": 4.004447366758798e-06, "loss": 0.7548, "step": 12419 }, { "epoch": 0.71, "grad_norm": 0.22319618891727672, "learning_rate": 4.002958120146415e-06, "loss": 0.2061, "step": 12420 }, { "epoch": 0.71, "grad_norm": 0.3003774906221277, "learning_rate": 4.001469081211332e-06, "loss": 0.2077, "step": 12421 }, { "epoch": 0.71, "grad_norm": 0.6334985632628138, "learning_rate": 3.99998025000512e-06, "loss": 0.3586, "step": 12422 }, { "epoch": 0.71, "grad_norm": 1.0323028337477798, "learning_rate": 3.998491626579334e-06, "loss": 0.4385, "step": 12423 }, { "epoch": 0.71, "grad_norm": 0.22590105407607644, "learning_rate": 3.997003210985524e-06, "loss": 0.2142, "step": 12424 }, { "epoch": 0.71, "grad_norm": 1.2186487143817915, "learning_rate": 3.995515003275235e-06, "loss": 0.7572, "step": 12425 }, { "epoch": 0.71, "grad_norm": 0.320271494595398, "learning_rate": 3.9940270035000036e-06, "loss": 0.2219, "step": 12426 }, { "epoch": 0.71, "grad_norm": 0.5789444426466166, "learning_rate": 3.992539211711359e-06, "loss": 0.3055, "step": 12427 }, { "epoch": 0.71, "grad_norm": 0.2750210705197789, "learning_rate": 3.991051627960822e-06, "loss": 0.2382, "step": 12428 }, { "epoch": 0.71, "grad_norm": 1.1626005918199633, "learning_rate": 3.989564252299907e-06, "loss": 0.6162, "step": 12429 }, { "epoch": 0.71, "grad_norm": 0.6137501022840672, "learning_rate": 3.988077084780126e-06, "loss": 0.3585, "step": 12430 }, { "epoch": 0.71, "grad_norm": 0.3564797669004418, "learning_rate": 3.986590125452977e-06, "loss": 0.2574, "step": 12431 }, { "epoch": 0.71, "grad_norm": 0.3595173618942061, "learning_rate": 3.985103374369954e-06, "loss": 0.3004, "step": 12432 }, { "epoch": 0.71, "grad_norm": 0.5610988564310685, "learning_rate": 3.983616831582538e-06, "loss": 0.3092, "step": 12433 }, { "epoch": 0.71, "grad_norm": 0.22865917764490962, "learning_rate": 3.9821304971422155e-06, "loss": 0.153, "step": 12434 }, { "epoch": 0.71, "grad_norm": 1.0396124580607748, "learning_rate": 3.980644371100457e-06, "loss": 0.4856, "step": 12435 }, { "epoch": 0.71, "grad_norm": 0.2712024160349323, "learning_rate": 3.979158453508724e-06, "loss": 0.2664, "step": 12436 }, { "epoch": 0.71, "grad_norm": 0.45818436095554166, "learning_rate": 3.977672744418475e-06, "loss": 0.2934, "step": 12437 }, { "epoch": 0.71, "grad_norm": 0.7060226833636093, "learning_rate": 3.976187243881156e-06, "loss": 0.3856, "step": 12438 }, { "epoch": 0.71, "grad_norm": 0.28119753881737464, "learning_rate": 3.974701951948218e-06, "loss": 0.207, "step": 12439 }, { "epoch": 0.71, "grad_norm": 0.3156999954213079, "learning_rate": 3.973216868671092e-06, "loss": 0.2725, "step": 12440 }, { "epoch": 0.71, "grad_norm": 0.3275821325716865, "learning_rate": 3.9717319941012054e-06, "loss": 0.0955, "step": 12441 }, { "epoch": 0.71, "grad_norm": 0.4230111041588665, "learning_rate": 3.970247328289979e-06, "loss": 0.3222, "step": 12442 }, { "epoch": 0.71, "grad_norm": 0.5330638370371196, "learning_rate": 3.96876287128883e-06, "loss": 0.3889, "step": 12443 }, { "epoch": 0.71, "grad_norm": 0.48238254940743125, "learning_rate": 3.967278623149165e-06, "loss": 0.2652, "step": 12444 }, { "epoch": 0.72, "grad_norm": 0.6377281456624062, "learning_rate": 3.965794583922382e-06, "loss": 0.27, "step": 12445 }, { "epoch": 0.72, "grad_norm": 0.24998486209393883, "learning_rate": 3.964310753659869e-06, "loss": 0.1951, "step": 12446 }, { "epoch": 0.72, "grad_norm": 0.3267863704261433, "learning_rate": 3.9628271324130185e-06, "loss": 0.2329, "step": 12447 }, { "epoch": 0.72, "grad_norm": 0.5376672392151259, "learning_rate": 3.961343720233204e-06, "loss": 0.2847, "step": 12448 }, { "epoch": 0.72, "grad_norm": 0.41943787136242416, "learning_rate": 3.9598605171717976e-06, "loss": 0.3178, "step": 12449 }, { "epoch": 0.72, "grad_norm": 0.6504493113239271, "learning_rate": 3.958377523280162e-06, "loss": 0.3003, "step": 12450 }, { "epoch": 0.72, "grad_norm": 0.6739410311061353, "learning_rate": 3.956894738609649e-06, "loss": 0.3241, "step": 12451 }, { "epoch": 0.72, "grad_norm": 0.25881186952173607, "learning_rate": 3.955412163211615e-06, "loss": 0.2511, "step": 12452 }, { "epoch": 0.72, "grad_norm": 0.4730776211188233, "learning_rate": 3.953929797137398e-06, "loss": 0.2741, "step": 12453 }, { "epoch": 0.72, "grad_norm": 0.32294979003618474, "learning_rate": 3.9524476404383324e-06, "loss": 0.1982, "step": 12454 }, { "epoch": 0.72, "grad_norm": 0.4427233885442675, "learning_rate": 3.9509656931657405e-06, "loss": 0.3236, "step": 12455 }, { "epoch": 0.72, "grad_norm": 0.5624680637454126, "learning_rate": 3.949483955370951e-06, "loss": 0.3349, "step": 12456 }, { "epoch": 0.72, "grad_norm": 0.3926441797137969, "learning_rate": 3.9480024271052715e-06, "loss": 0.1955, "step": 12457 }, { "epoch": 0.72, "grad_norm": 0.33987897951718266, "learning_rate": 3.946521108420008e-06, "loss": 0.2365, "step": 12458 }, { "epoch": 0.72, "grad_norm": 0.3372391079565309, "learning_rate": 3.945039999366458e-06, "loss": 0.2494, "step": 12459 }, { "epoch": 0.72, "grad_norm": 0.3119762747576032, "learning_rate": 3.9435590999959115e-06, "loss": 0.2094, "step": 12460 }, { "epoch": 0.72, "grad_norm": 0.5354947724202691, "learning_rate": 3.942078410359655e-06, "loss": 0.3259, "step": 12461 }, { "epoch": 0.72, "grad_norm": 0.7620520172406171, "learning_rate": 3.940597930508962e-06, "loss": 0.4483, "step": 12462 }, { "epoch": 0.72, "grad_norm": 0.3951157539623328, "learning_rate": 3.939117660495098e-06, "loss": 0.2072, "step": 12463 }, { "epoch": 0.72, "grad_norm": 0.30130478030699875, "learning_rate": 3.937637600369332e-06, "loss": 0.2685, "step": 12464 }, { "epoch": 0.72, "grad_norm": 0.317880848548765, "learning_rate": 3.936157750182915e-06, "loss": 0.1775, "step": 12465 }, { "epoch": 0.72, "grad_norm": 0.7654264548014176, "learning_rate": 3.934678109987096e-06, "loss": 0.3159, "step": 12466 }, { "epoch": 0.72, "grad_norm": 0.35211292954046547, "learning_rate": 3.933198679833108e-06, "loss": 0.2436, "step": 12467 }, { "epoch": 0.72, "grad_norm": 0.48058464826750036, "learning_rate": 3.931719459772193e-06, "loss": 0.4169, "step": 12468 }, { "epoch": 0.72, "grad_norm": 0.7896326662596463, "learning_rate": 3.9302404498555725e-06, "loss": 0.3491, "step": 12469 }, { "epoch": 0.72, "grad_norm": 0.1967202253818277, "learning_rate": 3.928761650134464e-06, "loss": 0.149, "step": 12470 }, { "epoch": 0.72, "grad_norm": 0.5161953619328937, "learning_rate": 3.927283060660075e-06, "loss": 0.326, "step": 12471 }, { "epoch": 0.72, "grad_norm": 0.4444720336390856, "learning_rate": 3.925804681483614e-06, "loss": 0.3244, "step": 12472 }, { "epoch": 0.72, "grad_norm": 0.36218587264417024, "learning_rate": 3.924326512656279e-06, "loss": 0.243, "step": 12473 }, { "epoch": 0.72, "grad_norm": 1.308785521351059, "learning_rate": 3.922848554229254e-06, "loss": 0.7986, "step": 12474 }, { "epoch": 0.72, "grad_norm": 0.37676328938957093, "learning_rate": 3.921370806253722e-06, "loss": 0.2643, "step": 12475 }, { "epoch": 0.72, "grad_norm": 0.29437300765355345, "learning_rate": 3.919893268780854e-06, "loss": 0.2193, "step": 12476 }, { "epoch": 0.72, "grad_norm": 0.3144016115644878, "learning_rate": 3.918415941861825e-06, "loss": 0.2123, "step": 12477 }, { "epoch": 0.72, "grad_norm": 0.49162002682484635, "learning_rate": 3.91693882554779e-06, "loss": 0.2872, "step": 12478 }, { "epoch": 0.72, "grad_norm": 0.36777889700838373, "learning_rate": 3.915461919889903e-06, "loss": 0.3064, "step": 12479 }, { "epoch": 0.72, "grad_norm": 0.3299043967705451, "learning_rate": 3.913985224939303e-06, "loss": 0.2594, "step": 12480 }, { "epoch": 0.72, "grad_norm": 0.790198056679378, "learning_rate": 3.912508740747137e-06, "loss": 0.3766, "step": 12481 }, { "epoch": 0.72, "grad_norm": 0.34173548287831607, "learning_rate": 3.911032467364531e-06, "loss": 0.2535, "step": 12482 }, { "epoch": 0.72, "grad_norm": 0.2538296150592013, "learning_rate": 3.909556404842609e-06, "loss": 0.1983, "step": 12483 }, { "epoch": 0.72, "grad_norm": 0.8236400542230081, "learning_rate": 3.908080553232484e-06, "loss": 0.4423, "step": 12484 }, { "epoch": 0.72, "grad_norm": 0.3028413178151663, "learning_rate": 3.906604912585271e-06, "loss": 0.2541, "step": 12485 }, { "epoch": 0.72, "grad_norm": 0.951138237415222, "learning_rate": 3.905129482952067e-06, "loss": 0.4526, "step": 12486 }, { "epoch": 0.72, "grad_norm": 0.3129589276357312, "learning_rate": 3.903654264383967e-06, "loss": 0.2508, "step": 12487 }, { "epoch": 0.72, "grad_norm": 0.3663928277836569, "learning_rate": 3.902179256932058e-06, "loss": 0.2743, "step": 12488 }, { "epoch": 0.72, "grad_norm": 0.776248988057081, "learning_rate": 3.900704460647416e-06, "loss": 0.29, "step": 12489 }, { "epoch": 0.72, "grad_norm": 0.32241816415937624, "learning_rate": 3.89922987558112e-06, "loss": 0.1965, "step": 12490 }, { "epoch": 0.72, "grad_norm": 0.2956955229384876, "learning_rate": 3.897755501784231e-06, "loss": 0.2823, "step": 12491 }, { "epoch": 0.72, "grad_norm": 0.49019798055890323, "learning_rate": 3.896281339307805e-06, "loss": 0.3925, "step": 12492 }, { "epoch": 0.72, "grad_norm": 0.8539817933659437, "learning_rate": 3.8948073882028945e-06, "loss": 0.1781, "step": 12493 }, { "epoch": 0.72, "grad_norm": 0.3545876480577167, "learning_rate": 3.893333648520542e-06, "loss": 0.2658, "step": 12494 }, { "epoch": 0.72, "grad_norm": 0.3817268627937381, "learning_rate": 3.891860120311784e-06, "loss": 0.3108, "step": 12495 }, { "epoch": 0.72, "grad_norm": 0.4087311870618779, "learning_rate": 3.890386803627642e-06, "loss": 0.1709, "step": 12496 }, { "epoch": 0.72, "grad_norm": 0.4015110688746216, "learning_rate": 3.888913698519145e-06, "loss": 0.3142, "step": 12497 }, { "epoch": 0.72, "grad_norm": 0.4151975450528388, "learning_rate": 3.887440805037306e-06, "loss": 0.2699, "step": 12498 }, { "epoch": 0.72, "grad_norm": 0.3269623807011317, "learning_rate": 3.885968123233128e-06, "loss": 0.233, "step": 12499 }, { "epoch": 0.72, "grad_norm": 0.35990831564539366, "learning_rate": 3.884495653157611e-06, "loss": 0.292, "step": 12500 }, { "epoch": 0.72, "grad_norm": 0.67394425063061, "learning_rate": 3.883023394861742e-06, "loss": 0.3572, "step": 12501 }, { "epoch": 0.72, "grad_norm": 0.33215648058639724, "learning_rate": 3.881551348396515e-06, "loss": 0.1395, "step": 12502 }, { "epoch": 0.72, "grad_norm": 0.2825531167285477, "learning_rate": 3.880079513812901e-06, "loss": 0.2577, "step": 12503 }, { "epoch": 0.72, "grad_norm": 0.3434631773890578, "learning_rate": 3.878607891161871e-06, "loss": 0.2822, "step": 12504 }, { "epoch": 0.72, "grad_norm": 0.8360101546679122, "learning_rate": 3.8771364804943825e-06, "loss": 0.4662, "step": 12505 }, { "epoch": 0.72, "grad_norm": 0.31128367410087177, "learning_rate": 3.8756652818613975e-06, "loss": 0.2256, "step": 12506 }, { "epoch": 0.72, "grad_norm": 0.3889226384732306, "learning_rate": 3.8741942953138616e-06, "loss": 0.3229, "step": 12507 }, { "epoch": 0.72, "grad_norm": 1.4681783448097931, "learning_rate": 3.872723520902713e-06, "loss": 0.7554, "step": 12508 }, { "epoch": 0.72, "grad_norm": 0.2748888158937971, "learning_rate": 3.87125295867888e-06, "loss": 0.2006, "step": 12509 }, { "epoch": 0.72, "grad_norm": 0.27882312944220194, "learning_rate": 3.8697826086933e-06, "loss": 0.1742, "step": 12510 }, { "epoch": 0.72, "grad_norm": 0.3533427566146823, "learning_rate": 3.868312470996884e-06, "loss": 0.3008, "step": 12511 }, { "epoch": 0.72, "grad_norm": 0.3111571689105808, "learning_rate": 3.866842545640542e-06, "loss": 0.1881, "step": 12512 }, { "epoch": 0.72, "grad_norm": 0.6455308980072993, "learning_rate": 3.86537283267518e-06, "loss": 0.3791, "step": 12513 }, { "epoch": 0.72, "grad_norm": 0.4809467316313205, "learning_rate": 3.863903332151689e-06, "loss": 0.3647, "step": 12514 }, { "epoch": 0.72, "grad_norm": 0.40863296174877306, "learning_rate": 3.862434044120966e-06, "loss": 0.2974, "step": 12515 }, { "epoch": 0.72, "grad_norm": 0.20722922763128526, "learning_rate": 3.860964968633888e-06, "loss": 0.1662, "step": 12516 }, { "epoch": 0.72, "grad_norm": 0.5038681834454446, "learning_rate": 3.859496105741328e-06, "loss": 0.3576, "step": 12517 }, { "epoch": 0.72, "grad_norm": 0.3730892496342433, "learning_rate": 3.858027455494152e-06, "loss": 0.271, "step": 12518 }, { "epoch": 0.72, "grad_norm": 0.37789847744452215, "learning_rate": 3.856559017943223e-06, "loss": 0.2646, "step": 12519 }, { "epoch": 0.72, "grad_norm": 1.2120655893657206, "learning_rate": 3.8550907931393925e-06, "loss": 0.5642, "step": 12520 }, { "epoch": 0.72, "grad_norm": 0.32447720363858357, "learning_rate": 3.853622781133503e-06, "loss": 0.2848, "step": 12521 }, { "epoch": 0.72, "grad_norm": 0.3132306682340447, "learning_rate": 3.852154981976388e-06, "loss": 0.2106, "step": 12522 }, { "epoch": 0.72, "grad_norm": 0.43512513969447275, "learning_rate": 3.8506873957188865e-06, "loss": 0.3212, "step": 12523 }, { "epoch": 0.72, "grad_norm": 0.32854106565529423, "learning_rate": 3.849220022411815e-06, "loss": 0.261, "step": 12524 }, { "epoch": 0.72, "grad_norm": 0.4033956109292242, "learning_rate": 3.84775286210599e-06, "loss": 0.1076, "step": 12525 }, { "epoch": 0.72, "grad_norm": 0.4484468926497038, "learning_rate": 3.846285914852216e-06, "loss": 0.3562, "step": 12526 }, { "epoch": 0.72, "grad_norm": 0.2682409012322284, "learning_rate": 3.844819180701302e-06, "loss": 0.2414, "step": 12527 }, { "epoch": 0.72, "grad_norm": 1.2722056250509972, "learning_rate": 3.843352659704032e-06, "loss": 0.6141, "step": 12528 }, { "epoch": 0.72, "grad_norm": 0.4828470045708207, "learning_rate": 3.841886351911195e-06, "loss": 0.227, "step": 12529 }, { "epoch": 0.72, "grad_norm": 0.30225508543561413, "learning_rate": 3.840420257373565e-06, "loss": 0.245, "step": 12530 }, { "epoch": 0.72, "grad_norm": 0.29599291538165057, "learning_rate": 3.83895437614192e-06, "loss": 0.246, "step": 12531 }, { "epoch": 0.72, "grad_norm": 1.1752706306155982, "learning_rate": 3.837488708267021e-06, "loss": 0.3813, "step": 12532 }, { "epoch": 0.72, "grad_norm": 0.43554275734913983, "learning_rate": 3.836023253799621e-06, "loss": 0.2608, "step": 12533 }, { "epoch": 0.72, "grad_norm": 0.4608820204490836, "learning_rate": 3.834558012790469e-06, "loss": 0.347, "step": 12534 }, { "epoch": 0.72, "grad_norm": 0.2756554502113018, "learning_rate": 3.833092985290311e-06, "loss": 0.2264, "step": 12535 }, { "epoch": 0.72, "grad_norm": 0.39145909647593713, "learning_rate": 3.831628171349877e-06, "loss": 0.2618, "step": 12536 }, { "epoch": 0.72, "grad_norm": 0.49976557663593313, "learning_rate": 3.8301635710198946e-06, "loss": 0.2346, "step": 12537 }, { "epoch": 0.72, "grad_norm": 0.4174397744132169, "learning_rate": 3.828699184351079e-06, "loss": 0.2437, "step": 12538 }, { "epoch": 0.72, "grad_norm": 0.276496262939729, "learning_rate": 3.8272350113941494e-06, "loss": 0.2565, "step": 12539 }, { "epoch": 0.72, "grad_norm": 0.5865133389721945, "learning_rate": 3.825771052199805e-06, "loss": 0.4208, "step": 12540 }, { "epoch": 0.72, "grad_norm": 0.8880953147875467, "learning_rate": 3.824307306818745e-06, "loss": 0.4872, "step": 12541 }, { "epoch": 0.72, "grad_norm": 0.2268528697619566, "learning_rate": 3.822843775301656e-06, "loss": 0.1535, "step": 12542 }, { "epoch": 0.72, "grad_norm": 0.29010451586633673, "learning_rate": 3.821380457699217e-06, "loss": 0.2439, "step": 12543 }, { "epoch": 0.72, "grad_norm": 1.2260624615809157, "learning_rate": 3.819917354062113e-06, "loss": 0.6806, "step": 12544 }, { "epoch": 0.72, "grad_norm": 0.2824308471763668, "learning_rate": 3.8184544644410026e-06, "loss": 0.2042, "step": 12545 }, { "epoch": 0.72, "grad_norm": 0.776619086277665, "learning_rate": 3.816991788886551e-06, "loss": 0.4011, "step": 12546 }, { "epoch": 0.72, "grad_norm": 0.3530785405930903, "learning_rate": 3.815529327449402e-06, "loss": 0.312, "step": 12547 }, { "epoch": 0.72, "grad_norm": 0.32265673340370904, "learning_rate": 3.8140670801802114e-06, "loss": 0.1984, "step": 12548 }, { "epoch": 0.72, "grad_norm": 0.2522603406858445, "learning_rate": 3.8126050471296116e-06, "loss": 0.1534, "step": 12549 }, { "epoch": 0.72, "grad_norm": 0.3592744062620737, "learning_rate": 3.811143228348233e-06, "loss": 0.3229, "step": 12550 }, { "epoch": 0.72, "grad_norm": 0.33175709985865126, "learning_rate": 3.809681623886694e-06, "loss": 0.1751, "step": 12551 }, { "epoch": 0.72, "grad_norm": 0.5046438803543758, "learning_rate": 3.8082202337956187e-06, "loss": 0.3209, "step": 12552 }, { "epoch": 0.72, "grad_norm": 1.0417354951774573, "learning_rate": 3.80675905812561e-06, "loss": 0.524, "step": 12553 }, { "epoch": 0.72, "grad_norm": 0.3264891220161526, "learning_rate": 3.805298096927269e-06, "loss": 0.2089, "step": 12554 }, { "epoch": 0.72, "grad_norm": 0.2294114627721482, "learning_rate": 3.803837350251188e-06, "loss": 0.2131, "step": 12555 }, { "epoch": 0.72, "grad_norm": 1.2952230032564946, "learning_rate": 3.8023768181479493e-06, "loss": 0.8189, "step": 12556 }, { "epoch": 0.72, "grad_norm": 0.48200138614488336, "learning_rate": 3.800916500668139e-06, "loss": 0.3078, "step": 12557 }, { "epoch": 0.72, "grad_norm": 0.2545052188435359, "learning_rate": 3.7994563978623243e-06, "loss": 0.2414, "step": 12558 }, { "epoch": 0.72, "grad_norm": 1.1399172653767695, "learning_rate": 3.7979965097810667e-06, "loss": 0.5805, "step": 12559 }, { "epoch": 0.72, "grad_norm": 0.4024592476572722, "learning_rate": 3.7965368364749244e-06, "loss": 0.2655, "step": 12560 }, { "epoch": 0.72, "grad_norm": 0.2189980848773631, "learning_rate": 3.7950773779944437e-06, "loss": 0.1199, "step": 12561 }, { "epoch": 0.72, "grad_norm": 0.31760219818640645, "learning_rate": 3.793618134390168e-06, "loss": 0.3045, "step": 12562 }, { "epoch": 0.72, "grad_norm": 0.3990174933506131, "learning_rate": 3.792159105712625e-06, "loss": 0.2764, "step": 12563 }, { "epoch": 0.72, "grad_norm": 0.49668825548514706, "learning_rate": 3.7907002920123482e-06, "loss": 0.2711, "step": 12564 }, { "epoch": 0.72, "grad_norm": 1.2066436895640726, "learning_rate": 3.7892416933398534e-06, "loss": 0.5866, "step": 12565 }, { "epoch": 0.72, "grad_norm": 0.325699463496659, "learning_rate": 3.7877833097456527e-06, "loss": 0.259, "step": 12566 }, { "epoch": 0.72, "grad_norm": 0.2975927184507548, "learning_rate": 3.786325141280248e-06, "loss": 0.2501, "step": 12567 }, { "epoch": 0.72, "grad_norm": 0.43994328199667015, "learning_rate": 3.7848671879941334e-06, "loss": 0.2036, "step": 12568 }, { "epoch": 0.72, "grad_norm": 0.6533205368181108, "learning_rate": 3.783409449937804e-06, "loss": 0.3164, "step": 12569 }, { "epoch": 0.72, "grad_norm": 0.3752405058155906, "learning_rate": 3.7819519271617377e-06, "loss": 0.3023, "step": 12570 }, { "epoch": 0.72, "grad_norm": 0.3617519561068833, "learning_rate": 3.7804946197164096e-06, "loss": 0.2507, "step": 12571 }, { "epoch": 0.72, "grad_norm": 0.594735705756224, "learning_rate": 3.779037527652282e-06, "loss": 0.3228, "step": 12572 }, { "epoch": 0.72, "grad_norm": 0.2359376307305643, "learning_rate": 3.77758065101982e-06, "loss": 0.1857, "step": 12573 }, { "epoch": 0.72, "grad_norm": 0.35542427665950116, "learning_rate": 3.7761239898694724e-06, "loss": 0.2582, "step": 12574 }, { "epoch": 0.72, "grad_norm": 0.5222486191552719, "learning_rate": 3.774667544251683e-06, "loss": 0.309, "step": 12575 }, { "epoch": 0.72, "grad_norm": 0.41862420835222075, "learning_rate": 3.773211314216887e-06, "loss": 0.3407, "step": 12576 }, { "epoch": 0.72, "grad_norm": 1.3031113013277529, "learning_rate": 3.7717552998155184e-06, "loss": 0.3212, "step": 12577 }, { "epoch": 0.72, "grad_norm": 0.3062819204901965, "learning_rate": 3.770299501097995e-06, "loss": 0.2475, "step": 12578 }, { "epoch": 0.72, "grad_norm": 0.29069925258267515, "learning_rate": 3.768843918114733e-06, "loss": 0.2404, "step": 12579 }, { "epoch": 0.72, "grad_norm": 0.4692780214015884, "learning_rate": 3.767388550916138e-06, "loss": 0.2927, "step": 12580 }, { "epoch": 0.72, "grad_norm": 0.3419993754544002, "learning_rate": 3.7659333995526047e-06, "loss": 0.2067, "step": 12581 }, { "epoch": 0.72, "grad_norm": 0.5372210817123204, "learning_rate": 3.7644784640745346e-06, "loss": 0.3642, "step": 12582 }, { "epoch": 0.72, "grad_norm": 0.3720534268613179, "learning_rate": 3.763023744532307e-06, "loss": 0.3042, "step": 12583 }, { "epoch": 0.72, "grad_norm": 0.3256481791308857, "learning_rate": 3.761569240976298e-06, "loss": 0.1504, "step": 12584 }, { "epoch": 0.72, "grad_norm": 0.4828728938480592, "learning_rate": 3.7601149534568757e-06, "loss": 0.3779, "step": 12585 }, { "epoch": 0.72, "grad_norm": 0.313460913744006, "learning_rate": 3.7586608820244076e-06, "loss": 0.3181, "step": 12586 }, { "epoch": 0.72, "grad_norm": 0.18315300828709036, "learning_rate": 3.7572070267292438e-06, "loss": 0.0854, "step": 12587 }, { "epoch": 0.72, "grad_norm": 0.33954649548166543, "learning_rate": 3.7557533876217325e-06, "loss": 0.2789, "step": 12588 }, { "epoch": 0.72, "grad_norm": 1.1214543359616003, "learning_rate": 3.7542999647522094e-06, "loss": 0.5101, "step": 12589 }, { "epoch": 0.72, "grad_norm": 0.3266141732037362, "learning_rate": 3.7528467581710137e-06, "loss": 0.2917, "step": 12590 }, { "epoch": 0.72, "grad_norm": 0.3392692953249234, "learning_rate": 3.7513937679284664e-06, "loss": 0.2326, "step": 12591 }, { "epoch": 0.72, "grad_norm": 0.704023761433763, "learning_rate": 3.749940994074884e-06, "loss": 0.4103, "step": 12592 }, { "epoch": 0.72, "grad_norm": 0.3830258897079524, "learning_rate": 3.7484884366605758e-06, "loss": 0.2555, "step": 12593 }, { "epoch": 0.72, "grad_norm": 0.24571652256923193, "learning_rate": 3.7470360957358442e-06, "loss": 0.2247, "step": 12594 }, { "epoch": 0.72, "grad_norm": 0.4350436646267506, "learning_rate": 3.7455839713509844e-06, "loss": 0.2822, "step": 12595 }, { "epoch": 0.72, "grad_norm": 0.5916263926081792, "learning_rate": 3.7441320635562828e-06, "loss": 0.3318, "step": 12596 }, { "epoch": 0.72, "grad_norm": 0.3253913954279248, "learning_rate": 3.7426803724020143e-06, "loss": 0.2366, "step": 12597 }, { "epoch": 0.72, "grad_norm": 0.3462154083645354, "learning_rate": 3.7412288979384604e-06, "loss": 0.2957, "step": 12598 }, { "epoch": 0.72, "grad_norm": 0.36298865887781684, "learning_rate": 3.739777640215879e-06, "loss": 0.2289, "step": 12599 }, { "epoch": 0.72, "grad_norm": 0.37829378006780723, "learning_rate": 3.7383265992845297e-06, "loss": 0.2398, "step": 12600 }, { "epoch": 0.72, "grad_norm": 0.4908548396772708, "learning_rate": 3.736875775194657e-06, "loss": 0.2657, "step": 12601 }, { "epoch": 0.72, "grad_norm": 0.34330907741617966, "learning_rate": 3.7354251679965103e-06, "loss": 0.2757, "step": 12602 }, { "epoch": 0.72, "grad_norm": 0.3923379452795223, "learning_rate": 3.7339747777403212e-06, "loss": 0.2949, "step": 12603 }, { "epoch": 0.72, "grad_norm": 0.8516521788185919, "learning_rate": 3.7325246044763164e-06, "loss": 0.2947, "step": 12604 }, { "epoch": 0.72, "grad_norm": 1.2060849155640518, "learning_rate": 3.7310746482547143e-06, "loss": 0.7756, "step": 12605 }, { "epoch": 0.72, "grad_norm": 0.2613915751889735, "learning_rate": 3.729624909125724e-06, "loss": 0.2592, "step": 12606 }, { "epoch": 0.72, "grad_norm": 0.25492878460525664, "learning_rate": 3.7281753871395575e-06, "loss": 0.1748, "step": 12607 }, { "epoch": 0.72, "grad_norm": 0.6764847935708693, "learning_rate": 3.726726082346408e-06, "loss": 0.3877, "step": 12608 }, { "epoch": 0.72, "grad_norm": 0.3204321858680907, "learning_rate": 3.725276994796463e-06, "loss": 0.2423, "step": 12609 }, { "epoch": 0.72, "grad_norm": 0.3596000242651779, "learning_rate": 3.7238281245399032e-06, "loss": 0.2481, "step": 12610 }, { "epoch": 0.72, "grad_norm": 0.7745774779469131, "learning_rate": 3.72237947162691e-06, "loss": 0.5167, "step": 12611 }, { "epoch": 0.72, "grad_norm": 0.35232144352690364, "learning_rate": 3.7209310361076445e-06, "loss": 0.2963, "step": 12612 }, { "epoch": 0.72, "grad_norm": 0.24440360054586446, "learning_rate": 3.719482818032267e-06, "loss": 0.1043, "step": 12613 }, { "epoch": 0.72, "grad_norm": 0.3260683902895594, "learning_rate": 3.7180348174509275e-06, "loss": 0.2785, "step": 12614 }, { "epoch": 0.72, "grad_norm": 0.34981957635207345, "learning_rate": 3.7165870344137746e-06, "loss": 0.2824, "step": 12615 }, { "epoch": 0.72, "grad_norm": 1.22822645248553, "learning_rate": 3.715139468970942e-06, "loss": 0.6816, "step": 12616 }, { "epoch": 0.72, "grad_norm": 0.3974630087585372, "learning_rate": 3.7136921211725595e-06, "loss": 0.2532, "step": 12617 }, { "epoch": 0.72, "grad_norm": 0.34005021932049256, "learning_rate": 3.7122449910687495e-06, "loss": 0.2688, "step": 12618 }, { "epoch": 0.73, "grad_norm": 0.3135211846099563, "learning_rate": 3.710798078709621e-06, "loss": 0.2613, "step": 12619 }, { "epoch": 0.73, "grad_norm": 0.3350723208691652, "learning_rate": 3.7093513841452876e-06, "loss": 0.1817, "step": 12620 }, { "epoch": 0.73, "grad_norm": 0.3935505259433406, "learning_rate": 3.7079049074258465e-06, "loss": 0.3123, "step": 12621 }, { "epoch": 0.73, "grad_norm": 0.36173769233262987, "learning_rate": 3.7064586486013865e-06, "loss": 0.3023, "step": 12622 }, { "epoch": 0.73, "grad_norm": 1.1886936765534302, "learning_rate": 3.7050126077219908e-06, "loss": 0.4164, "step": 12623 }, { "epoch": 0.73, "grad_norm": 0.31195240262411766, "learning_rate": 3.70356678483774e-06, "loss": 0.2534, "step": 12624 }, { "epoch": 0.73, "grad_norm": 0.43851398106112804, "learning_rate": 3.702121179998701e-06, "loss": 0.3313, "step": 12625 }, { "epoch": 0.73, "grad_norm": 0.3608270575366872, "learning_rate": 3.7006757932549355e-06, "loss": 0.2095, "step": 12626 }, { "epoch": 0.73, "grad_norm": 0.2572942190550343, "learning_rate": 3.6992306246564923e-06, "loss": 0.2211, "step": 12627 }, { "epoch": 0.73, "grad_norm": 1.2467478374867251, "learning_rate": 3.697785674253428e-06, "loss": 0.6807, "step": 12628 }, { "epoch": 0.73, "grad_norm": 0.4778900262033747, "learning_rate": 3.696340942095772e-06, "loss": 0.3873, "step": 12629 }, { "epoch": 0.73, "grad_norm": 0.2611211344507252, "learning_rate": 3.6948964282335576e-06, "loss": 0.2184, "step": 12630 }, { "epoch": 0.73, "grad_norm": 0.6519226464029577, "learning_rate": 3.693452132716806e-06, "loss": 0.3749, "step": 12631 }, { "epoch": 0.73, "grad_norm": 0.37288735997878936, "learning_rate": 3.6920080555955396e-06, "loss": 0.2098, "step": 12632 }, { "epoch": 0.73, "grad_norm": 0.2948673783502911, "learning_rate": 3.6905641969197626e-06, "loss": 0.1943, "step": 12633 }, { "epoch": 0.73, "grad_norm": 0.34946669359400256, "learning_rate": 3.689120556739475e-06, "loss": 0.3015, "step": 12634 }, { "epoch": 0.73, "grad_norm": 1.262967952928276, "learning_rate": 3.687677135104669e-06, "loss": 0.7467, "step": 12635 }, { "epoch": 0.73, "grad_norm": 0.2994023778760764, "learning_rate": 3.6862339320653353e-06, "loss": 0.1952, "step": 12636 }, { "epoch": 0.73, "grad_norm": 0.6864311781365865, "learning_rate": 3.6847909476714495e-06, "loss": 0.3597, "step": 12637 }, { "epoch": 0.73, "grad_norm": 0.2598362738797895, "learning_rate": 3.683348181972981e-06, "loss": 0.2505, "step": 12638 }, { "epoch": 0.73, "grad_norm": 0.3127138752691672, "learning_rate": 3.68190563501989e-06, "loss": 0.1789, "step": 12639 }, { "epoch": 0.73, "grad_norm": 0.47588630014745503, "learning_rate": 3.6804633068621388e-06, "loss": 0.3032, "step": 12640 }, { "epoch": 0.73, "grad_norm": 0.34987437967800145, "learning_rate": 3.6790211975496714e-06, "loss": 0.3093, "step": 12641 }, { "epoch": 0.73, "grad_norm": 0.3070022736126839, "learning_rate": 3.6775793071324283e-06, "loss": 0.2453, "step": 12642 }, { "epoch": 0.73, "grad_norm": 0.7542534218968645, "learning_rate": 3.6761376356603385e-06, "loss": 0.3064, "step": 12643 }, { "epoch": 0.73, "grad_norm": 0.39024087041958594, "learning_rate": 3.674696183183334e-06, "loss": 0.225, "step": 12644 }, { "epoch": 0.73, "grad_norm": 0.2456900281354834, "learning_rate": 3.6732549497513292e-06, "loss": 0.2106, "step": 12645 }, { "epoch": 0.73, "grad_norm": 0.34175475930678456, "learning_rate": 3.6718139354142326e-06, "loss": 0.2592, "step": 12646 }, { "epoch": 0.73, "grad_norm": 0.8023321846699596, "learning_rate": 3.670373140221947e-06, "loss": 0.452, "step": 12647 }, { "epoch": 0.73, "grad_norm": 0.3495489226786334, "learning_rate": 3.6689325642243643e-06, "loss": 0.2793, "step": 12648 }, { "epoch": 0.73, "grad_norm": 0.7430728352602514, "learning_rate": 3.6674922074713783e-06, "loss": 0.3452, "step": 12649 }, { "epoch": 0.73, "grad_norm": 0.3347746450775112, "learning_rate": 3.6660520700128642e-06, "loss": 0.3015, "step": 12650 }, { "epoch": 0.73, "grad_norm": 0.24771906724986534, "learning_rate": 3.6646121518986954e-06, "loss": 0.2161, "step": 12651 }, { "epoch": 0.73, "grad_norm": 0.3430615488225278, "learning_rate": 3.6631724531787314e-06, "loss": 0.113, "step": 12652 }, { "epoch": 0.73, "grad_norm": 0.38006058780149277, "learning_rate": 3.6617329739028373e-06, "loss": 0.2922, "step": 12653 }, { "epoch": 0.73, "grad_norm": 0.3295860270105801, "learning_rate": 3.660293714120856e-06, "loss": 0.2493, "step": 12654 }, { "epoch": 0.73, "grad_norm": 0.8069415336157884, "learning_rate": 3.6588546738826325e-06, "loss": 0.4577, "step": 12655 }, { "epoch": 0.73, "grad_norm": 0.44498482845603343, "learning_rate": 3.6574158532379944e-06, "loss": 0.1469, "step": 12656 }, { "epoch": 0.73, "grad_norm": 0.40957331326092905, "learning_rate": 3.6559772522367765e-06, "loss": 0.3075, "step": 12657 }, { "epoch": 0.73, "grad_norm": 0.24643548781053554, "learning_rate": 3.6545388709287933e-06, "loss": 0.2624, "step": 12658 }, { "epoch": 0.73, "grad_norm": 0.7845058080505076, "learning_rate": 3.653100709363856e-06, "loss": 0.3092, "step": 12659 }, { "epoch": 0.73, "grad_norm": 0.38306157889681686, "learning_rate": 3.651662767591768e-06, "loss": 0.2797, "step": 12660 }, { "epoch": 0.73, "grad_norm": 0.39024767404061234, "learning_rate": 3.650225045662322e-06, "loss": 0.3401, "step": 12661 }, { "epoch": 0.73, "grad_norm": 0.46207148468081677, "learning_rate": 3.6487875436253173e-06, "loss": 0.2373, "step": 12662 }, { "epoch": 0.73, "grad_norm": 0.3593077845975231, "learning_rate": 3.6473502615305233e-06, "loss": 0.2678, "step": 12663 }, { "epoch": 0.73, "grad_norm": 0.3862979083597893, "learning_rate": 3.645913199427713e-06, "loss": 0.2191, "step": 12664 }, { "epoch": 0.73, "grad_norm": 0.34427342537308164, "learning_rate": 3.6444763573666586e-06, "loss": 0.2569, "step": 12665 }, { "epoch": 0.73, "grad_norm": 0.3287239171784687, "learning_rate": 3.643039735397115e-06, "loss": 0.2551, "step": 12666 }, { "epoch": 0.73, "grad_norm": 1.2073091689438713, "learning_rate": 3.6416033335688306e-06, "loss": 0.8047, "step": 12667 }, { "epoch": 0.73, "grad_norm": 1.14833529116643, "learning_rate": 3.640167151931547e-06, "loss": 0.4932, "step": 12668 }, { "epoch": 0.73, "grad_norm": 0.2683170080436344, "learning_rate": 3.6387311905350053e-06, "loss": 0.2103, "step": 12669 }, { "epoch": 0.73, "grad_norm": 0.27189369694643967, "learning_rate": 3.637295449428928e-06, "loss": 0.2041, "step": 12670 }, { "epoch": 0.73, "grad_norm": 0.5783715207547905, "learning_rate": 3.6358599286630367e-06, "loss": 0.3914, "step": 12671 }, { "epoch": 0.73, "grad_norm": 0.2876284884270302, "learning_rate": 3.634424628287041e-06, "loss": 0.191, "step": 12672 }, { "epoch": 0.73, "grad_norm": 0.47055640385816155, "learning_rate": 3.632989548350645e-06, "loss": 0.3705, "step": 12673 }, { "epoch": 0.73, "grad_norm": 0.526309013570079, "learning_rate": 3.631554688903549e-06, "loss": 0.3535, "step": 12674 }, { "epoch": 0.73, "grad_norm": 0.3751698398541402, "learning_rate": 3.6301200499954416e-06, "loss": 0.1871, "step": 12675 }, { "epoch": 0.73, "grad_norm": 0.3542442085533244, "learning_rate": 3.6286856316760023e-06, "loss": 0.2631, "step": 12676 }, { "epoch": 0.73, "grad_norm": 0.273470471007027, "learning_rate": 3.6272514339949015e-06, "loss": 0.2747, "step": 12677 }, { "epoch": 0.73, "grad_norm": 0.5734636947485215, "learning_rate": 3.6258174570018133e-06, "loss": 0.3421, "step": 12678 }, { "epoch": 0.73, "grad_norm": 0.3687365959759761, "learning_rate": 3.6243837007463933e-06, "loss": 0.262, "step": 12679 }, { "epoch": 0.73, "grad_norm": 1.2296519963082793, "learning_rate": 3.6229501652782904e-06, "loss": 0.5555, "step": 12680 }, { "epoch": 0.73, "grad_norm": 0.31969153444544296, "learning_rate": 3.6215168506471466e-06, "loss": 0.2874, "step": 12681 }, { "epoch": 0.73, "grad_norm": 0.4076483909163558, "learning_rate": 3.6200837569026036e-06, "loss": 0.2688, "step": 12682 }, { "epoch": 0.73, "grad_norm": 0.41300092423729834, "learning_rate": 3.618650884094285e-06, "loss": 0.2668, "step": 12683 }, { "epoch": 0.73, "grad_norm": 0.3944044972280825, "learning_rate": 3.617218232271812e-06, "loss": 0.2958, "step": 12684 }, { "epoch": 0.73, "grad_norm": 0.25016697584075415, "learning_rate": 3.615785801484797e-06, "loss": 0.1869, "step": 12685 }, { "epoch": 0.73, "grad_norm": 0.4950885923845611, "learning_rate": 3.6143535917828422e-06, "loss": 0.3181, "step": 12686 }, { "epoch": 0.73, "grad_norm": 0.369686886187919, "learning_rate": 3.612921603215551e-06, "loss": 0.263, "step": 12687 }, { "epoch": 0.73, "grad_norm": 0.5134965413780258, "learning_rate": 3.6114898358325103e-06, "loss": 0.2762, "step": 12688 }, { "epoch": 0.73, "grad_norm": 0.3598094168898819, "learning_rate": 3.6100582896833012e-06, "loss": 0.3195, "step": 12689 }, { "epoch": 0.73, "grad_norm": 0.5903895067372618, "learning_rate": 3.6086269648174965e-06, "loss": 0.3883, "step": 12690 }, { "epoch": 0.73, "grad_norm": 0.285502437384372, "learning_rate": 3.607195861284668e-06, "loss": 0.2375, "step": 12691 }, { "epoch": 0.73, "grad_norm": 0.29724734450311907, "learning_rate": 3.605764979134372e-06, "loss": 0.1829, "step": 12692 }, { "epoch": 0.73, "grad_norm": 0.38014274107318985, "learning_rate": 3.6043343184161593e-06, "loss": 0.2531, "step": 12693 }, { "epoch": 0.73, "grad_norm": 0.3741144539894074, "learning_rate": 3.602903879179571e-06, "loss": 0.3121, "step": 12694 }, { "epoch": 0.73, "grad_norm": 0.9836418738839882, "learning_rate": 3.601473661474154e-06, "loss": 0.3773, "step": 12695 }, { "epoch": 0.73, "grad_norm": 0.5388543502057055, "learning_rate": 3.600043665349424e-06, "loss": 0.33, "step": 12696 }, { "epoch": 0.73, "grad_norm": 0.2299485082398649, "learning_rate": 3.5986138908549073e-06, "loss": 0.2319, "step": 12697 }, { "epoch": 0.73, "grad_norm": 0.4261332556361552, "learning_rate": 3.597184338040114e-06, "loss": 0.166, "step": 12698 }, { "epoch": 0.73, "grad_norm": 0.4785686820285072, "learning_rate": 3.595755006954553e-06, "loss": 0.3013, "step": 12699 }, { "epoch": 0.73, "grad_norm": 0.40973404733494684, "learning_rate": 3.5943258976477226e-06, "loss": 0.2931, "step": 12700 }, { "epoch": 0.73, "grad_norm": 0.32627133475863995, "learning_rate": 3.5928970101691096e-06, "loss": 0.2481, "step": 12701 }, { "epoch": 0.73, "grad_norm": 0.5170098162809887, "learning_rate": 3.5914683445681954e-06, "loss": 0.3466, "step": 12702 }, { "epoch": 0.73, "grad_norm": 0.287299691816571, "learning_rate": 3.59003990089446e-06, "loss": 0.2333, "step": 12703 }, { "epoch": 0.73, "grad_norm": 0.3970243877751687, "learning_rate": 3.588611679197366e-06, "loss": 0.2657, "step": 12704 }, { "epoch": 0.73, "grad_norm": 0.2731068096527432, "learning_rate": 3.587183679526375e-06, "loss": 0.2241, "step": 12705 }, { "epoch": 0.73, "grad_norm": 0.5270425973716926, "learning_rate": 3.585755901930934e-06, "loss": 0.3451, "step": 12706 }, { "epoch": 0.73, "grad_norm": 1.2638374003320183, "learning_rate": 3.5843283464604927e-06, "loss": 0.7912, "step": 12707 }, { "epoch": 0.73, "grad_norm": 0.4955313922411486, "learning_rate": 3.582901013164486e-06, "loss": 0.1773, "step": 12708 }, { "epoch": 0.73, "grad_norm": 0.25806863051136353, "learning_rate": 3.5814739020923405e-06, "loss": 0.2594, "step": 12709 }, { "epoch": 0.73, "grad_norm": 0.2920727776909075, "learning_rate": 3.5800470132934785e-06, "loss": 0.2262, "step": 12710 }, { "epoch": 0.73, "grad_norm": 0.7188461489974537, "learning_rate": 3.5786203468173087e-06, "loss": 0.2492, "step": 12711 }, { "epoch": 0.73, "grad_norm": 0.3632276005566369, "learning_rate": 3.5771939027132428e-06, "loss": 0.2854, "step": 12712 }, { "epoch": 0.73, "grad_norm": 0.3989870298151256, "learning_rate": 3.5757676810306775e-06, "loss": 0.3184, "step": 12713 }, { "epoch": 0.73, "grad_norm": 0.3194280116657528, "learning_rate": 3.5743416818189993e-06, "loss": 0.1514, "step": 12714 }, { "epoch": 0.73, "grad_norm": 0.353739585397453, "learning_rate": 3.5729159051275895e-06, "loss": 0.2931, "step": 12715 }, { "epoch": 0.73, "grad_norm": 0.6406462606108349, "learning_rate": 3.5714903510058296e-06, "loss": 0.3994, "step": 12716 }, { "epoch": 0.73, "grad_norm": 0.2038712219236113, "learning_rate": 3.570065019503082e-06, "loss": 0.1946, "step": 12717 }, { "epoch": 0.73, "grad_norm": 0.3350444004903638, "learning_rate": 3.5686399106687064e-06, "loss": 0.2076, "step": 12718 }, { "epoch": 0.73, "grad_norm": 1.097246714693747, "learning_rate": 3.567215024552051e-06, "loss": 0.6239, "step": 12719 }, { "epoch": 0.73, "grad_norm": 0.46965475632734666, "learning_rate": 3.5657903612024658e-06, "loss": 0.3749, "step": 12720 }, { "epoch": 0.73, "grad_norm": 0.263875280635042, "learning_rate": 3.5643659206692837e-06, "loss": 0.2184, "step": 12721 }, { "epoch": 0.73, "grad_norm": 0.5866450994990379, "learning_rate": 3.562941703001832e-06, "loss": 0.3771, "step": 12722 }, { "epoch": 0.73, "grad_norm": 0.19861196590825053, "learning_rate": 3.5615177082494334e-06, "loss": 0.1384, "step": 12723 }, { "epoch": 0.73, "grad_norm": 0.3572080133883536, "learning_rate": 3.5600939364613963e-06, "loss": 0.217, "step": 12724 }, { "epoch": 0.73, "grad_norm": 0.41415889915049825, "learning_rate": 3.5586703876870333e-06, "loss": 0.3052, "step": 12725 }, { "epoch": 0.73, "grad_norm": 0.5154925408780823, "learning_rate": 3.557247061975636e-06, "loss": 0.3153, "step": 12726 }, { "epoch": 0.73, "grad_norm": 0.34276841548401615, "learning_rate": 3.5558239593764978e-06, "loss": 0.2438, "step": 12727 }, { "epoch": 0.73, "grad_norm": 0.42795797835213084, "learning_rate": 3.554401079938894e-06, "loss": 0.329, "step": 12728 }, { "epoch": 0.73, "grad_norm": 0.234860424746158, "learning_rate": 3.552978423712111e-06, "loss": 0.1943, "step": 12729 }, { "epoch": 0.73, "grad_norm": 0.36464014173959514, "learning_rate": 3.5515559907454045e-06, "loss": 0.2748, "step": 12730 }, { "epoch": 0.73, "grad_norm": 0.9060753945623665, "learning_rate": 3.550133781088033e-06, "loss": 0.5051, "step": 12731 }, { "epoch": 0.73, "grad_norm": 0.3529980523946801, "learning_rate": 3.5487117947892558e-06, "loss": 0.2867, "step": 12732 }, { "epoch": 0.73, "grad_norm": 0.3674842048475259, "learning_rate": 3.5472900318983105e-06, "loss": 0.2548, "step": 12733 }, { "epoch": 0.73, "grad_norm": 0.8401740489172301, "learning_rate": 3.545868492464435e-06, "loss": 0.3047, "step": 12734 }, { "epoch": 0.73, "grad_norm": 0.27141746267388867, "learning_rate": 3.544447176536855e-06, "loss": 0.1553, "step": 12735 }, { "epoch": 0.73, "grad_norm": 0.32057223006156993, "learning_rate": 3.543026084164789e-06, "loss": 0.2634, "step": 12736 }, { "epoch": 0.73, "grad_norm": 0.3573243078189275, "learning_rate": 3.5416052153974546e-06, "loss": 0.2645, "step": 12737 }, { "epoch": 0.73, "grad_norm": 0.6295162887590943, "learning_rate": 3.5401845702840543e-06, "loss": 0.4241, "step": 12738 }, { "epoch": 0.73, "grad_norm": 0.38271116549580053, "learning_rate": 3.5387641488737855e-06, "loss": 0.2994, "step": 12739 }, { "epoch": 0.73, "grad_norm": 0.44089101642573486, "learning_rate": 3.5373439512158315e-06, "loss": 0.2251, "step": 12740 }, { "epoch": 0.73, "grad_norm": 0.2504442391142626, "learning_rate": 3.5359239773593833e-06, "loss": 0.2017, "step": 12741 }, { "epoch": 0.73, "grad_norm": 0.3315826635141327, "learning_rate": 3.534504227353609e-06, "loss": 0.2718, "step": 12742 }, { "epoch": 0.73, "grad_norm": 1.0141255217066432, "learning_rate": 3.5330847012476754e-06, "loss": 0.6677, "step": 12743 }, { "epoch": 0.73, "grad_norm": 0.2908904563347626, "learning_rate": 3.5316653990907367e-06, "loss": 0.2313, "step": 12744 }, { "epoch": 0.73, "grad_norm": 0.35460691259874044, "learning_rate": 3.5302463209319514e-06, "loss": 0.317, "step": 12745 }, { "epoch": 0.73, "grad_norm": 0.6951267542723861, "learning_rate": 3.5288274668204568e-06, "loss": 0.4621, "step": 12746 }, { "epoch": 0.73, "grad_norm": 0.15127146330290486, "learning_rate": 3.527408836805389e-06, "loss": 0.0716, "step": 12747 }, { "epoch": 0.73, "grad_norm": 0.297568465211192, "learning_rate": 3.525990430935876e-06, "loss": 0.2372, "step": 12748 }, { "epoch": 0.73, "grad_norm": 0.321758839001798, "learning_rate": 3.524572249261031e-06, "loss": 0.3338, "step": 12749 }, { "epoch": 0.73, "grad_norm": 0.5864810260206559, "learning_rate": 3.5231542918299753e-06, "loss": 0.3112, "step": 12750 }, { "epoch": 0.73, "grad_norm": 0.2883308570588384, "learning_rate": 3.5217365586918073e-06, "loss": 0.2406, "step": 12751 }, { "epoch": 0.73, "grad_norm": 0.47306256806256525, "learning_rate": 3.5203190498956242e-06, "loss": 0.3095, "step": 12752 }, { "epoch": 0.73, "grad_norm": 0.49735107729836175, "learning_rate": 3.518901765490509e-06, "loss": 0.347, "step": 12753 }, { "epoch": 0.73, "grad_norm": 0.17686563294193186, "learning_rate": 3.517484705525551e-06, "loss": 0.1394, "step": 12754 }, { "epoch": 0.73, "grad_norm": 0.8933579186696481, "learning_rate": 3.5160678700498197e-06, "loss": 0.5273, "step": 12755 }, { "epoch": 0.73, "grad_norm": 0.3495559430890266, "learning_rate": 3.5146512591123783e-06, "loss": 0.3032, "step": 12756 }, { "epoch": 0.73, "grad_norm": 0.28892578502445293, "learning_rate": 3.513234872762282e-06, "loss": 0.1972, "step": 12757 }, { "epoch": 0.73, "grad_norm": 1.0657258271815264, "learning_rate": 3.511818711048587e-06, "loss": 0.5283, "step": 12758 }, { "epoch": 0.73, "grad_norm": 0.2885840837702499, "learning_rate": 3.5104027740203305e-06, "loss": 0.1883, "step": 12759 }, { "epoch": 0.73, "grad_norm": 0.2869873138895025, "learning_rate": 3.5089870617265465e-06, "loss": 0.1858, "step": 12760 }, { "epoch": 0.73, "grad_norm": 0.352194227970117, "learning_rate": 3.5075715742162586e-06, "loss": 0.3088, "step": 12761 }, { "epoch": 0.73, "grad_norm": 0.7226569774131096, "learning_rate": 3.506156311538491e-06, "loss": 0.4724, "step": 12762 }, { "epoch": 0.73, "grad_norm": 0.29774972113115467, "learning_rate": 3.504741273742254e-06, "loss": 0.194, "step": 12763 }, { "epoch": 0.73, "grad_norm": 0.33947635583751484, "learning_rate": 3.503326460876545e-06, "loss": 0.2951, "step": 12764 }, { "epoch": 0.73, "grad_norm": 1.1678877911120926, "learning_rate": 3.5019118729903566e-06, "loss": 0.4789, "step": 12765 }, { "epoch": 0.73, "grad_norm": 0.3045109971875889, "learning_rate": 3.5004975101326854e-06, "loss": 0.2444, "step": 12766 }, { "epoch": 0.73, "grad_norm": 0.5454583862261513, "learning_rate": 3.4990833723525054e-06, "loss": 0.2266, "step": 12767 }, { "epoch": 0.73, "grad_norm": 0.3329777623579333, "learning_rate": 3.497669459698788e-06, "loss": 0.3025, "step": 12768 }, { "epoch": 0.73, "grad_norm": 0.3145139009440519, "learning_rate": 3.496255772220495e-06, "loss": 0.2587, "step": 12769 }, { "epoch": 0.73, "grad_norm": 0.23984063415861187, "learning_rate": 3.4948423099665883e-06, "loss": 0.1022, "step": 12770 }, { "epoch": 0.73, "grad_norm": 0.8220955217574739, "learning_rate": 3.493429072986013e-06, "loss": 0.402, "step": 12771 }, { "epoch": 0.73, "grad_norm": 0.2833979664828243, "learning_rate": 3.492016061327709e-06, "loss": 0.249, "step": 12772 }, { "epoch": 0.73, "grad_norm": 0.4655948518759838, "learning_rate": 3.490603275040605e-06, "loss": 0.2649, "step": 12773 }, { "epoch": 0.73, "grad_norm": 0.39936457950351845, "learning_rate": 3.4891907141736324e-06, "loss": 0.289, "step": 12774 }, { "epoch": 0.73, "grad_norm": 0.2320835341969602, "learning_rate": 3.487778378775707e-06, "loss": 0.1958, "step": 12775 }, { "epoch": 0.73, "grad_norm": 0.3366992622728923, "learning_rate": 3.4863662688957355e-06, "loss": 0.2385, "step": 12776 }, { "epoch": 0.73, "grad_norm": 0.8735030497179362, "learning_rate": 3.4849543845826195e-06, "loss": 0.373, "step": 12777 }, { "epoch": 0.73, "grad_norm": 0.3820901509586576, "learning_rate": 3.4835427258852507e-06, "loss": 0.2542, "step": 12778 }, { "epoch": 0.73, "grad_norm": 0.4889596251920278, "learning_rate": 3.4821312928525197e-06, "loss": 0.3594, "step": 12779 }, { "epoch": 0.73, "grad_norm": 0.3395884327853301, "learning_rate": 3.4807200855333024e-06, "loss": 0.2753, "step": 12780 }, { "epoch": 0.73, "grad_norm": 0.41027269658708754, "learning_rate": 3.479309103976467e-06, "loss": 0.2878, "step": 12781 }, { "epoch": 0.73, "grad_norm": 0.2492148324473185, "learning_rate": 3.4778983482308746e-06, "loss": 0.1902, "step": 12782 }, { "epoch": 0.73, "grad_norm": 0.7729463052691775, "learning_rate": 3.4764878183453855e-06, "loss": 0.2883, "step": 12783 }, { "epoch": 0.73, "grad_norm": 0.2636022559064381, "learning_rate": 3.475077514368842e-06, "loss": 0.2694, "step": 12784 }, { "epoch": 0.73, "grad_norm": 0.4722767331960266, "learning_rate": 3.4736674363500846e-06, "loss": 0.3413, "step": 12785 }, { "epoch": 0.73, "grad_norm": 0.9849108722772745, "learning_rate": 3.472257584337939e-06, "loss": 0.527, "step": 12786 }, { "epoch": 0.73, "grad_norm": 0.2514472326522464, "learning_rate": 3.470847958381236e-06, "loss": 0.2084, "step": 12787 }, { "epoch": 0.73, "grad_norm": 0.2962379312373279, "learning_rate": 3.469438558528787e-06, "loss": 0.2507, "step": 12788 }, { "epoch": 0.73, "grad_norm": 0.7197590638637732, "learning_rate": 3.468029384829401e-06, "loss": 0.2857, "step": 12789 }, { "epoch": 0.73, "grad_norm": 0.34947417036129547, "learning_rate": 3.466620437331876e-06, "loss": 0.2543, "step": 12790 }, { "epoch": 0.73, "grad_norm": 1.3024804970661898, "learning_rate": 3.4652117160850006e-06, "loss": 0.5634, "step": 12791 }, { "epoch": 0.73, "grad_norm": 0.35195261144446016, "learning_rate": 3.463803221137566e-06, "loss": 0.2948, "step": 12792 }, { "epoch": 0.74, "grad_norm": 0.3084578488343683, "learning_rate": 3.462394952538345e-06, "loss": 0.1994, "step": 12793 }, { "epoch": 0.74, "grad_norm": 0.22815859921668444, "learning_rate": 3.460986910336106e-06, "loss": 0.1535, "step": 12794 }, { "epoch": 0.74, "grad_norm": 0.7811335529012565, "learning_rate": 3.459579094579605e-06, "loss": 0.3796, "step": 12795 }, { "epoch": 0.74, "grad_norm": 0.27942275007513806, "learning_rate": 3.4581715053176023e-06, "loss": 0.1953, "step": 12796 }, { "epoch": 0.74, "grad_norm": 0.3920486415648585, "learning_rate": 3.456764142598843e-06, "loss": 0.3051, "step": 12797 }, { "epoch": 0.74, "grad_norm": 1.016852076671105, "learning_rate": 3.455357006472052e-06, "loss": 0.657, "step": 12798 }, { "epoch": 0.74, "grad_norm": 0.33966130409641193, "learning_rate": 3.4539500969859706e-06, "loss": 0.1705, "step": 12799 }, { "epoch": 0.74, "grad_norm": 0.22172259172292844, "learning_rate": 3.4525434141893166e-06, "loss": 0.2172, "step": 12800 }, { "epoch": 0.74, "grad_norm": 0.7058709583737947, "learning_rate": 3.4511369581308017e-06, "loss": 0.3947, "step": 12801 }, { "epoch": 0.74, "grad_norm": 0.3354166197148681, "learning_rate": 3.449730728859132e-06, "loss": 0.1666, "step": 12802 }, { "epoch": 0.74, "grad_norm": 0.5337470482155633, "learning_rate": 3.4483247264230034e-06, "loss": 0.3789, "step": 12803 }, { "epoch": 0.74, "grad_norm": 0.33592213852392117, "learning_rate": 3.4469189508711098e-06, "loss": 0.3046, "step": 12804 }, { "epoch": 0.74, "grad_norm": 0.6246678196271466, "learning_rate": 3.445513402252132e-06, "loss": 0.3147, "step": 12805 }, { "epoch": 0.74, "grad_norm": 0.31427524782070476, "learning_rate": 3.444108080614743e-06, "loss": 0.204, "step": 12806 }, { "epoch": 0.74, "grad_norm": 0.48310103519188197, "learning_rate": 3.4427029860076056e-06, "loss": 0.2649, "step": 12807 }, { "epoch": 0.74, "grad_norm": 0.26865324112297956, "learning_rate": 3.441298118479386e-06, "loss": 0.2608, "step": 12808 }, { "epoch": 0.74, "grad_norm": 0.5907160387384897, "learning_rate": 3.4398934780787297e-06, "loss": 0.2304, "step": 12809 }, { "epoch": 0.74, "grad_norm": 1.1524510092382603, "learning_rate": 3.43848906485428e-06, "loss": 0.7557, "step": 12810 }, { "epoch": 0.74, "grad_norm": 0.38459895702479086, "learning_rate": 3.4370848788546695e-06, "loss": 0.2584, "step": 12811 }, { "epoch": 0.74, "grad_norm": 0.2760382853690984, "learning_rate": 3.4356809201285303e-06, "loss": 0.241, "step": 12812 }, { "epoch": 0.74, "grad_norm": 0.32014199369104235, "learning_rate": 3.4342771887244784e-06, "loss": 0.2341, "step": 12813 }, { "epoch": 0.74, "grad_norm": 0.5749750430673263, "learning_rate": 3.4328736846911247e-06, "loss": 0.3045, "step": 12814 }, { "epoch": 0.74, "grad_norm": 0.38232768236464837, "learning_rate": 3.4314704080770744e-06, "loss": 0.2291, "step": 12815 }, { "epoch": 0.74, "grad_norm": 0.3446190128141687, "learning_rate": 3.4300673589309163e-06, "loss": 0.3249, "step": 12816 }, { "epoch": 0.74, "grad_norm": 0.5823714362679814, "learning_rate": 3.428664537301247e-06, "loss": 0.2962, "step": 12817 }, { "epoch": 0.74, "grad_norm": 0.3685845728880524, "learning_rate": 3.4272619432366427e-06, "loss": 0.3151, "step": 12818 }, { "epoch": 0.74, "grad_norm": 0.2341879964186778, "learning_rate": 3.425859576785674e-06, "loss": 0.1659, "step": 12819 }, { "epoch": 0.74, "grad_norm": 0.4030323576618036, "learning_rate": 3.4244574379969032e-06, "loss": 0.2683, "step": 12820 }, { "epoch": 0.74, "grad_norm": 0.4474160431672498, "learning_rate": 3.4230555269188903e-06, "loss": 0.2913, "step": 12821 }, { "epoch": 0.74, "grad_norm": 1.0297083176155641, "learning_rate": 3.4216538436001836e-06, "loss": 0.5263, "step": 12822 }, { "epoch": 0.74, "grad_norm": 0.38588698705698504, "learning_rate": 3.4202523880893202e-06, "loss": 0.2781, "step": 12823 }, { "epoch": 0.74, "grad_norm": 0.30214966289449274, "learning_rate": 3.4188511604348297e-06, "loss": 0.2844, "step": 12824 }, { "epoch": 0.74, "grad_norm": 0.21761652521172148, "learning_rate": 3.417450160685245e-06, "loss": 0.0645, "step": 12825 }, { "epoch": 0.74, "grad_norm": 0.32300800479962954, "learning_rate": 3.416049388889078e-06, "loss": 0.1809, "step": 12826 }, { "epoch": 0.74, "grad_norm": 0.3752187308252932, "learning_rate": 3.4146488450948367e-06, "loss": 0.2808, "step": 12827 }, { "epoch": 0.74, "grad_norm": 0.3238753458144825, "learning_rate": 3.413248529351023e-06, "loss": 0.2652, "step": 12828 }, { "epoch": 0.74, "grad_norm": 0.4298149619699671, "learning_rate": 3.411848441706127e-06, "loss": 0.2901, "step": 12829 }, { "epoch": 0.74, "grad_norm": 0.4810941246561356, "learning_rate": 3.410448582208642e-06, "loss": 0.3489, "step": 12830 }, { "epoch": 0.74, "grad_norm": 0.5252502309396361, "learning_rate": 3.409048950907037e-06, "loss": 0.3361, "step": 12831 }, { "epoch": 0.74, "grad_norm": 0.1994109208173046, "learning_rate": 3.4076495478497795e-06, "loss": 0.1346, "step": 12832 }, { "epoch": 0.74, "grad_norm": 0.4101863021105691, "learning_rate": 3.406250373085337e-06, "loss": 0.3074, "step": 12833 }, { "epoch": 0.74, "grad_norm": 0.6969992266110001, "learning_rate": 3.4048514266621612e-06, "loss": 0.4448, "step": 12834 }, { "epoch": 0.74, "grad_norm": 0.4233808236326501, "learning_rate": 3.403452708628697e-06, "loss": 0.238, "step": 12835 }, { "epoch": 0.74, "grad_norm": 0.29572459303480436, "learning_rate": 3.4020542190333795e-06, "loss": 0.2596, "step": 12836 }, { "epoch": 0.74, "grad_norm": 0.49815984479914355, "learning_rate": 3.4006559579246425e-06, "loss": 0.2174, "step": 12837 }, { "epoch": 0.74, "grad_norm": 0.2761033725607673, "learning_rate": 3.3992579253509062e-06, "loss": 0.0935, "step": 12838 }, { "epoch": 0.74, "grad_norm": 0.32754610459515865, "learning_rate": 3.3978601213605842e-06, "loss": 0.2857, "step": 12839 }, { "epoch": 0.74, "grad_norm": 0.4294008648810787, "learning_rate": 3.3964625460020827e-06, "loss": 0.311, "step": 12840 }, { "epoch": 0.74, "grad_norm": 0.6594774691330908, "learning_rate": 3.395065199323796e-06, "loss": 0.3494, "step": 12841 }, { "epoch": 0.74, "grad_norm": 0.32425053227691586, "learning_rate": 3.393668081374121e-06, "loss": 0.2053, "step": 12842 }, { "epoch": 0.74, "grad_norm": 0.5089151987838674, "learning_rate": 3.3922711922014352e-06, "loss": 0.3626, "step": 12843 }, { "epoch": 0.74, "grad_norm": 0.21503853351438224, "learning_rate": 3.3908745318541146e-06, "loss": 0.1865, "step": 12844 }, { "epoch": 0.74, "grad_norm": 0.31100684636950715, "learning_rate": 3.389478100380521e-06, "loss": 0.2, "step": 12845 }, { "epoch": 0.74, "grad_norm": 0.7679876511139743, "learning_rate": 3.3880818978290196e-06, "loss": 0.3937, "step": 12846 }, { "epoch": 0.74, "grad_norm": 0.32560271064088603, "learning_rate": 3.386685924247959e-06, "loss": 0.2838, "step": 12847 }, { "epoch": 0.74, "grad_norm": 0.32516907110621057, "learning_rate": 3.3852901796856796e-06, "loss": 0.21, "step": 12848 }, { "epoch": 0.74, "grad_norm": 1.2717231476692796, "learning_rate": 3.3838946641905134e-06, "loss": 0.4662, "step": 12849 }, { "epoch": 0.74, "grad_norm": 0.2335607078138597, "learning_rate": 3.382499377810794e-06, "loss": 0.1513, "step": 12850 }, { "epoch": 0.74, "grad_norm": 0.2814240890847466, "learning_rate": 3.3811043205948366e-06, "loss": 0.2342, "step": 12851 }, { "epoch": 0.74, "grad_norm": 0.5990969852514557, "learning_rate": 3.3797094925909526e-06, "loss": 0.3364, "step": 12852 }, { "epoch": 0.74, "grad_norm": 1.1668272839736702, "learning_rate": 3.378314893847443e-06, "loss": 0.7382, "step": 12853 }, { "epoch": 0.74, "grad_norm": 0.3361253693355029, "learning_rate": 3.3769205244126013e-06, "loss": 0.2619, "step": 12854 }, { "epoch": 0.74, "grad_norm": 0.357664507868556, "learning_rate": 3.3755263843347196e-06, "loss": 0.2275, "step": 12855 }, { "epoch": 0.74, "grad_norm": 0.26691235584553286, "learning_rate": 3.3741324736620752e-06, "loss": 0.1768, "step": 12856 }, { "epoch": 0.74, "grad_norm": 0.3316452448222894, "learning_rate": 3.3727387924429377e-06, "loss": 0.2648, "step": 12857 }, { "epoch": 0.74, "grad_norm": 1.095159106391374, "learning_rate": 3.371345340725568e-06, "loss": 0.2964, "step": 12858 }, { "epoch": 0.74, "grad_norm": 0.29582495886406085, "learning_rate": 3.3699521185582274e-06, "loss": 0.2747, "step": 12859 }, { "epoch": 0.74, "grad_norm": 0.34966377054219355, "learning_rate": 3.3685591259891592e-06, "loss": 0.2708, "step": 12860 }, { "epoch": 0.74, "grad_norm": 1.3699008142557696, "learning_rate": 3.367166363066604e-06, "loss": 0.2283, "step": 12861 }, { "epoch": 0.74, "grad_norm": 0.37940865289293624, "learning_rate": 3.3657738298387886e-06, "loss": 0.2389, "step": 12862 }, { "epoch": 0.74, "grad_norm": 0.29870327609057534, "learning_rate": 3.3643815263539438e-06, "loss": 0.2815, "step": 12863 }, { "epoch": 0.74, "grad_norm": 0.6121185536613202, "learning_rate": 3.3629894526602847e-06, "loss": 0.2784, "step": 12864 }, { "epoch": 0.74, "grad_norm": 0.39370032195369115, "learning_rate": 3.361597608806012e-06, "loss": 0.2998, "step": 12865 }, { "epoch": 0.74, "grad_norm": 0.3425273598020543, "learning_rate": 3.360205994839326e-06, "loss": 0.2651, "step": 12866 }, { "epoch": 0.74, "grad_norm": 0.38555273129152684, "learning_rate": 3.358814610808424e-06, "loss": 0.3122, "step": 12867 }, { "epoch": 0.74, "grad_norm": 0.6651039322509703, "learning_rate": 3.3574234567614862e-06, "loss": 0.2088, "step": 12868 }, { "epoch": 0.74, "grad_norm": 0.3512050670033464, "learning_rate": 3.356032532746688e-06, "loss": 0.2761, "step": 12869 }, { "epoch": 0.74, "grad_norm": 0.5160563391559385, "learning_rate": 3.354641838812195e-06, "loss": 0.3093, "step": 12870 }, { "epoch": 0.74, "grad_norm": 0.2944111295460084, "learning_rate": 3.353251375006171e-06, "loss": 0.2444, "step": 12871 }, { "epoch": 0.74, "grad_norm": 0.23733746897925087, "learning_rate": 3.3518611413767675e-06, "loss": 0.2007, "step": 12872 }, { "epoch": 0.74, "grad_norm": 1.2606823606756283, "learning_rate": 3.3504711379721267e-06, "loss": 0.4898, "step": 12873 }, { "epoch": 0.74, "grad_norm": 0.8636552913098463, "learning_rate": 3.3490813648403808e-06, "loss": 0.3021, "step": 12874 }, { "epoch": 0.74, "grad_norm": 0.2671159928686584, "learning_rate": 3.347691822029665e-06, "loss": 0.2538, "step": 12875 }, { "epoch": 0.74, "grad_norm": 0.5173905617386279, "learning_rate": 3.346302509588095e-06, "loss": 0.3694, "step": 12876 }, { "epoch": 0.74, "grad_norm": 0.17909842675389065, "learning_rate": 3.344913427563784e-06, "loss": 0.1285, "step": 12877 }, { "epoch": 0.74, "grad_norm": 0.30187736747915783, "learning_rate": 3.343524576004833e-06, "loss": 0.2529, "step": 12878 }, { "epoch": 0.74, "grad_norm": 0.34252991641349273, "learning_rate": 3.342135954959338e-06, "loss": 0.2783, "step": 12879 }, { "epoch": 0.74, "grad_norm": 0.9488231871291415, "learning_rate": 3.3407475644753907e-06, "loss": 0.3745, "step": 12880 }, { "epoch": 0.74, "grad_norm": 0.29818740197209903, "learning_rate": 3.3393594046010693e-06, "loss": 0.208, "step": 12881 }, { "epoch": 0.74, "grad_norm": 1.210855898826414, "learning_rate": 3.3379714753844463e-06, "loss": 0.633, "step": 12882 }, { "epoch": 0.74, "grad_norm": 0.25512255050159116, "learning_rate": 3.3365837768735798e-06, "loss": 0.2485, "step": 12883 }, { "epoch": 0.74, "grad_norm": 0.29905505580538666, "learning_rate": 3.335196309116534e-06, "loss": 0.1868, "step": 12884 }, { "epoch": 0.74, "grad_norm": 0.5038307443785937, "learning_rate": 3.3338090721613547e-06, "loss": 0.2664, "step": 12885 }, { "epoch": 0.74, "grad_norm": 0.5952356862239482, "learning_rate": 3.332422066056079e-06, "loss": 0.3309, "step": 12886 }, { "epoch": 0.74, "grad_norm": 0.2904352494406116, "learning_rate": 3.3310352908487387e-06, "loss": 0.1837, "step": 12887 }, { "epoch": 0.74, "grad_norm": 0.5566544726291275, "learning_rate": 3.3296487465873617e-06, "loss": 0.3727, "step": 12888 }, { "epoch": 0.74, "grad_norm": 1.338582329143749, "learning_rate": 3.328262433319962e-06, "loss": 0.7545, "step": 12889 }, { "epoch": 0.74, "grad_norm": 0.18720255509199032, "learning_rate": 3.3268763510945477e-06, "loss": 0.1371, "step": 12890 }, { "epoch": 0.74, "grad_norm": 0.3551963786547063, "learning_rate": 3.325490499959114e-06, "loss": 0.2981, "step": 12891 }, { "epoch": 0.74, "grad_norm": 0.6958333900137992, "learning_rate": 3.3241048799616616e-06, "loss": 0.4138, "step": 12892 }, { "epoch": 0.74, "grad_norm": 0.3327651923692982, "learning_rate": 3.3227194911501705e-06, "loss": 0.267, "step": 12893 }, { "epoch": 0.74, "grad_norm": 1.2262784695854978, "learning_rate": 3.3213343335726157e-06, "loss": 0.3285, "step": 12894 }, { "epoch": 0.74, "grad_norm": 0.35721902129772304, "learning_rate": 3.3199494072769657e-06, "loss": 0.3184, "step": 12895 }, { "epoch": 0.74, "grad_norm": 0.34515026641036906, "learning_rate": 3.3185647123111776e-06, "loss": 0.2565, "step": 12896 }, { "epoch": 0.74, "grad_norm": 0.4102070107736664, "learning_rate": 3.3171802487232087e-06, "loss": 0.1409, "step": 12897 }, { "epoch": 0.74, "grad_norm": 0.6009426131881008, "learning_rate": 3.3157960165610035e-06, "loss": 0.333, "step": 12898 }, { "epoch": 0.74, "grad_norm": 0.26019221615739213, "learning_rate": 3.314412015872489e-06, "loss": 0.253, "step": 12899 }, { "epoch": 0.74, "grad_norm": 1.239600152449063, "learning_rate": 3.313028246705603e-06, "loss": 0.3466, "step": 12900 }, { "epoch": 0.74, "grad_norm": 0.4657976696878919, "learning_rate": 3.3116447091082593e-06, "loss": 0.3098, "step": 12901 }, { "epoch": 0.74, "grad_norm": 0.3989409195000362, "learning_rate": 3.310261403128373e-06, "loss": 0.2568, "step": 12902 }, { "epoch": 0.74, "grad_norm": 0.28897844817397594, "learning_rate": 3.3088783288138436e-06, "loss": 0.2548, "step": 12903 }, { "epoch": 0.74, "grad_norm": 0.4652505440048004, "learning_rate": 3.307495486212572e-06, "loss": 0.2714, "step": 12904 }, { "epoch": 0.74, "grad_norm": 0.388391044884601, "learning_rate": 3.306112875372445e-06, "loss": 0.2605, "step": 12905 }, { "epoch": 0.74, "grad_norm": 0.5730762996066526, "learning_rate": 3.3047304963413407e-06, "loss": 0.3871, "step": 12906 }, { "epoch": 0.74, "grad_norm": 0.3801978509249738, "learning_rate": 3.3033483491671316e-06, "loss": 0.2487, "step": 12907 }, { "epoch": 0.74, "grad_norm": 0.4299270873716904, "learning_rate": 3.3019664338976787e-06, "loss": 0.2827, "step": 12908 }, { "epoch": 0.74, "grad_norm": 0.463795814167745, "learning_rate": 3.300584750580842e-06, "loss": 0.3093, "step": 12909 }, { "epoch": 0.74, "grad_norm": 0.2269061260272887, "learning_rate": 3.2992032992644686e-06, "loss": 0.1444, "step": 12910 }, { "epoch": 0.74, "grad_norm": 0.2822473309634225, "learning_rate": 3.2978220799963955e-06, "loss": 0.2462, "step": 12911 }, { "epoch": 0.74, "grad_norm": 1.558699342627396, "learning_rate": 3.2964410928244526e-06, "loss": 0.739, "step": 12912 }, { "epoch": 0.74, "grad_norm": 0.8887185637630161, "learning_rate": 3.2950603377964706e-06, "loss": 0.3034, "step": 12913 }, { "epoch": 0.74, "grad_norm": 0.3173946544253534, "learning_rate": 3.29367981496026e-06, "loss": 0.2621, "step": 12914 }, { "epoch": 0.74, "grad_norm": 0.3141577245945441, "learning_rate": 3.29229952436363e-06, "loss": 0.2978, "step": 12915 }, { "epoch": 0.74, "grad_norm": 0.3278313577965663, "learning_rate": 3.2909194660543742e-06, "loss": 0.1864, "step": 12916 }, { "epoch": 0.74, "grad_norm": 0.37208100190566185, "learning_rate": 3.289539640080294e-06, "loss": 0.1933, "step": 12917 }, { "epoch": 0.74, "grad_norm": 1.3163936504011386, "learning_rate": 3.288160046489166e-06, "loss": 0.8189, "step": 12918 }, { "epoch": 0.74, "grad_norm": 0.3683715557013782, "learning_rate": 3.2867806853287675e-06, "loss": 0.2944, "step": 12919 }, { "epoch": 0.74, "grad_norm": 0.37802142040575226, "learning_rate": 3.2854015566468643e-06, "loss": 0.1896, "step": 12920 }, { "epoch": 0.74, "grad_norm": 0.6399011890671984, "learning_rate": 3.284022660491214e-06, "loss": 0.3545, "step": 12921 }, { "epoch": 0.74, "grad_norm": 0.22873817154729156, "learning_rate": 3.2826439969095737e-06, "loss": 0.2049, "step": 12922 }, { "epoch": 0.74, "grad_norm": 0.3336483088508467, "learning_rate": 3.281265565949683e-06, "loss": 0.1561, "step": 12923 }, { "epoch": 0.74, "grad_norm": 0.5578981476572488, "learning_rate": 3.2798873676592755e-06, "loss": 0.4109, "step": 12924 }, { "epoch": 0.74, "grad_norm": 0.6593018831054235, "learning_rate": 3.2785094020860777e-06, "loss": 0.3806, "step": 12925 }, { "epoch": 0.74, "grad_norm": 0.36905433775237373, "learning_rate": 3.277131669277813e-06, "loss": 0.2017, "step": 12926 }, { "epoch": 0.74, "grad_norm": 0.3000027094105889, "learning_rate": 3.275754169282189e-06, "loss": 0.2773, "step": 12927 }, { "epoch": 0.74, "grad_norm": 0.42583234394958763, "learning_rate": 3.2743769021469096e-06, "loss": 0.2012, "step": 12928 }, { "epoch": 0.74, "grad_norm": 0.2963137515632942, "learning_rate": 3.2729998679196663e-06, "loss": 0.1866, "step": 12929 }, { "epoch": 0.74, "grad_norm": 0.2859908008194674, "learning_rate": 3.2716230666481506e-06, "loss": 0.2344, "step": 12930 }, { "epoch": 0.74, "grad_norm": 0.458580153617439, "learning_rate": 3.2702464983800386e-06, "loss": 0.3144, "step": 12931 }, { "epoch": 0.74, "grad_norm": 0.3739316025400197, "learning_rate": 3.2688701631630047e-06, "loss": 0.2944, "step": 12932 }, { "epoch": 0.74, "grad_norm": 0.5003894526988127, "learning_rate": 3.2674940610447005e-06, "loss": 0.2209, "step": 12933 }, { "epoch": 0.74, "grad_norm": 0.2815996714466963, "learning_rate": 3.2661181920727913e-06, "loss": 0.2559, "step": 12934 }, { "epoch": 0.74, "grad_norm": 0.31949144720216227, "learning_rate": 3.2647425562949196e-06, "loss": 0.2264, "step": 12935 }, { "epoch": 0.74, "grad_norm": 0.43227858764097016, "learning_rate": 3.263367153758723e-06, "loss": 0.2649, "step": 12936 }, { "epoch": 0.74, "grad_norm": 0.6654739405314112, "learning_rate": 3.26199198451183e-06, "loss": 0.3732, "step": 12937 }, { "epoch": 0.74, "grad_norm": 0.4519785519202604, "learning_rate": 3.2606170486018662e-06, "loss": 0.312, "step": 12938 }, { "epoch": 0.74, "grad_norm": 0.29017049811434276, "learning_rate": 3.2592423460764457e-06, "loss": 0.2275, "step": 12939 }, { "epoch": 0.74, "grad_norm": 0.36334147109219417, "learning_rate": 3.257867876983173e-06, "loss": 0.1923, "step": 12940 }, { "epoch": 0.74, "grad_norm": 0.5450956167249345, "learning_rate": 3.256493641369641e-06, "loss": 0.315, "step": 12941 }, { "epoch": 0.74, "grad_norm": 0.4020490717875685, "learning_rate": 3.2551196392834496e-06, "loss": 0.3194, "step": 12942 }, { "epoch": 0.74, "grad_norm": 0.32817468224538004, "learning_rate": 3.2537458707721735e-06, "loss": 0.2755, "step": 12943 }, { "epoch": 0.74, "grad_norm": 0.5816663916046452, "learning_rate": 3.252372335883388e-06, "loss": 0.3265, "step": 12944 }, { "epoch": 0.74, "grad_norm": 0.3815486034011904, "learning_rate": 3.250999034664659e-06, "loss": 0.2811, "step": 12945 }, { "epoch": 0.74, "grad_norm": 0.3526554552274751, "learning_rate": 3.24962596716354e-06, "loss": 0.2452, "step": 12946 }, { "epoch": 0.74, "grad_norm": 0.24509700046609045, "learning_rate": 3.2482531334275856e-06, "loss": 0.1689, "step": 12947 }, { "epoch": 0.74, "grad_norm": 0.3338020441543314, "learning_rate": 3.2468805335043363e-06, "loss": 0.3091, "step": 12948 }, { "epoch": 0.74, "grad_norm": 0.9020955309045062, "learning_rate": 3.2455081674413226e-06, "loss": 0.3683, "step": 12949 }, { "epoch": 0.74, "grad_norm": 0.3050120934170287, "learning_rate": 3.2441360352860675e-06, "loss": 0.2703, "step": 12950 }, { "epoch": 0.74, "grad_norm": 0.41887257623271296, "learning_rate": 3.2427641370860953e-06, "loss": 0.273, "step": 12951 }, { "epoch": 0.74, "grad_norm": 0.42530719140404794, "learning_rate": 3.241392472888909e-06, "loss": 0.1314, "step": 12952 }, { "epoch": 0.74, "grad_norm": 0.38376438245705596, "learning_rate": 3.240021042742012e-06, "loss": 0.2803, "step": 12953 }, { "epoch": 0.74, "grad_norm": 0.47996045275196836, "learning_rate": 3.2386498466928916e-06, "loss": 0.3495, "step": 12954 }, { "epoch": 0.74, "grad_norm": 0.3492844356549713, "learning_rate": 3.237278884789039e-06, "loss": 0.3225, "step": 12955 }, { "epoch": 0.74, "grad_norm": 0.21621202245976492, "learning_rate": 3.235908157077929e-06, "loss": 0.0953, "step": 12956 }, { "epoch": 0.74, "grad_norm": 0.37093987547274837, "learning_rate": 3.234537663607028e-06, "loss": 0.2762, "step": 12957 }, { "epoch": 0.74, "grad_norm": 0.33780033787498953, "learning_rate": 3.233167404423797e-06, "loss": 0.2939, "step": 12958 }, { "epoch": 0.74, "grad_norm": 0.8582542909743728, "learning_rate": 3.231797379575684e-06, "loss": 0.2335, "step": 12959 }, { "epoch": 0.74, "grad_norm": 0.34765139094691233, "learning_rate": 3.230427589110141e-06, "loss": 0.2791, "step": 12960 }, { "epoch": 0.74, "grad_norm": 1.1022929147980127, "learning_rate": 3.229058033074599e-06, "loss": 0.7693, "step": 12961 }, { "epoch": 0.74, "grad_norm": 0.2048980960713727, "learning_rate": 3.227688711516486e-06, "loss": 0.18, "step": 12962 }, { "epoch": 0.74, "grad_norm": 0.3108323466882076, "learning_rate": 3.2263196244832183e-06, "loss": 0.243, "step": 12963 }, { "epoch": 0.74, "grad_norm": 1.351882544006569, "learning_rate": 3.224950772022214e-06, "loss": 0.5611, "step": 12964 }, { "epoch": 0.74, "grad_norm": 0.633610003120019, "learning_rate": 3.223582154180873e-06, "loss": 0.2477, "step": 12965 }, { "epoch": 0.74, "grad_norm": 0.293079591213699, "learning_rate": 3.2222137710065915e-06, "loss": 0.2608, "step": 12966 }, { "epoch": 0.75, "grad_norm": 1.2513694259705406, "learning_rate": 3.2208456225467554e-06, "loss": 0.748, "step": 12967 }, { "epoch": 0.75, "grad_norm": 0.2295037376326925, "learning_rate": 3.219477708848743e-06, "loss": 0.1619, "step": 12968 }, { "epoch": 0.75, "grad_norm": 0.3458317104854778, "learning_rate": 3.2181100299599268e-06, "loss": 0.2019, "step": 12969 }, { "epoch": 0.75, "grad_norm": 0.3743554598909583, "learning_rate": 3.2167425859276678e-06, "loss": 0.3023, "step": 12970 }, { "epoch": 0.75, "grad_norm": 0.5944735307244252, "learning_rate": 3.215375376799319e-06, "loss": 0.3297, "step": 12971 }, { "epoch": 0.75, "grad_norm": 0.3758967024239241, "learning_rate": 3.214008402622232e-06, "loss": 0.2157, "step": 12972 }, { "epoch": 0.75, "grad_norm": 1.0402206032842936, "learning_rate": 3.2126416634437428e-06, "loss": 0.6525, "step": 12973 }, { "epoch": 0.75, "grad_norm": 0.2605416145760141, "learning_rate": 3.2112751593111803e-06, "loss": 0.2228, "step": 12974 }, { "epoch": 0.75, "grad_norm": 0.2663291579048332, "learning_rate": 3.2099088902718635e-06, "loss": 0.1584, "step": 12975 }, { "epoch": 0.75, "grad_norm": 0.7818741848797203, "learning_rate": 3.2085428563731137e-06, "loss": 0.4055, "step": 12976 }, { "epoch": 0.75, "grad_norm": 1.0304535099556142, "learning_rate": 3.207177057662233e-06, "loss": 0.4526, "step": 12977 }, { "epoch": 0.75, "grad_norm": 0.23643939698926889, "learning_rate": 3.205811494186518e-06, "loss": 0.2101, "step": 12978 }, { "epoch": 0.75, "grad_norm": 1.1088209673773897, "learning_rate": 3.2044461659932557e-06, "loss": 0.7036, "step": 12979 }, { "epoch": 0.75, "grad_norm": 0.286646221853249, "learning_rate": 3.2030810731297334e-06, "loss": 0.1906, "step": 12980 }, { "epoch": 0.75, "grad_norm": 0.3675670697198052, "learning_rate": 3.2017162156432222e-06, "loss": 0.2737, "step": 12981 }, { "epoch": 0.75, "grad_norm": 0.42808874432795885, "learning_rate": 3.2003515935809858e-06, "loss": 0.248, "step": 12982 }, { "epoch": 0.75, "grad_norm": 0.9420101336635036, "learning_rate": 3.1989872069902804e-06, "loss": 0.3599, "step": 12983 }, { "epoch": 0.75, "grad_norm": 0.3487176167214536, "learning_rate": 3.197623055918354e-06, "loss": 0.2668, "step": 12984 }, { "epoch": 0.75, "grad_norm": 1.4368984598467063, "learning_rate": 3.196259140412451e-06, "loss": 0.4066, "step": 12985 }, { "epoch": 0.75, "grad_norm": 0.2809539051773061, "learning_rate": 3.1948954605198014e-06, "loss": 0.2199, "step": 12986 }, { "epoch": 0.75, "grad_norm": 0.3451613359950105, "learning_rate": 3.193532016287629e-06, "loss": 0.2908, "step": 12987 }, { "epoch": 0.75, "grad_norm": 0.5062091570808256, "learning_rate": 3.1921688077631476e-06, "loss": 0.2377, "step": 12988 }, { "epoch": 0.75, "grad_norm": 0.39386024451333884, "learning_rate": 3.19080583499357e-06, "loss": 0.2658, "step": 12989 }, { "epoch": 0.75, "grad_norm": 0.3344590966805647, "learning_rate": 3.189443098026094e-06, "loss": 0.2519, "step": 12990 }, { "epoch": 0.75, "grad_norm": 0.41114489266247534, "learning_rate": 3.188080596907911e-06, "loss": 0.2871, "step": 12991 }, { "epoch": 0.75, "grad_norm": 0.3871947212487157, "learning_rate": 3.1867183316862005e-06, "loss": 0.2061, "step": 12992 }, { "epoch": 0.75, "grad_norm": 0.3329933325144057, "learning_rate": 3.1853563024081446e-06, "loss": 0.2933, "step": 12993 }, { "epoch": 0.75, "grad_norm": 0.3416280510762696, "learning_rate": 3.183994509120907e-06, "loss": 0.3046, "step": 12994 }, { "epoch": 0.75, "grad_norm": 0.5561745393411215, "learning_rate": 3.182632951871646e-06, "loss": 0.1458, "step": 12995 }, { "epoch": 0.75, "grad_norm": 0.32559993723873837, "learning_rate": 3.18127163070751e-06, "loss": 0.2581, "step": 12996 }, { "epoch": 0.75, "grad_norm": 0.5157714793821341, "learning_rate": 3.1799105456756463e-06, "loss": 0.3919, "step": 12997 }, { "epoch": 0.75, "grad_norm": 0.45047672562360686, "learning_rate": 3.1785496968231877e-06, "loss": 0.2458, "step": 12998 }, { "epoch": 0.75, "grad_norm": 0.3044374430749099, "learning_rate": 3.1771890841972643e-06, "loss": 0.2535, "step": 12999 }, { "epoch": 0.75, "grad_norm": 0.4077212584271979, "learning_rate": 3.1758287078449812e-06, "loss": 0.2976, "step": 13000 }, { "epoch": 0.75, "grad_norm": 0.2829686186815421, "learning_rate": 3.174468567813461e-06, "loss": 0.1982, "step": 13001 }, { "epoch": 0.75, "grad_norm": 0.3156669845287359, "learning_rate": 3.1731086641497997e-06, "loss": 0.2602, "step": 13002 }, { "epoch": 0.75, "grad_norm": 1.0682277454930609, "learning_rate": 3.171748996901093e-06, "loss": 0.7463, "step": 13003 }, { "epoch": 0.75, "grad_norm": 0.9602481865968809, "learning_rate": 3.1703895661144213e-06, "loss": 0.3494, "step": 13004 }, { "epoch": 0.75, "grad_norm": 0.2875938503824787, "learning_rate": 3.1690303718368675e-06, "loss": 0.1851, "step": 13005 }, { "epoch": 0.75, "grad_norm": 0.3280084081282049, "learning_rate": 3.1676714141154998e-06, "loss": 0.2983, "step": 13006 }, { "epoch": 0.75, "grad_norm": 0.28378818890057067, "learning_rate": 3.1663126929973766e-06, "loss": 0.1949, "step": 13007 }, { "epoch": 0.75, "grad_norm": 0.29700206228395926, "learning_rate": 3.1649542085295503e-06, "loss": 0.1941, "step": 13008 }, { "epoch": 0.75, "grad_norm": 0.4696627068472638, "learning_rate": 3.163595960759063e-06, "loss": 0.3623, "step": 13009 }, { "epoch": 0.75, "grad_norm": 0.47099670577215524, "learning_rate": 3.162237949732957e-06, "loss": 0.3376, "step": 13010 }, { "epoch": 0.75, "grad_norm": 0.2863904511902697, "learning_rate": 3.1608801754982564e-06, "loss": 0.1883, "step": 13011 }, { "epoch": 0.75, "grad_norm": 0.33279925478274053, "learning_rate": 3.1595226381019817e-06, "loss": 0.1941, "step": 13012 }, { "epoch": 0.75, "grad_norm": 0.5530973813912018, "learning_rate": 3.15816533759114e-06, "loss": 0.3521, "step": 13013 }, { "epoch": 0.75, "grad_norm": 0.23414052147715458, "learning_rate": 3.1568082740127425e-06, "loss": 0.2081, "step": 13014 }, { "epoch": 0.75, "grad_norm": 0.9721095642376917, "learning_rate": 3.1554514474137797e-06, "loss": 0.5654, "step": 13015 }, { "epoch": 0.75, "grad_norm": 0.5991458451958498, "learning_rate": 3.154094857841239e-06, "loss": 0.397, "step": 13016 }, { "epoch": 0.75, "grad_norm": 0.31615058172984073, "learning_rate": 3.152738505342097e-06, "loss": 0.2359, "step": 13017 }, { "epoch": 0.75, "grad_norm": 0.36932228495972225, "learning_rate": 3.1513823899633276e-06, "loss": 0.2451, "step": 13018 }, { "epoch": 0.75, "grad_norm": 0.2985834736662441, "learning_rate": 3.1500265117518926e-06, "loss": 0.2106, "step": 13019 }, { "epoch": 0.75, "grad_norm": 0.31142451629486045, "learning_rate": 3.148670870754744e-06, "loss": 0.2453, "step": 13020 }, { "epoch": 0.75, "grad_norm": 0.7538480855399459, "learning_rate": 3.1473154670188255e-06, "loss": 0.3306, "step": 13021 }, { "epoch": 0.75, "grad_norm": 0.3409916931889605, "learning_rate": 3.145960300591081e-06, "loss": 0.2929, "step": 13022 }, { "epoch": 0.75, "grad_norm": 0.3780237167598833, "learning_rate": 3.1446053715184367e-06, "loss": 0.2681, "step": 13023 }, { "epoch": 0.75, "grad_norm": 0.5302115363553981, "learning_rate": 3.1432506798478134e-06, "loss": 0.2266, "step": 13024 }, { "epoch": 0.75, "grad_norm": 0.20758940749083343, "learning_rate": 3.1418962256261256e-06, "loss": 0.2004, "step": 13025 }, { "epoch": 0.75, "grad_norm": 0.3761950098958392, "learning_rate": 3.1405420089002713e-06, "loss": 0.2534, "step": 13026 }, { "epoch": 0.75, "grad_norm": 0.6177190474844302, "learning_rate": 3.1391880297171574e-06, "loss": 0.2827, "step": 13027 }, { "epoch": 0.75, "grad_norm": 0.686885988108865, "learning_rate": 3.1378342881236657e-06, "loss": 0.3982, "step": 13028 }, { "epoch": 0.75, "grad_norm": 0.42586124579859064, "learning_rate": 3.1364807841666776e-06, "loss": 0.2896, "step": 13029 }, { "epoch": 0.75, "grad_norm": 0.2834899923338788, "learning_rate": 3.1351275178930616e-06, "loss": 0.2551, "step": 13030 }, { "epoch": 0.75, "grad_norm": 0.17052564170743853, "learning_rate": 3.133774489349688e-06, "loss": 0.0897, "step": 13031 }, { "epoch": 0.75, "grad_norm": 0.3672592846418563, "learning_rate": 3.1324216985834088e-06, "loss": 0.2728, "step": 13032 }, { "epoch": 0.75, "grad_norm": 0.41687149656453454, "learning_rate": 3.1310691456410703e-06, "loss": 0.3083, "step": 13033 }, { "epoch": 0.75, "grad_norm": 0.5042667901150454, "learning_rate": 3.1297168305695125e-06, "loss": 0.2768, "step": 13034 }, { "epoch": 0.75, "grad_norm": 0.3323112138388973, "learning_rate": 3.128364753415565e-06, "loss": 0.2576, "step": 13035 }, { "epoch": 0.75, "grad_norm": 1.2650359396451536, "learning_rate": 3.127012914226051e-06, "loss": 0.4992, "step": 13036 }, { "epoch": 0.75, "grad_norm": 0.256286588856045, "learning_rate": 3.125661313047783e-06, "loss": 0.2135, "step": 13037 }, { "epoch": 0.75, "grad_norm": 0.30430831676033854, "learning_rate": 3.1243099499275666e-06, "loss": 0.2453, "step": 13038 }, { "epoch": 0.75, "grad_norm": 0.7948641796041248, "learning_rate": 3.1229588249122034e-06, "loss": 0.4419, "step": 13039 }, { "epoch": 0.75, "grad_norm": 0.46946317228487927, "learning_rate": 3.12160793804848e-06, "loss": 0.2173, "step": 13040 }, { "epoch": 0.75, "grad_norm": 0.34864930416461026, "learning_rate": 3.120257289383178e-06, "loss": 0.2764, "step": 13041 }, { "epoch": 0.75, "grad_norm": 0.4017789633656141, "learning_rate": 3.1189068789630672e-06, "loss": 0.2943, "step": 13042 }, { "epoch": 0.75, "grad_norm": 0.4361244313994006, "learning_rate": 3.117556706834919e-06, "loss": 0.2576, "step": 13043 }, { "epoch": 0.75, "grad_norm": 0.727950689186441, "learning_rate": 3.116206773045486e-06, "loss": 0.212, "step": 13044 }, { "epoch": 0.75, "grad_norm": 0.33404541531367393, "learning_rate": 3.1148570776415153e-06, "loss": 0.2929, "step": 13045 }, { "epoch": 0.75, "grad_norm": 0.3258378879850681, "learning_rate": 3.1135076206697456e-06, "loss": 0.2331, "step": 13046 }, { "epoch": 0.75, "grad_norm": 0.3613632019699203, "learning_rate": 3.112158402176915e-06, "loss": 0.1676, "step": 13047 }, { "epoch": 0.75, "grad_norm": 0.5140961520189742, "learning_rate": 3.110809422209742e-06, "loss": 0.353, "step": 13048 }, { "epoch": 0.75, "grad_norm": 0.34242223527610316, "learning_rate": 3.109460680814942e-06, "loss": 0.2875, "step": 13049 }, { "epoch": 0.75, "grad_norm": 0.4790141871496615, "learning_rate": 3.108112178039222e-06, "loss": 0.2166, "step": 13050 }, { "epoch": 0.75, "grad_norm": 0.5128694160336205, "learning_rate": 3.106763913929278e-06, "loss": 0.3545, "step": 13051 }, { "epoch": 0.75, "grad_norm": 0.25136946231934676, "learning_rate": 3.1054158885318075e-06, "loss": 0.1828, "step": 13052 }, { "epoch": 0.75, "grad_norm": 0.27467417356526297, "learning_rate": 3.104068101893487e-06, "loss": 0.232, "step": 13053 }, { "epoch": 0.75, "grad_norm": 0.5452312702861496, "learning_rate": 3.102720554060993e-06, "loss": 0.3419, "step": 13054 }, { "epoch": 0.75, "grad_norm": 0.770701859262707, "learning_rate": 3.101373245080985e-06, "loss": 0.4232, "step": 13055 }, { "epoch": 0.75, "grad_norm": 0.3705818865576376, "learning_rate": 3.100026175000128e-06, "loss": 0.2777, "step": 13056 }, { "epoch": 0.75, "grad_norm": 0.3255903926001107, "learning_rate": 3.0986793438650686e-06, "loss": 0.2491, "step": 13057 }, { "epoch": 0.75, "grad_norm": 0.29833688651425433, "learning_rate": 3.097332751722447e-06, "loss": 0.265, "step": 13058 }, { "epoch": 0.75, "grad_norm": 0.280230038707804, "learning_rate": 3.095986398618892e-06, "loss": 0.1964, "step": 13059 }, { "epoch": 0.75, "grad_norm": 0.5305521770532992, "learning_rate": 3.094640284601034e-06, "loss": 0.2259, "step": 13060 }, { "epoch": 0.75, "grad_norm": 0.3592098971698116, "learning_rate": 3.093294409715486e-06, "loss": 0.3027, "step": 13061 }, { "epoch": 0.75, "grad_norm": 0.7451073577020624, "learning_rate": 3.0919487740088563e-06, "loss": 0.3797, "step": 13062 }, { "epoch": 0.75, "grad_norm": 0.3298793993154702, "learning_rate": 3.090603377527742e-06, "loss": 0.2196, "step": 13063 }, { "epoch": 0.75, "grad_norm": 0.23678147433076321, "learning_rate": 3.0892582203187337e-06, "loss": 0.184, "step": 13064 }, { "epoch": 0.75, "grad_norm": 0.36824332059382947, "learning_rate": 3.087913302428419e-06, "loss": 0.2789, "step": 13065 }, { "epoch": 0.75, "grad_norm": 0.3972684606203196, "learning_rate": 3.0865686239033687e-06, "loss": 0.2218, "step": 13066 }, { "epoch": 0.75, "grad_norm": 0.7531669824152212, "learning_rate": 3.085224184790151e-06, "loss": 0.3647, "step": 13067 }, { "epoch": 0.75, "grad_norm": 0.5677699683736633, "learning_rate": 3.083879985135322e-06, "loss": 0.3255, "step": 13068 }, { "epoch": 0.75, "grad_norm": 0.25851571654009503, "learning_rate": 3.082536024985431e-06, "loss": 0.2738, "step": 13069 }, { "epoch": 0.75, "grad_norm": 0.9987692197544271, "learning_rate": 3.0811923043870206e-06, "loss": 0.4699, "step": 13070 }, { "epoch": 0.75, "grad_norm": 0.2454516431873957, "learning_rate": 3.0798488233866196e-06, "loss": 0.1554, "step": 13071 }, { "epoch": 0.75, "grad_norm": 0.3892847958002378, "learning_rate": 3.0785055820307595e-06, "loss": 0.2817, "step": 13072 }, { "epoch": 0.75, "grad_norm": 0.3326192433773473, "learning_rate": 3.077162580365953e-06, "loss": 0.2553, "step": 13073 }, { "epoch": 0.75, "grad_norm": 0.6536009982343787, "learning_rate": 3.07581981843871e-06, "loss": 0.3113, "step": 13074 }, { "epoch": 0.75, "grad_norm": 0.36056431727318683, "learning_rate": 3.0744772962955283e-06, "loss": 0.2842, "step": 13075 }, { "epoch": 0.75, "grad_norm": 0.26550009542834724, "learning_rate": 3.0731350139828963e-06, "loss": 0.2026, "step": 13076 }, { "epoch": 0.75, "grad_norm": 0.23690984887180597, "learning_rate": 3.071792971547305e-06, "loss": 0.2057, "step": 13077 }, { "epoch": 0.75, "grad_norm": 0.5352459519460655, "learning_rate": 3.0704511690352246e-06, "loss": 0.3448, "step": 13078 }, { "epoch": 0.75, "grad_norm": 0.7568358868361343, "learning_rate": 3.0691096064931226e-06, "loss": 0.3506, "step": 13079 }, { "epoch": 0.75, "grad_norm": 0.8462214742386458, "learning_rate": 3.0677682839674526e-06, "loss": 0.2057, "step": 13080 }, { "epoch": 0.75, "grad_norm": 0.2447690890806674, "learning_rate": 3.0664272015046735e-06, "loss": 0.2524, "step": 13081 }, { "epoch": 0.75, "grad_norm": 1.2331150403392583, "learning_rate": 3.0650863591512215e-06, "loss": 0.6398, "step": 13082 }, { "epoch": 0.75, "grad_norm": 0.32002625401700546, "learning_rate": 3.063745756953531e-06, "loss": 0.1126, "step": 13083 }, { "epoch": 0.75, "grad_norm": 0.38562212812440144, "learning_rate": 3.062405394958022e-06, "loss": 0.2825, "step": 13084 }, { "epoch": 0.75, "grad_norm": 0.4755195083621667, "learning_rate": 3.061065273211121e-06, "loss": 0.2858, "step": 13085 }, { "epoch": 0.75, "grad_norm": 0.5813518421794341, "learning_rate": 3.0597253917592308e-06, "loss": 0.1095, "step": 13086 }, { "epoch": 0.75, "grad_norm": 0.38714185646623833, "learning_rate": 3.0583857506487514e-06, "loss": 0.33, "step": 13087 }, { "epoch": 0.75, "grad_norm": 1.2848101672311991, "learning_rate": 3.057046349926075e-06, "loss": 0.7524, "step": 13088 }, { "epoch": 0.75, "grad_norm": 0.2332084154602774, "learning_rate": 3.0557071896375824e-06, "loss": 0.1926, "step": 13089 }, { "epoch": 0.75, "grad_norm": 0.3755476958237447, "learning_rate": 3.054368269829654e-06, "loss": 0.2989, "step": 13090 }, { "epoch": 0.75, "grad_norm": 0.46469134330943285, "learning_rate": 3.0530295905486527e-06, "loss": 0.2978, "step": 13091 }, { "epoch": 0.75, "grad_norm": 0.4148719623586969, "learning_rate": 3.0516911518409387e-06, "loss": 0.3245, "step": 13092 }, { "epoch": 0.75, "grad_norm": 0.28103354751520443, "learning_rate": 3.0503529537528585e-06, "loss": 0.2178, "step": 13093 }, { "epoch": 0.75, "grad_norm": 1.1477225314022288, "learning_rate": 3.04901499633076e-06, "loss": 0.7313, "step": 13094 }, { "epoch": 0.75, "grad_norm": 0.6204403347702722, "learning_rate": 3.047677279620973e-06, "loss": 0.3122, "step": 13095 }, { "epoch": 0.75, "grad_norm": 0.3277311313482158, "learning_rate": 3.0463398036698222e-06, "loss": 0.2242, "step": 13096 }, { "epoch": 0.75, "grad_norm": 0.24065037007139628, "learning_rate": 3.0450025685236227e-06, "loss": 0.2127, "step": 13097 }, { "epoch": 0.75, "grad_norm": 1.3160438437281266, "learning_rate": 3.043665574228688e-06, "loss": 0.677, "step": 13098 }, { "epoch": 0.75, "grad_norm": 0.305915681969301, "learning_rate": 3.042328820831315e-06, "loss": 0.2111, "step": 13099 }, { "epoch": 0.75, "grad_norm": 0.47529902211212993, "learning_rate": 3.040992308377796e-06, "loss": 0.3485, "step": 13100 }, { "epoch": 0.75, "grad_norm": 0.4365889963384517, "learning_rate": 3.0396560369144145e-06, "loss": 0.3292, "step": 13101 }, { "epoch": 0.75, "grad_norm": 0.3014203717649501, "learning_rate": 3.038320006487445e-06, "loss": 0.2112, "step": 13102 }, { "epoch": 0.75, "grad_norm": 0.2850257890850647, "learning_rate": 3.036984217143154e-06, "loss": 0.1755, "step": 13103 }, { "epoch": 0.75, "grad_norm": 0.3429237306537405, "learning_rate": 3.0356486689278e-06, "loss": 0.3056, "step": 13104 }, { "epoch": 0.75, "grad_norm": 0.3024864803605085, "learning_rate": 3.034313361887631e-06, "loss": 0.242, "step": 13105 }, { "epoch": 0.75, "grad_norm": 0.8906428296674906, "learning_rate": 3.0329782960688926e-06, "loss": 0.3281, "step": 13106 }, { "epoch": 0.75, "grad_norm": 0.5836982031317877, "learning_rate": 3.031643471517817e-06, "loss": 0.3389, "step": 13107 }, { "epoch": 0.75, "grad_norm": 0.3551032461668801, "learning_rate": 3.0303088882806276e-06, "loss": 0.2684, "step": 13108 }, { "epoch": 0.75, "grad_norm": 0.2158504718862656, "learning_rate": 3.028974546403539e-06, "loss": 0.1674, "step": 13109 }, { "epoch": 0.75, "grad_norm": 0.7293506132465545, "learning_rate": 3.027640445932766e-06, "loss": 0.4317, "step": 13110 }, { "epoch": 0.75, "grad_norm": 0.36427266642415257, "learning_rate": 3.0263065869145035e-06, "loss": 0.26, "step": 13111 }, { "epoch": 0.75, "grad_norm": 0.5653448581871906, "learning_rate": 3.024972969394944e-06, "loss": 0.2791, "step": 13112 }, { "epoch": 0.75, "grad_norm": 0.4739343898820716, "learning_rate": 3.023639593420271e-06, "loss": 0.3312, "step": 13113 }, { "epoch": 0.75, "grad_norm": 0.36681901328564676, "learning_rate": 3.022306459036656e-06, "loss": 0.2701, "step": 13114 }, { "epoch": 0.75, "grad_norm": 0.29844337218234673, "learning_rate": 3.0209735662902706e-06, "loss": 0.1129, "step": 13115 }, { "epoch": 0.75, "grad_norm": 0.35840107821791856, "learning_rate": 3.019640915227271e-06, "loss": 0.2941, "step": 13116 }, { "epoch": 0.75, "grad_norm": 0.3529763050954908, "learning_rate": 3.0183085058938068e-06, "loss": 0.2667, "step": 13117 }, { "epoch": 0.75, "grad_norm": 0.8960601500229003, "learning_rate": 3.016976338336015e-06, "loss": 0.3917, "step": 13118 }, { "epoch": 0.75, "grad_norm": 0.8699740633932406, "learning_rate": 3.015644412600036e-06, "loss": 0.2787, "step": 13119 }, { "epoch": 0.75, "grad_norm": 0.316723083079564, "learning_rate": 3.0143127287319895e-06, "loss": 0.2392, "step": 13120 }, { "epoch": 0.75, "grad_norm": 0.2449187149952753, "learning_rate": 3.012981286777994e-06, "loss": 0.2233, "step": 13121 }, { "epoch": 0.75, "grad_norm": 1.1979911990400887, "learning_rate": 3.0116500867841525e-06, "loss": 0.4074, "step": 13122 }, { "epoch": 0.75, "grad_norm": 0.3560474829287622, "learning_rate": 3.0103191287965715e-06, "loss": 0.2798, "step": 13123 }, { "epoch": 0.75, "grad_norm": 0.9490243159614561, "learning_rate": 3.008988412861338e-06, "loss": 0.3888, "step": 13124 }, { "epoch": 0.75, "grad_norm": 0.34330007122672573, "learning_rate": 3.007657939024535e-06, "loss": 0.2458, "step": 13125 }, { "epoch": 0.75, "grad_norm": 0.31736090452794763, "learning_rate": 3.006327707332235e-06, "loss": 0.2582, "step": 13126 }, { "epoch": 0.75, "grad_norm": 0.4724601701873738, "learning_rate": 3.004997717830508e-06, "loss": 0.2203, "step": 13127 }, { "epoch": 0.75, "grad_norm": 0.2326895772207429, "learning_rate": 3.003667970565409e-06, "loss": 0.1987, "step": 13128 }, { "epoch": 0.75, "grad_norm": 0.3605528897187082, "learning_rate": 3.002338465582988e-06, "loss": 0.2826, "step": 13129 }, { "epoch": 0.75, "grad_norm": 0.9734472280455081, "learning_rate": 3.0010092029292835e-06, "loss": 0.3781, "step": 13130 }, { "epoch": 0.75, "grad_norm": 1.0805909522215216, "learning_rate": 2.9996801826503275e-06, "loss": 0.5344, "step": 13131 }, { "epoch": 0.75, "grad_norm": 0.2773513817656313, "learning_rate": 2.9983514047921493e-06, "loss": 0.1889, "step": 13132 }, { "epoch": 0.75, "grad_norm": 0.35829546695635645, "learning_rate": 2.9970228694007598e-06, "loss": 0.3274, "step": 13133 }, { "epoch": 0.75, "grad_norm": 0.4805023126371464, "learning_rate": 2.995694576522168e-06, "loss": 0.2938, "step": 13134 }, { "epoch": 0.75, "grad_norm": 0.32261178589923567, "learning_rate": 2.9943665262023714e-06, "loss": 0.2114, "step": 13135 }, { "epoch": 0.75, "grad_norm": 0.36506769087255786, "learning_rate": 2.993038718487361e-06, "loss": 0.3249, "step": 13136 }, { "epoch": 0.75, "grad_norm": 0.3643438925093802, "learning_rate": 2.991711153423118e-06, "loss": 0.215, "step": 13137 }, { "epoch": 0.75, "grad_norm": 0.33487426259480946, "learning_rate": 2.9903838310556133e-06, "loss": 0.1942, "step": 13138 }, { "epoch": 0.75, "grad_norm": 0.4488417837759212, "learning_rate": 2.989056751430819e-06, "loss": 0.2472, "step": 13139 }, { "epoch": 0.75, "grad_norm": 0.3362232531745725, "learning_rate": 2.987729914594687e-06, "loss": 0.3241, "step": 13140 }, { "epoch": 0.76, "grad_norm": 0.29966928249023517, "learning_rate": 2.9864033205931675e-06, "loss": 0.2217, "step": 13141 }, { "epoch": 0.76, "grad_norm": 0.863128083275095, "learning_rate": 2.9850769694721982e-06, "loss": 0.5081, "step": 13142 }, { "epoch": 0.76, "grad_norm": 0.39978155517146674, "learning_rate": 2.9837508612777087e-06, "loss": 0.2601, "step": 13143 }, { "epoch": 0.76, "grad_norm": 0.2719235906065331, "learning_rate": 2.9824249960556294e-06, "loss": 0.2627, "step": 13144 }, { "epoch": 0.76, "grad_norm": 0.5339971689860459, "learning_rate": 2.9810993738518702e-06, "loss": 0.2528, "step": 13145 }, { "epoch": 0.76, "grad_norm": 0.7843067034359767, "learning_rate": 2.9797739947123383e-06, "loss": 0.4102, "step": 13146 }, { "epoch": 0.76, "grad_norm": 0.3894676336923343, "learning_rate": 2.9784488586829272e-06, "loss": 0.2814, "step": 13147 }, { "epoch": 0.76, "grad_norm": 0.2722933405316551, "learning_rate": 2.9771239658095342e-06, "loss": 0.249, "step": 13148 }, { "epoch": 0.76, "grad_norm": 0.2812989717087355, "learning_rate": 2.975799316138035e-06, "loss": 0.187, "step": 13149 }, { "epoch": 0.76, "grad_norm": 0.39908579003212574, "learning_rate": 2.9744749097143046e-06, "loss": 0.2796, "step": 13150 }, { "epoch": 0.76, "grad_norm": 0.6090914983354809, "learning_rate": 2.9731507465842025e-06, "loss": 0.2293, "step": 13151 }, { "epoch": 0.76, "grad_norm": 0.4057610007364994, "learning_rate": 2.97182682679359e-06, "loss": 0.304, "step": 13152 }, { "epoch": 0.76, "grad_norm": 0.4026742727902014, "learning_rate": 2.970503150388313e-06, "loss": 0.3015, "step": 13153 }, { "epoch": 0.76, "grad_norm": 0.29655660277141555, "learning_rate": 2.96917971741421e-06, "loss": 0.1956, "step": 13154 }, { "epoch": 0.76, "grad_norm": 0.4737388090525767, "learning_rate": 2.9678565279171113e-06, "loss": 0.2305, "step": 13155 }, { "epoch": 0.76, "grad_norm": 0.25693414461923525, "learning_rate": 2.9665335819428354e-06, "loss": 0.2463, "step": 13156 }, { "epoch": 0.76, "grad_norm": 0.5539166843080455, "learning_rate": 2.9652108795372016e-06, "loss": 0.323, "step": 13157 }, { "epoch": 0.76, "grad_norm": 0.6583760802800553, "learning_rate": 2.963888420746013e-06, "loss": 0.2997, "step": 13158 }, { "epoch": 0.76, "grad_norm": 0.3184635760317434, "learning_rate": 2.962566205615065e-06, "loss": 0.2428, "step": 13159 }, { "epoch": 0.76, "grad_norm": 0.45352708893623567, "learning_rate": 2.9612442341901448e-06, "loss": 0.3603, "step": 13160 }, { "epoch": 0.76, "grad_norm": 0.22507968518561244, "learning_rate": 2.9599225065170356e-06, "loss": 0.1509, "step": 13161 }, { "epoch": 0.76, "grad_norm": 0.32301562997476935, "learning_rate": 2.9586010226415085e-06, "loss": 0.2435, "step": 13162 }, { "epoch": 0.76, "grad_norm": 0.9494394947854288, "learning_rate": 2.9572797826093256e-06, "loss": 0.4594, "step": 13163 }, { "epoch": 0.76, "grad_norm": 0.36294253708825114, "learning_rate": 2.9559587864662365e-06, "loss": 0.2539, "step": 13164 }, { "epoch": 0.76, "grad_norm": 0.6077925906601935, "learning_rate": 2.9546380342579962e-06, "loss": 0.3771, "step": 13165 }, { "epoch": 0.76, "grad_norm": 0.3826961493987988, "learning_rate": 2.953317526030337e-06, "loss": 0.3284, "step": 13166 }, { "epoch": 0.76, "grad_norm": 0.26520872395312056, "learning_rate": 2.9519972618289894e-06, "loss": 0.2096, "step": 13167 }, { "epoch": 0.76, "grad_norm": 0.3666621982903667, "learning_rate": 2.9506772416996732e-06, "loss": 0.1855, "step": 13168 }, { "epoch": 0.76, "grad_norm": 0.3930965638463575, "learning_rate": 2.9493574656881006e-06, "loss": 0.29, "step": 13169 }, { "epoch": 0.76, "grad_norm": 0.8084312110357765, "learning_rate": 2.9480379338399757e-06, "loss": 0.3901, "step": 13170 }, { "epoch": 0.76, "grad_norm": 0.3213576415786909, "learning_rate": 2.9467186462009943e-06, "loss": 0.1748, "step": 13171 }, { "epoch": 0.76, "grad_norm": 0.2886036323658359, "learning_rate": 2.94539960281684e-06, "loss": 0.2953, "step": 13172 }, { "epoch": 0.76, "grad_norm": 0.44952697496270905, "learning_rate": 2.944080803733197e-06, "loss": 0.2489, "step": 13173 }, { "epoch": 0.76, "grad_norm": 0.24913717295057006, "learning_rate": 2.942762248995733e-06, "loss": 0.1602, "step": 13174 }, { "epoch": 0.76, "grad_norm": 0.5314385966365966, "learning_rate": 2.9414439386501082e-06, "loss": 0.3329, "step": 13175 }, { "epoch": 0.76, "grad_norm": 0.3573835288969693, "learning_rate": 2.9401258727419723e-06, "loss": 0.3176, "step": 13176 }, { "epoch": 0.76, "grad_norm": 0.31895803358139446, "learning_rate": 2.938808051316978e-06, "loss": 0.1936, "step": 13177 }, { "epoch": 0.76, "grad_norm": 0.5156004074194172, "learning_rate": 2.937490474420758e-06, "loss": 0.3777, "step": 13178 }, { "epoch": 0.76, "grad_norm": 0.3266568107674821, "learning_rate": 2.9361731420989382e-06, "loss": 0.1728, "step": 13179 }, { "epoch": 0.76, "grad_norm": 0.2562266580469743, "learning_rate": 2.9348560543971383e-06, "loss": 0.2493, "step": 13180 }, { "epoch": 0.76, "grad_norm": 0.4862114642170611, "learning_rate": 2.933539211360966e-06, "loss": 0.2606, "step": 13181 }, { "epoch": 0.76, "grad_norm": 0.8436076426893645, "learning_rate": 2.932222613036032e-06, "loss": 0.4518, "step": 13182 }, { "epoch": 0.76, "grad_norm": 0.8093127081004269, "learning_rate": 2.930906259467924e-06, "loss": 0.3676, "step": 13183 }, { "epoch": 0.76, "grad_norm": 0.24389401278509923, "learning_rate": 2.9295901507022275e-06, "loss": 0.2305, "step": 13184 }, { "epoch": 0.76, "grad_norm": 0.5109376582936659, "learning_rate": 2.928274286784517e-06, "loss": 0.2928, "step": 13185 }, { "epoch": 0.76, "grad_norm": 0.6477468458115577, "learning_rate": 2.9269586677603677e-06, "loss": 0.3776, "step": 13186 }, { "epoch": 0.76, "grad_norm": 0.28910265146819936, "learning_rate": 2.9256432936753354e-06, "loss": 0.1995, "step": 13187 }, { "epoch": 0.76, "grad_norm": 0.350700075545137, "learning_rate": 2.924328164574972e-06, "loss": 0.3169, "step": 13188 }, { "epoch": 0.76, "grad_norm": 0.7686568143004432, "learning_rate": 2.923013280504816e-06, "loss": 0.4884, "step": 13189 }, { "epoch": 0.76, "grad_norm": 0.3587136303027198, "learning_rate": 2.9216986415104097e-06, "loss": 0.2156, "step": 13190 }, { "epoch": 0.76, "grad_norm": 0.7374740757701493, "learning_rate": 2.9203842476372747e-06, "loss": 0.3926, "step": 13191 }, { "epoch": 0.76, "grad_norm": 0.2838512447883044, "learning_rate": 2.9190700989309285e-06, "loss": 0.2682, "step": 13192 }, { "epoch": 0.76, "grad_norm": 0.290622311729339, "learning_rate": 2.9177561954368804e-06, "loss": 0.2525, "step": 13193 }, { "epoch": 0.76, "grad_norm": 0.5359851867679772, "learning_rate": 2.916442537200629e-06, "loss": 0.1526, "step": 13194 }, { "epoch": 0.76, "grad_norm": 0.37080513662796255, "learning_rate": 2.9151291242676692e-06, "loss": 0.2884, "step": 13195 }, { "epoch": 0.76, "grad_norm": 0.30793946557758933, "learning_rate": 2.9138159566834834e-06, "loss": 0.2773, "step": 13196 }, { "epoch": 0.76, "grad_norm": 0.8039012607707117, "learning_rate": 2.912503034493547e-06, "loss": 0.2705, "step": 13197 }, { "epoch": 0.76, "grad_norm": 0.5254037076250225, "learning_rate": 2.911190357743322e-06, "loss": 0.3449, "step": 13198 }, { "epoch": 0.76, "grad_norm": 0.387990735879651, "learning_rate": 2.909877926478274e-06, "loss": 0.3074, "step": 13199 }, { "epoch": 0.76, "grad_norm": 0.22161277090349127, "learning_rate": 2.9085657407438485e-06, "loss": 0.1872, "step": 13200 }, { "epoch": 0.76, "grad_norm": 0.5582834510824618, "learning_rate": 2.9072538005854855e-06, "loss": 0.3282, "step": 13201 }, { "epoch": 0.76, "grad_norm": 0.40851544103697307, "learning_rate": 2.9059421060486193e-06, "loss": 0.3156, "step": 13202 }, { "epoch": 0.76, "grad_norm": 0.35263245779401653, "learning_rate": 2.904630657178672e-06, "loss": 0.2656, "step": 13203 }, { "epoch": 0.76, "grad_norm": 0.6917076508307758, "learning_rate": 2.903319454021061e-06, "loss": 0.3521, "step": 13204 }, { "epoch": 0.76, "grad_norm": 0.34619152479873244, "learning_rate": 2.9020084966211913e-06, "loss": 0.2813, "step": 13205 }, { "epoch": 0.76, "grad_norm": 0.2818608501668898, "learning_rate": 2.900697785024459e-06, "loss": 0.1854, "step": 13206 }, { "epoch": 0.76, "grad_norm": 0.2772904606806348, "learning_rate": 2.89938731927626e-06, "loss": 0.2237, "step": 13207 }, { "epoch": 0.76, "grad_norm": 0.3456570326485675, "learning_rate": 2.8980770994219743e-06, "loss": 0.2759, "step": 13208 }, { "epoch": 0.76, "grad_norm": 0.6722228995159082, "learning_rate": 2.8967671255069717e-06, "loss": 0.3808, "step": 13209 }, { "epoch": 0.76, "grad_norm": 0.5544282846657669, "learning_rate": 2.8954573975766156e-06, "loss": 0.1777, "step": 13210 }, { "epoch": 0.76, "grad_norm": 0.33520447957453975, "learning_rate": 2.8941479156762675e-06, "loss": 0.2917, "step": 13211 }, { "epoch": 0.76, "grad_norm": 0.23424922457156772, "learning_rate": 2.892838679851272e-06, "loss": 0.1917, "step": 13212 }, { "epoch": 0.76, "grad_norm": 0.3888829363306553, "learning_rate": 2.891529690146966e-06, "loss": 0.2193, "step": 13213 }, { "epoch": 0.76, "grad_norm": 0.39275318593597286, "learning_rate": 2.8902209466086794e-06, "loss": 0.3079, "step": 13214 }, { "epoch": 0.76, "grad_norm": 0.6074206260942908, "learning_rate": 2.8889124492817377e-06, "loss": 0.3461, "step": 13215 }, { "epoch": 0.76, "grad_norm": 0.4121332734309223, "learning_rate": 2.887604198211453e-06, "loss": 0.2094, "step": 13216 }, { "epoch": 0.76, "grad_norm": 0.4086837165721845, "learning_rate": 2.886296193443129e-06, "loss": 0.2966, "step": 13217 }, { "epoch": 0.76, "grad_norm": 0.31241744718481923, "learning_rate": 2.8849884350220614e-06, "loss": 0.2266, "step": 13218 }, { "epoch": 0.76, "grad_norm": 0.3312463483694761, "learning_rate": 2.883680922993536e-06, "loss": 0.2859, "step": 13219 }, { "epoch": 0.76, "grad_norm": 0.3602296698578734, "learning_rate": 2.882373657402836e-06, "loss": 0.2236, "step": 13220 }, { "epoch": 0.76, "grad_norm": 0.6188669949771659, "learning_rate": 2.8810666382952314e-06, "loss": 0.3728, "step": 13221 }, { "epoch": 0.76, "grad_norm": 1.3765443199588423, "learning_rate": 2.879759865715982e-06, "loss": 0.5777, "step": 13222 }, { "epoch": 0.76, "grad_norm": 0.24491184844570574, "learning_rate": 2.87845333971034e-06, "loss": 0.2082, "step": 13223 }, { "epoch": 0.76, "grad_norm": 0.31949829603425767, "learning_rate": 2.877147060323555e-06, "loss": 0.2706, "step": 13224 }, { "epoch": 0.76, "grad_norm": 0.8517481211049055, "learning_rate": 2.875841027600862e-06, "loss": 0.4386, "step": 13225 }, { "epoch": 0.76, "grad_norm": 0.2866011692470818, "learning_rate": 2.8745352415874872e-06, "loss": 0.2287, "step": 13226 }, { "epoch": 0.76, "grad_norm": 0.3154714117522481, "learning_rate": 2.873229702328647e-06, "loss": 0.251, "step": 13227 }, { "epoch": 0.76, "grad_norm": 0.47557481422269005, "learning_rate": 2.8719244098695597e-06, "loss": 0.3048, "step": 13228 }, { "epoch": 0.76, "grad_norm": 0.28793328291945003, "learning_rate": 2.8706193642554237e-06, "loss": 0.1866, "step": 13229 }, { "epoch": 0.76, "grad_norm": 1.1838012367861193, "learning_rate": 2.8693145655314327e-06, "loss": 0.6212, "step": 13230 }, { "epoch": 0.76, "grad_norm": 0.3389412390749291, "learning_rate": 2.86801001374277e-06, "loss": 0.284, "step": 13231 }, { "epoch": 0.76, "grad_norm": 0.3654452826305494, "learning_rate": 2.8667057089346127e-06, "loss": 0.2889, "step": 13232 }, { "epoch": 0.76, "grad_norm": 0.14945398474158922, "learning_rate": 2.865401651152132e-06, "loss": 0.0971, "step": 13233 }, { "epoch": 0.76, "grad_norm": 0.8662144879164575, "learning_rate": 2.864097840440485e-06, "loss": 0.3408, "step": 13234 }, { "epoch": 0.76, "grad_norm": 0.33595790732355874, "learning_rate": 2.8627942768448234e-06, "loss": 0.2453, "step": 13235 }, { "epoch": 0.76, "grad_norm": 0.34996311712384964, "learning_rate": 2.861490960410289e-06, "loss": 0.2498, "step": 13236 }, { "epoch": 0.76, "grad_norm": 0.4919206950943628, "learning_rate": 2.8601878911820168e-06, "loss": 0.3234, "step": 13237 }, { "epoch": 0.76, "grad_norm": 0.3451700720073584, "learning_rate": 2.8588850692051296e-06, "loss": 0.2718, "step": 13238 }, { "epoch": 0.76, "grad_norm": 0.1936044629355527, "learning_rate": 2.857582494524742e-06, "loss": 0.1738, "step": 13239 }, { "epoch": 0.76, "grad_norm": 1.196637382273675, "learning_rate": 2.8562801671859697e-06, "loss": 0.5177, "step": 13240 }, { "epoch": 0.76, "grad_norm": 0.3138689667270411, "learning_rate": 2.8549780872339073e-06, "loss": 0.2528, "step": 13241 }, { "epoch": 0.76, "grad_norm": 0.7183731018697147, "learning_rate": 2.8536762547136464e-06, "loss": 0.3813, "step": 13242 }, { "epoch": 0.76, "grad_norm": 0.3149057673584101, "learning_rate": 2.85237466967027e-06, "loss": 0.2506, "step": 13243 }, { "epoch": 0.76, "grad_norm": 0.31630528529321505, "learning_rate": 2.851073332148848e-06, "loss": 0.2517, "step": 13244 }, { "epoch": 0.76, "grad_norm": 0.4393966081472135, "learning_rate": 2.849772242194453e-06, "loss": 0.2605, "step": 13245 }, { "epoch": 0.76, "grad_norm": 0.3954421837785276, "learning_rate": 2.8484713998521364e-06, "loss": 0.1002, "step": 13246 }, { "epoch": 0.76, "grad_norm": 0.25876846616548765, "learning_rate": 2.847170805166949e-06, "loss": 0.2416, "step": 13247 }, { "epoch": 0.76, "grad_norm": 0.47757103299646225, "learning_rate": 2.8458704581839247e-06, "loss": 0.3262, "step": 13248 }, { "epoch": 0.76, "grad_norm": 0.9076141858611277, "learning_rate": 2.844570358948103e-06, "loss": 0.3371, "step": 13249 }, { "epoch": 0.76, "grad_norm": 0.3070964861074088, "learning_rate": 2.843270507504502e-06, "loss": 0.2474, "step": 13250 }, { "epoch": 0.76, "grad_norm": 0.37473028523472246, "learning_rate": 2.8419709038981345e-06, "loss": 0.2904, "step": 13251 }, { "epoch": 0.76, "grad_norm": 0.1318432605062941, "learning_rate": 2.840671548174004e-06, "loss": 0.0704, "step": 13252 }, { "epoch": 0.76, "grad_norm": 0.39967919119913864, "learning_rate": 2.8393724403771137e-06, "loss": 0.2531, "step": 13253 }, { "epoch": 0.76, "grad_norm": 1.0046598432534393, "learning_rate": 2.8380735805524475e-06, "loss": 0.3925, "step": 13254 }, { "epoch": 0.76, "grad_norm": 0.3777098827872954, "learning_rate": 2.8367749687449853e-06, "loss": 0.334, "step": 13255 }, { "epoch": 0.76, "grad_norm": 0.3147497604198872, "learning_rate": 2.835476604999695e-06, "loss": 0.1906, "step": 13256 }, { "epoch": 0.76, "grad_norm": 0.4294056542690755, "learning_rate": 2.8341784893615443e-06, "loss": 0.2689, "step": 13257 }, { "epoch": 0.76, "grad_norm": 0.43842450172880576, "learning_rate": 2.8328806218754855e-06, "loss": 0.2174, "step": 13258 }, { "epoch": 0.76, "grad_norm": 0.25153714414036416, "learning_rate": 2.831583002586461e-06, "loss": 0.2005, "step": 13259 }, { "epoch": 0.76, "grad_norm": 0.5546683602382102, "learning_rate": 2.83028563153941e-06, "loss": 0.3272, "step": 13260 }, { "epoch": 0.76, "grad_norm": 1.3431270553899364, "learning_rate": 2.8289885087792557e-06, "loss": 0.6881, "step": 13261 }, { "epoch": 0.76, "grad_norm": 0.2682221801490365, "learning_rate": 2.827691634350924e-06, "loss": 0.2002, "step": 13262 }, { "epoch": 0.76, "grad_norm": 0.3618717416925984, "learning_rate": 2.826395008299323e-06, "loss": 0.29, "step": 13263 }, { "epoch": 0.76, "grad_norm": 0.3130034952304038, "learning_rate": 2.8250986306693553e-06, "loss": 0.1539, "step": 13264 }, { "epoch": 0.76, "grad_norm": 0.3121843996017311, "learning_rate": 2.823802501505909e-06, "loss": 0.2223, "step": 13265 }, { "epoch": 0.76, "grad_norm": 0.7111069282689343, "learning_rate": 2.8225066208538765e-06, "loss": 0.4252, "step": 13266 }, { "epoch": 0.76, "grad_norm": 0.3587339369972459, "learning_rate": 2.821210988758132e-06, "loss": 0.3243, "step": 13267 }, { "epoch": 0.76, "grad_norm": 0.31053986252393684, "learning_rate": 2.8199156052635412e-06, "loss": 0.2829, "step": 13268 }, { "epoch": 0.76, "grad_norm": 1.3607437392178636, "learning_rate": 2.8186204704149643e-06, "loss": 0.2799, "step": 13269 }, { "epoch": 0.76, "grad_norm": 0.259545723920443, "learning_rate": 2.817325584257252e-06, "loss": 0.2053, "step": 13270 }, { "epoch": 0.76, "grad_norm": 0.36841259735356224, "learning_rate": 2.8160309468352465e-06, "loss": 0.3017, "step": 13271 }, { "epoch": 0.76, "grad_norm": 0.38905719201812056, "learning_rate": 2.81473655819378e-06, "loss": 0.2515, "step": 13272 }, { "epoch": 0.76, "grad_norm": 1.0396985241949528, "learning_rate": 2.813442418377674e-06, "loss": 0.7514, "step": 13273 }, { "epoch": 0.76, "grad_norm": 0.39836820205825924, "learning_rate": 2.812148527431752e-06, "loss": 0.2428, "step": 13274 }, { "epoch": 0.76, "grad_norm": 0.305449725807176, "learning_rate": 2.8108548854008166e-06, "loss": 0.242, "step": 13275 }, { "epoch": 0.76, "grad_norm": 0.44681122919529676, "learning_rate": 2.8095614923296676e-06, "loss": 0.2259, "step": 13276 }, { "epoch": 0.76, "grad_norm": 0.3816614658218707, "learning_rate": 2.8082683482630912e-06, "loss": 0.2823, "step": 13277 }, { "epoch": 0.76, "grad_norm": 0.2946987437084205, "learning_rate": 2.806975453245877e-06, "loss": 0.2036, "step": 13278 }, { "epoch": 0.76, "grad_norm": 0.49942919812183445, "learning_rate": 2.8056828073227925e-06, "loss": 0.3453, "step": 13279 }, { "epoch": 0.76, "grad_norm": 0.40850999837166446, "learning_rate": 2.804390410538603e-06, "loss": 0.3256, "step": 13280 }, { "epoch": 0.76, "grad_norm": 0.5696317559539408, "learning_rate": 2.803098262938062e-06, "loss": 0.3323, "step": 13281 }, { "epoch": 0.76, "grad_norm": 0.4290707314341114, "learning_rate": 2.801806364565921e-06, "loss": 0.2762, "step": 13282 }, { "epoch": 0.76, "grad_norm": 0.26581716980266745, "learning_rate": 2.8005147154669166e-06, "loss": 0.2421, "step": 13283 }, { "epoch": 0.76, "grad_norm": 0.299496740652704, "learning_rate": 2.7992233156857784e-06, "loss": 0.1897, "step": 13284 }, { "epoch": 0.76, "grad_norm": 1.0457263308117586, "learning_rate": 2.7979321652672266e-06, "loss": 0.5193, "step": 13285 }, { "epoch": 0.76, "grad_norm": 0.3313512064073587, "learning_rate": 2.79664126425597e-06, "loss": 0.2552, "step": 13286 }, { "epoch": 0.76, "grad_norm": 0.3630065685033784, "learning_rate": 2.795350612696721e-06, "loss": 0.276, "step": 13287 }, { "epoch": 0.76, "grad_norm": 0.6448201348910892, "learning_rate": 2.794060210634171e-06, "loss": 0.2862, "step": 13288 }, { "epoch": 0.76, "grad_norm": 0.6742071686259051, "learning_rate": 2.7927700581130046e-06, "loss": 0.3216, "step": 13289 }, { "epoch": 0.76, "grad_norm": 0.2330554908362521, "learning_rate": 2.7914801551778994e-06, "loss": 0.2058, "step": 13290 }, { "epoch": 0.76, "grad_norm": 0.31746453612708525, "learning_rate": 2.7901905018735287e-06, "loss": 0.2666, "step": 13291 }, { "epoch": 0.76, "grad_norm": 0.6076098034746343, "learning_rate": 2.7889010982445508e-06, "loss": 0.3607, "step": 13292 }, { "epoch": 0.76, "grad_norm": 0.3652652449404285, "learning_rate": 2.7876119443356177e-06, "loss": 0.3044, "step": 13293 }, { "epoch": 0.76, "grad_norm": 0.441685084264975, "learning_rate": 2.7863230401913698e-06, "loss": 0.3271, "step": 13294 }, { "epoch": 0.76, "grad_norm": 0.28157585778291394, "learning_rate": 2.7850343858564487e-06, "loss": 0.1958, "step": 13295 }, { "epoch": 0.76, "grad_norm": 0.2538193604013508, "learning_rate": 2.7837459813754765e-06, "loss": 0.1991, "step": 13296 }, { "epoch": 0.76, "grad_norm": 1.1707178137167724, "learning_rate": 2.782457826793069e-06, "loss": 0.6914, "step": 13297 }, { "epoch": 0.76, "grad_norm": 0.4149696440496968, "learning_rate": 2.781169922153838e-06, "loss": 0.1949, "step": 13298 }, { "epoch": 0.76, "grad_norm": 0.2861185610671488, "learning_rate": 2.7798822675023795e-06, "loss": 0.2823, "step": 13299 }, { "epoch": 0.76, "grad_norm": 0.7178983508096588, "learning_rate": 2.7785948628832904e-06, "loss": 0.3858, "step": 13300 }, { "epoch": 0.76, "grad_norm": 0.4103924325196549, "learning_rate": 2.7773077083411502e-06, "loss": 0.1833, "step": 13301 }, { "epoch": 0.76, "grad_norm": 0.244686535238677, "learning_rate": 2.776020803920533e-06, "loss": 0.1998, "step": 13302 }, { "epoch": 0.76, "grad_norm": 0.355293710352619, "learning_rate": 2.774734149666005e-06, "loss": 0.3177, "step": 13303 }, { "epoch": 0.76, "grad_norm": 0.2862677955158507, "learning_rate": 2.773447745622123e-06, "loss": 0.2068, "step": 13304 }, { "epoch": 0.76, "grad_norm": 0.6852767845171588, "learning_rate": 2.7721615918334355e-06, "loss": 0.4025, "step": 13305 }, { "epoch": 0.76, "grad_norm": 0.36885958549727094, "learning_rate": 2.7708756883444776e-06, "loss": 0.297, "step": 13306 }, { "epoch": 0.76, "grad_norm": 0.5868284645412355, "learning_rate": 2.7695900351997864e-06, "loss": 0.3458, "step": 13307 }, { "epoch": 0.76, "grad_norm": 0.27339180895296544, "learning_rate": 2.7683046324438822e-06, "loss": 0.1766, "step": 13308 }, { "epoch": 0.76, "grad_norm": 0.36055577891955803, "learning_rate": 2.7670194801212768e-06, "loss": 0.2466, "step": 13309 }, { "epoch": 0.76, "grad_norm": 0.41817920256675367, "learning_rate": 2.7657345782764765e-06, "loss": 0.3027, "step": 13310 }, { "epoch": 0.76, "grad_norm": 0.2847934263341108, "learning_rate": 2.7644499269539728e-06, "loss": 0.242, "step": 13311 }, { "epoch": 0.76, "grad_norm": 0.9291208397012233, "learning_rate": 2.7631655261982605e-06, "loss": 0.4407, "step": 13312 }, { "epoch": 0.76, "grad_norm": 0.7871185563291784, "learning_rate": 2.7618813760538145e-06, "loss": 0.3596, "step": 13313 }, { "epoch": 0.76, "grad_norm": 0.2395530050943581, "learning_rate": 2.7605974765651057e-06, "loss": 0.2066, "step": 13314 }, { "epoch": 0.77, "grad_norm": 0.43243853087168727, "learning_rate": 2.759313827776592e-06, "loss": 0.2994, "step": 13315 }, { "epoch": 0.77, "grad_norm": 0.499861531513214, "learning_rate": 2.758030429732732e-06, "loss": 0.3264, "step": 13316 }, { "epoch": 0.77, "grad_norm": 0.4019932945242539, "learning_rate": 2.7567472824779663e-06, "loss": 0.2627, "step": 13317 }, { "epoch": 0.77, "grad_norm": 0.35582583706274945, "learning_rate": 2.7554643860567308e-06, "loss": 0.2845, "step": 13318 }, { "epoch": 0.77, "grad_norm": 0.5481213981526601, "learning_rate": 2.75418174051345e-06, "loss": 0.2395, "step": 13319 }, { "epoch": 0.77, "grad_norm": 0.41611532347043945, "learning_rate": 2.7528993458925457e-06, "loss": 0.3025, "step": 13320 }, { "epoch": 0.77, "grad_norm": 0.3799576843178077, "learning_rate": 2.751617202238427e-06, "loss": 0.1868, "step": 13321 }, { "epoch": 0.77, "grad_norm": 0.2908916015181389, "learning_rate": 2.750335309595491e-06, "loss": 0.2636, "step": 13322 }, { "epoch": 0.77, "grad_norm": 0.43567171245253916, "learning_rate": 2.7490536680081325e-06, "loss": 0.2941, "step": 13323 }, { "epoch": 0.77, "grad_norm": 0.4234412711982623, "learning_rate": 2.7477722775207303e-06, "loss": 0.1738, "step": 13324 }, { "epoch": 0.77, "grad_norm": 0.5576484292752899, "learning_rate": 2.746491138177666e-06, "loss": 0.2826, "step": 13325 }, { "epoch": 0.77, "grad_norm": 0.3903393434473755, "learning_rate": 2.745210250023301e-06, "loss": 0.3149, "step": 13326 }, { "epoch": 0.77, "grad_norm": 0.34900301912911175, "learning_rate": 2.743929613101993e-06, "loss": 0.2765, "step": 13327 }, { "epoch": 0.77, "grad_norm": 0.8648744675226305, "learning_rate": 2.7426492274580883e-06, "loss": 0.3671, "step": 13328 }, { "epoch": 0.77, "grad_norm": 0.3467725051148224, "learning_rate": 2.7413690931359316e-06, "loss": 0.3054, "step": 13329 }, { "epoch": 0.77, "grad_norm": 0.22494972847791722, "learning_rate": 2.7400892101798504e-06, "loss": 0.2063, "step": 13330 }, { "epoch": 0.77, "grad_norm": 0.7541721363633993, "learning_rate": 2.7388095786341682e-06, "loss": 0.1194, "step": 13331 }, { "epoch": 0.77, "grad_norm": 0.36934648699727984, "learning_rate": 2.7375301985431947e-06, "loss": 0.2673, "step": 13332 }, { "epoch": 0.77, "grad_norm": 0.8084421696547506, "learning_rate": 2.736251069951241e-06, "loss": 0.4386, "step": 13333 }, { "epoch": 0.77, "grad_norm": 0.29545160567203277, "learning_rate": 2.734972192902601e-06, "loss": 0.2284, "step": 13334 }, { "epoch": 0.77, "grad_norm": 0.36633179660452714, "learning_rate": 2.733693567441561e-06, "loss": 0.3288, "step": 13335 }, { "epoch": 0.77, "grad_norm": 0.24642775837098382, "learning_rate": 2.732415193612401e-06, "loss": 0.1621, "step": 13336 }, { "epoch": 0.77, "grad_norm": 0.5111131791764394, "learning_rate": 2.73113707145939e-06, "loss": 0.1199, "step": 13337 }, { "epoch": 0.77, "grad_norm": 0.37770395293264974, "learning_rate": 2.7298592010267887e-06, "loss": 0.3027, "step": 13338 }, { "epoch": 0.77, "grad_norm": 0.3905628714529268, "learning_rate": 2.7285815823588513e-06, "loss": 0.2994, "step": 13339 }, { "epoch": 0.77, "grad_norm": 0.5113595561516364, "learning_rate": 2.7273042154998188e-06, "loss": 0.2487, "step": 13340 }, { "epoch": 0.77, "grad_norm": 0.40329210177460667, "learning_rate": 2.726027100493931e-06, "loss": 0.3088, "step": 13341 }, { "epoch": 0.77, "grad_norm": 0.2326471863617773, "learning_rate": 2.724750237385412e-06, "loss": 0.2229, "step": 13342 }, { "epoch": 0.77, "grad_norm": 1.0041471856553938, "learning_rate": 2.723473626218479e-06, "loss": 0.4367, "step": 13343 }, { "epoch": 0.77, "grad_norm": 0.3045613574682275, "learning_rate": 2.722197267037339e-06, "loss": 0.1968, "step": 13344 }, { "epoch": 0.77, "grad_norm": 0.5701953085284389, "learning_rate": 2.7209211598861975e-06, "loss": 0.3772, "step": 13345 }, { "epoch": 0.77, "grad_norm": 0.3510620371677265, "learning_rate": 2.719645304809242e-06, "loss": 0.294, "step": 13346 }, { "epoch": 0.77, "grad_norm": 0.2921036385897743, "learning_rate": 2.7183697018506584e-06, "loss": 0.2072, "step": 13347 }, { "epoch": 0.77, "grad_norm": 0.2440216161862196, "learning_rate": 2.7170943510546177e-06, "loss": 0.1587, "step": 13348 }, { "epoch": 0.77, "grad_norm": 0.822130467598231, "learning_rate": 2.715819252465284e-06, "loss": 0.5094, "step": 13349 }, { "epoch": 0.77, "grad_norm": 0.24005749395345125, "learning_rate": 2.714544406126819e-06, "loss": 0.2119, "step": 13350 }, { "epoch": 0.77, "grad_norm": 0.6209963089409438, "learning_rate": 2.713269812083369e-06, "loss": 0.3862, "step": 13351 }, { "epoch": 0.77, "grad_norm": 1.0539666138783272, "learning_rate": 2.711995470379071e-06, "loss": 0.6506, "step": 13352 }, { "epoch": 0.77, "grad_norm": 0.33989522147319307, "learning_rate": 2.7107213810580536e-06, "loss": 0.1925, "step": 13353 }, { "epoch": 0.77, "grad_norm": 0.2547793738056594, "learning_rate": 2.709447544164444e-06, "loss": 0.2418, "step": 13354 }, { "epoch": 0.77, "grad_norm": 0.3889272161112719, "learning_rate": 2.708173959742353e-06, "loss": 0.2009, "step": 13355 }, { "epoch": 0.77, "grad_norm": 0.36124339525567295, "learning_rate": 2.7069006278358844e-06, "loss": 0.2852, "step": 13356 }, { "epoch": 0.77, "grad_norm": 0.866977479010928, "learning_rate": 2.70562754848913e-06, "loss": 0.29, "step": 13357 }, { "epoch": 0.77, "grad_norm": 0.32296997474230627, "learning_rate": 2.704354721746183e-06, "loss": 0.2969, "step": 13358 }, { "epoch": 0.77, "grad_norm": 0.34896980548098877, "learning_rate": 2.703082147651118e-06, "loss": 0.2693, "step": 13359 }, { "epoch": 0.77, "grad_norm": 0.2370600806689802, "learning_rate": 2.7018098262480053e-06, "loss": 0.103, "step": 13360 }, { "epoch": 0.77, "grad_norm": 0.5435084819332936, "learning_rate": 2.700537757580901e-06, "loss": 0.3418, "step": 13361 }, { "epoch": 0.77, "grad_norm": 0.31758347380995683, "learning_rate": 2.699265941693863e-06, "loss": 0.2566, "step": 13362 }, { "epoch": 0.77, "grad_norm": 0.4792746559962163, "learning_rate": 2.6979943786309315e-06, "loss": 0.2685, "step": 13363 }, { "epoch": 0.77, "grad_norm": 1.2601019771202395, "learning_rate": 2.6967230684361413e-06, "loss": 0.7461, "step": 13364 }, { "epoch": 0.77, "grad_norm": 0.3264807957462211, "learning_rate": 2.6954520111535166e-06, "loss": 0.251, "step": 13365 }, { "epoch": 0.77, "grad_norm": 0.23864470808248076, "learning_rate": 2.694181206827071e-06, "loss": 0.1862, "step": 13366 }, { "epoch": 0.77, "grad_norm": 0.6936911184017519, "learning_rate": 2.69291065550082e-06, "loss": 0.3615, "step": 13367 }, { "epoch": 0.77, "grad_norm": 0.311294629785046, "learning_rate": 2.691640357218759e-06, "loss": 0.2626, "step": 13368 }, { "epoch": 0.77, "grad_norm": 1.1184392357841415, "learning_rate": 2.690370312024878e-06, "loss": 0.4929, "step": 13369 }, { "epoch": 0.77, "grad_norm": 0.32713334536191796, "learning_rate": 2.6891005199631558e-06, "loss": 0.2735, "step": 13370 }, { "epoch": 0.77, "grad_norm": 0.3248615650269381, "learning_rate": 2.6878309810775738e-06, "loss": 0.2617, "step": 13371 }, { "epoch": 0.77, "grad_norm": 1.4369216717341033, "learning_rate": 2.6865616954120878e-06, "loss": 0.5723, "step": 13372 }, { "epoch": 0.77, "grad_norm": 0.5156635728031603, "learning_rate": 2.6852926630106558e-06, "loss": 0.2641, "step": 13373 }, { "epoch": 0.77, "grad_norm": 0.3373075279153563, "learning_rate": 2.6840238839172206e-06, "loss": 0.2585, "step": 13374 }, { "epoch": 0.77, "grad_norm": 0.36128908717495617, "learning_rate": 2.682755358175728e-06, "loss": 0.1984, "step": 13375 }, { "epoch": 0.77, "grad_norm": 0.71767892274232, "learning_rate": 2.6814870858301013e-06, "loss": 0.2892, "step": 13376 }, { "epoch": 0.77, "grad_norm": 0.36937216939202855, "learning_rate": 2.6802190669242634e-06, "loss": 0.2787, "step": 13377 }, { "epoch": 0.77, "grad_norm": 0.3642005381982544, "learning_rate": 2.6789513015021207e-06, "loss": 0.2875, "step": 13378 }, { "epoch": 0.77, "grad_norm": 0.44540040503421996, "learning_rate": 2.6776837896075824e-06, "loss": 0.2292, "step": 13379 }, { "epoch": 0.77, "grad_norm": 0.33180184811006774, "learning_rate": 2.6764165312845402e-06, "loss": 0.2614, "step": 13380 }, { "epoch": 0.77, "grad_norm": 0.32777961922851084, "learning_rate": 2.675149526576879e-06, "loss": 0.2594, "step": 13381 }, { "epoch": 0.77, "grad_norm": 0.4898636512120215, "learning_rate": 2.67388277552847e-06, "loss": 0.3877, "step": 13382 }, { "epoch": 0.77, "grad_norm": 0.2721793650080163, "learning_rate": 2.67261627818319e-06, "loss": 0.197, "step": 13383 }, { "epoch": 0.77, "grad_norm": 0.7355116149856278, "learning_rate": 2.671350034584893e-06, "loss": 0.3658, "step": 13384 }, { "epoch": 0.77, "grad_norm": 0.47438055021689923, "learning_rate": 2.670084044777429e-06, "loss": 0.3412, "step": 13385 }, { "epoch": 0.77, "grad_norm": 0.2641302345028114, "learning_rate": 2.668818308804636e-06, "loss": 0.2146, "step": 13386 }, { "epoch": 0.77, "grad_norm": 0.25613544160514196, "learning_rate": 2.6675528267103534e-06, "loss": 0.1642, "step": 13387 }, { "epoch": 0.77, "grad_norm": 1.011992324523561, "learning_rate": 2.6662875985384007e-06, "loss": 0.6767, "step": 13388 }, { "epoch": 0.77, "grad_norm": 0.3127942387817402, "learning_rate": 2.665022624332593e-06, "loss": 0.2063, "step": 13389 }, { "epoch": 0.77, "grad_norm": 0.3537706267658949, "learning_rate": 2.6637579041367357e-06, "loss": 0.2924, "step": 13390 }, { "epoch": 0.77, "grad_norm": 0.71475916044793, "learning_rate": 2.6624934379946243e-06, "loss": 0.3884, "step": 13391 }, { "epoch": 0.77, "grad_norm": 0.21694765840296173, "learning_rate": 2.661229225950054e-06, "loss": 0.1554, "step": 13392 }, { "epoch": 0.77, "grad_norm": 0.35457329355787714, "learning_rate": 2.659965268046798e-06, "loss": 0.2834, "step": 13393 }, { "epoch": 0.77, "grad_norm": 0.4947678076948455, "learning_rate": 2.6587015643286295e-06, "loss": 0.3978, "step": 13394 }, { "epoch": 0.77, "grad_norm": 0.5838803034932754, "learning_rate": 2.657438114839308e-06, "loss": 0.3296, "step": 13395 }, { "epoch": 0.77, "grad_norm": 0.3963018840650698, "learning_rate": 2.6561749196225915e-06, "loss": 0.2608, "step": 13396 }, { "epoch": 0.77, "grad_norm": 0.3411749326825493, "learning_rate": 2.654911978722222e-06, "loss": 0.2992, "step": 13397 }, { "epoch": 0.77, "grad_norm": 0.4121763965343542, "learning_rate": 2.6536492921819346e-06, "loss": 0.2847, "step": 13398 }, { "epoch": 0.77, "grad_norm": 0.23315859992533955, "learning_rate": 2.6523868600454526e-06, "loss": 0.143, "step": 13399 }, { "epoch": 0.77, "grad_norm": 1.2290226217364666, "learning_rate": 2.6511246823565016e-06, "loss": 0.6549, "step": 13400 }, { "epoch": 0.77, "grad_norm": 0.48684221439801073, "learning_rate": 2.649862759158787e-06, "loss": 0.2985, "step": 13401 }, { "epoch": 0.77, "grad_norm": 0.27189307366780724, "learning_rate": 2.648601090496008e-06, "loss": 0.2455, "step": 13402 }, { "epoch": 0.77, "grad_norm": 1.2851418916503219, "learning_rate": 2.6473396764118575e-06, "loss": 0.5728, "step": 13403 }, { "epoch": 0.77, "grad_norm": 0.3704241967181858, "learning_rate": 2.646078516950018e-06, "loss": 0.2355, "step": 13404 }, { "epoch": 0.77, "grad_norm": 0.32679798644361935, "learning_rate": 2.6448176121541634e-06, "loss": 0.2539, "step": 13405 }, { "epoch": 0.77, "grad_norm": 0.42009343086991197, "learning_rate": 2.643556962067958e-06, "loss": 0.2625, "step": 13406 }, { "epoch": 0.77, "grad_norm": 0.3364463651284445, "learning_rate": 2.6422965667350566e-06, "loss": 0.2599, "step": 13407 }, { "epoch": 0.77, "grad_norm": 0.4603678526160474, "learning_rate": 2.6410364261991108e-06, "loss": 0.2815, "step": 13408 }, { "epoch": 0.77, "grad_norm": 0.34026034801807636, "learning_rate": 2.6397765405037577e-06, "loss": 0.2485, "step": 13409 }, { "epoch": 0.77, "grad_norm": 0.32456868640721753, "learning_rate": 2.6385169096926265e-06, "loss": 0.2346, "step": 13410 }, { "epoch": 0.77, "grad_norm": 0.49966563733385355, "learning_rate": 2.637257533809334e-06, "loss": 0.2526, "step": 13411 }, { "epoch": 0.77, "grad_norm": 0.6283074877544407, "learning_rate": 2.6359984128975013e-06, "loss": 0.33, "step": 13412 }, { "epoch": 0.77, "grad_norm": 0.36892761734499885, "learning_rate": 2.6347395470007254e-06, "loss": 0.2784, "step": 13413 }, { "epoch": 0.77, "grad_norm": 0.27568567981994324, "learning_rate": 2.6334809361626034e-06, "loss": 0.2485, "step": 13414 }, { "epoch": 0.77, "grad_norm": 1.3477396319559145, "learning_rate": 2.632222580426719e-06, "loss": 0.2197, "step": 13415 }, { "epoch": 0.77, "grad_norm": 0.5419492347153053, "learning_rate": 2.6309644798366474e-06, "loss": 0.2762, "step": 13416 }, { "epoch": 0.77, "grad_norm": 0.3174658239757126, "learning_rate": 2.6297066344359612e-06, "loss": 0.2882, "step": 13417 }, { "epoch": 0.77, "grad_norm": 0.47016789004810144, "learning_rate": 2.6284490442682186e-06, "loss": 0.3389, "step": 13418 }, { "epoch": 0.77, "grad_norm": 0.3946797301594869, "learning_rate": 2.6271917093769673e-06, "loss": 0.2183, "step": 13419 }, { "epoch": 0.77, "grad_norm": 0.23602412255899952, "learning_rate": 2.6259346298057476e-06, "loss": 0.1842, "step": 13420 }, { "epoch": 0.77, "grad_norm": 0.34069892961807774, "learning_rate": 2.6246778055980983e-06, "loss": 0.2678, "step": 13421 }, { "epoch": 0.77, "grad_norm": 0.5221130511938119, "learning_rate": 2.6234212367975375e-06, "loss": 0.1991, "step": 13422 }, { "epoch": 0.77, "grad_norm": 0.6077899342263603, "learning_rate": 2.6221649234475845e-06, "loss": 0.4096, "step": 13423 }, { "epoch": 0.77, "grad_norm": 0.6059573421941735, "learning_rate": 2.620908865591738e-06, "loss": 0.3912, "step": 13424 }, { "epoch": 0.77, "grad_norm": 0.25779870510643993, "learning_rate": 2.619653063273504e-06, "loss": 0.2187, "step": 13425 }, { "epoch": 0.77, "grad_norm": 0.3211535151394236, "learning_rate": 2.618397516536367e-06, "loss": 0.2455, "step": 13426 }, { "epoch": 0.77, "grad_norm": 0.42875680879462036, "learning_rate": 2.6171422254238067e-06, "loss": 0.2418, "step": 13427 }, { "epoch": 0.77, "grad_norm": 0.4063859681100399, "learning_rate": 2.6158871899792927e-06, "loss": 0.1835, "step": 13428 }, { "epoch": 0.77, "grad_norm": 0.3628029769796745, "learning_rate": 2.6146324102462862e-06, "loss": 0.2992, "step": 13429 }, { "epoch": 0.77, "grad_norm": 0.5550301775668914, "learning_rate": 2.6133778862682433e-06, "loss": 0.3215, "step": 13430 }, { "epoch": 0.77, "grad_norm": 0.7979129683971643, "learning_rate": 2.612123618088608e-06, "loss": 0.4975, "step": 13431 }, { "epoch": 0.77, "grad_norm": 0.1949401131221945, "learning_rate": 2.610869605750813e-06, "loss": 0.138, "step": 13432 }, { "epoch": 0.77, "grad_norm": 0.35522383806764823, "learning_rate": 2.6096158492982837e-06, "loss": 0.2854, "step": 13433 }, { "epoch": 0.77, "grad_norm": 0.8215836187570505, "learning_rate": 2.6083623487744423e-06, "loss": 0.4089, "step": 13434 }, { "epoch": 0.77, "grad_norm": 0.32566860474390014, "learning_rate": 2.6071091042226947e-06, "loss": 0.2306, "step": 13435 }, { "epoch": 0.77, "grad_norm": 0.7199078363127668, "learning_rate": 2.6058561156864415e-06, "loss": 0.4138, "step": 13436 }, { "epoch": 0.77, "grad_norm": 0.3209376148836106, "learning_rate": 2.60460338320907e-06, "loss": 0.3103, "step": 13437 }, { "epoch": 0.77, "grad_norm": 0.25042851717939346, "learning_rate": 2.603350906833971e-06, "loss": 0.1709, "step": 13438 }, { "epoch": 0.77, "grad_norm": 0.482862170243484, "learning_rate": 2.6020986866045085e-06, "loss": 0.2197, "step": 13439 }, { "epoch": 0.77, "grad_norm": 0.5706303570276031, "learning_rate": 2.600846722564051e-06, "loss": 0.2956, "step": 13440 }, { "epoch": 0.77, "grad_norm": 0.2703381171096475, "learning_rate": 2.59959501475595e-06, "loss": 0.233, "step": 13441 }, { "epoch": 0.77, "grad_norm": 0.8217067922733584, "learning_rate": 2.5983435632235586e-06, "loss": 0.4331, "step": 13442 }, { "epoch": 0.77, "grad_norm": 0.4817791637134085, "learning_rate": 2.597092368010212e-06, "loss": 0.2811, "step": 13443 }, { "epoch": 0.77, "grad_norm": 0.42074169351000873, "learning_rate": 2.5958414291592384e-06, "loss": 0.3039, "step": 13444 }, { "epoch": 0.77, "grad_norm": 0.22653893104331904, "learning_rate": 2.594590746713953e-06, "loss": 0.1724, "step": 13445 }, { "epoch": 0.77, "grad_norm": 0.5405885473781558, "learning_rate": 2.5933403207176766e-06, "loss": 0.2775, "step": 13446 }, { "epoch": 0.77, "grad_norm": 0.3693178605084256, "learning_rate": 2.5920901512137052e-06, "loss": 0.296, "step": 13447 }, { "epoch": 0.77, "grad_norm": 0.5107901097900251, "learning_rate": 2.5908402382453337e-06, "loss": 0.291, "step": 13448 }, { "epoch": 0.77, "grad_norm": 0.39069716983926167, "learning_rate": 2.589590581855843e-06, "loss": 0.2842, "step": 13449 }, { "epoch": 0.77, "grad_norm": 0.3709867346737115, "learning_rate": 2.588341182088514e-06, "loss": 0.2768, "step": 13450 }, { "epoch": 0.77, "grad_norm": 0.30386942428806196, "learning_rate": 2.587092038986613e-06, "loss": 0.1273, "step": 13451 }, { "epoch": 0.77, "grad_norm": 0.5989646935983985, "learning_rate": 2.5858431525933946e-06, "loss": 0.3982, "step": 13452 }, { "epoch": 0.77, "grad_norm": 0.24380382171509266, "learning_rate": 2.5845945229521095e-06, "loss": 0.2705, "step": 13453 }, { "epoch": 0.77, "grad_norm": 0.7642863991718087, "learning_rate": 2.5833461501059933e-06, "loss": 0.3247, "step": 13454 }, { "epoch": 0.77, "grad_norm": 0.7580810104910045, "learning_rate": 2.5820980340982847e-06, "loss": 0.4048, "step": 13455 }, { "epoch": 0.77, "grad_norm": 0.30818453796959994, "learning_rate": 2.5808501749722024e-06, "loss": 0.2299, "step": 13456 }, { "epoch": 0.77, "grad_norm": 0.27604039325913704, "learning_rate": 2.5796025727709595e-06, "loss": 0.2518, "step": 13457 }, { "epoch": 0.77, "grad_norm": 0.8081791607239174, "learning_rate": 2.5783552275377567e-06, "loss": 0.2606, "step": 13458 }, { "epoch": 0.77, "grad_norm": 0.374970126576345, "learning_rate": 2.577108139315797e-06, "loss": 0.2767, "step": 13459 }, { "epoch": 0.77, "grad_norm": 1.1719009812907486, "learning_rate": 2.575861308148263e-06, "loss": 0.748, "step": 13460 }, { "epoch": 0.77, "grad_norm": 0.28830064194617916, "learning_rate": 2.574614734078332e-06, "loss": 0.2245, "step": 13461 }, { "epoch": 0.77, "grad_norm": 0.40624340668557385, "learning_rate": 2.5733684171491713e-06, "loss": 0.3191, "step": 13462 }, { "epoch": 0.77, "grad_norm": 0.31625190451738877, "learning_rate": 2.5721223574039466e-06, "loss": 0.1969, "step": 13463 }, { "epoch": 0.77, "grad_norm": 0.42735286500023417, "learning_rate": 2.570876554885804e-06, "loss": 0.2341, "step": 13464 }, { "epoch": 0.77, "grad_norm": 0.3077642913701503, "learning_rate": 2.5696310096378875e-06, "loss": 0.2794, "step": 13465 }, { "epoch": 0.77, "grad_norm": 1.1827315444704343, "learning_rate": 2.568385721703329e-06, "loss": 0.7581, "step": 13466 }, { "epoch": 0.77, "grad_norm": 1.4823498393056806, "learning_rate": 2.5671406911252506e-06, "loss": 0.2698, "step": 13467 }, { "epoch": 0.77, "grad_norm": 0.3486648926153057, "learning_rate": 2.5658959179467734e-06, "loss": 0.2548, "step": 13468 }, { "epoch": 0.77, "grad_norm": 0.3394923711719963, "learning_rate": 2.5646514022110013e-06, "loss": 0.2921, "step": 13469 }, { "epoch": 0.77, "grad_norm": 0.4458713311099046, "learning_rate": 2.563407143961032e-06, "loss": 0.2497, "step": 13470 }, { "epoch": 0.77, "grad_norm": 0.23033084641571222, "learning_rate": 2.5621631432399496e-06, "loss": 0.1549, "step": 13471 }, { "epoch": 0.77, "grad_norm": 0.470600873287133, "learning_rate": 2.5609194000908434e-06, "loss": 0.3824, "step": 13472 }, { "epoch": 0.77, "grad_norm": 0.5083980754662374, "learning_rate": 2.5596759145567763e-06, "loss": 0.3503, "step": 13473 }, { "epoch": 0.77, "grad_norm": 0.3457918738633052, "learning_rate": 2.5584326866808084e-06, "loss": 0.1994, "step": 13474 }, { "epoch": 0.77, "grad_norm": 0.8561374677835701, "learning_rate": 2.557189716506e-06, "loss": 0.3989, "step": 13475 }, { "epoch": 0.77, "grad_norm": 0.3040416433764094, "learning_rate": 2.555947004075392e-06, "loss": 0.2597, "step": 13476 }, { "epoch": 0.77, "grad_norm": 0.22530902436018296, "learning_rate": 2.5547045494320187e-06, "loss": 0.1685, "step": 13477 }, { "epoch": 0.77, "grad_norm": 1.4681940576762507, "learning_rate": 2.5534623526189075e-06, "loss": 0.7422, "step": 13478 }, { "epoch": 0.77, "grad_norm": 0.8041062456322301, "learning_rate": 2.5522204136790707e-06, "loss": 0.4247, "step": 13479 }, { "epoch": 0.77, "grad_norm": 0.3134178270242085, "learning_rate": 2.5509787326555245e-06, "loss": 0.2055, "step": 13480 }, { "epoch": 0.77, "grad_norm": 0.37965284021285, "learning_rate": 2.5497373095912638e-06, "loss": 0.3178, "step": 13481 }, { "epoch": 0.77, "grad_norm": 0.3664967026991713, "learning_rate": 2.5484961445292798e-06, "loss": 0.2107, "step": 13482 }, { "epoch": 0.77, "grad_norm": 0.348010313926068, "learning_rate": 2.5472552375125514e-06, "loss": 0.2591, "step": 13483 }, { "epoch": 0.77, "grad_norm": 0.27596022773032486, "learning_rate": 2.546014588584057e-06, "loss": 0.2083, "step": 13484 }, { "epoch": 0.77, "grad_norm": 0.5443468371920548, "learning_rate": 2.5447741977867556e-06, "loss": 0.3054, "step": 13485 }, { "epoch": 0.77, "grad_norm": 0.32076582873013293, "learning_rate": 2.543534065163604e-06, "loss": 0.2684, "step": 13486 }, { "epoch": 0.77, "grad_norm": 0.7357780117610295, "learning_rate": 2.542294190757544e-06, "loss": 0.3032, "step": 13487 }, { "epoch": 0.77, "grad_norm": 0.33348082391285816, "learning_rate": 2.541054574611518e-06, "loss": 0.2992, "step": 13488 }, { "epoch": 0.78, "grad_norm": 0.24713867181114496, "learning_rate": 2.539815216768452e-06, "loss": 0.2075, "step": 13489 }, { "epoch": 0.78, "grad_norm": 0.2824735073527296, "learning_rate": 2.5385761172712642e-06, "loss": 0.1629, "step": 13490 }, { "epoch": 0.78, "grad_norm": 0.7765884467550676, "learning_rate": 2.537337276162861e-06, "loss": 0.4139, "step": 13491 }, { "epoch": 0.78, "grad_norm": 0.31663661173967594, "learning_rate": 2.5360986934861507e-06, "loss": 0.2695, "step": 13492 }, { "epoch": 0.78, "grad_norm": 0.3654158248056001, "learning_rate": 2.5348603692840214e-06, "loss": 0.3131, "step": 13493 }, { "epoch": 0.78, "grad_norm": 0.5894688790078348, "learning_rate": 2.5336223035993566e-06, "loss": 0.1376, "step": 13494 }, { "epoch": 0.78, "grad_norm": 0.3125708863235182, "learning_rate": 2.53238449647503e-06, "loss": 0.2578, "step": 13495 }, { "epoch": 0.78, "grad_norm": 1.1793852459395195, "learning_rate": 2.5311469479539043e-06, "loss": 0.7588, "step": 13496 }, { "epoch": 0.78, "grad_norm": 0.2854328646753722, "learning_rate": 2.5299096580788416e-06, "loss": 0.2246, "step": 13497 }, { "epoch": 0.78, "grad_norm": 0.411565998534182, "learning_rate": 2.5286726268926864e-06, "loss": 0.299, "step": 13498 }, { "epoch": 0.78, "grad_norm": 0.47853799166222705, "learning_rate": 2.5274358544382773e-06, "loss": 0.3148, "step": 13499 }, { "epoch": 0.78, "grad_norm": 0.32126409083539537, "learning_rate": 2.5261993407584394e-06, "loss": 0.2218, "step": 13500 }, { "epoch": 0.78, "grad_norm": 0.35565643253633095, "learning_rate": 2.5249630858960006e-06, "loss": 0.2473, "step": 13501 }, { "epoch": 0.78, "grad_norm": 1.0739827262463106, "learning_rate": 2.5237270898937684e-06, "loss": 0.7007, "step": 13502 }, { "epoch": 0.78, "grad_norm": 0.4259404404044619, "learning_rate": 2.522491352794545e-06, "loss": 0.2166, "step": 13503 }, { "epoch": 0.78, "grad_norm": 0.32529796843916775, "learning_rate": 2.521255874641122e-06, "loss": 0.2448, "step": 13504 }, { "epoch": 0.78, "grad_norm": 0.24437696676073684, "learning_rate": 2.5200206554762897e-06, "loss": 0.2097, "step": 13505 }, { "epoch": 0.78, "grad_norm": 1.2143331452246588, "learning_rate": 2.5187856953428237e-06, "loss": 0.4804, "step": 13506 }, { "epoch": 0.78, "grad_norm": 0.31524982654847683, "learning_rate": 2.5175509942834843e-06, "loss": 0.2062, "step": 13507 }, { "epoch": 0.78, "grad_norm": 0.3559843324635449, "learning_rate": 2.516316552341028e-06, "loss": 0.3224, "step": 13508 }, { "epoch": 0.78, "grad_norm": 0.5991992837613603, "learning_rate": 2.515082369558212e-06, "loss": 0.3749, "step": 13509 }, { "epoch": 0.78, "grad_norm": 0.24645117400291955, "learning_rate": 2.513848445977771e-06, "loss": 0.182, "step": 13510 }, { "epoch": 0.78, "grad_norm": 0.48245437149625137, "learning_rate": 2.5126147816424364e-06, "loss": 0.2498, "step": 13511 }, { "epoch": 0.78, "grad_norm": 0.3648211240975149, "learning_rate": 2.5113813765949267e-06, "loss": 0.3079, "step": 13512 }, { "epoch": 0.78, "grad_norm": 0.28731932512581504, "learning_rate": 2.5101482308779625e-06, "loss": 0.1939, "step": 13513 }, { "epoch": 0.78, "grad_norm": 0.9704475832943743, "learning_rate": 2.508915344534242e-06, "loss": 0.4582, "step": 13514 }, { "epoch": 0.78, "grad_norm": 0.6266296632397085, "learning_rate": 2.50768271760646e-06, "loss": 0.3931, "step": 13515 }, { "epoch": 0.78, "grad_norm": 0.27616660652349634, "learning_rate": 2.5064503501373017e-06, "loss": 0.2008, "step": 13516 }, { "epoch": 0.78, "grad_norm": 0.2513559465711763, "learning_rate": 2.505218242169448e-06, "loss": 0.2077, "step": 13517 }, { "epoch": 0.78, "grad_norm": 1.3205543433577367, "learning_rate": 2.5039863937455645e-06, "loss": 0.484, "step": 13518 }, { "epoch": 0.78, "grad_norm": 0.6244415397855417, "learning_rate": 2.5027548049083094e-06, "loss": 0.3689, "step": 13519 }, { "epoch": 0.78, "grad_norm": 0.3004803209648444, "learning_rate": 2.5015234757003326e-06, "loss": 0.2546, "step": 13520 }, { "epoch": 0.78, "grad_norm": 0.7853617262016618, "learning_rate": 2.500292406164273e-06, "loss": 0.3941, "step": 13521 }, { "epoch": 0.78, "grad_norm": 0.3676844253504336, "learning_rate": 2.4990615963427688e-06, "loss": 0.2752, "step": 13522 }, { "epoch": 0.78, "grad_norm": 0.22875365405484152, "learning_rate": 2.4978310462784373e-06, "loss": 0.1272, "step": 13523 }, { "epoch": 0.78, "grad_norm": 0.35103757648372264, "learning_rate": 2.496600756013895e-06, "loss": 0.3124, "step": 13524 }, { "epoch": 0.78, "grad_norm": 0.3743025090473022, "learning_rate": 2.4953707255917426e-06, "loss": 0.2631, "step": 13525 }, { "epoch": 0.78, "grad_norm": 0.4822393785473432, "learning_rate": 2.4941409550545824e-06, "loss": 0.2804, "step": 13526 }, { "epoch": 0.78, "grad_norm": 1.1309185431941364, "learning_rate": 2.492911444444999e-06, "loss": 0.5591, "step": 13527 }, { "epoch": 0.78, "grad_norm": 0.25582945570068455, "learning_rate": 2.491682193805568e-06, "loss": 0.2465, "step": 13528 }, { "epoch": 0.78, "grad_norm": 0.24118639331888805, "learning_rate": 2.4904532031788577e-06, "loss": 0.1293, "step": 13529 }, { "epoch": 0.78, "grad_norm": 1.2057509069942007, "learning_rate": 2.489224472607432e-06, "loss": 0.5991, "step": 13530 }, { "epoch": 0.78, "grad_norm": 0.4075751183842601, "learning_rate": 2.487996002133841e-06, "loss": 0.2751, "step": 13531 }, { "epoch": 0.78, "grad_norm": 0.3821138342755005, "learning_rate": 2.486767791800625e-06, "loss": 0.2959, "step": 13532 }, { "epoch": 0.78, "grad_norm": 0.4553999305831225, "learning_rate": 2.4855398416503173e-06, "loss": 0.3063, "step": 13533 }, { "epoch": 0.78, "grad_norm": 0.39885559025142264, "learning_rate": 2.4843121517254386e-06, "loss": 0.2575, "step": 13534 }, { "epoch": 0.78, "grad_norm": 0.2631468133379456, "learning_rate": 2.4830847220685096e-06, "loss": 0.2009, "step": 13535 }, { "epoch": 0.78, "grad_norm": 0.33484657861419903, "learning_rate": 2.4818575527220347e-06, "loss": 0.2423, "step": 13536 }, { "epoch": 0.78, "grad_norm": 0.5743804627272125, "learning_rate": 2.4806306437285075e-06, "loss": 0.3096, "step": 13537 }, { "epoch": 0.78, "grad_norm": 0.36102512843751833, "learning_rate": 2.479403995130416e-06, "loss": 0.2962, "step": 13538 }, { "epoch": 0.78, "grad_norm": 0.4681753138150729, "learning_rate": 2.4781776069702446e-06, "loss": 0.2943, "step": 13539 }, { "epoch": 0.78, "grad_norm": 0.40481623216969953, "learning_rate": 2.4769514792904603e-06, "loss": 0.2884, "step": 13540 }, { "epoch": 0.78, "grad_norm": 0.2316043899089237, "learning_rate": 2.4757256121335182e-06, "loss": 0.195, "step": 13541 }, { "epoch": 0.78, "grad_norm": 0.8906998454080058, "learning_rate": 2.4745000055418767e-06, "loss": 0.2673, "step": 13542 }, { "epoch": 0.78, "grad_norm": 0.3756343888756985, "learning_rate": 2.4732746595579772e-06, "loss": 0.2692, "step": 13543 }, { "epoch": 0.78, "grad_norm": 0.289010965803477, "learning_rate": 2.4720495742242522e-06, "loss": 0.2824, "step": 13544 }, { "epoch": 0.78, "grad_norm": 1.2453049542327923, "learning_rate": 2.4708247495831263e-06, "loss": 0.7404, "step": 13545 }, { "epoch": 0.78, "grad_norm": 0.36664623886660264, "learning_rate": 2.4696001856770137e-06, "loss": 0.1901, "step": 13546 }, { "epoch": 0.78, "grad_norm": 0.32350893720100893, "learning_rate": 2.468375882548325e-06, "loss": 0.2296, "step": 13547 }, { "epoch": 0.78, "grad_norm": 0.28448727451578076, "learning_rate": 2.4671518402394554e-06, "loss": 0.2343, "step": 13548 }, { "epoch": 0.78, "grad_norm": 0.38941415911483, "learning_rate": 2.4659280587927935e-06, "loss": 0.2319, "step": 13549 }, { "epoch": 0.78, "grad_norm": 0.540433968384648, "learning_rate": 2.464704538250717e-06, "loss": 0.2962, "step": 13550 }, { "epoch": 0.78, "grad_norm": 1.0027508666664908, "learning_rate": 2.463481278655601e-06, "loss": 0.6688, "step": 13551 }, { "epoch": 0.78, "grad_norm": 0.2635421406849195, "learning_rate": 2.4622582800498042e-06, "loss": 0.2202, "step": 13552 }, { "epoch": 0.78, "grad_norm": 0.3419175327603542, "learning_rate": 2.4610355424756782e-06, "loss": 0.2265, "step": 13553 }, { "epoch": 0.78, "grad_norm": 0.43706452382046834, "learning_rate": 2.4598130659755647e-06, "loss": 0.2644, "step": 13554 }, { "epoch": 0.78, "grad_norm": 0.47010643053265033, "learning_rate": 2.4585908505918034e-06, "loss": 0.1736, "step": 13555 }, { "epoch": 0.78, "grad_norm": 0.2701575469928234, "learning_rate": 2.4573688963667176e-06, "loss": 0.2637, "step": 13556 }, { "epoch": 0.78, "grad_norm": 1.1200006981818171, "learning_rate": 2.4561472033426213e-06, "loss": 0.8067, "step": 13557 }, { "epoch": 0.78, "grad_norm": 0.8991589933463543, "learning_rate": 2.4549257715618234e-06, "loss": 0.3886, "step": 13558 }, { "epoch": 0.78, "grad_norm": 0.2317283658849086, "learning_rate": 2.4537046010666187e-06, "loss": 0.1782, "step": 13559 }, { "epoch": 0.78, "grad_norm": 0.3356415177168732, "learning_rate": 2.452483691899302e-06, "loss": 0.2841, "step": 13560 }, { "epoch": 0.78, "grad_norm": 0.4554604862081215, "learning_rate": 2.45126304410215e-06, "loss": 0.2736, "step": 13561 }, { "epoch": 0.78, "grad_norm": 0.33245473228630973, "learning_rate": 2.450042657717435e-06, "loss": 0.2231, "step": 13562 }, { "epoch": 0.78, "grad_norm": 1.1404646769499847, "learning_rate": 2.4488225327874147e-06, "loss": 0.657, "step": 13563 }, { "epoch": 0.78, "grad_norm": 0.33060524305523453, "learning_rate": 2.4476026693543485e-06, "loss": 0.2643, "step": 13564 }, { "epoch": 0.78, "grad_norm": 0.4029416650173851, "learning_rate": 2.4463830674604773e-06, "loss": 0.2251, "step": 13565 }, { "epoch": 0.78, "grad_norm": 0.532161244083854, "learning_rate": 2.4451637271480357e-06, "loss": 0.3595, "step": 13566 }, { "epoch": 0.78, "grad_norm": 0.22270028050530902, "learning_rate": 2.4439446484592466e-06, "loss": 0.1721, "step": 13567 }, { "epoch": 0.78, "grad_norm": 0.5652419091857782, "learning_rate": 2.442725831436331e-06, "loss": 0.2662, "step": 13568 }, { "epoch": 0.78, "grad_norm": 0.8543186545019308, "learning_rate": 2.4415072761214963e-06, "loss": 0.5066, "step": 13569 }, { "epoch": 0.78, "grad_norm": 0.5957280739948273, "learning_rate": 2.4402889825569396e-06, "loss": 0.3107, "step": 13570 }, { "epoch": 0.78, "grad_norm": 0.4228437651054475, "learning_rate": 2.4390709507848497e-06, "loss": 0.2888, "step": 13571 }, { "epoch": 0.78, "grad_norm": 0.3197597496407284, "learning_rate": 2.4378531808474048e-06, "loss": 0.2583, "step": 13572 }, { "epoch": 0.78, "grad_norm": 0.2274135399939389, "learning_rate": 2.4366356727867847e-06, "loss": 0.156, "step": 13573 }, { "epoch": 0.78, "grad_norm": 0.45599695397853623, "learning_rate": 2.435418426645144e-06, "loss": 0.2762, "step": 13574 }, { "epoch": 0.78, "grad_norm": 0.3557905396710193, "learning_rate": 2.4342014424646343e-06, "loss": 0.2661, "step": 13575 }, { "epoch": 0.78, "grad_norm": 0.7923026226601533, "learning_rate": 2.4329847202874058e-06, "loss": 0.3719, "step": 13576 }, { "epoch": 0.78, "grad_norm": 0.7790919251786718, "learning_rate": 2.4317682601555913e-06, "loss": 0.2882, "step": 13577 }, { "epoch": 0.78, "grad_norm": 0.9627289488428679, "learning_rate": 2.4305520621113175e-06, "loss": 0.2932, "step": 13578 }, { "epoch": 0.78, "grad_norm": 0.2430678228878389, "learning_rate": 2.4293361261966965e-06, "loss": 0.2217, "step": 13579 }, { "epoch": 0.78, "grad_norm": 0.24952073494447208, "learning_rate": 2.4281204524538425e-06, "loss": 0.2056, "step": 13580 }, { "epoch": 0.78, "grad_norm": 1.1887519774137303, "learning_rate": 2.426905040924853e-06, "loss": 0.7538, "step": 13581 }, { "epoch": 0.78, "grad_norm": 0.6848642705531716, "learning_rate": 2.4256898916518145e-06, "loss": 0.2607, "step": 13582 }, { "epoch": 0.78, "grad_norm": 0.34377998627378237, "learning_rate": 2.4244750046768105e-06, "loss": 0.2902, "step": 13583 }, { "epoch": 0.78, "grad_norm": 0.35628235811761977, "learning_rate": 2.4232603800419087e-06, "loss": 0.3136, "step": 13584 }, { "epoch": 0.78, "grad_norm": 0.1818921796340036, "learning_rate": 2.4220460177891757e-06, "loss": 0.0816, "step": 13585 }, { "epoch": 0.78, "grad_norm": 0.3849502874315825, "learning_rate": 2.4208319179606643e-06, "loss": 0.3033, "step": 13586 }, { "epoch": 0.78, "grad_norm": 0.47780436818363103, "learning_rate": 2.419618080598417e-06, "loss": 0.3461, "step": 13587 }, { "epoch": 0.78, "grad_norm": 0.4695138835770297, "learning_rate": 2.418404505744467e-06, "loss": 0.222, "step": 13588 }, { "epoch": 0.78, "grad_norm": 0.5030258966165988, "learning_rate": 2.4171911934408464e-06, "loss": 0.2913, "step": 13589 }, { "epoch": 0.78, "grad_norm": 0.9470056441285403, "learning_rate": 2.4159781437295684e-06, "loss": 0.4908, "step": 13590 }, { "epoch": 0.78, "grad_norm": 0.2590739492221378, "learning_rate": 2.414765356652641e-06, "loss": 0.1915, "step": 13591 }, { "epoch": 0.78, "grad_norm": 0.2734733858017858, "learning_rate": 2.4135528322520597e-06, "loss": 0.2396, "step": 13592 }, { "epoch": 0.78, "grad_norm": 0.47194836624512676, "learning_rate": 2.4123405705698213e-06, "loss": 0.2789, "step": 13593 }, { "epoch": 0.78, "grad_norm": 0.7394128282774476, "learning_rate": 2.4111285716479015e-06, "loss": 0.3719, "step": 13594 }, { "epoch": 0.78, "grad_norm": 0.2604512556637114, "learning_rate": 2.4099168355282743e-06, "loss": 0.1993, "step": 13595 }, { "epoch": 0.78, "grad_norm": 0.49077072597667926, "learning_rate": 2.4087053622529e-06, "loss": 0.351, "step": 13596 }, { "epoch": 0.78, "grad_norm": 1.1799394846564877, "learning_rate": 2.4074941518637295e-06, "loss": 0.5699, "step": 13597 }, { "epoch": 0.78, "grad_norm": 0.3030824419466015, "learning_rate": 2.406283204402714e-06, "loss": 0.2124, "step": 13598 }, { "epoch": 0.78, "grad_norm": 0.46405446573905923, "learning_rate": 2.405072519911783e-06, "loss": 0.31, "step": 13599 }, { "epoch": 0.78, "grad_norm": 0.32419622170740764, "learning_rate": 2.4038620984328655e-06, "loss": 0.2435, "step": 13600 }, { "epoch": 0.78, "grad_norm": 0.21380776837026694, "learning_rate": 2.4026519400078728e-06, "loss": 0.1533, "step": 13601 }, { "epoch": 0.78, "grad_norm": 1.3300209163280692, "learning_rate": 2.401442044678721e-06, "loss": 0.5964, "step": 13602 }, { "epoch": 0.78, "grad_norm": 0.35887510821296326, "learning_rate": 2.4002324124873033e-06, "loss": 0.2981, "step": 13603 }, { "epoch": 0.78, "grad_norm": 0.2783468021005403, "learning_rate": 2.3990230434755112e-06, "loss": 0.1819, "step": 13604 }, { "epoch": 0.78, "grad_norm": 0.7379956763914063, "learning_rate": 2.3978139376852206e-06, "loss": 0.4326, "step": 13605 }, { "epoch": 0.78, "grad_norm": 0.3247249377272244, "learning_rate": 2.3966050951583096e-06, "loss": 0.1835, "step": 13606 }, { "epoch": 0.78, "grad_norm": 0.3669611320334294, "learning_rate": 2.39539651593664e-06, "loss": 0.2728, "step": 13607 }, { "epoch": 0.78, "grad_norm": 0.35511621181556124, "learning_rate": 2.3941882000620586e-06, "loss": 0.2297, "step": 13608 }, { "epoch": 0.78, "grad_norm": 1.2103759075595983, "learning_rate": 2.3929801475764113e-06, "loss": 0.498, "step": 13609 }, { "epoch": 0.78, "grad_norm": 0.33080955912933513, "learning_rate": 2.391772358521536e-06, "loss": 0.2461, "step": 13610 }, { "epoch": 0.78, "grad_norm": 0.37747048144770934, "learning_rate": 2.3905648329392574e-06, "loss": 0.2751, "step": 13611 }, { "epoch": 0.78, "grad_norm": 0.8894374852257119, "learning_rate": 2.389357570871391e-06, "loss": 0.4819, "step": 13612 }, { "epoch": 0.78, "grad_norm": 0.24421514760385282, "learning_rate": 2.3881505723597422e-06, "loss": 0.202, "step": 13613 }, { "epoch": 0.78, "grad_norm": 0.40684410356900697, "learning_rate": 2.386943837446114e-06, "loss": 0.1062, "step": 13614 }, { "epoch": 0.78, "grad_norm": 0.3562939479119407, "learning_rate": 2.385737366172294e-06, "loss": 0.3223, "step": 13615 }, { "epoch": 0.78, "grad_norm": 0.34011491819599793, "learning_rate": 2.3845311585800612e-06, "loss": 0.2739, "step": 13616 }, { "epoch": 0.78, "grad_norm": 0.7845879219744151, "learning_rate": 2.3833252147111853e-06, "loss": 0.2916, "step": 13617 }, { "epoch": 0.78, "grad_norm": 0.343820832697006, "learning_rate": 2.382119534607431e-06, "loss": 0.2475, "step": 13618 }, { "epoch": 0.78, "grad_norm": 0.25729081529380954, "learning_rate": 2.38091411831055e-06, "loss": 0.2516, "step": 13619 }, { "epoch": 0.78, "grad_norm": 0.5159852122593639, "learning_rate": 2.379708965862285e-06, "loss": 0.2322, "step": 13620 }, { "epoch": 0.78, "grad_norm": 0.8836787867400838, "learning_rate": 2.3785040773043686e-06, "loss": 0.3155, "step": 13621 }, { "epoch": 0.78, "grad_norm": 0.4244576302501707, "learning_rate": 2.3772994526785308e-06, "loss": 0.2892, "step": 13622 }, { "epoch": 0.78, "grad_norm": 0.29146153581979123, "learning_rate": 2.376095092026486e-06, "loss": 0.289, "step": 13623 }, { "epoch": 0.78, "grad_norm": 0.6333592123809318, "learning_rate": 2.37489099538994e-06, "loss": 0.2585, "step": 13624 }, { "epoch": 0.78, "grad_norm": 0.3367141969825039, "learning_rate": 2.3736871628105907e-06, "loss": 0.2642, "step": 13625 }, { "epoch": 0.78, "grad_norm": 0.2573235517922094, "learning_rate": 2.372483594330124e-06, "loss": 0.1519, "step": 13626 }, { "epoch": 0.78, "grad_norm": 0.3441524091740002, "learning_rate": 2.3712802899902256e-06, "loss": 0.258, "step": 13627 }, { "epoch": 0.78, "grad_norm": 0.31942754964550046, "learning_rate": 2.3700772498325617e-06, "loss": 0.2458, "step": 13628 }, { "epoch": 0.78, "grad_norm": 0.7120882141052624, "learning_rate": 2.3688744738987955e-06, "loss": 0.3966, "step": 13629 }, { "epoch": 0.78, "grad_norm": 1.3333702259355196, "learning_rate": 2.3676719622305754e-06, "loss": 0.2666, "step": 13630 }, { "epoch": 0.78, "grad_norm": 0.2738749937750857, "learning_rate": 2.3664697148695494e-06, "loss": 0.2535, "step": 13631 }, { "epoch": 0.78, "grad_norm": 0.24507540728163543, "learning_rate": 2.365267731857349e-06, "loss": 0.1795, "step": 13632 }, { "epoch": 0.78, "grad_norm": 0.7427338888923851, "learning_rate": 2.3640660132356e-06, "loss": 0.3918, "step": 13633 }, { "epoch": 0.78, "grad_norm": 0.3615965217499706, "learning_rate": 2.362864559045912e-06, "loss": 0.2184, "step": 13634 }, { "epoch": 0.78, "grad_norm": 0.34559986606325616, "learning_rate": 2.3616633693298996e-06, "loss": 0.2857, "step": 13635 }, { "epoch": 0.78, "grad_norm": 1.076346574627362, "learning_rate": 2.360462444129156e-06, "loss": 0.6005, "step": 13636 }, { "epoch": 0.78, "grad_norm": 0.2748717407836768, "learning_rate": 2.3592617834852694e-06, "loss": 0.1638, "step": 13637 }, { "epoch": 0.78, "grad_norm": 0.4605339516609135, "learning_rate": 2.358061387439818e-06, "loss": 0.3061, "step": 13638 }, { "epoch": 0.78, "grad_norm": 0.36174894634119437, "learning_rate": 2.356861256034371e-06, "loss": 0.3071, "step": 13639 }, { "epoch": 0.78, "grad_norm": 0.40535929812470445, "learning_rate": 2.355661389310492e-06, "loss": 0.2054, "step": 13640 }, { "epoch": 0.78, "grad_norm": 0.5112923460526705, "learning_rate": 2.354461787309733e-06, "loss": 0.3737, "step": 13641 }, { "epoch": 0.78, "grad_norm": 0.5087292017954329, "learning_rate": 2.353262450073628e-06, "loss": 0.3251, "step": 13642 }, { "epoch": 0.78, "grad_norm": 0.29548431419215865, "learning_rate": 2.3520633776437187e-06, "loss": 0.199, "step": 13643 }, { "epoch": 0.78, "grad_norm": 0.26742981963554274, "learning_rate": 2.3508645700615253e-06, "loss": 0.1937, "step": 13644 }, { "epoch": 0.78, "grad_norm": 0.7458493819978979, "learning_rate": 2.3496660273685633e-06, "loss": 0.3586, "step": 13645 }, { "epoch": 0.78, "grad_norm": 0.398854225716011, "learning_rate": 2.348467749606335e-06, "loss": 0.2928, "step": 13646 }, { "epoch": 0.78, "grad_norm": 0.2920827396093392, "learning_rate": 2.347269736816341e-06, "loss": 0.2593, "step": 13647 }, { "epoch": 0.78, "grad_norm": 1.4477857127683085, "learning_rate": 2.3460719890400687e-06, "loss": 0.748, "step": 13648 }, { "epoch": 0.78, "grad_norm": 0.45883127272812063, "learning_rate": 2.3448745063189937e-06, "loss": 0.2472, "step": 13649 }, { "epoch": 0.78, "grad_norm": 0.2593031866666469, "learning_rate": 2.3436772886945847e-06, "loss": 0.1717, "step": 13650 }, { "epoch": 0.78, "grad_norm": 0.3450260325923007, "learning_rate": 2.3424803362083005e-06, "loss": 0.3068, "step": 13651 }, { "epoch": 0.78, "grad_norm": 0.4138606948993418, "learning_rate": 2.3412836489015945e-06, "loss": 0.2737, "step": 13652 }, { "epoch": 0.78, "grad_norm": 0.46342922046762425, "learning_rate": 2.340087226815907e-06, "loss": 0.2678, "step": 13653 }, { "epoch": 0.78, "grad_norm": 0.5116239699745199, "learning_rate": 2.338891069992669e-06, "loss": 0.3142, "step": 13654 }, { "epoch": 0.78, "grad_norm": 0.3018463186424405, "learning_rate": 2.3376951784733014e-06, "loss": 0.2575, "step": 13655 }, { "epoch": 0.78, "grad_norm": 0.5042604633777217, "learning_rate": 2.336499552299223e-06, "loss": 0.3364, "step": 13656 }, { "epoch": 0.78, "grad_norm": 0.3450572288678373, "learning_rate": 2.3353041915118357e-06, "loss": 0.1711, "step": 13657 }, { "epoch": 0.78, "grad_norm": 0.5783242085477613, "learning_rate": 2.3341090961525347e-06, "loss": 0.3407, "step": 13658 }, { "epoch": 0.78, "grad_norm": 0.26315711604683223, "learning_rate": 2.3329142662627026e-06, "loss": 0.2855, "step": 13659 }, { "epoch": 0.78, "grad_norm": 1.3037750222352267, "learning_rate": 2.3317197018837233e-06, "loss": 0.3251, "step": 13660 }, { "epoch": 0.78, "grad_norm": 0.6003607719198905, "learning_rate": 2.330525403056961e-06, "loss": 0.3302, "step": 13661 }, { "epoch": 0.78, "grad_norm": 0.397572728143814, "learning_rate": 2.329331369823774e-06, "loss": 0.3095, "step": 13662 }, { "epoch": 0.79, "grad_norm": 0.23107023596230547, "learning_rate": 2.3281376022255107e-06, "loss": 0.1822, "step": 13663 }, { "epoch": 0.79, "grad_norm": 0.5463080263376949, "learning_rate": 2.326944100303511e-06, "loss": 0.323, "step": 13664 }, { "epoch": 0.79, "grad_norm": 0.389194949108078, "learning_rate": 2.32575086409911e-06, "loss": 0.3304, "step": 13665 }, { "epoch": 0.79, "grad_norm": 0.5020850413515092, "learning_rate": 2.3245578936536263e-06, "loss": 0.2403, "step": 13666 }, { "epoch": 0.79, "grad_norm": 0.3099713566620966, "learning_rate": 2.323365189008372e-06, "loss": 0.2437, "step": 13667 }, { "epoch": 0.79, "grad_norm": 0.4927324531266989, "learning_rate": 2.3221727502046487e-06, "loss": 0.3289, "step": 13668 }, { "epoch": 0.79, "grad_norm": 0.5103081683200503, "learning_rate": 2.3209805772837557e-06, "loss": 0.2563, "step": 13669 }, { "epoch": 0.79, "grad_norm": 0.23731099735038388, "learning_rate": 2.3197886702869756e-06, "loss": 0.1511, "step": 13670 }, { "epoch": 0.79, "grad_norm": 0.2998413324216504, "learning_rate": 2.3185970292555827e-06, "loss": 0.294, "step": 13671 }, { "epoch": 0.79, "grad_norm": 1.1268300194743812, "learning_rate": 2.317405654230842e-06, "loss": 0.5717, "step": 13672 }, { "epoch": 0.79, "grad_norm": 0.42392272265792735, "learning_rate": 2.3162145452540164e-06, "loss": 0.2239, "step": 13673 }, { "epoch": 0.79, "grad_norm": 0.5320072799016502, "learning_rate": 2.3150237023663503e-06, "loss": 0.3257, "step": 13674 }, { "epoch": 0.79, "grad_norm": 0.3519616895462257, "learning_rate": 2.3138331256090853e-06, "loss": 0.2953, "step": 13675 }, { "epoch": 0.79, "grad_norm": 0.2006587422306295, "learning_rate": 2.312642815023444e-06, "loss": 0.1078, "step": 13676 }, { "epoch": 0.79, "grad_norm": 0.3494649945683863, "learning_rate": 2.311452770650653e-06, "loss": 0.2756, "step": 13677 }, { "epoch": 0.79, "grad_norm": 0.27624763479241404, "learning_rate": 2.3102629925319233e-06, "loss": 0.2441, "step": 13678 }, { "epoch": 0.79, "grad_norm": 0.5364171797371735, "learning_rate": 2.3090734807084545e-06, "loss": 0.2007, "step": 13679 }, { "epoch": 0.79, "grad_norm": 0.3835854642103055, "learning_rate": 2.307884235221438e-06, "loss": 0.2958, "step": 13680 }, { "epoch": 0.79, "grad_norm": 0.5006981875240444, "learning_rate": 2.3066952561120616e-06, "loss": 0.2308, "step": 13681 }, { "epoch": 0.79, "grad_norm": 0.3935079231579874, "learning_rate": 2.3055065434214983e-06, "loss": 0.2848, "step": 13682 }, { "epoch": 0.79, "grad_norm": 0.2531872170122483, "learning_rate": 2.3043180971909128e-06, "loss": 0.2199, "step": 13683 }, { "epoch": 0.79, "grad_norm": 0.42624480063631087, "learning_rate": 2.3031299174614572e-06, "loss": 0.2557, "step": 13684 }, { "epoch": 0.79, "grad_norm": 0.6061150551108329, "learning_rate": 2.3019420042742856e-06, "loss": 0.3503, "step": 13685 }, { "epoch": 0.79, "grad_norm": 0.24096818349359286, "learning_rate": 2.3007543576705303e-06, "loss": 0.2254, "step": 13686 }, { "epoch": 0.79, "grad_norm": 1.5103883995679313, "learning_rate": 2.299566977691321e-06, "loss": 0.508, "step": 13687 }, { "epoch": 0.79, "grad_norm": 0.5508991805415692, "learning_rate": 2.2983798643777755e-06, "loss": 0.3538, "step": 13688 }, { "epoch": 0.79, "grad_norm": 0.3684502029643531, "learning_rate": 2.297193017771002e-06, "loss": 0.2364, "step": 13689 }, { "epoch": 0.79, "grad_norm": 0.28819135601703405, "learning_rate": 2.296006437912106e-06, "loss": 0.2523, "step": 13690 }, { "epoch": 0.79, "grad_norm": 0.37804061739241884, "learning_rate": 2.2948201248421754e-06, "loss": 0.2241, "step": 13691 }, { "epoch": 0.79, "grad_norm": 0.40514152634974704, "learning_rate": 2.2936340786022926e-06, "loss": 0.2199, "step": 13692 }, { "epoch": 0.79, "grad_norm": 1.195907334850774, "learning_rate": 2.2924482992335272e-06, "loss": 0.4781, "step": 13693 }, { "epoch": 0.79, "grad_norm": 0.3126316257768985, "learning_rate": 2.291262786776949e-06, "loss": 0.2628, "step": 13694 }, { "epoch": 0.79, "grad_norm": 0.3317162176271463, "learning_rate": 2.2900775412736086e-06, "loss": 0.2689, "step": 13695 }, { "epoch": 0.79, "grad_norm": 0.502522546503961, "learning_rate": 2.288892562764552e-06, "loss": 0.2287, "step": 13696 }, { "epoch": 0.79, "grad_norm": 0.4025773704640404, "learning_rate": 2.28770785129081e-06, "loss": 0.2352, "step": 13697 }, { "epoch": 0.79, "grad_norm": 0.3119324736608787, "learning_rate": 2.286523406893418e-06, "loss": 0.2736, "step": 13698 }, { "epoch": 0.79, "grad_norm": 0.46774707777608343, "learning_rate": 2.285339229613388e-06, "loss": 0.2402, "step": 13699 }, { "epoch": 0.79, "grad_norm": 0.7445728385464858, "learning_rate": 2.2841553194917288e-06, "loss": 0.5074, "step": 13700 }, { "epoch": 0.79, "grad_norm": 0.3503867920780646, "learning_rate": 2.2829716765694397e-06, "loss": 0.2881, "step": 13701 }, { "epoch": 0.79, "grad_norm": 0.31801113224197763, "learning_rate": 2.2817883008875065e-06, "loss": 0.2597, "step": 13702 }, { "epoch": 0.79, "grad_norm": 0.2962235175090054, "learning_rate": 2.2806051924869144e-06, "loss": 0.1875, "step": 13703 }, { "epoch": 0.79, "grad_norm": 0.3308268216739318, "learning_rate": 2.2794223514086333e-06, "loss": 0.25, "step": 13704 }, { "epoch": 0.79, "grad_norm": 1.2976921395533858, "learning_rate": 2.2782397776936237e-06, "loss": 0.2965, "step": 13705 }, { "epoch": 0.79, "grad_norm": 0.3459148821492143, "learning_rate": 2.277057471382836e-06, "loss": 0.3103, "step": 13706 }, { "epoch": 0.79, "grad_norm": 0.3780665332879215, "learning_rate": 2.2758754325172194e-06, "loss": 0.2764, "step": 13707 }, { "epoch": 0.79, "grad_norm": 0.7272877027788468, "learning_rate": 2.274693661137707e-06, "loss": 0.3907, "step": 13708 }, { "epoch": 0.79, "grad_norm": 0.16950972666281172, "learning_rate": 2.273512157285215e-06, "loss": 0.0706, "step": 13709 }, { "epoch": 0.79, "grad_norm": 0.40555071835046586, "learning_rate": 2.272330921000667e-06, "loss": 0.2703, "step": 13710 }, { "epoch": 0.79, "grad_norm": 0.3774955734684031, "learning_rate": 2.271149952324968e-06, "loss": 0.3077, "step": 13711 }, { "epoch": 0.79, "grad_norm": 0.6892753385861591, "learning_rate": 2.2699692512990135e-06, "loss": 0.2721, "step": 13712 }, { "epoch": 0.79, "grad_norm": 0.341879768930826, "learning_rate": 2.268788817963692e-06, "loss": 0.2822, "step": 13713 }, { "epoch": 0.79, "grad_norm": 0.36570922717372284, "learning_rate": 2.2676086523598773e-06, "loss": 0.3289, "step": 13714 }, { "epoch": 0.79, "grad_norm": 0.3003448965263176, "learning_rate": 2.266428754528446e-06, "loss": 0.0964, "step": 13715 }, { "epoch": 0.79, "grad_norm": 0.33720536672084483, "learning_rate": 2.2652491245102537e-06, "loss": 0.2491, "step": 13716 }, { "epoch": 0.79, "grad_norm": 1.161787269663438, "learning_rate": 2.2640697623461517e-06, "loss": 0.5815, "step": 13717 }, { "epoch": 0.79, "grad_norm": 0.29078226584492334, "learning_rate": 2.262890668076979e-06, "loss": 0.2155, "step": 13718 }, { "epoch": 0.79, "grad_norm": 0.36384414626106976, "learning_rate": 2.2617118417435725e-06, "loss": 0.2685, "step": 13719 }, { "epoch": 0.79, "grad_norm": 0.9616109497695068, "learning_rate": 2.260533283386751e-06, "loss": 0.4991, "step": 13720 }, { "epoch": 0.79, "grad_norm": 0.30110611270374804, "learning_rate": 2.25935499304733e-06, "loss": 0.1999, "step": 13721 }, { "epoch": 0.79, "grad_norm": 0.23948729147878228, "learning_rate": 2.2581769707661107e-06, "loss": 0.2069, "step": 13722 }, { "epoch": 0.79, "grad_norm": 1.4181780988657122, "learning_rate": 2.256999216583892e-06, "loss": 0.4937, "step": 13723 }, { "epoch": 0.79, "grad_norm": 0.7846008419436977, "learning_rate": 2.2558217305414564e-06, "loss": 0.4049, "step": 13724 }, { "epoch": 0.79, "grad_norm": 0.2869992147760968, "learning_rate": 2.2546445126795822e-06, "loss": 0.1819, "step": 13725 }, { "epoch": 0.79, "grad_norm": 0.34869844428817903, "learning_rate": 2.2534675630390366e-06, "loss": 0.3234, "step": 13726 }, { "epoch": 0.79, "grad_norm": 0.3008860845713572, "learning_rate": 2.2522908816605716e-06, "loss": 0.1825, "step": 13727 }, { "epoch": 0.79, "grad_norm": 0.28978102239520515, "learning_rate": 2.251114468584944e-06, "loss": 0.1858, "step": 13728 }, { "epoch": 0.79, "grad_norm": 0.8083556054139469, "learning_rate": 2.2499383238528894e-06, "loss": 0.4195, "step": 13729 }, { "epoch": 0.79, "grad_norm": 0.3769533565114214, "learning_rate": 2.2487624475051364e-06, "loss": 0.2837, "step": 13730 }, { "epoch": 0.79, "grad_norm": 0.39109306811626965, "learning_rate": 2.2475868395824043e-06, "loss": 0.2734, "step": 13731 }, { "epoch": 0.79, "grad_norm": 0.7981003418145786, "learning_rate": 2.2464115001254096e-06, "loss": 0.3579, "step": 13732 }, { "epoch": 0.79, "grad_norm": 0.4237085330933973, "learning_rate": 2.245236429174851e-06, "loss": 0.2282, "step": 13733 }, { "epoch": 0.79, "grad_norm": 0.26938040168964505, "learning_rate": 2.244061626771421e-06, "loss": 0.2416, "step": 13734 }, { "epoch": 0.79, "grad_norm": 0.33512199491603817, "learning_rate": 2.2428870929558012e-06, "loss": 0.2038, "step": 13735 }, { "epoch": 0.79, "grad_norm": 0.7622171998110745, "learning_rate": 2.2417128277686694e-06, "loss": 0.4113, "step": 13736 }, { "epoch": 0.79, "grad_norm": 0.34080168091289903, "learning_rate": 2.2405388312506903e-06, "loss": 0.2377, "step": 13737 }, { "epoch": 0.79, "grad_norm": 0.34730323144893105, "learning_rate": 2.239365103442517e-06, "loss": 0.2583, "step": 13738 }, { "epoch": 0.79, "grad_norm": 1.0072696237450594, "learning_rate": 2.238191644384794e-06, "loss": 0.5047, "step": 13739 }, { "epoch": 0.79, "grad_norm": 0.24308109364594496, "learning_rate": 2.237018454118163e-06, "loss": 0.2012, "step": 13740 }, { "epoch": 0.79, "grad_norm": 0.5470089584947886, "learning_rate": 2.2358455326832496e-06, "loss": 0.2059, "step": 13741 }, { "epoch": 0.79, "grad_norm": 0.34034621021093414, "learning_rate": 2.234672880120674e-06, "loss": 0.2917, "step": 13742 }, { "epoch": 0.79, "grad_norm": 0.3159765482745945, "learning_rate": 2.233500496471037e-06, "loss": 0.2669, "step": 13743 }, { "epoch": 0.79, "grad_norm": 1.1613176017519513, "learning_rate": 2.2323283817749463e-06, "loss": 0.7208, "step": 13744 }, { "epoch": 0.79, "grad_norm": 0.338617416056063, "learning_rate": 2.2311565360729903e-06, "loss": 0.2283, "step": 13745 }, { "epoch": 0.79, "grad_norm": 0.3873334398545606, "learning_rate": 2.2299849594057487e-06, "loss": 0.2524, "step": 13746 }, { "epoch": 0.79, "grad_norm": 0.2770440136651813, "learning_rate": 2.2288136518137914e-06, "loss": 0.2116, "step": 13747 }, { "epoch": 0.79, "grad_norm": 0.678257433651019, "learning_rate": 2.227642613337686e-06, "loss": 0.2842, "step": 13748 }, { "epoch": 0.79, "grad_norm": 0.34415897142683244, "learning_rate": 2.2264718440179835e-06, "loss": 0.2728, "step": 13749 }, { "epoch": 0.79, "grad_norm": 0.3417732021973484, "learning_rate": 2.2253013438952253e-06, "loss": 0.2948, "step": 13750 }, { "epoch": 0.79, "grad_norm": 1.7331090075356483, "learning_rate": 2.224131113009945e-06, "loss": 0.1448, "step": 13751 }, { "epoch": 0.79, "grad_norm": 0.35091864102953746, "learning_rate": 2.222961151402674e-06, "loss": 0.2522, "step": 13752 }, { "epoch": 0.79, "grad_norm": 0.44152654129417956, "learning_rate": 2.2217914591139222e-06, "loss": 0.3178, "step": 13753 }, { "epoch": 0.79, "grad_norm": 0.2511167710167237, "learning_rate": 2.2206220361841978e-06, "loss": 0.14, "step": 13754 }, { "epoch": 0.79, "grad_norm": 0.31288446501141626, "learning_rate": 2.2194528826539984e-06, "loss": 0.2369, "step": 13755 }, { "epoch": 0.79, "grad_norm": 1.312231502877169, "learning_rate": 2.218283998563808e-06, "loss": 0.6555, "step": 13756 }, { "epoch": 0.79, "grad_norm": 0.5150153647059441, "learning_rate": 2.2171153839541114e-06, "loss": 0.3475, "step": 13757 }, { "epoch": 0.79, "grad_norm": 0.27675543088755755, "learning_rate": 2.2159470388653737e-06, "loss": 0.2179, "step": 13758 }, { "epoch": 0.79, "grad_norm": 0.8129461695250663, "learning_rate": 2.2147789633380555e-06, "loss": 0.401, "step": 13759 }, { "epoch": 0.79, "grad_norm": 0.3186014787104289, "learning_rate": 2.213611157412605e-06, "loss": 0.1735, "step": 13760 }, { "epoch": 0.79, "grad_norm": 0.2907238321322088, "learning_rate": 2.2124436211294676e-06, "loss": 0.1941, "step": 13761 }, { "epoch": 0.79, "grad_norm": 0.4140887679030627, "learning_rate": 2.2112763545290728e-06, "loss": 0.3082, "step": 13762 }, { "epoch": 0.79, "grad_norm": 1.1476560055281388, "learning_rate": 2.2101093576518416e-06, "loss": 0.3793, "step": 13763 }, { "epoch": 0.79, "grad_norm": 0.3858913981085996, "learning_rate": 2.2089426305381865e-06, "loss": 0.2011, "step": 13764 }, { "epoch": 0.79, "grad_norm": 0.39061485494929044, "learning_rate": 2.2077761732285165e-06, "loss": 0.3134, "step": 13765 }, { "epoch": 0.79, "grad_norm": 0.31571158619901163, "learning_rate": 2.206609985763222e-06, "loss": 0.2483, "step": 13766 }, { "epoch": 0.79, "grad_norm": 0.3321054971659981, "learning_rate": 2.2054440681826896e-06, "loss": 0.1801, "step": 13767 }, { "epoch": 0.79, "grad_norm": 0.34644794016271147, "learning_rate": 2.2042784205272927e-06, "loss": 0.2443, "step": 13768 }, { "epoch": 0.79, "grad_norm": 0.48747689057722515, "learning_rate": 2.203113042837396e-06, "loss": 0.324, "step": 13769 }, { "epoch": 0.79, "grad_norm": 0.3253743126407448, "learning_rate": 2.2019479351533625e-06, "loss": 0.2552, "step": 13770 }, { "epoch": 0.79, "grad_norm": 0.5069718524012903, "learning_rate": 2.2007830975155366e-06, "loss": 0.2568, "step": 13771 }, { "epoch": 0.79, "grad_norm": 1.1996810800114002, "learning_rate": 2.199618529964257e-06, "loss": 0.6718, "step": 13772 }, { "epoch": 0.79, "grad_norm": 0.3826603271787008, "learning_rate": 2.198454232539848e-06, "loss": 0.2895, "step": 13773 }, { "epoch": 0.79, "grad_norm": 0.19718297829960185, "learning_rate": 2.1972902052826384e-06, "loss": 0.1763, "step": 13774 }, { "epoch": 0.79, "grad_norm": 0.6762841450323746, "learning_rate": 2.1961264482329326e-06, "loss": 0.3604, "step": 13775 }, { "epoch": 0.79, "grad_norm": 0.523526720693821, "learning_rate": 2.194962961431032e-06, "loss": 0.2805, "step": 13776 }, { "epoch": 0.79, "grad_norm": 0.348195317352207, "learning_rate": 2.1937997449172287e-06, "loss": 0.2318, "step": 13777 }, { "epoch": 0.79, "grad_norm": 0.49968051927791735, "learning_rate": 2.192636798731804e-06, "loss": 0.3517, "step": 13778 }, { "epoch": 0.79, "grad_norm": 0.3862758209154229, "learning_rate": 2.1914741229150315e-06, "loss": 0.2706, "step": 13779 }, { "epoch": 0.79, "grad_norm": 0.20524174445611812, "learning_rate": 2.1903117175071754e-06, "loss": 0.1168, "step": 13780 }, { "epoch": 0.79, "grad_norm": 0.33607769190284076, "learning_rate": 2.1891495825484856e-06, "loss": 0.2942, "step": 13781 }, { "epoch": 0.79, "grad_norm": 0.681396311351655, "learning_rate": 2.1879877180792117e-06, "loss": 0.3332, "step": 13782 }, { "epoch": 0.79, "grad_norm": 0.3897404025541323, "learning_rate": 2.186826124139587e-06, "loss": 0.2887, "step": 13783 }, { "epoch": 0.79, "grad_norm": 1.199733904627748, "learning_rate": 2.185664800769839e-06, "loss": 0.3483, "step": 13784 }, { "epoch": 0.79, "grad_norm": 0.3624521652254083, "learning_rate": 2.1845037480101793e-06, "loss": 0.2673, "step": 13785 }, { "epoch": 0.79, "grad_norm": 0.23852031573910368, "learning_rate": 2.183342965900821e-06, "loss": 0.2303, "step": 13786 }, { "epoch": 0.79, "grad_norm": 0.6022579071701457, "learning_rate": 2.18218245448196e-06, "loss": 0.2888, "step": 13787 }, { "epoch": 0.79, "grad_norm": 0.29140171805652, "learning_rate": 2.1810222137937855e-06, "loss": 0.1838, "step": 13788 }, { "epoch": 0.79, "grad_norm": 0.3210911846698981, "learning_rate": 2.1798622438764716e-06, "loss": 0.2784, "step": 13789 }, { "epoch": 0.79, "grad_norm": 0.4872387173728299, "learning_rate": 2.1787025447701947e-06, "loss": 0.2583, "step": 13790 }, { "epoch": 0.79, "grad_norm": 0.3987621580560564, "learning_rate": 2.177543116515113e-06, "loss": 0.286, "step": 13791 }, { "epoch": 0.79, "grad_norm": 0.4894773404094698, "learning_rate": 2.176383959151377e-06, "loss": 0.3393, "step": 13792 }, { "epoch": 0.79, "grad_norm": 0.2736323672900733, "learning_rate": 2.175225072719127e-06, "loss": 0.2155, "step": 13793 }, { "epoch": 0.79, "grad_norm": 0.2835026201314597, "learning_rate": 2.174066457258495e-06, "loss": 0.1832, "step": 13794 }, { "epoch": 0.79, "grad_norm": 0.5998237761061286, "learning_rate": 2.172908112809606e-06, "loss": 0.3561, "step": 13795 }, { "epoch": 0.79, "grad_norm": 1.1528927782639422, "learning_rate": 2.1717500394125735e-06, "loss": 0.6232, "step": 13796 }, { "epoch": 0.79, "grad_norm": 0.2608272389082933, "learning_rate": 2.1705922371075005e-06, "loss": 0.2213, "step": 13797 }, { "epoch": 0.79, "grad_norm": 0.5306546366421715, "learning_rate": 2.169434705934479e-06, "loss": 0.308, "step": 13798 }, { "epoch": 0.79, "grad_norm": 0.4938087384739634, "learning_rate": 2.1682774459335987e-06, "loss": 0.2987, "step": 13799 }, { "epoch": 0.79, "grad_norm": 0.1622853869015634, "learning_rate": 2.1671204571449345e-06, "loss": 0.0694, "step": 13800 }, { "epoch": 0.79, "grad_norm": 0.3014907139564795, "learning_rate": 2.165963739608552e-06, "loss": 0.274, "step": 13801 }, { "epoch": 0.79, "grad_norm": 0.4934739195565253, "learning_rate": 2.164807293364506e-06, "loss": 0.3363, "step": 13802 }, { "epoch": 0.79, "grad_norm": 0.4667111156007383, "learning_rate": 2.1636511184528484e-06, "loss": 0.1946, "step": 13803 }, { "epoch": 0.79, "grad_norm": 0.4077960646746423, "learning_rate": 2.162495214913616e-06, "loss": 0.3125, "step": 13804 }, { "epoch": 0.79, "grad_norm": 0.32122695724294886, "learning_rate": 2.1613395827868366e-06, "loss": 0.2702, "step": 13805 }, { "epoch": 0.79, "grad_norm": 0.14965512931372155, "learning_rate": 2.160184222112531e-06, "loss": 0.0691, "step": 13806 }, { "epoch": 0.79, "grad_norm": 0.4639504102647323, "learning_rate": 2.159029132930707e-06, "loss": 0.2847, "step": 13807 }, { "epoch": 0.79, "grad_norm": 1.364104155468768, "learning_rate": 2.1578743152813676e-06, "loss": 0.7156, "step": 13808 }, { "epoch": 0.79, "grad_norm": 0.3139451351224941, "learning_rate": 2.156719769204505e-06, "loss": 0.2972, "step": 13809 }, { "epoch": 0.79, "grad_norm": 0.3411874232698844, "learning_rate": 2.155565494740098e-06, "loss": 0.2292, "step": 13810 }, { "epoch": 0.79, "grad_norm": 1.5518338776407492, "learning_rate": 2.1544114919281223e-06, "loss": 0.616, "step": 13811 }, { "epoch": 0.79, "grad_norm": 0.20569821108771164, "learning_rate": 2.153257760808538e-06, "loss": 0.146, "step": 13812 }, { "epoch": 0.79, "grad_norm": 0.2839430285999692, "learning_rate": 2.152104301421302e-06, "loss": 0.2202, "step": 13813 }, { "epoch": 0.79, "grad_norm": 0.4522044299979987, "learning_rate": 2.150951113806351e-06, "loss": 0.3257, "step": 13814 }, { "epoch": 0.79, "grad_norm": 0.4669588799542236, "learning_rate": 2.1497981980036297e-06, "loss": 0.3026, "step": 13815 }, { "epoch": 0.79, "grad_norm": 0.37379373753426265, "learning_rate": 2.1486455540530593e-06, "loss": 0.2319, "step": 13816 }, { "epoch": 0.79, "grad_norm": 0.3565690261654594, "learning_rate": 2.1474931819945555e-06, "loss": 0.3044, "step": 13817 }, { "epoch": 0.79, "grad_norm": 0.27704374870348236, "learning_rate": 2.1463410818680253e-06, "loss": 0.1727, "step": 13818 }, { "epoch": 0.79, "grad_norm": 0.3302566985170461, "learning_rate": 2.1451892537133624e-06, "loss": 0.2454, "step": 13819 }, { "epoch": 0.79, "grad_norm": 0.41198364612002364, "learning_rate": 2.1440376975704614e-06, "loss": 0.2576, "step": 13820 }, { "epoch": 0.79, "grad_norm": 0.42590625607150334, "learning_rate": 2.142886413479197e-06, "loss": 0.3075, "step": 13821 }, { "epoch": 0.79, "grad_norm": 0.3323355752642059, "learning_rate": 2.1417354014794378e-06, "loss": 0.2635, "step": 13822 }, { "epoch": 0.79, "grad_norm": 1.287156112327123, "learning_rate": 2.1405846616110416e-06, "loss": 0.3389, "step": 13823 }, { "epoch": 0.79, "grad_norm": 0.27619945652710726, "learning_rate": 2.1394341939138618e-06, "loss": 0.149, "step": 13824 }, { "epoch": 0.79, "grad_norm": 0.2555642275552039, "learning_rate": 2.1382839984277395e-06, "loss": 0.2574, "step": 13825 }, { "epoch": 0.79, "grad_norm": 0.687890090018683, "learning_rate": 2.137134075192504e-06, "loss": 0.2607, "step": 13826 }, { "epoch": 0.79, "grad_norm": 0.6251334392979514, "learning_rate": 2.135984424247974e-06, "loss": 0.3902, "step": 13827 }, { "epoch": 0.79, "grad_norm": 0.35576024720872323, "learning_rate": 2.1348350456339684e-06, "loss": 0.2901, "step": 13828 }, { "epoch": 0.79, "grad_norm": 0.33393058087833427, "learning_rate": 2.1336859393902864e-06, "loss": 0.2403, "step": 13829 }, { "epoch": 0.79, "grad_norm": 0.3272136365997171, "learning_rate": 2.1325371055567236e-06, "loss": 0.16, "step": 13830 }, { "epoch": 0.79, "grad_norm": 0.38288933342849923, "learning_rate": 2.1313885441730607e-06, "loss": 0.2884, "step": 13831 }, { "epoch": 0.79, "grad_norm": 0.6989908981679257, "learning_rate": 2.1302402552790723e-06, "loss": 0.3487, "step": 13832 }, { "epoch": 0.79, "grad_norm": 0.26166129345641775, "learning_rate": 2.1290922389145284e-06, "loss": 0.2073, "step": 13833 }, { "epoch": 0.79, "grad_norm": 0.38623162282324663, "learning_rate": 2.1279444951191806e-06, "loss": 0.3036, "step": 13834 }, { "epoch": 0.79, "grad_norm": 1.1632051177823823, "learning_rate": 2.1267970239327773e-06, "loss": 0.5774, "step": 13835 }, { "epoch": 0.79, "grad_norm": 0.27563291221288966, "learning_rate": 2.1256498253950518e-06, "loss": 0.1642, "step": 13836 }, { "epoch": 0.8, "grad_norm": 0.27018901283686697, "learning_rate": 2.124502899545737e-06, "loss": 0.2479, "step": 13837 }, { "epoch": 0.8, "grad_norm": 0.5005766607922495, "learning_rate": 2.1233562464245483e-06, "loss": 0.2709, "step": 13838 }, { "epoch": 0.8, "grad_norm": 1.167362155705901, "learning_rate": 2.122209866071194e-06, "loss": 0.4289, "step": 13839 }, { "epoch": 0.8, "grad_norm": 0.3193880909220907, "learning_rate": 2.12106375852537e-06, "loss": 0.2474, "step": 13840 }, { "epoch": 0.8, "grad_norm": 0.35005318057106716, "learning_rate": 2.119917923826773e-06, "loss": 0.2889, "step": 13841 }, { "epoch": 0.8, "grad_norm": 0.33854876303697296, "learning_rate": 2.118772362015078e-06, "loss": 0.0908, "step": 13842 }, { "epoch": 0.8, "grad_norm": 0.3633576105580375, "learning_rate": 2.117627073129961e-06, "loss": 0.2801, "step": 13843 }, { "epoch": 0.8, "grad_norm": 0.42219924579575663, "learning_rate": 2.1164820572110734e-06, "loss": 0.2845, "step": 13844 }, { "epoch": 0.8, "grad_norm": 0.35881211061754714, "learning_rate": 2.115337314298077e-06, "loss": 0.3175, "step": 13845 }, { "epoch": 0.8, "grad_norm": 0.2868125822886127, "learning_rate": 2.1141928444306094e-06, "loss": 0.2033, "step": 13846 }, { "epoch": 0.8, "grad_norm": 1.1661974844904361, "learning_rate": 2.113048647648305e-06, "loss": 0.4777, "step": 13847 }, { "epoch": 0.8, "grad_norm": 0.3302764354075652, "learning_rate": 2.1119047239907833e-06, "loss": 0.2756, "step": 13848 }, { "epoch": 0.8, "grad_norm": 0.29027735567796065, "learning_rate": 2.110761073497665e-06, "loss": 0.1903, "step": 13849 }, { "epoch": 0.8, "grad_norm": 0.48117388677328654, "learning_rate": 2.1096176962085513e-06, "loss": 0.262, "step": 13850 }, { "epoch": 0.8, "grad_norm": 0.4337568500734956, "learning_rate": 2.1084745921630377e-06, "loss": 0.3096, "step": 13851 }, { "epoch": 0.8, "grad_norm": 0.284048549328376, "learning_rate": 2.107331761400707e-06, "loss": 0.1813, "step": 13852 }, { "epoch": 0.8, "grad_norm": 0.3678509849823864, "learning_rate": 2.1061892039611407e-06, "loss": 0.2978, "step": 13853 }, { "epoch": 0.8, "grad_norm": 0.9895870132408121, "learning_rate": 2.105046919883903e-06, "loss": 0.4123, "step": 13854 }, { "epoch": 0.8, "grad_norm": 0.30690631485293857, "learning_rate": 2.1039049092085507e-06, "loss": 0.2116, "step": 13855 }, { "epoch": 0.8, "grad_norm": 0.2885275065069769, "learning_rate": 2.102763171974629e-06, "loss": 0.232, "step": 13856 }, { "epoch": 0.8, "grad_norm": 0.39902371245566787, "learning_rate": 2.1016217082216815e-06, "loss": 0.2799, "step": 13857 }, { "epoch": 0.8, "grad_norm": 0.31971621759090113, "learning_rate": 2.100480517989235e-06, "loss": 0.259, "step": 13858 }, { "epoch": 0.8, "grad_norm": 0.8751263325898722, "learning_rate": 2.099339601316809e-06, "loss": 0.2952, "step": 13859 }, { "epoch": 0.8, "grad_norm": 0.34455456165955256, "learning_rate": 2.098198958243911e-06, "loss": 0.303, "step": 13860 }, { "epoch": 0.8, "grad_norm": 0.3229178337704223, "learning_rate": 2.0970585888100425e-06, "loss": 0.266, "step": 13861 }, { "epoch": 0.8, "grad_norm": 0.8776049310548688, "learning_rate": 2.0959184930546973e-06, "loss": 0.2564, "step": 13862 }, { "epoch": 0.8, "grad_norm": 0.47569729397496585, "learning_rate": 2.0947786710173545e-06, "loss": 0.2821, "step": 13863 }, { "epoch": 0.8, "grad_norm": 0.2504288310318657, "learning_rate": 2.0936391227374874e-06, "loss": 0.2158, "step": 13864 }, { "epoch": 0.8, "grad_norm": 0.35032213602144774, "learning_rate": 2.0924998482545535e-06, "loss": 0.2503, "step": 13865 }, { "epoch": 0.8, "grad_norm": 0.6552856340397468, "learning_rate": 2.0913608476080138e-06, "loss": 0.3592, "step": 13866 }, { "epoch": 0.8, "grad_norm": 0.3133790814738462, "learning_rate": 2.090222120837306e-06, "loss": 0.2509, "step": 13867 }, { "epoch": 0.8, "grad_norm": 0.47362331697258986, "learning_rate": 2.089083667981868e-06, "loss": 0.2264, "step": 13868 }, { "epoch": 0.8, "grad_norm": 0.4945426270079345, "learning_rate": 2.087945489081119e-06, "loss": 0.4079, "step": 13869 }, { "epoch": 0.8, "grad_norm": 0.3993652479527239, "learning_rate": 2.0868075841744795e-06, "loss": 0.2696, "step": 13870 }, { "epoch": 0.8, "grad_norm": 0.26067501072855986, "learning_rate": 2.0856699533013535e-06, "loss": 0.1919, "step": 13871 }, { "epoch": 0.8, "grad_norm": 0.3616882633023731, "learning_rate": 2.0845325965011375e-06, "loss": 0.264, "step": 13872 }, { "epoch": 0.8, "grad_norm": 0.3935744589887008, "learning_rate": 2.083395513813217e-06, "loss": 0.277, "step": 13873 }, { "epoch": 0.8, "grad_norm": 0.662955242143687, "learning_rate": 2.082258705276966e-06, "loss": 0.3611, "step": 13874 }, { "epoch": 0.8, "grad_norm": 1.0458268635013401, "learning_rate": 2.0811221709317587e-06, "loss": 0.5303, "step": 13875 }, { "epoch": 0.8, "grad_norm": 0.23084717455090384, "learning_rate": 2.0799859108169496e-06, "loss": 0.2034, "step": 13876 }, { "epoch": 0.8, "grad_norm": 0.3093465003844103, "learning_rate": 2.0788499249718887e-06, "loss": 0.2502, "step": 13877 }, { "epoch": 0.8, "grad_norm": 1.0381983387677298, "learning_rate": 2.077714213435914e-06, "loss": 0.2885, "step": 13878 }, { "epoch": 0.8, "grad_norm": 0.32579748606769765, "learning_rate": 2.0765787762483545e-06, "loss": 0.2467, "step": 13879 }, { "epoch": 0.8, "grad_norm": 1.1957787010002299, "learning_rate": 2.075443613448532e-06, "loss": 0.4765, "step": 13880 }, { "epoch": 0.8, "grad_norm": 0.33500602148308517, "learning_rate": 2.0743087250757544e-06, "loss": 0.2632, "step": 13881 }, { "epoch": 0.8, "grad_norm": 0.3294045154993524, "learning_rate": 2.073174111169327e-06, "loss": 0.2471, "step": 13882 }, { "epoch": 0.8, "grad_norm": 0.8524795933817103, "learning_rate": 2.072039771768539e-06, "loss": 0.3734, "step": 13883 }, { "epoch": 0.8, "grad_norm": 0.21219881196196438, "learning_rate": 2.0709057069126726e-06, "loss": 0.2046, "step": 13884 }, { "epoch": 0.8, "grad_norm": 0.3405533089230494, "learning_rate": 2.0697719166410013e-06, "loss": 0.2002, "step": 13885 }, { "epoch": 0.8, "grad_norm": 1.5915861638020246, "learning_rate": 2.068638400992784e-06, "loss": 0.757, "step": 13886 }, { "epoch": 0.8, "grad_norm": 1.2417936476128963, "learning_rate": 2.0675051600072817e-06, "loss": 0.7115, "step": 13887 }, { "epoch": 0.8, "grad_norm": 0.2680697096757186, "learning_rate": 2.0663721937237334e-06, "loss": 0.1963, "step": 13888 }, { "epoch": 0.8, "grad_norm": 0.3929872162313532, "learning_rate": 2.0652395021813752e-06, "loss": 0.3059, "step": 13889 }, { "epoch": 0.8, "grad_norm": 0.36593534101852726, "learning_rate": 2.064107085419429e-06, "loss": 0.1969, "step": 13890 }, { "epoch": 0.8, "grad_norm": 0.32655675523124694, "learning_rate": 2.062974943477116e-06, "loss": 0.1665, "step": 13891 }, { "epoch": 0.8, "grad_norm": 0.29384082638874254, "learning_rate": 2.0618430763936402e-06, "loss": 0.2684, "step": 13892 }, { "epoch": 0.8, "grad_norm": 1.209531727244767, "learning_rate": 2.0607114842081966e-06, "loss": 0.7586, "step": 13893 }, { "epoch": 0.8, "grad_norm": 0.3941791338393727, "learning_rate": 2.0595801669599704e-06, "loss": 0.2967, "step": 13894 }, { "epoch": 0.8, "grad_norm": 0.5237764192291912, "learning_rate": 2.0584491246881443e-06, "loss": 0.2825, "step": 13895 }, { "epoch": 0.8, "grad_norm": 0.2794762521852401, "learning_rate": 2.0573183574318832e-06, "loss": 0.249, "step": 13896 }, { "epoch": 0.8, "grad_norm": 0.2774111684414447, "learning_rate": 2.0561878652303458e-06, "loss": 0.1959, "step": 13897 }, { "epoch": 0.8, "grad_norm": 0.5438540602781272, "learning_rate": 2.0550576481226814e-06, "loss": 0.212, "step": 13898 }, { "epoch": 0.8, "grad_norm": 0.6511150660244753, "learning_rate": 2.0539277061480256e-06, "loss": 0.4511, "step": 13899 }, { "epoch": 0.8, "grad_norm": 0.2423417364066042, "learning_rate": 2.0527980393455147e-06, "loss": 0.2466, "step": 13900 }, { "epoch": 0.8, "grad_norm": 0.47122913695901136, "learning_rate": 2.051668647754267e-06, "loss": 0.2733, "step": 13901 }, { "epoch": 0.8, "grad_norm": 0.27193278941468646, "learning_rate": 2.0505395314133915e-06, "loss": 0.156, "step": 13902 }, { "epoch": 0.8, "grad_norm": 0.5962094572756668, "learning_rate": 2.049410690361987e-06, "loss": 0.304, "step": 13903 }, { "epoch": 0.8, "grad_norm": 0.26728260818607263, "learning_rate": 2.0482821246391515e-06, "loss": 0.2332, "step": 13904 }, { "epoch": 0.8, "grad_norm": 0.7884511286921686, "learning_rate": 2.0471538342839637e-06, "loss": 0.4379, "step": 13905 }, { "epoch": 0.8, "grad_norm": 0.5466734579354183, "learning_rate": 2.0460258193354963e-06, "loss": 0.3473, "step": 13906 }, { "epoch": 0.8, "grad_norm": 0.39502877345893916, "learning_rate": 2.0448980798328113e-06, "loss": 0.3107, "step": 13907 }, { "epoch": 0.8, "grad_norm": 0.19179045423602922, "learning_rate": 2.043770615814966e-06, "loss": 0.1632, "step": 13908 }, { "epoch": 0.8, "grad_norm": 0.5438731088748233, "learning_rate": 2.0426434273210016e-06, "loss": 0.2428, "step": 13909 }, { "epoch": 0.8, "grad_norm": 0.3807065412872983, "learning_rate": 2.041516514389954e-06, "loss": 0.2902, "step": 13910 }, { "epoch": 0.8, "grad_norm": 0.7299876932540644, "learning_rate": 2.0403898770608466e-06, "loss": 0.3083, "step": 13911 }, { "epoch": 0.8, "grad_norm": 0.3073229067569145, "learning_rate": 2.0392635153726958e-06, "loss": 0.2584, "step": 13912 }, { "epoch": 0.8, "grad_norm": 0.38588900994842523, "learning_rate": 2.0381374293645072e-06, "loss": 0.2999, "step": 13913 }, { "epoch": 0.8, "grad_norm": 0.47918109962924327, "learning_rate": 2.0370116190752763e-06, "loss": 0.1199, "step": 13914 }, { "epoch": 0.8, "grad_norm": 0.3032568846110088, "learning_rate": 2.035886084543989e-06, "loss": 0.19, "step": 13915 }, { "epoch": 0.8, "grad_norm": 0.3048366839804276, "learning_rate": 2.0347608258096263e-06, "loss": 0.2932, "step": 13916 }, { "epoch": 0.8, "grad_norm": 0.860092659438108, "learning_rate": 2.0336358429111534e-06, "loss": 0.3054, "step": 13917 }, { "epoch": 0.8, "grad_norm": 0.410237488915634, "learning_rate": 2.0325111358875295e-06, "loss": 0.2969, "step": 13918 }, { "epoch": 0.8, "grad_norm": 0.5844699302701855, "learning_rate": 2.031386704777698e-06, "loss": 0.3557, "step": 13919 }, { "epoch": 0.8, "grad_norm": 0.3807335069363862, "learning_rate": 2.0302625496206065e-06, "loss": 0.2834, "step": 13920 }, { "epoch": 0.8, "grad_norm": 0.19674851030297955, "learning_rate": 2.0291386704551795e-06, "loss": 0.0822, "step": 13921 }, { "epoch": 0.8, "grad_norm": 0.4132502470459073, "learning_rate": 2.028015067320338e-06, "loss": 0.3011, "step": 13922 }, { "epoch": 0.8, "grad_norm": 0.6473087562464866, "learning_rate": 2.0268917402549914e-06, "loss": 0.3178, "step": 13923 }, { "epoch": 0.8, "grad_norm": 0.36673530265989734, "learning_rate": 2.0257686892980387e-06, "loss": 0.2243, "step": 13924 }, { "epoch": 0.8, "grad_norm": 0.36203099449274834, "learning_rate": 2.0246459144883767e-06, "loss": 0.2867, "step": 13925 }, { "epoch": 0.8, "grad_norm": 0.5054331029168437, "learning_rate": 2.023523415864883e-06, "loss": 0.2783, "step": 13926 }, { "epoch": 0.8, "grad_norm": 0.28828519889572235, "learning_rate": 2.02240119346643e-06, "loss": 0.0938, "step": 13927 }, { "epoch": 0.8, "grad_norm": 0.3016104369032031, "learning_rate": 2.0212792473318788e-06, "loss": 0.2868, "step": 13928 }, { "epoch": 0.8, "grad_norm": 0.6227814296163309, "learning_rate": 2.020157577500086e-06, "loss": 0.379, "step": 13929 }, { "epoch": 0.8, "grad_norm": 0.36615003349825465, "learning_rate": 2.019036184009894e-06, "loss": 0.2155, "step": 13930 }, { "epoch": 0.8, "grad_norm": 0.40065400364964354, "learning_rate": 2.0179150669001347e-06, "loss": 0.3056, "step": 13931 }, { "epoch": 0.8, "grad_norm": 0.359059617026297, "learning_rate": 2.0167942262096317e-06, "loss": 0.3046, "step": 13932 }, { "epoch": 0.8, "grad_norm": 0.3025116844483224, "learning_rate": 2.0156736619772034e-06, "loss": 0.1751, "step": 13933 }, { "epoch": 0.8, "grad_norm": 0.3910522596201756, "learning_rate": 2.0145533742416536e-06, "loss": 0.2408, "step": 13934 }, { "epoch": 0.8, "grad_norm": 0.8924655555675018, "learning_rate": 2.013433363041777e-06, "loss": 0.4662, "step": 13935 }, { "epoch": 0.8, "grad_norm": 0.3086428880041467, "learning_rate": 2.012313628416359e-06, "loss": 0.2813, "step": 13936 }, { "epoch": 0.8, "grad_norm": 0.3740806939413502, "learning_rate": 2.0111941704041738e-06, "loss": 0.2224, "step": 13937 }, { "epoch": 0.8, "grad_norm": 1.312160171802165, "learning_rate": 2.0100749890439943e-06, "loss": 0.5937, "step": 13938 }, { "epoch": 0.8, "grad_norm": 0.39192858923914753, "learning_rate": 2.0089560843745737e-06, "loss": 0.2937, "step": 13939 }, { "epoch": 0.8, "grad_norm": 0.28806615219896364, "learning_rate": 2.0078374564346605e-06, "loss": 0.2294, "step": 13940 }, { "epoch": 0.8, "grad_norm": 0.28823469322612705, "learning_rate": 2.0067191052629897e-06, "loss": 0.1929, "step": 13941 }, { "epoch": 0.8, "grad_norm": 1.1471591644672199, "learning_rate": 2.0056010308982954e-06, "loss": 0.6341, "step": 13942 }, { "epoch": 0.8, "grad_norm": 0.30332531713716976, "learning_rate": 2.0044832333792942e-06, "loss": 0.1903, "step": 13943 }, { "epoch": 0.8, "grad_norm": 0.37927445804426185, "learning_rate": 2.003365712744694e-06, "loss": 0.286, "step": 13944 }, { "epoch": 0.8, "grad_norm": 0.8720692601753824, "learning_rate": 2.0022484690331957e-06, "loss": 0.3939, "step": 13945 }, { "epoch": 0.8, "grad_norm": 0.326152282045429, "learning_rate": 2.0011315022834887e-06, "loss": 0.2417, "step": 13946 }, { "epoch": 0.8, "grad_norm": 0.25737955441315835, "learning_rate": 2.000014812534253e-06, "loss": 0.121, "step": 13947 }, { "epoch": 0.8, "grad_norm": 0.3269429544155623, "learning_rate": 1.9988983998241616e-06, "loss": 0.2851, "step": 13948 }, { "epoch": 0.8, "grad_norm": 0.262006983013765, "learning_rate": 1.9977822641918722e-06, "loss": 0.2173, "step": 13949 }, { "epoch": 0.8, "grad_norm": 1.0942238387697292, "learning_rate": 1.996666405676041e-06, "loss": 0.2634, "step": 13950 }, { "epoch": 0.8, "grad_norm": 0.31107848659817255, "learning_rate": 1.9955508243153075e-06, "loss": 0.2901, "step": 13951 }, { "epoch": 0.8, "grad_norm": 0.36351275425339735, "learning_rate": 1.9944355201483057e-06, "loss": 0.2643, "step": 13952 }, { "epoch": 0.8, "grad_norm": 0.27098007640215643, "learning_rate": 1.993320493213654e-06, "loss": 0.1226, "step": 13953 }, { "epoch": 0.8, "grad_norm": 0.3990952691061257, "learning_rate": 1.992205743549972e-06, "loss": 0.2757, "step": 13954 }, { "epoch": 0.8, "grad_norm": 0.3607559119535387, "learning_rate": 1.991091271195862e-06, "loss": 0.2886, "step": 13955 }, { "epoch": 0.8, "grad_norm": 0.32360319442150737, "learning_rate": 1.989977076189916e-06, "loss": 0.254, "step": 13956 }, { "epoch": 0.8, "grad_norm": 0.5376372309933372, "learning_rate": 1.9888631585707165e-06, "loss": 0.3056, "step": 13957 }, { "epoch": 0.8, "grad_norm": 0.4290201609712683, "learning_rate": 1.987749518376845e-06, "loss": 0.2751, "step": 13958 }, { "epoch": 0.8, "grad_norm": 0.49871685728533977, "learning_rate": 1.986636155646862e-06, "loss": 0.3373, "step": 13959 }, { "epoch": 0.8, "grad_norm": 0.27146044669137814, "learning_rate": 1.985523070419324e-06, "loss": 0.1775, "step": 13960 }, { "epoch": 0.8, "grad_norm": 0.2442071282951535, "learning_rate": 1.984410262732779e-06, "loss": 0.1945, "step": 13961 }, { "epoch": 0.8, "grad_norm": 0.6881344071763977, "learning_rate": 1.9832977326257587e-06, "loss": 0.3537, "step": 13962 }, { "epoch": 0.8, "grad_norm": 0.32311184584433045, "learning_rate": 1.9821854801367947e-06, "loss": 0.2321, "step": 13963 }, { "epoch": 0.8, "grad_norm": 0.29921551388686407, "learning_rate": 1.981073505304404e-06, "loss": 0.2423, "step": 13964 }, { "epoch": 0.8, "grad_norm": 1.1952034277542511, "learning_rate": 1.9799618081670925e-06, "loss": 0.6672, "step": 13965 }, { "epoch": 0.8, "grad_norm": 0.16565970362132512, "learning_rate": 1.978850388763356e-06, "loss": 0.0987, "step": 13966 }, { "epoch": 0.8, "grad_norm": 0.28583421178478075, "learning_rate": 1.977739247131688e-06, "loss": 0.2643, "step": 13967 }, { "epoch": 0.8, "grad_norm": 0.45246208200159804, "learning_rate": 1.976628383310566e-06, "loss": 0.3104, "step": 13968 }, { "epoch": 0.8, "grad_norm": 0.7345956913222442, "learning_rate": 1.9755177973384575e-06, "loss": 0.2461, "step": 13969 }, { "epoch": 0.8, "grad_norm": 0.3468748142933442, "learning_rate": 1.9744074892538203e-06, "loss": 0.2622, "step": 13970 }, { "epoch": 0.8, "grad_norm": 0.4893550654394881, "learning_rate": 1.9732974590951083e-06, "loss": 0.3401, "step": 13971 }, { "epoch": 0.8, "grad_norm": 0.313095481399203, "learning_rate": 1.972187706900761e-06, "loss": 0.256, "step": 13972 }, { "epoch": 0.8, "grad_norm": 0.28891551172507257, "learning_rate": 1.9710782327092083e-06, "loss": 0.1845, "step": 13973 }, { "epoch": 0.8, "grad_norm": 0.45100541255152954, "learning_rate": 1.9699690365588674e-06, "loss": 0.3064, "step": 13974 }, { "epoch": 0.8, "grad_norm": 0.329722050488096, "learning_rate": 1.9688601184881572e-06, "loss": 0.2946, "step": 13975 }, { "epoch": 0.8, "grad_norm": 0.28484365336909273, "learning_rate": 1.9677514785354747e-06, "loss": 0.1848, "step": 13976 }, { "epoch": 0.8, "grad_norm": 1.0415921858447894, "learning_rate": 1.966643116739214e-06, "loss": 0.5841, "step": 13977 }, { "epoch": 0.8, "grad_norm": 0.945207146977298, "learning_rate": 1.9655350331377563e-06, "loss": 0.4686, "step": 13978 }, { "epoch": 0.8, "grad_norm": 0.32279578400528663, "learning_rate": 1.964427227769475e-06, "loss": 0.1987, "step": 13979 }, { "epoch": 0.8, "grad_norm": 0.3187970274152468, "learning_rate": 1.9633197006727333e-06, "loss": 0.2472, "step": 13980 }, { "epoch": 0.8, "grad_norm": 0.4706050178267718, "learning_rate": 1.9622124518858855e-06, "loss": 0.215, "step": 13981 }, { "epoch": 0.8, "grad_norm": 0.32904291628440924, "learning_rate": 1.9611054814472707e-06, "loss": 0.2531, "step": 13982 }, { "epoch": 0.8, "grad_norm": 0.3076825890196617, "learning_rate": 1.959998789395231e-06, "loss": 0.2521, "step": 13983 }, { "epoch": 0.8, "grad_norm": 0.808660652729481, "learning_rate": 1.9588923757680878e-06, "loss": 0.496, "step": 13984 }, { "epoch": 0.8, "grad_norm": 0.3336398442274031, "learning_rate": 1.9577862406041558e-06, "loss": 0.2611, "step": 13985 }, { "epoch": 0.8, "grad_norm": 0.741654787071884, "learning_rate": 1.956680383941737e-06, "loss": 0.1866, "step": 13986 }, { "epoch": 0.8, "grad_norm": 0.2751490630762147, "learning_rate": 1.9555748058191337e-06, "loss": 0.2453, "step": 13987 }, { "epoch": 0.8, "grad_norm": 0.3371685877572128, "learning_rate": 1.9544695062746286e-06, "loss": 0.2371, "step": 13988 }, { "epoch": 0.8, "grad_norm": 0.8680164276785397, "learning_rate": 1.9533644853464996e-06, "loss": 0.4721, "step": 13989 }, { "epoch": 0.8, "grad_norm": 0.47686024734591553, "learning_rate": 1.952259743073012e-06, "loss": 0.3312, "step": 13990 }, { "epoch": 0.8, "grad_norm": 0.39545690708722075, "learning_rate": 1.9511552794924194e-06, "loss": 0.2362, "step": 13991 }, { "epoch": 0.8, "grad_norm": 0.36269709669257977, "learning_rate": 1.9500510946429772e-06, "loss": 0.2391, "step": 13992 }, { "epoch": 0.8, "grad_norm": 0.31630341720542904, "learning_rate": 1.9489471885629196e-06, "loss": 0.1847, "step": 13993 }, { "epoch": 0.8, "grad_norm": 0.40712626311337746, "learning_rate": 1.9478435612904744e-06, "loss": 0.2752, "step": 13994 }, { "epoch": 0.8, "grad_norm": 0.2942320985895319, "learning_rate": 1.946740212863858e-06, "loss": 0.288, "step": 13995 }, { "epoch": 0.8, "grad_norm": 0.7675447935881212, "learning_rate": 1.945637143321284e-06, "loss": 0.3028, "step": 13996 }, { "epoch": 0.8, "grad_norm": 0.400006777628331, "learning_rate": 1.9445343527009497e-06, "loss": 0.2919, "step": 13997 }, { "epoch": 0.8, "grad_norm": 0.3556966067366299, "learning_rate": 1.9434318410410435e-06, "loss": 0.247, "step": 13998 }, { "epoch": 0.8, "grad_norm": 0.25185255386753236, "learning_rate": 1.942329608379745e-06, "loss": 0.183, "step": 13999 }, { "epoch": 0.8, "grad_norm": 0.31969515942588655, "learning_rate": 1.9412276547552276e-06, "loss": 0.2485, "step": 14000 }, { "epoch": 0.8, "grad_norm": 0.9991830838318048, "learning_rate": 1.9401259802056495e-06, "loss": 0.5334, "step": 14001 }, { "epoch": 0.8, "grad_norm": 0.42432293133392635, "learning_rate": 1.9390245847691625e-06, "loss": 0.2674, "step": 14002 }, { "epoch": 0.8, "grad_norm": 0.257524155935746, "learning_rate": 1.9379234684839075e-06, "loss": 0.244, "step": 14003 }, { "epoch": 0.8, "grad_norm": 1.2512902112125146, "learning_rate": 1.9368226313880134e-06, "loss": 0.5929, "step": 14004 }, { "epoch": 0.8, "grad_norm": 0.2558079446835865, "learning_rate": 1.935722073519608e-06, "loss": 0.0865, "step": 14005 }, { "epoch": 0.8, "grad_norm": 0.30514259612396377, "learning_rate": 1.9346217949168e-06, "loss": 0.2563, "step": 14006 }, { "epoch": 0.8, "grad_norm": 0.32676000041131353, "learning_rate": 1.933521795617692e-06, "loss": 0.2903, "step": 14007 }, { "epoch": 0.8, "grad_norm": 0.6566724718751233, "learning_rate": 1.932422075660376e-06, "loss": 0.3683, "step": 14008 }, { "epoch": 0.8, "grad_norm": 0.31808490481525514, "learning_rate": 1.931322635082938e-06, "loss": 0.175, "step": 14009 }, { "epoch": 0.8, "grad_norm": 0.5415586763759216, "learning_rate": 1.9302234739234507e-06, "loss": 0.3217, "step": 14010 }, { "epoch": 0.8, "grad_norm": 0.2155972087042718, "learning_rate": 1.9291245922199776e-06, "loss": 0.2108, "step": 14011 }, { "epoch": 0.81, "grad_norm": 0.40196080536904666, "learning_rate": 1.9280259900105723e-06, "loss": 0.1741, "step": 14012 }, { "epoch": 0.81, "grad_norm": 0.5085855651773016, "learning_rate": 1.9269276673332806e-06, "loss": 0.3481, "step": 14013 }, { "epoch": 0.81, "grad_norm": 0.6857508871116539, "learning_rate": 1.9258296242261355e-06, "loss": 0.4188, "step": 14014 }, { "epoch": 0.81, "grad_norm": 0.24640439130406608, "learning_rate": 1.9247318607271637e-06, "loss": 0.2135, "step": 14015 }, { "epoch": 0.81, "grad_norm": 0.5920314104837552, "learning_rate": 1.923634376874378e-06, "loss": 0.3719, "step": 14016 }, { "epoch": 0.81, "grad_norm": 0.25891151164406034, "learning_rate": 1.9225371727057897e-06, "loss": 0.1558, "step": 14017 }, { "epoch": 0.81, "grad_norm": 0.3841785028881913, "learning_rate": 1.921440248259391e-06, "loss": 0.2032, "step": 14018 }, { "epoch": 0.81, "grad_norm": 0.2969167120395358, "learning_rate": 1.9203436035731694e-06, "loss": 0.2832, "step": 14019 }, { "epoch": 0.81, "grad_norm": 0.8309038714782914, "learning_rate": 1.919247238685098e-06, "loss": 0.5118, "step": 14020 }, { "epoch": 0.81, "grad_norm": 0.7674720192887823, "learning_rate": 1.918151153633151e-06, "loss": 0.2878, "step": 14021 }, { "epoch": 0.81, "grad_norm": 0.564361057127029, "learning_rate": 1.917055348455281e-06, "loss": 0.2283, "step": 14022 }, { "epoch": 0.81, "grad_norm": 0.2814577681747703, "learning_rate": 1.9159598231894385e-06, "loss": 0.2676, "step": 14023 }, { "epoch": 0.81, "grad_norm": 0.2937360059194529, "learning_rate": 1.9148645778735555e-06, "loss": 0.2098, "step": 14024 }, { "epoch": 0.81, "grad_norm": 0.5653575255098695, "learning_rate": 1.9137696125455672e-06, "loss": 0.2534, "step": 14025 }, { "epoch": 0.81, "grad_norm": 0.5171731933248757, "learning_rate": 1.91267492724339e-06, "loss": 0.3388, "step": 14026 }, { "epoch": 0.81, "grad_norm": 0.3752775546359505, "learning_rate": 1.911580522004931e-06, "loss": 0.2683, "step": 14027 }, { "epoch": 0.81, "grad_norm": 0.368645831443498, "learning_rate": 1.910486396868092e-06, "loss": 0.2158, "step": 14028 }, { "epoch": 0.81, "grad_norm": 0.30316861367267955, "learning_rate": 1.909392551870759e-06, "loss": 0.1882, "step": 14029 }, { "epoch": 0.81, "grad_norm": 0.6934510092482077, "learning_rate": 1.908298987050815e-06, "loss": 0.3114, "step": 14030 }, { "epoch": 0.81, "grad_norm": 0.24347718869590526, "learning_rate": 1.907205702446131e-06, "loss": 0.2422, "step": 14031 }, { "epoch": 0.81, "grad_norm": 1.259711777802538, "learning_rate": 1.9061126980945644e-06, "loss": 0.657, "step": 14032 }, { "epoch": 0.81, "grad_norm": 0.572298799907, "learning_rate": 1.9050199740339648e-06, "loss": 0.3185, "step": 14033 }, { "epoch": 0.81, "grad_norm": 0.42220044917716687, "learning_rate": 1.9039275303021775e-06, "loss": 0.2817, "step": 14034 }, { "epoch": 0.81, "grad_norm": 0.34749331845809894, "learning_rate": 1.9028353669370315e-06, "loss": 0.2275, "step": 14035 }, { "epoch": 0.81, "grad_norm": 0.49512336442006893, "learning_rate": 1.9017434839763493e-06, "loss": 0.3007, "step": 14036 }, { "epoch": 0.81, "grad_norm": 0.2254669338598251, "learning_rate": 1.900651881457939e-06, "loss": 0.1896, "step": 14037 }, { "epoch": 0.81, "grad_norm": 0.4076412787846548, "learning_rate": 1.8995605594196086e-06, "loss": 0.2732, "step": 14038 }, { "epoch": 0.81, "grad_norm": 0.32296030460251207, "learning_rate": 1.8984695178991475e-06, "loss": 0.2963, "step": 14039 }, { "epoch": 0.81, "grad_norm": 0.5481564846950949, "learning_rate": 1.8973787569343394e-06, "loss": 0.3206, "step": 14040 }, { "epoch": 0.81, "grad_norm": 0.6209723598460225, "learning_rate": 1.8962882765629552e-06, "loss": 0.2801, "step": 14041 }, { "epoch": 0.81, "grad_norm": 0.3055173237798795, "learning_rate": 1.8951980768227586e-06, "loss": 0.2729, "step": 14042 }, { "epoch": 0.81, "grad_norm": 0.35224208256032374, "learning_rate": 1.8941081577515053e-06, "loss": 0.2993, "step": 14043 }, { "epoch": 0.81, "grad_norm": 0.28113135249311766, "learning_rate": 1.8930185193869376e-06, "loss": 0.1722, "step": 14044 }, { "epoch": 0.81, "grad_norm": 0.29298152458518495, "learning_rate": 1.8919291617667912e-06, "loss": 0.1652, "step": 14045 }, { "epoch": 0.81, "grad_norm": 0.45232617515848045, "learning_rate": 1.8908400849287889e-06, "loss": 0.2993, "step": 14046 }, { "epoch": 0.81, "grad_norm": 0.37147528154819126, "learning_rate": 1.8897512889106451e-06, "loss": 0.3054, "step": 14047 }, { "epoch": 0.81, "grad_norm": 0.5620327131313082, "learning_rate": 1.8886627737500663e-06, "loss": 0.2291, "step": 14048 }, { "epoch": 0.81, "grad_norm": 0.26947274618159855, "learning_rate": 1.8875745394847434e-06, "loss": 0.2188, "step": 14049 }, { "epoch": 0.81, "grad_norm": 0.3342387244403811, "learning_rate": 1.8864865861523684e-06, "loss": 0.3209, "step": 14050 }, { "epoch": 0.81, "grad_norm": 0.16985618467855765, "learning_rate": 1.8853989137906137e-06, "loss": 0.0886, "step": 14051 }, { "epoch": 0.81, "grad_norm": 0.38349041687568947, "learning_rate": 1.8843115224371467e-06, "loss": 0.2769, "step": 14052 }, { "epoch": 0.81, "grad_norm": 0.6251738290863288, "learning_rate": 1.8832244121296217e-06, "loss": 0.3818, "step": 14053 }, { "epoch": 0.81, "grad_norm": 0.3763987776386038, "learning_rate": 1.8821375829056842e-06, "loss": 0.2174, "step": 14054 }, { "epoch": 0.81, "grad_norm": 0.31215780044633634, "learning_rate": 1.8810510348029753e-06, "loss": 0.2912, "step": 14055 }, { "epoch": 0.81, "grad_norm": 1.225420066022846, "learning_rate": 1.8799647678591203e-06, "loss": 0.7848, "step": 14056 }, { "epoch": 0.81, "grad_norm": 0.2622067946912736, "learning_rate": 1.878878782111736e-06, "loss": 0.1747, "step": 14057 }, { "epoch": 0.81, "grad_norm": 0.35062197492639324, "learning_rate": 1.8777930775984277e-06, "loss": 0.222, "step": 14058 }, { "epoch": 0.81, "grad_norm": 0.36092983810301527, "learning_rate": 1.8767076543567986e-06, "loss": 0.305, "step": 14059 }, { "epoch": 0.81, "grad_norm": 0.5759708595809394, "learning_rate": 1.8756225124244332e-06, "loss": 0.3023, "step": 14060 }, { "epoch": 0.81, "grad_norm": 0.3373531812343405, "learning_rate": 1.8745376518389113e-06, "loss": 0.2121, "step": 14061 }, { "epoch": 0.81, "grad_norm": 0.3674552882376826, "learning_rate": 1.8734530726377997e-06, "loss": 0.3401, "step": 14062 }, { "epoch": 0.81, "grad_norm": 0.20581830272049467, "learning_rate": 1.8723687748586605e-06, "loss": 0.1439, "step": 14063 }, { "epoch": 0.81, "grad_norm": 0.3971041780692171, "learning_rate": 1.8712847585390403e-06, "loss": 0.2296, "step": 14064 }, { "epoch": 0.81, "grad_norm": 0.7877507215245848, "learning_rate": 1.8702010237164803e-06, "loss": 0.3901, "step": 14065 }, { "epoch": 0.81, "grad_norm": 0.3564769039000346, "learning_rate": 1.8691175704285091e-06, "loss": 0.3047, "step": 14066 }, { "epoch": 0.81, "grad_norm": 0.32404927429594854, "learning_rate": 1.8680343987126448e-06, "loss": 0.1954, "step": 14067 }, { "epoch": 0.81, "grad_norm": 1.125143640483104, "learning_rate": 1.8669515086064006e-06, "loss": 0.7126, "step": 14068 }, { "epoch": 0.81, "grad_norm": 0.4371310503844042, "learning_rate": 1.8658689001472775e-06, "loss": 0.2545, "step": 14069 }, { "epoch": 0.81, "grad_norm": 0.26684346084216576, "learning_rate": 1.8647865733727644e-06, "loss": 0.2519, "step": 14070 }, { "epoch": 0.81, "grad_norm": 0.30304389288387373, "learning_rate": 1.8637045283203391e-06, "loss": 0.1918, "step": 14071 }, { "epoch": 0.81, "grad_norm": 0.9331663827577262, "learning_rate": 1.8626227650274787e-06, "loss": 0.4022, "step": 14072 }, { "epoch": 0.81, "grad_norm": 0.3671040035406563, "learning_rate": 1.8615412835316426e-06, "loss": 0.2767, "step": 14073 }, { "epoch": 0.81, "grad_norm": 0.33313642195654125, "learning_rate": 1.8604600838702814e-06, "loss": 0.2546, "step": 14074 }, { "epoch": 0.81, "grad_norm": 0.6134663014323616, "learning_rate": 1.8593791660808357e-06, "loss": 0.3682, "step": 14075 }, { "epoch": 0.81, "grad_norm": 0.35912139079324334, "learning_rate": 1.8582985302007405e-06, "loss": 0.2771, "step": 14076 }, { "epoch": 0.81, "grad_norm": 0.3628918133959204, "learning_rate": 1.8572181762674192e-06, "loss": 0.1353, "step": 14077 }, { "epoch": 0.81, "grad_norm": 0.31315581719439217, "learning_rate": 1.8561381043182803e-06, "loss": 0.2639, "step": 14078 }, { "epoch": 0.81, "grad_norm": 0.3602637118248707, "learning_rate": 1.8550583143907274e-06, "loss": 0.2798, "step": 14079 }, { "epoch": 0.81, "grad_norm": 0.7198841723664006, "learning_rate": 1.8539788065221598e-06, "loss": 0.351, "step": 14080 }, { "epoch": 0.81, "grad_norm": 0.6480899378091401, "learning_rate": 1.8528995807499528e-06, "loss": 0.3457, "step": 14081 }, { "epoch": 0.81, "grad_norm": 0.2939543727068227, "learning_rate": 1.8518206371114833e-06, "loss": 0.264, "step": 14082 }, { "epoch": 0.81, "grad_norm": 0.2582146968956923, "learning_rate": 1.8507419756441114e-06, "loss": 0.193, "step": 14083 }, { "epoch": 0.81, "grad_norm": 1.4650946807988512, "learning_rate": 1.8496635963851973e-06, "loss": 0.1613, "step": 14084 }, { "epoch": 0.81, "grad_norm": 0.30943678757627774, "learning_rate": 1.8485854993720831e-06, "loss": 0.2519, "step": 14085 }, { "epoch": 0.81, "grad_norm": 0.33098435069101717, "learning_rate": 1.8475076846421025e-06, "loss": 0.303, "step": 14086 }, { "epoch": 0.81, "grad_norm": 0.8136153140125502, "learning_rate": 1.8464301522325767e-06, "loss": 0.3076, "step": 14087 }, { "epoch": 0.81, "grad_norm": 0.3524686554462998, "learning_rate": 1.8453529021808282e-06, "loss": 0.2519, "step": 14088 }, { "epoch": 0.81, "grad_norm": 0.4539966783959743, "learning_rate": 1.8442759345241567e-06, "loss": 0.2466, "step": 14089 }, { "epoch": 0.81, "grad_norm": 0.2479546022298384, "learning_rate": 1.8431992492998595e-06, "loss": 0.1885, "step": 14090 }, { "epoch": 0.81, "grad_norm": 0.2979881583987387, "learning_rate": 1.8421228465452213e-06, "loss": 0.2557, "step": 14091 }, { "epoch": 0.81, "grad_norm": 0.8722453179515083, "learning_rate": 1.8410467262975152e-06, "loss": 0.5817, "step": 14092 }, { "epoch": 0.81, "grad_norm": 0.5745595604603996, "learning_rate": 1.8399708885940136e-06, "loss": 0.2642, "step": 14093 }, { "epoch": 0.81, "grad_norm": 0.28733919145554143, "learning_rate": 1.8388953334719684e-06, "loss": 0.2482, "step": 14094 }, { "epoch": 0.81, "grad_norm": 0.34111214746268476, "learning_rate": 1.837820060968627e-06, "loss": 0.2435, "step": 14095 }, { "epoch": 0.81, "grad_norm": 0.5477066304455874, "learning_rate": 1.8367450711212232e-06, "loss": 0.2656, "step": 14096 }, { "epoch": 0.81, "grad_norm": 0.2817456644619849, "learning_rate": 1.8356703639669904e-06, "loss": 0.1833, "step": 14097 }, { "epoch": 0.81, "grad_norm": 0.34795741382488465, "learning_rate": 1.8345959395431401e-06, "loss": 0.2963, "step": 14098 }, { "epoch": 0.81, "grad_norm": 0.6400339452738504, "learning_rate": 1.8335217978868825e-06, "loss": 0.4564, "step": 14099 }, { "epoch": 0.81, "grad_norm": 0.2979103780116249, "learning_rate": 1.832447939035411e-06, "loss": 0.1861, "step": 14100 }, { "epoch": 0.81, "grad_norm": 0.375418491704937, "learning_rate": 1.8313743630259184e-06, "loss": 0.1906, "step": 14101 }, { "epoch": 0.81, "grad_norm": 0.3613002064565976, "learning_rate": 1.8303010698955803e-06, "loss": 0.2885, "step": 14102 }, { "epoch": 0.81, "grad_norm": 0.331150447620705, "learning_rate": 1.8292280596815649e-06, "loss": 0.1993, "step": 14103 }, { "epoch": 0.81, "grad_norm": 0.5396453246374567, "learning_rate": 1.8281553324210278e-06, "loss": 0.3703, "step": 14104 }, { "epoch": 0.81, "grad_norm": 1.2441896449953618, "learning_rate": 1.8270828881511238e-06, "loss": 0.6757, "step": 14105 }, { "epoch": 0.81, "grad_norm": 0.22987355409113447, "learning_rate": 1.8260107269089865e-06, "loss": 0.206, "step": 14106 }, { "epoch": 0.81, "grad_norm": 1.4125169977730276, "learning_rate": 1.8249388487317465e-06, "loss": 0.6422, "step": 14107 }, { "epoch": 0.81, "grad_norm": 0.3321933632573765, "learning_rate": 1.823867253656524e-06, "loss": 0.1871, "step": 14108 }, { "epoch": 0.81, "grad_norm": 0.33090979186783415, "learning_rate": 1.8227959417204222e-06, "loss": 0.2569, "step": 14109 }, { "epoch": 0.81, "grad_norm": 0.3767301347240297, "learning_rate": 1.8217249129605496e-06, "loss": 0.2513, "step": 14110 }, { "epoch": 0.81, "grad_norm": 0.9509296598908838, "learning_rate": 1.820654167413991e-06, "loss": 0.5372, "step": 14111 }, { "epoch": 0.81, "grad_norm": 0.3118256550146787, "learning_rate": 1.8195837051178267e-06, "loss": 0.2466, "step": 14112 }, { "epoch": 0.81, "grad_norm": 0.21152041292581364, "learning_rate": 1.8185135261091247e-06, "loss": 0.0666, "step": 14113 }, { "epoch": 0.81, "grad_norm": 0.2945684219244831, "learning_rate": 1.817443630424952e-06, "loss": 0.2494, "step": 14114 }, { "epoch": 0.81, "grad_norm": 0.3782111624907756, "learning_rate": 1.8163740181023526e-06, "loss": 0.2492, "step": 14115 }, { "epoch": 0.81, "grad_norm": 0.4657972759725943, "learning_rate": 1.8153046891783654e-06, "loss": 0.2856, "step": 14116 }, { "epoch": 0.81, "grad_norm": 0.340304507867154, "learning_rate": 1.8142356436900288e-06, "loss": 0.3124, "step": 14117 }, { "epoch": 0.81, "grad_norm": 0.3706279739554896, "learning_rate": 1.8131668816743586e-06, "loss": 0.2635, "step": 14118 }, { "epoch": 0.81, "grad_norm": 0.6320484353599375, "learning_rate": 1.8120984031683686e-06, "loss": 0.2199, "step": 14119 }, { "epoch": 0.81, "grad_norm": 0.3099710789466092, "learning_rate": 1.811030208209058e-06, "loss": 0.1903, "step": 14120 }, { "epoch": 0.81, "grad_norm": 0.4168781646528722, "learning_rate": 1.8099622968334163e-06, "loss": 0.2884, "step": 14121 }, { "epoch": 0.81, "grad_norm": 0.28981006334400455, "learning_rate": 1.8088946690784314e-06, "loss": 0.2911, "step": 14122 }, { "epoch": 0.81, "grad_norm": 1.0230460545064057, "learning_rate": 1.8078273249810718e-06, "loss": 0.4709, "step": 14123 }, { "epoch": 0.81, "grad_norm": 0.36665359776477047, "learning_rate": 1.806760264578299e-06, "loss": 0.2622, "step": 14124 }, { "epoch": 0.81, "grad_norm": 1.4271593257453827, "learning_rate": 1.8056934879070642e-06, "loss": 0.5857, "step": 14125 }, { "epoch": 0.81, "grad_norm": 0.2838783227056397, "learning_rate": 1.8046269950043138e-06, "loss": 0.208, "step": 14126 }, { "epoch": 0.81, "grad_norm": 0.3194121737248507, "learning_rate": 1.803560785906977e-06, "loss": 0.2564, "step": 14127 }, { "epoch": 0.81, "grad_norm": 0.4439798130696629, "learning_rate": 1.8024948606519787e-06, "loss": 0.2703, "step": 14128 }, { "epoch": 0.81, "grad_norm": 0.301214908000095, "learning_rate": 1.8014292192762285e-06, "loss": 0.2513, "step": 14129 }, { "epoch": 0.81, "grad_norm": 0.3262329360305483, "learning_rate": 1.8003638618166342e-06, "loss": 0.257, "step": 14130 }, { "epoch": 0.81, "grad_norm": 1.184176185516564, "learning_rate": 1.7992987883100877e-06, "loss": 0.4787, "step": 14131 }, { "epoch": 0.81, "grad_norm": 0.6445822576434146, "learning_rate": 1.7982339987934705e-06, "loss": 0.2847, "step": 14132 }, { "epoch": 0.81, "grad_norm": 0.40948848681739713, "learning_rate": 1.7971694933036576e-06, "loss": 0.2681, "step": 14133 }, { "epoch": 0.81, "grad_norm": 0.23710816941660431, "learning_rate": 1.7961052718775096e-06, "loss": 0.2382, "step": 14134 }, { "epoch": 0.81, "grad_norm": 0.47714786473531884, "learning_rate": 1.7950413345518858e-06, "loss": 0.2881, "step": 14135 }, { "epoch": 0.81, "grad_norm": 0.4062079094273239, "learning_rate": 1.7939776813636278e-06, "loss": 0.179, "step": 14136 }, { "epoch": 0.81, "grad_norm": 0.3923200485078189, "learning_rate": 1.7929143123495695e-06, "loss": 0.2899, "step": 14137 }, { "epoch": 0.81, "grad_norm": 0.4917783194741124, "learning_rate": 1.7918512275465338e-06, "loss": 0.3125, "step": 14138 }, { "epoch": 0.81, "grad_norm": 0.3656188792929695, "learning_rate": 1.790788426991339e-06, "loss": 0.1853, "step": 14139 }, { "epoch": 0.81, "grad_norm": 0.5413502468999466, "learning_rate": 1.7897259107207888e-06, "loss": 0.4162, "step": 14140 }, { "epoch": 0.81, "grad_norm": 0.32025923404560214, "learning_rate": 1.7886636787716761e-06, "loss": 0.2454, "step": 14141 }, { "epoch": 0.81, "grad_norm": 0.2429282363963751, "learning_rate": 1.787601731180786e-06, "loss": 0.173, "step": 14142 }, { "epoch": 0.81, "grad_norm": 0.5438855371114105, "learning_rate": 1.7865400679848953e-06, "loss": 0.3524, "step": 14143 }, { "epoch": 0.81, "grad_norm": 0.6026762575249064, "learning_rate": 1.7854786892207709e-06, "loss": 0.4291, "step": 14144 }, { "epoch": 0.81, "grad_norm": 0.418008323429287, "learning_rate": 1.7844175949251653e-06, "loss": 0.2601, "step": 14145 }, { "epoch": 0.81, "grad_norm": 0.2651924993585563, "learning_rate": 1.7833567851348254e-06, "loss": 0.232, "step": 14146 }, { "epoch": 0.81, "grad_norm": 0.2537043627469472, "learning_rate": 1.7822962598864868e-06, "loss": 0.1781, "step": 14147 }, { "epoch": 0.81, "grad_norm": 0.37665750496002826, "learning_rate": 1.7812360192168742e-06, "loss": 0.2524, "step": 14148 }, { "epoch": 0.81, "grad_norm": 0.353511059581225, "learning_rate": 1.7801760631627064e-06, "loss": 0.2421, "step": 14149 }, { "epoch": 0.81, "grad_norm": 0.7572573431074694, "learning_rate": 1.7791163917606846e-06, "loss": 0.3338, "step": 14150 }, { "epoch": 0.81, "grad_norm": 0.8686696236646662, "learning_rate": 1.7780570050475122e-06, "loss": 0.3671, "step": 14151 }, { "epoch": 0.81, "grad_norm": 0.3290711277167447, "learning_rate": 1.7769979030598706e-06, "loss": 0.231, "step": 14152 }, { "epoch": 0.81, "grad_norm": 0.26873437490942453, "learning_rate": 1.7759390858344395e-06, "loss": 0.2475, "step": 14153 }, { "epoch": 0.81, "grad_norm": 0.3209443331607646, "learning_rate": 1.7748805534078805e-06, "loss": 0.1887, "step": 14154 }, { "epoch": 0.81, "grad_norm": 0.423738269074664, "learning_rate": 1.773822305816857e-06, "loss": 0.2495, "step": 14155 }, { "epoch": 0.81, "grad_norm": 0.8545198353982074, "learning_rate": 1.7727643430980135e-06, "loss": 0.411, "step": 14156 }, { "epoch": 0.81, "grad_norm": 0.37808128113435296, "learning_rate": 1.7717066652879877e-06, "loss": 0.3064, "step": 14157 }, { "epoch": 0.81, "grad_norm": 0.2977203707383954, "learning_rate": 1.770649272423406e-06, "loss": 0.276, "step": 14158 }, { "epoch": 0.81, "grad_norm": 0.2657906575491931, "learning_rate": 1.7695921645408832e-06, "loss": 0.1227, "step": 14159 }, { "epoch": 0.81, "grad_norm": 0.33109296938461874, "learning_rate": 1.7685353416770322e-06, "loss": 0.2161, "step": 14160 }, { "epoch": 0.81, "grad_norm": 0.24742271713944344, "learning_rate": 1.7674788038684488e-06, "loss": 0.258, "step": 14161 }, { "epoch": 0.81, "grad_norm": 0.9296375061584038, "learning_rate": 1.7664225511517196e-06, "loss": 0.2892, "step": 14162 }, { "epoch": 0.81, "grad_norm": 0.427928964518346, "learning_rate": 1.7653665835634214e-06, "loss": 0.2758, "step": 14163 }, { "epoch": 0.81, "grad_norm": 0.5014102721249964, "learning_rate": 1.7643109011401272e-06, "loss": 0.3853, "step": 14164 }, { "epoch": 0.81, "grad_norm": 0.36698527266326064, "learning_rate": 1.7632555039183918e-06, "loss": 0.2581, "step": 14165 }, { "epoch": 0.81, "grad_norm": 0.40372847248005095, "learning_rate": 1.762200391934764e-06, "loss": 0.2874, "step": 14166 }, { "epoch": 0.81, "grad_norm": 0.5512841267027948, "learning_rate": 1.7611455652257802e-06, "loss": 0.3146, "step": 14167 }, { "epoch": 0.81, "grad_norm": 0.2772187100793543, "learning_rate": 1.7600910238279745e-06, "loss": 0.096, "step": 14168 }, { "epoch": 0.81, "grad_norm": 0.32362380832747006, "learning_rate": 1.7590367677778607e-06, "loss": 0.2657, "step": 14169 }, { "epoch": 0.81, "grad_norm": 0.3396696390434374, "learning_rate": 1.7579827971119501e-06, "loss": 0.2754, "step": 14170 }, { "epoch": 0.81, "grad_norm": 0.9538742821374108, "learning_rate": 1.756929111866741e-06, "loss": 0.4811, "step": 14171 }, { "epoch": 0.81, "grad_norm": 0.6335048928071565, "learning_rate": 1.7558757120787196e-06, "loss": 0.2464, "step": 14172 }, { "epoch": 0.81, "grad_norm": 0.29133111716058585, "learning_rate": 1.7548225977843703e-06, "loss": 0.2619, "step": 14173 }, { "epoch": 0.81, "grad_norm": 0.23845768488865898, "learning_rate": 1.7537697690201604e-06, "loss": 0.1823, "step": 14174 }, { "epoch": 0.81, "grad_norm": 0.5880555726294913, "learning_rate": 1.7527172258225479e-06, "loss": 0.143, "step": 14175 }, { "epoch": 0.81, "grad_norm": 0.3607590540833608, "learning_rate": 1.7516649682279807e-06, "loss": 0.2992, "step": 14176 }, { "epoch": 0.81, "grad_norm": 0.3758488924017635, "learning_rate": 1.7506129962729046e-06, "loss": 0.2974, "step": 14177 }, { "epoch": 0.81, "grad_norm": 0.6228724462522425, "learning_rate": 1.7495613099937447e-06, "loss": 0.2398, "step": 14178 }, { "epoch": 0.81, "grad_norm": 0.28615182796675526, "learning_rate": 1.748509909426922e-06, "loss": 0.2267, "step": 14179 }, { "epoch": 0.81, "grad_norm": 0.5160793879630342, "learning_rate": 1.747458794608844e-06, "loss": 0.2544, "step": 14180 }, { "epoch": 0.81, "grad_norm": 0.28378991284710486, "learning_rate": 1.7464079655759181e-06, "loss": 0.2048, "step": 14181 }, { "epoch": 0.81, "grad_norm": 0.39445167751605087, "learning_rate": 1.7453574223645265e-06, "loss": 0.3056, "step": 14182 }, { "epoch": 0.81, "grad_norm": 0.7048727007429841, "learning_rate": 1.7443071650110532e-06, "loss": 0.4061, "step": 14183 }, { "epoch": 0.81, "grad_norm": 0.511720675101496, "learning_rate": 1.743257193551865e-06, "loss": 0.2747, "step": 14184 }, { "epoch": 0.81, "grad_norm": 0.27744333813057853, "learning_rate": 1.742207508023327e-06, "loss": 0.2208, "step": 14185 }, { "epoch": 0.82, "grad_norm": 0.2413983160625698, "learning_rate": 1.741158108461788e-06, "loss": 0.1841, "step": 14186 }, { "epoch": 0.82, "grad_norm": 0.9634224363895272, "learning_rate": 1.7401089949035888e-06, "loss": 0.4651, "step": 14187 }, { "epoch": 0.82, "grad_norm": 0.3454464012521359, "learning_rate": 1.7390601673850582e-06, "loss": 0.2216, "step": 14188 }, { "epoch": 0.82, "grad_norm": 0.36039692383388244, "learning_rate": 1.7380116259425205e-06, "loss": 0.2878, "step": 14189 }, { "epoch": 0.82, "grad_norm": 1.0262897010065517, "learning_rate": 1.7369633706122845e-06, "loss": 0.457, "step": 14190 }, { "epoch": 0.82, "grad_norm": 0.32525366190097454, "learning_rate": 1.7359154014306523e-06, "loss": 0.1925, "step": 14191 }, { "epoch": 0.82, "grad_norm": 0.25124789227221145, "learning_rate": 1.7348677184339114e-06, "loss": 0.173, "step": 14192 }, { "epoch": 0.82, "grad_norm": 0.31799897298231583, "learning_rate": 1.7338203216583493e-06, "loss": 0.2934, "step": 14193 }, { "epoch": 0.82, "grad_norm": 0.3329687176740427, "learning_rate": 1.732773211140233e-06, "loss": 0.2172, "step": 14194 }, { "epoch": 0.82, "grad_norm": 0.6666999484070938, "learning_rate": 1.7317263869158252e-06, "loss": 0.3835, "step": 14195 }, { "epoch": 0.82, "grad_norm": 0.840595293551172, "learning_rate": 1.7306798490213783e-06, "loss": 0.4352, "step": 14196 }, { "epoch": 0.82, "grad_norm": 0.24835261216243176, "learning_rate": 1.729633597493129e-06, "loss": 0.2628, "step": 14197 }, { "epoch": 0.82, "grad_norm": 0.24067764811215303, "learning_rate": 1.7285876323673144e-06, "loss": 0.0807, "step": 14198 }, { "epoch": 0.82, "grad_norm": 0.6601672537032992, "learning_rate": 1.7275419536801552e-06, "loss": 0.3598, "step": 14199 }, { "epoch": 0.82, "grad_norm": 0.366001352720004, "learning_rate": 1.7264965614678631e-06, "loss": 0.2781, "step": 14200 }, { "epoch": 0.82, "grad_norm": 0.3601613755013287, "learning_rate": 1.7254514557666358e-06, "loss": 0.2612, "step": 14201 }, { "epoch": 0.82, "grad_norm": 0.847066894311364, "learning_rate": 1.7244066366126722e-06, "loss": 0.3791, "step": 14202 }, { "epoch": 0.82, "grad_norm": 0.35177083832531264, "learning_rate": 1.72336210404215e-06, "loss": 0.2582, "step": 14203 }, { "epoch": 0.82, "grad_norm": 0.23594743243529212, "learning_rate": 1.7223178580912426e-06, "loss": 0.147, "step": 14204 }, { "epoch": 0.82, "grad_norm": 0.41703845501159037, "learning_rate": 1.7212738987961086e-06, "loss": 0.3409, "step": 14205 }, { "epoch": 0.82, "grad_norm": 0.3426632852760692, "learning_rate": 1.7202302261929071e-06, "loss": 0.283, "step": 14206 }, { "epoch": 0.82, "grad_norm": 0.7113318712831286, "learning_rate": 1.7191868403177757e-06, "loss": 0.3203, "step": 14207 }, { "epoch": 0.82, "grad_norm": 0.5202922764450023, "learning_rate": 1.7181437412068491e-06, "loss": 0.3489, "step": 14208 }, { "epoch": 0.82, "grad_norm": 0.29408898192812816, "learning_rate": 1.717100928896246e-06, "loss": 0.2608, "step": 14209 }, { "epoch": 0.82, "grad_norm": 0.32304266882818133, "learning_rate": 1.7160584034220828e-06, "loss": 0.1571, "step": 14210 }, { "epoch": 0.82, "grad_norm": 0.573358791143515, "learning_rate": 1.7150161648204622e-06, "loss": 0.2802, "step": 14211 }, { "epoch": 0.82, "grad_norm": 0.30989601757233975, "learning_rate": 1.713974213127475e-06, "loss": 0.2593, "step": 14212 }, { "epoch": 0.82, "grad_norm": 0.36938090057695033, "learning_rate": 1.7129325483792048e-06, "loss": 0.3223, "step": 14213 }, { "epoch": 0.82, "grad_norm": 1.4585879963319681, "learning_rate": 1.7118911706117213e-06, "loss": 0.326, "step": 14214 }, { "epoch": 0.82, "grad_norm": 0.32485245497389287, "learning_rate": 1.710850079861095e-06, "loss": 0.2697, "step": 14215 }, { "epoch": 0.82, "grad_norm": 0.44404969059636795, "learning_rate": 1.7098092761633722e-06, "loss": 0.247, "step": 14216 }, { "epoch": 0.82, "grad_norm": 0.32738510941378074, "learning_rate": 1.7087687595545943e-06, "loss": 0.2433, "step": 14217 }, { "epoch": 0.82, "grad_norm": 0.34031209603664575, "learning_rate": 1.7077285300708002e-06, "loss": 0.2835, "step": 14218 }, { "epoch": 0.82, "grad_norm": 0.8347750409587513, "learning_rate": 1.706688587748011e-06, "loss": 0.5498, "step": 14219 }, { "epoch": 0.82, "grad_norm": 0.2600218973847508, "learning_rate": 1.7056489326222392e-06, "loss": 0.2242, "step": 14220 }, { "epoch": 0.82, "grad_norm": 0.2922422219720576, "learning_rate": 1.7046095647294859e-06, "loss": 0.1806, "step": 14221 }, { "epoch": 0.82, "grad_norm": 0.9252160238532011, "learning_rate": 1.70357048410575e-06, "loss": 0.3704, "step": 14222 }, { "epoch": 0.82, "grad_norm": 0.46332376444231493, "learning_rate": 1.7025316907870105e-06, "loss": 0.2813, "step": 14223 }, { "epoch": 0.82, "grad_norm": 0.28819936990494277, "learning_rate": 1.7014931848092409e-06, "loss": 0.1896, "step": 14224 }, { "epoch": 0.82, "grad_norm": 0.32904385789124907, "learning_rate": 1.700454966208407e-06, "loss": 0.313, "step": 14225 }, { "epoch": 0.82, "grad_norm": 0.3804473251777745, "learning_rate": 1.6994170350204576e-06, "loss": 0.2184, "step": 14226 }, { "epoch": 0.82, "grad_norm": 0.31666399354697766, "learning_rate": 1.6983793912813418e-06, "loss": 0.1884, "step": 14227 }, { "epoch": 0.82, "grad_norm": 0.35107911647497736, "learning_rate": 1.6973420350269909e-06, "loss": 0.2859, "step": 14228 }, { "epoch": 0.82, "grad_norm": 0.689539750224186, "learning_rate": 1.6963049662933273e-06, "loss": 0.3485, "step": 14229 }, { "epoch": 0.82, "grad_norm": 0.2906909140943072, "learning_rate": 1.6952681851162644e-06, "loss": 0.1952, "step": 14230 }, { "epoch": 0.82, "grad_norm": 1.1242614537154207, "learning_rate": 1.6942316915317091e-06, "loss": 0.7466, "step": 14231 }, { "epoch": 0.82, "grad_norm": 0.23330529085667315, "learning_rate": 1.6931954855755527e-06, "loss": 0.2129, "step": 14232 }, { "epoch": 0.82, "grad_norm": 0.3182776146060527, "learning_rate": 1.6921595672836811e-06, "loss": 0.235, "step": 14233 }, { "epoch": 0.82, "grad_norm": 1.1253697865393655, "learning_rate": 1.6911239366919618e-06, "loss": 0.2787, "step": 14234 }, { "epoch": 0.82, "grad_norm": 0.6584813922771027, "learning_rate": 1.6900885938362677e-06, "loss": 0.4179, "step": 14235 }, { "epoch": 0.82, "grad_norm": 0.31570298371256733, "learning_rate": 1.6890535387524465e-06, "loss": 0.2391, "step": 14236 }, { "epoch": 0.82, "grad_norm": 0.3460198335510046, "learning_rate": 1.6880187714763453e-06, "loss": 0.2658, "step": 14237 }, { "epoch": 0.82, "grad_norm": 0.3434216073340945, "learning_rate": 1.6869842920437961e-06, "loss": 0.1589, "step": 14238 }, { "epoch": 0.82, "grad_norm": 0.37042829811528694, "learning_rate": 1.6859501004906208e-06, "loss": 0.2287, "step": 14239 }, { "epoch": 0.82, "grad_norm": 0.3684246063480538, "learning_rate": 1.6849161968526384e-06, "loss": 0.258, "step": 14240 }, { "epoch": 0.82, "grad_norm": 0.49687821321936904, "learning_rate": 1.6838825811656512e-06, "loss": 0.3434, "step": 14241 }, { "epoch": 0.82, "grad_norm": 0.4044736505228071, "learning_rate": 1.6828492534654516e-06, "loss": 0.2857, "step": 14242 }, { "epoch": 0.82, "grad_norm": 0.2720523490521247, "learning_rate": 1.6818162137878224e-06, "loss": 0.1785, "step": 14243 }, { "epoch": 0.82, "grad_norm": 0.2924842153658913, "learning_rate": 1.6807834621685426e-06, "loss": 0.2572, "step": 14244 }, { "epoch": 0.82, "grad_norm": 0.42620199736044223, "learning_rate": 1.6797509986433746e-06, "loss": 0.2771, "step": 14245 }, { "epoch": 0.82, "grad_norm": 0.4717554256016822, "learning_rate": 1.678718823248071e-06, "loss": 0.3129, "step": 14246 }, { "epoch": 0.82, "grad_norm": 0.8840619730677773, "learning_rate": 1.6776869360183746e-06, "loss": 0.2366, "step": 14247 }, { "epoch": 0.82, "grad_norm": 0.3197993001659107, "learning_rate": 1.6766553369900241e-06, "loss": 0.2557, "step": 14248 }, { "epoch": 0.82, "grad_norm": 0.32803477233922695, "learning_rate": 1.6756240261987434e-06, "loss": 0.3214, "step": 14249 }, { "epoch": 0.82, "grad_norm": 0.3035876936412637, "learning_rate": 1.6745930036802428e-06, "loss": 0.1272, "step": 14250 }, { "epoch": 0.82, "grad_norm": 0.32877403922823634, "learning_rate": 1.6735622694702259e-06, "loss": 0.2405, "step": 14251 }, { "epoch": 0.82, "grad_norm": 1.3599618100090622, "learning_rate": 1.6725318236043908e-06, "loss": 0.3988, "step": 14252 }, { "epoch": 0.82, "grad_norm": 0.3396785730073297, "learning_rate": 1.6715016661184225e-06, "loss": 0.2526, "step": 14253 }, { "epoch": 0.82, "grad_norm": 0.3797148679905969, "learning_rate": 1.6704717970479923e-06, "loss": 0.2691, "step": 14254 }, { "epoch": 0.82, "grad_norm": 0.5481283074189417, "learning_rate": 1.6694422164287627e-06, "loss": 0.3733, "step": 14255 }, { "epoch": 0.82, "grad_norm": 0.26139269536422804, "learning_rate": 1.6684129242963943e-06, "loss": 0.2224, "step": 14256 }, { "epoch": 0.82, "grad_norm": 0.40947654935266997, "learning_rate": 1.6673839206865283e-06, "loss": 0.2732, "step": 14257 }, { "epoch": 0.82, "grad_norm": 0.3235516674745909, "learning_rate": 1.6663552056347975e-06, "loss": 0.2544, "step": 14258 }, { "epoch": 0.82, "grad_norm": 1.1695979780325698, "learning_rate": 1.6653267791768258e-06, "loss": 0.5732, "step": 14259 }, { "epoch": 0.82, "grad_norm": 0.2849039189767827, "learning_rate": 1.6642986413482321e-06, "loss": 0.1958, "step": 14260 }, { "epoch": 0.82, "grad_norm": 0.38973166723076286, "learning_rate": 1.663270792184618e-06, "loss": 0.2984, "step": 14261 }, { "epoch": 0.82, "grad_norm": 0.8836587032487782, "learning_rate": 1.6622432317215776e-06, "loss": 0.4592, "step": 14262 }, { "epoch": 0.82, "grad_norm": 0.40255447112780424, "learning_rate": 1.6612159599946954e-06, "loss": 0.2373, "step": 14263 }, { "epoch": 0.82, "grad_norm": 0.20061146179604905, "learning_rate": 1.660188977039544e-06, "loss": 0.2094, "step": 14264 }, { "epoch": 0.82, "grad_norm": 1.583325395018498, "learning_rate": 1.659162282891692e-06, "loss": 0.7721, "step": 14265 }, { "epoch": 0.82, "grad_norm": 0.32561047876624966, "learning_rate": 1.6581358775866907e-06, "loss": 0.1869, "step": 14266 }, { "epoch": 0.82, "grad_norm": 0.5064695995116827, "learning_rate": 1.6571097611600862e-06, "loss": 0.3667, "step": 14267 }, { "epoch": 0.82, "grad_norm": 0.3729173917284346, "learning_rate": 1.6560839336474088e-06, "loss": 0.3192, "step": 14268 }, { "epoch": 0.82, "grad_norm": 0.3673667832560838, "learning_rate": 1.6550583950841891e-06, "loss": 0.2171, "step": 14269 }, { "epoch": 0.82, "grad_norm": 0.2352059959306334, "learning_rate": 1.6540331455059377e-06, "loss": 0.1389, "step": 14270 }, { "epoch": 0.82, "grad_norm": 1.2082826393706474, "learning_rate": 1.6530081849481595e-06, "loss": 0.5537, "step": 14271 }, { "epoch": 0.82, "grad_norm": 0.27082498922394227, "learning_rate": 1.6519835134463468e-06, "loss": 0.2635, "step": 14272 }, { "epoch": 0.82, "grad_norm": 0.4933710914894767, "learning_rate": 1.6509591310359886e-06, "loss": 0.2803, "step": 14273 }, { "epoch": 0.82, "grad_norm": 0.6557255957824724, "learning_rate": 1.649935037752557e-06, "loss": 0.3822, "step": 14274 }, { "epoch": 0.82, "grad_norm": 0.8281936757764693, "learning_rate": 1.648911233631516e-06, "loss": 0.3157, "step": 14275 }, { "epoch": 0.82, "grad_norm": 0.20637211972769265, "learning_rate": 1.6478877187083187e-06, "loss": 0.1921, "step": 14276 }, { "epoch": 0.82, "grad_norm": 0.4550300837789682, "learning_rate": 1.6468644930184097e-06, "loss": 0.2334, "step": 14277 }, { "epoch": 0.82, "grad_norm": 0.5805410905455298, "learning_rate": 1.6458415565972253e-06, "loss": 0.3295, "step": 14278 }, { "epoch": 0.82, "grad_norm": 0.43031247602796713, "learning_rate": 1.6448189094801891e-06, "loss": 0.2736, "step": 14279 }, { "epoch": 0.82, "grad_norm": 0.36717439759245385, "learning_rate": 1.6437965517027143e-06, "loss": 0.2979, "step": 14280 }, { "epoch": 0.82, "grad_norm": 0.6289420966546745, "learning_rate": 1.6427744833002036e-06, "loss": 0.3179, "step": 14281 }, { "epoch": 0.82, "grad_norm": 0.24428017486398262, "learning_rate": 1.6417527043080583e-06, "loss": 0.1558, "step": 14282 }, { "epoch": 0.82, "grad_norm": 1.1756826326190717, "learning_rate": 1.6407312147616539e-06, "loss": 0.4512, "step": 14283 }, { "epoch": 0.82, "grad_norm": 0.3007377368607552, "learning_rate": 1.6397100146963662e-06, "loss": 0.2467, "step": 14284 }, { "epoch": 0.82, "grad_norm": 0.38786311492343356, "learning_rate": 1.6386891041475639e-06, "loss": 0.3136, "step": 14285 }, { "epoch": 0.82, "grad_norm": 0.719273995861604, "learning_rate": 1.6376684831505984e-06, "loss": 0.2903, "step": 14286 }, { "epoch": 0.82, "grad_norm": 0.616064577176051, "learning_rate": 1.636648151740814e-06, "loss": 0.2994, "step": 14287 }, { "epoch": 0.82, "grad_norm": 0.29643428083943496, "learning_rate": 1.6356281099535432e-06, "loss": 0.2724, "step": 14288 }, { "epoch": 0.82, "grad_norm": 0.27973403877242464, "learning_rate": 1.63460835782411e-06, "loss": 0.1537, "step": 14289 }, { "epoch": 0.82, "grad_norm": 0.3902255474494219, "learning_rate": 1.633588895387832e-06, "loss": 0.2725, "step": 14290 }, { "epoch": 0.82, "grad_norm": 0.6206832393439994, "learning_rate": 1.6325697226800109e-06, "loss": 0.3138, "step": 14291 }, { "epoch": 0.82, "grad_norm": 0.32000206528220054, "learning_rate": 1.6315508397359391e-06, "loss": 0.2513, "step": 14292 }, { "epoch": 0.82, "grad_norm": 0.9261649174707342, "learning_rate": 1.6305322465909012e-06, "loss": 0.3988, "step": 14293 }, { "epoch": 0.82, "grad_norm": 0.3895447592390385, "learning_rate": 1.6295139432801732e-06, "loss": 0.2633, "step": 14294 }, { "epoch": 0.82, "grad_norm": 0.20039587976898893, "learning_rate": 1.628495929839018e-06, "loss": 0.1685, "step": 14295 }, { "epoch": 0.82, "grad_norm": 0.678502925193134, "learning_rate": 1.6274782063026883e-06, "loss": 0.3645, "step": 14296 }, { "epoch": 0.82, "grad_norm": 0.3716671212985712, "learning_rate": 1.6264607727064253e-06, "loss": 0.3074, "step": 14297 }, { "epoch": 0.82, "grad_norm": 0.8235463841034898, "learning_rate": 1.6254436290854691e-06, "loss": 0.4441, "step": 14298 }, { "epoch": 0.82, "grad_norm": 0.6843383792249415, "learning_rate": 1.62442677547504e-06, "loss": 0.1194, "step": 14299 }, { "epoch": 0.82, "grad_norm": 0.25397188356352135, "learning_rate": 1.62341021191035e-06, "loss": 0.267, "step": 14300 }, { "epoch": 0.82, "grad_norm": 0.32104682332962464, "learning_rate": 1.6223939384266064e-06, "loss": 0.1979, "step": 14301 }, { "epoch": 0.82, "grad_norm": 0.6705088211808614, "learning_rate": 1.6213779550589959e-06, "loss": 0.2417, "step": 14302 }, { "epoch": 0.82, "grad_norm": 0.41855789850269925, "learning_rate": 1.6203622618427105e-06, "loss": 0.2967, "step": 14303 }, { "epoch": 0.82, "grad_norm": 0.3418585073461008, "learning_rate": 1.6193468588129192e-06, "loss": 0.311, "step": 14304 }, { "epoch": 0.82, "grad_norm": 0.8013089892375115, "learning_rate": 1.6183317460047853e-06, "loss": 0.1193, "step": 14305 }, { "epoch": 0.82, "grad_norm": 0.4221278782395689, "learning_rate": 1.6173169234534602e-06, "loss": 0.2809, "step": 14306 }, { "epoch": 0.82, "grad_norm": 0.286743200585038, "learning_rate": 1.6163023911940923e-06, "loss": 0.2143, "step": 14307 }, { "epoch": 0.82, "grad_norm": 0.4101065643986548, "learning_rate": 1.6152881492618123e-06, "loss": 0.2727, "step": 14308 }, { "epoch": 0.82, "grad_norm": 0.3389018916742762, "learning_rate": 1.614274197691743e-06, "loss": 0.2422, "step": 14309 }, { "epoch": 0.82, "grad_norm": 1.02805515707719, "learning_rate": 1.6132605365189945e-06, "loss": 0.6135, "step": 14310 }, { "epoch": 0.82, "grad_norm": 0.46498023154122176, "learning_rate": 1.6122471657786764e-06, "loss": 0.336, "step": 14311 }, { "epoch": 0.82, "grad_norm": 0.24162416824879168, "learning_rate": 1.6112340855058784e-06, "loss": 0.199, "step": 14312 }, { "epoch": 0.82, "grad_norm": 0.4745324261022142, "learning_rate": 1.6102212957356821e-06, "loss": 0.2715, "step": 14313 }, { "epoch": 0.82, "grad_norm": 0.8109722485644276, "learning_rate": 1.6092087965031623e-06, "loss": 0.4024, "step": 14314 }, { "epoch": 0.82, "grad_norm": 0.29032677627833714, "learning_rate": 1.6081965878433781e-06, "loss": 0.1993, "step": 14315 }, { "epoch": 0.82, "grad_norm": 0.2764951495219204, "learning_rate": 1.6071846697913907e-06, "loss": 0.2629, "step": 14316 }, { "epoch": 0.82, "grad_norm": 1.0850913168148355, "learning_rate": 1.6061730423822353e-06, "loss": 0.4369, "step": 14317 }, { "epoch": 0.82, "grad_norm": 0.3368554761044434, "learning_rate": 1.6051617056509427e-06, "loss": 0.1969, "step": 14318 }, { "epoch": 0.82, "grad_norm": 0.6791350794668766, "learning_rate": 1.604150659632543e-06, "loss": 0.3668, "step": 14319 }, { "epoch": 0.82, "grad_norm": 0.2824110904939739, "learning_rate": 1.6031399043620444e-06, "loss": 0.2334, "step": 14320 }, { "epoch": 0.82, "grad_norm": 0.3433389634591825, "learning_rate": 1.6021294398744491e-06, "loss": 0.2899, "step": 14321 }, { "epoch": 0.82, "grad_norm": 0.27915034093289076, "learning_rate": 1.6011192662047493e-06, "loss": 0.1297, "step": 14322 }, { "epoch": 0.82, "grad_norm": 0.36788278715290085, "learning_rate": 1.6001093833879288e-06, "loss": 0.3075, "step": 14323 }, { "epoch": 0.82, "grad_norm": 0.3496665619411987, "learning_rate": 1.5990997914589602e-06, "loss": 0.2855, "step": 14324 }, { "epoch": 0.82, "grad_norm": 0.5939639419698534, "learning_rate": 1.598090490452805e-06, "loss": 0.2828, "step": 14325 }, { "epoch": 0.82, "grad_norm": 0.7697198306103441, "learning_rate": 1.5970814804044143e-06, "loss": 0.4741, "step": 14326 }, { "epoch": 0.82, "grad_norm": 0.3498249717359082, "learning_rate": 1.5960727613487282e-06, "loss": 0.2813, "step": 14327 }, { "epoch": 0.82, "grad_norm": 0.2114390332677578, "learning_rate": 1.5950643333206827e-06, "loss": 0.1838, "step": 14328 }, { "epoch": 0.82, "grad_norm": 1.312899098888681, "learning_rate": 1.5940561963551982e-06, "loss": 0.3905, "step": 14329 }, { "epoch": 0.82, "grad_norm": 0.39726885707439447, "learning_rate": 1.5930483504871863e-06, "loss": 0.2603, "step": 14330 }, { "epoch": 0.82, "grad_norm": 0.3441962419375129, "learning_rate": 1.5920407957515472e-06, "loss": 0.2621, "step": 14331 }, { "epoch": 0.82, "grad_norm": 1.244026243921077, "learning_rate": 1.5910335321831749e-06, "loss": 0.7247, "step": 14332 }, { "epoch": 0.82, "grad_norm": 0.32768146343407234, "learning_rate": 1.5900265598169507e-06, "loss": 0.2683, "step": 14333 }, { "epoch": 0.82, "grad_norm": 0.2208843892022444, "learning_rate": 1.5890198786877442e-06, "loss": 0.1797, "step": 14334 }, { "epoch": 0.82, "grad_norm": 0.3084248363985326, "learning_rate": 1.5880134888304155e-06, "loss": 0.2296, "step": 14335 }, { "epoch": 0.82, "grad_norm": 0.3367945172727081, "learning_rate": 1.58700739027982e-06, "loss": 0.2464, "step": 14336 }, { "epoch": 0.82, "grad_norm": 0.6608801653776333, "learning_rate": 1.5860015830707976e-06, "loss": 0.3797, "step": 14337 }, { "epoch": 0.82, "grad_norm": 1.5594061912195747, "learning_rate": 1.5849960672381781e-06, "loss": 0.3649, "step": 14338 }, { "epoch": 0.82, "grad_norm": 0.26050111086502553, "learning_rate": 1.5839908428167806e-06, "loss": 0.2343, "step": 14339 }, { "epoch": 0.82, "grad_norm": 0.537863403344283, "learning_rate": 1.5829859098414202e-06, "loss": 0.3252, "step": 14340 }, { "epoch": 0.82, "grad_norm": 0.3168205101844687, "learning_rate": 1.5819812683468971e-06, "loss": 0.1417, "step": 14341 }, { "epoch": 0.82, "grad_norm": 0.36311874865663823, "learning_rate": 1.5809769183680001e-06, "loss": 0.2854, "step": 14342 }, { "epoch": 0.82, "grad_norm": 0.44223521127969234, "learning_rate": 1.5799728599395093e-06, "loss": 0.3553, "step": 14343 }, { "epoch": 0.82, "grad_norm": 0.40043078151688744, "learning_rate": 1.5789690930961955e-06, "loss": 0.2374, "step": 14344 }, { "epoch": 0.82, "grad_norm": 0.3187473275956427, "learning_rate": 1.577965617872821e-06, "loss": 0.2568, "step": 14345 }, { "epoch": 0.82, "grad_norm": 0.40970369506063714, "learning_rate": 1.5769624343041356e-06, "loss": 0.2775, "step": 14346 }, { "epoch": 0.82, "grad_norm": 0.27328936575328555, "learning_rate": 1.5759595424248798e-06, "loss": 0.2324, "step": 14347 }, { "epoch": 0.82, "grad_norm": 0.3200545522278074, "learning_rate": 1.5749569422697786e-06, "loss": 0.2029, "step": 14348 }, { "epoch": 0.82, "grad_norm": 0.7813268400056336, "learning_rate": 1.57395463387356e-06, "loss": 0.4164, "step": 14349 }, { "epoch": 0.82, "grad_norm": 1.362908227303668, "learning_rate": 1.572952617270932e-06, "loss": 0.7034, "step": 14350 }, { "epoch": 0.82, "grad_norm": 0.2630012527604643, "learning_rate": 1.5719508924965876e-06, "loss": 0.2133, "step": 14351 }, { "epoch": 0.82, "grad_norm": 0.5084932701306548, "learning_rate": 1.5709494595852238e-06, "loss": 0.3867, "step": 14352 }, { "epoch": 0.82, "grad_norm": 0.4391672927929206, "learning_rate": 1.569948318571517e-06, "loss": 0.2645, "step": 14353 }, { "epoch": 0.82, "grad_norm": 0.24787677119492113, "learning_rate": 1.5689474694901386e-06, "loss": 0.1697, "step": 14354 }, { "epoch": 0.82, "grad_norm": 0.5045982631903566, "learning_rate": 1.5679469123757463e-06, "loss": 0.331, "step": 14355 }, { "epoch": 0.82, "grad_norm": 0.5158521633631827, "learning_rate": 1.566946647262988e-06, "loss": 0.3973, "step": 14356 }, { "epoch": 0.82, "grad_norm": 0.2975250660129979, "learning_rate": 1.5659466741865059e-06, "loss": 0.2084, "step": 14357 }, { "epoch": 0.82, "grad_norm": 0.882686185713709, "learning_rate": 1.5649469931809291e-06, "loss": 0.4715, "step": 14358 }, { "epoch": 0.82, "grad_norm": 0.4060897628478708, "learning_rate": 1.5639476042808743e-06, "loss": 0.3158, "step": 14359 }, { "epoch": 0.83, "grad_norm": 0.37822526003360196, "learning_rate": 1.5629485075209494e-06, "loss": 0.2885, "step": 14360 }, { "epoch": 0.83, "grad_norm": 0.26592851936967965, "learning_rate": 1.5619497029357566e-06, "loss": 0.1386, "step": 14361 }, { "epoch": 0.83, "grad_norm": 0.48297293822615317, "learning_rate": 1.5609511905598828e-06, "loss": 0.3686, "step": 14362 }, { "epoch": 0.83, "grad_norm": 0.3980489462284402, "learning_rate": 1.559952970427907e-06, "loss": 0.2771, "step": 14363 }, { "epoch": 0.83, "grad_norm": 0.3248249460067187, "learning_rate": 1.5589550425743938e-06, "loss": 0.2326, "step": 14364 }, { "epoch": 0.83, "grad_norm": 0.7106496612344982, "learning_rate": 1.5579574070339077e-06, "loss": 0.397, "step": 14365 }, { "epoch": 0.83, "grad_norm": 0.25155760454843434, "learning_rate": 1.5569600638409931e-06, "loss": 0.179, "step": 14366 }, { "epoch": 0.83, "grad_norm": 0.23556004098954053, "learning_rate": 1.5559630130301885e-06, "loss": 0.1974, "step": 14367 }, { "epoch": 0.83, "grad_norm": 1.3610236878509954, "learning_rate": 1.5549662546360223e-06, "loss": 0.7535, "step": 14368 }, { "epoch": 0.83, "grad_norm": 0.4102886902865449, "learning_rate": 1.5539697886930082e-06, "loss": 0.3094, "step": 14369 }, { "epoch": 0.83, "grad_norm": 0.4806336247620259, "learning_rate": 1.5529736152356601e-06, "loss": 0.2572, "step": 14370 }, { "epoch": 0.83, "grad_norm": 0.3447873590811517, "learning_rate": 1.551977734298472e-06, "loss": 0.3017, "step": 14371 }, { "epoch": 0.83, "grad_norm": 0.3259821776760638, "learning_rate": 1.5509821459159312e-06, "loss": 0.2352, "step": 14372 }, { "epoch": 0.83, "grad_norm": 0.35766169837779577, "learning_rate": 1.5499868501225135e-06, "loss": 0.2043, "step": 14373 }, { "epoch": 0.83, "grad_norm": 0.45759062221042746, "learning_rate": 1.548991846952691e-06, "loss": 0.2734, "step": 14374 }, { "epoch": 0.83, "grad_norm": 0.26891305999108633, "learning_rate": 1.5479971364409163e-06, "loss": 0.2624, "step": 14375 }, { "epoch": 0.83, "grad_norm": 0.6950747504087013, "learning_rate": 1.5470027186216386e-06, "loss": 0.3777, "step": 14376 }, { "epoch": 0.83, "grad_norm": 0.8274621163981223, "learning_rate": 1.5460085935292902e-06, "loss": 0.2935, "step": 14377 }, { "epoch": 0.83, "grad_norm": 0.5470660206047755, "learning_rate": 1.5450147611983024e-06, "loss": 0.2792, "step": 14378 }, { "epoch": 0.83, "grad_norm": 0.22596268465440378, "learning_rate": 1.5440212216630902e-06, "loss": 0.2247, "step": 14379 }, { "epoch": 0.83, "grad_norm": 0.4563189462879652, "learning_rate": 1.54302797495806e-06, "loss": 0.2061, "step": 14380 }, { "epoch": 0.83, "grad_norm": 0.408530641518608, "learning_rate": 1.5420350211176072e-06, "loss": 0.2611, "step": 14381 }, { "epoch": 0.83, "grad_norm": 0.4614799902770901, "learning_rate": 1.541042360176115e-06, "loss": 0.3243, "step": 14382 }, { "epoch": 0.83, "grad_norm": 0.3767321614513136, "learning_rate": 1.5400499921679647e-06, "loss": 0.2978, "step": 14383 }, { "epoch": 0.83, "grad_norm": 0.32608810726150617, "learning_rate": 1.5390579171275222e-06, "loss": 0.1694, "step": 14384 }, { "epoch": 0.83, "grad_norm": 0.2786817942246057, "learning_rate": 1.5380661350891346e-06, "loss": 0.1812, "step": 14385 }, { "epoch": 0.83, "grad_norm": 1.2423112132808052, "learning_rate": 1.5370746460871555e-06, "loss": 0.7655, "step": 14386 }, { "epoch": 0.83, "grad_norm": 0.24830800114573304, "learning_rate": 1.5360834501559185e-06, "loss": 0.2066, "step": 14387 }, { "epoch": 0.83, "grad_norm": 0.6941033661044869, "learning_rate": 1.5350925473297462e-06, "loss": 0.3225, "step": 14388 }, { "epoch": 0.83, "grad_norm": 1.3297493516790682, "learning_rate": 1.5341019376429533e-06, "loss": 0.4718, "step": 14389 }, { "epoch": 0.83, "grad_norm": 0.33027397591322355, "learning_rate": 1.5331116211298492e-06, "loss": 0.1663, "step": 14390 }, { "epoch": 0.83, "grad_norm": 0.3162742331726112, "learning_rate": 1.532121597824725e-06, "loss": 0.2882, "step": 14391 }, { "epoch": 0.83, "grad_norm": 0.3078259870411867, "learning_rate": 1.5311318677618658e-06, "loss": 0.1998, "step": 14392 }, { "epoch": 0.83, "grad_norm": 0.3203383686794931, "learning_rate": 1.5301424309755464e-06, "loss": 0.2119, "step": 14393 }, { "epoch": 0.83, "grad_norm": 0.9170395837276937, "learning_rate": 1.529153287500027e-06, "loss": 0.5176, "step": 14394 }, { "epoch": 0.83, "grad_norm": 0.3675141498763632, "learning_rate": 1.5281644373695682e-06, "loss": 0.2876, "step": 14395 }, { "epoch": 0.83, "grad_norm": 0.6819063271267362, "learning_rate": 1.52717588061841e-06, "loss": 0.26, "step": 14396 }, { "epoch": 0.83, "grad_norm": 0.18812990097663482, "learning_rate": 1.5261876172807865e-06, "loss": 0.1458, "step": 14397 }, { "epoch": 0.83, "grad_norm": 0.4405755811998149, "learning_rate": 1.5251996473909202e-06, "loss": 0.3262, "step": 14398 }, { "epoch": 0.83, "grad_norm": 0.4274068515417533, "learning_rate": 1.5242119709830272e-06, "loss": 0.3208, "step": 14399 }, { "epoch": 0.83, "grad_norm": 0.30076883740926846, "learning_rate": 1.5232245880913088e-06, "loss": 0.2194, "step": 14400 }, { "epoch": 0.83, "grad_norm": 1.2838196412791696, "learning_rate": 1.5222374987499588e-06, "loss": 0.536, "step": 14401 }, { "epoch": 0.83, "grad_norm": 0.5728611563163679, "learning_rate": 1.5212507029931578e-06, "loss": 0.2791, "step": 14402 }, { "epoch": 0.83, "grad_norm": 0.22643317321614864, "learning_rate": 1.5202642008550827e-06, "loss": 0.2097, "step": 14403 }, { "epoch": 0.83, "grad_norm": 0.781228803059026, "learning_rate": 1.519277992369893e-06, "loss": 0.3939, "step": 14404 }, { "epoch": 0.83, "grad_norm": 0.5420511061950547, "learning_rate": 1.5182920775717425e-06, "loss": 0.3284, "step": 14405 }, { "epoch": 0.83, "grad_norm": 0.23424207699255722, "learning_rate": 1.5173064564947714e-06, "loss": 0.2109, "step": 14406 }, { "epoch": 0.83, "grad_norm": 0.4855977165429987, "learning_rate": 1.5163211291731116e-06, "loss": 0.3538, "step": 14407 }, { "epoch": 0.83, "grad_norm": 0.7778771909877253, "learning_rate": 1.5153360956408891e-06, "loss": 0.3002, "step": 14408 }, { "epoch": 0.83, "grad_norm": 0.40367431465738035, "learning_rate": 1.514351355932212e-06, "loss": 0.2746, "step": 14409 }, { "epoch": 0.83, "grad_norm": 0.4594177145672264, "learning_rate": 1.513366910081182e-06, "loss": 0.2778, "step": 14410 }, { "epoch": 0.83, "grad_norm": 0.3612788772247959, "learning_rate": 1.5123827581218898e-06, "loss": 0.3111, "step": 14411 }, { "epoch": 0.83, "grad_norm": 0.3966527721965122, "learning_rate": 1.5113989000884189e-06, "loss": 0.303, "step": 14412 }, { "epoch": 0.83, "grad_norm": 0.17782252117943567, "learning_rate": 1.51041533601484e-06, "loss": 0.0841, "step": 14413 }, { "epoch": 0.83, "grad_norm": 0.3769117084741131, "learning_rate": 1.5094320659352123e-06, "loss": 0.2748, "step": 14414 }, { "epoch": 0.83, "grad_norm": 0.3041994326675657, "learning_rate": 1.5084490898835857e-06, "loss": 0.2886, "step": 14415 }, { "epoch": 0.83, "grad_norm": 0.6275402594647362, "learning_rate": 1.5074664078940039e-06, "loss": 0.2981, "step": 14416 }, { "epoch": 0.83, "grad_norm": 0.7745986505170465, "learning_rate": 1.5064840200004972e-06, "loss": 0.4663, "step": 14417 }, { "epoch": 0.83, "grad_norm": 0.26455906511621774, "learning_rate": 1.5055019262370807e-06, "loss": 0.2277, "step": 14418 }, { "epoch": 0.83, "grad_norm": 0.2744273616921841, "learning_rate": 1.5045201266377662e-06, "loss": 0.191, "step": 14419 }, { "epoch": 0.83, "grad_norm": 0.8188194127786869, "learning_rate": 1.5035386212365554e-06, "loss": 0.4115, "step": 14420 }, { "epoch": 0.83, "grad_norm": 0.3522078195909208, "learning_rate": 1.502557410067438e-06, "loss": 0.2702, "step": 14421 }, { "epoch": 0.83, "grad_norm": 0.6756378638949322, "learning_rate": 1.5015764931643916e-06, "loss": 0.4208, "step": 14422 }, { "epoch": 0.83, "grad_norm": 0.25982068134200115, "learning_rate": 1.5005958705613833e-06, "loss": 0.2264, "step": 14423 }, { "epoch": 0.83, "grad_norm": 0.3699918270593306, "learning_rate": 1.4996155422923764e-06, "loss": 0.3032, "step": 14424 }, { "epoch": 0.83, "grad_norm": 0.31773506752213887, "learning_rate": 1.4986355083913184e-06, "loss": 0.1678, "step": 14425 }, { "epoch": 0.83, "grad_norm": 0.28175897232877206, "learning_rate": 1.4976557688921478e-06, "loss": 0.2038, "step": 14426 }, { "epoch": 0.83, "grad_norm": 0.3882773152820447, "learning_rate": 1.4966763238287885e-06, "loss": 0.2799, "step": 14427 }, { "epoch": 0.83, "grad_norm": 0.6931088691932338, "learning_rate": 1.4956971732351655e-06, "loss": 0.3976, "step": 14428 }, { "epoch": 0.83, "grad_norm": 0.3691483478603107, "learning_rate": 1.4947183171451841e-06, "loss": 0.2152, "step": 14429 }, { "epoch": 0.83, "grad_norm": 0.38071812733088667, "learning_rate": 1.4937397555927413e-06, "loss": 0.2909, "step": 14430 }, { "epoch": 0.83, "grad_norm": 0.2094977356719134, "learning_rate": 1.4927614886117248e-06, "loss": 0.1925, "step": 14431 }, { "epoch": 0.83, "grad_norm": 0.5582480502897131, "learning_rate": 1.4917835162360107e-06, "loss": 0.2479, "step": 14432 }, { "epoch": 0.83, "grad_norm": 0.3547248993099505, "learning_rate": 1.4908058384994684e-06, "loss": 0.2633, "step": 14433 }, { "epoch": 0.83, "grad_norm": 0.33898191546731055, "learning_rate": 1.4898284554359555e-06, "loss": 0.2929, "step": 14434 }, { "epoch": 0.83, "grad_norm": 1.20134478839783, "learning_rate": 1.4888513670793159e-06, "loss": 0.7416, "step": 14435 }, { "epoch": 0.83, "grad_norm": 0.31287229804617556, "learning_rate": 1.4878745734633859e-06, "loss": 0.1874, "step": 14436 }, { "epoch": 0.83, "grad_norm": 0.28716920169026183, "learning_rate": 1.4868980746219953e-06, "loss": 0.1744, "step": 14437 }, { "epoch": 0.83, "grad_norm": 0.3602326317426348, "learning_rate": 1.485921870588959e-06, "loss": 0.3254, "step": 14438 }, { "epoch": 0.83, "grad_norm": 0.34644487846977096, "learning_rate": 1.4849459613980821e-06, "loss": 0.2226, "step": 14439 }, { "epoch": 0.83, "grad_norm": 1.7099936095796247, "learning_rate": 1.4839703470831568e-06, "loss": 0.6051, "step": 14440 }, { "epoch": 0.83, "grad_norm": 1.1426420559205335, "learning_rate": 1.4829950276779759e-06, "loss": 0.6309, "step": 14441 }, { "epoch": 0.83, "grad_norm": 0.21521110544361674, "learning_rate": 1.4820200032163102e-06, "loss": 0.2127, "step": 14442 }, { "epoch": 0.83, "grad_norm": 0.5594517974144692, "learning_rate": 1.481045273731926e-06, "loss": 0.2786, "step": 14443 }, { "epoch": 0.83, "grad_norm": 0.6070929376142952, "learning_rate": 1.480070839258575e-06, "loss": 0.3338, "step": 14444 }, { "epoch": 0.83, "grad_norm": 0.2462055074718749, "learning_rate": 1.479096699830007e-06, "loss": 0.1646, "step": 14445 }, { "epoch": 0.83, "grad_norm": 0.3872583829909997, "learning_rate": 1.4781228554799544e-06, "loss": 0.3043, "step": 14446 }, { "epoch": 0.83, "grad_norm": 0.7217473632643663, "learning_rate": 1.4771493062421393e-06, "loss": 0.5007, "step": 14447 }, { "epoch": 0.83, "grad_norm": 0.3543168866164041, "learning_rate": 1.4761760521502788e-06, "loss": 0.272, "step": 14448 }, { "epoch": 0.83, "grad_norm": 0.694609659735516, "learning_rate": 1.4752030932380723e-06, "loss": 0.2679, "step": 14449 }, { "epoch": 0.83, "grad_norm": 0.2668551292038593, "learning_rate": 1.4742304295392173e-06, "loss": 0.2415, "step": 14450 }, { "epoch": 0.83, "grad_norm": 0.348151555424367, "learning_rate": 1.4732580610873991e-06, "loss": 0.2819, "step": 14451 }, { "epoch": 0.83, "grad_norm": 0.4865809063782709, "learning_rate": 1.4722859879162831e-06, "loss": 0.164, "step": 14452 }, { "epoch": 0.83, "grad_norm": 1.3427781510155108, "learning_rate": 1.471314210059539e-06, "loss": 0.8253, "step": 14453 }, { "epoch": 0.83, "grad_norm": 0.2627245576209838, "learning_rate": 1.4703427275508175e-06, "loss": 0.2502, "step": 14454 }, { "epoch": 0.83, "grad_norm": 0.38749675227945757, "learning_rate": 1.4693715404237595e-06, "loss": 0.2567, "step": 14455 }, { "epoch": 0.83, "grad_norm": 0.48887376832848684, "learning_rate": 1.4684006487119996e-06, "loss": 0.2749, "step": 14456 }, { "epoch": 0.83, "grad_norm": 0.23372320232067798, "learning_rate": 1.4674300524491548e-06, "loss": 0.1902, "step": 14457 }, { "epoch": 0.83, "grad_norm": 0.3504977034974888, "learning_rate": 1.466459751668843e-06, "loss": 0.2363, "step": 14458 }, { "epoch": 0.83, "grad_norm": 1.1512347307415474, "learning_rate": 1.4654897464046624e-06, "loss": 0.7318, "step": 14459 }, { "epoch": 0.83, "grad_norm": 0.33497458339458847, "learning_rate": 1.4645200366902056e-06, "loss": 0.2622, "step": 14460 }, { "epoch": 0.83, "grad_norm": 0.6891307734834573, "learning_rate": 1.4635506225590511e-06, "loss": 0.3649, "step": 14461 }, { "epoch": 0.83, "grad_norm": 0.24648296086863, "learning_rate": 1.4625815040447733e-06, "loss": 0.1953, "step": 14462 }, { "epoch": 0.83, "grad_norm": 0.3198739485686858, "learning_rate": 1.4616126811809305e-06, "loss": 0.2548, "step": 14463 }, { "epoch": 0.83, "grad_norm": 0.4785902437377649, "learning_rate": 1.4606441540010742e-06, "loss": 0.2262, "step": 14464 }, { "epoch": 0.83, "grad_norm": 0.42064172693904334, "learning_rate": 1.4596759225387401e-06, "loss": 0.2391, "step": 14465 }, { "epoch": 0.83, "grad_norm": 0.3519446079196051, "learning_rate": 1.4587079868274644e-06, "loss": 0.2952, "step": 14466 }, { "epoch": 0.83, "grad_norm": 0.4042615656613845, "learning_rate": 1.4577403469007645e-06, "loss": 0.3237, "step": 14467 }, { "epoch": 0.83, "grad_norm": 0.35400671879270007, "learning_rate": 1.4567730027921489e-06, "loss": 0.0845, "step": 14468 }, { "epoch": 0.83, "grad_norm": 0.3576298559652004, "learning_rate": 1.4558059545351144e-06, "loss": 0.2974, "step": 14469 }, { "epoch": 0.83, "grad_norm": 0.2690274370266391, "learning_rate": 1.4548392021631541e-06, "loss": 0.264, "step": 14470 }, { "epoch": 0.83, "grad_norm": 1.260476076309206, "learning_rate": 1.4538727457097447e-06, "loss": 0.7678, "step": 14471 }, { "epoch": 0.83, "grad_norm": 0.32094729111190423, "learning_rate": 1.4529065852083557e-06, "loss": 0.2062, "step": 14472 }, { "epoch": 0.83, "grad_norm": 0.618603149339349, "learning_rate": 1.451940720692443e-06, "loss": 0.3733, "step": 14473 }, { "epoch": 0.83, "grad_norm": 0.3205669864381226, "learning_rate": 1.450975152195454e-06, "loss": 0.2918, "step": 14474 }, { "epoch": 0.83, "grad_norm": 0.3185380448055098, "learning_rate": 1.4500098797508289e-06, "loss": 0.1982, "step": 14475 }, { "epoch": 0.83, "grad_norm": 0.24713688538065756, "learning_rate": 1.4490449033919952e-06, "loss": 0.1745, "step": 14476 }, { "epoch": 0.83, "grad_norm": 0.7044439761827327, "learning_rate": 1.4480802231523682e-06, "loss": 0.4051, "step": 14477 }, { "epoch": 0.83, "grad_norm": 0.22536764749533747, "learning_rate": 1.447115839065354e-06, "loss": 0.2154, "step": 14478 }, { "epoch": 0.83, "grad_norm": 0.754993666762245, "learning_rate": 1.446151751164352e-06, "loss": 0.4199, "step": 14479 }, { "epoch": 0.83, "grad_norm": 1.5464917011221442, "learning_rate": 1.4451879594827467e-06, "loss": 0.4158, "step": 14480 }, { "epoch": 0.83, "grad_norm": 0.22848279331701107, "learning_rate": 1.444224464053916e-06, "loss": 0.1559, "step": 14481 }, { "epoch": 0.83, "grad_norm": 0.26781759688718365, "learning_rate": 1.44326126491122e-06, "loss": 0.2418, "step": 14482 }, { "epoch": 0.83, "grad_norm": 0.7385480231144739, "learning_rate": 1.4422983620880215e-06, "loss": 0.4098, "step": 14483 }, { "epoch": 0.83, "grad_norm": 0.5600152349811515, "learning_rate": 1.4413357556176633e-06, "loss": 0.3144, "step": 14484 }, { "epoch": 0.83, "grad_norm": 0.38866276568802743, "learning_rate": 1.4403734455334816e-06, "loss": 0.2366, "step": 14485 }, { "epoch": 0.83, "grad_norm": 0.3608062477208995, "learning_rate": 1.4394114318687947e-06, "loss": 0.2916, "step": 14486 }, { "epoch": 0.83, "grad_norm": 0.39215757913532784, "learning_rate": 1.4384497146569242e-06, "loss": 0.2683, "step": 14487 }, { "epoch": 0.83, "grad_norm": 0.28871817789259424, "learning_rate": 1.437488293931173e-06, "loss": 0.1842, "step": 14488 }, { "epoch": 0.83, "grad_norm": 0.7837969352840899, "learning_rate": 1.436527169724833e-06, "loss": 0.3826, "step": 14489 }, { "epoch": 0.83, "grad_norm": 0.252579482719496, "learning_rate": 1.4355663420711863e-06, "loss": 0.2509, "step": 14490 }, { "epoch": 0.83, "grad_norm": 0.38934719167353166, "learning_rate": 1.434605811003511e-06, "loss": 0.179, "step": 14491 }, { "epoch": 0.83, "grad_norm": 1.327017593625259, "learning_rate": 1.4336455765550684e-06, "loss": 0.5127, "step": 14492 }, { "epoch": 0.83, "grad_norm": 0.3263390069030087, "learning_rate": 1.4326856387591114e-06, "loss": 0.1991, "step": 14493 }, { "epoch": 0.83, "grad_norm": 0.26938715579382766, "learning_rate": 1.4317259976488806e-06, "loss": 0.2504, "step": 14494 }, { "epoch": 0.83, "grad_norm": 0.642656981752686, "learning_rate": 1.4307666532576115e-06, "loss": 0.3615, "step": 14495 }, { "epoch": 0.83, "grad_norm": 0.31108348734056135, "learning_rate": 1.429807605618525e-06, "loss": 0.2628, "step": 14496 }, { "epoch": 0.83, "grad_norm": 0.5051662006316499, "learning_rate": 1.4288488547648328e-06, "loss": 0.2379, "step": 14497 }, { "epoch": 0.83, "grad_norm": 0.37356454597055017, "learning_rate": 1.4278904007297356e-06, "loss": 0.2442, "step": 14498 }, { "epoch": 0.83, "grad_norm": 0.3072156818751481, "learning_rate": 1.4269322435464229e-06, "loss": 0.2397, "step": 14499 }, { "epoch": 0.83, "grad_norm": 0.9057485951009321, "learning_rate": 1.425974383248081e-06, "loss": 0.516, "step": 14500 }, { "epoch": 0.83, "grad_norm": 0.3275530243981576, "learning_rate": 1.425016819867876e-06, "loss": 0.2634, "step": 14501 }, { "epoch": 0.83, "grad_norm": 0.5692330083855973, "learning_rate": 1.4240595534389712e-06, "loss": 0.2918, "step": 14502 }, { "epoch": 0.83, "grad_norm": 0.22938802148421475, "learning_rate": 1.4231025839945123e-06, "loss": 0.1967, "step": 14503 }, { "epoch": 0.83, "grad_norm": 1.3925463484862073, "learning_rate": 1.422145911567645e-06, "loss": 0.192, "step": 14504 }, { "epoch": 0.83, "grad_norm": 0.661586160240331, "learning_rate": 1.4211895361914961e-06, "loss": 0.2866, "step": 14505 }, { "epoch": 0.83, "grad_norm": 0.24761832146932847, "learning_rate": 1.4202334578991838e-06, "loss": 0.2535, "step": 14506 }, { "epoch": 0.83, "grad_norm": 0.6307606543288709, "learning_rate": 1.419277676723816e-06, "loss": 0.3035, "step": 14507 }, { "epoch": 0.83, "grad_norm": 0.3912702731263923, "learning_rate": 1.4183221926984958e-06, "loss": 0.294, "step": 14508 }, { "epoch": 0.83, "grad_norm": 0.21325530333087425, "learning_rate": 1.4173670058563082e-06, "loss": 0.2035, "step": 14509 }, { "epoch": 0.83, "grad_norm": 0.46726091602695835, "learning_rate": 1.4164121162303335e-06, "loss": 0.2975, "step": 14510 }, { "epoch": 0.83, "grad_norm": 0.4739114950446037, "learning_rate": 1.4154575238536373e-06, "loss": 0.1686, "step": 14511 }, { "epoch": 0.83, "grad_norm": 0.4402434355675197, "learning_rate": 1.4145032287592753e-06, "loss": 0.3343, "step": 14512 }, { "epoch": 0.83, "grad_norm": 0.5244827607300405, "learning_rate": 1.4135492309803e-06, "loss": 0.3307, "step": 14513 }, { "epoch": 0.83, "grad_norm": 0.277632422490554, "learning_rate": 1.4125955305497453e-06, "loss": 0.2066, "step": 14514 }, { "epoch": 0.83, "grad_norm": 0.32009804462674135, "learning_rate": 1.4116421275006386e-06, "loss": 0.2483, "step": 14515 }, { "epoch": 0.83, "grad_norm": 0.6266221865588981, "learning_rate": 1.410689021865993e-06, "loss": 0.2119, "step": 14516 }, { "epoch": 0.83, "grad_norm": 0.3426461166367405, "learning_rate": 1.4097362136788196e-06, "loss": 0.225, "step": 14517 }, { "epoch": 0.83, "grad_norm": 0.32656014034310793, "learning_rate": 1.408783702972112e-06, "loss": 0.2931, "step": 14518 }, { "epoch": 0.83, "grad_norm": 0.8290687136751642, "learning_rate": 1.4078314897788558e-06, "loss": 0.4557, "step": 14519 }, { "epoch": 0.83, "grad_norm": 0.3310944158287448, "learning_rate": 1.4068795741320241e-06, "loss": 0.152, "step": 14520 }, { "epoch": 0.83, "grad_norm": 0.23025651377461948, "learning_rate": 1.4059279560645845e-06, "loss": 0.2156, "step": 14521 }, { "epoch": 0.83, "grad_norm": 0.3482816224844952, "learning_rate": 1.4049766356094897e-06, "loss": 0.2382, "step": 14522 }, { "epoch": 0.83, "grad_norm": 0.6896887320183515, "learning_rate": 1.4040256127996842e-06, "loss": 0.345, "step": 14523 }, { "epoch": 0.83, "grad_norm": 0.376874453876573, "learning_rate": 1.403074887668101e-06, "loss": 0.2253, "step": 14524 }, { "epoch": 0.83, "grad_norm": 0.34520731206085, "learning_rate": 1.4021244602476658e-06, "loss": 0.3182, "step": 14525 }, { "epoch": 0.83, "grad_norm": 0.590729596317791, "learning_rate": 1.401174330571291e-06, "loss": 0.4018, "step": 14526 }, { "epoch": 0.83, "grad_norm": 0.24916310579425358, "learning_rate": 1.4002244986718793e-06, "loss": 0.1518, "step": 14527 }, { "epoch": 0.83, "grad_norm": 0.9781155735931566, "learning_rate": 1.3992749645823224e-06, "loss": 0.4291, "step": 14528 }, { "epoch": 0.83, "grad_norm": 0.37701695438860766, "learning_rate": 1.3983257283355044e-06, "loss": 0.2982, "step": 14529 }, { "epoch": 0.83, "grad_norm": 0.2968145176708719, "learning_rate": 1.3973767899642976e-06, "loss": 0.2351, "step": 14530 }, { "epoch": 0.83, "grad_norm": 1.2241899528039462, "learning_rate": 1.396428149501562e-06, "loss": 0.8343, "step": 14531 }, { "epoch": 0.83, "grad_norm": 0.6203633467767298, "learning_rate": 1.3954798069801468e-06, "loss": 0.4185, "step": 14532 }, { "epoch": 0.83, "grad_norm": 0.2830199485054341, "learning_rate": 1.394531762432899e-06, "loss": 0.2174, "step": 14533 }, { "epoch": 0.84, "grad_norm": 0.3082002175777337, "learning_rate": 1.3935840158926461e-06, "loss": 0.2218, "step": 14534 }, { "epoch": 0.84, "grad_norm": 0.5911035214641597, "learning_rate": 1.3926365673922082e-06, "loss": 0.3011, "step": 14535 }, { "epoch": 0.84, "grad_norm": 0.40215627658380904, "learning_rate": 1.3916894169643969e-06, "loss": 0.3002, "step": 14536 }, { "epoch": 0.84, "grad_norm": 0.3721036297309638, "learning_rate": 1.390742564642007e-06, "loss": 0.2617, "step": 14537 }, { "epoch": 0.84, "grad_norm": 0.6464102731862245, "learning_rate": 1.3897960104578357e-06, "loss": 0.3488, "step": 14538 }, { "epoch": 0.84, "grad_norm": 0.4497840173503336, "learning_rate": 1.3888497544446578e-06, "loss": 0.2986, "step": 14539 }, { "epoch": 0.84, "grad_norm": 0.3635716294730615, "learning_rate": 1.3879037966352426e-06, "loss": 0.192, "step": 14540 }, { "epoch": 0.84, "grad_norm": 0.31564871045658416, "learning_rate": 1.3869581370623464e-06, "loss": 0.2819, "step": 14541 }, { "epoch": 0.84, "grad_norm": 0.35301383748174164, "learning_rate": 1.3860127757587215e-06, "loss": 0.2734, "step": 14542 }, { "epoch": 0.84, "grad_norm": 0.9236845077921836, "learning_rate": 1.3850677127571033e-06, "loss": 0.444, "step": 14543 }, { "epoch": 0.84, "grad_norm": 0.8576242151750284, "learning_rate": 1.3841229480902207e-06, "loss": 0.449, "step": 14544 }, { "epoch": 0.84, "grad_norm": 0.317992615475425, "learning_rate": 1.3831784817907867e-06, "loss": 0.27, "step": 14545 }, { "epoch": 0.84, "grad_norm": 0.4192884397471493, "learning_rate": 1.382234313891515e-06, "loss": 0.3332, "step": 14546 }, { "epoch": 0.84, "grad_norm": 0.281125353055018, "learning_rate": 1.3812904444250973e-06, "loss": 0.1339, "step": 14547 }, { "epoch": 0.84, "grad_norm": 0.4240781429981899, "learning_rate": 1.3803468734242208e-06, "loss": 0.2947, "step": 14548 }, { "epoch": 0.84, "grad_norm": 0.34789522278090906, "learning_rate": 1.3794036009215628e-06, "loss": 0.3019, "step": 14549 }, { "epoch": 0.84, "grad_norm": 0.6447680621171787, "learning_rate": 1.3784606269497835e-06, "loss": 0.1991, "step": 14550 }, { "epoch": 0.84, "grad_norm": 0.42031524895275557, "learning_rate": 1.377517951541545e-06, "loss": 0.2907, "step": 14551 }, { "epoch": 0.84, "grad_norm": 0.5888956245708498, "learning_rate": 1.3765755747294906e-06, "loss": 0.357, "step": 14552 }, { "epoch": 0.84, "grad_norm": 0.22556962698291796, "learning_rate": 1.3756334965462502e-06, "loss": 0.1829, "step": 14553 }, { "epoch": 0.84, "grad_norm": 0.2882609123569876, "learning_rate": 1.3746917170244522e-06, "loss": 0.2108, "step": 14554 }, { "epoch": 0.84, "grad_norm": 1.3360077461689062, "learning_rate": 1.3737502361967092e-06, "loss": 0.6013, "step": 14555 }, { "epoch": 0.84, "grad_norm": 0.8700106534469142, "learning_rate": 1.3728090540956241e-06, "loss": 0.2877, "step": 14556 }, { "epoch": 0.84, "grad_norm": 0.2599726917673043, "learning_rate": 1.3718681707537895e-06, "loss": 0.2489, "step": 14557 }, { "epoch": 0.84, "grad_norm": 0.4713611148460899, "learning_rate": 1.3709275862037908e-06, "loss": 0.3234, "step": 14558 }, { "epoch": 0.84, "grad_norm": 0.2975155229884535, "learning_rate": 1.3699873004781983e-06, "loss": 0.1767, "step": 14559 }, { "epoch": 0.84, "grad_norm": 0.31430545435525203, "learning_rate": 1.369047313609575e-06, "loss": 0.1938, "step": 14560 }, { "epoch": 0.84, "grad_norm": 0.3427307670134785, "learning_rate": 1.3681076256304715e-06, "loss": 0.3059, "step": 14561 }, { "epoch": 0.84, "grad_norm": 0.8615199044778867, "learning_rate": 1.3671682365734273e-06, "loss": 0.4229, "step": 14562 }, { "epoch": 0.84, "grad_norm": 0.31342070762916385, "learning_rate": 1.3662291464709787e-06, "loss": 0.2215, "step": 14563 }, { "epoch": 0.84, "grad_norm": 0.8984623397142127, "learning_rate": 1.365290355355644e-06, "loss": 0.4016, "step": 14564 }, { "epoch": 0.84, "grad_norm": 0.2759744627759715, "learning_rate": 1.3643518632599317e-06, "loss": 0.2307, "step": 14565 }, { "epoch": 0.84, "grad_norm": 0.2279301524681025, "learning_rate": 1.3634136702163415e-06, "loss": 0.1532, "step": 14566 }, { "epoch": 0.84, "grad_norm": 1.1406842605842138, "learning_rate": 1.362475776257367e-06, "loss": 0.7491, "step": 14567 }, { "epoch": 0.84, "grad_norm": 0.586708889053408, "learning_rate": 1.3615381814154848e-06, "loss": 0.3272, "step": 14568 }, { "epoch": 0.84, "grad_norm": 0.28618586482601904, "learning_rate": 1.3606008857231634e-06, "loss": 0.222, "step": 14569 }, { "epoch": 0.84, "grad_norm": 0.5022053060065834, "learning_rate": 1.3596638892128599e-06, "loss": 0.3399, "step": 14570 }, { "epoch": 0.84, "grad_norm": 0.2986255975167238, "learning_rate": 1.3587271919170276e-06, "loss": 0.1707, "step": 14571 }, { "epoch": 0.84, "grad_norm": 0.35800026833904114, "learning_rate": 1.3577907938681e-06, "loss": 0.2807, "step": 14572 }, { "epoch": 0.84, "grad_norm": 0.32434733804341365, "learning_rate": 1.356854695098505e-06, "loss": 0.256, "step": 14573 }, { "epoch": 0.84, "grad_norm": 0.7751751650216921, "learning_rate": 1.3559188956406587e-06, "loss": 0.3757, "step": 14574 }, { "epoch": 0.84, "grad_norm": 0.3422898662062519, "learning_rate": 1.354983395526972e-06, "loss": 0.2493, "step": 14575 }, { "epoch": 0.84, "grad_norm": 0.5236379453340966, "learning_rate": 1.3540481947898377e-06, "loss": 0.2442, "step": 14576 }, { "epoch": 0.84, "grad_norm": 0.4769062265048514, "learning_rate": 1.3531132934616432e-06, "loss": 0.3491, "step": 14577 }, { "epoch": 0.84, "grad_norm": 0.26125303282449164, "learning_rate": 1.3521786915747636e-06, "loss": 0.1996, "step": 14578 }, { "epoch": 0.84, "grad_norm": 0.4615285075486771, "learning_rate": 1.3512443891615612e-06, "loss": 0.2125, "step": 14579 }, { "epoch": 0.84, "grad_norm": 0.4793211131504592, "learning_rate": 1.3503103862543964e-06, "loss": 0.3389, "step": 14580 }, { "epoch": 0.84, "grad_norm": 0.27583841894197575, "learning_rate": 1.3493766828856113e-06, "loss": 0.2668, "step": 14581 }, { "epoch": 0.84, "grad_norm": 1.3888028705359416, "learning_rate": 1.348443279087539e-06, "loss": 0.247, "step": 14582 }, { "epoch": 0.84, "grad_norm": 0.5835514940606136, "learning_rate": 1.3475101748925024e-06, "loss": 0.2396, "step": 14583 }, { "epoch": 0.84, "grad_norm": 0.3265317907950052, "learning_rate": 1.3465773703328177e-06, "loss": 0.2626, "step": 14584 }, { "epoch": 0.84, "grad_norm": 0.34205468813191087, "learning_rate": 1.3456448654407871e-06, "loss": 0.2876, "step": 14585 }, { "epoch": 0.84, "grad_norm": 0.7978010123803017, "learning_rate": 1.3447126602487026e-06, "loss": 0.3144, "step": 14586 }, { "epoch": 0.84, "grad_norm": 0.31640964650076336, "learning_rate": 1.343780754788847e-06, "loss": 0.2539, "step": 14587 }, { "epoch": 0.84, "grad_norm": 0.35450717795972697, "learning_rate": 1.3428491490934904e-06, "loss": 0.1691, "step": 14588 }, { "epoch": 0.84, "grad_norm": 0.29752438649468915, "learning_rate": 1.3419178431948964e-06, "loss": 0.2152, "step": 14589 }, { "epoch": 0.84, "grad_norm": 0.34828109234051496, "learning_rate": 1.3409868371253155e-06, "loss": 0.266, "step": 14590 }, { "epoch": 0.84, "grad_norm": 0.400501330494556, "learning_rate": 1.3400561309169845e-06, "loss": 0.2795, "step": 14591 }, { "epoch": 0.84, "grad_norm": 0.32453042151607786, "learning_rate": 1.3391257246021404e-06, "loss": 0.2643, "step": 14592 }, { "epoch": 0.84, "grad_norm": 0.3316450046499373, "learning_rate": 1.3381956182130008e-06, "loss": 0.2635, "step": 14593 }, { "epoch": 0.84, "grad_norm": 0.5078673764787308, "learning_rate": 1.3372658117817738e-06, "loss": 0.2464, "step": 14594 }, { "epoch": 0.84, "grad_norm": 1.7956324440457225, "learning_rate": 1.3363363053406564e-06, "loss": 0.1765, "step": 14595 }, { "epoch": 0.84, "grad_norm": 0.306535566421557, "learning_rate": 1.3354070989218426e-06, "loss": 0.2439, "step": 14596 }, { "epoch": 0.84, "grad_norm": 0.35559449451805636, "learning_rate": 1.334478192557509e-06, "loss": 0.3066, "step": 14597 }, { "epoch": 0.84, "grad_norm": 0.9039033717873965, "learning_rate": 1.333549586279822e-06, "loss": 0.4907, "step": 14598 }, { "epoch": 0.84, "grad_norm": 0.23362546523475522, "learning_rate": 1.3326212801209392e-06, "loss": 0.1655, "step": 14599 }, { "epoch": 0.84, "grad_norm": 0.41212155732044253, "learning_rate": 1.3316932741130106e-06, "loss": 0.2784, "step": 14600 }, { "epoch": 0.84, "grad_norm": 0.37043529185291274, "learning_rate": 1.3307655682881704e-06, "loss": 0.2799, "step": 14601 }, { "epoch": 0.84, "grad_norm": 0.3008023321460584, "learning_rate": 1.3298381626785461e-06, "loss": 0.2106, "step": 14602 }, { "epoch": 0.84, "grad_norm": 0.6480680200258, "learning_rate": 1.3289110573162534e-06, "loss": 0.3836, "step": 14603 }, { "epoch": 0.84, "grad_norm": 0.3419447209679332, "learning_rate": 1.3279842522333964e-06, "loss": 0.3303, "step": 14604 }, { "epoch": 0.84, "grad_norm": 0.28926904223399996, "learning_rate": 1.3270577474620737e-06, "loss": 0.1862, "step": 14605 }, { "epoch": 0.84, "grad_norm": 0.333294240317376, "learning_rate": 1.326131543034368e-06, "loss": 0.1797, "step": 14606 }, { "epoch": 0.84, "grad_norm": 0.7753789409052013, "learning_rate": 1.3252056389823542e-06, "loss": 0.3864, "step": 14607 }, { "epoch": 0.84, "grad_norm": 0.4255672538071689, "learning_rate": 1.3242800353380935e-06, "loss": 0.2057, "step": 14608 }, { "epoch": 0.84, "grad_norm": 0.30257530347677447, "learning_rate": 1.3233547321336449e-06, "loss": 0.2747, "step": 14609 }, { "epoch": 0.84, "grad_norm": 1.1949937386415568, "learning_rate": 1.322429729401048e-06, "loss": 0.6453, "step": 14610 }, { "epoch": 0.84, "grad_norm": 0.39220038644195765, "learning_rate": 1.3215050271723372e-06, "loss": 0.2691, "step": 14611 }, { "epoch": 0.84, "grad_norm": 0.19771103947243748, "learning_rate": 1.3205806254795316e-06, "loss": 0.177, "step": 14612 }, { "epoch": 0.84, "grad_norm": 1.340854636617087, "learning_rate": 1.3196565243546477e-06, "loss": 0.6778, "step": 14613 }, { "epoch": 0.84, "grad_norm": 0.40782738073281505, "learning_rate": 1.3187327238296855e-06, "loss": 0.2768, "step": 14614 }, { "epoch": 0.84, "grad_norm": 0.48597633138533963, "learning_rate": 1.3178092239366357e-06, "loss": 0.2581, "step": 14615 }, { "epoch": 0.84, "grad_norm": 0.35702165673584346, "learning_rate": 1.316886024707479e-06, "loss": 0.298, "step": 14616 }, { "epoch": 0.84, "grad_norm": 0.43957135064561503, "learning_rate": 1.3159631261741835e-06, "loss": 0.2841, "step": 14617 }, { "epoch": 0.84, "grad_norm": 0.2303763190919167, "learning_rate": 1.315040528368714e-06, "loss": 0.1285, "step": 14618 }, { "epoch": 0.84, "grad_norm": 0.7590643493551357, "learning_rate": 1.3141182313230173e-06, "loss": 0.3696, "step": 14619 }, { "epoch": 0.84, "grad_norm": 0.3120861870291768, "learning_rate": 1.313196235069033e-06, "loss": 0.2635, "step": 14620 }, { "epoch": 0.84, "grad_norm": 0.3375885377689183, "learning_rate": 1.3122745396386893e-06, "loss": 0.2568, "step": 14621 }, { "epoch": 0.84, "grad_norm": 1.0596125325492274, "learning_rate": 1.311353145063905e-06, "loss": 0.6659, "step": 14622 }, { "epoch": 0.84, "grad_norm": 0.32892295818439166, "learning_rate": 1.3104320513765867e-06, "loss": 0.2509, "step": 14623 }, { "epoch": 0.84, "grad_norm": 0.3057490849134694, "learning_rate": 1.3095112586086322e-06, "loss": 0.1777, "step": 14624 }, { "epoch": 0.84, "grad_norm": 0.35619765249654883, "learning_rate": 1.3085907667919295e-06, "loss": 0.2711, "step": 14625 }, { "epoch": 0.84, "grad_norm": 0.5930467136699953, "learning_rate": 1.3076705759583562e-06, "loss": 0.2686, "step": 14626 }, { "epoch": 0.84, "grad_norm": 0.4096537171452396, "learning_rate": 1.3067506861397771e-06, "loss": 0.3089, "step": 14627 }, { "epoch": 0.84, "grad_norm": 0.31363251366168626, "learning_rate": 1.3058310973680478e-06, "loss": 0.2472, "step": 14628 }, { "epoch": 0.84, "grad_norm": 0.6261934366587812, "learning_rate": 1.3049118096750102e-06, "loss": 0.322, "step": 14629 }, { "epoch": 0.84, "grad_norm": 0.23884814580511757, "learning_rate": 1.3039928230925058e-06, "loss": 0.2023, "step": 14630 }, { "epoch": 0.84, "grad_norm": 0.7587479042041084, "learning_rate": 1.303074137652357e-06, "loss": 0.2866, "step": 14631 }, { "epoch": 0.84, "grad_norm": 0.3200019958475434, "learning_rate": 1.302155753386376e-06, "loss": 0.2691, "step": 14632 }, { "epoch": 0.84, "grad_norm": 0.38354019565695807, "learning_rate": 1.3012376703263652e-06, "loss": 0.3087, "step": 14633 }, { "epoch": 0.84, "grad_norm": 1.2891788763038219, "learning_rate": 1.3003198885041212e-06, "loss": 0.757, "step": 14634 }, { "epoch": 0.84, "grad_norm": 0.3551321237848732, "learning_rate": 1.2994024079514257e-06, "loss": 0.161, "step": 14635 }, { "epoch": 0.84, "grad_norm": 0.2695399257198015, "learning_rate": 1.2984852287000515e-06, "loss": 0.2375, "step": 14636 }, { "epoch": 0.84, "grad_norm": 0.4797545726288128, "learning_rate": 1.297568350781757e-06, "loss": 0.3093, "step": 14637 }, { "epoch": 0.84, "grad_norm": 0.29754951174458355, "learning_rate": 1.296651774228298e-06, "loss": 0.1445, "step": 14638 }, { "epoch": 0.84, "grad_norm": 0.573274589294748, "learning_rate": 1.2957354990714145e-06, "loss": 0.3267, "step": 14639 }, { "epoch": 0.84, "grad_norm": 0.3397778778825856, "learning_rate": 1.2948195253428364e-06, "loss": 0.3433, "step": 14640 }, { "epoch": 0.84, "grad_norm": 0.3975827403188928, "learning_rate": 1.2939038530742832e-06, "loss": 0.1751, "step": 14641 }, { "epoch": 0.84, "grad_norm": 0.5512201139354378, "learning_rate": 1.2929884822974626e-06, "loss": 0.3175, "step": 14642 }, { "epoch": 0.84, "grad_norm": 0.3345476677624379, "learning_rate": 1.2920734130440793e-06, "loss": 0.2317, "step": 14643 }, { "epoch": 0.84, "grad_norm": 0.27178999005541327, "learning_rate": 1.2911586453458203e-06, "loss": 0.1999, "step": 14644 }, { "epoch": 0.84, "grad_norm": 0.393265473755049, "learning_rate": 1.2902441792343611e-06, "loss": 0.2757, "step": 14645 }, { "epoch": 0.84, "grad_norm": 1.0589891218703797, "learning_rate": 1.2893300147413702e-06, "loss": 0.5945, "step": 14646 }, { "epoch": 0.84, "grad_norm": 0.7805817018750784, "learning_rate": 1.2884161518985083e-06, "loss": 0.356, "step": 14647 }, { "epoch": 0.84, "grad_norm": 0.24815947867005486, "learning_rate": 1.2875025907374206e-06, "loss": 0.2283, "step": 14648 }, { "epoch": 0.84, "grad_norm": 0.7997982329037202, "learning_rate": 1.2865893312897438e-06, "loss": 0.3963, "step": 14649 }, { "epoch": 0.84, "grad_norm": 0.24864551993847825, "learning_rate": 1.2856763735871003e-06, "loss": 0.1527, "step": 14650 }, { "epoch": 0.84, "grad_norm": 0.3838687286845385, "learning_rate": 1.2847637176611128e-06, "loss": 0.2545, "step": 14651 }, { "epoch": 0.84, "grad_norm": 0.33946848678803093, "learning_rate": 1.2838513635433824e-06, "loss": 0.2888, "step": 14652 }, { "epoch": 0.84, "grad_norm": 0.5313711952676399, "learning_rate": 1.2829393112655052e-06, "loss": 0.3075, "step": 14653 }, { "epoch": 0.84, "grad_norm": 0.35368230488042623, "learning_rate": 1.2820275608590638e-06, "loss": 0.2496, "step": 14654 }, { "epoch": 0.84, "grad_norm": 0.5093236803162801, "learning_rate": 1.2811161123556337e-06, "loss": 0.2507, "step": 14655 }, { "epoch": 0.84, "grad_norm": 0.2429324623945723, "learning_rate": 1.2802049657867777e-06, "loss": 0.2158, "step": 14656 }, { "epoch": 0.84, "grad_norm": 0.4075378255586554, "learning_rate": 1.2792941211840481e-06, "loss": 0.2347, "step": 14657 }, { "epoch": 0.84, "grad_norm": 0.813489974515319, "learning_rate": 1.2783835785789867e-06, "loss": 0.4763, "step": 14658 }, { "epoch": 0.84, "grad_norm": 0.5005999740093483, "learning_rate": 1.277473338003129e-06, "loss": 0.3164, "step": 14659 }, { "epoch": 0.84, "grad_norm": 0.3206738895171215, "learning_rate": 1.2765633994879933e-06, "loss": 0.2816, "step": 14660 }, { "epoch": 0.84, "grad_norm": 0.5703995448615973, "learning_rate": 1.2756537630650934e-06, "loss": 0.2392, "step": 14661 }, { "epoch": 0.84, "grad_norm": 0.24137241230342263, "learning_rate": 1.274744428765926e-06, "loss": 0.1746, "step": 14662 }, { "epoch": 0.84, "grad_norm": 0.3367852646989017, "learning_rate": 1.2738353966219863e-06, "loss": 0.2836, "step": 14663 }, { "epoch": 0.84, "grad_norm": 0.3384807639190101, "learning_rate": 1.2729266666647511e-06, "loss": 0.264, "step": 14664 }, { "epoch": 0.84, "grad_norm": 0.6314320406801325, "learning_rate": 1.2720182389256896e-06, "loss": 0.362, "step": 14665 }, { "epoch": 0.84, "grad_norm": 0.3457808849956551, "learning_rate": 1.2711101134362624e-06, "loss": 0.2846, "step": 14666 }, { "epoch": 0.84, "grad_norm": 1.5896610511400546, "learning_rate": 1.2702022902279132e-06, "loss": 0.1807, "step": 14667 }, { "epoch": 0.84, "grad_norm": 0.20164738798995183, "learning_rate": 1.2692947693320867e-06, "loss": 0.1918, "step": 14668 }, { "epoch": 0.84, "grad_norm": 0.34464558067401213, "learning_rate": 1.2683875507802058e-06, "loss": 0.2875, "step": 14669 }, { "epoch": 0.84, "grad_norm": 0.7667195525112961, "learning_rate": 1.2674806346036895e-06, "loss": 0.3144, "step": 14670 }, { "epoch": 0.84, "grad_norm": 0.524432798062477, "learning_rate": 1.2665740208339406e-06, "loss": 0.3225, "step": 14671 }, { "epoch": 0.84, "grad_norm": 0.25184485692333597, "learning_rate": 1.2656677095023607e-06, "loss": 0.2584, "step": 14672 }, { "epoch": 0.84, "grad_norm": 1.4804813262002852, "learning_rate": 1.2647617006403312e-06, "loss": 0.4913, "step": 14673 }, { "epoch": 0.84, "grad_norm": 0.16779157793554556, "learning_rate": 1.2638559942792294e-06, "loss": 0.0875, "step": 14674 }, { "epoch": 0.84, "grad_norm": 0.39519690266180457, "learning_rate": 1.2629505904504158e-06, "loss": 0.3048, "step": 14675 }, { "epoch": 0.84, "grad_norm": 0.3542800298985227, "learning_rate": 1.2620454891852507e-06, "loss": 0.3001, "step": 14676 }, { "epoch": 0.84, "grad_norm": 0.38034372533991734, "learning_rate": 1.2611406905150736e-06, "loss": 0.1732, "step": 14677 }, { "epoch": 0.84, "grad_norm": 0.38478117119739746, "learning_rate": 1.2602361944712193e-06, "loss": 0.2879, "step": 14678 }, { "epoch": 0.84, "grad_norm": 0.5953891231925955, "learning_rate": 1.2593320010850096e-06, "loss": 0.329, "step": 14679 }, { "epoch": 0.84, "grad_norm": 0.4313422987262344, "learning_rate": 1.258428110387754e-06, "loss": 0.2362, "step": 14680 }, { "epoch": 0.84, "grad_norm": 0.3322900970369777, "learning_rate": 1.2575245224107602e-06, "loss": 0.2568, "step": 14681 }, { "epoch": 0.84, "grad_norm": 0.5778985999746848, "learning_rate": 1.256621237185316e-06, "loss": 0.388, "step": 14682 }, { "epoch": 0.84, "grad_norm": 0.25766138829972673, "learning_rate": 1.2557182547427016e-06, "loss": 0.1669, "step": 14683 }, { "epoch": 0.84, "grad_norm": 0.23468154882529396, "learning_rate": 1.2548155751141867e-06, "loss": 0.2158, "step": 14684 }, { "epoch": 0.84, "grad_norm": 1.4762527134046515, "learning_rate": 1.2539131983310349e-06, "loss": 0.5962, "step": 14685 }, { "epoch": 0.84, "grad_norm": 1.3406802197347614, "learning_rate": 1.2530111244244925e-06, "loss": 0.6108, "step": 14686 }, { "epoch": 0.84, "grad_norm": 0.2989282297239217, "learning_rate": 1.2521093534257977e-06, "loss": 0.2035, "step": 14687 }, { "epoch": 0.84, "grad_norm": 0.32967555204294186, "learning_rate": 1.2512078853661813e-06, "loss": 0.2942, "step": 14688 }, { "epoch": 0.84, "grad_norm": 0.26575499268107783, "learning_rate": 1.2503067202768592e-06, "loss": 0.1926, "step": 14689 }, { "epoch": 0.84, "grad_norm": 0.34793399148411513, "learning_rate": 1.2494058581890388e-06, "loss": 0.1929, "step": 14690 }, { "epoch": 0.84, "grad_norm": 0.44843755470370344, "learning_rate": 1.2485052991339174e-06, "loss": 0.2978, "step": 14691 }, { "epoch": 0.84, "grad_norm": 0.5016887458702256, "learning_rate": 1.247605043142679e-06, "loss": 0.2965, "step": 14692 }, { "epoch": 0.84, "grad_norm": 0.35071326656103574, "learning_rate": 1.2467050902465038e-06, "loss": 0.2073, "step": 14693 }, { "epoch": 0.84, "grad_norm": 0.7348697816631484, "learning_rate": 1.2458054404765552e-06, "loss": 0.4092, "step": 14694 }, { "epoch": 0.84, "grad_norm": 0.3464063282503142, "learning_rate": 1.2449060938639869e-06, "loss": 0.3186, "step": 14695 }, { "epoch": 0.84, "grad_norm": 0.2497142048809561, "learning_rate": 1.2440070504399426e-06, "loss": 0.1682, "step": 14696 }, { "epoch": 0.84, "grad_norm": 0.5003880498698998, "learning_rate": 1.243108310235559e-06, "loss": 0.2321, "step": 14697 }, { "epoch": 0.84, "grad_norm": 0.6704924239750555, "learning_rate": 1.2422098732819587e-06, "loss": 0.3914, "step": 14698 }, { "epoch": 0.84, "grad_norm": 0.30569771907205934, "learning_rate": 1.2413117396102548e-06, "loss": 0.2449, "step": 14699 }, { "epoch": 0.84, "grad_norm": 0.3158934485374403, "learning_rate": 1.2404139092515455e-06, "loss": 0.2607, "step": 14700 }, { "epoch": 0.84, "grad_norm": 0.46902780635018154, "learning_rate": 1.2395163822369283e-06, "loss": 0.2458, "step": 14701 }, { "epoch": 0.84, "grad_norm": 0.24793381285511187, "learning_rate": 1.2386191585974815e-06, "loss": 0.2093, "step": 14702 }, { "epoch": 0.84, "grad_norm": 0.568270893805069, "learning_rate": 1.2377222383642773e-06, "loss": 0.2455, "step": 14703 }, { "epoch": 0.84, "grad_norm": 0.4898535112631793, "learning_rate": 1.2368256215683727e-06, "loss": 0.3084, "step": 14704 }, { "epoch": 0.84, "grad_norm": 0.3244437239492716, "learning_rate": 1.235929308240822e-06, "loss": 0.2521, "step": 14705 }, { "epoch": 0.84, "grad_norm": 0.8715598875641944, "learning_rate": 1.2350332984126623e-06, "loss": 0.3682, "step": 14706 }, { "epoch": 0.84, "grad_norm": 0.32979803200018204, "learning_rate": 1.2341375921149224e-06, "loss": 0.2899, "step": 14707 }, { "epoch": 0.85, "grad_norm": 0.2545636420010435, "learning_rate": 1.2332421893786218e-06, "loss": 0.2019, "step": 14708 }, { "epoch": 0.85, "grad_norm": 0.4792169662322209, "learning_rate": 1.2323470902347645e-06, "loss": 0.2032, "step": 14709 }, { "epoch": 0.85, "grad_norm": 0.5781818608555402, "learning_rate": 1.2314522947143526e-06, "loss": 0.2895, "step": 14710 }, { "epoch": 0.85, "grad_norm": 0.4198601434035437, "learning_rate": 1.23055780284837e-06, "loss": 0.27, "step": 14711 }, { "epoch": 0.85, "grad_norm": 0.2760695555581516, "learning_rate": 1.2296636146677942e-06, "loss": 0.2711, "step": 14712 }, { "epoch": 0.85, "grad_norm": 0.8359298804618515, "learning_rate": 1.2287697302035883e-06, "loss": 0.2242, "step": 14713 }, { "epoch": 0.85, "grad_norm": 0.35699267197134954, "learning_rate": 1.227876149486712e-06, "loss": 0.2875, "step": 14714 }, { "epoch": 0.85, "grad_norm": 0.25071982956057953, "learning_rate": 1.226982872548107e-06, "loss": 0.1969, "step": 14715 }, { "epoch": 0.85, "grad_norm": 0.43126805352484987, "learning_rate": 1.2260898994187075e-06, "loss": 0.2655, "step": 14716 }, { "epoch": 0.85, "grad_norm": 0.3063248769232248, "learning_rate": 1.2251972301294358e-06, "loss": 0.2335, "step": 14717 }, { "epoch": 0.85, "grad_norm": 1.0832029188795427, "learning_rate": 1.2243048647112078e-06, "loss": 0.7687, "step": 14718 }, { "epoch": 0.85, "grad_norm": 0.3293628692072448, "learning_rate": 1.2234128031949266e-06, "loss": 0.2336, "step": 14719 }, { "epoch": 0.85, "grad_norm": 0.3376833571189543, "learning_rate": 1.222521045611481e-06, "loss": 0.2651, "step": 14720 }, { "epoch": 0.85, "grad_norm": 0.37187860367678754, "learning_rate": 1.2216295919917553e-06, "loss": 0.2057, "step": 14721 }, { "epoch": 0.85, "grad_norm": 0.8919618832579139, "learning_rate": 1.220738442366619e-06, "loss": 0.4321, "step": 14722 }, { "epoch": 0.85, "grad_norm": 0.2582549238915909, "learning_rate": 1.2198475967669333e-06, "loss": 0.21, "step": 14723 }, { "epoch": 0.85, "grad_norm": 0.4988948087103634, "learning_rate": 1.2189570552235475e-06, "loss": 0.3751, "step": 14724 }, { "epoch": 0.85, "grad_norm": 1.0828966430021156, "learning_rate": 1.2180668177672984e-06, "loss": 0.552, "step": 14725 }, { "epoch": 0.85, "grad_norm": 0.2535459801459226, "learning_rate": 1.217176884429021e-06, "loss": 0.1598, "step": 14726 }, { "epoch": 0.85, "grad_norm": 0.4715556154628492, "learning_rate": 1.21628725523953e-06, "loss": 0.3407, "step": 14727 }, { "epoch": 0.85, "grad_norm": 0.333192726383167, "learning_rate": 1.2153979302296338e-06, "loss": 0.2542, "step": 14728 }, { "epoch": 0.85, "grad_norm": 0.3590585580216333, "learning_rate": 1.2145089094301265e-06, "loss": 0.1648, "step": 14729 }, { "epoch": 0.85, "grad_norm": 0.4915144416030057, "learning_rate": 1.2136201928718005e-06, "loss": 0.3565, "step": 14730 }, { "epoch": 0.85, "grad_norm": 0.3515123559483987, "learning_rate": 1.21273178058543e-06, "loss": 0.2912, "step": 14731 }, { "epoch": 0.85, "grad_norm": 0.38349679742074594, "learning_rate": 1.21184367260178e-06, "loss": 0.1698, "step": 14732 }, { "epoch": 0.85, "grad_norm": 0.40644587846781555, "learning_rate": 1.2109558689516054e-06, "loss": 0.3135, "step": 14733 }, { "epoch": 0.85, "grad_norm": 0.3399566033908146, "learning_rate": 1.210068369665649e-06, "loss": 0.1875, "step": 14734 }, { "epoch": 0.85, "grad_norm": 0.31943117545798166, "learning_rate": 1.2091811747746484e-06, "loss": 0.2578, "step": 14735 }, { "epoch": 0.85, "grad_norm": 0.3502609198287449, "learning_rate": 1.208294284309327e-06, "loss": 0.2722, "step": 14736 }, { "epoch": 0.85, "grad_norm": 1.4569221299642199, "learning_rate": 1.2074076983003956e-06, "loss": 0.6047, "step": 14737 }, { "epoch": 0.85, "grad_norm": 0.40102742209080916, "learning_rate": 1.2065214167785554e-06, "loss": 0.2702, "step": 14738 }, { "epoch": 0.85, "grad_norm": 0.29010025220729196, "learning_rate": 1.2056354397745029e-06, "loss": 0.2478, "step": 14739 }, { "epoch": 0.85, "grad_norm": 0.3096437615449813, "learning_rate": 1.2047497673189169e-06, "loss": 0.1753, "step": 14740 }, { "epoch": 0.85, "grad_norm": 0.3867388418154201, "learning_rate": 1.2038643994424682e-06, "loss": 0.2674, "step": 14741 }, { "epoch": 0.85, "grad_norm": 0.4669314450298384, "learning_rate": 1.2029793361758146e-06, "loss": 0.2701, "step": 14742 }, { "epoch": 0.85, "grad_norm": 0.36142721745288964, "learning_rate": 1.2020945775496107e-06, "loss": 0.2782, "step": 14743 }, { "epoch": 0.85, "grad_norm": 0.2954197524558824, "learning_rate": 1.201210123594494e-06, "loss": 0.2351, "step": 14744 }, { "epoch": 0.85, "grad_norm": 0.6366188960797053, "learning_rate": 1.200325974341091e-06, "loss": 0.3014, "step": 14745 }, { "epoch": 0.85, "grad_norm": 0.23679746876696145, "learning_rate": 1.199442129820022e-06, "loss": 0.1938, "step": 14746 }, { "epoch": 0.85, "grad_norm": 0.3110930947191409, "learning_rate": 1.1985585900618912e-06, "loss": 0.2664, "step": 14747 }, { "epoch": 0.85, "grad_norm": 0.5009217757277309, "learning_rate": 1.1976753550972998e-06, "loss": 0.3774, "step": 14748 }, { "epoch": 0.85, "grad_norm": 0.5908704134342048, "learning_rate": 1.196792424956833e-06, "loss": 0.2874, "step": 14749 }, { "epoch": 0.85, "grad_norm": 0.527797718182077, "learning_rate": 1.1959097996710656e-06, "loss": 0.2624, "step": 14750 }, { "epoch": 0.85, "grad_norm": 0.25644427630796324, "learning_rate": 1.1950274792705618e-06, "loss": 0.2696, "step": 14751 }, { "epoch": 0.85, "grad_norm": 0.16068604115031754, "learning_rate": 1.1941454637858784e-06, "loss": 0.0712, "step": 14752 }, { "epoch": 0.85, "grad_norm": 0.5669102028958434, "learning_rate": 1.19326375324756e-06, "loss": 0.3149, "step": 14753 }, { "epoch": 0.85, "grad_norm": 0.37777598291555, "learning_rate": 1.1923823476861395e-06, "loss": 0.3254, "step": 14754 }, { "epoch": 0.85, "grad_norm": 0.3227218948916543, "learning_rate": 1.1915012471321385e-06, "loss": 0.2587, "step": 14755 }, { "epoch": 0.85, "grad_norm": 0.3981084157797156, "learning_rate": 1.1906204516160713e-06, "loss": 0.3059, "step": 14756 }, { "epoch": 0.85, "grad_norm": 0.5493564974138063, "learning_rate": 1.189739961168439e-06, "loss": 0.3309, "step": 14757 }, { "epoch": 0.85, "grad_norm": 0.31382816631129834, "learning_rate": 1.1888597758197319e-06, "loss": 0.1758, "step": 14758 }, { "epoch": 0.85, "grad_norm": 0.25289083122443995, "learning_rate": 1.1879798956004307e-06, "loss": 0.2251, "step": 14759 }, { "epoch": 0.85, "grad_norm": 0.5491193092701862, "learning_rate": 1.1871003205410092e-06, "loss": 0.4062, "step": 14760 }, { "epoch": 0.85, "grad_norm": 0.6820129227771387, "learning_rate": 1.186221050671924e-06, "loss": 0.3746, "step": 14761 }, { "epoch": 0.85, "grad_norm": 0.358774579788031, "learning_rate": 1.1853420860236253e-06, "loss": 0.1911, "step": 14762 }, { "epoch": 0.85, "grad_norm": 0.30419959453244505, "learning_rate": 1.1844634266265487e-06, "loss": 0.266, "step": 14763 }, { "epoch": 0.85, "grad_norm": 0.4638461851125854, "learning_rate": 1.1835850725111264e-06, "loss": 0.2343, "step": 14764 }, { "epoch": 0.85, "grad_norm": 0.22267885583250036, "learning_rate": 1.1827070237077743e-06, "loss": 0.1363, "step": 14765 }, { "epoch": 0.85, "grad_norm": 0.5251370761115037, "learning_rate": 1.1818292802468989e-06, "loss": 0.3503, "step": 14766 }, { "epoch": 0.85, "grad_norm": 0.4038183648154914, "learning_rate": 1.1809518421588939e-06, "loss": 0.2937, "step": 14767 }, { "epoch": 0.85, "grad_norm": 0.5477079411295382, "learning_rate": 1.1800747094741493e-06, "loss": 0.1356, "step": 14768 }, { "epoch": 0.85, "grad_norm": 0.42781575837813424, "learning_rate": 1.1791978822230388e-06, "loss": 0.2874, "step": 14769 }, { "epoch": 0.85, "grad_norm": 0.38030178099469747, "learning_rate": 1.1783213604359268e-06, "loss": 0.2965, "step": 14770 }, { "epoch": 0.85, "grad_norm": 0.19456608161814554, "learning_rate": 1.1774451441431655e-06, "loss": 0.0866, "step": 14771 }, { "epoch": 0.85, "grad_norm": 0.39569769749296096, "learning_rate": 1.1765692333750977e-06, "loss": 0.2675, "step": 14772 }, { "epoch": 0.85, "grad_norm": 0.6547391841835668, "learning_rate": 1.17569362816206e-06, "loss": 0.3861, "step": 14773 }, { "epoch": 0.85, "grad_norm": 0.2570354731252588, "learning_rate": 1.174818328534373e-06, "loss": 0.1988, "step": 14774 }, { "epoch": 0.85, "grad_norm": 0.27903556542289093, "learning_rate": 1.1739433345223482e-06, "loss": 0.2137, "step": 14775 }, { "epoch": 0.85, "grad_norm": 1.4667329346849802, "learning_rate": 1.1730686461562835e-06, "loss": 0.4638, "step": 14776 }, { "epoch": 0.85, "grad_norm": 0.5838213979491272, "learning_rate": 1.172194263466474e-06, "loss": 0.3362, "step": 14777 }, { "epoch": 0.85, "grad_norm": 0.3959474359561361, "learning_rate": 1.1713201864831968e-06, "loss": 0.2403, "step": 14778 }, { "epoch": 0.85, "grad_norm": 0.33580085060755466, "learning_rate": 1.1704464152367234e-06, "loss": 0.2984, "step": 14779 }, { "epoch": 0.85, "grad_norm": 0.22515204910858588, "learning_rate": 1.1695729497573082e-06, "loss": 0.156, "step": 14780 }, { "epoch": 0.85, "grad_norm": 0.41575650449527546, "learning_rate": 1.168699790075204e-06, "loss": 0.2199, "step": 14781 }, { "epoch": 0.85, "grad_norm": 0.5596586766328981, "learning_rate": 1.1678269362206463e-06, "loss": 0.3317, "step": 14782 }, { "epoch": 0.85, "grad_norm": 0.3552412822573221, "learning_rate": 1.166954388223862e-06, "loss": 0.25, "step": 14783 }, { "epoch": 0.85, "grad_norm": 0.36014971889060016, "learning_rate": 1.1660821461150673e-06, "loss": 0.2429, "step": 14784 }, { "epoch": 0.85, "grad_norm": 0.42642570719810025, "learning_rate": 1.1652102099244667e-06, "loss": 0.2757, "step": 14785 }, { "epoch": 0.85, "grad_norm": 0.2994411664078834, "learning_rate": 1.1643385796822582e-06, "loss": 0.2318, "step": 14786 }, { "epoch": 0.85, "grad_norm": 0.29557420750535496, "learning_rate": 1.1634672554186243e-06, "loss": 0.259, "step": 14787 }, { "epoch": 0.85, "grad_norm": 1.675274623281233, "learning_rate": 1.16259623716374e-06, "loss": 0.2517, "step": 14788 }, { "epoch": 0.85, "grad_norm": 0.6074414344124812, "learning_rate": 1.1617255249477677e-06, "loss": 0.316, "step": 14789 }, { "epoch": 0.85, "grad_norm": 0.360726252157882, "learning_rate": 1.16085511880086e-06, "loss": 0.2795, "step": 14790 }, { "epoch": 0.85, "grad_norm": 0.32731888538386056, "learning_rate": 1.1599850187531603e-06, "loss": 0.2495, "step": 14791 }, { "epoch": 0.85, "grad_norm": 0.2693011419688268, "learning_rate": 1.1591152248347959e-06, "loss": 0.159, "step": 14792 }, { "epoch": 0.85, "grad_norm": 0.3694108954988957, "learning_rate": 1.1582457370758948e-06, "loss": 0.2588, "step": 14793 }, { "epoch": 0.85, "grad_norm": 0.42252599397338797, "learning_rate": 1.157376555506562e-06, "loss": 0.2219, "step": 14794 }, { "epoch": 0.85, "grad_norm": 0.3670240559105416, "learning_rate": 1.1565076801568997e-06, "loss": 0.2801, "step": 14795 }, { "epoch": 0.85, "grad_norm": 0.4043612092318145, "learning_rate": 1.1556391110569965e-06, "loss": 0.2843, "step": 14796 }, { "epoch": 0.85, "grad_norm": 1.1551526431797676, "learning_rate": 1.1547708482369279e-06, "loss": 0.719, "step": 14797 }, { "epoch": 0.85, "grad_norm": 0.22288561492500292, "learning_rate": 1.1539028917267668e-06, "loss": 0.1688, "step": 14798 }, { "epoch": 0.85, "grad_norm": 0.2946033880771682, "learning_rate": 1.1530352415565683e-06, "loss": 0.2451, "step": 14799 }, { "epoch": 0.85, "grad_norm": 0.9260136414904041, "learning_rate": 1.152167897756379e-06, "loss": 0.3756, "step": 14800 }, { "epoch": 0.85, "grad_norm": 0.46156669228028563, "learning_rate": 1.1513008603562327e-06, "loss": 0.2187, "step": 14801 }, { "epoch": 0.85, "grad_norm": 0.4098473307632537, "learning_rate": 1.1504341293861588e-06, "loss": 0.3288, "step": 14802 }, { "epoch": 0.85, "grad_norm": 0.3391465615133372, "learning_rate": 1.149567704876171e-06, "loss": 0.3124, "step": 14803 }, { "epoch": 0.85, "grad_norm": 0.24197185666517396, "learning_rate": 1.1487015868562723e-06, "loss": 0.0848, "step": 14804 }, { "epoch": 0.85, "grad_norm": 0.30112775030509276, "learning_rate": 1.147835775356455e-06, "loss": 0.2428, "step": 14805 }, { "epoch": 0.85, "grad_norm": 0.31423519705737857, "learning_rate": 1.1469702704067064e-06, "loss": 0.3014, "step": 14806 }, { "epoch": 0.85, "grad_norm": 0.7919262173209054, "learning_rate": 1.146105072036997e-06, "loss": 0.2429, "step": 14807 }, { "epoch": 0.85, "grad_norm": 0.35309423242059595, "learning_rate": 1.1452401802772884e-06, "loss": 0.2779, "step": 14808 }, { "epoch": 0.85, "grad_norm": 1.1765512888066414, "learning_rate": 1.144375595157532e-06, "loss": 0.7295, "step": 14809 }, { "epoch": 0.85, "grad_norm": 0.26690693768113766, "learning_rate": 1.143511316707665e-06, "loss": 0.2349, "step": 14810 }, { "epoch": 0.85, "grad_norm": 0.22741761840826571, "learning_rate": 1.1426473449576225e-06, "loss": 0.1552, "step": 14811 }, { "epoch": 0.85, "grad_norm": 0.8487452061726126, "learning_rate": 1.1417836799373205e-06, "loss": 0.3599, "step": 14812 }, { "epoch": 0.85, "grad_norm": 0.7051579592509625, "learning_rate": 1.1409203216766706e-06, "loss": 0.4184, "step": 14813 }, { "epoch": 0.85, "grad_norm": 0.24162397410920866, "learning_rate": 1.1400572702055657e-06, "loss": 0.1935, "step": 14814 }, { "epoch": 0.85, "grad_norm": 0.45583372486177876, "learning_rate": 1.1391945255538994e-06, "loss": 0.3813, "step": 14815 }, { "epoch": 0.85, "grad_norm": 0.4161358729060462, "learning_rate": 1.1383320877515446e-06, "loss": 0.2391, "step": 14816 }, { "epoch": 0.85, "grad_norm": 0.26035907038958234, "learning_rate": 1.1374699568283698e-06, "loss": 0.1717, "step": 14817 }, { "epoch": 0.85, "grad_norm": 0.32519859938343304, "learning_rate": 1.1366081328142264e-06, "loss": 0.2887, "step": 14818 }, { "epoch": 0.85, "grad_norm": 0.6847648618208944, "learning_rate": 1.135746615738965e-06, "loss": 0.3494, "step": 14819 }, { "epoch": 0.85, "grad_norm": 0.32858251668966476, "learning_rate": 1.1348854056324166e-06, "loss": 0.212, "step": 14820 }, { "epoch": 0.85, "grad_norm": 1.1507405287489314, "learning_rate": 1.1340245025244045e-06, "loss": 0.7616, "step": 14821 }, { "epoch": 0.85, "grad_norm": 0.36743606071499174, "learning_rate": 1.133163906444742e-06, "loss": 0.3013, "step": 14822 }, { "epoch": 0.85, "grad_norm": 0.3275912685999875, "learning_rate": 1.132303617423236e-06, "loss": 0.2387, "step": 14823 }, { "epoch": 0.85, "grad_norm": 0.34411631210529814, "learning_rate": 1.131443635489672e-06, "loss": 0.1776, "step": 14824 }, { "epoch": 0.85, "grad_norm": 1.4080656699672398, "learning_rate": 1.1305839606738334e-06, "loss": 0.717, "step": 14825 }, { "epoch": 0.85, "grad_norm": 0.25755544714127737, "learning_rate": 1.129724593005489e-06, "loss": 0.2429, "step": 14826 }, { "epoch": 0.85, "grad_norm": 0.4605922701134536, "learning_rate": 1.1288655325144027e-06, "loss": 0.3024, "step": 14827 }, { "epoch": 0.85, "grad_norm": 0.8821629142995729, "learning_rate": 1.1280067792303218e-06, "loss": 0.4034, "step": 14828 }, { "epoch": 0.85, "grad_norm": 0.3203927459270146, "learning_rate": 1.1271483331829835e-06, "loss": 0.2634, "step": 14829 }, { "epoch": 0.85, "grad_norm": 0.33166642821629216, "learning_rate": 1.1262901944021165e-06, "loss": 0.245, "step": 14830 }, { "epoch": 0.85, "grad_norm": 0.3227943214683181, "learning_rate": 1.125432362917439e-06, "loss": 0.1672, "step": 14831 }, { "epoch": 0.85, "grad_norm": 0.31125204319825606, "learning_rate": 1.1245748387586575e-06, "loss": 0.2647, "step": 14832 }, { "epoch": 0.85, "grad_norm": 0.8780581738774083, "learning_rate": 1.123717621955468e-06, "loss": 0.364, "step": 14833 }, { "epoch": 0.85, "grad_norm": 0.3650475322050246, "learning_rate": 1.1228607125375534e-06, "loss": 0.2866, "step": 14834 }, { "epoch": 0.85, "grad_norm": 0.40110708736483236, "learning_rate": 1.1220041105345935e-06, "loss": 0.2838, "step": 14835 }, { "epoch": 0.85, "grad_norm": 0.560535169035649, "learning_rate": 1.121147815976248e-06, "loss": 0.3457, "step": 14836 }, { "epoch": 0.85, "grad_norm": 0.20911265248523273, "learning_rate": 1.1202918288921727e-06, "loss": 0.143, "step": 14837 }, { "epoch": 0.85, "grad_norm": 0.3534204337682163, "learning_rate": 1.1194361493120099e-06, "loss": 0.268, "step": 14838 }, { "epoch": 0.85, "grad_norm": 0.47013886187275117, "learning_rate": 1.118580777265388e-06, "loss": 0.3605, "step": 14839 }, { "epoch": 0.85, "grad_norm": 0.7626126324582352, "learning_rate": 1.1177257127819353e-06, "loss": 0.3003, "step": 14840 }, { "epoch": 0.85, "grad_norm": 0.31851659287056955, "learning_rate": 1.1168709558912583e-06, "loss": 0.2653, "step": 14841 }, { "epoch": 0.85, "grad_norm": 0.3898838568972243, "learning_rate": 1.116016506622959e-06, "loss": 0.2784, "step": 14842 }, { "epoch": 0.85, "grad_norm": 0.15368938464097603, "learning_rate": 1.1151623650066224e-06, "loss": 0.0877, "step": 14843 }, { "epoch": 0.85, "grad_norm": 0.30142584311757015, "learning_rate": 1.114308531071835e-06, "loss": 0.238, "step": 14844 }, { "epoch": 0.85, "grad_norm": 1.2307746884376518, "learning_rate": 1.1134550048481596e-06, "loss": 0.5617, "step": 14845 }, { "epoch": 0.85, "grad_norm": 0.3237698295002861, "learning_rate": 1.1126017863651562e-06, "loss": 0.2563, "step": 14846 }, { "epoch": 0.85, "grad_norm": 0.31133573341743925, "learning_rate": 1.1117488756523677e-06, "loss": 0.2625, "step": 14847 }, { "epoch": 0.85, "grad_norm": 1.3573196658449307, "learning_rate": 1.1108962727393368e-06, "loss": 0.5345, "step": 14848 }, { "epoch": 0.85, "grad_norm": 0.27633793321812017, "learning_rate": 1.110043977655585e-06, "loss": 0.1962, "step": 14849 }, { "epoch": 0.85, "grad_norm": 0.23377373058249695, "learning_rate": 1.109191990430628e-06, "loss": 0.2015, "step": 14850 }, { "epoch": 0.85, "grad_norm": 1.2731592859606273, "learning_rate": 1.1083403110939695e-06, "loss": 0.7351, "step": 14851 }, { "epoch": 0.85, "grad_norm": 0.635556152341861, "learning_rate": 1.107488939675102e-06, "loss": 0.374, "step": 14852 }, { "epoch": 0.85, "grad_norm": 0.3848628568892182, "learning_rate": 1.1066378762035125e-06, "loss": 0.1684, "step": 14853 }, { "epoch": 0.85, "grad_norm": 0.3072768771702361, "learning_rate": 1.1057871207086713e-06, "loss": 0.2661, "step": 14854 }, { "epoch": 0.85, "grad_norm": 0.3946099595301403, "learning_rate": 1.1049366732200383e-06, "loss": 0.2413, "step": 14855 }, { "epoch": 0.85, "grad_norm": 0.3380880546143608, "learning_rate": 1.104086533767067e-06, "loss": 0.1868, "step": 14856 }, { "epoch": 0.85, "grad_norm": 0.4442492965914318, "learning_rate": 1.1032367023791957e-06, "loss": 0.3175, "step": 14857 }, { "epoch": 0.85, "grad_norm": 0.2528655784441553, "learning_rate": 1.1023871790858553e-06, "loss": 0.2477, "step": 14858 }, { "epoch": 0.85, "grad_norm": 0.28372712548736984, "learning_rate": 1.1015379639164625e-06, "loss": 0.1826, "step": 14859 }, { "epoch": 0.85, "grad_norm": 1.1834437417520254, "learning_rate": 1.100689056900429e-06, "loss": 0.5269, "step": 14860 }, { "epoch": 0.85, "grad_norm": 0.3866428294105834, "learning_rate": 1.0998404580671507e-06, "loss": 0.2489, "step": 14861 }, { "epoch": 0.85, "grad_norm": 0.25646265297625703, "learning_rate": 1.0989921674460146e-06, "loss": 0.2444, "step": 14862 }, { "epoch": 0.85, "grad_norm": 0.44426050608931333, "learning_rate": 1.0981441850663976e-06, "loss": 0.263, "step": 14863 }, { "epoch": 0.85, "grad_norm": 0.43875055594403534, "learning_rate": 1.0972965109576628e-06, "loss": 0.25, "step": 14864 }, { "epoch": 0.85, "grad_norm": 0.4141839426559247, "learning_rate": 1.0964491451491677e-06, "loss": 0.2651, "step": 14865 }, { "epoch": 0.85, "grad_norm": 0.2750628065825932, "learning_rate": 1.0956020876702567e-06, "loss": 0.2278, "step": 14866 }, { "epoch": 0.85, "grad_norm": 1.0146697997317078, "learning_rate": 1.094755338550263e-06, "loss": 0.5553, "step": 14867 }, { "epoch": 0.85, "grad_norm": 0.3163524879755541, "learning_rate": 1.0939088978185053e-06, "loss": 0.2456, "step": 14868 }, { "epoch": 0.85, "grad_norm": 0.6128769729330104, "learning_rate": 1.0930627655043036e-06, "loss": 0.2857, "step": 14869 }, { "epoch": 0.85, "grad_norm": 0.23060783739572102, "learning_rate": 1.0922169416369531e-06, "loss": 0.2126, "step": 14870 }, { "epoch": 0.85, "grad_norm": 0.6672465753746528, "learning_rate": 1.0913714262457486e-06, "loss": 0.3115, "step": 14871 }, { "epoch": 0.85, "grad_norm": 0.3828226055546082, "learning_rate": 1.0905262193599665e-06, "loss": 0.3032, "step": 14872 }, { "epoch": 0.85, "grad_norm": 0.30441911790214365, "learning_rate": 1.0896813210088797e-06, "loss": 0.2394, "step": 14873 }, { "epoch": 0.85, "grad_norm": 0.6542472129796099, "learning_rate": 1.0888367312217452e-06, "loss": 0.2881, "step": 14874 }, { "epoch": 0.85, "grad_norm": 0.37036987367157137, "learning_rate": 1.0879924500278116e-06, "loss": 0.2922, "step": 14875 }, { "epoch": 0.85, "grad_norm": 0.2711333428099278, "learning_rate": 1.087148477456317e-06, "loss": 0.1101, "step": 14876 }, { "epoch": 0.85, "grad_norm": 0.3898315605171779, "learning_rate": 1.0863048135364851e-06, "loss": 0.2556, "step": 14877 }, { "epoch": 0.85, "grad_norm": 0.32269214206454216, "learning_rate": 1.0854614582975353e-06, "loss": 0.2821, "step": 14878 }, { "epoch": 0.85, "grad_norm": 1.4122935758067834, "learning_rate": 1.084618411768673e-06, "loss": 0.3141, "step": 14879 }, { "epoch": 0.85, "grad_norm": 0.40451562657458473, "learning_rate": 1.0837756739790916e-06, "loss": 0.269, "step": 14880 }, { "epoch": 0.85, "grad_norm": 0.36283485462304216, "learning_rate": 1.0829332449579732e-06, "loss": 0.322, "step": 14881 }, { "epoch": 0.86, "grad_norm": 0.28832612026905335, "learning_rate": 1.0820911247344944e-06, "loss": 0.1806, "step": 14882 }, { "epoch": 0.86, "grad_norm": 0.26597397855279037, "learning_rate": 1.0812493133378166e-06, "loss": 0.1913, "step": 14883 }, { "epoch": 0.86, "grad_norm": 0.5791713121666998, "learning_rate": 1.0804078107970917e-06, "loss": 0.3546, "step": 14884 }, { "epoch": 0.86, "grad_norm": 0.5003590603484814, "learning_rate": 1.0795666171414597e-06, "loss": 0.3228, "step": 14885 }, { "epoch": 0.86, "grad_norm": 0.2956405417591853, "learning_rate": 1.0787257324000533e-06, "loss": 0.2204, "step": 14886 }, { "epoch": 0.86, "grad_norm": 0.5080100270148686, "learning_rate": 1.077885156601991e-06, "loss": 0.3585, "step": 14887 }, { "epoch": 0.86, "grad_norm": 0.49071658954089276, "learning_rate": 1.0770448897763818e-06, "loss": 0.2817, "step": 14888 }, { "epoch": 0.86, "grad_norm": 0.15267670632605457, "learning_rate": 1.0762049319523248e-06, "loss": 0.0704, "step": 14889 }, { "epoch": 0.86, "grad_norm": 0.2648087586985502, "learning_rate": 1.075365283158908e-06, "loss": 0.2688, "step": 14890 }, { "epoch": 0.86, "grad_norm": 0.6311471408354534, "learning_rate": 1.0745259434252065e-06, "loss": 0.3905, "step": 14891 }, { "epoch": 0.86, "grad_norm": 0.671787505272611, "learning_rate": 1.0736869127802884e-06, "loss": 0.1974, "step": 14892 }, { "epoch": 0.86, "grad_norm": 0.2957218483145482, "learning_rate": 1.0728481912532062e-06, "loss": 0.2763, "step": 14893 }, { "epoch": 0.86, "grad_norm": 0.32442168752672784, "learning_rate": 1.07200977887301e-06, "loss": 0.2365, "step": 14894 }, { "epoch": 0.86, "grad_norm": 0.3205645128811862, "learning_rate": 1.0711716756687307e-06, "loss": 0.0799, "step": 14895 }, { "epoch": 0.86, "grad_norm": 0.40600771972047073, "learning_rate": 1.070333881669392e-06, "loss": 0.2881, "step": 14896 }, { "epoch": 0.86, "grad_norm": 0.4958254382676697, "learning_rate": 1.0694963969040062e-06, "loss": 0.3264, "step": 14897 }, { "epoch": 0.86, "grad_norm": 0.40568174234009224, "learning_rate": 1.0686592214015766e-06, "loss": 0.2968, "step": 14898 }, { "epoch": 0.86, "grad_norm": 0.32870509799899306, "learning_rate": 1.067822355191095e-06, "loss": 0.235, "step": 14899 }, { "epoch": 0.86, "grad_norm": 1.0628361533545403, "learning_rate": 1.0669857983015408e-06, "loss": 0.4543, "step": 14900 }, { "epoch": 0.86, "grad_norm": 0.20065123792574188, "learning_rate": 1.0661495507618845e-06, "loss": 0.1679, "step": 14901 }, { "epoch": 0.86, "grad_norm": 0.29083032401850334, "learning_rate": 1.0653136126010832e-06, "loss": 0.223, "step": 14902 }, { "epoch": 0.86, "grad_norm": 0.5620063770534373, "learning_rate": 1.06447798384809e-06, "loss": 0.3486, "step": 14903 }, { "epoch": 0.86, "grad_norm": 0.554067422855577, "learning_rate": 1.0636426645318387e-06, "loss": 0.3698, "step": 14904 }, { "epoch": 0.86, "grad_norm": 0.28406674047779545, "learning_rate": 1.0628076546812583e-06, "loss": 0.2318, "step": 14905 }, { "epoch": 0.86, "grad_norm": 0.5269874383643349, "learning_rate": 1.0619729543252622e-06, "loss": 0.3355, "step": 14906 }, { "epoch": 0.86, "grad_norm": 0.24121729656072172, "learning_rate": 1.0611385634927607e-06, "loss": 0.1645, "step": 14907 }, { "epoch": 0.86, "grad_norm": 0.35797358852460026, "learning_rate": 1.0603044822126463e-06, "loss": 0.2266, "step": 14908 }, { "epoch": 0.86, "grad_norm": 0.3378453061765035, "learning_rate": 1.0594707105138024e-06, "loss": 0.2897, "step": 14909 }, { "epoch": 0.86, "grad_norm": 0.8746212764884232, "learning_rate": 1.0586372484251018e-06, "loss": 0.391, "step": 14910 }, { "epoch": 0.86, "grad_norm": 0.32721325459226824, "learning_rate": 1.057804095975411e-06, "loss": 0.2678, "step": 14911 }, { "epoch": 0.86, "grad_norm": 1.5419081597400264, "learning_rate": 1.0569712531935805e-06, "loss": 0.277, "step": 14912 }, { "epoch": 0.86, "grad_norm": 0.2279118449162286, "learning_rate": 1.0561387201084494e-06, "loss": 0.206, "step": 14913 }, { "epoch": 0.86, "grad_norm": 0.288549030985663, "learning_rate": 1.0553064967488514e-06, "loss": 0.2256, "step": 14914 }, { "epoch": 0.86, "grad_norm": 0.6606990633306803, "learning_rate": 1.054474583143602e-06, "loss": 0.2891, "step": 14915 }, { "epoch": 0.86, "grad_norm": 0.6655593582916631, "learning_rate": 1.0536429793215152e-06, "loss": 0.399, "step": 14916 }, { "epoch": 0.86, "grad_norm": 0.24780553816203488, "learning_rate": 1.0528116853113867e-06, "loss": 0.2537, "step": 14917 }, { "epoch": 0.86, "grad_norm": 1.4479855245872628, "learning_rate": 1.0519807011420057e-06, "loss": 0.2714, "step": 14918 }, { "epoch": 0.86, "grad_norm": 0.6947298411391986, "learning_rate": 1.051150026842146e-06, "loss": 0.3632, "step": 14919 }, { "epoch": 0.86, "grad_norm": 0.3823419776922345, "learning_rate": 1.0503196624405775e-06, "loss": 0.259, "step": 14920 }, { "epoch": 0.86, "grad_norm": 0.2174358692451919, "learning_rate": 1.0494896079660554e-06, "loss": 0.1818, "step": 14921 }, { "epoch": 0.86, "grad_norm": 0.6206679548783893, "learning_rate": 1.0486598634473221e-06, "loss": 0.3312, "step": 14922 }, { "epoch": 0.86, "grad_norm": 0.40084767726771736, "learning_rate": 1.0478304289131115e-06, "loss": 0.3024, "step": 14923 }, { "epoch": 0.86, "grad_norm": 1.3130018162299788, "learning_rate": 1.0470013043921523e-06, "loss": 0.6565, "step": 14924 }, { "epoch": 0.86, "grad_norm": 0.317664336895266, "learning_rate": 1.046172489913151e-06, "loss": 0.237, "step": 14925 }, { "epoch": 0.86, "grad_norm": 0.3425571725675746, "learning_rate": 1.0453439855048108e-06, "loss": 0.2797, "step": 14926 }, { "epoch": 0.86, "grad_norm": 0.30894367529548855, "learning_rate": 1.0445157911958214e-06, "loss": 0.1792, "step": 14927 }, { "epoch": 0.86, "grad_norm": 1.014875460132905, "learning_rate": 1.0436879070148675e-06, "loss": 0.5139, "step": 14928 }, { "epoch": 0.86, "grad_norm": 0.26322226528383447, "learning_rate": 1.042860332990615e-06, "loss": 0.2515, "step": 14929 }, { "epoch": 0.86, "grad_norm": 0.5905553635015627, "learning_rate": 1.0420330691517256e-06, "loss": 0.3113, "step": 14930 }, { "epoch": 0.86, "grad_norm": 0.5689957343239428, "learning_rate": 1.0412061155268428e-06, "loss": 0.2489, "step": 14931 }, { "epoch": 0.86, "grad_norm": 0.3543218393322141, "learning_rate": 1.0403794721446092e-06, "loss": 0.257, "step": 14932 }, { "epoch": 0.86, "grad_norm": 0.386805524160091, "learning_rate": 1.03955313903365e-06, "loss": 0.3118, "step": 14933 }, { "epoch": 0.86, "grad_norm": 0.18668486006155857, "learning_rate": 1.0387271162225787e-06, "loss": 0.1076, "step": 14934 }, { "epoch": 0.86, "grad_norm": 0.39926953655521297, "learning_rate": 1.0379014037400014e-06, "loss": 0.2844, "step": 14935 }, { "epoch": 0.86, "grad_norm": 0.8440325835444368, "learning_rate": 1.0370760016145142e-06, "loss": 0.4694, "step": 14936 }, { "epoch": 0.86, "grad_norm": 0.3958906252639677, "learning_rate": 1.036250909874701e-06, "loss": 0.3087, "step": 14937 }, { "epoch": 0.86, "grad_norm": 0.2876575469257746, "learning_rate": 1.0354261285491319e-06, "loss": 0.188, "step": 14938 }, { "epoch": 0.86, "grad_norm": 0.24698100951360044, "learning_rate": 1.0346016576663686e-06, "loss": 0.1631, "step": 14939 }, { "epoch": 0.86, "grad_norm": 0.8047880203921186, "learning_rate": 1.0337774972549675e-06, "loss": 0.5828, "step": 14940 }, { "epoch": 0.86, "grad_norm": 0.2618812698548808, "learning_rate": 1.0329536473434653e-06, "loss": 0.2086, "step": 14941 }, { "epoch": 0.86, "grad_norm": 0.513815456683671, "learning_rate": 1.032130107960393e-06, "loss": 0.3363, "step": 14942 }, { "epoch": 0.86, "grad_norm": 0.6800722467991159, "learning_rate": 1.0313068791342683e-06, "loss": 0.3421, "step": 14943 }, { "epoch": 0.86, "grad_norm": 0.2965021973445377, "learning_rate": 1.0304839608936002e-06, "loss": 0.197, "step": 14944 }, { "epoch": 0.86, "grad_norm": 0.28748538312306504, "learning_rate": 1.0296613532668875e-06, "loss": 0.2301, "step": 14945 }, { "epoch": 0.86, "grad_norm": 1.1561225785911142, "learning_rate": 1.0288390562826178e-06, "loss": 0.697, "step": 14946 }, { "epoch": 0.86, "grad_norm": 0.22153187673332958, "learning_rate": 1.0280170699692648e-06, "loss": 0.1684, "step": 14947 }, { "epoch": 0.86, "grad_norm": 0.4287418102379653, "learning_rate": 1.0271953943552938e-06, "loss": 0.3314, "step": 14948 }, { "epoch": 0.86, "grad_norm": 0.5123710533271378, "learning_rate": 1.0263740294691615e-06, "loss": 0.3298, "step": 14949 }, { "epoch": 0.86, "grad_norm": 0.31713937270697534, "learning_rate": 1.0255529753393112e-06, "loss": 0.2379, "step": 14950 }, { "epoch": 0.86, "grad_norm": 1.6366738751403234, "learning_rate": 1.0247322319941745e-06, "loss": 0.2046, "step": 14951 }, { "epoch": 0.86, "grad_norm": 0.45705670046593916, "learning_rate": 1.023911799462174e-06, "loss": 0.3897, "step": 14952 }, { "epoch": 0.86, "grad_norm": 0.27150260711708984, "learning_rate": 1.0230916777717226e-06, "loss": 0.2514, "step": 14953 }, { "epoch": 0.86, "grad_norm": 0.5578410636590693, "learning_rate": 1.0222718669512211e-06, "loss": 0.2062, "step": 14954 }, { "epoch": 0.86, "grad_norm": 0.5180872333426131, "learning_rate": 1.0214523670290587e-06, "loss": 0.2397, "step": 14955 }, { "epoch": 0.86, "grad_norm": 0.33763059935600714, "learning_rate": 1.0206331780336154e-06, "loss": 0.255, "step": 14956 }, { "epoch": 0.86, "grad_norm": 0.34511369249002094, "learning_rate": 1.0198142999932559e-06, "loss": 0.2375, "step": 14957 }, { "epoch": 0.86, "grad_norm": 0.8756372942785727, "learning_rate": 1.0189957329363465e-06, "loss": 0.5402, "step": 14958 }, { "epoch": 0.86, "grad_norm": 0.40461371137970736, "learning_rate": 1.0181774768912255e-06, "loss": 0.2893, "step": 14959 }, { "epoch": 0.86, "grad_norm": 0.38107539894620857, "learning_rate": 1.0173595318862305e-06, "loss": 0.2672, "step": 14960 }, { "epoch": 0.86, "grad_norm": 0.22422982508923442, "learning_rate": 1.01654189794969e-06, "loss": 0.1835, "step": 14961 }, { "epoch": 0.86, "grad_norm": 0.33600638655085446, "learning_rate": 1.0157245751099188e-06, "loss": 0.2601, "step": 14962 }, { "epoch": 0.86, "grad_norm": 1.4870843447136277, "learning_rate": 1.0149075633952178e-06, "loss": 0.5065, "step": 14963 }, { "epoch": 0.86, "grad_norm": 0.4281829927047752, "learning_rate": 1.0140908628338796e-06, "loss": 0.2577, "step": 14964 }, { "epoch": 0.86, "grad_norm": 0.2609028341284687, "learning_rate": 1.013274473454191e-06, "loss": 0.2483, "step": 14965 }, { "epoch": 0.86, "grad_norm": 0.6522575513963987, "learning_rate": 1.0124583952844214e-06, "loss": 0.3881, "step": 14966 }, { "epoch": 0.86, "grad_norm": 0.2859295010336732, "learning_rate": 1.0116426283528301e-06, "loss": 0.0838, "step": 14967 }, { "epoch": 0.86, "grad_norm": 0.32065626081543513, "learning_rate": 1.0108271726876684e-06, "loss": 0.239, "step": 14968 }, { "epoch": 0.86, "grad_norm": 0.34553309967875206, "learning_rate": 1.0100120283171733e-06, "loss": 0.2821, "step": 14969 }, { "epoch": 0.86, "grad_norm": 0.7021634157856494, "learning_rate": 1.0091971952695768e-06, "loss": 0.3325, "step": 14970 }, { "epoch": 0.86, "grad_norm": 0.3073095774730255, "learning_rate": 1.008382673573095e-06, "loss": 0.2581, "step": 14971 }, { "epoch": 0.86, "grad_norm": 0.8590640171628728, "learning_rate": 1.0075684632559346e-06, "loss": 0.4479, "step": 14972 }, { "epoch": 0.86, "grad_norm": 0.22537341360656152, "learning_rate": 1.0067545643462895e-06, "loss": 0.2082, "step": 14973 }, { "epoch": 0.86, "grad_norm": 0.3298064748892064, "learning_rate": 1.0059409768723495e-06, "loss": 0.1858, "step": 14974 }, { "epoch": 0.86, "grad_norm": 1.147862075159621, "learning_rate": 1.0051277008622861e-06, "loss": 0.5509, "step": 14975 }, { "epoch": 0.86, "grad_norm": 0.31479894074709697, "learning_rate": 1.004314736344264e-06, "loss": 0.2775, "step": 14976 }, { "epoch": 0.86, "grad_norm": 0.3955090912581276, "learning_rate": 1.0035020833464338e-06, "loss": 0.1986, "step": 14977 }, { "epoch": 0.86, "grad_norm": 0.5281904876913323, "learning_rate": 1.0026897418969417e-06, "loss": 0.3507, "step": 14978 }, { "epoch": 0.86, "grad_norm": 0.30591248057503395, "learning_rate": 1.0018777120239165e-06, "loss": 0.1526, "step": 14979 }, { "epoch": 0.86, "grad_norm": 0.37828916984177724, "learning_rate": 1.0010659937554789e-06, "loss": 0.1737, "step": 14980 }, { "epoch": 0.86, "grad_norm": 0.31146064451236316, "learning_rate": 1.00025458711974e-06, "loss": 0.2923, "step": 14981 }, { "epoch": 0.86, "grad_norm": 0.7401631176839336, "learning_rate": 9.99443492144795e-07, "loss": 0.3723, "step": 14982 }, { "epoch": 0.86, "grad_norm": 0.29215619453143027, "learning_rate": 9.986327088587378e-07, "loss": 0.2067, "step": 14983 }, { "epoch": 0.86, "grad_norm": 0.34927543617015566, "learning_rate": 9.978222372896417e-07, "loss": 0.3224, "step": 14984 }, { "epoch": 0.86, "grad_norm": 0.42741603016621244, "learning_rate": 9.970120774655744e-07, "loss": 0.2293, "step": 14985 }, { "epoch": 0.86, "grad_norm": 0.31526568971659064, "learning_rate": 9.9620222941459e-07, "loss": 0.2038, "step": 14986 }, { "epoch": 0.86, "grad_norm": 0.5250602102861858, "learning_rate": 9.953926931647372e-07, "loss": 0.2446, "step": 14987 }, { "epoch": 0.86, "grad_norm": 0.3332569485498697, "learning_rate": 9.945834687440491e-07, "loss": 0.297, "step": 14988 }, { "epoch": 0.86, "grad_norm": 0.40668710547493864, "learning_rate": 9.937745561805478e-07, "loss": 0.2977, "step": 14989 }, { "epoch": 0.86, "grad_norm": 0.5698087862797053, "learning_rate": 9.92965955502244e-07, "loss": 0.2558, "step": 14990 }, { "epoch": 0.86, "grad_norm": 0.252459910213438, "learning_rate": 9.921576667371458e-07, "loss": 0.1633, "step": 14991 }, { "epoch": 0.86, "grad_norm": 0.3259181171702919, "learning_rate": 9.91349689913238e-07, "loss": 0.2698, "step": 14992 }, { "epoch": 0.86, "grad_norm": 0.33737889420877937, "learning_rate": 9.90542025058503e-07, "loss": 0.2462, "step": 14993 }, { "epoch": 0.86, "grad_norm": 0.6389429058168169, "learning_rate": 9.897346722009095e-07, "loss": 0.3715, "step": 14994 }, { "epoch": 0.86, "grad_norm": 0.6251259359040213, "learning_rate": 9.889276313684171e-07, "loss": 0.3601, "step": 14995 }, { "epoch": 0.86, "grad_norm": 0.2480426106504089, "learning_rate": 9.88120902588975e-07, "loss": 0.2357, "step": 14996 }, { "epoch": 0.86, "grad_norm": 0.3518028245212191, "learning_rate": 9.87314485890517e-07, "loss": 0.1729, "step": 14997 }, { "epoch": 0.86, "grad_norm": 0.6043858429194576, "learning_rate": 9.8650838130097e-07, "loss": 0.3186, "step": 14998 }, { "epoch": 0.86, "grad_norm": 0.37360874956552786, "learning_rate": 9.857025888482518e-07, "loss": 0.2994, "step": 14999 }, { "epoch": 0.86, "grad_norm": 0.33171762764892554, "learning_rate": 9.848971085602655e-07, "loss": 0.2454, "step": 15000 }, { "epoch": 0.86, "grad_norm": 0.620980650429492, "learning_rate": 9.84091940464904e-07, "loss": 0.2839, "step": 15001 }, { "epoch": 0.86, "grad_norm": 0.4069755655302794, "learning_rate": 9.832870845900488e-07, "loss": 0.3088, "step": 15002 }, { "epoch": 0.86, "grad_norm": 0.17326883779583802, "learning_rate": 9.824825409635763e-07, "loss": 0.0697, "step": 15003 }, { "epoch": 0.86, "grad_norm": 0.3107041090929514, "learning_rate": 9.816783096133463e-07, "loss": 0.2584, "step": 15004 }, { "epoch": 0.86, "grad_norm": 0.354031848651155, "learning_rate": 9.80874390567208e-07, "loss": 0.2983, "step": 15005 }, { "epoch": 0.86, "grad_norm": 0.8439772860687007, "learning_rate": 9.800707838530021e-07, "loss": 0.312, "step": 15006 }, { "epoch": 0.86, "grad_norm": 0.41508853480396724, "learning_rate": 9.792674894985553e-07, "loss": 0.2951, "step": 15007 }, { "epoch": 0.86, "grad_norm": 0.5387336155751442, "learning_rate": 9.78464507531689e-07, "loss": 0.3999, "step": 15008 }, { "epoch": 0.86, "grad_norm": 0.30233725667514955, "learning_rate": 9.776618379802093e-07, "loss": 0.1861, "step": 15009 }, { "epoch": 0.86, "grad_norm": 0.4230746134209038, "learning_rate": 9.768594808719113e-07, "loss": 0.2633, "step": 15010 }, { "epoch": 0.86, "grad_norm": 0.48842838044318104, "learning_rate": 9.76057436234581e-07, "loss": 0.3434, "step": 15011 }, { "epoch": 0.86, "grad_norm": 0.283475291759844, "learning_rate": 9.752557040959943e-07, "loss": 0.2384, "step": 15012 }, { "epoch": 0.86, "grad_norm": 0.32374263109755874, "learning_rate": 9.744542844839145e-07, "loss": 0.1523, "step": 15013 }, { "epoch": 0.86, "grad_norm": 0.3835245373644557, "learning_rate": 9.736531774260948e-07, "loss": 0.3108, "step": 15014 }, { "epoch": 0.86, "grad_norm": 0.8657684258513281, "learning_rate": 9.728523829502768e-07, "loss": 0.365, "step": 15015 }, { "epoch": 0.86, "grad_norm": 0.2924587753221655, "learning_rate": 9.720519010841933e-07, "loss": 0.2123, "step": 15016 }, { "epoch": 0.86, "grad_norm": 0.3928088182505235, "learning_rate": 9.712517318555637e-07, "loss": 0.3112, "step": 15017 }, { "epoch": 0.86, "grad_norm": 0.29042677444468945, "learning_rate": 9.704518752921e-07, "loss": 0.1606, "step": 15018 }, { "epoch": 0.86, "grad_norm": 0.3775481237884491, "learning_rate": 9.696523314214978e-07, "loss": 0.1905, "step": 15019 }, { "epoch": 0.86, "grad_norm": 0.2703635013510745, "learning_rate": 9.688531002714464e-07, "loss": 0.2672, "step": 15020 }, { "epoch": 0.86, "grad_norm": 1.2847216612253773, "learning_rate": 9.680541818696254e-07, "loss": 0.5317, "step": 15021 }, { "epoch": 0.86, "grad_norm": 0.5357320718859065, "learning_rate": 9.672555762436997e-07, "loss": 0.2516, "step": 15022 }, { "epoch": 0.86, "grad_norm": 0.29402400287893316, "learning_rate": 9.66457283421325e-07, "loss": 0.2488, "step": 15023 }, { "epoch": 0.86, "grad_norm": 0.2730938089664483, "learning_rate": 9.656593034301432e-07, "loss": 0.2212, "step": 15024 }, { "epoch": 0.86, "grad_norm": 0.5329117555031294, "learning_rate": 9.648616362977959e-07, "loss": 0.3298, "step": 15025 }, { "epoch": 0.86, "grad_norm": 0.3581964345211006, "learning_rate": 9.640642820518997e-07, "loss": 0.2328, "step": 15026 }, { "epoch": 0.86, "grad_norm": 0.6362827691871921, "learning_rate": 9.63267240720067e-07, "loss": 0.3489, "step": 15027 }, { "epoch": 0.86, "grad_norm": 0.29267465950559424, "learning_rate": 9.62470512329904e-07, "loss": 0.2516, "step": 15028 }, { "epoch": 0.86, "grad_norm": 0.36347141522829296, "learning_rate": 9.616740969089967e-07, "loss": 0.2125, "step": 15029 }, { "epoch": 0.86, "grad_norm": 0.2846777407910682, "learning_rate": 9.608779944849278e-07, "loss": 0.1992, "step": 15030 }, { "epoch": 0.86, "grad_norm": 1.2644418677329352, "learning_rate": 9.600822050852654e-07, "loss": 0.7545, "step": 15031 }, { "epoch": 0.86, "grad_norm": 0.21804919025258576, "learning_rate": 9.592867287375652e-07, "loss": 0.2162, "step": 15032 }, { "epoch": 0.86, "grad_norm": 0.593650145047429, "learning_rate": 9.584915654693782e-07, "loss": 0.3514, "step": 15033 }, { "epoch": 0.86, "grad_norm": 0.9895576760248582, "learning_rate": 9.576967153082406e-07, "loss": 0.4155, "step": 15034 }, { "epoch": 0.86, "grad_norm": 0.23430158277619348, "learning_rate": 9.569021782816767e-07, "loss": 0.1612, "step": 15035 }, { "epoch": 0.86, "grad_norm": 0.27556174932560695, "learning_rate": 9.561079544171992e-07, "loss": 0.2646, "step": 15036 }, { "epoch": 0.86, "grad_norm": 0.808225331928117, "learning_rate": 9.553140437423157e-07, "loss": 0.5553, "step": 15037 }, { "epoch": 0.86, "grad_norm": 0.37205200444797243, "learning_rate": 9.545204462845192e-07, "loss": 0.2542, "step": 15038 }, { "epoch": 0.86, "grad_norm": 0.6746390579793102, "learning_rate": 9.537271620712896e-07, "loss": 0.2945, "step": 15039 }, { "epoch": 0.86, "grad_norm": 0.3469141740881783, "learning_rate": 9.529341911300982e-07, "loss": 0.2678, "step": 15040 }, { "epoch": 0.86, "grad_norm": 0.3863948928230087, "learning_rate": 9.521415334884088e-07, "loss": 0.2668, "step": 15041 }, { "epoch": 0.86, "grad_norm": 0.34364358012632695, "learning_rate": 9.513491891736681e-07, "loss": 0.126, "step": 15042 }, { "epoch": 0.86, "grad_norm": 0.26545776152588485, "learning_rate": 9.505571582133166e-07, "loss": 0.2173, "step": 15043 }, { "epoch": 0.86, "grad_norm": 0.34794469679286133, "learning_rate": 9.497654406347812e-07, "loss": 0.2684, "step": 15044 }, { "epoch": 0.86, "grad_norm": 0.4445266623529254, "learning_rate": 9.489740364654776e-07, "loss": 0.2681, "step": 15045 }, { "epoch": 0.86, "grad_norm": 0.6025872062236165, "learning_rate": 9.481829457328162e-07, "loss": 0.3286, "step": 15046 }, { "epoch": 0.86, "grad_norm": 0.37119355184782665, "learning_rate": 9.473921684641896e-07, "loss": 0.2974, "step": 15047 }, { "epoch": 0.86, "grad_norm": 0.3547465998055965, "learning_rate": 9.466017046869835e-07, "loss": 0.3058, "step": 15048 }, { "epoch": 0.86, "grad_norm": 0.8409961481560209, "learning_rate": 9.458115544285684e-07, "loss": 0.3469, "step": 15049 }, { "epoch": 0.86, "grad_norm": 0.32572567903383914, "learning_rate": 9.450217177163123e-07, "loss": 0.2468, "step": 15050 }, { "epoch": 0.86, "grad_norm": 0.2476762696013451, "learning_rate": 9.442321945775646e-07, "loss": 0.206, "step": 15051 }, { "epoch": 0.86, "grad_norm": 0.638595287493113, "learning_rate": 9.434429850396665e-07, "loss": 0.2482, "step": 15052 }, { "epoch": 0.86, "grad_norm": 0.33552988930315564, "learning_rate": 9.426540891299463e-07, "loss": 0.254, "step": 15053 }, { "epoch": 0.86, "grad_norm": 1.286349510665451, "learning_rate": 9.418655068757276e-07, "loss": 0.6941, "step": 15054 }, { "epoch": 0.86, "grad_norm": 0.39628036912695536, "learning_rate": 9.410772383043176e-07, "loss": 0.2364, "step": 15055 }, { "epoch": 0.87, "grad_norm": 0.2941849588804919, "learning_rate": 9.402892834430122e-07, "loss": 0.2882, "step": 15056 }, { "epoch": 0.87, "grad_norm": 0.42903190992151646, "learning_rate": 9.395016423190984e-07, "loss": 0.2431, "step": 15057 }, { "epoch": 0.87, "grad_norm": 0.4266723252802292, "learning_rate": 9.387143149598543e-07, "loss": 0.0998, "step": 15058 }, { "epoch": 0.87, "grad_norm": 0.3951244070626443, "learning_rate": 9.379273013925449e-07, "loss": 0.2723, "step": 15059 }, { "epoch": 0.87, "grad_norm": 0.3569081459857391, "learning_rate": 9.371406016444229e-07, "loss": 0.3066, "step": 15060 }, { "epoch": 0.87, "grad_norm": 0.7314097994167829, "learning_rate": 9.363542157427297e-07, "loss": 0.3921, "step": 15061 }, { "epoch": 0.87, "grad_norm": 0.2963625225107329, "learning_rate": 9.355681437147024e-07, "loss": 0.1872, "step": 15062 }, { "epoch": 0.87, "grad_norm": 0.3283347937303761, "learning_rate": 9.347823855875604e-07, "loss": 0.2963, "step": 15063 }, { "epoch": 0.87, "grad_norm": 0.297389999754831, "learning_rate": 9.339969413885142e-07, "loss": 0.2066, "step": 15064 }, { "epoch": 0.87, "grad_norm": 0.35212332109937267, "learning_rate": 9.33211811144763e-07, "loss": 0.1968, "step": 15065 }, { "epoch": 0.87, "grad_norm": 0.8605096147432884, "learning_rate": 9.324269948834985e-07, "loss": 0.4698, "step": 15066 }, { "epoch": 0.87, "grad_norm": 0.30120778723725855, "learning_rate": 9.316424926318967e-07, "loss": 0.2745, "step": 15067 }, { "epoch": 0.87, "grad_norm": 0.3371975813552838, "learning_rate": 9.30858304417126e-07, "loss": 0.2051, "step": 15068 }, { "epoch": 0.87, "grad_norm": 0.2826029807524462, "learning_rate": 9.300744302663401e-07, "loss": 0.2156, "step": 15069 }, { "epoch": 0.87, "grad_norm": 1.4581018247895585, "learning_rate": 9.292908702066883e-07, "loss": 0.7467, "step": 15070 }, { "epoch": 0.87, "grad_norm": 0.3579852541537003, "learning_rate": 9.285076242653035e-07, "loss": 0.1911, "step": 15071 }, { "epoch": 0.87, "grad_norm": 0.34615695380006883, "learning_rate": 9.277246924693106e-07, "loss": 0.2995, "step": 15072 }, { "epoch": 0.87, "grad_norm": 0.6649173448975993, "learning_rate": 9.269420748458202e-07, "loss": 0.3692, "step": 15073 }, { "epoch": 0.87, "grad_norm": 0.3270716735112408, "learning_rate": 9.261597714219351e-07, "loss": 0.2589, "step": 15074 }, { "epoch": 0.87, "grad_norm": 0.5415825274888945, "learning_rate": 9.253777822247479e-07, "loss": 0.2645, "step": 15075 }, { "epoch": 0.87, "grad_norm": 0.28196842106823133, "learning_rate": 9.24596107281338e-07, "loss": 0.1962, "step": 15076 }, { "epoch": 0.87, "grad_norm": 0.306635893181428, "learning_rate": 9.238147466187742e-07, "loss": 0.2415, "step": 15077 }, { "epoch": 0.87, "grad_norm": 0.7446253266487983, "learning_rate": 9.230337002641144e-07, "loss": 0.3045, "step": 15078 }, { "epoch": 0.87, "grad_norm": 0.3272308542677074, "learning_rate": 9.222529682444081e-07, "loss": 0.2904, "step": 15079 }, { "epoch": 0.87, "grad_norm": 0.3088128697895234, "learning_rate": 9.214725505866929e-07, "loss": 0.2339, "step": 15080 }, { "epoch": 0.87, "grad_norm": 0.9928672480486425, "learning_rate": 9.206924473179913e-07, "loss": 0.3694, "step": 15081 }, { "epoch": 0.87, "grad_norm": 0.3445131715535893, "learning_rate": 9.199126584653184e-07, "loss": 0.1636, "step": 15082 }, { "epoch": 0.87, "grad_norm": 0.3972284436044029, "learning_rate": 9.191331840556816e-07, "loss": 0.2828, "step": 15083 }, { "epoch": 0.87, "grad_norm": 0.27892382887335904, "learning_rate": 9.183540241160715e-07, "loss": 0.2442, "step": 15084 }, { "epoch": 0.87, "grad_norm": 1.0493720657729202, "learning_rate": 9.175751786734722e-07, "loss": 0.5299, "step": 15085 }, { "epoch": 0.87, "grad_norm": 0.313880941212933, "learning_rate": 9.167966477548529e-07, "loss": 0.2344, "step": 15086 }, { "epoch": 0.87, "grad_norm": 0.5243978675286038, "learning_rate": 9.160184313871745e-07, "loss": 0.3414, "step": 15087 }, { "epoch": 0.87, "grad_norm": 0.23520310415924797, "learning_rate": 9.152405295973877e-07, "loss": 0.1365, "step": 15088 }, { "epoch": 0.87, "grad_norm": 0.315451777087604, "learning_rate": 9.144629424124318e-07, "loss": 0.2393, "step": 15089 }, { "epoch": 0.87, "grad_norm": 0.6343131932703363, "learning_rate": 9.136856698592323e-07, "loss": 0.3756, "step": 15090 }, { "epoch": 0.87, "grad_norm": 0.33787984330910664, "learning_rate": 9.129087119647062e-07, "loss": 0.2509, "step": 15091 }, { "epoch": 0.87, "grad_norm": 0.31430488166281095, "learning_rate": 9.121320687557622e-07, "loss": 0.247, "step": 15092 }, { "epoch": 0.87, "grad_norm": 1.2083642377805037, "learning_rate": 9.113557402592965e-07, "loss": 0.5698, "step": 15093 }, { "epoch": 0.87, "grad_norm": 0.272352156350627, "learning_rate": 9.105797265021865e-07, "loss": 0.123, "step": 15094 }, { "epoch": 0.87, "grad_norm": 0.26727226411819544, "learning_rate": 9.098040275113118e-07, "loss": 0.2503, "step": 15095 }, { "epoch": 0.87, "grad_norm": 0.4165842297184612, "learning_rate": 9.09028643313532e-07, "loss": 0.2908, "step": 15096 }, { "epoch": 0.87, "grad_norm": 0.8381958437662107, "learning_rate": 9.082535739357001e-07, "loss": 0.3653, "step": 15097 }, { "epoch": 0.87, "grad_norm": 0.3056167343849403, "learning_rate": 9.074788194046557e-07, "loss": 0.2505, "step": 15098 }, { "epoch": 0.87, "grad_norm": 0.5242561797322017, "learning_rate": 9.067043797472264e-07, "loss": 0.321, "step": 15099 }, { "epoch": 0.87, "grad_norm": 0.2684495155203418, "learning_rate": 9.05930254990236e-07, "loss": 0.2153, "step": 15100 }, { "epoch": 0.87, "grad_norm": 0.4350688261100347, "learning_rate": 9.0515644516049e-07, "loss": 0.188, "step": 15101 }, { "epoch": 0.87, "grad_norm": 0.5120139042621656, "learning_rate": 9.043829502847845e-07, "loss": 0.343, "step": 15102 }, { "epoch": 0.87, "grad_norm": 0.35141044392890897, "learning_rate": 9.036097703899049e-07, "loss": 0.329, "step": 15103 }, { "epoch": 0.87, "grad_norm": 0.3241126312677885, "learning_rate": 9.028369055026287e-07, "loss": 0.1678, "step": 15104 }, { "epoch": 0.87, "grad_norm": 0.6065691927483036, "learning_rate": 9.020643556497211e-07, "loss": 0.3254, "step": 15105 }, { "epoch": 0.87, "grad_norm": 0.7583614764645674, "learning_rate": 9.012921208579317e-07, "loss": 0.3871, "step": 15106 }, { "epoch": 0.87, "grad_norm": 0.25091863341591597, "learning_rate": 9.005202011540037e-07, "loss": 0.2057, "step": 15107 }, { "epoch": 0.87, "grad_norm": 0.290779990812648, "learning_rate": 8.997485965646724e-07, "loss": 0.2231, "step": 15108 }, { "epoch": 0.87, "grad_norm": 1.2478961440750131, "learning_rate": 8.989773071166552e-07, "loss": 0.8038, "step": 15109 }, { "epoch": 0.87, "grad_norm": 0.2953342710533784, "learning_rate": 8.982063328366631e-07, "loss": 0.1998, "step": 15110 }, { "epoch": 0.87, "grad_norm": 0.5276949035579362, "learning_rate": 8.974356737513934e-07, "loss": 0.3769, "step": 15111 }, { "epoch": 0.87, "grad_norm": 0.4520637612174389, "learning_rate": 8.966653298875339e-07, "loss": 0.3065, "step": 15112 }, { "epoch": 0.87, "grad_norm": 0.3955320368936747, "learning_rate": 8.958953012717641e-07, "loss": 0.308, "step": 15113 }, { "epoch": 0.87, "grad_norm": 0.25203374514690974, "learning_rate": 8.951255879307486e-07, "loss": 0.1288, "step": 15114 }, { "epoch": 0.87, "grad_norm": 0.3478143118831568, "learning_rate": 8.943561898911424e-07, "loss": 0.3261, "step": 15115 }, { "epoch": 0.87, "grad_norm": 0.7966981365506505, "learning_rate": 8.935871071795876e-07, "loss": 0.3132, "step": 15116 }, { "epoch": 0.87, "grad_norm": 0.34943890063486505, "learning_rate": 8.928183398227219e-07, "loss": 0.2148, "step": 15117 }, { "epoch": 0.87, "grad_norm": 0.42402665162699893, "learning_rate": 8.920498878471651e-07, "loss": 0.2931, "step": 15118 }, { "epoch": 0.87, "grad_norm": 0.374210011293103, "learning_rate": 8.912817512795302e-07, "loss": 0.2996, "step": 15119 }, { "epoch": 0.87, "grad_norm": 0.2495954819765893, "learning_rate": 8.905139301464139e-07, "loss": 0.1498, "step": 15120 }, { "epoch": 0.87, "grad_norm": 1.1354208775040406, "learning_rate": 8.897464244744103e-07, "loss": 0.808, "step": 15121 }, { "epoch": 0.87, "grad_norm": 0.691843031359836, "learning_rate": 8.88979234290096e-07, "loss": 0.347, "step": 15122 }, { "epoch": 0.87, "grad_norm": 0.25810167310409116, "learning_rate": 8.882123596200387e-07, "loss": 0.2779, "step": 15123 }, { "epoch": 0.87, "grad_norm": 0.664422180294338, "learning_rate": 8.874458004907971e-07, "loss": 0.2791, "step": 15124 }, { "epoch": 0.87, "grad_norm": 0.5426279817665275, "learning_rate": 8.866795569289122e-07, "loss": 0.266, "step": 15125 }, { "epoch": 0.87, "grad_norm": 0.31902793423548365, "learning_rate": 8.859136289609272e-07, "loss": 0.2559, "step": 15126 }, { "epoch": 0.87, "grad_norm": 0.3112222568220345, "learning_rate": 8.851480166133586e-07, "loss": 0.2472, "step": 15127 }, { "epoch": 0.87, "grad_norm": 0.2615724470027559, "learning_rate": 8.843827199127208e-07, "loss": 0.2101, "step": 15128 }, { "epoch": 0.87, "grad_norm": 0.541148258436055, "learning_rate": 8.836177388855183e-07, "loss": 0.3172, "step": 15129 }, { "epoch": 0.87, "grad_norm": 0.4333991198695846, "learning_rate": 8.82853073558243e-07, "loss": 0.2728, "step": 15130 }, { "epoch": 0.87, "grad_norm": 0.3145467505444975, "learning_rate": 8.820887239573728e-07, "loss": 0.2714, "step": 15131 }, { "epoch": 0.87, "grad_norm": 0.5162011923967506, "learning_rate": 8.813246901093763e-07, "loss": 0.3445, "step": 15132 }, { "epoch": 0.87, "grad_norm": 0.1839520405237534, "learning_rate": 8.80560972040716e-07, "loss": 0.123, "step": 15133 }, { "epoch": 0.87, "grad_norm": 0.5364721309908936, "learning_rate": 8.797975697778361e-07, "loss": 0.3405, "step": 15134 }, { "epoch": 0.87, "grad_norm": 0.2625779164027942, "learning_rate": 8.790344833471753e-07, "loss": 0.2696, "step": 15135 }, { "epoch": 0.87, "grad_norm": 0.7388355872405135, "learning_rate": 8.782717127751572e-07, "loss": 0.3782, "step": 15136 }, { "epoch": 0.87, "grad_norm": 0.7507920836315224, "learning_rate": 8.775092580881961e-07, "loss": 0.1164, "step": 15137 }, { "epoch": 0.87, "grad_norm": 0.3979987615357961, "learning_rate": 8.767471193126987e-07, "loss": 0.2978, "step": 15138 }, { "epoch": 0.87, "grad_norm": 0.3314991768932052, "learning_rate": 8.75985296475057e-07, "loss": 0.2987, "step": 15139 }, { "epoch": 0.87, "grad_norm": 0.3167246892166629, "learning_rate": 8.752237896016513e-07, "loss": 0.1281, "step": 15140 }, { "epoch": 0.87, "grad_norm": 0.37153197907359864, "learning_rate": 8.744625987188516e-07, "loss": 0.3026, "step": 15141 }, { "epoch": 0.87, "grad_norm": 0.50583112778751, "learning_rate": 8.737017238530221e-07, "loss": 0.2881, "step": 15142 }, { "epoch": 0.87, "grad_norm": 0.30099834150968574, "learning_rate": 8.729411650305086e-07, "loss": 0.2031, "step": 15143 }, { "epoch": 0.87, "grad_norm": 0.40016934367057977, "learning_rate": 8.721809222776512e-07, "loss": 0.3229, "step": 15144 }, { "epoch": 0.87, "grad_norm": 1.1087364033476483, "learning_rate": 8.71420995620773e-07, "loss": 0.7046, "step": 15145 }, { "epoch": 0.87, "grad_norm": 0.24438582944496376, "learning_rate": 8.706613850861955e-07, "loss": 0.1804, "step": 15146 }, { "epoch": 0.87, "grad_norm": 0.2983420842661307, "learning_rate": 8.69902090700222e-07, "loss": 0.2841, "step": 15147 }, { "epoch": 0.87, "grad_norm": 0.47140707285888844, "learning_rate": 8.691431124891458e-07, "loss": 0.259, "step": 15148 }, { "epoch": 0.87, "grad_norm": 0.5977151144145306, "learning_rate": 8.683844504792516e-07, "loss": 0.2461, "step": 15149 }, { "epoch": 0.87, "grad_norm": 0.3293070448411575, "learning_rate": 8.676261046968082e-07, "loss": 0.2188, "step": 15150 }, { "epoch": 0.87, "grad_norm": 0.35148732981494435, "learning_rate": 8.668680751680836e-07, "loss": 0.3271, "step": 15151 }, { "epoch": 0.87, "grad_norm": 0.5480960876299462, "learning_rate": 8.661103619193235e-07, "loss": 0.3355, "step": 15152 }, { "epoch": 0.87, "grad_norm": 0.3996302396384634, "learning_rate": 8.653529649767689e-07, "loss": 0.2401, "step": 15153 }, { "epoch": 0.87, "grad_norm": 0.28270246411719746, "learning_rate": 8.64595884366648e-07, "loss": 0.194, "step": 15154 }, { "epoch": 0.87, "grad_norm": 0.453050144014881, "learning_rate": 8.638391201151786e-07, "loss": 0.2769, "step": 15155 }, { "epoch": 0.87, "grad_norm": 0.29278379222233086, "learning_rate": 8.630826722485686e-07, "loss": 0.1997, "step": 15156 }, { "epoch": 0.87, "grad_norm": 0.6741223471885249, "learning_rate": 8.623265407930126e-07, "loss": 0.4103, "step": 15157 }, { "epoch": 0.87, "grad_norm": 0.46914591529300426, "learning_rate": 8.615707257746942e-07, "loss": 0.3219, "step": 15158 }, { "epoch": 0.87, "grad_norm": 0.2827772048682046, "learning_rate": 8.608152272197901e-07, "loss": 0.2253, "step": 15159 }, { "epoch": 0.87, "grad_norm": 0.31540560824285985, "learning_rate": 8.600600451544638e-07, "loss": 0.1769, "step": 15160 }, { "epoch": 0.87, "grad_norm": 1.0518770248703995, "learning_rate": 8.593051796048623e-07, "loss": 0.3286, "step": 15161 }, { "epoch": 0.87, "grad_norm": 0.3572068024893413, "learning_rate": 8.58550630597128e-07, "loss": 0.2964, "step": 15162 }, { "epoch": 0.87, "grad_norm": 0.30446816276194233, "learning_rate": 8.577963981573944e-07, "loss": 0.238, "step": 15163 }, { "epoch": 0.87, "grad_norm": 0.5673975171436122, "learning_rate": 8.570424823117785e-07, "loss": 0.3419, "step": 15164 }, { "epoch": 0.87, "grad_norm": 0.38060387474042917, "learning_rate": 8.56288883086388e-07, "loss": 0.2754, "step": 15165 }, { "epoch": 0.87, "grad_norm": 0.19743050333583234, "learning_rate": 8.55535600507319e-07, "loss": 0.1249, "step": 15166 }, { "epoch": 0.87, "grad_norm": 0.4663590562791462, "learning_rate": 8.547826346006594e-07, "loss": 0.2865, "step": 15167 }, { "epoch": 0.87, "grad_norm": 0.3707588567212866, "learning_rate": 8.540299853924849e-07, "loss": 0.3017, "step": 15168 }, { "epoch": 0.87, "grad_norm": 0.7253472188843394, "learning_rate": 8.532776529088582e-07, "loss": 0.304, "step": 15169 }, { "epoch": 0.87, "grad_norm": 0.27858164729369395, "learning_rate": 8.525256371758317e-07, "loss": 0.2592, "step": 15170 }, { "epoch": 0.87, "grad_norm": 0.38438389855409005, "learning_rate": 8.517739382194512e-07, "loss": 0.2976, "step": 15171 }, { "epoch": 0.87, "grad_norm": 0.35220266077902224, "learning_rate": 8.510225560657459e-07, "loss": 0.0923, "step": 15172 }, { "epoch": 0.87, "grad_norm": 1.3201247021161922, "learning_rate": 8.50271490740735e-07, "loss": 0.5593, "step": 15173 }, { "epoch": 0.87, "grad_norm": 0.32291369852792134, "learning_rate": 8.495207422704299e-07, "loss": 0.2695, "step": 15174 }, { "epoch": 0.87, "grad_norm": 0.34051148799422554, "learning_rate": 8.487703106808254e-07, "loss": 0.2965, "step": 15175 }, { "epoch": 0.87, "grad_norm": 1.5064423885781546, "learning_rate": 8.480201959979139e-07, "loss": 0.3937, "step": 15176 }, { "epoch": 0.87, "grad_norm": 0.31970419618362195, "learning_rate": 8.472703982476694e-07, "loss": 0.2563, "step": 15177 }, { "epoch": 0.87, "grad_norm": 0.23113554443336126, "learning_rate": 8.465209174560574e-07, "loss": 0.1821, "step": 15178 }, { "epoch": 0.87, "grad_norm": 0.4125928845847924, "learning_rate": 8.457717536490307e-07, "loss": 0.2227, "step": 15179 }, { "epoch": 0.87, "grad_norm": 0.33512692119389803, "learning_rate": 8.450229068525351e-07, "loss": 0.2999, "step": 15180 }, { "epoch": 0.87, "grad_norm": 0.612047740136571, "learning_rate": 8.442743770925044e-07, "loss": 0.3659, "step": 15181 }, { "epoch": 0.87, "grad_norm": 0.32224924038452785, "learning_rate": 8.435261643948567e-07, "loss": 0.2515, "step": 15182 }, { "epoch": 0.87, "grad_norm": 0.4883487465666448, "learning_rate": 8.427782687855035e-07, "loss": 0.264, "step": 15183 }, { "epoch": 0.87, "grad_norm": 0.5277231776949652, "learning_rate": 8.420306902903464e-07, "loss": 0.217, "step": 15184 }, { "epoch": 0.87, "grad_norm": 0.3979449991906311, "learning_rate": 8.412834289352734e-07, "loss": 0.171, "step": 15185 }, { "epoch": 0.87, "grad_norm": 0.36033874060148274, "learning_rate": 8.405364847461606e-07, "loss": 0.2843, "step": 15186 }, { "epoch": 0.87, "grad_norm": 0.3147674767160009, "learning_rate": 8.397898577488739e-07, "loss": 0.3099, "step": 15187 }, { "epoch": 0.87, "grad_norm": 1.3165994656358568, "learning_rate": 8.390435479692726e-07, "loss": 0.7248, "step": 15188 }, { "epoch": 0.87, "grad_norm": 0.2795136011073454, "learning_rate": 8.382975554331985e-07, "loss": 0.1823, "step": 15189 }, { "epoch": 0.87, "grad_norm": 0.33718281834093666, "learning_rate": 8.375518801664873e-07, "loss": 0.2743, "step": 15190 }, { "epoch": 0.87, "grad_norm": 0.31695562935276533, "learning_rate": 8.368065221949595e-07, "loss": 0.2208, "step": 15191 }, { "epoch": 0.87, "grad_norm": 0.3135609234920389, "learning_rate": 8.360614815444268e-07, "loss": 0.2054, "step": 15192 }, { "epoch": 0.87, "grad_norm": 0.9047987658050868, "learning_rate": 8.353167582406918e-07, "loss": 0.4717, "step": 15193 }, { "epoch": 0.87, "grad_norm": 0.32879063579291573, "learning_rate": 8.345723523095462e-07, "loss": 0.2928, "step": 15194 }, { "epoch": 0.87, "grad_norm": 0.33331989963810826, "learning_rate": 8.338282637767614e-07, "loss": 0.1939, "step": 15195 }, { "epoch": 0.87, "grad_norm": 0.4331013516301031, "learning_rate": 8.330844926681114e-07, "loss": 0.2289, "step": 15196 }, { "epoch": 0.87, "grad_norm": 0.4826586936930948, "learning_rate": 8.323410390093523e-07, "loss": 0.2577, "step": 15197 }, { "epoch": 0.87, "grad_norm": 0.20794647329271113, "learning_rate": 8.315979028262277e-07, "loss": 0.2144, "step": 15198 }, { "epoch": 0.87, "grad_norm": 0.9167179052248758, "learning_rate": 8.308550841444718e-07, "loss": 0.493, "step": 15199 }, { "epoch": 0.87, "grad_norm": 0.9684662978300949, "learning_rate": 8.301125829898126e-07, "loss": 0.6385, "step": 15200 }, { "epoch": 0.87, "grad_norm": 0.31667062423994347, "learning_rate": 8.293703993879599e-07, "loss": 0.262, "step": 15201 }, { "epoch": 0.87, "grad_norm": 0.3174213408244066, "learning_rate": 8.28628533364616e-07, "loss": 0.2297, "step": 15202 }, { "epoch": 0.87, "grad_norm": 0.6367147811096763, "learning_rate": 8.278869849454718e-07, "loss": 0.3327, "step": 15203 }, { "epoch": 0.87, "grad_norm": 0.41740650979949356, "learning_rate": 8.271457541562045e-07, "loss": 0.2915, "step": 15204 }, { "epoch": 0.87, "grad_norm": 0.19492825352143867, "learning_rate": 8.264048410224879e-07, "loss": 0.15, "step": 15205 }, { "epoch": 0.87, "grad_norm": 0.34968830067182716, "learning_rate": 8.25664245569976e-07, "loss": 0.2954, "step": 15206 }, { "epoch": 0.87, "grad_norm": 0.38126528864990045, "learning_rate": 8.249239678243171e-07, "loss": 0.2521, "step": 15207 }, { "epoch": 0.87, "grad_norm": 0.4991098352243059, "learning_rate": 8.241840078111452e-07, "loss": 0.2371, "step": 15208 }, { "epoch": 0.87, "grad_norm": 0.7523629811440098, "learning_rate": 8.234443655560886e-07, "loss": 0.3825, "step": 15209 }, { "epoch": 0.87, "grad_norm": 0.27570811560535324, "learning_rate": 8.227050410847592e-07, "loss": 0.268, "step": 15210 }, { "epoch": 0.87, "grad_norm": 0.23902070137790352, "learning_rate": 8.219660344227587e-07, "loss": 0.2191, "step": 15211 }, { "epoch": 0.87, "grad_norm": 1.005631527918455, "learning_rate": 8.212273455956787e-07, "loss": 0.3593, "step": 15212 }, { "epoch": 0.87, "grad_norm": 0.31857938219013965, "learning_rate": 8.204889746291022e-07, "loss": 0.2688, "step": 15213 }, { "epoch": 0.87, "grad_norm": 0.38049878062907533, "learning_rate": 8.197509215485988e-07, "loss": 0.2892, "step": 15214 }, { "epoch": 0.87, "grad_norm": 0.7259267300163725, "learning_rate": 8.190131863797246e-07, "loss": 0.2979, "step": 15215 }, { "epoch": 0.87, "grad_norm": 0.3431148587829527, "learning_rate": 8.182757691480303e-07, "loss": 0.2404, "step": 15216 }, { "epoch": 0.87, "grad_norm": 0.4465896566952469, "learning_rate": 8.175386698790489e-07, "loss": 0.3118, "step": 15217 }, { "epoch": 0.87, "grad_norm": 0.27790041083033745, "learning_rate": 8.168018885983109e-07, "loss": 0.2146, "step": 15218 }, { "epoch": 0.87, "grad_norm": 0.3068585203990971, "learning_rate": 8.160654253313282e-07, "loss": 0.2547, "step": 15219 }, { "epoch": 0.87, "grad_norm": 0.7012299019399189, "learning_rate": 8.15329280103605e-07, "loss": 0.3996, "step": 15220 }, { "epoch": 0.87, "grad_norm": 0.3526300473721663, "learning_rate": 8.145934529406319e-07, "loss": 0.2639, "step": 15221 }, { "epoch": 0.87, "grad_norm": 0.4329912494767619, "learning_rate": 8.13857943867894e-07, "loss": 0.275, "step": 15222 }, { "epoch": 0.87, "grad_norm": 0.5705532148120219, "learning_rate": 8.13122752910861e-07, "loss": 0.3568, "step": 15223 }, { "epoch": 0.87, "grad_norm": 0.29636431949967834, "learning_rate": 8.123878800949914e-07, "loss": 0.2048, "step": 15224 }, { "epoch": 0.87, "grad_norm": 0.35704937882642657, "learning_rate": 8.116533254457337e-07, "loss": 0.186, "step": 15225 }, { "epoch": 0.87, "grad_norm": 0.3023244294858326, "learning_rate": 8.109190889885277e-07, "loss": 0.2889, "step": 15226 }, { "epoch": 0.87, "grad_norm": 1.1384083091604984, "learning_rate": 8.101851707487973e-07, "loss": 0.4531, "step": 15227 }, { "epoch": 0.87, "grad_norm": 0.36463495630098414, "learning_rate": 8.094515707519623e-07, "loss": 0.1598, "step": 15228 }, { "epoch": 0.87, "grad_norm": 0.544002008754593, "learning_rate": 8.087182890234202e-07, "loss": 0.3947, "step": 15229 }, { "epoch": 0.88, "grad_norm": 0.25996577539505555, "learning_rate": 8.079853255885705e-07, "loss": 0.2442, "step": 15230 }, { "epoch": 0.88, "grad_norm": 0.22928359992702024, "learning_rate": 8.072526804727943e-07, "loss": 0.1601, "step": 15231 }, { "epoch": 0.88, "grad_norm": 0.548664404540612, "learning_rate": 8.06520353701461e-07, "loss": 0.369, "step": 15232 }, { "epoch": 0.88, "grad_norm": 1.4342997506272013, "learning_rate": 8.057883452999316e-07, "loss": 0.5444, "step": 15233 }, { "epoch": 0.88, "grad_norm": 0.22084093260537044, "learning_rate": 8.050566552935579e-07, "loss": 0.2115, "step": 15234 }, { "epoch": 0.88, "grad_norm": 1.2855665998775354, "learning_rate": 8.043252837076776e-07, "loss": 0.6016, "step": 15235 }, { "epoch": 0.88, "grad_norm": 0.43839777602461616, "learning_rate": 8.03594230567617e-07, "loss": 0.3084, "step": 15236 }, { "epoch": 0.88, "grad_norm": 0.3919309008857394, "learning_rate": 8.028634958986903e-07, "loss": 0.271, "step": 15237 }, { "epoch": 0.88, "grad_norm": 0.2905695696643427, "learning_rate": 8.021330797262072e-07, "loss": 0.2399, "step": 15238 }, { "epoch": 0.88, "grad_norm": 0.5220398686554123, "learning_rate": 8.01402982075461e-07, "loss": 0.2565, "step": 15239 }, { "epoch": 0.88, "grad_norm": 0.5903199356384773, "learning_rate": 8.006732029717335e-07, "loss": 0.2823, "step": 15240 }, { "epoch": 0.88, "grad_norm": 0.3598694129624296, "learning_rate": 7.999437424402967e-07, "loss": 0.2532, "step": 15241 }, { "epoch": 0.88, "grad_norm": 0.33400510821115537, "learning_rate": 7.992146005064105e-07, "loss": 0.3282, "step": 15242 }, { "epoch": 0.88, "grad_norm": 0.5293015346627737, "learning_rate": 7.984857771953303e-07, "loss": 0.2665, "step": 15243 }, { "epoch": 0.88, "grad_norm": 0.21466042777465733, "learning_rate": 7.977572725322913e-07, "loss": 0.1591, "step": 15244 }, { "epoch": 0.88, "grad_norm": 0.4726424277010858, "learning_rate": 7.970290865425212e-07, "loss": 0.3258, "step": 15245 }, { "epoch": 0.88, "grad_norm": 0.4202027933289129, "learning_rate": 7.963012192512376e-07, "loss": 0.2639, "step": 15246 }, { "epoch": 0.88, "grad_norm": 0.3945913782257165, "learning_rate": 7.955736706836481e-07, "loss": 0.2548, "step": 15247 }, { "epoch": 0.88, "grad_norm": 0.6633476242535613, "learning_rate": 7.94846440864947e-07, "loss": 0.3651, "step": 15248 }, { "epoch": 0.88, "grad_norm": 0.3693773855404697, "learning_rate": 7.941195298203175e-07, "loss": 0.2616, "step": 15249 }, { "epoch": 0.88, "grad_norm": 0.260791519573847, "learning_rate": 7.933929375749317e-07, "loss": 0.2281, "step": 15250 }, { "epoch": 0.88, "grad_norm": 0.5155000696175669, "learning_rate": 7.926666641539538e-07, "loss": 0.1134, "step": 15251 }, { "epoch": 0.88, "grad_norm": 0.3980620444030963, "learning_rate": 7.919407095825337e-07, "loss": 0.2641, "step": 15252 }, { "epoch": 0.88, "grad_norm": 0.5403914162100106, "learning_rate": 7.912150738858104e-07, "loss": 0.3727, "step": 15253 }, { "epoch": 0.88, "grad_norm": 0.3470276410754074, "learning_rate": 7.904897570889136e-07, "loss": 0.2602, "step": 15254 }, { "epoch": 0.88, "grad_norm": 0.3700558072591282, "learning_rate": 7.897647592169578e-07, "loss": 0.2559, "step": 15255 }, { "epoch": 0.88, "grad_norm": 0.531602700604024, "learning_rate": 7.890400802950548e-07, "loss": 0.3307, "step": 15256 }, { "epoch": 0.88, "grad_norm": 0.25900870477851295, "learning_rate": 7.883157203482982e-07, "loss": 0.1727, "step": 15257 }, { "epoch": 0.88, "grad_norm": 0.5904195157887109, "learning_rate": 7.875916794017713e-07, "loss": 0.2851, "step": 15258 }, { "epoch": 0.88, "grad_norm": 0.38738009696102815, "learning_rate": 7.868679574805472e-07, "loss": 0.3192, "step": 15259 }, { "epoch": 0.88, "grad_norm": 0.6042547956260029, "learning_rate": 7.861445546096902e-07, "loss": 0.3113, "step": 15260 }, { "epoch": 0.88, "grad_norm": 0.29172509317629647, "learning_rate": 7.854214708142538e-07, "loss": 0.2586, "step": 15261 }, { "epoch": 0.88, "grad_norm": 0.3643435241482438, "learning_rate": 7.846987061192723e-07, "loss": 0.2565, "step": 15262 }, { "epoch": 0.88, "grad_norm": 0.3800601781197826, "learning_rate": 7.839762605497791e-07, "loss": 0.1566, "step": 15263 }, { "epoch": 0.88, "grad_norm": 0.30248056133318446, "learning_rate": 7.83254134130793e-07, "loss": 0.1106, "step": 15264 }, { "epoch": 0.88, "grad_norm": 0.3365648909541551, "learning_rate": 7.825323268873187e-07, "loss": 0.2817, "step": 15265 }, { "epoch": 0.88, "grad_norm": 0.4406402840444798, "learning_rate": 7.818108388443546e-07, "loss": 0.3342, "step": 15266 }, { "epoch": 0.88, "grad_norm": 0.7090457854227306, "learning_rate": 7.810896700268822e-07, "loss": 0.223, "step": 15267 }, { "epoch": 0.88, "grad_norm": 0.2873213588453266, "learning_rate": 7.803688204598803e-07, "loss": 0.2282, "step": 15268 }, { "epoch": 0.88, "grad_norm": 0.36227730773662065, "learning_rate": 7.796482901683089e-07, "loss": 0.2392, "step": 15269 }, { "epoch": 0.88, "grad_norm": 0.3593253101252005, "learning_rate": 7.789280791771214e-07, "loss": 0.1979, "step": 15270 }, { "epoch": 0.88, "grad_norm": 0.3621820947758829, "learning_rate": 7.782081875112568e-07, "loss": 0.2827, "step": 15271 }, { "epoch": 0.88, "grad_norm": 0.6322813462372843, "learning_rate": 7.774886151956473e-07, "loss": 0.3907, "step": 15272 }, { "epoch": 0.88, "grad_norm": 0.2985759612155513, "learning_rate": 7.767693622552097e-07, "loss": 0.2031, "step": 15273 }, { "epoch": 0.88, "grad_norm": 0.40022176584749375, "learning_rate": 7.76050428714854e-07, "loss": 0.2846, "step": 15274 }, { "epoch": 0.88, "grad_norm": 0.3104592255829074, "learning_rate": 7.753318145994727e-07, "loss": 0.1448, "step": 15275 }, { "epoch": 0.88, "grad_norm": 0.49120492596421095, "learning_rate": 7.746135199339556e-07, "loss": 0.3065, "step": 15276 }, { "epoch": 0.88, "grad_norm": 0.28632281845942664, "learning_rate": 7.738955447431762e-07, "loss": 0.2348, "step": 15277 }, { "epoch": 0.88, "grad_norm": 0.4665069876855719, "learning_rate": 7.731778890519969e-07, "loss": 0.3292, "step": 15278 }, { "epoch": 0.88, "grad_norm": 1.4626098353168415, "learning_rate": 7.724605528852702e-07, "loss": 0.5868, "step": 15279 }, { "epoch": 0.88, "grad_norm": 0.35047118850359527, "learning_rate": 7.717435362678361e-07, "loss": 0.1945, "step": 15280 }, { "epoch": 0.88, "grad_norm": 0.23788708299943856, "learning_rate": 7.71026839224529e-07, "loss": 0.2045, "step": 15281 }, { "epoch": 0.88, "grad_norm": 0.6010442606371211, "learning_rate": 7.703104617801649e-07, "loss": 0.3379, "step": 15282 }, { "epoch": 0.88, "grad_norm": 0.32129184135251976, "learning_rate": 7.695944039595526e-07, "loss": 0.2314, "step": 15283 }, { "epoch": 0.88, "grad_norm": 1.2358162206069345, "learning_rate": 7.688786657874881e-07, "loss": 0.757, "step": 15284 }, { "epoch": 0.88, "grad_norm": 0.302450566459852, "learning_rate": 7.681632472887601e-07, "loss": 0.2894, "step": 15285 }, { "epoch": 0.88, "grad_norm": 0.36525775072758804, "learning_rate": 7.674481484881413e-07, "loss": 0.2433, "step": 15286 }, { "epoch": 0.88, "grad_norm": 0.12990322688717965, "learning_rate": 7.667333694103962e-07, "loss": 0.07, "step": 15287 }, { "epoch": 0.88, "grad_norm": 0.530598155745847, "learning_rate": 7.660189100802762e-07, "loss": 0.3354, "step": 15288 }, { "epoch": 0.88, "grad_norm": 0.26335616434420217, "learning_rate": 7.653047705225258e-07, "loss": 0.2555, "step": 15289 }, { "epoch": 0.88, "grad_norm": 0.4268927274168955, "learning_rate": 7.645909507618732e-07, "loss": 0.2675, "step": 15290 }, { "epoch": 0.88, "grad_norm": 0.7647099515322016, "learning_rate": 7.638774508230395e-07, "loss": 0.4027, "step": 15291 }, { "epoch": 0.88, "grad_norm": 0.3398062115060478, "learning_rate": 7.631642707307319e-07, "loss": 0.2611, "step": 15292 }, { "epoch": 0.88, "grad_norm": 0.2679660624561897, "learning_rate": 7.624514105096492e-07, "loss": 0.2, "step": 15293 }, { "epoch": 0.88, "grad_norm": 0.6026562844344492, "learning_rate": 7.617388701844764e-07, "loss": 0.3465, "step": 15294 }, { "epoch": 0.88, "grad_norm": 0.25505961167579344, "learning_rate": 7.610266497798913e-07, "loss": 0.2145, "step": 15295 }, { "epoch": 0.88, "grad_norm": 1.200358480855169, "learning_rate": 7.603147493205531e-07, "loss": 0.3905, "step": 15296 }, { "epoch": 0.88, "grad_norm": 0.3565559048959195, "learning_rate": 7.59603168831119e-07, "loss": 0.2969, "step": 15297 }, { "epoch": 0.88, "grad_norm": 0.3149087956944596, "learning_rate": 7.588919083362301e-07, "loss": 0.2396, "step": 15298 }, { "epoch": 0.88, "grad_norm": 0.6796516619216567, "learning_rate": 7.581809678605167e-07, "loss": 0.3689, "step": 15299 }, { "epoch": 0.88, "grad_norm": 0.34306570273858433, "learning_rate": 7.574703474285971e-07, "loss": 0.2018, "step": 15300 }, { "epoch": 0.88, "grad_norm": 0.253075691212108, "learning_rate": 7.567600470650849e-07, "loss": 0.2466, "step": 15301 }, { "epoch": 0.88, "grad_norm": 0.41639118533895647, "learning_rate": 7.560500667945736e-07, "loss": 0.2633, "step": 15302 }, { "epoch": 0.88, "grad_norm": 0.9729597607745787, "learning_rate": 7.553404066416514e-07, "loss": 0.2259, "step": 15303 }, { "epoch": 0.88, "grad_norm": 0.36267534769957355, "learning_rate": 7.546310666308909e-07, "loss": 0.2609, "step": 15304 }, { "epoch": 0.88, "grad_norm": 0.3762109035053571, "learning_rate": 7.539220467868613e-07, "loss": 0.2912, "step": 15305 }, { "epoch": 0.88, "grad_norm": 0.37831387911988446, "learning_rate": 7.532133471341141e-07, "loss": 0.1914, "step": 15306 }, { "epoch": 0.88, "grad_norm": 0.2679166493911752, "learning_rate": 7.525049676971907e-07, "loss": 0.2282, "step": 15307 }, { "epoch": 0.88, "grad_norm": 1.1468120655160323, "learning_rate": 7.517969085006227e-07, "loss": 0.7836, "step": 15308 }, { "epoch": 0.88, "grad_norm": 0.3255850537957562, "learning_rate": 7.510891695689282e-07, "loss": 0.2195, "step": 15309 }, { "epoch": 0.88, "grad_norm": 0.35726564990652665, "learning_rate": 7.503817509266198e-07, "loss": 0.2898, "step": 15310 }, { "epoch": 0.88, "grad_norm": 0.6503155915878299, "learning_rate": 7.496746525981935e-07, "loss": 0.3677, "step": 15311 }, { "epoch": 0.88, "grad_norm": 0.37840816000222754, "learning_rate": 7.489678746081364e-07, "loss": 0.2404, "step": 15312 }, { "epoch": 0.88, "grad_norm": 0.29412005921689116, "learning_rate": 7.482614169809222e-07, "loss": 0.1814, "step": 15313 }, { "epoch": 0.88, "grad_norm": 0.4733546443356241, "learning_rate": 7.475552797410191e-07, "loss": 0.396, "step": 15314 }, { "epoch": 0.88, "grad_norm": 0.44759966457147815, "learning_rate": 7.468494629128786e-07, "loss": 0.2897, "step": 15315 }, { "epoch": 0.88, "grad_norm": 0.32367457949655865, "learning_rate": 7.461439665209435e-07, "loss": 0.2157, "step": 15316 }, { "epoch": 0.88, "grad_norm": 0.4080924876089095, "learning_rate": 7.454387905896432e-07, "loss": 0.306, "step": 15317 }, { "epoch": 0.88, "grad_norm": 1.2171090379852552, "learning_rate": 7.447339351434013e-07, "loss": 0.4721, "step": 15318 }, { "epoch": 0.88, "grad_norm": 0.2858970690734332, "learning_rate": 7.44029400206625e-07, "loss": 0.1938, "step": 15319 }, { "epoch": 0.88, "grad_norm": 1.2425084966006583, "learning_rate": 7.433251858037127e-07, "loss": 0.7118, "step": 15320 }, { "epoch": 0.88, "grad_norm": 0.2196747731628239, "learning_rate": 7.426212919590503e-07, "loss": 0.2056, "step": 15321 }, { "epoch": 0.88, "grad_norm": 0.29882954862740696, "learning_rate": 7.419177186970139e-07, "loss": 0.2113, "step": 15322 }, { "epoch": 0.88, "grad_norm": 0.8049704432913872, "learning_rate": 7.412144660419706e-07, "loss": 0.4391, "step": 15323 }, { "epoch": 0.88, "grad_norm": 0.5968983898094012, "learning_rate": 7.405115340182723e-07, "loss": 0.3162, "step": 15324 }, { "epoch": 0.88, "grad_norm": 0.28930754291418537, "learning_rate": 7.398089226502603e-07, "loss": 0.2629, "step": 15325 }, { "epoch": 0.88, "grad_norm": 1.1015836359844582, "learning_rate": 7.391066319622664e-07, "loss": 0.5367, "step": 15326 }, { "epoch": 0.88, "grad_norm": 0.3365458212983297, "learning_rate": 7.384046619786123e-07, "loss": 0.2162, "step": 15327 }, { "epoch": 0.88, "grad_norm": 0.44176905324324517, "learning_rate": 7.377030127236073e-07, "loss": 0.2576, "step": 15328 }, { "epoch": 0.88, "grad_norm": 0.2811618795819501, "learning_rate": 7.370016842215488e-07, "loss": 0.2339, "step": 15329 }, { "epoch": 0.88, "grad_norm": 1.281599200325273, "learning_rate": 7.363006764967228e-07, "loss": 0.6169, "step": 15330 }, { "epoch": 0.88, "grad_norm": 0.3883951981613511, "learning_rate": 7.355999895734067e-07, "loss": 0.2458, "step": 15331 }, { "epoch": 0.88, "grad_norm": 0.35906664183412457, "learning_rate": 7.348996234758643e-07, "loss": 0.2571, "step": 15332 }, { "epoch": 0.88, "grad_norm": 0.4223644010226172, "learning_rate": 7.341995782283506e-07, "loss": 0.2908, "step": 15333 }, { "epoch": 0.88, "grad_norm": 0.39088380406576523, "learning_rate": 7.334998538551042e-07, "loss": 0.273, "step": 15334 }, { "epoch": 0.88, "grad_norm": 0.3214635287901621, "learning_rate": 7.328004503803609e-07, "loss": 0.1508, "step": 15335 }, { "epoch": 0.88, "grad_norm": 0.5600623509613721, "learning_rate": 7.321013678283407e-07, "loss": 0.3081, "step": 15336 }, { "epoch": 0.88, "grad_norm": 0.272040944032163, "learning_rate": 7.314026062232504e-07, "loss": 0.244, "step": 15337 }, { "epoch": 0.88, "grad_norm": 1.168178953668496, "learning_rate": 7.307041655892877e-07, "loss": 0.7761, "step": 15338 }, { "epoch": 0.88, "grad_norm": 0.5828528196274684, "learning_rate": 7.300060459506431e-07, "loss": 0.3033, "step": 15339 }, { "epoch": 0.88, "grad_norm": 0.25286178191941955, "learning_rate": 7.293082473314905e-07, "loss": 0.2107, "step": 15340 }, { "epoch": 0.88, "grad_norm": 0.2580447143807208, "learning_rate": 7.286107697559952e-07, "loss": 0.2347, "step": 15341 }, { "epoch": 0.88, "grad_norm": 1.3155362942915954, "learning_rate": 7.279136132483078e-07, "loss": 0.2337, "step": 15342 }, { "epoch": 0.88, "grad_norm": 0.3069915058726905, "learning_rate": 7.272167778325756e-07, "loss": 0.2612, "step": 15343 }, { "epoch": 0.88, "grad_norm": 0.872607402488223, "learning_rate": 7.265202635329272e-07, "loss": 0.5209, "step": 15344 }, { "epoch": 0.88, "grad_norm": 0.33572217596314796, "learning_rate": 7.258240703734832e-07, "loss": 0.2534, "step": 15345 }, { "epoch": 0.88, "grad_norm": 0.36957113937852054, "learning_rate": 7.251281983783532e-07, "loss": 0.2541, "step": 15346 }, { "epoch": 0.88, "grad_norm": 0.25658032069472986, "learning_rate": 7.244326475716323e-07, "loss": 0.1934, "step": 15347 }, { "epoch": 0.88, "grad_norm": 0.36518152555370853, "learning_rate": 7.237374179774125e-07, "loss": 0.2461, "step": 15348 }, { "epoch": 0.88, "grad_norm": 0.3860018083993852, "learning_rate": 7.230425096197669e-07, "loss": 0.2512, "step": 15349 }, { "epoch": 0.88, "grad_norm": 0.4407144779168672, "learning_rate": 7.223479225227603e-07, "loss": 0.3225, "step": 15350 }, { "epoch": 0.88, "grad_norm": 0.8099477334987211, "learning_rate": 7.216536567104449e-07, "loss": 0.4107, "step": 15351 }, { "epoch": 0.88, "grad_norm": 0.3715733078958953, "learning_rate": 7.209597122068657e-07, "loss": 0.1808, "step": 15352 }, { "epoch": 0.88, "grad_norm": 0.2045474274396735, "learning_rate": 7.202660890360524e-07, "loss": 0.2173, "step": 15353 }, { "epoch": 0.88, "grad_norm": 1.600767535273116, "learning_rate": 7.195727872220248e-07, "loss": 0.7834, "step": 15354 }, { "epoch": 0.88, "grad_norm": 0.2723621726814691, "learning_rate": 7.188798067887926e-07, "loss": 0.1995, "step": 15355 }, { "epoch": 0.88, "grad_norm": 0.4313450825328893, "learning_rate": 7.181871477603542e-07, "loss": 0.3322, "step": 15356 }, { "epoch": 0.88, "grad_norm": 0.43795447255344067, "learning_rate": 7.174948101606949e-07, "loss": 0.3239, "step": 15357 }, { "epoch": 0.88, "grad_norm": 0.3489367764018725, "learning_rate": 7.168027940137923e-07, "loss": 0.1882, "step": 15358 }, { "epoch": 0.88, "grad_norm": 0.2626421008762479, "learning_rate": 7.161110993436093e-07, "loss": 0.1515, "step": 15359 }, { "epoch": 0.88, "grad_norm": 0.3860502345972539, "learning_rate": 7.15419726174098e-07, "loss": 0.2858, "step": 15360 }, { "epoch": 0.88, "grad_norm": 0.36349650078670603, "learning_rate": 7.147286745292049e-07, "loss": 0.1971, "step": 15361 }, { "epoch": 0.88, "grad_norm": 0.44592750277136384, "learning_rate": 7.140379444328571e-07, "loss": 0.3327, "step": 15362 }, { "epoch": 0.88, "grad_norm": 1.0136525434025818, "learning_rate": 7.13347535908977e-07, "loss": 0.5487, "step": 15363 }, { "epoch": 0.88, "grad_norm": 0.405578078064654, "learning_rate": 7.126574489814719e-07, "loss": 0.2968, "step": 15364 }, { "epoch": 0.88, "grad_norm": 0.2241058466957011, "learning_rate": 7.119676836742407e-07, "loss": 0.1817, "step": 15365 }, { "epoch": 0.88, "grad_norm": 0.5356592030749281, "learning_rate": 7.112782400111684e-07, "loss": 0.2669, "step": 15366 }, { "epoch": 0.88, "grad_norm": 0.6030656386460477, "learning_rate": 7.105891180161306e-07, "loss": 0.3748, "step": 15367 }, { "epoch": 0.88, "grad_norm": 0.24970782740649353, "learning_rate": 7.099003177129926e-07, "loss": 0.2436, "step": 15368 }, { "epoch": 0.88, "grad_norm": 1.3865134035245326, "learning_rate": 7.092118391256076e-07, "loss": 0.6055, "step": 15369 }, { "epoch": 0.88, "grad_norm": 0.7176124074822394, "learning_rate": 7.085236822778174e-07, "loss": 0.2688, "step": 15370 }, { "epoch": 0.88, "grad_norm": 0.2343011862368419, "learning_rate": 7.078358471934521e-07, "loss": 0.1487, "step": 15371 }, { "epoch": 0.88, "grad_norm": 0.378432979445646, "learning_rate": 7.071483338963303e-07, "loss": 0.3006, "step": 15372 }, { "epoch": 0.88, "grad_norm": 0.48086836045064396, "learning_rate": 7.064611424102641e-07, "loss": 0.2961, "step": 15373 }, { "epoch": 0.88, "grad_norm": 0.37437277656526946, "learning_rate": 7.057742727590478e-07, "loss": 0.3221, "step": 15374 }, { "epoch": 0.88, "grad_norm": 0.9436427928262109, "learning_rate": 7.050877249664701e-07, "loss": 0.3674, "step": 15375 }, { "epoch": 0.88, "grad_norm": 0.2964656910026224, "learning_rate": 7.04401499056302e-07, "loss": 0.2569, "step": 15376 }, { "epoch": 0.88, "grad_norm": 0.3690822885989384, "learning_rate": 7.037155950523123e-07, "loss": 0.307, "step": 15377 }, { "epoch": 0.88, "grad_norm": 0.26884006202752997, "learning_rate": 7.030300129782519e-07, "loss": 0.131, "step": 15378 }, { "epoch": 0.88, "grad_norm": 0.40264916010642365, "learning_rate": 7.023447528578631e-07, "loss": 0.2621, "step": 15379 }, { "epoch": 0.88, "grad_norm": 0.35794945612338336, "learning_rate": 7.016598147148735e-07, "loss": 0.2974, "step": 15380 }, { "epoch": 0.88, "grad_norm": 0.5066463148507542, "learning_rate": 7.009751985730062e-07, "loss": 0.2751, "step": 15381 }, { "epoch": 0.88, "grad_norm": 0.8604249835369419, "learning_rate": 7.00290904455968e-07, "loss": 0.3744, "step": 15382 }, { "epoch": 0.88, "grad_norm": 0.29377213308048267, "learning_rate": 6.996069323874555e-07, "loss": 0.2061, "step": 15383 }, { "epoch": 0.88, "grad_norm": 0.30323452832902037, "learning_rate": 6.989232823911551e-07, "loss": 0.2308, "step": 15384 }, { "epoch": 0.88, "grad_norm": 0.7647519096570284, "learning_rate": 6.982399544907403e-07, "loss": 0.3747, "step": 15385 }, { "epoch": 0.88, "grad_norm": 0.36798900089852343, "learning_rate": 6.975569487098766e-07, "loss": 0.3002, "step": 15386 }, { "epoch": 0.88, "grad_norm": 1.3360407176215778, "learning_rate": 6.968742650722172e-07, "loss": 0.7616, "step": 15387 }, { "epoch": 0.88, "grad_norm": 0.2787925854981777, "learning_rate": 6.961919036014009e-07, "loss": 0.2048, "step": 15388 }, { "epoch": 0.88, "grad_norm": 0.4689798303850397, "learning_rate": 6.955098643210578e-07, "loss": 0.2941, "step": 15389 }, { "epoch": 0.88, "grad_norm": 0.641487238466754, "learning_rate": 6.94828147254809e-07, "loss": 0.3726, "step": 15390 }, { "epoch": 0.88, "grad_norm": 0.2461102831506282, "learning_rate": 6.941467524262613e-07, "loss": 0.1119, "step": 15391 }, { "epoch": 0.88, "grad_norm": 0.25551098270018807, "learning_rate": 6.934656798590122e-07, "loss": 0.281, "step": 15392 }, { "epoch": 0.88, "grad_norm": 1.1202496115317333, "learning_rate": 6.927849295766442e-07, "loss": 0.5997, "step": 15393 }, { "epoch": 0.88, "grad_norm": 0.6445255900823864, "learning_rate": 6.92104501602735e-07, "loss": 0.0993, "step": 15394 }, { "epoch": 0.88, "grad_norm": 0.40685012455812186, "learning_rate": 6.91424395960848e-07, "loss": 0.295, "step": 15395 }, { "epoch": 0.88, "grad_norm": 0.3592962271783745, "learning_rate": 6.907446126745332e-07, "loss": 0.3047, "step": 15396 }, { "epoch": 0.88, "grad_norm": 0.2749244052812137, "learning_rate": 6.900651517673318e-07, "loss": 0.1568, "step": 15397 }, { "epoch": 0.88, "grad_norm": 0.6044148011731777, "learning_rate": 6.893860132627739e-07, "loss": 0.3646, "step": 15398 }, { "epoch": 0.88, "grad_norm": 0.49796483369447875, "learning_rate": 6.887071971843783e-07, "loss": 0.2891, "step": 15399 }, { "epoch": 0.88, "grad_norm": 0.33803796040103634, "learning_rate": 6.880287035556521e-07, "loss": 0.2728, "step": 15400 }, { "epoch": 0.88, "grad_norm": 0.343782499945726, "learning_rate": 6.873505324000895e-07, "loss": 0.2097, "step": 15401 }, { "epoch": 0.88, "grad_norm": 0.6284160615889932, "learning_rate": 6.866726837411797e-07, "loss": 0.3594, "step": 15402 }, { "epoch": 0.88, "grad_norm": 0.3844903560072926, "learning_rate": 6.859951576023937e-07, "loss": 0.2492, "step": 15403 }, { "epoch": 0.89, "grad_norm": 0.22190970897036372, "learning_rate": 6.853179540071963e-07, "loss": 0.2188, "step": 15404 }, { "epoch": 0.89, "grad_norm": 0.46152722721730727, "learning_rate": 6.846410729790342e-07, "loss": 0.2762, "step": 15405 }, { "epoch": 0.89, "grad_norm": 0.6960344758067857, "learning_rate": 6.839645145413543e-07, "loss": 0.3359, "step": 15406 }, { "epoch": 0.89, "grad_norm": 0.3315547930125616, "learning_rate": 6.832882787175809e-07, "loss": 0.2416, "step": 15407 }, { "epoch": 0.89, "grad_norm": 0.34418183719622625, "learning_rate": 6.826123655311356e-07, "loss": 0.2972, "step": 15408 }, { "epoch": 0.89, "grad_norm": 0.7768735537156152, "learning_rate": 6.819367750054217e-07, "loss": 0.4253, "step": 15409 }, { "epoch": 0.89, "grad_norm": 0.3022317412638133, "learning_rate": 6.812615071638363e-07, "loss": 0.2187, "step": 15410 }, { "epoch": 0.89, "grad_norm": 0.30097106667699985, "learning_rate": 6.805865620297659e-07, "loss": 0.1357, "step": 15411 }, { "epoch": 0.89, "grad_norm": 0.3699431546810101, "learning_rate": 6.799119396265807e-07, "loss": 0.2815, "step": 15412 }, { "epoch": 0.89, "grad_norm": 0.36422006459287853, "learning_rate": 6.792376399776457e-07, "loss": 0.2794, "step": 15413 }, { "epoch": 0.89, "grad_norm": 0.7376350234779959, "learning_rate": 6.785636631063075e-07, "loss": 0.2866, "step": 15414 }, { "epoch": 0.89, "grad_norm": 0.4651567840856827, "learning_rate": 6.778900090359119e-07, "loss": 0.3252, "step": 15415 }, { "epoch": 0.89, "grad_norm": 0.3479007390754298, "learning_rate": 6.772166777897838e-07, "loss": 0.2525, "step": 15416 }, { "epoch": 0.89, "grad_norm": 0.20645592583378722, "learning_rate": 6.765436693912408e-07, "loss": 0.1553, "step": 15417 }, { "epoch": 0.89, "grad_norm": 0.5925146855129958, "learning_rate": 6.758709838635879e-07, "loss": 0.3312, "step": 15418 }, { "epoch": 0.89, "grad_norm": 0.36018604511633256, "learning_rate": 6.751986212301242e-07, "loss": 0.2976, "step": 15419 }, { "epoch": 0.89, "grad_norm": 0.34434040713944425, "learning_rate": 6.745265815141311e-07, "loss": 0.2374, "step": 15420 }, { "epoch": 0.89, "grad_norm": 1.2688158338463227, "learning_rate": 6.73854864738881e-07, "loss": 0.5905, "step": 15421 }, { "epoch": 0.89, "grad_norm": 0.3130992444743742, "learning_rate": 6.731834709276353e-07, "loss": 0.2281, "step": 15422 }, { "epoch": 0.89, "grad_norm": 0.373687076761885, "learning_rate": 6.725124001036454e-07, "loss": 0.2071, "step": 15423 }, { "epoch": 0.89, "grad_norm": 0.49870489961270104, "learning_rate": 6.718416522901506e-07, "loss": 0.3269, "step": 15424 }, { "epoch": 0.89, "grad_norm": 0.23765638274613635, "learning_rate": 6.711712275103776e-07, "loss": 0.221, "step": 15425 }, { "epoch": 0.89, "grad_norm": 1.3561355023422061, "learning_rate": 6.705011257875449e-07, "loss": 0.5169, "step": 15426 }, { "epoch": 0.89, "grad_norm": 0.3737441156518405, "learning_rate": 6.698313471448547e-07, "loss": 0.2337, "step": 15427 }, { "epoch": 0.89, "grad_norm": 0.28412374842415977, "learning_rate": 6.691618916055053e-07, "loss": 0.2634, "step": 15428 }, { "epoch": 0.89, "grad_norm": 1.260617821916191, "learning_rate": 6.684927591926793e-07, "loss": 0.6065, "step": 15429 }, { "epoch": 0.89, "grad_norm": 0.416719995810165, "learning_rate": 6.678239499295469e-07, "loss": 0.1985, "step": 15430 }, { "epoch": 0.89, "grad_norm": 0.3731138532303315, "learning_rate": 6.671554638392696e-07, "loss": 0.2707, "step": 15431 }, { "epoch": 0.89, "grad_norm": 0.26428445121349753, "learning_rate": 6.664873009449979e-07, "loss": 0.2391, "step": 15432 }, { "epoch": 0.89, "grad_norm": 0.4735796217441745, "learning_rate": 6.658194612698687e-07, "loss": 0.2467, "step": 15433 }, { "epoch": 0.89, "grad_norm": 0.3392053344013183, "learning_rate": 6.651519448370092e-07, "loss": 0.2764, "step": 15434 }, { "epoch": 0.89, "grad_norm": 0.4614700653109635, "learning_rate": 6.644847516695385e-07, "loss": 0.3329, "step": 15435 }, { "epoch": 0.89, "grad_norm": 0.3974557145871179, "learning_rate": 6.638178817905594e-07, "loss": 0.2162, "step": 15436 }, { "epoch": 0.89, "grad_norm": 0.31729301878064864, "learning_rate": 6.631513352231644e-07, "loss": 0.2351, "step": 15437 }, { "epoch": 0.89, "grad_norm": 0.6014970938156791, "learning_rate": 6.624851119904385e-07, "loss": 0.2425, "step": 15438 }, { "epoch": 0.89, "grad_norm": 0.46471253231576026, "learning_rate": 6.618192121154488e-07, "loss": 0.3678, "step": 15439 }, { "epoch": 0.89, "grad_norm": 0.3236888175086646, "learning_rate": 6.611536356212612e-07, "loss": 0.2154, "step": 15440 }, { "epoch": 0.89, "grad_norm": 0.5426087946165549, "learning_rate": 6.604883825309205e-07, "loss": 0.3937, "step": 15441 }, { "epoch": 0.89, "grad_norm": 1.5414638027935905, "learning_rate": 6.598234528674663e-07, "loss": 0.6158, "step": 15442 }, { "epoch": 0.89, "grad_norm": 0.18180585805377175, "learning_rate": 6.591588466539222e-07, "loss": 0.1348, "step": 15443 }, { "epoch": 0.89, "grad_norm": 0.33920005873970366, "learning_rate": 6.584945639133067e-07, "loss": 0.299, "step": 15444 }, { "epoch": 0.89, "grad_norm": 1.3235160572056892, "learning_rate": 6.578306046686234e-07, "loss": 0.6165, "step": 15445 }, { "epoch": 0.89, "grad_norm": 0.31586260327256654, "learning_rate": 6.57166968942865e-07, "loss": 0.2081, "step": 15446 }, { "epoch": 0.89, "grad_norm": 0.4470041807775742, "learning_rate": 6.565036567590099e-07, "loss": 0.3292, "step": 15447 }, { "epoch": 0.89, "grad_norm": 0.50546493909433, "learning_rate": 6.558406681400342e-07, "loss": 0.3378, "step": 15448 }, { "epoch": 0.89, "grad_norm": 0.33419202257022956, "learning_rate": 6.55178003108894e-07, "loss": 0.2611, "step": 15449 }, { "epoch": 0.89, "grad_norm": 0.16134420138718436, "learning_rate": 6.545156616885373e-07, "loss": 0.071, "step": 15450 }, { "epoch": 0.89, "grad_norm": 0.32891260695083896, "learning_rate": 6.538536439019016e-07, "loss": 0.3021, "step": 15451 }, { "epoch": 0.89, "grad_norm": 0.37903738502174683, "learning_rate": 6.531919497719097e-07, "loss": 0.2468, "step": 15452 }, { "epoch": 0.89, "grad_norm": 0.4409361141174233, "learning_rate": 6.52530579321482e-07, "loss": 0.2674, "step": 15453 }, { "epoch": 0.89, "grad_norm": 0.5984776888889717, "learning_rate": 6.51869532573517e-07, "loss": 0.2518, "step": 15454 }, { "epoch": 0.89, "grad_norm": 0.37448200080698674, "learning_rate": 6.512088095509095e-07, "loss": 0.2645, "step": 15455 }, { "epoch": 0.89, "grad_norm": 0.24355986018680098, "learning_rate": 6.505484102765358e-07, "loss": 0.1973, "step": 15456 }, { "epoch": 0.89, "grad_norm": 0.8738256093138846, "learning_rate": 6.498883347732709e-07, "loss": 0.4653, "step": 15457 }, { "epoch": 0.89, "grad_norm": 0.39720267810411064, "learning_rate": 6.492285830639711e-07, "loss": 0.2898, "step": 15458 }, { "epoch": 0.89, "grad_norm": 0.284470585352059, "learning_rate": 6.485691551714835e-07, "loss": 0.2527, "step": 15459 }, { "epoch": 0.89, "grad_norm": 1.3712482470378675, "learning_rate": 6.47910051118642e-07, "loss": 0.5142, "step": 15460 }, { "epoch": 0.89, "grad_norm": 0.3337385065605379, "learning_rate": 6.472512709282752e-07, "loss": 0.2579, "step": 15461 }, { "epoch": 0.89, "grad_norm": 0.2548757339390069, "learning_rate": 6.465928146231937e-07, "loss": 0.1738, "step": 15462 }, { "epoch": 0.89, "grad_norm": 0.33354702595338487, "learning_rate": 6.459346822262014e-07, "loss": 0.2561, "step": 15463 }, { "epoch": 0.89, "grad_norm": 0.31942036759526615, "learning_rate": 6.45276873760089e-07, "loss": 0.2559, "step": 15464 }, { "epoch": 0.89, "grad_norm": 0.8281221679177381, "learning_rate": 6.44619389247636e-07, "loss": 0.4434, "step": 15465 }, { "epoch": 0.89, "grad_norm": 1.4513030484506004, "learning_rate": 6.439622287116121e-07, "loss": 0.2286, "step": 15466 }, { "epoch": 0.89, "grad_norm": 0.2902777682810745, "learning_rate": 6.433053921747734e-07, "loss": 0.2554, "step": 15467 }, { "epoch": 0.89, "grad_norm": 0.22307685318256087, "learning_rate": 6.42648879659864e-07, "loss": 0.1937, "step": 15468 }, { "epoch": 0.89, "grad_norm": 0.6450982193385442, "learning_rate": 6.419926911896246e-07, "loss": 0.2968, "step": 15469 }, { "epoch": 0.89, "grad_norm": 0.40342553770057044, "learning_rate": 6.413368267867748e-07, "loss": 0.2651, "step": 15470 }, { "epoch": 0.89, "grad_norm": 0.3625069981846449, "learning_rate": 6.406812864740286e-07, "loss": 0.3225, "step": 15471 }, { "epoch": 0.89, "grad_norm": 0.5406013839654554, "learning_rate": 6.400260702740857e-07, "loss": 0.2295, "step": 15472 }, { "epoch": 0.89, "grad_norm": 0.3813327291140086, "learning_rate": 6.39371178209639e-07, "loss": 0.2555, "step": 15473 }, { "epoch": 0.89, "grad_norm": 0.248468868214602, "learning_rate": 6.387166103033659e-07, "loss": 0.1838, "step": 15474 }, { "epoch": 0.89, "grad_norm": 0.3369238767144438, "learning_rate": 6.38062366577934e-07, "loss": 0.2943, "step": 15475 }, { "epoch": 0.89, "grad_norm": 0.49041949518461936, "learning_rate": 6.374084470559993e-07, "loss": 0.2287, "step": 15476 }, { "epoch": 0.89, "grad_norm": 0.5345573593990525, "learning_rate": 6.367548517602062e-07, "loss": 0.3918, "step": 15477 }, { "epoch": 0.89, "grad_norm": 1.267321575127367, "learning_rate": 6.36101580713191e-07, "loss": 0.4503, "step": 15478 }, { "epoch": 0.89, "grad_norm": 0.24989474901885625, "learning_rate": 6.354486339375765e-07, "loss": 0.2039, "step": 15479 }, { "epoch": 0.89, "grad_norm": 0.30215440765844515, "learning_rate": 6.347960114559726e-07, "loss": 0.2355, "step": 15480 }, { "epoch": 0.89, "grad_norm": 0.47055963128076644, "learning_rate": 6.341437132909778e-07, "loss": 0.2552, "step": 15481 }, { "epoch": 0.89, "grad_norm": 0.28025144025055243, "learning_rate": 6.334917394651863e-07, "loss": 0.1913, "step": 15482 }, { "epoch": 0.89, "grad_norm": 0.47105549542319564, "learning_rate": 6.328400900011722e-07, "loss": 0.3661, "step": 15483 }, { "epoch": 0.89, "grad_norm": 0.5301274686641892, "learning_rate": 6.321887649215031e-07, "loss": 0.3488, "step": 15484 }, { "epoch": 0.89, "grad_norm": 0.34412718839375167, "learning_rate": 6.31537764248733e-07, "loss": 0.1598, "step": 15485 }, { "epoch": 0.89, "grad_norm": 0.49793609859674237, "learning_rate": 6.308870880054085e-07, "loss": 0.3045, "step": 15486 }, { "epoch": 0.89, "grad_norm": 0.2893282719469128, "learning_rate": 6.302367362140616e-07, "loss": 0.2616, "step": 15487 }, { "epoch": 0.89, "grad_norm": 0.34383649815869993, "learning_rate": 6.295867088972141e-07, "loss": 0.2231, "step": 15488 }, { "epoch": 0.89, "grad_norm": 0.3281453009232224, "learning_rate": 6.289370060773748e-07, "loss": 0.2295, "step": 15489 }, { "epoch": 0.89, "grad_norm": 0.536206100347878, "learning_rate": 6.282876277770433e-07, "loss": 0.2901, "step": 15490 }, { "epoch": 0.89, "grad_norm": 0.3532249976082039, "learning_rate": 6.276385740187097e-07, "loss": 0.258, "step": 15491 }, { "epoch": 0.89, "grad_norm": 0.3603329578354436, "learning_rate": 6.26989844824849e-07, "loss": 0.2514, "step": 15492 }, { "epoch": 0.89, "grad_norm": 0.9432767223998106, "learning_rate": 6.263414402179269e-07, "loss": 0.402, "step": 15493 }, { "epoch": 0.89, "grad_norm": 0.30261911972964056, "learning_rate": 6.256933602203963e-07, "loss": 0.2115, "step": 15494 }, { "epoch": 0.89, "grad_norm": 0.23543770198075897, "learning_rate": 6.250456048547027e-07, "loss": 0.2335, "step": 15495 }, { "epoch": 0.89, "grad_norm": 0.4415768113353055, "learning_rate": 6.243981741432769e-07, "loss": 0.2614, "step": 15496 }, { "epoch": 0.89, "grad_norm": 0.5812569473819925, "learning_rate": 6.23751068108539e-07, "loss": 0.3034, "step": 15497 }, { "epoch": 0.89, "grad_norm": 0.36478079392776935, "learning_rate": 6.231042867728987e-07, "loss": 0.2275, "step": 15498 }, { "epoch": 0.89, "grad_norm": 0.3750949965379845, "learning_rate": 6.224578301587536e-07, "loss": 0.29, "step": 15499 }, { "epoch": 0.89, "grad_norm": 0.266345866146398, "learning_rate": 6.218116982884903e-07, "loss": 0.2056, "step": 15500 }, { "epoch": 0.89, "grad_norm": 0.5182817369574994, "learning_rate": 6.211658911844854e-07, "loss": 0.3603, "step": 15501 }, { "epoch": 0.89, "grad_norm": 0.32107800900252353, "learning_rate": 6.205204088690997e-07, "loss": 0.1834, "step": 15502 }, { "epoch": 0.89, "grad_norm": 0.3540271412785568, "learning_rate": 6.198752513646911e-07, "loss": 0.2723, "step": 15503 }, { "epoch": 0.89, "grad_norm": 0.5177309374569469, "learning_rate": 6.192304186935993e-07, "loss": 0.3293, "step": 15504 }, { "epoch": 0.89, "grad_norm": 0.8842981293585628, "learning_rate": 6.185859108781544e-07, "loss": 0.2621, "step": 15505 }, { "epoch": 0.89, "grad_norm": 0.8063127372023199, "learning_rate": 6.179417279406752e-07, "loss": 0.5123, "step": 15506 }, { "epoch": 0.89, "grad_norm": 0.2330689085737917, "learning_rate": 6.172978699034715e-07, "loss": 0.2458, "step": 15507 }, { "epoch": 0.89, "grad_norm": 0.2998705740226622, "learning_rate": 6.166543367888389e-07, "loss": 0.1521, "step": 15508 }, { "epoch": 0.89, "grad_norm": 0.5991385238658149, "learning_rate": 6.160111286190629e-07, "loss": 0.3678, "step": 15509 }, { "epoch": 0.89, "grad_norm": 0.41892626382614, "learning_rate": 6.153682454164167e-07, "loss": 0.2752, "step": 15510 }, { "epoch": 0.89, "grad_norm": 0.38775365797584355, "learning_rate": 6.14725687203167e-07, "loss": 0.2397, "step": 15511 }, { "epoch": 0.89, "grad_norm": 0.6237671895525037, "learning_rate": 6.140834540015617e-07, "loss": 0.3729, "step": 15512 }, { "epoch": 0.89, "grad_norm": 0.29434266789911306, "learning_rate": 6.134415458338439e-07, "loss": 0.2485, "step": 15513 }, { "epoch": 0.89, "grad_norm": 0.44565125934806743, "learning_rate": 6.127999627222414e-07, "loss": 0.238, "step": 15514 }, { "epoch": 0.89, "grad_norm": 0.28957696302459607, "learning_rate": 6.121587046889709e-07, "loss": 0.2293, "step": 15515 }, { "epoch": 0.89, "grad_norm": 0.381380545026879, "learning_rate": 6.115177717562426e-07, "loss": 0.3005, "step": 15516 }, { "epoch": 0.89, "grad_norm": 1.280503866463015, "learning_rate": 6.108771639462496e-07, "loss": 0.4066, "step": 15517 }, { "epoch": 0.89, "grad_norm": 0.31632331249443907, "learning_rate": 6.102368812811776e-07, "loss": 0.2172, "step": 15518 }, { "epoch": 0.89, "grad_norm": 0.30631108778593535, "learning_rate": 6.095969237831956e-07, "loss": 0.2671, "step": 15519 }, { "epoch": 0.89, "grad_norm": 0.4657106174751502, "learning_rate": 6.089572914744712e-07, "loss": 0.2943, "step": 15520 }, { "epoch": 0.89, "grad_norm": 0.3192673232648922, "learning_rate": 6.083179843771513e-07, "loss": 0.1749, "step": 15521 }, { "epoch": 0.89, "grad_norm": 0.5648607556412087, "learning_rate": 6.076790025133761e-07, "loss": 0.3423, "step": 15522 }, { "epoch": 0.89, "grad_norm": 0.38998891302247396, "learning_rate": 6.070403459052721e-07, "loss": 0.3006, "step": 15523 }, { "epoch": 0.89, "grad_norm": 0.47612449309706734, "learning_rate": 6.064020145749572e-07, "loss": 0.2506, "step": 15524 }, { "epoch": 0.89, "grad_norm": 0.3699572894444798, "learning_rate": 6.057640085445371e-07, "loss": 0.2902, "step": 15525 }, { "epoch": 0.89, "grad_norm": 0.33072600556263054, "learning_rate": 6.051263278361064e-07, "loss": 0.2912, "step": 15526 }, { "epoch": 0.89, "grad_norm": 0.5229226557439318, "learning_rate": 6.04488972471744e-07, "loss": 0.2665, "step": 15527 }, { "epoch": 0.89, "grad_norm": 0.23899799542644962, "learning_rate": 6.038519424735268e-07, "loss": 0.1533, "step": 15528 }, { "epoch": 0.89, "grad_norm": 1.2521803906393414, "learning_rate": 6.032152378635125e-07, "loss": 0.71, "step": 15529 }, { "epoch": 0.89, "grad_norm": 0.332009032357602, "learning_rate": 6.025788586637516e-07, "loss": 0.3173, "step": 15530 }, { "epoch": 0.89, "grad_norm": 0.3054581468802576, "learning_rate": 6.019428048962794e-07, "loss": 0.2127, "step": 15531 }, { "epoch": 0.89, "grad_norm": 0.6988792384416537, "learning_rate": 6.013070765831242e-07, "loss": 0.3557, "step": 15532 }, { "epoch": 0.89, "grad_norm": 0.2612692825758358, "learning_rate": 6.006716737463003e-07, "loss": 0.1574, "step": 15533 }, { "epoch": 0.89, "grad_norm": 0.35768416541218084, "learning_rate": 6.000365964078125e-07, "loss": 0.2071, "step": 15534 }, { "epoch": 0.89, "grad_norm": 0.360522914540225, "learning_rate": 5.99401844589651e-07, "loss": 0.2844, "step": 15535 }, { "epoch": 0.89, "grad_norm": 0.7207181920540063, "learning_rate": 5.987674183138015e-07, "loss": 0.4223, "step": 15536 }, { "epoch": 0.89, "grad_norm": 0.34250474149267907, "learning_rate": 5.98133317602233e-07, "loss": 0.2512, "step": 15537 }, { "epoch": 0.89, "grad_norm": 0.42762939818118384, "learning_rate": 5.974995424769026e-07, "loss": 0.2639, "step": 15538 }, { "epoch": 0.89, "grad_norm": 0.28191737906201586, "learning_rate": 5.968660929597581e-07, "loss": 0.2076, "step": 15539 }, { "epoch": 0.89, "grad_norm": 0.3401995555671143, "learning_rate": 5.962329690727353e-07, "loss": 0.2638, "step": 15540 }, { "epoch": 0.89, "grad_norm": 0.5155883682044767, "learning_rate": 5.956001708377623e-07, "loss": 0.1423, "step": 15541 }, { "epoch": 0.89, "grad_norm": 0.3712414578625411, "learning_rate": 5.949676982767505e-07, "loss": 0.3232, "step": 15542 }, { "epoch": 0.89, "grad_norm": 0.32353924162903186, "learning_rate": 5.943355514116033e-07, "loss": 0.2585, "step": 15543 }, { "epoch": 0.89, "grad_norm": 0.7305438124068102, "learning_rate": 5.937037302642101e-07, "loss": 0.3036, "step": 15544 }, { "epoch": 0.89, "grad_norm": 0.29228028451163485, "learning_rate": 5.930722348564533e-07, "loss": 0.1928, "step": 15545 }, { "epoch": 0.89, "grad_norm": 0.34765287698989866, "learning_rate": 5.924410652102009e-07, "loss": 0.255, "step": 15546 }, { "epoch": 0.89, "grad_norm": 0.3763887260898906, "learning_rate": 5.918102213473087e-07, "loss": 0.2487, "step": 15547 }, { "epoch": 0.89, "grad_norm": 0.6285264044725666, "learning_rate": 5.911797032896239e-07, "loss": 0.3656, "step": 15548 }, { "epoch": 0.89, "grad_norm": 0.33477221937072554, "learning_rate": 5.905495110589821e-07, "loss": 0.2613, "step": 15549 }, { "epoch": 0.89, "grad_norm": 1.252759326879859, "learning_rate": 5.89919644677206e-07, "loss": 0.8002, "step": 15550 }, { "epoch": 0.89, "grad_norm": 0.3344029733336263, "learning_rate": 5.892901041661092e-07, "loss": 0.2161, "step": 15551 }, { "epoch": 0.89, "grad_norm": 0.20909466560736703, "learning_rate": 5.886608895474888e-07, "loss": 0.1845, "step": 15552 }, { "epoch": 0.89, "grad_norm": 1.1886244262668104, "learning_rate": 5.880320008431384e-07, "loss": 0.6513, "step": 15553 }, { "epoch": 0.89, "grad_norm": 0.29300410427650325, "learning_rate": 5.874034380748362e-07, "loss": 0.2342, "step": 15554 }, { "epoch": 0.89, "grad_norm": 0.34717227520428, "learning_rate": 5.867752012643469e-07, "loss": 0.285, "step": 15555 }, { "epoch": 0.89, "grad_norm": 0.7185220879859632, "learning_rate": 5.861472904334287e-07, "loss": 0.3823, "step": 15556 }, { "epoch": 0.89, "grad_norm": 0.48824641014498565, "learning_rate": 5.855197056038231e-07, "loss": 0.1113, "step": 15557 }, { "epoch": 0.89, "grad_norm": 0.34978211070504545, "learning_rate": 5.848924467972661e-07, "loss": 0.2534, "step": 15558 }, { "epoch": 0.89, "grad_norm": 0.257049533946427, "learning_rate": 5.842655140354791e-07, "loss": 0.2372, "step": 15559 }, { "epoch": 0.89, "grad_norm": 0.596660439530161, "learning_rate": 5.836389073401727e-07, "loss": 0.283, "step": 15560 }, { "epoch": 0.89, "grad_norm": 0.31315917280327593, "learning_rate": 5.830126267330449e-07, "loss": 0.2764, "step": 15561 }, { "epoch": 0.89, "grad_norm": 0.35240639619956515, "learning_rate": 5.823866722357863e-07, "loss": 0.3343, "step": 15562 }, { "epoch": 0.89, "grad_norm": 1.620476298451587, "learning_rate": 5.817610438700716e-07, "loss": 0.5078, "step": 15563 }, { "epoch": 0.89, "grad_norm": 0.2704294455548528, "learning_rate": 5.811357416575681e-07, "loss": 0.1573, "step": 15564 }, { "epoch": 0.89, "grad_norm": 0.4428142185190504, "learning_rate": 5.805107656199272e-07, "loss": 0.2494, "step": 15565 }, { "epoch": 0.89, "grad_norm": 0.3292874439000769, "learning_rate": 5.79886115778795e-07, "loss": 0.2965, "step": 15566 }, { "epoch": 0.89, "grad_norm": 0.30624266788213034, "learning_rate": 5.792617921558008e-07, "loss": 0.1981, "step": 15567 }, { "epoch": 0.89, "grad_norm": 1.2556421070753658, "learning_rate": 5.786377947725652e-07, "loss": 0.736, "step": 15568 }, { "epoch": 0.89, "grad_norm": 1.5193881640144948, "learning_rate": 5.780141236506975e-07, "loss": 0.567, "step": 15569 }, { "epoch": 0.89, "grad_norm": 0.22623774423910054, "learning_rate": 5.77390778811796e-07, "loss": 0.2157, "step": 15570 }, { "epoch": 0.89, "grad_norm": 0.28603043110314136, "learning_rate": 5.767677602774469e-07, "loss": 0.1825, "step": 15571 }, { "epoch": 0.89, "grad_norm": 0.5968504572934171, "learning_rate": 5.761450680692249e-07, "loss": 0.3951, "step": 15572 }, { "epoch": 0.89, "grad_norm": 0.2821106282963114, "learning_rate": 5.755227022086918e-07, "loss": 0.1834, "step": 15573 }, { "epoch": 0.89, "grad_norm": 0.3511480956550723, "learning_rate": 5.749006627174048e-07, "loss": 0.3039, "step": 15574 }, { "epoch": 0.89, "grad_norm": 1.2740202637398323, "learning_rate": 5.742789496169021e-07, "loss": 0.4536, "step": 15575 }, { "epoch": 0.89, "grad_norm": 0.36967814771582713, "learning_rate": 5.736575629287145e-07, "loss": 0.2536, "step": 15576 }, { "epoch": 0.89, "grad_norm": 0.3182282890886471, "learning_rate": 5.730365026743579e-07, "loss": 0.2127, "step": 15577 }, { "epoch": 0.9, "grad_norm": 0.2597618110444984, "learning_rate": 5.72415768875344e-07, "loss": 0.2431, "step": 15578 }, { "epoch": 0.9, "grad_norm": 0.40873608355276525, "learning_rate": 5.717953615531668e-07, "loss": 0.2492, "step": 15579 }, { "epoch": 0.9, "grad_norm": 0.5488195331582982, "learning_rate": 5.711752807293102e-07, "loss": 0.2588, "step": 15580 }, { "epoch": 0.9, "grad_norm": 1.40896979648835, "learning_rate": 5.705555264252483e-07, "loss": 0.5422, "step": 15581 }, { "epoch": 0.9, "grad_norm": 0.2511592502937873, "learning_rate": 5.699360986624414e-07, "loss": 0.2355, "step": 15582 }, { "epoch": 0.9, "grad_norm": 0.5226165719071535, "learning_rate": 5.693169974623435e-07, "loss": 0.2665, "step": 15583 }, { "epoch": 0.9, "grad_norm": 0.4341487348411721, "learning_rate": 5.686982228463933e-07, "loss": 0.2977, "step": 15584 }, { "epoch": 0.9, "grad_norm": 0.2937040743182103, "learning_rate": 5.680797748360168e-07, "loss": 0.2368, "step": 15585 }, { "epoch": 0.9, "grad_norm": 0.24353668748816057, "learning_rate": 5.674616534526312e-07, "loss": 0.1934, "step": 15586 }, { "epoch": 0.9, "grad_norm": 1.674275057185165, "learning_rate": 5.66843858717645e-07, "loss": 0.4936, "step": 15587 }, { "epoch": 0.9, "grad_norm": 0.33392941409136523, "learning_rate": 5.66226390652449e-07, "loss": 0.2589, "step": 15588 }, { "epoch": 0.9, "grad_norm": 0.5883240032566877, "learning_rate": 5.656092492784282e-07, "loss": 0.3758, "step": 15589 }, { "epoch": 0.9, "grad_norm": 0.3373756640161482, "learning_rate": 5.649924346169522e-07, "loss": 0.2541, "step": 15590 }, { "epoch": 0.9, "grad_norm": 0.5502096680976466, "learning_rate": 5.643759466893839e-07, "loss": 0.3176, "step": 15591 }, { "epoch": 0.9, "grad_norm": 0.23347398396256974, "learning_rate": 5.637597855170707e-07, "loss": 0.1993, "step": 15592 }, { "epoch": 0.9, "grad_norm": 0.3502673113280859, "learning_rate": 5.631439511213499e-07, "loss": 0.2337, "step": 15593 }, { "epoch": 0.9, "grad_norm": 0.3869961913722, "learning_rate": 5.625284435235478e-07, "loss": 0.2529, "step": 15594 }, { "epoch": 0.9, "grad_norm": 0.676089532643328, "learning_rate": 5.619132627449797e-07, "loss": 0.3139, "step": 15595 }, { "epoch": 0.9, "grad_norm": 1.3271539215905461, "learning_rate": 5.612984088069507e-07, "loss": 0.3681, "step": 15596 }, { "epoch": 0.9, "grad_norm": 0.41527056555619096, "learning_rate": 5.606838817307514e-07, "loss": 0.2618, "step": 15597 }, { "epoch": 0.9, "grad_norm": 0.20296961040711048, "learning_rate": 5.600696815376639e-07, "loss": 0.207, "step": 15598 }, { "epoch": 0.9, "grad_norm": 0.7358714300620751, "learning_rate": 5.594558082489565e-07, "loss": 0.2504, "step": 15599 }, { "epoch": 0.9, "grad_norm": 0.37675397571544944, "learning_rate": 5.58842261885889e-07, "loss": 0.277, "step": 15600 }, { "epoch": 0.9, "grad_norm": 0.6241152115560328, "learning_rate": 5.582290424697078e-07, "loss": 0.2907, "step": 15601 }, { "epoch": 0.9, "grad_norm": 0.3432574165161779, "learning_rate": 5.576161500216481e-07, "loss": 0.3207, "step": 15602 }, { "epoch": 0.9, "grad_norm": 0.32805857188412496, "learning_rate": 5.570035845629362e-07, "loss": 0.207, "step": 15603 }, { "epoch": 0.9, "grad_norm": 0.3566758030118385, "learning_rate": 5.563913461147841e-07, "loss": 0.2272, "step": 15604 }, { "epoch": 0.9, "grad_norm": 0.33717272175655655, "learning_rate": 5.557794346983936e-07, "loss": 0.2613, "step": 15605 }, { "epoch": 0.9, "grad_norm": 0.25959042748500355, "learning_rate": 5.551678503349545e-07, "loss": 0.2154, "step": 15606 }, { "epoch": 0.9, "grad_norm": 0.8584149753122465, "learning_rate": 5.545565930456464e-07, "loss": 0.4831, "step": 15607 }, { "epoch": 0.9, "grad_norm": 1.2299027222034258, "learning_rate": 5.539456628516382e-07, "loss": 0.7181, "step": 15608 }, { "epoch": 0.9, "grad_norm": 0.7561475327791286, "learning_rate": 5.53335059774085e-07, "loss": 0.1436, "step": 15609 }, { "epoch": 0.9, "grad_norm": 0.2262751318435284, "learning_rate": 5.527247838341332e-07, "loss": 0.2338, "step": 15610 }, { "epoch": 0.9, "grad_norm": 0.40729868452013945, "learning_rate": 5.521148350529137e-07, "loss": 0.2774, "step": 15611 }, { "epoch": 0.9, "grad_norm": 0.8980545590794237, "learning_rate": 5.51505213451553e-07, "loss": 0.3379, "step": 15612 }, { "epoch": 0.9, "grad_norm": 0.3863824149031958, "learning_rate": 5.508959190511609e-07, "loss": 0.2108, "step": 15613 }, { "epoch": 0.9, "grad_norm": 0.33532397215502635, "learning_rate": 5.502869518728359e-07, "loss": 0.3238, "step": 15614 }, { "epoch": 0.9, "grad_norm": 0.5569176551550571, "learning_rate": 5.49678311937667e-07, "loss": 0.3709, "step": 15615 }, { "epoch": 0.9, "grad_norm": 0.36933007186294964, "learning_rate": 5.490699992667326e-07, "loss": 0.2373, "step": 15616 }, { "epoch": 0.9, "grad_norm": 0.45251797358873475, "learning_rate": 5.48462013881097e-07, "loss": 0.2836, "step": 15617 }, { "epoch": 0.9, "grad_norm": 0.23540557748561147, "learning_rate": 5.478543558018167e-07, "loss": 0.2141, "step": 15618 }, { "epoch": 0.9, "grad_norm": 0.4132520532778912, "learning_rate": 5.472470250499328e-07, "loss": 0.2118, "step": 15619 }, { "epoch": 0.9, "grad_norm": 1.2284634022109895, "learning_rate": 5.466400216464774e-07, "loss": 0.7019, "step": 15620 }, { "epoch": 0.9, "grad_norm": 0.3909616326979364, "learning_rate": 5.460333456124722e-07, "loss": 0.2444, "step": 15621 }, { "epoch": 0.9, "grad_norm": 0.280909579303553, "learning_rate": 5.454269969689252e-07, "loss": 0.2283, "step": 15622 }, { "epoch": 0.9, "grad_norm": 0.6943885695673067, "learning_rate": 5.448209757368361e-07, "loss": 0.3467, "step": 15623 }, { "epoch": 0.9, "grad_norm": 0.214468125396571, "learning_rate": 5.442152819371882e-07, "loss": 0.1708, "step": 15624 }, { "epoch": 0.9, "grad_norm": 0.5495278130056973, "learning_rate": 5.436099155909592e-07, "loss": 0.3312, "step": 15625 }, { "epoch": 0.9, "grad_norm": 0.314657048380129, "learning_rate": 5.430048767191121e-07, "loss": 0.2532, "step": 15626 }, { "epoch": 0.9, "grad_norm": 0.5585893096454551, "learning_rate": 5.424001653426003e-07, "loss": 0.3362, "step": 15627 }, { "epoch": 0.9, "grad_norm": 0.36554119112794337, "learning_rate": 5.417957814823627e-07, "loss": 0.2746, "step": 15628 }, { "epoch": 0.9, "grad_norm": 0.3853663692179569, "learning_rate": 5.411917251593313e-07, "loss": 0.2478, "step": 15629 }, { "epoch": 0.9, "grad_norm": 0.21621671494700923, "learning_rate": 5.405879963944238e-07, "loss": 0.1392, "step": 15630 }, { "epoch": 0.9, "grad_norm": 0.4678193698155111, "learning_rate": 5.39984595208547e-07, "loss": 0.2854, "step": 15631 }, { "epoch": 0.9, "grad_norm": 0.8065746449842706, "learning_rate": 5.393815216225972e-07, "loss": 0.3653, "step": 15632 }, { "epoch": 0.9, "grad_norm": 0.3145087220616073, "learning_rate": 5.387787756574592e-07, "loss": 0.2735, "step": 15633 }, { "epoch": 0.9, "grad_norm": 0.3790211085952469, "learning_rate": 5.381763573340049e-07, "loss": 0.2876, "step": 15634 }, { "epoch": 0.9, "grad_norm": 0.9396952767327181, "learning_rate": 5.375742666730955e-07, "loss": 0.2666, "step": 15635 }, { "epoch": 0.9, "grad_norm": 0.23651179482052748, "learning_rate": 5.36972503695582e-07, "loss": 0.1632, "step": 15636 }, { "epoch": 0.9, "grad_norm": 0.31046373469158267, "learning_rate": 5.363710684223045e-07, "loss": 0.264, "step": 15637 }, { "epoch": 0.9, "grad_norm": 0.48144032175554063, "learning_rate": 5.357699608740907e-07, "loss": 0.3497, "step": 15638 }, { "epoch": 0.9, "grad_norm": 0.538184412368237, "learning_rate": 5.351691810717552e-07, "loss": 0.2247, "step": 15639 }, { "epoch": 0.9, "grad_norm": 0.42659928158946236, "learning_rate": 5.345687290361035e-07, "loss": 0.3106, "step": 15640 }, { "epoch": 0.9, "grad_norm": 0.6164374849438661, "learning_rate": 5.339686047879311e-07, "loss": 0.3437, "step": 15641 }, { "epoch": 0.9, "grad_norm": 0.21254629178819517, "learning_rate": 5.333688083480182e-07, "loss": 0.1303, "step": 15642 }, { "epoch": 0.9, "grad_norm": 0.4149495701684598, "learning_rate": 5.327693397371369e-07, "loss": 0.2945, "step": 15643 }, { "epoch": 0.9, "grad_norm": 0.8395100725171276, "learning_rate": 5.321701989760452e-07, "loss": 0.4472, "step": 15644 }, { "epoch": 0.9, "grad_norm": 0.2746630354052698, "learning_rate": 5.315713860854921e-07, "loss": 0.2244, "step": 15645 }, { "epoch": 0.9, "grad_norm": 0.3804893461734729, "learning_rate": 5.309729010862163e-07, "loss": 0.3088, "step": 15646 }, { "epoch": 0.9, "grad_norm": 1.3065235001536424, "learning_rate": 5.303747439989415e-07, "loss": 0.4975, "step": 15647 }, { "epoch": 0.9, "grad_norm": 0.1564879560582931, "learning_rate": 5.29776914844382e-07, "loss": 0.0707, "step": 15648 }, { "epoch": 0.9, "grad_norm": 0.285253213852128, "learning_rate": 5.291794136432393e-07, "loss": 0.2486, "step": 15649 }, { "epoch": 0.9, "grad_norm": 0.5030656323905838, "learning_rate": 5.285822404162066e-07, "loss": 0.3373, "step": 15650 }, { "epoch": 0.9, "grad_norm": 0.7274055892952281, "learning_rate": 5.279853951839653e-07, "loss": 0.4009, "step": 15651 }, { "epoch": 0.9, "grad_norm": 0.3379741609185097, "learning_rate": 5.27388877967181e-07, "loss": 0.2087, "step": 15652 }, { "epoch": 0.9, "grad_norm": 0.5153461591870591, "learning_rate": 5.267926887865127e-07, "loss": 0.2932, "step": 15653 }, { "epoch": 0.9, "grad_norm": 0.33147772787988616, "learning_rate": 5.261968276626062e-07, "loss": 0.2518, "step": 15654 }, { "epoch": 0.9, "grad_norm": 0.2169728639824801, "learning_rate": 5.256012946160971e-07, "loss": 0.1588, "step": 15655 }, { "epoch": 0.9, "grad_norm": 0.7769396824390693, "learning_rate": 5.250060896676068e-07, "loss": 0.4372, "step": 15656 }, { "epoch": 0.9, "grad_norm": 0.3540478962758659, "learning_rate": 5.244112128377477e-07, "loss": 0.2981, "step": 15657 }, { "epoch": 0.9, "grad_norm": 0.31449744620930736, "learning_rate": 5.238166641471221e-07, "loss": 0.2103, "step": 15658 }, { "epoch": 0.9, "grad_norm": 1.3570129545424443, "learning_rate": 5.23222443616318e-07, "loss": 0.4924, "step": 15659 }, { "epoch": 0.9, "grad_norm": 0.28912954651128253, "learning_rate": 5.226285512659123e-07, "loss": 0.1437, "step": 15660 }, { "epoch": 0.9, "grad_norm": 0.2773332070677685, "learning_rate": 5.220349871164732e-07, "loss": 0.2187, "step": 15661 }, { "epoch": 0.9, "grad_norm": 0.4015892649555536, "learning_rate": 5.214417511885539e-07, "loss": 0.308, "step": 15662 }, { "epoch": 0.9, "grad_norm": 0.6413421071522666, "learning_rate": 5.208488435026992e-07, "loss": 0.4395, "step": 15663 }, { "epoch": 0.9, "grad_norm": 0.4767384537292822, "learning_rate": 5.202562640794429e-07, "loss": 0.2826, "step": 15664 }, { "epoch": 0.9, "grad_norm": 0.38488084441750964, "learning_rate": 5.196640129393038e-07, "loss": 0.2458, "step": 15665 }, { "epoch": 0.9, "grad_norm": 1.3259987938419826, "learning_rate": 5.190720901027901e-07, "loss": 0.5462, "step": 15666 }, { "epoch": 0.9, "grad_norm": 0.2478349774975706, "learning_rate": 5.184804955904066e-07, "loss": 0.2062, "step": 15667 }, { "epoch": 0.9, "grad_norm": 0.5459979152515864, "learning_rate": 5.178892294226334e-07, "loss": 0.3124, "step": 15668 }, { "epoch": 0.9, "grad_norm": 0.35238957217762756, "learning_rate": 5.172982916199465e-07, "loss": 0.3126, "step": 15669 }, { "epoch": 0.9, "grad_norm": 0.3040986720543595, "learning_rate": 5.167076822028149e-07, "loss": 0.2522, "step": 15670 }, { "epoch": 0.9, "grad_norm": 0.23105199235660073, "learning_rate": 5.16117401191687e-07, "loss": 0.0663, "step": 15671 }, { "epoch": 0.9, "grad_norm": 0.4992153585528372, "learning_rate": 5.155274486070072e-07, "loss": 0.3346, "step": 15672 }, { "epoch": 0.9, "grad_norm": 0.261839200482087, "learning_rate": 5.149378244692027e-07, "loss": 0.2322, "step": 15673 }, { "epoch": 0.9, "grad_norm": 0.5089769058171006, "learning_rate": 5.143485287986927e-07, "loss": 0.2294, "step": 15674 }, { "epoch": 0.9, "grad_norm": 0.3794901247148647, "learning_rate": 5.137595616158863e-07, "loss": 0.2656, "step": 15675 }, { "epoch": 0.9, "grad_norm": 0.33643466430036256, "learning_rate": 5.131709229411785e-07, "loss": 0.2438, "step": 15676 }, { "epoch": 0.9, "grad_norm": 0.3647097642493535, "learning_rate": 5.12582612794954e-07, "loss": 0.282, "step": 15677 }, { "epoch": 0.9, "grad_norm": 0.7738986859788042, "learning_rate": 5.119946311975843e-07, "loss": 0.2076, "step": 15678 }, { "epoch": 0.9, "grad_norm": 0.34867075246133533, "learning_rate": 5.114069781694331e-07, "loss": 0.2548, "step": 15679 }, { "epoch": 0.9, "grad_norm": 0.6216173151236968, "learning_rate": 5.108196537308507e-07, "loss": 0.3724, "step": 15680 }, { "epoch": 0.9, "grad_norm": 0.2894107217697507, "learning_rate": 5.102326579021754e-07, "loss": 0.2367, "step": 15681 }, { "epoch": 0.9, "grad_norm": 0.33805317461218487, "learning_rate": 5.096459907037344e-07, "loss": 0.2521, "step": 15682 }, { "epoch": 0.9, "grad_norm": 0.30537643202596476, "learning_rate": 5.090596521558455e-07, "loss": 0.1848, "step": 15683 }, { "epoch": 0.9, "grad_norm": 0.44721678914316987, "learning_rate": 5.084736422788128e-07, "loss": 0.2464, "step": 15684 }, { "epoch": 0.9, "grad_norm": 0.25725829442115, "learning_rate": 5.078879610929299e-07, "loss": 0.2437, "step": 15685 }, { "epoch": 0.9, "grad_norm": 0.6813280238885259, "learning_rate": 5.073026086184785e-07, "loss": 0.4465, "step": 15686 }, { "epoch": 0.9, "grad_norm": 0.22755669708281842, "learning_rate": 5.067175848757288e-07, "loss": 0.1386, "step": 15687 }, { "epoch": 0.9, "grad_norm": 0.23563842364379814, "learning_rate": 5.061328898849416e-07, "loss": 0.1959, "step": 15688 }, { "epoch": 0.9, "grad_norm": 0.3534225275012728, "learning_rate": 5.055485236663638e-07, "loss": 0.2719, "step": 15689 }, { "epoch": 0.9, "grad_norm": 0.6615023017392966, "learning_rate": 5.049644862402336e-07, "loss": 0.3489, "step": 15690 }, { "epoch": 0.9, "grad_norm": 0.30468397890498616, "learning_rate": 5.043807776267729e-07, "loss": 0.203, "step": 15691 }, { "epoch": 0.9, "grad_norm": 1.2458868096231006, "learning_rate": 5.037973978461985e-07, "loss": 0.6232, "step": 15692 }, { "epoch": 0.9, "grad_norm": 0.3412161058565019, "learning_rate": 5.032143469187123e-07, "loss": 0.3043, "step": 15693 }, { "epoch": 0.9, "grad_norm": 0.2687736892933335, "learning_rate": 5.026316248645047e-07, "loss": 0.1856, "step": 15694 }, { "epoch": 0.9, "grad_norm": 0.2705685324835451, "learning_rate": 5.020492317037539e-07, "loss": 0.1572, "step": 15695 }, { "epoch": 0.9, "grad_norm": 0.4139012116678503, "learning_rate": 5.014671674566319e-07, "loss": 0.3283, "step": 15696 }, { "epoch": 0.9, "grad_norm": 0.304048179285271, "learning_rate": 5.008854321432932e-07, "loss": 0.1848, "step": 15697 }, { "epoch": 0.9, "grad_norm": 0.5339104269414403, "learning_rate": 5.003040257838831e-07, "loss": 0.3313, "step": 15698 }, { "epoch": 0.9, "grad_norm": 1.1617194889579199, "learning_rate": 4.997229483985366e-07, "loss": 0.4965, "step": 15699 }, { "epoch": 0.9, "grad_norm": 0.4026837415595859, "learning_rate": 4.991422000073753e-07, "loss": 0.2646, "step": 15700 }, { "epoch": 0.9, "grad_norm": 0.21280087447699006, "learning_rate": 4.985617806305121e-07, "loss": 0.1822, "step": 15701 }, { "epoch": 0.9, "grad_norm": 0.7633811247449117, "learning_rate": 4.979816902880441e-07, "loss": 0.3467, "step": 15702 }, { "epoch": 0.9, "grad_norm": 0.3855766888818858, "learning_rate": 4.97401929000062e-07, "loss": 0.2693, "step": 15703 }, { "epoch": 0.9, "grad_norm": 0.34135917725265313, "learning_rate": 4.968224967866431e-07, "loss": 0.2474, "step": 15704 }, { "epoch": 0.9, "grad_norm": 0.4960999651515975, "learning_rate": 4.962433936678523e-07, "loss": 0.3536, "step": 15705 }, { "epoch": 0.9, "grad_norm": 0.4109252916237451, "learning_rate": 4.956646196637438e-07, "loss": 0.2418, "step": 15706 }, { "epoch": 0.9, "grad_norm": 0.3962821887700003, "learning_rate": 4.950861747943603e-07, "loss": 0.203, "step": 15707 }, { "epoch": 0.9, "grad_norm": 0.32311958760614523, "learning_rate": 4.945080590797346e-07, "loss": 0.2377, "step": 15708 }, { "epoch": 0.9, "grad_norm": 0.2812146473652669, "learning_rate": 4.939302725398865e-07, "loss": 0.2372, "step": 15709 }, { "epoch": 0.9, "grad_norm": 1.489036443344534, "learning_rate": 4.933528151948241e-07, "loss": 0.2296, "step": 15710 }, { "epoch": 0.9, "grad_norm": 1.1842550860891952, "learning_rate": 4.92775687064545e-07, "loss": 0.8144, "step": 15711 }, { "epoch": 0.9, "grad_norm": 0.3325210959542368, "learning_rate": 4.921988881690332e-07, "loss": 0.2314, "step": 15712 }, { "epoch": 0.9, "grad_norm": 0.4057706489840598, "learning_rate": 4.91622418528267e-07, "loss": 0.3007, "step": 15713 }, { "epoch": 0.9, "grad_norm": 0.33307672193546267, "learning_rate": 4.910462781622072e-07, "loss": 0.1754, "step": 15714 }, { "epoch": 0.9, "grad_norm": 0.6227748994436654, "learning_rate": 4.904704670908067e-07, "loss": 0.3603, "step": 15715 }, { "epoch": 0.9, "grad_norm": 0.42078372345346, "learning_rate": 4.89894985334003e-07, "loss": 0.2779, "step": 15716 }, { "epoch": 0.9, "grad_norm": 0.30774036410484645, "learning_rate": 4.893198329117277e-07, "loss": 0.2554, "step": 15717 }, { "epoch": 0.9, "grad_norm": 0.41423426847798006, "learning_rate": 4.887450098438984e-07, "loss": 0.2504, "step": 15718 }, { "epoch": 0.9, "grad_norm": 0.4670151159835837, "learning_rate": 4.881705161504202e-07, "loss": 0.3101, "step": 15719 }, { "epoch": 0.9, "grad_norm": 0.21198178399623216, "learning_rate": 4.875963518511872e-07, "loss": 0.1375, "step": 15720 }, { "epoch": 0.9, "grad_norm": 0.3436783552350103, "learning_rate": 4.870225169660836e-07, "loss": 0.2452, "step": 15721 }, { "epoch": 0.9, "grad_norm": 0.6392564449176125, "learning_rate": 4.864490115149823e-07, "loss": 0.3374, "step": 15722 }, { "epoch": 0.9, "grad_norm": 0.781527813043474, "learning_rate": 4.858758355177418e-07, "loss": 0.337, "step": 15723 }, { "epoch": 0.9, "grad_norm": 0.3130251635811382, "learning_rate": 4.853029889942129e-07, "loss": 0.2628, "step": 15724 }, { "epoch": 0.9, "grad_norm": 0.35452990662259, "learning_rate": 4.84730471964231e-07, "loss": 0.2988, "step": 15725 }, { "epoch": 0.9, "grad_norm": 0.41564857624579266, "learning_rate": 4.841582844476244e-07, "loss": 0.2544, "step": 15726 }, { "epoch": 0.9, "grad_norm": 0.2698526571349199, "learning_rate": 4.835864264642076e-07, "loss": 0.1338, "step": 15727 }, { "epoch": 0.9, "grad_norm": 0.5453059779722633, "learning_rate": 4.830148980337834e-07, "loss": 0.3428, "step": 15728 }, { "epoch": 0.9, "grad_norm": 0.3471356881056745, "learning_rate": 4.824436991761428e-07, "loss": 0.3259, "step": 15729 }, { "epoch": 0.9, "grad_norm": 0.34684817296127346, "learning_rate": 4.818728299110686e-07, "loss": 0.2164, "step": 15730 }, { "epoch": 0.9, "grad_norm": 0.8953070877213622, "learning_rate": 4.813022902583286e-07, "loss": 0.4185, "step": 15731 }, { "epoch": 0.9, "grad_norm": 0.41813356179096717, "learning_rate": 4.807320802376824e-07, "loss": 0.2948, "step": 15732 }, { "epoch": 0.9, "grad_norm": 0.1447814308629961, "learning_rate": 4.801621998688722e-07, "loss": 0.0716, "step": 15733 }, { "epoch": 0.9, "grad_norm": 0.3883904878065946, "learning_rate": 4.795926491716396e-07, "loss": 0.3139, "step": 15734 }, { "epoch": 0.9, "grad_norm": 0.6516174961256956, "learning_rate": 4.790234281657025e-07, "loss": 0.3845, "step": 15735 }, { "epoch": 0.9, "grad_norm": 0.3672484706266206, "learning_rate": 4.784545368707738e-07, "loss": 0.204, "step": 15736 }, { "epoch": 0.9, "grad_norm": 0.32729802797217145, "learning_rate": 4.778859753065545e-07, "loss": 0.2943, "step": 15737 }, { "epoch": 0.9, "grad_norm": 0.4014564746228084, "learning_rate": 4.773177434927356e-07, "loss": 0.1903, "step": 15738 }, { "epoch": 0.9, "grad_norm": 0.42003454062639795, "learning_rate": 4.767498414489935e-07, "loss": 0.2329, "step": 15739 }, { "epoch": 0.9, "grad_norm": 0.25069726353847577, "learning_rate": 4.7618226919499465e-07, "loss": 0.233, "step": 15740 }, { "epoch": 0.9, "grad_norm": 0.7447684355120814, "learning_rate": 4.756150267503934e-07, "loss": 0.4077, "step": 15741 }, { "epoch": 0.9, "grad_norm": 0.5399344736483198, "learning_rate": 4.750481141348362e-07, "loss": 0.3129, "step": 15742 }, { "epoch": 0.9, "grad_norm": 0.34196695458758986, "learning_rate": 4.7448153136795185e-07, "loss": 0.2277, "step": 15743 }, { "epoch": 0.9, "grad_norm": 0.38306387196871694, "learning_rate": 4.739152784693635e-07, "loss": 0.2897, "step": 15744 }, { "epoch": 0.9, "grad_norm": 0.2161941592051655, "learning_rate": 4.733493554586777e-07, "loss": 0.1393, "step": 15745 }, { "epoch": 0.9, "grad_norm": 0.359758705296901, "learning_rate": 4.727837623554954e-07, "loss": 0.2245, "step": 15746 }, { "epoch": 0.9, "grad_norm": 0.6364944282856332, "learning_rate": 4.722184991794021e-07, "loss": 0.368, "step": 15747 }, { "epoch": 0.9, "grad_norm": 0.29869548642001714, "learning_rate": 4.7165356594997215e-07, "loss": 0.2794, "step": 15748 }, { "epoch": 0.9, "grad_norm": 0.3514622933369766, "learning_rate": 4.710889626867687e-07, "loss": 0.2274, "step": 15749 }, { "epoch": 0.9, "grad_norm": 0.534507202273457, "learning_rate": 4.7052468940934405e-07, "loss": 0.2445, "step": 15750 }, { "epoch": 0.9, "grad_norm": 0.40165702496329014, "learning_rate": 4.699607461372413e-07, "loss": 0.2209, "step": 15751 }, { "epoch": 0.91, "grad_norm": 0.29933132482387254, "learning_rate": 4.6939713288998824e-07, "loss": 0.2888, "step": 15752 }, { "epoch": 0.91, "grad_norm": 0.39195373443976395, "learning_rate": 4.6883384968710146e-07, "loss": 0.275, "step": 15753 }, { "epoch": 0.91, "grad_norm": 0.804794875047898, "learning_rate": 4.682708965480887e-07, "loss": 0.447, "step": 15754 }, { "epoch": 0.91, "grad_norm": 0.38284619581894447, "learning_rate": 4.677082734924454e-07, "loss": 0.2555, "step": 15755 }, { "epoch": 0.91, "grad_norm": 0.34044252869584235, "learning_rate": 4.67145980539655e-07, "loss": 0.2351, "step": 15756 }, { "epoch": 0.91, "grad_norm": 0.2561805020204338, "learning_rate": 4.665840177091885e-07, "loss": 0.1603, "step": 15757 }, { "epoch": 0.91, "grad_norm": 0.3286363690720469, "learning_rate": 4.66022385020507e-07, "loss": 0.2712, "step": 15758 }, { "epoch": 0.91, "grad_norm": 0.7834022205802545, "learning_rate": 4.6546108249306163e-07, "loss": 0.3245, "step": 15759 }, { "epoch": 0.91, "grad_norm": 0.2849494240717211, "learning_rate": 4.649001101462891e-07, "loss": 0.2749, "step": 15760 }, { "epoch": 0.91, "grad_norm": 0.3713613181819242, "learning_rate": 4.6433946799961605e-07, "loss": 0.2917, "step": 15761 }, { "epoch": 0.91, "grad_norm": 1.3943161939139344, "learning_rate": 4.6377915607245583e-07, "loss": 0.218, "step": 15762 }, { "epoch": 0.91, "grad_norm": 0.35360704239027846, "learning_rate": 4.6321917438421294e-07, "loss": 0.2162, "step": 15763 }, { "epoch": 0.91, "grad_norm": 0.29520125023410765, "learning_rate": 4.626595229542818e-07, "loss": 0.2779, "step": 15764 }, { "epoch": 0.91, "grad_norm": 0.40025625010104504, "learning_rate": 4.621002018020404e-07, "loss": 0.3336, "step": 15765 }, { "epoch": 0.91, "grad_norm": 0.18183834726311107, "learning_rate": 4.615412109468587e-07, "loss": 0.1206, "step": 15766 }, { "epoch": 0.91, "grad_norm": 0.3861098147618549, "learning_rate": 4.6098255040809447e-07, "loss": 0.275, "step": 15767 }, { "epoch": 0.91, "grad_norm": 0.39267454900302395, "learning_rate": 4.604242202050957e-07, "loss": 0.2835, "step": 15768 }, { "epoch": 0.91, "grad_norm": 0.8010348120564672, "learning_rate": 4.5986622035719575e-07, "loss": 0.1828, "step": 15769 }, { "epoch": 0.91, "grad_norm": 0.35292055566037195, "learning_rate": 4.59308550883717e-07, "loss": 0.2744, "step": 15770 }, { "epoch": 0.91, "grad_norm": 0.44243941838987, "learning_rate": 4.5875121180397276e-07, "loss": 0.2978, "step": 15771 }, { "epoch": 0.91, "grad_norm": 0.29214452693872744, "learning_rate": 4.581942031372655e-07, "loss": 0.2196, "step": 15772 }, { "epoch": 0.91, "grad_norm": 0.3078542986697614, "learning_rate": 4.5763752490288194e-07, "loss": 0.242, "step": 15773 }, { "epoch": 0.91, "grad_norm": 0.8844199684923539, "learning_rate": 4.570811771201e-07, "loss": 0.4156, "step": 15774 }, { "epoch": 0.91, "grad_norm": 0.6335765581868591, "learning_rate": 4.5652515980818546e-07, "loss": 0.3745, "step": 15775 }, { "epoch": 0.91, "grad_norm": 0.22674350740370733, "learning_rate": 4.5596947298639614e-07, "loss": 0.212, "step": 15776 }, { "epoch": 0.91, "grad_norm": 1.7659474533173485, "learning_rate": 4.554141166739734e-07, "loss": 0.5087, "step": 15777 }, { "epoch": 0.91, "grad_norm": 0.2534645385419527, "learning_rate": 4.548590908901496e-07, "loss": 0.1883, "step": 15778 }, { "epoch": 0.91, "grad_norm": 0.283779004129321, "learning_rate": 4.5430439565414263e-07, "loss": 0.1839, "step": 15779 }, { "epoch": 0.91, "grad_norm": 0.3476013206398067, "learning_rate": 4.5375003098516613e-07, "loss": 0.2717, "step": 15780 }, { "epoch": 0.91, "grad_norm": 0.5541988538944898, "learning_rate": 4.5319599690241576e-07, "loss": 0.3387, "step": 15781 }, { "epoch": 0.91, "grad_norm": 0.34841705919786675, "learning_rate": 4.5264229342507736e-07, "loss": 0.2278, "step": 15782 }, { "epoch": 0.91, "grad_norm": 1.2826263441085395, "learning_rate": 4.5208892057232446e-07, "loss": 0.6443, "step": 15783 }, { "epoch": 0.91, "grad_norm": 0.25060351873355025, "learning_rate": 4.515358783633228e-07, "loss": 0.238, "step": 15784 }, { "epoch": 0.91, "grad_norm": 0.2457619049739712, "learning_rate": 4.5098316681722266e-07, "loss": 0.1596, "step": 15785 }, { "epoch": 0.91, "grad_norm": 0.7165815667752252, "learning_rate": 4.5043078595316536e-07, "loss": 0.3654, "step": 15786 }, { "epoch": 0.91, "grad_norm": 0.45627630512144096, "learning_rate": 4.4987873579027784e-07, "loss": 0.3349, "step": 15787 }, { "epoch": 0.91, "grad_norm": 0.3075011835404249, "learning_rate": 4.493270163476804e-07, "loss": 0.2568, "step": 15788 }, { "epoch": 0.91, "grad_norm": 0.48719044332200573, "learning_rate": 4.4877562764447766e-07, "loss": 0.2597, "step": 15789 }, { "epoch": 0.91, "grad_norm": 0.5000417018144054, "learning_rate": 4.4822456969976444e-07, "loss": 0.2893, "step": 15790 }, { "epoch": 0.91, "grad_norm": 0.2556509931355065, "learning_rate": 4.4767384253262326e-07, "loss": 0.2022, "step": 15791 }, { "epoch": 0.91, "grad_norm": 0.3334183800000595, "learning_rate": 4.4712344616212433e-07, "loss": 0.246, "step": 15792 }, { "epoch": 0.91, "grad_norm": 0.978515196075592, "learning_rate": 4.4657338060733246e-07, "loss": 0.4858, "step": 15793 }, { "epoch": 0.91, "grad_norm": 0.3399680398954725, "learning_rate": 4.4602364588729243e-07, "loss": 0.2275, "step": 15794 }, { "epoch": 0.91, "grad_norm": 1.5585273086353948, "learning_rate": 4.454742420210434e-07, "loss": 0.3749, "step": 15795 }, { "epoch": 0.91, "grad_norm": 0.3308699135535816, "learning_rate": 4.44925169027608e-07, "loss": 0.3088, "step": 15796 }, { "epoch": 0.91, "grad_norm": 0.24890530106124995, "learning_rate": 4.4437642692600534e-07, "loss": 0.1998, "step": 15797 }, { "epoch": 0.91, "grad_norm": 0.42886569435392907, "learning_rate": 4.4382801573523595e-07, "loss": 0.1926, "step": 15798 }, { "epoch": 0.91, "grad_norm": 0.3308619773627193, "learning_rate": 4.4327993547429225e-07, "loss": 0.2951, "step": 15799 }, { "epoch": 0.91, "grad_norm": 0.32885301500455993, "learning_rate": 4.427321861621514e-07, "loss": 0.2736, "step": 15800 }, { "epoch": 0.91, "grad_norm": 1.1618546884805907, "learning_rate": 4.4218476781778483e-07, "loss": 0.5503, "step": 15801 }, { "epoch": 0.91, "grad_norm": 0.35167729216980037, "learning_rate": 4.416376804601508e-07, "loss": 0.1287, "step": 15802 }, { "epoch": 0.91, "grad_norm": 0.3502201067992776, "learning_rate": 4.410909241081918e-07, "loss": 0.2668, "step": 15803 }, { "epoch": 0.91, "grad_norm": 0.3340832982352592, "learning_rate": 4.405444987808405e-07, "loss": 0.2879, "step": 15804 }, { "epoch": 0.91, "grad_norm": 0.43546774821008033, "learning_rate": 4.39998404497024e-07, "loss": 0.1536, "step": 15805 }, { "epoch": 0.91, "grad_norm": 0.3112353537779798, "learning_rate": 4.3945264127565166e-07, "loss": 0.2528, "step": 15806 }, { "epoch": 0.91, "grad_norm": 0.551693833136559, "learning_rate": 4.389072091356239e-07, "loss": 0.3786, "step": 15807 }, { "epoch": 0.91, "grad_norm": 0.49880144907478224, "learning_rate": 4.383621080958267e-07, "loss": 0.2335, "step": 15808 }, { "epoch": 0.91, "grad_norm": 0.3142694304193792, "learning_rate": 4.378173381751394e-07, "loss": 0.2585, "step": 15809 }, { "epoch": 0.91, "grad_norm": 0.40112648470346424, "learning_rate": 4.372728993924269e-07, "loss": 0.266, "step": 15810 }, { "epoch": 0.91, "grad_norm": 0.2930212171494269, "learning_rate": 4.3672879176654303e-07, "loss": 0.1907, "step": 15811 }, { "epoch": 0.91, "grad_norm": 0.34939185440615567, "learning_rate": 4.3618501531632717e-07, "loss": 0.282, "step": 15812 }, { "epoch": 0.91, "grad_norm": 1.1777285605456114, "learning_rate": 4.3564157006061535e-07, "loss": 0.685, "step": 15813 }, { "epoch": 0.91, "grad_norm": 0.7188027543719689, "learning_rate": 4.3509845601822474e-07, "loss": 0.4602, "step": 15814 }, { "epoch": 0.91, "grad_norm": 0.30789053770206504, "learning_rate": 4.3455567320796366e-07, "loss": 0.216, "step": 15815 }, { "epoch": 0.91, "grad_norm": 0.37512687043786214, "learning_rate": 4.34013221648627e-07, "loss": 0.3312, "step": 15816 }, { "epoch": 0.91, "grad_norm": 0.3616392023467653, "learning_rate": 4.3347110135900094e-07, "loss": 0.1928, "step": 15817 }, { "epoch": 0.91, "grad_norm": 0.31199271737258344, "learning_rate": 4.329293123578604e-07, "loss": 0.1853, "step": 15818 }, { "epoch": 0.91, "grad_norm": 0.4895600138237852, "learning_rate": 4.3238785466396596e-07, "loss": 0.3757, "step": 15819 }, { "epoch": 0.91, "grad_norm": 0.45924672686202866, "learning_rate": 4.318467282960681e-07, "loss": 0.3507, "step": 15820 }, { "epoch": 0.91, "grad_norm": 0.3120934451623118, "learning_rate": 4.3130593327290637e-07, "loss": 0.1888, "step": 15821 }, { "epoch": 0.91, "grad_norm": 0.770298360679297, "learning_rate": 4.307654696132102e-07, "loss": 0.3835, "step": 15822 }, { "epoch": 0.91, "grad_norm": 0.21490372065099173, "learning_rate": 4.302253373356935e-07, "loss": 0.2062, "step": 15823 }, { "epoch": 0.91, "grad_norm": 0.2986581235557719, "learning_rate": 4.296855364590624e-07, "loss": 0.1844, "step": 15824 }, { "epoch": 0.91, "grad_norm": 1.069549265915451, "learning_rate": 4.2914606700200755e-07, "loss": 0.7423, "step": 15825 }, { "epoch": 0.91, "grad_norm": 0.6599039669777897, "learning_rate": 4.286069289832151e-07, "loss": 0.3809, "step": 15826 }, { "epoch": 0.91, "grad_norm": 0.419239808764103, "learning_rate": 4.280681224213523e-07, "loss": 0.2807, "step": 15827 }, { "epoch": 0.91, "grad_norm": 0.31727020613520757, "learning_rate": 4.2752964733507984e-07, "loss": 0.2382, "step": 15828 }, { "epoch": 0.91, "grad_norm": 0.29397819184063156, "learning_rate": 4.2699150374304275e-07, "loss": 0.1889, "step": 15829 }, { "epoch": 0.91, "grad_norm": 0.4446204152271284, "learning_rate": 4.2645369166387727e-07, "loss": 0.2613, "step": 15830 }, { "epoch": 0.91, "grad_norm": 0.39269983702966865, "learning_rate": 4.259162111162107e-07, "loss": 0.2553, "step": 15831 }, { "epoch": 0.91, "grad_norm": 0.45515828284942017, "learning_rate": 4.2537906211865375e-07, "loss": 0.3144, "step": 15832 }, { "epoch": 0.91, "grad_norm": 0.3112784755800918, "learning_rate": 4.2484224468980815e-07, "loss": 0.2632, "step": 15833 }, { "epoch": 0.91, "grad_norm": 1.8587599044679906, "learning_rate": 4.243057588482624e-07, "loss": 0.2075, "step": 15834 }, { "epoch": 0.91, "grad_norm": 0.23261596393289166, "learning_rate": 4.237696046125994e-07, "loss": 0.2007, "step": 15835 }, { "epoch": 0.91, "grad_norm": 0.4171560765483998, "learning_rate": 4.232337820013821e-07, "loss": 0.2908, "step": 15836 }, { "epoch": 0.91, "grad_norm": 0.6894450007615852, "learning_rate": 4.226982910331656e-07, "loss": 0.2665, "step": 15837 }, { "epoch": 0.91, "grad_norm": 0.631586559127634, "learning_rate": 4.2216313172649623e-07, "loss": 0.3694, "step": 15838 }, { "epoch": 0.91, "grad_norm": 0.3701344195302594, "learning_rate": 4.2162830409990583e-07, "loss": 0.2664, "step": 15839 }, { "epoch": 0.91, "grad_norm": 0.32238862938044927, "learning_rate": 4.2109380817191626e-07, "loss": 0.2694, "step": 15840 }, { "epoch": 0.91, "grad_norm": 0.36861815680269605, "learning_rate": 4.205596439610349e-07, "loss": 0.1159, "step": 15841 }, { "epoch": 0.91, "grad_norm": 0.3877743415272305, "learning_rate": 4.2002581148576136e-07, "loss": 0.2594, "step": 15842 }, { "epoch": 0.91, "grad_norm": 0.35220346095956107, "learning_rate": 4.194923107645821e-07, "loss": 0.3235, "step": 15843 }, { "epoch": 0.91, "grad_norm": 0.5239046538366025, "learning_rate": 4.189591418159722e-07, "loss": 0.2708, "step": 15844 }, { "epoch": 0.91, "grad_norm": 0.39195351825581, "learning_rate": 4.1842630465839586e-07, "loss": 0.2784, "step": 15845 }, { "epoch": 0.91, "grad_norm": 0.5495615089399805, "learning_rate": 4.178937993103027e-07, "loss": 0.3197, "step": 15846 }, { "epoch": 0.91, "grad_norm": 0.27379809113010045, "learning_rate": 4.1736162579013694e-07, "loss": 0.1906, "step": 15847 }, { "epoch": 0.91, "grad_norm": 0.2684468644693002, "learning_rate": 4.16829784116326e-07, "loss": 0.2042, "step": 15848 }, { "epoch": 0.91, "grad_norm": 0.4441215646728875, "learning_rate": 4.1629827430728743e-07, "loss": 0.2991, "step": 15849 }, { "epoch": 0.91, "grad_norm": 0.7808231682802714, "learning_rate": 4.157670963814264e-07, "loss": 0.3301, "step": 15850 }, { "epoch": 0.91, "grad_norm": 0.2581293927684206, "learning_rate": 4.1523625035713943e-07, "loss": 0.2583, "step": 15851 }, { "epoch": 0.91, "grad_norm": 0.6004003120556647, "learning_rate": 4.147057362528095e-07, "loss": 0.3379, "step": 15852 }, { "epoch": 0.91, "grad_norm": 0.5608318585976009, "learning_rate": 4.141755540868075e-07, "loss": 0.2723, "step": 15853 }, { "epoch": 0.91, "grad_norm": 0.3815204124123482, "learning_rate": 4.1364570387749324e-07, "loss": 0.213, "step": 15854 }, { "epoch": 0.91, "grad_norm": 0.3536760296474735, "learning_rate": 4.1311618564321534e-07, "loss": 0.2938, "step": 15855 }, { "epoch": 0.91, "grad_norm": 0.4356058557524649, "learning_rate": 4.1258699940231353e-07, "loss": 0.355, "step": 15856 }, { "epoch": 0.91, "grad_norm": 0.18578229436004529, "learning_rate": 4.120581451731109e-07, "loss": 0.0713, "step": 15857 }, { "epoch": 0.91, "grad_norm": 0.4081226800618796, "learning_rate": 4.1152962297392297e-07, "loss": 0.273, "step": 15858 }, { "epoch": 0.91, "grad_norm": 0.37698659558030323, "learning_rate": 4.110014328230505e-07, "loss": 0.3109, "step": 15859 }, { "epoch": 0.91, "grad_norm": 0.5250626358401506, "learning_rate": 4.104735747387867e-07, "loss": 0.2035, "step": 15860 }, { "epoch": 0.91, "grad_norm": 0.3725837020347488, "learning_rate": 4.099460487394114e-07, "loss": 0.3016, "step": 15861 }, { "epoch": 0.91, "grad_norm": 1.2275656487782942, "learning_rate": 4.09418854843191e-07, "loss": 0.7537, "step": 15862 }, { "epoch": 0.91, "grad_norm": 0.2383154149448102, "learning_rate": 4.0889199306838323e-07, "loss": 0.2219, "step": 15863 }, { "epoch": 0.91, "grad_norm": 0.3139643851031367, "learning_rate": 4.083654634332335e-07, "loss": 0.1762, "step": 15864 }, { "epoch": 0.91, "grad_norm": 0.9530704152814369, "learning_rate": 4.07839265955976e-07, "loss": 0.4449, "step": 15865 }, { "epoch": 0.91, "grad_norm": 0.5492062970741735, "learning_rate": 4.073134006548318e-07, "loss": 0.2962, "step": 15866 }, { "epoch": 0.91, "grad_norm": 0.25208480398575406, "learning_rate": 4.06787867548013e-07, "loss": 0.2353, "step": 15867 }, { "epoch": 0.91, "grad_norm": 1.1304469249273643, "learning_rate": 4.062626666537162e-07, "loss": 0.8001, "step": 15868 }, { "epoch": 0.91, "grad_norm": 0.19056843516548483, "learning_rate": 4.0573779799013226e-07, "loss": 0.1272, "step": 15869 }, { "epoch": 0.91, "grad_norm": 0.3996361879336976, "learning_rate": 4.0521326157543563e-07, "loss": 0.2277, "step": 15870 }, { "epoch": 0.91, "grad_norm": 0.3637325778437535, "learning_rate": 4.046890574277895e-07, "loss": 0.2886, "step": 15871 }, { "epoch": 0.91, "grad_norm": 0.5237881779689043, "learning_rate": 4.0416518556534944e-07, "loss": 0.3128, "step": 15872 }, { "epoch": 0.91, "grad_norm": 0.3676819772907118, "learning_rate": 4.0364164600625753e-07, "loss": 0.2235, "step": 15873 }, { "epoch": 0.91, "grad_norm": 0.47406659233561166, "learning_rate": 4.0311843876864155e-07, "loss": 0.3483, "step": 15874 }, { "epoch": 0.91, "grad_norm": 0.2780747777411099, "learning_rate": 4.025955638706203e-07, "loss": 0.2054, "step": 15875 }, { "epoch": 0.91, "grad_norm": 0.28877513130409865, "learning_rate": 4.020730213303037e-07, "loss": 0.2163, "step": 15876 }, { "epoch": 0.91, "grad_norm": 0.6012066973017609, "learning_rate": 4.015508111657862e-07, "loss": 0.2896, "step": 15877 }, { "epoch": 0.91, "grad_norm": 0.77096149159783, "learning_rate": 4.0102893339515e-07, "loss": 0.3788, "step": 15878 }, { "epoch": 0.91, "grad_norm": 0.24983255991293724, "learning_rate": 4.005073880364696e-07, "loss": 0.2577, "step": 15879 }, { "epoch": 0.91, "grad_norm": 0.8911082860363854, "learning_rate": 3.999861751078049e-07, "loss": 0.5244, "step": 15880 }, { "epoch": 0.91, "grad_norm": 0.21183230763143193, "learning_rate": 3.994652946272071e-07, "loss": 0.1423, "step": 15881 }, { "epoch": 0.91, "grad_norm": 0.39131821841894676, "learning_rate": 3.989447466127128e-07, "loss": 0.2829, "step": 15882 }, { "epoch": 0.91, "grad_norm": 0.32030932741388807, "learning_rate": 3.984245310823498e-07, "loss": 0.2383, "step": 15883 }, { "epoch": 0.91, "grad_norm": 0.5922727533151578, "learning_rate": 3.9790464805413044e-07, "loss": 0.3336, "step": 15884 }, { "epoch": 0.91, "grad_norm": 0.3930583813091873, "learning_rate": 3.973850975460614e-07, "loss": 0.33, "step": 15885 }, { "epoch": 0.91, "grad_norm": 0.4788539037882134, "learning_rate": 3.9686587957613377e-07, "loss": 0.2603, "step": 15886 }, { "epoch": 0.91, "grad_norm": 0.2870566855876852, "learning_rate": 3.963469941623288e-07, "loss": 0.2128, "step": 15887 }, { "epoch": 0.91, "grad_norm": 0.2673197346089193, "learning_rate": 3.958284413226121e-07, "loss": 0.2114, "step": 15888 }, { "epoch": 0.91, "grad_norm": 0.5354140355201578, "learning_rate": 3.9531022107494486e-07, "loss": 0.3267, "step": 15889 }, { "epoch": 0.91, "grad_norm": 0.35721314864795733, "learning_rate": 3.9479233343727165e-07, "loss": 0.1804, "step": 15890 }, { "epoch": 0.91, "grad_norm": 0.28916473867304665, "learning_rate": 3.9427477842752693e-07, "loss": 0.2645, "step": 15891 }, { "epoch": 0.91, "grad_norm": 1.0814982506679522, "learning_rate": 3.9375755606363306e-07, "loss": 0.6821, "step": 15892 }, { "epoch": 0.91, "grad_norm": 0.4496409680340063, "learning_rate": 3.9324066636350136e-07, "loss": 0.1017, "step": 15893 }, { "epoch": 0.91, "grad_norm": 0.3384135104379383, "learning_rate": 3.92724109345034e-07, "loss": 0.2612, "step": 15894 }, { "epoch": 0.91, "grad_norm": 0.2919069332707678, "learning_rate": 3.922078850261168e-07, "loss": 0.2677, "step": 15895 }, { "epoch": 0.91, "grad_norm": 1.075665763003214, "learning_rate": 3.9169199342462774e-07, "loss": 0.1625, "step": 15896 }, { "epoch": 0.91, "grad_norm": 0.3534241016716571, "learning_rate": 3.9117643455843016e-07, "loss": 0.2953, "step": 15897 }, { "epoch": 0.91, "grad_norm": 0.5174200198693831, "learning_rate": 3.906612084453809e-07, "loss": 0.3611, "step": 15898 }, { "epoch": 0.91, "grad_norm": 0.408525886252803, "learning_rate": 3.9014631510332135e-07, "loss": 0.2211, "step": 15899 }, { "epoch": 0.91, "grad_norm": 0.396189831826831, "learning_rate": 3.896317545500805e-07, "loss": 0.2782, "step": 15900 }, { "epoch": 0.91, "grad_norm": 0.29171815179801414, "learning_rate": 3.8911752680347857e-07, "loss": 0.1854, "step": 15901 }, { "epoch": 0.91, "grad_norm": 0.3603406957800757, "learning_rate": 3.8860363188132356e-07, "loss": 0.2891, "step": 15902 }, { "epoch": 0.91, "grad_norm": 0.2890968051915757, "learning_rate": 3.880900698014134e-07, "loss": 0.1957, "step": 15903 }, { "epoch": 0.91, "grad_norm": 1.2876865594992615, "learning_rate": 3.8757684058152947e-07, "loss": 0.7166, "step": 15904 }, { "epoch": 0.91, "grad_norm": 0.602658793155093, "learning_rate": 3.8706394423944524e-07, "loss": 0.3449, "step": 15905 }, { "epoch": 0.91, "grad_norm": 0.34185570155092393, "learning_rate": 3.8655138079292444e-07, "loss": 0.2216, "step": 15906 }, { "epoch": 0.91, "grad_norm": 0.2891611853218796, "learning_rate": 3.8603915025971605e-07, "loss": 0.2511, "step": 15907 }, { "epoch": 0.91, "grad_norm": 0.3595831908817427, "learning_rate": 3.855272526575582e-07, "loss": 0.2175, "step": 15908 }, { "epoch": 0.91, "grad_norm": 0.31805676469838995, "learning_rate": 3.8501568800417663e-07, "loss": 0.2128, "step": 15909 }, { "epoch": 0.91, "grad_norm": 0.3700899135798883, "learning_rate": 3.845044563172895e-07, "loss": 0.3155, "step": 15910 }, { "epoch": 0.91, "grad_norm": 0.6586535137821252, "learning_rate": 3.8399355761460036e-07, "loss": 0.3655, "step": 15911 }, { "epoch": 0.91, "grad_norm": 0.3068097109261226, "learning_rate": 3.8348299191380057e-07, "loss": 0.2049, "step": 15912 }, { "epoch": 0.91, "grad_norm": 0.288649576944417, "learning_rate": 3.8297275923256936e-07, "loss": 0.1484, "step": 15913 }, { "epoch": 0.91, "grad_norm": 0.3339644155133282, "learning_rate": 3.824628595885793e-07, "loss": 0.2961, "step": 15914 }, { "epoch": 0.91, "grad_norm": 0.3166963614134166, "learning_rate": 3.8195329299948737e-07, "loss": 0.2589, "step": 15915 }, { "epoch": 0.91, "grad_norm": 0.8291946937549082, "learning_rate": 3.814440594829394e-07, "loss": 0.3325, "step": 15916 }, { "epoch": 0.91, "grad_norm": 0.6188592393447846, "learning_rate": 3.8093515905656797e-07, "loss": 0.3916, "step": 15917 }, { "epoch": 0.91, "grad_norm": 0.3410052252876215, "learning_rate": 3.804265917380001e-07, "loss": 0.2626, "step": 15918 }, { "epoch": 0.91, "grad_norm": 0.3440402045586406, "learning_rate": 3.7991835754484616e-07, "loss": 0.224, "step": 15919 }, { "epoch": 0.91, "grad_norm": 0.2800763142332023, "learning_rate": 3.794104564947054e-07, "loss": 0.1761, "step": 15920 }, { "epoch": 0.91, "grad_norm": 0.31545824296470104, "learning_rate": 3.789028886051671e-07, "loss": 0.2512, "step": 15921 }, { "epoch": 0.91, "grad_norm": 0.5130003457284722, "learning_rate": 3.7839565389380606e-07, "loss": 0.2595, "step": 15922 }, { "epoch": 0.91, "grad_norm": 0.4206333487665775, "learning_rate": 3.7788875237819156e-07, "loss": 0.3264, "step": 15923 }, { "epoch": 0.91, "grad_norm": 0.4075238314358624, "learning_rate": 3.7738218407587514e-07, "loss": 0.2339, "step": 15924 }, { "epoch": 0.91, "grad_norm": 0.40120391979014103, "learning_rate": 3.768759490044005e-07, "loss": 0.1618, "step": 15925 }, { "epoch": 0.92, "grad_norm": 0.24959491992631752, "learning_rate": 3.76370047181297e-07, "loss": 0.2329, "step": 15926 }, { "epoch": 0.92, "grad_norm": 0.35098746815589077, "learning_rate": 3.7586447862408617e-07, "loss": 0.2625, "step": 15927 }, { "epoch": 0.92, "grad_norm": 0.6981973004763733, "learning_rate": 3.7535924335027396e-07, "loss": 0.4138, "step": 15928 }, { "epoch": 0.92, "grad_norm": 0.7501774342098396, "learning_rate": 3.7485434137735754e-07, "loss": 0.2783, "step": 15929 }, { "epoch": 0.92, "grad_norm": 0.3149510966890482, "learning_rate": 3.743497727228207e-07, "loss": 0.2552, "step": 15930 }, { "epoch": 0.92, "grad_norm": 0.3666851503761762, "learning_rate": 3.738455374041372e-07, "loss": 0.3152, "step": 15931 }, { "epoch": 0.92, "grad_norm": 0.1810162314400088, "learning_rate": 3.7334163543876977e-07, "loss": 0.098, "step": 15932 }, { "epoch": 0.92, "grad_norm": 0.304726241285453, "learning_rate": 3.7283806684416777e-07, "loss": 0.259, "step": 15933 }, { "epoch": 0.92, "grad_norm": 0.3382470948180131, "learning_rate": 3.723348316377695e-07, "loss": 0.2932, "step": 15934 }, { "epoch": 0.92, "grad_norm": 0.8426507956023479, "learning_rate": 3.718319298369999e-07, "loss": 0.3081, "step": 15935 }, { "epoch": 0.92, "grad_norm": 0.35343634631778686, "learning_rate": 3.7132936145927835e-07, "loss": 0.2407, "step": 15936 }, { "epoch": 0.92, "grad_norm": 1.545706171785692, "learning_rate": 3.708271265220087e-07, "loss": 0.5846, "step": 15937 }, { "epoch": 0.92, "grad_norm": 0.2584584444218997, "learning_rate": 3.703252250425782e-07, "loss": 0.2462, "step": 15938 }, { "epoch": 0.92, "grad_norm": 0.28528019760679263, "learning_rate": 3.6982365703837286e-07, "loss": 0.2056, "step": 15939 }, { "epoch": 0.92, "grad_norm": 0.6252324364531351, "learning_rate": 3.6932242252675997e-07, "loss": 0.3615, "step": 15940 }, { "epoch": 0.92, "grad_norm": 0.5203778132134718, "learning_rate": 3.6882152152509674e-07, "loss": 0.2695, "step": 15941 }, { "epoch": 0.92, "grad_norm": 0.28609701313106567, "learning_rate": 3.683209540507304e-07, "loss": 0.1995, "step": 15942 }, { "epoch": 0.92, "grad_norm": 0.4592887764684259, "learning_rate": 3.678207201209949e-07, "loss": 0.2873, "step": 15943 }, { "epoch": 0.92, "grad_norm": 0.4645147472870708, "learning_rate": 3.673208197532152e-07, "loss": 0.2766, "step": 15944 }, { "epoch": 0.92, "grad_norm": 0.34419848756733823, "learning_rate": 3.6682125296469973e-07, "loss": 0.1808, "step": 15945 }, { "epoch": 0.92, "grad_norm": 0.3378083035715725, "learning_rate": 3.6632201977275126e-07, "loss": 0.3049, "step": 15946 }, { "epoch": 0.92, "grad_norm": 0.45811410825743665, "learning_rate": 3.658231201946549e-07, "loss": 0.2619, "step": 15947 }, { "epoch": 0.92, "grad_norm": 0.28931462287240073, "learning_rate": 3.6532455424769133e-07, "loss": 0.1857, "step": 15948 }, { "epoch": 0.92, "grad_norm": 1.285888454753432, "learning_rate": 3.6482632194912436e-07, "loss": 0.4671, "step": 15949 }, { "epoch": 0.92, "grad_norm": 0.36370191667119234, "learning_rate": 3.64328423316207e-07, "loss": 0.3186, "step": 15950 }, { "epoch": 0.92, "grad_norm": 0.39727285401672247, "learning_rate": 3.638308583661809e-07, "loss": 0.2611, "step": 15951 }, { "epoch": 0.92, "grad_norm": 0.42335563840042184, "learning_rate": 3.633336271162791e-07, "loss": 0.2897, "step": 15952 }, { "epoch": 0.92, "grad_norm": 0.32788117129585664, "learning_rate": 3.6283672958371987e-07, "loss": 0.1735, "step": 15953 }, { "epoch": 0.92, "grad_norm": 0.2670674166828153, "learning_rate": 3.623401657857095e-07, "loss": 0.2516, "step": 15954 }, { "epoch": 0.92, "grad_norm": 0.5172019125312697, "learning_rate": 3.618439357394443e-07, "loss": 0.2023, "step": 15955 }, { "epoch": 0.92, "grad_norm": 0.765943640524781, "learning_rate": 3.613480394621094e-07, "loss": 0.3744, "step": 15956 }, { "epoch": 0.92, "grad_norm": 0.3254959834274066, "learning_rate": 3.608524769708788e-07, "loss": 0.2652, "step": 15957 }, { "epoch": 0.92, "grad_norm": 0.29855816184102585, "learning_rate": 3.6035724828291096e-07, "loss": 0.2446, "step": 15958 }, { "epoch": 0.92, "grad_norm": 0.28100532394478656, "learning_rate": 3.598623534153578e-07, "loss": 0.2126, "step": 15959 }, { "epoch": 0.92, "grad_norm": 0.42355858312063266, "learning_rate": 3.593677923853556e-07, "loss": 0.3086, "step": 15960 }, { "epoch": 0.92, "grad_norm": 0.5485094544149791, "learning_rate": 3.5887356521003283e-07, "loss": 0.2255, "step": 15961 }, { "epoch": 0.92, "grad_norm": 0.3308684754469159, "learning_rate": 3.583796719065047e-07, "loss": 0.3017, "step": 15962 }, { "epoch": 0.92, "grad_norm": 0.5477406568092534, "learning_rate": 3.578861124918731e-07, "loss": 0.3118, "step": 15963 }, { "epoch": 0.92, "grad_norm": 0.37672975801509384, "learning_rate": 3.5739288698323107e-07, "loss": 0.3116, "step": 15964 }, { "epoch": 0.92, "grad_norm": 0.20393972677598093, "learning_rate": 3.568999953976582e-07, "loss": 0.154, "step": 15965 }, { "epoch": 0.92, "grad_norm": 0.3064175262555501, "learning_rate": 3.564074377522253e-07, "loss": 0.2573, "step": 15966 }, { "epoch": 0.92, "grad_norm": 0.5742413490053281, "learning_rate": 3.5591521406398654e-07, "loss": 0.3097, "step": 15967 }, { "epoch": 0.92, "grad_norm": 0.5365842583125046, "learning_rate": 3.554233243499894e-07, "loss": 0.2825, "step": 15968 }, { "epoch": 0.92, "grad_norm": 0.43114460143022276, "learning_rate": 3.5493176862726794e-07, "loss": 0.253, "step": 15969 }, { "epoch": 0.92, "grad_norm": 0.2924940242715081, "learning_rate": 3.5444054691284535e-07, "loss": 0.2877, "step": 15970 }, { "epoch": 0.92, "grad_norm": 0.2374504232369525, "learning_rate": 3.539496592237335e-07, "loss": 0.1154, "step": 15971 }, { "epoch": 0.92, "grad_norm": 0.2552973385212853, "learning_rate": 3.5345910557692655e-07, "loss": 0.1876, "step": 15972 }, { "epoch": 0.92, "grad_norm": 0.4709639877224451, "learning_rate": 3.529688859894176e-07, "loss": 0.3019, "step": 15973 }, { "epoch": 0.92, "grad_norm": 0.3310973746232077, "learning_rate": 3.5247900047818193e-07, "loss": 0.2604, "step": 15974 }, { "epoch": 0.92, "grad_norm": 0.43855069686197606, "learning_rate": 3.5198944906018273e-07, "loss": 0.269, "step": 15975 }, { "epoch": 0.92, "grad_norm": 0.5369183319927688, "learning_rate": 3.5150023175237303e-07, "loss": 0.3707, "step": 15976 }, { "epoch": 0.92, "grad_norm": 0.3176682852295683, "learning_rate": 3.5101134857169704e-07, "loss": 0.2594, "step": 15977 }, { "epoch": 0.92, "grad_norm": 0.24078063210633607, "learning_rate": 3.505227995350824e-07, "loss": 0.1773, "step": 15978 }, { "epoch": 0.92, "grad_norm": 0.5265718725264137, "learning_rate": 3.5003458465944884e-07, "loss": 0.3234, "step": 15979 }, { "epoch": 0.92, "grad_norm": 0.667710297637546, "learning_rate": 3.495467039617018e-07, "loss": 0.3737, "step": 15980 }, { "epoch": 0.92, "grad_norm": 0.7002359606242801, "learning_rate": 3.4905915745873763e-07, "loss": 0.1409, "step": 15981 }, { "epoch": 0.92, "grad_norm": 0.2570986693620255, "learning_rate": 3.4857194516744075e-07, "loss": 0.2817, "step": 15982 }, { "epoch": 0.92, "grad_norm": 0.4987963183833602, "learning_rate": 3.4808506710468204e-07, "loss": 0.2641, "step": 15983 }, { "epoch": 0.92, "grad_norm": 0.28035991687745376, "learning_rate": 3.4759852328732136e-07, "loss": 0.1183, "step": 15984 }, { "epoch": 0.92, "grad_norm": 0.3520688668657019, "learning_rate": 3.4711231373220854e-07, "loss": 0.2765, "step": 15985 }, { "epoch": 0.92, "grad_norm": 0.3869463660741132, "learning_rate": 3.466264384561824e-07, "loss": 0.2959, "step": 15986 }, { "epoch": 0.92, "grad_norm": 0.41454146732657704, "learning_rate": 3.461408974760683e-07, "loss": 0.1616, "step": 15987 }, { "epoch": 0.92, "grad_norm": 0.5291939781843723, "learning_rate": 3.456556908086783e-07, "loss": 0.4009, "step": 15988 }, { "epoch": 0.92, "grad_norm": 0.48867694572595083, "learning_rate": 3.4517081847081693e-07, "loss": 0.3361, "step": 15989 }, { "epoch": 0.92, "grad_norm": 0.30895000784527976, "learning_rate": 3.44686280479275e-07, "loss": 0.2123, "step": 15990 }, { "epoch": 0.92, "grad_norm": 0.4070864437798445, "learning_rate": 3.442020768508325e-07, "loss": 0.2457, "step": 15991 }, { "epoch": 0.92, "grad_norm": 0.525896819446171, "learning_rate": 3.4371820760225606e-07, "loss": 0.2261, "step": 15992 }, { "epoch": 0.92, "grad_norm": 0.389551843659952, "learning_rate": 3.432346727503033e-07, "loss": 0.2679, "step": 15993 }, { "epoch": 0.92, "grad_norm": 0.29140564211839626, "learning_rate": 3.427514723117187e-07, "loss": 0.2335, "step": 15994 }, { "epoch": 0.92, "grad_norm": 1.1747614721852366, "learning_rate": 3.4226860630323545e-07, "loss": 0.604, "step": 15995 }, { "epoch": 0.92, "grad_norm": 0.4861490590325455, "learning_rate": 3.4178607474157464e-07, "loss": 0.3133, "step": 15996 }, { "epoch": 0.92, "grad_norm": 0.2647163166220459, "learning_rate": 3.413038776434474e-07, "loss": 0.1841, "step": 15997 }, { "epoch": 0.92, "grad_norm": 0.30848881097191544, "learning_rate": 3.408220150255492e-07, "loss": 0.243, "step": 15998 }, { "epoch": 0.92, "grad_norm": 0.7268327398882253, "learning_rate": 3.403404869045712e-07, "loss": 0.3952, "step": 15999 }, { "epoch": 0.92, "grad_norm": 0.3471361648604528, "learning_rate": 3.398592932971878e-07, "loss": 0.2387, "step": 16000 }, { "epoch": 0.92, "grad_norm": 0.4117894608929416, "learning_rate": 3.393784342200601e-07, "loss": 0.2862, "step": 16001 }, { "epoch": 0.92, "grad_norm": 0.7838377566107934, "learning_rate": 3.388979096898415e-07, "loss": 0.3532, "step": 16002 }, { "epoch": 0.92, "grad_norm": 0.34177113425113487, "learning_rate": 3.3841771972317414e-07, "loss": 0.269, "step": 16003 }, { "epoch": 0.92, "grad_norm": 0.301355411444743, "learning_rate": 3.3793786433668596e-07, "loss": 0.0814, "step": 16004 }, { "epoch": 0.92, "grad_norm": 0.30844575612555714, "learning_rate": 3.3745834354699247e-07, "loss": 0.262, "step": 16005 }, { "epoch": 0.92, "grad_norm": 0.3747679957008508, "learning_rate": 3.3697915737070154e-07, "loss": 0.306, "step": 16006 }, { "epoch": 0.92, "grad_norm": 0.7646152394209245, "learning_rate": 3.365003058244076e-07, "loss": 0.3003, "step": 16007 }, { "epoch": 0.92, "grad_norm": 0.536131589767793, "learning_rate": 3.3602178892469193e-07, "loss": 0.3079, "step": 16008 }, { "epoch": 0.92, "grad_norm": 0.3947217103658755, "learning_rate": 3.355436066881268e-07, "loss": 0.283, "step": 16009 }, { "epoch": 0.92, "grad_norm": 0.22066852034689402, "learning_rate": 3.3506575913127006e-07, "loss": 0.1689, "step": 16010 }, { "epoch": 0.92, "grad_norm": 0.5902292519591517, "learning_rate": 3.3458824627067067e-07, "loss": 0.3642, "step": 16011 }, { "epoch": 0.92, "grad_norm": 0.381931762583577, "learning_rate": 3.3411106812286544e-07, "loss": 0.3037, "step": 16012 }, { "epoch": 0.92, "grad_norm": 0.33341807366384135, "learning_rate": 3.336342247043778e-07, "loss": 0.2449, "step": 16013 }, { "epoch": 0.92, "grad_norm": 0.5143593485060982, "learning_rate": 3.331577160317201e-07, "loss": 0.294, "step": 16014 }, { "epoch": 0.92, "grad_norm": 0.4661460986029127, "learning_rate": 3.3268154212139583e-07, "loss": 0.2655, "step": 16015 }, { "epoch": 0.92, "grad_norm": 0.3431806133508932, "learning_rate": 3.3220570298989507e-07, "loss": 0.1848, "step": 16016 }, { "epoch": 0.92, "grad_norm": 0.30176925676400573, "learning_rate": 3.317301986536947e-07, "loss": 0.2456, "step": 16017 }, { "epoch": 0.92, "grad_norm": 0.3353558657145527, "learning_rate": 3.3125502912926044e-07, "loss": 0.2543, "step": 16018 }, { "epoch": 0.92, "grad_norm": 0.5655804810882348, "learning_rate": 3.307801944330491e-07, "loss": 0.3556, "step": 16019 }, { "epoch": 0.92, "grad_norm": 0.33186886555261347, "learning_rate": 3.303056945815053e-07, "loss": 0.0592, "step": 16020 }, { "epoch": 0.92, "grad_norm": 0.27348568532345147, "learning_rate": 3.2983152959105924e-07, "loss": 0.2488, "step": 16021 }, { "epoch": 0.92, "grad_norm": 0.25027602062501897, "learning_rate": 3.2935769947813003e-07, "loss": 0.2053, "step": 16022 }, { "epoch": 0.92, "grad_norm": 0.810186576406879, "learning_rate": 3.2888420425912783e-07, "loss": 0.2417, "step": 16023 }, { "epoch": 0.92, "grad_norm": 0.347940790008678, "learning_rate": 3.2841104395045174e-07, "loss": 0.2546, "step": 16024 }, { "epoch": 0.92, "grad_norm": 0.43409577516476583, "learning_rate": 3.279382185684843e-07, "loss": 0.332, "step": 16025 }, { "epoch": 0.92, "grad_norm": 0.5257069977199804, "learning_rate": 3.274657281296001e-07, "loss": 0.317, "step": 16026 }, { "epoch": 0.92, "grad_norm": 0.3115977415753507, "learning_rate": 3.269935726501616e-07, "loss": 0.1908, "step": 16027 }, { "epoch": 0.92, "grad_norm": 0.31145969908220766, "learning_rate": 3.265217521465203e-07, "loss": 0.1627, "step": 16028 }, { "epoch": 0.92, "grad_norm": 0.304969715955574, "learning_rate": 3.2605026663501403e-07, "loss": 0.271, "step": 16029 }, { "epoch": 0.92, "grad_norm": 0.3198391503543351, "learning_rate": 3.2557911613197213e-07, "loss": 0.2093, "step": 16030 }, { "epoch": 0.92, "grad_norm": 0.5495654930323556, "learning_rate": 3.251083006537081e-07, "loss": 0.3726, "step": 16031 }, { "epoch": 0.92, "grad_norm": 0.9376399677814717, "learning_rate": 3.24637820216529e-07, "loss": 0.4465, "step": 16032 }, { "epoch": 0.92, "grad_norm": 0.27483983067880535, "learning_rate": 3.241676748367251e-07, "loss": 0.2189, "step": 16033 }, { "epoch": 0.92, "grad_norm": 0.3926888204978273, "learning_rate": 3.2369786453057996e-07, "loss": 0.2467, "step": 16034 }, { "epoch": 0.92, "grad_norm": 0.8656459700472929, "learning_rate": 3.232283893143595e-07, "loss": 0.4894, "step": 16035 }, { "epoch": 0.92, "grad_norm": 0.347362517901405, "learning_rate": 3.2275924920432525e-07, "loss": 0.2104, "step": 16036 }, { "epoch": 0.92, "grad_norm": 0.3434981831692561, "learning_rate": 3.2229044421672183e-07, "loss": 0.3017, "step": 16037 }, { "epoch": 0.92, "grad_norm": 0.6020647893512497, "learning_rate": 3.2182197436778527e-07, "loss": 0.212, "step": 16038 }, { "epoch": 0.92, "grad_norm": 0.33810505101496374, "learning_rate": 3.213538396737359e-07, "loss": 0.2483, "step": 16039 }, { "epoch": 0.92, "grad_norm": 0.31886277314169437, "learning_rate": 3.2088604015078737e-07, "loss": 0.0928, "step": 16040 }, { "epoch": 0.92, "grad_norm": 0.3547676364178774, "learning_rate": 3.20418575815139e-07, "loss": 0.2903, "step": 16041 }, { "epoch": 0.92, "grad_norm": 0.330343531602598, "learning_rate": 3.1995144668298004e-07, "loss": 0.2459, "step": 16042 }, { "epoch": 0.92, "grad_norm": 0.8111748137958689, "learning_rate": 3.1948465277048424e-07, "loss": 0.3584, "step": 16043 }, { "epoch": 0.92, "grad_norm": 0.35521287147768266, "learning_rate": 3.190181940938197e-07, "loss": 0.2285, "step": 16044 }, { "epoch": 0.92, "grad_norm": 0.2691973622179739, "learning_rate": 3.185520706691392e-07, "loss": 0.2408, "step": 16045 }, { "epoch": 0.92, "grad_norm": 1.3561969579912896, "learning_rate": 3.1808628251258413e-07, "loss": 0.2051, "step": 16046 }, { "epoch": 0.92, "grad_norm": 0.6097716949414717, "learning_rate": 3.1762082964028385e-07, "loss": 0.3688, "step": 16047 }, { "epoch": 0.92, "grad_norm": 0.37746575318449793, "learning_rate": 3.171557120683588e-07, "loss": 0.2568, "step": 16048 }, { "epoch": 0.92, "grad_norm": 0.28599774774097053, "learning_rate": 3.166909298129139e-07, "loss": 0.2567, "step": 16049 }, { "epoch": 0.92, "grad_norm": 0.2873137393901833, "learning_rate": 3.162264828900474e-07, "loss": 0.1658, "step": 16050 }, { "epoch": 0.92, "grad_norm": 0.3359843271780522, "learning_rate": 3.1576237131584084e-07, "loss": 0.2672, "step": 16051 }, { "epoch": 0.92, "grad_norm": 0.8905987280455674, "learning_rate": 3.1529859510636585e-07, "loss": 0.4539, "step": 16052 }, { "epoch": 0.92, "grad_norm": 0.3531928411300171, "learning_rate": 3.1483515427768506e-07, "loss": 0.2499, "step": 16053 }, { "epoch": 0.92, "grad_norm": 0.32954114792842953, "learning_rate": 3.143720488458457e-07, "loss": 0.2502, "step": 16054 }, { "epoch": 0.92, "grad_norm": 0.3633234307925469, "learning_rate": 3.139092788268872e-07, "loss": 0.2719, "step": 16055 }, { "epoch": 0.92, "grad_norm": 0.29719624462330824, "learning_rate": 3.1344684423683214e-07, "loss": 0.1561, "step": 16056 }, { "epoch": 0.92, "grad_norm": 0.27943018114548945, "learning_rate": 3.1298474509169676e-07, "loss": 0.2495, "step": 16057 }, { "epoch": 0.92, "grad_norm": 0.8756150438352185, "learning_rate": 3.1252298140748374e-07, "loss": 0.3744, "step": 16058 }, { "epoch": 0.92, "grad_norm": 0.719734852026446, "learning_rate": 3.120615532001836e-07, "loss": 0.2874, "step": 16059 }, { "epoch": 0.92, "grad_norm": 0.3029303968028634, "learning_rate": 3.1160046048577365e-07, "loss": 0.2542, "step": 16060 }, { "epoch": 0.92, "grad_norm": 0.3691633990742467, "learning_rate": 3.1113970328022433e-07, "loss": 0.3045, "step": 16061 }, { "epoch": 0.92, "grad_norm": 0.3109473557320388, "learning_rate": 3.106792815994908e-07, "loss": 0.1067, "step": 16062 }, { "epoch": 0.92, "grad_norm": 0.332168543197011, "learning_rate": 3.1021919545951683e-07, "loss": 0.2444, "step": 16063 }, { "epoch": 0.92, "grad_norm": 0.8139045767802737, "learning_rate": 3.0975944487623534e-07, "loss": 0.3702, "step": 16064 }, { "epoch": 0.92, "grad_norm": 0.33745728439735845, "learning_rate": 3.093000298655668e-07, "loss": 0.296, "step": 16065 }, { "epoch": 0.92, "grad_norm": 0.32467491783135444, "learning_rate": 3.0884095044342197e-07, "loss": 0.18, "step": 16066 }, { "epoch": 0.92, "grad_norm": 0.5142360344956464, "learning_rate": 3.0838220662569807e-07, "loss": 0.3945, "step": 16067 }, { "epoch": 0.92, "grad_norm": 0.23229234122279033, "learning_rate": 3.0792379842828234e-07, "loss": 0.2061, "step": 16068 }, { "epoch": 0.92, "grad_norm": 0.4327552771597201, "learning_rate": 3.074657258670477e-07, "loss": 0.1711, "step": 16069 }, { "epoch": 0.92, "grad_norm": 0.4375268155712646, "learning_rate": 3.070079889578592e-07, "loss": 0.3044, "step": 16070 }, { "epoch": 0.92, "grad_norm": 0.9263858301221611, "learning_rate": 3.0655058771656755e-07, "loss": 0.4369, "step": 16071 }, { "epoch": 0.92, "grad_norm": 0.2924196438960813, "learning_rate": 3.060935221590111e-07, "loss": 0.1915, "step": 16072 }, { "epoch": 0.92, "grad_norm": 0.3187869889458041, "learning_rate": 3.056367923010195e-07, "loss": 0.2959, "step": 16073 }, { "epoch": 0.92, "grad_norm": 0.2271257776392587, "learning_rate": 3.0518039815841004e-07, "loss": 0.13, "step": 16074 }, { "epoch": 0.92, "grad_norm": 0.3284032596538808, "learning_rate": 3.0472433974698566e-07, "loss": 0.2056, "step": 16075 }, { "epoch": 0.92, "grad_norm": 0.7342704365795844, "learning_rate": 3.042686170825404e-07, "loss": 0.3854, "step": 16076 }, { "epoch": 0.92, "grad_norm": 0.34661328109772904, "learning_rate": 3.0381323018085496e-07, "loss": 0.3171, "step": 16077 }, { "epoch": 0.92, "grad_norm": 0.38368727591985285, "learning_rate": 3.0335817905770115e-07, "loss": 0.2618, "step": 16078 }, { "epoch": 0.92, "grad_norm": 0.5057502110746248, "learning_rate": 3.029034637288375e-07, "loss": 0.2623, "step": 16079 }, { "epoch": 0.92, "grad_norm": 0.2947538085012821, "learning_rate": 3.024490842100092e-07, "loss": 0.2066, "step": 16080 }, { "epoch": 0.92, "grad_norm": 0.26156274907930127, "learning_rate": 3.019950405169514e-07, "loss": 0.2431, "step": 16081 }, { "epoch": 0.92, "grad_norm": 0.5329165928014303, "learning_rate": 3.015413326653893e-07, "loss": 0.2872, "step": 16082 }, { "epoch": 0.92, "grad_norm": 1.347908220713831, "learning_rate": 3.0108796067103376e-07, "loss": 0.5954, "step": 16083 }, { "epoch": 0.92, "grad_norm": 0.5714727630722087, "learning_rate": 3.0063492454958434e-07, "loss": 0.3317, "step": 16084 }, { "epoch": 0.92, "grad_norm": 0.2701745129360195, "learning_rate": 3.001822243167307e-07, "loss": 0.2408, "step": 16085 }, { "epoch": 0.92, "grad_norm": 0.43758200115949175, "learning_rate": 2.997298599881493e-07, "loss": 0.2901, "step": 16086 }, { "epoch": 0.92, "grad_norm": 0.576522748883847, "learning_rate": 2.992778315795064e-07, "loss": 0.3509, "step": 16087 }, { "epoch": 0.92, "grad_norm": 0.23680880305406177, "learning_rate": 2.988261391064551e-07, "loss": 0.2101, "step": 16088 }, { "epoch": 0.92, "grad_norm": 0.4501988796850427, "learning_rate": 2.9837478258463725e-07, "loss": 0.324, "step": 16089 }, { "epoch": 0.92, "grad_norm": 0.5710226158588361, "learning_rate": 2.9792376202968263e-07, "loss": 0.3863, "step": 16090 }, { "epoch": 0.92, "grad_norm": 0.41143826535651795, "learning_rate": 2.974730774572121e-07, "loss": 0.2862, "step": 16091 }, { "epoch": 0.92, "grad_norm": 0.45703676201711263, "learning_rate": 2.970227288828309e-07, "loss": 0.2569, "step": 16092 }, { "epoch": 0.92, "grad_norm": 0.30212625877525195, "learning_rate": 2.965727163221366e-07, "loss": 0.2634, "step": 16093 }, { "epoch": 0.92, "grad_norm": 0.2798986506628534, "learning_rate": 2.9612303979071e-07, "loss": 0.2054, "step": 16094 }, { "epoch": 0.92, "grad_norm": 1.641861194235436, "learning_rate": 2.9567369930412646e-07, "loss": 0.3078, "step": 16095 }, { "epoch": 0.92, "grad_norm": 0.30785882672148424, "learning_rate": 2.9522469487794467e-07, "loss": 0.2824, "step": 16096 }, { "epoch": 0.92, "grad_norm": 0.37031077069707474, "learning_rate": 2.9477602652771554e-07, "loss": 0.318, "step": 16097 }, { "epoch": 0.92, "grad_norm": 0.6730152886289593, "learning_rate": 2.943276942689732e-07, "loss": 0.2634, "step": 16098 }, { "epoch": 0.92, "grad_norm": 0.4854497688120956, "learning_rate": 2.9387969811724757e-07, "loss": 0.2925, "step": 16099 }, { "epoch": 0.93, "grad_norm": 0.35689041820120426, "learning_rate": 2.9343203808804953e-07, "loss": 0.252, "step": 16100 }, { "epoch": 0.93, "grad_norm": 0.29101679336873626, "learning_rate": 2.9298471419688335e-07, "loss": 0.2409, "step": 16101 }, { "epoch": 0.93, "grad_norm": 0.33551396449091, "learning_rate": 2.925377264592388e-07, "loss": 0.1526, "step": 16102 }, { "epoch": 0.93, "grad_norm": 0.403795727148337, "learning_rate": 2.9209107489059474e-07, "loss": 0.2942, "step": 16103 }, { "epoch": 0.93, "grad_norm": 0.35408713603736885, "learning_rate": 2.916447595064198e-07, "loss": 0.2958, "step": 16104 }, { "epoch": 0.93, "grad_norm": 0.6365072948732012, "learning_rate": 2.9119878032216944e-07, "loss": 0.1797, "step": 16105 }, { "epoch": 0.93, "grad_norm": 0.2916939704666153, "learning_rate": 2.90753137353289e-07, "loss": 0.2252, "step": 16106 }, { "epoch": 0.93, "grad_norm": 0.46903057760889666, "learning_rate": 2.903078306152085e-07, "loss": 0.2796, "step": 16107 }, { "epoch": 0.93, "grad_norm": 0.30356242185573823, "learning_rate": 2.8986286012335105e-07, "loss": 0.2131, "step": 16108 }, { "epoch": 0.93, "grad_norm": 0.3753404509166921, "learning_rate": 2.8941822589312443e-07, "loss": 0.3212, "step": 16109 }, { "epoch": 0.93, "grad_norm": 0.6360470819564403, "learning_rate": 2.889739279399262e-07, "loss": 0.3638, "step": 16110 }, { "epoch": 0.93, "grad_norm": 0.33098743442000983, "learning_rate": 2.885299662791452e-07, "loss": 0.1529, "step": 16111 }, { "epoch": 0.93, "grad_norm": 0.3058883589226098, "learning_rate": 2.880863409261525e-07, "loss": 0.28, "step": 16112 }, { "epoch": 0.93, "grad_norm": 0.25707865836058724, "learning_rate": 2.876430518963136e-07, "loss": 0.1953, "step": 16113 }, { "epoch": 0.93, "grad_norm": 0.6049082040792976, "learning_rate": 2.872000992049773e-07, "loss": 0.3962, "step": 16114 }, { "epoch": 0.93, "grad_norm": 0.36179534195311464, "learning_rate": 2.867574828674824e-07, "loss": 0.2377, "step": 16115 }, { "epoch": 0.93, "grad_norm": 0.35134754663657647, "learning_rate": 2.8631520289916004e-07, "loss": 0.305, "step": 16116 }, { "epoch": 0.93, "grad_norm": 0.6873273078284741, "learning_rate": 2.858732593153246e-07, "loss": 0.2425, "step": 16117 }, { "epoch": 0.93, "grad_norm": 0.4355865580407592, "learning_rate": 2.8543165213128057e-07, "loss": 0.2198, "step": 16118 }, { "epoch": 0.93, "grad_norm": 0.2721525238168374, "learning_rate": 2.8499038136231894e-07, "loss": 0.2223, "step": 16119 }, { "epoch": 0.93, "grad_norm": 0.3305990370089696, "learning_rate": 2.845494470237242e-07, "loss": 0.2855, "step": 16120 }, { "epoch": 0.93, "grad_norm": 0.3257204072520625, "learning_rate": 2.841088491307642e-07, "loss": 0.2172, "step": 16121 }, { "epoch": 0.93, "grad_norm": 0.7299514903666677, "learning_rate": 2.8366858769869663e-07, "loss": 0.4215, "step": 16122 }, { "epoch": 0.93, "grad_norm": 1.0502896884292527, "learning_rate": 2.8322866274276715e-07, "loss": 0.3683, "step": 16123 }, { "epoch": 0.93, "grad_norm": 0.2414259325709617, "learning_rate": 2.8278907427821245e-07, "loss": 0.223, "step": 16124 }, { "epoch": 0.93, "grad_norm": 0.4580905634662621, "learning_rate": 2.8234982232025365e-07, "loss": 0.2462, "step": 16125 }, { "epoch": 0.93, "grad_norm": 0.6134757196152566, "learning_rate": 2.8191090688410305e-07, "loss": 0.3354, "step": 16126 }, { "epoch": 0.93, "grad_norm": 0.34813149354241363, "learning_rate": 2.8147232798496074e-07, "loss": 0.2758, "step": 16127 }, { "epoch": 0.93, "grad_norm": 0.2445101180031528, "learning_rate": 2.810340856380112e-07, "loss": 0.1985, "step": 16128 }, { "epoch": 0.93, "grad_norm": 0.9321604201835165, "learning_rate": 2.8059617985843557e-07, "loss": 0.3523, "step": 16129 }, { "epoch": 0.93, "grad_norm": 0.36475217838282614, "learning_rate": 2.801586106613963e-07, "loss": 0.2633, "step": 16130 }, { "epoch": 0.93, "grad_norm": 0.9028096980700738, "learning_rate": 2.797213780620456e-07, "loss": 0.2922, "step": 16131 }, { "epoch": 0.93, "grad_norm": 0.2847058382844327, "learning_rate": 2.7928448207552474e-07, "loss": 0.2463, "step": 16132 }, { "epoch": 0.93, "grad_norm": 0.38924969775553325, "learning_rate": 2.7884792271696603e-07, "loss": 0.3169, "step": 16133 }, { "epoch": 0.93, "grad_norm": 0.24599208379016946, "learning_rate": 2.784117000014852e-07, "loss": 0.1124, "step": 16134 }, { "epoch": 0.93, "grad_norm": 0.6790767935493142, "learning_rate": 2.7797581394418907e-07, "loss": 0.3211, "step": 16135 }, { "epoch": 0.93, "grad_norm": 0.308392724398636, "learning_rate": 2.7754026456017104e-07, "loss": 0.2932, "step": 16136 }, { "epoch": 0.93, "grad_norm": 0.4612725789887395, "learning_rate": 2.7710505186451684e-07, "loss": 0.2619, "step": 16137 }, { "epoch": 0.93, "grad_norm": 0.6119165445617544, "learning_rate": 2.766701758722967e-07, "loss": 0.3625, "step": 16138 }, { "epoch": 0.93, "grad_norm": 0.3401083159351843, "learning_rate": 2.7623563659857186e-07, "loss": 0.3047, "step": 16139 }, { "epoch": 0.93, "grad_norm": 0.22356893956156418, "learning_rate": 2.758014340583859e-07, "loss": 0.216, "step": 16140 }, { "epoch": 0.93, "grad_norm": 1.9203710855895133, "learning_rate": 2.7536756826678e-07, "loss": 0.199, "step": 16141 }, { "epoch": 0.93, "grad_norm": 0.3429263728146882, "learning_rate": 2.749340392387767e-07, "loss": 0.2606, "step": 16142 }, { "epoch": 0.93, "grad_norm": 0.6493811930991484, "learning_rate": 2.745008469893884e-07, "loss": 0.3888, "step": 16143 }, { "epoch": 0.93, "grad_norm": 0.3245298437449268, "learning_rate": 2.740679915336175e-07, "loss": 0.2407, "step": 16144 }, { "epoch": 0.93, "grad_norm": 0.3407142144619474, "learning_rate": 2.7363547288645544e-07, "loss": 0.3001, "step": 16145 }, { "epoch": 0.93, "grad_norm": 0.4127223640520308, "learning_rate": 2.732032910628779e-07, "loss": 0.2387, "step": 16146 }, { "epoch": 0.93, "grad_norm": 0.4410479453956271, "learning_rate": 2.727714460778519e-07, "loss": 0.0986, "step": 16147 }, { "epoch": 0.93, "grad_norm": 0.23114652387431825, "learning_rate": 2.7233993794633205e-07, "loss": 0.2363, "step": 16148 }, { "epoch": 0.93, "grad_norm": 0.7707876240880364, "learning_rate": 2.7190876668326207e-07, "loss": 0.4322, "step": 16149 }, { "epoch": 0.93, "grad_norm": 0.5681333562696601, "learning_rate": 2.7147793230357434e-07, "loss": 0.3007, "step": 16150 }, { "epoch": 0.93, "grad_norm": 0.334019256141992, "learning_rate": 2.71047434822187e-07, "loss": 0.2636, "step": 16151 }, { "epoch": 0.93, "grad_norm": 0.25145171079300954, "learning_rate": 2.7061727425400695e-07, "loss": 0.2368, "step": 16152 }, { "epoch": 0.93, "grad_norm": 0.37370438219164975, "learning_rate": 2.701874506139335e-07, "loss": 0.2354, "step": 16153 }, { "epoch": 0.93, "grad_norm": 0.32680459189413763, "learning_rate": 2.6975796391685125e-07, "loss": 0.1946, "step": 16154 }, { "epoch": 0.93, "grad_norm": 0.5853293839882767, "learning_rate": 2.6932881417763067e-07, "loss": 0.342, "step": 16155 }, { "epoch": 0.93, "grad_norm": 0.3247975509729384, "learning_rate": 2.689000014111365e-07, "loss": 0.3094, "step": 16156 }, { "epoch": 0.93, "grad_norm": 0.33160684037157295, "learning_rate": 2.684715256322146e-07, "loss": 0.2061, "step": 16157 }, { "epoch": 0.93, "grad_norm": 0.36609071143521194, "learning_rate": 2.680433868557064e-07, "loss": 0.2004, "step": 16158 }, { "epoch": 0.93, "grad_norm": 1.2305571895568488, "learning_rate": 2.676155850964379e-07, "loss": 0.4733, "step": 16159 }, { "epoch": 0.93, "grad_norm": 0.22411989695587825, "learning_rate": 2.6718812036922283e-07, "loss": 0.2119, "step": 16160 }, { "epoch": 0.93, "grad_norm": 0.6334435102540285, "learning_rate": 2.6676099268886367e-07, "loss": 0.3685, "step": 16161 }, { "epoch": 0.93, "grad_norm": 1.2434403340381204, "learning_rate": 2.6633420207015426e-07, "loss": 0.8358, "step": 16162 }, { "epoch": 0.93, "grad_norm": 0.2768553025982602, "learning_rate": 2.659077485278716e-07, "loss": 0.1891, "step": 16163 }, { "epoch": 0.93, "grad_norm": 1.8087700309256254, "learning_rate": 2.6548163207678614e-07, "loss": 0.3039, "step": 16164 }, { "epoch": 0.93, "grad_norm": 0.3263779764663397, "learning_rate": 2.6505585273165156e-07, "loss": 0.1664, "step": 16165 }, { "epoch": 0.93, "grad_norm": 0.31180764115083154, "learning_rate": 2.6463041050721615e-07, "loss": 0.2553, "step": 16166 }, { "epoch": 0.93, "grad_norm": 0.8241373179238439, "learning_rate": 2.642053054182103e-07, "loss": 0.3087, "step": 16167 }, { "epoch": 0.93, "grad_norm": 0.3446713828640923, "learning_rate": 2.637805374793556e-07, "loss": 0.3118, "step": 16168 }, { "epoch": 0.93, "grad_norm": 0.32013571828818826, "learning_rate": 2.633561067053625e-07, "loss": 0.2598, "step": 16169 }, { "epoch": 0.93, "grad_norm": 0.4038815496361895, "learning_rate": 2.629320131109281e-07, "loss": 0.1137, "step": 16170 }, { "epoch": 0.93, "grad_norm": 0.2753926358926855, "learning_rate": 2.6250825671074065e-07, "loss": 0.2389, "step": 16171 }, { "epoch": 0.93, "grad_norm": 0.4119719794526924, "learning_rate": 2.6208483751947286e-07, "loss": 0.2821, "step": 16172 }, { "epoch": 0.93, "grad_norm": 0.47065265482665175, "learning_rate": 2.6166175555178864e-07, "loss": 0.2604, "step": 16173 }, { "epoch": 0.93, "grad_norm": 0.7827261475775145, "learning_rate": 2.612390108223395e-07, "loss": 0.579, "step": 16174 }, { "epoch": 0.93, "grad_norm": 0.33742011781888565, "learning_rate": 2.6081660334576376e-07, "loss": 0.2539, "step": 16175 }, { "epoch": 0.93, "grad_norm": 0.35489690817213293, "learning_rate": 2.6039453313669084e-07, "loss": 0.224, "step": 16176 }, { "epoch": 0.93, "grad_norm": 0.40363854862203224, "learning_rate": 2.599728002097346e-07, "loss": 0.2276, "step": 16177 }, { "epoch": 0.93, "grad_norm": 0.2998525771419882, "learning_rate": 2.5955140457950334e-07, "loss": 0.254, "step": 16178 }, { "epoch": 0.93, "grad_norm": 0.3225328457185091, "learning_rate": 2.591303462605876e-07, "loss": 0.2568, "step": 16179 }, { "epoch": 0.93, "grad_norm": 0.40344121357429547, "learning_rate": 2.587096252675703e-07, "loss": 0.2584, "step": 16180 }, { "epoch": 0.93, "grad_norm": 0.4726299423223656, "learning_rate": 2.5828924161501847e-07, "loss": 0.2424, "step": 16181 }, { "epoch": 0.93, "grad_norm": 1.2427679629599802, "learning_rate": 2.5786919531749056e-07, "loss": 0.5494, "step": 16182 }, { "epoch": 0.93, "grad_norm": 0.3302371945217039, "learning_rate": 2.5744948638953495e-07, "loss": 0.2585, "step": 16183 }, { "epoch": 0.93, "grad_norm": 0.2960406165691513, "learning_rate": 2.570301148456833e-07, "loss": 0.246, "step": 16184 }, { "epoch": 0.93, "grad_norm": 0.22857777782773264, "learning_rate": 2.566110807004618e-07, "loss": 0.1342, "step": 16185 }, { "epoch": 0.93, "grad_norm": 0.9455741587639533, "learning_rate": 2.5619238396837665e-07, "loss": 0.5122, "step": 16186 }, { "epoch": 0.93, "grad_norm": 0.3025016716582683, "learning_rate": 2.5577402466393286e-07, "loss": 0.238, "step": 16187 }, { "epoch": 0.93, "grad_norm": 0.39578317038127414, "learning_rate": 2.5535600280161444e-07, "loss": 0.3052, "step": 16188 }, { "epoch": 0.93, "grad_norm": 0.5815913779636789, "learning_rate": 2.549383183958998e-07, "loss": 0.3511, "step": 16189 }, { "epoch": 0.93, "grad_norm": 0.38518298786026156, "learning_rate": 2.5452097146125063e-07, "loss": 0.1683, "step": 16190 }, { "epoch": 0.93, "grad_norm": 0.22205133621720435, "learning_rate": 2.5410396201212105e-07, "loss": 0.2007, "step": 16191 }, { "epoch": 0.93, "grad_norm": 0.5057323622641279, "learning_rate": 2.536872900629539e-07, "loss": 0.3944, "step": 16192 }, { "epoch": 0.93, "grad_norm": 0.3595273845184508, "learning_rate": 2.5327095562817537e-07, "loss": 0.1673, "step": 16193 }, { "epoch": 0.93, "grad_norm": 0.5360787331974713, "learning_rate": 2.528549587222051e-07, "loss": 0.3074, "step": 16194 }, { "epoch": 0.93, "grad_norm": 0.442058729074706, "learning_rate": 2.524392993594482e-07, "loss": 0.3311, "step": 16195 }, { "epoch": 0.93, "grad_norm": 0.2855255735605113, "learning_rate": 2.5202397755429876e-07, "loss": 0.2012, "step": 16196 }, { "epoch": 0.93, "grad_norm": 0.23458277362119945, "learning_rate": 2.5160899332114075e-07, "loss": 0.1566, "step": 16197 }, { "epoch": 0.93, "grad_norm": 1.307433304730941, "learning_rate": 2.5119434667434384e-07, "loss": 0.7377, "step": 16198 }, { "epoch": 0.93, "grad_norm": 0.3418249915562307, "learning_rate": 2.507800376282665e-07, "loss": 0.1928, "step": 16199 }, { "epoch": 0.93, "grad_norm": 0.3322723765898175, "learning_rate": 2.5036606619725847e-07, "loss": 0.3077, "step": 16200 }, { "epoch": 0.93, "grad_norm": 0.7679882980968665, "learning_rate": 2.499524323956537e-07, "loss": 0.4053, "step": 16201 }, { "epoch": 0.93, "grad_norm": 0.5105729769391152, "learning_rate": 2.495391362377775e-07, "loss": 0.2702, "step": 16202 }, { "epoch": 0.93, "grad_norm": 0.20248897495158377, "learning_rate": 2.4912617773794057e-07, "loss": 0.16, "step": 16203 }, { "epoch": 0.93, "grad_norm": 0.45523324068273235, "learning_rate": 2.4871355691044595e-07, "loss": 0.3446, "step": 16204 }, { "epoch": 0.93, "grad_norm": 0.31116609474106843, "learning_rate": 2.4830127376958113e-07, "loss": 0.2482, "step": 16205 }, { "epoch": 0.93, "grad_norm": 0.7147870871308801, "learning_rate": 2.4788932832962465e-07, "loss": 0.2833, "step": 16206 }, { "epoch": 0.93, "grad_norm": 0.36457125106980254, "learning_rate": 2.4747772060484064e-07, "loss": 0.2912, "step": 16207 }, { "epoch": 0.93, "grad_norm": 0.5240461723284267, "learning_rate": 2.470664506094844e-07, "loss": 0.2765, "step": 16208 }, { "epoch": 0.93, "grad_norm": 0.22721409677761747, "learning_rate": 2.466555183577968e-07, "loss": 0.146, "step": 16209 }, { "epoch": 0.93, "grad_norm": 0.8567658532302702, "learning_rate": 2.462449238640097e-07, "loss": 0.5097, "step": 16210 }, { "epoch": 0.93, "grad_norm": 0.37293244224659416, "learning_rate": 2.458346671423406e-07, "loss": 0.2645, "step": 16211 }, { "epoch": 0.93, "grad_norm": 0.27145054299585214, "learning_rate": 2.4542474820699823e-07, "loss": 0.244, "step": 16212 }, { "epoch": 0.93, "grad_norm": 1.3981516737715696, "learning_rate": 2.450151670721768e-07, "loss": 0.4875, "step": 16213 }, { "epoch": 0.93, "grad_norm": 0.5986289510427129, "learning_rate": 2.446059237520615e-07, "loss": 0.2993, "step": 16214 }, { "epoch": 0.93, "grad_norm": 0.22481400806917337, "learning_rate": 2.441970182608222e-07, "loss": 0.2347, "step": 16215 }, { "epoch": 0.93, "grad_norm": 0.30778683391514594, "learning_rate": 2.43788450612622e-07, "loss": 0.1411, "step": 16216 }, { "epoch": 0.93, "grad_norm": 0.41000123055651855, "learning_rate": 2.433802208216085e-07, "loss": 0.2763, "step": 16217 }, { "epoch": 0.93, "grad_norm": 0.4744266292202893, "learning_rate": 2.42972328901917e-07, "loss": 0.3694, "step": 16218 }, { "epoch": 0.93, "grad_norm": 0.3541890062856887, "learning_rate": 2.4256477486767517e-07, "loss": 0.2324, "step": 16219 }, { "epoch": 0.93, "grad_norm": 0.41346592434653073, "learning_rate": 2.4215755873299497e-07, "loss": 0.2446, "step": 16220 }, { "epoch": 0.93, "grad_norm": 0.5884279680128669, "learning_rate": 2.4175068051197957e-07, "loss": 0.3041, "step": 16221 }, { "epoch": 0.93, "grad_norm": 0.4402183600759972, "learning_rate": 2.413441402187178e-07, "loss": 0.2166, "step": 16222 }, { "epoch": 0.93, "grad_norm": 0.2384371145087381, "learning_rate": 2.4093793786728934e-07, "loss": 0.2403, "step": 16223 }, { "epoch": 0.93, "grad_norm": 0.4502437190455561, "learning_rate": 2.4053207347175976e-07, "loss": 0.3311, "step": 16224 }, { "epoch": 0.93, "grad_norm": 0.4684769525518414, "learning_rate": 2.401265470461855e-07, "loss": 0.12, "step": 16225 }, { "epoch": 0.93, "grad_norm": 0.5547322450352692, "learning_rate": 2.397213586046099e-07, "loss": 0.2999, "step": 16226 }, { "epoch": 0.93, "grad_norm": 0.2818633700967168, "learning_rate": 2.3931650816106267e-07, "loss": 0.2502, "step": 16227 }, { "epoch": 0.93, "grad_norm": 0.4363782611754942, "learning_rate": 2.3891199572956493e-07, "loss": 0.3209, "step": 16228 }, { "epoch": 0.93, "grad_norm": 0.47684193737555375, "learning_rate": 2.3850782132412544e-07, "loss": 0.2063, "step": 16229 }, { "epoch": 0.93, "grad_norm": 0.3005978735016527, "learning_rate": 2.3810398495874076e-07, "loss": 0.2733, "step": 16230 }, { "epoch": 0.93, "grad_norm": 0.2758614772557928, "learning_rate": 2.377004866473953e-07, "loss": 0.252, "step": 16231 }, { "epoch": 0.93, "grad_norm": 0.5232494032312904, "learning_rate": 2.3729732640406233e-07, "loss": 0.0864, "step": 16232 }, { "epoch": 0.93, "grad_norm": 0.43196860650976154, "learning_rate": 2.3689450424270179e-07, "loss": 0.2845, "step": 16233 }, { "epoch": 0.93, "grad_norm": 0.6300939297391976, "learning_rate": 2.364920201772658e-07, "loss": 0.3533, "step": 16234 }, { "epoch": 0.93, "grad_norm": 0.24702854532908614, "learning_rate": 2.3608987422169106e-07, "loss": 0.2239, "step": 16235 }, { "epoch": 0.93, "grad_norm": 0.5147166917845664, "learning_rate": 2.3568806638990527e-07, "loss": 0.384, "step": 16236 }, { "epoch": 0.93, "grad_norm": 0.34917477325603674, "learning_rate": 2.3528659669581954e-07, "loss": 0.1735, "step": 16237 }, { "epoch": 0.93, "grad_norm": 0.35170576502926393, "learning_rate": 2.3488546515334054e-07, "loss": 0.1596, "step": 16238 }, { "epoch": 0.93, "grad_norm": 0.35057358099260666, "learning_rate": 2.3448467177635826e-07, "loss": 0.291, "step": 16239 }, { "epoch": 0.93, "grad_norm": 0.43437427364444037, "learning_rate": 2.3408421657875158e-07, "loss": 0.3281, "step": 16240 }, { "epoch": 0.93, "grad_norm": 0.4994355097839041, "learning_rate": 2.3368409957438832e-07, "loss": 0.3334, "step": 16241 }, { "epoch": 0.93, "grad_norm": 0.3989427545060095, "learning_rate": 2.3328432077712516e-07, "loss": 0.2308, "step": 16242 }, { "epoch": 0.93, "grad_norm": 0.22736027420925461, "learning_rate": 2.3288488020080546e-07, "loss": 0.2094, "step": 16243 }, { "epoch": 0.93, "grad_norm": 1.355549695082054, "learning_rate": 2.3248577785926264e-07, "loss": 0.3942, "step": 16244 }, { "epoch": 0.93, "grad_norm": 0.3061093657317648, "learning_rate": 2.3208701376631672e-07, "loss": 0.2012, "step": 16245 }, { "epoch": 0.93, "grad_norm": 0.4283494643377407, "learning_rate": 2.3168858793577774e-07, "loss": 0.3167, "step": 16246 }, { "epoch": 0.93, "grad_norm": 0.40586491113206713, "learning_rate": 2.3129050038144362e-07, "loss": 0.3005, "step": 16247 }, { "epoch": 0.93, "grad_norm": 0.30763307469813905, "learning_rate": 2.3089275111709886e-07, "loss": 0.2203, "step": 16248 }, { "epoch": 0.93, "grad_norm": 0.2669467232153082, "learning_rate": 2.3049534015651686e-07, "loss": 0.1807, "step": 16249 }, { "epoch": 0.93, "grad_norm": 0.7019907586813147, "learning_rate": 2.3009826751346332e-07, "loss": 0.3552, "step": 16250 }, { "epoch": 0.93, "grad_norm": 0.2421896836266599, "learning_rate": 2.2970153320168498e-07, "loss": 0.2243, "step": 16251 }, { "epoch": 0.93, "grad_norm": 0.8531496453021868, "learning_rate": 2.293051372349231e-07, "loss": 0.404, "step": 16252 }, { "epoch": 0.93, "grad_norm": 0.7552706084359576, "learning_rate": 2.2890907962690335e-07, "loss": 0.548, "step": 16253 }, { "epoch": 0.93, "grad_norm": 0.3945319567289995, "learning_rate": 2.2851336039134363e-07, "loss": 0.2455, "step": 16254 }, { "epoch": 0.93, "grad_norm": 0.21129773765826465, "learning_rate": 2.2811797954194527e-07, "loss": 0.172, "step": 16255 }, { "epoch": 0.93, "grad_norm": 0.5848773278732337, "learning_rate": 2.2772293709240168e-07, "loss": 0.3105, "step": 16256 }, { "epoch": 0.93, "grad_norm": 0.39209953000005016, "learning_rate": 2.2732823305639197e-07, "loss": 0.3028, "step": 16257 }, { "epoch": 0.93, "grad_norm": 0.45565939341834677, "learning_rate": 2.2693386744758405e-07, "loss": 0.2577, "step": 16258 }, { "epoch": 0.93, "grad_norm": 0.3638561329873051, "learning_rate": 2.2653984027963704e-07, "loss": 0.2771, "step": 16259 }, { "epoch": 0.93, "grad_norm": 0.3977592178558374, "learning_rate": 2.2614615156619556e-07, "loss": 0.3164, "step": 16260 }, { "epoch": 0.93, "grad_norm": 0.2486224244899392, "learning_rate": 2.2575280132089315e-07, "loss": 0.0902, "step": 16261 }, { "epoch": 0.93, "grad_norm": 0.41724306546277456, "learning_rate": 2.2535978955734895e-07, "loss": 0.2994, "step": 16262 }, { "epoch": 0.93, "grad_norm": 0.26274641689573147, "learning_rate": 2.2496711628917644e-07, "loss": 0.2548, "step": 16263 }, { "epoch": 0.93, "grad_norm": 0.7297326479778752, "learning_rate": 2.2457478152997148e-07, "loss": 0.4205, "step": 16264 }, { "epoch": 0.93, "grad_norm": 1.0018169427080976, "learning_rate": 2.2418278529332203e-07, "loss": 0.4632, "step": 16265 }, { "epoch": 0.93, "grad_norm": 0.3327648178772939, "learning_rate": 2.2379112759280176e-07, "loss": 0.2609, "step": 16266 }, { "epoch": 0.93, "grad_norm": 0.23721807132324804, "learning_rate": 2.2339980844197527e-07, "loss": 0.2063, "step": 16267 }, { "epoch": 0.93, "grad_norm": 0.5633019302848433, "learning_rate": 2.2300882785439183e-07, "loss": 0.2385, "step": 16268 }, { "epoch": 0.93, "grad_norm": 0.39111215606894656, "learning_rate": 2.2261818584359386e-07, "loss": 0.2894, "step": 16269 }, { "epoch": 0.93, "grad_norm": 1.4038427231112138, "learning_rate": 2.222278824231061e-07, "loss": 0.4999, "step": 16270 }, { "epoch": 0.93, "grad_norm": 0.2967884736462052, "learning_rate": 2.2183791760644668e-07, "loss": 0.2486, "step": 16271 }, { "epoch": 0.93, "grad_norm": 0.34349186837418483, "learning_rate": 2.214482914071203e-07, "loss": 0.2819, "step": 16272 }, { "epoch": 0.93, "grad_norm": 0.8920189124308012, "learning_rate": 2.2105900383861956e-07, "loss": 0.4225, "step": 16273 }, { "epoch": 0.94, "grad_norm": 0.28684107801549114, "learning_rate": 2.2067005491442362e-07, "loss": 0.244, "step": 16274 }, { "epoch": 0.94, "grad_norm": 0.3494953028521881, "learning_rate": 2.2028144464800393e-07, "loss": 0.275, "step": 16275 }, { "epoch": 0.94, "grad_norm": 0.35670384309614495, "learning_rate": 2.1989317305281755e-07, "loss": 0.179, "step": 16276 }, { "epoch": 0.94, "grad_norm": 0.996066078136281, "learning_rate": 2.1950524014231033e-07, "loss": 0.4171, "step": 16277 }, { "epoch": 0.94, "grad_norm": 0.347523126063495, "learning_rate": 2.191176459299138e-07, "loss": 0.1977, "step": 16278 }, { "epoch": 0.94, "grad_norm": 0.32388881265928143, "learning_rate": 2.1873039042905497e-07, "loss": 0.2703, "step": 16279 }, { "epoch": 0.94, "grad_norm": 0.39745643594948593, "learning_rate": 2.183434736531409e-07, "loss": 0.2683, "step": 16280 }, { "epoch": 0.94, "grad_norm": 0.22006091226414445, "learning_rate": 2.1795689561557308e-07, "loss": 0.152, "step": 16281 }, { "epoch": 0.94, "grad_norm": 0.34373104169964486, "learning_rate": 2.175706563297353e-07, "loss": 0.2868, "step": 16282 }, { "epoch": 0.94, "grad_norm": 1.263619920512286, "learning_rate": 2.1718475580900567e-07, "loss": 0.7482, "step": 16283 }, { "epoch": 0.94, "grad_norm": 0.29616054478209414, "learning_rate": 2.16799194066748e-07, "loss": 0.1906, "step": 16284 }, { "epoch": 0.94, "grad_norm": 0.5626197863716071, "learning_rate": 2.164139711163138e-07, "loss": 0.3607, "step": 16285 }, { "epoch": 0.94, "grad_norm": 0.3590447014920199, "learning_rate": 2.160290869710424e-07, "loss": 0.3023, "step": 16286 }, { "epoch": 0.94, "grad_norm": 0.22648512679657767, "learning_rate": 2.1564454164426207e-07, "loss": 0.1571, "step": 16287 }, { "epoch": 0.94, "grad_norm": 0.4333570135949622, "learning_rate": 2.1526033514929213e-07, "loss": 0.2604, "step": 16288 }, { "epoch": 0.94, "grad_norm": 1.220905292397416, "learning_rate": 2.1487646749943524e-07, "loss": 0.5481, "step": 16289 }, { "epoch": 0.94, "grad_norm": 0.32203788282666024, "learning_rate": 2.1449293870798637e-07, "loss": 0.2688, "step": 16290 }, { "epoch": 0.94, "grad_norm": 0.34693965055254655, "learning_rate": 2.1410974878822487e-07, "loss": 0.2552, "step": 16291 }, { "epoch": 0.94, "grad_norm": 0.551396319559656, "learning_rate": 2.1372689775342238e-07, "loss": 0.2369, "step": 16292 }, { "epoch": 0.94, "grad_norm": 0.30887122673256734, "learning_rate": 2.1334438561683713e-07, "loss": 0.2373, "step": 16293 }, { "epoch": 0.94, "grad_norm": 0.22820054330620598, "learning_rate": 2.1296221239171523e-07, "loss": 0.1915, "step": 16294 }, { "epoch": 0.94, "grad_norm": 1.137231060487121, "learning_rate": 2.125803780912894e-07, "loss": 0.7172, "step": 16295 }, { "epoch": 0.94, "grad_norm": 0.40545905296169865, "learning_rate": 2.1219888272878575e-07, "loss": 0.2602, "step": 16296 }, { "epoch": 0.94, "grad_norm": 0.4730775018614647, "learning_rate": 2.118177263174137e-07, "loss": 0.2668, "step": 16297 }, { "epoch": 0.94, "grad_norm": 0.34205215126746363, "learning_rate": 2.114369088703727e-07, "loss": 0.2772, "step": 16298 }, { "epoch": 0.94, "grad_norm": 0.32729918038453004, "learning_rate": 2.1105643040085112e-07, "loss": 0.2464, "step": 16299 }, { "epoch": 0.94, "grad_norm": 0.24639056641925142, "learning_rate": 2.106762909220228e-07, "loss": 0.1184, "step": 16300 }, { "epoch": 0.94, "grad_norm": 0.99073060626058, "learning_rate": 2.10296490447055e-07, "loss": 0.4329, "step": 16301 }, { "epoch": 0.94, "grad_norm": 0.32121737540817974, "learning_rate": 2.0991702898909838e-07, "loss": 0.2518, "step": 16302 }, { "epoch": 0.94, "grad_norm": 0.3301117105900323, "learning_rate": 2.0953790656129457e-07, "loss": 0.3058, "step": 16303 }, { "epoch": 0.94, "grad_norm": 1.7456922022694272, "learning_rate": 2.091591231767709e-07, "loss": 0.1374, "step": 16304 }, { "epoch": 0.94, "grad_norm": 0.32467499870042676, "learning_rate": 2.0878067884864683e-07, "loss": 0.2509, "step": 16305 }, { "epoch": 0.94, "grad_norm": 0.39956950076075687, "learning_rate": 2.0840257359002635e-07, "loss": 0.2391, "step": 16306 }, { "epoch": 0.94, "grad_norm": 0.3293116288624257, "learning_rate": 2.0802480741400456e-07, "loss": 0.2718, "step": 16307 }, { "epoch": 0.94, "grad_norm": 0.3131425030863296, "learning_rate": 2.0764738033366095e-07, "loss": 0.2571, "step": 16308 }, { "epoch": 0.94, "grad_norm": 0.44822453408391827, "learning_rate": 2.0727029236206953e-07, "loss": 0.237, "step": 16309 }, { "epoch": 0.94, "grad_norm": 0.3492819711372927, "learning_rate": 2.068935435122854e-07, "loss": 0.2403, "step": 16310 }, { "epoch": 0.94, "grad_norm": 0.3019190826036198, "learning_rate": 2.0651713379735706e-07, "loss": 0.2593, "step": 16311 }, { "epoch": 0.94, "grad_norm": 0.4584884405827178, "learning_rate": 2.0614106323031846e-07, "loss": 0.2696, "step": 16312 }, { "epoch": 0.94, "grad_norm": 0.411254505830708, "learning_rate": 2.0576533182419477e-07, "loss": 0.2811, "step": 16313 }, { "epoch": 0.94, "grad_norm": 0.3902520439694165, "learning_rate": 2.053899395919956e-07, "loss": 0.2648, "step": 16314 }, { "epoch": 0.94, "grad_norm": 0.353453516685148, "learning_rate": 2.0501488654672276e-07, "loss": 0.2453, "step": 16315 }, { "epoch": 0.94, "grad_norm": 1.1755006016717067, "learning_rate": 2.0464017270136139e-07, "loss": 0.4489, "step": 16316 }, { "epoch": 0.94, "grad_norm": 0.36115634958306614, "learning_rate": 2.0426579806889114e-07, "loss": 0.1607, "step": 16317 }, { "epoch": 0.94, "grad_norm": 0.2925206750954721, "learning_rate": 2.0389176266227494e-07, "loss": 0.3033, "step": 16318 }, { "epoch": 0.94, "grad_norm": 0.7559070874858043, "learning_rate": 2.0351806649446582e-07, "loss": 0.4248, "step": 16319 }, { "epoch": 0.94, "grad_norm": 0.3267369295851818, "learning_rate": 2.0314470957840337e-07, "loss": 0.2155, "step": 16320 }, { "epoch": 0.94, "grad_norm": 0.27187878348807465, "learning_rate": 2.0277169192701951e-07, "loss": 0.1862, "step": 16321 }, { "epoch": 0.94, "grad_norm": 0.3450218801283784, "learning_rate": 2.0239901355323166e-07, "loss": 0.2994, "step": 16322 }, { "epoch": 0.94, "grad_norm": 0.33760499589787035, "learning_rate": 2.0202667446994396e-07, "loss": 0.1877, "step": 16323 }, { "epoch": 0.94, "grad_norm": 1.3079162281268204, "learning_rate": 2.016546746900505e-07, "loss": 0.5858, "step": 16324 }, { "epoch": 0.94, "grad_norm": 0.5508684131706696, "learning_rate": 2.0128301422643437e-07, "loss": 0.3595, "step": 16325 }, { "epoch": 0.94, "grad_norm": 0.22361769791366404, "learning_rate": 2.0091169309196635e-07, "loss": 0.219, "step": 16326 }, { "epoch": 0.94, "grad_norm": 0.27103442373090697, "learning_rate": 2.0054071129950503e-07, "loss": 0.1882, "step": 16327 }, { "epoch": 0.94, "grad_norm": 1.4862555952033187, "learning_rate": 2.0017006886189793e-07, "loss": 0.5859, "step": 16328 }, { "epoch": 0.94, "grad_norm": 0.43694139275442706, "learning_rate": 1.9979976579197924e-07, "loss": 0.2516, "step": 16329 }, { "epoch": 0.94, "grad_norm": 0.2777763051503804, "learning_rate": 1.9942980210257313e-07, "loss": 0.2499, "step": 16330 }, { "epoch": 0.94, "grad_norm": 0.5401113539028809, "learning_rate": 1.9906017780649267e-07, "loss": 0.3562, "step": 16331 }, { "epoch": 0.94, "grad_norm": 0.5545585769788984, "learning_rate": 1.9869089291653544e-07, "loss": 0.3261, "step": 16332 }, { "epoch": 0.94, "grad_norm": 0.24578812393967553, "learning_rate": 1.9832194744549117e-07, "loss": 0.1533, "step": 16333 }, { "epoch": 0.94, "grad_norm": 0.3471473882109775, "learning_rate": 1.9795334140613741e-07, "loss": 0.2771, "step": 16334 }, { "epoch": 0.94, "grad_norm": 0.7065238619042722, "learning_rate": 1.9758507481123734e-07, "loss": 0.3106, "step": 16335 }, { "epoch": 0.94, "grad_norm": 0.37428051274745233, "learning_rate": 1.9721714767354516e-07, "loss": 0.2519, "step": 16336 }, { "epoch": 0.94, "grad_norm": 0.8941617265103616, "learning_rate": 1.968495600058018e-07, "loss": 0.5307, "step": 16337 }, { "epoch": 0.94, "grad_norm": 0.31408047918318077, "learning_rate": 1.9648231182073484e-07, "loss": 0.2839, "step": 16338 }, { "epoch": 0.94, "grad_norm": 0.21954413848341273, "learning_rate": 1.9611540313106526e-07, "loss": 0.1522, "step": 16339 }, { "epoch": 0.94, "grad_norm": 1.4172099353996386, "learning_rate": 1.957488339494973e-07, "loss": 0.4308, "step": 16340 }, { "epoch": 0.94, "grad_norm": 0.4576229631078692, "learning_rate": 1.9538260428872636e-07, "loss": 0.2894, "step": 16341 }, { "epoch": 0.94, "grad_norm": 0.29422998452537247, "learning_rate": 1.9501671416143342e-07, "loss": 0.2752, "step": 16342 }, { "epoch": 0.94, "grad_norm": 0.7309287759530217, "learning_rate": 1.9465116358029057e-07, "loss": 0.3446, "step": 16343 }, { "epoch": 0.94, "grad_norm": 0.3900915110844801, "learning_rate": 1.9428595255795657e-07, "loss": 0.2888, "step": 16344 }, { "epoch": 0.94, "grad_norm": 0.37353761465583235, "learning_rate": 1.9392108110707686e-07, "loss": 0.2302, "step": 16345 }, { "epoch": 0.94, "grad_norm": 0.3062970278982408, "learning_rate": 1.935565492402891e-07, "loss": 0.2056, "step": 16346 }, { "epoch": 0.94, "grad_norm": 0.5981284968410989, "learning_rate": 1.9319235697021766e-07, "loss": 0.3264, "step": 16347 }, { "epoch": 0.94, "grad_norm": 0.35572231889161077, "learning_rate": 1.9282850430947242e-07, "loss": 0.3058, "step": 16348 }, { "epoch": 0.94, "grad_norm": 0.3250188011219933, "learning_rate": 1.9246499127065333e-07, "loss": 0.2664, "step": 16349 }, { "epoch": 0.94, "grad_norm": 0.5737774291728073, "learning_rate": 1.9210181786635028e-07, "loss": 0.2784, "step": 16350 }, { "epoch": 0.94, "grad_norm": 0.4012088600693107, "learning_rate": 1.9173898410913995e-07, "loss": 0.2982, "step": 16351 }, { "epoch": 0.94, "grad_norm": 0.29848629529359066, "learning_rate": 1.9137649001158665e-07, "loss": 0.1829, "step": 16352 }, { "epoch": 0.94, "grad_norm": 0.48598014526128647, "learning_rate": 1.9101433558624483e-07, "loss": 0.2018, "step": 16353 }, { "epoch": 0.94, "grad_norm": 0.26365262037215137, "learning_rate": 1.9065252084565222e-07, "loss": 0.2652, "step": 16354 }, { "epoch": 0.94, "grad_norm": 1.2940730599519867, "learning_rate": 1.9029104580234325e-07, "loss": 0.7013, "step": 16355 }, { "epoch": 0.94, "grad_norm": 0.6116318818103755, "learning_rate": 1.8992991046883236e-07, "loss": 0.1767, "step": 16356 }, { "epoch": 0.94, "grad_norm": 0.23131950704158802, "learning_rate": 1.895691148576273e-07, "loss": 0.1767, "step": 16357 }, { "epoch": 0.94, "grad_norm": 0.3293918268569444, "learning_rate": 1.8920865898122143e-07, "loss": 0.2735, "step": 16358 }, { "epoch": 0.94, "grad_norm": 0.4511372616577314, "learning_rate": 1.888485428520992e-07, "loss": 0.2337, "step": 16359 }, { "epoch": 0.94, "grad_norm": 0.3753456647512689, "learning_rate": 1.884887664827284e-07, "loss": 0.2896, "step": 16360 }, { "epoch": 0.94, "grad_norm": 0.44961231456689443, "learning_rate": 1.881293298855713e-07, "loss": 0.3533, "step": 16361 }, { "epoch": 0.94, "grad_norm": 0.28662578208318146, "learning_rate": 1.877702330730724e-07, "loss": 0.1942, "step": 16362 }, { "epoch": 0.94, "grad_norm": 0.6551363416683533, "learning_rate": 1.874114760576684e-07, "loss": 0.3072, "step": 16363 }, { "epoch": 0.94, "grad_norm": 0.32419575634159736, "learning_rate": 1.870530588517827e-07, "loss": 0.1952, "step": 16364 }, { "epoch": 0.94, "grad_norm": 0.3382957834583971, "learning_rate": 1.8669498146782871e-07, "loss": 0.2804, "step": 16365 }, { "epoch": 0.94, "grad_norm": 0.27619581453105596, "learning_rate": 1.863372439182054e-07, "loss": 0.2148, "step": 16366 }, { "epoch": 0.94, "grad_norm": 1.2113862017013661, "learning_rate": 1.8597984621530063e-07, "loss": 0.8154, "step": 16367 }, { "epoch": 0.94, "grad_norm": 1.391102015454029, "learning_rate": 1.8562278837149228e-07, "loss": 0.4795, "step": 16368 }, { "epoch": 0.94, "grad_norm": 0.3298554751561711, "learning_rate": 1.852660703991438e-07, "loss": 0.1875, "step": 16369 }, { "epoch": 0.94, "grad_norm": 0.33732172554386936, "learning_rate": 1.8490969231061085e-07, "loss": 0.3013, "step": 16370 }, { "epoch": 0.94, "grad_norm": 0.41691582029462937, "learning_rate": 1.8455365411823134e-07, "loss": 0.2349, "step": 16371 }, { "epoch": 0.94, "grad_norm": 0.23903592330484194, "learning_rate": 1.8419795583433763e-07, "loss": 0.1662, "step": 16372 }, { "epoch": 0.94, "grad_norm": 0.48747978730964747, "learning_rate": 1.8384259747124766e-07, "loss": 0.3685, "step": 16373 }, { "epoch": 0.94, "grad_norm": 0.4383352697210021, "learning_rate": 1.834875790412649e-07, "loss": 0.3244, "step": 16374 }, { "epoch": 0.94, "grad_norm": 0.34083610981257056, "learning_rate": 1.831329005566851e-07, "loss": 0.2185, "step": 16375 }, { "epoch": 0.94, "grad_norm": 0.6216981447352833, "learning_rate": 1.8277856202979282e-07, "loss": 0.3541, "step": 16376 }, { "epoch": 0.94, "grad_norm": 0.2519742262206242, "learning_rate": 1.8242456347285498e-07, "loss": 0.2207, "step": 16377 }, { "epoch": 0.94, "grad_norm": 0.27485795073992864, "learning_rate": 1.8207090489813284e-07, "loss": 0.2107, "step": 16378 }, { "epoch": 0.94, "grad_norm": 0.9112149768074835, "learning_rate": 1.8171758631787327e-07, "loss": 0.5171, "step": 16379 }, { "epoch": 0.94, "grad_norm": 0.6028629951517451, "learning_rate": 1.8136460774431097e-07, "loss": 0.2255, "step": 16380 }, { "epoch": 0.94, "grad_norm": 0.3830929246588872, "learning_rate": 1.8101196918967056e-07, "loss": 0.2811, "step": 16381 }, { "epoch": 0.94, "grad_norm": 0.3110335698157431, "learning_rate": 1.806596706661634e-07, "loss": 0.2559, "step": 16382 }, { "epoch": 0.94, "grad_norm": 0.3532742177417911, "learning_rate": 1.8030771218598863e-07, "loss": 0.2153, "step": 16383 }, { "epoch": 0.94, "grad_norm": 0.27118052800532905, "learning_rate": 1.799560937613365e-07, "loss": 0.2216, "step": 16384 }, { "epoch": 0.94, "grad_norm": 0.30009922060906546, "learning_rate": 1.7960481540438278e-07, "loss": 0.259, "step": 16385 }, { "epoch": 0.94, "grad_norm": 1.2439511677777768, "learning_rate": 1.7925387712729113e-07, "loss": 0.3727, "step": 16386 }, { "epoch": 0.94, "grad_norm": 0.3074681980467877, "learning_rate": 1.7890327894221515e-07, "loss": 0.2405, "step": 16387 }, { "epoch": 0.94, "grad_norm": 0.6218933087042393, "learning_rate": 1.7855302086129734e-07, "loss": 0.2886, "step": 16388 }, { "epoch": 0.94, "grad_norm": 0.25243688522148056, "learning_rate": 1.7820310289666577e-07, "loss": 0.2225, "step": 16389 }, { "epoch": 0.94, "grad_norm": 0.24248323345775227, "learning_rate": 1.7785352506043852e-07, "loss": 0.2009, "step": 16390 }, { "epoch": 0.94, "grad_norm": 1.140361489090267, "learning_rate": 1.7750428736472146e-07, "loss": 0.7097, "step": 16391 }, { "epoch": 0.94, "grad_norm": 0.6494164060996802, "learning_rate": 1.7715538982160717e-07, "loss": 0.2132, "step": 16392 }, { "epoch": 0.94, "grad_norm": 0.29027986290742447, "learning_rate": 1.7680683244318154e-07, "loss": 0.274, "step": 16393 }, { "epoch": 0.94, "grad_norm": 0.4739590651850567, "learning_rate": 1.7645861524151152e-07, "loss": 0.3406, "step": 16394 }, { "epoch": 0.94, "grad_norm": 0.47333802545971043, "learning_rate": 1.7611073822865753e-07, "loss": 0.1103, "step": 16395 }, { "epoch": 0.94, "grad_norm": 0.34722614276277375, "learning_rate": 1.7576320141666548e-07, "loss": 0.297, "step": 16396 }, { "epoch": 0.94, "grad_norm": 0.4741094317552018, "learning_rate": 1.7541600481757238e-07, "loss": 0.3714, "step": 16397 }, { "epoch": 0.94, "grad_norm": 0.30253332304639474, "learning_rate": 1.7506914844340084e-07, "loss": 0.2009, "step": 16398 }, { "epoch": 0.94, "grad_norm": 0.3499583597663478, "learning_rate": 1.7472263230616126e-07, "loss": 0.2693, "step": 16399 }, { "epoch": 0.94, "grad_norm": 0.9313377074422858, "learning_rate": 1.7437645641785404e-07, "loss": 0.4164, "step": 16400 }, { "epoch": 0.94, "grad_norm": 0.3084187512136342, "learning_rate": 1.7403062079046851e-07, "loss": 0.2116, "step": 16401 }, { "epoch": 0.94, "grad_norm": 0.3905940549961325, "learning_rate": 1.736851254359795e-07, "loss": 0.2868, "step": 16402 }, { "epoch": 0.94, "grad_norm": 0.45116394117225134, "learning_rate": 1.7333997036635296e-07, "loss": 0.3161, "step": 16403 }, { "epoch": 0.94, "grad_norm": 0.587337210292463, "learning_rate": 1.7299515559354052e-07, "loss": 0.3472, "step": 16404 }, { "epoch": 0.94, "grad_norm": 0.24303637781952572, "learning_rate": 1.7265068112948257e-07, "loss": 0.1978, "step": 16405 }, { "epoch": 0.94, "grad_norm": 0.3114290454839184, "learning_rate": 1.7230654698610848e-07, "loss": 0.2325, "step": 16406 }, { "epoch": 0.94, "grad_norm": 1.4056712180482611, "learning_rate": 1.7196275317533761e-07, "loss": 0.4493, "step": 16407 }, { "epoch": 0.94, "grad_norm": 0.28533591224264054, "learning_rate": 1.7161929970907266e-07, "loss": 0.2084, "step": 16408 }, { "epoch": 0.94, "grad_norm": 0.33232194989980035, "learning_rate": 1.7127618659920963e-07, "loss": 0.2976, "step": 16409 }, { "epoch": 0.94, "grad_norm": 0.5558040330434648, "learning_rate": 1.7093341385762907e-07, "loss": 0.3583, "step": 16410 }, { "epoch": 0.94, "grad_norm": 0.1846507440261721, "learning_rate": 1.7059098149620257e-07, "loss": 0.1403, "step": 16411 }, { "epoch": 0.94, "grad_norm": 1.3482449752952768, "learning_rate": 1.702488895267862e-07, "loss": 0.5575, "step": 16412 }, { "epoch": 0.94, "grad_norm": 0.3510275776236248, "learning_rate": 1.6990713796122938e-07, "loss": 0.2993, "step": 16413 }, { "epoch": 0.94, "grad_norm": 0.2897476651472436, "learning_rate": 1.6956572681136485e-07, "loss": 0.1903, "step": 16414 }, { "epoch": 0.94, "grad_norm": 0.6730125448322015, "learning_rate": 1.692246560890176e-07, "loss": 0.4091, "step": 16415 }, { "epoch": 0.94, "grad_norm": 0.43855648011694554, "learning_rate": 1.688839258059971e-07, "loss": 0.3141, "step": 16416 }, { "epoch": 0.94, "grad_norm": 0.4039676932669491, "learning_rate": 1.6854353597410278e-07, "loss": 0.2686, "step": 16417 }, { "epoch": 0.94, "grad_norm": 0.24169214358315877, "learning_rate": 1.68203486605123e-07, "loss": 0.1656, "step": 16418 }, { "epoch": 0.94, "grad_norm": 1.4027029186189741, "learning_rate": 1.6786377771083496e-07, "loss": 0.5126, "step": 16419 }, { "epoch": 0.94, "grad_norm": 0.4016732571923031, "learning_rate": 1.675244093030015e-07, "loss": 0.272, "step": 16420 }, { "epoch": 0.94, "grad_norm": 0.31401939566076453, "learning_rate": 1.6718538139337325e-07, "loss": 0.251, "step": 16421 }, { "epoch": 0.94, "grad_norm": 0.7274054388947614, "learning_rate": 1.6684669399369412e-07, "loss": 0.4075, "step": 16422 }, { "epoch": 0.94, "grad_norm": 0.3134544640656414, "learning_rate": 1.6650834711569031e-07, "loss": 0.2514, "step": 16423 }, { "epoch": 0.94, "grad_norm": 0.29983200999934284, "learning_rate": 1.661703407710802e-07, "loss": 0.0921, "step": 16424 }, { "epoch": 0.94, "grad_norm": 0.3718216113469689, "learning_rate": 1.6583267497156663e-07, "loss": 0.3093, "step": 16425 }, { "epoch": 0.94, "grad_norm": 0.3162630195231657, "learning_rate": 1.6549534972884584e-07, "loss": 0.2512, "step": 16426 }, { "epoch": 0.94, "grad_norm": 0.5407154342108497, "learning_rate": 1.6515836505459848e-07, "loss": 0.3433, "step": 16427 }, { "epoch": 0.94, "grad_norm": 0.4685866469053301, "learning_rate": 1.648217209604941e-07, "loss": 0.2603, "step": 16428 }, { "epoch": 0.94, "grad_norm": 0.2709135275382767, "learning_rate": 1.6448541745819113e-07, "loss": 0.2503, "step": 16429 }, { "epoch": 0.94, "grad_norm": 0.3388604124213988, "learning_rate": 1.6414945455933363e-07, "loss": 0.1982, "step": 16430 }, { "epoch": 0.94, "grad_norm": 0.9826337080521574, "learning_rate": 1.638138322755578e-07, "loss": 0.2629, "step": 16431 }, { "epoch": 0.94, "grad_norm": 0.300363016379023, "learning_rate": 1.634785506184866e-07, "loss": 0.2463, "step": 16432 }, { "epoch": 0.94, "grad_norm": 0.34111656134447776, "learning_rate": 1.6314360959973075e-07, "loss": 0.2959, "step": 16433 }, { "epoch": 0.94, "grad_norm": 1.0121583179292197, "learning_rate": 1.628090092308876e-07, "loss": 0.3706, "step": 16434 }, { "epoch": 0.94, "grad_norm": 0.39314249015309166, "learning_rate": 1.6247474952354568e-07, "loss": 0.2884, "step": 16435 }, { "epoch": 0.94, "grad_norm": 0.2459622684578627, "learning_rate": 1.621408304892802e-07, "loss": 0.19, "step": 16436 }, { "epoch": 0.94, "grad_norm": 0.3447675748059979, "learning_rate": 1.618072521396552e-07, "loss": 0.247, "step": 16437 }, { "epoch": 0.94, "grad_norm": 0.41316622660519126, "learning_rate": 1.6147401448622145e-07, "loss": 0.2924, "step": 16438 }, { "epoch": 0.94, "grad_norm": 0.44203133836751707, "learning_rate": 1.6114111754051976e-07, "loss": 0.3195, "step": 16439 }, { "epoch": 0.94, "grad_norm": 0.48436498902588176, "learning_rate": 1.6080856131407862e-07, "loss": 0.3431, "step": 16440 }, { "epoch": 0.94, "grad_norm": 0.28460937853923546, "learning_rate": 1.6047634581841331e-07, "loss": 0.184, "step": 16441 }, { "epoch": 0.94, "grad_norm": 0.2880830569609324, "learning_rate": 1.6014447106502907e-07, "loss": 0.1768, "step": 16442 }, { "epoch": 0.94, "grad_norm": 0.8736808553813734, "learning_rate": 1.5981293706541888e-07, "loss": 0.3771, "step": 16443 }, { "epoch": 0.94, "grad_norm": 0.31317366638812105, "learning_rate": 1.5948174383106362e-07, "loss": 0.2167, "step": 16444 }, { "epoch": 0.94, "grad_norm": 0.3277336661706098, "learning_rate": 1.5915089137343186e-07, "loss": 0.3097, "step": 16445 }, { "epoch": 0.94, "grad_norm": 1.1496737135542834, "learning_rate": 1.5882037970398111e-07, "loss": 0.7183, "step": 16446 }, { "epoch": 0.94, "grad_norm": 0.29250473166722574, "learning_rate": 1.584902088341589e-07, "loss": 0.1838, "step": 16447 }, { "epoch": 0.95, "grad_norm": 0.2878214529095352, "learning_rate": 1.5816037877539715e-07, "loss": 0.1679, "step": 16448 }, { "epoch": 0.95, "grad_norm": 0.3309627514911893, "learning_rate": 1.5783088953911784e-07, "loss": 0.3004, "step": 16449 }, { "epoch": 0.95, "grad_norm": 0.33326136046621196, "learning_rate": 1.5750174113673077e-07, "loss": 0.2167, "step": 16450 }, { "epoch": 0.95, "grad_norm": 1.179074426417154, "learning_rate": 1.5717293357963682e-07, "loss": 0.5454, "step": 16451 }, { "epoch": 0.95, "grad_norm": 0.4564239880365591, "learning_rate": 1.5684446687922017e-07, "loss": 0.3404, "step": 16452 }, { "epoch": 0.95, "grad_norm": 0.30674601902753773, "learning_rate": 1.565163410468562e-07, "loss": 0.2332, "step": 16453 }, { "epoch": 0.95, "grad_norm": 0.2190549081107368, "learning_rate": 1.5618855609390803e-07, "loss": 0.1242, "step": 16454 }, { "epoch": 0.95, "grad_norm": 0.5729376141354351, "learning_rate": 1.558611120317266e-07, "loss": 0.3633, "step": 16455 }, { "epoch": 0.95, "grad_norm": 0.5545582014596254, "learning_rate": 1.5553400887165172e-07, "loss": 0.2793, "step": 16456 }, { "epoch": 0.95, "grad_norm": 0.2458146065487911, "learning_rate": 1.5520724662501207e-07, "loss": 0.2315, "step": 16457 }, { "epoch": 0.95, "grad_norm": 1.1168303612442845, "learning_rate": 1.5488082530312087e-07, "loss": 0.694, "step": 16458 }, { "epoch": 0.95, "grad_norm": 0.6203300463305141, "learning_rate": 1.545547449172835e-07, "loss": 0.2942, "step": 16459 }, { "epoch": 0.95, "grad_norm": 0.24751408093210903, "learning_rate": 1.5422900547879206e-07, "loss": 0.199, "step": 16460 }, { "epoch": 0.95, "grad_norm": 0.4335318922233136, "learning_rate": 1.5390360699892636e-07, "loss": 0.3313, "step": 16461 }, { "epoch": 0.95, "grad_norm": 0.3373096852746386, "learning_rate": 1.5357854948895634e-07, "loss": 0.2321, "step": 16462 }, { "epoch": 0.95, "grad_norm": 0.3630705794262418, "learning_rate": 1.532538329601363e-07, "loss": 0.2461, "step": 16463 }, { "epoch": 0.95, "grad_norm": 0.5126707395414409, "learning_rate": 1.529294574237139e-07, "loss": 0.3202, "step": 16464 }, { "epoch": 0.95, "grad_norm": 0.2925570376583863, "learning_rate": 1.5260542289092016e-07, "loss": 0.2425, "step": 16465 }, { "epoch": 0.95, "grad_norm": 0.5729303620205985, "learning_rate": 1.5228172937297837e-07, "loss": 0.3226, "step": 16466 }, { "epoch": 0.95, "grad_norm": 0.4405506344298435, "learning_rate": 1.5195837688109506e-07, "loss": 0.2103, "step": 16467 }, { "epoch": 0.95, "grad_norm": 0.23129493919039548, "learning_rate": 1.5163536542647018e-07, "loss": 0.2064, "step": 16468 }, { "epoch": 0.95, "grad_norm": 0.34775353831476136, "learning_rate": 1.5131269502029034e-07, "loss": 0.3053, "step": 16469 }, { "epoch": 0.95, "grad_norm": 1.0191758063869858, "learning_rate": 1.509903656737277e-07, "loss": 0.5071, "step": 16470 }, { "epoch": 0.95, "grad_norm": 0.6469909048210354, "learning_rate": 1.506683773979445e-07, "loss": 0.352, "step": 16471 }, { "epoch": 0.95, "grad_norm": 0.3790611624365381, "learning_rate": 1.5034673020409173e-07, "loss": 0.2984, "step": 16472 }, { "epoch": 0.95, "grad_norm": 0.2816475223008202, "learning_rate": 1.5002542410330946e-07, "loss": 0.2091, "step": 16473 }, { "epoch": 0.95, "grad_norm": 0.33164768844534576, "learning_rate": 1.4970445910672205e-07, "loss": 0.1769, "step": 16474 }, { "epoch": 0.95, "grad_norm": 0.400650506425723, "learning_rate": 1.493838352254462e-07, "loss": 0.3036, "step": 16475 }, { "epoch": 0.95, "grad_norm": 0.32037790280791784, "learning_rate": 1.4906355247058412e-07, "loss": 0.2392, "step": 16476 }, { "epoch": 0.95, "grad_norm": 0.605568545808694, "learning_rate": 1.487436108532292e-07, "loss": 0.33, "step": 16477 }, { "epoch": 0.95, "grad_norm": 0.3674500331935406, "learning_rate": 1.4842401038445808e-07, "loss": 0.2867, "step": 16478 }, { "epoch": 0.95, "grad_norm": 0.9639296926238876, "learning_rate": 1.4810475107533973e-07, "loss": 0.3768, "step": 16479 }, { "epoch": 0.95, "grad_norm": 0.2325175694217112, "learning_rate": 1.4778583293692972e-07, "loss": 0.1823, "step": 16480 }, { "epoch": 0.95, "grad_norm": 0.30507855821640345, "learning_rate": 1.4746725598027367e-07, "loss": 0.257, "step": 16481 }, { "epoch": 0.95, "grad_norm": 1.3321349423875684, "learning_rate": 1.4714902021640277e-07, "loss": 0.7486, "step": 16482 }, { "epoch": 0.95, "grad_norm": 0.5040720847135953, "learning_rate": 1.4683112565633706e-07, "loss": 0.2263, "step": 16483 }, { "epoch": 0.95, "grad_norm": 0.3693885098403536, "learning_rate": 1.4651357231108555e-07, "loss": 0.2884, "step": 16484 }, { "epoch": 0.95, "grad_norm": 0.3716837935292826, "learning_rate": 1.4619636019164608e-07, "loss": 0.3051, "step": 16485 }, { "epoch": 0.95, "grad_norm": 0.20352716304565405, "learning_rate": 1.458794893090032e-07, "loss": 0.1185, "step": 16486 }, { "epoch": 0.95, "grad_norm": 0.5404248538254562, "learning_rate": 1.4556295967412925e-07, "loss": 0.3645, "step": 16487 }, { "epoch": 0.95, "grad_norm": 0.35677414223681986, "learning_rate": 1.4524677129798547e-07, "loss": 0.2902, "step": 16488 }, { "epoch": 0.95, "grad_norm": 0.5106964245865684, "learning_rate": 1.449309241915231e-07, "loss": 0.1938, "step": 16489 }, { "epoch": 0.95, "grad_norm": 0.3935425552716978, "learning_rate": 1.4461541836568004e-07, "loss": 0.3145, "step": 16490 }, { "epoch": 0.95, "grad_norm": 1.3403643170390191, "learning_rate": 1.443002538313798e-07, "loss": 0.5119, "step": 16491 }, { "epoch": 0.95, "grad_norm": 0.26232504943660767, "learning_rate": 1.4398543059953918e-07, "loss": 0.2427, "step": 16492 }, { "epoch": 0.95, "grad_norm": 0.30415871484094553, "learning_rate": 1.4367094868105725e-07, "loss": 0.2046, "step": 16493 }, { "epoch": 0.95, "grad_norm": 0.42062338088496104, "learning_rate": 1.433568080868286e-07, "loss": 0.2729, "step": 16494 }, { "epoch": 0.95, "grad_norm": 0.5711244914388044, "learning_rate": 1.4304300882772903e-07, "loss": 0.3911, "step": 16495 }, { "epoch": 0.95, "grad_norm": 0.2384222357994241, "learning_rate": 1.4272955091462648e-07, "loss": 0.2252, "step": 16496 }, { "epoch": 0.95, "grad_norm": 1.3215017477278463, "learning_rate": 1.424164343583745e-07, "loss": 0.532, "step": 16497 }, { "epoch": 0.95, "grad_norm": 1.0583273806987223, "learning_rate": 1.4210365916981882e-07, "loss": 0.4026, "step": 16498 }, { "epoch": 0.95, "grad_norm": 0.3118227358818172, "learning_rate": 1.4179122535978862e-07, "loss": 0.2325, "step": 16499 }, { "epoch": 0.95, "grad_norm": 0.34214996786542395, "learning_rate": 1.414791329391052e-07, "loss": 0.3061, "step": 16500 }, { "epoch": 0.95, "grad_norm": 0.3931429662229714, "learning_rate": 1.4116738191857437e-07, "loss": 0.3039, "step": 16501 }, { "epoch": 0.95, "grad_norm": 0.23757841266563745, "learning_rate": 1.4085597230899418e-07, "loss": 0.167, "step": 16502 }, { "epoch": 0.95, "grad_norm": 1.2952326611305722, "learning_rate": 1.4054490412114817e-07, "loss": 0.5202, "step": 16503 }, { "epoch": 0.95, "grad_norm": 0.3303112645977888, "learning_rate": 1.402341773658078e-07, "loss": 0.2998, "step": 16504 }, { "epoch": 0.95, "grad_norm": 0.3496116138968531, "learning_rate": 1.3992379205373219e-07, "loss": 0.2641, "step": 16505 }, { "epoch": 0.95, "grad_norm": 0.6284477927092317, "learning_rate": 1.3961374819567386e-07, "loss": 0.3239, "step": 16506 }, { "epoch": 0.95, "grad_norm": 0.29321421158262606, "learning_rate": 1.3930404580236646e-07, "loss": 0.1722, "step": 16507 }, { "epoch": 0.95, "grad_norm": 0.27799789142741405, "learning_rate": 1.3899468488453583e-07, "loss": 0.2652, "step": 16508 }, { "epoch": 0.95, "grad_norm": 0.5390190495988705, "learning_rate": 1.3868566545289563e-07, "loss": 0.2118, "step": 16509 }, { "epoch": 0.95, "grad_norm": 1.2106732546022587, "learning_rate": 1.383769875181462e-07, "loss": 0.6369, "step": 16510 }, { "epoch": 0.95, "grad_norm": 0.3303768375574665, "learning_rate": 1.38068651090979e-07, "loss": 0.2693, "step": 16511 }, { "epoch": 0.95, "grad_norm": 0.31858858994348516, "learning_rate": 1.377606561820699e-07, "loss": 0.2569, "step": 16512 }, { "epoch": 0.95, "grad_norm": 0.41073201497419076, "learning_rate": 1.3745300280208373e-07, "loss": 0.249, "step": 16513 }, { "epoch": 0.95, "grad_norm": 0.25752711484513774, "learning_rate": 1.371456909616764e-07, "loss": 0.2332, "step": 16514 }, { "epoch": 0.95, "grad_norm": 1.4245672657375734, "learning_rate": 1.3683872067149052e-07, "loss": 0.6246, "step": 16515 }, { "epoch": 0.95, "grad_norm": 0.27505237674289085, "learning_rate": 1.3653209194215534e-07, "loss": 0.2159, "step": 16516 }, { "epoch": 0.95, "grad_norm": 0.3313341959115871, "learning_rate": 1.3622580478428903e-07, "loss": 0.2806, "step": 16517 }, { "epoch": 0.95, "grad_norm": 0.6390544637914571, "learning_rate": 1.3591985920849981e-07, "loss": 0.3746, "step": 16518 }, { "epoch": 0.95, "grad_norm": 0.29075587750528226, "learning_rate": 1.356142552253814e-07, "loss": 0.1736, "step": 16519 }, { "epoch": 0.95, "grad_norm": 0.28201683673325467, "learning_rate": 1.3530899284551756e-07, "loss": 0.2628, "step": 16520 }, { "epoch": 0.95, "grad_norm": 0.5231549591416089, "learning_rate": 1.3500407207947875e-07, "loss": 0.2276, "step": 16521 }, { "epoch": 0.95, "grad_norm": 0.6591201808581352, "learning_rate": 1.3469949293782426e-07, "loss": 0.2343, "step": 16522 }, { "epoch": 0.95, "grad_norm": 0.35442038050860036, "learning_rate": 1.3439525543110232e-07, "loss": 0.285, "step": 16523 }, { "epoch": 0.95, "grad_norm": 0.3256582404882992, "learning_rate": 1.3409135956984897e-07, "loss": 0.2858, "step": 16524 }, { "epoch": 0.95, "grad_norm": 0.4867984784902485, "learning_rate": 1.337878053645869e-07, "loss": 0.1892, "step": 16525 }, { "epoch": 0.95, "grad_norm": 0.2675448451742374, "learning_rate": 1.334845928258288e-07, "loss": 0.2195, "step": 16526 }, { "epoch": 0.95, "grad_norm": 0.4860536461056225, "learning_rate": 1.331817219640752e-07, "loss": 0.2422, "step": 16527 }, { "epoch": 0.95, "grad_norm": 0.3300567767788356, "learning_rate": 1.3287919278981544e-07, "loss": 0.2799, "step": 16528 }, { "epoch": 0.95, "grad_norm": 0.33493831219224574, "learning_rate": 1.3257700531352334e-07, "loss": 0.2111, "step": 16529 }, { "epoch": 0.95, "grad_norm": 0.7971942715761426, "learning_rate": 1.3227515954566506e-07, "loss": 0.4725, "step": 16530 }, { "epoch": 0.95, "grad_norm": 1.2101032035959423, "learning_rate": 1.319736554966955e-07, "loss": 0.4656, "step": 16531 }, { "epoch": 0.95, "grad_norm": 0.2161492687029256, "learning_rate": 1.31672493177053e-07, "loss": 0.2144, "step": 16532 }, { "epoch": 0.95, "grad_norm": 0.2706126200232461, "learning_rate": 1.3137167259716698e-07, "loss": 0.1613, "step": 16533 }, { "epoch": 0.95, "grad_norm": 0.7201379209477112, "learning_rate": 1.310711937674569e-07, "loss": 0.3627, "step": 16534 }, { "epoch": 0.95, "grad_norm": 0.3188833536266433, "learning_rate": 1.3077105669832556e-07, "loss": 0.2127, "step": 16535 }, { "epoch": 0.95, "grad_norm": 0.353800166876077, "learning_rate": 1.3047126140016907e-07, "loss": 0.29, "step": 16536 }, { "epoch": 0.95, "grad_norm": 1.152362615048209, "learning_rate": 1.3017180788336804e-07, "loss": 0.5908, "step": 16537 }, { "epoch": 0.95, "grad_norm": 0.2201443708188749, "learning_rate": 1.29872696158293e-07, "loss": 0.1572, "step": 16538 }, { "epoch": 0.95, "grad_norm": 0.39768952646586436, "learning_rate": 1.295739262353013e-07, "loss": 0.2506, "step": 16539 }, { "epoch": 0.95, "grad_norm": 0.3711468804434103, "learning_rate": 1.2927549812474128e-07, "loss": 0.3222, "step": 16540 }, { "epoch": 0.95, "grad_norm": 0.3875931478557323, "learning_rate": 1.2897741183694578e-07, "loss": 0.2718, "step": 16541 }, { "epoch": 0.95, "grad_norm": 0.5040208709722376, "learning_rate": 1.286796673822388e-07, "loss": 0.2931, "step": 16542 }, { "epoch": 0.95, "grad_norm": 0.3644096645649208, "learning_rate": 1.2838226477092875e-07, "loss": 0.2835, "step": 16543 }, { "epoch": 0.95, "grad_norm": 0.43199632651889797, "learning_rate": 1.2808520401331737e-07, "loss": 0.2786, "step": 16544 }, { "epoch": 0.95, "grad_norm": 0.26276909694452727, "learning_rate": 1.27788485119692e-07, "loss": 0.142, "step": 16545 }, { "epoch": 0.95, "grad_norm": 0.6481758566901482, "learning_rate": 1.2749210810032664e-07, "loss": 0.3816, "step": 16546 }, { "epoch": 0.95, "grad_norm": 0.3026318614423293, "learning_rate": 1.2719607296548309e-07, "loss": 0.2441, "step": 16547 }, { "epoch": 0.95, "grad_norm": 0.31330158406573605, "learning_rate": 1.2690037972541646e-07, "loss": 0.2622, "step": 16548 }, { "epoch": 0.95, "grad_norm": 1.5618122703655006, "learning_rate": 1.2660502839036526e-07, "loss": 0.5628, "step": 16549 }, { "epoch": 0.95, "grad_norm": 0.3455198798238632, "learning_rate": 1.2631001897055683e-07, "loss": 0.2721, "step": 16550 }, { "epoch": 0.95, "grad_norm": 0.2731384236231438, "learning_rate": 1.2601535147620746e-07, "loss": 0.1375, "step": 16551 }, { "epoch": 0.95, "grad_norm": 0.3394524321219831, "learning_rate": 1.2572102591752234e-07, "loss": 0.3059, "step": 16552 }, { "epoch": 0.95, "grad_norm": 0.30354042132773396, "learning_rate": 1.2542704230469326e-07, "loss": 0.2326, "step": 16553 }, { "epoch": 0.95, "grad_norm": 1.1255505639202197, "learning_rate": 1.2513340064790102e-07, "loss": 0.6941, "step": 16554 }, { "epoch": 0.95, "grad_norm": 0.35766936306315206, "learning_rate": 1.2484010095731414e-07, "loss": 0.246, "step": 16555 }, { "epoch": 0.95, "grad_norm": 0.3323040325151218, "learning_rate": 1.2454714324309115e-07, "loss": 0.2591, "step": 16556 }, { "epoch": 0.95, "grad_norm": 0.7689015974335848, "learning_rate": 1.2425452751537503e-07, "loss": 0.3713, "step": 16557 }, { "epoch": 0.95, "grad_norm": 0.2691954444647513, "learning_rate": 1.2396225378430105e-07, "loss": 0.144, "step": 16558 }, { "epoch": 0.95, "grad_norm": 0.38664284669524407, "learning_rate": 1.2367032205998775e-07, "loss": 0.2492, "step": 16559 }, { "epoch": 0.95, "grad_norm": 0.30289171963027917, "learning_rate": 1.2337873235254704e-07, "loss": 0.3089, "step": 16560 }, { "epoch": 0.95, "grad_norm": 1.3377802262789582, "learning_rate": 1.2308748467207753e-07, "loss": 0.2227, "step": 16561 }, { "epoch": 0.95, "grad_norm": 0.39685089389976136, "learning_rate": 1.2279657902866226e-07, "loss": 0.2741, "step": 16562 }, { "epoch": 0.95, "grad_norm": 0.34991767076091934, "learning_rate": 1.225060154323776e-07, "loss": 0.2956, "step": 16563 }, { "epoch": 0.95, "grad_norm": 0.2420095759472581, "learning_rate": 1.222157938932833e-07, "loss": 0.1584, "step": 16564 }, { "epoch": 0.95, "grad_norm": 0.4161152844228987, "learning_rate": 1.219259144214324e-07, "loss": 0.2804, "step": 16565 }, { "epoch": 0.95, "grad_norm": 0.5728617107352475, "learning_rate": 1.216363770268625e-07, "loss": 0.4119, "step": 16566 }, { "epoch": 0.95, "grad_norm": 0.5569364520388921, "learning_rate": 1.2134718171960103e-07, "loss": 0.3633, "step": 16567 }, { "epoch": 0.95, "grad_norm": 0.2639185558147655, "learning_rate": 1.2105832850966004e-07, "loss": 0.2085, "step": 16568 }, { "epoch": 0.95, "grad_norm": 0.59735708477971, "learning_rate": 1.2076981740704485e-07, "loss": 0.3744, "step": 16569 }, { "epoch": 0.95, "grad_norm": 0.34765113855029134, "learning_rate": 1.2048164842174636e-07, "loss": 0.1818, "step": 16570 }, { "epoch": 0.95, "grad_norm": 0.2991838323871734, "learning_rate": 1.2019382156374326e-07, "loss": 0.1983, "step": 16571 }, { "epoch": 0.95, "grad_norm": 0.33619733070672253, "learning_rate": 1.1990633684300424e-07, "loss": 0.2832, "step": 16572 }, { "epoch": 0.95, "grad_norm": 0.9724457561401293, "learning_rate": 1.1961919426948244e-07, "loss": 0.4762, "step": 16573 }, { "epoch": 0.95, "grad_norm": 0.3870489682298207, "learning_rate": 1.1933239385312324e-07, "loss": 0.2104, "step": 16574 }, { "epoch": 0.95, "grad_norm": 0.361921769610525, "learning_rate": 1.190459356038598e-07, "loss": 0.2939, "step": 16575 }, { "epoch": 0.95, "grad_norm": 0.3518873026409385, "learning_rate": 1.1875981953160975e-07, "loss": 0.2099, "step": 16576 }, { "epoch": 0.95, "grad_norm": 0.3527276444123807, "learning_rate": 1.1847404564628185e-07, "loss": 0.1475, "step": 16577 }, { "epoch": 0.95, "grad_norm": 0.2743531371236152, "learning_rate": 1.181886139577737e-07, "loss": 0.2313, "step": 16578 }, { "epoch": 0.95, "grad_norm": 0.338837035760285, "learning_rate": 1.1790352447596853e-07, "loss": 0.3039, "step": 16579 }, { "epoch": 0.95, "grad_norm": 0.5475170217274716, "learning_rate": 1.1761877721073845e-07, "loss": 0.3508, "step": 16580 }, { "epoch": 0.95, "grad_norm": 0.5025763106213428, "learning_rate": 1.1733437217194665e-07, "loss": 0.2398, "step": 16581 }, { "epoch": 0.95, "grad_norm": 0.535478845267447, "learning_rate": 1.1705030936943973e-07, "loss": 0.2167, "step": 16582 }, { "epoch": 0.95, "grad_norm": 0.3968889128451416, "learning_rate": 1.1676658881305647e-07, "loss": 0.304, "step": 16583 }, { "epoch": 0.95, "grad_norm": 0.21367273624373442, "learning_rate": 1.1648321051262012e-07, "loss": 0.196, "step": 16584 }, { "epoch": 0.95, "grad_norm": 0.80451182074773, "learning_rate": 1.1620017447794507e-07, "loss": 0.3895, "step": 16585 }, { "epoch": 0.95, "grad_norm": 0.5701986470439201, "learning_rate": 1.1591748071883458e-07, "loss": 0.2978, "step": 16586 }, { "epoch": 0.95, "grad_norm": 0.2685745996616732, "learning_rate": 1.1563512924507525e-07, "loss": 0.246, "step": 16587 }, { "epoch": 0.95, "grad_norm": 1.4199611521261792, "learning_rate": 1.1535312006644706e-07, "loss": 0.7062, "step": 16588 }, { "epoch": 0.95, "grad_norm": 0.3948249665352689, "learning_rate": 1.150714531927144e-07, "loss": 0.2906, "step": 16589 }, { "epoch": 0.95, "grad_norm": 0.5276002741172402, "learning_rate": 1.1479012863363282e-07, "loss": 0.3783, "step": 16590 }, { "epoch": 0.95, "grad_norm": 0.24658030549592613, "learning_rate": 1.1450914639894451e-07, "loss": 0.2207, "step": 16591 }, { "epoch": 0.95, "grad_norm": 0.3155271224796519, "learning_rate": 1.1422850649837836e-07, "loss": 0.223, "step": 16592 }, { "epoch": 0.95, "grad_norm": 0.5925497152046855, "learning_rate": 1.1394820894165437e-07, "loss": 0.2979, "step": 16593 }, { "epoch": 0.95, "grad_norm": 1.5226704293130526, "learning_rate": 1.1366825373847923e-07, "loss": 0.2853, "step": 16594 }, { "epoch": 0.95, "grad_norm": 0.4706632168946928, "learning_rate": 1.1338864089854629e-07, "loss": 0.2925, "step": 16595 }, { "epoch": 0.95, "grad_norm": 0.35243983811365676, "learning_rate": 1.1310937043154113e-07, "loss": 0.2916, "step": 16596 }, { "epoch": 0.95, "grad_norm": 0.31547774494065683, "learning_rate": 1.128304423471327e-07, "loss": 0.1765, "step": 16597 }, { "epoch": 0.95, "grad_norm": 0.640983379760982, "learning_rate": 1.1255185665497992e-07, "loss": 0.3632, "step": 16598 }, { "epoch": 0.95, "grad_norm": 0.2510122641943343, "learning_rate": 1.1227361336473175e-07, "loss": 0.256, "step": 16599 }, { "epoch": 0.95, "grad_norm": 1.4540579733239993, "learning_rate": 1.119957124860238e-07, "loss": 0.203, "step": 16600 }, { "epoch": 0.95, "grad_norm": 0.6368166691950049, "learning_rate": 1.1171815402847841e-07, "loss": 0.3969, "step": 16601 }, { "epoch": 0.95, "grad_norm": 0.351165725596696, "learning_rate": 1.1144093800170786e-07, "loss": 0.2781, "step": 16602 }, { "epoch": 0.95, "grad_norm": 0.3683204475698124, "learning_rate": 1.1116406441531335e-07, "loss": 0.3086, "step": 16603 }, { "epoch": 0.95, "grad_norm": 0.23192559345215039, "learning_rate": 1.1088753327888169e-07, "loss": 0.1139, "step": 16604 }, { "epoch": 0.95, "grad_norm": 0.4094259485578723, "learning_rate": 1.1061134460198964e-07, "loss": 0.2794, "step": 16605 }, { "epoch": 0.95, "grad_norm": 1.2833862226613364, "learning_rate": 1.1033549839420066e-07, "loss": 0.5491, "step": 16606 }, { "epoch": 0.95, "grad_norm": 0.2600204249198676, "learning_rate": 1.1005999466506822e-07, "loss": 0.2268, "step": 16607 }, { "epoch": 0.95, "grad_norm": 0.36192005945864036, "learning_rate": 1.0978483342413359e-07, "loss": 0.288, "step": 16608 }, { "epoch": 0.95, "grad_norm": 0.5971870477119776, "learning_rate": 1.0951001468092471e-07, "loss": 0.3491, "step": 16609 }, { "epoch": 0.95, "grad_norm": 0.1664000066752861, "learning_rate": 1.0923553844495838e-07, "loss": 0.1133, "step": 16610 }, { "epoch": 0.95, "grad_norm": 0.3779109104685889, "learning_rate": 1.0896140472574035e-07, "loss": 0.3052, "step": 16611 }, { "epoch": 0.95, "grad_norm": 0.48651148623474383, "learning_rate": 1.0868761353276414e-07, "loss": 0.3653, "step": 16612 }, { "epoch": 0.95, "grad_norm": 0.6419661588127807, "learning_rate": 1.0841416487550994e-07, "loss": 0.2488, "step": 16613 }, { "epoch": 0.95, "grad_norm": 0.3649414029689163, "learning_rate": 1.0814105876344794e-07, "loss": 0.2928, "step": 16614 }, { "epoch": 0.95, "grad_norm": 0.3596194680368247, "learning_rate": 1.0786829520603503e-07, "loss": 0.3083, "step": 16615 }, { "epoch": 0.95, "grad_norm": 0.2692613514757282, "learning_rate": 1.0759587421271811e-07, "loss": 0.1606, "step": 16616 }, { "epoch": 0.95, "grad_norm": 0.3453726228128851, "learning_rate": 1.0732379579293184e-07, "loss": 0.1923, "step": 16617 }, { "epoch": 0.95, "grad_norm": 1.3122016638571212, "learning_rate": 1.0705205995609536e-07, "loss": 0.5729, "step": 16618 }, { "epoch": 0.95, "grad_norm": 0.3154318714815696, "learning_rate": 1.0678066671162113e-07, "loss": 0.2759, "step": 16619 }, { "epoch": 0.95, "grad_norm": 0.3255968830256943, "learning_rate": 1.0650961606890719e-07, "loss": 0.2382, "step": 16620 }, { "epoch": 0.95, "grad_norm": 1.2859265206571604, "learning_rate": 1.062389080373405e-07, "loss": 0.8102, "step": 16621 }, { "epoch": 0.96, "grad_norm": 0.26955193137320504, "learning_rate": 1.0596854262629352e-07, "loss": 0.1498, "step": 16622 }, { "epoch": 0.96, "grad_norm": 0.24722787460287485, "learning_rate": 1.0569851984513102e-07, "loss": 0.2281, "step": 16623 }, { "epoch": 0.96, "grad_norm": 1.3302779187313543, "learning_rate": 1.0542883970320328e-07, "loss": 0.5446, "step": 16624 }, { "epoch": 0.96, "grad_norm": 0.5332660462662445, "learning_rate": 1.051595022098506e-07, "loss": 0.3458, "step": 16625 }, { "epoch": 0.96, "grad_norm": 0.30330182429363667, "learning_rate": 1.0489050737439777e-07, "loss": 0.2059, "step": 16626 }, { "epoch": 0.96, "grad_norm": 0.36778833648956744, "learning_rate": 1.0462185520616064e-07, "loss": 0.315, "step": 16627 }, { "epoch": 0.96, "grad_norm": 0.23797657274293577, "learning_rate": 1.0435354571444401e-07, "loss": 0.1594, "step": 16628 }, { "epoch": 0.96, "grad_norm": 0.3561924831436635, "learning_rate": 1.0408557890853821e-07, "loss": 0.2595, "step": 16629 }, { "epoch": 0.96, "grad_norm": 0.4752465406071092, "learning_rate": 1.038179547977236e-07, "loss": 0.2584, "step": 16630 }, { "epoch": 0.96, "grad_norm": 0.39518269810907725, "learning_rate": 1.0355067339126723e-07, "loss": 0.2972, "step": 16631 }, { "epoch": 0.96, "grad_norm": 0.36175612579907546, "learning_rate": 1.0328373469842502e-07, "loss": 0.2588, "step": 16632 }, { "epoch": 0.96, "grad_norm": 1.571137981694321, "learning_rate": 1.0301713872844288e-07, "loss": 0.414, "step": 16633 }, { "epoch": 0.96, "grad_norm": 0.3156274111668739, "learning_rate": 1.0275088549055123e-07, "loss": 0.226, "step": 16634 }, { "epoch": 0.96, "grad_norm": 0.2669295187752056, "learning_rate": 1.0248497499396936e-07, "loss": 0.2455, "step": 16635 }, { "epoch": 0.96, "grad_norm": 0.4475781213109659, "learning_rate": 1.022194072479088e-07, "loss": 0.1764, "step": 16636 }, { "epoch": 0.96, "grad_norm": 0.7397982641900223, "learning_rate": 1.019541822615644e-07, "loss": 0.4622, "step": 16637 }, { "epoch": 0.96, "grad_norm": 0.32650303175664785, "learning_rate": 1.0168930004412103e-07, "loss": 0.2467, "step": 16638 }, { "epoch": 0.96, "grad_norm": 0.3796386468131196, "learning_rate": 1.0142476060475137e-07, "loss": 0.2462, "step": 16639 }, { "epoch": 0.96, "grad_norm": 0.8287190883309027, "learning_rate": 1.0116056395261586e-07, "loss": 0.327, "step": 16640 }, { "epoch": 0.96, "grad_norm": 0.20625582020929892, "learning_rate": 1.0089671009686497e-07, "loss": 0.2011, "step": 16641 }, { "epoch": 0.96, "grad_norm": 0.5948962808042882, "learning_rate": 1.0063319904663471e-07, "loss": 0.4045, "step": 16642 }, { "epoch": 0.96, "grad_norm": 0.29778674786226755, "learning_rate": 1.0037003081105223e-07, "loss": 0.2352, "step": 16643 }, { "epoch": 0.96, "grad_norm": 0.33424304060632115, "learning_rate": 1.0010720539922914e-07, "loss": 0.2542, "step": 16644 }, { "epoch": 0.96, "grad_norm": 1.1934592174173186, "learning_rate": 9.984472282026814e-08, "loss": 0.702, "step": 16645 }, { "epoch": 0.96, "grad_norm": 0.3274171866966553, "learning_rate": 9.958258308325975e-08, "loss": 0.2261, "step": 16646 }, { "epoch": 0.96, "grad_norm": 0.34650531219482306, "learning_rate": 9.932078619727892e-08, "loss": 0.2777, "step": 16647 }, { "epoch": 0.96, "grad_norm": 0.3254717650580907, "learning_rate": 9.905933217139397e-08, "loss": 0.2203, "step": 16648 }, { "epoch": 0.96, "grad_norm": 0.7048051219031087, "learning_rate": 9.879822101465874e-08, "loss": 0.2505, "step": 16649 }, { "epoch": 0.96, "grad_norm": 0.3501335317692874, "learning_rate": 9.853745273611604e-08, "loss": 0.275, "step": 16650 }, { "epoch": 0.96, "grad_norm": 0.34539266905883126, "learning_rate": 9.827702734479528e-08, "loss": 0.3104, "step": 16651 }, { "epoch": 0.96, "grad_norm": 2.0335167832865215, "learning_rate": 9.801694484971369e-08, "loss": 0.2343, "step": 16652 }, { "epoch": 0.96, "grad_norm": 0.30600946235260734, "learning_rate": 9.775720525988076e-08, "loss": 0.2483, "step": 16653 }, { "epoch": 0.96, "grad_norm": 0.3128501072923363, "learning_rate": 9.749780858429036e-08, "loss": 0.2964, "step": 16654 }, { "epoch": 0.96, "grad_norm": 0.2661670102671037, "learning_rate": 9.723875483192536e-08, "loss": 0.1817, "step": 16655 }, { "epoch": 0.96, "grad_norm": 0.2967207607658545, "learning_rate": 9.69800440117552e-08, "loss": 0.2061, "step": 16656 }, { "epoch": 0.96, "grad_norm": 1.3288708256421893, "learning_rate": 9.672167613274053e-08, "loss": 0.7043, "step": 16657 }, { "epoch": 0.96, "grad_norm": 0.3621252628942739, "learning_rate": 9.64636512038286e-08, "loss": 0.2824, "step": 16658 }, { "epoch": 0.96, "grad_norm": 0.2996790863119093, "learning_rate": 9.620596923395454e-08, "loss": 0.2111, "step": 16659 }, { "epoch": 0.96, "grad_norm": 0.7797861094274411, "learning_rate": 9.594863023204226e-08, "loss": 0.3865, "step": 16660 }, { "epoch": 0.96, "grad_norm": 0.24464123847918023, "learning_rate": 9.569163420700245e-08, "loss": 0.183, "step": 16661 }, { "epoch": 0.96, "grad_norm": 0.3100921626514089, "learning_rate": 9.543498116773576e-08, "loss": 0.1861, "step": 16662 }, { "epoch": 0.96, "grad_norm": 0.3610536933026674, "learning_rate": 9.51786711231295e-08, "loss": 0.2764, "step": 16663 }, { "epoch": 0.96, "grad_norm": 0.6986584506879514, "learning_rate": 9.492270408206106e-08, "loss": 0.3745, "step": 16664 }, { "epoch": 0.96, "grad_norm": 0.2895372923549709, "learning_rate": 9.46670800533922e-08, "loss": 0.1944, "step": 16665 }, { "epoch": 0.96, "grad_norm": 0.3473184880519653, "learning_rate": 9.441179904597697e-08, "loss": 0.3016, "step": 16666 }, { "epoch": 0.96, "grad_norm": 0.38263612931781815, "learning_rate": 9.415686106865496e-08, "loss": 0.2469, "step": 16667 }, { "epoch": 0.96, "grad_norm": 0.4296040570362124, "learning_rate": 9.390226613025466e-08, "loss": 0.248, "step": 16668 }, { "epoch": 0.96, "grad_norm": 0.31216699027746436, "learning_rate": 9.364801423959235e-08, "loss": 0.1625, "step": 16669 }, { "epoch": 0.96, "grad_norm": 0.33552034711426854, "learning_rate": 9.339410540547433e-08, "loss": 0.2803, "step": 16670 }, { "epoch": 0.96, "grad_norm": 0.3310185221024679, "learning_rate": 9.314053963669245e-08, "loss": 0.254, "step": 16671 }, { "epoch": 0.96, "grad_norm": 0.7250290339843236, "learning_rate": 9.288731694202747e-08, "loss": 0.2567, "step": 16672 }, { "epoch": 0.96, "grad_norm": 1.2242252500615893, "learning_rate": 9.263443733024791e-08, "loss": 0.7467, "step": 16673 }, { "epoch": 0.96, "grad_norm": 0.32309648425980414, "learning_rate": 9.238190081011345e-08, "loss": 0.2512, "step": 16674 }, { "epoch": 0.96, "grad_norm": 0.19987086897763234, "learning_rate": 9.212970739036709e-08, "loss": 0.1648, "step": 16675 }, { "epoch": 0.96, "grad_norm": 0.5338994500229073, "learning_rate": 9.187785707974183e-08, "loss": 0.3529, "step": 16676 }, { "epoch": 0.96, "grad_norm": 0.32266698545901074, "learning_rate": 9.162634988696184e-08, "loss": 0.2438, "step": 16677 }, { "epoch": 0.96, "grad_norm": 0.5167703108082058, "learning_rate": 9.137518582073345e-08, "loss": 0.3471, "step": 16678 }, { "epoch": 0.96, "grad_norm": 0.4186625502370127, "learning_rate": 9.112436488975751e-08, "loss": 0.2912, "step": 16679 }, { "epoch": 0.96, "grad_norm": 0.3821134635943973, "learning_rate": 9.087388710271927e-08, "loss": 0.2757, "step": 16680 }, { "epoch": 0.96, "grad_norm": 0.25700002896802115, "learning_rate": 9.06237524682918e-08, "loss": 0.1689, "step": 16681 }, { "epoch": 0.96, "grad_norm": 0.3561604016730922, "learning_rate": 9.037396099513707e-08, "loss": 0.2703, "step": 16682 }, { "epoch": 0.96, "grad_norm": 0.39261394030257535, "learning_rate": 9.012451269190592e-08, "loss": 0.2591, "step": 16683 }, { "epoch": 0.96, "grad_norm": 0.6363852734792065, "learning_rate": 8.987540756723811e-08, "loss": 0.3443, "step": 16684 }, { "epoch": 0.96, "grad_norm": 1.3103970018735047, "learning_rate": 8.962664562975676e-08, "loss": 0.3247, "step": 16685 }, { "epoch": 0.96, "grad_norm": 0.3028875377252202, "learning_rate": 8.93782268880794e-08, "loss": 0.2357, "step": 16686 }, { "epoch": 0.96, "grad_norm": 0.2752372436372155, "learning_rate": 8.913015135080805e-08, "loss": 0.2525, "step": 16687 }, { "epoch": 0.96, "grad_norm": 0.36823697113929454, "learning_rate": 8.888241902653361e-08, "loss": 0.1942, "step": 16688 }, { "epoch": 0.96, "grad_norm": 0.4023075320401017, "learning_rate": 8.863502992383477e-08, "loss": 0.2551, "step": 16689 }, { "epoch": 0.96, "grad_norm": 0.2965441139243796, "learning_rate": 8.838798405127802e-08, "loss": 0.2671, "step": 16690 }, { "epoch": 0.96, "grad_norm": 1.325159274198456, "learning_rate": 8.814128141741984e-08, "loss": 0.6663, "step": 16691 }, { "epoch": 0.96, "grad_norm": 0.2863213468271409, "learning_rate": 8.78949220308023e-08, "loss": 0.2085, "step": 16692 }, { "epoch": 0.96, "grad_norm": 0.7635493682524934, "learning_rate": 8.764890589995745e-08, "loss": 0.4148, "step": 16693 }, { "epoch": 0.96, "grad_norm": 0.2738614036932938, "learning_rate": 8.740323303340514e-08, "loss": 0.2462, "step": 16694 }, { "epoch": 0.96, "grad_norm": 0.26102284400214015, "learning_rate": 8.715790343965192e-08, "loss": 0.1567, "step": 16695 }, { "epoch": 0.96, "grad_norm": 1.418513087327854, "learning_rate": 8.691291712719541e-08, "loss": 0.4353, "step": 16696 }, { "epoch": 0.96, "grad_norm": 1.1335322604001175, "learning_rate": 8.666827410451772e-08, "loss": 0.7633, "step": 16697 }, { "epoch": 0.96, "grad_norm": 0.24218728721153193, "learning_rate": 8.642397438008987e-08, "loss": 0.2166, "step": 16698 }, { "epoch": 0.96, "grad_norm": 0.44698841899876174, "learning_rate": 8.618001796237507e-08, "loss": 0.3161, "step": 16699 }, { "epoch": 0.96, "grad_norm": 0.8319313930088971, "learning_rate": 8.593640485981991e-08, "loss": 0.194, "step": 16700 }, { "epoch": 0.96, "grad_norm": 0.3124725689478206, "learning_rate": 8.569313508086097e-08, "loss": 0.0728, "step": 16701 }, { "epoch": 0.96, "grad_norm": 0.2663921373594587, "learning_rate": 8.545020863392262e-08, "loss": 0.2698, "step": 16702 }, { "epoch": 0.96, "grad_norm": 1.0964726846573836, "learning_rate": 8.520762552741591e-08, "loss": 0.5473, "step": 16703 }, { "epoch": 0.96, "grad_norm": 0.5929084586700467, "learning_rate": 8.496538576974412e-08, "loss": 0.3214, "step": 16704 }, { "epoch": 0.96, "grad_norm": 0.3420457220417289, "learning_rate": 8.472348936929387e-08, "loss": 0.2463, "step": 16705 }, { "epoch": 0.96, "grad_norm": 0.33912810289817497, "learning_rate": 8.448193633444291e-08, "loss": 0.3064, "step": 16706 }, { "epoch": 0.96, "grad_norm": 0.2234517371912271, "learning_rate": 8.424072667355565e-08, "loss": 0.1544, "step": 16707 }, { "epoch": 0.96, "grad_norm": 0.44621753014878657, "learning_rate": 8.399986039498653e-08, "loss": 0.2155, "step": 16708 }, { "epoch": 0.96, "grad_norm": 0.8496786623009368, "learning_rate": 8.375933750707554e-08, "loss": 0.4511, "step": 16709 }, { "epoch": 0.96, "grad_norm": 0.2951581272073628, "learning_rate": 8.35191580181527e-08, "loss": 0.2546, "step": 16710 }, { "epoch": 0.96, "grad_norm": 0.3656795630471628, "learning_rate": 8.327932193653355e-08, "loss": 0.2506, "step": 16711 }, { "epoch": 0.96, "grad_norm": 0.5578558576279988, "learning_rate": 8.30398292705259e-08, "loss": 0.2439, "step": 16712 }, { "epoch": 0.96, "grad_norm": 0.27556519383523753, "learning_rate": 8.280068002842312e-08, "loss": 0.2176, "step": 16713 }, { "epoch": 0.96, "grad_norm": 0.2691015371715588, "learning_rate": 8.256187421850636e-08, "loss": 0.2203, "step": 16714 }, { "epoch": 0.96, "grad_norm": 0.6831366435714303, "learning_rate": 8.232341184904458e-08, "loss": 0.3823, "step": 16715 }, { "epoch": 0.96, "grad_norm": 0.39566843622236897, "learning_rate": 8.20852929282967e-08, "loss": 0.2655, "step": 16716 }, { "epoch": 0.96, "grad_norm": 0.5305390025366589, "learning_rate": 8.184751746450947e-08, "loss": 0.3602, "step": 16717 }, { "epoch": 0.96, "grad_norm": 0.3804937384566211, "learning_rate": 8.16100854659152e-08, "loss": 0.243, "step": 16718 }, { "epoch": 0.96, "grad_norm": 0.24366733203716592, "learning_rate": 8.13729969407373e-08, "loss": 0.1461, "step": 16719 }, { "epoch": 0.96, "grad_norm": 0.3626781267030705, "learning_rate": 8.113625189718588e-08, "loss": 0.2923, "step": 16720 }, { "epoch": 0.96, "grad_norm": 0.3382050876755194, "learning_rate": 8.089985034346104e-08, "loss": 0.2521, "step": 16721 }, { "epoch": 0.96, "grad_norm": 0.6097586807838828, "learning_rate": 8.066379228774624e-08, "loss": 0.3351, "step": 16722 }, { "epoch": 0.96, "grad_norm": 0.34159479554399735, "learning_rate": 8.042807773821826e-08, "loss": 0.285, "step": 16723 }, { "epoch": 0.96, "grad_norm": 1.4091610780001418, "learning_rate": 8.019270670303946e-08, "loss": 0.2663, "step": 16724 }, { "epoch": 0.96, "grad_norm": 0.2633520677569486, "learning_rate": 7.995767919036002e-08, "loss": 0.1632, "step": 16725 }, { "epoch": 0.96, "grad_norm": 0.25363763611530493, "learning_rate": 7.972299520832005e-08, "loss": 0.2677, "step": 16726 }, { "epoch": 0.96, "grad_norm": 1.0371752079589232, "learning_rate": 7.948865476504641e-08, "loss": 0.2723, "step": 16727 }, { "epoch": 0.96, "grad_norm": 0.5501126198181006, "learning_rate": 7.925465786865372e-08, "loss": 0.3554, "step": 16728 }, { "epoch": 0.96, "grad_norm": 0.3783143100194757, "learning_rate": 7.902100452724548e-08, "loss": 0.3148, "step": 16729 }, { "epoch": 0.96, "grad_norm": 0.35404309098921694, "learning_rate": 7.878769474891413e-08, "loss": 0.2972, "step": 16730 }, { "epoch": 0.96, "grad_norm": 0.19637934444938035, "learning_rate": 7.855472854173763e-08, "loss": 0.0779, "step": 16731 }, { "epoch": 0.96, "grad_norm": 0.3869329443831731, "learning_rate": 7.832210591378398e-08, "loss": 0.2814, "step": 16732 }, { "epoch": 0.96, "grad_norm": 0.42170939268755797, "learning_rate": 7.808982687311006e-08, "loss": 0.3055, "step": 16733 }, { "epoch": 0.96, "grad_norm": 0.2777667725765214, "learning_rate": 7.785789142775834e-08, "loss": 0.2124, "step": 16734 }, { "epoch": 0.96, "grad_norm": 0.5345760328975254, "learning_rate": 7.762629958576129e-08, "loss": 0.3851, "step": 16735 }, { "epoch": 0.96, "grad_norm": 1.316711270975337, "learning_rate": 7.739505135513803e-08, "loss": 0.541, "step": 16736 }, { "epoch": 0.96, "grad_norm": 0.2516530707494128, "learning_rate": 7.716414674389771e-08, "loss": 0.148, "step": 16737 }, { "epoch": 0.96, "grad_norm": 0.23169324602476324, "learning_rate": 7.693358576003617e-08, "loss": 0.214, "step": 16738 }, { "epoch": 0.96, "grad_norm": 0.5344331125889435, "learning_rate": 7.670336841153925e-08, "loss": 0.3644, "step": 16739 }, { "epoch": 0.96, "grad_norm": 0.4752035059563894, "learning_rate": 7.64734947063761e-08, "loss": 0.2514, "step": 16740 }, { "epoch": 0.96, "grad_norm": 0.36429165610906206, "learning_rate": 7.624396465251038e-08, "loss": 0.2732, "step": 16741 }, { "epoch": 0.96, "grad_norm": 0.3708596666694886, "learning_rate": 7.601477825788905e-08, "loss": 0.2794, "step": 16742 }, { "epoch": 0.96, "grad_norm": 0.3845077104005944, "learning_rate": 7.57859355304491e-08, "loss": 0.1939, "step": 16743 }, { "epoch": 0.96, "grad_norm": 0.3137435856663455, "learning_rate": 7.55574364781153e-08, "loss": 0.2363, "step": 16744 }, { "epoch": 0.96, "grad_norm": 0.33829037017062646, "learning_rate": 7.532928110880133e-08, "loss": 0.2301, "step": 16745 }, { "epoch": 0.96, "grad_norm": 0.40890395308043054, "learning_rate": 7.510146943040641e-08, "loss": 0.3042, "step": 16746 }, { "epoch": 0.96, "grad_norm": 0.3051775333200117, "learning_rate": 7.487400145082203e-08, "loss": 0.2107, "step": 16747 }, { "epoch": 0.96, "grad_norm": 1.3662893373185263, "learning_rate": 7.464687717792407e-08, "loss": 0.4789, "step": 16748 }, { "epoch": 0.96, "grad_norm": 0.33082857403563304, "learning_rate": 7.442009661957738e-08, "loss": 0.2837, "step": 16749 }, { "epoch": 0.96, "grad_norm": 0.33747596972844696, "learning_rate": 7.41936597836368e-08, "loss": 0.2082, "step": 16750 }, { "epoch": 0.96, "grad_norm": 0.3298350815450901, "learning_rate": 7.396756667794158e-08, "loss": 0.2119, "step": 16751 }, { "epoch": 0.96, "grad_norm": 0.7593553931238317, "learning_rate": 7.374181731032326e-08, "loss": 0.5444, "step": 16752 }, { "epoch": 0.96, "grad_norm": 0.3203570525125074, "learning_rate": 7.35164116885989e-08, "loss": 0.2598, "step": 16753 }, { "epoch": 0.96, "grad_norm": 0.33154457148997163, "learning_rate": 7.329134982057562e-08, "loss": 0.2206, "step": 16754 }, { "epoch": 0.96, "grad_norm": 0.7366957717865998, "learning_rate": 7.306663171404494e-08, "loss": 0.3779, "step": 16755 }, { "epoch": 0.96, "grad_norm": 0.30293447315905003, "learning_rate": 7.284225737678952e-08, "loss": 0.2432, "step": 16756 }, { "epoch": 0.96, "grad_norm": 0.25522354037367, "learning_rate": 7.261822681657982e-08, "loss": 0.1827, "step": 16757 }, { "epoch": 0.96, "grad_norm": 1.388424240051691, "learning_rate": 7.239454004117519e-08, "loss": 0.8719, "step": 16758 }, { "epoch": 0.96, "grad_norm": 0.31357229248114443, "learning_rate": 7.217119705831943e-08, "loss": 0.2698, "step": 16759 }, { "epoch": 0.96, "grad_norm": 0.8776215112811757, "learning_rate": 7.19481978757497e-08, "loss": 0.2509, "step": 16760 }, { "epoch": 0.96, "grad_norm": 0.3567502082271883, "learning_rate": 7.172554250118535e-08, "loss": 0.3052, "step": 16761 }, { "epoch": 0.96, "grad_norm": 0.3012814796438905, "learning_rate": 7.150323094233912e-08, "loss": 0.2416, "step": 16762 }, { "epoch": 0.96, "grad_norm": 0.44014096773481015, "learning_rate": 7.128126320690931e-08, "loss": 0.0926, "step": 16763 }, { "epoch": 0.96, "grad_norm": 0.7301742900613, "learning_rate": 7.105963930258308e-08, "loss": 0.4903, "step": 16764 }, { "epoch": 0.96, "grad_norm": 0.22739964947646552, "learning_rate": 7.083835923703319e-08, "loss": 0.2198, "step": 16765 }, { "epoch": 0.96, "grad_norm": 0.4968594490313015, "learning_rate": 7.061742301792462e-08, "loss": 0.3535, "step": 16766 }, { "epoch": 0.96, "grad_norm": 0.49300324716515354, "learning_rate": 7.039683065290792e-08, "loss": 0.2496, "step": 16767 }, { "epoch": 0.96, "grad_norm": 0.3440836293024113, "learning_rate": 7.017658214962142e-08, "loss": 0.2844, "step": 16768 }, { "epoch": 0.96, "grad_norm": 0.7243014548865391, "learning_rate": 6.995667751569346e-08, "loss": 0.3251, "step": 16769 }, { "epoch": 0.96, "grad_norm": 0.3702799611297618, "learning_rate": 6.973711675873795e-08, "loss": 0.2672, "step": 16770 }, { "epoch": 0.96, "grad_norm": 0.37034677737138777, "learning_rate": 6.951789988635992e-08, "loss": 0.2754, "step": 16771 }, { "epoch": 0.96, "grad_norm": 0.33444108241821474, "learning_rate": 6.929902690614998e-08, "loss": 0.2079, "step": 16772 }, { "epoch": 0.96, "grad_norm": 0.3614151509687135, "learning_rate": 6.90804978256876e-08, "loss": 0.2578, "step": 16773 }, { "epoch": 0.96, "grad_norm": 0.34113505165220437, "learning_rate": 6.886231265254007e-08, "loss": 0.2359, "step": 16774 }, { "epoch": 0.96, "grad_norm": 1.2018390028919796, "learning_rate": 6.864447139426356e-08, "loss": 0.5018, "step": 16775 }, { "epoch": 0.96, "grad_norm": 0.3092635740058215, "learning_rate": 6.842697405840204e-08, "loss": 0.1559, "step": 16776 }, { "epoch": 0.96, "grad_norm": 0.2583079412979954, "learning_rate": 6.820982065248837e-08, "loss": 0.252, "step": 16777 }, { "epoch": 0.96, "grad_norm": 0.3110707555011408, "learning_rate": 6.79930111840399e-08, "loss": 0.2555, "step": 16778 }, { "epoch": 0.96, "grad_norm": 0.9741701424650024, "learning_rate": 6.777654566056724e-08, "loss": 0.3839, "step": 16779 }, { "epoch": 0.96, "grad_norm": 0.3323721668501855, "learning_rate": 6.756042408956554e-08, "loss": 0.1876, "step": 16780 }, { "epoch": 0.96, "grad_norm": 0.5357257838410326, "learning_rate": 6.73446464785199e-08, "loss": 0.3502, "step": 16781 }, { "epoch": 0.96, "grad_norm": 0.44370665817522104, "learning_rate": 6.712921283490103e-08, "loss": 0.3634, "step": 16782 }, { "epoch": 0.96, "grad_norm": 0.28311071631336937, "learning_rate": 6.691412316617075e-08, "loss": 0.1912, "step": 16783 }, { "epoch": 0.96, "grad_norm": 0.5741547571259257, "learning_rate": 6.66993774797775e-08, "loss": 0.3838, "step": 16784 }, { "epoch": 0.96, "grad_norm": 0.2227072118964059, "learning_rate": 6.64849757831576e-08, "loss": 0.205, "step": 16785 }, { "epoch": 0.96, "grad_norm": 0.2907201308397229, "learning_rate": 6.627091808373509e-08, "loss": 0.1812, "step": 16786 }, { "epoch": 0.96, "grad_norm": 1.299735719613476, "learning_rate": 6.605720438892515e-08, "loss": 0.7159, "step": 16787 }, { "epoch": 0.96, "grad_norm": 0.4745022016836694, "learning_rate": 6.584383470612631e-08, "loss": 0.3508, "step": 16788 }, { "epoch": 0.96, "grad_norm": 0.2982418095480476, "learning_rate": 6.563080904272712e-08, "loss": 0.1854, "step": 16789 }, { "epoch": 0.96, "grad_norm": 0.3813855226753747, "learning_rate": 6.54181274061072e-08, "loss": 0.3044, "step": 16790 }, { "epoch": 0.96, "grad_norm": 0.32301668216479656, "learning_rate": 6.520578980362957e-08, "loss": 0.1707, "step": 16791 }, { "epoch": 0.96, "grad_norm": 0.41005198828464845, "learning_rate": 6.499379624264834e-08, "loss": 0.2734, "step": 16792 }, { "epoch": 0.96, "grad_norm": 0.277449872251276, "learning_rate": 6.478214673050542e-08, "loss": 0.2324, "step": 16793 }, { "epoch": 0.96, "grad_norm": 1.2249416877156012, "learning_rate": 6.45708412745294e-08, "loss": 0.7376, "step": 16794 }, { "epoch": 0.96, "grad_norm": 0.3198672617571951, "learning_rate": 6.435987988203662e-08, "loss": 0.241, "step": 16795 }, { "epoch": 0.97, "grad_norm": 0.6394970517054889, "learning_rate": 6.414926256033461e-08, "loss": 0.2757, "step": 16796 }, { "epoch": 0.97, "grad_norm": 0.22601691033016935, "learning_rate": 6.393898931671749e-08, "loss": 0.2039, "step": 16797 }, { "epoch": 0.97, "grad_norm": 0.33681906182926935, "learning_rate": 6.372906015846502e-08, "loss": 0.2788, "step": 16798 }, { "epoch": 0.97, "grad_norm": 1.303947428546528, "learning_rate": 6.351947509284695e-08, "loss": 0.2012, "step": 16799 }, { "epoch": 0.97, "grad_norm": 0.44996544450292303, "learning_rate": 6.331023412712411e-08, "loss": 0.3338, "step": 16800 }, { "epoch": 0.97, "grad_norm": 0.2537708506596748, "learning_rate": 6.310133726853962e-08, "loss": 0.23, "step": 16801 }, { "epoch": 0.97, "grad_norm": 0.8024519107979677, "learning_rate": 6.289278452432768e-08, "loss": 0.3105, "step": 16802 }, { "epoch": 0.97, "grad_norm": 0.3602472486183106, "learning_rate": 6.268457590171251e-08, "loss": 0.1993, "step": 16803 }, { "epoch": 0.97, "grad_norm": 0.5559272095999094, "learning_rate": 6.247671140790279e-08, "loss": 0.2386, "step": 16804 }, { "epoch": 0.97, "grad_norm": 0.2690640064334697, "learning_rate": 6.226919105009721e-08, "loss": 0.269, "step": 16805 }, { "epoch": 0.97, "grad_norm": 0.5439058823170019, "learning_rate": 6.206201483548224e-08, "loss": 0.3134, "step": 16806 }, { "epoch": 0.97, "grad_norm": 0.4256086949463779, "learning_rate": 6.185518277123215e-08, "loss": 0.2782, "step": 16807 }, { "epoch": 0.97, "grad_norm": 0.5052015564502045, "learning_rate": 6.164869486451008e-08, "loss": 0.3196, "step": 16808 }, { "epoch": 0.97, "grad_norm": 0.20772748215582018, "learning_rate": 6.144255112246589e-08, "loss": 0.1723, "step": 16809 }, { "epoch": 0.97, "grad_norm": 0.40896993870319204, "learning_rate": 6.12367515522394e-08, "loss": 0.3089, "step": 16810 }, { "epoch": 0.97, "grad_norm": 0.5635204637734937, "learning_rate": 6.103129616095605e-08, "loss": 0.3505, "step": 16811 }, { "epoch": 0.97, "grad_norm": 0.4005554484110214, "learning_rate": 6.082618495573234e-08, "loss": 0.2544, "step": 16812 }, { "epoch": 0.97, "grad_norm": 0.33308458781620476, "learning_rate": 6.062141794366927e-08, "loss": 0.2608, "step": 16813 }, { "epoch": 0.97, "grad_norm": 0.66920581948784, "learning_rate": 6.041699513186005e-08, "loss": 0.4106, "step": 16814 }, { "epoch": 0.97, "grad_norm": 0.32580508384371565, "learning_rate": 6.021291652738348e-08, "loss": 0.09, "step": 16815 }, { "epoch": 0.97, "grad_norm": 0.3156954468518769, "learning_rate": 6.0009182137305e-08, "loss": 0.241, "step": 16816 }, { "epoch": 0.97, "grad_norm": 0.35874233754083895, "learning_rate": 5.98057919686823e-08, "loss": 0.3018, "step": 16817 }, { "epoch": 0.97, "grad_norm": 0.5415758946024039, "learning_rate": 5.9602746028556425e-08, "loss": 0.3908, "step": 16818 }, { "epoch": 0.97, "grad_norm": 0.32112130011980533, "learning_rate": 5.9400044323960625e-08, "loss": 0.2137, "step": 16819 }, { "epoch": 0.97, "grad_norm": 1.3192494544105202, "learning_rate": 5.919768686191263e-08, "loss": 0.6982, "step": 16820 }, { "epoch": 0.97, "grad_norm": 0.27660356577726986, "learning_rate": 5.8995673649422383e-08, "loss": 0.2368, "step": 16821 }, { "epoch": 0.97, "grad_norm": 0.31310707140945643, "learning_rate": 5.879400469348429e-08, "loss": 0.0847, "step": 16822 }, { "epoch": 0.97, "grad_norm": 0.38135073290776306, "learning_rate": 5.859268000108276e-08, "loss": 0.3013, "step": 16823 }, { "epoch": 0.97, "grad_norm": 0.3411340438737624, "learning_rate": 5.8391699579188885e-08, "loss": 0.2999, "step": 16824 }, { "epoch": 0.97, "grad_norm": 0.3468521220123775, "learning_rate": 5.819106343476266e-08, "loss": 0.1525, "step": 16825 }, { "epoch": 0.97, "grad_norm": 0.38139159913608844, "learning_rate": 5.799077157475297e-08, "loss": 0.3019, "step": 16826 }, { "epoch": 0.97, "grad_norm": 0.35172703587555404, "learning_rate": 5.779082400609426e-08, "loss": 0.1948, "step": 16827 }, { "epoch": 0.97, "grad_norm": 0.3715864195679574, "learning_rate": 5.7591220735712105e-08, "loss": 0.1732, "step": 16828 }, { "epoch": 0.97, "grad_norm": 0.2882289558296544, "learning_rate": 5.7391961770519865e-08, "loss": 0.3003, "step": 16829 }, { "epoch": 0.97, "grad_norm": 0.8134034249660722, "learning_rate": 5.7193047117415356e-08, "loss": 0.4185, "step": 16830 }, { "epoch": 0.97, "grad_norm": 0.5514544419384692, "learning_rate": 5.699447678328751e-08, "loss": 0.373, "step": 16831 }, { "epoch": 0.97, "grad_norm": 0.29257468097948075, "learning_rate": 5.6796250775014164e-08, "loss": 0.2282, "step": 16832 }, { "epoch": 0.97, "grad_norm": 0.5000178068678629, "learning_rate": 5.6598369099458705e-08, "loss": 0.2971, "step": 16833 }, { "epoch": 0.97, "grad_norm": 0.22716384655094343, "learning_rate": 5.640083176347455e-08, "loss": 0.1603, "step": 16834 }, { "epoch": 0.97, "grad_norm": 0.3518311840836283, "learning_rate": 5.620363877390178e-08, "loss": 0.2388, "step": 16835 }, { "epoch": 0.97, "grad_norm": 0.45494879435889785, "learning_rate": 5.600679013756938e-08, "loss": 0.3139, "step": 16836 }, { "epoch": 0.97, "grad_norm": 0.31631780442761426, "learning_rate": 5.581028586129411e-08, "loss": 0.2677, "step": 16837 }, { "epoch": 0.97, "grad_norm": 0.5723125886588851, "learning_rate": 5.561412595188165e-08, "loss": 0.2285, "step": 16838 }, { "epoch": 0.97, "grad_norm": 0.9015022205745714, "learning_rate": 5.541831041612322e-08, "loss": 0.4787, "step": 16839 }, { "epoch": 0.97, "grad_norm": 0.37034387207268293, "learning_rate": 5.5222839260802294e-08, "loss": 0.2729, "step": 16840 }, { "epoch": 0.97, "grad_norm": 0.30520368304807893, "learning_rate": 5.502771249268568e-08, "loss": 0.2811, "step": 16841 }, { "epoch": 0.97, "grad_norm": 0.2898764190409881, "learning_rate": 5.483293011853241e-08, "loss": 0.145, "step": 16842 }, { "epoch": 0.97, "grad_norm": 0.756039605785083, "learning_rate": 5.4638492145087096e-08, "loss": 0.3867, "step": 16843 }, { "epoch": 0.97, "grad_norm": 0.3807809778880975, "learning_rate": 5.4444398579083235e-08, "loss": 0.2543, "step": 16844 }, { "epoch": 0.97, "grad_norm": 0.37486671787998804, "learning_rate": 5.425064942724212e-08, "loss": 0.2385, "step": 16845 }, { "epoch": 0.97, "grad_norm": 0.596115630845532, "learning_rate": 5.4057244696272826e-08, "loss": 0.3478, "step": 16846 }, { "epoch": 0.97, "grad_norm": 0.36027385165231446, "learning_rate": 5.386418439287444e-08, "loss": 0.2733, "step": 16847 }, { "epoch": 0.97, "grad_norm": 0.26856822778458994, "learning_rate": 5.3671468523731617e-08, "loss": 0.1785, "step": 16848 }, { "epoch": 0.97, "grad_norm": 0.37989515893189935, "learning_rate": 5.3479097095516795e-08, "loss": 0.3023, "step": 16849 }, { "epoch": 0.97, "grad_norm": 0.2918242952616663, "learning_rate": 5.328707011489465e-08, "loss": 0.2222, "step": 16850 }, { "epoch": 0.97, "grad_norm": 0.7682960082736956, "learning_rate": 5.3095387588512074e-08, "loss": 0.2587, "step": 16851 }, { "epoch": 0.97, "grad_norm": 0.36395943813381715, "learning_rate": 5.2904049523009315e-08, "loss": 0.2663, "step": 16852 }, { "epoch": 0.97, "grad_norm": 0.38743575817296055, "learning_rate": 5.271305592501108e-08, "loss": 0.3176, "step": 16853 }, { "epoch": 0.97, "grad_norm": 0.417320125421086, "learning_rate": 5.252240680113319e-08, "loss": 0.2276, "step": 16854 }, { "epoch": 0.97, "grad_norm": 0.22973767123986893, "learning_rate": 5.233210215797591e-08, "loss": 0.104, "step": 16855 }, { "epoch": 0.97, "grad_norm": 0.3841946691455176, "learning_rate": 5.2142142002129524e-08, "loss": 0.3001, "step": 16856 }, { "epoch": 0.97, "grad_norm": 0.34093585258614106, "learning_rate": 5.195252634017434e-08, "loss": 0.2781, "step": 16857 }, { "epoch": 0.97, "grad_norm": 0.5523198990332471, "learning_rate": 5.1763255178673974e-08, "loss": 0.2075, "step": 16858 }, { "epoch": 0.97, "grad_norm": 0.4296501087072229, "learning_rate": 5.1574328524184316e-08, "loss": 0.2917, "step": 16859 }, { "epoch": 0.97, "grad_norm": 0.47266839506660946, "learning_rate": 5.1385746383249e-08, "loss": 0.32, "step": 16860 }, { "epoch": 0.97, "grad_norm": 0.21272369188801774, "learning_rate": 5.1197508762397265e-08, "loss": 0.1139, "step": 16861 }, { "epoch": 0.97, "grad_norm": 0.2604253939433585, "learning_rate": 5.1009615668147217e-08, "loss": 0.2097, "step": 16862 }, { "epoch": 0.97, "grad_norm": 0.610360799421692, "learning_rate": 5.082206710700699e-08, "loss": 0.3483, "step": 16863 }, { "epoch": 0.97, "grad_norm": 0.295175344003875, "learning_rate": 5.063486308547028e-08, "loss": 0.2115, "step": 16864 }, { "epoch": 0.97, "grad_norm": 0.33680233097645323, "learning_rate": 5.044800361002078e-08, "loss": 0.2811, "step": 16865 }, { "epoch": 0.97, "grad_norm": 1.372302629109178, "learning_rate": 5.0261488687128876e-08, "loss": 0.62, "step": 16866 }, { "epoch": 0.97, "grad_norm": 0.24720787872927993, "learning_rate": 5.007531832325385e-08, "loss": 0.1813, "step": 16867 }, { "epoch": 0.97, "grad_norm": 0.2469605731267985, "learning_rate": 4.9889492524842766e-08, "loss": 0.2309, "step": 16868 }, { "epoch": 0.97, "grad_norm": 0.5687789725130987, "learning_rate": 4.970401129833047e-08, "loss": 0.3839, "step": 16869 }, { "epoch": 0.97, "grad_norm": 0.7360556561940557, "learning_rate": 4.9518874650139604e-08, "loss": 0.3072, "step": 16870 }, { "epoch": 0.97, "grad_norm": 0.36413256453148496, "learning_rate": 4.933408258668393e-08, "loss": 0.2074, "step": 16871 }, { "epoch": 0.97, "grad_norm": 0.33474022926915215, "learning_rate": 4.914963511436055e-08, "loss": 0.2959, "step": 16872 }, { "epoch": 0.97, "grad_norm": 0.3531737978180413, "learning_rate": 4.896553223955658e-08, "loss": 0.2193, "step": 16873 }, { "epoch": 0.97, "grad_norm": 0.3508589055917248, "learning_rate": 4.878177396864914e-08, "loss": 0.2221, "step": 16874 }, { "epoch": 0.97, "grad_norm": 0.42703325762760735, "learning_rate": 4.859836030800091e-08, "loss": 0.261, "step": 16875 }, { "epoch": 0.97, "grad_norm": 0.29443074994484736, "learning_rate": 4.8415291263962383e-08, "loss": 0.2574, "step": 16876 }, { "epoch": 0.97, "grad_norm": 0.37489421083434077, "learning_rate": 4.823256684287625e-08, "loss": 0.2146, "step": 16877 }, { "epoch": 0.97, "grad_norm": 1.1846274794014118, "learning_rate": 4.805018705106745e-08, "loss": 0.6589, "step": 16878 }, { "epoch": 0.97, "grad_norm": 0.9644830863353985, "learning_rate": 4.7868151894852054e-08, "loss": 0.3864, "step": 16879 }, { "epoch": 0.97, "grad_norm": 0.24407539568102385, "learning_rate": 4.768646138053501e-08, "loss": 0.2382, "step": 16880 }, { "epoch": 0.97, "grad_norm": 0.2693149381967699, "learning_rate": 4.750511551440906e-08, "loss": 0.189, "step": 16881 }, { "epoch": 0.97, "grad_norm": 1.318880587886877, "learning_rate": 4.732411430275141e-08, "loss": 0.455, "step": 16882 }, { "epoch": 0.97, "grad_norm": 0.32987152777098505, "learning_rate": 4.71434577518326e-08, "loss": 0.2373, "step": 16883 }, { "epoch": 0.97, "grad_norm": 0.33653567396866907, "learning_rate": 4.696314586790762e-08, "loss": 0.2356, "step": 16884 }, { "epoch": 0.97, "grad_norm": 0.6980998648832283, "learning_rate": 4.6783178657221486e-08, "loss": 0.4677, "step": 16885 }, { "epoch": 0.97, "grad_norm": 0.32680210012665406, "learning_rate": 4.6603556126004756e-08, "loss": 0.2597, "step": 16886 }, { "epoch": 0.97, "grad_norm": 0.23594603800560673, "learning_rate": 4.642427828047913e-08, "loss": 0.1121, "step": 16887 }, { "epoch": 0.97, "grad_norm": 0.32282382268927773, "learning_rate": 4.624534512685297e-08, "loss": 0.2613, "step": 16888 }, { "epoch": 0.97, "grad_norm": 0.3497080476951133, "learning_rate": 4.6066756671322434e-08, "loss": 0.2655, "step": 16889 }, { "epoch": 0.97, "grad_norm": 0.9983668851762743, "learning_rate": 4.588851292007257e-08, "loss": 0.5257, "step": 16890 }, { "epoch": 0.97, "grad_norm": 0.40613672708852766, "learning_rate": 4.5710613879275115e-08, "loss": 0.2886, "step": 16891 }, { "epoch": 0.97, "grad_norm": 0.3492455486029241, "learning_rate": 4.5533059555090684e-08, "loss": 0.2554, "step": 16892 }, { "epoch": 0.97, "grad_norm": 0.4983795945378701, "learning_rate": 4.535584995366882e-08, "loss": 0.3268, "step": 16893 }, { "epoch": 0.97, "grad_norm": 0.2967937991637615, "learning_rate": 4.517898508114571e-08, "loss": 0.0822, "step": 16894 }, { "epoch": 0.97, "grad_norm": 0.3401773758082715, "learning_rate": 4.500246494364535e-08, "loss": 0.2568, "step": 16895 }, { "epoch": 0.97, "grad_norm": 0.33076498271613985, "learning_rate": 4.482628954728285e-08, "loss": 0.3311, "step": 16896 }, { "epoch": 0.97, "grad_norm": 0.6425786521864342, "learning_rate": 4.465045889815778e-08, "loss": 0.3003, "step": 16897 }, { "epoch": 0.97, "grad_norm": 0.2972202986795993, "learning_rate": 4.447497300235859e-08, "loss": 0.2615, "step": 16898 }, { "epoch": 0.97, "grad_norm": 0.3100186032119291, "learning_rate": 4.4299831865962653e-08, "loss": 0.1789, "step": 16899 }, { "epoch": 0.97, "grad_norm": 0.3280509719318347, "learning_rate": 4.412503549503622e-08, "loss": 0.2357, "step": 16900 }, { "epoch": 0.97, "grad_norm": 0.33145619269969046, "learning_rate": 4.3950583895631116e-08, "loss": 0.2536, "step": 16901 }, { "epoch": 0.97, "grad_norm": 0.7463227424917745, "learning_rate": 4.377647707379029e-08, "loss": 0.5095, "step": 16902 }, { "epoch": 0.97, "grad_norm": 0.31520197157597846, "learning_rate": 4.360271503554114e-08, "loss": 0.2509, "step": 16903 }, { "epoch": 0.97, "grad_norm": 0.33216346525271806, "learning_rate": 4.342929778690108e-08, "loss": 0.2673, "step": 16904 }, { "epoch": 0.97, "grad_norm": 1.4729941097580517, "learning_rate": 4.325622533387752e-08, "loss": 0.5392, "step": 16905 }, { "epoch": 0.97, "grad_norm": 0.3713416552428508, "learning_rate": 4.308349768246234e-08, "loss": 0.1466, "step": 16906 }, { "epoch": 0.97, "grad_norm": 0.28593857290381, "learning_rate": 4.291111483863741e-08, "loss": 0.1936, "step": 16907 }, { "epoch": 0.97, "grad_norm": 0.34487575236955365, "learning_rate": 4.273907680837241e-08, "loss": 0.2875, "step": 16908 }, { "epoch": 0.97, "grad_norm": 0.5930762811522095, "learning_rate": 4.2567383597624804e-08, "loss": 0.394, "step": 16909 }, { "epoch": 0.97, "grad_norm": 0.2870976878198615, "learning_rate": 4.239603521234092e-08, "loss": 0.188, "step": 16910 }, { "epoch": 0.97, "grad_norm": 1.5038426691479727, "learning_rate": 4.2225031658453816e-08, "loss": 0.6532, "step": 16911 }, { "epoch": 0.97, "grad_norm": 0.22444525476756616, "learning_rate": 4.205437294188541e-08, "loss": 0.2015, "step": 16912 }, { "epoch": 0.97, "grad_norm": 0.2935048318220947, "learning_rate": 4.1884059068546534e-08, "loss": 0.1828, "step": 16913 }, { "epoch": 0.97, "grad_norm": 0.5488045078725402, "learning_rate": 4.171409004433358e-08, "loss": 0.3798, "step": 16914 }, { "epoch": 0.97, "grad_norm": 0.887381775060886, "learning_rate": 4.154446587513406e-08, "loss": 0.5585, "step": 16915 }, { "epoch": 0.97, "grad_norm": 0.2382508451881509, "learning_rate": 4.137518656682216e-08, "loss": 0.2414, "step": 16916 }, { "epoch": 0.97, "grad_norm": 0.6288484829732698, "learning_rate": 4.120625212525875e-08, "loss": 0.2158, "step": 16917 }, { "epoch": 0.97, "grad_norm": 0.2514810105291575, "learning_rate": 4.10376625562936e-08, "loss": 0.1322, "step": 16918 }, { "epoch": 0.97, "grad_norm": 0.4334570249701634, "learning_rate": 4.086941786576759e-08, "loss": 0.2788, "step": 16919 }, { "epoch": 0.97, "grad_norm": 0.2745662783710371, "learning_rate": 4.070151805950384e-08, "loss": 0.2448, "step": 16920 }, { "epoch": 0.97, "grad_norm": 0.8311620037180578, "learning_rate": 4.053396314331881e-08, "loss": 0.5275, "step": 16921 }, { "epoch": 0.97, "grad_norm": 0.3124622685734896, "learning_rate": 4.0366753123014526e-08, "loss": 0.2508, "step": 16922 }, { "epoch": 0.97, "grad_norm": 1.9206862580118762, "learning_rate": 4.0199888004381907e-08, "loss": 0.2277, "step": 16923 }, { "epoch": 0.97, "grad_norm": 0.22491161238978494, "learning_rate": 4.003336779319855e-08, "loss": 0.2061, "step": 16924 }, { "epoch": 0.97, "grad_norm": 0.3165803242891882, "learning_rate": 3.9867192495230965e-08, "loss": 0.2662, "step": 16925 }, { "epoch": 0.97, "grad_norm": 0.6313708675545323, "learning_rate": 3.970136211623343e-08, "loss": 0.2733, "step": 16926 }, { "epoch": 0.97, "grad_norm": 0.3125624883391193, "learning_rate": 3.9535876661951356e-08, "loss": 0.3, "step": 16927 }, { "epoch": 0.97, "grad_norm": 0.5088215981290167, "learning_rate": 3.937073613811237e-08, "loss": 0.2573, "step": 16928 }, { "epoch": 0.97, "grad_norm": 0.422677352010954, "learning_rate": 3.920594055043636e-08, "loss": 0.2661, "step": 16929 }, { "epoch": 0.97, "grad_norm": 0.32130140929879897, "learning_rate": 3.9041489904629857e-08, "loss": 0.1425, "step": 16930 }, { "epoch": 0.97, "grad_norm": 0.4239268555816805, "learning_rate": 3.8877384206389426e-08, "loss": 0.3111, "step": 16931 }, { "epoch": 0.97, "grad_norm": 0.29273897539680954, "learning_rate": 3.8713623461396066e-08, "loss": 0.2897, "step": 16932 }, { "epoch": 0.97, "grad_norm": 1.7208118477900014, "learning_rate": 3.855020767532191e-08, "loss": 0.3823, "step": 16933 }, { "epoch": 0.97, "grad_norm": 0.39365836561310663, "learning_rate": 3.8387136853825776e-08, "loss": 0.2538, "step": 16934 }, { "epoch": 0.97, "grad_norm": 0.3864142535331856, "learning_rate": 3.822441100255425e-08, "loss": 0.2928, "step": 16935 }, { "epoch": 0.97, "grad_norm": 0.4594334507085826, "learning_rate": 3.806203012714394e-08, "loss": 0.2365, "step": 16936 }, { "epoch": 0.97, "grad_norm": 0.36622375232538923, "learning_rate": 3.7899994233216996e-08, "loss": 0.2491, "step": 16937 }, { "epoch": 0.97, "grad_norm": 0.2574689612287729, "learning_rate": 3.77383033263834e-08, "loss": 0.2046, "step": 16938 }, { "epoch": 0.97, "grad_norm": 0.32583886214228214, "learning_rate": 3.757695741224532e-08, "loss": 0.2641, "step": 16939 }, { "epoch": 0.97, "grad_norm": 0.30582437989842254, "learning_rate": 3.7415956496388295e-08, "loss": 0.2625, "step": 16940 }, { "epoch": 0.97, "grad_norm": 1.272482518357978, "learning_rate": 3.7255300584388976e-08, "loss": 0.6096, "step": 16941 }, { "epoch": 0.97, "grad_norm": 0.6476407086373608, "learning_rate": 3.709498968180958e-08, "loss": 0.371, "step": 16942 }, { "epoch": 0.97, "grad_norm": 0.24076540832073126, "learning_rate": 3.693502379420233e-08, "loss": 0.1992, "step": 16943 }, { "epoch": 0.97, "grad_norm": 0.47951685652698056, "learning_rate": 3.677540292710724e-08, "loss": 0.3247, "step": 16944 }, { "epoch": 0.97, "grad_norm": 0.434123742526139, "learning_rate": 3.6616127086051e-08, "loss": 0.3129, "step": 16945 }, { "epoch": 0.97, "grad_norm": 0.1810864281589373, "learning_rate": 3.64571962765492e-08, "loss": 0.0715, "step": 16946 }, { "epoch": 0.97, "grad_norm": 0.32436565764474184, "learning_rate": 3.629861050410743e-08, "loss": 0.2812, "step": 16947 }, { "epoch": 0.97, "grad_norm": 0.4181731715703347, "learning_rate": 3.6140369774215755e-08, "loss": 0.3126, "step": 16948 }, { "epoch": 0.97, "grad_norm": 0.5866495465077649, "learning_rate": 3.5982474092355334e-08, "loss": 0.1783, "step": 16949 }, { "epoch": 0.97, "grad_norm": 0.28246328115634156, "learning_rate": 3.58249234639918e-08, "loss": 0.2231, "step": 16950 }, { "epoch": 0.97, "grad_norm": 0.3389229225390919, "learning_rate": 3.566771789458412e-08, "loss": 0.3261, "step": 16951 }, { "epoch": 0.97, "grad_norm": 0.21439535606761287, "learning_rate": 3.55108573895746e-08, "loss": 0.1327, "step": 16952 }, { "epoch": 0.97, "grad_norm": 0.5498107837423801, "learning_rate": 3.535434195439558e-08, "loss": 0.3353, "step": 16953 }, { "epoch": 0.97, "grad_norm": 0.6198576731338329, "learning_rate": 3.5198171594467145e-08, "loss": 0.3519, "step": 16954 }, { "epoch": 0.97, "grad_norm": 0.38369011378607276, "learning_rate": 3.504234631519721e-08, "loss": 0.2421, "step": 16955 }, { "epoch": 0.97, "grad_norm": 0.2903915426775922, "learning_rate": 3.4886866121982555e-08, "loss": 0.2236, "step": 16956 }, { "epoch": 0.97, "grad_norm": 1.078379182005405, "learning_rate": 3.473173102020666e-08, "loss": 0.7698, "step": 16957 }, { "epoch": 0.97, "grad_norm": 0.2756133154465786, "learning_rate": 3.4576941015243003e-08, "loss": 0.1562, "step": 16958 }, { "epoch": 0.97, "grad_norm": 0.27127846778563536, "learning_rate": 3.4422496112451745e-08, "loss": 0.2316, "step": 16959 }, { "epoch": 0.97, "grad_norm": 0.40386928409495443, "learning_rate": 3.426839631718082e-08, "loss": 0.3061, "step": 16960 }, { "epoch": 0.97, "grad_norm": 0.4168805787576852, "learning_rate": 3.411464163476597e-08, "loss": 0.2353, "step": 16961 }, { "epoch": 0.97, "grad_norm": 0.47659370916054206, "learning_rate": 3.3961232070532927e-08, "loss": 0.2327, "step": 16962 }, { "epoch": 0.97, "grad_norm": 0.34162007685684154, "learning_rate": 3.380816762979411e-08, "loss": 0.3236, "step": 16963 }, { "epoch": 0.97, "grad_norm": 0.2637327032439912, "learning_rate": 3.3655448317849725e-08, "loss": 0.1839, "step": 16964 }, { "epoch": 0.97, "grad_norm": 0.4139189954002851, "learning_rate": 3.350307413998888e-08, "loss": 0.2518, "step": 16965 }, { "epoch": 0.97, "grad_norm": 0.6670530591028055, "learning_rate": 3.335104510148734e-08, "loss": 0.3724, "step": 16966 }, { "epoch": 0.97, "grad_norm": 0.31514853852025554, "learning_rate": 3.3199361207610916e-08, "loss": 0.2825, "step": 16967 }, { "epoch": 0.97, "grad_norm": 0.39693340246090936, "learning_rate": 3.304802246361205e-08, "loss": 0.2747, "step": 16968 }, { "epoch": 0.97, "grad_norm": 0.937981789113555, "learning_rate": 3.2897028874731006e-08, "loss": 0.5085, "step": 16969 }, { "epoch": 0.98, "grad_norm": 0.35797398099669914, "learning_rate": 3.274638044619805e-08, "loss": 0.2483, "step": 16970 }, { "epoch": 0.98, "grad_norm": 0.24761901524686186, "learning_rate": 3.2596077183228993e-08, "loss": 0.2691, "step": 16971 }, { "epoch": 0.98, "grad_norm": 0.4298641626817817, "learning_rate": 3.244611909102857e-08, "loss": 0.1868, "step": 16972 }, { "epoch": 0.98, "grad_norm": 0.6401200224266734, "learning_rate": 3.229650617479152e-08, "loss": 0.3064, "step": 16973 }, { "epoch": 0.98, "grad_norm": 0.3937630678490877, "learning_rate": 3.2147238439697026e-08, "loss": 0.3115, "step": 16974 }, { "epoch": 0.98, "grad_norm": 0.31684257741207245, "learning_rate": 3.19983158909154e-08, "loss": 0.248, "step": 16975 }, { "epoch": 0.98, "grad_norm": 0.47739044032650685, "learning_rate": 3.1849738533603625e-08, "loss": 0.273, "step": 16976 }, { "epoch": 0.98, "grad_norm": 0.4075103274369792, "learning_rate": 3.1701506372906476e-08, "loss": 0.2934, "step": 16977 }, { "epoch": 0.98, "grad_norm": 0.3212448915171606, "learning_rate": 3.155361941395763e-08, "loss": 0.1268, "step": 16978 }, { "epoch": 0.98, "grad_norm": 0.32874130733070184, "learning_rate": 3.140607766187853e-08, "loss": 0.251, "step": 16979 }, { "epoch": 0.98, "grad_norm": 0.39753759795242666, "learning_rate": 3.125888112177733e-08, "loss": 0.3094, "step": 16980 }, { "epoch": 0.98, "grad_norm": 0.8441004038029325, "learning_rate": 3.1112029798753274e-08, "loss": 0.5548, "step": 16981 }, { "epoch": 0.98, "grad_norm": 0.5623939560608282, "learning_rate": 3.096552369789119e-08, "loss": 0.2477, "step": 16982 }, { "epoch": 0.98, "grad_norm": 0.24903041506543536, "learning_rate": 3.081936282426368e-08, "loss": 0.2619, "step": 16983 }, { "epoch": 0.98, "grad_norm": 0.29800042452218295, "learning_rate": 3.067354718293336e-08, "loss": 0.1459, "step": 16984 }, { "epoch": 0.98, "grad_norm": 0.8593406624322196, "learning_rate": 3.0528076778949536e-08, "loss": 0.1092, "step": 16985 }, { "epoch": 0.98, "grad_norm": 0.34340164329049183, "learning_rate": 3.038295161734928e-08, "loss": 0.2913, "step": 16986 }, { "epoch": 0.98, "grad_norm": 0.32860953048223024, "learning_rate": 3.023817170315857e-08, "loss": 0.2968, "step": 16987 }, { "epoch": 0.98, "grad_norm": 0.6455934770032342, "learning_rate": 3.0093737041392293e-08, "loss": 0.2445, "step": 16988 }, { "epoch": 0.98, "grad_norm": 0.4043626940495485, "learning_rate": 2.994964763704977e-08, "loss": 0.2695, "step": 16989 }, { "epoch": 0.98, "grad_norm": 0.22943688435845902, "learning_rate": 2.980590349512258e-08, "loss": 0.16, "step": 16990 }, { "epoch": 0.98, "grad_norm": 0.3037975887387826, "learning_rate": 2.966250462058895e-08, "loss": 0.2288, "step": 16991 }, { "epoch": 0.98, "grad_norm": 0.3512971318276826, "learning_rate": 2.9519451018413804e-08, "loss": 0.2918, "step": 16992 }, { "epoch": 0.98, "grad_norm": 0.9975814522598434, "learning_rate": 2.9376742693550954e-08, "loss": 0.4212, "step": 16993 }, { "epoch": 0.98, "grad_norm": 0.7888704691744978, "learning_rate": 2.9234379650943113e-08, "loss": 0.3391, "step": 16994 }, { "epoch": 0.98, "grad_norm": 0.2782399609130223, "learning_rate": 2.9092361895519673e-08, "loss": 0.2218, "step": 16995 }, { "epoch": 0.98, "grad_norm": 0.22388047250656798, "learning_rate": 2.895068943219892e-08, "loss": 0.1858, "step": 16996 }, { "epoch": 0.98, "grad_norm": 0.8291268218917041, "learning_rate": 2.880936226588693e-08, "loss": 0.3896, "step": 16997 }, { "epoch": 0.98, "grad_norm": 0.3064483112237091, "learning_rate": 2.866838040147868e-08, "loss": 0.21, "step": 16998 }, { "epoch": 0.98, "grad_norm": 0.3857237505293713, "learning_rate": 2.8527743843854704e-08, "loss": 0.309, "step": 16999 }, { "epoch": 0.98, "grad_norm": 0.8772816244367653, "learning_rate": 2.8387452597886666e-08, "loss": 0.4426, "step": 17000 }, { "epoch": 0.98, "grad_norm": 0.3463263519606244, "learning_rate": 2.824750666843179e-08, "loss": 0.1876, "step": 17001 }, { "epoch": 0.98, "grad_norm": 0.26138233646250353, "learning_rate": 2.810790606033731e-08, "loss": 0.203, "step": 17002 }, { "epoch": 0.98, "grad_norm": 0.5924213734480587, "learning_rate": 2.7968650778438245e-08, "loss": 0.2967, "step": 17003 }, { "epoch": 0.98, "grad_norm": 0.3005423451194237, "learning_rate": 2.7829740827555185e-08, "loss": 0.2472, "step": 17004 }, { "epoch": 0.98, "grad_norm": 0.6290851599106121, "learning_rate": 2.769117621249873e-08, "loss": 0.3087, "step": 17005 }, { "epoch": 0.98, "grad_norm": 0.5028037464387212, "learning_rate": 2.7552956938068364e-08, "loss": 0.366, "step": 17006 }, { "epoch": 0.98, "grad_norm": 0.26322989882195386, "learning_rate": 2.741508300905138e-08, "loss": 0.2482, "step": 17007 }, { "epoch": 0.98, "grad_norm": 1.7885337997270865, "learning_rate": 2.727755443021951e-08, "loss": 0.1453, "step": 17008 }, { "epoch": 0.98, "grad_norm": 0.3236513169203668, "learning_rate": 2.7140371206337834e-08, "loss": 0.2128, "step": 17009 }, { "epoch": 0.98, "grad_norm": 0.410045538047261, "learning_rate": 2.7003533342156995e-08, "loss": 0.2497, "step": 17010 }, { "epoch": 0.98, "grad_norm": 0.3294855926484948, "learning_rate": 2.6867040842414316e-08, "loss": 0.2396, "step": 17011 }, { "epoch": 0.98, "grad_norm": 0.7913529950119497, "learning_rate": 2.6730893711837124e-08, "loss": 0.4242, "step": 17012 }, { "epoch": 0.98, "grad_norm": 0.34653414142176686, "learning_rate": 2.6595091955139428e-08, "loss": 0.2621, "step": 17013 }, { "epoch": 0.98, "grad_norm": 0.27650331142096424, "learning_rate": 2.6459635577026355e-08, "loss": 0.1645, "step": 17014 }, { "epoch": 0.98, "grad_norm": 0.34663882196846774, "learning_rate": 2.6324524582186374e-08, "loss": 0.2303, "step": 17015 }, { "epoch": 0.98, "grad_norm": 0.34330890321049773, "learning_rate": 2.6189758975299074e-08, "loss": 0.2632, "step": 17016 }, { "epoch": 0.98, "grad_norm": 0.6417232033799707, "learning_rate": 2.6055338761031835e-08, "loss": 0.4148, "step": 17017 }, { "epoch": 0.98, "grad_norm": 0.3119913260890965, "learning_rate": 2.592126394403982e-08, "loss": 0.2295, "step": 17018 }, { "epoch": 0.98, "grad_norm": 0.37198388859885884, "learning_rate": 2.5787534528964875e-08, "loss": 0.2445, "step": 17019 }, { "epoch": 0.98, "grad_norm": 1.3027242854603744, "learning_rate": 2.5654150520438848e-08, "loss": 0.3769, "step": 17020 }, { "epoch": 0.98, "grad_norm": 0.3261165338744473, "learning_rate": 2.5521111923080266e-08, "loss": 0.1333, "step": 17021 }, { "epoch": 0.98, "grad_norm": 0.3275988014909243, "learning_rate": 2.5388418741497668e-08, "loss": 0.2532, "step": 17022 }, { "epoch": 0.98, "grad_norm": 0.3388604127745746, "learning_rate": 2.5256070980284042e-08, "loss": 0.2924, "step": 17023 }, { "epoch": 0.98, "grad_norm": 1.4462487671630624, "learning_rate": 2.5124068644024613e-08, "loss": 0.2823, "step": 17024 }, { "epoch": 0.98, "grad_norm": 0.3366263902770327, "learning_rate": 2.4992411737289057e-08, "loss": 0.2503, "step": 17025 }, { "epoch": 0.98, "grad_norm": 1.4836462318839685, "learning_rate": 2.4861100264638172e-08, "loss": 0.6723, "step": 17026 }, { "epoch": 0.98, "grad_norm": 0.27144366896564376, "learning_rate": 2.473013423061832e-08, "loss": 0.221, "step": 17027 }, { "epoch": 0.98, "grad_norm": 0.31117856512925224, "learning_rate": 2.459951363976476e-08, "loss": 0.2513, "step": 17028 }, { "epoch": 0.98, "grad_norm": 0.38810367971759135, "learning_rate": 2.4469238496600546e-08, "loss": 0.2505, "step": 17029 }, { "epoch": 0.98, "grad_norm": 0.33623722779449305, "learning_rate": 2.433930880563762e-08, "loss": 0.2981, "step": 17030 }, { "epoch": 0.98, "grad_norm": 0.2907484340689656, "learning_rate": 2.4209724571376826e-08, "loss": 0.1823, "step": 17031 }, { "epoch": 0.98, "grad_norm": 1.158270296591638, "learning_rate": 2.4080485798302355e-08, "loss": 0.4486, "step": 17032 }, { "epoch": 0.98, "grad_norm": 0.5911414484895722, "learning_rate": 2.395159249089285e-08, "loss": 0.3196, "step": 17033 }, { "epoch": 0.98, "grad_norm": 0.2877702121234691, "learning_rate": 2.3823044653610295e-08, "loss": 0.1904, "step": 17034 }, { "epoch": 0.98, "grad_norm": 0.2822114281047928, "learning_rate": 2.3694842290907792e-08, "loss": 0.2648, "step": 17035 }, { "epoch": 0.98, "grad_norm": 0.4802254996038075, "learning_rate": 2.356698540722291e-08, "loss": 0.2575, "step": 17036 }, { "epoch": 0.98, "grad_norm": 0.2828728976084193, "learning_rate": 2.343947400698432e-08, "loss": 0.1845, "step": 17037 }, { "epoch": 0.98, "grad_norm": 0.3516289049644702, "learning_rate": 2.3312308094607382e-08, "loss": 0.2959, "step": 17038 }, { "epoch": 0.98, "grad_norm": 0.7073007426790366, "learning_rate": 2.3185487674497463e-08, "loss": 0.3777, "step": 17039 }, { "epoch": 0.98, "grad_norm": 0.3036204352872146, "learning_rate": 2.3059012751044386e-08, "loss": 0.2045, "step": 17040 }, { "epoch": 0.98, "grad_norm": 1.0686989334600154, "learning_rate": 2.2932883328629087e-08, "loss": 0.7031, "step": 17041 }, { "epoch": 0.98, "grad_norm": 0.26671675957112734, "learning_rate": 2.280709941161807e-08, "loss": 0.2414, "step": 17042 }, { "epoch": 0.98, "grad_norm": 0.3138331144274908, "learning_rate": 2.268166100436897e-08, "loss": 0.2286, "step": 17043 }, { "epoch": 0.98, "grad_norm": 0.5218556651137621, "learning_rate": 2.2556568111223866e-08, "loss": 0.2401, "step": 17044 }, { "epoch": 0.98, "grad_norm": 0.7268865897870525, "learning_rate": 2.2431820736517062e-08, "loss": 0.3442, "step": 17045 }, { "epoch": 0.98, "grad_norm": 0.34937516121279033, "learning_rate": 2.2307418884566225e-08, "loss": 0.2662, "step": 17046 }, { "epoch": 0.98, "grad_norm": 0.32484541126394684, "learning_rate": 2.2183362559681232e-08, "loss": 0.2435, "step": 17047 }, { "epoch": 0.98, "grad_norm": 0.2538310862070762, "learning_rate": 2.205965176615643e-08, "loss": 0.1857, "step": 17048 }, { "epoch": 0.98, "grad_norm": 0.36626610756991873, "learning_rate": 2.1936286508278393e-08, "loss": 0.234, "step": 17049 }, { "epoch": 0.98, "grad_norm": 0.31560292290674424, "learning_rate": 2.1813266790315922e-08, "loss": 0.2447, "step": 17050 }, { "epoch": 0.98, "grad_norm": 0.8116602561966607, "learning_rate": 2.169059261653228e-08, "loss": 0.3647, "step": 17051 }, { "epoch": 0.98, "grad_norm": 0.646895185331955, "learning_rate": 2.1568263991174065e-08, "loss": 0.2433, "step": 17052 }, { "epoch": 0.98, "grad_norm": 0.3426135098172122, "learning_rate": 2.144628091847678e-08, "loss": 0.2603, "step": 17053 }, { "epoch": 0.98, "grad_norm": 0.2182673378928602, "learning_rate": 2.1324643402667045e-08, "loss": 0.2074, "step": 17054 }, { "epoch": 0.98, "grad_norm": 0.4041287114922914, "learning_rate": 2.1203351447954824e-08, "loss": 0.2777, "step": 17055 }, { "epoch": 0.98, "grad_norm": 0.4575676863248586, "learning_rate": 2.10824050585412e-08, "loss": 0.2954, "step": 17056 }, { "epoch": 0.98, "grad_norm": 1.0410511963285844, "learning_rate": 2.0961804238616156e-08, "loss": 0.2411, "step": 17057 }, { "epoch": 0.98, "grad_norm": 0.29720838275806993, "learning_rate": 2.0841548992354132e-08, "loss": 0.2345, "step": 17058 }, { "epoch": 0.98, "grad_norm": 0.37419418678071553, "learning_rate": 2.0721639323919573e-08, "loss": 0.3246, "step": 17059 }, { "epoch": 0.98, "grad_norm": 0.16552952889999026, "learning_rate": 2.0602075237465825e-08, "loss": 0.0844, "step": 17060 }, { "epoch": 0.98, "grad_norm": 0.41038045330779327, "learning_rate": 2.0482856737132906e-08, "loss": 0.2749, "step": 17061 }, { "epoch": 0.98, "grad_norm": 0.29247558336905394, "learning_rate": 2.0363983827049737e-08, "loss": 0.2884, "step": 17062 }, { "epoch": 0.98, "grad_norm": 1.2705941693360938, "learning_rate": 2.0245456511333028e-08, "loss": 0.2699, "step": 17063 }, { "epoch": 0.98, "grad_norm": 0.3362945356885263, "learning_rate": 2.012727479408616e-08, "loss": 0.2474, "step": 17064 }, { "epoch": 0.98, "grad_norm": 1.231997007937773, "learning_rate": 2.0009438679403636e-08, "loss": 0.6624, "step": 17065 }, { "epoch": 0.98, "grad_norm": 0.329125988206052, "learning_rate": 1.9891948171364417e-08, "loss": 0.2482, "step": 17066 }, { "epoch": 0.98, "grad_norm": 0.4091946800044302, "learning_rate": 1.9774803274038578e-08, "loss": 0.2754, "step": 17067 }, { "epoch": 0.98, "grad_norm": 0.35837796725133636, "learning_rate": 1.9658003991480656e-08, "loss": 0.258, "step": 17068 }, { "epoch": 0.98, "grad_norm": 0.5680477821646825, "learning_rate": 1.9541550327738524e-08, "loss": 0.2052, "step": 17069 }, { "epoch": 0.98, "grad_norm": 0.2690535902574051, "learning_rate": 1.942544228684229e-08, "loss": 0.2, "step": 17070 }, { "epoch": 0.98, "grad_norm": 0.3929295717834575, "learning_rate": 1.930967987281429e-08, "loss": 0.3381, "step": 17071 }, { "epoch": 0.98, "grad_norm": 0.8100108431977137, "learning_rate": 1.9194263089662435e-08, "loss": 0.4147, "step": 17072 }, { "epoch": 0.98, "grad_norm": 0.4949306445932574, "learning_rate": 1.9079191941384635e-08, "loss": 0.2105, "step": 17073 }, { "epoch": 0.98, "grad_norm": 0.2799353141961879, "learning_rate": 1.8964466431964367e-08, "loss": 0.2766, "step": 17074 }, { "epoch": 0.98, "grad_norm": 0.25292916616971384, "learning_rate": 1.8850086565376236e-08, "loss": 0.1642, "step": 17075 }, { "epoch": 0.98, "grad_norm": 0.43814973227571347, "learning_rate": 1.8736052345580403e-08, "loss": 0.188, "step": 17076 }, { "epoch": 0.98, "grad_norm": 0.5327994711284099, "learning_rate": 1.862236377652593e-08, "loss": 0.3839, "step": 17077 }, { "epoch": 0.98, "grad_norm": 0.35915994320120836, "learning_rate": 1.8509020862149673e-08, "loss": 0.3232, "step": 17078 }, { "epoch": 0.98, "grad_norm": 0.4653115137961894, "learning_rate": 1.8396023606376268e-08, "loss": 0.3291, "step": 17079 }, { "epoch": 0.98, "grad_norm": 0.29836713476307714, "learning_rate": 1.828337201311925e-08, "loss": 0.1782, "step": 17080 }, { "epoch": 0.98, "grad_norm": 0.5164581462048005, "learning_rate": 1.817106608628105e-08, "loss": 0.2172, "step": 17081 }, { "epoch": 0.98, "grad_norm": 0.28972253741358867, "learning_rate": 1.805910582974857e-08, "loss": 0.2673, "step": 17082 }, { "epoch": 0.98, "grad_norm": 0.3477015924607169, "learning_rate": 1.7947491247399808e-08, "loss": 0.2427, "step": 17083 }, { "epoch": 0.98, "grad_norm": 0.577825325414684, "learning_rate": 1.783622234310056e-08, "loss": 0.3721, "step": 17084 }, { "epoch": 0.98, "grad_norm": 0.41418340683900945, "learning_rate": 1.772529912070442e-08, "loss": 0.2571, "step": 17085 }, { "epoch": 0.98, "grad_norm": 0.2782321577020592, "learning_rate": 1.7614721584051643e-08, "loss": 0.2388, "step": 17086 }, { "epoch": 0.98, "grad_norm": 0.3467169897838902, "learning_rate": 1.7504489736971385e-08, "loss": 0.1657, "step": 17087 }, { "epoch": 0.98, "grad_norm": 0.7927895004364096, "learning_rate": 1.73946035832806e-08, "loss": 0.5113, "step": 17088 }, { "epoch": 0.98, "grad_norm": 0.29960578060265763, "learning_rate": 1.728506312678624e-08, "loss": 0.2258, "step": 17089 }, { "epoch": 0.98, "grad_norm": 0.32245881570389173, "learning_rate": 1.7175868371281936e-08, "loss": 0.3108, "step": 17090 }, { "epoch": 0.98, "grad_norm": 0.951002243046634, "learning_rate": 1.7067019320546886e-08, "loss": 0.3718, "step": 17091 }, { "epoch": 0.98, "grad_norm": 0.38421464500733016, "learning_rate": 1.6958515978351407e-08, "loss": 0.2816, "step": 17092 }, { "epoch": 0.98, "grad_norm": 0.15888882951589792, "learning_rate": 1.6850358348453612e-08, "loss": 0.0663, "step": 17093 }, { "epoch": 0.98, "grad_norm": 0.3003963983604446, "learning_rate": 1.6742546434598272e-08, "loss": 0.2845, "step": 17094 }, { "epoch": 0.98, "grad_norm": 0.36253705809280756, "learning_rate": 1.6635080240520186e-08, "loss": 0.2953, "step": 17095 }, { "epoch": 0.98, "grad_norm": 0.6824644163387067, "learning_rate": 1.6527959769939705e-08, "loss": 0.2952, "step": 17096 }, { "epoch": 0.98, "grad_norm": 0.6366164928559752, "learning_rate": 1.6421185026566088e-08, "loss": 0.2909, "step": 17097 }, { "epoch": 0.98, "grad_norm": 0.24641835079060603, "learning_rate": 1.631475601409749e-08, "loss": 0.2552, "step": 17098 }, { "epoch": 0.98, "grad_norm": 0.31674059694920764, "learning_rate": 1.6208672736219843e-08, "loss": 0.0891, "step": 17099 }, { "epoch": 0.98, "grad_norm": 0.5181426442377051, "learning_rate": 1.6102935196606883e-08, "loss": 0.3263, "step": 17100 }, { "epoch": 0.98, "grad_norm": 0.3799271362409939, "learning_rate": 1.5997543398919013e-08, "loss": 0.2932, "step": 17101 }, { "epoch": 0.98, "grad_norm": 0.32179637117374854, "learning_rate": 1.5892497346807754e-08, "loss": 0.271, "step": 17102 }, { "epoch": 0.98, "grad_norm": 0.7109955981462678, "learning_rate": 1.57877970439102e-08, "loss": 0.3308, "step": 17103 }, { "epoch": 0.98, "grad_norm": 0.3955832021279465, "learning_rate": 1.568344249385123e-08, "loss": 0.2954, "step": 17104 }, { "epoch": 0.98, "grad_norm": 0.28272887435926286, "learning_rate": 1.5579433700245727e-08, "loss": 0.1893, "step": 17105 }, { "epoch": 0.98, "grad_norm": 0.3781045982315596, "learning_rate": 1.5475770666694145e-08, "loss": 0.2298, "step": 17106 }, { "epoch": 0.98, "grad_norm": 0.3939611312355522, "learning_rate": 1.5372453396788057e-08, "loss": 0.284, "step": 17107 }, { "epoch": 0.98, "grad_norm": 0.7118015447019852, "learning_rate": 1.52694818941046e-08, "loss": 0.439, "step": 17108 }, { "epoch": 0.98, "grad_norm": 0.4439561205053077, "learning_rate": 1.516685616220981e-08, "loss": 0.1962, "step": 17109 }, { "epoch": 0.98, "grad_norm": 0.28543838310868797, "learning_rate": 1.506457620465751e-08, "loss": 0.2636, "step": 17110 }, { "epoch": 0.98, "grad_norm": 0.32304533937000707, "learning_rate": 1.4962642024989316e-08, "loss": 0.1582, "step": 17111 }, { "epoch": 0.98, "grad_norm": 0.556431431252915, "learning_rate": 1.4861053626734623e-08, "loss": 0.2584, "step": 17112 }, { "epoch": 0.98, "grad_norm": 0.3452856886959734, "learning_rate": 1.475981101341284e-08, "loss": 0.2698, "step": 17113 }, { "epoch": 0.98, "grad_norm": 0.34444279352265655, "learning_rate": 1.4658914188530049e-08, "loss": 0.3137, "step": 17114 }, { "epoch": 0.98, "grad_norm": 1.0069469226681833, "learning_rate": 1.4558363155579013e-08, "loss": 0.1646, "step": 17115 }, { "epoch": 0.98, "grad_norm": 0.3590631076036183, "learning_rate": 1.4458157918042503e-08, "loss": 0.2807, "step": 17116 }, { "epoch": 0.98, "grad_norm": 0.3076290801115484, "learning_rate": 1.4358298479391076e-08, "loss": 0.2447, "step": 17117 }, { "epoch": 0.98, "grad_norm": 0.4345708272045855, "learning_rate": 1.4258784843081963e-08, "loss": 0.3126, "step": 17118 }, { "epoch": 0.98, "grad_norm": 0.35796512209441345, "learning_rate": 1.415961701256241e-08, "loss": 0.2191, "step": 17119 }, { "epoch": 0.98, "grad_norm": 0.43834411557560704, "learning_rate": 1.4060794991265226e-08, "loss": 0.3003, "step": 17120 }, { "epoch": 0.98, "grad_norm": 0.3023620756441359, "learning_rate": 1.3962318782613226e-08, "loss": 0.267, "step": 17121 }, { "epoch": 0.98, "grad_norm": 0.33571420647735895, "learning_rate": 1.386418839001702e-08, "loss": 0.1946, "step": 17122 }, { "epoch": 0.98, "grad_norm": 0.7962831409761743, "learning_rate": 1.3766403816873886e-08, "loss": 0.3344, "step": 17123 }, { "epoch": 0.98, "grad_norm": 0.40328605944188584, "learning_rate": 1.3668965066571115e-08, "loss": 0.2527, "step": 17124 }, { "epoch": 0.98, "grad_norm": 0.27251997461785155, "learning_rate": 1.3571872142483789e-08, "loss": 0.1927, "step": 17125 }, { "epoch": 0.98, "grad_norm": 0.27747022869745464, "learning_rate": 1.3475125047971438e-08, "loss": 0.2665, "step": 17126 }, { "epoch": 0.98, "grad_norm": 1.2894017763114165, "learning_rate": 1.3378723786386938e-08, "loss": 0.6075, "step": 17127 }, { "epoch": 0.98, "grad_norm": 0.3370384947172715, "learning_rate": 1.3282668361067619e-08, "loss": 0.185, "step": 17128 }, { "epoch": 0.98, "grad_norm": 0.33942967898729376, "learning_rate": 1.3186958775339709e-08, "loss": 0.3023, "step": 17129 }, { "epoch": 0.98, "grad_norm": 0.6884605520324051, "learning_rate": 1.3091595032518333e-08, "loss": 0.3898, "step": 17130 }, { "epoch": 0.98, "grad_norm": 0.32177530158067874, "learning_rate": 1.2996577135906408e-08, "loss": 0.2731, "step": 17131 }, { "epoch": 0.98, "grad_norm": 0.22547779418670175, "learning_rate": 1.290190508879241e-08, "loss": 0.1392, "step": 17132 }, { "epoch": 0.98, "grad_norm": 0.26112428619009537, "learning_rate": 1.280757889445594e-08, "loss": 0.2366, "step": 17133 }, { "epoch": 0.98, "grad_norm": 0.35491189542978396, "learning_rate": 1.2713598556164386e-08, "loss": 0.2418, "step": 17134 }, { "epoch": 0.98, "grad_norm": 0.6705417107991852, "learning_rate": 1.2619964077170699e-08, "loss": 0.2883, "step": 17135 }, { "epoch": 0.98, "grad_norm": 0.7273092453633878, "learning_rate": 1.252667546071784e-08, "loss": 0.3569, "step": 17136 }, { "epoch": 0.98, "grad_norm": 0.3310531820505807, "learning_rate": 1.2433732710037671e-08, "loss": 0.2706, "step": 17137 }, { "epoch": 0.98, "grad_norm": 0.23516781376709966, "learning_rate": 1.2341135828347617e-08, "loss": 0.2121, "step": 17138 }, { "epoch": 0.98, "grad_norm": 0.5182882686249745, "learning_rate": 1.2248884818854001e-08, "loss": 0.2102, "step": 17139 }, { "epoch": 0.98, "grad_norm": 0.347744576905726, "learning_rate": 1.2156979684753157e-08, "loss": 0.2669, "step": 17140 }, { "epoch": 0.98, "grad_norm": 0.30419991686902886, "learning_rate": 1.2065420429225872e-08, "loss": 0.2622, "step": 17141 }, { "epoch": 0.98, "grad_norm": 1.6698959296242852, "learning_rate": 1.1974207055444054e-08, "loss": 0.6203, "step": 17142 }, { "epoch": 0.98, "grad_norm": 0.3200165060424897, "learning_rate": 1.1883339566565178e-08, "loss": 0.2531, "step": 17143 }, { "epoch": 0.99, "grad_norm": 0.411575213284914, "learning_rate": 1.1792817965736725e-08, "loss": 0.2983, "step": 17144 }, { "epoch": 0.99, "grad_norm": 0.2582650364241364, "learning_rate": 1.1702642256093965e-08, "loss": 0.1813, "step": 17145 }, { "epoch": 0.99, "grad_norm": 0.3650810325649037, "learning_rate": 1.1612812440758847e-08, "loss": 0.2605, "step": 17146 }, { "epoch": 0.99, "grad_norm": 0.5358995044698758, "learning_rate": 1.1523328522843324e-08, "loss": 0.3528, "step": 17147 }, { "epoch": 0.99, "grad_norm": 0.9446703893782842, "learning_rate": 1.1434190505443809e-08, "loss": 0.384, "step": 17148 }, { "epoch": 0.99, "grad_norm": 0.2741899067962737, "learning_rate": 1.1345398391650053e-08, "loss": 0.2475, "step": 17149 }, { "epoch": 0.99, "grad_norm": 0.5004404118735434, "learning_rate": 1.1256952184535153e-08, "loss": 0.3734, "step": 17150 }, { "epoch": 0.99, "grad_norm": 0.363040297891496, "learning_rate": 1.1168851887163323e-08, "loss": 0.1338, "step": 17151 }, { "epoch": 0.99, "grad_norm": 0.3385171036240212, "learning_rate": 1.1081097502584349e-08, "loss": 0.2484, "step": 17152 }, { "epoch": 0.99, "grad_norm": 0.4682014674094445, "learning_rate": 1.0993689033836907e-08, "loss": 0.3082, "step": 17153 }, { "epoch": 0.99, "grad_norm": 0.541802237828164, "learning_rate": 1.090662648394858e-08, "loss": 0.2646, "step": 17154 }, { "epoch": 0.99, "grad_norm": 0.4216574372674074, "learning_rate": 1.0819909855933618e-08, "loss": 0.2951, "step": 17155 }, { "epoch": 0.99, "grad_norm": 0.5286671846393574, "learning_rate": 1.073353915279629e-08, "loss": 0.3723, "step": 17156 }, { "epoch": 0.99, "grad_norm": 0.26883762854101706, "learning_rate": 1.0647514377527535e-08, "loss": 0.2437, "step": 17157 }, { "epoch": 0.99, "grad_norm": 0.2838856173997619, "learning_rate": 1.0561835533104969e-08, "loss": 0.1802, "step": 17158 }, { "epoch": 0.99, "grad_norm": 0.48001083283485046, "learning_rate": 1.0476502622496221e-08, "loss": 0.2635, "step": 17159 }, { "epoch": 0.99, "grad_norm": 1.2070202127543408, "learning_rate": 1.0391515648657813e-08, "loss": 0.6532, "step": 17160 }, { "epoch": 0.99, "grad_norm": 0.24790652896927554, "learning_rate": 1.0306874614530727e-08, "loss": 0.2034, "step": 17161 }, { "epoch": 0.99, "grad_norm": 0.5456230843925484, "learning_rate": 1.022257952304706e-08, "loss": 0.3501, "step": 17162 }, { "epoch": 0.99, "grad_norm": 0.872555453274828, "learning_rate": 1.0138630377125591e-08, "loss": 0.3599, "step": 17163 }, { "epoch": 0.99, "grad_norm": 0.30090643775201775, "learning_rate": 1.0055027179675104e-08, "loss": 0.211, "step": 17164 }, { "epoch": 0.99, "grad_norm": 0.20651735036815919, "learning_rate": 9.971769933587728e-09, "loss": 0.1962, "step": 17165 }, { "epoch": 0.99, "grad_norm": 1.3557084430328847, "learning_rate": 9.888858641750044e-09, "loss": 0.5679, "step": 17166 }, { "epoch": 0.99, "grad_norm": 0.4083475818641006, "learning_rate": 9.806293307030868e-09, "loss": 0.2982, "step": 17167 }, { "epoch": 0.99, "grad_norm": 0.5826905889124464, "learning_rate": 9.724073932289024e-09, "loss": 0.2523, "step": 17168 }, { "epoch": 0.99, "grad_norm": 0.3194676400384813, "learning_rate": 9.642200520374457e-09, "loss": 0.299, "step": 17169 }, { "epoch": 0.99, "grad_norm": 0.29693589850133373, "learning_rate": 9.560673074120452e-09, "loss": 0.2682, "step": 17170 }, { "epoch": 0.99, "grad_norm": 0.16895972237669396, "learning_rate": 9.47949159635031e-09, "loss": 0.0703, "step": 17171 }, { "epoch": 0.99, "grad_norm": 0.46335545917394955, "learning_rate": 9.398656089876224e-09, "loss": 0.2993, "step": 17172 }, { "epoch": 0.99, "grad_norm": 0.29457119427898515, "learning_rate": 9.318166557497066e-09, "loss": 0.2394, "step": 17173 }, { "epoch": 0.99, "grad_norm": 0.47416681421393464, "learning_rate": 9.238023001999496e-09, "loss": 0.2596, "step": 17174 }, { "epoch": 0.99, "grad_norm": 0.6675035464853056, "learning_rate": 9.158225426160183e-09, "loss": 0.3474, "step": 17175 }, { "epoch": 0.99, "grad_norm": 0.5988924080851362, "learning_rate": 9.078773832742471e-09, "loss": 0.2719, "step": 17176 }, { "epoch": 0.99, "grad_norm": 0.18081883458352935, "learning_rate": 8.999668224496383e-09, "loss": 0.1884, "step": 17177 }, { "epoch": 0.99, "grad_norm": 1.1298631304084927, "learning_rate": 8.92090860416195e-09, "loss": 0.555, "step": 17178 }, { "epoch": 0.99, "grad_norm": 0.40152654309722646, "learning_rate": 8.842494974466986e-09, "loss": 0.2712, "step": 17179 }, { "epoch": 0.99, "grad_norm": 0.49817004543825866, "learning_rate": 8.764427338127102e-09, "loss": 0.3426, "step": 17180 }, { "epoch": 0.99, "grad_norm": 0.31938785931852637, "learning_rate": 8.686705697845688e-09, "loss": 0.2383, "step": 17181 }, { "epoch": 0.99, "grad_norm": 0.3866142623720773, "learning_rate": 8.609330056313926e-09, "loss": 0.2452, "step": 17182 }, { "epoch": 0.99, "grad_norm": 0.2502692400459461, "learning_rate": 8.532300416210781e-09, "loss": 0.1758, "step": 17183 }, { "epoch": 0.99, "grad_norm": 0.5478422286740277, "learning_rate": 8.455616780205234e-09, "loss": 0.243, "step": 17184 }, { "epoch": 0.99, "grad_norm": 0.3054428534977997, "learning_rate": 8.379279150951824e-09, "loss": 0.259, "step": 17185 }, { "epoch": 0.99, "grad_norm": 0.563806009862002, "learning_rate": 8.303287531093996e-09, "loss": 0.3525, "step": 17186 }, { "epoch": 0.99, "grad_norm": 0.6691683684864551, "learning_rate": 8.227641923264085e-09, "loss": 0.3025, "step": 17187 }, { "epoch": 0.99, "grad_norm": 0.3912003511948697, "learning_rate": 8.152342330081109e-09, "loss": 0.247, "step": 17188 }, { "epoch": 0.99, "grad_norm": 0.29904732930023953, "learning_rate": 8.077388754151872e-09, "loss": 0.2665, "step": 17189 }, { "epoch": 0.99, "grad_norm": 0.28293767680946147, "learning_rate": 8.002781198074294e-09, "loss": 0.1153, "step": 17190 }, { "epoch": 0.99, "grad_norm": 0.39065053153705637, "learning_rate": 7.928519664430756e-09, "loss": 0.2872, "step": 17191 }, { "epoch": 0.99, "grad_norm": 0.4524577827612803, "learning_rate": 7.854604155791423e-09, "loss": 0.3092, "step": 17192 }, { "epoch": 0.99, "grad_norm": 0.3234766790264359, "learning_rate": 7.78103467471869e-09, "loss": 0.301, "step": 17193 }, { "epoch": 0.99, "grad_norm": 0.5498364437172288, "learning_rate": 7.70781122375941e-09, "loss": 0.0939, "step": 17194 }, { "epoch": 0.99, "grad_norm": 0.290298275004318, "learning_rate": 7.634933805448219e-09, "loss": 0.2089, "step": 17195 }, { "epoch": 0.99, "grad_norm": 0.2698869203749294, "learning_rate": 7.562402422309767e-09, "loss": 0.2444, "step": 17196 }, { "epoch": 0.99, "grad_norm": 0.5583643820899699, "learning_rate": 7.490217076855377e-09, "loss": 0.1874, "step": 17197 }, { "epoch": 0.99, "grad_norm": 0.37483103695911446, "learning_rate": 7.418377771585273e-09, "loss": 0.3031, "step": 17198 }, { "epoch": 0.99, "grad_norm": 1.2057443310019202, "learning_rate": 7.346884508987462e-09, "loss": 0.5939, "step": 17199 }, { "epoch": 0.99, "grad_norm": 0.40814755288192517, "learning_rate": 7.275737291536633e-09, "loss": 0.1775, "step": 17200 }, { "epoch": 0.99, "grad_norm": 0.23334911812197162, "learning_rate": 7.204936121697481e-09, "loss": 0.2408, "step": 17201 }, { "epoch": 0.99, "grad_norm": 0.452400964531313, "learning_rate": 7.134481001922488e-09, "loss": 0.253, "step": 17202 }, { "epoch": 0.99, "grad_norm": 0.3952783924255758, "learning_rate": 7.064371934649706e-09, "loss": 0.2208, "step": 17203 }, { "epoch": 0.99, "grad_norm": 0.5326550442015726, "learning_rate": 6.9946089223082995e-09, "loss": 0.3627, "step": 17204 }, { "epoch": 0.99, "grad_norm": 0.34586123136046604, "learning_rate": 6.925191967314115e-09, "loss": 0.3045, "step": 17205 }, { "epoch": 0.99, "grad_norm": 0.7623488186505916, "learning_rate": 6.856121072070787e-09, "loss": 0.3254, "step": 17206 }, { "epoch": 0.99, "grad_norm": 0.4536690860312234, "learning_rate": 6.787396238969735e-09, "loss": 0.2152, "step": 17207 }, { "epoch": 0.99, "grad_norm": 0.23043597811227537, "learning_rate": 6.719017470392386e-09, "loss": 0.2123, "step": 17208 }, { "epoch": 0.99, "grad_norm": 0.39678585504428626, "learning_rate": 6.650984768704627e-09, "loss": 0.2842, "step": 17209 }, { "epoch": 0.99, "grad_norm": 0.484976822562212, "learning_rate": 6.583298136264571e-09, "loss": 0.2722, "step": 17210 }, { "epoch": 0.99, "grad_norm": 1.1659165788472297, "learning_rate": 6.515957575413678e-09, "loss": 0.5502, "step": 17211 }, { "epoch": 0.99, "grad_norm": 0.4491331575497026, "learning_rate": 6.448963088486748e-09, "loss": 0.3109, "step": 17212 }, { "epoch": 0.99, "grad_norm": 0.27854955150557587, "learning_rate": 6.382314677803037e-09, "loss": 0.2077, "step": 17213 }, { "epoch": 0.99, "grad_norm": 0.41626719286042163, "learning_rate": 6.316012345668476e-09, "loss": 0.2705, "step": 17214 }, { "epoch": 0.99, "grad_norm": 0.7680084509491535, "learning_rate": 6.2500560943812295e-09, "loss": 0.3548, "step": 17215 }, { "epoch": 0.99, "grad_norm": 0.25815630989197186, "learning_rate": 6.184445926225024e-09, "loss": 0.2255, "step": 17216 }, { "epoch": 0.99, "grad_norm": 0.3151753336653738, "learning_rate": 6.119181843471378e-09, "loss": 0.2446, "step": 17217 }, { "epoch": 0.99, "grad_norm": 0.9113337178352028, "learning_rate": 6.0542638483818136e-09, "loss": 0.3112, "step": 17218 }, { "epoch": 0.99, "grad_norm": 0.3686064816471827, "learning_rate": 5.989691943202314e-09, "loss": 0.2553, "step": 17219 }, { "epoch": 0.99, "grad_norm": 0.4338394855360507, "learning_rate": 5.925466130169977e-09, "loss": 0.2657, "step": 17220 }, { "epoch": 0.99, "grad_norm": 0.2913724177552018, "learning_rate": 5.86158641150969e-09, "loss": 0.2284, "step": 17221 }, { "epoch": 0.99, "grad_norm": 0.3690652954085403, "learning_rate": 5.798052789431907e-09, "loss": 0.3086, "step": 17222 }, { "epoch": 0.99, "grad_norm": 0.44592350230275896, "learning_rate": 5.734865266138201e-09, "loss": 0.1507, "step": 17223 }, { "epoch": 0.99, "grad_norm": 0.29599152713750027, "learning_rate": 5.67202384381682e-09, "loss": 0.2542, "step": 17224 }, { "epoch": 0.99, "grad_norm": 0.38827856045711373, "learning_rate": 5.609528524642694e-09, "loss": 0.3015, "step": 17225 }, { "epoch": 0.99, "grad_norm": 0.5828565839256031, "learning_rate": 5.547379310781864e-09, "loss": 0.2791, "step": 17226 }, { "epoch": 0.99, "grad_norm": 0.7721637320865353, "learning_rate": 5.485576204383725e-09, "loss": 0.4538, "step": 17227 }, { "epoch": 0.99, "grad_norm": 0.3448076249812229, "learning_rate": 5.424119207592115e-09, "loss": 0.2812, "step": 17228 }, { "epoch": 0.99, "grad_norm": 0.21500697134765617, "learning_rate": 5.3630083225331145e-09, "loss": 0.1745, "step": 17229 }, { "epoch": 0.99, "grad_norm": 1.028394499800441, "learning_rate": 5.302243551322806e-09, "loss": 0.4091, "step": 17230 }, { "epoch": 0.99, "grad_norm": 0.3682420380289012, "learning_rate": 5.2418248960661725e-09, "loss": 0.2909, "step": 17231 }, { "epoch": 0.99, "grad_norm": 0.3277072529659946, "learning_rate": 5.181752358854874e-09, "loss": 0.3007, "step": 17232 }, { "epoch": 0.99, "grad_norm": 0.4392349799183583, "learning_rate": 5.1220259417705806e-09, "loss": 0.1281, "step": 17233 }, { "epoch": 0.99, "grad_norm": 0.3499626554649006, "learning_rate": 5.0626456468805265e-09, "loss": 0.2961, "step": 17234 }, { "epoch": 0.99, "grad_norm": 0.28294895281739385, "learning_rate": 5.003611476240844e-09, "loss": 0.1918, "step": 17235 }, { "epoch": 0.99, "grad_norm": 0.30877597909823423, "learning_rate": 4.944923431896564e-09, "loss": 0.218, "step": 17236 }, { "epoch": 0.99, "grad_norm": 0.35228097598873376, "learning_rate": 4.886581515880506e-09, "loss": 0.2528, "step": 17237 }, { "epoch": 0.99, "grad_norm": 0.8101994452225071, "learning_rate": 4.828585730211055e-09, "loss": 0.3758, "step": 17238 }, { "epoch": 0.99, "grad_norm": 0.8340505753102018, "learning_rate": 4.770936076898825e-09, "loss": 0.2225, "step": 17239 }, { "epoch": 0.99, "grad_norm": 0.29161964930278367, "learning_rate": 4.713632557938885e-09, "loss": 0.2682, "step": 17240 }, { "epoch": 0.99, "grad_norm": 0.3629011642961109, "learning_rate": 4.6566751753163166e-09, "loss": 0.2429, "step": 17241 }, { "epoch": 0.99, "grad_norm": 0.455365093725818, "learning_rate": 4.600063931002874e-09, "loss": 0.2451, "step": 17242 }, { "epoch": 0.99, "grad_norm": 0.336152325585062, "learning_rate": 4.543798826959211e-09, "loss": 0.2254, "step": 17243 }, { "epoch": 0.99, "grad_norm": 0.3599515795554048, "learning_rate": 4.4878798651337705e-09, "loss": 0.2767, "step": 17244 }, { "epoch": 0.99, "grad_norm": 0.9079207839441679, "learning_rate": 4.4323070474638906e-09, "loss": 0.4227, "step": 17245 }, { "epoch": 0.99, "grad_norm": 0.3120814844657969, "learning_rate": 4.377080375873588e-09, "loss": 0.2139, "step": 17246 }, { "epoch": 0.99, "grad_norm": 0.2816458631063591, "learning_rate": 4.322199852274667e-09, "loss": 0.2005, "step": 17247 }, { "epoch": 0.99, "grad_norm": 0.34555457410158374, "learning_rate": 4.267665478567829e-09, "loss": 0.2776, "step": 17248 }, { "epoch": 0.99, "grad_norm": 0.3508270629408411, "learning_rate": 4.213477256642673e-09, "loss": 0.22, "step": 17249 }, { "epoch": 0.99, "grad_norm": 0.8936437076660343, "learning_rate": 4.159635188375477e-09, "loss": 0.3986, "step": 17250 }, { "epoch": 0.99, "grad_norm": 1.2272218348029869, "learning_rate": 4.106139275629195e-09, "loss": 0.6651, "step": 17251 }, { "epoch": 0.99, "grad_norm": 0.22314666519834417, "learning_rate": 4.0529895202579e-09, "loss": 0.2085, "step": 17252 }, { "epoch": 0.99, "grad_norm": 1.4471815041901033, "learning_rate": 4.00018592410123e-09, "loss": 0.6434, "step": 17253 }, { "epoch": 0.99, "grad_norm": 0.3906800807951317, "learning_rate": 3.947728488988833e-09, "loss": 0.2578, "step": 17254 }, { "epoch": 0.99, "grad_norm": 0.2559969138747373, "learning_rate": 3.895617216735925e-09, "loss": 0.199, "step": 17255 }, { "epoch": 0.99, "grad_norm": 0.3663610961053545, "learning_rate": 3.843852109148838e-09, "loss": 0.2337, "step": 17256 }, { "epoch": 0.99, "grad_norm": 1.1749116983977523, "learning_rate": 3.792433168019471e-09, "loss": 0.7044, "step": 17257 }, { "epoch": 0.99, "grad_norm": 0.32863069988556126, "learning_rate": 3.741360395127513e-09, "loss": 0.2506, "step": 17258 }, { "epoch": 0.99, "grad_norm": 0.7208685948356803, "learning_rate": 3.6906337922426593e-09, "loss": 0.2665, "step": 17259 }, { "epoch": 0.99, "grad_norm": 0.3628388138696765, "learning_rate": 3.640253361121282e-09, "loss": 0.309, "step": 17260 }, { "epoch": 0.99, "grad_norm": 0.36290377979163024, "learning_rate": 3.590219103508652e-09, "loss": 0.2359, "step": 17261 }, { "epoch": 0.99, "grad_norm": 0.3023237550567913, "learning_rate": 3.540531021135607e-09, "loss": 0.1374, "step": 17262 }, { "epoch": 0.99, "grad_norm": 0.3912028186810661, "learning_rate": 3.491189115725213e-09, "loss": 0.3087, "step": 17263 }, { "epoch": 0.99, "grad_norm": 0.3489042396908763, "learning_rate": 3.4421933889849936e-09, "loss": 0.2758, "step": 17264 }, { "epoch": 0.99, "grad_norm": 0.3906870321219293, "learning_rate": 3.3935438426113687e-09, "loss": 0.262, "step": 17265 }, { "epoch": 0.99, "grad_norm": 0.9233473327277593, "learning_rate": 3.3452404782896577e-09, "loss": 0.4265, "step": 17266 }, { "epoch": 0.99, "grad_norm": 0.2371891988216668, "learning_rate": 3.2972832976918557e-09, "loss": 0.1978, "step": 17267 }, { "epoch": 0.99, "grad_norm": 0.26164169308525725, "learning_rate": 3.2496723024799672e-09, "loss": 0.2508, "step": 17268 }, { "epoch": 0.99, "grad_norm": 1.0699215886321223, "learning_rate": 3.2024074943015626e-09, "loss": 0.4488, "step": 17269 }, { "epoch": 0.99, "grad_norm": 0.3099846689515529, "learning_rate": 3.1554888747942213e-09, "loss": 0.2469, "step": 17270 }, { "epoch": 0.99, "grad_norm": 0.6191979829959496, "learning_rate": 3.10891644558331e-09, "loss": 0.3591, "step": 17271 }, { "epoch": 0.99, "grad_norm": 0.34374932351710824, "learning_rate": 3.0626902082797615e-09, "loss": 0.2373, "step": 17272 }, { "epoch": 0.99, "grad_norm": 0.34319801121283044, "learning_rate": 3.0168101644845183e-09, "loss": 0.2517, "step": 17273 }, { "epoch": 0.99, "grad_norm": 0.38067607576018514, "learning_rate": 2.9712763157885293e-09, "loss": 0.1608, "step": 17274 }, { "epoch": 0.99, "grad_norm": 0.3449780354209424, "learning_rate": 2.9260886637672014e-09, "loss": 0.2681, "step": 17275 }, { "epoch": 0.99, "grad_norm": 0.30939155012806313, "learning_rate": 2.881247209984839e-09, "loss": 0.2485, "step": 17276 }, { "epoch": 0.99, "grad_norm": 0.6205897551993651, "learning_rate": 2.8367519559957537e-09, "loss": 0.3882, "step": 17277 }, { "epoch": 0.99, "grad_norm": 1.1477515882330112, "learning_rate": 2.792602903339825e-09, "loss": 0.2529, "step": 17278 }, { "epoch": 0.99, "grad_norm": 0.38265407718892425, "learning_rate": 2.7488000535458303e-09, "loss": 0.2418, "step": 17279 }, { "epoch": 0.99, "grad_norm": 0.19879501472534586, "learning_rate": 2.7053434081314447e-09, "loss": 0.2037, "step": 17280 }, { "epoch": 0.99, "grad_norm": 0.9142794037777149, "learning_rate": 2.6622329686010196e-09, "loss": 0.4647, "step": 17281 }, { "epoch": 0.99, "grad_norm": 0.28941532117602325, "learning_rate": 2.619468736446695e-09, "loss": 0.1886, "step": 17282 }, { "epoch": 0.99, "grad_norm": 0.6324264222405012, "learning_rate": 2.5770507131517297e-09, "loss": 0.4013, "step": 17283 }, { "epoch": 0.99, "grad_norm": 0.328680654110541, "learning_rate": 2.5349789001827274e-09, "loss": 0.2716, "step": 17284 }, { "epoch": 0.99, "grad_norm": 0.2842382251427557, "learning_rate": 2.4932532989974113e-09, "loss": 0.1838, "step": 17285 }, { "epoch": 0.99, "grad_norm": 0.32087338368593543, "learning_rate": 2.4518739110412913e-09, "loss": 0.1578, "step": 17286 }, { "epoch": 0.99, "grad_norm": 0.46121708923217086, "learning_rate": 2.410840737746556e-09, "loss": 0.3881, "step": 17287 }, { "epoch": 0.99, "grad_norm": 0.25180801069509645, "learning_rate": 2.37015378053429e-09, "loss": 0.2123, "step": 17288 }, { "epoch": 0.99, "grad_norm": 0.6193436792258021, "learning_rate": 2.329813040814477e-09, "loss": 0.3735, "step": 17289 }, { "epoch": 0.99, "grad_norm": 1.283115078097724, "learning_rate": 2.289818519982667e-09, "loss": 0.4848, "step": 17290 }, { "epoch": 0.99, "grad_norm": 0.2806743140172014, "learning_rate": 2.2501702194244192e-09, "loss": 0.1837, "step": 17291 }, { "epoch": 0.99, "grad_norm": 0.28516428759808665, "learning_rate": 2.2108681405141885e-09, "loss": 0.2424, "step": 17292 }, { "epoch": 0.99, "grad_norm": 0.38473609798441183, "learning_rate": 2.1719122846097783e-09, "loss": 0.2753, "step": 17293 }, { "epoch": 0.99, "grad_norm": 0.31851548707689087, "learning_rate": 2.1333026530634403e-09, "loss": 0.238, "step": 17294 }, { "epoch": 0.99, "grad_norm": 1.181637073888693, "learning_rate": 2.0950392472107726e-09, "loss": 0.3125, "step": 17295 }, { "epoch": 0.99, "grad_norm": 0.3795736909128083, "learning_rate": 2.0571220683762717e-09, "loss": 0.2708, "step": 17296 }, { "epoch": 0.99, "grad_norm": 0.6764029907443159, "learning_rate": 2.019551117874441e-09, "loss": 0.3151, "step": 17297 }, { "epoch": 0.99, "grad_norm": 0.200451458051388, "learning_rate": 1.9823263970042416e-09, "loss": 0.1499, "step": 17298 }, { "epoch": 0.99, "grad_norm": 0.34620613205315875, "learning_rate": 1.9454479070579735e-09, "loss": 0.2849, "step": 17299 }, { "epoch": 0.99, "grad_norm": 0.5328903980935618, "learning_rate": 1.9089156493101722e-09, "loss": 0.3001, "step": 17300 }, { "epoch": 0.99, "grad_norm": 0.342820967041687, "learning_rate": 1.8727296250264924e-09, "loss": 0.2557, "step": 17301 }, { "epoch": 0.99, "grad_norm": 1.1405798965958176, "learning_rate": 1.8368898354603759e-09, "loss": 0.4979, "step": 17302 }, { "epoch": 0.99, "grad_norm": 0.3883494449017486, "learning_rate": 1.8013962818530516e-09, "loss": 0.273, "step": 17303 }, { "epoch": 0.99, "grad_norm": 0.21653975646538426, "learning_rate": 1.7662489654324267e-09, "loss": 0.206, "step": 17304 }, { "epoch": 0.99, "grad_norm": 0.7855936171737142, "learning_rate": 1.7314478874175255e-09, "loss": 0.406, "step": 17305 }, { "epoch": 0.99, "grad_norm": 0.43869514092740386, "learning_rate": 1.6969930490129406e-09, "loss": 0.2855, "step": 17306 }, { "epoch": 0.99, "grad_norm": 0.2802090575204732, "learning_rate": 1.662884451411051e-09, "loss": 0.2516, "step": 17307 }, { "epoch": 0.99, "grad_norm": 0.5347954446359646, "learning_rate": 1.6291220957942443e-09, "loss": 0.23, "step": 17308 }, { "epoch": 0.99, "grad_norm": 0.43073846683388817, "learning_rate": 1.5957059833293653e-09, "loss": 0.2637, "step": 17309 }, { "epoch": 0.99, "grad_norm": 0.5787939035509605, "learning_rate": 1.5626361151765967e-09, "loss": 0.3245, "step": 17310 }, { "epoch": 0.99, "grad_norm": 0.30066704859583493, "learning_rate": 1.5299124924794684e-09, "loss": 0.2534, "step": 17311 }, { "epoch": 0.99, "grad_norm": 0.3953423827246001, "learning_rate": 1.497535116371518e-09, "loss": 0.2949, "step": 17312 }, { "epoch": 0.99, "grad_norm": 0.5339570681109478, "learning_rate": 1.4655039879740706e-09, "loss": 0.3517, "step": 17313 }, { "epoch": 0.99, "grad_norm": 0.2465958574137639, "learning_rate": 1.4338191083962394e-09, "loss": 0.0919, "step": 17314 }, { "epoch": 0.99, "grad_norm": 0.3375883743148337, "learning_rate": 1.4024804787349244e-09, "loss": 0.2508, "step": 17315 }, { "epoch": 0.99, "grad_norm": 0.34858518129319876, "learning_rate": 1.371488100075924e-09, "loss": 0.2834, "step": 17316 }, { "epoch": 0.99, "grad_norm": 0.5732830362171144, "learning_rate": 1.3408419734928235e-09, "loss": 0.2936, "step": 17317 }, { "epoch": 1.0, "grad_norm": 0.5490046678633443, "learning_rate": 1.3105421000458861e-09, "loss": 0.3244, "step": 17318 }, { "epoch": 1.0, "grad_norm": 0.25973593846651, "learning_rate": 1.280588480785383e-09, "loss": 0.2386, "step": 17319 }, { "epoch": 1.0, "grad_norm": 0.40068654789339847, "learning_rate": 1.2509811167482622e-09, "loss": 0.2308, "step": 17320 }, { "epoch": 1.0, "grad_norm": 0.27862506869394177, "learning_rate": 1.2217200089592596e-09, "loss": 0.0688, "step": 17321 }, { "epoch": 1.0, "grad_norm": 0.3646002178721917, "learning_rate": 1.192805158432009e-09, "loss": 0.2923, "step": 17322 }, { "epoch": 1.0, "grad_norm": 0.4507387100999731, "learning_rate": 1.164236566167931e-09, "loss": 0.3267, "step": 17323 }, { "epoch": 1.0, "grad_norm": 0.2687264725141766, "learning_rate": 1.1360142331562351e-09, "loss": 0.2057, "step": 17324 }, { "epoch": 1.0, "grad_norm": 0.35005883485614064, "learning_rate": 1.1081381603750275e-09, "loss": 0.2811, "step": 17325 }, { "epoch": 1.0, "grad_norm": 0.47338451847770335, "learning_rate": 1.0806083487890917e-09, "loss": 0.2107, "step": 17326 }, { "epoch": 1.0, "grad_norm": 0.25788576820285475, "learning_rate": 1.0534247993509994e-09, "loss": 0.2088, "step": 17327 }, { "epoch": 1.0, "grad_norm": 0.5201529560961416, "learning_rate": 1.0265875130033298e-09, "loss": 0.3249, "step": 17328 }, { "epoch": 1.0, "grad_norm": 0.7291318527937328, "learning_rate": 1.0000964906753396e-09, "loss": 0.3989, "step": 17329 }, { "epoch": 1.0, "grad_norm": 0.6160151881883443, "learning_rate": 9.739517332829628e-10, "loss": 0.3685, "step": 17330 }, { "epoch": 1.0, "grad_norm": 0.2861802045422825, "learning_rate": 9.481532417332518e-10, "loss": 0.2201, "step": 17331 }, { "epoch": 1.0, "grad_norm": 0.33069292196442907, "learning_rate": 9.227010169188256e-10, "loss": 0.1932, "step": 17332 }, { "epoch": 1.0, "grad_norm": 0.5335904809825592, "learning_rate": 8.975950597212014e-10, "loss": 0.318, "step": 17333 }, { "epoch": 1.0, "grad_norm": 0.34095909495518917, "learning_rate": 8.728353710107939e-10, "loss": 0.217, "step": 17334 }, { "epoch": 1.0, "grad_norm": 0.3489415469794577, "learning_rate": 8.484219516435854e-10, "loss": 0.3089, "step": 17335 }, { "epoch": 1.0, "grad_norm": 0.77950741238855, "learning_rate": 8.243548024655656e-10, "loss": 0.4937, "step": 17336 }, { "epoch": 1.0, "grad_norm": 0.3601096759079263, "learning_rate": 8.006339243094019e-10, "loss": 0.2173, "step": 17337 }, { "epoch": 1.0, "grad_norm": 0.295337610488898, "learning_rate": 7.772593179977694e-10, "loss": 0.1954, "step": 17338 }, { "epoch": 1.0, "grad_norm": 0.30542651027800183, "learning_rate": 7.542309843400209e-10, "loss": 0.2761, "step": 17339 }, { "epoch": 1.0, "grad_norm": 0.31871131623653226, "learning_rate": 7.315489241332963e-10, "loss": 0.2258, "step": 17340 }, { "epoch": 1.0, "grad_norm": 1.4402612913254227, "learning_rate": 7.092131381625233e-10, "loss": 0.71, "step": 17341 }, { "epoch": 1.0, "grad_norm": 0.8675495422620723, "learning_rate": 6.872236272026378e-10, "loss": 0.4088, "step": 17342 }, { "epoch": 1.0, "grad_norm": 0.2562096236525775, "learning_rate": 6.655803920130322e-10, "loss": 0.2502, "step": 17343 }, { "epoch": 1.0, "grad_norm": 0.3887523928164326, "learning_rate": 6.442834333453274e-10, "loss": 0.1314, "step": 17344 }, { "epoch": 1.0, "grad_norm": 0.49486610459972546, "learning_rate": 6.233327519356014e-10, "loss": 0.3164, "step": 17345 }, { "epoch": 1.0, "grad_norm": 0.2790524317225934, "learning_rate": 6.0272834850994e-10, "loss": 0.2222, "step": 17346 }, { "epoch": 1.0, "grad_norm": 0.3245395596263069, "learning_rate": 5.824702237822167e-10, "loss": 0.2424, "step": 17347 }, { "epoch": 1.0, "grad_norm": 0.550353076174964, "learning_rate": 5.62558378452982e-10, "loss": 0.3918, "step": 17348 }, { "epoch": 1.0, "grad_norm": 0.3946960811855935, "learning_rate": 5.429928132127948e-10, "loss": 0.2718, "step": 17349 }, { "epoch": 1.0, "grad_norm": 0.577554881905842, "learning_rate": 5.23773528737781e-10, "loss": 0.289, "step": 17350 }, { "epoch": 1.0, "grad_norm": 0.26363339789068374, "learning_rate": 5.049005256951845e-10, "loss": 0.2336, "step": 17351 }, { "epoch": 1.0, "grad_norm": 0.2628831020493132, "learning_rate": 4.863738047378164e-10, "loss": 0.2008, "step": 17352 }, { "epoch": 1.0, "grad_norm": 1.316507968789768, "learning_rate": 4.681933665084959e-10, "loss": 0.3222, "step": 17353 }, { "epoch": 1.0, "grad_norm": 1.1554124886092956, "learning_rate": 4.5035921163449905e-10, "loss": 0.4016, "step": 17354 }, { "epoch": 1.0, "grad_norm": 0.24058819285647187, "learning_rate": 4.3287134073422e-10, "loss": 0.2455, "step": 17355 }, { "epoch": 1.0, "grad_norm": 0.6174960471662688, "learning_rate": 4.1572975441384055e-10, "loss": 0.4182, "step": 17356 }, { "epoch": 1.0, "grad_norm": 0.3324218340581116, "learning_rate": 3.9893445326733003e-10, "loss": 0.1355, "step": 17357 }, { "epoch": 1.0, "grad_norm": 0.26942764406218134, "learning_rate": 3.824854378753351e-10, "loss": 0.2026, "step": 17358 }, { "epoch": 1.0, "grad_norm": 0.35914740702980735, "learning_rate": 3.6638270880851034e-10, "loss": 0.2896, "step": 17359 }, { "epoch": 1.0, "grad_norm": 0.5455096931025434, "learning_rate": 3.5062626662307753e-10, "loss": 0.2895, "step": 17360 }, { "epoch": 1.0, "grad_norm": 0.3443424930766011, "learning_rate": 3.352161118652664e-10, "loss": 0.2766, "step": 17361 }, { "epoch": 1.0, "grad_norm": 0.6169748491571364, "learning_rate": 3.2015224506909414e-10, "loss": 0.3999, "step": 17362 }, { "epoch": 1.0, "grad_norm": 0.22643565427717527, "learning_rate": 3.0543466675636567e-10, "loss": 0.1819, "step": 17363 }, { "epoch": 1.0, "grad_norm": 0.33718256571038757, "learning_rate": 2.9106337743667336e-10, "loss": 0.2753, "step": 17364 }, { "epoch": 1.0, "grad_norm": 0.5574590750528694, "learning_rate": 2.7703837760739706e-10, "loss": 0.2991, "step": 17365 }, { "epoch": 1.0, "grad_norm": 0.36791725805618314, "learning_rate": 2.6335966775370423e-10, "loss": 0.2042, "step": 17366 }, { "epoch": 1.0, "grad_norm": 0.2976665692719068, "learning_rate": 2.500272483496602e-10, "loss": 0.2583, "step": 17367 }, { "epoch": 1.0, "grad_norm": 0.563196407125086, "learning_rate": 2.370411198582279e-10, "loss": 0.3444, "step": 17368 }, { "epoch": 1.0, "grad_norm": 0.4542464555776609, "learning_rate": 2.2440128272682716e-10, "loss": 0.2276, "step": 17369 }, { "epoch": 1.0, "grad_norm": 0.2292789630209967, "learning_rate": 2.1210773739510637e-10, "loss": 0.1631, "step": 17370 }, { "epoch": 1.0, "grad_norm": 0.3201790858158781, "learning_rate": 2.0016048428828095e-10, "loss": 0.2865, "step": 17371 }, { "epoch": 1.0, "grad_norm": 1.1917003624322648, "learning_rate": 1.8855952381935384e-10, "loss": 0.6258, "step": 17372 }, { "epoch": 1.0, "grad_norm": 0.29913451282453496, "learning_rate": 1.7730485639133598e-10, "loss": 0.2036, "step": 17373 }, { "epoch": 1.0, "grad_norm": 0.4936270409112451, "learning_rate": 1.6639648239280547e-10, "loss": 0.3253, "step": 17374 }, { "epoch": 1.0, "grad_norm": 0.5255633349642953, "learning_rate": 1.5583440220234835e-10, "loss": 0.3583, "step": 17375 }, { "epoch": 1.0, "grad_norm": 0.21257105141332128, "learning_rate": 1.4561861618411778e-10, "loss": 0.1564, "step": 17376 }, { "epoch": 1.0, "grad_norm": 0.4397563294608192, "learning_rate": 1.357491246944953e-10, "loss": 0.3039, "step": 17377 }, { "epoch": 1.0, "grad_norm": 0.43366888050053, "learning_rate": 1.2622592807320922e-10, "loss": 0.3299, "step": 17378 }, { "epoch": 1.0, "grad_norm": 0.25224273533898794, "learning_rate": 1.1704902665110596e-10, "loss": 0.2109, "step": 17379 }, { "epoch": 1.0, "grad_norm": 0.876683409290362, "learning_rate": 1.082184207445991e-10, "loss": 0.5038, "step": 17380 }, { "epoch": 1.0, "grad_norm": 0.5709334603042204, "learning_rate": 9.97341106612204e-11, "loss": 0.2467, "step": 17381 }, { "epoch": 1.0, "grad_norm": 0.3252570517790537, "learning_rate": 9.159609669406876e-11, "loss": 0.2698, "step": 17382 }, { "epoch": 1.0, "grad_norm": 0.2536394980429637, "learning_rate": 8.380437912514083e-11, "loss": 0.2018, "step": 17383 }, { "epoch": 1.0, "grad_norm": 0.5485757717623354, "learning_rate": 7.635895822311057e-11, "loss": 0.3742, "step": 17384 }, { "epoch": 1.0, "grad_norm": 0.3684524592933877, "learning_rate": 6.925983424777016e-11, "loss": 0.2536, "step": 17385 }, { "epoch": 1.0, "grad_norm": 0.3760098681531618, "learning_rate": 6.250700744336869e-11, "loss": 0.2418, "step": 17386 }, { "epoch": 1.0, "grad_norm": 0.44201853065463803, "learning_rate": 5.610047804527341e-11, "loss": 0.2955, "step": 17387 }, { "epoch": 1.0, "grad_norm": 0.3492915963630672, "learning_rate": 5.00402462733085e-11, "loss": 0.2622, "step": 17388 }, { "epoch": 1.0, "grad_norm": 0.40702154184454287, "learning_rate": 4.4326312338416333e-11, "loss": 0.1927, "step": 17389 }, { "epoch": 1.0, "grad_norm": 0.4248298337535507, "learning_rate": 3.895867643932683e-11, "loss": 0.3135, "step": 17390 }, { "epoch": 1.0, "grad_norm": 0.26443247178119667, "learning_rate": 3.3937338760337e-11, "loss": 0.2393, "step": 17391 }, { "epoch": 1.0, "grad_norm": 0.47992056831554575, "learning_rate": 2.92622994768621e-11, "loss": 0.1771, "step": 17392 }, { "epoch": 1.0, "grad_norm": 1.7800715125968598, "learning_rate": 2.4933558749884456e-11, "loss": 0.4894, "step": 17393 }, { "epoch": 1.0, "grad_norm": 0.2614907187670138, "learning_rate": 2.0951116729284182e-11, "loss": 0.2234, "step": 17394 }, { "epoch": 1.0, "grad_norm": 0.33747834456865705, "learning_rate": 1.731497355272893e-11, "loss": 0.319, "step": 17395 }, { "epoch": 1.0, "grad_norm": 0.5460667339487827, "learning_rate": 1.4025129346784127e-11, "loss": 0.3004, "step": 17396 }, { "epoch": 1.0, "grad_norm": 0.32489280345231525, "learning_rate": 1.1081584224692521e-11, "loss": 0.2555, "step": 17397 }, { "epoch": 1.0, "grad_norm": 0.5915103642932011, "learning_rate": 8.484338289704852e-12, "loss": 0.3031, "step": 17398 }, { "epoch": 1.0, "grad_norm": 0.37556640988554935, "learning_rate": 6.233391630638963e-12, "loss": 0.2357, "step": 17399 }, { "epoch": 1.0, "grad_norm": 0.29506593921851954, "learning_rate": 4.328744325210466e-12, "loss": 0.2447, "step": 17400 }, { "epoch": 1.0, "grad_norm": 0.7211757381438109, "learning_rate": 2.770396440032741e-12, "loss": 0.4654, "step": 17401 }, { "epoch": 1.0, "grad_norm": 0.316378855890181, "learning_rate": 1.5583480295067177e-12, "loss": 0.278, "step": 17402 }, { "epoch": 1.0, "grad_norm": 0.38606123702259176, "learning_rate": 6.925991336004245e-13, "loss": 0.2463, "step": 17403 }, { "epoch": 1.0, "grad_norm": 0.26007754178125586, "learning_rate": 1.7314978451032915e-13, "loss": 0.1748, "step": 17404 }, { "epoch": 1.0, "grad_norm": 1.5025672310396203, "learning_rate": 0.0, "loss": 0.1774, "step": 17405 }, { "epoch": 1.0, "step": 17405, "total_flos": 0.0, "train_loss": 0.3205628498764781, "train_runtime": 138539.6755, "train_samples_per_second": 60.365, "train_steps_per_second": 0.126 } ], "logging_steps": 1.0, "max_steps": 17405, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }