{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 37.1042463148092, "learning_rate": 3.4602076124567476e-08, "loss": 3.1037, "step": 1 }, { "epoch": 0.0, "grad_norm": 25.207250934654958, "learning_rate": 6.920415224913495e-08, "loss": 2.5937, "step": 2 }, { "epoch": 0.0, "grad_norm": 34.4866252238902, "learning_rate": 1.0380622837370243e-07, "loss": 3.1256, "step": 3 }, { "epoch": 0.0, "grad_norm": 37.287480505182415, "learning_rate": 1.384083044982699e-07, "loss": 3.208, "step": 4 }, { "epoch": 0.0, "grad_norm": 31.298895147565904, "learning_rate": 1.730103806228374e-07, "loss": 3.1655, "step": 5 }, { "epoch": 0.0, "grad_norm": 28.5099030032688, "learning_rate": 2.0761245674740486e-07, "loss": 2.9876, "step": 6 }, { "epoch": 0.0, "grad_norm": 30.91252751714465, "learning_rate": 2.422145328719723e-07, "loss": 3.0633, "step": 7 }, { "epoch": 0.0, "grad_norm": 34.07859194615419, "learning_rate": 2.768166089965398e-07, "loss": 3.0316, "step": 8 }, { "epoch": 0.0, "grad_norm": 25.74310170641338, "learning_rate": 3.114186851211073e-07, "loss": 2.5585, "step": 9 }, { "epoch": 0.0, "grad_norm": 29.18649162666028, "learning_rate": 3.460207612456748e-07, "loss": 2.9646, "step": 10 }, { "epoch": 0.0, "grad_norm": 35.07862998050571, "learning_rate": 3.8062283737024223e-07, "loss": 2.9688, "step": 11 }, { "epoch": 0.0, "grad_norm": 28.233677390485585, "learning_rate": 4.152249134948097e-07, "loss": 2.545, "step": 12 }, { "epoch": 0.0, "grad_norm": 31.86301568731232, "learning_rate": 4.498269896193772e-07, "loss": 3.1589, "step": 13 }, { "epoch": 0.0, "grad_norm": 31.297767276970973, "learning_rate": 4.844290657439446e-07, "loss": 2.9288, "step": 14 }, { "epoch": 0.0, "grad_norm": 31.327661562085037, "learning_rate": 5.190311418685121e-07, "loss": 3.187, "step": 15 }, { "epoch": 0.0, "grad_norm": 32.09228220848974, "learning_rate": 5.536332179930796e-07, "loss": 3.2806, "step": 16 }, { "epoch": 0.0, "grad_norm": 28.693100834543692, "learning_rate": 5.882352941176471e-07, "loss": 2.8149, "step": 17 }, { "epoch": 0.0, "grad_norm": 24.493063660463303, "learning_rate": 6.228373702422146e-07, "loss": 2.6128, "step": 18 }, { "epoch": 0.0, "grad_norm": 22.455923474790353, "learning_rate": 6.57439446366782e-07, "loss": 2.473, "step": 19 }, { "epoch": 0.0, "grad_norm": 29.804700164829907, "learning_rate": 6.920415224913496e-07, "loss": 2.8412, "step": 20 }, { "epoch": 0.0, "grad_norm": 26.638497790578334, "learning_rate": 7.266435986159171e-07, "loss": 2.5866, "step": 21 }, { "epoch": 0.0, "grad_norm": 21.04505500067805, "learning_rate": 7.612456747404845e-07, "loss": 2.3629, "step": 22 }, { "epoch": 0.0, "grad_norm": 20.49490636193544, "learning_rate": 7.958477508650519e-07, "loss": 2.0194, "step": 23 }, { "epoch": 0.0, "grad_norm": 19.347203073629032, "learning_rate": 8.304498269896194e-07, "loss": 1.9241, "step": 24 }, { "epoch": 0.0, "grad_norm": 18.464360513749167, "learning_rate": 8.650519031141868e-07, "loss": 1.9576, "step": 25 }, { "epoch": 0.0, "grad_norm": 15.683836407692967, "learning_rate": 8.996539792387544e-07, "loss": 1.8351, "step": 26 }, { "epoch": 0.0, "grad_norm": 15.676400385932228, "learning_rate": 9.342560553633219e-07, "loss": 1.8349, "step": 27 }, { "epoch": 0.0, "grad_norm": 13.392235449843433, "learning_rate": 9.688581314878893e-07, "loss": 1.6501, "step": 28 }, { "epoch": 0.0, "grad_norm": 16.527415581012864, "learning_rate": 1.0034602076124569e-06, "loss": 1.8315, "step": 29 }, { "epoch": 0.0, "grad_norm": 13.694825750097737, "learning_rate": 1.0380622837370243e-06, "loss": 1.5652, "step": 30 }, { "epoch": 0.0, "grad_norm": 18.763414643263694, "learning_rate": 1.0726643598615919e-06, "loss": 1.6854, "step": 31 }, { "epoch": 0.0, "grad_norm": 20.33865629455925, "learning_rate": 1.1072664359861592e-06, "loss": 1.5039, "step": 32 }, { "epoch": 0.0, "grad_norm": 12.763246152388538, "learning_rate": 1.1418685121107268e-06, "loss": 1.3142, "step": 33 }, { "epoch": 0.0, "grad_norm": 9.263903811995936, "learning_rate": 1.1764705882352942e-06, "loss": 1.2849, "step": 34 }, { "epoch": 0.0, "grad_norm": 6.439565470509954, "learning_rate": 1.2110726643598616e-06, "loss": 1.1053, "step": 35 }, { "epoch": 0.0, "grad_norm": 7.952049399538764, "learning_rate": 1.2456747404844292e-06, "loss": 1.1444, "step": 36 }, { "epoch": 0.0, "grad_norm": 8.01097935846218, "learning_rate": 1.2802768166089966e-06, "loss": 1.143, "step": 37 }, { "epoch": 0.0, "grad_norm": 9.871471103375363, "learning_rate": 1.314878892733564e-06, "loss": 1.1158, "step": 38 }, { "epoch": 0.0, "grad_norm": 8.728062914617196, "learning_rate": 1.3494809688581318e-06, "loss": 1.1664, "step": 39 }, { "epoch": 0.0, "grad_norm": 7.892186888010498, "learning_rate": 1.3840830449826992e-06, "loss": 1.0177, "step": 40 }, { "epoch": 0.0, "grad_norm": 8.409090504197081, "learning_rate": 1.4186851211072665e-06, "loss": 0.971, "step": 41 }, { "epoch": 0.0, "grad_norm": 5.601033155317786, "learning_rate": 1.4532871972318341e-06, "loss": 0.8399, "step": 42 }, { "epoch": 0.0, "grad_norm": 4.431069512628087, "learning_rate": 1.4878892733564015e-06, "loss": 0.8619, "step": 43 }, { "epoch": 0.0, "grad_norm": 4.253547266802608, "learning_rate": 1.522491349480969e-06, "loss": 0.8158, "step": 44 }, { "epoch": 0.0, "grad_norm": 3.451235861424146, "learning_rate": 1.5570934256055365e-06, "loss": 0.8109, "step": 45 }, { "epoch": 0.0, "grad_norm": 3.3351939398703987, "learning_rate": 1.5916955017301039e-06, "loss": 0.7258, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.942797536420241, "learning_rate": 1.6262975778546713e-06, "loss": 0.6719, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.7074094109335363, "learning_rate": 1.6608996539792389e-06, "loss": 0.6014, "step": 48 }, { "epoch": 0.01, "grad_norm": 2.9317496409320096, "learning_rate": 1.6955017301038063e-06, "loss": 0.6469, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.5351542200072026, "learning_rate": 1.7301038062283736e-06, "loss": 0.8268, "step": 50 }, { "epoch": 0.01, "grad_norm": 2.393778283674769, "learning_rate": 1.7647058823529414e-06, "loss": 0.6724, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.274905557406526, "learning_rate": 1.7993079584775088e-06, "loss": 0.6593, "step": 52 }, { "epoch": 0.01, "grad_norm": 2.8743291218680316, "learning_rate": 1.8339100346020764e-06, "loss": 0.7329, "step": 53 }, { "epoch": 0.01, "grad_norm": 2.5017134187558137, "learning_rate": 1.8685121107266438e-06, "loss": 0.7654, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.9875397548331608, "learning_rate": 1.9031141868512112e-06, "loss": 0.7153, "step": 55 }, { "epoch": 0.01, "grad_norm": 2.3596053776617127, "learning_rate": 1.9377162629757786e-06, "loss": 0.7149, "step": 56 }, { "epoch": 0.01, "grad_norm": 2.166401176661029, "learning_rate": 1.972318339100346e-06, "loss": 0.6804, "step": 57 }, { "epoch": 0.01, "grad_norm": 2.0951820308221527, "learning_rate": 2.0069204152249138e-06, "loss": 0.6972, "step": 58 }, { "epoch": 0.01, "grad_norm": 2.4657081002289796, "learning_rate": 2.041522491349481e-06, "loss": 0.6038, "step": 59 }, { "epoch": 0.01, "grad_norm": 2.760273057855939, "learning_rate": 2.0761245674740485e-06, "loss": 0.7049, "step": 60 }, { "epoch": 0.01, "grad_norm": 2.111128237734746, "learning_rate": 2.110726643598616e-06, "loss": 0.7256, "step": 61 }, { "epoch": 0.01, "grad_norm": 2.1541359357914462, "learning_rate": 2.1453287197231837e-06, "loss": 0.7687, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.169472440876441, "learning_rate": 2.1799307958477513e-06, "loss": 0.6799, "step": 63 }, { "epoch": 0.01, "grad_norm": 2.02142080617264, "learning_rate": 2.2145328719723185e-06, "loss": 0.5648, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.4144204497805015, "learning_rate": 2.249134948096886e-06, "loss": 0.7278, "step": 65 }, { "epoch": 0.01, "grad_norm": 2.4257419352048184, "learning_rate": 2.2837370242214537e-06, "loss": 0.691, "step": 66 }, { "epoch": 0.01, "grad_norm": 2.3058060047031423, "learning_rate": 2.318339100346021e-06, "loss": 0.7196, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.963077781184842, "learning_rate": 2.3529411764705885e-06, "loss": 0.5515, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.312593059971846, "learning_rate": 2.387543252595156e-06, "loss": 0.7084, "step": 69 }, { "epoch": 0.01, "grad_norm": 2.2385262578581604, "learning_rate": 2.4221453287197232e-06, "loss": 0.7047, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.1074433866202082, "learning_rate": 2.456747404844291e-06, "loss": 0.6752, "step": 71 }, { "epoch": 0.01, "grad_norm": 2.1381474039570216, "learning_rate": 2.4913494809688584e-06, "loss": 0.7712, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.0078823847937706, "learning_rate": 2.5259515570934256e-06, "loss": 0.655, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.28321088070362, "learning_rate": 2.560553633217993e-06, "loss": 0.7052, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.310153415920439, "learning_rate": 2.5951557093425604e-06, "loss": 0.5941, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.165070016989384, "learning_rate": 2.629757785467128e-06, "loss": 0.6693, "step": 76 }, { "epoch": 0.01, "grad_norm": 2.0797734502814063, "learning_rate": 2.6643598615916955e-06, "loss": 0.6087, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.2813786170710113, "learning_rate": 2.6989619377162636e-06, "loss": 0.7298, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.3968138762934, "learning_rate": 2.7335640138408307e-06, "loss": 0.769, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.130423945439428, "learning_rate": 2.7681660899653983e-06, "loss": 0.6529, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.3282052971504674, "learning_rate": 2.802768166089966e-06, "loss": 0.7926, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.282966381673334, "learning_rate": 2.837370242214533e-06, "loss": 0.6593, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.160804388058384, "learning_rate": 2.8719723183391007e-06, "loss": 0.7173, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.072121784879253, "learning_rate": 2.9065743944636683e-06, "loss": 0.6375, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.1103173238484403, "learning_rate": 2.9411764705882355e-06, "loss": 0.5965, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.3387135179333542, "learning_rate": 2.975778546712803e-06, "loss": 0.6975, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.3469544991445765, "learning_rate": 3.0103806228373707e-06, "loss": 0.6548, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.4499716727822958, "learning_rate": 3.044982698961938e-06, "loss": 0.7056, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.395912867646452, "learning_rate": 3.0795847750865054e-06, "loss": 0.6031, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.124375425140296, "learning_rate": 3.114186851211073e-06, "loss": 0.6674, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.4152158278140674, "learning_rate": 3.14878892733564e-06, "loss": 0.6561, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.645106503355264, "learning_rate": 3.1833910034602078e-06, "loss": 0.6203, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.170849019708104, "learning_rate": 3.2179930795847754e-06, "loss": 0.5372, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.085614854916659, "learning_rate": 3.2525951557093425e-06, "loss": 0.6429, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.1899737577611824, "learning_rate": 3.28719723183391e-06, "loss": 0.663, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.0251727260233188, "learning_rate": 3.3217993079584777e-06, "loss": 0.6597, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.3645519837714564, "learning_rate": 3.356401384083045e-06, "loss": 0.6406, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.4034145704451593, "learning_rate": 3.3910034602076125e-06, "loss": 0.7707, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.322115990647384, "learning_rate": 3.42560553633218e-06, "loss": 0.7373, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.326687603326004, "learning_rate": 3.4602076124567473e-06, "loss": 0.584, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.3114109061072936, "learning_rate": 3.4948096885813153e-06, "loss": 0.6832, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.195809015277355, "learning_rate": 3.529411764705883e-06, "loss": 0.609, "step": 102 }, { "epoch": 0.01, "grad_norm": 2.1867017908477737, "learning_rate": 3.5640138408304505e-06, "loss": 0.5833, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.0914140112949577, "learning_rate": 3.5986159169550177e-06, "loss": 0.6508, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.3076122744482297, "learning_rate": 3.6332179930795853e-06, "loss": 0.6108, "step": 105 }, { "epoch": 0.01, "grad_norm": 2.1765304437298867, "learning_rate": 3.667820069204153e-06, "loss": 0.6641, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.6803492350052185, "learning_rate": 3.70242214532872e-06, "loss": 0.6669, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.199931218934597, "learning_rate": 3.7370242214532876e-06, "loss": 0.705, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.9358294457907268, "learning_rate": 3.7716262975778552e-06, "loss": 0.733, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.3313246015906355, "learning_rate": 3.8062283737024224e-06, "loss": 0.6512, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.7685678589026836, "learning_rate": 3.84083044982699e-06, "loss": 0.6651, "step": 111 }, { "epoch": 0.01, "grad_norm": 2.207787027694181, "learning_rate": 3.875432525951557e-06, "loss": 0.6167, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.343240858434686, "learning_rate": 3.910034602076125e-06, "loss": 0.7268, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.0089194459476643, "learning_rate": 3.944636678200692e-06, "loss": 0.5841, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.012684446361227, "learning_rate": 3.9792387543252595e-06, "loss": 0.5901, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.396831390118752, "learning_rate": 4.0138408304498275e-06, "loss": 0.706, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.2855393661408883, "learning_rate": 4.048442906574395e-06, "loss": 0.8205, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.015682700463381, "learning_rate": 4.083044982698962e-06, "loss": 0.5668, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.181291526395132, "learning_rate": 4.11764705882353e-06, "loss": 0.6465, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.2248428690994806, "learning_rate": 4.152249134948097e-06, "loss": 0.5714, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.202853322518201, "learning_rate": 4.186851211072664e-06, "loss": 0.6137, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.9078978560651283, "learning_rate": 4.221453287197232e-06, "loss": 0.5781, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.283481882986506, "learning_rate": 4.2560553633218e-06, "loss": 0.7298, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.2045034916450135, "learning_rate": 4.2906574394463675e-06, "loss": 0.6633, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.064486144309271, "learning_rate": 4.325259515570935e-06, "loss": 0.6187, "step": 125 }, { "epoch": 0.01, "grad_norm": 2.191299849944456, "learning_rate": 4.359861591695503e-06, "loss": 0.6644, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.256115550781429, "learning_rate": 4.39446366782007e-06, "loss": 0.7002, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.4171539822945998, "learning_rate": 4.429065743944637e-06, "loss": 0.642, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.9676280086993452, "learning_rate": 4.463667820069205e-06, "loss": 0.6169, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.315131249667166, "learning_rate": 4.498269896193772e-06, "loss": 0.6301, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.016187038955443, "learning_rate": 4.532871972318339e-06, "loss": 0.6258, "step": 131 }, { "epoch": 0.01, "grad_norm": 2.1690571819984803, "learning_rate": 4.567474048442907e-06, "loss": 0.6108, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.228343012890392, "learning_rate": 4.6020761245674745e-06, "loss": 0.7412, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.3230648080604444, "learning_rate": 4.636678200692042e-06, "loss": 0.7101, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.2702336427350787, "learning_rate": 4.67128027681661e-06, "loss": 0.6535, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.9953352017886066, "learning_rate": 4.705882352941177e-06, "loss": 0.5654, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.1335665240811483, "learning_rate": 4.740484429065744e-06, "loss": 0.7365, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.0081551952295067, "learning_rate": 4.775086505190312e-06, "loss": 0.5654, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.608564793222911, "learning_rate": 4.809688581314879e-06, "loss": 0.6156, "step": 139 }, { "epoch": 0.01, "grad_norm": 2.2903018042810865, "learning_rate": 4.8442906574394464e-06, "loss": 0.5923, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.606584727809493, "learning_rate": 4.8788927335640145e-06, "loss": 0.6669, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.2676586545153983, "learning_rate": 4.913494809688582e-06, "loss": 0.5231, "step": 142 }, { "epoch": 0.01, "grad_norm": 2.08733797395526, "learning_rate": 4.948096885813149e-06, "loss": 0.6447, "step": 143 }, { "epoch": 0.01, "grad_norm": 2.280353041745458, "learning_rate": 4.982698961937717e-06, "loss": 0.7165, "step": 144 }, { "epoch": 0.02, "grad_norm": 2.2993638112411756, "learning_rate": 5.017301038062284e-06, "loss": 0.6171, "step": 145 }, { "epoch": 0.02, "grad_norm": 2.1155138048920445, "learning_rate": 5.051903114186851e-06, "loss": 0.678, "step": 146 }, { "epoch": 0.02, "grad_norm": 2.092000766414222, "learning_rate": 5.086505190311419e-06, "loss": 0.5561, "step": 147 }, { "epoch": 0.02, "grad_norm": 2.4110387919086103, "learning_rate": 5.121107266435986e-06, "loss": 0.6236, "step": 148 }, { "epoch": 0.02, "grad_norm": 2.4295417190634003, "learning_rate": 5.155709342560554e-06, "loss": 0.7297, "step": 149 }, { "epoch": 0.02, "grad_norm": 2.1821328342305524, "learning_rate": 5.190311418685121e-06, "loss": 0.8451, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.9821707599109635, "learning_rate": 5.224913494809689e-06, "loss": 0.5909, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.935677871558701, "learning_rate": 5.259515570934256e-06, "loss": 0.6249, "step": 152 }, { "epoch": 0.02, "grad_norm": 2.2167343078418633, "learning_rate": 5.294117647058824e-06, "loss": 0.6286, "step": 153 }, { "epoch": 0.02, "grad_norm": 2.177970005050448, "learning_rate": 5.328719723183391e-06, "loss": 0.631, "step": 154 }, { "epoch": 0.02, "grad_norm": 2.2333962515280295, "learning_rate": 5.363321799307959e-06, "loss": 0.6793, "step": 155 }, { "epoch": 0.02, "grad_norm": 2.1950449323297736, "learning_rate": 5.397923875432527e-06, "loss": 0.6113, "step": 156 }, { "epoch": 0.02, "grad_norm": 2.172906333435821, "learning_rate": 5.4325259515570934e-06, "loss": 0.6419, "step": 157 }, { "epoch": 0.02, "grad_norm": 2.219433419070003, "learning_rate": 5.4671280276816615e-06, "loss": 0.6086, "step": 158 }, { "epoch": 0.02, "grad_norm": 2.1272302168820723, "learning_rate": 5.501730103806229e-06, "loss": 0.6471, "step": 159 }, { "epoch": 0.02, "grad_norm": 2.2725105069940783, "learning_rate": 5.536332179930797e-06, "loss": 0.5728, "step": 160 }, { "epoch": 0.02, "grad_norm": 2.3320622478808186, "learning_rate": 5.570934256055364e-06, "loss": 0.6638, "step": 161 }, { "epoch": 0.02, "grad_norm": 2.4359930813276236, "learning_rate": 5.605536332179932e-06, "loss": 0.6696, "step": 162 }, { "epoch": 0.02, "grad_norm": 2.225861962560492, "learning_rate": 5.640138408304498e-06, "loss": 0.6584, "step": 163 }, { "epoch": 0.02, "grad_norm": 2.2669038485616375, "learning_rate": 5.674740484429066e-06, "loss": 0.6774, "step": 164 }, { "epoch": 0.02, "grad_norm": 2.3265153532039795, "learning_rate": 5.709342560553633e-06, "loss": 0.6741, "step": 165 }, { "epoch": 0.02, "grad_norm": 2.419471267005024, "learning_rate": 5.743944636678201e-06, "loss": 0.6057, "step": 166 }, { "epoch": 0.02, "grad_norm": 2.276964154368608, "learning_rate": 5.7785467128027686e-06, "loss": 0.5884, "step": 167 }, { "epoch": 0.02, "grad_norm": 2.410507009541092, "learning_rate": 5.8131487889273366e-06, "loss": 0.6878, "step": 168 }, { "epoch": 0.02, "grad_norm": 2.083503032279742, "learning_rate": 5.847750865051903e-06, "loss": 0.6552, "step": 169 }, { "epoch": 0.02, "grad_norm": 2.2509896017989117, "learning_rate": 5.882352941176471e-06, "loss": 0.6079, "step": 170 }, { "epoch": 0.02, "grad_norm": 2.1859289065791256, "learning_rate": 5.916955017301038e-06, "loss": 0.621, "step": 171 }, { "epoch": 0.02, "grad_norm": 2.147850941536798, "learning_rate": 5.951557093425606e-06, "loss": 0.5711, "step": 172 }, { "epoch": 0.02, "grad_norm": 2.1062480954749314, "learning_rate": 5.986159169550173e-06, "loss": 0.6168, "step": 173 }, { "epoch": 0.02, "grad_norm": 2.025096030131394, "learning_rate": 6.020761245674741e-06, "loss": 0.6314, "step": 174 }, { "epoch": 0.02, "grad_norm": 2.1106251684861093, "learning_rate": 6.055363321799308e-06, "loss": 0.6165, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.9386258712780828, "learning_rate": 6.089965397923876e-06, "loss": 0.5944, "step": 176 }, { "epoch": 0.02, "grad_norm": 2.439704310973515, "learning_rate": 6.124567474048443e-06, "loss": 0.7301, "step": 177 }, { "epoch": 0.02, "grad_norm": 2.193053790974262, "learning_rate": 6.159169550173011e-06, "loss": 0.5184, "step": 178 }, { "epoch": 0.02, "grad_norm": 2.1972037628573364, "learning_rate": 6.193771626297579e-06, "loss": 0.6819, "step": 179 }, { "epoch": 0.02, "grad_norm": 2.108660851248478, "learning_rate": 6.228373702422146e-06, "loss": 0.6435, "step": 180 }, { "epoch": 0.02, "grad_norm": 2.36479259661094, "learning_rate": 6.262975778546714e-06, "loss": 0.5749, "step": 181 }, { "epoch": 0.02, "grad_norm": 2.0278184423083347, "learning_rate": 6.29757785467128e-06, "loss": 0.5742, "step": 182 }, { "epoch": 0.02, "grad_norm": 2.3224464327459984, "learning_rate": 6.332179930795848e-06, "loss": 0.6336, "step": 183 }, { "epoch": 0.02, "grad_norm": 2.3133190199193168, "learning_rate": 6.3667820069204156e-06, "loss": 0.6914, "step": 184 }, { "epoch": 0.02, "grad_norm": 2.188171505795143, "learning_rate": 6.401384083044984e-06, "loss": 0.7221, "step": 185 }, { "epoch": 0.02, "grad_norm": 2.58564757756167, "learning_rate": 6.435986159169551e-06, "loss": 0.6711, "step": 186 }, { "epoch": 0.02, "grad_norm": 2.122268004241992, "learning_rate": 6.470588235294119e-06, "loss": 0.6429, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.9690551649287422, "learning_rate": 6.505190311418685e-06, "loss": 0.6549, "step": 188 }, { "epoch": 0.02, "grad_norm": 2.3770166505133887, "learning_rate": 6.539792387543253e-06, "loss": 0.7386, "step": 189 }, { "epoch": 0.02, "grad_norm": 2.332845067178479, "learning_rate": 6.57439446366782e-06, "loss": 0.7431, "step": 190 }, { "epoch": 0.02, "grad_norm": 2.292052198271124, "learning_rate": 6.608996539792388e-06, "loss": 0.569, "step": 191 }, { "epoch": 0.02, "grad_norm": 2.2147634719443836, "learning_rate": 6.6435986159169555e-06, "loss": 0.7045, "step": 192 }, { "epoch": 0.02, "grad_norm": 2.328089977091099, "learning_rate": 6.6782006920415235e-06, "loss": 0.6364, "step": 193 }, { "epoch": 0.02, "grad_norm": 2.274054083618043, "learning_rate": 6.71280276816609e-06, "loss": 0.6056, "step": 194 }, { "epoch": 0.02, "grad_norm": 2.3784158616015643, "learning_rate": 6.747404844290658e-06, "loss": 0.6709, "step": 195 }, { "epoch": 0.02, "grad_norm": 2.0892411869210346, "learning_rate": 6.782006920415225e-06, "loss": 0.562, "step": 196 }, { "epoch": 0.02, "grad_norm": 2.1091681111983496, "learning_rate": 6.816608996539793e-06, "loss": 0.5607, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.8902408651238167, "learning_rate": 6.85121107266436e-06, "loss": 0.707, "step": 198 }, { "epoch": 0.02, "grad_norm": 2.561780143261129, "learning_rate": 6.885813148788928e-06, "loss": 0.6728, "step": 199 }, { "epoch": 0.02, "grad_norm": 2.0776566394374365, "learning_rate": 6.9204152249134946e-06, "loss": 0.6455, "step": 200 }, { "epoch": 0.02, "grad_norm": 2.215431897046199, "learning_rate": 6.9550173010380626e-06, "loss": 0.5443, "step": 201 }, { "epoch": 0.02, "grad_norm": 2.0873081020830857, "learning_rate": 6.989619377162631e-06, "loss": 0.6858, "step": 202 }, { "epoch": 0.02, "grad_norm": 2.1124011856789098, "learning_rate": 7.024221453287198e-06, "loss": 0.5993, "step": 203 }, { "epoch": 0.02, "grad_norm": 2.4320988871766493, "learning_rate": 7.058823529411766e-06, "loss": 0.6647, "step": 204 }, { "epoch": 0.02, "grad_norm": 2.035002393310255, "learning_rate": 7.093425605536333e-06, "loss": 0.619, "step": 205 }, { "epoch": 0.02, "grad_norm": 2.0914719636443317, "learning_rate": 7.128027681660901e-06, "loss": 0.7546, "step": 206 }, { "epoch": 0.02, "grad_norm": 2.008874075320383, "learning_rate": 7.162629757785467e-06, "loss": 0.6114, "step": 207 }, { "epoch": 0.02, "grad_norm": 2.193799041028231, "learning_rate": 7.197231833910035e-06, "loss": 0.7129, "step": 208 }, { "epoch": 0.02, "grad_norm": 2.197495559283392, "learning_rate": 7.2318339100346025e-06, "loss": 0.7395, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.8616873688955218, "learning_rate": 7.2664359861591705e-06, "loss": 0.5019, "step": 210 }, { "epoch": 0.02, "grad_norm": 2.432719510249827, "learning_rate": 7.301038062283738e-06, "loss": 0.7087, "step": 211 }, { "epoch": 0.02, "grad_norm": 2.3669190124323127, "learning_rate": 7.335640138408306e-06, "loss": 0.6296, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.1872694291480785, "learning_rate": 7.370242214532872e-06, "loss": 0.6005, "step": 213 }, { "epoch": 0.02, "grad_norm": 1.7828744622515829, "learning_rate": 7.40484429065744e-06, "loss": 0.6305, "step": 214 }, { "epoch": 0.02, "grad_norm": 2.0040108138250723, "learning_rate": 7.439446366782007e-06, "loss": 0.6873, "step": 215 }, { "epoch": 0.02, "grad_norm": 2.0796961366745226, "learning_rate": 7.474048442906575e-06, "loss": 0.6407, "step": 216 }, { "epoch": 0.02, "grad_norm": 2.1967248607492937, "learning_rate": 7.508650519031142e-06, "loss": 0.6707, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.9175115859089473, "learning_rate": 7.5432525951557104e-06, "loss": 0.6526, "step": 218 }, { "epoch": 0.02, "grad_norm": 2.0514016284982213, "learning_rate": 7.577854671280277e-06, "loss": 0.6123, "step": 219 }, { "epoch": 0.02, "grad_norm": 2.5117956633223484, "learning_rate": 7.612456747404845e-06, "loss": 0.6014, "step": 220 }, { "epoch": 0.02, "grad_norm": 2.2156322226471796, "learning_rate": 7.647058823529411e-06, "loss": 0.6571, "step": 221 }, { "epoch": 0.02, "grad_norm": 2.2926948007219115, "learning_rate": 7.68166089965398e-06, "loss": 0.774, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.9721980908588659, "learning_rate": 7.716262975778547e-06, "loss": 0.6152, "step": 223 }, { "epoch": 0.02, "grad_norm": 2.111098976593361, "learning_rate": 7.750865051903114e-06, "loss": 0.6419, "step": 224 }, { "epoch": 0.02, "grad_norm": 2.300877354238802, "learning_rate": 7.785467128027683e-06, "loss": 0.7069, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.2866412650431434, "learning_rate": 7.82006920415225e-06, "loss": 0.6085, "step": 226 }, { "epoch": 0.02, "grad_norm": 2.0848079985085226, "learning_rate": 7.854671280276818e-06, "loss": 0.5879, "step": 227 }, { "epoch": 0.02, "grad_norm": 2.201530251445851, "learning_rate": 7.889273356401385e-06, "loss": 0.7731, "step": 228 }, { "epoch": 0.02, "grad_norm": 2.0570052426900394, "learning_rate": 7.923875432525952e-06, "loss": 0.5881, "step": 229 }, { "epoch": 0.02, "grad_norm": 2.1400020806991757, "learning_rate": 7.958477508650519e-06, "loss": 0.694, "step": 230 }, { "epoch": 0.02, "grad_norm": 2.1227819097860645, "learning_rate": 7.993079584775088e-06, "loss": 0.7255, "step": 231 }, { "epoch": 0.02, "grad_norm": 2.137417177560603, "learning_rate": 8.027681660899655e-06, "loss": 0.5723, "step": 232 }, { "epoch": 0.02, "grad_norm": 2.1108437635031136, "learning_rate": 8.062283737024222e-06, "loss": 0.6994, "step": 233 }, { "epoch": 0.02, "grad_norm": 2.124414466611215, "learning_rate": 8.09688581314879e-06, "loss": 0.6547, "step": 234 }, { "epoch": 0.02, "grad_norm": 2.0520237300562854, "learning_rate": 8.131487889273357e-06, "loss": 0.6525, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.9714185147383696, "learning_rate": 8.166089965397924e-06, "loss": 0.6038, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.939078039152394, "learning_rate": 8.200692041522493e-06, "loss": 0.711, "step": 237 }, { "epoch": 0.02, "grad_norm": 2.0670237054998393, "learning_rate": 8.23529411764706e-06, "loss": 0.614, "step": 238 }, { "epoch": 0.02, "grad_norm": 2.242190530112608, "learning_rate": 8.269896193771627e-06, "loss": 0.6014, "step": 239 }, { "epoch": 0.02, "grad_norm": 2.1485542120755152, "learning_rate": 8.304498269896194e-06, "loss": 0.6358, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.8482103878471823, "learning_rate": 8.339100346020761e-06, "loss": 0.5711, "step": 241 }, { "epoch": 0.03, "grad_norm": 1.9737201478872424, "learning_rate": 8.373702422145328e-06, "loss": 0.6359, "step": 242 }, { "epoch": 0.03, "grad_norm": 2.4163559055070887, "learning_rate": 8.408304498269897e-06, "loss": 0.7363, "step": 243 }, { "epoch": 0.03, "grad_norm": 2.244452576938016, "learning_rate": 8.442906574394465e-06, "loss": 0.6451, "step": 244 }, { "epoch": 0.03, "grad_norm": 2.086004638946662, "learning_rate": 8.477508650519032e-06, "loss": 0.7345, "step": 245 }, { "epoch": 0.03, "grad_norm": 2.128590628871988, "learning_rate": 8.5121107266436e-06, "loss": 0.7326, "step": 246 }, { "epoch": 0.03, "grad_norm": 2.235562449068478, "learning_rate": 8.546712802768166e-06, "loss": 0.7656, "step": 247 }, { "epoch": 0.03, "grad_norm": 2.2509200985615063, "learning_rate": 8.581314878892735e-06, "loss": 0.6271, "step": 248 }, { "epoch": 0.03, "grad_norm": 2.295331842170622, "learning_rate": 8.615916955017302e-06, "loss": 0.6678, "step": 249 }, { "epoch": 0.03, "grad_norm": 2.391429221795401, "learning_rate": 8.65051903114187e-06, "loss": 0.7425, "step": 250 }, { "epoch": 0.03, "grad_norm": 1.9529324621840602, "learning_rate": 8.685121107266436e-06, "loss": 0.5745, "step": 251 }, { "epoch": 0.03, "grad_norm": 2.246043588101345, "learning_rate": 8.719723183391005e-06, "loss": 0.6345, "step": 252 }, { "epoch": 0.03, "grad_norm": 2.4904984344511076, "learning_rate": 8.75432525951557e-06, "loss": 0.6939, "step": 253 }, { "epoch": 0.03, "grad_norm": 2.1642783247420363, "learning_rate": 8.78892733564014e-06, "loss": 0.6032, "step": 254 }, { "epoch": 0.03, "grad_norm": 2.1940931042243257, "learning_rate": 8.823529411764707e-06, "loss": 0.7277, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.8948954672045033, "learning_rate": 8.858131487889274e-06, "loss": 0.6893, "step": 256 }, { "epoch": 0.03, "grad_norm": 2.4582031636820525, "learning_rate": 8.892733564013841e-06, "loss": 0.7363, "step": 257 }, { "epoch": 0.03, "grad_norm": 2.3058632907633654, "learning_rate": 8.92733564013841e-06, "loss": 0.7746, "step": 258 }, { "epoch": 0.03, "grad_norm": 1.8228869043171054, "learning_rate": 8.961937716262975e-06, "loss": 0.487, "step": 259 }, { "epoch": 0.03, "grad_norm": 2.219005308745244, "learning_rate": 8.996539792387544e-06, "loss": 0.7147, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.9983314588691001, "learning_rate": 9.031141868512112e-06, "loss": 0.6375, "step": 261 }, { "epoch": 0.03, "grad_norm": 2.5810389232454507, "learning_rate": 9.065743944636679e-06, "loss": 0.8198, "step": 262 }, { "epoch": 0.03, "grad_norm": 2.319366870420018, "learning_rate": 9.100346020761246e-06, "loss": 0.7102, "step": 263 }, { "epoch": 0.03, "grad_norm": 2.2528281665453656, "learning_rate": 9.134948096885815e-06, "loss": 0.6178, "step": 264 }, { "epoch": 0.03, "grad_norm": 2.268866438407876, "learning_rate": 9.16955017301038e-06, "loss": 0.649, "step": 265 }, { "epoch": 0.03, "grad_norm": 2.1190340028998707, "learning_rate": 9.204152249134949e-06, "loss": 0.6279, "step": 266 }, { "epoch": 0.03, "grad_norm": 2.0714620681045663, "learning_rate": 9.238754325259516e-06, "loss": 0.6938, "step": 267 }, { "epoch": 0.03, "grad_norm": 2.325221307942943, "learning_rate": 9.273356401384083e-06, "loss": 0.5867, "step": 268 }, { "epoch": 0.03, "grad_norm": 2.2470942028995986, "learning_rate": 9.307958477508652e-06, "loss": 0.6852, "step": 269 }, { "epoch": 0.03, "grad_norm": 2.007572350176665, "learning_rate": 9.34256055363322e-06, "loss": 0.5481, "step": 270 }, { "epoch": 0.03, "grad_norm": 2.0257482560883275, "learning_rate": 9.377162629757787e-06, "loss": 0.6386, "step": 271 }, { "epoch": 0.03, "grad_norm": 2.0485510426599878, "learning_rate": 9.411764705882354e-06, "loss": 0.7489, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.9623446612734317, "learning_rate": 9.446366782006921e-06, "loss": 0.5764, "step": 273 }, { "epoch": 0.03, "grad_norm": 2.127396095227505, "learning_rate": 9.480968858131488e-06, "loss": 0.6338, "step": 274 }, { "epoch": 0.03, "grad_norm": 2.215498491322338, "learning_rate": 9.515570934256057e-06, "loss": 0.758, "step": 275 }, { "epoch": 0.03, "grad_norm": 2.08023339836091, "learning_rate": 9.550173010380624e-06, "loss": 0.6621, "step": 276 }, { "epoch": 0.03, "grad_norm": 1.9378232010147092, "learning_rate": 9.584775086505191e-06, "loss": 0.6002, "step": 277 }, { "epoch": 0.03, "grad_norm": 2.1412793080715358, "learning_rate": 9.619377162629759e-06, "loss": 0.6288, "step": 278 }, { "epoch": 0.03, "grad_norm": 2.095326824165883, "learning_rate": 9.653979238754326e-06, "loss": 0.6168, "step": 279 }, { "epoch": 0.03, "grad_norm": 2.093548208464301, "learning_rate": 9.688581314878893e-06, "loss": 0.6185, "step": 280 }, { "epoch": 0.03, "grad_norm": 2.0683217073155142, "learning_rate": 9.723183391003462e-06, "loss": 0.6579, "step": 281 }, { "epoch": 0.03, "grad_norm": 2.2050921361510256, "learning_rate": 9.757785467128029e-06, "loss": 0.6564, "step": 282 }, { "epoch": 0.03, "grad_norm": 2.0666754378724512, "learning_rate": 9.792387543252596e-06, "loss": 0.6938, "step": 283 }, { "epoch": 0.03, "grad_norm": 2.0255579239228676, "learning_rate": 9.826989619377163e-06, "loss": 0.5832, "step": 284 }, { "epoch": 0.03, "grad_norm": 2.2059232192312477, "learning_rate": 9.86159169550173e-06, "loss": 0.6408, "step": 285 }, { "epoch": 0.03, "grad_norm": 2.227961737598211, "learning_rate": 9.896193771626298e-06, "loss": 0.6834, "step": 286 }, { "epoch": 0.03, "grad_norm": 2.384395993502413, "learning_rate": 9.930795847750866e-06, "loss": 0.6507, "step": 287 }, { "epoch": 0.03, "grad_norm": 2.2259501520536786, "learning_rate": 9.965397923875434e-06, "loss": 0.8218, "step": 288 }, { "epoch": 0.03, "grad_norm": 2.2153438165060004, "learning_rate": 1e-05, "loss": 0.7089, "step": 289 }, { "epoch": 0.03, "grad_norm": 2.090933521636028, "learning_rate": 9.99999971661075e-06, "loss": 0.6669, "step": 290 }, { "epoch": 0.03, "grad_norm": 2.2188852773516707, "learning_rate": 9.999998866443035e-06, "loss": 0.8329, "step": 291 }, { "epoch": 0.03, "grad_norm": 1.9643905919582283, "learning_rate": 9.999997449496947e-06, "loss": 0.604, "step": 292 }, { "epoch": 0.03, "grad_norm": 1.7302965204706007, "learning_rate": 9.999995465772652e-06, "loss": 0.5917, "step": 293 }, { "epoch": 0.03, "grad_norm": 1.836246496796778, "learning_rate": 9.99999291527037e-06, "loss": 0.5272, "step": 294 }, { "epoch": 0.03, "grad_norm": 2.2820920836002587, "learning_rate": 9.999989797990391e-06, "loss": 0.7376, "step": 295 }, { "epoch": 0.03, "grad_norm": 2.2425304223811446, "learning_rate": 9.999986113933071e-06, "loss": 0.6333, "step": 296 }, { "epoch": 0.03, "grad_norm": 2.021448989644418, "learning_rate": 9.999981863098825e-06, "loss": 0.5886, "step": 297 }, { "epoch": 0.03, "grad_norm": 2.1689623016635897, "learning_rate": 9.999977045488135e-06, "loss": 0.6014, "step": 298 }, { "epoch": 0.03, "grad_norm": 2.3336741309421347, "learning_rate": 9.999971661101551e-06, "loss": 0.7371, "step": 299 }, { "epoch": 0.03, "grad_norm": 2.1629679103619783, "learning_rate": 9.999965709939679e-06, "loss": 0.6577, "step": 300 }, { "epoch": 0.03, "grad_norm": 1.8935808966516685, "learning_rate": 9.999959192003194e-06, "loss": 0.5906, "step": 301 }, { "epoch": 0.03, "grad_norm": 2.1444391808967738, "learning_rate": 9.999952107292836e-06, "loss": 0.7357, "step": 302 }, { "epoch": 0.03, "grad_norm": 2.022296007147585, "learning_rate": 9.999944455809408e-06, "loss": 0.574, "step": 303 }, { "epoch": 0.03, "grad_norm": 2.063507571811703, "learning_rate": 9.999936237553777e-06, "loss": 0.6103, "step": 304 }, { "epoch": 0.03, "grad_norm": 2.1313448786683913, "learning_rate": 9.999927452526877e-06, "loss": 0.6104, "step": 305 }, { "epoch": 0.03, "grad_norm": 2.112051884258629, "learning_rate": 9.999918100729698e-06, "loss": 0.657, "step": 306 }, { "epoch": 0.03, "grad_norm": 2.2604850689998757, "learning_rate": 9.999908182163306e-06, "loss": 0.7052, "step": 307 }, { "epoch": 0.03, "grad_norm": 2.1654923977851617, "learning_rate": 9.999897696828822e-06, "loss": 0.6801, "step": 308 }, { "epoch": 0.03, "grad_norm": 1.996921654859269, "learning_rate": 9.999886644727436e-06, "loss": 0.6856, "step": 309 }, { "epoch": 0.03, "grad_norm": 2.145453425352305, "learning_rate": 9.999875025860401e-06, "loss": 0.6513, "step": 310 }, { "epoch": 0.03, "grad_norm": 2.234415993232513, "learning_rate": 9.999862840229033e-06, "loss": 0.6971, "step": 311 }, { "epoch": 0.03, "grad_norm": 2.0645659727143943, "learning_rate": 9.999850087834715e-06, "loss": 0.7306, "step": 312 }, { "epoch": 0.03, "grad_norm": 1.8744568621278508, "learning_rate": 9.99983676867889e-06, "loss": 0.638, "step": 313 }, { "epoch": 0.03, "grad_norm": 2.3222295206374604, "learning_rate": 9.999822882763068e-06, "loss": 0.6553, "step": 314 }, { "epoch": 0.03, "grad_norm": 2.099484652851686, "learning_rate": 9.999808430088826e-06, "loss": 0.7374, "step": 315 }, { "epoch": 0.03, "grad_norm": 2.228145088882417, "learning_rate": 9.999793410657802e-06, "loss": 0.8169, "step": 316 }, { "epoch": 0.03, "grad_norm": 2.1291708934516986, "learning_rate": 9.999777824471694e-06, "loss": 0.631, "step": 317 }, { "epoch": 0.03, "grad_norm": 2.080530025890949, "learning_rate": 9.999761671532273e-06, "loss": 0.6407, "step": 318 }, { "epoch": 0.03, "grad_norm": 2.119009430136004, "learning_rate": 9.999744951841367e-06, "loss": 0.6849, "step": 319 }, { "epoch": 0.03, "grad_norm": 1.8955703890753102, "learning_rate": 9.999727665400876e-06, "loss": 0.7005, "step": 320 }, { "epoch": 0.03, "grad_norm": 2.1038990841547496, "learning_rate": 9.999709812212756e-06, "loss": 0.6685, "step": 321 }, { "epoch": 0.03, "grad_norm": 2.2589247469878213, "learning_rate": 9.99969139227903e-06, "loss": 0.7754, "step": 322 }, { "epoch": 0.03, "grad_norm": 2.1825354390943943, "learning_rate": 9.99967240560179e-06, "loss": 0.7121, "step": 323 }, { "epoch": 0.03, "grad_norm": 1.916129116293965, "learning_rate": 9.999652852183184e-06, "loss": 0.5697, "step": 324 }, { "epoch": 0.03, "grad_norm": 2.0643197115454033, "learning_rate": 9.999632732025428e-06, "loss": 0.6152, "step": 325 }, { "epoch": 0.03, "grad_norm": 2.0612939818120957, "learning_rate": 9.99961204513081e-06, "loss": 0.6136, "step": 326 }, { "epoch": 0.03, "grad_norm": 2.216817383090462, "learning_rate": 9.999590791501665e-06, "loss": 0.5763, "step": 327 }, { "epoch": 0.03, "grad_norm": 2.0495658313065555, "learning_rate": 9.999568971140409e-06, "loss": 0.6226, "step": 328 }, { "epoch": 0.03, "grad_norm": 2.0958367292073277, "learning_rate": 9.999546584049513e-06, "loss": 0.7229, "step": 329 }, { "epoch": 0.03, "grad_norm": 2.0926501956699592, "learning_rate": 9.999523630231516e-06, "loss": 0.7376, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.9068405784141171, "learning_rate": 9.999500109689018e-06, "loss": 0.5987, "step": 331 }, { "epoch": 0.03, "grad_norm": 2.0006081400254963, "learning_rate": 9.999476022424688e-06, "loss": 0.6085, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.9085783103448943, "learning_rate": 9.999451368441254e-06, "loss": 0.564, "step": 333 }, { "epoch": 0.03, "grad_norm": 2.1115680818865235, "learning_rate": 9.999426147741512e-06, "loss": 0.6871, "step": 334 }, { "epoch": 0.03, "grad_norm": 2.1237154103762186, "learning_rate": 9.999400360328318e-06, "loss": 0.6901, "step": 335 }, { "epoch": 0.03, "grad_norm": 1.929206892415265, "learning_rate": 9.999374006204601e-06, "loss": 0.6018, "step": 336 }, { "epoch": 0.04, "grad_norm": 2.0204061701835094, "learning_rate": 9.999347085373343e-06, "loss": 0.645, "step": 337 }, { "epoch": 0.04, "grad_norm": 1.9226738874188818, "learning_rate": 9.999319597837599e-06, "loss": 0.6821, "step": 338 }, { "epoch": 0.04, "grad_norm": 2.107623603422912, "learning_rate": 9.99929154360048e-06, "loss": 0.6378, "step": 339 }, { "epoch": 0.04, "grad_norm": 1.8743119821525225, "learning_rate": 9.999262922665172e-06, "loss": 0.59, "step": 340 }, { "epoch": 0.04, "grad_norm": 1.8550368642600081, "learning_rate": 9.999233735034916e-06, "loss": 0.6298, "step": 341 }, { "epoch": 0.04, "grad_norm": 2.079258305023673, "learning_rate": 9.999203980713023e-06, "loss": 0.7754, "step": 342 }, { "epoch": 0.04, "grad_norm": 2.16644850862517, "learning_rate": 9.999173659702863e-06, "loss": 0.5724, "step": 343 }, { "epoch": 0.04, "grad_norm": 1.7680175701749203, "learning_rate": 9.999142772007875e-06, "loss": 0.6038, "step": 344 }, { "epoch": 0.04, "grad_norm": 1.9697469124412608, "learning_rate": 9.999111317631559e-06, "loss": 0.6786, "step": 345 }, { "epoch": 0.04, "grad_norm": 2.0214076901251707, "learning_rate": 9.999079296577482e-06, "loss": 0.7071, "step": 346 }, { "epoch": 0.04, "grad_norm": 2.1220150741444526, "learning_rate": 9.99904670884927e-06, "loss": 0.7243, "step": 347 }, { "epoch": 0.04, "grad_norm": 2.023388262077159, "learning_rate": 9.999013554450624e-06, "loss": 0.8155, "step": 348 }, { "epoch": 0.04, "grad_norm": 2.047321068044065, "learning_rate": 9.998979833385296e-06, "loss": 0.6606, "step": 349 }, { "epoch": 0.04, "grad_norm": 2.090748901325982, "learning_rate": 9.998945545657113e-06, "loss": 0.6928, "step": 350 }, { "epoch": 0.04, "grad_norm": 2.052795459098353, "learning_rate": 9.998910691269957e-06, "loss": 0.8328, "step": 351 }, { "epoch": 0.04, "grad_norm": 2.402064265656104, "learning_rate": 9.998875270227781e-06, "loss": 0.672, "step": 352 }, { "epoch": 0.04, "grad_norm": 2.1481436238474636, "learning_rate": 9.998839282534602e-06, "loss": 0.6665, "step": 353 }, { "epoch": 0.04, "grad_norm": 2.2395539693277526, "learning_rate": 9.998802728194496e-06, "loss": 0.7083, "step": 354 }, { "epoch": 0.04, "grad_norm": 1.9776107871723176, "learning_rate": 9.998765607211612e-06, "loss": 0.6582, "step": 355 }, { "epoch": 0.04, "grad_norm": 2.147090458916406, "learning_rate": 9.99872791959015e-06, "loss": 0.6026, "step": 356 }, { "epoch": 0.04, "grad_norm": 2.034674606820822, "learning_rate": 9.99868966533439e-06, "loss": 0.6911, "step": 357 }, { "epoch": 0.04, "grad_norm": 2.0951350122740298, "learning_rate": 9.998650844448663e-06, "loss": 0.6416, "step": 358 }, { "epoch": 0.04, "grad_norm": 2.089497478746895, "learning_rate": 9.998611456937373e-06, "loss": 0.6531, "step": 359 }, { "epoch": 0.04, "grad_norm": 2.0664793585495898, "learning_rate": 9.998571502804982e-06, "loss": 0.6725, "step": 360 }, { "epoch": 0.04, "grad_norm": 1.8814458336505542, "learning_rate": 9.998530982056021e-06, "loss": 0.6481, "step": 361 }, { "epoch": 0.04, "grad_norm": 1.774755021585096, "learning_rate": 9.99848989469508e-06, "loss": 0.6176, "step": 362 }, { "epoch": 0.04, "grad_norm": 2.1620141766232686, "learning_rate": 9.998448240726822e-06, "loss": 0.6294, "step": 363 }, { "epoch": 0.04, "grad_norm": 2.288599249402456, "learning_rate": 9.998406020155964e-06, "loss": 0.6562, "step": 364 }, { "epoch": 0.04, "grad_norm": 1.9908199779664653, "learning_rate": 9.998363232987294e-06, "loss": 0.687, "step": 365 }, { "epoch": 0.04, "grad_norm": 2.139573273038414, "learning_rate": 9.998319879225662e-06, "loss": 0.591, "step": 366 }, { "epoch": 0.04, "grad_norm": 1.951025573732339, "learning_rate": 9.998275958875983e-06, "loss": 0.7088, "step": 367 }, { "epoch": 0.04, "grad_norm": 2.185154704796897, "learning_rate": 9.998231471943234e-06, "loss": 0.7013, "step": 368 }, { "epoch": 0.04, "grad_norm": 2.093786252997598, "learning_rate": 9.998186418432459e-06, "loss": 0.7835, "step": 369 }, { "epoch": 0.04, "grad_norm": 2.2585179169164817, "learning_rate": 9.998140798348766e-06, "loss": 0.7529, "step": 370 }, { "epoch": 0.04, "grad_norm": 2.086435056903612, "learning_rate": 9.998094611697322e-06, "loss": 0.783, "step": 371 }, { "epoch": 0.04, "grad_norm": 2.076639780170476, "learning_rate": 9.998047858483369e-06, "loss": 0.7017, "step": 372 }, { "epoch": 0.04, "grad_norm": 2.1634960145020017, "learning_rate": 9.9980005387122e-06, "loss": 0.7516, "step": 373 }, { "epoch": 0.04, "grad_norm": 2.359129077276345, "learning_rate": 9.997952652389184e-06, "loss": 0.7177, "step": 374 }, { "epoch": 0.04, "grad_norm": 2.69710045388187, "learning_rate": 9.997904199519748e-06, "loss": 0.7553, "step": 375 }, { "epoch": 0.04, "grad_norm": 1.932081602102441, "learning_rate": 9.997855180109383e-06, "loss": 0.7331, "step": 376 }, { "epoch": 0.04, "grad_norm": 2.155197890696538, "learning_rate": 9.997805594163646e-06, "loss": 0.7416, "step": 377 }, { "epoch": 0.04, "grad_norm": 1.9314889268578335, "learning_rate": 9.997755441688159e-06, "loss": 0.7268, "step": 378 }, { "epoch": 0.04, "grad_norm": 2.0768241365813447, "learning_rate": 9.997704722688607e-06, "loss": 0.696, "step": 379 }, { "epoch": 0.04, "grad_norm": 2.1302481157387874, "learning_rate": 9.997653437170739e-06, "loss": 0.8132, "step": 380 }, { "epoch": 0.04, "grad_norm": 1.8926932428974472, "learning_rate": 9.997601585140367e-06, "loss": 0.5856, "step": 381 }, { "epoch": 0.04, "grad_norm": 2.1087789610541, "learning_rate": 9.99754916660337e-06, "loss": 0.7283, "step": 382 }, { "epoch": 0.04, "grad_norm": 2.0973004274036833, "learning_rate": 9.997496181565691e-06, "loss": 0.615, "step": 383 }, { "epoch": 0.04, "grad_norm": 1.9798386111356991, "learning_rate": 9.997442630033333e-06, "loss": 0.6571, "step": 384 }, { "epoch": 0.04, "grad_norm": 1.9119680312757281, "learning_rate": 9.997388512012371e-06, "loss": 0.6829, "step": 385 }, { "epoch": 0.04, "grad_norm": 2.132931602798204, "learning_rate": 9.997333827508936e-06, "loss": 0.8136, "step": 386 }, { "epoch": 0.04, "grad_norm": 1.926068179217276, "learning_rate": 9.997278576529228e-06, "loss": 0.6462, "step": 387 }, { "epoch": 0.04, "grad_norm": 1.8398296773499356, "learning_rate": 9.997222759079507e-06, "loss": 0.6632, "step": 388 }, { "epoch": 0.04, "grad_norm": 2.2236470134644355, "learning_rate": 9.997166375166107e-06, "loss": 0.7079, "step": 389 }, { "epoch": 0.04, "grad_norm": 2.3302822862993993, "learning_rate": 9.997109424795415e-06, "loss": 0.6415, "step": 390 }, { "epoch": 0.04, "grad_norm": 2.0538248803181034, "learning_rate": 9.997051907973885e-06, "loss": 0.6829, "step": 391 }, { "epoch": 0.04, "grad_norm": 2.039125915190051, "learning_rate": 9.99699382470804e-06, "loss": 0.6388, "step": 392 }, { "epoch": 0.04, "grad_norm": 1.9562271717913728, "learning_rate": 9.996935175004462e-06, "loss": 0.5704, "step": 393 }, { "epoch": 0.04, "grad_norm": 2.4363968316295357, "learning_rate": 9.996875958869803e-06, "loss": 0.5985, "step": 394 }, { "epoch": 0.04, "grad_norm": 1.8544403613485045, "learning_rate": 9.996816176310771e-06, "loss": 0.5727, "step": 395 }, { "epoch": 0.04, "grad_norm": 1.8820693987504735, "learning_rate": 9.996755827334145e-06, "loss": 0.5675, "step": 396 }, { "epoch": 0.04, "grad_norm": 2.008444476428537, "learning_rate": 9.996694911946765e-06, "loss": 0.8005, "step": 397 }, { "epoch": 0.04, "grad_norm": 2.3530880181617486, "learning_rate": 9.996633430155537e-06, "loss": 0.7435, "step": 398 }, { "epoch": 0.04, "grad_norm": 4.325936114579622, "learning_rate": 9.99657138196743e-06, "loss": 0.6764, "step": 399 }, { "epoch": 0.04, "grad_norm": 2.780809596195899, "learning_rate": 9.996508767389477e-06, "loss": 0.7005, "step": 400 }, { "epoch": 0.04, "grad_norm": 1.915152329293021, "learning_rate": 9.996445586428776e-06, "loss": 0.6623, "step": 401 }, { "epoch": 0.04, "grad_norm": 6.956582503534672, "learning_rate": 9.996381839092489e-06, "loss": 0.7644, "step": 402 }, { "epoch": 0.04, "grad_norm": 5.432085078434551, "learning_rate": 9.996317525387841e-06, "loss": 0.7633, "step": 403 }, { "epoch": 0.04, "grad_norm": 2.2033123567972406, "learning_rate": 9.996252645322124e-06, "loss": 0.6479, "step": 404 }, { "epoch": 0.04, "grad_norm": 1.8373000696608504, "learning_rate": 9.996187198902693e-06, "loss": 0.605, "step": 405 }, { "epoch": 0.04, "grad_norm": 2.115896017831422, "learning_rate": 9.996121186136964e-06, "loss": 0.7126, "step": 406 }, { "epoch": 0.04, "grad_norm": 1.9436621726914547, "learning_rate": 9.996054607032425e-06, "loss": 0.5966, "step": 407 }, { "epoch": 0.04, "grad_norm": 3.2683255229608346, "learning_rate": 9.995987461596617e-06, "loss": 0.7094, "step": 408 }, { "epoch": 0.04, "grad_norm": 2.1334958544798432, "learning_rate": 9.995919749837154e-06, "loss": 0.7025, "step": 409 }, { "epoch": 0.04, "grad_norm": 3.3033075412526345, "learning_rate": 9.995851471761711e-06, "loss": 0.7209, "step": 410 }, { "epoch": 0.04, "grad_norm": 2.069310070603776, "learning_rate": 9.99578262737803e-06, "loss": 0.7111, "step": 411 }, { "epoch": 0.04, "grad_norm": 3.7124534446102473, "learning_rate": 9.995713216693913e-06, "loss": 0.7023, "step": 412 }, { "epoch": 0.04, "grad_norm": 2.116142709972029, "learning_rate": 9.995643239717228e-06, "loss": 0.6724, "step": 413 }, { "epoch": 0.04, "grad_norm": 2.04036069618067, "learning_rate": 9.995572696455907e-06, "loss": 0.7658, "step": 414 }, { "epoch": 0.04, "grad_norm": 2.080715602390722, "learning_rate": 9.995501586917949e-06, "loss": 0.67, "step": 415 }, { "epoch": 0.04, "grad_norm": 2.104421238433803, "learning_rate": 9.99542991111141e-06, "loss": 0.6032, "step": 416 }, { "epoch": 0.04, "grad_norm": 2.2301607740265124, "learning_rate": 9.995357669044418e-06, "loss": 0.616, "step": 417 }, { "epoch": 0.04, "grad_norm": 2.0462075925593584, "learning_rate": 9.995284860725162e-06, "loss": 0.6749, "step": 418 }, { "epoch": 0.04, "grad_norm": 2.100344727358706, "learning_rate": 9.995211486161896e-06, "loss": 0.6815, "step": 419 }, { "epoch": 0.04, "grad_norm": 2.3266601471436794, "learning_rate": 9.995137545362937e-06, "loss": 0.8017, "step": 420 }, { "epoch": 0.04, "grad_norm": 1.9367072875868272, "learning_rate": 9.995063038336663e-06, "loss": 0.6028, "step": 421 }, { "epoch": 0.04, "grad_norm": 2.2292281685756214, "learning_rate": 9.994987965091525e-06, "loss": 0.6752, "step": 422 }, { "epoch": 0.04, "grad_norm": 2.129402276054949, "learning_rate": 9.994912325636029e-06, "loss": 0.6371, "step": 423 }, { "epoch": 0.04, "grad_norm": 2.070140533823007, "learning_rate": 9.99483611997875e-06, "loss": 0.8579, "step": 424 }, { "epoch": 0.04, "grad_norm": 3.418866897472175, "learning_rate": 9.994759348128331e-06, "loss": 0.6827, "step": 425 }, { "epoch": 0.04, "grad_norm": 1.8482776838557704, "learning_rate": 9.994682010093468e-06, "loss": 0.6993, "step": 426 }, { "epoch": 0.04, "grad_norm": 2.2212930148340524, "learning_rate": 9.99460410588293e-06, "loss": 0.6308, "step": 427 }, { "epoch": 0.04, "grad_norm": 1.872863356848253, "learning_rate": 9.99452563550555e-06, "loss": 0.6843, "step": 428 }, { "epoch": 0.04, "grad_norm": 2.1550669400337763, "learning_rate": 9.99444659897022e-06, "loss": 0.6675, "step": 429 }, { "epoch": 0.04, "grad_norm": 2.3214088233689716, "learning_rate": 9.994366996285903e-06, "loss": 0.5922, "step": 430 }, { "epoch": 0.04, "grad_norm": 1.9772960079757542, "learning_rate": 9.994286827461616e-06, "loss": 0.5801, "step": 431 }, { "epoch": 0.04, "grad_norm": 2.0146780220081135, "learning_rate": 9.994206092506455e-06, "loss": 0.6058, "step": 432 }, { "epoch": 0.05, "grad_norm": 1.985534965943388, "learning_rate": 9.994124791429565e-06, "loss": 0.6692, "step": 433 }, { "epoch": 0.05, "grad_norm": 2.010878212245351, "learning_rate": 9.994042924240164e-06, "loss": 0.6293, "step": 434 }, { "epoch": 0.05, "grad_norm": 2.2715551445991986, "learning_rate": 9.993960490947533e-06, "loss": 0.6041, "step": 435 }, { "epoch": 0.05, "grad_norm": 2.2311954165914183, "learning_rate": 9.993877491561015e-06, "loss": 0.7262, "step": 436 }, { "epoch": 0.05, "grad_norm": 2.2721565123934253, "learning_rate": 9.99379392609002e-06, "loss": 0.5909, "step": 437 }, { "epoch": 0.05, "grad_norm": 2.0679330089748333, "learning_rate": 9.993709794544022e-06, "loss": 0.6332, "step": 438 }, { "epoch": 0.05, "grad_norm": 2.018072655643991, "learning_rate": 9.993625096932552e-06, "loss": 0.7004, "step": 439 }, { "epoch": 0.05, "grad_norm": 2.1394905686491663, "learning_rate": 9.993539833265216e-06, "loss": 0.6536, "step": 440 }, { "epoch": 0.05, "grad_norm": 2.040527633692036, "learning_rate": 9.993454003551676e-06, "loss": 0.6955, "step": 441 }, { "epoch": 0.05, "grad_norm": 1.890590599198691, "learning_rate": 9.993367607801666e-06, "loss": 0.5941, "step": 442 }, { "epoch": 0.05, "grad_norm": 1.9929644547312386, "learning_rate": 9.993280646024975e-06, "loss": 0.6812, "step": 443 }, { "epoch": 0.05, "grad_norm": 1.961053725767758, "learning_rate": 9.993193118231463e-06, "loss": 0.6022, "step": 444 }, { "epoch": 0.05, "grad_norm": 1.9181967448391344, "learning_rate": 9.993105024431049e-06, "loss": 0.6078, "step": 445 }, { "epoch": 0.05, "grad_norm": 2.968002625002131, "learning_rate": 9.99301636463372e-06, "loss": 0.6773, "step": 446 }, { "epoch": 0.05, "grad_norm": 2.4130336710151465, "learning_rate": 9.99292713884953e-06, "loss": 0.6979, "step": 447 }, { "epoch": 0.05, "grad_norm": 2.1316184739110313, "learning_rate": 9.992837347088589e-06, "loss": 0.6288, "step": 448 }, { "epoch": 0.05, "grad_norm": 1.892151196066065, "learning_rate": 9.992746989361075e-06, "loss": 0.6294, "step": 449 }, { "epoch": 0.05, "grad_norm": 1.9575225798059654, "learning_rate": 9.992656065677234e-06, "loss": 0.7004, "step": 450 }, { "epoch": 0.05, "grad_norm": 2.2829375163660637, "learning_rate": 9.99256457604737e-06, "loss": 0.7471, "step": 451 }, { "epoch": 0.05, "grad_norm": 1.9971984927642012, "learning_rate": 9.992472520481852e-06, "loss": 0.6576, "step": 452 }, { "epoch": 0.05, "grad_norm": 2.2277902080354606, "learning_rate": 9.99237989899112e-06, "loss": 0.7368, "step": 453 }, { "epoch": 0.05, "grad_norm": 1.9400694676938235, "learning_rate": 9.992286711585673e-06, "loss": 0.7523, "step": 454 }, { "epoch": 0.05, "grad_norm": 2.088769530783493, "learning_rate": 9.992192958276068e-06, "loss": 0.7252, "step": 455 }, { "epoch": 0.05, "grad_norm": 1.932950291206959, "learning_rate": 9.99209863907294e-06, "loss": 0.7604, "step": 456 }, { "epoch": 0.05, "grad_norm": 2.024874686283672, "learning_rate": 9.992003753986976e-06, "loss": 0.6524, "step": 457 }, { "epoch": 0.05, "grad_norm": 1.87618503750641, "learning_rate": 9.991908303028932e-06, "loss": 0.6223, "step": 458 }, { "epoch": 0.05, "grad_norm": 2.036129483108596, "learning_rate": 9.99181228620963e-06, "loss": 0.7189, "step": 459 }, { "epoch": 0.05, "grad_norm": 1.785804649378539, "learning_rate": 9.991715703539952e-06, "loss": 0.6044, "step": 460 }, { "epoch": 0.05, "grad_norm": 2.090553085564057, "learning_rate": 9.991618555030848e-06, "loss": 0.6913, "step": 461 }, { "epoch": 0.05, "grad_norm": 1.9098029143505888, "learning_rate": 9.991520840693331e-06, "loss": 0.7355, "step": 462 }, { "epoch": 0.05, "grad_norm": 2.1358723717085457, "learning_rate": 9.991422560538475e-06, "loss": 0.6728, "step": 463 }, { "epoch": 0.05, "grad_norm": 1.7182227416827212, "learning_rate": 9.991323714577421e-06, "loss": 0.6789, "step": 464 }, { "epoch": 0.05, "grad_norm": 1.9285440404571363, "learning_rate": 9.991224302821374e-06, "loss": 0.5603, "step": 465 }, { "epoch": 0.05, "grad_norm": 2.317719534583608, "learning_rate": 9.991124325281603e-06, "loss": 0.8379, "step": 466 }, { "epoch": 0.05, "grad_norm": 1.8938461805189346, "learning_rate": 9.991023781969442e-06, "loss": 0.5413, "step": 467 }, { "epoch": 0.05, "grad_norm": 1.843694757502963, "learning_rate": 9.990922672896288e-06, "loss": 0.6094, "step": 468 }, { "epoch": 0.05, "grad_norm": 2.073935582799042, "learning_rate": 9.9908209980736e-06, "loss": 0.5987, "step": 469 }, { "epoch": 0.05, "grad_norm": 2.2169101162154323, "learning_rate": 9.990718757512906e-06, "loss": 0.6508, "step": 470 }, { "epoch": 0.05, "grad_norm": 2.149787600797134, "learning_rate": 9.990615951225797e-06, "loss": 0.7176, "step": 471 }, { "epoch": 0.05, "grad_norm": 1.9433589280184018, "learning_rate": 9.990512579223921e-06, "loss": 0.7356, "step": 472 }, { "epoch": 0.05, "grad_norm": 2.0519977838803003, "learning_rate": 9.990408641519e-06, "loss": 0.6478, "step": 473 }, { "epoch": 0.05, "grad_norm": 1.9732250611772912, "learning_rate": 9.990304138122818e-06, "loss": 0.7158, "step": 474 }, { "epoch": 0.05, "grad_norm": 2.0862447871994005, "learning_rate": 9.990199069047216e-06, "loss": 0.7862, "step": 475 }, { "epoch": 0.05, "grad_norm": 1.9627513971284538, "learning_rate": 9.990093434304104e-06, "loss": 0.7475, "step": 476 }, { "epoch": 0.05, "grad_norm": 2.5440687784424054, "learning_rate": 9.989987233905462e-06, "loss": 0.6919, "step": 477 }, { "epoch": 0.05, "grad_norm": 2.011594577111942, "learning_rate": 9.989880467863323e-06, "loss": 0.6319, "step": 478 }, { "epoch": 0.05, "grad_norm": 1.8685240663183704, "learning_rate": 9.989773136189793e-06, "loss": 0.6188, "step": 479 }, { "epoch": 0.05, "grad_norm": 2.031643677974096, "learning_rate": 9.989665238897036e-06, "loss": 0.7435, "step": 480 }, { "epoch": 0.05, "grad_norm": 1.9163910387696532, "learning_rate": 9.989556775997284e-06, "loss": 0.6668, "step": 481 }, { "epoch": 0.05, "grad_norm": 2.017766119421384, "learning_rate": 9.989447747502834e-06, "loss": 0.6212, "step": 482 }, { "epoch": 0.05, "grad_norm": 2.145062820353988, "learning_rate": 9.98933815342604e-06, "loss": 0.8157, "step": 483 }, { "epoch": 0.05, "grad_norm": 1.9832549864486289, "learning_rate": 9.989227993779332e-06, "loss": 0.6679, "step": 484 }, { "epoch": 0.05, "grad_norm": 1.8769152114042982, "learning_rate": 9.98911726857519e-06, "loss": 0.6053, "step": 485 }, { "epoch": 0.05, "grad_norm": 2.135665021547759, "learning_rate": 9.98900597782617e-06, "loss": 0.7071, "step": 486 }, { "epoch": 0.05, "grad_norm": 2.1442490339858016, "learning_rate": 9.988894121544885e-06, "loss": 0.671, "step": 487 }, { "epoch": 0.05, "grad_norm": 2.3109603092567896, "learning_rate": 9.988781699744016e-06, "loss": 0.6207, "step": 488 }, { "epoch": 0.05, "grad_norm": 5.03188736806228, "learning_rate": 9.988668712436306e-06, "loss": 0.6313, "step": 489 }, { "epoch": 0.05, "grad_norm": 2.019550529610631, "learning_rate": 9.988555159634563e-06, "loss": 0.6206, "step": 490 }, { "epoch": 0.05, "grad_norm": 2.0704169777481303, "learning_rate": 9.98844104135166e-06, "loss": 0.6363, "step": 491 }, { "epoch": 0.05, "grad_norm": 1.9685873032572212, "learning_rate": 9.98832635760053e-06, "loss": 0.6772, "step": 492 }, { "epoch": 0.05, "grad_norm": 1.8690658409387129, "learning_rate": 9.988211108394177e-06, "loss": 0.6678, "step": 493 }, { "epoch": 0.05, "grad_norm": 2.192593402844829, "learning_rate": 9.98809529374566e-06, "loss": 0.5719, "step": 494 }, { "epoch": 0.05, "grad_norm": 2.064493939887632, "learning_rate": 9.987978913668112e-06, "loss": 0.6047, "step": 495 }, { "epoch": 0.05, "grad_norm": 1.9756919384468792, "learning_rate": 9.987861968174723e-06, "loss": 0.6073, "step": 496 }, { "epoch": 0.05, "grad_norm": 1.9737506247163636, "learning_rate": 9.987744457278753e-06, "loss": 0.6947, "step": 497 }, { "epoch": 0.05, "grad_norm": 1.904287026030052, "learning_rate": 9.987626380993516e-06, "loss": 0.7352, "step": 498 }, { "epoch": 0.05, "grad_norm": 2.0224472723257634, "learning_rate": 9.987507739332401e-06, "loss": 0.614, "step": 499 }, { "epoch": 0.05, "grad_norm": 2.0295872447401706, "learning_rate": 9.987388532308858e-06, "loss": 0.6972, "step": 500 }, { "epoch": 0.05, "grad_norm": 2.0149908366267906, "learning_rate": 9.987268759936396e-06, "loss": 0.7121, "step": 501 }, { "epoch": 0.05, "grad_norm": 2.0962870953861183, "learning_rate": 9.987148422228591e-06, "loss": 0.6515, "step": 502 }, { "epoch": 0.05, "grad_norm": 1.984269566815456, "learning_rate": 9.98702751919909e-06, "loss": 0.7571, "step": 503 }, { "epoch": 0.05, "grad_norm": 2.1123697955385983, "learning_rate": 9.986906050861595e-06, "loss": 0.6053, "step": 504 }, { "epoch": 0.05, "grad_norm": 2.3344562773460167, "learning_rate": 9.986784017229873e-06, "loss": 0.7462, "step": 505 }, { "epoch": 0.05, "grad_norm": 1.9590396488402444, "learning_rate": 9.986661418317759e-06, "loss": 0.6522, "step": 506 }, { "epoch": 0.05, "grad_norm": 2.2031954129578954, "learning_rate": 9.986538254139151e-06, "loss": 0.742, "step": 507 }, { "epoch": 0.05, "grad_norm": 1.8872521125527797, "learning_rate": 9.98641452470801e-06, "loss": 0.5652, "step": 508 }, { "epoch": 0.05, "grad_norm": 2.0562007032310485, "learning_rate": 9.986290230038359e-06, "loss": 0.6684, "step": 509 }, { "epoch": 0.05, "grad_norm": 1.9331683041832113, "learning_rate": 9.986165370144291e-06, "loss": 0.5762, "step": 510 }, { "epoch": 0.05, "grad_norm": 1.9644374337511354, "learning_rate": 9.986039945039959e-06, "loss": 0.6082, "step": 511 }, { "epoch": 0.05, "grad_norm": 1.8950957493260308, "learning_rate": 9.985913954739577e-06, "loss": 0.5627, "step": 512 }, { "epoch": 0.05, "grad_norm": 1.897071876769113, "learning_rate": 9.985787399257431e-06, "loss": 0.6015, "step": 513 }, { "epoch": 0.05, "grad_norm": 2.2177366478263383, "learning_rate": 9.985660278607865e-06, "loss": 0.7003, "step": 514 }, { "epoch": 0.05, "grad_norm": 2.229245926344614, "learning_rate": 9.985532592805289e-06, "loss": 0.6129, "step": 515 }, { "epoch": 0.05, "grad_norm": 2.312963580341358, "learning_rate": 9.985404341864178e-06, "loss": 0.849, "step": 516 }, { "epoch": 0.05, "grad_norm": 2.121250005175201, "learning_rate": 9.985275525799069e-06, "loss": 0.6587, "step": 517 }, { "epoch": 0.05, "grad_norm": 1.999789191521043, "learning_rate": 9.985146144624563e-06, "loss": 0.7018, "step": 518 }, { "epoch": 0.05, "grad_norm": 1.6613885225525393, "learning_rate": 9.985016198355328e-06, "loss": 0.5701, "step": 519 }, { "epoch": 0.05, "grad_norm": 2.048330879977503, "learning_rate": 9.984885687006093e-06, "loss": 0.7511, "step": 520 }, { "epoch": 0.05, "grad_norm": 2.1663530153067065, "learning_rate": 9.98475461059165e-06, "loss": 0.5997, "step": 521 }, { "epoch": 0.05, "grad_norm": 2.1390771494454555, "learning_rate": 9.984622969126864e-06, "loss": 0.7191, "step": 522 }, { "epoch": 0.05, "grad_norm": 1.9328096420880079, "learning_rate": 9.984490762626651e-06, "loss": 0.6176, "step": 523 }, { "epoch": 0.05, "grad_norm": 1.850042905202259, "learning_rate": 9.984357991105999e-06, "loss": 0.7516, "step": 524 }, { "epoch": 0.05, "grad_norm": 2.125264947491382, "learning_rate": 9.984224654579959e-06, "loss": 0.6857, "step": 525 }, { "epoch": 0.05, "grad_norm": 2.34692785182022, "learning_rate": 9.984090753063647e-06, "loss": 0.6944, "step": 526 }, { "epoch": 0.05, "grad_norm": 2.0665039132353433, "learning_rate": 9.983956286572238e-06, "loss": 0.7515, "step": 527 }, { "epoch": 0.05, "grad_norm": 2.085772307929629, "learning_rate": 9.983821255120977e-06, "loss": 0.6996, "step": 528 }, { "epoch": 0.05, "grad_norm": 1.9510182753018945, "learning_rate": 9.98368565872517e-06, "loss": 0.7206, "step": 529 }, { "epoch": 0.06, "grad_norm": 2.357416665846521, "learning_rate": 9.983549497400187e-06, "loss": 0.5892, "step": 530 }, { "epoch": 0.06, "grad_norm": 2.108482481371756, "learning_rate": 9.983412771161463e-06, "loss": 0.605, "step": 531 }, { "epoch": 0.06, "grad_norm": 2.118696322223107, "learning_rate": 9.983275480024498e-06, "loss": 0.5796, "step": 532 }, { "epoch": 0.06, "grad_norm": 1.9269095508935703, "learning_rate": 9.983137624004851e-06, "loss": 0.5309, "step": 533 }, { "epoch": 0.06, "grad_norm": 1.9783461770372632, "learning_rate": 9.982999203118153e-06, "loss": 0.68, "step": 534 }, { "epoch": 0.06, "grad_norm": 2.1354909936380326, "learning_rate": 9.982860217380096e-06, "loss": 0.6303, "step": 535 }, { "epoch": 0.06, "grad_norm": 2.1276687172019457, "learning_rate": 9.982720666806427e-06, "loss": 0.6457, "step": 536 }, { "epoch": 0.06, "grad_norm": 2.5725435712780516, "learning_rate": 9.982580551412972e-06, "loss": 0.6247, "step": 537 }, { "epoch": 0.06, "grad_norm": 2.0827610945764903, "learning_rate": 9.982439871215612e-06, "loss": 0.7351, "step": 538 }, { "epoch": 0.06, "grad_norm": 1.928191390389645, "learning_rate": 9.982298626230295e-06, "loss": 0.6179, "step": 539 }, { "epoch": 0.06, "grad_norm": 2.0571831064927184, "learning_rate": 9.982156816473029e-06, "loss": 0.5946, "step": 540 }, { "epoch": 0.06, "grad_norm": 1.9831697955213374, "learning_rate": 9.982014441959891e-06, "loss": 0.7186, "step": 541 }, { "epoch": 0.06, "grad_norm": 1.8500259652367927, "learning_rate": 9.98187150270702e-06, "loss": 0.6823, "step": 542 }, { "epoch": 0.06, "grad_norm": 2.026854564673327, "learning_rate": 9.981727998730616e-06, "loss": 0.7699, "step": 543 }, { "epoch": 0.06, "grad_norm": 1.9807028683174117, "learning_rate": 9.98158393004695e-06, "loss": 0.6984, "step": 544 }, { "epoch": 0.06, "grad_norm": 2.589963467561242, "learning_rate": 9.981439296672352e-06, "loss": 0.822, "step": 545 }, { "epoch": 0.06, "grad_norm": 1.8283692667191949, "learning_rate": 9.981294098623215e-06, "loss": 0.668, "step": 546 }, { "epoch": 0.06, "grad_norm": 2.0929282423372433, "learning_rate": 9.981148335916e-06, "loss": 0.6257, "step": 547 }, { "epoch": 0.06, "grad_norm": 1.9273830056696597, "learning_rate": 9.98100200856723e-06, "loss": 0.6686, "step": 548 }, { "epoch": 0.06, "grad_norm": 2.1577089469497137, "learning_rate": 9.980855116593494e-06, "loss": 0.6959, "step": 549 }, { "epoch": 0.06, "grad_norm": 1.8461204532541409, "learning_rate": 9.980707660011437e-06, "loss": 0.6695, "step": 550 }, { "epoch": 0.06, "grad_norm": 2.0891244440911216, "learning_rate": 9.980559638837778e-06, "loss": 0.7155, "step": 551 }, { "epoch": 0.06, "grad_norm": 4.60684984565562, "learning_rate": 9.980411053089298e-06, "loss": 0.7105, "step": 552 }, { "epoch": 0.06, "grad_norm": 2.274419285519808, "learning_rate": 9.980261902782835e-06, "loss": 0.6461, "step": 553 }, { "epoch": 0.06, "grad_norm": 2.1995811698396714, "learning_rate": 9.9801121879353e-06, "loss": 0.6659, "step": 554 }, { "epoch": 0.06, "grad_norm": 2.24104794740926, "learning_rate": 9.979961908563663e-06, "loss": 0.6843, "step": 555 }, { "epoch": 0.06, "grad_norm": 2.027896058717004, "learning_rate": 9.97981106468496e-06, "loss": 0.6571, "step": 556 }, { "epoch": 0.06, "grad_norm": 1.8618556456848303, "learning_rate": 9.979659656316288e-06, "loss": 0.6097, "step": 557 }, { "epoch": 0.06, "grad_norm": 2.011473620144725, "learning_rate": 9.97950768347481e-06, "loss": 0.5543, "step": 558 }, { "epoch": 0.06, "grad_norm": 1.8198767395429643, "learning_rate": 9.979355146177754e-06, "loss": 0.5747, "step": 559 }, { "epoch": 0.06, "grad_norm": 2.1523169045634907, "learning_rate": 9.97920204444241e-06, "loss": 0.5956, "step": 560 }, { "epoch": 0.06, "grad_norm": 2.1313458275799584, "learning_rate": 9.979048378286134e-06, "loss": 0.6804, "step": 561 }, { "epoch": 0.06, "grad_norm": 1.972092650478314, "learning_rate": 9.978894147726346e-06, "loss": 0.685, "step": 562 }, { "epoch": 0.06, "grad_norm": 2.0429159532350414, "learning_rate": 9.978739352780528e-06, "loss": 0.5422, "step": 563 }, { "epoch": 0.06, "grad_norm": 2.4276858350172383, "learning_rate": 9.978583993466224e-06, "loss": 0.6217, "step": 564 }, { "epoch": 0.06, "grad_norm": 2.117733238279458, "learning_rate": 9.97842806980105e-06, "loss": 0.8119, "step": 565 }, { "epoch": 0.06, "grad_norm": 2.0507740651286666, "learning_rate": 9.978271581802677e-06, "loss": 0.6132, "step": 566 }, { "epoch": 0.06, "grad_norm": 1.8381248701512136, "learning_rate": 9.978114529488845e-06, "loss": 0.6151, "step": 567 }, { "epoch": 0.06, "grad_norm": 2.251649730917416, "learning_rate": 9.977956912877356e-06, "loss": 0.6969, "step": 568 }, { "epoch": 0.06, "grad_norm": 2.085417466229146, "learning_rate": 9.977798731986079e-06, "loss": 0.6001, "step": 569 }, { "epoch": 0.06, "grad_norm": 2.1417707930582948, "learning_rate": 9.977639986832943e-06, "loss": 0.6477, "step": 570 }, { "epoch": 0.06, "grad_norm": 2.002691296527279, "learning_rate": 9.977480677435942e-06, "loss": 0.6204, "step": 571 }, { "epoch": 0.06, "grad_norm": 2.3573137310537087, "learning_rate": 9.977320803813137e-06, "loss": 0.666, "step": 572 }, { "epoch": 0.06, "grad_norm": 2.13178658494689, "learning_rate": 9.977160365982647e-06, "loss": 0.6276, "step": 573 }, { "epoch": 0.06, "grad_norm": 1.9299279931740343, "learning_rate": 9.976999363962663e-06, "loss": 0.6463, "step": 574 }, { "epoch": 0.06, "grad_norm": 2.1014604248299285, "learning_rate": 9.97683779777143e-06, "loss": 0.6793, "step": 575 }, { "epoch": 0.06, "grad_norm": 1.8767137873590285, "learning_rate": 9.976675667427268e-06, "loss": 0.5687, "step": 576 }, { "epoch": 0.06, "grad_norm": 1.9640671342301481, "learning_rate": 9.976512972948553e-06, "loss": 0.6154, "step": 577 }, { "epoch": 0.06, "grad_norm": 1.7884320005984804, "learning_rate": 9.976349714353729e-06, "loss": 0.6065, "step": 578 }, { "epoch": 0.06, "grad_norm": 1.950622961463605, "learning_rate": 9.976185891661296e-06, "loss": 0.7064, "step": 579 }, { "epoch": 0.06, "grad_norm": 2.3083440495765513, "learning_rate": 9.976021504889833e-06, "loss": 0.6901, "step": 580 }, { "epoch": 0.06, "grad_norm": 1.963846800423321, "learning_rate": 9.975856554057968e-06, "loss": 0.5613, "step": 581 }, { "epoch": 0.06, "grad_norm": 2.0211788630851535, "learning_rate": 9.9756910391844e-06, "loss": 0.6375, "step": 582 }, { "epoch": 0.06, "grad_norm": 1.8268256773710685, "learning_rate": 9.975524960287895e-06, "loss": 0.671, "step": 583 }, { "epoch": 0.06, "grad_norm": 1.7600000040813513, "learning_rate": 9.975358317387277e-06, "loss": 0.5318, "step": 584 }, { "epoch": 0.06, "grad_norm": 2.049565771650243, "learning_rate": 9.975191110501432e-06, "loss": 0.6675, "step": 585 }, { "epoch": 0.06, "grad_norm": 2.314117076668117, "learning_rate": 9.975023339649317e-06, "loss": 0.6742, "step": 586 }, { "epoch": 0.06, "grad_norm": 2.0701957129127018, "learning_rate": 9.974855004849952e-06, "loss": 0.7539, "step": 587 }, { "epoch": 0.06, "grad_norm": 1.90648641172633, "learning_rate": 9.974686106122415e-06, "loss": 0.6037, "step": 588 }, { "epoch": 0.06, "grad_norm": 1.9875373685123054, "learning_rate": 9.974516643485852e-06, "loss": 0.7258, "step": 589 }, { "epoch": 0.06, "grad_norm": 2.2446293325637647, "learning_rate": 9.974346616959476e-06, "loss": 0.7915, "step": 590 }, { "epoch": 0.06, "grad_norm": 2.106574799745109, "learning_rate": 9.974176026562558e-06, "loss": 0.6764, "step": 591 }, { "epoch": 0.06, "grad_norm": 2.062901024543719, "learning_rate": 9.974004872314435e-06, "loss": 0.6992, "step": 592 }, { "epoch": 0.06, "grad_norm": 2.0645404013113877, "learning_rate": 9.97383315423451e-06, "loss": 0.7663, "step": 593 }, { "epoch": 0.06, "grad_norm": 2.1783453708050393, "learning_rate": 9.973660872342244e-06, "loss": 0.7534, "step": 594 }, { "epoch": 0.06, "grad_norm": 2.040114380019622, "learning_rate": 9.973488026657171e-06, "loss": 0.7122, "step": 595 }, { "epoch": 0.06, "grad_norm": 2.063422864247258, "learning_rate": 9.973314617198881e-06, "loss": 0.6104, "step": 596 }, { "epoch": 0.06, "grad_norm": 1.8912025270720374, "learning_rate": 9.973140643987034e-06, "loss": 0.611, "step": 597 }, { "epoch": 0.06, "grad_norm": 2.0682555441292996, "learning_rate": 9.972966107041349e-06, "loss": 0.748, "step": 598 }, { "epoch": 0.06, "grad_norm": 2.0628034068097745, "learning_rate": 9.97279100638161e-06, "loss": 0.7246, "step": 599 }, { "epoch": 0.06, "grad_norm": 2.084889035523409, "learning_rate": 9.972615342027667e-06, "loss": 0.7109, "step": 600 }, { "epoch": 0.06, "grad_norm": 1.8611852818863357, "learning_rate": 9.972439113999431e-06, "loss": 0.6385, "step": 601 }, { "epoch": 0.06, "grad_norm": 2.0416032338007635, "learning_rate": 9.97226232231688e-06, "loss": 0.7019, "step": 602 }, { "epoch": 0.06, "grad_norm": 1.9483099939451032, "learning_rate": 9.972084967000055e-06, "loss": 0.7654, "step": 603 }, { "epoch": 0.06, "grad_norm": 1.7570415982383194, "learning_rate": 9.971907048069058e-06, "loss": 0.5443, "step": 604 }, { "epoch": 0.06, "grad_norm": 2.3140913687038647, "learning_rate": 9.97172856554406e-06, "loss": 0.6249, "step": 605 }, { "epoch": 0.06, "grad_norm": 1.943667180111752, "learning_rate": 9.971549519445288e-06, "loss": 0.7652, "step": 606 }, { "epoch": 0.06, "grad_norm": 2.2683476874649537, "learning_rate": 9.971369909793043e-06, "loss": 0.5836, "step": 607 }, { "epoch": 0.06, "grad_norm": 1.9552541335662836, "learning_rate": 9.971189736607681e-06, "loss": 0.6996, "step": 608 }, { "epoch": 0.06, "grad_norm": 2.054084324981065, "learning_rate": 9.97100899990963e-06, "loss": 0.7218, "step": 609 }, { "epoch": 0.06, "grad_norm": 2.1292215277853184, "learning_rate": 9.970827699719372e-06, "loss": 0.7322, "step": 610 }, { "epoch": 0.06, "grad_norm": 2.2273055903437506, "learning_rate": 9.970645836057464e-06, "loss": 0.7003, "step": 611 }, { "epoch": 0.06, "grad_norm": 1.9357280807082045, "learning_rate": 9.97046340894452e-06, "loss": 0.7023, "step": 612 }, { "epoch": 0.06, "grad_norm": 2.197029414583496, "learning_rate": 9.970280418401215e-06, "loss": 0.68, "step": 613 }, { "epoch": 0.06, "grad_norm": 2.0053051191734634, "learning_rate": 9.970096864448296e-06, "loss": 0.7367, "step": 614 }, { "epoch": 0.06, "grad_norm": 2.1301040443505563, "learning_rate": 9.96991274710657e-06, "loss": 0.6358, "step": 615 }, { "epoch": 0.06, "grad_norm": 2.419659091613593, "learning_rate": 9.969728066396904e-06, "loss": 0.7072, "step": 616 }, { "epoch": 0.06, "grad_norm": 1.9261589500687413, "learning_rate": 9.969542822340238e-06, "loss": 0.6603, "step": 617 }, { "epoch": 0.06, "grad_norm": 2.1022673917783963, "learning_rate": 9.969357014957564e-06, "loss": 0.7254, "step": 618 }, { "epoch": 0.06, "grad_norm": 2.182516114339237, "learning_rate": 9.96917064426995e-06, "loss": 0.6666, "step": 619 }, { "epoch": 0.06, "grad_norm": 2.3098245453776904, "learning_rate": 9.968983710298522e-06, "loss": 0.7824, "step": 620 }, { "epoch": 0.06, "grad_norm": 1.9539441765468966, "learning_rate": 9.968796213064466e-06, "loss": 0.7051, "step": 621 }, { "epoch": 0.06, "grad_norm": 2.0138948888168176, "learning_rate": 9.968608152589038e-06, "loss": 0.7378, "step": 622 }, { "epoch": 0.06, "grad_norm": 2.175463284078723, "learning_rate": 9.968419528893555e-06, "loss": 0.6006, "step": 623 }, { "epoch": 0.06, "grad_norm": 2.3105811086307466, "learning_rate": 9.968230341999403e-06, "loss": 0.763, "step": 624 }, { "epoch": 0.06, "grad_norm": 2.08502012437374, "learning_rate": 9.96804059192802e-06, "loss": 0.6397, "step": 625 }, { "epoch": 0.07, "grad_norm": 2.5120934211323087, "learning_rate": 9.96785027870092e-06, "loss": 0.6508, "step": 626 }, { "epoch": 0.07, "grad_norm": 2.131440836614907, "learning_rate": 9.967659402339677e-06, "loss": 0.677, "step": 627 }, { "epoch": 0.07, "grad_norm": 2.2797304046639217, "learning_rate": 9.967467962865925e-06, "loss": 0.8344, "step": 628 }, { "epoch": 0.07, "grad_norm": 1.8568138944212114, "learning_rate": 9.967275960301364e-06, "loss": 0.6212, "step": 629 }, { "epoch": 0.07, "grad_norm": 1.9700358484439096, "learning_rate": 9.967083394667763e-06, "loss": 0.7429, "step": 630 }, { "epoch": 0.07, "grad_norm": 1.9885571768773675, "learning_rate": 9.966890265986947e-06, "loss": 0.6928, "step": 631 }, { "epoch": 0.07, "grad_norm": 1.8696680843723692, "learning_rate": 9.966696574280808e-06, "loss": 0.639, "step": 632 }, { "epoch": 0.07, "grad_norm": 1.9946364083173456, "learning_rate": 9.966502319571303e-06, "loss": 0.7326, "step": 633 }, { "epoch": 0.07, "grad_norm": 1.9618197771145713, "learning_rate": 9.966307501880452e-06, "loss": 0.6483, "step": 634 }, { "epoch": 0.07, "grad_norm": 1.9669889529206006, "learning_rate": 9.966112121230341e-06, "loss": 0.6443, "step": 635 }, { "epoch": 0.07, "grad_norm": 2.0924747286776855, "learning_rate": 9.965916177643112e-06, "loss": 0.738, "step": 636 }, { "epoch": 0.07, "grad_norm": 1.935928912743703, "learning_rate": 9.965719671140981e-06, "loss": 0.776, "step": 637 }, { "epoch": 0.07, "grad_norm": 2.047670358994524, "learning_rate": 9.965522601746222e-06, "loss": 0.767, "step": 638 }, { "epoch": 0.07, "grad_norm": 1.9296679711225158, "learning_rate": 9.965324969481172e-06, "loss": 0.6554, "step": 639 }, { "epoch": 0.07, "grad_norm": 2.1303563423399687, "learning_rate": 9.965126774368237e-06, "loss": 0.6197, "step": 640 }, { "epoch": 0.07, "grad_norm": 2.074249841183784, "learning_rate": 9.964928016429883e-06, "loss": 0.5803, "step": 641 }, { "epoch": 0.07, "grad_norm": 1.9354294943036128, "learning_rate": 9.964728695688635e-06, "loss": 0.5228, "step": 642 }, { "epoch": 0.07, "grad_norm": 1.8052649810413786, "learning_rate": 9.964528812167095e-06, "loss": 0.7194, "step": 643 }, { "epoch": 0.07, "grad_norm": 4.177670415548599, "learning_rate": 9.964328365887917e-06, "loss": 0.7194, "step": 644 }, { "epoch": 0.07, "grad_norm": 1.9997756821719956, "learning_rate": 9.964127356873821e-06, "loss": 0.6131, "step": 645 }, { "epoch": 0.07, "grad_norm": 1.8801154696599298, "learning_rate": 9.963925785147595e-06, "loss": 0.6044, "step": 646 }, { "epoch": 0.07, "grad_norm": 2.0091370827752275, "learning_rate": 9.96372365073209e-06, "loss": 0.7101, "step": 647 }, { "epoch": 0.07, "grad_norm": 1.839460149960612, "learning_rate": 9.963520953650214e-06, "loss": 0.6756, "step": 648 }, { "epoch": 0.07, "grad_norm": 2.036076135546113, "learning_rate": 9.963317693924947e-06, "loss": 0.6115, "step": 649 }, { "epoch": 0.07, "grad_norm": 2.545887352696099, "learning_rate": 9.963113871579332e-06, "loss": 0.7111, "step": 650 }, { "epoch": 0.07, "grad_norm": 2.0933171239683985, "learning_rate": 9.96290948663647e-06, "loss": 0.6289, "step": 651 }, { "epoch": 0.07, "grad_norm": 1.9363189028685541, "learning_rate": 9.962704539119528e-06, "loss": 0.6259, "step": 652 }, { "epoch": 0.07, "grad_norm": 2.064128197173807, "learning_rate": 9.962499029051742e-06, "loss": 0.6223, "step": 653 }, { "epoch": 0.07, "grad_norm": 1.8337003404299514, "learning_rate": 9.962292956456405e-06, "loss": 0.6341, "step": 654 }, { "epoch": 0.07, "grad_norm": 2.094853295361747, "learning_rate": 9.962086321356878e-06, "loss": 0.6245, "step": 655 }, { "epoch": 0.07, "grad_norm": 1.977672637587837, "learning_rate": 9.961879123776584e-06, "loss": 0.6979, "step": 656 }, { "epoch": 0.07, "grad_norm": 2.1712110101678403, "learning_rate": 9.961671363739008e-06, "loss": 0.6226, "step": 657 }, { "epoch": 0.07, "grad_norm": 1.951254338928478, "learning_rate": 9.961463041267703e-06, "loss": 0.6741, "step": 658 }, { "epoch": 0.07, "grad_norm": 2.170459383653736, "learning_rate": 9.961254156386282e-06, "loss": 0.6541, "step": 659 }, { "epoch": 0.07, "grad_norm": 2.1749433390231916, "learning_rate": 9.961044709118425e-06, "loss": 0.7235, "step": 660 }, { "epoch": 0.07, "grad_norm": 2.3401410783814383, "learning_rate": 9.960834699487873e-06, "loss": 0.6685, "step": 661 }, { "epoch": 0.07, "grad_norm": 1.9667835737612456, "learning_rate": 9.960624127518432e-06, "loss": 0.6615, "step": 662 }, { "epoch": 0.07, "grad_norm": 1.928408867792227, "learning_rate": 9.960412993233973e-06, "loss": 0.6487, "step": 663 }, { "epoch": 0.07, "grad_norm": 2.1721273990729517, "learning_rate": 9.960201296658425e-06, "loss": 0.6058, "step": 664 }, { "epoch": 0.07, "grad_norm": 2.391564010754368, "learning_rate": 9.959989037815789e-06, "loss": 0.5665, "step": 665 }, { "epoch": 0.07, "grad_norm": 1.944892387838344, "learning_rate": 9.959776216730125e-06, "loss": 0.6924, "step": 666 }, { "epoch": 0.07, "grad_norm": 2.236898508390765, "learning_rate": 9.959562833425557e-06, "loss": 0.6553, "step": 667 }, { "epoch": 0.07, "grad_norm": 1.9932574533230005, "learning_rate": 9.959348887926274e-06, "loss": 0.7432, "step": 668 }, { "epoch": 0.07, "grad_norm": 2.027491358248483, "learning_rate": 9.959134380256525e-06, "loss": 0.6426, "step": 669 }, { "epoch": 0.07, "grad_norm": 2.0001475575651027, "learning_rate": 9.95891931044063e-06, "loss": 0.6808, "step": 670 }, { "epoch": 0.07, "grad_norm": 1.820682926080947, "learning_rate": 9.958703678502966e-06, "loss": 0.6167, "step": 671 }, { "epoch": 0.07, "grad_norm": 2.018955263359039, "learning_rate": 9.958487484467976e-06, "loss": 0.6538, "step": 672 }, { "epoch": 0.07, "grad_norm": 2.040371696124914, "learning_rate": 9.958270728360166e-06, "loss": 0.6865, "step": 673 }, { "epoch": 0.07, "grad_norm": 2.1337195894507404, "learning_rate": 9.95805341020411e-06, "loss": 0.6423, "step": 674 }, { "epoch": 0.07, "grad_norm": 2.01030394805898, "learning_rate": 9.957835530024438e-06, "loss": 0.6973, "step": 675 }, { "epoch": 0.07, "grad_norm": 2.220551541879092, "learning_rate": 9.95761708784585e-06, "loss": 0.6665, "step": 676 }, { "epoch": 0.07, "grad_norm": 2.0803421961018715, "learning_rate": 9.95739808369311e-06, "loss": 0.711, "step": 677 }, { "epoch": 0.07, "grad_norm": 2.2906932549637853, "learning_rate": 9.95717851759104e-06, "loss": 0.7801, "step": 678 }, { "epoch": 0.07, "grad_norm": 2.13204944489717, "learning_rate": 9.956958389564528e-06, "loss": 0.6244, "step": 679 }, { "epoch": 0.07, "grad_norm": 2.0425735599412174, "learning_rate": 9.95673769963853e-06, "loss": 0.7015, "step": 680 }, { "epoch": 0.07, "grad_norm": 1.9345199013727183, "learning_rate": 9.956516447838063e-06, "loss": 0.6477, "step": 681 }, { "epoch": 0.07, "grad_norm": 1.7219188066839115, "learning_rate": 9.956294634188204e-06, "loss": 0.5635, "step": 682 }, { "epoch": 0.07, "grad_norm": 2.2382443419248705, "learning_rate": 9.956072258714097e-06, "loss": 0.7586, "step": 683 }, { "epoch": 0.07, "grad_norm": 1.9477024443776676, "learning_rate": 9.955849321440953e-06, "loss": 0.728, "step": 684 }, { "epoch": 0.07, "grad_norm": 1.9813355258791023, "learning_rate": 9.95562582239404e-06, "loss": 0.6703, "step": 685 }, { "epoch": 0.07, "grad_norm": 2.1476591448584776, "learning_rate": 9.955401761598693e-06, "loss": 0.6842, "step": 686 }, { "epoch": 0.07, "grad_norm": 2.0908641901740936, "learning_rate": 9.955177139080312e-06, "loss": 0.7313, "step": 687 }, { "epoch": 0.07, "grad_norm": 2.084194905379439, "learning_rate": 9.954951954864361e-06, "loss": 0.7477, "step": 688 }, { "epoch": 0.07, "grad_norm": 1.9635843996926026, "learning_rate": 9.954726208976361e-06, "loss": 0.6205, "step": 689 }, { "epoch": 0.07, "grad_norm": 1.9130869685081529, "learning_rate": 9.954499901441905e-06, "loss": 0.6619, "step": 690 }, { "epoch": 0.07, "grad_norm": 2.037636079788371, "learning_rate": 9.954273032286646e-06, "loss": 0.6274, "step": 691 }, { "epoch": 0.07, "grad_norm": 1.9224440773396205, "learning_rate": 9.9540456015363e-06, "loss": 0.6808, "step": 692 }, { "epoch": 0.07, "grad_norm": 2.134253881991058, "learning_rate": 9.953817609216647e-06, "loss": 0.7179, "step": 693 }, { "epoch": 0.07, "grad_norm": 2.120477071210494, "learning_rate": 9.953589055353534e-06, "loss": 0.6869, "step": 694 }, { "epoch": 0.07, "grad_norm": 2.05650737737351, "learning_rate": 9.953359939972866e-06, "loss": 0.6046, "step": 695 }, { "epoch": 0.07, "grad_norm": 1.886773870673862, "learning_rate": 9.953130263100615e-06, "loss": 0.7154, "step": 696 }, { "epoch": 0.07, "grad_norm": 2.036217797024567, "learning_rate": 9.952900024762818e-06, "loss": 0.761, "step": 697 }, { "epoch": 0.07, "grad_norm": 2.2783625596602746, "learning_rate": 9.952669224985572e-06, "loss": 0.6307, "step": 698 }, { "epoch": 0.07, "grad_norm": 1.9435212040471868, "learning_rate": 9.95243786379504e-06, "loss": 0.7085, "step": 699 }, { "epoch": 0.07, "grad_norm": 1.804310202195145, "learning_rate": 9.952205941217449e-06, "loss": 0.6781, "step": 700 }, { "epoch": 0.07, "grad_norm": 2.06225387355469, "learning_rate": 9.951973457279087e-06, "loss": 0.7211, "step": 701 }, { "epoch": 0.07, "grad_norm": 2.076961953947264, "learning_rate": 9.951740412006308e-06, "loss": 0.6094, "step": 702 }, { "epoch": 0.07, "grad_norm": 1.985672552340446, "learning_rate": 9.951506805425531e-06, "loss": 0.7098, "step": 703 }, { "epoch": 0.07, "grad_norm": 1.8527325941283108, "learning_rate": 9.951272637563233e-06, "loss": 0.5818, "step": 704 }, { "epoch": 0.07, "grad_norm": 1.9109812540751534, "learning_rate": 9.951037908445961e-06, "loss": 0.6898, "step": 705 }, { "epoch": 0.07, "grad_norm": 2.020324053220724, "learning_rate": 9.950802618100323e-06, "loss": 0.7053, "step": 706 }, { "epoch": 0.07, "grad_norm": 2.1678914704220067, "learning_rate": 9.950566766552989e-06, "loss": 0.6887, "step": 707 }, { "epoch": 0.07, "grad_norm": 2.0424556490788475, "learning_rate": 9.950330353830694e-06, "loss": 0.7281, "step": 708 }, { "epoch": 0.07, "grad_norm": 1.9996565401479152, "learning_rate": 9.950093379960238e-06, "loss": 0.6922, "step": 709 }, { "epoch": 0.07, "grad_norm": 1.9744601447827723, "learning_rate": 9.949855844968484e-06, "loss": 0.6319, "step": 710 }, { "epoch": 0.07, "grad_norm": 1.9385415848049423, "learning_rate": 9.949617748882354e-06, "loss": 0.6659, "step": 711 }, { "epoch": 0.07, "grad_norm": 2.078954845276994, "learning_rate": 9.949379091728843e-06, "loss": 0.6764, "step": 712 }, { "epoch": 0.07, "grad_norm": 1.9193955270459089, "learning_rate": 9.949139873535e-06, "loss": 0.6107, "step": 713 }, { "epoch": 0.07, "grad_norm": 1.8955909321628803, "learning_rate": 9.948900094327943e-06, "loss": 0.6845, "step": 714 }, { "epoch": 0.07, "grad_norm": 1.932089420380341, "learning_rate": 9.948659754134852e-06, "loss": 0.6588, "step": 715 }, { "epoch": 0.07, "grad_norm": 1.8948454482363528, "learning_rate": 9.948418852982973e-06, "loss": 0.5758, "step": 716 }, { "epoch": 0.07, "grad_norm": 1.8286956041099895, "learning_rate": 9.948177390899611e-06, "loss": 0.6549, "step": 717 }, { "epoch": 0.07, "grad_norm": 1.894187924245138, "learning_rate": 9.94793536791214e-06, "loss": 0.5799, "step": 718 }, { "epoch": 0.07, "grad_norm": 2.0176210263591576, "learning_rate": 9.94769278404799e-06, "loss": 0.6644, "step": 719 }, { "epoch": 0.07, "grad_norm": 1.8158957435685823, "learning_rate": 9.947449639334663e-06, "loss": 0.7072, "step": 720 }, { "epoch": 0.07, "grad_norm": 1.8892811407727792, "learning_rate": 9.94720593379972e-06, "loss": 0.6623, "step": 721 }, { "epoch": 0.08, "grad_norm": 2.201408847040572, "learning_rate": 9.946961667470787e-06, "loss": 0.6785, "step": 722 }, { "epoch": 0.08, "grad_norm": 1.8596998671238845, "learning_rate": 9.946716840375552e-06, "loss": 0.6325, "step": 723 }, { "epoch": 0.08, "grad_norm": 2.1929689551861618, "learning_rate": 9.946471452541768e-06, "loss": 0.7123, "step": 724 }, { "epoch": 0.08, "grad_norm": 1.8739563959458077, "learning_rate": 9.94622550399725e-06, "loss": 0.7399, "step": 725 }, { "epoch": 0.08, "grad_norm": 1.9897122706062023, "learning_rate": 9.945978994769878e-06, "loss": 0.7497, "step": 726 }, { "epoch": 0.08, "grad_norm": 1.9516686408380788, "learning_rate": 9.945731924887598e-06, "loss": 0.6577, "step": 727 }, { "epoch": 0.08, "grad_norm": 1.7720438709863602, "learning_rate": 9.945484294378413e-06, "loss": 0.6154, "step": 728 }, { "epoch": 0.08, "grad_norm": 2.249334522149298, "learning_rate": 9.945236103270395e-06, "loss": 0.7274, "step": 729 }, { "epoch": 0.08, "grad_norm": 1.8705068155172135, "learning_rate": 9.944987351591677e-06, "loss": 0.5798, "step": 730 }, { "epoch": 0.08, "grad_norm": 2.8901122235028085, "learning_rate": 9.944738039370458e-06, "loss": 0.7301, "step": 731 }, { "epoch": 0.08, "grad_norm": 2.0901188573895166, "learning_rate": 9.944488166635e-06, "loss": 0.6661, "step": 732 }, { "epoch": 0.08, "grad_norm": 2.0716119906288104, "learning_rate": 9.944237733413623e-06, "loss": 0.7104, "step": 733 }, { "epoch": 0.08, "grad_norm": 2.0084060471211003, "learning_rate": 9.943986739734718e-06, "loss": 0.7807, "step": 734 }, { "epoch": 0.08, "grad_norm": 2.126162332371512, "learning_rate": 9.943735185626739e-06, "loss": 0.7289, "step": 735 }, { "epoch": 0.08, "grad_norm": 2.048734123772361, "learning_rate": 9.943483071118197e-06, "loss": 0.7006, "step": 736 }, { "epoch": 0.08, "grad_norm": 2.232634046448412, "learning_rate": 9.94323039623767e-06, "loss": 0.7472, "step": 737 }, { "epoch": 0.08, "grad_norm": 2.0219318375835824, "learning_rate": 9.942977161013802e-06, "loss": 0.6657, "step": 738 }, { "epoch": 0.08, "grad_norm": 1.9114763004154225, "learning_rate": 9.942723365475301e-06, "loss": 0.7154, "step": 739 }, { "epoch": 0.08, "grad_norm": 2.2527373356434115, "learning_rate": 9.942469009650933e-06, "loss": 0.732, "step": 740 }, { "epoch": 0.08, "grad_norm": 2.2324878692245385, "learning_rate": 9.942214093569534e-06, "loss": 0.7158, "step": 741 }, { "epoch": 0.08, "grad_norm": 2.1365648202427594, "learning_rate": 9.941958617259994e-06, "loss": 0.722, "step": 742 }, { "epoch": 0.08, "grad_norm": 2.1565366949992963, "learning_rate": 9.941702580751278e-06, "loss": 0.7789, "step": 743 }, { "epoch": 0.08, "grad_norm": 1.9027629563938848, "learning_rate": 9.941445984072408e-06, "loss": 0.6342, "step": 744 }, { "epoch": 0.08, "grad_norm": 2.1215899854013403, "learning_rate": 9.941188827252471e-06, "loss": 0.6236, "step": 745 }, { "epoch": 0.08, "grad_norm": 2.124700681765373, "learning_rate": 9.940931110320615e-06, "loss": 0.6907, "step": 746 }, { "epoch": 0.08, "grad_norm": 1.894826319876532, "learning_rate": 9.940672833306056e-06, "loss": 0.648, "step": 747 }, { "epoch": 0.08, "grad_norm": 2.1683138583935646, "learning_rate": 9.940413996238071e-06, "loss": 0.6152, "step": 748 }, { "epoch": 0.08, "grad_norm": 2.304827301613214, "learning_rate": 9.940154599145998e-06, "loss": 0.6609, "step": 749 }, { "epoch": 0.08, "grad_norm": 2.1667628822195653, "learning_rate": 9.939894642059248e-06, "loss": 0.773, "step": 750 }, { "epoch": 0.08, "grad_norm": 2.2670965613851357, "learning_rate": 9.939634125007279e-06, "loss": 0.749, "step": 751 }, { "epoch": 0.08, "grad_norm": 1.9105308819933733, "learning_rate": 9.939373048019629e-06, "loss": 0.5988, "step": 752 }, { "epoch": 0.08, "grad_norm": 2.001684787938153, "learning_rate": 9.93911141112589e-06, "loss": 0.756, "step": 753 }, { "epoch": 0.08, "grad_norm": 1.9897912439886951, "learning_rate": 9.938849214355722e-06, "loss": 0.5951, "step": 754 }, { "epoch": 0.08, "grad_norm": 1.8260079718444404, "learning_rate": 9.938586457738844e-06, "loss": 0.6922, "step": 755 }, { "epoch": 0.08, "grad_norm": 2.0208943552103538, "learning_rate": 9.938323141305042e-06, "loss": 0.6881, "step": 756 }, { "epoch": 0.08, "grad_norm": 1.8892014681961153, "learning_rate": 9.938059265084163e-06, "loss": 0.6852, "step": 757 }, { "epoch": 0.08, "grad_norm": 2.132937380951946, "learning_rate": 9.937794829106122e-06, "loss": 0.7, "step": 758 }, { "epoch": 0.08, "grad_norm": 2.0088322043327507, "learning_rate": 9.937529833400892e-06, "loss": 0.6808, "step": 759 }, { "epoch": 0.08, "grad_norm": 2.041185119184499, "learning_rate": 9.937264277998513e-06, "loss": 0.5472, "step": 760 }, { "epoch": 0.08, "grad_norm": 1.8536310070000221, "learning_rate": 9.936998162929086e-06, "loss": 0.5643, "step": 761 }, { "epoch": 0.08, "grad_norm": 2.001083354207652, "learning_rate": 9.936731488222776e-06, "loss": 0.6498, "step": 762 }, { "epoch": 0.08, "grad_norm": 2.0706938113208717, "learning_rate": 9.936464253909817e-06, "loss": 0.6487, "step": 763 }, { "epoch": 0.08, "grad_norm": 2.0794572945560916, "learning_rate": 9.936196460020496e-06, "loss": 0.6505, "step": 764 }, { "epoch": 0.08, "grad_norm": 2.1561768544194146, "learning_rate": 9.93592810658517e-06, "loss": 0.6906, "step": 765 }, { "epoch": 0.08, "grad_norm": 2.008123268630148, "learning_rate": 9.935659193634261e-06, "loss": 0.6367, "step": 766 }, { "epoch": 0.08, "grad_norm": 2.25399791489673, "learning_rate": 9.935389721198249e-06, "loss": 0.7417, "step": 767 }, { "epoch": 0.08, "grad_norm": 1.8352434635664747, "learning_rate": 9.935119689307682e-06, "loss": 0.6074, "step": 768 }, { "epoch": 0.08, "grad_norm": 2.326860474330298, "learning_rate": 9.934849097993168e-06, "loss": 0.7976, "step": 769 }, { "epoch": 0.08, "grad_norm": 1.8668608391500925, "learning_rate": 9.934577947285382e-06, "loss": 0.7362, "step": 770 }, { "epoch": 0.08, "grad_norm": 1.877597080458074, "learning_rate": 9.934306237215057e-06, "loss": 0.5724, "step": 771 }, { "epoch": 0.08, "grad_norm": 1.8322003762305377, "learning_rate": 9.934033967812998e-06, "loss": 0.6854, "step": 772 }, { "epoch": 0.08, "grad_norm": 1.7621008758677785, "learning_rate": 9.933761139110065e-06, "loss": 0.6506, "step": 773 }, { "epoch": 0.08, "grad_norm": 2.0299854137963975, "learning_rate": 9.933487751137185e-06, "loss": 0.6442, "step": 774 }, { "epoch": 0.08, "grad_norm": 1.8777959177763561, "learning_rate": 9.93321380392535e-06, "loss": 0.7335, "step": 775 }, { "epoch": 0.08, "grad_norm": 2.3950292493331333, "learning_rate": 9.932939297505611e-06, "loss": 0.7287, "step": 776 }, { "epoch": 0.08, "grad_norm": 2.148775632603258, "learning_rate": 9.932664231909087e-06, "loss": 0.6965, "step": 777 }, { "epoch": 0.08, "grad_norm": 2.3030378873226254, "learning_rate": 9.932388607166954e-06, "loss": 0.5862, "step": 778 }, { "epoch": 0.08, "grad_norm": 1.8487026804028814, "learning_rate": 9.93211242331046e-06, "loss": 0.5834, "step": 779 }, { "epoch": 0.08, "grad_norm": 1.6659735801019722, "learning_rate": 9.931835680370912e-06, "loss": 0.6148, "step": 780 }, { "epoch": 0.08, "grad_norm": 2.0126658622092233, "learning_rate": 9.931558378379677e-06, "loss": 0.7435, "step": 781 }, { "epoch": 0.08, "grad_norm": 2.067686271560952, "learning_rate": 9.931280517368193e-06, "loss": 0.7257, "step": 782 }, { "epoch": 0.08, "grad_norm": 2.1745154175639487, "learning_rate": 9.931002097367954e-06, "loss": 0.6712, "step": 783 }, { "epoch": 0.08, "grad_norm": 2.045784496263562, "learning_rate": 9.930723118410521e-06, "loss": 0.6423, "step": 784 }, { "epoch": 0.08, "grad_norm": 2.270602416768308, "learning_rate": 9.93044358052752e-06, "loss": 0.6607, "step": 785 }, { "epoch": 0.08, "grad_norm": 1.845961952430096, "learning_rate": 9.930163483750636e-06, "loss": 0.6405, "step": 786 }, { "epoch": 0.08, "grad_norm": 1.681941760116097, "learning_rate": 9.929882828111619e-06, "loss": 0.6073, "step": 787 }, { "epoch": 0.08, "grad_norm": 2.1011423694291844, "learning_rate": 9.929601613642285e-06, "loss": 0.715, "step": 788 }, { "epoch": 0.08, "grad_norm": 1.7766205903261256, "learning_rate": 9.92931984037451e-06, "loss": 0.5952, "step": 789 }, { "epoch": 0.08, "grad_norm": 1.897836028769625, "learning_rate": 9.929037508340234e-06, "loss": 0.7663, "step": 790 }, { "epoch": 0.08, "grad_norm": 1.85405318753218, "learning_rate": 9.928754617571464e-06, "loss": 0.7752, "step": 791 }, { "epoch": 0.08, "grad_norm": 2.0696560918392723, "learning_rate": 9.928471168100264e-06, "loss": 0.5531, "step": 792 }, { "epoch": 0.08, "grad_norm": 2.2262392903553256, "learning_rate": 9.928187159958764e-06, "loss": 0.7426, "step": 793 }, { "epoch": 0.08, "grad_norm": 2.1714035118411035, "learning_rate": 9.927902593179163e-06, "loss": 0.6794, "step": 794 }, { "epoch": 0.08, "grad_norm": 2.1031096079567733, "learning_rate": 9.927617467793713e-06, "loss": 0.7313, "step": 795 }, { "epoch": 0.08, "grad_norm": 2.178237030703268, "learning_rate": 9.927331783834737e-06, "loss": 0.6139, "step": 796 }, { "epoch": 0.08, "grad_norm": 2.006402940126805, "learning_rate": 9.927045541334618e-06, "loss": 0.7167, "step": 797 }, { "epoch": 0.08, "grad_norm": 1.7349004505955714, "learning_rate": 9.926758740325803e-06, "loss": 0.5405, "step": 798 }, { "epoch": 0.08, "grad_norm": 1.7514974917690225, "learning_rate": 9.926471380840805e-06, "loss": 0.6142, "step": 799 }, { "epoch": 0.08, "grad_norm": 2.002352977374873, "learning_rate": 9.926183462912196e-06, "loss": 0.673, "step": 800 }, { "epoch": 0.08, "grad_norm": 1.820390271795529, "learning_rate": 9.92589498657261e-06, "loss": 0.5349, "step": 801 }, { "epoch": 0.08, "grad_norm": 2.061683813396355, "learning_rate": 9.925605951854754e-06, "loss": 0.7079, "step": 802 }, { "epoch": 0.08, "grad_norm": 1.9960408210755969, "learning_rate": 9.925316358791388e-06, "loss": 0.6418, "step": 803 }, { "epoch": 0.08, "grad_norm": 2.014159080687005, "learning_rate": 9.925026207415338e-06, "loss": 0.5767, "step": 804 }, { "epoch": 0.08, "grad_norm": 1.7464740551614473, "learning_rate": 9.924735497759497e-06, "loss": 0.5434, "step": 805 }, { "epoch": 0.08, "grad_norm": 1.9006992994964862, "learning_rate": 9.924444229856817e-06, "loss": 0.6631, "step": 806 }, { "epoch": 0.08, "grad_norm": 1.8335712721819453, "learning_rate": 9.924152403740315e-06, "loss": 0.6265, "step": 807 }, { "epoch": 0.08, "grad_norm": 1.9521569470070748, "learning_rate": 9.92386001944307e-06, "loss": 0.6922, "step": 808 }, { "epoch": 0.08, "grad_norm": 2.102488618677563, "learning_rate": 9.923567076998228e-06, "loss": 0.6094, "step": 809 }, { "epoch": 0.08, "grad_norm": 2.150033884300534, "learning_rate": 9.923273576438994e-06, "loss": 0.5725, "step": 810 }, { "epoch": 0.08, "grad_norm": 2.030112095841831, "learning_rate": 9.92297951779864e-06, "loss": 0.6741, "step": 811 }, { "epoch": 0.08, "grad_norm": 1.9791624948612083, "learning_rate": 9.922684901110496e-06, "loss": 0.6707, "step": 812 }, { "epoch": 0.08, "grad_norm": 2.2930407423495707, "learning_rate": 9.92238972640796e-06, "loss": 0.6664, "step": 813 }, { "epoch": 0.08, "grad_norm": 1.8906542205262489, "learning_rate": 9.922093993724492e-06, "loss": 0.6267, "step": 814 }, { "epoch": 0.08, "grad_norm": 1.9261005637706718, "learning_rate": 9.921797703093614e-06, "loss": 0.6355, "step": 815 }, { "epoch": 0.08, "grad_norm": 1.8862503970508953, "learning_rate": 9.921500854548916e-06, "loss": 0.6027, "step": 816 }, { "epoch": 0.08, "grad_norm": 2.1505482544915844, "learning_rate": 9.921203448124042e-06, "loss": 0.6235, "step": 817 }, { "epoch": 0.09, "grad_norm": 1.918340051848613, "learning_rate": 9.920905483852708e-06, "loss": 0.7459, "step": 818 }, { "epoch": 0.09, "grad_norm": 1.8334734949147398, "learning_rate": 9.920606961768689e-06, "loss": 0.628, "step": 819 }, { "epoch": 0.09, "grad_norm": 1.9869297816294038, "learning_rate": 9.920307881905824e-06, "loss": 0.6617, "step": 820 }, { "epoch": 0.09, "grad_norm": 1.9449685673150783, "learning_rate": 9.920008244298016e-06, "loss": 0.6223, "step": 821 }, { "epoch": 0.09, "grad_norm": 2.03168416029326, "learning_rate": 9.91970804897923e-06, "loss": 0.6794, "step": 822 }, { "epoch": 0.09, "grad_norm": 1.878625929884055, "learning_rate": 9.919407295983496e-06, "loss": 0.6658, "step": 823 }, { "epoch": 0.09, "grad_norm": 1.9650937939981765, "learning_rate": 9.919105985344906e-06, "loss": 0.5458, "step": 824 }, { "epoch": 0.09, "grad_norm": 2.0909683014256286, "learning_rate": 9.918804117097612e-06, "loss": 0.7894, "step": 825 }, { "epoch": 0.09, "grad_norm": 2.0613301951740173, "learning_rate": 9.918501691275837e-06, "loss": 0.6921, "step": 826 }, { "epoch": 0.09, "grad_norm": 2.0419520750382265, "learning_rate": 9.918198707913861e-06, "loss": 0.6937, "step": 827 }, { "epoch": 0.09, "grad_norm": 1.8581097977278722, "learning_rate": 9.917895167046027e-06, "loss": 0.6482, "step": 828 }, { "epoch": 0.09, "grad_norm": 1.8748461988863785, "learning_rate": 9.917591068706747e-06, "loss": 0.6154, "step": 829 }, { "epoch": 0.09, "grad_norm": 1.8925608747518752, "learning_rate": 9.917286412930489e-06, "loss": 0.6991, "step": 830 }, { "epoch": 0.09, "grad_norm": 1.8160060819942032, "learning_rate": 9.916981199751789e-06, "loss": 0.5783, "step": 831 }, { "epoch": 0.09, "grad_norm": 1.871521248122172, "learning_rate": 9.916675429205243e-06, "loss": 0.7109, "step": 832 }, { "epoch": 0.09, "grad_norm": 2.1074885386758977, "learning_rate": 9.916369101325514e-06, "loss": 0.7108, "step": 833 }, { "epoch": 0.09, "grad_norm": 2.1192414719954025, "learning_rate": 9.916062216147324e-06, "loss": 0.6341, "step": 834 }, { "epoch": 0.09, "grad_norm": 2.08239120677423, "learning_rate": 9.915754773705461e-06, "loss": 0.6375, "step": 835 }, { "epoch": 0.09, "grad_norm": 2.1316657605531093, "learning_rate": 9.915446774034776e-06, "loss": 0.7224, "step": 836 }, { "epoch": 0.09, "grad_norm": 2.281939628017067, "learning_rate": 9.915138217170184e-06, "loss": 0.7129, "step": 837 }, { "epoch": 0.09, "grad_norm": 1.8741130808070126, "learning_rate": 9.914829103146658e-06, "loss": 0.6406, "step": 838 }, { "epoch": 0.09, "grad_norm": 1.9684244785118747, "learning_rate": 9.91451943199924e-06, "loss": 0.7195, "step": 839 }, { "epoch": 0.09, "grad_norm": 2.0475888837677045, "learning_rate": 9.914209203763032e-06, "loss": 0.6547, "step": 840 }, { "epoch": 0.09, "grad_norm": 2.04921767979233, "learning_rate": 9.9138984184732e-06, "loss": 0.6901, "step": 841 }, { "epoch": 0.09, "grad_norm": 2.172629056653797, "learning_rate": 9.913587076164976e-06, "loss": 0.7042, "step": 842 }, { "epoch": 0.09, "grad_norm": 2.267660676241233, "learning_rate": 9.91327517687365e-06, "loss": 0.685, "step": 843 }, { "epoch": 0.09, "grad_norm": 1.7760733119676897, "learning_rate": 9.912962720634575e-06, "loss": 0.7247, "step": 844 }, { "epoch": 0.09, "grad_norm": 1.8798377763004066, "learning_rate": 9.912649707483174e-06, "loss": 0.6346, "step": 845 }, { "epoch": 0.09, "grad_norm": 1.869284735547129, "learning_rate": 9.91233613745493e-06, "loss": 0.6542, "step": 846 }, { "epoch": 0.09, "grad_norm": 1.926516930261669, "learning_rate": 9.912022010585385e-06, "loss": 0.5949, "step": 847 }, { "epoch": 0.09, "grad_norm": 1.972750668754731, "learning_rate": 9.911707326910145e-06, "loss": 0.5647, "step": 848 }, { "epoch": 0.09, "grad_norm": 1.7730609944376714, "learning_rate": 9.911392086464886e-06, "loss": 0.69, "step": 849 }, { "epoch": 0.09, "grad_norm": 2.0654042895948033, "learning_rate": 9.911076289285338e-06, "loss": 0.6786, "step": 850 }, { "epoch": 0.09, "grad_norm": 1.9272137533364486, "learning_rate": 9.910759935407301e-06, "loss": 0.6813, "step": 851 }, { "epoch": 0.09, "grad_norm": 2.076512024838693, "learning_rate": 9.910443024866636e-06, "loss": 0.7075, "step": 852 }, { "epoch": 0.09, "grad_norm": 1.9792218894356493, "learning_rate": 9.910125557699266e-06, "loss": 0.5437, "step": 853 }, { "epoch": 0.09, "grad_norm": 1.96480203139793, "learning_rate": 9.909807533941176e-06, "loss": 0.6791, "step": 854 }, { "epoch": 0.09, "grad_norm": 2.571498964350275, "learning_rate": 9.909488953628416e-06, "loss": 0.7562, "step": 855 }, { "epoch": 0.09, "grad_norm": 2.0928806202081334, "learning_rate": 9.909169816797102e-06, "loss": 0.6678, "step": 856 }, { "epoch": 0.09, "grad_norm": 2.1045247209467752, "learning_rate": 9.908850123483406e-06, "loss": 0.63, "step": 857 }, { "epoch": 0.09, "grad_norm": 2.0200550049787336, "learning_rate": 9.908529873723571e-06, "loss": 0.6311, "step": 858 }, { "epoch": 0.09, "grad_norm": 2.152052863379161, "learning_rate": 9.908209067553897e-06, "loss": 0.6502, "step": 859 }, { "epoch": 0.09, "grad_norm": 1.8592974529643835, "learning_rate": 9.907887705010748e-06, "loss": 0.6613, "step": 860 }, { "epoch": 0.09, "grad_norm": 1.9787376817789002, "learning_rate": 9.907565786130556e-06, "loss": 0.6377, "step": 861 }, { "epoch": 0.09, "grad_norm": 2.1440337219046173, "learning_rate": 9.907243310949806e-06, "loss": 0.7986, "step": 862 }, { "epoch": 0.09, "grad_norm": 1.8768089367088299, "learning_rate": 9.906920279505058e-06, "loss": 0.6163, "step": 863 }, { "epoch": 0.09, "grad_norm": 2.23557857803684, "learning_rate": 9.90659669183293e-06, "loss": 0.7218, "step": 864 }, { "epoch": 0.09, "grad_norm": 1.739848221698496, "learning_rate": 9.906272547970098e-06, "loss": 0.6745, "step": 865 }, { "epoch": 0.09, "grad_norm": 2.055645158811453, "learning_rate": 9.90594784795331e-06, "loss": 0.7725, "step": 866 }, { "epoch": 0.09, "grad_norm": 1.9873627014566904, "learning_rate": 9.905622591819368e-06, "loss": 0.6261, "step": 867 }, { "epoch": 0.09, "grad_norm": 2.2197485693258354, "learning_rate": 9.905296779605144e-06, "loss": 0.6184, "step": 868 }, { "epoch": 0.09, "grad_norm": 1.8826852954404683, "learning_rate": 9.904970411347574e-06, "loss": 0.5683, "step": 869 }, { "epoch": 0.09, "grad_norm": 1.8557977806798933, "learning_rate": 9.904643487083648e-06, "loss": 0.5471, "step": 870 }, { "epoch": 0.09, "grad_norm": 2.871864907218477, "learning_rate": 9.904316006850428e-06, "loss": 0.6914, "step": 871 }, { "epoch": 0.09, "grad_norm": 2.020931495944233, "learning_rate": 9.903987970685034e-06, "loss": 0.7881, "step": 872 }, { "epoch": 0.09, "grad_norm": 1.9461530000097567, "learning_rate": 9.903659378624652e-06, "loss": 0.6323, "step": 873 }, { "epoch": 0.09, "grad_norm": 1.9688203893811644, "learning_rate": 9.903330230706529e-06, "loss": 0.7003, "step": 874 }, { "epoch": 0.09, "grad_norm": 1.8043702789163878, "learning_rate": 9.903000526967977e-06, "loss": 0.6036, "step": 875 }, { "epoch": 0.09, "grad_norm": 1.950517288648671, "learning_rate": 9.90267026744637e-06, "loss": 0.6606, "step": 876 }, { "epoch": 0.09, "grad_norm": 2.053483126005183, "learning_rate": 9.902339452179142e-06, "loss": 0.6498, "step": 877 }, { "epoch": 0.09, "grad_norm": 1.8803225276792594, "learning_rate": 9.902008081203796e-06, "loss": 0.7156, "step": 878 }, { "epoch": 0.09, "grad_norm": 1.8191419159760944, "learning_rate": 9.901676154557893e-06, "loss": 0.6745, "step": 879 }, { "epoch": 0.09, "grad_norm": 1.8484125244211091, "learning_rate": 9.90134367227906e-06, "loss": 0.7064, "step": 880 }, { "epoch": 0.09, "grad_norm": 2.0590899215765233, "learning_rate": 9.901010634404983e-06, "loss": 0.656, "step": 881 }, { "epoch": 0.09, "grad_norm": 1.9746227071307032, "learning_rate": 9.900677040973418e-06, "loss": 0.7299, "step": 882 }, { "epoch": 0.09, "grad_norm": 2.073087199185832, "learning_rate": 9.900342892022176e-06, "loss": 0.7405, "step": 883 }, { "epoch": 0.09, "grad_norm": 2.0167336165515475, "learning_rate": 9.900008187589138e-06, "loss": 0.6863, "step": 884 }, { "epoch": 0.09, "grad_norm": 1.938178039873934, "learning_rate": 9.899672927712242e-06, "loss": 0.8384, "step": 885 }, { "epoch": 0.09, "grad_norm": 1.6977338968973716, "learning_rate": 9.899337112429492e-06, "loss": 0.6711, "step": 886 }, { "epoch": 0.09, "grad_norm": 1.9888290143305485, "learning_rate": 9.899000741778956e-06, "loss": 0.6642, "step": 887 }, { "epoch": 0.09, "grad_norm": 1.9917854977117735, "learning_rate": 9.898663815798761e-06, "loss": 0.6263, "step": 888 }, { "epoch": 0.09, "grad_norm": 2.0969486651793816, "learning_rate": 9.898326334527102e-06, "loss": 0.6614, "step": 889 }, { "epoch": 0.09, "grad_norm": 2.026670466094795, "learning_rate": 9.897988298002233e-06, "loss": 0.6663, "step": 890 }, { "epoch": 0.09, "grad_norm": 1.9815021235689791, "learning_rate": 9.897649706262474e-06, "loss": 0.6395, "step": 891 }, { "epoch": 0.09, "grad_norm": 2.2613612523304845, "learning_rate": 9.897310559346203e-06, "loss": 0.801, "step": 892 }, { "epoch": 0.09, "grad_norm": 1.821267540525475, "learning_rate": 9.896970857291868e-06, "loss": 0.6847, "step": 893 }, { "epoch": 0.09, "grad_norm": 2.0966029040749694, "learning_rate": 9.896630600137974e-06, "loss": 0.713, "step": 894 }, { "epoch": 0.09, "grad_norm": 1.920981843170863, "learning_rate": 9.896289787923092e-06, "loss": 0.6597, "step": 895 }, { "epoch": 0.09, "grad_norm": 1.9392140861418414, "learning_rate": 9.895948420685855e-06, "loss": 0.7259, "step": 896 }, { "epoch": 0.09, "grad_norm": 2.1670282608670166, "learning_rate": 9.895606498464956e-06, "loss": 0.7213, "step": 897 }, { "epoch": 0.09, "grad_norm": 2.0848482577609, "learning_rate": 9.895264021299158e-06, "loss": 0.7159, "step": 898 }, { "epoch": 0.09, "grad_norm": 1.716703867604487, "learning_rate": 9.894920989227282e-06, "loss": 0.5575, "step": 899 }, { "epoch": 0.09, "grad_norm": 1.7866881854548302, "learning_rate": 9.89457740228821e-06, "loss": 0.6121, "step": 900 }, { "epoch": 0.09, "grad_norm": 1.8671996440775374, "learning_rate": 9.894233260520893e-06, "loss": 0.5676, "step": 901 }, { "epoch": 0.09, "grad_norm": 1.9434408487574717, "learning_rate": 9.89388856396434e-06, "loss": 0.6724, "step": 902 }, { "epoch": 0.09, "grad_norm": 1.7936908863669354, "learning_rate": 9.893543312657623e-06, "loss": 0.6487, "step": 903 }, { "epoch": 0.09, "grad_norm": 2.027708079476974, "learning_rate": 9.89319750663988e-06, "loss": 0.6754, "step": 904 }, { "epoch": 0.09, "grad_norm": 2.0114888734872203, "learning_rate": 9.892851145950308e-06, "loss": 0.6927, "step": 905 }, { "epoch": 0.09, "grad_norm": 1.924424371946275, "learning_rate": 9.89250423062817e-06, "loss": 0.6884, "step": 906 }, { "epoch": 0.09, "grad_norm": 1.8961310755643144, "learning_rate": 9.892156760712793e-06, "loss": 0.6769, "step": 907 }, { "epoch": 0.09, "grad_norm": 1.9632794427660414, "learning_rate": 9.891808736243563e-06, "loss": 0.6999, "step": 908 }, { "epoch": 0.09, "grad_norm": 2.495602941098346, "learning_rate": 9.89146015725993e-06, "loss": 0.7327, "step": 909 }, { "epoch": 0.09, "grad_norm": 2.3402932804708834, "learning_rate": 9.891111023801405e-06, "loss": 0.636, "step": 910 }, { "epoch": 0.09, "grad_norm": 1.9472235512569598, "learning_rate": 9.89076133590757e-06, "loss": 0.6924, "step": 911 }, { "epoch": 0.09, "grad_norm": 1.9751978430386354, "learning_rate": 9.89041109361806e-06, "loss": 0.6588, "step": 912 }, { "epoch": 0.09, "grad_norm": 1.9931745510211398, "learning_rate": 9.89006029697258e-06, "loss": 0.7378, "step": 913 }, { "epoch": 0.1, "grad_norm": 1.9711240247848647, "learning_rate": 9.88970894601089e-06, "loss": 0.7044, "step": 914 }, { "epoch": 0.1, "grad_norm": 1.945758925356476, "learning_rate": 9.889357040772822e-06, "loss": 0.7053, "step": 915 }, { "epoch": 0.1, "grad_norm": 1.903319178019049, "learning_rate": 9.889004581298265e-06, "loss": 0.6292, "step": 916 }, { "epoch": 0.1, "grad_norm": 1.9525247895967082, "learning_rate": 9.888651567627173e-06, "loss": 0.5822, "step": 917 }, { "epoch": 0.1, "grad_norm": 2.0574072003726425, "learning_rate": 9.88829799979956e-06, "loss": 0.6102, "step": 918 }, { "epoch": 0.1, "grad_norm": 2.044284692228813, "learning_rate": 9.887943877855505e-06, "loss": 0.6571, "step": 919 }, { "epoch": 0.1, "grad_norm": 1.5942595611222257, "learning_rate": 9.887589201835154e-06, "loss": 0.5227, "step": 920 }, { "epoch": 0.1, "grad_norm": 1.8786375947915854, "learning_rate": 9.88723397177871e-06, "loss": 0.586, "step": 921 }, { "epoch": 0.1, "grad_norm": 1.8615389080365194, "learning_rate": 9.886878187726435e-06, "loss": 0.5845, "step": 922 }, { "epoch": 0.1, "grad_norm": 2.064836397948735, "learning_rate": 9.886521849718665e-06, "loss": 0.614, "step": 923 }, { "epoch": 0.1, "grad_norm": 1.8259649534315854, "learning_rate": 9.886164957795792e-06, "loss": 0.5673, "step": 924 }, { "epoch": 0.1, "grad_norm": 1.7726574431040119, "learning_rate": 9.885807511998269e-06, "loss": 0.6505, "step": 925 }, { "epoch": 0.1, "grad_norm": 1.9798645528179286, "learning_rate": 9.885449512366617e-06, "loss": 0.7142, "step": 926 }, { "epoch": 0.1, "grad_norm": 1.7854622151133066, "learning_rate": 9.885090958941416e-06, "loss": 0.6493, "step": 927 }, { "epoch": 0.1, "grad_norm": 1.9520396817553052, "learning_rate": 9.884731851763313e-06, "loss": 0.5635, "step": 928 }, { "epoch": 0.1, "grad_norm": 2.002592135377459, "learning_rate": 9.884372190873011e-06, "loss": 0.7452, "step": 929 }, { "epoch": 0.1, "grad_norm": 1.7836620496269653, "learning_rate": 9.88401197631128e-06, "loss": 0.6534, "step": 930 }, { "epoch": 0.1, "grad_norm": 1.8592328756550733, "learning_rate": 9.883651208118956e-06, "loss": 0.6744, "step": 931 }, { "epoch": 0.1, "grad_norm": 1.8997837483930826, "learning_rate": 9.88328988633693e-06, "loss": 0.7845, "step": 932 }, { "epoch": 0.1, "grad_norm": 2.1336391293428023, "learning_rate": 9.882928011006163e-06, "loss": 0.5957, "step": 933 }, { "epoch": 0.1, "grad_norm": 1.8537580965801874, "learning_rate": 9.882565582167673e-06, "loss": 0.6615, "step": 934 }, { "epoch": 0.1, "grad_norm": 1.857686711318004, "learning_rate": 9.882202599862545e-06, "loss": 0.6753, "step": 935 }, { "epoch": 0.1, "grad_norm": 1.9365530494057581, "learning_rate": 9.881839064131925e-06, "loss": 0.6454, "step": 936 }, { "epoch": 0.1, "grad_norm": 2.0038963286408062, "learning_rate": 9.88147497501702e-06, "loss": 0.6664, "step": 937 }, { "epoch": 0.1, "grad_norm": 2.3477040250832326, "learning_rate": 9.881110332559104e-06, "loss": 0.7557, "step": 938 }, { "epoch": 0.1, "grad_norm": 2.0700186962715974, "learning_rate": 9.88074513679951e-06, "loss": 0.6809, "step": 939 }, { "epoch": 0.1, "grad_norm": 1.7916439421510102, "learning_rate": 9.880379387779637e-06, "loss": 0.5874, "step": 940 }, { "epoch": 0.1, "grad_norm": 2.0284864284873567, "learning_rate": 9.880013085540942e-06, "loss": 0.7315, "step": 941 }, { "epoch": 0.1, "grad_norm": 2.079999840493714, "learning_rate": 9.879646230124949e-06, "loss": 0.7182, "step": 942 }, { "epoch": 0.1, "grad_norm": 1.981513474191177, "learning_rate": 9.879278821573241e-06, "loss": 0.6527, "step": 943 }, { "epoch": 0.1, "grad_norm": 2.082383292274721, "learning_rate": 9.87891085992747e-06, "loss": 0.6105, "step": 944 }, { "epoch": 0.1, "grad_norm": 1.772693746109168, "learning_rate": 9.878542345229342e-06, "loss": 0.6322, "step": 945 }, { "epoch": 0.1, "grad_norm": 1.7969774735537083, "learning_rate": 9.878173277520636e-06, "loss": 0.7354, "step": 946 }, { "epoch": 0.1, "grad_norm": 1.7088315539160064, "learning_rate": 9.877803656843182e-06, "loss": 0.6111, "step": 947 }, { "epoch": 0.1, "grad_norm": 2.0378349193456065, "learning_rate": 9.877433483238881e-06, "loss": 0.6576, "step": 948 }, { "epoch": 0.1, "grad_norm": 1.9073383214298338, "learning_rate": 9.877062756749694e-06, "loss": 0.7225, "step": 949 }, { "epoch": 0.1, "grad_norm": 1.9049859401892608, "learning_rate": 9.876691477417644e-06, "loss": 0.6158, "step": 950 }, { "epoch": 0.1, "grad_norm": 1.8083745188555216, "learning_rate": 9.876319645284821e-06, "loss": 0.6074, "step": 951 }, { "epoch": 0.1, "grad_norm": 2.0656455996647165, "learning_rate": 9.875947260393371e-06, "loss": 0.6854, "step": 952 }, { "epoch": 0.1, "grad_norm": 2.1464444197411945, "learning_rate": 9.875574322785508e-06, "loss": 0.6364, "step": 953 }, { "epoch": 0.1, "grad_norm": 1.8324837012367032, "learning_rate": 9.875200832503505e-06, "loss": 0.6757, "step": 954 }, { "epoch": 0.1, "grad_norm": 1.9747983267467775, "learning_rate": 9.8748267895897e-06, "loss": 0.7113, "step": 955 }, { "epoch": 0.1, "grad_norm": 1.7988094223419764, "learning_rate": 9.874452194086492e-06, "loss": 0.5959, "step": 956 }, { "epoch": 0.1, "grad_norm": 2.297513189742376, "learning_rate": 9.874077046036345e-06, "loss": 0.71, "step": 957 }, { "epoch": 0.1, "grad_norm": 2.0557038443885123, "learning_rate": 9.873701345481784e-06, "loss": 0.7535, "step": 958 }, { "epoch": 0.1, "grad_norm": 2.1326435760926987, "learning_rate": 9.873325092465395e-06, "loss": 0.7603, "step": 959 }, { "epoch": 0.1, "grad_norm": 1.980369170639488, "learning_rate": 9.872948287029833e-06, "loss": 0.6861, "step": 960 }, { "epoch": 0.1, "grad_norm": 2.004930978839084, "learning_rate": 9.872570929217804e-06, "loss": 0.6836, "step": 961 }, { "epoch": 0.1, "grad_norm": 2.0397373283212565, "learning_rate": 9.87219301907209e-06, "loss": 0.6653, "step": 962 }, { "epoch": 0.1, "grad_norm": 1.932230567382412, "learning_rate": 9.871814556635525e-06, "loss": 0.674, "step": 963 }, { "epoch": 0.1, "grad_norm": 1.9015175721561417, "learning_rate": 9.871435541951011e-06, "loss": 0.6211, "step": 964 }, { "epoch": 0.1, "grad_norm": 1.8075315769084097, "learning_rate": 9.871055975061514e-06, "loss": 0.7, "step": 965 }, { "epoch": 0.1, "grad_norm": 1.782486219766704, "learning_rate": 9.870675856010058e-06, "loss": 0.7312, "step": 966 }, { "epoch": 0.1, "grad_norm": 1.9553211188111437, "learning_rate": 9.87029518483973e-06, "loss": 0.6937, "step": 967 }, { "epoch": 0.1, "grad_norm": 2.077651871787083, "learning_rate": 9.869913961593685e-06, "loss": 0.7183, "step": 968 }, { "epoch": 0.1, "grad_norm": 1.8307112275173123, "learning_rate": 9.869532186315134e-06, "loss": 0.6975, "step": 969 }, { "epoch": 0.1, "grad_norm": 2.1319077853591404, "learning_rate": 9.869149859047355e-06, "loss": 0.6904, "step": 970 }, { "epoch": 0.1, "grad_norm": 2.1805511045289396, "learning_rate": 9.868766979833686e-06, "loss": 0.6585, "step": 971 }, { "epoch": 0.1, "grad_norm": 1.9877639959778115, "learning_rate": 9.868383548717529e-06, "loss": 0.6813, "step": 972 }, { "epoch": 0.1, "grad_norm": 2.4334437231614343, "learning_rate": 9.867999565742348e-06, "loss": 0.7393, "step": 973 }, { "epoch": 0.1, "grad_norm": 1.7359878345550932, "learning_rate": 9.867615030951668e-06, "loss": 0.6564, "step": 974 }, { "epoch": 0.1, "grad_norm": 1.8375121930806249, "learning_rate": 9.86722994438908e-06, "loss": 0.7168, "step": 975 }, { "epoch": 0.1, "grad_norm": 2.7151473512488256, "learning_rate": 9.866844306098238e-06, "loss": 0.6273, "step": 976 }, { "epoch": 0.1, "grad_norm": 1.830403049133302, "learning_rate": 9.866458116122852e-06, "loss": 0.6368, "step": 977 }, { "epoch": 0.1, "grad_norm": 1.970209094089418, "learning_rate": 9.866071374506701e-06, "loss": 0.769, "step": 978 }, { "epoch": 0.1, "grad_norm": 2.141400584070093, "learning_rate": 9.865684081293624e-06, "loss": 0.679, "step": 979 }, { "epoch": 0.1, "grad_norm": 1.690775547376312, "learning_rate": 9.865296236527523e-06, "loss": 0.6908, "step": 980 }, { "epoch": 0.1, "grad_norm": 2.043177681315642, "learning_rate": 9.86490784025236e-06, "loss": 0.6868, "step": 981 }, { "epoch": 0.1, "grad_norm": 1.833251578843401, "learning_rate": 9.864518892512167e-06, "loss": 0.6162, "step": 982 }, { "epoch": 0.1, "grad_norm": 2.1456831160855403, "learning_rate": 9.86412939335103e-06, "loss": 0.7424, "step": 983 }, { "epoch": 0.1, "grad_norm": 2.0893900408271007, "learning_rate": 9.8637393428131e-06, "loss": 0.6559, "step": 984 }, { "epoch": 0.1, "grad_norm": 2.3334547598341455, "learning_rate": 9.863348740942595e-06, "loss": 0.6998, "step": 985 }, { "epoch": 0.1, "grad_norm": 1.9053267391294475, "learning_rate": 9.86295758778379e-06, "loss": 0.5793, "step": 986 }, { "epoch": 0.1, "grad_norm": 2.053265187490565, "learning_rate": 9.862565883381025e-06, "loss": 0.6087, "step": 987 }, { "epoch": 0.1, "grad_norm": 1.8738984376187984, "learning_rate": 9.862173627778699e-06, "loss": 0.6344, "step": 988 }, { "epoch": 0.1, "grad_norm": 1.9312070177429788, "learning_rate": 9.861780821021282e-06, "loss": 0.6113, "step": 989 }, { "epoch": 0.1, "grad_norm": 2.0840209825648373, "learning_rate": 9.861387463153295e-06, "loss": 0.7107, "step": 990 }, { "epoch": 0.1, "grad_norm": 2.053479103718914, "learning_rate": 9.860993554219333e-06, "loss": 0.6901, "step": 991 }, { "epoch": 0.1, "grad_norm": 2.3013383389382795, "learning_rate": 9.860599094264043e-06, "loss": 0.6469, "step": 992 }, { "epoch": 0.1, "grad_norm": 1.8821636541147546, "learning_rate": 9.860204083332142e-06, "loss": 0.7016, "step": 993 }, { "epoch": 0.1, "grad_norm": 1.871488491257933, "learning_rate": 9.859808521468404e-06, "loss": 0.6663, "step": 994 }, { "epoch": 0.1, "grad_norm": 1.8671302354097286, "learning_rate": 9.859412408717672e-06, "loss": 0.67, "step": 995 }, { "epoch": 0.1, "grad_norm": 2.1881384088375335, "learning_rate": 9.859015745124844e-06, "loss": 0.5869, "step": 996 }, { "epoch": 0.1, "grad_norm": 1.8839529894787386, "learning_rate": 9.858618530734887e-06, "loss": 0.6813, "step": 997 }, { "epoch": 0.1, "grad_norm": 2.024864643623028, "learning_rate": 9.858220765592828e-06, "loss": 0.584, "step": 998 }, { "epoch": 0.1, "grad_norm": 1.8817794170694133, "learning_rate": 9.857822449743752e-06, "loss": 0.6694, "step": 999 }, { "epoch": 0.1, "grad_norm": 2.3342042069446327, "learning_rate": 9.857423583232812e-06, "loss": 0.8106, "step": 1000 }, { "epoch": 0.1, "grad_norm": 1.7941891959216587, "learning_rate": 9.857024166105224e-06, "loss": 0.6335, "step": 1001 }, { "epoch": 0.1, "grad_norm": 2.181704718868215, "learning_rate": 9.856624198406262e-06, "loss": 0.6525, "step": 1002 }, { "epoch": 0.1, "grad_norm": 1.908208488414037, "learning_rate": 9.856223680181267e-06, "loss": 0.6317, "step": 1003 }, { "epoch": 0.1, "grad_norm": 2.517562370686754, "learning_rate": 9.855822611475636e-06, "loss": 0.6677, "step": 1004 }, { "epoch": 0.1, "grad_norm": 1.755038812115677, "learning_rate": 9.855420992334836e-06, "loss": 0.6095, "step": 1005 }, { "epoch": 0.1, "grad_norm": 2.0790618780998917, "learning_rate": 9.85501882280439e-06, "loss": 0.7459, "step": 1006 }, { "epoch": 0.1, "grad_norm": 1.9235236943259157, "learning_rate": 9.85461610292989e-06, "loss": 0.6692, "step": 1007 }, { "epoch": 0.1, "grad_norm": 1.6253316092641337, "learning_rate": 9.854212832756984e-06, "loss": 0.6678, "step": 1008 }, { "epoch": 0.1, "grad_norm": 2.117501284290816, "learning_rate": 9.853809012331384e-06, "loss": 0.7446, "step": 1009 }, { "epoch": 0.1, "grad_norm": 2.156750435017462, "learning_rate": 9.853404641698866e-06, "loss": 0.6416, "step": 1010 }, { "epoch": 0.11, "grad_norm": 1.7000772300008768, "learning_rate": 9.85299972090527e-06, "loss": 0.6848, "step": 1011 }, { "epoch": 0.11, "grad_norm": 1.7492572101693626, "learning_rate": 9.852594249996494e-06, "loss": 0.5989, "step": 1012 }, { "epoch": 0.11, "grad_norm": 1.9475380721456124, "learning_rate": 9.852188229018502e-06, "loss": 0.6094, "step": 1013 }, { "epoch": 0.11, "grad_norm": 1.923991064008745, "learning_rate": 9.851781658017317e-06, "loss": 0.6641, "step": 1014 }, { "epoch": 0.11, "grad_norm": 2.2032915876519743, "learning_rate": 9.851374537039027e-06, "loss": 0.6064, "step": 1015 }, { "epoch": 0.11, "grad_norm": 2.112853405985047, "learning_rate": 9.850966866129779e-06, "loss": 0.6987, "step": 1016 }, { "epoch": 0.11, "grad_norm": 2.257447982728473, "learning_rate": 9.85055864533579e-06, "loss": 0.6616, "step": 1017 }, { "epoch": 0.11, "grad_norm": 1.9407354984677891, "learning_rate": 9.85014987470333e-06, "loss": 0.618, "step": 1018 }, { "epoch": 0.11, "grad_norm": 2.0091396799893797, "learning_rate": 9.849740554278738e-06, "loss": 0.7182, "step": 1019 }, { "epoch": 0.11, "grad_norm": 2.0832354129855615, "learning_rate": 9.849330684108409e-06, "loss": 0.6517, "step": 1020 }, { "epoch": 0.11, "grad_norm": 2.1550178340245205, "learning_rate": 9.848920264238809e-06, "loss": 0.8038, "step": 1021 }, { "epoch": 0.11, "grad_norm": 1.8398166027965528, "learning_rate": 9.84850929471646e-06, "loss": 0.5977, "step": 1022 }, { "epoch": 0.11, "grad_norm": 2.4712742132470282, "learning_rate": 9.848097775587944e-06, "loss": 0.7969, "step": 1023 }, { "epoch": 0.11, "grad_norm": 1.9800854460786408, "learning_rate": 9.847685706899913e-06, "loss": 0.6424, "step": 1024 }, { "epoch": 0.11, "grad_norm": 2.160887630420676, "learning_rate": 9.847273088699077e-06, "loss": 0.5793, "step": 1025 }, { "epoch": 0.11, "grad_norm": 1.7402490160067203, "learning_rate": 9.846859921032207e-06, "loss": 0.6306, "step": 1026 }, { "epoch": 0.11, "grad_norm": 1.9858410317423765, "learning_rate": 9.846446203946139e-06, "loss": 0.6972, "step": 1027 }, { "epoch": 0.11, "grad_norm": 1.7681585176033914, "learning_rate": 9.84603193748777e-06, "loss": 0.689, "step": 1028 }, { "epoch": 0.11, "grad_norm": 1.8047938502571725, "learning_rate": 9.84561712170406e-06, "loss": 0.5521, "step": 1029 }, { "epoch": 0.11, "grad_norm": 2.0028920908430434, "learning_rate": 9.84520175664203e-06, "loss": 0.693, "step": 1030 }, { "epoch": 0.11, "grad_norm": 2.074323617732677, "learning_rate": 9.844785842348764e-06, "loss": 0.7734, "step": 1031 }, { "epoch": 0.11, "grad_norm": 1.7873634371638705, "learning_rate": 9.844369378871409e-06, "loss": 0.5996, "step": 1032 }, { "epoch": 0.11, "grad_norm": 1.8578006878258289, "learning_rate": 9.84395236625717e-06, "loss": 0.5908, "step": 1033 }, { "epoch": 0.11, "grad_norm": 1.9206170170554828, "learning_rate": 9.843534804553323e-06, "loss": 0.6535, "step": 1034 }, { "epoch": 0.11, "grad_norm": 1.8725064621105443, "learning_rate": 9.843116693807199e-06, "loss": 0.5975, "step": 1035 }, { "epoch": 0.11, "grad_norm": 2.2188161598759315, "learning_rate": 9.842698034066192e-06, "loss": 0.7365, "step": 1036 }, { "epoch": 0.11, "grad_norm": 1.921338516012938, "learning_rate": 9.842278825377761e-06, "loss": 0.7024, "step": 1037 }, { "epoch": 0.11, "grad_norm": 2.2716226482375554, "learning_rate": 9.841859067789425e-06, "loss": 0.6117, "step": 1038 }, { "epoch": 0.11, "grad_norm": 1.8260022112754144, "learning_rate": 9.841438761348766e-06, "loss": 0.5538, "step": 1039 }, { "epoch": 0.11, "grad_norm": 1.8908219800095016, "learning_rate": 9.841017906103427e-06, "loss": 0.7356, "step": 1040 }, { "epoch": 0.11, "grad_norm": 2.021688449597609, "learning_rate": 9.840596502101117e-06, "loss": 0.6077, "step": 1041 }, { "epoch": 0.11, "grad_norm": 2.1832877381909452, "learning_rate": 9.840174549389603e-06, "loss": 0.6493, "step": 1042 }, { "epoch": 0.11, "grad_norm": 2.174239711824139, "learning_rate": 9.839752048016714e-06, "loss": 0.633, "step": 1043 }, { "epoch": 0.11, "grad_norm": 2.0734549390443435, "learning_rate": 9.839328998030347e-06, "loss": 0.7503, "step": 1044 }, { "epoch": 0.11, "grad_norm": 1.7587601682627316, "learning_rate": 9.838905399478453e-06, "loss": 0.6627, "step": 1045 }, { "epoch": 0.11, "grad_norm": 2.254845491574179, "learning_rate": 9.838481252409053e-06, "loss": 0.7206, "step": 1046 }, { "epoch": 0.11, "grad_norm": 2.0682471001717357, "learning_rate": 9.838056556870223e-06, "loss": 0.6788, "step": 1047 }, { "epoch": 0.11, "grad_norm": 1.9779991745810939, "learning_rate": 9.837631312910107e-06, "loss": 0.6749, "step": 1048 }, { "epoch": 0.11, "grad_norm": 1.9718255224988028, "learning_rate": 9.837205520576907e-06, "loss": 0.6956, "step": 1049 }, { "epoch": 0.11, "grad_norm": 1.8145469518157462, "learning_rate": 9.836779179918891e-06, "loss": 0.6854, "step": 1050 }, { "epoch": 0.11, "grad_norm": 1.9375617890849886, "learning_rate": 9.836352290984386e-06, "loss": 0.6184, "step": 1051 }, { "epoch": 0.11, "grad_norm": 1.9338031264153726, "learning_rate": 9.835924853821783e-06, "loss": 0.5955, "step": 1052 }, { "epoch": 0.11, "grad_norm": 1.9522052631485587, "learning_rate": 9.835496868479533e-06, "loss": 0.5681, "step": 1053 }, { "epoch": 0.11, "grad_norm": 1.8700014507217293, "learning_rate": 9.835068335006153e-06, "loss": 0.6794, "step": 1054 }, { "epoch": 0.11, "grad_norm": 1.9556206221572114, "learning_rate": 9.834639253450217e-06, "loss": 0.5931, "step": 1055 }, { "epoch": 0.11, "grad_norm": 2.1358836379045387, "learning_rate": 9.834209623860367e-06, "loss": 0.7265, "step": 1056 }, { "epoch": 0.11, "grad_norm": 1.8328538254509168, "learning_rate": 9.8337794462853e-06, "loss": 0.6213, "step": 1057 }, { "epoch": 0.11, "grad_norm": 1.9064305026210864, "learning_rate": 9.833348720773782e-06, "loss": 0.6486, "step": 1058 }, { "epoch": 0.11, "grad_norm": 1.887679929354486, "learning_rate": 9.832917447374637e-06, "loss": 0.7484, "step": 1059 }, { "epoch": 0.11, "grad_norm": 2.012238919175227, "learning_rate": 9.832485626136751e-06, "loss": 0.6625, "step": 1060 }, { "epoch": 0.11, "grad_norm": 2.0410737340872145, "learning_rate": 9.832053257109077e-06, "loss": 0.7055, "step": 1061 }, { "epoch": 0.11, "grad_norm": 1.7772590663935988, "learning_rate": 9.831620340340626e-06, "loss": 0.6214, "step": 1062 }, { "epoch": 0.11, "grad_norm": 1.8966041179438153, "learning_rate": 9.831186875880467e-06, "loss": 0.6835, "step": 1063 }, { "epoch": 0.11, "grad_norm": 1.8203185091064622, "learning_rate": 9.830752863777741e-06, "loss": 0.6473, "step": 1064 }, { "epoch": 0.11, "grad_norm": 1.993896863641108, "learning_rate": 9.830318304081642e-06, "loss": 0.7101, "step": 1065 }, { "epoch": 0.11, "grad_norm": 2.2042609661267836, "learning_rate": 9.829883196841433e-06, "loss": 0.6174, "step": 1066 }, { "epoch": 0.11, "grad_norm": 2.0253946730893886, "learning_rate": 9.829447542106434e-06, "loss": 0.7096, "step": 1067 }, { "epoch": 0.11, "grad_norm": 1.8318754462273392, "learning_rate": 9.829011339926028e-06, "loss": 0.662, "step": 1068 }, { "epoch": 0.11, "grad_norm": 1.9474284559392272, "learning_rate": 9.828574590349662e-06, "loss": 0.7439, "step": 1069 }, { "epoch": 0.11, "grad_norm": 2.07339427770848, "learning_rate": 9.828137293426844e-06, "loss": 0.7443, "step": 1070 }, { "epoch": 0.11, "grad_norm": 1.9755027213686065, "learning_rate": 9.827699449207147e-06, "loss": 0.6352, "step": 1071 }, { "epoch": 0.11, "grad_norm": 1.9897415411397834, "learning_rate": 9.827261057740198e-06, "loss": 0.7521, "step": 1072 }, { "epoch": 0.11, "grad_norm": 2.04504258074226, "learning_rate": 9.826822119075694e-06, "loss": 0.7339, "step": 1073 }, { "epoch": 0.11, "grad_norm": 2.2651880336477728, "learning_rate": 9.826382633263392e-06, "loss": 0.7575, "step": 1074 }, { "epoch": 0.11, "grad_norm": 2.0751191341447877, "learning_rate": 9.825942600353107e-06, "loss": 0.6432, "step": 1075 }, { "epoch": 0.11, "grad_norm": 1.9471220241261464, "learning_rate": 9.825502020394724e-06, "loss": 0.6165, "step": 1076 }, { "epoch": 0.11, "grad_norm": 2.0197119177571525, "learning_rate": 9.82506089343818e-06, "loss": 0.6898, "step": 1077 }, { "epoch": 0.11, "grad_norm": 2.2111513000318053, "learning_rate": 9.824619219533482e-06, "loss": 0.8225, "step": 1078 }, { "epoch": 0.11, "grad_norm": 2.072255616698154, "learning_rate": 9.824176998730698e-06, "loss": 0.5451, "step": 1079 }, { "epoch": 0.11, "grad_norm": 2.0203232264892574, "learning_rate": 9.823734231079953e-06, "loss": 0.6907, "step": 1080 }, { "epoch": 0.11, "grad_norm": 1.9978646542749907, "learning_rate": 9.823290916631438e-06, "loss": 0.7582, "step": 1081 }, { "epoch": 0.11, "grad_norm": 1.997655399908216, "learning_rate": 9.822847055435407e-06, "loss": 0.6675, "step": 1082 }, { "epoch": 0.11, "grad_norm": 2.600826888649854, "learning_rate": 9.822402647542173e-06, "loss": 0.6995, "step": 1083 }, { "epoch": 0.11, "grad_norm": 1.8108675661821911, "learning_rate": 9.82195769300211e-06, "loss": 0.7427, "step": 1084 }, { "epoch": 0.11, "grad_norm": 2.0866686825001666, "learning_rate": 9.821512191865662e-06, "loss": 0.653, "step": 1085 }, { "epoch": 0.11, "grad_norm": 1.9633193896611827, "learning_rate": 9.821066144183322e-06, "loss": 0.6388, "step": 1086 }, { "epoch": 0.11, "grad_norm": 1.7284744819995272, "learning_rate": 9.820619550005656e-06, "loss": 0.5978, "step": 1087 }, { "epoch": 0.11, "grad_norm": 1.9204126261783792, "learning_rate": 9.820172409383288e-06, "loss": 0.6652, "step": 1088 }, { "epoch": 0.11, "grad_norm": 1.9950911805464604, "learning_rate": 9.819724722366903e-06, "loss": 0.5597, "step": 1089 }, { "epoch": 0.11, "grad_norm": 1.9588977885654488, "learning_rate": 9.81927648900725e-06, "loss": 0.6817, "step": 1090 }, { "epoch": 0.11, "grad_norm": 1.9645877951323167, "learning_rate": 9.818827709355138e-06, "loss": 0.6927, "step": 1091 }, { "epoch": 0.11, "grad_norm": 1.9913277378900867, "learning_rate": 9.818378383461438e-06, "loss": 0.5648, "step": 1092 }, { "epoch": 0.11, "grad_norm": 2.167093446329645, "learning_rate": 9.817928511377085e-06, "loss": 0.721, "step": 1093 }, { "epoch": 0.11, "grad_norm": 2.006202235369134, "learning_rate": 9.817478093153074e-06, "loss": 0.6331, "step": 1094 }, { "epoch": 0.11, "grad_norm": 1.8430866473192573, "learning_rate": 9.817027128840462e-06, "loss": 0.6632, "step": 1095 }, { "epoch": 0.11, "grad_norm": 2.1072808477952307, "learning_rate": 9.816575618490368e-06, "loss": 0.677, "step": 1096 }, { "epoch": 0.11, "grad_norm": 1.8674328585658524, "learning_rate": 9.816123562153975e-06, "loss": 0.6894, "step": 1097 }, { "epoch": 0.11, "grad_norm": 1.9548566740415998, "learning_rate": 9.815670959882526e-06, "loss": 0.6354, "step": 1098 }, { "epoch": 0.11, "grad_norm": 1.958662754789633, "learning_rate": 9.815217811727325e-06, "loss": 0.6949, "step": 1099 }, { "epoch": 0.11, "grad_norm": 2.0309136527346965, "learning_rate": 9.814764117739737e-06, "loss": 0.6162, "step": 1100 }, { "epoch": 0.11, "grad_norm": 1.866529266557843, "learning_rate": 9.814309877971195e-06, "loss": 0.7048, "step": 1101 }, { "epoch": 0.11, "grad_norm": 1.7941888834949336, "learning_rate": 9.813855092473189e-06, "loss": 0.6341, "step": 1102 }, { "epoch": 0.11, "grad_norm": 1.8740743463137512, "learning_rate": 9.813399761297267e-06, "loss": 0.6796, "step": 1103 }, { "epoch": 0.11, "grad_norm": 1.9916876162956634, "learning_rate": 9.81294388449505e-06, "loss": 0.7041, "step": 1104 }, { "epoch": 0.11, "grad_norm": 2.328375158894037, "learning_rate": 9.812487462118207e-06, "loss": 0.7484, "step": 1105 }, { "epoch": 0.11, "grad_norm": 2.0851750400015714, "learning_rate": 9.812030494218484e-06, "loss": 0.6148, "step": 1106 }, { "epoch": 0.12, "grad_norm": 2.0721317087262685, "learning_rate": 9.811572980847674e-06, "loss": 0.6666, "step": 1107 }, { "epoch": 0.12, "grad_norm": 2.3735592824344534, "learning_rate": 9.811114922057642e-06, "loss": 0.7982, "step": 1108 }, { "epoch": 0.12, "grad_norm": 2.0309673267983404, "learning_rate": 9.810656317900312e-06, "loss": 0.5905, "step": 1109 }, { "epoch": 0.12, "grad_norm": 1.9784198177751062, "learning_rate": 9.810197168427667e-06, "loss": 0.7343, "step": 1110 }, { "epoch": 0.12, "grad_norm": 1.8042340469617641, "learning_rate": 9.809737473691758e-06, "loss": 0.6254, "step": 1111 }, { "epoch": 0.12, "grad_norm": 2.0968682452666974, "learning_rate": 9.80927723374469e-06, "loss": 0.6969, "step": 1112 }, { "epoch": 0.12, "grad_norm": 2.354904562448494, "learning_rate": 9.808816448638636e-06, "loss": 0.7833, "step": 1113 }, { "epoch": 0.12, "grad_norm": 2.163126833363332, "learning_rate": 9.808355118425827e-06, "loss": 0.6467, "step": 1114 }, { "epoch": 0.12, "grad_norm": 2.0346429879262433, "learning_rate": 9.807893243158562e-06, "loss": 0.6335, "step": 1115 }, { "epoch": 0.12, "grad_norm": 2.1759311495573788, "learning_rate": 9.80743082288919e-06, "loss": 0.7353, "step": 1116 }, { "epoch": 0.12, "grad_norm": 1.8028313593352352, "learning_rate": 9.806967857670135e-06, "loss": 0.609, "step": 1117 }, { "epoch": 0.12, "grad_norm": 1.7536865880903942, "learning_rate": 9.806504347553874e-06, "loss": 0.6642, "step": 1118 }, { "epoch": 0.12, "grad_norm": 1.9892546388260939, "learning_rate": 9.80604029259295e-06, "loss": 0.7288, "step": 1119 }, { "epoch": 0.12, "grad_norm": 1.9316832882932709, "learning_rate": 9.805575692839964e-06, "loss": 0.5975, "step": 1120 }, { "epoch": 0.12, "grad_norm": 1.8501490970207397, "learning_rate": 9.805110548347583e-06, "loss": 0.7319, "step": 1121 }, { "epoch": 0.12, "grad_norm": 1.9680500445329732, "learning_rate": 9.804644859168534e-06, "loss": 0.697, "step": 1122 }, { "epoch": 0.12, "grad_norm": 2.109835278999002, "learning_rate": 9.804178625355602e-06, "loss": 0.6059, "step": 1123 }, { "epoch": 0.12, "grad_norm": 2.145342051858608, "learning_rate": 9.803711846961641e-06, "loss": 0.7171, "step": 1124 }, { "epoch": 0.12, "grad_norm": 2.088705701522735, "learning_rate": 9.803244524039564e-06, "loss": 0.745, "step": 1125 }, { "epoch": 0.12, "grad_norm": 1.9112491216751644, "learning_rate": 9.802776656642341e-06, "loss": 0.6755, "step": 1126 }, { "epoch": 0.12, "grad_norm": 2.317044885396738, "learning_rate": 9.80230824482301e-06, "loss": 0.7217, "step": 1127 }, { "epoch": 0.12, "grad_norm": 2.359531160259817, "learning_rate": 9.801839288634664e-06, "loss": 0.7032, "step": 1128 }, { "epoch": 0.12, "grad_norm": 1.8376679215636302, "learning_rate": 9.801369788130468e-06, "loss": 0.5589, "step": 1129 }, { "epoch": 0.12, "grad_norm": 2.038462127646075, "learning_rate": 9.800899743363638e-06, "loss": 0.5897, "step": 1130 }, { "epoch": 0.12, "grad_norm": 1.958505348413095, "learning_rate": 9.80042915438746e-06, "loss": 0.6568, "step": 1131 }, { "epoch": 0.12, "grad_norm": 1.9417917565556433, "learning_rate": 9.799958021255275e-06, "loss": 0.7257, "step": 1132 }, { "epoch": 0.12, "grad_norm": 1.9224737730859809, "learning_rate": 9.799486344020488e-06, "loss": 0.6253, "step": 1133 }, { "epoch": 0.12, "grad_norm": 1.8633516811392656, "learning_rate": 9.79901412273657e-06, "loss": 0.6349, "step": 1134 }, { "epoch": 0.12, "grad_norm": 2.020034769094531, "learning_rate": 9.798541357457045e-06, "loss": 0.5633, "step": 1135 }, { "epoch": 0.12, "grad_norm": 1.9064146253836125, "learning_rate": 9.79806804823551e-06, "loss": 0.6964, "step": 1136 }, { "epoch": 0.12, "grad_norm": 1.9919505999551952, "learning_rate": 9.797594195125611e-06, "loss": 0.5642, "step": 1137 }, { "epoch": 0.12, "grad_norm": 2.09562120886189, "learning_rate": 9.797119798181066e-06, "loss": 0.6715, "step": 1138 }, { "epoch": 0.12, "grad_norm": 1.8738420808946492, "learning_rate": 9.796644857455648e-06, "loss": 0.6187, "step": 1139 }, { "epoch": 0.12, "grad_norm": 2.072085736447773, "learning_rate": 9.796169373003194e-06, "loss": 0.7333, "step": 1140 }, { "epoch": 0.12, "grad_norm": 2.0711394615651577, "learning_rate": 9.795693344877609e-06, "loss": 0.697, "step": 1141 }, { "epoch": 0.12, "grad_norm": 1.9046229443546778, "learning_rate": 9.795216773132846e-06, "loss": 0.7762, "step": 1142 }, { "epoch": 0.12, "grad_norm": 1.9207903000528437, "learning_rate": 9.794739657822929e-06, "loss": 0.6793, "step": 1143 }, { "epoch": 0.12, "grad_norm": 1.782458495479854, "learning_rate": 9.794261999001944e-06, "loss": 0.7346, "step": 1144 }, { "epoch": 0.12, "grad_norm": 1.7754725000419709, "learning_rate": 9.793783796724033e-06, "loss": 0.5992, "step": 1145 }, { "epoch": 0.12, "grad_norm": 1.8592470716968033, "learning_rate": 9.793305051043407e-06, "loss": 0.7468, "step": 1146 }, { "epoch": 0.12, "grad_norm": 1.857487138917833, "learning_rate": 9.792825762014333e-06, "loss": 0.6613, "step": 1147 }, { "epoch": 0.12, "grad_norm": 1.8265947454383122, "learning_rate": 9.79234592969114e-06, "loss": 0.6637, "step": 1148 }, { "epoch": 0.12, "grad_norm": 1.9552167151750668, "learning_rate": 9.79186555412822e-06, "loss": 0.7084, "step": 1149 }, { "epoch": 0.12, "grad_norm": 1.9545771112614327, "learning_rate": 9.791384635380028e-06, "loss": 0.7181, "step": 1150 }, { "epoch": 0.12, "grad_norm": 1.958640881269104, "learning_rate": 9.790903173501075e-06, "loss": 0.6016, "step": 1151 }, { "epoch": 0.12, "grad_norm": 1.9827078690692417, "learning_rate": 9.790421168545942e-06, "loss": 0.7443, "step": 1152 }, { "epoch": 0.12, "grad_norm": 2.133727647342895, "learning_rate": 9.789938620569265e-06, "loss": 0.731, "step": 1153 }, { "epoch": 0.12, "grad_norm": 2.2672150699453457, "learning_rate": 9.789455529625743e-06, "loss": 0.6824, "step": 1154 }, { "epoch": 0.12, "grad_norm": 1.91456235430189, "learning_rate": 9.788971895770138e-06, "loss": 0.6286, "step": 1155 }, { "epoch": 0.12, "grad_norm": 1.7757643292986403, "learning_rate": 9.788487719057273e-06, "loss": 0.614, "step": 1156 }, { "epoch": 0.12, "grad_norm": 2.1250419908716687, "learning_rate": 9.78800299954203e-06, "loss": 0.6005, "step": 1157 }, { "epoch": 0.12, "grad_norm": 1.8486976022057549, "learning_rate": 9.787517737279357e-06, "loss": 0.6048, "step": 1158 }, { "epoch": 0.12, "grad_norm": 2.3506355338274845, "learning_rate": 9.787031932324262e-06, "loss": 0.7821, "step": 1159 }, { "epoch": 0.12, "grad_norm": 1.8385308824731086, "learning_rate": 9.78654558473181e-06, "loss": 0.7732, "step": 1160 }, { "epoch": 0.12, "grad_norm": 1.8744460433610264, "learning_rate": 9.786058694557136e-06, "loss": 0.7004, "step": 1161 }, { "epoch": 0.12, "grad_norm": 1.7990251826604449, "learning_rate": 9.785571261855429e-06, "loss": 0.6008, "step": 1162 }, { "epoch": 0.12, "grad_norm": 2.0387781703421046, "learning_rate": 9.78508328668194e-06, "loss": 0.7354, "step": 1163 }, { "epoch": 0.12, "grad_norm": 1.8379294386830753, "learning_rate": 9.784594769091989e-06, "loss": 0.6562, "step": 1164 }, { "epoch": 0.12, "grad_norm": 1.951858093945663, "learning_rate": 9.78410570914095e-06, "loss": 0.5601, "step": 1165 }, { "epoch": 0.12, "grad_norm": 1.963636595805986, "learning_rate": 9.783616106884258e-06, "loss": 0.7216, "step": 1166 }, { "epoch": 0.12, "grad_norm": 2.0298983330028193, "learning_rate": 9.783125962377416e-06, "loss": 0.6501, "step": 1167 }, { "epoch": 0.12, "grad_norm": 2.273467968545706, "learning_rate": 9.782635275675983e-06, "loss": 0.7842, "step": 1168 }, { "epoch": 0.12, "grad_norm": 1.872811853225732, "learning_rate": 9.78214404683558e-06, "loss": 0.6591, "step": 1169 }, { "epoch": 0.12, "grad_norm": 1.8397528593162067, "learning_rate": 9.781652275911894e-06, "loss": 0.6264, "step": 1170 }, { "epoch": 0.12, "grad_norm": 1.8859285853928063, "learning_rate": 9.781159962960667e-06, "loss": 0.5893, "step": 1171 }, { "epoch": 0.12, "grad_norm": 1.905973149180084, "learning_rate": 9.780667108037706e-06, "loss": 0.6312, "step": 1172 }, { "epoch": 0.12, "grad_norm": 1.9716374922186313, "learning_rate": 9.78017371119888e-06, "loss": 0.6401, "step": 1173 }, { "epoch": 0.12, "grad_norm": 1.9769244317036858, "learning_rate": 9.77967977250012e-06, "loss": 0.6444, "step": 1174 }, { "epoch": 0.12, "grad_norm": 2.0872679872136737, "learning_rate": 9.779185291997412e-06, "loss": 0.761, "step": 1175 }, { "epoch": 0.12, "grad_norm": 1.9107214967319497, "learning_rate": 9.778690269746811e-06, "loss": 0.6832, "step": 1176 }, { "epoch": 0.12, "grad_norm": 1.7386000757691096, "learning_rate": 9.778194705804431e-06, "loss": 0.619, "step": 1177 }, { "epoch": 0.12, "grad_norm": 2.181891137096523, "learning_rate": 9.777698600226446e-06, "loss": 0.7406, "step": 1178 }, { "epoch": 0.12, "grad_norm": 2.012111320499033, "learning_rate": 9.777201953069092e-06, "loss": 0.8356, "step": 1179 }, { "epoch": 0.12, "grad_norm": 2.2266386752482403, "learning_rate": 9.776704764388668e-06, "loss": 0.7769, "step": 1180 }, { "epoch": 0.12, "grad_norm": 2.0481630064459275, "learning_rate": 9.776207034241534e-06, "loss": 0.706, "step": 1181 }, { "epoch": 0.12, "grad_norm": 2.1147866234960637, "learning_rate": 9.775708762684106e-06, "loss": 0.6764, "step": 1182 }, { "epoch": 0.12, "grad_norm": 1.9980720033432766, "learning_rate": 9.775209949772872e-06, "loss": 0.5919, "step": 1183 }, { "epoch": 0.12, "grad_norm": 1.9142784484222324, "learning_rate": 9.774710595564371e-06, "loss": 0.7484, "step": 1184 }, { "epoch": 0.12, "grad_norm": 2.0104955568224896, "learning_rate": 9.774210700115209e-06, "loss": 0.6362, "step": 1185 }, { "epoch": 0.12, "grad_norm": 2.060715102831267, "learning_rate": 9.773710263482053e-06, "loss": 0.8285, "step": 1186 }, { "epoch": 0.12, "grad_norm": 2.375817415064652, "learning_rate": 9.77320928572163e-06, "loss": 0.6754, "step": 1187 }, { "epoch": 0.12, "grad_norm": 1.9904053183850345, "learning_rate": 9.772707766890726e-06, "loss": 0.5919, "step": 1188 }, { "epoch": 0.12, "grad_norm": 1.8189149884702953, "learning_rate": 9.772205707046194e-06, "loss": 0.6269, "step": 1189 }, { "epoch": 0.12, "grad_norm": 2.0893307362744165, "learning_rate": 9.771703106244945e-06, "loss": 0.6025, "step": 1190 }, { "epoch": 0.12, "grad_norm": 1.872217178188383, "learning_rate": 9.77119996454395e-06, "loss": 0.6917, "step": 1191 }, { "epoch": 0.12, "grad_norm": 1.8516747196972398, "learning_rate": 9.770696282000245e-06, "loss": 0.6101, "step": 1192 }, { "epoch": 0.12, "grad_norm": 1.7350140128893028, "learning_rate": 9.770192058670924e-06, "loss": 0.7053, "step": 1193 }, { "epoch": 0.12, "grad_norm": 2.124522427032116, "learning_rate": 9.769687294613143e-06, "loss": 0.7225, "step": 1194 }, { "epoch": 0.12, "grad_norm": 1.6443356705537862, "learning_rate": 9.769181989884123e-06, "loss": 0.6327, "step": 1195 }, { "epoch": 0.12, "grad_norm": 1.8787937842514755, "learning_rate": 9.76867614454114e-06, "loss": 0.685, "step": 1196 }, { "epoch": 0.12, "grad_norm": 2.054491694078201, "learning_rate": 9.768169758641535e-06, "loss": 0.6185, "step": 1197 }, { "epoch": 0.12, "grad_norm": 2.0466021116932374, "learning_rate": 9.767662832242711e-06, "loss": 0.722, "step": 1198 }, { "epoch": 0.12, "grad_norm": 1.9725740768686584, "learning_rate": 9.76715536540213e-06, "loss": 0.6419, "step": 1199 }, { "epoch": 0.12, "grad_norm": 1.8607471593989677, "learning_rate": 9.766647358177317e-06, "loss": 0.5834, "step": 1200 }, { "epoch": 0.12, "grad_norm": 1.8176363699369578, "learning_rate": 9.766138810625855e-06, "loss": 0.5901, "step": 1201 }, { "epoch": 0.12, "grad_norm": 1.902595364021203, "learning_rate": 9.765629722805394e-06, "loss": 0.642, "step": 1202 }, { "epoch": 0.13, "grad_norm": 1.9999598335834559, "learning_rate": 9.765120094773641e-06, "loss": 0.7298, "step": 1203 }, { "epoch": 0.13, "grad_norm": 1.8506077363965423, "learning_rate": 9.764609926588365e-06, "loss": 0.6875, "step": 1204 }, { "epoch": 0.13, "grad_norm": 2.0798876015732035, "learning_rate": 9.764099218307396e-06, "loss": 0.7775, "step": 1205 }, { "epoch": 0.13, "grad_norm": 1.8982097105923708, "learning_rate": 9.763587969988626e-06, "loss": 0.6821, "step": 1206 }, { "epoch": 0.13, "grad_norm": 2.051160655228281, "learning_rate": 9.763076181690008e-06, "loss": 0.6225, "step": 1207 }, { "epoch": 0.13, "grad_norm": 1.7605857993358935, "learning_rate": 9.762563853469557e-06, "loss": 0.6409, "step": 1208 }, { "epoch": 0.13, "grad_norm": 1.9105078969357518, "learning_rate": 9.762050985385348e-06, "loss": 0.5791, "step": 1209 }, { "epoch": 0.13, "grad_norm": 1.7956662172842133, "learning_rate": 9.761537577495514e-06, "loss": 0.6378, "step": 1210 }, { "epoch": 0.13, "grad_norm": 1.9382003285098017, "learning_rate": 9.761023629858258e-06, "loss": 0.5976, "step": 1211 }, { "epoch": 0.13, "grad_norm": 1.7644171198864156, "learning_rate": 9.760509142531837e-06, "loss": 0.6155, "step": 1212 }, { "epoch": 0.13, "grad_norm": 3.0937484763695258, "learning_rate": 9.759994115574571e-06, "loss": 0.7084, "step": 1213 }, { "epoch": 0.13, "grad_norm": 2.145729104645392, "learning_rate": 9.759478549044839e-06, "loss": 0.6385, "step": 1214 }, { "epoch": 0.13, "grad_norm": 1.7488025165240058, "learning_rate": 9.758962443001086e-06, "loss": 0.6194, "step": 1215 }, { "epoch": 0.13, "grad_norm": 2.008568905377275, "learning_rate": 9.758445797501816e-06, "loss": 0.5682, "step": 1216 }, { "epoch": 0.13, "grad_norm": 1.998000158709324, "learning_rate": 9.757928612605592e-06, "loss": 0.6302, "step": 1217 }, { "epoch": 0.13, "grad_norm": 1.929368096839565, "learning_rate": 9.75741088837104e-06, "loss": 0.7117, "step": 1218 }, { "epoch": 0.13, "grad_norm": 1.7873401858196971, "learning_rate": 9.756892624856848e-06, "loss": 0.5734, "step": 1219 }, { "epoch": 0.13, "grad_norm": 1.8865122527560079, "learning_rate": 9.756373822121762e-06, "loss": 0.6212, "step": 1220 }, { "epoch": 0.13, "grad_norm": 2.0688832226726075, "learning_rate": 9.755854480224596e-06, "loss": 0.6526, "step": 1221 }, { "epoch": 0.13, "grad_norm": 1.8863437615329601, "learning_rate": 9.755334599224215e-06, "loss": 0.6192, "step": 1222 }, { "epoch": 0.13, "grad_norm": 2.0972603354248753, "learning_rate": 9.754814179179552e-06, "loss": 0.6935, "step": 1223 }, { "epoch": 0.13, "grad_norm": 1.8672839080556047, "learning_rate": 9.754293220149602e-06, "loss": 0.6415, "step": 1224 }, { "epoch": 0.13, "grad_norm": 2.1056721713435014, "learning_rate": 9.753771722193417e-06, "loss": 0.6053, "step": 1225 }, { "epoch": 0.13, "grad_norm": 1.9996835954094754, "learning_rate": 9.753249685370112e-06, "loss": 0.7002, "step": 1226 }, { "epoch": 0.13, "grad_norm": 2.183109070932942, "learning_rate": 9.752727109738859e-06, "loss": 0.6372, "step": 1227 }, { "epoch": 0.13, "grad_norm": 1.9844165571120553, "learning_rate": 9.752203995358902e-06, "loss": 0.6659, "step": 1228 }, { "epoch": 0.13, "grad_norm": 1.910791627714843, "learning_rate": 9.751680342289536e-06, "loss": 0.6214, "step": 1229 }, { "epoch": 0.13, "grad_norm": 1.9193510028750618, "learning_rate": 9.751156150590117e-06, "loss": 0.6494, "step": 1230 }, { "epoch": 0.13, "grad_norm": 2.2006190540580146, "learning_rate": 9.750631420320069e-06, "loss": 0.6964, "step": 1231 }, { "epoch": 0.13, "grad_norm": 1.8660242845944797, "learning_rate": 9.750106151538871e-06, "loss": 0.6691, "step": 1232 }, { "epoch": 0.13, "grad_norm": 1.7139198365306583, "learning_rate": 9.749580344306067e-06, "loss": 0.5357, "step": 1233 }, { "epoch": 0.13, "grad_norm": 1.9794675828758101, "learning_rate": 9.74905399868126e-06, "loss": 0.6414, "step": 1234 }, { "epoch": 0.13, "grad_norm": 2.053972284777646, "learning_rate": 9.748527114724111e-06, "loss": 0.6585, "step": 1235 }, { "epoch": 0.13, "grad_norm": 1.8160635181241953, "learning_rate": 9.74799969249435e-06, "loss": 0.5822, "step": 1236 }, { "epoch": 0.13, "grad_norm": 1.9594988518586947, "learning_rate": 9.74747173205176e-06, "loss": 0.6526, "step": 1237 }, { "epoch": 0.13, "grad_norm": 2.1104409643045696, "learning_rate": 9.74694323345619e-06, "loss": 0.6936, "step": 1238 }, { "epoch": 0.13, "grad_norm": 1.9216660825585377, "learning_rate": 9.746414196767548e-06, "loss": 0.6798, "step": 1239 }, { "epoch": 0.13, "grad_norm": 2.1185106363995607, "learning_rate": 9.745884622045803e-06, "loss": 0.6708, "step": 1240 }, { "epoch": 0.13, "grad_norm": 1.7835909517236361, "learning_rate": 9.745354509350983e-06, "loss": 0.7385, "step": 1241 }, { "epoch": 0.13, "grad_norm": 2.3602417280460326, "learning_rate": 9.744823858743186e-06, "loss": 0.643, "step": 1242 }, { "epoch": 0.13, "grad_norm": 1.837677043941556, "learning_rate": 9.744292670282557e-06, "loss": 0.6202, "step": 1243 }, { "epoch": 0.13, "grad_norm": 1.9538243258295147, "learning_rate": 9.743760944029313e-06, "loss": 0.6141, "step": 1244 }, { "epoch": 0.13, "grad_norm": 1.7094725933192672, "learning_rate": 9.743228680043729e-06, "loss": 0.5873, "step": 1245 }, { "epoch": 0.13, "grad_norm": 1.898722893037348, "learning_rate": 9.742695878386136e-06, "loss": 0.6622, "step": 1246 }, { "epoch": 0.13, "grad_norm": 2.3475080247477265, "learning_rate": 9.742162539116936e-06, "loss": 0.6986, "step": 1247 }, { "epoch": 0.13, "grad_norm": 1.8734682037675061, "learning_rate": 9.74162866229658e-06, "loss": 0.6931, "step": 1248 }, { "epoch": 0.13, "grad_norm": 1.8891784901953723, "learning_rate": 9.74109424798559e-06, "loss": 0.6264, "step": 1249 }, { "epoch": 0.13, "grad_norm": 2.0263925697740683, "learning_rate": 9.740559296244543e-06, "loss": 0.6856, "step": 1250 }, { "epoch": 0.13, "grad_norm": 2.2614934669273516, "learning_rate": 9.74002380713408e-06, "loss": 0.6658, "step": 1251 }, { "epoch": 0.13, "grad_norm": 1.9590626287273527, "learning_rate": 9.7394877807149e-06, "loss": 0.6018, "step": 1252 }, { "epoch": 0.13, "grad_norm": 1.8968171625714494, "learning_rate": 9.738951217047767e-06, "loss": 0.6378, "step": 1253 }, { "epoch": 0.13, "grad_norm": 2.7508689880032957, "learning_rate": 9.738414116193503e-06, "loss": 0.7191, "step": 1254 }, { "epoch": 0.13, "grad_norm": 2.1485846944757117, "learning_rate": 9.737876478212989e-06, "loss": 0.7119, "step": 1255 }, { "epoch": 0.13, "grad_norm": 2.0333547103913663, "learning_rate": 9.737338303167173e-06, "loss": 0.6638, "step": 1256 }, { "epoch": 0.13, "grad_norm": 1.8862361580061715, "learning_rate": 9.736799591117057e-06, "loss": 0.7242, "step": 1257 }, { "epoch": 0.13, "grad_norm": 1.654552718644028, "learning_rate": 9.73626034212371e-06, "loss": 0.6576, "step": 1258 }, { "epoch": 0.13, "grad_norm": 1.8740868267530033, "learning_rate": 9.735720556248256e-06, "loss": 0.7337, "step": 1259 }, { "epoch": 0.13, "grad_norm": 1.9350463131930447, "learning_rate": 9.735180233551884e-06, "loss": 0.823, "step": 1260 }, { "epoch": 0.13, "grad_norm": 2.0570724020864093, "learning_rate": 9.734639374095845e-06, "loss": 0.6597, "step": 1261 }, { "epoch": 0.13, "grad_norm": 1.8547501290708488, "learning_rate": 9.734097977941446e-06, "loss": 0.6362, "step": 1262 }, { "epoch": 0.13, "grad_norm": 1.9035543204587644, "learning_rate": 9.733556045150057e-06, "loss": 0.6648, "step": 1263 }, { "epoch": 0.13, "grad_norm": 2.0108959931171557, "learning_rate": 9.73301357578311e-06, "loss": 0.7228, "step": 1264 }, { "epoch": 0.13, "grad_norm": 1.865013262710532, "learning_rate": 9.732470569902097e-06, "loss": 0.7365, "step": 1265 }, { "epoch": 0.13, "grad_norm": 2.217716624827927, "learning_rate": 9.731927027568569e-06, "loss": 0.7753, "step": 1266 }, { "epoch": 0.13, "grad_norm": 1.7979547620947482, "learning_rate": 9.731382948844143e-06, "loss": 0.5906, "step": 1267 }, { "epoch": 0.13, "grad_norm": 1.7508977673648394, "learning_rate": 9.730838333790493e-06, "loss": 0.6117, "step": 1268 }, { "epoch": 0.13, "grad_norm": 1.6893969959243762, "learning_rate": 9.73029318246935e-06, "loss": 0.6358, "step": 1269 }, { "epoch": 0.13, "grad_norm": 2.166240330404318, "learning_rate": 9.729747494942515e-06, "loss": 0.6924, "step": 1270 }, { "epoch": 0.13, "grad_norm": 2.000608514191742, "learning_rate": 9.729201271271842e-06, "loss": 0.7347, "step": 1271 }, { "epoch": 0.13, "grad_norm": 2.103793789959187, "learning_rate": 9.728654511519251e-06, "loss": 0.7285, "step": 1272 }, { "epoch": 0.13, "grad_norm": 1.8409733568718099, "learning_rate": 9.728107215746717e-06, "loss": 0.692, "step": 1273 }, { "epoch": 0.13, "grad_norm": 1.963562475533344, "learning_rate": 9.72755938401628e-06, "loss": 0.6062, "step": 1274 }, { "epoch": 0.13, "grad_norm": 1.8176381153691845, "learning_rate": 9.727011016390044e-06, "loss": 0.7648, "step": 1275 }, { "epoch": 0.13, "grad_norm": 1.8358596377570153, "learning_rate": 9.726462112930165e-06, "loss": 0.6272, "step": 1276 }, { "epoch": 0.13, "grad_norm": 1.8482133621035386, "learning_rate": 9.725912673698863e-06, "loss": 0.6437, "step": 1277 }, { "epoch": 0.13, "grad_norm": 1.7093243741213524, "learning_rate": 9.725362698758425e-06, "loss": 0.5643, "step": 1278 }, { "epoch": 0.13, "grad_norm": 1.9313380353034446, "learning_rate": 9.72481218817119e-06, "loss": 0.7326, "step": 1279 }, { "epoch": 0.13, "grad_norm": 2.0110578630440696, "learning_rate": 9.724261141999564e-06, "loss": 0.7809, "step": 1280 }, { "epoch": 0.13, "grad_norm": 2.0212582494727056, "learning_rate": 9.723709560306009e-06, "loss": 0.7539, "step": 1281 }, { "epoch": 0.13, "grad_norm": 1.956126223537327, "learning_rate": 9.723157443153053e-06, "loss": 0.6653, "step": 1282 }, { "epoch": 0.13, "grad_norm": 1.8182910112574044, "learning_rate": 9.722604790603279e-06, "loss": 0.7183, "step": 1283 }, { "epoch": 0.13, "grad_norm": 2.108210363465132, "learning_rate": 9.722051602719333e-06, "loss": 0.712, "step": 1284 }, { "epoch": 0.13, "grad_norm": 1.9131048983229353, "learning_rate": 9.721497879563924e-06, "loss": 0.7338, "step": 1285 }, { "epoch": 0.13, "grad_norm": 1.949411903606035, "learning_rate": 9.72094362119982e-06, "loss": 0.6708, "step": 1286 }, { "epoch": 0.13, "grad_norm": 2.0746178969752416, "learning_rate": 9.720388827689843e-06, "loss": 0.563, "step": 1287 }, { "epoch": 0.13, "grad_norm": 2.190230314831356, "learning_rate": 9.719833499096891e-06, "loss": 0.6351, "step": 1288 }, { "epoch": 0.13, "grad_norm": 1.8460044124762562, "learning_rate": 9.71927763548391e-06, "loss": 0.6958, "step": 1289 }, { "epoch": 0.13, "grad_norm": 1.6752039016736493, "learning_rate": 9.718721236913909e-06, "loss": 0.5744, "step": 1290 }, { "epoch": 0.13, "grad_norm": 1.9927244031221076, "learning_rate": 9.718164303449961e-06, "loss": 0.7696, "step": 1291 }, { "epoch": 0.13, "grad_norm": 1.8995517099170962, "learning_rate": 9.717606835155195e-06, "loss": 0.6031, "step": 1292 }, { "epoch": 0.13, "grad_norm": 1.940650266443028, "learning_rate": 9.717048832092806e-06, "loss": 0.6741, "step": 1293 }, { "epoch": 0.13, "grad_norm": 1.578177156458987, "learning_rate": 9.716490294326046e-06, "loss": 0.5341, "step": 1294 }, { "epoch": 0.13, "grad_norm": 2.3682750170322975, "learning_rate": 9.715931221918227e-06, "loss": 0.6361, "step": 1295 }, { "epoch": 0.13, "grad_norm": 1.8435349334772175, "learning_rate": 9.715371614932725e-06, "loss": 0.6257, "step": 1296 }, { "epoch": 0.13, "grad_norm": 1.9264375579621833, "learning_rate": 9.714811473432973e-06, "loss": 0.7395, "step": 1297 }, { "epoch": 0.13, "grad_norm": 1.9461581602936884, "learning_rate": 9.714250797482468e-06, "loss": 0.6327, "step": 1298 }, { "epoch": 0.14, "grad_norm": 2.1144212881507425, "learning_rate": 9.713689587144762e-06, "loss": 0.6785, "step": 1299 }, { "epoch": 0.14, "grad_norm": 1.6928260272004876, "learning_rate": 9.713127842483476e-06, "loss": 0.5079, "step": 1300 }, { "epoch": 0.14, "grad_norm": 2.3246045980360783, "learning_rate": 9.712565563562286e-06, "loss": 0.7336, "step": 1301 }, { "epoch": 0.14, "grad_norm": 1.7227636255508532, "learning_rate": 9.712002750444926e-06, "loss": 0.7164, "step": 1302 }, { "epoch": 0.14, "grad_norm": 1.8982343901775387, "learning_rate": 9.7114394031952e-06, "loss": 0.708, "step": 1303 }, { "epoch": 0.14, "grad_norm": 2.1258743453940028, "learning_rate": 9.71087552187696e-06, "loss": 0.7317, "step": 1304 }, { "epoch": 0.14, "grad_norm": 1.9721335267277356, "learning_rate": 9.710311106554132e-06, "loss": 0.6371, "step": 1305 }, { "epoch": 0.14, "grad_norm": 1.7625782159118968, "learning_rate": 9.70974615729069e-06, "loss": 0.5839, "step": 1306 }, { "epoch": 0.14, "grad_norm": 2.4807222261088127, "learning_rate": 9.709180674150677e-06, "loss": 0.7081, "step": 1307 }, { "epoch": 0.14, "grad_norm": 1.9903239990967319, "learning_rate": 9.708614657198194e-06, "loss": 0.6491, "step": 1308 }, { "epoch": 0.14, "grad_norm": 1.925530757651826, "learning_rate": 9.7080481064974e-06, "loss": 0.7496, "step": 1309 }, { "epoch": 0.14, "grad_norm": 1.9292533506628393, "learning_rate": 9.70748102211252e-06, "loss": 0.6853, "step": 1310 }, { "epoch": 0.14, "grad_norm": 1.9690106352432073, "learning_rate": 9.706913404107832e-06, "loss": 0.5786, "step": 1311 }, { "epoch": 0.14, "grad_norm": 2.0319183887210044, "learning_rate": 9.706345252547681e-06, "loss": 0.6004, "step": 1312 }, { "epoch": 0.14, "grad_norm": 1.9599275382294734, "learning_rate": 9.705776567496473e-06, "loss": 0.7236, "step": 1313 }, { "epoch": 0.14, "grad_norm": 1.9230052599444754, "learning_rate": 9.705207349018668e-06, "loss": 0.708, "step": 1314 }, { "epoch": 0.14, "grad_norm": 1.8972194347454603, "learning_rate": 9.70463759717879e-06, "loss": 0.716, "step": 1315 }, { "epoch": 0.14, "grad_norm": 2.010675575041699, "learning_rate": 9.704067312041426e-06, "loss": 0.6905, "step": 1316 }, { "epoch": 0.14, "grad_norm": 2.2746498328993505, "learning_rate": 9.703496493671219e-06, "loss": 0.5917, "step": 1317 }, { "epoch": 0.14, "grad_norm": 1.9221483056550144, "learning_rate": 9.702925142132876e-06, "loss": 0.6797, "step": 1318 }, { "epoch": 0.14, "grad_norm": 2.0044204535629047, "learning_rate": 9.702353257491162e-06, "loss": 0.7663, "step": 1319 }, { "epoch": 0.14, "grad_norm": 2.133217438263875, "learning_rate": 9.701780839810903e-06, "loss": 0.7136, "step": 1320 }, { "epoch": 0.14, "grad_norm": 1.84335445808742, "learning_rate": 9.701207889156989e-06, "loss": 0.6814, "step": 1321 }, { "epoch": 0.14, "grad_norm": 1.984334565307144, "learning_rate": 9.700634405594364e-06, "loss": 0.6403, "step": 1322 }, { "epoch": 0.14, "grad_norm": 1.827908300013016, "learning_rate": 9.700060389188035e-06, "loss": 0.6725, "step": 1323 }, { "epoch": 0.14, "grad_norm": 2.009494835552701, "learning_rate": 9.699485840003072e-06, "loss": 0.6841, "step": 1324 }, { "epoch": 0.14, "grad_norm": 1.91704570730664, "learning_rate": 9.698910758104603e-06, "loss": 0.6762, "step": 1325 }, { "epoch": 0.14, "grad_norm": 2.005127161184001, "learning_rate": 9.698335143557818e-06, "loss": 0.7461, "step": 1326 }, { "epoch": 0.14, "grad_norm": 2.1535407683860095, "learning_rate": 9.697758996427962e-06, "loss": 0.7321, "step": 1327 }, { "epoch": 0.14, "grad_norm": 1.9080003028880403, "learning_rate": 9.69718231678035e-06, "loss": 0.672, "step": 1328 }, { "epoch": 0.14, "grad_norm": 1.9437619321838724, "learning_rate": 9.696605104680348e-06, "loss": 0.7428, "step": 1329 }, { "epoch": 0.14, "grad_norm": 2.037382120827923, "learning_rate": 9.696027360193387e-06, "loss": 0.7703, "step": 1330 }, { "epoch": 0.14, "grad_norm": 1.8931689172830715, "learning_rate": 9.69544908338496e-06, "loss": 0.6491, "step": 1331 }, { "epoch": 0.14, "grad_norm": 1.8790236237302318, "learning_rate": 9.694870274320616e-06, "loss": 0.6525, "step": 1332 }, { "epoch": 0.14, "grad_norm": 1.910244499985207, "learning_rate": 9.694290933065966e-06, "loss": 0.6577, "step": 1333 }, { "epoch": 0.14, "grad_norm": 1.9579528999546667, "learning_rate": 9.693711059686682e-06, "loss": 0.6902, "step": 1334 }, { "epoch": 0.14, "grad_norm": 1.9396330418699732, "learning_rate": 9.693130654248497e-06, "loss": 0.7542, "step": 1335 }, { "epoch": 0.14, "grad_norm": 1.9042496696960487, "learning_rate": 9.692549716817202e-06, "loss": 0.5711, "step": 1336 }, { "epoch": 0.14, "grad_norm": 1.8895202592925349, "learning_rate": 9.691968247458648e-06, "loss": 0.6255, "step": 1337 }, { "epoch": 0.14, "grad_norm": 2.0965642202557544, "learning_rate": 9.691386246238753e-06, "loss": 0.7484, "step": 1338 }, { "epoch": 0.14, "grad_norm": 1.8015674513354294, "learning_rate": 9.690803713223485e-06, "loss": 0.6473, "step": 1339 }, { "epoch": 0.14, "grad_norm": 1.9184186707119182, "learning_rate": 9.690220648478879e-06, "loss": 0.5988, "step": 1340 }, { "epoch": 0.14, "grad_norm": 1.9770192689507424, "learning_rate": 9.689637052071031e-06, "loss": 0.7, "step": 1341 }, { "epoch": 0.14, "grad_norm": 2.0449862402255508, "learning_rate": 9.689052924066091e-06, "loss": 0.6675, "step": 1342 }, { "epoch": 0.14, "grad_norm": 1.8859554632950104, "learning_rate": 9.688468264530278e-06, "loss": 0.676, "step": 1343 }, { "epoch": 0.14, "grad_norm": 2.006052287719628, "learning_rate": 9.68788307352986e-06, "loss": 0.7827, "step": 1344 }, { "epoch": 0.14, "grad_norm": 1.8945746237944512, "learning_rate": 9.687297351131179e-06, "loss": 0.7134, "step": 1345 }, { "epoch": 0.14, "grad_norm": 2.02227847196895, "learning_rate": 9.686711097400625e-06, "loss": 0.7382, "step": 1346 }, { "epoch": 0.14, "grad_norm": 1.878484044957481, "learning_rate": 9.686124312404656e-06, "loss": 0.7726, "step": 1347 }, { "epoch": 0.14, "grad_norm": 1.630184078610918, "learning_rate": 9.685536996209785e-06, "loss": 0.5358, "step": 1348 }, { "epoch": 0.14, "grad_norm": 1.85620192643215, "learning_rate": 9.68494914888259e-06, "loss": 0.6286, "step": 1349 }, { "epoch": 0.14, "grad_norm": 2.0114268912492403, "learning_rate": 9.684360770489704e-06, "loss": 0.6904, "step": 1350 }, { "epoch": 0.14, "grad_norm": 1.6746633261022315, "learning_rate": 9.683771861097825e-06, "loss": 0.6124, "step": 1351 }, { "epoch": 0.14, "grad_norm": 1.9084737163323533, "learning_rate": 9.68318242077371e-06, "loss": 0.6784, "step": 1352 }, { "epoch": 0.14, "grad_norm": 2.053592952699433, "learning_rate": 9.682592449584174e-06, "loss": 0.7364, "step": 1353 }, { "epoch": 0.14, "grad_norm": 1.8759874911112053, "learning_rate": 9.682001947596093e-06, "loss": 0.7605, "step": 1354 }, { "epoch": 0.14, "grad_norm": 2.051882469686672, "learning_rate": 9.681410914876406e-06, "loss": 0.6258, "step": 1355 }, { "epoch": 0.14, "grad_norm": 1.8668394817877236, "learning_rate": 9.68081935149211e-06, "loss": 0.6545, "step": 1356 }, { "epoch": 0.14, "grad_norm": 2.151948478937282, "learning_rate": 9.680227257510257e-06, "loss": 0.7088, "step": 1357 }, { "epoch": 0.14, "grad_norm": 1.896198321769729, "learning_rate": 9.67963463299797e-06, "loss": 0.6797, "step": 1358 }, { "epoch": 0.14, "grad_norm": 1.8333904019961873, "learning_rate": 9.679041478022424e-06, "loss": 0.6455, "step": 1359 }, { "epoch": 0.14, "grad_norm": 1.920785330687945, "learning_rate": 9.678447792650858e-06, "loss": 0.7701, "step": 1360 }, { "epoch": 0.14, "grad_norm": 1.9740522491158696, "learning_rate": 9.677853576950567e-06, "loss": 0.6122, "step": 1361 }, { "epoch": 0.14, "grad_norm": 2.0093433073912195, "learning_rate": 9.677258830988911e-06, "loss": 0.688, "step": 1362 }, { "epoch": 0.14, "grad_norm": 1.9899501677577536, "learning_rate": 9.676663554833307e-06, "loss": 0.6969, "step": 1363 }, { "epoch": 0.14, "grad_norm": 2.0662149446869846, "learning_rate": 9.676067748551232e-06, "loss": 0.7624, "step": 1364 }, { "epoch": 0.14, "grad_norm": 1.8510578010237781, "learning_rate": 9.675471412210225e-06, "loss": 0.6667, "step": 1365 }, { "epoch": 0.14, "grad_norm": 2.2586481507249565, "learning_rate": 9.674874545877886e-06, "loss": 0.7025, "step": 1366 }, { "epoch": 0.14, "grad_norm": 1.774087072360693, "learning_rate": 9.674277149621869e-06, "loss": 0.7498, "step": 1367 }, { "epoch": 0.14, "grad_norm": 1.9300929758419567, "learning_rate": 9.673679223509895e-06, "loss": 0.7127, "step": 1368 }, { "epoch": 0.14, "grad_norm": 2.0352754562115294, "learning_rate": 9.673080767609743e-06, "loss": 0.6448, "step": 1369 }, { "epoch": 0.14, "grad_norm": 1.8977730151853256, "learning_rate": 9.67248178198925e-06, "loss": 0.6987, "step": 1370 }, { "epoch": 0.14, "grad_norm": 2.004055084209392, "learning_rate": 9.671882266716315e-06, "loss": 0.8061, "step": 1371 }, { "epoch": 0.14, "grad_norm": 1.9454168688791278, "learning_rate": 9.671282221858897e-06, "loss": 0.7274, "step": 1372 }, { "epoch": 0.14, "grad_norm": 1.9198819052306686, "learning_rate": 9.670681647485012e-06, "loss": 0.6711, "step": 1373 }, { "epoch": 0.14, "grad_norm": 1.7097410217988573, "learning_rate": 9.670080543662742e-06, "loss": 0.6368, "step": 1374 }, { "epoch": 0.14, "grad_norm": 2.050598473467362, "learning_rate": 9.669478910460222e-06, "loss": 0.7401, "step": 1375 }, { "epoch": 0.14, "grad_norm": 1.9465646412985562, "learning_rate": 9.668876747945652e-06, "loss": 0.6021, "step": 1376 }, { "epoch": 0.14, "grad_norm": 1.9335106076774247, "learning_rate": 9.668274056187293e-06, "loss": 0.6118, "step": 1377 }, { "epoch": 0.14, "grad_norm": 2.022082750564337, "learning_rate": 9.66767083525346e-06, "loss": 0.6889, "step": 1378 }, { "epoch": 0.14, "grad_norm": 1.755623299169476, "learning_rate": 9.667067085212533e-06, "loss": 0.7057, "step": 1379 }, { "epoch": 0.14, "grad_norm": 1.928965446296919, "learning_rate": 9.66646280613295e-06, "loss": 0.5745, "step": 1380 }, { "epoch": 0.14, "grad_norm": 1.8155202857113888, "learning_rate": 9.665857998083212e-06, "loss": 0.6229, "step": 1381 }, { "epoch": 0.14, "grad_norm": 1.7813070676468963, "learning_rate": 9.665252661131874e-06, "loss": 0.5898, "step": 1382 }, { "epoch": 0.14, "grad_norm": 1.9563053298185136, "learning_rate": 9.664646795347556e-06, "loss": 0.7274, "step": 1383 }, { "epoch": 0.14, "grad_norm": 1.9349658516984791, "learning_rate": 9.664040400798937e-06, "loss": 0.6404, "step": 1384 }, { "epoch": 0.14, "grad_norm": 2.188084590268582, "learning_rate": 9.663433477554753e-06, "loss": 0.6934, "step": 1385 }, { "epoch": 0.14, "grad_norm": 2.2197372636124917, "learning_rate": 9.662826025683805e-06, "loss": 0.6701, "step": 1386 }, { "epoch": 0.14, "grad_norm": 1.9856243079185785, "learning_rate": 9.662218045254949e-06, "loss": 0.667, "step": 1387 }, { "epoch": 0.14, "grad_norm": 1.9225996019695955, "learning_rate": 9.661609536337104e-06, "loss": 0.6174, "step": 1388 }, { "epoch": 0.14, "grad_norm": 2.0817089630736727, "learning_rate": 9.661000498999248e-06, "loss": 0.7104, "step": 1389 }, { "epoch": 0.14, "grad_norm": 1.8238863336509912, "learning_rate": 9.660390933310418e-06, "loss": 0.612, "step": 1390 }, { "epoch": 0.14, "grad_norm": 1.8431042408210576, "learning_rate": 9.659780839339713e-06, "loss": 0.6929, "step": 1391 }, { "epoch": 0.14, "grad_norm": 1.889104137450392, "learning_rate": 9.65917021715629e-06, "loss": 0.6759, "step": 1392 }, { "epoch": 0.14, "grad_norm": 1.9497779511731066, "learning_rate": 9.658559066829365e-06, "loss": 0.6041, "step": 1393 }, { "epoch": 0.14, "grad_norm": 2.039428108567977, "learning_rate": 9.65794738842822e-06, "loss": 0.7342, "step": 1394 }, { "epoch": 0.15, "grad_norm": 2.0211962356651734, "learning_rate": 9.657335182022187e-06, "loss": 0.7797, "step": 1395 }, { "epoch": 0.15, "grad_norm": 1.9990273830284, "learning_rate": 9.656722447680664e-06, "loss": 0.7011, "step": 1396 }, { "epoch": 0.15, "grad_norm": 1.88149406805676, "learning_rate": 9.65610918547311e-06, "loss": 0.6511, "step": 1397 }, { "epoch": 0.15, "grad_norm": 1.6989907788026, "learning_rate": 9.655495395469042e-06, "loss": 0.5867, "step": 1398 }, { "epoch": 0.15, "grad_norm": 1.9148308369176725, "learning_rate": 9.654881077738035e-06, "loss": 0.577, "step": 1399 }, { "epoch": 0.15, "grad_norm": 1.8599257790893318, "learning_rate": 9.654266232349727e-06, "loss": 0.64, "step": 1400 }, { "epoch": 0.15, "grad_norm": 2.0076623853739903, "learning_rate": 9.653650859373811e-06, "loss": 0.7321, "step": 1401 }, { "epoch": 0.15, "grad_norm": 1.8313666092285423, "learning_rate": 9.653034958880045e-06, "loss": 0.6341, "step": 1402 }, { "epoch": 0.15, "grad_norm": 1.8722180403613942, "learning_rate": 9.652418530938248e-06, "loss": 0.7881, "step": 1403 }, { "epoch": 0.15, "grad_norm": 2.083375770806738, "learning_rate": 9.651801575618289e-06, "loss": 0.7394, "step": 1404 }, { "epoch": 0.15, "grad_norm": 1.6813245697465693, "learning_rate": 9.651184092990109e-06, "loss": 0.5917, "step": 1405 }, { "epoch": 0.15, "grad_norm": 1.8348700669336107, "learning_rate": 9.650566083123701e-06, "loss": 0.5931, "step": 1406 }, { "epoch": 0.15, "grad_norm": 2.0187798705574207, "learning_rate": 9.64994754608912e-06, "loss": 0.7104, "step": 1407 }, { "epoch": 0.15, "grad_norm": 1.949254115187299, "learning_rate": 9.649328481956481e-06, "loss": 0.7475, "step": 1408 }, { "epoch": 0.15, "grad_norm": 1.9312680985383277, "learning_rate": 9.64870889079596e-06, "loss": 0.6517, "step": 1409 }, { "epoch": 0.15, "grad_norm": 2.28060400124159, "learning_rate": 9.648088772677787e-06, "loss": 0.6683, "step": 1410 }, { "epoch": 0.15, "grad_norm": 1.9498068405147828, "learning_rate": 9.647468127672258e-06, "loss": 0.6945, "step": 1411 }, { "epoch": 0.15, "grad_norm": 2.3803787498905944, "learning_rate": 9.64684695584973e-06, "loss": 0.7755, "step": 1412 }, { "epoch": 0.15, "grad_norm": 1.7106971204478838, "learning_rate": 9.64622525728061e-06, "loss": 0.6462, "step": 1413 }, { "epoch": 0.15, "grad_norm": 1.720138728717716, "learning_rate": 9.645603032035375e-06, "loss": 0.5997, "step": 1414 }, { "epoch": 0.15, "grad_norm": 1.8333700801882125, "learning_rate": 9.644980280184559e-06, "loss": 0.6211, "step": 1415 }, { "epoch": 0.15, "grad_norm": 1.9341434824630561, "learning_rate": 9.644357001798752e-06, "loss": 0.5569, "step": 1416 }, { "epoch": 0.15, "grad_norm": 1.9348393525012055, "learning_rate": 9.643733196948607e-06, "loss": 0.5925, "step": 1417 }, { "epoch": 0.15, "grad_norm": 1.8537207494480576, "learning_rate": 9.643108865704836e-06, "loss": 0.686, "step": 1418 }, { "epoch": 0.15, "grad_norm": 1.9794781788094675, "learning_rate": 9.64248400813821e-06, "loss": 0.6331, "step": 1419 }, { "epoch": 0.15, "grad_norm": 1.8934282311552093, "learning_rate": 9.64185862431956e-06, "loss": 0.6109, "step": 1420 }, { "epoch": 0.15, "grad_norm": 1.9403239182134335, "learning_rate": 9.641232714319777e-06, "loss": 0.7185, "step": 1421 }, { "epoch": 0.15, "grad_norm": 2.157701765972057, "learning_rate": 9.640606278209812e-06, "loss": 0.7755, "step": 1422 }, { "epoch": 0.15, "grad_norm": 1.6740846643105078, "learning_rate": 9.639979316060675e-06, "loss": 0.6013, "step": 1423 }, { "epoch": 0.15, "grad_norm": 2.12677231177225, "learning_rate": 9.639351827943436e-06, "loss": 0.6144, "step": 1424 }, { "epoch": 0.15, "grad_norm": 2.118780104812153, "learning_rate": 9.638723813929224e-06, "loss": 0.7439, "step": 1425 }, { "epoch": 0.15, "grad_norm": 2.2357617900959728, "learning_rate": 9.638095274089226e-06, "loss": 0.6893, "step": 1426 }, { "epoch": 0.15, "grad_norm": 1.7246115215231097, "learning_rate": 9.637466208494694e-06, "loss": 0.7229, "step": 1427 }, { "epoch": 0.15, "grad_norm": 2.01688121939328, "learning_rate": 9.636836617216934e-06, "loss": 0.6563, "step": 1428 }, { "epoch": 0.15, "grad_norm": 1.916960546551675, "learning_rate": 9.636206500327316e-06, "loss": 0.734, "step": 1429 }, { "epoch": 0.15, "grad_norm": 1.9670238441866768, "learning_rate": 9.635575857897264e-06, "loss": 0.8007, "step": 1430 }, { "epoch": 0.15, "grad_norm": 1.9222282752835942, "learning_rate": 9.634944689998267e-06, "loss": 0.6885, "step": 1431 }, { "epoch": 0.15, "grad_norm": 1.8951313109327579, "learning_rate": 9.63431299670187e-06, "loss": 0.5511, "step": 1432 }, { "epoch": 0.15, "grad_norm": 2.087254254458319, "learning_rate": 9.633680778079682e-06, "loss": 0.7344, "step": 1433 }, { "epoch": 0.15, "grad_norm": 1.8839664631491326, "learning_rate": 9.633048034203365e-06, "loss": 0.7775, "step": 1434 }, { "epoch": 0.15, "grad_norm": 1.90221214075658, "learning_rate": 9.632414765144646e-06, "loss": 0.6708, "step": 1435 }, { "epoch": 0.15, "grad_norm": 2.078581620152747, "learning_rate": 9.631780970975311e-06, "loss": 0.6322, "step": 1436 }, { "epoch": 0.15, "grad_norm": 1.83487929881022, "learning_rate": 9.631146651767202e-06, "loss": 0.669, "step": 1437 }, { "epoch": 0.15, "grad_norm": 1.832923326521618, "learning_rate": 9.630511807592224e-06, "loss": 0.6101, "step": 1438 }, { "epoch": 0.15, "grad_norm": 2.1141323443224436, "learning_rate": 9.629876438522338e-06, "loss": 0.7117, "step": 1439 }, { "epoch": 0.15, "grad_norm": 2.06022961270754, "learning_rate": 9.62924054462957e-06, "loss": 0.7348, "step": 1440 }, { "epoch": 0.15, "grad_norm": 1.8267609596216754, "learning_rate": 9.628604125985999e-06, "loss": 0.7431, "step": 1441 }, { "epoch": 0.15, "grad_norm": 1.9004596571143735, "learning_rate": 9.627967182663768e-06, "loss": 0.7026, "step": 1442 }, { "epoch": 0.15, "grad_norm": 2.0016223705105727, "learning_rate": 9.627329714735079e-06, "loss": 0.6671, "step": 1443 }, { "epoch": 0.15, "grad_norm": 1.930460301179286, "learning_rate": 9.626691722272193e-06, "loss": 0.6102, "step": 1444 }, { "epoch": 0.15, "grad_norm": 1.9009166256026644, "learning_rate": 9.626053205347428e-06, "loss": 0.7215, "step": 1445 }, { "epoch": 0.15, "grad_norm": 2.18241662087263, "learning_rate": 9.625414164033163e-06, "loss": 0.7554, "step": 1446 }, { "epoch": 0.15, "grad_norm": 1.7751878116739175, "learning_rate": 9.62477459840184e-06, "loss": 0.6397, "step": 1447 }, { "epoch": 0.15, "grad_norm": 1.946828033661672, "learning_rate": 9.624134508525957e-06, "loss": 0.6376, "step": 1448 }, { "epoch": 0.15, "grad_norm": 2.096187878367154, "learning_rate": 9.623493894478069e-06, "loss": 0.782, "step": 1449 }, { "epoch": 0.15, "grad_norm": 1.6619296901186194, "learning_rate": 9.622852756330797e-06, "loss": 0.6232, "step": 1450 }, { "epoch": 0.15, "grad_norm": 2.130541226400497, "learning_rate": 9.622211094156812e-06, "loss": 0.6484, "step": 1451 }, { "epoch": 0.15, "grad_norm": 1.9178708685945536, "learning_rate": 9.621568908028857e-06, "loss": 0.7487, "step": 1452 }, { "epoch": 0.15, "grad_norm": 1.9169807937008756, "learning_rate": 9.620926198019724e-06, "loss": 0.6049, "step": 1453 }, { "epoch": 0.15, "grad_norm": 1.8835439671046228, "learning_rate": 9.620282964202267e-06, "loss": 0.7385, "step": 1454 }, { "epoch": 0.15, "grad_norm": 1.92405276657743, "learning_rate": 9.619639206649402e-06, "loss": 0.646, "step": 1455 }, { "epoch": 0.15, "grad_norm": 1.9741432328020077, "learning_rate": 9.618994925434103e-06, "loss": 0.6466, "step": 1456 }, { "epoch": 0.15, "grad_norm": 2.0886770963919425, "learning_rate": 9.618350120629398e-06, "loss": 0.614, "step": 1457 }, { "epoch": 0.15, "grad_norm": 1.8307278823762825, "learning_rate": 9.617704792308387e-06, "loss": 0.6774, "step": 1458 }, { "epoch": 0.15, "grad_norm": 2.003204931637325, "learning_rate": 9.617058940544216e-06, "loss": 0.6881, "step": 1459 }, { "epoch": 0.15, "grad_norm": 1.716683988327422, "learning_rate": 9.616412565410097e-06, "loss": 0.644, "step": 1460 }, { "epoch": 0.15, "grad_norm": 1.9389630819810084, "learning_rate": 9.615765666979302e-06, "loss": 0.6889, "step": 1461 }, { "epoch": 0.15, "grad_norm": 1.734722035180772, "learning_rate": 9.615118245325161e-06, "loss": 0.6487, "step": 1462 }, { "epoch": 0.15, "grad_norm": 1.9708045285573386, "learning_rate": 9.61447030052106e-06, "loss": 0.7563, "step": 1463 }, { "epoch": 0.15, "grad_norm": 2.045152715335636, "learning_rate": 9.613821832640448e-06, "loss": 0.6307, "step": 1464 }, { "epoch": 0.15, "grad_norm": 1.8477983077439863, "learning_rate": 9.613172841756835e-06, "loss": 0.5452, "step": 1465 }, { "epoch": 0.15, "grad_norm": 1.9003093700116294, "learning_rate": 9.612523327943786e-06, "loss": 0.6483, "step": 1466 }, { "epoch": 0.15, "grad_norm": 1.8638599615140454, "learning_rate": 9.611873291274927e-06, "loss": 0.7114, "step": 1467 }, { "epoch": 0.15, "grad_norm": 1.8520477349826896, "learning_rate": 9.611222731823944e-06, "loss": 0.6058, "step": 1468 }, { "epoch": 0.15, "grad_norm": 1.9085000538619996, "learning_rate": 9.61057164966458e-06, "loss": 0.6045, "step": 1469 }, { "epoch": 0.15, "grad_norm": 1.8006432662990515, "learning_rate": 9.609920044870643e-06, "loss": 0.6029, "step": 1470 }, { "epoch": 0.15, "grad_norm": 1.9179518990616973, "learning_rate": 9.60926791751599e-06, "loss": 0.7078, "step": 1471 }, { "epoch": 0.15, "grad_norm": 2.042582586101166, "learning_rate": 9.608615267674548e-06, "loss": 0.662, "step": 1472 }, { "epoch": 0.15, "grad_norm": 1.7613901736843571, "learning_rate": 9.607962095420297e-06, "loss": 0.6698, "step": 1473 }, { "epoch": 0.15, "grad_norm": 2.0988777706402897, "learning_rate": 9.607308400827277e-06, "loss": 0.6381, "step": 1474 }, { "epoch": 0.15, "grad_norm": 2.378994760615408, "learning_rate": 9.606654183969591e-06, "loss": 0.7589, "step": 1475 }, { "epoch": 0.15, "grad_norm": 1.8172655352330878, "learning_rate": 9.605999444921394e-06, "loss": 0.6398, "step": 1476 }, { "epoch": 0.15, "grad_norm": 2.037892899762544, "learning_rate": 9.605344183756908e-06, "loss": 0.7299, "step": 1477 }, { "epoch": 0.15, "grad_norm": 1.7584598825730333, "learning_rate": 9.60468840055041e-06, "loss": 0.7002, "step": 1478 }, { "epoch": 0.15, "grad_norm": 1.743347054484532, "learning_rate": 9.604032095376234e-06, "loss": 0.6489, "step": 1479 }, { "epoch": 0.15, "grad_norm": 1.8804620415715747, "learning_rate": 9.603375268308779e-06, "loss": 0.6259, "step": 1480 }, { "epoch": 0.15, "grad_norm": 2.170002273856052, "learning_rate": 9.602717919422499e-06, "loss": 0.7196, "step": 1481 }, { "epoch": 0.15, "grad_norm": 1.9421409150574973, "learning_rate": 9.602060048791908e-06, "loss": 0.6924, "step": 1482 }, { "epoch": 0.15, "grad_norm": 1.9717758470116444, "learning_rate": 9.60140165649158e-06, "loss": 0.7171, "step": 1483 }, { "epoch": 0.15, "grad_norm": 2.4743252790664467, "learning_rate": 9.600742742596146e-06, "loss": 0.7644, "step": 1484 }, { "epoch": 0.15, "grad_norm": 2.03725397790848, "learning_rate": 9.6000833071803e-06, "loss": 0.6426, "step": 1485 }, { "epoch": 0.15, "grad_norm": 2.004957380913632, "learning_rate": 9.599423350318791e-06, "loss": 0.7055, "step": 1486 }, { "epoch": 0.15, "grad_norm": 1.8582757642070944, "learning_rate": 9.598762872086428e-06, "loss": 0.6467, "step": 1487 }, { "epoch": 0.15, "grad_norm": 2.0838491494548603, "learning_rate": 9.598101872558085e-06, "loss": 0.6039, "step": 1488 }, { "epoch": 0.15, "grad_norm": 1.8164477514953552, "learning_rate": 9.597440351808684e-06, "loss": 0.6671, "step": 1489 }, { "epoch": 0.15, "grad_norm": 1.9808384701216841, "learning_rate": 9.596778309913215e-06, "loss": 0.6528, "step": 1490 }, { "epoch": 0.15, "grad_norm": 2.003749745611465, "learning_rate": 9.596115746946723e-06, "loss": 0.7215, "step": 1491 }, { "epoch": 0.16, "grad_norm": 1.9103354885895816, "learning_rate": 9.595452662984314e-06, "loss": 0.6408, "step": 1492 }, { "epoch": 0.16, "grad_norm": 2.056675445688021, "learning_rate": 9.594789058101154e-06, "loss": 0.729, "step": 1493 }, { "epoch": 0.16, "grad_norm": 2.008591475324911, "learning_rate": 9.594124932372465e-06, "loss": 0.6412, "step": 1494 }, { "epoch": 0.16, "grad_norm": 2.091130750278084, "learning_rate": 9.593460285873528e-06, "loss": 0.6649, "step": 1495 }, { "epoch": 0.16, "grad_norm": 1.920035920120745, "learning_rate": 9.592795118679686e-06, "loss": 0.6084, "step": 1496 }, { "epoch": 0.16, "grad_norm": 1.8093080186617532, "learning_rate": 9.59212943086634e-06, "loss": 0.6288, "step": 1497 }, { "epoch": 0.16, "grad_norm": 1.8862928362176083, "learning_rate": 9.591463222508947e-06, "loss": 0.6367, "step": 1498 }, { "epoch": 0.16, "grad_norm": 1.8277193723517655, "learning_rate": 9.590796493683028e-06, "loss": 0.7249, "step": 1499 }, { "epoch": 0.16, "grad_norm": 2.19331615809431, "learning_rate": 9.59012924446416e-06, "loss": 0.7191, "step": 1500 }, { "epoch": 0.16, "grad_norm": 1.6401164370086485, "learning_rate": 9.589461474927979e-06, "loss": 0.6349, "step": 1501 }, { "epoch": 0.16, "grad_norm": 2.357420255966235, "learning_rate": 9.588793185150182e-06, "loss": 0.6767, "step": 1502 }, { "epoch": 0.16, "grad_norm": 1.9435062989464436, "learning_rate": 9.58812437520652e-06, "loss": 0.6545, "step": 1503 }, { "epoch": 0.16, "grad_norm": 1.927844968000663, "learning_rate": 9.58745504517281e-06, "loss": 0.6596, "step": 1504 }, { "epoch": 0.16, "grad_norm": 1.7570483201394154, "learning_rate": 9.586785195124924e-06, "loss": 0.673, "step": 1505 }, { "epoch": 0.16, "grad_norm": 1.8155004596147315, "learning_rate": 9.586114825138792e-06, "loss": 0.683, "step": 1506 }, { "epoch": 0.16, "grad_norm": 1.8583467053252918, "learning_rate": 9.585443935290403e-06, "loss": 0.6367, "step": 1507 }, { "epoch": 0.16, "grad_norm": 1.9030635673277672, "learning_rate": 9.58477252565581e-06, "loss": 0.6275, "step": 1508 }, { "epoch": 0.16, "grad_norm": 1.788718336973656, "learning_rate": 9.584100596311117e-06, "loss": 0.6945, "step": 1509 }, { "epoch": 0.16, "grad_norm": 1.7351275011949883, "learning_rate": 9.583428147332493e-06, "loss": 0.5619, "step": 1510 }, { "epoch": 0.16, "grad_norm": 1.8450518135634049, "learning_rate": 9.582755178796164e-06, "loss": 0.6165, "step": 1511 }, { "epoch": 0.16, "grad_norm": 1.972540427042877, "learning_rate": 9.582081690778415e-06, "loss": 0.5639, "step": 1512 }, { "epoch": 0.16, "grad_norm": 1.9050637486054645, "learning_rate": 9.58140768335559e-06, "loss": 0.6104, "step": 1513 }, { "epoch": 0.16, "grad_norm": 2.0985873225265284, "learning_rate": 9.58073315660409e-06, "loss": 0.717, "step": 1514 }, { "epoch": 0.16, "grad_norm": 3.3097437422761256, "learning_rate": 9.580058110600377e-06, "loss": 0.617, "step": 1515 }, { "epoch": 0.16, "grad_norm": 1.9690754280591791, "learning_rate": 9.579382545420972e-06, "loss": 0.6871, "step": 1516 }, { "epoch": 0.16, "grad_norm": 1.782107200466709, "learning_rate": 9.578706461142455e-06, "loss": 0.7152, "step": 1517 }, { "epoch": 0.16, "grad_norm": 1.8507836163162588, "learning_rate": 9.578029857841462e-06, "loss": 0.673, "step": 1518 }, { "epoch": 0.16, "grad_norm": 1.890334013229442, "learning_rate": 9.577352735594692e-06, "loss": 0.7108, "step": 1519 }, { "epoch": 0.16, "grad_norm": 1.8323036417412149, "learning_rate": 9.576675094478898e-06, "loss": 0.5764, "step": 1520 }, { "epoch": 0.16, "grad_norm": 2.088690846097015, "learning_rate": 9.575996934570896e-06, "loss": 0.7671, "step": 1521 }, { "epoch": 0.16, "grad_norm": 1.896412011489828, "learning_rate": 9.575318255947558e-06, "loss": 0.6737, "step": 1522 }, { "epoch": 0.16, "grad_norm": 1.8886169195982683, "learning_rate": 9.57463905868582e-06, "loss": 0.6384, "step": 1523 }, { "epoch": 0.16, "grad_norm": 2.219113889815762, "learning_rate": 9.573959342862667e-06, "loss": 0.6598, "step": 1524 }, { "epoch": 0.16, "grad_norm": 2.0301247271789857, "learning_rate": 9.573279108555154e-06, "loss": 0.6046, "step": 1525 }, { "epoch": 0.16, "grad_norm": 1.9843830977227843, "learning_rate": 9.572598355840386e-06, "loss": 0.6969, "step": 1526 }, { "epoch": 0.16, "grad_norm": 1.8464680774728668, "learning_rate": 9.571917084795532e-06, "loss": 0.5952, "step": 1527 }, { "epoch": 0.16, "grad_norm": 1.7925551511247095, "learning_rate": 9.571235295497818e-06, "loss": 0.6387, "step": 1528 }, { "epoch": 0.16, "grad_norm": 1.7944467845098961, "learning_rate": 9.570552988024527e-06, "loss": 0.6061, "step": 1529 }, { "epoch": 0.16, "grad_norm": 2.1431104616084493, "learning_rate": 9.569870162453004e-06, "loss": 0.7645, "step": 1530 }, { "epoch": 0.16, "grad_norm": 1.9799001726970609, "learning_rate": 9.569186818860652e-06, "loss": 0.8162, "step": 1531 }, { "epoch": 0.16, "grad_norm": 2.029920054940862, "learning_rate": 9.568502957324928e-06, "loss": 0.6148, "step": 1532 }, { "epoch": 0.16, "grad_norm": 2.2030292163042304, "learning_rate": 9.567818577923356e-06, "loss": 0.7205, "step": 1533 }, { "epoch": 0.16, "grad_norm": 1.9233321976022704, "learning_rate": 9.567133680733512e-06, "loss": 0.6518, "step": 1534 }, { "epoch": 0.16, "grad_norm": 1.7910034855382688, "learning_rate": 9.566448265833034e-06, "loss": 0.6862, "step": 1535 }, { "epoch": 0.16, "grad_norm": 2.0613102871389777, "learning_rate": 9.565762333299616e-06, "loss": 0.7762, "step": 1536 }, { "epoch": 0.16, "grad_norm": 2.1927507319579074, "learning_rate": 9.565075883211015e-06, "loss": 0.7046, "step": 1537 }, { "epoch": 0.16, "grad_norm": 1.833817329880263, "learning_rate": 9.564388915645042e-06, "loss": 0.6771, "step": 1538 }, { "epoch": 0.16, "grad_norm": 1.9954731003848947, "learning_rate": 9.563701430679568e-06, "loss": 0.7152, "step": 1539 }, { "epoch": 0.16, "grad_norm": 1.9271801113839422, "learning_rate": 9.563013428392528e-06, "loss": 0.7178, "step": 1540 }, { "epoch": 0.16, "grad_norm": 1.9666689051116464, "learning_rate": 9.562324908861904e-06, "loss": 0.5967, "step": 1541 }, { "epoch": 0.16, "grad_norm": 2.020042739074386, "learning_rate": 9.561635872165747e-06, "loss": 0.6152, "step": 1542 }, { "epoch": 0.16, "grad_norm": 1.7887557940007968, "learning_rate": 9.560946318382166e-06, "loss": 0.6381, "step": 1543 }, { "epoch": 0.16, "grad_norm": 2.1556497061967823, "learning_rate": 9.560256247589321e-06, "loss": 0.5674, "step": 1544 }, { "epoch": 0.16, "grad_norm": 1.7794594694424544, "learning_rate": 9.559565659865439e-06, "loss": 0.6416, "step": 1545 }, { "epoch": 0.16, "grad_norm": 1.9390939469813973, "learning_rate": 9.558874555288801e-06, "loss": 0.7169, "step": 1546 }, { "epoch": 0.16, "grad_norm": 1.8786485630013083, "learning_rate": 9.558182933937747e-06, "loss": 0.5799, "step": 1547 }, { "epoch": 0.16, "grad_norm": 1.856557490530216, "learning_rate": 9.557490795890679e-06, "loss": 0.5993, "step": 1548 }, { "epoch": 0.16, "grad_norm": 1.974287937611842, "learning_rate": 9.55679814122605e-06, "loss": 0.7003, "step": 1549 }, { "epoch": 0.16, "grad_norm": 1.9169969331494634, "learning_rate": 9.556104970022378e-06, "loss": 0.7669, "step": 1550 }, { "epoch": 0.16, "grad_norm": 1.978215919977572, "learning_rate": 9.55541128235824e-06, "loss": 0.6096, "step": 1551 }, { "epoch": 0.16, "grad_norm": 1.8521148274637858, "learning_rate": 9.554717078312269e-06, "loss": 0.7282, "step": 1552 }, { "epoch": 0.16, "grad_norm": 2.0251613160942914, "learning_rate": 9.554022357963153e-06, "loss": 0.7232, "step": 1553 }, { "epoch": 0.16, "grad_norm": 2.0117740878267436, "learning_rate": 9.553327121389648e-06, "loss": 0.7499, "step": 1554 }, { "epoch": 0.16, "grad_norm": 1.8316021424004985, "learning_rate": 9.552631368670562e-06, "loss": 0.6253, "step": 1555 }, { "epoch": 0.16, "grad_norm": 1.7375512663566117, "learning_rate": 9.55193509988476e-06, "loss": 0.5862, "step": 1556 }, { "epoch": 0.16, "grad_norm": 2.226876317475117, "learning_rate": 9.55123831511117e-06, "loss": 0.6528, "step": 1557 }, { "epoch": 0.16, "grad_norm": 2.280983795980818, "learning_rate": 9.550541014428773e-06, "loss": 0.7361, "step": 1558 }, { "epoch": 0.16, "grad_norm": 1.949008109587866, "learning_rate": 9.549843197916616e-06, "loss": 0.6629, "step": 1559 }, { "epoch": 0.16, "grad_norm": 1.876199206288432, "learning_rate": 9.5491448656538e-06, "loss": 0.5759, "step": 1560 }, { "epoch": 0.16, "grad_norm": 2.0055504701332407, "learning_rate": 9.548446017719484e-06, "loss": 0.6555, "step": 1561 }, { "epoch": 0.16, "grad_norm": 1.8950761565227399, "learning_rate": 9.547746654192887e-06, "loss": 0.6419, "step": 1562 }, { "epoch": 0.16, "grad_norm": 2.147644898855912, "learning_rate": 9.547046775153285e-06, "loss": 0.7729, "step": 1563 }, { "epoch": 0.16, "grad_norm": 1.9930761501595675, "learning_rate": 9.546346380680015e-06, "loss": 0.757, "step": 1564 }, { "epoch": 0.16, "grad_norm": 2.03876848772226, "learning_rate": 9.545645470852466e-06, "loss": 0.7823, "step": 1565 }, { "epoch": 0.16, "grad_norm": 2.0341781603182385, "learning_rate": 9.544944045750097e-06, "loss": 0.6754, "step": 1566 }, { "epoch": 0.16, "grad_norm": 1.9935660269354207, "learning_rate": 9.544242105452414e-06, "loss": 0.6485, "step": 1567 }, { "epoch": 0.16, "grad_norm": 2.2846043842839254, "learning_rate": 9.543539650038987e-06, "loss": 0.6906, "step": 1568 }, { "epoch": 0.16, "grad_norm": 2.1741854085761876, "learning_rate": 9.542836679589443e-06, "loss": 0.6777, "step": 1569 }, { "epoch": 0.16, "grad_norm": 1.8573681266861257, "learning_rate": 9.542133194183469e-06, "loss": 0.6227, "step": 1570 }, { "epoch": 0.16, "grad_norm": 1.768231953559204, "learning_rate": 9.541429193900808e-06, "loss": 0.6056, "step": 1571 }, { "epoch": 0.16, "grad_norm": 1.8561209820293387, "learning_rate": 9.540724678821261e-06, "loss": 0.7023, "step": 1572 }, { "epoch": 0.16, "grad_norm": 1.769977024069318, "learning_rate": 9.540019649024692e-06, "loss": 0.6391, "step": 1573 }, { "epoch": 0.16, "grad_norm": 1.913878231055899, "learning_rate": 9.539314104591019e-06, "loss": 0.6818, "step": 1574 }, { "epoch": 0.16, "grad_norm": 2.0615080474215635, "learning_rate": 9.538608045600218e-06, "loss": 0.6203, "step": 1575 }, { "epoch": 0.16, "grad_norm": 1.872307597705844, "learning_rate": 9.537901472132324e-06, "loss": 0.6047, "step": 1576 }, { "epoch": 0.16, "grad_norm": 2.001299949883912, "learning_rate": 9.537194384267436e-06, "loss": 0.6852, "step": 1577 }, { "epoch": 0.16, "grad_norm": 1.9756233996839685, "learning_rate": 9.536486782085703e-06, "loss": 0.666, "step": 1578 }, { "epoch": 0.16, "grad_norm": 1.7817231162157137, "learning_rate": 9.535778665667334e-06, "loss": 0.7383, "step": 1579 }, { "epoch": 0.16, "grad_norm": 1.8093842277276013, "learning_rate": 9.535070035092603e-06, "loss": 0.7256, "step": 1580 }, { "epoch": 0.16, "grad_norm": 1.7685386576759308, "learning_rate": 9.534360890441833e-06, "loss": 0.5198, "step": 1581 }, { "epoch": 0.16, "grad_norm": 2.1297343383103717, "learning_rate": 9.533651231795412e-06, "loss": 0.706, "step": 1582 }, { "epoch": 0.16, "grad_norm": 2.059039551713341, "learning_rate": 9.532941059233782e-06, "loss": 0.7166, "step": 1583 }, { "epoch": 0.16, "grad_norm": 1.8142204295763857, "learning_rate": 9.532230372837446e-06, "loss": 0.7129, "step": 1584 }, { "epoch": 0.16, "grad_norm": 2.039237252791234, "learning_rate": 9.531519172686964e-06, "loss": 0.7075, "step": 1585 }, { "epoch": 0.16, "grad_norm": 1.8287133514681926, "learning_rate": 9.530807458862956e-06, "loss": 0.6532, "step": 1586 }, { "epoch": 0.16, "grad_norm": 1.7667190625126445, "learning_rate": 9.530095231446096e-06, "loss": 0.5883, "step": 1587 }, { "epoch": 0.17, "grad_norm": 1.8291627106185575, "learning_rate": 9.529382490517123e-06, "loss": 0.6794, "step": 1588 }, { "epoch": 0.17, "grad_norm": 1.9608081910222792, "learning_rate": 9.528669236156827e-06, "loss": 0.6054, "step": 1589 }, { "epoch": 0.17, "grad_norm": 1.9924170196955373, "learning_rate": 9.52795546844606e-06, "loss": 0.7171, "step": 1590 }, { "epoch": 0.17, "grad_norm": 1.6524851266305414, "learning_rate": 9.527241187465735e-06, "loss": 0.6372, "step": 1591 }, { "epoch": 0.17, "grad_norm": 2.160203220354208, "learning_rate": 9.526526393296814e-06, "loss": 0.7238, "step": 1592 }, { "epoch": 0.17, "grad_norm": 1.9441794717429497, "learning_rate": 9.525811086020327e-06, "loss": 0.639, "step": 1593 }, { "epoch": 0.17, "grad_norm": 2.0185775044915775, "learning_rate": 9.525095265717357e-06, "loss": 0.7046, "step": 1594 }, { "epoch": 0.17, "grad_norm": 1.9035968009066015, "learning_rate": 9.524378932469045e-06, "loss": 0.7046, "step": 1595 }, { "epoch": 0.17, "grad_norm": 1.616356917467379, "learning_rate": 9.523662086356596e-06, "loss": 0.6115, "step": 1596 }, { "epoch": 0.17, "grad_norm": 2.175508595395853, "learning_rate": 9.522944727461264e-06, "loss": 0.6567, "step": 1597 }, { "epoch": 0.17, "grad_norm": 2.1150702390919536, "learning_rate": 9.522226855864366e-06, "loss": 0.7182, "step": 1598 }, { "epoch": 0.17, "grad_norm": 1.7610105376084912, "learning_rate": 9.52150847164728e-06, "loss": 0.7062, "step": 1599 }, { "epoch": 0.17, "grad_norm": 1.8587370870268596, "learning_rate": 9.520789574891436e-06, "loss": 0.6725, "step": 1600 }, { "epoch": 0.17, "grad_norm": 2.246019651567058, "learning_rate": 9.520070165678325e-06, "loss": 0.6444, "step": 1601 }, { "epoch": 0.17, "grad_norm": 2.0514198751421873, "learning_rate": 9.5193502440895e-06, "loss": 0.7817, "step": 1602 }, { "epoch": 0.17, "grad_norm": 1.7478983922533107, "learning_rate": 9.518629810206564e-06, "loss": 0.5888, "step": 1603 }, { "epoch": 0.17, "grad_norm": 1.809367012317806, "learning_rate": 9.517908864111182e-06, "loss": 0.6072, "step": 1604 }, { "epoch": 0.17, "grad_norm": 1.8021793691886772, "learning_rate": 9.517187405885082e-06, "loss": 0.653, "step": 1605 }, { "epoch": 0.17, "grad_norm": 1.9639447106313237, "learning_rate": 9.516465435610041e-06, "loss": 0.7472, "step": 1606 }, { "epoch": 0.17, "grad_norm": 1.7756152134174483, "learning_rate": 9.515742953367899e-06, "loss": 0.5715, "step": 1607 }, { "epoch": 0.17, "grad_norm": 1.910782270315552, "learning_rate": 9.515019959240555e-06, "loss": 0.7156, "step": 1608 }, { "epoch": 0.17, "grad_norm": 1.9990951548136033, "learning_rate": 9.514296453309965e-06, "loss": 0.7261, "step": 1609 }, { "epoch": 0.17, "grad_norm": 1.6970077590281472, "learning_rate": 9.51357243565814e-06, "loss": 0.5351, "step": 1610 }, { "epoch": 0.17, "grad_norm": 2.1610796647883306, "learning_rate": 9.512847906367153e-06, "loss": 0.7213, "step": 1611 }, { "epoch": 0.17, "grad_norm": 2.0053270299753994, "learning_rate": 9.512122865519135e-06, "loss": 0.6144, "step": 1612 }, { "epoch": 0.17, "grad_norm": 2.246635639941283, "learning_rate": 9.51139731319627e-06, "loss": 0.6258, "step": 1613 }, { "epoch": 0.17, "grad_norm": 2.0140091119514767, "learning_rate": 9.510671249480806e-06, "loss": 0.6814, "step": 1614 }, { "epoch": 0.17, "grad_norm": 2.0477698552459644, "learning_rate": 9.509944674455047e-06, "loss": 0.7852, "step": 1615 }, { "epoch": 0.17, "grad_norm": 2.0320102733793246, "learning_rate": 9.509217588201351e-06, "loss": 0.7089, "step": 1616 }, { "epoch": 0.17, "grad_norm": 1.8191152766376675, "learning_rate": 9.508489990802142e-06, "loss": 0.6528, "step": 1617 }, { "epoch": 0.17, "grad_norm": 1.797405031205996, "learning_rate": 9.507761882339895e-06, "loss": 0.6468, "step": 1618 }, { "epoch": 0.17, "grad_norm": 1.9825745171869442, "learning_rate": 9.507033262897142e-06, "loss": 0.706, "step": 1619 }, { "epoch": 0.17, "grad_norm": 2.052359939793622, "learning_rate": 9.506304132556484e-06, "loss": 0.7695, "step": 1620 }, { "epoch": 0.17, "grad_norm": 1.9565939487072954, "learning_rate": 9.505574491400564e-06, "loss": 0.7543, "step": 1621 }, { "epoch": 0.17, "grad_norm": 1.819843813882747, "learning_rate": 9.504844339512096e-06, "loss": 0.5962, "step": 1622 }, { "epoch": 0.17, "grad_norm": 1.9030655320222276, "learning_rate": 9.504113676973846e-06, "loss": 0.7255, "step": 1623 }, { "epoch": 0.17, "grad_norm": 2.0056629019007954, "learning_rate": 9.503382503868637e-06, "loss": 0.8137, "step": 1624 }, { "epoch": 0.17, "grad_norm": 1.9704662860200837, "learning_rate": 9.502650820279354e-06, "loss": 0.6582, "step": 1625 }, { "epoch": 0.17, "grad_norm": 2.3756436822304345, "learning_rate": 9.501918626288935e-06, "loss": 0.7373, "step": 1626 }, { "epoch": 0.17, "grad_norm": 1.7756355257502683, "learning_rate": 9.50118592198038e-06, "loss": 0.5558, "step": 1627 }, { "epoch": 0.17, "grad_norm": 1.8277201108402616, "learning_rate": 9.500452707436744e-06, "loss": 0.6353, "step": 1628 }, { "epoch": 0.17, "grad_norm": 1.8042213079432121, "learning_rate": 9.499718982741143e-06, "loss": 0.758, "step": 1629 }, { "epoch": 0.17, "grad_norm": 1.8420878240688283, "learning_rate": 9.498984747976747e-06, "loss": 0.5722, "step": 1630 }, { "epoch": 0.17, "grad_norm": 2.045825387663684, "learning_rate": 9.498250003226787e-06, "loss": 0.6005, "step": 1631 }, { "epoch": 0.17, "grad_norm": 1.8251107667602975, "learning_rate": 9.49751474857455e-06, "loss": 0.6692, "step": 1632 }, { "epoch": 0.17, "grad_norm": 1.8065273338239225, "learning_rate": 9.496778984103381e-06, "loss": 0.709, "step": 1633 }, { "epoch": 0.17, "grad_norm": 1.8042150998335234, "learning_rate": 9.496042709896684e-06, "loss": 0.6163, "step": 1634 }, { "epoch": 0.17, "grad_norm": 1.898311108212609, "learning_rate": 9.495305926037918e-06, "loss": 0.647, "step": 1635 }, { "epoch": 0.17, "grad_norm": 1.9689807722120047, "learning_rate": 9.494568632610603e-06, "loss": 0.7747, "step": 1636 }, { "epoch": 0.17, "grad_norm": 1.8341428636567332, "learning_rate": 9.493830829698317e-06, "loss": 0.5898, "step": 1637 }, { "epoch": 0.17, "grad_norm": 2.1033082614827414, "learning_rate": 9.493092517384692e-06, "loss": 0.6607, "step": 1638 }, { "epoch": 0.17, "grad_norm": 1.829421971966054, "learning_rate": 9.49235369575342e-06, "loss": 0.7291, "step": 1639 }, { "epoch": 0.17, "grad_norm": 1.8996716409526258, "learning_rate": 9.49161436488825e-06, "loss": 0.7602, "step": 1640 }, { "epoch": 0.17, "grad_norm": 2.1680339164477433, "learning_rate": 9.49087452487299e-06, "loss": 0.6938, "step": 1641 }, { "epoch": 0.17, "grad_norm": 1.6508009166489983, "learning_rate": 9.490134175791507e-06, "loss": 0.6141, "step": 1642 }, { "epoch": 0.17, "grad_norm": 1.7767239402836295, "learning_rate": 9.489393317727724e-06, "loss": 0.618, "step": 1643 }, { "epoch": 0.17, "grad_norm": 2.038443432035079, "learning_rate": 9.488651950765617e-06, "loss": 0.6579, "step": 1644 }, { "epoch": 0.17, "grad_norm": 1.741168133888333, "learning_rate": 9.487910074989228e-06, "loss": 0.6494, "step": 1645 }, { "epoch": 0.17, "grad_norm": 1.857138742618028, "learning_rate": 9.48716769048265e-06, "loss": 0.5755, "step": 1646 }, { "epoch": 0.17, "grad_norm": 2.1591345481710036, "learning_rate": 9.48642479733004e-06, "loss": 0.6383, "step": 1647 }, { "epoch": 0.17, "grad_norm": 1.9364895503622959, "learning_rate": 9.485681395615607e-06, "loss": 0.6839, "step": 1648 }, { "epoch": 0.17, "grad_norm": 2.031828191064373, "learning_rate": 9.484937485423622e-06, "loss": 0.6035, "step": 1649 }, { "epoch": 0.17, "grad_norm": 1.754566896444676, "learning_rate": 9.484193066838408e-06, "loss": 0.6424, "step": 1650 }, { "epoch": 0.17, "grad_norm": 1.9246941194236256, "learning_rate": 9.48344813994435e-06, "loss": 0.6513, "step": 1651 }, { "epoch": 0.17, "grad_norm": 1.8684221297861972, "learning_rate": 9.482702704825892e-06, "loss": 0.7858, "step": 1652 }, { "epoch": 0.17, "grad_norm": 1.9414626199310694, "learning_rate": 9.481956761567531e-06, "loss": 0.6826, "step": 1653 }, { "epoch": 0.17, "grad_norm": 2.0316286317178727, "learning_rate": 9.481210310253826e-06, "loss": 0.7082, "step": 1654 }, { "epoch": 0.17, "grad_norm": 1.7661332172941508, "learning_rate": 9.480463350969388e-06, "loss": 0.6982, "step": 1655 }, { "epoch": 0.17, "grad_norm": 1.9932281813194153, "learning_rate": 9.479715883798895e-06, "loss": 0.64, "step": 1656 }, { "epoch": 0.17, "grad_norm": 2.0257008912279417, "learning_rate": 9.47896790882707e-06, "loss": 0.6237, "step": 1657 }, { "epoch": 0.17, "grad_norm": 1.7559027656428055, "learning_rate": 9.478219426138703e-06, "loss": 0.6753, "step": 1658 }, { "epoch": 0.17, "grad_norm": 1.8541314724538382, "learning_rate": 9.477470435818641e-06, "loss": 0.7334, "step": 1659 }, { "epoch": 0.17, "grad_norm": 1.9262745014920255, "learning_rate": 9.476720937951785e-06, "loss": 0.6096, "step": 1660 }, { "epoch": 0.17, "grad_norm": 2.00672485707719, "learning_rate": 9.47597093262309e-06, "loss": 0.6902, "step": 1661 }, { "epoch": 0.17, "grad_norm": 1.8392421439660025, "learning_rate": 9.475220419917581e-06, "loss": 0.7256, "step": 1662 }, { "epoch": 0.17, "grad_norm": 1.9341160956399497, "learning_rate": 9.47446939992033e-06, "loss": 0.7523, "step": 1663 }, { "epoch": 0.17, "grad_norm": 1.8634455622742643, "learning_rate": 9.473717872716469e-06, "loss": 0.7325, "step": 1664 }, { "epoch": 0.17, "grad_norm": 1.8513564589120213, "learning_rate": 9.472965838391187e-06, "loss": 0.6888, "step": 1665 }, { "epoch": 0.17, "grad_norm": 1.7884451440018407, "learning_rate": 9.47221329702973e-06, "loss": 0.5837, "step": 1666 }, { "epoch": 0.17, "grad_norm": 1.9881558844202416, "learning_rate": 9.471460248717406e-06, "loss": 0.6877, "step": 1667 }, { "epoch": 0.17, "grad_norm": 1.8388210711744746, "learning_rate": 9.470706693539578e-06, "loss": 0.6638, "step": 1668 }, { "epoch": 0.17, "grad_norm": 2.0549800910188534, "learning_rate": 9.469952631581663e-06, "loss": 0.7381, "step": 1669 }, { "epoch": 0.17, "grad_norm": 1.9226671283551982, "learning_rate": 9.469198062929139e-06, "loss": 0.6649, "step": 1670 }, { "epoch": 0.17, "grad_norm": 2.0660185079140896, "learning_rate": 9.468442987667542e-06, "loss": 0.7564, "step": 1671 }, { "epoch": 0.17, "grad_norm": 1.903836190555682, "learning_rate": 9.467687405882463e-06, "loss": 0.6271, "step": 1672 }, { "epoch": 0.17, "grad_norm": 2.0450685561813824, "learning_rate": 9.466931317659551e-06, "loss": 0.7239, "step": 1673 }, { "epoch": 0.17, "grad_norm": 2.005334746057944, "learning_rate": 9.466174723084514e-06, "loss": 0.7554, "step": 1674 }, { "epoch": 0.17, "grad_norm": 1.6452748276607783, "learning_rate": 9.465417622243116e-06, "loss": 0.6588, "step": 1675 }, { "epoch": 0.17, "grad_norm": 1.9042511113167548, "learning_rate": 9.464660015221177e-06, "loss": 0.5927, "step": 1676 }, { "epoch": 0.17, "grad_norm": 1.8411751279551711, "learning_rate": 9.463901902104579e-06, "loss": 0.6422, "step": 1677 }, { "epoch": 0.17, "grad_norm": 1.9047976740761143, "learning_rate": 9.463143282979258e-06, "loss": 0.7052, "step": 1678 }, { "epoch": 0.17, "grad_norm": 2.0239698176707055, "learning_rate": 9.462384157931203e-06, "loss": 0.6387, "step": 1679 }, { "epoch": 0.17, "grad_norm": 1.71917711556842, "learning_rate": 9.461624527046472e-06, "loss": 0.6492, "step": 1680 }, { "epoch": 0.17, "grad_norm": 1.7473641280277281, "learning_rate": 9.46086439041117e-06, "loss": 0.6081, "step": 1681 }, { "epoch": 0.17, "grad_norm": 2.038864253926127, "learning_rate": 9.460103748111462e-06, "loss": 0.6386, "step": 1682 }, { "epoch": 0.17, "grad_norm": 1.9706571596391416, "learning_rate": 9.459342600233575e-06, "loss": 0.7428, "step": 1683 }, { "epoch": 0.18, "grad_norm": 1.954883981835221, "learning_rate": 9.458580946863784e-06, "loss": 0.6086, "step": 1684 }, { "epoch": 0.18, "grad_norm": 2.0798834277695146, "learning_rate": 9.457818788088431e-06, "loss": 0.6907, "step": 1685 }, { "epoch": 0.18, "grad_norm": 1.854504688334079, "learning_rate": 9.45705612399391e-06, "loss": 0.6769, "step": 1686 }, { "epoch": 0.18, "grad_norm": 1.940980581413887, "learning_rate": 9.456292954666673e-06, "loss": 0.6356, "step": 1687 }, { "epoch": 0.18, "grad_norm": 2.010496752053228, "learning_rate": 9.45552928019323e-06, "loss": 0.6778, "step": 1688 }, { "epoch": 0.18, "grad_norm": 2.357313247435537, "learning_rate": 9.454765100660144e-06, "loss": 0.7522, "step": 1689 }, { "epoch": 0.18, "grad_norm": 2.084001616032715, "learning_rate": 9.454000416154046e-06, "loss": 0.7403, "step": 1690 }, { "epoch": 0.18, "grad_norm": 2.038729610751833, "learning_rate": 9.453235226761613e-06, "loss": 0.6874, "step": 1691 }, { "epoch": 0.18, "grad_norm": 1.9823748346716907, "learning_rate": 9.452469532569585e-06, "loss": 0.6065, "step": 1692 }, { "epoch": 0.18, "grad_norm": 1.998207692487797, "learning_rate": 9.451703333664756e-06, "loss": 0.6758, "step": 1693 }, { "epoch": 0.18, "grad_norm": 1.8763531828895923, "learning_rate": 9.45093663013398e-06, "loss": 0.6963, "step": 1694 }, { "epoch": 0.18, "grad_norm": 1.6424440865727896, "learning_rate": 9.45016942206417e-06, "loss": 0.6559, "step": 1695 }, { "epoch": 0.18, "grad_norm": 2.1430067998803564, "learning_rate": 9.449401709542289e-06, "loss": 0.7204, "step": 1696 }, { "epoch": 0.18, "grad_norm": 2.0047992594876627, "learning_rate": 9.448633492655363e-06, "loss": 0.7242, "step": 1697 }, { "epoch": 0.18, "grad_norm": 1.9198849998196943, "learning_rate": 9.447864771490476e-06, "loss": 0.743, "step": 1698 }, { "epoch": 0.18, "grad_norm": 1.8212504164264498, "learning_rate": 9.447095546134763e-06, "loss": 0.5829, "step": 1699 }, { "epoch": 0.18, "grad_norm": 1.8677748324266148, "learning_rate": 9.446325816675423e-06, "loss": 0.6371, "step": 1700 }, { "epoch": 0.18, "grad_norm": 2.0007664737650885, "learning_rate": 9.445555583199711e-06, "loss": 0.6646, "step": 1701 }, { "epoch": 0.18, "grad_norm": 1.8964503663253003, "learning_rate": 9.444784845794932e-06, "loss": 0.7622, "step": 1702 }, { "epoch": 0.18, "grad_norm": 1.9525551773329504, "learning_rate": 9.444013604548457e-06, "loss": 0.5986, "step": 1703 }, { "epoch": 0.18, "grad_norm": 2.357983873653802, "learning_rate": 9.44324185954771e-06, "loss": 0.5866, "step": 1704 }, { "epoch": 0.18, "grad_norm": 1.974929397878334, "learning_rate": 9.442469610880172e-06, "loss": 0.7261, "step": 1705 }, { "epoch": 0.18, "grad_norm": 2.422100306321085, "learning_rate": 9.441696858633382e-06, "loss": 0.7537, "step": 1706 }, { "epoch": 0.18, "grad_norm": 1.7334732730860278, "learning_rate": 9.440923602894937e-06, "loss": 0.6426, "step": 1707 }, { "epoch": 0.18, "grad_norm": 2.085845934820001, "learning_rate": 9.44014984375249e-06, "loss": 0.7491, "step": 1708 }, { "epoch": 0.18, "grad_norm": 1.957209376001055, "learning_rate": 9.439375581293751e-06, "loss": 0.6847, "step": 1709 }, { "epoch": 0.18, "grad_norm": 1.850758181214728, "learning_rate": 9.438600815606483e-06, "loss": 0.6593, "step": 1710 }, { "epoch": 0.18, "grad_norm": 1.9311284970137075, "learning_rate": 9.437825546778517e-06, "loss": 0.6886, "step": 1711 }, { "epoch": 0.18, "grad_norm": 1.7095134979680247, "learning_rate": 9.437049774897728e-06, "loss": 0.6179, "step": 1712 }, { "epoch": 0.18, "grad_norm": 2.048187229194672, "learning_rate": 9.436273500052056e-06, "loss": 0.7139, "step": 1713 }, { "epoch": 0.18, "grad_norm": 1.8280668981290464, "learning_rate": 9.4354967223295e-06, "loss": 0.6537, "step": 1714 }, { "epoch": 0.18, "grad_norm": 1.7321260549808468, "learning_rate": 9.434719441818106e-06, "loss": 0.5556, "step": 1715 }, { "epoch": 0.18, "grad_norm": 2.224063985682685, "learning_rate": 9.433941658605987e-06, "loss": 0.6958, "step": 1716 }, { "epoch": 0.18, "grad_norm": 4.244857086101658, "learning_rate": 9.43316337278131e-06, "loss": 0.6439, "step": 1717 }, { "epoch": 0.18, "grad_norm": 2.1301665791756372, "learning_rate": 9.432384584432294e-06, "loss": 0.7263, "step": 1718 }, { "epoch": 0.18, "grad_norm": 2.204264658265296, "learning_rate": 9.431605293647224e-06, "loss": 0.6973, "step": 1719 }, { "epoch": 0.18, "grad_norm": 1.9886649316067186, "learning_rate": 9.430825500514433e-06, "loss": 0.5987, "step": 1720 }, { "epoch": 0.18, "grad_norm": 1.9015353627790044, "learning_rate": 9.430045205122317e-06, "loss": 0.6792, "step": 1721 }, { "epoch": 0.18, "grad_norm": 1.746055037768087, "learning_rate": 9.429264407559328e-06, "loss": 0.6747, "step": 1722 }, { "epoch": 0.18, "grad_norm": 1.8539817079671905, "learning_rate": 9.42848310791397e-06, "loss": 0.6928, "step": 1723 }, { "epoch": 0.18, "grad_norm": 2.1077148459976724, "learning_rate": 9.427701306274812e-06, "loss": 0.6944, "step": 1724 }, { "epoch": 0.18, "grad_norm": 2.2949928578619594, "learning_rate": 9.426919002730473e-06, "loss": 0.6411, "step": 1725 }, { "epoch": 0.18, "grad_norm": 2.3089363293156584, "learning_rate": 9.426136197369633e-06, "loss": 0.7011, "step": 1726 }, { "epoch": 0.18, "grad_norm": 2.3775684030564825, "learning_rate": 9.425352890281028e-06, "loss": 0.6686, "step": 1727 }, { "epoch": 0.18, "grad_norm": 2.3000666445379516, "learning_rate": 9.424569081553447e-06, "loss": 0.576, "step": 1728 }, { "epoch": 0.18, "grad_norm": 2.2271722236338998, "learning_rate": 9.423784771275744e-06, "loss": 0.6952, "step": 1729 }, { "epoch": 0.18, "grad_norm": 1.9171166819419831, "learning_rate": 9.422999959536819e-06, "loss": 0.8124, "step": 1730 }, { "epoch": 0.18, "grad_norm": 1.9396569476898367, "learning_rate": 9.422214646425641e-06, "loss": 0.5715, "step": 1731 }, { "epoch": 0.18, "grad_norm": 2.1983267960374264, "learning_rate": 9.421428832031226e-06, "loss": 0.6675, "step": 1732 }, { "epoch": 0.18, "grad_norm": 2.2057794317489123, "learning_rate": 9.420642516442652e-06, "loss": 0.6696, "step": 1733 }, { "epoch": 0.18, "grad_norm": 1.719469875329039, "learning_rate": 9.41985569974905e-06, "loss": 0.6127, "step": 1734 }, { "epoch": 0.18, "grad_norm": 2.083556833840758, "learning_rate": 9.419068382039615e-06, "loss": 0.7165, "step": 1735 }, { "epoch": 0.18, "grad_norm": 2.1664458577953467, "learning_rate": 9.41828056340359e-06, "loss": 0.7521, "step": 1736 }, { "epoch": 0.18, "grad_norm": 1.9227176468266851, "learning_rate": 9.41749224393028e-06, "loss": 0.7552, "step": 1737 }, { "epoch": 0.18, "grad_norm": 1.9964207302492, "learning_rate": 9.416703423709044e-06, "loss": 0.6417, "step": 1738 }, { "epoch": 0.18, "grad_norm": 1.8401124624218785, "learning_rate": 9.415914102829302e-06, "loss": 0.6845, "step": 1739 }, { "epoch": 0.18, "grad_norm": 1.9982912657871517, "learning_rate": 9.415124281380525e-06, "loss": 0.7257, "step": 1740 }, { "epoch": 0.18, "grad_norm": 1.931542910740269, "learning_rate": 9.414333959452247e-06, "loss": 0.7005, "step": 1741 }, { "epoch": 0.18, "grad_norm": 2.0180011776973115, "learning_rate": 9.413543137134053e-06, "loss": 0.7342, "step": 1742 }, { "epoch": 0.18, "grad_norm": 1.8253225407310454, "learning_rate": 9.412751814515588e-06, "loss": 0.697, "step": 1743 }, { "epoch": 0.18, "grad_norm": 2.1203442175782348, "learning_rate": 9.411959991686554e-06, "loss": 0.7561, "step": 1744 }, { "epoch": 0.18, "grad_norm": 1.9259566109424078, "learning_rate": 9.411167668736707e-06, "loss": 0.5717, "step": 1745 }, { "epoch": 0.18, "grad_norm": 1.95383111394422, "learning_rate": 9.410374845755862e-06, "loss": 0.6709, "step": 1746 }, { "epoch": 0.18, "grad_norm": 2.4064921390288863, "learning_rate": 9.409581522833889e-06, "loss": 0.6546, "step": 1747 }, { "epoch": 0.18, "grad_norm": 1.9152540024539662, "learning_rate": 9.408787700060718e-06, "loss": 0.6543, "step": 1748 }, { "epoch": 0.18, "grad_norm": 2.1151996633398142, "learning_rate": 9.40799337752633e-06, "loss": 0.6748, "step": 1749 }, { "epoch": 0.18, "grad_norm": 1.8568166821966514, "learning_rate": 9.40719855532077e-06, "loss": 0.7002, "step": 1750 }, { "epoch": 0.18, "grad_norm": 2.0767917401632983, "learning_rate": 9.406403233534134e-06, "loss": 0.6867, "step": 1751 }, { "epoch": 0.18, "grad_norm": 2.143310125483721, "learning_rate": 9.405607412256573e-06, "loss": 0.7443, "step": 1752 }, { "epoch": 0.18, "grad_norm": 2.02257859190377, "learning_rate": 9.404811091578302e-06, "loss": 0.6455, "step": 1753 }, { "epoch": 0.18, "grad_norm": 1.9046396189464536, "learning_rate": 9.404014271589588e-06, "loss": 0.7036, "step": 1754 }, { "epoch": 0.18, "grad_norm": 1.7750985666179104, "learning_rate": 9.403216952380755e-06, "loss": 0.6842, "step": 1755 }, { "epoch": 0.18, "grad_norm": 1.9874375531893456, "learning_rate": 9.40241913404218e-06, "loss": 0.6559, "step": 1756 }, { "epoch": 0.18, "grad_norm": 1.886971108135804, "learning_rate": 9.401620816664305e-06, "loss": 0.6458, "step": 1757 }, { "epoch": 0.18, "grad_norm": 1.9427562819969475, "learning_rate": 9.400822000337622e-06, "loss": 0.7032, "step": 1758 }, { "epoch": 0.18, "grad_norm": 1.8186870029716242, "learning_rate": 9.400022685152683e-06, "loss": 0.6731, "step": 1759 }, { "epoch": 0.18, "grad_norm": 1.855438322366915, "learning_rate": 9.399222871200091e-06, "loss": 0.6359, "step": 1760 }, { "epoch": 0.18, "grad_norm": 2.23468285760032, "learning_rate": 9.398422558570512e-06, "loss": 0.7936, "step": 1761 }, { "epoch": 0.18, "grad_norm": 1.9850677319168017, "learning_rate": 9.397621747354666e-06, "loss": 0.5443, "step": 1762 }, { "epoch": 0.18, "grad_norm": 2.0215518938615653, "learning_rate": 9.39682043764333e-06, "loss": 0.7142, "step": 1763 }, { "epoch": 0.18, "grad_norm": 2.1960036942141676, "learning_rate": 9.396018629527336e-06, "loss": 0.6934, "step": 1764 }, { "epoch": 0.18, "grad_norm": 1.9005397240127722, "learning_rate": 9.395216323097573e-06, "loss": 0.7108, "step": 1765 }, { "epoch": 0.18, "grad_norm": 1.7738157201021318, "learning_rate": 9.394413518444989e-06, "loss": 0.6096, "step": 1766 }, { "epoch": 0.18, "grad_norm": 1.936537266975992, "learning_rate": 9.393610215660585e-06, "loss": 0.6309, "step": 1767 }, { "epoch": 0.18, "grad_norm": 1.9371372960509754, "learning_rate": 9.392806414835422e-06, "loss": 0.7551, "step": 1768 }, { "epoch": 0.18, "grad_norm": 1.9333032902331897, "learning_rate": 9.392002116060612e-06, "loss": 0.6436, "step": 1769 }, { "epoch": 0.18, "grad_norm": 2.027033584198372, "learning_rate": 9.391197319427328e-06, "loss": 0.6055, "step": 1770 }, { "epoch": 0.18, "grad_norm": 2.196135164479654, "learning_rate": 9.3903920250268e-06, "loss": 0.7347, "step": 1771 }, { "epoch": 0.18, "grad_norm": 1.731083189320279, "learning_rate": 9.389586232950312e-06, "loss": 0.5823, "step": 1772 }, { "epoch": 0.18, "grad_norm": 1.9930184050076185, "learning_rate": 9.388779943289204e-06, "loss": 0.6757, "step": 1773 }, { "epoch": 0.18, "grad_norm": 1.933884457327214, "learning_rate": 9.387973156134872e-06, "loss": 0.6756, "step": 1774 }, { "epoch": 0.18, "grad_norm": 2.391449358670151, "learning_rate": 9.387165871578774e-06, "loss": 0.6467, "step": 1775 }, { "epoch": 0.18, "grad_norm": 2.0315456443703153, "learning_rate": 9.38635808971242e-06, "loss": 0.629, "step": 1776 }, { "epoch": 0.18, "grad_norm": 2.051657440819527, "learning_rate": 9.385549810627374e-06, "loss": 0.6349, "step": 1777 }, { "epoch": 0.18, "grad_norm": 2.208159799053304, "learning_rate": 9.38474103441526e-06, "loss": 0.7332, "step": 1778 }, { "epoch": 0.18, "grad_norm": 2.003368483790586, "learning_rate": 9.383931761167757e-06, "loss": 0.6808, "step": 1779 }, { "epoch": 0.19, "grad_norm": 1.9662724949489288, "learning_rate": 9.383121990976602e-06, "loss": 0.7002, "step": 1780 }, { "epoch": 0.19, "grad_norm": 2.046593510996878, "learning_rate": 9.382311723933586e-06, "loss": 0.6752, "step": 1781 }, { "epoch": 0.19, "grad_norm": 1.7582074425722836, "learning_rate": 9.381500960130558e-06, "loss": 0.6253, "step": 1782 }, { "epoch": 0.19, "grad_norm": 1.8022736464724076, "learning_rate": 9.380689699659423e-06, "loss": 0.6269, "step": 1783 }, { "epoch": 0.19, "grad_norm": 1.850845380128415, "learning_rate": 9.37987794261214e-06, "loss": 0.6402, "step": 1784 }, { "epoch": 0.19, "grad_norm": 1.8174325837103, "learning_rate": 9.379065689080729e-06, "loss": 0.6395, "step": 1785 }, { "epoch": 0.19, "grad_norm": 1.965486494511778, "learning_rate": 9.378252939157262e-06, "loss": 0.6842, "step": 1786 }, { "epoch": 0.19, "grad_norm": 1.7872036813111714, "learning_rate": 9.377439692933869e-06, "loss": 0.702, "step": 1787 }, { "epoch": 0.19, "grad_norm": 2.056730874252554, "learning_rate": 9.376625950502736e-06, "loss": 0.6803, "step": 1788 }, { "epoch": 0.19, "grad_norm": 1.7395674020136729, "learning_rate": 9.375811711956106e-06, "loss": 0.6639, "step": 1789 }, { "epoch": 0.19, "grad_norm": 1.7736550746022077, "learning_rate": 9.374996977386276e-06, "loss": 0.6338, "step": 1790 }, { "epoch": 0.19, "grad_norm": 1.8660714665304707, "learning_rate": 9.374181746885603e-06, "loss": 0.5861, "step": 1791 }, { "epoch": 0.19, "grad_norm": 1.9014869405672314, "learning_rate": 9.373366020546498e-06, "loss": 0.7147, "step": 1792 }, { "epoch": 0.19, "grad_norm": 1.7393522079638204, "learning_rate": 9.372549798461426e-06, "loss": 0.5423, "step": 1793 }, { "epoch": 0.19, "grad_norm": 1.8812361480908122, "learning_rate": 9.371733080722911e-06, "loss": 0.7266, "step": 1794 }, { "epoch": 0.19, "grad_norm": 1.808933104084343, "learning_rate": 9.370915867423534e-06, "loss": 0.7232, "step": 1795 }, { "epoch": 0.19, "grad_norm": 1.7975636897079652, "learning_rate": 9.37009815865593e-06, "loss": 0.638, "step": 1796 }, { "epoch": 0.19, "grad_norm": 1.9655333154002042, "learning_rate": 9.369279954512791e-06, "loss": 0.81, "step": 1797 }, { "epoch": 0.19, "grad_norm": 2.1093837615576163, "learning_rate": 9.368461255086866e-06, "loss": 0.6529, "step": 1798 }, { "epoch": 0.19, "grad_norm": 1.8458410099324793, "learning_rate": 9.367642060470958e-06, "loss": 0.5708, "step": 1799 }, { "epoch": 0.19, "grad_norm": 1.7681980524691754, "learning_rate": 9.366822370757927e-06, "loss": 0.6468, "step": 1800 }, { "epoch": 0.19, "grad_norm": 2.0440408178376956, "learning_rate": 9.36600218604069e-06, "loss": 0.6947, "step": 1801 }, { "epoch": 0.19, "grad_norm": 1.709238322425783, "learning_rate": 9.36518150641222e-06, "loss": 0.6153, "step": 1802 }, { "epoch": 0.19, "grad_norm": 1.8525472802523686, "learning_rate": 9.364360331965545e-06, "loss": 0.6379, "step": 1803 }, { "epoch": 0.19, "grad_norm": 2.040402922812123, "learning_rate": 9.363538662793752e-06, "loss": 0.73, "step": 1804 }, { "epoch": 0.19, "grad_norm": 1.9562526941554614, "learning_rate": 9.362716498989979e-06, "loss": 0.6633, "step": 1805 }, { "epoch": 0.19, "grad_norm": 2.102318885795319, "learning_rate": 9.361893840647425e-06, "loss": 0.6032, "step": 1806 }, { "epoch": 0.19, "grad_norm": 1.934206711167693, "learning_rate": 9.361070687859341e-06, "loss": 0.6323, "step": 1807 }, { "epoch": 0.19, "grad_norm": 2.0505371589582104, "learning_rate": 9.36024704071904e-06, "loss": 0.7097, "step": 1808 }, { "epoch": 0.19, "grad_norm": 1.9380575372950994, "learning_rate": 9.359422899319882e-06, "loss": 0.7334, "step": 1809 }, { "epoch": 0.19, "grad_norm": 1.9703923692444525, "learning_rate": 9.358598263755289e-06, "loss": 0.6529, "step": 1810 }, { "epoch": 0.19, "grad_norm": 2.0041847525050547, "learning_rate": 9.357773134118743e-06, "loss": 0.7106, "step": 1811 }, { "epoch": 0.19, "grad_norm": 1.8824885667479772, "learning_rate": 9.356947510503771e-06, "loss": 0.6663, "step": 1812 }, { "epoch": 0.19, "grad_norm": 1.9126377503709864, "learning_rate": 9.356121393003968e-06, "loss": 0.6731, "step": 1813 }, { "epoch": 0.19, "grad_norm": 2.1234974613692432, "learning_rate": 9.355294781712974e-06, "loss": 0.668, "step": 1814 }, { "epoch": 0.19, "grad_norm": 2.427177580084952, "learning_rate": 9.354467676724491e-06, "loss": 0.6898, "step": 1815 }, { "epoch": 0.19, "grad_norm": 1.7188594239543076, "learning_rate": 9.35364007813228e-06, "loss": 0.6387, "step": 1816 }, { "epoch": 0.19, "grad_norm": 2.1711651881047604, "learning_rate": 9.352811986030147e-06, "loss": 0.7751, "step": 1817 }, { "epoch": 0.19, "grad_norm": 2.004661231210757, "learning_rate": 9.35198340051197e-06, "loss": 0.7885, "step": 1818 }, { "epoch": 0.19, "grad_norm": 2.202027654675994, "learning_rate": 9.351154321671667e-06, "loss": 0.714, "step": 1819 }, { "epoch": 0.19, "grad_norm": 2.108135480448231, "learning_rate": 9.350324749603221e-06, "loss": 0.6352, "step": 1820 }, { "epoch": 0.19, "grad_norm": 2.0603342012079517, "learning_rate": 9.34949468440067e-06, "loss": 0.6355, "step": 1821 }, { "epoch": 0.19, "grad_norm": 2.014812774582301, "learning_rate": 9.348664126158103e-06, "loss": 0.6858, "step": 1822 }, { "epoch": 0.19, "grad_norm": 1.967906011677732, "learning_rate": 9.34783307496967e-06, "loss": 0.7113, "step": 1823 }, { "epoch": 0.19, "grad_norm": 2.0691130215812827, "learning_rate": 9.347001530929579e-06, "loss": 0.7028, "step": 1824 }, { "epoch": 0.19, "grad_norm": 1.9448319235913356, "learning_rate": 9.346169494132086e-06, "loss": 0.6687, "step": 1825 }, { "epoch": 0.19, "grad_norm": 1.858179669232822, "learning_rate": 9.345336964671507e-06, "loss": 0.5961, "step": 1826 }, { "epoch": 0.19, "grad_norm": 1.6801584205129731, "learning_rate": 9.344503942642218e-06, "loss": 0.5703, "step": 1827 }, { "epoch": 0.19, "grad_norm": 1.8900232563367874, "learning_rate": 9.343670428138644e-06, "loss": 0.6756, "step": 1828 }, { "epoch": 0.19, "grad_norm": 1.8460935273114019, "learning_rate": 9.342836421255268e-06, "loss": 0.6328, "step": 1829 }, { "epoch": 0.19, "grad_norm": 1.8418240477099301, "learning_rate": 9.34200192208663e-06, "loss": 0.6728, "step": 1830 }, { "epoch": 0.19, "grad_norm": 1.8254277059194648, "learning_rate": 9.341166930727326e-06, "loss": 0.7245, "step": 1831 }, { "epoch": 0.19, "grad_norm": 1.88091998086299, "learning_rate": 9.340331447272006e-06, "loss": 0.6205, "step": 1832 }, { "epoch": 0.19, "grad_norm": 1.822431851717622, "learning_rate": 9.339495471815379e-06, "loss": 0.6293, "step": 1833 }, { "epoch": 0.19, "grad_norm": 1.8356744862913454, "learning_rate": 9.338659004452204e-06, "loss": 0.6785, "step": 1834 }, { "epoch": 0.19, "grad_norm": 1.853780372536723, "learning_rate": 9.337822045277303e-06, "loss": 0.6948, "step": 1835 }, { "epoch": 0.19, "grad_norm": 1.8247544720172622, "learning_rate": 9.336984594385547e-06, "loss": 0.546, "step": 1836 }, { "epoch": 0.19, "grad_norm": 1.7757233288010839, "learning_rate": 9.33614665187187e-06, "loss": 0.7182, "step": 1837 }, { "epoch": 0.19, "grad_norm": 2.1038639116244946, "learning_rate": 9.335308217831252e-06, "loss": 0.7425, "step": 1838 }, { "epoch": 0.19, "grad_norm": 1.9022465803573188, "learning_rate": 9.334469292358736e-06, "loss": 0.7418, "step": 1839 }, { "epoch": 0.19, "grad_norm": 1.982372166611791, "learning_rate": 9.333629875549424e-06, "loss": 0.6076, "step": 1840 }, { "epoch": 0.19, "grad_norm": 1.866068994191335, "learning_rate": 9.33278996749846e-06, "loss": 0.6356, "step": 1841 }, { "epoch": 0.19, "grad_norm": 1.9918446912193064, "learning_rate": 9.331949568301062e-06, "loss": 0.643, "step": 1842 }, { "epoch": 0.19, "grad_norm": 1.9290363357787772, "learning_rate": 9.331108678052485e-06, "loss": 0.6967, "step": 1843 }, { "epoch": 0.19, "grad_norm": 1.944946068496725, "learning_rate": 9.330267296848056e-06, "loss": 0.6828, "step": 1844 }, { "epoch": 0.19, "grad_norm": 2.2391804890671683, "learning_rate": 9.329425424783145e-06, "loss": 0.66, "step": 1845 }, { "epoch": 0.19, "grad_norm": 2.0239600614768927, "learning_rate": 9.328583061953187e-06, "loss": 0.6193, "step": 1846 }, { "epoch": 0.19, "grad_norm": 1.7541080391109118, "learning_rate": 9.327740208453666e-06, "loss": 0.6133, "step": 1847 }, { "epoch": 0.19, "grad_norm": 2.0824969747832474, "learning_rate": 9.326896864380124e-06, "loss": 0.7947, "step": 1848 }, { "epoch": 0.19, "grad_norm": 1.9952093471075816, "learning_rate": 9.326053029828162e-06, "loss": 0.6064, "step": 1849 }, { "epoch": 0.19, "grad_norm": 1.8993269296443138, "learning_rate": 9.32520870489343e-06, "loss": 0.6876, "step": 1850 }, { "epoch": 0.19, "grad_norm": 1.8474386957130455, "learning_rate": 9.32436388967164e-06, "loss": 0.6825, "step": 1851 }, { "epoch": 0.19, "grad_norm": 1.873580562276867, "learning_rate": 9.323518584258554e-06, "loss": 0.7735, "step": 1852 }, { "epoch": 0.19, "grad_norm": 1.923520628246674, "learning_rate": 9.32267278874999e-06, "loss": 0.678, "step": 1853 }, { "epoch": 0.19, "grad_norm": 1.7349404592036248, "learning_rate": 9.32182650324183e-06, "loss": 0.6314, "step": 1854 }, { "epoch": 0.19, "grad_norm": 1.983232010650299, "learning_rate": 9.320979727830004e-06, "loss": 0.663, "step": 1855 }, { "epoch": 0.19, "grad_norm": 2.116098848893127, "learning_rate": 9.320132462610495e-06, "loss": 0.6062, "step": 1856 }, { "epoch": 0.19, "grad_norm": 1.954496745764822, "learning_rate": 9.319284707679348e-06, "loss": 0.6566, "step": 1857 }, { "epoch": 0.19, "grad_norm": 1.944786321375141, "learning_rate": 9.318436463132661e-06, "loss": 0.7398, "step": 1858 }, { "epoch": 0.19, "grad_norm": 1.9358948718792182, "learning_rate": 9.317587729066585e-06, "loss": 0.6646, "step": 1859 }, { "epoch": 0.19, "grad_norm": 1.9699922116768356, "learning_rate": 9.316738505577331e-06, "loss": 0.7234, "step": 1860 }, { "epoch": 0.19, "grad_norm": 2.093839616518822, "learning_rate": 9.315888792761163e-06, "loss": 0.659, "step": 1861 }, { "epoch": 0.19, "grad_norm": 1.925384133731945, "learning_rate": 9.3150385907144e-06, "loss": 0.6902, "step": 1862 }, { "epoch": 0.19, "grad_norm": 1.808062266664772, "learning_rate": 9.31418789953342e-06, "loss": 0.576, "step": 1863 }, { "epoch": 0.19, "grad_norm": 2.073959612645908, "learning_rate": 9.31333671931465e-06, "loss": 0.7181, "step": 1864 }, { "epoch": 0.19, "grad_norm": 1.8375303473816842, "learning_rate": 9.312485050154578e-06, "loss": 0.6194, "step": 1865 }, { "epoch": 0.19, "grad_norm": 2.026136079077223, "learning_rate": 9.311632892149744e-06, "loss": 0.6778, "step": 1866 }, { "epoch": 0.19, "grad_norm": 2.0988523898716376, "learning_rate": 9.310780245396747e-06, "loss": 0.7, "step": 1867 }, { "epoch": 0.19, "grad_norm": 1.9595698067023832, "learning_rate": 9.309927109992239e-06, "loss": 0.5943, "step": 1868 }, { "epoch": 0.19, "grad_norm": 2.395683206373414, "learning_rate": 9.309073486032926e-06, "loss": 0.6852, "step": 1869 }, { "epoch": 0.19, "grad_norm": 1.8437940606159533, "learning_rate": 9.308219373615574e-06, "loss": 0.5946, "step": 1870 }, { "epoch": 0.19, "grad_norm": 2.23571475537344, "learning_rate": 9.307364772837e-06, "loss": 0.7415, "step": 1871 }, { "epoch": 0.19, "grad_norm": 2.038379272307801, "learning_rate": 9.306509683794077e-06, "loss": 0.7228, "step": 1872 }, { "epoch": 0.19, "grad_norm": 1.9487725835663436, "learning_rate": 9.305654106583735e-06, "loss": 0.7507, "step": 1873 }, { "epoch": 0.19, "grad_norm": 1.887653621297847, "learning_rate": 9.304798041302959e-06, "loss": 0.6303, "step": 1874 }, { "epoch": 0.19, "grad_norm": 1.884397519021714, "learning_rate": 9.30394148804879e-06, "loss": 0.7304, "step": 1875 }, { "epoch": 0.2, "grad_norm": 1.9021671805670946, "learning_rate": 9.303084446918317e-06, "loss": 0.6402, "step": 1876 }, { "epoch": 0.2, "grad_norm": 1.8668810936120013, "learning_rate": 9.302226918008699e-06, "loss": 0.6011, "step": 1877 }, { "epoch": 0.2, "grad_norm": 1.793475054066034, "learning_rate": 9.301368901417138e-06, "loss": 0.6594, "step": 1878 }, { "epoch": 0.2, "grad_norm": 2.120762189756595, "learning_rate": 9.300510397240894e-06, "loss": 0.7769, "step": 1879 }, { "epoch": 0.2, "grad_norm": 2.300340831850511, "learning_rate": 9.299651405577286e-06, "loss": 0.7338, "step": 1880 }, { "epoch": 0.2, "grad_norm": 1.9583934032957513, "learning_rate": 9.298791926523683e-06, "loss": 0.6799, "step": 1881 }, { "epoch": 0.2, "grad_norm": 1.9638398719688912, "learning_rate": 9.29793196017751e-06, "loss": 0.6121, "step": 1882 }, { "epoch": 0.2, "grad_norm": 2.078735718161527, "learning_rate": 9.297071506636256e-06, "loss": 0.7302, "step": 1883 }, { "epoch": 0.2, "grad_norm": 1.8398638850551559, "learning_rate": 9.296210565997453e-06, "loss": 0.6358, "step": 1884 }, { "epoch": 0.2, "grad_norm": 1.7991017595645746, "learning_rate": 9.295349138358693e-06, "loss": 0.6302, "step": 1885 }, { "epoch": 0.2, "grad_norm": 1.738381217294171, "learning_rate": 9.294487223817628e-06, "loss": 0.5517, "step": 1886 }, { "epoch": 0.2, "grad_norm": 1.7996019958686276, "learning_rate": 9.293624822471957e-06, "loss": 0.7795, "step": 1887 }, { "epoch": 0.2, "grad_norm": 2.04078306349116, "learning_rate": 9.29276193441944e-06, "loss": 0.7368, "step": 1888 }, { "epoch": 0.2, "grad_norm": 1.91957856399538, "learning_rate": 9.29189855975789e-06, "loss": 0.6685, "step": 1889 }, { "epoch": 0.2, "grad_norm": 1.8755599618267553, "learning_rate": 9.291034698585174e-06, "loss": 0.6246, "step": 1890 }, { "epoch": 0.2, "grad_norm": 1.874547678759945, "learning_rate": 9.290170350999217e-06, "loss": 0.6128, "step": 1891 }, { "epoch": 0.2, "grad_norm": 1.8915199997141332, "learning_rate": 9.289305517098e-06, "loss": 0.6731, "step": 1892 }, { "epoch": 0.2, "grad_norm": 1.6748872030367143, "learning_rate": 9.288440196979552e-06, "loss": 0.5941, "step": 1893 }, { "epoch": 0.2, "grad_norm": 1.7222395068337952, "learning_rate": 9.287574390741965e-06, "loss": 0.626, "step": 1894 }, { "epoch": 0.2, "grad_norm": 2.162350680248037, "learning_rate": 9.286708098483383e-06, "loss": 0.6518, "step": 1895 }, { "epoch": 0.2, "grad_norm": 1.8336812096087574, "learning_rate": 9.285841320302005e-06, "loss": 0.5188, "step": 1896 }, { "epoch": 0.2, "grad_norm": 2.2796155535184344, "learning_rate": 9.284974056296084e-06, "loss": 0.6926, "step": 1897 }, { "epoch": 0.2, "grad_norm": 1.7071784585343275, "learning_rate": 9.284106306563929e-06, "loss": 0.6714, "step": 1898 }, { "epoch": 0.2, "grad_norm": 1.6506461809111215, "learning_rate": 9.283238071203907e-06, "loss": 0.4856, "step": 1899 }, { "epoch": 0.2, "grad_norm": 1.8781568684820324, "learning_rate": 9.282369350314436e-06, "loss": 0.6134, "step": 1900 }, { "epoch": 0.2, "grad_norm": 1.9914978185733683, "learning_rate": 9.28150014399399e-06, "loss": 0.669, "step": 1901 }, { "epoch": 0.2, "grad_norm": 1.8618014748000549, "learning_rate": 9.280630452341099e-06, "loss": 0.7035, "step": 1902 }, { "epoch": 0.2, "grad_norm": 2.0645327853298525, "learning_rate": 9.279760275454348e-06, "loss": 0.6232, "step": 1903 }, { "epoch": 0.2, "grad_norm": 1.7147310105966709, "learning_rate": 9.278889613432375e-06, "loss": 0.571, "step": 1904 }, { "epoch": 0.2, "grad_norm": 1.9961133817036107, "learning_rate": 9.278018466373877e-06, "loss": 0.7313, "step": 1905 }, { "epoch": 0.2, "grad_norm": 1.7602136103564725, "learning_rate": 9.277146834377601e-06, "loss": 0.6825, "step": 1906 }, { "epoch": 0.2, "grad_norm": 1.8653246475670127, "learning_rate": 9.276274717542352e-06, "loss": 0.5975, "step": 1907 }, { "epoch": 0.2, "grad_norm": 1.9630113986010014, "learning_rate": 9.275402115966991e-06, "loss": 0.7427, "step": 1908 }, { "epoch": 0.2, "grad_norm": 1.959845410358148, "learning_rate": 9.27452902975043e-06, "loss": 0.6803, "step": 1909 }, { "epoch": 0.2, "grad_norm": 1.9040361049898615, "learning_rate": 9.273655458991639e-06, "loss": 0.5902, "step": 1910 }, { "epoch": 0.2, "grad_norm": 2.0371368293310326, "learning_rate": 9.272781403789644e-06, "loss": 0.77, "step": 1911 }, { "epoch": 0.2, "grad_norm": 1.8827798471230117, "learning_rate": 9.271906864243523e-06, "loss": 0.7044, "step": 1912 }, { "epoch": 0.2, "grad_norm": 1.7981269024380084, "learning_rate": 9.271031840452409e-06, "loss": 0.6282, "step": 1913 }, { "epoch": 0.2, "grad_norm": 1.927440464619091, "learning_rate": 9.270156332515492e-06, "loss": 0.6634, "step": 1914 }, { "epoch": 0.2, "grad_norm": 1.946925561163631, "learning_rate": 9.269280340532015e-06, "loss": 0.6884, "step": 1915 }, { "epoch": 0.2, "grad_norm": 1.7885164788997072, "learning_rate": 9.268403864601279e-06, "loss": 0.6771, "step": 1916 }, { "epoch": 0.2, "grad_norm": 1.740210348011536, "learning_rate": 9.267526904822634e-06, "loss": 0.5771, "step": 1917 }, { "epoch": 0.2, "grad_norm": 1.8515761495261118, "learning_rate": 9.266649461295491e-06, "loss": 0.5456, "step": 1918 }, { "epoch": 0.2, "grad_norm": 1.7858501985347042, "learning_rate": 9.265771534119313e-06, "loss": 0.6791, "step": 1919 }, { "epoch": 0.2, "grad_norm": 1.7471660234340287, "learning_rate": 9.264893123393618e-06, "loss": 0.6445, "step": 1920 }, { "epoch": 0.2, "grad_norm": 2.0050209490973847, "learning_rate": 9.264014229217978e-06, "loss": 0.6816, "step": 1921 }, { "epoch": 0.2, "grad_norm": 1.7946185867470288, "learning_rate": 9.263134851692019e-06, "loss": 0.6995, "step": 1922 }, { "epoch": 0.2, "grad_norm": 1.9338460880907744, "learning_rate": 9.262254990915427e-06, "loss": 0.7106, "step": 1923 }, { "epoch": 0.2, "grad_norm": 2.1389806917513488, "learning_rate": 9.261374646987939e-06, "loss": 0.73, "step": 1924 }, { "epoch": 0.2, "grad_norm": 2.5347394068987805, "learning_rate": 9.260493820009343e-06, "loss": 0.7632, "step": 1925 }, { "epoch": 0.2, "grad_norm": 1.9525928567638877, "learning_rate": 9.259612510079492e-06, "loss": 0.6774, "step": 1926 }, { "epoch": 0.2, "grad_norm": 2.0893340477312625, "learning_rate": 9.258730717298281e-06, "loss": 0.6821, "step": 1927 }, { "epoch": 0.2, "grad_norm": 2.0650730951225564, "learning_rate": 9.25784844176567e-06, "loss": 0.7423, "step": 1928 }, { "epoch": 0.2, "grad_norm": 1.7756513259013247, "learning_rate": 9.256965683581669e-06, "loss": 0.6076, "step": 1929 }, { "epoch": 0.2, "grad_norm": 1.8897211314293623, "learning_rate": 9.256082442846347e-06, "loss": 0.651, "step": 1930 }, { "epoch": 0.2, "grad_norm": 1.8280216607096946, "learning_rate": 9.255198719659818e-06, "loss": 0.7047, "step": 1931 }, { "epoch": 0.2, "grad_norm": 2.0086394653794373, "learning_rate": 9.25431451412226e-06, "loss": 0.7658, "step": 1932 }, { "epoch": 0.2, "grad_norm": 1.9127465298594584, "learning_rate": 9.253429826333904e-06, "loss": 0.618, "step": 1933 }, { "epoch": 0.2, "grad_norm": 1.826301070624459, "learning_rate": 9.252544656395033e-06, "loss": 0.7363, "step": 1934 }, { "epoch": 0.2, "grad_norm": 1.9814272544927, "learning_rate": 9.251659004405987e-06, "loss": 0.6725, "step": 1935 }, { "epoch": 0.2, "grad_norm": 1.9539602624207804, "learning_rate": 9.25077287046716e-06, "loss": 0.6789, "step": 1936 }, { "epoch": 0.2, "grad_norm": 2.0032162419200046, "learning_rate": 9.249886254678996e-06, "loss": 0.6548, "step": 1937 }, { "epoch": 0.2, "grad_norm": 1.7474671664321944, "learning_rate": 9.248999157142006e-06, "loss": 0.6904, "step": 1938 }, { "epoch": 0.2, "grad_norm": 1.8925018918591514, "learning_rate": 9.24811157795674e-06, "loss": 0.6627, "step": 1939 }, { "epoch": 0.2, "grad_norm": 2.102589642676861, "learning_rate": 9.247223517223816e-06, "loss": 0.6378, "step": 1940 }, { "epoch": 0.2, "grad_norm": 1.9593907278097766, "learning_rate": 9.246334975043896e-06, "loss": 0.7016, "step": 1941 }, { "epoch": 0.2, "grad_norm": 1.8473768840344285, "learning_rate": 9.245445951517705e-06, "loss": 0.6621, "step": 1942 }, { "epoch": 0.2, "grad_norm": 2.0796609104335326, "learning_rate": 9.244556446746014e-06, "loss": 0.7231, "step": 1943 }, { "epoch": 0.2, "grad_norm": 1.9370198559859941, "learning_rate": 9.243666460829659e-06, "loss": 0.7223, "step": 1944 }, { "epoch": 0.2, "grad_norm": 2.053469252910239, "learning_rate": 9.242775993869521e-06, "loss": 0.7472, "step": 1945 }, { "epoch": 0.2, "grad_norm": 1.9508358716738758, "learning_rate": 9.241885045966543e-06, "loss": 0.7247, "step": 1946 }, { "epoch": 0.2, "grad_norm": 2.2450279545330183, "learning_rate": 9.240993617221717e-06, "loss": 0.7591, "step": 1947 }, { "epoch": 0.2, "grad_norm": 1.98183928572626, "learning_rate": 9.24010170773609e-06, "loss": 0.5902, "step": 1948 }, { "epoch": 0.2, "grad_norm": 1.8675456918185938, "learning_rate": 9.239209317610766e-06, "loss": 0.6223, "step": 1949 }, { "epoch": 0.2, "grad_norm": 2.1191395908530715, "learning_rate": 9.238316446946907e-06, "loss": 0.5797, "step": 1950 }, { "epoch": 0.2, "grad_norm": 1.875641872294237, "learning_rate": 9.237423095845719e-06, "loss": 0.7013, "step": 1951 }, { "epoch": 0.2, "grad_norm": 1.7475234677089921, "learning_rate": 9.23652926440847e-06, "loss": 0.717, "step": 1952 }, { "epoch": 0.2, "grad_norm": 2.167896282874592, "learning_rate": 9.23563495273648e-06, "loss": 0.7803, "step": 1953 }, { "epoch": 0.2, "grad_norm": 1.7286001629951508, "learning_rate": 9.234740160931128e-06, "loss": 0.573, "step": 1954 }, { "epoch": 0.2, "grad_norm": 2.266229733508544, "learning_rate": 9.233844889093842e-06, "loss": 0.5986, "step": 1955 }, { "epoch": 0.2, "grad_norm": 2.032639531476447, "learning_rate": 9.232949137326104e-06, "loss": 0.6202, "step": 1956 }, { "epoch": 0.2, "grad_norm": 1.8892463261859485, "learning_rate": 9.232052905729455e-06, "loss": 0.7258, "step": 1957 }, { "epoch": 0.2, "grad_norm": 2.046805895646811, "learning_rate": 9.231156194405488e-06, "loss": 0.6311, "step": 1958 }, { "epoch": 0.2, "grad_norm": 2.125870271699572, "learning_rate": 9.230259003455849e-06, "loss": 0.7484, "step": 1959 }, { "epoch": 0.2, "grad_norm": 1.9802725052113708, "learning_rate": 9.229361332982241e-06, "loss": 0.7112, "step": 1960 }, { "epoch": 0.2, "grad_norm": 1.8447348073498075, "learning_rate": 9.228463183086417e-06, "loss": 0.6662, "step": 1961 }, { "epoch": 0.2, "grad_norm": 1.8752305247433687, "learning_rate": 9.227564553870192e-06, "loss": 0.6881, "step": 1962 }, { "epoch": 0.2, "grad_norm": 1.8026957544784308, "learning_rate": 9.226665445435428e-06, "loss": 0.5558, "step": 1963 }, { "epoch": 0.2, "grad_norm": 1.9002089055531495, "learning_rate": 9.225765857884044e-06, "loss": 0.6797, "step": 1964 }, { "epoch": 0.2, "grad_norm": 1.81457632840884, "learning_rate": 9.224865791318015e-06, "loss": 0.6085, "step": 1965 }, { "epoch": 0.2, "grad_norm": 1.7070620723972068, "learning_rate": 9.223965245839367e-06, "loss": 0.6723, "step": 1966 }, { "epoch": 0.2, "grad_norm": 2.105875693810757, "learning_rate": 9.223064221550183e-06, "loss": 0.7318, "step": 1967 }, { "epoch": 0.2, "grad_norm": 2.0605135978619633, "learning_rate": 9.222162718552598e-06, "loss": 0.8245, "step": 1968 }, { "epoch": 0.2, "grad_norm": 2.238718777820951, "learning_rate": 9.221260736948803e-06, "loss": 0.7279, "step": 1969 }, { "epoch": 0.2, "grad_norm": 2.106801695108455, "learning_rate": 9.220358276841044e-06, "loss": 0.6773, "step": 1970 }, { "epoch": 0.2, "grad_norm": 1.665299995213328, "learning_rate": 9.21945533833162e-06, "loss": 0.6073, "step": 1971 }, { "epoch": 0.2, "grad_norm": 1.7703299324370325, "learning_rate": 9.218551921522882e-06, "loss": 0.6295, "step": 1972 }, { "epoch": 0.21, "grad_norm": 1.9740036444553057, "learning_rate": 9.217648026517238e-06, "loss": 0.7084, "step": 1973 }, { "epoch": 0.21, "grad_norm": 1.950547982837811, "learning_rate": 9.216743653417154e-06, "loss": 0.6489, "step": 1974 }, { "epoch": 0.21, "grad_norm": 1.8744414611735833, "learning_rate": 9.215838802325139e-06, "loss": 0.6199, "step": 1975 }, { "epoch": 0.21, "grad_norm": 1.821711632746101, "learning_rate": 9.214933473343765e-06, "loss": 0.6074, "step": 1976 }, { "epoch": 0.21, "grad_norm": 2.105716529248528, "learning_rate": 9.21402766657566e-06, "loss": 0.6654, "step": 1977 }, { "epoch": 0.21, "grad_norm": 1.8437304299013715, "learning_rate": 9.2131213821235e-06, "loss": 0.5944, "step": 1978 }, { "epoch": 0.21, "grad_norm": 1.9858141668801854, "learning_rate": 9.212214620090016e-06, "loss": 0.7049, "step": 1979 }, { "epoch": 0.21, "grad_norm": 2.010065081274525, "learning_rate": 9.211307380577997e-06, "loss": 0.6696, "step": 1980 }, { "epoch": 0.21, "grad_norm": 1.8158239557845401, "learning_rate": 9.210399663690282e-06, "loss": 0.5995, "step": 1981 }, { "epoch": 0.21, "grad_norm": 1.81249302739702, "learning_rate": 9.209491469529767e-06, "loss": 0.5868, "step": 1982 }, { "epoch": 0.21, "grad_norm": 1.786339404329761, "learning_rate": 9.208582798199402e-06, "loss": 0.5719, "step": 1983 }, { "epoch": 0.21, "grad_norm": 1.9842702283554294, "learning_rate": 9.207673649802187e-06, "loss": 0.6408, "step": 1984 }, { "epoch": 0.21, "grad_norm": 3.5838760747233813, "learning_rate": 9.206764024441181e-06, "loss": 0.699, "step": 1985 }, { "epoch": 0.21, "grad_norm": 1.9062861026868705, "learning_rate": 9.205853922219494e-06, "loss": 0.5787, "step": 1986 }, { "epoch": 0.21, "grad_norm": 1.7433682125475243, "learning_rate": 9.204943343240293e-06, "loss": 0.6315, "step": 1987 }, { "epoch": 0.21, "grad_norm": 1.776540630355498, "learning_rate": 9.2040322876068e-06, "loss": 0.6349, "step": 1988 }, { "epoch": 0.21, "grad_norm": 1.8949934734507197, "learning_rate": 9.203120755422282e-06, "loss": 0.621, "step": 1989 }, { "epoch": 0.21, "grad_norm": 2.013754218571031, "learning_rate": 9.202208746790069e-06, "loss": 0.6994, "step": 1990 }, { "epoch": 0.21, "grad_norm": 1.6216738548604914, "learning_rate": 9.201296261813542e-06, "loss": 0.5664, "step": 1991 }, { "epoch": 0.21, "grad_norm": 1.746950760201878, "learning_rate": 9.20038330059614e-06, "loss": 0.591, "step": 1992 }, { "epoch": 0.21, "grad_norm": 2.044019666809054, "learning_rate": 9.199469863241349e-06, "loss": 0.6995, "step": 1993 }, { "epoch": 0.21, "grad_norm": 1.8783054256116338, "learning_rate": 9.19855594985271e-06, "loss": 0.6323, "step": 1994 }, { "epoch": 0.21, "grad_norm": 1.8318861488098133, "learning_rate": 9.197641560533826e-06, "loss": 0.7148, "step": 1995 }, { "epoch": 0.21, "grad_norm": 2.1884926195782684, "learning_rate": 9.196726695388345e-06, "loss": 0.6877, "step": 1996 }, { "epoch": 0.21, "grad_norm": 1.805667438158848, "learning_rate": 9.19581135451997e-06, "loss": 0.5927, "step": 1997 }, { "epoch": 0.21, "grad_norm": 1.6125439114431472, "learning_rate": 9.194895538032464e-06, "loss": 0.5711, "step": 1998 }, { "epoch": 0.21, "grad_norm": 1.8686776317461966, "learning_rate": 9.19397924602964e-06, "loss": 0.6378, "step": 1999 }, { "epoch": 0.21, "grad_norm": 1.726924867450233, "learning_rate": 9.193062478615363e-06, "loss": 0.6463, "step": 2000 }, { "epoch": 0.21, "grad_norm": 2.146670051321336, "learning_rate": 9.192145235893554e-06, "loss": 0.6873, "step": 2001 }, { "epoch": 0.21, "grad_norm": 1.902666731160066, "learning_rate": 9.191227517968189e-06, "loss": 0.6114, "step": 2002 }, { "epoch": 0.21, "grad_norm": 2.0687284258365057, "learning_rate": 9.190309324943294e-06, "loss": 0.712, "step": 2003 }, { "epoch": 0.21, "grad_norm": 1.9026767117952792, "learning_rate": 9.189390656922955e-06, "loss": 0.6617, "step": 2004 }, { "epoch": 0.21, "grad_norm": 1.8653739251485049, "learning_rate": 9.188471514011304e-06, "loss": 0.6892, "step": 2005 }, { "epoch": 0.21, "grad_norm": 1.954084908851961, "learning_rate": 9.187551896312536e-06, "loss": 0.6613, "step": 2006 }, { "epoch": 0.21, "grad_norm": 1.855601522830974, "learning_rate": 9.18663180393089e-06, "loss": 0.5557, "step": 2007 }, { "epoch": 0.21, "grad_norm": 2.0927778065386633, "learning_rate": 9.185711236970666e-06, "loss": 0.7094, "step": 2008 }, { "epoch": 0.21, "grad_norm": 1.8476031345890764, "learning_rate": 9.184790195536217e-06, "loss": 0.6551, "step": 2009 }, { "epoch": 0.21, "grad_norm": 1.9500926248954888, "learning_rate": 9.183868679731947e-06, "loss": 0.7358, "step": 2010 }, { "epoch": 0.21, "grad_norm": 1.8204670833802736, "learning_rate": 9.182946689662314e-06, "loss": 0.5735, "step": 2011 }, { "epoch": 0.21, "grad_norm": 1.8122772993948097, "learning_rate": 9.182024225431832e-06, "loss": 0.5838, "step": 2012 }, { "epoch": 0.21, "grad_norm": 2.0100838581998874, "learning_rate": 9.181101287145067e-06, "loss": 0.7309, "step": 2013 }, { "epoch": 0.21, "grad_norm": 2.0174504680138416, "learning_rate": 9.180177874906641e-06, "loss": 0.7442, "step": 2014 }, { "epoch": 0.21, "grad_norm": 1.8246358548978066, "learning_rate": 9.179253988821225e-06, "loss": 0.6281, "step": 2015 }, { "epoch": 0.21, "grad_norm": 1.894892646996983, "learning_rate": 9.17832962899355e-06, "loss": 0.7488, "step": 2016 }, { "epoch": 0.21, "grad_norm": 1.9804467894690987, "learning_rate": 9.177404795528395e-06, "loss": 0.5651, "step": 2017 }, { "epoch": 0.21, "grad_norm": 1.9679539263706014, "learning_rate": 9.176479488530594e-06, "loss": 0.7255, "step": 2018 }, { "epoch": 0.21, "grad_norm": 1.8442427437055013, "learning_rate": 9.17555370810504e-06, "loss": 0.6942, "step": 2019 }, { "epoch": 0.21, "grad_norm": 1.757489364681599, "learning_rate": 9.174627454356673e-06, "loss": 0.6402, "step": 2020 }, { "epoch": 0.21, "grad_norm": 2.0898518246682896, "learning_rate": 9.17370072739049e-06, "loss": 0.7267, "step": 2021 }, { "epoch": 0.21, "grad_norm": 1.620345040398183, "learning_rate": 9.172773527311541e-06, "loss": 0.5773, "step": 2022 }, { "epoch": 0.21, "grad_norm": 1.8525961488620617, "learning_rate": 9.171845854224925e-06, "loss": 0.6378, "step": 2023 }, { "epoch": 0.21, "grad_norm": 1.7575000021739289, "learning_rate": 9.170917708235806e-06, "loss": 0.7192, "step": 2024 }, { "epoch": 0.21, "grad_norm": 1.693136988470001, "learning_rate": 9.16998908944939e-06, "loss": 0.6771, "step": 2025 }, { "epoch": 0.21, "grad_norm": 1.9844607982204887, "learning_rate": 9.169059997970943e-06, "loss": 0.7001, "step": 2026 }, { "epoch": 0.21, "grad_norm": 2.0172894851733534, "learning_rate": 9.168130433905783e-06, "loss": 0.6599, "step": 2027 }, { "epoch": 0.21, "grad_norm": 1.721838081002367, "learning_rate": 9.167200397359279e-06, "loss": 0.6509, "step": 2028 }, { "epoch": 0.21, "grad_norm": 1.6521500094494894, "learning_rate": 9.16626988843686e-06, "loss": 0.575, "step": 2029 }, { "epoch": 0.21, "grad_norm": 1.8866447487668851, "learning_rate": 9.165338907244e-06, "loss": 0.6215, "step": 2030 }, { "epoch": 0.21, "grad_norm": 1.8011492071948654, "learning_rate": 9.164407453886234e-06, "loss": 0.7151, "step": 2031 }, { "epoch": 0.21, "grad_norm": 2.0920355496930485, "learning_rate": 9.163475528469148e-06, "loss": 0.6656, "step": 2032 }, { "epoch": 0.21, "grad_norm": 2.070222708565936, "learning_rate": 9.16254313109838e-06, "loss": 0.6258, "step": 2033 }, { "epoch": 0.21, "grad_norm": 2.123867145449913, "learning_rate": 9.16161026187962e-06, "loss": 0.66, "step": 2034 }, { "epoch": 0.21, "grad_norm": 1.8609378519738486, "learning_rate": 9.160676920918618e-06, "loss": 0.6742, "step": 2035 }, { "epoch": 0.21, "grad_norm": 1.9956472499189537, "learning_rate": 9.159743108321173e-06, "loss": 0.7092, "step": 2036 }, { "epoch": 0.21, "grad_norm": 2.308642760350203, "learning_rate": 9.158808824193135e-06, "loss": 0.7591, "step": 2037 }, { "epoch": 0.21, "grad_norm": 1.9751497331037764, "learning_rate": 9.157874068640414e-06, "loss": 0.6626, "step": 2038 }, { "epoch": 0.21, "grad_norm": 2.172657081296383, "learning_rate": 9.156938841768965e-06, "loss": 0.6358, "step": 2039 }, { "epoch": 0.21, "grad_norm": 1.7053858901126682, "learning_rate": 9.156003143684808e-06, "loss": 0.5645, "step": 2040 }, { "epoch": 0.21, "grad_norm": 1.669173862779289, "learning_rate": 9.155066974494005e-06, "loss": 0.5918, "step": 2041 }, { "epoch": 0.21, "grad_norm": 1.887929904534194, "learning_rate": 9.154130334302677e-06, "loss": 0.6318, "step": 2042 }, { "epoch": 0.21, "grad_norm": 1.8383647204550129, "learning_rate": 9.153193223216998e-06, "loss": 0.5643, "step": 2043 }, { "epoch": 0.21, "grad_norm": 1.8251294761789811, "learning_rate": 9.152255641343196e-06, "loss": 0.5895, "step": 2044 }, { "epoch": 0.21, "grad_norm": 1.8955577533945258, "learning_rate": 9.151317588787546e-06, "loss": 0.6413, "step": 2045 }, { "epoch": 0.21, "grad_norm": 1.6992665218554355, "learning_rate": 9.150379065656389e-06, "loss": 0.6495, "step": 2046 }, { "epoch": 0.21, "grad_norm": 2.267187657771848, "learning_rate": 9.149440072056109e-06, "loss": 0.686, "step": 2047 }, { "epoch": 0.21, "grad_norm": 1.8039609054222494, "learning_rate": 9.148500608093144e-06, "loss": 0.7414, "step": 2048 }, { "epoch": 0.21, "grad_norm": 1.7381527742569263, "learning_rate": 9.147560673873991e-06, "loss": 0.6521, "step": 2049 }, { "epoch": 0.21, "grad_norm": 1.9806331929152141, "learning_rate": 9.146620269505194e-06, "loss": 0.6065, "step": 2050 }, { "epoch": 0.21, "grad_norm": 1.8251796761712276, "learning_rate": 9.145679395093357e-06, "loss": 0.6152, "step": 2051 }, { "epoch": 0.21, "grad_norm": 1.9744130556131152, "learning_rate": 9.144738050745129e-06, "loss": 0.6398, "step": 2052 }, { "epoch": 0.21, "grad_norm": 1.8608757900689004, "learning_rate": 9.143796236567218e-06, "loss": 0.6509, "step": 2053 }, { "epoch": 0.21, "grad_norm": 2.0005118742578807, "learning_rate": 9.142853952666385e-06, "loss": 0.6495, "step": 2054 }, { "epoch": 0.21, "grad_norm": 1.8723374871113159, "learning_rate": 9.141911199149443e-06, "loss": 0.7244, "step": 2055 }, { "epoch": 0.21, "grad_norm": 1.9810936161123067, "learning_rate": 9.14096797612326e-06, "loss": 0.6901, "step": 2056 }, { "epoch": 0.21, "grad_norm": 1.7755395689456877, "learning_rate": 9.140024283694752e-06, "loss": 0.6726, "step": 2057 }, { "epoch": 0.21, "grad_norm": 2.0113646493029913, "learning_rate": 9.139080121970895e-06, "loss": 0.728, "step": 2058 }, { "epoch": 0.21, "grad_norm": 1.9395275583029608, "learning_rate": 9.138135491058715e-06, "loss": 0.6925, "step": 2059 }, { "epoch": 0.21, "grad_norm": 1.9086311561274867, "learning_rate": 9.13719039106529e-06, "loss": 0.6337, "step": 2060 }, { "epoch": 0.21, "grad_norm": 2.0135221869102757, "learning_rate": 9.136244822097754e-06, "loss": 0.7061, "step": 2061 }, { "epoch": 0.21, "grad_norm": 1.7953338697723547, "learning_rate": 9.13529878426329e-06, "loss": 0.6007, "step": 2062 }, { "epoch": 0.21, "grad_norm": 1.9043752154264741, "learning_rate": 9.134352277669139e-06, "loss": 0.7779, "step": 2063 }, { "epoch": 0.21, "grad_norm": 1.8572959317924327, "learning_rate": 9.13340530242259e-06, "loss": 0.6555, "step": 2064 }, { "epoch": 0.21, "grad_norm": 1.901098228455696, "learning_rate": 9.132457858630993e-06, "loss": 0.6443, "step": 2065 }, { "epoch": 0.21, "grad_norm": 1.8820392432706698, "learning_rate": 9.131509946401744e-06, "loss": 0.6921, "step": 2066 }, { "epoch": 0.21, "grad_norm": 1.63515390188287, "learning_rate": 9.130561565842293e-06, "loss": 0.6493, "step": 2067 }, { "epoch": 0.21, "grad_norm": 1.8004550658092822, "learning_rate": 9.129612717060145e-06, "loss": 0.6021, "step": 2068 }, { "epoch": 0.22, "grad_norm": 1.8135533498191938, "learning_rate": 9.128663400162859e-06, "loss": 0.6516, "step": 2069 }, { "epoch": 0.22, "grad_norm": 1.9883782219408912, "learning_rate": 9.12771361525804e-06, "loss": 0.5804, "step": 2070 }, { "epoch": 0.22, "grad_norm": 1.9267639414496214, "learning_rate": 9.12676336245336e-06, "loss": 0.7172, "step": 2071 }, { "epoch": 0.22, "grad_norm": 2.241296242570306, "learning_rate": 9.125812641856529e-06, "loss": 0.6466, "step": 2072 }, { "epoch": 0.22, "grad_norm": 1.8465439639348022, "learning_rate": 9.124861453575318e-06, "loss": 0.5546, "step": 2073 }, { "epoch": 0.22, "grad_norm": 1.8408819912841634, "learning_rate": 9.123909797717551e-06, "loss": 0.6116, "step": 2074 }, { "epoch": 0.22, "grad_norm": 2.076263596235832, "learning_rate": 9.122957674391103e-06, "loss": 0.7471, "step": 2075 }, { "epoch": 0.22, "grad_norm": 2.161931389146741, "learning_rate": 9.122005083703901e-06, "loss": 0.7757, "step": 2076 }, { "epoch": 0.22, "grad_norm": 1.832263381482899, "learning_rate": 9.12105202576393e-06, "loss": 0.6342, "step": 2077 }, { "epoch": 0.22, "grad_norm": 1.9116255589770395, "learning_rate": 9.120098500679222e-06, "loss": 0.5791, "step": 2078 }, { "epoch": 0.22, "grad_norm": 1.7512087408271144, "learning_rate": 9.119144508557867e-06, "loss": 0.6596, "step": 2079 }, { "epoch": 0.22, "grad_norm": 2.316242858851651, "learning_rate": 9.118190049508003e-06, "loss": 0.7156, "step": 2080 }, { "epoch": 0.22, "grad_norm": 2.098398279289849, "learning_rate": 9.117235123637822e-06, "loss": 0.7091, "step": 2081 }, { "epoch": 0.22, "grad_norm": 2.005617652197642, "learning_rate": 9.116279731055574e-06, "loss": 0.7734, "step": 2082 }, { "epoch": 0.22, "grad_norm": 2.3181625277963342, "learning_rate": 9.115323871869554e-06, "loss": 0.7269, "step": 2083 }, { "epoch": 0.22, "grad_norm": 2.09049499201878, "learning_rate": 9.11436754618812e-06, "loss": 0.6962, "step": 2084 }, { "epoch": 0.22, "grad_norm": 1.8233048319574325, "learning_rate": 9.113410754119671e-06, "loss": 0.657, "step": 2085 }, { "epoch": 0.22, "grad_norm": 1.7611399694198617, "learning_rate": 9.112453495772668e-06, "loss": 0.7066, "step": 2086 }, { "epoch": 0.22, "grad_norm": 2.30757117904449, "learning_rate": 9.111495771255623e-06, "loss": 0.7161, "step": 2087 }, { "epoch": 0.22, "grad_norm": 1.7222869131586123, "learning_rate": 9.110537580677094e-06, "loss": 0.6955, "step": 2088 }, { "epoch": 0.22, "grad_norm": 1.777824577605693, "learning_rate": 9.109578924145705e-06, "loss": 0.6405, "step": 2089 }, { "epoch": 0.22, "grad_norm": 1.867004091367024, "learning_rate": 9.108619801770117e-06, "loss": 0.7337, "step": 2090 }, { "epoch": 0.22, "grad_norm": 1.8341940643841907, "learning_rate": 9.10766021365906e-06, "loss": 0.6397, "step": 2091 }, { "epoch": 0.22, "grad_norm": 1.7108974210893473, "learning_rate": 9.106700159921301e-06, "loss": 0.6176, "step": 2092 }, { "epoch": 0.22, "grad_norm": 1.763586609221222, "learning_rate": 9.105739640665675e-06, "loss": 0.7577, "step": 2093 }, { "epoch": 0.22, "grad_norm": 1.8179769534117416, "learning_rate": 9.104778656001057e-06, "loss": 0.6602, "step": 2094 }, { "epoch": 0.22, "grad_norm": 2.0710656099544944, "learning_rate": 9.103817206036383e-06, "loss": 0.6258, "step": 2095 }, { "epoch": 0.22, "grad_norm": 1.9636515939076291, "learning_rate": 9.102855290880637e-06, "loss": 0.6908, "step": 2096 }, { "epoch": 0.22, "grad_norm": 1.7697645394492914, "learning_rate": 9.101892910642858e-06, "loss": 0.5772, "step": 2097 }, { "epoch": 0.22, "grad_norm": 1.8229562492894091, "learning_rate": 9.100930065432136e-06, "loss": 0.6319, "step": 2098 }, { "epoch": 0.22, "grad_norm": 1.9665961596361246, "learning_rate": 9.09996675535762e-06, "loss": 0.6321, "step": 2099 }, { "epoch": 0.22, "grad_norm": 1.9153659580729374, "learning_rate": 9.099002980528502e-06, "loss": 0.7386, "step": 2100 }, { "epoch": 0.22, "grad_norm": 1.8929607008969653, "learning_rate": 9.098038741054032e-06, "loss": 0.6038, "step": 2101 }, { "epoch": 0.22, "grad_norm": 2.122236111686674, "learning_rate": 9.097074037043512e-06, "loss": 0.7902, "step": 2102 }, { "epoch": 0.22, "grad_norm": 1.8063786343678705, "learning_rate": 9.0961088686063e-06, "loss": 0.6154, "step": 2103 }, { "epoch": 0.22, "grad_norm": 1.8859714107413783, "learning_rate": 9.095143235851797e-06, "loss": 0.625, "step": 2104 }, { "epoch": 0.22, "grad_norm": 1.8899963332781613, "learning_rate": 9.094177138889468e-06, "loss": 0.6285, "step": 2105 }, { "epoch": 0.22, "grad_norm": 1.823065246187759, "learning_rate": 9.093210577828826e-06, "loss": 0.5222, "step": 2106 }, { "epoch": 0.22, "grad_norm": 1.8409605037443102, "learning_rate": 9.092243552779434e-06, "loss": 0.545, "step": 2107 }, { "epoch": 0.22, "grad_norm": 1.7950723144319713, "learning_rate": 9.091276063850909e-06, "loss": 0.7386, "step": 2108 }, { "epoch": 0.22, "grad_norm": 1.8619428941975487, "learning_rate": 9.090308111152924e-06, "loss": 0.6613, "step": 2109 }, { "epoch": 0.22, "grad_norm": 1.8760132631684212, "learning_rate": 9.0893396947952e-06, "loss": 0.6567, "step": 2110 }, { "epoch": 0.22, "grad_norm": 2.5393612059524457, "learning_rate": 9.088370814887512e-06, "loss": 0.6192, "step": 2111 }, { "epoch": 0.22, "grad_norm": 2.1997633108009746, "learning_rate": 9.08740147153969e-06, "loss": 0.7291, "step": 2112 }, { "epoch": 0.22, "grad_norm": 2.0003762945667263, "learning_rate": 9.086431664861615e-06, "loss": 0.6385, "step": 2113 }, { "epoch": 0.22, "grad_norm": 1.9280568210847042, "learning_rate": 9.085461394963218e-06, "loss": 0.7926, "step": 2114 }, { "epoch": 0.22, "grad_norm": 2.0839385743826337, "learning_rate": 9.084490661954487e-06, "loss": 0.7176, "step": 2115 }, { "epoch": 0.22, "grad_norm": 1.751926913173965, "learning_rate": 9.083519465945456e-06, "loss": 0.6207, "step": 2116 }, { "epoch": 0.22, "grad_norm": 1.6790647212307659, "learning_rate": 9.082547807046218e-06, "loss": 0.5995, "step": 2117 }, { "epoch": 0.22, "grad_norm": 1.9768132587469367, "learning_rate": 9.081575685366919e-06, "loss": 0.6638, "step": 2118 }, { "epoch": 0.22, "grad_norm": 1.873044803611899, "learning_rate": 9.080603101017751e-06, "loss": 0.6287, "step": 2119 }, { "epoch": 0.22, "grad_norm": 1.9026675442854966, "learning_rate": 9.079630054108962e-06, "loss": 0.7277, "step": 2120 }, { "epoch": 0.22, "grad_norm": 1.786052227401782, "learning_rate": 9.078656544750854e-06, "loss": 0.5283, "step": 2121 }, { "epoch": 0.22, "grad_norm": 1.835617801364485, "learning_rate": 9.07768257305378e-06, "loss": 0.6734, "step": 2122 }, { "epoch": 0.22, "grad_norm": 1.9059077413421042, "learning_rate": 9.07670813912814e-06, "loss": 0.5662, "step": 2123 }, { "epoch": 0.22, "grad_norm": 1.8430365616658317, "learning_rate": 9.0757332430844e-06, "loss": 0.6786, "step": 2124 }, { "epoch": 0.22, "grad_norm": 2.0912152253587233, "learning_rate": 9.074757885033065e-06, "loss": 0.6699, "step": 2125 }, { "epoch": 0.22, "grad_norm": 1.8326045285025117, "learning_rate": 9.073782065084699e-06, "loss": 0.6516, "step": 2126 }, { "epoch": 0.22, "grad_norm": 2.1141153087285747, "learning_rate": 9.072805783349916e-06, "loss": 0.6504, "step": 2127 }, { "epoch": 0.22, "grad_norm": 1.7665606289047024, "learning_rate": 9.071829039939382e-06, "loss": 0.5843, "step": 2128 }, { "epoch": 0.22, "grad_norm": 1.965086242675083, "learning_rate": 9.070851834963818e-06, "loss": 0.606, "step": 2129 }, { "epoch": 0.22, "grad_norm": 1.8212064883638712, "learning_rate": 9.069874168533996e-06, "loss": 0.5638, "step": 2130 }, { "epoch": 0.22, "grad_norm": 1.914061308706072, "learning_rate": 9.06889604076074e-06, "loss": 0.7702, "step": 2131 }, { "epoch": 0.22, "grad_norm": 1.9281649798903766, "learning_rate": 9.067917451754926e-06, "loss": 0.6979, "step": 2132 }, { "epoch": 0.22, "grad_norm": 1.9724035219322607, "learning_rate": 9.06693840162748e-06, "loss": 0.6326, "step": 2133 }, { "epoch": 0.22, "grad_norm": 1.926538052376226, "learning_rate": 9.065958890489388e-06, "loss": 0.6708, "step": 2134 }, { "epoch": 0.22, "grad_norm": 1.8135660948677115, "learning_rate": 9.06497891845168e-06, "loss": 0.6908, "step": 2135 }, { "epoch": 0.22, "grad_norm": 2.0359781123918546, "learning_rate": 9.063998485625442e-06, "loss": 0.8177, "step": 2136 }, { "epoch": 0.22, "grad_norm": 1.8070490443578695, "learning_rate": 9.063017592121812e-06, "loss": 0.6308, "step": 2137 }, { "epoch": 0.22, "grad_norm": 2.1534040200827858, "learning_rate": 9.062036238051978e-06, "loss": 0.7568, "step": 2138 }, { "epoch": 0.22, "grad_norm": 1.9932070854185635, "learning_rate": 9.061054423527185e-06, "loss": 0.6454, "step": 2139 }, { "epoch": 0.22, "grad_norm": 2.06303057621518, "learning_rate": 9.060072148658726e-06, "loss": 0.7897, "step": 2140 }, { "epoch": 0.22, "grad_norm": 1.8541768464221198, "learning_rate": 9.059089413557946e-06, "loss": 0.7146, "step": 2141 }, { "epoch": 0.22, "grad_norm": 2.131577767008987, "learning_rate": 9.058106218336244e-06, "loss": 0.64, "step": 2142 }, { "epoch": 0.22, "grad_norm": 1.957951302391662, "learning_rate": 9.057122563105074e-06, "loss": 0.6772, "step": 2143 }, { "epoch": 0.22, "grad_norm": 1.8796034764382545, "learning_rate": 9.056138447975936e-06, "loss": 0.5828, "step": 2144 }, { "epoch": 0.22, "grad_norm": 1.9984362920448873, "learning_rate": 9.055153873060387e-06, "loss": 0.6582, "step": 2145 }, { "epoch": 0.22, "grad_norm": 1.818826471290181, "learning_rate": 9.05416883847003e-06, "loss": 0.6103, "step": 2146 }, { "epoch": 0.22, "grad_norm": 1.9010961716566377, "learning_rate": 9.05318334431653e-06, "loss": 0.6502, "step": 2147 }, { "epoch": 0.22, "grad_norm": 3.6452104619968555, "learning_rate": 9.052197390711594e-06, "loss": 0.6073, "step": 2148 }, { "epoch": 0.22, "grad_norm": 2.0593507671841054, "learning_rate": 9.051210977766987e-06, "loss": 0.7939, "step": 2149 }, { "epoch": 0.22, "grad_norm": 2.8947339812978212, "learning_rate": 9.050224105594525e-06, "loss": 0.7403, "step": 2150 }, { "epoch": 0.22, "grad_norm": 1.7701582298112364, "learning_rate": 9.049236774306073e-06, "loss": 0.7139, "step": 2151 }, { "epoch": 0.22, "grad_norm": 1.769772295770342, "learning_rate": 9.048248984013557e-06, "loss": 0.6318, "step": 2152 }, { "epoch": 0.22, "grad_norm": 1.8668708776556142, "learning_rate": 9.04726073482894e-06, "loss": 0.6786, "step": 2153 }, { "epoch": 0.22, "grad_norm": 2.8599052090254813, "learning_rate": 9.046272026864253e-06, "loss": 0.6792, "step": 2154 }, { "epoch": 0.22, "grad_norm": 2.306307988225551, "learning_rate": 9.045282860231567e-06, "loss": 0.8036, "step": 2155 }, { "epoch": 0.22, "grad_norm": 2.0248004612699835, "learning_rate": 9.044293235043013e-06, "loss": 0.5483, "step": 2156 }, { "epoch": 0.22, "grad_norm": 2.0040416841239312, "learning_rate": 9.04330315141077e-06, "loss": 0.5838, "step": 2157 }, { "epoch": 0.22, "grad_norm": 1.8934644260258988, "learning_rate": 9.042312609447066e-06, "loss": 0.7741, "step": 2158 }, { "epoch": 0.22, "grad_norm": 1.8369307032598101, "learning_rate": 9.041321609264189e-06, "loss": 0.6881, "step": 2159 }, { "epoch": 0.22, "grad_norm": 1.8734585208709953, "learning_rate": 9.040330150974472e-06, "loss": 0.7302, "step": 2160 }, { "epoch": 0.22, "grad_norm": 2.274845630281377, "learning_rate": 9.039338234690304e-06, "loss": 0.7115, "step": 2161 }, { "epoch": 0.22, "grad_norm": 1.9120910239340116, "learning_rate": 9.038345860524123e-06, "loss": 0.6324, "step": 2162 }, { "epoch": 0.22, "grad_norm": 1.9268141093838203, "learning_rate": 9.037353028588421e-06, "loss": 0.6863, "step": 2163 }, { "epoch": 0.22, "grad_norm": 1.7626090493830637, "learning_rate": 9.036359738995741e-06, "loss": 0.7137, "step": 2164 }, { "epoch": 0.23, "grad_norm": 1.9785455080305587, "learning_rate": 9.035365991858679e-06, "loss": 0.6363, "step": 2165 }, { "epoch": 0.23, "grad_norm": 1.7537820647478297, "learning_rate": 9.034371787289879e-06, "loss": 0.6999, "step": 2166 }, { "epoch": 0.23, "grad_norm": 1.8144705683970634, "learning_rate": 9.033377125402045e-06, "loss": 0.6111, "step": 2167 }, { "epoch": 0.23, "grad_norm": 2.0784711520480683, "learning_rate": 9.032382006307923e-06, "loss": 0.7317, "step": 2168 }, { "epoch": 0.23, "grad_norm": 2.055321365353015, "learning_rate": 9.031386430120315e-06, "loss": 0.6697, "step": 2169 }, { "epoch": 0.23, "grad_norm": 1.9973752694136535, "learning_rate": 9.030390396952077e-06, "loss": 0.6653, "step": 2170 }, { "epoch": 0.23, "grad_norm": 2.1908786377684004, "learning_rate": 9.029393906916118e-06, "loss": 0.7126, "step": 2171 }, { "epoch": 0.23, "grad_norm": 2.2196682641106045, "learning_rate": 9.028396960125392e-06, "loss": 0.6975, "step": 2172 }, { "epoch": 0.23, "grad_norm": 2.153676140622691, "learning_rate": 9.02739955669291e-06, "loss": 0.7135, "step": 2173 }, { "epoch": 0.23, "grad_norm": 1.7443698256344395, "learning_rate": 9.02640169673173e-06, "loss": 0.6252, "step": 2174 }, { "epoch": 0.23, "grad_norm": 1.8900431422297443, "learning_rate": 9.02540338035497e-06, "loss": 0.733, "step": 2175 }, { "epoch": 0.23, "grad_norm": 1.7568087601555695, "learning_rate": 9.024404607675792e-06, "loss": 0.673, "step": 2176 }, { "epoch": 0.23, "grad_norm": 1.898316533786659, "learning_rate": 9.023405378807413e-06, "loss": 0.6297, "step": 2177 }, { "epoch": 0.23, "grad_norm": 1.977725542941323, "learning_rate": 9.022405693863102e-06, "loss": 0.7268, "step": 2178 }, { "epoch": 0.23, "grad_norm": 2.1481357518729243, "learning_rate": 9.02140555295618e-06, "loss": 0.6296, "step": 2179 }, { "epoch": 0.23, "grad_norm": 1.9978345929549044, "learning_rate": 9.020404956200016e-06, "loss": 0.5896, "step": 2180 }, { "epoch": 0.23, "grad_norm": 1.84587899229959, "learning_rate": 9.019403903708036e-06, "loss": 0.6713, "step": 2181 }, { "epoch": 0.23, "grad_norm": 1.8092614755951855, "learning_rate": 9.018402395593711e-06, "loss": 0.6312, "step": 2182 }, { "epoch": 0.23, "grad_norm": 2.0435447786587178, "learning_rate": 9.017400431970572e-06, "loss": 0.6963, "step": 2183 }, { "epoch": 0.23, "grad_norm": 1.7252658369683462, "learning_rate": 9.016398012952196e-06, "loss": 0.5749, "step": 2184 }, { "epoch": 0.23, "grad_norm": 1.7329348782754614, "learning_rate": 9.015395138652212e-06, "loss": 0.6343, "step": 2185 }, { "epoch": 0.23, "grad_norm": 2.0542689022507004, "learning_rate": 9.014391809184302e-06, "loss": 0.754, "step": 2186 }, { "epoch": 0.23, "grad_norm": 2.0538719175252464, "learning_rate": 9.013388024662199e-06, "loss": 0.7492, "step": 2187 }, { "epoch": 0.23, "grad_norm": 2.194772716927874, "learning_rate": 9.012383785199688e-06, "loss": 0.6571, "step": 2188 }, { "epoch": 0.23, "grad_norm": 1.7314238687560968, "learning_rate": 9.011379090910605e-06, "loss": 0.5858, "step": 2189 }, { "epoch": 0.23, "grad_norm": 1.8309501359614229, "learning_rate": 9.010373941908839e-06, "loss": 0.561, "step": 2190 }, { "epoch": 0.23, "grad_norm": 1.9401424308724535, "learning_rate": 9.009368338308328e-06, "loss": 0.6818, "step": 2191 }, { "epoch": 0.23, "grad_norm": 2.0913313231859534, "learning_rate": 9.008362280223062e-06, "loss": 0.7318, "step": 2192 }, { "epoch": 0.23, "grad_norm": 2.01779899943987, "learning_rate": 9.007355767767085e-06, "loss": 0.6818, "step": 2193 }, { "epoch": 0.23, "grad_norm": 1.8462281597980958, "learning_rate": 9.006348801054491e-06, "loss": 0.6496, "step": 2194 }, { "epoch": 0.23, "grad_norm": 1.9187146206986834, "learning_rate": 9.005341380199426e-06, "loss": 0.6457, "step": 2195 }, { "epoch": 0.23, "grad_norm": 1.9328520519266845, "learning_rate": 9.004333505316085e-06, "loss": 0.7879, "step": 2196 }, { "epoch": 0.23, "grad_norm": 1.708215185089118, "learning_rate": 9.003325176518718e-06, "loss": 0.5946, "step": 2197 }, { "epoch": 0.23, "grad_norm": 1.9026124558937254, "learning_rate": 9.002316393921623e-06, "loss": 0.7187, "step": 2198 }, { "epoch": 0.23, "grad_norm": 2.002549571319192, "learning_rate": 9.001307157639153e-06, "loss": 0.711, "step": 2199 }, { "epoch": 0.23, "grad_norm": 1.7359598802500287, "learning_rate": 9.000297467785708e-06, "loss": 0.7036, "step": 2200 }, { "epoch": 0.23, "grad_norm": 1.9435004256949489, "learning_rate": 8.999287324475745e-06, "loss": 0.6485, "step": 2201 }, { "epoch": 0.23, "grad_norm": 1.9218778678631865, "learning_rate": 8.998276727823769e-06, "loss": 0.6781, "step": 2202 }, { "epoch": 0.23, "grad_norm": 1.8933013319011063, "learning_rate": 8.997265677944336e-06, "loss": 0.6569, "step": 2203 }, { "epoch": 0.23, "grad_norm": 1.9531255465327648, "learning_rate": 8.996254174952056e-06, "loss": 0.7081, "step": 2204 }, { "epoch": 0.23, "grad_norm": 2.0631797487414527, "learning_rate": 8.995242218961586e-06, "loss": 0.6542, "step": 2205 }, { "epoch": 0.23, "grad_norm": 2.0026963960183264, "learning_rate": 8.99422981008764e-06, "loss": 0.6282, "step": 2206 }, { "epoch": 0.23, "grad_norm": 1.9931938931672886, "learning_rate": 8.993216948444978e-06, "loss": 0.6685, "step": 2207 }, { "epoch": 0.23, "grad_norm": 2.03677992763092, "learning_rate": 8.992203634148412e-06, "loss": 0.5603, "step": 2208 }, { "epoch": 0.23, "grad_norm": 1.9775804447589407, "learning_rate": 8.991189867312813e-06, "loss": 0.7024, "step": 2209 }, { "epoch": 0.23, "grad_norm": 1.9936308622181156, "learning_rate": 8.990175648053093e-06, "loss": 0.7432, "step": 2210 }, { "epoch": 0.23, "grad_norm": 1.7983836508704376, "learning_rate": 8.989160976484218e-06, "loss": 0.6267, "step": 2211 }, { "epoch": 0.23, "grad_norm": 1.7341209015462455, "learning_rate": 8.98814585272121e-06, "loss": 0.6546, "step": 2212 }, { "epoch": 0.23, "grad_norm": 1.7535109689213118, "learning_rate": 8.987130276879137e-06, "loss": 0.6133, "step": 2213 }, { "epoch": 0.23, "grad_norm": 1.926070930858834, "learning_rate": 8.986114249073122e-06, "loss": 0.7027, "step": 2214 }, { "epoch": 0.23, "grad_norm": 1.9392805940904663, "learning_rate": 8.985097769418337e-06, "loss": 0.6536, "step": 2215 }, { "epoch": 0.23, "grad_norm": 2.0611644129881763, "learning_rate": 8.984080838030005e-06, "loss": 0.8039, "step": 2216 }, { "epoch": 0.23, "grad_norm": 2.142950425462867, "learning_rate": 8.983063455023402e-06, "loss": 0.7716, "step": 2217 }, { "epoch": 0.23, "grad_norm": 2.054058905566524, "learning_rate": 8.982045620513855e-06, "loss": 0.7707, "step": 2218 }, { "epoch": 0.23, "grad_norm": 1.889097548556331, "learning_rate": 8.981027334616737e-06, "loss": 0.5825, "step": 2219 }, { "epoch": 0.23, "grad_norm": 1.8795725986969594, "learning_rate": 8.98000859744748e-06, "loss": 0.6551, "step": 2220 }, { "epoch": 0.23, "grad_norm": 1.882519672163551, "learning_rate": 8.978989409121565e-06, "loss": 0.682, "step": 2221 }, { "epoch": 0.23, "grad_norm": 2.0537589705875603, "learning_rate": 8.97796976975452e-06, "loss": 0.7304, "step": 2222 }, { "epoch": 0.23, "grad_norm": 2.0767498129779574, "learning_rate": 8.976949679461928e-06, "loss": 0.7041, "step": 2223 }, { "epoch": 0.23, "grad_norm": 2.110118698229804, "learning_rate": 8.975929138359423e-06, "loss": 0.6485, "step": 2224 }, { "epoch": 0.23, "grad_norm": 1.9109900497230985, "learning_rate": 8.974908146562686e-06, "loss": 0.7517, "step": 2225 }, { "epoch": 0.23, "grad_norm": 2.015281369277223, "learning_rate": 8.973886704187457e-06, "loss": 0.6787, "step": 2226 }, { "epoch": 0.23, "grad_norm": 1.8262978970480288, "learning_rate": 8.972864811349518e-06, "loss": 0.6435, "step": 2227 }, { "epoch": 0.23, "grad_norm": 2.210288335596177, "learning_rate": 8.97184246816471e-06, "loss": 0.7442, "step": 2228 }, { "epoch": 0.23, "grad_norm": 1.8288254991676658, "learning_rate": 8.970819674748917e-06, "loss": 0.6972, "step": 2229 }, { "epoch": 0.23, "grad_norm": 1.7051979804580626, "learning_rate": 8.969796431218081e-06, "loss": 0.5441, "step": 2230 }, { "epoch": 0.23, "grad_norm": 1.9169044631050145, "learning_rate": 8.968772737688193e-06, "loss": 0.6199, "step": 2231 }, { "epoch": 0.23, "grad_norm": 2.0655732597769925, "learning_rate": 8.967748594275294e-06, "loss": 0.766, "step": 2232 }, { "epoch": 0.23, "grad_norm": 1.978801046277295, "learning_rate": 8.966724001095477e-06, "loss": 0.6957, "step": 2233 }, { "epoch": 0.23, "grad_norm": 1.9521332143429797, "learning_rate": 8.965698958264883e-06, "loss": 0.6472, "step": 2234 }, { "epoch": 0.23, "grad_norm": 1.83147289535707, "learning_rate": 8.96467346589971e-06, "loss": 0.6152, "step": 2235 }, { "epoch": 0.23, "grad_norm": 1.9956163284753403, "learning_rate": 8.963647524116202e-06, "loss": 0.6321, "step": 2236 }, { "epoch": 0.23, "grad_norm": 2.133422804155597, "learning_rate": 8.962621133030655e-06, "loss": 0.6059, "step": 2237 }, { "epoch": 0.23, "grad_norm": 1.9380345874646834, "learning_rate": 8.961594292759416e-06, "loss": 0.789, "step": 2238 }, { "epoch": 0.23, "grad_norm": 1.8865426268162737, "learning_rate": 8.960567003418882e-06, "loss": 0.6757, "step": 2239 }, { "epoch": 0.23, "grad_norm": 1.973482306954926, "learning_rate": 8.959539265125507e-06, "loss": 0.6109, "step": 2240 }, { "epoch": 0.23, "grad_norm": 1.8719939069545541, "learning_rate": 8.958511077995786e-06, "loss": 0.6642, "step": 2241 }, { "epoch": 0.23, "grad_norm": 2.027942589354299, "learning_rate": 8.957482442146271e-06, "loss": 0.6152, "step": 2242 }, { "epoch": 0.23, "grad_norm": 1.9377797775087606, "learning_rate": 8.956453357693565e-06, "loss": 0.6306, "step": 2243 }, { "epoch": 0.23, "grad_norm": 2.1397437040204927, "learning_rate": 8.955423824754319e-06, "loss": 0.6442, "step": 2244 }, { "epoch": 0.23, "grad_norm": 1.850879857064714, "learning_rate": 8.954393843445239e-06, "loss": 0.7046, "step": 2245 }, { "epoch": 0.23, "grad_norm": 1.9391040464860019, "learning_rate": 8.953363413883077e-06, "loss": 0.633, "step": 2246 }, { "epoch": 0.23, "grad_norm": 1.7458294438697912, "learning_rate": 8.952332536184639e-06, "loss": 0.6093, "step": 2247 }, { "epoch": 0.23, "grad_norm": 1.7462764140457245, "learning_rate": 8.951301210466779e-06, "loss": 0.6411, "step": 2248 }, { "epoch": 0.23, "grad_norm": 2.0005982387947276, "learning_rate": 8.950269436846405e-06, "loss": 0.6312, "step": 2249 }, { "epoch": 0.23, "grad_norm": 1.9155274879580744, "learning_rate": 8.949237215440476e-06, "loss": 0.6798, "step": 2250 }, { "epoch": 0.23, "grad_norm": 1.855490598220143, "learning_rate": 8.948204546365996e-06, "loss": 0.648, "step": 2251 }, { "epoch": 0.23, "grad_norm": 2.189347602052192, "learning_rate": 8.94717142974003e-06, "loss": 0.629, "step": 2252 }, { "epoch": 0.23, "grad_norm": 1.6403712428379003, "learning_rate": 8.946137865679683e-06, "loss": 0.5136, "step": 2253 }, { "epoch": 0.23, "grad_norm": 1.9275044475114163, "learning_rate": 8.945103854302118e-06, "loss": 0.6351, "step": 2254 }, { "epoch": 0.23, "grad_norm": 1.813897721890047, "learning_rate": 8.944069395724541e-06, "loss": 0.7123, "step": 2255 }, { "epoch": 0.23, "grad_norm": 2.0580374034292044, "learning_rate": 8.943034490064222e-06, "loss": 0.6749, "step": 2256 }, { "epoch": 0.23, "grad_norm": 2.0017399104314246, "learning_rate": 8.941999137438466e-06, "loss": 0.695, "step": 2257 }, { "epoch": 0.23, "grad_norm": 1.9378953715759386, "learning_rate": 8.940963337964642e-06, "loss": 0.6528, "step": 2258 }, { "epoch": 0.23, "grad_norm": 2.041326591727269, "learning_rate": 8.93992709176016e-06, "loss": 0.7131, "step": 2259 }, { "epoch": 0.23, "grad_norm": 1.9285147868178183, "learning_rate": 8.938890398942482e-06, "loss": 0.7492, "step": 2260 }, { "epoch": 0.24, "grad_norm": 1.8238045339749287, "learning_rate": 8.93785325962913e-06, "loss": 0.6304, "step": 2261 }, { "epoch": 0.24, "grad_norm": 2.0053183644082613, "learning_rate": 8.936815673937665e-06, "loss": 0.6946, "step": 2262 }, { "epoch": 0.24, "grad_norm": 2.056091891002725, "learning_rate": 8.935777641985704e-06, "loss": 0.7323, "step": 2263 }, { "epoch": 0.24, "grad_norm": 1.8523458813916833, "learning_rate": 8.934739163890914e-06, "loss": 0.6583, "step": 2264 }, { "epoch": 0.24, "grad_norm": 1.9742354499979657, "learning_rate": 8.933700239771013e-06, "loss": 0.6349, "step": 2265 }, { "epoch": 0.24, "grad_norm": 1.9663161560770108, "learning_rate": 8.932660869743766e-06, "loss": 0.6875, "step": 2266 }, { "epoch": 0.24, "grad_norm": 1.7729449620047906, "learning_rate": 8.931621053926998e-06, "loss": 0.5993, "step": 2267 }, { "epoch": 0.24, "grad_norm": 1.776396253549101, "learning_rate": 8.930580792438571e-06, "loss": 0.6088, "step": 2268 }, { "epoch": 0.24, "grad_norm": 1.643385454952664, "learning_rate": 8.929540085396409e-06, "loss": 0.6575, "step": 2269 }, { "epoch": 0.24, "grad_norm": 1.946490262609231, "learning_rate": 8.92849893291848e-06, "loss": 0.6845, "step": 2270 }, { "epoch": 0.24, "grad_norm": 1.9913037797693673, "learning_rate": 8.927457335122807e-06, "loss": 0.6021, "step": 2271 }, { "epoch": 0.24, "grad_norm": 1.9935906721529877, "learning_rate": 8.926415292127458e-06, "loss": 0.6359, "step": 2272 }, { "epoch": 0.24, "grad_norm": 1.9874498733381067, "learning_rate": 8.925372804050554e-06, "loss": 0.8141, "step": 2273 }, { "epoch": 0.24, "grad_norm": 1.8641504075702195, "learning_rate": 8.924329871010271e-06, "loss": 0.5803, "step": 2274 }, { "epoch": 0.24, "grad_norm": 2.0248361904003906, "learning_rate": 8.92328649312483e-06, "loss": 0.7611, "step": 2275 }, { "epoch": 0.24, "grad_norm": 1.8881478352562464, "learning_rate": 8.922242670512501e-06, "loss": 0.7352, "step": 2276 }, { "epoch": 0.24, "grad_norm": 1.643021574441532, "learning_rate": 8.92119840329161e-06, "loss": 0.5815, "step": 2277 }, { "epoch": 0.24, "grad_norm": 1.8384891810153987, "learning_rate": 8.92015369158053e-06, "loss": 0.6841, "step": 2278 }, { "epoch": 0.24, "grad_norm": 1.8084240539765695, "learning_rate": 8.919108535497684e-06, "loss": 0.6603, "step": 2279 }, { "epoch": 0.24, "grad_norm": 11.78225246310456, "learning_rate": 8.91806293516155e-06, "loss": 0.6892, "step": 2280 }, { "epoch": 0.24, "grad_norm": 2.0298859406349905, "learning_rate": 8.917016890690648e-06, "loss": 0.7486, "step": 2281 }, { "epoch": 0.24, "grad_norm": 2.0279639350940566, "learning_rate": 8.915970402203555e-06, "loss": 0.6696, "step": 2282 }, { "epoch": 0.24, "grad_norm": 1.994856285908655, "learning_rate": 8.914923469818897e-06, "loss": 0.6637, "step": 2283 }, { "epoch": 0.24, "grad_norm": 1.8271612294370347, "learning_rate": 8.913876093655351e-06, "loss": 0.6463, "step": 2284 }, { "epoch": 0.24, "grad_norm": 1.784844135622662, "learning_rate": 8.912828273831639e-06, "loss": 0.6222, "step": 2285 }, { "epoch": 0.24, "grad_norm": 1.7882797824424788, "learning_rate": 8.911780010466542e-06, "loss": 0.745, "step": 2286 }, { "epoch": 0.24, "grad_norm": 2.087910231596911, "learning_rate": 8.910731303678881e-06, "loss": 0.6256, "step": 2287 }, { "epoch": 0.24, "grad_norm": 1.9216932425265978, "learning_rate": 8.90968215358754e-06, "loss": 0.6616, "step": 2288 }, { "epoch": 0.24, "grad_norm": 1.9486809117012627, "learning_rate": 8.908632560311441e-06, "loss": 0.6927, "step": 2289 }, { "epoch": 0.24, "grad_norm": 1.8170629926411348, "learning_rate": 8.907582523969562e-06, "loss": 0.679, "step": 2290 }, { "epoch": 0.24, "grad_norm": 1.761269885787969, "learning_rate": 8.906532044680933e-06, "loss": 0.6878, "step": 2291 }, { "epoch": 0.24, "grad_norm": 1.8632205027463271, "learning_rate": 8.905481122564628e-06, "loss": 0.6873, "step": 2292 }, { "epoch": 0.24, "grad_norm": 1.9046793180822075, "learning_rate": 8.90442975773978e-06, "loss": 0.6389, "step": 2293 }, { "epoch": 0.24, "grad_norm": 1.9488081969903266, "learning_rate": 8.903377950325563e-06, "loss": 0.6598, "step": 2294 }, { "epoch": 0.24, "grad_norm": 2.1749846253223257, "learning_rate": 8.902325700441207e-06, "loss": 0.6878, "step": 2295 }, { "epoch": 0.24, "grad_norm": 1.7711244804453699, "learning_rate": 8.901273008205991e-06, "loss": 0.5985, "step": 2296 }, { "epoch": 0.24, "grad_norm": 1.8428616471257615, "learning_rate": 8.900219873739242e-06, "loss": 0.7583, "step": 2297 }, { "epoch": 0.24, "grad_norm": 2.2720506937274974, "learning_rate": 8.89916629716034e-06, "loss": 0.6609, "step": 2298 }, { "epoch": 0.24, "grad_norm": 2.124027712091581, "learning_rate": 8.898112278588713e-06, "loss": 0.6003, "step": 2299 }, { "epoch": 0.24, "grad_norm": 1.7844632084108558, "learning_rate": 8.897057818143842e-06, "loss": 0.6297, "step": 2300 }, { "epoch": 0.24, "grad_norm": 1.902743216888347, "learning_rate": 8.896002915945254e-06, "loss": 0.6839, "step": 2301 }, { "epoch": 0.24, "grad_norm": 1.7514502795812927, "learning_rate": 8.89494757211253e-06, "loss": 0.6339, "step": 2302 }, { "epoch": 0.24, "grad_norm": 1.648375420162822, "learning_rate": 8.893891786765298e-06, "loss": 0.5742, "step": 2303 }, { "epoch": 0.24, "grad_norm": 1.87874832019259, "learning_rate": 8.892835560023236e-06, "loss": 0.686, "step": 2304 }, { "epoch": 0.24, "grad_norm": 2.119276599077235, "learning_rate": 8.891778892006077e-06, "loss": 0.7002, "step": 2305 }, { "epoch": 0.24, "grad_norm": 1.8114144842978803, "learning_rate": 8.890721782833596e-06, "loss": 0.6562, "step": 2306 }, { "epoch": 0.24, "grad_norm": 1.9513465793448206, "learning_rate": 8.889664232625626e-06, "loss": 0.6201, "step": 2307 }, { "epoch": 0.24, "grad_norm": 1.9660798153237917, "learning_rate": 8.888606241502044e-06, "loss": 0.6004, "step": 2308 }, { "epoch": 0.24, "grad_norm": 1.9794487766731317, "learning_rate": 8.88754780958278e-06, "loss": 0.7013, "step": 2309 }, { "epoch": 0.24, "grad_norm": 1.7888928185017705, "learning_rate": 8.886488936987817e-06, "loss": 0.6845, "step": 2310 }, { "epoch": 0.24, "grad_norm": 1.7762297353081091, "learning_rate": 8.885429623837178e-06, "loss": 0.6759, "step": 2311 }, { "epoch": 0.24, "grad_norm": 1.96582047776657, "learning_rate": 8.884369870250945e-06, "loss": 0.6531, "step": 2312 }, { "epoch": 0.24, "grad_norm": 1.8501988880713138, "learning_rate": 8.883309676349247e-06, "loss": 0.6091, "step": 2313 }, { "epoch": 0.24, "grad_norm": 2.003392735286015, "learning_rate": 8.882249042252262e-06, "loss": 0.7573, "step": 2314 }, { "epoch": 0.24, "grad_norm": 1.9134386167380948, "learning_rate": 8.881187968080222e-06, "loss": 0.6455, "step": 2315 }, { "epoch": 0.24, "grad_norm": 1.870256379320453, "learning_rate": 8.880126453953403e-06, "loss": 0.655, "step": 2316 }, { "epoch": 0.24, "grad_norm": 1.759883021241307, "learning_rate": 8.879064499992133e-06, "loss": 0.7253, "step": 2317 }, { "epoch": 0.24, "grad_norm": 2.05074006883249, "learning_rate": 8.878002106316795e-06, "loss": 0.7489, "step": 2318 }, { "epoch": 0.24, "grad_norm": 3.367048592554034, "learning_rate": 8.876939273047813e-06, "loss": 0.6746, "step": 2319 }, { "epoch": 0.24, "grad_norm": 1.951477514551727, "learning_rate": 8.875876000305666e-06, "loss": 0.7181, "step": 2320 }, { "epoch": 0.24, "grad_norm": 1.839227616535758, "learning_rate": 8.874812288210883e-06, "loss": 0.6724, "step": 2321 }, { "epoch": 0.24, "grad_norm": 1.7580491487314909, "learning_rate": 8.87374813688404e-06, "loss": 0.7564, "step": 2322 }, { "epoch": 0.24, "grad_norm": 2.082563087261863, "learning_rate": 8.872683546445768e-06, "loss": 0.7732, "step": 2323 }, { "epoch": 0.24, "grad_norm": 1.947319163291198, "learning_rate": 8.871618517016742e-06, "loss": 0.6136, "step": 2324 }, { "epoch": 0.24, "grad_norm": 1.8749719303586856, "learning_rate": 8.870553048717689e-06, "loss": 0.6375, "step": 2325 }, { "epoch": 0.24, "grad_norm": 1.8189093114890664, "learning_rate": 8.86948714166939e-06, "loss": 0.6291, "step": 2326 }, { "epoch": 0.24, "grad_norm": 1.7890150891185206, "learning_rate": 8.868420795992662e-06, "loss": 0.5662, "step": 2327 }, { "epoch": 0.24, "grad_norm": 1.841854935089076, "learning_rate": 8.867354011808391e-06, "loss": 0.6774, "step": 2328 }, { "epoch": 0.24, "grad_norm": 1.9138052891691462, "learning_rate": 8.866286789237499e-06, "loss": 0.6585, "step": 2329 }, { "epoch": 0.24, "grad_norm": 1.9134802569985976, "learning_rate": 8.865219128400964e-06, "loss": 0.6528, "step": 2330 }, { "epoch": 0.24, "grad_norm": 2.1272177907438614, "learning_rate": 8.864151029419807e-06, "loss": 0.6907, "step": 2331 }, { "epoch": 0.24, "grad_norm": 1.8186258535110367, "learning_rate": 8.863082492415107e-06, "loss": 0.6954, "step": 2332 }, { "epoch": 0.24, "grad_norm": 1.65082051016591, "learning_rate": 8.86201351750799e-06, "loss": 0.5637, "step": 2333 }, { "epoch": 0.24, "grad_norm": 1.8293006727742915, "learning_rate": 8.860944104819625e-06, "loss": 0.719, "step": 2334 }, { "epoch": 0.24, "grad_norm": 1.8125833581391186, "learning_rate": 8.85987425447124e-06, "loss": 0.5463, "step": 2335 }, { "epoch": 0.24, "grad_norm": 1.8212561193038788, "learning_rate": 8.858803966584108e-06, "loss": 0.6743, "step": 2336 }, { "epoch": 0.24, "grad_norm": 1.8575482610539216, "learning_rate": 8.857733241279551e-06, "loss": 0.6718, "step": 2337 }, { "epoch": 0.24, "grad_norm": 2.1513939629386445, "learning_rate": 8.856662078678944e-06, "loss": 0.6537, "step": 2338 }, { "epoch": 0.24, "grad_norm": 1.8719378153923891, "learning_rate": 8.855590478903707e-06, "loss": 0.644, "step": 2339 }, { "epoch": 0.24, "grad_norm": 2.1098349957847162, "learning_rate": 8.854518442075313e-06, "loss": 0.7041, "step": 2340 }, { "epoch": 0.24, "grad_norm": 2.140614121778238, "learning_rate": 8.853445968315286e-06, "loss": 0.7882, "step": 2341 }, { "epoch": 0.24, "grad_norm": 1.79779569759238, "learning_rate": 8.852373057745192e-06, "loss": 0.6862, "step": 2342 }, { "epoch": 0.24, "grad_norm": 1.8913245263343401, "learning_rate": 8.851299710486655e-06, "loss": 0.6563, "step": 2343 }, { "epoch": 0.24, "grad_norm": 1.7848657576070133, "learning_rate": 8.850225926661344e-06, "loss": 0.5715, "step": 2344 }, { "epoch": 0.24, "grad_norm": 2.042072907360112, "learning_rate": 8.84915170639098e-06, "loss": 0.6642, "step": 2345 }, { "epoch": 0.24, "grad_norm": 1.882590587542723, "learning_rate": 8.848077049797327e-06, "loss": 0.5457, "step": 2346 }, { "epoch": 0.24, "grad_norm": 2.0577242147645443, "learning_rate": 8.847001957002211e-06, "loss": 0.6979, "step": 2347 }, { "epoch": 0.24, "grad_norm": 1.8692862797256795, "learning_rate": 8.845926428127493e-06, "loss": 0.678, "step": 2348 }, { "epoch": 0.24, "grad_norm": 1.780826018929952, "learning_rate": 8.844850463295096e-06, "loss": 0.6109, "step": 2349 }, { "epoch": 0.24, "grad_norm": 1.718886412604249, "learning_rate": 8.843774062626982e-06, "loss": 0.6187, "step": 2350 }, { "epoch": 0.24, "grad_norm": 2.0780976828794624, "learning_rate": 8.842697226245171e-06, "loss": 0.6609, "step": 2351 }, { "epoch": 0.24, "grad_norm": 2.1857009596687407, "learning_rate": 8.841619954271725e-06, "loss": 0.6179, "step": 2352 }, { "epoch": 0.24, "grad_norm": 1.9946623483439587, "learning_rate": 8.840542246828763e-06, "loss": 0.5932, "step": 2353 }, { "epoch": 0.24, "grad_norm": 1.9732400040300602, "learning_rate": 8.839464104038445e-06, "loss": 0.719, "step": 2354 }, { "epoch": 0.24, "grad_norm": 1.9466418799081235, "learning_rate": 8.838385526022989e-06, "loss": 0.6472, "step": 2355 }, { "epoch": 0.24, "grad_norm": 1.8709285020035453, "learning_rate": 8.83730651290465e-06, "loss": 0.6885, "step": 2356 }, { "epoch": 0.25, "grad_norm": 1.8144321685258162, "learning_rate": 8.836227064805751e-06, "loss": 0.6892, "step": 2357 }, { "epoch": 0.25, "grad_norm": 1.92972926188176, "learning_rate": 8.835147181848646e-06, "loss": 0.6428, "step": 2358 }, { "epoch": 0.25, "grad_norm": 1.832178705416403, "learning_rate": 8.83406686415575e-06, "loss": 0.6024, "step": 2359 }, { "epoch": 0.25, "grad_norm": 1.7708427249652576, "learning_rate": 8.832986111849522e-06, "loss": 0.5662, "step": 2360 }, { "epoch": 0.25, "grad_norm": 1.7190953214028308, "learning_rate": 8.831904925052468e-06, "loss": 0.5733, "step": 2361 }, { "epoch": 0.25, "grad_norm": 1.5950322378285189, "learning_rate": 8.830823303887152e-06, "loss": 0.5816, "step": 2362 }, { "epoch": 0.25, "grad_norm": 1.9181816926545536, "learning_rate": 8.829741248476178e-06, "loss": 0.6566, "step": 2363 }, { "epoch": 0.25, "grad_norm": 2.250978660572423, "learning_rate": 8.828658758942206e-06, "loss": 0.7142, "step": 2364 }, { "epoch": 0.25, "grad_norm": 1.9235552583017665, "learning_rate": 8.827575835407942e-06, "loss": 0.6098, "step": 2365 }, { "epoch": 0.25, "grad_norm": 1.6808897773713956, "learning_rate": 8.826492477996138e-06, "loss": 0.6614, "step": 2366 }, { "epoch": 0.25, "grad_norm": 1.6939820996787394, "learning_rate": 8.825408686829602e-06, "loss": 0.6475, "step": 2367 }, { "epoch": 0.25, "grad_norm": 2.005566835841052, "learning_rate": 8.824324462031189e-06, "loss": 0.7744, "step": 2368 }, { "epoch": 0.25, "grad_norm": 1.8165929910866654, "learning_rate": 8.823239803723799e-06, "loss": 0.7205, "step": 2369 }, { "epoch": 0.25, "grad_norm": 1.8947184852325152, "learning_rate": 8.822154712030386e-06, "loss": 0.6283, "step": 2370 }, { "epoch": 0.25, "grad_norm": 1.847875985087468, "learning_rate": 8.82106918707395e-06, "loss": 0.6647, "step": 2371 }, { "epoch": 0.25, "grad_norm": 1.7369026204140432, "learning_rate": 8.819983228977543e-06, "loss": 0.6001, "step": 2372 }, { "epoch": 0.25, "grad_norm": 1.7589257105558311, "learning_rate": 8.818896837864263e-06, "loss": 0.6021, "step": 2373 }, { "epoch": 0.25, "grad_norm": 1.782473528288657, "learning_rate": 8.81781001385726e-06, "loss": 0.6308, "step": 2374 }, { "epoch": 0.25, "grad_norm": 2.24267074124853, "learning_rate": 8.81672275707973e-06, "loss": 0.6647, "step": 2375 }, { "epoch": 0.25, "grad_norm": 1.7755566946531254, "learning_rate": 8.815635067654924e-06, "loss": 0.6279, "step": 2376 }, { "epoch": 0.25, "grad_norm": 1.7420803857728735, "learning_rate": 8.814546945706132e-06, "loss": 0.6252, "step": 2377 }, { "epoch": 0.25, "grad_norm": 2.00014949073347, "learning_rate": 8.813458391356702e-06, "loss": 0.6809, "step": 2378 }, { "epoch": 0.25, "grad_norm": 1.988415369348472, "learning_rate": 8.812369404730027e-06, "loss": 0.6487, "step": 2379 }, { "epoch": 0.25, "grad_norm": 1.9412075336525594, "learning_rate": 8.811279985949551e-06, "loss": 0.6755, "step": 2380 }, { "epoch": 0.25, "grad_norm": 1.9139607368009248, "learning_rate": 8.810190135138765e-06, "loss": 0.722, "step": 2381 }, { "epoch": 0.25, "grad_norm": 1.9545338507069623, "learning_rate": 8.80909985242121e-06, "loss": 0.6374, "step": 2382 }, { "epoch": 0.25, "grad_norm": 1.748644293222741, "learning_rate": 8.808009137920475e-06, "loss": 0.6028, "step": 2383 }, { "epoch": 0.25, "grad_norm": 2.0414545333900227, "learning_rate": 8.8069179917602e-06, "loss": 0.7474, "step": 2384 }, { "epoch": 0.25, "grad_norm": 2.06277922678675, "learning_rate": 8.805826414064071e-06, "loss": 0.6891, "step": 2385 }, { "epoch": 0.25, "grad_norm": 2.0083336554857874, "learning_rate": 8.804734404955825e-06, "loss": 0.656, "step": 2386 }, { "epoch": 0.25, "grad_norm": 1.660482284498147, "learning_rate": 8.80364196455925e-06, "loss": 0.5254, "step": 2387 }, { "epoch": 0.25, "grad_norm": 1.8909219127501342, "learning_rate": 8.802549092998176e-06, "loss": 0.6555, "step": 2388 }, { "epoch": 0.25, "grad_norm": 1.9055724159754508, "learning_rate": 8.80145579039649e-06, "loss": 0.6571, "step": 2389 }, { "epoch": 0.25, "grad_norm": 2.1750249859853925, "learning_rate": 8.800362056878123e-06, "loss": 0.7014, "step": 2390 }, { "epoch": 0.25, "grad_norm": 2.17662140669081, "learning_rate": 8.799267892567054e-06, "loss": 0.6947, "step": 2391 }, { "epoch": 0.25, "grad_norm": 1.8554578649297455, "learning_rate": 8.798173297587316e-06, "loss": 0.5301, "step": 2392 }, { "epoch": 0.25, "grad_norm": 1.8693214266194902, "learning_rate": 8.797078272062984e-06, "loss": 0.6241, "step": 2393 }, { "epoch": 0.25, "grad_norm": 10.116068708891031, "learning_rate": 8.795982816118189e-06, "loss": 0.7266, "step": 2394 }, { "epoch": 0.25, "grad_norm": 1.8822655074150916, "learning_rate": 8.794886929877104e-06, "loss": 0.6358, "step": 2395 }, { "epoch": 0.25, "grad_norm": 1.753515161356571, "learning_rate": 8.793790613463956e-06, "loss": 0.6498, "step": 2396 }, { "epoch": 0.25, "grad_norm": 2.1198668025700362, "learning_rate": 8.792693867003017e-06, "loss": 0.8171, "step": 2397 }, { "epoch": 0.25, "grad_norm": 1.9558213690977169, "learning_rate": 8.791596690618611e-06, "loss": 0.8011, "step": 2398 }, { "epoch": 0.25, "grad_norm": 2.071651445112021, "learning_rate": 8.79049908443511e-06, "loss": 0.6883, "step": 2399 }, { "epoch": 0.25, "grad_norm": 2.8405970903726203, "learning_rate": 8.789401048576932e-06, "loss": 0.7141, "step": 2400 }, { "epoch": 0.25, "grad_norm": 1.8874216666737584, "learning_rate": 8.788302583168546e-06, "loss": 0.6946, "step": 2401 }, { "epoch": 0.25, "grad_norm": 2.0114898090891806, "learning_rate": 8.78720368833447e-06, "loss": 0.5675, "step": 2402 }, { "epoch": 0.25, "grad_norm": 1.9083054611199246, "learning_rate": 8.78610436419927e-06, "loss": 0.6291, "step": 2403 }, { "epoch": 0.25, "grad_norm": 1.9970989061556155, "learning_rate": 8.785004610887559e-06, "loss": 0.6855, "step": 2404 }, { "epoch": 0.25, "grad_norm": 1.7311843615333404, "learning_rate": 8.783904428524002e-06, "loss": 0.5969, "step": 2405 }, { "epoch": 0.25, "grad_norm": 1.7874300778174452, "learning_rate": 8.782803817233312e-06, "loss": 0.6618, "step": 2406 }, { "epoch": 0.25, "grad_norm": 1.745175178235986, "learning_rate": 8.781702777140245e-06, "loss": 0.6895, "step": 2407 }, { "epoch": 0.25, "grad_norm": 1.9717874839652878, "learning_rate": 8.780601308369615e-06, "loss": 0.7072, "step": 2408 }, { "epoch": 0.25, "grad_norm": 2.286414310695891, "learning_rate": 8.779499411046279e-06, "loss": 0.6198, "step": 2409 }, { "epoch": 0.25, "grad_norm": 1.8423429664253486, "learning_rate": 8.778397085295141e-06, "loss": 0.5958, "step": 2410 }, { "epoch": 0.25, "grad_norm": 1.7508724073956576, "learning_rate": 8.777294331241157e-06, "loss": 0.6431, "step": 2411 }, { "epoch": 0.25, "grad_norm": 1.9141251716681176, "learning_rate": 8.77619114900933e-06, "loss": 0.6575, "step": 2412 }, { "epoch": 0.25, "grad_norm": 1.9832722148946702, "learning_rate": 8.775087538724714e-06, "loss": 0.6805, "step": 2413 }, { "epoch": 0.25, "grad_norm": 1.7600986147301019, "learning_rate": 8.773983500512408e-06, "loss": 0.7129, "step": 2414 }, { "epoch": 0.25, "grad_norm": 1.9150612390096766, "learning_rate": 8.772879034497561e-06, "loss": 0.8497, "step": 2415 }, { "epoch": 0.25, "grad_norm": 1.812339592854274, "learning_rate": 8.771774140805372e-06, "loss": 0.668, "step": 2416 }, { "epoch": 0.25, "grad_norm": 1.9986647398148865, "learning_rate": 8.770668819561085e-06, "loss": 0.7162, "step": 2417 }, { "epoch": 0.25, "grad_norm": 1.9456747118199362, "learning_rate": 8.769563070889995e-06, "loss": 0.7052, "step": 2418 }, { "epoch": 0.25, "grad_norm": 1.965657407221823, "learning_rate": 8.768456894917445e-06, "loss": 0.7759, "step": 2419 }, { "epoch": 0.25, "grad_norm": 1.949060659018235, "learning_rate": 8.767350291768827e-06, "loss": 0.7126, "step": 2420 }, { "epoch": 0.25, "grad_norm": 1.8428955759377683, "learning_rate": 8.76624326156958e-06, "loss": 0.6419, "step": 2421 }, { "epoch": 0.25, "grad_norm": 1.9139856823393193, "learning_rate": 8.765135804445192e-06, "loss": 0.6822, "step": 2422 }, { "epoch": 0.25, "grad_norm": 2.12635949355531, "learning_rate": 8.7640279205212e-06, "loss": 0.7338, "step": 2423 }, { "epoch": 0.25, "grad_norm": 1.871367384050979, "learning_rate": 8.76291960992319e-06, "loss": 0.6935, "step": 2424 }, { "epoch": 0.25, "grad_norm": 1.9112013632683538, "learning_rate": 8.761810872776793e-06, "loss": 0.6928, "step": 2425 }, { "epoch": 0.25, "grad_norm": 1.986560427331108, "learning_rate": 8.760701709207693e-06, "loss": 0.6297, "step": 2426 }, { "epoch": 0.25, "grad_norm": 1.8219563126246536, "learning_rate": 8.759592119341618e-06, "loss": 0.6592, "step": 2427 }, { "epoch": 0.25, "grad_norm": 2.063821372703502, "learning_rate": 8.758482103304348e-06, "loss": 0.6741, "step": 2428 }, { "epoch": 0.25, "grad_norm": 1.834418853775193, "learning_rate": 8.757371661221709e-06, "loss": 0.5968, "step": 2429 }, { "epoch": 0.25, "grad_norm": 1.8513650814208955, "learning_rate": 8.756260793219575e-06, "loss": 0.6706, "step": 2430 }, { "epoch": 0.25, "grad_norm": 2.205372225245357, "learning_rate": 8.755149499423871e-06, "loss": 0.7245, "step": 2431 }, { "epoch": 0.25, "grad_norm": 1.8523502602927382, "learning_rate": 8.754037779960566e-06, "loss": 0.6928, "step": 2432 }, { "epoch": 0.25, "grad_norm": 2.164253711937142, "learning_rate": 8.752925634955685e-06, "loss": 0.6091, "step": 2433 }, { "epoch": 0.25, "grad_norm": 1.7726757578138437, "learning_rate": 8.751813064535288e-06, "loss": 0.5928, "step": 2434 }, { "epoch": 0.25, "grad_norm": 1.9559306596077315, "learning_rate": 8.750700068825499e-06, "loss": 0.6752, "step": 2435 }, { "epoch": 0.25, "grad_norm": 1.7822366987620128, "learning_rate": 8.749586647952478e-06, "loss": 0.6989, "step": 2436 }, { "epoch": 0.25, "grad_norm": 2.2000460835818703, "learning_rate": 8.748472802042438e-06, "loss": 0.6844, "step": 2437 }, { "epoch": 0.25, "grad_norm": 2.0813206807667015, "learning_rate": 8.74735853122164e-06, "loss": 0.7764, "step": 2438 }, { "epoch": 0.25, "grad_norm": 1.810316239654606, "learning_rate": 8.746243835616392e-06, "loss": 0.6687, "step": 2439 }, { "epoch": 0.25, "grad_norm": 2.2509528243743127, "learning_rate": 8.745128715353055e-06, "loss": 0.576, "step": 2440 }, { "epoch": 0.25, "grad_norm": 2.073047694712885, "learning_rate": 8.74401317055803e-06, "loss": 0.7311, "step": 2441 }, { "epoch": 0.25, "grad_norm": 1.9258060000172623, "learning_rate": 8.742897201357772e-06, "loss": 0.6671, "step": 2442 }, { "epoch": 0.25, "grad_norm": 1.8935603943634967, "learning_rate": 8.741780807878783e-06, "loss": 0.7318, "step": 2443 }, { "epoch": 0.25, "grad_norm": 1.803740494965822, "learning_rate": 8.740663990247612e-06, "loss": 0.6143, "step": 2444 }, { "epoch": 0.25, "grad_norm": 1.8225712920487915, "learning_rate": 8.739546748590857e-06, "loss": 0.6634, "step": 2445 }, { "epoch": 0.25, "grad_norm": 1.79407696453161, "learning_rate": 8.738429083035162e-06, "loss": 0.6252, "step": 2446 }, { "epoch": 0.25, "grad_norm": 1.7820233409510757, "learning_rate": 8.737310993707225e-06, "loss": 0.6748, "step": 2447 }, { "epoch": 0.25, "grad_norm": 1.8736454414922534, "learning_rate": 8.736192480733782e-06, "loss": 0.6461, "step": 2448 }, { "epoch": 0.25, "grad_norm": 1.8092214496718138, "learning_rate": 8.735073544241627e-06, "loss": 0.5736, "step": 2449 }, { "epoch": 0.25, "grad_norm": 1.8901610250011938, "learning_rate": 8.733954184357596e-06, "loss": 0.5879, "step": 2450 }, { "epoch": 0.25, "grad_norm": 1.9058356933274532, "learning_rate": 8.732834401208575e-06, "loss": 0.7635, "step": 2451 }, { "epoch": 0.25, "grad_norm": 2.145236956293225, "learning_rate": 8.731714194921498e-06, "loss": 0.7627, "step": 2452 }, { "epoch": 0.25, "grad_norm": 1.9661727069908106, "learning_rate": 8.730593565623349e-06, "loss": 0.6266, "step": 2453 }, { "epoch": 0.26, "grad_norm": 1.9383169325757479, "learning_rate": 8.729472513441152e-06, "loss": 0.6724, "step": 2454 }, { "epoch": 0.26, "grad_norm": 2.0297056180550634, "learning_rate": 8.728351038501991e-06, "loss": 0.7136, "step": 2455 }, { "epoch": 0.26, "grad_norm": 2.2711745650680903, "learning_rate": 8.727229140932988e-06, "loss": 0.6877, "step": 2456 }, { "epoch": 0.26, "grad_norm": 1.8425469528169693, "learning_rate": 8.726106820861319e-06, "loss": 0.645, "step": 2457 }, { "epoch": 0.26, "grad_norm": 1.9820413892321256, "learning_rate": 8.724984078414202e-06, "loss": 0.6962, "step": 2458 }, { "epoch": 0.26, "grad_norm": 1.943380318626068, "learning_rate": 8.72386091371891e-06, "loss": 0.6888, "step": 2459 }, { "epoch": 0.26, "grad_norm": 1.8212199805904132, "learning_rate": 8.722737326902757e-06, "loss": 0.6929, "step": 2460 }, { "epoch": 0.26, "grad_norm": 1.8554451720860514, "learning_rate": 8.72161331809311e-06, "loss": 0.7531, "step": 2461 }, { "epoch": 0.26, "grad_norm": 1.9310112538498863, "learning_rate": 8.720488887417379e-06, "loss": 0.6156, "step": 2462 }, { "epoch": 0.26, "grad_norm": 1.8574398933023148, "learning_rate": 8.719364035003028e-06, "loss": 0.6742, "step": 2463 }, { "epoch": 0.26, "grad_norm": 1.9700711516872065, "learning_rate": 8.718238760977562e-06, "loss": 0.7087, "step": 2464 }, { "epoch": 0.26, "grad_norm": 1.896331167359135, "learning_rate": 8.71711306546854e-06, "loss": 0.6402, "step": 2465 }, { "epoch": 0.26, "grad_norm": 1.5871072247915825, "learning_rate": 8.715986948603566e-06, "loss": 0.5402, "step": 2466 }, { "epoch": 0.26, "grad_norm": 1.9043062384767355, "learning_rate": 8.71486041051029e-06, "loss": 0.7504, "step": 2467 }, { "epoch": 0.26, "grad_norm": 1.9304941660777277, "learning_rate": 8.713733451316415e-06, "loss": 0.7583, "step": 2468 }, { "epoch": 0.26, "grad_norm": 1.9562908438044804, "learning_rate": 8.712606071149683e-06, "loss": 0.6654, "step": 2469 }, { "epoch": 0.26, "grad_norm": 1.955293052258211, "learning_rate": 8.711478270137892e-06, "loss": 0.7673, "step": 2470 }, { "epoch": 0.26, "grad_norm": 1.7622527529578897, "learning_rate": 8.710350048408885e-06, "loss": 0.6916, "step": 2471 }, { "epoch": 0.26, "grad_norm": 1.9881603469858304, "learning_rate": 8.709221406090552e-06, "loss": 0.6934, "step": 2472 }, { "epoch": 0.26, "grad_norm": 2.0033670173359495, "learning_rate": 8.70809234331083e-06, "loss": 0.7154, "step": 2473 }, { "epoch": 0.26, "grad_norm": 1.777575663522896, "learning_rate": 8.706962860197707e-06, "loss": 0.6564, "step": 2474 }, { "epoch": 0.26, "grad_norm": 1.967548242810914, "learning_rate": 8.705832956879214e-06, "loss": 0.6439, "step": 2475 }, { "epoch": 0.26, "grad_norm": 1.94761621671732, "learning_rate": 8.704702633483431e-06, "loss": 0.6275, "step": 2476 }, { "epoch": 0.26, "grad_norm": 2.017817753341322, "learning_rate": 8.703571890138491e-06, "loss": 0.737, "step": 2477 }, { "epoch": 0.26, "grad_norm": 1.6563836459176866, "learning_rate": 8.702440726972565e-06, "loss": 0.5686, "step": 2478 }, { "epoch": 0.26, "grad_norm": 2.0567605849561037, "learning_rate": 8.701309144113881e-06, "loss": 0.6831, "step": 2479 }, { "epoch": 0.26, "grad_norm": 1.8114888761264734, "learning_rate": 8.700177141690708e-06, "loss": 0.6569, "step": 2480 }, { "epoch": 0.26, "grad_norm": 2.1350886758417156, "learning_rate": 8.699044719831368e-06, "loss": 0.712, "step": 2481 }, { "epoch": 0.26, "grad_norm": 1.8611495208476372, "learning_rate": 8.697911878664222e-06, "loss": 0.6709, "step": 2482 }, { "epoch": 0.26, "grad_norm": 1.940942812297296, "learning_rate": 8.69677861831769e-06, "loss": 0.656, "step": 2483 }, { "epoch": 0.26, "grad_norm": 1.8476105475176288, "learning_rate": 8.695644938920229e-06, "loss": 0.6289, "step": 2484 }, { "epoch": 0.26, "grad_norm": 2.0744387596970384, "learning_rate": 8.69451084060035e-06, "loss": 0.7183, "step": 2485 }, { "epoch": 0.26, "grad_norm": 1.9263953191832632, "learning_rate": 8.693376323486609e-06, "loss": 0.5893, "step": 2486 }, { "epoch": 0.26, "grad_norm": 1.6553134124256639, "learning_rate": 8.69224138770761e-06, "loss": 0.5539, "step": 2487 }, { "epoch": 0.26, "grad_norm": 1.7912036322986, "learning_rate": 8.691106033392004e-06, "loss": 0.6439, "step": 2488 }, { "epoch": 0.26, "grad_norm": 1.9551293930565374, "learning_rate": 8.689970260668494e-06, "loss": 0.6473, "step": 2489 }, { "epoch": 0.26, "grad_norm": 2.4836561713045784, "learning_rate": 8.688834069665819e-06, "loss": 0.7666, "step": 2490 }, { "epoch": 0.26, "grad_norm": 1.9702181315374108, "learning_rate": 8.687697460512779e-06, "loss": 0.5778, "step": 2491 }, { "epoch": 0.26, "grad_norm": 1.8888601359095911, "learning_rate": 8.686560433338212e-06, "loss": 0.6791, "step": 2492 }, { "epoch": 0.26, "grad_norm": 1.8130047281565391, "learning_rate": 8.685422988271005e-06, "loss": 0.7071, "step": 2493 }, { "epoch": 0.26, "grad_norm": 1.8530436031260942, "learning_rate": 8.684285125440099e-06, "loss": 0.6628, "step": 2494 }, { "epoch": 0.26, "grad_norm": 1.8370355003345429, "learning_rate": 8.683146844974473e-06, "loss": 0.6431, "step": 2495 }, { "epoch": 0.26, "grad_norm": 1.872017642208934, "learning_rate": 8.682008147003159e-06, "loss": 0.6496, "step": 2496 }, { "epoch": 0.26, "grad_norm": 2.194564775682107, "learning_rate": 8.680869031655234e-06, "loss": 0.7402, "step": 2497 }, { "epoch": 0.26, "grad_norm": 1.7391627982124838, "learning_rate": 8.679729499059826e-06, "loss": 0.6092, "step": 2498 }, { "epoch": 0.26, "grad_norm": 1.8041114226784951, "learning_rate": 8.678589549346103e-06, "loss": 0.6696, "step": 2499 }, { "epoch": 0.26, "grad_norm": 1.9286629979286871, "learning_rate": 8.67744918264329e-06, "loss": 0.6261, "step": 2500 }, { "epoch": 0.26, "grad_norm": 2.034026955174544, "learning_rate": 8.67630839908065e-06, "loss": 0.6834, "step": 2501 }, { "epoch": 0.26, "grad_norm": 1.7611055542338694, "learning_rate": 8.675167198787497e-06, "loss": 0.6267, "step": 2502 }, { "epoch": 0.26, "grad_norm": 2.0451242410055697, "learning_rate": 8.674025581893197e-06, "loss": 0.6848, "step": 2503 }, { "epoch": 0.26, "grad_norm": 1.939191563492884, "learning_rate": 8.672883548527156e-06, "loss": 0.6619, "step": 2504 }, { "epoch": 0.26, "grad_norm": 1.5780790969065914, "learning_rate": 8.671741098818829e-06, "loss": 0.7253, "step": 2505 }, { "epoch": 0.26, "grad_norm": 1.9719588150251077, "learning_rate": 8.67059823289772e-06, "loss": 0.667, "step": 2506 }, { "epoch": 0.26, "grad_norm": 1.9648668702584733, "learning_rate": 8.669454950893381e-06, "loss": 0.7402, "step": 2507 }, { "epoch": 0.26, "grad_norm": 1.9258196939936445, "learning_rate": 8.668311252935407e-06, "loss": 0.6772, "step": 2508 }, { "epoch": 0.26, "grad_norm": 2.0081060724360444, "learning_rate": 8.667167139153443e-06, "loss": 0.6405, "step": 2509 }, { "epoch": 0.26, "grad_norm": 1.6742198835544373, "learning_rate": 8.666022609677183e-06, "loss": 0.5199, "step": 2510 }, { "epoch": 0.26, "grad_norm": 1.8564714159417233, "learning_rate": 8.664877664636365e-06, "loss": 0.7075, "step": 2511 }, { "epoch": 0.26, "grad_norm": 1.7875909865802426, "learning_rate": 8.663732304160772e-06, "loss": 0.6749, "step": 2512 }, { "epoch": 0.26, "grad_norm": 1.8538473774971693, "learning_rate": 8.66258652838024e-06, "loss": 0.6399, "step": 2513 }, { "epoch": 0.26, "grad_norm": 1.8806935770428046, "learning_rate": 8.661440337424652e-06, "loss": 0.6695, "step": 2514 }, { "epoch": 0.26, "grad_norm": 1.8938221894953242, "learning_rate": 8.660293731423929e-06, "loss": 0.5308, "step": 2515 }, { "epoch": 0.26, "grad_norm": 1.7300456189211295, "learning_rate": 8.659146710508052e-06, "loss": 0.6649, "step": 2516 }, { "epoch": 0.26, "grad_norm": 1.9670066452324917, "learning_rate": 8.657999274807036e-06, "loss": 0.6295, "step": 2517 }, { "epoch": 0.26, "grad_norm": 2.421249790517567, "learning_rate": 8.656851424450954e-06, "loss": 0.5711, "step": 2518 }, { "epoch": 0.26, "grad_norm": 1.7941611225137957, "learning_rate": 8.655703159569919e-06, "loss": 0.6625, "step": 2519 }, { "epoch": 0.26, "grad_norm": 1.8645532699032437, "learning_rate": 8.654554480294094e-06, "loss": 0.6557, "step": 2520 }, { "epoch": 0.26, "grad_norm": 1.7914760505530758, "learning_rate": 8.653405386753688e-06, "loss": 0.6373, "step": 2521 }, { "epoch": 0.26, "grad_norm": 1.9904369150091747, "learning_rate": 8.652255879078959e-06, "loss": 0.6712, "step": 2522 }, { "epoch": 0.26, "grad_norm": 2.0797350090765514, "learning_rate": 8.651105957400208e-06, "loss": 0.75, "step": 2523 }, { "epoch": 0.26, "grad_norm": 2.0592593153415586, "learning_rate": 8.649955621847787e-06, "loss": 0.6536, "step": 2524 }, { "epoch": 0.26, "grad_norm": 1.9125424066671115, "learning_rate": 8.648804872552092e-06, "loss": 0.7074, "step": 2525 }, { "epoch": 0.26, "grad_norm": 1.9638190738119656, "learning_rate": 8.647653709643566e-06, "loss": 0.7199, "step": 2526 }, { "epoch": 0.26, "grad_norm": 1.7916417184920295, "learning_rate": 8.646502133252702e-06, "loss": 0.6874, "step": 2527 }, { "epoch": 0.26, "grad_norm": 1.8071976978774118, "learning_rate": 8.645350143510036e-06, "loss": 0.8068, "step": 2528 }, { "epoch": 0.26, "grad_norm": 1.9139163770847392, "learning_rate": 8.644197740546153e-06, "loss": 0.7273, "step": 2529 }, { "epoch": 0.26, "grad_norm": 1.902494211289062, "learning_rate": 8.643044924491688e-06, "loss": 0.7555, "step": 2530 }, { "epoch": 0.26, "grad_norm": 1.7887237449586197, "learning_rate": 8.641891695477314e-06, "loss": 0.7074, "step": 2531 }, { "epoch": 0.26, "grad_norm": 1.6889392543519213, "learning_rate": 8.640738053633758e-06, "loss": 0.6728, "step": 2532 }, { "epoch": 0.26, "grad_norm": 2.045472851926104, "learning_rate": 8.639583999091792e-06, "loss": 0.7177, "step": 2533 }, { "epoch": 0.26, "grad_norm": 1.8990351279183315, "learning_rate": 8.638429531982235e-06, "loss": 0.7403, "step": 2534 }, { "epoch": 0.26, "grad_norm": 1.7419245372014982, "learning_rate": 8.637274652435954e-06, "loss": 0.5934, "step": 2535 }, { "epoch": 0.26, "grad_norm": 2.0825259365129316, "learning_rate": 8.636119360583857e-06, "loss": 0.6051, "step": 2536 }, { "epoch": 0.26, "grad_norm": 1.8906745088867434, "learning_rate": 8.634963656556904e-06, "loss": 0.6227, "step": 2537 }, { "epoch": 0.26, "grad_norm": 1.8357251628237146, "learning_rate": 8.633807540486105e-06, "loss": 0.6398, "step": 2538 }, { "epoch": 0.26, "grad_norm": 1.8989039062807476, "learning_rate": 8.632651012502508e-06, "loss": 0.6443, "step": 2539 }, { "epoch": 0.26, "grad_norm": 1.7261862083314314, "learning_rate": 8.631494072737215e-06, "loss": 0.6061, "step": 2540 }, { "epoch": 0.26, "grad_norm": 1.76712139420288, "learning_rate": 8.630336721321368e-06, "loss": 0.564, "step": 2541 }, { "epoch": 0.26, "grad_norm": 1.8432886950858667, "learning_rate": 8.629178958386162e-06, "loss": 0.6403, "step": 2542 }, { "epoch": 0.26, "grad_norm": 1.9592036465215612, "learning_rate": 8.628020784062837e-06, "loss": 0.5919, "step": 2543 }, { "epoch": 0.26, "grad_norm": 1.8828642264460216, "learning_rate": 8.626862198482676e-06, "loss": 0.6855, "step": 2544 }, { "epoch": 0.26, "grad_norm": 1.8946806088170294, "learning_rate": 8.62570320177701e-06, "loss": 0.527, "step": 2545 }, { "epoch": 0.26, "grad_norm": 1.9271775419029487, "learning_rate": 8.624543794077223e-06, "loss": 0.7033, "step": 2546 }, { "epoch": 0.26, "grad_norm": 2.079607663238331, "learning_rate": 8.623383975514736e-06, "loss": 0.6912, "step": 2547 }, { "epoch": 0.26, "grad_norm": 1.923055418753926, "learning_rate": 8.622223746221024e-06, "loss": 0.7492, "step": 2548 }, { "epoch": 0.26, "grad_norm": 1.8699947533876318, "learning_rate": 8.621063106327604e-06, "loss": 0.6444, "step": 2549 }, { "epoch": 0.27, "grad_norm": 2.0911040969561068, "learning_rate": 8.619902055966043e-06, "loss": 0.7458, "step": 2550 }, { "epoch": 0.27, "grad_norm": 2.0972275911219986, "learning_rate": 8.618740595267949e-06, "loss": 0.7584, "step": 2551 }, { "epoch": 0.27, "grad_norm": 1.8754911401151375, "learning_rate": 8.617578724364984e-06, "loss": 0.636, "step": 2552 }, { "epoch": 0.27, "grad_norm": 2.0750752889395683, "learning_rate": 8.616416443388849e-06, "loss": 0.643, "step": 2553 }, { "epoch": 0.27, "grad_norm": 1.8972081219498664, "learning_rate": 8.615253752471297e-06, "loss": 0.6895, "step": 2554 }, { "epoch": 0.27, "grad_norm": 1.8417165581548547, "learning_rate": 8.61409065174413e-06, "loss": 0.6493, "step": 2555 }, { "epoch": 0.27, "grad_norm": 1.7181649813698556, "learning_rate": 8.612927141339184e-06, "loss": 0.5672, "step": 2556 }, { "epoch": 0.27, "grad_norm": 1.8312017035141213, "learning_rate": 8.611763221388356e-06, "loss": 0.6054, "step": 2557 }, { "epoch": 0.27, "grad_norm": 2.618425682813372, "learning_rate": 8.610598892023578e-06, "loss": 0.7074, "step": 2558 }, { "epoch": 0.27, "grad_norm": 1.9623646243149804, "learning_rate": 8.609434153376839e-06, "loss": 0.6801, "step": 2559 }, { "epoch": 0.27, "grad_norm": 1.7973271647036762, "learning_rate": 8.608269005580164e-06, "loss": 0.5592, "step": 2560 }, { "epoch": 0.27, "grad_norm": 1.8497187638997796, "learning_rate": 8.60710344876563e-06, "loss": 0.6174, "step": 2561 }, { "epoch": 0.27, "grad_norm": 1.6527277765494606, "learning_rate": 8.605937483065361e-06, "loss": 0.6983, "step": 2562 }, { "epoch": 0.27, "grad_norm": 1.8421684748336877, "learning_rate": 8.604771108611525e-06, "loss": 0.6781, "step": 2563 }, { "epoch": 0.27, "grad_norm": 1.9856957053064686, "learning_rate": 8.603604325536338e-06, "loss": 0.6826, "step": 2564 }, { "epoch": 0.27, "grad_norm": 1.9390978240487184, "learning_rate": 8.60243713397206e-06, "loss": 0.6792, "step": 2565 }, { "epoch": 0.27, "grad_norm": 1.8237849953951095, "learning_rate": 8.601269534051e-06, "loss": 0.7281, "step": 2566 }, { "epoch": 0.27, "grad_norm": 2.0146077881578974, "learning_rate": 8.600101525905512e-06, "loss": 0.6844, "step": 2567 }, { "epoch": 0.27, "grad_norm": 1.8280236844474076, "learning_rate": 8.598933109667995e-06, "loss": 0.6021, "step": 2568 }, { "epoch": 0.27, "grad_norm": 1.7861961197050547, "learning_rate": 8.597764285470897e-06, "loss": 0.5682, "step": 2569 }, { "epoch": 0.27, "grad_norm": 1.9280191246157654, "learning_rate": 8.596595053446713e-06, "loss": 0.5181, "step": 2570 }, { "epoch": 0.27, "grad_norm": 1.822416375671748, "learning_rate": 8.595425413727979e-06, "loss": 0.6518, "step": 2571 }, { "epoch": 0.27, "grad_norm": 2.2655542490958416, "learning_rate": 8.59425536644728e-06, "loss": 0.6768, "step": 2572 }, { "epoch": 0.27, "grad_norm": 1.8253287565260945, "learning_rate": 8.593084911737249e-06, "loss": 0.5769, "step": 2573 }, { "epoch": 0.27, "grad_norm": 2.1113111922024586, "learning_rate": 8.591914049730561e-06, "loss": 0.6449, "step": 2574 }, { "epoch": 0.27, "grad_norm": 2.0885463074387056, "learning_rate": 8.590742780559945e-06, "loss": 0.6036, "step": 2575 }, { "epoch": 0.27, "grad_norm": 2.0324217465623797, "learning_rate": 8.589571104358168e-06, "loss": 0.6981, "step": 2576 }, { "epoch": 0.27, "grad_norm": 2.1051060069157144, "learning_rate": 8.588399021258046e-06, "loss": 0.6511, "step": 2577 }, { "epoch": 0.27, "grad_norm": 1.849243572140936, "learning_rate": 8.587226531392443e-06, "loss": 0.5977, "step": 2578 }, { "epoch": 0.27, "grad_norm": 1.9913608116550334, "learning_rate": 8.586053634894264e-06, "loss": 0.6965, "step": 2579 }, { "epoch": 0.27, "grad_norm": 2.015092471598591, "learning_rate": 8.584880331896467e-06, "loss": 0.7351, "step": 2580 }, { "epoch": 0.27, "grad_norm": 1.9372041452614617, "learning_rate": 8.58370662253205e-06, "loss": 0.7634, "step": 2581 }, { "epoch": 0.27, "grad_norm": 1.832886288665305, "learning_rate": 8.582532506934063e-06, "loss": 0.6648, "step": 2582 }, { "epoch": 0.27, "grad_norm": 1.9608951852109446, "learning_rate": 8.581357985235595e-06, "loss": 0.6949, "step": 2583 }, { "epoch": 0.27, "grad_norm": 1.957191923854249, "learning_rate": 8.580183057569788e-06, "loss": 0.665, "step": 2584 }, { "epoch": 0.27, "grad_norm": 2.0127234717259053, "learning_rate": 8.579007724069823e-06, "loss": 0.6466, "step": 2585 }, { "epoch": 0.27, "grad_norm": 1.8341028012931, "learning_rate": 8.577831984868934e-06, "loss": 0.6804, "step": 2586 }, { "epoch": 0.27, "grad_norm": 1.8705519139581988, "learning_rate": 8.576655840100397e-06, "loss": 0.6218, "step": 2587 }, { "epoch": 0.27, "grad_norm": 1.776464815901934, "learning_rate": 8.575479289897533e-06, "loss": 0.6891, "step": 2588 }, { "epoch": 0.27, "grad_norm": 1.7028339642895287, "learning_rate": 8.574302334393712e-06, "loss": 0.5559, "step": 2589 }, { "epoch": 0.27, "grad_norm": 1.9135910153508136, "learning_rate": 8.573124973722349e-06, "loss": 0.6412, "step": 2590 }, { "epoch": 0.27, "grad_norm": 1.7931655177631687, "learning_rate": 8.571947208016904e-06, "loss": 0.6621, "step": 2591 }, { "epoch": 0.27, "grad_norm": 1.8046680636422858, "learning_rate": 8.570769037410885e-06, "loss": 0.6493, "step": 2592 }, { "epoch": 0.27, "grad_norm": 1.7590315838924429, "learning_rate": 8.56959046203784e-06, "loss": 0.6755, "step": 2593 }, { "epoch": 0.27, "grad_norm": 1.8703325066678136, "learning_rate": 8.568411482031372e-06, "loss": 0.644, "step": 2594 }, { "epoch": 0.27, "grad_norm": 1.9303932644741164, "learning_rate": 8.567232097525123e-06, "loss": 0.6309, "step": 2595 }, { "epoch": 0.27, "grad_norm": 1.8518576249903604, "learning_rate": 8.566052308652783e-06, "loss": 0.6986, "step": 2596 }, { "epoch": 0.27, "grad_norm": 1.8176810029507842, "learning_rate": 8.56487211554809e-06, "loss": 0.6124, "step": 2597 }, { "epoch": 0.27, "grad_norm": 1.806871269882299, "learning_rate": 8.563691518344822e-06, "loss": 0.5968, "step": 2598 }, { "epoch": 0.27, "grad_norm": 1.9774455484279878, "learning_rate": 8.562510517176807e-06, "loss": 0.6801, "step": 2599 }, { "epoch": 0.27, "grad_norm": 1.8888905845005723, "learning_rate": 8.561329112177918e-06, "loss": 0.6651, "step": 2600 }, { "epoch": 0.27, "grad_norm": 1.9363855520254039, "learning_rate": 8.560147303482078e-06, "loss": 0.7647, "step": 2601 }, { "epoch": 0.27, "grad_norm": 2.1291448409247398, "learning_rate": 8.558965091223248e-06, "loss": 0.7111, "step": 2602 }, { "epoch": 0.27, "grad_norm": 2.0391902138624625, "learning_rate": 8.55778247553544e-06, "loss": 0.6569, "step": 2603 }, { "epoch": 0.27, "grad_norm": 2.0624650689714334, "learning_rate": 8.55659945655271e-06, "loss": 0.6995, "step": 2604 }, { "epoch": 0.27, "grad_norm": 1.6672557776522174, "learning_rate": 8.555416034409158e-06, "loss": 0.5768, "step": 2605 }, { "epoch": 0.27, "grad_norm": 1.8330406340117817, "learning_rate": 8.554232209238935e-06, "loss": 0.7001, "step": 2606 }, { "epoch": 0.27, "grad_norm": 1.9879512597907347, "learning_rate": 8.553047981176232e-06, "loss": 0.6188, "step": 2607 }, { "epoch": 0.27, "grad_norm": 1.945608350625391, "learning_rate": 8.55186335035529e-06, "loss": 0.6776, "step": 2608 }, { "epoch": 0.27, "grad_norm": 2.0410863507175847, "learning_rate": 8.55067831691039e-06, "loss": 0.8017, "step": 2609 }, { "epoch": 0.27, "grad_norm": 1.9040127392524986, "learning_rate": 8.549492880975866e-06, "loss": 0.7686, "step": 2610 }, { "epoch": 0.27, "grad_norm": 1.7321283064625452, "learning_rate": 8.548307042686093e-06, "loss": 0.5713, "step": 2611 }, { "epoch": 0.27, "grad_norm": 2.016949276627831, "learning_rate": 8.54712080217549e-06, "loss": 0.6542, "step": 2612 }, { "epoch": 0.27, "grad_norm": 1.6897248893935806, "learning_rate": 8.545934159578527e-06, "loss": 0.6149, "step": 2613 }, { "epoch": 0.27, "grad_norm": 1.7965717499797786, "learning_rate": 8.544747115029717e-06, "loss": 0.6785, "step": 2614 }, { "epoch": 0.27, "grad_norm": 1.869943587447054, "learning_rate": 8.543559668663616e-06, "loss": 0.6481, "step": 2615 }, { "epoch": 0.27, "grad_norm": 1.9399110552269427, "learning_rate": 8.54237182061483e-06, "loss": 0.5535, "step": 2616 }, { "epoch": 0.27, "grad_norm": 1.8931184533667633, "learning_rate": 8.541183571018006e-06, "loss": 0.6471, "step": 2617 }, { "epoch": 0.27, "grad_norm": 1.9115419127837032, "learning_rate": 8.53999492000784e-06, "loss": 0.6745, "step": 2618 }, { "epoch": 0.27, "grad_norm": 1.8487042882352491, "learning_rate": 8.538805867719073e-06, "loss": 0.7526, "step": 2619 }, { "epoch": 0.27, "grad_norm": 1.9130883130049305, "learning_rate": 8.537616414286491e-06, "loss": 0.6541, "step": 2620 }, { "epoch": 0.27, "grad_norm": 1.804592790152373, "learning_rate": 8.536426559844923e-06, "loss": 0.6664, "step": 2621 }, { "epoch": 0.27, "grad_norm": 1.9861938106078298, "learning_rate": 8.53523630452925e-06, "loss": 0.7571, "step": 2622 }, { "epoch": 0.27, "grad_norm": 1.6363374746611283, "learning_rate": 8.53404564847439e-06, "loss": 0.5683, "step": 2623 }, { "epoch": 0.27, "grad_norm": 1.77836846394158, "learning_rate": 8.532854591815313e-06, "loss": 0.6264, "step": 2624 }, { "epoch": 0.27, "grad_norm": 1.8115689056920217, "learning_rate": 8.531663134687031e-06, "loss": 0.649, "step": 2625 }, { "epoch": 0.27, "grad_norm": 1.734131206157076, "learning_rate": 8.530471277224603e-06, "loss": 0.5828, "step": 2626 }, { "epoch": 0.27, "grad_norm": 2.1028348732686246, "learning_rate": 8.529279019563133e-06, "loss": 0.6539, "step": 2627 }, { "epoch": 0.27, "grad_norm": 2.1826963739866594, "learning_rate": 8.528086361837771e-06, "loss": 0.7534, "step": 2628 }, { "epoch": 0.27, "grad_norm": 1.852410404097433, "learning_rate": 8.526893304183708e-06, "loss": 0.591, "step": 2629 }, { "epoch": 0.27, "grad_norm": 1.6674908894896996, "learning_rate": 8.525699846736189e-06, "loss": 0.5761, "step": 2630 }, { "epoch": 0.27, "grad_norm": 2.1536689365949493, "learning_rate": 8.524505989630493e-06, "loss": 0.6215, "step": 2631 }, { "epoch": 0.27, "grad_norm": 1.7369192872406605, "learning_rate": 8.523311733001957e-06, "loss": 0.6372, "step": 2632 }, { "epoch": 0.27, "grad_norm": 2.491539215247505, "learning_rate": 8.522117076985955e-06, "loss": 0.6566, "step": 2633 }, { "epoch": 0.27, "grad_norm": 1.8660934135316272, "learning_rate": 8.520922021717903e-06, "loss": 0.6222, "step": 2634 }, { "epoch": 0.27, "grad_norm": 2.058563343023167, "learning_rate": 8.519726567333273e-06, "loss": 0.6289, "step": 2635 }, { "epoch": 0.27, "grad_norm": 2.0092068256350415, "learning_rate": 8.518530713967575e-06, "loss": 0.6672, "step": 2636 }, { "epoch": 0.27, "grad_norm": 1.8080677008085388, "learning_rate": 8.517334461756366e-06, "loss": 0.6223, "step": 2637 }, { "epoch": 0.27, "grad_norm": 1.9543683749556946, "learning_rate": 8.516137810835248e-06, "loss": 0.5789, "step": 2638 }, { "epoch": 0.27, "grad_norm": 2.2093630526599357, "learning_rate": 8.514940761339867e-06, "loss": 0.7268, "step": 2639 }, { "epoch": 0.27, "grad_norm": 2.2424846594153105, "learning_rate": 8.513743313405916e-06, "loss": 0.6877, "step": 2640 }, { "epoch": 0.27, "grad_norm": 4.710291182584347, "learning_rate": 8.512545467169133e-06, "loss": 0.6036, "step": 2641 }, { "epoch": 0.27, "grad_norm": 1.6276412226705352, "learning_rate": 8.5113472227653e-06, "loss": 0.517, "step": 2642 }, { "epoch": 0.27, "grad_norm": 1.802571808921564, "learning_rate": 8.510148580330246e-06, "loss": 0.5983, "step": 2643 }, { "epoch": 0.27, "grad_norm": 2.067739256881607, "learning_rate": 8.508949539999845e-06, "loss": 0.6387, "step": 2644 }, { "epoch": 0.27, "grad_norm": 1.7614511548342808, "learning_rate": 8.50775010191001e-06, "loss": 0.6034, "step": 2645 }, { "epoch": 0.28, "grad_norm": 1.852977153736663, "learning_rate": 8.50655026619671e-06, "loss": 0.6811, "step": 2646 }, { "epoch": 0.28, "grad_norm": 1.7666746273554579, "learning_rate": 8.505350032995946e-06, "loss": 0.662, "step": 2647 }, { "epoch": 0.28, "grad_norm": 1.9788840184781433, "learning_rate": 8.504149402443782e-06, "loss": 0.6714, "step": 2648 }, { "epoch": 0.28, "grad_norm": 1.763046798035941, "learning_rate": 8.502948374676307e-06, "loss": 0.6534, "step": 2649 }, { "epoch": 0.28, "grad_norm": 1.6872062703419626, "learning_rate": 8.501746949829668e-06, "loss": 0.6629, "step": 2650 }, { "epoch": 0.28, "grad_norm": 1.9880445367487545, "learning_rate": 8.500545128040052e-06, "loss": 0.6643, "step": 2651 }, { "epoch": 0.28, "grad_norm": 1.65755205744229, "learning_rate": 8.499342909443697e-06, "loss": 0.6829, "step": 2652 }, { "epoch": 0.28, "grad_norm": 1.9105964610284794, "learning_rate": 8.498140294176874e-06, "loss": 0.5986, "step": 2653 }, { "epoch": 0.28, "grad_norm": 1.7850157986482749, "learning_rate": 8.496937282375912e-06, "loss": 0.598, "step": 2654 }, { "epoch": 0.28, "grad_norm": 1.830316459639485, "learning_rate": 8.495733874177176e-06, "loss": 0.6988, "step": 2655 }, { "epoch": 0.28, "grad_norm": 1.8863090070314195, "learning_rate": 8.49453006971708e-06, "loss": 0.6376, "step": 2656 }, { "epoch": 0.28, "grad_norm": 1.9381452812775855, "learning_rate": 8.493325869132083e-06, "loss": 0.5406, "step": 2657 }, { "epoch": 0.28, "grad_norm": 1.9249092805322674, "learning_rate": 8.492121272558687e-06, "loss": 0.6696, "step": 2658 }, { "epoch": 0.28, "grad_norm": 2.110242213687224, "learning_rate": 8.49091628013344e-06, "loss": 0.7029, "step": 2659 }, { "epoch": 0.28, "grad_norm": 1.797032481255268, "learning_rate": 8.489710891992938e-06, "loss": 0.8027, "step": 2660 }, { "epoch": 0.28, "grad_norm": 2.070668589376598, "learning_rate": 8.488505108273813e-06, "loss": 0.7835, "step": 2661 }, { "epoch": 0.28, "grad_norm": 1.8527254181639246, "learning_rate": 8.487298929112751e-06, "loss": 0.5424, "step": 2662 }, { "epoch": 0.28, "grad_norm": 1.9530291879277955, "learning_rate": 8.486092354646478e-06, "loss": 0.6221, "step": 2663 }, { "epoch": 0.28, "grad_norm": 2.0275784280967564, "learning_rate": 8.484885385011765e-06, "loss": 0.6157, "step": 2664 }, { "epoch": 0.28, "grad_norm": 1.7405853747938145, "learning_rate": 8.483678020345433e-06, "loss": 0.6301, "step": 2665 }, { "epoch": 0.28, "grad_norm": 2.249414015385447, "learning_rate": 8.48247026078434e-06, "loss": 0.7239, "step": 2666 }, { "epoch": 0.28, "grad_norm": 1.8456342897721276, "learning_rate": 8.481262106465395e-06, "loss": 0.6632, "step": 2667 }, { "epoch": 0.28, "grad_norm": 1.9602153469722, "learning_rate": 8.480053557525544e-06, "loss": 0.795, "step": 2668 }, { "epoch": 0.28, "grad_norm": 1.969295867379851, "learning_rate": 8.478844614101792e-06, "loss": 0.6968, "step": 2669 }, { "epoch": 0.28, "grad_norm": 1.966399979210272, "learning_rate": 8.47763527633117e-06, "loss": 0.7567, "step": 2670 }, { "epoch": 0.28, "grad_norm": 1.9079604001646893, "learning_rate": 8.476425544350768e-06, "loss": 0.6965, "step": 2671 }, { "epoch": 0.28, "grad_norm": 1.8146309399032956, "learning_rate": 8.475215418297718e-06, "loss": 0.6707, "step": 2672 }, { "epoch": 0.28, "grad_norm": 1.6768376922133157, "learning_rate": 8.47400489830919e-06, "loss": 0.6126, "step": 2673 }, { "epoch": 0.28, "grad_norm": 1.9499300869990017, "learning_rate": 8.472793984522406e-06, "loss": 0.5496, "step": 2674 }, { "epoch": 0.28, "grad_norm": 2.1684600034276653, "learning_rate": 8.47158267707463e-06, "loss": 0.6461, "step": 2675 }, { "epoch": 0.28, "grad_norm": 1.817694567431662, "learning_rate": 8.470370976103171e-06, "loss": 0.5364, "step": 2676 }, { "epoch": 0.28, "grad_norm": 2.217641057731939, "learning_rate": 8.46915888174538e-06, "loss": 0.689, "step": 2677 }, { "epoch": 0.28, "grad_norm": 1.8021492176632457, "learning_rate": 8.467946394138657e-06, "loss": 0.6299, "step": 2678 }, { "epoch": 0.28, "grad_norm": 1.9107035431154975, "learning_rate": 8.466733513420442e-06, "loss": 0.5893, "step": 2679 }, { "epoch": 0.28, "grad_norm": 1.8742357813979735, "learning_rate": 8.465520239728225e-06, "loss": 0.6131, "step": 2680 }, { "epoch": 0.28, "grad_norm": 1.9727390371432398, "learning_rate": 8.464306573199536e-06, "loss": 0.6842, "step": 2681 }, { "epoch": 0.28, "grad_norm": 1.7240250249963895, "learning_rate": 8.46309251397195e-06, "loss": 0.6465, "step": 2682 }, { "epoch": 0.28, "grad_norm": 1.9189298287066967, "learning_rate": 8.461878062183092e-06, "loss": 0.6417, "step": 2683 }, { "epoch": 0.28, "grad_norm": 1.800900644472255, "learning_rate": 8.46066321797062e-06, "loss": 0.7715, "step": 2684 }, { "epoch": 0.28, "grad_norm": 1.8212217697731836, "learning_rate": 8.459447981472249e-06, "loss": 0.7056, "step": 2685 }, { "epoch": 0.28, "grad_norm": 1.9752112975069571, "learning_rate": 8.45823235282573e-06, "loss": 0.6689, "step": 2686 }, { "epoch": 0.28, "grad_norm": 1.9746070435343586, "learning_rate": 8.457016332168862e-06, "loss": 0.7456, "step": 2687 }, { "epoch": 0.28, "grad_norm": 3.432989926354537, "learning_rate": 8.455799919639489e-06, "loss": 0.593, "step": 2688 }, { "epoch": 0.28, "grad_norm": 1.982266556212586, "learning_rate": 8.454583115375498e-06, "loss": 0.6421, "step": 2689 }, { "epoch": 0.28, "grad_norm": 1.7985726638165644, "learning_rate": 8.45336591951482e-06, "loss": 0.5681, "step": 2690 }, { "epoch": 0.28, "grad_norm": 2.1677613588933418, "learning_rate": 8.452148332195434e-06, "loss": 0.7061, "step": 2691 }, { "epoch": 0.28, "grad_norm": 1.8779569941032115, "learning_rate": 8.450930353555355e-06, "loss": 0.6452, "step": 2692 }, { "epoch": 0.28, "grad_norm": 1.8980254003942776, "learning_rate": 8.449711983732652e-06, "loss": 0.7254, "step": 2693 }, { "epoch": 0.28, "grad_norm": 1.7453475241560552, "learning_rate": 8.448493222865432e-06, "loss": 0.6741, "step": 2694 }, { "epoch": 0.28, "grad_norm": 1.8262450441941662, "learning_rate": 8.447274071091848e-06, "loss": 0.7417, "step": 2695 }, { "epoch": 0.28, "grad_norm": 1.8776699685839715, "learning_rate": 8.446054528550104e-06, "loss": 0.6622, "step": 2696 }, { "epoch": 0.28, "grad_norm": 2.0960061725496963, "learning_rate": 8.444834595378434e-06, "loss": 0.6906, "step": 2697 }, { "epoch": 0.28, "grad_norm": 1.9118518648617624, "learning_rate": 8.443614271715128e-06, "loss": 0.6912, "step": 2698 }, { "epoch": 0.28, "grad_norm": 1.9194363297045363, "learning_rate": 8.442393557698517e-06, "loss": 0.5579, "step": 2699 }, { "epoch": 0.28, "grad_norm": 1.9081413101253448, "learning_rate": 8.441172453466974e-06, "loss": 0.6608, "step": 2700 }, { "epoch": 0.28, "grad_norm": 1.9249403721722926, "learning_rate": 8.43995095915892e-06, "loss": 0.7314, "step": 2701 }, { "epoch": 0.28, "grad_norm": 1.9096647611073485, "learning_rate": 8.438729074912819e-06, "loss": 0.5881, "step": 2702 }, { "epoch": 0.28, "grad_norm": 2.1034642501676415, "learning_rate": 8.437506800867176e-06, "loss": 0.7437, "step": 2703 }, { "epoch": 0.28, "grad_norm": 2.0906568978132567, "learning_rate": 8.436284137160544e-06, "loss": 0.685, "step": 2704 }, { "epoch": 0.28, "grad_norm": 1.7856893381417736, "learning_rate": 8.435061083931519e-06, "loss": 0.6323, "step": 2705 }, { "epoch": 0.28, "grad_norm": 1.9666249344130147, "learning_rate": 8.433837641318741e-06, "loss": 0.6732, "step": 2706 }, { "epoch": 0.28, "grad_norm": 2.122621535668428, "learning_rate": 8.432613809460895e-06, "loss": 0.6447, "step": 2707 }, { "epoch": 0.28, "grad_norm": 2.0054957981285253, "learning_rate": 8.431389588496708e-06, "loss": 0.6584, "step": 2708 }, { "epoch": 0.28, "grad_norm": 1.9393041610845445, "learning_rate": 8.430164978564952e-06, "loss": 0.6929, "step": 2709 }, { "epoch": 0.28, "grad_norm": 1.8108812484917312, "learning_rate": 8.428939979804445e-06, "loss": 0.6626, "step": 2710 }, { "epoch": 0.28, "grad_norm": 2.021521385977441, "learning_rate": 8.427714592354046e-06, "loss": 0.7671, "step": 2711 }, { "epoch": 0.28, "grad_norm": 2.0810704085129377, "learning_rate": 8.426488816352662e-06, "loss": 0.7076, "step": 2712 }, { "epoch": 0.28, "grad_norm": 1.9862619828862162, "learning_rate": 8.425262651939238e-06, "loss": 0.669, "step": 2713 }, { "epoch": 0.28, "grad_norm": 1.951010662132229, "learning_rate": 8.424036099252772e-06, "loss": 0.6356, "step": 2714 }, { "epoch": 0.28, "grad_norm": 1.9863665255721001, "learning_rate": 8.422809158432296e-06, "loss": 0.6026, "step": 2715 }, { "epoch": 0.28, "grad_norm": 1.6639586057276734, "learning_rate": 8.421581829616893e-06, "loss": 0.6527, "step": 2716 }, { "epoch": 0.28, "grad_norm": 2.103415737649651, "learning_rate": 8.42035411294569e-06, "loss": 0.5964, "step": 2717 }, { "epoch": 0.28, "grad_norm": 1.5707603548039, "learning_rate": 8.41912600855785e-06, "loss": 0.6068, "step": 2718 }, { "epoch": 0.28, "grad_norm": 1.8106080608902415, "learning_rate": 8.417897516592589e-06, "loss": 0.6864, "step": 2719 }, { "epoch": 0.28, "grad_norm": 2.1259873253643327, "learning_rate": 8.416668637189162e-06, "loss": 0.7358, "step": 2720 }, { "epoch": 0.28, "grad_norm": 2.083747722616653, "learning_rate": 8.415439370486872e-06, "loss": 0.6944, "step": 2721 }, { "epoch": 0.28, "grad_norm": 2.063196434811401, "learning_rate": 8.414209716625062e-06, "loss": 0.741, "step": 2722 }, { "epoch": 0.28, "grad_norm": 1.7907869386100392, "learning_rate": 8.41297967574312e-06, "loss": 0.7097, "step": 2723 }, { "epoch": 0.28, "grad_norm": 1.74621161890205, "learning_rate": 8.411749247980478e-06, "loss": 0.6878, "step": 2724 }, { "epoch": 0.28, "grad_norm": 1.7515260020420864, "learning_rate": 8.410518433476613e-06, "loss": 0.7515, "step": 2725 }, { "epoch": 0.28, "grad_norm": 1.9016574392369556, "learning_rate": 8.409287232371043e-06, "loss": 0.5963, "step": 2726 }, { "epoch": 0.28, "grad_norm": 1.8126661652045508, "learning_rate": 8.408055644803335e-06, "loss": 0.6511, "step": 2727 }, { "epoch": 0.28, "grad_norm": 1.8555841277858098, "learning_rate": 8.406823670913093e-06, "loss": 0.5926, "step": 2728 }, { "epoch": 0.28, "grad_norm": 2.0801913638478475, "learning_rate": 8.405591310839972e-06, "loss": 0.7424, "step": 2729 }, { "epoch": 0.28, "grad_norm": 1.8777432597286765, "learning_rate": 8.404358564723663e-06, "loss": 0.7011, "step": 2730 }, { "epoch": 0.28, "grad_norm": 1.8320426461813335, "learning_rate": 8.403125432703904e-06, "loss": 0.6469, "step": 2731 }, { "epoch": 0.28, "grad_norm": 1.8676234472685271, "learning_rate": 8.401891914920483e-06, "loss": 0.7347, "step": 2732 }, { "epoch": 0.28, "grad_norm": 2.1091699541707456, "learning_rate": 8.400658011513223e-06, "loss": 0.6774, "step": 2733 }, { "epoch": 0.28, "grad_norm": 1.778219564489563, "learning_rate": 8.399423722621994e-06, "loss": 0.708, "step": 2734 }, { "epoch": 0.28, "grad_norm": 1.791116263128344, "learning_rate": 8.398189048386708e-06, "loss": 0.5954, "step": 2735 }, { "epoch": 0.28, "grad_norm": 1.9654234959498464, "learning_rate": 8.396953988947327e-06, "loss": 0.7262, "step": 2736 }, { "epoch": 0.28, "grad_norm": 1.923570947146556, "learning_rate": 8.39571854444385e-06, "loss": 0.6263, "step": 2737 }, { "epoch": 0.28, "grad_norm": 1.9762691203393106, "learning_rate": 8.394482715016318e-06, "loss": 0.7634, "step": 2738 }, { "epoch": 0.28, "grad_norm": 1.785056907642776, "learning_rate": 8.393246500804825e-06, "loss": 0.6711, "step": 2739 }, { "epoch": 0.28, "grad_norm": 1.92259639664194, "learning_rate": 8.3920099019495e-06, "loss": 0.6447, "step": 2740 }, { "epoch": 0.28, "grad_norm": 1.9325806794278027, "learning_rate": 8.390772918590517e-06, "loss": 0.6193, "step": 2741 }, { "epoch": 0.29, "grad_norm": 1.796897789122905, "learning_rate": 8.389535550868098e-06, "loss": 0.5864, "step": 2742 }, { "epoch": 0.29, "grad_norm": 1.5789258620706366, "learning_rate": 8.388297798922505e-06, "loss": 0.6275, "step": 2743 }, { "epoch": 0.29, "grad_norm": 1.9466379670977967, "learning_rate": 8.387059662894043e-06, "loss": 0.6843, "step": 2744 }, { "epoch": 0.29, "grad_norm": 1.7072700706186492, "learning_rate": 8.385821142923064e-06, "loss": 0.669, "step": 2745 }, { "epoch": 0.29, "grad_norm": 1.848227815607721, "learning_rate": 8.38458223914996e-06, "loss": 0.6242, "step": 2746 }, { "epoch": 0.29, "grad_norm": 1.8308049578888184, "learning_rate": 8.383342951715165e-06, "loss": 0.6314, "step": 2747 }, { "epoch": 0.29, "grad_norm": 2.1493743941726224, "learning_rate": 8.382103280759164e-06, "loss": 0.6465, "step": 2748 }, { "epoch": 0.29, "grad_norm": 1.9101945267736256, "learning_rate": 8.380863226422478e-06, "loss": 0.6816, "step": 2749 }, { "epoch": 0.29, "grad_norm": 1.9718539473467542, "learning_rate": 8.379622788845675e-06, "loss": 0.7033, "step": 2750 }, { "epoch": 0.29, "grad_norm": 2.038835831098079, "learning_rate": 8.378381968169368e-06, "loss": 0.6988, "step": 2751 }, { "epoch": 0.29, "grad_norm": 1.7900118328139933, "learning_rate": 8.377140764534206e-06, "loss": 0.5064, "step": 2752 }, { "epoch": 0.29, "grad_norm": 1.7774886330392579, "learning_rate": 8.37589917808089e-06, "loss": 0.6096, "step": 2753 }, { "epoch": 0.29, "grad_norm": 1.809054839530392, "learning_rate": 8.37465720895016e-06, "loss": 0.6773, "step": 2754 }, { "epoch": 0.29, "grad_norm": 1.5992093185602265, "learning_rate": 8.373414857282802e-06, "loss": 0.5207, "step": 2755 }, { "epoch": 0.29, "grad_norm": 2.010983378957128, "learning_rate": 8.372172123219639e-06, "loss": 0.7494, "step": 2756 }, { "epoch": 0.29, "grad_norm": 1.9938803891005408, "learning_rate": 8.370929006901547e-06, "loss": 0.5927, "step": 2757 }, { "epoch": 0.29, "grad_norm": 1.8945446825004, "learning_rate": 8.36968550846944e-06, "loss": 0.6086, "step": 2758 }, { "epoch": 0.29, "grad_norm": 1.9169728105853157, "learning_rate": 8.368441628064273e-06, "loss": 0.6921, "step": 2759 }, { "epoch": 0.29, "grad_norm": 1.9901330548989136, "learning_rate": 8.367197365827047e-06, "loss": 0.6753, "step": 2760 }, { "epoch": 0.29, "grad_norm": 2.0824344592889297, "learning_rate": 8.365952721898806e-06, "loss": 0.6964, "step": 2761 }, { "epoch": 0.29, "grad_norm": 2.1360108572284773, "learning_rate": 8.364707696420642e-06, "loss": 0.7696, "step": 2762 }, { "epoch": 0.29, "grad_norm": 1.6831167248161611, "learning_rate": 8.363462289533681e-06, "loss": 0.5605, "step": 2763 }, { "epoch": 0.29, "grad_norm": 1.989356336067944, "learning_rate": 8.362216501379096e-06, "loss": 0.7506, "step": 2764 }, { "epoch": 0.29, "grad_norm": 1.5598840637317084, "learning_rate": 8.360970332098111e-06, "loss": 0.6175, "step": 2765 }, { "epoch": 0.29, "grad_norm": 2.1096417150952536, "learning_rate": 8.359723781831978e-06, "loss": 0.7308, "step": 2766 }, { "epoch": 0.29, "grad_norm": 1.6011126729548892, "learning_rate": 8.358476850722007e-06, "loss": 0.6811, "step": 2767 }, { "epoch": 0.29, "grad_norm": 1.9464596799550342, "learning_rate": 8.357229538909542e-06, "loss": 0.663, "step": 2768 }, { "epoch": 0.29, "grad_norm": 1.9137397893266275, "learning_rate": 8.355981846535972e-06, "loss": 0.6147, "step": 2769 }, { "epoch": 0.29, "grad_norm": 1.764067948839555, "learning_rate": 8.354733773742734e-06, "loss": 0.6074, "step": 2770 }, { "epoch": 0.29, "grad_norm": 1.9148936860052028, "learning_rate": 8.353485320671298e-06, "loss": 0.7877, "step": 2771 }, { "epoch": 0.29, "grad_norm": 1.7112798128547464, "learning_rate": 8.352236487463188e-06, "loss": 0.5686, "step": 2772 }, { "epoch": 0.29, "grad_norm": 1.992101185683906, "learning_rate": 8.350987274259966e-06, "loss": 0.7142, "step": 2773 }, { "epoch": 0.29, "grad_norm": 1.8753999534127264, "learning_rate": 8.349737681203234e-06, "loss": 0.6127, "step": 2774 }, { "epoch": 0.29, "grad_norm": 2.142936199132452, "learning_rate": 8.348487708434644e-06, "loss": 0.6528, "step": 2775 }, { "epoch": 0.29, "grad_norm": 2.1341310375948317, "learning_rate": 8.347237356095888e-06, "loss": 0.6646, "step": 2776 }, { "epoch": 0.29, "grad_norm": 1.9282855617561891, "learning_rate": 8.3459866243287e-06, "loss": 0.7142, "step": 2777 }, { "epoch": 0.29, "grad_norm": 1.8331210754757201, "learning_rate": 8.344735513274853e-06, "loss": 0.7037, "step": 2778 }, { "epoch": 0.29, "grad_norm": 1.9595823974640136, "learning_rate": 8.343484023076175e-06, "loss": 0.6657, "step": 2779 }, { "epoch": 0.29, "grad_norm": 1.8681476948952542, "learning_rate": 8.342232153874521e-06, "loss": 0.6335, "step": 2780 }, { "epoch": 0.29, "grad_norm": 1.7368048649716803, "learning_rate": 8.340979905811805e-06, "loss": 0.6599, "step": 2781 }, { "epoch": 0.29, "grad_norm": 2.688572412587195, "learning_rate": 8.339727279029974e-06, "loss": 0.6573, "step": 2782 }, { "epoch": 0.29, "grad_norm": 1.7254670078388807, "learning_rate": 8.33847427367102e-06, "loss": 0.7033, "step": 2783 }, { "epoch": 0.29, "grad_norm": 1.7445016985559958, "learning_rate": 8.337220889876978e-06, "loss": 0.6888, "step": 2784 }, { "epoch": 0.29, "grad_norm": 1.8956447209405711, "learning_rate": 8.335967127789929e-06, "loss": 0.6261, "step": 2785 }, { "epoch": 0.29, "grad_norm": 1.8284382496111118, "learning_rate": 8.334712987551989e-06, "loss": 0.6161, "step": 2786 }, { "epoch": 0.29, "grad_norm": 1.828969267565181, "learning_rate": 8.333458469305324e-06, "loss": 0.7489, "step": 2787 }, { "epoch": 0.29, "grad_norm": 1.7603821492238025, "learning_rate": 8.332203573192143e-06, "loss": 0.5797, "step": 2788 }, { "epoch": 0.29, "grad_norm": 2.0787481786361366, "learning_rate": 8.330948299354694e-06, "loss": 0.6936, "step": 2789 }, { "epoch": 0.29, "grad_norm": 2.0677329033718657, "learning_rate": 8.329692647935269e-06, "loss": 0.6972, "step": 2790 }, { "epoch": 0.29, "grad_norm": 1.775462102298857, "learning_rate": 8.328436619076203e-06, "loss": 0.6864, "step": 2791 }, { "epoch": 0.29, "grad_norm": 2.0303699301163443, "learning_rate": 8.327180212919877e-06, "loss": 0.6823, "step": 2792 }, { "epoch": 0.29, "grad_norm": 2.210558036257578, "learning_rate": 8.32592342960871e-06, "loss": 0.7242, "step": 2793 }, { "epoch": 0.29, "grad_norm": 2.1828645450156468, "learning_rate": 8.324666269285161e-06, "loss": 0.6827, "step": 2794 }, { "epoch": 0.29, "grad_norm": 1.6089239490667053, "learning_rate": 8.323408732091743e-06, "loss": 0.5493, "step": 2795 }, { "epoch": 0.29, "grad_norm": 1.7779698846657657, "learning_rate": 8.322150818171002e-06, "loss": 0.6296, "step": 2796 }, { "epoch": 0.29, "grad_norm": 2.0643568991443635, "learning_rate": 8.32089252766553e-06, "loss": 0.5536, "step": 2797 }, { "epoch": 0.29, "grad_norm": 1.86510472363077, "learning_rate": 8.319633860717963e-06, "loss": 0.6896, "step": 2798 }, { "epoch": 0.29, "grad_norm": 1.9510095493605686, "learning_rate": 8.318374817470976e-06, "loss": 0.6448, "step": 2799 }, { "epoch": 0.29, "grad_norm": 1.885516169850448, "learning_rate": 8.317115398067289e-06, "loss": 0.669, "step": 2800 }, { "epoch": 0.29, "grad_norm": 2.2572097683864305, "learning_rate": 8.315855602649662e-06, "loss": 0.5818, "step": 2801 }, { "epoch": 0.29, "grad_norm": 1.759056075445115, "learning_rate": 8.314595431360906e-06, "loss": 0.6457, "step": 2802 }, { "epoch": 0.29, "grad_norm": 1.6425939825571452, "learning_rate": 8.313334884343866e-06, "loss": 0.5231, "step": 2803 }, { "epoch": 0.29, "grad_norm": 1.7130113252654398, "learning_rate": 8.31207396174143e-06, "loss": 0.7554, "step": 2804 }, { "epoch": 0.29, "grad_norm": 1.955914565838696, "learning_rate": 8.310812663696531e-06, "loss": 0.627, "step": 2805 }, { "epoch": 0.29, "grad_norm": 1.6720147103849268, "learning_rate": 8.309550990352146e-06, "loss": 0.6079, "step": 2806 }, { "epoch": 0.29, "grad_norm": 2.237069935203191, "learning_rate": 8.308288941851295e-06, "loss": 0.7222, "step": 2807 }, { "epoch": 0.29, "grad_norm": 1.8290522376346103, "learning_rate": 8.307026518337033e-06, "loss": 0.5718, "step": 2808 }, { "epoch": 0.29, "grad_norm": 1.8328742008169372, "learning_rate": 8.305763719952467e-06, "loss": 0.6836, "step": 2809 }, { "epoch": 0.29, "grad_norm": 2.0006170517993533, "learning_rate": 8.304500546840742e-06, "loss": 0.6471, "step": 2810 }, { "epoch": 0.29, "grad_norm": 1.863878297149142, "learning_rate": 8.303236999145044e-06, "loss": 0.7006, "step": 2811 }, { "epoch": 0.29, "grad_norm": 1.9753606075612662, "learning_rate": 8.301973077008604e-06, "loss": 0.6209, "step": 2812 }, { "epoch": 0.29, "grad_norm": 1.85924478525618, "learning_rate": 8.300708780574695e-06, "loss": 0.58, "step": 2813 }, { "epoch": 0.29, "grad_norm": 1.9617682288103584, "learning_rate": 8.299444109986631e-06, "loss": 0.6333, "step": 2814 }, { "epoch": 0.29, "grad_norm": 1.9932942836246554, "learning_rate": 8.298179065387774e-06, "loss": 0.6311, "step": 2815 }, { "epoch": 0.29, "grad_norm": 1.9582877206850164, "learning_rate": 8.29691364692152e-06, "loss": 0.6961, "step": 2816 }, { "epoch": 0.29, "grad_norm": 2.011029682430342, "learning_rate": 8.295647854731312e-06, "loss": 0.6541, "step": 2817 }, { "epoch": 0.29, "grad_norm": 1.9245441043938138, "learning_rate": 8.294381688960634e-06, "loss": 0.6018, "step": 2818 }, { "epoch": 0.29, "grad_norm": 2.1310980060323508, "learning_rate": 8.293115149753016e-06, "loss": 0.7579, "step": 2819 }, { "epoch": 0.29, "grad_norm": 1.775629952527209, "learning_rate": 8.291848237252025e-06, "loss": 0.6678, "step": 2820 }, { "epoch": 0.29, "grad_norm": 1.8880663986965478, "learning_rate": 8.290580951601272e-06, "loss": 0.5957, "step": 2821 }, { "epoch": 0.29, "grad_norm": 2.269045732221966, "learning_rate": 8.289313292944415e-06, "loss": 0.6567, "step": 2822 }, { "epoch": 0.29, "grad_norm": 2.157095157644829, "learning_rate": 8.288045261425146e-06, "loss": 0.7711, "step": 2823 }, { "epoch": 0.29, "grad_norm": 1.7717605727416954, "learning_rate": 8.286776857187205e-06, "loss": 0.5993, "step": 2824 }, { "epoch": 0.29, "grad_norm": 2.2377882664385167, "learning_rate": 8.285508080374376e-06, "loss": 0.6189, "step": 2825 }, { "epoch": 0.29, "grad_norm": 1.9421413495128026, "learning_rate": 8.284238931130476e-06, "loss": 0.6647, "step": 2826 }, { "epoch": 0.29, "grad_norm": 1.9791339729087056, "learning_rate": 8.282969409599375e-06, "loss": 0.7541, "step": 2827 }, { "epoch": 0.29, "grad_norm": 2.2188976160973986, "learning_rate": 8.28169951592498e-06, "loss": 0.6117, "step": 2828 }, { "epoch": 0.29, "grad_norm": 2.018124174549173, "learning_rate": 8.280429250251238e-06, "loss": 0.6828, "step": 2829 }, { "epoch": 0.29, "grad_norm": 1.9341218682789678, "learning_rate": 8.279158612722145e-06, "loss": 0.628, "step": 2830 }, { "epoch": 0.29, "grad_norm": 1.861973629599436, "learning_rate": 8.27788760348173e-06, "loss": 0.6198, "step": 2831 }, { "epoch": 0.29, "grad_norm": 1.7662295415002316, "learning_rate": 8.276616222674072e-06, "loss": 0.6158, "step": 2832 }, { "epoch": 0.29, "grad_norm": 1.7746613606468469, "learning_rate": 8.275344470443292e-06, "loss": 0.6234, "step": 2833 }, { "epoch": 0.29, "grad_norm": 1.77084820465646, "learning_rate": 8.274072346933544e-06, "loss": 0.6325, "step": 2834 }, { "epoch": 0.29, "grad_norm": 1.9053142262270337, "learning_rate": 8.272799852289036e-06, "loss": 0.5998, "step": 2835 }, { "epoch": 0.29, "grad_norm": 1.9878612490236216, "learning_rate": 8.27152698665401e-06, "loss": 0.8256, "step": 2836 }, { "epoch": 0.29, "grad_norm": 1.7506593303708355, "learning_rate": 8.270253750172754e-06, "loss": 0.6899, "step": 2837 }, { "epoch": 0.3, "grad_norm": 1.9372791674384715, "learning_rate": 8.268980142989594e-06, "loss": 0.6556, "step": 2838 }, { "epoch": 0.3, "grad_norm": 1.9580485272446944, "learning_rate": 8.267706165248901e-06, "loss": 0.6014, "step": 2839 }, { "epoch": 0.3, "grad_norm": 1.94607284887436, "learning_rate": 8.266431817095094e-06, "loss": 0.7343, "step": 2840 }, { "epoch": 0.3, "grad_norm": 2.0233162109195444, "learning_rate": 8.265157098672617e-06, "loss": 0.6424, "step": 2841 }, { "epoch": 0.3, "grad_norm": 1.7612601997749828, "learning_rate": 8.263882010125974e-06, "loss": 0.6468, "step": 2842 }, { "epoch": 0.3, "grad_norm": 1.8166003406613074, "learning_rate": 8.262606551599701e-06, "loss": 0.7198, "step": 2843 }, { "epoch": 0.3, "grad_norm": 2.1777514169067005, "learning_rate": 8.261330723238381e-06, "loss": 0.6666, "step": 2844 }, { "epoch": 0.3, "grad_norm": 1.8431539782484165, "learning_rate": 8.260054525186634e-06, "loss": 0.6959, "step": 2845 }, { "epoch": 0.3, "grad_norm": 1.9351049160946372, "learning_rate": 8.258777957589124e-06, "loss": 0.6634, "step": 2846 }, { "epoch": 0.3, "grad_norm": 1.785975155097984, "learning_rate": 8.257501020590557e-06, "loss": 0.7822, "step": 2847 }, { "epoch": 0.3, "grad_norm": 1.7172233034670437, "learning_rate": 8.256223714335685e-06, "loss": 0.593, "step": 2848 }, { "epoch": 0.3, "grad_norm": 1.8331082125147111, "learning_rate": 8.254946038969294e-06, "loss": 0.5986, "step": 2849 }, { "epoch": 0.3, "grad_norm": 1.5989269880200359, "learning_rate": 8.253667994636216e-06, "loss": 0.6494, "step": 2850 }, { "epoch": 0.3, "grad_norm": 1.8126460703093885, "learning_rate": 8.252389581481328e-06, "loss": 0.6088, "step": 2851 }, { "epoch": 0.3, "grad_norm": 1.7710013320024187, "learning_rate": 8.25111079964954e-06, "loss": 0.692, "step": 2852 }, { "epoch": 0.3, "grad_norm": 1.7261220654564688, "learning_rate": 8.249831649285813e-06, "loss": 0.6587, "step": 2853 }, { "epoch": 0.3, "grad_norm": 1.8275573012523363, "learning_rate": 8.248552130535146e-06, "loss": 0.6926, "step": 2854 }, { "epoch": 0.3, "grad_norm": 1.8146408797397333, "learning_rate": 8.247272243542579e-06, "loss": 0.603, "step": 2855 }, { "epoch": 0.3, "grad_norm": 1.9636780696037397, "learning_rate": 8.245991988453193e-06, "loss": 0.7721, "step": 2856 }, { "epoch": 0.3, "grad_norm": 1.7874387137159384, "learning_rate": 8.244711365412113e-06, "loss": 0.5659, "step": 2857 }, { "epoch": 0.3, "grad_norm": 1.836389067622902, "learning_rate": 8.243430374564507e-06, "loss": 0.6837, "step": 2858 }, { "epoch": 0.3, "grad_norm": 1.7888108908675027, "learning_rate": 8.242149016055582e-06, "loss": 0.5791, "step": 2859 }, { "epoch": 0.3, "grad_norm": 1.8335701181284372, "learning_rate": 8.240867290030585e-06, "loss": 0.6948, "step": 2860 }, { "epoch": 0.3, "grad_norm": 2.0901484654745426, "learning_rate": 8.239585196634808e-06, "loss": 0.748, "step": 2861 }, { "epoch": 0.3, "grad_norm": 1.7432176517498563, "learning_rate": 8.238302736013587e-06, "loss": 0.6321, "step": 2862 }, { "epoch": 0.3, "grad_norm": 1.8447676911357693, "learning_rate": 8.237019908312289e-06, "loss": 0.6296, "step": 2863 }, { "epoch": 0.3, "grad_norm": 1.6818923204848246, "learning_rate": 8.235736713676336e-06, "loss": 0.5814, "step": 2864 }, { "epoch": 0.3, "grad_norm": 1.8930984108194275, "learning_rate": 8.234453152251183e-06, "loss": 0.5862, "step": 2865 }, { "epoch": 0.3, "grad_norm": 1.7856506372753727, "learning_rate": 8.23316922418233e-06, "loss": 0.6485, "step": 2866 }, { "epoch": 0.3, "grad_norm": 1.7134171960426727, "learning_rate": 8.231884929615315e-06, "loss": 0.7412, "step": 2867 }, { "epoch": 0.3, "grad_norm": 2.034932618216676, "learning_rate": 8.230600268695724e-06, "loss": 0.8004, "step": 2868 }, { "epoch": 0.3, "grad_norm": 1.8280220472034374, "learning_rate": 8.229315241569177e-06, "loss": 0.6732, "step": 2869 }, { "epoch": 0.3, "grad_norm": 2.2761201014870887, "learning_rate": 8.228029848381343e-06, "loss": 0.6222, "step": 2870 }, { "epoch": 0.3, "grad_norm": 1.9435641548208933, "learning_rate": 8.226744089277927e-06, "loss": 0.706, "step": 2871 }, { "epoch": 0.3, "grad_norm": 1.7705164240793172, "learning_rate": 8.225457964404675e-06, "loss": 0.6072, "step": 2872 }, { "epoch": 0.3, "grad_norm": 1.7796138793949614, "learning_rate": 8.224171473907379e-06, "loss": 0.7511, "step": 2873 }, { "epoch": 0.3, "grad_norm": 2.294358735763917, "learning_rate": 8.222884617931868e-06, "loss": 0.7495, "step": 2874 }, { "epoch": 0.3, "grad_norm": 1.9779657166432165, "learning_rate": 8.221597396624017e-06, "loss": 0.6661, "step": 2875 }, { "epoch": 0.3, "grad_norm": 1.821430815356854, "learning_rate": 8.220309810129739e-06, "loss": 0.6018, "step": 2876 }, { "epoch": 0.3, "grad_norm": 1.9649085361169925, "learning_rate": 8.219021858594989e-06, "loss": 0.6216, "step": 2877 }, { "epoch": 0.3, "grad_norm": 1.9075400329966081, "learning_rate": 8.217733542165762e-06, "loss": 0.5262, "step": 2878 }, { "epoch": 0.3, "grad_norm": 1.787468247986847, "learning_rate": 8.216444860988098e-06, "loss": 0.7078, "step": 2879 }, { "epoch": 0.3, "grad_norm": 1.9202828017719724, "learning_rate": 8.215155815208075e-06, "loss": 0.6898, "step": 2880 }, { "epoch": 0.3, "grad_norm": 2.074321753545804, "learning_rate": 8.213866404971817e-06, "loss": 0.6434, "step": 2881 }, { "epoch": 0.3, "grad_norm": 1.9239815198105046, "learning_rate": 8.212576630425482e-06, "loss": 0.6708, "step": 2882 }, { "epoch": 0.3, "grad_norm": 2.0109810934562273, "learning_rate": 8.211286491715274e-06, "loss": 0.6282, "step": 2883 }, { "epoch": 0.3, "grad_norm": 1.877253484772073, "learning_rate": 8.20999598898744e-06, "loss": 0.5615, "step": 2884 }, { "epoch": 0.3, "grad_norm": 1.9652383152117894, "learning_rate": 8.208705122388263e-06, "loss": 0.6393, "step": 2885 }, { "epoch": 0.3, "grad_norm": 1.9491407973172676, "learning_rate": 8.207413892064073e-06, "loss": 0.6992, "step": 2886 }, { "epoch": 0.3, "grad_norm": 1.7003145780895765, "learning_rate": 8.206122298161236e-06, "loss": 0.5622, "step": 2887 }, { "epoch": 0.3, "grad_norm": 1.8707928859913363, "learning_rate": 8.204830340826161e-06, "loss": 0.751, "step": 2888 }, { "epoch": 0.3, "grad_norm": 1.9977017222580762, "learning_rate": 8.203538020205301e-06, "loss": 0.6936, "step": 2889 }, { "epoch": 0.3, "grad_norm": 1.7954445118532534, "learning_rate": 8.202245336445146e-06, "loss": 0.6332, "step": 2890 }, { "epoch": 0.3, "grad_norm": 2.1212523482878796, "learning_rate": 8.200952289692233e-06, "loss": 0.7295, "step": 2891 }, { "epoch": 0.3, "grad_norm": 1.9203803961045498, "learning_rate": 8.199658880093132e-06, "loss": 0.6838, "step": 2892 }, { "epoch": 0.3, "grad_norm": 1.9804119787254033, "learning_rate": 8.198365107794457e-06, "loss": 0.6464, "step": 2893 }, { "epoch": 0.3, "grad_norm": 1.7118907526474592, "learning_rate": 8.19707097294287e-06, "loss": 0.6199, "step": 2894 }, { "epoch": 0.3, "grad_norm": 1.8173439975730064, "learning_rate": 8.195776475685061e-06, "loss": 0.6636, "step": 2895 }, { "epoch": 0.3, "grad_norm": 1.9947503806681284, "learning_rate": 8.194481616167777e-06, "loss": 0.7348, "step": 2896 }, { "epoch": 0.3, "grad_norm": 2.0059450385174444, "learning_rate": 8.193186394537792e-06, "loss": 0.671, "step": 2897 }, { "epoch": 0.3, "grad_norm": 1.8995286372082676, "learning_rate": 8.191890810941932e-06, "loss": 0.646, "step": 2898 }, { "epoch": 0.3, "grad_norm": 1.6680630849647606, "learning_rate": 8.190594865527052e-06, "loss": 0.6015, "step": 2899 }, { "epoch": 0.3, "grad_norm": 2.0300903756694364, "learning_rate": 8.18929855844006e-06, "loss": 0.6532, "step": 2900 }, { "epoch": 0.3, "grad_norm": 2.03213398770465, "learning_rate": 8.188001889827897e-06, "loss": 0.745, "step": 2901 }, { "epoch": 0.3, "grad_norm": 1.9409976976712575, "learning_rate": 8.18670485983755e-06, "loss": 0.6487, "step": 2902 }, { "epoch": 0.3, "grad_norm": 2.0263585046810744, "learning_rate": 8.185407468616042e-06, "loss": 0.5663, "step": 2903 }, { "epoch": 0.3, "grad_norm": 2.003893682630391, "learning_rate": 8.18410971631044e-06, "loss": 0.7368, "step": 2904 }, { "epoch": 0.3, "grad_norm": 1.8199743964798658, "learning_rate": 8.182811603067855e-06, "loss": 0.6108, "step": 2905 }, { "epoch": 0.3, "grad_norm": 1.9114442676440553, "learning_rate": 8.181513129035434e-06, "loss": 0.7262, "step": 2906 }, { "epoch": 0.3, "grad_norm": 2.138825809398571, "learning_rate": 8.180214294360365e-06, "loss": 0.6708, "step": 2907 }, { "epoch": 0.3, "grad_norm": 1.7635191853249457, "learning_rate": 8.178915099189877e-06, "loss": 0.5041, "step": 2908 }, { "epoch": 0.3, "grad_norm": 1.7070240123779603, "learning_rate": 8.177615543671247e-06, "loss": 0.5458, "step": 2909 }, { "epoch": 0.3, "grad_norm": 1.9043026323384222, "learning_rate": 8.176315627951781e-06, "loss": 0.6708, "step": 2910 }, { "epoch": 0.3, "grad_norm": 1.8512788197178098, "learning_rate": 8.175015352178835e-06, "loss": 0.6638, "step": 2911 }, { "epoch": 0.3, "grad_norm": 1.69736575656168, "learning_rate": 8.173714716499801e-06, "loss": 0.6792, "step": 2912 }, { "epoch": 0.3, "grad_norm": 1.7299590678396837, "learning_rate": 8.172413721062115e-06, "loss": 0.576, "step": 2913 }, { "epoch": 0.3, "grad_norm": 1.8580558642183693, "learning_rate": 8.171112366013252e-06, "loss": 0.6543, "step": 2914 }, { "epoch": 0.3, "grad_norm": 1.8468439712054643, "learning_rate": 8.169810651500728e-06, "loss": 0.6622, "step": 2915 }, { "epoch": 0.3, "grad_norm": 1.8010109531550258, "learning_rate": 8.168508577672096e-06, "loss": 0.6466, "step": 2916 }, { "epoch": 0.3, "grad_norm": 1.9057592922152613, "learning_rate": 8.167206144674959e-06, "loss": 0.5711, "step": 2917 }, { "epoch": 0.3, "grad_norm": 2.050683545427632, "learning_rate": 8.165903352656954e-06, "loss": 0.6602, "step": 2918 }, { "epoch": 0.3, "grad_norm": 1.8054973854791319, "learning_rate": 8.164600201765758e-06, "loss": 0.6585, "step": 2919 }, { "epoch": 0.3, "grad_norm": 1.9761637090582729, "learning_rate": 8.163296692149093e-06, "loss": 0.6079, "step": 2920 }, { "epoch": 0.3, "grad_norm": 1.88880213967494, "learning_rate": 8.161992823954715e-06, "loss": 0.6007, "step": 2921 }, { "epoch": 0.3, "grad_norm": 1.8416518885541622, "learning_rate": 8.160688597330428e-06, "loss": 0.6713, "step": 2922 }, { "epoch": 0.3, "grad_norm": 1.873829102606367, "learning_rate": 8.159384012424074e-06, "loss": 0.6165, "step": 2923 }, { "epoch": 0.3, "grad_norm": 1.8160124127691164, "learning_rate": 8.158079069383535e-06, "loss": 0.7594, "step": 2924 }, { "epoch": 0.3, "grad_norm": 1.863086365003482, "learning_rate": 8.156773768356733e-06, "loss": 0.6637, "step": 2925 }, { "epoch": 0.3, "grad_norm": 1.6903193748478216, "learning_rate": 8.155468109491632e-06, "loss": 0.5883, "step": 2926 }, { "epoch": 0.3, "grad_norm": 1.9542532517773603, "learning_rate": 8.154162092936233e-06, "loss": 0.7481, "step": 2927 }, { "epoch": 0.3, "grad_norm": 1.772285860147228, "learning_rate": 8.152855718838583e-06, "loss": 0.5753, "step": 2928 }, { "epoch": 0.3, "grad_norm": 1.9175001165132863, "learning_rate": 8.151548987346768e-06, "loss": 0.6404, "step": 2929 }, { "epoch": 0.3, "grad_norm": 2.1361197261230918, "learning_rate": 8.150241898608911e-06, "loss": 0.7385, "step": 2930 }, { "epoch": 0.3, "grad_norm": 1.6543334650805261, "learning_rate": 8.14893445277318e-06, "loss": 0.5488, "step": 2931 }, { "epoch": 0.3, "grad_norm": 2.2819717034684923, "learning_rate": 8.147626649987779e-06, "loss": 0.5632, "step": 2932 }, { "epoch": 0.3, "grad_norm": 1.9251150848421499, "learning_rate": 8.146318490400958e-06, "loss": 0.73, "step": 2933 }, { "epoch": 0.3, "grad_norm": 1.9096484516775554, "learning_rate": 8.145009974161002e-06, "loss": 0.6355, "step": 2934 }, { "epoch": 0.31, "grad_norm": 2.014743746968432, "learning_rate": 8.14370110141624e-06, "loss": 0.604, "step": 2935 }, { "epoch": 0.31, "grad_norm": 1.7574927081153062, "learning_rate": 8.142391872315038e-06, "loss": 0.5701, "step": 2936 }, { "epoch": 0.31, "grad_norm": 1.7754012395866758, "learning_rate": 8.141082287005808e-06, "loss": 0.7174, "step": 2937 }, { "epoch": 0.31, "grad_norm": 1.7225274879562285, "learning_rate": 8.139772345636996e-06, "loss": 0.5452, "step": 2938 }, { "epoch": 0.31, "grad_norm": 1.88076198186382, "learning_rate": 8.138462048357093e-06, "loss": 0.6864, "step": 2939 }, { "epoch": 0.31, "grad_norm": 2.099299123705203, "learning_rate": 8.137151395314628e-06, "loss": 0.6782, "step": 2940 }, { "epoch": 0.31, "grad_norm": 1.8633877364101679, "learning_rate": 8.13584038665817e-06, "loss": 0.6702, "step": 2941 }, { "epoch": 0.31, "grad_norm": 1.7629071756590513, "learning_rate": 8.134529022536332e-06, "loss": 0.6396, "step": 2942 }, { "epoch": 0.31, "grad_norm": 1.8434231548356848, "learning_rate": 8.133217303097764e-06, "loss": 0.7665, "step": 2943 }, { "epoch": 0.31, "grad_norm": 2.0000437586736637, "learning_rate": 8.131905228491155e-06, "loss": 0.655, "step": 2944 }, { "epoch": 0.31, "grad_norm": 2.0562680118666377, "learning_rate": 8.130592798865237e-06, "loss": 0.623, "step": 2945 }, { "epoch": 0.31, "grad_norm": 1.8727560280886, "learning_rate": 8.129280014368781e-06, "loss": 0.5804, "step": 2946 }, { "epoch": 0.31, "grad_norm": 1.7842157138360704, "learning_rate": 8.1279668751506e-06, "loss": 0.7247, "step": 2947 }, { "epoch": 0.31, "grad_norm": 1.8652448850439833, "learning_rate": 8.126653381359543e-06, "loss": 0.6968, "step": 2948 }, { "epoch": 0.31, "grad_norm": 2.2497973252323638, "learning_rate": 8.125339533144507e-06, "loss": 0.6329, "step": 2949 }, { "epoch": 0.31, "grad_norm": 1.7867873852205007, "learning_rate": 8.12402533065442e-06, "loss": 0.5789, "step": 2950 }, { "epoch": 0.31, "grad_norm": 2.077399777170234, "learning_rate": 8.122710774038253e-06, "loss": 0.6199, "step": 2951 }, { "epoch": 0.31, "grad_norm": 1.9012648354483244, "learning_rate": 8.121395863445023e-06, "loss": 0.7289, "step": 2952 }, { "epoch": 0.31, "grad_norm": 1.824018411625984, "learning_rate": 8.120080599023781e-06, "loss": 0.6642, "step": 2953 }, { "epoch": 0.31, "grad_norm": 1.9300181390182036, "learning_rate": 8.118764980923619e-06, "loss": 0.6938, "step": 2954 }, { "epoch": 0.31, "grad_norm": 1.8854008336004575, "learning_rate": 8.117449009293668e-06, "loss": 0.6819, "step": 2955 }, { "epoch": 0.31, "grad_norm": 1.966959364350495, "learning_rate": 8.116132684283104e-06, "loss": 0.6794, "step": 2956 }, { "epoch": 0.31, "grad_norm": 1.905465999690345, "learning_rate": 8.11481600604114e-06, "loss": 0.6242, "step": 2957 }, { "epoch": 0.31, "grad_norm": 1.945562288125105, "learning_rate": 8.113498974717027e-06, "loss": 0.5482, "step": 2958 }, { "epoch": 0.31, "grad_norm": 2.086970708973281, "learning_rate": 8.11218159046006e-06, "loss": 0.7742, "step": 2959 }, { "epoch": 0.31, "grad_norm": 1.8443539974736436, "learning_rate": 8.110863853419568e-06, "loss": 0.7127, "step": 2960 }, { "epoch": 0.31, "grad_norm": 2.890393352231358, "learning_rate": 8.10954576374493e-06, "loss": 0.6969, "step": 2961 }, { "epoch": 0.31, "grad_norm": 2.033669898074698, "learning_rate": 8.108227321585554e-06, "loss": 0.6983, "step": 2962 }, { "epoch": 0.31, "grad_norm": 1.9019326562773995, "learning_rate": 8.106908527090895e-06, "loss": 0.6637, "step": 2963 }, { "epoch": 0.31, "grad_norm": 1.9436938800459622, "learning_rate": 8.105589380410448e-06, "loss": 0.6138, "step": 2964 }, { "epoch": 0.31, "grad_norm": 1.931517898910024, "learning_rate": 8.10426988169374e-06, "loss": 0.6288, "step": 2965 }, { "epoch": 0.31, "grad_norm": 2.01533596468131, "learning_rate": 8.10295003109035e-06, "loss": 0.6901, "step": 2966 }, { "epoch": 0.31, "grad_norm": 1.9307107562996824, "learning_rate": 8.101629828749887e-06, "loss": 0.6474, "step": 2967 }, { "epoch": 0.31, "grad_norm": 1.7937486278251964, "learning_rate": 8.100309274822002e-06, "loss": 0.6022, "step": 2968 }, { "epoch": 0.31, "grad_norm": 1.8036146364537236, "learning_rate": 8.098988369456392e-06, "loss": 0.6685, "step": 2969 }, { "epoch": 0.31, "grad_norm": 1.8502562122591037, "learning_rate": 8.097667112802784e-06, "loss": 0.7181, "step": 2970 }, { "epoch": 0.31, "grad_norm": 1.8619088903053767, "learning_rate": 8.096345505010956e-06, "loss": 0.6382, "step": 2971 }, { "epoch": 0.31, "grad_norm": 2.0072848292508727, "learning_rate": 8.095023546230715e-06, "loss": 0.6269, "step": 2972 }, { "epoch": 0.31, "grad_norm": 2.0408515277535035, "learning_rate": 8.093701236611914e-06, "loss": 0.7009, "step": 2973 }, { "epoch": 0.31, "grad_norm": 1.8022765607025482, "learning_rate": 8.092378576304443e-06, "loss": 0.6222, "step": 2974 }, { "epoch": 0.31, "grad_norm": 2.1356198828409583, "learning_rate": 8.091055565458236e-06, "loss": 0.8086, "step": 2975 }, { "epoch": 0.31, "grad_norm": 1.7900732105124129, "learning_rate": 8.089732204223263e-06, "loss": 0.6219, "step": 2976 }, { "epoch": 0.31, "grad_norm": 1.9052543682026897, "learning_rate": 8.088408492749534e-06, "loss": 0.6403, "step": 2977 }, { "epoch": 0.31, "grad_norm": 1.831209876851521, "learning_rate": 8.087084431187096e-06, "loss": 0.6012, "step": 2978 }, { "epoch": 0.31, "grad_norm": 2.015018382951914, "learning_rate": 8.085760019686044e-06, "loss": 0.7143, "step": 2979 }, { "epoch": 0.31, "grad_norm": 1.990275012744501, "learning_rate": 8.084435258396504e-06, "loss": 0.6116, "step": 2980 }, { "epoch": 0.31, "grad_norm": 1.7622362259074167, "learning_rate": 8.08311014746865e-06, "loss": 0.62, "step": 2981 }, { "epoch": 0.31, "grad_norm": 2.102214569940529, "learning_rate": 8.081784687052683e-06, "loss": 0.5395, "step": 2982 }, { "epoch": 0.31, "grad_norm": 1.8920824650503145, "learning_rate": 8.080458877298861e-06, "loss": 0.6173, "step": 2983 }, { "epoch": 0.31, "grad_norm": 1.825716574244372, "learning_rate": 8.079132718357465e-06, "loss": 0.715, "step": 2984 }, { "epoch": 0.31, "grad_norm": 1.8137038198264506, "learning_rate": 8.077806210378824e-06, "loss": 0.6763, "step": 2985 }, { "epoch": 0.31, "grad_norm": 1.9100852008920197, "learning_rate": 8.076479353513308e-06, "loss": 0.7405, "step": 2986 }, { "epoch": 0.31, "grad_norm": 2.008643615507193, "learning_rate": 8.07515214791132e-06, "loss": 0.6724, "step": 2987 }, { "epoch": 0.31, "grad_norm": 1.9322056861892911, "learning_rate": 8.073824593723309e-06, "loss": 0.7223, "step": 2988 }, { "epoch": 0.31, "grad_norm": 1.9330171400421932, "learning_rate": 8.07249669109976e-06, "loss": 0.6259, "step": 2989 }, { "epoch": 0.31, "grad_norm": 2.132240605171131, "learning_rate": 8.071168440191199e-06, "loss": 0.6683, "step": 2990 }, { "epoch": 0.31, "grad_norm": 1.71224736007814, "learning_rate": 8.06983984114819e-06, "loss": 0.6645, "step": 2991 }, { "epoch": 0.31, "grad_norm": 2.1475200177604536, "learning_rate": 8.068510894121338e-06, "loss": 0.7044, "step": 2992 }, { "epoch": 0.31, "grad_norm": 1.8431375374949153, "learning_rate": 8.067181599261285e-06, "loss": 0.5972, "step": 2993 }, { "epoch": 0.31, "grad_norm": 1.823307176827159, "learning_rate": 8.065851956718716e-06, "loss": 0.6707, "step": 2994 }, { "epoch": 0.31, "grad_norm": 1.9937691220905867, "learning_rate": 8.064521966644351e-06, "loss": 0.6577, "step": 2995 }, { "epoch": 0.31, "grad_norm": 1.7340263444241693, "learning_rate": 8.063191629188958e-06, "loss": 0.5489, "step": 2996 }, { "epoch": 0.31, "grad_norm": 1.9549885348959344, "learning_rate": 8.06186094450333e-06, "loss": 0.7293, "step": 2997 }, { "epoch": 0.31, "grad_norm": 2.759727563771705, "learning_rate": 8.060529912738316e-06, "loss": 0.7183, "step": 2998 }, { "epoch": 0.31, "grad_norm": 1.8394811728272813, "learning_rate": 8.05919853404479e-06, "loss": 0.6787, "step": 2999 }, { "epoch": 0.31, "grad_norm": 1.8405697026850263, "learning_rate": 8.057866808573672e-06, "loss": 0.6668, "step": 3000 }, { "epoch": 0.31, "grad_norm": 1.8378753932405179, "learning_rate": 8.056534736475923e-06, "loss": 0.6546, "step": 3001 }, { "epoch": 0.31, "grad_norm": 1.7713096886810131, "learning_rate": 8.05520231790254e-06, "loss": 0.7496, "step": 3002 }, { "epoch": 0.31, "grad_norm": 1.8134475570918778, "learning_rate": 8.053869553004561e-06, "loss": 0.6435, "step": 3003 }, { "epoch": 0.31, "grad_norm": 1.847138795743933, "learning_rate": 8.052536441933062e-06, "loss": 0.626, "step": 3004 }, { "epoch": 0.31, "grad_norm": 1.8868783461313385, "learning_rate": 8.051202984839157e-06, "loss": 0.6443, "step": 3005 }, { "epoch": 0.31, "grad_norm": 1.6274506637183248, "learning_rate": 8.049869181874002e-06, "loss": 0.6456, "step": 3006 }, { "epoch": 0.31, "grad_norm": 1.7934457191553317, "learning_rate": 8.048535033188794e-06, "loss": 0.5856, "step": 3007 }, { "epoch": 0.31, "grad_norm": 1.925470936240325, "learning_rate": 8.04720053893476e-06, "loss": 0.7103, "step": 3008 }, { "epoch": 0.31, "grad_norm": 2.0746079297930913, "learning_rate": 8.04586569926318e-06, "loss": 0.8232, "step": 3009 }, { "epoch": 0.31, "grad_norm": 1.8229354357781145, "learning_rate": 8.04453051432536e-06, "loss": 0.5973, "step": 3010 }, { "epoch": 0.31, "grad_norm": 2.17337743919196, "learning_rate": 8.043194984272656e-06, "loss": 0.7325, "step": 3011 }, { "epoch": 0.31, "grad_norm": 1.8513812475733578, "learning_rate": 8.041859109256452e-06, "loss": 0.6306, "step": 3012 }, { "epoch": 0.31, "grad_norm": 2.117685022304302, "learning_rate": 8.04052288942818e-06, "loss": 0.7018, "step": 3013 }, { "epoch": 0.31, "grad_norm": 1.8467774888427202, "learning_rate": 8.03918632493931e-06, "loss": 0.6535, "step": 3014 }, { "epoch": 0.31, "grad_norm": 1.841308511882857, "learning_rate": 8.037849415941346e-06, "loss": 0.5757, "step": 3015 }, { "epoch": 0.31, "grad_norm": 1.971472468230098, "learning_rate": 8.036512162585834e-06, "loss": 0.641, "step": 3016 }, { "epoch": 0.31, "grad_norm": 2.0178646419829653, "learning_rate": 8.035174565024362e-06, "loss": 0.671, "step": 3017 }, { "epoch": 0.31, "grad_norm": 1.6146581959501902, "learning_rate": 8.033836623408556e-06, "loss": 0.6094, "step": 3018 }, { "epoch": 0.31, "grad_norm": 1.873134384365632, "learning_rate": 8.032498337890073e-06, "loss": 0.7449, "step": 3019 }, { "epoch": 0.31, "grad_norm": 2.0365935522496765, "learning_rate": 8.03115970862062e-06, "loss": 0.6673, "step": 3020 }, { "epoch": 0.31, "grad_norm": 1.8359556335408065, "learning_rate": 8.029820735751936e-06, "loss": 0.5654, "step": 3021 }, { "epoch": 0.31, "grad_norm": 1.7270952911249733, "learning_rate": 8.028481419435803e-06, "loss": 0.6331, "step": 3022 }, { "epoch": 0.31, "grad_norm": 2.2156910867047936, "learning_rate": 8.02714175982404e-06, "loss": 0.7084, "step": 3023 }, { "epoch": 0.31, "grad_norm": 1.995649963769197, "learning_rate": 8.025801757068504e-06, "loss": 0.6732, "step": 3024 }, { "epoch": 0.31, "grad_norm": 1.9708209577525355, "learning_rate": 8.024461411321092e-06, "loss": 0.6283, "step": 3025 }, { "epoch": 0.31, "grad_norm": 1.7503337006314423, "learning_rate": 8.02312072273374e-06, "loss": 0.6433, "step": 3026 }, { "epoch": 0.31, "grad_norm": 1.9351669006196723, "learning_rate": 8.021779691458422e-06, "loss": 0.6875, "step": 3027 }, { "epoch": 0.31, "grad_norm": 1.654807381732136, "learning_rate": 8.020438317647155e-06, "loss": 0.6628, "step": 3028 }, { "epoch": 0.31, "grad_norm": 1.8618456773903507, "learning_rate": 8.019096601451987e-06, "loss": 0.6953, "step": 3029 }, { "epoch": 0.31, "grad_norm": 1.9551349205252637, "learning_rate": 8.017754543025012e-06, "loss": 0.6496, "step": 3030 }, { "epoch": 0.32, "grad_norm": 1.780040566722473, "learning_rate": 8.016412142518356e-06, "loss": 0.5778, "step": 3031 }, { "epoch": 0.32, "grad_norm": 1.8404946597473568, "learning_rate": 8.015069400084194e-06, "loss": 0.7011, "step": 3032 }, { "epoch": 0.32, "grad_norm": 2.0243422271373612, "learning_rate": 8.013726315874729e-06, "loss": 0.579, "step": 3033 }, { "epoch": 0.32, "grad_norm": 1.7886309414150088, "learning_rate": 8.012382890042208e-06, "loss": 0.6459, "step": 3034 }, { "epoch": 0.32, "grad_norm": 1.8805496619276878, "learning_rate": 8.011039122738918e-06, "loss": 0.6622, "step": 3035 }, { "epoch": 0.32, "grad_norm": 1.839648694046673, "learning_rate": 8.00969501411718e-06, "loss": 0.6199, "step": 3036 }, { "epoch": 0.32, "grad_norm": 2.397442942159792, "learning_rate": 8.008350564329356e-06, "loss": 0.6051, "step": 3037 }, { "epoch": 0.32, "grad_norm": 2.031673857387813, "learning_rate": 8.00700577352785e-06, "loss": 0.6209, "step": 3038 }, { "epoch": 0.32, "grad_norm": 1.6669416983842271, "learning_rate": 8.0056606418651e-06, "loss": 0.6872, "step": 3039 }, { "epoch": 0.32, "grad_norm": 1.8867515977043507, "learning_rate": 8.004315169493586e-06, "loss": 0.5904, "step": 3040 }, { "epoch": 0.32, "grad_norm": 1.755108581811021, "learning_rate": 8.002969356565822e-06, "loss": 0.7178, "step": 3041 }, { "epoch": 0.32, "grad_norm": 1.955755740139808, "learning_rate": 8.001623203234366e-06, "loss": 0.6451, "step": 3042 }, { "epoch": 0.32, "grad_norm": 1.7756454101534862, "learning_rate": 8.00027670965181e-06, "loss": 0.6337, "step": 3043 }, { "epoch": 0.32, "grad_norm": 1.8908982993281924, "learning_rate": 7.998929875970788e-06, "loss": 0.7074, "step": 3044 }, { "epoch": 0.32, "grad_norm": 1.8663129056512406, "learning_rate": 7.99758270234397e-06, "loss": 0.5291, "step": 3045 }, { "epoch": 0.32, "grad_norm": 1.701969315380072, "learning_rate": 7.99623518892407e-06, "loss": 0.6038, "step": 3046 }, { "epoch": 0.32, "grad_norm": 2.104064850307174, "learning_rate": 7.994887335863832e-06, "loss": 0.7409, "step": 3047 }, { "epoch": 0.32, "grad_norm": 1.8828855956551462, "learning_rate": 7.993539143316044e-06, "loss": 0.6762, "step": 3048 }, { "epoch": 0.32, "grad_norm": 1.7104446736903443, "learning_rate": 7.992190611433532e-06, "loss": 0.5661, "step": 3049 }, { "epoch": 0.32, "grad_norm": 2.07219151701992, "learning_rate": 7.99084174036916e-06, "loss": 0.69, "step": 3050 }, { "epoch": 0.32, "grad_norm": 1.9078394195572277, "learning_rate": 7.989492530275829e-06, "loss": 0.767, "step": 3051 }, { "epoch": 0.32, "grad_norm": 1.8675014184687138, "learning_rate": 7.988142981306479e-06, "loss": 0.615, "step": 3052 }, { "epoch": 0.32, "grad_norm": 1.713066568608695, "learning_rate": 7.98679309361409e-06, "loss": 0.6929, "step": 3053 }, { "epoch": 0.32, "grad_norm": 1.9366046584274337, "learning_rate": 7.985442867351682e-06, "loss": 0.7056, "step": 3054 }, { "epoch": 0.32, "grad_norm": 1.7526108241072664, "learning_rate": 7.984092302672306e-06, "loss": 0.618, "step": 3055 }, { "epoch": 0.32, "grad_norm": 1.8741774360912886, "learning_rate": 7.98274139972906e-06, "loss": 0.7573, "step": 3056 }, { "epoch": 0.32, "grad_norm": 1.8106219976633589, "learning_rate": 7.981390158675076e-06, "loss": 0.6292, "step": 3057 }, { "epoch": 0.32, "grad_norm": 1.8965265207010533, "learning_rate": 7.980038579663523e-06, "loss": 0.7095, "step": 3058 }, { "epoch": 0.32, "grad_norm": 1.7954077915009716, "learning_rate": 7.978686662847612e-06, "loss": 0.6326, "step": 3059 }, { "epoch": 0.32, "grad_norm": 2.0120709348450947, "learning_rate": 7.977334408380588e-06, "loss": 0.6684, "step": 3060 }, { "epoch": 0.32, "grad_norm": 1.7127762611593402, "learning_rate": 7.975981816415741e-06, "loss": 0.5592, "step": 3061 }, { "epoch": 0.32, "grad_norm": 1.9838766263136216, "learning_rate": 7.974628887106391e-06, "loss": 0.5739, "step": 3062 }, { "epoch": 0.32, "grad_norm": 1.8024132426453374, "learning_rate": 7.973275620605903e-06, "loss": 0.6057, "step": 3063 }, { "epoch": 0.32, "grad_norm": 1.662452177175643, "learning_rate": 7.971922017067674e-06, "loss": 0.5933, "step": 3064 }, { "epoch": 0.32, "grad_norm": 1.9625977664980205, "learning_rate": 7.970568076645149e-06, "loss": 0.5994, "step": 3065 }, { "epoch": 0.32, "grad_norm": 2.006758324902627, "learning_rate": 7.969213799491799e-06, "loss": 0.6422, "step": 3066 }, { "epoch": 0.32, "grad_norm": 1.883116643768088, "learning_rate": 7.96785918576114e-06, "loss": 0.6657, "step": 3067 }, { "epoch": 0.32, "grad_norm": 2.162848986767221, "learning_rate": 7.966504235606726e-06, "loss": 0.7306, "step": 3068 }, { "epoch": 0.32, "grad_norm": 2.1148968110067687, "learning_rate": 7.965148949182148e-06, "loss": 0.6631, "step": 3069 }, { "epoch": 0.32, "grad_norm": 1.7761884516910715, "learning_rate": 7.963793326641038e-06, "loss": 0.6629, "step": 3070 }, { "epoch": 0.32, "grad_norm": 1.9040220164550217, "learning_rate": 7.96243736813706e-06, "loss": 0.6321, "step": 3071 }, { "epoch": 0.32, "grad_norm": 1.993692247613143, "learning_rate": 7.961081073823921e-06, "loss": 0.6548, "step": 3072 }, { "epoch": 0.32, "grad_norm": 1.7461663526967959, "learning_rate": 7.959724443855366e-06, "loss": 0.6201, "step": 3073 }, { "epoch": 0.32, "grad_norm": 1.9928755049849687, "learning_rate": 7.958367478385172e-06, "loss": 0.6746, "step": 3074 }, { "epoch": 0.32, "grad_norm": 1.7754560830370942, "learning_rate": 7.957010177567167e-06, "loss": 0.6824, "step": 3075 }, { "epoch": 0.32, "grad_norm": 2.019669666508399, "learning_rate": 7.955652541555198e-06, "loss": 0.6759, "step": 3076 }, { "epoch": 0.32, "grad_norm": 1.868951468579273, "learning_rate": 7.954294570503171e-06, "loss": 0.7548, "step": 3077 }, { "epoch": 0.32, "grad_norm": 1.845669333759357, "learning_rate": 7.952936264565015e-06, "loss": 0.6105, "step": 3078 }, { "epoch": 0.32, "grad_norm": 1.821381002259593, "learning_rate": 7.951577623894701e-06, "loss": 0.6323, "step": 3079 }, { "epoch": 0.32, "grad_norm": 1.9570419308484384, "learning_rate": 7.950218648646242e-06, "loss": 0.708, "step": 3080 }, { "epoch": 0.32, "grad_norm": 1.8858894304328615, "learning_rate": 7.948859338973682e-06, "loss": 0.6006, "step": 3081 }, { "epoch": 0.32, "grad_norm": 1.7831579814605896, "learning_rate": 7.947499695031108e-06, "loss": 0.7099, "step": 3082 }, { "epoch": 0.32, "grad_norm": 1.8783313094793848, "learning_rate": 7.946139716972644e-06, "loss": 0.732, "step": 3083 }, { "epoch": 0.32, "grad_norm": 2.0736000767971667, "learning_rate": 7.94477940495245e-06, "loss": 0.7732, "step": 3084 }, { "epoch": 0.32, "grad_norm": 2.077886034109067, "learning_rate": 7.943418759124727e-06, "loss": 0.6822, "step": 3085 }, { "epoch": 0.32, "grad_norm": 1.7930850277354924, "learning_rate": 7.94205777964371e-06, "loss": 0.6518, "step": 3086 }, { "epoch": 0.32, "grad_norm": 1.8907545370407564, "learning_rate": 7.940696466663674e-06, "loss": 0.6399, "step": 3087 }, { "epoch": 0.32, "grad_norm": 1.838018871743233, "learning_rate": 7.939334820338933e-06, "loss": 0.5909, "step": 3088 }, { "epoch": 0.32, "grad_norm": 1.9243946073025104, "learning_rate": 7.937972840823836e-06, "loss": 0.6856, "step": 3089 }, { "epoch": 0.32, "grad_norm": 2.0112204626320285, "learning_rate": 7.93661052827277e-06, "loss": 0.7325, "step": 3090 }, { "epoch": 0.32, "grad_norm": 1.9773351694370116, "learning_rate": 7.935247882840164e-06, "loss": 0.6951, "step": 3091 }, { "epoch": 0.32, "grad_norm": 1.8575930787334212, "learning_rate": 7.933884904680482e-06, "loss": 0.6292, "step": 3092 }, { "epoch": 0.32, "grad_norm": 1.8549941118391462, "learning_rate": 7.93252159394822e-06, "loss": 0.7249, "step": 3093 }, { "epoch": 0.32, "grad_norm": 1.8390883570912002, "learning_rate": 7.931157950797923e-06, "loss": 0.5553, "step": 3094 }, { "epoch": 0.32, "grad_norm": 1.979121650127033, "learning_rate": 7.929793975384164e-06, "loss": 0.6871, "step": 3095 }, { "epoch": 0.32, "grad_norm": 1.8303793323181003, "learning_rate": 7.92842966786156e-06, "loss": 0.6484, "step": 3096 }, { "epoch": 0.32, "grad_norm": 1.8775457173794086, "learning_rate": 7.92706502838476e-06, "loss": 0.7069, "step": 3097 }, { "epoch": 0.32, "grad_norm": 1.7512552354294855, "learning_rate": 7.925700057108455e-06, "loss": 0.6866, "step": 3098 }, { "epoch": 0.32, "grad_norm": 2.036055977573231, "learning_rate": 7.924334754187373e-06, "loss": 0.6938, "step": 3099 }, { "epoch": 0.32, "grad_norm": 1.9444583395675479, "learning_rate": 7.92296911977628e-06, "loss": 0.6017, "step": 3100 }, { "epoch": 0.32, "grad_norm": 1.7671077634825694, "learning_rate": 7.921603154029976e-06, "loss": 0.6438, "step": 3101 }, { "epoch": 0.32, "grad_norm": 1.7985327525448345, "learning_rate": 7.920236857103301e-06, "loss": 0.5896, "step": 3102 }, { "epoch": 0.32, "grad_norm": 1.9693985426170422, "learning_rate": 7.918870229151134e-06, "loss": 0.6559, "step": 3103 }, { "epoch": 0.32, "grad_norm": 1.756742744849359, "learning_rate": 7.91750327032839e-06, "loss": 0.6919, "step": 3104 }, { "epoch": 0.32, "grad_norm": 1.7404813813771645, "learning_rate": 7.91613598079002e-06, "loss": 0.6627, "step": 3105 }, { "epoch": 0.32, "grad_norm": 1.7682475752042812, "learning_rate": 7.914768360691017e-06, "loss": 0.6542, "step": 3106 }, { "epoch": 0.32, "grad_norm": 1.9117815109314706, "learning_rate": 7.913400410186406e-06, "loss": 0.6916, "step": 3107 }, { "epoch": 0.32, "grad_norm": 2.0622229043342157, "learning_rate": 7.912032129431251e-06, "loss": 0.7196, "step": 3108 }, { "epoch": 0.32, "grad_norm": 1.767446056151579, "learning_rate": 7.91066351858066e-06, "loss": 0.6045, "step": 3109 }, { "epoch": 0.32, "grad_norm": 1.8688982836066614, "learning_rate": 7.909294577789765e-06, "loss": 0.6622, "step": 3110 }, { "epoch": 0.32, "grad_norm": 2.005588406771868, "learning_rate": 7.907925307213748e-06, "loss": 0.7336, "step": 3111 }, { "epoch": 0.32, "grad_norm": 1.8008940358030987, "learning_rate": 7.906555707007823e-06, "loss": 0.6675, "step": 3112 }, { "epoch": 0.32, "grad_norm": 1.912571767612056, "learning_rate": 7.905185777327242e-06, "loss": 0.6646, "step": 3113 }, { "epoch": 0.32, "grad_norm": 1.6446609630621263, "learning_rate": 7.903815518327295e-06, "loss": 0.6515, "step": 3114 }, { "epoch": 0.32, "grad_norm": 1.982274144676952, "learning_rate": 7.902444930163308e-06, "loss": 0.6789, "step": 3115 }, { "epoch": 0.32, "grad_norm": 1.8551387879880945, "learning_rate": 7.901074012990645e-06, "loss": 0.5955, "step": 3116 }, { "epoch": 0.32, "grad_norm": 1.943758584895994, "learning_rate": 7.899702766964705e-06, "loss": 0.7111, "step": 3117 }, { "epoch": 0.32, "grad_norm": 1.8648274627942667, "learning_rate": 7.898331192240929e-06, "loss": 0.7082, "step": 3118 }, { "epoch": 0.32, "grad_norm": 1.9477847308322247, "learning_rate": 7.896959288974792e-06, "loss": 0.7108, "step": 3119 }, { "epoch": 0.32, "grad_norm": 1.885086662138018, "learning_rate": 7.89558705732181e-06, "loss": 0.6246, "step": 3120 }, { "epoch": 0.32, "grad_norm": 1.9888061668612529, "learning_rate": 7.894214497437528e-06, "loss": 0.7098, "step": 3121 }, { "epoch": 0.32, "grad_norm": 1.9341899975077337, "learning_rate": 7.892841609477538e-06, "loss": 0.5994, "step": 3122 }, { "epoch": 0.32, "grad_norm": 1.9079692239038495, "learning_rate": 7.891468393597464e-06, "loss": 0.6772, "step": 3123 }, { "epoch": 0.32, "grad_norm": 2.038691877984083, "learning_rate": 7.890094849952964e-06, "loss": 0.6783, "step": 3124 }, { "epoch": 0.32, "grad_norm": 1.7539539211322792, "learning_rate": 7.888720978699742e-06, "loss": 0.6765, "step": 3125 }, { "epoch": 0.32, "grad_norm": 1.8011122274916558, "learning_rate": 7.88734677999353e-06, "loss": 0.6645, "step": 3126 }, { "epoch": 0.33, "grad_norm": 1.9309782177573798, "learning_rate": 7.885972253990104e-06, "loss": 0.711, "step": 3127 }, { "epoch": 0.33, "grad_norm": 1.8744956494093434, "learning_rate": 7.884597400845273e-06, "loss": 0.6176, "step": 3128 }, { "epoch": 0.33, "grad_norm": 1.8638622827816949, "learning_rate": 7.883222220714886e-06, "loss": 0.7032, "step": 3129 }, { "epoch": 0.33, "grad_norm": 1.776577837690079, "learning_rate": 7.881846713754826e-06, "loss": 0.6416, "step": 3130 }, { "epoch": 0.33, "grad_norm": 1.9796058522752333, "learning_rate": 7.880470880121015e-06, "loss": 0.628, "step": 3131 }, { "epoch": 0.33, "grad_norm": 1.9181012593383686, "learning_rate": 7.879094719969412e-06, "loss": 0.6968, "step": 3132 }, { "epoch": 0.33, "grad_norm": 1.7820935673678577, "learning_rate": 7.87771823345601e-06, "loss": 0.6633, "step": 3133 }, { "epoch": 0.33, "grad_norm": 1.7651158173417194, "learning_rate": 7.876341420736847e-06, "loss": 0.6512, "step": 3134 }, { "epoch": 0.33, "grad_norm": 1.8206169688036722, "learning_rate": 7.874964281967988e-06, "loss": 0.6045, "step": 3135 }, { "epoch": 0.33, "grad_norm": 2.0786032378811115, "learning_rate": 7.87358681730554e-06, "loss": 0.7924, "step": 3136 }, { "epoch": 0.33, "grad_norm": 2.1478641904169677, "learning_rate": 7.872209026905648e-06, "loss": 0.8035, "step": 3137 }, { "epoch": 0.33, "grad_norm": 1.8482328317441383, "learning_rate": 7.870830910924491e-06, "loss": 0.6109, "step": 3138 }, { "epoch": 0.33, "grad_norm": 1.9746459109210937, "learning_rate": 7.869452469518291e-06, "loss": 0.6593, "step": 3139 }, { "epoch": 0.33, "grad_norm": 2.3263447551499503, "learning_rate": 7.868073702843294e-06, "loss": 0.7088, "step": 3140 }, { "epoch": 0.33, "grad_norm": 1.790599477651934, "learning_rate": 7.866694611055796e-06, "loss": 0.6343, "step": 3141 }, { "epoch": 0.33, "grad_norm": 2.031239757144938, "learning_rate": 7.865315194312125e-06, "loss": 0.6643, "step": 3142 }, { "epoch": 0.33, "grad_norm": 2.0555372887136754, "learning_rate": 7.863935452768645e-06, "loss": 0.6539, "step": 3143 }, { "epoch": 0.33, "grad_norm": 1.9539749091015681, "learning_rate": 7.862555386581758e-06, "loss": 0.6238, "step": 3144 }, { "epoch": 0.33, "grad_norm": 1.7643599459994184, "learning_rate": 7.861174995907901e-06, "loss": 0.6501, "step": 3145 }, { "epoch": 0.33, "grad_norm": 1.7358705962081895, "learning_rate": 7.85979428090355e-06, "loss": 0.6352, "step": 3146 }, { "epoch": 0.33, "grad_norm": 1.8716834585310764, "learning_rate": 7.858413241725219e-06, "loss": 0.6743, "step": 3147 }, { "epoch": 0.33, "grad_norm": 1.9704127723150717, "learning_rate": 7.857031878529452e-06, "loss": 0.7033, "step": 3148 }, { "epoch": 0.33, "grad_norm": 1.9876812359612106, "learning_rate": 7.855650191472836e-06, "loss": 0.6894, "step": 3149 }, { "epoch": 0.33, "grad_norm": 1.8676202472340744, "learning_rate": 7.854268180711997e-06, "loss": 0.6776, "step": 3150 }, { "epoch": 0.33, "grad_norm": 2.016838800702593, "learning_rate": 7.852885846403591e-06, "loss": 0.5783, "step": 3151 }, { "epoch": 0.33, "grad_norm": 2.059826072634291, "learning_rate": 7.851503188704312e-06, "loss": 0.6996, "step": 3152 }, { "epoch": 0.33, "grad_norm": 1.9390691952182482, "learning_rate": 7.850120207770893e-06, "loss": 0.7219, "step": 3153 }, { "epoch": 0.33, "grad_norm": 1.8477942788865895, "learning_rate": 7.848736903760106e-06, "loss": 0.6796, "step": 3154 }, { "epoch": 0.33, "grad_norm": 1.7632667358112966, "learning_rate": 7.847353276828751e-06, "loss": 0.719, "step": 3155 }, { "epoch": 0.33, "grad_norm": 1.7943354805743594, "learning_rate": 7.845969327133673e-06, "loss": 0.6816, "step": 3156 }, { "epoch": 0.33, "grad_norm": 2.114565393931848, "learning_rate": 7.84458505483175e-06, "loss": 0.7039, "step": 3157 }, { "epoch": 0.33, "grad_norm": 2.0487194410229215, "learning_rate": 7.8432004600799e-06, "loss": 0.6646, "step": 3158 }, { "epoch": 0.33, "grad_norm": 2.196991395817911, "learning_rate": 7.84181554303507e-06, "loss": 0.7162, "step": 3159 }, { "epoch": 0.33, "grad_norm": 1.805330609982835, "learning_rate": 7.840430303854251e-06, "loss": 0.6998, "step": 3160 }, { "epoch": 0.33, "grad_norm": 1.945875095821067, "learning_rate": 7.839044742694466e-06, "loss": 0.6201, "step": 3161 }, { "epoch": 0.33, "grad_norm": 1.8994297100188413, "learning_rate": 7.83765885971278e-06, "loss": 0.6215, "step": 3162 }, { "epoch": 0.33, "grad_norm": 1.7735390094819343, "learning_rate": 7.836272655066286e-06, "loss": 0.6738, "step": 3163 }, { "epoch": 0.33, "grad_norm": 2.0506083726297994, "learning_rate": 7.83488612891212e-06, "loss": 0.6146, "step": 3164 }, { "epoch": 0.33, "grad_norm": 2.0616518744706984, "learning_rate": 7.833499281407455e-06, "loss": 0.6962, "step": 3165 }, { "epoch": 0.33, "grad_norm": 1.9640484511919805, "learning_rate": 7.832112112709496e-06, "loss": 0.6537, "step": 3166 }, { "epoch": 0.33, "grad_norm": 1.8916743371522864, "learning_rate": 7.830724622975485e-06, "loss": 0.6021, "step": 3167 }, { "epoch": 0.33, "grad_norm": 2.068753121157626, "learning_rate": 7.829336812362703e-06, "loss": 0.6653, "step": 3168 }, { "epoch": 0.33, "grad_norm": 1.8414392119014225, "learning_rate": 7.827948681028467e-06, "loss": 0.6749, "step": 3169 }, { "epoch": 0.33, "grad_norm": 1.8041772814742751, "learning_rate": 7.826560229130132e-06, "loss": 0.6561, "step": 3170 }, { "epoch": 0.33, "grad_norm": 2.277001270558421, "learning_rate": 7.82517145682508e-06, "loss": 0.7026, "step": 3171 }, { "epoch": 0.33, "grad_norm": 1.9932702774934739, "learning_rate": 7.823782364270743e-06, "loss": 0.6764, "step": 3172 }, { "epoch": 0.33, "grad_norm": 1.9152540765115291, "learning_rate": 7.82239295162458e-06, "loss": 0.5991, "step": 3173 }, { "epoch": 0.33, "grad_norm": 2.0171036692649142, "learning_rate": 7.821003219044087e-06, "loss": 0.6495, "step": 3174 }, { "epoch": 0.33, "grad_norm": 1.9017373728063542, "learning_rate": 7.819613166686802e-06, "loss": 0.6444, "step": 3175 }, { "epoch": 0.33, "grad_norm": 1.9178976145922144, "learning_rate": 7.818222794710293e-06, "loss": 0.61, "step": 3176 }, { "epoch": 0.33, "grad_norm": 2.3536736709136723, "learning_rate": 7.816832103272165e-06, "loss": 0.5589, "step": 3177 }, { "epoch": 0.33, "grad_norm": 2.5473526112248663, "learning_rate": 7.815441092530064e-06, "loss": 0.7009, "step": 3178 }, { "epoch": 0.33, "grad_norm": 1.9025338748284415, "learning_rate": 7.814049762641668e-06, "loss": 0.665, "step": 3179 }, { "epoch": 0.33, "grad_norm": 1.9662648376077074, "learning_rate": 7.812658113764691e-06, "loss": 0.8229, "step": 3180 }, { "epoch": 0.33, "grad_norm": 1.8632716806392016, "learning_rate": 7.811266146056886e-06, "loss": 0.7367, "step": 3181 }, { "epoch": 0.33, "grad_norm": 2.082465852054203, "learning_rate": 7.80987385967604e-06, "loss": 0.696, "step": 3182 }, { "epoch": 0.33, "grad_norm": 1.9024013485348124, "learning_rate": 7.808481254779975e-06, "loss": 0.7804, "step": 3183 }, { "epoch": 0.33, "grad_norm": 1.785436285512577, "learning_rate": 7.807088331526553e-06, "loss": 0.4849, "step": 3184 }, { "epoch": 0.33, "grad_norm": 1.933460760859134, "learning_rate": 7.805695090073668e-06, "loss": 0.6479, "step": 3185 }, { "epoch": 0.33, "grad_norm": 1.8728996996369847, "learning_rate": 7.804301530579253e-06, "loss": 0.6551, "step": 3186 }, { "epoch": 0.33, "grad_norm": 1.9368897835831256, "learning_rate": 7.802907653201275e-06, "loss": 0.7015, "step": 3187 }, { "epoch": 0.33, "grad_norm": 1.8425480322102283, "learning_rate": 7.801513458097741e-06, "loss": 0.6353, "step": 3188 }, { "epoch": 0.33, "grad_norm": 1.9522627071910101, "learning_rate": 7.800118945426684e-06, "loss": 0.6558, "step": 3189 }, { "epoch": 0.33, "grad_norm": 1.9459414208532624, "learning_rate": 7.798724115346188e-06, "loss": 0.6414, "step": 3190 }, { "epoch": 0.33, "grad_norm": 1.8198890780658505, "learning_rate": 7.797328968014359e-06, "loss": 0.6503, "step": 3191 }, { "epoch": 0.33, "grad_norm": 1.989239342204068, "learning_rate": 7.795933503589349e-06, "loss": 0.6533, "step": 3192 }, { "epoch": 0.33, "grad_norm": 1.9017341494606674, "learning_rate": 7.79453772222934e-06, "loss": 0.6688, "step": 3193 }, { "epoch": 0.33, "grad_norm": 1.790821468643048, "learning_rate": 7.793141624092551e-06, "loss": 0.6868, "step": 3194 }, { "epoch": 0.33, "grad_norm": 2.1081525065527806, "learning_rate": 7.791745209337239e-06, "loss": 0.7204, "step": 3195 }, { "epoch": 0.33, "grad_norm": 1.9121572482641518, "learning_rate": 7.790348478121695e-06, "loss": 0.7305, "step": 3196 }, { "epoch": 0.33, "grad_norm": 2.197598958597094, "learning_rate": 7.788951430604246e-06, "loss": 0.7218, "step": 3197 }, { "epoch": 0.33, "grad_norm": 1.7643539813858016, "learning_rate": 7.787554066943256e-06, "loss": 0.5945, "step": 3198 }, { "epoch": 0.33, "grad_norm": 1.9199918036679502, "learning_rate": 7.786156387297126e-06, "loss": 0.6765, "step": 3199 }, { "epoch": 0.33, "grad_norm": 1.9184094585478977, "learning_rate": 7.784758391824286e-06, "loss": 0.649, "step": 3200 }, { "epoch": 0.33, "grad_norm": 1.9359112716415736, "learning_rate": 7.783360080683212e-06, "loss": 0.6215, "step": 3201 }, { "epoch": 0.33, "grad_norm": 1.6650658879147164, "learning_rate": 7.781961454032407e-06, "loss": 0.6007, "step": 3202 }, { "epoch": 0.33, "grad_norm": 1.8712142665583873, "learning_rate": 7.780562512030414e-06, "loss": 0.6509, "step": 3203 }, { "epoch": 0.33, "grad_norm": 1.9979865163071713, "learning_rate": 7.77916325483581e-06, "loss": 0.7924, "step": 3204 }, { "epoch": 0.33, "grad_norm": 1.9307099915203076, "learning_rate": 7.777763682607214e-06, "loss": 0.6831, "step": 3205 }, { "epoch": 0.33, "grad_norm": 1.9719246908216845, "learning_rate": 7.77636379550327e-06, "loss": 0.6781, "step": 3206 }, { "epoch": 0.33, "grad_norm": 2.281909905417469, "learning_rate": 7.774963593682667e-06, "loss": 0.7001, "step": 3207 }, { "epoch": 0.33, "grad_norm": 1.9134686519705317, "learning_rate": 7.773563077304123e-06, "loss": 0.6533, "step": 3208 }, { "epoch": 0.33, "grad_norm": 1.7375846606061787, "learning_rate": 7.772162246526394e-06, "loss": 0.6, "step": 3209 }, { "epoch": 0.33, "grad_norm": 1.9210680843386594, "learning_rate": 7.770761101508274e-06, "loss": 0.6591, "step": 3210 }, { "epoch": 0.33, "grad_norm": 1.858930054263607, "learning_rate": 7.769359642408591e-06, "loss": 0.7036, "step": 3211 }, { "epoch": 0.33, "grad_norm": 2.024890925973368, "learning_rate": 7.767957869386208e-06, "loss": 0.6565, "step": 3212 }, { "epoch": 0.33, "grad_norm": 1.8619952533612665, "learning_rate": 7.766555782600023e-06, "loss": 0.5979, "step": 3213 }, { "epoch": 0.33, "grad_norm": 1.6727164069719362, "learning_rate": 7.765153382208972e-06, "loss": 0.6987, "step": 3214 }, { "epoch": 0.33, "grad_norm": 1.8507203779819241, "learning_rate": 7.763750668372023e-06, "loss": 0.628, "step": 3215 }, { "epoch": 0.33, "grad_norm": 1.9341511347273956, "learning_rate": 7.762347641248182e-06, "loss": 0.607, "step": 3216 }, { "epoch": 0.33, "grad_norm": 1.8954280950807452, "learning_rate": 7.760944300996494e-06, "loss": 0.6493, "step": 3217 }, { "epoch": 0.33, "grad_norm": 2.103132213288938, "learning_rate": 7.759540647776031e-06, "loss": 0.656, "step": 3218 }, { "epoch": 0.33, "grad_norm": 1.964201049747272, "learning_rate": 7.758136681745907e-06, "loss": 0.6294, "step": 3219 }, { "epoch": 0.33, "grad_norm": 1.731784592727429, "learning_rate": 7.756732403065269e-06, "loss": 0.633, "step": 3220 }, { "epoch": 0.33, "grad_norm": 1.8723486158466771, "learning_rate": 7.7553278118933e-06, "loss": 0.5642, "step": 3221 }, { "epoch": 0.33, "grad_norm": 1.912846816349759, "learning_rate": 7.75392290838922e-06, "loss": 0.6209, "step": 3222 }, { "epoch": 0.34, "grad_norm": 2.0355518598418763, "learning_rate": 7.75251769271228e-06, "loss": 0.6866, "step": 3223 }, { "epoch": 0.34, "grad_norm": 1.8024911163155974, "learning_rate": 7.751112165021771e-06, "loss": 0.6965, "step": 3224 }, { "epoch": 0.34, "grad_norm": 1.992035602097195, "learning_rate": 7.749706325477017e-06, "loss": 0.727, "step": 3225 }, { "epoch": 0.34, "grad_norm": 2.012344638600577, "learning_rate": 7.74830017423738e-06, "loss": 0.6522, "step": 3226 }, { "epoch": 0.34, "grad_norm": 1.7334097631987118, "learning_rate": 7.74689371146225e-06, "loss": 0.7054, "step": 3227 }, { "epoch": 0.34, "grad_norm": 1.910822938912061, "learning_rate": 7.745486937311065e-06, "loss": 0.699, "step": 3228 }, { "epoch": 0.34, "grad_norm": 2.189272683241941, "learning_rate": 7.744079851943286e-06, "loss": 0.6648, "step": 3229 }, { "epoch": 0.34, "grad_norm": 1.777075786244854, "learning_rate": 7.742672455518413e-06, "loss": 0.7164, "step": 3230 }, { "epoch": 0.34, "grad_norm": 1.7858757362545894, "learning_rate": 7.741264748195984e-06, "loss": 0.6449, "step": 3231 }, { "epoch": 0.34, "grad_norm": 1.9753861500283088, "learning_rate": 7.739856730135575e-06, "loss": 0.6902, "step": 3232 }, { "epoch": 0.34, "grad_norm": 1.6698498743407861, "learning_rate": 7.738448401496785e-06, "loss": 0.6012, "step": 3233 }, { "epoch": 0.34, "grad_norm": 2.0095261492551115, "learning_rate": 7.737039762439263e-06, "loss": 0.7093, "step": 3234 }, { "epoch": 0.34, "grad_norm": 1.7863240773519469, "learning_rate": 7.735630813122683e-06, "loss": 0.6411, "step": 3235 }, { "epoch": 0.34, "grad_norm": 1.7699717846306777, "learning_rate": 7.734221553706756e-06, "loss": 0.6196, "step": 3236 }, { "epoch": 0.34, "grad_norm": 1.8298275662451435, "learning_rate": 7.732811984351232e-06, "loss": 0.6401, "step": 3237 }, { "epoch": 0.34, "grad_norm": 1.8757903882230937, "learning_rate": 7.731402105215892e-06, "loss": 0.6505, "step": 3238 }, { "epoch": 0.34, "grad_norm": 1.8771166804066328, "learning_rate": 7.729991916460558e-06, "loss": 0.6448, "step": 3239 }, { "epoch": 0.34, "grad_norm": 2.0422800495620725, "learning_rate": 7.728581418245078e-06, "loss": 0.7242, "step": 3240 }, { "epoch": 0.34, "grad_norm": 1.9500585202348317, "learning_rate": 7.727170610729341e-06, "loss": 0.6406, "step": 3241 }, { "epoch": 0.34, "grad_norm": 1.9179487116731881, "learning_rate": 7.725759494073272e-06, "loss": 0.709, "step": 3242 }, { "epoch": 0.34, "grad_norm": 1.8442830499995486, "learning_rate": 7.72434806843683e-06, "loss": 0.5964, "step": 3243 }, { "epoch": 0.34, "grad_norm": 1.9596900219378959, "learning_rate": 7.722936333980002e-06, "loss": 0.6349, "step": 3244 }, { "epoch": 0.34, "grad_norm": 1.8473944971850758, "learning_rate": 7.721524290862821e-06, "loss": 0.6101, "step": 3245 }, { "epoch": 0.34, "grad_norm": 1.903216419149971, "learning_rate": 7.720111939245351e-06, "loss": 0.6553, "step": 3246 }, { "epoch": 0.34, "grad_norm": 2.1706479120378255, "learning_rate": 7.71869927928769e-06, "loss": 0.5984, "step": 3247 }, { "epoch": 0.34, "grad_norm": 1.9082270614604948, "learning_rate": 7.717286311149967e-06, "loss": 0.5559, "step": 3248 }, { "epoch": 0.34, "grad_norm": 1.8074082902284876, "learning_rate": 7.715873034992354e-06, "loss": 0.6667, "step": 3249 }, { "epoch": 0.34, "grad_norm": 2.4590764946541355, "learning_rate": 7.714459450975052e-06, "loss": 0.6183, "step": 3250 }, { "epoch": 0.34, "grad_norm": 1.7625406627291564, "learning_rate": 7.7130455592583e-06, "loss": 0.7122, "step": 3251 }, { "epoch": 0.34, "grad_norm": 2.104638475133327, "learning_rate": 7.71163136000237e-06, "loss": 0.6527, "step": 3252 }, { "epoch": 0.34, "grad_norm": 1.8891265825485541, "learning_rate": 7.710216853367568e-06, "loss": 0.6901, "step": 3253 }, { "epoch": 0.34, "grad_norm": 2.2146816583793743, "learning_rate": 7.708802039514238e-06, "loss": 0.7759, "step": 3254 }, { "epoch": 0.34, "grad_norm": 1.9382427214432836, "learning_rate": 7.707386918602759e-06, "loss": 0.6797, "step": 3255 }, { "epoch": 0.34, "grad_norm": 2.070382342712708, "learning_rate": 7.70597149079354e-06, "loss": 0.6483, "step": 3256 }, { "epoch": 0.34, "grad_norm": 1.767128407067787, "learning_rate": 7.70455575624703e-06, "loss": 0.5626, "step": 3257 }, { "epoch": 0.34, "grad_norm": 1.7058603097337257, "learning_rate": 7.70313971512371e-06, "loss": 0.4997, "step": 3258 }, { "epoch": 0.34, "grad_norm": 1.7513131467756609, "learning_rate": 7.701723367584094e-06, "loss": 0.6709, "step": 3259 }, { "epoch": 0.34, "grad_norm": 1.8236846567523217, "learning_rate": 7.700306713788735e-06, "loss": 0.6769, "step": 3260 }, { "epoch": 0.34, "grad_norm": 2.1815094050601966, "learning_rate": 7.69888975389822e-06, "loss": 0.7312, "step": 3261 }, { "epoch": 0.34, "grad_norm": 1.8049118711336998, "learning_rate": 7.697472488073168e-06, "loss": 0.6029, "step": 3262 }, { "epoch": 0.34, "grad_norm": 1.6981106891148934, "learning_rate": 7.696054916474235e-06, "loss": 0.6433, "step": 3263 }, { "epoch": 0.34, "grad_norm": 1.891118335430464, "learning_rate": 7.694637039262109e-06, "loss": 0.6485, "step": 3264 }, { "epoch": 0.34, "grad_norm": 1.7889227423611733, "learning_rate": 7.693218856597515e-06, "loss": 0.6024, "step": 3265 }, { "epoch": 0.34, "grad_norm": 1.9373484978474884, "learning_rate": 7.691800368641214e-06, "loss": 0.6609, "step": 3266 }, { "epoch": 0.34, "grad_norm": 1.7546189984312635, "learning_rate": 7.690381575553998e-06, "loss": 0.6001, "step": 3267 }, { "epoch": 0.34, "grad_norm": 1.742031273653571, "learning_rate": 7.688962477496696e-06, "loss": 0.6134, "step": 3268 }, { "epoch": 0.34, "grad_norm": 1.8461145661001945, "learning_rate": 7.68754307463017e-06, "loss": 0.6021, "step": 3269 }, { "epoch": 0.34, "grad_norm": 1.852835261703502, "learning_rate": 7.68612336711532e-06, "loss": 0.6454, "step": 3270 }, { "epoch": 0.34, "grad_norm": 2.2319112287773493, "learning_rate": 7.684703355113074e-06, "loss": 0.799, "step": 3271 }, { "epoch": 0.34, "grad_norm": 1.8107744027642172, "learning_rate": 7.683283038784402e-06, "loss": 0.6029, "step": 3272 }, { "epoch": 0.34, "grad_norm": 2.0367129090031293, "learning_rate": 7.681862418290302e-06, "loss": 0.7111, "step": 3273 }, { "epoch": 0.34, "grad_norm": 1.8995914346640856, "learning_rate": 7.68044149379181e-06, "loss": 0.6072, "step": 3274 }, { "epoch": 0.34, "grad_norm": 1.8071434072693784, "learning_rate": 7.679020265449999e-06, "loss": 0.6942, "step": 3275 }, { "epoch": 0.34, "grad_norm": 1.8946083630450248, "learning_rate": 7.67759873342597e-06, "loss": 0.666, "step": 3276 }, { "epoch": 0.34, "grad_norm": 1.8185369692074362, "learning_rate": 7.676176897880862e-06, "loss": 0.6498, "step": 3277 }, { "epoch": 0.34, "grad_norm": 1.8510420885532568, "learning_rate": 7.67475475897585e-06, "loss": 0.5591, "step": 3278 }, { "epoch": 0.34, "grad_norm": 1.8125241947077075, "learning_rate": 7.67333231687214e-06, "loss": 0.6652, "step": 3279 }, { "epoch": 0.34, "grad_norm": 1.6649027457440944, "learning_rate": 7.671909571730974e-06, "loss": 0.6202, "step": 3280 }, { "epoch": 0.34, "grad_norm": 1.6610788823267293, "learning_rate": 7.67048652371363e-06, "loss": 0.5144, "step": 3281 }, { "epoch": 0.34, "grad_norm": 2.051859824910609, "learning_rate": 7.669063172981415e-06, "loss": 0.8101, "step": 3282 }, { "epoch": 0.34, "grad_norm": 1.8135258363900928, "learning_rate": 7.667639519695678e-06, "loss": 0.6548, "step": 3283 }, { "epoch": 0.34, "grad_norm": 2.0789065749765734, "learning_rate": 7.666215564017797e-06, "loss": 0.7535, "step": 3284 }, { "epoch": 0.34, "grad_norm": 1.9976097148077065, "learning_rate": 7.664791306109183e-06, "loss": 0.6333, "step": 3285 }, { "epoch": 0.34, "grad_norm": 1.792724714904917, "learning_rate": 7.663366746131286e-06, "loss": 0.6677, "step": 3286 }, { "epoch": 0.34, "grad_norm": 2.01046056944132, "learning_rate": 7.661941884245589e-06, "loss": 0.5846, "step": 3287 }, { "epoch": 0.34, "grad_norm": 1.8378314305059873, "learning_rate": 7.660516720613606e-06, "loss": 0.7189, "step": 3288 }, { "epoch": 0.34, "grad_norm": 2.116097571217621, "learning_rate": 7.659091255396888e-06, "loss": 0.6876, "step": 3289 }, { "epoch": 0.34, "grad_norm": 2.452763925603077, "learning_rate": 7.65766548875702e-06, "loss": 0.6209, "step": 3290 }, { "epoch": 0.34, "grad_norm": 1.7086733403610908, "learning_rate": 7.656239420855621e-06, "loss": 0.6561, "step": 3291 }, { "epoch": 0.34, "grad_norm": 2.1055982116655105, "learning_rate": 7.654813051854345e-06, "loss": 0.6497, "step": 3292 }, { "epoch": 0.34, "grad_norm": 1.945103058631345, "learning_rate": 7.653386381914874e-06, "loss": 0.7458, "step": 3293 }, { "epoch": 0.34, "grad_norm": 1.8639467359374053, "learning_rate": 7.651959411198934e-06, "loss": 0.6667, "step": 3294 }, { "epoch": 0.34, "grad_norm": 1.9314142195588189, "learning_rate": 7.65053213986828e-06, "loss": 0.6434, "step": 3295 }, { "epoch": 0.34, "grad_norm": 1.968743977461504, "learning_rate": 7.649104568084701e-06, "loss": 0.6727, "step": 3296 }, { "epoch": 0.34, "grad_norm": 1.6173702518058752, "learning_rate": 7.64767669601002e-06, "loss": 0.6738, "step": 3297 }, { "epoch": 0.34, "grad_norm": 1.819260191517557, "learning_rate": 7.646248523806092e-06, "loss": 0.7187, "step": 3298 }, { "epoch": 0.34, "grad_norm": 1.711390241856095, "learning_rate": 7.644820051634813e-06, "loss": 0.5777, "step": 3299 }, { "epoch": 0.34, "grad_norm": 2.371999947619344, "learning_rate": 7.643391279658106e-06, "loss": 0.5607, "step": 3300 }, { "epoch": 0.34, "grad_norm": 2.080268194932253, "learning_rate": 7.64196220803793e-06, "loss": 0.6454, "step": 3301 }, { "epoch": 0.34, "grad_norm": 1.8161890897658788, "learning_rate": 7.640532836936279e-06, "loss": 0.6145, "step": 3302 }, { "epoch": 0.34, "grad_norm": 1.8867414994265745, "learning_rate": 7.639103166515179e-06, "loss": 0.7367, "step": 3303 }, { "epoch": 0.34, "grad_norm": 2.0829266047249013, "learning_rate": 7.637673196936694e-06, "loss": 0.7419, "step": 3304 }, { "epoch": 0.34, "grad_norm": 2.021524026140808, "learning_rate": 7.636242928362918e-06, "loss": 0.7149, "step": 3305 }, { "epoch": 0.34, "grad_norm": 1.9487343164474362, "learning_rate": 7.634812360955982e-06, "loss": 0.7065, "step": 3306 }, { "epoch": 0.34, "grad_norm": 2.0147112197597092, "learning_rate": 7.633381494878042e-06, "loss": 0.6809, "step": 3307 }, { "epoch": 0.34, "grad_norm": 2.033897246264916, "learning_rate": 7.631950330291305e-06, "loss": 0.6792, "step": 3308 }, { "epoch": 0.34, "grad_norm": 1.7911815368426338, "learning_rate": 7.630518867357994e-06, "loss": 0.6244, "step": 3309 }, { "epoch": 0.34, "grad_norm": 1.8465211407886768, "learning_rate": 7.629087106240376e-06, "loss": 0.6385, "step": 3310 }, { "epoch": 0.34, "grad_norm": 1.8768778769605858, "learning_rate": 7.6276550471007486e-06, "loss": 0.6325, "step": 3311 }, { "epoch": 0.34, "grad_norm": 1.8761662832688346, "learning_rate": 7.626222690101445e-06, "loss": 0.6617, "step": 3312 }, { "epoch": 0.34, "grad_norm": 1.8842109296938618, "learning_rate": 7.624790035404831e-06, "loss": 0.6187, "step": 3313 }, { "epoch": 0.34, "grad_norm": 1.998556004167101, "learning_rate": 7.623357083173306e-06, "loss": 0.6287, "step": 3314 }, { "epoch": 0.34, "grad_norm": 1.9369889183368985, "learning_rate": 7.621923833569301e-06, "loss": 0.6092, "step": 3315 }, { "epoch": 0.34, "grad_norm": 1.8729477963392878, "learning_rate": 7.620490286755286e-06, "loss": 0.6911, "step": 3316 }, { "epoch": 0.34, "grad_norm": 1.9822928384022405, "learning_rate": 7.619056442893762e-06, "loss": 0.662, "step": 3317 }, { "epoch": 0.34, "grad_norm": 1.7248626081500673, "learning_rate": 7.61762230214726e-06, "loss": 0.6479, "step": 3318 }, { "epoch": 0.35, "grad_norm": 2.0789146795929487, "learning_rate": 7.616187864678352e-06, "loss": 0.6182, "step": 3319 }, { "epoch": 0.35, "grad_norm": 1.838839059416301, "learning_rate": 7.614753130649638e-06, "loss": 0.656, "step": 3320 }, { "epoch": 0.35, "grad_norm": 1.7187828108507008, "learning_rate": 7.613318100223752e-06, "loss": 0.6863, "step": 3321 }, { "epoch": 0.35, "grad_norm": 1.695488574423953, "learning_rate": 7.611882773563364e-06, "loss": 0.6258, "step": 3322 }, { "epoch": 0.35, "grad_norm": 1.758042093943283, "learning_rate": 7.610447150831175e-06, "loss": 0.6084, "step": 3323 }, { "epoch": 0.35, "grad_norm": 1.9600377009168812, "learning_rate": 7.609011232189925e-06, "loss": 0.6382, "step": 3324 }, { "epoch": 0.35, "grad_norm": 2.004029691758048, "learning_rate": 7.60757501780238e-06, "loss": 0.6046, "step": 3325 }, { "epoch": 0.35, "grad_norm": 1.6927785653074294, "learning_rate": 7.6061385078313424e-06, "loss": 0.6248, "step": 3326 }, { "epoch": 0.35, "grad_norm": 1.7976204301469354, "learning_rate": 7.604701702439652e-06, "loss": 0.6413, "step": 3327 }, { "epoch": 0.35, "grad_norm": 1.8544041865004253, "learning_rate": 7.603264601790178e-06, "loss": 0.6129, "step": 3328 }, { "epoch": 0.35, "grad_norm": 1.708847319417589, "learning_rate": 7.601827206045822e-06, "loss": 0.5682, "step": 3329 }, { "epoch": 0.35, "grad_norm": 1.8900069868675693, "learning_rate": 7.600389515369522e-06, "loss": 0.6713, "step": 3330 }, { "epoch": 0.35, "grad_norm": 1.667577105224544, "learning_rate": 7.59895152992425e-06, "loss": 0.5176, "step": 3331 }, { "epoch": 0.35, "grad_norm": 1.823563725373987, "learning_rate": 7.597513249873008e-06, "loss": 0.6509, "step": 3332 }, { "epoch": 0.35, "grad_norm": 2.1014119569769187, "learning_rate": 7.5960746753788335e-06, "loss": 0.7477, "step": 3333 }, { "epoch": 0.35, "grad_norm": 1.9398729490943276, "learning_rate": 7.594635806604797e-06, "loss": 0.6432, "step": 3334 }, { "epoch": 0.35, "grad_norm": 1.689141864675774, "learning_rate": 7.593196643714005e-06, "loss": 0.6297, "step": 3335 }, { "epoch": 0.35, "grad_norm": 1.8997762411676566, "learning_rate": 7.5917571868695905e-06, "loss": 0.6331, "step": 3336 }, { "epoch": 0.35, "grad_norm": 2.0613676536332335, "learning_rate": 7.5903174362347265e-06, "loss": 0.6842, "step": 3337 }, { "epoch": 0.35, "grad_norm": 1.9957895885750523, "learning_rate": 7.5888773919726176e-06, "loss": 0.7165, "step": 3338 }, { "epoch": 0.35, "grad_norm": 2.018970903898421, "learning_rate": 7.5874370542465005e-06, "loss": 0.6857, "step": 3339 }, { "epoch": 0.35, "grad_norm": 1.961825625490136, "learning_rate": 7.585996423219643e-06, "loss": 0.6977, "step": 3340 }, { "epoch": 0.35, "grad_norm": 2.1271983391631766, "learning_rate": 7.584555499055355e-06, "loss": 0.7688, "step": 3341 }, { "epoch": 0.35, "grad_norm": 1.7660688433101426, "learning_rate": 7.5831142819169664e-06, "loss": 0.5719, "step": 3342 }, { "epoch": 0.35, "grad_norm": 1.9487869971090308, "learning_rate": 7.581672771967854e-06, "loss": 0.7091, "step": 3343 }, { "epoch": 0.35, "grad_norm": 1.9584035184171076, "learning_rate": 7.5802309693714145e-06, "loss": 0.6265, "step": 3344 }, { "epoch": 0.35, "grad_norm": 1.8973868826432616, "learning_rate": 7.57878887429109e-06, "loss": 0.5412, "step": 3345 }, { "epoch": 0.35, "grad_norm": 1.8292437136670205, "learning_rate": 7.5773464868903465e-06, "loss": 0.5505, "step": 3346 }, { "epoch": 0.35, "grad_norm": 2.110059781420011, "learning_rate": 7.57590380733269e-06, "loss": 0.6323, "step": 3347 }, { "epoch": 0.35, "grad_norm": 1.8430200609730625, "learning_rate": 7.574460835781654e-06, "loss": 0.5947, "step": 3348 }, { "epoch": 0.35, "grad_norm": 1.8841432114527707, "learning_rate": 7.573017572400807e-06, "loss": 0.6783, "step": 3349 }, { "epoch": 0.35, "grad_norm": 2.013283679598883, "learning_rate": 7.571574017353755e-06, "loss": 0.6922, "step": 3350 }, { "epoch": 0.35, "grad_norm": 1.773668795659689, "learning_rate": 7.570130170804129e-06, "loss": 0.6641, "step": 3351 }, { "epoch": 0.35, "grad_norm": 1.746076214972598, "learning_rate": 7.5686860329156e-06, "loss": 0.5992, "step": 3352 }, { "epoch": 0.35, "grad_norm": 1.8380004264405159, "learning_rate": 7.567241603851866e-06, "loss": 0.6498, "step": 3353 }, { "epoch": 0.35, "grad_norm": 1.908526988253742, "learning_rate": 7.565796883776666e-06, "loss": 0.679, "step": 3354 }, { "epoch": 0.35, "grad_norm": 1.7663503430585459, "learning_rate": 7.564351872853763e-06, "loss": 0.6488, "step": 3355 }, { "epoch": 0.35, "grad_norm": 1.8698403441208586, "learning_rate": 7.5629065712469595e-06, "loss": 0.6297, "step": 3356 }, { "epoch": 0.35, "grad_norm": 1.7693710236384135, "learning_rate": 7.561460979120088e-06, "loss": 0.6881, "step": 3357 }, { "epoch": 0.35, "grad_norm": 1.8736827010560169, "learning_rate": 7.560015096637015e-06, "loss": 0.643, "step": 3358 }, { "epoch": 0.35, "grad_norm": 1.9932305255398057, "learning_rate": 7.558568923961638e-06, "loss": 0.6615, "step": 3359 }, { "epoch": 0.35, "grad_norm": 1.7329221135497108, "learning_rate": 7.557122461257891e-06, "loss": 0.5559, "step": 3360 }, { "epoch": 0.35, "grad_norm": 1.7334989136140522, "learning_rate": 7.555675708689738e-06, "loss": 0.7119, "step": 3361 }, { "epoch": 0.35, "grad_norm": 1.941030820066443, "learning_rate": 7.554228666421176e-06, "loss": 0.7062, "step": 3362 }, { "epoch": 0.35, "grad_norm": 1.6938476484173985, "learning_rate": 7.552781334616237e-06, "loss": 0.7022, "step": 3363 }, { "epoch": 0.35, "grad_norm": 2.086372882874223, "learning_rate": 7.551333713438982e-06, "loss": 0.5912, "step": 3364 }, { "epoch": 0.35, "grad_norm": 2.0681430639202976, "learning_rate": 7.549885803053509e-06, "loss": 0.6965, "step": 3365 }, { "epoch": 0.35, "grad_norm": 1.8155626765570805, "learning_rate": 7.548437603623947e-06, "loss": 0.6455, "step": 3366 }, { "epoch": 0.35, "grad_norm": 2.0742329496020178, "learning_rate": 7.546989115314456e-06, "loss": 0.6947, "step": 3367 }, { "epoch": 0.35, "grad_norm": 1.7556510523856663, "learning_rate": 7.5455403382892325e-06, "loss": 0.6135, "step": 3368 }, { "epoch": 0.35, "grad_norm": 1.9078576489775798, "learning_rate": 7.544091272712501e-06, "loss": 0.6017, "step": 3369 }, { "epoch": 0.35, "grad_norm": 1.6812110139609036, "learning_rate": 7.542641918748526e-06, "loss": 0.584, "step": 3370 }, { "epoch": 0.35, "grad_norm": 2.1753283760243174, "learning_rate": 7.541192276561594e-06, "loss": 0.6506, "step": 3371 }, { "epoch": 0.35, "grad_norm": 1.9246927540441021, "learning_rate": 7.539742346316035e-06, "loss": 0.6418, "step": 3372 }, { "epoch": 0.35, "grad_norm": 2.0764921092291715, "learning_rate": 7.538292128176204e-06, "loss": 0.7311, "step": 3373 }, { "epoch": 0.35, "grad_norm": 2.0016266904008444, "learning_rate": 7.536841622306491e-06, "loss": 0.6698, "step": 3374 }, { "epoch": 0.35, "grad_norm": 1.7476993025192442, "learning_rate": 7.535390828871322e-06, "loss": 0.5843, "step": 3375 }, { "epoch": 0.35, "grad_norm": 2.1443050656742795, "learning_rate": 7.5339397480351525e-06, "loss": 0.6661, "step": 3376 }, { "epoch": 0.35, "grad_norm": 1.9640498566143108, "learning_rate": 7.532488379962468e-06, "loss": 0.6737, "step": 3377 }, { "epoch": 0.35, "grad_norm": 1.894660279182222, "learning_rate": 7.531036724817791e-06, "loss": 0.6363, "step": 3378 }, { "epoch": 0.35, "grad_norm": 1.865129793714187, "learning_rate": 7.529584782765675e-06, "loss": 0.6154, "step": 3379 }, { "epoch": 0.35, "grad_norm": 2.0631351014194634, "learning_rate": 7.528132553970706e-06, "loss": 0.6748, "step": 3380 }, { "epoch": 0.35, "grad_norm": 1.6704861005463274, "learning_rate": 7.526680038597502e-06, "loss": 0.6883, "step": 3381 }, { "epoch": 0.35, "grad_norm": 2.093149404086043, "learning_rate": 7.525227236810715e-06, "loss": 0.5749, "step": 3382 }, { "epoch": 0.35, "grad_norm": 2.0045663651778387, "learning_rate": 7.523774148775027e-06, "loss": 0.7112, "step": 3383 }, { "epoch": 0.35, "grad_norm": 1.879588638216167, "learning_rate": 7.522320774655154e-06, "loss": 0.632, "step": 3384 }, { "epoch": 0.35, "grad_norm": 1.796450452129187, "learning_rate": 7.520867114615844e-06, "loss": 0.6143, "step": 3385 }, { "epoch": 0.35, "grad_norm": 1.9221269954863394, "learning_rate": 7.519413168821878e-06, "loss": 0.6371, "step": 3386 }, { "epoch": 0.35, "grad_norm": 2.245278008970985, "learning_rate": 7.5179589374380705e-06, "loss": 0.6296, "step": 3387 }, { "epoch": 0.35, "grad_norm": 1.928866713539612, "learning_rate": 7.516504420629264e-06, "loss": 0.6113, "step": 3388 }, { "epoch": 0.35, "grad_norm": 2.295181510678045, "learning_rate": 7.515049618560337e-06, "loss": 0.6031, "step": 3389 }, { "epoch": 0.35, "grad_norm": 2.195352983128163, "learning_rate": 7.513594531396202e-06, "loss": 0.6993, "step": 3390 }, { "epoch": 0.35, "grad_norm": 1.7327908331334299, "learning_rate": 7.512139159301801e-06, "loss": 0.5751, "step": 3391 }, { "epoch": 0.35, "grad_norm": 1.7569641738070485, "learning_rate": 7.510683502442105e-06, "loss": 0.6413, "step": 3392 }, { "epoch": 0.35, "grad_norm": 1.6720116832024288, "learning_rate": 7.5092275609821254e-06, "loss": 0.6304, "step": 3393 }, { "epoch": 0.35, "grad_norm": 2.164668470616303, "learning_rate": 7.5077713350869e-06, "loss": 0.7014, "step": 3394 }, { "epoch": 0.35, "grad_norm": 2.0418743773840204, "learning_rate": 7.506314824921498e-06, "loss": 0.7441, "step": 3395 }, { "epoch": 0.35, "grad_norm": 1.9352876217904793, "learning_rate": 7.504858030651026e-06, "loss": 0.7133, "step": 3396 }, { "epoch": 0.35, "grad_norm": 1.9893676571328642, "learning_rate": 7.503400952440618e-06, "loss": 0.679, "step": 3397 }, { "epoch": 0.35, "grad_norm": 2.0088136972631725, "learning_rate": 7.501943590455445e-06, "loss": 0.622, "step": 3398 }, { "epoch": 0.35, "grad_norm": 1.635984512656272, "learning_rate": 7.500485944860705e-06, "loss": 0.5652, "step": 3399 }, { "epoch": 0.35, "grad_norm": 1.870219344342775, "learning_rate": 7.49902801582163e-06, "loss": 0.676, "step": 3400 }, { "epoch": 0.35, "grad_norm": 2.19177716835321, "learning_rate": 7.497569803503486e-06, "loss": 0.7368, "step": 3401 }, { "epoch": 0.35, "grad_norm": 1.9242849370350925, "learning_rate": 7.49611130807157e-06, "loss": 0.7252, "step": 3402 }, { "epoch": 0.35, "grad_norm": 1.9182235720120104, "learning_rate": 7.494652529691209e-06, "loss": 0.713, "step": 3403 }, { "epoch": 0.35, "grad_norm": 1.7468572198863737, "learning_rate": 7.493193468527764e-06, "loss": 0.643, "step": 3404 }, { "epoch": 0.35, "grad_norm": 1.7308546976807355, "learning_rate": 7.491734124746628e-06, "loss": 0.6003, "step": 3405 }, { "epoch": 0.35, "grad_norm": 1.8679022278259034, "learning_rate": 7.490274498513228e-06, "loss": 0.7414, "step": 3406 }, { "epoch": 0.35, "grad_norm": 2.0451720769063906, "learning_rate": 7.488814589993019e-06, "loss": 0.6454, "step": 3407 }, { "epoch": 0.35, "grad_norm": 1.954452155359003, "learning_rate": 7.487354399351491e-06, "loss": 0.7069, "step": 3408 }, { "epoch": 0.35, "grad_norm": 2.0412457459452837, "learning_rate": 7.485893926754164e-06, "loss": 0.706, "step": 3409 }, { "epoch": 0.35, "grad_norm": 1.756504654826153, "learning_rate": 7.484433172366592e-06, "loss": 0.5949, "step": 3410 }, { "epoch": 0.35, "grad_norm": 2.0975397217258207, "learning_rate": 7.482972136354359e-06, "loss": 0.827, "step": 3411 }, { "epoch": 0.35, "grad_norm": 1.9019894290221375, "learning_rate": 7.48151081888308e-06, "loss": 0.6112, "step": 3412 }, { "epoch": 0.35, "grad_norm": 1.961151827754993, "learning_rate": 7.480049220118407e-06, "loss": 0.6559, "step": 3413 }, { "epoch": 0.35, "grad_norm": 2.1285287007697966, "learning_rate": 7.478587340226019e-06, "loss": 0.6444, "step": 3414 }, { "epoch": 0.35, "grad_norm": 1.986160997932849, "learning_rate": 7.477125179371628e-06, "loss": 0.6846, "step": 3415 }, { "epoch": 0.36, "grad_norm": 1.8237130387040967, "learning_rate": 7.475662737720981e-06, "loss": 0.6, "step": 3416 }, { "epoch": 0.36, "grad_norm": 1.851416521972734, "learning_rate": 7.47420001543985e-06, "loss": 0.7632, "step": 3417 }, { "epoch": 0.36, "grad_norm": 1.9104748163467893, "learning_rate": 7.472737012694045e-06, "loss": 0.5842, "step": 3418 }, { "epoch": 0.36, "grad_norm": 1.9035555954382448, "learning_rate": 7.471273729649404e-06, "loss": 0.7967, "step": 3419 }, { "epoch": 0.36, "grad_norm": 2.0514769058465228, "learning_rate": 7.469810166471802e-06, "loss": 0.6309, "step": 3420 }, { "epoch": 0.36, "grad_norm": 1.831229869638447, "learning_rate": 7.46834632332714e-06, "loss": 0.6609, "step": 3421 }, { "epoch": 0.36, "grad_norm": 2.0840899836154985, "learning_rate": 7.466882200381352e-06, "loss": 0.6543, "step": 3422 }, { "epoch": 0.36, "grad_norm": 1.7908059902561366, "learning_rate": 7.465417797800406e-06, "loss": 0.6881, "step": 3423 }, { "epoch": 0.36, "grad_norm": 1.9837969625415905, "learning_rate": 7.463953115750302e-06, "loss": 0.633, "step": 3424 }, { "epoch": 0.36, "grad_norm": 1.6172551301392455, "learning_rate": 7.462488154397067e-06, "loss": 0.6234, "step": 3425 }, { "epoch": 0.36, "grad_norm": 1.927272898423346, "learning_rate": 7.461022913906764e-06, "loss": 0.6417, "step": 3426 }, { "epoch": 0.36, "grad_norm": 2.0551613282894583, "learning_rate": 7.459557394445486e-06, "loss": 0.6011, "step": 3427 }, { "epoch": 0.36, "grad_norm": 2.0067846400605243, "learning_rate": 7.458091596179359e-06, "loss": 0.6854, "step": 3428 }, { "epoch": 0.36, "grad_norm": 2.242882214853698, "learning_rate": 7.4566255192745384e-06, "loss": 0.6827, "step": 3429 }, { "epoch": 0.36, "grad_norm": 1.9060789364117876, "learning_rate": 7.455159163897213e-06, "loss": 0.6727, "step": 3430 }, { "epoch": 0.36, "grad_norm": 1.9612708236391978, "learning_rate": 7.453692530213603e-06, "loss": 0.6663, "step": 3431 }, { "epoch": 0.36, "grad_norm": 2.3743911219968425, "learning_rate": 7.452225618389959e-06, "loss": 0.6794, "step": 3432 }, { "epoch": 0.36, "grad_norm": 1.8840472801485948, "learning_rate": 7.4507584285925625e-06, "loss": 0.6563, "step": 3433 }, { "epoch": 0.36, "grad_norm": 1.7866444653145974, "learning_rate": 7.4492909609877304e-06, "loss": 0.7027, "step": 3434 }, { "epoch": 0.36, "grad_norm": 1.8941736336086783, "learning_rate": 7.447823215741807e-06, "loss": 0.6318, "step": 3435 }, { "epoch": 0.36, "grad_norm": 1.782248833519918, "learning_rate": 7.446355193021171e-06, "loss": 0.7005, "step": 3436 }, { "epoch": 0.36, "grad_norm": 1.9330931951718446, "learning_rate": 7.444886892992229e-06, "loss": 0.7213, "step": 3437 }, { "epoch": 0.36, "grad_norm": 1.8453021176715279, "learning_rate": 7.443418315821422e-06, "loss": 0.6963, "step": 3438 }, { "epoch": 0.36, "grad_norm": 1.7615865281146448, "learning_rate": 7.441949461675223e-06, "loss": 0.6233, "step": 3439 }, { "epoch": 0.36, "grad_norm": 1.9187017752193514, "learning_rate": 7.4404803307201345e-06, "loss": 0.6402, "step": 3440 }, { "epoch": 0.36, "grad_norm": 1.8966505444836648, "learning_rate": 7.4390109231226895e-06, "loss": 0.6392, "step": 3441 }, { "epoch": 0.36, "grad_norm": 1.6365254365980388, "learning_rate": 7.437541239049453e-06, "loss": 0.6298, "step": 3442 }, { "epoch": 0.36, "grad_norm": 1.7596906938070285, "learning_rate": 7.436071278667024e-06, "loss": 0.599, "step": 3443 }, { "epoch": 0.36, "grad_norm": 1.787527162481923, "learning_rate": 7.4346010421420314e-06, "loss": 0.6659, "step": 3444 }, { "epoch": 0.36, "grad_norm": 2.1953328947218864, "learning_rate": 7.433130529641133e-06, "loss": 0.7025, "step": 3445 }, { "epoch": 0.36, "grad_norm": 1.8839951723071167, "learning_rate": 7.431659741331022e-06, "loss": 0.6574, "step": 3446 }, { "epoch": 0.36, "grad_norm": 1.7025871231466374, "learning_rate": 7.430188677378418e-06, "loss": 0.6093, "step": 3447 }, { "epoch": 0.36, "grad_norm": 1.8718629399629925, "learning_rate": 7.4287173379500764e-06, "loss": 0.6797, "step": 3448 }, { "epoch": 0.36, "grad_norm": 1.9082997423204202, "learning_rate": 7.427245723212781e-06, "loss": 0.5676, "step": 3449 }, { "epoch": 0.36, "grad_norm": 2.4745611002431454, "learning_rate": 7.425773833333349e-06, "loss": 0.6926, "step": 3450 }, { "epoch": 0.36, "grad_norm": 1.7970418489642042, "learning_rate": 7.424301668478626e-06, "loss": 0.7225, "step": 3451 }, { "epoch": 0.36, "grad_norm": 2.0818049027970567, "learning_rate": 7.422829228815491e-06, "loss": 0.6821, "step": 3452 }, { "epoch": 0.36, "grad_norm": 1.9152217689013808, "learning_rate": 7.421356514510853e-06, "loss": 0.674, "step": 3453 }, { "epoch": 0.36, "grad_norm": 1.948628354944815, "learning_rate": 7.419883525731653e-06, "loss": 0.6712, "step": 3454 }, { "epoch": 0.36, "grad_norm": 1.7921525560109597, "learning_rate": 7.418410262644862e-06, "loss": 0.7164, "step": 3455 }, { "epoch": 0.36, "grad_norm": 1.8241838649494904, "learning_rate": 7.416936725417483e-06, "loss": 0.6467, "step": 3456 }, { "epoch": 0.36, "grad_norm": 1.9392896030044404, "learning_rate": 7.415462914216551e-06, "loss": 0.647, "step": 3457 }, { "epoch": 0.36, "grad_norm": 1.9242618966552065, "learning_rate": 7.41398882920913e-06, "loss": 0.7629, "step": 3458 }, { "epoch": 0.36, "grad_norm": 1.8239619089818286, "learning_rate": 7.4125144705623155e-06, "loss": 0.5769, "step": 3459 }, { "epoch": 0.36, "grad_norm": 1.9414170395727977, "learning_rate": 7.411039838443234e-06, "loss": 0.7273, "step": 3460 }, { "epoch": 0.36, "grad_norm": 1.796824341854242, "learning_rate": 7.409564933019046e-06, "loss": 0.6165, "step": 3461 }, { "epoch": 0.36, "grad_norm": 1.834416215786427, "learning_rate": 7.408089754456939e-06, "loss": 0.5974, "step": 3462 }, { "epoch": 0.36, "grad_norm": 1.8172898388225946, "learning_rate": 7.406614302924131e-06, "loss": 0.5895, "step": 3463 }, { "epoch": 0.36, "grad_norm": 1.9176599567813366, "learning_rate": 7.405138578587876e-06, "loss": 0.6056, "step": 3464 }, { "epoch": 0.36, "grad_norm": 1.892048274582568, "learning_rate": 7.403662581615454e-06, "loss": 0.7066, "step": 3465 }, { "epoch": 0.36, "grad_norm": 1.8893025379528445, "learning_rate": 7.402186312174177e-06, "loss": 0.6479, "step": 3466 }, { "epoch": 0.36, "grad_norm": 1.8654581009570594, "learning_rate": 7.4007097704313894e-06, "loss": 0.5893, "step": 3467 }, { "epoch": 0.36, "grad_norm": 1.8272464271313358, "learning_rate": 7.399232956554468e-06, "loss": 0.7083, "step": 3468 }, { "epoch": 0.36, "grad_norm": 1.810292265366264, "learning_rate": 7.397755870710813e-06, "loss": 0.6151, "step": 3469 }, { "epoch": 0.36, "grad_norm": 1.8509332314834515, "learning_rate": 7.396278513067865e-06, "loss": 0.616, "step": 3470 }, { "epoch": 0.36, "grad_norm": 1.6915564968768657, "learning_rate": 7.394800883793087e-06, "loss": 0.6099, "step": 3471 }, { "epoch": 0.36, "grad_norm": 1.735592555743763, "learning_rate": 7.393322983053982e-06, "loss": 0.5545, "step": 3472 }, { "epoch": 0.36, "grad_norm": 2.2703853679664783, "learning_rate": 7.391844811018074e-06, "loss": 0.6984, "step": 3473 }, { "epoch": 0.36, "grad_norm": 2.0112032870742116, "learning_rate": 7.390366367852923e-06, "loss": 0.6332, "step": 3474 }, { "epoch": 0.36, "grad_norm": 1.8616175779663644, "learning_rate": 7.38888765372612e-06, "loss": 0.5883, "step": 3475 }, { "epoch": 0.36, "grad_norm": 1.712242784645238, "learning_rate": 7.387408668805285e-06, "loss": 0.7109, "step": 3476 }, { "epoch": 0.36, "grad_norm": 1.6442545413702605, "learning_rate": 7.38592941325807e-06, "loss": 0.6177, "step": 3477 }, { "epoch": 0.36, "grad_norm": 1.9352886207798174, "learning_rate": 7.384449887252156e-06, "loss": 0.7331, "step": 3478 }, { "epoch": 0.36, "grad_norm": 1.918089359140376, "learning_rate": 7.382970090955258e-06, "loss": 0.607, "step": 3479 }, { "epoch": 0.36, "grad_norm": 1.653803914611329, "learning_rate": 7.381490024535117e-06, "loss": 0.5688, "step": 3480 }, { "epoch": 0.36, "grad_norm": 1.8646016700584298, "learning_rate": 7.380009688159507e-06, "loss": 0.7111, "step": 3481 }, { "epoch": 0.36, "grad_norm": 1.9421559203462602, "learning_rate": 7.378529081996233e-06, "loss": 0.5779, "step": 3482 }, { "epoch": 0.36, "grad_norm": 1.7791816060422943, "learning_rate": 7.377048206213132e-06, "loss": 0.5849, "step": 3483 }, { "epoch": 0.36, "grad_norm": 1.9856777896158313, "learning_rate": 7.375567060978067e-06, "loss": 0.6638, "step": 3484 }, { "epoch": 0.36, "grad_norm": 1.6344717716116572, "learning_rate": 7.374085646458935e-06, "loss": 0.6223, "step": 3485 }, { "epoch": 0.36, "grad_norm": 1.7495251385174526, "learning_rate": 7.372603962823664e-06, "loss": 0.7742, "step": 3486 }, { "epoch": 0.36, "grad_norm": 2.020767726437643, "learning_rate": 7.3711220102402105e-06, "loss": 0.5837, "step": 3487 }, { "epoch": 0.36, "grad_norm": 2.0961729815094214, "learning_rate": 7.369639788876561e-06, "loss": 0.6203, "step": 3488 }, { "epoch": 0.36, "grad_norm": 1.9022781368389061, "learning_rate": 7.3681572989007365e-06, "loss": 0.7023, "step": 3489 }, { "epoch": 0.36, "grad_norm": 2.0866961157120456, "learning_rate": 7.366674540480784e-06, "loss": 0.7635, "step": 3490 }, { "epoch": 0.36, "grad_norm": 2.507173647122414, "learning_rate": 7.365191513784782e-06, "loss": 0.5723, "step": 3491 }, { "epoch": 0.36, "grad_norm": 2.0748742922266414, "learning_rate": 7.363708218980841e-06, "loss": 0.7909, "step": 3492 }, { "epoch": 0.36, "grad_norm": 1.8333987770622944, "learning_rate": 7.3622246562371e-06, "loss": 0.6826, "step": 3493 }, { "epoch": 0.36, "grad_norm": 1.70355206649613, "learning_rate": 7.360740825721732e-06, "loss": 0.5902, "step": 3494 }, { "epoch": 0.36, "grad_norm": 1.9576190398806632, "learning_rate": 7.3592567276029336e-06, "loss": 0.6535, "step": 3495 }, { "epoch": 0.36, "grad_norm": 1.8723793599589569, "learning_rate": 7.357772362048939e-06, "loss": 0.6424, "step": 3496 }, { "epoch": 0.36, "grad_norm": 1.8059004571656276, "learning_rate": 7.356287729228007e-06, "loss": 0.6448, "step": 3497 }, { "epoch": 0.36, "grad_norm": 1.7440998752846177, "learning_rate": 7.354802829308432e-06, "loss": 0.68, "step": 3498 }, { "epoch": 0.36, "grad_norm": 1.9870844742740057, "learning_rate": 7.353317662458532e-06, "loss": 0.7523, "step": 3499 }, { "epoch": 0.36, "grad_norm": 1.6846891263344534, "learning_rate": 7.351832228846664e-06, "loss": 0.5949, "step": 3500 }, { "epoch": 0.36, "grad_norm": 2.146763984783295, "learning_rate": 7.3503465286412064e-06, "loss": 0.732, "step": 3501 }, { "epoch": 0.36, "grad_norm": 1.9106542801719677, "learning_rate": 7.348860562010574e-06, "loss": 0.571, "step": 3502 }, { "epoch": 0.36, "grad_norm": 1.7867908684838822, "learning_rate": 7.347374329123209e-06, "loss": 0.6187, "step": 3503 }, { "epoch": 0.36, "grad_norm": 1.8933431553615583, "learning_rate": 7.345887830147583e-06, "loss": 0.5439, "step": 3504 }, { "epoch": 0.36, "grad_norm": 2.1452897179479513, "learning_rate": 7.344401065252203e-06, "loss": 0.6209, "step": 3505 }, { "epoch": 0.36, "grad_norm": 1.8240488714231755, "learning_rate": 7.3429140346055975e-06, "loss": 0.6676, "step": 3506 }, { "epoch": 0.36, "grad_norm": 2.1529547922669914, "learning_rate": 7.341426738376332e-06, "loss": 0.5676, "step": 3507 }, { "epoch": 0.36, "grad_norm": 2.0283041565520574, "learning_rate": 7.339939176733e-06, "loss": 0.7162, "step": 3508 }, { "epoch": 0.36, "grad_norm": 2.1571072349133633, "learning_rate": 7.338451349844225e-06, "loss": 0.8388, "step": 3509 }, { "epoch": 0.36, "grad_norm": 1.8719672067212767, "learning_rate": 7.336963257878662e-06, "loss": 0.7124, "step": 3510 }, { "epoch": 0.36, "grad_norm": 1.877417040669118, "learning_rate": 7.335474901004992e-06, "loss": 0.7178, "step": 3511 }, { "epoch": 0.37, "grad_norm": 1.8934927176612808, "learning_rate": 7.33398627939193e-06, "loss": 0.5567, "step": 3512 }, { "epoch": 0.37, "grad_norm": 1.8796050148787842, "learning_rate": 7.332497393208221e-06, "loss": 0.6233, "step": 3513 }, { "epoch": 0.37, "grad_norm": 1.83490148528102, "learning_rate": 7.331008242622637e-06, "loss": 0.6343, "step": 3514 }, { "epoch": 0.37, "grad_norm": 1.8143441207248123, "learning_rate": 7.329518827803983e-06, "loss": 0.6808, "step": 3515 }, { "epoch": 0.37, "grad_norm": 2.1087748223352754, "learning_rate": 7.328029148921093e-06, "loss": 0.5901, "step": 3516 }, { "epoch": 0.37, "grad_norm": 1.7491095171764681, "learning_rate": 7.326539206142829e-06, "loss": 0.566, "step": 3517 }, { "epoch": 0.37, "grad_norm": 1.8990983602988085, "learning_rate": 7.325048999638083e-06, "loss": 0.647, "step": 3518 }, { "epoch": 0.37, "grad_norm": 1.7817325075047465, "learning_rate": 7.323558529575783e-06, "loss": 0.6294, "step": 3519 }, { "epoch": 0.37, "grad_norm": 1.822167484873732, "learning_rate": 7.32206779612488e-06, "loss": 0.599, "step": 3520 }, { "epoch": 0.37, "grad_norm": 1.910179885511139, "learning_rate": 7.320576799454355e-06, "loss": 0.6137, "step": 3521 }, { "epoch": 0.37, "grad_norm": 1.9373630796040067, "learning_rate": 7.319085539733225e-06, "loss": 0.5921, "step": 3522 }, { "epoch": 0.37, "grad_norm": 1.8558805706601034, "learning_rate": 7.317594017130529e-06, "loss": 0.6571, "step": 3523 }, { "epoch": 0.37, "grad_norm": 1.7509386776872393, "learning_rate": 7.316102231815343e-06, "loss": 0.65, "step": 3524 }, { "epoch": 0.37, "grad_norm": 1.87626594632814, "learning_rate": 7.3146101839567665e-06, "loss": 0.6148, "step": 3525 }, { "epoch": 0.37, "grad_norm": 1.9780604464642324, "learning_rate": 7.313117873723933e-06, "loss": 0.5868, "step": 3526 }, { "epoch": 0.37, "grad_norm": 1.7614477042809984, "learning_rate": 7.311625301286005e-06, "loss": 0.6357, "step": 3527 }, { "epoch": 0.37, "grad_norm": 1.8327860152596689, "learning_rate": 7.310132466812172e-06, "loss": 0.5737, "step": 3528 }, { "epoch": 0.37, "grad_norm": 1.8844138267432626, "learning_rate": 7.308639370471658e-06, "loss": 0.6322, "step": 3529 }, { "epoch": 0.37, "grad_norm": 1.8674359027203937, "learning_rate": 7.30714601243371e-06, "loss": 0.5701, "step": 3530 }, { "epoch": 0.37, "grad_norm": 1.8123042050551081, "learning_rate": 7.3056523928676145e-06, "loss": 0.6558, "step": 3531 }, { "epoch": 0.37, "grad_norm": 1.7883251931061648, "learning_rate": 7.304158511942676e-06, "loss": 0.6277, "step": 3532 }, { "epoch": 0.37, "grad_norm": 2.0045012224678205, "learning_rate": 7.302664369828238e-06, "loss": 0.7663, "step": 3533 }, { "epoch": 0.37, "grad_norm": 1.9524543531778042, "learning_rate": 7.3011699666936685e-06, "loss": 0.7106, "step": 3534 }, { "epoch": 0.37, "grad_norm": 1.9413034212258677, "learning_rate": 7.299675302708368e-06, "loss": 0.6501, "step": 3535 }, { "epoch": 0.37, "grad_norm": 1.6892597011874646, "learning_rate": 7.298180378041763e-06, "loss": 0.5715, "step": 3536 }, { "epoch": 0.37, "grad_norm": 1.9208949618622175, "learning_rate": 7.296685192863313e-06, "loss": 0.6828, "step": 3537 }, { "epoch": 0.37, "grad_norm": 2.1550436825626966, "learning_rate": 7.295189747342507e-06, "loss": 0.6368, "step": 3538 }, { "epoch": 0.37, "grad_norm": 1.6240302138516085, "learning_rate": 7.29369404164886e-06, "loss": 0.5729, "step": 3539 }, { "epoch": 0.37, "grad_norm": 1.9753475779628318, "learning_rate": 7.2921980759519195e-06, "loss": 0.6921, "step": 3540 }, { "epoch": 0.37, "grad_norm": 1.871059765624354, "learning_rate": 7.290701850421263e-06, "loss": 0.7101, "step": 3541 }, { "epoch": 0.37, "grad_norm": 1.8488824199834615, "learning_rate": 7.289205365226495e-06, "loss": 0.7121, "step": 3542 }, { "epoch": 0.37, "grad_norm": 1.979776721083964, "learning_rate": 7.28770862053725e-06, "loss": 0.6629, "step": 3543 }, { "epoch": 0.37, "grad_norm": 1.7690159529643779, "learning_rate": 7.286211616523193e-06, "loss": 0.5671, "step": 3544 }, { "epoch": 0.37, "grad_norm": 2.0002953479570516, "learning_rate": 7.28471435335402e-06, "loss": 0.5838, "step": 3545 }, { "epoch": 0.37, "grad_norm": 1.8849684513390108, "learning_rate": 7.2832168311994514e-06, "loss": 0.6947, "step": 3546 }, { "epoch": 0.37, "grad_norm": 1.8271816423627594, "learning_rate": 7.281719050229241e-06, "loss": 0.6798, "step": 3547 }, { "epoch": 0.37, "grad_norm": 1.9154738481542926, "learning_rate": 7.280221010613171e-06, "loss": 0.5558, "step": 3548 }, { "epoch": 0.37, "grad_norm": 1.7236896270573931, "learning_rate": 7.278722712521054e-06, "loss": 0.6129, "step": 3549 }, { "epoch": 0.37, "grad_norm": 1.858416081191063, "learning_rate": 7.277224156122728e-06, "loss": 0.7925, "step": 3550 }, { "epoch": 0.37, "grad_norm": 1.9349250790205446, "learning_rate": 7.275725341588064e-06, "loss": 0.7287, "step": 3551 }, { "epoch": 0.37, "grad_norm": 2.0965449424230016, "learning_rate": 7.2742262690869615e-06, "loss": 0.7357, "step": 3552 }, { "epoch": 0.37, "grad_norm": 1.9572073961053111, "learning_rate": 7.272726938789348e-06, "loss": 0.6857, "step": 3553 }, { "epoch": 0.37, "grad_norm": 1.6355516242540316, "learning_rate": 7.2712273508651834e-06, "loss": 0.578, "step": 3554 }, { "epoch": 0.37, "grad_norm": 1.8240038253760955, "learning_rate": 7.269727505484452e-06, "loss": 0.7063, "step": 3555 }, { "epoch": 0.37, "grad_norm": 1.9522137436656415, "learning_rate": 7.268227402817171e-06, "loss": 0.6384, "step": 3556 }, { "epoch": 0.37, "grad_norm": 1.8803868241469999, "learning_rate": 7.266727043033386e-06, "loss": 0.6292, "step": 3557 }, { "epoch": 0.37, "grad_norm": 1.7416415007506427, "learning_rate": 7.26522642630317e-06, "loss": 0.5993, "step": 3558 }, { "epoch": 0.37, "grad_norm": 1.7285779931370737, "learning_rate": 7.263725552796628e-06, "loss": 0.6292, "step": 3559 }, { "epoch": 0.37, "grad_norm": 1.8865673304095096, "learning_rate": 7.262224422683891e-06, "loss": 0.6817, "step": 3560 }, { "epoch": 0.37, "grad_norm": 1.8541330421002968, "learning_rate": 7.260723036135122e-06, "loss": 0.5544, "step": 3561 }, { "epoch": 0.37, "grad_norm": 1.5749650639128956, "learning_rate": 7.259221393320511e-06, "loss": 0.637, "step": 3562 }, { "epoch": 0.37, "grad_norm": 1.821273194460373, "learning_rate": 7.257719494410278e-06, "loss": 0.5897, "step": 3563 }, { "epoch": 0.37, "grad_norm": 1.8149563401389326, "learning_rate": 7.2562173395746725e-06, "loss": 0.6168, "step": 3564 }, { "epoch": 0.37, "grad_norm": 2.0211465063160863, "learning_rate": 7.25471492898397e-06, "loss": 0.6834, "step": 3565 }, { "epoch": 0.37, "grad_norm": 1.6758100886152418, "learning_rate": 7.25321226280848e-06, "loss": 0.5116, "step": 3566 }, { "epoch": 0.37, "grad_norm": 1.9332635703531122, "learning_rate": 7.251709341218536e-06, "loss": 0.7101, "step": 3567 }, { "epoch": 0.37, "grad_norm": 1.9021202447821917, "learning_rate": 7.250206164384506e-06, "loss": 0.7104, "step": 3568 }, { "epoch": 0.37, "grad_norm": 2.1677945215060594, "learning_rate": 7.24870273247678e-06, "loss": 0.7328, "step": 3569 }, { "epoch": 0.37, "grad_norm": 1.8317538366672943, "learning_rate": 7.247199045665781e-06, "loss": 0.6999, "step": 3570 }, { "epoch": 0.37, "grad_norm": 1.9648854126394588, "learning_rate": 7.245695104121963e-06, "loss": 0.6013, "step": 3571 }, { "epoch": 0.37, "grad_norm": 1.7893874702530328, "learning_rate": 7.244190908015805e-06, "loss": 0.6356, "step": 3572 }, { "epoch": 0.37, "grad_norm": 2.4006419796596727, "learning_rate": 7.242686457517815e-06, "loss": 0.7903, "step": 3573 }, { "epoch": 0.37, "grad_norm": 1.8682533212867887, "learning_rate": 7.241181752798534e-06, "loss": 0.7252, "step": 3574 }, { "epoch": 0.37, "grad_norm": 1.968275479562997, "learning_rate": 7.239676794028526e-06, "loss": 0.667, "step": 3575 }, { "epoch": 0.37, "grad_norm": 1.5473586071098482, "learning_rate": 7.238171581378388e-06, "loss": 0.5816, "step": 3576 }, { "epoch": 0.37, "grad_norm": 1.773785035800144, "learning_rate": 7.236666115018744e-06, "loss": 0.6212, "step": 3577 }, { "epoch": 0.37, "grad_norm": 1.8332500658652024, "learning_rate": 7.235160395120247e-06, "loss": 0.6962, "step": 3578 }, { "epoch": 0.37, "grad_norm": 1.5568737353280147, "learning_rate": 7.2336544218535776e-06, "loss": 0.559, "step": 3579 }, { "epoch": 0.37, "grad_norm": 1.8913861420333955, "learning_rate": 7.23214819538945e-06, "loss": 0.553, "step": 3580 }, { "epoch": 0.37, "grad_norm": 1.9256262438864884, "learning_rate": 7.230641715898602e-06, "loss": 0.7225, "step": 3581 }, { "epoch": 0.37, "grad_norm": 1.9428757885250234, "learning_rate": 7.2291349835518e-06, "loss": 0.6232, "step": 3582 }, { "epoch": 0.37, "grad_norm": 1.847661038967175, "learning_rate": 7.227627998519843e-06, "loss": 0.6888, "step": 3583 }, { "epoch": 0.37, "grad_norm": 1.7968971064453072, "learning_rate": 7.226120760973554e-06, "loss": 0.6808, "step": 3584 }, { "epoch": 0.37, "grad_norm": 1.8316340872317762, "learning_rate": 7.224613271083789e-06, "loss": 0.6496, "step": 3585 }, { "epoch": 0.37, "grad_norm": 1.7751193674739747, "learning_rate": 7.22310552902143e-06, "loss": 0.6898, "step": 3586 }, { "epoch": 0.37, "grad_norm": 2.0056699940699625, "learning_rate": 7.221597534957389e-06, "loss": 0.6359, "step": 3587 }, { "epoch": 0.37, "grad_norm": 2.2114079963540183, "learning_rate": 7.220089289062603e-06, "loss": 0.7437, "step": 3588 }, { "epoch": 0.37, "grad_norm": 1.8708371960322898, "learning_rate": 7.218580791508043e-06, "loss": 0.6704, "step": 3589 }, { "epoch": 0.37, "grad_norm": 1.8610230872176512, "learning_rate": 7.217072042464706e-06, "loss": 0.5883, "step": 3590 }, { "epoch": 0.37, "grad_norm": 1.7328612845093743, "learning_rate": 7.215563042103614e-06, "loss": 0.6694, "step": 3591 }, { "epoch": 0.37, "grad_norm": 1.7591041086632069, "learning_rate": 7.214053790595823e-06, "loss": 0.7389, "step": 3592 }, { "epoch": 0.37, "grad_norm": 1.9763016947888363, "learning_rate": 7.212544288112415e-06, "loss": 0.7703, "step": 3593 }, { "epoch": 0.37, "grad_norm": 1.739904384320081, "learning_rate": 7.211034534824503e-06, "loss": 0.6461, "step": 3594 }, { "epoch": 0.37, "grad_norm": 1.725500951731821, "learning_rate": 7.209524530903223e-06, "loss": 0.6184, "step": 3595 }, { "epoch": 0.37, "grad_norm": 1.9718658533362245, "learning_rate": 7.208014276519741e-06, "loss": 0.6249, "step": 3596 }, { "epoch": 0.37, "grad_norm": 1.7782536173137125, "learning_rate": 7.206503771845259e-06, "loss": 0.7426, "step": 3597 }, { "epoch": 0.37, "grad_norm": 2.0669465358505135, "learning_rate": 7.2049930170509965e-06, "loss": 0.6984, "step": 3598 }, { "epoch": 0.37, "grad_norm": 1.9149032831444073, "learning_rate": 7.2034820123082075e-06, "loss": 0.5994, "step": 3599 }, { "epoch": 0.37, "grad_norm": 1.8824750319453147, "learning_rate": 7.201970757788172e-06, "loss": 0.5991, "step": 3600 }, { "epoch": 0.37, "grad_norm": 1.897325064986783, "learning_rate": 7.200459253662202e-06, "loss": 0.6076, "step": 3601 }, { "epoch": 0.37, "grad_norm": 1.9589204398556992, "learning_rate": 7.198947500101632e-06, "loss": 0.6531, "step": 3602 }, { "epoch": 0.37, "grad_norm": 1.6587850379948488, "learning_rate": 7.19743549727783e-06, "loss": 0.6824, "step": 3603 }, { "epoch": 0.37, "grad_norm": 1.9014993866573917, "learning_rate": 7.195923245362188e-06, "loss": 0.6682, "step": 3604 }, { "epoch": 0.37, "grad_norm": 1.872178215470094, "learning_rate": 7.194410744526132e-06, "loss": 0.5496, "step": 3605 }, { "epoch": 0.37, "grad_norm": 1.971682951834879, "learning_rate": 7.192897994941111e-06, "loss": 0.6729, "step": 3606 }, { "epoch": 0.37, "grad_norm": 2.097197443870445, "learning_rate": 7.191384996778601e-06, "loss": 0.7267, "step": 3607 }, { "epoch": 0.38, "grad_norm": 2.104883778259818, "learning_rate": 7.189871750210111e-06, "loss": 0.5755, "step": 3608 }, { "epoch": 0.38, "grad_norm": 2.038992765805888, "learning_rate": 7.1883582554071776e-06, "loss": 0.596, "step": 3609 }, { "epoch": 0.38, "grad_norm": 1.8255786200273927, "learning_rate": 7.1868445125413625e-06, "loss": 0.5536, "step": 3610 }, { "epoch": 0.38, "grad_norm": 1.649347481136232, "learning_rate": 7.1853305217842565e-06, "loss": 0.6403, "step": 3611 }, { "epoch": 0.38, "grad_norm": 1.7709514913740092, "learning_rate": 7.183816283307481e-06, "loss": 0.6644, "step": 3612 }, { "epoch": 0.38, "grad_norm": 1.8054415691274552, "learning_rate": 7.1823017972826815e-06, "loss": 0.5759, "step": 3613 }, { "epoch": 0.38, "grad_norm": 1.908794948479207, "learning_rate": 7.180787063881534e-06, "loss": 0.7358, "step": 3614 }, { "epoch": 0.38, "grad_norm": 1.8067734916298452, "learning_rate": 7.179272083275744e-06, "loss": 0.6164, "step": 3615 }, { "epoch": 0.38, "grad_norm": 2.1927195824585604, "learning_rate": 7.177756855637042e-06, "loss": 0.7013, "step": 3616 }, { "epoch": 0.38, "grad_norm": 1.853731488800181, "learning_rate": 7.1762413811371855e-06, "loss": 0.6514, "step": 3617 }, { "epoch": 0.38, "grad_norm": 1.6905411710563716, "learning_rate": 7.174725659947966e-06, "loss": 0.5478, "step": 3618 }, { "epoch": 0.38, "grad_norm": 1.8778815442471106, "learning_rate": 7.173209692241199e-06, "loss": 0.6467, "step": 3619 }, { "epoch": 0.38, "grad_norm": 2.066809489639149, "learning_rate": 7.171693478188724e-06, "loss": 0.7792, "step": 3620 }, { "epoch": 0.38, "grad_norm": 2.182844071235007, "learning_rate": 7.170177017962415e-06, "loss": 0.6966, "step": 3621 }, { "epoch": 0.38, "grad_norm": 1.9173244135871332, "learning_rate": 7.168660311734173e-06, "loss": 0.6599, "step": 3622 }, { "epoch": 0.38, "grad_norm": 1.8071165425130356, "learning_rate": 7.167143359675924e-06, "loss": 0.5384, "step": 3623 }, { "epoch": 0.38, "grad_norm": 1.818643370303639, "learning_rate": 7.1656261619596205e-06, "loss": 0.5815, "step": 3624 }, { "epoch": 0.38, "grad_norm": 1.9981940575117898, "learning_rate": 7.1641087187572485e-06, "loss": 0.6087, "step": 3625 }, { "epoch": 0.38, "grad_norm": 1.7155423758343529, "learning_rate": 7.16259103024082e-06, "loss": 0.5151, "step": 3626 }, { "epoch": 0.38, "grad_norm": 1.8508271654042858, "learning_rate": 7.161073096582371e-06, "loss": 0.6903, "step": 3627 }, { "epoch": 0.38, "grad_norm": 1.9064304886131862, "learning_rate": 7.159554917953968e-06, "loss": 0.6077, "step": 3628 }, { "epoch": 0.38, "grad_norm": 1.9992795993108614, "learning_rate": 7.158036494527707e-06, "loss": 0.8044, "step": 3629 }, { "epoch": 0.38, "grad_norm": 1.8289307895653182, "learning_rate": 7.156517826475708e-06, "loss": 0.6744, "step": 3630 }, { "epoch": 0.38, "grad_norm": 1.9640255147580412, "learning_rate": 7.154998913970124e-06, "loss": 0.6093, "step": 3631 }, { "epoch": 0.38, "grad_norm": 1.8704100255026406, "learning_rate": 7.153479757183127e-06, "loss": 0.5818, "step": 3632 }, { "epoch": 0.38, "grad_norm": 1.8530421196673053, "learning_rate": 7.1519603562869265e-06, "loss": 0.5494, "step": 3633 }, { "epoch": 0.38, "grad_norm": 1.859849961294324, "learning_rate": 7.150440711453754e-06, "loss": 0.6875, "step": 3634 }, { "epoch": 0.38, "grad_norm": 1.9767165128139512, "learning_rate": 7.148920822855869e-06, "loss": 0.6612, "step": 3635 }, { "epoch": 0.38, "grad_norm": 2.0058716132606684, "learning_rate": 7.1474006906655605e-06, "loss": 0.7287, "step": 3636 }, { "epoch": 0.38, "grad_norm": 1.7254558229061596, "learning_rate": 7.145880315055145e-06, "loss": 0.6355, "step": 3637 }, { "epoch": 0.38, "grad_norm": 1.5572686981014636, "learning_rate": 7.144359696196964e-06, "loss": 0.6584, "step": 3638 }, { "epoch": 0.38, "grad_norm": 1.9052815448096254, "learning_rate": 7.142838834263388e-06, "loss": 0.6688, "step": 3639 }, { "epoch": 0.38, "grad_norm": 1.6860169875157103, "learning_rate": 7.141317729426817e-06, "loss": 0.6196, "step": 3640 }, { "epoch": 0.38, "grad_norm": 1.7721664305732985, "learning_rate": 7.139796381859676e-06, "loss": 0.6633, "step": 3641 }, { "epoch": 0.38, "grad_norm": 1.7826626559988565, "learning_rate": 7.138274791734421e-06, "loss": 0.6367, "step": 3642 }, { "epoch": 0.38, "grad_norm": 1.990311955470244, "learning_rate": 7.136752959223527e-06, "loss": 0.6497, "step": 3643 }, { "epoch": 0.38, "grad_norm": 2.074898158189017, "learning_rate": 7.1352308844995086e-06, "loss": 0.6088, "step": 3644 }, { "epoch": 0.38, "grad_norm": 1.9277929482740352, "learning_rate": 7.133708567734898e-06, "loss": 0.5938, "step": 3645 }, { "epoch": 0.38, "grad_norm": 1.7516961666712483, "learning_rate": 7.13218600910226e-06, "loss": 0.7127, "step": 3646 }, { "epoch": 0.38, "grad_norm": 1.889977062266395, "learning_rate": 7.1306632087741844e-06, "loss": 0.6515, "step": 3647 }, { "epoch": 0.38, "grad_norm": 2.0117401818077862, "learning_rate": 7.12914016692329e-06, "loss": 0.708, "step": 3648 }, { "epoch": 0.38, "grad_norm": 1.8988679344144204, "learning_rate": 7.1276168837222215e-06, "loss": 0.707, "step": 3649 }, { "epoch": 0.38, "grad_norm": 1.992469048910512, "learning_rate": 7.1260933593436535e-06, "loss": 0.6132, "step": 3650 }, { "epoch": 0.38, "grad_norm": 1.8079967436056992, "learning_rate": 7.1245695939602834e-06, "loss": 0.6561, "step": 3651 }, { "epoch": 0.38, "grad_norm": 1.6363093178305284, "learning_rate": 7.12304558774484e-06, "loss": 0.5939, "step": 3652 }, { "epoch": 0.38, "grad_norm": 1.7910381053908828, "learning_rate": 7.121521340870079e-06, "loss": 0.673, "step": 3653 }, { "epoch": 0.38, "grad_norm": 1.9706296825523928, "learning_rate": 7.119996853508781e-06, "loss": 0.6262, "step": 3654 }, { "epoch": 0.38, "grad_norm": 1.869840494562361, "learning_rate": 7.1184721258337575e-06, "loss": 0.6058, "step": 3655 }, { "epoch": 0.38, "grad_norm": 1.926379615057285, "learning_rate": 7.116947158017842e-06, "loss": 0.7076, "step": 3656 }, { "epoch": 0.38, "grad_norm": 1.7795167202195803, "learning_rate": 7.115421950233902e-06, "loss": 0.6051, "step": 3657 }, { "epoch": 0.38, "grad_norm": 1.9578043275804096, "learning_rate": 7.113896502654824e-06, "loss": 0.605, "step": 3658 }, { "epoch": 0.38, "grad_norm": 1.7369262214424697, "learning_rate": 7.112370815453531e-06, "loss": 0.5664, "step": 3659 }, { "epoch": 0.38, "grad_norm": 2.0109937094073906, "learning_rate": 7.110844888802966e-06, "loss": 0.6973, "step": 3660 }, { "epoch": 0.38, "grad_norm": 1.9212899018541152, "learning_rate": 7.1093187228760995e-06, "loss": 0.7831, "step": 3661 }, { "epoch": 0.38, "grad_norm": 1.9675443765140932, "learning_rate": 7.107792317845934e-06, "loss": 0.7564, "step": 3662 }, { "epoch": 0.38, "grad_norm": 2.5186964557046534, "learning_rate": 7.106265673885494e-06, "loss": 0.8091, "step": 3663 }, { "epoch": 0.38, "grad_norm": 2.172007104728858, "learning_rate": 7.104738791167837e-06, "loss": 0.6913, "step": 3664 }, { "epoch": 0.38, "grad_norm": 1.767126716740684, "learning_rate": 7.103211669866039e-06, "loss": 0.6973, "step": 3665 }, { "epoch": 0.38, "grad_norm": 1.7754637022542625, "learning_rate": 7.1016843101532115e-06, "loss": 0.6218, "step": 3666 }, { "epoch": 0.38, "grad_norm": 1.7831012329808487, "learning_rate": 7.100156712202488e-06, "loss": 0.685, "step": 3667 }, { "epoch": 0.38, "grad_norm": 1.8985315383249024, "learning_rate": 7.098628876187031e-06, "loss": 0.6087, "step": 3668 }, { "epoch": 0.38, "grad_norm": 1.9257692672338116, "learning_rate": 7.0971008022800295e-06, "loss": 0.6588, "step": 3669 }, { "epoch": 0.38, "grad_norm": 2.048096408415564, "learning_rate": 7.095572490654698e-06, "loss": 0.7406, "step": 3670 }, { "epoch": 0.38, "grad_norm": 1.80679778795583, "learning_rate": 7.094043941484282e-06, "loss": 0.6449, "step": 3671 }, { "epoch": 0.38, "grad_norm": 1.924580745209938, "learning_rate": 7.092515154942048e-06, "loss": 0.7301, "step": 3672 }, { "epoch": 0.38, "grad_norm": 1.7817683854261057, "learning_rate": 7.090986131201294e-06, "loss": 0.6471, "step": 3673 }, { "epoch": 0.38, "grad_norm": 1.7928879478359547, "learning_rate": 7.089456870435344e-06, "loss": 0.6514, "step": 3674 }, { "epoch": 0.38, "grad_norm": 2.0696609601855616, "learning_rate": 7.087927372817549e-06, "loss": 0.6171, "step": 3675 }, { "epoch": 0.38, "grad_norm": 1.6884810491961613, "learning_rate": 7.086397638521285e-06, "loss": 0.6483, "step": 3676 }, { "epoch": 0.38, "grad_norm": 1.7331759057428129, "learning_rate": 7.084867667719957e-06, "loss": 0.5589, "step": 3677 }, { "epoch": 0.38, "grad_norm": 1.9600799820017218, "learning_rate": 7.083337460586995e-06, "loss": 0.7712, "step": 3678 }, { "epoch": 0.38, "grad_norm": 1.9698872325167316, "learning_rate": 7.0818070172958585e-06, "loss": 0.6482, "step": 3679 }, { "epoch": 0.38, "grad_norm": 1.772990633413805, "learning_rate": 7.080276338020029e-06, "loss": 0.6664, "step": 3680 }, { "epoch": 0.38, "grad_norm": 1.9925905028374946, "learning_rate": 7.07874542293302e-06, "loss": 0.7052, "step": 3681 }, { "epoch": 0.38, "grad_norm": 1.9775724860452595, "learning_rate": 7.077214272208369e-06, "loss": 0.6453, "step": 3682 }, { "epoch": 0.38, "grad_norm": 2.101909580042845, "learning_rate": 7.07568288601964e-06, "loss": 0.7348, "step": 3683 }, { "epoch": 0.38, "grad_norm": 1.7865633356521249, "learning_rate": 7.074151264540425e-06, "loss": 0.5937, "step": 3684 }, { "epoch": 0.38, "grad_norm": 1.912028189475162, "learning_rate": 7.072619407944343e-06, "loss": 0.6418, "step": 3685 }, { "epoch": 0.38, "grad_norm": 1.7555446608686078, "learning_rate": 7.071087316405037e-06, "loss": 0.7161, "step": 3686 }, { "epoch": 0.38, "grad_norm": 1.7423318666131733, "learning_rate": 7.069554990096178e-06, "loss": 0.6073, "step": 3687 }, { "epoch": 0.38, "grad_norm": 1.727743961687725, "learning_rate": 7.068022429191465e-06, "loss": 0.6346, "step": 3688 }, { "epoch": 0.38, "grad_norm": 2.020998879626979, "learning_rate": 7.066489633864624e-06, "loss": 0.6432, "step": 3689 }, { "epoch": 0.38, "grad_norm": 1.7904324410819448, "learning_rate": 7.064956604289402e-06, "loss": 0.5901, "step": 3690 }, { "epoch": 0.38, "grad_norm": 1.6681103595815747, "learning_rate": 7.0634233406395806e-06, "loss": 0.6236, "step": 3691 }, { "epoch": 0.38, "grad_norm": 2.019485959225672, "learning_rate": 7.061889843088961e-06, "loss": 0.6031, "step": 3692 }, { "epoch": 0.38, "grad_norm": 1.6974985157799625, "learning_rate": 7.060356111811376e-06, "loss": 0.5825, "step": 3693 }, { "epoch": 0.38, "grad_norm": 2.007966294692052, "learning_rate": 7.058822146980684e-06, "loss": 0.6565, "step": 3694 }, { "epoch": 0.38, "grad_norm": 1.8677225337817733, "learning_rate": 7.0572879487707645e-06, "loss": 0.6306, "step": 3695 }, { "epoch": 0.38, "grad_norm": 1.9268857672131279, "learning_rate": 7.05575351735553e-06, "loss": 0.6039, "step": 3696 }, { "epoch": 0.38, "grad_norm": 1.8004826415621484, "learning_rate": 7.054218852908918e-06, "loss": 0.7229, "step": 3697 }, { "epoch": 0.38, "grad_norm": 1.86814066122037, "learning_rate": 7.05268395560489e-06, "loss": 0.6712, "step": 3698 }, { "epoch": 0.38, "grad_norm": 1.8861582510455044, "learning_rate": 7.051148825617435e-06, "loss": 0.6199, "step": 3699 }, { "epoch": 0.38, "grad_norm": 2.0271375461938907, "learning_rate": 7.0496134631205705e-06, "loss": 0.6532, "step": 3700 }, { "epoch": 0.38, "grad_norm": 1.7322689461414547, "learning_rate": 7.048077868288338e-06, "loss": 0.6175, "step": 3701 }, { "epoch": 0.38, "grad_norm": 1.7427810559006416, "learning_rate": 7.046542041294804e-06, "loss": 0.624, "step": 3702 }, { "epoch": 0.38, "grad_norm": 1.814216437196377, "learning_rate": 7.045005982314065e-06, "loss": 0.5836, "step": 3703 }, { "epoch": 0.39, "grad_norm": 2.008142687023798, "learning_rate": 7.0434696915202415e-06, "loss": 0.7704, "step": 3704 }, { "epoch": 0.39, "grad_norm": 1.7855230650520368, "learning_rate": 7.041933169087482e-06, "loss": 0.6452, "step": 3705 }, { "epoch": 0.39, "grad_norm": 1.985668972035989, "learning_rate": 7.040396415189959e-06, "loss": 0.7503, "step": 3706 }, { "epoch": 0.39, "grad_norm": 1.8844199217844504, "learning_rate": 7.038859430001872e-06, "loss": 0.6682, "step": 3707 }, { "epoch": 0.39, "grad_norm": 1.6502541814904381, "learning_rate": 7.037322213697448e-06, "loss": 0.5502, "step": 3708 }, { "epoch": 0.39, "grad_norm": 2.323083971904915, "learning_rate": 7.035784766450938e-06, "loss": 0.5438, "step": 3709 }, { "epoch": 0.39, "grad_norm": 1.9612136420018857, "learning_rate": 7.034247088436621e-06, "loss": 0.7139, "step": 3710 }, { "epoch": 0.39, "grad_norm": 1.8172381656336853, "learning_rate": 7.032709179828803e-06, "loss": 0.6152, "step": 3711 }, { "epoch": 0.39, "grad_norm": 1.8903375116755927, "learning_rate": 7.031171040801813e-06, "loss": 0.6615, "step": 3712 }, { "epoch": 0.39, "grad_norm": 2.0513450644693183, "learning_rate": 7.029632671530008e-06, "loss": 0.7287, "step": 3713 }, { "epoch": 0.39, "grad_norm": 2.364069746083148, "learning_rate": 7.02809407218777e-06, "loss": 0.7201, "step": 3714 }, { "epoch": 0.39, "grad_norm": 1.789762597807389, "learning_rate": 7.026555242949511e-06, "loss": 0.6513, "step": 3715 }, { "epoch": 0.39, "grad_norm": 1.7415445101122051, "learning_rate": 7.0250161839896636e-06, "loss": 0.5658, "step": 3716 }, { "epoch": 0.39, "grad_norm": 1.9838077381226866, "learning_rate": 7.02347689548269e-06, "loss": 0.643, "step": 3717 }, { "epoch": 0.39, "grad_norm": 2.0244134936944933, "learning_rate": 7.021937377603076e-06, "loss": 0.7017, "step": 3718 }, { "epoch": 0.39, "grad_norm": 1.9745168936105955, "learning_rate": 7.020397630525336e-06, "loss": 0.6745, "step": 3719 }, { "epoch": 0.39, "grad_norm": 1.7289255604430116, "learning_rate": 7.018857654424008e-06, "loss": 0.6488, "step": 3720 }, { "epoch": 0.39, "grad_norm": 1.9155400352019034, "learning_rate": 7.017317449473658e-06, "loss": 0.6324, "step": 3721 }, { "epoch": 0.39, "grad_norm": 1.5674009208190292, "learning_rate": 7.015777015848877e-06, "loss": 0.6442, "step": 3722 }, { "epoch": 0.39, "grad_norm": 1.876596679522037, "learning_rate": 7.0142363537242815e-06, "loss": 0.6725, "step": 3723 }, { "epoch": 0.39, "grad_norm": 1.8965716533361012, "learning_rate": 7.012695463274515e-06, "loss": 0.6449, "step": 3724 }, { "epoch": 0.39, "grad_norm": 1.8872516694937251, "learning_rate": 7.0111543446742444e-06, "loss": 0.6431, "step": 3725 }, { "epoch": 0.39, "grad_norm": 2.1515953153188776, "learning_rate": 7.0096129980981674e-06, "loss": 0.6978, "step": 3726 }, { "epoch": 0.39, "grad_norm": 2.0270766691639523, "learning_rate": 7.008071423721004e-06, "loss": 0.6044, "step": 3727 }, { "epoch": 0.39, "grad_norm": 2.0184644859313225, "learning_rate": 7.006529621717496e-06, "loss": 0.696, "step": 3728 }, { "epoch": 0.39, "grad_norm": 2.0626010069202447, "learning_rate": 7.00498759226242e-06, "loss": 0.6551, "step": 3729 }, { "epoch": 0.39, "grad_norm": 2.016147799155721, "learning_rate": 7.003445335530572e-06, "loss": 0.641, "step": 3730 }, { "epoch": 0.39, "grad_norm": 1.8731480687364606, "learning_rate": 7.001902851696775e-06, "loss": 0.5508, "step": 3731 }, { "epoch": 0.39, "grad_norm": 1.7525210729280494, "learning_rate": 7.000360140935881e-06, "loss": 0.5715, "step": 3732 }, { "epoch": 0.39, "grad_norm": 1.7791094074416964, "learning_rate": 6.998817203422763e-06, "loss": 0.6188, "step": 3733 }, { "epoch": 0.39, "grad_norm": 1.7107178217967776, "learning_rate": 6.997274039332323e-06, "loss": 0.6057, "step": 3734 }, { "epoch": 0.39, "grad_norm": 2.1689931360984835, "learning_rate": 6.995730648839485e-06, "loss": 0.6081, "step": 3735 }, { "epoch": 0.39, "grad_norm": 1.9944082514802253, "learning_rate": 6.9941870321192015e-06, "loss": 0.6827, "step": 3736 }, { "epoch": 0.39, "grad_norm": 1.9902720881288807, "learning_rate": 6.992643189346453e-06, "loss": 0.6835, "step": 3737 }, { "epoch": 0.39, "grad_norm": 1.9676878976709407, "learning_rate": 6.991099120696243e-06, "loss": 0.6268, "step": 3738 }, { "epoch": 0.39, "grad_norm": 1.9265199475254238, "learning_rate": 6.989554826343597e-06, "loss": 0.6599, "step": 3739 }, { "epoch": 0.39, "grad_norm": 1.6498327098254237, "learning_rate": 6.988010306463571e-06, "loss": 0.5858, "step": 3740 }, { "epoch": 0.39, "grad_norm": 1.8263087431164569, "learning_rate": 6.986465561231246e-06, "loss": 0.5895, "step": 3741 }, { "epoch": 0.39, "grad_norm": 1.9555452689076078, "learning_rate": 6.984920590821726e-06, "loss": 0.6573, "step": 3742 }, { "epoch": 0.39, "grad_norm": 2.0437770842120933, "learning_rate": 6.983375395410146e-06, "loss": 0.6526, "step": 3743 }, { "epoch": 0.39, "grad_norm": 1.869433885945104, "learning_rate": 6.981829975171658e-06, "loss": 0.6379, "step": 3744 }, { "epoch": 0.39, "grad_norm": 1.8994878040677412, "learning_rate": 6.9802843302814475e-06, "loss": 0.673, "step": 3745 }, { "epoch": 0.39, "grad_norm": 1.8642847780082168, "learning_rate": 6.97873846091472e-06, "loss": 0.6918, "step": 3746 }, { "epoch": 0.39, "grad_norm": 1.940150418676658, "learning_rate": 6.977192367246709e-06, "loss": 0.6778, "step": 3747 }, { "epoch": 0.39, "grad_norm": 1.952659328164777, "learning_rate": 6.975646049452673e-06, "loss": 0.6567, "step": 3748 }, { "epoch": 0.39, "grad_norm": 1.897858212581815, "learning_rate": 6.9740995077079e-06, "loss": 0.6589, "step": 3749 }, { "epoch": 0.39, "grad_norm": 1.9798414395768777, "learning_rate": 6.972552742187693e-06, "loss": 0.6554, "step": 3750 }, { "epoch": 0.39, "grad_norm": 1.947474102381345, "learning_rate": 6.971005753067391e-06, "loss": 0.6939, "step": 3751 }, { "epoch": 0.39, "grad_norm": 1.8593020322548108, "learning_rate": 6.96945854052235e-06, "loss": 0.6908, "step": 3752 }, { "epoch": 0.39, "grad_norm": 1.8790691411742364, "learning_rate": 6.96791110472796e-06, "loss": 0.6918, "step": 3753 }, { "epoch": 0.39, "grad_norm": 2.009560441823809, "learning_rate": 6.966363445859629e-06, "loss": 0.701, "step": 3754 }, { "epoch": 0.39, "grad_norm": 1.7247879670505806, "learning_rate": 6.964815564092792e-06, "loss": 0.5531, "step": 3755 }, { "epoch": 0.39, "grad_norm": 1.9237024626907298, "learning_rate": 6.9632674596029135e-06, "loss": 0.7353, "step": 3756 }, { "epoch": 0.39, "grad_norm": 1.8423072923305135, "learning_rate": 6.9617191325654785e-06, "loss": 0.6292, "step": 3757 }, { "epoch": 0.39, "grad_norm": 2.095217792173662, "learning_rate": 6.9601705831559985e-06, "loss": 0.7027, "step": 3758 }, { "epoch": 0.39, "grad_norm": 1.9172392464043688, "learning_rate": 6.95862181155001e-06, "loss": 0.5968, "step": 3759 }, { "epoch": 0.39, "grad_norm": 1.7885688923040335, "learning_rate": 6.957072817923074e-06, "loss": 0.6571, "step": 3760 }, { "epoch": 0.39, "grad_norm": 2.0189100338935813, "learning_rate": 6.95552360245078e-06, "loss": 0.6102, "step": 3761 }, { "epoch": 0.39, "grad_norm": 2.176085342804834, "learning_rate": 6.95397416530874e-06, "loss": 0.7103, "step": 3762 }, { "epoch": 0.39, "grad_norm": 1.8281330590058102, "learning_rate": 6.95242450667259e-06, "loss": 0.5678, "step": 3763 }, { "epoch": 0.39, "grad_norm": 1.780282107248268, "learning_rate": 6.950874626717996e-06, "loss": 0.5708, "step": 3764 }, { "epoch": 0.39, "grad_norm": 1.696269074894569, "learning_rate": 6.949324525620642e-06, "loss": 0.627, "step": 3765 }, { "epoch": 0.39, "grad_norm": 2.028234243684498, "learning_rate": 6.947774203556241e-06, "loss": 0.6769, "step": 3766 }, { "epoch": 0.39, "grad_norm": 1.8100157337051184, "learning_rate": 6.946223660700535e-06, "loss": 0.6096, "step": 3767 }, { "epoch": 0.39, "grad_norm": 2.0304897261390935, "learning_rate": 6.944672897229282e-06, "loss": 0.7408, "step": 3768 }, { "epoch": 0.39, "grad_norm": 1.8186687992645447, "learning_rate": 6.943121913318272e-06, "loss": 0.6727, "step": 3769 }, { "epoch": 0.39, "grad_norm": 2.014696798801752, "learning_rate": 6.941570709143317e-06, "loss": 0.6679, "step": 3770 }, { "epoch": 0.39, "grad_norm": 1.9573005847526657, "learning_rate": 6.9400192848802575e-06, "loss": 0.7203, "step": 3771 }, { "epoch": 0.39, "grad_norm": 1.870931516660422, "learning_rate": 6.938467640704953e-06, "loss": 0.6407, "step": 3772 }, { "epoch": 0.39, "grad_norm": 1.924348926934089, "learning_rate": 6.936915776793293e-06, "loss": 0.7739, "step": 3773 }, { "epoch": 0.39, "grad_norm": 1.8219644545452585, "learning_rate": 6.935363693321189e-06, "loss": 0.7424, "step": 3774 }, { "epoch": 0.39, "grad_norm": 1.8909820062964133, "learning_rate": 6.93381139046458e-06, "loss": 0.5874, "step": 3775 }, { "epoch": 0.39, "grad_norm": 1.851095708702634, "learning_rate": 6.932258868399426e-06, "loss": 0.5811, "step": 3776 }, { "epoch": 0.39, "grad_norm": 1.9626956710437433, "learning_rate": 6.930706127301718e-06, "loss": 0.6269, "step": 3777 }, { "epoch": 0.39, "grad_norm": 1.8588515617089818, "learning_rate": 6.9291531673474645e-06, "loss": 0.6689, "step": 3778 }, { "epoch": 0.39, "grad_norm": 2.0129296892502437, "learning_rate": 6.9275999887127045e-06, "loss": 0.692, "step": 3779 }, { "epoch": 0.39, "grad_norm": 1.967573552691157, "learning_rate": 6.926046591573498e-06, "loss": 0.6498, "step": 3780 }, { "epoch": 0.39, "grad_norm": 1.9177145711520847, "learning_rate": 6.924492976105932e-06, "loss": 0.6621, "step": 3781 }, { "epoch": 0.39, "grad_norm": 1.8662266389846618, "learning_rate": 6.922939142486118e-06, "loss": 0.7561, "step": 3782 }, { "epoch": 0.39, "grad_norm": 2.0519301872983853, "learning_rate": 6.921385090890193e-06, "loss": 0.613, "step": 3783 }, { "epoch": 0.39, "grad_norm": 1.9506463097256144, "learning_rate": 6.919830821494314e-06, "loss": 0.771, "step": 3784 }, { "epoch": 0.39, "grad_norm": 2.035175531222123, "learning_rate": 6.918276334474671e-06, "loss": 0.7823, "step": 3785 }, { "epoch": 0.39, "grad_norm": 1.7749634341037155, "learning_rate": 6.916721630007471e-06, "loss": 0.6171, "step": 3786 }, { "epoch": 0.39, "grad_norm": 2.135332931852134, "learning_rate": 6.91516670826895e-06, "loss": 0.5635, "step": 3787 }, { "epoch": 0.39, "grad_norm": 1.7881778397885084, "learning_rate": 6.913611569435366e-06, "loss": 0.5461, "step": 3788 }, { "epoch": 0.39, "grad_norm": 1.7450638856633924, "learning_rate": 6.912056213683001e-06, "loss": 0.6384, "step": 3789 }, { "epoch": 0.39, "grad_norm": 1.9663931312471843, "learning_rate": 6.9105006411881695e-06, "loss": 0.6423, "step": 3790 }, { "epoch": 0.39, "grad_norm": 2.012129630221285, "learning_rate": 6.9089448521271995e-06, "loss": 0.6986, "step": 3791 }, { "epoch": 0.39, "grad_norm": 1.933878118917115, "learning_rate": 6.9073888466764495e-06, "loss": 0.6312, "step": 3792 }, { "epoch": 0.39, "grad_norm": 1.8464628698639192, "learning_rate": 6.905832625012301e-06, "loss": 0.6321, "step": 3793 }, { "epoch": 0.39, "grad_norm": 2.062510373103962, "learning_rate": 6.904276187311163e-06, "loss": 0.6971, "step": 3794 }, { "epoch": 0.39, "grad_norm": 1.93374853425991, "learning_rate": 6.9027195337494645e-06, "loss": 0.5843, "step": 3795 }, { "epoch": 0.39, "grad_norm": 2.1647078071719457, "learning_rate": 6.901162664503662e-06, "loss": 0.5992, "step": 3796 }, { "epoch": 0.39, "grad_norm": 1.9599458350519412, "learning_rate": 6.899605579750236e-06, "loss": 0.6586, "step": 3797 }, { "epoch": 0.39, "grad_norm": 2.2163360975899313, "learning_rate": 6.898048279665689e-06, "loss": 0.6925, "step": 3798 }, { "epoch": 0.39, "grad_norm": 1.7824221315080275, "learning_rate": 6.896490764426551e-06, "loss": 0.6436, "step": 3799 }, { "epoch": 0.4, "grad_norm": 1.9842651710271497, "learning_rate": 6.8949330342093756e-06, "loss": 0.7, "step": 3800 }, { "epoch": 0.4, "grad_norm": 1.8505223212020494, "learning_rate": 6.893375089190741e-06, "loss": 0.6553, "step": 3801 }, { "epoch": 0.4, "grad_norm": 1.8419752520577877, "learning_rate": 6.891816929547247e-06, "loss": 0.5906, "step": 3802 }, { "epoch": 0.4, "grad_norm": 1.6868313861269602, "learning_rate": 6.890258555455521e-06, "loss": 0.6291, "step": 3803 }, { "epoch": 0.4, "grad_norm": 1.906131986603101, "learning_rate": 6.888699967092215e-06, "loss": 0.6008, "step": 3804 }, { "epoch": 0.4, "grad_norm": 1.756561257424716, "learning_rate": 6.887141164634001e-06, "loss": 0.5851, "step": 3805 }, { "epoch": 0.4, "grad_norm": 1.8379054874503786, "learning_rate": 6.885582148257579e-06, "loss": 0.6615, "step": 3806 }, { "epoch": 0.4, "grad_norm": 1.8589992016721577, "learning_rate": 6.884022918139675e-06, "loss": 0.5656, "step": 3807 }, { "epoch": 0.4, "grad_norm": 1.7750855346733305, "learning_rate": 6.882463474457034e-06, "loss": 0.684, "step": 3808 }, { "epoch": 0.4, "grad_norm": 1.7331464972792372, "learning_rate": 6.8809038173864285e-06, "loss": 0.5875, "step": 3809 }, { "epoch": 0.4, "grad_norm": 1.8145592971627584, "learning_rate": 6.879343947104653e-06, "loss": 0.6518, "step": 3810 }, { "epoch": 0.4, "grad_norm": 1.7186609313479941, "learning_rate": 6.877783863788531e-06, "loss": 0.6398, "step": 3811 }, { "epoch": 0.4, "grad_norm": 2.0593901439717217, "learning_rate": 6.876223567614904e-06, "loss": 0.6199, "step": 3812 }, { "epoch": 0.4, "grad_norm": 2.051921874821278, "learning_rate": 6.874663058760642e-06, "loss": 0.6652, "step": 3813 }, { "epoch": 0.4, "grad_norm": 1.983268125178121, "learning_rate": 6.873102337402637e-06, "loss": 0.6742, "step": 3814 }, { "epoch": 0.4, "grad_norm": 1.9250957085532272, "learning_rate": 6.871541403717808e-06, "loss": 0.5963, "step": 3815 }, { "epoch": 0.4, "grad_norm": 1.8923565653388212, "learning_rate": 6.86998025788309e-06, "loss": 0.6559, "step": 3816 }, { "epoch": 0.4, "grad_norm": 1.7717289543577657, "learning_rate": 6.868418900075452e-06, "loss": 0.6317, "step": 3817 }, { "epoch": 0.4, "grad_norm": 1.8393903443302437, "learning_rate": 6.866857330471882e-06, "loss": 0.6645, "step": 3818 }, { "epoch": 0.4, "grad_norm": 1.907056188925765, "learning_rate": 6.8652955492493944e-06, "loss": 0.6674, "step": 3819 }, { "epoch": 0.4, "grad_norm": 1.7792952155871635, "learning_rate": 6.863733556585023e-06, "loss": 0.6211, "step": 3820 }, { "epoch": 0.4, "grad_norm": 2.0070176175353485, "learning_rate": 6.862171352655831e-06, "loss": 0.5849, "step": 3821 }, { "epoch": 0.4, "grad_norm": 1.927108955804964, "learning_rate": 6.8606089376389006e-06, "loss": 0.6479, "step": 3822 }, { "epoch": 0.4, "grad_norm": 1.8876654284510865, "learning_rate": 6.859046311711344e-06, "loss": 0.6571, "step": 3823 }, { "epoch": 0.4, "grad_norm": 1.9645805494724002, "learning_rate": 6.85748347505029e-06, "loss": 0.6443, "step": 3824 }, { "epoch": 0.4, "grad_norm": 2.033078807453262, "learning_rate": 6.855920427832898e-06, "loss": 0.7693, "step": 3825 }, { "epoch": 0.4, "grad_norm": 1.741023709894303, "learning_rate": 6.854357170236346e-06, "loss": 0.5633, "step": 3826 }, { "epoch": 0.4, "grad_norm": 1.8327831597190622, "learning_rate": 6.85279370243784e-06, "loss": 0.6629, "step": 3827 }, { "epoch": 0.4, "grad_norm": 2.138936128449846, "learning_rate": 6.851230024614608e-06, "loss": 0.6216, "step": 3828 }, { "epoch": 0.4, "grad_norm": 1.9998323937030877, "learning_rate": 6.8496661369439e-06, "loss": 0.6371, "step": 3829 }, { "epoch": 0.4, "grad_norm": 1.956674866863184, "learning_rate": 6.848102039602993e-06, "loss": 0.6175, "step": 3830 }, { "epoch": 0.4, "grad_norm": 2.1315466856909713, "learning_rate": 6.846537732769185e-06, "loss": 0.7057, "step": 3831 }, { "epoch": 0.4, "grad_norm": 1.8023054657266382, "learning_rate": 6.844973216619801e-06, "loss": 0.7285, "step": 3832 }, { "epoch": 0.4, "grad_norm": 2.331237965000702, "learning_rate": 6.843408491332186e-06, "loss": 0.6631, "step": 3833 }, { "epoch": 0.4, "grad_norm": 1.8747243292576776, "learning_rate": 6.841843557083714e-06, "loss": 0.623, "step": 3834 }, { "epoch": 0.4, "grad_norm": 2.081104098621447, "learning_rate": 6.840278414051774e-06, "loss": 0.6736, "step": 3835 }, { "epoch": 0.4, "grad_norm": 2.027824141424956, "learning_rate": 6.838713062413788e-06, "loss": 0.7356, "step": 3836 }, { "epoch": 0.4, "grad_norm": 1.8448960739370412, "learning_rate": 6.8371475023471945e-06, "loss": 0.689, "step": 3837 }, { "epoch": 0.4, "grad_norm": 1.9094274178849335, "learning_rate": 6.835581734029462e-06, "loss": 0.64, "step": 3838 }, { "epoch": 0.4, "grad_norm": 1.8412279763358383, "learning_rate": 6.834015757638076e-06, "loss": 0.6305, "step": 3839 }, { "epoch": 0.4, "grad_norm": 1.8963150898529104, "learning_rate": 6.8324495733505515e-06, "loss": 0.5916, "step": 3840 }, { "epoch": 0.4, "grad_norm": 2.006814141934046, "learning_rate": 6.830883181344423e-06, "loss": 0.6003, "step": 3841 }, { "epoch": 0.4, "grad_norm": 1.761995258515656, "learning_rate": 6.829316581797249e-06, "loss": 0.5449, "step": 3842 }, { "epoch": 0.4, "grad_norm": 1.8751428173287201, "learning_rate": 6.827749774886616e-06, "loss": 0.6368, "step": 3843 }, { "epoch": 0.4, "grad_norm": 1.8238649095624873, "learning_rate": 6.826182760790127e-06, "loss": 0.5485, "step": 3844 }, { "epoch": 0.4, "grad_norm": 1.8050371465050767, "learning_rate": 6.824615539685413e-06, "loss": 0.6776, "step": 3845 }, { "epoch": 0.4, "grad_norm": 1.8185148092018413, "learning_rate": 6.823048111750128e-06, "loss": 0.5611, "step": 3846 }, { "epoch": 0.4, "grad_norm": 1.9137674036077919, "learning_rate": 6.821480477161948e-06, "loss": 0.7279, "step": 3847 }, { "epoch": 0.4, "grad_norm": 2.278371530895591, "learning_rate": 6.819912636098574e-06, "loss": 0.6727, "step": 3848 }, { "epoch": 0.4, "grad_norm": 1.9336374035262587, "learning_rate": 6.81834458873773e-06, "loss": 0.5828, "step": 3849 }, { "epoch": 0.4, "grad_norm": 1.608424844887298, "learning_rate": 6.816776335257162e-06, "loss": 0.6486, "step": 3850 }, { "epoch": 0.4, "grad_norm": 1.9733951460532413, "learning_rate": 6.815207875834641e-06, "loss": 0.7597, "step": 3851 }, { "epoch": 0.4, "grad_norm": 1.8337771137234302, "learning_rate": 6.8136392106479624e-06, "loss": 0.6421, "step": 3852 }, { "epoch": 0.4, "grad_norm": 1.88536398864315, "learning_rate": 6.81207033987494e-06, "loss": 0.5933, "step": 3853 }, { "epoch": 0.4, "grad_norm": 1.8852332420912083, "learning_rate": 6.810501263693416e-06, "loss": 0.6265, "step": 3854 }, { "epoch": 0.4, "grad_norm": 1.6270870662218326, "learning_rate": 6.808931982281255e-06, "loss": 0.5371, "step": 3855 }, { "epoch": 0.4, "grad_norm": 1.9198351870779027, "learning_rate": 6.807362495816344e-06, "loss": 0.6244, "step": 3856 }, { "epoch": 0.4, "grad_norm": 1.9626894008745388, "learning_rate": 6.805792804476592e-06, "loss": 0.6162, "step": 3857 }, { "epoch": 0.4, "grad_norm": 1.847790506596514, "learning_rate": 6.8042229084399325e-06, "loss": 0.656, "step": 3858 }, { "epoch": 0.4, "grad_norm": 1.9491524882255473, "learning_rate": 6.802652807884322e-06, "loss": 0.6687, "step": 3859 }, { "epoch": 0.4, "grad_norm": 1.8982335002271724, "learning_rate": 6.801082502987742e-06, "loss": 0.6663, "step": 3860 }, { "epoch": 0.4, "grad_norm": 2.0630552583208925, "learning_rate": 6.799511993928195e-06, "loss": 0.5751, "step": 3861 }, { "epoch": 0.4, "grad_norm": 1.9407115444354701, "learning_rate": 6.797941280883706e-06, "loss": 0.6156, "step": 3862 }, { "epoch": 0.4, "grad_norm": 1.855317202450553, "learning_rate": 6.796370364032324e-06, "loss": 0.6032, "step": 3863 }, { "epoch": 0.4, "grad_norm": 1.8158488358917717, "learning_rate": 6.794799243552123e-06, "loss": 0.5607, "step": 3864 }, { "epoch": 0.4, "grad_norm": 1.7489231484293017, "learning_rate": 6.793227919621197e-06, "loss": 0.7611, "step": 3865 }, { "epoch": 0.4, "grad_norm": 2.0671408277409413, "learning_rate": 6.791656392417666e-06, "loss": 0.7821, "step": 3866 }, { "epoch": 0.4, "grad_norm": 1.6697050142330605, "learning_rate": 6.790084662119671e-06, "loss": 0.6472, "step": 3867 }, { "epoch": 0.4, "grad_norm": 1.9443126027690214, "learning_rate": 6.7885127289053765e-06, "loss": 0.6885, "step": 3868 }, { "epoch": 0.4, "grad_norm": 1.7689567655002967, "learning_rate": 6.78694059295297e-06, "loss": 0.6111, "step": 3869 }, { "epoch": 0.4, "grad_norm": 1.9659917977999166, "learning_rate": 6.785368254440661e-06, "loss": 0.5973, "step": 3870 }, { "epoch": 0.4, "grad_norm": 1.9024907067927386, "learning_rate": 6.783795713546686e-06, "loss": 0.6616, "step": 3871 }, { "epoch": 0.4, "grad_norm": 1.8879653406046633, "learning_rate": 6.782222970449298e-06, "loss": 0.6182, "step": 3872 }, { "epoch": 0.4, "grad_norm": 1.8947196759807639, "learning_rate": 6.780650025326778e-06, "loss": 0.6899, "step": 3873 }, { "epoch": 0.4, "grad_norm": 1.8450879931056883, "learning_rate": 6.779076878357429e-06, "loss": 0.5877, "step": 3874 }, { "epoch": 0.4, "grad_norm": 1.8933404415566144, "learning_rate": 6.777503529719576e-06, "loss": 0.7101, "step": 3875 }, { "epoch": 0.4, "grad_norm": 1.7558373826056832, "learning_rate": 6.775929979591565e-06, "loss": 0.6396, "step": 3876 }, { "epoch": 0.4, "grad_norm": 2.0003562552284433, "learning_rate": 6.774356228151768e-06, "loss": 0.6069, "step": 3877 }, { "epoch": 0.4, "grad_norm": 1.9255259994750547, "learning_rate": 6.772782275578582e-06, "loss": 0.5854, "step": 3878 }, { "epoch": 0.4, "grad_norm": 1.8642977093526387, "learning_rate": 6.771208122050418e-06, "loss": 0.6715, "step": 3879 }, { "epoch": 0.4, "grad_norm": 1.8022100159125873, "learning_rate": 6.769633767745718e-06, "loss": 0.6321, "step": 3880 }, { "epoch": 0.4, "grad_norm": 1.8278453367976983, "learning_rate": 6.768059212842944e-06, "loss": 0.592, "step": 3881 }, { "epoch": 0.4, "grad_norm": 1.8746751891331823, "learning_rate": 6.7664844575205816e-06, "loss": 0.6962, "step": 3882 }, { "epoch": 0.4, "grad_norm": 1.999998865342274, "learning_rate": 6.764909501957136e-06, "loss": 0.665, "step": 3883 }, { "epoch": 0.4, "grad_norm": 1.8866066346814458, "learning_rate": 6.76333434633114e-06, "loss": 0.7158, "step": 3884 }, { "epoch": 0.4, "grad_norm": 1.854320769553393, "learning_rate": 6.761758990821143e-06, "loss": 0.6665, "step": 3885 }, { "epoch": 0.4, "grad_norm": 2.0265802747703274, "learning_rate": 6.760183435605725e-06, "loss": 0.6098, "step": 3886 }, { "epoch": 0.4, "grad_norm": 1.8563571894286828, "learning_rate": 6.758607680863481e-06, "loss": 0.6027, "step": 3887 }, { "epoch": 0.4, "grad_norm": 1.8388299982466285, "learning_rate": 6.757031726773033e-06, "loss": 0.6205, "step": 3888 }, { "epoch": 0.4, "grad_norm": 1.9306457408739781, "learning_rate": 6.755455573513025e-06, "loss": 0.6471, "step": 3889 }, { "epoch": 0.4, "grad_norm": 1.8282397443953233, "learning_rate": 6.75387922126212e-06, "loss": 0.6413, "step": 3890 }, { "epoch": 0.4, "grad_norm": 1.8013519821627202, "learning_rate": 6.752302670199009e-06, "loss": 0.5619, "step": 3891 }, { "epoch": 0.4, "grad_norm": 2.279002945506568, "learning_rate": 6.750725920502402e-06, "loss": 0.7127, "step": 3892 }, { "epoch": 0.4, "grad_norm": 2.0640115448453646, "learning_rate": 6.749148972351034e-06, "loss": 0.6407, "step": 3893 }, { "epoch": 0.4, "grad_norm": 2.0687523081357693, "learning_rate": 6.74757182592366e-06, "loss": 0.6546, "step": 3894 }, { "epoch": 0.4, "grad_norm": 2.116509052754373, "learning_rate": 6.7459944813990585e-06, "loss": 0.737, "step": 3895 }, { "epoch": 0.4, "grad_norm": 2.0452835675623184, "learning_rate": 6.744416938956031e-06, "loss": 0.7485, "step": 3896 }, { "epoch": 0.41, "grad_norm": 1.8635310962131926, "learning_rate": 6.7428391987734e-06, "loss": 0.639, "step": 3897 }, { "epoch": 0.41, "grad_norm": 2.00514277899685, "learning_rate": 6.741261261030013e-06, "loss": 0.7099, "step": 3898 }, { "epoch": 0.41, "grad_norm": 1.7909492085460657, "learning_rate": 6.739683125904737e-06, "loss": 0.7444, "step": 3899 }, { "epoch": 0.41, "grad_norm": 1.9311294006580535, "learning_rate": 6.7381047935764625e-06, "loss": 0.6704, "step": 3900 }, { "epoch": 0.41, "grad_norm": 2.066959770065368, "learning_rate": 6.736526264224101e-06, "loss": 0.6768, "step": 3901 }, { "epoch": 0.41, "grad_norm": 1.7730917255734966, "learning_rate": 6.7349475380265926e-06, "loss": 0.6414, "step": 3902 }, { "epoch": 0.41, "grad_norm": 1.7538215933569536, "learning_rate": 6.7333686151628895e-06, "loss": 0.7574, "step": 3903 }, { "epoch": 0.41, "grad_norm": 1.8843523002908729, "learning_rate": 6.731789495811975e-06, "loss": 0.6446, "step": 3904 }, { "epoch": 0.41, "grad_norm": 1.9711212147816433, "learning_rate": 6.730210180152852e-06, "loss": 0.6436, "step": 3905 }, { "epoch": 0.41, "grad_norm": 1.7667975906547377, "learning_rate": 6.728630668364541e-06, "loss": 0.5743, "step": 3906 }, { "epoch": 0.41, "grad_norm": 1.747514957971742, "learning_rate": 6.7270509606260915e-06, "loss": 0.5884, "step": 3907 }, { "epoch": 0.41, "grad_norm": 1.8617280757448222, "learning_rate": 6.725471057116573e-06, "loss": 0.7538, "step": 3908 }, { "epoch": 0.41, "grad_norm": 2.103545907282549, "learning_rate": 6.7238909580150735e-06, "loss": 0.6881, "step": 3909 }, { "epoch": 0.41, "grad_norm": 1.8513840301905578, "learning_rate": 6.7223106635007085e-06, "loss": 0.671, "step": 3910 }, { "epoch": 0.41, "grad_norm": 2.049540873747753, "learning_rate": 6.720730173752613e-06, "loss": 0.5919, "step": 3911 }, { "epoch": 0.41, "grad_norm": 1.8190429314370908, "learning_rate": 6.719149488949945e-06, "loss": 0.5832, "step": 3912 }, { "epoch": 0.41, "grad_norm": 1.8679025089120667, "learning_rate": 6.717568609271883e-06, "loss": 0.7133, "step": 3913 }, { "epoch": 0.41, "grad_norm": 2.0178764743414006, "learning_rate": 6.715987534897629e-06, "loss": 0.7242, "step": 3914 }, { "epoch": 0.41, "grad_norm": 1.9092744073317511, "learning_rate": 6.714406266006408e-06, "loss": 0.6669, "step": 3915 }, { "epoch": 0.41, "grad_norm": 1.7023005026783422, "learning_rate": 6.712824802777465e-06, "loss": 0.6614, "step": 3916 }, { "epoch": 0.41, "grad_norm": 2.077414840829166, "learning_rate": 6.711243145390066e-06, "loss": 0.7272, "step": 3917 }, { "epoch": 0.41, "grad_norm": 1.5880707527072129, "learning_rate": 6.709661294023504e-06, "loss": 0.5592, "step": 3918 }, { "epoch": 0.41, "grad_norm": 2.0116835193591376, "learning_rate": 6.708079248857091e-06, "loss": 0.7295, "step": 3919 }, { "epoch": 0.41, "grad_norm": 1.9509358664824894, "learning_rate": 6.706497010070157e-06, "loss": 0.6443, "step": 3920 }, { "epoch": 0.41, "grad_norm": 1.9452163843629293, "learning_rate": 6.704914577842062e-06, "loss": 0.6771, "step": 3921 }, { "epoch": 0.41, "grad_norm": 1.7279799757953567, "learning_rate": 6.703331952352181e-06, "loss": 0.6676, "step": 3922 }, { "epoch": 0.41, "grad_norm": 1.9451199834666655, "learning_rate": 6.701749133779916e-06, "loss": 0.5865, "step": 3923 }, { "epoch": 0.41, "grad_norm": 1.813795054616006, "learning_rate": 6.700166122304686e-06, "loss": 0.6689, "step": 3924 }, { "epoch": 0.41, "grad_norm": 1.8965308229743518, "learning_rate": 6.698582918105934e-06, "loss": 0.6256, "step": 3925 }, { "epoch": 0.41, "grad_norm": 1.7884201565417062, "learning_rate": 6.696999521363128e-06, "loss": 0.6363, "step": 3926 }, { "epoch": 0.41, "grad_norm": 1.9635557773440546, "learning_rate": 6.695415932255753e-06, "loss": 0.653, "step": 3927 }, { "epoch": 0.41, "grad_norm": 2.0890326348091453, "learning_rate": 6.69383215096332e-06, "loss": 0.7069, "step": 3928 }, { "epoch": 0.41, "grad_norm": 1.6538146640381228, "learning_rate": 6.692248177665357e-06, "loss": 0.5993, "step": 3929 }, { "epoch": 0.41, "grad_norm": 1.9705437684955363, "learning_rate": 6.690664012541418e-06, "loss": 0.7516, "step": 3930 }, { "epoch": 0.41, "grad_norm": 1.7444055802195977, "learning_rate": 6.689079655771076e-06, "loss": 0.6767, "step": 3931 }, { "epoch": 0.41, "grad_norm": 1.8819144065805602, "learning_rate": 6.687495107533928e-06, "loss": 0.7314, "step": 3932 }, { "epoch": 0.41, "grad_norm": 1.8479615838505195, "learning_rate": 6.685910368009592e-06, "loss": 0.6272, "step": 3933 }, { "epoch": 0.41, "grad_norm": 2.0909081795401017, "learning_rate": 6.684325437377704e-06, "loss": 0.681, "step": 3934 }, { "epoch": 0.41, "grad_norm": 1.9637392727671243, "learning_rate": 6.682740315817929e-06, "loss": 0.6803, "step": 3935 }, { "epoch": 0.41, "grad_norm": 1.7789446449482587, "learning_rate": 6.681155003509949e-06, "loss": 0.6188, "step": 3936 }, { "epoch": 0.41, "grad_norm": 1.8522898997130828, "learning_rate": 6.679569500633466e-06, "loss": 0.6477, "step": 3937 }, { "epoch": 0.41, "grad_norm": 1.5571151829309526, "learning_rate": 6.6779838073682066e-06, "loss": 0.5387, "step": 3938 }, { "epoch": 0.41, "grad_norm": 1.7670313948733032, "learning_rate": 6.676397923893918e-06, "loss": 0.6602, "step": 3939 }, { "epoch": 0.41, "grad_norm": 2.027370857469276, "learning_rate": 6.67481185039037e-06, "loss": 0.6638, "step": 3940 }, { "epoch": 0.41, "grad_norm": 1.9916936077066447, "learning_rate": 6.673225587037354e-06, "loss": 0.6213, "step": 3941 }, { "epoch": 0.41, "grad_norm": 1.9133600244144617, "learning_rate": 6.671639134014679e-06, "loss": 0.5868, "step": 3942 }, { "epoch": 0.41, "grad_norm": 1.8755522182462092, "learning_rate": 6.670052491502182e-06, "loss": 0.6124, "step": 3943 }, { "epoch": 0.41, "grad_norm": 2.3078342997813386, "learning_rate": 6.668465659679714e-06, "loss": 0.7289, "step": 3944 }, { "epoch": 0.41, "grad_norm": 1.9514380326839884, "learning_rate": 6.666878638727154e-06, "loss": 0.6594, "step": 3945 }, { "epoch": 0.41, "grad_norm": 2.133201334464456, "learning_rate": 6.6652914288243996e-06, "loss": 0.5717, "step": 3946 }, { "epoch": 0.41, "grad_norm": 1.9012313927156967, "learning_rate": 6.66370403015137e-06, "loss": 0.5791, "step": 3947 }, { "epoch": 0.41, "grad_norm": 1.9722937856370126, "learning_rate": 6.662116442888007e-06, "loss": 0.6285, "step": 3948 }, { "epoch": 0.41, "grad_norm": 1.903817678915892, "learning_rate": 6.66052866721427e-06, "loss": 0.6406, "step": 3949 }, { "epoch": 0.41, "grad_norm": 1.9979888015586342, "learning_rate": 6.6589407033101435e-06, "loss": 0.7432, "step": 3950 }, { "epoch": 0.41, "grad_norm": 1.6259230128595419, "learning_rate": 6.657352551355634e-06, "loss": 0.6007, "step": 3951 }, { "epoch": 0.41, "grad_norm": 1.9194025746637722, "learning_rate": 6.655764211530767e-06, "loss": 0.7022, "step": 3952 }, { "epoch": 0.41, "grad_norm": 1.735014946320013, "learning_rate": 6.654175684015587e-06, "loss": 0.6331, "step": 3953 }, { "epoch": 0.41, "grad_norm": 1.9823415478074962, "learning_rate": 6.652586968990164e-06, "loss": 0.7171, "step": 3954 }, { "epoch": 0.41, "grad_norm": 1.7583769540998349, "learning_rate": 6.650998066634589e-06, "loss": 0.6572, "step": 3955 }, { "epoch": 0.41, "grad_norm": 1.8600867587835666, "learning_rate": 6.649408977128975e-06, "loss": 0.6734, "step": 3956 }, { "epoch": 0.41, "grad_norm": 1.862698659637831, "learning_rate": 6.64781970065345e-06, "loss": 0.615, "step": 3957 }, { "epoch": 0.41, "grad_norm": 1.9564006592363403, "learning_rate": 6.646230237388172e-06, "loss": 0.6748, "step": 3958 }, { "epoch": 0.41, "grad_norm": 1.8968835571667682, "learning_rate": 6.644640587513313e-06, "loss": 0.6716, "step": 3959 }, { "epoch": 0.41, "grad_norm": 1.869462572063707, "learning_rate": 6.643050751209067e-06, "loss": 0.7545, "step": 3960 }, { "epoch": 0.41, "grad_norm": 1.8778880002152252, "learning_rate": 6.641460728655654e-06, "loss": 0.6778, "step": 3961 }, { "epoch": 0.41, "grad_norm": 1.7399480890625565, "learning_rate": 6.6398705200333125e-06, "loss": 0.5642, "step": 3962 }, { "epoch": 0.41, "grad_norm": 1.5578811285592717, "learning_rate": 6.6382801255223e-06, "loss": 0.4749, "step": 3963 }, { "epoch": 0.41, "grad_norm": 1.6948492903335945, "learning_rate": 6.636689545302898e-06, "loss": 0.6076, "step": 3964 }, { "epoch": 0.41, "grad_norm": 1.5766728927525278, "learning_rate": 6.6350987795554056e-06, "loss": 0.5974, "step": 3965 }, { "epoch": 0.41, "grad_norm": 1.9685332103178712, "learning_rate": 6.633507828460148e-06, "loss": 0.712, "step": 3966 }, { "epoch": 0.41, "grad_norm": 1.748252872817549, "learning_rate": 6.631916692197466e-06, "loss": 0.5779, "step": 3967 }, { "epoch": 0.41, "grad_norm": 2.2011682557023478, "learning_rate": 6.6303253709477276e-06, "loss": 0.756, "step": 3968 }, { "epoch": 0.41, "grad_norm": 1.870775828998848, "learning_rate": 6.628733864891315e-06, "loss": 0.6796, "step": 3969 }, { "epoch": 0.41, "grad_norm": 1.9934558724921358, "learning_rate": 6.627142174208634e-06, "loss": 0.6594, "step": 3970 }, { "epoch": 0.41, "grad_norm": 1.762238827723651, "learning_rate": 6.625550299080115e-06, "loss": 0.673, "step": 3971 }, { "epoch": 0.41, "grad_norm": 1.7996931164327568, "learning_rate": 6.623958239686204e-06, "loss": 0.599, "step": 3972 }, { "epoch": 0.41, "grad_norm": 1.7062603543348756, "learning_rate": 6.622365996207368e-06, "loss": 0.6358, "step": 3973 }, { "epoch": 0.41, "grad_norm": 1.8352990023889209, "learning_rate": 6.620773568824101e-06, "loss": 0.5925, "step": 3974 }, { "epoch": 0.41, "grad_norm": 1.777374114911824, "learning_rate": 6.619180957716913e-06, "loss": 0.5977, "step": 3975 }, { "epoch": 0.41, "grad_norm": 1.8645416702797122, "learning_rate": 6.617588163066333e-06, "loss": 0.6726, "step": 3976 }, { "epoch": 0.41, "grad_norm": 1.986094087122366, "learning_rate": 6.615995185052915e-06, "loss": 0.6859, "step": 3977 }, { "epoch": 0.41, "grad_norm": 1.9671637070485966, "learning_rate": 6.614402023857231e-06, "loss": 0.6191, "step": 3978 }, { "epoch": 0.41, "grad_norm": 1.992735241698695, "learning_rate": 6.612808679659878e-06, "loss": 0.7177, "step": 3979 }, { "epoch": 0.41, "grad_norm": 1.8594009010630315, "learning_rate": 6.611215152641466e-06, "loss": 0.6817, "step": 3980 }, { "epoch": 0.41, "grad_norm": 1.8366393363968638, "learning_rate": 6.609621442982634e-06, "loss": 0.7283, "step": 3981 }, { "epoch": 0.41, "grad_norm": 1.8880200232029714, "learning_rate": 6.608027550864038e-06, "loss": 0.6786, "step": 3982 }, { "epoch": 0.41, "grad_norm": 1.7713852962235712, "learning_rate": 6.606433476466352e-06, "loss": 0.6204, "step": 3983 }, { "epoch": 0.41, "grad_norm": 1.951459700873205, "learning_rate": 6.604839219970276e-06, "loss": 0.5822, "step": 3984 }, { "epoch": 0.41, "grad_norm": 1.941984778501092, "learning_rate": 6.603244781556527e-06, "loss": 0.816, "step": 3985 }, { "epoch": 0.41, "grad_norm": 1.6869770005515161, "learning_rate": 6.601650161405844e-06, "loss": 0.6415, "step": 3986 }, { "epoch": 0.41, "grad_norm": 1.7618809871558578, "learning_rate": 6.600055359698984e-06, "loss": 0.718, "step": 3987 }, { "epoch": 0.41, "grad_norm": 1.8940876608861175, "learning_rate": 6.598460376616731e-06, "loss": 0.695, "step": 3988 }, { "epoch": 0.41, "grad_norm": 1.582975947302647, "learning_rate": 6.596865212339885e-06, "loss": 0.5889, "step": 3989 }, { "epoch": 0.41, "grad_norm": 2.0959689544385873, "learning_rate": 6.595269867049262e-06, "loss": 0.7246, "step": 3990 }, { "epoch": 0.41, "grad_norm": 1.875274315991869, "learning_rate": 6.5936743409257085e-06, "loss": 0.6586, "step": 3991 }, { "epoch": 0.41, "grad_norm": 1.952777633657918, "learning_rate": 6.592078634150084e-06, "loss": 0.6749, "step": 3992 }, { "epoch": 0.42, "grad_norm": 2.0330590435053755, "learning_rate": 6.590482746903273e-06, "loss": 0.694, "step": 3993 }, { "epoch": 0.42, "grad_norm": 2.040171030726033, "learning_rate": 6.588886679366177e-06, "loss": 0.8347, "step": 3994 }, { "epoch": 0.42, "grad_norm": 1.9779967255959818, "learning_rate": 6.587290431719718e-06, "loss": 0.5998, "step": 3995 }, { "epoch": 0.42, "grad_norm": 1.9489545122956449, "learning_rate": 6.585694004144844e-06, "loss": 0.622, "step": 3996 }, { "epoch": 0.42, "grad_norm": 1.7578029357926648, "learning_rate": 6.584097396822514e-06, "loss": 0.6302, "step": 3997 }, { "epoch": 0.42, "grad_norm": 1.851928626062996, "learning_rate": 6.582500609933715e-06, "loss": 0.6829, "step": 3998 }, { "epoch": 0.42, "grad_norm": 1.7174238910726383, "learning_rate": 6.580903643659453e-06, "loss": 0.6014, "step": 3999 }, { "epoch": 0.42, "grad_norm": 1.8121816554070762, "learning_rate": 6.579306498180753e-06, "loss": 0.6708, "step": 4000 }, { "epoch": 0.42, "grad_norm": 1.8179688321936576, "learning_rate": 6.577709173678658e-06, "loss": 0.6709, "step": 4001 }, { "epoch": 0.42, "grad_norm": 2.0852265758278596, "learning_rate": 6.5761116703342365e-06, "loss": 0.7801, "step": 4002 }, { "epoch": 0.42, "grad_norm": 1.725498003504001, "learning_rate": 6.574513988328572e-06, "loss": 0.6907, "step": 4003 }, { "epoch": 0.42, "grad_norm": 1.9877426312997266, "learning_rate": 6.572916127842775e-06, "loss": 0.5943, "step": 4004 }, { "epoch": 0.42, "grad_norm": 1.7800607172380565, "learning_rate": 6.5713180890579675e-06, "loss": 0.5852, "step": 4005 }, { "epoch": 0.42, "grad_norm": 2.0937490532286516, "learning_rate": 6.569719872155299e-06, "loss": 0.7188, "step": 4006 }, { "epoch": 0.42, "grad_norm": 1.7241389489460623, "learning_rate": 6.568121477315936e-06, "loss": 0.4979, "step": 4007 }, { "epoch": 0.42, "grad_norm": 2.007324352220353, "learning_rate": 6.566522904721066e-06, "loss": 0.6385, "step": 4008 }, { "epoch": 0.42, "grad_norm": 2.2119954176168126, "learning_rate": 6.564924154551895e-06, "loss": 0.6259, "step": 4009 }, { "epoch": 0.42, "grad_norm": 2.1210351984877147, "learning_rate": 6.563325226989652e-06, "loss": 0.707, "step": 4010 }, { "epoch": 0.42, "grad_norm": 1.8997902827936242, "learning_rate": 6.561726122215585e-06, "loss": 0.7243, "step": 4011 }, { "epoch": 0.42, "grad_norm": 1.6957497867748266, "learning_rate": 6.560126840410958e-06, "loss": 0.6136, "step": 4012 }, { "epoch": 0.42, "grad_norm": 1.7441208168504188, "learning_rate": 6.558527381757063e-06, "loss": 0.6152, "step": 4013 }, { "epoch": 0.42, "grad_norm": 1.790485685409849, "learning_rate": 6.556927746435204e-06, "loss": 0.6593, "step": 4014 }, { "epoch": 0.42, "grad_norm": 2.0974652382452303, "learning_rate": 6.555327934626714e-06, "loss": 0.6737, "step": 4015 }, { "epoch": 0.42, "grad_norm": 1.8269551144468505, "learning_rate": 6.553727946512935e-06, "loss": 0.6143, "step": 4016 }, { "epoch": 0.42, "grad_norm": 1.7153314087472655, "learning_rate": 6.552127782275238e-06, "loss": 0.6397, "step": 4017 }, { "epoch": 0.42, "grad_norm": 2.090028098890312, "learning_rate": 6.55052744209501e-06, "loss": 0.6303, "step": 4018 }, { "epoch": 0.42, "grad_norm": 2.009734384358688, "learning_rate": 6.548926926153659e-06, "loss": 0.6369, "step": 4019 }, { "epoch": 0.42, "grad_norm": 1.770723520702836, "learning_rate": 6.5473262346326125e-06, "loss": 0.5863, "step": 4020 }, { "epoch": 0.42, "grad_norm": 2.0075361710627084, "learning_rate": 6.545725367713317e-06, "loss": 0.5489, "step": 4021 }, { "epoch": 0.42, "grad_norm": 1.7245698235105078, "learning_rate": 6.5441243255772426e-06, "loss": 0.6108, "step": 4022 }, { "epoch": 0.42, "grad_norm": 1.8035379353837957, "learning_rate": 6.542523108405873e-06, "loss": 0.8067, "step": 4023 }, { "epoch": 0.42, "grad_norm": 1.8715746504239854, "learning_rate": 6.540921716380717e-06, "loss": 0.6967, "step": 4024 }, { "epoch": 0.42, "grad_norm": 2.096205344372492, "learning_rate": 6.539320149683302e-06, "loss": 0.645, "step": 4025 }, { "epoch": 0.42, "grad_norm": 1.7646255765648868, "learning_rate": 6.537718408495174e-06, "loss": 0.7169, "step": 4026 }, { "epoch": 0.42, "grad_norm": 1.8536865089842989, "learning_rate": 6.536116492997899e-06, "loss": 0.5697, "step": 4027 }, { "epoch": 0.42, "grad_norm": 2.0152237179046915, "learning_rate": 6.534514403373064e-06, "loss": 0.6279, "step": 4028 }, { "epoch": 0.42, "grad_norm": 1.8954231608810324, "learning_rate": 6.5329121398022756e-06, "loss": 0.665, "step": 4029 }, { "epoch": 0.42, "grad_norm": 1.8970903222431375, "learning_rate": 6.531309702467159e-06, "loss": 0.7078, "step": 4030 }, { "epoch": 0.42, "grad_norm": 2.135112232684426, "learning_rate": 6.52970709154936e-06, "loss": 0.7513, "step": 4031 }, { "epoch": 0.42, "grad_norm": 1.9549109974607481, "learning_rate": 6.528104307230542e-06, "loss": 0.6653, "step": 4032 }, { "epoch": 0.42, "grad_norm": 1.7139953691811927, "learning_rate": 6.526501349692392e-06, "loss": 0.591, "step": 4033 }, { "epoch": 0.42, "grad_norm": 1.6627286877312124, "learning_rate": 6.524898219116612e-06, "loss": 0.6157, "step": 4034 }, { "epoch": 0.42, "grad_norm": 2.0560769425457464, "learning_rate": 6.523294915684928e-06, "loss": 0.634, "step": 4035 }, { "epoch": 0.42, "grad_norm": 1.8113394568340626, "learning_rate": 6.5216914395790806e-06, "loss": 0.6815, "step": 4036 }, { "epoch": 0.42, "grad_norm": 1.9270712852849294, "learning_rate": 6.520087790980838e-06, "loss": 0.6843, "step": 4037 }, { "epoch": 0.42, "grad_norm": 1.9527943178109233, "learning_rate": 6.51848397007198e-06, "loss": 0.709, "step": 4038 }, { "epoch": 0.42, "grad_norm": 2.023544759218534, "learning_rate": 6.516879977034307e-06, "loss": 0.7007, "step": 4039 }, { "epoch": 0.42, "grad_norm": 2.0956614909800018, "learning_rate": 6.515275812049644e-06, "loss": 0.6164, "step": 4040 }, { "epoch": 0.42, "grad_norm": 2.0323171139807004, "learning_rate": 6.51367147529983e-06, "loss": 0.706, "step": 4041 }, { "epoch": 0.42, "grad_norm": 1.8683385638918015, "learning_rate": 6.512066966966728e-06, "loss": 0.6089, "step": 4042 }, { "epoch": 0.42, "grad_norm": 2.02288354013809, "learning_rate": 6.510462287232216e-06, "loss": 0.7069, "step": 4043 }, { "epoch": 0.42, "grad_norm": 1.8013988532696286, "learning_rate": 6.508857436278195e-06, "loss": 0.6228, "step": 4044 }, { "epoch": 0.42, "grad_norm": 2.0158224191382668, "learning_rate": 6.5072524142865825e-06, "loss": 0.7294, "step": 4045 }, { "epoch": 0.42, "grad_norm": 1.6978751958811078, "learning_rate": 6.505647221439317e-06, "loss": 0.527, "step": 4046 }, { "epoch": 0.42, "grad_norm": 1.7626559524080982, "learning_rate": 6.504041857918359e-06, "loss": 0.5494, "step": 4047 }, { "epoch": 0.42, "grad_norm": 2.052676030295689, "learning_rate": 6.502436323905683e-06, "loss": 0.7799, "step": 4048 }, { "epoch": 0.42, "grad_norm": 1.9461214027379186, "learning_rate": 6.500830619583286e-06, "loss": 0.6377, "step": 4049 }, { "epoch": 0.42, "grad_norm": 1.8652319862322915, "learning_rate": 6.499224745133184e-06, "loss": 0.6572, "step": 4050 }, { "epoch": 0.42, "grad_norm": 2.077328808264135, "learning_rate": 6.4976187007374116e-06, "loss": 0.7387, "step": 4051 }, { "epoch": 0.42, "grad_norm": 1.8648108669301284, "learning_rate": 6.496012486578024e-06, "loss": 0.6635, "step": 4052 }, { "epoch": 0.42, "grad_norm": 1.535449841127006, "learning_rate": 6.494406102837093e-06, "loss": 0.573, "step": 4053 }, { "epoch": 0.42, "grad_norm": 1.7169141635665444, "learning_rate": 6.492799549696712e-06, "loss": 0.5881, "step": 4054 }, { "epoch": 0.42, "grad_norm": 1.8801509275305481, "learning_rate": 6.4911928273389946e-06, "loss": 0.7089, "step": 4055 }, { "epoch": 0.42, "grad_norm": 1.6912175530324325, "learning_rate": 6.4895859359460714e-06, "loss": 0.6018, "step": 4056 }, { "epoch": 0.42, "grad_norm": 1.8383688047454536, "learning_rate": 6.487978875700091e-06, "loss": 0.6198, "step": 4057 }, { "epoch": 0.42, "grad_norm": 1.8638435936380096, "learning_rate": 6.486371646783223e-06, "loss": 0.6198, "step": 4058 }, { "epoch": 0.42, "grad_norm": 1.7715201966443364, "learning_rate": 6.4847642493776585e-06, "loss": 0.6964, "step": 4059 }, { "epoch": 0.42, "grad_norm": 1.7922800486547616, "learning_rate": 6.4831566836656024e-06, "loss": 0.5816, "step": 4060 }, { "epoch": 0.42, "grad_norm": 1.7727987030469636, "learning_rate": 6.481548949829282e-06, "loss": 0.6143, "step": 4061 }, { "epoch": 0.42, "grad_norm": 2.024752836474955, "learning_rate": 6.479941048050944e-06, "loss": 0.6124, "step": 4062 }, { "epoch": 0.42, "grad_norm": 1.8250783973226659, "learning_rate": 6.478332978512853e-06, "loss": 0.6522, "step": 4063 }, { "epoch": 0.42, "grad_norm": 1.9493846686603185, "learning_rate": 6.476724741397293e-06, "loss": 0.6507, "step": 4064 }, { "epoch": 0.42, "grad_norm": 1.755309105876831, "learning_rate": 6.4751163368865665e-06, "loss": 0.6361, "step": 4065 }, { "epoch": 0.42, "grad_norm": 1.810159349170945, "learning_rate": 6.473507765162994e-06, "loss": 0.6304, "step": 4066 }, { "epoch": 0.42, "grad_norm": 1.8676885311500284, "learning_rate": 6.47189902640892e-06, "loss": 0.7234, "step": 4067 }, { "epoch": 0.42, "grad_norm": 1.6933054623850488, "learning_rate": 6.4702901208067e-06, "loss": 0.6732, "step": 4068 }, { "epoch": 0.42, "grad_norm": 1.7455580715939196, "learning_rate": 6.468681048538715e-06, "loss": 0.6288, "step": 4069 }, { "epoch": 0.42, "grad_norm": 1.8551247654424352, "learning_rate": 6.467071809787363e-06, "loss": 0.6904, "step": 4070 }, { "epoch": 0.42, "grad_norm": 2.2777286315308367, "learning_rate": 6.4654624047350575e-06, "loss": 0.7492, "step": 4071 }, { "epoch": 0.42, "grad_norm": 1.6700799417004726, "learning_rate": 6.463852833564236e-06, "loss": 0.5802, "step": 4072 }, { "epoch": 0.42, "grad_norm": 1.829777392822749, "learning_rate": 6.462243096457352e-06, "loss": 0.6539, "step": 4073 }, { "epoch": 0.42, "grad_norm": 1.8592653629194054, "learning_rate": 6.460633193596879e-06, "loss": 0.5977, "step": 4074 }, { "epoch": 0.42, "grad_norm": 1.9504686835427847, "learning_rate": 6.459023125165308e-06, "loss": 0.7809, "step": 4075 }, { "epoch": 0.42, "grad_norm": 2.0166498080138373, "learning_rate": 6.4574128913451495e-06, "loss": 0.6218, "step": 4076 }, { "epoch": 0.42, "grad_norm": 1.9365597515134503, "learning_rate": 6.4558024923189336e-06, "loss": 0.678, "step": 4077 }, { "epoch": 0.42, "grad_norm": 1.7631181530060356, "learning_rate": 6.454191928269207e-06, "loss": 0.6319, "step": 4078 }, { "epoch": 0.42, "grad_norm": 1.861501700664707, "learning_rate": 6.452581199378536e-06, "loss": 0.6315, "step": 4079 }, { "epoch": 0.42, "grad_norm": 2.1103702750302067, "learning_rate": 6.450970305829507e-06, "loss": 0.7092, "step": 4080 }, { "epoch": 0.42, "grad_norm": 1.740618758516033, "learning_rate": 6.449359247804724e-06, "loss": 0.6223, "step": 4081 }, { "epoch": 0.42, "grad_norm": 1.8901679485332747, "learning_rate": 6.447748025486809e-06, "loss": 0.5887, "step": 4082 }, { "epoch": 0.42, "grad_norm": 1.9819488844550182, "learning_rate": 6.4461366390584025e-06, "loss": 0.7952, "step": 4083 }, { "epoch": 0.42, "grad_norm": 1.9219125754878177, "learning_rate": 6.444525088702166e-06, "loss": 0.8131, "step": 4084 }, { "epoch": 0.42, "grad_norm": 1.7999306822008587, "learning_rate": 6.442913374600778e-06, "loss": 0.6919, "step": 4085 }, { "epoch": 0.42, "grad_norm": 1.9952306260295138, "learning_rate": 6.441301496936934e-06, "loss": 0.6228, "step": 4086 }, { "epoch": 0.42, "grad_norm": 1.6397261652327657, "learning_rate": 6.4396894558933495e-06, "loss": 0.5822, "step": 4087 }, { "epoch": 0.42, "grad_norm": 2.0119623346471935, "learning_rate": 6.438077251652759e-06, "loss": 0.6775, "step": 4088 }, { "epoch": 0.43, "grad_norm": 1.883160572533884, "learning_rate": 6.436464884397917e-06, "loss": 0.5655, "step": 4089 }, { "epoch": 0.43, "grad_norm": 1.9595553513484605, "learning_rate": 6.434852354311592e-06, "loss": 0.6677, "step": 4090 }, { "epoch": 0.43, "grad_norm": 1.8563543420640487, "learning_rate": 6.433239661576574e-06, "loss": 0.6959, "step": 4091 }, { "epoch": 0.43, "grad_norm": 1.979965895251359, "learning_rate": 6.431626806375671e-06, "loss": 0.6135, "step": 4092 }, { "epoch": 0.43, "grad_norm": 1.895092434982727, "learning_rate": 6.4300137888917104e-06, "loss": 0.5857, "step": 4093 }, { "epoch": 0.43, "grad_norm": 1.8786371189307325, "learning_rate": 6.428400609307535e-06, "loss": 0.6763, "step": 4094 }, { "epoch": 0.43, "grad_norm": 1.9249867638405949, "learning_rate": 6.426787267806009e-06, "loss": 0.5852, "step": 4095 }, { "epoch": 0.43, "grad_norm": 2.076984042439822, "learning_rate": 6.425173764570014e-06, "loss": 0.5923, "step": 4096 }, { "epoch": 0.43, "grad_norm": 1.9017582414587852, "learning_rate": 6.42356009978245e-06, "loss": 0.6385, "step": 4097 }, { "epoch": 0.43, "grad_norm": 1.98427043016857, "learning_rate": 6.421946273626234e-06, "loss": 0.6316, "step": 4098 }, { "epoch": 0.43, "grad_norm": 2.085670532511142, "learning_rate": 6.420332286284303e-06, "loss": 0.6466, "step": 4099 }, { "epoch": 0.43, "grad_norm": 1.9356842581183769, "learning_rate": 6.418718137939614e-06, "loss": 0.6055, "step": 4100 }, { "epoch": 0.43, "grad_norm": 2.099063254616092, "learning_rate": 6.417103828775135e-06, "loss": 0.6641, "step": 4101 }, { "epoch": 0.43, "grad_norm": 2.2228334066963855, "learning_rate": 6.41548935897386e-06, "loss": 0.784, "step": 4102 }, { "epoch": 0.43, "grad_norm": 1.7704647109970402, "learning_rate": 6.4138747287187984e-06, "loss": 0.6703, "step": 4103 }, { "epoch": 0.43, "grad_norm": 1.9802068841142038, "learning_rate": 6.412259938192978e-06, "loss": 0.6845, "step": 4104 }, { "epoch": 0.43, "grad_norm": 2.4689051914936173, "learning_rate": 6.410644987579444e-06, "loss": 0.594, "step": 4105 }, { "epoch": 0.43, "grad_norm": 2.212128219379263, "learning_rate": 6.409029877061259e-06, "loss": 0.7405, "step": 4106 }, { "epoch": 0.43, "grad_norm": 1.875314160139368, "learning_rate": 6.407414606821507e-06, "loss": 0.6231, "step": 4107 }, { "epoch": 0.43, "grad_norm": 1.8218343066015268, "learning_rate": 6.405799177043289e-06, "loss": 0.6286, "step": 4108 }, { "epoch": 0.43, "grad_norm": 2.0688539285382137, "learning_rate": 6.4041835879097205e-06, "loss": 0.7444, "step": 4109 }, { "epoch": 0.43, "grad_norm": 1.7472797608134105, "learning_rate": 6.402567839603937e-06, "loss": 0.6215, "step": 4110 }, { "epoch": 0.43, "grad_norm": 1.9131271318603962, "learning_rate": 6.400951932309097e-06, "loss": 0.6368, "step": 4111 }, { "epoch": 0.43, "grad_norm": 2.028012533221227, "learning_rate": 6.399335866208367e-06, "loss": 0.6568, "step": 4112 }, { "epoch": 0.43, "grad_norm": 1.8051750494724248, "learning_rate": 6.397719641484943e-06, "loss": 0.6733, "step": 4113 }, { "epoch": 0.43, "grad_norm": 1.9604187629140273, "learning_rate": 6.39610325832203e-06, "loss": 0.6956, "step": 4114 }, { "epoch": 0.43, "grad_norm": 1.7146717528468935, "learning_rate": 6.394486716902857e-06, "loss": 0.567, "step": 4115 }, { "epoch": 0.43, "grad_norm": 1.8587330203323895, "learning_rate": 6.392870017410665e-06, "loss": 0.7145, "step": 4116 }, { "epoch": 0.43, "grad_norm": 1.886070724526401, "learning_rate": 6.3912531600287166e-06, "loss": 0.6478, "step": 4117 }, { "epoch": 0.43, "grad_norm": 1.8676316071674293, "learning_rate": 6.389636144940294e-06, "loss": 0.6376, "step": 4118 }, { "epoch": 0.43, "grad_norm": 2.152777218544474, "learning_rate": 6.388018972328693e-06, "loss": 0.6583, "step": 4119 }, { "epoch": 0.43, "grad_norm": 1.7736923671339342, "learning_rate": 6.386401642377231e-06, "loss": 0.6683, "step": 4120 }, { "epoch": 0.43, "grad_norm": 1.7318840943278404, "learning_rate": 6.384784155269239e-06, "loss": 0.6245, "step": 4121 }, { "epoch": 0.43, "grad_norm": 2.1659556826960933, "learning_rate": 6.383166511188072e-06, "loss": 0.7848, "step": 4122 }, { "epoch": 0.43, "grad_norm": 1.8961148222331055, "learning_rate": 6.381548710317096e-06, "loss": 0.7207, "step": 4123 }, { "epoch": 0.43, "grad_norm": 1.8419789827418658, "learning_rate": 6.3799307528397e-06, "loss": 0.7183, "step": 4124 }, { "epoch": 0.43, "grad_norm": 1.8897939224972424, "learning_rate": 6.378312638939286e-06, "loss": 0.5796, "step": 4125 }, { "epoch": 0.43, "grad_norm": 1.9689256228329488, "learning_rate": 6.37669436879928e-06, "loss": 0.7358, "step": 4126 }, { "epoch": 0.43, "grad_norm": 1.8892196667301127, "learning_rate": 6.375075942603119e-06, "loss": 0.7024, "step": 4127 }, { "epoch": 0.43, "grad_norm": 1.9259521778924813, "learning_rate": 6.373457360534263e-06, "loss": 0.6811, "step": 4128 }, { "epoch": 0.43, "grad_norm": 1.8004120338996794, "learning_rate": 6.371838622776187e-06, "loss": 0.6131, "step": 4129 }, { "epoch": 0.43, "grad_norm": 1.935184387430914, "learning_rate": 6.370219729512383e-06, "loss": 0.6478, "step": 4130 }, { "epoch": 0.43, "grad_norm": 1.909323332437752, "learning_rate": 6.368600680926364e-06, "loss": 0.7664, "step": 4131 }, { "epoch": 0.43, "grad_norm": 2.1700474542918915, "learning_rate": 6.3669814772016555e-06, "loss": 0.7379, "step": 4132 }, { "epoch": 0.43, "grad_norm": 1.8763611045110336, "learning_rate": 6.365362118521807e-06, "loss": 0.6204, "step": 4133 }, { "epoch": 0.43, "grad_norm": 1.7139413076250838, "learning_rate": 6.363742605070379e-06, "loss": 0.7147, "step": 4134 }, { "epoch": 0.43, "grad_norm": 1.9044300533734067, "learning_rate": 6.362122937030952e-06, "loss": 0.6914, "step": 4135 }, { "epoch": 0.43, "grad_norm": 1.9154252110669694, "learning_rate": 6.360503114587129e-06, "loss": 0.615, "step": 4136 }, { "epoch": 0.43, "grad_norm": 1.739297540790688, "learning_rate": 6.3588831379225226e-06, "loss": 0.6519, "step": 4137 }, { "epoch": 0.43, "grad_norm": 1.7433831357411251, "learning_rate": 6.357263007220767e-06, "loss": 0.6645, "step": 4138 }, { "epoch": 0.43, "grad_norm": 1.9514132458981708, "learning_rate": 6.355642722665512e-06, "loss": 0.7732, "step": 4139 }, { "epoch": 0.43, "grad_norm": 1.8361076741980893, "learning_rate": 6.354022284440429e-06, "loss": 0.6559, "step": 4140 }, { "epoch": 0.43, "grad_norm": 1.9369982517217328, "learning_rate": 6.352401692729202e-06, "loss": 0.6168, "step": 4141 }, { "epoch": 0.43, "grad_norm": 1.5500501779631404, "learning_rate": 6.3507809477155335e-06, "loss": 0.6486, "step": 4142 }, { "epoch": 0.43, "grad_norm": 1.762058079092894, "learning_rate": 6.349160049583146e-06, "loss": 0.6379, "step": 4143 }, { "epoch": 0.43, "grad_norm": 1.6547077639256238, "learning_rate": 6.347538998515778e-06, "loss": 0.6427, "step": 4144 }, { "epoch": 0.43, "grad_norm": 1.863644426003244, "learning_rate": 6.345917794697183e-06, "loss": 0.682, "step": 4145 }, { "epoch": 0.43, "grad_norm": 2.0289147358075525, "learning_rate": 6.344296438311134e-06, "loss": 0.6736, "step": 4146 }, { "epoch": 0.43, "grad_norm": 2.0453181169725765, "learning_rate": 6.342674929541424e-06, "loss": 0.6646, "step": 4147 }, { "epoch": 0.43, "grad_norm": 1.814620528092348, "learning_rate": 6.341053268571855e-06, "loss": 0.6626, "step": 4148 }, { "epoch": 0.43, "grad_norm": 1.9840600275863907, "learning_rate": 6.3394314555862545e-06, "loss": 0.6618, "step": 4149 }, { "epoch": 0.43, "grad_norm": 1.8379449832426429, "learning_rate": 6.337809490768465e-06, "loss": 0.6177, "step": 4150 }, { "epoch": 0.43, "grad_norm": 1.8309354258877755, "learning_rate": 6.336187374302344e-06, "loss": 0.7409, "step": 4151 }, { "epoch": 0.43, "grad_norm": 1.7755918403707267, "learning_rate": 6.334565106371768e-06, "loss": 0.6348, "step": 4152 }, { "epoch": 0.43, "grad_norm": 1.8224093319923627, "learning_rate": 6.332942687160632e-06, "loss": 0.6293, "step": 4153 }, { "epoch": 0.43, "grad_norm": 2.0566967543569863, "learning_rate": 6.331320116852842e-06, "loss": 0.6957, "step": 4154 }, { "epoch": 0.43, "grad_norm": 1.8244220543225507, "learning_rate": 6.329697395632332e-06, "loss": 0.6038, "step": 4155 }, { "epoch": 0.43, "grad_norm": 1.623371055393367, "learning_rate": 6.328074523683041e-06, "loss": 0.6673, "step": 4156 }, { "epoch": 0.43, "grad_norm": 2.2657238349497706, "learning_rate": 6.326451501188933e-06, "loss": 0.6658, "step": 4157 }, { "epoch": 0.43, "grad_norm": 1.9014605197412302, "learning_rate": 6.324828328333986e-06, "loss": 0.6146, "step": 4158 }, { "epoch": 0.43, "grad_norm": 1.7894161936132815, "learning_rate": 6.323205005302199e-06, "loss": 0.6886, "step": 4159 }, { "epoch": 0.43, "grad_norm": 1.8682449812414834, "learning_rate": 6.321581532277581e-06, "loss": 0.6771, "step": 4160 }, { "epoch": 0.43, "grad_norm": 1.7046598099665882, "learning_rate": 6.319957909444163e-06, "loss": 0.6186, "step": 4161 }, { "epoch": 0.43, "grad_norm": 1.963434024206442, "learning_rate": 6.318334136985993e-06, "loss": 0.6581, "step": 4162 }, { "epoch": 0.43, "grad_norm": 1.9731526290449264, "learning_rate": 6.316710215087136e-06, "loss": 0.6845, "step": 4163 }, { "epoch": 0.43, "grad_norm": 1.8947817321763945, "learning_rate": 6.31508614393167e-06, "loss": 0.71, "step": 4164 }, { "epoch": 0.43, "grad_norm": 1.6776835190313186, "learning_rate": 6.313461923703693e-06, "loss": 0.5618, "step": 4165 }, { "epoch": 0.43, "grad_norm": 2.0755723255709397, "learning_rate": 6.311837554587322e-06, "loss": 0.6423, "step": 4166 }, { "epoch": 0.43, "grad_norm": 1.8678892690364675, "learning_rate": 6.3102130367666855e-06, "loss": 0.6015, "step": 4167 }, { "epoch": 0.43, "grad_norm": 1.9535946234388366, "learning_rate": 6.308588370425934e-06, "loss": 0.6249, "step": 4168 }, { "epoch": 0.43, "grad_norm": 1.9754689426313052, "learning_rate": 6.306963555749231e-06, "loss": 0.6902, "step": 4169 }, { "epoch": 0.43, "grad_norm": 1.7827061123763464, "learning_rate": 6.305338592920762e-06, "loss": 0.6728, "step": 4170 }, { "epoch": 0.43, "grad_norm": 1.737874266961729, "learning_rate": 6.303713482124721e-06, "loss": 0.6458, "step": 4171 }, { "epoch": 0.43, "grad_norm": 1.8841336594214666, "learning_rate": 6.302088223545327e-06, "loss": 0.6342, "step": 4172 }, { "epoch": 0.43, "grad_norm": 1.8336447518739336, "learning_rate": 6.30046281736681e-06, "loss": 0.5722, "step": 4173 }, { "epoch": 0.43, "grad_norm": 1.9792552077265053, "learning_rate": 6.298837263773423e-06, "loss": 0.619, "step": 4174 }, { "epoch": 0.43, "grad_norm": 1.8990343746392675, "learning_rate": 6.297211562949427e-06, "loss": 0.6661, "step": 4175 }, { "epoch": 0.43, "grad_norm": 2.012978566826468, "learning_rate": 6.2955857150791055e-06, "loss": 0.6569, "step": 4176 }, { "epoch": 0.43, "grad_norm": 1.988969857473836, "learning_rate": 6.29395972034676e-06, "loss": 0.6604, "step": 4177 }, { "epoch": 0.43, "grad_norm": 1.7627911436862862, "learning_rate": 6.2923335789367044e-06, "loss": 0.5787, "step": 4178 }, { "epoch": 0.43, "grad_norm": 1.9374367211852295, "learning_rate": 6.290707291033272e-06, "loss": 0.6177, "step": 4179 }, { "epoch": 0.43, "grad_norm": 2.0083371897370674, "learning_rate": 6.289080856820811e-06, "loss": 0.6268, "step": 4180 }, { "epoch": 0.43, "grad_norm": 2.0569173930723967, "learning_rate": 6.287454276483687e-06, "loss": 0.6635, "step": 4181 }, { "epoch": 0.43, "grad_norm": 1.7807436006960025, "learning_rate": 6.285827550206282e-06, "loss": 0.535, "step": 4182 }, { "epoch": 0.43, "grad_norm": 2.101638627215469, "learning_rate": 6.284200678172997e-06, "loss": 0.7426, "step": 4183 }, { "epoch": 0.43, "grad_norm": 1.7072537972165742, "learning_rate": 6.282573660568245e-06, "loss": 0.7568, "step": 4184 }, { "epoch": 0.44, "grad_norm": 1.9695668809384006, "learning_rate": 6.2809464975764575e-06, "loss": 0.5676, "step": 4185 }, { "epoch": 0.44, "grad_norm": 1.767265477943679, "learning_rate": 6.279319189382084e-06, "loss": 0.601, "step": 4186 }, { "epoch": 0.44, "grad_norm": 1.7094500604340592, "learning_rate": 6.2776917361695876e-06, "loss": 0.6147, "step": 4187 }, { "epoch": 0.44, "grad_norm": 1.8402827980509162, "learning_rate": 6.276064138123453e-06, "loss": 0.732, "step": 4188 }, { "epoch": 0.44, "grad_norm": 1.9402004321921997, "learning_rate": 6.274436395428171e-06, "loss": 0.6128, "step": 4189 }, { "epoch": 0.44, "grad_norm": 1.9031106270161509, "learning_rate": 6.272808508268262e-06, "loss": 0.6311, "step": 4190 }, { "epoch": 0.44, "grad_norm": 2.033607806685722, "learning_rate": 6.2711804768282535e-06, "loss": 0.5969, "step": 4191 }, { "epoch": 0.44, "grad_norm": 1.800733372504521, "learning_rate": 6.269552301292693e-06, "loss": 0.5638, "step": 4192 }, { "epoch": 0.44, "grad_norm": 1.639474658639473, "learning_rate": 6.267923981846141e-06, "loss": 0.5257, "step": 4193 }, { "epoch": 0.44, "grad_norm": 2.023559643318534, "learning_rate": 6.26629551867318e-06, "loss": 0.5674, "step": 4194 }, { "epoch": 0.44, "grad_norm": 1.874292710750846, "learning_rate": 6.264666911958404e-06, "loss": 0.6333, "step": 4195 }, { "epoch": 0.44, "grad_norm": 1.8563287999328968, "learning_rate": 6.263038161886426e-06, "loss": 0.7358, "step": 4196 }, { "epoch": 0.44, "grad_norm": 1.7453380961988425, "learning_rate": 6.261409268641872e-06, "loss": 0.6227, "step": 4197 }, { "epoch": 0.44, "grad_norm": 1.751372462067731, "learning_rate": 6.259780232409389e-06, "loss": 0.6074, "step": 4198 }, { "epoch": 0.44, "grad_norm": 1.880312499572035, "learning_rate": 6.2581510533736346e-06, "loss": 0.6836, "step": 4199 }, { "epoch": 0.44, "grad_norm": 1.866018226892596, "learning_rate": 6.25652173171929e-06, "loss": 0.6002, "step": 4200 }, { "epoch": 0.44, "grad_norm": 1.8337619090346275, "learning_rate": 6.254892267631042e-06, "loss": 0.6665, "step": 4201 }, { "epoch": 0.44, "grad_norm": 1.821660317307866, "learning_rate": 6.2532626612936035e-06, "loss": 0.6837, "step": 4202 }, { "epoch": 0.44, "grad_norm": 1.852300675093971, "learning_rate": 6.2516329128917e-06, "loss": 0.6855, "step": 4203 }, { "epoch": 0.44, "grad_norm": 2.067378628400363, "learning_rate": 6.250003022610071e-06, "loss": 0.684, "step": 4204 }, { "epoch": 0.44, "grad_norm": 1.766568461268017, "learning_rate": 6.248372990633475e-06, "loss": 0.6828, "step": 4205 }, { "epoch": 0.44, "grad_norm": 2.121014004372851, "learning_rate": 6.246742817146684e-06, "loss": 0.6887, "step": 4206 }, { "epoch": 0.44, "grad_norm": 1.916239882668666, "learning_rate": 6.2451125023344895e-06, "loss": 0.7135, "step": 4207 }, { "epoch": 0.44, "grad_norm": 2.0778830065986345, "learning_rate": 6.243482046381696e-06, "loss": 0.6779, "step": 4208 }, { "epoch": 0.44, "grad_norm": 1.869012297808762, "learning_rate": 6.2418514494731245e-06, "loss": 0.6217, "step": 4209 }, { "epoch": 0.44, "grad_norm": 1.650570523552766, "learning_rate": 6.240220711793612e-06, "loss": 0.6305, "step": 4210 }, { "epoch": 0.44, "grad_norm": 1.775718554562567, "learning_rate": 6.238589833528015e-06, "loss": 0.6848, "step": 4211 }, { "epoch": 0.44, "grad_norm": 2.010000193787045, "learning_rate": 6.236958814861199e-06, "loss": 0.6825, "step": 4212 }, { "epoch": 0.44, "grad_norm": 1.7651378794347117, "learning_rate": 6.2353276559780515e-06, "loss": 0.5645, "step": 4213 }, { "epoch": 0.44, "grad_norm": 1.722675513011501, "learning_rate": 6.233696357063472e-06, "loss": 0.668, "step": 4214 }, { "epoch": 0.44, "grad_norm": 2.1181437150465214, "learning_rate": 6.23206491830238e-06, "loss": 0.7165, "step": 4215 }, { "epoch": 0.44, "grad_norm": 1.9627449801841879, "learning_rate": 6.230433339879706e-06, "loss": 0.6373, "step": 4216 }, { "epoch": 0.44, "grad_norm": 1.747089518779566, "learning_rate": 6.2288016219804e-06, "loss": 0.5995, "step": 4217 }, { "epoch": 0.44, "grad_norm": 1.6960776760193497, "learning_rate": 6.2271697647894265e-06, "loss": 0.6389, "step": 4218 }, { "epoch": 0.44, "grad_norm": 1.9381104103996185, "learning_rate": 6.225537768491766e-06, "loss": 0.6122, "step": 4219 }, { "epoch": 0.44, "grad_norm": 1.9867789174214119, "learning_rate": 6.223905633272414e-06, "loss": 0.6503, "step": 4220 }, { "epoch": 0.44, "grad_norm": 2.2355853167261124, "learning_rate": 6.2222733593163805e-06, "loss": 0.7641, "step": 4221 }, { "epoch": 0.44, "grad_norm": 1.796152824199846, "learning_rate": 6.220640946808697e-06, "loss": 0.5726, "step": 4222 }, { "epoch": 0.44, "grad_norm": 1.8490238088076443, "learning_rate": 6.219008395934405e-06, "loss": 0.5832, "step": 4223 }, { "epoch": 0.44, "grad_norm": 1.734880478046965, "learning_rate": 6.217375706878561e-06, "loss": 0.6244, "step": 4224 }, { "epoch": 0.44, "grad_norm": 1.8760334967705816, "learning_rate": 6.215742879826244e-06, "loss": 0.6718, "step": 4225 }, { "epoch": 0.44, "grad_norm": 1.8641079257282516, "learning_rate": 6.214109914962542e-06, "loss": 0.6378, "step": 4226 }, { "epoch": 0.44, "grad_norm": 1.9736071636844352, "learning_rate": 6.21247681247256e-06, "loss": 0.7905, "step": 4227 }, { "epoch": 0.44, "grad_norm": 1.657253663363964, "learning_rate": 6.210843572541421e-06, "loss": 0.6086, "step": 4228 }, { "epoch": 0.44, "grad_norm": 1.9013689343551632, "learning_rate": 6.20921019535426e-06, "loss": 0.6301, "step": 4229 }, { "epoch": 0.44, "grad_norm": 1.9593538236517356, "learning_rate": 6.207576681096233e-06, "loss": 0.7359, "step": 4230 }, { "epoch": 0.44, "grad_norm": 1.9607098259709497, "learning_rate": 6.205943029952505e-06, "loss": 0.6411, "step": 4231 }, { "epoch": 0.44, "grad_norm": 1.917909444411163, "learning_rate": 6.204309242108262e-06, "loss": 0.6877, "step": 4232 }, { "epoch": 0.44, "grad_norm": 1.9283331737073521, "learning_rate": 6.202675317748702e-06, "loss": 0.6473, "step": 4233 }, { "epoch": 0.44, "grad_norm": 1.820140476133725, "learning_rate": 6.201041257059039e-06, "loss": 0.623, "step": 4234 }, { "epoch": 0.44, "grad_norm": 1.8303703060759442, "learning_rate": 6.199407060224503e-06, "loss": 0.6681, "step": 4235 }, { "epoch": 0.44, "grad_norm": 2.1784835352215315, "learning_rate": 6.197772727430341e-06, "loss": 0.6553, "step": 4236 }, { "epoch": 0.44, "grad_norm": 2.057402259459153, "learning_rate": 6.196138258861815e-06, "loss": 0.6533, "step": 4237 }, { "epoch": 0.44, "grad_norm": 1.9346080271003194, "learning_rate": 6.194503654704198e-06, "loss": 0.6091, "step": 4238 }, { "epoch": 0.44, "grad_norm": 1.7055354507122449, "learning_rate": 6.192868915142782e-06, "loss": 0.6281, "step": 4239 }, { "epoch": 0.44, "grad_norm": 1.9143105967875074, "learning_rate": 6.191234040362879e-06, "loss": 0.5833, "step": 4240 }, { "epoch": 0.44, "grad_norm": 1.7670799000079251, "learning_rate": 6.189599030549804e-06, "loss": 0.6947, "step": 4241 }, { "epoch": 0.44, "grad_norm": 1.8540914584065187, "learning_rate": 6.1879638858889e-06, "loss": 0.7129, "step": 4242 }, { "epoch": 0.44, "grad_norm": 1.7533761314777891, "learning_rate": 6.186328606565518e-06, "loss": 0.5931, "step": 4243 }, { "epoch": 0.44, "grad_norm": 1.8074134710317675, "learning_rate": 6.184693192765028e-06, "loss": 0.7204, "step": 4244 }, { "epoch": 0.44, "grad_norm": 2.0892333656674364, "learning_rate": 6.18305764467281e-06, "loss": 0.7689, "step": 4245 }, { "epoch": 0.44, "grad_norm": 1.903938283307967, "learning_rate": 6.181421962474267e-06, "loss": 0.6832, "step": 4246 }, { "epoch": 0.44, "grad_norm": 1.9806689141914189, "learning_rate": 6.179786146354808e-06, "loss": 0.6931, "step": 4247 }, { "epoch": 0.44, "grad_norm": 1.977935900264373, "learning_rate": 6.178150196499868e-06, "loss": 0.6945, "step": 4248 }, { "epoch": 0.44, "grad_norm": 1.8313727726369613, "learning_rate": 6.176514113094885e-06, "loss": 0.5794, "step": 4249 }, { "epoch": 0.44, "grad_norm": 2.081898696849267, "learning_rate": 6.174877896325323e-06, "loss": 0.6605, "step": 4250 }, { "epoch": 0.44, "grad_norm": 1.9063195075574626, "learning_rate": 6.173241546376654e-06, "loss": 0.6784, "step": 4251 }, { "epoch": 0.44, "grad_norm": 1.8626522818570608, "learning_rate": 6.171605063434368e-06, "loss": 0.6452, "step": 4252 }, { "epoch": 0.44, "grad_norm": 1.8826580292136437, "learning_rate": 6.169968447683971e-06, "loss": 0.6847, "step": 4253 }, { "epoch": 0.44, "grad_norm": 1.7157387012544394, "learning_rate": 6.168331699310982e-06, "loss": 0.5853, "step": 4254 }, { "epoch": 0.44, "grad_norm": 1.9401064441075455, "learning_rate": 6.1666948185009355e-06, "loss": 0.6954, "step": 4255 }, { "epoch": 0.44, "grad_norm": 2.0024822246974736, "learning_rate": 6.165057805439382e-06, "loss": 0.6232, "step": 4256 }, { "epoch": 0.44, "grad_norm": 2.171084207492589, "learning_rate": 6.1634206603118844e-06, "loss": 0.672, "step": 4257 }, { "epoch": 0.44, "grad_norm": 1.8833760760024472, "learning_rate": 6.161783383304024e-06, "loss": 0.6084, "step": 4258 }, { "epoch": 0.44, "grad_norm": 1.8417561591740161, "learning_rate": 6.160145974601397e-06, "loss": 0.6039, "step": 4259 }, { "epoch": 0.44, "grad_norm": 2.0709052955398577, "learning_rate": 6.158508434389608e-06, "loss": 0.6919, "step": 4260 }, { "epoch": 0.44, "grad_norm": 1.8776488381311174, "learning_rate": 6.156870762854287e-06, "loss": 0.7309, "step": 4261 }, { "epoch": 0.44, "grad_norm": 1.873816021513749, "learning_rate": 6.155232960181071e-06, "loss": 0.6691, "step": 4262 }, { "epoch": 0.44, "grad_norm": 1.7901395019323316, "learning_rate": 6.153595026555613e-06, "loss": 0.6377, "step": 4263 }, { "epoch": 0.44, "grad_norm": 1.75387062711662, "learning_rate": 6.151956962163584e-06, "loss": 0.6408, "step": 4264 }, { "epoch": 0.44, "grad_norm": 1.770309730929508, "learning_rate": 6.150318767190668e-06, "loss": 0.6725, "step": 4265 }, { "epoch": 0.44, "grad_norm": 1.919943910487086, "learning_rate": 6.148680441822563e-06, "loss": 0.5916, "step": 4266 }, { "epoch": 0.44, "grad_norm": 1.8486164691741107, "learning_rate": 6.1470419862449825e-06, "loss": 0.5883, "step": 4267 }, { "epoch": 0.44, "grad_norm": 1.9032761553068864, "learning_rate": 6.1454034006436545e-06, "loss": 0.5558, "step": 4268 }, { "epoch": 0.44, "grad_norm": 1.9283442009335932, "learning_rate": 6.143764685204323e-06, "loss": 0.6445, "step": 4269 }, { "epoch": 0.44, "grad_norm": 1.7842124977670573, "learning_rate": 6.142125840112746e-06, "loss": 0.6604, "step": 4270 }, { "epoch": 0.44, "grad_norm": 1.6599561789243407, "learning_rate": 6.140486865554693e-06, "loss": 0.6057, "step": 4271 }, { "epoch": 0.44, "grad_norm": 1.7994386291779092, "learning_rate": 6.138847761715955e-06, "loss": 0.5795, "step": 4272 }, { "epoch": 0.44, "grad_norm": 2.1473723111235663, "learning_rate": 6.137208528782331e-06, "loss": 0.6959, "step": 4273 }, { "epoch": 0.44, "grad_norm": 1.725204692644165, "learning_rate": 6.1355691669396386e-06, "loss": 0.5396, "step": 4274 }, { "epoch": 0.44, "grad_norm": 2.0090637781380067, "learning_rate": 6.133929676373709e-06, "loss": 0.6082, "step": 4275 }, { "epoch": 0.44, "grad_norm": 1.9772578144450519, "learning_rate": 6.132290057270387e-06, "loss": 0.6552, "step": 4276 }, { "epoch": 0.44, "grad_norm": 2.0108173014531148, "learning_rate": 6.130650309815535e-06, "loss": 0.6897, "step": 4277 }, { "epoch": 0.44, "grad_norm": 1.823894686140687, "learning_rate": 6.129010434195023e-06, "loss": 0.6536, "step": 4278 }, { "epoch": 0.44, "grad_norm": 1.6203568412684168, "learning_rate": 6.127370430594745e-06, "loss": 0.5311, "step": 4279 }, { "epoch": 0.44, "grad_norm": 2.014024329591531, "learning_rate": 6.125730299200601e-06, "loss": 0.6031, "step": 4280 }, { "epoch": 0.45, "grad_norm": 1.944148231455446, "learning_rate": 6.124090040198514e-06, "loss": 0.667, "step": 4281 }, { "epoch": 0.45, "grad_norm": 1.960939325536016, "learning_rate": 6.122449653774411e-06, "loss": 0.7211, "step": 4282 }, { "epoch": 0.45, "grad_norm": 1.9195307639129888, "learning_rate": 6.120809140114243e-06, "loss": 0.6355, "step": 4283 }, { "epoch": 0.45, "grad_norm": 1.7758071657350973, "learning_rate": 6.119168499403971e-06, "loss": 0.6558, "step": 4284 }, { "epoch": 0.45, "grad_norm": 1.9774042812835415, "learning_rate": 6.11752773182957e-06, "loss": 0.6619, "step": 4285 }, { "epoch": 0.45, "grad_norm": 1.8617259628540834, "learning_rate": 6.115886837577031e-06, "loss": 0.5504, "step": 4286 }, { "epoch": 0.45, "grad_norm": 1.8015457930515442, "learning_rate": 6.114245816832359e-06, "loss": 0.5942, "step": 4287 }, { "epoch": 0.45, "grad_norm": 1.8809551738481627, "learning_rate": 6.112604669781572e-06, "loss": 0.6016, "step": 4288 }, { "epoch": 0.45, "grad_norm": 1.8090247840030873, "learning_rate": 6.110963396610705e-06, "loss": 0.583, "step": 4289 }, { "epoch": 0.45, "grad_norm": 2.000768037728457, "learning_rate": 6.109321997505804e-06, "loss": 0.6731, "step": 4290 }, { "epoch": 0.45, "grad_norm": 1.9743242871047149, "learning_rate": 6.107680472652931e-06, "loss": 0.715, "step": 4291 }, { "epoch": 0.45, "grad_norm": 1.9282462151001922, "learning_rate": 6.106038822238165e-06, "loss": 0.6739, "step": 4292 }, { "epoch": 0.45, "grad_norm": 2.1015185797276303, "learning_rate": 6.104397046447593e-06, "loss": 0.708, "step": 4293 }, { "epoch": 0.45, "grad_norm": 1.748887075305813, "learning_rate": 6.1027551454673204e-06, "loss": 0.7053, "step": 4294 }, { "epoch": 0.45, "grad_norm": 1.8395064781208474, "learning_rate": 6.1011131194834675e-06, "loss": 0.6444, "step": 4295 }, { "epoch": 0.45, "grad_norm": 1.7803548664428446, "learning_rate": 6.099470968682168e-06, "loss": 0.5716, "step": 4296 }, { "epoch": 0.45, "grad_norm": 1.6009153734353425, "learning_rate": 6.097828693249565e-06, "loss": 0.6457, "step": 4297 }, { "epoch": 0.45, "grad_norm": 1.647971075805321, "learning_rate": 6.0961862933718215e-06, "loss": 0.5746, "step": 4298 }, { "epoch": 0.45, "grad_norm": 1.8482386722821733, "learning_rate": 6.0945437692351166e-06, "loss": 0.6898, "step": 4299 }, { "epoch": 0.45, "grad_norm": 1.8232321333867145, "learning_rate": 6.092901121025634e-06, "loss": 0.6632, "step": 4300 }, { "epoch": 0.45, "grad_norm": 1.881039170540099, "learning_rate": 6.091258348929581e-06, "loss": 0.6537, "step": 4301 }, { "epoch": 0.45, "grad_norm": 2.0396835214722606, "learning_rate": 6.089615453133173e-06, "loss": 0.6914, "step": 4302 }, { "epoch": 0.45, "grad_norm": 1.8316939185491101, "learning_rate": 6.0879724338226454e-06, "loss": 0.5984, "step": 4303 }, { "epoch": 0.45, "grad_norm": 1.7744519568370833, "learning_rate": 6.086329291184238e-06, "loss": 0.5976, "step": 4304 }, { "epoch": 0.45, "grad_norm": 1.945342822305884, "learning_rate": 6.084686025404216e-06, "loss": 0.5689, "step": 4305 }, { "epoch": 0.45, "grad_norm": 1.660347128085138, "learning_rate": 6.08304263666885e-06, "loss": 0.5238, "step": 4306 }, { "epoch": 0.45, "grad_norm": 1.9805416996449299, "learning_rate": 6.081399125164429e-06, "loss": 0.7111, "step": 4307 }, { "epoch": 0.45, "grad_norm": 1.8201579374736838, "learning_rate": 6.079755491077251e-06, "loss": 0.62, "step": 4308 }, { "epoch": 0.45, "grad_norm": 1.671229966988917, "learning_rate": 6.0781117345936345e-06, "loss": 0.5775, "step": 4309 }, { "epoch": 0.45, "grad_norm": 1.8676293114126867, "learning_rate": 6.07646785589991e-06, "loss": 0.7568, "step": 4310 }, { "epoch": 0.45, "grad_norm": 1.8390499419653883, "learning_rate": 6.074823855182416e-06, "loss": 0.6798, "step": 4311 }, { "epoch": 0.45, "grad_norm": 1.7786191474157553, "learning_rate": 6.073179732627512e-06, "loss": 0.5254, "step": 4312 }, { "epoch": 0.45, "grad_norm": 1.7017606975225539, "learning_rate": 6.0715354884215685e-06, "loss": 0.5916, "step": 4313 }, { "epoch": 0.45, "grad_norm": 1.8331971488804444, "learning_rate": 6.069891122750971e-06, "loss": 0.6283, "step": 4314 }, { "epoch": 0.45, "grad_norm": 2.0707708625988377, "learning_rate": 6.068246635802115e-06, "loss": 0.6521, "step": 4315 }, { "epoch": 0.45, "grad_norm": 2.1080846184243596, "learning_rate": 6.066602027761414e-06, "loss": 0.6613, "step": 4316 }, { "epoch": 0.45, "grad_norm": 1.8541610045993289, "learning_rate": 6.064957298815295e-06, "loss": 0.7219, "step": 4317 }, { "epoch": 0.45, "grad_norm": 1.7233308318810572, "learning_rate": 6.063312449150196e-06, "loss": 0.7256, "step": 4318 }, { "epoch": 0.45, "grad_norm": 1.7786958252668963, "learning_rate": 6.06166747895257e-06, "loss": 0.6196, "step": 4319 }, { "epoch": 0.45, "grad_norm": 1.8992649487983346, "learning_rate": 6.060022388408883e-06, "loss": 0.6449, "step": 4320 }, { "epoch": 0.45, "grad_norm": 1.8783947566207382, "learning_rate": 6.0583771777056166e-06, "loss": 0.6849, "step": 4321 }, { "epoch": 0.45, "grad_norm": 1.947749299526801, "learning_rate": 6.056731847029265e-06, "loss": 0.7143, "step": 4322 }, { "epoch": 0.45, "grad_norm": 1.9019855284634992, "learning_rate": 6.055086396566334e-06, "loss": 0.5855, "step": 4323 }, { "epoch": 0.45, "grad_norm": 1.831529691156291, "learning_rate": 6.0534408265033485e-06, "loss": 0.6443, "step": 4324 }, { "epoch": 0.45, "grad_norm": 1.8942202831397477, "learning_rate": 6.05179513702684e-06, "loss": 0.5609, "step": 4325 }, { "epoch": 0.45, "grad_norm": 1.9975511737435045, "learning_rate": 6.050149328323358e-06, "loss": 0.6611, "step": 4326 }, { "epoch": 0.45, "grad_norm": 1.970690832774122, "learning_rate": 6.048503400579463e-06, "loss": 0.657, "step": 4327 }, { "epoch": 0.45, "grad_norm": 1.903442299604656, "learning_rate": 6.046857353981732e-06, "loss": 0.6878, "step": 4328 }, { "epoch": 0.45, "grad_norm": 1.9033342278105545, "learning_rate": 6.045211188716753e-06, "loss": 0.6339, "step": 4329 }, { "epoch": 0.45, "grad_norm": 1.9358946001912258, "learning_rate": 6.043564904971129e-06, "loss": 0.7204, "step": 4330 }, { "epoch": 0.45, "grad_norm": 1.562197508869273, "learning_rate": 6.041918502931473e-06, "loss": 0.5725, "step": 4331 }, { "epoch": 0.45, "grad_norm": 1.7487153609771282, "learning_rate": 6.040271982784417e-06, "loss": 0.6848, "step": 4332 }, { "epoch": 0.45, "grad_norm": 2.0537934311353845, "learning_rate": 6.038625344716603e-06, "loss": 0.7616, "step": 4333 }, { "epoch": 0.45, "grad_norm": 1.880860377128153, "learning_rate": 6.036978588914684e-06, "loss": 0.6721, "step": 4334 }, { "epoch": 0.45, "grad_norm": 1.7029662258523208, "learning_rate": 6.035331715565333e-06, "loss": 0.6612, "step": 4335 }, { "epoch": 0.45, "grad_norm": 2.0649190844874443, "learning_rate": 6.0336847248552335e-06, "loss": 0.7124, "step": 4336 }, { "epoch": 0.45, "grad_norm": 1.8863676505857916, "learning_rate": 6.032037616971075e-06, "loss": 0.6383, "step": 4337 }, { "epoch": 0.45, "grad_norm": 1.6913686311949407, "learning_rate": 6.030390392099571e-06, "loss": 0.5815, "step": 4338 }, { "epoch": 0.45, "grad_norm": 1.6854186885544427, "learning_rate": 6.028743050427442e-06, "loss": 0.5173, "step": 4339 }, { "epoch": 0.45, "grad_norm": 2.0603592121426995, "learning_rate": 6.027095592141428e-06, "loss": 0.65, "step": 4340 }, { "epoch": 0.45, "grad_norm": 1.8627823283033291, "learning_rate": 6.025448017428272e-06, "loss": 0.5187, "step": 4341 }, { "epoch": 0.45, "grad_norm": 1.8550463706984868, "learning_rate": 6.023800326474738e-06, "loss": 0.6554, "step": 4342 }, { "epoch": 0.45, "grad_norm": 2.01413399369854, "learning_rate": 6.022152519467601e-06, "loss": 0.5523, "step": 4343 }, { "epoch": 0.45, "grad_norm": 1.871011023204209, "learning_rate": 6.020504596593652e-06, "loss": 0.5976, "step": 4344 }, { "epoch": 0.45, "grad_norm": 1.9291226273099116, "learning_rate": 6.018856558039689e-06, "loss": 0.5815, "step": 4345 }, { "epoch": 0.45, "grad_norm": 2.1033708436285283, "learning_rate": 6.017208403992527e-06, "loss": 0.6804, "step": 4346 }, { "epoch": 0.45, "grad_norm": 1.7746131482935668, "learning_rate": 6.015560134638997e-06, "loss": 0.6312, "step": 4347 }, { "epoch": 0.45, "grad_norm": 1.736794612224939, "learning_rate": 6.013911750165935e-06, "loss": 0.688, "step": 4348 }, { "epoch": 0.45, "grad_norm": 1.8071088364104275, "learning_rate": 6.012263250760199e-06, "loss": 0.6993, "step": 4349 }, { "epoch": 0.45, "grad_norm": 1.767310249059785, "learning_rate": 6.0106146366086514e-06, "loss": 0.5741, "step": 4350 }, { "epoch": 0.45, "grad_norm": 2.0526654951756416, "learning_rate": 6.0089659078981765e-06, "loss": 0.6669, "step": 4351 }, { "epoch": 0.45, "grad_norm": 1.6899837975267995, "learning_rate": 6.007317064815664e-06, "loss": 0.5892, "step": 4352 }, { "epoch": 0.45, "grad_norm": 1.9256094251339038, "learning_rate": 6.0056681075480206e-06, "loss": 0.6298, "step": 4353 }, { "epoch": 0.45, "grad_norm": 2.018125474079877, "learning_rate": 6.004019036282165e-06, "loss": 0.687, "step": 4354 }, { "epoch": 0.45, "grad_norm": 1.8216632857004333, "learning_rate": 6.002369851205029e-06, "loss": 0.6037, "step": 4355 }, { "epoch": 0.45, "grad_norm": 1.934654363397389, "learning_rate": 6.000720552503557e-06, "loss": 0.6146, "step": 4356 }, { "epoch": 0.45, "grad_norm": 1.816258512462237, "learning_rate": 5.999071140364708e-06, "loss": 0.6523, "step": 4357 }, { "epoch": 0.45, "grad_norm": 1.8239092670075012, "learning_rate": 5.997421614975449e-06, "loss": 0.6458, "step": 4358 }, { "epoch": 0.45, "grad_norm": 1.9149484450077814, "learning_rate": 5.995771976522765e-06, "loss": 0.7412, "step": 4359 }, { "epoch": 0.45, "grad_norm": 1.9121047542571723, "learning_rate": 5.9941222251936525e-06, "loss": 0.6096, "step": 4360 }, { "epoch": 0.45, "grad_norm": 1.8481439735718255, "learning_rate": 5.992472361175118e-06, "loss": 0.6237, "step": 4361 }, { "epoch": 0.45, "grad_norm": 2.0655990944889124, "learning_rate": 5.990822384654187e-06, "loss": 0.6851, "step": 4362 }, { "epoch": 0.45, "grad_norm": 1.8831730913470377, "learning_rate": 5.989172295817889e-06, "loss": 0.618, "step": 4363 }, { "epoch": 0.45, "grad_norm": 1.8724322214686773, "learning_rate": 5.9875220948532745e-06, "loss": 0.6682, "step": 4364 }, { "epoch": 0.45, "grad_norm": 1.905634382989326, "learning_rate": 5.9858717819474e-06, "loss": 0.647, "step": 4365 }, { "epoch": 0.45, "grad_norm": 2.0386048289605534, "learning_rate": 5.984221357287342e-06, "loss": 0.6118, "step": 4366 }, { "epoch": 0.45, "grad_norm": 1.8630627415405536, "learning_rate": 5.982570821060182e-06, "loss": 0.6513, "step": 4367 }, { "epoch": 0.45, "grad_norm": 2.146377572209457, "learning_rate": 5.980920173453019e-06, "loss": 0.6575, "step": 4368 }, { "epoch": 0.45, "grad_norm": 1.8648330548708527, "learning_rate": 5.979269414652964e-06, "loss": 0.5981, "step": 4369 }, { "epoch": 0.45, "grad_norm": 1.7479863045414308, "learning_rate": 5.977618544847139e-06, "loss": 0.6314, "step": 4370 }, { "epoch": 0.45, "grad_norm": 1.9122892283559052, "learning_rate": 5.975967564222679e-06, "loss": 0.6404, "step": 4371 }, { "epoch": 0.45, "grad_norm": 1.7702998645102086, "learning_rate": 5.974316472966732e-06, "loss": 0.6091, "step": 4372 }, { "epoch": 0.45, "grad_norm": 1.9419264167413117, "learning_rate": 5.9726652712664625e-06, "loss": 0.6424, "step": 4373 }, { "epoch": 0.45, "grad_norm": 1.924232841922094, "learning_rate": 5.971013959309038e-06, "loss": 0.6804, "step": 4374 }, { "epoch": 0.45, "grad_norm": 2.0329707067511555, "learning_rate": 5.969362537281647e-06, "loss": 0.7667, "step": 4375 }, { "epoch": 0.45, "grad_norm": 1.8286541545437696, "learning_rate": 5.967711005371487e-06, "loss": 0.6649, "step": 4376 }, { "epoch": 0.45, "grad_norm": 2.036373560381382, "learning_rate": 5.966059363765771e-06, "loss": 0.6073, "step": 4377 }, { "epoch": 0.46, "grad_norm": 1.8738880734455083, "learning_rate": 5.9644076126517166e-06, "loss": 0.6915, "step": 4378 }, { "epoch": 0.46, "grad_norm": 2.049257494525163, "learning_rate": 5.962755752216564e-06, "loss": 0.6387, "step": 4379 }, { "epoch": 0.46, "grad_norm": 1.693094678717967, "learning_rate": 5.961103782647558e-06, "loss": 0.4983, "step": 4380 }, { "epoch": 0.46, "grad_norm": 1.9267764438897257, "learning_rate": 5.959451704131962e-06, "loss": 0.594, "step": 4381 }, { "epoch": 0.46, "grad_norm": 1.966784263635863, "learning_rate": 5.957799516857046e-06, "loss": 0.6696, "step": 4382 }, { "epoch": 0.46, "grad_norm": 1.917751357367465, "learning_rate": 5.9561472210100955e-06, "loss": 0.616, "step": 4383 }, { "epoch": 0.46, "grad_norm": 1.9782860809483898, "learning_rate": 5.954494816778408e-06, "loss": 0.719, "step": 4384 }, { "epoch": 0.46, "grad_norm": 1.8408410543098181, "learning_rate": 5.952842304349291e-06, "loss": 0.6837, "step": 4385 }, { "epoch": 0.46, "grad_norm": 2.0510629750021216, "learning_rate": 5.951189683910069e-06, "loss": 0.6411, "step": 4386 }, { "epoch": 0.46, "grad_norm": 1.7191965103067213, "learning_rate": 5.949536955648074e-06, "loss": 0.5532, "step": 4387 }, { "epoch": 0.46, "grad_norm": 1.7432450197461355, "learning_rate": 5.947884119750656e-06, "loss": 0.652, "step": 4388 }, { "epoch": 0.46, "grad_norm": 1.7602986319167306, "learning_rate": 5.946231176405166e-06, "loss": 0.5931, "step": 4389 }, { "epoch": 0.46, "grad_norm": 1.9640168883565872, "learning_rate": 5.944578125798981e-06, "loss": 0.6641, "step": 4390 }, { "epoch": 0.46, "grad_norm": 2.2045820879343583, "learning_rate": 5.94292496811948e-06, "loss": 0.6991, "step": 4391 }, { "epoch": 0.46, "grad_norm": 1.6992537195980884, "learning_rate": 5.94127170355406e-06, "loss": 0.5314, "step": 4392 }, { "epoch": 0.46, "grad_norm": 1.7760267943580061, "learning_rate": 5.939618332290128e-06, "loss": 0.6334, "step": 4393 }, { "epoch": 0.46, "grad_norm": 2.0270949968085743, "learning_rate": 5.937964854515101e-06, "loss": 0.7455, "step": 4394 }, { "epoch": 0.46, "grad_norm": 1.853300870630623, "learning_rate": 5.936311270416415e-06, "loss": 0.5298, "step": 4395 }, { "epoch": 0.46, "grad_norm": 2.094047846726863, "learning_rate": 5.9346575801815064e-06, "loss": 0.6803, "step": 4396 }, { "epoch": 0.46, "grad_norm": 1.5348679190305892, "learning_rate": 5.933003783997835e-06, "loss": 0.5962, "step": 4397 }, { "epoch": 0.46, "grad_norm": 1.7849133821203453, "learning_rate": 5.931349882052866e-06, "loss": 0.6919, "step": 4398 }, { "epoch": 0.46, "grad_norm": 1.7708118222363871, "learning_rate": 5.929695874534081e-06, "loss": 0.701, "step": 4399 }, { "epoch": 0.46, "grad_norm": 1.9364917331436853, "learning_rate": 5.928041761628968e-06, "loss": 0.614, "step": 4400 }, { "epoch": 0.46, "grad_norm": 1.81362935636992, "learning_rate": 5.926387543525031e-06, "loss": 0.7095, "step": 4401 }, { "epoch": 0.46, "grad_norm": 1.9855548797846845, "learning_rate": 5.924733220409786e-06, "loss": 0.6378, "step": 4402 }, { "epoch": 0.46, "grad_norm": 1.9860147094040368, "learning_rate": 5.9230787924707625e-06, "loss": 0.717, "step": 4403 }, { "epoch": 0.46, "grad_norm": 1.799813809623052, "learning_rate": 5.921424259895493e-06, "loss": 0.6095, "step": 4404 }, { "epoch": 0.46, "grad_norm": 1.854974605347049, "learning_rate": 5.919769622871533e-06, "loss": 0.614, "step": 4405 }, { "epoch": 0.46, "grad_norm": 1.8929793362499432, "learning_rate": 5.918114881586444e-06, "loss": 0.6377, "step": 4406 }, { "epoch": 0.46, "grad_norm": 1.8735159541726778, "learning_rate": 5.9164600362278005e-06, "loss": 0.5564, "step": 4407 }, { "epoch": 0.46, "grad_norm": 1.9266281340327402, "learning_rate": 5.914805086983187e-06, "loss": 0.6363, "step": 4408 }, { "epoch": 0.46, "grad_norm": 1.91652283894417, "learning_rate": 5.913150034040203e-06, "loss": 0.5966, "step": 4409 }, { "epoch": 0.46, "grad_norm": 1.8967692186120793, "learning_rate": 5.9114948775864585e-06, "loss": 0.5763, "step": 4410 }, { "epoch": 0.46, "grad_norm": 1.868068960782959, "learning_rate": 5.909839617809574e-06, "loss": 0.5677, "step": 4411 }, { "epoch": 0.46, "grad_norm": 1.949627880291475, "learning_rate": 5.908184254897183e-06, "loss": 0.6456, "step": 4412 }, { "epoch": 0.46, "grad_norm": 1.8952854730038935, "learning_rate": 5.906528789036929e-06, "loss": 0.7244, "step": 4413 }, { "epoch": 0.46, "grad_norm": 1.88180194047267, "learning_rate": 5.904873220416472e-06, "loss": 0.6549, "step": 4414 }, { "epoch": 0.46, "grad_norm": 1.9535874023387245, "learning_rate": 5.903217549223477e-06, "loss": 0.6613, "step": 4415 }, { "epoch": 0.46, "grad_norm": 1.8438707598961703, "learning_rate": 5.901561775645623e-06, "loss": 0.5778, "step": 4416 }, { "epoch": 0.46, "grad_norm": 2.0575987277313716, "learning_rate": 5.8999058998706046e-06, "loss": 0.656, "step": 4417 }, { "epoch": 0.46, "grad_norm": 1.8539834505786594, "learning_rate": 5.898249922086123e-06, "loss": 0.7293, "step": 4418 }, { "epoch": 0.46, "grad_norm": 1.8979032405655898, "learning_rate": 5.896593842479893e-06, "loss": 0.7049, "step": 4419 }, { "epoch": 0.46, "grad_norm": 2.0946306162111976, "learning_rate": 5.89493766123964e-06, "loss": 0.6983, "step": 4420 }, { "epoch": 0.46, "grad_norm": 2.1908352500873205, "learning_rate": 5.893281378553104e-06, "loss": 0.7129, "step": 4421 }, { "epoch": 0.46, "grad_norm": 2.0150748552490465, "learning_rate": 5.891624994608029e-06, "loss": 0.6083, "step": 4422 }, { "epoch": 0.46, "grad_norm": 1.7215755278309917, "learning_rate": 5.8899685095921814e-06, "loss": 0.656, "step": 4423 }, { "epoch": 0.46, "grad_norm": 1.9343024267813007, "learning_rate": 5.888311923693328e-06, "loss": 0.5971, "step": 4424 }, { "epoch": 0.46, "grad_norm": 1.8744245976889982, "learning_rate": 5.886655237099257e-06, "loss": 0.6771, "step": 4425 }, { "epoch": 0.46, "grad_norm": 1.9031240731966292, "learning_rate": 5.88499844999776e-06, "loss": 0.6218, "step": 4426 }, { "epoch": 0.46, "grad_norm": 1.9729786683428097, "learning_rate": 5.8833415625766455e-06, "loss": 0.5538, "step": 4427 }, { "epoch": 0.46, "grad_norm": 1.834573070869529, "learning_rate": 5.881684575023729e-06, "loss": 0.6215, "step": 4428 }, { "epoch": 0.46, "grad_norm": 1.760291997270579, "learning_rate": 5.880027487526842e-06, "loss": 0.5801, "step": 4429 }, { "epoch": 0.46, "grad_norm": 1.9670809366698832, "learning_rate": 5.878370300273821e-06, "loss": 0.6537, "step": 4430 }, { "epoch": 0.46, "grad_norm": 2.011201336258629, "learning_rate": 5.876713013452521e-06, "loss": 0.7965, "step": 4431 }, { "epoch": 0.46, "grad_norm": 2.0312721214511242, "learning_rate": 5.875055627250804e-06, "loss": 0.6555, "step": 4432 }, { "epoch": 0.46, "grad_norm": 1.7256318681000062, "learning_rate": 5.873398141856545e-06, "loss": 0.7285, "step": 4433 }, { "epoch": 0.46, "grad_norm": 1.7330021830336968, "learning_rate": 5.871740557457626e-06, "loss": 0.5818, "step": 4434 }, { "epoch": 0.46, "grad_norm": 1.7225502241433897, "learning_rate": 5.870082874241947e-06, "loss": 0.5089, "step": 4435 }, { "epoch": 0.46, "grad_norm": 1.9286229634377934, "learning_rate": 5.868425092397416e-06, "loss": 0.7606, "step": 4436 }, { "epoch": 0.46, "grad_norm": 1.9414863379591762, "learning_rate": 5.86676721211195e-06, "loss": 0.6719, "step": 4437 }, { "epoch": 0.46, "grad_norm": 1.8080730220213328, "learning_rate": 5.86510923357348e-06, "loss": 0.6629, "step": 4438 }, { "epoch": 0.46, "grad_norm": 1.7384087585471595, "learning_rate": 5.8634511569699486e-06, "loss": 0.728, "step": 4439 }, { "epoch": 0.46, "grad_norm": 1.8723704862477661, "learning_rate": 5.861792982489306e-06, "loss": 0.6252, "step": 4440 }, { "epoch": 0.46, "grad_norm": 1.7642246326221103, "learning_rate": 5.860134710319517e-06, "loss": 0.6327, "step": 4441 }, { "epoch": 0.46, "grad_norm": 1.8926388005449055, "learning_rate": 5.858476340648555e-06, "loss": 0.6858, "step": 4442 }, { "epoch": 0.46, "grad_norm": 1.8708903747298244, "learning_rate": 5.856817873664409e-06, "loss": 0.7199, "step": 4443 }, { "epoch": 0.46, "grad_norm": 1.6894652296859378, "learning_rate": 5.855159309555072e-06, "loss": 0.5358, "step": 4444 }, { "epoch": 0.46, "grad_norm": 1.8180878241005396, "learning_rate": 5.853500648508552e-06, "loss": 0.6665, "step": 4445 }, { "epoch": 0.46, "grad_norm": 1.9056202180080377, "learning_rate": 5.8518418907128694e-06, "loss": 0.6349, "step": 4446 }, { "epoch": 0.46, "grad_norm": 2.0279056733244483, "learning_rate": 5.850183036356054e-06, "loss": 0.6681, "step": 4447 }, { "epoch": 0.46, "grad_norm": 1.7326930647206062, "learning_rate": 5.8485240856261446e-06, "loss": 0.5012, "step": 4448 }, { "epoch": 0.46, "grad_norm": 1.7860636370758964, "learning_rate": 5.846865038711194e-06, "loss": 0.7176, "step": 4449 }, { "epoch": 0.46, "grad_norm": 1.9843512953094944, "learning_rate": 5.845205895799264e-06, "loss": 0.7526, "step": 4450 }, { "epoch": 0.46, "grad_norm": 2.010199635494966, "learning_rate": 5.8435466570784295e-06, "loss": 0.6164, "step": 4451 }, { "epoch": 0.46, "grad_norm": 1.892725992106848, "learning_rate": 5.8418873227367724e-06, "loss": 0.6222, "step": 4452 }, { "epoch": 0.46, "grad_norm": 2.0294869818788763, "learning_rate": 5.840227892962388e-06, "loss": 0.6154, "step": 4453 }, { "epoch": 0.46, "grad_norm": 2.0869333452841192, "learning_rate": 5.838568367943383e-06, "loss": 0.6628, "step": 4454 }, { "epoch": 0.46, "grad_norm": 1.6602804318639692, "learning_rate": 5.8369087478678755e-06, "loss": 0.6215, "step": 4455 }, { "epoch": 0.46, "grad_norm": 1.8406790377489561, "learning_rate": 5.835249032923989e-06, "loss": 0.6173, "step": 4456 }, { "epoch": 0.46, "grad_norm": 1.6276517300331053, "learning_rate": 5.833589223299865e-06, "loss": 0.6091, "step": 4457 }, { "epoch": 0.46, "grad_norm": 1.820277411526944, "learning_rate": 5.831929319183651e-06, "loss": 0.6295, "step": 4458 }, { "epoch": 0.46, "grad_norm": 1.9676714418497274, "learning_rate": 5.830269320763507e-06, "loss": 0.7288, "step": 4459 }, { "epoch": 0.46, "grad_norm": 1.8329203395615346, "learning_rate": 5.828609228227603e-06, "loss": 0.7023, "step": 4460 }, { "epoch": 0.46, "grad_norm": 1.8305837504641471, "learning_rate": 5.82694904176412e-06, "loss": 0.6172, "step": 4461 }, { "epoch": 0.46, "grad_norm": 2.1586280747536115, "learning_rate": 5.825288761561248e-06, "loss": 0.7142, "step": 4462 }, { "epoch": 0.46, "grad_norm": 1.8557818338452512, "learning_rate": 5.823628387807193e-06, "loss": 0.5918, "step": 4463 }, { "epoch": 0.46, "grad_norm": 2.0083257509743015, "learning_rate": 5.821967920690165e-06, "loss": 0.6542, "step": 4464 }, { "epoch": 0.46, "grad_norm": 1.7232403898211963, "learning_rate": 5.82030736039839e-06, "loss": 0.5959, "step": 4465 }, { "epoch": 0.46, "grad_norm": 1.690743499181069, "learning_rate": 5.818646707120098e-06, "loss": 0.6244, "step": 4466 }, { "epoch": 0.46, "grad_norm": 1.946693473563528, "learning_rate": 5.8169859610435355e-06, "loss": 0.5974, "step": 4467 }, { "epoch": 0.46, "grad_norm": 1.8758207732410697, "learning_rate": 5.815325122356959e-06, "loss": 0.6168, "step": 4468 }, { "epoch": 0.46, "grad_norm": 1.847333041662102, "learning_rate": 5.813664191248631e-06, "loss": 0.702, "step": 4469 }, { "epoch": 0.46, "grad_norm": 1.8080621215350756, "learning_rate": 5.8120031679068315e-06, "loss": 0.5091, "step": 4470 }, { "epoch": 0.46, "grad_norm": 1.7877506559716392, "learning_rate": 5.810342052519842e-06, "loss": 0.6393, "step": 4471 }, { "epoch": 0.46, "grad_norm": 1.8901858594534553, "learning_rate": 5.808680845275963e-06, "loss": 0.5721, "step": 4472 }, { "epoch": 0.46, "grad_norm": 1.873495548260194, "learning_rate": 5.8070195463635025e-06, "loss": 0.6208, "step": 4473 }, { "epoch": 0.47, "grad_norm": 1.9721690840833237, "learning_rate": 5.8053581559707754e-06, "loss": 0.6252, "step": 4474 }, { "epoch": 0.47, "grad_norm": 1.8923298290697836, "learning_rate": 5.80369667428611e-06, "loss": 0.6648, "step": 4475 }, { "epoch": 0.47, "grad_norm": 2.0497196950258973, "learning_rate": 5.802035101497846e-06, "loss": 0.6719, "step": 4476 }, { "epoch": 0.47, "grad_norm": 1.7156531826874144, "learning_rate": 5.800373437794334e-06, "loss": 0.6103, "step": 4477 }, { "epoch": 0.47, "grad_norm": 1.8561943349164527, "learning_rate": 5.798711683363929e-06, "loss": 0.6015, "step": 4478 }, { "epoch": 0.47, "grad_norm": 1.722452492222899, "learning_rate": 5.797049838395001e-06, "loss": 0.5597, "step": 4479 }, { "epoch": 0.47, "grad_norm": 1.8749034479522824, "learning_rate": 5.795387903075933e-06, "loss": 0.6504, "step": 4480 }, { "epoch": 0.47, "grad_norm": 1.7635470279845804, "learning_rate": 5.7937258775951125e-06, "loss": 0.5879, "step": 4481 }, { "epoch": 0.47, "grad_norm": 1.8571087504016002, "learning_rate": 5.792063762140938e-06, "loss": 0.6682, "step": 4482 }, { "epoch": 0.47, "grad_norm": 1.8506275346708279, "learning_rate": 5.790401556901822e-06, "loss": 0.7062, "step": 4483 }, { "epoch": 0.47, "grad_norm": 1.9054042143788954, "learning_rate": 5.788739262066185e-06, "loss": 0.5929, "step": 4484 }, { "epoch": 0.47, "grad_norm": 1.745527884049558, "learning_rate": 5.787076877822457e-06, "loss": 0.6085, "step": 4485 }, { "epoch": 0.47, "grad_norm": 1.8825036097416747, "learning_rate": 5.7854144043590775e-06, "loss": 0.6847, "step": 4486 }, { "epoch": 0.47, "grad_norm": 2.023138211383369, "learning_rate": 5.7837518418645e-06, "loss": 0.6308, "step": 4487 }, { "epoch": 0.47, "grad_norm": 1.9547099231146705, "learning_rate": 5.782089190527185e-06, "loss": 0.6312, "step": 4488 }, { "epoch": 0.47, "grad_norm": 1.643185636278992, "learning_rate": 5.7804264505356e-06, "loss": 0.6085, "step": 4489 }, { "epoch": 0.47, "grad_norm": 2.0414113888224117, "learning_rate": 5.7787636220782294e-06, "loss": 0.6321, "step": 4490 }, { "epoch": 0.47, "grad_norm": 2.2067355496136902, "learning_rate": 5.777100705343565e-06, "loss": 0.7242, "step": 4491 }, { "epoch": 0.47, "grad_norm": 1.9603021200117243, "learning_rate": 5.775437700520103e-06, "loss": 0.59, "step": 4492 }, { "epoch": 0.47, "grad_norm": 1.9097724088822532, "learning_rate": 5.7737746077963605e-06, "loss": 0.714, "step": 4493 }, { "epoch": 0.47, "grad_norm": 1.6876366699927718, "learning_rate": 5.772111427360855e-06, "loss": 0.5481, "step": 4494 }, { "epoch": 0.47, "grad_norm": 1.8767910335072366, "learning_rate": 5.770448159402118e-06, "loss": 0.6646, "step": 4495 }, { "epoch": 0.47, "grad_norm": 2.0294806073429137, "learning_rate": 5.7687848041086905e-06, "loss": 0.6843, "step": 4496 }, { "epoch": 0.47, "grad_norm": 1.9588128637096367, "learning_rate": 5.767121361669125e-06, "loss": 0.7105, "step": 4497 }, { "epoch": 0.47, "grad_norm": 1.9716151486499722, "learning_rate": 5.765457832271979e-06, "loss": 0.6373, "step": 4498 }, { "epoch": 0.47, "grad_norm": 2.080359918608944, "learning_rate": 5.763794216105826e-06, "loss": 0.7501, "step": 4499 }, { "epoch": 0.47, "grad_norm": 1.8964238274940508, "learning_rate": 5.762130513359244e-06, "loss": 0.5701, "step": 4500 }, { "epoch": 0.47, "grad_norm": 1.8669777706044983, "learning_rate": 5.760466724220824e-06, "loss": 0.5573, "step": 4501 }, { "epoch": 0.47, "grad_norm": 1.9046577228447057, "learning_rate": 5.758802848879169e-06, "loss": 0.6396, "step": 4502 }, { "epoch": 0.47, "grad_norm": 2.2300958145019742, "learning_rate": 5.757138887522884e-06, "loss": 0.7542, "step": 4503 }, { "epoch": 0.47, "grad_norm": 1.8800972518796621, "learning_rate": 5.75547484034059e-06, "loss": 0.5727, "step": 4504 }, { "epoch": 0.47, "grad_norm": 2.2108595105155944, "learning_rate": 5.753810707520918e-06, "loss": 0.5708, "step": 4505 }, { "epoch": 0.47, "grad_norm": 1.9557531259523118, "learning_rate": 5.7521464892525055e-06, "loss": 0.668, "step": 4506 }, { "epoch": 0.47, "grad_norm": 1.6753004989326905, "learning_rate": 5.750482185724001e-06, "loss": 0.5802, "step": 4507 }, { "epoch": 0.47, "grad_norm": 1.8277555372897838, "learning_rate": 5.748817797124063e-06, "loss": 0.5803, "step": 4508 }, { "epoch": 0.47, "grad_norm": 1.9809336995417062, "learning_rate": 5.74715332364136e-06, "loss": 0.6485, "step": 4509 }, { "epoch": 0.47, "grad_norm": 1.944164497840986, "learning_rate": 5.7454887654645706e-06, "loss": 0.554, "step": 4510 }, { "epoch": 0.47, "grad_norm": 1.9367604540968901, "learning_rate": 5.743824122782379e-06, "loss": 0.7028, "step": 4511 }, { "epoch": 0.47, "grad_norm": 2.0476021643490068, "learning_rate": 5.7421593957834835e-06, "loss": 0.5873, "step": 4512 }, { "epoch": 0.47, "grad_norm": 1.8303359306005593, "learning_rate": 5.74049458465659e-06, "loss": 0.5952, "step": 4513 }, { "epoch": 0.47, "grad_norm": 1.6641282597478058, "learning_rate": 5.738829689590415e-06, "loss": 0.6916, "step": 4514 }, { "epoch": 0.47, "grad_norm": 1.9218716826790592, "learning_rate": 5.7371647107736824e-06, "loss": 0.5552, "step": 4515 }, { "epoch": 0.47, "grad_norm": 1.8021875591091798, "learning_rate": 5.73549964839513e-06, "loss": 0.6477, "step": 4516 }, { "epoch": 0.47, "grad_norm": 1.874696675514852, "learning_rate": 5.7338345026434995e-06, "loss": 0.6552, "step": 4517 }, { "epoch": 0.47, "grad_norm": 1.7124112887912268, "learning_rate": 5.732169273707545e-06, "loss": 0.6077, "step": 4518 }, { "epoch": 0.47, "grad_norm": 2.036393377024413, "learning_rate": 5.73050396177603e-06, "loss": 0.7247, "step": 4519 }, { "epoch": 0.47, "grad_norm": 1.7701392918915468, "learning_rate": 5.728838567037728e-06, "loss": 0.7127, "step": 4520 }, { "epoch": 0.47, "grad_norm": 1.9231725547933194, "learning_rate": 5.72717308968142e-06, "loss": 0.6524, "step": 4521 }, { "epoch": 0.47, "grad_norm": 2.080539493901293, "learning_rate": 5.725507529895898e-06, "loss": 0.6405, "step": 4522 }, { "epoch": 0.47, "grad_norm": 1.8921086679701191, "learning_rate": 5.723841887869961e-06, "loss": 0.6963, "step": 4523 }, { "epoch": 0.47, "grad_norm": 1.982435709140776, "learning_rate": 5.72217616379242e-06, "loss": 0.553, "step": 4524 }, { "epoch": 0.47, "grad_norm": 1.869479487988279, "learning_rate": 5.7205103578520956e-06, "loss": 0.5267, "step": 4525 }, { "epoch": 0.47, "grad_norm": 1.8967605276922301, "learning_rate": 5.7188444702378155e-06, "loss": 0.6433, "step": 4526 }, { "epoch": 0.47, "grad_norm": 2.1663730345064023, "learning_rate": 5.717178501138416e-06, "loss": 0.7024, "step": 4527 }, { "epoch": 0.47, "grad_norm": 1.6711389674607338, "learning_rate": 5.715512450742749e-06, "loss": 0.6199, "step": 4528 }, { "epoch": 0.47, "grad_norm": 2.037140945002469, "learning_rate": 5.713846319239664e-06, "loss": 0.6665, "step": 4529 }, { "epoch": 0.47, "grad_norm": 2.022157294672067, "learning_rate": 5.71218010681803e-06, "loss": 0.7589, "step": 4530 }, { "epoch": 0.47, "grad_norm": 1.7878692251576156, "learning_rate": 5.710513813666722e-06, "loss": 0.6059, "step": 4531 }, { "epoch": 0.47, "grad_norm": 1.9980205127983506, "learning_rate": 5.708847439974625e-06, "loss": 0.6536, "step": 4532 }, { "epoch": 0.47, "grad_norm": 2.109109182112321, "learning_rate": 5.707180985930629e-06, "loss": 0.6149, "step": 4533 }, { "epoch": 0.47, "grad_norm": 2.035240334490125, "learning_rate": 5.705514451723637e-06, "loss": 0.6236, "step": 4534 }, { "epoch": 0.47, "grad_norm": 1.991215297512859, "learning_rate": 5.703847837542562e-06, "loss": 0.6123, "step": 4535 }, { "epoch": 0.47, "grad_norm": 1.8754198782296225, "learning_rate": 5.702181143576323e-06, "loss": 0.5805, "step": 4536 }, { "epoch": 0.47, "grad_norm": 1.8767609705655672, "learning_rate": 5.7005143700138474e-06, "loss": 0.5803, "step": 4537 }, { "epoch": 0.47, "grad_norm": 1.8788940690815126, "learning_rate": 5.698847517044076e-06, "loss": 0.6342, "step": 4538 }, { "epoch": 0.47, "grad_norm": 2.046203066451224, "learning_rate": 5.697180584855957e-06, "loss": 0.7154, "step": 4539 }, { "epoch": 0.47, "grad_norm": 2.147823782012038, "learning_rate": 5.6955135736384425e-06, "loss": 0.6689, "step": 4540 }, { "epoch": 0.47, "grad_norm": 1.9211938256886534, "learning_rate": 5.693846483580501e-06, "loss": 0.7068, "step": 4541 }, { "epoch": 0.47, "grad_norm": 1.7172972276987455, "learning_rate": 5.692179314871104e-06, "loss": 0.5695, "step": 4542 }, { "epoch": 0.47, "grad_norm": 1.9061594156772637, "learning_rate": 5.69051206769924e-06, "loss": 0.6323, "step": 4543 }, { "epoch": 0.47, "grad_norm": 1.9402592924801827, "learning_rate": 5.688844742253895e-06, "loss": 0.7288, "step": 4544 }, { "epoch": 0.47, "grad_norm": 1.9548934582721385, "learning_rate": 5.687177338724073e-06, "loss": 0.6359, "step": 4545 }, { "epoch": 0.47, "grad_norm": 1.9058935634603749, "learning_rate": 5.685509857298781e-06, "loss": 0.6532, "step": 4546 }, { "epoch": 0.47, "grad_norm": 1.8694790048885155, "learning_rate": 5.683842298167041e-06, "loss": 0.6986, "step": 4547 }, { "epoch": 0.47, "grad_norm": 2.0055822862463293, "learning_rate": 5.68217466151788e-06, "loss": 0.5503, "step": 4548 }, { "epoch": 0.47, "grad_norm": 1.8668045735488943, "learning_rate": 5.680506947540331e-06, "loss": 0.7325, "step": 4549 }, { "epoch": 0.47, "grad_norm": 1.6947983432539857, "learning_rate": 5.67883915642344e-06, "loss": 0.692, "step": 4550 }, { "epoch": 0.47, "grad_norm": 1.8895950172582314, "learning_rate": 5.677171288356263e-06, "loss": 0.7109, "step": 4551 }, { "epoch": 0.47, "grad_norm": 1.971379110045142, "learning_rate": 5.675503343527861e-06, "loss": 0.623, "step": 4552 }, { "epoch": 0.47, "grad_norm": 2.030884339212526, "learning_rate": 5.673835322127304e-06, "loss": 0.6353, "step": 4553 }, { "epoch": 0.47, "grad_norm": 1.928163277407566, "learning_rate": 5.672167224343673e-06, "loss": 0.6159, "step": 4554 }, { "epoch": 0.47, "grad_norm": 1.908799846546338, "learning_rate": 5.670499050366055e-06, "loss": 0.5873, "step": 4555 }, { "epoch": 0.47, "grad_norm": 2.141916280219293, "learning_rate": 5.668830800383548e-06, "loss": 0.8722, "step": 4556 }, { "epoch": 0.47, "grad_norm": 1.9235665075383108, "learning_rate": 5.667162474585258e-06, "loss": 0.6594, "step": 4557 }, { "epoch": 0.47, "grad_norm": 1.9022698171629637, "learning_rate": 5.6654940731602995e-06, "loss": 0.674, "step": 4558 }, { "epoch": 0.47, "grad_norm": 2.0388621559832467, "learning_rate": 5.663825596297794e-06, "loss": 0.6344, "step": 4559 }, { "epoch": 0.47, "grad_norm": 1.689495512348284, "learning_rate": 5.662157044186873e-06, "loss": 0.5544, "step": 4560 }, { "epoch": 0.47, "grad_norm": 2.0045306909764857, "learning_rate": 5.6604884170166765e-06, "loss": 0.673, "step": 4561 }, { "epoch": 0.47, "grad_norm": 1.9111890412837886, "learning_rate": 5.658819714976355e-06, "loss": 0.6942, "step": 4562 }, { "epoch": 0.47, "grad_norm": 1.9998776939328162, "learning_rate": 5.657150938255062e-06, "loss": 0.701, "step": 4563 }, { "epoch": 0.47, "grad_norm": 2.0427375075903735, "learning_rate": 5.655482087041965e-06, "loss": 0.6718, "step": 4564 }, { "epoch": 0.47, "grad_norm": 2.0273043034155704, "learning_rate": 5.653813161526237e-06, "loss": 0.6799, "step": 4565 }, { "epoch": 0.47, "grad_norm": 1.9518930981995333, "learning_rate": 5.6521441618970605e-06, "loss": 0.5647, "step": 4566 }, { "epoch": 0.47, "grad_norm": 1.7441388879377546, "learning_rate": 5.6504750883436275e-06, "loss": 0.5699, "step": 4567 }, { "epoch": 0.47, "grad_norm": 1.890032316812201, "learning_rate": 5.648805941055135e-06, "loss": 0.6521, "step": 4568 }, { "epoch": 0.47, "grad_norm": 1.880305597341674, "learning_rate": 5.647136720220791e-06, "loss": 0.6335, "step": 4569 }, { "epoch": 0.48, "grad_norm": 1.8075253023263849, "learning_rate": 5.64546742602981e-06, "loss": 0.6353, "step": 4570 }, { "epoch": 0.48, "grad_norm": 1.7588936557722914, "learning_rate": 5.643798058671418e-06, "loss": 0.6952, "step": 4571 }, { "epoch": 0.48, "grad_norm": 1.6995840290285382, "learning_rate": 5.6421286183348465e-06, "loss": 0.6305, "step": 4572 }, { "epoch": 0.48, "grad_norm": 2.0948395121790315, "learning_rate": 5.640459105209337e-06, "loss": 0.6411, "step": 4573 }, { "epoch": 0.48, "grad_norm": 1.9736848664004365, "learning_rate": 5.638789519484137e-06, "loss": 0.6777, "step": 4574 }, { "epoch": 0.48, "grad_norm": 2.0435556431950803, "learning_rate": 5.637119861348504e-06, "loss": 0.7337, "step": 4575 }, { "epoch": 0.48, "grad_norm": 2.0847525177688038, "learning_rate": 5.6354501309917034e-06, "loss": 0.5759, "step": 4576 }, { "epoch": 0.48, "grad_norm": 1.659038326232906, "learning_rate": 5.633780328603008e-06, "loss": 0.5909, "step": 4577 }, { "epoch": 0.48, "grad_norm": 1.9345623802471068, "learning_rate": 5.6321104543717e-06, "loss": 0.6019, "step": 4578 }, { "epoch": 0.48, "grad_norm": 1.963218170933064, "learning_rate": 5.630440508487068e-06, "loss": 0.6093, "step": 4579 }, { "epoch": 0.48, "grad_norm": 1.7722605156571707, "learning_rate": 5.628770491138414e-06, "loss": 0.5577, "step": 4580 }, { "epoch": 0.48, "grad_norm": 1.9424797388437025, "learning_rate": 5.627100402515038e-06, "loss": 0.6364, "step": 4581 }, { "epoch": 0.48, "grad_norm": 1.9240402046319094, "learning_rate": 5.625430242806258e-06, "loss": 0.7009, "step": 4582 }, { "epoch": 0.48, "grad_norm": 1.721892694944862, "learning_rate": 5.623760012201394e-06, "loss": 0.6482, "step": 4583 }, { "epoch": 0.48, "grad_norm": 1.977992478992338, "learning_rate": 5.62208971088978e-06, "loss": 0.7306, "step": 4584 }, { "epoch": 0.48, "grad_norm": 1.847701541976097, "learning_rate": 5.62041933906075e-06, "loss": 0.6842, "step": 4585 }, { "epoch": 0.48, "grad_norm": 1.8640134483922797, "learning_rate": 5.618748896903652e-06, "loss": 0.6163, "step": 4586 }, { "epoch": 0.48, "grad_norm": 1.8814485760535749, "learning_rate": 5.617078384607839e-06, "loss": 0.7321, "step": 4587 }, { "epoch": 0.48, "grad_norm": 1.6377353046992302, "learning_rate": 5.615407802362675e-06, "loss": 0.6036, "step": 4588 }, { "epoch": 0.48, "grad_norm": 1.8481119867884892, "learning_rate": 5.613737150357528e-06, "loss": 0.629, "step": 4589 }, { "epoch": 0.48, "grad_norm": 1.892284220082062, "learning_rate": 5.6120664287817765e-06, "loss": 0.6487, "step": 4590 }, { "epoch": 0.48, "grad_norm": 2.021796552026886, "learning_rate": 5.610395637824808e-06, "loss": 0.7287, "step": 4591 }, { "epoch": 0.48, "grad_norm": 1.8874399763646057, "learning_rate": 5.608724777676013e-06, "loss": 0.6372, "step": 4592 }, { "epoch": 0.48, "grad_norm": 1.7794562570973433, "learning_rate": 5.607053848524796e-06, "loss": 0.6554, "step": 4593 }, { "epoch": 0.48, "grad_norm": 1.741221722198036, "learning_rate": 5.605382850560565e-06, "loss": 0.6421, "step": 4594 }, { "epoch": 0.48, "grad_norm": 1.7314815048933372, "learning_rate": 5.603711783972738e-06, "loss": 0.5944, "step": 4595 }, { "epoch": 0.48, "grad_norm": 1.8385199882160486, "learning_rate": 5.6020406489507385e-06, "loss": 0.6879, "step": 4596 }, { "epoch": 0.48, "grad_norm": 1.9110860400813556, "learning_rate": 5.600369445683999e-06, "loss": 0.555, "step": 4597 }, { "epoch": 0.48, "grad_norm": 1.9249828682309444, "learning_rate": 5.5986981743619615e-06, "loss": 0.6981, "step": 4598 }, { "epoch": 0.48, "grad_norm": 1.949901847401595, "learning_rate": 5.597026835174075e-06, "loss": 0.6773, "step": 4599 }, { "epoch": 0.48, "grad_norm": 1.8957329701386099, "learning_rate": 5.5953554283097925e-06, "loss": 0.7059, "step": 4600 }, { "epoch": 0.48, "grad_norm": 1.9662603076387533, "learning_rate": 5.593683953958579e-06, "loss": 0.5705, "step": 4601 }, { "epoch": 0.48, "grad_norm": 1.9760343575599348, "learning_rate": 5.592012412309905e-06, "loss": 0.5734, "step": 4602 }, { "epoch": 0.48, "grad_norm": 1.9314288640416741, "learning_rate": 5.590340803553249e-06, "loss": 0.5705, "step": 4603 }, { "epoch": 0.48, "grad_norm": 1.7749475103591477, "learning_rate": 5.5886691278780995e-06, "loss": 0.5766, "step": 4604 }, { "epoch": 0.48, "grad_norm": 1.8301652296483235, "learning_rate": 5.586997385473949e-06, "loss": 0.5995, "step": 4605 }, { "epoch": 0.48, "grad_norm": 2.0224492387711708, "learning_rate": 5.5853255765302995e-06, "loss": 0.7307, "step": 4606 }, { "epoch": 0.48, "grad_norm": 1.7800813580039285, "learning_rate": 5.583653701236658e-06, "loss": 0.5758, "step": 4607 }, { "epoch": 0.48, "grad_norm": 1.7387567730906435, "learning_rate": 5.581981759782543e-06, "loss": 0.6738, "step": 4608 }, { "epoch": 0.48, "grad_norm": 1.8039219026028164, "learning_rate": 5.580309752357479e-06, "loss": 0.582, "step": 4609 }, { "epoch": 0.48, "grad_norm": 1.9569486267401757, "learning_rate": 5.578637679150997e-06, "loss": 0.6614, "step": 4610 }, { "epoch": 0.48, "grad_norm": 1.7888601680101468, "learning_rate": 5.576965540352637e-06, "loss": 0.5691, "step": 4611 }, { "epoch": 0.48, "grad_norm": 1.7861531175452023, "learning_rate": 5.575293336151943e-06, "loss": 0.5806, "step": 4612 }, { "epoch": 0.48, "grad_norm": 1.9083526025670454, "learning_rate": 5.573621066738471e-06, "loss": 0.6261, "step": 4613 }, { "epoch": 0.48, "grad_norm": 1.737846510421099, "learning_rate": 5.571948732301781e-06, "loss": 0.6108, "step": 4614 }, { "epoch": 0.48, "grad_norm": 2.0423564349394105, "learning_rate": 5.570276333031441e-06, "loss": 0.6206, "step": 4615 }, { "epoch": 0.48, "grad_norm": 2.125783143784693, "learning_rate": 5.568603869117029e-06, "loss": 0.7213, "step": 4616 }, { "epoch": 0.48, "grad_norm": 1.8570347144204054, "learning_rate": 5.56693134074813e-06, "loss": 0.6892, "step": 4617 }, { "epoch": 0.48, "grad_norm": 1.7588279865564456, "learning_rate": 5.56525874811433e-06, "loss": 0.618, "step": 4618 }, { "epoch": 0.48, "grad_norm": 1.7432706073652564, "learning_rate": 5.563586091405229e-06, "loss": 0.5623, "step": 4619 }, { "epoch": 0.48, "grad_norm": 1.847040133653717, "learning_rate": 5.561913370810432e-06, "loss": 0.6304, "step": 4620 }, { "epoch": 0.48, "grad_norm": 2.0289255167643967, "learning_rate": 5.560240586519553e-06, "loss": 0.5503, "step": 4621 }, { "epoch": 0.48, "grad_norm": 1.781866236186172, "learning_rate": 5.558567738722208e-06, "loss": 0.5261, "step": 4622 }, { "epoch": 0.48, "grad_norm": 2.270523192504574, "learning_rate": 5.556894827608027e-06, "loss": 0.7636, "step": 4623 }, { "epoch": 0.48, "grad_norm": 1.925337493063739, "learning_rate": 5.555221853366644e-06, "loss": 0.503, "step": 4624 }, { "epoch": 0.48, "grad_norm": 1.7839496985845051, "learning_rate": 5.5535488161876994e-06, "loss": 0.612, "step": 4625 }, { "epoch": 0.48, "grad_norm": 1.6814669083880103, "learning_rate": 5.55187571626084e-06, "loss": 0.6839, "step": 4626 }, { "epoch": 0.48, "grad_norm": 1.6368160551509423, "learning_rate": 5.550202553775723e-06, "loss": 0.6327, "step": 4627 }, { "epoch": 0.48, "grad_norm": 1.7793440986251825, "learning_rate": 5.548529328922012e-06, "loss": 0.5759, "step": 4628 }, { "epoch": 0.48, "grad_norm": 1.9763629323931438, "learning_rate": 5.546856041889374e-06, "loss": 0.6881, "step": 4629 }, { "epoch": 0.48, "grad_norm": 1.9111816163433664, "learning_rate": 5.545182692867486e-06, "loss": 0.5799, "step": 4630 }, { "epoch": 0.48, "grad_norm": 1.825419245447919, "learning_rate": 5.543509282046031e-06, "loss": 0.6236, "step": 4631 }, { "epoch": 0.48, "grad_norm": 1.8821953354276433, "learning_rate": 5.541835809614704e-06, "loss": 0.6645, "step": 4632 }, { "epoch": 0.48, "grad_norm": 1.800286495436237, "learning_rate": 5.540162275763198e-06, "loss": 0.5669, "step": 4633 }, { "epoch": 0.48, "grad_norm": 1.9310568135270056, "learning_rate": 5.53848868068122e-06, "loss": 0.6105, "step": 4634 }, { "epoch": 0.48, "grad_norm": 1.894980337439149, "learning_rate": 5.53681502455848e-06, "loss": 0.6786, "step": 4635 }, { "epoch": 0.48, "grad_norm": 1.966074480215953, "learning_rate": 5.535141307584697e-06, "loss": 0.6413, "step": 4636 }, { "epoch": 0.48, "grad_norm": 1.8636160131590067, "learning_rate": 5.5334675299495975e-06, "loss": 0.5534, "step": 4637 }, { "epoch": 0.48, "grad_norm": 1.9035811073752469, "learning_rate": 5.531793691842912e-06, "loss": 0.7617, "step": 4638 }, { "epoch": 0.48, "grad_norm": 1.9211832192257259, "learning_rate": 5.530119793454381e-06, "loss": 0.6682, "step": 4639 }, { "epoch": 0.48, "grad_norm": 1.990268702208091, "learning_rate": 5.52844583497375e-06, "loss": 0.6658, "step": 4640 }, { "epoch": 0.48, "grad_norm": 2.048537657034261, "learning_rate": 5.52677181659077e-06, "loss": 0.7124, "step": 4641 }, { "epoch": 0.48, "grad_norm": 2.241227644645234, "learning_rate": 5.525097738495204e-06, "loss": 0.6851, "step": 4642 }, { "epoch": 0.48, "grad_norm": 1.7727776823270593, "learning_rate": 5.523423600876816e-06, "loss": 0.5747, "step": 4643 }, { "epoch": 0.48, "grad_norm": 1.9624937546211163, "learning_rate": 5.521749403925379e-06, "loss": 0.6018, "step": 4644 }, { "epoch": 0.48, "grad_norm": 1.8723540282901674, "learning_rate": 5.520075147830674e-06, "loss": 0.6188, "step": 4645 }, { "epoch": 0.48, "grad_norm": 1.8822938715140052, "learning_rate": 5.518400832782485e-06, "loss": 0.5811, "step": 4646 }, { "epoch": 0.48, "grad_norm": 1.9055839171728313, "learning_rate": 5.516726458970608e-06, "loss": 0.5566, "step": 4647 }, { "epoch": 0.48, "grad_norm": 1.805471104267127, "learning_rate": 5.515052026584842e-06, "loss": 0.6422, "step": 4648 }, { "epoch": 0.48, "grad_norm": 1.8946969960370972, "learning_rate": 5.513377535814992e-06, "loss": 0.7232, "step": 4649 }, { "epoch": 0.48, "grad_norm": 1.996885687942003, "learning_rate": 5.511702986850873e-06, "loss": 0.5903, "step": 4650 }, { "epoch": 0.48, "grad_norm": 1.9418199161319338, "learning_rate": 5.510028379882304e-06, "loss": 0.6816, "step": 4651 }, { "epoch": 0.48, "grad_norm": 1.9209458855987755, "learning_rate": 5.508353715099111e-06, "loss": 0.7325, "step": 4652 }, { "epoch": 0.48, "grad_norm": 1.737261300068071, "learning_rate": 5.506678992691126e-06, "loss": 0.578, "step": 4653 }, { "epoch": 0.48, "grad_norm": 1.887091236693906, "learning_rate": 5.50500421284819e-06, "loss": 0.648, "step": 4654 }, { "epoch": 0.48, "grad_norm": 1.9141217370293513, "learning_rate": 5.503329375760148e-06, "loss": 0.5891, "step": 4655 }, { "epoch": 0.48, "grad_norm": 2.240905822821042, "learning_rate": 5.5016544816168515e-06, "loss": 0.6495, "step": 4656 }, { "epoch": 0.48, "grad_norm": 2.008628318504785, "learning_rate": 5.49997953060816e-06, "loss": 0.6581, "step": 4657 }, { "epoch": 0.48, "grad_norm": 1.848748059234952, "learning_rate": 5.498304522923941e-06, "loss": 0.6991, "step": 4658 }, { "epoch": 0.48, "grad_norm": 1.9226179328864823, "learning_rate": 5.4966294587540626e-06, "loss": 0.62, "step": 4659 }, { "epoch": 0.48, "grad_norm": 1.731765557271535, "learning_rate": 5.494954338288404e-06, "loss": 0.5619, "step": 4660 }, { "epoch": 0.48, "grad_norm": 1.9932290535373656, "learning_rate": 5.493279161716851e-06, "loss": 0.6834, "step": 4661 }, { "epoch": 0.48, "grad_norm": 1.7461404202900574, "learning_rate": 5.491603929229293e-06, "loss": 0.6477, "step": 4662 }, { "epoch": 0.48, "grad_norm": 1.9588893964565277, "learning_rate": 5.4899286410156275e-06, "loss": 0.7275, "step": 4663 }, { "epoch": 0.48, "grad_norm": 1.888923777184811, "learning_rate": 5.488253297265757e-06, "loss": 0.6459, "step": 4664 }, { "epoch": 0.48, "grad_norm": 1.990188644636464, "learning_rate": 5.486577898169595e-06, "loss": 0.6675, "step": 4665 }, { "epoch": 0.49, "grad_norm": 1.8803794705216603, "learning_rate": 5.484902443917053e-06, "loss": 0.6864, "step": 4666 }, { "epoch": 0.49, "grad_norm": 2.0001158625188173, "learning_rate": 5.483226934698055e-06, "loss": 0.6569, "step": 4667 }, { "epoch": 0.49, "grad_norm": 1.8830150639111904, "learning_rate": 5.4815513707025306e-06, "loss": 0.6279, "step": 4668 }, { "epoch": 0.49, "grad_norm": 2.023732196650425, "learning_rate": 5.479875752120414e-06, "loss": 0.6609, "step": 4669 }, { "epoch": 0.49, "grad_norm": 2.0625613008373773, "learning_rate": 5.478200079141644e-06, "loss": 0.643, "step": 4670 }, { "epoch": 0.49, "grad_norm": 1.9591030994917111, "learning_rate": 5.47652435195617e-06, "loss": 0.6557, "step": 4671 }, { "epoch": 0.49, "grad_norm": 1.7978853519603617, "learning_rate": 5.4748485707539435e-06, "loss": 0.5881, "step": 4672 }, { "epoch": 0.49, "grad_norm": 1.9473749824242983, "learning_rate": 5.473172735724927e-06, "loss": 0.6415, "step": 4673 }, { "epoch": 0.49, "grad_norm": 2.058316827182397, "learning_rate": 5.471496847059082e-06, "loss": 0.7541, "step": 4674 }, { "epoch": 0.49, "grad_norm": 2.0498700867540416, "learning_rate": 5.469820904946383e-06, "loss": 0.6762, "step": 4675 }, { "epoch": 0.49, "grad_norm": 1.9141503530490744, "learning_rate": 5.468144909576807e-06, "loss": 0.5577, "step": 4676 }, { "epoch": 0.49, "grad_norm": 1.797991597019518, "learning_rate": 5.466468861140337e-06, "loss": 0.5654, "step": 4677 }, { "epoch": 0.49, "grad_norm": 1.8613963899363541, "learning_rate": 5.464792759826962e-06, "loss": 0.6946, "step": 4678 }, { "epoch": 0.49, "grad_norm": 1.8335011635462246, "learning_rate": 5.463116605826678e-06, "loss": 0.678, "step": 4679 }, { "epoch": 0.49, "grad_norm": 2.0050714504137965, "learning_rate": 5.4614403993294895e-06, "loss": 0.6334, "step": 4680 }, { "epoch": 0.49, "grad_norm": 2.032874636356343, "learning_rate": 5.4597641405254e-06, "loss": 0.6813, "step": 4681 }, { "epoch": 0.49, "grad_norm": 1.790986980802673, "learning_rate": 5.458087829604423e-06, "loss": 0.6778, "step": 4682 }, { "epoch": 0.49, "grad_norm": 1.751145689736165, "learning_rate": 5.456411466756584e-06, "loss": 0.5752, "step": 4683 }, { "epoch": 0.49, "grad_norm": 1.697096974282548, "learning_rate": 5.4547350521719e-06, "loss": 0.567, "step": 4684 }, { "epoch": 0.49, "grad_norm": 2.1498813486938393, "learning_rate": 5.453058586040406e-06, "loss": 0.6605, "step": 4685 }, { "epoch": 0.49, "grad_norm": 1.9907803657150827, "learning_rate": 5.45138206855214e-06, "loss": 0.6133, "step": 4686 }, { "epoch": 0.49, "grad_norm": 2.1637345099530534, "learning_rate": 5.4497054998971445e-06, "loss": 0.6557, "step": 4687 }, { "epoch": 0.49, "grad_norm": 1.8252761461235034, "learning_rate": 5.448028880265467e-06, "loss": 0.6198, "step": 4688 }, { "epoch": 0.49, "grad_norm": 1.82343633197036, "learning_rate": 5.446352209847161e-06, "loss": 0.7194, "step": 4689 }, { "epoch": 0.49, "grad_norm": 2.0614527120208, "learning_rate": 5.444675488832288e-06, "loss": 0.6723, "step": 4690 }, { "epoch": 0.49, "grad_norm": 2.1820987619351175, "learning_rate": 5.442998717410916e-06, "loss": 0.778, "step": 4691 }, { "epoch": 0.49, "grad_norm": 2.0025828276893134, "learning_rate": 5.441321895773112e-06, "loss": 0.6472, "step": 4692 }, { "epoch": 0.49, "grad_norm": 1.9803099147219958, "learning_rate": 5.439645024108956e-06, "loss": 0.6677, "step": 4693 }, { "epoch": 0.49, "grad_norm": 1.8356847480376939, "learning_rate": 5.4379681026085305e-06, "loss": 0.5864, "step": 4694 }, { "epoch": 0.49, "grad_norm": 1.9702510121981012, "learning_rate": 5.436291131461926e-06, "loss": 0.643, "step": 4695 }, { "epoch": 0.49, "grad_norm": 2.259676272993004, "learning_rate": 5.434614110859233e-06, "loss": 0.7192, "step": 4696 }, { "epoch": 0.49, "grad_norm": 1.8027623450713939, "learning_rate": 5.432937040990553e-06, "loss": 0.6741, "step": 4697 }, { "epoch": 0.49, "grad_norm": 2.2863564801237377, "learning_rate": 5.431259922045995e-06, "loss": 0.7341, "step": 4698 }, { "epoch": 0.49, "grad_norm": 1.8466916983922386, "learning_rate": 5.429582754215664e-06, "loss": 0.5891, "step": 4699 }, { "epoch": 0.49, "grad_norm": 1.7862194556644164, "learning_rate": 5.427905537689679e-06, "loss": 0.638, "step": 4700 }, { "epoch": 0.49, "grad_norm": 1.8250276402626973, "learning_rate": 5.426228272658163e-06, "loss": 0.6587, "step": 4701 }, { "epoch": 0.49, "grad_norm": 1.9226029242682035, "learning_rate": 5.424550959311244e-06, "loss": 0.6189, "step": 4702 }, { "epoch": 0.49, "grad_norm": 1.778075048144127, "learning_rate": 5.422873597839052e-06, "loss": 0.5834, "step": 4703 }, { "epoch": 0.49, "grad_norm": 1.8704703373919398, "learning_rate": 5.4211961884317285e-06, "loss": 0.609, "step": 4704 }, { "epoch": 0.49, "grad_norm": 1.9872381122558112, "learning_rate": 5.4195187312794165e-06, "loss": 0.6613, "step": 4705 }, { "epoch": 0.49, "grad_norm": 2.2208206315817356, "learning_rate": 5.417841226572263e-06, "loss": 0.6202, "step": 4706 }, { "epoch": 0.49, "grad_norm": 1.8030691134210728, "learning_rate": 5.416163674500429e-06, "loss": 0.5852, "step": 4707 }, { "epoch": 0.49, "grad_norm": 1.9087645300174885, "learning_rate": 5.4144860752540675e-06, "loss": 0.5521, "step": 4708 }, { "epoch": 0.49, "grad_norm": 2.003121434948416, "learning_rate": 5.412808429023346e-06, "loss": 0.6437, "step": 4709 }, { "epoch": 0.49, "grad_norm": 1.805314904231961, "learning_rate": 5.4111307359984375e-06, "loss": 0.6137, "step": 4710 }, { "epoch": 0.49, "grad_norm": 1.8556671218808498, "learning_rate": 5.409452996369517e-06, "loss": 0.5666, "step": 4711 }, { "epoch": 0.49, "grad_norm": 1.913852383240618, "learning_rate": 5.407775210326765e-06, "loss": 0.593, "step": 4712 }, { "epoch": 0.49, "grad_norm": 1.8579997869434959, "learning_rate": 5.40609737806037e-06, "loss": 0.6143, "step": 4713 }, { "epoch": 0.49, "grad_norm": 1.955230915460718, "learning_rate": 5.404419499760521e-06, "loss": 0.6543, "step": 4714 }, { "epoch": 0.49, "grad_norm": 1.918928720060036, "learning_rate": 5.402741575617417e-06, "loss": 0.5767, "step": 4715 }, { "epoch": 0.49, "grad_norm": 1.9450222578852545, "learning_rate": 5.401063605821259e-06, "loss": 0.599, "step": 4716 }, { "epoch": 0.49, "grad_norm": 1.830693102223202, "learning_rate": 5.399385590562257e-06, "loss": 0.6338, "step": 4717 }, { "epoch": 0.49, "grad_norm": 1.86583491311485, "learning_rate": 5.397707530030621e-06, "loss": 0.5858, "step": 4718 }, { "epoch": 0.49, "grad_norm": 1.9159187468657468, "learning_rate": 5.3960294244165705e-06, "loss": 0.66, "step": 4719 }, { "epoch": 0.49, "grad_norm": 1.9568612561515344, "learning_rate": 5.394351273910327e-06, "loss": 0.6735, "step": 4720 }, { "epoch": 0.49, "grad_norm": 1.8762483550864508, "learning_rate": 5.392673078702118e-06, "loss": 0.6049, "step": 4721 }, { "epoch": 0.49, "grad_norm": 1.7436999227726706, "learning_rate": 5.390994838982178e-06, "loss": 0.5753, "step": 4722 }, { "epoch": 0.49, "grad_norm": 2.012137021393326, "learning_rate": 5.3893165549407435e-06, "loss": 0.6212, "step": 4723 }, { "epoch": 0.49, "grad_norm": 1.8836151547134943, "learning_rate": 5.38763822676806e-06, "loss": 0.6909, "step": 4724 }, { "epoch": 0.49, "grad_norm": 1.804096332876596, "learning_rate": 5.385959854654374e-06, "loss": 0.5916, "step": 4725 }, { "epoch": 0.49, "grad_norm": 1.737330588597606, "learning_rate": 5.384281438789937e-06, "loss": 0.6128, "step": 4726 }, { "epoch": 0.49, "grad_norm": 1.8569231685201164, "learning_rate": 5.382602979365009e-06, "loss": 0.6701, "step": 4727 }, { "epoch": 0.49, "grad_norm": 1.7276227842031793, "learning_rate": 5.380924476569854e-06, "loss": 0.5959, "step": 4728 }, { "epoch": 0.49, "grad_norm": 1.8705463387699754, "learning_rate": 5.379245930594738e-06, "loss": 0.6266, "step": 4729 }, { "epoch": 0.49, "grad_norm": 1.9820668249263957, "learning_rate": 5.3775673416299325e-06, "loss": 0.7306, "step": 4730 }, { "epoch": 0.49, "grad_norm": 2.1991728959408166, "learning_rate": 5.375888709865718e-06, "loss": 0.6499, "step": 4731 }, { "epoch": 0.49, "grad_norm": 1.9049944223482733, "learning_rate": 5.374210035492375e-06, "loss": 0.6072, "step": 4732 }, { "epoch": 0.49, "grad_norm": 1.7630256763054708, "learning_rate": 5.372531318700192e-06, "loss": 0.5893, "step": 4733 }, { "epoch": 0.49, "grad_norm": 1.8797816070120186, "learning_rate": 5.370852559679461e-06, "loss": 0.6232, "step": 4734 }, { "epoch": 0.49, "grad_norm": 1.9335377315595026, "learning_rate": 5.36917375862048e-06, "loss": 0.6255, "step": 4735 }, { "epoch": 0.49, "grad_norm": 1.8935577231729566, "learning_rate": 5.367494915713547e-06, "loss": 0.6538, "step": 4736 }, { "epoch": 0.49, "grad_norm": 2.159238536605417, "learning_rate": 5.365816031148971e-06, "loss": 0.6783, "step": 4737 }, { "epoch": 0.49, "grad_norm": 2.2691128833223444, "learning_rate": 5.364137105117062e-06, "loss": 0.6657, "step": 4738 }, { "epoch": 0.49, "grad_norm": 1.9293045676431337, "learning_rate": 5.362458137808139e-06, "loss": 0.6729, "step": 4739 }, { "epoch": 0.49, "grad_norm": 1.9461395972368574, "learning_rate": 5.360779129412519e-06, "loss": 0.5758, "step": 4740 }, { "epoch": 0.49, "grad_norm": 1.7469143360040644, "learning_rate": 5.359100080120527e-06, "loss": 0.5882, "step": 4741 }, { "epoch": 0.49, "grad_norm": 1.525512152522639, "learning_rate": 5.357420990122495e-06, "loss": 0.5615, "step": 4742 }, { "epoch": 0.49, "grad_norm": 1.8314068903897658, "learning_rate": 5.355741859608756e-06, "loss": 0.6769, "step": 4743 }, { "epoch": 0.49, "grad_norm": 1.8789868416667825, "learning_rate": 5.35406268876965e-06, "loss": 0.5629, "step": 4744 }, { "epoch": 0.49, "grad_norm": 1.9337216447416474, "learning_rate": 5.352383477795522e-06, "loss": 0.6372, "step": 4745 }, { "epoch": 0.49, "grad_norm": 1.991741251184157, "learning_rate": 5.3507042268767165e-06, "loss": 0.7707, "step": 4746 }, { "epoch": 0.49, "grad_norm": 1.7991416686125543, "learning_rate": 5.3490249362035875e-06, "loss": 0.6465, "step": 4747 }, { "epoch": 0.49, "grad_norm": 2.2012059216577566, "learning_rate": 5.347345605966493e-06, "loss": 0.7182, "step": 4748 }, { "epoch": 0.49, "grad_norm": 1.9724821199819178, "learning_rate": 5.345666236355794e-06, "loss": 0.6756, "step": 4749 }, { "epoch": 0.49, "grad_norm": 1.8347424754598582, "learning_rate": 5.343986827561859e-06, "loss": 0.6407, "step": 4750 }, { "epoch": 0.49, "grad_norm": 1.8192154867135653, "learning_rate": 5.342307379775053e-06, "loss": 0.6398, "step": 4751 }, { "epoch": 0.49, "grad_norm": 1.9939590161819776, "learning_rate": 5.340627893185757e-06, "loss": 0.5802, "step": 4752 }, { "epoch": 0.49, "grad_norm": 2.1746888356632277, "learning_rate": 5.338948367984347e-06, "loss": 0.7248, "step": 4753 }, { "epoch": 0.49, "grad_norm": 1.7532309609679348, "learning_rate": 5.337268804361208e-06, "loss": 0.5655, "step": 4754 }, { "epoch": 0.49, "grad_norm": 1.727385613972203, "learning_rate": 5.335589202506727e-06, "loss": 0.5939, "step": 4755 }, { "epoch": 0.49, "grad_norm": 2.0887581011841823, "learning_rate": 5.3339095626112965e-06, "loss": 0.6019, "step": 4756 }, { "epoch": 0.49, "grad_norm": 1.8319707154087208, "learning_rate": 5.332229884865316e-06, "loss": 0.6287, "step": 4757 }, { "epoch": 0.49, "grad_norm": 1.935043766822851, "learning_rate": 5.3305501694591836e-06, "loss": 0.6483, "step": 4758 }, { "epoch": 0.49, "grad_norm": 1.8084928100607975, "learning_rate": 5.3288704165833035e-06, "loss": 0.5682, "step": 4759 }, { "epoch": 0.49, "grad_norm": 1.884803293181149, "learning_rate": 5.327190626428089e-06, "loss": 0.6029, "step": 4760 }, { "epoch": 0.49, "grad_norm": 1.932522766945742, "learning_rate": 5.325510799183953e-06, "loss": 0.6771, "step": 4761 }, { "epoch": 0.5, "grad_norm": 1.9866449112920452, "learning_rate": 5.32383093504131e-06, "loss": 0.5524, "step": 4762 }, { "epoch": 0.5, "grad_norm": 1.8167555076320083, "learning_rate": 5.3221510341905855e-06, "loss": 0.6437, "step": 4763 }, { "epoch": 0.5, "grad_norm": 1.8329283982720728, "learning_rate": 5.320471096822206e-06, "loss": 0.5299, "step": 4764 }, { "epoch": 0.5, "grad_norm": 1.9138426461164462, "learning_rate": 5.318791123126601e-06, "loss": 0.62, "step": 4765 }, { "epoch": 0.5, "grad_norm": 2.1010868001824097, "learning_rate": 5.3171111132942045e-06, "loss": 0.658, "step": 4766 }, { "epoch": 0.5, "grad_norm": 1.9813575064514743, "learning_rate": 5.315431067515456e-06, "loss": 0.7796, "step": 4767 }, { "epoch": 0.5, "grad_norm": 1.9144527346597267, "learning_rate": 5.313750985980799e-06, "loss": 0.6166, "step": 4768 }, { "epoch": 0.5, "grad_norm": 1.9394591482862733, "learning_rate": 5.312070868880678e-06, "loss": 0.6915, "step": 4769 }, { "epoch": 0.5, "grad_norm": 1.7787970809473537, "learning_rate": 5.310390716405546e-06, "loss": 0.5507, "step": 4770 }, { "epoch": 0.5, "grad_norm": 1.9257353515354803, "learning_rate": 5.308710528745856e-06, "loss": 0.5736, "step": 4771 }, { "epoch": 0.5, "grad_norm": 1.899196278729454, "learning_rate": 5.3070303060920706e-06, "loss": 0.6948, "step": 4772 }, { "epoch": 0.5, "grad_norm": 1.8548166587030892, "learning_rate": 5.305350048634648e-06, "loss": 0.6213, "step": 4773 }, { "epoch": 0.5, "grad_norm": 1.72877940583896, "learning_rate": 5.303669756564057e-06, "loss": 0.5737, "step": 4774 }, { "epoch": 0.5, "grad_norm": 1.81760933387422, "learning_rate": 5.301989430070767e-06, "loss": 0.5618, "step": 4775 }, { "epoch": 0.5, "grad_norm": 1.9269910883598707, "learning_rate": 5.300309069345257e-06, "loss": 0.6532, "step": 4776 }, { "epoch": 0.5, "grad_norm": 1.9463546960698885, "learning_rate": 5.298628674578e-06, "loss": 0.6358, "step": 4777 }, { "epoch": 0.5, "grad_norm": 1.8442409478011446, "learning_rate": 5.296948245959481e-06, "loss": 0.6798, "step": 4778 }, { "epoch": 0.5, "grad_norm": 1.6872881787647978, "learning_rate": 5.295267783680186e-06, "loss": 0.615, "step": 4779 }, { "epoch": 0.5, "grad_norm": 2.020457824524539, "learning_rate": 5.293587287930605e-06, "loss": 0.7365, "step": 4780 }, { "epoch": 0.5, "grad_norm": 1.9419792797429891, "learning_rate": 5.291906758901231e-06, "loss": 0.6422, "step": 4781 }, { "epoch": 0.5, "grad_norm": 1.9961829611233421, "learning_rate": 5.290226196782562e-06, "loss": 0.6491, "step": 4782 }, { "epoch": 0.5, "grad_norm": 1.8346912090258714, "learning_rate": 5.2885456017651e-06, "loss": 0.6669, "step": 4783 }, { "epoch": 0.5, "grad_norm": 1.9113822633234845, "learning_rate": 5.286864974039349e-06, "loss": 0.6546, "step": 4784 }, { "epoch": 0.5, "grad_norm": 1.9088972328489655, "learning_rate": 5.285184313795818e-06, "loss": 0.6967, "step": 4785 }, { "epoch": 0.5, "grad_norm": 1.9770384150188363, "learning_rate": 5.28350362122502e-06, "loss": 0.6551, "step": 4786 }, { "epoch": 0.5, "grad_norm": 1.983698674791684, "learning_rate": 5.281822896517471e-06, "loss": 0.6672, "step": 4787 }, { "epoch": 0.5, "grad_norm": 1.9723552036785195, "learning_rate": 5.280142139863689e-06, "loss": 0.6123, "step": 4788 }, { "epoch": 0.5, "grad_norm": 1.7502841190125016, "learning_rate": 5.278461351454199e-06, "loss": 0.6269, "step": 4789 }, { "epoch": 0.5, "grad_norm": 1.8380073595702189, "learning_rate": 5.276780531479528e-06, "loss": 0.629, "step": 4790 }, { "epoch": 0.5, "grad_norm": 1.9008197624107583, "learning_rate": 5.275099680130207e-06, "loss": 0.686, "step": 4791 }, { "epoch": 0.5, "grad_norm": 1.6690452997820755, "learning_rate": 5.273418797596769e-06, "loss": 0.541, "step": 4792 }, { "epoch": 0.5, "grad_norm": 1.7704221672792175, "learning_rate": 5.271737884069751e-06, "loss": 0.7384, "step": 4793 }, { "epoch": 0.5, "grad_norm": 1.8289870533935189, "learning_rate": 5.270056939739695e-06, "loss": 0.7028, "step": 4794 }, { "epoch": 0.5, "grad_norm": 1.9957521734241734, "learning_rate": 5.268375964797147e-06, "loss": 0.8097, "step": 4795 }, { "epoch": 0.5, "grad_norm": 1.8245717485844726, "learning_rate": 5.266694959432651e-06, "loss": 0.5797, "step": 4796 }, { "epoch": 0.5, "grad_norm": 1.6513891472143982, "learning_rate": 5.265013923836763e-06, "loss": 0.5582, "step": 4797 }, { "epoch": 0.5, "grad_norm": 1.8439248589354222, "learning_rate": 5.263332858200037e-06, "loss": 0.5971, "step": 4798 }, { "epoch": 0.5, "grad_norm": 1.7693969050117528, "learning_rate": 5.261651762713029e-06, "loss": 0.6627, "step": 4799 }, { "epoch": 0.5, "grad_norm": 1.8799400076854698, "learning_rate": 5.259970637566303e-06, "loss": 0.6186, "step": 4800 }, { "epoch": 0.5, "grad_norm": 1.810317566636923, "learning_rate": 5.2582894829504225e-06, "loss": 0.6289, "step": 4801 }, { "epoch": 0.5, "grad_norm": 2.1064888897228604, "learning_rate": 5.256608299055959e-06, "loss": 0.7368, "step": 4802 }, { "epoch": 0.5, "grad_norm": 1.7934611876160385, "learning_rate": 5.254927086073481e-06, "loss": 0.5756, "step": 4803 }, { "epoch": 0.5, "grad_norm": 1.918347419758823, "learning_rate": 5.253245844193564e-06, "loss": 0.6471, "step": 4804 }, { "epoch": 0.5, "grad_norm": 1.921103053022868, "learning_rate": 5.251564573606789e-06, "loss": 0.6237, "step": 4805 }, { "epoch": 0.5, "grad_norm": 1.881055921289424, "learning_rate": 5.249883274503734e-06, "loss": 0.655, "step": 4806 }, { "epoch": 0.5, "grad_norm": 2.1128203412785695, "learning_rate": 5.248201947074986e-06, "loss": 0.6767, "step": 4807 }, { "epoch": 0.5, "grad_norm": 1.8473802354895168, "learning_rate": 5.246520591511133e-06, "loss": 0.7472, "step": 4808 }, { "epoch": 0.5, "grad_norm": 1.939482551753464, "learning_rate": 5.244839208002766e-06, "loss": 0.7308, "step": 4809 }, { "epoch": 0.5, "grad_norm": 1.924260276947548, "learning_rate": 5.243157796740478e-06, "loss": 0.6115, "step": 4810 }, { "epoch": 0.5, "grad_norm": 2.0874541041869157, "learning_rate": 5.241476357914869e-06, "loss": 0.7236, "step": 4811 }, { "epoch": 0.5, "grad_norm": 1.837733357562263, "learning_rate": 5.239794891716538e-06, "loss": 0.6035, "step": 4812 }, { "epoch": 0.5, "grad_norm": 1.8910288208809416, "learning_rate": 5.238113398336089e-06, "loss": 0.6271, "step": 4813 }, { "epoch": 0.5, "grad_norm": 2.0685585225934435, "learning_rate": 5.236431877964129e-06, "loss": 0.7462, "step": 4814 }, { "epoch": 0.5, "grad_norm": 1.901940101007261, "learning_rate": 5.234750330791268e-06, "loss": 0.6331, "step": 4815 }, { "epoch": 0.5, "grad_norm": 1.7745538373385725, "learning_rate": 5.23306875700812e-06, "loss": 0.5965, "step": 4816 }, { "epoch": 0.5, "grad_norm": 1.914938178129142, "learning_rate": 5.231387156805299e-06, "loss": 0.6152, "step": 4817 }, { "epoch": 0.5, "grad_norm": 1.9602214851083848, "learning_rate": 5.229705530373424e-06, "loss": 0.619, "step": 4818 }, { "epoch": 0.5, "grad_norm": 1.8208173429792096, "learning_rate": 5.228023877903119e-06, "loss": 0.5953, "step": 4819 }, { "epoch": 0.5, "grad_norm": 1.8886106395780093, "learning_rate": 5.22634219958501e-06, "loss": 0.6042, "step": 4820 }, { "epoch": 0.5, "grad_norm": 1.8052470224663513, "learning_rate": 5.224660495609719e-06, "loss": 0.5851, "step": 4821 }, { "epoch": 0.5, "grad_norm": 1.8864420425653845, "learning_rate": 5.222978766167881e-06, "loss": 0.6513, "step": 4822 }, { "epoch": 0.5, "grad_norm": 1.8881947704523596, "learning_rate": 5.221297011450129e-06, "loss": 0.6889, "step": 4823 }, { "epoch": 0.5, "grad_norm": 1.7349782274775427, "learning_rate": 5.219615231647102e-06, "loss": 0.7054, "step": 4824 }, { "epoch": 0.5, "grad_norm": 1.876831428970342, "learning_rate": 5.2179334269494345e-06, "loss": 0.698, "step": 4825 }, { "epoch": 0.5, "grad_norm": 1.7635223691101098, "learning_rate": 5.21625159754777e-06, "loss": 0.6371, "step": 4826 }, { "epoch": 0.5, "grad_norm": 1.7059481236334688, "learning_rate": 5.214569743632756e-06, "loss": 0.5452, "step": 4827 }, { "epoch": 0.5, "grad_norm": 1.5553605927294225, "learning_rate": 5.212887865395038e-06, "loss": 0.566, "step": 4828 }, { "epoch": 0.5, "grad_norm": 1.9616001578735676, "learning_rate": 5.211205963025268e-06, "loss": 0.7019, "step": 4829 }, { "epoch": 0.5, "grad_norm": 1.8648642120464367, "learning_rate": 5.209524036714096e-06, "loss": 0.67, "step": 4830 }, { "epoch": 0.5, "grad_norm": 2.0163559085077853, "learning_rate": 5.207842086652183e-06, "loss": 0.6602, "step": 4831 }, { "epoch": 0.5, "grad_norm": 2.035789772992233, "learning_rate": 5.206160113030182e-06, "loss": 0.6346, "step": 4832 }, { "epoch": 0.5, "grad_norm": 1.9600216866470923, "learning_rate": 5.204478116038758e-06, "loss": 0.6541, "step": 4833 }, { "epoch": 0.5, "grad_norm": 1.7741794956531685, "learning_rate": 5.202796095868574e-06, "loss": 0.5933, "step": 4834 }, { "epoch": 0.5, "grad_norm": 1.7335196713309575, "learning_rate": 5.201114052710299e-06, "loss": 0.6089, "step": 4835 }, { "epoch": 0.5, "grad_norm": 1.9376194087329808, "learning_rate": 5.1994319867545974e-06, "loss": 0.7502, "step": 4836 }, { "epoch": 0.5, "grad_norm": 1.8325179415301216, "learning_rate": 5.197749898192144e-06, "loss": 0.6688, "step": 4837 }, { "epoch": 0.5, "grad_norm": 1.9692153336399745, "learning_rate": 5.196067787213611e-06, "loss": 0.6632, "step": 4838 }, { "epoch": 0.5, "grad_norm": 1.800237625965685, "learning_rate": 5.1943856540096795e-06, "loss": 0.648, "step": 4839 }, { "epoch": 0.5, "grad_norm": 1.795377760515686, "learning_rate": 5.1927034987710245e-06, "loss": 0.6032, "step": 4840 }, { "epoch": 0.5, "grad_norm": 1.895454260117348, "learning_rate": 5.19102132168833e-06, "loss": 0.6731, "step": 4841 }, { "epoch": 0.5, "grad_norm": 2.039687589229877, "learning_rate": 5.189339122952281e-06, "loss": 0.6824, "step": 4842 }, { "epoch": 0.5, "grad_norm": 1.8781273228678457, "learning_rate": 5.18765690275356e-06, "loss": 0.6945, "step": 4843 }, { "epoch": 0.5, "grad_norm": 1.8013673164406765, "learning_rate": 5.185974661282862e-06, "loss": 0.6765, "step": 4844 }, { "epoch": 0.5, "grad_norm": 1.98407236347405, "learning_rate": 5.184292398730876e-06, "loss": 0.7248, "step": 4845 }, { "epoch": 0.5, "grad_norm": 2.0038873123117757, "learning_rate": 5.182610115288296e-06, "loss": 0.6836, "step": 4846 }, { "epoch": 0.5, "grad_norm": 1.660770204955728, "learning_rate": 5.180927811145818e-06, "loss": 0.56, "step": 4847 }, { "epoch": 0.5, "grad_norm": 2.17998760571231, "learning_rate": 5.179245486494141e-06, "loss": 0.7484, "step": 4848 }, { "epoch": 0.5, "grad_norm": 1.9112108041835019, "learning_rate": 5.177563141523967e-06, "loss": 0.7095, "step": 4849 }, { "epoch": 0.5, "grad_norm": 2.160618119718771, "learning_rate": 5.175880776425999e-06, "loss": 0.7162, "step": 4850 }, { "epoch": 0.5, "grad_norm": 1.85331993962675, "learning_rate": 5.174198391390942e-06, "loss": 0.6401, "step": 4851 }, { "epoch": 0.5, "grad_norm": 1.939094194390486, "learning_rate": 5.172515986609504e-06, "loss": 0.6593, "step": 4852 }, { "epoch": 0.5, "grad_norm": 1.9149904541209817, "learning_rate": 5.170833562272398e-06, "loss": 0.6141, "step": 4853 }, { "epoch": 0.5, "grad_norm": 1.9222041139959078, "learning_rate": 5.169151118570332e-06, "loss": 0.6764, "step": 4854 }, { "epoch": 0.5, "grad_norm": 1.7669182377758037, "learning_rate": 5.167468655694022e-06, "loss": 0.538, "step": 4855 }, { "epoch": 0.5, "grad_norm": 2.1867857975643434, "learning_rate": 5.165786173834187e-06, "loss": 0.656, "step": 4856 }, { "epoch": 0.5, "grad_norm": 1.6534862157740111, "learning_rate": 5.164103673181544e-06, "loss": 0.6398, "step": 4857 }, { "epoch": 0.5, "grad_norm": 2.1437201670617037, "learning_rate": 5.162421153926814e-06, "loss": 0.7164, "step": 4858 }, { "epoch": 0.51, "grad_norm": 1.6553379755707762, "learning_rate": 5.16073861626072e-06, "loss": 0.6298, "step": 4859 }, { "epoch": 0.51, "grad_norm": 1.8648222159713144, "learning_rate": 5.1590560603739885e-06, "loss": 0.697, "step": 4860 }, { "epoch": 0.51, "grad_norm": 1.961783728362329, "learning_rate": 5.157373486457346e-06, "loss": 0.6445, "step": 4861 }, { "epoch": 0.51, "grad_norm": 1.7476062727074282, "learning_rate": 5.15569089470152e-06, "loss": 0.4982, "step": 4862 }, { "epoch": 0.51, "grad_norm": 2.0169770654335872, "learning_rate": 5.1540082852972455e-06, "loss": 0.6628, "step": 4863 }, { "epoch": 0.51, "grad_norm": 1.7595848868953443, "learning_rate": 5.152325658435254e-06, "loss": 0.6338, "step": 4864 }, { "epoch": 0.51, "grad_norm": 1.6235278140692069, "learning_rate": 5.15064301430628e-06, "loss": 0.5293, "step": 4865 }, { "epoch": 0.51, "grad_norm": 1.8762127911073818, "learning_rate": 5.148960353101063e-06, "loss": 0.5885, "step": 4866 }, { "epoch": 0.51, "grad_norm": 2.155003322145285, "learning_rate": 5.147277675010339e-06, "loss": 0.749, "step": 4867 }, { "epoch": 0.51, "grad_norm": 1.9059516874727496, "learning_rate": 5.145594980224853e-06, "loss": 0.6446, "step": 4868 }, { "epoch": 0.51, "grad_norm": 1.7276881296721822, "learning_rate": 5.143912268935345e-06, "loss": 0.5343, "step": 4869 }, { "epoch": 0.51, "grad_norm": 2.069942567716001, "learning_rate": 5.14222954133256e-06, "loss": 0.6633, "step": 4870 }, { "epoch": 0.51, "grad_norm": 2.0174980514678205, "learning_rate": 5.140546797607248e-06, "loss": 0.6722, "step": 4871 }, { "epoch": 0.51, "grad_norm": 1.9082251784968078, "learning_rate": 5.138864037950155e-06, "loss": 0.5613, "step": 4872 }, { "epoch": 0.51, "grad_norm": 1.8732227894779176, "learning_rate": 5.137181262552031e-06, "loss": 0.6477, "step": 4873 }, { "epoch": 0.51, "grad_norm": 1.6832285896137942, "learning_rate": 5.135498471603629e-06, "loss": 0.6062, "step": 4874 }, { "epoch": 0.51, "grad_norm": 1.6344271952482194, "learning_rate": 5.133815665295704e-06, "loss": 0.676, "step": 4875 }, { "epoch": 0.51, "grad_norm": 1.7922029487358937, "learning_rate": 5.13213284381901e-06, "loss": 0.571, "step": 4876 }, { "epoch": 0.51, "grad_norm": 1.8548110893994498, "learning_rate": 5.1304500073643045e-06, "loss": 0.5944, "step": 4877 }, { "epoch": 0.51, "grad_norm": 6.780740593676751, "learning_rate": 5.128767156122347e-06, "loss": 0.5777, "step": 4878 }, { "epoch": 0.51, "grad_norm": 2.247295270187268, "learning_rate": 5.1270842902839e-06, "loss": 0.7154, "step": 4879 }, { "epoch": 0.51, "grad_norm": 1.9584193373117975, "learning_rate": 5.125401410039723e-06, "loss": 0.6757, "step": 4880 }, { "epoch": 0.51, "grad_norm": 2.095672743950007, "learning_rate": 5.123718515580581e-06, "loss": 0.6605, "step": 4881 }, { "epoch": 0.51, "grad_norm": 1.8048789293075735, "learning_rate": 5.1220356070972414e-06, "loss": 0.7379, "step": 4882 }, { "epoch": 0.51, "grad_norm": 1.9146554643863576, "learning_rate": 5.120352684780469e-06, "loss": 0.5925, "step": 4883 }, { "epoch": 0.51, "grad_norm": 2.1568243392901105, "learning_rate": 5.118669748821034e-06, "loss": 0.7402, "step": 4884 }, { "epoch": 0.51, "grad_norm": 1.9630846444451566, "learning_rate": 5.116986799409708e-06, "loss": 0.6983, "step": 4885 }, { "epoch": 0.51, "grad_norm": 1.939417972921571, "learning_rate": 5.11530383673726e-06, "loss": 0.6694, "step": 4886 }, { "epoch": 0.51, "grad_norm": 2.15676437427674, "learning_rate": 5.1136208609944644e-06, "loss": 0.6987, "step": 4887 }, { "epoch": 0.51, "grad_norm": 1.9490400227157634, "learning_rate": 5.111937872372097e-06, "loss": 0.6834, "step": 4888 }, { "epoch": 0.51, "grad_norm": 1.89411260769405, "learning_rate": 5.110254871060933e-06, "loss": 0.6891, "step": 4889 }, { "epoch": 0.51, "grad_norm": 1.8278879530072518, "learning_rate": 5.108571857251754e-06, "loss": 0.5864, "step": 4890 }, { "epoch": 0.51, "grad_norm": 1.959630964015898, "learning_rate": 5.106888831135334e-06, "loss": 0.5718, "step": 4891 }, { "epoch": 0.51, "grad_norm": 1.887053393441401, "learning_rate": 5.105205792902456e-06, "loss": 0.5991, "step": 4892 }, { "epoch": 0.51, "grad_norm": 1.8419676864917576, "learning_rate": 5.103522742743901e-06, "loss": 0.6203, "step": 4893 }, { "epoch": 0.51, "grad_norm": 1.9813179222203285, "learning_rate": 5.101839680850454e-06, "loss": 0.7061, "step": 4894 }, { "epoch": 0.51, "grad_norm": 1.9051838482841017, "learning_rate": 5.100156607412899e-06, "loss": 0.663, "step": 4895 }, { "epoch": 0.51, "grad_norm": 1.7946750412220063, "learning_rate": 5.09847352262202e-06, "loss": 0.6395, "step": 4896 }, { "epoch": 0.51, "grad_norm": 1.692629416537992, "learning_rate": 5.096790426668608e-06, "loss": 0.6797, "step": 4897 }, { "epoch": 0.51, "grad_norm": 1.6992150985739385, "learning_rate": 5.095107319743449e-06, "loss": 0.6609, "step": 4898 }, { "epoch": 0.51, "grad_norm": 2.0328131001769885, "learning_rate": 5.093424202037333e-06, "loss": 0.6063, "step": 4899 }, { "epoch": 0.51, "grad_norm": 1.8965582288309775, "learning_rate": 5.09174107374105e-06, "loss": 0.7277, "step": 4900 }, { "epoch": 0.51, "grad_norm": 1.9789015178424736, "learning_rate": 5.090057935045395e-06, "loss": 0.7337, "step": 4901 }, { "epoch": 0.51, "grad_norm": 2.0892636946117795, "learning_rate": 5.088374786141159e-06, "loss": 0.5095, "step": 4902 }, { "epoch": 0.51, "grad_norm": 1.9916605863411267, "learning_rate": 5.086691627219137e-06, "loss": 0.6281, "step": 4903 }, { "epoch": 0.51, "grad_norm": 1.8199559774598433, "learning_rate": 5.085008458470126e-06, "loss": 0.6696, "step": 4904 }, { "epoch": 0.51, "grad_norm": 2.2905345302233284, "learning_rate": 5.0833252800849205e-06, "loss": 0.723, "step": 4905 }, { "epoch": 0.51, "grad_norm": 1.8128746657582677, "learning_rate": 5.0816420922543195e-06, "loss": 0.6756, "step": 4906 }, { "epoch": 0.51, "grad_norm": 1.6940473207507076, "learning_rate": 5.079958895169122e-06, "loss": 0.6189, "step": 4907 }, { "epoch": 0.51, "grad_norm": 2.099894774247856, "learning_rate": 5.078275689020129e-06, "loss": 0.6214, "step": 4908 }, { "epoch": 0.51, "grad_norm": 1.923333567399948, "learning_rate": 5.076592473998141e-06, "loss": 0.5972, "step": 4909 }, { "epoch": 0.51, "grad_norm": 1.742185055247871, "learning_rate": 5.0749092502939575e-06, "loss": 0.6876, "step": 4910 }, { "epoch": 0.51, "grad_norm": 1.7921342848541095, "learning_rate": 5.073226018098385e-06, "loss": 0.5127, "step": 4911 }, { "epoch": 0.51, "grad_norm": 1.9020236365196561, "learning_rate": 5.071542777602225e-06, "loss": 0.5513, "step": 4912 }, { "epoch": 0.51, "grad_norm": 1.8385169703695514, "learning_rate": 5.0698595289962845e-06, "loss": 0.6028, "step": 4913 }, { "epoch": 0.51, "grad_norm": 1.868450962690883, "learning_rate": 5.068176272471368e-06, "loss": 0.676, "step": 4914 }, { "epoch": 0.51, "grad_norm": 1.8850082304527058, "learning_rate": 5.066493008218282e-06, "loss": 0.6854, "step": 4915 }, { "epoch": 0.51, "grad_norm": 1.9577619267509585, "learning_rate": 5.064809736427835e-06, "loss": 0.6569, "step": 4916 }, { "epoch": 0.51, "grad_norm": 1.9076916716005377, "learning_rate": 5.0631264572908334e-06, "loss": 0.5663, "step": 4917 }, { "epoch": 0.51, "grad_norm": 2.2592632992616415, "learning_rate": 5.0614431709980895e-06, "loss": 0.669, "step": 4918 }, { "epoch": 0.51, "grad_norm": 1.6810611347190536, "learning_rate": 5.059759877740411e-06, "loss": 0.5439, "step": 4919 }, { "epoch": 0.51, "grad_norm": 2.2498791112061545, "learning_rate": 5.058076577708611e-06, "loss": 0.7829, "step": 4920 }, { "epoch": 0.51, "grad_norm": 1.7291877715984016, "learning_rate": 5.056393271093498e-06, "loss": 0.6395, "step": 4921 }, { "epoch": 0.51, "grad_norm": 1.6289988487754408, "learning_rate": 5.0547099580858874e-06, "loss": 0.5376, "step": 4922 }, { "epoch": 0.51, "grad_norm": 2.213277225739156, "learning_rate": 5.053026638876591e-06, "loss": 0.7141, "step": 4923 }, { "epoch": 0.51, "grad_norm": 2.141769948061226, "learning_rate": 5.0513433136564236e-06, "loss": 0.6407, "step": 4924 }, { "epoch": 0.51, "grad_norm": 1.804589027364479, "learning_rate": 5.049659982616199e-06, "loss": 0.5956, "step": 4925 }, { "epoch": 0.51, "grad_norm": 1.9554108740288227, "learning_rate": 5.047976645946732e-06, "loss": 0.6802, "step": 4926 }, { "epoch": 0.51, "grad_norm": 1.6593553009678443, "learning_rate": 5.046293303838838e-06, "loss": 0.6175, "step": 4927 }, { "epoch": 0.51, "grad_norm": 2.25531640266805, "learning_rate": 5.044609956483335e-06, "loss": 0.7248, "step": 4928 }, { "epoch": 0.51, "grad_norm": 1.8342676675179805, "learning_rate": 5.042926604071039e-06, "loss": 0.567, "step": 4929 }, { "epoch": 0.51, "grad_norm": 1.983517953815852, "learning_rate": 5.0412432467927674e-06, "loss": 0.728, "step": 4930 }, { "epoch": 0.51, "grad_norm": 2.0957696190309636, "learning_rate": 5.039559884839339e-06, "loss": 0.6731, "step": 4931 }, { "epoch": 0.51, "grad_norm": 1.7798860206636986, "learning_rate": 5.037876518401572e-06, "loss": 0.6234, "step": 4932 }, { "epoch": 0.51, "grad_norm": 1.8782106374167673, "learning_rate": 5.036193147670286e-06, "loss": 0.5433, "step": 4933 }, { "epoch": 0.51, "grad_norm": 1.965876390350256, "learning_rate": 5.0345097728363e-06, "loss": 0.7375, "step": 4934 }, { "epoch": 0.51, "grad_norm": 1.6555281848960024, "learning_rate": 5.032826394090435e-06, "loss": 0.5369, "step": 4935 }, { "epoch": 0.51, "grad_norm": 2.0343517692919293, "learning_rate": 5.031143011623511e-06, "loss": 0.6064, "step": 4936 }, { "epoch": 0.51, "grad_norm": 1.7335127141852276, "learning_rate": 5.02945962562635e-06, "loss": 0.6594, "step": 4937 }, { "epoch": 0.51, "grad_norm": 1.7992246411216586, "learning_rate": 5.027776236289772e-06, "loss": 0.7669, "step": 4938 }, { "epoch": 0.51, "grad_norm": 1.7969757038163299, "learning_rate": 5.026092843804599e-06, "loss": 0.5572, "step": 4939 }, { "epoch": 0.51, "grad_norm": 2.2179435808357195, "learning_rate": 5.024409448361653e-06, "loss": 0.6424, "step": 4940 }, { "epoch": 0.51, "grad_norm": 1.7330945557046764, "learning_rate": 5.022726050151756e-06, "loss": 0.6363, "step": 4941 }, { "epoch": 0.51, "grad_norm": 1.6773497431028328, "learning_rate": 5.0210426493657335e-06, "loss": 0.6059, "step": 4942 }, { "epoch": 0.51, "grad_norm": 1.7029285789828024, "learning_rate": 5.019359246194406e-06, "loss": 0.5937, "step": 4943 }, { "epoch": 0.51, "grad_norm": 1.7350035183219865, "learning_rate": 5.017675840828597e-06, "loss": 0.5811, "step": 4944 }, { "epoch": 0.51, "grad_norm": 1.8633238467146478, "learning_rate": 5.0159924334591316e-06, "loss": 0.6168, "step": 4945 }, { "epoch": 0.51, "grad_norm": 2.0472909051472272, "learning_rate": 5.014309024276833e-06, "loss": 0.6218, "step": 4946 }, { "epoch": 0.51, "grad_norm": 1.889431669608345, "learning_rate": 5.012625613472525e-06, "loss": 0.6791, "step": 4947 }, { "epoch": 0.51, "grad_norm": 1.7828852466791096, "learning_rate": 5.010942201237031e-06, "loss": 0.5815, "step": 4948 }, { "epoch": 0.51, "grad_norm": 1.7605094733233677, "learning_rate": 5.009258787761178e-06, "loss": 0.6806, "step": 4949 }, { "epoch": 0.51, "grad_norm": 2.106865906855424, "learning_rate": 5.007575373235786e-06, "loss": 0.5734, "step": 4950 }, { "epoch": 0.51, "grad_norm": 2.069021850878164, "learning_rate": 5.005891957851683e-06, "loss": 0.6051, "step": 4951 }, { "epoch": 0.51, "grad_norm": 1.953305074899751, "learning_rate": 5.004208541799693e-06, "loss": 0.6234, "step": 4952 }, { "epoch": 0.51, "grad_norm": 1.6461163901567595, "learning_rate": 5.002525125270641e-06, "loss": 0.7198, "step": 4953 }, { "epoch": 0.51, "grad_norm": 1.9211725095625716, "learning_rate": 5.000841708455351e-06, "loss": 0.5863, "step": 4954 }, { "epoch": 0.52, "grad_norm": 1.8963853637765036, "learning_rate": 4.99915829154465e-06, "loss": 0.6632, "step": 4955 }, { "epoch": 0.52, "grad_norm": 1.9670876231350454, "learning_rate": 4.997474874729361e-06, "loss": 0.7981, "step": 4956 }, { "epoch": 0.52, "grad_norm": 1.7891379226397384, "learning_rate": 4.995791458200309e-06, "loss": 0.5285, "step": 4957 }, { "epoch": 0.52, "grad_norm": 2.176966656664999, "learning_rate": 4.994108042148318e-06, "loss": 0.702, "step": 4958 }, { "epoch": 0.52, "grad_norm": 1.9299774614223446, "learning_rate": 4.992424626764216e-06, "loss": 0.6392, "step": 4959 }, { "epoch": 0.52, "grad_norm": 1.699908367772429, "learning_rate": 4.990741212238825e-06, "loss": 0.5981, "step": 4960 }, { "epoch": 0.52, "grad_norm": 1.6255346707577691, "learning_rate": 4.98905779876297e-06, "loss": 0.5514, "step": 4961 }, { "epoch": 0.52, "grad_norm": 1.8368275633671942, "learning_rate": 4.987374386527478e-06, "loss": 0.6648, "step": 4962 }, { "epoch": 0.52, "grad_norm": 1.8212502460777635, "learning_rate": 4.985690975723168e-06, "loss": 0.6234, "step": 4963 }, { "epoch": 0.52, "grad_norm": 1.5672021052986176, "learning_rate": 4.984007566540869e-06, "loss": 0.5612, "step": 4964 }, { "epoch": 0.52, "grad_norm": 1.8762480041490517, "learning_rate": 4.982324159171404e-06, "loss": 0.6543, "step": 4965 }, { "epoch": 0.52, "grad_norm": 1.867185684724656, "learning_rate": 4.980640753805595e-06, "loss": 0.6743, "step": 4966 }, { "epoch": 0.52, "grad_norm": 1.979608168978776, "learning_rate": 4.978957350634267e-06, "loss": 0.6139, "step": 4967 }, { "epoch": 0.52, "grad_norm": 2.0889995333117413, "learning_rate": 4.977273949848244e-06, "loss": 0.6246, "step": 4968 }, { "epoch": 0.52, "grad_norm": 2.0304842602058533, "learning_rate": 4.975590551638348e-06, "loss": 0.6666, "step": 4969 }, { "epoch": 0.52, "grad_norm": 2.0355864818291303, "learning_rate": 4.973907156195405e-06, "loss": 0.637, "step": 4970 }, { "epoch": 0.52, "grad_norm": 1.8969754159542844, "learning_rate": 4.972223763710231e-06, "loss": 0.6201, "step": 4971 }, { "epoch": 0.52, "grad_norm": 2.2601088567596532, "learning_rate": 4.970540374373653e-06, "loss": 0.6495, "step": 4972 }, { "epoch": 0.52, "grad_norm": 1.86528365828505, "learning_rate": 4.96885698837649e-06, "loss": 0.5782, "step": 4973 }, { "epoch": 0.52, "grad_norm": 1.9862084426505078, "learning_rate": 4.967173605909566e-06, "loss": 0.6409, "step": 4974 }, { "epoch": 0.52, "grad_norm": 1.7833941900740438, "learning_rate": 4.9654902271637005e-06, "loss": 0.7328, "step": 4975 }, { "epoch": 0.52, "grad_norm": 1.8010266943749187, "learning_rate": 4.963806852329715e-06, "loss": 0.6425, "step": 4976 }, { "epoch": 0.52, "grad_norm": 1.9009609692301752, "learning_rate": 4.962123481598431e-06, "loss": 0.6324, "step": 4977 }, { "epoch": 0.52, "grad_norm": 1.8052576908809401, "learning_rate": 4.9604401151606626e-06, "loss": 0.6518, "step": 4978 }, { "epoch": 0.52, "grad_norm": 1.868428200480606, "learning_rate": 4.958756753207234e-06, "loss": 0.5987, "step": 4979 }, { "epoch": 0.52, "grad_norm": 1.9367859434196406, "learning_rate": 4.957073395928963e-06, "loss": 0.7087, "step": 4980 }, { "epoch": 0.52, "grad_norm": 1.7339870392195045, "learning_rate": 4.955390043516666e-06, "loss": 0.5802, "step": 4981 }, { "epoch": 0.52, "grad_norm": 2.0035904262119333, "learning_rate": 4.953706696161163e-06, "loss": 0.7026, "step": 4982 }, { "epoch": 0.52, "grad_norm": 1.961898238315474, "learning_rate": 4.952023354053269e-06, "loss": 0.6831, "step": 4983 }, { "epoch": 0.52, "grad_norm": 1.8107631912130409, "learning_rate": 4.950340017383802e-06, "loss": 0.6986, "step": 4984 }, { "epoch": 0.52, "grad_norm": 1.7360843176805136, "learning_rate": 4.948656686343577e-06, "loss": 0.7014, "step": 4985 }, { "epoch": 0.52, "grad_norm": 1.880010839932374, "learning_rate": 4.946973361123411e-06, "loss": 0.6949, "step": 4986 }, { "epoch": 0.52, "grad_norm": 2.0900365683376148, "learning_rate": 4.945290041914114e-06, "loss": 0.7737, "step": 4987 }, { "epoch": 0.52, "grad_norm": 1.7786822013502936, "learning_rate": 4.943606728906503e-06, "loss": 0.5823, "step": 4988 }, { "epoch": 0.52, "grad_norm": 2.00799604136966, "learning_rate": 4.941923422291392e-06, "loss": 0.6422, "step": 4989 }, { "epoch": 0.52, "grad_norm": 2.0586128906008607, "learning_rate": 4.94024012225959e-06, "loss": 0.7, "step": 4990 }, { "epoch": 0.52, "grad_norm": 1.7841514369236677, "learning_rate": 4.938556829001912e-06, "loss": 0.5756, "step": 4991 }, { "epoch": 0.52, "grad_norm": 2.306439783095156, "learning_rate": 4.936873542709168e-06, "loss": 0.5948, "step": 4992 }, { "epoch": 0.52, "grad_norm": 2.615959858500367, "learning_rate": 4.935190263572168e-06, "loss": 0.5653, "step": 4993 }, { "epoch": 0.52, "grad_norm": 1.7733694335394845, "learning_rate": 4.93350699178172e-06, "loss": 0.5998, "step": 4994 }, { "epoch": 0.52, "grad_norm": 1.7428928640514691, "learning_rate": 4.931823727528634e-06, "loss": 0.6163, "step": 4995 }, { "epoch": 0.52, "grad_norm": 1.8644342122695163, "learning_rate": 4.930140471003716e-06, "loss": 0.7548, "step": 4996 }, { "epoch": 0.52, "grad_norm": 2.0175758276054747, "learning_rate": 4.9284572223977755e-06, "loss": 0.6279, "step": 4997 }, { "epoch": 0.52, "grad_norm": 2.0749950976688205, "learning_rate": 4.926773981901616e-06, "loss": 0.6444, "step": 4998 }, { "epoch": 0.52, "grad_norm": 2.124837336083513, "learning_rate": 4.925090749706045e-06, "loss": 0.719, "step": 4999 }, { "epoch": 0.52, "grad_norm": 1.993800913838882, "learning_rate": 4.9234075260018615e-06, "loss": 0.6828, "step": 5000 }, { "epoch": 0.52, "grad_norm": 1.8242495509149763, "learning_rate": 4.921724310979872e-06, "loss": 0.6616, "step": 5001 }, { "epoch": 0.52, "grad_norm": 1.8393977819228315, "learning_rate": 4.920041104830879e-06, "loss": 0.5354, "step": 5002 }, { "epoch": 0.52, "grad_norm": 1.890344553733854, "learning_rate": 4.918357907745681e-06, "loss": 0.5357, "step": 5003 }, { "epoch": 0.52, "grad_norm": 1.8790617991665943, "learning_rate": 4.91667471991508e-06, "loss": 0.5759, "step": 5004 }, { "epoch": 0.52, "grad_norm": 1.8795858588418088, "learning_rate": 4.914991541529875e-06, "loss": 0.6199, "step": 5005 }, { "epoch": 0.52, "grad_norm": 1.7329455799381723, "learning_rate": 4.913308372780863e-06, "loss": 0.6488, "step": 5006 }, { "epoch": 0.52, "grad_norm": 1.9222191221821798, "learning_rate": 4.9116252138588435e-06, "loss": 0.6239, "step": 5007 }, { "epoch": 0.52, "grad_norm": 1.8479397466575056, "learning_rate": 4.909942064954607e-06, "loss": 0.6731, "step": 5008 }, { "epoch": 0.52, "grad_norm": 1.9196852172679921, "learning_rate": 4.908258926258951e-06, "loss": 0.607, "step": 5009 }, { "epoch": 0.52, "grad_norm": 1.8166403880883872, "learning_rate": 4.906575797962669e-06, "loss": 0.6431, "step": 5010 }, { "epoch": 0.52, "grad_norm": 1.7465182013878113, "learning_rate": 4.904892680256553e-06, "loss": 0.6119, "step": 5011 }, { "epoch": 0.52, "grad_norm": 1.7704646382888727, "learning_rate": 4.903209573331393e-06, "loss": 0.6068, "step": 5012 }, { "epoch": 0.52, "grad_norm": 1.9521348314630844, "learning_rate": 4.90152647737798e-06, "loss": 0.5722, "step": 5013 }, { "epoch": 0.52, "grad_norm": 1.8961939935911354, "learning_rate": 4.899843392587104e-06, "loss": 0.6284, "step": 5014 }, { "epoch": 0.52, "grad_norm": 2.06014596993545, "learning_rate": 4.8981603191495484e-06, "loss": 0.6333, "step": 5015 }, { "epoch": 0.52, "grad_norm": 1.8220721139660931, "learning_rate": 4.8964772572561e-06, "loss": 0.5629, "step": 5016 }, { "epoch": 0.52, "grad_norm": 1.9131734154680402, "learning_rate": 4.894794207097546e-06, "loss": 0.6336, "step": 5017 }, { "epoch": 0.52, "grad_norm": 2.0006845737302243, "learning_rate": 4.893111168864668e-06, "loss": 0.6024, "step": 5018 }, { "epoch": 0.52, "grad_norm": 1.835189048852703, "learning_rate": 4.891428142748247e-06, "loss": 0.5964, "step": 5019 }, { "epoch": 0.52, "grad_norm": 1.9859963215195038, "learning_rate": 4.889745128939067e-06, "loss": 0.7307, "step": 5020 }, { "epoch": 0.52, "grad_norm": 2.0406766351110996, "learning_rate": 4.888062127627904e-06, "loss": 0.6828, "step": 5021 }, { "epoch": 0.52, "grad_norm": 1.9519571650177585, "learning_rate": 4.886379139005537e-06, "loss": 0.6814, "step": 5022 }, { "epoch": 0.52, "grad_norm": 1.8257334568373909, "learning_rate": 4.884696163262742e-06, "loss": 0.5058, "step": 5023 }, { "epoch": 0.52, "grad_norm": 1.7128904695532874, "learning_rate": 4.883013200590294e-06, "loss": 0.5868, "step": 5024 }, { "epoch": 0.52, "grad_norm": 1.7085841774434567, "learning_rate": 4.881330251178968e-06, "loss": 0.5638, "step": 5025 }, { "epoch": 0.52, "grad_norm": 1.908140564370563, "learning_rate": 4.879647315219533e-06, "loss": 0.5806, "step": 5026 }, { "epoch": 0.52, "grad_norm": 1.8237970784787472, "learning_rate": 4.87796439290276e-06, "loss": 0.6137, "step": 5027 }, { "epoch": 0.52, "grad_norm": 1.884695749390379, "learning_rate": 4.87628148441942e-06, "loss": 0.6748, "step": 5028 }, { "epoch": 0.52, "grad_norm": 1.8186368146388783, "learning_rate": 4.874598589960279e-06, "loss": 0.695, "step": 5029 }, { "epoch": 0.52, "grad_norm": 1.7967346368568278, "learning_rate": 4.8729157097161025e-06, "loss": 0.6156, "step": 5030 }, { "epoch": 0.52, "grad_norm": 1.786633454376535, "learning_rate": 4.871232843877654e-06, "loss": 0.562, "step": 5031 }, { "epoch": 0.52, "grad_norm": 1.9850275764992054, "learning_rate": 4.869549992635697e-06, "loss": 0.5847, "step": 5032 }, { "epoch": 0.52, "grad_norm": 1.8984219117890422, "learning_rate": 4.867867156180992e-06, "loss": 0.622, "step": 5033 }, { "epoch": 0.52, "grad_norm": 1.7075309788258288, "learning_rate": 4.866184334704297e-06, "loss": 0.5809, "step": 5034 }, { "epoch": 0.52, "grad_norm": 1.8264943957080289, "learning_rate": 4.864501528396371e-06, "loss": 0.6548, "step": 5035 }, { "epoch": 0.52, "grad_norm": 2.025846349325443, "learning_rate": 4.862818737447971e-06, "loss": 0.6318, "step": 5036 }, { "epoch": 0.52, "grad_norm": 1.7219540198512069, "learning_rate": 4.861135962049847e-06, "loss": 0.6695, "step": 5037 }, { "epoch": 0.52, "grad_norm": 2.077354289814275, "learning_rate": 4.859453202392753e-06, "loss": 0.6169, "step": 5038 }, { "epoch": 0.52, "grad_norm": 1.9060446882946842, "learning_rate": 4.8577704586674405e-06, "loss": 0.5871, "step": 5039 }, { "epoch": 0.52, "grad_norm": 2.2842872317930714, "learning_rate": 4.856087731064656e-06, "loss": 0.6755, "step": 5040 }, { "epoch": 0.52, "grad_norm": 1.8767079908441326, "learning_rate": 4.854405019775148e-06, "loss": 0.6126, "step": 5041 }, { "epoch": 0.52, "grad_norm": 1.7312471324735106, "learning_rate": 4.852722324989661e-06, "loss": 0.6224, "step": 5042 }, { "epoch": 0.52, "grad_norm": 1.9361134166550507, "learning_rate": 4.851039646898938e-06, "loss": 0.6556, "step": 5043 }, { "epoch": 0.52, "grad_norm": 1.7027530285331263, "learning_rate": 4.8493569856937215e-06, "loss": 0.6058, "step": 5044 }, { "epoch": 0.52, "grad_norm": 2.1245655764901317, "learning_rate": 4.847674341564748e-06, "loss": 0.7028, "step": 5045 }, { "epoch": 0.52, "grad_norm": 1.9143911466692012, "learning_rate": 4.845991714702755e-06, "loss": 0.6948, "step": 5046 }, { "epoch": 0.52, "grad_norm": 1.941124018073597, "learning_rate": 4.844309105298481e-06, "loss": 0.7231, "step": 5047 }, { "epoch": 0.52, "grad_norm": 1.9438368169759088, "learning_rate": 4.842626513542656e-06, "loss": 0.6155, "step": 5048 }, { "epoch": 0.52, "grad_norm": 1.828675457167432, "learning_rate": 4.840943939626012e-06, "loss": 0.6513, "step": 5049 }, { "epoch": 0.52, "grad_norm": 1.8271797659739548, "learning_rate": 4.83926138373928e-06, "loss": 0.5988, "step": 5050 }, { "epoch": 0.53, "grad_norm": 3.9452851427531885, "learning_rate": 4.8375788460731885e-06, "loss": 0.6387, "step": 5051 }, { "epoch": 0.53, "grad_norm": 1.9841860268134885, "learning_rate": 4.8358963268184585e-06, "loss": 0.6832, "step": 5052 }, { "epoch": 0.53, "grad_norm": 1.8792424593951718, "learning_rate": 4.8342138261658145e-06, "loss": 0.5336, "step": 5053 }, { "epoch": 0.53, "grad_norm": 1.9310634981620276, "learning_rate": 4.832531344305979e-06, "loss": 0.6749, "step": 5054 }, { "epoch": 0.53, "grad_norm": 2.02272191555057, "learning_rate": 4.8308488814296695e-06, "loss": 0.7115, "step": 5055 }, { "epoch": 0.53, "grad_norm": 1.7549744426219025, "learning_rate": 4.829166437727603e-06, "loss": 0.6443, "step": 5056 }, { "epoch": 0.53, "grad_norm": 2.4305012850264225, "learning_rate": 4.827484013390496e-06, "loss": 0.5472, "step": 5057 }, { "epoch": 0.53, "grad_norm": 2.1868137796135327, "learning_rate": 4.825801608609059e-06, "loss": 0.6223, "step": 5058 }, { "epoch": 0.53, "grad_norm": 1.853626000392213, "learning_rate": 4.824119223574002e-06, "loss": 0.6385, "step": 5059 }, { "epoch": 0.53, "grad_norm": 1.8454461648434337, "learning_rate": 4.8224368584760345e-06, "loss": 0.6323, "step": 5060 }, { "epoch": 0.53, "grad_norm": 1.735519333587952, "learning_rate": 4.82075451350586e-06, "loss": 0.6135, "step": 5061 }, { "epoch": 0.53, "grad_norm": 1.858344450733772, "learning_rate": 4.819072188854183e-06, "loss": 0.535, "step": 5062 }, { "epoch": 0.53, "grad_norm": 2.0553850757925702, "learning_rate": 4.817389884711706e-06, "loss": 0.652, "step": 5063 }, { "epoch": 0.53, "grad_norm": 1.9772353357592571, "learning_rate": 4.815707601269126e-06, "loss": 0.662, "step": 5064 }, { "epoch": 0.53, "grad_norm": 1.7625156674615081, "learning_rate": 4.814025338717139e-06, "loss": 0.6364, "step": 5065 }, { "epoch": 0.53, "grad_norm": 1.9316812486984454, "learning_rate": 4.812343097246442e-06, "loss": 0.6193, "step": 5066 }, { "epoch": 0.53, "grad_norm": 1.4902859699492574, "learning_rate": 4.8106608770477225e-06, "loss": 0.4888, "step": 5067 }, { "epoch": 0.53, "grad_norm": 2.191617491974796, "learning_rate": 4.808978678311672e-06, "loss": 0.5908, "step": 5068 }, { "epoch": 0.53, "grad_norm": 1.9555631768420776, "learning_rate": 4.807296501228977e-06, "loss": 0.6095, "step": 5069 }, { "epoch": 0.53, "grad_norm": 1.9063575635693246, "learning_rate": 4.805614345990322e-06, "loss": 0.6828, "step": 5070 }, { "epoch": 0.53, "grad_norm": 1.7436840319728293, "learning_rate": 4.803932212786389e-06, "loss": 0.7226, "step": 5071 }, { "epoch": 0.53, "grad_norm": 2.2328858534263847, "learning_rate": 4.802250101807857e-06, "loss": 0.6088, "step": 5072 }, { "epoch": 0.53, "grad_norm": 1.6911436875533297, "learning_rate": 4.800568013245405e-06, "loss": 0.6307, "step": 5073 }, { "epoch": 0.53, "grad_norm": 1.9214187162207514, "learning_rate": 4.798885947289705e-06, "loss": 0.6821, "step": 5074 }, { "epoch": 0.53, "grad_norm": 1.7311494622263512, "learning_rate": 4.797203904131427e-06, "loss": 0.6244, "step": 5075 }, { "epoch": 0.53, "grad_norm": 2.00546387408505, "learning_rate": 4.795521883961243e-06, "loss": 0.6301, "step": 5076 }, { "epoch": 0.53, "grad_norm": 2.7067685876166543, "learning_rate": 4.793839886969819e-06, "loss": 0.6525, "step": 5077 }, { "epoch": 0.53, "grad_norm": 1.9479052680870534, "learning_rate": 4.792157913347819e-06, "loss": 0.6248, "step": 5078 }, { "epoch": 0.53, "grad_norm": 1.9878951234618571, "learning_rate": 4.790475963285904e-06, "loss": 0.6752, "step": 5079 }, { "epoch": 0.53, "grad_norm": 1.8307605231277921, "learning_rate": 4.788794036974733e-06, "loss": 0.6728, "step": 5080 }, { "epoch": 0.53, "grad_norm": 2.007850691507178, "learning_rate": 4.787112134604964e-06, "loss": 0.6615, "step": 5081 }, { "epoch": 0.53, "grad_norm": 1.9682209210971686, "learning_rate": 4.785430256367246e-06, "loss": 0.6212, "step": 5082 }, { "epoch": 0.53, "grad_norm": 1.9539068327837195, "learning_rate": 4.783748402452231e-06, "loss": 0.6382, "step": 5083 }, { "epoch": 0.53, "grad_norm": 1.8254711972644915, "learning_rate": 4.782066573050567e-06, "loss": 0.5879, "step": 5084 }, { "epoch": 0.53, "grad_norm": 1.9498763509941295, "learning_rate": 4.7803847683529e-06, "loss": 0.6181, "step": 5085 }, { "epoch": 0.53, "grad_norm": 1.8874003778516166, "learning_rate": 4.77870298854987e-06, "loss": 0.6335, "step": 5086 }, { "epoch": 0.53, "grad_norm": 1.8312048896987314, "learning_rate": 4.777021233832119e-06, "loss": 0.5927, "step": 5087 }, { "epoch": 0.53, "grad_norm": 1.9016075310030311, "learning_rate": 4.775339504390283e-06, "loss": 0.6854, "step": 5088 }, { "epoch": 0.53, "grad_norm": 2.0648643513408897, "learning_rate": 4.7736578004149936e-06, "loss": 0.585, "step": 5089 }, { "epoch": 0.53, "grad_norm": 1.7637557948369473, "learning_rate": 4.771976122096882e-06, "loss": 0.638, "step": 5090 }, { "epoch": 0.53, "grad_norm": 2.2250046913316366, "learning_rate": 4.7702944696265766e-06, "loss": 0.6797, "step": 5091 }, { "epoch": 0.53, "grad_norm": 1.8031799845090113, "learning_rate": 4.768612843194703e-06, "loss": 0.6511, "step": 5092 }, { "epoch": 0.53, "grad_norm": 1.8677023618333652, "learning_rate": 4.766931242991882e-06, "loss": 0.555, "step": 5093 }, { "epoch": 0.53, "grad_norm": 1.9985670391581052, "learning_rate": 4.765249669208733e-06, "loss": 0.6718, "step": 5094 }, { "epoch": 0.53, "grad_norm": 1.7495206295185197, "learning_rate": 4.763568122035873e-06, "loss": 0.7245, "step": 5095 }, { "epoch": 0.53, "grad_norm": 1.8561316308119256, "learning_rate": 4.761886601663913e-06, "loss": 0.7281, "step": 5096 }, { "epoch": 0.53, "grad_norm": 1.8464447577024359, "learning_rate": 4.760205108283465e-06, "loss": 0.7014, "step": 5097 }, { "epoch": 0.53, "grad_norm": 1.912072520084057, "learning_rate": 4.758523642085133e-06, "loss": 0.683, "step": 5098 }, { "epoch": 0.53, "grad_norm": 1.9941678964102907, "learning_rate": 4.756842203259523e-06, "loss": 0.5755, "step": 5099 }, { "epoch": 0.53, "grad_norm": 1.951575529813139, "learning_rate": 4.755160791997235e-06, "loss": 0.7646, "step": 5100 }, { "epoch": 0.53, "grad_norm": 1.8620977956553646, "learning_rate": 4.753479408488868e-06, "loss": 0.6415, "step": 5101 }, { "epoch": 0.53, "grad_norm": 1.9029340613448509, "learning_rate": 4.751798052925015e-06, "loss": 0.6514, "step": 5102 }, { "epoch": 0.53, "grad_norm": 1.8930213798910867, "learning_rate": 4.750116725496268e-06, "loss": 0.6322, "step": 5103 }, { "epoch": 0.53, "grad_norm": 1.804024019566647, "learning_rate": 4.7484354263932136e-06, "loss": 0.6197, "step": 5104 }, { "epoch": 0.53, "grad_norm": 1.7759620876156021, "learning_rate": 4.746754155806437e-06, "loss": 0.5931, "step": 5105 }, { "epoch": 0.53, "grad_norm": 1.728832946061556, "learning_rate": 4.74507291392652e-06, "loss": 0.5501, "step": 5106 }, { "epoch": 0.53, "grad_norm": 2.116357919100813, "learning_rate": 4.743391700944042e-06, "loss": 0.6326, "step": 5107 }, { "epoch": 0.53, "grad_norm": 1.841309504995033, "learning_rate": 4.7417105170495775e-06, "loss": 0.6341, "step": 5108 }, { "epoch": 0.53, "grad_norm": 1.9423787604865643, "learning_rate": 4.740029362433698e-06, "loss": 0.6879, "step": 5109 }, { "epoch": 0.53, "grad_norm": 1.9253807388055482, "learning_rate": 4.7383482372869735e-06, "loss": 0.5657, "step": 5110 }, { "epoch": 0.53, "grad_norm": 1.9821619032453928, "learning_rate": 4.736667141799965e-06, "loss": 0.7289, "step": 5111 }, { "epoch": 0.53, "grad_norm": 1.908684744520375, "learning_rate": 4.734986076163238e-06, "loss": 0.7003, "step": 5112 }, { "epoch": 0.53, "grad_norm": 1.9596783586093547, "learning_rate": 4.73330504056735e-06, "loss": 0.6458, "step": 5113 }, { "epoch": 0.53, "grad_norm": 1.9224228062346809, "learning_rate": 4.731624035202856e-06, "loss": 0.7468, "step": 5114 }, { "epoch": 0.53, "grad_norm": 1.8680844649352413, "learning_rate": 4.729943060260306e-06, "loss": 0.6674, "step": 5115 }, { "epoch": 0.53, "grad_norm": 2.024935667135311, "learning_rate": 4.72826211593025e-06, "loss": 0.7308, "step": 5116 }, { "epoch": 0.53, "grad_norm": 1.8095294446492074, "learning_rate": 4.726581202403232e-06, "loss": 0.7611, "step": 5117 }, { "epoch": 0.53, "grad_norm": 2.0807128692653345, "learning_rate": 4.7249003198697955e-06, "loss": 0.6249, "step": 5118 }, { "epoch": 0.53, "grad_norm": 2.0181745603197236, "learning_rate": 4.723219468520474e-06, "loss": 0.6167, "step": 5119 }, { "epoch": 0.53, "grad_norm": 1.7391558203953414, "learning_rate": 4.721538648545802e-06, "loss": 0.6373, "step": 5120 }, { "epoch": 0.53, "grad_norm": 1.7577092949947608, "learning_rate": 4.719857860136312e-06, "loss": 0.6041, "step": 5121 }, { "epoch": 0.53, "grad_norm": 1.9735884394796157, "learning_rate": 4.7181771034825306e-06, "loss": 0.6166, "step": 5122 }, { "epoch": 0.53, "grad_norm": 1.9220027882098172, "learning_rate": 4.716496378774981e-06, "loss": 0.524, "step": 5123 }, { "epoch": 0.53, "grad_norm": 1.9092831897320568, "learning_rate": 4.714815686204182e-06, "loss": 0.5647, "step": 5124 }, { "epoch": 0.53, "grad_norm": 1.9953171291519414, "learning_rate": 4.713135025960652e-06, "loss": 0.7217, "step": 5125 }, { "epoch": 0.53, "grad_norm": 1.6846329284575117, "learning_rate": 4.711454398234902e-06, "loss": 0.5023, "step": 5126 }, { "epoch": 0.53, "grad_norm": 1.8337482270315908, "learning_rate": 4.7097738032174394e-06, "loss": 0.6636, "step": 5127 }, { "epoch": 0.53, "grad_norm": 1.7405246564961698, "learning_rate": 4.708093241098771e-06, "loss": 0.6347, "step": 5128 }, { "epoch": 0.53, "grad_norm": 1.7954045338519318, "learning_rate": 4.7064127120693965e-06, "loss": 0.6376, "step": 5129 }, { "epoch": 0.53, "grad_norm": 1.8846517693769833, "learning_rate": 4.704732216319815e-06, "loss": 0.636, "step": 5130 }, { "epoch": 0.53, "grad_norm": 1.8830831159833605, "learning_rate": 4.7030517540405195e-06, "loss": 0.6288, "step": 5131 }, { "epoch": 0.53, "grad_norm": 1.8156056135854486, "learning_rate": 4.701371325422002e-06, "loss": 0.6129, "step": 5132 }, { "epoch": 0.53, "grad_norm": 2.0101280335971166, "learning_rate": 4.6996909306547455e-06, "loss": 0.7395, "step": 5133 }, { "epoch": 0.53, "grad_norm": 2.321181377226723, "learning_rate": 4.6980105699292335e-06, "loss": 0.6699, "step": 5134 }, { "epoch": 0.53, "grad_norm": 2.226576509137887, "learning_rate": 4.696330243435945e-06, "loss": 0.687, "step": 5135 }, { "epoch": 0.53, "grad_norm": 1.9204092632555354, "learning_rate": 4.694649951365354e-06, "loss": 0.5934, "step": 5136 }, { "epoch": 0.53, "grad_norm": 2.0429760454559283, "learning_rate": 4.692969693907931e-06, "loss": 0.6716, "step": 5137 }, { "epoch": 0.53, "grad_norm": 1.9277314108498504, "learning_rate": 4.691289471254144e-06, "loss": 0.6667, "step": 5138 }, { "epoch": 0.53, "grad_norm": 1.988347172280978, "learning_rate": 4.689609283594454e-06, "loss": 0.6952, "step": 5139 }, { "epoch": 0.53, "grad_norm": 1.952429859380367, "learning_rate": 4.6879291311193244e-06, "loss": 0.579, "step": 5140 }, { "epoch": 0.53, "grad_norm": 1.804754920642748, "learning_rate": 4.686249014019203e-06, "loss": 0.6225, "step": 5141 }, { "epoch": 0.53, "grad_norm": 1.989089474261446, "learning_rate": 4.684568932484546e-06, "loss": 0.6693, "step": 5142 }, { "epoch": 0.53, "grad_norm": 1.983466565946788, "learning_rate": 4.682888886705797e-06, "loss": 0.6234, "step": 5143 }, { "epoch": 0.53, "grad_norm": 1.7356926775014248, "learning_rate": 4.6812088768734e-06, "loss": 0.5841, "step": 5144 }, { "epoch": 0.53, "grad_norm": 1.9370988809301124, "learning_rate": 4.679528903177795e-06, "loss": 0.6462, "step": 5145 }, { "epoch": 0.53, "grad_norm": 1.9192368651984106, "learning_rate": 4.6778489658094145e-06, "loss": 0.6877, "step": 5146 }, { "epoch": 0.54, "grad_norm": 1.82458653830636, "learning_rate": 4.676169064958692e-06, "loss": 0.6435, "step": 5147 }, { "epoch": 0.54, "grad_norm": 2.0128828499828373, "learning_rate": 4.674489200816051e-06, "loss": 0.6844, "step": 5148 }, { "epoch": 0.54, "grad_norm": 2.079968396447985, "learning_rate": 4.672809373571913e-06, "loss": 0.712, "step": 5149 }, { "epoch": 0.54, "grad_norm": 1.9981225791183472, "learning_rate": 4.671129583416697e-06, "loss": 0.6063, "step": 5150 }, { "epoch": 0.54, "grad_norm": 1.800390445160367, "learning_rate": 4.669449830540819e-06, "loss": 0.6671, "step": 5151 }, { "epoch": 0.54, "grad_norm": 1.7464047867362262, "learning_rate": 4.6677701151346856e-06, "loss": 0.6108, "step": 5152 }, { "epoch": 0.54, "grad_norm": 1.8788397288269676, "learning_rate": 4.6660904373887035e-06, "loss": 0.6478, "step": 5153 }, { "epoch": 0.54, "grad_norm": 1.8506751756270443, "learning_rate": 4.664410797493275e-06, "loss": 0.6116, "step": 5154 }, { "epoch": 0.54, "grad_norm": 1.8466483682885082, "learning_rate": 4.662731195638794e-06, "loss": 0.5673, "step": 5155 }, { "epoch": 0.54, "grad_norm": 1.9893512470431758, "learning_rate": 4.661051632015655e-06, "loss": 0.5976, "step": 5156 }, { "epoch": 0.54, "grad_norm": 1.8473962326091087, "learning_rate": 4.659372106814244e-06, "loss": 0.7179, "step": 5157 }, { "epoch": 0.54, "grad_norm": 1.8880905203942298, "learning_rate": 4.657692620224948e-06, "loss": 0.6614, "step": 5158 }, { "epoch": 0.54, "grad_norm": 1.8210595211398692, "learning_rate": 4.656013172438143e-06, "loss": 0.6492, "step": 5159 }, { "epoch": 0.54, "grad_norm": 1.8642394980355337, "learning_rate": 4.654333763644206e-06, "loss": 0.6072, "step": 5160 }, { "epoch": 0.54, "grad_norm": 2.113480454514094, "learning_rate": 4.652654394033508e-06, "loss": 0.7173, "step": 5161 }, { "epoch": 0.54, "grad_norm": 1.7522034431581206, "learning_rate": 4.650975063796414e-06, "loss": 0.6015, "step": 5162 }, { "epoch": 0.54, "grad_norm": 2.2933826515210276, "learning_rate": 4.649295773123285e-06, "loss": 0.5954, "step": 5163 }, { "epoch": 0.54, "grad_norm": 1.5870787892330132, "learning_rate": 4.6476165222044795e-06, "loss": 0.6106, "step": 5164 }, { "epoch": 0.54, "grad_norm": 1.897198998438171, "learning_rate": 4.645937311230351e-06, "loss": 0.581, "step": 5165 }, { "epoch": 0.54, "grad_norm": 1.9011534788596396, "learning_rate": 4.6442581403912444e-06, "loss": 0.7063, "step": 5166 }, { "epoch": 0.54, "grad_norm": 1.8255651882129653, "learning_rate": 4.6425790098775064e-06, "loss": 0.7078, "step": 5167 }, { "epoch": 0.54, "grad_norm": 2.103934341812746, "learning_rate": 4.6408999198794744e-06, "loss": 0.7187, "step": 5168 }, { "epoch": 0.54, "grad_norm": 1.8022669695183133, "learning_rate": 4.639220870587484e-06, "loss": 0.6677, "step": 5169 }, { "epoch": 0.54, "grad_norm": 2.0815425124291265, "learning_rate": 4.637541862191864e-06, "loss": 0.7668, "step": 5170 }, { "epoch": 0.54, "grad_norm": 1.6168920667058806, "learning_rate": 4.635862894882939e-06, "loss": 0.6407, "step": 5171 }, { "epoch": 0.54, "grad_norm": 1.8448181111407145, "learning_rate": 4.634183968851031e-06, "loss": 0.5558, "step": 5172 }, { "epoch": 0.54, "grad_norm": 1.8625072882231783, "learning_rate": 4.632505084286455e-06, "loss": 0.5622, "step": 5173 }, { "epoch": 0.54, "grad_norm": 2.02673876945062, "learning_rate": 4.630826241379522e-06, "loss": 0.6759, "step": 5174 }, { "epoch": 0.54, "grad_norm": 2.001843218175361, "learning_rate": 4.629147440320539e-06, "loss": 0.7657, "step": 5175 }, { "epoch": 0.54, "grad_norm": 1.9718566782218825, "learning_rate": 4.627468681299808e-06, "loss": 0.6772, "step": 5176 }, { "epoch": 0.54, "grad_norm": 1.8470501784821054, "learning_rate": 4.625789964507626e-06, "loss": 0.6576, "step": 5177 }, { "epoch": 0.54, "grad_norm": 1.9660622527248055, "learning_rate": 4.624111290134284e-06, "loss": 0.7013, "step": 5178 }, { "epoch": 0.54, "grad_norm": 1.8012877386792032, "learning_rate": 4.622432658370069e-06, "loss": 0.7023, "step": 5179 }, { "epoch": 0.54, "grad_norm": 1.8651094026382105, "learning_rate": 4.620754069405265e-06, "loss": 0.7387, "step": 5180 }, { "epoch": 0.54, "grad_norm": 1.984823336261961, "learning_rate": 4.619075523430147e-06, "loss": 0.6433, "step": 5181 }, { "epoch": 0.54, "grad_norm": 1.8084018306588718, "learning_rate": 4.617397020634991e-06, "loss": 0.4955, "step": 5182 }, { "epoch": 0.54, "grad_norm": 1.7058235501308092, "learning_rate": 4.615718561210063e-06, "loss": 0.4806, "step": 5183 }, { "epoch": 0.54, "grad_norm": 2.135606445952955, "learning_rate": 4.614040145345629e-06, "loss": 0.6489, "step": 5184 }, { "epoch": 0.54, "grad_norm": 1.9003261832974596, "learning_rate": 4.612361773231942e-06, "loss": 0.6782, "step": 5185 }, { "epoch": 0.54, "grad_norm": 1.766062587324516, "learning_rate": 4.610683445059257e-06, "loss": 0.6328, "step": 5186 }, { "epoch": 0.54, "grad_norm": 1.772434598760773, "learning_rate": 4.609005161017824e-06, "loss": 0.5695, "step": 5187 }, { "epoch": 0.54, "grad_norm": 1.9399750167465266, "learning_rate": 4.607326921297883e-06, "loss": 0.7608, "step": 5188 }, { "epoch": 0.54, "grad_norm": 1.834185074488284, "learning_rate": 4.605648726089674e-06, "loss": 0.6278, "step": 5189 }, { "epoch": 0.54, "grad_norm": 1.9981329160306083, "learning_rate": 4.60397057558343e-06, "loss": 0.6317, "step": 5190 }, { "epoch": 0.54, "grad_norm": 2.075228145121904, "learning_rate": 4.602292469969381e-06, "loss": 0.627, "step": 5191 }, { "epoch": 0.54, "grad_norm": 1.7500511714057594, "learning_rate": 4.600614409437745e-06, "loss": 0.6107, "step": 5192 }, { "epoch": 0.54, "grad_norm": 2.0761176947995206, "learning_rate": 4.5989363941787415e-06, "loss": 0.6552, "step": 5193 }, { "epoch": 0.54, "grad_norm": 1.788070090950307, "learning_rate": 4.597258424382584e-06, "loss": 0.5552, "step": 5194 }, { "epoch": 0.54, "grad_norm": 1.8921091625857311, "learning_rate": 4.5955805002394804e-06, "loss": 0.6449, "step": 5195 }, { "epoch": 0.54, "grad_norm": 1.8761192118955454, "learning_rate": 4.593902621939632e-06, "loss": 0.609, "step": 5196 }, { "epoch": 0.54, "grad_norm": 2.054485578286654, "learning_rate": 4.592224789673235e-06, "loss": 0.5932, "step": 5197 }, { "epoch": 0.54, "grad_norm": 1.9350066838823927, "learning_rate": 4.590547003630484e-06, "loss": 0.691, "step": 5198 }, { "epoch": 0.54, "grad_norm": 1.9608958757814123, "learning_rate": 4.588869264001563e-06, "loss": 0.5844, "step": 5199 }, { "epoch": 0.54, "grad_norm": 1.9773425293593354, "learning_rate": 4.587191570976655e-06, "loss": 0.669, "step": 5200 }, { "epoch": 0.54, "grad_norm": 1.9251634580282069, "learning_rate": 4.585513924745935e-06, "loss": 0.6388, "step": 5201 }, { "epoch": 0.54, "grad_norm": 1.7757867948720212, "learning_rate": 4.583836325499573e-06, "loss": 0.5827, "step": 5202 }, { "epoch": 0.54, "grad_norm": 2.085264127591141, "learning_rate": 4.5821587734277374e-06, "loss": 0.532, "step": 5203 }, { "epoch": 0.54, "grad_norm": 1.75056064511947, "learning_rate": 4.580481268720585e-06, "loss": 0.538, "step": 5204 }, { "epoch": 0.54, "grad_norm": 1.8790372630466574, "learning_rate": 4.578803811568272e-06, "loss": 0.5644, "step": 5205 }, { "epoch": 0.54, "grad_norm": 1.9390235343364985, "learning_rate": 4.5771264021609494e-06, "loss": 0.5906, "step": 5206 }, { "epoch": 0.54, "grad_norm": 1.924353396395898, "learning_rate": 4.575449040688758e-06, "loss": 0.5973, "step": 5207 }, { "epoch": 0.54, "grad_norm": 1.7020159742103511, "learning_rate": 4.573771727341838e-06, "loss": 0.5145, "step": 5208 }, { "epoch": 0.54, "grad_norm": 1.7708593482880979, "learning_rate": 4.572094462310322e-06, "loss": 0.6289, "step": 5209 }, { "epoch": 0.54, "grad_norm": 2.166457630399841, "learning_rate": 4.570417245784337e-06, "loss": 0.6675, "step": 5210 }, { "epoch": 0.54, "grad_norm": 1.877828575644666, "learning_rate": 4.568740077954007e-06, "loss": 0.6011, "step": 5211 }, { "epoch": 0.54, "grad_norm": 1.911475306416678, "learning_rate": 4.567062959009446e-06, "loss": 0.569, "step": 5212 }, { "epoch": 0.54, "grad_norm": 1.8913396335881767, "learning_rate": 4.565385889140767e-06, "loss": 0.6125, "step": 5213 }, { "epoch": 0.54, "grad_norm": 2.08561786360724, "learning_rate": 4.563708868538077e-06, "loss": 0.7155, "step": 5214 }, { "epoch": 0.54, "grad_norm": 2.154811342394879, "learning_rate": 4.56203189739147e-06, "loss": 0.5944, "step": 5215 }, { "epoch": 0.54, "grad_norm": 2.0035311683983323, "learning_rate": 4.560354975891045e-06, "loss": 0.6358, "step": 5216 }, { "epoch": 0.54, "grad_norm": 1.9237950319146873, "learning_rate": 4.55867810422689e-06, "loss": 0.612, "step": 5217 }, { "epoch": 0.54, "grad_norm": 1.891746378991884, "learning_rate": 4.557001282589086e-06, "loss": 0.6303, "step": 5218 }, { "epoch": 0.54, "grad_norm": 1.7132159636798743, "learning_rate": 4.5553245111677124e-06, "loss": 0.5618, "step": 5219 }, { "epoch": 0.54, "grad_norm": 1.9582069007610816, "learning_rate": 4.55364779015284e-06, "loss": 0.7596, "step": 5220 }, { "epoch": 0.54, "grad_norm": 1.8416862170002277, "learning_rate": 4.5519711197345365e-06, "loss": 0.6514, "step": 5221 }, { "epoch": 0.54, "grad_norm": 1.7109777078724329, "learning_rate": 4.550294500102858e-06, "loss": 0.6504, "step": 5222 }, { "epoch": 0.54, "grad_norm": 1.7965413196363942, "learning_rate": 4.548617931447861e-06, "loss": 0.6029, "step": 5223 }, { "epoch": 0.54, "grad_norm": 1.7730256764319179, "learning_rate": 4.546941413959595e-06, "loss": 0.6525, "step": 5224 }, { "epoch": 0.54, "grad_norm": 2.003273238543562, "learning_rate": 4.545264947828101e-06, "loss": 0.6805, "step": 5225 }, { "epoch": 0.54, "grad_norm": 2.010674257758387, "learning_rate": 4.5435885332434184e-06, "loss": 0.5269, "step": 5226 }, { "epoch": 0.54, "grad_norm": 1.8445652402675543, "learning_rate": 4.541912170395576e-06, "loss": 0.6639, "step": 5227 }, { "epoch": 0.54, "grad_norm": 1.937635194164857, "learning_rate": 4.540235859474603e-06, "loss": 0.5969, "step": 5228 }, { "epoch": 0.54, "grad_norm": 1.7232427319669361, "learning_rate": 4.538559600670513e-06, "loss": 0.575, "step": 5229 }, { "epoch": 0.54, "grad_norm": 1.9439853705055208, "learning_rate": 4.5368833941733235e-06, "loss": 0.5975, "step": 5230 }, { "epoch": 0.54, "grad_norm": 1.8694910106701905, "learning_rate": 4.53520724017304e-06, "loss": 0.5624, "step": 5231 }, { "epoch": 0.54, "grad_norm": 2.012545765815874, "learning_rate": 4.533531138859665e-06, "loss": 0.5813, "step": 5232 }, { "epoch": 0.54, "grad_norm": 1.8435415628000271, "learning_rate": 4.531855090423195e-06, "loss": 0.606, "step": 5233 }, { "epoch": 0.54, "grad_norm": 1.9556583134206118, "learning_rate": 4.5301790950536175e-06, "loss": 0.6116, "step": 5234 }, { "epoch": 0.54, "grad_norm": 2.0787950978231047, "learning_rate": 4.5285031529409184e-06, "loss": 0.6888, "step": 5235 }, { "epoch": 0.54, "grad_norm": 1.853465607687972, "learning_rate": 4.526827264275076e-06, "loss": 0.5916, "step": 5236 }, { "epoch": 0.54, "grad_norm": 1.7968352682356779, "learning_rate": 4.525151429246057e-06, "loss": 0.6473, "step": 5237 }, { "epoch": 0.54, "grad_norm": 2.2194788816263666, "learning_rate": 4.523475648043832e-06, "loss": 0.6571, "step": 5238 }, { "epoch": 0.54, "grad_norm": 1.7577666502701064, "learning_rate": 4.521799920858358e-06, "loss": 0.6082, "step": 5239 }, { "epoch": 0.54, "grad_norm": 1.9041366214064464, "learning_rate": 4.520124247879588e-06, "loss": 0.678, "step": 5240 }, { "epoch": 0.54, "grad_norm": 1.865248415982124, "learning_rate": 4.51844862929747e-06, "loss": 0.6431, "step": 5241 }, { "epoch": 0.54, "grad_norm": 1.8755663337410842, "learning_rate": 4.516773065301946e-06, "loss": 0.6337, "step": 5242 }, { "epoch": 0.55, "grad_norm": 1.8953438735645665, "learning_rate": 4.515097556082949e-06, "loss": 0.5888, "step": 5243 }, { "epoch": 0.55, "grad_norm": 1.803506322598369, "learning_rate": 4.513422101830408e-06, "loss": 0.5827, "step": 5244 }, { "epoch": 0.55, "grad_norm": 1.9563878164296573, "learning_rate": 4.5117467027342435e-06, "loss": 0.5976, "step": 5245 }, { "epoch": 0.55, "grad_norm": 1.7276638684043466, "learning_rate": 4.510071358984373e-06, "loss": 0.6353, "step": 5246 }, { "epoch": 0.55, "grad_norm": 1.9039495600414362, "learning_rate": 4.508396070770708e-06, "loss": 0.6116, "step": 5247 }, { "epoch": 0.55, "grad_norm": 1.930911155262903, "learning_rate": 4.50672083828315e-06, "loss": 0.6507, "step": 5248 }, { "epoch": 0.55, "grad_norm": 1.7348548611102907, "learning_rate": 4.505045661711596e-06, "loss": 0.633, "step": 5249 }, { "epoch": 0.55, "grad_norm": 2.0192760027137764, "learning_rate": 4.503370541245937e-06, "loss": 0.6852, "step": 5250 }, { "epoch": 0.55, "grad_norm": 2.291914253911129, "learning_rate": 4.501695477076062e-06, "loss": 0.7381, "step": 5251 }, { "epoch": 0.55, "grad_norm": 1.8818408471589059, "learning_rate": 4.5000204693918405e-06, "loss": 0.6618, "step": 5252 }, { "epoch": 0.55, "grad_norm": 1.6450990278976414, "learning_rate": 4.49834551838315e-06, "loss": 0.7165, "step": 5253 }, { "epoch": 0.55, "grad_norm": 1.8655688952363387, "learning_rate": 4.496670624239854e-06, "loss": 0.5881, "step": 5254 }, { "epoch": 0.55, "grad_norm": 1.5881419810270465, "learning_rate": 4.494995787151811e-06, "loss": 0.5591, "step": 5255 }, { "epoch": 0.55, "grad_norm": 1.9868107004461213, "learning_rate": 4.493321007308875e-06, "loss": 0.6265, "step": 5256 }, { "epoch": 0.55, "grad_norm": 1.748853627472903, "learning_rate": 4.49164628490089e-06, "loss": 0.6645, "step": 5257 }, { "epoch": 0.55, "grad_norm": 1.6825341367268272, "learning_rate": 4.489971620117699e-06, "loss": 0.5963, "step": 5258 }, { "epoch": 0.55, "grad_norm": 1.7378224177710997, "learning_rate": 4.4882970131491286e-06, "loss": 0.6074, "step": 5259 }, { "epoch": 0.55, "grad_norm": 1.7043117434948243, "learning_rate": 4.4866224641850095e-06, "loss": 0.5871, "step": 5260 }, { "epoch": 0.55, "grad_norm": 1.873293864202997, "learning_rate": 4.48494797341516e-06, "loss": 0.6175, "step": 5261 }, { "epoch": 0.55, "grad_norm": 1.8950760665680089, "learning_rate": 4.4832735410293924e-06, "loss": 0.6416, "step": 5262 }, { "epoch": 0.55, "grad_norm": 1.825762291742463, "learning_rate": 4.481599167217515e-06, "loss": 0.6834, "step": 5263 }, { "epoch": 0.55, "grad_norm": 1.7127983027636853, "learning_rate": 4.479924852169327e-06, "loss": 0.6113, "step": 5264 }, { "epoch": 0.55, "grad_norm": 2.2666902600585157, "learning_rate": 4.478250596074623e-06, "loss": 0.5987, "step": 5265 }, { "epoch": 0.55, "grad_norm": 2.0911717000348458, "learning_rate": 4.476576399123187e-06, "loss": 0.5852, "step": 5266 }, { "epoch": 0.55, "grad_norm": 2.0671663566535403, "learning_rate": 4.4749022615047975e-06, "loss": 0.6781, "step": 5267 }, { "epoch": 0.55, "grad_norm": 1.7386282025860975, "learning_rate": 4.4732281834092305e-06, "loss": 0.6234, "step": 5268 }, { "epoch": 0.55, "grad_norm": 2.031240480955804, "learning_rate": 4.471554165026251e-06, "loss": 0.6807, "step": 5269 }, { "epoch": 0.55, "grad_norm": 1.9289143528508108, "learning_rate": 4.46988020654562e-06, "loss": 0.6729, "step": 5270 }, { "epoch": 0.55, "grad_norm": 2.0153197310458943, "learning_rate": 4.4682063081570885e-06, "loss": 0.631, "step": 5271 }, { "epoch": 0.55, "grad_norm": 1.8926835051803474, "learning_rate": 4.466532470050403e-06, "loss": 0.6079, "step": 5272 }, { "epoch": 0.55, "grad_norm": 1.9793089069024377, "learning_rate": 4.464858692415304e-06, "loss": 0.6527, "step": 5273 }, { "epoch": 0.55, "grad_norm": 1.9025404029621105, "learning_rate": 4.463184975441522e-06, "loss": 0.6627, "step": 5274 }, { "epoch": 0.55, "grad_norm": 1.9089159496126418, "learning_rate": 4.461511319318782e-06, "loss": 0.6441, "step": 5275 }, { "epoch": 0.55, "grad_norm": 1.904444956313747, "learning_rate": 4.4598377242368026e-06, "loss": 0.6431, "step": 5276 }, { "epoch": 0.55, "grad_norm": 1.9622376326034894, "learning_rate": 4.458164190385297e-06, "loss": 0.6903, "step": 5277 }, { "epoch": 0.55, "grad_norm": 1.8203493701420281, "learning_rate": 4.456490717953969e-06, "loss": 0.5699, "step": 5278 }, { "epoch": 0.55, "grad_norm": 1.7996933125103711, "learning_rate": 4.454817307132515e-06, "loss": 0.5578, "step": 5279 }, { "epoch": 0.55, "grad_norm": 1.8862630623229697, "learning_rate": 4.4531439581106295e-06, "loss": 0.6777, "step": 5280 }, { "epoch": 0.55, "grad_norm": 1.9189441415408823, "learning_rate": 4.451470671077991e-06, "loss": 0.6002, "step": 5281 }, { "epoch": 0.55, "grad_norm": 1.7339129039063892, "learning_rate": 4.449797446224279e-06, "loss": 0.6599, "step": 5282 }, { "epoch": 0.55, "grad_norm": 2.141150906787522, "learning_rate": 4.4481242837391615e-06, "loss": 0.6243, "step": 5283 }, { "epoch": 0.55, "grad_norm": 2.006503116981974, "learning_rate": 4.446451183812302e-06, "loss": 0.5795, "step": 5284 }, { "epoch": 0.55, "grad_norm": 2.045139071894576, "learning_rate": 4.4447781466333565e-06, "loss": 0.5821, "step": 5285 }, { "epoch": 0.55, "grad_norm": 1.8897413641576013, "learning_rate": 4.4431051723919725e-06, "loss": 0.4936, "step": 5286 }, { "epoch": 0.55, "grad_norm": 1.8631944101350673, "learning_rate": 4.441432261277794e-06, "loss": 0.5666, "step": 5287 }, { "epoch": 0.55, "grad_norm": 1.8750818481890121, "learning_rate": 4.43975941348045e-06, "loss": 0.6188, "step": 5288 }, { "epoch": 0.55, "grad_norm": 1.7830177570624304, "learning_rate": 4.43808662918957e-06, "loss": 0.5248, "step": 5289 }, { "epoch": 0.55, "grad_norm": 1.9469663022421002, "learning_rate": 4.436413908594773e-06, "loss": 0.699, "step": 5290 }, { "epoch": 0.55, "grad_norm": 1.9735300322889926, "learning_rate": 4.434741251885671e-06, "loss": 0.6604, "step": 5291 }, { "epoch": 0.55, "grad_norm": 1.7706573081264123, "learning_rate": 4.433068659251871e-06, "loss": 0.8055, "step": 5292 }, { "epoch": 0.55, "grad_norm": 2.1555746130188567, "learning_rate": 4.43139613088297e-06, "loss": 0.6699, "step": 5293 }, { "epoch": 0.55, "grad_norm": 1.9824503792487942, "learning_rate": 4.429723666968559e-06, "loss": 0.7092, "step": 5294 }, { "epoch": 0.55, "grad_norm": 2.1311700772834556, "learning_rate": 4.4280512676982215e-06, "loss": 0.7339, "step": 5295 }, { "epoch": 0.55, "grad_norm": 1.7811696362624156, "learning_rate": 4.426378933261532e-06, "loss": 0.6008, "step": 5296 }, { "epoch": 0.55, "grad_norm": 1.7760256644912769, "learning_rate": 4.424706663848059e-06, "loss": 0.6531, "step": 5297 }, { "epoch": 0.55, "grad_norm": 1.7122666917208644, "learning_rate": 4.423034459647365e-06, "loss": 0.6768, "step": 5298 }, { "epoch": 0.55, "grad_norm": 2.1077740469187316, "learning_rate": 4.421362320849003e-06, "loss": 0.7002, "step": 5299 }, { "epoch": 0.55, "grad_norm": 2.1373646086172955, "learning_rate": 4.419690247642521e-06, "loss": 0.5956, "step": 5300 }, { "epoch": 0.55, "grad_norm": 1.8105651786972892, "learning_rate": 4.418018240217457e-06, "loss": 0.6231, "step": 5301 }, { "epoch": 0.55, "grad_norm": 1.7954524837112216, "learning_rate": 4.416346298763343e-06, "loss": 0.5767, "step": 5302 }, { "epoch": 0.55, "grad_norm": 1.797535057274918, "learning_rate": 4.414674423469702e-06, "loss": 0.5235, "step": 5303 }, { "epoch": 0.55, "grad_norm": 1.8580491980634726, "learning_rate": 4.413002614526052e-06, "loss": 0.6158, "step": 5304 }, { "epoch": 0.55, "grad_norm": 2.1384624398890035, "learning_rate": 4.411330872121901e-06, "loss": 0.638, "step": 5305 }, { "epoch": 0.55, "grad_norm": 1.838188572332752, "learning_rate": 4.4096591964467515e-06, "loss": 0.5328, "step": 5306 }, { "epoch": 0.55, "grad_norm": 2.00835612910002, "learning_rate": 4.407987587690097e-06, "loss": 0.6725, "step": 5307 }, { "epoch": 0.55, "grad_norm": 1.9969651486276907, "learning_rate": 4.406316046041423e-06, "loss": 0.7185, "step": 5308 }, { "epoch": 0.55, "grad_norm": 2.1341862742091475, "learning_rate": 4.404644571690208e-06, "loss": 0.713, "step": 5309 }, { "epoch": 0.55, "grad_norm": 1.9766606231136952, "learning_rate": 4.402973164825927e-06, "loss": 0.5984, "step": 5310 }, { "epoch": 0.55, "grad_norm": 1.8695986242532783, "learning_rate": 4.401301825638039e-06, "loss": 0.6824, "step": 5311 }, { "epoch": 0.55, "grad_norm": 2.025287723277109, "learning_rate": 4.399630554316002e-06, "loss": 0.5779, "step": 5312 }, { "epoch": 0.55, "grad_norm": 1.843453109578057, "learning_rate": 4.397959351049263e-06, "loss": 0.5695, "step": 5313 }, { "epoch": 0.55, "grad_norm": 1.9383448410386213, "learning_rate": 4.396288216027264e-06, "loss": 0.5474, "step": 5314 }, { "epoch": 0.55, "grad_norm": 1.8430720816288715, "learning_rate": 4.394617149439435e-06, "loss": 0.6376, "step": 5315 }, { "epoch": 0.55, "grad_norm": 1.8636058676938005, "learning_rate": 4.392946151475204e-06, "loss": 0.6708, "step": 5316 }, { "epoch": 0.55, "grad_norm": 1.8075379505938336, "learning_rate": 4.391275222323989e-06, "loss": 0.623, "step": 5317 }, { "epoch": 0.55, "grad_norm": 3.282958892491627, "learning_rate": 4.389604362175194e-06, "loss": 0.6761, "step": 5318 }, { "epoch": 0.55, "grad_norm": 1.9314202760968853, "learning_rate": 4.387933571218224e-06, "loss": 0.6157, "step": 5319 }, { "epoch": 0.55, "grad_norm": 1.8084471261587793, "learning_rate": 4.386262849642474e-06, "loss": 0.5921, "step": 5320 }, { "epoch": 0.55, "grad_norm": 1.7878979830871655, "learning_rate": 4.384592197637327e-06, "loss": 0.6585, "step": 5321 }, { "epoch": 0.55, "grad_norm": 1.7272057096771558, "learning_rate": 4.382921615392162e-06, "loss": 0.6154, "step": 5322 }, { "epoch": 0.55, "grad_norm": 1.7715100059111006, "learning_rate": 4.381251103096349e-06, "loss": 0.5159, "step": 5323 }, { "epoch": 0.55, "grad_norm": 1.8160415729278552, "learning_rate": 4.379580660939253e-06, "loss": 0.6188, "step": 5324 }, { "epoch": 0.55, "grad_norm": 2.0714128617370844, "learning_rate": 4.377910289110222e-06, "loss": 0.6339, "step": 5325 }, { "epoch": 0.55, "grad_norm": 1.7709774757228993, "learning_rate": 4.376239987798606e-06, "loss": 0.5522, "step": 5326 }, { "epoch": 0.55, "grad_norm": 1.6953184185859074, "learning_rate": 4.3745697571937434e-06, "loss": 0.6141, "step": 5327 }, { "epoch": 0.55, "grad_norm": 2.0788554843615246, "learning_rate": 4.372899597484963e-06, "loss": 0.6108, "step": 5328 }, { "epoch": 0.55, "grad_norm": 1.8961589961491185, "learning_rate": 4.371229508861588e-06, "loss": 0.6662, "step": 5329 }, { "epoch": 0.55, "grad_norm": 1.7425613651137846, "learning_rate": 4.369559491512932e-06, "loss": 0.5696, "step": 5330 }, { "epoch": 0.55, "grad_norm": 1.7939534103840276, "learning_rate": 4.367889545628301e-06, "loss": 0.5928, "step": 5331 }, { "epoch": 0.55, "grad_norm": 1.8404672607265415, "learning_rate": 4.366219671396995e-06, "loss": 0.607, "step": 5332 }, { "epoch": 0.55, "grad_norm": 1.9584221320985873, "learning_rate": 4.364549869008299e-06, "loss": 0.6284, "step": 5333 }, { "epoch": 0.55, "grad_norm": 1.9320337232975733, "learning_rate": 4.362880138651498e-06, "loss": 0.7023, "step": 5334 }, { "epoch": 0.55, "grad_norm": 1.8876641008267565, "learning_rate": 4.361210480515865e-06, "loss": 0.61, "step": 5335 }, { "epoch": 0.55, "grad_norm": 1.7938844774124776, "learning_rate": 4.3595408947906644e-06, "loss": 0.7137, "step": 5336 }, { "epoch": 0.55, "grad_norm": 1.8127042217334888, "learning_rate": 4.3578713816651535e-06, "loss": 0.5189, "step": 5337 }, { "epoch": 0.55, "grad_norm": 1.7419189776304915, "learning_rate": 4.356201941328582e-06, "loss": 0.6613, "step": 5338 }, { "epoch": 0.55, "grad_norm": 1.8832170874848826, "learning_rate": 4.354532573970191e-06, "loss": 0.6425, "step": 5339 }, { "epoch": 0.56, "grad_norm": 1.821648531778137, "learning_rate": 4.352863279779211e-06, "loss": 0.6567, "step": 5340 }, { "epoch": 0.56, "grad_norm": 1.8233645260795328, "learning_rate": 4.351194058944866e-06, "loss": 0.5911, "step": 5341 }, { "epoch": 0.56, "grad_norm": 1.9133087513735505, "learning_rate": 4.349524911656373e-06, "loss": 0.6188, "step": 5342 }, { "epoch": 0.56, "grad_norm": 1.672726443042582, "learning_rate": 4.34785583810294e-06, "loss": 0.5733, "step": 5343 }, { "epoch": 0.56, "grad_norm": 1.9222023216966218, "learning_rate": 4.346186838473764e-06, "loss": 0.6429, "step": 5344 }, { "epoch": 0.56, "grad_norm": 1.757050865618816, "learning_rate": 4.344517912958036e-06, "loss": 0.6731, "step": 5345 }, { "epoch": 0.56, "grad_norm": 1.7741802896340304, "learning_rate": 4.342849061744939e-06, "loss": 0.6015, "step": 5346 }, { "epoch": 0.56, "grad_norm": 1.7665348586924297, "learning_rate": 4.341180285023648e-06, "loss": 0.5387, "step": 5347 }, { "epoch": 0.56, "grad_norm": 1.9006331306865378, "learning_rate": 4.339511582983325e-06, "loss": 0.6501, "step": 5348 }, { "epoch": 0.56, "grad_norm": 2.171585780899604, "learning_rate": 4.337842955813129e-06, "loss": 0.6288, "step": 5349 }, { "epoch": 0.56, "grad_norm": 1.7377998975322286, "learning_rate": 4.336174403702208e-06, "loss": 0.6438, "step": 5350 }, { "epoch": 0.56, "grad_norm": 1.9683352192208223, "learning_rate": 4.334505926839702e-06, "loss": 0.6363, "step": 5351 }, { "epoch": 0.56, "grad_norm": 1.8374943205843048, "learning_rate": 4.332837525414743e-06, "loss": 0.5948, "step": 5352 }, { "epoch": 0.56, "grad_norm": 1.7886524450076886, "learning_rate": 4.331169199616453e-06, "loss": 0.6511, "step": 5353 }, { "epoch": 0.56, "grad_norm": 2.0533622511419374, "learning_rate": 4.329500949633947e-06, "loss": 0.6977, "step": 5354 }, { "epoch": 0.56, "grad_norm": 2.0519868425621772, "learning_rate": 4.32783277565633e-06, "loss": 0.7681, "step": 5355 }, { "epoch": 0.56, "grad_norm": 1.9748725112796992, "learning_rate": 4.326164677872698e-06, "loss": 0.626, "step": 5356 }, { "epoch": 0.56, "grad_norm": 1.8229150751926968, "learning_rate": 4.324496656472141e-06, "loss": 0.6979, "step": 5357 }, { "epoch": 0.56, "grad_norm": 1.8616048764016284, "learning_rate": 4.322828711643737e-06, "loss": 0.5955, "step": 5358 }, { "epoch": 0.56, "grad_norm": 1.7219793227650209, "learning_rate": 4.32116084357656e-06, "loss": 0.6187, "step": 5359 }, { "epoch": 0.56, "grad_norm": 1.7727210332779848, "learning_rate": 4.31949305245967e-06, "loss": 0.6097, "step": 5360 }, { "epoch": 0.56, "grad_norm": 1.7854736896677759, "learning_rate": 4.317825338482123e-06, "loss": 0.667, "step": 5361 }, { "epoch": 0.56, "grad_norm": 2.0495980515372945, "learning_rate": 4.31615770183296e-06, "loss": 0.631, "step": 5362 }, { "epoch": 0.56, "grad_norm": 2.0096774454763766, "learning_rate": 4.31449014270122e-06, "loss": 0.6482, "step": 5363 }, { "epoch": 0.56, "grad_norm": 2.2165036324005603, "learning_rate": 4.312822661275929e-06, "loss": 0.7564, "step": 5364 }, { "epoch": 0.56, "grad_norm": 1.857956424239191, "learning_rate": 4.3111552577461066e-06, "loss": 0.592, "step": 5365 }, { "epoch": 0.56, "grad_norm": 2.0795780405956905, "learning_rate": 4.309487932300762e-06, "loss": 0.7031, "step": 5366 }, { "epoch": 0.56, "grad_norm": 1.9276173353067232, "learning_rate": 4.307820685128896e-06, "loss": 0.6087, "step": 5367 }, { "epoch": 0.56, "grad_norm": 1.6763284047956102, "learning_rate": 4.3061535164195e-06, "loss": 0.7254, "step": 5368 }, { "epoch": 0.56, "grad_norm": 2.0186762339298694, "learning_rate": 4.304486426361561e-06, "loss": 0.6859, "step": 5369 }, { "epoch": 0.56, "grad_norm": 1.8808991283500813, "learning_rate": 4.302819415144046e-06, "loss": 0.5656, "step": 5370 }, { "epoch": 0.56, "grad_norm": 1.894815287220838, "learning_rate": 4.301152482955926e-06, "loss": 0.6015, "step": 5371 }, { "epoch": 0.56, "grad_norm": 1.781652807228387, "learning_rate": 4.299485629986153e-06, "loss": 0.5619, "step": 5372 }, { "epoch": 0.56, "grad_norm": 1.9419047981480841, "learning_rate": 4.297818856423679e-06, "loss": 0.6505, "step": 5373 }, { "epoch": 0.56, "grad_norm": 2.0120179327646777, "learning_rate": 4.296152162457439e-06, "loss": 0.6558, "step": 5374 }, { "epoch": 0.56, "grad_norm": 1.8836238693944019, "learning_rate": 4.294485548276363e-06, "loss": 0.6515, "step": 5375 }, { "epoch": 0.56, "grad_norm": 1.8504429409168075, "learning_rate": 4.292819014069372e-06, "loss": 0.7023, "step": 5376 }, { "epoch": 0.56, "grad_norm": 2.0390665006750743, "learning_rate": 4.291152560025377e-06, "loss": 0.7807, "step": 5377 }, { "epoch": 0.56, "grad_norm": 1.7922684370026463, "learning_rate": 4.2894861863332785e-06, "loss": 0.6747, "step": 5378 }, { "epoch": 0.56, "grad_norm": 1.9449512984578232, "learning_rate": 4.2878198931819705e-06, "loss": 0.6697, "step": 5379 }, { "epoch": 0.56, "grad_norm": 2.0122903659992213, "learning_rate": 4.286153680760338e-06, "loss": 0.7135, "step": 5380 }, { "epoch": 0.56, "grad_norm": 1.9004201271206218, "learning_rate": 4.284487549257254e-06, "loss": 0.4542, "step": 5381 }, { "epoch": 0.56, "grad_norm": 1.672893473626159, "learning_rate": 4.282821498861585e-06, "loss": 0.5413, "step": 5382 }, { "epoch": 0.56, "grad_norm": 1.8436366187307252, "learning_rate": 4.281155529762186e-06, "loss": 0.6247, "step": 5383 }, { "epoch": 0.56, "grad_norm": 1.676623195970577, "learning_rate": 4.279489642147906e-06, "loss": 0.6366, "step": 5384 }, { "epoch": 0.56, "grad_norm": 1.8849736907682577, "learning_rate": 4.277823836207581e-06, "loss": 0.6423, "step": 5385 }, { "epoch": 0.56, "grad_norm": 1.6504346249070367, "learning_rate": 4.276158112130041e-06, "loss": 0.6051, "step": 5386 }, { "epoch": 0.56, "grad_norm": 1.900270675572181, "learning_rate": 4.274492470104105e-06, "loss": 0.7088, "step": 5387 }, { "epoch": 0.56, "grad_norm": 2.0141651357876653, "learning_rate": 4.272826910318581e-06, "loss": 0.6249, "step": 5388 }, { "epoch": 0.56, "grad_norm": 1.9112805090759801, "learning_rate": 4.271161432962273e-06, "loss": 0.6067, "step": 5389 }, { "epoch": 0.56, "grad_norm": 2.034156020188733, "learning_rate": 4.2694960382239705e-06, "loss": 0.644, "step": 5390 }, { "epoch": 0.56, "grad_norm": 1.9930605463073325, "learning_rate": 4.2678307262924575e-06, "loss": 0.6368, "step": 5391 }, { "epoch": 0.56, "grad_norm": 1.906858020595567, "learning_rate": 4.266165497356503e-06, "loss": 0.5987, "step": 5392 }, { "epoch": 0.56, "grad_norm": 1.8406121682236543, "learning_rate": 4.264500351604872e-06, "loss": 0.6536, "step": 5393 }, { "epoch": 0.56, "grad_norm": 1.8254428663126403, "learning_rate": 4.262835289226318e-06, "loss": 0.5699, "step": 5394 }, { "epoch": 0.56, "grad_norm": 1.9270412949134141, "learning_rate": 4.261170310409586e-06, "loss": 0.7475, "step": 5395 }, { "epoch": 0.56, "grad_norm": 1.8066282234062603, "learning_rate": 4.259505415343411e-06, "loss": 0.6369, "step": 5396 }, { "epoch": 0.56, "grad_norm": 1.758284076012403, "learning_rate": 4.257840604216517e-06, "loss": 0.6009, "step": 5397 }, { "epoch": 0.56, "grad_norm": 1.6749754336124096, "learning_rate": 4.2561758772176244e-06, "loss": 0.6205, "step": 5398 }, { "epoch": 0.56, "grad_norm": 1.9275539454621302, "learning_rate": 4.254511234535432e-06, "loss": 0.5659, "step": 5399 }, { "epoch": 0.56, "grad_norm": 1.9011018789471603, "learning_rate": 4.252846676358641e-06, "loss": 0.6691, "step": 5400 }, { "epoch": 0.56, "grad_norm": 2.0596941216693483, "learning_rate": 4.251182202875938e-06, "loss": 0.6916, "step": 5401 }, { "epoch": 0.56, "grad_norm": 1.857656244516316, "learning_rate": 4.249517814276e-06, "loss": 0.6174, "step": 5402 }, { "epoch": 0.56, "grad_norm": 1.9254466566790163, "learning_rate": 4.247853510747495e-06, "loss": 0.586, "step": 5403 }, { "epoch": 0.56, "grad_norm": 1.7917004971343924, "learning_rate": 4.2461892924790825e-06, "loss": 0.5538, "step": 5404 }, { "epoch": 0.56, "grad_norm": 1.9172145911881517, "learning_rate": 4.244525159659409e-06, "loss": 0.6444, "step": 5405 }, { "epoch": 0.56, "grad_norm": 2.0655398744581377, "learning_rate": 4.2428611124771184e-06, "loss": 0.6404, "step": 5406 }, { "epoch": 0.56, "grad_norm": 1.8304720049482348, "learning_rate": 4.241197151120834e-06, "loss": 0.6175, "step": 5407 }, { "epoch": 0.56, "grad_norm": 1.81762951481684, "learning_rate": 4.239533275779177e-06, "loss": 0.6268, "step": 5408 }, { "epoch": 0.56, "grad_norm": 1.9408357676082384, "learning_rate": 4.237869486640758e-06, "loss": 0.6111, "step": 5409 }, { "epoch": 0.56, "grad_norm": 2.486652228429193, "learning_rate": 4.236205783894176e-06, "loss": 0.6691, "step": 5410 }, { "epoch": 0.56, "grad_norm": 1.6509217437191293, "learning_rate": 4.2345421677280215e-06, "loss": 0.6332, "step": 5411 }, { "epoch": 0.56, "grad_norm": 1.8758194011046228, "learning_rate": 4.232878638330877e-06, "loss": 0.6156, "step": 5412 }, { "epoch": 0.56, "grad_norm": 1.9285754226908345, "learning_rate": 4.231215195891311e-06, "loss": 0.6819, "step": 5413 }, { "epoch": 0.56, "grad_norm": 2.248208337442681, "learning_rate": 4.229551840597884e-06, "loss": 0.6568, "step": 5414 }, { "epoch": 0.56, "grad_norm": 1.7753810739067684, "learning_rate": 4.227888572639148e-06, "loss": 0.5761, "step": 5415 }, { "epoch": 0.56, "grad_norm": 2.0888773685296074, "learning_rate": 4.226225392203641e-06, "loss": 0.7153, "step": 5416 }, { "epoch": 0.56, "grad_norm": 1.7249346313721021, "learning_rate": 4.224562299479897e-06, "loss": 0.547, "step": 5417 }, { "epoch": 0.56, "grad_norm": 1.711135526070868, "learning_rate": 4.222899294656437e-06, "loss": 0.6079, "step": 5418 }, { "epoch": 0.56, "grad_norm": 1.9324287494379544, "learning_rate": 4.221236377921771e-06, "loss": 0.5704, "step": 5419 }, { "epoch": 0.56, "grad_norm": 1.7926007338434826, "learning_rate": 4.219573549464403e-06, "loss": 0.5625, "step": 5420 }, { "epoch": 0.56, "grad_norm": 1.9062785913816724, "learning_rate": 4.2179108094728185e-06, "loss": 0.6407, "step": 5421 }, { "epoch": 0.56, "grad_norm": 1.7912130192025735, "learning_rate": 4.2162481581355015e-06, "loss": 0.5941, "step": 5422 }, { "epoch": 0.56, "grad_norm": 1.919288046883698, "learning_rate": 4.214585595640923e-06, "loss": 0.6401, "step": 5423 }, { "epoch": 0.56, "grad_norm": 1.8434935010857791, "learning_rate": 4.212923122177545e-06, "loss": 0.5237, "step": 5424 }, { "epoch": 0.56, "grad_norm": 2.0484794450464237, "learning_rate": 4.211260737933816e-06, "loss": 0.6333, "step": 5425 }, { "epoch": 0.56, "grad_norm": 2.0744597871080273, "learning_rate": 4.209598443098179e-06, "loss": 0.572, "step": 5426 }, { "epoch": 0.56, "grad_norm": 1.912927529947007, "learning_rate": 4.2079362378590625e-06, "loss": 0.6074, "step": 5427 }, { "epoch": 0.56, "grad_norm": 1.815141363610265, "learning_rate": 4.206274122404891e-06, "loss": 0.5915, "step": 5428 }, { "epoch": 0.56, "grad_norm": 1.7372331606973053, "learning_rate": 4.204612096924069e-06, "loss": 0.4789, "step": 5429 }, { "epoch": 0.56, "grad_norm": 1.8298977661303382, "learning_rate": 4.202950161605e-06, "loss": 0.6594, "step": 5430 }, { "epoch": 0.56, "grad_norm": 1.9098142487861431, "learning_rate": 4.201288316636073e-06, "loss": 0.6554, "step": 5431 }, { "epoch": 0.56, "grad_norm": 2.0474500584686406, "learning_rate": 4.199626562205668e-06, "loss": 0.6762, "step": 5432 }, { "epoch": 0.56, "grad_norm": 1.64859334265512, "learning_rate": 4.197964898502154e-06, "loss": 0.6535, "step": 5433 }, { "epoch": 0.56, "grad_norm": 2.0230013593164036, "learning_rate": 4.1963033257138904e-06, "loss": 0.5574, "step": 5434 }, { "epoch": 0.56, "grad_norm": 1.7973657989903222, "learning_rate": 4.194641844029227e-06, "loss": 0.6348, "step": 5435 }, { "epoch": 0.57, "grad_norm": 1.949821896226258, "learning_rate": 4.1929804536365e-06, "loss": 0.7132, "step": 5436 }, { "epoch": 0.57, "grad_norm": 1.8046228407634757, "learning_rate": 4.191319154724038e-06, "loss": 0.7059, "step": 5437 }, { "epoch": 0.57, "grad_norm": 1.8969165232037644, "learning_rate": 4.189657947480159e-06, "loss": 0.5655, "step": 5438 }, { "epoch": 0.57, "grad_norm": 1.803066198351578, "learning_rate": 4.187996832093171e-06, "loss": 0.5827, "step": 5439 }, { "epoch": 0.57, "grad_norm": 1.9186713994335214, "learning_rate": 4.186335808751369e-06, "loss": 0.6451, "step": 5440 }, { "epoch": 0.57, "grad_norm": 1.9254356727843782, "learning_rate": 4.184674877643042e-06, "loss": 0.6686, "step": 5441 }, { "epoch": 0.57, "grad_norm": 2.0999174784887145, "learning_rate": 4.183014038956465e-06, "loss": 0.6833, "step": 5442 }, { "epoch": 0.57, "grad_norm": 2.037687236980084, "learning_rate": 4.181353292879904e-06, "loss": 0.7428, "step": 5443 }, { "epoch": 0.57, "grad_norm": 1.7644928803447903, "learning_rate": 4.179692639601612e-06, "loss": 0.7074, "step": 5444 }, { "epoch": 0.57, "grad_norm": 1.9200170976748951, "learning_rate": 4.178032079309836e-06, "loss": 0.684, "step": 5445 }, { "epoch": 0.57, "grad_norm": 1.9507059714006074, "learning_rate": 4.176371612192808e-06, "loss": 0.6933, "step": 5446 }, { "epoch": 0.57, "grad_norm": 1.7315185417192958, "learning_rate": 4.174711238438752e-06, "loss": 0.6032, "step": 5447 }, { "epoch": 0.57, "grad_norm": 1.9013248224515067, "learning_rate": 4.173050958235882e-06, "loss": 0.6588, "step": 5448 }, { "epoch": 0.57, "grad_norm": 1.8963272377797982, "learning_rate": 4.171390771772399e-06, "loss": 0.5986, "step": 5449 }, { "epoch": 0.57, "grad_norm": 1.9082415935624142, "learning_rate": 4.169730679236496e-06, "loss": 0.6572, "step": 5450 }, { "epoch": 0.57, "grad_norm": 1.937334868978178, "learning_rate": 4.168070680816351e-06, "loss": 0.6632, "step": 5451 }, { "epoch": 0.57, "grad_norm": 1.8696062932881234, "learning_rate": 4.166410776700137e-06, "loss": 0.7399, "step": 5452 }, { "epoch": 0.57, "grad_norm": 2.1429005075050083, "learning_rate": 4.164750967076012e-06, "loss": 0.7346, "step": 5453 }, { "epoch": 0.57, "grad_norm": 1.743702093443798, "learning_rate": 4.163091252132126e-06, "loss": 0.6522, "step": 5454 }, { "epoch": 0.57, "grad_norm": 1.8587073840092287, "learning_rate": 4.1614316320566174e-06, "loss": 0.6181, "step": 5455 }, { "epoch": 0.57, "grad_norm": 1.8675162521113273, "learning_rate": 4.159772107037613e-06, "loss": 0.6114, "step": 5456 }, { "epoch": 0.57, "grad_norm": 1.912758383634301, "learning_rate": 4.158112677263231e-06, "loss": 0.6577, "step": 5457 }, { "epoch": 0.57, "grad_norm": 1.836405720086498, "learning_rate": 4.156453342921573e-06, "loss": 0.7016, "step": 5458 }, { "epoch": 0.57, "grad_norm": 1.9065901889070593, "learning_rate": 4.154794104200737e-06, "loss": 0.6624, "step": 5459 }, { "epoch": 0.57, "grad_norm": 1.9075230574920552, "learning_rate": 4.153134961288807e-06, "loss": 0.6226, "step": 5460 }, { "epoch": 0.57, "grad_norm": 1.8711562433698041, "learning_rate": 4.151475914373856e-06, "loss": 0.6264, "step": 5461 }, { "epoch": 0.57, "grad_norm": 1.8048208435526953, "learning_rate": 4.149816963643947e-06, "loss": 0.593, "step": 5462 }, { "epoch": 0.57, "grad_norm": 1.6991033271060485, "learning_rate": 4.1481581092871305e-06, "loss": 0.5303, "step": 5463 }, { "epoch": 0.57, "grad_norm": 1.7456484367991891, "learning_rate": 4.146499351491448e-06, "loss": 0.5549, "step": 5464 }, { "epoch": 0.57, "grad_norm": 1.7845115021251257, "learning_rate": 4.144840690444931e-06, "loss": 0.6408, "step": 5465 }, { "epoch": 0.57, "grad_norm": 2.07574862295293, "learning_rate": 4.143182126335594e-06, "loss": 0.6681, "step": 5466 }, { "epoch": 0.57, "grad_norm": 1.843927894099637, "learning_rate": 4.1415236593514454e-06, "loss": 0.5944, "step": 5467 }, { "epoch": 0.57, "grad_norm": 1.7210896919710381, "learning_rate": 4.139865289680485e-06, "loss": 0.6904, "step": 5468 }, { "epoch": 0.57, "grad_norm": 2.1143978026959203, "learning_rate": 4.138207017510696e-06, "loss": 0.7319, "step": 5469 }, { "epoch": 0.57, "grad_norm": 1.9674427272555244, "learning_rate": 4.136548843030052e-06, "loss": 0.6494, "step": 5470 }, { "epoch": 0.57, "grad_norm": 1.753820948119458, "learning_rate": 4.134890766426521e-06, "loss": 0.6728, "step": 5471 }, { "epoch": 0.57, "grad_norm": 1.6638752593393304, "learning_rate": 4.133232787888052e-06, "loss": 0.5565, "step": 5472 }, { "epoch": 0.57, "grad_norm": 2.1632980049827677, "learning_rate": 4.131574907602586e-06, "loss": 0.6462, "step": 5473 }, { "epoch": 0.57, "grad_norm": 2.0407862239585697, "learning_rate": 4.129917125758054e-06, "loss": 0.746, "step": 5474 }, { "epoch": 0.57, "grad_norm": 1.854988687102971, "learning_rate": 4.128259442542375e-06, "loss": 0.666, "step": 5475 }, { "epoch": 0.57, "grad_norm": 2.0598051706252316, "learning_rate": 4.126601858143457e-06, "loss": 0.6856, "step": 5476 }, { "epoch": 0.57, "grad_norm": 2.2416076777807854, "learning_rate": 4.124944372749197e-06, "loss": 0.7149, "step": 5477 }, { "epoch": 0.57, "grad_norm": 1.9940483790212515, "learning_rate": 4.12328698654748e-06, "loss": 0.678, "step": 5478 }, { "epoch": 0.57, "grad_norm": 1.9448515255469294, "learning_rate": 4.12162969972618e-06, "loss": 0.6333, "step": 5479 }, { "epoch": 0.57, "grad_norm": 1.7150709469269039, "learning_rate": 4.119972512473161e-06, "loss": 0.6189, "step": 5480 }, { "epoch": 0.57, "grad_norm": 1.8341509373262923, "learning_rate": 4.118315424976272e-06, "loss": 0.5073, "step": 5481 }, { "epoch": 0.57, "grad_norm": 1.9476966860099154, "learning_rate": 4.116658437423355e-06, "loss": 0.5557, "step": 5482 }, { "epoch": 0.57, "grad_norm": 1.6553886987138464, "learning_rate": 4.115001550002241e-06, "loss": 0.5275, "step": 5483 }, { "epoch": 0.57, "grad_norm": 1.9821949785533268, "learning_rate": 4.113344762900745e-06, "loss": 0.6504, "step": 5484 }, { "epoch": 0.57, "grad_norm": 1.7156562932709354, "learning_rate": 4.111688076306673e-06, "loss": 0.5742, "step": 5485 }, { "epoch": 0.57, "grad_norm": 2.0232269740630366, "learning_rate": 4.11003149040782e-06, "loss": 0.6493, "step": 5486 }, { "epoch": 0.57, "grad_norm": 1.8519773132537132, "learning_rate": 4.108375005391972e-06, "loss": 0.6184, "step": 5487 }, { "epoch": 0.57, "grad_norm": 1.793916358582051, "learning_rate": 4.106718621446899e-06, "loss": 0.5546, "step": 5488 }, { "epoch": 0.57, "grad_norm": 2.036030057119493, "learning_rate": 4.105062338760361e-06, "loss": 0.5802, "step": 5489 }, { "epoch": 0.57, "grad_norm": 1.7927973737970935, "learning_rate": 4.103406157520108e-06, "loss": 0.6194, "step": 5490 }, { "epoch": 0.57, "grad_norm": 1.6799135980861009, "learning_rate": 4.101750077913878e-06, "loss": 0.5536, "step": 5491 }, { "epoch": 0.57, "grad_norm": 2.009000457580272, "learning_rate": 4.100094100129396e-06, "loss": 0.7814, "step": 5492 }, { "epoch": 0.57, "grad_norm": 2.2549031104223727, "learning_rate": 4.098438224354377e-06, "loss": 0.628, "step": 5493 }, { "epoch": 0.57, "grad_norm": 1.7853953706906511, "learning_rate": 4.096782450776526e-06, "loss": 0.6338, "step": 5494 }, { "epoch": 0.57, "grad_norm": 1.9185594643941042, "learning_rate": 4.09512677958353e-06, "loss": 0.5703, "step": 5495 }, { "epoch": 0.57, "grad_norm": 1.8102601216846244, "learning_rate": 4.093471210963072e-06, "loss": 0.5525, "step": 5496 }, { "epoch": 0.57, "grad_norm": 1.9988743835333525, "learning_rate": 4.091815745102818e-06, "loss": 0.6161, "step": 5497 }, { "epoch": 0.57, "grad_norm": 1.9896802093368413, "learning_rate": 4.0901603821904264e-06, "loss": 0.6264, "step": 5498 }, { "epoch": 0.57, "grad_norm": 1.7811960844124857, "learning_rate": 4.088505122413542e-06, "loss": 0.6219, "step": 5499 }, { "epoch": 0.57, "grad_norm": 1.642234193822947, "learning_rate": 4.086849965959797e-06, "loss": 0.5674, "step": 5500 }, { "epoch": 0.57, "grad_norm": 2.1360764455341803, "learning_rate": 4.0851949130168135e-06, "loss": 0.763, "step": 5501 }, { "epoch": 0.57, "grad_norm": 1.8119817360590291, "learning_rate": 4.083539963772202e-06, "loss": 0.6736, "step": 5502 }, { "epoch": 0.57, "grad_norm": 2.0075224162284337, "learning_rate": 4.081885118413557e-06, "loss": 0.6061, "step": 5503 }, { "epoch": 0.57, "grad_norm": 1.9394585405022449, "learning_rate": 4.0802303771284685e-06, "loss": 0.6544, "step": 5504 }, { "epoch": 0.57, "grad_norm": 1.9226282044737457, "learning_rate": 4.078575740104508e-06, "loss": 0.6074, "step": 5505 }, { "epoch": 0.57, "grad_norm": 1.8428113935987287, "learning_rate": 4.07692120752924e-06, "loss": 0.5472, "step": 5506 }, { "epoch": 0.57, "grad_norm": 1.944596305847222, "learning_rate": 4.075266779590214e-06, "loss": 0.7045, "step": 5507 }, { "epoch": 0.57, "grad_norm": 2.164562137124973, "learning_rate": 4.073612456474969e-06, "loss": 0.5906, "step": 5508 }, { "epoch": 0.57, "grad_norm": 1.632993502982212, "learning_rate": 4.071958238371036e-06, "loss": 0.5673, "step": 5509 }, { "epoch": 0.57, "grad_norm": 1.9024162945461678, "learning_rate": 4.070304125465922e-06, "loss": 0.5853, "step": 5510 }, { "epoch": 0.57, "grad_norm": 1.9007911838792717, "learning_rate": 4.068650117947135e-06, "loss": 0.6144, "step": 5511 }, { "epoch": 0.57, "grad_norm": 1.9514097246456006, "learning_rate": 4.066996216002167e-06, "loss": 0.6627, "step": 5512 }, { "epoch": 0.57, "grad_norm": 2.018105134211878, "learning_rate": 4.065342419818495e-06, "loss": 0.6744, "step": 5513 }, { "epoch": 0.57, "grad_norm": 1.885304977006811, "learning_rate": 4.063688729583587e-06, "loss": 0.7094, "step": 5514 }, { "epoch": 0.57, "grad_norm": 2.0783420987217838, "learning_rate": 4.0620351454848986e-06, "loss": 0.586, "step": 5515 }, { "epoch": 0.57, "grad_norm": 2.088674812109934, "learning_rate": 4.0603816677098735e-06, "loss": 0.648, "step": 5516 }, { "epoch": 0.57, "grad_norm": 2.0529187492775622, "learning_rate": 4.058728296445941e-06, "loss": 0.688, "step": 5517 }, { "epoch": 0.57, "grad_norm": 2.07021175838815, "learning_rate": 4.057075031880521e-06, "loss": 0.6682, "step": 5518 }, { "epoch": 0.57, "grad_norm": 1.9584875967230313, "learning_rate": 4.055421874201021e-06, "loss": 0.6779, "step": 5519 }, { "epoch": 0.57, "grad_norm": 1.9478184359951374, "learning_rate": 4.053768823594835e-06, "loss": 0.6618, "step": 5520 }, { "epoch": 0.57, "grad_norm": 1.9949873298042147, "learning_rate": 4.052115880249347e-06, "loss": 0.5795, "step": 5521 }, { "epoch": 0.57, "grad_norm": 1.9743169662323066, "learning_rate": 4.050463044351927e-06, "loss": 0.5705, "step": 5522 }, { "epoch": 0.57, "grad_norm": 1.9749688882641023, "learning_rate": 4.048810316089932e-06, "loss": 0.6088, "step": 5523 }, { "epoch": 0.57, "grad_norm": 1.8137222978800382, "learning_rate": 4.0471576956507106e-06, "loss": 0.6511, "step": 5524 }, { "epoch": 0.57, "grad_norm": 2.0162638592127324, "learning_rate": 4.045505183221594e-06, "loss": 0.6694, "step": 5525 }, { "epoch": 0.57, "grad_norm": 2.231644995017, "learning_rate": 4.043852778989906e-06, "loss": 0.7455, "step": 5526 }, { "epoch": 0.57, "grad_norm": 2.1407694111031104, "learning_rate": 4.042200483142955e-06, "loss": 0.574, "step": 5527 }, { "epoch": 0.57, "grad_norm": 1.9598142070636422, "learning_rate": 4.040548295868039e-06, "loss": 0.5821, "step": 5528 }, { "epoch": 0.57, "grad_norm": 1.9740464430151743, "learning_rate": 4.038896217352442e-06, "loss": 0.6735, "step": 5529 }, { "epoch": 0.57, "grad_norm": 1.6941300405779327, "learning_rate": 4.037244247783437e-06, "loss": 0.6544, "step": 5530 }, { "epoch": 0.57, "grad_norm": 1.8565441269875411, "learning_rate": 4.035592387348285e-06, "loss": 0.6343, "step": 5531 }, { "epoch": 0.58, "grad_norm": 2.0685840879545663, "learning_rate": 4.033940636234233e-06, "loss": 0.6154, "step": 5532 }, { "epoch": 0.58, "grad_norm": 1.8535815659947346, "learning_rate": 4.0322889946285146e-06, "loss": 0.5411, "step": 5533 }, { "epoch": 0.58, "grad_norm": 1.8589830851032711, "learning_rate": 4.030637462718354e-06, "loss": 0.5412, "step": 5534 }, { "epoch": 0.58, "grad_norm": 1.7381406224431815, "learning_rate": 4.028986040690963e-06, "loss": 0.5865, "step": 5535 }, { "epoch": 0.58, "grad_norm": 1.8576683960163518, "learning_rate": 4.027334728733539e-06, "loss": 0.6374, "step": 5536 }, { "epoch": 0.58, "grad_norm": 1.9290621874870129, "learning_rate": 4.0256835270332676e-06, "loss": 0.6227, "step": 5537 }, { "epoch": 0.58, "grad_norm": 1.9821450643649243, "learning_rate": 4.024032435777321e-06, "loss": 0.5536, "step": 5538 }, { "epoch": 0.58, "grad_norm": 1.7135125156331292, "learning_rate": 4.022381455152863e-06, "loss": 0.6002, "step": 5539 }, { "epoch": 0.58, "grad_norm": 1.9281196949391677, "learning_rate": 4.020730585347038e-06, "loss": 0.7845, "step": 5540 }, { "epoch": 0.58, "grad_norm": 2.013273341855397, "learning_rate": 4.019079826546982e-06, "loss": 0.6069, "step": 5541 }, { "epoch": 0.58, "grad_norm": 1.8903020483922408, "learning_rate": 4.017429178939819e-06, "loss": 0.6474, "step": 5542 }, { "epoch": 0.58, "grad_norm": 1.980021206707033, "learning_rate": 4.01577864271266e-06, "loss": 0.6513, "step": 5543 }, { "epoch": 0.58, "grad_norm": 1.8662321684702983, "learning_rate": 4.0141282180526e-06, "loss": 0.5716, "step": 5544 }, { "epoch": 0.58, "grad_norm": 1.9032406858177675, "learning_rate": 4.012477905146726e-06, "loss": 0.6737, "step": 5545 }, { "epoch": 0.58, "grad_norm": 1.951128633499513, "learning_rate": 4.010827704182113e-06, "loss": 0.663, "step": 5546 }, { "epoch": 0.58, "grad_norm": 1.8364003831985076, "learning_rate": 4.009177615345816e-06, "loss": 0.6535, "step": 5547 }, { "epoch": 0.58, "grad_norm": 1.7446521841903204, "learning_rate": 4.007527638824883e-06, "loss": 0.5733, "step": 5548 }, { "epoch": 0.58, "grad_norm": 2.1231342278478005, "learning_rate": 4.005877774806349e-06, "loss": 0.6509, "step": 5549 }, { "epoch": 0.58, "grad_norm": 1.8927153794231493, "learning_rate": 4.004228023477236e-06, "loss": 0.6556, "step": 5550 }, { "epoch": 0.58, "grad_norm": 1.867765315475591, "learning_rate": 4.002578385024552e-06, "loss": 0.6168, "step": 5551 }, { "epoch": 0.58, "grad_norm": 1.8285478053225521, "learning_rate": 4.000928859635293e-06, "loss": 0.6157, "step": 5552 }, { "epoch": 0.58, "grad_norm": 1.7997858856785665, "learning_rate": 3.999279447496444e-06, "loss": 0.6565, "step": 5553 }, { "epoch": 0.58, "grad_norm": 2.0598783545296144, "learning_rate": 3.997630148794972e-06, "loss": 0.579, "step": 5554 }, { "epoch": 0.58, "grad_norm": 1.809587176377569, "learning_rate": 3.995980963717836e-06, "loss": 0.6207, "step": 5555 }, { "epoch": 0.58, "grad_norm": 1.921765056383696, "learning_rate": 3.994331892451981e-06, "loss": 0.6272, "step": 5556 }, { "epoch": 0.58, "grad_norm": 1.9168894037206115, "learning_rate": 3.992682935184338e-06, "loss": 0.552, "step": 5557 }, { "epoch": 0.58, "grad_norm": 1.9846547269975716, "learning_rate": 3.991034092101825e-06, "loss": 0.6359, "step": 5558 }, { "epoch": 0.58, "grad_norm": 1.9654747327693463, "learning_rate": 3.9893853633913485e-06, "loss": 0.7966, "step": 5559 }, { "epoch": 0.58, "grad_norm": 1.8248422292882072, "learning_rate": 3.987736749239804e-06, "loss": 0.694, "step": 5560 }, { "epoch": 0.58, "grad_norm": 1.7068304249889794, "learning_rate": 3.986088249834067e-06, "loss": 0.5808, "step": 5561 }, { "epoch": 0.58, "grad_norm": 1.9279785384262589, "learning_rate": 3.984439865361006e-06, "loss": 0.6398, "step": 5562 }, { "epoch": 0.58, "grad_norm": 1.8561935533701657, "learning_rate": 3.982791596007474e-06, "loss": 0.6948, "step": 5563 }, { "epoch": 0.58, "grad_norm": 2.037470040126886, "learning_rate": 3.981143441960312e-06, "loss": 0.6098, "step": 5564 }, { "epoch": 0.58, "grad_norm": 1.7159903024585526, "learning_rate": 3.979495403406349e-06, "loss": 0.6738, "step": 5565 }, { "epoch": 0.58, "grad_norm": 1.9450556424814869, "learning_rate": 3.977847480532399e-06, "loss": 0.6524, "step": 5566 }, { "epoch": 0.58, "grad_norm": 1.8435450586070443, "learning_rate": 3.976199673525263e-06, "loss": 0.6249, "step": 5567 }, { "epoch": 0.58, "grad_norm": 1.9349741783559788, "learning_rate": 3.974551982571732e-06, "loss": 0.7301, "step": 5568 }, { "epoch": 0.58, "grad_norm": 2.1214284817615923, "learning_rate": 3.9729044078585756e-06, "loss": 0.6747, "step": 5569 }, { "epoch": 0.58, "grad_norm": 1.9991903575884222, "learning_rate": 3.971256949572559e-06, "loss": 0.6091, "step": 5570 }, { "epoch": 0.58, "grad_norm": 1.962993073595112, "learning_rate": 3.969609607900431e-06, "loss": 0.5745, "step": 5571 }, { "epoch": 0.58, "grad_norm": 2.0994401626194543, "learning_rate": 3.967962383028927e-06, "loss": 0.5469, "step": 5572 }, { "epoch": 0.58, "grad_norm": 1.8638963329996499, "learning_rate": 3.966315275144769e-06, "loss": 0.6496, "step": 5573 }, { "epoch": 0.58, "grad_norm": 2.040176832504217, "learning_rate": 3.964668284434666e-06, "loss": 0.6288, "step": 5574 }, { "epoch": 0.58, "grad_norm": 2.022872445806577, "learning_rate": 3.963021411085315e-06, "loss": 0.6499, "step": 5575 }, { "epoch": 0.58, "grad_norm": 1.9155250142977756, "learning_rate": 3.9613746552834e-06, "loss": 0.5525, "step": 5576 }, { "epoch": 0.58, "grad_norm": 2.0230746897832876, "learning_rate": 3.959728017215585e-06, "loss": 0.5354, "step": 5577 }, { "epoch": 0.58, "grad_norm": 2.1162250972308643, "learning_rate": 3.958081497068528e-06, "loss": 0.6553, "step": 5578 }, { "epoch": 0.58, "grad_norm": 1.7692582229440683, "learning_rate": 3.9564350950288735e-06, "loss": 0.6553, "step": 5579 }, { "epoch": 0.58, "grad_norm": 1.8739200142352377, "learning_rate": 3.954788811283249e-06, "loss": 0.6072, "step": 5580 }, { "epoch": 0.58, "grad_norm": 1.7011178415660841, "learning_rate": 3.953142646018269e-06, "loss": 0.5354, "step": 5581 }, { "epoch": 0.58, "grad_norm": 1.8492767816456872, "learning_rate": 3.951496599420538e-06, "loss": 0.7356, "step": 5582 }, { "epoch": 0.58, "grad_norm": 2.0115047324799478, "learning_rate": 3.949850671676644e-06, "loss": 0.6773, "step": 5583 }, { "epoch": 0.58, "grad_norm": 2.068672760742573, "learning_rate": 3.948204862973161e-06, "loss": 0.6645, "step": 5584 }, { "epoch": 0.58, "grad_norm": 1.9316885249397973, "learning_rate": 3.946559173496653e-06, "loss": 0.7602, "step": 5585 }, { "epoch": 0.58, "grad_norm": 1.813002548695951, "learning_rate": 3.944913603433667e-06, "loss": 0.6286, "step": 5586 }, { "epoch": 0.58, "grad_norm": 1.7041134204449735, "learning_rate": 3.943268152970737e-06, "loss": 0.6044, "step": 5587 }, { "epoch": 0.58, "grad_norm": 1.9429926106788047, "learning_rate": 3.941622822294385e-06, "loss": 0.7283, "step": 5588 }, { "epoch": 0.58, "grad_norm": 1.9588914791860121, "learning_rate": 3.939977611591119e-06, "loss": 0.5775, "step": 5589 }, { "epoch": 0.58, "grad_norm": 1.8812887191683487, "learning_rate": 3.938332521047434e-06, "loss": 0.6613, "step": 5590 }, { "epoch": 0.58, "grad_norm": 1.9635352056032394, "learning_rate": 3.936687550849807e-06, "loss": 0.6232, "step": 5591 }, { "epoch": 0.58, "grad_norm": 2.0963605977584754, "learning_rate": 3.935042701184707e-06, "loss": 0.661, "step": 5592 }, { "epoch": 0.58, "grad_norm": 1.8334345997394716, "learning_rate": 3.9333979722385865e-06, "loss": 0.6448, "step": 5593 }, { "epoch": 0.58, "grad_norm": 1.802952264613462, "learning_rate": 3.931753364197886e-06, "loss": 0.6125, "step": 5594 }, { "epoch": 0.58, "grad_norm": 2.0955138413023127, "learning_rate": 3.93010887724903e-06, "loss": 0.6208, "step": 5595 }, { "epoch": 0.58, "grad_norm": 1.9727861271093967, "learning_rate": 3.928464511578432e-06, "loss": 0.6802, "step": 5596 }, { "epoch": 0.58, "grad_norm": 1.7483375199702063, "learning_rate": 3.926820267372489e-06, "loss": 0.5174, "step": 5597 }, { "epoch": 0.58, "grad_norm": 2.079593531305218, "learning_rate": 3.925176144817587e-06, "loss": 0.6895, "step": 5598 }, { "epoch": 0.58, "grad_norm": 2.115779088659057, "learning_rate": 3.9235321441000925e-06, "loss": 0.5738, "step": 5599 }, { "epoch": 0.58, "grad_norm": 2.0251026439895456, "learning_rate": 3.921888265406366e-06, "loss": 0.6485, "step": 5600 }, { "epoch": 0.58, "grad_norm": 1.7558793985516596, "learning_rate": 3.92024450892275e-06, "loss": 0.5925, "step": 5601 }, { "epoch": 0.58, "grad_norm": 1.860407325614981, "learning_rate": 3.918600874835573e-06, "loss": 0.5922, "step": 5602 }, { "epoch": 0.58, "grad_norm": 1.981471706438414, "learning_rate": 3.9169573633311505e-06, "loss": 0.6904, "step": 5603 }, { "epoch": 0.58, "grad_norm": 1.8167178665462902, "learning_rate": 3.915313974595784e-06, "loss": 0.5259, "step": 5604 }, { "epoch": 0.58, "grad_norm": 2.487307591563184, "learning_rate": 3.913670708815763e-06, "loss": 0.5648, "step": 5605 }, { "epoch": 0.58, "grad_norm": 1.6661233435057807, "learning_rate": 3.912027566177358e-06, "loss": 0.5738, "step": 5606 }, { "epoch": 0.58, "grad_norm": 2.049360076678344, "learning_rate": 3.910384546866828e-06, "loss": 0.6835, "step": 5607 }, { "epoch": 0.58, "grad_norm": 2.0422476557675906, "learning_rate": 3.9087416510704206e-06, "loss": 0.7541, "step": 5608 }, { "epoch": 0.58, "grad_norm": 2.0434482018310782, "learning_rate": 3.907098878974367e-06, "loss": 0.6639, "step": 5609 }, { "epoch": 0.58, "grad_norm": 2.1404651279397346, "learning_rate": 3.905456230764886e-06, "loss": 0.6493, "step": 5610 }, { "epoch": 0.58, "grad_norm": 2.1477378402307594, "learning_rate": 3.903813706628179e-06, "loss": 0.6731, "step": 5611 }, { "epoch": 0.58, "grad_norm": 1.6555033642042376, "learning_rate": 3.902171306750436e-06, "loss": 0.6208, "step": 5612 }, { "epoch": 0.58, "grad_norm": 1.9267343051568924, "learning_rate": 3.900529031317836e-06, "loss": 0.5961, "step": 5613 }, { "epoch": 0.58, "grad_norm": 2.0221313434196153, "learning_rate": 3.898886880516533e-06, "loss": 0.5812, "step": 5614 }, { "epoch": 0.58, "grad_norm": 1.9915391709592674, "learning_rate": 3.89724485453268e-06, "loss": 0.5604, "step": 5615 }, { "epoch": 0.58, "grad_norm": 1.9024492756973865, "learning_rate": 3.895602953552408e-06, "loss": 0.6102, "step": 5616 }, { "epoch": 0.58, "grad_norm": 2.0749917123549704, "learning_rate": 3.893961177761835e-06, "loss": 0.6328, "step": 5617 }, { "epoch": 0.58, "grad_norm": 1.6907876354852427, "learning_rate": 3.8923195273470686e-06, "loss": 0.6116, "step": 5618 }, { "epoch": 0.58, "grad_norm": 1.9039400354395462, "learning_rate": 3.890678002494196e-06, "loss": 0.693, "step": 5619 }, { "epoch": 0.58, "grad_norm": 2.109421413941969, "learning_rate": 3.889036603389297e-06, "loss": 0.6047, "step": 5620 }, { "epoch": 0.58, "grad_norm": 1.7020168441413386, "learning_rate": 3.887395330218429e-06, "loss": 0.6034, "step": 5621 }, { "epoch": 0.58, "grad_norm": 1.9585934375817815, "learning_rate": 3.885754183167642e-06, "loss": 0.5768, "step": 5622 }, { "epoch": 0.58, "grad_norm": 2.21563878724902, "learning_rate": 3.884113162422971e-06, "loss": 0.6703, "step": 5623 }, { "epoch": 0.58, "grad_norm": 1.8662332887713378, "learning_rate": 3.882472268170432e-06, "loss": 0.5811, "step": 5624 }, { "epoch": 0.58, "grad_norm": 2.0872931585370282, "learning_rate": 3.8808315005960305e-06, "loss": 0.6711, "step": 5625 }, { "epoch": 0.58, "grad_norm": 1.8026638556504297, "learning_rate": 3.879190859885758e-06, "loss": 0.6181, "step": 5626 }, { "epoch": 0.58, "grad_norm": 1.7741256125064448, "learning_rate": 3.877550346225591e-06, "loss": 0.7367, "step": 5627 }, { "epoch": 0.59, "grad_norm": 2.012507567318553, "learning_rate": 3.8759099598014895e-06, "loss": 0.6178, "step": 5628 }, { "epoch": 0.59, "grad_norm": 2.0531286926195667, "learning_rate": 3.8742697007994e-06, "loss": 0.6073, "step": 5629 }, { "epoch": 0.59, "grad_norm": 1.8202572945880635, "learning_rate": 3.872629569405257e-06, "loss": 0.6469, "step": 5630 }, { "epoch": 0.59, "grad_norm": 1.796969026042274, "learning_rate": 3.870989565804979e-06, "loss": 0.6439, "step": 5631 }, { "epoch": 0.59, "grad_norm": 2.1220990590037574, "learning_rate": 3.869349690184468e-06, "loss": 0.6905, "step": 5632 }, { "epoch": 0.59, "grad_norm": 2.0824399282601846, "learning_rate": 3.867709942729613e-06, "loss": 0.693, "step": 5633 }, { "epoch": 0.59, "grad_norm": 1.7845464618082878, "learning_rate": 3.866070323626291e-06, "loss": 0.5927, "step": 5634 }, { "epoch": 0.59, "grad_norm": 1.9299347986415278, "learning_rate": 3.864430833060363e-06, "loss": 0.6342, "step": 5635 }, { "epoch": 0.59, "grad_norm": 1.7117444670691249, "learning_rate": 3.86279147121767e-06, "loss": 0.6953, "step": 5636 }, { "epoch": 0.59, "grad_norm": 1.828004952003326, "learning_rate": 3.8611522382840476e-06, "loss": 0.6119, "step": 5637 }, { "epoch": 0.59, "grad_norm": 1.8602018760685013, "learning_rate": 3.859513134445308e-06, "loss": 0.6153, "step": 5638 }, { "epoch": 0.59, "grad_norm": 1.8653514922373067, "learning_rate": 3.857874159887256e-06, "loss": 0.5868, "step": 5639 }, { "epoch": 0.59, "grad_norm": 2.06982897795329, "learning_rate": 3.856235314795678e-06, "loss": 0.6817, "step": 5640 }, { "epoch": 0.59, "grad_norm": 2.1179240984389147, "learning_rate": 3.8545965993563454e-06, "loss": 0.6013, "step": 5641 }, { "epoch": 0.59, "grad_norm": 1.819858665984067, "learning_rate": 3.85295801375502e-06, "loss": 0.5963, "step": 5642 }, { "epoch": 0.59, "grad_norm": 1.9063952687832015, "learning_rate": 3.851319558177439e-06, "loss": 0.6431, "step": 5643 }, { "epoch": 0.59, "grad_norm": 2.191630045938788, "learning_rate": 3.8496812328093335e-06, "loss": 0.6341, "step": 5644 }, { "epoch": 0.59, "grad_norm": 2.070202153822423, "learning_rate": 3.848043037836417e-06, "loss": 0.6904, "step": 5645 }, { "epoch": 0.59, "grad_norm": 2.223577708768498, "learning_rate": 3.846404973444388e-06, "loss": 0.6196, "step": 5646 }, { "epoch": 0.59, "grad_norm": 1.8708658636793065, "learning_rate": 3.8447670398189305e-06, "loss": 0.6542, "step": 5647 }, { "epoch": 0.59, "grad_norm": 1.9332690937542027, "learning_rate": 3.843129237145713e-06, "loss": 0.6483, "step": 5648 }, { "epoch": 0.59, "grad_norm": 1.8760128233078004, "learning_rate": 3.841491565610393e-06, "loss": 0.6364, "step": 5649 }, { "epoch": 0.59, "grad_norm": 1.833179006583722, "learning_rate": 3.839854025398606e-06, "loss": 0.5539, "step": 5650 }, { "epoch": 0.59, "grad_norm": 1.7178320341789401, "learning_rate": 3.838216616695977e-06, "loss": 0.5955, "step": 5651 }, { "epoch": 0.59, "grad_norm": 2.043925966235596, "learning_rate": 3.836579339688116e-06, "loss": 0.5994, "step": 5652 }, { "epoch": 0.59, "grad_norm": 2.013863381361781, "learning_rate": 3.83494219456062e-06, "loss": 0.7002, "step": 5653 }, { "epoch": 0.59, "grad_norm": 1.7479848563782898, "learning_rate": 3.833305181499065e-06, "loss": 0.6117, "step": 5654 }, { "epoch": 0.59, "grad_norm": 1.843085118521347, "learning_rate": 3.831668300689019e-06, "loss": 0.5921, "step": 5655 }, { "epoch": 0.59, "grad_norm": 1.8414043257902304, "learning_rate": 3.8300315523160295e-06, "loss": 0.6534, "step": 5656 }, { "epoch": 0.59, "grad_norm": 1.8521076862406898, "learning_rate": 3.828394936565633e-06, "loss": 0.6813, "step": 5657 }, { "epoch": 0.59, "grad_norm": 1.964166539127327, "learning_rate": 3.826758453623348e-06, "loss": 0.5947, "step": 5658 }, { "epoch": 0.59, "grad_norm": 1.7844201494772378, "learning_rate": 3.825122103674679e-06, "loss": 0.6167, "step": 5659 }, { "epoch": 0.59, "grad_norm": 1.7340840036629404, "learning_rate": 3.823485886905116e-06, "loss": 0.6266, "step": 5660 }, { "epoch": 0.59, "grad_norm": 1.8099618683785281, "learning_rate": 3.821849803500134e-06, "loss": 0.6138, "step": 5661 }, { "epoch": 0.59, "grad_norm": 1.944243244863238, "learning_rate": 3.820213853645193e-06, "loss": 0.6171, "step": 5662 }, { "epoch": 0.59, "grad_norm": 1.8178877996961225, "learning_rate": 3.8185780375257356e-06, "loss": 0.6275, "step": 5663 }, { "epoch": 0.59, "grad_norm": 1.9352643805301815, "learning_rate": 3.816942355327191e-06, "loss": 0.5816, "step": 5664 }, { "epoch": 0.59, "grad_norm": 1.8325507491789237, "learning_rate": 3.815306807234974e-06, "loss": 0.6927, "step": 5665 }, { "epoch": 0.59, "grad_norm": 1.837901350432591, "learning_rate": 3.8136713934344825e-06, "loss": 0.6256, "step": 5666 }, { "epoch": 0.59, "grad_norm": 2.111254838340173, "learning_rate": 3.812036114111101e-06, "loss": 0.5796, "step": 5667 }, { "epoch": 0.59, "grad_norm": 1.8187308591668736, "learning_rate": 3.8104009694501963e-06, "loss": 0.6156, "step": 5668 }, { "epoch": 0.59, "grad_norm": 1.8485495400562826, "learning_rate": 3.808765959637123e-06, "loss": 0.7708, "step": 5669 }, { "epoch": 0.59, "grad_norm": 1.8249275555676456, "learning_rate": 3.8071310848572173e-06, "loss": 0.5503, "step": 5670 }, { "epoch": 0.59, "grad_norm": 2.1032973373284682, "learning_rate": 3.8054963452958025e-06, "loss": 0.6755, "step": 5671 }, { "epoch": 0.59, "grad_norm": 1.9971403602282247, "learning_rate": 3.8038617411381876e-06, "loss": 0.6573, "step": 5672 }, { "epoch": 0.59, "grad_norm": 1.795638769861292, "learning_rate": 3.80222727256966e-06, "loss": 0.6491, "step": 5673 }, { "epoch": 0.59, "grad_norm": 1.871045292034552, "learning_rate": 3.800592939775498e-06, "loss": 0.5779, "step": 5674 }, { "epoch": 0.59, "grad_norm": 2.1159675641961306, "learning_rate": 3.798958742940963e-06, "loss": 0.6505, "step": 5675 }, { "epoch": 0.59, "grad_norm": 1.9957808122960192, "learning_rate": 3.7973246822513e-06, "loss": 0.6975, "step": 5676 }, { "epoch": 0.59, "grad_norm": 1.8929123275811768, "learning_rate": 3.795690757891739e-06, "loss": 0.6495, "step": 5677 }, { "epoch": 0.59, "grad_norm": 1.8000004498952757, "learning_rate": 3.794056970047495e-06, "loss": 0.6205, "step": 5678 }, { "epoch": 0.59, "grad_norm": 2.016000272727104, "learning_rate": 3.7924233189037697e-06, "loss": 0.6353, "step": 5679 }, { "epoch": 0.59, "grad_norm": 1.9150551526282293, "learning_rate": 3.7907898046457416e-06, "loss": 0.6361, "step": 5680 }, { "epoch": 0.59, "grad_norm": 1.905395762182365, "learning_rate": 3.789156427458581e-06, "loss": 0.6509, "step": 5681 }, { "epoch": 0.59, "grad_norm": 2.0270735773997557, "learning_rate": 3.787523187527442e-06, "loss": 0.6225, "step": 5682 }, { "epoch": 0.59, "grad_norm": 1.8523260107443824, "learning_rate": 3.7858900850374596e-06, "loss": 0.5487, "step": 5683 }, { "epoch": 0.59, "grad_norm": 1.8400704301179405, "learning_rate": 3.7842571201737568e-06, "loss": 0.6822, "step": 5684 }, { "epoch": 0.59, "grad_norm": 2.213633936194134, "learning_rate": 3.7826242931214386e-06, "loss": 0.6599, "step": 5685 }, { "epoch": 0.59, "grad_norm": 1.847807446734992, "learning_rate": 3.780991604065598e-06, "loss": 0.5762, "step": 5686 }, { "epoch": 0.59, "grad_norm": 1.7717364388265258, "learning_rate": 3.7793590531913047e-06, "loss": 0.5142, "step": 5687 }, { "epoch": 0.59, "grad_norm": 2.06824980539038, "learning_rate": 3.777726640683621e-06, "loss": 0.6043, "step": 5688 }, { "epoch": 0.59, "grad_norm": 2.0951430699266713, "learning_rate": 3.7760943667275884e-06, "loss": 0.6901, "step": 5689 }, { "epoch": 0.59, "grad_norm": 1.9928453372788413, "learning_rate": 3.7744622315082358e-06, "loss": 0.5764, "step": 5690 }, { "epoch": 0.59, "grad_norm": 2.0971652173091364, "learning_rate": 3.7728302352105743e-06, "loss": 0.6022, "step": 5691 }, { "epoch": 0.59, "grad_norm": 2.0164644631521598, "learning_rate": 3.7711983780196006e-06, "loss": 0.5844, "step": 5692 }, { "epoch": 0.59, "grad_norm": 2.121542379135994, "learning_rate": 3.7695666601202944e-06, "loss": 0.7505, "step": 5693 }, { "epoch": 0.59, "grad_norm": 1.7159088934028508, "learning_rate": 3.767935081697622e-06, "loss": 0.5846, "step": 5694 }, { "epoch": 0.59, "grad_norm": 1.7520439948117041, "learning_rate": 3.766303642936529e-06, "loss": 0.5672, "step": 5695 }, { "epoch": 0.59, "grad_norm": 1.7646205963873725, "learning_rate": 3.76467234402195e-06, "loss": 0.699, "step": 5696 }, { "epoch": 0.59, "grad_norm": 1.7997889644069125, "learning_rate": 3.763041185138802e-06, "loss": 0.6302, "step": 5697 }, { "epoch": 0.59, "grad_norm": 1.7988871525135666, "learning_rate": 3.7614101664719866e-06, "loss": 0.651, "step": 5698 }, { "epoch": 0.59, "grad_norm": 1.8411621906976277, "learning_rate": 3.759779288206388e-06, "loss": 0.6294, "step": 5699 }, { "epoch": 0.59, "grad_norm": 1.8931612809567004, "learning_rate": 3.758148550526877e-06, "loss": 0.5979, "step": 5700 }, { "epoch": 0.59, "grad_norm": 1.8264053952182302, "learning_rate": 3.7565179536183067e-06, "loss": 0.6323, "step": 5701 }, { "epoch": 0.59, "grad_norm": 1.8994850468593727, "learning_rate": 3.7548874976655126e-06, "loss": 0.6624, "step": 5702 }, { "epoch": 0.59, "grad_norm": 1.8525791579411612, "learning_rate": 3.7532571828533175e-06, "loss": 0.5778, "step": 5703 }, { "epoch": 0.59, "grad_norm": 1.914265211398974, "learning_rate": 3.751627009366527e-06, "loss": 0.6037, "step": 5704 }, { "epoch": 0.59, "grad_norm": 1.9860797713158134, "learning_rate": 3.7499969773899304e-06, "loss": 0.6279, "step": 5705 }, { "epoch": 0.59, "grad_norm": 2.4453142555832756, "learning_rate": 3.748367087108301e-06, "loss": 0.6271, "step": 5706 }, { "epoch": 0.59, "grad_norm": 2.3538812133651965, "learning_rate": 3.7467373387063973e-06, "loss": 0.715, "step": 5707 }, { "epoch": 0.59, "grad_norm": 2.0233372795899656, "learning_rate": 3.7451077323689587e-06, "loss": 0.5361, "step": 5708 }, { "epoch": 0.59, "grad_norm": 1.8810865910977235, "learning_rate": 3.7434782682807137e-06, "loss": 0.6368, "step": 5709 }, { "epoch": 0.59, "grad_norm": 1.9864037253071856, "learning_rate": 3.7418489466263663e-06, "loss": 0.6532, "step": 5710 }, { "epoch": 0.59, "grad_norm": 1.7612719043231966, "learning_rate": 3.740219767590613e-06, "loss": 0.6277, "step": 5711 }, { "epoch": 0.59, "grad_norm": 1.9914586483658314, "learning_rate": 3.738590731358129e-06, "loss": 0.6322, "step": 5712 }, { "epoch": 0.59, "grad_norm": 2.0569132735077074, "learning_rate": 3.736961838113575e-06, "loss": 0.5606, "step": 5713 }, { "epoch": 0.59, "grad_norm": 1.7802065674190661, "learning_rate": 3.7353330880415963e-06, "loss": 0.5631, "step": 5714 }, { "epoch": 0.59, "grad_norm": 1.919709196487885, "learning_rate": 3.7337044813268204e-06, "loss": 0.6351, "step": 5715 }, { "epoch": 0.59, "grad_norm": 1.805335375870117, "learning_rate": 3.732076018153861e-06, "loss": 0.5069, "step": 5716 }, { "epoch": 0.59, "grad_norm": 1.9844777052067826, "learning_rate": 3.73044769870731e-06, "loss": 0.6736, "step": 5717 }, { "epoch": 0.59, "grad_norm": 1.9877835438191704, "learning_rate": 3.728819523171748e-06, "loss": 0.6128, "step": 5718 }, { "epoch": 0.59, "grad_norm": 1.8767135977824283, "learning_rate": 3.7271914917317397e-06, "loss": 0.7063, "step": 5719 }, { "epoch": 0.59, "grad_norm": 1.979452378878533, "learning_rate": 3.7255636045718295e-06, "loss": 0.7067, "step": 5720 }, { "epoch": 0.59, "grad_norm": 1.88533991292467, "learning_rate": 3.723935861876549e-06, "loss": 0.6882, "step": 5721 }, { "epoch": 0.59, "grad_norm": 2.027102272390913, "learning_rate": 3.722308263830412e-06, "loss": 0.659, "step": 5722 }, { "epoch": 0.59, "grad_norm": 2.023782394536249, "learning_rate": 3.720680810617917e-06, "loss": 0.6214, "step": 5723 }, { "epoch": 0.6, "grad_norm": 2.229036069263612, "learning_rate": 3.719053502423543e-06, "loss": 0.6, "step": 5724 }, { "epoch": 0.6, "grad_norm": 1.8634729871510112, "learning_rate": 3.7174263394317565e-06, "loss": 0.5997, "step": 5725 }, { "epoch": 0.6, "grad_norm": 2.162803651867326, "learning_rate": 3.715799321827004e-06, "loss": 0.7181, "step": 5726 }, { "epoch": 0.6, "grad_norm": 1.7501057047105535, "learning_rate": 3.714172449793718e-06, "loss": 0.6726, "step": 5727 }, { "epoch": 0.6, "grad_norm": 1.821385233254919, "learning_rate": 3.7125457235163144e-06, "loss": 0.6426, "step": 5728 }, { "epoch": 0.6, "grad_norm": 1.87698360403373, "learning_rate": 3.7109191431791902e-06, "loss": 0.5506, "step": 5729 }, { "epoch": 0.6, "grad_norm": 1.9541379681456583, "learning_rate": 3.7092927089667293e-06, "loss": 0.6531, "step": 5730 }, { "epoch": 0.6, "grad_norm": 1.9558109805930095, "learning_rate": 3.7076664210632972e-06, "loss": 0.665, "step": 5731 }, { "epoch": 0.6, "grad_norm": 1.8327803792621096, "learning_rate": 3.7060402796532414e-06, "loss": 0.5689, "step": 5732 }, { "epoch": 0.6, "grad_norm": 1.7268754577479737, "learning_rate": 3.7044142849208953e-06, "loss": 0.6506, "step": 5733 }, { "epoch": 0.6, "grad_norm": 1.8842322171157952, "learning_rate": 3.7027884370505753e-06, "loss": 0.6301, "step": 5734 }, { "epoch": 0.6, "grad_norm": 1.7694469325931714, "learning_rate": 3.701162736226579e-06, "loss": 0.5931, "step": 5735 }, { "epoch": 0.6, "grad_norm": 1.9551315711768495, "learning_rate": 3.699537182633189e-06, "loss": 0.6189, "step": 5736 }, { "epoch": 0.6, "grad_norm": 2.0096838711866436, "learning_rate": 3.6979117764546735e-06, "loss": 0.7111, "step": 5737 }, { "epoch": 0.6, "grad_norm": 1.688583682617302, "learning_rate": 3.6962865178752804e-06, "loss": 0.5609, "step": 5738 }, { "epoch": 0.6, "grad_norm": 1.8099317008081695, "learning_rate": 3.69466140707924e-06, "loss": 0.7394, "step": 5739 }, { "epoch": 0.6, "grad_norm": 2.0311676895133384, "learning_rate": 3.6930364442507693e-06, "loss": 0.6102, "step": 5740 }, { "epoch": 0.6, "grad_norm": 1.953417217917916, "learning_rate": 3.6914116295740678e-06, "loss": 0.6152, "step": 5741 }, { "epoch": 0.6, "grad_norm": 1.871734797375731, "learning_rate": 3.6897869632333157e-06, "loss": 0.554, "step": 5742 }, { "epoch": 0.6, "grad_norm": 1.9371609557756013, "learning_rate": 3.6881624454126797e-06, "loss": 0.6307, "step": 5743 }, { "epoch": 0.6, "grad_norm": 1.9869409405219924, "learning_rate": 3.686538076296307e-06, "loss": 0.6393, "step": 5744 }, { "epoch": 0.6, "grad_norm": 2.1734737870284913, "learning_rate": 3.6849138560683305e-06, "loss": 0.6261, "step": 5745 }, { "epoch": 0.6, "grad_norm": 1.9689826803452837, "learning_rate": 3.683289784912866e-06, "loss": 0.5688, "step": 5746 }, { "epoch": 0.6, "grad_norm": 1.8841430860004258, "learning_rate": 3.681665863014008e-06, "loss": 0.5939, "step": 5747 }, { "epoch": 0.6, "grad_norm": 2.068574065898926, "learning_rate": 3.6800420905558378e-06, "loss": 0.6049, "step": 5748 }, { "epoch": 0.6, "grad_norm": 1.8856912763649123, "learning_rate": 3.6784184677224204e-06, "loss": 0.6656, "step": 5749 }, { "epoch": 0.6, "grad_norm": 2.067502585225236, "learning_rate": 3.6767949946978026e-06, "loss": 0.6474, "step": 5750 }, { "epoch": 0.6, "grad_norm": 2.041166249135522, "learning_rate": 3.6751716716660146e-06, "loss": 0.5992, "step": 5751 }, { "epoch": 0.6, "grad_norm": 1.934731123565553, "learning_rate": 3.673548498811068e-06, "loss": 0.6147, "step": 5752 }, { "epoch": 0.6, "grad_norm": 1.7182470344213379, "learning_rate": 3.671925476316962e-06, "loss": 0.6081, "step": 5753 }, { "epoch": 0.6, "grad_norm": 2.2804452109132676, "learning_rate": 3.6703026043676715e-06, "loss": 0.7114, "step": 5754 }, { "epoch": 0.6, "grad_norm": 2.216666784325599, "learning_rate": 3.6686798831471594e-06, "loss": 0.6938, "step": 5755 }, { "epoch": 0.6, "grad_norm": 1.7060098887404596, "learning_rate": 3.6670573128393704e-06, "loss": 0.562, "step": 5756 }, { "epoch": 0.6, "grad_norm": 1.781597914983373, "learning_rate": 3.6654348936282324e-06, "loss": 0.6731, "step": 5757 }, { "epoch": 0.6, "grad_norm": 1.880276423977131, "learning_rate": 3.663812625697657e-06, "loss": 0.6672, "step": 5758 }, { "epoch": 0.6, "grad_norm": 2.0046983496227435, "learning_rate": 3.6621905092315357e-06, "loss": 0.6595, "step": 5759 }, { "epoch": 0.6, "grad_norm": 1.8122835785911924, "learning_rate": 3.6605685444137463e-06, "loss": 0.6348, "step": 5760 }, { "epoch": 0.6, "grad_norm": 1.8979209319451233, "learning_rate": 3.658946731428147e-06, "loss": 0.6303, "step": 5761 }, { "epoch": 0.6, "grad_norm": 1.9871359315563444, "learning_rate": 3.6573250704585783e-06, "loss": 0.673, "step": 5762 }, { "epoch": 0.6, "grad_norm": 2.174620750981435, "learning_rate": 3.655703561688867e-06, "loss": 0.7261, "step": 5763 }, { "epoch": 0.6, "grad_norm": 1.9509384450832072, "learning_rate": 3.6540822053028185e-06, "loss": 0.603, "step": 5764 }, { "epoch": 0.6, "grad_norm": 2.123960386898783, "learning_rate": 3.6524610014842234e-06, "loss": 0.6584, "step": 5765 }, { "epoch": 0.6, "grad_norm": 1.8173412461480742, "learning_rate": 3.6508399504168547e-06, "loss": 0.6287, "step": 5766 }, { "epoch": 0.6, "grad_norm": 1.9404913184513584, "learning_rate": 3.6492190522844673e-06, "loss": 0.656, "step": 5767 }, { "epoch": 0.6, "grad_norm": 1.7743176436764714, "learning_rate": 3.647598307270801e-06, "loss": 0.5844, "step": 5768 }, { "epoch": 0.6, "grad_norm": 1.8949196446662082, "learning_rate": 3.6459777155595733e-06, "loss": 0.6704, "step": 5769 }, { "epoch": 0.6, "grad_norm": 2.2475526530936225, "learning_rate": 3.64435727733449e-06, "loss": 0.6862, "step": 5770 }, { "epoch": 0.6, "grad_norm": 1.9168800541401423, "learning_rate": 3.6427369927792354e-06, "loss": 0.5947, "step": 5771 }, { "epoch": 0.6, "grad_norm": 1.9223629195860992, "learning_rate": 3.6411168620774795e-06, "loss": 0.5721, "step": 5772 }, { "epoch": 0.6, "grad_norm": 1.9231743963862156, "learning_rate": 3.639496885412872e-06, "loss": 0.5919, "step": 5773 }, { "epoch": 0.6, "grad_norm": 1.6447425060429046, "learning_rate": 3.6378770629690476e-06, "loss": 0.581, "step": 5774 }, { "epoch": 0.6, "grad_norm": 2.0694344031077168, "learning_rate": 3.6362573949296242e-06, "loss": 0.586, "step": 5775 }, { "epoch": 0.6, "grad_norm": 1.7199178479200063, "learning_rate": 3.634637881478196e-06, "loss": 0.6447, "step": 5776 }, { "epoch": 0.6, "grad_norm": 2.1261329402743763, "learning_rate": 3.633018522798346e-06, "loss": 0.5911, "step": 5777 }, { "epoch": 0.6, "grad_norm": 2.0734597415985627, "learning_rate": 3.6313993190736375e-06, "loss": 0.6408, "step": 5778 }, { "epoch": 0.6, "grad_norm": 2.0006256078542535, "learning_rate": 3.629780270487617e-06, "loss": 0.6043, "step": 5779 }, { "epoch": 0.6, "grad_norm": 1.9745647927432075, "learning_rate": 3.628161377223814e-06, "loss": 0.7071, "step": 5780 }, { "epoch": 0.6, "grad_norm": 1.6602642905256981, "learning_rate": 3.626542639465738e-06, "loss": 0.5602, "step": 5781 }, { "epoch": 0.6, "grad_norm": 1.9001677004630113, "learning_rate": 3.624924057396883e-06, "loss": 0.6861, "step": 5782 }, { "epoch": 0.6, "grad_norm": 2.0813915739240447, "learning_rate": 3.6233056312007226e-06, "loss": 0.7129, "step": 5783 }, { "epoch": 0.6, "grad_norm": 2.0518818520506477, "learning_rate": 3.6216873610607155e-06, "loss": 0.5889, "step": 5784 }, { "epoch": 0.6, "grad_norm": 1.726267024963287, "learning_rate": 3.620069247160303e-06, "loss": 0.6046, "step": 5785 }, { "epoch": 0.6, "grad_norm": 1.6789114214946894, "learning_rate": 3.618451289682905e-06, "loss": 0.6614, "step": 5786 }, { "epoch": 0.6, "grad_norm": 1.8891420844312576, "learning_rate": 3.6168334888119295e-06, "loss": 0.604, "step": 5787 }, { "epoch": 0.6, "grad_norm": 1.6271989700635565, "learning_rate": 3.6152158447307607e-06, "loss": 0.5604, "step": 5788 }, { "epoch": 0.6, "grad_norm": 1.960085702524464, "learning_rate": 3.6135983576227694e-06, "loss": 0.7115, "step": 5789 }, { "epoch": 0.6, "grad_norm": 2.055291396604353, "learning_rate": 3.6119810276713085e-06, "loss": 0.6249, "step": 5790 }, { "epoch": 0.6, "grad_norm": 1.8777115393478256, "learning_rate": 3.6103638550597074e-06, "loss": 0.6346, "step": 5791 }, { "epoch": 0.6, "grad_norm": 1.8694877602773852, "learning_rate": 3.6087468399712842e-06, "loss": 0.6529, "step": 5792 }, { "epoch": 0.6, "grad_norm": 2.007269671680019, "learning_rate": 3.607129982589337e-06, "loss": 0.5934, "step": 5793 }, { "epoch": 0.6, "grad_norm": 1.771068848538593, "learning_rate": 3.6055132830971446e-06, "loss": 0.5829, "step": 5794 }, { "epoch": 0.6, "grad_norm": 1.9968602046327504, "learning_rate": 3.60389674167797e-06, "loss": 0.6455, "step": 5795 }, { "epoch": 0.6, "grad_norm": 1.9085289823709048, "learning_rate": 3.6022803585150574e-06, "loss": 0.6621, "step": 5796 }, { "epoch": 0.6, "grad_norm": 1.8562577460156162, "learning_rate": 3.6006641337916335e-06, "loss": 0.6193, "step": 5797 }, { "epoch": 0.6, "grad_norm": 2.0116263130134566, "learning_rate": 3.5990480676909055e-06, "loss": 0.7013, "step": 5798 }, { "epoch": 0.6, "grad_norm": 1.8696463786190631, "learning_rate": 3.597432160396064e-06, "loss": 0.592, "step": 5799 }, { "epoch": 0.6, "grad_norm": 1.9258705805458398, "learning_rate": 3.5958164120902816e-06, "loss": 0.6927, "step": 5800 }, { "epoch": 0.6, "grad_norm": 1.788877244074425, "learning_rate": 3.5942008229567128e-06, "loss": 0.6083, "step": 5801 }, { "epoch": 0.6, "grad_norm": 1.819861269764366, "learning_rate": 3.592585393178494e-06, "loss": 0.5593, "step": 5802 }, { "epoch": 0.6, "grad_norm": 2.176572903100845, "learning_rate": 3.590970122938742e-06, "loss": 0.6081, "step": 5803 }, { "epoch": 0.6, "grad_norm": 1.8421415635256122, "learning_rate": 3.589355012420558e-06, "loss": 0.5901, "step": 5804 }, { "epoch": 0.6, "grad_norm": 1.7830410387814137, "learning_rate": 3.587740061807024e-06, "loss": 0.7421, "step": 5805 }, { "epoch": 0.6, "grad_norm": 1.901456241678915, "learning_rate": 3.5861252712812032e-06, "loss": 0.5899, "step": 5806 }, { "epoch": 0.6, "grad_norm": 1.740330243536855, "learning_rate": 3.5845106410261417e-06, "loss": 0.6182, "step": 5807 }, { "epoch": 0.6, "grad_norm": 1.9849175676505417, "learning_rate": 3.5828961712248667e-06, "loss": 0.6465, "step": 5808 }, { "epoch": 0.6, "grad_norm": 1.7925579462512344, "learning_rate": 3.5812818620603883e-06, "loss": 0.6665, "step": 5809 }, { "epoch": 0.6, "grad_norm": 1.963203797068177, "learning_rate": 3.579667713715697e-06, "loss": 0.6477, "step": 5810 }, { "epoch": 0.6, "grad_norm": 1.90177521042788, "learning_rate": 3.5780537263737657e-06, "loss": 0.717, "step": 5811 }, { "epoch": 0.6, "grad_norm": 1.9095440405290003, "learning_rate": 3.576439900217552e-06, "loss": 0.6262, "step": 5812 }, { "epoch": 0.6, "grad_norm": 2.0776672586275713, "learning_rate": 3.574826235429988e-06, "loss": 0.6042, "step": 5813 }, { "epoch": 0.6, "grad_norm": 2.115976450204016, "learning_rate": 3.5732127321939925e-06, "loss": 0.7406, "step": 5814 }, { "epoch": 0.6, "grad_norm": 1.8460290362187606, "learning_rate": 3.5715993906924663e-06, "loss": 0.5892, "step": 5815 }, { "epoch": 0.6, "grad_norm": 1.8060615761663243, "learning_rate": 3.5699862111082917e-06, "loss": 0.6119, "step": 5816 }, { "epoch": 0.6, "grad_norm": 1.8786816582722556, "learning_rate": 3.56837319362433e-06, "loss": 0.6141, "step": 5817 }, { "epoch": 0.6, "grad_norm": 1.8902089472404162, "learning_rate": 3.566760338423427e-06, "loss": 0.6255, "step": 5818 }, { "epoch": 0.6, "grad_norm": 1.859106348664016, "learning_rate": 3.5651476456884103e-06, "loss": 0.6933, "step": 5819 }, { "epoch": 0.6, "grad_norm": 2.06543221051437, "learning_rate": 3.5635351156020853e-06, "loss": 0.6413, "step": 5820 }, { "epoch": 0.61, "grad_norm": 1.929133796522785, "learning_rate": 3.5619227483472417e-06, "loss": 0.6346, "step": 5821 }, { "epoch": 0.61, "grad_norm": 1.7355976170586247, "learning_rate": 3.560310544106652e-06, "loss": 0.5988, "step": 5822 }, { "epoch": 0.61, "grad_norm": 1.876723408507209, "learning_rate": 3.5586985030630685e-06, "loss": 0.5479, "step": 5823 }, { "epoch": 0.61, "grad_norm": 1.7976909367236422, "learning_rate": 3.557086625399224e-06, "loss": 0.6151, "step": 5824 }, { "epoch": 0.61, "grad_norm": 1.7457066314116323, "learning_rate": 3.555474911297835e-06, "loss": 0.6609, "step": 5825 }, { "epoch": 0.61, "grad_norm": 1.8359401944106575, "learning_rate": 3.553863360941598e-06, "loss": 0.6886, "step": 5826 }, { "epoch": 0.61, "grad_norm": 1.9190157263515015, "learning_rate": 3.552251974513194e-06, "loss": 0.6556, "step": 5827 }, { "epoch": 0.61, "grad_norm": 1.7954112329008878, "learning_rate": 3.5506407521952783e-06, "loss": 0.598, "step": 5828 }, { "epoch": 0.61, "grad_norm": 1.7418220797119244, "learning_rate": 3.5490296941704948e-06, "loss": 0.6735, "step": 5829 }, { "epoch": 0.61, "grad_norm": 1.8969973614397408, "learning_rate": 3.547418800621466e-06, "loss": 0.6207, "step": 5830 }, { "epoch": 0.61, "grad_norm": 1.950442180880578, "learning_rate": 3.545808071730795e-06, "loss": 0.604, "step": 5831 }, { "epoch": 0.61, "grad_norm": 1.6785701177368428, "learning_rate": 3.544197507681068e-06, "loss": 0.6512, "step": 5832 }, { "epoch": 0.61, "grad_norm": 1.7768027074664066, "learning_rate": 3.5425871086548513e-06, "loss": 0.5867, "step": 5833 }, { "epoch": 0.61, "grad_norm": 1.8996288570794853, "learning_rate": 3.540976874834694e-06, "loss": 0.6475, "step": 5834 }, { "epoch": 0.61, "grad_norm": 2.1207513817114783, "learning_rate": 3.539366806403123e-06, "loss": 0.6733, "step": 5835 }, { "epoch": 0.61, "grad_norm": 1.8518276147104666, "learning_rate": 3.5377569035426494e-06, "loss": 0.6066, "step": 5836 }, { "epoch": 0.61, "grad_norm": 1.9845097936191345, "learning_rate": 3.536147166435765e-06, "loss": 0.7186, "step": 5837 }, { "epoch": 0.61, "grad_norm": 1.8353019441328098, "learning_rate": 3.534537595264944e-06, "loss": 0.6514, "step": 5838 }, { "epoch": 0.61, "grad_norm": 1.7635849381509756, "learning_rate": 3.532928190212639e-06, "loss": 0.6148, "step": 5839 }, { "epoch": 0.61, "grad_norm": 1.650677732272397, "learning_rate": 3.5313189514612867e-06, "loss": 0.5851, "step": 5840 }, { "epoch": 0.61, "grad_norm": 1.9688433425039455, "learning_rate": 3.529709879193301e-06, "loss": 0.607, "step": 5841 }, { "epoch": 0.61, "grad_norm": 1.7671648226840087, "learning_rate": 3.5281009735910822e-06, "loss": 0.5911, "step": 5842 }, { "epoch": 0.61, "grad_norm": 2.4477526780203385, "learning_rate": 3.5264922348370066e-06, "loss": 0.6202, "step": 5843 }, { "epoch": 0.61, "grad_norm": 1.7697614181136327, "learning_rate": 3.524883663113435e-06, "loss": 0.6076, "step": 5844 }, { "epoch": 0.61, "grad_norm": 1.8436384469037794, "learning_rate": 3.523275258602708e-06, "loss": 0.667, "step": 5845 }, { "epoch": 0.61, "grad_norm": 1.7263835471812945, "learning_rate": 3.5216670214871475e-06, "loss": 0.6649, "step": 5846 }, { "epoch": 0.61, "grad_norm": 1.7266754034601193, "learning_rate": 3.520058951949056e-06, "loss": 0.5832, "step": 5847 }, { "epoch": 0.61, "grad_norm": 1.9772191883626575, "learning_rate": 3.518451050170718e-06, "loss": 0.6297, "step": 5848 }, { "epoch": 0.61, "grad_norm": 1.903340236332747, "learning_rate": 3.5168433163344005e-06, "loss": 0.6192, "step": 5849 }, { "epoch": 0.61, "grad_norm": 1.7564343946683172, "learning_rate": 3.5152357506223444e-06, "loss": 0.5977, "step": 5850 }, { "epoch": 0.61, "grad_norm": 1.7624806514627709, "learning_rate": 3.5136283532167786e-06, "loss": 0.578, "step": 5851 }, { "epoch": 0.61, "grad_norm": 1.7577685153115692, "learning_rate": 3.5120211242999115e-06, "loss": 0.623, "step": 5852 }, { "epoch": 0.61, "grad_norm": 2.0273575366018726, "learning_rate": 3.5104140640539302e-06, "loss": 0.5983, "step": 5853 }, { "epoch": 0.61, "grad_norm": 1.945975183296312, "learning_rate": 3.508807172661006e-06, "loss": 0.6439, "step": 5854 }, { "epoch": 0.61, "grad_norm": 1.9502084132528867, "learning_rate": 3.5072004503032876e-06, "loss": 0.6141, "step": 5855 }, { "epoch": 0.61, "grad_norm": 1.7862146823553806, "learning_rate": 3.5055938971629096e-06, "loss": 0.587, "step": 5856 }, { "epoch": 0.61, "grad_norm": 1.8574712577859098, "learning_rate": 3.5039875134219784e-06, "loss": 0.6534, "step": 5857 }, { "epoch": 0.61, "grad_norm": 1.8248444230202323, "learning_rate": 3.5023812992625905e-06, "loss": 0.625, "step": 5858 }, { "epoch": 0.61, "grad_norm": 1.85422148267425, "learning_rate": 3.5007752548668173e-06, "loss": 0.5738, "step": 5859 }, { "epoch": 0.61, "grad_norm": 1.638592929436304, "learning_rate": 3.499169380416715e-06, "loss": 0.5042, "step": 5860 }, { "epoch": 0.61, "grad_norm": 2.0112792439563982, "learning_rate": 3.4975636760943177e-06, "loss": 0.5882, "step": 5861 }, { "epoch": 0.61, "grad_norm": 1.7295258052488887, "learning_rate": 3.4959581420816413e-06, "loss": 0.6108, "step": 5862 }, { "epoch": 0.61, "grad_norm": 1.9019332337346095, "learning_rate": 3.4943527785606824e-06, "loss": 0.6732, "step": 5863 }, { "epoch": 0.61, "grad_norm": 1.9081740044039712, "learning_rate": 3.49274758571342e-06, "loss": 0.5051, "step": 5864 }, { "epoch": 0.61, "grad_norm": 1.8635557360728945, "learning_rate": 3.491142563721808e-06, "loss": 0.6049, "step": 5865 }, { "epoch": 0.61, "grad_norm": 1.583335793635603, "learning_rate": 3.489537712767786e-06, "loss": 0.5906, "step": 5866 }, { "epoch": 0.61, "grad_norm": 1.7730511646734124, "learning_rate": 3.487933033033274e-06, "loss": 0.6527, "step": 5867 }, { "epoch": 0.61, "grad_norm": 2.125335096700481, "learning_rate": 3.486328524700171e-06, "loss": 0.5927, "step": 5868 }, { "epoch": 0.61, "grad_norm": 2.0856948814252436, "learning_rate": 3.4847241879503574e-06, "loss": 0.7139, "step": 5869 }, { "epoch": 0.61, "grad_norm": 1.8465188230889573, "learning_rate": 3.4831200229656935e-06, "loss": 0.6565, "step": 5870 }, { "epoch": 0.61, "grad_norm": 1.8051867760992228, "learning_rate": 3.4815160299280225e-06, "loss": 0.5733, "step": 5871 }, { "epoch": 0.61, "grad_norm": 1.9613480970154895, "learning_rate": 3.4799122090191638e-06, "loss": 0.633, "step": 5872 }, { "epoch": 0.61, "grad_norm": 1.7007000511001091, "learning_rate": 3.47830856042092e-06, "loss": 0.5902, "step": 5873 }, { "epoch": 0.61, "grad_norm": 1.7704526339579787, "learning_rate": 3.476705084315074e-06, "loss": 0.5984, "step": 5874 }, { "epoch": 0.61, "grad_norm": 2.0012874669262692, "learning_rate": 3.47510178088339e-06, "loss": 0.6879, "step": 5875 }, { "epoch": 0.61, "grad_norm": 1.8050110100253667, "learning_rate": 3.4734986503076096e-06, "loss": 0.5875, "step": 5876 }, { "epoch": 0.61, "grad_norm": 2.132743737087264, "learning_rate": 3.4718956927694593e-06, "loss": 0.6638, "step": 5877 }, { "epoch": 0.61, "grad_norm": 1.848961511546181, "learning_rate": 3.4702929084506433e-06, "loss": 0.6154, "step": 5878 }, { "epoch": 0.61, "grad_norm": 1.7426350866830753, "learning_rate": 3.468690297532843e-06, "loss": 0.57, "step": 5879 }, { "epoch": 0.61, "grad_norm": 1.9103389699630549, "learning_rate": 3.467087860197726e-06, "loss": 0.6717, "step": 5880 }, { "epoch": 0.61, "grad_norm": 1.9028495231307951, "learning_rate": 3.4654855966269373e-06, "loss": 0.625, "step": 5881 }, { "epoch": 0.61, "grad_norm": 1.888079361566811, "learning_rate": 3.4638835070021027e-06, "loss": 0.6302, "step": 5882 }, { "epoch": 0.61, "grad_norm": 1.845351370141749, "learning_rate": 3.462281591504828e-06, "loss": 0.6333, "step": 5883 }, { "epoch": 0.61, "grad_norm": 1.732023585322755, "learning_rate": 3.4606798503166994e-06, "loss": 0.6475, "step": 5884 }, { "epoch": 0.61, "grad_norm": 1.8978626563200731, "learning_rate": 3.4590782836192837e-06, "loss": 0.6542, "step": 5885 }, { "epoch": 0.61, "grad_norm": 1.869401160752364, "learning_rate": 3.45747689159413e-06, "loss": 0.5564, "step": 5886 }, { "epoch": 0.61, "grad_norm": 1.868999390580027, "learning_rate": 3.455875674422761e-06, "loss": 0.579, "step": 5887 }, { "epoch": 0.61, "grad_norm": 1.832886798253401, "learning_rate": 3.4542746322866842e-06, "loss": 0.4929, "step": 5888 }, { "epoch": 0.61, "grad_norm": 1.913784743810917, "learning_rate": 3.452673765367389e-06, "loss": 0.6379, "step": 5889 }, { "epoch": 0.61, "grad_norm": 1.9179201733844766, "learning_rate": 3.4510730738463417e-06, "loss": 0.6739, "step": 5890 }, { "epoch": 0.61, "grad_norm": 1.8110837161807765, "learning_rate": 3.4494725579049904e-06, "loss": 0.6973, "step": 5891 }, { "epoch": 0.61, "grad_norm": 1.987201446085457, "learning_rate": 3.4478722177247624e-06, "loss": 0.6087, "step": 5892 }, { "epoch": 0.61, "grad_norm": 1.9980930635109033, "learning_rate": 3.4462720534870673e-06, "loss": 0.8181, "step": 5893 }, { "epoch": 0.61, "grad_norm": 1.8706741387059407, "learning_rate": 3.4446720653732883e-06, "loss": 0.6902, "step": 5894 }, { "epoch": 0.61, "grad_norm": 1.9409228527860711, "learning_rate": 3.4430722535647966e-06, "loss": 0.6133, "step": 5895 }, { "epoch": 0.61, "grad_norm": 2.0029309963359085, "learning_rate": 3.4414726182429388e-06, "loss": 0.6602, "step": 5896 }, { "epoch": 0.61, "grad_norm": 1.8613225733512109, "learning_rate": 3.439873159589043e-06, "loss": 0.6484, "step": 5897 }, { "epoch": 0.61, "grad_norm": 2.332715989410047, "learning_rate": 3.438273877784417e-06, "loss": 0.6361, "step": 5898 }, { "epoch": 0.61, "grad_norm": 1.988217349346568, "learning_rate": 3.4366747730103486e-06, "loss": 0.5936, "step": 5899 }, { "epoch": 0.61, "grad_norm": 1.8721773260571348, "learning_rate": 3.435075845448105e-06, "loss": 0.5663, "step": 5900 }, { "epoch": 0.61, "grad_norm": 1.9144622101502546, "learning_rate": 3.4334770952789354e-06, "loss": 0.5372, "step": 5901 }, { "epoch": 0.61, "grad_norm": 2.1063634623324843, "learning_rate": 3.4318785226840646e-06, "loss": 0.5908, "step": 5902 }, { "epoch": 0.61, "grad_norm": 1.920747599443631, "learning_rate": 3.4302801278447028e-06, "loss": 0.6181, "step": 5903 }, { "epoch": 0.61, "grad_norm": 2.3644717939435353, "learning_rate": 3.4286819109420346e-06, "loss": 0.7202, "step": 5904 }, { "epoch": 0.61, "grad_norm": 1.9060094333028947, "learning_rate": 3.4270838721572277e-06, "loss": 0.6332, "step": 5905 }, { "epoch": 0.61, "grad_norm": 1.8361097469958723, "learning_rate": 3.4254860116714284e-06, "loss": 0.6645, "step": 5906 }, { "epoch": 0.61, "grad_norm": 1.9388690291273964, "learning_rate": 3.4238883296657656e-06, "loss": 0.5735, "step": 5907 }, { "epoch": 0.61, "grad_norm": 1.9303120106787002, "learning_rate": 3.4222908263213438e-06, "loss": 0.6181, "step": 5908 }, { "epoch": 0.61, "grad_norm": 1.8319157145588998, "learning_rate": 3.4206935018192496e-06, "loss": 0.5852, "step": 5909 }, { "epoch": 0.61, "grad_norm": 1.8890578866580727, "learning_rate": 3.4190963563405482e-06, "loss": 0.6554, "step": 5910 }, { "epoch": 0.61, "grad_norm": 1.719008543508964, "learning_rate": 3.4174993900662854e-06, "loss": 0.5783, "step": 5911 }, { "epoch": 0.61, "grad_norm": 1.9381651528278054, "learning_rate": 3.4159026031774873e-06, "loss": 0.6329, "step": 5912 }, { "epoch": 0.61, "grad_norm": 1.9071649235590062, "learning_rate": 3.4143059958551576e-06, "loss": 0.5583, "step": 5913 }, { "epoch": 0.61, "grad_norm": 2.6027346173908796, "learning_rate": 3.4127095682802823e-06, "loss": 0.6465, "step": 5914 }, { "epoch": 0.61, "grad_norm": 1.7520728181991414, "learning_rate": 3.4111133206338257e-06, "loss": 0.615, "step": 5915 }, { "epoch": 0.61, "grad_norm": 2.061567472873294, "learning_rate": 3.409517253096729e-06, "loss": 0.6207, "step": 5916 }, { "epoch": 0.62, "grad_norm": 2.017959712602565, "learning_rate": 3.407921365849917e-06, "loss": 0.6219, "step": 5917 }, { "epoch": 0.62, "grad_norm": 1.6541870573418043, "learning_rate": 3.406325659074293e-06, "loss": 0.5519, "step": 5918 }, { "epoch": 0.62, "grad_norm": 2.0052219645499934, "learning_rate": 3.404730132950739e-06, "loss": 0.6655, "step": 5919 }, { "epoch": 0.62, "grad_norm": 2.373349253904208, "learning_rate": 3.403134787660117e-06, "loss": 0.638, "step": 5920 }, { "epoch": 0.62, "grad_norm": 1.9857596614742279, "learning_rate": 3.4015396233832687e-06, "loss": 0.6129, "step": 5921 }, { "epoch": 0.62, "grad_norm": 1.9865545357725798, "learning_rate": 3.3999446403010156e-06, "loss": 0.6958, "step": 5922 }, { "epoch": 0.62, "grad_norm": 2.2225225785812492, "learning_rate": 3.398349838594159e-06, "loss": 0.6527, "step": 5923 }, { "epoch": 0.62, "grad_norm": 1.965865099296652, "learning_rate": 3.3967552184434753e-06, "loss": 0.6616, "step": 5924 }, { "epoch": 0.62, "grad_norm": 1.7599925725199548, "learning_rate": 3.395160780029726e-06, "loss": 0.6546, "step": 5925 }, { "epoch": 0.62, "grad_norm": 1.7892999068048223, "learning_rate": 3.393566523533649e-06, "loss": 0.5618, "step": 5926 }, { "epoch": 0.62, "grad_norm": 1.907038190708332, "learning_rate": 3.391972449135964e-06, "loss": 0.6594, "step": 5927 }, { "epoch": 0.62, "grad_norm": 2.0532475771298313, "learning_rate": 3.3903785570173665e-06, "loss": 0.7017, "step": 5928 }, { "epoch": 0.62, "grad_norm": 1.8848373537785805, "learning_rate": 3.388784847358534e-06, "loss": 0.636, "step": 5929 }, { "epoch": 0.62, "grad_norm": 1.710821951197328, "learning_rate": 3.387191320340125e-06, "loss": 0.6399, "step": 5930 }, { "epoch": 0.62, "grad_norm": 1.8487227564053192, "learning_rate": 3.3855979761427705e-06, "loss": 0.6817, "step": 5931 }, { "epoch": 0.62, "grad_norm": 1.907129570306386, "learning_rate": 3.384004814947087e-06, "loss": 0.6738, "step": 5932 }, { "epoch": 0.62, "grad_norm": 1.8995663612234321, "learning_rate": 3.382411836933669e-06, "loss": 0.5573, "step": 5933 }, { "epoch": 0.62, "grad_norm": 1.756986647365497, "learning_rate": 3.3808190422830887e-06, "loss": 0.6431, "step": 5934 }, { "epoch": 0.62, "grad_norm": 1.7715018375793272, "learning_rate": 3.379226431175899e-06, "loss": 0.7003, "step": 5935 }, { "epoch": 0.62, "grad_norm": 2.035648914953771, "learning_rate": 3.377634003792632e-06, "loss": 0.6059, "step": 5936 }, { "epoch": 0.62, "grad_norm": 2.014535610980337, "learning_rate": 3.3760417603137976e-06, "loss": 0.5789, "step": 5937 }, { "epoch": 0.62, "grad_norm": 1.7341296370744583, "learning_rate": 3.374449700919887e-06, "loss": 0.6037, "step": 5938 }, { "epoch": 0.62, "grad_norm": 1.9227588740393737, "learning_rate": 3.372857825791367e-06, "loss": 0.7159, "step": 5939 }, { "epoch": 0.62, "grad_norm": 1.8532934737084343, "learning_rate": 3.371266135108687e-06, "loss": 0.6038, "step": 5940 }, { "epoch": 0.62, "grad_norm": 1.8469207224534798, "learning_rate": 3.3696746290522737e-06, "loss": 0.6791, "step": 5941 }, { "epoch": 0.62, "grad_norm": 1.9161878551856997, "learning_rate": 3.368083307802535e-06, "loss": 0.5931, "step": 5942 }, { "epoch": 0.62, "grad_norm": 1.7666500679293722, "learning_rate": 3.3664921715398534e-06, "loss": 0.5605, "step": 5943 }, { "epoch": 0.62, "grad_norm": 1.9254176807282661, "learning_rate": 3.3649012204445953e-06, "loss": 0.564, "step": 5944 }, { "epoch": 0.62, "grad_norm": 1.9573810160395808, "learning_rate": 3.3633104546971052e-06, "loss": 0.6968, "step": 5945 }, { "epoch": 0.62, "grad_norm": 2.0044533425543785, "learning_rate": 3.3617198744777023e-06, "loss": 0.6491, "step": 5946 }, { "epoch": 0.62, "grad_norm": 1.763232630775274, "learning_rate": 3.3601294799666896e-06, "loss": 0.6697, "step": 5947 }, { "epoch": 0.62, "grad_norm": 2.098459423812884, "learning_rate": 3.3585392713443464e-06, "loss": 0.572, "step": 5948 }, { "epoch": 0.62, "grad_norm": 1.6914031236102578, "learning_rate": 3.356949248790934e-06, "loss": 0.5091, "step": 5949 }, { "epoch": 0.62, "grad_norm": 2.1716317317901366, "learning_rate": 3.3553594124866897e-06, "loss": 0.5757, "step": 5950 }, { "epoch": 0.62, "grad_norm": 2.0048322695335354, "learning_rate": 3.3537697626118286e-06, "loss": 0.5663, "step": 5951 }, { "epoch": 0.62, "grad_norm": 1.9550871662432008, "learning_rate": 3.3521802993465513e-06, "loss": 0.6769, "step": 5952 }, { "epoch": 0.62, "grad_norm": 1.94484331544079, "learning_rate": 3.350591022871027e-06, "loss": 0.6025, "step": 5953 }, { "epoch": 0.62, "grad_norm": 1.8226399499008687, "learning_rate": 3.349001933365411e-06, "loss": 0.6214, "step": 5954 }, { "epoch": 0.62, "grad_norm": 1.8242034051010882, "learning_rate": 3.3474130310098373e-06, "loss": 0.646, "step": 5955 }, { "epoch": 0.62, "grad_norm": 1.9622212853904062, "learning_rate": 3.345824315984415e-06, "loss": 0.6289, "step": 5956 }, { "epoch": 0.62, "grad_norm": 2.120458213494939, "learning_rate": 3.3442357884692354e-06, "loss": 0.6833, "step": 5957 }, { "epoch": 0.62, "grad_norm": 2.1252064549314236, "learning_rate": 3.3426474486443673e-06, "loss": 0.5385, "step": 5958 }, { "epoch": 0.62, "grad_norm": 1.9993862933069262, "learning_rate": 3.3410592966898565e-06, "loss": 0.5866, "step": 5959 }, { "epoch": 0.62, "grad_norm": 1.794889673681397, "learning_rate": 3.3394713327857325e-06, "loss": 0.5263, "step": 5960 }, { "epoch": 0.62, "grad_norm": 1.856636747820928, "learning_rate": 3.3378835571119953e-06, "loss": 0.5611, "step": 5961 }, { "epoch": 0.62, "grad_norm": 1.7722291953552272, "learning_rate": 3.3362959698486307e-06, "loss": 0.6446, "step": 5962 }, { "epoch": 0.62, "grad_norm": 2.0901179185866945, "learning_rate": 3.3347085711756012e-06, "loss": 0.5646, "step": 5963 }, { "epoch": 0.62, "grad_norm": 1.836967165172361, "learning_rate": 3.333121361272847e-06, "loss": 0.5881, "step": 5964 }, { "epoch": 0.62, "grad_norm": 2.102039726838115, "learning_rate": 3.331534340320287e-06, "loss": 0.644, "step": 5965 }, { "epoch": 0.62, "grad_norm": 1.9283561150549664, "learning_rate": 3.3299475084978195e-06, "loss": 0.67, "step": 5966 }, { "epoch": 0.62, "grad_norm": 1.9259529340587442, "learning_rate": 3.328360865985323e-06, "loss": 0.5941, "step": 5967 }, { "epoch": 0.62, "grad_norm": 1.943892691297661, "learning_rate": 3.3267744129626483e-06, "loss": 0.6427, "step": 5968 }, { "epoch": 0.62, "grad_norm": 1.9953597123623206, "learning_rate": 3.3251881496096313e-06, "loss": 0.6807, "step": 5969 }, { "epoch": 0.62, "grad_norm": 1.6739874388513096, "learning_rate": 3.3236020761060834e-06, "loss": 0.4788, "step": 5970 }, { "epoch": 0.62, "grad_norm": 2.0506706084869584, "learning_rate": 3.322016192631795e-06, "loss": 0.6735, "step": 5971 }, { "epoch": 0.62, "grad_norm": 1.9665235766949143, "learning_rate": 3.320430499366536e-06, "loss": 0.6661, "step": 5972 }, { "epoch": 0.62, "grad_norm": 1.7042912287859477, "learning_rate": 3.3188449964900527e-06, "loss": 0.5868, "step": 5973 }, { "epoch": 0.62, "grad_norm": 1.7429369415654616, "learning_rate": 3.3172596841820713e-06, "loss": 0.5986, "step": 5974 }, { "epoch": 0.62, "grad_norm": 1.846606375173926, "learning_rate": 3.315674562622297e-06, "loss": 0.604, "step": 5975 }, { "epoch": 0.62, "grad_norm": 1.8538542112748246, "learning_rate": 3.31408963199041e-06, "loss": 0.6191, "step": 5976 }, { "epoch": 0.62, "grad_norm": 1.9148060253706294, "learning_rate": 3.312504892466073e-06, "loss": 0.6505, "step": 5977 }, { "epoch": 0.62, "grad_norm": 1.78279362426924, "learning_rate": 3.310920344228925e-06, "loss": 0.6188, "step": 5978 }, { "epoch": 0.62, "grad_norm": 1.8717477405727647, "learning_rate": 3.3093359874585832e-06, "loss": 0.6894, "step": 5979 }, { "epoch": 0.62, "grad_norm": 1.749990881968654, "learning_rate": 3.3077518223346448e-06, "loss": 0.6087, "step": 5980 }, { "epoch": 0.62, "grad_norm": 1.7233031652705608, "learning_rate": 3.3061678490366824e-06, "loss": 0.649, "step": 5981 }, { "epoch": 0.62, "grad_norm": 1.962583380637211, "learning_rate": 3.3045840677442485e-06, "loss": 0.5858, "step": 5982 }, { "epoch": 0.62, "grad_norm": 2.1968870344761626, "learning_rate": 3.303000478636874e-06, "loss": 0.6908, "step": 5983 }, { "epoch": 0.62, "grad_norm": 1.9286378413854515, "learning_rate": 3.3014170818940677e-06, "loss": 0.5823, "step": 5984 }, { "epoch": 0.62, "grad_norm": 1.9232889386442449, "learning_rate": 3.2998338776953163e-06, "loss": 0.6119, "step": 5985 }, { "epoch": 0.62, "grad_norm": 1.8910875657751063, "learning_rate": 3.2982508662200864e-06, "loss": 0.6875, "step": 5986 }, { "epoch": 0.62, "grad_norm": 1.9224963141768185, "learning_rate": 3.2966680476478196e-06, "loss": 0.6215, "step": 5987 }, { "epoch": 0.62, "grad_norm": 2.2111799563581465, "learning_rate": 3.295085422157939e-06, "loss": 0.6311, "step": 5988 }, { "epoch": 0.62, "grad_norm": 1.7623188008375033, "learning_rate": 3.2935029899298444e-06, "loss": 0.5608, "step": 5989 }, { "epoch": 0.62, "grad_norm": 1.8634780884349817, "learning_rate": 3.291920751142912e-06, "loss": 0.669, "step": 5990 }, { "epoch": 0.62, "grad_norm": 2.0076300266293696, "learning_rate": 3.290338705976497e-06, "loss": 0.6168, "step": 5991 }, { "epoch": 0.62, "grad_norm": 1.9389766540870652, "learning_rate": 3.2887568546099346e-06, "loss": 0.6409, "step": 5992 }, { "epoch": 0.62, "grad_norm": 1.8669408346664602, "learning_rate": 3.287175197222537e-06, "loss": 0.6464, "step": 5993 }, { "epoch": 0.62, "grad_norm": 2.1106017656825755, "learning_rate": 3.2855937339935933e-06, "loss": 0.6207, "step": 5994 }, { "epoch": 0.62, "grad_norm": 1.8279681125103633, "learning_rate": 3.284012465102372e-06, "loss": 0.5878, "step": 5995 }, { "epoch": 0.62, "grad_norm": 2.096349156684369, "learning_rate": 3.282431390728118e-06, "loss": 0.6014, "step": 5996 }, { "epoch": 0.62, "grad_norm": 2.032892431253479, "learning_rate": 3.280850511050058e-06, "loss": 0.689, "step": 5997 }, { "epoch": 0.62, "grad_norm": 1.9785676746663783, "learning_rate": 3.279269826247389e-06, "loss": 0.4741, "step": 5998 }, { "epoch": 0.62, "grad_norm": 1.676119831126687, "learning_rate": 3.2776893364992936e-06, "loss": 0.5467, "step": 5999 }, { "epoch": 0.62, "grad_norm": 1.90052864684741, "learning_rate": 3.2761090419849286e-06, "loss": 0.5841, "step": 6000 }, { "epoch": 0.62, "grad_norm": 2.229316068199629, "learning_rate": 3.2745289428834294e-06, "loss": 0.5785, "step": 6001 }, { "epoch": 0.62, "grad_norm": 2.0934347201144146, "learning_rate": 3.2729490393739093e-06, "loss": 0.6735, "step": 6002 }, { "epoch": 0.62, "grad_norm": 2.048084478335236, "learning_rate": 3.2713693316354593e-06, "loss": 0.6202, "step": 6003 }, { "epoch": 0.62, "grad_norm": 1.9404776557417234, "learning_rate": 3.269789819847151e-06, "loss": 0.6798, "step": 6004 }, { "epoch": 0.62, "grad_norm": 1.9162170778231924, "learning_rate": 3.2682105041880264e-06, "loss": 0.5845, "step": 6005 }, { "epoch": 0.62, "grad_norm": 1.9545142603110053, "learning_rate": 3.2666313848371113e-06, "loss": 0.6539, "step": 6006 }, { "epoch": 0.62, "grad_norm": 1.794309367132827, "learning_rate": 3.265052461973409e-06, "loss": 0.5696, "step": 6007 }, { "epoch": 0.62, "grad_norm": 1.842487367019058, "learning_rate": 3.2634737357758994e-06, "loss": 0.6149, "step": 6008 }, { "epoch": 0.62, "grad_norm": 1.723064752719736, "learning_rate": 3.26189520642354e-06, "loss": 0.5282, "step": 6009 }, { "epoch": 0.62, "grad_norm": 2.096348196467705, "learning_rate": 3.2603168740952645e-06, "loss": 0.5349, "step": 6010 }, { "epoch": 0.62, "grad_norm": 2.0586495432214025, "learning_rate": 3.2587387389699895e-06, "loss": 0.6791, "step": 6011 }, { "epoch": 0.62, "grad_norm": 1.8244663325194614, "learning_rate": 3.257160801226601e-06, "loss": 0.6393, "step": 6012 }, { "epoch": 0.63, "grad_norm": 1.8902371345502247, "learning_rate": 3.255583061043971e-06, "loss": 0.4821, "step": 6013 }, { "epoch": 0.63, "grad_norm": 1.719480343071909, "learning_rate": 3.2540055186009428e-06, "loss": 0.5497, "step": 6014 }, { "epoch": 0.63, "grad_norm": 1.876300759582372, "learning_rate": 3.252428174076341e-06, "loss": 0.6434, "step": 6015 }, { "epoch": 0.63, "grad_norm": 2.3262050183986918, "learning_rate": 3.250851027648967e-06, "loss": 0.6501, "step": 6016 }, { "epoch": 0.63, "grad_norm": 1.8741006195400267, "learning_rate": 3.2492740794975985e-06, "loss": 0.5569, "step": 6017 }, { "epoch": 0.63, "grad_norm": 1.992043370643798, "learning_rate": 3.247697329800992e-06, "loss": 0.7077, "step": 6018 }, { "epoch": 0.63, "grad_norm": 2.0062231422855503, "learning_rate": 3.246120778737883e-06, "loss": 0.6345, "step": 6019 }, { "epoch": 0.63, "grad_norm": 1.8615543687360108, "learning_rate": 3.2445444264869783e-06, "loss": 0.6295, "step": 6020 }, { "epoch": 0.63, "grad_norm": 1.9316303752921364, "learning_rate": 3.2429682732269685e-06, "loss": 0.5939, "step": 6021 }, { "epoch": 0.63, "grad_norm": 1.9484002498233846, "learning_rate": 3.2413923191365203e-06, "loss": 0.6531, "step": 6022 }, { "epoch": 0.63, "grad_norm": 2.0450463816269737, "learning_rate": 3.239816564394276e-06, "loss": 0.5847, "step": 6023 }, { "epoch": 0.63, "grad_norm": 1.8031836096624272, "learning_rate": 3.2382410091788567e-06, "loss": 0.6577, "step": 6024 }, { "epoch": 0.63, "grad_norm": 2.2460149673113237, "learning_rate": 3.2366656536688614e-06, "loss": 0.5988, "step": 6025 }, { "epoch": 0.63, "grad_norm": 1.896275207069309, "learning_rate": 3.235090498042866e-06, "loss": 0.5827, "step": 6026 }, { "epoch": 0.63, "grad_norm": 1.9320872061025782, "learning_rate": 3.2335155424794205e-06, "loss": 0.6269, "step": 6027 }, { "epoch": 0.63, "grad_norm": 1.6624219058989238, "learning_rate": 3.2319407871570574e-06, "loss": 0.663, "step": 6028 }, { "epoch": 0.63, "grad_norm": 1.8285245189929658, "learning_rate": 3.2303662322542835e-06, "loss": 0.6115, "step": 6029 }, { "epoch": 0.63, "grad_norm": 1.9162669741798801, "learning_rate": 3.228791877949583e-06, "loss": 0.6262, "step": 6030 }, { "epoch": 0.63, "grad_norm": 1.9680777538685632, "learning_rate": 3.2272177244214198e-06, "loss": 0.5613, "step": 6031 }, { "epoch": 0.63, "grad_norm": 2.0801896158128046, "learning_rate": 3.2256437718482312e-06, "loss": 0.5686, "step": 6032 }, { "epoch": 0.63, "grad_norm": 2.217100063165203, "learning_rate": 3.2240700204084353e-06, "loss": 0.6403, "step": 6033 }, { "epoch": 0.63, "grad_norm": 1.8945485168220204, "learning_rate": 3.222496470280427e-06, "loss": 0.6851, "step": 6034 }, { "epoch": 0.63, "grad_norm": 1.8383948040993052, "learning_rate": 3.220923121642573e-06, "loss": 0.6221, "step": 6035 }, { "epoch": 0.63, "grad_norm": 1.9516341763000284, "learning_rate": 3.219349974673223e-06, "loss": 0.62, "step": 6036 }, { "epoch": 0.63, "grad_norm": 1.8320310813825704, "learning_rate": 3.217777029550703e-06, "loss": 0.6195, "step": 6037 }, { "epoch": 0.63, "grad_norm": 1.847472645041628, "learning_rate": 3.2162042864533154e-06, "loss": 0.56, "step": 6038 }, { "epoch": 0.63, "grad_norm": 1.978049182310532, "learning_rate": 3.214631745559339e-06, "loss": 0.6112, "step": 6039 }, { "epoch": 0.63, "grad_norm": 1.8176012495141787, "learning_rate": 3.2130594070470307e-06, "loss": 0.5219, "step": 6040 }, { "epoch": 0.63, "grad_norm": 1.8666199364201022, "learning_rate": 3.2114872710946243e-06, "loss": 0.6095, "step": 6041 }, { "epoch": 0.63, "grad_norm": 1.969500493585429, "learning_rate": 3.2099153378803294e-06, "loss": 0.6168, "step": 6042 }, { "epoch": 0.63, "grad_norm": 1.7974069563510562, "learning_rate": 3.2083436075823353e-06, "loss": 0.6153, "step": 6043 }, { "epoch": 0.63, "grad_norm": 1.8639577027509828, "learning_rate": 3.206772080378804e-06, "loss": 0.612, "step": 6044 }, { "epoch": 0.63, "grad_norm": 1.9270717940972086, "learning_rate": 3.205200756447878e-06, "loss": 0.6043, "step": 6045 }, { "epoch": 0.63, "grad_norm": 1.7652996412307986, "learning_rate": 3.2036296359676777e-06, "loss": 0.6276, "step": 6046 }, { "epoch": 0.63, "grad_norm": 1.995920741483894, "learning_rate": 3.2020587191162956e-06, "loss": 0.482, "step": 6047 }, { "epoch": 0.63, "grad_norm": 1.9227514888406252, "learning_rate": 3.2004880060718072e-06, "loss": 0.6573, "step": 6048 }, { "epoch": 0.63, "grad_norm": 1.7486882907902446, "learning_rate": 3.1989174970122594e-06, "loss": 0.6222, "step": 6049 }, { "epoch": 0.63, "grad_norm": 2.138887268029901, "learning_rate": 3.197347192115679e-06, "loss": 0.5945, "step": 6050 }, { "epoch": 0.63, "grad_norm": 2.109380295021703, "learning_rate": 3.1957770915600696e-06, "loss": 0.6232, "step": 6051 }, { "epoch": 0.63, "grad_norm": 1.7196849956781948, "learning_rate": 3.19420719552341e-06, "loss": 0.4851, "step": 6052 }, { "epoch": 0.63, "grad_norm": 1.917722969325608, "learning_rate": 3.1926375041836573e-06, "loss": 0.5262, "step": 6053 }, { "epoch": 0.63, "grad_norm": 1.8732009281586104, "learning_rate": 3.1910680177187453e-06, "loss": 0.6462, "step": 6054 }, { "epoch": 0.63, "grad_norm": 1.9473297187009115, "learning_rate": 3.189498736306584e-06, "loss": 0.6695, "step": 6055 }, { "epoch": 0.63, "grad_norm": 1.7892239645860206, "learning_rate": 3.187929660125063e-06, "loss": 0.5702, "step": 6056 }, { "epoch": 0.63, "grad_norm": 1.888941220834544, "learning_rate": 3.186360789352041e-06, "loss": 0.6687, "step": 6057 }, { "epoch": 0.63, "grad_norm": 2.0322577338706784, "learning_rate": 3.1847921241653614e-06, "loss": 0.6612, "step": 6058 }, { "epoch": 0.63, "grad_norm": 1.9369396393065463, "learning_rate": 3.18322366474284e-06, "loss": 0.6982, "step": 6059 }, { "epoch": 0.63, "grad_norm": 2.277936713753932, "learning_rate": 3.181655411262272e-06, "loss": 0.5452, "step": 6060 }, { "epoch": 0.63, "grad_norm": 1.9074840722455486, "learning_rate": 3.1800873639014276e-06, "loss": 0.4769, "step": 6061 }, { "epoch": 0.63, "grad_norm": 1.9101919714628626, "learning_rate": 3.1785195228380527e-06, "loss": 0.6405, "step": 6062 }, { "epoch": 0.63, "grad_norm": 1.9123614225065813, "learning_rate": 3.176951888249875e-06, "loss": 0.6504, "step": 6063 }, { "epoch": 0.63, "grad_norm": 2.2213263420816998, "learning_rate": 3.1753844603145894e-06, "loss": 0.6375, "step": 6064 }, { "epoch": 0.63, "grad_norm": 1.9608161818046423, "learning_rate": 3.1738172392098752e-06, "loss": 0.5604, "step": 6065 }, { "epoch": 0.63, "grad_norm": 1.995234308228533, "learning_rate": 3.172250225113386e-06, "loss": 0.6577, "step": 6066 }, { "epoch": 0.63, "grad_norm": 2.0377980498966806, "learning_rate": 3.170683418202751e-06, "loss": 0.6402, "step": 6067 }, { "epoch": 0.63, "grad_norm": 1.8853013788369504, "learning_rate": 3.1691168186555778e-06, "loss": 0.5516, "step": 6068 }, { "epoch": 0.63, "grad_norm": 1.71003810845978, "learning_rate": 3.1675504266494493e-06, "loss": 0.6465, "step": 6069 }, { "epoch": 0.63, "grad_norm": 1.6686351788488085, "learning_rate": 3.1659842423619237e-06, "loss": 0.5669, "step": 6070 }, { "epoch": 0.63, "grad_norm": 2.061531475156852, "learning_rate": 3.1644182659705403e-06, "loss": 0.5795, "step": 6071 }, { "epoch": 0.63, "grad_norm": 1.762438560341012, "learning_rate": 3.162852497652807e-06, "loss": 0.6474, "step": 6072 }, { "epoch": 0.63, "grad_norm": 1.9593057376669718, "learning_rate": 3.161286937586214e-06, "loss": 0.6712, "step": 6073 }, { "epoch": 0.63, "grad_norm": 2.0318391689656377, "learning_rate": 3.159721585948228e-06, "loss": 0.6308, "step": 6074 }, { "epoch": 0.63, "grad_norm": 1.931145018821637, "learning_rate": 3.158156442916288e-06, "loss": 0.7147, "step": 6075 }, { "epoch": 0.63, "grad_norm": 2.337635038951802, "learning_rate": 3.156591508667814e-06, "loss": 0.734, "step": 6076 }, { "epoch": 0.63, "grad_norm": 1.9031520641680848, "learning_rate": 3.1550267833801993e-06, "loss": 0.6095, "step": 6077 }, { "epoch": 0.63, "grad_norm": 1.7487631178062244, "learning_rate": 3.1534622672308165e-06, "loss": 0.6306, "step": 6078 }, { "epoch": 0.63, "grad_norm": 1.9160299182081961, "learning_rate": 3.151897960397009e-06, "loss": 0.6177, "step": 6079 }, { "epoch": 0.63, "grad_norm": 1.9860175732897307, "learning_rate": 3.150333863056102e-06, "loss": 0.7167, "step": 6080 }, { "epoch": 0.63, "grad_norm": 1.7346053348798447, "learning_rate": 3.148769975385394e-06, "loss": 0.5572, "step": 6081 }, { "epoch": 0.63, "grad_norm": 1.7993775431735983, "learning_rate": 3.147206297562162e-06, "loss": 0.576, "step": 6082 }, { "epoch": 0.63, "grad_norm": 1.8139002403350746, "learning_rate": 3.1456428297636555e-06, "loss": 0.5244, "step": 6083 }, { "epoch": 0.63, "grad_norm": 1.6929454845149765, "learning_rate": 3.1440795721671036e-06, "loss": 0.7666, "step": 6084 }, { "epoch": 0.63, "grad_norm": 1.7528536995455826, "learning_rate": 3.1425165249497118e-06, "loss": 0.599, "step": 6085 }, { "epoch": 0.63, "grad_norm": 2.0191237440247836, "learning_rate": 3.140953688288658e-06, "loss": 0.6796, "step": 6086 }, { "epoch": 0.63, "grad_norm": 1.9680374968614391, "learning_rate": 3.1393910623611007e-06, "loss": 0.6019, "step": 6087 }, { "epoch": 0.63, "grad_norm": 1.9442953822047284, "learning_rate": 3.137828647344171e-06, "loss": 0.6288, "step": 6088 }, { "epoch": 0.63, "grad_norm": 1.981265816746576, "learning_rate": 3.1362664434149782e-06, "loss": 0.5979, "step": 6089 }, { "epoch": 0.63, "grad_norm": 1.726191123392917, "learning_rate": 3.134704450750607e-06, "loss": 0.6121, "step": 6090 }, { "epoch": 0.63, "grad_norm": 1.6721715460247808, "learning_rate": 3.133142669528118e-06, "loss": 0.5627, "step": 6091 }, { "epoch": 0.63, "grad_norm": 1.9570130159305819, "learning_rate": 3.1315810999245483e-06, "loss": 0.6396, "step": 6092 }, { "epoch": 0.63, "grad_norm": 1.995162107251734, "learning_rate": 3.1300197421169125e-06, "loss": 0.7216, "step": 6093 }, { "epoch": 0.63, "grad_norm": 2.1451911595083795, "learning_rate": 3.1284585962821957e-06, "loss": 0.6261, "step": 6094 }, { "epoch": 0.63, "grad_norm": 1.901859286098706, "learning_rate": 3.126897662597364e-06, "loss": 0.677, "step": 6095 }, { "epoch": 0.63, "grad_norm": 1.889639426022135, "learning_rate": 3.1253369412393584e-06, "loss": 0.6535, "step": 6096 }, { "epoch": 0.63, "grad_norm": 1.8686229215951038, "learning_rate": 3.1237764323850964e-06, "loss": 0.6528, "step": 6097 }, { "epoch": 0.63, "grad_norm": 2.311920832241536, "learning_rate": 3.12221613621147e-06, "loss": 0.6193, "step": 6098 }, { "epoch": 0.63, "grad_norm": 1.9239392502057686, "learning_rate": 3.1206560528953467e-06, "loss": 0.6082, "step": 6099 }, { "epoch": 0.63, "grad_norm": 1.8118553614299049, "learning_rate": 3.1190961826135744e-06, "loss": 0.635, "step": 6100 }, { "epoch": 0.63, "grad_norm": 1.7947350380331701, "learning_rate": 3.1175365255429685e-06, "loss": 0.572, "step": 6101 }, { "epoch": 0.63, "grad_norm": 1.840175405623914, "learning_rate": 3.115977081860327e-06, "loss": 0.56, "step": 6102 }, { "epoch": 0.63, "grad_norm": 2.014363206044062, "learning_rate": 3.1144178517424217e-06, "loss": 0.5143, "step": 6103 }, { "epoch": 0.63, "grad_norm": 1.692597253312618, "learning_rate": 3.1128588353660006e-06, "loss": 0.5608, "step": 6104 }, { "epoch": 0.63, "grad_norm": 2.2750803784856393, "learning_rate": 3.111300032907787e-06, "loss": 0.5791, "step": 6105 }, { "epoch": 0.63, "grad_norm": 1.7369305752415842, "learning_rate": 3.1097414445444796e-06, "loss": 0.5949, "step": 6106 }, { "epoch": 0.63, "grad_norm": 2.220660084433447, "learning_rate": 3.1081830704527535e-06, "loss": 0.6184, "step": 6107 }, { "epoch": 0.63, "grad_norm": 1.8162843137045013, "learning_rate": 3.1066249108092616e-06, "loss": 0.6274, "step": 6108 }, { "epoch": 0.64, "grad_norm": 1.7452644073711159, "learning_rate": 3.1050669657906257e-06, "loss": 0.5825, "step": 6109 }, { "epoch": 0.64, "grad_norm": 2.109057932821261, "learning_rate": 3.10350923557345e-06, "loss": 0.6562, "step": 6110 }, { "epoch": 0.64, "grad_norm": 2.07479718970697, "learning_rate": 3.101951720334312e-06, "loss": 0.5884, "step": 6111 }, { "epoch": 0.64, "grad_norm": 1.9154797157879695, "learning_rate": 3.1003944202497655e-06, "loss": 0.5808, "step": 6112 }, { "epoch": 0.64, "grad_norm": 2.0766652476271794, "learning_rate": 3.0988373354963387e-06, "loss": 0.6579, "step": 6113 }, { "epoch": 0.64, "grad_norm": 2.0534225279153886, "learning_rate": 3.097280466250536e-06, "loss": 0.6693, "step": 6114 }, { "epoch": 0.64, "grad_norm": 1.9602854542078891, "learning_rate": 3.0957238126888384e-06, "loss": 0.5914, "step": 6115 }, { "epoch": 0.64, "grad_norm": 1.8801721093726407, "learning_rate": 3.0941673749877e-06, "loss": 0.6304, "step": 6116 }, { "epoch": 0.64, "grad_norm": 2.0461787180132682, "learning_rate": 3.0926111533235526e-06, "loss": 0.6503, "step": 6117 }, { "epoch": 0.64, "grad_norm": 1.9878343487415226, "learning_rate": 3.091055147872802e-06, "loss": 0.7512, "step": 6118 }, { "epoch": 0.64, "grad_norm": 2.1283625476528787, "learning_rate": 3.0894993588118318e-06, "loss": 0.5694, "step": 6119 }, { "epoch": 0.64, "grad_norm": 1.8045352095592917, "learning_rate": 3.087943786316999e-06, "loss": 0.6016, "step": 6120 }, { "epoch": 0.64, "grad_norm": 1.8931603547365077, "learning_rate": 3.0863884305646364e-06, "loss": 0.5812, "step": 6121 }, { "epoch": 0.64, "grad_norm": 1.804868568230148, "learning_rate": 3.0848332917310532e-06, "loss": 0.5961, "step": 6122 }, { "epoch": 0.64, "grad_norm": 1.9803781360337822, "learning_rate": 3.0832783699925307e-06, "loss": 0.7075, "step": 6123 }, { "epoch": 0.64, "grad_norm": 1.8810190808368576, "learning_rate": 3.081723665525331e-06, "loss": 0.6251, "step": 6124 }, { "epoch": 0.64, "grad_norm": 1.8230914084353704, "learning_rate": 3.0801691785056863e-06, "loss": 0.6425, "step": 6125 }, { "epoch": 0.64, "grad_norm": 1.801356178216089, "learning_rate": 3.0786149091098087e-06, "loss": 0.5365, "step": 6126 }, { "epoch": 0.64, "grad_norm": 1.8914863670162068, "learning_rate": 3.0770608575138825e-06, "loss": 0.6569, "step": 6127 }, { "epoch": 0.64, "grad_norm": 1.8896895490041579, "learning_rate": 3.075507023894069e-06, "loss": 0.6015, "step": 6128 }, { "epoch": 0.64, "grad_norm": 2.1105225623734287, "learning_rate": 3.0739534084265032e-06, "loss": 0.6685, "step": 6129 }, { "epoch": 0.64, "grad_norm": 1.9830540304838769, "learning_rate": 3.072400011287299e-06, "loss": 0.6105, "step": 6130 }, { "epoch": 0.64, "grad_norm": 2.1810675903640586, "learning_rate": 3.0708468326525376e-06, "loss": 0.6237, "step": 6131 }, { "epoch": 0.64, "grad_norm": 1.8404334794364534, "learning_rate": 3.069293872698284e-06, "loss": 0.6088, "step": 6132 }, { "epoch": 0.64, "grad_norm": 1.772357721649005, "learning_rate": 3.0677411316005744e-06, "loss": 0.6274, "step": 6133 }, { "epoch": 0.64, "grad_norm": 1.8695683122224707, "learning_rate": 3.066188609535421e-06, "loss": 0.7137, "step": 6134 }, { "epoch": 0.64, "grad_norm": 1.7470798861679386, "learning_rate": 3.0646363066788114e-06, "loss": 0.5847, "step": 6135 }, { "epoch": 0.64, "grad_norm": 1.8760125269130379, "learning_rate": 3.063084223206708e-06, "loss": 0.6395, "step": 6136 }, { "epoch": 0.64, "grad_norm": 1.879803637103201, "learning_rate": 3.0615323592950495e-06, "loss": 0.5537, "step": 6137 }, { "epoch": 0.64, "grad_norm": 2.02465353465098, "learning_rate": 3.0599807151197446e-06, "loss": 0.6646, "step": 6138 }, { "epoch": 0.64, "grad_norm": 2.013051093932397, "learning_rate": 3.0584292908566836e-06, "loss": 0.601, "step": 6139 }, { "epoch": 0.64, "grad_norm": 1.8921168905387726, "learning_rate": 3.056878086681729e-06, "loss": 0.5706, "step": 6140 }, { "epoch": 0.64, "grad_norm": 1.6381413943978955, "learning_rate": 3.055327102770719e-06, "loss": 0.5007, "step": 6141 }, { "epoch": 0.64, "grad_norm": 1.9367879723399168, "learning_rate": 3.053776339299467e-06, "loss": 0.5682, "step": 6142 }, { "epoch": 0.64, "grad_norm": 1.8429248685663757, "learning_rate": 3.0522257964437586e-06, "loss": 0.5974, "step": 6143 }, { "epoch": 0.64, "grad_norm": 1.9168065728136365, "learning_rate": 3.050675474379361e-06, "loss": 0.5974, "step": 6144 }, { "epoch": 0.64, "grad_norm": 1.749866080818118, "learning_rate": 3.0491253732820063e-06, "loss": 0.6956, "step": 6145 }, { "epoch": 0.64, "grad_norm": 1.9739705708202513, "learning_rate": 3.0475754933274106e-06, "loss": 0.6371, "step": 6146 }, { "epoch": 0.64, "grad_norm": 1.7874894202016325, "learning_rate": 3.0460258346912615e-06, "loss": 0.6762, "step": 6147 }, { "epoch": 0.64, "grad_norm": 1.9124508607094237, "learning_rate": 3.044476397549221e-06, "loss": 0.6693, "step": 6148 }, { "epoch": 0.64, "grad_norm": 1.7300970486859781, "learning_rate": 3.042927182076927e-06, "loss": 0.5399, "step": 6149 }, { "epoch": 0.64, "grad_norm": 1.9977396120353856, "learning_rate": 3.0413781884499916e-06, "loss": 0.6662, "step": 6150 }, { "epoch": 0.64, "grad_norm": 1.7582271936489247, "learning_rate": 3.0398294168440023e-06, "loss": 0.6091, "step": 6151 }, { "epoch": 0.64, "grad_norm": 2.0479440125108272, "learning_rate": 3.0382808674345228e-06, "loss": 0.6703, "step": 6152 }, { "epoch": 0.64, "grad_norm": 1.9526668255453477, "learning_rate": 3.036732540397087e-06, "loss": 0.527, "step": 6153 }, { "epoch": 0.64, "grad_norm": 1.7155652955499163, "learning_rate": 3.035184435907208e-06, "loss": 0.5533, "step": 6154 }, { "epoch": 0.64, "grad_norm": 1.9812973705820782, "learning_rate": 3.0336365541403723e-06, "loss": 0.5941, "step": 6155 }, { "epoch": 0.64, "grad_norm": 1.8769962170542873, "learning_rate": 3.0320888952720414e-06, "loss": 0.596, "step": 6156 }, { "epoch": 0.64, "grad_norm": 1.923258773106532, "learning_rate": 3.0305414594776505e-06, "loss": 0.6878, "step": 6157 }, { "epoch": 0.64, "grad_norm": 2.3812002253021145, "learning_rate": 3.0289942469326106e-06, "loss": 0.5918, "step": 6158 }, { "epoch": 0.64, "grad_norm": 1.7311215954566284, "learning_rate": 3.0274472578123095e-06, "loss": 0.5567, "step": 6159 }, { "epoch": 0.64, "grad_norm": 1.8309946265303754, "learning_rate": 3.0259004922921033e-06, "loss": 0.5843, "step": 6160 }, { "epoch": 0.64, "grad_norm": 1.8394554565201098, "learning_rate": 3.0243539505473275e-06, "loss": 0.529, "step": 6161 }, { "epoch": 0.64, "grad_norm": 1.8519372361597473, "learning_rate": 3.0228076327532925e-06, "loss": 0.5625, "step": 6162 }, { "epoch": 0.64, "grad_norm": 1.8204661064383223, "learning_rate": 3.021261539085282e-06, "loss": 0.5559, "step": 6163 }, { "epoch": 0.64, "grad_norm": 1.782344163712384, "learning_rate": 3.019715669718554e-06, "loss": 0.604, "step": 6164 }, { "epoch": 0.64, "grad_norm": 1.6640325517922043, "learning_rate": 3.018170024828343e-06, "loss": 0.5919, "step": 6165 }, { "epoch": 0.64, "grad_norm": 2.037145171502773, "learning_rate": 3.016624604589855e-06, "loss": 0.6316, "step": 6166 }, { "epoch": 0.64, "grad_norm": 1.6769480920552722, "learning_rate": 3.0150794091782753e-06, "loss": 0.6897, "step": 6167 }, { "epoch": 0.64, "grad_norm": 2.0464765956664976, "learning_rate": 3.013534438768756e-06, "loss": 0.63, "step": 6168 }, { "epoch": 0.64, "grad_norm": 1.916989468935177, "learning_rate": 3.0119896935364305e-06, "loss": 0.5742, "step": 6169 }, { "epoch": 0.64, "grad_norm": 1.9472328083428492, "learning_rate": 3.010445173656405e-06, "loss": 0.6717, "step": 6170 }, { "epoch": 0.64, "grad_norm": 1.8992859253335288, "learning_rate": 3.0089008793037587e-06, "loss": 0.6509, "step": 6171 }, { "epoch": 0.64, "grad_norm": 1.916102090617852, "learning_rate": 3.0073568106535465e-06, "loss": 0.6445, "step": 6172 }, { "epoch": 0.64, "grad_norm": 1.9064952084707214, "learning_rate": 3.005812967880798e-06, "loss": 0.6296, "step": 6173 }, { "epoch": 0.64, "grad_norm": 1.807783199230649, "learning_rate": 3.004269351160518e-06, "loss": 0.6381, "step": 6174 }, { "epoch": 0.64, "grad_norm": 1.9174580476574532, "learning_rate": 3.00272596066768e-06, "loss": 0.6323, "step": 6175 }, { "epoch": 0.64, "grad_norm": 1.9492380466914858, "learning_rate": 3.001182796577239e-06, "loss": 0.6298, "step": 6176 }, { "epoch": 0.64, "grad_norm": 1.6924745865542088, "learning_rate": 2.9996398590641203e-06, "loss": 0.5399, "step": 6177 }, { "epoch": 0.64, "grad_norm": 2.033275499357687, "learning_rate": 2.998097148303225e-06, "loss": 0.7353, "step": 6178 }, { "epoch": 0.64, "grad_norm": 1.9026486542697536, "learning_rate": 2.9965546644694287e-06, "loss": 0.5794, "step": 6179 }, { "epoch": 0.64, "grad_norm": 2.0228080110290105, "learning_rate": 2.995012407737581e-06, "loss": 0.6467, "step": 6180 }, { "epoch": 0.64, "grad_norm": 1.7861014751773872, "learning_rate": 2.993470378282505e-06, "loss": 0.6066, "step": 6181 }, { "epoch": 0.64, "grad_norm": 1.839263166878586, "learning_rate": 2.9919285762789983e-06, "loss": 0.5448, "step": 6182 }, { "epoch": 0.64, "grad_norm": 1.7072274410368804, "learning_rate": 2.990387001901834e-06, "loss": 0.5578, "step": 6183 }, { "epoch": 0.64, "grad_norm": 1.6431673164569158, "learning_rate": 2.988845655325756e-06, "loss": 0.6351, "step": 6184 }, { "epoch": 0.64, "grad_norm": 1.9010572335147635, "learning_rate": 2.987304536725486e-06, "loss": 0.6327, "step": 6185 }, { "epoch": 0.64, "grad_norm": 1.6632693290336715, "learning_rate": 2.9857636462757193e-06, "loss": 0.5914, "step": 6186 }, { "epoch": 0.64, "grad_norm": 2.1089846908801015, "learning_rate": 2.984222984151124e-06, "loss": 0.6263, "step": 6187 }, { "epoch": 0.64, "grad_norm": 1.817502491313046, "learning_rate": 2.9826825505263427e-06, "loss": 0.6505, "step": 6188 }, { "epoch": 0.64, "grad_norm": 1.8606164928292765, "learning_rate": 2.981142345575994e-06, "loss": 0.6494, "step": 6189 }, { "epoch": 0.64, "grad_norm": 1.8351620719634854, "learning_rate": 2.979602369474667e-06, "loss": 0.522, "step": 6190 }, { "epoch": 0.64, "grad_norm": 1.6703557627008776, "learning_rate": 2.9780626223969256e-06, "loss": 0.5111, "step": 6191 }, { "epoch": 0.64, "grad_norm": 1.9164204387222075, "learning_rate": 2.976523104517312e-06, "loss": 0.6429, "step": 6192 }, { "epoch": 0.64, "grad_norm": 1.9499476254969241, "learning_rate": 2.9749838160103372e-06, "loss": 0.6298, "step": 6193 }, { "epoch": 0.64, "grad_norm": 2.0213576043893964, "learning_rate": 2.9734447570504898e-06, "loss": 0.6417, "step": 6194 }, { "epoch": 0.64, "grad_norm": 1.8219746597887712, "learning_rate": 2.97190592781223e-06, "loss": 0.6355, "step": 6195 }, { "epoch": 0.64, "grad_norm": 1.9916439598009843, "learning_rate": 2.9703673284699945e-06, "loss": 0.7587, "step": 6196 }, { "epoch": 0.64, "grad_norm": 1.8151195802546238, "learning_rate": 2.9688289591981887e-06, "loss": 0.6119, "step": 6197 }, { "epoch": 0.64, "grad_norm": 1.9629854400858462, "learning_rate": 2.9672908201711986e-06, "loss": 0.6639, "step": 6198 }, { "epoch": 0.64, "grad_norm": 2.0632728967170935, "learning_rate": 2.96575291156338e-06, "loss": 0.6161, "step": 6199 }, { "epoch": 0.64, "grad_norm": 2.0544901998541976, "learning_rate": 2.9642152335490633e-06, "loss": 0.5926, "step": 6200 }, { "epoch": 0.64, "grad_norm": 1.8887067530096628, "learning_rate": 2.9626777863025535e-06, "loss": 0.6645, "step": 6201 }, { "epoch": 0.64, "grad_norm": 1.9767964985943947, "learning_rate": 2.961140569998129e-06, "loss": 0.5331, "step": 6202 }, { "epoch": 0.64, "grad_norm": 1.9912020438130313, "learning_rate": 2.959603584810041e-06, "loss": 0.595, "step": 6203 }, { "epoch": 0.64, "grad_norm": 1.8860676490378074, "learning_rate": 2.9580668309125203e-06, "loss": 0.6943, "step": 6204 }, { "epoch": 0.65, "grad_norm": 2.2330331418419878, "learning_rate": 2.95653030847976e-06, "loss": 0.6634, "step": 6205 }, { "epoch": 0.65, "grad_norm": 2.081180271310904, "learning_rate": 2.954994017685937e-06, "loss": 0.5837, "step": 6206 }, { "epoch": 0.65, "grad_norm": 1.8421768186562804, "learning_rate": 2.9534579587051976e-06, "loss": 0.6476, "step": 6207 }, { "epoch": 0.65, "grad_norm": 1.8404952869659887, "learning_rate": 2.9519221317116644e-06, "loss": 0.6179, "step": 6208 }, { "epoch": 0.65, "grad_norm": 1.8764757322689638, "learning_rate": 2.9503865368794303e-06, "loss": 0.5937, "step": 6209 }, { "epoch": 0.65, "grad_norm": 2.0970231465593434, "learning_rate": 2.948851174382565e-06, "loss": 0.6625, "step": 6210 }, { "epoch": 0.65, "grad_norm": 2.198985907859825, "learning_rate": 2.947316044395112e-06, "loss": 0.7062, "step": 6211 }, { "epoch": 0.65, "grad_norm": 1.9215926364945755, "learning_rate": 2.9457811470910837e-06, "loss": 0.6313, "step": 6212 }, { "epoch": 0.65, "grad_norm": 1.8199210140099955, "learning_rate": 2.944246482644471e-06, "loss": 0.583, "step": 6213 }, { "epoch": 0.65, "grad_norm": 2.1819976857706016, "learning_rate": 2.9427120512292368e-06, "loss": 0.7136, "step": 6214 }, { "epoch": 0.65, "grad_norm": 2.0989651592874425, "learning_rate": 2.941177853019318e-06, "loss": 0.6884, "step": 6215 }, { "epoch": 0.65, "grad_norm": 1.8185220766754597, "learning_rate": 2.9396438881886234e-06, "loss": 0.6199, "step": 6216 }, { "epoch": 0.65, "grad_norm": 1.8386182832764755, "learning_rate": 2.9381101569110393e-06, "loss": 0.5703, "step": 6217 }, { "epoch": 0.65, "grad_norm": 1.7960263200181878, "learning_rate": 2.936576659360421e-06, "loss": 0.5296, "step": 6218 }, { "epoch": 0.65, "grad_norm": 2.0704183498446374, "learning_rate": 2.9350433957105995e-06, "loss": 0.6312, "step": 6219 }, { "epoch": 0.65, "grad_norm": 1.7147161698387252, "learning_rate": 2.933510366135378e-06, "loss": 0.5166, "step": 6220 }, { "epoch": 0.65, "grad_norm": 2.0667639388096304, "learning_rate": 2.9319775708085364e-06, "loss": 0.6563, "step": 6221 }, { "epoch": 0.65, "grad_norm": 1.8954356153973286, "learning_rate": 2.930445009903824e-06, "loss": 0.6817, "step": 6222 }, { "epoch": 0.65, "grad_norm": 2.1322815169531784, "learning_rate": 2.9289126835949657e-06, "loss": 0.6872, "step": 6223 }, { "epoch": 0.65, "grad_norm": 1.9819330643264859, "learning_rate": 2.9273805920556586e-06, "loss": 0.6843, "step": 6224 }, { "epoch": 0.65, "grad_norm": 2.065487847700474, "learning_rate": 2.9258487354595754e-06, "loss": 0.723, "step": 6225 }, { "epoch": 0.65, "grad_norm": 1.928418370629797, "learning_rate": 2.9243171139803617e-06, "loss": 0.6875, "step": 6226 }, { "epoch": 0.65, "grad_norm": 1.9522013519745338, "learning_rate": 2.9227857277916325e-06, "loss": 0.655, "step": 6227 }, { "epoch": 0.65, "grad_norm": 2.082709028367856, "learning_rate": 2.9212545770669814e-06, "loss": 0.6324, "step": 6228 }, { "epoch": 0.65, "grad_norm": 2.057952654711287, "learning_rate": 2.919723661979972e-06, "loss": 0.5704, "step": 6229 }, { "epoch": 0.65, "grad_norm": 1.9576733185893804, "learning_rate": 2.918192982704143e-06, "loss": 0.6425, "step": 6230 }, { "epoch": 0.65, "grad_norm": 1.6660456041502691, "learning_rate": 2.9166625394130066e-06, "loss": 0.5195, "step": 6231 }, { "epoch": 0.65, "grad_norm": 1.9031996084090193, "learning_rate": 2.9151323322800433e-06, "loss": 0.7252, "step": 6232 }, { "epoch": 0.65, "grad_norm": 1.933943035659668, "learning_rate": 2.913602361478716e-06, "loss": 0.5009, "step": 6233 }, { "epoch": 0.65, "grad_norm": 2.235638647690862, "learning_rate": 2.912072627182453e-06, "loss": 0.6169, "step": 6234 }, { "epoch": 0.65, "grad_norm": 1.7939098430985856, "learning_rate": 2.910543129564658e-06, "loss": 0.6096, "step": 6235 }, { "epoch": 0.65, "grad_norm": 1.9999371977297142, "learning_rate": 2.9090138687987075e-06, "loss": 0.626, "step": 6236 }, { "epoch": 0.65, "grad_norm": 1.7553733344385136, "learning_rate": 2.9074848450579545e-06, "loss": 0.6289, "step": 6237 }, { "epoch": 0.65, "grad_norm": 2.013563698558594, "learning_rate": 2.9059560585157197e-06, "loss": 0.6329, "step": 6238 }, { "epoch": 0.65, "grad_norm": 1.9821038414510794, "learning_rate": 2.9044275093453034e-06, "loss": 0.5404, "step": 6239 }, { "epoch": 0.65, "grad_norm": 1.880071424325999, "learning_rate": 2.9028991977199705e-06, "loss": 0.6771, "step": 6240 }, { "epoch": 0.65, "grad_norm": 1.8332617137796834, "learning_rate": 2.9013711238129693e-06, "loss": 0.6157, "step": 6241 }, { "epoch": 0.65, "grad_norm": 1.9401173115427022, "learning_rate": 2.899843287797513e-06, "loss": 0.645, "step": 6242 }, { "epoch": 0.65, "grad_norm": 2.0386914173802833, "learning_rate": 2.8983156898467885e-06, "loss": 0.6974, "step": 6243 }, { "epoch": 0.65, "grad_norm": 1.6509330084534442, "learning_rate": 2.896788330133962e-06, "loss": 0.5447, "step": 6244 }, { "epoch": 0.65, "grad_norm": 1.94624189730017, "learning_rate": 2.8952612088321636e-06, "loss": 0.6139, "step": 6245 }, { "epoch": 0.65, "grad_norm": 1.9680166782133846, "learning_rate": 2.893734326114506e-06, "loss": 0.653, "step": 6246 }, { "epoch": 0.65, "grad_norm": 1.9087806201292232, "learning_rate": 2.8922076821540657e-06, "loss": 0.5587, "step": 6247 }, { "epoch": 0.65, "grad_norm": 1.9714706233304224, "learning_rate": 2.8906812771239034e-06, "loss": 0.666, "step": 6248 }, { "epoch": 0.65, "grad_norm": 1.8099975489747797, "learning_rate": 2.889155111197036e-06, "loss": 0.5629, "step": 6249 }, { "epoch": 0.65, "grad_norm": 1.8264082657553349, "learning_rate": 2.887629184546471e-06, "loss": 0.5704, "step": 6250 }, { "epoch": 0.65, "grad_norm": 2.2058717301943824, "learning_rate": 2.8861034973451753e-06, "loss": 0.7257, "step": 6251 }, { "epoch": 0.65, "grad_norm": 1.8518154957166706, "learning_rate": 2.8845780497660996e-06, "loss": 0.6928, "step": 6252 }, { "epoch": 0.65, "grad_norm": 1.9484583542830034, "learning_rate": 2.883052841982157e-06, "loss": 0.6393, "step": 6253 }, { "epoch": 0.65, "grad_norm": 1.7555508796894785, "learning_rate": 2.8815278741662433e-06, "loss": 0.5886, "step": 6254 }, { "epoch": 0.65, "grad_norm": 1.994950526398456, "learning_rate": 2.88000314649122e-06, "loss": 0.5842, "step": 6255 }, { "epoch": 0.65, "grad_norm": 1.8662457743534389, "learning_rate": 2.878478659129923e-06, "loss": 0.584, "step": 6256 }, { "epoch": 0.65, "grad_norm": 1.9420532601252392, "learning_rate": 2.8769544122551606e-06, "loss": 0.7123, "step": 6257 }, { "epoch": 0.65, "grad_norm": 1.835196222874027, "learning_rate": 2.875430406039719e-06, "loss": 0.5903, "step": 6258 }, { "epoch": 0.65, "grad_norm": 1.8756813320395385, "learning_rate": 2.873906640656348e-06, "loss": 0.5721, "step": 6259 }, { "epoch": 0.65, "grad_norm": 1.9765833048671797, "learning_rate": 2.8723831162777806e-06, "loss": 0.6045, "step": 6260 }, { "epoch": 0.65, "grad_norm": 2.032795441691923, "learning_rate": 2.8708598330767105e-06, "loss": 0.6522, "step": 6261 }, { "epoch": 0.65, "grad_norm": 1.7505441325588476, "learning_rate": 2.869336791225817e-06, "loss": 0.5361, "step": 6262 }, { "epoch": 0.65, "grad_norm": 1.9496901968364277, "learning_rate": 2.867813990897742e-06, "loss": 0.6319, "step": 6263 }, { "epoch": 0.65, "grad_norm": 1.848210036629578, "learning_rate": 2.8662914322651046e-06, "loss": 0.5804, "step": 6264 }, { "epoch": 0.65, "grad_norm": 2.0674380320579235, "learning_rate": 2.864769115500493e-06, "loss": 0.5805, "step": 6265 }, { "epoch": 0.65, "grad_norm": 1.864513334235877, "learning_rate": 2.8632470407764746e-06, "loss": 0.6263, "step": 6266 }, { "epoch": 0.65, "grad_norm": 1.9868287714690414, "learning_rate": 2.8617252082655813e-06, "loss": 0.568, "step": 6267 }, { "epoch": 0.65, "grad_norm": 2.003702026459403, "learning_rate": 2.860203618140325e-06, "loss": 0.6686, "step": 6268 }, { "epoch": 0.65, "grad_norm": 1.9781283187382133, "learning_rate": 2.858682270573183e-06, "loss": 0.5742, "step": 6269 }, { "epoch": 0.65, "grad_norm": 1.8962333723378362, "learning_rate": 2.857161165736613e-06, "loss": 0.562, "step": 6270 }, { "epoch": 0.65, "grad_norm": 1.9548515931569148, "learning_rate": 2.8556403038030385e-06, "loss": 0.7561, "step": 6271 }, { "epoch": 0.65, "grad_norm": 2.0789855989958763, "learning_rate": 2.8541196849448582e-06, "loss": 0.5907, "step": 6272 }, { "epoch": 0.65, "grad_norm": 1.9881196918457014, "learning_rate": 2.8525993093344407e-06, "loss": 0.6955, "step": 6273 }, { "epoch": 0.65, "grad_norm": 1.968486206575867, "learning_rate": 2.8510791771441327e-06, "loss": 0.5586, "step": 6274 }, { "epoch": 0.65, "grad_norm": 1.7880365080779919, "learning_rate": 2.8495592885462476e-06, "loss": 0.5935, "step": 6275 }, { "epoch": 0.65, "grad_norm": 2.2478113171852816, "learning_rate": 2.848039643713075e-06, "loss": 0.5541, "step": 6276 }, { "epoch": 0.65, "grad_norm": 1.888739198842372, "learning_rate": 2.8465202428168753e-06, "loss": 0.6149, "step": 6277 }, { "epoch": 0.65, "grad_norm": 2.1536997825091837, "learning_rate": 2.8450010860298784e-06, "loss": 0.7143, "step": 6278 }, { "epoch": 0.65, "grad_norm": 1.814516674311857, "learning_rate": 2.8434821735242935e-06, "loss": 0.6282, "step": 6279 }, { "epoch": 0.65, "grad_norm": 2.01424137795222, "learning_rate": 2.841963505472294e-06, "loss": 0.6298, "step": 6280 }, { "epoch": 0.65, "grad_norm": 1.9768897380178496, "learning_rate": 2.8404450820460326e-06, "loss": 0.6343, "step": 6281 }, { "epoch": 0.65, "grad_norm": 2.327163802642382, "learning_rate": 2.838926903417629e-06, "loss": 0.7132, "step": 6282 }, { "epoch": 0.65, "grad_norm": 1.968349224205387, "learning_rate": 2.837408969759181e-06, "loss": 0.5983, "step": 6283 }, { "epoch": 0.65, "grad_norm": 1.7460529943975944, "learning_rate": 2.8358912812427497e-06, "loss": 0.5936, "step": 6284 }, { "epoch": 0.65, "grad_norm": 2.0593792416805363, "learning_rate": 2.834373838040382e-06, "loss": 0.6621, "step": 6285 }, { "epoch": 0.65, "grad_norm": 2.0565360915182045, "learning_rate": 2.832856640324078e-06, "loss": 0.6497, "step": 6286 }, { "epoch": 0.65, "grad_norm": 1.9303234583627442, "learning_rate": 2.831339688265829e-06, "loss": 0.6376, "step": 6287 }, { "epoch": 0.65, "grad_norm": 2.1135210848654555, "learning_rate": 2.829822982037585e-06, "loss": 0.71, "step": 6288 }, { "epoch": 0.65, "grad_norm": 2.0335602868059968, "learning_rate": 2.8283065218112775e-06, "loss": 0.6106, "step": 6289 }, { "epoch": 0.65, "grad_norm": 2.0336428429947935, "learning_rate": 2.826790307758802e-06, "loss": 0.7293, "step": 6290 }, { "epoch": 0.65, "grad_norm": 1.9485197680578654, "learning_rate": 2.8252743400520345e-06, "loss": 0.6481, "step": 6291 }, { "epoch": 0.65, "grad_norm": 1.9136820372082457, "learning_rate": 2.8237586188628153e-06, "loss": 0.5989, "step": 6292 }, { "epoch": 0.65, "grad_norm": 1.8096490093595976, "learning_rate": 2.8222431443629617e-06, "loss": 0.5179, "step": 6293 }, { "epoch": 0.65, "grad_norm": 1.9304167213931087, "learning_rate": 2.820727916724257e-06, "loss": 0.6775, "step": 6294 }, { "epoch": 0.65, "grad_norm": 1.853944468773223, "learning_rate": 2.8192129361184685e-06, "loss": 0.6527, "step": 6295 }, { "epoch": 0.65, "grad_norm": 1.9454826200033084, "learning_rate": 2.8176982027173206e-06, "loss": 0.5919, "step": 6296 }, { "epoch": 0.65, "grad_norm": 2.0221472638370903, "learning_rate": 2.816183716692522e-06, "loss": 0.5724, "step": 6297 }, { "epoch": 0.65, "grad_norm": 2.116282365722759, "learning_rate": 2.8146694782157447e-06, "loss": 0.6334, "step": 6298 }, { "epoch": 0.65, "grad_norm": 1.9071368964119593, "learning_rate": 2.8131554874586396e-06, "loss": 0.612, "step": 6299 }, { "epoch": 0.65, "grad_norm": 1.9872247909004435, "learning_rate": 2.8116417445928245e-06, "loss": 0.6012, "step": 6300 }, { "epoch": 0.65, "grad_norm": 1.976827376780296, "learning_rate": 2.810128249789892e-06, "loss": 0.6022, "step": 6301 }, { "epoch": 0.66, "grad_norm": 1.805685551078074, "learning_rate": 2.808615003221401e-06, "loss": 0.5421, "step": 6302 }, { "epoch": 0.66, "grad_norm": 1.8361096040269862, "learning_rate": 2.8071020050588927e-06, "loss": 0.5828, "step": 6303 }, { "epoch": 0.66, "grad_norm": 2.117462557354621, "learning_rate": 2.8055892554738683e-06, "loss": 0.67, "step": 6304 }, { "epoch": 0.66, "grad_norm": 1.9521166314972613, "learning_rate": 2.804076754637812e-06, "loss": 0.6384, "step": 6305 }, { "epoch": 0.66, "grad_norm": 1.887459953486852, "learning_rate": 2.8025645027221704e-06, "loss": 0.5704, "step": 6306 }, { "epoch": 0.66, "grad_norm": 1.867313977993055, "learning_rate": 2.801052499898369e-06, "loss": 0.5305, "step": 6307 }, { "epoch": 0.66, "grad_norm": 1.9997508923479637, "learning_rate": 2.7995407463378004e-06, "loss": 0.6093, "step": 6308 }, { "epoch": 0.66, "grad_norm": 1.8703093721931023, "learning_rate": 2.7980292422118282e-06, "loss": 0.613, "step": 6309 }, { "epoch": 0.66, "grad_norm": 2.061021653564945, "learning_rate": 2.7965179876917946e-06, "loss": 0.5967, "step": 6310 }, { "epoch": 0.66, "grad_norm": 1.843007255448845, "learning_rate": 2.795006982949006e-06, "loss": 0.6791, "step": 6311 }, { "epoch": 0.66, "grad_norm": 1.9714166575628622, "learning_rate": 2.7934962281547422e-06, "loss": 0.6519, "step": 6312 }, { "epoch": 0.66, "grad_norm": 1.7854552666884103, "learning_rate": 2.7919857234802593e-06, "loss": 0.5518, "step": 6313 }, { "epoch": 0.66, "grad_norm": 1.9856717506321173, "learning_rate": 2.7904754690967808e-06, "loss": 0.6034, "step": 6314 }, { "epoch": 0.66, "grad_norm": 2.2295217500421063, "learning_rate": 2.7889654651754987e-06, "loss": 0.53, "step": 6315 }, { "epoch": 0.66, "grad_norm": 1.6963943668827721, "learning_rate": 2.7874557118875863e-06, "loss": 0.5937, "step": 6316 }, { "epoch": 0.66, "grad_norm": 2.053957569068724, "learning_rate": 2.785946209404178e-06, "loss": 0.7016, "step": 6317 }, { "epoch": 0.66, "grad_norm": 1.911492913171324, "learning_rate": 2.784436957896388e-06, "loss": 0.7116, "step": 6318 }, { "epoch": 0.66, "grad_norm": 1.8810697368746152, "learning_rate": 2.7829279575352953e-06, "loss": 0.5795, "step": 6319 }, { "epoch": 0.66, "grad_norm": 1.9914019124478899, "learning_rate": 2.781419208491958e-06, "loss": 0.5958, "step": 6320 }, { "epoch": 0.66, "grad_norm": 1.712755966691504, "learning_rate": 2.7799107109373956e-06, "loss": 0.5469, "step": 6321 }, { "epoch": 0.66, "grad_norm": 1.8143583254344464, "learning_rate": 2.7784024650426133e-06, "loss": 0.6282, "step": 6322 }, { "epoch": 0.66, "grad_norm": 2.1419849838155, "learning_rate": 2.7768944709785705e-06, "loss": 0.7249, "step": 6323 }, { "epoch": 0.66, "grad_norm": 1.7170329232042192, "learning_rate": 2.775386728916212e-06, "loss": 0.5251, "step": 6324 }, { "epoch": 0.66, "grad_norm": 2.742228854334704, "learning_rate": 2.7738792390264456e-06, "loss": 0.6825, "step": 6325 }, { "epoch": 0.66, "grad_norm": 1.8459902103849364, "learning_rate": 2.772372001480159e-06, "loss": 0.6152, "step": 6326 }, { "epoch": 0.66, "grad_norm": 1.8776493794011735, "learning_rate": 2.7708650164482e-06, "loss": 0.639, "step": 6327 }, { "epoch": 0.66, "grad_norm": 2.063971101948811, "learning_rate": 2.7693582841013996e-06, "loss": 0.6364, "step": 6328 }, { "epoch": 0.66, "grad_norm": 2.036669543108079, "learning_rate": 2.767851804610552e-06, "loss": 0.64, "step": 6329 }, { "epoch": 0.66, "grad_norm": 1.6546039954617904, "learning_rate": 2.7663455781464245e-06, "loss": 0.5424, "step": 6330 }, { "epoch": 0.66, "grad_norm": 1.8036933164626912, "learning_rate": 2.7648396048797554e-06, "loss": 0.5899, "step": 6331 }, { "epoch": 0.66, "grad_norm": 1.8285545473432434, "learning_rate": 2.7633338849812593e-06, "loss": 0.5668, "step": 6332 }, { "epoch": 0.66, "grad_norm": 1.8127905625169214, "learning_rate": 2.7618284186216137e-06, "loss": 0.6676, "step": 6333 }, { "epoch": 0.66, "grad_norm": 2.153394766265645, "learning_rate": 2.760323205971476e-06, "loss": 0.6618, "step": 6334 }, { "epoch": 0.66, "grad_norm": 1.9747255781767126, "learning_rate": 2.7588182472014668e-06, "loss": 0.6172, "step": 6335 }, { "epoch": 0.66, "grad_norm": 2.0717810839011723, "learning_rate": 2.757313542482185e-06, "loss": 0.6448, "step": 6336 }, { "epoch": 0.66, "grad_norm": 1.9391684607298043, "learning_rate": 2.7558090919841972e-06, "loss": 0.6767, "step": 6337 }, { "epoch": 0.66, "grad_norm": 1.900234867377518, "learning_rate": 2.7543048958780395e-06, "loss": 0.567, "step": 6338 }, { "epoch": 0.66, "grad_norm": 2.026675318690998, "learning_rate": 2.7528009543342197e-06, "loss": 0.6441, "step": 6339 }, { "epoch": 0.66, "grad_norm": 1.9912333544995164, "learning_rate": 2.751297267523223e-06, "loss": 0.6838, "step": 6340 }, { "epoch": 0.66, "grad_norm": 1.8455002654000203, "learning_rate": 2.7497938356154957e-06, "loss": 0.4949, "step": 6341 }, { "epoch": 0.66, "grad_norm": 1.944518401536242, "learning_rate": 2.748290658781465e-06, "loss": 0.5808, "step": 6342 }, { "epoch": 0.66, "grad_norm": 1.8734162282177562, "learning_rate": 2.746787737191521e-06, "loss": 0.6036, "step": 6343 }, { "epoch": 0.66, "grad_norm": 2.0569689392989594, "learning_rate": 2.7452850710160305e-06, "loss": 0.6238, "step": 6344 }, { "epoch": 0.66, "grad_norm": 1.859492968004961, "learning_rate": 2.7437826604253296e-06, "loss": 0.6002, "step": 6345 }, { "epoch": 0.66, "grad_norm": 1.9239528734480238, "learning_rate": 2.7422805055897224e-06, "loss": 0.6592, "step": 6346 }, { "epoch": 0.66, "grad_norm": 1.7751066614137307, "learning_rate": 2.74077860667949e-06, "loss": 0.6482, "step": 6347 }, { "epoch": 0.66, "grad_norm": 1.8529491712765056, "learning_rate": 2.7392769638648775e-06, "loss": 0.6862, "step": 6348 }, { "epoch": 0.66, "grad_norm": 1.8998307106205168, "learning_rate": 2.73777557731611e-06, "loss": 0.6126, "step": 6349 }, { "epoch": 0.66, "grad_norm": 2.118611629913783, "learning_rate": 2.736274447203373e-06, "loss": 0.627, "step": 6350 }, { "epoch": 0.66, "grad_norm": 1.9556808686347844, "learning_rate": 2.7347735736968318e-06, "loss": 0.6527, "step": 6351 }, { "epoch": 0.66, "grad_norm": 2.013462843348238, "learning_rate": 2.733272956966615e-06, "loss": 0.7476, "step": 6352 }, { "epoch": 0.66, "grad_norm": 1.8135008649655475, "learning_rate": 2.7317725971828302e-06, "loss": 0.6531, "step": 6353 }, { "epoch": 0.66, "grad_norm": 1.9304868034857994, "learning_rate": 2.7302724945155486e-06, "loss": 0.6678, "step": 6354 }, { "epoch": 0.66, "grad_norm": 1.9647717480570688, "learning_rate": 2.728772649134818e-06, "loss": 0.6529, "step": 6355 }, { "epoch": 0.66, "grad_norm": 1.920259935679486, "learning_rate": 2.7272730612106513e-06, "loss": 0.5923, "step": 6356 }, { "epoch": 0.66, "grad_norm": 2.1502531709807124, "learning_rate": 2.7257737309130393e-06, "loss": 0.6569, "step": 6357 }, { "epoch": 0.66, "grad_norm": 1.9442863146247567, "learning_rate": 2.7242746584119364e-06, "loss": 0.6219, "step": 6358 }, { "epoch": 0.66, "grad_norm": 1.8695957383517858, "learning_rate": 2.722775843877275e-06, "loss": 0.5603, "step": 6359 }, { "epoch": 0.66, "grad_norm": 2.1452012554076223, "learning_rate": 2.7212772874789484e-06, "loss": 0.6129, "step": 6360 }, { "epoch": 0.66, "grad_norm": 1.7405043357807028, "learning_rate": 2.7197789893868307e-06, "loss": 0.6054, "step": 6361 }, { "epoch": 0.66, "grad_norm": 1.8611986630402495, "learning_rate": 2.7182809497707594e-06, "loss": 0.647, "step": 6362 }, { "epoch": 0.66, "grad_norm": 1.8830375405003674, "learning_rate": 2.7167831688005502e-06, "loss": 0.5694, "step": 6363 }, { "epoch": 0.66, "grad_norm": 1.7562835116328723, "learning_rate": 2.715285646645981e-06, "loss": 0.6591, "step": 6364 }, { "epoch": 0.66, "grad_norm": 1.7368453184663584, "learning_rate": 2.7137883834768076e-06, "loss": 0.5398, "step": 6365 }, { "epoch": 0.66, "grad_norm": 1.9427737888494707, "learning_rate": 2.7122913794627528e-06, "loss": 0.6606, "step": 6366 }, { "epoch": 0.66, "grad_norm": 1.8309498526643528, "learning_rate": 2.710794634773508e-06, "loss": 0.5763, "step": 6367 }, { "epoch": 0.66, "grad_norm": 1.8981317509223192, "learning_rate": 2.7092981495787387e-06, "loss": 0.5609, "step": 6368 }, { "epoch": 0.66, "grad_norm": 2.0381064382923353, "learning_rate": 2.7078019240480826e-06, "loss": 0.5834, "step": 6369 }, { "epoch": 0.66, "grad_norm": 2.0360199613465335, "learning_rate": 2.706305958351141e-06, "loss": 0.5999, "step": 6370 }, { "epoch": 0.66, "grad_norm": 1.8851392663074493, "learning_rate": 2.7048102526574948e-06, "loss": 0.5561, "step": 6371 }, { "epoch": 0.66, "grad_norm": 2.0319791070048185, "learning_rate": 2.7033148071366866e-06, "loss": 0.6536, "step": 6372 }, { "epoch": 0.66, "grad_norm": 1.9323663914875056, "learning_rate": 2.7018196219582404e-06, "loss": 0.59, "step": 6373 }, { "epoch": 0.66, "grad_norm": 2.148618386983816, "learning_rate": 2.700324697291634e-06, "loss": 0.6185, "step": 6374 }, { "epoch": 0.66, "grad_norm": 1.875273962996191, "learning_rate": 2.698830033306334e-06, "loss": 0.6204, "step": 6375 }, { "epoch": 0.66, "grad_norm": 2.050493079361734, "learning_rate": 2.6973356301717633e-06, "loss": 0.6846, "step": 6376 }, { "epoch": 0.66, "grad_norm": 1.9716423747607381, "learning_rate": 2.6958414880573257e-06, "loss": 0.552, "step": 6377 }, { "epoch": 0.66, "grad_norm": 2.0713906418446144, "learning_rate": 2.694347607132387e-06, "loss": 0.6723, "step": 6378 }, { "epoch": 0.66, "grad_norm": 1.9824685803451119, "learning_rate": 2.692853987566291e-06, "loss": 0.5323, "step": 6379 }, { "epoch": 0.66, "grad_norm": 2.0917417738256425, "learning_rate": 2.6913606295283436e-06, "loss": 0.7374, "step": 6380 }, { "epoch": 0.66, "grad_norm": 1.8222154905337453, "learning_rate": 2.689867533187829e-06, "loss": 0.6312, "step": 6381 }, { "epoch": 0.66, "grad_norm": 2.0814088840248615, "learning_rate": 2.688374698713997e-06, "loss": 0.6002, "step": 6382 }, { "epoch": 0.66, "grad_norm": 2.193140630954233, "learning_rate": 2.6868821262760673e-06, "loss": 0.732, "step": 6383 }, { "epoch": 0.66, "grad_norm": 1.753162209988003, "learning_rate": 2.6853898160432347e-06, "loss": 0.536, "step": 6384 }, { "epoch": 0.66, "grad_norm": 1.7609052391335116, "learning_rate": 2.683897768184657e-06, "loss": 0.523, "step": 6385 }, { "epoch": 0.66, "grad_norm": 1.892836073277086, "learning_rate": 2.6824059828694715e-06, "loss": 0.6081, "step": 6386 }, { "epoch": 0.66, "grad_norm": 1.644640787589624, "learning_rate": 2.6809144602667747e-06, "loss": 0.4926, "step": 6387 }, { "epoch": 0.66, "grad_norm": 1.7857425923134511, "learning_rate": 2.6794232005456468e-06, "loss": 0.6601, "step": 6388 }, { "epoch": 0.66, "grad_norm": 1.9604816465995976, "learning_rate": 2.6779322038751217e-06, "loss": 0.6702, "step": 6389 }, { "epoch": 0.66, "grad_norm": 1.938706881138097, "learning_rate": 2.676441470424218e-06, "loss": 0.5822, "step": 6390 }, { "epoch": 0.66, "grad_norm": 1.9790553819807086, "learning_rate": 2.674951000361916e-06, "loss": 0.5655, "step": 6391 }, { "epoch": 0.66, "grad_norm": 2.078457287587751, "learning_rate": 2.673460793857173e-06, "loss": 0.6475, "step": 6392 }, { "epoch": 0.66, "grad_norm": 2.1570179625592285, "learning_rate": 2.6719708510789077e-06, "loss": 0.651, "step": 6393 }, { "epoch": 0.66, "grad_norm": 1.820206500935895, "learning_rate": 2.6704811721960174e-06, "loss": 0.6093, "step": 6394 }, { "epoch": 0.66, "grad_norm": 1.8501666310944005, "learning_rate": 2.6689917573773615e-06, "loss": 0.6263, "step": 6395 }, { "epoch": 0.66, "grad_norm": 2.0029446573228453, "learning_rate": 2.6675026067917808e-06, "loss": 0.6917, "step": 6396 }, { "epoch": 0.66, "grad_norm": 1.8953373502231254, "learning_rate": 2.6660137206080703e-06, "loss": 0.6434, "step": 6397 }, { "epoch": 0.67, "grad_norm": 2.2750848498069702, "learning_rate": 2.66452509899501e-06, "loss": 0.6846, "step": 6398 }, { "epoch": 0.67, "grad_norm": 1.7810113358824438, "learning_rate": 2.66303674212134e-06, "loss": 0.6938, "step": 6399 }, { "epoch": 0.67, "grad_norm": 1.8671929417800113, "learning_rate": 2.6615486501557765e-06, "loss": 0.6661, "step": 6400 }, { "epoch": 0.67, "grad_norm": 1.8051102463110213, "learning_rate": 2.660060823267001e-06, "loss": 0.5891, "step": 6401 }, { "epoch": 0.67, "grad_norm": 1.9343515924482275, "learning_rate": 2.6585732616236705e-06, "loss": 0.6584, "step": 6402 }, { "epoch": 0.67, "grad_norm": 1.8462900458666527, "learning_rate": 2.657085965394406e-06, "loss": 0.6426, "step": 6403 }, { "epoch": 0.67, "grad_norm": 1.9951702962287277, "learning_rate": 2.655598934747801e-06, "loss": 0.5901, "step": 6404 }, { "epoch": 0.67, "grad_norm": 1.9579632915860867, "learning_rate": 2.654112169852418e-06, "loss": 0.6673, "step": 6405 }, { "epoch": 0.67, "grad_norm": 1.8767384202403952, "learning_rate": 2.652625670876794e-06, "loss": 0.6249, "step": 6406 }, { "epoch": 0.67, "grad_norm": 1.965704158295992, "learning_rate": 2.6511394379894274e-06, "loss": 0.632, "step": 6407 }, { "epoch": 0.67, "grad_norm": 2.1880462552255833, "learning_rate": 2.6496534713587952e-06, "loss": 0.8164, "step": 6408 }, { "epoch": 0.67, "grad_norm": 1.9072224748583726, "learning_rate": 2.648167771153337e-06, "loss": 0.5904, "step": 6409 }, { "epoch": 0.67, "grad_norm": 1.8003432003879964, "learning_rate": 2.6466823375414686e-06, "loss": 0.5516, "step": 6410 }, { "epoch": 0.67, "grad_norm": 1.940253426639748, "learning_rate": 2.6451971706915713e-06, "loss": 0.6669, "step": 6411 }, { "epoch": 0.67, "grad_norm": 1.784289714434628, "learning_rate": 2.6437122707719964e-06, "loss": 0.6114, "step": 6412 }, { "epoch": 0.67, "grad_norm": 1.8651482926009573, "learning_rate": 2.6422276379510635e-06, "loss": 0.564, "step": 6413 }, { "epoch": 0.67, "grad_norm": 1.7832427381408453, "learning_rate": 2.6407432723970694e-06, "loss": 0.6461, "step": 6414 }, { "epoch": 0.67, "grad_norm": 1.8443549195554543, "learning_rate": 2.6392591742782704e-06, "loss": 0.6369, "step": 6415 }, { "epoch": 0.67, "grad_norm": 1.8770648648089452, "learning_rate": 2.637775343762902e-06, "loss": 0.6854, "step": 6416 }, { "epoch": 0.67, "grad_norm": 1.7688361860428412, "learning_rate": 2.6362917810191597e-06, "loss": 0.5941, "step": 6417 }, { "epoch": 0.67, "grad_norm": 2.052556551651509, "learning_rate": 2.634808486215219e-06, "loss": 0.6625, "step": 6418 }, { "epoch": 0.67, "grad_norm": 1.8203041172722234, "learning_rate": 2.633325459519218e-06, "loss": 0.5459, "step": 6419 }, { "epoch": 0.67, "grad_norm": 1.9295198108217466, "learning_rate": 2.6318427010992644e-06, "loss": 0.6982, "step": 6420 }, { "epoch": 0.67, "grad_norm": 1.8076841531615386, "learning_rate": 2.6303602111234394e-06, "loss": 0.6608, "step": 6421 }, { "epoch": 0.67, "grad_norm": 1.9542372550586, "learning_rate": 2.6288779897597894e-06, "loss": 0.6433, "step": 6422 }, { "epoch": 0.67, "grad_norm": 1.7732041112945003, "learning_rate": 2.627396037176336e-06, "loss": 0.6528, "step": 6423 }, { "epoch": 0.67, "grad_norm": 1.6589412606005547, "learning_rate": 2.6259143535410635e-06, "loss": 0.6287, "step": 6424 }, { "epoch": 0.67, "grad_norm": 1.9124574288954266, "learning_rate": 2.6244329390219347e-06, "loss": 0.6141, "step": 6425 }, { "epoch": 0.67, "grad_norm": 2.0180214984374745, "learning_rate": 2.6229517937868687e-06, "loss": 0.6131, "step": 6426 }, { "epoch": 0.67, "grad_norm": 2.402149675102522, "learning_rate": 2.621470918003768e-06, "loss": 0.6226, "step": 6427 }, { "epoch": 0.67, "grad_norm": 2.0034872878783734, "learning_rate": 2.6199903118404934e-06, "loss": 0.6166, "step": 6428 }, { "epoch": 0.67, "grad_norm": 1.9773378931296095, "learning_rate": 2.6185099754648846e-06, "loss": 0.6453, "step": 6429 }, { "epoch": 0.67, "grad_norm": 1.7467829991028916, "learning_rate": 2.617029909044742e-06, "loss": 0.5366, "step": 6430 }, { "epoch": 0.67, "grad_norm": 1.802869220129952, "learning_rate": 2.615550112747844e-06, "loss": 0.6091, "step": 6431 }, { "epoch": 0.67, "grad_norm": 2.0939771876409927, "learning_rate": 2.614070586741929e-06, "loss": 0.666, "step": 6432 }, { "epoch": 0.67, "grad_norm": 1.8675237371257338, "learning_rate": 2.612591331194717e-06, "loss": 0.6055, "step": 6433 }, { "epoch": 0.67, "grad_norm": 1.8547868852263747, "learning_rate": 2.611112346273881e-06, "loss": 0.7287, "step": 6434 }, { "epoch": 0.67, "grad_norm": 2.125456167041699, "learning_rate": 2.6096336321470796e-06, "loss": 0.6753, "step": 6435 }, { "epoch": 0.67, "grad_norm": 1.7754033248669119, "learning_rate": 2.608155188981927e-06, "loss": 0.6194, "step": 6436 }, { "epoch": 0.67, "grad_norm": 1.9086265288313402, "learning_rate": 2.6066770169460198e-06, "loss": 0.6433, "step": 6437 }, { "epoch": 0.67, "grad_norm": 2.0396739748065693, "learning_rate": 2.605199116206912e-06, "loss": 0.632, "step": 6438 }, { "epoch": 0.67, "grad_norm": 2.0428624717964166, "learning_rate": 2.603721486932137e-06, "loss": 0.5821, "step": 6439 }, { "epoch": 0.67, "grad_norm": 1.9531009300773505, "learning_rate": 2.602244129289189e-06, "loss": 0.6206, "step": 6440 }, { "epoch": 0.67, "grad_norm": 1.6950662619278845, "learning_rate": 2.6007670434455357e-06, "loss": 0.6074, "step": 6441 }, { "epoch": 0.67, "grad_norm": 2.2333214792807508, "learning_rate": 2.599290229568612e-06, "loss": 0.6618, "step": 6442 }, { "epoch": 0.67, "grad_norm": 1.8847057365906073, "learning_rate": 2.5978136878258255e-06, "loss": 0.5443, "step": 6443 }, { "epoch": 0.67, "grad_norm": 1.9719360697628705, "learning_rate": 2.596337418384548e-06, "loss": 0.568, "step": 6444 }, { "epoch": 0.67, "grad_norm": 2.078216650575824, "learning_rate": 2.594861421412126e-06, "loss": 0.714, "step": 6445 }, { "epoch": 0.67, "grad_norm": 1.7669285768038225, "learning_rate": 2.5933856970758693e-06, "loss": 0.6496, "step": 6446 }, { "epoch": 0.67, "grad_norm": 1.7929630099134348, "learning_rate": 2.591910245543063e-06, "loss": 0.5728, "step": 6447 }, { "epoch": 0.67, "grad_norm": 1.9304811982301202, "learning_rate": 2.5904350669809554e-06, "loss": 0.6409, "step": 6448 }, { "epoch": 0.67, "grad_norm": 1.9345487202393299, "learning_rate": 2.5889601615567657e-06, "loss": 0.6752, "step": 6449 }, { "epoch": 0.67, "grad_norm": 2.2505125627362528, "learning_rate": 2.5874855294376853e-06, "loss": 0.5156, "step": 6450 }, { "epoch": 0.67, "grad_norm": 2.0098075540433054, "learning_rate": 2.586011170790872e-06, "loss": 0.6312, "step": 6451 }, { "epoch": 0.67, "grad_norm": 1.7605667785806696, "learning_rate": 2.5845370857834497e-06, "loss": 0.5879, "step": 6452 }, { "epoch": 0.67, "grad_norm": 1.9979475108404847, "learning_rate": 2.583063274582518e-06, "loss": 0.669, "step": 6453 }, { "epoch": 0.67, "grad_norm": 1.9387775598018375, "learning_rate": 2.581589737355138e-06, "loss": 0.6661, "step": 6454 }, { "epoch": 0.67, "grad_norm": 2.017076403377001, "learning_rate": 2.5801164742683484e-06, "loss": 0.625, "step": 6455 }, { "epoch": 0.67, "grad_norm": 1.9337213692942614, "learning_rate": 2.5786434854891482e-06, "loss": 0.719, "step": 6456 }, { "epoch": 0.67, "grad_norm": 2.0865829156729423, "learning_rate": 2.5771707711845096e-06, "loss": 0.6503, "step": 6457 }, { "epoch": 0.67, "grad_norm": 2.2744014743252934, "learning_rate": 2.5756983315213748e-06, "loss": 0.7139, "step": 6458 }, { "epoch": 0.67, "grad_norm": 1.6805694624059913, "learning_rate": 2.5742261666666506e-06, "loss": 0.6167, "step": 6459 }, { "epoch": 0.67, "grad_norm": 1.8025014515127096, "learning_rate": 2.5727542767872188e-06, "loss": 0.6049, "step": 6460 }, { "epoch": 0.67, "grad_norm": 1.8839136762795483, "learning_rate": 2.5712826620499227e-06, "loss": 0.5673, "step": 6461 }, { "epoch": 0.67, "grad_norm": 1.997611542367455, "learning_rate": 2.569811322621584e-06, "loss": 0.6339, "step": 6462 }, { "epoch": 0.67, "grad_norm": 2.107752055939515, "learning_rate": 2.5683402586689788e-06, "loss": 0.6669, "step": 6463 }, { "epoch": 0.67, "grad_norm": 2.1043518474334606, "learning_rate": 2.5668694703588683e-06, "loss": 0.626, "step": 6464 }, { "epoch": 0.67, "grad_norm": 1.904909097519436, "learning_rate": 2.565398957857969e-06, "loss": 0.5783, "step": 6465 }, { "epoch": 0.67, "grad_norm": 1.9161233226944596, "learning_rate": 2.5639287213329767e-06, "loss": 0.6308, "step": 6466 }, { "epoch": 0.67, "grad_norm": 2.0568730245550317, "learning_rate": 2.5624587609505475e-06, "loss": 0.6569, "step": 6467 }, { "epoch": 0.67, "grad_norm": 1.8986904541545218, "learning_rate": 2.5609890768773126e-06, "loss": 0.5687, "step": 6468 }, { "epoch": 0.67, "grad_norm": 1.925080769355097, "learning_rate": 2.5595196692798664e-06, "loss": 0.6665, "step": 6469 }, { "epoch": 0.67, "grad_norm": 2.1620329086880075, "learning_rate": 2.5580505383247796e-06, "loss": 0.6607, "step": 6470 }, { "epoch": 0.67, "grad_norm": 1.8802935287691611, "learning_rate": 2.5565816841785785e-06, "loss": 0.6097, "step": 6471 }, { "epoch": 0.67, "grad_norm": 2.046944025991196, "learning_rate": 2.555113107007773e-06, "loss": 0.6806, "step": 6472 }, { "epoch": 0.67, "grad_norm": 1.8878402057696602, "learning_rate": 2.55364480697883e-06, "loss": 0.6483, "step": 6473 }, { "epoch": 0.67, "grad_norm": 1.8425257728641522, "learning_rate": 2.5521767842581947e-06, "loss": 0.5859, "step": 6474 }, { "epoch": 0.67, "grad_norm": 1.6780569604641604, "learning_rate": 2.5507090390122704e-06, "loss": 0.657, "step": 6475 }, { "epoch": 0.67, "grad_norm": 1.9087001854008494, "learning_rate": 2.5492415714074387e-06, "loss": 0.7312, "step": 6476 }, { "epoch": 0.67, "grad_norm": 2.1493828843849827, "learning_rate": 2.5477743816100443e-06, "loss": 0.7286, "step": 6477 }, { "epoch": 0.67, "grad_norm": 1.8397832863127606, "learning_rate": 2.5463074697864006e-06, "loss": 0.5727, "step": 6478 }, { "epoch": 0.67, "grad_norm": 1.9416792168693122, "learning_rate": 2.544840836102789e-06, "loss": 0.6589, "step": 6479 }, { "epoch": 0.67, "grad_norm": 1.7431185986584572, "learning_rate": 2.543374480725464e-06, "loss": 0.5923, "step": 6480 }, { "epoch": 0.67, "grad_norm": 1.8136659982858228, "learning_rate": 2.5419084038206422e-06, "loss": 0.616, "step": 6481 }, { "epoch": 0.67, "grad_norm": 1.793225841672104, "learning_rate": 2.540442605554516e-06, "loss": 0.6643, "step": 6482 }, { "epoch": 0.67, "grad_norm": 1.6887884282795877, "learning_rate": 2.5389770860932374e-06, "loss": 0.6283, "step": 6483 }, { "epoch": 0.67, "grad_norm": 2.0298876307442724, "learning_rate": 2.5375118456029345e-06, "loss": 0.5921, "step": 6484 }, { "epoch": 0.67, "grad_norm": 1.861398698981882, "learning_rate": 2.5360468842497004e-06, "loss": 0.5887, "step": 6485 }, { "epoch": 0.67, "grad_norm": 2.031756505301046, "learning_rate": 2.5345822021995934e-06, "loss": 0.7035, "step": 6486 }, { "epoch": 0.67, "grad_norm": 1.9440950572916977, "learning_rate": 2.5331177996186494e-06, "loss": 0.6454, "step": 6487 }, { "epoch": 0.67, "grad_norm": 2.018185652460288, "learning_rate": 2.5316536766728605e-06, "loss": 0.6834, "step": 6488 }, { "epoch": 0.67, "grad_norm": 1.786345392863452, "learning_rate": 2.5301898335281994e-06, "loss": 0.6012, "step": 6489 }, { "epoch": 0.67, "grad_norm": 2.094253077479235, "learning_rate": 2.5287262703505973e-06, "loss": 0.6962, "step": 6490 }, { "epoch": 0.67, "grad_norm": 2.016490719493658, "learning_rate": 2.5272629873059564e-06, "loss": 0.7104, "step": 6491 }, { "epoch": 0.67, "grad_norm": 1.942400372969221, "learning_rate": 2.525799984560152e-06, "loss": 0.6432, "step": 6492 }, { "epoch": 0.67, "grad_norm": 1.841903429983575, "learning_rate": 2.524337262279022e-06, "loss": 0.621, "step": 6493 }, { "epoch": 0.68, "grad_norm": 1.8022659474271276, "learning_rate": 2.5228748206283716e-06, "loss": 0.6844, "step": 6494 }, { "epoch": 0.68, "grad_norm": 1.9084102317140246, "learning_rate": 2.521412659773982e-06, "loss": 0.653, "step": 6495 }, { "epoch": 0.68, "grad_norm": 2.003649909793403, "learning_rate": 2.5199507798815926e-06, "loss": 0.6809, "step": 6496 }, { "epoch": 0.68, "grad_norm": 1.806198288915302, "learning_rate": 2.5184891811169203e-06, "loss": 0.513, "step": 6497 }, { "epoch": 0.68, "grad_norm": 2.0289321659239388, "learning_rate": 2.5170278636456413e-06, "loss": 0.6757, "step": 6498 }, { "epoch": 0.68, "grad_norm": 2.1476325520610784, "learning_rate": 2.515566827633411e-06, "loss": 0.6057, "step": 6499 }, { "epoch": 0.68, "grad_norm": 2.1238370270123315, "learning_rate": 2.5141060732458366e-06, "loss": 0.6114, "step": 6500 }, { "epoch": 0.68, "grad_norm": 1.8882367928318948, "learning_rate": 2.512645600648511e-06, "loss": 0.6919, "step": 6501 }, { "epoch": 0.68, "grad_norm": 1.7336732581718404, "learning_rate": 2.511185410006981e-06, "loss": 0.6119, "step": 6502 }, { "epoch": 0.68, "grad_norm": 1.8563482336968113, "learning_rate": 2.5097255014867733e-06, "loss": 0.6606, "step": 6503 }, { "epoch": 0.68, "grad_norm": 2.1097536526043412, "learning_rate": 2.508265875253372e-06, "loss": 0.6581, "step": 6504 }, { "epoch": 0.68, "grad_norm": 2.0103292190475575, "learning_rate": 2.5068065314722378e-06, "loss": 0.5797, "step": 6505 }, { "epoch": 0.68, "grad_norm": 1.6519741874999505, "learning_rate": 2.5053474703087943e-06, "loss": 0.5882, "step": 6506 }, { "epoch": 0.68, "grad_norm": 2.0533959837032523, "learning_rate": 2.5038886919284333e-06, "loss": 0.7001, "step": 6507 }, { "epoch": 0.68, "grad_norm": 1.8805266087401824, "learning_rate": 2.5024301964965157e-06, "loss": 0.5486, "step": 6508 }, { "epoch": 0.68, "grad_norm": 2.1279785478582482, "learning_rate": 2.500971984178372e-06, "loss": 0.6061, "step": 6509 }, { "epoch": 0.68, "grad_norm": 1.938630985747579, "learning_rate": 2.4995140551392965e-06, "loss": 0.7083, "step": 6510 }, { "epoch": 0.68, "grad_norm": 1.8335443599118721, "learning_rate": 2.4980564095445562e-06, "loss": 0.5235, "step": 6511 }, { "epoch": 0.68, "grad_norm": 2.148717212825636, "learning_rate": 2.4965990475593814e-06, "loss": 0.5978, "step": 6512 }, { "epoch": 0.68, "grad_norm": 1.824758821910578, "learning_rate": 2.495141969348975e-06, "loss": 0.5902, "step": 6513 }, { "epoch": 0.68, "grad_norm": 1.7780524543423788, "learning_rate": 2.493685175078504e-06, "loss": 0.6284, "step": 6514 }, { "epoch": 0.68, "grad_norm": 1.8609798057641382, "learning_rate": 2.492228664913104e-06, "loss": 0.6661, "step": 6515 }, { "epoch": 0.68, "grad_norm": 1.8370681072012822, "learning_rate": 2.4907724390178762e-06, "loss": 0.5885, "step": 6516 }, { "epoch": 0.68, "grad_norm": 1.895445022270483, "learning_rate": 2.489316497557897e-06, "loss": 0.5775, "step": 6517 }, { "epoch": 0.68, "grad_norm": 1.7219155092631773, "learning_rate": 2.487860840698201e-06, "loss": 0.6289, "step": 6518 }, { "epoch": 0.68, "grad_norm": 2.0460365604603816, "learning_rate": 2.4864054686037993e-06, "loss": 0.6826, "step": 6519 }, { "epoch": 0.68, "grad_norm": 1.7147425195038142, "learning_rate": 2.4849503814396624e-06, "loss": 0.5136, "step": 6520 }, { "epoch": 0.68, "grad_norm": 1.6858038167010478, "learning_rate": 2.4834955793707376e-06, "loss": 0.6515, "step": 6521 }, { "epoch": 0.68, "grad_norm": 2.021509740545536, "learning_rate": 2.4820410625619325e-06, "loss": 0.5946, "step": 6522 }, { "epoch": 0.68, "grad_norm": 2.044099212949625, "learning_rate": 2.4805868311781228e-06, "loss": 0.6226, "step": 6523 }, { "epoch": 0.68, "grad_norm": 2.0554872349030506, "learning_rate": 2.4791328853841577e-06, "loss": 0.6877, "step": 6524 }, { "epoch": 0.68, "grad_norm": 1.7756247535084098, "learning_rate": 2.4776792253448465e-06, "loss": 0.6656, "step": 6525 }, { "epoch": 0.68, "grad_norm": 1.9002880458700975, "learning_rate": 2.4762258512249745e-06, "loss": 0.721, "step": 6526 }, { "epoch": 0.68, "grad_norm": 1.7127361731051247, "learning_rate": 2.4747727631892847e-06, "loss": 0.5513, "step": 6527 }, { "epoch": 0.68, "grad_norm": 1.804770412332777, "learning_rate": 2.4733199614024978e-06, "loss": 0.5056, "step": 6528 }, { "epoch": 0.68, "grad_norm": 1.9003328353324378, "learning_rate": 2.4718674460292945e-06, "loss": 0.5586, "step": 6529 }, { "epoch": 0.68, "grad_norm": 1.8474351176816664, "learning_rate": 2.470415217234326e-06, "loss": 0.5364, "step": 6530 }, { "epoch": 0.68, "grad_norm": 1.8999533795354315, "learning_rate": 2.468963275182209e-06, "loss": 0.6509, "step": 6531 }, { "epoch": 0.68, "grad_norm": 1.7821564463248776, "learning_rate": 2.467511620037533e-06, "loss": 0.6039, "step": 6532 }, { "epoch": 0.68, "grad_norm": 1.815283031049219, "learning_rate": 2.466060251964848e-06, "loss": 0.5468, "step": 6533 }, { "epoch": 0.68, "grad_norm": 1.7976932655952507, "learning_rate": 2.4646091711286783e-06, "loss": 0.6342, "step": 6534 }, { "epoch": 0.68, "grad_norm": 2.1496032676946073, "learning_rate": 2.4631583776935087e-06, "loss": 0.6687, "step": 6535 }, { "epoch": 0.68, "grad_norm": 1.9563009201475414, "learning_rate": 2.4617078718237996e-06, "loss": 0.5885, "step": 6536 }, { "epoch": 0.68, "grad_norm": 1.9025425900601731, "learning_rate": 2.4602576536839672e-06, "loss": 0.7126, "step": 6537 }, { "epoch": 0.68, "grad_norm": 2.2229623466585378, "learning_rate": 2.4588077234384084e-06, "loss": 0.6516, "step": 6538 }, { "epoch": 0.68, "grad_norm": 2.078758706067484, "learning_rate": 2.457358081251476e-06, "loss": 0.6454, "step": 6539 }, { "epoch": 0.68, "grad_norm": 2.206863288113695, "learning_rate": 2.4559087272875e-06, "loss": 0.5985, "step": 6540 }, { "epoch": 0.68, "grad_norm": 1.7974189151535742, "learning_rate": 2.454459661710768e-06, "loss": 0.6699, "step": 6541 }, { "epoch": 0.68, "grad_norm": 2.183890569895723, "learning_rate": 2.453010884685545e-06, "loss": 0.6428, "step": 6542 }, { "epoch": 0.68, "grad_norm": 2.1368283835643664, "learning_rate": 2.451562396376055e-06, "loss": 0.6995, "step": 6543 }, { "epoch": 0.68, "grad_norm": 1.8922048559177875, "learning_rate": 2.4501141969464936e-06, "loss": 0.7507, "step": 6544 }, { "epoch": 0.68, "grad_norm": 2.044874405618413, "learning_rate": 2.4486662865610194e-06, "loss": 0.682, "step": 6545 }, { "epoch": 0.68, "grad_norm": 1.903529162451403, "learning_rate": 2.447218665383766e-06, "loss": 0.5761, "step": 6546 }, { "epoch": 0.68, "grad_norm": 1.7651894250163067, "learning_rate": 2.445771333578825e-06, "loss": 0.5569, "step": 6547 }, { "epoch": 0.68, "grad_norm": 1.930097576630646, "learning_rate": 2.4443242913102645e-06, "loss": 0.6176, "step": 6548 }, { "epoch": 0.68, "grad_norm": 1.9430342493397708, "learning_rate": 2.44287753874211e-06, "loss": 0.5452, "step": 6549 }, { "epoch": 0.68, "grad_norm": 1.7755635352299155, "learning_rate": 2.4414310760383635e-06, "loss": 0.5851, "step": 6550 }, { "epoch": 0.68, "grad_norm": 1.8019542492761174, "learning_rate": 2.439984903362988e-06, "loss": 0.5942, "step": 6551 }, { "epoch": 0.68, "grad_norm": 1.8952673414148868, "learning_rate": 2.4385390208799153e-06, "loss": 0.5434, "step": 6552 }, { "epoch": 0.68, "grad_norm": 1.6146056407936027, "learning_rate": 2.437093428753042e-06, "loss": 0.53, "step": 6553 }, { "epoch": 0.68, "grad_norm": 2.0490607931816394, "learning_rate": 2.4356481271462396e-06, "loss": 0.6576, "step": 6554 }, { "epoch": 0.68, "grad_norm": 2.0066381095536445, "learning_rate": 2.434203116223336e-06, "loss": 0.6539, "step": 6555 }, { "epoch": 0.68, "grad_norm": 1.9100614870915273, "learning_rate": 2.4327583961481356e-06, "loss": 0.6054, "step": 6556 }, { "epoch": 0.68, "grad_norm": 1.9574341823528867, "learning_rate": 2.4313139670844016e-06, "loss": 0.618, "step": 6557 }, { "epoch": 0.68, "grad_norm": 1.8779301828805786, "learning_rate": 2.429869829195872e-06, "loss": 0.5621, "step": 6558 }, { "epoch": 0.68, "grad_norm": 2.06963245031177, "learning_rate": 2.4284259826462475e-06, "loss": 0.6054, "step": 6559 }, { "epoch": 0.68, "grad_norm": 2.074995237798964, "learning_rate": 2.4269824275991925e-06, "loss": 0.539, "step": 6560 }, { "epoch": 0.68, "grad_norm": 1.8641536750051724, "learning_rate": 2.425539164218348e-06, "loss": 0.5851, "step": 6561 }, { "epoch": 0.68, "grad_norm": 1.7855445771394514, "learning_rate": 2.4240961926673107e-06, "loss": 0.6371, "step": 6562 }, { "epoch": 0.68, "grad_norm": 2.033972613929812, "learning_rate": 2.422653513109654e-06, "loss": 0.6142, "step": 6563 }, { "epoch": 0.68, "grad_norm": 2.1191051352226, "learning_rate": 2.42121112570891e-06, "loss": 0.5777, "step": 6564 }, { "epoch": 0.68, "grad_norm": 1.8636220501849985, "learning_rate": 2.4197690306285855e-06, "loss": 0.6975, "step": 6565 }, { "epoch": 0.68, "grad_norm": 2.255684862296978, "learning_rate": 2.4183272280321477e-06, "loss": 0.6607, "step": 6566 }, { "epoch": 0.68, "grad_norm": 1.9075617579966504, "learning_rate": 2.416885718083035e-06, "loss": 0.5841, "step": 6567 }, { "epoch": 0.68, "grad_norm": 1.9785565108430303, "learning_rate": 2.4154445009446457e-06, "loss": 0.6007, "step": 6568 }, { "epoch": 0.68, "grad_norm": 2.046547103393187, "learning_rate": 2.414003576780357e-06, "loss": 0.6176, "step": 6569 }, { "epoch": 0.68, "grad_norm": 2.021562045047473, "learning_rate": 2.4125629457535003e-06, "loss": 0.6506, "step": 6570 }, { "epoch": 0.68, "grad_norm": 1.9063754184105355, "learning_rate": 2.4111226080273832e-06, "loss": 0.6206, "step": 6571 }, { "epoch": 0.68, "grad_norm": 1.7439814488302037, "learning_rate": 2.409682563765273e-06, "loss": 0.6125, "step": 6572 }, { "epoch": 0.68, "grad_norm": 1.7171532890783108, "learning_rate": 2.408242813130412e-06, "loss": 0.5728, "step": 6573 }, { "epoch": 0.68, "grad_norm": 1.9647678953123011, "learning_rate": 2.406803356285997e-06, "loss": 0.661, "step": 6574 }, { "epoch": 0.68, "grad_norm": 1.76511392894678, "learning_rate": 2.4053641933952043e-06, "loss": 0.4648, "step": 6575 }, { "epoch": 0.68, "grad_norm": 1.9107782987664588, "learning_rate": 2.4039253246211673e-06, "loss": 0.5542, "step": 6576 }, { "epoch": 0.68, "grad_norm": 1.912398300444987, "learning_rate": 2.402486750126994e-06, "loss": 0.6348, "step": 6577 }, { "epoch": 0.68, "grad_norm": 1.7869081393535953, "learning_rate": 2.401048470075751e-06, "loss": 0.5759, "step": 6578 }, { "epoch": 0.68, "grad_norm": 1.8508351567029224, "learning_rate": 2.399610484630479e-06, "loss": 0.6337, "step": 6579 }, { "epoch": 0.68, "grad_norm": 1.9020644166463498, "learning_rate": 2.3981727939541806e-06, "loss": 0.5074, "step": 6580 }, { "epoch": 0.68, "grad_norm": 1.8625920857309566, "learning_rate": 2.396735398209825e-06, "loss": 0.5701, "step": 6581 }, { "epoch": 0.68, "grad_norm": 1.8707396576414643, "learning_rate": 2.3952982975603494e-06, "loss": 0.6144, "step": 6582 }, { "epoch": 0.68, "grad_norm": 1.7674720029464268, "learning_rate": 2.3938614921686592e-06, "loss": 0.4661, "step": 6583 }, { "epoch": 0.68, "grad_norm": 2.1866573593617296, "learning_rate": 2.392424982197622e-06, "loss": 0.7057, "step": 6584 }, { "epoch": 0.68, "grad_norm": 1.5827145809561256, "learning_rate": 2.3909887678100774e-06, "loss": 0.494, "step": 6585 }, { "epoch": 0.68, "grad_norm": 1.9495572070111211, "learning_rate": 2.3895528491688246e-06, "loss": 0.6217, "step": 6586 }, { "epoch": 0.68, "grad_norm": 1.840105029036223, "learning_rate": 2.388117226436638e-06, "loss": 0.6843, "step": 6587 }, { "epoch": 0.68, "grad_norm": 2.1018687611008997, "learning_rate": 2.3866818997762507e-06, "loss": 0.6491, "step": 6588 }, { "epoch": 0.68, "grad_norm": 1.9982661992960102, "learning_rate": 2.3852468693503635e-06, "loss": 0.7238, "step": 6589 }, { "epoch": 0.69, "grad_norm": 1.8010975053442853, "learning_rate": 2.3838121353216494e-06, "loss": 0.6081, "step": 6590 }, { "epoch": 0.69, "grad_norm": 2.0237199118531266, "learning_rate": 2.3823776978527412e-06, "loss": 0.6033, "step": 6591 }, { "epoch": 0.69, "grad_norm": 1.8954053993498832, "learning_rate": 2.380943557106239e-06, "loss": 0.6438, "step": 6592 }, { "epoch": 0.69, "grad_norm": 1.8578242402001044, "learning_rate": 2.379509713244715e-06, "loss": 0.6441, "step": 6593 }, { "epoch": 0.69, "grad_norm": 1.913753592633508, "learning_rate": 2.3780761664306988e-06, "loss": 0.5426, "step": 6594 }, { "epoch": 0.69, "grad_norm": 1.8191071740448894, "learning_rate": 2.3766429168266958e-06, "loss": 0.5654, "step": 6595 }, { "epoch": 0.69, "grad_norm": 1.775553219293777, "learning_rate": 2.375209964595171e-06, "loss": 0.601, "step": 6596 }, { "epoch": 0.69, "grad_norm": 2.0852233066631856, "learning_rate": 2.3737773098985556e-06, "loss": 0.5894, "step": 6597 }, { "epoch": 0.69, "grad_norm": 1.811993129732441, "learning_rate": 2.3723449528992527e-06, "loss": 0.5774, "step": 6598 }, { "epoch": 0.69, "grad_norm": 2.3368875265028968, "learning_rate": 2.3709128937596248e-06, "loss": 0.6608, "step": 6599 }, { "epoch": 0.69, "grad_norm": 2.145791013358563, "learning_rate": 2.3694811326420074e-06, "loss": 0.6564, "step": 6600 }, { "epoch": 0.69, "grad_norm": 2.0766083446244052, "learning_rate": 2.3680496697086956e-06, "loss": 0.6666, "step": 6601 }, { "epoch": 0.69, "grad_norm": 1.9026813857247935, "learning_rate": 2.366618505121957e-06, "loss": 0.6696, "step": 6602 }, { "epoch": 0.69, "grad_norm": 1.7019084860970937, "learning_rate": 2.365187639044021e-06, "loss": 0.574, "step": 6603 }, { "epoch": 0.69, "grad_norm": 2.0689299845262132, "learning_rate": 2.3637570716370835e-06, "loss": 0.6265, "step": 6604 }, { "epoch": 0.69, "grad_norm": 2.0029581171015454, "learning_rate": 2.362326803063306e-06, "loss": 0.5978, "step": 6605 }, { "epoch": 0.69, "grad_norm": 1.7689417076989542, "learning_rate": 2.360896833484822e-06, "loss": 0.5353, "step": 6606 }, { "epoch": 0.69, "grad_norm": 2.0905140765067185, "learning_rate": 2.3594671630637223e-06, "loss": 0.6662, "step": 6607 }, { "epoch": 0.69, "grad_norm": 1.768719947566444, "learning_rate": 2.3580377919620716e-06, "loss": 0.6265, "step": 6608 }, { "epoch": 0.69, "grad_norm": 2.1019145030463755, "learning_rate": 2.3566087203418946e-06, "loss": 0.6763, "step": 6609 }, { "epoch": 0.69, "grad_norm": 1.8952776750300382, "learning_rate": 2.3551799483651894e-06, "loss": 0.5889, "step": 6610 }, { "epoch": 0.69, "grad_norm": 2.1165276188915803, "learning_rate": 2.3537514761939083e-06, "loss": 0.7131, "step": 6611 }, { "epoch": 0.69, "grad_norm": 1.8468004323684801, "learning_rate": 2.3523233039899827e-06, "loss": 0.6277, "step": 6612 }, { "epoch": 0.69, "grad_norm": 1.9635660498133092, "learning_rate": 2.3508954319153e-06, "loss": 0.577, "step": 6613 }, { "epoch": 0.69, "grad_norm": 2.173612272307502, "learning_rate": 2.3494678601317204e-06, "loss": 0.5449, "step": 6614 }, { "epoch": 0.69, "grad_norm": 2.09870439358293, "learning_rate": 2.3480405888010654e-06, "loss": 0.5512, "step": 6615 }, { "epoch": 0.69, "grad_norm": 1.9973381551511349, "learning_rate": 2.3466136180851274e-06, "loss": 0.5502, "step": 6616 }, { "epoch": 0.69, "grad_norm": 2.0266125822500776, "learning_rate": 2.345186948145659e-06, "loss": 0.5873, "step": 6617 }, { "epoch": 0.69, "grad_norm": 1.7407141658103176, "learning_rate": 2.343760579144382e-06, "loss": 0.6076, "step": 6618 }, { "epoch": 0.69, "grad_norm": 2.1528326826833104, "learning_rate": 2.342334511242982e-06, "loss": 0.6227, "step": 6619 }, { "epoch": 0.69, "grad_norm": 1.877769142307935, "learning_rate": 2.3409087446031144e-06, "loss": 0.5574, "step": 6620 }, { "epoch": 0.69, "grad_norm": 1.970392303151823, "learning_rate": 2.3394832793863955e-06, "loss": 0.5651, "step": 6621 }, { "epoch": 0.69, "grad_norm": 1.896484797097487, "learning_rate": 2.338058115754413e-06, "loss": 0.6181, "step": 6622 }, { "epoch": 0.69, "grad_norm": 1.8879614504411355, "learning_rate": 2.336633253868714e-06, "loss": 0.5099, "step": 6623 }, { "epoch": 0.69, "grad_norm": 1.9349750897671085, "learning_rate": 2.335208693890819e-06, "loss": 0.6151, "step": 6624 }, { "epoch": 0.69, "grad_norm": 1.9611926446015844, "learning_rate": 2.333784435982206e-06, "loss": 0.5171, "step": 6625 }, { "epoch": 0.69, "grad_norm": 2.0197304863197316, "learning_rate": 2.3323604803043225e-06, "loss": 0.6571, "step": 6626 }, { "epoch": 0.69, "grad_norm": 1.9238997930011825, "learning_rate": 2.3309368270185863e-06, "loss": 0.6127, "step": 6627 }, { "epoch": 0.69, "grad_norm": 1.9924563648365212, "learning_rate": 2.3295134762863713e-06, "loss": 0.6, "step": 6628 }, { "epoch": 0.69, "grad_norm": 2.1233139368564826, "learning_rate": 2.3280904282690268e-06, "loss": 0.6467, "step": 6629 }, { "epoch": 0.69, "grad_norm": 1.6964172462449443, "learning_rate": 2.3266676831278625e-06, "loss": 0.5363, "step": 6630 }, { "epoch": 0.69, "grad_norm": 2.111431211506792, "learning_rate": 2.325245241024151e-06, "loss": 0.5895, "step": 6631 }, { "epoch": 0.69, "grad_norm": 1.8468775750315043, "learning_rate": 2.3238231021191392e-06, "loss": 0.5522, "step": 6632 }, { "epoch": 0.69, "grad_norm": 1.865844535939304, "learning_rate": 2.3224012665740327e-06, "loss": 0.5739, "step": 6633 }, { "epoch": 0.69, "grad_norm": 2.0846033087576568, "learning_rate": 2.3209797345500025e-06, "loss": 0.7064, "step": 6634 }, { "epoch": 0.69, "grad_norm": 1.9161200839699233, "learning_rate": 2.3195585062081904e-06, "loss": 0.6521, "step": 6635 }, { "epoch": 0.69, "grad_norm": 2.0009721784284826, "learning_rate": 2.3181375817096986e-06, "loss": 0.6137, "step": 6636 }, { "epoch": 0.69, "grad_norm": 1.9519332289640217, "learning_rate": 2.3167169612155997e-06, "loss": 0.5762, "step": 6637 }, { "epoch": 0.69, "grad_norm": 1.837292165340319, "learning_rate": 2.315296644886926e-06, "loss": 0.6546, "step": 6638 }, { "epoch": 0.69, "grad_norm": 1.9042025982700623, "learning_rate": 2.313876632884683e-06, "loss": 0.6275, "step": 6639 }, { "epoch": 0.69, "grad_norm": 1.8454885332773772, "learning_rate": 2.3124569253698305e-06, "loss": 0.6006, "step": 6640 }, { "epoch": 0.69, "grad_norm": 1.7636093008937368, "learning_rate": 2.3110375225033056e-06, "loss": 0.5944, "step": 6641 }, { "epoch": 0.69, "grad_norm": 1.7390161950750196, "learning_rate": 2.3096184244460025e-06, "loss": 0.6379, "step": 6642 }, { "epoch": 0.69, "grad_norm": 2.069866503330104, "learning_rate": 2.3081996313587873e-06, "loss": 0.6273, "step": 6643 }, { "epoch": 0.69, "grad_norm": 1.752133432566502, "learning_rate": 2.306781143402485e-06, "loss": 0.5812, "step": 6644 }, { "epoch": 0.69, "grad_norm": 1.8767393290787764, "learning_rate": 2.305362960737893e-06, "loss": 0.563, "step": 6645 }, { "epoch": 0.69, "grad_norm": 2.088228977799822, "learning_rate": 2.3039450835257663e-06, "loss": 0.6123, "step": 6646 }, { "epoch": 0.69, "grad_norm": 1.8447026144205156, "learning_rate": 2.3025275119268352e-06, "loss": 0.5872, "step": 6647 }, { "epoch": 0.69, "grad_norm": 1.9480786651098552, "learning_rate": 2.3011102461017816e-06, "loss": 0.6293, "step": 6648 }, { "epoch": 0.69, "grad_norm": 1.9232095483162872, "learning_rate": 2.299693286211267e-06, "loss": 0.7181, "step": 6649 }, { "epoch": 0.69, "grad_norm": 1.9315203503161686, "learning_rate": 2.298276632415908e-06, "loss": 0.6642, "step": 6650 }, { "epoch": 0.69, "grad_norm": 2.075640982086266, "learning_rate": 2.296860284876293e-06, "loss": 0.6924, "step": 6651 }, { "epoch": 0.69, "grad_norm": 1.9253684895073375, "learning_rate": 2.2954442437529705e-06, "loss": 0.5573, "step": 6652 }, { "epoch": 0.69, "grad_norm": 1.9033756169578906, "learning_rate": 2.294028509206461e-06, "loss": 0.5732, "step": 6653 }, { "epoch": 0.69, "grad_norm": 2.097629789333268, "learning_rate": 2.292613081397243e-06, "loss": 0.7167, "step": 6654 }, { "epoch": 0.69, "grad_norm": 2.047506195550759, "learning_rate": 2.2911979604857636e-06, "loss": 0.5737, "step": 6655 }, { "epoch": 0.69, "grad_norm": 2.1135148834024093, "learning_rate": 2.289783146632434e-06, "loss": 0.6373, "step": 6656 }, { "epoch": 0.69, "grad_norm": 2.153401373307362, "learning_rate": 2.2883686399976335e-06, "loss": 0.6147, "step": 6657 }, { "epoch": 0.69, "grad_norm": 1.8547858747646147, "learning_rate": 2.2869544407417016e-06, "loss": 0.6559, "step": 6658 }, { "epoch": 0.69, "grad_norm": 1.871073109375712, "learning_rate": 2.2855405490249498e-06, "loss": 0.6087, "step": 6659 }, { "epoch": 0.69, "grad_norm": 1.9986690467749073, "learning_rate": 2.2841269650076468e-06, "loss": 0.6595, "step": 6660 }, { "epoch": 0.69, "grad_norm": 1.9087678195779618, "learning_rate": 2.282713688850034e-06, "loss": 0.5527, "step": 6661 }, { "epoch": 0.69, "grad_norm": 2.014305188228896, "learning_rate": 2.281300720712313e-06, "loss": 0.7236, "step": 6662 }, { "epoch": 0.69, "grad_norm": 1.9398579036796748, "learning_rate": 2.2798880607546486e-06, "loss": 0.5963, "step": 6663 }, { "epoch": 0.69, "grad_norm": 2.3211520585069536, "learning_rate": 2.2784757091371797e-06, "loss": 0.5343, "step": 6664 }, { "epoch": 0.69, "grad_norm": 2.0825157206727343, "learning_rate": 2.2770636660199983e-06, "loss": 0.6533, "step": 6665 }, { "epoch": 0.69, "grad_norm": 1.8372912903103717, "learning_rate": 2.275651931563173e-06, "loss": 0.7069, "step": 6666 }, { "epoch": 0.69, "grad_norm": 1.8541732518907224, "learning_rate": 2.274240505926728e-06, "loss": 0.6389, "step": 6667 }, { "epoch": 0.69, "grad_norm": 2.049805210637795, "learning_rate": 2.2728293892706595e-06, "loss": 0.6054, "step": 6668 }, { "epoch": 0.69, "grad_norm": 2.13594935059443, "learning_rate": 2.271418581754924e-06, "loss": 0.6606, "step": 6669 }, { "epoch": 0.69, "grad_norm": 2.0250909170956506, "learning_rate": 2.2700080835394444e-06, "loss": 0.6581, "step": 6670 }, { "epoch": 0.69, "grad_norm": 2.056761095650911, "learning_rate": 2.2685978947841077e-06, "loss": 0.6361, "step": 6671 }, { "epoch": 0.69, "grad_norm": 2.265738878544974, "learning_rate": 2.2671880156487695e-06, "loss": 0.7253, "step": 6672 }, { "epoch": 0.69, "grad_norm": 1.8661634959294517, "learning_rate": 2.265778446293245e-06, "loss": 0.6385, "step": 6673 }, { "epoch": 0.69, "grad_norm": 1.8345999506199009, "learning_rate": 2.264369186877319e-06, "loss": 0.6667, "step": 6674 }, { "epoch": 0.69, "grad_norm": 2.1188435197091833, "learning_rate": 2.2629602375607373e-06, "loss": 0.6107, "step": 6675 }, { "epoch": 0.69, "grad_norm": 1.766286234357666, "learning_rate": 2.2615515985032164e-06, "loss": 0.5546, "step": 6676 }, { "epoch": 0.69, "grad_norm": 1.9151920493197223, "learning_rate": 2.260143269864427e-06, "loss": 0.6048, "step": 6677 }, { "epoch": 0.69, "grad_norm": 1.7001073092306225, "learning_rate": 2.258735251804017e-06, "loss": 0.4986, "step": 6678 }, { "epoch": 0.69, "grad_norm": 1.995324748961039, "learning_rate": 2.2573275444815886e-06, "loss": 0.6454, "step": 6679 }, { "epoch": 0.69, "grad_norm": 2.121005092218295, "learning_rate": 2.255920148056717e-06, "loss": 0.7024, "step": 6680 }, { "epoch": 0.69, "grad_norm": 2.1720784642317454, "learning_rate": 2.2545130626889363e-06, "loss": 0.5723, "step": 6681 }, { "epoch": 0.69, "grad_norm": 2.0099728903305523, "learning_rate": 2.25310628853775e-06, "loss": 0.6592, "step": 6682 }, { "epoch": 0.69, "grad_norm": 2.1435430270674973, "learning_rate": 2.251699825762621e-06, "loss": 0.5571, "step": 6683 }, { "epoch": 0.69, "grad_norm": 1.9626458440348495, "learning_rate": 2.2502936745229852e-06, "loss": 0.6454, "step": 6684 }, { "epoch": 0.69, "grad_norm": 1.7483370034305972, "learning_rate": 2.2488878349782306e-06, "loss": 0.5895, "step": 6685 }, { "epoch": 0.7, "grad_norm": 1.9414199777315755, "learning_rate": 2.2474823072877226e-06, "loss": 0.6565, "step": 6686 }, { "epoch": 0.7, "grad_norm": 1.9913410893273478, "learning_rate": 2.2460770916107823e-06, "loss": 0.6351, "step": 6687 }, { "epoch": 0.7, "grad_norm": 2.04831872577914, "learning_rate": 2.244672188106702e-06, "loss": 0.5214, "step": 6688 }, { "epoch": 0.7, "grad_norm": 2.3021743565866615, "learning_rate": 2.243267596934732e-06, "loss": 0.6176, "step": 6689 }, { "epoch": 0.7, "grad_norm": 1.9373025833593858, "learning_rate": 2.241863318254095e-06, "loss": 0.6219, "step": 6690 }, { "epoch": 0.7, "grad_norm": 1.8598534415891308, "learning_rate": 2.2404593522239715e-06, "loss": 0.6604, "step": 6691 }, { "epoch": 0.7, "grad_norm": 1.6833312249023875, "learning_rate": 2.239055699003509e-06, "loss": 0.5226, "step": 6692 }, { "epoch": 0.7, "grad_norm": 1.963731738827018, "learning_rate": 2.2376523587518184e-06, "loss": 0.6036, "step": 6693 }, { "epoch": 0.7, "grad_norm": 2.017374029375341, "learning_rate": 2.23624933162798e-06, "loss": 0.7601, "step": 6694 }, { "epoch": 0.7, "grad_norm": 1.8393505053809505, "learning_rate": 2.23484661779103e-06, "loss": 0.602, "step": 6695 }, { "epoch": 0.7, "grad_norm": 2.012100195733019, "learning_rate": 2.2334442173999794e-06, "loss": 0.7229, "step": 6696 }, { "epoch": 0.7, "grad_norm": 1.829512485563068, "learning_rate": 2.232042130613793e-06, "loss": 0.6148, "step": 6697 }, { "epoch": 0.7, "grad_norm": 1.662329440268267, "learning_rate": 2.2306403575914103e-06, "loss": 0.6035, "step": 6698 }, { "epoch": 0.7, "grad_norm": 1.926230971491137, "learning_rate": 2.229238898491728e-06, "loss": 0.5508, "step": 6699 }, { "epoch": 0.7, "grad_norm": 1.9284111002937832, "learning_rate": 2.2278377534736067e-06, "loss": 0.652, "step": 6700 }, { "epoch": 0.7, "grad_norm": 2.023252463481603, "learning_rate": 2.2264369226958794e-06, "loss": 0.5936, "step": 6701 }, { "epoch": 0.7, "grad_norm": 2.0858121565124996, "learning_rate": 2.225036406317334e-06, "loss": 0.687, "step": 6702 }, { "epoch": 0.7, "grad_norm": 2.0405317157720906, "learning_rate": 2.2236362044967304e-06, "loss": 0.6622, "step": 6703 }, { "epoch": 0.7, "grad_norm": 1.8435761932262933, "learning_rate": 2.2222363173927853e-06, "loss": 0.6013, "step": 6704 }, { "epoch": 0.7, "grad_norm": 2.0622278757264785, "learning_rate": 2.2208367451641886e-06, "loss": 0.5633, "step": 6705 }, { "epoch": 0.7, "grad_norm": 1.8044115147358368, "learning_rate": 2.219437487969588e-06, "loss": 0.5886, "step": 6706 }, { "epoch": 0.7, "grad_norm": 1.869590527714706, "learning_rate": 2.2180385459675964e-06, "loss": 0.598, "step": 6707 }, { "epoch": 0.7, "grad_norm": 1.991125138912976, "learning_rate": 2.2166399193167905e-06, "loss": 0.6672, "step": 6708 }, { "epoch": 0.7, "grad_norm": 2.078202290454276, "learning_rate": 2.2152416081757154e-06, "loss": 0.5946, "step": 6709 }, { "epoch": 0.7, "grad_norm": 1.9841789691260552, "learning_rate": 2.213843612702876e-06, "loss": 0.624, "step": 6710 }, { "epoch": 0.7, "grad_norm": 1.9292348217406463, "learning_rate": 2.212445933056745e-06, "loss": 0.5677, "step": 6711 }, { "epoch": 0.7, "grad_norm": 1.7298685570499033, "learning_rate": 2.211048569395754e-06, "loss": 0.6022, "step": 6712 }, { "epoch": 0.7, "grad_norm": 1.9413989348010925, "learning_rate": 2.2096515218783084e-06, "loss": 0.5198, "step": 6713 }, { "epoch": 0.7, "grad_norm": 2.106895321722421, "learning_rate": 2.208254790662763e-06, "loss": 0.6963, "step": 6714 }, { "epoch": 0.7, "grad_norm": 1.899806937788191, "learning_rate": 2.2068583759074513e-06, "loss": 0.6799, "step": 6715 }, { "epoch": 0.7, "grad_norm": 1.919074168896033, "learning_rate": 2.2054622777706612e-06, "loss": 0.6711, "step": 6716 }, { "epoch": 0.7, "grad_norm": 1.8067940485711798, "learning_rate": 2.204066496410653e-06, "loss": 0.5445, "step": 6717 }, { "epoch": 0.7, "grad_norm": 1.9616475474941888, "learning_rate": 2.2026710319856407e-06, "loss": 0.6405, "step": 6718 }, { "epoch": 0.7, "grad_norm": 2.1720562157049415, "learning_rate": 2.2012758846538135e-06, "loss": 0.5855, "step": 6719 }, { "epoch": 0.7, "grad_norm": 1.9905847516723316, "learning_rate": 2.199881054573315e-06, "loss": 0.6609, "step": 6720 }, { "epoch": 0.7, "grad_norm": 1.8413328911147004, "learning_rate": 2.1984865419022633e-06, "loss": 0.515, "step": 6721 }, { "epoch": 0.7, "grad_norm": 1.7123430797565387, "learning_rate": 2.197092346798726e-06, "loss": 0.5539, "step": 6722 }, { "epoch": 0.7, "grad_norm": 1.809983657480813, "learning_rate": 2.1956984694207495e-06, "loss": 0.5613, "step": 6723 }, { "epoch": 0.7, "grad_norm": 1.9263268915106502, "learning_rate": 2.1943049099263333e-06, "loss": 0.6399, "step": 6724 }, { "epoch": 0.7, "grad_norm": 2.005700997387467, "learning_rate": 2.1929116684734493e-06, "loss": 0.746, "step": 6725 }, { "epoch": 0.7, "grad_norm": 2.057704807111089, "learning_rate": 2.1915187452200255e-06, "loss": 0.5946, "step": 6726 }, { "epoch": 0.7, "grad_norm": 1.9196933178433027, "learning_rate": 2.190126140323962e-06, "loss": 0.601, "step": 6727 }, { "epoch": 0.7, "grad_norm": 2.10808635939514, "learning_rate": 2.188733853943116e-06, "loss": 0.683, "step": 6728 }, { "epoch": 0.7, "grad_norm": 1.9521402765230733, "learning_rate": 2.1873418862353095e-06, "loss": 0.5989, "step": 6729 }, { "epoch": 0.7, "grad_norm": 1.9980956231048448, "learning_rate": 2.1859502373583336e-06, "loss": 0.6752, "step": 6730 }, { "epoch": 0.7, "grad_norm": 1.9240445744428603, "learning_rate": 2.184558907469938e-06, "loss": 0.739, "step": 6731 }, { "epoch": 0.7, "grad_norm": 1.7796759312492878, "learning_rate": 2.1831678967278356e-06, "loss": 0.6363, "step": 6732 }, { "epoch": 0.7, "grad_norm": 1.8523858957919184, "learning_rate": 2.18177720528971e-06, "loss": 0.6039, "step": 6733 }, { "epoch": 0.7, "grad_norm": 1.895909602685592, "learning_rate": 2.1803868333131996e-06, "loss": 0.5693, "step": 6734 }, { "epoch": 0.7, "grad_norm": 2.0462928765800226, "learning_rate": 2.1789967809559144e-06, "loss": 0.6011, "step": 6735 }, { "epoch": 0.7, "grad_norm": 1.8139579333458893, "learning_rate": 2.177607048375423e-06, "loss": 0.5731, "step": 6736 }, { "epoch": 0.7, "grad_norm": 1.9562113590511148, "learning_rate": 2.1762176357292582e-06, "loss": 0.6779, "step": 6737 }, { "epoch": 0.7, "grad_norm": 2.015963062808654, "learning_rate": 2.174828543174921e-06, "loss": 0.7395, "step": 6738 }, { "epoch": 0.7, "grad_norm": 1.7438058430933063, "learning_rate": 2.17343977086987e-06, "loss": 0.5682, "step": 6739 }, { "epoch": 0.7, "grad_norm": 2.3548843240144435, "learning_rate": 2.172051318971533e-06, "loss": 0.7333, "step": 6740 }, { "epoch": 0.7, "grad_norm": 1.8381592551491213, "learning_rate": 2.170663187637297e-06, "loss": 0.6598, "step": 6741 }, { "epoch": 0.7, "grad_norm": 2.153703994101113, "learning_rate": 2.169275377024516e-06, "loss": 0.713, "step": 6742 }, { "epoch": 0.7, "grad_norm": 1.9617566205072865, "learning_rate": 2.1678878872905063e-06, "loss": 0.6191, "step": 6743 }, { "epoch": 0.7, "grad_norm": 1.9815659603480604, "learning_rate": 2.1665007185925468e-06, "loss": 0.58, "step": 6744 }, { "epoch": 0.7, "grad_norm": 1.8177032013635215, "learning_rate": 2.16511387108788e-06, "loss": 0.4916, "step": 6745 }, { "epoch": 0.7, "grad_norm": 1.8158442719708523, "learning_rate": 2.1637273449337156e-06, "loss": 0.6302, "step": 6746 }, { "epoch": 0.7, "grad_norm": 2.027352451014181, "learning_rate": 2.1623411402872206e-06, "loss": 0.5283, "step": 6747 }, { "epoch": 0.7, "grad_norm": 2.4065306653749294, "learning_rate": 2.160955257305534e-06, "loss": 0.7473, "step": 6748 }, { "epoch": 0.7, "grad_norm": 2.218280632800332, "learning_rate": 2.159569696145749e-06, "loss": 0.6691, "step": 6749 }, { "epoch": 0.7, "grad_norm": 1.8586197979012782, "learning_rate": 2.158184456964932e-06, "loss": 0.5736, "step": 6750 }, { "epoch": 0.7, "grad_norm": 1.7866938651306266, "learning_rate": 2.1567995399201018e-06, "loss": 0.6215, "step": 6751 }, { "epoch": 0.7, "grad_norm": 2.135629174836658, "learning_rate": 2.155414945168251e-06, "loss": 0.6011, "step": 6752 }, { "epoch": 0.7, "grad_norm": 1.9452854659735015, "learning_rate": 2.1540306728663274e-06, "loss": 0.5866, "step": 6753 }, { "epoch": 0.7, "grad_norm": 2.086410546915578, "learning_rate": 2.152646723171251e-06, "loss": 0.5784, "step": 6754 }, { "epoch": 0.7, "grad_norm": 1.8368795960451758, "learning_rate": 2.1512630962398954e-06, "loss": 0.6354, "step": 6755 }, { "epoch": 0.7, "grad_norm": 2.070655836107693, "learning_rate": 2.1498797922291075e-06, "loss": 0.5932, "step": 6756 }, { "epoch": 0.7, "grad_norm": 1.7569569690003866, "learning_rate": 2.1484968112956884e-06, "loss": 0.5498, "step": 6757 }, { "epoch": 0.7, "grad_norm": 1.821829426027513, "learning_rate": 2.1471141535964126e-06, "loss": 0.6307, "step": 6758 }, { "epoch": 0.7, "grad_norm": 1.898338128453966, "learning_rate": 2.1457318192880043e-06, "loss": 0.6396, "step": 6759 }, { "epoch": 0.7, "grad_norm": 1.8115570550325248, "learning_rate": 2.144349808527165e-06, "loss": 0.6435, "step": 6760 }, { "epoch": 0.7, "grad_norm": 1.8336892208081155, "learning_rate": 2.14296812147055e-06, "loss": 0.5665, "step": 6761 }, { "epoch": 0.7, "grad_norm": 1.8332267785810863, "learning_rate": 2.1415867582747847e-06, "loss": 0.5371, "step": 6762 }, { "epoch": 0.7, "grad_norm": 1.9140615577561217, "learning_rate": 2.1402057190964503e-06, "loss": 0.5874, "step": 6763 }, { "epoch": 0.7, "grad_norm": 2.0702064176398993, "learning_rate": 2.1388250040921007e-06, "loss": 0.6677, "step": 6764 }, { "epoch": 0.7, "grad_norm": 1.6913066775000711, "learning_rate": 2.137444613418244e-06, "loss": 0.5909, "step": 6765 }, { "epoch": 0.7, "grad_norm": 1.8374709309177701, "learning_rate": 2.1360645472313556e-06, "loss": 0.5114, "step": 6766 }, { "epoch": 0.7, "grad_norm": 1.9940985284039772, "learning_rate": 2.134684805687876e-06, "loss": 0.7109, "step": 6767 }, { "epoch": 0.7, "grad_norm": 1.8285598217429264, "learning_rate": 2.1333053889442033e-06, "loss": 0.6866, "step": 6768 }, { "epoch": 0.7, "grad_norm": 1.8855850725628227, "learning_rate": 2.131926297156707e-06, "loss": 0.5933, "step": 6769 }, { "epoch": 0.7, "grad_norm": 2.004497545118367, "learning_rate": 2.130547530481712e-06, "loss": 0.6405, "step": 6770 }, { "epoch": 0.7, "grad_norm": 1.7597570513215508, "learning_rate": 2.1291690890755078e-06, "loss": 0.6394, "step": 6771 }, { "epoch": 0.7, "grad_norm": 2.0693726659785225, "learning_rate": 2.1277909730943526e-06, "loss": 0.6361, "step": 6772 }, { "epoch": 0.7, "grad_norm": 1.8601120817243084, "learning_rate": 2.126413182694461e-06, "loss": 0.5867, "step": 6773 }, { "epoch": 0.7, "grad_norm": 2.0572739362735324, "learning_rate": 2.125035718032013e-06, "loss": 0.7085, "step": 6774 }, { "epoch": 0.7, "grad_norm": 1.9037558097151903, "learning_rate": 2.123658579263155e-06, "loss": 0.6568, "step": 6775 }, { "epoch": 0.7, "grad_norm": 2.0402613319469007, "learning_rate": 2.1222817665439893e-06, "loss": 0.7298, "step": 6776 }, { "epoch": 0.7, "grad_norm": 1.9559818195133252, "learning_rate": 2.1209052800305897e-06, "loss": 0.6301, "step": 6777 }, { "epoch": 0.7, "grad_norm": 1.8799592134451022, "learning_rate": 2.119529119878985e-06, "loss": 0.5894, "step": 6778 }, { "epoch": 0.7, "grad_norm": 1.8268326940086235, "learning_rate": 2.1181532862451746e-06, "loss": 0.6109, "step": 6779 }, { "epoch": 0.7, "grad_norm": 2.115331740345243, "learning_rate": 2.1167777792851153e-06, "loss": 0.5923, "step": 6780 }, { "epoch": 0.7, "grad_norm": 2.0187530099260917, "learning_rate": 2.1154025991547283e-06, "loss": 0.5897, "step": 6781 }, { "epoch": 0.7, "grad_norm": 1.9106316038087288, "learning_rate": 2.114027746009897e-06, "loss": 0.6144, "step": 6782 }, { "epoch": 0.71, "grad_norm": 1.8569111712932218, "learning_rate": 2.112653220006472e-06, "loss": 0.6748, "step": 6783 }, { "epoch": 0.71, "grad_norm": 1.8045272023207288, "learning_rate": 2.1112790213002592e-06, "loss": 0.6509, "step": 6784 }, { "epoch": 0.71, "grad_norm": 2.1234892959481653, "learning_rate": 2.1099051500470368e-06, "loss": 0.78, "step": 6785 }, { "epoch": 0.71, "grad_norm": 1.77970791042375, "learning_rate": 2.1085316064025375e-06, "loss": 0.5062, "step": 6786 }, { "epoch": 0.71, "grad_norm": 1.9188195927515572, "learning_rate": 2.1071583905224643e-06, "loss": 0.5566, "step": 6787 }, { "epoch": 0.71, "grad_norm": 1.9184818718964722, "learning_rate": 2.105785502562472e-06, "loss": 0.6935, "step": 6788 }, { "epoch": 0.71, "grad_norm": 2.1765053123488634, "learning_rate": 2.1044129426781925e-06, "loss": 0.6602, "step": 6789 }, { "epoch": 0.71, "grad_norm": 2.0392376211455963, "learning_rate": 2.1030407110252077e-06, "loss": 0.6014, "step": 6790 }, { "epoch": 0.71, "grad_norm": 1.779595008058516, "learning_rate": 2.1016688077590726e-06, "loss": 0.6121, "step": 6791 }, { "epoch": 0.71, "grad_norm": 1.8969276425103183, "learning_rate": 2.100297233035296e-06, "loss": 0.6356, "step": 6792 }, { "epoch": 0.71, "grad_norm": 2.090929508806987, "learning_rate": 2.0989259870093575e-06, "loss": 0.7559, "step": 6793 }, { "epoch": 0.71, "grad_norm": 2.0298868348206267, "learning_rate": 2.0975550698366924e-06, "loss": 0.669, "step": 6794 }, { "epoch": 0.71, "grad_norm": 1.9494565795464749, "learning_rate": 2.096184481672707e-06, "loss": 0.5928, "step": 6795 }, { "epoch": 0.71, "grad_norm": 1.9858014912834072, "learning_rate": 2.0948142226727584e-06, "loss": 0.67, "step": 6796 }, { "epoch": 0.71, "grad_norm": 1.9407007847129225, "learning_rate": 2.0934442929921783e-06, "loss": 0.539, "step": 6797 }, { "epoch": 0.71, "grad_norm": 2.0414032198405, "learning_rate": 2.0920746927862523e-06, "loss": 0.6952, "step": 6798 }, { "epoch": 0.71, "grad_norm": 1.959079680325511, "learning_rate": 2.0907054222102367e-06, "loss": 0.5815, "step": 6799 }, { "epoch": 0.71, "grad_norm": 1.8445652931836316, "learning_rate": 2.0893364814193424e-06, "loss": 0.5937, "step": 6800 }, { "epoch": 0.71, "grad_norm": 1.8116444330336892, "learning_rate": 2.0879678705687495e-06, "loss": 0.667, "step": 6801 }, { "epoch": 0.71, "grad_norm": 1.8516217311768648, "learning_rate": 2.0865995898135965e-06, "loss": 0.5826, "step": 6802 }, { "epoch": 0.71, "grad_norm": 1.9105961253495354, "learning_rate": 2.0852316393089837e-06, "loss": 0.5449, "step": 6803 }, { "epoch": 0.71, "grad_norm": 2.1050591561753578, "learning_rate": 2.083864019209981e-06, "loss": 0.6291, "step": 6804 }, { "epoch": 0.71, "grad_norm": 1.8619748597791044, "learning_rate": 2.08249672967161e-06, "loss": 0.5646, "step": 6805 }, { "epoch": 0.71, "grad_norm": 1.8444285783299936, "learning_rate": 2.081129770848867e-06, "loss": 0.6131, "step": 6806 }, { "epoch": 0.71, "grad_norm": 2.271770773829363, "learning_rate": 2.079763142896699e-06, "loss": 0.5903, "step": 6807 }, { "epoch": 0.71, "grad_norm": 2.052492209017752, "learning_rate": 2.0783968459700253e-06, "loss": 0.607, "step": 6808 }, { "epoch": 0.71, "grad_norm": 1.8228278521126375, "learning_rate": 2.077030880223722e-06, "loss": 0.6362, "step": 6809 }, { "epoch": 0.71, "grad_norm": 2.1175619753666206, "learning_rate": 2.0756652458126285e-06, "loss": 0.635, "step": 6810 }, { "epoch": 0.71, "grad_norm": 1.85016309421988, "learning_rate": 2.074299942891546e-06, "loss": 0.6413, "step": 6811 }, { "epoch": 0.71, "grad_norm": 1.8552690267386498, "learning_rate": 2.0729349716152424e-06, "loss": 0.5696, "step": 6812 }, { "epoch": 0.71, "grad_norm": 2.1779947285160848, "learning_rate": 2.071570332138442e-06, "loss": 0.6923, "step": 6813 }, { "epoch": 0.71, "grad_norm": 1.8964953958247945, "learning_rate": 2.0702060246158378e-06, "loss": 0.6115, "step": 6814 }, { "epoch": 0.71, "grad_norm": 1.799391893677241, "learning_rate": 2.068842049202078e-06, "loss": 0.6503, "step": 6815 }, { "epoch": 0.71, "grad_norm": 1.8439040043393595, "learning_rate": 2.0674784060517803e-06, "loss": 0.6019, "step": 6816 }, { "epoch": 0.71, "grad_norm": 2.0442863866035004, "learning_rate": 2.066115095319521e-06, "loss": 0.5634, "step": 6817 }, { "epoch": 0.71, "grad_norm": 1.9730979415829402, "learning_rate": 2.0647521171598376e-06, "loss": 0.6527, "step": 6818 }, { "epoch": 0.71, "grad_norm": 1.965494335587785, "learning_rate": 2.0633894717272308e-06, "loss": 0.5889, "step": 6819 }, { "epoch": 0.71, "grad_norm": 2.0617167262437013, "learning_rate": 2.0620271591761666e-06, "loss": 0.6645, "step": 6820 }, { "epoch": 0.71, "grad_norm": 2.0622153316834453, "learning_rate": 2.060665179661068e-06, "loss": 0.6241, "step": 6821 }, { "epoch": 0.71, "grad_norm": 1.9657200879940147, "learning_rate": 2.0593035333363275e-06, "loss": 0.6282, "step": 6822 }, { "epoch": 0.71, "grad_norm": 2.1417828950209485, "learning_rate": 2.0579422203562905e-06, "loss": 0.6605, "step": 6823 }, { "epoch": 0.71, "grad_norm": 1.8466583378272763, "learning_rate": 2.056581240875276e-06, "loss": 0.5548, "step": 6824 }, { "epoch": 0.71, "grad_norm": 1.9783061847720764, "learning_rate": 2.055220595047551e-06, "loss": 0.5555, "step": 6825 }, { "epoch": 0.71, "grad_norm": 1.7190514762365583, "learning_rate": 2.053860283027358e-06, "loss": 0.5731, "step": 6826 }, { "epoch": 0.71, "grad_norm": 2.2693999624623724, "learning_rate": 2.0525003049688923e-06, "loss": 0.521, "step": 6827 }, { "epoch": 0.71, "grad_norm": 2.028005261832898, "learning_rate": 2.0511406610263196e-06, "loss": 0.6534, "step": 6828 }, { "epoch": 0.71, "grad_norm": 1.8211798337378464, "learning_rate": 2.0497813513537583e-06, "loss": 0.5156, "step": 6829 }, { "epoch": 0.71, "grad_norm": 2.0316985269130874, "learning_rate": 2.048422376105299e-06, "loss": 0.5355, "step": 6830 }, { "epoch": 0.71, "grad_norm": 2.035569698591049, "learning_rate": 2.047063735434985e-06, "loss": 0.5921, "step": 6831 }, { "epoch": 0.71, "grad_norm": 1.8627838823785647, "learning_rate": 2.045705429496831e-06, "loss": 0.5364, "step": 6832 }, { "epoch": 0.71, "grad_norm": 1.9083218950041783, "learning_rate": 2.044347458444802e-06, "loss": 0.6016, "step": 6833 }, { "epoch": 0.71, "grad_norm": 1.925848000350107, "learning_rate": 2.042989822432837e-06, "loss": 0.624, "step": 6834 }, { "epoch": 0.71, "grad_norm": 2.098946597015218, "learning_rate": 2.041632521614828e-06, "loss": 0.5922, "step": 6835 }, { "epoch": 0.71, "grad_norm": 2.100563969003895, "learning_rate": 2.040275556144637e-06, "loss": 0.6121, "step": 6836 }, { "epoch": 0.71, "grad_norm": 1.9737206038407717, "learning_rate": 2.03891892617608e-06, "loss": 0.5919, "step": 6837 }, { "epoch": 0.71, "grad_norm": 1.9297318075312375, "learning_rate": 2.0375626318629418e-06, "loss": 0.6429, "step": 6838 }, { "epoch": 0.71, "grad_norm": 2.4570564234245285, "learning_rate": 2.036206673358964e-06, "loss": 0.7009, "step": 6839 }, { "epoch": 0.71, "grad_norm": 1.7813614587881377, "learning_rate": 2.034851050817852e-06, "loss": 0.5578, "step": 6840 }, { "epoch": 0.71, "grad_norm": 2.100717069970721, "learning_rate": 2.0334957643932757e-06, "loss": 0.5658, "step": 6841 }, { "epoch": 0.71, "grad_norm": 2.02686330607163, "learning_rate": 2.032140814238861e-06, "loss": 0.5931, "step": 6842 }, { "epoch": 0.71, "grad_norm": 1.6161797864253376, "learning_rate": 2.030786200508203e-06, "loss": 0.4844, "step": 6843 }, { "epoch": 0.71, "grad_norm": 1.9412498996165422, "learning_rate": 2.0294319233548516e-06, "loss": 0.5354, "step": 6844 }, { "epoch": 0.71, "grad_norm": 1.986315746322376, "learning_rate": 2.028077982932325e-06, "loss": 0.6693, "step": 6845 }, { "epoch": 0.71, "grad_norm": 1.86339065340936, "learning_rate": 2.026724379394098e-06, "loss": 0.5476, "step": 6846 }, { "epoch": 0.71, "grad_norm": 1.7315228539679266, "learning_rate": 2.0253711128936104e-06, "loss": 0.5505, "step": 6847 }, { "epoch": 0.71, "grad_norm": 2.0758485452326267, "learning_rate": 2.0240181835842605e-06, "loss": 0.629, "step": 6848 }, { "epoch": 0.71, "grad_norm": 1.98434925798822, "learning_rate": 2.0226655916194127e-06, "loss": 0.555, "step": 6849 }, { "epoch": 0.71, "grad_norm": 1.963022411365478, "learning_rate": 2.0213133371523893e-06, "loss": 0.6524, "step": 6850 }, { "epoch": 0.71, "grad_norm": 2.0305066724656267, "learning_rate": 2.0199614203364787e-06, "loss": 0.6561, "step": 6851 }, { "epoch": 0.71, "grad_norm": 1.9822291874774338, "learning_rate": 2.018609841324925e-06, "loss": 0.584, "step": 6852 }, { "epoch": 0.71, "grad_norm": 2.2247395503229326, "learning_rate": 2.0172586002709403e-06, "loss": 0.6224, "step": 6853 }, { "epoch": 0.71, "grad_norm": 2.364632538967232, "learning_rate": 2.0159076973276954e-06, "loss": 0.6781, "step": 6854 }, { "epoch": 0.71, "grad_norm": 2.2066032797604134, "learning_rate": 2.014557132648321e-06, "loss": 0.7244, "step": 6855 }, { "epoch": 0.71, "grad_norm": 1.854399879858053, "learning_rate": 2.0132069063859107e-06, "loss": 0.5404, "step": 6856 }, { "epoch": 0.71, "grad_norm": 2.198675457264703, "learning_rate": 2.0118570186935234e-06, "loss": 0.679, "step": 6857 }, { "epoch": 0.71, "grad_norm": 1.9050712289570522, "learning_rate": 2.010507469724173e-06, "loss": 0.702, "step": 6858 }, { "epoch": 0.71, "grad_norm": 2.0735079350659302, "learning_rate": 2.0091582596308423e-06, "loss": 0.6322, "step": 6859 }, { "epoch": 0.71, "grad_norm": 2.010600304322372, "learning_rate": 2.007809388566468e-06, "loss": 0.7113, "step": 6860 }, { "epoch": 0.71, "grad_norm": 1.7636218721247439, "learning_rate": 2.0064608566839584e-06, "loss": 0.5361, "step": 6861 }, { "epoch": 0.71, "grad_norm": 1.9501623701778918, "learning_rate": 2.0051126641361697e-06, "loss": 0.6234, "step": 6862 }, { "epoch": 0.71, "grad_norm": 1.9960117280272525, "learning_rate": 2.0037648110759324e-06, "loss": 0.6327, "step": 6863 }, { "epoch": 0.71, "grad_norm": 1.817735464885767, "learning_rate": 2.0024172976560296e-06, "loss": 0.5358, "step": 6864 }, { "epoch": 0.71, "grad_norm": 2.1683050481880564, "learning_rate": 2.001070124029214e-06, "loss": 0.5889, "step": 6865 }, { "epoch": 0.71, "grad_norm": 1.8559272054114377, "learning_rate": 1.9997232903481916e-06, "loss": 0.5864, "step": 6866 }, { "epoch": 0.71, "grad_norm": 2.101506830003224, "learning_rate": 1.9983767967656364e-06, "loss": 0.6091, "step": 6867 }, { "epoch": 0.71, "grad_norm": 1.9674756726064457, "learning_rate": 1.9970306434341806e-06, "loss": 0.6021, "step": 6868 }, { "epoch": 0.71, "grad_norm": 1.9449109915690936, "learning_rate": 1.9956848305064156e-06, "loss": 0.5826, "step": 6869 }, { "epoch": 0.71, "grad_norm": 2.312848931186964, "learning_rate": 1.994339358134901e-06, "loss": 0.5566, "step": 6870 }, { "epoch": 0.71, "grad_norm": 1.8944276535814153, "learning_rate": 1.992994226472152e-06, "loss": 0.6597, "step": 6871 }, { "epoch": 0.71, "grad_norm": 2.045736585954296, "learning_rate": 1.9916494356706447e-06, "loss": 0.6818, "step": 6872 }, { "epoch": 0.71, "grad_norm": 1.8958380409998368, "learning_rate": 1.9903049858828226e-06, "loss": 0.5696, "step": 6873 }, { "epoch": 0.71, "grad_norm": 2.0705398801996773, "learning_rate": 1.9889608772610837e-06, "loss": 0.5976, "step": 6874 }, { "epoch": 0.71, "grad_norm": 2.0054209308593265, "learning_rate": 1.987617109957793e-06, "loss": 0.5429, "step": 6875 }, { "epoch": 0.71, "grad_norm": 2.041343124527013, "learning_rate": 1.9862736841252734e-06, "loss": 0.6029, "step": 6876 }, { "epoch": 0.71, "grad_norm": 1.918282480016842, "learning_rate": 1.984930599915807e-06, "loss": 0.6197, "step": 6877 }, { "epoch": 0.71, "grad_norm": 1.75857226275821, "learning_rate": 1.983587857481645e-06, "loss": 0.5959, "step": 6878 }, { "epoch": 0.72, "grad_norm": 1.918945998094483, "learning_rate": 1.9822454569749895e-06, "loss": 0.5751, "step": 6879 }, { "epoch": 0.72, "grad_norm": 2.102556873591886, "learning_rate": 1.980903398548015e-06, "loss": 0.6102, "step": 6880 }, { "epoch": 0.72, "grad_norm": 1.894996073709637, "learning_rate": 1.9795616823528457e-06, "loss": 0.6569, "step": 6881 }, { "epoch": 0.72, "grad_norm": 1.9034762382422525, "learning_rate": 1.978220308541578e-06, "loss": 0.6017, "step": 6882 }, { "epoch": 0.72, "grad_norm": 1.8933476962584155, "learning_rate": 1.9768792772662616e-06, "loss": 0.5561, "step": 6883 }, { "epoch": 0.72, "grad_norm": 1.804196672010393, "learning_rate": 1.9755385886789107e-06, "loss": 0.6527, "step": 6884 }, { "epoch": 0.72, "grad_norm": 1.8793796521053991, "learning_rate": 1.9741982429314977e-06, "loss": 0.6567, "step": 6885 }, { "epoch": 0.72, "grad_norm": 1.8753444491107256, "learning_rate": 1.972858240175962e-06, "loss": 0.6202, "step": 6886 }, { "epoch": 0.72, "grad_norm": 2.0290386500817785, "learning_rate": 1.9715185805641974e-06, "loss": 0.5861, "step": 6887 }, { "epoch": 0.72, "grad_norm": 1.847889462311458, "learning_rate": 1.970179264248065e-06, "loss": 0.6332, "step": 6888 }, { "epoch": 0.72, "grad_norm": 2.080563361469307, "learning_rate": 1.9688402913793804e-06, "loss": 0.6881, "step": 6889 }, { "epoch": 0.72, "grad_norm": 1.9954194703008998, "learning_rate": 1.967501662109928e-06, "loss": 0.6653, "step": 6890 }, { "epoch": 0.72, "grad_norm": 2.077546547685542, "learning_rate": 1.9661633765914467e-06, "loss": 0.6304, "step": 6891 }, { "epoch": 0.72, "grad_norm": 2.2298547879318162, "learning_rate": 1.964825434975639e-06, "loss": 0.6967, "step": 6892 }, { "epoch": 0.72, "grad_norm": 2.201827109812438, "learning_rate": 1.9634878374141662e-06, "loss": 0.5941, "step": 6893 }, { "epoch": 0.72, "grad_norm": 2.2384917852547126, "learning_rate": 1.962150584058657e-06, "loss": 0.666, "step": 6894 }, { "epoch": 0.72, "grad_norm": 2.113834279753955, "learning_rate": 1.9608136750606917e-06, "loss": 0.6328, "step": 6895 }, { "epoch": 0.72, "grad_norm": 2.117062026332053, "learning_rate": 1.959477110571821e-06, "loss": 0.6106, "step": 6896 }, { "epoch": 0.72, "grad_norm": 1.8239442513943565, "learning_rate": 1.958140890743549e-06, "loss": 0.5488, "step": 6897 }, { "epoch": 0.72, "grad_norm": 1.8460499061498403, "learning_rate": 1.956805015727348e-06, "loss": 0.4865, "step": 6898 }, { "epoch": 0.72, "grad_norm": 2.1680120538340493, "learning_rate": 1.955469485674641e-06, "loss": 0.6363, "step": 6899 }, { "epoch": 0.72, "grad_norm": 2.0433806599917523, "learning_rate": 1.9541343007368225e-06, "loss": 0.5699, "step": 6900 }, { "epoch": 0.72, "grad_norm": 1.9010351880597207, "learning_rate": 1.95279946106524e-06, "loss": 0.6144, "step": 6901 }, { "epoch": 0.72, "grad_norm": 2.015347226433184, "learning_rate": 1.951464966811209e-06, "loss": 0.5687, "step": 6902 }, { "epoch": 0.72, "grad_norm": 1.8436595068007384, "learning_rate": 1.9501308181259986e-06, "loss": 0.5836, "step": 6903 }, { "epoch": 0.72, "grad_norm": 1.8664745085447245, "learning_rate": 1.948797015160845e-06, "loss": 0.6421, "step": 6904 }, { "epoch": 0.72, "grad_norm": 2.125979204251544, "learning_rate": 1.947463558066941e-06, "loss": 0.6685, "step": 6905 }, { "epoch": 0.72, "grad_norm": 1.9993794382514192, "learning_rate": 1.94613044699544e-06, "loss": 0.628, "step": 6906 }, { "epoch": 0.72, "grad_norm": 1.7669592256552917, "learning_rate": 1.944797682097461e-06, "loss": 0.5624, "step": 6907 }, { "epoch": 0.72, "grad_norm": 1.8143771752464506, "learning_rate": 1.9434652635240775e-06, "loss": 0.6319, "step": 6908 }, { "epoch": 0.72, "grad_norm": 1.6875843744992083, "learning_rate": 1.9421331914263293e-06, "loss": 0.5459, "step": 6909 }, { "epoch": 0.72, "grad_norm": 2.0810872582365767, "learning_rate": 1.9408014659552133e-06, "loss": 0.6141, "step": 6910 }, { "epoch": 0.72, "grad_norm": 1.8317397564624678, "learning_rate": 1.9394700872616856e-06, "loss": 0.569, "step": 6911 }, { "epoch": 0.72, "grad_norm": 1.807931318726164, "learning_rate": 1.9381390554966705e-06, "loss": 0.6178, "step": 6912 }, { "epoch": 0.72, "grad_norm": 2.014838295762959, "learning_rate": 1.9368083708110454e-06, "loss": 0.5852, "step": 6913 }, { "epoch": 0.72, "grad_norm": 1.7712989338761949, "learning_rate": 1.935478033355649e-06, "loss": 0.6652, "step": 6914 }, { "epoch": 0.72, "grad_norm": 1.899489708641119, "learning_rate": 1.9341480432812867e-06, "loss": 0.5458, "step": 6915 }, { "epoch": 0.72, "grad_norm": 1.719903119059106, "learning_rate": 1.9328184007387163e-06, "loss": 0.5132, "step": 6916 }, { "epoch": 0.72, "grad_norm": 2.2829620839907903, "learning_rate": 1.9314891058786644e-06, "loss": 0.4828, "step": 6917 }, { "epoch": 0.72, "grad_norm": 1.8957640141264724, "learning_rate": 1.930160158851811e-06, "loss": 0.5468, "step": 6918 }, { "epoch": 0.72, "grad_norm": 1.975808639599023, "learning_rate": 1.9288315598088024e-06, "loss": 0.6222, "step": 6919 }, { "epoch": 0.72, "grad_norm": 1.9159184447005044, "learning_rate": 1.9275033089002413e-06, "loss": 0.5843, "step": 6920 }, { "epoch": 0.72, "grad_norm": 1.865664172333486, "learning_rate": 1.9261754062766937e-06, "loss": 0.667, "step": 6921 }, { "epoch": 0.72, "grad_norm": 1.8030021224323394, "learning_rate": 1.9248478520886815e-06, "loss": 0.5762, "step": 6922 }, { "epoch": 0.72, "grad_norm": 1.9875737286149218, "learning_rate": 1.923520646486695e-06, "loss": 0.5619, "step": 6923 }, { "epoch": 0.72, "grad_norm": 1.8218277493472246, "learning_rate": 1.9221937896211773e-06, "loss": 0.602, "step": 6924 }, { "epoch": 0.72, "grad_norm": 1.9715617137675558, "learning_rate": 1.920867281642538e-06, "loss": 0.6224, "step": 6925 }, { "epoch": 0.72, "grad_norm": 2.131266043950465, "learning_rate": 1.9195411227011403e-06, "loss": 0.5832, "step": 6926 }, { "epoch": 0.72, "grad_norm": 2.0327060331736404, "learning_rate": 1.9182153129473167e-06, "loss": 0.7407, "step": 6927 }, { "epoch": 0.72, "grad_norm": 1.8922822746296175, "learning_rate": 1.916889852531353e-06, "loss": 0.5791, "step": 6928 }, { "epoch": 0.72, "grad_norm": 1.9156447848338625, "learning_rate": 1.9155647416034972e-06, "loss": 0.5832, "step": 6929 }, { "epoch": 0.72, "grad_norm": 1.8836997633797954, "learning_rate": 1.914239980313958e-06, "loss": 0.6571, "step": 6930 }, { "epoch": 0.72, "grad_norm": 2.041638407251988, "learning_rate": 1.912915568812906e-06, "loss": 0.5327, "step": 6931 }, { "epoch": 0.72, "grad_norm": 1.958615762904026, "learning_rate": 1.9115915072504683e-06, "loss": 0.6959, "step": 6932 }, { "epoch": 0.72, "grad_norm": 1.9335752293452386, "learning_rate": 1.9102677957767384e-06, "loss": 0.6213, "step": 6933 }, { "epoch": 0.72, "grad_norm": 1.8751908142036637, "learning_rate": 1.9089444345417636e-06, "loss": 0.6393, "step": 6934 }, { "epoch": 0.72, "grad_norm": 1.8739242366864037, "learning_rate": 1.9076214236955585e-06, "loss": 0.6625, "step": 6935 }, { "epoch": 0.72, "grad_norm": 1.862653258537508, "learning_rate": 1.9062987633880876e-06, "loss": 0.6272, "step": 6936 }, { "epoch": 0.72, "grad_norm": 1.9757417242906068, "learning_rate": 1.9049764537692872e-06, "loss": 0.5792, "step": 6937 }, { "epoch": 0.72, "grad_norm": 2.0341787737940793, "learning_rate": 1.903654494989045e-06, "loss": 0.66, "step": 6938 }, { "epoch": 0.72, "grad_norm": 1.8427766740494667, "learning_rate": 1.9023328871972163e-06, "loss": 0.5716, "step": 6939 }, { "epoch": 0.72, "grad_norm": 2.024770792040896, "learning_rate": 1.9010116305436094e-06, "loss": 0.6085, "step": 6940 }, { "epoch": 0.72, "grad_norm": 1.8948892940354372, "learning_rate": 1.8996907251779988e-06, "loss": 0.5694, "step": 6941 }, { "epoch": 0.72, "grad_norm": 1.889140157202737, "learning_rate": 1.8983701712501163e-06, "loss": 0.6102, "step": 6942 }, { "epoch": 0.72, "grad_norm": 2.0194435530553716, "learning_rate": 1.8970499689096516e-06, "loss": 0.6385, "step": 6943 }, { "epoch": 0.72, "grad_norm": 1.9671159856485523, "learning_rate": 1.895730118306261e-06, "loss": 0.6185, "step": 6944 }, { "epoch": 0.72, "grad_norm": 1.906154410239305, "learning_rate": 1.8944106195895535e-06, "loss": 0.5676, "step": 6945 }, { "epoch": 0.72, "grad_norm": 2.1301700751404984, "learning_rate": 1.8930914729091055e-06, "loss": 0.6528, "step": 6946 }, { "epoch": 0.72, "grad_norm": 1.8831602503537106, "learning_rate": 1.8917726784144458e-06, "loss": 0.5776, "step": 6947 }, { "epoch": 0.72, "grad_norm": 2.062453945989891, "learning_rate": 1.890454236255071e-06, "loss": 0.61, "step": 6948 }, { "epoch": 0.72, "grad_norm": 1.9660287314255194, "learning_rate": 1.8891361465804326e-06, "loss": 0.6367, "step": 6949 }, { "epoch": 0.72, "grad_norm": 1.9541472768077464, "learning_rate": 1.8878184095399428e-06, "loss": 0.5979, "step": 6950 }, { "epoch": 0.72, "grad_norm": 1.7630023901018252, "learning_rate": 1.886501025282974e-06, "loss": 0.5508, "step": 6951 }, { "epoch": 0.72, "grad_norm": 1.693350200238252, "learning_rate": 1.8851839939588617e-06, "loss": 0.5917, "step": 6952 }, { "epoch": 0.72, "grad_norm": 1.9405268803538889, "learning_rate": 1.8838673157168956e-06, "loss": 0.6026, "step": 6953 }, { "epoch": 0.72, "grad_norm": 1.8552942459894206, "learning_rate": 1.8825509907063328e-06, "loss": 0.6218, "step": 6954 }, { "epoch": 0.72, "grad_norm": 1.8592020844670467, "learning_rate": 1.8812350190763822e-06, "loss": 0.6586, "step": 6955 }, { "epoch": 0.72, "grad_norm": 2.0291508697413643, "learning_rate": 1.8799194009762201e-06, "loss": 0.6224, "step": 6956 }, { "epoch": 0.72, "grad_norm": 2.0727455489975797, "learning_rate": 1.8786041365549784e-06, "loss": 0.7172, "step": 6957 }, { "epoch": 0.72, "grad_norm": 1.8228323748167872, "learning_rate": 1.8772892259617487e-06, "loss": 0.5831, "step": 6958 }, { "epoch": 0.72, "grad_norm": 1.8902269228729196, "learning_rate": 1.875974669345582e-06, "loss": 0.6655, "step": 6959 }, { "epoch": 0.72, "grad_norm": 1.962322524254648, "learning_rate": 1.8746604668554952e-06, "loss": 0.5862, "step": 6960 }, { "epoch": 0.72, "grad_norm": 2.2883723322771603, "learning_rate": 1.8733466186404565e-06, "loss": 0.6552, "step": 6961 }, { "epoch": 0.72, "grad_norm": 1.8072852247050788, "learning_rate": 1.8720331248494012e-06, "loss": 0.5782, "step": 6962 }, { "epoch": 0.72, "grad_norm": 2.123278381013698, "learning_rate": 1.8707199856312186e-06, "loss": 0.7116, "step": 6963 }, { "epoch": 0.72, "grad_norm": 2.0807833558357838, "learning_rate": 1.8694072011347636e-06, "loss": 0.7126, "step": 6964 }, { "epoch": 0.72, "grad_norm": 2.1994360140008165, "learning_rate": 1.8680947715088465e-06, "loss": 0.6547, "step": 6965 }, { "epoch": 0.72, "grad_norm": 2.3923087716494797, "learning_rate": 1.8667826969022379e-06, "loss": 0.6168, "step": 6966 }, { "epoch": 0.72, "grad_norm": 2.2628861411700947, "learning_rate": 1.8654709774636676e-06, "loss": 0.6133, "step": 6967 }, { "epoch": 0.72, "grad_norm": 1.9026703854680282, "learning_rate": 1.8641596133418305e-06, "loss": 0.5893, "step": 6968 }, { "epoch": 0.72, "grad_norm": 2.024038972343844, "learning_rate": 1.8628486046853728e-06, "loss": 0.6062, "step": 6969 }, { "epoch": 0.72, "grad_norm": 1.8290810303514669, "learning_rate": 1.8615379516429084e-06, "loss": 0.6898, "step": 6970 }, { "epoch": 0.72, "grad_norm": 1.9372867721866058, "learning_rate": 1.8602276543630044e-06, "loss": 0.6593, "step": 6971 }, { "epoch": 0.72, "grad_norm": 2.0472330529227487, "learning_rate": 1.858917712994195e-06, "loss": 0.5912, "step": 6972 }, { "epoch": 0.72, "grad_norm": 2.0346582715337576, "learning_rate": 1.8576081276849633e-06, "loss": 0.6109, "step": 6973 }, { "epoch": 0.72, "grad_norm": 1.881561791570671, "learning_rate": 1.8562988985837632e-06, "loss": 0.6301, "step": 6974 }, { "epoch": 0.73, "grad_norm": 1.9364107609270342, "learning_rate": 1.8549900258389992e-06, "loss": 0.5445, "step": 6975 }, { "epoch": 0.73, "grad_norm": 1.764202402624153, "learning_rate": 1.853681509599044e-06, "loss": 0.5492, "step": 6976 }, { "epoch": 0.73, "grad_norm": 1.9477673427273048, "learning_rate": 1.852373350012221e-06, "loss": 0.5653, "step": 6977 }, { "epoch": 0.73, "grad_norm": 2.0400789263657946, "learning_rate": 1.8510655472268212e-06, "loss": 0.5676, "step": 6978 }, { "epoch": 0.73, "grad_norm": 1.9127120678077745, "learning_rate": 1.8497581013910904e-06, "loss": 0.6438, "step": 6979 }, { "epoch": 0.73, "grad_norm": 1.9248975570888085, "learning_rate": 1.848451012653233e-06, "loss": 0.6097, "step": 6980 }, { "epoch": 0.73, "grad_norm": 1.9422713468844135, "learning_rate": 1.8471442811614177e-06, "loss": 0.6654, "step": 6981 }, { "epoch": 0.73, "grad_norm": 2.160167412360216, "learning_rate": 1.8458379070637678e-06, "loss": 0.6017, "step": 6982 }, { "epoch": 0.73, "grad_norm": 2.0222848743451896, "learning_rate": 1.8445318905083703e-06, "loss": 0.7048, "step": 6983 }, { "epoch": 0.73, "grad_norm": 2.057612384950042, "learning_rate": 1.843226231643267e-06, "loss": 0.6671, "step": 6984 }, { "epoch": 0.73, "grad_norm": 1.7047056832519873, "learning_rate": 1.8419209306164653e-06, "loss": 0.6048, "step": 6985 }, { "epoch": 0.73, "grad_norm": 2.060269652286786, "learning_rate": 1.8406159875759266e-06, "loss": 0.6438, "step": 6986 }, { "epoch": 0.73, "grad_norm": 2.159682548383432, "learning_rate": 1.8393114026695736e-06, "loss": 0.5876, "step": 6987 }, { "epoch": 0.73, "grad_norm": 1.8583493961058244, "learning_rate": 1.8380071760452862e-06, "loss": 0.7386, "step": 6988 }, { "epoch": 0.73, "grad_norm": 1.7793109875706183, "learning_rate": 1.83670330785091e-06, "loss": 0.5908, "step": 6989 }, { "epoch": 0.73, "grad_norm": 2.1854173295401274, "learning_rate": 1.8353997982342425e-06, "loss": 0.6927, "step": 6990 }, { "epoch": 0.73, "grad_norm": 1.6687385136463946, "learning_rate": 1.8340966473430477e-06, "loss": 0.5707, "step": 6991 }, { "epoch": 0.73, "grad_norm": 1.817059248514043, "learning_rate": 1.8327938553250407e-06, "loss": 0.5635, "step": 6992 }, { "epoch": 0.73, "grad_norm": 1.9641887104756441, "learning_rate": 1.8314914223279046e-06, "loss": 0.7102, "step": 6993 }, { "epoch": 0.73, "grad_norm": 1.7640602296208145, "learning_rate": 1.8301893484992755e-06, "loss": 0.6463, "step": 6994 }, { "epoch": 0.73, "grad_norm": 2.2745339932755324, "learning_rate": 1.8288876339867511e-06, "loss": 0.7781, "step": 6995 }, { "epoch": 0.73, "grad_norm": 1.8716453246074705, "learning_rate": 1.8275862789378862e-06, "loss": 0.6602, "step": 6996 }, { "epoch": 0.73, "grad_norm": 2.353529730295554, "learning_rate": 1.826285283500201e-06, "loss": 0.7568, "step": 6997 }, { "epoch": 0.73, "grad_norm": 1.9426701427221278, "learning_rate": 1.8249846478211663e-06, "loss": 0.7043, "step": 6998 }, { "epoch": 0.73, "grad_norm": 1.9052478319387323, "learning_rate": 1.8236843720482206e-06, "loss": 0.631, "step": 6999 }, { "epoch": 0.73, "grad_norm": 1.884005001155357, "learning_rate": 1.822384456328754e-06, "loss": 0.5732, "step": 7000 }, { "epoch": 0.73, "grad_norm": 1.906581597726097, "learning_rate": 1.8210849008101244e-06, "loss": 0.6591, "step": 7001 }, { "epoch": 0.73, "grad_norm": 2.032360075463133, "learning_rate": 1.8197857056396372e-06, "loss": 0.5087, "step": 7002 }, { "epoch": 0.73, "grad_norm": 2.072367018499558, "learning_rate": 1.8184868709645686e-06, "loss": 0.6859, "step": 7003 }, { "epoch": 0.73, "grad_norm": 1.9137751075131972, "learning_rate": 1.8171883969321458e-06, "loss": 0.6251, "step": 7004 }, { "epoch": 0.73, "grad_norm": 2.016277252510613, "learning_rate": 1.815890283689561e-06, "loss": 0.7028, "step": 7005 }, { "epoch": 0.73, "grad_norm": 1.8514033044622462, "learning_rate": 1.8145925313839597e-06, "loss": 0.6176, "step": 7006 }, { "epoch": 0.73, "grad_norm": 1.707417328439556, "learning_rate": 1.8132951401624527e-06, "loss": 0.5991, "step": 7007 }, { "epoch": 0.73, "grad_norm": 2.05162512516868, "learning_rate": 1.811998110172104e-06, "loss": 0.6503, "step": 7008 }, { "epoch": 0.73, "grad_norm": 1.8997173006855768, "learning_rate": 1.8107014415599416e-06, "loss": 0.6086, "step": 7009 }, { "epoch": 0.73, "grad_norm": 1.9842498026525295, "learning_rate": 1.8094051344729497e-06, "loss": 0.6816, "step": 7010 }, { "epoch": 0.73, "grad_norm": 1.8316376168242585, "learning_rate": 1.808109189058071e-06, "loss": 0.5557, "step": 7011 }, { "epoch": 0.73, "grad_norm": 1.841851848534049, "learning_rate": 1.8068136054622076e-06, "loss": 0.6038, "step": 7012 }, { "epoch": 0.73, "grad_norm": 1.7337353429307356, "learning_rate": 1.8055183838322243e-06, "loss": 0.6381, "step": 7013 }, { "epoch": 0.73, "grad_norm": 2.066314680142265, "learning_rate": 1.804223524314938e-06, "loss": 0.6052, "step": 7014 }, { "epoch": 0.73, "grad_norm": 1.9221531255760698, "learning_rate": 1.802929027057133e-06, "loss": 0.6784, "step": 7015 }, { "epoch": 0.73, "grad_norm": 1.8537567633360141, "learning_rate": 1.8016348922055448e-06, "loss": 0.6299, "step": 7016 }, { "epoch": 0.73, "grad_norm": 1.9730320203082514, "learning_rate": 1.8003411199068704e-06, "loss": 0.5932, "step": 7017 }, { "epoch": 0.73, "grad_norm": 1.9237631065008112, "learning_rate": 1.7990477103077691e-06, "loss": 0.6454, "step": 7018 }, { "epoch": 0.73, "grad_norm": 1.9063776911922903, "learning_rate": 1.7977546635548527e-06, "loss": 0.5453, "step": 7019 }, { "epoch": 0.73, "grad_norm": 1.7272938089567267, "learning_rate": 1.7964619797946991e-06, "loss": 0.5609, "step": 7020 }, { "epoch": 0.73, "grad_norm": 1.9139613548645424, "learning_rate": 1.7951696591738382e-06, "loss": 0.6083, "step": 7021 }, { "epoch": 0.73, "grad_norm": 2.0076928391817583, "learning_rate": 1.7938777018387648e-06, "loss": 0.6729, "step": 7022 }, { "epoch": 0.73, "grad_norm": 1.8414781413829706, "learning_rate": 1.7925861079359268e-06, "loss": 0.6657, "step": 7023 }, { "epoch": 0.73, "grad_norm": 1.9926494137956807, "learning_rate": 1.7912948776117385e-06, "loss": 0.5832, "step": 7024 }, { "epoch": 0.73, "grad_norm": 2.047037722371666, "learning_rate": 1.7900040110125611e-06, "loss": 0.5773, "step": 7025 }, { "epoch": 0.73, "grad_norm": 1.9110124757511402, "learning_rate": 1.7887135082847274e-06, "loss": 0.6048, "step": 7026 }, { "epoch": 0.73, "grad_norm": 1.9091170712990175, "learning_rate": 1.7874233695745191e-06, "loss": 0.5744, "step": 7027 }, { "epoch": 0.73, "grad_norm": 2.1501681795544605, "learning_rate": 1.786133595028185e-06, "loss": 0.6717, "step": 7028 }, { "epoch": 0.73, "grad_norm": 1.8727818568701433, "learning_rate": 1.7848441847919246e-06, "loss": 0.6262, "step": 7029 }, { "epoch": 0.73, "grad_norm": 2.0302444195043146, "learning_rate": 1.7835551390119033e-06, "loss": 0.6903, "step": 7030 }, { "epoch": 0.73, "grad_norm": 1.8811800640863703, "learning_rate": 1.7822664578342403e-06, "loss": 0.5732, "step": 7031 }, { "epoch": 0.73, "grad_norm": 2.0044708198114662, "learning_rate": 1.7809781414050147e-06, "loss": 0.6878, "step": 7032 }, { "epoch": 0.73, "grad_norm": 2.0627253959610394, "learning_rate": 1.7796901898702628e-06, "loss": 0.6258, "step": 7033 }, { "epoch": 0.73, "grad_norm": 1.9855655422507894, "learning_rate": 1.7784026033759844e-06, "loss": 0.7077, "step": 7034 }, { "epoch": 0.73, "grad_norm": 2.1111295924164546, "learning_rate": 1.777115382068132e-06, "loss": 0.6692, "step": 7035 }, { "epoch": 0.73, "grad_norm": 2.0552672782335533, "learning_rate": 1.7758285260926228e-06, "loss": 0.6351, "step": 7036 }, { "epoch": 0.73, "grad_norm": 1.790621899903027, "learning_rate": 1.7745420355953253e-06, "loss": 0.5409, "step": 7037 }, { "epoch": 0.73, "grad_norm": 2.0282472564011536, "learning_rate": 1.7732559107220765e-06, "loss": 0.6969, "step": 7038 }, { "epoch": 0.73, "grad_norm": 1.8675459459068524, "learning_rate": 1.7719701516186578e-06, "loss": 0.5922, "step": 7039 }, { "epoch": 0.73, "grad_norm": 2.2556574076998035, "learning_rate": 1.770684758430824e-06, "loss": 0.6478, "step": 7040 }, { "epoch": 0.73, "grad_norm": 2.4928599008992904, "learning_rate": 1.769399731304277e-06, "loss": 0.5609, "step": 7041 }, { "epoch": 0.73, "grad_norm": 1.8371152290767596, "learning_rate": 1.7681150703846867e-06, "loss": 0.6117, "step": 7042 }, { "epoch": 0.73, "grad_norm": 2.036972378473435, "learning_rate": 1.7668307758176717e-06, "loss": 0.6935, "step": 7043 }, { "epoch": 0.73, "grad_norm": 1.9198084244760627, "learning_rate": 1.7655468477488191e-06, "loss": 0.5391, "step": 7044 }, { "epoch": 0.73, "grad_norm": 1.7153890540288939, "learning_rate": 1.7642632863236653e-06, "loss": 0.5468, "step": 7045 }, { "epoch": 0.73, "grad_norm": 1.9903008100611193, "learning_rate": 1.7629800916877126e-06, "loss": 0.697, "step": 7046 }, { "epoch": 0.73, "grad_norm": 1.9312849352503563, "learning_rate": 1.7616972639864166e-06, "loss": 0.5932, "step": 7047 }, { "epoch": 0.73, "grad_norm": 1.9990339340200853, "learning_rate": 1.7604148033651925e-06, "loss": 0.6599, "step": 7048 }, { "epoch": 0.73, "grad_norm": 2.1694031802547125, "learning_rate": 1.7591327099694167e-06, "loss": 0.6993, "step": 7049 }, { "epoch": 0.73, "grad_norm": 1.8424469644992956, "learning_rate": 1.7578509839444202e-06, "loss": 0.5265, "step": 7050 }, { "epoch": 0.73, "grad_norm": 1.890458213661419, "learning_rate": 1.756569625435493e-06, "loss": 0.6508, "step": 7051 }, { "epoch": 0.73, "grad_norm": 2.0184026859189967, "learning_rate": 1.7552886345878879e-06, "loss": 0.6978, "step": 7052 }, { "epoch": 0.73, "grad_norm": 2.0127389742757353, "learning_rate": 1.7540080115468095e-06, "loss": 0.6505, "step": 7053 }, { "epoch": 0.73, "grad_norm": 2.3335684879728342, "learning_rate": 1.752727756457423e-06, "loss": 0.6635, "step": 7054 }, { "epoch": 0.73, "grad_norm": 1.9125226367301975, "learning_rate": 1.7514478694648563e-06, "loss": 0.6221, "step": 7055 }, { "epoch": 0.73, "grad_norm": 2.2096866866360534, "learning_rate": 1.7501683507141876e-06, "loss": 0.6165, "step": 7056 }, { "epoch": 0.73, "grad_norm": 1.8901019668590993, "learning_rate": 1.7488892003504615e-06, "loss": 0.6364, "step": 7057 }, { "epoch": 0.73, "grad_norm": 1.835005177582281, "learning_rate": 1.7476104185186737e-06, "loss": 0.5718, "step": 7058 }, { "epoch": 0.73, "grad_norm": 1.7368860915767115, "learning_rate": 1.7463320053637844e-06, "loss": 0.6054, "step": 7059 }, { "epoch": 0.73, "grad_norm": 2.0065600854979793, "learning_rate": 1.745053961030706e-06, "loss": 0.7173, "step": 7060 }, { "epoch": 0.73, "grad_norm": 1.9553760769747448, "learning_rate": 1.743776285664317e-06, "loss": 0.6248, "step": 7061 }, { "epoch": 0.73, "grad_norm": 1.9170304004440861, "learning_rate": 1.7424989794094426e-06, "loss": 0.5648, "step": 7062 }, { "epoch": 0.73, "grad_norm": 1.8843076141856252, "learning_rate": 1.7412220424108778e-06, "loss": 0.5752, "step": 7063 }, { "epoch": 0.73, "grad_norm": 1.892460870380063, "learning_rate": 1.739945474813367e-06, "loss": 0.6732, "step": 7064 }, { "epoch": 0.73, "grad_norm": 2.064212792184756, "learning_rate": 1.7386692767616204e-06, "loss": 0.6855, "step": 7065 }, { "epoch": 0.73, "grad_norm": 2.0042471676355587, "learning_rate": 1.737393448400298e-06, "loss": 0.6518, "step": 7066 }, { "epoch": 0.73, "grad_norm": 2.1605114964526453, "learning_rate": 1.7361179898740265e-06, "loss": 0.5507, "step": 7067 }, { "epoch": 0.73, "grad_norm": 1.9867984391295228, "learning_rate": 1.7348429013273844e-06, "loss": 0.583, "step": 7068 }, { "epoch": 0.73, "grad_norm": 1.8676294756248204, "learning_rate": 1.73356818290491e-06, "loss": 0.5919, "step": 7069 }, { "epoch": 0.73, "grad_norm": 2.0320162519150218, "learning_rate": 1.7322938347510986e-06, "loss": 0.6553, "step": 7070 }, { "epoch": 0.74, "grad_norm": 1.996766764699223, "learning_rate": 1.731019857010408e-06, "loss": 0.6392, "step": 7071 }, { "epoch": 0.74, "grad_norm": 2.0880116579155645, "learning_rate": 1.7297462498272476e-06, "loss": 0.6324, "step": 7072 }, { "epoch": 0.74, "grad_norm": 1.9530026108454182, "learning_rate": 1.728473013345991e-06, "loss": 0.6077, "step": 7073 }, { "epoch": 0.74, "grad_norm": 1.853123715597261, "learning_rate": 1.727200147710964e-06, "loss": 0.6759, "step": 7074 }, { "epoch": 0.74, "grad_norm": 1.6900306342788634, "learning_rate": 1.7259276530664577e-06, "loss": 0.6573, "step": 7075 }, { "epoch": 0.74, "grad_norm": 1.808885919868968, "learning_rate": 1.7246555295567102e-06, "loss": 0.6117, "step": 7076 }, { "epoch": 0.74, "grad_norm": 1.962491279043586, "learning_rate": 1.7233837773259288e-06, "loss": 0.563, "step": 7077 }, { "epoch": 0.74, "grad_norm": 2.0288719441359766, "learning_rate": 1.7221123965182712e-06, "loss": 0.5779, "step": 7078 }, { "epoch": 0.74, "grad_norm": 2.122216006034978, "learning_rate": 1.720841387277858e-06, "loss": 0.6406, "step": 7079 }, { "epoch": 0.74, "grad_norm": 1.8225722448977553, "learning_rate": 1.7195707497487624e-06, "loss": 0.6658, "step": 7080 }, { "epoch": 0.74, "grad_norm": 1.8260903677576232, "learning_rate": 1.7183004840750223e-06, "loss": 0.5738, "step": 7081 }, { "epoch": 0.74, "grad_norm": 1.9433119305101705, "learning_rate": 1.7170305904006252e-06, "loss": 0.6746, "step": 7082 }, { "epoch": 0.74, "grad_norm": 2.277323368339559, "learning_rate": 1.7157610688695248e-06, "loss": 0.6327, "step": 7083 }, { "epoch": 0.74, "grad_norm": 1.9858333985133685, "learning_rate": 1.714491919625627e-06, "loss": 0.6856, "step": 7084 }, { "epoch": 0.74, "grad_norm": 1.875314227630569, "learning_rate": 1.7132231428127949e-06, "loss": 0.5694, "step": 7085 }, { "epoch": 0.74, "grad_norm": 1.8591613869115624, "learning_rate": 1.7119547385748552e-06, "loss": 0.6008, "step": 7086 }, { "epoch": 0.74, "grad_norm": 2.1645944722824653, "learning_rate": 1.710686707055586e-06, "loss": 0.5587, "step": 7087 }, { "epoch": 0.74, "grad_norm": 1.934409516211822, "learning_rate": 1.7094190483987282e-06, "loss": 0.556, "step": 7088 }, { "epoch": 0.74, "grad_norm": 1.5143520523481275, "learning_rate": 1.708151762747977e-06, "loss": 0.5124, "step": 7089 }, { "epoch": 0.74, "grad_norm": 1.8513917629013414, "learning_rate": 1.7068848502469866e-06, "loss": 0.5633, "step": 7090 }, { "epoch": 0.74, "grad_norm": 2.0804716730668145, "learning_rate": 1.7056183110393666e-06, "loss": 0.7043, "step": 7091 }, { "epoch": 0.74, "grad_norm": 1.8489263596056118, "learning_rate": 1.7043521452686902e-06, "loss": 0.6114, "step": 7092 }, { "epoch": 0.74, "grad_norm": 1.9388587591896767, "learning_rate": 1.7030863530784814e-06, "loss": 0.5371, "step": 7093 }, { "epoch": 0.74, "grad_norm": 1.8122345033573635, "learning_rate": 1.7018209346122272e-06, "loss": 0.5691, "step": 7094 }, { "epoch": 0.74, "grad_norm": 2.0118884418469616, "learning_rate": 1.7005558900133678e-06, "loss": 0.6261, "step": 7095 }, { "epoch": 0.74, "grad_norm": 1.734039688883811, "learning_rate": 1.6992912194253065e-06, "loss": 0.5549, "step": 7096 }, { "epoch": 0.74, "grad_norm": 2.1291135005730237, "learning_rate": 1.6980269229913965e-06, "loss": 0.7039, "step": 7097 }, { "epoch": 0.74, "grad_norm": 2.040583129036302, "learning_rate": 1.696763000854959e-06, "loss": 0.6216, "step": 7098 }, { "epoch": 0.74, "grad_norm": 1.7362654789557477, "learning_rate": 1.69549945315926e-06, "loss": 0.5089, "step": 7099 }, { "epoch": 0.74, "grad_norm": 1.8933279122929687, "learning_rate": 1.6942362800475343e-06, "loss": 0.6009, "step": 7100 }, { "epoch": 0.74, "grad_norm": 2.0124255601190577, "learning_rate": 1.6929734816629674e-06, "loss": 0.672, "step": 7101 }, { "epoch": 0.74, "grad_norm": 2.0199051597392446, "learning_rate": 1.6917110581487067e-06, "loss": 0.5459, "step": 7102 }, { "epoch": 0.74, "grad_norm": 2.024380319703663, "learning_rate": 1.690449009647853e-06, "loss": 0.6074, "step": 7103 }, { "epoch": 0.74, "grad_norm": 1.7856784382758137, "learning_rate": 1.6891873363034693e-06, "loss": 0.4908, "step": 7104 }, { "epoch": 0.74, "grad_norm": 1.8101573491267204, "learning_rate": 1.6879260382585727e-06, "loss": 0.5492, "step": 7105 }, { "epoch": 0.74, "grad_norm": 1.9753378786530036, "learning_rate": 1.686665115656137e-06, "loss": 0.6705, "step": 7106 }, { "epoch": 0.74, "grad_norm": 1.9769168029519377, "learning_rate": 1.6854045686390947e-06, "loss": 0.6015, "step": 7107 }, { "epoch": 0.74, "grad_norm": 2.0534968822940582, "learning_rate": 1.6841443973503384e-06, "loss": 0.5757, "step": 7108 }, { "epoch": 0.74, "grad_norm": 2.1367136070784816, "learning_rate": 1.6828846019327128e-06, "loss": 0.6714, "step": 7109 }, { "epoch": 0.74, "grad_norm": 1.9896795722232372, "learning_rate": 1.6816251825290265e-06, "loss": 0.6377, "step": 7110 }, { "epoch": 0.74, "grad_norm": 2.051473929576098, "learning_rate": 1.680366139282038e-06, "loss": 0.6382, "step": 7111 }, { "epoch": 0.74, "grad_norm": 1.845927910849788, "learning_rate": 1.6791074723344719e-06, "loss": 0.6508, "step": 7112 }, { "epoch": 0.74, "grad_norm": 2.1883562893787234, "learning_rate": 1.6778491818289995e-06, "loss": 0.5516, "step": 7113 }, { "epoch": 0.74, "grad_norm": 2.334556242298653, "learning_rate": 1.6765912679082592e-06, "loss": 0.6544, "step": 7114 }, { "epoch": 0.74, "grad_norm": 1.7710546453451088, "learning_rate": 1.67533373071484e-06, "loss": 0.6163, "step": 7115 }, { "epoch": 0.74, "grad_norm": 2.0008488447691177, "learning_rate": 1.6740765703912942e-06, "loss": 0.6591, "step": 7116 }, { "epoch": 0.74, "grad_norm": 1.9710345983015822, "learning_rate": 1.6728197870801244e-06, "loss": 0.6042, "step": 7117 }, { "epoch": 0.74, "grad_norm": 2.0038441788739054, "learning_rate": 1.6715633809237974e-06, "loss": 0.5822, "step": 7118 }, { "epoch": 0.74, "grad_norm": 1.987280070196863, "learning_rate": 1.6703073520647316e-06, "loss": 0.5685, "step": 7119 }, { "epoch": 0.74, "grad_norm": 2.2229776012908724, "learning_rate": 1.6690517006453071e-06, "loss": 0.6201, "step": 7120 }, { "epoch": 0.74, "grad_norm": 2.0164190353791467, "learning_rate": 1.6677964268078584e-06, "loss": 0.6305, "step": 7121 }, { "epoch": 0.74, "grad_norm": 1.9884428866003194, "learning_rate": 1.6665415306946764e-06, "loss": 0.6417, "step": 7122 }, { "epoch": 0.74, "grad_norm": 1.7354582036347406, "learning_rate": 1.665287012448013e-06, "loss": 0.5151, "step": 7123 }, { "epoch": 0.74, "grad_norm": 2.2562445463263816, "learning_rate": 1.6640328722100723e-06, "loss": 0.6702, "step": 7124 }, { "epoch": 0.74, "grad_norm": 1.8771207954912414, "learning_rate": 1.6627791101230222e-06, "loss": 0.5857, "step": 7125 }, { "epoch": 0.74, "grad_norm": 2.507413450280522, "learning_rate": 1.6615257263289809e-06, "loss": 0.5257, "step": 7126 }, { "epoch": 0.74, "grad_norm": 2.057813853105924, "learning_rate": 1.6602727209700276e-06, "loss": 0.6122, "step": 7127 }, { "epoch": 0.74, "grad_norm": 2.309053131838771, "learning_rate": 1.659020094188195e-06, "loss": 0.5922, "step": 7128 }, { "epoch": 0.74, "grad_norm": 2.210830259594895, "learning_rate": 1.6577678461254797e-06, "loss": 0.6566, "step": 7129 }, { "epoch": 0.74, "grad_norm": 1.8767799929288522, "learning_rate": 1.6565159769238276e-06, "loss": 0.6357, "step": 7130 }, { "epoch": 0.74, "grad_norm": 1.9208667473650485, "learning_rate": 1.6552644867251483e-06, "loss": 0.5577, "step": 7131 }, { "epoch": 0.74, "grad_norm": 1.9185506346962085, "learning_rate": 1.6540133756713017e-06, "loss": 0.6611, "step": 7132 }, { "epoch": 0.74, "grad_norm": 1.8123867213865923, "learning_rate": 1.6527626439041128e-06, "loss": 0.5769, "step": 7133 }, { "epoch": 0.74, "grad_norm": 1.6904443041818638, "learning_rate": 1.6515122915653564e-06, "loss": 0.5196, "step": 7134 }, { "epoch": 0.74, "grad_norm": 1.8405788256050637, "learning_rate": 1.6502623187967675e-06, "loss": 0.5961, "step": 7135 }, { "epoch": 0.74, "grad_norm": 2.276571617181377, "learning_rate": 1.6490127257400363e-06, "loss": 0.5643, "step": 7136 }, { "epoch": 0.74, "grad_norm": 2.08819425335901, "learning_rate": 1.6477635125368136e-06, "loss": 0.6155, "step": 7137 }, { "epoch": 0.74, "grad_norm": 1.9634742797877143, "learning_rate": 1.6465146793287028e-06, "loss": 0.7454, "step": 7138 }, { "epoch": 0.74, "grad_norm": 1.9754650148020452, "learning_rate": 1.645266226257269e-06, "loss": 0.5814, "step": 7139 }, { "epoch": 0.74, "grad_norm": 1.9078401551432211, "learning_rate": 1.6440181534640277e-06, "loss": 0.6458, "step": 7140 }, { "epoch": 0.74, "grad_norm": 1.9830325654887706, "learning_rate": 1.6427704610904594e-06, "loss": 0.6258, "step": 7141 }, { "epoch": 0.74, "grad_norm": 1.9722224448626224, "learning_rate": 1.6415231492779942e-06, "loss": 0.6054, "step": 7142 }, { "epoch": 0.74, "grad_norm": 2.1482120325582414, "learning_rate": 1.640276218168023e-06, "loss": 0.5688, "step": 7143 }, { "epoch": 0.74, "grad_norm": 1.8700239696691592, "learning_rate": 1.6390296679018909e-06, "loss": 0.6565, "step": 7144 }, { "epoch": 0.74, "grad_norm": 2.0072980679497032, "learning_rate": 1.637783498620904e-06, "loss": 0.7163, "step": 7145 }, { "epoch": 0.74, "grad_norm": 1.744011926537701, "learning_rate": 1.6365377104663206e-06, "loss": 0.5255, "step": 7146 }, { "epoch": 0.74, "grad_norm": 2.1432552564208316, "learning_rate": 1.63529230357936e-06, "loss": 0.7444, "step": 7147 }, { "epoch": 0.74, "grad_norm": 2.0570680988223917, "learning_rate": 1.6340472781011935e-06, "loss": 0.6855, "step": 7148 }, { "epoch": 0.74, "grad_norm": 2.1021390615422724, "learning_rate": 1.6328026341729547e-06, "loss": 0.5668, "step": 7149 }, { "epoch": 0.74, "grad_norm": 2.1758359200731814, "learning_rate": 1.6315583719357298e-06, "loss": 0.6835, "step": 7150 }, { "epoch": 0.74, "grad_norm": 1.9660860516559455, "learning_rate": 1.630314491530563e-06, "loss": 0.5572, "step": 7151 }, { "epoch": 0.74, "grad_norm": 2.0277568894165072, "learning_rate": 1.6290709930984533e-06, "loss": 0.703, "step": 7152 }, { "epoch": 0.74, "grad_norm": 1.7083389944187983, "learning_rate": 1.6278278767803617e-06, "loss": 0.5905, "step": 7153 }, { "epoch": 0.74, "grad_norm": 1.7879894519009019, "learning_rate": 1.6265851427171996e-06, "loss": 0.5972, "step": 7154 }, { "epoch": 0.74, "grad_norm": 1.8278090663267998, "learning_rate": 1.625342791049841e-06, "loss": 0.6401, "step": 7155 }, { "epoch": 0.74, "grad_norm": 1.857344031323788, "learning_rate": 1.6241008219191107e-06, "loss": 0.5997, "step": 7156 }, { "epoch": 0.74, "grad_norm": 1.9243269148131965, "learning_rate": 1.622859235465795e-06, "loss": 0.6302, "step": 7157 }, { "epoch": 0.74, "grad_norm": 1.9731235103533473, "learning_rate": 1.6216180318306352e-06, "loss": 0.6789, "step": 7158 }, { "epoch": 0.74, "grad_norm": 2.1769733748993194, "learning_rate": 1.6203772111543247e-06, "loss": 0.5889, "step": 7159 }, { "epoch": 0.74, "grad_norm": 1.8509294311347555, "learning_rate": 1.6191367735775231e-06, "loss": 0.5998, "step": 7160 }, { "epoch": 0.74, "grad_norm": 1.8165697626069977, "learning_rate": 1.6178967192408367e-06, "loss": 0.5656, "step": 7161 }, { "epoch": 0.74, "grad_norm": 1.9585567515408426, "learning_rate": 1.616657048284836e-06, "loss": 0.67, "step": 7162 }, { "epoch": 0.74, "grad_norm": 1.8907536973431627, "learning_rate": 1.6154177608500415e-06, "loss": 0.6474, "step": 7163 }, { "epoch": 0.74, "grad_norm": 1.8221553661698418, "learning_rate": 1.6141788570769385e-06, "loss": 0.6146, "step": 7164 }, { "epoch": 0.74, "grad_norm": 1.8889485316058878, "learning_rate": 1.6129403371059576e-06, "loss": 0.6079, "step": 7165 }, { "epoch": 0.74, "grad_norm": 1.8222971298134154, "learning_rate": 1.611702201077497e-06, "loss": 0.6486, "step": 7166 }, { "epoch": 0.75, "grad_norm": 2.0447254833968795, "learning_rate": 1.610464449131902e-06, "loss": 0.6075, "step": 7167 }, { "epoch": 0.75, "grad_norm": 1.7665235599646913, "learning_rate": 1.609227081409484e-06, "loss": 0.6228, "step": 7168 }, { "epoch": 0.75, "grad_norm": 1.964368754990478, "learning_rate": 1.607990098050501e-06, "loss": 0.6175, "step": 7169 }, { "epoch": 0.75, "grad_norm": 2.029133317798616, "learning_rate": 1.6067534991951754e-06, "loss": 0.6734, "step": 7170 }, { "epoch": 0.75, "grad_norm": 1.8409734232164388, "learning_rate": 1.6055172849836826e-06, "loss": 0.6309, "step": 7171 }, { "epoch": 0.75, "grad_norm": 1.7783753720824762, "learning_rate": 1.6042814555561525e-06, "loss": 0.5715, "step": 7172 }, { "epoch": 0.75, "grad_norm": 1.698699527789748, "learning_rate": 1.603046011052673e-06, "loss": 0.6219, "step": 7173 }, { "epoch": 0.75, "grad_norm": 1.950831771168677, "learning_rate": 1.6018109516132917e-06, "loss": 0.6084, "step": 7174 }, { "epoch": 0.75, "grad_norm": 1.7085437834335682, "learning_rate": 1.600576277378007e-06, "loss": 0.5615, "step": 7175 }, { "epoch": 0.75, "grad_norm": 1.803822529222228, "learning_rate": 1.5993419884867783e-06, "loss": 0.5288, "step": 7176 }, { "epoch": 0.75, "grad_norm": 2.0799801699470155, "learning_rate": 1.5981080850795171e-06, "loss": 0.6573, "step": 7177 }, { "epoch": 0.75, "grad_norm": 2.074780152890361, "learning_rate": 1.5968745672960961e-06, "loss": 0.6823, "step": 7178 }, { "epoch": 0.75, "grad_norm": 1.9974810974208348, "learning_rate": 1.59564143527634e-06, "loss": 0.6208, "step": 7179 }, { "epoch": 0.75, "grad_norm": 2.024925483595224, "learning_rate": 1.5944086891600314e-06, "loss": 0.6646, "step": 7180 }, { "epoch": 0.75, "grad_norm": 2.182335939230614, "learning_rate": 1.5931763290869073e-06, "loss": 0.6291, "step": 7181 }, { "epoch": 0.75, "grad_norm": 2.1038938622619363, "learning_rate": 1.5919443551966662e-06, "loss": 0.5521, "step": 7182 }, { "epoch": 0.75, "grad_norm": 2.2416642109686515, "learning_rate": 1.5907127676289564e-06, "loss": 0.6313, "step": 7183 }, { "epoch": 0.75, "grad_norm": 1.8541008455898098, "learning_rate": 1.589481566523388e-06, "loss": 0.4875, "step": 7184 }, { "epoch": 0.75, "grad_norm": 1.9562324429343436, "learning_rate": 1.5882507520195218e-06, "loss": 0.5673, "step": 7185 }, { "epoch": 0.75, "grad_norm": 2.064914677261776, "learning_rate": 1.587020324256881e-06, "loss": 0.6345, "step": 7186 }, { "epoch": 0.75, "grad_norm": 1.9817317237939902, "learning_rate": 1.5857902833749395e-06, "loss": 0.5916, "step": 7187 }, { "epoch": 0.75, "grad_norm": 1.801570357563867, "learning_rate": 1.5845606295131284e-06, "loss": 0.5671, "step": 7188 }, { "epoch": 0.75, "grad_norm": 1.817221361338044, "learning_rate": 1.5833313628108388e-06, "loss": 0.5532, "step": 7189 }, { "epoch": 0.75, "grad_norm": 1.854113231046822, "learning_rate": 1.5821024834074134e-06, "loss": 0.6285, "step": 7190 }, { "epoch": 0.75, "grad_norm": 2.165618397080074, "learning_rate": 1.5808739914421512e-06, "loss": 0.5933, "step": 7191 }, { "epoch": 0.75, "grad_norm": 1.9316848594411447, "learning_rate": 1.5796458870543124e-06, "loss": 0.6431, "step": 7192 }, { "epoch": 0.75, "grad_norm": 2.0443009173966904, "learning_rate": 1.5784181703831059e-06, "loss": 0.6558, "step": 7193 }, { "epoch": 0.75, "grad_norm": 2.0513270749303976, "learning_rate": 1.577190841567704e-06, "loss": 0.6386, "step": 7194 }, { "epoch": 0.75, "grad_norm": 1.9096009741492599, "learning_rate": 1.575963900747229e-06, "loss": 0.6275, "step": 7195 }, { "epoch": 0.75, "grad_norm": 2.0390290313937824, "learning_rate": 1.5747373480607607e-06, "loss": 0.6873, "step": 7196 }, { "epoch": 0.75, "grad_norm": 1.9505996653243933, "learning_rate": 1.5735111836473393e-06, "loss": 0.6114, "step": 7197 }, { "epoch": 0.75, "grad_norm": 2.010251955965999, "learning_rate": 1.5722854076459538e-06, "loss": 0.6547, "step": 7198 }, { "epoch": 0.75, "grad_norm": 1.9576954525895818, "learning_rate": 1.5710600201955567e-06, "loss": 0.6175, "step": 7199 }, { "epoch": 0.75, "grad_norm": 1.971259045738693, "learning_rate": 1.5698350214350483e-06, "loss": 0.6752, "step": 7200 }, { "epoch": 0.75, "grad_norm": 1.9285497231593764, "learning_rate": 1.5686104115032952e-06, "loss": 0.5635, "step": 7201 }, { "epoch": 0.75, "grad_norm": 2.070817821852628, "learning_rate": 1.567386190539107e-06, "loss": 0.6503, "step": 7202 }, { "epoch": 0.75, "grad_norm": 1.8660756222471904, "learning_rate": 1.5661623586812607e-06, "loss": 0.548, "step": 7203 }, { "epoch": 0.75, "grad_norm": 1.941836274898934, "learning_rate": 1.5649389160684813e-06, "loss": 0.526, "step": 7204 }, { "epoch": 0.75, "grad_norm": 2.032513766314926, "learning_rate": 1.5637158628394572e-06, "loss": 0.5919, "step": 7205 }, { "epoch": 0.75, "grad_norm": 2.0981992425710727, "learning_rate": 1.5624931991328246e-06, "loss": 0.6707, "step": 7206 }, { "epoch": 0.75, "grad_norm": 2.0306085746889875, "learning_rate": 1.5612709250871822e-06, "loss": 0.6382, "step": 7207 }, { "epoch": 0.75, "grad_norm": 1.9767774040239565, "learning_rate": 1.5600490408410807e-06, "loss": 0.541, "step": 7208 }, { "epoch": 0.75, "grad_norm": 1.89306377323277, "learning_rate": 1.5588275465330277e-06, "loss": 0.6256, "step": 7209 }, { "epoch": 0.75, "grad_norm": 1.961898068094439, "learning_rate": 1.5576064423014846e-06, "loss": 0.6037, "step": 7210 }, { "epoch": 0.75, "grad_norm": 1.8167320785570311, "learning_rate": 1.5563857282848738e-06, "loss": 0.6319, "step": 7211 }, { "epoch": 0.75, "grad_norm": 2.0044052618558608, "learning_rate": 1.555165404621567e-06, "loss": 0.6699, "step": 7212 }, { "epoch": 0.75, "grad_norm": 1.8192002546413146, "learning_rate": 1.5539454714498985e-06, "loss": 0.5856, "step": 7213 }, { "epoch": 0.75, "grad_norm": 1.828513720311586, "learning_rate": 1.5527259289081508e-06, "loss": 0.6237, "step": 7214 }, { "epoch": 0.75, "grad_norm": 1.962328010297825, "learning_rate": 1.5515067771345694e-06, "loss": 0.7629, "step": 7215 }, { "epoch": 0.75, "grad_norm": 1.8235786907604312, "learning_rate": 1.5502880162673506e-06, "loss": 0.6629, "step": 7216 }, { "epoch": 0.75, "grad_norm": 2.1373109789266316, "learning_rate": 1.5490696464446475e-06, "loss": 0.6834, "step": 7217 }, { "epoch": 0.75, "grad_norm": 1.798807028243252, "learning_rate": 1.5478516678045686e-06, "loss": 0.6461, "step": 7218 }, { "epoch": 0.75, "grad_norm": 2.073920849769466, "learning_rate": 1.546634080485181e-06, "loss": 0.584, "step": 7219 }, { "epoch": 0.75, "grad_norm": 2.143982758312892, "learning_rate": 1.545416884624502e-06, "loss": 0.6263, "step": 7220 }, { "epoch": 0.75, "grad_norm": 1.868656609798202, "learning_rate": 1.5442000803605117e-06, "loss": 0.6062, "step": 7221 }, { "epoch": 0.75, "grad_norm": 2.102607752104285, "learning_rate": 1.5429836678311382e-06, "loss": 0.6158, "step": 7222 }, { "epoch": 0.75, "grad_norm": 2.015804353429865, "learning_rate": 1.5417676471742716e-06, "loss": 0.6549, "step": 7223 }, { "epoch": 0.75, "grad_norm": 1.6390174876555401, "learning_rate": 1.5405520185277533e-06, "loss": 0.553, "step": 7224 }, { "epoch": 0.75, "grad_norm": 2.0043174338965635, "learning_rate": 1.5393367820293809e-06, "loss": 0.5936, "step": 7225 }, { "epoch": 0.75, "grad_norm": 2.1450822606543487, "learning_rate": 1.5381219378169103e-06, "loss": 0.6166, "step": 7226 }, { "epoch": 0.75, "grad_norm": 1.9942427306633814, "learning_rate": 1.5369074860280509e-06, "loss": 0.5527, "step": 7227 }, { "epoch": 0.75, "grad_norm": 1.9786504142555035, "learning_rate": 1.5356934268004648e-06, "loss": 0.5666, "step": 7228 }, { "epoch": 0.75, "grad_norm": 1.6346339842485804, "learning_rate": 1.534479760271776e-06, "loss": 0.4102, "step": 7229 }, { "epoch": 0.75, "grad_norm": 2.1155433193814477, "learning_rate": 1.5332664865795594e-06, "loss": 0.6233, "step": 7230 }, { "epoch": 0.75, "grad_norm": 2.0461764504626054, "learning_rate": 1.532053605861345e-06, "loss": 0.659, "step": 7231 }, { "epoch": 0.75, "grad_norm": 1.9075305136903682, "learning_rate": 1.5308411182546224e-06, "loss": 0.6123, "step": 7232 }, { "epoch": 0.75, "grad_norm": 1.898062658797633, "learning_rate": 1.5296290238968303e-06, "loss": 0.6636, "step": 7233 }, { "epoch": 0.75, "grad_norm": 1.954671511226436, "learning_rate": 1.5284173229253712e-06, "loss": 0.5265, "step": 7234 }, { "epoch": 0.75, "grad_norm": 2.2142740269246506, "learning_rate": 1.527206015477594e-06, "loss": 0.5625, "step": 7235 }, { "epoch": 0.75, "grad_norm": 2.1394005619281624, "learning_rate": 1.5259951016908108e-06, "loss": 0.7228, "step": 7236 }, { "epoch": 0.75, "grad_norm": 2.1596350480306192, "learning_rate": 1.5247845817022827e-06, "loss": 0.6213, "step": 7237 }, { "epoch": 0.75, "grad_norm": 1.9822202370478568, "learning_rate": 1.5235744556492337e-06, "loss": 0.577, "step": 7238 }, { "epoch": 0.75, "grad_norm": 1.849473911231631, "learning_rate": 1.5223647236688317e-06, "loss": 0.5713, "step": 7239 }, { "epoch": 0.75, "grad_norm": 2.1778919669885215, "learning_rate": 1.5211553858982115e-06, "loss": 0.636, "step": 7240 }, { "epoch": 0.75, "grad_norm": 1.9120175964258541, "learning_rate": 1.5199464424744553e-06, "loss": 0.6798, "step": 7241 }, { "epoch": 0.75, "grad_norm": 2.2539495939251353, "learning_rate": 1.5187378935346075e-06, "loss": 0.4582, "step": 7242 }, { "epoch": 0.75, "grad_norm": 2.0770941489672294, "learning_rate": 1.5175297392156602e-06, "loss": 0.6639, "step": 7243 }, { "epoch": 0.75, "grad_norm": 1.6863575683493603, "learning_rate": 1.516321979654568e-06, "loss": 0.6796, "step": 7244 }, { "epoch": 0.75, "grad_norm": 2.05137600474315, "learning_rate": 1.5151146149882356e-06, "loss": 0.6001, "step": 7245 }, { "epoch": 0.75, "grad_norm": 2.04734677979564, "learning_rate": 1.513907645353525e-06, "loss": 0.7466, "step": 7246 }, { "epoch": 0.75, "grad_norm": 1.7640317190712616, "learning_rate": 1.5127010708872513e-06, "loss": 0.5521, "step": 7247 }, { "epoch": 0.75, "grad_norm": 1.9082985743121863, "learning_rate": 1.5114948917261896e-06, "loss": 0.6531, "step": 7248 }, { "epoch": 0.75, "grad_norm": 2.068888380150388, "learning_rate": 1.510289108007064e-06, "loss": 0.573, "step": 7249 }, { "epoch": 0.75, "grad_norm": 2.1610981787210704, "learning_rate": 1.5090837198665602e-06, "loss": 0.7273, "step": 7250 }, { "epoch": 0.75, "grad_norm": 1.8145500032819155, "learning_rate": 1.507878727441313e-06, "loss": 0.5476, "step": 7251 }, { "epoch": 0.75, "grad_norm": 2.1836728973091852, "learning_rate": 1.5066741308679183e-06, "loss": 0.7212, "step": 7252 }, { "epoch": 0.75, "grad_norm": 2.026098366272406, "learning_rate": 1.5054699302829217e-06, "loss": 0.6131, "step": 7253 }, { "epoch": 0.75, "grad_norm": 1.958268075225633, "learning_rate": 1.5042661258228268e-06, "loss": 0.7548, "step": 7254 }, { "epoch": 0.75, "grad_norm": 2.2799034428142844, "learning_rate": 1.5030627176240903e-06, "loss": 0.6088, "step": 7255 }, { "epoch": 0.75, "grad_norm": 1.8507112671260586, "learning_rate": 1.5018597058231276e-06, "loss": 0.6433, "step": 7256 }, { "epoch": 0.75, "grad_norm": 1.9787360681863888, "learning_rate": 1.500657090556305e-06, "loss": 0.6084, "step": 7257 }, { "epoch": 0.75, "grad_norm": 2.0335913118093716, "learning_rate": 1.4994548719599478e-06, "loss": 0.6161, "step": 7258 }, { "epoch": 0.75, "grad_norm": 1.913473741873713, "learning_rate": 1.4982530501703325e-06, "loss": 0.599, "step": 7259 }, { "epoch": 0.75, "grad_norm": 1.993400563984133, "learning_rate": 1.4970516253236938e-06, "loss": 0.6776, "step": 7260 }, { "epoch": 0.75, "grad_norm": 1.8738338404676722, "learning_rate": 1.4958505975562205e-06, "loss": 0.6608, "step": 7261 }, { "epoch": 0.75, "grad_norm": 1.9624680385206787, "learning_rate": 1.4946499670040526e-06, "loss": 0.646, "step": 7262 }, { "epoch": 0.75, "grad_norm": 2.1031705360635313, "learning_rate": 1.4934497338032926e-06, "loss": 0.6785, "step": 7263 }, { "epoch": 0.76, "grad_norm": 1.7375430132673397, "learning_rate": 1.4922498980899907e-06, "loss": 0.5263, "step": 7264 }, { "epoch": 0.76, "grad_norm": 1.942208339687299, "learning_rate": 1.4910504600001574e-06, "loss": 0.6087, "step": 7265 }, { "epoch": 0.76, "grad_norm": 1.7733528282248332, "learning_rate": 1.489851419669755e-06, "loss": 0.6096, "step": 7266 }, { "epoch": 0.76, "grad_norm": 1.9702015428371882, "learning_rate": 1.4886527772347015e-06, "loss": 0.6524, "step": 7267 }, { "epoch": 0.76, "grad_norm": 1.821655039369508, "learning_rate": 1.4874545328308681e-06, "loss": 0.6979, "step": 7268 }, { "epoch": 0.76, "grad_norm": 1.6085812027586697, "learning_rate": 1.486256686594086e-06, "loss": 0.4883, "step": 7269 }, { "epoch": 0.76, "grad_norm": 1.8103497969052265, "learning_rate": 1.4850592386601342e-06, "loss": 0.6498, "step": 7270 }, { "epoch": 0.76, "grad_norm": 1.8821065569520434, "learning_rate": 1.4838621891647537e-06, "loss": 0.5811, "step": 7271 }, { "epoch": 0.76, "grad_norm": 2.026422332113513, "learning_rate": 1.482665538243634e-06, "loss": 0.5675, "step": 7272 }, { "epoch": 0.76, "grad_norm": 2.021526459853876, "learning_rate": 1.4814692860324254e-06, "loss": 0.6165, "step": 7273 }, { "epoch": 0.76, "grad_norm": 2.2131193609875255, "learning_rate": 1.4802734326667261e-06, "loss": 0.6052, "step": 7274 }, { "epoch": 0.76, "grad_norm": 1.9863781617996716, "learning_rate": 1.4790779782820991e-06, "loss": 0.7431, "step": 7275 }, { "epoch": 0.76, "grad_norm": 1.899030267652968, "learning_rate": 1.4778829230140479e-06, "loss": 0.5902, "step": 7276 }, { "epoch": 0.76, "grad_norm": 2.104359170256935, "learning_rate": 1.4766882669980443e-06, "loss": 0.5782, "step": 7277 }, { "epoch": 0.76, "grad_norm": 2.3395206730302807, "learning_rate": 1.4754940103695065e-06, "loss": 0.6701, "step": 7278 }, { "epoch": 0.76, "grad_norm": 1.9128557347137265, "learning_rate": 1.4743001532638135e-06, "loss": 0.5963, "step": 7279 }, { "epoch": 0.76, "grad_norm": 1.839960847147049, "learning_rate": 1.473106695816292e-06, "loss": 0.5946, "step": 7280 }, { "epoch": 0.76, "grad_norm": 1.8804694955721557, "learning_rate": 1.4719136381622307e-06, "loss": 0.5972, "step": 7281 }, { "epoch": 0.76, "grad_norm": 2.1064096197759916, "learning_rate": 1.4707209804368683e-06, "loss": 0.6805, "step": 7282 }, { "epoch": 0.76, "grad_norm": 2.1508059506711388, "learning_rate": 1.4695287227753984e-06, "loss": 0.6387, "step": 7283 }, { "epoch": 0.76, "grad_norm": 1.934117540641856, "learning_rate": 1.4683368653129698e-06, "loss": 0.6755, "step": 7284 }, { "epoch": 0.76, "grad_norm": 1.874609864466516, "learning_rate": 1.4671454081846886e-06, "loss": 0.7757, "step": 7285 }, { "epoch": 0.76, "grad_norm": 1.8727817163935145, "learning_rate": 1.4659543515256103e-06, "loss": 0.7327, "step": 7286 }, { "epoch": 0.76, "grad_norm": 1.9625655507945419, "learning_rate": 1.4647636954707517e-06, "loss": 0.5665, "step": 7287 }, { "epoch": 0.76, "grad_norm": 1.9176017722412064, "learning_rate": 1.4635734401550761e-06, "loss": 0.5784, "step": 7288 }, { "epoch": 0.76, "grad_norm": 1.9382591162652874, "learning_rate": 1.4623835857135099e-06, "loss": 0.6005, "step": 7289 }, { "epoch": 0.76, "grad_norm": 2.0286029348295123, "learning_rate": 1.4611941322809282e-06, "loss": 0.6977, "step": 7290 }, { "epoch": 0.76, "grad_norm": 1.9278560441634012, "learning_rate": 1.4600050799921622e-06, "loss": 0.6472, "step": 7291 }, { "epoch": 0.76, "grad_norm": 1.9786842736043846, "learning_rate": 1.4588164289819956e-06, "loss": 0.7257, "step": 7292 }, { "epoch": 0.76, "grad_norm": 1.919764223108238, "learning_rate": 1.4576281793851726e-06, "loss": 0.5598, "step": 7293 }, { "epoch": 0.76, "grad_norm": 2.0586232862552425, "learning_rate": 1.456440331336385e-06, "loss": 0.5034, "step": 7294 }, { "epoch": 0.76, "grad_norm": 1.8835266264519421, "learning_rate": 1.4552528849702852e-06, "loss": 0.6227, "step": 7295 }, { "epoch": 0.76, "grad_norm": 1.9365442859333502, "learning_rate": 1.454065840421473e-06, "loss": 0.6033, "step": 7296 }, { "epoch": 0.76, "grad_norm": 1.8303763066257501, "learning_rate": 1.452879197824511e-06, "loss": 0.6193, "step": 7297 }, { "epoch": 0.76, "grad_norm": 1.714017227116502, "learning_rate": 1.45169295731391e-06, "loss": 0.5045, "step": 7298 }, { "epoch": 0.76, "grad_norm": 1.9553749197730281, "learning_rate": 1.450507119024135e-06, "loss": 0.5116, "step": 7299 }, { "epoch": 0.76, "grad_norm": 2.0489024004785206, "learning_rate": 1.4493216830896112e-06, "loss": 0.5511, "step": 7300 }, { "epoch": 0.76, "grad_norm": 1.9177337191952164, "learning_rate": 1.4481366496447113e-06, "loss": 0.6671, "step": 7301 }, { "epoch": 0.76, "grad_norm": 2.242369095193321, "learning_rate": 1.4469520188237684e-06, "loss": 0.6147, "step": 7302 }, { "epoch": 0.76, "grad_norm": 1.9271683681568583, "learning_rate": 1.4457677907610646e-06, "loss": 0.6747, "step": 7303 }, { "epoch": 0.76, "grad_norm": 1.9556100302069954, "learning_rate": 1.4445839655908432e-06, "loss": 0.6199, "step": 7304 }, { "epoch": 0.76, "grad_norm": 2.0446941771338847, "learning_rate": 1.4434005434472914e-06, "loss": 0.7253, "step": 7305 }, { "epoch": 0.76, "grad_norm": 2.180261152086753, "learning_rate": 1.4422175244645613e-06, "loss": 0.7472, "step": 7306 }, { "epoch": 0.76, "grad_norm": 1.9436496640175285, "learning_rate": 1.4410349087767521e-06, "loss": 0.681, "step": 7307 }, { "epoch": 0.76, "grad_norm": 1.957008494417684, "learning_rate": 1.4398526965179233e-06, "loss": 0.6194, "step": 7308 }, { "epoch": 0.76, "grad_norm": 1.9913658996313202, "learning_rate": 1.438670887822081e-06, "loss": 0.5928, "step": 7309 }, { "epoch": 0.76, "grad_norm": 1.9922918850877875, "learning_rate": 1.437489482823195e-06, "loss": 0.7113, "step": 7310 }, { "epoch": 0.76, "grad_norm": 1.9105299964949578, "learning_rate": 1.4363084816551798e-06, "loss": 0.5878, "step": 7311 }, { "epoch": 0.76, "grad_norm": 1.8437267826289236, "learning_rate": 1.4351278844519134e-06, "loss": 0.6025, "step": 7312 }, { "epoch": 0.76, "grad_norm": 2.2123053516034568, "learning_rate": 1.4339476913472177e-06, "loss": 0.6034, "step": 7313 }, { "epoch": 0.76, "grad_norm": 1.7734591969685631, "learning_rate": 1.4327679024748785e-06, "loss": 0.521, "step": 7314 }, { "epoch": 0.76, "grad_norm": 2.0931515989249583, "learning_rate": 1.4315885179686285e-06, "loss": 0.6181, "step": 7315 }, { "epoch": 0.76, "grad_norm": 1.8904524239638931, "learning_rate": 1.4304095379621607e-06, "loss": 0.5739, "step": 7316 }, { "epoch": 0.76, "grad_norm": 1.8580069179145808, "learning_rate": 1.4292309625891166e-06, "loss": 0.4972, "step": 7317 }, { "epoch": 0.76, "grad_norm": 1.8960742519739238, "learning_rate": 1.4280527919830966e-06, "loss": 0.5732, "step": 7318 }, { "epoch": 0.76, "grad_norm": 1.8541559313701252, "learning_rate": 1.4268750262776526e-06, "loss": 0.6536, "step": 7319 }, { "epoch": 0.76, "grad_norm": 2.079477738015932, "learning_rate": 1.42569766560629e-06, "loss": 0.6436, "step": 7320 }, { "epoch": 0.76, "grad_norm": 1.8833701025509348, "learning_rate": 1.4245207101024684e-06, "loss": 0.6474, "step": 7321 }, { "epoch": 0.76, "grad_norm": 2.0036752845734265, "learning_rate": 1.4233441598996055e-06, "loss": 0.6448, "step": 7322 }, { "epoch": 0.76, "grad_norm": 1.9424022901674687, "learning_rate": 1.4221680151310667e-06, "loss": 0.5813, "step": 7323 }, { "epoch": 0.76, "grad_norm": 1.776323088501909, "learning_rate": 1.420992275930178e-06, "loss": 0.6075, "step": 7324 }, { "epoch": 0.76, "grad_norm": 1.8971379539053441, "learning_rate": 1.4198169424302133e-06, "loss": 0.5884, "step": 7325 }, { "epoch": 0.76, "grad_norm": 1.9758928262230633, "learning_rate": 1.4186420147644053e-06, "loss": 0.6538, "step": 7326 }, { "epoch": 0.76, "grad_norm": 2.0378828824189505, "learning_rate": 1.4174674930659389e-06, "loss": 0.686, "step": 7327 }, { "epoch": 0.76, "grad_norm": 2.100855386114118, "learning_rate": 1.4162933774679494e-06, "loss": 0.5805, "step": 7328 }, { "epoch": 0.76, "grad_norm": 1.8241299180054797, "learning_rate": 1.4151196681035339e-06, "loss": 0.5597, "step": 7329 }, { "epoch": 0.76, "grad_norm": 2.3646475695480302, "learning_rate": 1.4139463651057377e-06, "loss": 0.696, "step": 7330 }, { "epoch": 0.76, "grad_norm": 1.7871636366438264, "learning_rate": 1.4127734686075589e-06, "loss": 0.5538, "step": 7331 }, { "epoch": 0.76, "grad_norm": 1.7624748734883664, "learning_rate": 1.4116009787419555e-06, "loss": 0.5067, "step": 7332 }, { "epoch": 0.76, "grad_norm": 1.8830917235961375, "learning_rate": 1.4104288956418326e-06, "loss": 0.6711, "step": 7333 }, { "epoch": 0.76, "grad_norm": 2.1333917259154473, "learning_rate": 1.4092572194400556e-06, "loss": 0.6778, "step": 7334 }, { "epoch": 0.76, "grad_norm": 1.8922106726635324, "learning_rate": 1.4080859502694399e-06, "loss": 0.5897, "step": 7335 }, { "epoch": 0.76, "grad_norm": 1.7979650669786016, "learning_rate": 1.406915088262753e-06, "loss": 0.6342, "step": 7336 }, { "epoch": 0.76, "grad_norm": 1.8262225902179414, "learning_rate": 1.4057446335527224e-06, "loss": 0.5744, "step": 7337 }, { "epoch": 0.76, "grad_norm": 1.939920334697821, "learning_rate": 1.4045745862720227e-06, "loss": 0.6456, "step": 7338 }, { "epoch": 0.76, "grad_norm": 2.046921032309175, "learning_rate": 1.4034049465532884e-06, "loss": 0.6499, "step": 7339 }, { "epoch": 0.76, "grad_norm": 2.0279532671601435, "learning_rate": 1.4022357145291022e-06, "loss": 0.6686, "step": 7340 }, { "epoch": 0.76, "grad_norm": 1.876032959240246, "learning_rate": 1.4010668903320068e-06, "loss": 0.65, "step": 7341 }, { "epoch": 0.76, "grad_norm": 2.010683002947994, "learning_rate": 1.3998984740944898e-06, "loss": 0.6212, "step": 7342 }, { "epoch": 0.76, "grad_norm": 1.9942519888855668, "learning_rate": 1.3987304659490019e-06, "loss": 0.6769, "step": 7343 }, { "epoch": 0.76, "grad_norm": 2.3241892030749804, "learning_rate": 1.397562866027941e-06, "loss": 0.7233, "step": 7344 }, { "epoch": 0.76, "grad_norm": 1.8842048203883948, "learning_rate": 1.3963956744636642e-06, "loss": 0.483, "step": 7345 }, { "epoch": 0.76, "grad_norm": 1.8132127750394096, "learning_rate": 1.3952288913884754e-06, "loss": 0.6027, "step": 7346 }, { "epoch": 0.76, "grad_norm": 1.9408602426373183, "learning_rate": 1.3940625169346406e-06, "loss": 0.6706, "step": 7347 }, { "epoch": 0.76, "grad_norm": 1.9509705460858806, "learning_rate": 1.3928965512343705e-06, "loss": 0.512, "step": 7348 }, { "epoch": 0.76, "grad_norm": 1.846187361487381, "learning_rate": 1.3917309944198392e-06, "loss": 0.5747, "step": 7349 }, { "epoch": 0.76, "grad_norm": 1.8360446999359556, "learning_rate": 1.390565846623163e-06, "loss": 0.5452, "step": 7350 }, { "epoch": 0.76, "grad_norm": 2.207526838462662, "learning_rate": 1.389401107976423e-06, "loss": 0.6348, "step": 7351 }, { "epoch": 0.76, "grad_norm": 1.6972499850348746, "learning_rate": 1.3882367786116458e-06, "loss": 0.6734, "step": 7352 }, { "epoch": 0.76, "grad_norm": 1.9465037911461067, "learning_rate": 1.3870728586608172e-06, "loss": 0.638, "step": 7353 }, { "epoch": 0.76, "grad_norm": 1.99105946713035, "learning_rate": 1.3859093482558717e-06, "loss": 0.4869, "step": 7354 }, { "epoch": 0.76, "grad_norm": 1.9083214942937838, "learning_rate": 1.3847462475287027e-06, "loss": 0.6243, "step": 7355 }, { "epoch": 0.76, "grad_norm": 1.8052126687935333, "learning_rate": 1.3835835566111527e-06, "loss": 0.5666, "step": 7356 }, { "epoch": 0.76, "grad_norm": 1.8247580687949765, "learning_rate": 1.3824212756350196e-06, "loss": 0.5969, "step": 7357 }, { "epoch": 0.76, "grad_norm": 1.931041342905744, "learning_rate": 1.3812594047320526e-06, "loss": 0.6333, "step": 7358 }, { "epoch": 0.76, "grad_norm": 2.003570523038699, "learning_rate": 1.3800979440339602e-06, "loss": 0.6259, "step": 7359 }, { "epoch": 0.77, "grad_norm": 1.9910752759176764, "learning_rate": 1.3789368936723967e-06, "loss": 0.596, "step": 7360 }, { "epoch": 0.77, "grad_norm": 2.204108423906913, "learning_rate": 1.3777762537789774e-06, "loss": 0.6022, "step": 7361 }, { "epoch": 0.77, "grad_norm": 1.7905234257212006, "learning_rate": 1.3766160244852645e-06, "loss": 0.666, "step": 7362 }, { "epoch": 0.77, "grad_norm": 1.785127840395526, "learning_rate": 1.375456205922779e-06, "loss": 0.6004, "step": 7363 }, { "epoch": 0.77, "grad_norm": 1.8382116223905691, "learning_rate": 1.3742967982229915e-06, "loss": 0.6217, "step": 7364 }, { "epoch": 0.77, "grad_norm": 1.8116921007155165, "learning_rate": 1.373137801517327e-06, "loss": 0.5808, "step": 7365 }, { "epoch": 0.77, "grad_norm": 2.0143148074817656, "learning_rate": 1.371979215937166e-06, "loss": 0.5981, "step": 7366 }, { "epoch": 0.77, "grad_norm": 1.945327577763545, "learning_rate": 1.3708210416138395e-06, "loss": 0.7188, "step": 7367 }, { "epoch": 0.77, "grad_norm": 1.9918025752529571, "learning_rate": 1.3696632786786328e-06, "loss": 0.6553, "step": 7368 }, { "epoch": 0.77, "grad_norm": 1.8128557697830252, "learning_rate": 1.368505927262787e-06, "loss": 0.54, "step": 7369 }, { "epoch": 0.77, "grad_norm": 1.8567139003564144, "learning_rate": 1.3673489874974916e-06, "loss": 0.6109, "step": 7370 }, { "epoch": 0.77, "grad_norm": 1.8570547914301103, "learning_rate": 1.3661924595138953e-06, "loss": 0.5828, "step": 7371 }, { "epoch": 0.77, "grad_norm": 2.0112052275541963, "learning_rate": 1.3650363434430957e-06, "loss": 0.643, "step": 7372 }, { "epoch": 0.77, "grad_norm": 1.9738896384606492, "learning_rate": 1.363880639416144e-06, "loss": 0.7166, "step": 7373 }, { "epoch": 0.77, "grad_norm": 1.9657301144532935, "learning_rate": 1.3627253475640484e-06, "loss": 0.5739, "step": 7374 }, { "epoch": 0.77, "grad_norm": 1.8189747674386199, "learning_rate": 1.3615704680177649e-06, "loss": 0.5863, "step": 7375 }, { "epoch": 0.77, "grad_norm": 1.9980086094799228, "learning_rate": 1.3604160009082084e-06, "loss": 0.6279, "step": 7376 }, { "epoch": 0.77, "grad_norm": 1.8661162265592808, "learning_rate": 1.359261946366242e-06, "loss": 0.6268, "step": 7377 }, { "epoch": 0.77, "grad_norm": 2.0760584257748347, "learning_rate": 1.3581083045226884e-06, "loss": 0.5509, "step": 7378 }, { "epoch": 0.77, "grad_norm": 1.890930457422948, "learning_rate": 1.3569550755083139e-06, "loss": 0.4827, "step": 7379 }, { "epoch": 0.77, "grad_norm": 1.8250307809861648, "learning_rate": 1.3558022594538473e-06, "loss": 0.6133, "step": 7380 }, { "epoch": 0.77, "grad_norm": 1.9776684236653719, "learning_rate": 1.3546498564899647e-06, "loss": 0.65, "step": 7381 }, { "epoch": 0.77, "grad_norm": 1.8375601370048764, "learning_rate": 1.3534978667472998e-06, "loss": 0.6446, "step": 7382 }, { "epoch": 0.77, "grad_norm": 1.882348930178244, "learning_rate": 1.3523462903564344e-06, "loss": 0.6684, "step": 7383 }, { "epoch": 0.77, "grad_norm": 2.009995487209896, "learning_rate": 1.3511951274479096e-06, "loss": 0.7314, "step": 7384 }, { "epoch": 0.77, "grad_norm": 1.9752306252999285, "learning_rate": 1.3500443781522131e-06, "loss": 0.5498, "step": 7385 }, { "epoch": 0.77, "grad_norm": 1.989631043234858, "learning_rate": 1.3488940425997937e-06, "loss": 0.6369, "step": 7386 }, { "epoch": 0.77, "grad_norm": 2.0437233289117995, "learning_rate": 1.3477441209210418e-06, "loss": 0.6656, "step": 7387 }, { "epoch": 0.77, "grad_norm": 1.8972633241525336, "learning_rate": 1.3465946132463125e-06, "loss": 0.6628, "step": 7388 }, { "epoch": 0.77, "grad_norm": 1.8438369722578372, "learning_rate": 1.3454455197059064e-06, "loss": 0.5667, "step": 7389 }, { "epoch": 0.77, "grad_norm": 2.0959707834954493, "learning_rate": 1.3442968404300822e-06, "loss": 0.5153, "step": 7390 }, { "epoch": 0.77, "grad_norm": 1.6795735736077286, "learning_rate": 1.3431485755490464e-06, "loss": 0.5253, "step": 7391 }, { "epoch": 0.77, "grad_norm": 1.8489557099642173, "learning_rate": 1.342000725192964e-06, "loss": 0.6495, "step": 7392 }, { "epoch": 0.77, "grad_norm": 2.0025930868134916, "learning_rate": 1.3408532894919502e-06, "loss": 0.6117, "step": 7393 }, { "epoch": 0.77, "grad_norm": 2.9342212627586495, "learning_rate": 1.3397062685760715e-06, "loss": 0.6195, "step": 7394 }, { "epoch": 0.77, "grad_norm": 2.3274325479107087, "learning_rate": 1.3385596625753494e-06, "loss": 0.774, "step": 7395 }, { "epoch": 0.77, "grad_norm": 2.007477669025387, "learning_rate": 1.3374134716197602e-06, "loss": 0.5757, "step": 7396 }, { "epoch": 0.77, "grad_norm": 2.1464769668013823, "learning_rate": 1.336267695839229e-06, "loss": 0.6546, "step": 7397 }, { "epoch": 0.77, "grad_norm": 2.063072317843152, "learning_rate": 1.3351223353636378e-06, "loss": 0.6652, "step": 7398 }, { "epoch": 0.77, "grad_norm": 2.23065638452988, "learning_rate": 1.3339773903228182e-06, "loss": 0.6505, "step": 7399 }, { "epoch": 0.77, "grad_norm": 2.0593425716320235, "learning_rate": 1.3328328608465586e-06, "loss": 0.6715, "step": 7400 }, { "epoch": 0.77, "grad_norm": 1.8612256997436372, "learning_rate": 1.3316887470645956e-06, "loss": 0.5658, "step": 7401 }, { "epoch": 0.77, "grad_norm": 2.092289554514257, "learning_rate": 1.3305450491066207e-06, "loss": 0.5539, "step": 7402 }, { "epoch": 0.77, "grad_norm": 2.1098319168701223, "learning_rate": 1.3294017671022812e-06, "loss": 0.6631, "step": 7403 }, { "epoch": 0.77, "grad_norm": 2.055282905776299, "learning_rate": 1.328258901181172e-06, "loss": 0.638, "step": 7404 }, { "epoch": 0.77, "grad_norm": 1.8517537301996196, "learning_rate": 1.3271164514728458e-06, "loss": 0.617, "step": 7405 }, { "epoch": 0.77, "grad_norm": 1.9401490645931452, "learning_rate": 1.3259744181068041e-06, "loss": 0.6754, "step": 7406 }, { "epoch": 0.77, "grad_norm": 2.0376313058684565, "learning_rate": 1.3248328012125022e-06, "loss": 0.5623, "step": 7407 }, { "epoch": 0.77, "grad_norm": 2.0528085589228446, "learning_rate": 1.3236916009193517e-06, "loss": 0.6334, "step": 7408 }, { "epoch": 0.77, "grad_norm": 1.9909521335765008, "learning_rate": 1.3225508173567125e-06, "loss": 0.6425, "step": 7409 }, { "epoch": 0.77, "grad_norm": 2.075921732696546, "learning_rate": 1.3214104506538971e-06, "loss": 0.5893, "step": 7410 }, { "epoch": 0.77, "grad_norm": 1.906763513338803, "learning_rate": 1.320270500940176e-06, "loss": 0.6562, "step": 7411 }, { "epoch": 0.77, "grad_norm": 1.9042795537620933, "learning_rate": 1.3191309683447662e-06, "loss": 0.5949, "step": 7412 }, { "epoch": 0.77, "grad_norm": 1.876715715793952, "learning_rate": 1.3179918529968422e-06, "loss": 0.6021, "step": 7413 }, { "epoch": 0.77, "grad_norm": 1.8171198061068894, "learning_rate": 1.3168531550255275e-06, "loss": 0.5601, "step": 7414 }, { "epoch": 0.77, "grad_norm": 1.8263304397349878, "learning_rate": 1.3157148745599035e-06, "loss": 0.6062, "step": 7415 }, { "epoch": 0.77, "grad_norm": 2.0517363085919533, "learning_rate": 1.3145770117289957e-06, "loss": 0.6128, "step": 7416 }, { "epoch": 0.77, "grad_norm": 1.9454726843452275, "learning_rate": 1.313439566661791e-06, "loss": 0.5095, "step": 7417 }, { "epoch": 0.77, "grad_norm": 1.9864994234352504, "learning_rate": 1.3123025394872224e-06, "loss": 0.567, "step": 7418 }, { "epoch": 0.77, "grad_norm": 2.0924159820519774, "learning_rate": 1.3111659303341824e-06, "loss": 0.627, "step": 7419 }, { "epoch": 0.77, "grad_norm": 2.0441005429459826, "learning_rate": 1.3100297393315077e-06, "loss": 0.6127, "step": 7420 }, { "epoch": 0.77, "grad_norm": 2.011198133677825, "learning_rate": 1.3088939666079958e-06, "loss": 0.6564, "step": 7421 }, { "epoch": 0.77, "grad_norm": 1.877266216814998, "learning_rate": 1.3077586122923896e-06, "loss": 0.565, "step": 7422 }, { "epoch": 0.77, "grad_norm": 2.1366794824633017, "learning_rate": 1.3066236765133933e-06, "loss": 0.5936, "step": 7423 }, { "epoch": 0.77, "grad_norm": 1.9038044812800596, "learning_rate": 1.3054891593996515e-06, "loss": 0.6186, "step": 7424 }, { "epoch": 0.77, "grad_norm": 2.1095772614580666, "learning_rate": 1.3043550610797728e-06, "loss": 0.6388, "step": 7425 }, { "epoch": 0.77, "grad_norm": 1.9489744092099393, "learning_rate": 1.3032213816823113e-06, "loss": 0.5858, "step": 7426 }, { "epoch": 0.77, "grad_norm": 2.0758525689119067, "learning_rate": 1.3020881213357783e-06, "loss": 0.6035, "step": 7427 }, { "epoch": 0.77, "grad_norm": 2.0709398095894356, "learning_rate": 1.3009552801686331e-06, "loss": 0.554, "step": 7428 }, { "epoch": 0.77, "grad_norm": 1.866922642817398, "learning_rate": 1.299822858309292e-06, "loss": 0.5272, "step": 7429 }, { "epoch": 0.77, "grad_norm": 1.8806440265200155, "learning_rate": 1.29869085588612e-06, "loss": 0.5618, "step": 7430 }, { "epoch": 0.77, "grad_norm": 1.998869396446172, "learning_rate": 1.2975592730274367e-06, "loss": 0.6345, "step": 7431 }, { "epoch": 0.77, "grad_norm": 1.8384656204172125, "learning_rate": 1.296428109861511e-06, "loss": 0.6149, "step": 7432 }, { "epoch": 0.77, "grad_norm": 2.233235086672102, "learning_rate": 1.2952973665165703e-06, "loss": 0.6092, "step": 7433 }, { "epoch": 0.77, "grad_norm": 2.6047023375364677, "learning_rate": 1.2941670431207882e-06, "loss": 0.6329, "step": 7434 }, { "epoch": 0.77, "grad_norm": 2.0698950270152126, "learning_rate": 1.293037139802295e-06, "loss": 0.655, "step": 7435 }, { "epoch": 0.77, "grad_norm": 2.403007806832603, "learning_rate": 1.2919076566891703e-06, "loss": 0.6415, "step": 7436 }, { "epoch": 0.77, "grad_norm": 2.1414097281187785, "learning_rate": 1.290778593909449e-06, "loss": 0.6067, "step": 7437 }, { "epoch": 0.77, "grad_norm": 2.2802961243526507, "learning_rate": 1.2896499515911165e-06, "loss": 0.7176, "step": 7438 }, { "epoch": 0.77, "grad_norm": 1.8863817546743002, "learning_rate": 1.2885217298621084e-06, "loss": 0.6193, "step": 7439 }, { "epoch": 0.77, "grad_norm": 2.044165537334881, "learning_rate": 1.2873939288503185e-06, "loss": 0.682, "step": 7440 }, { "epoch": 0.77, "grad_norm": 2.115113504513422, "learning_rate": 1.2862665486835861e-06, "loss": 0.6313, "step": 7441 }, { "epoch": 0.77, "grad_norm": 1.8510730137768605, "learning_rate": 1.2851395894897101e-06, "loss": 0.676, "step": 7442 }, { "epoch": 0.77, "grad_norm": 2.0138120838329927, "learning_rate": 1.2840130513964338e-06, "loss": 0.5752, "step": 7443 }, { "epoch": 0.77, "grad_norm": 1.8887083592061393, "learning_rate": 1.2828869345314599e-06, "loss": 0.5755, "step": 7444 }, { "epoch": 0.77, "grad_norm": 2.0255619098514455, "learning_rate": 1.2817612390224388e-06, "loss": 0.6166, "step": 7445 }, { "epoch": 0.77, "grad_norm": 1.9683394171262643, "learning_rate": 1.2806359649969746e-06, "loss": 0.5901, "step": 7446 }, { "epoch": 0.77, "grad_norm": 2.1220195600084066, "learning_rate": 1.2795111125826221e-06, "loss": 0.6434, "step": 7447 }, { "epoch": 0.77, "grad_norm": 1.8294003393097111, "learning_rate": 1.2783866819068923e-06, "loss": 0.6808, "step": 7448 }, { "epoch": 0.77, "grad_norm": 1.8189125265698487, "learning_rate": 1.2772626730972437e-06, "loss": 0.4841, "step": 7449 }, { "epoch": 0.77, "grad_norm": 1.6978368469146112, "learning_rate": 1.2761390862810907e-06, "loss": 0.6485, "step": 7450 }, { "epoch": 0.77, "grad_norm": 2.2209030633430737, "learning_rate": 1.2750159215857965e-06, "loss": 0.6692, "step": 7451 }, { "epoch": 0.77, "grad_norm": 1.929125071978997, "learning_rate": 1.2738931791386827e-06, "loss": 0.6442, "step": 7452 }, { "epoch": 0.77, "grad_norm": 1.799506153570008, "learning_rate": 1.2727708590670113e-06, "loss": 0.5547, "step": 7453 }, { "epoch": 0.77, "grad_norm": 2.13151128803415, "learning_rate": 1.2716489614980093e-06, "loss": 0.6322, "step": 7454 }, { "epoch": 0.77, "grad_norm": 2.134629474837925, "learning_rate": 1.2705274865588475e-06, "loss": 0.5887, "step": 7455 }, { "epoch": 0.78, "grad_norm": 1.858068512251421, "learning_rate": 1.2694064343766532e-06, "loss": 0.5537, "step": 7456 }, { "epoch": 0.78, "grad_norm": 1.8715567367821404, "learning_rate": 1.2682858050785018e-06, "loss": 0.6063, "step": 7457 }, { "epoch": 0.78, "grad_norm": 2.198960334446777, "learning_rate": 1.2671655987914261e-06, "loss": 0.5857, "step": 7458 }, { "epoch": 0.78, "grad_norm": 2.1149071876653958, "learning_rate": 1.266045815642405e-06, "loss": 0.5134, "step": 7459 }, { "epoch": 0.78, "grad_norm": 2.3036410022633413, "learning_rate": 1.2649264557583758e-06, "loss": 0.7023, "step": 7460 }, { "epoch": 0.78, "grad_norm": 1.725475153183577, "learning_rate": 1.2638075192662196e-06, "loss": 0.5848, "step": 7461 }, { "epoch": 0.78, "grad_norm": 1.7445337006329626, "learning_rate": 1.2626890062927781e-06, "loss": 0.5705, "step": 7462 }, { "epoch": 0.78, "grad_norm": 1.9445544003943256, "learning_rate": 1.2615709169648382e-06, "loss": 0.5653, "step": 7463 }, { "epoch": 0.78, "grad_norm": 1.86634440943093, "learning_rate": 1.2604532514091444e-06, "loss": 0.6699, "step": 7464 }, { "epoch": 0.78, "grad_norm": 1.884500167863542, "learning_rate": 1.2593360097523883e-06, "loss": 0.5513, "step": 7465 }, { "epoch": 0.78, "grad_norm": 2.0358534462218953, "learning_rate": 1.2582191921212172e-06, "loss": 0.6638, "step": 7466 }, { "epoch": 0.78, "grad_norm": 1.9210922659682166, "learning_rate": 1.257102798642229e-06, "loss": 0.5599, "step": 7467 }, { "epoch": 0.78, "grad_norm": 2.0339311660440456, "learning_rate": 1.2559868294419702e-06, "loss": 0.6709, "step": 7468 }, { "epoch": 0.78, "grad_norm": 1.9172549578337978, "learning_rate": 1.2548712846469469e-06, "loss": 0.5646, "step": 7469 }, { "epoch": 0.78, "grad_norm": 1.9444512462886203, "learning_rate": 1.2537561643836087e-06, "loss": 0.6041, "step": 7470 }, { "epoch": 0.78, "grad_norm": 2.0064010781768653, "learning_rate": 1.2526414687783616e-06, "loss": 0.6542, "step": 7471 }, { "epoch": 0.78, "grad_norm": 1.9136862206054561, "learning_rate": 1.2515271979575645e-06, "loss": 0.6535, "step": 7472 }, { "epoch": 0.78, "grad_norm": 1.791221271933324, "learning_rate": 1.2504133520475237e-06, "loss": 0.615, "step": 7473 }, { "epoch": 0.78, "grad_norm": 2.0331463728088317, "learning_rate": 1.249299931174503e-06, "loss": 0.7014, "step": 7474 }, { "epoch": 0.78, "grad_norm": 2.092796442100189, "learning_rate": 1.248186935464713e-06, "loss": 0.6129, "step": 7475 }, { "epoch": 0.78, "grad_norm": 2.0088765568010576, "learning_rate": 1.2470743650443167e-06, "loss": 0.6894, "step": 7476 }, { "epoch": 0.78, "grad_norm": 2.1889027363813027, "learning_rate": 1.2459622200394344e-06, "loss": 0.6256, "step": 7477 }, { "epoch": 0.78, "grad_norm": 1.9459517215941233, "learning_rate": 1.2448505005761297e-06, "loss": 0.6502, "step": 7478 }, { "epoch": 0.78, "grad_norm": 1.958059668778648, "learning_rate": 1.243739206780426e-06, "loss": 0.5418, "step": 7479 }, { "epoch": 0.78, "grad_norm": 2.0181150473837106, "learning_rate": 1.2426283387782916e-06, "loss": 0.686, "step": 7480 }, { "epoch": 0.78, "grad_norm": 1.8056526635316388, "learning_rate": 1.2415178966956531e-06, "loss": 0.6975, "step": 7481 }, { "epoch": 0.78, "grad_norm": 1.9223777564267408, "learning_rate": 1.2404078806583835e-06, "loss": 0.578, "step": 7482 }, { "epoch": 0.78, "grad_norm": 1.87336105244118, "learning_rate": 1.2392982907923096e-06, "loss": 0.5269, "step": 7483 }, { "epoch": 0.78, "grad_norm": 1.8268304700340083, "learning_rate": 1.2381891272232083e-06, "loss": 0.6357, "step": 7484 }, { "epoch": 0.78, "grad_norm": 1.8174515571326288, "learning_rate": 1.237080390076812e-06, "loss": 0.5843, "step": 7485 }, { "epoch": 0.78, "grad_norm": 1.9327780722397199, "learning_rate": 1.2359720794788006e-06, "loss": 0.6239, "step": 7486 }, { "epoch": 0.78, "grad_norm": 2.0991839672424932, "learning_rate": 1.2348641955548096e-06, "loss": 0.6605, "step": 7487 }, { "epoch": 0.78, "grad_norm": 1.8536105464415018, "learning_rate": 1.2337567384304206e-06, "loss": 0.6537, "step": 7488 }, { "epoch": 0.78, "grad_norm": 2.024081612397799, "learning_rate": 1.2326497082311756e-06, "loss": 0.6295, "step": 7489 }, { "epoch": 0.78, "grad_norm": 1.9662894712769203, "learning_rate": 1.231543105082556e-06, "loss": 0.7392, "step": 7490 }, { "epoch": 0.78, "grad_norm": 2.0201598746186793, "learning_rate": 1.230436929110007e-06, "loss": 0.6617, "step": 7491 }, { "epoch": 0.78, "grad_norm": 1.935461672788772, "learning_rate": 1.2293311804389162e-06, "loss": 0.6007, "step": 7492 }, { "epoch": 0.78, "grad_norm": 1.7867631971998374, "learning_rate": 1.2282258591946294e-06, "loss": 0.5533, "step": 7493 }, { "epoch": 0.78, "grad_norm": 1.9480879837331495, "learning_rate": 1.2271209655024386e-06, "loss": 0.5748, "step": 7494 }, { "epoch": 0.78, "grad_norm": 1.8358355416673764, "learning_rate": 1.2260164994875922e-06, "loss": 0.5568, "step": 7495 }, { "epoch": 0.78, "grad_norm": 1.916660508469549, "learning_rate": 1.224912461275287e-06, "loss": 0.5668, "step": 7496 }, { "epoch": 0.78, "grad_norm": 2.1041973386188726, "learning_rate": 1.2238088509906715e-06, "loss": 0.6088, "step": 7497 }, { "epoch": 0.78, "grad_norm": 1.8336319921741646, "learning_rate": 1.2227056687588445e-06, "loss": 0.6178, "step": 7498 }, { "epoch": 0.78, "grad_norm": 1.9408959767507967, "learning_rate": 1.221602914704862e-06, "loss": 0.6562, "step": 7499 }, { "epoch": 0.78, "grad_norm": 1.982054454174401, "learning_rate": 1.2205005889537231e-06, "loss": 0.6758, "step": 7500 }, { "epoch": 0.78, "grad_norm": 2.242903224347222, "learning_rate": 1.2193986916303862e-06, "loss": 0.6497, "step": 7501 }, { "epoch": 0.78, "grad_norm": 1.696679047260719, "learning_rate": 1.2182972228597555e-06, "loss": 0.6423, "step": 7502 }, { "epoch": 0.78, "grad_norm": 2.4159106630224647, "learning_rate": 1.2171961827666907e-06, "loss": 0.6576, "step": 7503 }, { "epoch": 0.78, "grad_norm": 2.15668495278231, "learning_rate": 1.2160955714759997e-06, "loss": 0.5918, "step": 7504 }, { "epoch": 0.78, "grad_norm": 2.082528155431938, "learning_rate": 1.2149953891124423e-06, "loss": 0.6091, "step": 7505 }, { "epoch": 0.78, "grad_norm": 1.9378865879606009, "learning_rate": 1.2138956358007325e-06, "loss": 0.6194, "step": 7506 }, { "epoch": 0.78, "grad_norm": 1.9807419537639468, "learning_rate": 1.2127963116655323e-06, "loss": 0.563, "step": 7507 }, { "epoch": 0.78, "grad_norm": 1.9001140039929365, "learning_rate": 1.2116974168314549e-06, "loss": 0.5075, "step": 7508 }, { "epoch": 0.78, "grad_norm": 1.9044446353892586, "learning_rate": 1.2105989514230699e-06, "loss": 0.5632, "step": 7509 }, { "epoch": 0.78, "grad_norm": 1.9036438238927869, "learning_rate": 1.2095009155648908e-06, "loss": 0.6259, "step": 7510 }, { "epoch": 0.78, "grad_norm": 2.0289114282767042, "learning_rate": 1.2084033093813897e-06, "loss": 0.6061, "step": 7511 }, { "epoch": 0.78, "grad_norm": 1.9769797796713287, "learning_rate": 1.2073061329969843e-06, "loss": 0.5597, "step": 7512 }, { "epoch": 0.78, "grad_norm": 1.829432862973179, "learning_rate": 1.2062093865360458e-06, "loss": 0.5307, "step": 7513 }, { "epoch": 0.78, "grad_norm": 1.9966239074516121, "learning_rate": 1.205113070122898e-06, "loss": 0.6177, "step": 7514 }, { "epoch": 0.78, "grad_norm": 2.0151026946951824, "learning_rate": 1.2040171838818128e-06, "loss": 0.5806, "step": 7515 }, { "epoch": 0.78, "grad_norm": 1.9245838703900684, "learning_rate": 1.202921727937017e-06, "loss": 0.5436, "step": 7516 }, { "epoch": 0.78, "grad_norm": 2.3269530223457475, "learning_rate": 1.201826702412685e-06, "loss": 0.6014, "step": 7517 }, { "epoch": 0.78, "grad_norm": 2.0196480903460583, "learning_rate": 1.2007321074329464e-06, "loss": 0.6542, "step": 7518 }, { "epoch": 0.78, "grad_norm": 2.0366225641189617, "learning_rate": 1.1996379431218792e-06, "loss": 0.5832, "step": 7519 }, { "epoch": 0.78, "grad_norm": 1.8691559040156394, "learning_rate": 1.1985442096035116e-06, "loss": 0.5792, "step": 7520 }, { "epoch": 0.78, "grad_norm": 1.97785016962828, "learning_rate": 1.1974509070018242e-06, "loss": 0.6102, "step": 7521 }, { "epoch": 0.78, "grad_norm": 1.909391688954166, "learning_rate": 1.1963580354407523e-06, "loss": 0.5965, "step": 7522 }, { "epoch": 0.78, "grad_norm": 1.8397548132442958, "learning_rate": 1.195265595044175e-06, "loss": 0.5612, "step": 7523 }, { "epoch": 0.78, "grad_norm": 1.8519815774481667, "learning_rate": 1.1941735859359305e-06, "loss": 0.6289, "step": 7524 }, { "epoch": 0.78, "grad_norm": 1.9325455805100002, "learning_rate": 1.193082008239801e-06, "loss": 0.7232, "step": 7525 }, { "epoch": 0.78, "grad_norm": 1.7899558628801764, "learning_rate": 1.1919908620795274e-06, "loss": 0.6425, "step": 7526 }, { "epoch": 0.78, "grad_norm": 1.7674583673545878, "learning_rate": 1.1909001475787917e-06, "loss": 0.6329, "step": 7527 }, { "epoch": 0.78, "grad_norm": 1.9014708835524068, "learning_rate": 1.189809864861237e-06, "loss": 0.5598, "step": 7528 }, { "epoch": 0.78, "grad_norm": 1.9850467848833522, "learning_rate": 1.1887200140504496e-06, "loss": 0.6087, "step": 7529 }, { "epoch": 0.78, "grad_norm": 1.8719486639364562, "learning_rate": 1.187630595269974e-06, "loss": 0.6111, "step": 7530 }, { "epoch": 0.78, "grad_norm": 1.8776002051132321, "learning_rate": 1.186541608643299e-06, "loss": 0.6164, "step": 7531 }, { "epoch": 0.78, "grad_norm": 2.10496286194707, "learning_rate": 1.1854530542938697e-06, "loss": 0.6653, "step": 7532 }, { "epoch": 0.78, "grad_norm": 1.792696936223161, "learning_rate": 1.184364932345079e-06, "loss": 0.6264, "step": 7533 }, { "epoch": 0.78, "grad_norm": 2.062502687433275, "learning_rate": 1.1832772429202716e-06, "loss": 0.6956, "step": 7534 }, { "epoch": 0.78, "grad_norm": 2.051811267436275, "learning_rate": 1.1821899861427415e-06, "loss": 0.6218, "step": 7535 }, { "epoch": 0.78, "grad_norm": 1.8590354079413396, "learning_rate": 1.1811031621357388e-06, "loss": 0.5887, "step": 7536 }, { "epoch": 0.78, "grad_norm": 1.8694047326150192, "learning_rate": 1.1800167710224585e-06, "loss": 0.5381, "step": 7537 }, { "epoch": 0.78, "grad_norm": 2.0369985151819887, "learning_rate": 1.1789308129260518e-06, "loss": 0.6614, "step": 7538 }, { "epoch": 0.78, "grad_norm": 2.024847817291823, "learning_rate": 1.1778452879696156e-06, "loss": 0.5751, "step": 7539 }, { "epoch": 0.78, "grad_norm": 2.0143477964838166, "learning_rate": 1.1767601962762025e-06, "loss": 0.668, "step": 7540 }, { "epoch": 0.78, "grad_norm": 2.1439637308422053, "learning_rate": 1.1756755379688133e-06, "loss": 0.6613, "step": 7541 }, { "epoch": 0.78, "grad_norm": 2.128249539956141, "learning_rate": 1.1745913131703983e-06, "loss": 0.594, "step": 7542 }, { "epoch": 0.78, "grad_norm": 2.042668732426268, "learning_rate": 1.1735075220038634e-06, "loss": 0.6774, "step": 7543 }, { "epoch": 0.78, "grad_norm": 1.7655693783203028, "learning_rate": 1.1724241645920597e-06, "loss": 0.5097, "step": 7544 }, { "epoch": 0.78, "grad_norm": 1.9980257594538535, "learning_rate": 1.1713412410577947e-06, "loss": 0.5953, "step": 7545 }, { "epoch": 0.78, "grad_norm": 1.9020372671647598, "learning_rate": 1.1702587515238228e-06, "loss": 0.6078, "step": 7546 }, { "epoch": 0.78, "grad_norm": 1.84747773445059, "learning_rate": 1.1691766961128486e-06, "loss": 0.6325, "step": 7547 }, { "epoch": 0.78, "grad_norm": 2.1840723467663334, "learning_rate": 1.1680950749475328e-06, "loss": 0.4747, "step": 7548 }, { "epoch": 0.78, "grad_norm": 2.01935125924819, "learning_rate": 1.1670138881504811e-06, "loss": 0.6862, "step": 7549 }, { "epoch": 0.78, "grad_norm": 1.790776100057919, "learning_rate": 1.165933135844251e-06, "loss": 0.6148, "step": 7550 }, { "epoch": 0.78, "grad_norm": 1.9127701595519246, "learning_rate": 1.1648528181513546e-06, "loss": 0.6695, "step": 7551 }, { "epoch": 0.79, "grad_norm": 1.8752750795920698, "learning_rate": 1.1637729351942496e-06, "loss": 0.6208, "step": 7552 }, { "epoch": 0.79, "grad_norm": 2.0838979472639543, "learning_rate": 1.16269348709535e-06, "loss": 0.6042, "step": 7553 }, { "epoch": 0.79, "grad_norm": 1.870248031496323, "learning_rate": 1.1616144739770134e-06, "loss": 0.5422, "step": 7554 }, { "epoch": 0.79, "grad_norm": 1.985975392199706, "learning_rate": 1.1605358959615559e-06, "loss": 0.6439, "step": 7555 }, { "epoch": 0.79, "grad_norm": 2.142807668782956, "learning_rate": 1.1594577531712392e-06, "loss": 0.638, "step": 7556 }, { "epoch": 0.79, "grad_norm": 1.8963683455946494, "learning_rate": 1.1583800457282763e-06, "loss": 0.641, "step": 7557 }, { "epoch": 0.79, "grad_norm": 1.8549487285628472, "learning_rate": 1.1573027737548304e-06, "loss": 0.5158, "step": 7558 }, { "epoch": 0.79, "grad_norm": 1.7469422801211996, "learning_rate": 1.156225937373019e-06, "loss": 0.5104, "step": 7559 }, { "epoch": 0.79, "grad_norm": 2.0926111621878314, "learning_rate": 1.1551495367049047e-06, "loss": 0.7087, "step": 7560 }, { "epoch": 0.79, "grad_norm": 2.126301992165716, "learning_rate": 1.154073571872507e-06, "loss": 0.6058, "step": 7561 }, { "epoch": 0.79, "grad_norm": 2.012168076568665, "learning_rate": 1.1529980429977899e-06, "loss": 0.6645, "step": 7562 }, { "epoch": 0.79, "grad_norm": 1.7738548550605582, "learning_rate": 1.151922950202674e-06, "loss": 0.6511, "step": 7563 }, { "epoch": 0.79, "grad_norm": 1.8809818116839208, "learning_rate": 1.1508482936090226e-06, "loss": 0.6599, "step": 7564 }, { "epoch": 0.79, "grad_norm": 1.9829487911729462, "learning_rate": 1.149774073338658e-06, "loss": 0.6738, "step": 7565 }, { "epoch": 0.79, "grad_norm": 2.1298374139158023, "learning_rate": 1.1487002895133458e-06, "loss": 0.6807, "step": 7566 }, { "epoch": 0.79, "grad_norm": 1.7396917186181862, "learning_rate": 1.1476269422548097e-06, "loss": 0.5649, "step": 7567 }, { "epoch": 0.79, "grad_norm": 2.163951307025049, "learning_rate": 1.1465540316847158e-06, "loss": 0.6927, "step": 7568 }, { "epoch": 0.79, "grad_norm": 1.7608713500076538, "learning_rate": 1.1454815579246874e-06, "loss": 0.5432, "step": 7569 }, { "epoch": 0.79, "grad_norm": 1.701479430283739, "learning_rate": 1.1444095210962946e-06, "loss": 0.5278, "step": 7570 }, { "epoch": 0.79, "grad_norm": 1.9439676814040012, "learning_rate": 1.1433379213210589e-06, "loss": 0.6163, "step": 7571 }, { "epoch": 0.79, "grad_norm": 2.041557258404316, "learning_rate": 1.14226675872045e-06, "loss": 0.5878, "step": 7572 }, { "epoch": 0.79, "grad_norm": 1.8275477847578068, "learning_rate": 1.1411960334158945e-06, "loss": 0.5765, "step": 7573 }, { "epoch": 0.79, "grad_norm": 1.9882523933445255, "learning_rate": 1.1401257455287612e-06, "loss": 0.6118, "step": 7574 }, { "epoch": 0.79, "grad_norm": 2.1243807472913923, "learning_rate": 1.1390558951803765e-06, "loss": 0.5873, "step": 7575 }, { "epoch": 0.79, "grad_norm": 2.036793715513729, "learning_rate": 1.1379864824920116e-06, "loss": 0.6901, "step": 7576 }, { "epoch": 0.79, "grad_norm": 2.0003545707561745, "learning_rate": 1.1369175075848931e-06, "loss": 0.6905, "step": 7577 }, { "epoch": 0.79, "grad_norm": 1.8163317896843112, "learning_rate": 1.135848970580194e-06, "loss": 0.5174, "step": 7578 }, { "epoch": 0.79, "grad_norm": 1.9890937817343028, "learning_rate": 1.1347808715990377e-06, "loss": 0.6099, "step": 7579 }, { "epoch": 0.79, "grad_norm": 2.0858479558113454, "learning_rate": 1.1337132107625015e-06, "loss": 0.5826, "step": 7580 }, { "epoch": 0.79, "grad_norm": 1.9155881518121252, "learning_rate": 1.1326459881916091e-06, "loss": 0.6876, "step": 7581 }, { "epoch": 0.79, "grad_norm": 1.9989956883343196, "learning_rate": 1.1315792040073381e-06, "loss": 0.6681, "step": 7582 }, { "epoch": 0.79, "grad_norm": 1.9550742344176189, "learning_rate": 1.1305128583306125e-06, "loss": 0.6161, "step": 7583 }, { "epoch": 0.79, "grad_norm": 2.0103579349657963, "learning_rate": 1.1294469512823109e-06, "loss": 0.6299, "step": 7584 }, { "epoch": 0.79, "grad_norm": 1.9054575542751568, "learning_rate": 1.128381482983259e-06, "loss": 0.6079, "step": 7585 }, { "epoch": 0.79, "grad_norm": 2.1391755475909, "learning_rate": 1.1273164535542336e-06, "loss": 0.6313, "step": 7586 }, { "epoch": 0.79, "grad_norm": 1.981591108859556, "learning_rate": 1.1262518631159602e-06, "loss": 0.5913, "step": 7587 }, { "epoch": 0.79, "grad_norm": 2.1379326081176777, "learning_rate": 1.125187711789119e-06, "loss": 0.6499, "step": 7588 }, { "epoch": 0.79, "grad_norm": 1.982491035473915, "learning_rate": 1.1241239996943348e-06, "loss": 0.6315, "step": 7589 }, { "epoch": 0.79, "grad_norm": 2.2170410264953224, "learning_rate": 1.1230607269521886e-06, "loss": 0.6647, "step": 7590 }, { "epoch": 0.79, "grad_norm": 2.1017656060939376, "learning_rate": 1.1219978936832054e-06, "loss": 0.6512, "step": 7591 }, { "epoch": 0.79, "grad_norm": 1.9103803015382639, "learning_rate": 1.1209355000078664e-06, "loss": 0.5763, "step": 7592 }, { "epoch": 0.79, "grad_norm": 1.8881668957666067, "learning_rate": 1.1198735460465987e-06, "loss": 0.5324, "step": 7593 }, { "epoch": 0.79, "grad_norm": 2.1092691410374727, "learning_rate": 1.1188120319197798e-06, "loss": 0.7296, "step": 7594 }, { "epoch": 0.79, "grad_norm": 1.7952823369616315, "learning_rate": 1.117750957747738e-06, "loss": 0.5827, "step": 7595 }, { "epoch": 0.79, "grad_norm": 1.9802795486580884, "learning_rate": 1.1166903236507549e-06, "loss": 0.6069, "step": 7596 }, { "epoch": 0.79, "grad_norm": 1.9392038562759872, "learning_rate": 1.1156301297490563e-06, "loss": 0.6014, "step": 7597 }, { "epoch": 0.79, "grad_norm": 2.0103638691186676, "learning_rate": 1.1145703761628234e-06, "loss": 0.6243, "step": 7598 }, { "epoch": 0.79, "grad_norm": 1.854549954527706, "learning_rate": 1.1135110630121837e-06, "loss": 0.5502, "step": 7599 }, { "epoch": 0.79, "grad_norm": 2.021354607363837, "learning_rate": 1.1124521904172202e-06, "loss": 0.5726, "step": 7600 }, { "epoch": 0.79, "grad_norm": 1.9661146192099226, "learning_rate": 1.1113937584979561e-06, "loss": 0.6839, "step": 7601 }, { "epoch": 0.79, "grad_norm": 1.7591677360991955, "learning_rate": 1.1103357673743752e-06, "loss": 0.6131, "step": 7602 }, { "epoch": 0.79, "grad_norm": 1.9398835866330275, "learning_rate": 1.109278217166404e-06, "loss": 0.6187, "step": 7603 }, { "epoch": 0.79, "grad_norm": 1.9393774468410334, "learning_rate": 1.1082211079939248e-06, "loss": 0.5964, "step": 7604 }, { "epoch": 0.79, "grad_norm": 2.067156283501053, "learning_rate": 1.107164439976764e-06, "loss": 0.7105, "step": 7605 }, { "epoch": 0.79, "grad_norm": 1.9167409651719103, "learning_rate": 1.106108213234704e-06, "loss": 0.656, "step": 7606 }, { "epoch": 0.79, "grad_norm": 2.1560390878979914, "learning_rate": 1.105052427887472e-06, "loss": 0.6422, "step": 7607 }, { "epoch": 0.79, "grad_norm": 2.118078967090903, "learning_rate": 1.1039970840547464e-06, "loss": 0.6541, "step": 7608 }, { "epoch": 0.79, "grad_norm": 1.8508371466187719, "learning_rate": 1.1029421818561592e-06, "loss": 0.4905, "step": 7609 }, { "epoch": 0.79, "grad_norm": 1.9440624024422692, "learning_rate": 1.1018877214112883e-06, "loss": 0.6826, "step": 7610 }, { "epoch": 0.79, "grad_norm": 1.849950642377605, "learning_rate": 1.1008337028396616e-06, "loss": 0.5475, "step": 7611 }, { "epoch": 0.79, "grad_norm": 1.9160204717719853, "learning_rate": 1.0997801262607599e-06, "loss": 0.6415, "step": 7612 }, { "epoch": 0.79, "grad_norm": 1.9539927844434095, "learning_rate": 1.0987269917940107e-06, "loss": 0.6261, "step": 7613 }, { "epoch": 0.79, "grad_norm": 2.1191670091655235, "learning_rate": 1.0976742995587941e-06, "loss": 0.6103, "step": 7614 }, { "epoch": 0.79, "grad_norm": 2.2618502739095514, "learning_rate": 1.096622049674439e-06, "loss": 0.5837, "step": 7615 }, { "epoch": 0.79, "grad_norm": 1.8741698346992601, "learning_rate": 1.095570242260221e-06, "loss": 0.6731, "step": 7616 }, { "epoch": 0.79, "grad_norm": 1.9071579986938094, "learning_rate": 1.094518877435372e-06, "loss": 0.5906, "step": 7617 }, { "epoch": 0.79, "grad_norm": 1.8615446372298599, "learning_rate": 1.093467955319068e-06, "loss": 0.5217, "step": 7618 }, { "epoch": 0.79, "grad_norm": 1.9575245781215822, "learning_rate": 1.0924174760304385e-06, "loss": 0.5396, "step": 7619 }, { "epoch": 0.79, "grad_norm": 1.8968632493253208, "learning_rate": 1.0913674396885598e-06, "loss": 0.5562, "step": 7620 }, { "epoch": 0.79, "grad_norm": 1.9487478206224738, "learning_rate": 1.090317846412461e-06, "loss": 0.6184, "step": 7621 }, { "epoch": 0.79, "grad_norm": 2.0424190189706133, "learning_rate": 1.0892686963211191e-06, "loss": 0.6212, "step": 7622 }, { "epoch": 0.79, "grad_norm": 2.174634093253347, "learning_rate": 1.0882199895334605e-06, "loss": 0.641, "step": 7623 }, { "epoch": 0.79, "grad_norm": 1.8967766592892383, "learning_rate": 1.0871717261683619e-06, "loss": 0.5959, "step": 7624 }, { "epoch": 0.79, "grad_norm": 2.0868235703345177, "learning_rate": 1.0861239063446511e-06, "loss": 0.6228, "step": 7625 }, { "epoch": 0.79, "grad_norm": 1.8227671520819473, "learning_rate": 1.0850765301811028e-06, "loss": 0.5928, "step": 7626 }, { "epoch": 0.79, "grad_norm": 1.9844970398015163, "learning_rate": 1.0840295977964454e-06, "loss": 0.7122, "step": 7627 }, { "epoch": 0.79, "grad_norm": 1.8711686783888133, "learning_rate": 1.0829831093093524e-06, "loss": 0.5506, "step": 7628 }, { "epoch": 0.79, "grad_norm": 2.045619614830849, "learning_rate": 1.0819370648384525e-06, "loss": 0.7085, "step": 7629 }, { "epoch": 0.79, "grad_norm": 1.7682681907229623, "learning_rate": 1.080891464502316e-06, "loss": 0.6398, "step": 7630 }, { "epoch": 0.79, "grad_norm": 1.7360472338932105, "learning_rate": 1.0798463084194715e-06, "loss": 0.5867, "step": 7631 }, { "epoch": 0.79, "grad_norm": 2.167967567642517, "learning_rate": 1.0788015967083904e-06, "loss": 0.693, "step": 7632 }, { "epoch": 0.79, "grad_norm": 1.8159747786768616, "learning_rate": 1.0777573294875005e-06, "loss": 0.5595, "step": 7633 }, { "epoch": 0.79, "grad_norm": 1.8337688164202244, "learning_rate": 1.076713506875171e-06, "loss": 0.6175, "step": 7634 }, { "epoch": 0.79, "grad_norm": 1.7006940068561807, "learning_rate": 1.0756701289897298e-06, "loss": 0.5631, "step": 7635 }, { "epoch": 0.79, "grad_norm": 1.8017376813538655, "learning_rate": 1.0746271959494453e-06, "loss": 0.5882, "step": 7636 }, { "epoch": 0.79, "grad_norm": 1.789860603798531, "learning_rate": 1.0735847078725452e-06, "loss": 0.6337, "step": 7637 }, { "epoch": 0.79, "grad_norm": 1.973652062162871, "learning_rate": 1.0725426648771952e-06, "loss": 0.6533, "step": 7638 }, { "epoch": 0.79, "grad_norm": 1.978809763923317, "learning_rate": 1.0715010670815212e-06, "loss": 0.5568, "step": 7639 }, { "epoch": 0.79, "grad_norm": 1.7844382460604202, "learning_rate": 1.070459914603592e-06, "loss": 0.5602, "step": 7640 }, { "epoch": 0.79, "grad_norm": 1.8403111784857593, "learning_rate": 1.0694192075614302e-06, "loss": 0.6194, "step": 7641 }, { "epoch": 0.79, "grad_norm": 1.7334240782019665, "learning_rate": 1.0683789460730037e-06, "loss": 0.5426, "step": 7642 }, { "epoch": 0.79, "grad_norm": 1.685316731739376, "learning_rate": 1.0673391302562342e-06, "loss": 0.4805, "step": 7643 }, { "epoch": 0.79, "grad_norm": 1.869559308757824, "learning_rate": 1.0662997602289899e-06, "loss": 0.7268, "step": 7644 }, { "epoch": 0.79, "grad_norm": 2.16910751822174, "learning_rate": 1.0652608361090877e-06, "loss": 0.6145, "step": 7645 }, { "epoch": 0.79, "grad_norm": 1.923836692448067, "learning_rate": 1.0642223580142985e-06, "loss": 0.5563, "step": 7646 }, { "epoch": 0.79, "grad_norm": 1.9053312003550178, "learning_rate": 1.0631843260623382e-06, "loss": 0.5602, "step": 7647 }, { "epoch": 0.8, "grad_norm": 1.9624951568475208, "learning_rate": 1.0621467403708718e-06, "loss": 0.5808, "step": 7648 }, { "epoch": 0.8, "grad_norm": 1.887605733437194, "learning_rate": 1.0611096010575196e-06, "loss": 0.632, "step": 7649 }, { "epoch": 0.8, "grad_norm": 1.9203647513784456, "learning_rate": 1.0600729082398425e-06, "loss": 0.6598, "step": 7650 }, { "epoch": 0.8, "grad_norm": 1.8767373807747643, "learning_rate": 1.0590366620353604e-06, "loss": 0.598, "step": 7651 }, { "epoch": 0.8, "grad_norm": 1.8056123438665657, "learning_rate": 1.058000862561535e-06, "loss": 0.5871, "step": 7652 }, { "epoch": 0.8, "grad_norm": 2.125561482859971, "learning_rate": 1.0569655099357795e-06, "loss": 0.6351, "step": 7653 }, { "epoch": 0.8, "grad_norm": 2.0398054017931213, "learning_rate": 1.0559306042754591e-06, "loss": 0.6699, "step": 7654 }, { "epoch": 0.8, "grad_norm": 2.0576078182108186, "learning_rate": 1.0548961456978835e-06, "loss": 0.6127, "step": 7655 }, { "epoch": 0.8, "grad_norm": 1.9026327912259209, "learning_rate": 1.0538621343203176e-06, "loss": 0.5675, "step": 7656 }, { "epoch": 0.8, "grad_norm": 2.0639922430174535, "learning_rate": 1.05282857025997e-06, "loss": 0.6958, "step": 7657 }, { "epoch": 0.8, "grad_norm": 2.112511747733857, "learning_rate": 1.051795453634003e-06, "loss": 0.6881, "step": 7658 }, { "epoch": 0.8, "grad_norm": 2.0806728442724602, "learning_rate": 1.0507627845595259e-06, "loss": 0.6517, "step": 7659 }, { "epoch": 0.8, "grad_norm": 1.8655768403232256, "learning_rate": 1.049730563153597e-06, "loss": 0.5419, "step": 7660 }, { "epoch": 0.8, "grad_norm": 2.107389670056977, "learning_rate": 1.0486987895332229e-06, "loss": 0.6122, "step": 7661 }, { "epoch": 0.8, "grad_norm": 2.001585005845773, "learning_rate": 1.0476674638153638e-06, "loss": 0.6874, "step": 7662 }, { "epoch": 0.8, "grad_norm": 1.8022025145340081, "learning_rate": 1.0466365861169242e-06, "loss": 0.5743, "step": 7663 }, { "epoch": 0.8, "grad_norm": 2.25061615492626, "learning_rate": 1.045606156554762e-06, "loss": 0.6585, "step": 7664 }, { "epoch": 0.8, "grad_norm": 1.8060804364597933, "learning_rate": 1.0445761752456806e-06, "loss": 0.5762, "step": 7665 }, { "epoch": 0.8, "grad_norm": 1.9562870642355572, "learning_rate": 1.0435466423064373e-06, "loss": 0.5267, "step": 7666 }, { "epoch": 0.8, "grad_norm": 1.8288657824591275, "learning_rate": 1.04251755785373e-06, "loss": 0.5505, "step": 7667 }, { "epoch": 0.8, "grad_norm": 1.9565933117918024, "learning_rate": 1.0414889220042163e-06, "loss": 0.6889, "step": 7668 }, { "epoch": 0.8, "grad_norm": 2.0077080060650956, "learning_rate": 1.0404607348744943e-06, "loss": 0.6004, "step": 7669 }, { "epoch": 0.8, "grad_norm": 1.8100825225512873, "learning_rate": 1.0394329965811178e-06, "loss": 0.5825, "step": 7670 }, { "epoch": 0.8, "grad_norm": 1.9809824856030736, "learning_rate": 1.038405707240585e-06, "loss": 0.5185, "step": 7671 }, { "epoch": 0.8, "grad_norm": 1.9217501283416114, "learning_rate": 1.0373788669693464e-06, "loss": 0.5785, "step": 7672 }, { "epoch": 0.8, "grad_norm": 2.056980482387888, "learning_rate": 1.0363524758837984e-06, "loss": 0.7201, "step": 7673 }, { "epoch": 0.8, "grad_norm": 2.0266442614164544, "learning_rate": 1.0353265341002916e-06, "loss": 0.5002, "step": 7674 }, { "epoch": 0.8, "grad_norm": 2.0009831878190654, "learning_rate": 1.034301041735118e-06, "loss": 0.5331, "step": 7675 }, { "epoch": 0.8, "grad_norm": 2.095824252013779, "learning_rate": 1.0332759989045254e-06, "loss": 0.6838, "step": 7676 }, { "epoch": 0.8, "grad_norm": 1.7209335276516577, "learning_rate": 1.0322514057247075e-06, "loss": 0.6151, "step": 7677 }, { "epoch": 0.8, "grad_norm": 1.9503500468360193, "learning_rate": 1.031227262311809e-06, "loss": 0.6328, "step": 7678 }, { "epoch": 0.8, "grad_norm": 1.9867515395161024, "learning_rate": 1.0302035687819202e-06, "loss": 0.6623, "step": 7679 }, { "epoch": 0.8, "grad_norm": 1.9160205255434155, "learning_rate": 1.0291803252510857e-06, "loss": 0.6412, "step": 7680 }, { "epoch": 0.8, "grad_norm": 1.9044848440465423, "learning_rate": 1.0281575318352937e-06, "loss": 0.5959, "step": 7681 }, { "epoch": 0.8, "grad_norm": 1.9017102445371714, "learning_rate": 1.0271351886504832e-06, "loss": 0.6255, "step": 7682 }, { "epoch": 0.8, "grad_norm": 1.9617797412488593, "learning_rate": 1.0261132958125452e-06, "loss": 0.5663, "step": 7683 }, { "epoch": 0.8, "grad_norm": 1.8326899544229418, "learning_rate": 1.025091853437314e-06, "loss": 0.5486, "step": 7684 }, { "epoch": 0.8, "grad_norm": 1.9429118963302403, "learning_rate": 1.0240708616405788e-06, "loss": 0.6459, "step": 7685 }, { "epoch": 0.8, "grad_norm": 1.9982778451386498, "learning_rate": 1.0230503205380732e-06, "loss": 0.6218, "step": 7686 }, { "epoch": 0.8, "grad_norm": 2.0367479976721032, "learning_rate": 1.0220302302454804e-06, "loss": 0.6262, "step": 7687 }, { "epoch": 0.8, "grad_norm": 2.1004913104077994, "learning_rate": 1.0210105908784362e-06, "loss": 0.6363, "step": 7688 }, { "epoch": 0.8, "grad_norm": 2.196885005810578, "learning_rate": 1.019991402552521e-06, "loss": 0.6211, "step": 7689 }, { "epoch": 0.8, "grad_norm": 2.0510349626748896, "learning_rate": 1.0189726653832637e-06, "loss": 0.5913, "step": 7690 }, { "epoch": 0.8, "grad_norm": 2.062389403716988, "learning_rate": 1.017954379486148e-06, "loss": 0.6006, "step": 7691 }, { "epoch": 0.8, "grad_norm": 2.000958354903393, "learning_rate": 1.0169365449765982e-06, "loss": 0.6191, "step": 7692 }, { "epoch": 0.8, "grad_norm": 2.1648733827702418, "learning_rate": 1.0159191619699955e-06, "loss": 0.6194, "step": 7693 }, { "epoch": 0.8, "grad_norm": 1.7312236576166993, "learning_rate": 1.014902230581663e-06, "loss": 0.5829, "step": 7694 }, { "epoch": 0.8, "grad_norm": 1.9281553571908774, "learning_rate": 1.0138857509268784e-06, "loss": 0.6326, "step": 7695 }, { "epoch": 0.8, "grad_norm": 1.6412211399878196, "learning_rate": 1.012869723120864e-06, "loss": 0.5582, "step": 7696 }, { "epoch": 0.8, "grad_norm": 2.005045612911496, "learning_rate": 1.0118541472787918e-06, "loss": 0.5659, "step": 7697 }, { "epoch": 0.8, "grad_norm": 1.9229217799978204, "learning_rate": 1.0108390235157828e-06, "loss": 0.5267, "step": 7698 }, { "epoch": 0.8, "grad_norm": 2.000719179387221, "learning_rate": 1.0098243519469091e-06, "loss": 0.5253, "step": 7699 }, { "epoch": 0.8, "grad_norm": 2.0842973123188866, "learning_rate": 1.0088101326871873e-06, "loss": 0.6451, "step": 7700 }, { "epoch": 0.8, "grad_norm": 1.6323359685648406, "learning_rate": 1.0077963658515872e-06, "loss": 0.531, "step": 7701 }, { "epoch": 0.8, "grad_norm": 2.0540576166204314, "learning_rate": 1.0067830515550224e-06, "loss": 0.6194, "step": 7702 }, { "epoch": 0.8, "grad_norm": 1.8564346985220948, "learning_rate": 1.0057701899123622e-06, "loss": 0.7364, "step": 7703 }, { "epoch": 0.8, "grad_norm": 1.7651410118055617, "learning_rate": 1.0047577810384146e-06, "loss": 0.6406, "step": 7704 }, { "epoch": 0.8, "grad_norm": 1.7861292875990769, "learning_rate": 1.003745825047946e-06, "loss": 0.5525, "step": 7705 }, { "epoch": 0.8, "grad_norm": 1.7860974870586726, "learning_rate": 1.002734322055664e-06, "loss": 0.604, "step": 7706 }, { "epoch": 0.8, "grad_norm": 1.9245464161068735, "learning_rate": 1.0017232721762322e-06, "loss": 0.6887, "step": 7707 }, { "epoch": 0.8, "grad_norm": 2.245870208702652, "learning_rate": 1.0007126755242557e-06, "loss": 0.5527, "step": 7708 }, { "epoch": 0.8, "grad_norm": 1.9829272032721494, "learning_rate": 9.997025322142934e-07, "loss": 0.5582, "step": 7709 }, { "epoch": 0.8, "grad_norm": 1.9923017209495424, "learning_rate": 9.986928423608493e-07, "loss": 0.647, "step": 7710 }, { "epoch": 0.8, "grad_norm": 2.027249908466499, "learning_rate": 9.976836060783806e-07, "loss": 0.6092, "step": 7711 }, { "epoch": 0.8, "grad_norm": 1.8182521544541843, "learning_rate": 9.966748234812845e-07, "loss": 0.5714, "step": 7712 }, { "epoch": 0.8, "grad_norm": 2.1289844169817194, "learning_rate": 9.956664946839173e-07, "loss": 0.5971, "step": 7713 }, { "epoch": 0.8, "grad_norm": 2.2228604368044356, "learning_rate": 9.946586198005754e-07, "loss": 0.6114, "step": 7714 }, { "epoch": 0.8, "grad_norm": 1.736074800532946, "learning_rate": 9.9365119894551e-07, "loss": 0.6053, "step": 7715 }, { "epoch": 0.8, "grad_norm": 2.0883144500735913, "learning_rate": 9.92644232232915e-07, "loss": 0.6102, "step": 7716 }, { "epoch": 0.8, "grad_norm": 2.0457525870447477, "learning_rate": 9.91637719776939e-07, "loss": 0.7014, "step": 7717 }, { "epoch": 0.8, "grad_norm": 1.9544938159774565, "learning_rate": 9.906316616916745e-07, "loss": 0.5763, "step": 7718 }, { "epoch": 0.8, "grad_norm": 1.7888980454640862, "learning_rate": 9.89626058091162e-07, "loss": 0.5796, "step": 7719 }, { "epoch": 0.8, "grad_norm": 2.138621337538482, "learning_rate": 9.886209090893955e-07, "loss": 0.6714, "step": 7720 }, { "epoch": 0.8, "grad_norm": 1.8656774932599596, "learning_rate": 9.876162148003121e-07, "loss": 0.5886, "step": 7721 }, { "epoch": 0.8, "grad_norm": 2.0270285492211597, "learning_rate": 9.866119753378018e-07, "loss": 0.6527, "step": 7722 }, { "epoch": 0.8, "grad_norm": 2.0333059598856047, "learning_rate": 9.856081908156984e-07, "loss": 0.6542, "step": 7723 }, { "epoch": 0.8, "grad_norm": 1.764098539500526, "learning_rate": 9.846048613477894e-07, "loss": 0.5076, "step": 7724 }, { "epoch": 0.8, "grad_norm": 1.8576540431009838, "learning_rate": 9.836019870478058e-07, "loss": 0.5937, "step": 7725 }, { "epoch": 0.8, "grad_norm": 1.8921832430445682, "learning_rate": 9.825995680294298e-07, "loss": 0.5207, "step": 7726 }, { "epoch": 0.8, "grad_norm": 1.8915147722807397, "learning_rate": 9.815976044062902e-07, "loss": 0.5271, "step": 7727 }, { "epoch": 0.8, "grad_norm": 1.8331960578185176, "learning_rate": 9.80596096291967e-07, "loss": 0.6478, "step": 7728 }, { "epoch": 0.8, "grad_norm": 1.7613187657217821, "learning_rate": 9.795950437999852e-07, "loss": 0.5245, "step": 7729 }, { "epoch": 0.8, "grad_norm": 2.0307503138525873, "learning_rate": 9.785944470438218e-07, "loss": 0.6444, "step": 7730 }, { "epoch": 0.8, "grad_norm": 1.7686848346745405, "learning_rate": 9.775943061368982e-07, "loss": 0.5941, "step": 7731 }, { "epoch": 0.8, "grad_norm": 2.345642126924032, "learning_rate": 9.765946211925882e-07, "loss": 0.6443, "step": 7732 }, { "epoch": 0.8, "grad_norm": 1.9272931213689597, "learning_rate": 9.755953923242102e-07, "loss": 0.5868, "step": 7733 }, { "epoch": 0.8, "grad_norm": 1.9544735196473395, "learning_rate": 9.74596619645033e-07, "loss": 0.6951, "step": 7734 }, { "epoch": 0.8, "grad_norm": 1.9460700117324157, "learning_rate": 9.735983032682716e-07, "loss": 0.6072, "step": 7735 }, { "epoch": 0.8, "grad_norm": 2.1090367377616435, "learning_rate": 9.726004433070935e-07, "loss": 0.7047, "step": 7736 }, { "epoch": 0.8, "grad_norm": 2.1982409577233133, "learning_rate": 9.716030398746096e-07, "loss": 0.6184, "step": 7737 }, { "epoch": 0.8, "grad_norm": 1.9379260663865387, "learning_rate": 9.706060930838834e-07, "loss": 0.6641, "step": 7738 }, { "epoch": 0.8, "grad_norm": 2.3158402348464384, "learning_rate": 9.69609603047922e-07, "loss": 0.6658, "step": 7739 }, { "epoch": 0.8, "grad_norm": 2.202051506082379, "learning_rate": 9.686135698796866e-07, "loss": 0.6234, "step": 7740 }, { "epoch": 0.8, "grad_norm": 1.946984139913474, "learning_rate": 9.676179936920793e-07, "loss": 0.7025, "step": 7741 }, { "epoch": 0.8, "grad_norm": 1.9651438174470215, "learning_rate": 9.666228745979571e-07, "loss": 0.6672, "step": 7742 }, { "epoch": 0.8, "grad_norm": 1.821834253966292, "learning_rate": 9.656282127101208e-07, "loss": 0.5473, "step": 7743 }, { "epoch": 0.8, "grad_norm": 1.8646490281696781, "learning_rate": 9.646340081413225e-07, "loss": 0.6667, "step": 7744 }, { "epoch": 0.81, "grad_norm": 1.9663132430420378, "learning_rate": 9.636402610042589e-07, "loss": 0.6145, "step": 7745 }, { "epoch": 0.81, "grad_norm": 1.8591207036266417, "learning_rate": 9.6264697141158e-07, "loss": 0.5758, "step": 7746 }, { "epoch": 0.81, "grad_norm": 1.8749424175544835, "learning_rate": 9.61654139475877e-07, "loss": 0.5613, "step": 7747 }, { "epoch": 0.81, "grad_norm": 1.8187123900362623, "learning_rate": 9.606617653096967e-07, "loss": 0.5405, "step": 7748 }, { "epoch": 0.81, "grad_norm": 2.0773843186976726, "learning_rate": 9.59669849025529e-07, "loss": 0.6071, "step": 7749 }, { "epoch": 0.81, "grad_norm": 1.9075752017328036, "learning_rate": 9.586783907358126e-07, "loss": 0.5405, "step": 7750 }, { "epoch": 0.81, "grad_norm": 1.9368497704701095, "learning_rate": 9.57687390552935e-07, "loss": 0.6222, "step": 7751 }, { "epoch": 0.81, "grad_norm": 1.894016291586957, "learning_rate": 9.566968485892324e-07, "loss": 0.6088, "step": 7752 }, { "epoch": 0.81, "grad_norm": 1.8925582435192947, "learning_rate": 9.557067649569873e-07, "loss": 0.5613, "step": 7753 }, { "epoch": 0.81, "grad_norm": 1.9135127751222611, "learning_rate": 9.54717139768433e-07, "loss": 0.6596, "step": 7754 }, { "epoch": 0.81, "grad_norm": 2.2074368490586944, "learning_rate": 9.537279731357485e-07, "loss": 0.6295, "step": 7755 }, { "epoch": 0.81, "grad_norm": 1.9786253685527062, "learning_rate": 9.527392651710598e-07, "loss": 0.6196, "step": 7756 }, { "epoch": 0.81, "grad_norm": 1.8644737769919695, "learning_rate": 9.517510159864452e-07, "loss": 0.5505, "step": 7757 }, { "epoch": 0.81, "grad_norm": 2.037734457999557, "learning_rate": 9.507632256939264e-07, "loss": 0.5701, "step": 7758 }, { "epoch": 0.81, "grad_norm": 1.8913067735074787, "learning_rate": 9.497758944054769e-07, "loss": 0.5228, "step": 7759 }, { "epoch": 0.81, "grad_norm": 1.961114075237607, "learning_rate": 9.487890222330137e-07, "loss": 0.5865, "step": 7760 }, { "epoch": 0.81, "grad_norm": 2.1418281069781733, "learning_rate": 9.478026092884074e-07, "loss": 0.7004, "step": 7761 }, { "epoch": 0.81, "grad_norm": 2.031618259141534, "learning_rate": 9.468166556834724e-07, "loss": 0.6048, "step": 7762 }, { "epoch": 0.81, "grad_norm": 1.983872334557737, "learning_rate": 9.458311615299714e-07, "loss": 0.6104, "step": 7763 }, { "epoch": 0.81, "grad_norm": 1.966621625986287, "learning_rate": 9.448461269396148e-07, "loss": 0.6129, "step": 7764 }, { "epoch": 0.81, "grad_norm": 1.8456010848415982, "learning_rate": 9.438615520240651e-07, "loss": 0.6677, "step": 7765 }, { "epoch": 0.81, "grad_norm": 1.9795646084529916, "learning_rate": 9.428774368949262e-07, "loss": 0.488, "step": 7766 }, { "epoch": 0.81, "grad_norm": 1.778709075372669, "learning_rate": 9.418937816637558e-07, "loss": 0.5943, "step": 7767 }, { "epoch": 0.81, "grad_norm": 2.0339698933276407, "learning_rate": 9.409105864420548e-07, "loss": 0.6256, "step": 7768 }, { "epoch": 0.81, "grad_norm": 2.1716627541691045, "learning_rate": 9.399278513412757e-07, "loss": 0.6345, "step": 7769 }, { "epoch": 0.81, "grad_norm": 2.099308376029622, "learning_rate": 9.389455764728167e-07, "loss": 0.5985, "step": 7770 }, { "epoch": 0.81, "grad_norm": 1.952829993448853, "learning_rate": 9.379637619480236e-07, "loss": 0.6046, "step": 7771 }, { "epoch": 0.81, "grad_norm": 1.9217150291138596, "learning_rate": 9.369824078781897e-07, "loss": 0.5854, "step": 7772 }, { "epoch": 0.81, "grad_norm": 2.0430771905247034, "learning_rate": 9.360015143745599e-07, "loss": 0.6265, "step": 7773 }, { "epoch": 0.81, "grad_norm": 1.9586997014673917, "learning_rate": 9.350210815483207e-07, "loss": 0.7064, "step": 7774 }, { "epoch": 0.81, "grad_norm": 2.0477443669373177, "learning_rate": 9.340411095106128e-07, "loss": 0.6626, "step": 7775 }, { "epoch": 0.81, "grad_norm": 1.8187859005943139, "learning_rate": 9.330615983725194e-07, "loss": 0.6062, "step": 7776 }, { "epoch": 0.81, "grad_norm": 1.8676725602236373, "learning_rate": 9.320825482450769e-07, "loss": 0.6194, "step": 7777 }, { "epoch": 0.81, "grad_norm": 2.009081149555021, "learning_rate": 9.311039592392612e-07, "loss": 0.6296, "step": 7778 }, { "epoch": 0.81, "grad_norm": 1.9506352943100582, "learning_rate": 9.30125831466005e-07, "loss": 0.6273, "step": 7779 }, { "epoch": 0.81, "grad_norm": 1.8634198547031477, "learning_rate": 9.291481650361822e-07, "loss": 0.58, "step": 7780 }, { "epoch": 0.81, "grad_norm": 2.2028182575661557, "learning_rate": 9.281709600606193e-07, "loss": 0.6037, "step": 7781 }, { "epoch": 0.81, "grad_norm": 1.9124910064744183, "learning_rate": 9.271942166500853e-07, "loss": 0.5988, "step": 7782 }, { "epoch": 0.81, "grad_norm": 2.0032674668960504, "learning_rate": 9.262179349153022e-07, "loss": 0.6078, "step": 7783 }, { "epoch": 0.81, "grad_norm": 2.0402604629089893, "learning_rate": 9.252421149669349e-07, "loss": 0.5139, "step": 7784 }, { "epoch": 0.81, "grad_norm": 1.9476800993967762, "learning_rate": 9.242667569156006e-07, "loss": 0.578, "step": 7785 }, { "epoch": 0.81, "grad_norm": 2.127765015120673, "learning_rate": 9.232918608718599e-07, "loss": 0.6612, "step": 7786 }, { "epoch": 0.81, "grad_norm": 1.963807687238336, "learning_rate": 9.223174269462237e-07, "loss": 0.5882, "step": 7787 }, { "epoch": 0.81, "grad_norm": 1.9031394484343827, "learning_rate": 9.213434552491479e-07, "loss": 0.5933, "step": 7788 }, { "epoch": 0.81, "grad_norm": 1.8294547712072886, "learning_rate": 9.203699458910397e-07, "loss": 0.6555, "step": 7789 }, { "epoch": 0.81, "grad_norm": 1.8309553228658446, "learning_rate": 9.193968989822504e-07, "loss": 0.5514, "step": 7790 }, { "epoch": 0.81, "grad_norm": 1.9206616651299844, "learning_rate": 9.184243146330829e-07, "loss": 0.6373, "step": 7791 }, { "epoch": 0.81, "grad_norm": 2.1123393182042958, "learning_rate": 9.174521929537827e-07, "loss": 0.6415, "step": 7792 }, { "epoch": 0.81, "grad_norm": 2.1317560935213113, "learning_rate": 9.164805340545457e-07, "loss": 0.6525, "step": 7793 }, { "epoch": 0.81, "grad_norm": 2.0224002996549735, "learning_rate": 9.15509338045516e-07, "loss": 0.6894, "step": 7794 }, { "epoch": 0.81, "grad_norm": 2.042260851573768, "learning_rate": 9.145386050367827e-07, "loss": 0.5762, "step": 7795 }, { "epoch": 0.81, "grad_norm": 1.9891557355289593, "learning_rate": 9.135683351383862e-07, "loss": 0.6294, "step": 7796 }, { "epoch": 0.81, "grad_norm": 2.022029699436418, "learning_rate": 9.125985284603095e-07, "loss": 0.5819, "step": 7797 }, { "epoch": 0.81, "grad_norm": 2.1109280193657596, "learning_rate": 9.116291851124887e-07, "loss": 0.6766, "step": 7798 }, { "epoch": 0.81, "grad_norm": 1.9502117247555344, "learning_rate": 9.106603052048019e-07, "loss": 0.5748, "step": 7799 }, { "epoch": 0.81, "grad_norm": 1.7462106726187883, "learning_rate": 9.096918888470785e-07, "loss": 0.4553, "step": 7800 }, { "epoch": 0.81, "grad_norm": 2.0378937226681795, "learning_rate": 9.087239361490919e-07, "loss": 0.677, "step": 7801 }, { "epoch": 0.81, "grad_norm": 1.8627644962745071, "learning_rate": 9.07756447220568e-07, "loss": 0.5913, "step": 7802 }, { "epoch": 0.81, "grad_norm": 1.9507831523660102, "learning_rate": 9.067894221711748e-07, "loss": 0.5372, "step": 7803 }, { "epoch": 0.81, "grad_norm": 1.9434714525311196, "learning_rate": 9.058228611105319e-07, "loss": 0.5585, "step": 7804 }, { "epoch": 0.81, "grad_norm": 1.784512235108847, "learning_rate": 9.048567641482031e-07, "loss": 0.6526, "step": 7805 }, { "epoch": 0.81, "grad_norm": 1.7646771163534876, "learning_rate": 9.038911313937021e-07, "loss": 0.5878, "step": 7806 }, { "epoch": 0.81, "grad_norm": 1.9503722707113982, "learning_rate": 9.02925962956489e-07, "loss": 0.6089, "step": 7807 }, { "epoch": 0.81, "grad_norm": 1.9933524346721043, "learning_rate": 9.019612589459703e-07, "loss": 0.7098, "step": 7808 }, { "epoch": 0.81, "grad_norm": 1.684179267546423, "learning_rate": 9.009970194714995e-07, "loss": 0.6127, "step": 7809 }, { "epoch": 0.81, "grad_norm": 1.947028893634746, "learning_rate": 9.00033244642382e-07, "loss": 0.571, "step": 7810 }, { "epoch": 0.81, "grad_norm": 1.8394267571336589, "learning_rate": 8.990699345678633e-07, "loss": 0.5227, "step": 7811 }, { "epoch": 0.81, "grad_norm": 2.1484606234393557, "learning_rate": 8.981070893571436e-07, "loss": 0.6421, "step": 7812 }, { "epoch": 0.81, "grad_norm": 2.310340099058539, "learning_rate": 8.971447091193641e-07, "loss": 0.6349, "step": 7813 }, { "epoch": 0.81, "grad_norm": 2.2442612121075705, "learning_rate": 8.961827939636198e-07, "loss": 0.6153, "step": 7814 }, { "epoch": 0.81, "grad_norm": 2.0223880288341167, "learning_rate": 8.952213439989443e-07, "loss": 0.5675, "step": 7815 }, { "epoch": 0.81, "grad_norm": 1.7330228615175547, "learning_rate": 8.942603593343269e-07, "loss": 0.5218, "step": 7816 }, { "epoch": 0.81, "grad_norm": 1.9915324011404267, "learning_rate": 8.932998400786985e-07, "loss": 0.6024, "step": 7817 }, { "epoch": 0.81, "grad_norm": 2.098901888535553, "learning_rate": 8.923397863409422e-07, "loss": 0.6626, "step": 7818 }, { "epoch": 0.81, "grad_norm": 2.0703368108294784, "learning_rate": 8.913801982298825e-07, "loss": 0.6209, "step": 7819 }, { "epoch": 0.81, "grad_norm": 2.0340207412306706, "learning_rate": 8.90421075854297e-07, "loss": 0.744, "step": 7820 }, { "epoch": 0.81, "grad_norm": 2.0633123671861373, "learning_rate": 8.894624193229051e-07, "loss": 0.6967, "step": 7821 }, { "epoch": 0.81, "grad_norm": 2.2559417118692306, "learning_rate": 8.885042287443785e-07, "loss": 0.5755, "step": 7822 }, { "epoch": 0.81, "grad_norm": 2.0898486974483483, "learning_rate": 8.875465042273323e-07, "loss": 0.5747, "step": 7823 }, { "epoch": 0.81, "grad_norm": 1.8473521886926423, "learning_rate": 8.865892458803288e-07, "loss": 0.5424, "step": 7824 }, { "epoch": 0.81, "grad_norm": 2.00638075054233, "learning_rate": 8.856324538118815e-07, "loss": 0.6036, "step": 7825 }, { "epoch": 0.81, "grad_norm": 2.1426291684833663, "learning_rate": 8.846761281304461e-07, "loss": 0.5502, "step": 7826 }, { "epoch": 0.81, "grad_norm": 1.9203586483594455, "learning_rate": 8.837202689444274e-07, "loss": 0.5921, "step": 7827 }, { "epoch": 0.81, "grad_norm": 1.9063737713261275, "learning_rate": 8.827648763621793e-07, "loss": 0.5702, "step": 7828 }, { "epoch": 0.81, "grad_norm": 1.7596477523436753, "learning_rate": 8.818099504919997e-07, "loss": 0.5492, "step": 7829 }, { "epoch": 0.81, "grad_norm": 1.878655835611806, "learning_rate": 8.808554914421341e-07, "loss": 0.5626, "step": 7830 }, { "epoch": 0.81, "grad_norm": 1.7387383408855732, "learning_rate": 8.799014993207783e-07, "loss": 0.5791, "step": 7831 }, { "epoch": 0.81, "grad_norm": 1.825422980516178, "learning_rate": 8.789479742360696e-07, "loss": 0.6566, "step": 7832 }, { "epoch": 0.81, "grad_norm": 1.836576312128298, "learning_rate": 8.779949162960988e-07, "loss": 0.4287, "step": 7833 }, { "epoch": 0.81, "grad_norm": 1.9235214625697135, "learning_rate": 8.770423256088978e-07, "loss": 0.648, "step": 7834 }, { "epoch": 0.81, "grad_norm": 1.8400134406808073, "learning_rate": 8.760902022824502e-07, "loss": 0.5477, "step": 7835 }, { "epoch": 0.81, "grad_norm": 2.095020666362253, "learning_rate": 8.751385464246836e-07, "loss": 0.5633, "step": 7836 }, { "epoch": 0.81, "grad_norm": 2.2320814488356078, "learning_rate": 8.74187358143474e-07, "loss": 0.6631, "step": 7837 }, { "epoch": 0.81, "grad_norm": 2.326172772622547, "learning_rate": 8.732366375466422e-07, "loss": 0.6657, "step": 7838 }, { "epoch": 0.81, "grad_norm": 2.1461369278054625, "learning_rate": 8.722863847419605e-07, "loss": 0.563, "step": 7839 }, { "epoch": 0.81, "grad_norm": 2.014512726119495, "learning_rate": 8.713365998371431e-07, "loss": 0.674, "step": 7840 }, { "epoch": 0.82, "grad_norm": 1.673982067794888, "learning_rate": 8.703872829398563e-07, "loss": 0.4947, "step": 7841 }, { "epoch": 0.82, "grad_norm": 1.7725031865905028, "learning_rate": 8.694384341577072e-07, "loss": 0.5893, "step": 7842 }, { "epoch": 0.82, "grad_norm": 1.84444502749441, "learning_rate": 8.684900535982566e-07, "loss": 0.6629, "step": 7843 }, { "epoch": 0.82, "grad_norm": 1.7431264743321058, "learning_rate": 8.675421413690072e-07, "loss": 0.6112, "step": 7844 }, { "epoch": 0.82, "grad_norm": 1.88630416944583, "learning_rate": 8.665946975774103e-07, "loss": 0.686, "step": 7845 }, { "epoch": 0.82, "grad_norm": 1.8938332117324725, "learning_rate": 8.656477223308623e-07, "loss": 0.596, "step": 7846 }, { "epoch": 0.82, "grad_norm": 1.856990540011655, "learning_rate": 8.647012157367118e-07, "loss": 0.6462, "step": 7847 }, { "epoch": 0.82, "grad_norm": 1.9573712099806473, "learning_rate": 8.63755177902248e-07, "loss": 0.5727, "step": 7848 }, { "epoch": 0.82, "grad_norm": 1.7953197325036485, "learning_rate": 8.62809608934711e-07, "loss": 0.5912, "step": 7849 }, { "epoch": 0.82, "grad_norm": 1.9467543418309012, "learning_rate": 8.618645089412852e-07, "loss": 0.643, "step": 7850 }, { "epoch": 0.82, "grad_norm": 1.9747829769385832, "learning_rate": 8.609198780291067e-07, "loss": 0.6153, "step": 7851 }, { "epoch": 0.82, "grad_norm": 1.8468696120697057, "learning_rate": 8.599757163052491e-07, "loss": 0.5527, "step": 7852 }, { "epoch": 0.82, "grad_norm": 2.0237754704208495, "learning_rate": 8.590320238767425e-07, "loss": 0.6137, "step": 7853 }, { "epoch": 0.82, "grad_norm": 1.9018135574384365, "learning_rate": 8.580888008505578e-07, "loss": 0.6224, "step": 7854 }, { "epoch": 0.82, "grad_norm": 1.9945743056461949, "learning_rate": 8.571460473336168e-07, "loss": 0.6345, "step": 7855 }, { "epoch": 0.82, "grad_norm": 1.8275906820507346, "learning_rate": 8.562037634327836e-07, "loss": 0.6913, "step": 7856 }, { "epoch": 0.82, "grad_norm": 1.7781481219644808, "learning_rate": 8.552619492548736e-07, "loss": 0.6142, "step": 7857 }, { "epoch": 0.82, "grad_norm": 2.101917955513664, "learning_rate": 8.543206049066461e-07, "loss": 0.5878, "step": 7858 }, { "epoch": 0.82, "grad_norm": 1.7860451310801435, "learning_rate": 8.533797304948066e-07, "loss": 0.6194, "step": 7859 }, { "epoch": 0.82, "grad_norm": 2.053297227977161, "learning_rate": 8.524393261260106e-07, "loss": 0.5577, "step": 7860 }, { "epoch": 0.82, "grad_norm": 1.8999012436315257, "learning_rate": 8.51499391906856e-07, "loss": 0.5324, "step": 7861 }, { "epoch": 0.82, "grad_norm": 1.9403626400781433, "learning_rate": 8.50559927943892e-07, "loss": 0.6044, "step": 7862 }, { "epoch": 0.82, "grad_norm": 1.9202525596061424, "learning_rate": 8.496209343436101e-07, "loss": 0.6422, "step": 7863 }, { "epoch": 0.82, "grad_norm": 1.9258908691490637, "learning_rate": 8.486824112124531e-07, "loss": 0.5877, "step": 7864 }, { "epoch": 0.82, "grad_norm": 2.139813896668843, "learning_rate": 8.477443586568068e-07, "loss": 0.6723, "step": 7865 }, { "epoch": 0.82, "grad_norm": 1.8864183561566554, "learning_rate": 8.46806776783004e-07, "loss": 0.5684, "step": 7866 }, { "epoch": 0.82, "grad_norm": 2.086021703795221, "learning_rate": 8.458696656973242e-07, "loss": 0.6466, "step": 7867 }, { "epoch": 0.82, "grad_norm": 1.7670328138359852, "learning_rate": 8.449330255059974e-07, "loss": 0.5655, "step": 7868 }, { "epoch": 0.82, "grad_norm": 2.109669089522648, "learning_rate": 8.439968563151935e-07, "loss": 0.5716, "step": 7869 }, { "epoch": 0.82, "grad_norm": 1.9731160176083082, "learning_rate": 8.430611582310355e-07, "loss": 0.6779, "step": 7870 }, { "epoch": 0.82, "grad_norm": 1.6833598941120023, "learning_rate": 8.421259313595881e-07, "loss": 0.5651, "step": 7871 }, { "epoch": 0.82, "grad_norm": 1.9782640426514457, "learning_rate": 8.411911758068664e-07, "loss": 0.5027, "step": 7872 }, { "epoch": 0.82, "grad_norm": 1.7638609114287134, "learning_rate": 8.402568916788295e-07, "loss": 0.6116, "step": 7873 }, { "epoch": 0.82, "grad_norm": 2.0287539947205198, "learning_rate": 8.393230790813834e-07, "loss": 0.6667, "step": 7874 }, { "epoch": 0.82, "grad_norm": 1.8293808842129964, "learning_rate": 8.383897381203804e-07, "loss": 0.6778, "step": 7875 }, { "epoch": 0.82, "grad_norm": 1.889493686769746, "learning_rate": 8.374568689016222e-07, "loss": 0.5777, "step": 7876 }, { "epoch": 0.82, "grad_norm": 2.0618821665052263, "learning_rate": 8.365244715308524e-07, "loss": 0.6482, "step": 7877 }, { "epoch": 0.82, "grad_norm": 1.979835255191372, "learning_rate": 8.355925461137659e-07, "loss": 0.5515, "step": 7878 }, { "epoch": 0.82, "grad_norm": 1.8234234080540386, "learning_rate": 8.346610927559995e-07, "loss": 0.5685, "step": 7879 }, { "epoch": 0.82, "grad_norm": 2.114466124057432, "learning_rate": 8.337301115631408e-07, "loss": 0.6513, "step": 7880 }, { "epoch": 0.82, "grad_norm": 2.0450819662924973, "learning_rate": 8.327996026407215e-07, "loss": 0.582, "step": 7881 }, { "epoch": 0.82, "grad_norm": 1.9534975150914224, "learning_rate": 8.318695660942188e-07, "loss": 0.6081, "step": 7882 }, { "epoch": 0.82, "grad_norm": 1.766548803762076, "learning_rate": 8.309400020290576e-07, "loss": 0.5923, "step": 7883 }, { "epoch": 0.82, "grad_norm": 1.8376364935303842, "learning_rate": 8.30010910550611e-07, "loss": 0.5752, "step": 7884 }, { "epoch": 0.82, "grad_norm": 2.1740334973065294, "learning_rate": 8.29082291764195e-07, "loss": 0.6675, "step": 7885 }, { "epoch": 0.82, "grad_norm": 1.955516410752621, "learning_rate": 8.281541457750752e-07, "loss": 0.6615, "step": 7886 }, { "epoch": 0.82, "grad_norm": 2.0513285670740427, "learning_rate": 8.272264726884611e-07, "loss": 0.6483, "step": 7887 }, { "epoch": 0.82, "grad_norm": 1.9204631948112747, "learning_rate": 8.262992726095126e-07, "loss": 0.63, "step": 7888 }, { "epoch": 0.82, "grad_norm": 1.920799123113018, "learning_rate": 8.253725456433281e-07, "loss": 0.574, "step": 7889 }, { "epoch": 0.82, "grad_norm": 2.0544728279229307, "learning_rate": 8.244462918949613e-07, "loss": 0.6066, "step": 7890 }, { "epoch": 0.82, "grad_norm": 1.8312075580282576, "learning_rate": 8.235205114694067e-07, "loss": 0.6475, "step": 7891 }, { "epoch": 0.82, "grad_norm": 2.138681797382035, "learning_rate": 8.225952044716079e-07, "loss": 0.6259, "step": 7892 }, { "epoch": 0.82, "grad_norm": 1.895533332438548, "learning_rate": 8.216703710064516e-07, "loss": 0.5747, "step": 7893 }, { "epoch": 0.82, "grad_norm": 1.904821942247295, "learning_rate": 8.207460111787763e-07, "loss": 0.589, "step": 7894 }, { "epoch": 0.82, "grad_norm": 1.9668496729404117, "learning_rate": 8.198221250933613e-07, "loss": 0.6433, "step": 7895 }, { "epoch": 0.82, "grad_norm": 1.9284587389594263, "learning_rate": 8.188987128549336e-07, "loss": 0.6273, "step": 7896 }, { "epoch": 0.82, "grad_norm": 2.1416050872893946, "learning_rate": 8.179757745681693e-07, "loss": 0.6704, "step": 7897 }, { "epoch": 0.82, "grad_norm": 1.7866996337890955, "learning_rate": 8.170533103376865e-07, "loss": 0.6691, "step": 7898 }, { "epoch": 0.82, "grad_norm": 1.8456024182305906, "learning_rate": 8.161313202680543e-07, "loss": 0.6148, "step": 7899 }, { "epoch": 0.82, "grad_norm": 1.7492541132835926, "learning_rate": 8.15209804463783e-07, "loss": 0.5606, "step": 7900 }, { "epoch": 0.82, "grad_norm": 1.9516406633526484, "learning_rate": 8.142887630293339e-07, "loss": 0.5595, "step": 7901 }, { "epoch": 0.82, "grad_norm": 1.7358148718068005, "learning_rate": 8.133681960691098e-07, "loss": 0.4818, "step": 7902 }, { "epoch": 0.82, "grad_norm": 1.963868509775501, "learning_rate": 8.124481036874665e-07, "loss": 0.5443, "step": 7903 }, { "epoch": 0.82, "grad_norm": 2.0190989920317866, "learning_rate": 8.115284859886963e-07, "loss": 0.5788, "step": 7904 }, { "epoch": 0.82, "grad_norm": 1.9193826384008805, "learning_rate": 8.106093430770473e-07, "loss": 0.7062, "step": 7905 }, { "epoch": 0.82, "grad_norm": 2.048467386932486, "learning_rate": 8.096906750567063e-07, "loss": 0.659, "step": 7906 }, { "epoch": 0.82, "grad_norm": 2.1066721308629646, "learning_rate": 8.087724820318127e-07, "loss": 0.6521, "step": 7907 }, { "epoch": 0.82, "grad_norm": 2.113774039359718, "learning_rate": 8.07854764106446e-07, "loss": 0.6454, "step": 7908 }, { "epoch": 0.82, "grad_norm": 1.9652147888264364, "learning_rate": 8.069375213846381e-07, "loss": 0.5461, "step": 7909 }, { "epoch": 0.82, "grad_norm": 1.8955929743963291, "learning_rate": 8.060207539703613e-07, "loss": 0.5916, "step": 7910 }, { "epoch": 0.82, "grad_norm": 2.202212850241475, "learning_rate": 8.051044619675368e-07, "loss": 0.7072, "step": 7911 }, { "epoch": 0.82, "grad_norm": 1.9825746509636721, "learning_rate": 8.041886454800307e-07, "loss": 0.6028, "step": 7912 }, { "epoch": 0.82, "grad_norm": 1.743284921250136, "learning_rate": 8.032733046116581e-07, "loss": 0.5952, "step": 7913 }, { "epoch": 0.82, "grad_norm": 1.9131860315901044, "learning_rate": 8.023584394661754e-07, "loss": 0.5394, "step": 7914 }, { "epoch": 0.82, "grad_norm": 2.08391122810736, "learning_rate": 8.014440501472909e-07, "loss": 0.6065, "step": 7915 }, { "epoch": 0.82, "grad_norm": 1.9814471916087633, "learning_rate": 8.005301367586532e-07, "loss": 0.5821, "step": 7916 }, { "epoch": 0.82, "grad_norm": 2.3000295637313712, "learning_rate": 7.996166994038618e-07, "loss": 0.558, "step": 7917 }, { "epoch": 0.82, "grad_norm": 2.0762593164954706, "learning_rate": 7.987037381864587e-07, "loss": 0.7169, "step": 7918 }, { "epoch": 0.82, "grad_norm": 1.7514422787011197, "learning_rate": 7.977912532099336e-07, "loss": 0.558, "step": 7919 }, { "epoch": 0.82, "grad_norm": 1.76970855988798, "learning_rate": 7.968792445777207e-07, "loss": 0.5681, "step": 7920 }, { "epoch": 0.82, "grad_norm": 1.9054210564705762, "learning_rate": 7.95967712393203e-07, "loss": 0.6098, "step": 7921 }, { "epoch": 0.82, "grad_norm": 1.9968109035396673, "learning_rate": 7.950566567597067e-07, "loss": 0.5548, "step": 7922 }, { "epoch": 0.82, "grad_norm": 2.0679678927457252, "learning_rate": 7.941460777805071e-07, "loss": 0.6346, "step": 7923 }, { "epoch": 0.82, "grad_norm": 1.771436468079132, "learning_rate": 7.932359755588204e-07, "loss": 0.6228, "step": 7924 }, { "epoch": 0.82, "grad_norm": 2.028353877337731, "learning_rate": 7.923263501978151e-07, "loss": 0.6094, "step": 7925 }, { "epoch": 0.82, "grad_norm": 1.9541780264877802, "learning_rate": 7.914172018006006e-07, "loss": 0.6535, "step": 7926 }, { "epoch": 0.82, "grad_norm": 2.033311941728741, "learning_rate": 7.905085304702348e-07, "loss": 0.6615, "step": 7927 }, { "epoch": 0.82, "grad_norm": 1.9664209507744381, "learning_rate": 7.896003363097194e-07, "loss": 0.613, "step": 7928 }, { "epoch": 0.82, "grad_norm": 2.047467628372388, "learning_rate": 7.886926194220051e-07, "loss": 0.6435, "step": 7929 }, { "epoch": 0.82, "grad_norm": 2.2322887628362986, "learning_rate": 7.87785379909985e-07, "loss": 0.4776, "step": 7930 }, { "epoch": 0.82, "grad_norm": 1.9161552547216953, "learning_rate": 7.86878617876502e-07, "loss": 0.5047, "step": 7931 }, { "epoch": 0.82, "grad_norm": 1.9666675466994157, "learning_rate": 7.859723334243414e-07, "loss": 0.6392, "step": 7932 }, { "epoch": 0.82, "grad_norm": 2.0673189448510128, "learning_rate": 7.850665266562352e-07, "loss": 0.697, "step": 7933 }, { "epoch": 0.82, "grad_norm": 2.02537722417873, "learning_rate": 7.841611976748637e-07, "loss": 0.6193, "step": 7934 }, { "epoch": 0.82, "grad_norm": 1.8026876495600388, "learning_rate": 7.832563465828486e-07, "loss": 0.6503, "step": 7935 }, { "epoch": 0.82, "grad_norm": 1.9214738333018813, "learning_rate": 7.823519734827623e-07, "loss": 0.5923, "step": 7936 }, { "epoch": 0.83, "grad_norm": 2.0886931902273393, "learning_rate": 7.814480784771184e-07, "loss": 0.6328, "step": 7937 }, { "epoch": 0.83, "grad_norm": 1.7978925952821656, "learning_rate": 7.805446616683815e-07, "loss": 0.6205, "step": 7938 }, { "epoch": 0.83, "grad_norm": 2.2547069940717863, "learning_rate": 7.796417231589553e-07, "loss": 0.5602, "step": 7939 }, { "epoch": 0.83, "grad_norm": 2.0421838146360134, "learning_rate": 7.78739263051198e-07, "loss": 0.6162, "step": 7940 }, { "epoch": 0.83, "grad_norm": 1.8776854409493586, "learning_rate": 7.778372814474028e-07, "loss": 0.5241, "step": 7941 }, { "epoch": 0.83, "grad_norm": 1.9343979647373022, "learning_rate": 7.769357784498189e-07, "loss": 0.6138, "step": 7942 }, { "epoch": 0.83, "grad_norm": 1.9126629837911133, "learning_rate": 7.760347541606339e-07, "loss": 0.5349, "step": 7943 }, { "epoch": 0.83, "grad_norm": 2.013112677164787, "learning_rate": 7.751342086819864e-07, "loss": 0.5842, "step": 7944 }, { "epoch": 0.83, "grad_norm": 2.1023021606726475, "learning_rate": 7.742341421159561e-07, "loss": 0.5927, "step": 7945 }, { "epoch": 0.83, "grad_norm": 1.9172493579642078, "learning_rate": 7.733345545645726e-07, "loss": 0.4841, "step": 7946 }, { "epoch": 0.83, "grad_norm": 1.8852024848426656, "learning_rate": 7.724354461298089e-07, "loss": 0.6012, "step": 7947 }, { "epoch": 0.83, "grad_norm": 1.8875527210328427, "learning_rate": 7.71536816913584e-07, "loss": 0.5591, "step": 7948 }, { "epoch": 0.83, "grad_norm": 2.1986943184940437, "learning_rate": 7.706386670177606e-07, "loss": 0.6477, "step": 7949 }, { "epoch": 0.83, "grad_norm": 1.9792660395007868, "learning_rate": 7.697409965441527e-07, "loss": 0.5966, "step": 7950 }, { "epoch": 0.83, "grad_norm": 2.0633054153372186, "learning_rate": 7.68843805594513e-07, "loss": 0.5333, "step": 7951 }, { "epoch": 0.83, "grad_norm": 2.0106706139040496, "learning_rate": 7.679470942705459e-07, "loss": 0.6114, "step": 7952 }, { "epoch": 0.83, "grad_norm": 1.9411577381023806, "learning_rate": 7.670508626738959e-07, "loss": 0.6204, "step": 7953 }, { "epoch": 0.83, "grad_norm": 1.8717425506845027, "learning_rate": 7.661551109061593e-07, "loss": 0.6533, "step": 7954 }, { "epoch": 0.83, "grad_norm": 1.957638620055607, "learning_rate": 7.652598390688731e-07, "loss": 0.61, "step": 7955 }, { "epoch": 0.83, "grad_norm": 1.7361028180084892, "learning_rate": 7.643650472635211e-07, "loss": 0.5245, "step": 7956 }, { "epoch": 0.83, "grad_norm": 2.062695795851483, "learning_rate": 7.634707355915321e-07, "loss": 0.5406, "step": 7957 }, { "epoch": 0.83, "grad_norm": 1.7556940200609303, "learning_rate": 7.625769041542841e-07, "loss": 0.6456, "step": 7958 }, { "epoch": 0.83, "grad_norm": 2.1244482890856484, "learning_rate": 7.616835530530947e-07, "loss": 0.5838, "step": 7959 }, { "epoch": 0.83, "grad_norm": 2.007384668816092, "learning_rate": 7.607906823892341e-07, "loss": 0.5308, "step": 7960 }, { "epoch": 0.83, "grad_norm": 1.8335296792574647, "learning_rate": 7.598982922639109e-07, "loss": 0.5689, "step": 7961 }, { "epoch": 0.83, "grad_norm": 1.89865804594614, "learning_rate": 7.590063827782851e-07, "loss": 0.6264, "step": 7962 }, { "epoch": 0.83, "grad_norm": 2.101400701382922, "learning_rate": 7.581149540334587e-07, "loss": 0.6344, "step": 7963 }, { "epoch": 0.83, "grad_norm": 1.8929227815753287, "learning_rate": 7.572240061304786e-07, "loss": 0.6135, "step": 7964 }, { "epoch": 0.83, "grad_norm": 1.9995425057991527, "learning_rate": 7.563335391703424e-07, "loss": 0.6244, "step": 7965 }, { "epoch": 0.83, "grad_norm": 1.8371114068334875, "learning_rate": 7.554435532539872e-07, "loss": 0.6308, "step": 7966 }, { "epoch": 0.83, "grad_norm": 1.85970602929272, "learning_rate": 7.545540484822972e-07, "loss": 0.5694, "step": 7967 }, { "epoch": 0.83, "grad_norm": 2.0786161787069113, "learning_rate": 7.536650249561056e-07, "loss": 0.7314, "step": 7968 }, { "epoch": 0.83, "grad_norm": 1.7588934796892286, "learning_rate": 7.527764827761863e-07, "loss": 0.5897, "step": 7969 }, { "epoch": 0.83, "grad_norm": 2.2854940957417815, "learning_rate": 7.518884220432599e-07, "loss": 0.6597, "step": 7970 }, { "epoch": 0.83, "grad_norm": 1.9764113169266813, "learning_rate": 7.510008428579956e-07, "loss": 0.5809, "step": 7971 }, { "epoch": 0.83, "grad_norm": 2.026543602570681, "learning_rate": 7.501137453210027e-07, "loss": 0.7416, "step": 7972 }, { "epoch": 0.83, "grad_norm": 2.177075001218227, "learning_rate": 7.492271295328419e-07, "loss": 0.57, "step": 7973 }, { "epoch": 0.83, "grad_norm": 1.8721218522062755, "learning_rate": 7.483409955940136e-07, "loss": 0.5692, "step": 7974 }, { "epoch": 0.83, "grad_norm": 1.8713817493079272, "learning_rate": 7.474553436049675e-07, "loss": 0.5742, "step": 7975 }, { "epoch": 0.83, "grad_norm": 1.8123154657334932, "learning_rate": 7.465701736660963e-07, "loss": 0.5795, "step": 7976 }, { "epoch": 0.83, "grad_norm": 1.8267308046975017, "learning_rate": 7.456854858777418e-07, "loss": 0.5542, "step": 7977 }, { "epoch": 0.83, "grad_norm": 2.124509762504682, "learning_rate": 7.448012803401843e-07, "loss": 0.5719, "step": 7978 }, { "epoch": 0.83, "grad_norm": 1.7888627753037367, "learning_rate": 7.43917557153656e-07, "loss": 0.6277, "step": 7979 }, { "epoch": 0.83, "grad_norm": 1.8413228879270656, "learning_rate": 7.430343164183312e-07, "loss": 0.5997, "step": 7980 }, { "epoch": 0.83, "grad_norm": 1.8432625198692136, "learning_rate": 7.421515582343308e-07, "loss": 0.6055, "step": 7981 }, { "epoch": 0.83, "grad_norm": 2.0281622706959856, "learning_rate": 7.412692827017193e-07, "loss": 0.5958, "step": 7982 }, { "epoch": 0.83, "grad_norm": 1.9798861010712308, "learning_rate": 7.4038748992051e-07, "loss": 0.5568, "step": 7983 }, { "epoch": 0.83, "grad_norm": 1.9779665626638276, "learning_rate": 7.395061799906578e-07, "loss": 0.6735, "step": 7984 }, { "epoch": 0.83, "grad_norm": 2.105242899193206, "learning_rate": 7.386253530120635e-07, "loss": 0.6218, "step": 7985 }, { "epoch": 0.83, "grad_norm": 1.792392367170328, "learning_rate": 7.377450090845733e-07, "loss": 0.6366, "step": 7986 }, { "epoch": 0.83, "grad_norm": 1.9121163417362366, "learning_rate": 7.368651483079819e-07, "loss": 0.5815, "step": 7987 }, { "epoch": 0.83, "grad_norm": 1.848675634361415, "learning_rate": 7.35985770782024e-07, "loss": 0.6224, "step": 7988 }, { "epoch": 0.83, "grad_norm": 1.9301143939260674, "learning_rate": 7.35106876606384e-07, "loss": 0.6844, "step": 7989 }, { "epoch": 0.83, "grad_norm": 1.9834997145749633, "learning_rate": 7.342284658806875e-07, "loss": 0.7157, "step": 7990 }, { "epoch": 0.83, "grad_norm": 1.8168292987948385, "learning_rate": 7.333505387045108e-07, "loss": 0.5979, "step": 7991 }, { "epoch": 0.83, "grad_norm": 1.8488952297008012, "learning_rate": 7.324730951773673e-07, "loss": 0.478, "step": 7992 }, { "epoch": 0.83, "grad_norm": 1.8987998955323417, "learning_rate": 7.315961353987234e-07, "loss": 0.6196, "step": 7993 }, { "epoch": 0.83, "grad_norm": 1.9061861574645693, "learning_rate": 7.307196594679855e-07, "loss": 0.5492, "step": 7994 }, { "epoch": 0.83, "grad_norm": 2.026536733296861, "learning_rate": 7.298436674845099e-07, "loss": 0.6364, "step": 7995 }, { "epoch": 0.83, "grad_norm": 2.4322053014985077, "learning_rate": 7.289681595475922e-07, "loss": 0.7198, "step": 7996 }, { "epoch": 0.83, "grad_norm": 1.851711818464298, "learning_rate": 7.280931357564791e-07, "loss": 0.6029, "step": 7997 }, { "epoch": 0.83, "grad_norm": 2.003585262968676, "learning_rate": 7.272185962103567e-07, "loss": 0.5255, "step": 7998 }, { "epoch": 0.83, "grad_norm": 1.808289594625113, "learning_rate": 7.263445410083614e-07, "loss": 0.6749, "step": 7999 }, { "epoch": 0.83, "grad_norm": 1.8692318647300077, "learning_rate": 7.254709702495721e-07, "loss": 0.5514, "step": 8000 }, { "epoch": 0.83, "grad_norm": 1.996444861038339, "learning_rate": 7.245978840330103e-07, "loss": 0.575, "step": 8001 }, { "epoch": 0.83, "grad_norm": 2.0228741290262593, "learning_rate": 7.23725282457649e-07, "loss": 0.6426, "step": 8002 }, { "epoch": 0.83, "grad_norm": 2.0017135428709207, "learning_rate": 7.228531656223997e-07, "loss": 0.6279, "step": 8003 }, { "epoch": 0.83, "grad_norm": 1.894666164788839, "learning_rate": 7.219815336261243e-07, "loss": 0.5261, "step": 8004 }, { "epoch": 0.83, "grad_norm": 2.1004694991805155, "learning_rate": 7.211103865676255e-07, "loss": 0.5761, "step": 8005 }, { "epoch": 0.83, "grad_norm": 1.9736439337576523, "learning_rate": 7.202397245456539e-07, "loss": 0.6219, "step": 8006 }, { "epoch": 0.83, "grad_norm": 1.9906929570826677, "learning_rate": 7.193695476589019e-07, "loss": 0.5685, "step": 8007 }, { "epoch": 0.83, "grad_norm": 1.7979796734065363, "learning_rate": 7.184998560060114e-07, "loss": 0.5869, "step": 8008 }, { "epoch": 0.83, "grad_norm": 1.9042889426569207, "learning_rate": 7.176306496855651e-07, "loss": 0.6146, "step": 8009 }, { "epoch": 0.83, "grad_norm": 1.9254016912472625, "learning_rate": 7.167619287960942e-07, "loss": 0.581, "step": 8010 }, { "epoch": 0.83, "grad_norm": 1.9512998168719646, "learning_rate": 7.158936934360711e-07, "loss": 0.6168, "step": 8011 }, { "epoch": 0.83, "grad_norm": 2.046608135870485, "learning_rate": 7.150259437039175e-07, "loss": 0.7063, "step": 8012 }, { "epoch": 0.83, "grad_norm": 1.878222927702504, "learning_rate": 7.14158679697996e-07, "loss": 0.6154, "step": 8013 }, { "epoch": 0.83, "grad_norm": 1.881994367788152, "learning_rate": 7.13291901516619e-07, "loss": 0.5284, "step": 8014 }, { "epoch": 0.83, "grad_norm": 2.123034281457884, "learning_rate": 7.124256092580357e-07, "loss": 0.6879, "step": 8015 }, { "epoch": 0.83, "grad_norm": 1.9677989441827224, "learning_rate": 7.11559803020449e-07, "loss": 0.6354, "step": 8016 }, { "epoch": 0.83, "grad_norm": 2.0902633305259357, "learning_rate": 7.106944829020013e-07, "loss": 0.5253, "step": 8017 }, { "epoch": 0.83, "grad_norm": 2.038512209808121, "learning_rate": 7.098296490007828e-07, "loss": 0.6774, "step": 8018 }, { "epoch": 0.83, "grad_norm": 1.7981807892646227, "learning_rate": 7.089653014148263e-07, "loss": 0.6563, "step": 8019 }, { "epoch": 0.83, "grad_norm": 2.004711187157496, "learning_rate": 7.081014402421115e-07, "loss": 0.6193, "step": 8020 }, { "epoch": 0.83, "grad_norm": 1.984447214093464, "learning_rate": 7.072380655805617e-07, "loss": 0.6576, "step": 8021 }, { "epoch": 0.83, "grad_norm": 1.9593774644521333, "learning_rate": 7.063751775280448e-07, "loss": 0.5683, "step": 8022 }, { "epoch": 0.83, "grad_norm": 2.012476005916973, "learning_rate": 7.055127761823732e-07, "loss": 0.6759, "step": 8023 }, { "epoch": 0.83, "grad_norm": 2.445817481217792, "learning_rate": 7.046508616413078e-07, "loss": 0.6529, "step": 8024 }, { "epoch": 0.83, "grad_norm": 2.1997155940021216, "learning_rate": 7.037894340025487e-07, "loss": 0.6053, "step": 8025 }, { "epoch": 0.83, "grad_norm": 1.5564007641049542, "learning_rate": 7.029284933637454e-07, "loss": 0.5903, "step": 8026 }, { "epoch": 0.83, "grad_norm": 2.1252616955848116, "learning_rate": 7.020680398224893e-07, "loss": 0.6022, "step": 8027 }, { "epoch": 0.83, "grad_norm": 1.877781753370922, "learning_rate": 7.012080734763205e-07, "loss": 0.6165, "step": 8028 }, { "epoch": 0.83, "grad_norm": 1.908376242028954, "learning_rate": 7.003485944227162e-07, "loss": 0.516, "step": 8029 }, { "epoch": 0.83, "grad_norm": 1.8883975626092215, "learning_rate": 6.994896027591074e-07, "loss": 0.5823, "step": 8030 }, { "epoch": 0.83, "grad_norm": 1.893765363384913, "learning_rate": 6.986310985828626e-07, "loss": 0.6621, "step": 8031 }, { "epoch": 0.83, "grad_norm": 1.939654375998763, "learning_rate": 6.977730819913015e-07, "loss": 0.609, "step": 8032 }, { "epoch": 0.84, "grad_norm": 1.9859890115855503, "learning_rate": 6.969155530816824e-07, "loss": 0.6512, "step": 8033 }, { "epoch": 0.84, "grad_norm": 2.036896841652315, "learning_rate": 6.960585119512125e-07, "loss": 0.5567, "step": 8034 }, { "epoch": 0.84, "grad_norm": 1.919130726074716, "learning_rate": 6.952019586970416e-07, "loss": 0.612, "step": 8035 }, { "epoch": 0.84, "grad_norm": 1.928540621573469, "learning_rate": 6.943458934162656e-07, "loss": 0.6911, "step": 8036 }, { "epoch": 0.84, "grad_norm": 1.9840568093800386, "learning_rate": 6.934903162059242e-07, "loss": 0.6367, "step": 8037 }, { "epoch": 0.84, "grad_norm": 2.005740659612198, "learning_rate": 6.92635227163001e-07, "loss": 0.6262, "step": 8038 }, { "epoch": 0.84, "grad_norm": 2.1474820104829084, "learning_rate": 6.917806263844268e-07, "loss": 0.6138, "step": 8039 }, { "epoch": 0.84, "grad_norm": 2.1811307190465885, "learning_rate": 6.909265139670735e-07, "loss": 0.5507, "step": 8040 }, { "epoch": 0.84, "grad_norm": 1.9714652581473788, "learning_rate": 6.900728900077619e-07, "loss": 0.6353, "step": 8041 }, { "epoch": 0.84, "grad_norm": 2.335366259929589, "learning_rate": 6.89219754603253e-07, "loss": 0.7156, "step": 8042 }, { "epoch": 0.84, "grad_norm": 2.0117714750139672, "learning_rate": 6.883671078502574e-07, "loss": 0.5447, "step": 8043 }, { "epoch": 0.84, "grad_norm": 2.117046336034858, "learning_rate": 6.875149498454237e-07, "loss": 0.7257, "step": 8044 }, { "epoch": 0.84, "grad_norm": 2.1060454141945257, "learning_rate": 6.866632806853518e-07, "loss": 0.7056, "step": 8045 }, { "epoch": 0.84, "grad_norm": 1.8680810716410823, "learning_rate": 6.858121004665813e-07, "loss": 0.5665, "step": 8046 }, { "epoch": 0.84, "grad_norm": 1.969897563001284, "learning_rate": 6.849614092856005e-07, "loss": 0.6128, "step": 8047 }, { "epoch": 0.84, "grad_norm": 2.038296389027259, "learning_rate": 6.841112072388373e-07, "loss": 0.6069, "step": 8048 }, { "epoch": 0.84, "grad_norm": 2.12047751623645, "learning_rate": 6.832614944226695e-07, "loss": 0.6651, "step": 8049 }, { "epoch": 0.84, "grad_norm": 1.78106945111783, "learning_rate": 6.824122709334152e-07, "loss": 0.6632, "step": 8050 }, { "epoch": 0.84, "grad_norm": 1.7715123778299606, "learning_rate": 6.815635368673418e-07, "loss": 0.5322, "step": 8051 }, { "epoch": 0.84, "grad_norm": 1.9202887091570335, "learning_rate": 6.807152923206528e-07, "loss": 0.6464, "step": 8052 }, { "epoch": 0.84, "grad_norm": 1.9938870648476452, "learning_rate": 6.798675373895064e-07, "loss": 0.6621, "step": 8053 }, { "epoch": 0.84, "grad_norm": 2.0986027995623178, "learning_rate": 6.790202721699968e-07, "loss": 0.6684, "step": 8054 }, { "epoch": 0.84, "grad_norm": 2.0705757135608573, "learning_rate": 6.781734967581699e-07, "loss": 0.5709, "step": 8055 }, { "epoch": 0.84, "grad_norm": 2.001524224677035, "learning_rate": 6.77327211250009e-07, "loss": 0.5792, "step": 8056 }, { "epoch": 0.84, "grad_norm": 1.7953776708987224, "learning_rate": 6.764814157414484e-07, "loss": 0.5446, "step": 8057 }, { "epoch": 0.84, "grad_norm": 1.9216694263397198, "learning_rate": 6.756361103283626e-07, "loss": 0.5869, "step": 8058 }, { "epoch": 0.84, "grad_norm": 2.23119073549652, "learning_rate": 6.747912951065722e-07, "loss": 0.72, "step": 8059 }, { "epoch": 0.84, "grad_norm": 1.887099107100426, "learning_rate": 6.739469701718398e-07, "loss": 0.5626, "step": 8060 }, { "epoch": 0.84, "grad_norm": 1.7724551449703356, "learning_rate": 6.731031356198769e-07, "loss": 0.5565, "step": 8061 }, { "epoch": 0.84, "grad_norm": 1.8592960000825143, "learning_rate": 6.722597915463352e-07, "loss": 0.6031, "step": 8062 }, { "epoch": 0.84, "grad_norm": 2.000586160391033, "learning_rate": 6.714169380468144e-07, "loss": 0.5987, "step": 8063 }, { "epoch": 0.84, "grad_norm": 1.8188177079291683, "learning_rate": 6.705745752168552e-07, "loss": 0.5919, "step": 8064 }, { "epoch": 0.84, "grad_norm": 1.8460808208474586, "learning_rate": 6.697327031519452e-07, "loss": 0.5777, "step": 8065 }, { "epoch": 0.84, "grad_norm": 2.0220325407867525, "learning_rate": 6.688913219475158e-07, "loss": 0.6628, "step": 8066 }, { "epoch": 0.84, "grad_norm": 1.9653708715581029, "learning_rate": 6.680504316989405e-07, "loss": 0.6537, "step": 8067 }, { "epoch": 0.84, "grad_norm": 2.1103633643710333, "learning_rate": 6.672100325015396e-07, "loss": 0.7001, "step": 8068 }, { "epoch": 0.84, "grad_norm": 1.930693121411337, "learning_rate": 6.663701244505788e-07, "loss": 0.6294, "step": 8069 }, { "epoch": 0.84, "grad_norm": 2.0081188554424685, "learning_rate": 6.655307076412637e-07, "loss": 0.6435, "step": 8070 }, { "epoch": 0.84, "grad_norm": 1.9797486027365578, "learning_rate": 6.646917821687504e-07, "loss": 0.6492, "step": 8071 }, { "epoch": 0.84, "grad_norm": 1.8818227613869194, "learning_rate": 6.638533481281323e-07, "loss": 0.5762, "step": 8072 }, { "epoch": 0.84, "grad_norm": 2.05125972802814, "learning_rate": 6.630154056144533e-07, "loss": 0.673, "step": 8073 }, { "epoch": 0.84, "grad_norm": 1.7769660589729557, "learning_rate": 6.621779547226986e-07, "loss": 0.6233, "step": 8074 }, { "epoch": 0.84, "grad_norm": 2.305973486593778, "learning_rate": 6.613409955477962e-07, "loss": 0.6659, "step": 8075 }, { "epoch": 0.84, "grad_norm": 1.8901669506607903, "learning_rate": 6.605045281846222e-07, "loss": 0.6144, "step": 8076 }, { "epoch": 0.84, "grad_norm": 2.0978112467258616, "learning_rate": 6.596685527279939e-07, "loss": 0.6885, "step": 8077 }, { "epoch": 0.84, "grad_norm": 2.004337979444083, "learning_rate": 6.588330692726747e-07, "loss": 0.6176, "step": 8078 }, { "epoch": 0.84, "grad_norm": 2.1383294050312145, "learning_rate": 6.579980779133705e-07, "loss": 0.674, "step": 8079 }, { "epoch": 0.84, "grad_norm": 1.8627858607830665, "learning_rate": 6.571635787447339e-07, "loss": 0.5736, "step": 8080 }, { "epoch": 0.84, "grad_norm": 2.059976230713306, "learning_rate": 6.563295718613577e-07, "loss": 0.7069, "step": 8081 }, { "epoch": 0.84, "grad_norm": 2.0325541362148702, "learning_rate": 6.554960573577834e-07, "loss": 0.586, "step": 8082 }, { "epoch": 0.84, "grad_norm": 1.9740764701873206, "learning_rate": 6.546630353284927e-07, "loss": 0.5326, "step": 8083 }, { "epoch": 0.84, "grad_norm": 1.9563646646591624, "learning_rate": 6.538305058679156e-07, "loss": 0.6275, "step": 8084 }, { "epoch": 0.84, "grad_norm": 2.0898251948049698, "learning_rate": 6.529984690704222e-07, "loss": 0.6423, "step": 8085 }, { "epoch": 0.84, "grad_norm": 2.2789120695548575, "learning_rate": 6.521669250303303e-07, "loss": 0.6563, "step": 8086 }, { "epoch": 0.84, "grad_norm": 1.9959617661791869, "learning_rate": 6.51335873841899e-07, "loss": 0.5937, "step": 8087 }, { "epoch": 0.84, "grad_norm": 2.0252537703122244, "learning_rate": 6.505053155993335e-07, "loss": 0.6138, "step": 8088 }, { "epoch": 0.84, "grad_norm": 1.9790505498454918, "learning_rate": 6.496752503967801e-07, "loss": 0.635, "step": 8089 }, { "epoch": 0.84, "grad_norm": 2.223228305084311, "learning_rate": 6.488456783283343e-07, "loss": 0.6743, "step": 8090 }, { "epoch": 0.84, "grad_norm": 2.2335882128652336, "learning_rate": 6.480165994880311e-07, "loss": 0.6312, "step": 8091 }, { "epoch": 0.84, "grad_norm": 2.153211792356647, "learning_rate": 6.471880139698523e-07, "loss": 0.6157, "step": 8092 }, { "epoch": 0.84, "grad_norm": 2.1742719609091137, "learning_rate": 6.463599218677214e-07, "loss": 0.6432, "step": 8093 }, { "epoch": 0.84, "grad_norm": 2.0043189581847036, "learning_rate": 6.455323232755095e-07, "loss": 0.6517, "step": 8094 }, { "epoch": 0.84, "grad_norm": 2.135564020927073, "learning_rate": 6.447052182870284e-07, "loss": 0.642, "step": 8095 }, { "epoch": 0.84, "grad_norm": 1.9084088820683192, "learning_rate": 6.438786069960345e-07, "loss": 0.6292, "step": 8096 }, { "epoch": 0.84, "grad_norm": 1.9232430703411438, "learning_rate": 6.430524894962292e-07, "loss": 0.6953, "step": 8097 }, { "epoch": 0.84, "grad_norm": 1.9925604766847647, "learning_rate": 6.422268658812591e-07, "loss": 0.511, "step": 8098 }, { "epoch": 0.84, "grad_norm": 2.0022550095449083, "learning_rate": 6.414017362447106e-07, "loss": 0.7423, "step": 8099 }, { "epoch": 0.84, "grad_norm": 1.761768263429008, "learning_rate": 6.405771006801198e-07, "loss": 0.5593, "step": 8100 }, { "epoch": 0.84, "grad_norm": 1.8505005255126203, "learning_rate": 6.397529592809615e-07, "loss": 0.6013, "step": 8101 }, { "epoch": 0.84, "grad_norm": 2.0629170850220375, "learning_rate": 6.389293121406592e-07, "loss": 0.6523, "step": 8102 }, { "epoch": 0.84, "grad_norm": 1.8557315352311599, "learning_rate": 6.381061593525762e-07, "loss": 0.6192, "step": 8103 }, { "epoch": 0.84, "grad_norm": 1.8596773536895244, "learning_rate": 6.372835010100215e-07, "loss": 0.5742, "step": 8104 }, { "epoch": 0.84, "grad_norm": 1.8124890471220407, "learning_rate": 6.364613372062489e-07, "loss": 0.5756, "step": 8105 }, { "epoch": 0.84, "grad_norm": 1.9876410029659157, "learning_rate": 6.356396680344556e-07, "loss": 0.6143, "step": 8106 }, { "epoch": 0.84, "grad_norm": 1.8949001243139065, "learning_rate": 6.34818493587781e-07, "loss": 0.6156, "step": 8107 }, { "epoch": 0.84, "grad_norm": 2.449001211705069, "learning_rate": 6.339978139593117e-07, "loss": 0.6886, "step": 8108 }, { "epoch": 0.84, "grad_norm": 1.9631147045408033, "learning_rate": 6.331776292420744e-07, "loss": 0.6128, "step": 8109 }, { "epoch": 0.84, "grad_norm": 1.9368516611472004, "learning_rate": 6.323579395290435e-07, "loss": 0.5734, "step": 8110 }, { "epoch": 0.84, "grad_norm": 1.8401589565954515, "learning_rate": 6.315387449131355e-07, "loss": 0.6432, "step": 8111 }, { "epoch": 0.84, "grad_norm": 1.8297334426235043, "learning_rate": 6.307200454872093e-07, "loss": 0.5988, "step": 8112 }, { "epoch": 0.84, "grad_norm": 1.703991110615374, "learning_rate": 6.299018413440705e-07, "loss": 0.5624, "step": 8113 }, { "epoch": 0.84, "grad_norm": 2.036173943589799, "learning_rate": 6.290841325764662e-07, "loss": 0.6786, "step": 8114 }, { "epoch": 0.84, "grad_norm": 1.9534205152629416, "learning_rate": 6.282669192770896e-07, "loss": 0.6305, "step": 8115 }, { "epoch": 0.84, "grad_norm": 2.203379643878614, "learning_rate": 6.274502015385747e-07, "loss": 0.61, "step": 8116 }, { "epoch": 0.84, "grad_norm": 2.0103680975969453, "learning_rate": 6.266339794535043e-07, "loss": 0.6402, "step": 8117 }, { "epoch": 0.84, "grad_norm": 1.916711741876715, "learning_rate": 6.258182531143975e-07, "loss": 0.56, "step": 8118 }, { "epoch": 0.84, "grad_norm": 1.941936713481425, "learning_rate": 6.250030226137249e-07, "loss": 0.6702, "step": 8119 }, { "epoch": 0.84, "grad_norm": 2.0225054472897175, "learning_rate": 6.241882880438949e-07, "loss": 0.6005, "step": 8120 }, { "epoch": 0.84, "grad_norm": 1.8856948884191638, "learning_rate": 6.233740494972651e-07, "loss": 0.5536, "step": 8121 }, { "epoch": 0.84, "grad_norm": 1.9042258310577747, "learning_rate": 6.225603070661318e-07, "loss": 0.5368, "step": 8122 }, { "epoch": 0.84, "grad_norm": 2.004419780870736, "learning_rate": 6.217470608427395e-07, "loss": 0.5644, "step": 8123 }, { "epoch": 0.84, "grad_norm": 1.9291607571745164, "learning_rate": 6.209343109192728e-07, "loss": 0.5955, "step": 8124 }, { "epoch": 0.84, "grad_norm": 2.125008453554657, "learning_rate": 6.201220573878613e-07, "loss": 0.694, "step": 8125 }, { "epoch": 0.84, "grad_norm": 2.0979216517156836, "learning_rate": 6.193103003405787e-07, "loss": 0.7127, "step": 8126 }, { "epoch": 0.84, "grad_norm": 2.134679203517581, "learning_rate": 6.184990398694435e-07, "loss": 0.6655, "step": 8127 }, { "epoch": 0.84, "grad_norm": 1.8835391406410493, "learning_rate": 6.176882760664149e-07, "loss": 0.545, "step": 8128 }, { "epoch": 0.85, "grad_norm": 1.9960800018697822, "learning_rate": 6.168780090233994e-07, "loss": 0.6728, "step": 8129 }, { "epoch": 0.85, "grad_norm": 2.419923774205455, "learning_rate": 6.160682388322436e-07, "loss": 0.5715, "step": 8130 }, { "epoch": 0.85, "grad_norm": 2.0042116275814434, "learning_rate": 6.152589655847413e-07, "loss": 0.5711, "step": 8131 }, { "epoch": 0.85, "grad_norm": 1.9220772781429358, "learning_rate": 6.14450189372628e-07, "loss": 0.5321, "step": 8132 }, { "epoch": 0.85, "grad_norm": 2.015869273943935, "learning_rate": 6.136419102875818e-07, "loss": 0.6413, "step": 8133 }, { "epoch": 0.85, "grad_norm": 1.7662095296346332, "learning_rate": 6.128341284212258e-07, "loss": 0.5262, "step": 8134 }, { "epoch": 0.85, "grad_norm": 2.303831477540973, "learning_rate": 6.120268438651283e-07, "loss": 0.7143, "step": 8135 }, { "epoch": 0.85, "grad_norm": 2.3042755907621832, "learning_rate": 6.112200567107978e-07, "loss": 0.7399, "step": 8136 }, { "epoch": 0.85, "grad_norm": 2.071953461037566, "learning_rate": 6.104137670496901e-07, "loss": 0.5517, "step": 8137 }, { "epoch": 0.85, "grad_norm": 1.9353300309979724, "learning_rate": 6.096079749732009e-07, "loss": 0.6453, "step": 8138 }, { "epoch": 0.85, "grad_norm": 1.986671989877575, "learning_rate": 6.088026805726727e-07, "loss": 0.6424, "step": 8139 }, { "epoch": 0.85, "grad_norm": 1.9911146195693263, "learning_rate": 6.079978839393896e-07, "loss": 0.5474, "step": 8140 }, { "epoch": 0.85, "grad_norm": 2.0317851662272974, "learning_rate": 6.071935851645794e-07, "loss": 0.5149, "step": 8141 }, { "epoch": 0.85, "grad_norm": 1.9665291558339688, "learning_rate": 6.063897843394151e-07, "loss": 0.6124, "step": 8142 }, { "epoch": 0.85, "grad_norm": 1.7078468937680698, "learning_rate": 6.055864815550106e-07, "loss": 0.5798, "step": 8143 }, { "epoch": 0.85, "grad_norm": 1.5752220742547929, "learning_rate": 6.047836769024268e-07, "loss": 0.5167, "step": 8144 }, { "epoch": 0.85, "grad_norm": 1.9074047548237727, "learning_rate": 6.03981370472665e-07, "loss": 0.6603, "step": 8145 }, { "epoch": 0.85, "grad_norm": 2.1307330401924562, "learning_rate": 6.031795623566705e-07, "loss": 0.6601, "step": 8146 }, { "epoch": 0.85, "grad_norm": 1.987581419382653, "learning_rate": 6.023782526453347e-07, "loss": 0.5877, "step": 8147 }, { "epoch": 0.85, "grad_norm": 1.9220353371587902, "learning_rate": 6.015774414294894e-07, "loss": 0.5671, "step": 8148 }, { "epoch": 0.85, "grad_norm": 1.8531354376553826, "learning_rate": 6.007771287999104e-07, "loss": 0.6311, "step": 8149 }, { "epoch": 0.85, "grad_norm": 2.0038979840409037, "learning_rate": 5.999773148473193e-07, "loss": 0.5818, "step": 8150 }, { "epoch": 0.85, "grad_norm": 1.7687166304424737, "learning_rate": 5.991779996623781e-07, "loss": 0.6035, "step": 8151 }, { "epoch": 0.85, "grad_norm": 2.126672984023645, "learning_rate": 5.983791833356955e-07, "loss": 0.5741, "step": 8152 }, { "epoch": 0.85, "grad_norm": 1.7741173112022408, "learning_rate": 5.975808659578197e-07, "loss": 0.5839, "step": 8153 }, { "epoch": 0.85, "grad_norm": 2.0262142221276727, "learning_rate": 5.967830476192476e-07, "loss": 0.5856, "step": 8154 }, { "epoch": 0.85, "grad_norm": 1.8676247618096367, "learning_rate": 5.959857284104132e-07, "loss": 0.6758, "step": 8155 }, { "epoch": 0.85, "grad_norm": 2.059672622044571, "learning_rate": 5.951889084216989e-07, "loss": 0.4721, "step": 8156 }, { "epoch": 0.85, "grad_norm": 2.1535823631847846, "learning_rate": 5.943925877434276e-07, "loss": 0.6405, "step": 8157 }, { "epoch": 0.85, "grad_norm": 2.1292794880898303, "learning_rate": 5.935967664658682e-07, "loss": 0.5985, "step": 8158 }, { "epoch": 0.85, "grad_norm": 1.960817142560252, "learning_rate": 5.928014446792308e-07, "loss": 0.5587, "step": 8159 }, { "epoch": 0.85, "grad_norm": 1.8036953737602626, "learning_rate": 5.920066224736703e-07, "loss": 0.5876, "step": 8160 }, { "epoch": 0.85, "grad_norm": 2.017494671286078, "learning_rate": 5.912122999392838e-07, "loss": 0.6468, "step": 8161 }, { "epoch": 0.85, "grad_norm": 1.9745326076384333, "learning_rate": 5.904184771661126e-07, "loss": 0.6042, "step": 8162 }, { "epoch": 0.85, "grad_norm": 2.115718656081801, "learning_rate": 5.896251542441395e-07, "loss": 0.6424, "step": 8163 }, { "epoch": 0.85, "grad_norm": 2.152383828453354, "learning_rate": 5.888323312632948e-07, "loss": 0.5845, "step": 8164 }, { "epoch": 0.85, "grad_norm": 1.8618214723348339, "learning_rate": 5.880400083134469e-07, "loss": 0.5711, "step": 8165 }, { "epoch": 0.85, "grad_norm": 1.9004306913700046, "learning_rate": 5.872481854844126e-07, "loss": 0.6468, "step": 8166 }, { "epoch": 0.85, "grad_norm": 2.079916826005283, "learning_rate": 5.864568628659473e-07, "loss": 0.5972, "step": 8167 }, { "epoch": 0.85, "grad_norm": 1.9150425887906342, "learning_rate": 5.856660405477538e-07, "loss": 0.5951, "step": 8168 }, { "epoch": 0.85, "grad_norm": 1.9768982180845163, "learning_rate": 5.848757186194753e-07, "loss": 0.661, "step": 8169 }, { "epoch": 0.85, "grad_norm": 2.0685273523691246, "learning_rate": 5.840858971707003e-07, "loss": 0.6187, "step": 8170 }, { "epoch": 0.85, "grad_norm": 2.20740535387764, "learning_rate": 5.83296576290957e-07, "loss": 0.61, "step": 8171 }, { "epoch": 0.85, "grad_norm": 2.077062544429413, "learning_rate": 5.825077560697224e-07, "loss": 0.6323, "step": 8172 }, { "epoch": 0.85, "grad_norm": 2.2229980382125083, "learning_rate": 5.817194365964113e-07, "loss": 0.6538, "step": 8173 }, { "epoch": 0.85, "grad_norm": 2.0776525637885204, "learning_rate": 5.809316179603863e-07, "loss": 0.6899, "step": 8174 }, { "epoch": 0.85, "grad_norm": 2.060688398838161, "learning_rate": 5.801443002509493e-07, "loss": 0.6166, "step": 8175 }, { "epoch": 0.85, "grad_norm": 1.9882609174919457, "learning_rate": 5.793574835573495e-07, "loss": 0.6165, "step": 8176 }, { "epoch": 0.85, "grad_norm": 2.1539749506952197, "learning_rate": 5.785711679687756e-07, "loss": 0.6126, "step": 8177 }, { "epoch": 0.85, "grad_norm": 2.009397380360969, "learning_rate": 5.777853535743605e-07, "loss": 0.539, "step": 8178 }, { "epoch": 0.85, "grad_norm": 1.701887467794953, "learning_rate": 5.770000404631815e-07, "loss": 0.5223, "step": 8179 }, { "epoch": 0.85, "grad_norm": 1.8337089423095534, "learning_rate": 5.762152287242578e-07, "loss": 0.523, "step": 8180 }, { "epoch": 0.85, "grad_norm": 2.190954607822149, "learning_rate": 5.754309184465534e-07, "loss": 0.5291, "step": 8181 }, { "epoch": 0.85, "grad_norm": 1.9339239124719618, "learning_rate": 5.746471097189727e-07, "loss": 0.6148, "step": 8182 }, { "epoch": 0.85, "grad_norm": 1.8286911849784362, "learning_rate": 5.738638026303672e-07, "loss": 0.6589, "step": 8183 }, { "epoch": 0.85, "grad_norm": 1.763185449223713, "learning_rate": 5.730809972695272e-07, "loss": 0.6251, "step": 8184 }, { "epoch": 0.85, "grad_norm": 2.144097951689894, "learning_rate": 5.72298693725189e-07, "loss": 0.6271, "step": 8185 }, { "epoch": 0.85, "grad_norm": 2.1576327396917168, "learning_rate": 5.715168920860298e-07, "loss": 0.5955, "step": 8186 }, { "epoch": 0.85, "grad_norm": 2.2777450538296677, "learning_rate": 5.707355924406738e-07, "loss": 0.7751, "step": 8187 }, { "epoch": 0.85, "grad_norm": 2.2047094872367006, "learning_rate": 5.699547948776829e-07, "loss": 0.6282, "step": 8188 }, { "epoch": 0.85, "grad_norm": 1.9091741539508933, "learning_rate": 5.691744994855675e-07, "loss": 0.6249, "step": 8189 }, { "epoch": 0.85, "grad_norm": 2.021439937726755, "learning_rate": 5.683947063527762e-07, "loss": 0.565, "step": 8190 }, { "epoch": 0.85, "grad_norm": 1.729258043483863, "learning_rate": 5.676154155677066e-07, "loss": 0.5358, "step": 8191 }, { "epoch": 0.85, "grad_norm": 1.929628451697072, "learning_rate": 5.668366272186915e-07, "loss": 0.5293, "step": 8192 }, { "epoch": 0.85, "grad_norm": 1.8951688227059889, "learning_rate": 5.660583413940135e-07, "loss": 0.6399, "step": 8193 }, { "epoch": 0.85, "grad_norm": 2.0138368029617912, "learning_rate": 5.652805581818943e-07, "loss": 0.5991, "step": 8194 }, { "epoch": 0.85, "grad_norm": 1.9355511215261672, "learning_rate": 5.645032776705023e-07, "loss": 0.5545, "step": 8195 }, { "epoch": 0.85, "grad_norm": 2.068165286932841, "learning_rate": 5.637264999479436e-07, "loss": 0.6252, "step": 8196 }, { "epoch": 0.85, "grad_norm": 1.8040737962816091, "learning_rate": 5.629502251022734e-07, "loss": 0.6947, "step": 8197 }, { "epoch": 0.85, "grad_norm": 2.1167954493638685, "learning_rate": 5.621744532214856e-07, "loss": 0.6703, "step": 8198 }, { "epoch": 0.85, "grad_norm": 1.8845230887915438, "learning_rate": 5.613991843935179e-07, "loss": 0.6548, "step": 8199 }, { "epoch": 0.85, "grad_norm": 1.8029172725787435, "learning_rate": 5.606244187062509e-07, "loss": 0.6001, "step": 8200 }, { "epoch": 0.85, "grad_norm": 1.9588614842084962, "learning_rate": 5.598501562475111e-07, "loss": 0.5883, "step": 8201 }, { "epoch": 0.85, "grad_norm": 1.7162881782665964, "learning_rate": 5.590763971050628e-07, "loss": 0.5193, "step": 8202 }, { "epoch": 0.85, "grad_norm": 2.048313840022909, "learning_rate": 5.583031413666185e-07, "loss": 0.6895, "step": 8203 }, { "epoch": 0.85, "grad_norm": 1.9457017560438867, "learning_rate": 5.575303891198286e-07, "loss": 0.5658, "step": 8204 }, { "epoch": 0.85, "grad_norm": 1.880442705012502, "learning_rate": 5.567581404522914e-07, "loss": 0.5756, "step": 8205 }, { "epoch": 0.85, "grad_norm": 1.8335074588378506, "learning_rate": 5.559863954515448e-07, "loss": 0.554, "step": 8206 }, { "epoch": 0.85, "grad_norm": 1.957864056813018, "learning_rate": 5.552151542050699e-07, "loss": 0.7151, "step": 8207 }, { "epoch": 0.85, "grad_norm": 2.0410162015627153, "learning_rate": 5.54444416800291e-07, "loss": 0.6489, "step": 8208 }, { "epoch": 0.85, "grad_norm": 1.966686920765713, "learning_rate": 5.536741833245773e-07, "loss": 0.5739, "step": 8209 }, { "epoch": 0.85, "grad_norm": 2.097614872666767, "learning_rate": 5.529044538652373e-07, "loss": 0.6079, "step": 8210 }, { "epoch": 0.85, "grad_norm": 1.8126670428732163, "learning_rate": 5.521352285095261e-07, "loss": 0.5025, "step": 8211 }, { "epoch": 0.85, "grad_norm": 1.9132465488593897, "learning_rate": 5.513665073446372e-07, "loss": 0.6297, "step": 8212 }, { "epoch": 0.85, "grad_norm": 1.9350220871821595, "learning_rate": 5.505982904577123e-07, "loss": 0.615, "step": 8213 }, { "epoch": 0.85, "grad_norm": 2.1347049899077284, "learning_rate": 5.49830577935832e-07, "loss": 0.5208, "step": 8214 }, { "epoch": 0.85, "grad_norm": 1.8637329068323893, "learning_rate": 5.490633698660197e-07, "loss": 0.5808, "step": 8215 }, { "epoch": 0.85, "grad_norm": 1.948838872573589, "learning_rate": 5.482966663352451e-07, "loss": 0.621, "step": 8216 }, { "epoch": 0.85, "grad_norm": 2.0493579656488325, "learning_rate": 5.47530467430416e-07, "loss": 0.622, "step": 8217 }, { "epoch": 0.85, "grad_norm": 1.918223916486134, "learning_rate": 5.467647732383879e-07, "loss": 0.608, "step": 8218 }, { "epoch": 0.85, "grad_norm": 1.7803045728773015, "learning_rate": 5.459995838459542e-07, "loss": 0.5959, "step": 8219 }, { "epoch": 0.85, "grad_norm": 1.9266062790106375, "learning_rate": 5.452348993398566e-07, "loss": 0.5599, "step": 8220 }, { "epoch": 0.85, "grad_norm": 1.7707431361868795, "learning_rate": 5.444707198067722e-07, "loss": 0.6657, "step": 8221 }, { "epoch": 0.85, "grad_norm": 1.8932789462090363, "learning_rate": 5.437070453333288e-07, "loss": 0.6384, "step": 8222 }, { "epoch": 0.85, "grad_norm": 2.236832426733744, "learning_rate": 5.429438760060906e-07, "loss": 0.5938, "step": 8223 }, { "epoch": 0.85, "grad_norm": 1.9219044021119076, "learning_rate": 5.421812119115699e-07, "loss": 0.6044, "step": 8224 }, { "epoch": 0.85, "grad_norm": 2.2466433945686757, "learning_rate": 5.414190531362162e-07, "loss": 0.6077, "step": 8225 }, { "epoch": 0.86, "grad_norm": 1.8069595964244884, "learning_rate": 5.406573997664267e-07, "loss": 0.5446, "step": 8226 }, { "epoch": 0.86, "grad_norm": 1.8885534423348074, "learning_rate": 5.398962518885375e-07, "loss": 0.5408, "step": 8227 }, { "epoch": 0.86, "grad_norm": 2.093745092026466, "learning_rate": 5.391356095888323e-07, "loss": 0.6371, "step": 8228 }, { "epoch": 0.86, "grad_norm": 1.9475019077484441, "learning_rate": 5.38375472953529e-07, "loss": 0.6218, "step": 8229 }, { "epoch": 0.86, "grad_norm": 2.050764295266713, "learning_rate": 5.376158420687977e-07, "loss": 0.7155, "step": 8230 }, { "epoch": 0.86, "grad_norm": 2.039172342224332, "learning_rate": 5.368567170207445e-07, "loss": 0.6407, "step": 8231 }, { "epoch": 0.86, "grad_norm": 1.9356989051958875, "learning_rate": 5.360980978954223e-07, "loss": 0.5751, "step": 8232 }, { "epoch": 0.86, "grad_norm": 1.908719869422782, "learning_rate": 5.353399847788233e-07, "loss": 0.6832, "step": 8233 }, { "epoch": 0.86, "grad_norm": 1.9156768167886171, "learning_rate": 5.345823777568859e-07, "loss": 0.6937, "step": 8234 }, { "epoch": 0.86, "grad_norm": 2.094528279582242, "learning_rate": 5.338252769154878e-07, "loss": 0.5975, "step": 8235 }, { "epoch": 0.86, "grad_norm": 1.9826801963733196, "learning_rate": 5.330686823404507e-07, "loss": 0.5109, "step": 8236 }, { "epoch": 0.86, "grad_norm": 2.1642191778237656, "learning_rate": 5.323125941175383e-07, "loss": 0.6296, "step": 8237 }, { "epoch": 0.86, "grad_norm": 2.1050313683713204, "learning_rate": 5.315570123324593e-07, "loss": 0.5841, "step": 8238 }, { "epoch": 0.86, "grad_norm": 1.86255900999276, "learning_rate": 5.308019370708612e-07, "loss": 0.5258, "step": 8239 }, { "epoch": 0.86, "grad_norm": 2.0814563238442214, "learning_rate": 5.300473684183382e-07, "loss": 0.6346, "step": 8240 }, { "epoch": 0.86, "grad_norm": 2.004366161593308, "learning_rate": 5.292933064604228e-07, "loss": 0.6448, "step": 8241 }, { "epoch": 0.86, "grad_norm": 2.0814251969657196, "learning_rate": 5.28539751282594e-07, "loss": 0.7272, "step": 8242 }, { "epoch": 0.86, "grad_norm": 1.8919312569321771, "learning_rate": 5.277867029702716e-07, "loss": 0.5276, "step": 8243 }, { "epoch": 0.86, "grad_norm": 1.9044934817666754, "learning_rate": 5.270341616088153e-07, "loss": 0.5631, "step": 8244 }, { "epoch": 0.86, "grad_norm": 1.8961844755275268, "learning_rate": 5.262821272835334e-07, "loss": 0.5554, "step": 8245 }, { "epoch": 0.86, "grad_norm": 2.1041404998731177, "learning_rate": 5.255306000796717e-07, "loss": 0.5704, "step": 8246 }, { "epoch": 0.86, "grad_norm": 2.039862049268198, "learning_rate": 5.24779580082419e-07, "loss": 0.5524, "step": 8247 }, { "epoch": 0.86, "grad_norm": 1.8011189836643542, "learning_rate": 5.240290673769099e-07, "loss": 0.5643, "step": 8248 }, { "epoch": 0.86, "grad_norm": 1.7674719230033473, "learning_rate": 5.23279062048217e-07, "loss": 0.5861, "step": 8249 }, { "epoch": 0.86, "grad_norm": 1.6875423096614102, "learning_rate": 5.225295641813599e-07, "loss": 0.5266, "step": 8250 }, { "epoch": 0.86, "grad_norm": 2.1684829276074384, "learning_rate": 5.217805738612975e-07, "loss": 0.563, "step": 8251 }, { "epoch": 0.86, "grad_norm": 2.008210226253471, "learning_rate": 5.210320911729311e-07, "loss": 0.5562, "step": 8252 }, { "epoch": 0.86, "grad_norm": 2.146514487983114, "learning_rate": 5.202841162011074e-07, "loss": 0.6285, "step": 8253 }, { "epoch": 0.86, "grad_norm": 2.070368443828562, "learning_rate": 5.195366490306114e-07, "loss": 0.5777, "step": 8254 }, { "epoch": 0.86, "grad_norm": 1.9585353530387317, "learning_rate": 5.187896897461752e-07, "loss": 0.5651, "step": 8255 }, { "epoch": 0.86, "grad_norm": 2.179156757136343, "learning_rate": 5.180432384324691e-07, "loss": 0.6963, "step": 8256 }, { "epoch": 0.86, "grad_norm": 1.9217899295161627, "learning_rate": 5.172972951741096e-07, "loss": 0.5194, "step": 8257 }, { "epoch": 0.86, "grad_norm": 1.7949492707964125, "learning_rate": 5.165518600556507e-07, "loss": 0.4705, "step": 8258 }, { "epoch": 0.86, "grad_norm": 2.089497265943696, "learning_rate": 5.158069331615939e-07, "loss": 0.6488, "step": 8259 }, { "epoch": 0.86, "grad_norm": 1.925905784209994, "learning_rate": 5.150625145763794e-07, "loss": 0.523, "step": 8260 }, { "epoch": 0.86, "grad_norm": 1.8012988786439579, "learning_rate": 5.143186043843934e-07, "loss": 0.5625, "step": 8261 }, { "epoch": 0.86, "grad_norm": 2.073140541720193, "learning_rate": 5.135752026699597e-07, "loss": 0.776, "step": 8262 }, { "epoch": 0.86, "grad_norm": 2.0938632113238547, "learning_rate": 5.128323095173498e-07, "loss": 0.6787, "step": 8263 }, { "epoch": 0.86, "grad_norm": 1.8505470192539382, "learning_rate": 5.12089925010773e-07, "loss": 0.6027, "step": 8264 }, { "epoch": 0.86, "grad_norm": 1.9385142627925132, "learning_rate": 5.113480492343847e-07, "loss": 0.5588, "step": 8265 }, { "epoch": 0.86, "grad_norm": 2.375733866717919, "learning_rate": 5.106066822722782e-07, "loss": 0.66, "step": 8266 }, { "epoch": 0.86, "grad_norm": 1.9370422187030516, "learning_rate": 5.098658242084937e-07, "loss": 0.6152, "step": 8267 }, { "epoch": 0.86, "grad_norm": 1.9627422041290383, "learning_rate": 5.091254751270097e-07, "loss": 0.6244, "step": 8268 }, { "epoch": 0.86, "grad_norm": 1.9673460760767303, "learning_rate": 5.083856351117511e-07, "loss": 0.6486, "step": 8269 }, { "epoch": 0.86, "grad_norm": 1.9677413688817145, "learning_rate": 5.076463042465812e-07, "loss": 0.6466, "step": 8270 }, { "epoch": 0.86, "grad_norm": 1.9076042255087213, "learning_rate": 5.069074826153097e-07, "loss": 0.7091, "step": 8271 }, { "epoch": 0.86, "grad_norm": 2.1522781967814955, "learning_rate": 5.061691703016841e-07, "loss": 0.5958, "step": 8272 }, { "epoch": 0.86, "grad_norm": 1.9512598833988453, "learning_rate": 5.054313673893979e-07, "loss": 0.6494, "step": 8273 }, { "epoch": 0.86, "grad_norm": 1.8889340369572525, "learning_rate": 5.046940739620826e-07, "loss": 0.5436, "step": 8274 }, { "epoch": 0.86, "grad_norm": 2.1089818787586467, "learning_rate": 5.039572901033179e-07, "loss": 0.6802, "step": 8275 }, { "epoch": 0.86, "grad_norm": 1.9862877189958716, "learning_rate": 5.0322101589662e-07, "loss": 0.6129, "step": 8276 }, { "epoch": 0.86, "grad_norm": 1.8141324404301526, "learning_rate": 5.024852514254513e-07, "loss": 0.5729, "step": 8277 }, { "epoch": 0.86, "grad_norm": 2.0791012063858805, "learning_rate": 5.017499967732137e-07, "loss": 0.6168, "step": 8278 }, { "epoch": 0.86, "grad_norm": 2.2094091772410938, "learning_rate": 5.010152520232536e-07, "loss": 0.6785, "step": 8279 }, { "epoch": 0.86, "grad_norm": 2.0704609733536175, "learning_rate": 5.002810172588584e-07, "loss": 0.6584, "step": 8280 }, { "epoch": 0.86, "grad_norm": 1.9853255752421886, "learning_rate": 4.995472925632567e-07, "loss": 0.6358, "step": 8281 }, { "epoch": 0.86, "grad_norm": 1.9531326047376758, "learning_rate": 4.988140780196221e-07, "loss": 0.652, "step": 8282 }, { "epoch": 0.86, "grad_norm": 2.057213242161284, "learning_rate": 4.980813737110662e-07, "loss": 0.601, "step": 8283 }, { "epoch": 0.86, "grad_norm": 2.105331445169244, "learning_rate": 4.973491797206481e-07, "loss": 0.5704, "step": 8284 }, { "epoch": 0.86, "grad_norm": 1.9364053789589217, "learning_rate": 4.966174961313646e-07, "loss": 0.6009, "step": 8285 }, { "epoch": 0.86, "grad_norm": 2.0882062233429544, "learning_rate": 4.958863230261551e-07, "loss": 0.5458, "step": 8286 }, { "epoch": 0.86, "grad_norm": 1.9799264378167545, "learning_rate": 4.951556604879049e-07, "loss": 0.582, "step": 8287 }, { "epoch": 0.86, "grad_norm": 2.021495582372774, "learning_rate": 4.94425508599437e-07, "loss": 0.5471, "step": 8288 }, { "epoch": 0.86, "grad_norm": 1.8753911264845755, "learning_rate": 4.936958674435178e-07, "loss": 0.6074, "step": 8289 }, { "epoch": 0.86, "grad_norm": 2.066023846671249, "learning_rate": 4.929667371028579e-07, "loss": 0.6597, "step": 8290 }, { "epoch": 0.86, "grad_norm": 2.150677404952387, "learning_rate": 4.922381176601066e-07, "loss": 0.5113, "step": 8291 }, { "epoch": 0.86, "grad_norm": 2.054898643812894, "learning_rate": 4.915100091978591e-07, "loss": 0.6124, "step": 8292 }, { "epoch": 0.86, "grad_norm": 1.6742532360960376, "learning_rate": 4.907824117986487e-07, "loss": 0.5931, "step": 8293 }, { "epoch": 0.86, "grad_norm": 1.8695315925969587, "learning_rate": 4.900553255449553e-07, "loss": 0.5876, "step": 8294 }, { "epoch": 0.86, "grad_norm": 1.8359415649511754, "learning_rate": 4.893287505191946e-07, "loss": 0.5155, "step": 8295 }, { "epoch": 0.86, "grad_norm": 2.06769544820391, "learning_rate": 4.886026868037313e-07, "loss": 0.5753, "step": 8296 }, { "epoch": 0.86, "grad_norm": 2.0138686917169335, "learning_rate": 4.878771344808664e-07, "loss": 0.5813, "step": 8297 }, { "epoch": 0.86, "grad_norm": 1.9528925103015966, "learning_rate": 4.871520936328478e-07, "loss": 0.5845, "step": 8298 }, { "epoch": 0.86, "grad_norm": 1.8439522634772305, "learning_rate": 4.864275643418603e-07, "loss": 0.502, "step": 8299 }, { "epoch": 0.86, "grad_norm": 2.2204098320288224, "learning_rate": 4.857035466900361e-07, "loss": 0.6992, "step": 8300 }, { "epoch": 0.86, "grad_norm": 2.3494412708615866, "learning_rate": 4.849800407594446e-07, "loss": 0.5811, "step": 8301 }, { "epoch": 0.86, "grad_norm": 2.3615193836986235, "learning_rate": 4.842570466321023e-07, "loss": 0.6805, "step": 8302 }, { "epoch": 0.86, "grad_norm": 1.9959876511710395, "learning_rate": 4.835345643899609e-07, "loss": 0.6178, "step": 8303 }, { "epoch": 0.86, "grad_norm": 2.1046472978265878, "learning_rate": 4.828125941149197e-07, "loss": 0.524, "step": 8304 }, { "epoch": 0.86, "grad_norm": 1.8352720574795676, "learning_rate": 4.820911358888181e-07, "loss": 0.6001, "step": 8305 }, { "epoch": 0.86, "grad_norm": 1.7599800381591997, "learning_rate": 4.813701897934375e-07, "loss": 0.5603, "step": 8306 }, { "epoch": 0.86, "grad_norm": 2.268456220684496, "learning_rate": 4.806497559105011e-07, "loss": 0.5543, "step": 8307 }, { "epoch": 0.86, "grad_norm": 1.8005414284190293, "learning_rate": 4.799298343216746e-07, "loss": 0.6011, "step": 8308 }, { "epoch": 0.86, "grad_norm": 1.925175771444624, "learning_rate": 4.792104251085655e-07, "loss": 0.5503, "step": 8309 }, { "epoch": 0.86, "grad_norm": 2.3450122517710894, "learning_rate": 4.784915283527219e-07, "loss": 0.6776, "step": 8310 }, { "epoch": 0.86, "grad_norm": 2.1628264585537806, "learning_rate": 4.777731441356342e-07, "loss": 0.5846, "step": 8311 }, { "epoch": 0.86, "grad_norm": 1.6450519414376317, "learning_rate": 4.770552725387378e-07, "loss": 0.5043, "step": 8312 }, { "epoch": 0.86, "grad_norm": 1.7482690149775968, "learning_rate": 4.763379136434054e-07, "loss": 0.6179, "step": 8313 }, { "epoch": 0.86, "grad_norm": 1.9801836667225938, "learning_rate": 4.7562106753095527e-07, "loss": 0.5958, "step": 8314 }, { "epoch": 0.86, "grad_norm": 1.9733287042306242, "learning_rate": 4.7490473428264406e-07, "loss": 0.7162, "step": 8315 }, { "epoch": 0.86, "grad_norm": 1.8725031647315058, "learning_rate": 4.741889139796746e-07, "loss": 0.5833, "step": 8316 }, { "epoch": 0.86, "grad_norm": 1.799784467033496, "learning_rate": 4.7347360670318756e-07, "loss": 0.6006, "step": 8317 }, { "epoch": 0.86, "grad_norm": 1.9366110237287402, "learning_rate": 4.727588125342669e-07, "loss": 0.5255, "step": 8318 }, { "epoch": 0.86, "grad_norm": 2.234824200192645, "learning_rate": 4.7204453155394013e-07, "loss": 0.6816, "step": 8319 }, { "epoch": 0.86, "grad_norm": 2.5122506337921116, "learning_rate": 4.7133076384317354e-07, "loss": 0.6069, "step": 8320 }, { "epoch": 0.86, "grad_norm": 2.119122687155672, "learning_rate": 4.70617509482878e-07, "loss": 0.6328, "step": 8321 }, { "epoch": 0.87, "grad_norm": 2.004192201331832, "learning_rate": 4.699047685539038e-07, "loss": 0.5006, "step": 8322 }, { "epoch": 0.87, "grad_norm": 1.916590258992686, "learning_rate": 4.6919254113704515e-07, "loss": 0.5811, "step": 8323 }, { "epoch": 0.87, "grad_norm": 1.6538686628596007, "learning_rate": 4.68480827313037e-07, "loss": 0.5711, "step": 8324 }, { "epoch": 0.87, "grad_norm": 1.6500225555034003, "learning_rate": 4.6776962716255593e-07, "loss": 0.5336, "step": 8325 }, { "epoch": 0.87, "grad_norm": 1.9802573141561102, "learning_rate": 4.670589407662196e-07, "loss": 0.5819, "step": 8326 }, { "epoch": 0.87, "grad_norm": 2.0088327902438525, "learning_rate": 4.663487682045903e-07, "loss": 0.5182, "step": 8327 }, { "epoch": 0.87, "grad_norm": 2.017583462703393, "learning_rate": 4.656391095581675e-07, "loss": 0.6915, "step": 8328 }, { "epoch": 0.87, "grad_norm": 2.1208095680699808, "learning_rate": 4.6492996490739796e-07, "loss": 0.6893, "step": 8329 }, { "epoch": 0.87, "grad_norm": 1.9534148806244596, "learning_rate": 4.6422133433266513e-07, "loss": 0.5895, "step": 8330 }, { "epoch": 0.87, "grad_norm": 1.8000895668108274, "learning_rate": 4.6351321791429924e-07, "loss": 0.5725, "step": 8331 }, { "epoch": 0.87, "grad_norm": 1.9340943599774802, "learning_rate": 4.62805615732565e-07, "loss": 0.6447, "step": 8332 }, { "epoch": 0.87, "grad_norm": 1.8956460910814394, "learning_rate": 4.6209852786767593e-07, "loss": 0.5395, "step": 8333 }, { "epoch": 0.87, "grad_norm": 1.709032151810743, "learning_rate": 4.613919543997836e-07, "loss": 0.5488, "step": 8334 }, { "epoch": 0.87, "grad_norm": 2.113498349240345, "learning_rate": 4.606858954089827e-07, "loss": 0.6202, "step": 8335 }, { "epoch": 0.87, "grad_norm": 2.034226927445555, "learning_rate": 4.599803509753081e-07, "loss": 0.6481, "step": 8336 }, { "epoch": 0.87, "grad_norm": 1.853159457928419, "learning_rate": 4.592753211787393e-07, "loss": 0.6445, "step": 8337 }, { "epoch": 0.87, "grad_norm": 2.1242559862625416, "learning_rate": 4.585708060991928e-07, "loss": 0.6081, "step": 8338 }, { "epoch": 0.87, "grad_norm": 2.2173927759277388, "learning_rate": 4.578668058165325e-07, "loss": 0.6526, "step": 8339 }, { "epoch": 0.87, "grad_norm": 1.9183380234550136, "learning_rate": 4.571633204105574e-07, "loss": 0.5716, "step": 8340 }, { "epoch": 0.87, "grad_norm": 1.852559147624212, "learning_rate": 4.564603499610143e-07, "loss": 0.6098, "step": 8341 }, { "epoch": 0.87, "grad_norm": 1.9970457306785048, "learning_rate": 4.5575789454758656e-07, "loss": 0.6656, "step": 8342 }, { "epoch": 0.87, "grad_norm": 2.102913775343231, "learning_rate": 4.5505595424990446e-07, "loss": 0.6814, "step": 8343 }, { "epoch": 0.87, "grad_norm": 2.1924316214071427, "learning_rate": 4.5435452914753377e-07, "loss": 0.5918, "step": 8344 }, { "epoch": 0.87, "grad_norm": 2.16432717318907, "learning_rate": 4.5365361931998696e-07, "loss": 0.6633, "step": 8345 }, { "epoch": 0.87, "grad_norm": 2.0535418697441377, "learning_rate": 4.5295322484671667e-07, "loss": 0.5843, "step": 8346 }, { "epoch": 0.87, "grad_norm": 1.9299352635441742, "learning_rate": 4.522533458071149e-07, "loss": 0.6031, "step": 8347 }, { "epoch": 0.87, "grad_norm": 1.8036773453246178, "learning_rate": 4.5155398228051707e-07, "loss": 0.5917, "step": 8348 }, { "epoch": 0.87, "grad_norm": 1.9397877871030635, "learning_rate": 4.508551343462014e-07, "loss": 0.6233, "step": 8349 }, { "epoch": 0.87, "grad_norm": 2.252675458486445, "learning_rate": 4.501568020833846e-07, "loss": 0.5963, "step": 8350 }, { "epoch": 0.87, "grad_norm": 2.04584672503017, "learning_rate": 4.4945898557122893e-07, "loss": 0.592, "step": 8351 }, { "epoch": 0.87, "grad_norm": 2.1792705337695466, "learning_rate": 4.4876168488883267e-07, "loss": 0.6744, "step": 8352 }, { "epoch": 0.87, "grad_norm": 1.852796108199507, "learning_rate": 4.4806490011524205e-07, "loss": 0.5102, "step": 8353 }, { "epoch": 0.87, "grad_norm": 1.5734956707152514, "learning_rate": 4.473686313294401e-07, "loss": 0.5494, "step": 8354 }, { "epoch": 0.87, "grad_norm": 1.7716365972878156, "learning_rate": 4.466728786103519e-07, "loss": 0.5574, "step": 8355 }, { "epoch": 0.87, "grad_norm": 1.9458084993387372, "learning_rate": 4.4597764203684725e-07, "loss": 0.5328, "step": 8356 }, { "epoch": 0.87, "grad_norm": 2.197405580309809, "learning_rate": 4.4528292168773303e-07, "loss": 0.6607, "step": 8357 }, { "epoch": 0.87, "grad_norm": 2.0426609447432154, "learning_rate": 4.445887176417613e-07, "loss": 0.6092, "step": 8358 }, { "epoch": 0.87, "grad_norm": 1.9637826343530485, "learning_rate": 4.4389502997762236e-07, "loss": 0.5815, "step": 8359 }, { "epoch": 0.87, "grad_norm": 1.9580038792141665, "learning_rate": 4.432018587739517e-07, "loss": 0.6124, "step": 8360 }, { "epoch": 0.87, "grad_norm": 1.8027490658262888, "learning_rate": 4.425092041093237e-07, "loss": 0.5564, "step": 8361 }, { "epoch": 0.87, "grad_norm": 2.890109404627932, "learning_rate": 4.418170660622539e-07, "loss": 0.6641, "step": 8362 }, { "epoch": 0.87, "grad_norm": 1.9496169997143404, "learning_rate": 4.4112544471119954e-07, "loss": 0.6565, "step": 8363 }, { "epoch": 0.87, "grad_norm": 2.1410985482384, "learning_rate": 4.404343401345612e-07, "loss": 0.7114, "step": 8364 }, { "epoch": 0.87, "grad_norm": 1.7444177282452187, "learning_rate": 4.39743752410679e-07, "loss": 0.6074, "step": 8365 }, { "epoch": 0.87, "grad_norm": 1.9722413049276115, "learning_rate": 4.390536816178353e-07, "loss": 0.6795, "step": 8366 }, { "epoch": 0.87, "grad_norm": 1.8931068079905056, "learning_rate": 4.3836412783425265e-07, "loss": 0.6789, "step": 8367 }, { "epoch": 0.87, "grad_norm": 1.956276084763643, "learning_rate": 4.3767509113809836e-07, "loss": 0.6538, "step": 8368 }, { "epoch": 0.87, "grad_norm": 1.7819181415754366, "learning_rate": 4.3698657160747504e-07, "loss": 0.6105, "step": 8369 }, { "epoch": 0.87, "grad_norm": 1.852985794251653, "learning_rate": 4.36298569320433e-07, "loss": 0.4922, "step": 8370 }, { "epoch": 0.87, "grad_norm": 2.074863975049127, "learning_rate": 4.3561108435495936e-07, "loss": 0.5791, "step": 8371 }, { "epoch": 0.87, "grad_norm": 1.8898011683002591, "learning_rate": 4.349241167889867e-07, "loss": 0.6205, "step": 8372 }, { "epoch": 0.87, "grad_norm": 1.859913410648297, "learning_rate": 4.342376667003845e-07, "loss": 0.6641, "step": 8373 }, { "epoch": 0.87, "grad_norm": 1.835582843864355, "learning_rate": 4.335517341669676e-07, "loss": 0.5306, "step": 8374 }, { "epoch": 0.87, "grad_norm": 2.1789774219199276, "learning_rate": 4.3286631926648834e-07, "loss": 0.6437, "step": 8375 }, { "epoch": 0.87, "grad_norm": 2.1690881296258766, "learning_rate": 4.321814220766457e-07, "loss": 0.5847, "step": 8376 }, { "epoch": 0.87, "grad_norm": 1.9311374676014503, "learning_rate": 4.3149704267507254e-07, "loss": 0.5837, "step": 8377 }, { "epoch": 0.87, "grad_norm": 1.8601406230446484, "learning_rate": 4.3081318113935013e-07, "loss": 0.5966, "step": 8378 }, { "epoch": 0.87, "grad_norm": 1.9117613858119065, "learning_rate": 4.3012983754699645e-07, "loss": 0.5844, "step": 8379 }, { "epoch": 0.87, "grad_norm": 2.1005104659206735, "learning_rate": 4.29447011975474e-07, "loss": 0.5632, "step": 8380 }, { "epoch": 0.87, "grad_norm": 1.8160285873217523, "learning_rate": 4.2876470450218254e-07, "loss": 0.5465, "step": 8381 }, { "epoch": 0.87, "grad_norm": 1.8764970657380244, "learning_rate": 4.2808291520446856e-07, "loss": 0.6718, "step": 8382 }, { "epoch": 0.87, "grad_norm": 1.9302392899798297, "learning_rate": 4.274016441596146e-07, "loss": 0.6263, "step": 8383 }, { "epoch": 0.87, "grad_norm": 2.380086794220247, "learning_rate": 4.267208914448467e-07, "loss": 0.6717, "step": 8384 }, { "epoch": 0.87, "grad_norm": 1.8467666239151335, "learning_rate": 4.2604065713733376e-07, "loss": 0.4995, "step": 8385 }, { "epoch": 0.87, "grad_norm": 2.1309058059197357, "learning_rate": 4.253609413141824e-07, "loss": 0.6071, "step": 8386 }, { "epoch": 0.87, "grad_norm": 2.056063519146599, "learning_rate": 4.2468174405244255e-07, "loss": 0.6373, "step": 8387 }, { "epoch": 0.87, "grad_norm": 1.997005731740509, "learning_rate": 4.240030654291061e-07, "loss": 0.6215, "step": 8388 }, { "epoch": 0.87, "grad_norm": 1.8805146093287572, "learning_rate": 4.2332490552110363e-07, "loss": 0.6959, "step": 8389 }, { "epoch": 0.87, "grad_norm": 1.9311754319444367, "learning_rate": 4.2264726440531036e-07, "loss": 0.6109, "step": 8390 }, { "epoch": 0.87, "grad_norm": 1.9639047760658772, "learning_rate": 4.2197014215853926e-07, "loss": 0.6419, "step": 8391 }, { "epoch": 0.87, "grad_norm": 1.9795175845484239, "learning_rate": 4.2129353885754564e-07, "loss": 0.6291, "step": 8392 }, { "epoch": 0.87, "grad_norm": 2.249186746122543, "learning_rate": 4.206174545790281e-07, "loss": 0.5673, "step": 8393 }, { "epoch": 0.87, "grad_norm": 2.253664003461183, "learning_rate": 4.199418893996232e-07, "loss": 0.6916, "step": 8394 }, { "epoch": 0.87, "grad_norm": 2.1217256531704485, "learning_rate": 4.192668433959113e-07, "loss": 0.6001, "step": 8395 }, { "epoch": 0.87, "grad_norm": 1.9901519915823938, "learning_rate": 4.1859231664441115e-07, "loss": 0.5294, "step": 8396 }, { "epoch": 0.87, "grad_norm": 1.9997277010717542, "learning_rate": 4.179183092215855e-07, "loss": 0.7411, "step": 8397 }, { "epoch": 0.87, "grad_norm": 2.165446184188909, "learning_rate": 4.172448212038371e-07, "loss": 0.6355, "step": 8398 }, { "epoch": 0.87, "grad_norm": 2.11255812027217, "learning_rate": 4.165718526675083e-07, "loss": 0.5731, "step": 8399 }, { "epoch": 0.87, "grad_norm": 1.919796366514845, "learning_rate": 4.158994036888847e-07, "loss": 0.5309, "step": 8400 }, { "epoch": 0.87, "grad_norm": 2.390639655248388, "learning_rate": 4.152274743441925e-07, "loss": 0.5966, "step": 8401 }, { "epoch": 0.87, "grad_norm": 1.738681794830981, "learning_rate": 4.1455606470959755e-07, "loss": 0.4943, "step": 8402 }, { "epoch": 0.87, "grad_norm": 2.2208990337370897, "learning_rate": 4.138851748612099e-07, "loss": 0.6778, "step": 8403 }, { "epoch": 0.87, "grad_norm": 2.0630590778482705, "learning_rate": 4.132148048750767e-07, "loss": 0.5795, "step": 8404 }, { "epoch": 0.87, "grad_norm": 1.9018986833626161, "learning_rate": 4.125449548271909e-07, "loss": 0.5599, "step": 8405 }, { "epoch": 0.87, "grad_norm": 2.0210569711520066, "learning_rate": 4.118756247934802e-07, "loss": 0.6431, "step": 8406 }, { "epoch": 0.87, "grad_norm": 2.0602854396292396, "learning_rate": 4.112068148498199e-07, "loss": 0.7268, "step": 8407 }, { "epoch": 0.87, "grad_norm": 1.9460298623096413, "learning_rate": 4.1053852507202117e-07, "loss": 0.6629, "step": 8408 }, { "epoch": 0.87, "grad_norm": 1.7595563936601968, "learning_rate": 4.098707555358411e-07, "loss": 0.5799, "step": 8409 }, { "epoch": 0.87, "grad_norm": 2.0271997118305483, "learning_rate": 4.092035063169725e-07, "loss": 0.6046, "step": 8410 }, { "epoch": 0.87, "grad_norm": 2.0289865415330928, "learning_rate": 4.0853677749105426e-07, "loss": 0.5695, "step": 8411 }, { "epoch": 0.87, "grad_norm": 1.9559717154468896, "learning_rate": 4.078705691336621e-07, "loss": 0.6128, "step": 8412 }, { "epoch": 0.87, "grad_norm": 1.8769271149503086, "learning_rate": 4.072048813203161e-07, "loss": 0.6361, "step": 8413 }, { "epoch": 0.87, "grad_norm": 2.0028628194390103, "learning_rate": 4.065397141264737e-07, "loss": 0.6202, "step": 8414 }, { "epoch": 0.87, "grad_norm": 1.9916985701401626, "learning_rate": 4.0587506762753747e-07, "loss": 0.6754, "step": 8415 }, { "epoch": 0.87, "grad_norm": 1.9127873478665742, "learning_rate": 4.05210941898847e-07, "loss": 0.5592, "step": 8416 }, { "epoch": 0.87, "grad_norm": 2.0282432551440217, "learning_rate": 4.045473370156866e-07, "loss": 0.7055, "step": 8417 }, { "epoch": 0.88, "grad_norm": 1.8459979565416385, "learning_rate": 4.038842530532777e-07, "loss": 0.5877, "step": 8418 }, { "epoch": 0.88, "grad_norm": 1.8860989137846904, "learning_rate": 4.032216900867869e-07, "loss": 0.5879, "step": 8419 }, { "epoch": 0.88, "grad_norm": 2.303077578313458, "learning_rate": 4.02559648191318e-07, "loss": 0.6251, "step": 8420 }, { "epoch": 0.88, "grad_norm": 2.0308831657626274, "learning_rate": 4.0189812744191647e-07, "loss": 0.6051, "step": 8421 }, { "epoch": 0.88, "grad_norm": 1.784559200096564, "learning_rate": 4.0123712791357185e-07, "loss": 0.5577, "step": 8422 }, { "epoch": 0.88, "grad_norm": 2.0297559541852874, "learning_rate": 4.005766496812097e-07, "loss": 0.6529, "step": 8423 }, { "epoch": 0.88, "grad_norm": 1.9111708728025478, "learning_rate": 3.999166928197007e-07, "loss": 0.5998, "step": 8424 }, { "epoch": 0.88, "grad_norm": 2.132095921466122, "learning_rate": 3.9925725740385503e-07, "loss": 0.6852, "step": 8425 }, { "epoch": 0.88, "grad_norm": 1.8697129123540708, "learning_rate": 3.985983435084212e-07, "loss": 0.5694, "step": 8426 }, { "epoch": 0.88, "grad_norm": 1.9320332116706223, "learning_rate": 3.979399512080928e-07, "loss": 0.6194, "step": 8427 }, { "epoch": 0.88, "grad_norm": 1.7865258361708811, "learning_rate": 3.972820805775024e-07, "loss": 0.5196, "step": 8428 }, { "epoch": 0.88, "grad_norm": 1.9635188121091862, "learning_rate": 3.966247316912214e-07, "loss": 0.6628, "step": 8429 }, { "epoch": 0.88, "grad_norm": 2.052755454266565, "learning_rate": 3.959679046237663e-07, "loss": 0.5529, "step": 8430 }, { "epoch": 0.88, "grad_norm": 2.2660073623651082, "learning_rate": 3.9531159944959094e-07, "loss": 0.5532, "step": 8431 }, { "epoch": 0.88, "grad_norm": 2.1619277389271128, "learning_rate": 3.9465581624309245e-07, "loss": 0.6274, "step": 8432 }, { "epoch": 0.88, "grad_norm": 1.9488850141266936, "learning_rate": 3.9400055507860525e-07, "loss": 0.5921, "step": 8433 }, { "epoch": 0.88, "grad_norm": 1.9310958922073735, "learning_rate": 3.933458160304099e-07, "loss": 0.5115, "step": 8434 }, { "epoch": 0.88, "grad_norm": 1.7377699202843322, "learning_rate": 3.9269159917272313e-07, "loss": 0.5034, "step": 8435 }, { "epoch": 0.88, "grad_norm": 2.020931667657547, "learning_rate": 3.920379045797046e-07, "loss": 0.6726, "step": 8436 }, { "epoch": 0.88, "grad_norm": 2.045919126231745, "learning_rate": 3.9138473232545326e-07, "loss": 0.6392, "step": 8437 }, { "epoch": 0.88, "grad_norm": 1.9556100117349695, "learning_rate": 3.907320824840111e-07, "loss": 0.6564, "step": 8438 }, { "epoch": 0.88, "grad_norm": 1.830463491913251, "learning_rate": 3.900799551293588e-07, "loss": 0.5916, "step": 8439 }, { "epoch": 0.88, "grad_norm": 2.117119952335453, "learning_rate": 3.8942835033542013e-07, "loss": 0.6425, "step": 8440 }, { "epoch": 0.88, "grad_norm": 2.351242053992765, "learning_rate": 3.8877726817605655e-07, "loss": 0.6671, "step": 8441 }, { "epoch": 0.88, "grad_norm": 1.9606052679245647, "learning_rate": 3.8812670872507454e-07, "loss": 0.572, "step": 8442 }, { "epoch": 0.88, "grad_norm": 2.001344741894956, "learning_rate": 3.874766720562151e-07, "loss": 0.6716, "step": 8443 }, { "epoch": 0.88, "grad_norm": 1.8404479443713242, "learning_rate": 3.8682715824316594e-07, "loss": 0.588, "step": 8444 }, { "epoch": 0.88, "grad_norm": 1.9912689065492977, "learning_rate": 3.861781673595516e-07, "loss": 0.5291, "step": 8445 }, { "epoch": 0.88, "grad_norm": 1.8709075400500266, "learning_rate": 3.855296994789415e-07, "loss": 0.6012, "step": 8446 }, { "epoch": 0.88, "grad_norm": 2.1625558589548106, "learning_rate": 3.8488175467484015e-07, "loss": 0.5877, "step": 8447 }, { "epoch": 0.88, "grad_norm": 2.0313618701656835, "learning_rate": 3.8423433302069824e-07, "loss": 0.6104, "step": 8448 }, { "epoch": 0.88, "grad_norm": 1.76595693209394, "learning_rate": 3.8358743458990267e-07, "loss": 0.5309, "step": 8449 }, { "epoch": 0.88, "grad_norm": 1.9556132619540931, "learning_rate": 3.829410594557859e-07, "loss": 0.5177, "step": 8450 }, { "epoch": 0.88, "grad_norm": 1.9424701856527413, "learning_rate": 3.8229520769161474e-07, "loss": 0.5511, "step": 8451 }, { "epoch": 0.88, "grad_norm": 2.0982651348249606, "learning_rate": 3.816498793706025e-07, "loss": 0.6457, "step": 8452 }, { "epoch": 0.88, "grad_norm": 2.1999610052684497, "learning_rate": 3.810050745658994e-07, "loss": 0.7338, "step": 8453 }, { "epoch": 0.88, "grad_norm": 1.9720801346355596, "learning_rate": 3.803607933505993e-07, "loss": 0.5504, "step": 8454 }, { "epoch": 0.88, "grad_norm": 1.9568723610962957, "learning_rate": 3.797170357977337e-07, "loss": 0.633, "step": 8455 }, { "epoch": 0.88, "grad_norm": 1.9100529824005883, "learning_rate": 3.790738019802775e-07, "loss": 0.6134, "step": 8456 }, { "epoch": 0.88, "grad_norm": 1.997788576607131, "learning_rate": 3.7843109197114426e-07, "loss": 0.7004, "step": 8457 }, { "epoch": 0.88, "grad_norm": 2.0031467913611256, "learning_rate": 3.7778890584318773e-07, "loss": 0.6788, "step": 8458 }, { "epoch": 0.88, "grad_norm": 1.9267261626502268, "learning_rate": 3.771472436692053e-07, "loss": 0.6607, "step": 8459 }, { "epoch": 0.88, "grad_norm": 2.0280693159384757, "learning_rate": 3.7650610552193157e-07, "loss": 0.6408, "step": 8460 }, { "epoch": 0.88, "grad_norm": 2.047634744310128, "learning_rate": 3.758654914740445e-07, "loss": 0.5969, "step": 8461 }, { "epoch": 0.88, "grad_norm": 2.1014846487101164, "learning_rate": 3.752254015981599e-07, "loss": 0.6298, "step": 8462 }, { "epoch": 0.88, "grad_norm": 1.9895447225484795, "learning_rate": 3.745858359668375e-07, "loss": 0.6054, "step": 8463 }, { "epoch": 0.88, "grad_norm": 1.90717225882143, "learning_rate": 3.7394679465257435e-07, "loss": 0.5842, "step": 8464 }, { "epoch": 0.88, "grad_norm": 2.051788494418991, "learning_rate": 3.7330827772780967e-07, "loss": 0.5711, "step": 8465 }, { "epoch": 0.88, "grad_norm": 1.9343377474924703, "learning_rate": 3.726702852649222e-07, "loss": 0.5839, "step": 8466 }, { "epoch": 0.88, "grad_norm": 2.3496780116095097, "learning_rate": 3.720328173362331e-07, "loss": 0.6441, "step": 8467 }, { "epoch": 0.88, "grad_norm": 2.050871079546467, "learning_rate": 3.713958740140022e-07, "loss": 0.6083, "step": 8468 }, { "epoch": 0.88, "grad_norm": 1.9364655434741551, "learning_rate": 3.7075945537043244e-07, "loss": 0.5007, "step": 8469 }, { "epoch": 0.88, "grad_norm": 2.1012266768594476, "learning_rate": 3.701235614776627e-07, "loss": 0.7012, "step": 8470 }, { "epoch": 0.88, "grad_norm": 2.0691007203153036, "learning_rate": 3.6948819240777756e-07, "loss": 0.5571, "step": 8471 }, { "epoch": 0.88, "grad_norm": 1.7160709095761875, "learning_rate": 3.688533482327994e-07, "loss": 0.5777, "step": 8472 }, { "epoch": 0.88, "grad_norm": 1.8206494989587712, "learning_rate": 3.6821902902469066e-07, "loss": 0.525, "step": 8473 }, { "epoch": 0.88, "grad_norm": 1.711893505353368, "learning_rate": 3.675852348553538e-07, "loss": 0.5737, "step": 8474 }, { "epoch": 0.88, "grad_norm": 2.033288025220472, "learning_rate": 3.6695196579663583e-07, "loss": 0.534, "step": 8475 }, { "epoch": 0.88, "grad_norm": 2.0240144326187415, "learning_rate": 3.6631922192031934e-07, "loss": 0.7105, "step": 8476 }, { "epoch": 0.88, "grad_norm": 2.2155007112046166, "learning_rate": 3.656870032981308e-07, "loss": 0.627, "step": 8477 }, { "epoch": 0.88, "grad_norm": 2.021651389737251, "learning_rate": 3.65055310001734e-07, "loss": 0.6434, "step": 8478 }, { "epoch": 0.88, "grad_norm": 2.350132802250893, "learning_rate": 3.6442414210273834e-07, "loss": 0.6545, "step": 8479 }, { "epoch": 0.88, "grad_norm": 2.043985363928126, "learning_rate": 3.637934996726861e-07, "loss": 0.6513, "step": 8480 }, { "epoch": 0.88, "grad_norm": 1.817926490943509, "learning_rate": 3.6316338278306715e-07, "loss": 0.621, "step": 8481 }, { "epoch": 0.88, "grad_norm": 2.1668932428840284, "learning_rate": 3.6253379150530676e-07, "loss": 0.6419, "step": 8482 }, { "epoch": 0.88, "grad_norm": 2.0924027965702496, "learning_rate": 3.619047259107744e-07, "loss": 0.7059, "step": 8483 }, { "epoch": 0.88, "grad_norm": 1.973897319602242, "learning_rate": 3.6127618607077754e-07, "loss": 0.6232, "step": 8484 }, { "epoch": 0.88, "grad_norm": 2.1019528532461513, "learning_rate": 3.606481720565652e-07, "loss": 0.6515, "step": 8485 }, { "epoch": 0.88, "grad_norm": 1.8078476936519057, "learning_rate": 3.600206839393261e-07, "loss": 0.5487, "step": 8486 }, { "epoch": 0.88, "grad_norm": 2.6102160702115826, "learning_rate": 3.593937217901894e-07, "loss": 0.6213, "step": 8487 }, { "epoch": 0.88, "grad_norm": 1.7697681408557449, "learning_rate": 3.587672856802238e-07, "loss": 0.6497, "step": 8488 }, { "epoch": 0.88, "grad_norm": 1.9259481594858332, "learning_rate": 3.581413756804414e-07, "loss": 0.6138, "step": 8489 }, { "epoch": 0.88, "grad_norm": 2.078149202383723, "learning_rate": 3.5751599186179155e-07, "loss": 0.604, "step": 8490 }, { "epoch": 0.88, "grad_norm": 2.014467331458678, "learning_rate": 3.568911342951653e-07, "loss": 0.7161, "step": 8491 }, { "epoch": 0.88, "grad_norm": 1.9429447928577988, "learning_rate": 3.5626680305139383e-07, "loss": 0.5646, "step": 8492 }, { "epoch": 0.88, "grad_norm": 1.8978526166540983, "learning_rate": 3.5564299820124883e-07, "loss": 0.6737, "step": 8493 }, { "epoch": 0.88, "grad_norm": 1.78340609310645, "learning_rate": 3.5501971981544206e-07, "loss": 0.6035, "step": 8494 }, { "epoch": 0.88, "grad_norm": 2.1841185692701757, "learning_rate": 3.5439696796462474e-07, "loss": 0.687, "step": 8495 }, { "epoch": 0.88, "grad_norm": 1.9069053525425088, "learning_rate": 3.5377474271939103e-07, "loss": 0.5794, "step": 8496 }, { "epoch": 0.88, "grad_norm": 1.9781368239694577, "learning_rate": 3.531530441502723e-07, "loss": 0.6754, "step": 8497 }, { "epoch": 0.88, "grad_norm": 1.787260799174934, "learning_rate": 3.525318723277427e-07, "loss": 0.621, "step": 8498 }, { "epoch": 0.88, "grad_norm": 1.8797198389956506, "learning_rate": 3.519112273222142e-07, "loss": 0.5571, "step": 8499 }, { "epoch": 0.88, "grad_norm": 2.0387277170307674, "learning_rate": 3.512911092040422e-07, "loss": 0.6346, "step": 8500 }, { "epoch": 0.88, "grad_norm": 1.9635088913195, "learning_rate": 3.506715180435194e-07, "loss": 0.6559, "step": 8501 }, { "epoch": 0.88, "grad_norm": 1.8491276195881368, "learning_rate": 3.500524539108807e-07, "loss": 0.5446, "step": 8502 }, { "epoch": 0.88, "grad_norm": 1.9939506343422873, "learning_rate": 3.4943391687629946e-07, "loss": 0.4936, "step": 8503 }, { "epoch": 0.88, "grad_norm": 1.9671648301858564, "learning_rate": 3.4881590700989175e-07, "loss": 0.5706, "step": 8504 }, { "epoch": 0.88, "grad_norm": 2.044394803943145, "learning_rate": 3.48198424381711e-07, "loss": 0.625, "step": 8505 }, { "epoch": 0.88, "grad_norm": 1.9877868545973922, "learning_rate": 3.475814690617541e-07, "loss": 0.5645, "step": 8506 }, { "epoch": 0.88, "grad_norm": 2.052299829237452, "learning_rate": 3.469650411199543e-07, "loss": 0.6363, "step": 8507 }, { "epoch": 0.88, "grad_norm": 2.07731152157618, "learning_rate": 3.4634914062618984e-07, "loss": 0.5551, "step": 8508 }, { "epoch": 0.88, "grad_norm": 1.8411429732595614, "learning_rate": 3.457337676502753e-07, "loss": 0.6741, "step": 8509 }, { "epoch": 0.88, "grad_norm": 1.7952870625461101, "learning_rate": 3.451189222619661e-07, "loss": 0.5644, "step": 8510 }, { "epoch": 0.88, "grad_norm": 2.024486086455206, "learning_rate": 3.445046045309586e-07, "loss": 0.6004, "step": 8511 }, { "epoch": 0.88, "grad_norm": 1.812082428978913, "learning_rate": 3.438908145268904e-07, "loss": 0.5468, "step": 8512 }, { "epoch": 0.88, "grad_norm": 1.9415016178715299, "learning_rate": 3.4327755231933603e-07, "loss": 0.6659, "step": 8513 }, { "epoch": 0.89, "grad_norm": 2.4030823748882164, "learning_rate": 3.426648179778147e-07, "loss": 0.6063, "step": 8514 }, { "epoch": 0.89, "grad_norm": 1.8938865020813795, "learning_rate": 3.420526115717815e-07, "loss": 0.5623, "step": 8515 }, { "epoch": 0.89, "grad_norm": 1.851080556637924, "learning_rate": 3.4144093317063586e-07, "loss": 0.5629, "step": 8516 }, { "epoch": 0.89, "grad_norm": 1.8727554350128697, "learning_rate": 3.4082978284371127e-07, "loss": 0.5925, "step": 8517 }, { "epoch": 0.89, "grad_norm": 1.9815146666422891, "learning_rate": 3.4021916066028837e-07, "loss": 0.5842, "step": 8518 }, { "epoch": 0.89, "grad_norm": 1.7584842117350237, "learning_rate": 3.396090666895829e-07, "loss": 0.7568, "step": 8519 }, { "epoch": 0.89, "grad_norm": 1.9915690900587948, "learning_rate": 3.3899950100075354e-07, "loss": 0.6972, "step": 8520 }, { "epoch": 0.89, "grad_norm": 1.7809472800894863, "learning_rate": 3.383904636628965e-07, "loss": 0.5753, "step": 8521 }, { "epoch": 0.89, "grad_norm": 1.8229838348029193, "learning_rate": 3.377819547450517e-07, "loss": 0.5927, "step": 8522 }, { "epoch": 0.89, "grad_norm": 1.8899181595589158, "learning_rate": 3.3717397431619614e-07, "loss": 0.6775, "step": 8523 }, { "epoch": 0.89, "grad_norm": 2.111746304571928, "learning_rate": 3.365665224452469e-07, "loss": 0.7002, "step": 8524 }, { "epoch": 0.89, "grad_norm": 1.7960693428634793, "learning_rate": 3.359595992010639e-07, "loss": 0.5953, "step": 8525 }, { "epoch": 0.89, "grad_norm": 2.0102795188304614, "learning_rate": 3.3535320465244494e-07, "loss": 0.6059, "step": 8526 }, { "epoch": 0.89, "grad_norm": 1.8455053001836659, "learning_rate": 3.3474733886812606e-07, "loss": 0.6622, "step": 8527 }, { "epoch": 0.89, "grad_norm": 1.9846483394387668, "learning_rate": 3.3414200191678903e-07, "loss": 0.6186, "step": 8528 }, { "epoch": 0.89, "grad_norm": 1.864288965670221, "learning_rate": 3.3353719386704954e-07, "loss": 0.54, "step": 8529 }, { "epoch": 0.89, "grad_norm": 1.8117014185107374, "learning_rate": 3.329329147874677e-07, "loss": 0.5355, "step": 8530 }, { "epoch": 0.89, "grad_norm": 2.050544467198889, "learning_rate": 3.3232916474654154e-07, "loss": 0.6408, "step": 8531 }, { "epoch": 0.89, "grad_norm": 1.9753592793290733, "learning_rate": 3.317259438127085e-07, "loss": 0.6121, "step": 8532 }, { "epoch": 0.89, "grad_norm": 1.9685029438749013, "learning_rate": 3.3112325205434834e-07, "loss": 0.5375, "step": 8533 }, { "epoch": 0.89, "grad_norm": 2.0390676005965145, "learning_rate": 3.305210895397792e-07, "loss": 0.6119, "step": 8534 }, { "epoch": 0.89, "grad_norm": 2.147082033891267, "learning_rate": 3.299194563372604e-07, "loss": 0.6216, "step": 8535 }, { "epoch": 0.89, "grad_norm": 1.9294865710173905, "learning_rate": 3.2931835251498845e-07, "loss": 0.5659, "step": 8536 }, { "epoch": 0.89, "grad_norm": 2.091471816118534, "learning_rate": 3.287177781411044e-07, "loss": 0.7131, "step": 8537 }, { "epoch": 0.89, "grad_norm": 1.9174669992475244, "learning_rate": 3.2811773328368604e-07, "loss": 0.6418, "step": 8538 }, { "epoch": 0.89, "grad_norm": 2.00937920247632, "learning_rate": 3.2751821801075055e-07, "loss": 0.5425, "step": 8539 }, { "epoch": 0.89, "grad_norm": 1.9361390403260308, "learning_rate": 3.26919232390257e-07, "loss": 0.6752, "step": 8540 }, { "epoch": 0.89, "grad_norm": 1.976383613072552, "learning_rate": 3.2632077649010495e-07, "loss": 0.6174, "step": 8541 }, { "epoch": 0.89, "grad_norm": 2.0442738816418555, "learning_rate": 3.2572285037813123e-07, "loss": 0.6375, "step": 8542 }, { "epoch": 0.89, "grad_norm": 1.9529903619318187, "learning_rate": 3.251254541221155e-07, "loss": 0.6939, "step": 8543 }, { "epoch": 0.89, "grad_norm": 1.8756515993312053, "learning_rate": 3.245285877897747e-07, "loss": 0.6838, "step": 8544 }, { "epoch": 0.89, "grad_norm": 2.0880957169144088, "learning_rate": 3.239322514487686e-07, "loss": 0.5843, "step": 8545 }, { "epoch": 0.89, "grad_norm": 1.9669044853905082, "learning_rate": 3.233364451666948e-07, "loss": 0.633, "step": 8546 }, { "epoch": 0.89, "grad_norm": 2.1809013639310924, "learning_rate": 3.227411690110904e-07, "loss": 0.7172, "step": 8547 }, { "epoch": 0.89, "grad_norm": 1.7695203042424918, "learning_rate": 3.2214642304943364e-07, "loss": 0.5523, "step": 8548 }, { "epoch": 0.89, "grad_norm": 2.427593226307166, "learning_rate": 3.215522073491434e-07, "loss": 0.5712, "step": 8549 }, { "epoch": 0.89, "grad_norm": 1.816262167078391, "learning_rate": 3.2095852197757625e-07, "loss": 0.5903, "step": 8550 }, { "epoch": 0.89, "grad_norm": 2.0096160380666808, "learning_rate": 3.203653670020307e-07, "loss": 0.6423, "step": 8551 }, { "epoch": 0.89, "grad_norm": 1.9964846253698296, "learning_rate": 3.1977274248974286e-07, "loss": 0.5813, "step": 8552 }, { "epoch": 0.89, "grad_norm": 2.057566069204673, "learning_rate": 3.1918064850789297e-07, "loss": 0.5512, "step": 8553 }, { "epoch": 0.89, "grad_norm": 1.9850613079984498, "learning_rate": 3.1858908512359456e-07, "loss": 0.5835, "step": 8554 }, { "epoch": 0.89, "grad_norm": 1.847173933051401, "learning_rate": 3.1799805240390723e-07, "loss": 0.6499, "step": 8555 }, { "epoch": 0.89, "grad_norm": 1.8814036275877406, "learning_rate": 3.1740755041582694e-07, "loss": 0.5895, "step": 8556 }, { "epoch": 0.89, "grad_norm": 2.072777684061676, "learning_rate": 3.1681757922629063e-07, "loss": 0.6682, "step": 8557 }, { "epoch": 0.89, "grad_norm": 1.8957016203434267, "learning_rate": 3.1622813890217483e-07, "loss": 0.5977, "step": 8558 }, { "epoch": 0.89, "grad_norm": 1.9201797550472164, "learning_rate": 3.156392295102967e-07, "loss": 0.4838, "step": 8559 }, { "epoch": 0.89, "grad_norm": 1.8415451231635673, "learning_rate": 3.1505085111741165e-07, "loss": 0.6179, "step": 8560 }, { "epoch": 0.89, "grad_norm": 1.7549802487786756, "learning_rate": 3.144630037902152e-07, "loss": 0.5285, "step": 8561 }, { "epoch": 0.89, "grad_norm": 1.982659812561418, "learning_rate": 3.1387568759534523e-07, "loss": 0.6163, "step": 8562 }, { "epoch": 0.89, "grad_norm": 1.875249224583908, "learning_rate": 3.132889025993746e-07, "loss": 0.6487, "step": 8563 }, { "epoch": 0.89, "grad_norm": 2.117308403382591, "learning_rate": 3.127026488688217e-07, "loss": 0.6205, "step": 8564 }, { "epoch": 0.89, "grad_norm": 2.1811566929908777, "learning_rate": 3.121169264701396e-07, "loss": 0.5575, "step": 8565 }, { "epoch": 0.89, "grad_norm": 1.7310740844214478, "learning_rate": 3.1153173546972395e-07, "loss": 0.5113, "step": 8566 }, { "epoch": 0.89, "grad_norm": 2.0363528766549575, "learning_rate": 3.109470759339095e-07, "loss": 0.6399, "step": 8567 }, { "epoch": 0.89, "grad_norm": 1.808682458516396, "learning_rate": 3.1036294792897103e-07, "loss": 0.5567, "step": 8568 }, { "epoch": 0.89, "grad_norm": 2.0304155594704945, "learning_rate": 3.097793515211211e-07, "loss": 0.638, "step": 8569 }, { "epoch": 0.89, "grad_norm": 1.8611948646613983, "learning_rate": 3.0919628677651636e-07, "loss": 0.5643, "step": 8570 }, { "epoch": 0.89, "grad_norm": 1.8861149189282882, "learning_rate": 3.086137537612488e-07, "loss": 0.5412, "step": 8571 }, { "epoch": 0.89, "grad_norm": 1.9998343579666824, "learning_rate": 3.080317525413523e-07, "loss": 0.6044, "step": 8572 }, { "epoch": 0.89, "grad_norm": 1.9620610866969879, "learning_rate": 3.074502831827997e-07, "loss": 0.658, "step": 8573 }, { "epoch": 0.89, "grad_norm": 1.9372850625188789, "learning_rate": 3.0686934575150484e-07, "loss": 0.6131, "step": 8574 }, { "epoch": 0.89, "grad_norm": 2.010086692177107, "learning_rate": 3.06288940313319e-07, "loss": 0.5936, "step": 8575 }, { "epoch": 0.89, "grad_norm": 1.7050273978062502, "learning_rate": 3.057090669340357e-07, "loss": 0.4815, "step": 8576 }, { "epoch": 0.89, "grad_norm": 2.011687551457418, "learning_rate": 3.0512972567938505e-07, "loss": 0.6161, "step": 8577 }, { "epoch": 0.89, "grad_norm": 1.9730483163807309, "learning_rate": 3.045509166150412e-07, "loss": 0.5797, "step": 8578 }, { "epoch": 0.89, "grad_norm": 1.9574359302513729, "learning_rate": 3.0397263980661283e-07, "loss": 0.6348, "step": 8579 }, { "epoch": 0.89, "grad_norm": 1.8261271087547564, "learning_rate": 3.0339489531965307e-07, "loss": 0.5946, "step": 8580 }, { "epoch": 0.89, "grad_norm": 2.01638250418493, "learning_rate": 3.028176832196511e-07, "loss": 0.6325, "step": 8581 }, { "epoch": 0.89, "grad_norm": 2.0561733418751396, "learning_rate": 3.022410035720391e-07, "loss": 0.657, "step": 8582 }, { "epoch": 0.89, "grad_norm": 1.8801117833119756, "learning_rate": 3.0166485644218423e-07, "loss": 0.579, "step": 8583 }, { "epoch": 0.89, "grad_norm": 2.17562854791616, "learning_rate": 3.010892418953981e-07, "loss": 0.627, "step": 8584 }, { "epoch": 0.89, "grad_norm": 1.8988445529001572, "learning_rate": 3.005141599969286e-07, "loss": 0.559, "step": 8585 }, { "epoch": 0.89, "grad_norm": 2.0209929465985708, "learning_rate": 2.999396108119662e-07, "loss": 0.5639, "step": 8586 }, { "epoch": 0.89, "grad_norm": 1.7706616500453063, "learning_rate": 2.9936559440563727e-07, "loss": 0.577, "step": 8587 }, { "epoch": 0.89, "grad_norm": 1.7695880019783534, "learning_rate": 2.9879211084301194e-07, "loss": 0.4723, "step": 8588 }, { "epoch": 0.89, "grad_norm": 1.8372393183117093, "learning_rate": 2.9821916018909603e-07, "loss": 0.5891, "step": 8589 }, { "epoch": 0.89, "grad_norm": 2.323982778463408, "learning_rate": 2.9764674250883917e-07, "loss": 0.6652, "step": 8590 }, { "epoch": 0.89, "grad_norm": 1.850804961341406, "learning_rate": 2.970748578671251e-07, "loss": 0.6241, "step": 8591 }, { "epoch": 0.89, "grad_norm": 1.947215875031301, "learning_rate": 2.9650350632878246e-07, "loss": 0.6403, "step": 8592 }, { "epoch": 0.89, "grad_norm": 2.0757901778312218, "learning_rate": 2.959326879585755e-07, "loss": 0.6949, "step": 8593 }, { "epoch": 0.89, "grad_norm": 2.3184331772097204, "learning_rate": 2.953624028212115e-07, "loss": 0.6928, "step": 8594 }, { "epoch": 0.89, "grad_norm": 2.3251534514180134, "learning_rate": 2.947926509813337e-07, "loss": 0.6444, "step": 8595 }, { "epoch": 0.89, "grad_norm": 1.9132798847854695, "learning_rate": 2.942234325035287e-07, "loss": 0.584, "step": 8596 }, { "epoch": 0.89, "grad_norm": 1.9965630415954567, "learning_rate": 2.9365474745231935e-07, "loss": 0.6394, "step": 8597 }, { "epoch": 0.89, "grad_norm": 2.044198645987738, "learning_rate": 2.9308659589216913e-07, "loss": 0.5541, "step": 8598 }, { "epoch": 0.89, "grad_norm": 1.897414457067917, "learning_rate": 2.925189778874826e-07, "loss": 0.5109, "step": 8599 }, { "epoch": 0.89, "grad_norm": 1.956046702740908, "learning_rate": 2.9195189350260055e-07, "loss": 0.6672, "step": 8600 }, { "epoch": 0.89, "grad_norm": 1.9022595348835627, "learning_rate": 2.9138534280180706e-07, "loss": 0.6785, "step": 8601 }, { "epoch": 0.89, "grad_norm": 2.026765034791194, "learning_rate": 2.908193258493236e-07, "loss": 0.6166, "step": 8602 }, { "epoch": 0.89, "grad_norm": 2.127506565979339, "learning_rate": 2.9025384270931043e-07, "loss": 0.5703, "step": 8603 }, { "epoch": 0.89, "grad_norm": 2.213452785659447, "learning_rate": 2.8968889344586915e-07, "loss": 0.6445, "step": 8604 }, { "epoch": 0.89, "grad_norm": 1.9799657762514415, "learning_rate": 2.8912447812303956e-07, "loss": 0.6423, "step": 8605 }, { "epoch": 0.89, "grad_norm": 2.1495360937337464, "learning_rate": 2.885605968048011e-07, "loss": 0.6764, "step": 8606 }, { "epoch": 0.89, "grad_norm": 1.715241435422298, "learning_rate": 2.8799724955507367e-07, "loss": 0.555, "step": 8607 }, { "epoch": 0.89, "grad_norm": 2.00052872630302, "learning_rate": 2.874344364377152e-07, "loss": 0.643, "step": 8608 }, { "epoch": 0.89, "grad_norm": 1.9284952108877513, "learning_rate": 2.868721575165245e-07, "loss": 0.6311, "step": 8609 }, { "epoch": 0.9, "grad_norm": 1.690339382498523, "learning_rate": 2.86310412855238e-07, "loss": 0.5235, "step": 8610 }, { "epoch": 0.9, "grad_norm": 1.9565147307410093, "learning_rate": 2.85749202517534e-07, "loss": 0.6184, "step": 8611 }, { "epoch": 0.9, "grad_norm": 2.0904675537845843, "learning_rate": 2.8518852656702845e-07, "loss": 0.6863, "step": 8612 }, { "epoch": 0.9, "grad_norm": 2.106472750498347, "learning_rate": 2.8462838506727707e-07, "loss": 0.697, "step": 8613 }, { "epoch": 0.9, "grad_norm": 1.961157989135241, "learning_rate": 2.8406877808177414e-07, "loss": 0.5415, "step": 8614 }, { "epoch": 0.9, "grad_norm": 1.954339439444753, "learning_rate": 2.8350970567395555e-07, "loss": 0.6069, "step": 8615 }, { "epoch": 0.9, "grad_norm": 2.020476422284541, "learning_rate": 2.8295116790719444e-07, "loss": 0.6706, "step": 8616 }, { "epoch": 0.9, "grad_norm": 2.092819502393807, "learning_rate": 2.8239316484480527e-07, "loss": 0.5515, "step": 8617 }, { "epoch": 0.9, "grad_norm": 1.835575899833264, "learning_rate": 2.8183569655003963e-07, "loss": 0.4899, "step": 8618 }, { "epoch": 0.9, "grad_norm": 2.0289917891127454, "learning_rate": 2.812787630860919e-07, "loss": 0.5401, "step": 8619 }, { "epoch": 0.9, "grad_norm": 1.811975427170239, "learning_rate": 2.807223645160906e-07, "loss": 0.5122, "step": 8620 }, { "epoch": 0.9, "grad_norm": 2.200065793760229, "learning_rate": 2.801665009031096e-07, "loss": 0.536, "step": 8621 }, { "epoch": 0.9, "grad_norm": 1.8723693676317084, "learning_rate": 2.796111723101563e-07, "loss": 0.5162, "step": 8622 }, { "epoch": 0.9, "grad_norm": 1.9167058977018545, "learning_rate": 2.7905637880018324e-07, "loss": 0.6457, "step": 8623 }, { "epoch": 0.9, "grad_norm": 1.9368506827247765, "learning_rate": 2.7850212043607715e-07, "loss": 0.6408, "step": 8624 }, { "epoch": 0.9, "grad_norm": 1.8101557694487689, "learning_rate": 2.779483972806685e-07, "loss": 0.6135, "step": 8625 }, { "epoch": 0.9, "grad_norm": 2.1315207329438612, "learning_rate": 2.773952093967225e-07, "loss": 0.5771, "step": 8626 }, { "epoch": 0.9, "grad_norm": 2.0848835082063646, "learning_rate": 2.7684255684694903e-07, "loss": 0.6573, "step": 8627 }, { "epoch": 0.9, "grad_norm": 1.91807737163709, "learning_rate": 2.7629043969399193e-07, "loss": 0.6022, "step": 8628 }, { "epoch": 0.9, "grad_norm": 2.118468230505594, "learning_rate": 2.7573885800043775e-07, "loss": 0.6365, "step": 8629 }, { "epoch": 0.9, "grad_norm": 1.9790619469804935, "learning_rate": 2.7518781182881096e-07, "loss": 0.6301, "step": 8630 }, { "epoch": 0.9, "grad_norm": 2.022098894804944, "learning_rate": 2.7463730124157706e-07, "loss": 0.6485, "step": 8631 }, { "epoch": 0.9, "grad_norm": 2.056757846364307, "learning_rate": 2.7408732630113787e-07, "loss": 0.565, "step": 8632 }, { "epoch": 0.9, "grad_norm": 1.9947893769894807, "learning_rate": 2.73537887069838e-07, "loss": 0.5146, "step": 8633 }, { "epoch": 0.9, "grad_norm": 2.231051088541484, "learning_rate": 2.729889836099581e-07, "loss": 0.5901, "step": 8634 }, { "epoch": 0.9, "grad_norm": 1.9660039530430966, "learning_rate": 2.7244061598371953e-07, "loss": 0.7301, "step": 8635 }, { "epoch": 0.9, "grad_norm": 1.9431122518352824, "learning_rate": 2.7189278425328426e-07, "loss": 0.6849, "step": 8636 }, { "epoch": 0.9, "grad_norm": 1.8396886613553558, "learning_rate": 2.713454884807504e-07, "loss": 0.5825, "step": 8637 }, { "epoch": 0.9, "grad_norm": 1.8078269413999202, "learning_rate": 2.707987287281583e-07, "loss": 0.6539, "step": 8638 }, { "epoch": 0.9, "grad_norm": 1.862585778610424, "learning_rate": 2.70252505057485e-07, "loss": 0.5778, "step": 8639 }, { "epoch": 0.9, "grad_norm": 1.894036420630567, "learning_rate": 2.6970681753065e-07, "loss": 0.5502, "step": 8640 }, { "epoch": 0.9, "grad_norm": 1.8819699326290948, "learning_rate": 2.691616662095092e-07, "loss": 0.6226, "step": 8641 }, { "epoch": 0.9, "grad_norm": 1.9204845961184525, "learning_rate": 2.686170511558578e-07, "loss": 0.637, "step": 8642 }, { "epoch": 0.9, "grad_norm": 1.8825697272527415, "learning_rate": 2.680729724314313e-07, "loss": 0.6391, "step": 8643 }, { "epoch": 0.9, "grad_norm": 1.8943724619603919, "learning_rate": 2.675294300979053e-07, "loss": 0.5944, "step": 8644 }, { "epoch": 0.9, "grad_norm": 2.259795468206444, "learning_rate": 2.6698642421689124e-07, "loss": 0.6657, "step": 8645 }, { "epoch": 0.9, "grad_norm": 1.9778845403528227, "learning_rate": 2.664439548499448e-07, "loss": 0.6692, "step": 8646 }, { "epoch": 0.9, "grad_norm": 2.0445734899171097, "learning_rate": 2.6590202205855506e-07, "loss": 0.6772, "step": 8647 }, { "epoch": 0.9, "grad_norm": 2.3171034519987823, "learning_rate": 2.6536062590415577e-07, "loss": 0.5824, "step": 8648 }, { "epoch": 0.9, "grad_norm": 1.8081871008672612, "learning_rate": 2.648197664481156e-07, "loss": 0.6396, "step": 8649 }, { "epoch": 0.9, "grad_norm": 2.1307670329224937, "learning_rate": 2.6427944375174484e-07, "loss": 0.6678, "step": 8650 }, { "epoch": 0.9, "grad_norm": 2.253854108832021, "learning_rate": 2.637396578762913e-07, "loss": 0.5559, "step": 8651 }, { "epoch": 0.9, "grad_norm": 2.094905790658024, "learning_rate": 2.6320040888294373e-07, "loss": 0.6969, "step": 8652 }, { "epoch": 0.9, "grad_norm": 1.8667934315030559, "learning_rate": 2.6266169683282827e-07, "loss": 0.5351, "step": 8653 }, { "epoch": 0.9, "grad_norm": 1.9530819540310358, "learning_rate": 2.621235217870116e-07, "loss": 0.6533, "step": 8654 }, { "epoch": 0.9, "grad_norm": 1.9977273869765968, "learning_rate": 2.615858838064983e-07, "loss": 0.6239, "step": 8655 }, { "epoch": 0.9, "grad_norm": 1.9377945345738095, "learning_rate": 2.6104878295223455e-07, "loss": 0.6392, "step": 8656 }, { "epoch": 0.9, "grad_norm": 2.0652175471062133, "learning_rate": 2.605122192851012e-07, "loss": 0.6128, "step": 8657 }, { "epoch": 0.9, "grad_norm": 2.0175796302069418, "learning_rate": 2.5997619286592224e-07, "loss": 0.6744, "step": 8658 }, { "epoch": 0.9, "grad_norm": 1.747368958393763, "learning_rate": 2.594407037554586e-07, "loss": 0.6059, "step": 8659 }, { "epoch": 0.9, "grad_norm": 2.2493432755306895, "learning_rate": 2.5890575201441224e-07, "loss": 0.6867, "step": 8660 }, { "epoch": 0.9, "grad_norm": 2.0016522845047566, "learning_rate": 2.5837133770342135e-07, "loss": 0.6622, "step": 8661 }, { "epoch": 0.9, "grad_norm": 2.2223145338114767, "learning_rate": 2.578374608830664e-07, "loss": 0.6139, "step": 8662 }, { "epoch": 0.9, "grad_norm": 2.004690950521899, "learning_rate": 2.5730412161386386e-07, "loss": 0.6133, "step": 8663 }, { "epoch": 0.9, "grad_norm": 1.9682711738064742, "learning_rate": 2.567713199562727e-07, "loss": 0.6053, "step": 8664 }, { "epoch": 0.9, "grad_norm": 1.9739531150421892, "learning_rate": 2.562390559706879e-07, "loss": 0.6154, "step": 8665 }, { "epoch": 0.9, "grad_norm": 2.1119152599912105, "learning_rate": 2.5570732971744395e-07, "loss": 0.6286, "step": 8666 }, { "epoch": 0.9, "grad_norm": 1.893301025135361, "learning_rate": 2.55176141256816e-07, "loss": 0.6225, "step": 8667 }, { "epoch": 0.9, "grad_norm": 2.1130422748129, "learning_rate": 2.54645490649017e-07, "loss": 0.6161, "step": 8668 }, { "epoch": 0.9, "grad_norm": 2.1581512462565833, "learning_rate": 2.5411537795419883e-07, "loss": 0.7077, "step": 8669 }, { "epoch": 0.9, "grad_norm": 1.9273252313199596, "learning_rate": 2.5358580323245396e-07, "loss": 0.6358, "step": 8670 }, { "epoch": 0.9, "grad_norm": 1.9499885610352081, "learning_rate": 2.530567665438116e-07, "loss": 0.5547, "step": 8671 }, { "epoch": 0.9, "grad_norm": 2.111913389915149, "learning_rate": 2.52528267948241e-07, "loss": 0.54, "step": 8672 }, { "epoch": 0.9, "grad_norm": 2.0260864755391164, "learning_rate": 2.520003075056521e-07, "loss": 0.5447, "step": 8673 }, { "epoch": 0.9, "grad_norm": 1.8671832739571868, "learning_rate": 2.5147288527588964e-07, "loss": 0.6723, "step": 8674 }, { "epoch": 0.9, "grad_norm": 1.7405827829670664, "learning_rate": 2.5094600131874205e-07, "loss": 0.5051, "step": 8675 }, { "epoch": 0.9, "grad_norm": 2.0584072380487015, "learning_rate": 2.5041965569393366e-07, "loss": 0.59, "step": 8676 }, { "epoch": 0.9, "grad_norm": 2.0569408420447046, "learning_rate": 2.498938484611296e-07, "loss": 0.6142, "step": 8677 }, { "epoch": 0.9, "grad_norm": 1.8546671023261774, "learning_rate": 2.4936857967993166e-07, "loss": 0.6434, "step": 8678 }, { "epoch": 0.9, "grad_norm": 2.012713259045472, "learning_rate": 2.4884384940988436e-07, "loss": 0.6429, "step": 8679 }, { "epoch": 0.9, "grad_norm": 2.1338257948479336, "learning_rate": 2.4831965771046574e-07, "loss": 0.6217, "step": 8680 }, { "epoch": 0.9, "grad_norm": 1.978217762866347, "learning_rate": 2.4779600464109874e-07, "loss": 0.6405, "step": 8681 }, { "epoch": 0.9, "grad_norm": 2.3616307774259555, "learning_rate": 2.4727289026114043e-07, "loss": 0.6803, "step": 8682 }, { "epoch": 0.9, "grad_norm": 2.1877070740639097, "learning_rate": 2.4675031462988995e-07, "loss": 0.6009, "step": 8683 }, { "epoch": 0.9, "grad_norm": 1.9569526648473758, "learning_rate": 2.462282778065839e-07, "loss": 0.6932, "step": 8684 }, { "epoch": 0.9, "grad_norm": 1.8683437289001, "learning_rate": 2.4570677985039817e-07, "loss": 0.5918, "step": 8685 }, { "epoch": 0.9, "grad_norm": 1.89796611020582, "learning_rate": 2.451858208204477e-07, "loss": 0.61, "step": 8686 }, { "epoch": 0.9, "grad_norm": 1.9025712390079088, "learning_rate": 2.446654007757865e-07, "loss": 0.5998, "step": 8687 }, { "epoch": 0.9, "grad_norm": 1.9360903945505326, "learning_rate": 2.441455197754056e-07, "loss": 0.5562, "step": 8688 }, { "epoch": 0.9, "grad_norm": 1.8305256442447795, "learning_rate": 2.436261778782378e-07, "loss": 0.6337, "step": 8689 }, { "epoch": 0.9, "grad_norm": 1.9413740891789482, "learning_rate": 2.431073751431529e-07, "loss": 0.6631, "step": 8690 }, { "epoch": 0.9, "grad_norm": 1.8320514002798973, "learning_rate": 2.4258911162896083e-07, "loss": 0.6427, "step": 8691 }, { "epoch": 0.9, "grad_norm": 1.6997980036645046, "learning_rate": 2.4207138739440914e-07, "loss": 0.5699, "step": 8692 }, { "epoch": 0.9, "grad_norm": 1.768129171675751, "learning_rate": 2.4155420249818596e-07, "loss": 0.5332, "step": 8693 }, { "epoch": 0.9, "grad_norm": 1.866393250711023, "learning_rate": 2.4103755699891487e-07, "loss": 0.5497, "step": 8694 }, { "epoch": 0.9, "grad_norm": 2.1352325058003974, "learning_rate": 2.405214509551623e-07, "loss": 0.5664, "step": 8695 }, { "epoch": 0.9, "grad_norm": 1.9594544194885912, "learning_rate": 2.40005884425431e-07, "loss": 0.5781, "step": 8696 }, { "epoch": 0.9, "grad_norm": 1.7619113591533766, "learning_rate": 2.3949085746816424e-07, "loss": 0.5066, "step": 8697 }, { "epoch": 0.9, "grad_norm": 2.1093914969198413, "learning_rate": 2.389763701417419e-07, "loss": 0.7102, "step": 8698 }, { "epoch": 0.9, "grad_norm": 2.10399786419913, "learning_rate": 2.3846242250448624e-07, "loss": 0.7246, "step": 8699 }, { "epoch": 0.9, "grad_norm": 2.0803726938672593, "learning_rate": 2.3794901461465402e-07, "loss": 0.7128, "step": 8700 }, { "epoch": 0.9, "grad_norm": 1.8698045186841377, "learning_rate": 2.3743614653044423e-07, "loss": 0.5957, "step": 8701 }, { "epoch": 0.9, "grad_norm": 1.6970683847366228, "learning_rate": 2.3692381830999255e-07, "loss": 0.5625, "step": 8702 }, { "epoch": 0.9, "grad_norm": 2.096324173910222, "learning_rate": 2.364120300113748e-07, "loss": 0.667, "step": 8703 }, { "epoch": 0.9, "grad_norm": 1.975751309515821, "learning_rate": 2.3590078169260512e-07, "loss": 0.5752, "step": 8704 }, { "epoch": 0.9, "grad_norm": 1.934365429629889, "learning_rate": 2.3539007341163656e-07, "loss": 0.6003, "step": 8705 }, { "epoch": 0.9, "grad_norm": 2.049087859270842, "learning_rate": 2.3487990522636005e-07, "loss": 0.6966, "step": 8706 }, { "epoch": 0.91, "grad_norm": 2.1228999756456597, "learning_rate": 2.3437027719460659e-07, "loss": 0.5632, "step": 8707 }, { "epoch": 0.91, "grad_norm": 1.988718489834557, "learning_rate": 2.3386118937414602e-07, "loss": 0.6215, "step": 8708 }, { "epoch": 0.91, "grad_norm": 1.9566653933319493, "learning_rate": 2.3335264182268502e-07, "loss": 0.4998, "step": 8709 }, { "epoch": 0.91, "grad_norm": 1.8116059731020802, "learning_rate": 2.328446345978713e-07, "loss": 0.505, "step": 8710 }, { "epoch": 0.91, "grad_norm": 1.9242008346185047, "learning_rate": 2.3233716775728943e-07, "loss": 0.5418, "step": 8711 }, { "epoch": 0.91, "grad_norm": 1.8515564888972094, "learning_rate": 2.3183024135846554e-07, "loss": 0.6389, "step": 8712 }, { "epoch": 0.91, "grad_norm": 1.8985578066102073, "learning_rate": 2.3132385545886037e-07, "loss": 0.5512, "step": 8713 }, { "epoch": 0.91, "grad_norm": 1.9046098103072053, "learning_rate": 2.3081801011587745e-07, "loss": 0.5628, "step": 8714 }, { "epoch": 0.91, "grad_norm": 2.1917228952182097, "learning_rate": 2.3031270538685701e-07, "loss": 0.664, "step": 8715 }, { "epoch": 0.91, "grad_norm": 2.5641131401356265, "learning_rate": 2.2980794132907713e-07, "loss": 0.7537, "step": 8716 }, { "epoch": 0.91, "grad_norm": 1.8083432847429621, "learning_rate": 2.2930371799975593e-07, "loss": 0.5948, "step": 8717 }, { "epoch": 0.91, "grad_norm": 2.157666039891891, "learning_rate": 2.2880003545605101e-07, "loss": 0.6228, "step": 8718 }, { "epoch": 0.91, "grad_norm": 1.9598888616221026, "learning_rate": 2.2829689375505616e-07, "loss": 0.6904, "step": 8719 }, { "epoch": 0.91, "grad_norm": 2.0529866203222418, "learning_rate": 2.2779429295380683e-07, "loss": 0.7202, "step": 8720 }, { "epoch": 0.91, "grad_norm": 2.025400351121733, "learning_rate": 2.2729223310927473e-07, "loss": 0.6518, "step": 8721 }, { "epoch": 0.91, "grad_norm": 1.643070030489791, "learning_rate": 2.267907142783715e-07, "loss": 0.5422, "step": 8722 }, { "epoch": 0.91, "grad_norm": 2.168941535240748, "learning_rate": 2.2628973651794783e-07, "loss": 0.6902, "step": 8723 }, { "epoch": 0.91, "grad_norm": 2.025938493633533, "learning_rate": 2.257892998847916e-07, "loss": 0.6059, "step": 8724 }, { "epoch": 0.91, "grad_norm": 1.9086079703312495, "learning_rate": 2.2528940443562964e-07, "loss": 0.5982, "step": 8725 }, { "epoch": 0.91, "grad_norm": 1.8506109120515981, "learning_rate": 2.247900502271294e-07, "loss": 0.5936, "step": 8726 }, { "epoch": 0.91, "grad_norm": 2.129293598828574, "learning_rate": 2.2429123731589397e-07, "loss": 0.4808, "step": 8727 }, { "epoch": 0.91, "grad_norm": 2.199598134027323, "learning_rate": 2.2379296575846809e-07, "loss": 0.6588, "step": 8728 }, { "epoch": 0.91, "grad_norm": 1.9581770705902077, "learning_rate": 2.2329523561133215e-07, "loss": 0.5429, "step": 8729 }, { "epoch": 0.91, "grad_norm": 2.070975877443173, "learning_rate": 2.2279804693090934e-07, "loss": 0.6249, "step": 8730 }, { "epoch": 0.91, "grad_norm": 1.8912896810784905, "learning_rate": 2.223013997735557e-07, "loss": 0.6175, "step": 8731 }, { "epoch": 0.91, "grad_norm": 2.104502168982993, "learning_rate": 2.2180529419557062e-07, "loss": 0.6044, "step": 8732 }, { "epoch": 0.91, "grad_norm": 1.868832405954279, "learning_rate": 2.213097302531897e-07, "loss": 0.5568, "step": 8733 }, { "epoch": 0.91, "grad_norm": 1.8040318964006492, "learning_rate": 2.208147080025891e-07, "loss": 0.5683, "step": 8734 }, { "epoch": 0.91, "grad_norm": 1.9255176394764801, "learning_rate": 2.2032022749988113e-07, "loss": 0.6989, "step": 8735 }, { "epoch": 0.91, "grad_norm": 1.8424660754915125, "learning_rate": 2.1982628880111988e-07, "loss": 0.578, "step": 8736 }, { "epoch": 0.91, "grad_norm": 1.9013342427890227, "learning_rate": 2.1933289196229335e-07, "loss": 0.6747, "step": 8737 }, { "epoch": 0.91, "grad_norm": 1.947218569687289, "learning_rate": 2.1884003703933343e-07, "loss": 0.5696, "step": 8738 }, { "epoch": 0.91, "grad_norm": 2.124368808695498, "learning_rate": 2.1834772408810712e-07, "loss": 0.6159, "step": 8739 }, { "epoch": 0.91, "grad_norm": 2.140229776900523, "learning_rate": 2.1785595316442032e-07, "loss": 0.6732, "step": 8740 }, { "epoch": 0.91, "grad_norm": 1.9381452957689793, "learning_rate": 2.17364724324019e-07, "loss": 0.5212, "step": 8741 }, { "epoch": 0.91, "grad_norm": 2.1126927729884417, "learning_rate": 2.1687403762258585e-07, "loss": 0.6332, "step": 8742 }, { "epoch": 0.91, "grad_norm": 2.0856561623513987, "learning_rate": 2.1638389311574358e-07, "loss": 0.7432, "step": 8743 }, { "epoch": 0.91, "grad_norm": 2.05971362182267, "learning_rate": 2.1589429085905278e-07, "loss": 0.6012, "step": 8744 }, { "epoch": 0.91, "grad_norm": 2.0239460998799848, "learning_rate": 2.1540523090801292e-07, "loss": 0.5419, "step": 8745 }, { "epoch": 0.91, "grad_norm": 1.9997879159734424, "learning_rate": 2.149167133180602e-07, "loss": 0.5633, "step": 8746 }, { "epoch": 0.91, "grad_norm": 2.059015466218665, "learning_rate": 2.1442873814457364e-07, "loss": 0.6194, "step": 8747 }, { "epoch": 0.91, "grad_norm": 2.525897595654169, "learning_rate": 2.1394130544286519e-07, "loss": 0.6233, "step": 8748 }, { "epoch": 0.91, "grad_norm": 1.9910958324419423, "learning_rate": 2.1345441526819e-07, "loss": 0.5732, "step": 8749 }, { "epoch": 0.91, "grad_norm": 1.9316768520667007, "learning_rate": 2.1296806767573897e-07, "loss": 0.667, "step": 8750 }, { "epoch": 0.91, "grad_norm": 2.1742785881902753, "learning_rate": 2.1248226272064299e-07, "loss": 0.6773, "step": 8751 }, { "epoch": 0.91, "grad_norm": 2.129579988218092, "learning_rate": 2.1199700045797077e-07, "loss": 0.5753, "step": 8752 }, { "epoch": 0.91, "grad_norm": 2.188883794073873, "learning_rate": 2.115122809427289e-07, "loss": 0.6846, "step": 8753 }, { "epoch": 0.91, "grad_norm": 1.8777793148808293, "learning_rate": 2.1102810422986286e-07, "loss": 0.5493, "step": 8754 }, { "epoch": 0.91, "grad_norm": 1.9743047341702247, "learning_rate": 2.105444703742582e-07, "loss": 0.608, "step": 8755 }, { "epoch": 0.91, "grad_norm": 1.95744377141077, "learning_rate": 2.1006137943073602e-07, "loss": 0.6769, "step": 8756 }, { "epoch": 0.91, "grad_norm": 1.8324319226472476, "learning_rate": 2.095788314540592e-07, "loss": 0.5666, "step": 8757 }, { "epoch": 0.91, "grad_norm": 1.6567842512967919, "learning_rate": 2.0909682649892516e-07, "loss": 0.5137, "step": 8758 }, { "epoch": 0.91, "grad_norm": 1.7813909272995272, "learning_rate": 2.08615364619974e-07, "loss": 0.6392, "step": 8759 }, { "epoch": 0.91, "grad_norm": 1.9961485822604668, "learning_rate": 2.0813444587178156e-07, "loss": 0.6946, "step": 8760 }, { "epoch": 0.91, "grad_norm": 1.9287070838365437, "learning_rate": 2.0765407030886197e-07, "loss": 0.6178, "step": 8761 }, { "epoch": 0.91, "grad_norm": 2.0168575757502505, "learning_rate": 2.0717423798566838e-07, "loss": 0.5699, "step": 8762 }, { "epoch": 0.91, "grad_norm": 1.940310038251417, "learning_rate": 2.0669494895659391e-07, "loss": 0.5678, "step": 8763 }, { "epoch": 0.91, "grad_norm": 1.8342339019571705, "learning_rate": 2.0621620327596735e-07, "loss": 0.6365, "step": 8764 }, { "epoch": 0.91, "grad_norm": 1.9270699483483476, "learning_rate": 2.057380009980581e-07, "loss": 0.5998, "step": 8765 }, { "epoch": 0.91, "grad_norm": 2.1596691648652593, "learning_rate": 2.0526034217707213e-07, "loss": 0.654, "step": 8766 }, { "epoch": 0.91, "grad_norm": 2.0471564825995765, "learning_rate": 2.0478322686715735e-07, "loss": 0.6248, "step": 8767 }, { "epoch": 0.91, "grad_norm": 2.009135979527017, "learning_rate": 2.0430665512239377e-07, "loss": 0.6103, "step": 8768 }, { "epoch": 0.91, "grad_norm": 2.0477920499162057, "learning_rate": 2.0383062699680601e-07, "loss": 0.7414, "step": 8769 }, { "epoch": 0.91, "grad_norm": 1.8535142344799098, "learning_rate": 2.0335514254435363e-07, "loss": 0.6597, "step": 8770 }, { "epoch": 0.91, "grad_norm": 1.7529105422442415, "learning_rate": 2.028802018189363e-07, "loss": 0.5557, "step": 8771 }, { "epoch": 0.91, "grad_norm": 2.064542466324043, "learning_rate": 2.0240580487438988e-07, "loss": 0.5888, "step": 8772 }, { "epoch": 0.91, "grad_norm": 2.1972196941591506, "learning_rate": 2.0193195176449188e-07, "loss": 0.7178, "step": 8773 }, { "epoch": 0.91, "grad_norm": 1.9128197640557918, "learning_rate": 2.0145864254295434e-07, "loss": 0.6155, "step": 8774 }, { "epoch": 0.91, "grad_norm": 1.9423327842910736, "learning_rate": 2.0098587726343156e-07, "loss": 0.5647, "step": 8775 }, { "epoch": 0.91, "grad_norm": 1.8146626640382046, "learning_rate": 2.0051365597951233e-07, "loss": 0.5116, "step": 8776 }, { "epoch": 0.91, "grad_norm": 2.1038513954027906, "learning_rate": 2.000419787447261e-07, "loss": 0.6498, "step": 8777 }, { "epoch": 0.91, "grad_norm": 1.9053349463179003, "learning_rate": 1.9957084561254114e-07, "loss": 0.6542, "step": 8778 }, { "epoch": 0.91, "grad_norm": 2.0831840015435117, "learning_rate": 1.9910025663636146e-07, "loss": 0.5748, "step": 8779 }, { "epoch": 0.91, "grad_norm": 1.997692900719902, "learning_rate": 1.9863021186953268e-07, "loss": 0.6376, "step": 8780 }, { "epoch": 0.91, "grad_norm": 2.1968215047753756, "learning_rate": 1.9816071136533665e-07, "loss": 0.6415, "step": 8781 }, { "epoch": 0.91, "grad_norm": 2.068923142726295, "learning_rate": 1.9769175517699302e-07, "loss": 0.6246, "step": 8782 }, { "epoch": 0.91, "grad_norm": 2.047076265368446, "learning_rate": 1.9722334335766092e-07, "loss": 0.5755, "step": 8783 }, { "epoch": 0.91, "grad_norm": 1.9159180146333123, "learning_rate": 1.9675547596043787e-07, "loss": 0.5303, "step": 8784 }, { "epoch": 0.91, "grad_norm": 2.054746023991903, "learning_rate": 1.962881530383587e-07, "loss": 0.6917, "step": 8785 }, { "epoch": 0.91, "grad_norm": 1.8723658262962497, "learning_rate": 1.9582137464439876e-07, "loss": 0.5934, "step": 8786 }, { "epoch": 0.91, "grad_norm": 1.9256110172198415, "learning_rate": 1.95355140831468e-07, "loss": 0.5799, "step": 8787 }, { "epoch": 0.91, "grad_norm": 2.0613496981654134, "learning_rate": 1.94889451652418e-07, "loss": 0.64, "step": 8788 }, { "epoch": 0.91, "grad_norm": 1.8928277560645934, "learning_rate": 1.9442430716003713e-07, "loss": 0.5309, "step": 8789 }, { "epoch": 0.91, "grad_norm": 2.211268371166142, "learning_rate": 1.9395970740705205e-07, "loss": 0.6969, "step": 8790 }, { "epoch": 0.91, "grad_norm": 2.0242471939323763, "learning_rate": 1.9349565244612678e-07, "loss": 0.6129, "step": 8791 }, { "epoch": 0.91, "grad_norm": 2.055751552611855, "learning_rate": 1.9303214232986588e-07, "loss": 0.6545, "step": 8792 }, { "epoch": 0.91, "grad_norm": 1.936122063029454, "learning_rate": 1.9256917711081014e-07, "loss": 0.5843, "step": 8793 }, { "epoch": 0.91, "grad_norm": 1.995976353696018, "learning_rate": 1.921067568414403e-07, "loss": 0.5792, "step": 8794 }, { "epoch": 0.91, "grad_norm": 1.9787829732592972, "learning_rate": 1.9164488157417284e-07, "loss": 0.6806, "step": 8795 }, { "epoch": 0.91, "grad_norm": 1.812060229047015, "learning_rate": 1.9118355136136523e-07, "loss": 0.587, "step": 8796 }, { "epoch": 0.91, "grad_norm": 2.0037208498765238, "learning_rate": 1.9072276625531127e-07, "loss": 0.5948, "step": 8797 }, { "epoch": 0.91, "grad_norm": 1.9635031676753756, "learning_rate": 1.9026252630824415e-07, "loss": 0.5545, "step": 8798 }, { "epoch": 0.91, "grad_norm": 1.910679902637944, "learning_rate": 1.898028315723338e-07, "loss": 0.5776, "step": 8799 }, { "epoch": 0.91, "grad_norm": 1.9739607133635115, "learning_rate": 1.8934368209969023e-07, "loss": 0.5948, "step": 8800 }, { "epoch": 0.91, "grad_norm": 1.9046769737170033, "learning_rate": 1.88885077942359e-07, "loss": 0.6231, "step": 8801 }, { "epoch": 0.91, "grad_norm": 1.9551105878545139, "learning_rate": 1.8842701915232743e-07, "loss": 0.52, "step": 8802 }, { "epoch": 0.92, "grad_norm": 1.8166528029787048, "learning_rate": 1.8796950578151785e-07, "loss": 0.4986, "step": 8803 }, { "epoch": 0.92, "grad_norm": 1.794656916003373, "learning_rate": 1.8751253788179325e-07, "loss": 0.6422, "step": 8804 }, { "epoch": 0.92, "grad_norm": 2.173722308812529, "learning_rate": 1.8705611550495218e-07, "loss": 0.6271, "step": 8805 }, { "epoch": 0.92, "grad_norm": 1.730807391557464, "learning_rate": 1.866002387027338e-07, "loss": 0.509, "step": 8806 }, { "epoch": 0.92, "grad_norm": 1.7958190844792186, "learning_rate": 1.8614490752681292e-07, "loss": 0.5214, "step": 8807 }, { "epoch": 0.92, "grad_norm": 1.9894503413864093, "learning_rate": 1.8569012202880599e-07, "loss": 0.5939, "step": 8808 }, { "epoch": 0.92, "grad_norm": 2.1598352658993623, "learning_rate": 1.852358822602629e-07, "loss": 0.5809, "step": 8809 }, { "epoch": 0.92, "grad_norm": 2.0071051604662937, "learning_rate": 1.847821882726769e-07, "loss": 0.605, "step": 8810 }, { "epoch": 0.92, "grad_norm": 2.0832891855842615, "learning_rate": 1.843290401174752e-07, "loss": 0.6761, "step": 8811 }, { "epoch": 0.92, "grad_norm": 2.0663532941669702, "learning_rate": 1.838764378460256e-07, "loss": 0.6676, "step": 8812 }, { "epoch": 0.92, "grad_norm": 2.0405747575233977, "learning_rate": 1.834243815096326e-07, "loss": 0.5634, "step": 8813 }, { "epoch": 0.92, "grad_norm": 2.086360893875116, "learning_rate": 1.829728711595391e-07, "loss": 0.6271, "step": 8814 }, { "epoch": 0.92, "grad_norm": 1.9649083555496234, "learning_rate": 1.825219068469275e-07, "loss": 0.6017, "step": 8815 }, { "epoch": 0.92, "grad_norm": 2.006301857023903, "learning_rate": 1.820714886229158e-07, "loss": 0.6402, "step": 8816 }, { "epoch": 0.92, "grad_norm": 2.0959766376734814, "learning_rate": 1.8162161653856257e-07, "loss": 0.5846, "step": 8817 }, { "epoch": 0.92, "grad_norm": 1.9703517347002897, "learning_rate": 1.8117229064486264e-07, "loss": 0.6658, "step": 8818 }, { "epoch": 0.92, "grad_norm": 1.9458890586613302, "learning_rate": 1.8072351099275077e-07, "loss": 0.5241, "step": 8819 }, { "epoch": 0.92, "grad_norm": 2.96184039789254, "learning_rate": 1.8027527763309682e-07, "loss": 0.5714, "step": 8820 }, { "epoch": 0.92, "grad_norm": 1.8509877835265842, "learning_rate": 1.798275906167124e-07, "loss": 0.621, "step": 8821 }, { "epoch": 0.92, "grad_norm": 1.8661444783986674, "learning_rate": 1.7938044999434412e-07, "loss": 0.6553, "step": 8822 }, { "epoch": 0.92, "grad_norm": 1.8170939072394554, "learning_rate": 1.7893385581667866e-07, "loss": 0.6345, "step": 8823 }, { "epoch": 0.92, "grad_norm": 1.9101906587424282, "learning_rate": 1.7848780813433942e-07, "loss": 0.604, "step": 8824 }, { "epoch": 0.92, "grad_norm": 2.2378093679248, "learning_rate": 1.7804230699788983e-07, "loss": 0.6194, "step": 8825 }, { "epoch": 0.92, "grad_norm": 2.28535490117752, "learning_rate": 1.7759735245782838e-07, "loss": 0.6506, "step": 8826 }, { "epoch": 0.92, "grad_norm": 1.969139478413972, "learning_rate": 1.7715294456459419e-07, "loss": 0.5687, "step": 8827 }, { "epoch": 0.92, "grad_norm": 1.9704670196931522, "learning_rate": 1.7670908336856252e-07, "loss": 0.6122, "step": 8828 }, { "epoch": 0.92, "grad_norm": 2.046800006905025, "learning_rate": 1.762657689200481e-07, "loss": 0.6601, "step": 8829 }, { "epoch": 0.92, "grad_norm": 2.0533236305561267, "learning_rate": 1.7582300126930296e-07, "loss": 0.6144, "step": 8830 }, { "epoch": 0.92, "grad_norm": 1.8968834934718648, "learning_rate": 1.7538078046651808e-07, "loss": 0.6129, "step": 8831 }, { "epoch": 0.92, "grad_norm": 1.8440975962291353, "learning_rate": 1.7493910656182056e-07, "loss": 0.5748, "step": 8832 }, { "epoch": 0.92, "grad_norm": 2.054011675246738, "learning_rate": 1.744979796052776e-07, "loss": 0.6494, "step": 8833 }, { "epoch": 0.92, "grad_norm": 2.350762103488185, "learning_rate": 1.7405739964689362e-07, "loss": 0.6574, "step": 8834 }, { "epoch": 0.92, "grad_norm": 2.194253819345266, "learning_rate": 1.7361736673660978e-07, "loss": 0.582, "step": 8835 }, { "epoch": 0.92, "grad_norm": 1.9923276521726647, "learning_rate": 1.7317788092430676e-07, "loss": 0.5446, "step": 8836 }, { "epoch": 0.92, "grad_norm": 1.9545991453195062, "learning_rate": 1.7273894225980303e-07, "loss": 0.5959, "step": 8837 }, { "epoch": 0.92, "grad_norm": 1.8931944180409803, "learning_rate": 1.7230055079285435e-07, "loss": 0.6128, "step": 8838 }, { "epoch": 0.92, "grad_norm": 2.064916087058621, "learning_rate": 1.7186270657315596e-07, "loss": 0.5658, "step": 8839 }, { "epoch": 0.92, "grad_norm": 2.014461259471736, "learning_rate": 1.7142540965033815e-07, "loss": 0.6944, "step": 8840 }, { "epoch": 0.92, "grad_norm": 2.0937362350654034, "learning_rate": 1.7098866007397296e-07, "loss": 0.6447, "step": 8841 }, { "epoch": 0.92, "grad_norm": 1.6339582774602281, "learning_rate": 1.70552457893568e-07, "loss": 0.5278, "step": 8842 }, { "epoch": 0.92, "grad_norm": 1.9130656985961463, "learning_rate": 1.701168031585676e-07, "loss": 0.594, "step": 8843 }, { "epoch": 0.92, "grad_norm": 1.8312506205126324, "learning_rate": 1.696816959183578e-07, "loss": 0.6467, "step": 8844 }, { "epoch": 0.92, "grad_norm": 2.241570074288019, "learning_rate": 1.6924713622225975e-07, "loss": 0.6756, "step": 8845 }, { "epoch": 0.92, "grad_norm": 2.1124221106992396, "learning_rate": 1.6881312411953288e-07, "loss": 0.6671, "step": 8846 }, { "epoch": 0.92, "grad_norm": 1.9213107738949424, "learning_rate": 1.6837965965937564e-07, "loss": 0.6519, "step": 8847 }, { "epoch": 0.92, "grad_norm": 1.8382074321131783, "learning_rate": 1.6794674289092316e-07, "loss": 0.5749, "step": 8848 }, { "epoch": 0.92, "grad_norm": 1.860478511195977, "learning_rate": 1.6751437386324842e-07, "loss": 0.5574, "step": 8849 }, { "epoch": 0.92, "grad_norm": 2.0995492889812497, "learning_rate": 1.6708255262536443e-07, "loss": 0.5592, "step": 8850 }, { "epoch": 0.92, "grad_norm": 2.0108036150741375, "learning_rate": 1.6665127922621927e-07, "loss": 0.5698, "step": 8851 }, { "epoch": 0.92, "grad_norm": 2.029768719174924, "learning_rate": 1.6622055371470103e-07, "loss": 0.5616, "step": 8852 }, { "epoch": 0.92, "grad_norm": 2.069671016263712, "learning_rate": 1.6579037613963456e-07, "loss": 0.7072, "step": 8853 }, { "epoch": 0.92, "grad_norm": 2.0304812240208134, "learning_rate": 1.6536074654978307e-07, "loss": 0.568, "step": 8854 }, { "epoch": 0.92, "grad_norm": 1.9320514761894227, "learning_rate": 1.6493166499384762e-07, "loss": 0.6442, "step": 8855 }, { "epoch": 0.92, "grad_norm": 2.0939593574383397, "learning_rate": 1.645031315204676e-07, "loss": 0.6313, "step": 8856 }, { "epoch": 0.92, "grad_norm": 2.0131860014511966, "learning_rate": 1.6407514617821752e-07, "loss": 0.7299, "step": 8857 }, { "epoch": 0.92, "grad_norm": 1.9602967302745016, "learning_rate": 1.6364770901561467e-07, "loss": 0.5883, "step": 8858 }, { "epoch": 0.92, "grad_norm": 2.029561620284226, "learning_rate": 1.6322082008110974e-07, "loss": 0.58, "step": 8859 }, { "epoch": 0.92, "grad_norm": 2.016812776940906, "learning_rate": 1.6279447942309345e-07, "loss": 0.687, "step": 8860 }, { "epoch": 0.92, "grad_norm": 2.138409567531603, "learning_rate": 1.6236868708989438e-07, "loss": 0.686, "step": 8861 }, { "epoch": 0.92, "grad_norm": 1.7290146664194534, "learning_rate": 1.619434431297784e-07, "loss": 0.559, "step": 8862 }, { "epoch": 0.92, "grad_norm": 1.9890454463913607, "learning_rate": 1.615187475909491e-07, "loss": 0.6277, "step": 8863 }, { "epoch": 0.92, "grad_norm": 1.9194698017431477, "learning_rate": 1.6109460052154802e-07, "loss": 0.5385, "step": 8864 }, { "epoch": 0.92, "grad_norm": 1.873346670966746, "learning_rate": 1.6067100196965447e-07, "loss": 0.548, "step": 8865 }, { "epoch": 0.92, "grad_norm": 1.7853281470209055, "learning_rate": 1.602479519832867e-07, "loss": 0.5921, "step": 8866 }, { "epoch": 0.92, "grad_norm": 2.007330702321665, "learning_rate": 1.5982545061039855e-07, "loss": 0.6518, "step": 8867 }, { "epoch": 0.92, "grad_norm": 1.9895802955142883, "learning_rate": 1.5940349789888398e-07, "loss": 0.6133, "step": 8868 }, { "epoch": 0.92, "grad_norm": 1.977575299921113, "learning_rate": 1.5898209389657305e-07, "loss": 0.6876, "step": 8869 }, { "epoch": 0.92, "grad_norm": 1.9175713363661888, "learning_rate": 1.585612386512353e-07, "loss": 0.594, "step": 8870 }, { "epoch": 0.92, "grad_norm": 2.0749683134033026, "learning_rate": 1.5814093221057647e-07, "loss": 0.5403, "step": 8871 }, { "epoch": 0.92, "grad_norm": 2.1233122307163885, "learning_rate": 1.5772117462224068e-07, "loss": 0.6821, "step": 8872 }, { "epoch": 0.92, "grad_norm": 2.0961791591392056, "learning_rate": 1.5730196593380877e-07, "loss": 0.644, "step": 8873 }, { "epoch": 0.92, "grad_norm": 1.944886823301487, "learning_rate": 1.5688330619280269e-07, "loss": 0.6329, "step": 8874 }, { "epoch": 0.92, "grad_norm": 1.8193650429809556, "learning_rate": 1.5646519544667783e-07, "loss": 0.5126, "step": 8875 }, { "epoch": 0.92, "grad_norm": 1.9800998991702814, "learning_rate": 1.5604763374283073e-07, "loss": 0.5899, "step": 8876 }, { "epoch": 0.92, "grad_norm": 2.0403033797812267, "learning_rate": 1.556306211285935e-07, "loss": 0.623, "step": 8877 }, { "epoch": 0.92, "grad_norm": 2.035385871609747, "learning_rate": 1.5521415765123783e-07, "loss": 0.544, "step": 8878 }, { "epoch": 0.92, "grad_norm": 1.8780946782463006, "learning_rate": 1.54798243357972e-07, "loss": 0.6026, "step": 8879 }, { "epoch": 0.92, "grad_norm": 2.4224720214407203, "learning_rate": 1.5438287829594113e-07, "loss": 0.6607, "step": 8880 }, { "epoch": 0.92, "grad_norm": 2.0498507932770895, "learning_rate": 1.5396806251223085e-07, "loss": 0.6207, "step": 8881 }, { "epoch": 0.92, "grad_norm": 2.011661827763579, "learning_rate": 1.535537960538619e-07, "loss": 0.6032, "step": 8882 }, { "epoch": 0.92, "grad_norm": 2.1615800677651666, "learning_rate": 1.5314007896779393e-07, "loss": 0.6291, "step": 8883 }, { "epoch": 0.92, "grad_norm": 2.2134352807827535, "learning_rate": 1.5272691130092443e-07, "loss": 0.5684, "step": 8884 }, { "epoch": 0.92, "grad_norm": 2.113655773007037, "learning_rate": 1.5231429310008817e-07, "loss": 0.6024, "step": 8885 }, { "epoch": 0.92, "grad_norm": 1.7751520094120168, "learning_rate": 1.5190222441205715e-07, "loss": 0.5622, "step": 8886 }, { "epoch": 0.92, "grad_norm": 1.9421602467307941, "learning_rate": 1.5149070528354238e-07, "loss": 0.6058, "step": 8887 }, { "epoch": 0.92, "grad_norm": 1.6698253787569741, "learning_rate": 1.510797357611915e-07, "loss": 0.6074, "step": 8888 }, { "epoch": 0.92, "grad_norm": 1.9009707059362406, "learning_rate": 1.5066931589159118e-07, "loss": 0.6354, "step": 8889 }, { "epoch": 0.92, "grad_norm": 1.7195770528720253, "learning_rate": 1.502594457212636e-07, "loss": 0.6722, "step": 8890 }, { "epoch": 0.92, "grad_norm": 1.9207288432623015, "learning_rate": 1.4985012529667052e-07, "loss": 0.6761, "step": 8891 }, { "epoch": 0.92, "grad_norm": 2.139605040626212, "learning_rate": 1.4944135466421095e-07, "loss": 0.6148, "step": 8892 }, { "epoch": 0.92, "grad_norm": 2.0452258517628152, "learning_rate": 1.4903313387022168e-07, "loss": 0.6248, "step": 8893 }, { "epoch": 0.92, "grad_norm": 2.107910493475442, "learning_rate": 1.4862546296097514e-07, "loss": 0.6257, "step": 8894 }, { "epoch": 0.92, "grad_norm": 1.9371679169071094, "learning_rate": 1.4821834198268493e-07, "loss": 0.6685, "step": 8895 }, { "epoch": 0.92, "grad_norm": 2.269015494506768, "learning_rate": 1.4781177098149912e-07, "loss": 0.6222, "step": 8896 }, { "epoch": 0.92, "grad_norm": 2.0322257763626053, "learning_rate": 1.4740575000350643e-07, "loss": 0.6172, "step": 8897 }, { "epoch": 0.92, "grad_norm": 1.9689096944036948, "learning_rate": 1.4700027909473003e-07, "loss": 0.5686, "step": 8898 }, { "epoch": 0.93, "grad_norm": 1.8577584797794915, "learning_rate": 1.4659535830113368e-07, "loss": 0.7123, "step": 8899 }, { "epoch": 0.93, "grad_norm": 2.087469919331919, "learning_rate": 1.461909876686174e-07, "loss": 0.6695, "step": 8900 }, { "epoch": 0.93, "grad_norm": 2.0831701171131347, "learning_rate": 1.4578716724301834e-07, "loss": 0.5916, "step": 8901 }, { "epoch": 0.93, "grad_norm": 1.765591749554606, "learning_rate": 1.4538389707011103e-07, "loss": 0.5457, "step": 8902 }, { "epoch": 0.93, "grad_norm": 2.0101458451306575, "learning_rate": 1.4498117719561056e-07, "loss": 0.655, "step": 8903 }, { "epoch": 0.93, "grad_norm": 2.057138304301439, "learning_rate": 1.445790076651654e-07, "loss": 0.6631, "step": 8904 }, { "epoch": 0.93, "grad_norm": 2.01208335411512, "learning_rate": 1.4417738852436523e-07, "loss": 0.6347, "step": 8905 }, { "epoch": 0.93, "grad_norm": 2.233841840702759, "learning_rate": 1.4377631981873474e-07, "loss": 0.6104, "step": 8906 }, { "epoch": 0.93, "grad_norm": 1.992020434510179, "learning_rate": 1.4337580159373864e-07, "loss": 0.5969, "step": 8907 }, { "epoch": 0.93, "grad_norm": 1.9569295294426226, "learning_rate": 1.4297583389477675e-07, "loss": 0.5636, "step": 8908 }, { "epoch": 0.93, "grad_norm": 2.0519576156254904, "learning_rate": 1.4257641676718891e-07, "loss": 0.6166, "step": 8909 }, { "epoch": 0.93, "grad_norm": 1.9110514363828413, "learning_rate": 1.4217755025624946e-07, "loss": 0.6078, "step": 8910 }, { "epoch": 0.93, "grad_norm": 1.8948115316189942, "learning_rate": 1.4177923440717445e-07, "loss": 0.6172, "step": 8911 }, { "epoch": 0.93, "grad_norm": 1.910510986217153, "learning_rate": 1.413814692651133e-07, "loss": 0.5804, "step": 8912 }, { "epoch": 0.93, "grad_norm": 1.8590318355294009, "learning_rate": 1.4098425487515665e-07, "loss": 0.622, "step": 8913 }, { "epoch": 0.93, "grad_norm": 1.9410471171303303, "learning_rate": 1.4058759128232957e-07, "loss": 0.5916, "step": 8914 }, { "epoch": 0.93, "grad_norm": 1.866173074226064, "learning_rate": 1.4019147853159663e-07, "loss": 0.5734, "step": 8915 }, { "epoch": 0.93, "grad_norm": 1.9339751206748805, "learning_rate": 1.397959166678603e-07, "loss": 0.5967, "step": 8916 }, { "epoch": 0.93, "grad_norm": 2.0814425508882373, "learning_rate": 1.3940090573595854e-07, "loss": 0.6032, "step": 8917 }, { "epoch": 0.93, "grad_norm": 1.7468867551652556, "learning_rate": 1.3900644578066892e-07, "loss": 0.5055, "step": 8918 }, { "epoch": 0.93, "grad_norm": 1.9505230853370727, "learning_rate": 1.386125368467045e-07, "loss": 0.5505, "step": 8919 }, { "epoch": 0.93, "grad_norm": 1.8138209073254565, "learning_rate": 1.3821917897871905e-07, "loss": 0.5381, "step": 8920 }, { "epoch": 0.93, "grad_norm": 1.8811701807030197, "learning_rate": 1.378263722213008e-07, "loss": 0.5717, "step": 8921 }, { "epoch": 0.93, "grad_norm": 1.6894404622210073, "learning_rate": 1.3743411661897688e-07, "loss": 0.5267, "step": 8922 }, { "epoch": 0.93, "grad_norm": 2.045490954150234, "learning_rate": 1.3704241221621062e-07, "loss": 0.6505, "step": 8923 }, { "epoch": 0.93, "grad_norm": 1.8548965079774178, "learning_rate": 1.366512590574054e-07, "loss": 0.5188, "step": 8924 }, { "epoch": 0.93, "grad_norm": 2.1367292127451414, "learning_rate": 1.3626065718689962e-07, "loss": 0.6023, "step": 8925 }, { "epoch": 0.93, "grad_norm": 1.9485421579702578, "learning_rate": 1.3587060664897178e-07, "loss": 0.5993, "step": 8926 }, { "epoch": 0.93, "grad_norm": 1.9473491176314763, "learning_rate": 1.3548110748783426e-07, "loss": 0.6362, "step": 8927 }, { "epoch": 0.93, "grad_norm": 2.150426581736567, "learning_rate": 1.3509215974764067e-07, "loss": 0.5799, "step": 8928 }, { "epoch": 0.93, "grad_norm": 2.145747658974006, "learning_rate": 1.347037634724796e-07, "loss": 0.5796, "step": 8929 }, { "epoch": 0.93, "grad_norm": 1.955864982526995, "learning_rate": 1.3431591870637862e-07, "loss": 0.7073, "step": 8930 }, { "epoch": 0.93, "grad_norm": 2.0360528975248453, "learning_rate": 1.3392862549330087e-07, "loss": 0.6163, "step": 8931 }, { "epoch": 0.93, "grad_norm": 2.066226650062881, "learning_rate": 1.3354188387715017e-07, "loss": 0.6395, "step": 8932 }, { "epoch": 0.93, "grad_norm": 1.7992072253913418, "learning_rate": 1.3315569390176364e-07, "loss": 0.4591, "step": 8933 }, { "epoch": 0.93, "grad_norm": 1.9422148778935013, "learning_rate": 1.3277005561092016e-07, "loss": 0.6084, "step": 8934 }, { "epoch": 0.93, "grad_norm": 2.099312537538956, "learning_rate": 1.3238496904833254e-07, "loss": 0.6624, "step": 8935 }, { "epoch": 0.93, "grad_norm": 2.106560416500555, "learning_rate": 1.3200043425765364e-07, "loss": 0.5163, "step": 8936 }, { "epoch": 0.93, "grad_norm": 2.0539286083044805, "learning_rate": 1.3161645128247247e-07, "loss": 0.6933, "step": 8937 }, { "epoch": 0.93, "grad_norm": 2.0500688798555298, "learning_rate": 1.3123302016631477e-07, "loss": 0.6283, "step": 8938 }, { "epoch": 0.93, "grad_norm": 2.0385543350456485, "learning_rate": 1.3085014095264527e-07, "loss": 0.5445, "step": 8939 }, { "epoch": 0.93, "grad_norm": 2.279005385123845, "learning_rate": 1.3046781368486638e-07, "loss": 0.6009, "step": 8940 }, { "epoch": 0.93, "grad_norm": 4.050341700548169, "learning_rate": 1.3008603840631516e-07, "loss": 0.6678, "step": 8941 }, { "epoch": 0.93, "grad_norm": 1.8506971858384356, "learning_rate": 1.2970481516026922e-07, "loss": 0.4761, "step": 8942 }, { "epoch": 0.93, "grad_norm": 2.009727635049314, "learning_rate": 1.293241439899423e-07, "loss": 0.5926, "step": 8943 }, { "epoch": 0.93, "grad_norm": 2.0818680696814704, "learning_rate": 1.28944024938486e-07, "loss": 0.6389, "step": 8944 }, { "epoch": 0.93, "grad_norm": 1.903971138639191, "learning_rate": 1.2856445804898866e-07, "loss": 0.5683, "step": 8945 }, { "epoch": 0.93, "grad_norm": 2.254377222554142, "learning_rate": 1.2818544336447637e-07, "loss": 0.5999, "step": 8946 }, { "epoch": 0.93, "grad_norm": 2.0107754811059886, "learning_rate": 1.2780698092791145e-07, "loss": 0.5586, "step": 8947 }, { "epoch": 0.93, "grad_norm": 1.811450767600721, "learning_rate": 1.274290707821968e-07, "loss": 0.5123, "step": 8948 }, { "epoch": 0.93, "grad_norm": 2.2487747274358436, "learning_rate": 1.2705171297016873e-07, "loss": 0.6004, "step": 8949 }, { "epoch": 0.93, "grad_norm": 2.0659723146256956, "learning_rate": 1.2667490753460465e-07, "loss": 0.6025, "step": 8950 }, { "epoch": 0.93, "grad_norm": 1.857448937961545, "learning_rate": 1.2629865451821656e-07, "loss": 0.6056, "step": 8951 }, { "epoch": 0.93, "grad_norm": 2.179553087454338, "learning_rate": 1.2592295396365527e-07, "loss": 0.668, "step": 8952 }, { "epoch": 0.93, "grad_norm": 2.002595953566603, "learning_rate": 1.2554780591350845e-07, "loss": 0.5864, "step": 8953 }, { "epoch": 0.93, "grad_norm": 2.0266575419282606, "learning_rate": 1.2517321041030096e-07, "loss": 0.6822, "step": 8954 }, { "epoch": 0.93, "grad_norm": 1.9381132086572017, "learning_rate": 1.2479916749649657e-07, "loss": 0.5354, "step": 8955 }, { "epoch": 0.93, "grad_norm": 1.877289303138945, "learning_rate": 1.2442567721449307e-07, "loss": 0.5831, "step": 8956 }, { "epoch": 0.93, "grad_norm": 2.0137513658224586, "learning_rate": 1.240527396066299e-07, "loss": 0.5456, "step": 8957 }, { "epoch": 0.93, "grad_norm": 1.8438678243217668, "learning_rate": 1.2368035471517993e-07, "loss": 0.578, "step": 8958 }, { "epoch": 0.93, "grad_norm": 2.0226666721004958, "learning_rate": 1.233085225823566e-07, "loss": 0.6474, "step": 8959 }, { "epoch": 0.93, "grad_norm": 1.9125971670943722, "learning_rate": 1.2293724325030786e-07, "loss": 0.6467, "step": 8960 }, { "epoch": 0.93, "grad_norm": 2.0296936030038264, "learning_rate": 1.2256651676112118e-07, "loss": 0.6714, "step": 8961 }, { "epoch": 0.93, "grad_norm": 1.9650618330501934, "learning_rate": 1.2219634315681962e-07, "loss": 0.6185, "step": 8962 }, { "epoch": 0.93, "grad_norm": 1.9626286181880022, "learning_rate": 1.2182672247936566e-07, "loss": 0.574, "step": 8963 }, { "epoch": 0.93, "grad_norm": 1.8374055121902448, "learning_rate": 1.214576547706575e-07, "loss": 0.5862, "step": 8964 }, { "epoch": 0.93, "grad_norm": 2.0329421955743108, "learning_rate": 1.210891400725306e-07, "loss": 0.5647, "step": 8965 }, { "epoch": 0.93, "grad_norm": 1.9931490923121717, "learning_rate": 1.2072117842675867e-07, "loss": 0.5806, "step": 8966 }, { "epoch": 0.93, "grad_norm": 1.829680075590195, "learning_rate": 1.2035376987505288e-07, "loss": 0.6675, "step": 8967 }, { "epoch": 0.93, "grad_norm": 1.847421420466505, "learning_rate": 1.199869144590593e-07, "loss": 0.5583, "step": 8968 }, { "epoch": 0.93, "grad_norm": 1.8374941888305347, "learning_rate": 1.196206122203647e-07, "loss": 0.5854, "step": 8969 }, { "epoch": 0.93, "grad_norm": 2.1554222663190084, "learning_rate": 1.192548632004903e-07, "loss": 0.7002, "step": 8970 }, { "epoch": 0.93, "grad_norm": 1.9239746198381393, "learning_rate": 1.188896674408968e-07, "loss": 0.6828, "step": 8971 }, { "epoch": 0.93, "grad_norm": 1.995032207458123, "learning_rate": 1.1852502498298058e-07, "loss": 0.5992, "step": 8972 }, { "epoch": 0.93, "grad_norm": 1.8669745553621828, "learning_rate": 1.1816093586807687e-07, "loss": 0.5442, "step": 8973 }, { "epoch": 0.93, "grad_norm": 2.1107807844678823, "learning_rate": 1.1779740013745655e-07, "loss": 0.5963, "step": 8974 }, { "epoch": 0.93, "grad_norm": 1.8560952940963382, "learning_rate": 1.174344178323289e-07, "loss": 0.6525, "step": 8975 }, { "epoch": 0.93, "grad_norm": 2.752115937083064, "learning_rate": 1.1707198899383875e-07, "loss": 0.6869, "step": 8976 }, { "epoch": 0.93, "grad_norm": 1.7421395713844312, "learning_rate": 1.1671011366307105e-07, "loss": 0.556, "step": 8977 }, { "epoch": 0.93, "grad_norm": 1.8270357499897079, "learning_rate": 1.163487918810452e-07, "loss": 0.5845, "step": 8978 }, { "epoch": 0.93, "grad_norm": 1.740650769846731, "learning_rate": 1.1598802368872009e-07, "loss": 0.5365, "step": 8979 }, { "epoch": 0.93, "grad_norm": 1.8365042275780272, "learning_rate": 1.1562780912699023e-07, "loss": 0.5251, "step": 8980 }, { "epoch": 0.93, "grad_norm": 1.886097167360647, "learning_rate": 1.1526814823668853e-07, "loss": 0.6388, "step": 8981 }, { "epoch": 0.93, "grad_norm": 1.8793682490212578, "learning_rate": 1.1490904105858403e-07, "loss": 0.6169, "step": 8982 }, { "epoch": 0.93, "grad_norm": 1.7536349174266053, "learning_rate": 1.1455048763338361e-07, "loss": 0.5711, "step": 8983 }, { "epoch": 0.93, "grad_norm": 1.898534836280984, "learning_rate": 1.1419248800173199e-07, "loss": 0.5357, "step": 8984 }, { "epoch": 0.93, "grad_norm": 1.8602519762704828, "learning_rate": 1.1383504220421004e-07, "loss": 0.7386, "step": 8985 }, { "epoch": 0.93, "grad_norm": 2.193413971204252, "learning_rate": 1.1347815028133536e-07, "loss": 0.6072, "step": 8986 }, { "epoch": 0.93, "grad_norm": 1.885655084013139, "learning_rate": 1.1312181227356556e-07, "loss": 0.7008, "step": 8987 }, { "epoch": 0.93, "grad_norm": 2.145617108224491, "learning_rate": 1.1276602822129223e-07, "loss": 0.6087, "step": 8988 }, { "epoch": 0.93, "grad_norm": 2.1748127911102175, "learning_rate": 1.1241079816484591e-07, "loss": 0.6246, "step": 8989 }, { "epoch": 0.93, "grad_norm": 2.0305142220354355, "learning_rate": 1.1205612214449434e-07, "loss": 0.6088, "step": 8990 }, { "epoch": 0.93, "grad_norm": 1.8258785832904052, "learning_rate": 1.1170200020044098e-07, "loss": 0.549, "step": 8991 }, { "epoch": 0.93, "grad_norm": 1.9969958511662391, "learning_rate": 1.1134843237282922e-07, "loss": 0.5544, "step": 8992 }, { "epoch": 0.93, "grad_norm": 1.9296466850652296, "learning_rate": 1.1099541870173591e-07, "loss": 0.6445, "step": 8993 }, { "epoch": 0.93, "grad_norm": 1.8055438751017052, "learning_rate": 1.1064295922717904e-07, "loss": 0.6468, "step": 8994 }, { "epoch": 0.94, "grad_norm": 2.133732513612682, "learning_rate": 1.1029105398911055e-07, "loss": 0.5653, "step": 8995 }, { "epoch": 0.94, "grad_norm": 1.8705602462539526, "learning_rate": 1.0993970302742297e-07, "loss": 0.6245, "step": 8996 }, { "epoch": 0.94, "grad_norm": 1.8390147969929742, "learning_rate": 1.0958890638194108e-07, "loss": 0.6049, "step": 8997 }, { "epoch": 0.94, "grad_norm": 1.9877021610759997, "learning_rate": 1.0923866409243144e-07, "loss": 0.5138, "step": 8998 }, { "epoch": 0.94, "grad_norm": 1.9376071697368407, "learning_rate": 1.0888897619859506e-07, "loss": 0.5826, "step": 8999 }, { "epoch": 0.94, "grad_norm": 1.8774347240206823, "learning_rate": 1.0853984274007246e-07, "loss": 0.64, "step": 9000 }, { "epoch": 0.94, "grad_norm": 2.083899548076166, "learning_rate": 1.0819126375643862e-07, "loss": 0.6244, "step": 9001 }, { "epoch": 0.94, "grad_norm": 2.158124305561164, "learning_rate": 1.0784323928720753e-07, "loss": 0.6079, "step": 9002 }, { "epoch": 0.94, "grad_norm": 1.932121628840871, "learning_rate": 1.0749576937182926e-07, "loss": 0.5824, "step": 9003 }, { "epoch": 0.94, "grad_norm": 2.052197137452352, "learning_rate": 1.0714885404969288e-07, "loss": 0.6556, "step": 9004 }, { "epoch": 0.94, "grad_norm": 1.9618385932007807, "learning_rate": 1.0680249336012139e-07, "loss": 0.5538, "step": 9005 }, { "epoch": 0.94, "grad_norm": 2.1643380059315005, "learning_rate": 1.064566873423778e-07, "loss": 0.5097, "step": 9006 }, { "epoch": 0.94, "grad_norm": 2.0854664855258283, "learning_rate": 1.0611143603566076e-07, "loss": 0.6295, "step": 9007 }, { "epoch": 0.94, "grad_norm": 1.9952119931589922, "learning_rate": 1.0576673947910732e-07, "loss": 0.599, "step": 9008 }, { "epoch": 0.94, "grad_norm": 1.9799496601050393, "learning_rate": 1.0542259771178898e-07, "loss": 0.6321, "step": 9009 }, { "epoch": 0.94, "grad_norm": 1.9816211810449393, "learning_rate": 1.0507901077271843e-07, "loss": 0.5686, "step": 9010 }, { "epoch": 0.94, "grad_norm": 1.8540076930182927, "learning_rate": 1.0473597870084174e-07, "loss": 0.5351, "step": 9011 }, { "epoch": 0.94, "grad_norm": 1.741984167133165, "learning_rate": 1.0439350153504446e-07, "loss": 0.5656, "step": 9012 }, { "epoch": 0.94, "grad_norm": 1.9845451433362102, "learning_rate": 1.0405157931414667e-07, "loss": 0.596, "step": 9013 }, { "epoch": 0.94, "grad_norm": 1.6155972863453967, "learning_rate": 1.0371021207690957e-07, "loss": 0.4353, "step": 9014 }, { "epoch": 0.94, "grad_norm": 1.8418415679031528, "learning_rate": 1.0336939986202666e-07, "loss": 0.6544, "step": 9015 }, { "epoch": 0.94, "grad_norm": 1.886523860804278, "learning_rate": 1.0302914270813314e-07, "loss": 0.6301, "step": 9016 }, { "epoch": 0.94, "grad_norm": 1.8822214646480715, "learning_rate": 1.0268944065379704e-07, "loss": 0.5913, "step": 9017 }, { "epoch": 0.94, "grad_norm": 2.0695162665439426, "learning_rate": 1.0235029373752758e-07, "loss": 0.629, "step": 9018 }, { "epoch": 0.94, "grad_norm": 2.001389219375752, "learning_rate": 1.0201170199776789e-07, "loss": 0.5956, "step": 9019 }, { "epoch": 0.94, "grad_norm": 2.1057510275279165, "learning_rate": 1.0167366547289892e-07, "loss": 0.6267, "step": 9020 }, { "epoch": 0.94, "grad_norm": 1.9837252540821202, "learning_rate": 1.0133618420123947e-07, "loss": 0.6496, "step": 9021 }, { "epoch": 0.94, "grad_norm": 2.04320201586192, "learning_rate": 1.0099925822104562e-07, "loss": 0.6135, "step": 9022 }, { "epoch": 0.94, "grad_norm": 1.9760632612491782, "learning_rate": 1.0066288757050846e-07, "loss": 0.6513, "step": 9023 }, { "epoch": 0.94, "grad_norm": 2.037409421200088, "learning_rate": 1.0032707228775917e-07, "loss": 0.5312, "step": 9024 }, { "epoch": 0.94, "grad_norm": 2.0731308535241197, "learning_rate": 9.999181241086231e-08, "loss": 0.6405, "step": 9025 }, { "epoch": 0.94, "grad_norm": 2.0034900044017414, "learning_rate": 9.965710797782358e-08, "loss": 0.6196, "step": 9026 }, { "epoch": 0.94, "grad_norm": 2.0664636409457473, "learning_rate": 9.932295902658263e-08, "loss": 0.7305, "step": 9027 }, { "epoch": 0.94, "grad_norm": 2.0057196873279506, "learning_rate": 9.898936559501637e-08, "loss": 0.6518, "step": 9028 }, { "epoch": 0.94, "grad_norm": 1.920647903527355, "learning_rate": 9.865632772094124e-08, "loss": 0.568, "step": 9029 }, { "epoch": 0.94, "grad_norm": 2.061404388610512, "learning_rate": 9.832384544210704e-08, "loss": 0.6644, "step": 9030 }, { "epoch": 0.94, "grad_norm": 2.051063631357103, "learning_rate": 9.799191879620474e-08, "loss": 0.6747, "step": 9031 }, { "epoch": 0.94, "grad_norm": 2.11086077130743, "learning_rate": 9.76605478208581e-08, "loss": 0.5537, "step": 9032 }, { "epoch": 0.94, "grad_norm": 1.8661021852558275, "learning_rate": 9.732973255363154e-08, "loss": 0.5615, "step": 9033 }, { "epoch": 0.94, "grad_norm": 1.8129931355838271, "learning_rate": 9.699947303202339e-08, "loss": 0.6432, "step": 9034 }, { "epoch": 0.94, "grad_norm": 1.8858498295677275, "learning_rate": 9.666976929347205e-08, "loss": 0.5187, "step": 9035 }, { "epoch": 0.94, "grad_norm": 2.167973700167487, "learning_rate": 9.634062137534927e-08, "loss": 0.6469, "step": 9036 }, { "epoch": 0.94, "grad_norm": 1.8363965553392205, "learning_rate": 9.601202931496745e-08, "loss": 0.5113, "step": 9037 }, { "epoch": 0.94, "grad_norm": 1.8634840550978502, "learning_rate": 9.5683993149574e-08, "loss": 0.6035, "step": 9038 }, { "epoch": 0.94, "grad_norm": 2.2143595974193717, "learning_rate": 9.535651291635362e-08, "loss": 0.6819, "step": 9039 }, { "epoch": 0.94, "grad_norm": 1.6812826245515171, "learning_rate": 9.502958865242774e-08, "loss": 0.5876, "step": 9040 }, { "epoch": 0.94, "grad_norm": 1.8248003304779605, "learning_rate": 9.470322039485614e-08, "loss": 0.5248, "step": 9041 }, { "epoch": 0.94, "grad_norm": 1.9421864031203093, "learning_rate": 9.437740818063312e-08, "loss": 0.6556, "step": 9042 }, { "epoch": 0.94, "grad_norm": 1.906943046443007, "learning_rate": 9.405215204669193e-08, "loss": 0.5409, "step": 9043 }, { "epoch": 0.94, "grad_norm": 2.0185391093753142, "learning_rate": 9.372745202990253e-08, "loss": 0.6874, "step": 9044 }, { "epoch": 0.94, "grad_norm": 1.9843021074005947, "learning_rate": 9.3403308167071e-08, "loss": 0.5017, "step": 9045 }, { "epoch": 0.94, "grad_norm": 1.9854311881929885, "learning_rate": 9.30797204949413e-08, "loss": 0.6676, "step": 9046 }, { "epoch": 0.94, "grad_norm": 2.0826537011454134, "learning_rate": 9.275668905019409e-08, "loss": 0.6396, "step": 9047 }, { "epoch": 0.94, "grad_norm": 2.039120363907294, "learning_rate": 9.243421386944618e-08, "loss": 0.65, "step": 9048 }, { "epoch": 0.94, "grad_norm": 1.8978766367979325, "learning_rate": 9.211229498925278e-08, "loss": 0.5569, "step": 9049 }, { "epoch": 0.94, "grad_norm": 1.7595071125989983, "learning_rate": 9.179093244610415e-08, "loss": 0.486, "step": 9050 }, { "epoch": 0.94, "grad_norm": 1.890778877362623, "learning_rate": 9.147012627642949e-08, "loss": 0.6964, "step": 9051 }, { "epoch": 0.94, "grad_norm": 2.005268335224658, "learning_rate": 9.114987651659357e-08, "loss": 0.5917, "step": 9052 }, { "epoch": 0.94, "grad_norm": 2.1730014002086895, "learning_rate": 9.083018320289849e-08, "loss": 0.6614, "step": 9053 }, { "epoch": 0.94, "grad_norm": 1.9735903938030195, "learning_rate": 9.051104637158359e-08, "loss": 0.5689, "step": 9054 }, { "epoch": 0.94, "grad_norm": 1.8456661586038106, "learning_rate": 9.019246605882492e-08, "loss": 0.5551, "step": 9055 }, { "epoch": 0.94, "grad_norm": 2.052240286399753, "learning_rate": 8.987444230073528e-08, "loss": 0.6064, "step": 9056 }, { "epoch": 0.94, "grad_norm": 1.8535713753595429, "learning_rate": 8.955697513336415e-08, "loss": 0.5835, "step": 9057 }, { "epoch": 0.94, "grad_norm": 1.8303132592401052, "learning_rate": 8.924006459269886e-08, "loss": 0.5045, "step": 9058 }, { "epoch": 0.94, "grad_norm": 1.901318346829754, "learning_rate": 8.892371071466176e-08, "loss": 0.6645, "step": 9059 }, { "epoch": 0.94, "grad_norm": 1.9257932527237893, "learning_rate": 8.860791353511532e-08, "loss": 0.6335, "step": 9060 }, { "epoch": 0.94, "grad_norm": 1.9043707207031637, "learning_rate": 8.829267308985535e-08, "loss": 0.5067, "step": 9061 }, { "epoch": 0.94, "grad_norm": 1.9917505790714765, "learning_rate": 8.797798941461655e-08, "loss": 0.6557, "step": 9062 }, { "epoch": 0.94, "grad_norm": 1.8937462209908957, "learning_rate": 8.766386254507043e-08, "loss": 0.5871, "step": 9063 }, { "epoch": 0.94, "grad_norm": 1.9406431576153542, "learning_rate": 8.735029251682515e-08, "loss": 0.5824, "step": 9064 }, { "epoch": 0.94, "grad_norm": 1.969695305422505, "learning_rate": 8.703727936542505e-08, "loss": 0.6058, "step": 9065 }, { "epoch": 0.94, "grad_norm": 1.9684624208295343, "learning_rate": 8.672482312635233e-08, "loss": 0.7302, "step": 9066 }, { "epoch": 0.94, "grad_norm": 1.9063600516524655, "learning_rate": 8.641292383502531e-08, "loss": 0.5817, "step": 9067 }, { "epoch": 0.94, "grad_norm": 1.913210044504492, "learning_rate": 8.610158152680071e-08, "loss": 0.6381, "step": 9068 }, { "epoch": 0.94, "grad_norm": 1.9742629875445776, "learning_rate": 8.579079623696917e-08, "loss": 0.7128, "step": 9069 }, { "epoch": 0.94, "grad_norm": 1.8004319724207307, "learning_rate": 8.548056800076199e-08, "loss": 0.6184, "step": 9070 }, { "epoch": 0.94, "grad_norm": 1.8260617741245229, "learning_rate": 8.517089685334323e-08, "loss": 0.665, "step": 9071 }, { "epoch": 0.94, "grad_norm": 2.0768267807130627, "learning_rate": 8.486178282981761e-08, "loss": 0.701, "step": 9072 }, { "epoch": 0.94, "grad_norm": 1.8819509447944622, "learning_rate": 8.455322596522375e-08, "loss": 0.6165, "step": 9073 }, { "epoch": 0.94, "grad_norm": 1.9499123138063066, "learning_rate": 8.424522629453924e-08, "loss": 0.6395, "step": 9074 }, { "epoch": 0.94, "grad_norm": 1.978374390635286, "learning_rate": 8.39377838526767e-08, "loss": 0.597, "step": 9075 }, { "epoch": 0.94, "grad_norm": 1.8940455664553872, "learning_rate": 8.363089867448715e-08, "loss": 0.5436, "step": 9076 }, { "epoch": 0.94, "grad_norm": 1.9531936063259534, "learning_rate": 8.332457079475831e-08, "loss": 0.5746, "step": 9077 }, { "epoch": 0.94, "grad_norm": 1.823839062817681, "learning_rate": 8.301880024821296e-08, "loss": 0.5773, "step": 9078 }, { "epoch": 0.94, "grad_norm": 2.0376195888887345, "learning_rate": 8.271358706951227e-08, "loss": 0.5653, "step": 9079 }, { "epoch": 0.94, "grad_norm": 1.999992907214282, "learning_rate": 8.240893129325412e-08, "loss": 0.6454, "step": 9080 }, { "epoch": 0.94, "grad_norm": 1.8778664959112257, "learning_rate": 8.210483295397309e-08, "loss": 0.6124, "step": 9081 }, { "epoch": 0.94, "grad_norm": 2.173549074135219, "learning_rate": 8.180129208613996e-08, "loss": 0.6335, "step": 9082 }, { "epoch": 0.94, "grad_norm": 1.9313462323770607, "learning_rate": 8.149830872416331e-08, "loss": 0.6208, "step": 9083 }, { "epoch": 0.94, "grad_norm": 2.0185542339124622, "learning_rate": 8.119588290238845e-08, "loss": 0.6189, "step": 9084 }, { "epoch": 0.94, "grad_norm": 1.9778143235997587, "learning_rate": 8.089401465509628e-08, "loss": 0.6107, "step": 9085 }, { "epoch": 0.94, "grad_norm": 1.8124396471473323, "learning_rate": 8.059270401650555e-08, "loss": 0.6052, "step": 9086 }, { "epoch": 0.94, "grad_norm": 1.8136504927323756, "learning_rate": 8.029195102077114e-08, "loss": 0.6718, "step": 9087 }, { "epoch": 0.94, "grad_norm": 1.9373249127008352, "learning_rate": 7.999175570198526e-08, "loss": 0.5822, "step": 9088 }, { "epoch": 0.94, "grad_norm": 2.048051219588817, "learning_rate": 7.969211809417732e-08, "loss": 0.6618, "step": 9089 }, { "epoch": 0.94, "grad_norm": 1.9640947013669483, "learning_rate": 7.939303823131295e-08, "loss": 0.6105, "step": 9090 }, { "epoch": 0.95, "grad_norm": 1.8903042757750237, "learning_rate": 7.909451614729335e-08, "loss": 0.5093, "step": 9091 }, { "epoch": 0.95, "grad_norm": 1.9700603234134746, "learning_rate": 7.87965518759598e-08, "loss": 0.6289, "step": 9092 }, { "epoch": 0.95, "grad_norm": 2.068173773432267, "learning_rate": 7.84991454510864e-08, "loss": 0.6136, "step": 9093 }, { "epoch": 0.95, "grad_norm": 1.9719085980940332, "learning_rate": 7.820229690638615e-08, "loss": 0.5706, "step": 9094 }, { "epoch": 0.95, "grad_norm": 1.8669821696404074, "learning_rate": 7.790600627550937e-08, "loss": 0.6763, "step": 9095 }, { "epoch": 0.95, "grad_norm": 2.0351969275458415, "learning_rate": 7.761027359204088e-08, "loss": 0.6411, "step": 9096 }, { "epoch": 0.95, "grad_norm": 2.034610079179534, "learning_rate": 7.731509888950551e-08, "loss": 0.5367, "step": 9097 }, { "epoch": 0.95, "grad_norm": 1.9903590314113853, "learning_rate": 7.702048220136149e-08, "loss": 0.6549, "step": 9098 }, { "epoch": 0.95, "grad_norm": 2.040096970601361, "learning_rate": 7.672642356100656e-08, "loss": 0.5158, "step": 9099 }, { "epoch": 0.95, "grad_norm": 1.8819140831818857, "learning_rate": 7.643292300177296e-08, "loss": 0.4779, "step": 9100 }, { "epoch": 0.95, "grad_norm": 1.684999361885513, "learning_rate": 7.613998055693073e-08, "loss": 0.6065, "step": 9101 }, { "epoch": 0.95, "grad_norm": 2.0139571541265853, "learning_rate": 7.584759625968663e-08, "loss": 0.554, "step": 9102 }, { "epoch": 0.95, "grad_norm": 1.8729824867562832, "learning_rate": 7.555577014318472e-08, "loss": 0.5385, "step": 9103 }, { "epoch": 0.95, "grad_norm": 2.080253251347515, "learning_rate": 7.526450224050407e-08, "loss": 0.5282, "step": 9104 }, { "epoch": 0.95, "grad_norm": 1.8347182779960427, "learning_rate": 7.497379258466275e-08, "loss": 0.5965, "step": 9105 }, { "epoch": 0.95, "grad_norm": 2.164534043831013, "learning_rate": 7.468364120861272e-08, "loss": 0.6141, "step": 9106 }, { "epoch": 0.95, "grad_norm": 2.2212477876578496, "learning_rate": 7.439404814524654e-08, "loss": 0.5259, "step": 9107 }, { "epoch": 0.95, "grad_norm": 1.87666751211576, "learning_rate": 7.410501342738963e-08, "loss": 0.5406, "step": 9108 }, { "epoch": 0.95, "grad_norm": 2.042540734017541, "learning_rate": 7.381653708780578e-08, "loss": 0.6218, "step": 9109 }, { "epoch": 0.95, "grad_norm": 1.9767662921202052, "learning_rate": 7.352861915919607e-08, "loss": 0.5197, "step": 9110 }, { "epoch": 0.95, "grad_norm": 1.9563546830435936, "learning_rate": 7.324125967419715e-08, "loss": 0.6026, "step": 9111 }, { "epoch": 0.95, "grad_norm": 2.0233014232164117, "learning_rate": 7.295445866538297e-08, "loss": 0.5401, "step": 9112 }, { "epoch": 0.95, "grad_norm": 2.059552075508856, "learning_rate": 7.26682161652642e-08, "loss": 0.5435, "step": 9113 }, { "epoch": 0.95, "grad_norm": 2.13299560125978, "learning_rate": 7.238253220628822e-08, "loss": 0.7473, "step": 9114 }, { "epoch": 0.95, "grad_norm": 2.2557692843805333, "learning_rate": 7.20974068208391e-08, "loss": 0.5964, "step": 9115 }, { "epoch": 0.95, "grad_norm": 1.9903729640281569, "learning_rate": 7.181284004123601e-08, "loss": 0.6408, "step": 9116 }, { "epoch": 0.95, "grad_norm": 1.8199528667987253, "learning_rate": 7.152883189973759e-08, "loss": 0.5784, "step": 9117 }, { "epoch": 0.95, "grad_norm": 2.35897713970099, "learning_rate": 7.124538242853751e-08, "loss": 0.6388, "step": 9118 }, { "epoch": 0.95, "grad_norm": 2.1030461531404296, "learning_rate": 7.096249165976621e-08, "loss": 0.6247, "step": 9119 }, { "epoch": 0.95, "grad_norm": 1.8662951848090812, "learning_rate": 7.06801596254908e-08, "loss": 0.6072, "step": 9120 }, { "epoch": 0.95, "grad_norm": 1.9138440363291855, "learning_rate": 7.039838635771623e-08, "loss": 0.5645, "step": 9121 }, { "epoch": 0.95, "grad_norm": 1.734258665755537, "learning_rate": 7.011717188838196e-08, "loss": 0.604, "step": 9122 }, { "epoch": 0.95, "grad_norm": 2.196151154914006, "learning_rate": 6.983651624936527e-08, "loss": 0.5313, "step": 9123 }, { "epoch": 0.95, "grad_norm": 1.7132829172978423, "learning_rate": 6.955641947248127e-08, "loss": 0.5929, "step": 9124 }, { "epoch": 0.95, "grad_norm": 1.9418755624331163, "learning_rate": 6.927688158947954e-08, "loss": 0.5897, "step": 9125 }, { "epoch": 0.95, "grad_norm": 1.9872540559183436, "learning_rate": 6.899790263204643e-08, "loss": 0.5944, "step": 9126 }, { "epoch": 0.95, "grad_norm": 2.1639015797194205, "learning_rate": 6.871948263180772e-08, "loss": 0.6074, "step": 9127 }, { "epoch": 0.95, "grad_norm": 1.9873225783662412, "learning_rate": 6.844162162032265e-08, "loss": 0.5684, "step": 9128 }, { "epoch": 0.95, "grad_norm": 2.0519891397743266, "learning_rate": 6.816431962908931e-08, "loss": 0.6091, "step": 9129 }, { "epoch": 0.95, "grad_norm": 1.9955079387546661, "learning_rate": 6.788757668954038e-08, "loss": 0.5752, "step": 9130 }, { "epoch": 0.95, "grad_norm": 2.0818890242182357, "learning_rate": 6.761139283304685e-08, "loss": 0.6259, "step": 9131 }, { "epoch": 0.95, "grad_norm": 1.9861722922728569, "learning_rate": 6.73357680909159e-08, "loss": 0.6536, "step": 9132 }, { "epoch": 0.95, "grad_norm": 1.955473365977711, "learning_rate": 6.706070249439034e-08, "loss": 0.5794, "step": 9133 }, { "epoch": 0.95, "grad_norm": 1.8569417282687593, "learning_rate": 6.678619607465131e-08, "loss": 0.5563, "step": 9134 }, { "epoch": 0.95, "grad_norm": 1.9271564060152484, "learning_rate": 6.651224886281504e-08, "loss": 0.6414, "step": 9135 }, { "epoch": 0.95, "grad_norm": 2.0471491330858442, "learning_rate": 6.623886088993559e-08, "loss": 0.5188, "step": 9136 }, { "epoch": 0.95, "grad_norm": 1.9551415429882093, "learning_rate": 6.59660321870026e-08, "loss": 0.5605, "step": 9137 }, { "epoch": 0.95, "grad_norm": 2.083480071147602, "learning_rate": 6.5693762784943e-08, "loss": 0.5904, "step": 9138 }, { "epoch": 0.95, "grad_norm": 1.828002306812625, "learning_rate": 6.542205271461988e-08, "loss": 0.5919, "step": 9139 }, { "epoch": 0.95, "grad_norm": 2.0114884598779534, "learning_rate": 6.515090200683361e-08, "loss": 0.5696, "step": 9140 }, { "epoch": 0.95, "grad_norm": 1.8440201739093747, "learning_rate": 6.48803106923196e-08, "loss": 0.6082, "step": 9141 }, { "epoch": 0.95, "grad_norm": 1.7543344260471143, "learning_rate": 6.461027880175219e-08, "loss": 0.6036, "step": 9142 }, { "epoch": 0.95, "grad_norm": 1.7472069441257652, "learning_rate": 6.434080636574025e-08, "loss": 0.6077, "step": 9143 }, { "epoch": 0.95, "grad_norm": 1.8148441768694168, "learning_rate": 6.407189341483044e-08, "loss": 0.5992, "step": 9144 }, { "epoch": 0.95, "grad_norm": 1.8986202705276924, "learning_rate": 6.380353997950506e-08, "loss": 0.5562, "step": 9145 }, { "epoch": 0.95, "grad_norm": 2.0313020160531634, "learning_rate": 6.35357460901842e-08, "loss": 0.5758, "step": 9146 }, { "epoch": 0.95, "grad_norm": 1.9249252266461854, "learning_rate": 6.326851177722304e-08, "loss": 0.5855, "step": 9147 }, { "epoch": 0.95, "grad_norm": 2.302195837407535, "learning_rate": 6.300183707091457e-08, "loss": 0.5979, "step": 9148 }, { "epoch": 0.95, "grad_norm": 1.942121352149436, "learning_rate": 6.273572200148792e-08, "loss": 0.5166, "step": 9149 }, { "epoch": 0.95, "grad_norm": 1.821356962288708, "learning_rate": 6.247016659910842e-08, "loss": 0.6556, "step": 9150 }, { "epoch": 0.95, "grad_norm": 2.0069073914638875, "learning_rate": 6.220517089387867e-08, "loss": 0.6701, "step": 9151 }, { "epoch": 0.95, "grad_norm": 1.7514971240436819, "learning_rate": 6.194073491583796e-08, "loss": 0.5036, "step": 9152 }, { "epoch": 0.95, "grad_norm": 2.0429327714393395, "learning_rate": 6.167685869495954e-08, "loss": 0.6446, "step": 9153 }, { "epoch": 0.95, "grad_norm": 2.136167526614985, "learning_rate": 6.141354226115781e-08, "loss": 0.6722, "step": 9154 }, { "epoch": 0.95, "grad_norm": 2.036591943231998, "learning_rate": 6.115078564427946e-08, "loss": 0.6177, "step": 9155 }, { "epoch": 0.95, "grad_norm": 1.957728748548829, "learning_rate": 6.088858887411064e-08, "loss": 0.5896, "step": 9156 }, { "epoch": 0.95, "grad_norm": 1.8733656466152195, "learning_rate": 6.06269519803715e-08, "loss": 0.6692, "step": 9157 }, { "epoch": 0.95, "grad_norm": 1.9703078943600634, "learning_rate": 6.036587499272161e-08, "loss": 0.5802, "step": 9158 }, { "epoch": 0.95, "grad_norm": 1.6708428863614988, "learning_rate": 6.010535794075455e-08, "loss": 0.4799, "step": 9159 }, { "epoch": 0.95, "grad_norm": 2.063875697905319, "learning_rate": 5.984540085400114e-08, "loss": 0.57, "step": 9160 }, { "epoch": 0.95, "grad_norm": 1.85335140179406, "learning_rate": 5.9586003761930024e-08, "loss": 0.5223, "step": 9161 }, { "epoch": 0.95, "grad_norm": 1.7270860909482966, "learning_rate": 5.932716669394489e-08, "loss": 0.5401, "step": 9162 }, { "epoch": 0.95, "grad_norm": 2.0866778648399165, "learning_rate": 5.9068889679385624e-08, "loss": 0.563, "step": 9163 }, { "epoch": 0.95, "grad_norm": 2.047048714162409, "learning_rate": 5.8811172747530457e-08, "loss": 0.5375, "step": 9164 }, { "epoch": 0.95, "grad_norm": 1.8605628615428378, "learning_rate": 5.855401592759269e-08, "loss": 0.5913, "step": 9165 }, { "epoch": 0.95, "grad_norm": 1.6883951918162357, "learning_rate": 5.8297419248722345e-08, "loss": 0.5417, "step": 9166 }, { "epoch": 0.95, "grad_norm": 1.9874976605092245, "learning_rate": 5.80413827400067e-08, "loss": 0.5758, "step": 9167 }, { "epoch": 0.95, "grad_norm": 2.095976679677836, "learning_rate": 5.778590643046811e-08, "loss": 0.5551, "step": 9168 }, { "epoch": 0.95, "grad_norm": 2.368713155416316, "learning_rate": 5.7530990349067285e-08, "loss": 0.5212, "step": 9169 }, { "epoch": 0.95, "grad_norm": 1.9132853451864427, "learning_rate": 5.727663452469945e-08, "loss": 0.6589, "step": 9170 }, { "epoch": 0.95, "grad_norm": 1.8104655617619578, "learning_rate": 5.702283898619765e-08, "loss": 0.5446, "step": 9171 }, { "epoch": 0.95, "grad_norm": 1.895549005171521, "learning_rate": 5.6769603762331096e-08, "loss": 0.6207, "step": 9172 }, { "epoch": 0.95, "grad_norm": 2.010640938641115, "learning_rate": 5.651692888180516e-08, "loss": 0.5883, "step": 9173 }, { "epoch": 0.95, "grad_norm": 1.9263129999779227, "learning_rate": 5.626481437326303e-08, "loss": 0.6767, "step": 9174 }, { "epoch": 0.95, "grad_norm": 1.9656014437831926, "learning_rate": 5.601326026528187e-08, "loss": 0.6445, "step": 9175 }, { "epoch": 0.95, "grad_norm": 2.0843236537066003, "learning_rate": 5.5762266586377734e-08, "loss": 0.6064, "step": 9176 }, { "epoch": 0.95, "grad_norm": 2.2182950535700776, "learning_rate": 5.551183336500177e-08, "loss": 0.597, "step": 9177 }, { "epoch": 0.95, "grad_norm": 2.0720744469238306, "learning_rate": 5.526196062954181e-08, "loss": 0.5568, "step": 9178 }, { "epoch": 0.95, "grad_norm": 1.9350111511493413, "learning_rate": 5.501264840832299e-08, "loss": 0.6638, "step": 9179 }, { "epoch": 0.95, "grad_norm": 1.9323446366770365, "learning_rate": 5.4763896729606023e-08, "loss": 0.578, "step": 9180 }, { "epoch": 0.95, "grad_norm": 2.09711440356542, "learning_rate": 5.451570562158892e-08, "loss": 0.6727, "step": 9181 }, { "epoch": 0.95, "grad_norm": 1.8486259014947013, "learning_rate": 5.4268075112403615e-08, "loss": 0.5326, "step": 9182 }, { "epoch": 0.95, "grad_norm": 2.002990590014281, "learning_rate": 5.402100523012266e-08, "loss": 0.6319, "step": 9183 }, { "epoch": 0.95, "grad_norm": 1.762628792862233, "learning_rate": 5.377449600275142e-08, "loss": 0.5471, "step": 9184 }, { "epoch": 0.95, "grad_norm": 1.80757756492264, "learning_rate": 5.352854745823366e-08, "loss": 0.5654, "step": 9185 }, { "epoch": 0.95, "grad_norm": 2.111766800819143, "learning_rate": 5.3283159624448745e-08, "loss": 0.6046, "step": 9186 }, { "epoch": 0.95, "grad_norm": 2.136818353546279, "learning_rate": 5.3038332529213865e-08, "loss": 0.633, "step": 9187 }, { "epoch": 0.96, "grad_norm": 1.7831004493513567, "learning_rate": 5.279406620028016e-08, "loss": 0.5601, "step": 9188 }, { "epoch": 0.96, "grad_norm": 1.9671860829871322, "learning_rate": 5.255036066533714e-08, "loss": 0.6037, "step": 9189 }, { "epoch": 0.96, "grad_norm": 2.2695919619215563, "learning_rate": 5.230721595201049e-08, "loss": 0.6201, "step": 9190 }, { "epoch": 0.96, "grad_norm": 1.9143495780323916, "learning_rate": 5.20646320878615e-08, "loss": 0.6209, "step": 9191 }, { "epoch": 0.96, "grad_norm": 1.7335234797845942, "learning_rate": 5.182260910038928e-08, "loss": 0.5238, "step": 9192 }, { "epoch": 0.96, "grad_norm": 2.1520062494936623, "learning_rate": 5.1581147017027434e-08, "loss": 0.5854, "step": 9193 }, { "epoch": 0.96, "grad_norm": 1.8251696136209343, "learning_rate": 5.134024586514796e-08, "loss": 0.5771, "step": 9194 }, { "epoch": 0.96, "grad_norm": 1.8451479021419956, "learning_rate": 5.109990567205792e-08, "loss": 0.5387, "step": 9195 }, { "epoch": 0.96, "grad_norm": 1.9829306324790406, "learning_rate": 5.08601264650016e-08, "loss": 0.5184, "step": 9196 }, { "epoch": 0.96, "grad_norm": 2.0004402204722864, "learning_rate": 5.06209082711584e-08, "loss": 0.5989, "step": 9197 }, { "epoch": 0.96, "grad_norm": 1.9331331565930703, "learning_rate": 5.038225111764605e-08, "loss": 0.6236, "step": 9198 }, { "epoch": 0.96, "grad_norm": 2.1842830535619893, "learning_rate": 5.0144155031517926e-08, "loss": 0.5366, "step": 9199 }, { "epoch": 0.96, "grad_norm": 2.0003594845759616, "learning_rate": 4.990662003976243e-08, "loss": 0.6162, "step": 9200 }, { "epoch": 0.96, "grad_norm": 1.9969106571586461, "learning_rate": 4.966964616930692e-08, "loss": 0.6471, "step": 9201 }, { "epoch": 0.96, "grad_norm": 1.971192026882797, "learning_rate": 4.943323344701212e-08, "loss": 0.6109, "step": 9202 }, { "epoch": 0.96, "grad_norm": 2.158762303401497, "learning_rate": 4.919738189967771e-08, "loss": 0.6428, "step": 9203 }, { "epoch": 0.96, "grad_norm": 1.6185547120279675, "learning_rate": 4.8962091554039524e-08, "loss": 0.6368, "step": 9204 }, { "epoch": 0.96, "grad_norm": 2.015253292430434, "learning_rate": 4.8727362436767344e-08, "loss": 0.6126, "step": 9205 }, { "epoch": 0.96, "grad_norm": 2.116270646369772, "learning_rate": 4.8493194574470436e-08, "loss": 0.6873, "step": 9206 }, { "epoch": 0.96, "grad_norm": 1.7183519515168044, "learning_rate": 4.825958799369201e-08, "loss": 0.508, "step": 9207 }, { "epoch": 0.96, "grad_norm": 1.7666831560398584, "learning_rate": 4.802654272091367e-08, "loss": 0.5841, "step": 9208 }, { "epoch": 0.96, "grad_norm": 1.8230875968232338, "learning_rate": 4.779405878255206e-08, "loss": 0.5549, "step": 9209 }, { "epoch": 0.96, "grad_norm": 2.152194890452504, "learning_rate": 4.756213620496053e-08, "loss": 0.6126, "step": 9210 }, { "epoch": 0.96, "grad_norm": 2.015006634328195, "learning_rate": 4.733077501442862e-08, "loss": 0.6799, "step": 9211 }, { "epoch": 0.96, "grad_norm": 2.256874151082544, "learning_rate": 4.709997523718257e-08, "loss": 0.5797, "step": 9212 }, { "epoch": 0.96, "grad_norm": 1.9338870307485518, "learning_rate": 4.6869736899385345e-08, "loss": 0.5389, "step": 9213 }, { "epoch": 0.96, "grad_norm": 1.866170129258445, "learning_rate": 4.664006002713495e-08, "loss": 0.6559, "step": 9214 }, { "epoch": 0.96, "grad_norm": 2.1576898953891415, "learning_rate": 4.641094464646667e-08, "loss": 0.6158, "step": 9215 }, { "epoch": 0.96, "grad_norm": 1.8488446862845445, "learning_rate": 4.618239078335307e-08, "loss": 0.5886, "step": 9216 }, { "epoch": 0.96, "grad_norm": 2.046528350714155, "learning_rate": 4.5954398463700647e-08, "loss": 0.6249, "step": 9217 }, { "epoch": 0.96, "grad_norm": 1.860971563552356, "learning_rate": 4.572696771335483e-08, "loss": 0.6386, "step": 9218 }, { "epoch": 0.96, "grad_norm": 1.9383068774147776, "learning_rate": 4.5500098558095565e-08, "loss": 0.5341, "step": 9219 }, { "epoch": 0.96, "grad_norm": 2.1917997000300655, "learning_rate": 4.5273791023639494e-08, "loss": 0.5904, "step": 9220 }, { "epoch": 0.96, "grad_norm": 2.0815462160143805, "learning_rate": 4.504804513564054e-08, "loss": 0.6328, "step": 9221 }, { "epoch": 0.96, "grad_norm": 1.8893378428676797, "learning_rate": 4.482286091968768e-08, "loss": 0.5827, "step": 9222 }, { "epoch": 0.96, "grad_norm": 1.8432453644442008, "learning_rate": 4.459823840130717e-08, "loss": 0.5942, "step": 9223 }, { "epoch": 0.96, "grad_norm": 2.0539820466658094, "learning_rate": 4.437417760596141e-08, "loss": 0.5402, "step": 9224 }, { "epoch": 0.96, "grad_norm": 2.0722178972880947, "learning_rate": 4.415067855904842e-08, "loss": 0.6071, "step": 9225 }, { "epoch": 0.96, "grad_norm": 2.2177273773031763, "learning_rate": 4.392774128590349e-08, "loss": 0.6256, "step": 9226 }, { "epoch": 0.96, "grad_norm": 2.0694785751628038, "learning_rate": 4.3705365811797515e-08, "loss": 0.668, "step": 9227 }, { "epoch": 0.96, "grad_norm": 1.8951352083608757, "learning_rate": 4.348355216193867e-08, "loss": 0.6042, "step": 9228 }, { "epoch": 0.96, "grad_norm": 2.028419678421276, "learning_rate": 4.326230036147017e-08, "loss": 0.5914, "step": 9229 }, { "epoch": 0.96, "grad_norm": 2.0823053199915336, "learning_rate": 4.30416104354725e-08, "loss": 0.5709, "step": 9230 }, { "epoch": 0.96, "grad_norm": 1.823570228290745, "learning_rate": 4.282148240896178e-08, "loss": 0.599, "step": 9231 }, { "epoch": 0.96, "grad_norm": 1.74260159641539, "learning_rate": 4.2601916306891365e-08, "loss": 0.5198, "step": 9232 }, { "epoch": 0.96, "grad_norm": 1.7015132786393319, "learning_rate": 4.2382912154150244e-08, "loss": 0.5233, "step": 9233 }, { "epoch": 0.96, "grad_norm": 1.961974043992251, "learning_rate": 4.216446997556245e-08, "loss": 0.5958, "step": 9234 }, { "epoch": 0.96, "grad_norm": 2.013194471656937, "learning_rate": 4.19465897958915e-08, "loss": 0.541, "step": 9235 }, { "epoch": 0.96, "grad_norm": 2.210012908565479, "learning_rate": 4.1729271639834315e-08, "loss": 0.6799, "step": 9236 }, { "epoch": 0.96, "grad_norm": 2.0115717834978177, "learning_rate": 4.151251553202562e-08, "loss": 0.5222, "step": 9237 }, { "epoch": 0.96, "grad_norm": 1.9749451130038853, "learning_rate": 4.129632149703522e-08, "loss": 0.6103, "step": 9238 }, { "epoch": 0.96, "grad_norm": 1.8871785221244468, "learning_rate": 4.1080689559370724e-08, "loss": 0.6575, "step": 9239 }, { "epoch": 0.96, "grad_norm": 2.039353039205422, "learning_rate": 4.086561974347536e-08, "loss": 0.6047, "step": 9240 }, { "epoch": 0.96, "grad_norm": 2.191591399613144, "learning_rate": 4.065111207372796e-08, "loss": 0.6459, "step": 9241 }, { "epoch": 0.96, "grad_norm": 2.071197060104028, "learning_rate": 4.043716657444407e-08, "loss": 0.6256, "step": 9242 }, { "epoch": 0.96, "grad_norm": 1.920300538975992, "learning_rate": 4.022378326987597e-08, "loss": 0.6548, "step": 9243 }, { "epoch": 0.96, "grad_norm": 2.0560108290708023, "learning_rate": 4.001096218421152e-08, "loss": 0.5192, "step": 9244 }, { "epoch": 0.96, "grad_norm": 2.002316195882517, "learning_rate": 3.9798703341575875e-08, "loss": 0.5916, "step": 9245 }, { "epoch": 0.96, "grad_norm": 2.191893706906724, "learning_rate": 3.9587006766029225e-08, "loss": 0.7022, "step": 9246 }, { "epoch": 0.96, "grad_norm": 1.960519681813049, "learning_rate": 3.937587248156904e-08, "loss": 0.5285, "step": 9247 }, { "epoch": 0.96, "grad_norm": 2.1307289887200067, "learning_rate": 3.916530051212841e-08, "loss": 0.7106, "step": 9248 }, { "epoch": 0.96, "grad_norm": 2.0097373182537246, "learning_rate": 3.8955290881576566e-08, "loss": 0.6415, "step": 9249 }, { "epoch": 0.96, "grad_norm": 2.088973178619039, "learning_rate": 3.874584361371947e-08, "loss": 0.619, "step": 9250 }, { "epoch": 0.96, "grad_norm": 2.3453820853091965, "learning_rate": 3.8536958732299234e-08, "loss": 0.7008, "step": 9251 }, { "epoch": 0.96, "grad_norm": 1.8925152599216863, "learning_rate": 3.832863626099359e-08, "loss": 0.6295, "step": 9252 }, { "epoch": 0.96, "grad_norm": 1.9730246462677488, "learning_rate": 3.8120876223418646e-08, "loss": 0.6821, "step": 9253 }, { "epoch": 0.96, "grad_norm": 1.995162008625557, "learning_rate": 3.791367864312334e-08, "loss": 0.5596, "step": 9254 }, { "epoch": 0.96, "grad_norm": 1.9608437281347626, "learning_rate": 3.770704354359611e-08, "loss": 0.6098, "step": 9255 }, { "epoch": 0.96, "grad_norm": 1.9119220279863318, "learning_rate": 3.750097094825933e-08, "loss": 0.5848, "step": 9256 }, { "epoch": 0.96, "grad_norm": 1.8167225502830717, "learning_rate": 3.729546088047264e-08, "loss": 0.5353, "step": 9257 }, { "epoch": 0.96, "grad_norm": 1.8657223958933558, "learning_rate": 3.709051336353187e-08, "loss": 0.5794, "step": 9258 }, { "epoch": 0.96, "grad_norm": 2.3673185647016375, "learning_rate": 3.688612842066952e-08, "loss": 0.6088, "step": 9259 }, { "epoch": 0.96, "grad_norm": 1.908061434806496, "learning_rate": 3.6682306075052634e-08, "loss": 0.519, "step": 9260 }, { "epoch": 0.96, "grad_norm": 1.9989992305528361, "learning_rate": 3.64790463497866e-08, "loss": 0.5655, "step": 9261 }, { "epoch": 0.96, "grad_norm": 2.077972598614312, "learning_rate": 3.627634926791246e-08, "loss": 0.6019, "step": 9262 }, { "epoch": 0.96, "grad_norm": 1.9263719734688765, "learning_rate": 3.6074214852405695e-08, "loss": 0.6626, "step": 9263 }, { "epoch": 0.96, "grad_norm": 1.8445050717486768, "learning_rate": 3.587264312618022e-08, "loss": 0.5336, "step": 9264 }, { "epoch": 0.96, "grad_norm": 2.085353318157501, "learning_rate": 3.567163411208552e-08, "loss": 0.5532, "step": 9265 }, { "epoch": 0.96, "grad_norm": 1.8421560673376067, "learning_rate": 3.547118783290615e-08, "loss": 0.6325, "step": 9266 }, { "epoch": 0.96, "grad_norm": 1.9754557962477426, "learning_rate": 3.527130431136505e-08, "loss": 0.5936, "step": 9267 }, { "epoch": 0.96, "grad_norm": 2.199110288587306, "learning_rate": 3.507198357011909e-08, "loss": 0.6782, "step": 9268 }, { "epoch": 0.96, "grad_norm": 1.9254296087735812, "learning_rate": 3.487322563176354e-08, "loss": 0.5547, "step": 9269 }, { "epoch": 0.96, "grad_norm": 2.0537058557901986, "learning_rate": 3.467503051882815e-08, "loss": 0.5562, "step": 9270 }, { "epoch": 0.96, "grad_norm": 1.8764811043642, "learning_rate": 3.4477398253778826e-08, "loss": 0.6119, "step": 9271 }, { "epoch": 0.96, "grad_norm": 1.9573268571815328, "learning_rate": 3.4280328859019885e-08, "loss": 0.6284, "step": 9272 }, { "epoch": 0.96, "grad_norm": 2.2651084124987184, "learning_rate": 3.408382235688845e-08, "loss": 0.5998, "step": 9273 }, { "epoch": 0.96, "grad_norm": 1.7190886471188342, "learning_rate": 3.388787876966115e-08, "loss": 0.5464, "step": 9274 }, { "epoch": 0.96, "grad_norm": 1.898764604876328, "learning_rate": 3.3692498119548e-08, "loss": 0.5864, "step": 9275 }, { "epoch": 0.96, "grad_norm": 1.9545866295420675, "learning_rate": 3.3497680428697943e-08, "loss": 0.6006, "step": 9276 }, { "epoch": 0.96, "grad_norm": 1.9882529876550887, "learning_rate": 3.330342571919332e-08, "loss": 0.6126, "step": 9277 }, { "epoch": 0.96, "grad_norm": 2.1666195490031344, "learning_rate": 3.3109734013055396e-08, "loss": 0.7462, "step": 9278 }, { "epoch": 0.96, "grad_norm": 1.8273279290401259, "learning_rate": 3.2916605332238284e-08, "loss": 0.6519, "step": 9279 }, { "epoch": 0.96, "grad_norm": 2.25238653257137, "learning_rate": 3.2724039698636135e-08, "loss": 0.6955, "step": 9280 }, { "epoch": 0.96, "grad_norm": 2.1016705536415943, "learning_rate": 3.2532037134076486e-08, "loss": 0.6079, "step": 9281 }, { "epoch": 0.96, "grad_norm": 2.0163167940465256, "learning_rate": 3.234059766032416e-08, "loss": 0.56, "step": 9282 }, { "epoch": 0.96, "grad_norm": 1.9318858697010097, "learning_rate": 3.214972129907956e-08, "loss": 0.6073, "step": 9283 }, { "epoch": 0.97, "grad_norm": 1.7757658787840196, "learning_rate": 3.195940807198039e-08, "loss": 0.5379, "step": 9284 }, { "epoch": 0.97, "grad_norm": 1.8460998635082828, "learning_rate": 3.1769658000598835e-08, "loss": 0.5544, "step": 9285 }, { "epoch": 0.97, "grad_norm": 2.0236445760390427, "learning_rate": 3.158047110644436e-08, "loss": 0.5358, "step": 9286 }, { "epoch": 0.97, "grad_norm": 1.9270061530827873, "learning_rate": 3.1391847410962573e-08, "loss": 0.6229, "step": 9287 }, { "epoch": 0.97, "grad_norm": 1.9604700327535756, "learning_rate": 3.1203786935535275e-08, "loss": 0.6332, "step": 9288 }, { "epoch": 0.97, "grad_norm": 1.9975031878953207, "learning_rate": 3.1016289701479296e-08, "loss": 0.552, "step": 9289 }, { "epoch": 0.97, "grad_norm": 1.8939971445756167, "learning_rate": 3.082935573004986e-08, "loss": 0.5689, "step": 9290 }, { "epoch": 0.97, "grad_norm": 1.804739917206502, "learning_rate": 3.064298504243612e-08, "loss": 0.6185, "step": 9291 }, { "epoch": 0.97, "grad_norm": 1.9170101195891174, "learning_rate": 3.0457177659764524e-08, "loss": 0.6434, "step": 9292 }, { "epoch": 0.97, "grad_norm": 1.934719583858853, "learning_rate": 3.02719336030971e-08, "loss": 0.6144, "step": 9293 }, { "epoch": 0.97, "grad_norm": 1.929080277217315, "learning_rate": 3.008725289343206e-08, "loss": 0.6113, "step": 9294 }, { "epoch": 0.97, "grad_norm": 2.0128393803689226, "learning_rate": 2.990313555170488e-08, "loss": 0.6372, "step": 9295 }, { "epoch": 0.97, "grad_norm": 1.9044953243775646, "learning_rate": 2.9719581598786072e-08, "loss": 0.6269, "step": 9296 }, { "epoch": 0.97, "grad_norm": 1.987130561174613, "learning_rate": 2.953659105548179e-08, "loss": 0.5736, "step": 9297 }, { "epoch": 0.97, "grad_norm": 1.9601612132663782, "learning_rate": 2.9354163942535983e-08, "loss": 0.5485, "step": 9298 }, { "epoch": 0.97, "grad_norm": 1.8276899215854248, "learning_rate": 2.9172300280627674e-08, "loss": 0.5822, "step": 9299 }, { "epoch": 0.97, "grad_norm": 1.9585863100809016, "learning_rate": 2.899100009037148e-08, "loss": 0.6306, "step": 9300 }, { "epoch": 0.97, "grad_norm": 1.8717322423573106, "learning_rate": 2.8810263392319293e-08, "loss": 0.5468, "step": 9301 }, { "epoch": 0.97, "grad_norm": 1.8433778506189058, "learning_rate": 2.863009020695917e-08, "loss": 0.6548, "step": 9302 }, { "epoch": 0.97, "grad_norm": 1.8375022349395036, "learning_rate": 2.845048055471311e-08, "loss": 0.635, "step": 9303 }, { "epoch": 0.97, "grad_norm": 1.986408985770117, "learning_rate": 2.8271434455942604e-08, "loss": 0.6994, "step": 9304 }, { "epoch": 0.97, "grad_norm": 1.7962105054371804, "learning_rate": 2.809295193094308e-08, "loss": 0.6022, "step": 9305 }, { "epoch": 0.97, "grad_norm": 2.129112709564285, "learning_rate": 2.7915032999946133e-08, "loss": 0.6974, "step": 9306 }, { "epoch": 0.97, "grad_norm": 2.2015618631548834, "learning_rate": 2.7737677683120077e-08, "loss": 0.5899, "step": 9307 }, { "epoch": 0.97, "grad_norm": 2.032080111074871, "learning_rate": 2.7560886000569386e-08, "loss": 0.6206, "step": 9308 }, { "epoch": 0.97, "grad_norm": 1.9551756649376424, "learning_rate": 2.7384657972334137e-08, "loss": 0.5469, "step": 9309 }, { "epoch": 0.97, "grad_norm": 1.957396990331527, "learning_rate": 2.7208993618390578e-08, "loss": 0.6458, "step": 9310 }, { "epoch": 0.97, "grad_norm": 1.9599537127158781, "learning_rate": 2.7033892958651665e-08, "loss": 0.6791, "step": 9311 }, { "epoch": 0.97, "grad_norm": 1.7920889911825337, "learning_rate": 2.6859356012965964e-08, "loss": 0.4747, "step": 9312 }, { "epoch": 0.97, "grad_norm": 1.944794784351862, "learning_rate": 2.6685382801118765e-08, "loss": 0.5882, "step": 9313 }, { "epoch": 0.97, "grad_norm": 1.8238895229633358, "learning_rate": 2.6511973342829843e-08, "loss": 0.5857, "step": 9314 }, { "epoch": 0.97, "grad_norm": 1.772948928337245, "learning_rate": 2.6339127657756814e-08, "loss": 0.5756, "step": 9315 }, { "epoch": 0.97, "grad_norm": 2.1942165661966677, "learning_rate": 2.6166845765492333e-08, "loss": 0.5701, "step": 9316 }, { "epoch": 0.97, "grad_norm": 1.980825317874205, "learning_rate": 2.5995127685566335e-08, "loss": 0.579, "step": 9317 }, { "epoch": 0.97, "grad_norm": 1.9101493425224827, "learning_rate": 2.5823973437442696e-08, "loss": 0.6031, "step": 9318 }, { "epoch": 0.97, "grad_norm": 1.8369726379682219, "learning_rate": 2.5653383040524228e-08, "loss": 0.6034, "step": 9319 }, { "epoch": 0.97, "grad_norm": 1.8791588215949244, "learning_rate": 2.5483356514147128e-08, "loss": 0.6301, "step": 9320 }, { "epoch": 0.97, "grad_norm": 1.9328706087321703, "learning_rate": 2.531389387758598e-08, "loss": 0.6878, "step": 9321 }, { "epoch": 0.97, "grad_norm": 2.0484986499553894, "learning_rate": 2.5144995150049312e-08, "loss": 0.6561, "step": 9322 }, { "epoch": 0.97, "grad_norm": 1.8432600727248396, "learning_rate": 2.497666035068347e-08, "loss": 0.5751, "step": 9323 }, { "epoch": 0.97, "grad_norm": 2.221330921501575, "learning_rate": 2.48088894985693e-08, "loss": 0.6359, "step": 9324 }, { "epoch": 0.97, "grad_norm": 2.0728270839250973, "learning_rate": 2.464168261272548e-08, "loss": 0.5583, "step": 9325 }, { "epoch": 0.97, "grad_norm": 2.0001901715742494, "learning_rate": 2.4475039712105742e-08, "loss": 0.6303, "step": 9326 }, { "epoch": 0.97, "grad_norm": 1.9665561149483282, "learning_rate": 2.4308960815599412e-08, "loss": 0.6741, "step": 9327 }, { "epoch": 0.97, "grad_norm": 2.008847781228865, "learning_rate": 2.4143445942033105e-08, "loss": 0.5627, "step": 9328 }, { "epoch": 0.97, "grad_norm": 1.9899789683173035, "learning_rate": 2.3978495110168477e-08, "loss": 0.6163, "step": 9329 }, { "epoch": 0.97, "grad_norm": 1.9583106013630442, "learning_rate": 2.38141083387039e-08, "loss": 0.6204, "step": 9330 }, { "epoch": 0.97, "grad_norm": 2.1674235835611495, "learning_rate": 2.3650285646273362e-08, "loss": 0.5559, "step": 9331 }, { "epoch": 0.97, "grad_norm": 1.8975429563101518, "learning_rate": 2.348702705144701e-08, "loss": 0.615, "step": 9332 }, { "epoch": 0.97, "grad_norm": 2.090178727546794, "learning_rate": 2.33243325727317e-08, "loss": 0.7192, "step": 9333 }, { "epoch": 0.97, "grad_norm": 1.881027148542671, "learning_rate": 2.3162202228569353e-08, "loss": 0.5326, "step": 9334 }, { "epoch": 0.97, "grad_norm": 3.9837370957943103, "learning_rate": 2.300063603733804e-08, "loss": 0.6979, "step": 9335 }, { "epoch": 0.97, "grad_norm": 1.7547614413184414, "learning_rate": 2.2839634017353118e-08, "loss": 0.5466, "step": 9336 }, { "epoch": 0.97, "grad_norm": 2.0185544324841307, "learning_rate": 2.267919618686443e-08, "loss": 0.6056, "step": 9337 }, { "epoch": 0.97, "grad_norm": 2.0292071357538766, "learning_rate": 2.251932256405853e-08, "loss": 0.6469, "step": 9338 }, { "epoch": 0.97, "grad_norm": 1.804027643043366, "learning_rate": 2.2360013167057602e-08, "loss": 0.5343, "step": 9339 }, { "epoch": 0.97, "grad_norm": 1.9766515344332558, "learning_rate": 2.220126801392164e-08, "loss": 0.5715, "step": 9340 }, { "epoch": 0.97, "grad_norm": 1.9002895224444987, "learning_rate": 2.2043087122644023e-08, "loss": 0.7093, "step": 9341 }, { "epoch": 0.97, "grad_norm": 2.1821936473764985, "learning_rate": 2.188547051115597e-08, "loss": 0.5981, "step": 9342 }, { "epoch": 0.97, "grad_norm": 1.9245969157170688, "learning_rate": 2.1728418197323742e-08, "loss": 0.5908, "step": 9343 }, { "epoch": 0.97, "grad_norm": 2.12228430228684, "learning_rate": 2.1571930198950874e-08, "loss": 0.5723, "step": 9344 }, { "epoch": 0.97, "grad_norm": 1.8732070599355801, "learning_rate": 2.1416006533775957e-08, "loss": 0.5704, "step": 9345 }, { "epoch": 0.97, "grad_norm": 1.897499138291424, "learning_rate": 2.1260647219473742e-08, "loss": 0.5922, "step": 9346 }, { "epoch": 0.97, "grad_norm": 2.1515440733884086, "learning_rate": 2.110585227365458e-08, "loss": 0.6734, "step": 9347 }, { "epoch": 0.97, "grad_norm": 2.0743929440733906, "learning_rate": 2.095162171386611e-08, "loss": 0.6869, "step": 9348 }, { "epoch": 0.97, "grad_norm": 1.9802962077153008, "learning_rate": 2.0797955557590454e-08, "loss": 0.5603, "step": 9349 }, { "epoch": 0.97, "grad_norm": 1.902902250037822, "learning_rate": 2.064485382224757e-08, "loss": 0.6033, "step": 9350 }, { "epoch": 0.97, "grad_norm": 1.9135159924715113, "learning_rate": 2.04923165251919e-08, "loss": 0.6096, "step": 9351 }, { "epoch": 0.97, "grad_norm": 2.001111887938867, "learning_rate": 2.0340343683714624e-08, "loss": 0.66, "step": 9352 }, { "epoch": 0.97, "grad_norm": 2.2845616405345233, "learning_rate": 2.018893531504196e-08, "loss": 0.7283, "step": 9353 }, { "epoch": 0.97, "grad_norm": 2.0236378220572275, "learning_rate": 2.0038091436337392e-08, "loss": 0.5974, "step": 9354 }, { "epoch": 0.97, "grad_norm": 1.7188227459501022, "learning_rate": 1.9887812064700028e-08, "loss": 0.5671, "step": 9355 }, { "epoch": 0.97, "grad_norm": 1.9222401255258874, "learning_rate": 1.973809721716513e-08, "loss": 0.5832, "step": 9356 }, { "epoch": 0.97, "grad_norm": 2.0028373714720367, "learning_rate": 1.9588946910703567e-08, "loss": 0.5924, "step": 9357 }, { "epoch": 0.97, "grad_norm": 1.8852559821091195, "learning_rate": 1.9440361162222367e-08, "loss": 0.6009, "step": 9358 }, { "epoch": 0.97, "grad_norm": 1.6645381563062454, "learning_rate": 1.929233998856417e-08, "loss": 0.5973, "step": 9359 }, { "epoch": 0.97, "grad_norm": 1.8784683299762617, "learning_rate": 1.914488340650833e-08, "loss": 0.6166, "step": 9360 }, { "epoch": 0.97, "grad_norm": 1.9423516733751873, "learning_rate": 1.8997991432769812e-08, "loss": 0.5375, "step": 9361 }, { "epoch": 0.97, "grad_norm": 1.896972762313844, "learning_rate": 1.8851664083999742e-08, "loss": 0.5419, "step": 9362 }, { "epoch": 0.97, "grad_norm": 1.9254153543066406, "learning_rate": 1.8705901376784852e-08, "loss": 0.5956, "step": 9363 }, { "epoch": 0.97, "grad_norm": 1.837387913964868, "learning_rate": 1.8560703327649144e-08, "loss": 0.5569, "step": 9364 }, { "epoch": 0.97, "grad_norm": 2.1808835722204445, "learning_rate": 1.8416069953050565e-08, "loss": 0.6779, "step": 9365 }, { "epoch": 0.97, "grad_norm": 2.1182269084331415, "learning_rate": 1.8272001269384886e-08, "loss": 0.6208, "step": 9366 }, { "epoch": 0.97, "grad_norm": 1.8505257750618553, "learning_rate": 1.812849729298238e-08, "loss": 0.5383, "step": 9367 }, { "epoch": 0.97, "grad_norm": 1.85357421039629, "learning_rate": 1.7985558040110594e-08, "loss": 0.657, "step": 9368 }, { "epoch": 0.97, "grad_norm": 2.0404214691662395, "learning_rate": 1.784318352697212e-08, "loss": 0.6548, "step": 9369 }, { "epoch": 0.97, "grad_norm": 2.0413228816834392, "learning_rate": 1.7701373769706265e-08, "loss": 0.6042, "step": 9370 }, { "epoch": 0.97, "grad_norm": 2.192428086651773, "learning_rate": 1.7560128784387953e-08, "loss": 0.6355, "step": 9371 }, { "epoch": 0.97, "grad_norm": 1.693491337066328, "learning_rate": 1.741944858702771e-08, "loss": 0.5473, "step": 9372 }, { "epoch": 0.97, "grad_norm": 1.9447757214859827, "learning_rate": 1.7279333193573332e-08, "loss": 0.5628, "step": 9373 }, { "epoch": 0.97, "grad_norm": 1.9521877617740033, "learning_rate": 1.7139782619906565e-08, "loss": 0.5326, "step": 9374 }, { "epoch": 0.97, "grad_norm": 2.1318514544476956, "learning_rate": 1.700079688184697e-08, "loss": 0.5991, "step": 9375 }, { "epoch": 0.97, "grad_norm": 2.111380016447978, "learning_rate": 1.686237599514917e-08, "loss": 0.5793, "step": 9376 }, { "epoch": 0.97, "grad_norm": 2.0061463420649823, "learning_rate": 1.672451997550395e-08, "loss": 0.5981, "step": 9377 }, { "epoch": 0.97, "grad_norm": 1.9289965935272066, "learning_rate": 1.658722883853825e-08, "loss": 0.6348, "step": 9378 }, { "epoch": 0.97, "grad_norm": 1.759822966934597, "learning_rate": 1.6450502599814622e-08, "loss": 0.5902, "step": 9379 }, { "epoch": 0.98, "grad_norm": 1.830609332103879, "learning_rate": 1.631434127483178e-08, "loss": 0.6012, "step": 9380 }, { "epoch": 0.98, "grad_norm": 1.903223961744007, "learning_rate": 1.6178744879024045e-08, "loss": 0.5514, "step": 9381 }, { "epoch": 0.98, "grad_norm": 1.9105186920277744, "learning_rate": 1.604371342776301e-08, "loss": 0.528, "step": 9382 }, { "epoch": 0.98, "grad_norm": 1.7738895886516455, "learning_rate": 1.5909246936354205e-08, "loss": 0.5702, "step": 9383 }, { "epoch": 0.98, "grad_norm": 1.9203976490248245, "learning_rate": 1.5775345420041e-08, "loss": 0.509, "step": 9384 }, { "epoch": 0.98, "grad_norm": 1.981620797130497, "learning_rate": 1.564200889400125e-08, "loss": 0.6442, "step": 9385 }, { "epoch": 0.98, "grad_norm": 1.686120661097064, "learning_rate": 1.5509237373349527e-08, "loss": 0.5022, "step": 9386 }, { "epoch": 0.98, "grad_norm": 2.011187623061088, "learning_rate": 1.537703087313658e-08, "loss": 0.6468, "step": 9387 }, { "epoch": 0.98, "grad_norm": 1.7837135872441217, "learning_rate": 1.5245389408348744e-08, "loss": 0.5189, "step": 9388 }, { "epoch": 0.98, "grad_norm": 1.9655077598433057, "learning_rate": 1.5114312993908532e-08, "loss": 0.5999, "step": 9389 }, { "epoch": 0.98, "grad_norm": 2.052384135182507, "learning_rate": 1.4983801644672948e-08, "loss": 0.5486, "step": 9390 }, { "epoch": 0.98, "grad_norm": 2.1277424515467844, "learning_rate": 1.4853855375437377e-08, "loss": 0.6428, "step": 9391 }, { "epoch": 0.98, "grad_norm": 1.977814419207284, "learning_rate": 1.4724474200931704e-08, "loss": 0.6306, "step": 9392 }, { "epoch": 0.98, "grad_norm": 2.138743859165059, "learning_rate": 1.4595658135822533e-08, "loss": 0.7269, "step": 9393 }, { "epoch": 0.98, "grad_norm": 1.7904427754103556, "learning_rate": 1.4467407194710958e-08, "loss": 0.5781, "step": 9394 }, { "epoch": 0.98, "grad_norm": 1.9993046254554308, "learning_rate": 1.4339721392135352e-08, "loss": 0.6969, "step": 9395 }, { "epoch": 0.98, "grad_norm": 2.0825943027963394, "learning_rate": 1.4212600742569694e-08, "loss": 0.6154, "step": 9396 }, { "epoch": 0.98, "grad_norm": 1.9421694999551333, "learning_rate": 1.4086045260423564e-08, "loss": 0.6173, "step": 9397 }, { "epoch": 0.98, "grad_norm": 2.0324173008481954, "learning_rate": 1.3960054960043267e-08, "loss": 0.6327, "step": 9398 }, { "epoch": 0.98, "grad_norm": 2.3108147574118845, "learning_rate": 1.3834629855710158e-08, "loss": 0.7125, "step": 9399 }, { "epoch": 0.98, "grad_norm": 1.973482473276303, "learning_rate": 1.370976996164175e-08, "loss": 0.6279, "step": 9400 }, { "epoch": 0.98, "grad_norm": 2.086224009235821, "learning_rate": 1.3585475291991724e-08, "loss": 0.5979, "step": 9401 }, { "epoch": 0.98, "grad_norm": 1.9357123351662384, "learning_rate": 1.3461745860849917e-08, "loss": 0.6264, "step": 9402 }, { "epoch": 0.98, "grad_norm": 1.7861483587494011, "learning_rate": 1.333858168224178e-08, "loss": 0.561, "step": 9403 }, { "epoch": 0.98, "grad_norm": 2.0311118954318155, "learning_rate": 1.3215982770128366e-08, "loss": 0.7493, "step": 9404 }, { "epoch": 0.98, "grad_norm": 2.045942396521214, "learning_rate": 1.3093949138406892e-08, "loss": 0.661, "step": 9405 }, { "epoch": 0.98, "grad_norm": 2.07625840929344, "learning_rate": 1.2972480800910181e-08, "loss": 0.6479, "step": 9406 }, { "epoch": 0.98, "grad_norm": 2.067265450826643, "learning_rate": 1.285157777140833e-08, "loss": 0.587, "step": 9407 }, { "epoch": 0.98, "grad_norm": 2.119763936274861, "learning_rate": 1.2731240063605931e-08, "loss": 0.5975, "step": 9408 }, { "epoch": 0.98, "grad_norm": 2.024916147212216, "learning_rate": 1.2611467691144297e-08, "loss": 0.6936, "step": 9409 }, { "epoch": 0.98, "grad_norm": 1.983127501150537, "learning_rate": 1.2492260667599232e-08, "loss": 0.6452, "step": 9410 }, { "epoch": 0.98, "grad_norm": 1.8649494084823937, "learning_rate": 1.2373619006484927e-08, "loss": 0.5512, "step": 9411 }, { "epoch": 0.98, "grad_norm": 2.0159592835650364, "learning_rate": 1.2255542721248959e-08, "loss": 0.4776, "step": 9412 }, { "epoch": 0.98, "grad_norm": 2.1385407875276283, "learning_rate": 1.213803182527673e-08, "loss": 0.617, "step": 9413 }, { "epoch": 0.98, "grad_norm": 1.834974861679934, "learning_rate": 1.2021086331888143e-08, "loss": 0.6695, "step": 9414 }, { "epoch": 0.98, "grad_norm": 2.0853828764496343, "learning_rate": 1.190470625434037e-08, "loss": 0.6612, "step": 9415 }, { "epoch": 0.98, "grad_norm": 1.98313825019437, "learning_rate": 1.1788891605825081e-08, "loss": 0.6369, "step": 9416 }, { "epoch": 0.98, "grad_norm": 2.117767892785416, "learning_rate": 1.1673642399470663e-08, "loss": 0.5904, "step": 9417 }, { "epoch": 0.98, "grad_norm": 2.003554226087581, "learning_rate": 1.1558958648341667e-08, "loss": 0.6737, "step": 9418 }, { "epoch": 0.98, "grad_norm": 1.8760117400728373, "learning_rate": 1.1444840365437692e-08, "loss": 0.5358, "step": 9419 }, { "epoch": 0.98, "grad_norm": 1.826652775318849, "learning_rate": 1.1331287563695059e-08, "loss": 0.5802, "step": 9420 }, { "epoch": 0.98, "grad_norm": 2.210283365637128, "learning_rate": 1.1218300255985137e-08, "loss": 0.6695, "step": 9421 }, { "epoch": 0.98, "grad_norm": 1.9114471453067057, "learning_rate": 1.1105878455116015e-08, "loss": 0.6807, "step": 9422 }, { "epoch": 0.98, "grad_norm": 2.308622820891408, "learning_rate": 1.0994022173831386e-08, "loss": 0.5271, "step": 9423 }, { "epoch": 0.98, "grad_norm": 2.154207219627266, "learning_rate": 1.088273142481111e-08, "loss": 0.6708, "step": 9424 }, { "epoch": 0.98, "grad_norm": 2.062686057137944, "learning_rate": 1.0772006220670094e-08, "loss": 0.6164, "step": 9425 }, { "epoch": 0.98, "grad_norm": 1.9871469414224412, "learning_rate": 1.0661846573959412e-08, "loss": 0.5549, "step": 9426 }, { "epoch": 0.98, "grad_norm": 1.9153912700098923, "learning_rate": 1.0552252497166849e-08, "loss": 0.5732, "step": 9427 }, { "epoch": 0.98, "grad_norm": 2.0642917207760196, "learning_rate": 1.0443224002715801e-08, "loss": 0.6291, "step": 9428 }, { "epoch": 0.98, "grad_norm": 1.8687845348076741, "learning_rate": 1.0334761102964163e-08, "loss": 0.5605, "step": 9429 }, { "epoch": 0.98, "grad_norm": 2.0119112889139665, "learning_rate": 1.022686381020821e-08, "loss": 0.6568, "step": 9430 }, { "epoch": 0.98, "grad_norm": 2.142024130237965, "learning_rate": 1.01195321366776e-08, "loss": 0.6009, "step": 9431 }, { "epoch": 0.98, "grad_norm": 1.9652097501051895, "learning_rate": 1.0012766094539273e-08, "loss": 0.5757, "step": 9432 }, { "epoch": 0.98, "grad_norm": 2.11686381269212, "learning_rate": 9.90656569589632e-09, "loss": 0.6156, "step": 9433 }, { "epoch": 0.98, "grad_norm": 1.8540572366176127, "learning_rate": 9.800930952786336e-09, "loss": 0.5143, "step": 9434 }, { "epoch": 0.98, "grad_norm": 1.8884066141241278, "learning_rate": 9.695861877184187e-09, "loss": 0.6136, "step": 9435 }, { "epoch": 0.98, "grad_norm": 1.8442181601552678, "learning_rate": 9.59135848099979e-09, "loss": 0.556, "step": 9436 }, { "epoch": 0.98, "grad_norm": 2.010704790123921, "learning_rate": 9.487420776079226e-09, "loss": 0.6489, "step": 9437 }, { "epoch": 0.98, "grad_norm": 2.1347997954400997, "learning_rate": 9.38404877420418e-09, "loss": 0.6559, "step": 9438 }, { "epoch": 0.98, "grad_norm": 2.0640248046696636, "learning_rate": 9.281242487093612e-09, "loss": 0.622, "step": 9439 }, { "epoch": 0.98, "grad_norm": 2.0903299573728393, "learning_rate": 9.179001926399866e-09, "loss": 0.5661, "step": 9440 }, { "epoch": 0.98, "grad_norm": 2.011961237897929, "learning_rate": 9.077327103713119e-09, "loss": 0.6185, "step": 9441 }, { "epoch": 0.98, "grad_norm": 1.9101374524807846, "learning_rate": 8.976218030558592e-09, "loss": 0.5676, "step": 9442 }, { "epoch": 0.98, "grad_norm": 1.997803767273175, "learning_rate": 8.87567471839712e-09, "loss": 0.5351, "step": 9443 }, { "epoch": 0.98, "grad_norm": 1.8370994226150568, "learning_rate": 8.775697178626807e-09, "loss": 0.5526, "step": 9444 }, { "epoch": 0.98, "grad_norm": 1.8661873226938193, "learning_rate": 8.676285422580255e-09, "loss": 0.6151, "step": 9445 }, { "epoch": 0.98, "grad_norm": 2.0505710073168952, "learning_rate": 8.577439461526782e-09, "loss": 0.593, "step": 9446 }, { "epoch": 0.98, "grad_norm": 1.9162215230441964, "learning_rate": 8.479159306670203e-09, "loss": 0.6494, "step": 9447 }, { "epoch": 0.98, "grad_norm": 1.9933094696955849, "learning_rate": 8.381444969151608e-09, "loss": 0.5787, "step": 9448 }, { "epoch": 0.98, "grad_norm": 1.8322768596191124, "learning_rate": 8.284296460047691e-09, "loss": 0.6197, "step": 9449 }, { "epoch": 0.98, "grad_norm": 1.9649119341867372, "learning_rate": 8.1877137903702e-09, "loss": 0.5414, "step": 9450 }, { "epoch": 0.98, "grad_norm": 1.8174793553111006, "learning_rate": 8.091696971068152e-09, "loss": 0.5382, "step": 9451 }, { "epoch": 0.98, "grad_norm": 2.024317306699993, "learning_rate": 7.996246013025067e-09, "loss": 0.5741, "step": 9452 }, { "epoch": 0.98, "grad_norm": 2.293287635439528, "learning_rate": 7.901360927061174e-09, "loss": 0.6518, "step": 9453 }, { "epoch": 0.98, "grad_norm": 2.086365621299315, "learning_rate": 7.807041723931763e-09, "loss": 0.5751, "step": 9454 }, { "epoch": 0.98, "grad_norm": 1.936273640010896, "learning_rate": 7.713288414328835e-09, "loss": 0.7171, "step": 9455 }, { "epoch": 0.98, "grad_norm": 1.768772970005881, "learning_rate": 7.620101008879444e-09, "loss": 0.498, "step": 9456 }, { "epoch": 0.98, "grad_norm": 1.8866484555298333, "learning_rate": 7.527479518147918e-09, "loss": 0.5336, "step": 9457 }, { "epoch": 0.98, "grad_norm": 1.9101721466577788, "learning_rate": 7.435423952631971e-09, "loss": 0.5666, "step": 9458 }, { "epoch": 0.98, "grad_norm": 2.1247138345157066, "learning_rate": 7.343934322767699e-09, "loss": 0.636, "step": 9459 }, { "epoch": 0.98, "grad_norm": 2.0203673144733316, "learning_rate": 7.253010638925695e-09, "loss": 0.4786, "step": 9460 }, { "epoch": 0.98, "grad_norm": 2.0489168644317868, "learning_rate": 7.1626529114127155e-09, "loss": 0.6674, "step": 9461 }, { "epoch": 0.98, "grad_norm": 2.014479744809945, "learning_rate": 7.072861150471122e-09, "loss": 0.6233, "step": 9462 }, { "epoch": 0.98, "grad_norm": 2.1318020390134382, "learning_rate": 6.98363536627944e-09, "loss": 0.6574, "step": 9463 }, { "epoch": 0.98, "grad_norm": 2.126617683494603, "learning_rate": 6.894975568951801e-09, "loss": 0.6201, "step": 9464 }, { "epoch": 0.98, "grad_norm": 2.1193407018704273, "learning_rate": 6.806881768539053e-09, "loss": 0.5861, "step": 9465 }, { "epoch": 0.98, "grad_norm": 2.1218760503414384, "learning_rate": 6.719353975025989e-09, "loss": 0.7565, "step": 9466 }, { "epoch": 0.98, "grad_norm": 1.9224166553741697, "learning_rate": 6.63239219833467e-09, "loss": 0.6918, "step": 9467 }, { "epoch": 0.98, "grad_norm": 1.90476914149826, "learning_rate": 6.5459964483233215e-09, "loss": 0.5775, "step": 9468 }, { "epoch": 0.98, "grad_norm": 2.0805059353113027, "learning_rate": 6.460166734785223e-09, "loss": 0.6907, "step": 9469 }, { "epoch": 0.98, "grad_norm": 2.039038658401796, "learning_rate": 6.374903067448701e-09, "loss": 0.5588, "step": 9470 }, { "epoch": 0.98, "grad_norm": 1.9051077555679738, "learning_rate": 6.290205455980469e-09, "loss": 0.6101, "step": 9471 }, { "epoch": 0.98, "grad_norm": 1.9114694925966835, "learning_rate": 6.2060739099800704e-09, "loss": 0.6417, "step": 9472 }, { "epoch": 0.98, "grad_norm": 2.0034549196304288, "learning_rate": 6.122508438984875e-09, "loss": 0.6337, "step": 9473 }, { "epoch": 0.98, "grad_norm": 1.929605593178843, "learning_rate": 6.039509052467862e-09, "loss": 0.6391, "step": 9474 }, { "epoch": 0.98, "grad_norm": 2.0565609104314695, "learning_rate": 5.957075759837061e-09, "loss": 0.6141, "step": 9475 }, { "epoch": 0.99, "grad_norm": 1.8833486655812495, "learning_rate": 5.875208570436663e-09, "loss": 0.5839, "step": 9476 }, { "epoch": 0.99, "grad_norm": 2.023643705570554, "learning_rate": 5.793907493546469e-09, "loss": 0.5759, "step": 9477 }, { "epoch": 0.99, "grad_norm": 1.8797156245993185, "learning_rate": 5.713172538383549e-09, "loss": 0.5925, "step": 9478 }, { "epoch": 0.99, "grad_norm": 2.029520402534025, "learning_rate": 5.6330037140989166e-09, "loss": 0.5766, "step": 9479 }, { "epoch": 0.99, "grad_norm": 2.1692397510491266, "learning_rate": 5.5534010297803034e-09, "loss": 0.5764, "step": 9480 }, { "epoch": 0.99, "grad_norm": 2.101204297729523, "learning_rate": 5.474364494451045e-09, "loss": 0.5781, "step": 9481 }, { "epoch": 0.99, "grad_norm": 1.9301730906644166, "learning_rate": 5.395894117070089e-09, "loss": 0.7224, "step": 9482 }, { "epoch": 0.99, "grad_norm": 1.7290399911078331, "learning_rate": 5.317989906533094e-09, "loss": 0.5331, "step": 9483 }, { "epoch": 0.99, "grad_norm": 1.7841568775539753, "learning_rate": 5.2406518716707766e-09, "loss": 0.63, "step": 9484 }, { "epoch": 0.99, "grad_norm": 2.154539636467135, "learning_rate": 5.1638800212494566e-09, "loss": 0.6452, "step": 9485 }, { "epoch": 0.99, "grad_norm": 2.0702545934630017, "learning_rate": 5.087674363972173e-09, "loss": 0.5551, "step": 9486 }, { "epoch": 0.99, "grad_norm": 1.9771019143634083, "learning_rate": 5.01203490847646e-09, "loss": 0.5728, "step": 9487 }, { "epoch": 0.99, "grad_norm": 1.8392551379936382, "learning_rate": 4.93696166333768e-09, "loss": 0.6022, "step": 9488 }, { "epoch": 0.99, "grad_norm": 1.9594213971787073, "learning_rate": 4.8624546370651395e-09, "loss": 0.6141, "step": 9489 }, { "epoch": 0.99, "grad_norm": 1.8586510143718253, "learning_rate": 4.788513838104858e-09, "loss": 0.6084, "step": 9490 }, { "epoch": 0.99, "grad_norm": 1.6622364581523985, "learning_rate": 4.7151392748379095e-09, "loss": 0.5573, "step": 9491 }, { "epoch": 0.99, "grad_norm": 2.07679473110362, "learning_rate": 4.642330955582641e-09, "loss": 0.6039, "step": 9492 }, { "epoch": 0.99, "grad_norm": 1.8678922922176417, "learning_rate": 4.57008888859134e-09, "loss": 0.5118, "step": 9493 }, { "epoch": 0.99, "grad_norm": 1.9392398900547896, "learning_rate": 4.498413082053566e-09, "loss": 0.6151, "step": 9494 }, { "epoch": 0.99, "grad_norm": 1.9618678399300282, "learning_rate": 4.42730354409393e-09, "loss": 0.6178, "step": 9495 }, { "epoch": 0.99, "grad_norm": 1.8062383269521096, "learning_rate": 4.356760282773209e-09, "loss": 0.5622, "step": 9496 }, { "epoch": 0.99, "grad_norm": 1.8782190565478545, "learning_rate": 4.286783306087783e-09, "loss": 0.5159, "step": 9497 }, { "epoch": 0.99, "grad_norm": 1.976593913395866, "learning_rate": 4.21737262197075e-09, "loss": 0.6743, "step": 9498 }, { "epoch": 0.99, "grad_norm": 1.9818379691434536, "learning_rate": 4.148528238289151e-09, "loss": 0.6773, "step": 9499 }, { "epoch": 0.99, "grad_norm": 2.0152612123522133, "learning_rate": 4.080250162847299e-09, "loss": 0.6145, "step": 9500 }, { "epoch": 0.99, "grad_norm": 2.1683488804338475, "learning_rate": 4.0125384033845586e-09, "loss": 0.5787, "step": 9501 }, { "epoch": 0.99, "grad_norm": 1.8175768439932203, "learning_rate": 3.945392967577011e-09, "loss": 0.555, "step": 9502 }, { "epoch": 0.99, "grad_norm": 2.1157456006926907, "learning_rate": 3.8788138630357905e-09, "loss": 0.5811, "step": 9503 }, { "epoch": 0.99, "grad_norm": 2.1523135188211087, "learning_rate": 3.812801097308194e-09, "loss": 0.6436, "step": 9504 }, { "epoch": 0.99, "grad_norm": 1.9801354903550479, "learning_rate": 3.747354677876569e-09, "loss": 0.7153, "step": 9505 }, { "epoch": 0.99, "grad_norm": 1.9816879347353105, "learning_rate": 3.682474612159981e-09, "loss": 0.561, "step": 9506 }, { "epoch": 0.99, "grad_norm": 2.036183290464764, "learning_rate": 3.6181609075131017e-09, "loss": 0.7561, "step": 9507 }, { "epoch": 0.99, "grad_norm": 2.1541774964882485, "learning_rate": 3.5544135712262116e-09, "loss": 0.642, "step": 9508 }, { "epoch": 0.99, "grad_norm": 2.2831346674446213, "learning_rate": 3.4912326105246418e-09, "loss": 0.6434, "step": 9509 }, { "epoch": 0.99, "grad_norm": 1.958292617985834, "learning_rate": 3.4286180325715513e-09, "loss": 0.6362, "step": 9510 }, { "epoch": 0.99, "grad_norm": 1.9314782542001665, "learning_rate": 3.3665698444640406e-09, "loss": 0.5673, "step": 9511 }, { "epoch": 0.99, "grad_norm": 1.991690621452874, "learning_rate": 3.3050880532359277e-09, "loss": 0.5816, "step": 9512 }, { "epoch": 0.99, "grad_norm": 2.5630124336650852, "learning_rate": 3.2441726658560825e-09, "loss": 0.591, "step": 9513 }, { "epoch": 0.99, "grad_norm": 1.98736244563508, "learning_rate": 3.183823689230092e-09, "loss": 0.661, "step": 9514 }, { "epoch": 0.99, "grad_norm": 1.976417843136004, "learning_rate": 3.1240411301980413e-09, "loss": 0.6741, "step": 9515 }, { "epoch": 0.99, "grad_norm": 2.0882239803944302, "learning_rate": 3.0648249955378405e-09, "loss": 0.6652, "step": 9516 }, { "epoch": 0.99, "grad_norm": 2.163565371948662, "learning_rate": 3.006175291960789e-09, "loss": 0.5793, "step": 9517 }, { "epoch": 0.99, "grad_norm": 2.00691462554776, "learning_rate": 2.9480920261154565e-09, "loss": 0.6054, "step": 9518 }, { "epoch": 0.99, "grad_norm": 2.076838889221059, "learning_rate": 2.8905752045865766e-09, "loss": 0.6402, "step": 9519 }, { "epoch": 0.99, "grad_norm": 2.115754866675787, "learning_rate": 2.833624833893933e-09, "loss": 0.6234, "step": 9520 }, { "epoch": 0.99, "grad_norm": 2.0742777041776113, "learning_rate": 2.7772409204923633e-09, "loss": 0.529, "step": 9521 }, { "epoch": 0.99, "grad_norm": 2.033700985807466, "learning_rate": 2.721423470773421e-09, "loss": 0.5545, "step": 9522 }, { "epoch": 0.99, "grad_norm": 1.8799003192685757, "learning_rate": 2.6661724910653774e-09, "loss": 0.626, "step": 9523 }, { "epoch": 0.99, "grad_norm": 2.276364282368124, "learning_rate": 2.6114879876298905e-09, "loss": 0.7034, "step": 9524 }, { "epoch": 0.99, "grad_norm": 1.9794806333649237, "learning_rate": 2.5573699666664455e-09, "loss": 0.5946, "step": 9525 }, { "epoch": 0.99, "grad_norm": 1.849392363068563, "learning_rate": 2.5038184343101346e-09, "loss": 0.6102, "step": 9526 }, { "epoch": 0.99, "grad_norm": 1.9217115956324478, "learning_rate": 2.4508333966305473e-09, "loss": 0.5229, "step": 9527 }, { "epoch": 0.99, "grad_norm": 2.019210354811399, "learning_rate": 2.3984148596339907e-09, "loss": 0.5405, "step": 9528 }, { "epoch": 0.99, "grad_norm": 2.3043229195920816, "learning_rate": 2.3465628292623776e-09, "loss": 0.6815, "step": 9529 }, { "epoch": 0.99, "grad_norm": 2.080083068675512, "learning_rate": 2.295277311393784e-09, "loss": 0.579, "step": 9530 }, { "epoch": 0.99, "grad_norm": 2.1199920401449552, "learning_rate": 2.2445583118413384e-09, "loss": 0.6651, "step": 9531 }, { "epoch": 0.99, "grad_norm": 2.0842626790362053, "learning_rate": 2.19440583635433e-09, "loss": 0.6451, "step": 9532 }, { "epoch": 0.99, "grad_norm": 2.0915589293813173, "learning_rate": 2.1448198906182106e-09, "loss": 0.6338, "step": 9533 }, { "epoch": 0.99, "grad_norm": 2.0306946472868996, "learning_rate": 2.0958004802529297e-09, "loss": 0.5401, "step": 9534 }, { "epoch": 0.99, "grad_norm": 2.055855540710789, "learning_rate": 2.047347610816819e-09, "loss": 0.586, "step": 9535 }, { "epoch": 0.99, "grad_norm": 1.9878400684546984, "learning_rate": 1.999461287800486e-09, "loss": 0.5352, "step": 9536 }, { "epoch": 0.99, "grad_norm": 1.905336891439375, "learning_rate": 1.9521415166329216e-09, "loss": 0.484, "step": 9537 }, { "epoch": 0.99, "grad_norm": 1.8815579402867484, "learning_rate": 1.9053883026781685e-09, "loss": 0.6296, "step": 9538 }, { "epoch": 0.99, "grad_norm": 1.984575489338281, "learning_rate": 1.8592016512358757e-09, "loss": 0.5511, "step": 9539 }, { "epoch": 0.99, "grad_norm": 2.046470877718721, "learning_rate": 1.8135815675418556e-09, "loss": 0.6024, "step": 9540 }, { "epoch": 0.99, "grad_norm": 1.9722402840427116, "learning_rate": 1.7685280567664165e-09, "loss": 0.5257, "step": 9541 }, { "epoch": 0.99, "grad_norm": 2.000880220808784, "learning_rate": 1.7240411240176946e-09, "loss": 0.5769, "step": 9542 }, { "epoch": 0.99, "grad_norm": 2.1063839838590925, "learning_rate": 1.680120774338323e-09, "loss": 0.6462, "step": 9543 }, { "epoch": 0.99, "grad_norm": 2.0056915245462923, "learning_rate": 1.6367670127059864e-09, "loss": 0.63, "step": 9544 }, { "epoch": 0.99, "grad_norm": 1.8659277173450295, "learning_rate": 1.5939798440367527e-09, "loss": 0.5298, "step": 9545 }, { "epoch": 0.99, "grad_norm": 1.864106975642594, "learning_rate": 1.5517592731789654e-09, "loss": 0.5465, "step": 9546 }, { "epoch": 0.99, "grad_norm": 1.9575734799120101, "learning_rate": 1.5101053049199065e-09, "loss": 0.5879, "step": 9547 }, { "epoch": 0.99, "grad_norm": 1.9740114393181525, "learning_rate": 1.4690179439807993e-09, "loss": 0.64, "step": 9548 }, { "epoch": 0.99, "grad_norm": 1.9215329174434153, "learning_rate": 1.4284971950195847e-09, "loss": 0.652, "step": 9549 }, { "epoch": 0.99, "grad_norm": 1.8879530552664527, "learning_rate": 1.3885430626287e-09, "loss": 0.5143, "step": 9550 }, { "epoch": 0.99, "grad_norm": 2.0021158704501776, "learning_rate": 1.3491555513378552e-09, "loss": 0.6374, "step": 9551 }, { "epoch": 0.99, "grad_norm": 1.750227327277223, "learning_rate": 1.310334665611257e-09, "loss": 0.6026, "step": 9552 }, { "epoch": 0.99, "grad_norm": 1.924162397266902, "learning_rate": 1.2720804098498297e-09, "loss": 0.6646, "step": 9553 }, { "epoch": 0.99, "grad_norm": 1.9347971214125042, "learning_rate": 1.2343927883901042e-09, "loss": 0.6712, "step": 9554 }, { "epoch": 0.99, "grad_norm": 2.006885502405168, "learning_rate": 1.1972718055036637e-09, "loss": 0.5202, "step": 9555 }, { "epoch": 0.99, "grad_norm": 1.689423303629976, "learning_rate": 1.1607174653988085e-09, "loss": 0.5011, "step": 9556 }, { "epoch": 0.99, "grad_norm": 1.9127280989270514, "learning_rate": 1.124729772219446e-09, "loss": 0.4981, "step": 9557 }, { "epoch": 0.99, "grad_norm": 2.0010664585250657, "learning_rate": 1.089308730043981e-09, "loss": 0.6541, "step": 9558 }, { "epoch": 0.99, "grad_norm": 2.07992521114073, "learning_rate": 1.0544543428886445e-09, "loss": 0.6736, "step": 9559 }, { "epoch": 0.99, "grad_norm": 1.9283583857469802, "learning_rate": 1.0201666147041656e-09, "loss": 0.6337, "step": 9560 }, { "epoch": 0.99, "grad_norm": 1.9700243707520468, "learning_rate": 9.864455493763247e-10, "loss": 0.6305, "step": 9561 }, { "epoch": 0.99, "grad_norm": 2.147094232957025, "learning_rate": 9.5329115072873e-10, "loss": 0.6686, "step": 9562 }, { "epoch": 0.99, "grad_norm": 2.079316149178158, "learning_rate": 9.207034225189315e-10, "loss": 0.5308, "step": 9563 }, { "epoch": 0.99, "grad_norm": 2.354067111579948, "learning_rate": 8.886823684417512e-10, "loss": 0.6647, "step": 9564 }, { "epoch": 0.99, "grad_norm": 1.7465751308270212, "learning_rate": 8.57227992125953e-10, "loss": 0.5295, "step": 9565 }, { "epoch": 0.99, "grad_norm": 1.9868861820598962, "learning_rate": 8.263402971375734e-10, "loss": 0.5493, "step": 9566 }, { "epoch": 0.99, "grad_norm": 2.22374318466106, "learning_rate": 7.960192869782557e-10, "loss": 0.6371, "step": 9567 }, { "epoch": 0.99, "grad_norm": 1.9923933780189522, "learning_rate": 7.662649650841403e-10, "loss": 0.5635, "step": 9568 }, { "epoch": 0.99, "grad_norm": 2.038120541474471, "learning_rate": 7.370773348286397e-10, "loss": 0.5779, "step": 9569 }, { "epoch": 0.99, "grad_norm": 1.9771844157809515, "learning_rate": 7.084563995202187e-10, "loss": 0.526, "step": 9570 }, { "epoch": 0.99, "grad_norm": 1.812053455374471, "learning_rate": 6.80402162403504e-10, "loss": 0.5634, "step": 9571 }, { "epoch": 1.0, "grad_norm": 1.9921272648147166, "learning_rate": 6.529146266587294e-10, "loss": 0.5553, "step": 9572 }, { "epoch": 1.0, "grad_norm": 1.859834751177411, "learning_rate": 6.259937954006257e-10, "loss": 0.6282, "step": 9573 }, { "epoch": 1.0, "grad_norm": 2.040795878230898, "learning_rate": 5.996396716823061e-10, "loss": 0.641, "step": 9574 }, { "epoch": 1.0, "grad_norm": 1.9718501791261143, "learning_rate": 5.738522584897155e-10, "loss": 0.5137, "step": 9575 }, { "epoch": 1.0, "grad_norm": 2.019272879708207, "learning_rate": 5.486315587471813e-10, "loss": 0.613, "step": 9576 }, { "epoch": 1.0, "grad_norm": 2.0140937375266206, "learning_rate": 5.239775753129728e-10, "loss": 0.5492, "step": 9577 }, { "epoch": 1.0, "grad_norm": 1.7876855888321224, "learning_rate": 4.998903109826314e-10, "loss": 0.5968, "step": 9578 }, { "epoch": 1.0, "grad_norm": 1.937692711387017, "learning_rate": 4.763697684850854e-10, "loss": 0.5326, "step": 9579 }, { "epoch": 1.0, "grad_norm": 1.8871781497039766, "learning_rate": 4.5341595048764564e-10, "loss": 0.5952, "step": 9580 }, { "epoch": 1.0, "grad_norm": 1.657956554866962, "learning_rate": 4.310288595921197e-10, "loss": 0.4911, "step": 9581 }, { "epoch": 1.0, "grad_norm": 1.953405180081528, "learning_rate": 4.0920849833592236e-10, "loss": 0.6131, "step": 9582 }, { "epoch": 1.0, "grad_norm": 2.091422857268565, "learning_rate": 3.879548691926305e-10, "loss": 0.6022, "step": 9583 }, { "epoch": 1.0, "grad_norm": 1.9263886894181483, "learning_rate": 3.6726797457198314e-10, "loss": 0.5952, "step": 9584 }, { "epoch": 1.0, "grad_norm": 2.146763105003528, "learning_rate": 3.471478168176612e-10, "loss": 0.6512, "step": 9585 }, { "epoch": 1.0, "grad_norm": 1.931113208646632, "learning_rate": 3.27594398211728e-10, "loss": 0.5383, "step": 9586 }, { "epoch": 1.0, "grad_norm": 1.9413320484945569, "learning_rate": 3.086077209701888e-10, "loss": 0.6305, "step": 9587 }, { "epoch": 1.0, "grad_norm": 1.917465576733899, "learning_rate": 2.901877872452108e-10, "loss": 0.5411, "step": 9588 }, { "epoch": 1.0, "grad_norm": 1.9595757628143469, "learning_rate": 2.723345991245685e-10, "loss": 0.5646, "step": 9589 }, { "epoch": 1.0, "grad_norm": 2.196310127527789, "learning_rate": 2.550481586321985e-10, "loss": 0.5639, "step": 9590 }, { "epoch": 1.0, "grad_norm": 1.900419785333137, "learning_rate": 2.3832846772819937e-10, "loss": 0.7065, "step": 9591 }, { "epoch": 1.0, "grad_norm": 1.9730652890498885, "learning_rate": 2.2217552830716693e-10, "loss": 0.6706, "step": 9592 }, { "epoch": 1.0, "grad_norm": 1.9593091425150002, "learning_rate": 2.0658934219985883e-10, "loss": 0.5465, "step": 9593 }, { "epoch": 1.0, "grad_norm": 2.135942361894095, "learning_rate": 1.9156991117430523e-10, "loss": 0.758, "step": 9594 }, { "epoch": 1.0, "grad_norm": 1.930526843929985, "learning_rate": 1.7711723693192296e-10, "loss": 0.5605, "step": 9595 }, { "epoch": 1.0, "grad_norm": 1.9256736889575217, "learning_rate": 1.6323132111084605e-10, "loss": 0.6454, "step": 9596 }, { "epoch": 1.0, "grad_norm": 2.064044604856687, "learning_rate": 1.49912165286481e-10, "loss": 0.6168, "step": 9597 }, { "epoch": 1.0, "grad_norm": 1.8522204350674754, "learning_rate": 1.3715977096706578e-10, "loss": 0.626, "step": 9598 }, { "epoch": 1.0, "grad_norm": 2.0195678223686127, "learning_rate": 1.24974139599221e-10, "loss": 0.7152, "step": 9599 }, { "epoch": 1.0, "grad_norm": 1.974776889449131, "learning_rate": 1.1335527256350898e-10, "loss": 0.5263, "step": 9600 }, { "epoch": 1.0, "grad_norm": 1.9816482265824762, "learning_rate": 1.0230317117776445e-10, "loss": 0.5823, "step": 9601 }, { "epoch": 1.0, "grad_norm": 1.9456288642203419, "learning_rate": 9.181783669431898e-11, "loss": 0.5181, "step": 9602 }, { "epoch": 1.0, "grad_norm": 1.8505141834481915, "learning_rate": 8.189927030222145e-11, "loss": 0.5149, "step": 9603 }, { "epoch": 1.0, "grad_norm": 1.9702244129196227, "learning_rate": 7.254747312501754e-11, "loss": 0.6123, "step": 9604 }, { "epoch": 1.0, "grad_norm": 1.9383150259419573, "learning_rate": 6.376244622297023e-11, "loss": 0.6127, "step": 9605 }, { "epoch": 1.0, "grad_norm": 1.9864553209022995, "learning_rate": 5.554419059250471e-11, "loss": 0.6042, "step": 9606 }, { "epoch": 1.0, "grad_norm": 1.9772866782374987, "learning_rate": 4.7892707164542975e-11, "loss": 0.5077, "step": 9607 }, { "epoch": 1.0, "grad_norm": 2.0014007844865565, "learning_rate": 4.080799680727943e-11, "loss": 0.6155, "step": 9608 }, { "epoch": 1.0, "grad_norm": 2.2486787110200264, "learning_rate": 3.42900603228502e-11, "loss": 0.5979, "step": 9609 }, { "epoch": 1.0, "grad_norm": 2.064265143022498, "learning_rate": 2.833889845010873e-11, "loss": 0.6211, "step": 9610 }, { "epoch": 1.0, "grad_norm": 2.081375211775521, "learning_rate": 2.2954511864625717e-11, "loss": 0.5994, "step": 9611 }, { "epoch": 1.0, "grad_norm": 1.7557657446910315, "learning_rate": 1.8136901175913602e-11, "loss": 0.6341, "step": 9612 }, { "epoch": 1.0, "grad_norm": 1.8772616582779724, "learning_rate": 1.3886066930202113e-11, "loss": 0.6788, "step": 9613 }, { "epoch": 1.0, "grad_norm": 1.6975943413231218, "learning_rate": 1.0202009609883157e-11, "loss": 0.4914, "step": 9614 }, { "epoch": 1.0, "grad_norm": 1.9089824368308848, "learning_rate": 7.084729631845477e-12, "loss": 0.5957, "step": 9615 }, { "epoch": 1.0, "grad_norm": 1.8543367974168876, "learning_rate": 4.534227349695108e-12, "loss": 0.6776, "step": 9616 }, { "epoch": 1.0, "grad_norm": 1.842851875350606, "learning_rate": 2.5505030532002594e-12, "loss": 0.5382, "step": 9617 }, { "epoch": 1.0, "grad_norm": 1.8926880273749038, "learning_rate": 1.13355696607087e-12, "loss": 0.6009, "step": 9618 }, { "epoch": 1.0, "grad_norm": 1.9155764490752814, "learning_rate": 2.8338924928927867e-13, "loss": 0.5445, "step": 9619 }, { "epoch": 1.0, "grad_norm": 1.8883391558284497, "learning_rate": 0.0, "loss": 0.6125, "step": 9620 }, { "epoch": 1.0, "step": 9620, "total_flos": 2995729119199232.0, "train_loss": 0.6451899706387966, "train_runtime": 324149.63, "train_samples_per_second": 3.799, "train_steps_per_second": 0.03 } ], "logging_steps": 1.0, "max_steps": 9620, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 2995729119199232.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }