{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9989708404802744,
  "eval_steps": 500,
  "global_step": 728,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 9.090909090909091e-07, "loss": 1.4022, "step": 3},
    {"epoch": 0.01, "learning_rate": 1.8181818181818183e-06, "loss": 1.4239, "step": 6},
    {"epoch": 0.01, "learning_rate": 2.7272727272727272e-06, "loss": 1.3843, "step": 9},
    {"epoch": 0.02, "learning_rate": 3.6363636363636366e-06, "loss": 1.3722, "step": 12},
    {"epoch": 0.02, "learning_rate": 4.5454545454545455e-06, "loss": 1.3411, "step": 15},
    {"epoch": 0.02, "learning_rate": 5.4545454545454545e-06, "loss": 1.3187, "step": 18},
    {"epoch": 0.03, "learning_rate": 6.363636363636364e-06, "loss": 1.284, "step": 21},
    {"epoch": 0.03, "learning_rate": 7.272727272727273e-06, "loss": 1.2492, "step": 24},
    {"epoch": 0.04, "learning_rate": 8.181818181818183e-06, "loss": 1.2658, "step": 27},
    {"epoch": 0.04, "learning_rate": 9.090909090909091e-06, "loss": 1.2173, "step": 30},
    {"epoch": 0.05, "learning_rate": 1e-05, "loss": 1.2302, "step": 33},
    {"epoch": 0.05, "learning_rate": 1.0909090909090909e-05, "loss": 1.2301, "step": 36},
    {"epoch": 0.05, "learning_rate": 1.181818181818182e-05, "loss": 1.1855, "step": 39},
    {"epoch": 0.06, "learning_rate": 1.2727272727272728e-05, "loss": 1.2094, "step": 42},
    {"epoch": 0.06, "learning_rate": 1.3636363636363637e-05, "loss": 1.1788, "step": 45},
    {"epoch": 0.07, "learning_rate": 1.4545454545454546e-05, "loss": 1.1804, "step": 48},
    {"epoch": 0.07, "learning_rate": 1.5454545454545454e-05, "loss": 1.166, "step": 51},
    {"epoch": 0.07, "learning_rate": 1.6363636363636366e-05, "loss": 1.1256, "step": 54},
    {"epoch": 0.08, "learning_rate": 1.7272727272727274e-05, "loss": 1.1289, "step": 57},
    {"epoch": 0.08, "learning_rate": 1.8181818181818182e-05, "loss": 1.1392, "step": 60},
    {"epoch": 0.09, "learning_rate": 1.9090909090909094e-05, "loss": 1.131, "step": 63},
    {"epoch": 0.09, "learning_rate": 2e-05, "loss": 1.1288, "step": 66},
    {"epoch": 0.09, "learning_rate": 1.9999900994429424e-05, "loss": 1.1198, "step": 69},
    {"epoch": 0.1, "learning_rate": 1.999960397967811e-05, "loss": 1.1281, "step": 72},
    {"epoch": 0.1, "learning_rate": 1.9999108961627284e-05, "loss": 1.134, "step": 75},
    {"epoch": 0.11, "learning_rate": 1.9998415950078858e-05, "loss": 1.1148, "step": 78},
    {"epoch": 0.11, "learning_rate": 1.9997524958755226e-05, "loss": 1.1162, "step": 81},
    {"epoch": 0.12, "learning_rate": 1.9996436005299013e-05, "loss": 1.12, "step": 84},
    {"epoch": 0.12, "learning_rate": 1.999514911127271e-05, "loss": 1.12, "step": 87},
    {"epoch": 0.12, "learning_rate": 1.9993664302158255e-05, "loss": 1.0938, "step": 90},
    {"epoch": 0.13, "learning_rate": 1.9991981607356517e-05, "loss": 1.0838, "step": 93},
    {"epoch": 0.13, "learning_rate": 1.9990101060186732e-05, "loss": 1.1078, "step": 96},
    {"epoch": 0.14, "learning_rate": 1.998802269788583e-05, "loss": 1.1037, "step": 99},
    {"epoch": 0.14, "learning_rate": 1.9985746561607696e-05, "loss": 1.0804, "step": 102},
    {"epoch": 0.14, "learning_rate": 1.998327269642237e-05, "loss": 1.0977, "step": 105},
    {"epoch": 0.15, "learning_rate": 1.998060115131513e-05, "loss": 1.1036, "step": 108},
    {"epoch": 0.15, "learning_rate": 1.9977731979185556e-05, "loss": 1.1109, "step": 111},
    {"epoch": 0.16, "learning_rate": 1.9974665236846443e-05, "loss": 1.0937, "step": 114},
    {"epoch": 0.16, "learning_rate": 1.9971400985022712e-05, "loss": 1.0834, "step": 117},
    {"epoch": 0.16, "learning_rate": 1.9967939288350184e-05, "loss": 1.1002, "step": 120},
    {"epoch": 0.17, "learning_rate": 1.9964280215374312e-05, "loss": 1.0847, "step": 123},
    {"epoch": 0.17, "learning_rate": 1.9960423838548814e-05, "loss": 1.0845, "step": 126},
    {"epoch": 0.18, "learning_rate": 1.995637023423425e-05, "loss": 1.0984, "step": 129},
    {"epoch": 0.18, "learning_rate": 1.9952119482696504e-05, "loss": 1.0836, "step": 132},
    {"epoch": 0.19, "learning_rate": 1.9947671668105185e-05, "loss": 1.082, "step": 135},
    {"epoch": 0.19, "learning_rate": 1.9943026878531985e-05, "loss": 1.0707, "step": 138},
    {"epoch": 0.19, "learning_rate": 1.9938185205948906e-05, "loss": 1.0545, "step": 141},
    {"epoch": 0.2, "learning_rate": 1.993314674622646e-05, "loss": 1.0618, "step": 144},
    {"epoch": 0.2, "learning_rate": 1.992791159913177e-05, "loss": 1.0514, "step": 147},
    {"epoch": 0.21, "learning_rate": 1.992247986832658e-05, "loss": 1.0733, "step": 150},
    {"epoch": 0.21, "learning_rate": 1.99168516613652e-05, "loss": 1.0712, "step": 153},
    {"epoch": 0.21, "learning_rate": 1.991102708969241e-05, "loss": 1.0788, "step": 156},
    {"epoch": 0.22, "learning_rate": 1.9905006268641212e-05, "loss": 1.0744, "step": 159},
    {"epoch": 0.22, "learning_rate": 1.9898789317430577e-05, "loss": 1.0621, "step": 162},
    {"epoch": 0.23, "learning_rate": 1.9892376359163058e-05, "loss": 1.0598, "step": 165},
    {"epoch": 0.23, "learning_rate": 1.9885767520822377e-05, "loss": 1.095, "step": 168},
    {"epoch": 0.23, "learning_rate": 1.9878962933270896e-05, "loss": 1.0666, "step": 171},
    {"epoch": 0.24, "learning_rate": 1.987196273124703e-05, "loss": 1.0657, "step": 174},
    {"epoch": 0.24, "learning_rate": 1.986476705336258e-05, "loss": 1.0691, "step": 177},
    {"epoch": 0.25, "learning_rate": 1.9857376042099982e-05, "loss": 1.0663, "step": 180},
    {"epoch": 0.25, "learning_rate": 1.9849789843809496e-05, "loss": 1.0476, "step": 183},
    {"epoch": 0.26, "learning_rate": 1.9842008608706295e-05, "loss": 1.0509, "step": 186},
    {"epoch": 0.26, "learning_rate": 1.983403249086751e-05, "loss": 1.0622, "step": 189},
    {"epoch": 0.26, "learning_rate": 1.9825861648229154e-05, "loss": 1.0708, "step": 192},
    {"epoch": 0.27, "learning_rate": 1.981749624258302e-05, "loss": 1.0672, "step": 195},
    {"epoch": 0.27, "learning_rate": 1.9808936439573455e-05, "loss": 1.0627, "step": 198},
    {"epoch": 0.28, "learning_rate": 1.9800182408694096e-05, "loss": 1.0726, "step": 201},
    {"epoch": 0.28, "learning_rate": 1.9791234323284515e-05, "loss": 1.0558, "step": 204},
    {"epoch": 0.28, "learning_rate": 1.9782092360526763e-05, "loss": 1.0677, "step": 207},
    {"epoch": 0.29, "learning_rate": 1.977275670144189e-05, "loss": 1.0422, "step": 210},
    {"epoch": 0.29, "learning_rate": 1.9763227530886348e-05, "loss": 1.0364, "step": 213},
    {"epoch": 0.3, "learning_rate": 1.9753505037548334e-05, "loss": 1.0475, "step": 216},
    {"epoch": 0.3, "learning_rate": 1.974358941394404e-05, "loss": 1.0508, "step": 219},
    {"epoch": 0.3, "learning_rate": 1.973348085641387e-05, "loss": 1.0595, "step": 222},
    {"epoch": 0.31, "learning_rate": 1.972317956511852e-05, "loss": 1.0528, "step": 225},
    {"epoch": 0.31, "learning_rate": 1.971268574403503e-05, "loss": 1.0562, "step": 228},
    {"epoch": 0.32, "learning_rate": 1.970199960095276e-05, "loss": 1.0329, "step": 231},
    {"epoch": 0.32, "learning_rate": 1.9691121347469235e-05, "loss": 1.045, "step": 234},
    {"epoch": 0.33, "learning_rate": 1.9680051198986004e-05, "loss": 1.0561, "step": 237},
    {"epoch": 0.33, "learning_rate": 1.9668789374704337e-05, "loss": 1.0449, "step": 240},
    {"epoch": 0.33, "learning_rate": 1.9657336097620904e-05, "loss": 1.0359, "step": 243},
    {"epoch": 0.34, "learning_rate": 1.964569159452335e-05, "loss": 1.0359, "step": 246},
    {"epoch": 0.34, "learning_rate": 1.963385609598581e-05, "loss": 1.0271, "step": 249},
    {"epoch": 0.35, "learning_rate": 1.9621829836364335e-05, "loss": 1.0563, "step": 252},
    {"epoch": 0.35, "learning_rate": 1.9609613053792276e-05, "loss": 1.0416, "step": 255},
    {"epoch": 0.35, "learning_rate": 1.9597205990175528e-05, "loss": 1.0578, "step": 258},
    {"epoch": 0.36, "learning_rate": 1.958460889118778e-05, "loss": 1.0461, "step": 261},
    {"epoch": 0.36, "learning_rate": 1.9571822006265623e-05, "loss": 1.0262, "step": 264},
    {"epoch": 0.37, "learning_rate": 1.9558845588603625e-05, "loss": 1.0254, "step": 267},
    {"epoch": 0.37, "learning_rate": 1.9545679895149315e-05, "loss": 1.0642, "step": 270},
    {"epoch": 0.37, "learning_rate": 1.9532325186598093e-05, "loss": 1.0456, "step": 273},
    {"epoch": 0.38, "learning_rate": 1.951878172738806e-05, "loss": 1.0358, "step": 276},
    {"epoch": 0.38, "learning_rate": 1.9505049785694803e-05, "loss": 1.0409, "step": 279},
    {"epoch": 0.39, "learning_rate": 1.9491129633426068e-05, "loss": 1.0382, "step": 282},
    {"epoch": 0.39, "learning_rate": 1.9477021546216376e-05, "loss": 1.0415, "step": 285},
    {"epoch": 0.4, "learning_rate": 1.9462725803421566e-05, "loss": 1.0308, "step": 288},
    {"epoch": 0.4, "learning_rate": 1.9448242688113286e-05, "loss": 1.0376, "step": 291},
    {"epoch": 0.4, "learning_rate": 1.9433572487073343e-05, "loss": 1.0259, "step": 294},
    {"epoch": 0.41, "learning_rate": 1.9418715490788066e-05, "loss": 1.0496, "step": 297},
    {"epoch": 0.41, "learning_rate": 1.9403671993442534e-05, "loss": 1.0519, "step": 300},
    {"epoch": 0.42, "learning_rate": 1.9388442292914754e-05, "loss": 1.0418, "step": 303},
    {"epoch": 0.42, "learning_rate": 1.937302669076976e-05, "loss": 1.0372, "step": 306},
    {"epoch": 0.42, "learning_rate": 1.9357425492253662e-05, "loss": 1.0347, "step": 309},
    {"epoch": 0.43, "learning_rate": 1.934163900628756e-05, "loss": 1.0253, "step": 312},
    {"epoch": 0.43, "learning_rate": 1.9325667545461466e-05, "loss": 1.0401, "step": 315},
    {"epoch": 0.44, "learning_rate": 1.9309511426028105e-05, "loss": 1.0282, "step": 318},
    {"epoch": 0.44, "learning_rate": 1.9293170967896632e-05, "loss": 1.0306, "step": 321},
    {"epoch": 0.44, "learning_rate": 1.9276646494626333e-05, "loss": 1.0313, "step": 324},
    {"epoch": 0.45, "learning_rate": 1.9259938333420183e-05, "loss": 1.0433, "step": 327},
    {"epoch": 0.45, "learning_rate": 1.9243046815118387e-05, "loss": 1.0232, "step": 330},
    {"epoch": 0.46, "learning_rate": 1.922597227419183e-05, "loss": 1.0222, "step": 333},
    {"epoch": 0.46, "learning_rate": 1.9208715048735446e-05, "loss": 1.0186, "step": 336},
    {"epoch": 0.47, "learning_rate": 1.9191275480461525e-05, "loss": 1.033, "step": 339},
    {"epoch": 0.47, "learning_rate": 1.9173653914692947e-05, "loss": 1.0342, "step": 342},
    {"epoch": 0.47, "learning_rate": 1.9155850700356345e-05, "loss": 1.035, "step": 345},
    {"epoch": 0.48, "learning_rate": 1.91378661899752e-05, "loss": 1.0206, "step": 348},
    {"epoch": 0.48, "learning_rate": 1.9119700739662857e-05, "loss": 1.0435, "step": 351},
    {"epoch": 0.49, "learning_rate": 1.910135470911547e-05, "loss": 1.0181, "step": 354},
    {"epoch": 0.49, "learning_rate": 1.908282846160488e-05, "loss": 1.0267, "step": 357},
    {"epoch": 0.49, "learning_rate": 1.9064122363971426e-05, "loss": 1.0365, "step": 360},
    {"epoch": 0.5, "learning_rate": 1.904523678661669e-05, "loss": 1.0381, "step": 363},
    {"epoch": 0.5, "learning_rate": 1.9026172103496138e-05, "loss": 1.0048, "step": 366},
    {"epoch": 0.51, "learning_rate": 1.900692869211174e-05, "loss": 1.0392, "step": 369},
    {"epoch": 0.51, "learning_rate": 1.898750693350447e-05, "loss": 1.0278, "step": 372},
    {"epoch": 0.51, "learning_rate": 1.8967907212246803e-05, "loss": 1.013, "step": 375},
    {"epoch": 0.52, "learning_rate": 1.8948129916435048e-05, "loss": 1.0385, "step": 378},
    {"epoch": 0.52, "learning_rate": 1.8928175437681698e-05, "loss": 1.0168, "step": 381},
    {"epoch": 0.53, "learning_rate": 1.8908044171107658e-05, "loss": 1.0123, "step": 384},
    {"epoch": 0.53, "learning_rate": 1.8887736515334443e-05, "loss": 1.015, "step": 387},
    {"epoch": 0.54, "learning_rate": 1.8867252872476255e-05, "loss": 1.0265, "step": 390},
    {"epoch": 0.54, "learning_rate": 1.884659364813205e-05, "loss": 0.9997, "step": 393},
    {"epoch": 0.54, "learning_rate": 1.8825759251377484e-05, "loss": 1.0109, "step": 396},
    {"epoch": 0.55, "learning_rate": 1.8804750094756827e-05, "loss": 1.0199, "step": 399},
    {"epoch": 0.55, "learning_rate": 1.8783566594274783e-05, "loss": 0.9998, "step": 402},
    {"epoch": 0.56, "learning_rate": 1.8762209169388262e-05, "loss": 1.0088, "step": 405},
    {"epoch": 0.56, "learning_rate": 1.8740678242998077e-05, "loss": 1.0022, "step": 408},
    {"epoch": 0.56, "learning_rate": 1.8718974241440552e-05, "loss": 1.0216, "step": 411},
    {"epoch": 0.57, "learning_rate": 1.8697097594479103e-05, "loss": 1.0248, "step": 414},
    {"epoch": 0.57, "learning_rate": 1.867504873529571e-05, "loss": 0.9974, "step": 417},
    {"epoch": 0.58, "learning_rate": 1.865282810048235e-05, "loss": 1.0138, "step": 420},
    {"epoch": 0.58, "learning_rate": 1.8630436130032353e-05, "loss": 1.0004, "step": 423},
    {"epoch": 0.58, "learning_rate": 1.860787326733168e-05, "loss": 1.0081, "step": 426},
    {"epoch": 0.59, "learning_rate": 1.8585139959150144e-05, "loss": 1.0238, "step": 429},
    {"epoch": 0.59, "learning_rate": 1.856223665563258e-05, "loss": 1.0328, "step": 432},
    {"epoch": 0.6, "learning_rate": 1.8539163810289914e-05, "loss": 1.0071, "step": 435},
    {"epoch": 0.6, "learning_rate": 1.8515921879990187e-05, "loss": 1.0134, "step": 438},
    {"epoch": 0.61, "learning_rate": 1.8492511324949516e-05, "loss": 1.0181, "step": 441},
    {"epoch": 0.61, "learning_rate": 1.8468932608722975e-05, "loss": 1.0363, "step": 444},
    {"epoch": 0.61, "learning_rate": 1.8445186198195406e-05, "loss": 1.0011, "step": 447},
    {"epoch": 0.62, "learning_rate": 1.8421272563572202e-05, "loss": 0.9993, "step": 450},
    {"epoch": 0.62, "learning_rate": 1.8397192178369965e-05, "loss": 1.0201, "step": 453},
    {"epoch": 0.63, "learning_rate": 1.837294551940716e-05, "loss": 0.987, "step": 456},
    {"epoch": 0.63, "learning_rate": 1.834853306679464e-05, "loss": 1.0106, "step": 459},
    {"epoch": 0.63, "learning_rate": 1.8323955303926165e-05, "loss": 1.0034, "step": 462},
    {"epoch": 0.64, "learning_rate": 1.8299212717468825e-05, "loss": 1.0095, "step": 465},
    {"epoch": 0.64, "learning_rate": 1.8274305797353397e-05, "loss": 0.9921, "step": 468},
    {"epoch": 0.65, "learning_rate": 1.824923503676465e-05, "loss": 0.9859, "step": 471},
    {"epoch": 0.65, "learning_rate": 1.822400093213157e-05, "loss": 1.017, "step": 474},
    {"epoch": 0.65, "learning_rate": 1.8198603983117546e-05, "loss": 1.0118, "step": 477},
    {"epoch": 0.66, "learning_rate": 1.8173044692610466e-05, "loss": 0.9912, "step": 480},
    {"epoch": 0.66, "learning_rate": 1.8147323566712755e-05, "loss": 1.0162, "step": 483},
    {"epoch": 0.67, "learning_rate": 1.8121441114731366e-05, "loss": 1.0089, "step": 486},
    {"epoch": 0.67, "learning_rate": 1.809539784916768e-05, "loss": 0.9752, "step": 489},
    {"epoch": 0.68, "learning_rate": 1.806919428570737e-05, "loss": 1.007, "step": 492},
    {"epoch": 0.68, "learning_rate": 1.804283094321019e-05, "loss": 1.0145, "step": 495},
    {"epoch": 0.68, "learning_rate": 1.8016308343699686e-05, "loss": 1.0008, "step": 498},
    {"epoch": 0.69, "learning_rate": 1.798962701235289e-05, "loss": 1.0067, "step": 501},
    {"epoch": 0.69, "learning_rate": 1.796278747748988e-05, "loss": 1.0017, "step": 504},
    {"epoch": 0.7, "learning_rate": 1.7935790270563345e-05, "loss": 1.0086, "step": 507},
    {"epoch": 0.7, "learning_rate": 1.790863592614807e-05, "loss": 0.9884, "step": 510},
    {"epoch": 0.7, "learning_rate": 1.788132498193032e-05, "loss": 1.0028, "step": 513},
    {"epoch": 0.71, "learning_rate": 1.7853857978697223e-05, "loss": 1.0055, "step": 516},
    {"epoch": 0.71, "learning_rate": 1.7826235460326043e-05, "loss": 1.005, "step": 519},
    {"epoch": 0.72, "learning_rate": 1.7798457973773418e-05, "loss": 1.002, "step": 522},
    {"epoch": 0.72, "learning_rate": 1.7770526069064525e-05, "loss": 0.9838, "step": 525},
    {"epoch": 0.72, "learning_rate": 1.7742440299282203e-05, "loss": 1.001, "step": 528},
    {"epoch": 0.73, "learning_rate": 1.7714201220555982e-05, "loss": 0.9984, "step": 531},
    {"epoch": 0.73, "learning_rate": 1.7685809392051084e-05, "loss": 1.0035, "step": 534},
    {"epoch": 0.74, "learning_rate": 1.765726537595734e-05, "loss": 1.0076, "step": 537},
    {"epoch": 0.74, "learning_rate": 1.7628569737478076e-05, "loss": 0.9936, "step": 540},
    {"epoch": 0.75, "learning_rate": 1.7599723044818898e-05, "loss": 1.0053, "step": 543},
    {"epoch": 0.75, "learning_rate": 1.7570725869176468e-05, "loss": 0.9968, "step": 546},
    {"epoch": 0.75, "learning_rate": 1.7541578784727163e-05, "loss": 1.0059, "step": 549},
    {"epoch": 0.76, "learning_rate": 1.751228236861573e-05, "loss": 1.0059, "step": 552},
    {"epoch": 0.76, "learning_rate": 1.7482837200943845e-05, "loss": 1.0081, "step": 555},
    {"epoch": 0.77, "learning_rate": 1.7453243864758638e-05, "loss": 1.0215, "step": 558},
    {"epoch": 0.77, "learning_rate": 1.7423502946041133e-05, "loss": 0.9935, "step": 561},
    {"epoch": 0.77, "learning_rate": 1.739361503369466e-05, "loss": 0.9945, "step": 564},
    {"epoch": 0.78, "learning_rate": 1.7363580719533173e-05, "loss": 0.9926, "step": 567},
    {"epoch": 0.78, "learning_rate": 1.733340059826956e-05, "loss": 0.9946, "step": 570},
    {"epoch": 0.79, "learning_rate": 1.7303075267503845e-05, "loss": 1.0079, "step": 573},
    {"epoch": 0.79, "learning_rate": 1.7272605327711364e-05, "loss": 1.0212, "step": 576},
    {"epoch": 0.79, "learning_rate": 1.7241991382230872e-05, "loss": 0.993, "step": 579},
    {"epoch": 0.8, "learning_rate": 1.72112340372526e-05, "loss": 0.9843, "step": 582},
    {"epoch": 0.8, "learning_rate": 1.718033390180624e-05, "loss": 0.9837, "step": 585},
    {"epoch": 0.81, "learning_rate": 1.71492915877489e-05, "loss": 0.959, "step": 588},
    {"epoch": 0.81, "learning_rate": 1.7118107709752986e-05, "loss": 0.9895, "step": 591},
    {"epoch": 0.82, "learning_rate": 1.7086782885294026e-05, "loss": 0.99, "step": 594},
    {"epoch": 0.82, "learning_rate": 1.7055317734638444e-05, "loss": 1.006, "step": 597},
    {"epoch": 0.82, "learning_rate": 1.702371288083127e-05, "loss": 1.0009, "step": 600},
    {"epoch": 0.83, "learning_rate": 1.6991968949683835e-05, "loss": 0.9758, "step": 603},
    {"epoch": 0.83, "learning_rate": 1.6960086569761332e-05, "loss": 0.9801, "step": 606},
    {"epoch": 0.84, "learning_rate": 1.6928066372370407e-05, "loss": 0.9833, "step": 609},
    {"epoch": 0.84, "learning_rate": 1.689590899154664e-05, "loss": 0.9846, "step": 612},
    {"epoch": 0.84, "learning_rate": 1.6863615064042003e-05, "loss": 0.9752, "step": 615},
    {"epoch": 0.85, "learning_rate": 1.6831185229312237e-05, "loss": 0.9869, "step": 618},
    {"epoch": 0.85, "learning_rate": 1.67986201295042e-05, "loss": 0.9869, "step": 621},
    {"epoch": 0.86, "learning_rate": 1.676592040944315e-05, "loss": 0.9878, "step": 624},
    {"epoch": 0.86, "learning_rate": 1.6733086716619976e-05, "loss": 0.9938, "step": 627},
    {"epoch": 0.86, "learning_rate": 1.6700119701178378e-05, "loss": 1.0045, "step": 630},
    {"epoch": 0.87, "learning_rate": 1.666702001590199e-05, "loss": 1.0088, "step": 633},
    {"epoch": 0.87, "learning_rate": 1.6633788316201455e-05, "loss": 0.998, "step": 636},
    {"epoch": 0.88, "learning_rate": 1.6600425260101453e-05, "loss": 1.0017, "step": 639},
    {"epoch": 0.88, "learning_rate": 1.6566931508227663e-05, "loss": 0.9995, "step": 642},
    {"epoch": 0.89, "learning_rate": 1.6533307723793688e-05, "loss": 1.0012, "step": 645},
    {"epoch": 0.89, "learning_rate": 1.649955457258792e-05, "loss": 0.9807, "step": 648},
    {"epoch": 0.89, "learning_rate": 1.6465672722960365e-05, "loss": 0.9664, "step": 651},
    {"epoch": 0.9, "learning_rate": 1.6431662845809388e-05, "loss": 0.9707, "step": 654},
    {"epoch": 0.9, "learning_rate": 1.6397525614568446e-05, "loss": 0.983, "step": 657},
    {"epoch": 0.91, "learning_rate": 1.6363261705192757e-05, "loss": 1.0061, "step": 660},
    {"epoch": 0.91, "learning_rate": 1.6328871796145894e-05, "loss": 0.9899, "step": 663},
    {"epoch": 0.91, "learning_rate": 1.629435656838637e-05, "loss": 0.9795, "step": 666},
    {"epoch": 0.92, "learning_rate": 1.6259716705354154e-05, "loss": 1.0002, "step": 669},
    {"epoch": 0.92, "learning_rate": 1.6224952892957122e-05, "loss": 0.9837, "step": 672},
    {"epoch": 0.93, "learning_rate": 1.6190065819557496e-05, "loss": 0.9872, "step": 675},
    {"epoch": 0.93, "learning_rate": 1.615505617595819e-05, "loss": 0.9797, "step": 678},
    {"epoch": 0.93, "learning_rate": 1.6119924655389158e-05, "loss": 0.9926, "step": 681},
    {"epoch": 0.94, "learning_rate": 1.6084671953493645e-05, "loss": 0.9884, "step": 684},
    {"epoch": 0.94, "learning_rate": 1.6049298768314425e-05, "loss": 0.9918, "step": 687},
    {"epoch": 0.95, "learning_rate": 1.6013805800279977e-05, "loss": 0.9829, "step": 690},
    {"epoch": 0.95, "learning_rate": 1.5978193752190607e-05, "loss": 0.9854, "step": 693},
    {"epoch": 0.96, "learning_rate": 1.5942463329204546e-05, "loss": 0.9751, "step": 696},
    {"epoch": 0.96, "learning_rate": 1.5906615238823974e-05, "loss": 0.9945, "step": 699},
    {"epoch": 0.96, "learning_rate": 1.5870650190881023e-05, "loss": 0.9957, "step": 702},
    {"epoch": 0.97, "learning_rate": 1.583456889752371e-05, "loss": 1.0047, "step": 705},
    {"epoch": 0.97, "learning_rate": 1.579837207320184e-05, "loss": 0.9921, "step": 708},
    {"epoch": 0.98, "learning_rate": 1.5762060434652863e-05, "loss": 0.9839, "step": 711},
    {"epoch": 0.98, "learning_rate": 1.572563470088768e-05, "loss": 0.9922, "step": 714},
    {"epoch": 0.98, "learning_rate": 1.56890955931764e-05, "loss": 0.9752, "step": 717},
    {"epoch": 0.99, "learning_rate": 1.565244383503407e-05, "loss": 0.9778, "step": 720},
    {"epoch": 0.99, "learning_rate": 1.5615680152206324e-05, "loss": 0.9795, "step": 723},
    {"epoch": 1.0, "learning_rate": 1.557880527265505e-05, "loss": 0.9774, "step": 726}
  ],
  "logging_steps": 3,
  "max_steps": 2184,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500.0,
  "total_flos": 4.694048596218085e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}