diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { - "best_metric": 2.486668109893799, - "best_model_checkpoint": "checkpoints-mistral-300M/checkpoint-56000", - "epoch": 7.5630581204949445, + "best_metric": 2.401254177093506, + "best_model_checkpoint": "checkpoints-mistral-300M/checkpoint-96000", + "epoch": 13.64072654838873, "eval_steps": 1000, - "global_step": 56000, + "global_step": 101000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -34055,13 +34055,27373 @@ "eval_samples_per_second": 59.99, "eval_steps_per_second": 5.0, "step": 56000 + }, + { + "epoch": 7.56, + "learning_rate": 4.1870747339754796e-05, + "loss": 2.4059, + "step": 56010 + }, + { + "epoch": 7.57, + "learning_rate": 4.182664262331522e-05, + "loss": 2.3906, + "step": 56020 + }, + { + "epoch": 7.57, + "learning_rate": 4.1782557384417925e-05, + "loss": 2.4007, + "step": 56030 + }, + { + "epoch": 7.57, + "learning_rate": 4.173849163100073e-05, + "loss": 2.3591, + "step": 56040 + }, + { + "epoch": 7.57, + "learning_rate": 4.169444537099814e-05, + "loss": 2.3478, + "step": 56050 + }, + { + "epoch": 7.57, + "learning_rate": 4.165041861234106e-05, + "loss": 2.3565, + "step": 56060 + }, + { + "epoch": 7.57, + "learning_rate": 4.160641136295678e-05, + "loss": 2.3565, + "step": 56070 + }, + { + "epoch": 7.57, + "learning_rate": 4.156242363076929e-05, + "loss": 2.3755, + "step": 56080 + }, + { + "epoch": 7.58, + "learning_rate": 4.151845542369888e-05, + "loss": 2.3951, + "step": 56090 + }, + { + "epoch": 7.58, + "learning_rate": 4.147450674966253e-05, + "loss": 2.3777, + "step": 56100 + }, + { + "epoch": 7.58, + "learning_rate": 4.1430577616573415e-05, + "loss": 2.3661, + "step": 56110 + }, + { + "epoch": 7.58, + "learning_rate": 4.138666803234147e-05, + "loss": 2.3462, + "step": 56120 + }, + { + "epoch": 7.58, + "learning_rate": 4.134277800487289e-05, + "loss": 2.3647, + "step": 56130 + }, + { + "epoch": 7.58, + "learning_rate": 4.129890754207058e-05, + "loss": 2.3753, + "step": 56140 + }, + { + "epoch": 7.58, + "learning_rate": 4.1255056651833775e-05, + "loss": 2.3671, + "step": 56150 + }, + { + "epoch": 7.59, + "learning_rate": 4.121122534205806e-05, + "loss": 2.361, + "step": 56160 + }, + { + "epoch": 7.59, + "learning_rate": 4.116741362063576e-05, + "loss": 2.3947, + "step": 56170 + }, + { + "epoch": 7.59, + "learning_rate": 4.1123621495455515e-05, + "loss": 2.3554, + "step": 56180 + }, + { + "epoch": 7.59, + "learning_rate": 4.107984897440252e-05, + "loss": 2.369, + "step": 56190 + }, + { + "epoch": 7.59, + "learning_rate": 4.1036096065358364e-05, + "loss": 2.339, + "step": 56200 + }, + { + "epoch": 7.59, + "learning_rate": 4.099236277620112e-05, + "loss": 2.3562, + "step": 56210 + }, + { + "epoch": 7.59, + "learning_rate": 4.094864911480531e-05, + "loss": 2.358, + "step": 56220 + }, + { + "epoch": 7.59, + "learning_rate": 4.0904955089042054e-05, + "loss": 2.3706, + "step": 56230 + }, + { + "epoch": 7.6, + "learning_rate": 4.086128070677877e-05, + "loss": 2.364, + "step": 56240 + }, + { + "epoch": 7.6, + "learning_rate": 4.081762597587941e-05, + "loss": 2.3612, + "step": 56250 + }, + { + "epoch": 7.6, + "learning_rate": 4.07739909042044e-05, + "loss": 2.3381, + "step": 56260 + }, + { + "epoch": 7.6, + "learning_rate": 4.073037549961054e-05, + "loss": 2.3497, + "step": 56270 + }, + { + "epoch": 7.6, + "learning_rate": 4.068677976995126e-05, + "loss": 2.3506, + "step": 56280 + }, + { + "epoch": 7.6, + "learning_rate": 4.0643203723076315e-05, + "loss": 2.3855, + "step": 56290 + }, + { + "epoch": 7.6, + "learning_rate": 4.059964736683192e-05, + "loss": 2.3654, + "step": 56300 + }, + { + "epoch": 7.61, + "learning_rate": 4.055611070906075e-05, + "loss": 2.379, + "step": 56310 + }, + { + "epoch": 7.61, + "learning_rate": 4.051259375760203e-05, + "loss": 2.3614, + "step": 56320 + }, + { + "epoch": 7.61, + "learning_rate": 4.046909652029132e-05, + "loss": 2.3609, + "step": 56330 + }, + { + "epoch": 7.61, + "learning_rate": 4.042561900496068e-05, + "loss": 2.3697, + "step": 56340 + }, + { + "epoch": 7.61, + "learning_rate": 4.038216121943859e-05, + "loss": 2.3966, + "step": 56350 + }, + { + "epoch": 7.61, + "learning_rate": 4.0338723171549966e-05, + "loss": 2.3521, + "step": 56360 + }, + { + "epoch": 7.61, + "learning_rate": 4.0295304869116293e-05, + "loss": 2.368, + "step": 56370 + }, + { + "epoch": 7.61, + "learning_rate": 4.025190631995535e-05, + "loss": 2.3571, + "step": 56380 + }, + { + "epoch": 7.62, + "learning_rate": 4.020852753188143e-05, + "loss": 2.3475, + "step": 56390 + }, + { + "epoch": 7.62, + "learning_rate": 4.0165168512705206e-05, + "loss": 2.3615, + "step": 56400 + }, + { + "epoch": 7.62, + "learning_rate": 4.012182927023395e-05, + "loss": 2.3585, + "step": 56410 + }, + { + "epoch": 7.62, + "learning_rate": 4.00785098122712e-05, + "loss": 2.318, + "step": 56420 + }, + { + "epoch": 7.62, + "learning_rate": 4.0035210146617005e-05, + "loss": 2.3888, + "step": 56430 + }, + { + "epoch": 7.62, + "learning_rate": 3.999193028106784e-05, + "loss": 2.3632, + "step": 56440 + }, + { + "epoch": 7.62, + "learning_rate": 3.9948670223416565e-05, + "loss": 2.3704, + "step": 56450 + }, + { + "epoch": 7.63, + "learning_rate": 3.990542998145262e-05, + "loss": 2.3451, + "step": 56460 + }, + { + "epoch": 7.63, + "learning_rate": 3.986220956296175e-05, + "loss": 2.3573, + "step": 56470 + }, + { + "epoch": 7.63, + "learning_rate": 3.981900897572615e-05, + "loss": 2.3571, + "step": 56480 + }, + { + "epoch": 7.63, + "learning_rate": 3.977582822752439e-05, + "loss": 2.3243, + "step": 56490 + }, + { + "epoch": 7.63, + "learning_rate": 3.973266732613167e-05, + "loss": 2.3619, + "step": 56500 + }, + { + "epoch": 7.63, + "learning_rate": 3.9689526279319405e-05, + "loss": 2.3474, + "step": 56510 + }, + { + "epoch": 7.63, + "learning_rate": 3.964640509485553e-05, + "loss": 2.3313, + "step": 56520 + }, + { + "epoch": 7.63, + "learning_rate": 3.960330378050435e-05, + "loss": 2.3591, + "step": 56530 + }, + { + "epoch": 7.64, + "learning_rate": 3.956022234402663e-05, + "loss": 2.3622, + "step": 56540 + }, + { + "epoch": 7.64, + "learning_rate": 3.951716079317961e-05, + "loss": 2.3607, + "step": 56550 + }, + { + "epoch": 7.64, + "learning_rate": 3.9474119135716844e-05, + "loss": 2.3115, + "step": 56560 + }, + { + "epoch": 7.64, + "learning_rate": 3.9431097379388376e-05, + "loss": 2.362, + "step": 56570 + }, + { + "epoch": 7.64, + "learning_rate": 3.938809553194063e-05, + "loss": 2.3156, + "step": 56580 + }, + { + "epoch": 7.64, + "learning_rate": 3.9345113601116405e-05, + "loss": 2.3482, + "step": 56590 + }, + { + "epoch": 7.64, + "learning_rate": 3.930215159465507e-05, + "loss": 2.3345, + "step": 56600 + }, + { + "epoch": 7.65, + "learning_rate": 3.925920952029226e-05, + "loss": 2.3703, + "step": 56610 + }, + { + "epoch": 7.65, + "learning_rate": 3.921628738576005e-05, + "loss": 2.3394, + "step": 56620 + }, + { + "epoch": 7.65, + "learning_rate": 3.9173385198786916e-05, + "loss": 2.3617, + "step": 56630 + }, + { + "epoch": 7.65, + "learning_rate": 3.9130502967097834e-05, + "loss": 2.3196, + "step": 56640 + }, + { + "epoch": 7.65, + "learning_rate": 3.9087640698414075e-05, + "loss": 2.349, + "step": 56650 + }, + { + "epoch": 7.65, + "learning_rate": 3.904479840045337e-05, + "loss": 2.3665, + "step": 56660 + }, + { + "epoch": 7.65, + "learning_rate": 3.9001976080929835e-05, + "loss": 2.3308, + "step": 56670 + }, + { + "epoch": 7.66, + "learning_rate": 3.895917374755396e-05, + "loss": 2.3443, + "step": 56680 + }, + { + "epoch": 7.66, + "learning_rate": 3.8916391408032746e-05, + "loss": 2.3335, + "step": 56690 + }, + { + "epoch": 7.66, + "learning_rate": 3.88736290700695e-05, + "loss": 2.3683, + "step": 56700 + }, + { + "epoch": 7.66, + "learning_rate": 3.883088674136392e-05, + "loss": 2.3684, + "step": 56710 + }, + { + "epoch": 7.66, + "learning_rate": 3.87881644296121e-05, + "loss": 2.3324, + "step": 56720 + }, + { + "epoch": 7.66, + "learning_rate": 3.874546214250666e-05, + "loss": 2.3486, + "step": 56730 + }, + { + "epoch": 7.66, + "learning_rate": 3.8702779887736405e-05, + "loss": 2.339, + "step": 56740 + }, + { + "epoch": 7.66, + "learning_rate": 3.866011767298677e-05, + "loss": 2.3221, + "step": 56750 + }, + { + "epoch": 7.67, + "learning_rate": 3.8617475505939345e-05, + "loss": 2.3272, + "step": 56760 + }, + { + "epoch": 7.67, + "learning_rate": 3.85748533942722e-05, + "loss": 2.3575, + "step": 56770 + }, + { + "epoch": 7.67, + "learning_rate": 3.853225134565991e-05, + "loss": 2.3334, + "step": 56780 + }, + { + "epoch": 7.67, + "learning_rate": 3.848966936777331e-05, + "loss": 2.3462, + "step": 56790 + }, + { + "epoch": 7.67, + "learning_rate": 3.8447107468279616e-05, + "loss": 2.3183, + "step": 56800 + }, + { + "epoch": 7.67, + "learning_rate": 3.8404565654842424e-05, + "loss": 2.3276, + "step": 56810 + }, + { + "epoch": 7.67, + "learning_rate": 3.8362043935121874e-05, + "loss": 2.3532, + "step": 56820 + }, + { + "epoch": 7.68, + "learning_rate": 3.8319542316774255e-05, + "loss": 2.3531, + "step": 56830 + }, + { + "epoch": 7.68, + "learning_rate": 3.8277060807452484e-05, + "loss": 2.3539, + "step": 56840 + }, + { + "epoch": 7.68, + "learning_rate": 3.8234599414805586e-05, + "loss": 2.338, + "step": 56850 + }, + { + "epoch": 7.68, + "learning_rate": 3.81921581464791e-05, + "loss": 2.3432, + "step": 56860 + }, + { + "epoch": 7.68, + "learning_rate": 3.8149737010115037e-05, + "loss": 2.326, + "step": 56870 + }, + { + "epoch": 7.68, + "learning_rate": 3.810733601335159e-05, + "loss": 2.3237, + "step": 56880 + }, + { + "epoch": 7.68, + "learning_rate": 3.8064955163823564e-05, + "loss": 2.3555, + "step": 56890 + }, + { + "epoch": 7.68, + "learning_rate": 3.80225944691618e-05, + "loss": 2.3421, + "step": 56900 + }, + { + "epoch": 7.69, + "learning_rate": 3.7980253936993875e-05, + "loss": 2.3284, + "step": 56910 + }, + { + "epoch": 7.69, + "learning_rate": 3.793793357494343e-05, + "loss": 2.3377, + "step": 56920 + }, + { + "epoch": 7.69, + "learning_rate": 3.789563339063078e-05, + "loss": 2.3624, + "step": 56930 + }, + { + "epoch": 7.69, + "learning_rate": 3.785335339167229e-05, + "loss": 2.3399, + "step": 56940 + }, + { + "epoch": 7.69, + "learning_rate": 3.7811093585680825e-05, + "loss": 2.3223, + "step": 56950 + }, + { + "epoch": 7.69, + "learning_rate": 3.776885398026574e-05, + "loss": 2.3471, + "step": 56960 + }, + { + "epoch": 7.69, + "learning_rate": 3.7726634583032536e-05, + "loss": 2.3401, + "step": 56970 + }, + { + "epoch": 7.7, + "learning_rate": 3.768443540158329e-05, + "loss": 2.35, + "step": 56980 + }, + { + "epoch": 7.7, + "learning_rate": 3.7642256443516185e-05, + "loss": 2.3585, + "step": 56990 + }, + { + "epoch": 7.7, + "learning_rate": 3.7600097716426035e-05, + "loss": 2.3568, + "step": 57000 + }, + { + "epoch": 7.7, + "eval_loss": 2.4452409744262695, + "eval_runtime": 1269.6433, + "eval_samples_per_second": 59.966, + "eval_steps_per_second": 4.997, + "step": 57000 + }, + { + "epoch": 7.7, + "learning_rate": 3.755795922790376e-05, + "loss": 2.3457, + "step": 57010 + }, + { + "epoch": 7.7, + "learning_rate": 3.751584098553689e-05, + "loss": 2.3495, + "step": 57020 + }, + { + "epoch": 7.7, + "learning_rate": 3.7473742996909134e-05, + "loss": 2.3283, + "step": 57030 + }, + { + "epoch": 7.7, + "learning_rate": 3.7431665269600484e-05, + "loss": 2.3556, + "step": 57040 + }, + { + "epoch": 7.71, + "learning_rate": 3.7389607811187534e-05, + "loss": 2.3424, + "step": 57050 + }, + { + "epoch": 7.71, + "learning_rate": 3.7347570629243e-05, + "loss": 2.3273, + "step": 57060 + }, + { + "epoch": 7.71, + "learning_rate": 3.7305553731336116e-05, + "loss": 2.3028, + "step": 57070 + }, + { + "epoch": 7.71, + "learning_rate": 3.726355712503235e-05, + "loss": 2.3354, + "step": 57080 + }, + { + "epoch": 7.71, + "learning_rate": 3.7221580817893545e-05, + "loss": 2.3538, + "step": 57090 + }, + { + "epoch": 7.71, + "learning_rate": 3.71796248174779e-05, + "loss": 2.3368, + "step": 57100 + }, + { + "epoch": 7.71, + "learning_rate": 3.713768913133991e-05, + "loss": 2.3206, + "step": 57110 + }, + { + "epoch": 7.71, + "learning_rate": 3.709577376703059e-05, + "loss": 2.3328, + "step": 57120 + }, + { + "epoch": 7.72, + "learning_rate": 3.705387873209699e-05, + "loss": 2.3531, + "step": 57130 + }, + { + "epoch": 7.72, + "learning_rate": 3.7012004034082764e-05, + "loss": 2.3424, + "step": 57140 + }, + { + "epoch": 7.72, + "learning_rate": 3.697014968052778e-05, + "loss": 2.3309, + "step": 57150 + }, + { + "epoch": 7.72, + "learning_rate": 3.692831567896833e-05, + "loss": 2.3564, + "step": 57160 + }, + { + "epoch": 7.72, + "learning_rate": 3.6886502036936944e-05, + "loss": 2.3151, + "step": 57170 + }, + { + "epoch": 7.72, + "learning_rate": 3.684470876196253e-05, + "loss": 2.3369, + "step": 57180 + }, + { + "epoch": 7.72, + "learning_rate": 3.6802935861570345e-05, + "loss": 2.3401, + "step": 57190 + }, + { + "epoch": 7.73, + "learning_rate": 3.676118334328188e-05, + "loss": 2.3318, + "step": 57200 + }, + { + "epoch": 7.73, + "learning_rate": 3.671945121461513e-05, + "loss": 2.3529, + "step": 57210 + }, + { + "epoch": 7.73, + "learning_rate": 3.66777394830843e-05, + "loss": 2.3475, + "step": 57220 + }, + { + "epoch": 7.73, + "learning_rate": 3.663604815619993e-05, + "loss": 2.3294, + "step": 57230 + }, + { + "epoch": 7.73, + "learning_rate": 3.6594377241468866e-05, + "loss": 2.3572, + "step": 57240 + }, + { + "epoch": 7.73, + "learning_rate": 3.655272674639439e-05, + "loss": 2.3487, + "step": 57250 + }, + { + "epoch": 7.73, + "learning_rate": 3.6511096678476006e-05, + "loss": 2.3178, + "step": 57260 + }, + { + "epoch": 7.73, + "learning_rate": 3.6469487045209535e-05, + "loss": 2.3433, + "step": 57270 + }, + { + "epoch": 7.74, + "learning_rate": 3.642789785408719e-05, + "loss": 2.3478, + "step": 57280 + }, + { + "epoch": 7.74, + "learning_rate": 3.6386329112597384e-05, + "loss": 2.3432, + "step": 57290 + }, + { + "epoch": 7.74, + "learning_rate": 3.634478082822503e-05, + "loss": 2.3185, + "step": 57300 + }, + { + "epoch": 7.74, + "learning_rate": 3.630325300845121e-05, + "loss": 2.3277, + "step": 57310 + }, + { + "epoch": 7.74, + "learning_rate": 3.6261745660753335e-05, + "loss": 2.3458, + "step": 57320 + }, + { + "epoch": 7.74, + "learning_rate": 3.6220258792605166e-05, + "loss": 2.3412, + "step": 57330 + }, + { + "epoch": 7.74, + "learning_rate": 3.6178792411476833e-05, + "loss": 2.3556, + "step": 57340 + }, + { + "epoch": 7.75, + "learning_rate": 3.6137346524834655e-05, + "loss": 2.3503, + "step": 57350 + }, + { + "epoch": 7.75, + "learning_rate": 3.609592114014135e-05, + "loss": 2.3389, + "step": 57360 + }, + { + "epoch": 7.75, + "learning_rate": 3.605451626485588e-05, + "loss": 2.3499, + "step": 57370 + }, + { + "epoch": 7.75, + "learning_rate": 3.601313190643354e-05, + "loss": 2.3472, + "step": 57380 + }, + { + "epoch": 7.75, + "learning_rate": 3.5971768072326015e-05, + "loss": 2.3494, + "step": 57390 + }, + { + "epoch": 7.75, + "learning_rate": 3.593042476998116e-05, + "loss": 2.3429, + "step": 57400 + }, + { + "epoch": 7.75, + "learning_rate": 3.588910200684321e-05, + "loss": 2.3304, + "step": 57410 + }, + { + "epoch": 7.76, + "learning_rate": 3.584779979035262e-05, + "loss": 2.3484, + "step": 57420 + }, + { + "epoch": 7.76, + "learning_rate": 3.580651812794632e-05, + "loss": 2.3575, + "step": 57430 + }, + { + "epoch": 7.76, + "learning_rate": 3.576525702705736e-05, + "loss": 2.3632, + "step": 57440 + }, + { + "epoch": 7.76, + "learning_rate": 3.5724016495115175e-05, + "loss": 2.3281, + "step": 57450 + }, + { + "epoch": 7.76, + "learning_rate": 3.5682796539545485e-05, + "loss": 2.3477, + "step": 57460 + }, + { + "epoch": 7.76, + "learning_rate": 3.564159716777021e-05, + "loss": 2.3106, + "step": 57470 + }, + { + "epoch": 7.76, + "learning_rate": 3.5600418387207784e-05, + "loss": 2.3171, + "step": 57480 + }, + { + "epoch": 7.76, + "learning_rate": 3.555926020527273e-05, + "loss": 2.3303, + "step": 57490 + }, + { + "epoch": 7.77, + "learning_rate": 3.5518122629375954e-05, + "loss": 2.3352, + "step": 57500 + }, + { + "epoch": 7.77, + "learning_rate": 3.547700566692458e-05, + "loss": 2.3431, + "step": 57510 + }, + { + "epoch": 7.77, + "learning_rate": 3.5435909325322135e-05, + "loss": 2.3256, + "step": 57520 + }, + { + "epoch": 7.77, + "learning_rate": 3.539483361196834e-05, + "loss": 2.3364, + "step": 57530 + }, + { + "epoch": 7.77, + "learning_rate": 3.535377853425925e-05, + "loss": 2.3369, + "step": 57540 + }, + { + "epoch": 7.77, + "learning_rate": 3.5312744099587146e-05, + "loss": 2.3373, + "step": 57550 + }, + { + "epoch": 7.77, + "learning_rate": 3.527173031534062e-05, + "loss": 2.3123, + "step": 57560 + }, + { + "epoch": 7.78, + "learning_rate": 3.523073718890462e-05, + "loss": 2.3213, + "step": 57570 + }, + { + "epoch": 7.78, + "learning_rate": 3.518976472766028e-05, + "loss": 2.3136, + "step": 57580 + }, + { + "epoch": 7.78, + "learning_rate": 3.514881293898504e-05, + "loss": 2.3088, + "step": 57590 + }, + { + "epoch": 7.78, + "learning_rate": 3.510788183025258e-05, + "loss": 2.347, + "step": 57600 + }, + { + "epoch": 7.78, + "learning_rate": 3.506697140883298e-05, + "loss": 2.334, + "step": 57610 + }, + { + "epoch": 7.78, + "learning_rate": 3.5026081682092466e-05, + "loss": 2.3216, + "step": 57620 + }, + { + "epoch": 7.78, + "learning_rate": 3.4985212657393605e-05, + "loss": 2.3527, + "step": 57630 + }, + { + "epoch": 7.78, + "learning_rate": 3.494436434209518e-05, + "loss": 2.3176, + "step": 57640 + }, + { + "epoch": 7.79, + "learning_rate": 3.4903536743552274e-05, + "loss": 2.3175, + "step": 57650 + }, + { + "epoch": 7.79, + "learning_rate": 3.486272986911633e-05, + "loss": 2.3292, + "step": 57660 + }, + { + "epoch": 7.79, + "learning_rate": 3.48219437261349e-05, + "loss": 2.3441, + "step": 57670 + }, + { + "epoch": 7.79, + "learning_rate": 3.478117832195192e-05, + "loss": 2.313, + "step": 57680 + }, + { + "epoch": 7.79, + "learning_rate": 3.474043366390749e-05, + "loss": 2.323, + "step": 57690 + }, + { + "epoch": 7.79, + "learning_rate": 3.46997097593381e-05, + "loss": 2.334, + "step": 57700 + }, + { + "epoch": 7.79, + "learning_rate": 3.465900661557642e-05, + "loss": 2.332, + "step": 57710 + }, + { + "epoch": 7.8, + "learning_rate": 3.461832423995142e-05, + "loss": 2.3329, + "step": 57720 + }, + { + "epoch": 7.8, + "learning_rate": 3.457766263978827e-05, + "loss": 2.3681, + "step": 57730 + }, + { + "epoch": 7.8, + "learning_rate": 3.4537021822408435e-05, + "loss": 2.356, + "step": 57740 + }, + { + "epoch": 7.8, + "learning_rate": 3.449640179512969e-05, + "loss": 2.3066, + "step": 57750 + }, + { + "epoch": 7.8, + "learning_rate": 3.445580256526601e-05, + "loss": 2.3342, + "step": 57760 + }, + { + "epoch": 7.8, + "learning_rate": 3.441522414012761e-05, + "loss": 2.347, + "step": 57770 + }, + { + "epoch": 7.8, + "learning_rate": 3.437466652702096e-05, + "loss": 2.339, + "step": 57780 + }, + { + "epoch": 7.81, + "learning_rate": 3.43341297332489e-05, + "loss": 2.343, + "step": 57790 + }, + { + "epoch": 7.81, + "learning_rate": 3.4293613766110356e-05, + "loss": 2.3327, + "step": 57800 + }, + { + "epoch": 7.81, + "learning_rate": 3.4253118632900594e-05, + "loss": 2.309, + "step": 57810 + }, + { + "epoch": 7.81, + "learning_rate": 3.42126443409111e-05, + "loss": 2.3335, + "step": 57820 + }, + { + "epoch": 7.81, + "learning_rate": 3.417219089742959e-05, + "loss": 2.3286, + "step": 57830 + }, + { + "epoch": 7.81, + "learning_rate": 3.4131758309740144e-05, + "loss": 2.3525, + "step": 57840 + }, + { + "epoch": 7.81, + "learning_rate": 3.409134658512288e-05, + "loss": 2.3236, + "step": 57850 + }, + { + "epoch": 7.81, + "learning_rate": 3.405095573085444e-05, + "loss": 2.3538, + "step": 57860 + }, + { + "epoch": 7.82, + "learning_rate": 3.401058575420734e-05, + "loss": 2.3131, + "step": 57870 + }, + { + "epoch": 7.82, + "learning_rate": 3.397023666245069e-05, + "loss": 2.3593, + "step": 57880 + }, + { + "epoch": 7.82, + "learning_rate": 3.3929908462849665e-05, + "loss": 2.3258, + "step": 57890 + }, + { + "epoch": 7.82, + "learning_rate": 3.3889601162665655e-05, + "loss": 2.35, + "step": 57900 + }, + { + "epoch": 7.82, + "learning_rate": 3.3849314769156385e-05, + "loss": 2.3291, + "step": 57910 + }, + { + "epoch": 7.82, + "learning_rate": 3.3809049289575684e-05, + "loss": 2.3268, + "step": 57920 + }, + { + "epoch": 7.82, + "learning_rate": 3.376880473117383e-05, + "loss": 2.3231, + "step": 57930 + }, + { + "epoch": 7.83, + "learning_rate": 3.372858110119706e-05, + "loss": 2.3359, + "step": 57940 + }, + { + "epoch": 7.83, + "learning_rate": 3.368837840688815e-05, + "loss": 2.3608, + "step": 57950 + }, + { + "epoch": 7.83, + "learning_rate": 3.364819665548575e-05, + "loss": 2.3473, + "step": 57960 + }, + { + "epoch": 7.83, + "learning_rate": 3.360803585422509e-05, + "loss": 2.3503, + "step": 57970 + }, + { + "epoch": 7.83, + "learning_rate": 3.356789601033737e-05, + "loss": 2.3186, + "step": 57980 + }, + { + "epoch": 7.83, + "learning_rate": 3.3527777131050124e-05, + "loss": 2.362, + "step": 57990 + }, + { + "epoch": 7.83, + "learning_rate": 3.348767922358719e-05, + "loss": 2.3553, + "step": 58000 + }, + { + "epoch": 7.83, + "eval_loss": 2.4380388259887695, + "eval_runtime": 1269.0276, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 5.0, + "step": 58000 + }, + { + "epoch": 7.83, + "learning_rate": 3.344760229516841e-05, + "loss": 2.3205, + "step": 58010 + }, + { + "epoch": 7.84, + "learning_rate": 3.3407546353010075e-05, + "loss": 2.366, + "step": 58020 + }, + { + "epoch": 7.84, + "learning_rate": 3.336751140432452e-05, + "loss": 2.3337, + "step": 58030 + }, + { + "epoch": 7.84, + "learning_rate": 3.3327497456320475e-05, + "loss": 2.3345, + "step": 58040 + }, + { + "epoch": 7.84, + "learning_rate": 3.3287504516202756e-05, + "loss": 2.3426, + "step": 58050 + }, + { + "epoch": 7.84, + "learning_rate": 3.3247532591172426e-05, + "loss": 2.3443, + "step": 58060 + }, + { + "epoch": 7.84, + "learning_rate": 3.320758168842678e-05, + "loss": 2.3106, + "step": 58070 + }, + { + "epoch": 7.84, + "learning_rate": 3.316765181515925e-05, + "loss": 2.3283, + "step": 58080 + }, + { + "epoch": 7.85, + "learning_rate": 3.3127742978559735e-05, + "loss": 2.3228, + "step": 58090 + }, + { + "epoch": 7.85, + "learning_rate": 3.308785518581396e-05, + "loss": 2.3414, + "step": 58100 + }, + { + "epoch": 7.85, + "learning_rate": 3.3047988444104185e-05, + "loss": 2.3316, + "step": 58110 + }, + { + "epoch": 7.85, + "learning_rate": 3.30081427606087e-05, + "loss": 2.3269, + "step": 58120 + }, + { + "epoch": 7.85, + "learning_rate": 3.296831814250211e-05, + "loss": 2.3251, + "step": 58130 + }, + { + "epoch": 7.85, + "learning_rate": 3.292851459695522e-05, + "loss": 2.3408, + "step": 58140 + }, + { + "epoch": 7.85, + "learning_rate": 3.288873213113486e-05, + "loss": 2.3318, + "step": 58150 + }, + { + "epoch": 7.86, + "learning_rate": 3.284897075220432e-05, + "loss": 2.3246, + "step": 58160 + }, + { + "epoch": 7.86, + "learning_rate": 3.28092304673229e-05, + "loss": 2.352, + "step": 58170 + }, + { + "epoch": 7.86, + "learning_rate": 3.2769511283646284e-05, + "loss": 2.3389, + "step": 58180 + }, + { + "epoch": 7.86, + "learning_rate": 3.272981320832616e-05, + "loss": 2.335, + "step": 58190 + }, + { + "epoch": 7.86, + "learning_rate": 3.269013624851057e-05, + "loss": 2.3162, + "step": 58200 + }, + { + "epoch": 7.86, + "learning_rate": 3.265048041134359e-05, + "loss": 2.3378, + "step": 58210 + }, + { + "epoch": 7.86, + "learning_rate": 3.261084570396573e-05, + "loss": 2.3373, + "step": 58220 + }, + { + "epoch": 7.86, + "learning_rate": 3.2571232133513526e-05, + "loss": 2.3507, + "step": 58230 + }, + { + "epoch": 7.87, + "learning_rate": 3.253163970711961e-05, + "loss": 2.3384, + "step": 58240 + }, + { + "epoch": 7.87, + "learning_rate": 3.24920684319131e-05, + "loss": 2.3159, + "step": 58250 + }, + { + "epoch": 7.87, + "learning_rate": 3.245251831501904e-05, + "loss": 2.3311, + "step": 58260 + }, + { + "epoch": 7.87, + "learning_rate": 3.241298936355885e-05, + "loss": 2.3213, + "step": 58270 + }, + { + "epoch": 7.87, + "learning_rate": 3.237348158465001e-05, + "loss": 2.3428, + "step": 58280 + }, + { + "epoch": 7.87, + "learning_rate": 3.233399498540625e-05, + "loss": 2.3247, + "step": 58290 + }, + { + "epoch": 7.87, + "learning_rate": 3.2294529572937415e-05, + "loss": 2.3331, + "step": 58300 + }, + { + "epoch": 7.88, + "learning_rate": 3.2255085354349696e-05, + "loss": 2.3201, + "step": 58310 + }, + { + "epoch": 7.88, + "learning_rate": 3.2215662336745294e-05, + "loss": 2.2999, + "step": 58320 + }, + { + "epoch": 7.88, + "learning_rate": 3.2176260527222684e-05, + "loss": 2.3316, + "step": 58330 + }, + { + "epoch": 7.88, + "learning_rate": 3.2136879932876494e-05, + "loss": 2.3285, + "step": 58340 + }, + { + "epoch": 7.88, + "learning_rate": 3.209752056079747e-05, + "loss": 2.3398, + "step": 58350 + }, + { + "epoch": 7.88, + "learning_rate": 3.205818241807275e-05, + "loss": 2.3185, + "step": 58360 + }, + { + "epoch": 7.88, + "learning_rate": 3.2018865511785394e-05, + "loss": 2.3533, + "step": 58370 + }, + { + "epoch": 7.88, + "learning_rate": 3.197956984901481e-05, + "loss": 2.333, + "step": 58380 + }, + { + "epoch": 7.89, + "learning_rate": 3.194029543683642e-05, + "loss": 2.3383, + "step": 58390 + }, + { + "epoch": 7.89, + "learning_rate": 3.190104228232205e-05, + "loss": 2.3555, + "step": 58400 + }, + { + "epoch": 7.89, + "learning_rate": 3.18618103925395e-05, + "loss": 2.3419, + "step": 58410 + }, + { + "epoch": 7.89, + "learning_rate": 3.1822599774552826e-05, + "loss": 2.3144, + "step": 58420 + }, + { + "epoch": 7.89, + "learning_rate": 3.178341043542221e-05, + "loss": 2.3317, + "step": 58430 + }, + { + "epoch": 7.89, + "learning_rate": 3.1744242382204015e-05, + "loss": 2.3331, + "step": 58440 + }, + { + "epoch": 7.89, + "learning_rate": 3.170509562195086e-05, + "loss": 2.3555, + "step": 58450 + }, + { + "epoch": 7.9, + "learning_rate": 3.1665970161711406e-05, + "loss": 2.3398, + "step": 58460 + }, + { + "epoch": 7.9, + "learning_rate": 3.162686600853054e-05, + "loss": 2.3318, + "step": 58470 + }, + { + "epoch": 7.9, + "learning_rate": 3.1587783169449256e-05, + "loss": 2.3225, + "step": 58480 + }, + { + "epoch": 7.9, + "learning_rate": 3.154872165150481e-05, + "loss": 2.3441, + "step": 58490 + }, + { + "epoch": 7.9, + "learning_rate": 3.150968146173058e-05, + "loss": 2.3314, + "step": 58500 + }, + { + "epoch": 7.9, + "learning_rate": 3.147066260715604e-05, + "loss": 2.3313, + "step": 58510 + }, + { + "epoch": 7.9, + "learning_rate": 3.1431665094806883e-05, + "loss": 2.3191, + "step": 58520 + }, + { + "epoch": 7.91, + "learning_rate": 3.1392688931704904e-05, + "loss": 2.315, + "step": 58530 + }, + { + "epoch": 7.91, + "learning_rate": 3.135373412486819e-05, + "loss": 2.3557, + "step": 58540 + }, + { + "epoch": 7.91, + "learning_rate": 3.131480068131082e-05, + "loss": 2.3326, + "step": 58550 + }, + { + "epoch": 7.91, + "learning_rate": 3.1275888608043133e-05, + "loss": 2.3353, + "step": 58560 + }, + { + "epoch": 7.91, + "learning_rate": 3.123699791207156e-05, + "loss": 2.342, + "step": 58570 + }, + { + "epoch": 7.91, + "learning_rate": 3.1198128600398656e-05, + "loss": 2.3548, + "step": 58580 + }, + { + "epoch": 7.91, + "learning_rate": 3.115928068002328e-05, + "loss": 2.3248, + "step": 58590 + }, + { + "epoch": 7.91, + "learning_rate": 3.112045415794027e-05, + "loss": 2.3239, + "step": 58600 + }, + { + "epoch": 7.92, + "learning_rate": 3.108164904114069e-05, + "loss": 2.336, + "step": 58610 + }, + { + "epoch": 7.92, + "learning_rate": 3.104286533661169e-05, + "loss": 2.3429, + "step": 58620 + }, + { + "epoch": 7.92, + "learning_rate": 3.100410305133669e-05, + "loss": 2.3351, + "step": 58630 + }, + { + "epoch": 7.92, + "learning_rate": 3.096536219229512e-05, + "loss": 2.3383, + "step": 58640 + }, + { + "epoch": 7.92, + "learning_rate": 3.0926642766462636e-05, + "loss": 2.321, + "step": 58650 + }, + { + "epoch": 7.92, + "learning_rate": 3.0887944780810956e-05, + "loss": 2.3209, + "step": 58660 + }, + { + "epoch": 7.92, + "learning_rate": 3.0849268242307986e-05, + "loss": 2.3739, + "step": 58670 + }, + { + "epoch": 7.93, + "learning_rate": 3.081061315791782e-05, + "loss": 2.3601, + "step": 58680 + }, + { + "epoch": 7.93, + "learning_rate": 3.077197953460063e-05, + "loss": 2.3201, + "step": 58690 + }, + { + "epoch": 7.93, + "learning_rate": 3.073336737931272e-05, + "loss": 2.3414, + "step": 58700 + }, + { + "epoch": 7.93, + "learning_rate": 3.069477669900647e-05, + "loss": 2.3366, + "step": 58710 + }, + { + "epoch": 7.93, + "learning_rate": 3.065620750063059e-05, + "loss": 2.3282, + "step": 58720 + }, + { + "epoch": 7.93, + "learning_rate": 3.061765979112972e-05, + "loss": 2.3203, + "step": 58730 + }, + { + "epoch": 7.93, + "learning_rate": 3.057913357744471e-05, + "loss": 2.3464, + "step": 58740 + }, + { + "epoch": 7.93, + "learning_rate": 3.0540628866512564e-05, + "loss": 2.3164, + "step": 58750 + }, + { + "epoch": 7.94, + "learning_rate": 3.0502145665266314e-05, + "loss": 2.3191, + "step": 58760 + }, + { + "epoch": 7.94, + "learning_rate": 3.046368398063529e-05, + "loss": 2.3284, + "step": 58770 + }, + { + "epoch": 7.94, + "learning_rate": 3.04252438195448e-05, + "loss": 2.3408, + "step": 58780 + }, + { + "epoch": 7.94, + "learning_rate": 3.0386825188916337e-05, + "loss": 2.3226, + "step": 58790 + }, + { + "epoch": 7.94, + "learning_rate": 3.0348428095667437e-05, + "loss": 2.3367, + "step": 58800 + }, + { + "epoch": 7.94, + "learning_rate": 3.031005254671194e-05, + "loss": 2.3602, + "step": 58810 + }, + { + "epoch": 7.94, + "learning_rate": 3.0271698548959612e-05, + "loss": 2.3462, + "step": 58820 + }, + { + "epoch": 7.95, + "learning_rate": 3.0233366109316498e-05, + "loss": 2.3453, + "step": 58830 + }, + { + "epoch": 7.95, + "learning_rate": 3.0195055234684612e-05, + "loss": 2.3334, + "step": 58840 + }, + { + "epoch": 7.95, + "learning_rate": 3.0156765931962135e-05, + "loss": 2.333, + "step": 58850 + }, + { + "epoch": 7.95, + "learning_rate": 3.0118498208043453e-05, + "loss": 2.3251, + "step": 58860 + }, + { + "epoch": 7.95, + "learning_rate": 3.0080252069818933e-05, + "loss": 2.3161, + "step": 58870 + }, + { + "epoch": 7.95, + "learning_rate": 3.0042027524175255e-05, + "loss": 2.3088, + "step": 58880 + }, + { + "epoch": 7.95, + "learning_rate": 3.0003824577994878e-05, + "loss": 2.3215, + "step": 58890 + }, + { + "epoch": 7.96, + "learning_rate": 2.9965643238156734e-05, + "loss": 2.3246, + "step": 58900 + }, + { + "epoch": 7.96, + "learning_rate": 2.992748351153559e-05, + "loss": 2.3314, + "step": 58910 + }, + { + "epoch": 7.96, + "learning_rate": 2.988934540500256e-05, + "loss": 2.3285, + "step": 58920 + }, + { + "epoch": 7.96, + "learning_rate": 2.985122892542463e-05, + "loss": 2.3271, + "step": 58930 + }, + { + "epoch": 7.96, + "learning_rate": 2.9813134079664984e-05, + "loss": 2.3191, + "step": 58940 + }, + { + "epoch": 7.96, + "learning_rate": 2.9775060874583022e-05, + "loss": 2.3279, + "step": 58950 + }, + { + "epoch": 7.96, + "learning_rate": 2.973700931703406e-05, + "loss": 2.3639, + "step": 58960 + }, + { + "epoch": 7.96, + "learning_rate": 2.9698979413869718e-05, + "loss": 2.3162, + "step": 58970 + }, + { + "epoch": 7.97, + "learning_rate": 2.9660971171937463e-05, + "loss": 2.3423, + "step": 58980 + }, + { + "epoch": 7.97, + "learning_rate": 2.9622984598081128e-05, + "loss": 2.3376, + "step": 58990 + }, + { + "epoch": 7.97, + "learning_rate": 2.958501969914044e-05, + "loss": 2.3136, + "step": 59000 + }, + { + "epoch": 7.97, + "eval_loss": 2.4326837062835693, + "eval_runtime": 1269.0324, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 5.0, + "step": 59000 + }, + { + "epoch": 7.97, + "learning_rate": 2.954707648195136e-05, + "loss": 2.3448, + "step": 59010 + }, + { + "epoch": 7.97, + "learning_rate": 2.9509154953345926e-05, + "loss": 2.3091, + "step": 59020 + }, + { + "epoch": 7.97, + "learning_rate": 2.947125512015209e-05, + "loss": 2.3125, + "step": 59030 + }, + { + "epoch": 7.97, + "learning_rate": 2.9433376989194186e-05, + "loss": 2.3406, + "step": 59040 + }, + { + "epoch": 7.98, + "learning_rate": 2.9395520567292396e-05, + "loss": 2.3109, + "step": 59050 + }, + { + "epoch": 7.98, + "learning_rate": 2.9357685861263224e-05, + "loss": 2.3572, + "step": 59060 + }, + { + "epoch": 7.98, + "learning_rate": 2.931987287791897e-05, + "loss": 2.3314, + "step": 59070 + }, + { + "epoch": 7.98, + "learning_rate": 2.9282081624068305e-05, + "loss": 2.3333, + "step": 59080 + }, + { + "epoch": 7.98, + "learning_rate": 2.9244312106515833e-05, + "loss": 2.368, + "step": 59090 + }, + { + "epoch": 7.98, + "learning_rate": 2.9206564332062232e-05, + "loss": 2.3189, + "step": 59100 + }, + { + "epoch": 7.98, + "learning_rate": 2.916883830750445e-05, + "loss": 2.3473, + "step": 59110 + }, + { + "epoch": 7.98, + "learning_rate": 2.9131134039635202e-05, + "loss": 2.2917, + "step": 59120 + }, + { + "epoch": 7.99, + "learning_rate": 2.909345153524359e-05, + "loss": 2.3035, + "step": 59130 + }, + { + "epoch": 7.99, + "learning_rate": 2.9055790801114603e-05, + "loss": 2.3223, + "step": 59140 + }, + { + "epoch": 7.99, + "learning_rate": 2.9018151844029452e-05, + "loss": 2.3257, + "step": 59150 + }, + { + "epoch": 7.99, + "learning_rate": 2.898053467076533e-05, + "loss": 2.3013, + "step": 59160 + }, + { + "epoch": 7.99, + "learning_rate": 2.8942939288095506e-05, + "loss": 2.3185, + "step": 59170 + }, + { + "epoch": 7.99, + "learning_rate": 2.8905365702789362e-05, + "loss": 2.3406, + "step": 59180 + }, + { + "epoch": 7.99, + "learning_rate": 2.8867813921612305e-05, + "loss": 2.3565, + "step": 59190 + }, + { + "epoch": 8.0, + "learning_rate": 2.8830283951325987e-05, + "loss": 2.3323, + "step": 59200 + }, + { + "epoch": 8.0, + "learning_rate": 2.879277579868783e-05, + "loss": 2.3337, + "step": 59210 + }, + { + "epoch": 8.0, + "learning_rate": 2.8755289470451622e-05, + "loss": 2.3194, + "step": 59220 + }, + { + "epoch": 8.0, + "learning_rate": 2.8717824973367014e-05, + "loss": 2.32, + "step": 59230 + }, + { + "epoch": 8.0, + "learning_rate": 2.8680382314179883e-05, + "loss": 2.305, + "step": 59240 + }, + { + "epoch": 8.0, + "learning_rate": 2.8642961499632084e-05, + "loss": 2.2792, + "step": 59250 + }, + { + "epoch": 8.0, + "learning_rate": 2.8605562536461557e-05, + "loss": 2.2701, + "step": 59260 + }, + { + "epoch": 8.01, + "learning_rate": 2.856818543140227e-05, + "loss": 2.2728, + "step": 59270 + }, + { + "epoch": 8.01, + "learning_rate": 2.85308301911843e-05, + "loss": 2.261, + "step": 59280 + }, + { + "epoch": 8.01, + "learning_rate": 2.8493496822533824e-05, + "loss": 2.2888, + "step": 59290 + }, + { + "epoch": 8.01, + "learning_rate": 2.8456185332172992e-05, + "loss": 2.2691, + "step": 59300 + }, + { + "epoch": 8.01, + "learning_rate": 2.84188957268201e-05, + "loss": 2.2804, + "step": 59310 + }, + { + "epoch": 8.01, + "learning_rate": 2.8381628013189376e-05, + "loss": 2.261, + "step": 59320 + }, + { + "epoch": 8.01, + "learning_rate": 2.834438219799129e-05, + "loss": 2.2752, + "step": 59330 + }, + { + "epoch": 8.01, + "learning_rate": 2.8307158287932253e-05, + "loss": 2.2448, + "step": 59340 + }, + { + "epoch": 8.02, + "learning_rate": 2.8269956289714706e-05, + "loss": 2.2735, + "step": 59350 + }, + { + "epoch": 8.02, + "learning_rate": 2.823277621003721e-05, + "loss": 2.2932, + "step": 59360 + }, + { + "epoch": 8.02, + "learning_rate": 2.8195618055594344e-05, + "loss": 2.3017, + "step": 59370 + }, + { + "epoch": 8.02, + "learning_rate": 2.8158481833076785e-05, + "loss": 2.2872, + "step": 59380 + }, + { + "epoch": 8.02, + "learning_rate": 2.812136754917122e-05, + "loss": 2.2832, + "step": 59390 + }, + { + "epoch": 8.02, + "learning_rate": 2.8084275210560386e-05, + "loss": 2.3165, + "step": 59400 + }, + { + "epoch": 8.02, + "learning_rate": 2.8047204823923024e-05, + "loss": 2.2778, + "step": 59410 + }, + { + "epoch": 8.03, + "learning_rate": 2.801015639593408e-05, + "loss": 2.292, + "step": 59420 + }, + { + "epoch": 8.03, + "learning_rate": 2.79731299332644e-05, + "loss": 2.2853, + "step": 59430 + }, + { + "epoch": 8.03, + "learning_rate": 2.79361254425809e-05, + "loss": 2.2774, + "step": 59440 + }, + { + "epoch": 8.03, + "learning_rate": 2.7899142930546547e-05, + "loss": 2.2974, + "step": 59450 + }, + { + "epoch": 8.03, + "learning_rate": 2.7862182403820355e-05, + "loss": 2.2687, + "step": 59460 + }, + { + "epoch": 8.03, + "learning_rate": 2.7825243869057444e-05, + "loss": 2.2674, + "step": 59470 + }, + { + "epoch": 8.03, + "learning_rate": 2.7788327332908865e-05, + "loss": 2.2996, + "step": 59480 + }, + { + "epoch": 8.03, + "learning_rate": 2.7751432802021783e-05, + "loss": 2.2919, + "step": 59490 + }, + { + "epoch": 8.04, + "learning_rate": 2.7714560283039316e-05, + "loss": 2.2939, + "step": 59500 + }, + { + "epoch": 8.04, + "learning_rate": 2.7677709782600756e-05, + "loss": 2.2781, + "step": 59510 + }, + { + "epoch": 8.04, + "learning_rate": 2.764088130734134e-05, + "loss": 2.2742, + "step": 59520 + }, + { + "epoch": 8.04, + "learning_rate": 2.76040748638923e-05, + "loss": 2.2869, + "step": 59530 + }, + { + "epoch": 8.04, + "learning_rate": 2.7567290458881003e-05, + "loss": 2.2744, + "step": 59540 + }, + { + "epoch": 8.04, + "learning_rate": 2.753052809893073e-05, + "loss": 2.2876, + "step": 59550 + }, + { + "epoch": 8.04, + "learning_rate": 2.749378779066096e-05, + "loss": 2.2808, + "step": 59560 + }, + { + "epoch": 8.05, + "learning_rate": 2.7457069540687026e-05, + "loss": 2.2729, + "step": 59570 + }, + { + "epoch": 8.05, + "learning_rate": 2.7420373355620395e-05, + "loss": 2.2865, + "step": 59580 + }, + { + "epoch": 8.05, + "learning_rate": 2.738369924206848e-05, + "loss": 2.279, + "step": 59590 + }, + { + "epoch": 8.05, + "learning_rate": 2.7347047206634866e-05, + "loss": 2.2872, + "step": 59600 + }, + { + "epoch": 8.05, + "learning_rate": 2.7310417255918986e-05, + "loss": 2.2802, + "step": 59610 + }, + { + "epoch": 8.05, + "learning_rate": 2.7273809396516422e-05, + "loss": 2.2981, + "step": 59620 + }, + { + "epoch": 8.05, + "learning_rate": 2.7237223635018714e-05, + "loss": 2.262, + "step": 59630 + }, + { + "epoch": 8.05, + "learning_rate": 2.7200659978013406e-05, + "loss": 2.2753, + "step": 59640 + }, + { + "epoch": 8.06, + "learning_rate": 2.716411843208417e-05, + "loss": 2.3008, + "step": 59650 + }, + { + "epoch": 8.06, + "learning_rate": 2.712759900381059e-05, + "loss": 2.2897, + "step": 59660 + }, + { + "epoch": 8.06, + "learning_rate": 2.709110169976831e-05, + "loss": 2.2793, + "step": 59670 + }, + { + "epoch": 8.06, + "learning_rate": 2.7054626526528932e-05, + "loss": 2.2619, + "step": 59680 + }, + { + "epoch": 8.06, + "learning_rate": 2.701817349066021e-05, + "loss": 2.2667, + "step": 59690 + }, + { + "epoch": 8.06, + "learning_rate": 2.6981742598725792e-05, + "loss": 2.2816, + "step": 59700 + }, + { + "epoch": 8.06, + "learning_rate": 2.694533385728536e-05, + "loss": 2.269, + "step": 59710 + }, + { + "epoch": 8.07, + "learning_rate": 2.6908947272894626e-05, + "loss": 2.2879, + "step": 59720 + }, + { + "epoch": 8.07, + "learning_rate": 2.687258285210528e-05, + "loss": 2.2922, + "step": 59730 + }, + { + "epoch": 8.07, + "learning_rate": 2.6836240601465097e-05, + "loss": 2.2951, + "step": 59740 + }, + { + "epoch": 8.07, + "learning_rate": 2.6799920527517817e-05, + "loss": 2.2841, + "step": 59750 + }, + { + "epoch": 8.07, + "learning_rate": 2.6763622636803133e-05, + "loss": 2.2619, + "step": 59760 + }, + { + "epoch": 8.07, + "learning_rate": 2.6727346935856793e-05, + "loss": 2.2761, + "step": 59770 + }, + { + "epoch": 8.07, + "learning_rate": 2.6691093431210596e-05, + "loss": 2.2632, + "step": 59780 + }, + { + "epoch": 8.08, + "learning_rate": 2.665486212939228e-05, + "loss": 2.2778, + "step": 59790 + }, + { + "epoch": 8.08, + "learning_rate": 2.6618653036925586e-05, + "loss": 2.2617, + "step": 59800 + }, + { + "epoch": 8.08, + "learning_rate": 2.6582466160330284e-05, + "loss": 2.2832, + "step": 59810 + }, + { + "epoch": 8.08, + "learning_rate": 2.6546301506122098e-05, + "loss": 2.3091, + "step": 59820 + }, + { + "epoch": 8.08, + "learning_rate": 2.6510159080812832e-05, + "loss": 2.2926, + "step": 59830 + }, + { + "epoch": 8.08, + "learning_rate": 2.6474038890910197e-05, + "loss": 2.3009, + "step": 59840 + }, + { + "epoch": 8.08, + "learning_rate": 2.6437940942918045e-05, + "loss": 2.2843, + "step": 59850 + }, + { + "epoch": 8.08, + "learning_rate": 2.6401865243335974e-05, + "loss": 2.2865, + "step": 59860 + }, + { + "epoch": 8.09, + "learning_rate": 2.6365811798659818e-05, + "loss": 2.2808, + "step": 59870 + }, + { + "epoch": 8.09, + "learning_rate": 2.6329780615381307e-05, + "loss": 2.283, + "step": 59880 + }, + { + "epoch": 8.09, + "learning_rate": 2.6293771699988132e-05, + "loss": 2.3176, + "step": 59890 + }, + { + "epoch": 8.09, + "learning_rate": 2.625778505896402e-05, + "loss": 2.3082, + "step": 59900 + }, + { + "epoch": 8.09, + "learning_rate": 2.6221820698788644e-05, + "loss": 2.2947, + "step": 59910 + }, + { + "epoch": 8.09, + "learning_rate": 2.618587862593775e-05, + "loss": 2.28, + "step": 59920 + }, + { + "epoch": 8.09, + "learning_rate": 2.614995884688297e-05, + "loss": 2.2873, + "step": 59930 + }, + { + "epoch": 8.1, + "learning_rate": 2.6114061368092053e-05, + "loss": 2.2876, + "step": 59940 + }, + { + "epoch": 8.1, + "learning_rate": 2.6078186196028522e-05, + "loss": 2.2325, + "step": 59950 + }, + { + "epoch": 8.1, + "learning_rate": 2.604233333715212e-05, + "loss": 2.2868, + "step": 59960 + }, + { + "epoch": 8.1, + "learning_rate": 2.6006502797918404e-05, + "loss": 2.2874, + "step": 59970 + }, + { + "epoch": 8.1, + "learning_rate": 2.5970694584778962e-05, + "loss": 2.2909, + "step": 59980 + }, + { + "epoch": 8.1, + "learning_rate": 2.5934908704181467e-05, + "loss": 2.2785, + "step": 59990 + }, + { + "epoch": 8.1, + "learning_rate": 2.589914516256932e-05, + "loss": 2.2752, + "step": 60000 + }, + { + "epoch": 8.1, + "eval_loss": 2.434035539627075, + "eval_runtime": 1269.0314, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 5.0, + "step": 60000 + }, + { + "epoch": 8.1, + "learning_rate": 2.586340396638216e-05, + "loss": 2.2876, + "step": 60010 + }, + { + "epoch": 8.11, + "learning_rate": 2.582768512205545e-05, + "loss": 2.2852, + "step": 60020 + }, + { + "epoch": 8.11, + "learning_rate": 2.579198863602076e-05, + "loss": 2.2998, + "step": 60030 + }, + { + "epoch": 8.11, + "learning_rate": 2.5756314514705413e-05, + "loss": 2.2887, + "step": 60040 + }, + { + "epoch": 8.11, + "learning_rate": 2.572066276453295e-05, + "loss": 2.2831, + "step": 60050 + }, + { + "epoch": 8.11, + "learning_rate": 2.5685033391922744e-05, + "loss": 2.2774, + "step": 60060 + }, + { + "epoch": 8.11, + "learning_rate": 2.5649426403290123e-05, + "loss": 2.2884, + "step": 60070 + }, + { + "epoch": 8.11, + "learning_rate": 2.561384180504653e-05, + "loss": 2.2725, + "step": 60080 + }, + { + "epoch": 8.12, + "learning_rate": 2.5578279603599156e-05, + "loss": 2.3079, + "step": 60090 + }, + { + "epoch": 8.12, + "learning_rate": 2.5542739805351376e-05, + "loss": 2.2875, + "step": 60100 + }, + { + "epoch": 8.12, + "learning_rate": 2.5507222416702374e-05, + "loss": 2.3166, + "step": 60110 + }, + { + "epoch": 8.12, + "learning_rate": 2.5471727444047413e-05, + "loss": 2.2641, + "step": 60120 + }, + { + "epoch": 8.12, + "learning_rate": 2.5436254893777704e-05, + "loss": 2.2877, + "step": 60130 + }, + { + "epoch": 8.12, + "learning_rate": 2.540080477228023e-05, + "loss": 2.3026, + "step": 60140 + }, + { + "epoch": 8.12, + "learning_rate": 2.5365377085938228e-05, + "loss": 2.2566, + "step": 60150 + }, + { + "epoch": 8.13, + "learning_rate": 2.5329971841130687e-05, + "loss": 2.2719, + "step": 60160 + }, + { + "epoch": 8.13, + "learning_rate": 2.529458904423271e-05, + "loss": 2.2718, + "step": 60170 + }, + { + "epoch": 8.13, + "learning_rate": 2.5259228701615154e-05, + "loss": 2.2822, + "step": 60180 + }, + { + "epoch": 8.13, + "learning_rate": 2.5223890819645055e-05, + "loss": 2.2728, + "step": 60190 + }, + { + "epoch": 8.13, + "learning_rate": 2.5188575404685223e-05, + "loss": 2.2512, + "step": 60200 + }, + { + "epoch": 8.13, + "learning_rate": 2.5153282463094575e-05, + "loss": 2.2953, + "step": 60210 + }, + { + "epoch": 8.13, + "learning_rate": 2.5118012001227912e-05, + "loss": 2.2905, + "step": 60220 + }, + { + "epoch": 8.13, + "learning_rate": 2.5082764025435865e-05, + "loss": 2.2818, + "step": 60230 + }, + { + "epoch": 8.14, + "learning_rate": 2.504753854206526e-05, + "loss": 2.28, + "step": 60240 + }, + { + "epoch": 8.14, + "learning_rate": 2.5012335557458658e-05, + "loss": 2.2842, + "step": 60250 + }, + { + "epoch": 8.14, + "learning_rate": 2.4977155077954752e-05, + "loss": 2.2634, + "step": 60260 + }, + { + "epoch": 8.14, + "learning_rate": 2.4941997109888028e-05, + "loss": 2.2988, + "step": 60270 + }, + { + "epoch": 8.14, + "learning_rate": 2.4906861659589005e-05, + "loss": 2.2756, + "step": 60280 + }, + { + "epoch": 8.14, + "learning_rate": 2.4871748733384073e-05, + "loss": 2.2797, + "step": 60290 + }, + { + "epoch": 8.14, + "learning_rate": 2.4836658337595705e-05, + "loss": 2.2994, + "step": 60300 + }, + { + "epoch": 8.15, + "learning_rate": 2.480159047854215e-05, + "loss": 2.3133, + "step": 60310 + }, + { + "epoch": 8.15, + "learning_rate": 2.4766545162537722e-05, + "loss": 2.314, + "step": 60320 + }, + { + "epoch": 8.15, + "learning_rate": 2.4731522395892624e-05, + "loss": 2.2846, + "step": 60330 + }, + { + "epoch": 8.15, + "learning_rate": 2.4696522184912944e-05, + "loss": 2.2929, + "step": 60340 + }, + { + "epoch": 8.15, + "learning_rate": 2.4661544535900857e-05, + "loss": 2.2917, + "step": 60350 + }, + { + "epoch": 8.15, + "learning_rate": 2.462658945515436e-05, + "loss": 2.293, + "step": 60360 + }, + { + "epoch": 8.15, + "learning_rate": 2.459165694896742e-05, + "loss": 2.2715, + "step": 60370 + }, + { + "epoch": 8.15, + "learning_rate": 2.4556747023629868e-05, + "loss": 2.3106, + "step": 60380 + }, + { + "epoch": 8.16, + "learning_rate": 2.4521859685427647e-05, + "loss": 2.2864, + "step": 60390 + }, + { + "epoch": 8.16, + "learning_rate": 2.4486994940642463e-05, + "loss": 2.2673, + "step": 60400 + }, + { + "epoch": 8.16, + "learning_rate": 2.4452152795552028e-05, + "loss": 2.275, + "step": 60410 + }, + { + "epoch": 8.16, + "learning_rate": 2.441733325642994e-05, + "loss": 2.2626, + "step": 60420 + }, + { + "epoch": 8.16, + "learning_rate": 2.4382536329545748e-05, + "loss": 2.2847, + "step": 60430 + }, + { + "epoch": 8.16, + "learning_rate": 2.4347762021165012e-05, + "loss": 2.276, + "step": 60440 + }, + { + "epoch": 8.16, + "learning_rate": 2.4313010337549087e-05, + "loss": 2.2585, + "step": 60450 + }, + { + "epoch": 8.17, + "learning_rate": 2.427828128495532e-05, + "loss": 2.3073, + "step": 60460 + }, + { + "epoch": 8.17, + "learning_rate": 2.4243574869636955e-05, + "loss": 2.303, + "step": 60470 + }, + { + "epoch": 8.17, + "learning_rate": 2.4208891097843232e-05, + "loss": 2.2554, + "step": 60480 + }, + { + "epoch": 8.17, + "learning_rate": 2.417422997581923e-05, + "loss": 2.2796, + "step": 60490 + }, + { + "epoch": 8.17, + "learning_rate": 2.4139591509806e-05, + "loss": 2.269, + "step": 60500 + }, + { + "epoch": 8.17, + "learning_rate": 2.4104975706040463e-05, + "loss": 2.2763, + "step": 60510 + }, + { + "epoch": 8.17, + "learning_rate": 2.4070382570755486e-05, + "loss": 2.2793, + "step": 60520 + }, + { + "epoch": 8.18, + "learning_rate": 2.403581211017992e-05, + "loss": 2.2965, + "step": 60530 + }, + { + "epoch": 8.18, + "learning_rate": 2.4001264330538434e-05, + "loss": 2.2731, + "step": 60540 + }, + { + "epoch": 8.18, + "learning_rate": 2.3966739238051653e-05, + "loss": 2.2783, + "step": 60550 + }, + { + "epoch": 8.18, + "learning_rate": 2.3932236838936088e-05, + "loss": 2.2589, + "step": 60560 + }, + { + "epoch": 8.18, + "learning_rate": 2.3897757139404268e-05, + "loss": 2.2824, + "step": 60570 + }, + { + "epoch": 8.18, + "learning_rate": 2.3863300145664492e-05, + "loss": 2.2762, + "step": 60580 + }, + { + "epoch": 8.18, + "learning_rate": 2.3828865863921086e-05, + "loss": 2.2991, + "step": 60590 + }, + { + "epoch": 8.18, + "learning_rate": 2.3794454300374188e-05, + "loss": 2.2801, + "step": 60600 + }, + { + "epoch": 8.19, + "learning_rate": 2.3760065461219906e-05, + "loss": 2.2592, + "step": 60610 + }, + { + "epoch": 8.19, + "learning_rate": 2.372569935265029e-05, + "loss": 2.2746, + "step": 60620 + }, + { + "epoch": 8.19, + "learning_rate": 2.369135598085322e-05, + "loss": 2.2901, + "step": 60630 + }, + { + "epoch": 8.19, + "learning_rate": 2.365703535201252e-05, + "loss": 2.2802, + "step": 60640 + }, + { + "epoch": 8.19, + "learning_rate": 2.3622737472307914e-05, + "loss": 2.3105, + "step": 60650 + }, + { + "epoch": 8.19, + "learning_rate": 2.3588462347914987e-05, + "loss": 2.2866, + "step": 60660 + }, + { + "epoch": 8.19, + "learning_rate": 2.3554209985005345e-05, + "loss": 2.2836, + "step": 60670 + }, + { + "epoch": 8.2, + "learning_rate": 2.3519980389746402e-05, + "loss": 2.307, + "step": 60680 + }, + { + "epoch": 8.2, + "learning_rate": 2.348577356830146e-05, + "loss": 2.2732, + "step": 60690 + }, + { + "epoch": 8.2, + "learning_rate": 2.3451589526829723e-05, + "loss": 2.3188, + "step": 60700 + }, + { + "epoch": 8.2, + "learning_rate": 2.341742827148642e-05, + "loss": 2.2739, + "step": 60710 + }, + { + "epoch": 8.2, + "learning_rate": 2.3383289808422516e-05, + "loss": 2.2923, + "step": 60720 + }, + { + "epoch": 8.2, + "learning_rate": 2.3349174143784943e-05, + "loss": 2.3051, + "step": 60730 + }, + { + "epoch": 8.2, + "learning_rate": 2.3315081283716502e-05, + "loss": 2.3147, + "step": 60740 + }, + { + "epoch": 8.2, + "learning_rate": 2.32810112343559e-05, + "loss": 2.3078, + "step": 60750 + }, + { + "epoch": 8.21, + "learning_rate": 2.3246964001837798e-05, + "loss": 2.271, + "step": 60760 + }, + { + "epoch": 8.21, + "learning_rate": 2.3212939592292656e-05, + "loss": 2.2644, + "step": 60770 + }, + { + "epoch": 8.21, + "learning_rate": 2.317893801184686e-05, + "loss": 2.2431, + "step": 60780 + }, + { + "epoch": 8.21, + "learning_rate": 2.314495926662266e-05, + "loss": 2.2762, + "step": 60790 + }, + { + "epoch": 8.21, + "learning_rate": 2.311100336273828e-05, + "loss": 2.2673, + "step": 60800 + }, + { + "epoch": 8.21, + "learning_rate": 2.307707030630771e-05, + "loss": 2.2979, + "step": 60810 + }, + { + "epoch": 8.21, + "learning_rate": 2.3043160103440982e-05, + "loss": 2.2687, + "step": 60820 + }, + { + "epoch": 8.22, + "learning_rate": 2.3009272760243848e-05, + "loss": 2.2788, + "step": 60830 + }, + { + "epoch": 8.22, + "learning_rate": 2.297540828281797e-05, + "loss": 2.3006, + "step": 60840 + }, + { + "epoch": 8.22, + "learning_rate": 2.2941566677261027e-05, + "loss": 2.261, + "step": 60850 + }, + { + "epoch": 8.22, + "learning_rate": 2.2907747949666472e-05, + "loss": 2.2649, + "step": 60860 + }, + { + "epoch": 8.22, + "learning_rate": 2.2873952106123623e-05, + "loss": 2.2747, + "step": 60870 + }, + { + "epoch": 8.22, + "learning_rate": 2.2840179152717707e-05, + "loss": 2.2773, + "step": 60880 + }, + { + "epoch": 8.22, + "learning_rate": 2.28064290955299e-05, + "loss": 2.2716, + "step": 60890 + }, + { + "epoch": 8.23, + "learning_rate": 2.2772701940637117e-05, + "loss": 2.2998, + "step": 60900 + }, + { + "epoch": 8.23, + "learning_rate": 2.2738997694112304e-05, + "loss": 2.2623, + "step": 60910 + }, + { + "epoch": 8.23, + "learning_rate": 2.270531636202414e-05, + "loss": 2.2775, + "step": 60920 + }, + { + "epoch": 8.23, + "learning_rate": 2.267165795043719e-05, + "loss": 2.2618, + "step": 60930 + }, + { + "epoch": 8.23, + "learning_rate": 2.2638022465412054e-05, + "loss": 2.2746, + "step": 60940 + }, + { + "epoch": 8.23, + "learning_rate": 2.260440991300499e-05, + "loss": 2.2979, + "step": 60950 + }, + { + "epoch": 8.23, + "learning_rate": 2.257082029926834e-05, + "loss": 2.2893, + "step": 60960 + }, + { + "epoch": 8.23, + "learning_rate": 2.2537253630250064e-05, + "loss": 2.2937, + "step": 60970 + }, + { + "epoch": 8.24, + "learning_rate": 2.250370991199423e-05, + "loss": 2.2874, + "step": 60980 + }, + { + "epoch": 8.24, + "learning_rate": 2.24701891505406e-05, + "loss": 2.3037, + "step": 60990 + }, + { + "epoch": 8.24, + "learning_rate": 2.243669135192499e-05, + "loss": 2.2609, + "step": 61000 + }, + { + "epoch": 8.24, + "eval_loss": 2.4321961402893066, + "eval_runtime": 1269.1578, + "eval_samples_per_second": 59.989, + "eval_steps_per_second": 4.999, + "step": 61000 + }, + { + "epoch": 8.24, + "learning_rate": 2.2403216522178868e-05, + "loss": 2.2852, + "step": 61010 + }, + { + "epoch": 8.24, + "learning_rate": 2.2369764667329648e-05, + "loss": 2.2954, + "step": 61020 + }, + { + "epoch": 8.24, + "learning_rate": 2.2336335793400712e-05, + "loss": 2.2837, + "step": 61030 + }, + { + "epoch": 8.24, + "learning_rate": 2.230292990641111e-05, + "loss": 2.2713, + "step": 61040 + }, + { + "epoch": 8.25, + "learning_rate": 2.2269547012376015e-05, + "loss": 2.2678, + "step": 61050 + }, + { + "epoch": 8.25, + "learning_rate": 2.223618711730612e-05, + "loss": 2.2934, + "step": 61060 + }, + { + "epoch": 8.25, + "learning_rate": 2.2202850227208284e-05, + "loss": 2.2755, + "step": 61070 + }, + { + "epoch": 8.25, + "learning_rate": 2.2169536348085025e-05, + "loss": 2.2793, + "step": 61080 + }, + { + "epoch": 8.25, + "learning_rate": 2.2136245485934877e-05, + "loss": 2.2855, + "step": 61090 + }, + { + "epoch": 8.25, + "learning_rate": 2.2102977646752128e-05, + "loss": 2.2748, + "step": 61100 + }, + { + "epoch": 8.25, + "learning_rate": 2.2069732836526856e-05, + "loss": 2.2693, + "step": 61110 + }, + { + "epoch": 8.25, + "learning_rate": 2.203651106124515e-05, + "loss": 2.2964, + "step": 61120 + }, + { + "epoch": 8.26, + "learning_rate": 2.2003312326888828e-05, + "loss": 2.2533, + "step": 61130 + }, + { + "epoch": 8.26, + "learning_rate": 2.1970136639435658e-05, + "loss": 2.2673, + "step": 61140 + }, + { + "epoch": 8.26, + "learning_rate": 2.1936984004859197e-05, + "loss": 2.2711, + "step": 61150 + }, + { + "epoch": 8.26, + "learning_rate": 2.1903854429128836e-05, + "loss": 2.27, + "step": 61160 + }, + { + "epoch": 8.26, + "learning_rate": 2.187074791820987e-05, + "loss": 2.2796, + "step": 61170 + }, + { + "epoch": 8.26, + "learning_rate": 2.1837664478063343e-05, + "loss": 2.3061, + "step": 61180 + }, + { + "epoch": 8.26, + "learning_rate": 2.1804604114646345e-05, + "loss": 2.26, + "step": 61190 + }, + { + "epoch": 8.27, + "learning_rate": 2.1771566833911513e-05, + "loss": 2.3028, + "step": 61200 + }, + { + "epoch": 8.27, + "learning_rate": 2.1738552641807622e-05, + "loss": 2.2921, + "step": 61210 + }, + { + "epoch": 8.27, + "learning_rate": 2.1705561544279083e-05, + "loss": 2.2792, + "step": 61220 + }, + { + "epoch": 8.27, + "learning_rate": 2.1672593547266294e-05, + "loss": 2.2622, + "step": 61230 + }, + { + "epoch": 8.27, + "learning_rate": 2.163964865670539e-05, + "loss": 2.2936, + "step": 61240 + }, + { + "epoch": 8.27, + "learning_rate": 2.1606726878528375e-05, + "loss": 2.2644, + "step": 61250 + }, + { + "epoch": 8.27, + "learning_rate": 2.1573828218663107e-05, + "loss": 2.2586, + "step": 61260 + }, + { + "epoch": 8.28, + "learning_rate": 2.1540952683033235e-05, + "loss": 2.2662, + "step": 61270 + }, + { + "epoch": 8.28, + "learning_rate": 2.1508100277558353e-05, + "loss": 2.2812, + "step": 61280 + }, + { + "epoch": 8.28, + "learning_rate": 2.1475271008153798e-05, + "loss": 2.2783, + "step": 61290 + }, + { + "epoch": 8.28, + "learning_rate": 2.1442464880730725e-05, + "loss": 2.2923, + "step": 61300 + }, + { + "epoch": 8.28, + "learning_rate": 2.1409681901196175e-05, + "loss": 2.2988, + "step": 61310 + }, + { + "epoch": 8.28, + "learning_rate": 2.1376922075453023e-05, + "loss": 2.2611, + "step": 61320 + }, + { + "epoch": 8.28, + "learning_rate": 2.1344185409399955e-05, + "loss": 2.2842, + "step": 61330 + }, + { + "epoch": 8.28, + "learning_rate": 2.131147190893147e-05, + "loss": 2.28, + "step": 61340 + }, + { + "epoch": 8.29, + "learning_rate": 2.1278781579937924e-05, + "loss": 2.2854, + "step": 61350 + }, + { + "epoch": 8.29, + "learning_rate": 2.1246114428305477e-05, + "loss": 2.2854, + "step": 61360 + }, + { + "epoch": 8.29, + "learning_rate": 2.1213470459916153e-05, + "loss": 2.289, + "step": 61370 + }, + { + "epoch": 8.29, + "learning_rate": 2.1180849680647788e-05, + "loss": 2.2661, + "step": 61380 + }, + { + "epoch": 8.29, + "learning_rate": 2.1148252096374014e-05, + "loss": 2.2584, + "step": 61390 + }, + { + "epoch": 8.29, + "learning_rate": 2.111567771296427e-05, + "loss": 2.294, + "step": 61400 + }, + { + "epoch": 8.29, + "learning_rate": 2.1083126536283927e-05, + "loss": 2.2849, + "step": 61410 + }, + { + "epoch": 8.3, + "learning_rate": 2.105059857219409e-05, + "loss": 2.2901, + "step": 61420 + }, + { + "epoch": 8.3, + "learning_rate": 2.1018093826551645e-05, + "loss": 2.2806, + "step": 61430 + }, + { + "epoch": 8.3, + "learning_rate": 2.0985612305209404e-05, + "loss": 2.2917, + "step": 61440 + }, + { + "epoch": 8.3, + "learning_rate": 2.0953154014015883e-05, + "loss": 2.2958, + "step": 61450 + }, + { + "epoch": 8.3, + "learning_rate": 2.092071895881554e-05, + "loss": 2.2824, + "step": 61460 + }, + { + "epoch": 8.3, + "learning_rate": 2.0888307145448575e-05, + "loss": 2.2924, + "step": 61470 + }, + { + "epoch": 8.3, + "learning_rate": 2.0855918579750997e-05, + "loss": 2.2495, + "step": 61480 + }, + { + "epoch": 8.3, + "learning_rate": 2.082355326755461e-05, + "loss": 2.2817, + "step": 61490 + }, + { + "epoch": 8.31, + "learning_rate": 2.0791211214687153e-05, + "loss": 2.2901, + "step": 61500 + }, + { + "epoch": 8.31, + "learning_rate": 2.075889242697202e-05, + "loss": 2.2789, + "step": 61510 + }, + { + "epoch": 8.31, + "learning_rate": 2.0726596910228532e-05, + "loss": 2.2678, + "step": 61520 + }, + { + "epoch": 8.31, + "learning_rate": 2.069432467027173e-05, + "loss": 2.2762, + "step": 61530 + }, + { + "epoch": 8.31, + "learning_rate": 2.066207571291251e-05, + "loss": 2.2976, + "step": 61540 + }, + { + "epoch": 8.31, + "learning_rate": 2.0629850043957626e-05, + "loss": 2.2842, + "step": 61550 + }, + { + "epoch": 8.31, + "learning_rate": 2.059764766920954e-05, + "loss": 2.2797, + "step": 61560 + }, + { + "epoch": 8.32, + "learning_rate": 2.0565468594466595e-05, + "loss": 2.2791, + "step": 61570 + }, + { + "epoch": 8.32, + "learning_rate": 2.0533312825522842e-05, + "loss": 2.2667, + "step": 61580 + }, + { + "epoch": 8.32, + "learning_rate": 2.0501180368168313e-05, + "loss": 2.2845, + "step": 61590 + }, + { + "epoch": 8.32, + "learning_rate": 2.046907122818865e-05, + "loss": 2.278, + "step": 61600 + }, + { + "epoch": 8.32, + "learning_rate": 2.0436985411365427e-05, + "loss": 2.2807, + "step": 61610 + }, + { + "epoch": 8.32, + "learning_rate": 2.0404922923475935e-05, + "loss": 2.2776, + "step": 61620 + }, + { + "epoch": 8.32, + "learning_rate": 2.0372883770293297e-05, + "loss": 2.299, + "step": 61630 + }, + { + "epoch": 8.33, + "learning_rate": 2.0340867957586478e-05, + "loss": 2.2773, + "step": 61640 + }, + { + "epoch": 8.33, + "learning_rate": 2.0308875491120165e-05, + "loss": 2.3009, + "step": 61650 + }, + { + "epoch": 8.33, + "learning_rate": 2.02769063766549e-05, + "loss": 2.2633, + "step": 61660 + }, + { + "epoch": 8.33, + "learning_rate": 2.0244960619946966e-05, + "loss": 2.2805, + "step": 61670 + }, + { + "epoch": 8.33, + "learning_rate": 2.0213038226748506e-05, + "loss": 2.298, + "step": 61680 + }, + { + "epoch": 8.33, + "learning_rate": 2.018113920280741e-05, + "loss": 2.2873, + "step": 61690 + }, + { + "epoch": 8.33, + "learning_rate": 2.014926355386737e-05, + "loss": 2.2981, + "step": 61700 + }, + { + "epoch": 8.33, + "learning_rate": 2.0117411285667867e-05, + "loss": 2.2564, + "step": 61710 + }, + { + "epoch": 8.34, + "learning_rate": 2.0085582403944138e-05, + "loss": 2.2906, + "step": 61720 + }, + { + "epoch": 8.34, + "learning_rate": 2.0053776914427315e-05, + "loss": 2.265, + "step": 61730 + }, + { + "epoch": 8.34, + "learning_rate": 2.0021994822844217e-05, + "loss": 2.2789, + "step": 61740 + }, + { + "epoch": 8.34, + "learning_rate": 1.9990236134917497e-05, + "loss": 2.2742, + "step": 61750 + }, + { + "epoch": 8.34, + "learning_rate": 1.9958500856365535e-05, + "loss": 2.2842, + "step": 61760 + }, + { + "epoch": 8.34, + "learning_rate": 1.9926788992902586e-05, + "loss": 2.2905, + "step": 61770 + }, + { + "epoch": 8.34, + "learning_rate": 1.9895100550238642e-05, + "loss": 2.2777, + "step": 61780 + }, + { + "epoch": 8.35, + "learning_rate": 1.9863435534079463e-05, + "loss": 2.287, + "step": 61790 + }, + { + "epoch": 8.35, + "learning_rate": 1.9831793950126628e-05, + "loss": 2.2573, + "step": 61800 + }, + { + "epoch": 8.35, + "learning_rate": 1.980017580407741e-05, + "loss": 2.2612, + "step": 61810 + }, + { + "epoch": 8.35, + "learning_rate": 1.9768581101625008e-05, + "loss": 2.3016, + "step": 61820 + }, + { + "epoch": 8.35, + "learning_rate": 1.9737009848458303e-05, + "loss": 2.3136, + "step": 61830 + }, + { + "epoch": 8.35, + "learning_rate": 1.9705462050261945e-05, + "loss": 2.2676, + "step": 61840 + }, + { + "epoch": 8.35, + "learning_rate": 1.9673937712716374e-05, + "loss": 2.2641, + "step": 61850 + }, + { + "epoch": 8.35, + "learning_rate": 1.9642436841497857e-05, + "loss": 2.2677, + "step": 61860 + }, + { + "epoch": 8.36, + "learning_rate": 1.9610959442278396e-05, + "loss": 2.2945, + "step": 61870 + }, + { + "epoch": 8.36, + "learning_rate": 1.9579505520725747e-05, + "loss": 2.2853, + "step": 61880 + }, + { + "epoch": 8.36, + "learning_rate": 1.954807508250346e-05, + "loss": 2.2961, + "step": 61890 + }, + { + "epoch": 8.36, + "learning_rate": 1.951666813327083e-05, + "loss": 2.2755, + "step": 61900 + }, + { + "epoch": 8.36, + "learning_rate": 1.9485284678683012e-05, + "loss": 2.2876, + "step": 61910 + }, + { + "epoch": 8.36, + "learning_rate": 1.9453924724390785e-05, + "loss": 2.2632, + "step": 61920 + }, + { + "epoch": 8.36, + "learning_rate": 1.9422588276040902e-05, + "loss": 2.2581, + "step": 61930 + }, + { + "epoch": 8.37, + "learning_rate": 1.9391275339275615e-05, + "loss": 2.2938, + "step": 61940 + }, + { + "epoch": 8.37, + "learning_rate": 1.935998591973318e-05, + "loss": 2.2856, + "step": 61950 + }, + { + "epoch": 8.37, + "learning_rate": 1.9328720023047507e-05, + "loss": 2.2981, + "step": 61960 + }, + { + "epoch": 8.37, + "learning_rate": 1.9297477654848293e-05, + "loss": 2.2867, + "step": 61970 + }, + { + "epoch": 8.37, + "learning_rate": 1.926625882076097e-05, + "loss": 2.2768, + "step": 61980 + }, + { + "epoch": 8.37, + "learning_rate": 1.9235063526406745e-05, + "loss": 2.2806, + "step": 61990 + }, + { + "epoch": 8.37, + "learning_rate": 1.920389177740264e-05, + "loss": 2.2746, + "step": 62000 + }, + { + "epoch": 8.37, + "eval_loss": 2.4299051761627197, + "eval_runtime": 1269.0207, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 5.0, + "step": 62000 + }, + { + "epoch": 8.38, + "learning_rate": 1.9172743579361365e-05, + "loss": 2.2811, + "step": 62010 + }, + { + "epoch": 8.38, + "learning_rate": 1.91416189378915e-05, + "loss": 2.2749, + "step": 62020 + }, + { + "epoch": 8.38, + "learning_rate": 1.911051785859718e-05, + "loss": 2.2429, + "step": 62030 + }, + { + "epoch": 8.38, + "learning_rate": 1.907944034707852e-05, + "loss": 2.2934, + "step": 62040 + }, + { + "epoch": 8.38, + "learning_rate": 1.9048386408931238e-05, + "loss": 2.2662, + "step": 62050 + }, + { + "epoch": 8.38, + "learning_rate": 1.9017356049746867e-05, + "loss": 2.2893, + "step": 62060 + }, + { + "epoch": 8.38, + "learning_rate": 1.8986349275112746e-05, + "loss": 2.3018, + "step": 62070 + }, + { + "epoch": 8.38, + "learning_rate": 1.895536609061182e-05, + "loss": 2.2645, + "step": 62080 + }, + { + "epoch": 8.39, + "learning_rate": 1.892440650182296e-05, + "loss": 2.2715, + "step": 62090 + }, + { + "epoch": 8.39, + "learning_rate": 1.8893470514320624e-05, + "loss": 2.2491, + "step": 62100 + }, + { + "epoch": 8.39, + "learning_rate": 1.8862558133675175e-05, + "loss": 2.2795, + "step": 62110 + }, + { + "epoch": 8.39, + "learning_rate": 1.8831669365452646e-05, + "loss": 2.2328, + "step": 62120 + }, + { + "epoch": 8.39, + "learning_rate": 1.8800804215214798e-05, + "loss": 2.2655, + "step": 62130 + }, + { + "epoch": 8.39, + "learning_rate": 1.8769962688519175e-05, + "loss": 2.2631, + "step": 62140 + }, + { + "epoch": 8.39, + "learning_rate": 1.873914479091903e-05, + "loss": 2.3048, + "step": 62150 + }, + { + "epoch": 8.4, + "learning_rate": 1.8708350527963478e-05, + "loss": 2.2834, + "step": 62160 + }, + { + "epoch": 8.4, + "learning_rate": 1.8677579905197183e-05, + "loss": 2.2881, + "step": 62170 + }, + { + "epoch": 8.4, + "learning_rate": 1.8646832928160726e-05, + "loss": 2.2884, + "step": 62180 + }, + { + "epoch": 8.4, + "learning_rate": 1.8616109602390317e-05, + "loss": 2.2844, + "step": 62190 + }, + { + "epoch": 8.4, + "learning_rate": 1.858540993341801e-05, + "loss": 2.2877, + "step": 62200 + }, + { + "epoch": 8.4, + "learning_rate": 1.8554733926771552e-05, + "loss": 2.2808, + "step": 62210 + }, + { + "epoch": 8.4, + "learning_rate": 1.8524081587974326e-05, + "loss": 2.2845, + "step": 62220 + }, + { + "epoch": 8.4, + "learning_rate": 1.8493452922545647e-05, + "loss": 2.2897, + "step": 62230 + }, + { + "epoch": 8.41, + "learning_rate": 1.846284793600039e-05, + "loss": 2.2766, + "step": 62240 + }, + { + "epoch": 8.41, + "learning_rate": 1.843226663384933e-05, + "loss": 2.289, + "step": 62250 + }, + { + "epoch": 8.41, + "learning_rate": 1.8401709021598844e-05, + "loss": 2.2796, + "step": 62260 + }, + { + "epoch": 8.41, + "learning_rate": 1.8371175104751114e-05, + "loss": 2.2715, + "step": 62270 + }, + { + "epoch": 8.41, + "learning_rate": 1.8340664888803974e-05, + "loss": 2.2957, + "step": 62280 + }, + { + "epoch": 8.41, + "learning_rate": 1.8310178379251134e-05, + "loss": 2.2833, + "step": 62290 + }, + { + "epoch": 8.41, + "learning_rate": 1.827971558158195e-05, + "loss": 2.2884, + "step": 62300 + }, + { + "epoch": 8.42, + "learning_rate": 1.824927650128142e-05, + "loss": 2.2699, + "step": 62310 + }, + { + "epoch": 8.42, + "learning_rate": 1.821886114383044e-05, + "loss": 2.3088, + "step": 62320 + }, + { + "epoch": 8.42, + "learning_rate": 1.8188469514705495e-05, + "loss": 2.281, + "step": 62330 + }, + { + "epoch": 8.42, + "learning_rate": 1.815810161937893e-05, + "loss": 2.2912, + "step": 62340 + }, + { + "epoch": 8.42, + "learning_rate": 1.8127757463318716e-05, + "loss": 2.2623, + "step": 62350 + }, + { + "epoch": 8.42, + "learning_rate": 1.8097437051988584e-05, + "loss": 2.2628, + "step": 62360 + }, + { + "epoch": 8.42, + "learning_rate": 1.8067140390847935e-05, + "loss": 2.2404, + "step": 62370 + }, + { + "epoch": 8.43, + "learning_rate": 1.803686748535202e-05, + "loss": 2.3156, + "step": 62380 + }, + { + "epoch": 8.43, + "learning_rate": 1.8006618340951694e-05, + "loss": 2.2653, + "step": 62390 + }, + { + "epoch": 8.43, + "learning_rate": 1.797639296309359e-05, + "loss": 2.2976, + "step": 62400 + }, + { + "epoch": 8.43, + "learning_rate": 1.794619135722003e-05, + "loss": 2.2754, + "step": 62410 + }, + { + "epoch": 8.43, + "learning_rate": 1.7916013528769053e-05, + "loss": 2.2638, + "step": 62420 + }, + { + "epoch": 8.43, + "learning_rate": 1.7885859483174507e-05, + "loss": 2.2594, + "step": 62430 + }, + { + "epoch": 8.43, + "learning_rate": 1.7855729225865823e-05, + "loss": 2.2798, + "step": 62440 + }, + { + "epoch": 8.43, + "learning_rate": 1.7825622762268254e-05, + "loss": 2.2801, + "step": 62450 + }, + { + "epoch": 8.44, + "learning_rate": 1.7795540097802668e-05, + "loss": 2.2868, + "step": 62460 + }, + { + "epoch": 8.44, + "learning_rate": 1.7765481237885764e-05, + "loss": 2.2868, + "step": 62470 + }, + { + "epoch": 8.44, + "learning_rate": 1.7735446187929898e-05, + "loss": 2.2753, + "step": 62480 + }, + { + "epoch": 8.44, + "learning_rate": 1.7705434953343122e-05, + "loss": 2.259, + "step": 62490 + }, + { + "epoch": 8.44, + "learning_rate": 1.76754475395292e-05, + "loss": 2.267, + "step": 62500 + }, + { + "epoch": 8.44, + "learning_rate": 1.764548395188762e-05, + "loss": 2.2693, + "step": 62510 + }, + { + "epoch": 8.44, + "learning_rate": 1.7615544195813636e-05, + "loss": 2.2592, + "step": 62520 + }, + { + "epoch": 8.45, + "learning_rate": 1.758562827669811e-05, + "loss": 2.2892, + "step": 62530 + }, + { + "epoch": 8.45, + "learning_rate": 1.7555736199927684e-05, + "loss": 2.2727, + "step": 62540 + }, + { + "epoch": 8.45, + "learning_rate": 1.752586797088465e-05, + "loss": 2.2869, + "step": 62550 + }, + { + "epoch": 8.45, + "learning_rate": 1.749602359494709e-05, + "loss": 2.2947, + "step": 62560 + }, + { + "epoch": 8.45, + "learning_rate": 1.7466203077488706e-05, + "loss": 2.2713, + "step": 62570 + }, + { + "epoch": 8.45, + "learning_rate": 1.743640642387895e-05, + "loss": 2.2738, + "step": 62580 + }, + { + "epoch": 8.45, + "learning_rate": 1.7406633639482954e-05, + "loss": 2.3112, + "step": 62590 + }, + { + "epoch": 8.45, + "learning_rate": 1.7376884729661556e-05, + "loss": 2.2771, + "step": 62600 + }, + { + "epoch": 8.46, + "learning_rate": 1.734715969977134e-05, + "loss": 2.2967, + "step": 62610 + }, + { + "epoch": 8.46, + "learning_rate": 1.7317458555164526e-05, + "loss": 2.2596, + "step": 62620 + }, + { + "epoch": 8.46, + "learning_rate": 1.728778130118907e-05, + "loss": 2.3161, + "step": 62630 + }, + { + "epoch": 8.46, + "learning_rate": 1.7258127943188575e-05, + "loss": 2.2729, + "step": 62640 + }, + { + "epoch": 8.46, + "learning_rate": 1.722849848650246e-05, + "loss": 2.2861, + "step": 62650 + }, + { + "epoch": 8.46, + "learning_rate": 1.7198892936465725e-05, + "loss": 2.2826, + "step": 62660 + }, + { + "epoch": 8.46, + "learning_rate": 1.7169311298409082e-05, + "loss": 2.2829, + "step": 62670 + }, + { + "epoch": 8.47, + "learning_rate": 1.7139753577658994e-05, + "loss": 2.2992, + "step": 62680 + }, + { + "epoch": 8.47, + "learning_rate": 1.7110219779537533e-05, + "loss": 2.2886, + "step": 62690 + }, + { + "epoch": 8.47, + "learning_rate": 1.7080709909362577e-05, + "loss": 2.2754, + "step": 62700 + }, + { + "epoch": 8.47, + "learning_rate": 1.7051223972447596e-05, + "loss": 2.2973, + "step": 62710 + }, + { + "epoch": 8.47, + "learning_rate": 1.7021761974101794e-05, + "loss": 2.2816, + "step": 62720 + }, + { + "epoch": 8.47, + "learning_rate": 1.6992323919630057e-05, + "loss": 2.2888, + "step": 62730 + }, + { + "epoch": 8.47, + "learning_rate": 1.696290981433293e-05, + "loss": 2.2714, + "step": 62740 + }, + { + "epoch": 8.47, + "learning_rate": 1.6933519663506727e-05, + "loss": 2.3208, + "step": 62750 + }, + { + "epoch": 8.48, + "learning_rate": 1.6904153472443365e-05, + "loss": 2.2687, + "step": 62760 + }, + { + "epoch": 8.48, + "learning_rate": 1.6874811246430497e-05, + "loss": 2.2654, + "step": 62770 + }, + { + "epoch": 8.48, + "learning_rate": 1.6845492990751397e-05, + "loss": 2.2864, + "step": 62780 + }, + { + "epoch": 8.48, + "learning_rate": 1.681619871068514e-05, + "loss": 2.2779, + "step": 62790 + }, + { + "epoch": 8.48, + "learning_rate": 1.678692841150638e-05, + "loss": 2.2802, + "step": 62800 + }, + { + "epoch": 8.48, + "learning_rate": 1.6757682098485474e-05, + "loss": 2.3053, + "step": 62810 + }, + { + "epoch": 8.48, + "learning_rate": 1.672845977688847e-05, + "loss": 2.2806, + "step": 62820 + }, + { + "epoch": 8.49, + "learning_rate": 1.6699261451977104e-05, + "loss": 2.2745, + "step": 62830 + }, + { + "epoch": 8.49, + "learning_rate": 1.6670087129008802e-05, + "loss": 2.3039, + "step": 62840 + }, + { + "epoch": 8.49, + "learning_rate": 1.6640936813236648e-05, + "loss": 2.2992, + "step": 62850 + }, + { + "epoch": 8.49, + "learning_rate": 1.6611810509909396e-05, + "loss": 2.2998, + "step": 62860 + }, + { + "epoch": 8.49, + "learning_rate": 1.6582708224271446e-05, + "loss": 2.2618, + "step": 62870 + }, + { + "epoch": 8.49, + "learning_rate": 1.6553629961562992e-05, + "loss": 2.2724, + "step": 62880 + }, + { + "epoch": 8.49, + "learning_rate": 1.652457572701978e-05, + "loss": 2.2947, + "step": 62890 + }, + { + "epoch": 8.5, + "learning_rate": 1.6495545525873322e-05, + "loss": 2.2803, + "step": 62900 + }, + { + "epoch": 8.5, + "learning_rate": 1.6466539363350706e-05, + "loss": 2.3, + "step": 62910 + }, + { + "epoch": 8.5, + "learning_rate": 1.6437557244674705e-05, + "loss": 2.2713, + "step": 62920 + }, + { + "epoch": 8.5, + "learning_rate": 1.640859917506389e-05, + "loss": 2.2815, + "step": 62930 + }, + { + "epoch": 8.5, + "learning_rate": 1.637966515973233e-05, + "loss": 2.2895, + "step": 62940 + }, + { + "epoch": 8.5, + "learning_rate": 1.6350755203889954e-05, + "loss": 2.2591, + "step": 62950 + }, + { + "epoch": 8.5, + "learning_rate": 1.6321869312742097e-05, + "loss": 2.2809, + "step": 62960 + }, + { + "epoch": 8.5, + "learning_rate": 1.6293007491490018e-05, + "loss": 2.302, + "step": 62970 + }, + { + "epoch": 8.51, + "learning_rate": 1.6264169745330475e-05, + "loss": 2.3023, + "step": 62980 + }, + { + "epoch": 8.51, + "learning_rate": 1.6235356079456048e-05, + "loss": 2.2526, + "step": 62990 + }, + { + "epoch": 8.51, + "learning_rate": 1.6206566499054803e-05, + "loss": 2.2666, + "step": 63000 + }, + { + "epoch": 8.51, + "eval_loss": 2.427412986755371, + "eval_runtime": 1269.0254, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 5.0, + "step": 63000 + }, + { + "epoch": 8.51, + "learning_rate": 1.6177801009310527e-05, + "loss": 2.2902, + "step": 63010 + }, + { + "epoch": 8.51, + "learning_rate": 1.614905961540276e-05, + "loss": 2.2971, + "step": 63020 + }, + { + "epoch": 8.51, + "learning_rate": 1.612034232250658e-05, + "loss": 2.277, + "step": 63030 + }, + { + "epoch": 8.51, + "learning_rate": 1.6091649135792884e-05, + "loss": 2.2956, + "step": 63040 + }, + { + "epoch": 8.52, + "learning_rate": 1.6062980060427988e-05, + "loss": 2.2661, + "step": 63050 + }, + { + "epoch": 8.52, + "learning_rate": 1.6034335101574113e-05, + "loss": 2.2718, + "step": 63060 + }, + { + "epoch": 8.52, + "learning_rate": 1.600571426438895e-05, + "loss": 2.2972, + "step": 63070 + }, + { + "epoch": 8.52, + "learning_rate": 1.5977117554026e-05, + "loss": 2.2645, + "step": 63080 + }, + { + "epoch": 8.52, + "learning_rate": 1.5948544975634346e-05, + "loss": 2.277, + "step": 63090 + }, + { + "epoch": 8.52, + "learning_rate": 1.5919996534358635e-05, + "loss": 2.3066, + "step": 63100 + }, + { + "epoch": 8.52, + "learning_rate": 1.589147223533935e-05, + "loss": 2.2673, + "step": 63110 + }, + { + "epoch": 8.52, + "learning_rate": 1.5862972083712473e-05, + "loss": 2.2743, + "step": 63120 + }, + { + "epoch": 8.53, + "learning_rate": 1.5834496084609787e-05, + "loss": 2.2895, + "step": 63130 + }, + { + "epoch": 8.53, + "learning_rate": 1.5806044243158532e-05, + "loss": 2.2617, + "step": 63140 + }, + { + "epoch": 8.53, + "learning_rate": 1.5777616564481794e-05, + "loss": 2.2837, + "step": 63150 + }, + { + "epoch": 8.53, + "learning_rate": 1.5749213053698156e-05, + "loss": 2.267, + "step": 63160 + }, + { + "epoch": 8.53, + "learning_rate": 1.5720833715921965e-05, + "loss": 2.2818, + "step": 63170 + }, + { + "epoch": 8.53, + "learning_rate": 1.569247855626318e-05, + "loss": 2.2688, + "step": 63180 + }, + { + "epoch": 8.53, + "learning_rate": 1.566414757982729e-05, + "loss": 2.2786, + "step": 63190 + }, + { + "epoch": 8.54, + "learning_rate": 1.563584079171564e-05, + "loss": 2.2873, + "step": 63200 + }, + { + "epoch": 8.54, + "learning_rate": 1.5607558197025028e-05, + "loss": 2.2677, + "step": 63210 + }, + { + "epoch": 8.54, + "learning_rate": 1.5579299800848038e-05, + "loss": 2.269, + "step": 63220 + }, + { + "epoch": 8.54, + "learning_rate": 1.5551065608272823e-05, + "loss": 2.2796, + "step": 63230 + }, + { + "epoch": 8.54, + "learning_rate": 1.5522855624383195e-05, + "loss": 2.2842, + "step": 63240 + }, + { + "epoch": 8.54, + "learning_rate": 1.549466985425858e-05, + "loss": 2.2856, + "step": 63250 + }, + { + "epoch": 8.54, + "learning_rate": 1.5466508302974063e-05, + "loss": 2.2495, + "step": 63260 + }, + { + "epoch": 8.55, + "learning_rate": 1.543837097560044e-05, + "loss": 2.2996, + "step": 63270 + }, + { + "epoch": 8.55, + "learning_rate": 1.5410257877203976e-05, + "loss": 2.2769, + "step": 63280 + }, + { + "epoch": 8.55, + "learning_rate": 1.538216901284675e-05, + "loss": 2.2873, + "step": 63290 + }, + { + "epoch": 8.55, + "learning_rate": 1.5354104387586373e-05, + "loss": 2.3165, + "step": 63300 + }, + { + "epoch": 8.55, + "learning_rate": 1.5326064006476153e-05, + "loss": 2.2599, + "step": 63310 + }, + { + "epoch": 8.55, + "learning_rate": 1.5298047874564977e-05, + "loss": 2.3048, + "step": 63320 + }, + { + "epoch": 8.55, + "learning_rate": 1.5270055996897384e-05, + "loss": 2.2763, + "step": 63330 + }, + { + "epoch": 8.55, + "learning_rate": 1.5242088378513572e-05, + "loss": 2.2756, + "step": 63340 + }, + { + "epoch": 8.56, + "learning_rate": 1.5214145024449315e-05, + "loss": 2.2992, + "step": 63350 + }, + { + "epoch": 8.56, + "learning_rate": 1.5186225939736118e-05, + "loss": 2.2985, + "step": 63360 + }, + { + "epoch": 8.56, + "learning_rate": 1.5158331129400996e-05, + "loss": 2.3101, + "step": 63370 + }, + { + "epoch": 8.56, + "learning_rate": 1.5130460598466681e-05, + "loss": 2.2582, + "step": 63380 + }, + { + "epoch": 8.56, + "learning_rate": 1.5102614351951443e-05, + "loss": 2.2992, + "step": 63390 + }, + { + "epoch": 8.56, + "learning_rate": 1.5074792394869335e-05, + "loss": 2.2854, + "step": 63400 + }, + { + "epoch": 8.56, + "learning_rate": 1.5046994732229866e-05, + "loss": 2.2806, + "step": 63410 + }, + { + "epoch": 8.57, + "learning_rate": 1.501922136903827e-05, + "loss": 2.2873, + "step": 63420 + }, + { + "epoch": 8.57, + "learning_rate": 1.4991472310295377e-05, + "loss": 2.2736, + "step": 63430 + }, + { + "epoch": 8.57, + "learning_rate": 1.4963747560997608e-05, + "loss": 2.3065, + "step": 63440 + }, + { + "epoch": 8.57, + "learning_rate": 1.4936047126137101e-05, + "loss": 2.27, + "step": 63450 + }, + { + "epoch": 8.57, + "learning_rate": 1.4908371010701536e-05, + "loss": 2.2951, + "step": 63460 + }, + { + "epoch": 8.57, + "learning_rate": 1.4880719219674243e-05, + "loss": 2.2653, + "step": 63470 + }, + { + "epoch": 8.57, + "learning_rate": 1.4853091758034102e-05, + "loss": 2.2834, + "step": 63480 + }, + { + "epoch": 8.57, + "learning_rate": 1.4825488630755772e-05, + "loss": 2.2953, + "step": 63490 + }, + { + "epoch": 8.58, + "learning_rate": 1.4797909842809374e-05, + "loss": 2.3167, + "step": 63500 + }, + { + "epoch": 8.58, + "learning_rate": 1.4770355399160721e-05, + "loss": 2.2873, + "step": 63510 + }, + { + "epoch": 8.58, + "learning_rate": 1.4742825304771227e-05, + "loss": 2.2828, + "step": 63520 + }, + { + "epoch": 8.58, + "learning_rate": 1.4715319564597893e-05, + "loss": 2.286, + "step": 63530 + }, + { + "epoch": 8.58, + "learning_rate": 1.468783818359341e-05, + "loss": 2.268, + "step": 63540 + }, + { + "epoch": 8.58, + "learning_rate": 1.466038116670603e-05, + "loss": 2.2843, + "step": 63550 + }, + { + "epoch": 8.58, + "learning_rate": 1.463294851887959e-05, + "loss": 2.2516, + "step": 63560 + }, + { + "epoch": 8.59, + "learning_rate": 1.4605540245053581e-05, + "loss": 2.285, + "step": 63570 + }, + { + "epoch": 8.59, + "learning_rate": 1.4578156350163144e-05, + "loss": 2.2638, + "step": 63580 + }, + { + "epoch": 8.59, + "learning_rate": 1.4550796839138951e-05, + "loss": 2.2527, + "step": 63590 + }, + { + "epoch": 8.59, + "learning_rate": 1.4523461716907308e-05, + "loss": 2.2711, + "step": 63600 + }, + { + "epoch": 8.59, + "learning_rate": 1.4496150988390164e-05, + "loss": 2.2791, + "step": 63610 + }, + { + "epoch": 8.59, + "learning_rate": 1.4468864658505004e-05, + "loss": 2.3034, + "step": 63620 + }, + { + "epoch": 8.59, + "learning_rate": 1.4441602732165008e-05, + "loss": 2.2935, + "step": 63630 + }, + { + "epoch": 8.6, + "learning_rate": 1.4414365214278922e-05, + "loss": 2.3195, + "step": 63640 + }, + { + "epoch": 8.6, + "learning_rate": 1.4387152109751088e-05, + "loss": 2.2611, + "step": 63650 + }, + { + "epoch": 8.6, + "learning_rate": 1.4359963423481435e-05, + "loss": 2.2949, + "step": 63660 + }, + { + "epoch": 8.6, + "learning_rate": 1.433279916036555e-05, + "loss": 2.2934, + "step": 63670 + }, + { + "epoch": 8.6, + "learning_rate": 1.4305659325294583e-05, + "loss": 2.2653, + "step": 63680 + }, + { + "epoch": 8.6, + "learning_rate": 1.4278543923155295e-05, + "loss": 2.2755, + "step": 63690 + }, + { + "epoch": 8.6, + "learning_rate": 1.4251452958830045e-05, + "loss": 2.2872, + "step": 63700 + }, + { + "epoch": 8.6, + "learning_rate": 1.4224386437196766e-05, + "loss": 2.2571, + "step": 63710 + }, + { + "epoch": 8.61, + "learning_rate": 1.4197344363129076e-05, + "loss": 2.2757, + "step": 63720 + }, + { + "epoch": 8.61, + "learning_rate": 1.4170326741496096e-05, + "loss": 2.2808, + "step": 63730 + }, + { + "epoch": 8.61, + "learning_rate": 1.4143333577162586e-05, + "loss": 2.2571, + "step": 63740 + }, + { + "epoch": 8.61, + "learning_rate": 1.4116364874988855e-05, + "loss": 2.2572, + "step": 63750 + }, + { + "epoch": 8.61, + "learning_rate": 1.408942063983094e-05, + "loss": 2.2562, + "step": 63760 + }, + { + "epoch": 8.61, + "learning_rate": 1.4062500876540339e-05, + "loss": 2.2908, + "step": 63770 + }, + { + "epoch": 8.61, + "learning_rate": 1.403560558996416e-05, + "loss": 2.2855, + "step": 63780 + }, + { + "epoch": 8.62, + "learning_rate": 1.4008734784945164e-05, + "loss": 2.2775, + "step": 63790 + }, + { + "epoch": 8.62, + "learning_rate": 1.3981888466321629e-05, + "loss": 2.2717, + "step": 63800 + }, + { + "epoch": 8.62, + "learning_rate": 1.395506663892752e-05, + "loss": 2.2655, + "step": 63810 + }, + { + "epoch": 8.62, + "learning_rate": 1.3928269307592327e-05, + "loss": 2.317, + "step": 63820 + }, + { + "epoch": 8.62, + "learning_rate": 1.3901496477141106e-05, + "loss": 2.2426, + "step": 63830 + }, + { + "epoch": 8.62, + "learning_rate": 1.3874748152394533e-05, + "loss": 2.2666, + "step": 63840 + }, + { + "epoch": 8.62, + "learning_rate": 1.3848024338168923e-05, + "loss": 2.2785, + "step": 63850 + }, + { + "epoch": 8.62, + "learning_rate": 1.3821325039276094e-05, + "loss": 2.2828, + "step": 63860 + }, + { + "epoch": 8.63, + "learning_rate": 1.37946502605235e-05, + "loss": 2.2498, + "step": 63870 + }, + { + "epoch": 8.63, + "learning_rate": 1.3768000006714147e-05, + "loss": 2.2693, + "step": 63880 + }, + { + "epoch": 8.63, + "learning_rate": 1.3741374282646616e-05, + "loss": 2.3123, + "step": 63890 + }, + { + "epoch": 8.63, + "learning_rate": 1.3714773093115156e-05, + "loss": 2.2944, + "step": 63900 + }, + { + "epoch": 8.63, + "learning_rate": 1.36881964429095e-05, + "loss": 2.2677, + "step": 63910 + }, + { + "epoch": 8.63, + "learning_rate": 1.3661644336815069e-05, + "loss": 2.2872, + "step": 63920 + }, + { + "epoch": 8.63, + "learning_rate": 1.3635116779612675e-05, + "loss": 2.2669, + "step": 63930 + }, + { + "epoch": 8.64, + "learning_rate": 1.3608613776078946e-05, + "loss": 2.3057, + "step": 63940 + }, + { + "epoch": 8.64, + "learning_rate": 1.3582135330985933e-05, + "loss": 2.2931, + "step": 63950 + }, + { + "epoch": 8.64, + "learning_rate": 1.3555681449101301e-05, + "loss": 2.2718, + "step": 63960 + }, + { + "epoch": 8.64, + "learning_rate": 1.3529252135188296e-05, + "loss": 2.262, + "step": 63970 + }, + { + "epoch": 8.64, + "learning_rate": 1.350284739400574e-05, + "loss": 2.2919, + "step": 63980 + }, + { + "epoch": 8.64, + "learning_rate": 1.3476467230308047e-05, + "loss": 2.269, + "step": 63990 + }, + { + "epoch": 8.64, + "learning_rate": 1.3450111648845185e-05, + "loss": 2.2876, + "step": 64000 + }, + { + "epoch": 8.64, + "eval_loss": 2.4255166053771973, + "eval_runtime": 1269.0542, + "eval_samples_per_second": 59.993, + "eval_steps_per_second": 5.0, + "step": 64000 + }, + { + "epoch": 8.65, + "learning_rate": 1.342378065436276e-05, + "loss": 2.292, + "step": 64010 + }, + { + "epoch": 8.65, + "learning_rate": 1.3397474251601774e-05, + "loss": 2.2803, + "step": 64020 + }, + { + "epoch": 8.65, + "learning_rate": 1.3371192445299028e-05, + "loss": 2.2966, + "step": 64030 + }, + { + "epoch": 8.65, + "learning_rate": 1.3344935240186733e-05, + "loss": 2.2961, + "step": 64040 + }, + { + "epoch": 8.65, + "learning_rate": 1.3318702640992724e-05, + "loss": 2.2714, + "step": 64050 + }, + { + "epoch": 8.65, + "learning_rate": 1.3292494652440472e-05, + "loss": 2.2835, + "step": 64060 + }, + { + "epoch": 8.65, + "learning_rate": 1.3266311279248853e-05, + "loss": 2.286, + "step": 64070 + }, + { + "epoch": 8.65, + "learning_rate": 1.3240152526132463e-05, + "loss": 2.256, + "step": 64080 + }, + { + "epoch": 8.66, + "learning_rate": 1.3214018397801385e-05, + "loss": 2.2812, + "step": 64090 + }, + { + "epoch": 8.66, + "learning_rate": 1.3187908898961352e-05, + "loss": 2.2926, + "step": 64100 + }, + { + "epoch": 8.66, + "learning_rate": 1.3161824034313522e-05, + "loss": 2.2627, + "step": 64110 + }, + { + "epoch": 8.66, + "learning_rate": 1.3135763808554755e-05, + "loss": 2.2695, + "step": 64120 + }, + { + "epoch": 8.66, + "learning_rate": 1.3109728226377415e-05, + "loss": 2.2766, + "step": 64130 + }, + { + "epoch": 8.66, + "learning_rate": 1.3083717292469382e-05, + "loss": 2.2909, + "step": 64140 + }, + { + "epoch": 8.66, + "learning_rate": 1.3057731011514227e-05, + "loss": 2.2592, + "step": 64150 + }, + { + "epoch": 8.67, + "learning_rate": 1.3031769388190927e-05, + "loss": 2.254, + "step": 64160 + }, + { + "epoch": 8.67, + "learning_rate": 1.300583242717414e-05, + "loss": 2.2959, + "step": 64170 + }, + { + "epoch": 8.67, + "learning_rate": 1.2979920133134013e-05, + "loss": 2.2831, + "step": 64180 + }, + { + "epoch": 8.67, + "learning_rate": 1.2954032510736318e-05, + "loss": 2.2567, + "step": 64190 + }, + { + "epoch": 8.67, + "learning_rate": 1.2928169564642338e-05, + "loss": 2.285, + "step": 64200 + }, + { + "epoch": 8.67, + "learning_rate": 1.290233129950885e-05, + "loss": 2.2854, + "step": 64210 + }, + { + "epoch": 8.67, + "learning_rate": 1.2876517719988333e-05, + "loss": 2.3168, + "step": 64220 + }, + { + "epoch": 8.67, + "learning_rate": 1.2850728830728668e-05, + "loss": 2.2701, + "step": 64230 + }, + { + "epoch": 8.68, + "learning_rate": 1.2824964636373492e-05, + "loss": 2.2567, + "step": 64240 + }, + { + "epoch": 8.68, + "learning_rate": 1.2799225141561726e-05, + "loss": 2.2829, + "step": 64250 + }, + { + "epoch": 8.68, + "learning_rate": 1.2773510350928079e-05, + "loss": 2.2806, + "step": 64260 + }, + { + "epoch": 8.68, + "learning_rate": 1.2747820269102665e-05, + "loss": 2.2743, + "step": 64270 + }, + { + "epoch": 8.68, + "learning_rate": 1.272215490071125e-05, + "loss": 2.3117, + "step": 64280 + }, + { + "epoch": 8.68, + "learning_rate": 1.2696514250375122e-05, + "loss": 2.2668, + "step": 64290 + }, + { + "epoch": 8.68, + "learning_rate": 1.2670898322711003e-05, + "loss": 2.2694, + "step": 64300 + }, + { + "epoch": 8.69, + "learning_rate": 1.2645307122331354e-05, + "loss": 2.2511, + "step": 64310 + }, + { + "epoch": 8.69, + "learning_rate": 1.261974065384404e-05, + "loss": 2.2613, + "step": 64320 + }, + { + "epoch": 8.69, + "learning_rate": 1.259419892185256e-05, + "loss": 2.276, + "step": 64330 + }, + { + "epoch": 8.69, + "learning_rate": 1.2568681930955888e-05, + "loss": 2.2722, + "step": 64340 + }, + { + "epoch": 8.69, + "learning_rate": 1.2543189685748611e-05, + "loss": 2.3039, + "step": 64350 + }, + { + "epoch": 8.69, + "learning_rate": 1.2517722190820777e-05, + "loss": 2.293, + "step": 64360 + }, + { + "epoch": 8.69, + "learning_rate": 1.2492279450758086e-05, + "loss": 2.2538, + "step": 64370 + }, + { + "epoch": 8.7, + "learning_rate": 1.2466861470141703e-05, + "loss": 2.293, + "step": 64380 + }, + { + "epoch": 8.7, + "learning_rate": 1.2441468253548337e-05, + "loss": 2.2807, + "step": 64390 + }, + { + "epoch": 8.7, + "learning_rate": 1.2416099805550261e-05, + "loss": 2.2693, + "step": 64400 + }, + { + "epoch": 8.7, + "learning_rate": 1.2390756130715257e-05, + "loss": 2.2907, + "step": 64410 + }, + { + "epoch": 8.7, + "learning_rate": 1.2365437233606723e-05, + "loss": 2.28, + "step": 64420 + }, + { + "epoch": 8.7, + "learning_rate": 1.234014311878353e-05, + "loss": 2.2629, + "step": 64430 + }, + { + "epoch": 8.7, + "learning_rate": 1.2314873790800084e-05, + "loss": 2.2919, + "step": 64440 + }, + { + "epoch": 8.7, + "learning_rate": 1.2289629254206313e-05, + "loss": 2.249, + "step": 64450 + }, + { + "epoch": 8.71, + "learning_rate": 1.2264409513547796e-05, + "loss": 2.2752, + "step": 64460 + }, + { + "epoch": 8.71, + "learning_rate": 1.223921457336552e-05, + "loss": 2.2788, + "step": 64470 + }, + { + "epoch": 8.71, + "learning_rate": 1.2214044438196036e-05, + "loss": 2.2908, + "step": 64480 + }, + { + "epoch": 8.71, + "learning_rate": 1.2188899112571455e-05, + "loss": 2.2839, + "step": 64490 + }, + { + "epoch": 8.71, + "learning_rate": 1.2163778601019385e-05, + "loss": 2.2606, + "step": 64500 + }, + { + "epoch": 8.71, + "learning_rate": 1.2138682908063047e-05, + "loss": 2.2914, + "step": 64510 + }, + { + "epoch": 8.71, + "learning_rate": 1.2113612038221105e-05, + "loss": 2.2536, + "step": 64520 + }, + { + "epoch": 8.72, + "learning_rate": 1.2088565996007764e-05, + "loss": 2.2732, + "step": 64530 + }, + { + "epoch": 8.72, + "learning_rate": 1.2063544785932789e-05, + "loss": 2.2746, + "step": 64540 + }, + { + "epoch": 8.72, + "learning_rate": 1.2038548412501503e-05, + "loss": 2.2831, + "step": 64550 + }, + { + "epoch": 8.72, + "learning_rate": 1.2013576880214676e-05, + "loss": 2.3174, + "step": 64560 + }, + { + "epoch": 8.72, + "learning_rate": 1.198863019356866e-05, + "loss": 2.2805, + "step": 64570 + }, + { + "epoch": 8.72, + "learning_rate": 1.196370835705533e-05, + "loss": 2.3062, + "step": 64580 + }, + { + "epoch": 8.72, + "learning_rate": 1.1938811375162027e-05, + "loss": 2.2877, + "step": 64590 + }, + { + "epoch": 8.72, + "learning_rate": 1.1913939252371735e-05, + "loss": 2.255, + "step": 64600 + }, + { + "epoch": 8.73, + "learning_rate": 1.188909199316287e-05, + "loss": 2.2826, + "step": 64610 + }, + { + "epoch": 8.73, + "learning_rate": 1.1864269602009385e-05, + "loss": 2.2846, + "step": 64620 + }, + { + "epoch": 8.73, + "learning_rate": 1.1839472083380742e-05, + "loss": 2.2832, + "step": 64630 + }, + { + "epoch": 8.73, + "learning_rate": 1.1814699441741998e-05, + "loss": 2.2613, + "step": 64640 + }, + { + "epoch": 8.73, + "learning_rate": 1.1789951681553655e-05, + "loss": 2.2787, + "step": 64650 + }, + { + "epoch": 8.73, + "learning_rate": 1.1765228807271749e-05, + "loss": 2.2943, + "step": 64660 + }, + { + "epoch": 8.73, + "learning_rate": 1.1740530823347866e-05, + "loss": 2.2558, + "step": 64670 + }, + { + "epoch": 8.74, + "learning_rate": 1.171585773422905e-05, + "loss": 2.3028, + "step": 64680 + }, + { + "epoch": 8.74, + "learning_rate": 1.1691209544357965e-05, + "loss": 2.2798, + "step": 64690 + }, + { + "epoch": 8.74, + "learning_rate": 1.1666586258172694e-05, + "loss": 2.2811, + "step": 64700 + }, + { + "epoch": 8.74, + "learning_rate": 1.164198788010689e-05, + "loss": 2.2955, + "step": 64710 + }, + { + "epoch": 8.74, + "learning_rate": 1.1617414414589676e-05, + "loss": 2.2789, + "step": 64720 + }, + { + "epoch": 8.74, + "learning_rate": 1.1592865866045697e-05, + "loss": 2.2768, + "step": 64730 + }, + { + "epoch": 8.74, + "learning_rate": 1.1568342238895184e-05, + "loss": 2.2719, + "step": 64740 + }, + { + "epoch": 8.75, + "learning_rate": 1.1543843537553804e-05, + "loss": 2.2734, + "step": 64750 + }, + { + "epoch": 8.75, + "learning_rate": 1.1519369766432762e-05, + "loss": 2.297, + "step": 64760 + }, + { + "epoch": 8.75, + "learning_rate": 1.1494920929938733e-05, + "loss": 2.2954, + "step": 64770 + }, + { + "epoch": 8.75, + "learning_rate": 1.1470497032474013e-05, + "loss": 2.279, + "step": 64780 + }, + { + "epoch": 8.75, + "learning_rate": 1.1446098078436283e-05, + "loss": 2.2633, + "step": 64790 + }, + { + "epoch": 8.75, + "learning_rate": 1.1421724072218797e-05, + "loss": 2.2575, + "step": 64800 + }, + { + "epoch": 8.75, + "learning_rate": 1.1397375018210292e-05, + "loss": 2.2838, + "step": 64810 + }, + { + "epoch": 8.75, + "learning_rate": 1.1373050920795029e-05, + "loss": 2.2937, + "step": 64820 + }, + { + "epoch": 8.76, + "learning_rate": 1.1348751784352788e-05, + "loss": 2.2422, + "step": 64830 + }, + { + "epoch": 8.76, + "learning_rate": 1.1324477613258837e-05, + "loss": 2.2794, + "step": 64840 + }, + { + "epoch": 8.76, + "learning_rate": 1.1300228411883927e-05, + "loss": 2.3019, + "step": 64850 + }, + { + "epoch": 8.76, + "learning_rate": 1.1276004184594333e-05, + "loss": 2.2891, + "step": 64860 + }, + { + "epoch": 8.76, + "learning_rate": 1.1251804935751868e-05, + "loss": 2.2586, + "step": 64870 + }, + { + "epoch": 8.76, + "learning_rate": 1.122763066971376e-05, + "loss": 2.2816, + "step": 64880 + }, + { + "epoch": 8.76, + "learning_rate": 1.1203481390832898e-05, + "loss": 2.2883, + "step": 64890 + }, + { + "epoch": 8.77, + "learning_rate": 1.1179357103457466e-05, + "loss": 2.2593, + "step": 64900 + }, + { + "epoch": 8.77, + "learning_rate": 1.115525781193124e-05, + "loss": 2.3009, + "step": 64910 + }, + { + "epoch": 8.77, + "learning_rate": 1.1131183520593584e-05, + "loss": 2.3058, + "step": 64920 + }, + { + "epoch": 8.77, + "learning_rate": 1.1107134233779247e-05, + "loss": 2.2622, + "step": 64930 + }, + { + "epoch": 8.77, + "learning_rate": 1.1083109955818497e-05, + "loss": 2.2731, + "step": 64940 + }, + { + "epoch": 8.77, + "learning_rate": 1.1059110691037093e-05, + "loss": 2.2951, + "step": 64950 + }, + { + "epoch": 8.77, + "learning_rate": 1.1035136443756359e-05, + "loss": 2.254, + "step": 64960 + }, + { + "epoch": 8.77, + "learning_rate": 1.1011187218293011e-05, + "loss": 2.2429, + "step": 64970 + }, + { + "epoch": 8.78, + "learning_rate": 1.0987263018959397e-05, + "loss": 2.2607, + "step": 64980 + }, + { + "epoch": 8.78, + "learning_rate": 1.0963363850063205e-05, + "loss": 2.2862, + "step": 64990 + }, + { + "epoch": 8.78, + "learning_rate": 1.0939489715907662e-05, + "loss": 2.2789, + "step": 65000 + }, + { + "epoch": 8.78, + "eval_loss": 2.423799991607666, + "eval_runtime": 1269.0127, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 5.0, + "step": 65000 + }, + { + "epoch": 8.78, + "learning_rate": 0.00011032802507499742, + "loss": 2.298, + "step": 65010 + }, + { + "epoch": 8.78, + "learning_rate": 0.00011028710508691679, + "loss": 2.2818, + "step": 65020 + }, + { + "epoch": 8.78, + "learning_rate": 0.00011024618827678742, + "loss": 2.2924, + "step": 65030 + }, + { + "epoch": 8.78, + "learning_rate": 0.00011020527464788362, + "loss": 2.3126, + "step": 65040 + }, + { + "epoch": 8.79, + "learning_rate": 0.00011016436420347948, + "loss": 2.2822, + "step": 65050 + }, + { + "epoch": 8.79, + "learning_rate": 0.0001101234569468487, + "loss": 2.3013, + "step": 65060 + }, + { + "epoch": 8.79, + "learning_rate": 0.00011008255288126484, + "loss": 2.3096, + "step": 65070 + }, + { + "epoch": 8.79, + "learning_rate": 0.00011004165201000116, + "loss": 2.3275, + "step": 65080 + }, + { + "epoch": 8.79, + "learning_rate": 0.00011000075433633074, + "loss": 2.2697, + "step": 65090 + }, + { + "epoch": 8.79, + "learning_rate": 0.00010995985986352624, + "loss": 2.3019, + "step": 65100 + }, + { + "epoch": 8.79, + "learning_rate": 0.00010991896859486023, + "loss": 2.3125, + "step": 65110 + }, + { + "epoch": 8.8, + "learning_rate": 0.00010987808053360495, + "loss": 2.3284, + "step": 65120 + }, + { + "epoch": 8.8, + "learning_rate": 0.00010983719568303241, + "loss": 2.3277, + "step": 65130 + }, + { + "epoch": 8.8, + "learning_rate": 0.00010979631404641428, + "loss": 2.3207, + "step": 65140 + }, + { + "epoch": 8.8, + "learning_rate": 0.0001097554356270221, + "loss": 2.3009, + "step": 65150 + }, + { + "epoch": 8.8, + "learning_rate": 0.00010971456042812707, + "loss": 2.3218, + "step": 65160 + }, + { + "epoch": 8.8, + "learning_rate": 0.0001096736884530002, + "loss": 2.3457, + "step": 65170 + }, + { + "epoch": 8.8, + "learning_rate": 0.00010963281970491208, + "loss": 2.3026, + "step": 65180 + }, + { + "epoch": 8.8, + "learning_rate": 0.00010959195418713318, + "loss": 2.3229, + "step": 65190 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010955109190293381, + "loss": 2.3555, + "step": 65200 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010951023285558385, + "loss": 2.3307, + "step": 65210 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010946937704835289, + "loss": 2.3357, + "step": 65220 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010942852448451037, + "loss": 2.3314, + "step": 65230 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010938767516732549, + "loss": 2.2866, + "step": 65240 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010934682910006712, + "loss": 2.3265, + "step": 65250 + }, + { + "epoch": 8.81, + "learning_rate": 0.00010930598628600387, + "loss": 2.3542, + "step": 65260 + }, + { + "epoch": 8.82, + "learning_rate": 0.0001092651467284041, + "loss": 2.3234, + "step": 65270 + }, + { + "epoch": 8.82, + "learning_rate": 0.00010922431043053598, + "loss": 2.3185, + "step": 65280 + }, + { + "epoch": 8.82, + "learning_rate": 0.00010918347739566734, + "loss": 2.3011, + "step": 65290 + }, + { + "epoch": 8.82, + "learning_rate": 0.00010914264762706571, + "loss": 2.3168, + "step": 65300 + }, + { + "epoch": 8.82, + "learning_rate": 0.00010910182112799848, + "loss": 2.3058, + "step": 65310 + }, + { + "epoch": 8.82, + "learning_rate": 0.0001090609979017327, + "loss": 2.3258, + "step": 65320 + }, + { + "epoch": 8.82, + "learning_rate": 0.00010902017795153518, + "loss": 2.3216, + "step": 65330 + }, + { + "epoch": 8.82, + "learning_rate": 0.00010897936128067245, + "loss": 2.3203, + "step": 65340 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010893854789241075, + "loss": 2.3575, + "step": 65350 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010889773779001617, + "loss": 2.336, + "step": 65360 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010885693097675448, + "loss": 2.3056, + "step": 65370 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010881612745589104, + "loss": 2.32, + "step": 65380 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010877532723069114, + "loss": 2.3128, + "step": 65390 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010873453030441981, + "loss": 2.3343, + "step": 65400 + }, + { + "epoch": 8.83, + "learning_rate": 0.00010869373668034174, + "loss": 2.3212, + "step": 65410 + }, + { + "epoch": 8.84, + "learning_rate": 0.00010865294636172126, + "loss": 2.3265, + "step": 65420 + }, + { + "epoch": 8.84, + "learning_rate": 0.0001086121593518226, + "loss": 2.2803, + "step": 65430 + }, + { + "epoch": 8.84, + "learning_rate": 0.00010857137565390967, + "loss": 2.3106, + "step": 65440 + }, + { + "epoch": 8.84, + "learning_rate": 0.00010853059527124616, + "loss": 2.3361, + "step": 65450 + }, + { + "epoch": 8.84, + "learning_rate": 0.00010848981820709534, + "loss": 2.3107, + "step": 65460 + }, + { + "epoch": 8.84, + "learning_rate": 0.00010844904446472038, + "loss": 2.3249, + "step": 65470 + }, + { + "epoch": 8.84, + "learning_rate": 0.00010840827404738412, + "loss": 2.3392, + "step": 65480 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010836750695834916, + "loss": 2.3613, + "step": 65490 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010832674320087774, + "loss": 2.352, + "step": 65500 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010828598277823196, + "loss": 2.3089, + "step": 65510 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010824522569367361, + "loss": 2.3179, + "step": 65520 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010820447195046418, + "loss": 2.3291, + "step": 65530 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010816372155186489, + "loss": 2.333, + "step": 65540 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010812297450113673, + "loss": 2.3415, + "step": 65550 + }, + { + "epoch": 8.85, + "learning_rate": 0.00010808223080154044, + "loss": 2.3432, + "step": 65560 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010804149045633646, + "loss": 2.3165, + "step": 65570 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010800075346878492, + "loss": 2.3421, + "step": 65580 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010796001984214567, + "loss": 2.3369, + "step": 65590 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010791928957967845, + "loss": 2.3183, + "step": 65600 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010787856268464267, + "loss": 2.3005, + "step": 65610 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010783783916029723, + "loss": 2.3284, + "step": 65620 + }, + { + "epoch": 8.86, + "learning_rate": 0.00010779711900990111, + "loss": 2.3353, + "step": 65630 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010775640223671279, + "loss": 2.3338, + "step": 65640 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010771568884399063, + "loss": 2.3238, + "step": 65650 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010767497883499258, + "loss": 2.323, + "step": 65660 + }, + { + "epoch": 8.87, + "learning_rate": 0.0001076342722129764, + "loss": 2.3202, + "step": 65670 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010759356898119956, + "loss": 2.3298, + "step": 65680 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010755286914291929, + "loss": 2.3072, + "step": 65690 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010751217270139248, + "loss": 2.3452, + "step": 65700 + }, + { + "epoch": 8.87, + "learning_rate": 0.00010747147965987578, + "loss": 2.3392, + "step": 65710 + }, + { + "epoch": 8.88, + "learning_rate": 0.00010743079002162561, + "loss": 2.3526, + "step": 65720 + }, + { + "epoch": 8.88, + "learning_rate": 0.00010739010378989813, + "loss": 2.2986, + "step": 65730 + }, + { + "epoch": 8.88, + "learning_rate": 0.00010734942096794905, + "loss": 2.3602, + "step": 65740 + }, + { + "epoch": 8.88, + "learning_rate": 0.00010730874155903405, + "loss": 2.332, + "step": 65750 + }, + { + "epoch": 8.88, + "learning_rate": 0.00010726806556640836, + "loss": 2.3365, + "step": 65760 + }, + { + "epoch": 8.88, + "learning_rate": 0.0001072273929933271, + "loss": 2.3384, + "step": 65770 + }, + { + "epoch": 8.88, + "learning_rate": 0.00010718672384304488, + "loss": 2.302, + "step": 65780 + }, + { + "epoch": 8.89, + "learning_rate": 0.00010714605811881622, + "loss": 2.2957, + "step": 65790 + }, + { + "epoch": 8.89, + "learning_rate": 0.00010710539582389537, + "loss": 2.3329, + "step": 65800 + }, + { + "epoch": 8.89, + "learning_rate": 0.00010706473696153627, + "loss": 2.3449, + "step": 65810 + }, + { + "epoch": 8.89, + "learning_rate": 0.00010702408153499247, + "loss": 2.3305, + "step": 65820 + }, + { + "epoch": 8.89, + "learning_rate": 0.0001069834295475174, + "loss": 2.3192, + "step": 65830 + }, + { + "epoch": 8.89, + "learning_rate": 0.00010694278100236417, + "loss": 2.3124, + "step": 65840 + }, + { + "epoch": 8.89, + "learning_rate": 0.00010690213590278565, + "loss": 2.3043, + "step": 65850 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010686149425203426, + "loss": 2.3322, + "step": 65860 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010682085605336236, + "loss": 2.346, + "step": 65870 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010678022131002195, + "loss": 2.3445, + "step": 65880 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010673959002526477, + "loss": 2.3325, + "step": 65890 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010669896220234216, + "loss": 2.331, + "step": 65900 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010665833784450539, + "loss": 2.3241, + "step": 65910 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010661771695500532, + "loss": 2.3273, + "step": 65920 + }, + { + "epoch": 8.9, + "learning_rate": 0.00010657709953709258, + "loss": 2.3786, + "step": 65930 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010653648559401745, + "loss": 2.3494, + "step": 65940 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010649587512903001, + "loss": 2.3227, + "step": 65950 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010645526814538006, + "loss": 2.3135, + "step": 65960 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010641466464631714, + "loss": 2.3401, + "step": 65970 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010637406463509034, + "loss": 2.3354, + "step": 65980 + }, + { + "epoch": 8.91, + "learning_rate": 0.00010633346811494866, + "loss": 2.3334, + "step": 65990 + }, + { + "epoch": 8.91, + "learning_rate": 0.0001062928750891408, + "loss": 2.3245, + "step": 66000 + }, + { + "epoch": 8.91, + "eval_loss": 2.463181734085083, + "eval_runtime": 1269.4771, + "eval_samples_per_second": 59.974, + "eval_steps_per_second": 4.998, + "step": 66000 + }, + { + "epoch": 8.92, + "learning_rate": 0.0001062522855609152, + "loss": 2.3069, + "step": 66010 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010621169953351982, + "loss": 2.3352, + "step": 66020 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010617111701020252, + "loss": 2.3111, + "step": 66030 + }, + { + "epoch": 8.92, + "learning_rate": 0.0001061305379942109, + "loss": 2.3607, + "step": 66040 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010608996248879219, + "loss": 2.308, + "step": 66050 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010604939049719334, + "loss": 2.3331, + "step": 66060 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010600882202266109, + "loss": 2.335, + "step": 66070 + }, + { + "epoch": 8.92, + "learning_rate": 0.00010596825706844184, + "loss": 2.3553, + "step": 66080 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010592769563778174, + "loss": 2.3339, + "step": 66090 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010588713773392661, + "loss": 2.3259, + "step": 66100 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010584658336012205, + "loss": 2.3591, + "step": 66110 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010580603251961335, + "loss": 2.3095, + "step": 66120 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010576548521564552, + "loss": 2.3324, + "step": 66130 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010572494145146329, + "loss": 2.3383, + "step": 66140 + }, + { + "epoch": 8.93, + "learning_rate": 0.00010568440123031105, + "loss": 2.3536, + "step": 66150 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010564386455543297, + "loss": 2.3267, + "step": 66160 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010560333143007304, + "loss": 2.3479, + "step": 66170 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010556280185747467, + "loss": 2.3463, + "step": 66180 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010552227584088125, + "loss": 2.3545, + "step": 66190 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010548175338353576, + "loss": 2.3268, + "step": 66200 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010544123448868109, + "loss": 2.3448, + "step": 66210 + }, + { + "epoch": 8.94, + "learning_rate": 0.00010540071915955947, + "loss": 2.3611, + "step": 66220 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010536020739941319, + "loss": 2.3535, + "step": 66230 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010531969921148411, + "loss": 2.3222, + "step": 66240 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010527919459901383, + "loss": 2.3093, + "step": 66250 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010523869356524362, + "loss": 2.3169, + "step": 66260 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010519819611341452, + "loss": 2.3183, + "step": 66270 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010515770224676728, + "loss": 2.3573, + "step": 66280 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010511721196854237, + "loss": 2.3544, + "step": 66290 + }, + { + "epoch": 8.95, + "learning_rate": 0.00010507672528197987, + "loss": 2.3199, + "step": 66300 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010503624219031972, + "loss": 2.3108, + "step": 66310 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010499576269680146, + "loss": 2.3443, + "step": 66320 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010495528680466448, + "loss": 2.3269, + "step": 66330 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010491481451714764, + "loss": 2.3324, + "step": 66340 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010487434583748973, + "loss": 2.3503, + "step": 66350 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010483388076892924, + "loss": 2.3197, + "step": 66360 + }, + { + "epoch": 8.96, + "learning_rate": 0.00010479341931470431, + "loss": 2.324, + "step": 66370 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010475296147805267, + "loss": 2.3471, + "step": 66380 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010471250726221197, + "loss": 2.3493, + "step": 66390 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010467205667041946, + "loss": 2.3294, + "step": 66400 + }, + { + "epoch": 8.97, + "learning_rate": 0.0001046316097059122, + "loss": 2.3345, + "step": 66410 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010459116637192679, + "loss": 2.3262, + "step": 66420 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010455072667169964, + "loss": 2.3483, + "step": 66430 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010451029060846691, + "loss": 2.2973, + "step": 66440 + }, + { + "epoch": 8.97, + "learning_rate": 0.00010446985818546443, + "loss": 2.3341, + "step": 66450 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010442942940592766, + "loss": 2.3696, + "step": 66460 + }, + { + "epoch": 8.98, + "learning_rate": 0.0001043890042730919, + "loss": 2.3518, + "step": 66470 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010434858279019205, + "loss": 2.3238, + "step": 66480 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010430816496046284, + "loss": 2.3411, + "step": 66490 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010426775078713854, + "loss": 2.3636, + "step": 66500 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010422734027345326, + "loss": 2.3255, + "step": 66510 + }, + { + "epoch": 8.98, + "learning_rate": 0.00010418693342264078, + "loss": 2.2983, + "step": 66520 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010414653023793464, + "loss": 2.3351, + "step": 66530 + }, + { + "epoch": 8.99, + "learning_rate": 0.0001041061307225679, + "loss": 2.3587, + "step": 66540 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010406573487977356, + "loss": 2.3191, + "step": 66550 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010402534271278417, + "loss": 2.3563, + "step": 66560 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010398495422483215, + "loss": 2.358, + "step": 66570 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010394456941914936, + "loss": 2.3265, + "step": 66580 + }, + { + "epoch": 8.99, + "learning_rate": 0.00010390418829896758, + "loss": 2.3412, + "step": 66590 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010386381086751822, + "loss": 2.3406, + "step": 66600 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010382343712803253, + "loss": 2.3635, + "step": 66610 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010378306708374117, + "loss": 2.3523, + "step": 66620 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010374270073787475, + "loss": 2.3207, + "step": 66630 + }, + { + "epoch": 9.0, + "learning_rate": 0.0001037023380936635, + "loss": 2.3281, + "step": 66640 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010366197915433745, + "loss": 2.3065, + "step": 66650 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010362162392312614, + "loss": 2.3141, + "step": 66660 + }, + { + "epoch": 9.0, + "learning_rate": 0.00010358127240325896, + "loss": 2.303, + "step": 66670 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010354092459796496, + "loss": 2.3302, + "step": 66680 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010350058051047298, + "loss": 2.3243, + "step": 66690 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010346024014401133, + "loss": 2.3088, + "step": 66700 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010341990350180829, + "loss": 2.2969, + "step": 66710 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010337957058709167, + "loss": 2.2866, + "step": 66720 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010333924140308915, + "loss": 2.2995, + "step": 66730 + }, + { + "epoch": 9.01, + "learning_rate": 0.00010329891595302778, + "loss": 2.3179, + "step": 66740 + }, + { + "epoch": 9.02, + "learning_rate": 0.0001032585942401347, + "loss": 2.304, + "step": 66750 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010321827626763653, + "loss": 2.3148, + "step": 66760 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010317796203875972, + "loss": 2.3425, + "step": 66770 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010313765155673021, + "loss": 2.3126, + "step": 66780 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010309734482477383, + "loss": 2.3193, + "step": 66790 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010305704184611602, + "loss": 2.3163, + "step": 66800 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010301674262398209, + "loss": 2.3048, + "step": 66810 + }, + { + "epoch": 9.02, + "learning_rate": 0.00010297644716159673, + "loss": 2.323, + "step": 66820 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010293615546218458, + "loss": 2.3172, + "step": 66830 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010289586752896995, + "loss": 2.2832, + "step": 66840 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010285558336517677, + "loss": 2.3142, + "step": 66850 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010281530297402869, + "loss": 2.3155, + "step": 66860 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010277502635874912, + "loss": 2.2911, + "step": 66870 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010273475352256106, + "loss": 2.3134, + "step": 66880 + }, + { + "epoch": 9.03, + "learning_rate": 0.00010269448446868735, + "loss": 2.3272, + "step": 66890 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010265421920035037, + "loss": 2.279, + "step": 66900 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010261395772077232, + "loss": 2.3218, + "step": 66910 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010257370003317502, + "loss": 2.3008, + "step": 66920 + }, + { + "epoch": 9.04, + "learning_rate": 0.0001025334461407801, + "loss": 2.3192, + "step": 66930 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010249319604680865, + "loss": 2.3154, + "step": 66940 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010245294975448173, + "loss": 2.2971, + "step": 66950 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010241270726701993, + "loss": 2.3211, + "step": 66960 + }, + { + "epoch": 9.04, + "learning_rate": 0.00010237246858764368, + "loss": 2.325, + "step": 66970 + }, + { + "epoch": 9.05, + "learning_rate": 0.00010233223371957284, + "loss": 2.3014, + "step": 66980 + }, + { + "epoch": 9.05, + "learning_rate": 0.00010229200266602723, + "loss": 2.3403, + "step": 66990 + }, + { + "epoch": 9.05, + "learning_rate": 0.0001022517754302262, + "loss": 2.337, + "step": 67000 + }, + { + "epoch": 9.05, + "eval_loss": 2.464355707168579, + "eval_runtime": 1269.0426, + "eval_samples_per_second": 59.994, + "eval_steps_per_second": 5.0, + "step": 67000 + }, + { + "epoch": 9.05, + "learning_rate": 0.000102211552015389, + "loss": 2.3159, + "step": 67010 + }, + { + "epoch": 9.05, + "learning_rate": 0.00010217133242473427, + "loss": 2.3263, + "step": 67020 + }, + { + "epoch": 9.05, + "learning_rate": 0.00010213111666148062, + "loss": 2.3228, + "step": 67030 + }, + { + "epoch": 9.05, + "learning_rate": 0.00010209090472884618, + "loss": 2.3465, + "step": 67040 + }, + { + "epoch": 9.06, + "learning_rate": 0.0001020506966300489, + "loss": 2.3447, + "step": 67050 + }, + { + "epoch": 9.06, + "learning_rate": 0.00010201049236830628, + "loss": 2.323, + "step": 67060 + }, + { + "epoch": 9.06, + "learning_rate": 0.00010197029194683563, + "loss": 2.3342, + "step": 67070 + }, + { + "epoch": 9.06, + "learning_rate": 0.00010193009536885391, + "loss": 2.3161, + "step": 67080 + }, + { + "epoch": 9.06, + "learning_rate": 0.0001018899026375778, + "loss": 2.3052, + "step": 67090 + }, + { + "epoch": 9.06, + "learning_rate": 0.00010184971375622363, + "loss": 2.3071, + "step": 67100 + }, + { + "epoch": 9.06, + "learning_rate": 0.00010180952872800744, + "loss": 2.3062, + "step": 67110 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010176934755614496, + "loss": 2.3128, + "step": 67120 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010172917024385169, + "loss": 2.3296, + "step": 67130 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010168899679434258, + "loss": 2.3431, + "step": 67140 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010164882721083253, + "loss": 2.3048, + "step": 67150 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010160866149653609, + "loss": 2.3151, + "step": 67160 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010156849965466743, + "loss": 2.3131, + "step": 67170 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010152834168844033, + "loss": 2.3155, + "step": 67180 + }, + { + "epoch": 9.07, + "learning_rate": 0.00010148818760106844, + "loss": 2.3135, + "step": 67190 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010144803739576499, + "loss": 2.3157, + "step": 67200 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010140789107574295, + "loss": 2.3193, + "step": 67210 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010136774864421492, + "loss": 2.3274, + "step": 67220 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010132761010439324, + "loss": 2.2954, + "step": 67230 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010128747545948996, + "loss": 2.3066, + "step": 67240 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010124734471271673, + "loss": 2.293, + "step": 67250 + }, + { + "epoch": 9.08, + "learning_rate": 0.00010120721786728494, + "loss": 2.3072, + "step": 67260 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010116709492640569, + "loss": 2.3308, + "step": 67270 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010112697589328973, + "loss": 2.324, + "step": 67280 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010108686077114757, + "loss": 2.3202, + "step": 67290 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010104674956318925, + "loss": 2.3002, + "step": 67300 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010100664227262465, + "loss": 2.3189, + "step": 67310 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010096653890266327, + "loss": 2.3028, + "step": 67320 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010092643945651438, + "loss": 2.3149, + "step": 67330 + }, + { + "epoch": 9.09, + "learning_rate": 0.00010088634393738675, + "loss": 2.3036, + "step": 67340 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010084625234848895, + "loss": 2.34, + "step": 67350 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010080616469302931, + "loss": 2.3238, + "step": 67360 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010076608097421582, + "loss": 2.2953, + "step": 67370 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010072600119525597, + "loss": 2.3213, + "step": 67380 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010068592535935714, + "loss": 2.327, + "step": 67390 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010064585346972628, + "loss": 2.3276, + "step": 67400 + }, + { + "epoch": 9.1, + "learning_rate": 0.00010060578552957017, + "loss": 2.3176, + "step": 67410 + }, + { + "epoch": 9.11, + "learning_rate": 0.00010056572154209509, + "loss": 2.3327, + "step": 67420 + }, + { + "epoch": 9.11, + "learning_rate": 0.00010052566151050709, + "loss": 2.3009, + "step": 67430 + }, + { + "epoch": 9.11, + "learning_rate": 0.0001004856054380119, + "loss": 2.3336, + "step": 67440 + }, + { + "epoch": 9.11, + "learning_rate": 0.00010044555332781498, + "loss": 2.3316, + "step": 67450 + }, + { + "epoch": 9.11, + "learning_rate": 0.00010040550518312138, + "loss": 2.3167, + "step": 67460 + }, + { + "epoch": 9.11, + "learning_rate": 0.0001003654610071359, + "loss": 2.3088, + "step": 67470 + }, + { + "epoch": 9.11, + "learning_rate": 0.00010032542080306297, + "loss": 2.3311, + "step": 67480 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010028538457410679, + "loss": 2.3262, + "step": 67490 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010024535232347115, + "loss": 2.3034, + "step": 67500 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010020532405435957, + "loss": 2.3428, + "step": 67510 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010016529976997519, + "loss": 2.3214, + "step": 67520 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010012527947352102, + "loss": 2.3312, + "step": 67530 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010008526316819942, + "loss": 2.3563, + "step": 67540 + }, + { + "epoch": 9.12, + "learning_rate": 0.0001000452508572127, + "loss": 2.3126, + "step": 67550 + }, + { + "epoch": 9.12, + "learning_rate": 0.00010000524254376281, + "loss": 2.3431, + "step": 67560 + }, + { + "epoch": 9.13, + "learning_rate": 9.996523823105139e-05, + "loss": 2.3198, + "step": 67570 + }, + { + "epoch": 9.13, + "learning_rate": 9.992523792227955e-05, + "loss": 2.3234, + "step": 67580 + }, + { + "epoch": 9.13, + "learning_rate": 9.988524162064833e-05, + "loss": 2.3005, + "step": 67590 + }, + { + "epoch": 9.13, + "learning_rate": 9.984524932935838e-05, + "loss": 2.3091, + "step": 67600 + }, + { + "epoch": 9.13, + "learning_rate": 9.980526105161001e-05, + "loss": 2.3043, + "step": 67610 + }, + { + "epoch": 9.13, + "learning_rate": 9.976527679060316e-05, + "loss": 2.3296, + "step": 67620 + }, + { + "epoch": 9.13, + "learning_rate": 9.972529654953751e-05, + "loss": 2.3063, + "step": 67630 + }, + { + "epoch": 9.14, + "learning_rate": 9.968532033161242e-05, + "loss": 2.3231, + "step": 67640 + }, + { + "epoch": 9.14, + "learning_rate": 9.964534814002695e-05, + "loss": 2.3274, + "step": 67650 + }, + { + "epoch": 9.14, + "learning_rate": 9.960537997797972e-05, + "loss": 2.3275, + "step": 67660 + }, + { + "epoch": 9.14, + "learning_rate": 9.956541584866915e-05, + "loss": 2.3183, + "step": 67670 + }, + { + "epoch": 9.14, + "learning_rate": 9.95254557552933e-05, + "loss": 2.3342, + "step": 67680 + }, + { + "epoch": 9.14, + "learning_rate": 9.948549970104993e-05, + "loss": 2.3329, + "step": 67690 + }, + { + "epoch": 9.14, + "learning_rate": 9.94455476891364e-05, + "loss": 2.3375, + "step": 67700 + }, + { + "epoch": 9.14, + "learning_rate": 9.94055997227498e-05, + "loss": 2.3058, + "step": 67710 + }, + { + "epoch": 9.15, + "learning_rate": 9.93656558050869e-05, + "loss": 2.3194, + "step": 67720 + }, + { + "epoch": 9.15, + "learning_rate": 9.932571593934424e-05, + "loss": 2.2998, + "step": 67730 + }, + { + "epoch": 9.15, + "learning_rate": 9.928578012871775e-05, + "loss": 2.3435, + "step": 67740 + }, + { + "epoch": 9.15, + "learning_rate": 9.92458483764033e-05, + "loss": 2.324, + "step": 67750 + }, + { + "epoch": 9.15, + "learning_rate": 9.920592068559639e-05, + "loss": 2.3025, + "step": 67760 + }, + { + "epoch": 9.15, + "learning_rate": 9.916599705949218e-05, + "loss": 2.3239, + "step": 67770 + }, + { + "epoch": 9.15, + "learning_rate": 9.912607750128539e-05, + "loss": 2.3278, + "step": 67780 + }, + { + "epoch": 9.16, + "learning_rate": 9.908616201417056e-05, + "loss": 2.32, + "step": 67790 + }, + { + "epoch": 9.16, + "learning_rate": 9.904625060134184e-05, + "loss": 2.3236, + "step": 67800 + }, + { + "epoch": 9.16, + "learning_rate": 9.900634326599311e-05, + "loss": 2.3224, + "step": 67810 + }, + { + "epoch": 9.16, + "learning_rate": 9.896644001131784e-05, + "loss": 2.3148, + "step": 67820 + }, + { + "epoch": 9.16, + "learning_rate": 9.892654084050923e-05, + "loss": 2.3264, + "step": 67830 + }, + { + "epoch": 9.16, + "learning_rate": 9.888664575676009e-05, + "loss": 2.3043, + "step": 67840 + }, + { + "epoch": 9.16, + "learning_rate": 9.884675476326306e-05, + "loss": 2.3107, + "step": 67850 + }, + { + "epoch": 9.17, + "learning_rate": 9.880686786321024e-05, + "loss": 2.3066, + "step": 67860 + }, + { + "epoch": 9.17, + "learning_rate": 9.876698505979355e-05, + "loss": 2.3422, + "step": 67870 + }, + { + "epoch": 9.17, + "learning_rate": 9.87271063562045e-05, + "loss": 2.2833, + "step": 67880 + }, + { + "epoch": 9.17, + "learning_rate": 9.868723175563439e-05, + "loss": 2.3166, + "step": 67890 + }, + { + "epoch": 9.17, + "learning_rate": 9.864736126127402e-05, + "loss": 2.3381, + "step": 67900 + }, + { + "epoch": 9.17, + "learning_rate": 9.860749487631401e-05, + "loss": 2.3354, + "step": 67910 + }, + { + "epoch": 9.17, + "learning_rate": 9.856763260394456e-05, + "loss": 2.2924, + "step": 67920 + }, + { + "epoch": 9.17, + "learning_rate": 9.852777444735569e-05, + "loss": 2.3272, + "step": 67930 + }, + { + "epoch": 9.18, + "learning_rate": 9.848792040973677e-05, + "loss": 2.3109, + "step": 67940 + }, + { + "epoch": 9.18, + "learning_rate": 9.844807049427715e-05, + "loss": 2.3272, + "step": 67950 + }, + { + "epoch": 9.18, + "learning_rate": 9.84082247041658e-05, + "loss": 2.3328, + "step": 67960 + }, + { + "epoch": 9.18, + "learning_rate": 9.836838304259127e-05, + "loss": 2.3022, + "step": 67970 + }, + { + "epoch": 9.18, + "learning_rate": 9.832854551274179e-05, + "loss": 2.321, + "step": 67980 + }, + { + "epoch": 9.18, + "learning_rate": 9.828871211780527e-05, + "loss": 2.3118, + "step": 67990 + }, + { + "epoch": 9.18, + "learning_rate": 9.824888286096935e-05, + "loss": 2.3403, + "step": 68000 + }, + { + "epoch": 9.18, + "eval_loss": 2.463167190551758, + "eval_runtime": 1269.0658, + "eval_samples_per_second": 59.993, + "eval_steps_per_second": 5.0, + "step": 68000 + }, + { + "epoch": 9.19, + "learning_rate": 9.820905774542127e-05, + "loss": 2.3087, + "step": 68010 + }, + { + "epoch": 9.19, + "learning_rate": 9.816923677434796e-05, + "loss": 2.3569, + "step": 68020 + }, + { + "epoch": 9.19, + "learning_rate": 9.812941995093603e-05, + "loss": 2.307, + "step": 68030 + }, + { + "epoch": 9.19, + "learning_rate": 9.808960727837173e-05, + "loss": 2.3398, + "step": 68040 + }, + { + "epoch": 9.19, + "learning_rate": 9.804979875984105e-05, + "loss": 2.3211, + "step": 68050 + }, + { + "epoch": 9.19, + "learning_rate": 9.80099943985295e-05, + "loss": 2.3359, + "step": 68060 + }, + { + "epoch": 9.19, + "learning_rate": 9.797019419762245e-05, + "loss": 2.3151, + "step": 68070 + }, + { + "epoch": 9.19, + "learning_rate": 9.793039816030475e-05, + "loss": 2.3348, + "step": 68080 + }, + { + "epoch": 9.2, + "learning_rate": 9.78906062897611e-05, + "loss": 2.3121, + "step": 68090 + }, + { + "epoch": 9.2, + "learning_rate": 9.785081858917564e-05, + "loss": 2.2957, + "step": 68100 + }, + { + "epoch": 9.2, + "learning_rate": 9.781103506173244e-05, + "loss": 2.3039, + "step": 68110 + }, + { + "epoch": 9.2, + "learning_rate": 9.777125571061503e-05, + "loss": 2.3177, + "step": 68120 + }, + { + "epoch": 9.2, + "learning_rate": 9.773148053900676e-05, + "loss": 2.3103, + "step": 68130 + }, + { + "epoch": 9.2, + "learning_rate": 9.769170955009045e-05, + "loss": 2.3279, + "step": 68140 + }, + { + "epoch": 9.2, + "learning_rate": 9.76519427470487e-05, + "loss": 2.3679, + "step": 68150 + }, + { + "epoch": 9.21, + "learning_rate": 9.761218013306386e-05, + "loss": 2.3181, + "step": 68160 + }, + { + "epoch": 9.21, + "learning_rate": 9.75724217113179e-05, + "loss": 2.3381, + "step": 68170 + }, + { + "epoch": 9.21, + "learning_rate": 9.753266748499226e-05, + "loss": 2.3194, + "step": 68180 + }, + { + "epoch": 9.21, + "learning_rate": 9.749291745726831e-05, + "loss": 2.3146, + "step": 68190 + }, + { + "epoch": 9.21, + "learning_rate": 9.745317163132694e-05, + "loss": 2.315, + "step": 68200 + }, + { + "epoch": 9.21, + "learning_rate": 9.741343001034876e-05, + "loss": 2.3128, + "step": 68210 + }, + { + "epoch": 9.21, + "learning_rate": 9.737369259751397e-05, + "loss": 2.323, + "step": 68220 + }, + { + "epoch": 9.22, + "learning_rate": 9.733395939600253e-05, + "loss": 2.3243, + "step": 68230 + }, + { + "epoch": 9.22, + "learning_rate": 9.729423040899397e-05, + "loss": 2.3692, + "step": 68240 + }, + { + "epoch": 9.22, + "learning_rate": 9.72545056396676e-05, + "loss": 2.3279, + "step": 68250 + }, + { + "epoch": 9.22, + "learning_rate": 9.721478509120225e-05, + "loss": 2.3441, + "step": 68260 + }, + { + "epoch": 9.22, + "learning_rate": 9.717506876677652e-05, + "loss": 2.3289, + "step": 68270 + }, + { + "epoch": 9.22, + "learning_rate": 9.713535666956863e-05, + "loss": 2.3018, + "step": 68280 + }, + { + "epoch": 9.22, + "learning_rate": 9.70956488027565e-05, + "loss": 2.3238, + "step": 68290 + }, + { + "epoch": 9.22, + "learning_rate": 9.705594516951762e-05, + "loss": 2.3407, + "step": 68300 + }, + { + "epoch": 9.23, + "learning_rate": 9.70162457730292e-05, + "loss": 2.3301, + "step": 68310 + }, + { + "epoch": 9.23, + "learning_rate": 9.697655061646817e-05, + "loss": 2.3171, + "step": 68320 + }, + { + "epoch": 9.23, + "learning_rate": 9.693685970301106e-05, + "loss": 2.3432, + "step": 68330 + }, + { + "epoch": 9.23, + "learning_rate": 9.689717303583398e-05, + "loss": 2.3146, + "step": 68340 + }, + { + "epoch": 9.23, + "learning_rate": 9.685749061811285e-05, + "loss": 2.3328, + "step": 68350 + }, + { + "epoch": 9.23, + "learning_rate": 9.681781245302313e-05, + "loss": 2.3317, + "step": 68360 + }, + { + "epoch": 9.23, + "learning_rate": 9.67781385437401e-05, + "loss": 2.3272, + "step": 68370 + }, + { + "epoch": 9.24, + "learning_rate": 9.673846889343849e-05, + "loss": 2.3403, + "step": 68380 + }, + { + "epoch": 9.24, + "learning_rate": 9.66988035052928e-05, + "loss": 2.3277, + "step": 68390 + }, + { + "epoch": 9.24, + "learning_rate": 9.665914238247721e-05, + "loss": 2.3301, + "step": 68400 + }, + { + "epoch": 9.24, + "learning_rate": 9.661948552816553e-05, + "loss": 2.3414, + "step": 68410 + }, + { + "epoch": 9.24, + "learning_rate": 9.657983294553119e-05, + "loss": 2.3295, + "step": 68420 + }, + { + "epoch": 9.24, + "learning_rate": 9.654018463774735e-05, + "loss": 2.3345, + "step": 68430 + }, + { + "epoch": 9.24, + "learning_rate": 9.650054060798678e-05, + "loss": 2.3263, + "step": 68440 + }, + { + "epoch": 9.24, + "learning_rate": 9.646090085942195e-05, + "loss": 2.3041, + "step": 68450 + }, + { + "epoch": 9.25, + "learning_rate": 9.64212653952249e-05, + "loss": 2.3022, + "step": 68460 + }, + { + "epoch": 9.25, + "learning_rate": 9.638163421856745e-05, + "loss": 2.3272, + "step": 68470 + }, + { + "epoch": 9.25, + "learning_rate": 9.634200733262093e-05, + "loss": 2.3312, + "step": 68480 + }, + { + "epoch": 9.25, + "learning_rate": 9.630238474055654e-05, + "loss": 2.3396, + "step": 68490 + }, + { + "epoch": 9.25, + "learning_rate": 9.626276644554483e-05, + "loss": 2.3296, + "step": 68500 + }, + { + "epoch": 9.25, + "learning_rate": 9.622315245075629e-05, + "loss": 2.3463, + "step": 68510 + }, + { + "epoch": 9.25, + "learning_rate": 9.618354275936094e-05, + "loss": 2.3321, + "step": 68520 + }, + { + "epoch": 9.26, + "learning_rate": 9.614393737452855e-05, + "loss": 2.3488, + "step": 68530 + }, + { + "epoch": 9.26, + "learning_rate": 9.610433629942833e-05, + "loss": 2.3242, + "step": 68540 + }, + { + "epoch": 9.26, + "learning_rate": 9.606473953722931e-05, + "loss": 2.3349, + "step": 68550 + }, + { + "epoch": 9.26, + "learning_rate": 9.60251470911002e-05, + "loss": 2.3217, + "step": 68560 + }, + { + "epoch": 9.26, + "learning_rate": 9.598555896420936e-05, + "loss": 2.3729, + "step": 68570 + }, + { + "epoch": 9.26, + "learning_rate": 9.594597515972462e-05, + "loss": 2.3303, + "step": 68580 + }, + { + "epoch": 9.26, + "learning_rate": 9.590639568081369e-05, + "loss": 2.3319, + "step": 68590 + }, + { + "epoch": 9.27, + "learning_rate": 9.586682053064383e-05, + "loss": 2.3337, + "step": 68600 + }, + { + "epoch": 9.27, + "learning_rate": 9.582724971238199e-05, + "loss": 2.3069, + "step": 68610 + }, + { + "epoch": 9.27, + "learning_rate": 9.578768322919468e-05, + "loss": 2.3141, + "step": 68620 + }, + { + "epoch": 9.27, + "learning_rate": 9.57481210842482e-05, + "loss": 2.3114, + "step": 68630 + }, + { + "epoch": 9.27, + "learning_rate": 9.570856328070841e-05, + "loss": 2.3228, + "step": 68640 + }, + { + "epoch": 9.27, + "learning_rate": 9.56690098217409e-05, + "loss": 2.3154, + "step": 68650 + }, + { + "epoch": 9.27, + "learning_rate": 9.562946071051079e-05, + "loss": 2.3135, + "step": 68660 + }, + { + "epoch": 9.27, + "learning_rate": 9.558991595018295e-05, + "loss": 2.3163, + "step": 68670 + }, + { + "epoch": 9.28, + "learning_rate": 9.55503755439219e-05, + "loss": 2.3264, + "step": 68680 + }, + { + "epoch": 9.28, + "learning_rate": 9.551083949489183e-05, + "loss": 2.335, + "step": 68690 + }, + { + "epoch": 9.28, + "learning_rate": 9.547130780625639e-05, + "loss": 2.3277, + "step": 68700 + }, + { + "epoch": 9.28, + "learning_rate": 9.543178048117914e-05, + "loss": 2.2883, + "step": 68710 + }, + { + "epoch": 9.28, + "learning_rate": 9.539225752282319e-05, + "loss": 2.3468, + "step": 68720 + }, + { + "epoch": 9.28, + "learning_rate": 9.535273893435131e-05, + "loss": 2.3042, + "step": 68730 + }, + { + "epoch": 9.28, + "learning_rate": 9.531322471892579e-05, + "loss": 2.308, + "step": 68740 + }, + { + "epoch": 9.29, + "learning_rate": 9.527371487970877e-05, + "loss": 2.3368, + "step": 68750 + }, + { + "epoch": 9.29, + "learning_rate": 9.523420941986187e-05, + "loss": 2.2988, + "step": 68760 + }, + { + "epoch": 9.29, + "learning_rate": 9.519470834254664e-05, + "loss": 2.33, + "step": 68770 + }, + { + "epoch": 9.29, + "learning_rate": 9.515521165092386e-05, + "loss": 2.3532, + "step": 68780 + }, + { + "epoch": 9.29, + "learning_rate": 9.511571934815427e-05, + "loss": 2.3027, + "step": 68790 + }, + { + "epoch": 9.29, + "learning_rate": 9.507623143739816e-05, + "loss": 2.315, + "step": 68800 + }, + { + "epoch": 9.29, + "learning_rate": 9.503674792181551e-05, + "loss": 2.315, + "step": 68810 + }, + { + "epoch": 9.29, + "learning_rate": 9.499726880456587e-05, + "loss": 2.3514, + "step": 68820 + }, + { + "epoch": 9.3, + "learning_rate": 9.495779408880848e-05, + "loss": 2.308, + "step": 68830 + }, + { + "epoch": 9.3, + "learning_rate": 9.491832377770227e-05, + "loss": 2.3033, + "step": 68840 + }, + { + "epoch": 9.3, + "learning_rate": 9.48788578744058e-05, + "loss": 2.3131, + "step": 68850 + }, + { + "epoch": 9.3, + "learning_rate": 9.483939638207716e-05, + "loss": 2.3533, + "step": 68860 + }, + { + "epoch": 9.3, + "learning_rate": 9.479993930387425e-05, + "loss": 2.3156, + "step": 68870 + }, + { + "epoch": 9.3, + "learning_rate": 9.476048664295455e-05, + "loss": 2.3362, + "step": 68880 + }, + { + "epoch": 9.3, + "learning_rate": 9.472103840247524e-05, + "loss": 2.3109, + "step": 68890 + }, + { + "epoch": 9.31, + "learning_rate": 9.468159458559294e-05, + "loss": 2.3415, + "step": 68900 + }, + { + "epoch": 9.31, + "learning_rate": 9.464215519546419e-05, + "loss": 2.31, + "step": 68910 + }, + { + "epoch": 9.31, + "learning_rate": 9.4602720235245e-05, + "loss": 2.3433, + "step": 68920 + }, + { + "epoch": 9.31, + "learning_rate": 9.456328970809118e-05, + "loss": 2.3171, + "step": 68930 + }, + { + "epoch": 9.31, + "learning_rate": 9.452386361715795e-05, + "loss": 2.3198, + "step": 68940 + }, + { + "epoch": 9.31, + "learning_rate": 9.448444196560037e-05, + "loss": 2.3121, + "step": 68950 + }, + { + "epoch": 9.31, + "learning_rate": 9.444502475657306e-05, + "loss": 2.3249, + "step": 68960 + }, + { + "epoch": 9.32, + "learning_rate": 9.440561199323041e-05, + "loss": 2.3408, + "step": 68970 + }, + { + "epoch": 9.32, + "learning_rate": 9.436620367872623e-05, + "loss": 2.3187, + "step": 68980 + }, + { + "epoch": 9.32, + "learning_rate": 9.432679981621413e-05, + "loss": 2.3065, + "step": 68990 + }, + { + "epoch": 9.32, + "learning_rate": 9.428740040884736e-05, + "loss": 2.324, + "step": 69000 + }, + { + "epoch": 9.32, + "eval_loss": 2.458608388900757, + "eval_runtime": 1269.1836, + "eval_samples_per_second": 59.987, + "eval_steps_per_second": 4.999, + "step": 69000 + }, + { + "epoch": 9.32, + "learning_rate": 9.42480054597788e-05, + "loss": 2.3242, + "step": 69010 + }, + { + "epoch": 9.32, + "learning_rate": 9.420861497216091e-05, + "loss": 2.3136, + "step": 69020 + }, + { + "epoch": 9.32, + "learning_rate": 9.416922894914584e-05, + "loss": 2.3549, + "step": 69030 + }, + { + "epoch": 9.32, + "learning_rate": 9.412984739388543e-05, + "loss": 2.3234, + "step": 69040 + }, + { + "epoch": 9.33, + "learning_rate": 9.409047030953111e-05, + "loss": 2.3454, + "step": 69050 + }, + { + "epoch": 9.33, + "learning_rate": 9.40510976992339e-05, + "loss": 2.3244, + "step": 69060 + }, + { + "epoch": 9.33, + "learning_rate": 9.401172956614456e-05, + "loss": 2.3477, + "step": 69070 + }, + { + "epoch": 9.33, + "learning_rate": 9.397236591341349e-05, + "loss": 2.3063, + "step": 69080 + }, + { + "epoch": 9.33, + "learning_rate": 9.393300674419067e-05, + "loss": 2.3015, + "step": 69090 + }, + { + "epoch": 9.33, + "learning_rate": 9.389365206162566e-05, + "loss": 2.3391, + "step": 69100 + }, + { + "epoch": 9.33, + "learning_rate": 9.385430186886786e-05, + "loss": 2.3336, + "step": 69110 + }, + { + "epoch": 9.34, + "learning_rate": 9.381495616906616e-05, + "loss": 2.3343, + "step": 69120 + }, + { + "epoch": 9.34, + "learning_rate": 9.377561496536914e-05, + "loss": 2.3251, + "step": 69130 + }, + { + "epoch": 9.34, + "learning_rate": 9.373627826092497e-05, + "loss": 2.3175, + "step": 69140 + }, + { + "epoch": 9.34, + "learning_rate": 9.369694605888151e-05, + "loss": 2.3035, + "step": 69150 + }, + { + "epoch": 9.34, + "learning_rate": 9.365761836238621e-05, + "loss": 2.3307, + "step": 69160 + }, + { + "epoch": 9.34, + "learning_rate": 9.361829517458636e-05, + "loss": 2.3145, + "step": 69170 + }, + { + "epoch": 9.34, + "learning_rate": 9.357897649862852e-05, + "loss": 2.3139, + "step": 69180 + }, + { + "epoch": 9.34, + "learning_rate": 9.353966233765919e-05, + "loss": 2.346, + "step": 69190 + }, + { + "epoch": 9.35, + "learning_rate": 9.35003526948244e-05, + "loss": 2.3071, + "step": 69200 + }, + { + "epoch": 9.35, + "learning_rate": 9.346104757326987e-05, + "loss": 2.3352, + "step": 69210 + }, + { + "epoch": 9.35, + "learning_rate": 9.342174697614084e-05, + "loss": 2.3201, + "step": 69220 + }, + { + "epoch": 9.35, + "learning_rate": 9.338245090658236e-05, + "loss": 2.3216, + "step": 69230 + }, + { + "epoch": 9.35, + "learning_rate": 9.334315936773893e-05, + "loss": 2.3065, + "step": 69240 + }, + { + "epoch": 9.35, + "learning_rate": 9.330387236275488e-05, + "loss": 2.307, + "step": 69250 + }, + { + "epoch": 9.35, + "learning_rate": 9.3264589894774e-05, + "loss": 2.3361, + "step": 69260 + }, + { + "epoch": 9.36, + "learning_rate": 9.322531196693983e-05, + "loss": 2.3054, + "step": 69270 + }, + { + "epoch": 9.36, + "learning_rate": 9.318603858239551e-05, + "loss": 2.3097, + "step": 69280 + }, + { + "epoch": 9.36, + "learning_rate": 9.314676974428392e-05, + "loss": 2.3278, + "step": 69290 + }, + { + "epoch": 9.36, + "learning_rate": 9.310750545574723e-05, + "loss": 2.3351, + "step": 69300 + }, + { + "epoch": 9.36, + "learning_rate": 9.30682457199277e-05, + "loss": 2.344, + "step": 69310 + }, + { + "epoch": 9.36, + "learning_rate": 9.302899053996698e-05, + "loss": 2.3426, + "step": 69320 + }, + { + "epoch": 9.36, + "learning_rate": 9.298973991900642e-05, + "loss": 2.3465, + "step": 69330 + }, + { + "epoch": 9.37, + "learning_rate": 9.295049386018686e-05, + "loss": 2.313, + "step": 69340 + }, + { + "epoch": 9.37, + "learning_rate": 9.291125236664899e-05, + "loss": 2.2986, + "step": 69350 + }, + { + "epoch": 9.37, + "learning_rate": 9.287201544153298e-05, + "loss": 2.3205, + "step": 69360 + }, + { + "epoch": 9.37, + "learning_rate": 9.28327830879788e-05, + "loss": 2.3039, + "step": 69370 + }, + { + "epoch": 9.37, + "learning_rate": 9.279355530912582e-05, + "loss": 2.3362, + "step": 69380 + }, + { + "epoch": 9.37, + "learning_rate": 9.275433210811324e-05, + "loss": 2.3084, + "step": 69390 + }, + { + "epoch": 9.37, + "learning_rate": 9.271511348807978e-05, + "loss": 2.3303, + "step": 69400 + }, + { + "epoch": 9.37, + "learning_rate": 9.267589945216393e-05, + "loss": 2.3196, + "step": 69410 + }, + { + "epoch": 9.38, + "learning_rate": 9.263669000350362e-05, + "loss": 2.3229, + "step": 69420 + }, + { + "epoch": 9.38, + "learning_rate": 9.259748514523653e-05, + "loss": 2.2878, + "step": 69430 + }, + { + "epoch": 9.38, + "learning_rate": 9.25582848805e-05, + "loss": 2.316, + "step": 69440 + }, + { + "epoch": 9.38, + "learning_rate": 9.251908921243093e-05, + "loss": 2.3384, + "step": 69450 + }, + { + "epoch": 9.38, + "learning_rate": 9.247989814416593e-05, + "loss": 2.3094, + "step": 69460 + }, + { + "epoch": 9.38, + "learning_rate": 9.24407116788411e-05, + "loss": 2.3308, + "step": 69470 + }, + { + "epoch": 9.38, + "learning_rate": 9.240152981959234e-05, + "loss": 2.3529, + "step": 69480 + }, + { + "epoch": 9.39, + "learning_rate": 9.236235256955507e-05, + "loss": 2.3226, + "step": 69490 + }, + { + "epoch": 9.39, + "learning_rate": 9.232317993186444e-05, + "loss": 2.335, + "step": 69500 + }, + { + "epoch": 9.39, + "learning_rate": 9.228401190965508e-05, + "loss": 2.3262, + "step": 69510 + }, + { + "epoch": 9.39, + "learning_rate": 9.224484850606136e-05, + "loss": 2.3487, + "step": 69520 + }, + { + "epoch": 9.39, + "learning_rate": 9.220568972421729e-05, + "loss": 2.2983, + "step": 69530 + }, + { + "epoch": 9.39, + "learning_rate": 9.216653556725652e-05, + "loss": 2.3295, + "step": 69540 + }, + { + "epoch": 9.39, + "learning_rate": 9.212738603831216e-05, + "loss": 2.3028, + "step": 69550 + }, + { + "epoch": 9.39, + "learning_rate": 9.208824114051718e-05, + "loss": 2.3422, + "step": 69560 + }, + { + "epoch": 9.4, + "learning_rate": 9.204910087700403e-05, + "loss": 2.3227, + "step": 69570 + }, + { + "epoch": 9.4, + "learning_rate": 9.200996525090493e-05, + "loss": 2.3214, + "step": 69580 + }, + { + "epoch": 9.4, + "learning_rate": 9.197083426535152e-05, + "loss": 2.3138, + "step": 69590 + }, + { + "epoch": 9.4, + "learning_rate": 9.193170792347521e-05, + "loss": 2.3419, + "step": 69600 + }, + { + "epoch": 9.4, + "learning_rate": 9.189258622840707e-05, + "loss": 2.2931, + "step": 69610 + }, + { + "epoch": 9.4, + "learning_rate": 9.18534691832777e-05, + "loss": 2.3191, + "step": 69620 + }, + { + "epoch": 9.4, + "learning_rate": 9.181435679121736e-05, + "loss": 2.3183, + "step": 69630 + }, + { + "epoch": 9.41, + "learning_rate": 9.177524905535596e-05, + "loss": 2.3209, + "step": 69640 + }, + { + "epoch": 9.41, + "learning_rate": 9.173614597882303e-05, + "loss": 2.305, + "step": 69650 + }, + { + "epoch": 9.41, + "learning_rate": 9.169704756474775e-05, + "loss": 2.3379, + "step": 69660 + }, + { + "epoch": 9.41, + "learning_rate": 9.165795381625884e-05, + "loss": 2.3386, + "step": 69670 + }, + { + "epoch": 9.41, + "learning_rate": 9.161886473648473e-05, + "loss": 2.338, + "step": 69680 + }, + { + "epoch": 9.41, + "learning_rate": 9.157978032855347e-05, + "loss": 2.3081, + "step": 69690 + }, + { + "epoch": 9.41, + "learning_rate": 9.154070059559274e-05, + "loss": 2.3197, + "step": 69700 + }, + { + "epoch": 9.42, + "learning_rate": 9.15016255407297e-05, + "loss": 2.3385, + "step": 69710 + }, + { + "epoch": 9.42, + "learning_rate": 9.146255516709139e-05, + "loss": 2.3233, + "step": 69720 + }, + { + "epoch": 9.42, + "learning_rate": 9.142348947780429e-05, + "loss": 2.3355, + "step": 69730 + }, + { + "epoch": 9.42, + "learning_rate": 9.138442847599463e-05, + "loss": 2.339, + "step": 69740 + }, + { + "epoch": 9.42, + "learning_rate": 9.134537216478808e-05, + "loss": 2.3234, + "step": 69750 + }, + { + "epoch": 9.42, + "learning_rate": 9.13063205473101e-05, + "loss": 2.3408, + "step": 69760 + }, + { + "epoch": 9.42, + "learning_rate": 9.12672736266857e-05, + "loss": 2.3325, + "step": 69770 + }, + { + "epoch": 9.42, + "learning_rate": 9.122823140603965e-05, + "loss": 2.3183, + "step": 69780 + }, + { + "epoch": 9.43, + "learning_rate": 9.11891938884961e-05, + "loss": 2.3217, + "step": 69790 + }, + { + "epoch": 9.43, + "learning_rate": 9.115016107717901e-05, + "loss": 2.3456, + "step": 69800 + }, + { + "epoch": 9.43, + "learning_rate": 9.111113297521189e-05, + "loss": 2.3273, + "step": 69810 + }, + { + "epoch": 9.43, + "learning_rate": 9.107210958571794e-05, + "loss": 2.2984, + "step": 69820 + }, + { + "epoch": 9.43, + "learning_rate": 9.103309091181985e-05, + "loss": 2.3115, + "step": 69830 + }, + { + "epoch": 9.43, + "learning_rate": 9.099407695664008e-05, + "loss": 2.332, + "step": 69840 + }, + { + "epoch": 9.43, + "learning_rate": 9.095506772330064e-05, + "loss": 2.3563, + "step": 69850 + }, + { + "epoch": 9.44, + "learning_rate": 9.091606321492317e-05, + "loss": 2.317, + "step": 69860 + }, + { + "epoch": 9.44, + "learning_rate": 9.087706343462893e-05, + "loss": 2.3531, + "step": 69870 + }, + { + "epoch": 9.44, + "learning_rate": 9.08380683855388e-05, + "loss": 2.3175, + "step": 69880 + }, + { + "epoch": 9.44, + "learning_rate": 9.079907807077331e-05, + "loss": 2.3307, + "step": 69890 + }, + { + "epoch": 9.44, + "learning_rate": 9.076009249345261e-05, + "loss": 2.3144, + "step": 69900 + }, + { + "epoch": 9.44, + "learning_rate": 9.072111165669634e-05, + "loss": 2.2892, + "step": 69910 + }, + { + "epoch": 9.44, + "learning_rate": 9.068213556362398e-05, + "loss": 2.3341, + "step": 69920 + }, + { + "epoch": 9.44, + "learning_rate": 9.064316421735446e-05, + "loss": 2.3626, + "step": 69930 + }, + { + "epoch": 9.45, + "learning_rate": 9.060419762100648e-05, + "loss": 2.3031, + "step": 69940 + }, + { + "epoch": 9.45, + "learning_rate": 9.056523577769814e-05, + "loss": 2.3293, + "step": 69950 + }, + { + "epoch": 9.45, + "learning_rate": 9.052627869054734e-05, + "loss": 2.3267, + "step": 69960 + }, + { + "epoch": 9.45, + "learning_rate": 9.048732636267155e-05, + "loss": 2.31, + "step": 69970 + }, + { + "epoch": 9.45, + "learning_rate": 9.044837879718796e-05, + "loss": 2.3295, + "step": 69980 + }, + { + "epoch": 9.45, + "learning_rate": 9.040943599721313e-05, + "loss": 2.3445, + "step": 69990 + }, + { + "epoch": 9.45, + "learning_rate": 9.037049796586341e-05, + "loss": 2.3394, + "step": 70000 + }, + { + "epoch": 9.45, + "eval_loss": 2.4543914794921875, + "eval_runtime": 1269.1098, + "eval_samples_per_second": 59.991, + "eval_steps_per_second": 5.0, + "step": 70000 + }, + { + "epoch": 9.46, + "learning_rate": 9.033156470625479e-05, + "loss": 2.3182, + "step": 70010 + }, + { + "epoch": 9.46, + "learning_rate": 9.029263622150287e-05, + "loss": 2.3422, + "step": 70020 + }, + { + "epoch": 9.46, + "learning_rate": 9.025371251472274e-05, + "loss": 2.3575, + "step": 70030 + }, + { + "epoch": 9.46, + "learning_rate": 9.021479358902924e-05, + "loss": 2.3077, + "step": 70040 + }, + { + "epoch": 9.46, + "learning_rate": 9.017587944753678e-05, + "loss": 2.3227, + "step": 70050 + }, + { + "epoch": 9.46, + "learning_rate": 9.013697009335941e-05, + "loss": 2.3383, + "step": 70060 + }, + { + "epoch": 9.46, + "learning_rate": 9.009806552961075e-05, + "loss": 2.333, + "step": 70070 + }, + { + "epoch": 9.46, + "learning_rate": 9.005916575940407e-05, + "loss": 2.3024, + "step": 70080 + }, + { + "epoch": 9.47, + "learning_rate": 9.002027078585227e-05, + "loss": 2.3309, + "step": 70090 + }, + { + "epoch": 9.47, + "learning_rate": 8.998138061206791e-05, + "loss": 2.3202, + "step": 70100 + }, + { + "epoch": 9.47, + "learning_rate": 8.994249524116294e-05, + "loss": 2.3323, + "step": 70110 + }, + { + "epoch": 9.47, + "learning_rate": 8.990361467624921e-05, + "loss": 2.3041, + "step": 70120 + }, + { + "epoch": 9.47, + "learning_rate": 8.986473892043805e-05, + "loss": 2.3313, + "step": 70130 + }, + { + "epoch": 9.47, + "learning_rate": 8.982586797684048e-05, + "loss": 2.3133, + "step": 70140 + }, + { + "epoch": 9.47, + "learning_rate": 8.978700184856694e-05, + "loss": 2.3407, + "step": 70150 + }, + { + "epoch": 9.48, + "learning_rate": 8.974814053872769e-05, + "loss": 2.3172, + "step": 70160 + }, + { + "epoch": 9.48, + "learning_rate": 8.970928405043251e-05, + "loss": 2.3258, + "step": 70170 + }, + { + "epoch": 9.48, + "learning_rate": 8.967043238679091e-05, + "loss": 2.3141, + "step": 70180 + }, + { + "epoch": 9.48, + "learning_rate": 8.96315855509118e-05, + "loss": 2.3215, + "step": 70190 + }, + { + "epoch": 9.48, + "learning_rate": 8.959274354590392e-05, + "loss": 2.3508, + "step": 70200 + }, + { + "epoch": 9.48, + "learning_rate": 8.955390637487546e-05, + "loss": 2.3328, + "step": 70210 + }, + { + "epoch": 9.48, + "learning_rate": 8.951507404093434e-05, + "loss": 2.3137, + "step": 70220 + }, + { + "epoch": 9.49, + "learning_rate": 8.947624654718803e-05, + "loss": 2.2952, + "step": 70230 + }, + { + "epoch": 9.49, + "learning_rate": 8.943742389674363e-05, + "loss": 2.3256, + "step": 70240 + }, + { + "epoch": 9.49, + "learning_rate": 8.939860609270783e-05, + "loss": 2.3407, + "step": 70250 + }, + { + "epoch": 9.49, + "learning_rate": 8.935979313818701e-05, + "loss": 2.2986, + "step": 70260 + }, + { + "epoch": 9.49, + "learning_rate": 8.932098503628705e-05, + "loss": 2.3394, + "step": 70270 + }, + { + "epoch": 9.49, + "learning_rate": 8.928218179011351e-05, + "loss": 2.3699, + "step": 70280 + }, + { + "epoch": 9.49, + "learning_rate": 8.924338340277155e-05, + "loss": 2.3234, + "step": 70290 + }, + { + "epoch": 9.49, + "learning_rate": 8.920458987736601e-05, + "loss": 2.329, + "step": 70300 + }, + { + "epoch": 9.5, + "learning_rate": 8.916580121700112e-05, + "loss": 2.3334, + "step": 70310 + }, + { + "epoch": 9.5, + "learning_rate": 8.912701742478097e-05, + "loss": 2.3382, + "step": 70320 + }, + { + "epoch": 9.5, + "learning_rate": 8.908823850380916e-05, + "loss": 2.3237, + "step": 70330 + }, + { + "epoch": 9.5, + "learning_rate": 8.904946445718894e-05, + "loss": 2.3514, + "step": 70340 + }, + { + "epoch": 9.5, + "learning_rate": 8.901069528802301e-05, + "loss": 2.3058, + "step": 70350 + }, + { + "epoch": 9.5, + "learning_rate": 8.89719309994139e-05, + "loss": 2.3269, + "step": 70360 + }, + { + "epoch": 9.5, + "learning_rate": 8.893317159446358e-05, + "loss": 2.3193, + "step": 70370 + }, + { + "epoch": 9.51, + "learning_rate": 8.889441707627383e-05, + "loss": 2.3449, + "step": 70380 + }, + { + "epoch": 9.51, + "learning_rate": 8.885566744794576e-05, + "loss": 2.3272, + "step": 70390 + }, + { + "epoch": 9.51, + "learning_rate": 8.88169227125803e-05, + "loss": 2.3205, + "step": 70400 + }, + { + "epoch": 9.51, + "learning_rate": 8.877818287327793e-05, + "loss": 2.3173, + "step": 70410 + }, + { + "epoch": 9.51, + "learning_rate": 8.873944793313875e-05, + "loss": 2.3537, + "step": 70420 + }, + { + "epoch": 9.51, + "learning_rate": 8.870071789526242e-05, + "loss": 2.3131, + "step": 70430 + }, + { + "epoch": 9.51, + "learning_rate": 8.866199276274824e-05, + "loss": 2.3306, + "step": 70440 + }, + { + "epoch": 9.51, + "learning_rate": 8.862327253869513e-05, + "loss": 2.3514, + "step": 70450 + }, + { + "epoch": 9.52, + "learning_rate": 8.858455722620164e-05, + "loss": 2.3121, + "step": 70460 + }, + { + "epoch": 9.52, + "learning_rate": 8.85458468283658e-05, + "loss": 2.3258, + "step": 70470 + }, + { + "epoch": 9.52, + "learning_rate": 8.850714134828542e-05, + "loss": 2.3298, + "step": 70480 + }, + { + "epoch": 9.52, + "learning_rate": 8.84684407890578e-05, + "loss": 2.3286, + "step": 70490 + }, + { + "epoch": 9.52, + "learning_rate": 8.842974515377995e-05, + "loss": 2.3207, + "step": 70500 + }, + { + "epoch": 9.52, + "learning_rate": 8.839105444554826e-05, + "loss": 2.3344, + "step": 70510 + }, + { + "epoch": 9.52, + "learning_rate": 8.835236866745903e-05, + "loss": 2.3464, + "step": 70520 + }, + { + "epoch": 9.53, + "learning_rate": 8.831368782260794e-05, + "loss": 2.3397, + "step": 70530 + }, + { + "epoch": 9.53, + "learning_rate": 8.827501191409046e-05, + "loss": 2.3446, + "step": 70540 + }, + { + "epoch": 9.53, + "learning_rate": 8.82363409450014e-05, + "loss": 2.3302, + "step": 70550 + }, + { + "epoch": 9.53, + "learning_rate": 8.819767491843544e-05, + "loss": 2.3161, + "step": 70560 + }, + { + "epoch": 9.53, + "learning_rate": 8.81590138374867e-05, + "loss": 2.3395, + "step": 70570 + }, + { + "epoch": 9.53, + "learning_rate": 8.812035770524905e-05, + "loss": 2.3591, + "step": 70580 + }, + { + "epoch": 9.53, + "learning_rate": 8.808170652481577e-05, + "loss": 2.3224, + "step": 70590 + }, + { + "epoch": 9.54, + "learning_rate": 8.804306029927992e-05, + "loss": 2.3132, + "step": 70600 + }, + { + "epoch": 9.54, + "learning_rate": 8.800441903173403e-05, + "loss": 2.3382, + "step": 70610 + }, + { + "epoch": 9.54, + "learning_rate": 8.79657827252704e-05, + "loss": 2.3273, + "step": 70620 + }, + { + "epoch": 9.54, + "learning_rate": 8.79271513829807e-05, + "loss": 2.3078, + "step": 70630 + }, + { + "epoch": 9.54, + "learning_rate": 8.788852500795643e-05, + "loss": 2.2964, + "step": 70640 + }, + { + "epoch": 9.54, + "learning_rate": 8.784990360328854e-05, + "loss": 2.3204, + "step": 70650 + }, + { + "epoch": 9.54, + "learning_rate": 8.781128717206772e-05, + "loss": 2.352, + "step": 70660 + }, + { + "epoch": 9.54, + "learning_rate": 8.777267571738405e-05, + "loss": 2.3287, + "step": 70670 + }, + { + "epoch": 9.55, + "learning_rate": 8.773406924232742e-05, + "loss": 2.3417, + "step": 70680 + }, + { + "epoch": 9.55, + "learning_rate": 8.769546774998724e-05, + "loss": 2.3419, + "step": 70690 + }, + { + "epoch": 9.55, + "learning_rate": 8.765687124345255e-05, + "loss": 2.3211, + "step": 70700 + }, + { + "epoch": 9.55, + "learning_rate": 8.76182797258119e-05, + "loss": 2.3009, + "step": 70710 + }, + { + "epoch": 9.55, + "learning_rate": 8.757969320015348e-05, + "loss": 2.3271, + "step": 70720 + }, + { + "epoch": 9.55, + "learning_rate": 8.754111166956522e-05, + "loss": 2.3154, + "step": 70730 + }, + { + "epoch": 9.55, + "learning_rate": 8.750253513713447e-05, + "loss": 2.3081, + "step": 70740 + }, + { + "epoch": 9.56, + "learning_rate": 8.746396360594826e-05, + "loss": 2.3252, + "step": 70750 + }, + { + "epoch": 9.56, + "learning_rate": 8.742539707909313e-05, + "loss": 2.3703, + "step": 70760 + }, + { + "epoch": 9.56, + "learning_rate": 8.738683555965543e-05, + "loss": 2.2995, + "step": 70770 + }, + { + "epoch": 9.56, + "learning_rate": 8.734827905072092e-05, + "loss": 2.3247, + "step": 70780 + }, + { + "epoch": 9.56, + "learning_rate": 8.730972755537491e-05, + "loss": 2.3403, + "step": 70790 + }, + { + "epoch": 9.56, + "learning_rate": 8.727118107670258e-05, + "loss": 2.3188, + "step": 70800 + }, + { + "epoch": 9.56, + "learning_rate": 8.723263961778839e-05, + "loss": 2.3202, + "step": 70810 + }, + { + "epoch": 9.56, + "learning_rate": 8.719410318171676e-05, + "loss": 2.3129, + "step": 70820 + }, + { + "epoch": 9.57, + "learning_rate": 8.715557177157124e-05, + "loss": 2.3438, + "step": 70830 + }, + { + "epoch": 9.57, + "learning_rate": 8.711704539043541e-05, + "loss": 2.346, + "step": 70840 + }, + { + "epoch": 9.57, + "learning_rate": 8.707852404139214e-05, + "loss": 2.3373, + "step": 70850 + }, + { + "epoch": 9.57, + "learning_rate": 8.704000772752418e-05, + "loss": 2.3394, + "step": 70860 + }, + { + "epoch": 9.57, + "learning_rate": 8.700149645191365e-05, + "loss": 2.3205, + "step": 70870 + }, + { + "epoch": 9.57, + "learning_rate": 8.69629902176423e-05, + "loss": 2.3391, + "step": 70880 + }, + { + "epoch": 9.57, + "learning_rate": 8.692448902779163e-05, + "loss": 2.3416, + "step": 70890 + }, + { + "epoch": 9.58, + "learning_rate": 8.688599288544256e-05, + "loss": 2.3124, + "step": 70900 + }, + { + "epoch": 9.58, + "learning_rate": 8.684750179367561e-05, + "loss": 2.3494, + "step": 70910 + }, + { + "epoch": 9.58, + "learning_rate": 8.68090157555711e-05, + "loss": 2.3271, + "step": 70920 + }, + { + "epoch": 9.58, + "learning_rate": 8.677053477420868e-05, + "loss": 2.3397, + "step": 70930 + }, + { + "epoch": 9.58, + "learning_rate": 8.673205885266782e-05, + "loss": 2.3117, + "step": 70940 + }, + { + "epoch": 9.58, + "learning_rate": 8.669358799402743e-05, + "loss": 2.3373, + "step": 70950 + }, + { + "epoch": 9.58, + "learning_rate": 8.665512220136601e-05, + "loss": 2.3492, + "step": 70960 + }, + { + "epoch": 9.59, + "learning_rate": 8.661666147776185e-05, + "loss": 2.3136, + "step": 70970 + }, + { + "epoch": 9.59, + "learning_rate": 8.657820582629265e-05, + "loss": 2.3141, + "step": 70980 + }, + { + "epoch": 9.59, + "learning_rate": 8.653975525003564e-05, + "loss": 2.3282, + "step": 70990 + }, + { + "epoch": 9.59, + "learning_rate": 8.650130975206793e-05, + "loss": 2.3457, + "step": 71000 + }, + { + "epoch": 9.59, + "eval_loss": 2.4499146938323975, + "eval_runtime": 1269.4328, + "eval_samples_per_second": 59.976, + "eval_steps_per_second": 4.998, + "step": 71000 + }, + { + "epoch": 9.59, + "learning_rate": 8.646286933546589e-05, + "loss": 2.2982, + "step": 71010 + }, + { + "epoch": 9.59, + "learning_rate": 8.642443400330585e-05, + "loss": 2.3313, + "step": 71020 + }, + { + "epoch": 9.59, + "learning_rate": 8.638600375866327e-05, + "loss": 2.3166, + "step": 71030 + }, + { + "epoch": 9.59, + "learning_rate": 8.634757860461366e-05, + "loss": 2.3371, + "step": 71040 + }, + { + "epoch": 9.6, + "learning_rate": 8.630915854423178e-05, + "loss": 2.3362, + "step": 71050 + }, + { + "epoch": 9.6, + "learning_rate": 8.627074358059225e-05, + "loss": 2.3279, + "step": 71060 + }, + { + "epoch": 9.6, + "learning_rate": 8.623233371676911e-05, + "loss": 2.3305, + "step": 71070 + }, + { + "epoch": 9.6, + "learning_rate": 8.619392895583598e-05, + "loss": 2.3236, + "step": 71080 + }, + { + "epoch": 9.6, + "learning_rate": 8.615552930086623e-05, + "loss": 2.3074, + "step": 71090 + }, + { + "epoch": 9.6, + "learning_rate": 8.611713475493268e-05, + "loss": 2.3289, + "step": 71100 + }, + { + "epoch": 9.6, + "learning_rate": 8.607874532110769e-05, + "loss": 2.3121, + "step": 71110 + }, + { + "epoch": 9.61, + "learning_rate": 8.604036100246346e-05, + "loss": 2.3154, + "step": 71120 + }, + { + "epoch": 9.61, + "learning_rate": 8.60019818020715e-05, + "loss": 2.3401, + "step": 71130 + }, + { + "epoch": 9.61, + "learning_rate": 8.596360772300316e-05, + "loss": 2.3392, + "step": 71140 + }, + { + "epoch": 9.61, + "learning_rate": 8.592523876832917e-05, + "loss": 2.3236, + "step": 71150 + }, + { + "epoch": 9.61, + "learning_rate": 8.588687494111989e-05, + "loss": 2.2912, + "step": 71160 + }, + { + "epoch": 9.61, + "learning_rate": 8.584851624444542e-05, + "loss": 2.3346, + "step": 71170 + }, + { + "epoch": 9.61, + "learning_rate": 8.581016268137531e-05, + "loss": 2.3203, + "step": 71180 + }, + { + "epoch": 9.61, + "learning_rate": 8.577181425497866e-05, + "loss": 2.3119, + "step": 71190 + }, + { + "epoch": 9.62, + "learning_rate": 8.573347096832433e-05, + "loss": 2.3134, + "step": 71200 + }, + { + "epoch": 9.62, + "learning_rate": 8.569513282448059e-05, + "loss": 2.335, + "step": 71210 + }, + { + "epoch": 9.62, + "learning_rate": 8.565679982651555e-05, + "loss": 2.3435, + "step": 71220 + }, + { + "epoch": 9.62, + "learning_rate": 8.561847197749648e-05, + "loss": 2.3026, + "step": 71230 + }, + { + "epoch": 9.62, + "learning_rate": 8.558014928049068e-05, + "loss": 2.3272, + "step": 71240 + }, + { + "epoch": 9.62, + "learning_rate": 8.554183173856474e-05, + "loss": 2.3089, + "step": 71250 + }, + { + "epoch": 9.62, + "learning_rate": 8.550351935478506e-05, + "loss": 2.3481, + "step": 71260 + }, + { + "epoch": 9.63, + "learning_rate": 8.54652121322175e-05, + "loss": 2.3044, + "step": 71270 + }, + { + "epoch": 9.63, + "learning_rate": 8.54269100739274e-05, + "loss": 2.3281, + "step": 71280 + }, + { + "epoch": 9.63, + "learning_rate": 8.538861318298e-05, + "loss": 2.3636, + "step": 71290 + }, + { + "epoch": 9.63, + "learning_rate": 8.53503214624398e-05, + "loss": 2.3192, + "step": 71300 + }, + { + "epoch": 9.63, + "learning_rate": 8.531203491537104e-05, + "loss": 2.3563, + "step": 71310 + }, + { + "epoch": 9.63, + "learning_rate": 8.527375354483758e-05, + "loss": 2.315, + "step": 71320 + }, + { + "epoch": 9.63, + "learning_rate": 8.523547735390276e-05, + "loss": 2.3046, + "step": 71330 + }, + { + "epoch": 9.64, + "learning_rate": 8.519720634562965e-05, + "loss": 2.3066, + "step": 71340 + }, + { + "epoch": 9.64, + "learning_rate": 8.515894052308075e-05, + "loss": 2.3259, + "step": 71350 + }, + { + "epoch": 9.64, + "learning_rate": 8.512067988931818e-05, + "loss": 2.313, + "step": 71360 + }, + { + "epoch": 9.64, + "learning_rate": 8.508242444740377e-05, + "loss": 2.3035, + "step": 71370 + }, + { + "epoch": 9.64, + "learning_rate": 8.504417420039878e-05, + "loss": 2.3352, + "step": 71380 + }, + { + "epoch": 9.64, + "learning_rate": 8.50059291513641e-05, + "loss": 2.3481, + "step": 71390 + }, + { + "epoch": 9.64, + "learning_rate": 8.496768930336028e-05, + "loss": 2.3325, + "step": 71400 + }, + { + "epoch": 9.64, + "learning_rate": 8.492945465944731e-05, + "loss": 2.3498, + "step": 71410 + }, + { + "epoch": 9.65, + "learning_rate": 8.489122522268501e-05, + "loss": 2.3107, + "step": 71420 + }, + { + "epoch": 9.65, + "learning_rate": 8.48530009961324e-05, + "loss": 2.3329, + "step": 71430 + }, + { + "epoch": 9.65, + "learning_rate": 8.481478198284846e-05, + "loss": 2.3168, + "step": 71440 + }, + { + "epoch": 9.65, + "learning_rate": 8.477656818589149e-05, + "loss": 2.3044, + "step": 71450 + }, + { + "epoch": 9.65, + "learning_rate": 8.473835960831959e-05, + "loss": 2.3306, + "step": 71460 + }, + { + "epoch": 9.65, + "learning_rate": 8.470015625319028e-05, + "loss": 2.3128, + "step": 71470 + }, + { + "epoch": 9.65, + "learning_rate": 8.466195812356066e-05, + "loss": 2.3233, + "step": 71480 + }, + { + "epoch": 9.66, + "learning_rate": 8.462376522248755e-05, + "loss": 2.3202, + "step": 71490 + }, + { + "epoch": 9.66, + "learning_rate": 8.458557755302727e-05, + "loss": 2.3412, + "step": 71500 + }, + { + "epoch": 9.66, + "learning_rate": 8.454739511823559e-05, + "loss": 2.3196, + "step": 71510 + }, + { + "epoch": 9.66, + "learning_rate": 8.450921792116814e-05, + "loss": 2.3198, + "step": 71520 + }, + { + "epoch": 9.66, + "learning_rate": 8.447104596487988e-05, + "loss": 2.3234, + "step": 71530 + }, + { + "epoch": 9.66, + "learning_rate": 8.443287925242553e-05, + "loss": 2.3257, + "step": 71540 + }, + { + "epoch": 9.66, + "learning_rate": 8.439471778685927e-05, + "loss": 2.3132, + "step": 71550 + }, + { + "epoch": 9.66, + "learning_rate": 8.435656157123484e-05, + "loss": 2.3372, + "step": 71560 + }, + { + "epoch": 9.67, + "learning_rate": 8.431841060860574e-05, + "loss": 2.3342, + "step": 71570 + }, + { + "epoch": 9.67, + "learning_rate": 8.428026490202489e-05, + "loss": 2.3362, + "step": 71580 + }, + { + "epoch": 9.67, + "learning_rate": 8.424212445454473e-05, + "loss": 2.3153, + "step": 71590 + }, + { + "epoch": 9.67, + "learning_rate": 8.420398926921752e-05, + "loss": 2.314, + "step": 71600 + }, + { + "epoch": 9.67, + "learning_rate": 8.416585934909485e-05, + "loss": 2.3185, + "step": 71610 + }, + { + "epoch": 9.67, + "learning_rate": 8.412773469722816e-05, + "loss": 2.318, + "step": 71620 + }, + { + "epoch": 9.67, + "learning_rate": 8.408961531666807e-05, + "loss": 2.358, + "step": 71630 + }, + { + "epoch": 9.68, + "learning_rate": 8.40515012104652e-05, + "loss": 2.3115, + "step": 71640 + }, + { + "epoch": 9.68, + "learning_rate": 8.401339238166944e-05, + "loss": 2.335, + "step": 71650 + }, + { + "epoch": 9.68, + "learning_rate": 8.397528883333048e-05, + "loss": 2.3397, + "step": 71660 + }, + { + "epoch": 9.68, + "learning_rate": 8.393719056849746e-05, + "loss": 2.2946, + "step": 71670 + }, + { + "epoch": 9.68, + "learning_rate": 8.3899097590219e-05, + "loss": 2.3319, + "step": 71680 + }, + { + "epoch": 9.68, + "learning_rate": 8.386100990154364e-05, + "loss": 2.3021, + "step": 71690 + }, + { + "epoch": 9.68, + "learning_rate": 8.382292750551912e-05, + "loss": 2.3035, + "step": 71700 + }, + { + "epoch": 9.69, + "learning_rate": 8.378485040519292e-05, + "loss": 2.316, + "step": 71710 + }, + { + "epoch": 9.69, + "learning_rate": 8.374677860361219e-05, + "loss": 2.3185, + "step": 71720 + }, + { + "epoch": 9.69, + "learning_rate": 8.37087121038234e-05, + "loss": 2.329, + "step": 71730 + }, + { + "epoch": 9.69, + "learning_rate": 8.36706509088729e-05, + "loss": 2.3385, + "step": 71740 + }, + { + "epoch": 9.69, + "learning_rate": 8.363259502180645e-05, + "loss": 2.3062, + "step": 71750 + }, + { + "epoch": 9.69, + "learning_rate": 8.359454444566927e-05, + "loss": 2.3465, + "step": 71760 + }, + { + "epoch": 9.69, + "learning_rate": 8.355649918350646e-05, + "loss": 2.3339, + "step": 71770 + }, + { + "epoch": 9.69, + "learning_rate": 8.351845923836241e-05, + "loss": 2.2929, + "step": 71780 + }, + { + "epoch": 9.7, + "learning_rate": 8.348042461328124e-05, + "loss": 2.3191, + "step": 71790 + }, + { + "epoch": 9.7, + "learning_rate": 8.344239531130653e-05, + "loss": 2.3026, + "step": 71800 + }, + { + "epoch": 9.7, + "learning_rate": 8.340437133548157e-05, + "loss": 2.3352, + "step": 71810 + }, + { + "epoch": 9.7, + "learning_rate": 8.336635268884926e-05, + "loss": 2.3239, + "step": 71820 + }, + { + "epoch": 9.7, + "learning_rate": 8.332833937445177e-05, + "loss": 2.3088, + "step": 71830 + }, + { + "epoch": 9.7, + "learning_rate": 8.329033139533118e-05, + "loss": 2.3457, + "step": 71840 + }, + { + "epoch": 9.7, + "learning_rate": 8.32523287545289e-05, + "loss": 2.3309, + "step": 71850 + }, + { + "epoch": 9.71, + "learning_rate": 8.321433145508616e-05, + "loss": 2.3408, + "step": 71860 + }, + { + "epoch": 9.71, + "learning_rate": 8.317633950004355e-05, + "loss": 2.3164, + "step": 71870 + }, + { + "epoch": 9.71, + "learning_rate": 8.313835289244124e-05, + "loss": 2.3096, + "step": 71880 + }, + { + "epoch": 9.71, + "learning_rate": 8.310037163531919e-05, + "loss": 2.3449, + "step": 71890 + }, + { + "epoch": 9.71, + "learning_rate": 8.306239573171667e-05, + "loss": 2.2952, + "step": 71900 + }, + { + "epoch": 9.71, + "learning_rate": 8.302442518467261e-05, + "loss": 2.328, + "step": 71910 + }, + { + "epoch": 9.71, + "learning_rate": 8.298645999722565e-05, + "loss": 2.3518, + "step": 71920 + }, + { + "epoch": 9.71, + "learning_rate": 8.294850017241376e-05, + "loss": 2.3238, + "step": 71930 + }, + { + "epoch": 9.72, + "learning_rate": 8.291054571327469e-05, + "loss": 2.34, + "step": 71940 + }, + { + "epoch": 9.72, + "learning_rate": 8.287259662284568e-05, + "loss": 2.3127, + "step": 71950 + }, + { + "epoch": 9.72, + "learning_rate": 8.283465290416342e-05, + "loss": 2.3194, + "step": 71960 + }, + { + "epoch": 9.72, + "learning_rate": 8.279671456026446e-05, + "loss": 2.3169, + "step": 71970 + }, + { + "epoch": 9.72, + "learning_rate": 8.275878159418463e-05, + "loss": 2.3029, + "step": 71980 + }, + { + "epoch": 9.72, + "learning_rate": 8.272085400895949e-05, + "loss": 2.3107, + "step": 71990 + }, + { + "epoch": 9.72, + "learning_rate": 8.268293180762404e-05, + "loss": 2.3384, + "step": 72000 + }, + { + "epoch": 9.72, + "eval_loss": 2.446279525756836, + "eval_runtime": 1269.5485, + "eval_samples_per_second": 59.97, + "eval_steps_per_second": 4.998, + "step": 72000 + }, + { + "epoch": 9.73, + "learning_rate": 8.264501499321299e-05, + "loss": 2.3068, + "step": 72010 + }, + { + "epoch": 9.73, + "learning_rate": 8.260710356876071e-05, + "loss": 2.3127, + "step": 72020 + }, + { + "epoch": 9.73, + "learning_rate": 8.256919753730074e-05, + "loss": 2.3312, + "step": 72030 + }, + { + "epoch": 9.73, + "learning_rate": 8.253129690186659e-05, + "loss": 2.3133, + "step": 72040 + }, + { + "epoch": 9.73, + "learning_rate": 8.249340166549112e-05, + "loss": 2.3072, + "step": 72050 + }, + { + "epoch": 9.73, + "learning_rate": 8.245551183120692e-05, + "loss": 2.3097, + "step": 72060 + }, + { + "epoch": 9.73, + "learning_rate": 8.241762740204599e-05, + "loss": 2.3782, + "step": 72070 + }, + { + "epoch": 9.74, + "learning_rate": 8.237974838103991e-05, + "loss": 2.3228, + "step": 72080 + }, + { + "epoch": 9.74, + "learning_rate": 8.234187477121998e-05, + "loss": 2.3365, + "step": 72090 + }, + { + "epoch": 9.74, + "learning_rate": 8.230400657561693e-05, + "loss": 2.3165, + "step": 72100 + }, + { + "epoch": 9.74, + "learning_rate": 8.226614379726103e-05, + "loss": 2.34, + "step": 72110 + }, + { + "epoch": 9.74, + "learning_rate": 8.222828643918231e-05, + "loss": 2.3741, + "step": 72120 + }, + { + "epoch": 9.74, + "learning_rate": 8.219043450441008e-05, + "loss": 2.3117, + "step": 72130 + }, + { + "epoch": 9.74, + "learning_rate": 8.215258799597353e-05, + "loss": 2.3264, + "step": 72140 + }, + { + "epoch": 9.74, + "learning_rate": 8.211474691690115e-05, + "loss": 2.31, + "step": 72150 + }, + { + "epoch": 9.75, + "learning_rate": 8.207691127022108e-05, + "loss": 2.3216, + "step": 72160 + }, + { + "epoch": 9.75, + "learning_rate": 8.203908105896117e-05, + "loss": 2.3223, + "step": 72170 + }, + { + "epoch": 9.75, + "learning_rate": 8.200125628614864e-05, + "loss": 2.3061, + "step": 72180 + }, + { + "epoch": 9.75, + "learning_rate": 8.196343695481035e-05, + "loss": 2.317, + "step": 72190 + }, + { + "epoch": 9.75, + "learning_rate": 8.192562306797265e-05, + "loss": 2.3211, + "step": 72200 + }, + { + "epoch": 9.75, + "learning_rate": 8.188781462866159e-05, + "loss": 2.3296, + "step": 72210 + }, + { + "epoch": 9.75, + "learning_rate": 8.185001163990287e-05, + "loss": 2.3312, + "step": 72220 + }, + { + "epoch": 9.76, + "learning_rate": 8.181221410472135e-05, + "loss": 2.3524, + "step": 72230 + }, + { + "epoch": 9.76, + "learning_rate": 8.177442202614184e-05, + "loss": 2.3249, + "step": 72240 + }, + { + "epoch": 9.76, + "learning_rate": 8.173663540718854e-05, + "loss": 2.3314, + "step": 72250 + }, + { + "epoch": 9.76, + "learning_rate": 8.169885425088534e-05, + "loss": 2.3096, + "step": 72260 + }, + { + "epoch": 9.76, + "learning_rate": 8.166107856025553e-05, + "loss": 2.3418, + "step": 72270 + }, + { + "epoch": 9.76, + "learning_rate": 8.162330833832198e-05, + "loss": 2.3204, + "step": 72280 + }, + { + "epoch": 9.76, + "learning_rate": 8.158554358810736e-05, + "loss": 2.3556, + "step": 72290 + }, + { + "epoch": 9.76, + "learning_rate": 8.15477843126336e-05, + "loss": 2.3055, + "step": 72300 + }, + { + "epoch": 9.77, + "learning_rate": 8.151003051492226e-05, + "loss": 2.3065, + "step": 72310 + }, + { + "epoch": 9.77, + "learning_rate": 8.14722821979947e-05, + "loss": 2.3045, + "step": 72320 + }, + { + "epoch": 9.77, + "learning_rate": 8.143453936487146e-05, + "loss": 2.3292, + "step": 72330 + }, + { + "epoch": 9.77, + "learning_rate": 8.139680201857302e-05, + "loss": 2.3238, + "step": 72340 + }, + { + "epoch": 9.77, + "learning_rate": 8.135907016211913e-05, + "loss": 2.3392, + "step": 72350 + }, + { + "epoch": 9.77, + "learning_rate": 8.132134379852921e-05, + "loss": 2.3239, + "step": 72360 + }, + { + "epoch": 9.77, + "learning_rate": 8.128362293082231e-05, + "loss": 2.3551, + "step": 72370 + }, + { + "epoch": 9.78, + "learning_rate": 8.124590756201697e-05, + "loss": 2.3554, + "step": 72380 + }, + { + "epoch": 9.78, + "learning_rate": 8.120819769513123e-05, + "loss": 2.339, + "step": 72390 + }, + { + "epoch": 9.78, + "learning_rate": 8.117049333318273e-05, + "loss": 2.343, + "step": 72400 + }, + { + "epoch": 9.78, + "learning_rate": 8.113279447918877e-05, + "loss": 2.34, + "step": 72410 + }, + { + "epoch": 9.78, + "learning_rate": 8.10951011361662e-05, + "loss": 2.3268, + "step": 72420 + }, + { + "epoch": 9.78, + "learning_rate": 8.105741330713115e-05, + "loss": 2.3213, + "step": 72430 + }, + { + "epoch": 9.78, + "learning_rate": 8.101973099509969e-05, + "loss": 2.3621, + "step": 72440 + }, + { + "epoch": 9.79, + "learning_rate": 8.098205420308716e-05, + "loss": 2.3484, + "step": 72450 + }, + { + "epoch": 9.79, + "learning_rate": 8.094438293410871e-05, + "loss": 2.3508, + "step": 72460 + }, + { + "epoch": 9.79, + "learning_rate": 8.090671719117884e-05, + "loss": 2.314, + "step": 72470 + }, + { + "epoch": 9.79, + "learning_rate": 8.086905697731164e-05, + "loss": 2.3237, + "step": 72480 + }, + { + "epoch": 9.79, + "learning_rate": 8.083140229552088e-05, + "loss": 2.3521, + "step": 72490 + }, + { + "epoch": 9.79, + "learning_rate": 8.079375314881978e-05, + "loss": 2.345, + "step": 72500 + }, + { + "epoch": 9.79, + "learning_rate": 8.075610954022109e-05, + "loss": 2.336, + "step": 72510 + }, + { + "epoch": 9.79, + "learning_rate": 8.071847147273726e-05, + "loss": 2.3152, + "step": 72520 + }, + { + "epoch": 9.8, + "learning_rate": 8.06808389493801e-05, + "loss": 2.3125, + "step": 72530 + }, + { + "epoch": 9.8, + "learning_rate": 8.06432119731612e-05, + "loss": 2.3431, + "step": 72540 + }, + { + "epoch": 9.8, + "learning_rate": 8.060559054709154e-05, + "loss": 2.3159, + "step": 72550 + }, + { + "epoch": 9.8, + "learning_rate": 8.056797467418164e-05, + "loss": 2.3364, + "step": 72560 + }, + { + "epoch": 9.8, + "learning_rate": 8.053036435744176e-05, + "loss": 2.3274, + "step": 72570 + }, + { + "epoch": 9.8, + "learning_rate": 8.049275959988153e-05, + "loss": 2.3399, + "step": 72580 + }, + { + "epoch": 9.8, + "learning_rate": 8.045516040451023e-05, + "loss": 2.3344, + "step": 72590 + }, + { + "epoch": 9.81, + "learning_rate": 8.041756677433657e-05, + "loss": 2.3327, + "step": 72600 + }, + { + "epoch": 9.81, + "learning_rate": 8.037997871236899e-05, + "loss": 2.3487, + "step": 72610 + }, + { + "epoch": 9.81, + "learning_rate": 8.034239622161552e-05, + "loss": 2.3181, + "step": 72620 + }, + { + "epoch": 9.81, + "learning_rate": 8.030481930508338e-05, + "loss": 2.3124, + "step": 72630 + }, + { + "epoch": 9.81, + "learning_rate": 8.02672479657798e-05, + "loss": 2.3469, + "step": 72640 + }, + { + "epoch": 9.81, + "learning_rate": 8.022968220671123e-05, + "loss": 2.3582, + "step": 72650 + }, + { + "epoch": 9.81, + "learning_rate": 8.01921220308839e-05, + "loss": 2.3381, + "step": 72660 + }, + { + "epoch": 9.81, + "learning_rate": 8.015456744130344e-05, + "loss": 2.3173, + "step": 72670 + }, + { + "epoch": 9.82, + "learning_rate": 8.011701844097504e-05, + "loss": 2.3267, + "step": 72680 + }, + { + "epoch": 9.82, + "learning_rate": 8.00794750329036e-05, + "loss": 2.3173, + "step": 72690 + }, + { + "epoch": 9.82, + "learning_rate": 8.00419372200934e-05, + "loss": 2.3142, + "step": 72700 + }, + { + "epoch": 9.82, + "learning_rate": 8.00044050055483e-05, + "loss": 2.3247, + "step": 72710 + }, + { + "epoch": 9.82, + "learning_rate": 7.996687839227181e-05, + "loss": 2.3183, + "step": 72720 + }, + { + "epoch": 9.82, + "learning_rate": 7.992935738326689e-05, + "loss": 2.3312, + "step": 72730 + }, + { + "epoch": 9.82, + "learning_rate": 7.989184198153613e-05, + "loss": 2.3302, + "step": 72740 + }, + { + "epoch": 9.83, + "learning_rate": 7.98543321900816e-05, + "loss": 2.296, + "step": 72750 + }, + { + "epoch": 9.83, + "learning_rate": 7.981682801190493e-05, + "loss": 2.3502, + "step": 72760 + }, + { + "epoch": 9.83, + "learning_rate": 7.977932945000738e-05, + "loss": 2.2959, + "step": 72770 + }, + { + "epoch": 9.83, + "learning_rate": 7.974183650738968e-05, + "loss": 2.3102, + "step": 72780 + }, + { + "epoch": 9.83, + "learning_rate": 7.970434918705214e-05, + "loss": 2.3401, + "step": 72790 + }, + { + "epoch": 9.83, + "learning_rate": 7.966686749199452e-05, + "loss": 2.3505, + "step": 72800 + }, + { + "epoch": 9.83, + "learning_rate": 7.962939142521632e-05, + "loss": 2.3316, + "step": 72810 + }, + { + "epoch": 9.84, + "learning_rate": 7.959192098971658e-05, + "loss": 2.3281, + "step": 72820 + }, + { + "epoch": 9.84, + "learning_rate": 7.95544561884936e-05, + "loss": 2.3243, + "step": 72830 + }, + { + "epoch": 9.84, + "learning_rate": 7.951699702454555e-05, + "loss": 2.3329, + "step": 72840 + }, + { + "epoch": 9.84, + "learning_rate": 7.947954350087e-05, + "loss": 2.3442, + "step": 72850 + }, + { + "epoch": 9.84, + "learning_rate": 7.944209562046416e-05, + "loss": 2.351, + "step": 72860 + }, + { + "epoch": 9.84, + "learning_rate": 7.940465338632466e-05, + "loss": 2.3243, + "step": 72870 + }, + { + "epoch": 9.84, + "learning_rate": 7.936721680144771e-05, + "loss": 2.3261, + "step": 72880 + }, + { + "epoch": 9.84, + "learning_rate": 7.932978586882923e-05, + "loss": 2.3444, + "step": 72890 + }, + { + "epoch": 9.85, + "learning_rate": 7.929236059146448e-05, + "loss": 2.3373, + "step": 72900 + }, + { + "epoch": 9.85, + "learning_rate": 7.925494097234831e-05, + "loss": 2.335, + "step": 72910 + }, + { + "epoch": 9.85, + "learning_rate": 7.921752701447526e-05, + "loss": 2.3288, + "step": 72920 + }, + { + "epoch": 9.85, + "learning_rate": 7.918011872083919e-05, + "loss": 2.3072, + "step": 72930 + }, + { + "epoch": 9.85, + "learning_rate": 7.914271609443381e-05, + "loss": 2.3305, + "step": 72940 + }, + { + "epoch": 9.85, + "learning_rate": 7.910531913825198e-05, + "loss": 2.3355, + "step": 72950 + }, + { + "epoch": 9.85, + "learning_rate": 7.906792785528643e-05, + "loss": 2.3132, + "step": 72960 + }, + { + "epoch": 9.86, + "learning_rate": 7.903054224852938e-05, + "loss": 2.3183, + "step": 72970 + }, + { + "epoch": 9.86, + "learning_rate": 7.899316232097247e-05, + "loss": 2.3356, + "step": 72980 + }, + { + "epoch": 9.86, + "learning_rate": 7.8955788075607e-05, + "loss": 2.2969, + "step": 72990 + }, + { + "epoch": 9.86, + "learning_rate": 7.89184195154237e-05, + "loss": 2.3195, + "step": 73000 + }, + { + "epoch": 9.86, + "eval_loss": 2.442723274230957, + "eval_runtime": 1269.4894, + "eval_samples_per_second": 59.973, + "eval_steps_per_second": 4.998, + "step": 73000 + }, + { + "epoch": 9.86, + "learning_rate": 7.888105664341296e-05, + "loss": 2.3405, + "step": 73010 + }, + { + "epoch": 9.86, + "learning_rate": 7.884369946256483e-05, + "loss": 2.3412, + "step": 73020 + }, + { + "epoch": 9.86, + "learning_rate": 7.880634797586846e-05, + "loss": 2.3368, + "step": 73030 + }, + { + "epoch": 9.86, + "learning_rate": 7.876900218631305e-05, + "loss": 2.3505, + "step": 73040 + }, + { + "epoch": 9.87, + "learning_rate": 7.8731662096887e-05, + "loss": 2.34, + "step": 73050 + }, + { + "epoch": 9.87, + "learning_rate": 7.869432771057852e-05, + "loss": 2.3367, + "step": 73060 + }, + { + "epoch": 9.87, + "learning_rate": 7.865699903037514e-05, + "loss": 2.3125, + "step": 73070 + }, + { + "epoch": 9.87, + "learning_rate": 7.861967605926396e-05, + "loss": 2.3156, + "step": 73080 + }, + { + "epoch": 9.87, + "learning_rate": 7.858235880023182e-05, + "loss": 2.3428, + "step": 73090 + }, + { + "epoch": 9.87, + "learning_rate": 7.85450472562649e-05, + "loss": 2.3348, + "step": 73100 + }, + { + "epoch": 9.87, + "learning_rate": 7.85077414303489e-05, + "loss": 2.3366, + "step": 73110 + }, + { + "epoch": 9.88, + "learning_rate": 7.847044132546932e-05, + "loss": 2.3302, + "step": 73120 + }, + { + "epoch": 9.88, + "learning_rate": 7.843314694461087e-05, + "loss": 2.3401, + "step": 73130 + }, + { + "epoch": 9.88, + "learning_rate": 7.839585829075815e-05, + "loss": 2.3344, + "step": 73140 + }, + { + "epoch": 9.88, + "learning_rate": 7.835857536689489e-05, + "loss": 2.3426, + "step": 73150 + }, + { + "epoch": 9.88, + "learning_rate": 7.83212981760047e-05, + "loss": 2.3094, + "step": 73160 + }, + { + "epoch": 9.88, + "learning_rate": 7.828402672107068e-05, + "loss": 2.3044, + "step": 73170 + }, + { + "epoch": 9.88, + "learning_rate": 7.824676100507536e-05, + "loss": 2.3035, + "step": 73180 + }, + { + "epoch": 9.88, + "learning_rate": 7.820950103100084e-05, + "loss": 2.3315, + "step": 73190 + }, + { + "epoch": 9.89, + "learning_rate": 7.817224680182873e-05, + "loss": 2.3362, + "step": 73200 + }, + { + "epoch": 9.89, + "learning_rate": 7.813499832054031e-05, + "loss": 2.3506, + "step": 73210 + }, + { + "epoch": 9.89, + "learning_rate": 7.80977555901164e-05, + "loss": 2.2887, + "step": 73220 + }, + { + "epoch": 9.89, + "learning_rate": 7.806051861353707e-05, + "loss": 2.3251, + "step": 73230 + }, + { + "epoch": 9.89, + "learning_rate": 7.802328739378232e-05, + "loss": 2.3021, + "step": 73240 + }, + { + "epoch": 9.89, + "learning_rate": 7.798606193383136e-05, + "loss": 2.3161, + "step": 73250 + }, + { + "epoch": 9.89, + "learning_rate": 7.794884223666327e-05, + "loss": 2.3154, + "step": 73260 + }, + { + "epoch": 9.9, + "learning_rate": 7.791162830525637e-05, + "loss": 2.3101, + "step": 73270 + }, + { + "epoch": 9.9, + "learning_rate": 7.78744201425886e-05, + "loss": 2.3441, + "step": 73280 + }, + { + "epoch": 9.9, + "learning_rate": 7.78372177516376e-05, + "loss": 2.3122, + "step": 73290 + }, + { + "epoch": 9.9, + "learning_rate": 7.780002113538035e-05, + "loss": 2.3155, + "step": 73300 + }, + { + "epoch": 9.9, + "learning_rate": 7.776283029679339e-05, + "loss": 2.2871, + "step": 73310 + }, + { + "epoch": 9.9, + "learning_rate": 7.772564523885294e-05, + "loss": 2.3248, + "step": 73320 + }, + { + "epoch": 9.9, + "learning_rate": 7.76884659645346e-05, + "loss": 2.3508, + "step": 73330 + }, + { + "epoch": 9.91, + "learning_rate": 7.765129247681372e-05, + "loss": 2.3322, + "step": 73340 + }, + { + "epoch": 9.91, + "learning_rate": 7.761412477866477e-05, + "loss": 2.3289, + "step": 73350 + }, + { + "epoch": 9.91, + "learning_rate": 7.75769628730622e-05, + "loss": 2.3293, + "step": 73360 + }, + { + "epoch": 9.91, + "learning_rate": 7.753980676297986e-05, + "loss": 2.3257, + "step": 73370 + }, + { + "epoch": 9.91, + "learning_rate": 7.750265645139104e-05, + "loss": 2.3062, + "step": 73380 + }, + { + "epoch": 9.91, + "learning_rate": 7.746551194126864e-05, + "loss": 2.2948, + "step": 73390 + }, + { + "epoch": 9.91, + "learning_rate": 7.742837323558498e-05, + "loss": 2.3186, + "step": 73400 + }, + { + "epoch": 9.91, + "learning_rate": 7.739124033731214e-05, + "loss": 2.3132, + "step": 73410 + }, + { + "epoch": 9.92, + "learning_rate": 7.735411324942167e-05, + "loss": 2.2895, + "step": 73420 + }, + { + "epoch": 9.92, + "learning_rate": 7.732070384065256e-05, + "loss": 2.3114, + "step": 73430 + }, + { + "epoch": 9.92, + "learning_rate": 7.72835878006731e-05, + "loss": 2.3158, + "step": 73440 + }, + { + "epoch": 9.92, + "learning_rate": 7.724647757969068e-05, + "loss": 2.3355, + "step": 73450 + }, + { + "epoch": 9.92, + "learning_rate": 7.720937318067489e-05, + "loss": 2.3104, + "step": 73460 + }, + { + "epoch": 9.92, + "learning_rate": 7.71722746065949e-05, + "loss": 2.3438, + "step": 73470 + }, + { + "epoch": 9.92, + "learning_rate": 7.713518186041959e-05, + "loss": 2.3307, + "step": 73480 + }, + { + "epoch": 9.93, + "learning_rate": 7.709809494511716e-05, + "loss": 2.3443, + "step": 73490 + }, + { + "epoch": 9.93, + "learning_rate": 7.706101386365537e-05, + "loss": 2.331, + "step": 73500 + }, + { + "epoch": 9.93, + "learning_rate": 7.702393861900165e-05, + "loss": 2.326, + "step": 73510 + }, + { + "epoch": 9.93, + "learning_rate": 7.698686921412282e-05, + "loss": 2.3392, + "step": 73520 + }, + { + "epoch": 9.93, + "learning_rate": 7.694980565198543e-05, + "loss": 2.271, + "step": 73530 + }, + { + "epoch": 9.93, + "learning_rate": 7.691274793555519e-05, + "loss": 2.3062, + "step": 73540 + }, + { + "epoch": 9.93, + "learning_rate": 7.687569606779777e-05, + "loss": 2.3192, + "step": 73550 + }, + { + "epoch": 9.93, + "learning_rate": 7.683865005167803e-05, + "loss": 2.3228, + "step": 73560 + }, + { + "epoch": 9.94, + "learning_rate": 7.680160989016067e-05, + "loss": 2.3511, + "step": 73570 + }, + { + "epoch": 9.94, + "learning_rate": 7.676457558620965e-05, + "loss": 2.3147, + "step": 73580 + }, + { + "epoch": 9.94, + "learning_rate": 7.672754714278855e-05, + "loss": 2.3201, + "step": 73590 + }, + { + "epoch": 9.94, + "learning_rate": 7.669052456286062e-05, + "loss": 2.3203, + "step": 73600 + }, + { + "epoch": 9.94, + "learning_rate": 7.665350784938845e-05, + "loss": 2.3201, + "step": 73610 + }, + { + "epoch": 9.94, + "learning_rate": 7.661649700533417e-05, + "loss": 2.3442, + "step": 73620 + }, + { + "epoch": 9.94, + "learning_rate": 7.657949203365967e-05, + "loss": 2.3143, + "step": 73630 + }, + { + "epoch": 9.95, + "learning_rate": 7.654249293732601e-05, + "loss": 2.3344, + "step": 73640 + }, + { + "epoch": 9.95, + "learning_rate": 7.650549971929414e-05, + "loss": 2.3371, + "step": 73650 + }, + { + "epoch": 9.95, + "learning_rate": 7.646851238252432e-05, + "loss": 2.2976, + "step": 73660 + }, + { + "epoch": 9.95, + "learning_rate": 7.643153092997632e-05, + "loss": 2.3583, + "step": 73670 + }, + { + "epoch": 9.95, + "learning_rate": 7.639455536460963e-05, + "loss": 2.3362, + "step": 73680 + }, + { + "epoch": 9.95, + "learning_rate": 7.635758568938309e-05, + "loss": 2.3178, + "step": 73690 + }, + { + "epoch": 9.95, + "learning_rate": 7.632062190725508e-05, + "loss": 2.3236, + "step": 73700 + }, + { + "epoch": 9.96, + "learning_rate": 7.628366402118367e-05, + "loss": 2.335, + "step": 73710 + }, + { + "epoch": 9.96, + "learning_rate": 7.624671203412623e-05, + "loss": 2.3298, + "step": 73720 + }, + { + "epoch": 9.96, + "learning_rate": 7.620976594903997e-05, + "loss": 2.3284, + "step": 73730 + }, + { + "epoch": 9.96, + "learning_rate": 7.617282576888115e-05, + "loss": 2.3405, + "step": 73740 + }, + { + "epoch": 9.96, + "learning_rate": 7.613589149660607e-05, + "loss": 2.3345, + "step": 73750 + }, + { + "epoch": 9.96, + "learning_rate": 7.609896313517017e-05, + "loss": 2.342, + "step": 73760 + }, + { + "epoch": 9.96, + "learning_rate": 7.606204068752874e-05, + "loss": 2.3303, + "step": 73770 + }, + { + "epoch": 9.96, + "learning_rate": 7.602512415663632e-05, + "loss": 2.3287, + "step": 73780 + }, + { + "epoch": 9.97, + "learning_rate": 7.598821354544702e-05, + "loss": 2.3382, + "step": 73790 + }, + { + "epoch": 9.97, + "learning_rate": 7.595130885691474e-05, + "loss": 2.3479, + "step": 73800 + }, + { + "epoch": 9.97, + "learning_rate": 7.591441009399258e-05, + "loss": 2.339, + "step": 73810 + }, + { + "epoch": 9.97, + "learning_rate": 7.587751725963326e-05, + "loss": 2.3257, + "step": 73820 + }, + { + "epoch": 9.97, + "learning_rate": 7.584063035678917e-05, + "loss": 2.3039, + "step": 73830 + }, + { + "epoch": 9.97, + "learning_rate": 7.5803749388412e-05, + "loss": 2.3207, + "step": 73840 + }, + { + "epoch": 9.97, + "learning_rate": 7.576687435745323e-05, + "loss": 2.3073, + "step": 73850 + }, + { + "epoch": 9.98, + "learning_rate": 7.573000526686359e-05, + "loss": 2.3336, + "step": 73860 + }, + { + "epoch": 9.98, + "learning_rate": 7.569314211959346e-05, + "loss": 2.2973, + "step": 73870 + }, + { + "epoch": 9.98, + "learning_rate": 7.565628491859286e-05, + "loss": 2.3278, + "step": 73880 + }, + { + "epoch": 9.98, + "learning_rate": 7.561943366681114e-05, + "loss": 2.3097, + "step": 73890 + }, + { + "epoch": 9.98, + "learning_rate": 7.55825883671972e-05, + "loss": 2.3274, + "step": 73900 + }, + { + "epoch": 9.98, + "learning_rate": 7.554574902269967e-05, + "loss": 2.3112, + "step": 73910 + }, + { + "epoch": 9.98, + "learning_rate": 7.550891563626636e-05, + "loss": 2.3235, + "step": 73920 + }, + { + "epoch": 9.98, + "learning_rate": 7.547208821084501e-05, + "loss": 2.3231, + "step": 73930 + }, + { + "epoch": 9.99, + "learning_rate": 7.543526674938246e-05, + "loss": 2.3159, + "step": 73940 + }, + { + "epoch": 9.99, + "learning_rate": 7.539845125482543e-05, + "loss": 2.2928, + "step": 73950 + }, + { + "epoch": 9.99, + "learning_rate": 7.536164173011988e-05, + "loss": 2.3303, + "step": 73960 + }, + { + "epoch": 9.99, + "learning_rate": 7.532483817821156e-05, + "loss": 2.3305, + "step": 73970 + }, + { + "epoch": 9.99, + "learning_rate": 7.528804060204554e-05, + "loss": 2.3057, + "step": 73980 + }, + { + "epoch": 9.99, + "learning_rate": 7.525124900456641e-05, + "loss": 2.3563, + "step": 73990 + }, + { + "epoch": 9.99, + "learning_rate": 7.52144633887185e-05, + "loss": 2.3183, + "step": 74000 + }, + { + "epoch": 9.99, + "eval_loss": 2.4382996559143066, + "eval_runtime": 1269.0959, + "eval_samples_per_second": 59.992, + "eval_steps_per_second": 5.0, + "step": 74000 + }, + { + "epoch": 10.0, + "learning_rate": 7.517768375744541e-05, + "loss": 2.3382, + "step": 74010 + }, + { + "epoch": 10.0, + "learning_rate": 7.514091011369036e-05, + "loss": 2.3106, + "step": 74020 + }, + { + "epoch": 10.0, + "learning_rate": 7.510414246039616e-05, + "loss": 2.3396, + "step": 74030 + }, + { + "epoch": 10.0, + "learning_rate": 7.506738080050496e-05, + "loss": 2.3195, + "step": 74040 + }, + { + "epoch": 10.0, + "learning_rate": 7.503062513695868e-05, + "loss": 2.2994, + "step": 74050 + }, + { + "epoch": 10.0, + "learning_rate": 7.499387547269857e-05, + "loss": 2.2542, + "step": 74060 + }, + { + "epoch": 10.0, + "learning_rate": 7.495713181066538e-05, + "loss": 2.285, + "step": 74070 + }, + { + "epoch": 10.01, + "learning_rate": 7.492039415379957e-05, + "loss": 2.2853, + "step": 74080 + }, + { + "epoch": 10.01, + "learning_rate": 7.488366250504095e-05, + "loss": 2.274, + "step": 74090 + }, + { + "epoch": 10.01, + "learning_rate": 7.484693686732887e-05, + "loss": 2.3008, + "step": 74100 + }, + { + "epoch": 10.01, + "learning_rate": 7.481021724360231e-05, + "loss": 2.2662, + "step": 74110 + }, + { + "epoch": 10.01, + "learning_rate": 7.477350363679957e-05, + "loss": 2.2901, + "step": 74120 + }, + { + "epoch": 10.01, + "learning_rate": 7.47367960498588e-05, + "loss": 2.2318, + "step": 74130 + }, + { + "epoch": 10.01, + "learning_rate": 7.47000944857172e-05, + "loss": 2.2923, + "step": 74140 + }, + { + "epoch": 10.01, + "learning_rate": 7.466339894731193e-05, + "loss": 2.2659, + "step": 74150 + }, + { + "epoch": 10.02, + "learning_rate": 7.462670943757936e-05, + "loss": 2.2859, + "step": 74160 + }, + { + "epoch": 10.02, + "learning_rate": 7.45900259594556e-05, + "loss": 2.2853, + "step": 74170 + }, + { + "epoch": 10.02, + "learning_rate": 7.455334851587615e-05, + "loss": 2.284, + "step": 74180 + }, + { + "epoch": 10.02, + "learning_rate": 7.4516677109776e-05, + "loss": 2.2795, + "step": 74190 + }, + { + "epoch": 10.02, + "learning_rate": 7.44800117440898e-05, + "loss": 2.2537, + "step": 74200 + }, + { + "epoch": 10.02, + "learning_rate": 7.444335242175158e-05, + "loss": 2.2663, + "step": 74210 + }, + { + "epoch": 10.02, + "learning_rate": 7.440669914569488e-05, + "loss": 2.2674, + "step": 74220 + }, + { + "epoch": 10.03, + "learning_rate": 7.437005191885293e-05, + "loss": 2.2734, + "step": 74230 + }, + { + "epoch": 10.03, + "learning_rate": 7.433341074415822e-05, + "loss": 2.2975, + "step": 74240 + }, + { + "epoch": 10.03, + "learning_rate": 7.429677562454305e-05, + "loss": 2.2357, + "step": 74250 + }, + { + "epoch": 10.03, + "learning_rate": 7.426014656293899e-05, + "loss": 2.2686, + "step": 74260 + }, + { + "epoch": 10.03, + "learning_rate": 7.422352356227715e-05, + "loss": 2.2922, + "step": 74270 + }, + { + "epoch": 10.03, + "learning_rate": 7.418690662548836e-05, + "loss": 2.3174, + "step": 74280 + }, + { + "epoch": 10.03, + "learning_rate": 7.415029575550274e-05, + "loss": 2.2796, + "step": 74290 + }, + { + "epoch": 10.03, + "learning_rate": 7.411369095524997e-05, + "loss": 2.26, + "step": 74300 + }, + { + "epoch": 10.04, + "learning_rate": 7.407709222765936e-05, + "loss": 2.2817, + "step": 74310 + }, + { + "epoch": 10.04, + "learning_rate": 7.404049957565961e-05, + "loss": 2.2818, + "step": 74320 + }, + { + "epoch": 10.04, + "learning_rate": 7.400391300217908e-05, + "loss": 2.2876, + "step": 74330 + }, + { + "epoch": 10.04, + "learning_rate": 7.396733251014533e-05, + "loss": 2.2688, + "step": 74340 + }, + { + "epoch": 10.04, + "learning_rate": 7.393075810248586e-05, + "loss": 2.2792, + "step": 74350 + }, + { + "epoch": 10.04, + "learning_rate": 7.389418978212732e-05, + "loss": 2.2799, + "step": 74360 + }, + { + "epoch": 10.04, + "learning_rate": 7.385762755199613e-05, + "loss": 2.2871, + "step": 74370 + }, + { + "epoch": 10.05, + "learning_rate": 7.38210714150181e-05, + "loss": 2.2777, + "step": 74380 + }, + { + "epoch": 10.05, + "learning_rate": 7.378452137411846e-05, + "loss": 2.2812, + "step": 74390 + }, + { + "epoch": 10.05, + "learning_rate": 7.374797743222221e-05, + "loss": 2.257, + "step": 74400 + }, + { + "epoch": 10.05, + "learning_rate": 7.371143959225363e-05, + "loss": 2.2838, + "step": 74410 + }, + { + "epoch": 10.05, + "learning_rate": 7.367490785713654e-05, + "loss": 2.278, + "step": 74420 + }, + { + "epoch": 10.05, + "learning_rate": 7.363838222979445e-05, + "loss": 2.2827, + "step": 74430 + }, + { + "epoch": 10.05, + "learning_rate": 7.360186271315013e-05, + "loss": 2.2847, + "step": 74440 + }, + { + "epoch": 10.06, + "learning_rate": 7.356534931012613e-05, + "loss": 2.2956, + "step": 74450 + }, + { + "epoch": 10.06, + "learning_rate": 7.352884202364428e-05, + "loss": 2.2798, + "step": 74460 + }, + { + "epoch": 10.06, + "learning_rate": 7.349234085662596e-05, + "loss": 2.2965, + "step": 74470 + }, + { + "epoch": 10.06, + "learning_rate": 7.345584581199223e-05, + "loss": 2.2983, + "step": 74480 + }, + { + "epoch": 10.06, + "learning_rate": 7.341935689266346e-05, + "loss": 2.2888, + "step": 74490 + }, + { + "epoch": 10.06, + "learning_rate": 7.338287410155956e-05, + "loss": 2.2695, + "step": 74500 + }, + { + "epoch": 10.06, + "learning_rate": 7.334639744160015e-05, + "loss": 2.2846, + "step": 74510 + }, + { + "epoch": 10.06, + "learning_rate": 7.330992691570405e-05, + "loss": 2.2776, + "step": 74520 + }, + { + "epoch": 10.07, + "learning_rate": 7.327346252678993e-05, + "loss": 2.2856, + "step": 74530 + }, + { + "epoch": 10.07, + "learning_rate": 7.323700427777555e-05, + "loss": 2.2801, + "step": 74540 + }, + { + "epoch": 10.07, + "learning_rate": 7.32005521715786e-05, + "loss": 2.3014, + "step": 74550 + }, + { + "epoch": 10.07, + "learning_rate": 7.316410621111597e-05, + "loss": 2.2945, + "step": 74560 + }, + { + "epoch": 10.07, + "learning_rate": 7.31276663993043e-05, + "loss": 2.2861, + "step": 74570 + }, + { + "epoch": 10.07, + "learning_rate": 7.309123273905955e-05, + "loss": 2.2648, + "step": 74580 + }, + { + "epoch": 10.07, + "learning_rate": 7.30548052332972e-05, + "loss": 2.2642, + "step": 74590 + }, + { + "epoch": 10.08, + "learning_rate": 7.301838388493244e-05, + "loss": 2.2877, + "step": 74600 + }, + { + "epoch": 10.08, + "learning_rate": 7.298196869687973e-05, + "loss": 2.2649, + "step": 74610 + }, + { + "epoch": 10.08, + "learning_rate": 7.294555967205309e-05, + "loss": 2.2688, + "step": 74620 + }, + { + "epoch": 10.08, + "learning_rate": 7.290915681336618e-05, + "loss": 2.2914, + "step": 74630 + }, + { + "epoch": 10.08, + "learning_rate": 7.287276012373196e-05, + "loss": 2.2729, + "step": 74640 + }, + { + "epoch": 10.08, + "learning_rate": 7.283636960606314e-05, + "loss": 2.2764, + "step": 74650 + }, + { + "epoch": 10.08, + "learning_rate": 7.279998526327174e-05, + "loss": 2.2877, + "step": 74660 + }, + { + "epoch": 10.08, + "learning_rate": 7.27636070982693e-05, + "loss": 2.2891, + "step": 74670 + }, + { + "epoch": 10.09, + "learning_rate": 7.272723511396702e-05, + "loss": 2.2695, + "step": 74680 + }, + { + "epoch": 10.09, + "learning_rate": 7.269086931327543e-05, + "loss": 2.2822, + "step": 74690 + }, + { + "epoch": 10.09, + "learning_rate": 7.265450969910465e-05, + "loss": 2.2802, + "step": 74700 + }, + { + "epoch": 10.09, + "learning_rate": 7.261815627436425e-05, + "loss": 2.3001, + "step": 74710 + }, + { + "epoch": 10.09, + "learning_rate": 7.258180904196338e-05, + "loss": 2.2765, + "step": 74720 + }, + { + "epoch": 10.09, + "learning_rate": 7.254546800481077e-05, + "loss": 2.2653, + "step": 74730 + }, + { + "epoch": 10.09, + "learning_rate": 7.250913316581436e-05, + "loss": 2.2761, + "step": 74740 + }, + { + "epoch": 10.1, + "learning_rate": 7.247280452788189e-05, + "loss": 2.2761, + "step": 74750 + }, + { + "epoch": 10.1, + "learning_rate": 7.243648209392042e-05, + "loss": 2.2513, + "step": 74760 + }, + { + "epoch": 10.1, + "learning_rate": 7.240016586683668e-05, + "loss": 2.2945, + "step": 74770 + }, + { + "epoch": 10.1, + "learning_rate": 7.236385584953676e-05, + "loss": 2.2818, + "step": 74780 + }, + { + "epoch": 10.1, + "learning_rate": 7.232755204492624e-05, + "loss": 2.2997, + "step": 74790 + }, + { + "epoch": 10.1, + "learning_rate": 7.229125445591039e-05, + "loss": 2.2752, + "step": 74800 + }, + { + "epoch": 10.1, + "learning_rate": 7.225496308539379e-05, + "loss": 2.2933, + "step": 74810 + }, + { + "epoch": 10.11, + "learning_rate": 7.221867793628054e-05, + "loss": 2.2772, + "step": 74820 + }, + { + "epoch": 10.11, + "learning_rate": 7.21823990114744e-05, + "loss": 2.2908, + "step": 74830 + }, + { + "epoch": 10.11, + "learning_rate": 7.214612631387842e-05, + "loss": 2.3003, + "step": 74840 + }, + { + "epoch": 10.11, + "learning_rate": 7.210985984639536e-05, + "loss": 2.2767, + "step": 74850 + }, + { + "epoch": 10.11, + "learning_rate": 7.207359961192732e-05, + "loss": 2.2683, + "step": 74860 + }, + { + "epoch": 10.11, + "learning_rate": 7.20373456133759e-05, + "loss": 2.2635, + "step": 74870 + }, + { + "epoch": 10.11, + "learning_rate": 7.200109785364239e-05, + "loss": 2.2833, + "step": 74880 + }, + { + "epoch": 10.11, + "learning_rate": 7.196485633562739e-05, + "loss": 2.2758, + "step": 74890 + }, + { + "epoch": 10.12, + "learning_rate": 7.192862106223103e-05, + "loss": 2.2966, + "step": 74900 + }, + { + "epoch": 10.12, + "learning_rate": 7.189239203635295e-05, + "loss": 2.2873, + "step": 74910 + }, + { + "epoch": 10.12, + "learning_rate": 7.185616926089235e-05, + "loss": 2.2645, + "step": 74920 + }, + { + "epoch": 10.12, + "learning_rate": 7.1819952738748e-05, + "loss": 2.2807, + "step": 74930 + }, + { + "epoch": 10.12, + "learning_rate": 7.178374247281786e-05, + "loss": 2.2707, + "step": 74940 + }, + { + "epoch": 10.12, + "learning_rate": 7.174753846599974e-05, + "loss": 2.284, + "step": 74950 + }, + { + "epoch": 10.12, + "learning_rate": 7.171134072119067e-05, + "loss": 2.2731, + "step": 74960 + }, + { + "epoch": 10.13, + "learning_rate": 7.167514924128745e-05, + "loss": 2.2863, + "step": 74970 + }, + { + "epoch": 10.13, + "learning_rate": 7.163896402918619e-05, + "loss": 2.2899, + "step": 74980 + }, + { + "epoch": 10.13, + "learning_rate": 7.160278508778243e-05, + "loss": 2.2872, + "step": 74990 + }, + { + "epoch": 10.13, + "learning_rate": 7.156661241997149e-05, + "loss": 2.3089, + "step": 75000 + }, + { + "epoch": 10.13, + "eval_loss": 2.4407835006713867, + "eval_runtime": 1269.632, + "eval_samples_per_second": 59.966, + "eval_steps_per_second": 4.998, + "step": 75000 + }, + { + "epoch": 10.13, + "learning_rate": 7.153044602864796e-05, + "loss": 2.272, + "step": 75010 + }, + { + "epoch": 10.13, + "learning_rate": 7.14942859167059e-05, + "loss": 2.2891, + "step": 75020 + }, + { + "epoch": 10.13, + "learning_rate": 7.145813208703911e-05, + "loss": 2.2711, + "step": 75030 + }, + { + "epoch": 10.13, + "learning_rate": 7.142198454254059e-05, + "loss": 2.2848, + "step": 75040 + }, + { + "epoch": 10.14, + "learning_rate": 7.13858432861031e-05, + "loss": 2.2882, + "step": 75050 + }, + { + "epoch": 10.14, + "learning_rate": 7.134970832061874e-05, + "loss": 2.2699, + "step": 75060 + }, + { + "epoch": 10.14, + "learning_rate": 7.131357964897906e-05, + "loss": 2.2703, + "step": 75070 + }, + { + "epoch": 10.14, + "learning_rate": 7.127745727407532e-05, + "loss": 2.2887, + "step": 75080 + }, + { + "epoch": 10.14, + "learning_rate": 7.124134119879807e-05, + "loss": 2.2635, + "step": 75090 + }, + { + "epoch": 10.14, + "learning_rate": 7.120523142603748e-05, + "loss": 2.2539, + "step": 75100 + }, + { + "epoch": 10.14, + "learning_rate": 7.116912795868307e-05, + "loss": 2.309, + "step": 75110 + }, + { + "epoch": 10.15, + "learning_rate": 7.113303079962401e-05, + "loss": 2.2625, + "step": 75120 + }, + { + "epoch": 10.15, + "learning_rate": 7.109693995174904e-05, + "loss": 2.276, + "step": 75130 + }, + { + "epoch": 10.15, + "learning_rate": 7.106085541794602e-05, + "loss": 2.2887, + "step": 75140 + }, + { + "epoch": 10.15, + "learning_rate": 7.102477720110275e-05, + "loss": 2.3016, + "step": 75150 + }, + { + "epoch": 10.15, + "learning_rate": 7.098870530410618e-05, + "loss": 2.2968, + "step": 75160 + }, + { + "epoch": 10.15, + "learning_rate": 7.0952639729843e-05, + "loss": 2.298, + "step": 75170 + }, + { + "epoch": 10.15, + "learning_rate": 7.091658048119926e-05, + "loss": 2.2978, + "step": 75180 + }, + { + "epoch": 10.16, + "learning_rate": 7.088052756106047e-05, + "loss": 2.3082, + "step": 75190 + }, + { + "epoch": 10.16, + "learning_rate": 7.084448097231182e-05, + "loss": 2.2648, + "step": 75200 + }, + { + "epoch": 10.16, + "learning_rate": 7.08084407178378e-05, + "loss": 2.3002, + "step": 75210 + }, + { + "epoch": 10.16, + "learning_rate": 7.077240680052242e-05, + "loss": 2.2578, + "step": 75220 + }, + { + "epoch": 10.16, + "learning_rate": 7.073637922324934e-05, + "loss": 2.2742, + "step": 75230 + }, + { + "epoch": 10.16, + "learning_rate": 7.070035798890147e-05, + "loss": 2.2786, + "step": 75240 + }, + { + "epoch": 10.16, + "learning_rate": 7.066434310036149e-05, + "loss": 2.2527, + "step": 75250 + }, + { + "epoch": 10.16, + "learning_rate": 7.062833456051134e-05, + "loss": 2.2931, + "step": 75260 + }, + { + "epoch": 10.17, + "learning_rate": 7.059233237223248e-05, + "loss": 2.2707, + "step": 75270 + }, + { + "epoch": 10.17, + "learning_rate": 7.055633653840605e-05, + "loss": 2.305, + "step": 75280 + }, + { + "epoch": 10.17, + "learning_rate": 7.05203470619125e-05, + "loss": 2.267, + "step": 75290 + }, + { + "epoch": 10.17, + "learning_rate": 7.048436394563178e-05, + "loss": 2.3066, + "step": 75300 + }, + { + "epoch": 10.17, + "learning_rate": 7.044838719244335e-05, + "loss": 2.2763, + "step": 75310 + }, + { + "epoch": 10.17, + "learning_rate": 7.041241680522624e-05, + "loss": 2.3101, + "step": 75320 + }, + { + "epoch": 10.17, + "learning_rate": 7.037645278685901e-05, + "loss": 2.2961, + "step": 75330 + }, + { + "epoch": 10.18, + "learning_rate": 7.034049514021941e-05, + "loss": 2.3077, + "step": 75340 + }, + { + "epoch": 10.18, + "learning_rate": 7.030454386818505e-05, + "loss": 2.2848, + "step": 75350 + }, + { + "epoch": 10.18, + "learning_rate": 7.026859897363272e-05, + "loss": 2.286, + "step": 75360 + }, + { + "epoch": 10.18, + "learning_rate": 7.0232660459439e-05, + "loss": 2.301, + "step": 75370 + }, + { + "epoch": 10.18, + "learning_rate": 7.019672832847972e-05, + "loss": 2.3134, + "step": 75380 + }, + { + "epoch": 10.18, + "learning_rate": 7.016080258363023e-05, + "loss": 2.2789, + "step": 75390 + }, + { + "epoch": 10.18, + "learning_rate": 7.012488322776555e-05, + "loss": 2.2761, + "step": 75400 + }, + { + "epoch": 10.18, + "learning_rate": 7.008897026375997e-05, + "loss": 2.2731, + "step": 75410 + }, + { + "epoch": 10.19, + "learning_rate": 7.005306369448735e-05, + "loss": 2.2786, + "step": 75420 + }, + { + "epoch": 10.19, + "learning_rate": 7.00171635228211e-05, + "loss": 2.2785, + "step": 75430 + }, + { + "epoch": 10.19, + "learning_rate": 6.998126975163401e-05, + "loss": 2.3212, + "step": 75440 + }, + { + "epoch": 10.19, + "learning_rate": 6.99453823837985e-05, + "loss": 2.2888, + "step": 75450 + }, + { + "epoch": 10.19, + "learning_rate": 6.990950142218635e-05, + "loss": 2.2683, + "step": 75460 + }, + { + "epoch": 10.19, + "learning_rate": 6.987362686966879e-05, + "loss": 2.2721, + "step": 75470 + }, + { + "epoch": 10.19, + "learning_rate": 6.983775872911673e-05, + "loss": 2.302, + "step": 75480 + }, + { + "epoch": 10.2, + "learning_rate": 6.98018970034004e-05, + "loss": 2.3044, + "step": 75490 + }, + { + "epoch": 10.2, + "learning_rate": 6.976604169538955e-05, + "loss": 2.2885, + "step": 75500 + }, + { + "epoch": 10.2, + "learning_rate": 6.973019280795343e-05, + "loss": 2.2901, + "step": 75510 + }, + { + "epoch": 10.2, + "learning_rate": 6.969435034396079e-05, + "loss": 2.2962, + "step": 75520 + }, + { + "epoch": 10.2, + "learning_rate": 6.965851430628e-05, + "loss": 2.2963, + "step": 75530 + }, + { + "epoch": 10.2, + "learning_rate": 6.962268469777851e-05, + "loss": 2.2809, + "step": 75540 + }, + { + "epoch": 10.2, + "learning_rate": 6.958686152132373e-05, + "loss": 2.2868, + "step": 75550 + }, + { + "epoch": 10.21, + "learning_rate": 6.95510447797822e-05, + "loss": 2.2925, + "step": 75560 + }, + { + "epoch": 10.21, + "learning_rate": 6.951523447602021e-05, + "loss": 2.2987, + "step": 75570 + }, + { + "epoch": 10.21, + "learning_rate": 6.947943061290338e-05, + "loss": 2.2927, + "step": 75580 + }, + { + "epoch": 10.21, + "learning_rate": 6.944363319329675e-05, + "loss": 2.257, + "step": 75590 + }, + { + "epoch": 10.21, + "learning_rate": 6.94078422200651e-05, + "loss": 2.276, + "step": 75600 + }, + { + "epoch": 10.21, + "learning_rate": 6.937205769607245e-05, + "loss": 2.2825, + "step": 75610 + }, + { + "epoch": 10.21, + "learning_rate": 6.933627962418233e-05, + "loss": 2.2841, + "step": 75620 + }, + { + "epoch": 10.21, + "learning_rate": 6.930050800725795e-05, + "loss": 2.2816, + "step": 75630 + }, + { + "epoch": 10.22, + "learning_rate": 6.926474284816175e-05, + "loss": 2.2938, + "step": 75640 + }, + { + "epoch": 10.22, + "learning_rate": 6.922898414975588e-05, + "loss": 2.2804, + "step": 75650 + }, + { + "epoch": 10.22, + "learning_rate": 6.919323191490179e-05, + "loss": 2.2823, + "step": 75660 + }, + { + "epoch": 10.22, + "learning_rate": 6.915748614646045e-05, + "loss": 2.2804, + "step": 75670 + }, + { + "epoch": 10.22, + "learning_rate": 6.912174684729246e-05, + "loss": 2.3074, + "step": 75680 + }, + { + "epoch": 10.22, + "learning_rate": 6.908601402025774e-05, + "loss": 2.2661, + "step": 75690 + }, + { + "epoch": 10.22, + "learning_rate": 6.905028766821573e-05, + "loss": 2.2593, + "step": 75700 + }, + { + "epoch": 10.23, + "learning_rate": 6.901456779402531e-05, + "loss": 2.2593, + "step": 75710 + }, + { + "epoch": 10.23, + "learning_rate": 6.897885440054497e-05, + "loss": 2.2788, + "step": 75720 + }, + { + "epoch": 10.23, + "learning_rate": 6.894314749063271e-05, + "loss": 2.2988, + "step": 75730 + }, + { + "epoch": 10.23, + "learning_rate": 6.890744706714568e-05, + "loss": 2.2889, + "step": 75740 + }, + { + "epoch": 10.23, + "learning_rate": 6.887175313294092e-05, + "loss": 2.2764, + "step": 75750 + }, + { + "epoch": 10.23, + "learning_rate": 6.883606569087466e-05, + "loss": 2.2834, + "step": 75760 + }, + { + "epoch": 10.23, + "learning_rate": 6.880038474380281e-05, + "loss": 2.2765, + "step": 75770 + }, + { + "epoch": 10.23, + "learning_rate": 6.876471029458065e-05, + "loss": 2.3067, + "step": 75780 + }, + { + "epoch": 10.24, + "learning_rate": 6.872904234606287e-05, + "loss": 2.2822, + "step": 75790 + }, + { + "epoch": 10.24, + "learning_rate": 6.869338090110389e-05, + "loss": 2.2782, + "step": 75800 + }, + { + "epoch": 10.24, + "learning_rate": 6.865772596255734e-05, + "loss": 2.2905, + "step": 75810 + }, + { + "epoch": 10.24, + "learning_rate": 6.862207753327644e-05, + "loss": 2.2804, + "step": 75820 + }, + { + "epoch": 10.24, + "learning_rate": 6.858643561611395e-05, + "loss": 2.271, + "step": 75830 + }, + { + "epoch": 10.24, + "learning_rate": 6.8550800213922e-05, + "loss": 2.2572, + "step": 75840 + }, + { + "epoch": 10.24, + "learning_rate": 6.851517132955229e-05, + "loss": 2.2733, + "step": 75850 + }, + { + "epoch": 10.25, + "learning_rate": 6.847954896585595e-05, + "loss": 2.3055, + "step": 75860 + }, + { + "epoch": 10.25, + "learning_rate": 6.844393312568352e-05, + "loss": 2.2974, + "step": 75870 + }, + { + "epoch": 10.25, + "learning_rate": 6.840832381188518e-05, + "loss": 2.2699, + "step": 75880 + }, + { + "epoch": 10.25, + "learning_rate": 6.837272102731049e-05, + "loss": 2.2937, + "step": 75890 + }, + { + "epoch": 10.25, + "learning_rate": 6.833712477480848e-05, + "loss": 2.2778, + "step": 75900 + }, + { + "epoch": 10.25, + "learning_rate": 6.830153505722759e-05, + "loss": 2.2821, + "step": 75910 + }, + { + "epoch": 10.25, + "learning_rate": 6.826595187741593e-05, + "loss": 2.3049, + "step": 75920 + }, + { + "epoch": 10.26, + "learning_rate": 6.823037523822103e-05, + "loss": 2.3026, + "step": 75930 + }, + { + "epoch": 10.26, + "learning_rate": 6.819480514248968e-05, + "loss": 2.2822, + "step": 75940 + }, + { + "epoch": 10.26, + "learning_rate": 6.815924159306844e-05, + "loss": 2.3048, + "step": 75950 + }, + { + "epoch": 10.26, + "learning_rate": 6.812368459280311e-05, + "loss": 2.2935, + "step": 75960 + }, + { + "epoch": 10.26, + "learning_rate": 6.808813414453922e-05, + "loss": 2.319, + "step": 75970 + }, + { + "epoch": 10.26, + "learning_rate": 6.805259025112152e-05, + "loss": 2.2787, + "step": 75980 + }, + { + "epoch": 10.26, + "learning_rate": 6.801705291539433e-05, + "loss": 2.2846, + "step": 75990 + }, + { + "epoch": 10.26, + "learning_rate": 6.798152214020154e-05, + "loss": 2.279, + "step": 76000 + }, + { + "epoch": 10.26, + "eval_loss": 2.4384381771087646, + "eval_runtime": 1269.5674, + "eval_samples_per_second": 59.969, + "eval_steps_per_second": 4.998, + "step": 76000 + }, + { + "epoch": 10.27, + "learning_rate": 6.794599792838641e-05, + "loss": 2.2895, + "step": 76010 + }, + { + "epoch": 10.27, + "learning_rate": 6.791048028279162e-05, + "loss": 2.2829, + "step": 76020 + }, + { + "epoch": 10.27, + "learning_rate": 6.787496920625954e-05, + "loss": 2.3112, + "step": 76030 + }, + { + "epoch": 10.27, + "learning_rate": 6.783946470163173e-05, + "loss": 2.2966, + "step": 76040 + }, + { + "epoch": 10.27, + "learning_rate": 6.78039667717496e-05, + "loss": 2.2717, + "step": 76050 + }, + { + "epoch": 10.27, + "learning_rate": 6.776847541945351e-05, + "loss": 2.2848, + "step": 76060 + }, + { + "epoch": 10.27, + "learning_rate": 6.773299064758374e-05, + "loss": 2.2937, + "step": 76070 + }, + { + "epoch": 10.28, + "learning_rate": 6.769751245897997e-05, + "loss": 2.289, + "step": 76080 + }, + { + "epoch": 10.28, + "learning_rate": 6.766204085648118e-05, + "loss": 2.2535, + "step": 76090 + }, + { + "epoch": 10.28, + "learning_rate": 6.762657584292593e-05, + "loss": 2.281, + "step": 76100 + }, + { + "epoch": 10.28, + "learning_rate": 6.75911174211522e-05, + "loss": 2.2784, + "step": 76110 + }, + { + "epoch": 10.28, + "learning_rate": 6.755566559399755e-05, + "loss": 2.2903, + "step": 76120 + }, + { + "epoch": 10.28, + "learning_rate": 6.752022036429902e-05, + "loss": 2.2918, + "step": 76130 + }, + { + "epoch": 10.28, + "learning_rate": 6.748478173489286e-05, + "loss": 2.2799, + "step": 76140 + }, + { + "epoch": 10.28, + "learning_rate": 6.744934970861513e-05, + "loss": 2.2952, + "step": 76150 + }, + { + "epoch": 10.29, + "learning_rate": 6.741392428830113e-05, + "loss": 2.2851, + "step": 76160 + }, + { + "epoch": 10.29, + "learning_rate": 6.737850547678579e-05, + "loss": 2.309, + "step": 76170 + }, + { + "epoch": 10.29, + "learning_rate": 6.73430932769034e-05, + "loss": 2.2763, + "step": 76180 + }, + { + "epoch": 10.29, + "learning_rate": 6.730768769148768e-05, + "loss": 2.2792, + "step": 76190 + }, + { + "epoch": 10.29, + "learning_rate": 6.727228872337202e-05, + "loss": 2.3108, + "step": 76200 + }, + { + "epoch": 10.29, + "learning_rate": 6.723689637538912e-05, + "loss": 2.2822, + "step": 76210 + }, + { + "epoch": 10.29, + "learning_rate": 6.720151065037112e-05, + "loss": 2.2966, + "step": 76220 + }, + { + "epoch": 10.3, + "learning_rate": 6.716613155114979e-05, + "loss": 2.3092, + "step": 76230 + }, + { + "epoch": 10.3, + "learning_rate": 6.713075908055617e-05, + "loss": 2.2931, + "step": 76240 + }, + { + "epoch": 10.3, + "learning_rate": 6.709539324142107e-05, + "loss": 2.2901, + "step": 76250 + }, + { + "epoch": 10.3, + "learning_rate": 6.706003403657434e-05, + "loss": 2.2975, + "step": 76260 + }, + { + "epoch": 10.3, + "learning_rate": 6.702468146884564e-05, + "loss": 2.3185, + "step": 76270 + }, + { + "epoch": 10.3, + "learning_rate": 6.698933554106406e-05, + "loss": 2.2842, + "step": 76280 + }, + { + "epoch": 10.3, + "learning_rate": 6.695399625605802e-05, + "loss": 2.2913, + "step": 76290 + }, + { + "epoch": 10.31, + "learning_rate": 6.691866361665548e-05, + "loss": 2.2785, + "step": 76300 + }, + { + "epoch": 10.31, + "learning_rate": 6.688333762568386e-05, + "loss": 2.2681, + "step": 76310 + }, + { + "epoch": 10.31, + "learning_rate": 6.684801828597005e-05, + "loss": 2.2691, + "step": 76320 + }, + { + "epoch": 10.31, + "learning_rate": 6.681270560034056e-05, + "loss": 2.2916, + "step": 76330 + }, + { + "epoch": 10.31, + "learning_rate": 6.677739957162102e-05, + "loss": 2.2904, + "step": 76340 + }, + { + "epoch": 10.31, + "learning_rate": 6.674210020263684e-05, + "loss": 2.2826, + "step": 76350 + }, + { + "epoch": 10.31, + "learning_rate": 6.670680749621273e-05, + "loss": 2.2954, + "step": 76360 + }, + { + "epoch": 10.31, + "learning_rate": 6.667152145517301e-05, + "loss": 2.2645, + "step": 76370 + }, + { + "epoch": 10.32, + "learning_rate": 6.663624208234132e-05, + "loss": 2.3007, + "step": 76380 + }, + { + "epoch": 10.32, + "learning_rate": 6.660096938054077e-05, + "loss": 2.3112, + "step": 76390 + }, + { + "epoch": 10.32, + "learning_rate": 6.656570335259414e-05, + "loss": 2.2731, + "step": 76400 + }, + { + "epoch": 10.32, + "learning_rate": 6.653044400132343e-05, + "loss": 2.2733, + "step": 76410 + }, + { + "epoch": 10.32, + "learning_rate": 6.649519132955016e-05, + "loss": 2.2939, + "step": 76420 + }, + { + "epoch": 10.32, + "learning_rate": 6.64599453400955e-05, + "loss": 2.2834, + "step": 76430 + }, + { + "epoch": 10.32, + "learning_rate": 6.64247060357798e-05, + "loss": 2.3035, + "step": 76440 + }, + { + "epoch": 10.33, + "learning_rate": 6.638947341942319e-05, + "loss": 2.2923, + "step": 76450 + }, + { + "epoch": 10.33, + "learning_rate": 6.63542474938449e-05, + "loss": 2.2919, + "step": 76460 + }, + { + "epoch": 10.33, + "learning_rate": 6.631902826186389e-05, + "loss": 2.2661, + "step": 76470 + }, + { + "epoch": 10.33, + "learning_rate": 6.62838157262986e-05, + "loss": 2.3292, + "step": 76480 + }, + { + "epoch": 10.33, + "learning_rate": 6.62486098899668e-05, + "loss": 2.2783, + "step": 76490 + }, + { + "epoch": 10.33, + "learning_rate": 6.621341075568573e-05, + "loss": 2.2618, + "step": 76500 + }, + { + "epoch": 10.33, + "learning_rate": 6.617821832627213e-05, + "loss": 2.2797, + "step": 76510 + }, + { + "epoch": 10.33, + "learning_rate": 6.614303260454223e-05, + "loss": 2.3157, + "step": 76520 + }, + { + "epoch": 10.34, + "learning_rate": 6.610785359331182e-05, + "loss": 2.275, + "step": 76530 + }, + { + "epoch": 10.34, + "learning_rate": 6.607268129539585e-05, + "loss": 2.3163, + "step": 76540 + }, + { + "epoch": 10.34, + "learning_rate": 6.603751571360903e-05, + "loss": 2.2885, + "step": 76550 + }, + { + "epoch": 10.34, + "learning_rate": 6.600235685076534e-05, + "loss": 2.2623, + "step": 76560 + }, + { + "epoch": 10.34, + "learning_rate": 6.59672047096784e-05, + "loss": 2.3004, + "step": 76570 + }, + { + "epoch": 10.34, + "learning_rate": 6.593205929316115e-05, + "loss": 2.2942, + "step": 76580 + }, + { + "epoch": 10.34, + "learning_rate": 6.589692060402599e-05, + "loss": 2.2927, + "step": 76590 + }, + { + "epoch": 10.35, + "learning_rate": 6.586178864508493e-05, + "loss": 2.307, + "step": 76600 + }, + { + "epoch": 10.35, + "learning_rate": 6.582666341914928e-05, + "loss": 2.3065, + "step": 76610 + }, + { + "epoch": 10.35, + "learning_rate": 6.579154492902982e-05, + "loss": 2.3072, + "step": 76620 + }, + { + "epoch": 10.35, + "learning_rate": 6.575643317753696e-05, + "loss": 2.2873, + "step": 76630 + }, + { + "epoch": 10.35, + "learning_rate": 6.572132816748034e-05, + "loss": 2.2848, + "step": 76640 + }, + { + "epoch": 10.35, + "learning_rate": 6.568622990166931e-05, + "loss": 2.287, + "step": 76650 + }, + { + "epoch": 10.35, + "learning_rate": 6.565113838291237e-05, + "loss": 2.2848, + "step": 76660 + }, + { + "epoch": 10.36, + "learning_rate": 6.561605361401774e-05, + "loss": 2.3058, + "step": 76670 + }, + { + "epoch": 10.36, + "learning_rate": 6.558097559779308e-05, + "loss": 2.3029, + "step": 76680 + }, + { + "epoch": 10.36, + "learning_rate": 6.554590433704539e-05, + "loss": 2.2928, + "step": 76690 + }, + { + "epoch": 10.36, + "learning_rate": 6.551083983458115e-05, + "loss": 2.2914, + "step": 76700 + }, + { + "epoch": 10.36, + "learning_rate": 6.547578209320632e-05, + "loss": 2.285, + "step": 76710 + }, + { + "epoch": 10.36, + "learning_rate": 6.544073111572643e-05, + "loss": 2.2699, + "step": 76720 + }, + { + "epoch": 10.36, + "learning_rate": 6.54056869049463e-05, + "loss": 2.2716, + "step": 76730 + }, + { + "epoch": 10.36, + "learning_rate": 6.537064946367021e-05, + "loss": 2.3183, + "step": 76740 + }, + { + "epoch": 10.37, + "learning_rate": 6.533561879470212e-05, + "loss": 2.2722, + "step": 76750 + }, + { + "epoch": 10.37, + "learning_rate": 6.530059490084517e-05, + "loss": 2.2961, + "step": 76760 + }, + { + "epoch": 10.37, + "learning_rate": 6.526557778490218e-05, + "loss": 2.2785, + "step": 76770 + }, + { + "epoch": 10.37, + "learning_rate": 6.523056744967527e-05, + "loss": 2.3033, + "step": 76780 + }, + { + "epoch": 10.37, + "learning_rate": 6.519556389796604e-05, + "loss": 2.3019, + "step": 76790 + }, + { + "epoch": 10.37, + "learning_rate": 6.51605671325757e-05, + "loss": 2.2826, + "step": 76800 + }, + { + "epoch": 10.37, + "learning_rate": 6.512557715630472e-05, + "loss": 2.2742, + "step": 76810 + }, + { + "epoch": 10.38, + "learning_rate": 6.509059397195306e-05, + "loss": 2.3159, + "step": 76820 + }, + { + "epoch": 10.38, + "learning_rate": 6.505561758232032e-05, + "loss": 2.2892, + "step": 76830 + }, + { + "epoch": 10.38, + "learning_rate": 6.502064799020529e-05, + "loss": 2.2933, + "step": 76840 + }, + { + "epoch": 10.38, + "learning_rate": 6.498568519840653e-05, + "loss": 2.2943, + "step": 76850 + }, + { + "epoch": 10.38, + "learning_rate": 6.495072920972162e-05, + "loss": 2.3162, + "step": 76860 + }, + { + "epoch": 10.38, + "learning_rate": 6.491578002694799e-05, + "loss": 2.3133, + "step": 76870 + }, + { + "epoch": 10.38, + "learning_rate": 6.488083765288242e-05, + "loss": 2.2815, + "step": 76880 + }, + { + "epoch": 10.38, + "learning_rate": 6.484590209032106e-05, + "loss": 2.2605, + "step": 76890 + }, + { + "epoch": 10.39, + "learning_rate": 6.481097334205956e-05, + "loss": 2.3016, + "step": 76900 + }, + { + "epoch": 10.39, + "learning_rate": 6.4776051410893e-05, + "loss": 2.2971, + "step": 76910 + }, + { + "epoch": 10.39, + "learning_rate": 6.474113629961603e-05, + "loss": 2.2624, + "step": 76920 + }, + { + "epoch": 10.39, + "learning_rate": 6.470622801102259e-05, + "loss": 2.2972, + "step": 76930 + }, + { + "epoch": 10.39, + "learning_rate": 6.467132654790615e-05, + "loss": 2.2702, + "step": 76940 + }, + { + "epoch": 10.39, + "learning_rate": 6.463643191305972e-05, + "loss": 2.2898, + "step": 76950 + }, + { + "epoch": 10.39, + "learning_rate": 6.460154410927554e-05, + "loss": 2.298, + "step": 76960 + }, + { + "epoch": 10.4, + "learning_rate": 6.456666313934559e-05, + "loss": 2.2669, + "step": 76970 + }, + { + "epoch": 10.4, + "learning_rate": 6.45317890060611e-05, + "loss": 2.2724, + "step": 76980 + }, + { + "epoch": 10.4, + "learning_rate": 6.449692171221271e-05, + "loss": 2.3033, + "step": 76990 + }, + { + "epoch": 10.4, + "learning_rate": 6.446206126059076e-05, + "loss": 2.2874, + "step": 77000 + }, + { + "epoch": 10.4, + "eval_loss": 2.435051679611206, + "eval_runtime": 1269.1365, + "eval_samples_per_second": 59.99, + "eval_steps_per_second": 4.999, + "step": 77000 + }, + { + "epoch": 10.4, + "learning_rate": 6.442720765398487e-05, + "loss": 2.3107, + "step": 77010 + }, + { + "epoch": 10.4, + "learning_rate": 6.439236089518399e-05, + "loss": 2.282, + "step": 77020 + }, + { + "epoch": 10.4, + "learning_rate": 6.435752098697687e-05, + "loss": 2.2794, + "step": 77030 + }, + { + "epoch": 10.41, + "learning_rate": 6.432268793215135e-05, + "loss": 2.2946, + "step": 77040 + }, + { + "epoch": 10.41, + "learning_rate": 6.428786173349504e-05, + "loss": 2.2772, + "step": 77050 + }, + { + "epoch": 10.41, + "learning_rate": 6.425304239379463e-05, + "loss": 2.2728, + "step": 77060 + }, + { + "epoch": 10.41, + "learning_rate": 6.421822991583663e-05, + "loss": 2.2928, + "step": 77070 + }, + { + "epoch": 10.41, + "learning_rate": 6.418342430240683e-05, + "loss": 2.2939, + "step": 77080 + }, + { + "epoch": 10.41, + "learning_rate": 6.414862555629048e-05, + "loss": 2.2903, + "step": 77090 + }, + { + "epoch": 10.41, + "learning_rate": 6.411383368027225e-05, + "loss": 2.2955, + "step": 77100 + }, + { + "epoch": 10.41, + "learning_rate": 6.407904867713626e-05, + "loss": 2.2993, + "step": 77110 + }, + { + "epoch": 10.42, + "learning_rate": 6.404427054966622e-05, + "loss": 2.271, + "step": 77120 + }, + { + "epoch": 10.42, + "learning_rate": 6.400949930064514e-05, + "loss": 2.2961, + "step": 77130 + }, + { + "epoch": 10.42, + "learning_rate": 6.397473493285545e-05, + "loss": 2.3345, + "step": 77140 + }, + { + "epoch": 10.42, + "learning_rate": 6.393997744907925e-05, + "loss": 2.2854, + "step": 77150 + }, + { + "epoch": 10.42, + "learning_rate": 6.390522685209781e-05, + "loss": 2.2907, + "step": 77160 + }, + { + "epoch": 10.42, + "learning_rate": 6.387048314469207e-05, + "loss": 2.2951, + "step": 77170 + }, + { + "epoch": 10.42, + "learning_rate": 6.383574632964231e-05, + "loss": 2.2926, + "step": 77180 + }, + { + "epoch": 10.43, + "learning_rate": 6.380101640972823e-05, + "loss": 2.2937, + "step": 77190 + }, + { + "epoch": 10.43, + "learning_rate": 6.376629338772912e-05, + "loss": 2.2831, + "step": 77200 + }, + { + "epoch": 10.43, + "learning_rate": 6.373157726642355e-05, + "loss": 2.2804, + "step": 77210 + }, + { + "epoch": 10.43, + "learning_rate": 6.36968680485896e-05, + "loss": 2.2992, + "step": 77220 + }, + { + "epoch": 10.43, + "learning_rate": 6.36621657370049e-05, + "loss": 2.2946, + "step": 77230 + }, + { + "epoch": 10.43, + "learning_rate": 6.362747033444635e-05, + "loss": 2.2904, + "step": 77240 + }, + { + "epoch": 10.43, + "learning_rate": 6.359278184369052e-05, + "loss": 2.3105, + "step": 77250 + }, + { + "epoch": 10.43, + "learning_rate": 6.355810026751309e-05, + "loss": 2.2957, + "step": 77260 + }, + { + "epoch": 10.44, + "learning_rate": 6.352342560868947e-05, + "loss": 2.2827, + "step": 77270 + }, + { + "epoch": 10.44, + "learning_rate": 6.348875786999454e-05, + "loss": 2.2912, + "step": 77280 + }, + { + "epoch": 10.44, + "learning_rate": 6.345409705420244e-05, + "loss": 2.2772, + "step": 77290 + }, + { + "epoch": 10.44, + "learning_rate": 6.341944316408685e-05, + "loss": 2.3104, + "step": 77300 + }, + { + "epoch": 10.44, + "learning_rate": 6.33847962024208e-05, + "loss": 2.2841, + "step": 77310 + }, + { + "epoch": 10.44, + "learning_rate": 6.335015617197698e-05, + "loss": 2.312, + "step": 77320 + }, + { + "epoch": 10.44, + "learning_rate": 6.331552307552734e-05, + "loss": 2.3048, + "step": 77330 + }, + { + "epoch": 10.45, + "learning_rate": 6.328089691584328e-05, + "loss": 2.3141, + "step": 77340 + }, + { + "epoch": 10.45, + "learning_rate": 6.324627769569578e-05, + "loss": 2.3042, + "step": 77350 + }, + { + "epoch": 10.45, + "learning_rate": 6.321166541785512e-05, + "loss": 2.2926, + "step": 77360 + }, + { + "epoch": 10.45, + "learning_rate": 6.317706008509113e-05, + "loss": 2.2865, + "step": 77370 + }, + { + "epoch": 10.45, + "learning_rate": 6.314246170017302e-05, + "loss": 2.2755, + "step": 77380 + }, + { + "epoch": 10.45, + "learning_rate": 6.310787026586942e-05, + "loss": 2.2796, + "step": 77390 + }, + { + "epoch": 10.45, + "learning_rate": 6.307328578494852e-05, + "loss": 2.2872, + "step": 77400 + }, + { + "epoch": 10.45, + "learning_rate": 6.303870826017784e-05, + "loss": 2.2898, + "step": 77410 + }, + { + "epoch": 10.46, + "learning_rate": 6.300413769432433e-05, + "loss": 2.2934, + "step": 77420 + }, + { + "epoch": 10.46, + "learning_rate": 6.296957409015455e-05, + "loss": 2.2928, + "step": 77430 + }, + { + "epoch": 10.46, + "learning_rate": 6.293501745043427e-05, + "loss": 2.2736, + "step": 77440 + }, + { + "epoch": 10.46, + "learning_rate": 6.290046777792899e-05, + "loss": 2.2939, + "step": 77450 + }, + { + "epoch": 10.46, + "learning_rate": 6.286592507540326e-05, + "loss": 2.2945, + "step": 77460 + }, + { + "epoch": 10.46, + "learning_rate": 6.283138934562141e-05, + "loss": 2.2923, + "step": 77470 + }, + { + "epoch": 10.46, + "learning_rate": 6.279686059134717e-05, + "loss": 2.3005, + "step": 77480 + }, + { + "epoch": 10.47, + "learning_rate": 6.276233881534356e-05, + "loss": 2.2809, + "step": 77490 + }, + { + "epoch": 10.47, + "learning_rate": 6.272782402037312e-05, + "loss": 2.2752, + "step": 77500 + }, + { + "epoch": 10.47, + "learning_rate": 6.269331620919779e-05, + "loss": 2.2721, + "step": 77510 + }, + { + "epoch": 10.47, + "learning_rate": 6.265881538457909e-05, + "loss": 2.2643, + "step": 77520 + }, + { + "epoch": 10.47, + "learning_rate": 6.262432154927786e-05, + "loss": 2.2974, + "step": 77530 + }, + { + "epoch": 10.47, + "learning_rate": 6.258983470605434e-05, + "loss": 2.2949, + "step": 77540 + }, + { + "epoch": 10.47, + "learning_rate": 6.255535485766836e-05, + "loss": 2.2999, + "step": 77550 + }, + { + "epoch": 10.48, + "learning_rate": 6.252088200687902e-05, + "loss": 2.35, + "step": 77560 + }, + { + "epoch": 10.48, + "learning_rate": 6.248641615644505e-05, + "loss": 2.3174, + "step": 77570 + }, + { + "epoch": 10.48, + "learning_rate": 6.245195730912446e-05, + "loss": 2.3191, + "step": 77580 + }, + { + "epoch": 10.48, + "learning_rate": 6.241750546767471e-05, + "loss": 2.3094, + "step": 77590 + }, + { + "epoch": 10.48, + "learning_rate": 6.238306063485284e-05, + "loss": 2.2964, + "step": 77600 + }, + { + "epoch": 10.48, + "learning_rate": 6.234862281341519e-05, + "loss": 2.3059, + "step": 77610 + }, + { + "epoch": 10.48, + "learning_rate": 6.231419200611752e-05, + "loss": 2.3062, + "step": 77620 + }, + { + "epoch": 10.48, + "learning_rate": 6.227976821571522e-05, + "loss": 2.264, + "step": 77630 + }, + { + "epoch": 10.49, + "learning_rate": 6.224535144496286e-05, + "loss": 2.3142, + "step": 77640 + }, + { + "epoch": 10.49, + "learning_rate": 6.221094169661476e-05, + "loss": 2.2902, + "step": 77650 + }, + { + "epoch": 10.49, + "learning_rate": 6.217653897342425e-05, + "loss": 2.2781, + "step": 77660 + }, + { + "epoch": 10.49, + "learning_rate": 6.21421432781445e-05, + "loss": 2.2672, + "step": 77670 + }, + { + "epoch": 10.49, + "learning_rate": 6.210775461352798e-05, + "loss": 2.2872, + "step": 77680 + }, + { + "epoch": 10.49, + "learning_rate": 6.207337298232653e-05, + "loss": 2.2814, + "step": 77690 + }, + { + "epoch": 10.49, + "learning_rate": 6.203899838729148e-05, + "loss": 2.302, + "step": 77700 + }, + { + "epoch": 10.5, + "learning_rate": 6.200463083117355e-05, + "loss": 2.3124, + "step": 77710 + }, + { + "epoch": 10.5, + "learning_rate": 6.197027031672303e-05, + "loss": 2.2752, + "step": 77720 + }, + { + "epoch": 10.5, + "learning_rate": 6.193591684668952e-05, + "loss": 2.2752, + "step": 77730 + }, + { + "epoch": 10.5, + "learning_rate": 6.190157042382202e-05, + "loss": 2.2728, + "step": 77740 + }, + { + "epoch": 10.5, + "learning_rate": 6.186723105086917e-05, + "loss": 2.2881, + "step": 77750 + }, + { + "epoch": 10.5, + "learning_rate": 6.18328987305788e-05, + "loss": 2.2903, + "step": 77760 + }, + { + "epoch": 10.5, + "learning_rate": 6.179857346569838e-05, + "loss": 2.2977, + "step": 77770 + }, + { + "epoch": 10.5, + "learning_rate": 6.176425525897471e-05, + "loss": 2.2901, + "step": 77780 + }, + { + "epoch": 10.51, + "learning_rate": 6.172994411315394e-05, + "loss": 2.2786, + "step": 77790 + }, + { + "epoch": 10.51, + "learning_rate": 6.169564003098189e-05, + "loss": 2.2717, + "step": 77800 + }, + { + "epoch": 10.51, + "learning_rate": 6.166134301520363e-05, + "loss": 2.3026, + "step": 77810 + }, + { + "epoch": 10.51, + "learning_rate": 6.162705306856366e-05, + "loss": 2.3037, + "step": 77820 + }, + { + "epoch": 10.51, + "learning_rate": 6.159277019380609e-05, + "loss": 2.3024, + "step": 77830 + }, + { + "epoch": 10.51, + "learning_rate": 6.15584943936742e-05, + "loss": 2.2744, + "step": 77840 + }, + { + "epoch": 10.51, + "learning_rate": 6.152422567091104e-05, + "loss": 2.2887, + "step": 77850 + }, + { + "epoch": 10.52, + "learning_rate": 6.148996402825866e-05, + "loss": 2.2872, + "step": 77860 + }, + { + "epoch": 10.52, + "learning_rate": 6.145570946845892e-05, + "loss": 2.2682, + "step": 77870 + }, + { + "epoch": 10.52, + "learning_rate": 6.142146199425301e-05, + "loss": 2.2777, + "step": 77880 + }, + { + "epoch": 10.52, + "learning_rate": 6.138722160838151e-05, + "loss": 2.2728, + "step": 77890 + }, + { + "epoch": 10.52, + "learning_rate": 6.135298831358438e-05, + "loss": 2.2684, + "step": 77900 + }, + { + "epoch": 10.52, + "learning_rate": 6.131876211260107e-05, + "loss": 2.2897, + "step": 77910 + }, + { + "epoch": 10.52, + "learning_rate": 6.128454300817055e-05, + "loss": 2.272, + "step": 77920 + }, + { + "epoch": 10.53, + "learning_rate": 6.125033100303104e-05, + "loss": 2.3143, + "step": 77930 + }, + { + "epoch": 10.53, + "learning_rate": 6.121612609992043e-05, + "loss": 2.2875, + "step": 77940 + }, + { + "epoch": 10.53, + "learning_rate": 6.11819283015758e-05, + "loss": 2.3092, + "step": 77950 + }, + { + "epoch": 10.53, + "learning_rate": 6.114773761073374e-05, + "loss": 2.3021, + "step": 77960 + }, + { + "epoch": 10.53, + "learning_rate": 6.11135540301304e-05, + "loss": 2.2813, + "step": 77970 + }, + { + "epoch": 10.53, + "learning_rate": 6.107937756250122e-05, + "loss": 2.2903, + "step": 77980 + }, + { + "epoch": 10.53, + "learning_rate": 6.104520821058103e-05, + "loss": 2.2945, + "step": 77990 + }, + { + "epoch": 10.53, + "learning_rate": 6.101104597710428e-05, + "loss": 2.274, + "step": 78000 + }, + { + "epoch": 10.53, + "eval_loss": 2.43227481842041, + "eval_runtime": 1269.5204, + "eval_samples_per_second": 59.971, + "eval_steps_per_second": 4.998, + "step": 78000 + }, + { + "epoch": 10.54, + "learning_rate": 6.0976890864804656e-05, + "loss": 2.295, + "step": 78010 + }, + { + "epoch": 10.54, + "learning_rate": 6.094274287641544e-05, + "loss": 2.308, + "step": 78020 + }, + { + "epoch": 10.54, + "learning_rate": 6.0908602014669235e-05, + "loss": 2.2972, + "step": 78030 + }, + { + "epoch": 10.54, + "learning_rate": 6.0874468282298016e-05, + "loss": 2.2975, + "step": 78040 + }, + { + "epoch": 10.54, + "learning_rate": 6.084034168203339e-05, + "loss": 2.2966, + "step": 78050 + }, + { + "epoch": 10.54, + "learning_rate": 6.0806222216606214e-05, + "loss": 2.3113, + "step": 78060 + }, + { + "epoch": 10.54, + "learning_rate": 6.077210988874686e-05, + "loss": 2.3093, + "step": 78070 + }, + { + "epoch": 10.55, + "learning_rate": 6.0738004701185046e-05, + "loss": 2.2944, + "step": 78080 + }, + { + "epoch": 10.55, + "learning_rate": 6.0703906656649997e-05, + "loss": 2.2833, + "step": 78090 + }, + { + "epoch": 10.55, + "learning_rate": 6.0669815757870506e-05, + "loss": 2.2676, + "step": 78100 + }, + { + "epoch": 10.55, + "learning_rate": 6.0635732007574366e-05, + "loss": 2.268, + "step": 78110 + }, + { + "epoch": 10.55, + "learning_rate": 6.060165540848927e-05, + "loss": 2.2755, + "step": 78120 + }, + { + "epoch": 10.55, + "learning_rate": 6.0567585963341994e-05, + "loss": 2.2766, + "step": 78130 + }, + { + "epoch": 10.55, + "learning_rate": 6.053352367485902e-05, + "loss": 2.2857, + "step": 78140 + }, + { + "epoch": 10.55, + "learning_rate": 6.0499468545766054e-05, + "loss": 2.2834, + "step": 78150 + }, + { + "epoch": 10.56, + "learning_rate": 6.0465420578788225e-05, + "loss": 2.3107, + "step": 78160 + }, + { + "epoch": 10.56, + "learning_rate": 6.0431379776650304e-05, + "loss": 2.2658, + "step": 78170 + }, + { + "epoch": 10.56, + "learning_rate": 6.0397346142076256e-05, + "loss": 2.2906, + "step": 78180 + }, + { + "epoch": 10.56, + "learning_rate": 6.03633196777895e-05, + "loss": 2.2933, + "step": 78190 + }, + { + "epoch": 10.56, + "learning_rate": 6.0329300386513086e-05, + "loss": 2.2867, + "step": 78200 + }, + { + "epoch": 10.56, + "learning_rate": 6.0295288270969213e-05, + "loss": 2.3138, + "step": 78210 + }, + { + "epoch": 10.56, + "learning_rate": 6.026128333387975e-05, + "loss": 2.2977, + "step": 78220 + }, + { + "epoch": 10.57, + "learning_rate": 6.022728557796583e-05, + "loss": 2.265, + "step": 78230 + }, + { + "epoch": 10.57, + "learning_rate": 6.0193295005947985e-05, + "loss": 2.2741, + "step": 78240 + }, + { + "epoch": 10.57, + "learning_rate": 6.015931162054639e-05, + "loss": 2.2933, + "step": 78250 + }, + { + "epoch": 10.57, + "learning_rate": 6.0125335424480425e-05, + "loss": 2.2991, + "step": 78260 + }, + { + "epoch": 10.57, + "learning_rate": 6.009136642046898e-05, + "loss": 2.2717, + "step": 78270 + }, + { + "epoch": 10.57, + "learning_rate": 6.005740461123031e-05, + "loss": 2.3055, + "step": 78280 + }, + { + "epoch": 10.57, + "learning_rate": 6.0023449999482194e-05, + "loss": 2.2672, + "step": 78290 + }, + { + "epoch": 10.58, + "learning_rate": 5.9989502587941904e-05, + "loss": 2.2768, + "step": 78300 + }, + { + "epoch": 10.58, + "learning_rate": 5.99555623793258e-05, + "loss": 2.31, + "step": 78310 + }, + { + "epoch": 10.58, + "learning_rate": 5.992162937635005e-05, + "loss": 2.2949, + "step": 78320 + }, + { + "epoch": 10.58, + "learning_rate": 5.988770358172995e-05, + "loss": 2.2786, + "step": 78330 + }, + { + "epoch": 10.58, + "learning_rate": 5.985378499818048e-05, + "loss": 2.2778, + "step": 78340 + }, + { + "epoch": 10.58, + "learning_rate": 5.981987362841586e-05, + "loss": 2.2803, + "step": 78350 + }, + { + "epoch": 10.58, + "learning_rate": 5.978596947514971e-05, + "loss": 2.2771, + "step": 78360 + }, + { + "epoch": 10.58, + "learning_rate": 5.975207254109529e-05, + "loss": 2.2877, + "step": 78370 + }, + { + "epoch": 10.59, + "learning_rate": 5.971818282896505e-05, + "loss": 2.3114, + "step": 78380 + }, + { + "epoch": 10.59, + "learning_rate": 5.968430034147092e-05, + "loss": 2.2926, + "step": 78390 + }, + { + "epoch": 10.59, + "learning_rate": 5.9650425081324374e-05, + "loss": 2.2887, + "step": 78400 + }, + { + "epoch": 10.59, + "learning_rate": 5.961655705123612e-05, + "loss": 2.2973, + "step": 78410 + }, + { + "epoch": 10.59, + "learning_rate": 5.958269625391649e-05, + "loss": 2.2772, + "step": 78420 + }, + { + "epoch": 10.59, + "learning_rate": 5.954884269207509e-05, + "loss": 2.2997, + "step": 78430 + }, + { + "epoch": 10.59, + "learning_rate": 5.9514996368420915e-05, + "loss": 2.2547, + "step": 78440 + }, + { + "epoch": 10.6, + "learning_rate": 5.948115728566259e-05, + "loss": 2.2791, + "step": 78450 + }, + { + "epoch": 10.6, + "learning_rate": 5.9447325446507934e-05, + "loss": 2.2986, + "step": 78460 + }, + { + "epoch": 10.6, + "learning_rate": 5.941350085366431e-05, + "loss": 2.2761, + "step": 78470 + }, + { + "epoch": 10.6, + "learning_rate": 5.937968350983839e-05, + "loss": 2.2805, + "step": 78480 + }, + { + "epoch": 10.6, + "learning_rate": 5.9345873417736426e-05, + "loss": 2.2661, + "step": 78490 + }, + { + "epoch": 10.6, + "learning_rate": 5.9312070580064095e-05, + "loss": 2.2993, + "step": 78500 + }, + { + "epoch": 10.6, + "learning_rate": 5.92782749995262e-05, + "loss": 2.2781, + "step": 78510 + }, + { + "epoch": 10.6, + "learning_rate": 5.9244486678827315e-05, + "loss": 2.2731, + "step": 78520 + }, + { + "epoch": 10.61, + "learning_rate": 5.921070562067121e-05, + "loss": 2.2779, + "step": 78530 + }, + { + "epoch": 10.61, + "learning_rate": 5.917693182776123e-05, + "loss": 2.2961, + "step": 78540 + }, + { + "epoch": 10.61, + "learning_rate": 5.9143165302800026e-05, + "loss": 2.3046, + "step": 78550 + }, + { + "epoch": 10.61, + "learning_rate": 5.910940604848966e-05, + "loss": 2.2645, + "step": 78560 + }, + { + "epoch": 10.61, + "learning_rate": 5.9075654067531716e-05, + "loss": 2.2972, + "step": 78570 + }, + { + "epoch": 10.61, + "learning_rate": 5.904190936262713e-05, + "loss": 2.2676, + "step": 78580 + }, + { + "epoch": 10.61, + "learning_rate": 5.900817193647619e-05, + "loss": 2.3157, + "step": 78590 + }, + { + "epoch": 10.62, + "learning_rate": 5.8974441791778746e-05, + "loss": 2.2877, + "step": 78600 + }, + { + "epoch": 10.62, + "learning_rate": 5.8940718931233944e-05, + "loss": 2.2691, + "step": 78610 + }, + { + "epoch": 10.62, + "learning_rate": 5.8907003357540446e-05, + "loss": 2.2873, + "step": 78620 + }, + { + "epoch": 10.62, + "learning_rate": 5.887329507339626e-05, + "loss": 2.2925, + "step": 78630 + }, + { + "epoch": 10.62, + "learning_rate": 5.883959408149877e-05, + "loss": 2.278, + "step": 78640 + }, + { + "epoch": 10.62, + "learning_rate": 5.880590038454492e-05, + "loss": 2.303, + "step": 78650 + }, + { + "epoch": 10.62, + "learning_rate": 5.877221398523095e-05, + "loss": 2.3034, + "step": 78660 + }, + { + "epoch": 10.63, + "learning_rate": 5.8738534886252544e-05, + "loss": 2.3074, + "step": 78670 + }, + { + "epoch": 10.63, + "learning_rate": 5.870486309030478e-05, + "loss": 2.3157, + "step": 78680 + }, + { + "epoch": 10.63, + "learning_rate": 5.8671198600082216e-05, + "loss": 2.2929, + "step": 78690 + }, + { + "epoch": 10.63, + "learning_rate": 5.863754141827889e-05, + "loss": 2.3178, + "step": 78700 + }, + { + "epoch": 10.63, + "learning_rate": 5.860389154758796e-05, + "loss": 2.296, + "step": 78710 + }, + { + "epoch": 10.63, + "learning_rate": 5.857024899070235e-05, + "loss": 2.2602, + "step": 78720 + }, + { + "epoch": 10.63, + "learning_rate": 5.853661375031414e-05, + "loss": 2.2957, + "step": 78730 + }, + { + "epoch": 10.63, + "learning_rate": 5.850298582911502e-05, + "loss": 2.3065, + "step": 78740 + }, + { + "epoch": 10.64, + "learning_rate": 5.846936522979595e-05, + "loss": 2.2874, + "step": 78750 + }, + { + "epoch": 10.64, + "learning_rate": 5.8435751955047314e-05, + "loss": 2.2855, + "step": 78760 + }, + { + "epoch": 10.64, + "learning_rate": 5.8402146007559065e-05, + "loss": 2.2957, + "step": 78770 + }, + { + "epoch": 10.64, + "learning_rate": 5.836854739002039e-05, + "loss": 2.2741, + "step": 78780 + }, + { + "epoch": 10.64, + "learning_rate": 5.8334956105119915e-05, + "loss": 2.295, + "step": 78790 + }, + { + "epoch": 10.64, + "learning_rate": 5.8301372155545824e-05, + "loss": 2.2828, + "step": 78800 + }, + { + "epoch": 10.64, + "learning_rate": 5.826779554398549e-05, + "loss": 2.316, + "step": 78810 + }, + { + "epoch": 10.65, + "learning_rate": 5.823422627312594e-05, + "loss": 2.2687, + "step": 78820 + }, + { + "epoch": 10.65, + "learning_rate": 5.820066434565344e-05, + "loss": 2.291, + "step": 78830 + }, + { + "epoch": 10.65, + "learning_rate": 5.816710976425365e-05, + "loss": 2.2914, + "step": 78840 + }, + { + "epoch": 10.65, + "learning_rate": 5.813356253161185e-05, + "loss": 2.2706, + "step": 78850 + }, + { + "epoch": 10.65, + "learning_rate": 5.810002265041252e-05, + "loss": 2.2933, + "step": 78860 + }, + { + "epoch": 10.65, + "learning_rate": 5.806649012333965e-05, + "loss": 2.2978, + "step": 78870 + }, + { + "epoch": 10.65, + "learning_rate": 5.803296495307654e-05, + "loss": 2.3164, + "step": 78880 + }, + { + "epoch": 10.65, + "learning_rate": 5.799944714230604e-05, + "loss": 2.2892, + "step": 78890 + }, + { + "epoch": 10.66, + "learning_rate": 5.796593669371047e-05, + "loss": 2.296, + "step": 78900 + }, + { + "epoch": 10.66, + "learning_rate": 5.79324336099712e-05, + "loss": 2.2453, + "step": 78910 + }, + { + "epoch": 10.66, + "learning_rate": 5.789893789376945e-05, + "loss": 2.2795, + "step": 78920 + }, + { + "epoch": 10.66, + "learning_rate": 5.786544954778553e-05, + "loss": 2.3039, + "step": 78930 + }, + { + "epoch": 10.66, + "learning_rate": 5.783196857469937e-05, + "loss": 2.3022, + "step": 78940 + }, + { + "epoch": 10.66, + "learning_rate": 5.7798494977190214e-05, + "loss": 2.2918, + "step": 78950 + }, + { + "epoch": 10.66, + "learning_rate": 5.7765028757936625e-05, + "loss": 2.2881, + "step": 78960 + }, + { + "epoch": 10.67, + "learning_rate": 5.773156991961682e-05, + "loss": 2.2795, + "step": 78970 + }, + { + "epoch": 10.67, + "learning_rate": 5.769811846490819e-05, + "loss": 2.2911, + "step": 78980 + }, + { + "epoch": 10.67, + "learning_rate": 5.766467439648762e-05, + "loss": 2.2898, + "step": 78990 + }, + { + "epoch": 10.67, + "learning_rate": 5.7631237717031475e-05, + "loss": 2.2707, + "step": 79000 + }, + { + "epoch": 10.67, + "eval_loss": 2.4284002780914307, + "eval_runtime": 1269.6161, + "eval_samples_per_second": 59.967, + "eval_steps_per_second": 4.998, + "step": 79000 + }, + { + "epoch": 10.67, + "learning_rate": 5.759780842921537e-05, + "loss": 2.2848, + "step": 79010 + }, + { + "epoch": 10.67, + "learning_rate": 5.756438653571453e-05, + "loss": 2.29, + "step": 79020 + }, + { + "epoch": 10.67, + "learning_rate": 5.7530972039203435e-05, + "loss": 2.3164, + "step": 79030 + }, + { + "epoch": 10.68, + "learning_rate": 5.749756494235595e-05, + "loss": 2.2912, + "step": 79040 + }, + { + "epoch": 10.68, + "learning_rate": 5.746416524784554e-05, + "loss": 2.2782, + "step": 79050 + }, + { + "epoch": 10.68, + "learning_rate": 5.74307729583449e-05, + "loss": 2.3148, + "step": 79060 + }, + { + "epoch": 10.68, + "learning_rate": 5.739738807652616e-05, + "loss": 2.2975, + "step": 79070 + }, + { + "epoch": 10.68, + "learning_rate": 5.7364010605060865e-05, + "loss": 2.2501, + "step": 79080 + }, + { + "epoch": 10.68, + "learning_rate": 5.7330640546620006e-05, + "loss": 2.2705, + "step": 79090 + }, + { + "epoch": 10.68, + "learning_rate": 5.729727790387409e-05, + "loss": 2.3016, + "step": 79100 + }, + { + "epoch": 10.68, + "learning_rate": 5.726392267949268e-05, + "loss": 2.304, + "step": 79110 + }, + { + "epoch": 10.69, + "learning_rate": 5.723057487614514e-05, + "loss": 2.2975, + "step": 79120 + }, + { + "epoch": 10.69, + "learning_rate": 5.719723449649994e-05, + "loss": 2.2867, + "step": 79130 + }, + { + "epoch": 10.69, + "learning_rate": 5.716390154322521e-05, + "loss": 2.2845, + "step": 79140 + }, + { + "epoch": 10.69, + "learning_rate": 5.7130576018988276e-05, + "loss": 2.2987, + "step": 79150 + }, + { + "epoch": 10.69, + "learning_rate": 5.709725792645592e-05, + "loss": 2.282, + "step": 79160 + }, + { + "epoch": 10.69, + "learning_rate": 5.7063947268294463e-05, + "loss": 2.2686, + "step": 79170 + }, + { + "epoch": 10.69, + "learning_rate": 5.703064404716947e-05, + "loss": 2.2966, + "step": 79180 + }, + { + "epoch": 10.7, + "learning_rate": 5.699734826574594e-05, + "loss": 2.2845, + "step": 79190 + }, + { + "epoch": 10.7, + "learning_rate": 5.696405992668837e-05, + "loss": 2.2971, + "step": 79200 + }, + { + "epoch": 10.7, + "learning_rate": 5.6930779032660536e-05, + "loss": 2.2675, + "step": 79210 + }, + { + "epoch": 10.7, + "learning_rate": 5.689750558632579e-05, + "loss": 2.2866, + "step": 79220 + }, + { + "epoch": 10.7, + "learning_rate": 5.6864239590346636e-05, + "loss": 2.2823, + "step": 79230 + }, + { + "epoch": 10.7, + "learning_rate": 5.683098104738516e-05, + "loss": 2.2909, + "step": 79240 + }, + { + "epoch": 10.7, + "learning_rate": 5.6797729960102914e-05, + "loss": 2.2884, + "step": 79250 + }, + { + "epoch": 10.7, + "learning_rate": 5.676448633116067e-05, + "loss": 2.2737, + "step": 79260 + }, + { + "epoch": 10.71, + "learning_rate": 5.673125016321872e-05, + "loss": 2.3109, + "step": 79270 + }, + { + "epoch": 10.71, + "learning_rate": 5.669802145893666e-05, + "loss": 2.2788, + "step": 79280 + }, + { + "epoch": 10.71, + "learning_rate": 5.666480022097361e-05, + "loss": 2.2852, + "step": 79290 + }, + { + "epoch": 10.71, + "learning_rate": 5.6631586451988134e-05, + "loss": 2.2868, + "step": 79300 + }, + { + "epoch": 10.71, + "learning_rate": 5.6598380154637915e-05, + "loss": 2.2887, + "step": 79310 + }, + { + "epoch": 10.71, + "learning_rate": 5.6565181331580344e-05, + "loss": 2.2737, + "step": 79320 + }, + { + "epoch": 10.71, + "learning_rate": 5.6531989985472036e-05, + "loss": 2.2874, + "step": 79330 + }, + { + "epoch": 10.72, + "learning_rate": 5.649880611896913e-05, + "loss": 2.2738, + "step": 79340 + }, + { + "epoch": 10.72, + "learning_rate": 5.646562973472709e-05, + "loss": 2.2959, + "step": 79350 + }, + { + "epoch": 10.72, + "learning_rate": 5.643246083540073e-05, + "loss": 2.2878, + "step": 79360 + }, + { + "epoch": 10.72, + "learning_rate": 5.6399299423644414e-05, + "loss": 2.2994, + "step": 79370 + }, + { + "epoch": 10.72, + "learning_rate": 5.63661455021118e-05, + "loss": 2.283, + "step": 79380 + }, + { + "epoch": 10.72, + "learning_rate": 5.63329990734559e-05, + "loss": 2.2741, + "step": 79390 + }, + { + "epoch": 10.72, + "learning_rate": 5.6299860140329294e-05, + "loss": 2.2766, + "step": 79400 + }, + { + "epoch": 10.73, + "learning_rate": 5.626672870538379e-05, + "loss": 2.2966, + "step": 79410 + }, + { + "epoch": 10.73, + "learning_rate": 5.623360477127078e-05, + "loss": 2.2676, + "step": 79420 + }, + { + "epoch": 10.73, + "learning_rate": 5.620048834064078e-05, + "loss": 2.2815, + "step": 79430 + }, + { + "epoch": 10.73, + "learning_rate": 5.6167379416143956e-05, + "loss": 2.2872, + "step": 79440 + }, + { + "epoch": 10.73, + "learning_rate": 5.613427800042985e-05, + "loss": 2.3198, + "step": 79450 + }, + { + "epoch": 10.73, + "learning_rate": 5.610118409614728e-05, + "loss": 2.3054, + "step": 79460 + }, + { + "epoch": 10.73, + "learning_rate": 5.606809770594454e-05, + "loss": 2.2876, + "step": 79470 + }, + { + "epoch": 10.73, + "learning_rate": 5.603501883246922e-05, + "loss": 2.2764, + "step": 79480 + }, + { + "epoch": 10.74, + "learning_rate": 5.600194747836849e-05, + "loss": 2.2756, + "step": 79490 + }, + { + "epoch": 10.74, + "learning_rate": 5.59688836462889e-05, + "loss": 2.2961, + "step": 79500 + }, + { + "epoch": 10.74, + "learning_rate": 5.593582733887614e-05, + "loss": 2.3134, + "step": 79510 + }, + { + "epoch": 10.74, + "learning_rate": 5.590277855877561e-05, + "loss": 2.2926, + "step": 79520 + }, + { + "epoch": 10.74, + "learning_rate": 5.5869737308631876e-05, + "loss": 2.254, + "step": 79530 + }, + { + "epoch": 10.74, + "learning_rate": 5.583670359108914e-05, + "loss": 2.2954, + "step": 79540 + }, + { + "epoch": 10.74, + "learning_rate": 5.580367740879078e-05, + "loss": 2.2802, + "step": 79550 + }, + { + "epoch": 10.75, + "learning_rate": 5.57706587643796e-05, + "loss": 2.2873, + "step": 79560 + }, + { + "epoch": 10.75, + "learning_rate": 5.5737647660497964e-05, + "loss": 2.2875, + "step": 79570 + }, + { + "epoch": 10.75, + "learning_rate": 5.5704644099787504e-05, + "loss": 2.2896, + "step": 79580 + }, + { + "epoch": 10.75, + "learning_rate": 5.567164808488918e-05, + "loss": 2.301, + "step": 79590 + }, + { + "epoch": 10.75, + "learning_rate": 5.5638659618443564e-05, + "loss": 2.2731, + "step": 79600 + }, + { + "epoch": 10.75, + "learning_rate": 5.560567870309038e-05, + "loss": 2.2719, + "step": 79610 + }, + { + "epoch": 10.75, + "learning_rate": 5.557270534146902e-05, + "loss": 2.2989, + "step": 79620 + }, + { + "epoch": 10.75, + "learning_rate": 5.5539739536217913e-05, + "loss": 2.274, + "step": 79630 + }, + { + "epoch": 10.76, + "learning_rate": 5.55067812899752e-05, + "loss": 2.2845, + "step": 79640 + }, + { + "epoch": 10.76, + "learning_rate": 5.547383060537834e-05, + "loss": 2.3124, + "step": 79650 + }, + { + "epoch": 10.76, + "learning_rate": 5.544088748506412e-05, + "loss": 2.2739, + "step": 79660 + }, + { + "epoch": 10.76, + "learning_rate": 5.5407951931668725e-05, + "loss": 2.2712, + "step": 79670 + }, + { + "epoch": 10.76, + "learning_rate": 5.537502394782773e-05, + "loss": 2.2952, + "step": 79680 + }, + { + "epoch": 10.76, + "learning_rate": 5.5342103536176194e-05, + "loss": 2.2665, + "step": 79690 + }, + { + "epoch": 10.76, + "learning_rate": 5.530919069934859e-05, + "loss": 2.2824, + "step": 79700 + }, + { + "epoch": 10.77, + "learning_rate": 5.5276285439978515e-05, + "loss": 2.2958, + "step": 79710 + }, + { + "epoch": 10.77, + "learning_rate": 5.524338776069931e-05, + "loss": 2.2555, + "step": 79720 + }, + { + "epoch": 10.77, + "learning_rate": 5.521049766414344e-05, + "loss": 2.2854, + "step": 79730 + }, + { + "epoch": 10.77, + "learning_rate": 5.517761515294299e-05, + "loss": 2.2866, + "step": 79740 + }, + { + "epoch": 10.77, + "learning_rate": 5.514474022972926e-05, + "loss": 2.2921, + "step": 79750 + }, + { + "epoch": 10.77, + "learning_rate": 5.5111872897132954e-05, + "loss": 2.2582, + "step": 79760 + }, + { + "epoch": 10.77, + "learning_rate": 5.5079013157784316e-05, + "loss": 2.2779, + "step": 79770 + }, + { + "epoch": 10.78, + "learning_rate": 5.504616101431285e-05, + "loss": 2.2926, + "step": 79780 + }, + { + "epoch": 10.78, + "learning_rate": 5.501331646934743e-05, + "loss": 2.2771, + "step": 79790 + }, + { + "epoch": 10.78, + "learning_rate": 5.4980479525516495e-05, + "loss": 2.2586, + "step": 79800 + }, + { + "epoch": 10.78, + "learning_rate": 5.494765018544763e-05, + "loss": 2.2869, + "step": 79810 + }, + { + "epoch": 10.78, + "learning_rate": 5.491482845176812e-05, + "loss": 2.2987, + "step": 79820 + }, + { + "epoch": 10.78, + "learning_rate": 5.488201432710426e-05, + "loss": 2.2827, + "step": 79830 + }, + { + "epoch": 10.78, + "learning_rate": 5.484920781408203e-05, + "loss": 2.2845, + "step": 79840 + }, + { + "epoch": 10.78, + "learning_rate": 5.481640891532676e-05, + "loss": 2.2711, + "step": 79850 + }, + { + "epoch": 10.79, + "learning_rate": 5.4783617633463085e-05, + "loss": 2.2954, + "step": 79860 + }, + { + "epoch": 10.79, + "learning_rate": 5.475083397111506e-05, + "loss": 2.2819, + "step": 79870 + }, + { + "epoch": 10.79, + "learning_rate": 5.471805793090607e-05, + "loss": 2.3067, + "step": 79880 + }, + { + "epoch": 10.79, + "learning_rate": 5.468528951545904e-05, + "loss": 2.269, + "step": 79890 + }, + { + "epoch": 10.79, + "learning_rate": 5.465252872739628e-05, + "loss": 2.2707, + "step": 79900 + }, + { + "epoch": 10.79, + "learning_rate": 5.461977556933922e-05, + "loss": 2.301, + "step": 79910 + }, + { + "epoch": 10.79, + "learning_rate": 5.4587030043909e-05, + "loss": 2.2915, + "step": 79920 + }, + { + "epoch": 10.8, + "learning_rate": 5.4554292153725955e-05, + "loss": 2.3048, + "step": 79930 + }, + { + "epoch": 10.8, + "learning_rate": 5.452156190140996e-05, + "loss": 2.2852, + "step": 79940 + }, + { + "epoch": 10.8, + "learning_rate": 5.4488839289580127e-05, + "loss": 2.2639, + "step": 79950 + }, + { + "epoch": 10.8, + "learning_rate": 5.445612432085499e-05, + "loss": 2.3, + "step": 79960 + }, + { + "epoch": 10.8, + "learning_rate": 5.4423416997852613e-05, + "loss": 2.2824, + "step": 79970 + }, + { + "epoch": 10.8, + "learning_rate": 5.439071732319027e-05, + "loss": 2.2732, + "step": 79980 + }, + { + "epoch": 10.8, + "learning_rate": 5.4358025299484645e-05, + "loss": 2.308, + "step": 79990 + }, + { + "epoch": 10.8, + "learning_rate": 5.4325340929351976e-05, + "loss": 2.273, + "step": 80000 + }, + { + "epoch": 10.8, + "eval_loss": 2.4251482486724854, + "eval_runtime": 1267.5227, + "eval_samples_per_second": 60.066, + "eval_steps_per_second": 5.006, + "step": 80000 + }, + { + "epoch": 10.81, + "learning_rate": 5.429266421540763e-05, + "loss": 2.2751, + "step": 80010 + }, + { + "epoch": 10.81, + "learning_rate": 5.42599951602667e-05, + "loss": 2.2898, + "step": 80020 + }, + { + "epoch": 10.81, + "learning_rate": 5.4227333766543233e-05, + "loss": 2.2951, + "step": 80030 + }, + { + "epoch": 10.81, + "learning_rate": 5.419468003685101e-05, + "loss": 2.302, + "step": 80040 + }, + { + "epoch": 10.81, + "learning_rate": 5.416203397380313e-05, + "loss": 2.2958, + "step": 80050 + }, + { + "epoch": 10.81, + "learning_rate": 5.412939558001199e-05, + "loss": 2.3045, + "step": 80060 + }, + { + "epoch": 10.81, + "learning_rate": 5.4096764858089406e-05, + "loss": 2.281, + "step": 80070 + }, + { + "epoch": 10.82, + "learning_rate": 5.406414181064654e-05, + "loss": 2.2879, + "step": 80080 + }, + { + "epoch": 10.82, + "learning_rate": 5.403152644029412e-05, + "loss": 2.2757, + "step": 80090 + }, + { + "epoch": 10.82, + "learning_rate": 5.3998918749642046e-05, + "loss": 2.29, + "step": 80100 + }, + { + "epoch": 10.82, + "learning_rate": 5.3966318741299655e-05, + "loss": 2.2615, + "step": 80110 + }, + { + "epoch": 10.82, + "learning_rate": 5.39337264178758e-05, + "loss": 2.2581, + "step": 80120 + }, + { + "epoch": 10.82, + "learning_rate": 5.390114178197854e-05, + "loss": 2.3091, + "step": 80130 + }, + { + "epoch": 10.82, + "learning_rate": 5.386856483621548e-05, + "loss": 2.2911, + "step": 80140 + }, + { + "epoch": 10.83, + "learning_rate": 5.3835995583193464e-05, + "loss": 2.2865, + "step": 80150 + }, + { + "epoch": 10.83, + "learning_rate": 5.380343402551878e-05, + "loss": 2.2772, + "step": 80160 + }, + { + "epoch": 10.83, + "learning_rate": 5.377088016579717e-05, + "loss": 2.3031, + "step": 80170 + }, + { + "epoch": 10.83, + "learning_rate": 5.373833400663366e-05, + "loss": 2.306, + "step": 80180 + }, + { + "epoch": 10.83, + "learning_rate": 5.370579555063265e-05, + "loss": 2.2853, + "step": 80190 + }, + { + "epoch": 10.83, + "learning_rate": 5.367326480039806e-05, + "loss": 2.2855, + "step": 80200 + }, + { + "epoch": 10.83, + "learning_rate": 5.3640741758533015e-05, + "loss": 2.3047, + "step": 80210 + }, + { + "epoch": 10.83, + "learning_rate": 5.360822642764025e-05, + "loss": 2.3235, + "step": 80220 + }, + { + "epoch": 10.84, + "learning_rate": 5.3575718810321555e-05, + "loss": 2.3011, + "step": 80230 + }, + { + "epoch": 10.84, + "learning_rate": 5.354321890917838e-05, + "loss": 2.265, + "step": 80240 + }, + { + "epoch": 10.84, + "learning_rate": 5.351072672681153e-05, + "loss": 2.275, + "step": 80250 + }, + { + "epoch": 10.84, + "learning_rate": 5.347824226582108e-05, + "loss": 2.2868, + "step": 80260 + }, + { + "epoch": 10.84, + "learning_rate": 5.344576552880653e-05, + "loss": 2.2789, + "step": 80270 + }, + { + "epoch": 10.84, + "learning_rate": 5.341329651836676e-05, + "loss": 2.3153, + "step": 80280 + }, + { + "epoch": 10.84, + "learning_rate": 5.338083523710008e-05, + "loss": 2.2779, + "step": 80290 + }, + { + "epoch": 10.85, + "learning_rate": 5.334838168760414e-05, + "loss": 2.3076, + "step": 80300 + }, + { + "epoch": 10.85, + "learning_rate": 5.3315935872475904e-05, + "loss": 2.3193, + "step": 80310 + }, + { + "epoch": 10.85, + "learning_rate": 5.328349779431191e-05, + "loss": 2.3109, + "step": 80320 + }, + { + "epoch": 10.85, + "learning_rate": 5.325106745570784e-05, + "loss": 2.3218, + "step": 80330 + }, + { + "epoch": 10.85, + "learning_rate": 5.321864485925896e-05, + "loss": 2.3136, + "step": 80340 + }, + { + "epoch": 10.85, + "learning_rate": 5.318623000755981e-05, + "loss": 2.2929, + "step": 80350 + }, + { + "epoch": 10.85, + "learning_rate": 5.3153822903204255e-05, + "loss": 2.2945, + "step": 80360 + }, + { + "epoch": 10.85, + "learning_rate": 5.3121423548785707e-05, + "loss": 2.2844, + "step": 80370 + }, + { + "epoch": 10.86, + "learning_rate": 5.308903194689685e-05, + "loss": 2.2494, + "step": 80380 + }, + { + "epoch": 10.86, + "learning_rate": 5.305664810012967e-05, + "loss": 2.2886, + "step": 80390 + }, + { + "epoch": 10.86, + "learning_rate": 5.302427201107576e-05, + "loss": 2.2692, + "step": 80400 + }, + { + "epoch": 10.86, + "learning_rate": 5.299190368232583e-05, + "loss": 2.2851, + "step": 80410 + }, + { + "epoch": 10.86, + "learning_rate": 5.295954311647027e-05, + "loss": 2.2946, + "step": 80420 + }, + { + "epoch": 10.86, + "learning_rate": 5.2927190316098455e-05, + "loss": 2.2967, + "step": 80430 + }, + { + "epoch": 10.86, + "learning_rate": 5.289484528379946e-05, + "loss": 2.28, + "step": 80440 + }, + { + "epoch": 10.87, + "learning_rate": 5.28625080221617e-05, + "loss": 2.2691, + "step": 80450 + }, + { + "epoch": 10.87, + "learning_rate": 5.2830178533772856e-05, + "loss": 2.274, + "step": 80460 + }, + { + "epoch": 10.87, + "learning_rate": 5.2797856821220016e-05, + "loss": 2.305, + "step": 80470 + }, + { + "epoch": 10.87, + "learning_rate": 5.2765542887089626e-05, + "loss": 2.2657, + "step": 80480 + }, + { + "epoch": 10.87, + "learning_rate": 5.2733236733967666e-05, + "loss": 2.3138, + "step": 80490 + }, + { + "epoch": 10.87, + "learning_rate": 5.2700938364439286e-05, + "loss": 2.2829, + "step": 80500 + }, + { + "epoch": 10.87, + "learning_rate": 5.2668647781089096e-05, + "loss": 2.2951, + "step": 80510 + }, + { + "epoch": 10.87, + "learning_rate": 5.2636364986501175e-05, + "loss": 2.2903, + "step": 80520 + }, + { + "epoch": 10.88, + "learning_rate": 5.2604089983258766e-05, + "loss": 2.2999, + "step": 80530 + }, + { + "epoch": 10.88, + "learning_rate": 5.257182277394477e-05, + "loss": 2.2873, + "step": 80540 + }, + { + "epoch": 10.88, + "learning_rate": 5.253956336114122e-05, + "loss": 2.306, + "step": 80550 + }, + { + "epoch": 10.88, + "learning_rate": 5.250731174742958e-05, + "loss": 2.287, + "step": 80560 + }, + { + "epoch": 10.88, + "learning_rate": 5.247506793539082e-05, + "loss": 2.3244, + "step": 80570 + }, + { + "epoch": 10.88, + "learning_rate": 5.244283192760515e-05, + "loss": 2.2891, + "step": 80580 + }, + { + "epoch": 10.88, + "learning_rate": 5.241060372665214e-05, + "loss": 2.3038, + "step": 80590 + }, + { + "epoch": 10.89, + "learning_rate": 5.237838333511091e-05, + "loss": 2.2691, + "step": 80600 + }, + { + "epoch": 10.89, + "learning_rate": 5.2346170755559706e-05, + "loss": 2.3065, + "step": 80610 + }, + { + "epoch": 10.89, + "learning_rate": 5.231396599057646e-05, + "loss": 2.2722, + "step": 80620 + }, + { + "epoch": 10.89, + "learning_rate": 5.2281769042738084e-05, + "loss": 2.2847, + "step": 80630 + }, + { + "epoch": 10.89, + "learning_rate": 5.224957991462119e-05, + "loss": 2.2837, + "step": 80640 + }, + { + "epoch": 10.89, + "learning_rate": 5.2217398608801685e-05, + "loss": 2.2845, + "step": 80650 + }, + { + "epoch": 10.89, + "learning_rate": 5.218522512785478e-05, + "loss": 2.2731, + "step": 80660 + }, + { + "epoch": 10.9, + "learning_rate": 5.215305947435511e-05, + "loss": 2.2979, + "step": 80670 + }, + { + "epoch": 10.9, + "learning_rate": 5.212090165087661e-05, + "loss": 2.2906, + "step": 80680 + }, + { + "epoch": 10.9, + "learning_rate": 5.208875165999274e-05, + "loss": 2.285, + "step": 80690 + }, + { + "epoch": 10.9, + "learning_rate": 5.205660950427622e-05, + "loss": 2.3084, + "step": 80700 + }, + { + "epoch": 10.9, + "learning_rate": 5.202447518629908e-05, + "loss": 2.2918, + "step": 80710 + }, + { + "epoch": 10.9, + "learning_rate": 5.1992348708632946e-05, + "loss": 2.2896, + "step": 80720 + }, + { + "epoch": 10.9, + "learning_rate": 5.1960230073848574e-05, + "loss": 2.32, + "step": 80730 + }, + { + "epoch": 10.9, + "learning_rate": 5.1928119284516274e-05, + "loss": 2.2792, + "step": 80740 + }, + { + "epoch": 10.91, + "learning_rate": 5.1896016343205624e-05, + "loss": 2.2994, + "step": 80750 + }, + { + "epoch": 10.91, + "learning_rate": 5.186392125248555e-05, + "loss": 2.2998, + "step": 80760 + }, + { + "epoch": 10.91, + "learning_rate": 5.183183401492451e-05, + "loss": 2.3022, + "step": 80770 + }, + { + "epoch": 10.91, + "learning_rate": 5.1799754633090144e-05, + "loss": 2.2932, + "step": 80780 + }, + { + "epoch": 10.91, + "learning_rate": 5.1767683109549545e-05, + "loss": 2.2625, + "step": 80790 + }, + { + "epoch": 10.91, + "learning_rate": 5.1735619446869225e-05, + "loss": 2.2682, + "step": 80800 + }, + { + "epoch": 10.91, + "learning_rate": 5.170356364761497e-05, + "loss": 2.2892, + "step": 80810 + }, + { + "epoch": 10.92, + "learning_rate": 5.1671515714352096e-05, + "loss": 2.2909, + "step": 80820 + }, + { + "epoch": 10.92, + "learning_rate": 5.1639475649645e-05, + "loss": 2.2913, + "step": 80830 + }, + { + "epoch": 10.92, + "learning_rate": 5.160744345605771e-05, + "loss": 2.2669, + "step": 80840 + }, + { + "epoch": 10.92, + "learning_rate": 5.157541913615363e-05, + "loss": 2.29, + "step": 80850 + }, + { + "epoch": 10.92, + "learning_rate": 5.154340269249537e-05, + "loss": 2.2832, + "step": 80860 + }, + { + "epoch": 10.92, + "learning_rate": 5.151139412764498e-05, + "loss": 2.288, + "step": 80870 + }, + { + "epoch": 10.92, + "learning_rate": 5.1479393444163865e-05, + "loss": 2.3023, + "step": 80880 + }, + { + "epoch": 10.92, + "learning_rate": 5.144740064461289e-05, + "loss": 2.3043, + "step": 80890 + }, + { + "epoch": 10.93, + "learning_rate": 5.141541573155219e-05, + "loss": 2.2672, + "step": 80900 + }, + { + "epoch": 10.93, + "learning_rate": 5.138343870754125e-05, + "loss": 2.3026, + "step": 80910 + }, + { + "epoch": 10.93, + "learning_rate": 5.135146957513905e-05, + "loss": 2.2905, + "step": 80920 + }, + { + "epoch": 10.93, + "learning_rate": 5.131950833690379e-05, + "loss": 2.2972, + "step": 80930 + }, + { + "epoch": 10.93, + "learning_rate": 5.128755499539318e-05, + "loss": 2.2918, + "step": 80940 + }, + { + "epoch": 10.93, + "learning_rate": 5.1255609553164206e-05, + "loss": 2.2765, + "step": 80950 + }, + { + "epoch": 10.93, + "learning_rate": 5.122367201277318e-05, + "loss": 2.2796, + "step": 80960 + }, + { + "epoch": 10.94, + "learning_rate": 5.119174237677594e-05, + "loss": 2.2653, + "step": 80970 + }, + { + "epoch": 10.94, + "learning_rate": 5.115982064772756e-05, + "loss": 2.2742, + "step": 80980 + }, + { + "epoch": 10.94, + "learning_rate": 5.112790682818245e-05, + "loss": 2.2945, + "step": 80990 + }, + { + "epoch": 10.94, + "learning_rate": 5.109600092069456e-05, + "loss": 2.2764, + "step": 81000 + }, + { + "epoch": 10.94, + "eval_loss": 2.421452760696411, + "eval_runtime": 1269.5215, + "eval_samples_per_second": 59.971, + "eval_steps_per_second": 4.998, + "step": 81000 + }, + { + "epoch": 10.94, + "learning_rate": 5.106410292781701e-05, + "loss": 2.3018, + "step": 81010 + }, + { + "epoch": 10.94, + "learning_rate": 5.103221285210253e-05, + "loss": 2.2967, + "step": 81020 + }, + { + "epoch": 10.94, + "learning_rate": 5.1000330696102846e-05, + "loss": 2.2983, + "step": 81030 + }, + { + "epoch": 10.95, + "learning_rate": 5.0968456462369385e-05, + "loss": 2.3023, + "step": 81040 + }, + { + "epoch": 10.95, + "learning_rate": 5.0936590153452875e-05, + "loss": 2.2806, + "step": 81050 + }, + { + "epoch": 10.95, + "learning_rate": 5.09047317719033e-05, + "loss": 2.313, + "step": 81060 + }, + { + "epoch": 10.95, + "learning_rate": 5.087288132027006e-05, + "loss": 2.2668, + "step": 81070 + }, + { + "epoch": 10.95, + "learning_rate": 5.0841038801101895e-05, + "loss": 2.2847, + "step": 81080 + }, + { + "epoch": 10.95, + "learning_rate": 5.080920421694706e-05, + "loss": 2.326, + "step": 81090 + }, + { + "epoch": 10.95, + "learning_rate": 5.077737757035296e-05, + "loss": 2.2891, + "step": 81100 + }, + { + "epoch": 10.95, + "learning_rate": 5.0745558863866456e-05, + "loss": 2.2859, + "step": 81110 + }, + { + "epoch": 10.96, + "learning_rate": 5.0713748100033866e-05, + "loss": 2.3108, + "step": 81120 + }, + { + "epoch": 10.96, + "learning_rate": 5.0681945281400686e-05, + "loss": 2.3087, + "step": 81130 + }, + { + "epoch": 10.96, + "learning_rate": 5.065015041051197e-05, + "loss": 2.273, + "step": 81140 + }, + { + "epoch": 10.96, + "learning_rate": 5.061836348991202e-05, + "loss": 2.2738, + "step": 81150 + }, + { + "epoch": 10.96, + "learning_rate": 5.0586584522144466e-05, + "loss": 2.2836, + "step": 81160 + }, + { + "epoch": 10.96, + "learning_rate": 5.0554813509752445e-05, + "loss": 2.2825, + "step": 81170 + }, + { + "epoch": 10.96, + "learning_rate": 5.052305045527833e-05, + "loss": 2.2935, + "step": 81180 + }, + { + "epoch": 10.97, + "learning_rate": 5.049129536126387e-05, + "loss": 2.2856, + "step": 81190 + }, + { + "epoch": 10.97, + "learning_rate": 5.045954823025029e-05, + "loss": 2.2946, + "step": 81200 + }, + { + "epoch": 10.97, + "learning_rate": 5.042780906477799e-05, + "loss": 2.2605, + "step": 81210 + }, + { + "epoch": 10.97, + "learning_rate": 5.0396077867387e-05, + "loss": 2.2499, + "step": 81220 + }, + { + "epoch": 10.97, + "learning_rate": 5.036435464061636e-05, + "loss": 2.2831, + "step": 81230 + }, + { + "epoch": 10.97, + "learning_rate": 5.0332639387004786e-05, + "loss": 2.2789, + "step": 81240 + }, + { + "epoch": 10.97, + "learning_rate": 5.0300932109090165e-05, + "loss": 2.2675, + "step": 81250 + }, + { + "epoch": 10.97, + "learning_rate": 5.026923280940988e-05, + "loss": 2.3216, + "step": 81260 + }, + { + "epoch": 10.98, + "learning_rate": 5.023754149050058e-05, + "loss": 2.3191, + "step": 81270 + }, + { + "epoch": 10.98, + "learning_rate": 5.0205858154898255e-05, + "loss": 2.2762, + "step": 81280 + }, + { + "epoch": 10.98, + "learning_rate": 5.017418280513839e-05, + "loss": 2.2838, + "step": 81290 + }, + { + "epoch": 10.98, + "learning_rate": 5.014251544375571e-05, + "loss": 2.2784, + "step": 81300 + }, + { + "epoch": 10.98, + "learning_rate": 5.01108560732843e-05, + "loss": 2.2607, + "step": 81310 + }, + { + "epoch": 10.98, + "learning_rate": 5.007920469625771e-05, + "loss": 2.2754, + "step": 81320 + }, + { + "epoch": 10.98, + "learning_rate": 5.004756131520871e-05, + "loss": 2.2746, + "step": 81330 + }, + { + "epoch": 10.99, + "learning_rate": 5.001592593266959e-05, + "loss": 2.2856, + "step": 81340 + }, + { + "epoch": 10.99, + "learning_rate": 4.9984298551171863e-05, + "loss": 2.2638, + "step": 81350 + }, + { + "epoch": 10.99, + "learning_rate": 4.9952679173246396e-05, + "loss": 2.2619, + "step": 81360 + }, + { + "epoch": 10.99, + "learning_rate": 4.992106780142359e-05, + "loss": 2.2789, + "step": 81370 + }, + { + "epoch": 10.99, + "learning_rate": 4.9889464438233046e-05, + "loss": 2.2674, + "step": 81380 + }, + { + "epoch": 10.99, + "learning_rate": 4.9857869086203686e-05, + "loss": 2.3019, + "step": 81390 + }, + { + "epoch": 10.99, + "learning_rate": 4.9826281747863965e-05, + "loss": 2.3092, + "step": 81400 + }, + { + "epoch": 11.0, + "learning_rate": 4.979470242574154e-05, + "loss": 2.282, + "step": 81410 + }, + { + "epoch": 11.0, + "learning_rate": 4.9763131122363595e-05, + "loss": 2.2997, + "step": 81420 + }, + { + "epoch": 11.0, + "learning_rate": 4.9731567840256406e-05, + "loss": 2.3026, + "step": 81430 + }, + { + "epoch": 11.0, + "learning_rate": 4.970001258194588e-05, + "loss": 2.2893, + "step": 81440 + }, + { + "epoch": 11.0, + "learning_rate": 4.9668465349957095e-05, + "loss": 2.2785, + "step": 81450 + }, + { + "epoch": 11.0, + "learning_rate": 4.963692614681464e-05, + "loss": 2.243, + "step": 81460 + }, + { + "epoch": 11.0, + "learning_rate": 4.960539497504235e-05, + "loss": 2.2427, + "step": 81470 + }, + { + "epoch": 11.0, + "learning_rate": 4.9573871837163384e-05, + "loss": 2.2414, + "step": 81480 + }, + { + "epoch": 11.01, + "learning_rate": 4.954235673570044e-05, + "loss": 2.245, + "step": 81490 + }, + { + "epoch": 11.01, + "learning_rate": 4.951084967317539e-05, + "loss": 2.2313, + "step": 81500 + }, + { + "epoch": 11.01, + "learning_rate": 4.9479350652109486e-05, + "loss": 2.219, + "step": 81510 + }, + { + "epoch": 11.01, + "learning_rate": 4.944785967502348e-05, + "loss": 2.2516, + "step": 81520 + }, + { + "epoch": 11.01, + "learning_rate": 4.941637674443727e-05, + "loss": 2.2551, + "step": 81530 + }, + { + "epoch": 11.01, + "learning_rate": 4.938490186287033e-05, + "loss": 2.241, + "step": 81540 + }, + { + "epoch": 11.01, + "learning_rate": 4.935343503284131e-05, + "loss": 2.2383, + "step": 81550 + }, + { + "epoch": 11.02, + "learning_rate": 4.932197625686828e-05, + "loss": 2.213, + "step": 81560 + }, + { + "epoch": 11.02, + "learning_rate": 4.9290525537468724e-05, + "loss": 2.2449, + "step": 81570 + }, + { + "epoch": 11.02, + "learning_rate": 4.92590828771594e-05, + "loss": 2.2708, + "step": 81580 + }, + { + "epoch": 11.02, + "learning_rate": 4.922764827845638e-05, + "loss": 2.2274, + "step": 81590 + }, + { + "epoch": 11.02, + "learning_rate": 4.919622174387529e-05, + "loss": 2.2443, + "step": 81600 + }, + { + "epoch": 11.02, + "learning_rate": 4.916480327593088e-05, + "loss": 2.2273, + "step": 81610 + }, + { + "epoch": 11.02, + "learning_rate": 4.913339287713746e-05, + "loss": 2.2218, + "step": 81620 + }, + { + "epoch": 11.02, + "learning_rate": 4.910199055000842e-05, + "loss": 2.2225, + "step": 81630 + }, + { + "epoch": 11.03, + "learning_rate": 4.907059629705685e-05, + "loss": 2.2662, + "step": 81640 + }, + { + "epoch": 11.03, + "learning_rate": 4.9039210120794864e-05, + "loss": 2.2634, + "step": 81650 + }, + { + "epoch": 11.03, + "learning_rate": 4.900783202373422e-05, + "loss": 2.2342, + "step": 81660 + }, + { + "epoch": 11.03, + "learning_rate": 4.897646200838583e-05, + "loss": 2.2585, + "step": 81670 + }, + { + "epoch": 11.03, + "learning_rate": 4.894510007725998e-05, + "loss": 2.2083, + "step": 81680 + }, + { + "epoch": 11.03, + "learning_rate": 4.891374623286644e-05, + "loss": 2.2787, + "step": 81690 + }, + { + "epoch": 11.03, + "learning_rate": 4.888240047771419e-05, + "loss": 2.2284, + "step": 81700 + }, + { + "epoch": 11.04, + "learning_rate": 4.885106281431157e-05, + "loss": 2.2471, + "step": 81710 + }, + { + "epoch": 11.04, + "learning_rate": 4.881973324516644e-05, + "loss": 2.2164, + "step": 81720 + }, + { + "epoch": 11.04, + "learning_rate": 4.878841177278576e-05, + "loss": 2.2687, + "step": 81730 + }, + { + "epoch": 11.04, + "learning_rate": 4.875709839967609e-05, + "loss": 2.2488, + "step": 81740 + }, + { + "epoch": 11.04, + "learning_rate": 4.872579312834318e-05, + "loss": 2.2631, + "step": 81750 + }, + { + "epoch": 11.04, + "learning_rate": 4.869449596129211e-05, + "loss": 2.2441, + "step": 81760 + }, + { + "epoch": 11.04, + "learning_rate": 4.86632069010275e-05, + "loss": 2.2561, + "step": 81770 + }, + { + "epoch": 11.05, + "learning_rate": 4.8631925950053105e-05, + "loss": 2.2234, + "step": 81780 + }, + { + "epoch": 11.05, + "learning_rate": 4.860065311087214e-05, + "loss": 2.2471, + "step": 81790 + }, + { + "epoch": 11.05, + "learning_rate": 4.8569388385987205e-05, + "loss": 2.2537, + "step": 81800 + }, + { + "epoch": 11.05, + "learning_rate": 4.8538131777900124e-05, + "loss": 2.2409, + "step": 81810 + }, + { + "epoch": 11.05, + "learning_rate": 4.850688328911229e-05, + "loss": 2.2457, + "step": 81820 + }, + { + "epoch": 11.05, + "learning_rate": 4.847564292212413e-05, + "loss": 2.2707, + "step": 81830 + }, + { + "epoch": 11.05, + "learning_rate": 4.844441067943571e-05, + "loss": 2.2723, + "step": 81840 + }, + { + "epoch": 11.05, + "learning_rate": 4.841318656354626e-05, + "loss": 2.2319, + "step": 81850 + }, + { + "epoch": 11.06, + "learning_rate": 4.838197057695451e-05, + "loss": 2.2326, + "step": 81860 + }, + { + "epoch": 11.06, + "learning_rate": 4.835076272215843e-05, + "loss": 2.2585, + "step": 81870 + }, + { + "epoch": 11.06, + "learning_rate": 4.831956300165533e-05, + "loss": 2.2301, + "step": 81880 + }, + { + "epoch": 11.06, + "learning_rate": 4.828837141794198e-05, + "loss": 2.2566, + "step": 81890 + }, + { + "epoch": 11.06, + "learning_rate": 4.8257187973514394e-05, + "loss": 2.2452, + "step": 81900 + }, + { + "epoch": 11.06, + "learning_rate": 4.8226012670867944e-05, + "loss": 2.2716, + "step": 81910 + }, + { + "epoch": 11.06, + "learning_rate": 4.8194845512497425e-05, + "loss": 2.2636, + "step": 81920 + }, + { + "epoch": 11.07, + "learning_rate": 4.816368650089689e-05, + "loss": 2.2385, + "step": 81930 + }, + { + "epoch": 11.07, + "learning_rate": 4.8132535638559826e-05, + "loss": 2.2484, + "step": 81940 + }, + { + "epoch": 11.07, + "learning_rate": 4.8101392927979005e-05, + "loss": 2.2401, + "step": 81950 + }, + { + "epoch": 11.07, + "learning_rate": 4.807025837164651e-05, + "loss": 2.2419, + "step": 81960 + }, + { + "epoch": 11.07, + "learning_rate": 4.803913197205393e-05, + "loss": 2.2662, + "step": 81970 + }, + { + "epoch": 11.07, + "learning_rate": 4.8008013731692024e-05, + "loss": 2.2746, + "step": 81980 + }, + { + "epoch": 11.07, + "learning_rate": 4.797690365305097e-05, + "loss": 2.2405, + "step": 81990 + }, + { + "epoch": 11.07, + "learning_rate": 4.794580173862034e-05, + "loss": 2.2423, + "step": 82000 + }, + { + "epoch": 11.07, + "eval_loss": 2.424318552017212, + "eval_runtime": 1269.6246, + "eval_samples_per_second": 59.967, + "eval_steps_per_second": 4.998, + "step": 82000 + }, + { + "epoch": 11.08, + "learning_rate": 4.791470799088894e-05, + "loss": 2.245, + "step": 82010 + }, + { + "epoch": 11.08, + "learning_rate": 4.7883622412345136e-05, + "loss": 2.26, + "step": 82020 + }, + { + "epoch": 11.08, + "learning_rate": 4.78525450054763e-05, + "loss": 2.2459, + "step": 82030 + }, + { + "epoch": 11.08, + "learning_rate": 4.782147577276948e-05, + "loss": 2.2524, + "step": 82040 + }, + { + "epoch": 11.08, + "learning_rate": 4.779041471671084e-05, + "loss": 2.2293, + "step": 82050 + }, + { + "epoch": 11.08, + "learning_rate": 4.7759361839786074e-05, + "loss": 2.2178, + "step": 82060 + }, + { + "epoch": 11.08, + "learning_rate": 4.77283171444801e-05, + "loss": 2.2805, + "step": 82070 + }, + { + "epoch": 11.09, + "learning_rate": 4.769728063327715e-05, + "loss": 2.2572, + "step": 82080 + }, + { + "epoch": 11.09, + "learning_rate": 4.7666252308660964e-05, + "loss": 2.2411, + "step": 82090 + }, + { + "epoch": 11.09, + "learning_rate": 4.763523217311448e-05, + "loss": 2.2728, + "step": 82100 + }, + { + "epoch": 11.09, + "learning_rate": 4.760422022911998e-05, + "loss": 2.2542, + "step": 82110 + }, + { + "epoch": 11.09, + "learning_rate": 4.757321647915924e-05, + "loss": 2.265, + "step": 82120 + }, + { + "epoch": 11.09, + "learning_rate": 4.754222092571316e-05, + "loss": 2.2536, + "step": 82130 + }, + { + "epoch": 11.09, + "learning_rate": 4.751123357126221e-05, + "loss": 2.2627, + "step": 82140 + }, + { + "epoch": 11.1, + "learning_rate": 4.7480254418286066e-05, + "loss": 2.2304, + "step": 82150 + }, + { + "epoch": 11.1, + "learning_rate": 4.744928346926371e-05, + "loss": 2.2527, + "step": 82160 + }, + { + "epoch": 11.1, + "learning_rate": 4.7418320726673625e-05, + "loss": 2.2282, + "step": 82170 + }, + { + "epoch": 11.1, + "learning_rate": 4.7387366192993516e-05, + "loss": 2.2566, + "step": 82180 + }, + { + "epoch": 11.1, + "learning_rate": 4.735641987070041e-05, + "loss": 2.264, + "step": 82190 + }, + { + "epoch": 11.1, + "learning_rate": 4.732548176227082e-05, + "loss": 2.2314, + "step": 82200 + }, + { + "epoch": 11.1, + "learning_rate": 4.729455187018043e-05, + "loss": 2.2534, + "step": 82210 + }, + { + "epoch": 11.1, + "learning_rate": 4.7263630196904465e-05, + "loss": 2.2341, + "step": 82220 + }, + { + "epoch": 11.11, + "learning_rate": 4.723580772008744e-05, + "loss": 2.2528, + "step": 82230 + }, + { + "epoch": 11.11, + "learning_rate": 4.72049016693752e-05, + "loss": 2.2426, + "step": 82240 + }, + { + "epoch": 11.11, + "learning_rate": 4.717400384465145e-05, + "loss": 2.2633, + "step": 82250 + }, + { + "epoch": 11.11, + "learning_rate": 4.714311424838869e-05, + "loss": 2.2218, + "step": 82260 + }, + { + "epoch": 11.11, + "learning_rate": 4.7112232883058736e-05, + "loss": 2.2453, + "step": 82270 + }, + { + "epoch": 11.11, + "learning_rate": 4.7081359751132914e-05, + "loss": 2.2553, + "step": 82280 + }, + { + "epoch": 11.11, + "learning_rate": 4.7050494855081735e-05, + "loss": 2.259, + "step": 82290 + }, + { + "epoch": 11.12, + "learning_rate": 4.701963819737507e-05, + "loss": 2.2393, + "step": 82300 + }, + { + "epoch": 11.12, + "learning_rate": 4.698878978048227e-05, + "loss": 2.2465, + "step": 82310 + }, + { + "epoch": 11.12, + "learning_rate": 4.6957949606871785e-05, + "loss": 2.2521, + "step": 82320 + }, + { + "epoch": 11.12, + "learning_rate": 4.692711767901174e-05, + "loss": 2.2667, + "step": 82330 + }, + { + "epoch": 11.12, + "learning_rate": 4.689629399936918e-05, + "loss": 2.247, + "step": 82340 + }, + { + "epoch": 11.12, + "learning_rate": 4.686547857041083e-05, + "loss": 2.2632, + "step": 82350 + }, + { + "epoch": 11.12, + "learning_rate": 4.683467139460266e-05, + "loss": 2.2376, + "step": 82360 + }, + { + "epoch": 11.12, + "learning_rate": 4.6803872474409924e-05, + "loss": 2.2602, + "step": 82370 + }, + { + "epoch": 11.13, + "learning_rate": 4.677308181229725e-05, + "loss": 2.2368, + "step": 82380 + }, + { + "epoch": 11.13, + "learning_rate": 4.674229941072855e-05, + "loss": 2.2497, + "step": 82390 + }, + { + "epoch": 11.13, + "learning_rate": 4.671152527216718e-05, + "loss": 2.2623, + "step": 82400 + }, + { + "epoch": 11.13, + "learning_rate": 4.668075939907589e-05, + "loss": 2.2419, + "step": 82410 + }, + { + "epoch": 11.13, + "learning_rate": 4.665000179391645e-05, + "loss": 2.2412, + "step": 82420 + }, + { + "epoch": 11.13, + "learning_rate": 4.6619252459150355e-05, + "loss": 2.2682, + "step": 82430 + }, + { + "epoch": 11.13, + "learning_rate": 4.658851139723812e-05, + "loss": 2.2694, + "step": 82440 + }, + { + "epoch": 11.14, + "learning_rate": 4.655777861063989e-05, + "loss": 2.2511, + "step": 82450 + }, + { + "epoch": 11.14, + "learning_rate": 4.652705410181491e-05, + "loss": 2.2464, + "step": 82460 + }, + { + "epoch": 11.14, + "learning_rate": 4.649633787322183e-05, + "loss": 2.2587, + "step": 82470 + }, + { + "epoch": 11.14, + "learning_rate": 4.6465629927318745e-05, + "loss": 2.2305, + "step": 82480 + }, + { + "epoch": 11.14, + "learning_rate": 4.6434930266562956e-05, + "loss": 2.2508, + "step": 82490 + }, + { + "epoch": 11.14, + "learning_rate": 4.6404238893411114e-05, + "loss": 2.2462, + "step": 82500 + }, + { + "epoch": 11.14, + "learning_rate": 4.637355581031929e-05, + "loss": 2.2292, + "step": 82510 + }, + { + "epoch": 11.15, + "learning_rate": 4.6342881019742794e-05, + "loss": 2.2241, + "step": 82520 + }, + { + "epoch": 11.15, + "learning_rate": 4.631221452413646e-05, + "loss": 2.2549, + "step": 82530 + }, + { + "epoch": 11.15, + "learning_rate": 4.628155632595409e-05, + "loss": 2.2514, + "step": 82540 + }, + { + "epoch": 11.15, + "learning_rate": 4.625090642764917e-05, + "loss": 2.2664, + "step": 82550 + }, + { + "epoch": 11.15, + "learning_rate": 4.622026483167446e-05, + "loss": 2.265, + "step": 82560 + }, + { + "epoch": 11.15, + "learning_rate": 4.6189631540481926e-05, + "loss": 2.2529, + "step": 82570 + }, + { + "epoch": 11.15, + "learning_rate": 4.615900655652294e-05, + "loss": 2.2489, + "step": 82580 + }, + { + "epoch": 11.15, + "learning_rate": 4.6128389882248186e-05, + "loss": 2.2335, + "step": 82590 + }, + { + "epoch": 11.16, + "learning_rate": 4.609778152010774e-05, + "loss": 2.2803, + "step": 82600 + }, + { + "epoch": 11.16, + "learning_rate": 4.606718147255108e-05, + "loss": 2.2643, + "step": 82610 + }, + { + "epoch": 11.16, + "learning_rate": 4.603658974202672e-05, + "loss": 2.2049, + "step": 82620 + }, + { + "epoch": 11.16, + "learning_rate": 4.600600633098287e-05, + "loss": 2.2738, + "step": 82630 + }, + { + "epoch": 11.16, + "learning_rate": 4.597543124186678e-05, + "loss": 2.2619, + "step": 82640 + }, + { + "epoch": 11.16, + "learning_rate": 4.594486447712529e-05, + "loss": 2.2404, + "step": 82650 + }, + { + "epoch": 11.16, + "learning_rate": 4.591430603920439e-05, + "loss": 2.2526, + "step": 82660 + }, + { + "epoch": 11.17, + "learning_rate": 4.588375593054943e-05, + "loss": 2.2237, + "step": 82670 + }, + { + "epoch": 11.17, + "learning_rate": 4.585321415360519e-05, + "loss": 2.2154, + "step": 82680 + }, + { + "epoch": 11.17, + "learning_rate": 4.582268071081571e-05, + "loss": 2.2483, + "step": 82690 + }, + { + "epoch": 11.17, + "learning_rate": 4.579215560462428e-05, + "loss": 2.2691, + "step": 82700 + }, + { + "epoch": 11.17, + "learning_rate": 4.5761638837473763e-05, + "loss": 2.2285, + "step": 82710 + }, + { + "epoch": 11.17, + "learning_rate": 4.573113041180607e-05, + "loss": 2.2747, + "step": 82720 + }, + { + "epoch": 11.17, + "learning_rate": 4.5700630330062735e-05, + "loss": 2.2642, + "step": 82730 + }, + { + "epoch": 11.17, + "learning_rate": 4.567013859468431e-05, + "loss": 2.2658, + "step": 82740 + }, + { + "epoch": 11.18, + "learning_rate": 4.563965520811089e-05, + "loss": 2.2462, + "step": 82750 + }, + { + "epoch": 11.18, + "learning_rate": 4.560918017278192e-05, + "loss": 2.2521, + "step": 82760 + }, + { + "epoch": 11.18, + "learning_rate": 4.557871349113606e-05, + "loss": 2.2667, + "step": 82770 + }, + { + "epoch": 11.18, + "learning_rate": 4.5548255165611356e-05, + "loss": 2.2677, + "step": 82780 + }, + { + "epoch": 11.18, + "learning_rate": 4.551780519864514e-05, + "loss": 2.2459, + "step": 82790 + }, + { + "epoch": 11.18, + "learning_rate": 4.5487363592674116e-05, + "loss": 2.2578, + "step": 82800 + }, + { + "epoch": 11.18, + "learning_rate": 4.5456930350134476e-05, + "loss": 2.2453, + "step": 82810 + }, + { + "epoch": 11.19, + "learning_rate": 4.542650547346135e-05, + "loss": 2.2658, + "step": 82820 + }, + { + "epoch": 11.19, + "learning_rate": 4.539608896508958e-05, + "loss": 2.2381, + "step": 82830 + }, + { + "epoch": 11.19, + "learning_rate": 4.536568082745313e-05, + "loss": 2.2516, + "step": 82840 + }, + { + "epoch": 11.19, + "learning_rate": 4.5335281062985405e-05, + "loss": 2.242, + "step": 82850 + }, + { + "epoch": 11.19, + "learning_rate": 4.5304889674119056e-05, + "loss": 2.2449, + "step": 82860 + }, + { + "epoch": 11.19, + "learning_rate": 4.5274506663286074e-05, + "loss": 2.2474, + "step": 82870 + }, + { + "epoch": 11.19, + "learning_rate": 4.5244132032917866e-05, + "loss": 2.2831, + "step": 82880 + }, + { + "epoch": 11.2, + "learning_rate": 4.521376578544509e-05, + "loss": 2.2647, + "step": 82890 + }, + { + "epoch": 11.2, + "learning_rate": 4.518340792329768e-05, + "loss": 2.2316, + "step": 82900 + }, + { + "epoch": 11.2, + "learning_rate": 4.515305844890507e-05, + "loss": 2.2553, + "step": 82910 + }, + { + "epoch": 11.2, + "learning_rate": 4.5122717364695816e-05, + "loss": 2.2428, + "step": 82920 + }, + { + "epoch": 11.2, + "learning_rate": 4.5092384673098064e-05, + "loss": 2.2477, + "step": 82930 + }, + { + "epoch": 11.2, + "learning_rate": 4.506206037653894e-05, + "loss": 2.2453, + "step": 82940 + }, + { + "epoch": 11.2, + "learning_rate": 4.503174447744518e-05, + "loss": 2.2453, + "step": 82950 + }, + { + "epoch": 11.2, + "learning_rate": 4.50014369782428e-05, + "loss": 2.2698, + "step": 82960 + }, + { + "epoch": 11.21, + "learning_rate": 4.497113788135706e-05, + "loss": 2.2357, + "step": 82970 + }, + { + "epoch": 11.21, + "learning_rate": 4.4940847189212595e-05, + "loss": 2.2562, + "step": 82980 + }, + { + "epoch": 11.21, + "learning_rate": 4.4910564904233304e-05, + "loss": 2.2335, + "step": 82990 + }, + { + "epoch": 11.21, + "learning_rate": 4.4880291028842576e-05, + "loss": 2.2444, + "step": 83000 + }, + { + "epoch": 11.21, + "eval_loss": 2.4225544929504395, + "eval_runtime": 1269.7097, + "eval_samples_per_second": 59.963, + "eval_steps_per_second": 4.997, + "step": 83000 + }, + { + "epoch": 11.21, + "learning_rate": 4.485002556546297e-05, + "loss": 2.2795, + "step": 83010 + }, + { + "epoch": 11.21, + "learning_rate": 4.4819768516516375e-05, + "loss": 2.2435, + "step": 83020 + }, + { + "epoch": 11.21, + "learning_rate": 4.478951988442415e-05, + "loss": 2.2505, + "step": 83030 + }, + { + "epoch": 11.22, + "learning_rate": 4.47592796716068e-05, + "loss": 2.2499, + "step": 83040 + }, + { + "epoch": 11.22, + "learning_rate": 4.472904788048432e-05, + "loss": 2.2537, + "step": 83050 + }, + { + "epoch": 11.22, + "learning_rate": 4.469882451347593e-05, + "loss": 2.2569, + "step": 83060 + }, + { + "epoch": 11.22, + "learning_rate": 4.4668609573000136e-05, + "loss": 2.2565, + "step": 83070 + }, + { + "epoch": 11.22, + "learning_rate": 4.4638403061474925e-05, + "loss": 2.2531, + "step": 83080 + }, + { + "epoch": 11.22, + "learning_rate": 4.4608204981317467e-05, + "loss": 2.2469, + "step": 83090 + }, + { + "epoch": 11.22, + "learning_rate": 4.4578015334944285e-05, + "loss": 2.2534, + "step": 83100 + }, + { + "epoch": 11.22, + "learning_rate": 4.454783412477132e-05, + "loss": 2.2593, + "step": 83110 + }, + { + "epoch": 11.23, + "learning_rate": 4.4517661353213676e-05, + "loss": 2.2562, + "step": 83120 + }, + { + "epoch": 11.23, + "learning_rate": 4.4487497022686045e-05, + "loss": 2.2485, + "step": 83130 + }, + { + "epoch": 11.23, + "learning_rate": 4.4457341135602044e-05, + "loss": 2.247, + "step": 83140 + }, + { + "epoch": 11.23, + "learning_rate": 4.442719369437494e-05, + "loss": 2.2531, + "step": 83150 + }, + { + "epoch": 11.23, + "learning_rate": 4.4397054701417294e-05, + "loss": 2.2328, + "step": 83160 + }, + { + "epoch": 11.23, + "learning_rate": 4.436692415914086e-05, + "loss": 2.2112, + "step": 83170 + }, + { + "epoch": 11.23, + "learning_rate": 4.43368020699568e-05, + "loss": 2.2432, + "step": 83180 + }, + { + "epoch": 11.24, + "learning_rate": 4.430668843627551e-05, + "loss": 2.2441, + "step": 83190 + }, + { + "epoch": 11.24, + "learning_rate": 4.427658326050688e-05, + "loss": 2.2463, + "step": 83200 + }, + { + "epoch": 11.24, + "learning_rate": 4.424648654505998e-05, + "loss": 2.2686, + "step": 83210 + }, + { + "epoch": 11.24, + "learning_rate": 4.421639829234318e-05, + "loss": 2.2617, + "step": 83220 + }, + { + "epoch": 11.24, + "learning_rate": 4.4186318504764365e-05, + "loss": 2.2578, + "step": 83230 + }, + { + "epoch": 11.24, + "learning_rate": 4.4156247184730484e-05, + "loss": 2.229, + "step": 83240 + }, + { + "epoch": 11.24, + "learning_rate": 4.4126184334648054e-05, + "loss": 2.2447, + "step": 83250 + }, + { + "epoch": 11.25, + "learning_rate": 4.409612995692273e-05, + "loss": 2.2324, + "step": 83260 + }, + { + "epoch": 11.25, + "learning_rate": 4.406608405395954e-05, + "loss": 2.2686, + "step": 83270 + }, + { + "epoch": 11.25, + "learning_rate": 4.403604662816293e-05, + "loss": 2.2553, + "step": 83280 + }, + { + "epoch": 11.25, + "learning_rate": 4.4006017681936534e-05, + "loss": 2.2566, + "step": 83290 + }, + { + "epoch": 11.25, + "learning_rate": 4.3975997217683346e-05, + "loss": 2.2256, + "step": 83300 + }, + { + "epoch": 11.25, + "learning_rate": 4.394598523780576e-05, + "loss": 2.2668, + "step": 83310 + }, + { + "epoch": 11.25, + "learning_rate": 4.391598174470537e-05, + "loss": 2.2709, + "step": 83320 + }, + { + "epoch": 11.25, + "learning_rate": 4.388598674078324e-05, + "loss": 2.25, + "step": 83330 + }, + { + "epoch": 11.26, + "learning_rate": 4.3856000228439536e-05, + "loss": 2.2677, + "step": 83340 + }, + { + "epoch": 11.26, + "learning_rate": 4.382602221007392e-05, + "loss": 2.2464, + "step": 83350 + }, + { + "epoch": 11.26, + "learning_rate": 4.3796052688085406e-05, + "loss": 2.2578, + "step": 83360 + }, + { + "epoch": 11.26, + "learning_rate": 4.3766091664872186e-05, + "loss": 2.2509, + "step": 83370 + }, + { + "epoch": 11.26, + "learning_rate": 4.373613914283184e-05, + "loss": 2.2421, + "step": 83380 + }, + { + "epoch": 11.26, + "learning_rate": 4.370619512436122e-05, + "loss": 2.2545, + "step": 83390 + }, + { + "epoch": 11.26, + "learning_rate": 4.3676259611856636e-05, + "loss": 2.2455, + "step": 83400 + }, + { + "epoch": 11.27, + "learning_rate": 4.364633260771355e-05, + "loss": 2.2507, + "step": 83410 + }, + { + "epoch": 11.27, + "learning_rate": 4.361641411432681e-05, + "loss": 2.2511, + "step": 83420 + }, + { + "epoch": 11.27, + "learning_rate": 4.358650413409065e-05, + "loss": 2.244, + "step": 83430 + }, + { + "epoch": 11.27, + "learning_rate": 4.355660266939849e-05, + "loss": 2.2235, + "step": 83440 + }, + { + "epoch": 11.27, + "learning_rate": 4.3526709722643214e-05, + "loss": 2.2432, + "step": 83450 + }, + { + "epoch": 11.27, + "learning_rate": 4.349682529621691e-05, + "loss": 2.2656, + "step": 83460 + }, + { + "epoch": 11.27, + "learning_rate": 4.3466949392510985e-05, + "loss": 2.2714, + "step": 83470 + }, + { + "epoch": 11.27, + "learning_rate": 4.343708201391628e-05, + "loss": 2.2752, + "step": 83480 + }, + { + "epoch": 11.28, + "learning_rate": 4.340722316282286e-05, + "loss": 2.2648, + "step": 83490 + }, + { + "epoch": 11.28, + "learning_rate": 4.337737284162004e-05, + "loss": 2.2249, + "step": 83500 + }, + { + "epoch": 11.28, + "learning_rate": 4.334753105269667e-05, + "loss": 2.2159, + "step": 83510 + }, + { + "epoch": 11.28, + "learning_rate": 4.331769779844065e-05, + "loss": 2.2438, + "step": 83520 + }, + { + "epoch": 11.28, + "learning_rate": 4.3287873081239495e-05, + "loss": 2.2394, + "step": 83530 + }, + { + "epoch": 11.28, + "learning_rate": 4.3258056903479675e-05, + "loss": 2.2439, + "step": 83540 + }, + { + "epoch": 11.28, + "learning_rate": 4.322824926754729e-05, + "loss": 2.2597, + "step": 83550 + }, + { + "epoch": 11.29, + "learning_rate": 4.319845017582767e-05, + "loss": 2.2512, + "step": 83560 + }, + { + "epoch": 11.29, + "learning_rate": 4.316865963070538e-05, + "loss": 2.2526, + "step": 83570 + }, + { + "epoch": 11.29, + "learning_rate": 4.313887763456439e-05, + "loss": 2.2582, + "step": 83580 + }, + { + "epoch": 11.29, + "learning_rate": 4.310910418978785e-05, + "loss": 2.2597, + "step": 83590 + }, + { + "epoch": 11.29, + "learning_rate": 4.307933929875845e-05, + "loss": 2.2518, + "step": 83600 + }, + { + "epoch": 11.29, + "learning_rate": 4.3049582963858014e-05, + "loss": 2.2475, + "step": 83610 + }, + { + "epoch": 11.29, + "learning_rate": 4.30198351874677e-05, + "loss": 2.2622, + "step": 83620 + }, + { + "epoch": 11.29, + "learning_rate": 4.29900959719681e-05, + "loss": 2.2321, + "step": 83630 + }, + { + "epoch": 11.3, + "learning_rate": 4.296036531973895e-05, + "loss": 2.2449, + "step": 83640 + }, + { + "epoch": 11.3, + "learning_rate": 4.2930643233159516e-05, + "loss": 2.2612, + "step": 83650 + }, + { + "epoch": 11.3, + "learning_rate": 4.290092971460815e-05, + "loss": 2.2513, + "step": 83660 + }, + { + "epoch": 11.3, + "learning_rate": 4.287122476646262e-05, + "loss": 2.2369, + "step": 83670 + }, + { + "epoch": 11.3, + "learning_rate": 4.284152839110007e-05, + "loss": 2.2449, + "step": 83680 + }, + { + "epoch": 11.3, + "learning_rate": 4.281184059089689e-05, + "loss": 2.2765, + "step": 83690 + }, + { + "epoch": 11.3, + "learning_rate": 4.278216136822871e-05, + "loss": 2.2751, + "step": 83700 + }, + { + "epoch": 11.31, + "learning_rate": 4.2752490725470655e-05, + "loss": 2.2421, + "step": 83710 + }, + { + "epoch": 11.31, + "learning_rate": 4.272282866499699e-05, + "loss": 2.2443, + "step": 83720 + }, + { + "epoch": 11.31, + "learning_rate": 4.269317518918147e-05, + "loss": 2.248, + "step": 83730 + }, + { + "epoch": 11.31, + "learning_rate": 4.2663530300396926e-05, + "loss": 2.2586, + "step": 83740 + }, + { + "epoch": 11.31, + "learning_rate": 4.263389400101568e-05, + "loss": 2.2591, + "step": 83750 + }, + { + "epoch": 11.31, + "learning_rate": 4.26042662934094e-05, + "loss": 2.2521, + "step": 83760 + }, + { + "epoch": 11.31, + "learning_rate": 4.257464717994891e-05, + "loss": 2.2299, + "step": 83770 + }, + { + "epoch": 11.32, + "learning_rate": 4.2545036663004453e-05, + "loss": 2.2469, + "step": 83780 + }, + { + "epoch": 11.32, + "learning_rate": 4.251543474494552e-05, + "loss": 2.253, + "step": 83790 + }, + { + "epoch": 11.32, + "learning_rate": 4.248584142814101e-05, + "loss": 2.2619, + "step": 83800 + }, + { + "epoch": 11.32, + "learning_rate": 4.2456256714959045e-05, + "loss": 2.2554, + "step": 83810 + }, + { + "epoch": 11.32, + "learning_rate": 4.242668060776704e-05, + "loss": 2.2346, + "step": 83820 + }, + { + "epoch": 11.32, + "learning_rate": 4.239711310893184e-05, + "loss": 2.2427, + "step": 83830 + }, + { + "epoch": 11.32, + "learning_rate": 4.2367554220819475e-05, + "loss": 2.2736, + "step": 83840 + }, + { + "epoch": 11.32, + "learning_rate": 4.2338003945795405e-05, + "loss": 2.2432, + "step": 83850 + }, + { + "epoch": 11.33, + "learning_rate": 4.2308462286224304e-05, + "loss": 2.2663, + "step": 83860 + }, + { + "epoch": 11.33, + "learning_rate": 4.2278929244470146e-05, + "loss": 2.2277, + "step": 83870 + }, + { + "epoch": 11.33, + "learning_rate": 4.224940482289634e-05, + "loss": 2.2424, + "step": 83880 + }, + { + "epoch": 11.33, + "learning_rate": 4.221988902386546e-05, + "loss": 2.2595, + "step": 83890 + }, + { + "epoch": 11.33, + "learning_rate": 4.219038184973944e-05, + "loss": 2.2832, + "step": 83900 + }, + { + "epoch": 11.33, + "learning_rate": 4.216088330287962e-05, + "loss": 2.2704, + "step": 83910 + }, + { + "epoch": 11.33, + "learning_rate": 4.2131393385646466e-05, + "loss": 2.2289, + "step": 83920 + }, + { + "epoch": 11.34, + "learning_rate": 4.2101912100400005e-05, + "loss": 2.2214, + "step": 83930 + }, + { + "epoch": 11.34, + "learning_rate": 4.207243944949923e-05, + "loss": 2.2472, + "step": 83940 + }, + { + "epoch": 11.34, + "learning_rate": 4.204297543530272e-05, + "loss": 2.2427, + "step": 83950 + }, + { + "epoch": 11.34, + "learning_rate": 4.201352006016833e-05, + "loss": 2.2418, + "step": 83960 + }, + { + "epoch": 11.34, + "learning_rate": 4.1984073326453155e-05, + "loss": 2.2555, + "step": 83970 + }, + { + "epoch": 11.34, + "learning_rate": 4.195463523651357e-05, + "loss": 2.2512, + "step": 83980 + }, + { + "epoch": 11.34, + "learning_rate": 4.192520579270529e-05, + "loss": 2.2549, + "step": 83990 + }, + { + "epoch": 11.34, + "learning_rate": 4.1895784997383405e-05, + "loss": 2.2831, + "step": 84000 + }, + { + "epoch": 11.34, + "eval_loss": 2.420787811279297, + "eval_runtime": 1269.4908, + "eval_samples_per_second": 59.973, + "eval_steps_per_second": 4.998, + "step": 84000 + }, + { + "epoch": 11.35, + "learning_rate": 4.1866372852902255e-05, + "loss": 2.2513, + "step": 84010 + }, + { + "epoch": 11.35, + "learning_rate": 4.183696936161545e-05, + "loss": 2.2547, + "step": 84020 + }, + { + "epoch": 11.35, + "learning_rate": 4.1807574525876006e-05, + "loss": 2.2535, + "step": 84030 + }, + { + "epoch": 11.35, + "learning_rate": 4.177818834803613e-05, + "loss": 2.2693, + "step": 84040 + }, + { + "epoch": 11.35, + "learning_rate": 4.174881083044746e-05, + "loss": 2.2306, + "step": 84050 + }, + { + "epoch": 11.35, + "learning_rate": 4.171944197546087e-05, + "loss": 2.2402, + "step": 84060 + }, + { + "epoch": 11.35, + "learning_rate": 4.169008178542646e-05, + "loss": 2.2731, + "step": 84070 + }, + { + "epoch": 11.36, + "learning_rate": 4.166073026269385e-05, + "loss": 2.2621, + "step": 84080 + }, + { + "epoch": 11.36, + "learning_rate": 4.163138740961179e-05, + "loss": 2.2426, + "step": 84090 + }, + { + "epoch": 11.36, + "learning_rate": 4.160205322852831e-05, + "loss": 2.2714, + "step": 84100 + }, + { + "epoch": 11.36, + "learning_rate": 4.157272772179097e-05, + "loss": 2.2529, + "step": 84110 + }, + { + "epoch": 11.36, + "learning_rate": 4.1543410891746346e-05, + "loss": 2.2735, + "step": 84120 + }, + { + "epoch": 11.36, + "learning_rate": 4.151410274074063e-05, + "loss": 2.2529, + "step": 84130 + }, + { + "epoch": 11.36, + "learning_rate": 4.148480327111897e-05, + "loss": 2.2617, + "step": 84140 + }, + { + "epoch": 11.37, + "learning_rate": 4.1455512485226086e-05, + "loss": 2.2798, + "step": 84150 + }, + { + "epoch": 11.37, + "learning_rate": 4.1426230385405956e-05, + "loss": 2.2402, + "step": 84160 + }, + { + "epoch": 11.37, + "learning_rate": 4.13969569740018e-05, + "loss": 2.2565, + "step": 84170 + }, + { + "epoch": 11.37, + "learning_rate": 4.136769225335615e-05, + "loss": 2.2643, + "step": 84180 + }, + { + "epoch": 11.37, + "learning_rate": 4.1338436225810834e-05, + "loss": 2.2671, + "step": 84190 + }, + { + "epoch": 11.37, + "learning_rate": 4.130918889370709e-05, + "loss": 2.2604, + "step": 84200 + }, + { + "epoch": 11.37, + "learning_rate": 4.1279950259385335e-05, + "loss": 2.2724, + "step": 84210 + }, + { + "epoch": 11.37, + "learning_rate": 4.12507203251853e-05, + "loss": 2.273, + "step": 84220 + }, + { + "epoch": 11.38, + "learning_rate": 4.122149909344615e-05, + "loss": 2.2556, + "step": 84230 + }, + { + "epoch": 11.38, + "learning_rate": 4.119228656650616e-05, + "loss": 2.2618, + "step": 84240 + }, + { + "epoch": 11.38, + "learning_rate": 4.1163082746703104e-05, + "loss": 2.2444, + "step": 84250 + }, + { + "epoch": 11.38, + "learning_rate": 4.1133887636373916e-05, + "loss": 2.2378, + "step": 84260 + }, + { + "epoch": 11.38, + "learning_rate": 4.110470123785482e-05, + "loss": 2.2552, + "step": 84270 + }, + { + "epoch": 11.38, + "learning_rate": 4.1075523553481536e-05, + "loss": 2.2419, + "step": 84280 + }, + { + "epoch": 11.38, + "learning_rate": 4.104635458558887e-05, + "loss": 2.2753, + "step": 84290 + }, + { + "epoch": 11.39, + "learning_rate": 4.101719433651099e-05, + "loss": 2.2632, + "step": 84300 + }, + { + "epoch": 11.39, + "learning_rate": 4.0988042808581476e-05, + "loss": 2.2506, + "step": 84310 + }, + { + "epoch": 11.39, + "learning_rate": 4.095890000413304e-05, + "loss": 2.2524, + "step": 84320 + }, + { + "epoch": 11.39, + "learning_rate": 4.0929765925497905e-05, + "loss": 2.2707, + "step": 84330 + }, + { + "epoch": 11.39, + "learning_rate": 4.0900640575007295e-05, + "loss": 2.2508, + "step": 84340 + }, + { + "epoch": 11.39, + "learning_rate": 4.087152395499206e-05, + "loss": 2.2335, + "step": 84350 + }, + { + "epoch": 11.39, + "learning_rate": 4.08424160677821e-05, + "loss": 2.2286, + "step": 84360 + }, + { + "epoch": 11.39, + "learning_rate": 4.081331691570681e-05, + "loss": 2.2349, + "step": 84370 + }, + { + "epoch": 11.4, + "learning_rate": 4.078713514930374e-05, + "loss": 2.2661, + "step": 84380 + }, + { + "epoch": 11.4, + "learning_rate": 4.0758052600398977e-05, + "loss": 2.2446, + "step": 84390 + }, + { + "epoch": 11.4, + "learning_rate": 4.072897879337989e-05, + "loss": 2.274, + "step": 84400 + }, + { + "epoch": 11.4, + "learning_rate": 4.0699913730572993e-05, + "loss": 2.2652, + "step": 84410 + }, + { + "epoch": 11.4, + "learning_rate": 4.067085741430424e-05, + "loss": 2.2663, + "step": 84420 + }, + { + "epoch": 11.4, + "learning_rate": 4.064180984689875e-05, + "loss": 2.2684, + "step": 84430 + }, + { + "epoch": 11.4, + "learning_rate": 4.0612771030681084e-05, + "loss": 2.2479, + "step": 84440 + }, + { + "epoch": 11.41, + "learning_rate": 4.058374096797495e-05, + "loss": 2.2714, + "step": 84450 + }, + { + "epoch": 11.41, + "learning_rate": 4.055471966110341e-05, + "loss": 2.2633, + "step": 84460 + }, + { + "epoch": 11.41, + "learning_rate": 4.0525707112388936e-05, + "loss": 2.2269, + "step": 84470 + }, + { + "epoch": 11.41, + "learning_rate": 4.049670332415312e-05, + "loss": 2.2486, + "step": 84480 + }, + { + "epoch": 11.41, + "learning_rate": 4.046770829871698e-05, + "loss": 2.2664, + "step": 84490 + }, + { + "epoch": 11.41, + "learning_rate": 4.043872203840071e-05, + "loss": 2.2472, + "step": 84500 + }, + { + "epoch": 11.41, + "learning_rate": 4.0409744545523934e-05, + "loss": 2.2534, + "step": 84510 + }, + { + "epoch": 11.42, + "learning_rate": 4.038077582240563e-05, + "loss": 2.2595, + "step": 84520 + }, + { + "epoch": 11.42, + "learning_rate": 4.0351815871363775e-05, + "loss": 2.2801, + "step": 84530 + }, + { + "epoch": 11.42, + "learning_rate": 4.032286469471598e-05, + "loss": 2.2843, + "step": 84540 + }, + { + "epoch": 11.42, + "learning_rate": 4.029392229477889e-05, + "loss": 2.2524, + "step": 84550 + }, + { + "epoch": 11.42, + "learning_rate": 4.026498867386869e-05, + "loss": 2.25, + "step": 84560 + }, + { + "epoch": 11.42, + "learning_rate": 4.02360638343007e-05, + "loss": 2.2444, + "step": 84570 + }, + { + "epoch": 11.42, + "learning_rate": 4.0207147778389484e-05, + "loss": 2.2624, + "step": 84580 + }, + { + "epoch": 11.42, + "learning_rate": 4.017824050844914e-05, + "loss": 2.2451, + "step": 84590 + }, + { + "epoch": 11.43, + "learning_rate": 4.014934202679285e-05, + "loss": 2.2231, + "step": 84600 + }, + { + "epoch": 11.43, + "learning_rate": 4.012045233573311e-05, + "loss": 2.2637, + "step": 84610 + }, + { + "epoch": 11.43, + "learning_rate": 4.009157143758187e-05, + "loss": 2.2492, + "step": 84620 + }, + { + "epoch": 11.43, + "learning_rate": 4.0062699334650163e-05, + "loss": 2.2424, + "step": 84630 + }, + { + "epoch": 11.43, + "learning_rate": 4.003383602924855e-05, + "loss": 2.254, + "step": 84640 + }, + { + "epoch": 11.43, + "learning_rate": 4.000498152368667e-05, + "loss": 2.2343, + "step": 84650 + }, + { + "epoch": 11.43, + "learning_rate": 3.9976135820273544e-05, + "loss": 2.2545, + "step": 84660 + }, + { + "epoch": 11.44, + "learning_rate": 3.994729892131757e-05, + "loss": 2.2581, + "step": 84670 + }, + { + "epoch": 11.44, + "learning_rate": 3.991847082912634e-05, + "loss": 2.2587, + "step": 84680 + }, + { + "epoch": 11.44, + "learning_rate": 3.988965154600674e-05, + "loss": 2.242, + "step": 84690 + }, + { + "epoch": 11.44, + "learning_rate": 3.986084107426496e-05, + "loss": 2.2592, + "step": 84700 + }, + { + "epoch": 11.44, + "learning_rate": 3.983203941620653e-05, + "loss": 2.2715, + "step": 84710 + }, + { + "epoch": 11.44, + "learning_rate": 3.980324657413637e-05, + "loss": 2.2668, + "step": 84720 + }, + { + "epoch": 11.44, + "learning_rate": 3.9774462550358356e-05, + "loss": 2.2558, + "step": 84730 + }, + { + "epoch": 11.44, + "learning_rate": 3.974568734717604e-05, + "loss": 2.2395, + "step": 84740 + }, + { + "epoch": 11.45, + "learning_rate": 3.971692096689199e-05, + "loss": 2.2669, + "step": 84750 + }, + { + "epoch": 11.45, + "learning_rate": 3.968816341180831e-05, + "loss": 2.2434, + "step": 84760 + }, + { + "epoch": 11.45, + "learning_rate": 3.9659414684226195e-05, + "loss": 2.2447, + "step": 84770 + }, + { + "epoch": 11.45, + "learning_rate": 3.963067478644617e-05, + "loss": 2.2484, + "step": 84780 + }, + { + "epoch": 11.45, + "learning_rate": 3.960194372076817e-05, + "loss": 2.2804, + "step": 84790 + }, + { + "epoch": 11.45, + "learning_rate": 3.9573221489491335e-05, + "loss": 2.2325, + "step": 84800 + }, + { + "epoch": 11.45, + "learning_rate": 3.954450809491404e-05, + "loss": 2.263, + "step": 84810 + }, + { + "epoch": 11.46, + "learning_rate": 3.9515803539334114e-05, + "loss": 2.2624, + "step": 84820 + }, + { + "epoch": 11.46, + "learning_rate": 3.9487107825048494e-05, + "loss": 2.2328, + "step": 84830 + }, + { + "epoch": 11.46, + "learning_rate": 3.945842095435359e-05, + "loss": 2.2699, + "step": 84840 + }, + { + "epoch": 11.46, + "learning_rate": 3.9429742929544976e-05, + "loss": 2.2685, + "step": 84850 + }, + { + "epoch": 11.46, + "learning_rate": 3.9401073752917535e-05, + "loss": 2.2674, + "step": 84860 + }, + { + "epoch": 11.46, + "learning_rate": 3.937241342676551e-05, + "loss": 2.2804, + "step": 84870 + }, + { + "epoch": 11.46, + "learning_rate": 3.9343761953382366e-05, + "loss": 2.2374, + "step": 84880 + }, + { + "epoch": 11.47, + "learning_rate": 3.9315119335060915e-05, + "loss": 2.2469, + "step": 84890 + }, + { + "epoch": 11.47, + "learning_rate": 3.928648557409314e-05, + "loss": 2.2643, + "step": 84900 + }, + { + "epoch": 11.47, + "learning_rate": 3.925786067277045e-05, + "loss": 2.2521, + "step": 84910 + }, + { + "epoch": 11.47, + "learning_rate": 3.922924463338363e-05, + "loss": 2.2354, + "step": 84920 + }, + { + "epoch": 11.47, + "learning_rate": 3.9200637458222415e-05, + "loss": 2.2813, + "step": 84930 + }, + { + "epoch": 11.47, + "learning_rate": 3.917203914957619e-05, + "loss": 2.2346, + "step": 84940 + }, + { + "epoch": 11.47, + "learning_rate": 3.914344970973339e-05, + "loss": 2.249, + "step": 84950 + }, + { + "epoch": 11.47, + "learning_rate": 3.9114869140981914e-05, + "loss": 2.2502, + "step": 84960 + }, + { + "epoch": 11.48, + "learning_rate": 3.9086297445608825e-05, + "loss": 2.2471, + "step": 84970 + }, + { + "epoch": 11.48, + "learning_rate": 3.9057734625900485e-05, + "loss": 2.2483, + "step": 84980 + }, + { + "epoch": 11.48, + "learning_rate": 3.902918068414265e-05, + "loss": 2.275, + "step": 84990 + }, + { + "epoch": 11.48, + "learning_rate": 3.9000635622620285e-05, + "loss": 2.2273, + "step": 85000 + }, + { + "epoch": 11.48, + "eval_loss": 2.41872239112854, + "eval_runtime": 1269.1084, + "eval_samples_per_second": 59.991, + "eval_steps_per_second": 5.0, + "step": 85000 + }, + { + "epoch": 11.48, + "learning_rate": 3.8972099443617585e-05, + "loss": 2.2679, + "step": 85010 + }, + { + "epoch": 11.48, + "learning_rate": 3.89435721494182e-05, + "loss": 2.2703, + "step": 85020 + }, + { + "epoch": 11.48, + "learning_rate": 3.89150537423049e-05, + "loss": 2.2657, + "step": 85030 + }, + { + "epoch": 11.49, + "learning_rate": 3.888654422455989e-05, + "loss": 2.2604, + "step": 85040 + }, + { + "epoch": 11.49, + "learning_rate": 3.885804359846455e-05, + "loss": 2.2762, + "step": 85050 + }, + { + "epoch": 11.49, + "learning_rate": 3.882955186629955e-05, + "loss": 2.2538, + "step": 85060 + }, + { + "epoch": 11.49, + "learning_rate": 3.880106903034497e-05, + "loss": 2.2577, + "step": 85070 + }, + { + "epoch": 11.49, + "learning_rate": 3.877259509288006e-05, + "loss": 2.2308, + "step": 85080 + }, + { + "epoch": 11.49, + "learning_rate": 3.874413005618339e-05, + "loss": 2.2528, + "step": 85090 + }, + { + "epoch": 11.49, + "learning_rate": 3.8715673922532774e-05, + "loss": 2.2535, + "step": 85100 + }, + { + "epoch": 11.49, + "learning_rate": 3.868722669420542e-05, + "loss": 2.2477, + "step": 85110 + }, + { + "epoch": 11.5, + "learning_rate": 3.865878837347786e-05, + "loss": 2.2688, + "step": 85120 + }, + { + "epoch": 11.5, + "learning_rate": 3.8630358962625596e-05, + "loss": 2.2746, + "step": 85130 + }, + { + "epoch": 11.5, + "learning_rate": 3.860193846392382e-05, + "loss": 2.2542, + "step": 85140 + }, + { + "epoch": 11.5, + "learning_rate": 3.857352687964671e-05, + "loss": 2.2324, + "step": 85150 + }, + { + "epoch": 11.5, + "learning_rate": 3.854512421206796e-05, + "loss": 2.2465, + "step": 85160 + }, + { + "epoch": 11.5, + "learning_rate": 3.851673046346039e-05, + "loss": 2.2471, + "step": 85170 + }, + { + "epoch": 11.5, + "learning_rate": 3.848834563609612e-05, + "loss": 2.2383, + "step": 85180 + }, + { + "epoch": 11.51, + "learning_rate": 3.845996973224667e-05, + "loss": 2.2257, + "step": 85190 + }, + { + "epoch": 11.51, + "learning_rate": 3.843160275418273e-05, + "loss": 2.2336, + "step": 85200 + }, + { + "epoch": 11.51, + "learning_rate": 3.840324470417427e-05, + "loss": 2.2658, + "step": 85210 + }, + { + "epoch": 11.51, + "learning_rate": 3.837489558449069e-05, + "loss": 2.2721, + "step": 85220 + }, + { + "epoch": 11.51, + "learning_rate": 3.834655539740046e-05, + "loss": 2.2607, + "step": 85230 + }, + { + "epoch": 11.51, + "learning_rate": 3.8318224145171616e-05, + "loss": 2.2669, + "step": 85240 + }, + { + "epoch": 11.51, + "learning_rate": 3.828990183007112e-05, + "loss": 2.2332, + "step": 85250 + }, + { + "epoch": 11.52, + "learning_rate": 3.82615884543655e-05, + "loss": 2.2397, + "step": 85260 + }, + { + "epoch": 11.52, + "learning_rate": 3.8233284020320546e-05, + "loss": 2.2456, + "step": 85270 + }, + { + "epoch": 11.52, + "learning_rate": 3.820498853020119e-05, + "loss": 2.2724, + "step": 85280 + }, + { + "epoch": 11.52, + "learning_rate": 3.817670198627176e-05, + "loss": 2.2354, + "step": 85290 + }, + { + "epoch": 11.52, + "learning_rate": 3.814842439079577e-05, + "loss": 2.2894, + "step": 85300 + }, + { + "epoch": 11.52, + "learning_rate": 3.812015574603613e-05, + "loss": 2.2673, + "step": 85310 + }, + { + "epoch": 11.52, + "learning_rate": 3.8091896054255085e-05, + "loss": 2.2554, + "step": 85320 + }, + { + "epoch": 11.52, + "learning_rate": 3.806364531771387e-05, + "loss": 2.243, + "step": 85330 + }, + { + "epoch": 11.53, + "learning_rate": 3.803540353867335e-05, + "loss": 2.2548, + "step": 85340 + }, + { + "epoch": 11.53, + "learning_rate": 3.800717071939341e-05, + "loss": 2.2415, + "step": 85350 + }, + { + "epoch": 11.53, + "learning_rate": 3.7978946862133446e-05, + "loss": 2.2653, + "step": 85360 + }, + { + "epoch": 11.53, + "learning_rate": 3.795073196915195e-05, + "loss": 2.2381, + "step": 85370 + }, + { + "epoch": 11.53, + "learning_rate": 3.792252604270674e-05, + "loss": 2.2625, + "step": 85380 + }, + { + "epoch": 11.53, + "learning_rate": 3.789432908505502e-05, + "loss": 2.2628, + "step": 85390 + }, + { + "epoch": 11.53, + "learning_rate": 3.7866141098453166e-05, + "loss": 2.2617, + "step": 85400 + }, + { + "epoch": 11.54, + "learning_rate": 3.783796208515683e-05, + "loss": 2.252, + "step": 85410 + }, + { + "epoch": 11.54, + "learning_rate": 3.7809792047421055e-05, + "loss": 2.2558, + "step": 85420 + }, + { + "epoch": 11.54, + "learning_rate": 3.7781630987500026e-05, + "loss": 2.2576, + "step": 85430 + }, + { + "epoch": 11.54, + "learning_rate": 3.7753478907647394e-05, + "loss": 2.2186, + "step": 85440 + }, + { + "epoch": 11.54, + "learning_rate": 3.772533581011582e-05, + "loss": 2.2627, + "step": 85450 + }, + { + "epoch": 11.54, + "learning_rate": 3.769720169715748e-05, + "loss": 2.253, + "step": 85460 + }, + { + "epoch": 11.54, + "learning_rate": 3.7669076571023795e-05, + "loss": 2.268, + "step": 85470 + }, + { + "epoch": 11.54, + "learning_rate": 3.764096043396539e-05, + "loss": 2.2438, + "step": 85480 + }, + { + "epoch": 11.55, + "learning_rate": 3.761285328823222e-05, + "loss": 2.2613, + "step": 85490 + }, + { + "epoch": 11.55, + "learning_rate": 3.7584755136073434e-05, + "loss": 2.2621, + "step": 85500 + }, + { + "epoch": 11.55, + "learning_rate": 3.75566659797376e-05, + "loss": 2.255, + "step": 85510 + }, + { + "epoch": 11.55, + "learning_rate": 3.752858582147258e-05, + "loss": 2.2866, + "step": 85520 + }, + { + "epoch": 11.55, + "learning_rate": 3.7500514663525275e-05, + "loss": 2.2806, + "step": 85530 + }, + { + "epoch": 11.55, + "learning_rate": 3.747245250814214e-05, + "loss": 2.2515, + "step": 85540 + }, + { + "epoch": 11.55, + "learning_rate": 3.7444399357568725e-05, + "loss": 2.2399, + "step": 85550 + }, + { + "epoch": 11.56, + "learning_rate": 3.741635521405e-05, + "loss": 2.2913, + "step": 85560 + }, + { + "epoch": 11.56, + "learning_rate": 3.7388320079830135e-05, + "loss": 2.2761, + "step": 85570 + }, + { + "epoch": 11.56, + "learning_rate": 3.736029395715251e-05, + "loss": 2.2556, + "step": 85580 + }, + { + "epoch": 11.56, + "learning_rate": 3.7332276848259975e-05, + "loss": 2.241, + "step": 85590 + }, + { + "epoch": 11.56, + "learning_rate": 3.73042687553945e-05, + "loss": 2.2683, + "step": 85600 + }, + { + "epoch": 11.56, + "learning_rate": 3.727626968079732e-05, + "loss": 2.2806, + "step": 85610 + }, + { + "epoch": 11.56, + "learning_rate": 3.724827962670913e-05, + "loss": 2.2419, + "step": 85620 + }, + { + "epoch": 11.57, + "learning_rate": 3.7220298595369655e-05, + "loss": 2.2304, + "step": 85630 + }, + { + "epoch": 11.57, + "learning_rate": 3.719232658901819e-05, + "loss": 2.2557, + "step": 85640 + }, + { + "epoch": 11.57, + "learning_rate": 3.716436360989295e-05, + "loss": 2.2472, + "step": 85650 + }, + { + "epoch": 11.57, + "learning_rate": 3.7136409660231734e-05, + "loss": 2.2533, + "step": 85660 + }, + { + "epoch": 11.57, + "learning_rate": 3.710846474227151e-05, + "loss": 2.2458, + "step": 85670 + }, + { + "epoch": 11.57, + "learning_rate": 3.7080528858248485e-05, + "loss": 2.2836, + "step": 85680 + }, + { + "epoch": 11.57, + "learning_rate": 3.705260201039821e-05, + "loss": 2.2657, + "step": 85690 + }, + { + "epoch": 11.57, + "learning_rate": 3.70246842009554e-05, + "loss": 2.243, + "step": 85700 + }, + { + "epoch": 11.58, + "learning_rate": 3.699677543215418e-05, + "loss": 2.2652, + "step": 85710 + }, + { + "epoch": 11.58, + "learning_rate": 3.696887570622799e-05, + "loss": 2.266, + "step": 85720 + }, + { + "epoch": 11.58, + "learning_rate": 3.6940985025409284e-05, + "loss": 2.2536, + "step": 85730 + }, + { + "epoch": 11.58, + "learning_rate": 3.6913103391930065e-05, + "loss": 2.2461, + "step": 85740 + }, + { + "epoch": 11.58, + "learning_rate": 3.688523080802146e-05, + "loss": 2.2437, + "step": 85750 + }, + { + "epoch": 11.58, + "learning_rate": 3.6857367275913976e-05, + "loss": 2.2458, + "step": 85760 + }, + { + "epoch": 11.58, + "learning_rate": 3.6829512797837324e-05, + "loss": 2.2815, + "step": 85770 + }, + { + "epoch": 11.59, + "learning_rate": 3.680166737602046e-05, + "loss": 2.2649, + "step": 85780 + }, + { + "epoch": 11.59, + "learning_rate": 3.6773831012691727e-05, + "loss": 2.2384, + "step": 85790 + }, + { + "epoch": 11.59, + "learning_rate": 3.674600371007866e-05, + "loss": 2.2623, + "step": 85800 + }, + { + "epoch": 11.59, + "learning_rate": 3.671818547040804e-05, + "loss": 2.2425, + "step": 85810 + }, + { + "epoch": 11.59, + "learning_rate": 3.669037629590602e-05, + "loss": 2.2642, + "step": 85820 + }, + { + "epoch": 11.59, + "learning_rate": 3.6662576188797957e-05, + "loss": 2.2341, + "step": 85830 + }, + { + "epoch": 11.59, + "learning_rate": 3.663478515130859e-05, + "loss": 2.2547, + "step": 85840 + }, + { + "epoch": 11.59, + "learning_rate": 3.660700318566168e-05, + "loss": 2.2511, + "step": 85850 + }, + { + "epoch": 11.6, + "learning_rate": 3.6579230294080494e-05, + "loss": 2.237, + "step": 85860 + }, + { + "epoch": 11.6, + "learning_rate": 3.655146647878761e-05, + "loss": 2.2527, + "step": 85870 + }, + { + "epoch": 11.6, + "learning_rate": 3.652371174200468e-05, + "loss": 2.2485, + "step": 85880 + }, + { + "epoch": 11.6, + "learning_rate": 3.649596608595273e-05, + "loss": 2.2424, + "step": 85890 + }, + { + "epoch": 11.6, + "learning_rate": 3.646822951285202e-05, + "loss": 2.2348, + "step": 85900 + }, + { + "epoch": 11.6, + "learning_rate": 3.6440502024922166e-05, + "loss": 2.2715, + "step": 85910 + }, + { + "epoch": 11.6, + "learning_rate": 3.6412783624382095e-05, + "loss": 2.2591, + "step": 85920 + }, + { + "epoch": 11.61, + "learning_rate": 3.638507431344976e-05, + "loss": 2.2422, + "step": 85930 + }, + { + "epoch": 11.61, + "learning_rate": 3.635737409434266e-05, + "loss": 2.2611, + "step": 85940 + }, + { + "epoch": 11.61, + "learning_rate": 3.6329682969277355e-05, + "loss": 2.269, + "step": 85950 + }, + { + "epoch": 11.61, + "learning_rate": 3.6302000940469876e-05, + "loss": 2.2405, + "step": 85960 + }, + { + "epoch": 11.61, + "learning_rate": 3.627432801013541e-05, + "loss": 2.2216, + "step": 85970 + }, + { + "epoch": 11.61, + "learning_rate": 3.624666418048835e-05, + "loss": 2.2494, + "step": 85980 + }, + { + "epoch": 11.61, + "learning_rate": 3.621900945374254e-05, + "loss": 2.2437, + "step": 85990 + }, + { + "epoch": 11.62, + "learning_rate": 3.619136383211097e-05, + "loss": 2.24, + "step": 86000 + }, + { + "epoch": 11.62, + "eval_loss": 2.4159932136535645, + "eval_runtime": 1266.6001, + "eval_samples_per_second": 60.11, + "eval_steps_per_second": 5.009, + "step": 86000 + }, + { + "epoch": 11.62, + "learning_rate": 3.616372731780586e-05, + "loss": 2.2454, + "step": 86010 + }, + { + "epoch": 11.62, + "learning_rate": 3.6136099913038896e-05, + "loss": 2.2753, + "step": 86020 + }, + { + "epoch": 11.62, + "learning_rate": 3.610848162002078e-05, + "loss": 2.2292, + "step": 86030 + }, + { + "epoch": 11.62, + "learning_rate": 3.608087244096178e-05, + "loss": 2.2566, + "step": 86040 + }, + { + "epoch": 11.62, + "learning_rate": 3.605327237807109e-05, + "loss": 2.2602, + "step": 86050 + }, + { + "epoch": 11.62, + "learning_rate": 3.6025681433557423e-05, + "loss": 2.2812, + "step": 86060 + }, + { + "epoch": 11.62, + "learning_rate": 3.599809960962874e-05, + "loss": 2.2561, + "step": 86070 + }, + { + "epoch": 11.63, + "learning_rate": 3.59705269084922e-05, + "loss": 2.2744, + "step": 86080 + }, + { + "epoch": 11.63, + "learning_rate": 3.594296333235425e-05, + "loss": 2.2212, + "step": 86090 + }, + { + "epoch": 11.63, + "learning_rate": 3.591540888342056e-05, + "loss": 2.2672, + "step": 86100 + }, + { + "epoch": 11.63, + "learning_rate": 3.588786356389623e-05, + "loss": 2.2501, + "step": 86110 + }, + { + "epoch": 11.63, + "learning_rate": 3.5860327375985464e-05, + "loss": 2.2923, + "step": 86120 + }, + { + "epoch": 11.63, + "learning_rate": 3.583280032189175e-05, + "loss": 2.2696, + "step": 86130 + }, + { + "epoch": 11.63, + "learning_rate": 3.5805282403817996e-05, + "loss": 2.2665, + "step": 86140 + }, + { + "epoch": 11.64, + "learning_rate": 3.577777362396617e-05, + "loss": 2.2346, + "step": 86150 + }, + { + "epoch": 11.64, + "learning_rate": 3.5750273984537696e-05, + "loss": 2.2338, + "step": 86160 + }, + { + "epoch": 11.64, + "learning_rate": 3.572278348773316e-05, + "loss": 2.2661, + "step": 86170 + }, + { + "epoch": 11.64, + "learning_rate": 3.569530213575236e-05, + "loss": 2.24, + "step": 86180 + }, + { + "epoch": 11.64, + "learning_rate": 3.566782993079455e-05, + "loss": 2.2366, + "step": 86190 + }, + { + "epoch": 11.64, + "learning_rate": 3.56403668750581e-05, + "loss": 2.2734, + "step": 86200 + }, + { + "epoch": 11.64, + "learning_rate": 3.561291297074066e-05, + "loss": 2.2457, + "step": 86210 + }, + { + "epoch": 11.64, + "learning_rate": 3.558546822003923e-05, + "loss": 2.2572, + "step": 86220 + }, + { + "epoch": 11.65, + "learning_rate": 3.5558032625149944e-05, + "loss": 2.2711, + "step": 86230 + }, + { + "epoch": 11.65, + "learning_rate": 3.5530606188268446e-05, + "loss": 2.2589, + "step": 86240 + }, + { + "epoch": 11.65, + "learning_rate": 3.5503188911589295e-05, + "loss": 2.2484, + "step": 86250 + }, + { + "epoch": 11.65, + "learning_rate": 3.547578079730658e-05, + "loss": 2.2606, + "step": 86260 + }, + { + "epoch": 11.65, + "learning_rate": 3.544838184761365e-05, + "loss": 2.2673, + "step": 86270 + }, + { + "epoch": 11.65, + "learning_rate": 3.5420992064703014e-05, + "loss": 2.2539, + "step": 86280 + }, + { + "epoch": 11.65, + "learning_rate": 3.539361145076647e-05, + "loss": 2.2982, + "step": 86290 + }, + { + "epoch": 11.66, + "learning_rate": 3.5366240007995066e-05, + "loss": 2.2708, + "step": 86300 + }, + { + "epoch": 11.66, + "learning_rate": 3.533887773857926e-05, + "loss": 2.2685, + "step": 86310 + }, + { + "epoch": 11.66, + "learning_rate": 3.531152464470857e-05, + "loss": 2.2644, + "step": 86320 + }, + { + "epoch": 11.66, + "learning_rate": 3.5284180728571895e-05, + "loss": 2.2739, + "step": 86330 + }, + { + "epoch": 11.66, + "learning_rate": 3.525684599235742e-05, + "loss": 2.2678, + "step": 86340 + }, + { + "epoch": 11.66, + "learning_rate": 3.52295204382525e-05, + "loss": 2.2461, + "step": 86350 + }, + { + "epoch": 11.66, + "learning_rate": 3.520220406844389e-05, + "loss": 2.2525, + "step": 86360 + }, + { + "epoch": 11.67, + "learning_rate": 3.517489688511749e-05, + "loss": 2.2821, + "step": 86370 + }, + { + "epoch": 11.67, + "learning_rate": 3.5147598890458446e-05, + "loss": 2.2609, + "step": 86380 + }, + { + "epoch": 11.67, + "learning_rate": 3.5120310086651346e-05, + "loss": 2.2762, + "step": 86390 + }, + { + "epoch": 11.67, + "learning_rate": 3.509303047587987e-05, + "loss": 2.2621, + "step": 86400 + }, + { + "epoch": 11.67, + "learning_rate": 3.506576006032698e-05, + "loss": 2.2454, + "step": 86410 + }, + { + "epoch": 11.67, + "learning_rate": 3.5038498842175004e-05, + "loss": 2.2658, + "step": 86420 + }, + { + "epoch": 11.67, + "learning_rate": 3.501124682360542e-05, + "loss": 2.262, + "step": 86430 + }, + { + "epoch": 11.67, + "learning_rate": 3.498400400679911e-05, + "loss": 2.2762, + "step": 86440 + }, + { + "epoch": 11.68, + "learning_rate": 3.4956770393936e-05, + "loss": 2.2713, + "step": 86450 + }, + { + "epoch": 11.68, + "learning_rate": 3.492954598719546e-05, + "loss": 2.2415, + "step": 86460 + }, + { + "epoch": 11.68, + "learning_rate": 3.490233078875615e-05, + "loss": 2.2533, + "step": 86470 + }, + { + "epoch": 11.68, + "learning_rate": 3.487512480079583e-05, + "loss": 2.2633, + "step": 86480 + }, + { + "epoch": 11.68, + "learning_rate": 3.484792802549166e-05, + "loss": 2.243, + "step": 86490 + }, + { + "epoch": 11.68, + "learning_rate": 3.482074046501992e-05, + "loss": 2.2435, + "step": 86500 + }, + { + "epoch": 11.68, + "learning_rate": 3.4793562121556385e-05, + "loss": 2.2606, + "step": 86510 + }, + { + "epoch": 11.69, + "learning_rate": 3.476639299727584e-05, + "loss": 2.2324, + "step": 86520 + }, + { + "epoch": 11.69, + "learning_rate": 3.473923309435246e-05, + "loss": 2.261, + "step": 86530 + }, + { + "epoch": 11.69, + "learning_rate": 3.471208241495974e-05, + "loss": 2.2802, + "step": 86540 + }, + { + "epoch": 11.69, + "learning_rate": 3.4684940961270255e-05, + "loss": 2.2521, + "step": 86550 + }, + { + "epoch": 11.69, + "learning_rate": 3.465780873545604e-05, + "loss": 2.2448, + "step": 86560 + }, + { + "epoch": 11.69, + "learning_rate": 3.463068573968828e-05, + "loss": 2.2636, + "step": 86570 + }, + { + "epoch": 11.69, + "learning_rate": 3.460357197613737e-05, + "loss": 2.2837, + "step": 86580 + }, + { + "epoch": 11.69, + "learning_rate": 3.4576467446973174e-05, + "loss": 2.2476, + "step": 86590 + }, + { + "epoch": 11.7, + "learning_rate": 3.4549372154364573e-05, + "loss": 2.248, + "step": 86600 + }, + { + "epoch": 11.7, + "learning_rate": 3.452228610047983e-05, + "loss": 2.2325, + "step": 86610 + }, + { + "epoch": 11.7, + "learning_rate": 3.4495209287486505e-05, + "loss": 2.2653, + "step": 86620 + }, + { + "epoch": 11.7, + "learning_rate": 3.446814171755131e-05, + "loss": 2.2671, + "step": 86630 + }, + { + "epoch": 11.7, + "learning_rate": 3.444108339284041e-05, + "loss": 2.2438, + "step": 86640 + }, + { + "epoch": 11.7, + "learning_rate": 3.441403431551889e-05, + "loss": 2.243, + "step": 86650 + }, + { + "epoch": 11.7, + "learning_rate": 3.438699448775141e-05, + "loss": 2.2426, + "step": 86660 + }, + { + "epoch": 11.71, + "learning_rate": 3.435996391170184e-05, + "loss": 2.2596, + "step": 86670 + }, + { + "epoch": 11.71, + "learning_rate": 3.433564430526381e-05, + "loss": 2.2849, + "step": 86680 + }, + { + "epoch": 11.71, + "learning_rate": 3.430863131343676e-05, + "loss": 2.2715, + "step": 86690 + }, + { + "epoch": 11.71, + "learning_rate": 3.4281627579598484e-05, + "loss": 2.2612, + "step": 86700 + }, + { + "epoch": 11.71, + "learning_rate": 3.425463310590985e-05, + "loss": 2.2472, + "step": 86710 + }, + { + "epoch": 11.71, + "learning_rate": 3.4227647894531e-05, + "loss": 2.2421, + "step": 86720 + }, + { + "epoch": 11.71, + "learning_rate": 3.420067194762147e-05, + "loss": 2.2762, + "step": 86730 + }, + { + "epoch": 11.71, + "learning_rate": 3.417370526733986e-05, + "loss": 2.275, + "step": 86740 + }, + { + "epoch": 11.72, + "learning_rate": 3.414674785584422e-05, + "loss": 2.25, + "step": 86750 + }, + { + "epoch": 11.72, + "learning_rate": 3.4119799715291753e-05, + "loss": 2.2738, + "step": 86760 + }, + { + "epoch": 11.72, + "learning_rate": 3.4092860847838846e-05, + "loss": 2.2784, + "step": 86770 + }, + { + "epoch": 11.72, + "learning_rate": 3.406593125564134e-05, + "loss": 2.2555, + "step": 86780 + }, + { + "epoch": 11.72, + "learning_rate": 3.403901094085418e-05, + "loss": 2.2682, + "step": 86790 + }, + { + "epoch": 11.72, + "learning_rate": 3.401209990563158e-05, + "loss": 2.2311, + "step": 86800 + }, + { + "epoch": 11.72, + "learning_rate": 3.3985198152127126e-05, + "loss": 2.2455, + "step": 86810 + }, + { + "epoch": 11.73, + "learning_rate": 3.3958305682493494e-05, + "loss": 2.2813, + "step": 86820 + }, + { + "epoch": 11.73, + "learning_rate": 3.393142249888283e-05, + "loss": 2.2639, + "step": 86830 + }, + { + "epoch": 11.73, + "learning_rate": 3.390454860344624e-05, + "loss": 2.2514, + "step": 86840 + }, + { + "epoch": 11.73, + "learning_rate": 3.387768399833438e-05, + "loss": 2.2602, + "step": 86850 + }, + { + "epoch": 11.73, + "learning_rate": 3.385082868569698e-05, + "loss": 2.242, + "step": 86860 + }, + { + "epoch": 11.73, + "learning_rate": 3.3823982667683164e-05, + "loss": 2.2474, + "step": 86870 + }, + { + "epoch": 11.73, + "learning_rate": 3.3797145946441166e-05, + "loss": 2.2528, + "step": 86880 + }, + { + "epoch": 11.74, + "learning_rate": 3.3770318524118536e-05, + "loss": 2.2699, + "step": 86890 + }, + { + "epoch": 11.74, + "learning_rate": 3.3743500402862144e-05, + "loss": 2.2558, + "step": 86900 + }, + { + "epoch": 11.74, + "learning_rate": 3.371669158481805e-05, + "loss": 2.2727, + "step": 86910 + }, + { + "epoch": 11.74, + "learning_rate": 3.3689892072131526e-05, + "loss": 2.2383, + "step": 86920 + }, + { + "epoch": 11.74, + "learning_rate": 3.3663101866947216e-05, + "loss": 2.253, + "step": 86930 + }, + { + "epoch": 11.74, + "learning_rate": 3.363632097140892e-05, + "loss": 2.2679, + "step": 86940 + }, + { + "epoch": 11.74, + "learning_rate": 3.360954938765975e-05, + "loss": 2.2306, + "step": 86950 + }, + { + "epoch": 11.74, + "learning_rate": 3.358278711784207e-05, + "loss": 2.2673, + "step": 86960 + }, + { + "epoch": 11.75, + "learning_rate": 3.355603416409741e-05, + "loss": 2.2441, + "step": 86970 + }, + { + "epoch": 11.75, + "learning_rate": 3.3529290528566716e-05, + "loss": 2.2482, + "step": 86980 + }, + { + "epoch": 11.75, + "learning_rate": 3.350255621339005e-05, + "loss": 2.228, + "step": 86990 + }, + { + "epoch": 11.75, + "learning_rate": 3.347583122070679e-05, + "loss": 2.2574, + "step": 87000 + }, + { + "epoch": 11.75, + "eval_loss": 2.4128317832946777, + "eval_runtime": 1268.9991, + "eval_samples_per_second": 59.996, + "eval_steps_per_second": 5.0, + "step": 87000 + }, + { + "epoch": 11.75, + "learning_rate": 3.3449115552655495e-05, + "loss": 2.2396, + "step": 87010 + }, + { + "epoch": 11.75, + "learning_rate": 3.3422409211374096e-05, + "loss": 2.2359, + "step": 87020 + }, + { + "epoch": 11.75, + "learning_rate": 3.339571219899979e-05, + "loss": 2.2363, + "step": 87030 + }, + { + "epoch": 11.76, + "learning_rate": 3.336902451766879e-05, + "loss": 2.2553, + "step": 87040 + }, + { + "epoch": 11.76, + "learning_rate": 3.334234616951685e-05, + "loss": 2.284, + "step": 87050 + }, + { + "epoch": 11.76, + "learning_rate": 3.33156771566788e-05, + "loss": 2.2833, + "step": 87060 + }, + { + "epoch": 11.76, + "learning_rate": 3.328901748128882e-05, + "loss": 2.2608, + "step": 87070 + }, + { + "epoch": 11.76, + "learning_rate": 3.326236714548029e-05, + "loss": 2.2577, + "step": 87080 + }, + { + "epoch": 11.76, + "learning_rate": 3.323572615138582e-05, + "loss": 2.2444, + "step": 87090 + }, + { + "epoch": 11.76, + "learning_rate": 3.3209094501137364e-05, + "loss": 2.2534, + "step": 87100 + }, + { + "epoch": 11.76, + "learning_rate": 3.318247219686603e-05, + "loss": 2.2393, + "step": 87110 + }, + { + "epoch": 11.77, + "learning_rate": 3.31558592407022e-05, + "loss": 2.2499, + "step": 87120 + }, + { + "epoch": 11.77, + "learning_rate": 3.312925563477557e-05, + "loss": 2.259, + "step": 87130 + }, + { + "epoch": 11.77, + "learning_rate": 3.310266138121501e-05, + "loss": 2.2143, + "step": 87140 + }, + { + "epoch": 11.77, + "learning_rate": 3.307607648214874e-05, + "loss": 2.2542, + "step": 87150 + }, + { + "epoch": 11.77, + "learning_rate": 3.30495009397041e-05, + "loss": 2.255, + "step": 87160 + }, + { + "epoch": 11.77, + "learning_rate": 3.302293475600775e-05, + "loss": 2.2248, + "step": 87170 + }, + { + "epoch": 11.77, + "learning_rate": 3.299637793318565e-05, + "loss": 2.2314, + "step": 87180 + }, + { + "epoch": 11.78, + "learning_rate": 3.296983047336295e-05, + "loss": 2.2742, + "step": 87190 + }, + { + "epoch": 11.78, + "learning_rate": 3.294329237866403e-05, + "loss": 2.2717, + "step": 87200 + }, + { + "epoch": 11.78, + "learning_rate": 3.2916763651212525e-05, + "loss": 2.2772, + "step": 87210 + }, + { + "epoch": 11.78, + "learning_rate": 3.28902442931314e-05, + "loss": 2.2465, + "step": 87220 + }, + { + "epoch": 11.78, + "learning_rate": 3.2863734306542865e-05, + "loss": 2.2283, + "step": 87230 + }, + { + "epoch": 11.78, + "learning_rate": 3.2837233693568204e-05, + "loss": 2.2491, + "step": 87240 + }, + { + "epoch": 11.78, + "learning_rate": 3.28107424563282e-05, + "loss": 2.2533, + "step": 87250 + }, + { + "epoch": 11.79, + "learning_rate": 3.278426059694267e-05, + "loss": 2.2307, + "step": 87260 + }, + { + "epoch": 11.79, + "learning_rate": 3.275778811753085e-05, + "loss": 2.243, + "step": 87270 + }, + { + "epoch": 11.79, + "learning_rate": 3.273132502021114e-05, + "loss": 2.274, + "step": 87280 + }, + { + "epoch": 11.79, + "learning_rate": 3.270487130710114e-05, + "loss": 2.26, + "step": 87290 + }, + { + "epoch": 11.79, + "learning_rate": 3.267842698031784e-05, + "loss": 2.2644, + "step": 87300 + }, + { + "epoch": 11.79, + "learning_rate": 3.265199204197736e-05, + "loss": 2.2322, + "step": 87310 + }, + { + "epoch": 11.79, + "learning_rate": 3.262556649419505e-05, + "loss": 2.2847, + "step": 87320 + }, + { + "epoch": 11.79, + "learning_rate": 3.2599150339085694e-05, + "loss": 2.232, + "step": 87330 + }, + { + "epoch": 11.8, + "learning_rate": 3.257274357876308e-05, + "loss": 2.2685, + "step": 87340 + }, + { + "epoch": 11.8, + "learning_rate": 3.2546346215340444e-05, + "loss": 2.2581, + "step": 87350 + }, + { + "epoch": 11.8, + "learning_rate": 3.251995825093014e-05, + "loss": 2.2493, + "step": 87360 + }, + { + "epoch": 11.8, + "learning_rate": 3.249357968764381e-05, + "loss": 2.2833, + "step": 87370 + }, + { + "epoch": 11.8, + "learning_rate": 3.246721052759239e-05, + "loss": 2.2727, + "step": 87380 + }, + { + "epoch": 11.8, + "learning_rate": 3.244085077288599e-05, + "loss": 2.2455, + "step": 87390 + }, + { + "epoch": 11.8, + "learning_rate": 3.2414500425634035e-05, + "loss": 2.2481, + "step": 87400 + }, + { + "epoch": 11.81, + "learning_rate": 3.238815948794508e-05, + "loss": 2.2802, + "step": 87410 + }, + { + "epoch": 11.81, + "learning_rate": 3.236182796192707e-05, + "loss": 2.2547, + "step": 87420 + }, + { + "epoch": 11.81, + "learning_rate": 3.233550584968722e-05, + "loss": 2.2344, + "step": 87430 + }, + { + "epoch": 11.81, + "learning_rate": 3.230919315333174e-05, + "loss": 2.275, + "step": 87440 + }, + { + "epoch": 11.81, + "learning_rate": 3.2282889874966385e-05, + "loss": 2.2507, + "step": 87450 + }, + { + "epoch": 11.81, + "learning_rate": 3.225659601669595e-05, + "loss": 2.2678, + "step": 87460 + }, + { + "epoch": 11.81, + "learning_rate": 3.22303115806246e-05, + "loss": 2.2468, + "step": 87470 + }, + { + "epoch": 11.81, + "learning_rate": 3.220403656885568e-05, + "loss": 2.2318, + "step": 87480 + }, + { + "epoch": 11.82, + "learning_rate": 3.217777098349178e-05, + "loss": 2.2436, + "step": 87490 + }, + { + "epoch": 11.82, + "learning_rate": 3.21515148266348e-05, + "loss": 2.2264, + "step": 87500 + }, + { + "epoch": 11.82, + "learning_rate": 3.212526810038583e-05, + "loss": 2.259, + "step": 87510 + }, + { + "epoch": 11.82, + "learning_rate": 3.209903080684516e-05, + "loss": 2.2481, + "step": 87520 + }, + { + "epoch": 11.82, + "learning_rate": 3.207280294811246e-05, + "loss": 2.2694, + "step": 87530 + }, + { + "epoch": 11.82, + "learning_rate": 3.204658452628648e-05, + "loss": 2.2597, + "step": 87540 + }, + { + "epoch": 11.82, + "learning_rate": 3.202037554346539e-05, + "loss": 2.2742, + "step": 87550 + }, + { + "epoch": 11.83, + "learning_rate": 3.1994176001746494e-05, + "loss": 2.2713, + "step": 87560 + }, + { + "epoch": 11.83, + "learning_rate": 3.1967985903226294e-05, + "loss": 2.2684, + "step": 87570 + }, + { + "epoch": 11.83, + "learning_rate": 3.19418052500007e-05, + "loss": 2.2764, + "step": 87580 + }, + { + "epoch": 11.83, + "learning_rate": 3.1915634044164714e-05, + "loss": 2.2284, + "step": 87590 + }, + { + "epoch": 11.83, + "learning_rate": 3.1889472287812665e-05, + "loss": 2.2504, + "step": 87600 + }, + { + "epoch": 11.83, + "learning_rate": 3.1863319983038035e-05, + "loss": 2.2722, + "step": 87610 + }, + { + "epoch": 11.83, + "learning_rate": 3.183717713193365e-05, + "loss": 2.2573, + "step": 87620 + }, + { + "epoch": 11.84, + "learning_rate": 3.1811043736591664e-05, + "loss": 2.2577, + "step": 87630 + }, + { + "epoch": 11.84, + "learning_rate": 3.1784919799103154e-05, + "loss": 2.2515, + "step": 87640 + }, + { + "epoch": 11.84, + "learning_rate": 3.175880532155877e-05, + "loss": 2.2441, + "step": 87650 + }, + { + "epoch": 11.84, + "learning_rate": 3.17327003060482e-05, + "loss": 2.2554, + "step": 87660 + }, + { + "epoch": 11.84, + "learning_rate": 3.1706604754660525e-05, + "loss": 2.2694, + "step": 87670 + }, + { + "epoch": 11.84, + "learning_rate": 3.168051866948395e-05, + "loss": 2.275, + "step": 87680 + }, + { + "epoch": 11.84, + "learning_rate": 3.165444205260594e-05, + "loss": 2.2448, + "step": 87690 + }, + { + "epoch": 11.84, + "learning_rate": 3.1628374906113305e-05, + "loss": 2.2709, + "step": 87700 + }, + { + "epoch": 11.85, + "learning_rate": 3.1602317232091954e-05, + "loss": 2.2691, + "step": 87710 + }, + { + "epoch": 11.85, + "learning_rate": 3.15762690326271e-05, + "loss": 2.2457, + "step": 87720 + }, + { + "epoch": 11.85, + "learning_rate": 3.1550230309803266e-05, + "loss": 2.2549, + "step": 87730 + }, + { + "epoch": 11.85, + "learning_rate": 3.152420106570408e-05, + "loss": 2.265, + "step": 87740 + }, + { + "epoch": 11.85, + "learning_rate": 3.149818130241256e-05, + "loss": 2.2417, + "step": 87750 + }, + { + "epoch": 11.85, + "learning_rate": 3.147217102201084e-05, + "loss": 2.26, + "step": 87760 + }, + { + "epoch": 11.85, + "learning_rate": 3.144617022658032e-05, + "loss": 2.277, + "step": 87770 + }, + { + "epoch": 11.86, + "learning_rate": 3.142017891820172e-05, + "loss": 2.2828, + "step": 87780 + }, + { + "epoch": 11.86, + "learning_rate": 3.139419709895495e-05, + "loss": 2.2539, + "step": 87790 + }, + { + "epoch": 11.86, + "learning_rate": 3.1368224770919124e-05, + "loss": 2.2667, + "step": 87800 + }, + { + "epoch": 11.86, + "learning_rate": 3.1342261936172594e-05, + "loss": 2.2719, + "step": 87810 + }, + { + "epoch": 11.86, + "learning_rate": 3.131630859679304e-05, + "loss": 2.2575, + "step": 87820 + }, + { + "epoch": 11.86, + "learning_rate": 3.12903647548574e-05, + "loss": 2.2725, + "step": 87830 + }, + { + "epoch": 11.86, + "learning_rate": 3.1264430412441633e-05, + "loss": 2.2761, + "step": 87840 + }, + { + "epoch": 11.86, + "learning_rate": 3.123850557162119e-05, + "loss": 2.2616, + "step": 87850 + }, + { + "epoch": 11.87, + "learning_rate": 3.1212590234470584e-05, + "loss": 2.2614, + "step": 87860 + }, + { + "epoch": 11.87, + "learning_rate": 3.118668440306374e-05, + "loss": 2.2485, + "step": 87870 + }, + { + "epoch": 11.87, + "learning_rate": 3.116078807947366e-05, + "loss": 2.2588, + "step": 87880 + }, + { + "epoch": 11.87, + "learning_rate": 3.113490126577262e-05, + "loss": 2.2815, + "step": 87890 + }, + { + "epoch": 11.87, + "learning_rate": 3.110902396403224e-05, + "loss": 2.2565, + "step": 87900 + }, + { + "epoch": 11.87, + "learning_rate": 3.1083156176323264e-05, + "loss": 2.2716, + "step": 87910 + }, + { + "epoch": 11.87, + "learning_rate": 3.10572979047157e-05, + "loss": 2.2572, + "step": 87920 + }, + { + "epoch": 11.88, + "learning_rate": 3.1031449151278854e-05, + "loss": 2.2618, + "step": 87930 + }, + { + "epoch": 11.88, + "learning_rate": 3.1005609918081154e-05, + "loss": 2.2723, + "step": 87940 + }, + { + "epoch": 11.88, + "learning_rate": 3.097978020719043e-05, + "loss": 2.2762, + "step": 87950 + }, + { + "epoch": 11.88, + "learning_rate": 3.0953960020673593e-05, + "loss": 2.2567, + "step": 87960 + }, + { + "epoch": 11.88, + "learning_rate": 3.0928149360596834e-05, + "loss": 2.2481, + "step": 87970 + }, + { + "epoch": 11.88, + "learning_rate": 3.090234822902568e-05, + "loss": 2.2415, + "step": 87980 + }, + { + "epoch": 11.88, + "learning_rate": 3.0876556628024754e-05, + "loss": 2.2489, + "step": 87990 + }, + { + "epoch": 11.89, + "learning_rate": 3.0850774559658075e-05, + "loss": 2.269, + "step": 88000 + }, + { + "epoch": 11.89, + "eval_loss": 2.410313606262207, + "eval_runtime": 1269.0076, + "eval_samples_per_second": 59.996, + "eval_steps_per_second": 5.0, + "step": 88000 + }, + { + "epoch": 11.89, + "learning_rate": 3.082500202598866e-05, + "loss": 2.2366, + "step": 88010 + }, + { + "epoch": 11.89, + "learning_rate": 3.0799239029078975e-05, + "loss": 2.2552, + "step": 88020 + }, + { + "epoch": 11.89, + "learning_rate": 3.077348557099072e-05, + "loss": 2.2643, + "step": 88030 + }, + { + "epoch": 11.89, + "learning_rate": 3.074774165378471e-05, + "loss": 2.2255, + "step": 88040 + }, + { + "epoch": 11.89, + "learning_rate": 3.072200727952106e-05, + "loss": 2.26, + "step": 88050 + }, + { + "epoch": 11.89, + "learning_rate": 3.069628245025907e-05, + "loss": 2.252, + "step": 88060 + }, + { + "epoch": 11.89, + "learning_rate": 3.067056716805742e-05, + "loss": 2.2495, + "step": 88070 + }, + { + "epoch": 11.9, + "learning_rate": 3.064486143497386e-05, + "loss": 2.2467, + "step": 88080 + }, + { + "epoch": 11.9, + "learning_rate": 3.061916525306543e-05, + "loss": 2.241, + "step": 88090 + }, + { + "epoch": 11.9, + "learning_rate": 3.0593478624388485e-05, + "loss": 2.2292, + "step": 88100 + }, + { + "epoch": 11.9, + "learning_rate": 3.056780155099848e-05, + "loss": 2.2353, + "step": 88110 + }, + { + "epoch": 11.9, + "learning_rate": 3.054213403495023e-05, + "loss": 2.2547, + "step": 88120 + }, + { + "epoch": 11.9, + "learning_rate": 3.0516476078297743e-05, + "loss": 2.257, + "step": 88130 + }, + { + "epoch": 11.9, + "learning_rate": 3.049082768309415e-05, + "loss": 2.2603, + "step": 88140 + }, + { + "epoch": 11.91, + "learning_rate": 3.046518885139203e-05, + "loss": 2.2199, + "step": 88150 + }, + { + "epoch": 11.91, + "learning_rate": 3.0439559585243046e-05, + "loss": 2.2634, + "step": 88160 + }, + { + "epoch": 11.91, + "learning_rate": 3.0413939886698084e-05, + "loss": 2.2445, + "step": 88170 + }, + { + "epoch": 11.91, + "learning_rate": 3.038832975780738e-05, + "loss": 2.271, + "step": 88180 + }, + { + "epoch": 11.91, + "learning_rate": 3.0362729200620285e-05, + "loss": 2.2581, + "step": 88190 + }, + { + "epoch": 11.91, + "learning_rate": 3.033713821718553e-05, + "loss": 2.2765, + "step": 88200 + }, + { + "epoch": 11.91, + "learning_rate": 3.0311556809550847e-05, + "loss": 2.2611, + "step": 88210 + }, + { + "epoch": 11.91, + "learning_rate": 3.028598497976341e-05, + "loss": 2.2454, + "step": 88220 + }, + { + "epoch": 11.92, + "learning_rate": 3.0260422729869587e-05, + "loss": 2.2627, + "step": 88230 + }, + { + "epoch": 11.92, + "learning_rate": 3.023487006191494e-05, + "loss": 2.2672, + "step": 88240 + }, + { + "epoch": 11.92, + "learning_rate": 3.0209326977944247e-05, + "loss": 2.272, + "step": 88250 + }, + { + "epoch": 11.92, + "learning_rate": 3.0183793480001528e-05, + "loss": 2.2612, + "step": 88260 + }, + { + "epoch": 11.92, + "learning_rate": 3.0158269570130112e-05, + "loss": 2.2085, + "step": 88270 + }, + { + "epoch": 11.92, + "learning_rate": 3.013275525037248e-05, + "loss": 2.2667, + "step": 88280 + }, + { + "epoch": 11.92, + "learning_rate": 3.010725052277032e-05, + "loss": 2.2384, + "step": 88290 + }, + { + "epoch": 11.93, + "learning_rate": 3.0081755389364682e-05, + "loss": 2.2616, + "step": 88300 + }, + { + "epoch": 11.93, + "learning_rate": 3.005626985219568e-05, + "loss": 2.2669, + "step": 88310 + }, + { + "epoch": 11.93, + "learning_rate": 3.0030793913302838e-05, + "loss": 2.2448, + "step": 88320 + }, + { + "epoch": 11.93, + "learning_rate": 3.0005327574724787e-05, + "loss": 2.2678, + "step": 88330 + }, + { + "epoch": 11.93, + "learning_rate": 2.9979870838499363e-05, + "loss": 2.2565, + "step": 88340 + }, + { + "epoch": 11.93, + "learning_rate": 2.9954423706663794e-05, + "loss": 2.256, + "step": 88350 + }, + { + "epoch": 11.93, + "learning_rate": 2.9928986181254394e-05, + "loss": 2.2335, + "step": 88360 + }, + { + "epoch": 11.94, + "learning_rate": 2.9903558264306705e-05, + "loss": 2.2402, + "step": 88370 + }, + { + "epoch": 11.94, + "learning_rate": 2.9878139957855646e-05, + "loss": 2.2611, + "step": 88380 + }, + { + "epoch": 11.94, + "learning_rate": 2.9852731263935176e-05, + "loss": 2.2709, + "step": 88390 + }, + { + "epoch": 11.94, + "learning_rate": 2.98273321845787e-05, + "loss": 2.2896, + "step": 88400 + }, + { + "epoch": 11.94, + "learning_rate": 2.980194272181858e-05, + "loss": 2.2541, + "step": 88410 + }, + { + "epoch": 11.94, + "learning_rate": 2.9776562877686634e-05, + "loss": 2.2442, + "step": 88420 + }, + { + "epoch": 11.94, + "learning_rate": 2.975119265421389e-05, + "loss": 2.2512, + "step": 88430 + }, + { + "epoch": 11.94, + "learning_rate": 2.9725832053430493e-05, + "loss": 2.2721, + "step": 88440 + }, + { + "epoch": 11.95, + "learning_rate": 2.97004810773659e-05, + "loss": 2.2637, + "step": 88450 + }, + { + "epoch": 11.95, + "learning_rate": 2.9675139728048708e-05, + "loss": 2.2517, + "step": 88460 + }, + { + "epoch": 11.95, + "learning_rate": 2.9649808007506925e-05, + "loss": 2.2501, + "step": 88470 + }, + { + "epoch": 11.95, + "learning_rate": 2.9624485917767615e-05, + "loss": 2.2345, + "step": 88480 + }, + { + "epoch": 11.95, + "learning_rate": 2.959917346085709e-05, + "loss": 2.2742, + "step": 88490 + }, + { + "epoch": 11.95, + "learning_rate": 2.9573870638801027e-05, + "loss": 2.2481, + "step": 88500 + }, + { + "epoch": 11.95, + "learning_rate": 2.9548577453624145e-05, + "loss": 2.2561, + "step": 88510 + }, + { + "epoch": 11.96, + "learning_rate": 2.952329390735057e-05, + "loss": 2.2489, + "step": 88520 + }, + { + "epoch": 11.96, + "learning_rate": 2.9498020002003537e-05, + "loss": 2.2667, + "step": 88530 + }, + { + "epoch": 11.96, + "learning_rate": 2.947275573960549e-05, + "loss": 2.2675, + "step": 88540 + }, + { + "epoch": 11.96, + "learning_rate": 2.9447501122178253e-05, + "loss": 2.2633, + "step": 88550 + }, + { + "epoch": 11.96, + "learning_rate": 2.9422256151742718e-05, + "loss": 2.2637, + "step": 88560 + }, + { + "epoch": 11.96, + "learning_rate": 2.939702083031904e-05, + "loss": 2.2385, + "step": 88570 + }, + { + "epoch": 11.96, + "learning_rate": 2.9371795159926704e-05, + "loss": 2.2439, + "step": 88580 + }, + { + "epoch": 11.96, + "learning_rate": 2.9346579142584275e-05, + "loss": 2.2778, + "step": 88590 + }, + { + "epoch": 11.97, + "learning_rate": 2.9321372780309743e-05, + "loss": 2.2562, + "step": 88600 + }, + { + "epoch": 11.97, + "learning_rate": 2.9296176075120025e-05, + "loss": 2.2638, + "step": 88610 + }, + { + "epoch": 11.97, + "learning_rate": 2.9270989029031527e-05, + "loss": 2.2845, + "step": 88620 + }, + { + "epoch": 11.97, + "learning_rate": 2.9245811644059835e-05, + "loss": 2.2529, + "step": 88630 + }, + { + "epoch": 11.97, + "learning_rate": 2.9220643922219688e-05, + "loss": 2.2809, + "step": 88640 + }, + { + "epoch": 11.97, + "learning_rate": 2.9195485865525094e-05, + "loss": 2.2461, + "step": 88650 + }, + { + "epoch": 11.97, + "learning_rate": 2.9170337475989218e-05, + "loss": 2.2421, + "step": 88660 + }, + { + "epoch": 11.98, + "learning_rate": 2.914519875562461e-05, + "loss": 2.281, + "step": 88670 + }, + { + "epoch": 11.98, + "learning_rate": 2.9120069706442904e-05, + "loss": 2.2486, + "step": 88680 + }, + { + "epoch": 11.98, + "learning_rate": 2.909495033045498e-05, + "loss": 2.2641, + "step": 88690 + }, + { + "epoch": 11.98, + "learning_rate": 2.9069840629671025e-05, + "loss": 2.2388, + "step": 88700 + }, + { + "epoch": 11.98, + "learning_rate": 2.904474060610032e-05, + "loss": 2.2881, + "step": 88710 + }, + { + "epoch": 11.98, + "learning_rate": 2.9019650261751554e-05, + "loss": 2.2415, + "step": 88720 + }, + { + "epoch": 11.98, + "learning_rate": 2.8994569598632472e-05, + "loss": 2.2381, + "step": 88730 + }, + { + "epoch": 11.99, + "learning_rate": 2.896949861875007e-05, + "loss": 2.2533, + "step": 88740 + }, + { + "epoch": 11.99, + "learning_rate": 2.8944437324110708e-05, + "loss": 2.2655, + "step": 88750 + }, + { + "epoch": 11.99, + "learning_rate": 2.89193857167198e-05, + "loss": 2.2376, + "step": 88760 + }, + { + "epoch": 11.99, + "learning_rate": 2.8894343798582025e-05, + "loss": 2.2569, + "step": 88770 + }, + { + "epoch": 11.99, + "learning_rate": 2.8869311571701413e-05, + "loss": 2.2375, + "step": 88780 + }, + { + "epoch": 11.99, + "learning_rate": 2.8844289038081013e-05, + "loss": 2.254, + "step": 88790 + }, + { + "epoch": 11.99, + "learning_rate": 2.8819276199723357e-05, + "loss": 2.2432, + "step": 88800 + }, + { + "epoch": 11.99, + "learning_rate": 2.8794273058629858e-05, + "loss": 2.2417, + "step": 88810 + }, + { + "epoch": 12.0, + "learning_rate": 2.8769279616801466e-05, + "loss": 2.2788, + "step": 88820 + }, + { + "epoch": 12.0, + "learning_rate": 2.874429587623825e-05, + "loss": 2.2439, + "step": 88830 + }, + { + "epoch": 12.0, + "learning_rate": 2.871932183893944e-05, + "loss": 2.2454, + "step": 88840 + }, + { + "epoch": 12.0, + "learning_rate": 2.869435750690356e-05, + "loss": 2.2812, + "step": 88850 + }, + { + "epoch": 12.0, + "learning_rate": 2.8669402882128273e-05, + "loss": 2.2306, + "step": 88860 + }, + { + "epoch": 12.0, + "learning_rate": 2.8644457966610634e-05, + "loss": 2.2076, + "step": 88870 + }, + { + "epoch": 12.0, + "learning_rate": 2.861952276234675e-05, + "loss": 2.2041, + "step": 88880 + }, + { + "epoch": 12.01, + "learning_rate": 2.8594597271331982e-05, + "loss": 2.2114, + "step": 88890 + }, + { + "epoch": 12.01, + "learning_rate": 2.8569681495561027e-05, + "loss": 2.2496, + "step": 88900 + }, + { + "epoch": 12.01, + "learning_rate": 2.8544775437027652e-05, + "loss": 2.2097, + "step": 88910 + }, + { + "epoch": 12.01, + "learning_rate": 2.851987909772498e-05, + "loss": 2.2543, + "step": 88920 + }, + { + "epoch": 12.01, + "learning_rate": 2.8494992479645267e-05, + "loss": 2.2301, + "step": 88930 + }, + { + "epoch": 12.01, + "learning_rate": 2.847011558477999e-05, + "loss": 2.2203, + "step": 88940 + }, + { + "epoch": 12.01, + "learning_rate": 2.8445248415119936e-05, + "loss": 2.2437, + "step": 88950 + }, + { + "epoch": 12.01, + "learning_rate": 2.8420390972655017e-05, + "loss": 2.2308, + "step": 88960 + }, + { + "epoch": 12.02, + "learning_rate": 2.8395543259374387e-05, + "loss": 2.2088, + "step": 88970 + }, + { + "epoch": 12.02, + "learning_rate": 2.8370705277266477e-05, + "loss": 2.2255, + "step": 88980 + }, + { + "epoch": 12.02, + "learning_rate": 2.834587702831886e-05, + "loss": 2.2249, + "step": 88990 + }, + { + "epoch": 12.02, + "learning_rate": 2.832105851451848e-05, + "loss": 2.2249, + "step": 89000 + }, + { + "epoch": 12.02, + "eval_loss": 2.4120471477508545, + "eval_runtime": 1269.0105, + "eval_samples_per_second": 59.996, + "eval_steps_per_second": 5.0, + "step": 89000 + }, + { + "epoch": 12.02, + "learning_rate": 2.8296249737851216e-05, + "loss": 2.1843, + "step": 89010 + }, + { + "epoch": 12.02, + "learning_rate": 2.8271450700302485e-05, + "loss": 2.1899, + "step": 89020 + }, + { + "epoch": 12.02, + "learning_rate": 2.824666140385669e-05, + "loss": 2.1925, + "step": 89030 + }, + { + "epoch": 12.03, + "learning_rate": 2.8221881850497634e-05, + "loss": 2.1798, + "step": 89040 + }, + { + "epoch": 12.03, + "learning_rate": 2.8197112042208232e-05, + "loss": 2.2077, + "step": 89050 + }, + { + "epoch": 12.03, + "learning_rate": 2.8172351980970575e-05, + "loss": 2.236, + "step": 89060 + }, + { + "epoch": 12.03, + "learning_rate": 2.8147601668766133e-05, + "loss": 2.1932, + "step": 89070 + }, + { + "epoch": 12.03, + "learning_rate": 2.8122861107575473e-05, + "loss": 2.2448, + "step": 89080 + }, + { + "epoch": 12.03, + "learning_rate": 2.8098130299378357e-05, + "loss": 2.1892, + "step": 89090 + }, + { + "epoch": 12.03, + "learning_rate": 2.8075880912446173e-05, + "loss": 2.2049, + "step": 89100 + }, + { + "epoch": 12.04, + "learning_rate": 2.8051168640388515e-05, + "loss": 2.2447, + "step": 89110 + }, + { + "epoch": 12.04, + "learning_rate": 2.8026466127061515e-05, + "loss": 2.2093, + "step": 89120 + }, + { + "epoch": 12.04, + "learning_rate": 2.8001773374441905e-05, + "loss": 2.2221, + "step": 89130 + }, + { + "epoch": 12.04, + "learning_rate": 2.797709038450574e-05, + "loss": 2.246, + "step": 89140 + }, + { + "epoch": 12.04, + "learning_rate": 2.795241715922819e-05, + "loss": 2.2311, + "step": 89150 + }, + { + "epoch": 12.04, + "learning_rate": 2.792775370058371e-05, + "loss": 2.2173, + "step": 89160 + }, + { + "epoch": 12.04, + "learning_rate": 2.7903100010545886e-05, + "loss": 2.2397, + "step": 89170 + }, + { + "epoch": 12.04, + "learning_rate": 2.7878456091087626e-05, + "loss": 2.2228, + "step": 89180 + }, + { + "epoch": 12.05, + "learning_rate": 2.7853821944181116e-05, + "loss": 2.2414, + "step": 89190 + }, + { + "epoch": 12.05, + "learning_rate": 2.782919757179746e-05, + "loss": 2.2332, + "step": 89200 + }, + { + "epoch": 12.05, + "learning_rate": 2.7804582975907325e-05, + "loss": 2.2225, + "step": 89210 + }, + { + "epoch": 12.05, + "learning_rate": 2.7779978158480378e-05, + "loss": 2.2515, + "step": 89220 + }, + { + "epoch": 12.05, + "learning_rate": 2.7755383121485608e-05, + "loss": 2.2106, + "step": 89230 + }, + { + "epoch": 12.05, + "learning_rate": 2.7730797866891198e-05, + "loss": 2.2259, + "step": 89240 + }, + { + "epoch": 12.05, + "learning_rate": 2.7706222396664485e-05, + "loss": 2.2184, + "step": 89250 + }, + { + "epoch": 12.06, + "learning_rate": 2.7681656712772132e-05, + "loss": 2.2025, + "step": 89260 + }, + { + "epoch": 12.06, + "learning_rate": 2.765710081717995e-05, + "loss": 2.2369, + "step": 89270 + }, + { + "epoch": 12.06, + "learning_rate": 2.7632554711852917e-05, + "loss": 2.2263, + "step": 89280 + }, + { + "epoch": 12.06, + "learning_rate": 2.7608018398755393e-05, + "loss": 2.2289, + "step": 89290 + }, + { + "epoch": 12.06, + "learning_rate": 2.758349187985076e-05, + "loss": 2.222, + "step": 89300 + }, + { + "epoch": 12.06, + "learning_rate": 2.7558975157101782e-05, + "loss": 2.2158, + "step": 89310 + }, + { + "epoch": 12.06, + "learning_rate": 2.7534468232470335e-05, + "loss": 2.1979, + "step": 89320 + }, + { + "epoch": 12.06, + "learning_rate": 2.750997110791749e-05, + "loss": 2.2182, + "step": 89330 + }, + { + "epoch": 12.07, + "learning_rate": 2.7485483785403674e-05, + "loss": 2.2072, + "step": 89340 + }, + { + "epoch": 12.07, + "learning_rate": 2.7461006266888386e-05, + "loss": 2.2172, + "step": 89350 + }, + { + "epoch": 12.07, + "learning_rate": 2.743653855433041e-05, + "loss": 2.2009, + "step": 89360 + }, + { + "epoch": 12.07, + "learning_rate": 2.7412080649687684e-05, + "loss": 2.2324, + "step": 89370 + }, + { + "epoch": 12.07, + "learning_rate": 2.7387632554917453e-05, + "loss": 2.2004, + "step": 89380 + }, + { + "epoch": 12.07, + "learning_rate": 2.736319427197619e-05, + "loss": 2.2142, + "step": 89390 + }, + { + "epoch": 12.07, + "learning_rate": 2.7338765802819397e-05, + "loss": 2.2462, + "step": 89400 + }, + { + "epoch": 12.08, + "learning_rate": 2.7314347149402005e-05, + "loss": 2.2158, + "step": 89410 + }, + { + "epoch": 12.08, + "learning_rate": 2.7289938313678006e-05, + "loss": 2.2372, + "step": 89420 + }, + { + "epoch": 12.08, + "learning_rate": 2.7265539297600752e-05, + "loss": 2.2128, + "step": 89430 + }, + { + "epoch": 12.08, + "learning_rate": 2.7241150103122683e-05, + "loss": 2.2097, + "step": 89440 + }, + { + "epoch": 12.08, + "learning_rate": 2.721677073219547e-05, + "loss": 2.2011, + "step": 89450 + }, + { + "epoch": 12.08, + "learning_rate": 2.71924011867701e-05, + "loss": 2.2142, + "step": 89460 + }, + { + "epoch": 12.08, + "learning_rate": 2.716804146879667e-05, + "loss": 2.2278, + "step": 89470 + }, + { + "epoch": 12.08, + "learning_rate": 2.7143691580224465e-05, + "loss": 2.2481, + "step": 89480 + }, + { + "epoch": 12.09, + "learning_rate": 2.7119351523002126e-05, + "loss": 2.2027, + "step": 89490 + }, + { + "epoch": 12.09, + "learning_rate": 2.7095021299077364e-05, + "loss": 2.2063, + "step": 89500 + }, + { + "epoch": 12.09, + "learning_rate": 2.7070700910397203e-05, + "loss": 2.2193, + "step": 89510 + }, + { + "epoch": 12.09, + "learning_rate": 2.704639035890782e-05, + "loss": 2.2277, + "step": 89520 + }, + { + "epoch": 12.09, + "learning_rate": 2.702208964655458e-05, + "loss": 2.2209, + "step": 89530 + }, + { + "epoch": 12.09, + "learning_rate": 2.699779877528217e-05, + "loss": 2.2195, + "step": 89540 + }, + { + "epoch": 12.09, + "learning_rate": 2.6973517747034408e-05, + "loss": 2.2129, + "step": 89550 + }, + { + "epoch": 12.1, + "learning_rate": 2.69492465637543e-05, + "loss": 2.1983, + "step": 89560 + }, + { + "epoch": 12.1, + "learning_rate": 2.6924985227384117e-05, + "loss": 2.2166, + "step": 89570 + }, + { + "epoch": 12.1, + "learning_rate": 2.6900733739865314e-05, + "loss": 2.2486, + "step": 89580 + }, + { + "epoch": 12.1, + "learning_rate": 2.687649210313868e-05, + "loss": 2.2188, + "step": 89590 + }, + { + "epoch": 12.1, + "learning_rate": 2.6852260319143943e-05, + "loss": 2.2352, + "step": 89600 + }, + { + "epoch": 12.1, + "learning_rate": 2.6828038389820316e-05, + "loss": 2.1857, + "step": 89610 + }, + { + "epoch": 12.1, + "learning_rate": 2.6803826317106052e-05, + "loss": 2.221, + "step": 89620 + }, + { + "epoch": 12.11, + "learning_rate": 2.677962410293875e-05, + "loss": 2.2093, + "step": 89630 + }, + { + "epoch": 12.11, + "learning_rate": 2.6755431749255112e-05, + "loss": 2.1951, + "step": 89640 + }, + { + "epoch": 12.11, + "learning_rate": 2.673124925799104e-05, + "loss": 2.241, + "step": 89650 + }, + { + "epoch": 12.11, + "learning_rate": 2.6707076631081763e-05, + "loss": 2.2325, + "step": 89660 + }, + { + "epoch": 12.11, + "learning_rate": 2.6682913870461625e-05, + "loss": 2.2569, + "step": 89670 + }, + { + "epoch": 12.11, + "learning_rate": 2.6658760978064174e-05, + "loss": 2.2314, + "step": 89680 + }, + { + "epoch": 12.11, + "learning_rate": 2.663461795582225e-05, + "loss": 2.2247, + "step": 89690 + }, + { + "epoch": 12.11, + "learning_rate": 2.6610484805667824e-05, + "loss": 2.2419, + "step": 89700 + }, + { + "epoch": 12.12, + "learning_rate": 2.6586361529532134e-05, + "loss": 2.238, + "step": 89710 + }, + { + "epoch": 12.12, + "learning_rate": 2.6562248129345586e-05, + "loss": 2.2297, + "step": 89720 + }, + { + "epoch": 12.12, + "learning_rate": 2.6538144607037786e-05, + "loss": 2.2392, + "step": 89730 + }, + { + "epoch": 12.12, + "learning_rate": 2.6514050964537624e-05, + "loss": 2.2517, + "step": 89740 + }, + { + "epoch": 12.12, + "learning_rate": 2.6489967203773127e-05, + "loss": 2.2141, + "step": 89750 + }, + { + "epoch": 12.12, + "learning_rate": 2.646589332667155e-05, + "loss": 2.2244, + "step": 89760 + }, + { + "epoch": 12.12, + "learning_rate": 2.6441829335159303e-05, + "loss": 2.2319, + "step": 89770 + }, + { + "epoch": 12.13, + "learning_rate": 2.6417775231162147e-05, + "loss": 2.201, + "step": 89780 + }, + { + "epoch": 12.13, + "learning_rate": 2.6393731016604993e-05, + "loss": 2.2452, + "step": 89790 + }, + { + "epoch": 12.13, + "learning_rate": 2.636969669341181e-05, + "loss": 2.2169, + "step": 89800 + }, + { + "epoch": 12.13, + "learning_rate": 2.6345672263506025e-05, + "loss": 2.2075, + "step": 89810 + }, + { + "epoch": 12.13, + "learning_rate": 2.6321657728810036e-05, + "loss": 2.2206, + "step": 89820 + }, + { + "epoch": 12.13, + "learning_rate": 2.629765309124568e-05, + "loss": 2.2091, + "step": 89830 + }, + { + "epoch": 12.13, + "learning_rate": 2.62736583527338e-05, + "loss": 2.2429, + "step": 89840 + }, + { + "epoch": 12.13, + "learning_rate": 2.6249673515194546e-05, + "loss": 2.2293, + "step": 89850 + }, + { + "epoch": 12.14, + "learning_rate": 2.622569858054728e-05, + "loss": 2.233, + "step": 89860 + }, + { + "epoch": 12.14, + "learning_rate": 2.6201733550710574e-05, + "loss": 2.1981, + "step": 89870 + }, + { + "epoch": 12.14, + "learning_rate": 2.6177778427602085e-05, + "loss": 2.2054, + "step": 89880 + }, + { + "epoch": 12.14, + "learning_rate": 2.6153833213138904e-05, + "loss": 2.2526, + "step": 89890 + }, + { + "epoch": 12.14, + "learning_rate": 2.6129897909237106e-05, + "loss": 2.2377, + "step": 89900 + }, + { + "epoch": 12.14, + "learning_rate": 2.6105972517812136e-05, + "loss": 2.1945, + "step": 89910 + }, + { + "epoch": 12.14, + "learning_rate": 2.608205704077857e-05, + "loss": 2.2187, + "step": 89920 + }, + { + "epoch": 12.15, + "learning_rate": 2.6058151480050132e-05, + "loss": 2.2255, + "step": 89930 + }, + { + "epoch": 12.15, + "learning_rate": 2.603425583753991e-05, + "loss": 2.2325, + "step": 89940 + }, + { + "epoch": 12.15, + "learning_rate": 2.6010370115160067e-05, + "loss": 2.2379, + "step": 89950 + }, + { + "epoch": 12.15, + "learning_rate": 2.5986494314822014e-05, + "loss": 2.2331, + "step": 89960 + }, + { + "epoch": 12.15, + "learning_rate": 2.596262843843634e-05, + "loss": 2.2421, + "step": 89970 + }, + { + "epoch": 12.15, + "learning_rate": 2.5938772487912895e-05, + "loss": 2.2329, + "step": 89980 + }, + { + "epoch": 12.15, + "learning_rate": 2.591492646516079e-05, + "loss": 2.2268, + "step": 89990 + }, + { + "epoch": 12.16, + "learning_rate": 2.5891090372088112e-05, + "loss": 2.2236, + "step": 90000 + }, + { + "epoch": 12.16, + "eval_loss": 2.4117822647094727, + "eval_runtime": 1269.0092, + "eval_samples_per_second": 59.996, + "eval_steps_per_second": 5.0, + "step": 90000 + }, + { + "epoch": 12.16, + "learning_rate": 2.586726421060239e-05, + "loss": 2.2347, + "step": 90010 + }, + { + "epoch": 12.16, + "learning_rate": 2.584344798261021e-05, + "loss": 2.191, + "step": 90020 + }, + { + "epoch": 12.16, + "learning_rate": 2.58196416900175e-05, + "loss": 2.2093, + "step": 90030 + }, + { + "epoch": 12.16, + "learning_rate": 2.579584533472929e-05, + "loss": 2.2308, + "step": 90040 + }, + { + "epoch": 12.16, + "learning_rate": 2.5772058918649764e-05, + "loss": 2.236, + "step": 90050 + }, + { + "epoch": 12.16, + "learning_rate": 2.5748282443682488e-05, + "loss": 2.2309, + "step": 90060 + }, + { + "epoch": 12.16, + "learning_rate": 2.572451591173009e-05, + "loss": 2.2295, + "step": 90070 + }, + { + "epoch": 12.17, + "learning_rate": 2.5700759324694404e-05, + "loss": 2.2123, + "step": 90080 + }, + { + "epoch": 12.17, + "learning_rate": 2.5677012684476582e-05, + "loss": 2.2227, + "step": 90090 + }, + { + "epoch": 12.17, + "learning_rate": 2.5653275992976828e-05, + "loss": 2.2371, + "step": 90100 + }, + { + "epoch": 12.17, + "learning_rate": 2.562954925209474e-05, + "loss": 2.2377, + "step": 90110 + }, + { + "epoch": 12.17, + "learning_rate": 2.5605832463728843e-05, + "loss": 2.2337, + "step": 90120 + }, + { + "epoch": 12.17, + "learning_rate": 2.558212562977714e-05, + "loss": 2.1929, + "step": 90130 + }, + { + "epoch": 12.17, + "learning_rate": 2.5558428752136745e-05, + "loss": 2.223, + "step": 90140 + }, + { + "epoch": 12.18, + "learning_rate": 2.5534741832703908e-05, + "loss": 2.2286, + "step": 90150 + }, + { + "epoch": 12.18, + "learning_rate": 2.5511064873374132e-05, + "loss": 2.2231, + "step": 90160 + }, + { + "epoch": 12.18, + "learning_rate": 2.5487397876042116e-05, + "loss": 2.2101, + "step": 90170 + }, + { + "epoch": 12.18, + "learning_rate": 2.546374084260176e-05, + "loss": 2.212, + "step": 90180 + }, + { + "epoch": 12.18, + "learning_rate": 2.5440093774946284e-05, + "loss": 2.2127, + "step": 90190 + }, + { + "epoch": 12.18, + "learning_rate": 2.5416456674967846e-05, + "loss": 2.2208, + "step": 90200 + }, + { + "epoch": 12.18, + "learning_rate": 2.539282954455806e-05, + "loss": 2.2211, + "step": 90210 + }, + { + "epoch": 12.18, + "learning_rate": 2.5369212385607562e-05, + "loss": 2.2418, + "step": 90220 + }, + { + "epoch": 12.19, + "learning_rate": 2.534560520000638e-05, + "loss": 2.2295, + "step": 90230 + }, + { + "epoch": 12.19, + "learning_rate": 2.5322007989643566e-05, + "loss": 2.2137, + "step": 90240 + }, + { + "epoch": 12.19, + "learning_rate": 2.5298420756407428e-05, + "loss": 2.2211, + "step": 90250 + }, + { + "epoch": 12.19, + "learning_rate": 2.5274843502185555e-05, + "loss": 2.2179, + "step": 90260 + }, + { + "epoch": 12.19, + "learning_rate": 2.5251276228864637e-05, + "loss": 2.2197, + "step": 90270 + }, + { + "epoch": 12.19, + "learning_rate": 2.5227718938330565e-05, + "loss": 2.2251, + "step": 90280 + }, + { + "epoch": 12.19, + "learning_rate": 2.5204171632468533e-05, + "loss": 2.2385, + "step": 90290 + }, + { + "epoch": 12.2, + "learning_rate": 2.5180634313162816e-05, + "loss": 2.2138, + "step": 90300 + }, + { + "epoch": 12.2, + "learning_rate": 2.515710698229704e-05, + "loss": 2.2252, + "step": 90310 + }, + { + "epoch": 12.2, + "learning_rate": 2.5133589641753787e-05, + "loss": 2.2274, + "step": 90320 + }, + { + "epoch": 12.2, + "learning_rate": 2.511008229341508e-05, + "loss": 2.2187, + "step": 90330 + }, + { + "epoch": 12.2, + "learning_rate": 2.508658493916208e-05, + "loss": 2.2317, + "step": 90340 + }, + { + "epoch": 12.2, + "learning_rate": 2.506309758087507e-05, + "loss": 2.1969, + "step": 90350 + }, + { + "epoch": 12.2, + "learning_rate": 2.5039620220433594e-05, + "loss": 2.1977, + "step": 90360 + }, + { + "epoch": 12.21, + "learning_rate": 2.501615285971635e-05, + "loss": 2.2503, + "step": 90370 + }, + { + "epoch": 12.21, + "learning_rate": 2.4992695500601302e-05, + "loss": 2.2278, + "step": 90380 + }, + { + "epoch": 12.21, + "learning_rate": 2.4969248144965646e-05, + "loss": 2.2408, + "step": 90390 + }, + { + "epoch": 12.21, + "learning_rate": 2.4945810794685588e-05, + "loss": 2.2304, + "step": 90400 + }, + { + "epoch": 12.21, + "learning_rate": 2.492238345163676e-05, + "loss": 2.1981, + "step": 90410 + }, + { + "epoch": 12.21, + "learning_rate": 2.489896611769382e-05, + "loss": 2.1961, + "step": 90420 + }, + { + "epoch": 12.21, + "learning_rate": 2.487555879473076e-05, + "loss": 2.2441, + "step": 90430 + }, + { + "epoch": 12.21, + "learning_rate": 2.485216148462068e-05, + "loss": 2.2077, + "step": 90440 + }, + { + "epoch": 12.22, + "learning_rate": 2.482877418923587e-05, + "loss": 2.2143, + "step": 90450 + }, + { + "epoch": 12.22, + "learning_rate": 2.4805396910447923e-05, + "loss": 2.262, + "step": 90460 + }, + { + "epoch": 12.22, + "learning_rate": 2.478202965012753e-05, + "loss": 2.264, + "step": 90470 + }, + { + "epoch": 12.22, + "learning_rate": 2.4758672410144585e-05, + "loss": 2.2188, + "step": 90480 + }, + { + "epoch": 12.22, + "learning_rate": 2.4735325192368272e-05, + "loss": 2.2468, + "step": 90490 + }, + { + "epoch": 12.22, + "learning_rate": 2.4711987998666828e-05, + "loss": 2.2417, + "step": 90500 + }, + { + "epoch": 12.22, + "learning_rate": 2.4688660830907893e-05, + "loss": 2.2169, + "step": 90510 + }, + { + "epoch": 12.23, + "learning_rate": 2.4665343690958023e-05, + "loss": 2.246, + "step": 90520 + }, + { + "epoch": 12.23, + "learning_rate": 2.464203658068321e-05, + "loss": 2.2194, + "step": 90530 + }, + { + "epoch": 12.23, + "learning_rate": 2.4618739501948613e-05, + "loss": 2.2148, + "step": 90540 + }, + { + "epoch": 12.23, + "learning_rate": 2.4595452456618474e-05, + "loss": 2.2389, + "step": 90550 + }, + { + "epoch": 12.23, + "learning_rate": 2.45721754465563e-05, + "loss": 2.2065, + "step": 90560 + }, + { + "epoch": 12.23, + "learning_rate": 2.4548908473624774e-05, + "loss": 2.2102, + "step": 90570 + }, + { + "epoch": 12.23, + "learning_rate": 2.4525651539685796e-05, + "loss": 2.2317, + "step": 90580 + }, + { + "epoch": 12.23, + "learning_rate": 2.450240464660057e-05, + "loss": 2.2173, + "step": 90590 + }, + { + "epoch": 12.24, + "learning_rate": 2.447916779622922e-05, + "loss": 2.2194, + "step": 90600 + }, + { + "epoch": 12.24, + "learning_rate": 2.4455940990431343e-05, + "loss": 2.2093, + "step": 90610 + }, + { + "epoch": 12.24, + "learning_rate": 2.4432724231065553e-05, + "loss": 2.2255, + "step": 90620 + }, + { + "epoch": 12.24, + "learning_rate": 2.4409517519989796e-05, + "loss": 2.2059, + "step": 90630 + }, + { + "epoch": 12.24, + "learning_rate": 2.438632085906112e-05, + "loss": 2.234, + "step": 90640 + }, + { + "epoch": 12.24, + "learning_rate": 2.436313425013574e-05, + "loss": 2.2152, + "step": 90650 + }, + { + "epoch": 12.24, + "learning_rate": 2.4339957695069205e-05, + "loss": 2.2235, + "step": 90660 + }, + { + "epoch": 12.25, + "learning_rate": 2.431679119571615e-05, + "loss": 2.2403, + "step": 90670 + }, + { + "epoch": 12.25, + "learning_rate": 2.429363475393039e-05, + "loss": 2.2133, + "step": 90680 + }, + { + "epoch": 12.25, + "learning_rate": 2.4270488371565027e-05, + "loss": 2.2104, + "step": 90690 + }, + { + "epoch": 12.25, + "learning_rate": 2.4247352050472264e-05, + "loss": 2.2267, + "step": 90700 + }, + { + "epoch": 12.25, + "learning_rate": 2.4224225792503632e-05, + "loss": 2.2354, + "step": 90710 + }, + { + "epoch": 12.25, + "learning_rate": 2.4201109599509638e-05, + "loss": 2.2462, + "step": 90720 + }, + { + "epoch": 12.25, + "learning_rate": 2.4178003473340167e-05, + "loss": 2.2512, + "step": 90730 + }, + { + "epoch": 12.26, + "learning_rate": 2.415490741584431e-05, + "loss": 2.2069, + "step": 90740 + }, + { + "epoch": 12.26, + "learning_rate": 2.413182142887022e-05, + "loss": 2.2433, + "step": 90750 + }, + { + "epoch": 12.26, + "learning_rate": 2.4108745514265315e-05, + "loss": 2.2401, + "step": 90760 + }, + { + "epoch": 12.26, + "learning_rate": 2.4085679673876175e-05, + "loss": 2.2531, + "step": 90770 + }, + { + "epoch": 12.26, + "learning_rate": 2.4062623909548683e-05, + "loss": 2.2216, + "step": 90780 + }, + { + "epoch": 12.26, + "learning_rate": 2.403957822312777e-05, + "loss": 2.2111, + "step": 90790 + }, + { + "epoch": 12.26, + "learning_rate": 2.4016542616457607e-05, + "loss": 2.2432, + "step": 90800 + }, + { + "epoch": 12.26, + "learning_rate": 2.3993517091381655e-05, + "loss": 2.22, + "step": 90810 + }, + { + "epoch": 12.27, + "learning_rate": 2.397050164974242e-05, + "loss": 2.254, + "step": 90820 + }, + { + "epoch": 12.27, + "learning_rate": 2.3947496293381713e-05, + "loss": 2.2162, + "step": 90830 + }, + { + "epoch": 12.27, + "learning_rate": 2.392450102414048e-05, + "loss": 2.2313, + "step": 90840 + }, + { + "epoch": 12.27, + "learning_rate": 2.3901515843858833e-05, + "loss": 2.2249, + "step": 90850 + }, + { + "epoch": 12.27, + "learning_rate": 2.3878540754376197e-05, + "loss": 2.2189, + "step": 90860 + }, + { + "epoch": 12.27, + "learning_rate": 2.385557575753107e-05, + "loss": 2.2189, + "step": 90870 + }, + { + "epoch": 12.27, + "learning_rate": 2.383262085516115e-05, + "loss": 2.2333, + "step": 90880 + }, + { + "epoch": 12.28, + "learning_rate": 2.380967604910345e-05, + "loss": 2.2353, + "step": 90890 + }, + { + "epoch": 12.28, + "learning_rate": 2.3786741341193964e-05, + "loss": 2.2305, + "step": 90900 + }, + { + "epoch": 12.28, + "learning_rate": 2.3763816733268164e-05, + "loss": 2.2415, + "step": 90910 + }, + { + "epoch": 12.28, + "learning_rate": 2.374090222716038e-05, + "loss": 2.231, + "step": 90920 + }, + { + "epoch": 12.28, + "learning_rate": 2.3717997824704377e-05, + "loss": 2.2359, + "step": 90930 + }, + { + "epoch": 12.28, + "learning_rate": 2.3695103527733057e-05, + "loss": 2.2225, + "step": 90940 + }, + { + "epoch": 12.28, + "learning_rate": 2.3672219338078485e-05, + "loss": 2.2064, + "step": 90950 + }, + { + "epoch": 12.28, + "learning_rate": 2.3649345257571915e-05, + "loss": 2.2369, + "step": 90960 + }, + { + "epoch": 12.29, + "learning_rate": 2.362648128804377e-05, + "loss": 2.2174, + "step": 90970 + }, + { + "epoch": 12.29, + "learning_rate": 2.3603627431323762e-05, + "loss": 2.2254, + "step": 90980 + }, + { + "epoch": 12.29, + "learning_rate": 2.35807836892407e-05, + "loss": 2.2378, + "step": 90990 + }, + { + "epoch": 12.29, + "learning_rate": 2.355795006362259e-05, + "loss": 2.2325, + "step": 91000 + }, + { + "epoch": 12.29, + "eval_loss": 2.4098644256591797, + "eval_runtime": 1268.9861, + "eval_samples_per_second": 59.997, + "eval_steps_per_second": 5.0, + "step": 91000 + }, + { + "epoch": 12.29, + "learning_rate": 2.353512655629668e-05, + "loss": 2.2031, + "step": 91010 + }, + { + "epoch": 12.29, + "learning_rate": 2.3512313169089346e-05, + "loss": 2.2083, + "step": 91020 + }, + { + "epoch": 12.29, + "learning_rate": 2.348950990382625e-05, + "loss": 2.2523, + "step": 91030 + }, + { + "epoch": 12.3, + "learning_rate": 2.346671676233214e-05, + "loss": 2.2391, + "step": 91040 + }, + { + "epoch": 12.3, + "learning_rate": 2.344393374643096e-05, + "loss": 2.2306, + "step": 91050 + }, + { + "epoch": 12.3, + "learning_rate": 2.3421160857945953e-05, + "loss": 2.2282, + "step": 91060 + }, + { + "epoch": 12.3, + "learning_rate": 2.3398398098699444e-05, + "loss": 2.2384, + "step": 91070 + }, + { + "epoch": 12.3, + "learning_rate": 2.3375645470512928e-05, + "loss": 2.2188, + "step": 91080 + }, + { + "epoch": 12.3, + "learning_rate": 2.3352902975207243e-05, + "loss": 2.2409, + "step": 91090 + }, + { + "epoch": 12.3, + "learning_rate": 2.333017061460222e-05, + "loss": 2.2024, + "step": 91100 + }, + { + "epoch": 12.31, + "learning_rate": 2.330972015673037e-05, + "loss": 2.2323, + "step": 91110 + }, + { + "epoch": 12.31, + "learning_rate": 2.3287007057067725e-05, + "loss": 2.2336, + "step": 91120 + }, + { + "epoch": 12.31, + "learning_rate": 2.3264304097378937e-05, + "loss": 2.2283, + "step": 91130 + }, + { + "epoch": 12.31, + "learning_rate": 2.3241611279480877e-05, + "loss": 2.22, + "step": 91140 + }, + { + "epoch": 12.31, + "learning_rate": 2.3218928605189417e-05, + "loss": 2.2265, + "step": 91150 + }, + { + "epoch": 12.31, + "learning_rate": 2.319625607631968e-05, + "loss": 2.2311, + "step": 91160 + }, + { + "epoch": 12.31, + "learning_rate": 2.3173593694686078e-05, + "loss": 2.2058, + "step": 91170 + }, + { + "epoch": 12.31, + "learning_rate": 2.3150941462102058e-05, + "loss": 2.204, + "step": 91180 + }, + { + "epoch": 12.32, + "learning_rate": 2.3128299380380322e-05, + "loss": 2.2183, + "step": 91190 + }, + { + "epoch": 12.32, + "learning_rate": 2.310566745133282e-05, + "loss": 2.2477, + "step": 91200 + }, + { + "epoch": 12.32, + "learning_rate": 2.3083045676770538e-05, + "loss": 2.2467, + "step": 91210 + }, + { + "epoch": 12.32, + "learning_rate": 2.3060434058503847e-05, + "loss": 2.2145, + "step": 91220 + }, + { + "epoch": 12.32, + "learning_rate": 2.3037832598342142e-05, + "loss": 2.2072, + "step": 91230 + }, + { + "epoch": 12.32, + "learning_rate": 2.3015241298094033e-05, + "loss": 2.2248, + "step": 91240 + }, + { + "epoch": 12.32, + "learning_rate": 2.299266015956744e-05, + "loss": 2.237, + "step": 91250 + }, + { + "epoch": 12.33, + "learning_rate": 2.2970089184569294e-05, + "loss": 2.2178, + "step": 91260 + }, + { + "epoch": 12.33, + "learning_rate": 2.2947528374905795e-05, + "loss": 2.2073, + "step": 91270 + }, + { + "epoch": 12.33, + "learning_rate": 2.2924977732382405e-05, + "loss": 2.2328, + "step": 91280 + }, + { + "epoch": 12.33, + "learning_rate": 2.2902437258803603e-05, + "loss": 2.2266, + "step": 91290 + }, + { + "epoch": 12.33, + "learning_rate": 2.2879906955973277e-05, + "loss": 2.2358, + "step": 91300 + }, + { + "epoch": 12.33, + "learning_rate": 2.285738682569422e-05, + "loss": 2.2397, + "step": 91310 + }, + { + "epoch": 12.33, + "learning_rate": 2.2834876869768655e-05, + "loss": 2.2108, + "step": 91320 + }, + { + "epoch": 12.33, + "learning_rate": 2.2812377089997873e-05, + "loss": 2.1901, + "step": 91330 + }, + { + "epoch": 12.34, + "learning_rate": 2.2789887488182406e-05, + "loss": 2.2082, + "step": 91340 + }, + { + "epoch": 12.34, + "learning_rate": 2.2767408066121913e-05, + "loss": 2.2253, + "step": 91350 + }, + { + "epoch": 12.34, + "learning_rate": 2.2744938825615265e-05, + "loss": 2.2077, + "step": 91360 + }, + { + "epoch": 12.34, + "learning_rate": 2.272247976846055e-05, + "loss": 2.2416, + "step": 91370 + }, + { + "epoch": 12.34, + "learning_rate": 2.2700030896455007e-05, + "loss": 2.2264, + "step": 91380 + }, + { + "epoch": 12.34, + "learning_rate": 2.2677592211395035e-05, + "loss": 2.2437, + "step": 91390 + }, + { + "epoch": 12.34, + "learning_rate": 2.265516371507628e-05, + "loss": 2.2331, + "step": 91400 + }, + { + "epoch": 12.35, + "learning_rate": 2.2632745409293507e-05, + "loss": 2.2451, + "step": 91410 + }, + { + "epoch": 12.35, + "learning_rate": 2.2610337295840762e-05, + "loss": 2.2276, + "step": 91420 + }, + { + "epoch": 12.35, + "learning_rate": 2.2587939376511167e-05, + "loss": 2.2378, + "step": 91430 + }, + { + "epoch": 12.35, + "learning_rate": 2.2565551653097048e-05, + "loss": 2.2306, + "step": 91440 + }, + { + "epoch": 12.35, + "learning_rate": 2.2543174127390025e-05, + "loss": 2.2094, + "step": 91450 + }, + { + "epoch": 12.35, + "learning_rate": 2.2520806801180746e-05, + "loss": 2.2243, + "step": 91460 + }, + { + "epoch": 12.35, + "learning_rate": 2.2498449676259117e-05, + "loss": 2.237, + "step": 91470 + }, + { + "epoch": 12.36, + "learning_rate": 2.247610275441427e-05, + "loss": 2.2329, + "step": 91480 + }, + { + "epoch": 12.36, + "learning_rate": 2.245376603743443e-05, + "loss": 2.2165, + "step": 91490 + }, + { + "epoch": 12.36, + "learning_rate": 2.2431439527107126e-05, + "loss": 2.2316, + "step": 91500 + }, + { + "epoch": 12.36, + "learning_rate": 2.2409123225218883e-05, + "loss": 2.2224, + "step": 91510 + }, + { + "epoch": 12.36, + "learning_rate": 2.2386817133555622e-05, + "loss": 2.2212, + "step": 91520 + }, + { + "epoch": 12.36, + "learning_rate": 2.2364521253902266e-05, + "loss": 2.2403, + "step": 91530 + }, + { + "epoch": 12.36, + "learning_rate": 2.234223558804309e-05, + "loss": 2.219, + "step": 91540 + }, + { + "epoch": 12.36, + "learning_rate": 2.2319960137761412e-05, + "loss": 2.226, + "step": 91550 + }, + { + "epoch": 12.37, + "learning_rate": 2.2297694904839743e-05, + "loss": 2.2057, + "step": 91560 + }, + { + "epoch": 12.37, + "learning_rate": 2.2275439891059904e-05, + "loss": 2.2426, + "step": 91570 + }, + { + "epoch": 12.37, + "learning_rate": 2.2253195098202776e-05, + "loss": 2.2635, + "step": 91580 + }, + { + "epoch": 12.37, + "learning_rate": 2.2230960528048415e-05, + "loss": 2.2232, + "step": 91590 + }, + { + "epoch": 12.37, + "learning_rate": 2.2208736182376163e-05, + "loss": 2.2171, + "step": 91600 + }, + { + "epoch": 12.37, + "learning_rate": 2.2186522062964433e-05, + "loss": 2.2224, + "step": 91610 + }, + { + "epoch": 12.37, + "learning_rate": 2.2164318171590938e-05, + "loss": 2.2499, + "step": 91620 + }, + { + "epoch": 12.38, + "learning_rate": 2.2142124510032437e-05, + "loss": 2.2365, + "step": 91630 + }, + { + "epoch": 12.38, + "learning_rate": 2.2119941080064946e-05, + "loss": 2.2284, + "step": 91640 + }, + { + "epoch": 12.38, + "learning_rate": 2.2097767883463692e-05, + "loss": 2.2085, + "step": 91650 + }, + { + "epoch": 12.38, + "learning_rate": 2.207560492200302e-05, + "loss": 2.2084, + "step": 91660 + }, + { + "epoch": 12.38, + "learning_rate": 2.2053452197456435e-05, + "loss": 2.2434, + "step": 91670 + }, + { + "epoch": 12.38, + "learning_rate": 2.2031309711596745e-05, + "loss": 2.223, + "step": 91680 + }, + { + "epoch": 12.38, + "learning_rate": 2.2009177466195804e-05, + "loss": 2.2459, + "step": 91690 + }, + { + "epoch": 12.38, + "learning_rate": 2.1987055463024793e-05, + "loss": 2.252, + "step": 91700 + }, + { + "epoch": 12.39, + "learning_rate": 2.1964943703853867e-05, + "loss": 2.2, + "step": 91710 + }, + { + "epoch": 12.39, + "learning_rate": 2.1942842190452554e-05, + "loss": 2.2486, + "step": 91720 + }, + { + "epoch": 12.39, + "learning_rate": 2.1920750924589424e-05, + "loss": 2.2214, + "step": 91730 + }, + { + "epoch": 12.39, + "learning_rate": 2.1898669908032367e-05, + "loss": 2.2453, + "step": 91740 + }, + { + "epoch": 12.39, + "learning_rate": 2.187659914254835e-05, + "loss": 2.2261, + "step": 91750 + }, + { + "epoch": 12.39, + "learning_rate": 2.1854538629903494e-05, + "loss": 2.2244, + "step": 91760 + }, + { + "epoch": 12.39, + "learning_rate": 2.183248837186322e-05, + "loss": 2.2138, + "step": 91770 + }, + { + "epoch": 12.4, + "learning_rate": 2.1810448370192035e-05, + "loss": 2.223, + "step": 91780 + }, + { + "epoch": 12.4, + "learning_rate": 2.1788418626653625e-05, + "loss": 2.2262, + "step": 91790 + }, + { + "epoch": 12.4, + "learning_rate": 2.1766399143010914e-05, + "loss": 2.205, + "step": 91800 + }, + { + "epoch": 12.4, + "learning_rate": 2.174438992102593e-05, + "loss": 2.1996, + "step": 91810 + }, + { + "epoch": 12.4, + "learning_rate": 2.172239096245999e-05, + "loss": 2.2337, + "step": 91820 + }, + { + "epoch": 12.4, + "learning_rate": 2.1700402269073475e-05, + "loss": 2.2384, + "step": 91830 + }, + { + "epoch": 12.4, + "learning_rate": 2.1678423842625942e-05, + "loss": 2.2182, + "step": 91840 + }, + { + "epoch": 12.41, + "learning_rate": 2.1656455684876267e-05, + "loss": 2.2115, + "step": 91850 + }, + { + "epoch": 12.41, + "learning_rate": 2.1634497797582377e-05, + "loss": 2.2451, + "step": 91860 + }, + { + "epoch": 12.41, + "learning_rate": 2.1612550182501404e-05, + "loss": 2.2411, + "step": 91870 + }, + { + "epoch": 12.41, + "learning_rate": 2.159061284138962e-05, + "loss": 2.2488, + "step": 91880 + }, + { + "epoch": 12.41, + "learning_rate": 2.1568685776002558e-05, + "loss": 2.2493, + "step": 91890 + }, + { + "epoch": 12.41, + "learning_rate": 2.1546768988094983e-05, + "loss": 2.2714, + "step": 91900 + }, + { + "epoch": 12.41, + "learning_rate": 2.1524862479420584e-05, + "loss": 2.1985, + "step": 91910 + }, + { + "epoch": 12.41, + "learning_rate": 2.15029662517325e-05, + "loss": 2.2081, + "step": 91920 + }, + { + "epoch": 12.42, + "learning_rate": 2.1481080306782877e-05, + "loss": 2.2127, + "step": 91930 + }, + { + "epoch": 12.42, + "learning_rate": 2.1459204646323148e-05, + "loss": 2.231, + "step": 91940 + }, + { + "epoch": 12.42, + "learning_rate": 2.1437339272103844e-05, + "loss": 2.2594, + "step": 91950 + }, + { + "epoch": 12.42, + "learning_rate": 2.1415484185874666e-05, + "loss": 2.216, + "step": 91960 + }, + { + "epoch": 12.42, + "learning_rate": 2.1393639389384615e-05, + "loss": 2.2125, + "step": 91970 + }, + { + "epoch": 12.42, + "learning_rate": 2.137180488438173e-05, + "loss": 2.2177, + "step": 91980 + }, + { + "epoch": 12.42, + "learning_rate": 2.134998067261322e-05, + "loss": 2.2235, + "step": 91990 + }, + { + "epoch": 12.43, + "learning_rate": 2.1328166755825633e-05, + "loss": 2.2176, + "step": 92000 + }, + { + "epoch": 12.43, + "eval_loss": 2.4086060523986816, + "eval_runtime": 1269.3719, + "eval_samples_per_second": 59.978, + "eval_steps_per_second": 4.999, + "step": 92000 + }, + { + "epoch": 12.43, + "learning_rate": 2.1306363135764516e-05, + "loss": 2.2402, + "step": 92010 + }, + { + "epoch": 12.43, + "learning_rate": 2.1284569814174708e-05, + "loss": 2.2256, + "step": 92020 + }, + { + "epoch": 12.43, + "learning_rate": 2.1262786792800163e-05, + "loss": 2.2356, + "step": 92030 + }, + { + "epoch": 12.43, + "learning_rate": 2.1241014073383987e-05, + "loss": 2.2135, + "step": 92040 + }, + { + "epoch": 12.43, + "learning_rate": 2.1219251657668572e-05, + "loss": 2.2379, + "step": 92050 + }, + { + "epoch": 12.43, + "learning_rate": 2.119749954739539e-05, + "loss": 2.2327, + "step": 92060 + }, + { + "epoch": 12.43, + "learning_rate": 2.1175757744305083e-05, + "loss": 2.222, + "step": 92070 + }, + { + "epoch": 12.44, + "learning_rate": 2.1154026250137505e-05, + "loss": 2.2457, + "step": 92080 + }, + { + "epoch": 12.44, + "learning_rate": 2.113230506663169e-05, + "loss": 2.2255, + "step": 92090 + }, + { + "epoch": 12.44, + "learning_rate": 2.1110594195525905e-05, + "loss": 2.2326, + "step": 92100 + }, + { + "epoch": 12.44, + "learning_rate": 2.1088893638557398e-05, + "loss": 2.2207, + "step": 92110 + }, + { + "epoch": 12.44, + "learning_rate": 2.1067203397462796e-05, + "loss": 2.2028, + "step": 92120 + }, + { + "epoch": 12.44, + "learning_rate": 2.104552347397776e-05, + "loss": 2.2093, + "step": 92130 + }, + { + "epoch": 12.44, + "learning_rate": 2.102385386983727e-05, + "loss": 2.2001, + "step": 92140 + }, + { + "epoch": 12.45, + "learning_rate": 2.1002194586775356e-05, + "loss": 2.2245, + "step": 92150 + }, + { + "epoch": 12.45, + "learning_rate": 2.0980545626525212e-05, + "loss": 2.2229, + "step": 92160 + }, + { + "epoch": 12.45, + "learning_rate": 2.0958906990819358e-05, + "loss": 2.2274, + "step": 92170 + }, + { + "epoch": 12.45, + "learning_rate": 2.0937278681389302e-05, + "loss": 2.181, + "step": 92180 + }, + { + "epoch": 12.45, + "learning_rate": 2.091566069996583e-05, + "loss": 2.1901, + "step": 92190 + }, + { + "epoch": 12.45, + "learning_rate": 2.0894053048278908e-05, + "loss": 2.2071, + "step": 92200 + }, + { + "epoch": 12.45, + "learning_rate": 2.0872455728057602e-05, + "loss": 2.2281, + "step": 92210 + }, + { + "epoch": 12.46, + "learning_rate": 2.0850868741030246e-05, + "loss": 2.2247, + "step": 92220 + }, + { + "epoch": 12.46, + "learning_rate": 2.0829292088924292e-05, + "loss": 2.2183, + "step": 92230 + }, + { + "epoch": 12.46, + "learning_rate": 2.0807725773466326e-05, + "loss": 2.232, + "step": 92240 + }, + { + "epoch": 12.46, + "learning_rate": 2.0786169796382203e-05, + "loss": 2.2621, + "step": 92250 + }, + { + "epoch": 12.46, + "learning_rate": 2.0764624159396903e-05, + "loss": 2.1754, + "step": 92260 + }, + { + "epoch": 12.46, + "learning_rate": 2.0743088864234535e-05, + "loss": 2.2388, + "step": 92270 + }, + { + "epoch": 12.46, + "learning_rate": 2.072156391261842e-05, + "loss": 2.2055, + "step": 92280 + }, + { + "epoch": 12.46, + "learning_rate": 2.0700049306271067e-05, + "loss": 2.2291, + "step": 92290 + }, + { + "epoch": 12.47, + "learning_rate": 2.0678545046914226e-05, + "loss": 2.237, + "step": 92300 + }, + { + "epoch": 12.47, + "learning_rate": 2.065705113626859e-05, + "loss": 2.2516, + "step": 92310 + }, + { + "epoch": 12.47, + "learning_rate": 2.0635567576054262e-05, + "loss": 2.2014, + "step": 92320 + }, + { + "epoch": 12.47, + "learning_rate": 2.0614094367990374e-05, + "loss": 2.2167, + "step": 92330 + }, + { + "epoch": 12.47, + "learning_rate": 2.059263151379535e-05, + "loss": 2.2223, + "step": 92340 + }, + { + "epoch": 12.47, + "learning_rate": 2.057117901518666e-05, + "loss": 2.2324, + "step": 92350 + }, + { + "epoch": 12.47, + "learning_rate": 2.054973687388099e-05, + "loss": 2.2325, + "step": 92360 + }, + { + "epoch": 12.48, + "learning_rate": 2.0528305091594262e-05, + "loss": 2.2096, + "step": 92370 + }, + { + "epoch": 12.48, + "learning_rate": 2.0506883670041468e-05, + "loss": 2.1884, + "step": 92380 + }, + { + "epoch": 12.48, + "learning_rate": 2.0485472610936827e-05, + "loss": 2.2401, + "step": 92390 + }, + { + "epoch": 12.48, + "learning_rate": 2.0464071915993745e-05, + "loss": 2.2122, + "step": 92400 + }, + { + "epoch": 12.48, + "learning_rate": 2.0442681586924726e-05, + "loss": 2.2141, + "step": 92410 + }, + { + "epoch": 12.48, + "learning_rate": 2.0421301625441545e-05, + "loss": 2.2109, + "step": 92420 + }, + { + "epoch": 12.48, + "learning_rate": 2.0399932033255085e-05, + "loss": 2.2521, + "step": 92430 + }, + { + "epoch": 12.48, + "learning_rate": 2.0378572812075344e-05, + "loss": 2.238, + "step": 92440 + }, + { + "epoch": 12.49, + "learning_rate": 2.0357223963611642e-05, + "loss": 2.2535, + "step": 92450 + }, + { + "epoch": 12.49, + "learning_rate": 2.033588548957234e-05, + "loss": 2.2248, + "step": 92460 + }, + { + "epoch": 12.49, + "learning_rate": 2.0314557391665025e-05, + "loss": 2.2302, + "step": 92470 + }, + { + "epoch": 12.49, + "learning_rate": 2.0293239671596385e-05, + "loss": 2.219, + "step": 92480 + }, + { + "epoch": 12.49, + "learning_rate": 2.0271932331072367e-05, + "loss": 2.2474, + "step": 92490 + }, + { + "epoch": 12.49, + "learning_rate": 2.0250635371798124e-05, + "loss": 2.2179, + "step": 92500 + }, + { + "epoch": 12.49, + "learning_rate": 2.022934879547778e-05, + "loss": 2.2209, + "step": 92510 + }, + { + "epoch": 12.5, + "learning_rate": 2.020807260381485e-05, + "loss": 2.1919, + "step": 92520 + }, + { + "epoch": 12.5, + "learning_rate": 2.0186806798511845e-05, + "loss": 2.2113, + "step": 92530 + }, + { + "epoch": 12.5, + "learning_rate": 2.0165551381270585e-05, + "loss": 2.2179, + "step": 92540 + }, + { + "epoch": 12.5, + "learning_rate": 2.014430635379199e-05, + "loss": 2.2343, + "step": 92550 + }, + { + "epoch": 12.5, + "learning_rate": 2.01230717177761e-05, + "loss": 2.2336, + "step": 92560 + }, + { + "epoch": 12.5, + "learning_rate": 2.0101847474922246e-05, + "loss": 2.2326, + "step": 92570 + }, + { + "epoch": 12.5, + "learning_rate": 2.0080633626928826e-05, + "loss": 2.2414, + "step": 92580 + }, + { + "epoch": 12.5, + "learning_rate": 2.0059430175493407e-05, + "loss": 2.2063, + "step": 92590 + }, + { + "epoch": 12.51, + "learning_rate": 2.0038237122312827e-05, + "loss": 2.2023, + "step": 92600 + }, + { + "epoch": 12.51, + "learning_rate": 2.001705446908295e-05, + "loss": 2.2186, + "step": 92610 + }, + { + "epoch": 12.51, + "learning_rate": 1.999588221749895e-05, + "loss": 2.2591, + "step": 92620 + }, + { + "epoch": 12.51, + "learning_rate": 1.9974720369255054e-05, + "loss": 2.2078, + "step": 92630 + }, + { + "epoch": 12.51, + "learning_rate": 1.9953568926044662e-05, + "loss": 2.248, + "step": 92640 + }, + { + "epoch": 12.51, + "learning_rate": 1.9932427889560465e-05, + "loss": 2.2084, + "step": 92650 + }, + { + "epoch": 12.51, + "learning_rate": 1.9911297261494214e-05, + "loss": 2.2405, + "step": 92660 + }, + { + "epoch": 12.52, + "learning_rate": 1.9890177043536804e-05, + "loss": 2.2453, + "step": 92670 + }, + { + "epoch": 12.52, + "learning_rate": 1.9869067237378333e-05, + "loss": 2.2229, + "step": 92680 + }, + { + "epoch": 12.52, + "learning_rate": 1.984796784470812e-05, + "loss": 2.2225, + "step": 92690 + }, + { + "epoch": 12.52, + "learning_rate": 1.9826878867214657e-05, + "loss": 2.2131, + "step": 92700 + }, + { + "epoch": 12.52, + "learning_rate": 1.9805800306585418e-05, + "loss": 2.2373, + "step": 92710 + }, + { + "epoch": 12.52, + "learning_rate": 1.9784732164507268e-05, + "loss": 2.2226, + "step": 92720 + }, + { + "epoch": 12.52, + "learning_rate": 1.9763674442666104e-05, + "loss": 2.2329, + "step": 92730 + }, + { + "epoch": 12.53, + "learning_rate": 1.9742627142747068e-05, + "loss": 2.1925, + "step": 92740 + }, + { + "epoch": 12.53, + "learning_rate": 1.9721590266434427e-05, + "loss": 2.2206, + "step": 92750 + }, + { + "epoch": 12.53, + "learning_rate": 1.970056381541157e-05, + "loss": 2.2343, + "step": 92760 + }, + { + "epoch": 12.53, + "learning_rate": 1.967954779136116e-05, + "loss": 2.2207, + "step": 92770 + }, + { + "epoch": 12.53, + "learning_rate": 1.9658542195964945e-05, + "loss": 2.248, + "step": 92780 + }, + { + "epoch": 12.53, + "learning_rate": 1.9637547030903822e-05, + "loss": 2.2269, + "step": 92790 + }, + { + "epoch": 12.53, + "learning_rate": 1.961656229785795e-05, + "loss": 2.1977, + "step": 92800 + }, + { + "epoch": 12.53, + "learning_rate": 1.9595587998506542e-05, + "loss": 2.2045, + "step": 92810 + }, + { + "epoch": 12.54, + "learning_rate": 1.9574624134528076e-05, + "loss": 2.2152, + "step": 92820 + }, + { + "epoch": 12.54, + "learning_rate": 1.9553670707600123e-05, + "loss": 2.2466, + "step": 92830 + }, + { + "epoch": 12.54, + "learning_rate": 1.9532727719399415e-05, + "loss": 2.2343, + "step": 92840 + }, + { + "epoch": 12.54, + "learning_rate": 1.9511795171601934e-05, + "loss": 2.235, + "step": 92850 + }, + { + "epoch": 12.54, + "learning_rate": 1.9490873065882724e-05, + "loss": 2.2112, + "step": 92860 + }, + { + "epoch": 12.54, + "learning_rate": 1.9469961403916072e-05, + "loss": 2.2192, + "step": 92870 + }, + { + "epoch": 12.54, + "learning_rate": 1.944906018737533e-05, + "loss": 2.2214, + "step": 92880 + }, + { + "epoch": 12.55, + "learning_rate": 1.9428169417933127e-05, + "loss": 2.2339, + "step": 92890 + }, + { + "epoch": 12.55, + "learning_rate": 1.940728909726127e-05, + "loss": 2.2471, + "step": 92900 + }, + { + "epoch": 12.55, + "learning_rate": 1.9386419227030542e-05, + "loss": 2.2155, + "step": 92910 + }, + { + "epoch": 12.55, + "learning_rate": 1.936555980891111e-05, + "loss": 2.2397, + "step": 92920 + }, + { + "epoch": 12.55, + "learning_rate": 1.934471084457215e-05, + "loss": 2.231, + "step": 92930 + }, + { + "epoch": 12.55, + "learning_rate": 1.932387233568211e-05, + "loss": 2.2353, + "step": 92940 + }, + { + "epoch": 12.55, + "learning_rate": 1.930304428390855e-05, + "loss": 2.2124, + "step": 92950 + }, + { + "epoch": 12.55, + "learning_rate": 1.9282226690918122e-05, + "loss": 2.2021, + "step": 92960 + }, + { + "epoch": 12.56, + "learning_rate": 1.926141955837682e-05, + "loss": 2.2739, + "step": 92970 + }, + { + "epoch": 12.56, + "learning_rate": 1.9240622887949663e-05, + "loss": 2.219, + "step": 92980 + }, + { + "epoch": 12.56, + "learning_rate": 1.9219836681300797e-05, + "loss": 2.2419, + "step": 92990 + }, + { + "epoch": 12.56, + "learning_rate": 1.9199060940093714e-05, + "loss": 2.237, + "step": 93000 + }, + { + "epoch": 12.56, + "eval_loss": 2.406857490539551, + "eval_runtime": 1269.1794, + "eval_samples_per_second": 59.988, + "eval_steps_per_second": 4.999, + "step": 93000 + }, + { + "epoch": 12.56, + "learning_rate": 1.9178295665990844e-05, + "loss": 2.2253, + "step": 93010 + }, + { + "epoch": 12.56, + "learning_rate": 1.9157540860654013e-05, + "loss": 2.2572, + "step": 93020 + }, + { + "epoch": 12.56, + "learning_rate": 1.9136796525743957e-05, + "loss": 2.21, + "step": 93030 + }, + { + "epoch": 12.57, + "learning_rate": 1.911606266292075e-05, + "loss": 2.2103, + "step": 93040 + }, + { + "epoch": 12.57, + "learning_rate": 1.909533927384363e-05, + "loss": 2.2428, + "step": 93050 + }, + { + "epoch": 12.57, + "learning_rate": 1.9074626360170918e-05, + "loss": 2.2502, + "step": 93060 + }, + { + "epoch": 12.57, + "learning_rate": 1.905392392356011e-05, + "loss": 2.208, + "step": 93070 + }, + { + "epoch": 12.57, + "learning_rate": 1.903323196566786e-05, + "loss": 2.2661, + "step": 93080 + }, + { + "epoch": 12.57, + "learning_rate": 1.901255048815003e-05, + "loss": 2.2563, + "step": 93090 + }, + { + "epoch": 12.57, + "learning_rate": 1.8991879492661694e-05, + "loss": 2.2292, + "step": 93100 + }, + { + "epoch": 12.58, + "learning_rate": 1.8971218980856863e-05, + "loss": 2.1974, + "step": 93110 + }, + { + "epoch": 12.58, + "learning_rate": 1.895056895438898e-05, + "loss": 2.2258, + "step": 93120 + }, + { + "epoch": 12.58, + "learning_rate": 1.892992941491042e-05, + "loss": 2.2105, + "step": 93130 + }, + { + "epoch": 12.58, + "learning_rate": 1.8909300364072937e-05, + "loss": 2.2175, + "step": 93140 + }, + { + "epoch": 12.58, + "learning_rate": 1.8888681803527257e-05, + "loss": 2.2368, + "step": 93150 + }, + { + "epoch": 12.58, + "learning_rate": 1.886807373492334e-05, + "loss": 2.2172, + "step": 93160 + }, + { + "epoch": 12.58, + "learning_rate": 1.884747615991035e-05, + "loss": 2.2145, + "step": 93170 + }, + { + "epoch": 12.58, + "learning_rate": 1.8826889080136558e-05, + "loss": 2.2076, + "step": 93180 + }, + { + "epoch": 12.59, + "learning_rate": 1.8806312497249376e-05, + "loss": 2.2249, + "step": 93190 + }, + { + "epoch": 12.59, + "learning_rate": 1.8785746412895454e-05, + "loss": 2.2347, + "step": 93200 + }, + { + "epoch": 12.59, + "learning_rate": 1.8765190828720505e-05, + "loss": 2.23, + "step": 93210 + }, + { + "epoch": 12.59, + "learning_rate": 1.8744645746369536e-05, + "loss": 2.2404, + "step": 93220 + }, + { + "epoch": 12.59, + "learning_rate": 1.872411116748651e-05, + "loss": 2.2183, + "step": 93230 + }, + { + "epoch": 12.59, + "learning_rate": 1.870358709371472e-05, + "loss": 2.2372, + "step": 93240 + }, + { + "epoch": 12.59, + "learning_rate": 1.8683073526696623e-05, + "loss": 2.2558, + "step": 93250 + }, + { + "epoch": 12.6, + "learning_rate": 1.8662570468073735e-05, + "loss": 2.2441, + "step": 93260 + }, + { + "epoch": 12.6, + "learning_rate": 1.8642077919486752e-05, + "loss": 2.23, + "step": 93270 + }, + { + "epoch": 12.6, + "learning_rate": 1.862159588257557e-05, + "loss": 2.2571, + "step": 93280 + }, + { + "epoch": 12.6, + "learning_rate": 1.860112435897922e-05, + "loss": 2.2288, + "step": 93290 + }, + { + "epoch": 12.6, + "learning_rate": 1.8580663350335977e-05, + "loss": 2.2151, + "step": 93300 + }, + { + "epoch": 12.6, + "learning_rate": 1.856021285828305e-05, + "loss": 2.2203, + "step": 93310 + }, + { + "epoch": 12.6, + "learning_rate": 1.8539772884457084e-05, + "loss": 2.2385, + "step": 93320 + }, + { + "epoch": 12.6, + "learning_rate": 1.8519343430493645e-05, + "loss": 2.2213, + "step": 93330 + }, + { + "epoch": 12.61, + "learning_rate": 1.849892449802766e-05, + "loss": 2.2235, + "step": 93340 + }, + { + "epoch": 12.61, + "learning_rate": 1.8478516088693064e-05, + "loss": 2.2109, + "step": 93350 + }, + { + "epoch": 12.61, + "learning_rate": 1.8458118204122975e-05, + "loss": 2.2397, + "step": 93360 + }, + { + "epoch": 12.61, + "learning_rate": 1.8437730845949755e-05, + "loss": 2.2284, + "step": 93370 + }, + { + "epoch": 12.61, + "learning_rate": 1.8417354015804854e-05, + "loss": 2.2067, + "step": 93380 + }, + { + "epoch": 12.61, + "learning_rate": 1.8396987715318827e-05, + "loss": 2.2162, + "step": 93390 + }, + { + "epoch": 12.61, + "learning_rate": 1.8376631946121546e-05, + "loss": 2.2135, + "step": 93400 + }, + { + "epoch": 12.62, + "learning_rate": 1.835628670984185e-05, + "loss": 2.2441, + "step": 93410 + }, + { + "epoch": 12.62, + "learning_rate": 1.8335952008107956e-05, + "loss": 2.2199, + "step": 93420 + }, + { + "epoch": 12.62, + "learning_rate": 1.8315627842546947e-05, + "loss": 2.1905, + "step": 93430 + }, + { + "epoch": 12.62, + "learning_rate": 1.829531421478532e-05, + "loss": 2.2261, + "step": 93440 + }, + { + "epoch": 12.62, + "learning_rate": 1.82770409609619e-05, + "loss": 2.2382, + "step": 93450 + }, + { + "epoch": 12.62, + "learning_rate": 1.8256747359496832e-05, + "loss": 2.2406, + "step": 93460 + }, + { + "epoch": 12.62, + "learning_rate": 1.8236464300542925e-05, + "loss": 2.2395, + "step": 93470 + }, + { + "epoch": 12.63, + "learning_rate": 1.8216191785723316e-05, + "loss": 2.2318, + "step": 93480 + }, + { + "epoch": 12.63, + "learning_rate": 1.8195929816660276e-05, + "loss": 2.2226, + "step": 93490 + }, + { + "epoch": 12.63, + "learning_rate": 1.817567839497519e-05, + "loss": 2.2375, + "step": 93500 + }, + { + "epoch": 12.63, + "learning_rate": 1.8155437522288694e-05, + "loss": 2.1982, + "step": 93510 + }, + { + "epoch": 12.63, + "learning_rate": 1.813520720022048e-05, + "loss": 2.2262, + "step": 93520 + }, + { + "epoch": 12.63, + "learning_rate": 1.81149874303895e-05, + "loss": 2.2067, + "step": 93530 + }, + { + "epoch": 12.63, + "learning_rate": 1.8094778214413775e-05, + "loss": 2.2605, + "step": 93540 + }, + { + "epoch": 12.63, + "learning_rate": 1.8074579553910478e-05, + "loss": 2.2324, + "step": 93550 + }, + { + "epoch": 12.64, + "learning_rate": 1.805439145049602e-05, + "loss": 2.2287, + "step": 93560 + }, + { + "epoch": 12.64, + "learning_rate": 1.8034213905785903e-05, + "loss": 2.2306, + "step": 93570 + }, + { + "epoch": 12.64, + "learning_rate": 1.801404692139476e-05, + "loss": 2.235, + "step": 93580 + }, + { + "epoch": 12.64, + "learning_rate": 1.7993890498936474e-05, + "loss": 2.2293, + "step": 93590 + }, + { + "epoch": 12.64, + "learning_rate": 1.797374464002398e-05, + "loss": 2.2242, + "step": 93600 + }, + { + "epoch": 12.64, + "learning_rate": 1.7953609346269483e-05, + "loss": 2.2394, + "step": 93610 + }, + { + "epoch": 12.64, + "learning_rate": 1.793348461928416e-05, + "loss": 2.2355, + "step": 93620 + }, + { + "epoch": 12.65, + "learning_rate": 1.7913370460678545e-05, + "loss": 2.2109, + "step": 93630 + }, + { + "epoch": 12.65, + "learning_rate": 1.789326687206218e-05, + "loss": 2.2391, + "step": 93640 + }, + { + "epoch": 12.65, + "learning_rate": 1.7873173855043876e-05, + "loss": 2.2023, + "step": 93650 + }, + { + "epoch": 12.65, + "learning_rate": 1.7853091411231503e-05, + "loss": 2.2184, + "step": 93660 + }, + { + "epoch": 12.65, + "learning_rate": 1.78330195422321e-05, + "loss": 2.2166, + "step": 93670 + }, + { + "epoch": 12.65, + "learning_rate": 1.781295824965196e-05, + "loss": 2.2084, + "step": 93680 + }, + { + "epoch": 12.65, + "learning_rate": 1.779290753509639e-05, + "loss": 2.2446, + "step": 93690 + }, + { + "epoch": 12.65, + "learning_rate": 1.7772867400169878e-05, + "loss": 2.2098, + "step": 93700 + }, + { + "epoch": 12.66, + "learning_rate": 1.7752837846476187e-05, + "loss": 2.2003, + "step": 93710 + }, + { + "epoch": 12.66, + "learning_rate": 1.773281887561807e-05, + "loss": 2.2262, + "step": 93720 + }, + { + "epoch": 12.66, + "learning_rate": 1.7712810489197565e-05, + "loss": 2.2422, + "step": 93730 + }, + { + "epoch": 12.66, + "learning_rate": 1.769281268881579e-05, + "loss": 2.2122, + "step": 93740 + }, + { + "epoch": 12.66, + "learning_rate": 1.7672825476072977e-05, + "loss": 2.2549, + "step": 93750 + }, + { + "epoch": 12.66, + "learning_rate": 1.7652848852568653e-05, + "loss": 2.221, + "step": 93760 + }, + { + "epoch": 12.66, + "learning_rate": 1.7632882819901367e-05, + "loss": 2.1962, + "step": 93770 + }, + { + "epoch": 12.67, + "learning_rate": 1.761292737966883e-05, + "loss": 2.2582, + "step": 93780 + }, + { + "epoch": 12.67, + "learning_rate": 1.7592982533468008e-05, + "loss": 2.2077, + "step": 93790 + }, + { + "epoch": 12.67, + "learning_rate": 1.7573048282894897e-05, + "loss": 2.2301, + "step": 93800 + }, + { + "epoch": 12.67, + "learning_rate": 1.7553124629544785e-05, + "loss": 2.201, + "step": 93810 + }, + { + "epoch": 12.67, + "learning_rate": 1.753321157501188e-05, + "loss": 2.2377, + "step": 93820 + }, + { + "epoch": 12.67, + "learning_rate": 1.7513309120889808e-05, + "loss": 2.226, + "step": 93830 + }, + { + "epoch": 12.67, + "learning_rate": 1.7493417268771158e-05, + "loss": 2.2366, + "step": 93840 + }, + { + "epoch": 12.68, + "learning_rate": 1.7473536020247797e-05, + "loss": 2.2078, + "step": 93850 + }, + { + "epoch": 12.68, + "learning_rate": 1.7453665376910664e-05, + "loss": 2.2469, + "step": 93860 + }, + { + "epoch": 12.68, + "learning_rate": 1.743380534034982e-05, + "loss": 2.2306, + "step": 93870 + }, + { + "epoch": 12.68, + "learning_rate": 1.74139559121546e-05, + "loss": 2.2248, + "step": 93880 + }, + { + "epoch": 12.68, + "learning_rate": 1.739411709391339e-05, + "loss": 2.2633, + "step": 93890 + }, + { + "epoch": 12.68, + "learning_rate": 1.7374288887213728e-05, + "loss": 2.2426, + "step": 93900 + }, + { + "epoch": 12.68, + "learning_rate": 1.7354471293642374e-05, + "loss": 2.2229, + "step": 93910 + }, + { + "epoch": 12.68, + "learning_rate": 1.7334664314785145e-05, + "loss": 2.2352, + "step": 93920 + }, + { + "epoch": 12.69, + "learning_rate": 1.7314867952227123e-05, + "loss": 2.2329, + "step": 93930 + }, + { + "epoch": 12.69, + "learning_rate": 1.729508220755244e-05, + "loss": 2.2049, + "step": 93940 + }, + { + "epoch": 12.69, + "learning_rate": 1.7275307082344396e-05, + "loss": 2.2222, + "step": 93950 + }, + { + "epoch": 12.69, + "learning_rate": 1.7255542578185495e-05, + "loss": 2.2275, + "step": 93960 + }, + { + "epoch": 12.69, + "learning_rate": 1.7235788696657334e-05, + "loss": 2.2433, + "step": 93970 + }, + { + "epoch": 12.69, + "learning_rate": 1.7216045439340664e-05, + "loss": 2.229, + "step": 93980 + }, + { + "epoch": 12.69, + "learning_rate": 1.7196312807815443e-05, + "loss": 2.2099, + "step": 93990 + }, + { + "epoch": 12.7, + "learning_rate": 1.7176590803660707e-05, + "loss": 2.2432, + "step": 94000 + }, + { + "epoch": 12.7, + "eval_loss": 2.4050042629241943, + "eval_runtime": 1269.1883, + "eval_samples_per_second": 59.987, + "eval_steps_per_second": 4.999, + "step": 94000 + }, + { + "epoch": 12.7, + "learning_rate": 1.7156879428454724e-05, + "loss": 2.2023, + "step": 94010 + }, + { + "epoch": 12.7, + "learning_rate": 1.7137178683774766e-05, + "loss": 2.2124, + "step": 94020 + }, + { + "epoch": 12.7, + "learning_rate": 1.711748857119744e-05, + "loss": 2.2149, + "step": 94030 + }, + { + "epoch": 12.7, + "learning_rate": 1.7097809092298336e-05, + "loss": 2.2453, + "step": 94040 + }, + { + "epoch": 12.7, + "learning_rate": 1.7078140248652344e-05, + "loss": 2.2021, + "step": 94050 + }, + { + "epoch": 12.7, + "learning_rate": 1.705848204183339e-05, + "loss": 2.2236, + "step": 94060 + }, + { + "epoch": 12.7, + "learning_rate": 1.7038834473414558e-05, + "loss": 2.21, + "step": 94070 + }, + { + "epoch": 12.71, + "learning_rate": 1.7019197544968167e-05, + "loss": 2.2574, + "step": 94080 + }, + { + "epoch": 12.71, + "learning_rate": 1.6999571258065585e-05, + "loss": 2.2274, + "step": 94090 + }, + { + "epoch": 12.71, + "learning_rate": 1.6979955614277347e-05, + "loss": 2.2381, + "step": 94100 + }, + { + "epoch": 12.71, + "learning_rate": 1.696035061517324e-05, + "loss": 2.2442, + "step": 94110 + }, + { + "epoch": 12.71, + "learning_rate": 1.694075626232203e-05, + "loss": 2.2079, + "step": 94120 + }, + { + "epoch": 12.71, + "learning_rate": 1.692117255729178e-05, + "loss": 2.2392, + "step": 94130 + }, + { + "epoch": 12.71, + "learning_rate": 1.6901599501649633e-05, + "loss": 2.2378, + "step": 94140 + }, + { + "epoch": 12.72, + "learning_rate": 1.6882037096961837e-05, + "loss": 2.2142, + "step": 94150 + }, + { + "epoch": 12.72, + "learning_rate": 1.686248534479389e-05, + "loss": 2.2211, + "step": 94160 + }, + { + "epoch": 12.72, + "learning_rate": 1.684294424671038e-05, + "loss": 2.2312, + "step": 94170 + }, + { + "epoch": 12.72, + "learning_rate": 1.682341380427501e-05, + "loss": 2.1923, + "step": 94180 + }, + { + "epoch": 12.72, + "learning_rate": 1.6803894019050707e-05, + "loss": 2.2344, + "step": 94190 + }, + { + "epoch": 12.72, + "learning_rate": 1.6784384892599485e-05, + "loss": 2.2057, + "step": 94200 + }, + { + "epoch": 12.72, + "learning_rate": 1.6764886426482577e-05, + "loss": 2.2196, + "step": 94210 + }, + { + "epoch": 12.73, + "learning_rate": 1.6745398622260204e-05, + "loss": 2.2263, + "step": 94220 + }, + { + "epoch": 12.73, + "learning_rate": 1.6725921481491934e-05, + "loss": 2.2255, + "step": 94230 + }, + { + "epoch": 12.73, + "learning_rate": 1.6706455005736334e-05, + "loss": 2.2155, + "step": 94240 + }, + { + "epoch": 12.73, + "learning_rate": 1.6686999196551233e-05, + "loss": 2.2351, + "step": 94250 + }, + { + "epoch": 12.73, + "learning_rate": 1.666755405549351e-05, + "loss": 2.2147, + "step": 94260 + }, + { + "epoch": 12.73, + "learning_rate": 1.6648119584119192e-05, + "loss": 2.2225, + "step": 94270 + }, + { + "epoch": 12.73, + "learning_rate": 1.6628695783983564e-05, + "loss": 2.2286, + "step": 94280 + }, + { + "epoch": 12.73, + "learning_rate": 1.6609282656640938e-05, + "loss": 2.1909, + "step": 94290 + }, + { + "epoch": 12.74, + "learning_rate": 1.6589880203644783e-05, + "loss": 2.2199, + "step": 94300 + }, + { + "epoch": 12.74, + "learning_rate": 1.6570488426547795e-05, + "loss": 2.2403, + "step": 94310 + }, + { + "epoch": 12.74, + "learning_rate": 1.655110732690173e-05, + "loss": 2.2352, + "step": 94320 + }, + { + "epoch": 12.74, + "learning_rate": 1.653173690625758e-05, + "loss": 2.2309, + "step": 94330 + }, + { + "epoch": 12.74, + "learning_rate": 1.6512377166165376e-05, + "loss": 2.2153, + "step": 94340 + }, + { + "epoch": 12.74, + "learning_rate": 1.6493028108174323e-05, + "loss": 2.2505, + "step": 94350 + }, + { + "epoch": 12.74, + "learning_rate": 1.6473689733832867e-05, + "loss": 2.2075, + "step": 94360 + }, + { + "epoch": 12.75, + "learning_rate": 1.645436204468849e-05, + "loss": 2.2326, + "step": 94370 + }, + { + "epoch": 12.75, + "learning_rate": 1.6435045042287836e-05, + "loss": 2.235, + "step": 94380 + }, + { + "epoch": 12.75, + "learning_rate": 1.6415738728176735e-05, + "loss": 2.2348, + "step": 94390 + }, + { + "epoch": 12.75, + "learning_rate": 1.639644310390012e-05, + "loss": 2.236, + "step": 94400 + }, + { + "epoch": 12.75, + "learning_rate": 1.6377158171002154e-05, + "loss": 2.2238, + "step": 94410 + }, + { + "epoch": 12.75, + "learning_rate": 1.6357883931025966e-05, + "loss": 2.2461, + "step": 94420 + }, + { + "epoch": 12.75, + "learning_rate": 1.6338620385514047e-05, + "loss": 2.2448, + "step": 94430 + }, + { + "epoch": 12.75, + "learning_rate": 1.6319367536007838e-05, + "loss": 2.2146, + "step": 94440 + }, + { + "epoch": 12.76, + "learning_rate": 1.630012538404808e-05, + "loss": 2.2232, + "step": 94450 + }, + { + "epoch": 12.76, + "learning_rate": 1.628089393117457e-05, + "loss": 2.1952, + "step": 94460 + }, + { + "epoch": 12.76, + "learning_rate": 1.626167317892625e-05, + "loss": 2.2556, + "step": 94470 + }, + { + "epoch": 12.76, + "learning_rate": 1.6242463128841248e-05, + "loss": 2.2131, + "step": 94480 + }, + { + "epoch": 12.76, + "learning_rate": 1.6223263782456825e-05, + "loss": 2.2171, + "step": 94490 + }, + { + "epoch": 12.76, + "learning_rate": 1.6204075141309315e-05, + "loss": 2.2195, + "step": 94500 + }, + { + "epoch": 12.76, + "learning_rate": 1.6184897206934323e-05, + "loss": 2.2304, + "step": 94510 + }, + { + "epoch": 12.77, + "learning_rate": 1.616572998086647e-05, + "loss": 2.2207, + "step": 94520 + }, + { + "epoch": 12.77, + "learning_rate": 1.614657346463965e-05, + "loss": 2.2163, + "step": 94530 + }, + { + "epoch": 12.77, + "learning_rate": 1.6127427659786768e-05, + "loss": 2.2206, + "step": 94540 + }, + { + "epoch": 12.77, + "learning_rate": 1.610829256783993e-05, + "loss": 2.2491, + "step": 94550 + }, + { + "epoch": 12.77, + "learning_rate": 1.6089168190330432e-05, + "loss": 2.2124, + "step": 94560 + }, + { + "epoch": 12.77, + "learning_rate": 1.607005452878863e-05, + "loss": 2.2511, + "step": 94570 + }, + { + "epoch": 12.77, + "learning_rate": 1.6050951584744047e-05, + "loss": 2.2353, + "step": 94580 + }, + { + "epoch": 12.78, + "learning_rate": 1.6031859359725402e-05, + "loss": 2.2038, + "step": 94590 + }, + { + "epoch": 12.78, + "learning_rate": 1.6012777855260484e-05, + "loss": 2.2213, + "step": 94600 + }, + { + "epoch": 12.78, + "learning_rate": 1.599370707287632e-05, + "loss": 2.2117, + "step": 94610 + }, + { + "epoch": 12.78, + "learning_rate": 1.5974647014098897e-05, + "loss": 2.2269, + "step": 94620 + }, + { + "epoch": 12.78, + "learning_rate": 1.5955597680453552e-05, + "loss": 2.216, + "step": 94630 + }, + { + "epoch": 12.78, + "learning_rate": 1.593655907346462e-05, + "loss": 2.2351, + "step": 94640 + }, + { + "epoch": 12.78, + "learning_rate": 1.5917531194655703e-05, + "loss": 2.2221, + "step": 94650 + }, + { + "epoch": 12.78, + "learning_rate": 1.589851404554941e-05, + "loss": 2.1902, + "step": 94660 + }, + { + "epoch": 12.79, + "learning_rate": 1.5879507627667543e-05, + "loss": 2.2454, + "step": 94670 + }, + { + "epoch": 12.79, + "learning_rate": 1.5860511942531116e-05, + "loss": 2.2018, + "step": 94680 + }, + { + "epoch": 12.79, + "learning_rate": 1.5841526991660186e-05, + "loss": 2.2391, + "step": 94690 + }, + { + "epoch": 12.79, + "learning_rate": 1.5822552776573967e-05, + "loss": 2.1972, + "step": 94700 + }, + { + "epoch": 12.79, + "learning_rate": 1.5803589298790874e-05, + "loss": 2.2223, + "step": 94710 + }, + { + "epoch": 12.79, + "learning_rate": 1.57846365598284e-05, + "loss": 2.2209, + "step": 94720 + }, + { + "epoch": 12.79, + "learning_rate": 1.5765694561203245e-05, + "loss": 2.2384, + "step": 94730 + }, + { + "epoch": 12.8, + "learning_rate": 1.5746763304431165e-05, + "loss": 2.1929, + "step": 94740 + }, + { + "epoch": 12.8, + "learning_rate": 1.5727842791027095e-05, + "loss": 2.2589, + "step": 94750 + }, + { + "epoch": 12.8, + "learning_rate": 1.5708933022505142e-05, + "loss": 2.2285, + "step": 94760 + }, + { + "epoch": 12.8, + "learning_rate": 1.5690034000378532e-05, + "loss": 2.238, + "step": 94770 + }, + { + "epoch": 12.8, + "learning_rate": 1.56711457261596e-05, + "loss": 2.2629, + "step": 94780 + }, + { + "epoch": 12.8, + "learning_rate": 1.5652268201359825e-05, + "loss": 2.2511, + "step": 94790 + }, + { + "epoch": 12.8, + "learning_rate": 1.5633401427489862e-05, + "loss": 2.2315, + "step": 94800 + }, + { + "epoch": 12.8, + "learning_rate": 1.561454540605957e-05, + "loss": 2.2393, + "step": 94810 + }, + { + "epoch": 12.81, + "learning_rate": 1.5595700138577728e-05, + "loss": 2.2301, + "step": 94820 + }, + { + "epoch": 12.81, + "learning_rate": 1.557686562655249e-05, + "loss": 2.2271, + "step": 94830 + }, + { + "epoch": 12.81, + "learning_rate": 1.555804187149101e-05, + "loss": 2.2408, + "step": 94840 + }, + { + "epoch": 12.81, + "learning_rate": 1.553922887489968e-05, + "loss": 2.2371, + "step": 94850 + }, + { + "epoch": 12.81, + "learning_rate": 1.5520426638283923e-05, + "loss": 2.2448, + "step": 94860 + }, + { + "epoch": 12.81, + "learning_rate": 1.550163516314834e-05, + "loss": 2.219, + "step": 94870 + }, + { + "epoch": 12.81, + "learning_rate": 1.5482854450996728e-05, + "loss": 2.2602, + "step": 94880 + }, + { + "epoch": 12.82, + "learning_rate": 1.546408450333197e-05, + "loss": 2.2157, + "step": 94890 + }, + { + "epoch": 12.82, + "learning_rate": 1.5445325321656077e-05, + "loss": 2.2229, + "step": 94900 + }, + { + "epoch": 12.82, + "learning_rate": 1.5426576907470235e-05, + "loss": 2.2313, + "step": 94910 + }, + { + "epoch": 12.82, + "learning_rate": 1.5407839262274725e-05, + "loss": 2.2283, + "step": 94920 + }, + { + "epoch": 12.82, + "learning_rate": 1.5389112387569047e-05, + "loss": 2.2115, + "step": 94930 + }, + { + "epoch": 12.82, + "learning_rate": 1.537039628485174e-05, + "loss": 2.2187, + "step": 94940 + }, + { + "epoch": 12.82, + "learning_rate": 1.5351690955620515e-05, + "loss": 2.2225, + "step": 94950 + }, + { + "epoch": 12.83, + "learning_rate": 1.5332996401372284e-05, + "loss": 2.2266, + "step": 94960 + }, + { + "epoch": 12.83, + "learning_rate": 1.531431262360301e-05, + "loss": 2.2102, + "step": 94970 + }, + { + "epoch": 12.83, + "learning_rate": 1.5295639623807832e-05, + "loss": 2.2361, + "step": 94980 + }, + { + "epoch": 12.83, + "learning_rate": 1.5276977403481007e-05, + "loss": 2.2307, + "step": 94990 + }, + { + "epoch": 12.83, + "learning_rate": 1.5258325964115936e-05, + "loss": 2.2214, + "step": 95000 + }, + { + "epoch": 12.83, + "eval_loss": 2.4032440185546875, + "eval_runtime": 1275.175, + "eval_samples_per_second": 59.706, + "eval_steps_per_second": 4.976, + "step": 95000 + }, + { + "epoch": 12.83, + "learning_rate": 1.5239685307205263e-05, + "loss": 2.2442, + "step": 95010 + }, + { + "epoch": 12.83, + "learning_rate": 1.5221055434240525e-05, + "loss": 2.2017, + "step": 95020 + }, + { + "epoch": 12.83, + "learning_rate": 1.5202436346712665e-05, + "loss": 2.2418, + "step": 95030 + }, + { + "epoch": 12.84, + "learning_rate": 1.5183828046111557e-05, + "loss": 2.2177, + "step": 95040 + }, + { + "epoch": 12.84, + "learning_rate": 1.516523053392636e-05, + "loss": 2.2339, + "step": 95050 + }, + { + "epoch": 12.84, + "learning_rate": 1.5146643811645282e-05, + "loss": 2.2128, + "step": 95060 + }, + { + "epoch": 12.84, + "learning_rate": 1.5128067880755651e-05, + "loss": 2.2292, + "step": 95070 + }, + { + "epoch": 12.84, + "learning_rate": 1.5109502742744046e-05, + "loss": 2.2461, + "step": 95080 + }, + { + "epoch": 12.84, + "learning_rate": 1.509094839909606e-05, + "loss": 2.2163, + "step": 95090 + }, + { + "epoch": 12.84, + "learning_rate": 1.5072404851296455e-05, + "loss": 2.2202, + "step": 95100 + }, + { + "epoch": 12.85, + "learning_rate": 1.5053872100829211e-05, + "loss": 2.2311, + "step": 95110 + }, + { + "epoch": 12.85, + "learning_rate": 1.5035350149177295e-05, + "loss": 2.2363, + "step": 95120 + }, + { + "epoch": 12.85, + "learning_rate": 1.5016838997822949e-05, + "loss": 2.234, + "step": 95130 + }, + { + "epoch": 12.85, + "learning_rate": 1.4998338648247493e-05, + "loss": 2.2202, + "step": 95140 + }, + { + "epoch": 12.85, + "learning_rate": 1.4979849101931341e-05, + "loss": 2.2105, + "step": 95150 + }, + { + "epoch": 12.85, + "learning_rate": 1.4961370360354142e-05, + "loss": 2.2215, + "step": 95160 + }, + { + "epoch": 12.85, + "learning_rate": 1.494290242499458e-05, + "loss": 2.2078, + "step": 95170 + }, + { + "epoch": 12.85, + "learning_rate": 1.492444529733054e-05, + "loss": 2.2321, + "step": 95180 + }, + { + "epoch": 12.86, + "learning_rate": 1.4905998978838974e-05, + "loss": 2.2373, + "step": 95190 + }, + { + "epoch": 12.86, + "learning_rate": 1.4887563470996067e-05, + "loss": 2.2266, + "step": 95200 + }, + { + "epoch": 12.86, + "learning_rate": 1.4869138775277107e-05, + "loss": 2.2079, + "step": 95210 + }, + { + "epoch": 12.86, + "learning_rate": 1.4850724893156413e-05, + "loss": 2.2098, + "step": 95220 + }, + { + "epoch": 12.86, + "learning_rate": 1.4832321826107608e-05, + "loss": 2.2089, + "step": 95230 + }, + { + "epoch": 12.86, + "learning_rate": 1.4813929575603279e-05, + "loss": 2.208, + "step": 95240 + }, + { + "epoch": 12.86, + "learning_rate": 1.4795548143115299e-05, + "loss": 2.2312, + "step": 95250 + }, + { + "epoch": 12.87, + "learning_rate": 1.477717753011461e-05, + "loss": 2.2004, + "step": 95260 + }, + { + "epoch": 12.87, + "learning_rate": 1.4758817738071217e-05, + "loss": 2.2263, + "step": 95270 + }, + { + "epoch": 12.87, + "learning_rate": 1.474046876845441e-05, + "loss": 2.2272, + "step": 95280 + }, + { + "epoch": 12.87, + "learning_rate": 1.4722130622732503e-05, + "loss": 2.2327, + "step": 95290 + }, + { + "epoch": 12.87, + "learning_rate": 1.4703803302372918e-05, + "loss": 2.2065, + "step": 95300 + }, + { + "epoch": 12.87, + "learning_rate": 1.4685486808842366e-05, + "loss": 2.2016, + "step": 95310 + }, + { + "epoch": 12.87, + "learning_rate": 1.4667181143606493e-05, + "loss": 2.2217, + "step": 95320 + }, + { + "epoch": 12.88, + "learning_rate": 1.4648886308130242e-05, + "loss": 2.2177, + "step": 95330 + }, + { + "epoch": 12.88, + "learning_rate": 1.4630602303877609e-05, + "loss": 2.1962, + "step": 95340 + }, + { + "epoch": 12.88, + "learning_rate": 1.4612329132311706e-05, + "loss": 2.2205, + "step": 95350 + }, + { + "epoch": 12.88, + "learning_rate": 1.4594066794894866e-05, + "loss": 2.2505, + "step": 95360 + }, + { + "epoch": 12.88, + "learning_rate": 1.4575815293088466e-05, + "loss": 2.2108, + "step": 95370 + }, + { + "epoch": 12.88, + "learning_rate": 1.4557574628353042e-05, + "loss": 2.2425, + "step": 95380 + }, + { + "epoch": 12.88, + "learning_rate": 1.4539344802148257e-05, + "loss": 2.2357, + "step": 95390 + }, + { + "epoch": 12.88, + "learning_rate": 1.4521125815932927e-05, + "loss": 2.2045, + "step": 95400 + }, + { + "epoch": 12.89, + "learning_rate": 1.4502917671165071e-05, + "loss": 2.2355, + "step": 95410 + }, + { + "epoch": 12.89, + "learning_rate": 1.4484720369301656e-05, + "loss": 2.2541, + "step": 95420 + }, + { + "epoch": 12.89, + "learning_rate": 1.4466533911798933e-05, + "loss": 2.2417, + "step": 95430 + }, + { + "epoch": 12.89, + "learning_rate": 1.4448358300112222e-05, + "loss": 2.2523, + "step": 95440 + }, + { + "epoch": 12.89, + "learning_rate": 1.4430193535696027e-05, + "loss": 2.2342, + "step": 95450 + }, + { + "epoch": 12.89, + "learning_rate": 1.4412039620003935e-05, + "loss": 2.2277, + "step": 95460 + }, + { + "epoch": 12.89, + "learning_rate": 1.4393896554488649e-05, + "loss": 2.2605, + "step": 95470 + }, + { + "epoch": 12.9, + "learning_rate": 1.4375764340602076e-05, + "loss": 2.2173, + "step": 95480 + }, + { + "epoch": 12.9, + "learning_rate": 1.4357642979795204e-05, + "loss": 2.2135, + "step": 95490 + }, + { + "epoch": 12.9, + "learning_rate": 1.4339532473518122e-05, + "loss": 2.2177, + "step": 95500 + }, + { + "epoch": 12.9, + "learning_rate": 1.4321432823220158e-05, + "loss": 2.2349, + "step": 95510 + }, + { + "epoch": 12.9, + "learning_rate": 1.4303344030349617e-05, + "loss": 2.2337, + "step": 95520 + }, + { + "epoch": 12.9, + "learning_rate": 1.4285266096354125e-05, + "loss": 2.2493, + "step": 95530 + }, + { + "epoch": 12.9, + "learning_rate": 1.4267199022680276e-05, + "loss": 2.2083, + "step": 95540 + }, + { + "epoch": 12.9, + "learning_rate": 1.4249142810773829e-05, + "loss": 2.2264, + "step": 95550 + }, + { + "epoch": 12.91, + "learning_rate": 1.4231097462079744e-05, + "loss": 2.2402, + "step": 95560 + }, + { + "epoch": 12.91, + "learning_rate": 1.4214865937495167e-05, + "loss": 2.2057, + "step": 95570 + }, + { + "epoch": 12.91, + "learning_rate": 1.4196841232882161e-05, + "loss": 2.2152, + "step": 95580 + }, + { + "epoch": 12.91, + "learning_rate": 1.4178827395666853e-05, + "loss": 2.2399, + "step": 95590 + }, + { + "epoch": 12.91, + "learning_rate": 1.4160824427290773e-05, + "loss": 2.2006, + "step": 95600 + }, + { + "epoch": 12.91, + "learning_rate": 1.414283232919452e-05, + "loss": 2.2183, + "step": 95610 + }, + { + "epoch": 12.91, + "learning_rate": 1.4124851102817942e-05, + "loss": 2.2225, + "step": 95620 + }, + { + "epoch": 12.92, + "learning_rate": 1.4106880749599903e-05, + "loss": 2.2176, + "step": 95630 + }, + { + "epoch": 12.92, + "learning_rate": 1.4088921270978487e-05, + "loss": 2.2053, + "step": 95640 + }, + { + "epoch": 12.92, + "learning_rate": 1.4070972668390878e-05, + "loss": 2.2646, + "step": 95650 + }, + { + "epoch": 12.92, + "learning_rate": 1.405303494327331e-05, + "loss": 2.2232, + "step": 95660 + }, + { + "epoch": 12.92, + "learning_rate": 1.4035108097061288e-05, + "loss": 2.2142, + "step": 95670 + }, + { + "epoch": 12.92, + "learning_rate": 1.4017192131189342e-05, + "loss": 2.2419, + "step": 95680 + }, + { + "epoch": 12.92, + "learning_rate": 1.3999287047091162e-05, + "loss": 2.231, + "step": 95690 + }, + { + "epoch": 12.92, + "learning_rate": 1.3981392846199585e-05, + "loss": 2.2128, + "step": 95700 + }, + { + "epoch": 12.93, + "learning_rate": 1.3963509529946532e-05, + "loss": 2.2251, + "step": 95710 + }, + { + "epoch": 12.93, + "learning_rate": 1.3945637099763173e-05, + "loss": 2.1967, + "step": 95720 + }, + { + "epoch": 12.93, + "learning_rate": 1.3927775557079584e-05, + "loss": 2.2515, + "step": 95730 + }, + { + "epoch": 12.93, + "learning_rate": 1.3909924903325154e-05, + "loss": 2.2066, + "step": 95740 + }, + { + "epoch": 12.93, + "learning_rate": 1.3892085139928406e-05, + "loss": 2.2197, + "step": 95750 + }, + { + "epoch": 12.93, + "learning_rate": 1.387425626831688e-05, + "loss": 2.2296, + "step": 95760 + }, + { + "epoch": 12.93, + "learning_rate": 1.3856438289917305e-05, + "loss": 2.2124, + "step": 95770 + }, + { + "epoch": 12.94, + "learning_rate": 1.3838631206155503e-05, + "loss": 2.2324, + "step": 95780 + }, + { + "epoch": 12.94, + "learning_rate": 1.3820835018456522e-05, + "loss": 2.2035, + "step": 95790 + }, + { + "epoch": 12.94, + "learning_rate": 1.380304972824442e-05, + "loss": 2.2161, + "step": 95800 + }, + { + "epoch": 12.94, + "learning_rate": 1.3785275336942425e-05, + "loss": 2.2176, + "step": 95810 + }, + { + "epoch": 12.94, + "learning_rate": 1.376751184597295e-05, + "loss": 2.2354, + "step": 95820 + }, + { + "epoch": 12.94, + "learning_rate": 1.3749759256757426e-05, + "loss": 2.2225, + "step": 95830 + }, + { + "epoch": 12.94, + "learning_rate": 1.3732017570716514e-05, + "loss": 2.2259, + "step": 95840 + }, + { + "epoch": 12.95, + "learning_rate": 1.3714286789269946e-05, + "loss": 2.2125, + "step": 95850 + }, + { + "epoch": 12.95, + "learning_rate": 1.3696566913836571e-05, + "loss": 2.2176, + "step": 95860 + }, + { + "epoch": 12.95, + "learning_rate": 1.3678857945834454e-05, + "loss": 2.2234, + "step": 95870 + }, + { + "epoch": 12.95, + "learning_rate": 1.3661159886680662e-05, + "loss": 2.235, + "step": 95880 + }, + { + "epoch": 12.95, + "learning_rate": 1.3643472737791444e-05, + "loss": 2.2236, + "step": 95890 + }, + { + "epoch": 12.95, + "learning_rate": 1.3625796500582237e-05, + "loss": 2.2285, + "step": 95900 + }, + { + "epoch": 12.95, + "learning_rate": 1.3608131176467508e-05, + "loss": 2.2357, + "step": 95910 + }, + { + "epoch": 12.95, + "learning_rate": 1.359047676686094e-05, + "loss": 2.2251, + "step": 95920 + }, + { + "epoch": 12.96, + "learning_rate": 1.3572833273175193e-05, + "loss": 2.2444, + "step": 95930 + }, + { + "epoch": 12.96, + "learning_rate": 1.3555200696822232e-05, + "loss": 2.2235, + "step": 95940 + }, + { + "epoch": 12.96, + "learning_rate": 1.3537579039213082e-05, + "loss": 2.2119, + "step": 95950 + }, + { + "epoch": 12.96, + "learning_rate": 1.3519968301757861e-05, + "loss": 2.2608, + "step": 95960 + }, + { + "epoch": 12.96, + "learning_rate": 1.3502368485865844e-05, + "loss": 2.2363, + "step": 95970 + }, + { + "epoch": 12.96, + "learning_rate": 1.3484779592945388e-05, + "loss": 2.256, + "step": 95980 + }, + { + "epoch": 12.96, + "learning_rate": 1.346720162440405e-05, + "loss": 2.2607, + "step": 95990 + }, + { + "epoch": 12.97, + "learning_rate": 1.3449634581648472e-05, + "loss": 2.2032, + "step": 96000 + }, + { + "epoch": 12.97, + "eval_loss": 2.401254177093506, + "eval_runtime": 1270.5885, + "eval_samples_per_second": 59.921, + "eval_steps_per_second": 4.994, + "step": 96000 + }, + { + "epoch": 12.97, + "learning_rate": 1.3432078466084395e-05, + "loss": 2.2359, + "step": 96010 + }, + { + "epoch": 12.97, + "learning_rate": 1.3414533279116747e-05, + "loss": 2.2226, + "step": 96020 + }, + { + "epoch": 12.97, + "learning_rate": 1.3396999022149535e-05, + "loss": 2.2261, + "step": 96030 + }, + { + "epoch": 12.97, + "learning_rate": 1.3379475696585906e-05, + "loss": 2.2086, + "step": 96040 + }, + { + "epoch": 12.97, + "learning_rate": 1.3361963303828156e-05, + "loss": 2.24, + "step": 96050 + }, + { + "epoch": 12.97, + "learning_rate": 1.3344461845277626e-05, + "loss": 2.2254, + "step": 96060 + }, + { + "epoch": 12.97, + "learning_rate": 1.33269713223349e-05, + "loss": 2.1811, + "step": 96070 + }, + { + "epoch": 12.98, + "learning_rate": 1.3309491736399591e-05, + "loss": 2.2479, + "step": 96080 + }, + { + "epoch": 12.98, + "learning_rate": 1.3292023088870463e-05, + "loss": 2.2206, + "step": 96090 + }, + { + "epoch": 12.98, + "learning_rate": 1.3274565381145447e-05, + "loss": 2.2302, + "step": 96100 + }, + { + "epoch": 12.98, + "learning_rate": 1.325711861462151e-05, + "loss": 2.2533, + "step": 96110 + }, + { + "epoch": 12.98, + "learning_rate": 1.3239682790694899e-05, + "loss": 2.1956, + "step": 96120 + }, + { + "epoch": 12.98, + "learning_rate": 1.3222257910760753e-05, + "loss": 2.2296, + "step": 96130 + }, + { + "epoch": 12.98, + "learning_rate": 1.3204843976213519e-05, + "loss": 2.2355, + "step": 96140 + }, + { + "epoch": 12.99, + "learning_rate": 1.318744098844675e-05, + "loss": 2.2399, + "step": 96150 + }, + { + "epoch": 12.99, + "learning_rate": 1.3170048948853084e-05, + "loss": 2.21, + "step": 96160 + }, + { + "epoch": 12.99, + "learning_rate": 1.3152667858824239e-05, + "loss": 2.2089, + "step": 96170 + }, + { + "epoch": 12.99, + "learning_rate": 1.313529771975112e-05, + "loss": 2.2111, + "step": 96180 + }, + { + "epoch": 12.99, + "learning_rate": 1.311793853302378e-05, + "loss": 2.2256, + "step": 96190 + }, + { + "epoch": 12.99, + "learning_rate": 1.3100590300031327e-05, + "loss": 2.2305, + "step": 96200 + }, + { + "epoch": 12.99, + "learning_rate": 1.3083253022161982e-05, + "loss": 2.2326, + "step": 96210 + }, + { + "epoch": 13.0, + "learning_rate": 1.3065926700803203e-05, + "loss": 2.2231, + "step": 96220 + }, + { + "epoch": 13.0, + "learning_rate": 1.304861133734143e-05, + "loss": 2.2583, + "step": 96230 + }, + { + "epoch": 13.0, + "learning_rate": 1.3031306933162373e-05, + "loss": 2.2178, + "step": 96240 + }, + { + "epoch": 13.0, + "learning_rate": 1.301401348965072e-05, + "loss": 2.2307, + "step": 96250 + }, + { + "epoch": 13.0, + "learning_rate": 1.2996731008190336e-05, + "loss": 2.1926, + "step": 96260 + }, + { + "epoch": 13.0, + "learning_rate": 1.297945949016428e-05, + "loss": 2.1905, + "step": 96270 + }, + { + "epoch": 13.0, + "learning_rate": 1.2962198936954626e-05, + "loss": 2.238, + "step": 96280 + }, + { + "epoch": 13.0, + "learning_rate": 1.2944949349942624e-05, + "loss": 2.2113, + "step": 96290 + }, + { + "epoch": 13.01, + "learning_rate": 1.2927710730508684e-05, + "loss": 2.1695, + "step": 96300 + }, + { + "epoch": 13.01, + "learning_rate": 1.291048308003222e-05, + "loss": 2.2097, + "step": 96310 + }, + { + "epoch": 13.01, + "learning_rate": 1.2893266399891943e-05, + "loss": 2.2106, + "step": 96320 + }, + { + "epoch": 13.01, + "learning_rate": 1.2876060691465473e-05, + "loss": 2.2055, + "step": 96330 + }, + { + "epoch": 13.01, + "learning_rate": 1.2858865956129704e-05, + "loss": 2.1814, + "step": 96340 + }, + { + "epoch": 13.01, + "learning_rate": 1.2841682195260671e-05, + "loss": 2.1985, + "step": 96350 + }, + { + "epoch": 13.01, + "learning_rate": 1.2824509410233425e-05, + "loss": 2.2073, + "step": 96360 + }, + { + "epoch": 13.02, + "learning_rate": 1.2807347602422196e-05, + "loss": 2.223, + "step": 96370 + }, + { + "epoch": 13.02, + "learning_rate": 1.279019677320029e-05, + "loss": 2.1933, + "step": 96380 + }, + { + "epoch": 13.02, + "learning_rate": 1.2773056923940223e-05, + "loss": 2.1855, + "step": 96390 + }, + { + "epoch": 13.02, + "learning_rate": 1.2755928056013565e-05, + "loss": 2.2056, + "step": 96400 + }, + { + "epoch": 13.02, + "learning_rate": 1.2738810170791004e-05, + "loss": 2.1966, + "step": 96410 + }, + { + "epoch": 13.02, + "learning_rate": 1.2721703269642392e-05, + "loss": 2.2055, + "step": 96420 + }, + { + "epoch": 13.02, + "learning_rate": 1.2704607353936635e-05, + "loss": 2.209, + "step": 96430 + }, + { + "epoch": 13.02, + "learning_rate": 1.2687522425041857e-05, + "loss": 2.1966, + "step": 96440 + }, + { + "epoch": 13.03, + "learning_rate": 1.267044848432523e-05, + "loss": 2.2037, + "step": 96450 + }, + { + "epoch": 13.03, + "learning_rate": 1.2653385533153027e-05, + "loss": 2.1976, + "step": 96460 + }, + { + "epoch": 13.03, + "learning_rate": 1.2636333572890722e-05, + "loss": 2.2281, + "step": 96470 + }, + { + "epoch": 13.03, + "learning_rate": 1.2619292604902875e-05, + "loss": 2.1796, + "step": 96480 + }, + { + "epoch": 13.03, + "learning_rate": 1.2602262630553112e-05, + "loss": 2.2005, + "step": 96490 + }, + { + "epoch": 13.03, + "learning_rate": 1.2585243651204258e-05, + "loss": 2.2076, + "step": 96500 + }, + { + "epoch": 13.03, + "learning_rate": 1.2568235668218207e-05, + "loss": 2.2003, + "step": 96510 + }, + { + "epoch": 13.04, + "learning_rate": 1.2551238682956055e-05, + "loss": 2.2062, + "step": 96520 + }, + { + "epoch": 13.04, + "learning_rate": 1.2534252696777847e-05, + "loss": 2.2026, + "step": 96530 + }, + { + "epoch": 13.04, + "learning_rate": 1.2517277711042927e-05, + "loss": 2.1971, + "step": 96540 + }, + { + "epoch": 13.04, + "learning_rate": 1.2500313727109645e-05, + "loss": 2.1913, + "step": 96550 + }, + { + "epoch": 13.04, + "learning_rate": 1.248336074633558e-05, + "loss": 2.1996, + "step": 96560 + }, + { + "epoch": 13.04, + "learning_rate": 1.2466418770077313e-05, + "loss": 2.1909, + "step": 96570 + }, + { + "epoch": 13.04, + "learning_rate": 1.2449487799690578e-05, + "loss": 2.1935, + "step": 96580 + }, + { + "epoch": 13.05, + "learning_rate": 1.2432567836530288e-05, + "loss": 2.183, + "step": 96590 + }, + { + "epoch": 13.05, + "learning_rate": 1.2415658881950414e-05, + "loss": 2.2118, + "step": 96600 + }, + { + "epoch": 13.05, + "learning_rate": 1.2398760937304036e-05, + "loss": 2.21, + "step": 96610 + }, + { + "epoch": 13.05, + "learning_rate": 1.2381874003943443e-05, + "loss": 2.2043, + "step": 96620 + }, + { + "epoch": 13.05, + "learning_rate": 1.2364998083219902e-05, + "loss": 2.2018, + "step": 96630 + }, + { + "epoch": 13.05, + "learning_rate": 1.2348133176483965e-05, + "loss": 2.1986, + "step": 96640 + }, + { + "epoch": 13.05, + "learning_rate": 1.2331279285085172e-05, + "loss": 2.2115, + "step": 96650 + }, + { + "epoch": 13.05, + "learning_rate": 1.2314436410372191e-05, + "loss": 2.2182, + "step": 96660 + }, + { + "epoch": 13.06, + "learning_rate": 1.2297604553692896e-05, + "loss": 2.2184, + "step": 96670 + }, + { + "epoch": 13.06, + "learning_rate": 1.228078371639421e-05, + "loss": 2.2278, + "step": 96680 + }, + { + "epoch": 13.06, + "learning_rate": 1.2263973899822154e-05, + "loss": 2.2251, + "step": 96690 + }, + { + "epoch": 13.06, + "learning_rate": 1.2247175105321954e-05, + "loss": 2.1951, + "step": 96700 + }, + { + "epoch": 13.06, + "learning_rate": 1.2230387334237868e-05, + "loss": 2.1906, + "step": 96710 + }, + { + "epoch": 13.06, + "learning_rate": 1.221361058791337e-05, + "loss": 2.2135, + "step": 96720 + }, + { + "epoch": 13.06, + "learning_rate": 1.219684486769087e-05, + "loss": 2.2049, + "step": 96730 + }, + { + "epoch": 13.07, + "learning_rate": 1.2180090174912127e-05, + "loss": 2.1704, + "step": 96740 + }, + { + "epoch": 13.07, + "learning_rate": 1.216334651091782e-05, + "loss": 2.1809, + "step": 96750 + }, + { + "epoch": 13.07, + "learning_rate": 1.2146613877047896e-05, + "loss": 2.2163, + "step": 96760 + }, + { + "epoch": 13.07, + "learning_rate": 1.2129892274641328e-05, + "loss": 2.1892, + "step": 96770 + }, + { + "epoch": 13.07, + "learning_rate": 1.2113181705036201e-05, + "loss": 2.2093, + "step": 96780 + }, + { + "epoch": 13.07, + "learning_rate": 1.209648216956981e-05, + "loss": 2.1915, + "step": 96790 + }, + { + "epoch": 13.07, + "learning_rate": 1.2079793669578469e-05, + "loss": 2.189, + "step": 96800 + }, + { + "epoch": 13.07, + "learning_rate": 1.2063116206397611e-05, + "loss": 2.2267, + "step": 96810 + }, + { + "epoch": 13.08, + "learning_rate": 1.2046449781361883e-05, + "loss": 2.2041, + "step": 96820 + }, + { + "epoch": 13.08, + "learning_rate": 1.2029794395804937e-05, + "loss": 2.2201, + "step": 96830 + }, + { + "epoch": 13.08, + "learning_rate": 1.201315005105964e-05, + "loss": 2.1665, + "step": 96840 + }, + { + "epoch": 13.08, + "learning_rate": 1.1996516748457873e-05, + "loss": 2.1785, + "step": 96850 + }, + { + "epoch": 13.08, + "learning_rate": 1.1979894489330694e-05, + "loss": 2.2242, + "step": 96860 + }, + { + "epoch": 13.08, + "learning_rate": 1.19632832750083e-05, + "loss": 2.2418, + "step": 96870 + }, + { + "epoch": 13.08, + "learning_rate": 1.1946683106819966e-05, + "loss": 2.2096, + "step": 96880 + }, + { + "epoch": 13.09, + "learning_rate": 1.1930093986094041e-05, + "loss": 2.2091, + "step": 96890 + }, + { + "epoch": 13.09, + "learning_rate": 1.19135159141581e-05, + "loss": 2.1805, + "step": 96900 + }, + { + "epoch": 13.09, + "learning_rate": 1.1896948892338731e-05, + "loss": 2.1938, + "step": 96910 + }, + { + "epoch": 13.09, + "learning_rate": 1.188039292196174e-05, + "loss": 2.2056, + "step": 96920 + }, + { + "epoch": 13.09, + "learning_rate": 1.18638480043519e-05, + "loss": 2.2127, + "step": 96930 + }, + { + "epoch": 13.09, + "learning_rate": 1.1847314140833254e-05, + "loss": 2.1924, + "step": 96940 + }, + { + "epoch": 13.09, + "learning_rate": 1.1830791332728856e-05, + "loss": 2.2031, + "step": 96950 + }, + { + "epoch": 13.1, + "learning_rate": 1.1814279581360935e-05, + "loss": 2.1959, + "step": 96960 + }, + { + "epoch": 13.1, + "learning_rate": 1.1797778888050818e-05, + "loss": 2.217, + "step": 96970 + }, + { + "epoch": 13.1, + "learning_rate": 1.1781289254118914e-05, + "loss": 2.2072, + "step": 96980 + }, + { + "epoch": 13.1, + "learning_rate": 1.1764810680884818e-05, + "loss": 2.1911, + "step": 96990 + }, + { + "epoch": 13.1, + "learning_rate": 1.1748343169667174e-05, + "loss": 2.2097, + "step": 97000 + }, + { + "epoch": 13.1, + "eval_loss": 2.4041683673858643, + "eval_runtime": 1269.5496, + "eval_samples_per_second": 59.97, + "eval_steps_per_second": 4.998, + "step": 97000 + }, + { + "epoch": 13.1, + "learning_rate": 1.1731886721783729e-05, + "loss": 2.1956, + "step": 97010 + }, + { + "epoch": 13.1, + "learning_rate": 1.1715441338551446e-05, + "loss": 2.1909, + "step": 97020 + }, + { + "epoch": 13.1, + "learning_rate": 1.1699007021286272e-05, + "loss": 2.1952, + "step": 97030 + }, + { + "epoch": 13.11, + "learning_rate": 1.1682583771303405e-05, + "loss": 2.209, + "step": 97040 + }, + { + "epoch": 13.11, + "learning_rate": 1.1666171589917046e-05, + "loss": 2.1874, + "step": 97050 + }, + { + "epoch": 13.11, + "learning_rate": 1.1649770478440507e-05, + "loss": 2.2329, + "step": 97060 + }, + { + "epoch": 13.11, + "learning_rate": 1.163338043818634e-05, + "loss": 2.2008, + "step": 97070 + }, + { + "epoch": 13.11, + "learning_rate": 1.161700147046608e-05, + "loss": 2.2056, + "step": 97080 + }, + { + "epoch": 13.11, + "learning_rate": 1.1600633576590412e-05, + "loss": 2.1786, + "step": 97090 + }, + { + "epoch": 13.11, + "learning_rate": 1.1584276757869204e-05, + "loss": 2.1981, + "step": 97100 + }, + { + "epoch": 13.12, + "learning_rate": 1.1567931015611293e-05, + "loss": 2.2216, + "step": 97110 + }, + { + "epoch": 13.12, + "learning_rate": 1.155159635112483e-05, + "loss": 2.2227, + "step": 97120 + }, + { + "epoch": 13.12, + "learning_rate": 1.1535272765716857e-05, + "loss": 2.1962, + "step": 97130 + }, + { + "epoch": 13.12, + "learning_rate": 1.1518960260693693e-05, + "loss": 2.2304, + "step": 97140 + }, + { + "epoch": 13.12, + "learning_rate": 1.1502658837360696e-05, + "loss": 2.2086, + "step": 97150 + }, + { + "epoch": 13.12, + "learning_rate": 1.1486368497022386e-05, + "loss": 2.1812, + "step": 97160 + }, + { + "epoch": 13.12, + "learning_rate": 1.1470089240982355e-05, + "loss": 2.1969, + "step": 97170 + }, + { + "epoch": 13.12, + "learning_rate": 1.1453821070543295e-05, + "loss": 2.2183, + "step": 97180 + }, + { + "epoch": 13.13, + "learning_rate": 1.143756398700708e-05, + "loss": 2.2113, + "step": 97190 + }, + { + "epoch": 13.13, + "learning_rate": 1.1421317991674623e-05, + "loss": 2.197, + "step": 97200 + }, + { + "epoch": 13.13, + "learning_rate": 1.1405083085845961e-05, + "loss": 2.1736, + "step": 97210 + }, + { + "epoch": 13.13, + "learning_rate": 1.1388859270820328e-05, + "loss": 2.1797, + "step": 97220 + }, + { + "epoch": 13.13, + "learning_rate": 1.1372646547895931e-05, + "loss": 2.2025, + "step": 97230 + }, + { + "epoch": 13.13, + "learning_rate": 1.1356444918370234e-05, + "loss": 2.2154, + "step": 97240 + }, + { + "epoch": 13.13, + "learning_rate": 1.1340254383539698e-05, + "loss": 2.1828, + "step": 97250 + }, + { + "epoch": 13.14, + "learning_rate": 1.1324074944699923e-05, + "loss": 2.2228, + "step": 97260 + }, + { + "epoch": 13.14, + "learning_rate": 1.1307906603145689e-05, + "loss": 2.1822, + "step": 97270 + }, + { + "epoch": 13.14, + "learning_rate": 1.1291749360170826e-05, + "loss": 2.1942, + "step": 97280 + }, + { + "epoch": 13.14, + "learning_rate": 1.1275603217068235e-05, + "loss": 2.2061, + "step": 97290 + }, + { + "epoch": 13.14, + "learning_rate": 1.1259468175130049e-05, + "loss": 2.1987, + "step": 97300 + }, + { + "epoch": 13.14, + "learning_rate": 1.12433442356474e-05, + "loss": 2.1863, + "step": 97310 + }, + { + "epoch": 13.14, + "learning_rate": 1.122723139991064e-05, + "loss": 2.2207, + "step": 97320 + }, + { + "epoch": 13.15, + "learning_rate": 1.1211129669209091e-05, + "loss": 2.2236, + "step": 97330 + }, + { + "epoch": 13.15, + "learning_rate": 1.11950390448313e-05, + "loss": 2.1885, + "step": 97340 + }, + { + "epoch": 13.15, + "learning_rate": 1.1178959528064873e-05, + "loss": 2.2132, + "step": 97350 + }, + { + "epoch": 13.15, + "learning_rate": 1.1162891120196582e-05, + "loss": 2.2034, + "step": 97360 + }, + { + "epoch": 13.15, + "learning_rate": 1.114683382251223e-05, + "loss": 2.1783, + "step": 97370 + }, + { + "epoch": 13.15, + "learning_rate": 1.113078763629679e-05, + "loss": 2.2299, + "step": 97380 + }, + { + "epoch": 13.15, + "learning_rate": 1.1114752562834333e-05, + "loss": 2.2016, + "step": 97390 + }, + { + "epoch": 13.15, + "learning_rate": 1.1098728603408036e-05, + "loss": 2.2128, + "step": 97400 + }, + { + "epoch": 13.16, + "learning_rate": 1.1082715759300154e-05, + "loss": 2.2036, + "step": 97410 + }, + { + "epoch": 13.16, + "learning_rate": 1.1066714031792145e-05, + "loss": 2.2044, + "step": 97420 + }, + { + "epoch": 13.16, + "learning_rate": 1.105072342216447e-05, + "loss": 2.1874, + "step": 97430 + }, + { + "epoch": 13.16, + "learning_rate": 1.103474393169677e-05, + "loss": 2.1992, + "step": 97440 + }, + { + "epoch": 13.16, + "learning_rate": 1.1018775561667791e-05, + "loss": 2.1804, + "step": 97450 + }, + { + "epoch": 13.16, + "learning_rate": 1.100281831335531e-05, + "loss": 2.2247, + "step": 97460 + }, + { + "epoch": 13.16, + "learning_rate": 1.0986872188036339e-05, + "loss": 2.2081, + "step": 97470 + }, + { + "epoch": 13.17, + "learning_rate": 1.0970937186986922e-05, + "loss": 2.2026, + "step": 97480 + }, + { + "epoch": 13.17, + "learning_rate": 1.0955013311482192e-05, + "loss": 2.212, + "step": 97490 + }, + { + "epoch": 13.17, + "learning_rate": 1.0939100562796493e-05, + "loss": 2.2071, + "step": 97500 + }, + { + "epoch": 13.17, + "learning_rate": 1.0923198942203144e-05, + "loss": 2.1814, + "step": 97510 + }, + { + "epoch": 13.17, + "learning_rate": 1.0907308450974722e-05, + "loss": 2.2042, + "step": 97520 + }, + { + "epoch": 13.17, + "learning_rate": 1.0891429090382747e-05, + "loss": 2.214, + "step": 97530 + }, + { + "epoch": 13.17, + "learning_rate": 1.0875560861697985e-05, + "loss": 2.2327, + "step": 97540 + }, + { + "epoch": 13.17, + "learning_rate": 1.0859703766190253e-05, + "loss": 2.2025, + "step": 97550 + }, + { + "epoch": 13.18, + "learning_rate": 1.0843857805128486e-05, + "loss": 2.2154, + "step": 97560 + }, + { + "epoch": 13.18, + "learning_rate": 1.0828022979780754e-05, + "loss": 2.1975, + "step": 97570 + }, + { + "epoch": 13.18, + "learning_rate": 1.0812199291414142e-05, + "loss": 2.2298, + "step": 97580 + }, + { + "epoch": 13.18, + "learning_rate": 1.0796386741294987e-05, + "loss": 2.2021, + "step": 97590 + }, + { + "epoch": 13.18, + "learning_rate": 1.0780585330688612e-05, + "loss": 2.2064, + "step": 97600 + }, + { + "epoch": 13.18, + "learning_rate": 1.0764795060859504e-05, + "loss": 2.2031, + "step": 97610 + }, + { + "epoch": 13.18, + "learning_rate": 1.074901593307127e-05, + "loss": 2.1919, + "step": 97620 + }, + { + "epoch": 13.19, + "learning_rate": 1.0733247948586566e-05, + "loss": 2.2038, + "step": 97630 + }, + { + "epoch": 13.19, + "learning_rate": 1.0717491108667248e-05, + "loss": 2.2185, + "step": 97640 + }, + { + "epoch": 13.19, + "learning_rate": 1.0701745414574209e-05, + "loss": 2.2328, + "step": 97650 + }, + { + "epoch": 13.19, + "learning_rate": 1.0686010867567424e-05, + "loss": 2.2361, + "step": 97660 + }, + { + "epoch": 13.19, + "learning_rate": 1.0670287468906086e-05, + "loss": 2.1944, + "step": 97670 + }, + { + "epoch": 13.19, + "learning_rate": 1.0656145942986177e-05, + "loss": 2.2046, + "step": 97680 + }, + { + "epoch": 13.19, + "learning_rate": 1.0640443729646818e-05, + "loss": 2.1981, + "step": 97690 + }, + { + "epoch": 13.2, + "learning_rate": 1.0624752668299308e-05, + "loss": 2.2069, + "step": 97700 + }, + { + "epoch": 13.2, + "learning_rate": 1.0609072760199356e-05, + "loss": 2.194, + "step": 97710 + }, + { + "epoch": 13.2, + "learning_rate": 1.0593404006601575e-05, + "loss": 2.2102, + "step": 97720 + }, + { + "epoch": 13.2, + "learning_rate": 1.0577746408759946e-05, + "loss": 2.2007, + "step": 97730 + }, + { + "epoch": 13.2, + "learning_rate": 1.0562099967927384e-05, + "loss": 2.1784, + "step": 97740 + }, + { + "epoch": 13.2, + "learning_rate": 1.0546464685356004e-05, + "loss": 2.2163, + "step": 97750 + }, + { + "epoch": 13.2, + "learning_rate": 1.0530840562296971e-05, + "loss": 2.2025, + "step": 97760 + }, + { + "epoch": 13.2, + "learning_rate": 1.0515227600000536e-05, + "loss": 2.2133, + "step": 97770 + }, + { + "epoch": 13.21, + "learning_rate": 1.0499625799716183e-05, + "loss": 2.2313, + "step": 97780 + }, + { + "epoch": 13.21, + "learning_rate": 1.0484035162692362e-05, + "loss": 2.1915, + "step": 97790 + }, + { + "epoch": 13.21, + "learning_rate": 1.0468455690176659e-05, + "loss": 2.2054, + "step": 97800 + }, + { + "epoch": 13.21, + "learning_rate": 1.0452887383415876e-05, + "loss": 2.1988, + "step": 97810 + }, + { + "epoch": 13.21, + "learning_rate": 1.043733024365575e-05, + "loss": 2.2011, + "step": 97820 + }, + { + "epoch": 13.21, + "learning_rate": 1.0421784272141303e-05, + "loss": 2.2051, + "step": 97830 + }, + { + "epoch": 13.21, + "learning_rate": 1.0406249470116473e-05, + "loss": 2.2106, + "step": 97840 + }, + { + "epoch": 13.22, + "learning_rate": 1.0390725838824465e-05, + "loss": 2.2154, + "step": 97850 + }, + { + "epoch": 13.22, + "learning_rate": 1.0375213379507536e-05, + "loss": 2.237, + "step": 97860 + }, + { + "epoch": 13.22, + "learning_rate": 1.0359712093407024e-05, + "loss": 2.208, + "step": 97870 + }, + { + "epoch": 13.22, + "learning_rate": 1.034422198176339e-05, + "loss": 2.1729, + "step": 97880 + }, + { + "epoch": 13.22, + "learning_rate": 1.0328743045816174e-05, + "loss": 2.2057, + "step": 97890 + }, + { + "epoch": 13.22, + "learning_rate": 1.0313275286804068e-05, + "loss": 2.2039, + "step": 97900 + }, + { + "epoch": 13.22, + "learning_rate": 1.0297818705964933e-05, + "loss": 2.1946, + "step": 97910 + }, + { + "epoch": 13.22, + "learning_rate": 1.0282373304535513e-05, + "loss": 2.2165, + "step": 97920 + }, + { + "epoch": 13.23, + "learning_rate": 1.0266939083751902e-05, + "loss": 2.2333, + "step": 97930 + }, + { + "epoch": 13.23, + "learning_rate": 1.0251516044849128e-05, + "loss": 2.1843, + "step": 97940 + }, + { + "epoch": 13.23, + "learning_rate": 1.023610418906144e-05, + "loss": 2.2095, + "step": 97950 + }, + { + "epoch": 13.23, + "learning_rate": 1.022070351762213e-05, + "loss": 2.2169, + "step": 97960 + }, + { + "epoch": 13.23, + "learning_rate": 1.0205314031763583e-05, + "loss": 2.1808, + "step": 97970 + }, + { + "epoch": 13.23, + "learning_rate": 1.0189935732717363e-05, + "loss": 2.2231, + "step": 97980 + }, + { + "epoch": 13.23, + "learning_rate": 1.0174568621714052e-05, + "loss": 2.2224, + "step": 97990 + }, + { + "epoch": 13.24, + "learning_rate": 1.0159212699983366e-05, + "loss": 2.2055, + "step": 98000 + }, + { + "epoch": 13.24, + "eval_loss": 2.4040699005126953, + "eval_runtime": 1269.6142, + "eval_samples_per_second": 59.967, + "eval_steps_per_second": 4.998, + "step": 98000 + }, + { + "epoch": 13.24, + "learning_rate": 1.0143867968754188e-05, + "loss": 2.2126, + "step": 98010 + }, + { + "epoch": 13.24, + "learning_rate": 1.012853442925437e-05, + "loss": 2.1931, + "step": 98020 + }, + { + "epoch": 13.24, + "learning_rate": 1.0113212082711064e-05, + "loss": 2.2014, + "step": 98030 + }, + { + "epoch": 13.24, + "learning_rate": 1.009790093035029e-05, + "loss": 2.1955, + "step": 98040 + }, + { + "epoch": 13.24, + "learning_rate": 1.0082600973397348e-05, + "loss": 2.225, + "step": 98050 + }, + { + "epoch": 13.24, + "learning_rate": 1.0067312213076629e-05, + "loss": 2.2284, + "step": 98060 + }, + { + "epoch": 13.25, + "learning_rate": 1.0052034650611534e-05, + "loss": 2.1989, + "step": 98070 + }, + { + "epoch": 13.25, + "learning_rate": 1.003676828722464e-05, + "loss": 2.1932, + "step": 98080 + }, + { + "epoch": 13.25, + "learning_rate": 1.0021513124137597e-05, + "loss": 2.1943, + "step": 98090 + }, + { + "epoch": 13.25, + "learning_rate": 1.0006269162571167e-05, + "loss": 2.2094, + "step": 98100 + }, + { + "epoch": 13.25, + "learning_rate": 9.99103640374529e-06, + "loss": 2.1887, + "step": 98110 + }, + { + "epoch": 13.25, + "learning_rate": 9.975814848878837e-06, + "loss": 2.2046, + "step": 98120 + }, + { + "epoch": 13.25, + "learning_rate": 9.960604499189955e-06, + "loss": 2.2195, + "step": 98130 + }, + { + "epoch": 13.25, + "learning_rate": 9.945405355895785e-06, + "loss": 2.1998, + "step": 98140 + }, + { + "epoch": 13.26, + "learning_rate": 9.930217420212639e-06, + "loss": 2.186, + "step": 98150 + }, + { + "epoch": 13.26, + "learning_rate": 9.915040693355892e-06, + "loss": 2.1855, + "step": 98160 + }, + { + "epoch": 13.26, + "learning_rate": 9.899875176540023e-06, + "loss": 2.2076, + "step": 98170 + }, + { + "epoch": 13.26, + "learning_rate": 9.884720870978647e-06, + "loss": 2.19, + "step": 98180 + }, + { + "epoch": 13.26, + "learning_rate": 9.869577777884457e-06, + "loss": 2.2035, + "step": 98190 + }, + { + "epoch": 13.26, + "learning_rate": 9.854445898469204e-06, + "loss": 2.2067, + "step": 98200 + }, + { + "epoch": 13.26, + "learning_rate": 9.839325233943852e-06, + "loss": 2.1957, + "step": 98210 + }, + { + "epoch": 13.27, + "learning_rate": 9.824215785518363e-06, + "loss": 2.1803, + "step": 98220 + }, + { + "epoch": 13.27, + "learning_rate": 9.809117554401908e-06, + "loss": 2.1956, + "step": 98230 + }, + { + "epoch": 13.27, + "learning_rate": 9.794030541802584e-06, + "loss": 2.1995, + "step": 98240 + }, + { + "epoch": 13.27, + "learning_rate": 9.778954748927775e-06, + "loss": 2.1806, + "step": 98250 + }, + { + "epoch": 13.27, + "learning_rate": 9.763890176983901e-06, + "loss": 2.1953, + "step": 98260 + }, + { + "epoch": 13.27, + "learning_rate": 9.748836827176465e-06, + "loss": 2.2072, + "step": 98270 + }, + { + "epoch": 13.27, + "learning_rate": 9.733794700710067e-06, + "loss": 2.2062, + "step": 98280 + }, + { + "epoch": 13.27, + "learning_rate": 9.718763798788415e-06, + "loss": 2.193, + "step": 98290 + }, + { + "epoch": 13.28, + "learning_rate": 9.703744122614343e-06, + "loss": 2.2153, + "step": 98300 + }, + { + "epoch": 13.28, + "learning_rate": 9.688735673389824e-06, + "loss": 2.2049, + "step": 98310 + }, + { + "epoch": 13.28, + "learning_rate": 9.6737384523158e-06, + "loss": 2.2114, + "step": 98320 + }, + { + "epoch": 13.28, + "learning_rate": 9.658752460592439e-06, + "loss": 2.2023, + "step": 98330 + }, + { + "epoch": 13.28, + "learning_rate": 9.643777699418953e-06, + "loss": 2.2146, + "step": 98340 + }, + { + "epoch": 13.28, + "learning_rate": 9.628814169993682e-06, + "loss": 2.1882, + "step": 98350 + }, + { + "epoch": 13.28, + "learning_rate": 9.613861873514066e-06, + "loss": 2.1742, + "step": 98360 + }, + { + "epoch": 13.29, + "learning_rate": 9.598920811176586e-06, + "loss": 2.203, + "step": 98370 + }, + { + "epoch": 13.29, + "learning_rate": 9.583990984176932e-06, + "loss": 2.229, + "step": 98380 + }, + { + "epoch": 13.29, + "learning_rate": 9.569072393709814e-06, + "loss": 2.224, + "step": 98390 + }, + { + "epoch": 13.29, + "learning_rate": 9.55416504096903e-06, + "loss": 2.2064, + "step": 98400 + }, + { + "epoch": 13.29, + "learning_rate": 9.539268927147576e-06, + "loss": 2.1951, + "step": 98410 + }, + { + "epoch": 13.29, + "learning_rate": 9.524384053437428e-06, + "loss": 2.2224, + "step": 98420 + }, + { + "epoch": 13.29, + "learning_rate": 9.509510421029787e-06, + "loss": 2.199, + "step": 98430 + }, + { + "epoch": 13.3, + "learning_rate": 9.494648031114798e-06, + "loss": 2.1838, + "step": 98440 + }, + { + "epoch": 13.3, + "learning_rate": 9.479796884881846e-06, + "loss": 2.1865, + "step": 98450 + }, + { + "epoch": 13.3, + "learning_rate": 9.464956983519411e-06, + "loss": 2.1849, + "step": 98460 + }, + { + "epoch": 13.3, + "learning_rate": 9.450128328214962e-06, + "loss": 2.2035, + "step": 98470 + }, + { + "epoch": 13.3, + "learning_rate": 9.435310920155165e-06, + "loss": 2.2059, + "step": 98480 + }, + { + "epoch": 13.3, + "learning_rate": 9.420504760525721e-06, + "loss": 2.1655, + "step": 98490 + }, + { + "epoch": 13.3, + "learning_rate": 9.405709850511517e-06, + "loss": 2.1981, + "step": 98500 + }, + { + "epoch": 13.3, + "learning_rate": 9.390926191296472e-06, + "loss": 2.2255, + "step": 98510 + }, + { + "epoch": 13.31, + "learning_rate": 9.376153784063573e-06, + "loss": 2.1945, + "step": 98520 + }, + { + "epoch": 13.31, + "learning_rate": 9.36139262999504e-06, + "loss": 2.2264, + "step": 98530 + }, + { + "epoch": 13.31, + "learning_rate": 9.34664273027203e-06, + "loss": 2.1924, + "step": 98540 + }, + { + "epoch": 13.31, + "learning_rate": 9.331904086074914e-06, + "loss": 2.205, + "step": 98550 + }, + { + "epoch": 13.31, + "learning_rate": 9.317176698583151e-06, + "loss": 2.2151, + "step": 98560 + }, + { + "epoch": 13.31, + "learning_rate": 9.302460568975195e-06, + "loss": 2.1788, + "step": 98570 + }, + { + "epoch": 13.31, + "learning_rate": 9.287755698428756e-06, + "loss": 2.2102, + "step": 98580 + }, + { + "epoch": 13.32, + "learning_rate": 9.273062088120526e-06, + "loss": 2.2078, + "step": 98590 + }, + { + "epoch": 13.32, + "learning_rate": 9.25837973922633e-06, + "loss": 2.2053, + "step": 98600 + }, + { + "epoch": 13.32, + "learning_rate": 9.243708652921112e-06, + "loss": 2.1796, + "step": 98610 + }, + { + "epoch": 13.32, + "learning_rate": 9.229048830378882e-06, + "loss": 2.2332, + "step": 98620 + }, + { + "epoch": 13.32, + "learning_rate": 9.214400272772804e-06, + "loss": 2.1989, + "step": 98630 + }, + { + "epoch": 13.32, + "learning_rate": 9.199762981275054e-06, + "loss": 2.1648, + "step": 98640 + }, + { + "epoch": 13.32, + "learning_rate": 9.185136957056948e-06, + "loss": 2.1907, + "step": 98650 + }, + { + "epoch": 13.32, + "learning_rate": 9.170522201288965e-06, + "loss": 2.1855, + "step": 98660 + }, + { + "epoch": 13.33, + "learning_rate": 9.155918715140602e-06, + "loss": 2.2157, + "step": 98670 + }, + { + "epoch": 13.33, + "learning_rate": 9.14132649978046e-06, + "loss": 2.1988, + "step": 98680 + }, + { + "epoch": 13.33, + "learning_rate": 9.126745556376236e-06, + "loss": 2.2296, + "step": 98690 + }, + { + "epoch": 13.33, + "learning_rate": 9.11217588609478e-06, + "loss": 2.2232, + "step": 98700 + }, + { + "epoch": 13.33, + "learning_rate": 9.09761749010201e-06, + "loss": 2.2242, + "step": 98710 + }, + { + "epoch": 13.33, + "learning_rate": 9.083070369562878e-06, + "loss": 2.2118, + "step": 98720 + }, + { + "epoch": 13.33, + "learning_rate": 9.068534525641552e-06, + "loss": 2.199, + "step": 98730 + }, + { + "epoch": 13.34, + "learning_rate": 9.054009959501185e-06, + "loss": 2.2029, + "step": 98740 + }, + { + "epoch": 13.34, + "learning_rate": 9.03949667230413e-06, + "loss": 2.2034, + "step": 98750 + }, + { + "epoch": 13.34, + "learning_rate": 9.024994665211776e-06, + "loss": 2.1922, + "step": 98760 + }, + { + "epoch": 13.34, + "learning_rate": 9.010503939384561e-06, + "loss": 2.195, + "step": 98770 + }, + { + "epoch": 13.34, + "learning_rate": 8.996024495982157e-06, + "loss": 2.1968, + "step": 98780 + }, + { + "epoch": 13.34, + "learning_rate": 8.98155633616322e-06, + "loss": 2.2254, + "step": 98790 + }, + { + "epoch": 13.34, + "learning_rate": 8.967099461085509e-06, + "loss": 2.1825, + "step": 98800 + }, + { + "epoch": 13.34, + "learning_rate": 8.952653871905963e-06, + "loss": 2.2418, + "step": 98810 + }, + { + "epoch": 13.35, + "learning_rate": 8.938219569780507e-06, + "loss": 2.1962, + "step": 98820 + }, + { + "epoch": 13.35, + "learning_rate": 8.923796555864304e-06, + "loss": 2.1682, + "step": 98830 + }, + { + "epoch": 13.35, + "learning_rate": 8.909384831311444e-06, + "loss": 2.1915, + "step": 98840 + }, + { + "epoch": 13.35, + "learning_rate": 8.894984397275207e-06, + "loss": 2.2083, + "step": 98850 + }, + { + "epoch": 13.35, + "learning_rate": 8.880595254908019e-06, + "loss": 2.2114, + "step": 98860 + }, + { + "epoch": 13.35, + "learning_rate": 8.866217405361308e-06, + "loss": 2.2084, + "step": 98870 + }, + { + "epoch": 13.35, + "learning_rate": 8.85185084978564e-06, + "loss": 2.197, + "step": 98880 + }, + { + "epoch": 13.36, + "learning_rate": 8.837495589330662e-06, + "loss": 2.2128, + "step": 98890 + }, + { + "epoch": 13.36, + "learning_rate": 8.823151625145136e-06, + "loss": 2.205, + "step": 98900 + }, + { + "epoch": 13.36, + "learning_rate": 8.808818958376913e-06, + "loss": 2.2021, + "step": 98910 + }, + { + "epoch": 13.36, + "learning_rate": 8.794497590172922e-06, + "loss": 2.2189, + "step": 98920 + }, + { + "epoch": 13.36, + "learning_rate": 8.780187521679233e-06, + "loss": 2.2026, + "step": 98930 + }, + { + "epoch": 13.36, + "learning_rate": 8.765888754040945e-06, + "loss": 2.2042, + "step": 98940 + }, + { + "epoch": 13.36, + "learning_rate": 8.751601288402327e-06, + "loss": 2.2082, + "step": 98950 + }, + { + "epoch": 13.37, + "learning_rate": 8.737325125906713e-06, + "loss": 2.2109, + "step": 98960 + }, + { + "epoch": 13.37, + "learning_rate": 8.723060267696474e-06, + "loss": 2.2003, + "step": 98970 + }, + { + "epoch": 13.37, + "learning_rate": 8.70880671491318e-06, + "loss": 2.2204, + "step": 98980 + }, + { + "epoch": 13.37, + "learning_rate": 8.694564468697434e-06, + "loss": 2.2051, + "step": 98990 + }, + { + "epoch": 13.37, + "learning_rate": 8.68033353018891e-06, + "loss": 2.2026, + "step": 99000 + }, + { + "epoch": 13.37, + "eval_loss": 2.403158664703369, + "eval_runtime": 1269.4248, + "eval_samples_per_second": 59.976, + "eval_steps_per_second": 4.998, + "step": 99000 + }, + { + "epoch": 13.37, + "learning_rate": 8.666113900526461e-06, + "loss": 2.2028, + "step": 99010 + }, + { + "epoch": 13.37, + "learning_rate": 8.651905580847963e-06, + "loss": 2.1781, + "step": 99020 + }, + { + "epoch": 13.37, + "learning_rate": 8.637708572290435e-06, + "loss": 2.1987, + "step": 99030 + }, + { + "epoch": 13.38, + "learning_rate": 8.623522875989924e-06, + "loss": 2.2305, + "step": 99040 + }, + { + "epoch": 13.38, + "learning_rate": 8.60934849308162e-06, + "loss": 2.1856, + "step": 99050 + }, + { + "epoch": 13.38, + "learning_rate": 8.595185424699846e-06, + "loss": 2.1764, + "step": 99060 + }, + { + "epoch": 13.38, + "learning_rate": 8.581033671977949e-06, + "loss": 2.2065, + "step": 99070 + }, + { + "epoch": 13.38, + "learning_rate": 8.566893236048389e-06, + "loss": 2.2348, + "step": 99080 + }, + { + "epoch": 13.38, + "learning_rate": 8.552764118042727e-06, + "loss": 2.2146, + "step": 99090 + }, + { + "epoch": 13.38, + "learning_rate": 8.53864631909164e-06, + "loss": 2.1937, + "step": 99100 + }, + { + "epoch": 13.39, + "learning_rate": 8.524539840324863e-06, + "loss": 2.1876, + "step": 99110 + }, + { + "epoch": 13.39, + "learning_rate": 8.51044468287122e-06, + "loss": 2.1938, + "step": 99120 + }, + { + "epoch": 13.39, + "learning_rate": 8.496360847858712e-06, + "loss": 2.2026, + "step": 99130 + }, + { + "epoch": 13.39, + "learning_rate": 8.482288336414289e-06, + "loss": 2.229, + "step": 99140 + }, + { + "epoch": 13.39, + "learning_rate": 8.468227149664164e-06, + "loss": 2.2138, + "step": 99150 + }, + { + "epoch": 13.39, + "learning_rate": 8.454177288733504e-06, + "loss": 2.2373, + "step": 99160 + }, + { + "epoch": 13.39, + "learning_rate": 8.440138754746629e-06, + "loss": 2.1873, + "step": 99170 + }, + { + "epoch": 13.39, + "learning_rate": 8.426111548826969e-06, + "loss": 2.2202, + "step": 99180 + }, + { + "epoch": 13.4, + "learning_rate": 8.412095672097013e-06, + "loss": 2.223, + "step": 99190 + }, + { + "epoch": 13.4, + "learning_rate": 8.398091125678314e-06, + "loss": 2.1832, + "step": 99200 + }, + { + "epoch": 13.4, + "learning_rate": 8.384097910691639e-06, + "loss": 2.1799, + "step": 99210 + }, + { + "epoch": 13.4, + "learning_rate": 8.370116028256696e-06, + "loss": 2.2099, + "step": 99220 + }, + { + "epoch": 13.4, + "learning_rate": 8.356145479492454e-06, + "loss": 2.2105, + "step": 99230 + }, + { + "epoch": 13.4, + "learning_rate": 8.342186265516754e-06, + "loss": 2.1805, + "step": 99240 + }, + { + "epoch": 13.4, + "learning_rate": 8.328238387446734e-06, + "loss": 2.1914, + "step": 99250 + }, + { + "epoch": 13.41, + "learning_rate": 8.314301846398552e-06, + "loss": 2.2172, + "step": 99260 + }, + { + "epoch": 13.41, + "learning_rate": 8.300376643487434e-06, + "loss": 2.1886, + "step": 99270 + }, + { + "epoch": 13.41, + "learning_rate": 8.286462779827736e-06, + "loss": 2.2083, + "step": 99280 + }, + { + "epoch": 13.41, + "learning_rate": 8.272560256532834e-06, + "loss": 2.213, + "step": 99290 + }, + { + "epoch": 13.41, + "learning_rate": 8.258669074715324e-06, + "loss": 2.2437, + "step": 99300 + }, + { + "epoch": 13.41, + "learning_rate": 8.244789235486782e-06, + "loss": 2.2031, + "step": 99310 + }, + { + "epoch": 13.41, + "learning_rate": 8.230920739957919e-06, + "loss": 2.1893, + "step": 99320 + }, + { + "epoch": 13.42, + "learning_rate": 8.217063589238548e-06, + "loss": 2.1995, + "step": 99330 + }, + { + "epoch": 13.42, + "learning_rate": 8.203217784437549e-06, + "loss": 2.1842, + "step": 99340 + }, + { + "epoch": 13.42, + "learning_rate": 8.189383326662918e-06, + "loss": 2.2006, + "step": 99350 + }, + { + "epoch": 13.42, + "learning_rate": 8.175560217021754e-06, + "loss": 2.2078, + "step": 99360 + }, + { + "epoch": 13.42, + "learning_rate": 8.161748456620154e-06, + "loss": 2.2194, + "step": 99370 + }, + { + "epoch": 13.42, + "learning_rate": 8.147948046563451e-06, + "loss": 2.2261, + "step": 99380 + }, + { + "epoch": 13.42, + "learning_rate": 8.13415898795598e-06, + "loss": 2.22, + "step": 99390 + }, + { + "epoch": 13.42, + "learning_rate": 8.120381281901156e-06, + "loss": 2.2186, + "step": 99400 + }, + { + "epoch": 13.43, + "learning_rate": 8.106614929501548e-06, + "loss": 2.2172, + "step": 99410 + }, + { + "epoch": 13.43, + "learning_rate": 8.092859931858741e-06, + "loss": 2.2189, + "step": 99420 + }, + { + "epoch": 13.43, + "learning_rate": 8.07911629007354e-06, + "loss": 2.2094, + "step": 99430 + }, + { + "epoch": 13.43, + "learning_rate": 8.065384005245645e-06, + "loss": 2.2385, + "step": 99440 + }, + { + "epoch": 13.43, + "learning_rate": 8.051663078474013e-06, + "loss": 2.2044, + "step": 99450 + }, + { + "epoch": 13.43, + "learning_rate": 8.037953510856648e-06, + "loss": 2.2138, + "step": 99460 + }, + { + "epoch": 13.43, + "learning_rate": 8.024255303490623e-06, + "loss": 2.203, + "step": 99470 + }, + { + "epoch": 13.44, + "learning_rate": 8.01056845747211e-06, + "loss": 2.1965, + "step": 99480 + }, + { + "epoch": 13.44, + "learning_rate": 7.996892973896336e-06, + "loss": 2.2139, + "step": 99490 + }, + { + "epoch": 13.44, + "learning_rate": 7.983228853857703e-06, + "loss": 2.2075, + "step": 99500 + }, + { + "epoch": 13.44, + "learning_rate": 7.969576098449659e-06, + "loss": 2.2002, + "step": 99510 + }, + { + "epoch": 13.44, + "learning_rate": 7.955934708764694e-06, + "loss": 2.1902, + "step": 99520 + }, + { + "epoch": 13.44, + "learning_rate": 7.942304685894502e-06, + "loss": 2.2314, + "step": 99530 + }, + { + "epoch": 13.44, + "learning_rate": 7.928686030929726e-06, + "loss": 2.1924, + "step": 99540 + }, + { + "epoch": 13.44, + "learning_rate": 7.915078744960245e-06, + "loss": 2.1927, + "step": 99550 + }, + { + "epoch": 13.45, + "learning_rate": 7.901482829074906e-06, + "loss": 2.2137, + "step": 99560 + }, + { + "epoch": 13.45, + "learning_rate": 7.887898284361704e-06, + "loss": 2.1887, + "step": 99570 + }, + { + "epoch": 13.45, + "learning_rate": 7.874325111907754e-06, + "loss": 2.1827, + "step": 99580 + }, + { + "epoch": 13.45, + "learning_rate": 7.860763312799184e-06, + "loss": 2.1719, + "step": 99590 + }, + { + "epoch": 13.45, + "learning_rate": 7.84721288812128e-06, + "loss": 2.215, + "step": 99600 + }, + { + "epoch": 13.45, + "learning_rate": 7.83367383895837e-06, + "loss": 2.2269, + "step": 99610 + }, + { + "epoch": 13.45, + "learning_rate": 7.820146166393892e-06, + "loss": 2.1944, + "step": 99620 + }, + { + "epoch": 13.46, + "learning_rate": 7.806629871510378e-06, + "loss": 2.1946, + "step": 99630 + }, + { + "epoch": 13.46, + "learning_rate": 7.793124955389463e-06, + "loss": 2.2048, + "step": 99640 + }, + { + "epoch": 13.46, + "learning_rate": 7.779631419111832e-06, + "loss": 2.1786, + "step": 99650 + }, + { + "epoch": 13.46, + "learning_rate": 7.766149263757255e-06, + "loss": 2.2084, + "step": 99660 + }, + { + "epoch": 13.46, + "learning_rate": 7.75267849040465e-06, + "loss": 2.1908, + "step": 99670 + }, + { + "epoch": 13.46, + "learning_rate": 7.739219100132043e-06, + "loss": 2.2126, + "step": 99680 + }, + { + "epoch": 13.46, + "learning_rate": 7.725771094016387e-06, + "loss": 2.2214, + "step": 99690 + }, + { + "epoch": 13.47, + "learning_rate": 7.712334473133902e-06, + "loss": 2.2415, + "step": 99700 + }, + { + "epoch": 13.47, + "learning_rate": 7.698909238559797e-06, + "loss": 2.2259, + "step": 99710 + }, + { + "epoch": 13.47, + "learning_rate": 7.685495391368445e-06, + "loss": 2.2036, + "step": 99720 + }, + { + "epoch": 13.47, + "learning_rate": 7.672092932633223e-06, + "loss": 2.1904, + "step": 99730 + }, + { + "epoch": 13.47, + "learning_rate": 7.658701863426653e-06, + "loss": 2.193, + "step": 99740 + }, + { + "epoch": 13.47, + "learning_rate": 7.645322184820329e-06, + "loss": 2.2005, + "step": 99750 + }, + { + "epoch": 13.47, + "learning_rate": 7.631953897884962e-06, + "loss": 2.2124, + "step": 99760 + }, + { + "epoch": 13.47, + "learning_rate": 7.618597003690263e-06, + "loss": 2.21, + "step": 99770 + }, + { + "epoch": 13.48, + "learning_rate": 7.6052515033051595e-06, + "loss": 2.218, + "step": 99780 + }, + { + "epoch": 13.48, + "learning_rate": 7.591917397797531e-06, + "loss": 2.2163, + "step": 99790 + }, + { + "epoch": 13.48, + "learning_rate": 7.578594688234491e-06, + "loss": 2.1915, + "step": 99800 + }, + { + "epoch": 13.48, + "learning_rate": 7.565283375682135e-06, + "loss": 2.2252, + "step": 99810 + }, + { + "epoch": 13.48, + "learning_rate": 7.55198346120563e-06, + "loss": 2.1862, + "step": 99820 + }, + { + "epoch": 13.48, + "learning_rate": 7.538694945869356e-06, + "loss": 2.1918, + "step": 99830 + }, + { + "epoch": 13.48, + "learning_rate": 7.526745029210468e-06, + "loss": 2.2057, + "step": 99840 + }, + { + "epoch": 13.49, + "learning_rate": 7.513478175169419e-06, + "loss": 2.1799, + "step": 99850 + }, + { + "epoch": 13.49, + "learning_rate": 7.500222723349886e-06, + "loss": 2.1973, + "step": 99860 + }, + { + "epoch": 13.49, + "learning_rate": 7.4869786748126044e-06, + "loss": 2.2012, + "step": 99870 + }, + { + "epoch": 13.49, + "learning_rate": 7.473746030617378e-06, + "loss": 2.2168, + "step": 99880 + }, + { + "epoch": 13.49, + "learning_rate": 7.460524791823174e-06, + "loss": 2.2098, + "step": 99890 + }, + { + "epoch": 13.49, + "learning_rate": 7.447314959487966e-06, + "loss": 2.2106, + "step": 99900 + }, + { + "epoch": 13.49, + "learning_rate": 7.434116534668838e-06, + "loss": 2.2046, + "step": 99910 + }, + { + "epoch": 13.49, + "learning_rate": 7.420929518422014e-06, + "loss": 2.1923, + "step": 99920 + }, + { + "epoch": 13.5, + "learning_rate": 7.4077539118027e-06, + "loss": 2.1912, + "step": 99930 + }, + { + "epoch": 13.5, + "learning_rate": 7.394589715865317e-06, + "loss": 2.1908, + "step": 99940 + }, + { + "epoch": 13.5, + "learning_rate": 7.381436931663259e-06, + "loss": 2.2282, + "step": 99950 + }, + { + "epoch": 13.5, + "learning_rate": 7.368295560249049e-06, + "loss": 2.2188, + "step": 99960 + }, + { + "epoch": 13.5, + "learning_rate": 7.355165602674329e-06, + "loss": 2.23, + "step": 99970 + }, + { + "epoch": 13.5, + "learning_rate": 7.342047059989792e-06, + "loss": 2.1958, + "step": 99980 + }, + { + "epoch": 13.5, + "learning_rate": 7.3289399332452145e-06, + "loss": 2.2049, + "step": 99990 + }, + { + "epoch": 13.51, + "learning_rate": 7.315844223489459e-06, + "loss": 2.1868, + "step": 100000 + }, + { + "epoch": 13.51, + "eval_loss": 2.402578592300415, + "eval_runtime": 1269.336, + "eval_samples_per_second": 59.98, + "eval_steps_per_second": 4.999, + "step": 100000 + }, + { + "epoch": 13.51, + "learning_rate": 7.302759931770502e-06, + "loss": 2.2089, + "step": 100010 + }, + { + "epoch": 13.51, + "learning_rate": 7.289687059135441e-06, + "loss": 2.1781, + "step": 100020 + }, + { + "epoch": 13.51, + "learning_rate": 7.276625606630304e-06, + "loss": 2.2477, + "step": 100030 + }, + { + "epoch": 13.51, + "learning_rate": 7.26357557530039e-06, + "loss": 2.1791, + "step": 100040 + }, + { + "epoch": 13.51, + "learning_rate": 7.250536966189962e-06, + "loss": 2.1935, + "step": 100050 + }, + { + "epoch": 13.51, + "learning_rate": 7.237509780342438e-06, + "loss": 2.1836, + "step": 100060 + }, + { + "epoch": 13.52, + "learning_rate": 7.224494018800297e-06, + "loss": 2.2214, + "step": 100070 + }, + { + "epoch": 13.52, + "learning_rate": 7.211489682605059e-06, + "loss": 2.215, + "step": 100080 + }, + { + "epoch": 13.52, + "learning_rate": 7.198496772797407e-06, + "loss": 2.2051, + "step": 100090 + }, + { + "epoch": 13.52, + "learning_rate": 7.185515290417077e-06, + "loss": 2.1906, + "step": 100100 + }, + { + "epoch": 13.52, + "learning_rate": 7.172545236502869e-06, + "loss": 2.2039, + "step": 100110 + }, + { + "epoch": 13.52, + "learning_rate": 7.1595866120927045e-06, + "loss": 2.2314, + "step": 100120 + }, + { + "epoch": 13.52, + "learning_rate": 7.146639418223554e-06, + "loss": 2.2213, + "step": 100130 + }, + { + "epoch": 13.52, + "learning_rate": 7.133703655931522e-06, + "loss": 2.2124, + "step": 100140 + }, + { + "epoch": 13.53, + "learning_rate": 7.120779326251747e-06, + "loss": 2.2092, + "step": 100150 + }, + { + "epoch": 13.53, + "learning_rate": 7.107866430218467e-06, + "loss": 2.1987, + "step": 100160 + }, + { + "epoch": 13.53, + "learning_rate": 7.094964968865058e-06, + "loss": 2.1872, + "step": 100170 + }, + { + "epoch": 13.53, + "learning_rate": 7.082074943223876e-06, + "loss": 2.2151, + "step": 100180 + }, + { + "epoch": 13.53, + "learning_rate": 7.069196354326479e-06, + "loss": 2.1733, + "step": 100190 + }, + { + "epoch": 13.53, + "learning_rate": 7.056329203203376e-06, + "loss": 2.2054, + "step": 100200 + }, + { + "epoch": 13.53, + "learning_rate": 7.043473490884294e-06, + "loss": 2.195, + "step": 100210 + }, + { + "epoch": 13.54, + "learning_rate": 7.030629218398026e-06, + "loss": 2.2032, + "step": 100220 + }, + { + "epoch": 13.54, + "learning_rate": 7.017796386772317e-06, + "loss": 2.2249, + "step": 100230 + }, + { + "epoch": 13.54, + "learning_rate": 7.004974997034163e-06, + "loss": 2.1902, + "step": 100240 + }, + { + "epoch": 13.54, + "learning_rate": 6.992165050209508e-06, + "loss": 2.2267, + "step": 100250 + }, + { + "epoch": 13.54, + "learning_rate": 6.979366547323517e-06, + "loss": 2.1868, + "step": 100260 + }, + { + "epoch": 13.54, + "learning_rate": 6.9665794894003204e-06, + "loss": 2.1873, + "step": 100270 + }, + { + "epoch": 13.54, + "learning_rate": 6.953803877463182e-06, + "loss": 2.1794, + "step": 100280 + }, + { + "epoch": 13.54, + "learning_rate": 6.941039712534469e-06, + "loss": 2.2042, + "step": 100290 + }, + { + "epoch": 13.55, + "learning_rate": 6.928286995635579e-06, + "loss": 2.2281, + "step": 100300 + }, + { + "epoch": 13.55, + "learning_rate": 6.915545727787047e-06, + "loss": 2.1981, + "step": 100310 + }, + { + "epoch": 13.55, + "learning_rate": 6.902815910008474e-06, + "loss": 2.2023, + "step": 100320 + }, + { + "epoch": 13.55, + "learning_rate": 6.890097543318513e-06, + "loss": 2.2254, + "step": 100330 + }, + { + "epoch": 13.55, + "learning_rate": 6.877390628734964e-06, + "loss": 2.1974, + "step": 100340 + }, + { + "epoch": 13.55, + "learning_rate": 6.8646951672746645e-06, + "loss": 2.1984, + "step": 100350 + }, + { + "epoch": 13.55, + "learning_rate": 6.852011159953502e-06, + "loss": 2.2353, + "step": 100360 + }, + { + "epoch": 13.56, + "learning_rate": 6.839338607786549e-06, + "loss": 2.2099, + "step": 100370 + }, + { + "epoch": 13.56, + "learning_rate": 6.826677511787859e-06, + "loss": 2.191, + "step": 100380 + }, + { + "epoch": 13.56, + "learning_rate": 6.81402787297069e-06, + "loss": 2.215, + "step": 100390 + }, + { + "epoch": 13.56, + "learning_rate": 6.801389692347181e-06, + "loss": 2.2121, + "step": 100400 + }, + { + "epoch": 13.56, + "learning_rate": 6.788762970928774e-06, + "loss": 2.2293, + "step": 100410 + }, + { + "epoch": 13.56, + "learning_rate": 6.776147709725876e-06, + "loss": 2.1984, + "step": 100420 + }, + { + "epoch": 13.56, + "learning_rate": 6.763543909747998e-06, + "loss": 2.1935, + "step": 100430 + }, + { + "epoch": 13.57, + "learning_rate": 6.750951572003732e-06, + "loss": 2.1866, + "step": 100440 + }, + { + "epoch": 13.57, + "learning_rate": 6.738370697500755e-06, + "loss": 2.1829, + "step": 100450 + }, + { + "epoch": 13.57, + "learning_rate": 6.725801287245846e-06, + "loss": 2.2139, + "step": 100460 + }, + { + "epoch": 13.57, + "learning_rate": 6.713243342244834e-06, + "loss": 2.185, + "step": 100470 + }, + { + "epoch": 13.57, + "learning_rate": 6.7006968635026135e-06, + "loss": 2.213, + "step": 100480 + }, + { + "epoch": 13.57, + "learning_rate": 6.688161852023266e-06, + "loss": 2.2146, + "step": 100490 + }, + { + "epoch": 13.57, + "learning_rate": 6.6756383088098056e-06, + "loss": 2.198, + "step": 100500 + }, + { + "epoch": 13.57, + "learning_rate": 6.66312623486448e-06, + "loss": 2.2188, + "step": 100510 + }, + { + "epoch": 13.58, + "learning_rate": 6.650625631188505e-06, + "loss": 2.1988, + "step": 100520 + }, + { + "epoch": 13.58, + "learning_rate": 6.6381364987821986e-06, + "loss": 2.1983, + "step": 100530 + }, + { + "epoch": 13.58, + "learning_rate": 6.625658838645026e-06, + "loss": 2.2031, + "step": 100540 + }, + { + "epoch": 13.58, + "learning_rate": 6.613192651775456e-06, + "loss": 2.1922, + "step": 100550 + }, + { + "epoch": 13.58, + "learning_rate": 6.600737939171074e-06, + "loss": 2.2062, + "step": 100560 + }, + { + "epoch": 13.58, + "learning_rate": 6.588294701828584e-06, + "loss": 2.1853, + "step": 100570 + }, + { + "epoch": 13.58, + "learning_rate": 6.57586294074367e-06, + "loss": 2.1875, + "step": 100580 + }, + { + "epoch": 13.59, + "learning_rate": 6.563442656911238e-06, + "loss": 2.2224, + "step": 100590 + }, + { + "epoch": 13.59, + "learning_rate": 6.5510338513251104e-06, + "loss": 2.1992, + "step": 100600 + }, + { + "epoch": 13.59, + "learning_rate": 6.538636524978324e-06, + "loss": 2.1961, + "step": 100610 + }, + { + "epoch": 13.59, + "learning_rate": 6.526250678862987e-06, + "loss": 2.1907, + "step": 100620 + }, + { + "epoch": 13.59, + "learning_rate": 6.513876313970206e-06, + "loss": 2.189, + "step": 100630 + }, + { + "epoch": 13.59, + "learning_rate": 6.501513431290223e-06, + "loss": 2.2031, + "step": 100640 + }, + { + "epoch": 13.59, + "learning_rate": 6.489162031812345e-06, + "loss": 2.2186, + "step": 100650 + }, + { + "epoch": 13.59, + "learning_rate": 6.476822116525015e-06, + "loss": 2.1978, + "step": 100660 + }, + { + "epoch": 13.6, + "learning_rate": 6.464493686415678e-06, + "loss": 2.2134, + "step": 100670 + }, + { + "epoch": 13.6, + "learning_rate": 6.452176742470877e-06, + "loss": 2.1969, + "step": 100680 + }, + { + "epoch": 13.6, + "learning_rate": 6.4398712856762905e-06, + "loss": 2.1947, + "step": 100690 + }, + { + "epoch": 13.6, + "learning_rate": 6.4275773170166136e-06, + "loss": 2.1936, + "step": 100700 + }, + { + "epoch": 13.6, + "learning_rate": 6.415294837475676e-06, + "loss": 2.1839, + "step": 100710 + }, + { + "epoch": 13.6, + "learning_rate": 6.403023848036342e-06, + "loss": 2.2028, + "step": 100720 + }, + { + "epoch": 13.6, + "learning_rate": 6.3907643496805605e-06, + "loss": 2.2015, + "step": 100730 + }, + { + "epoch": 13.61, + "learning_rate": 6.378516343389411e-06, + "loss": 2.2001, + "step": 100740 + }, + { + "epoch": 13.61, + "learning_rate": 6.3662798301429955e-06, + "loss": 2.2124, + "step": 100750 + }, + { + "epoch": 13.61, + "learning_rate": 6.3540548109204945e-06, + "loss": 2.1924, + "step": 100760 + }, + { + "epoch": 13.61, + "learning_rate": 6.341841286700245e-06, + "loss": 2.2121, + "step": 100770 + }, + { + "epoch": 13.61, + "learning_rate": 6.329639258459579e-06, + "loss": 2.2218, + "step": 100780 + }, + { + "epoch": 13.61, + "learning_rate": 6.3174487271749675e-06, + "loss": 2.1911, + "step": 100790 + }, + { + "epoch": 13.61, + "learning_rate": 6.305269693821879e-06, + "loss": 2.2245, + "step": 100800 + }, + { + "epoch": 13.62, + "learning_rate": 6.293102159374969e-06, + "loss": 2.2129, + "step": 100810 + }, + { + "epoch": 13.62, + "learning_rate": 6.280946124807923e-06, + "loss": 2.173, + "step": 100820 + }, + { + "epoch": 13.62, + "learning_rate": 6.268801591093481e-06, + "loss": 2.1948, + "step": 100830 + }, + { + "epoch": 13.62, + "learning_rate": 6.2566685592034985e-06, + "loss": 2.2074, + "step": 100840 + }, + { + "epoch": 13.62, + "learning_rate": 6.244547030108899e-06, + "loss": 2.1877, + "step": 100850 + }, + { + "epoch": 13.62, + "learning_rate": 6.2324370047796725e-06, + "loss": 2.1952, + "step": 100860 + }, + { + "epoch": 13.62, + "learning_rate": 6.220338484184928e-06, + "loss": 2.2106, + "step": 100870 + }, + { + "epoch": 13.62, + "learning_rate": 6.208251469292791e-06, + "loss": 2.2037, + "step": 100880 + }, + { + "epoch": 13.63, + "learning_rate": 6.196175961070554e-06, + "loss": 2.2145, + "step": 100890 + }, + { + "epoch": 13.63, + "learning_rate": 6.184111960484478e-06, + "loss": 2.2098, + "step": 100900 + }, + { + "epoch": 13.63, + "learning_rate": 6.172059468500007e-06, + "loss": 2.1998, + "step": 100910 + }, + { + "epoch": 13.63, + "learning_rate": 6.160018486081603e-06, + "loss": 2.2052, + "step": 100920 + }, + { + "epoch": 13.63, + "learning_rate": 6.147989014192811e-06, + "loss": 2.2069, + "step": 100930 + }, + { + "epoch": 13.63, + "learning_rate": 6.135971053796296e-06, + "loss": 2.2099, + "step": 100940 + }, + { + "epoch": 13.63, + "learning_rate": 6.123964605853754e-06, + "loss": 2.2023, + "step": 100950 + }, + { + "epoch": 13.64, + "learning_rate": 6.111969671325967e-06, + "loss": 2.1889, + "step": 100960 + }, + { + "epoch": 13.64, + "learning_rate": 6.09998625117285e-06, + "loss": 2.1995, + "step": 100970 + }, + { + "epoch": 13.64, + "learning_rate": 6.0880143463532874e-06, + "loss": 2.218, + "step": 100980 + }, + { + "epoch": 13.64, + "learning_rate": 6.076053957825411e-06, + "loss": 2.2252, + "step": 100990 + }, + { + "epoch": 13.64, + "learning_rate": 6.064105086546223e-06, + "loss": 2.1972, + "step": 101000 + }, + { + "epoch": 13.64, + "eval_loss": 2.4018149375915527, + "eval_runtime": 1270.4303, + "eval_samples_per_second": 59.929, + "eval_steps_per_second": 4.994, + "step": 101000 } ], "logging_steps": 10, "max_steps": 111060, "num_train_epochs": 15, "save_steps": 1000, - "total_flos": 1.5657271333748736e+19, + "total_flos": 3.1792627288375296e+19, "trial_name": null, "trial_params": null }