diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" deleted file mode 100644--- "a/last-checkpoint/trainer_state.json" +++ /dev/null @@ -1,11812 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 14.0, - "global_step": 972622, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 5.140331037318804e-06, - "loss": 9.66, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.0280662074637608e-05, - "loss": 7.9578, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 1.542099311195641e-05, - "loss": 6.7356, - "step": 1500 - }, - { - "epoch": 0.03, - "learning_rate": 2.0561324149275216e-05, - "loss": 6.3535, - "step": 2000 - }, - { - "epoch": 0.04, - "learning_rate": 2.570165518659402e-05, - "loss": 6.0716, - "step": 2500 - }, - { - "epoch": 0.04, - "learning_rate": 3.084198622391282e-05, - "loss": 5.7806, - "step": 3000 - }, - { - "epoch": 0.05, - "learning_rate": 3.5982317261231625e-05, - "loss": 5.3296, - "step": 3500 - }, - { - "epoch": 0.06, - "learning_rate": 4.112264829855043e-05, - "loss": 4.8984, - "step": 4000 - }, - { - "epoch": 0.06, - "learning_rate": 4.6262979335869235e-05, - "loss": 4.5987, - "step": 4500 - }, - { - "epoch": 0.07, - "learning_rate": 5.140331037318804e-05, - "loss": 4.3705, - "step": 5000 - }, - { - "epoch": 0.08, - "learning_rate": 5.6543641410506844e-05, - "loss": 4.1875, - "step": 5500 - }, - { - "epoch": 0.09, - "learning_rate": 6.168397244782565e-05, - "loss": 4.0215, - "step": 6000 - }, - { - "epoch": 0.09, - "learning_rate": 6.682430348514444e-05, - "loss": 3.8753, - "step": 6500 - }, - { - "epoch": 0.1, - "learning_rate": 7.196463452246325e-05, - "loss": 3.7559, - "step": 7000 - }, - { - "epoch": 0.11, - "learning_rate": 7.710496555978204e-05, - "loss": 3.6464, - "step": 7500 - }, - { - "epoch": 0.12, - "learning_rate": 8.224529659710087e-05, - "loss": 3.5461, - "step": 8000 - }, - { - "epoch": 0.12, - "learning_rate": 8.738562763441966e-05, - "loss": 3.4592, - "step": 8500 - }, - { - "epoch": 0.13, - "learning_rate": 9.252595867173847e-05, - "loss": 3.3816, - "step": 9000 - }, - { - "epoch": 0.14, - "learning_rate": 9.766628970905726e-05, - "loss": 3.3121, - "step": 9500 - }, - { - "epoch": 0.14, - "learning_rate": 9.997164799900302e-05, - "loss": 3.2467, - "step": 10000 - }, - { - "epoch": 0.15, - "learning_rate": 9.991972125725027e-05, - "loss": 3.1821, - "step": 10500 - }, - { - "epoch": 0.16, - "learning_rate": 9.986779451549754e-05, - "loss": 3.1286, - "step": 11000 - }, - { - "epoch": 0.17, - "learning_rate": 9.981586777374481e-05, - "loss": 3.0801, - "step": 11500 - }, - { - "epoch": 0.17, - "learning_rate": 9.976394103199208e-05, - "loss": 3.0316, - "step": 12000 - }, - { - "epoch": 0.18, - "learning_rate": 9.971211814372284e-05, - "loss": 2.9902, - "step": 12500 - }, - { - "epoch": 0.19, - "learning_rate": 9.966019140197011e-05, - "loss": 2.9502, - "step": 13000 - }, - { - "epoch": 0.19, - "learning_rate": 9.960826466021737e-05, - "loss": 2.9131, - "step": 13500 - }, - { - "epoch": 0.2, - "learning_rate": 9.955633791846463e-05, - "loss": 2.8816, - "step": 14000 - }, - { - "epoch": 0.21, - "learning_rate": 9.95045150301954e-05, - "loss": 2.8481, - "step": 14500 - }, - { - "epoch": 0.22, - "learning_rate": 9.945258828844267e-05, - "loss": 2.8154, - "step": 15000 - }, - { - "epoch": 0.22, - "learning_rate": 9.940066154668994e-05, - "loss": 2.7873, - "step": 15500 - }, - { - "epoch": 0.23, - "learning_rate": 9.93487348049372e-05, - "loss": 2.7658, - "step": 16000 - }, - { - "epoch": 0.24, - "learning_rate": 9.929691191666797e-05, - "loss": 2.7377, - "step": 16500 - }, - { - "epoch": 0.24, - "learning_rate": 9.924498517491524e-05, - "loss": 2.7145, - "step": 17000 - }, - { - "epoch": 0.25, - "learning_rate": 9.91930584331625e-05, - "loss": 2.6943, - "step": 17500 - }, - { - "epoch": 0.26, - "learning_rate": 9.914113169140976e-05, - "loss": 2.6711, - "step": 18000 - }, - { - "epoch": 0.27, - "learning_rate": 9.908930880314053e-05, - "loss": 2.6557, - "step": 18500 - }, - { - "epoch": 0.27, - "learning_rate": 9.90373820613878e-05, - "loss": 2.6369, - "step": 19000 - }, - { - "epoch": 0.28, - "learning_rate": 9.898545531963507e-05, - "loss": 2.6161, - "step": 19500 - }, - { - "epoch": 0.29, - "learning_rate": 9.893352857788233e-05, - "loss": 2.6013, - "step": 20000 - }, - { - "epoch": 0.3, - "learning_rate": 9.88817056896131e-05, - "loss": 2.5856, - "step": 20500 - }, - { - "epoch": 0.3, - "learning_rate": 9.882977894786037e-05, - "loss": 2.5747, - "step": 21000 - }, - { - "epoch": 0.31, - "learning_rate": 9.877785220610762e-05, - "loss": 2.5553, - "step": 21500 - }, - { - "epoch": 0.32, - "learning_rate": 9.872592546435489e-05, - "loss": 2.5415, - "step": 22000 - }, - { - "epoch": 0.32, - "learning_rate": 9.867410257608566e-05, - "loss": 2.5352, - "step": 22500 - }, - { - "epoch": 0.33, - "learning_rate": 9.862217583433293e-05, - "loss": 2.5192, - "step": 23000 - }, - { - "epoch": 0.34, - "learning_rate": 9.85702490925802e-05, - "loss": 2.509, - "step": 23500 - }, - { - "epoch": 0.35, - "learning_rate": 9.851832235082745e-05, - "loss": 2.4963, - "step": 24000 - }, - { - "epoch": 0.35, - "learning_rate": 9.846649946255823e-05, - "loss": 2.4866, - "step": 24500 - }, - { - "epoch": 0.36, - "learning_rate": 9.841457272080548e-05, - "loss": 2.4738, - "step": 25000 - }, - { - "epoch": 0.37, - "learning_rate": 9.836264597905277e-05, - "loss": 2.4613, - "step": 25500 - }, - { - "epoch": 0.37, - "learning_rate": 9.831071923730002e-05, - "loss": 2.4566, - "step": 26000 - }, - { - "epoch": 0.38, - "learning_rate": 9.82588963490308e-05, - "loss": 2.4492, - "step": 26500 - }, - { - "epoch": 0.39, - "learning_rate": 9.820696960727805e-05, - "loss": 2.4387, - "step": 27000 - }, - { - "epoch": 0.4, - "learning_rate": 9.815504286552532e-05, - "loss": 2.431, - "step": 27500 - }, - { - "epoch": 0.4, - "learning_rate": 9.810311612377259e-05, - "loss": 2.4203, - "step": 28000 - }, - { - "epoch": 0.41, - "learning_rate": 9.805129323550336e-05, - "loss": 2.4134, - "step": 28500 - }, - { - "epoch": 0.42, - "learning_rate": 9.799936649375063e-05, - "loss": 2.4058, - "step": 29000 - }, - { - "epoch": 0.42, - "learning_rate": 9.794743975199788e-05, - "loss": 2.3948, - "step": 29500 - }, - { - "epoch": 0.43, - "learning_rate": 9.789551301024515e-05, - "loss": 2.3866, - "step": 30000 - }, - { - "epoch": 0.44, - "learning_rate": 9.784358626849242e-05, - "loss": 2.3806, - "step": 30500 - }, - { - "epoch": 0.45, - "learning_rate": 9.779165952673967e-05, - "loss": 2.3738, - "step": 31000 - }, - { - "epoch": 0.45, - "learning_rate": 9.773973278498695e-05, - "loss": 2.3675, - "step": 31500 - }, - { - "epoch": 0.46, - "learning_rate": 9.76879098967177e-05, - "loss": 2.3633, - "step": 32000 - }, - { - "epoch": 0.47, - "learning_rate": 9.763598315496499e-05, - "loss": 2.356, - "step": 32500 - }, - { - "epoch": 0.48, - "learning_rate": 9.758405641321224e-05, - "loss": 2.3454, - "step": 33000 - }, - { - "epoch": 0.48, - "learning_rate": 9.753212967145951e-05, - "loss": 2.3464, - "step": 33500 - }, - { - "epoch": 0.49, - "learning_rate": 9.748030678319028e-05, - "loss": 2.3384, - "step": 34000 - }, - { - "epoch": 0.5, - "learning_rate": 9.742838004143755e-05, - "loss": 2.3314, - "step": 34500 - }, - { - "epoch": 0.5, - "learning_rate": 9.737645329968481e-05, - "loss": 2.3223, - "step": 35000 - }, - { - "epoch": 0.51, - "learning_rate": 9.732452655793207e-05, - "loss": 2.3201, - "step": 35500 - }, - { - "epoch": 0.52, - "learning_rate": 9.727270366966285e-05, - "loss": 2.3198, - "step": 36000 - }, - { - "epoch": 0.53, - "learning_rate": 9.72207769279101e-05, - "loss": 2.3051, - "step": 36500 - }, - { - "epoch": 0.53, - "learning_rate": 9.716885018615738e-05, - "loss": 2.3041, - "step": 37000 - }, - { - "epoch": 0.54, - "learning_rate": 9.711692344440464e-05, - "loss": 2.3034, - "step": 37500 - }, - { - "epoch": 0.55, - "learning_rate": 9.70649967026519e-05, - "loss": 2.2964, - "step": 38000 - }, - { - "epoch": 0.55, - "learning_rate": 9.701317381438267e-05, - "loss": 2.2894, - "step": 38500 - }, - { - "epoch": 0.56, - "learning_rate": 9.696124707262993e-05, - "loss": 2.2769, - "step": 39000 - }, - { - "epoch": 0.57, - "learning_rate": 9.690932033087721e-05, - "loss": 2.2809, - "step": 39500 - }, - { - "epoch": 0.58, - "learning_rate": 9.685739358912447e-05, - "loss": 2.2743, - "step": 40000 - }, - { - "epoch": 0.58, - "learning_rate": 9.680557070085525e-05, - "loss": 2.2699, - "step": 40500 - }, - { - "epoch": 0.59, - "learning_rate": 9.67536439591025e-05, - "loss": 2.2668, - "step": 41000 - }, - { - "epoch": 0.6, - "learning_rate": 9.670171721734977e-05, - "loss": 2.2635, - "step": 41500 - }, - { - "epoch": 0.6, - "learning_rate": 9.664979047559704e-05, - "loss": 2.2637, - "step": 42000 - }, - { - "epoch": 0.61, - "learning_rate": 9.65979675873278e-05, - "loss": 2.2584, - "step": 42500 - }, - { - "epoch": 0.62, - "learning_rate": 9.654604084557507e-05, - "loss": 2.2535, - "step": 43000 - }, - { - "epoch": 0.63, - "learning_rate": 9.649411410382233e-05, - "loss": 2.2462, - "step": 43500 - }, - { - "epoch": 0.63, - "learning_rate": 9.64421873620696e-05, - "loss": 2.2427, - "step": 44000 - }, - { - "epoch": 0.64, - "learning_rate": 9.639036447380036e-05, - "loss": 2.242, - "step": 44500 - }, - { - "epoch": 0.65, - "learning_rate": 9.633843773204764e-05, - "loss": 2.2341, - "step": 45000 - }, - { - "epoch": 0.65, - "learning_rate": 9.62865109902949e-05, - "loss": 2.2321, - "step": 45500 - }, - { - "epoch": 0.66, - "learning_rate": 9.623458424854217e-05, - "loss": 2.2311, - "step": 46000 - }, - { - "epoch": 0.67, - "learning_rate": 9.618265750678943e-05, - "loss": 2.2242, - "step": 46500 - }, - { - "epoch": 0.68, - "learning_rate": 9.61308346185202e-05, - "loss": 2.2206, - "step": 47000 - }, - { - "epoch": 0.68, - "learning_rate": 9.607890787676747e-05, - "loss": 2.2164, - "step": 47500 - }, - { - "epoch": 0.69, - "learning_rate": 9.602698113501472e-05, - "loss": 2.217, - "step": 48000 - }, - { - "epoch": 0.7, - "learning_rate": 9.597505439326199e-05, - "loss": 2.2144, - "step": 48500 - }, - { - "epoch": 0.71, - "learning_rate": 9.592323150499276e-05, - "loss": 2.211, - "step": 49000 - }, - { - "epoch": 0.71, - "learning_rate": 9.587130476324003e-05, - "loss": 2.2056, - "step": 49500 - }, - { - "epoch": 0.72, - "learning_rate": 9.58193780214873e-05, - "loss": 2.2013, - "step": 50000 - }, - { - "epoch": 0.73, - "learning_rate": 9.576745127973455e-05, - "loss": 2.2074, - "step": 50500 - }, - { - "epoch": 0.73, - "learning_rate": 9.571562839146533e-05, - "loss": 2.1932, - "step": 51000 - }, - { - "epoch": 0.74, - "learning_rate": 9.566370164971258e-05, - "loss": 2.1911, - "step": 51500 - }, - { - "epoch": 0.75, - "learning_rate": 9.561177490795985e-05, - "loss": 2.1942, - "step": 52000 - }, - { - "epoch": 0.76, - "learning_rate": 9.555984816620712e-05, - "loss": 2.1868, - "step": 52500 - }, - { - "epoch": 0.76, - "learning_rate": 9.550792142445439e-05, - "loss": 2.1795, - "step": 53000 - }, - { - "epoch": 0.77, - "learning_rate": 9.545609853618515e-05, - "loss": 2.187, - "step": 53500 - }, - { - "epoch": 0.78, - "learning_rate": 9.540417179443242e-05, - "loss": 2.1752, - "step": 54000 - }, - { - "epoch": 0.78, - "learning_rate": 9.535224505267969e-05, - "loss": 2.1789, - "step": 54500 - }, - { - "epoch": 0.79, - "learning_rate": 9.530031831092695e-05, - "loss": 2.1755, - "step": 55000 - }, - { - "epoch": 0.8, - "learning_rate": 9.524849542265773e-05, - "loss": 2.1723, - "step": 55500 - }, - { - "epoch": 0.81, - "learning_rate": 9.519656868090498e-05, - "loss": 2.1722, - "step": 56000 - }, - { - "epoch": 0.81, - "learning_rate": 9.514464193915225e-05, - "loss": 2.166, - "step": 56500 - }, - { - "epoch": 0.82, - "learning_rate": 9.509271519739952e-05, - "loss": 2.1679, - "step": 57000 - }, - { - "epoch": 0.83, - "learning_rate": 9.504089230913028e-05, - "loss": 2.1619, - "step": 57500 - }, - { - "epoch": 0.83, - "learning_rate": 9.498896556737755e-05, - "loss": 2.1595, - "step": 58000 - }, - { - "epoch": 0.84, - "learning_rate": 9.493703882562482e-05, - "loss": 2.1557, - "step": 58500 - }, - { - "epoch": 0.85, - "learning_rate": 9.488511208387207e-05, - "loss": 2.1567, - "step": 59000 - }, - { - "epoch": 0.86, - "learning_rate": 9.483328919560285e-05, - "loss": 2.1504, - "step": 59500 - }, - { - "epoch": 0.86, - "learning_rate": 9.478136245385011e-05, - "loss": 2.1459, - "step": 60000 - }, - { - "epoch": 0.87, - "learning_rate": 9.472943571209738e-05, - "loss": 2.1499, - "step": 60500 - }, - { - "epoch": 0.88, - "learning_rate": 9.467750897034465e-05, - "loss": 2.146, - "step": 61000 - }, - { - "epoch": 0.89, - "learning_rate": 9.462568608207541e-05, - "loss": 2.1437, - "step": 61500 - }, - { - "epoch": 0.89, - "learning_rate": 9.457375934032268e-05, - "loss": 2.1437, - "step": 62000 - }, - { - "epoch": 0.9, - "learning_rate": 9.452183259856993e-05, - "loss": 2.1415, - "step": 62500 - }, - { - "epoch": 0.91, - "learning_rate": 9.44699058568172e-05, - "loss": 2.1406, - "step": 63000 - }, - { - "epoch": 0.91, - "learning_rate": 9.441808296854797e-05, - "loss": 2.1334, - "step": 63500 - }, - { - "epoch": 0.92, - "learning_rate": 9.436615622679524e-05, - "loss": 2.1327, - "step": 64000 - }, - { - "epoch": 0.93, - "learning_rate": 9.43142294850425e-05, - "loss": 2.1297, - "step": 64500 - }, - { - "epoch": 0.94, - "learning_rate": 9.426230274328977e-05, - "loss": 2.1275, - "step": 65000 - }, - { - "epoch": 0.94, - "learning_rate": 9.421037600153704e-05, - "loss": 2.1317, - "step": 65500 - }, - { - "epoch": 0.95, - "learning_rate": 9.415855311326781e-05, - "loss": 2.1305, - "step": 66000 - }, - { - "epoch": 0.96, - "learning_rate": 9.410662637151508e-05, - "loss": 2.1229, - "step": 66500 - }, - { - "epoch": 0.96, - "learning_rate": 9.405469962976233e-05, - "loss": 2.1212, - "step": 67000 - }, - { - "epoch": 0.97, - "learning_rate": 9.40027728880096e-05, - "loss": 2.1172, - "step": 67500 - }, - { - "epoch": 0.98, - "learning_rate": 9.395094999974037e-05, - "loss": 2.1139, - "step": 68000 - }, - { - "epoch": 0.99, - "learning_rate": 9.389902325798763e-05, - "loss": 2.1156, - "step": 68500 - }, - { - "epoch": 0.99, - "learning_rate": 9.38470965162349e-05, - "loss": 2.111, - "step": 69000 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.6055819008431793, - "eval_loss": 1.98311185836792, - "eval_runtime": 1276.0691, - "eval_samples_per_second": 422.342, - "eval_steps_per_second": 26.397, - "step": 69473 - }, - { - "epoch": 1.0, - "learning_rate": 9.379516977448216e-05, - "loss": 2.1118, - "step": 69500 - }, - { - "epoch": 1.01, - "learning_rate": 9.374334688621294e-05, - "loss": 2.1085, - "step": 70000 - }, - { - "epoch": 1.01, - "learning_rate": 9.369142014446019e-05, - "loss": 2.1029, - "step": 70500 - }, - { - "epoch": 1.02, - "learning_rate": 9.363949340270747e-05, - "loss": 2.1039, - "step": 71000 - }, - { - "epoch": 1.03, - "learning_rate": 9.358756666095473e-05, - "loss": 2.1002, - "step": 71500 - }, - { - "epoch": 1.04, - "learning_rate": 9.353574377268551e-05, - "loss": 2.1002, - "step": 72000 - }, - { - "epoch": 1.04, - "learning_rate": 9.348381703093276e-05, - "loss": 2.0995, - "step": 72500 - }, - { - "epoch": 1.05, - "learning_rate": 9.343189028918002e-05, - "loss": 2.1002, - "step": 73000 - }, - { - "epoch": 1.06, - "learning_rate": 9.33799635474273e-05, - "loss": 2.0938, - "step": 73500 - }, - { - "epoch": 1.07, - "learning_rate": 9.332814065915807e-05, - "loss": 2.0927, - "step": 74000 - }, - { - "epoch": 1.07, - "learning_rate": 9.327621391740533e-05, - "loss": 2.0887, - "step": 74500 - }, - { - "epoch": 1.08, - "learning_rate": 9.322428717565259e-05, - "loss": 2.0899, - "step": 75000 - }, - { - "epoch": 1.09, - "learning_rate": 9.317236043389986e-05, - "loss": 2.0898, - "step": 75500 - }, - { - "epoch": 1.09, - "learning_rate": 9.312053754563062e-05, - "loss": 2.0864, - "step": 76000 - }, - { - "epoch": 1.1, - "learning_rate": 9.306861080387789e-05, - "loss": 2.0843, - "step": 76500 - }, - { - "epoch": 1.11, - "learning_rate": 9.301668406212516e-05, - "loss": 2.0826, - "step": 77000 - }, - { - "epoch": 1.12, - "learning_rate": 9.296475732037241e-05, - "loss": 2.0859, - "step": 77500 - }, - { - "epoch": 1.12, - "learning_rate": 9.29129344321032e-05, - "loss": 2.0809, - "step": 78000 - }, - { - "epoch": 1.13, - "learning_rate": 9.286100769035045e-05, - "loss": 2.0811, - "step": 78500 - }, - { - "epoch": 1.14, - "learning_rate": 9.280908094859773e-05, - "loss": 2.079, - "step": 79000 - }, - { - "epoch": 1.14, - "learning_rate": 9.275715420684499e-05, - "loss": 2.0733, - "step": 79500 - }, - { - "epoch": 1.15, - "learning_rate": 9.270533131857577e-05, - "loss": 2.0784, - "step": 80000 - }, - { - "epoch": 1.16, - "learning_rate": 9.265340457682302e-05, - "loss": 2.0768, - "step": 80500 - }, - { - "epoch": 1.17, - "learning_rate": 9.260147783507029e-05, - "loss": 2.0749, - "step": 81000 - }, - { - "epoch": 1.17, - "learning_rate": 9.254955109331756e-05, - "loss": 2.0716, - "step": 81500 - }, - { - "epoch": 1.18, - "learning_rate": 9.249772820504832e-05, - "loss": 2.069, - "step": 82000 - }, - { - "epoch": 1.19, - "learning_rate": 9.244580146329559e-05, - "loss": 2.0639, - "step": 82500 - }, - { - "epoch": 1.19, - "learning_rate": 9.239387472154285e-05, - "loss": 2.069, - "step": 83000 - }, - { - "epoch": 1.2, - "learning_rate": 9.234194797979011e-05, - "loss": 2.0648, - "step": 83500 - }, - { - "epoch": 1.21, - "learning_rate": 9.229012509152088e-05, - "loss": 2.0674, - "step": 84000 - }, - { - "epoch": 1.22, - "learning_rate": 9.223819834976816e-05, - "loss": 2.0609, - "step": 84500 - }, - { - "epoch": 1.22, - "learning_rate": 9.218627160801542e-05, - "loss": 2.0592, - "step": 85000 - }, - { - "epoch": 1.23, - "learning_rate": 9.213434486626267e-05, - "loss": 2.058, - "step": 85500 - }, - { - "epoch": 1.24, - "learning_rate": 9.208252197799345e-05, - "loss": 2.0603, - "step": 86000 - }, - { - "epoch": 1.25, - "learning_rate": 9.20305952362407e-05, - "loss": 2.0562, - "step": 86500 - }, - { - "epoch": 1.25, - "learning_rate": 9.197866849448799e-05, - "loss": 2.0566, - "step": 87000 - }, - { - "epoch": 1.26, - "learning_rate": 9.192674175273524e-05, - "loss": 2.0546, - "step": 87500 - }, - { - "epoch": 1.27, - "learning_rate": 9.187491886446602e-05, - "loss": 2.0552, - "step": 88000 - }, - { - "epoch": 1.27, - "learning_rate": 9.182299212271328e-05, - "loss": 2.0493, - "step": 88500 - }, - { - "epoch": 1.28, - "learning_rate": 9.177106538096055e-05, - "loss": 2.0545, - "step": 89000 - }, - { - "epoch": 1.29, - "learning_rate": 9.171913863920781e-05, - "loss": 2.0494, - "step": 89500 - }, - { - "epoch": 1.3, - "learning_rate": 9.166721189745507e-05, - "loss": 2.0496, - "step": 90000 - }, - { - "epoch": 1.3, - "learning_rate": 9.161538900918585e-05, - "loss": 2.0467, - "step": 90500 - }, - { - "epoch": 1.31, - "learning_rate": 9.15634622674331e-05, - "loss": 2.047, - "step": 91000 - }, - { - "epoch": 1.32, - "learning_rate": 9.151153552568037e-05, - "loss": 2.048, - "step": 91500 - }, - { - "epoch": 1.32, - "learning_rate": 9.145960878392764e-05, - "loss": 2.0433, - "step": 92000 - }, - { - "epoch": 1.33, - "learning_rate": 9.14077858956584e-05, - "loss": 2.0442, - "step": 92500 - }, - { - "epoch": 1.34, - "learning_rate": 9.135585915390567e-05, - "loss": 2.0474, - "step": 93000 - }, - { - "epoch": 1.35, - "learning_rate": 9.130393241215294e-05, - "loss": 2.0364, - "step": 93500 - }, - { - "epoch": 1.35, - "learning_rate": 9.125200567040021e-05, - "loss": 2.0393, - "step": 94000 - }, - { - "epoch": 1.36, - "learning_rate": 9.120018278213098e-05, - "loss": 2.0371, - "step": 94500 - }, - { - "epoch": 1.37, - "learning_rate": 9.114825604037825e-05, - "loss": 2.0354, - "step": 95000 - }, - { - "epoch": 1.37, - "learning_rate": 9.10963292986255e-05, - "loss": 2.0355, - "step": 95500 - }, - { - "epoch": 1.38, - "learning_rate": 9.104440255687277e-05, - "loss": 2.0366, - "step": 96000 - }, - { - "epoch": 1.39, - "learning_rate": 9.099257966860354e-05, - "loss": 2.0339, - "step": 96500 - }, - { - "epoch": 1.4, - "learning_rate": 9.09406529268508e-05, - "loss": 2.0352, - "step": 97000 - }, - { - "epoch": 1.4, - "learning_rate": 9.088872618509807e-05, - "loss": 2.0305, - "step": 97500 - }, - { - "epoch": 1.41, - "learning_rate": 9.083679944334533e-05, - "loss": 2.0276, - "step": 98000 - }, - { - "epoch": 1.42, - "learning_rate": 9.07849765550761e-05, - "loss": 2.0291, - "step": 98500 - }, - { - "epoch": 1.43, - "learning_rate": 9.073304981332336e-05, - "loss": 2.0318, - "step": 99000 - }, - { - "epoch": 1.43, - "learning_rate": 9.068112307157063e-05, - "loss": 2.022, - "step": 99500 - }, - { - "epoch": 1.44, - "learning_rate": 9.06291963298179e-05, - "loss": 2.0272, - "step": 100000 - }, - { - "epoch": 1.45, - "learning_rate": 9.057737344154866e-05, - "loss": 2.0284, - "step": 100500 - }, - { - "epoch": 1.45, - "learning_rate": 9.052544669979593e-05, - "loss": 2.0258, - "step": 101000 - }, - { - "epoch": 1.46, - "learning_rate": 9.04735199580432e-05, - "loss": 2.0218, - "step": 101500 - }, - { - "epoch": 1.47, - "learning_rate": 9.042159321629046e-05, - "loss": 2.0221, - "step": 102000 - }, - { - "epoch": 1.48, - "learning_rate": 9.036977032802124e-05, - "loss": 2.0181, - "step": 102500 - }, - { - "epoch": 1.48, - "learning_rate": 9.031784358626849e-05, - "loss": 2.0207, - "step": 103000 - }, - { - "epoch": 1.49, - "learning_rate": 9.026591684451576e-05, - "loss": 2.022, - "step": 103500 - }, - { - "epoch": 1.5, - "learning_rate": 9.021399010276303e-05, - "loss": 2.0179, - "step": 104000 - }, - { - "epoch": 1.5, - "learning_rate": 9.016216721449379e-05, - "loss": 2.0189, - "step": 104500 - }, - { - "epoch": 1.51, - "learning_rate": 9.011024047274106e-05, - "loss": 2.0171, - "step": 105000 - }, - { - "epoch": 1.52, - "learning_rate": 9.005831373098833e-05, - "loss": 2.0176, - "step": 105500 - }, - { - "epoch": 1.53, - "learning_rate": 9.00063869892356e-05, - "loss": 2.0137, - "step": 106000 - }, - { - "epoch": 1.53, - "learning_rate": 8.995456410096636e-05, - "loss": 2.0115, - "step": 106500 - }, - { - "epoch": 1.54, - "learning_rate": 8.990263735921363e-05, - "loss": 2.0139, - "step": 107000 - }, - { - "epoch": 1.55, - "learning_rate": 8.985071061746089e-05, - "loss": 2.011, - "step": 107500 - }, - { - "epoch": 1.55, - "learning_rate": 8.979878387570816e-05, - "loss": 2.0085, - "step": 108000 - }, - { - "epoch": 1.56, - "learning_rate": 8.974696098743892e-05, - "loss": 2.0117, - "step": 108500 - }, - { - "epoch": 1.57, - "learning_rate": 8.969503424568619e-05, - "loss": 2.0095, - "step": 109000 - }, - { - "epoch": 1.58, - "learning_rate": 8.964310750393346e-05, - "loss": 2.0053, - "step": 109500 - }, - { - "epoch": 1.58, - "learning_rate": 8.959118076218071e-05, - "loss": 2.006, - "step": 110000 - }, - { - "epoch": 1.59, - "learning_rate": 8.953935787391149e-05, - "loss": 2.0108, - "step": 110500 - }, - { - "epoch": 1.6, - "learning_rate": 8.948743113215875e-05, - "loss": 2.0034, - "step": 111000 - }, - { - "epoch": 1.6, - "learning_rate": 8.943550439040602e-05, - "loss": 2.0072, - "step": 111500 - }, - { - "epoch": 1.61, - "learning_rate": 8.938357764865328e-05, - "loss": 2.005, - "step": 112000 - }, - { - "epoch": 1.62, - "learning_rate": 8.933175476038405e-05, - "loss": 2.0017, - "step": 112500 - }, - { - "epoch": 1.63, - "learning_rate": 8.927982801863132e-05, - "loss": 1.9997, - "step": 113000 - }, - { - "epoch": 1.63, - "learning_rate": 8.922790127687859e-05, - "loss": 1.9991, - "step": 113500 - }, - { - "epoch": 1.64, - "learning_rate": 8.917597453512585e-05, - "loss": 2.0034, - "step": 114000 - }, - { - "epoch": 1.65, - "learning_rate": 8.912415164685662e-05, - "loss": 1.9974, - "step": 114500 - }, - { - "epoch": 1.66, - "learning_rate": 8.907222490510389e-05, - "loss": 1.9995, - "step": 115000 - }, - { - "epoch": 1.66, - "learning_rate": 8.902029816335114e-05, - "loss": 1.9954, - "step": 115500 - }, - { - "epoch": 1.67, - "learning_rate": 8.896837142159841e-05, - "loss": 1.9951, - "step": 116000 - }, - { - "epoch": 1.68, - "learning_rate": 8.891654853332918e-05, - "loss": 1.9955, - "step": 116500 - }, - { - "epoch": 1.68, - "learning_rate": 8.886462179157645e-05, - "loss": 1.9954, - "step": 117000 - }, - { - "epoch": 1.69, - "learning_rate": 8.881269504982372e-05, - "loss": 1.9907, - "step": 117500 - }, - { - "epoch": 1.7, - "learning_rate": 8.876076830807097e-05, - "loss": 1.9932, - "step": 118000 - }, - { - "epoch": 1.71, - "learning_rate": 8.870894541980175e-05, - "loss": 1.9925, - "step": 118500 - }, - { - "epoch": 1.71, - "learning_rate": 8.8657018678049e-05, - "loss": 1.9931, - "step": 119000 - }, - { - "epoch": 1.72, - "learning_rate": 8.860509193629629e-05, - "loss": 1.9931, - "step": 119500 - }, - { - "epoch": 1.73, - "learning_rate": 8.855316519454354e-05, - "loss": 1.9863, - "step": 120000 - }, - { - "epoch": 1.73, - "learning_rate": 8.850134230627432e-05, - "loss": 1.9922, - "step": 120500 - }, - { - "epoch": 1.74, - "learning_rate": 8.844941556452158e-05, - "loss": 1.985, - "step": 121000 - }, - { - "epoch": 1.75, - "learning_rate": 8.839748882276883e-05, - "loss": 1.9867, - "step": 121500 - }, - { - "epoch": 1.76, - "learning_rate": 8.834556208101611e-05, - "loss": 1.988, - "step": 122000 - }, - { - "epoch": 1.76, - "learning_rate": 8.829373919274686e-05, - "loss": 1.9858, - "step": 122500 - }, - { - "epoch": 1.77, - "learning_rate": 8.824181245099415e-05, - "loss": 1.9871, - "step": 123000 - }, - { - "epoch": 1.78, - "learning_rate": 8.81898857092414e-05, - "loss": 1.9836, - "step": 123500 - }, - { - "epoch": 1.78, - "learning_rate": 8.813795896748867e-05, - "loss": 1.9807, - "step": 124000 - }, - { - "epoch": 1.79, - "learning_rate": 8.808613607921944e-05, - "loss": 1.9758, - "step": 124500 - }, - { - "epoch": 1.8, - "learning_rate": 8.80342093374667e-05, - "loss": 1.9839, - "step": 125000 - }, - { - "epoch": 1.81, - "learning_rate": 8.798228259571397e-05, - "loss": 1.9819, - "step": 125500 - }, - { - "epoch": 1.81, - "learning_rate": 8.793035585396123e-05, - "loss": 1.9805, - "step": 126000 - }, - { - "epoch": 1.82, - "learning_rate": 8.787853296569201e-05, - "loss": 1.9809, - "step": 126500 - }, - { - "epoch": 1.83, - "learning_rate": 8.782660622393926e-05, - "loss": 1.9795, - "step": 127000 - }, - { - "epoch": 1.84, - "learning_rate": 8.777467948218654e-05, - "loss": 1.9802, - "step": 127500 - }, - { - "epoch": 1.84, - "learning_rate": 8.77227527404338e-05, - "loss": 1.9773, - "step": 128000 - }, - { - "epoch": 1.85, - "learning_rate": 8.767082599868107e-05, - "loss": 1.9793, - "step": 128500 - }, - { - "epoch": 1.86, - "learning_rate": 8.761900311041183e-05, - "loss": 1.9731, - "step": 129000 - }, - { - "epoch": 1.86, - "learning_rate": 8.75670763686591e-05, - "loss": 1.973, - "step": 129500 - }, - { - "epoch": 1.87, - "learning_rate": 8.751514962690637e-05, - "loss": 1.9748, - "step": 130000 - }, - { - "epoch": 1.88, - "learning_rate": 8.746322288515362e-05, - "loss": 1.9715, - "step": 130500 - }, - { - "epoch": 1.89, - "learning_rate": 8.74113999968844e-05, - "loss": 1.9731, - "step": 131000 - }, - { - "epoch": 1.89, - "learning_rate": 8.735947325513166e-05, - "loss": 1.9728, - "step": 131500 - }, - { - "epoch": 1.9, - "learning_rate": 8.730754651337893e-05, - "loss": 1.9709, - "step": 132000 - }, - { - "epoch": 1.91, - "learning_rate": 8.72556197716262e-05, - "loss": 1.9718, - "step": 132500 - }, - { - "epoch": 1.91, - "learning_rate": 8.720379688335698e-05, - "loss": 1.9669, - "step": 133000 - }, - { - "epoch": 1.92, - "learning_rate": 8.715187014160423e-05, - "loss": 1.9707, - "step": 133500 - }, - { - "epoch": 1.93, - "learning_rate": 8.709994339985148e-05, - "loss": 1.9744, - "step": 134000 - }, - { - "epoch": 1.94, - "learning_rate": 8.704801665809877e-05, - "loss": 1.9699, - "step": 134500 - }, - { - "epoch": 1.94, - "learning_rate": 8.699619376982952e-05, - "loss": 1.9719, - "step": 135000 - }, - { - "epoch": 1.95, - "learning_rate": 8.69442670280768e-05, - "loss": 1.9733, - "step": 135500 - }, - { - "epoch": 1.96, - "learning_rate": 8.689234028632406e-05, - "loss": 1.9681, - "step": 136000 - }, - { - "epoch": 1.96, - "learning_rate": 8.684041354457132e-05, - "loss": 1.9698, - "step": 136500 - }, - { - "epoch": 1.97, - "learning_rate": 8.678859065630209e-05, - "loss": 1.9708, - "step": 137000 - }, - { - "epoch": 1.98, - "learning_rate": 8.673666391454936e-05, - "loss": 1.9622, - "step": 137500 - }, - { - "epoch": 1.99, - "learning_rate": 8.668473717279663e-05, - "loss": 1.9627, - "step": 138000 - }, - { - "epoch": 1.99, - "learning_rate": 8.663281043104388e-05, - "loss": 1.9667, - "step": 138500 - }, - { - "epoch": 2.0, - "eval_accuracy": 0.6277449496523109, - "eval_loss": 1.833423137664795, - "eval_runtime": 1270.2848, - "eval_samples_per_second": 424.265, - "eval_steps_per_second": 26.517, - "step": 138946 - }, - { - "epoch": 2.0, - "learning_rate": 8.658098754277466e-05, - "loss": 1.9637, - "step": 139000 - }, - { - "epoch": 2.01, - "learning_rate": 8.652906080102192e-05, - "loss": 1.9594, - "step": 139500 - }, - { - "epoch": 2.02, - "learning_rate": 8.647713405926918e-05, - "loss": 1.9558, - "step": 140000 - }, - { - "epoch": 2.02, - "learning_rate": 8.642520731751645e-05, - "loss": 1.959, - "step": 140500 - }, - { - "epoch": 2.03, - "learning_rate": 8.637338442924722e-05, - "loss": 1.9601, - "step": 141000 - }, - { - "epoch": 2.04, - "learning_rate": 8.632145768749449e-05, - "loss": 1.9579, - "step": 141500 - }, - { - "epoch": 2.04, - "learning_rate": 8.626953094574176e-05, - "loss": 1.9583, - "step": 142000 - }, - { - "epoch": 2.05, - "learning_rate": 8.621760420398902e-05, - "loss": 1.9565, - "step": 142500 - }, - { - "epoch": 2.06, - "learning_rate": 8.616578131571979e-05, - "loss": 1.9591, - "step": 143000 - }, - { - "epoch": 2.07, - "learning_rate": 8.611385457396706e-05, - "loss": 1.9529, - "step": 143500 - }, - { - "epoch": 2.07, - "learning_rate": 8.606192783221431e-05, - "loss": 1.9517, - "step": 144000 - }, - { - "epoch": 2.08, - "learning_rate": 8.601000109046158e-05, - "loss": 1.9522, - "step": 144500 - }, - { - "epoch": 2.09, - "learning_rate": 8.595817820219235e-05, - "loss": 1.9543, - "step": 145000 - }, - { - "epoch": 2.09, - "learning_rate": 8.590625146043962e-05, - "loss": 1.9546, - "step": 145500 - }, - { - "epoch": 2.1, - "learning_rate": 8.585432471868688e-05, - "loss": 1.9529, - "step": 146000 - }, - { - "epoch": 2.11, - "learning_rate": 8.580239797693414e-05, - "loss": 1.951, - "step": 146500 - }, - { - "epoch": 2.12, - "learning_rate": 8.575057508866492e-05, - "loss": 1.9542, - "step": 147000 - }, - { - "epoch": 2.12, - "learning_rate": 8.569864834691219e-05, - "loss": 1.9513, - "step": 147500 - }, - { - "epoch": 2.13, - "learning_rate": 8.564672160515944e-05, - "loss": 1.9507, - "step": 148000 - }, - { - "epoch": 2.14, - "learning_rate": 8.559479486340671e-05, - "loss": 1.948, - "step": 148500 - }, - { - "epoch": 2.14, - "learning_rate": 8.554297197513748e-05, - "loss": 1.9506, - "step": 149000 - }, - { - "epoch": 2.15, - "learning_rate": 8.549104523338474e-05, - "loss": 1.9474, - "step": 149500 - }, - { - "epoch": 2.16, - "learning_rate": 8.543911849163201e-05, - "loss": 1.9465, - "step": 150000 - }, - { - "epoch": 2.17, - "learning_rate": 8.538719174987927e-05, - "loss": 1.9501, - "step": 150500 - }, - { - "epoch": 2.17, - "learning_rate": 8.533536886161005e-05, - "loss": 1.9468, - "step": 151000 - }, - { - "epoch": 2.18, - "learning_rate": 8.52834421198573e-05, - "loss": 1.9448, - "step": 151500 - }, - { - "epoch": 2.19, - "learning_rate": 8.523151537810457e-05, - "loss": 1.947, - "step": 152000 - }, - { - "epoch": 2.2, - "learning_rate": 8.517958863635184e-05, - "loss": 1.9451, - "step": 152500 - }, - { - "epoch": 2.2, - "learning_rate": 8.51277657480826e-05, - "loss": 1.9453, - "step": 153000 - }, - { - "epoch": 2.21, - "learning_rate": 8.507583900632987e-05, - "loss": 1.9474, - "step": 153500 - }, - { - "epoch": 2.22, - "learning_rate": 8.502391226457714e-05, - "loss": 1.9443, - "step": 154000 - }, - { - "epoch": 2.22, - "learning_rate": 8.497198552282441e-05, - "loss": 1.9418, - "step": 154500 - }, - { - "epoch": 2.23, - "learning_rate": 8.492016263455518e-05, - "loss": 1.9412, - "step": 155000 - }, - { - "epoch": 2.24, - "learning_rate": 8.486823589280244e-05, - "loss": 1.9425, - "step": 155500 - }, - { - "epoch": 2.25, - "learning_rate": 8.48163091510497e-05, - "loss": 1.9403, - "step": 156000 - }, - { - "epoch": 2.25, - "learning_rate": 8.476438240929697e-05, - "loss": 1.9469, - "step": 156500 - }, - { - "epoch": 2.26, - "learning_rate": 8.471255952102773e-05, - "loss": 1.9344, - "step": 157000 - }, - { - "epoch": 2.27, - "learning_rate": 8.4660632779275e-05, - "loss": 1.9409, - "step": 157500 - }, - { - "epoch": 2.27, - "learning_rate": 8.460870603752227e-05, - "loss": 1.9353, - "step": 158000 - }, - { - "epoch": 2.28, - "learning_rate": 8.455677929576953e-05, - "loss": 1.94, - "step": 158500 - }, - { - "epoch": 2.29, - "learning_rate": 8.45049564075003e-05, - "loss": 1.9376, - "step": 159000 - }, - { - "epoch": 2.3, - "learning_rate": 8.445302966574756e-05, - "loss": 1.9371, - "step": 159500 - }, - { - "epoch": 2.3, - "learning_rate": 8.440110292399484e-05, - "loss": 1.938, - "step": 160000 - }, - { - "epoch": 2.31, - "learning_rate": 8.43491761822421e-05, - "loss": 1.9371, - "step": 160500 - }, - { - "epoch": 2.32, - "learning_rate": 8.429735329397288e-05, - "loss": 1.9377, - "step": 161000 - }, - { - "epoch": 2.32, - "learning_rate": 8.424542655222013e-05, - "loss": 1.9352, - "step": 161500 - }, - { - "epoch": 2.33, - "learning_rate": 8.419349981046739e-05, - "loss": 1.9325, - "step": 162000 - }, - { - "epoch": 2.34, - "learning_rate": 8.414157306871467e-05, - "loss": 1.9353, - "step": 162500 - }, - { - "epoch": 2.35, - "learning_rate": 8.408975018044543e-05, - "loss": 1.9306, - "step": 163000 - }, - { - "epoch": 2.35, - "learning_rate": 8.40378234386927e-05, - "loss": 1.9349, - "step": 163500 - }, - { - "epoch": 2.36, - "learning_rate": 8.398589669693996e-05, - "loss": 1.9311, - "step": 164000 - }, - { - "epoch": 2.37, - "learning_rate": 8.393396995518722e-05, - "loss": 1.931, - "step": 164500 - }, - { - "epoch": 2.38, - "learning_rate": 8.388214706691799e-05, - "loss": 1.9308, - "step": 165000 - }, - { - "epoch": 2.38, - "learning_rate": 8.383022032516526e-05, - "loss": 1.931, - "step": 165500 - }, - { - "epoch": 2.39, - "learning_rate": 8.377829358341253e-05, - "loss": 1.9317, - "step": 166000 - }, - { - "epoch": 2.4, - "learning_rate": 8.372636684165978e-05, - "loss": 1.931, - "step": 166500 - }, - { - "epoch": 2.4, - "learning_rate": 8.367454395339056e-05, - "loss": 1.9337, - "step": 167000 - }, - { - "epoch": 2.41, - "learning_rate": 8.362261721163782e-05, - "loss": 1.9331, - "step": 167500 - }, - { - "epoch": 2.42, - "learning_rate": 8.35706904698851e-05, - "loss": 1.9228, - "step": 168000 - }, - { - "epoch": 2.43, - "learning_rate": 8.351876372813235e-05, - "loss": 1.9276, - "step": 168500 - }, - { - "epoch": 2.43, - "learning_rate": 8.346694083986313e-05, - "loss": 1.9263, - "step": 169000 - }, - { - "epoch": 2.44, - "learning_rate": 8.341501409811039e-05, - "loss": 1.928, - "step": 169500 - }, - { - "epoch": 2.45, - "learning_rate": 8.336308735635766e-05, - "loss": 1.9274, - "step": 170000 - }, - { - "epoch": 2.45, - "learning_rate": 8.331116061460492e-05, - "loss": 1.9253, - "step": 170500 - }, - { - "epoch": 2.46, - "learning_rate": 8.325933772633569e-05, - "loss": 1.9253, - "step": 171000 - }, - { - "epoch": 2.47, - "learning_rate": 8.320741098458296e-05, - "loss": 1.9242, - "step": 171500 - }, - { - "epoch": 2.48, - "learning_rate": 8.315548424283021e-05, - "loss": 1.922, - "step": 172000 - }, - { - "epoch": 2.48, - "learning_rate": 8.31035575010775e-05, - "loss": 1.9241, - "step": 172500 - }, - { - "epoch": 2.49, - "learning_rate": 8.305173461280825e-05, - "loss": 1.9222, - "step": 173000 - }, - { - "epoch": 2.5, - "learning_rate": 8.299980787105553e-05, - "loss": 1.9222, - "step": 173500 - }, - { - "epoch": 2.5, - "learning_rate": 8.294788112930279e-05, - "loss": 1.921, - "step": 174000 - }, - { - "epoch": 2.51, - "learning_rate": 8.289595438755004e-05, - "loss": 1.925, - "step": 174500 - }, - { - "epoch": 2.52, - "learning_rate": 8.284413149928082e-05, - "loss": 1.9215, - "step": 175000 - }, - { - "epoch": 2.53, - "learning_rate": 8.279220475752807e-05, - "loss": 1.9219, - "step": 175500 - }, - { - "epoch": 2.53, - "learning_rate": 8.274027801577536e-05, - "loss": 1.9272, - "step": 176000 - }, - { - "epoch": 2.54, - "learning_rate": 8.268835127402261e-05, - "loss": 1.9202, - "step": 176500 - }, - { - "epoch": 2.55, - "learning_rate": 8.263642453226988e-05, - "loss": 1.9235, - "step": 177000 - }, - { - "epoch": 2.55, - "learning_rate": 8.258460164400065e-05, - "loss": 1.92, - "step": 177500 - }, - { - "epoch": 2.56, - "learning_rate": 8.253267490224791e-05, - "loss": 1.915, - "step": 178000 - }, - { - "epoch": 2.57, - "learning_rate": 8.248074816049518e-05, - "loss": 1.9147, - "step": 178500 - }, - { - "epoch": 2.58, - "learning_rate": 8.242882141874244e-05, - "loss": 1.9213, - "step": 179000 - }, - { - "epoch": 2.58, - "learning_rate": 8.237699853047322e-05, - "loss": 1.9187, - "step": 179500 - }, - { - "epoch": 2.59, - "learning_rate": 8.232507178872047e-05, - "loss": 1.9176, - "step": 180000 - }, - { - "epoch": 2.6, - "learning_rate": 8.227314504696774e-05, - "loss": 1.9149, - "step": 180500 - }, - { - "epoch": 2.61, - "learning_rate": 8.222121830521501e-05, - "loss": 1.9225, - "step": 181000 - }, - { - "epoch": 2.61, - "learning_rate": 8.216939541694577e-05, - "loss": 1.9187, - "step": 181500 - }, - { - "epoch": 2.62, - "learning_rate": 8.211746867519304e-05, - "loss": 1.9136, - "step": 182000 - }, - { - "epoch": 2.63, - "learning_rate": 8.206554193344031e-05, - "loss": 1.9119, - "step": 182500 - }, - { - "epoch": 2.63, - "learning_rate": 8.201361519168758e-05, - "loss": 1.9129, - "step": 183000 - }, - { - "epoch": 2.64, - "learning_rate": 8.196179230341835e-05, - "loss": 1.9192, - "step": 183500 - }, - { - "epoch": 2.65, - "learning_rate": 8.190986556166561e-05, - "loss": 1.9172, - "step": 184000 - }, - { - "epoch": 2.66, - "learning_rate": 8.185793881991287e-05, - "loss": 1.9185, - "step": 184500 - }, - { - "epoch": 2.66, - "learning_rate": 8.180601207816014e-05, - "loss": 1.9126, - "step": 185000 - }, - { - "epoch": 2.67, - "learning_rate": 8.17541891898909e-05, - "loss": 1.9156, - "step": 185500 - }, - { - "epoch": 2.68, - "learning_rate": 8.170226244813817e-05, - "loss": 1.9131, - "step": 186000 - }, - { - "epoch": 2.68, - "learning_rate": 8.165033570638544e-05, - "loss": 1.9099, - "step": 186500 - }, - { - "epoch": 2.69, - "learning_rate": 8.15984089646327e-05, - "loss": 1.9111, - "step": 187000 - }, - { - "epoch": 2.7, - "learning_rate": 8.154658607636347e-05, - "loss": 1.9124, - "step": 187500 - }, - { - "epoch": 2.71, - "learning_rate": 8.149465933461073e-05, - "loss": 1.9158, - "step": 188000 - }, - { - "epoch": 2.71, - "learning_rate": 8.1442732592858e-05, - "loss": 1.9092, - "step": 188500 - }, - { - "epoch": 2.72, - "learning_rate": 8.139080585110527e-05, - "loss": 1.9116, - "step": 189000 - }, - { - "epoch": 2.73, - "learning_rate": 8.133898296283603e-05, - "loss": 1.9074, - "step": 189500 - }, - { - "epoch": 2.73, - "learning_rate": 8.12870562210833e-05, - "loss": 1.9119, - "step": 190000 - }, - { - "epoch": 2.74, - "learning_rate": 8.123512947933057e-05, - "loss": 1.9099, - "step": 190500 - }, - { - "epoch": 2.75, - "learning_rate": 8.118320273757782e-05, - "loss": 1.907, - "step": 191000 - }, - { - "epoch": 2.76, - "learning_rate": 8.11313798493086e-05, - "loss": 1.9111, - "step": 191500 - }, - { - "epoch": 2.76, - "learning_rate": 8.107945310755587e-05, - "loss": 1.9092, - "step": 192000 - }, - { - "epoch": 2.77, - "learning_rate": 8.102752636580313e-05, - "loss": 1.9073, - "step": 192500 - }, - { - "epoch": 2.78, - "learning_rate": 8.09755996240504e-05, - "loss": 1.9052, - "step": 193000 - }, - { - "epoch": 2.79, - "learning_rate": 8.092377673578116e-05, - "loss": 1.9036, - "step": 193500 - }, - { - "epoch": 2.79, - "learning_rate": 8.087184999402843e-05, - "loss": 1.907, - "step": 194000 - }, - { - "epoch": 2.8, - "learning_rate": 8.08199232522757e-05, - "loss": 1.9082, - "step": 194500 - }, - { - "epoch": 2.81, - "learning_rate": 8.076799651052297e-05, - "loss": 1.9103, - "step": 195000 - }, - { - "epoch": 2.81, - "learning_rate": 8.071617362225373e-05, - "loss": 1.9028, - "step": 195500 - }, - { - "epoch": 2.82, - "learning_rate": 8.0664246880501e-05, - "loss": 1.9093, - "step": 196000 - }, - { - "epoch": 2.83, - "learning_rate": 8.061232013874825e-05, - "loss": 1.9014, - "step": 196500 - }, - { - "epoch": 2.84, - "learning_rate": 8.056039339699552e-05, - "loss": 1.9039, - "step": 197000 - }, - { - "epoch": 2.84, - "learning_rate": 8.050857050872629e-05, - "loss": 1.9032, - "step": 197500 - }, - { - "epoch": 2.85, - "learning_rate": 8.045664376697356e-05, - "loss": 1.9024, - "step": 198000 - }, - { - "epoch": 2.86, - "learning_rate": 8.040471702522083e-05, - "loss": 1.901, - "step": 198500 - }, - { - "epoch": 2.86, - "learning_rate": 8.035279028346808e-05, - "loss": 1.9027, - "step": 199000 - }, - { - "epoch": 2.87, - "learning_rate": 8.030096739519886e-05, - "loss": 1.9047, - "step": 199500 - }, - { - "epoch": 2.88, - "learning_rate": 8.024904065344611e-05, - "loss": 1.9004, - "step": 200000 - }, - { - "epoch": 2.89, - "learning_rate": 8.019711391169338e-05, - "loss": 1.9018, - "step": 200500 - }, - { - "epoch": 2.89, - "learning_rate": 8.014518716994065e-05, - "loss": 1.9016, - "step": 201000 - }, - { - "epoch": 2.9, - "learning_rate": 8.009336428167142e-05, - "loss": 1.8981, - "step": 201500 - }, - { - "epoch": 2.91, - "learning_rate": 8.004143753991869e-05, - "loss": 1.8975, - "step": 202000 - }, - { - "epoch": 2.91, - "learning_rate": 7.998951079816595e-05, - "loss": 1.8993, - "step": 202500 - }, - { - "epoch": 2.92, - "learning_rate": 7.993758405641322e-05, - "loss": 1.8978, - "step": 203000 - }, - { - "epoch": 2.93, - "learning_rate": 7.988576116814399e-05, - "loss": 1.8996, - "step": 203500 - }, - { - "epoch": 2.94, - "learning_rate": 7.983383442639126e-05, - "loss": 1.898, - "step": 204000 - }, - { - "epoch": 2.94, - "learning_rate": 7.978190768463851e-05, - "loss": 1.8992, - "step": 204500 - }, - { - "epoch": 2.95, - "learning_rate": 7.972998094288578e-05, - "loss": 1.8987, - "step": 205000 - }, - { - "epoch": 2.96, - "learning_rate": 7.967815805461655e-05, - "loss": 1.8968, - "step": 205500 - }, - { - "epoch": 2.97, - "learning_rate": 7.962623131286381e-05, - "loss": 1.8949, - "step": 206000 - }, - { - "epoch": 2.97, - "learning_rate": 7.957430457111108e-05, - "loss": 1.8961, - "step": 206500 - }, - { - "epoch": 2.98, - "learning_rate": 7.952237782935834e-05, - "loss": 1.8963, - "step": 207000 - }, - { - "epoch": 2.99, - "learning_rate": 7.947055494108912e-05, - "loss": 1.8949, - "step": 207500 - }, - { - "epoch": 2.99, - "learning_rate": 7.941862819933637e-05, - "loss": 1.8918, - "step": 208000 - }, - { - "epoch": 3.0, - "eval_accuracy": 0.6376255333966128, - "eval_loss": 1.765644907951355, - "eval_runtime": 1267.2057, - "eval_samples_per_second": 425.296, - "eval_steps_per_second": 26.581, - "step": 208419 - }, - { - "epoch": 3.0, - "learning_rate": 7.936670145758365e-05, - "loss": 1.896, - "step": 208500 - }, - { - "epoch": 3.01, - "learning_rate": 7.931477471583091e-05, - "loss": 1.889, - "step": 209000 - }, - { - "epoch": 3.02, - "learning_rate": 7.926295182756169e-05, - "loss": 1.888, - "step": 209500 - }, - { - "epoch": 3.02, - "learning_rate": 7.921102508580894e-05, - "loss": 1.8899, - "step": 210000 - }, - { - "epoch": 3.03, - "learning_rate": 7.91590983440562e-05, - "loss": 1.8929, - "step": 210500 - }, - { - "epoch": 3.04, - "learning_rate": 7.910717160230348e-05, - "loss": 1.8867, - "step": 211000 - }, - { - "epoch": 3.04, - "learning_rate": 7.905524486055073e-05, - "loss": 1.8923, - "step": 211500 - }, - { - "epoch": 3.05, - "learning_rate": 7.900342197228151e-05, - "loss": 1.8881, - "step": 212000 - }, - { - "epoch": 3.06, - "learning_rate": 7.895149523052877e-05, - "loss": 1.8883, - "step": 212500 - }, - { - "epoch": 3.07, - "learning_rate": 7.889956848877604e-05, - "loss": 1.8874, - "step": 213000 - }, - { - "epoch": 3.07, - "learning_rate": 7.88476417470233e-05, - "loss": 1.8859, - "step": 213500 - }, - { - "epoch": 3.08, - "learning_rate": 7.879581885875407e-05, - "loss": 1.8889, - "step": 214000 - }, - { - "epoch": 3.09, - "learning_rate": 7.874389211700134e-05, - "loss": 1.8864, - "step": 214500 - }, - { - "epoch": 3.09, - "learning_rate": 7.86919653752486e-05, - "loss": 1.8842, - "step": 215000 - }, - { - "epoch": 3.1, - "learning_rate": 7.864003863349588e-05, - "loss": 1.8901, - "step": 215500 - }, - { - "epoch": 3.11, - "learning_rate": 7.858821574522663e-05, - "loss": 1.888, - "step": 216000 - }, - { - "epoch": 3.12, - "learning_rate": 7.853628900347391e-05, - "loss": 1.8866, - "step": 216500 - }, - { - "epoch": 3.12, - "learning_rate": 7.848436226172117e-05, - "loss": 1.8815, - "step": 217000 - }, - { - "epoch": 3.13, - "learning_rate": 7.843243551996843e-05, - "loss": 1.887, - "step": 217500 - }, - { - "epoch": 3.14, - "learning_rate": 7.83806126316992e-05, - "loss": 1.8878, - "step": 218000 - }, - { - "epoch": 3.15, - "learning_rate": 7.832868588994647e-05, - "loss": 1.8863, - "step": 218500 - }, - { - "epoch": 3.15, - "learning_rate": 7.827675914819374e-05, - "loss": 1.8873, - "step": 219000 - }, - { - "epoch": 3.16, - "learning_rate": 7.822483240644099e-05, - "loss": 1.8812, - "step": 219500 - }, - { - "epoch": 3.17, - "learning_rate": 7.817300951817177e-05, - "loss": 1.8862, - "step": 220000 - }, - { - "epoch": 3.17, - "learning_rate": 7.812108277641903e-05, - "loss": 1.8852, - "step": 220500 - }, - { - "epoch": 3.18, - "learning_rate": 7.806915603466631e-05, - "loss": 1.886, - "step": 221000 - }, - { - "epoch": 3.19, - "learning_rate": 7.801722929291356e-05, - "loss": 1.8846, - "step": 221500 - }, - { - "epoch": 3.2, - "learning_rate": 7.796540640464434e-05, - "loss": 1.8843, - "step": 222000 - }, - { - "epoch": 3.2, - "learning_rate": 7.79134796628916e-05, - "loss": 1.8819, - "step": 222500 - }, - { - "epoch": 3.21, - "learning_rate": 7.786155292113885e-05, - "loss": 1.8774, - "step": 223000 - }, - { - "epoch": 3.22, - "learning_rate": 7.780962617938613e-05, - "loss": 1.8817, - "step": 223500 - }, - { - "epoch": 3.22, - "learning_rate": 7.775780329111689e-05, - "loss": 1.8838, - "step": 224000 - }, - { - "epoch": 3.23, - "learning_rate": 7.770587654936417e-05, - "loss": 1.8812, - "step": 224500 - }, - { - "epoch": 3.24, - "learning_rate": 7.765394980761142e-05, - "loss": 1.8824, - "step": 225000 - }, - { - "epoch": 3.25, - "learning_rate": 7.760202306585869e-05, - "loss": 1.8842, - "step": 225500 - }, - { - "epoch": 3.25, - "learning_rate": 7.755020017758946e-05, - "loss": 1.8833, - "step": 226000 - }, - { - "epoch": 3.26, - "learning_rate": 7.749827343583673e-05, - "loss": 1.8815, - "step": 226500 - }, - { - "epoch": 3.27, - "learning_rate": 7.7446346694084e-05, - "loss": 1.879, - "step": 227000 - }, - { - "epoch": 3.27, - "learning_rate": 7.739441995233125e-05, - "loss": 1.8779, - "step": 227500 - }, - { - "epoch": 3.28, - "learning_rate": 7.734259706406203e-05, - "loss": 1.8777, - "step": 228000 - }, - { - "epoch": 3.29, - "learning_rate": 7.729067032230928e-05, - "loss": 1.8768, - "step": 228500 - }, - { - "epoch": 3.3, - "learning_rate": 7.723874358055655e-05, - "loss": 1.8804, - "step": 229000 - }, - { - "epoch": 3.3, - "learning_rate": 7.718681683880382e-05, - "loss": 1.8789, - "step": 229500 - }, - { - "epoch": 3.31, - "learning_rate": 7.713499395053459e-05, - "loss": 1.8769, - "step": 230000 - }, - { - "epoch": 3.32, - "learning_rate": 7.708306720878185e-05, - "loss": 1.882, - "step": 230500 - }, - { - "epoch": 3.33, - "learning_rate": 7.703114046702912e-05, - "loss": 1.878, - "step": 231000 - }, - { - "epoch": 3.33, - "learning_rate": 7.697921372527639e-05, - "loss": 1.8774, - "step": 231500 - }, - { - "epoch": 3.34, - "learning_rate": 7.692739083700716e-05, - "loss": 1.8789, - "step": 232000 - }, - { - "epoch": 3.35, - "learning_rate": 7.687546409525443e-05, - "loss": 1.877, - "step": 232500 - }, - { - "epoch": 3.35, - "learning_rate": 7.682353735350168e-05, - "loss": 1.8732, - "step": 233000 - }, - { - "epoch": 3.36, - "learning_rate": 7.677161061174895e-05, - "loss": 1.8777, - "step": 233500 - }, - { - "epoch": 3.37, - "learning_rate": 7.671978772347972e-05, - "loss": 1.8753, - "step": 234000 - }, - { - "epoch": 3.38, - "learning_rate": 7.666786098172698e-05, - "loss": 1.8741, - "step": 234500 - }, - { - "epoch": 3.38, - "learning_rate": 7.661593423997425e-05, - "loss": 1.8763, - "step": 235000 - }, - { - "epoch": 3.39, - "learning_rate": 7.65640074982215e-05, - "loss": 1.8773, - "step": 235500 - }, - { - "epoch": 3.4, - "learning_rate": 7.651218460995229e-05, - "loss": 1.8731, - "step": 236000 - }, - { - "epoch": 3.4, - "learning_rate": 7.646025786819954e-05, - "loss": 1.8696, - "step": 236500 - }, - { - "epoch": 3.41, - "learning_rate": 7.640833112644681e-05, - "loss": 1.8737, - "step": 237000 - }, - { - "epoch": 3.42, - "learning_rate": 7.635640438469408e-05, - "loss": 1.8765, - "step": 237500 - }, - { - "epoch": 3.43, - "learning_rate": 7.630458149642484e-05, - "loss": 1.8751, - "step": 238000 - }, - { - "epoch": 3.43, - "learning_rate": 7.625265475467211e-05, - "loss": 1.8759, - "step": 238500 - }, - { - "epoch": 3.44, - "learning_rate": 7.620072801291938e-05, - "loss": 1.8716, - "step": 239000 - }, - { - "epoch": 3.45, - "learning_rate": 7.614880127116664e-05, - "loss": 1.8744, - "step": 239500 - }, - { - "epoch": 3.45, - "learning_rate": 7.609697838289742e-05, - "loss": 1.8746, - "step": 240000 - }, - { - "epoch": 3.46, - "learning_rate": 7.604505164114468e-05, - "loss": 1.8747, - "step": 240500 - }, - { - "epoch": 3.47, - "learning_rate": 7.599312489939194e-05, - "loss": 1.8738, - "step": 241000 - }, - { - "epoch": 3.48, - "learning_rate": 7.59411981576392e-05, - "loss": 1.8706, - "step": 241500 - }, - { - "epoch": 3.48, - "learning_rate": 7.588937526936997e-05, - "loss": 1.8679, - "step": 242000 - }, - { - "epoch": 3.49, - "learning_rate": 7.583744852761724e-05, - "loss": 1.8713, - "step": 242500 - }, - { - "epoch": 3.5, - "learning_rate": 7.578552178586451e-05, - "loss": 1.8706, - "step": 243000 - }, - { - "epoch": 3.5, - "learning_rate": 7.573359504411178e-05, - "loss": 1.8719, - "step": 243500 - }, - { - "epoch": 3.51, - "learning_rate": 7.568177215584254e-05, - "loss": 1.8685, - "step": 244000 - }, - { - "epoch": 3.52, - "learning_rate": 7.562984541408981e-05, - "loss": 1.8701, - "step": 244500 - }, - { - "epoch": 3.53, - "learning_rate": 7.557791867233707e-05, - "loss": 1.8715, - "step": 245000 - }, - { - "epoch": 3.53, - "learning_rate": 7.552599193058434e-05, - "loss": 1.8671, - "step": 245500 - }, - { - "epoch": 3.54, - "learning_rate": 7.54741690423151e-05, - "loss": 1.871, - "step": 246000 - }, - { - "epoch": 3.55, - "learning_rate": 7.542224230056237e-05, - "loss": 1.8719, - "step": 246500 - }, - { - "epoch": 3.56, - "learning_rate": 7.537031555880964e-05, - "loss": 1.8658, - "step": 247000 - }, - { - "epoch": 3.56, - "learning_rate": 7.531838881705689e-05, - "loss": 1.8702, - "step": 247500 - }, - { - "epoch": 3.57, - "learning_rate": 7.526656592878767e-05, - "loss": 1.8717, - "step": 248000 - }, - { - "epoch": 3.58, - "learning_rate": 7.521463918703493e-05, - "loss": 1.8633, - "step": 248500 - }, - { - "epoch": 3.58, - "learning_rate": 7.51627124452822e-05, - "loss": 1.8689, - "step": 249000 - }, - { - "epoch": 3.59, - "learning_rate": 7.511078570352946e-05, - "loss": 1.8647, - "step": 249500 - }, - { - "epoch": 3.6, - "learning_rate": 7.505896281526023e-05, - "loss": 1.868, - "step": 250000 - }, - { - "epoch": 3.61, - "learning_rate": 7.50070360735075e-05, - "loss": 1.8682, - "step": 250500 - }, - { - "epoch": 3.61, - "learning_rate": 7.495510933175477e-05, - "loss": 1.8668, - "step": 251000 - }, - { - "epoch": 3.62, - "learning_rate": 7.490318259000203e-05, - "loss": 1.8652, - "step": 251500 - }, - { - "epoch": 3.63, - "learning_rate": 7.48513597017328e-05, - "loss": 1.8655, - "step": 252000 - }, - { - "epoch": 3.63, - "learning_rate": 7.479943295998007e-05, - "loss": 1.8658, - "step": 252500 - }, - { - "epoch": 3.64, - "learning_rate": 7.474750621822732e-05, - "loss": 1.8606, - "step": 253000 - }, - { - "epoch": 3.65, - "learning_rate": 7.469557947647459e-05, - "loss": 1.8669, - "step": 253500 - }, - { - "epoch": 3.66, - "learning_rate": 7.464375658820536e-05, - "loss": 1.8661, - "step": 254000 - }, - { - "epoch": 3.66, - "learning_rate": 7.459182984645263e-05, - "loss": 1.8636, - "step": 254500 - }, - { - "epoch": 3.67, - "learning_rate": 7.45399031046999e-05, - "loss": 1.8663, - "step": 255000 - }, - { - "epoch": 3.68, - "learning_rate": 7.448797636294715e-05, - "loss": 1.8656, - "step": 255500 - }, - { - "epoch": 3.68, - "learning_rate": 7.443615347467793e-05, - "loss": 1.8647, - "step": 256000 - }, - { - "epoch": 3.69, - "learning_rate": 7.438422673292518e-05, - "loss": 1.8668, - "step": 256500 - }, - { - "epoch": 3.7, - "learning_rate": 7.433229999117247e-05, - "loss": 1.8636, - "step": 257000 - }, - { - "epoch": 3.71, - "learning_rate": 7.428037324941972e-05, - "loss": 1.8654, - "step": 257500 - }, - { - "epoch": 3.71, - "learning_rate": 7.42285503611505e-05, - "loss": 1.8595, - "step": 258000 - }, - { - "epoch": 3.72, - "learning_rate": 7.417662361939776e-05, - "loss": 1.866, - "step": 258500 - }, - { - "epoch": 3.73, - "learning_rate": 7.412469687764501e-05, - "loss": 1.8632, - "step": 259000 - }, - { - "epoch": 3.74, - "learning_rate": 7.407277013589229e-05, - "loss": 1.8598, - "step": 259500 - }, - { - "epoch": 3.74, - "learning_rate": 7.402094724762304e-05, - "loss": 1.8656, - "step": 260000 - }, - { - "epoch": 3.75, - "learning_rate": 7.396902050587033e-05, - "loss": 1.8598, - "step": 260500 - }, - { - "epoch": 3.76, - "learning_rate": 7.391709376411758e-05, - "loss": 1.862, - "step": 261000 - }, - { - "epoch": 3.76, - "learning_rate": 7.386516702236485e-05, - "loss": 1.8642, - "step": 261500 - }, - { - "epoch": 3.77, - "learning_rate": 7.381334413409562e-05, - "loss": 1.8629, - "step": 262000 - }, - { - "epoch": 3.78, - "learning_rate": 7.376141739234288e-05, - "loss": 1.8634, - "step": 262500 - }, - { - "epoch": 3.79, - "learning_rate": 7.370949065059015e-05, - "loss": 1.8613, - "step": 263000 - }, - { - "epoch": 3.79, - "learning_rate": 7.365756390883741e-05, - "loss": 1.8609, - "step": 263500 - }, - { - "epoch": 3.8, - "learning_rate": 7.360574102056819e-05, - "loss": 1.8618, - "step": 264000 - }, - { - "epoch": 3.81, - "learning_rate": 7.355381427881544e-05, - "loss": 1.8611, - "step": 264500 - }, - { - "epoch": 3.81, - "learning_rate": 7.350188753706272e-05, - "loss": 1.8628, - "step": 265000 - }, - { - "epoch": 3.82, - "learning_rate": 7.344996079530998e-05, - "loss": 1.8608, - "step": 265500 - }, - { - "epoch": 3.83, - "learning_rate": 7.339813790704076e-05, - "loss": 1.8583, - "step": 266000 - }, - { - "epoch": 3.84, - "learning_rate": 7.334621116528801e-05, - "loss": 1.8558, - "step": 266500 - }, - { - "epoch": 3.84, - "learning_rate": 7.329428442353528e-05, - "loss": 1.8582, - "step": 267000 - }, - { - "epoch": 3.85, - "learning_rate": 7.324235768178255e-05, - "loss": 1.8574, - "step": 267500 - }, - { - "epoch": 3.86, - "learning_rate": 7.319053479351332e-05, - "loss": 1.8574, - "step": 268000 - }, - { - "epoch": 3.86, - "learning_rate": 7.313860805176058e-05, - "loss": 1.8562, - "step": 268500 - }, - { - "epoch": 3.87, - "learning_rate": 7.308668131000784e-05, - "loss": 1.8591, - "step": 269000 - }, - { - "epoch": 3.88, - "learning_rate": 7.303475456825511e-05, - "loss": 1.8568, - "step": 269500 - }, - { - "epoch": 3.89, - "learning_rate": 7.298293167998587e-05, - "loss": 1.856, - "step": 270000 - }, - { - "epoch": 3.89, - "learning_rate": 7.293100493823316e-05, - "loss": 1.8577, - "step": 270500 - }, - { - "epoch": 3.9, - "learning_rate": 7.287907819648041e-05, - "loss": 1.8551, - "step": 271000 - }, - { - "epoch": 3.91, - "learning_rate": 7.282715145472766e-05, - "loss": 1.8555, - "step": 271500 - }, - { - "epoch": 3.92, - "learning_rate": 7.277522471297495e-05, - "loss": 1.8565, - "step": 272000 - }, - { - "epoch": 3.92, - "learning_rate": 7.27234018247057e-05, - "loss": 1.8574, - "step": 272500 - }, - { - "epoch": 3.93, - "learning_rate": 7.267147508295298e-05, - "loss": 1.8586, - "step": 273000 - }, - { - "epoch": 3.94, - "learning_rate": 7.261954834120024e-05, - "loss": 1.8537, - "step": 273500 - }, - { - "epoch": 3.94, - "learning_rate": 7.25676215994475e-05, - "loss": 1.8567, - "step": 274000 - }, - { - "epoch": 3.95, - "learning_rate": 7.251579871117827e-05, - "loss": 1.8585, - "step": 274500 - }, - { - "epoch": 3.96, - "learning_rate": 7.246387196942554e-05, - "loss": 1.854, - "step": 275000 - }, - { - "epoch": 3.97, - "learning_rate": 7.241194522767281e-05, - "loss": 1.8553, - "step": 275500 - }, - { - "epoch": 3.97, - "learning_rate": 7.236001848592006e-05, - "loss": 1.8532, - "step": 276000 - }, - { - "epoch": 3.98, - "learning_rate": 7.230819559765084e-05, - "loss": 1.8559, - "step": 276500 - }, - { - "epoch": 3.99, - "learning_rate": 7.22562688558981e-05, - "loss": 1.8567, - "step": 277000 - }, - { - "epoch": 3.99, - "learning_rate": 7.220434211414536e-05, - "loss": 1.8518, - "step": 277500 - }, - { - "epoch": 4.0, - "eval_accuracy": 0.6444414608959261, - "eval_loss": 1.721920132637024, - "eval_runtime": 1267.8764, - "eval_samples_per_second": 425.071, - "eval_steps_per_second": 26.567, - "step": 277892 - }, - { - "epoch": 4.0, - "learning_rate": 7.215241537239263e-05, - "loss": 1.8506, - "step": 278000 - }, - { - "epoch": 4.01, - "learning_rate": 7.21005924841234e-05, - "loss": 1.8528, - "step": 278500 - }, - { - "epoch": 4.02, - "learning_rate": 7.204866574237067e-05, - "loss": 1.8486, - "step": 279000 - }, - { - "epoch": 4.02, - "learning_rate": 7.199673900061794e-05, - "loss": 1.8504, - "step": 279500 - }, - { - "epoch": 4.03, - "learning_rate": 7.19448122588652e-05, - "loss": 1.8496, - "step": 280000 - }, - { - "epoch": 4.04, - "learning_rate": 7.189298937059597e-05, - "loss": 1.8481, - "step": 280500 - }, - { - "epoch": 4.04, - "learning_rate": 7.184106262884324e-05, - "loss": 1.8505, - "step": 281000 - }, - { - "epoch": 4.05, - "learning_rate": 7.178913588709049e-05, - "loss": 1.843, - "step": 281500 - }, - { - "epoch": 4.06, - "learning_rate": 7.173720914533776e-05, - "loss": 1.852, - "step": 282000 - }, - { - "epoch": 4.07, - "learning_rate": 7.168538625706853e-05, - "loss": 1.847, - "step": 282500 - }, - { - "epoch": 4.07, - "learning_rate": 7.16334595153158e-05, - "loss": 1.8492, - "step": 283000 - }, - { - "epoch": 4.08, - "learning_rate": 7.158153277356306e-05, - "loss": 1.8476, - "step": 283500 - }, - { - "epoch": 4.09, - "learning_rate": 7.152960603181032e-05, - "loss": 1.8524, - "step": 284000 - }, - { - "epoch": 4.1, - "learning_rate": 7.14777831435411e-05, - "loss": 1.8478, - "step": 284500 - }, - { - "epoch": 4.1, - "learning_rate": 7.142585640178835e-05, - "loss": 1.8498, - "step": 285000 - }, - { - "epoch": 4.11, - "learning_rate": 7.137392966003562e-05, - "loss": 1.8488, - "step": 285500 - }, - { - "epoch": 4.12, - "learning_rate": 7.132200291828289e-05, - "loss": 1.8522, - "step": 286000 - }, - { - "epoch": 4.12, - "learning_rate": 7.127018003001366e-05, - "loss": 1.8446, - "step": 286500 - }, - { - "epoch": 4.13, - "learning_rate": 7.121825328826092e-05, - "loss": 1.8471, - "step": 287000 - }, - { - "epoch": 4.14, - "learning_rate": 7.116632654650819e-05, - "loss": 1.8453, - "step": 287500 - }, - { - "epoch": 4.15, - "learning_rate": 7.111439980475545e-05, - "loss": 1.8443, - "step": 288000 - }, - { - "epoch": 4.15, - "learning_rate": 7.106257691648623e-05, - "loss": 1.8477, - "step": 288500 - }, - { - "epoch": 4.16, - "learning_rate": 7.101065017473348e-05, - "loss": 1.8482, - "step": 289000 - }, - { - "epoch": 4.17, - "learning_rate": 7.095872343298075e-05, - "loss": 1.8482, - "step": 289500 - }, - { - "epoch": 4.17, - "learning_rate": 7.090679669122802e-05, - "loss": 1.8437, - "step": 290000 - }, - { - "epoch": 4.18, - "learning_rate": 7.085497380295879e-05, - "loss": 1.8444, - "step": 290500 - }, - { - "epoch": 4.19, - "learning_rate": 7.080304706120605e-05, - "loss": 1.8431, - "step": 291000 - }, - { - "epoch": 4.2, - "learning_rate": 7.075112031945332e-05, - "loss": 1.8497, - "step": 291500 - }, - { - "epoch": 4.2, - "learning_rate": 7.069919357770059e-05, - "loss": 1.8444, - "step": 292000 - }, - { - "epoch": 4.21, - "learning_rate": 7.064737068943136e-05, - "loss": 1.8459, - "step": 292500 - }, - { - "epoch": 4.22, - "learning_rate": 7.059544394767862e-05, - "loss": 1.8435, - "step": 293000 - }, - { - "epoch": 4.22, - "learning_rate": 7.054351720592588e-05, - "loss": 1.8439, - "step": 293500 - }, - { - "epoch": 4.23, - "learning_rate": 7.049159046417315e-05, - "loss": 1.8423, - "step": 294000 - }, - { - "epoch": 4.24, - "learning_rate": 7.043976757590391e-05, - "loss": 1.8427, - "step": 294500 - }, - { - "epoch": 4.25, - "learning_rate": 7.038784083415118e-05, - "loss": 1.844, - "step": 295000 - }, - { - "epoch": 4.25, - "learning_rate": 7.033591409239845e-05, - "loss": 1.842, - "step": 295500 - }, - { - "epoch": 4.26, - "learning_rate": 7.02839873506457e-05, - "loss": 1.8401, - "step": 296000 - }, - { - "epoch": 4.27, - "learning_rate": 7.023216446237648e-05, - "loss": 1.8422, - "step": 296500 - }, - { - "epoch": 4.28, - "learning_rate": 7.018023772062374e-05, - "loss": 1.8425, - "step": 297000 - }, - { - "epoch": 4.28, - "learning_rate": 7.012831097887101e-05, - "loss": 1.8423, - "step": 297500 - }, - { - "epoch": 4.29, - "learning_rate": 7.007638423711828e-05, - "loss": 1.8378, - "step": 298000 - }, - { - "epoch": 4.3, - "learning_rate": 7.002456134884906e-05, - "loss": 1.8424, - "step": 298500 - }, - { - "epoch": 4.3, - "learning_rate": 6.997263460709631e-05, - "loss": 1.8459, - "step": 299000 - }, - { - "epoch": 4.31, - "learning_rate": 6.992070786534358e-05, - "loss": 1.8393, - "step": 299500 - }, - { - "epoch": 4.32, - "learning_rate": 6.986878112359085e-05, - "loss": 1.8382, - "step": 300000 - }, - { - "epoch": 4.33, - "learning_rate": 6.981695823532161e-05, - "loss": 1.8398, - "step": 300500 - }, - { - "epoch": 4.33, - "learning_rate": 6.976503149356888e-05, - "loss": 1.8415, - "step": 301000 - }, - { - "epoch": 4.34, - "learning_rate": 6.971310475181614e-05, - "loss": 1.836, - "step": 301500 - }, - { - "epoch": 4.35, - "learning_rate": 6.96611780100634e-05, - "loss": 1.8427, - "step": 302000 - }, - { - "epoch": 4.35, - "learning_rate": 6.960935512179417e-05, - "loss": 1.8376, - "step": 302500 - }, - { - "epoch": 4.36, - "learning_rate": 6.955742838004144e-05, - "loss": 1.8433, - "step": 303000 - }, - { - "epoch": 4.37, - "learning_rate": 6.950550163828871e-05, - "loss": 1.8437, - "step": 303500 - }, - { - "epoch": 4.38, - "learning_rate": 6.945357489653596e-05, - "loss": 1.842, - "step": 304000 - }, - { - "epoch": 4.38, - "learning_rate": 6.940175200826674e-05, - "loss": 1.8364, - "step": 304500 - }, - { - "epoch": 4.39, - "learning_rate": 6.9349825266514e-05, - "loss": 1.8412, - "step": 305000 - }, - { - "epoch": 4.4, - "learning_rate": 6.929789852476128e-05, - "loss": 1.8378, - "step": 305500 - }, - { - "epoch": 4.4, - "learning_rate": 6.924597178300853e-05, - "loss": 1.8409, - "step": 306000 - }, - { - "epoch": 4.41, - "learning_rate": 6.919414889473931e-05, - "loss": 1.8416, - "step": 306500 - }, - { - "epoch": 4.42, - "learning_rate": 6.914222215298657e-05, - "loss": 1.8389, - "step": 307000 - }, - { - "epoch": 4.43, - "learning_rate": 6.909029541123384e-05, - "loss": 1.8346, - "step": 307500 - }, - { - "epoch": 4.43, - "learning_rate": 6.90383686694811e-05, - "loss": 1.8392, - "step": 308000 - }, - { - "epoch": 4.44, - "learning_rate": 6.898654578121187e-05, - "loss": 1.8339, - "step": 308500 - }, - { - "epoch": 4.45, - "learning_rate": 6.893461903945914e-05, - "loss": 1.8365, - "step": 309000 - }, - { - "epoch": 4.45, - "learning_rate": 6.88826922977064e-05, - "loss": 1.8375, - "step": 309500 - }, - { - "epoch": 4.46, - "learning_rate": 6.883076555595368e-05, - "loss": 1.8386, - "step": 310000 - }, - { - "epoch": 4.47, - "learning_rate": 6.877894266768443e-05, - "loss": 1.8402, - "step": 310500 - }, - { - "epoch": 4.48, - "learning_rate": 6.872701592593171e-05, - "loss": 1.8372, - "step": 311000 - }, - { - "epoch": 4.48, - "learning_rate": 6.867508918417897e-05, - "loss": 1.8329, - "step": 311500 - }, - { - "epoch": 4.49, - "learning_rate": 6.862316244242622e-05, - "loss": 1.836, - "step": 312000 - }, - { - "epoch": 4.5, - "learning_rate": 6.8571339554157e-05, - "loss": 1.8352, - "step": 312500 - }, - { - "epoch": 4.51, - "learning_rate": 6.851941281240425e-05, - "loss": 1.8343, - "step": 313000 - }, - { - "epoch": 4.51, - "learning_rate": 6.846748607065154e-05, - "loss": 1.84, - "step": 313500 - }, - { - "epoch": 4.52, - "learning_rate": 6.841555932889879e-05, - "loss": 1.8387, - "step": 314000 - }, - { - "epoch": 4.53, - "learning_rate": 6.836373644062957e-05, - "loss": 1.8325, - "step": 314500 - }, - { - "epoch": 4.53, - "learning_rate": 6.831180969887683e-05, - "loss": 1.8345, - "step": 315000 - }, - { - "epoch": 4.54, - "learning_rate": 6.82598829571241e-05, - "loss": 1.835, - "step": 315500 - }, - { - "epoch": 4.55, - "learning_rate": 6.820795621537136e-05, - "loss": 1.8368, - "step": 316000 - }, - { - "epoch": 4.56, - "learning_rate": 6.815613332710213e-05, - "loss": 1.8377, - "step": 316500 - }, - { - "epoch": 4.56, - "learning_rate": 6.81042065853494e-05, - "loss": 1.8386, - "step": 317000 - }, - { - "epoch": 4.57, - "learning_rate": 6.805227984359665e-05, - "loss": 1.8343, - "step": 317500 - }, - { - "epoch": 4.58, - "learning_rate": 6.800035310184392e-05, - "loss": 1.8332, - "step": 318000 - }, - { - "epoch": 4.58, - "learning_rate": 6.794853021357469e-05, - "loss": 1.8349, - "step": 318500 - }, - { - "epoch": 4.59, - "learning_rate": 6.789660347182197e-05, - "loss": 1.8317, - "step": 319000 - }, - { - "epoch": 4.6, - "learning_rate": 6.784467673006922e-05, - "loss": 1.8323, - "step": 319500 - }, - { - "epoch": 4.61, - "learning_rate": 6.779274998831649e-05, - "loss": 1.8338, - "step": 320000 - }, - { - "epoch": 4.61, - "learning_rate": 6.774092710004726e-05, - "loss": 1.8322, - "step": 320500 - }, - { - "epoch": 4.62, - "learning_rate": 6.768900035829453e-05, - "loss": 1.8344, - "step": 321000 - }, - { - "epoch": 4.63, - "learning_rate": 6.76370736165418e-05, - "loss": 1.8345, - "step": 321500 - }, - { - "epoch": 4.63, - "learning_rate": 6.758514687478905e-05, - "loss": 1.8343, - "step": 322000 - }, - { - "epoch": 4.64, - "learning_rate": 6.753332398651983e-05, - "loss": 1.8333, - "step": 322500 - }, - { - "epoch": 4.65, - "learning_rate": 6.748139724476708e-05, - "loss": 1.8374, - "step": 323000 - }, - { - "epoch": 4.66, - "learning_rate": 6.742947050301435e-05, - "loss": 1.8339, - "step": 323500 - }, - { - "epoch": 4.66, - "learning_rate": 6.737754376126162e-05, - "loss": 1.8346, - "step": 324000 - }, - { - "epoch": 4.67, - "learning_rate": 6.732572087299239e-05, - "loss": 1.8318, - "step": 324500 - }, - { - "epoch": 4.68, - "learning_rate": 6.727379413123965e-05, - "loss": 1.8313, - "step": 325000 - }, - { - "epoch": 4.69, - "learning_rate": 6.722186738948691e-05, - "loss": 1.8287, - "step": 325500 - }, - { - "epoch": 4.69, - "learning_rate": 6.716994064773418e-05, - "loss": 1.8282, - "step": 326000 - }, - { - "epoch": 4.7, - "learning_rate": 6.711811775946494e-05, - "loss": 1.8341, - "step": 326500 - }, - { - "epoch": 4.71, - "learning_rate": 6.706619101771221e-05, - "loss": 1.8319, - "step": 327000 - }, - { - "epoch": 4.71, - "learning_rate": 6.701426427595948e-05, - "loss": 1.8299, - "step": 327500 - }, - { - "epoch": 4.72, - "learning_rate": 6.696233753420675e-05, - "loss": 1.8299, - "step": 328000 - }, - { - "epoch": 4.73, - "learning_rate": 6.691051464593751e-05, - "loss": 1.8296, - "step": 328500 - }, - { - "epoch": 4.74, - "learning_rate": 6.685858790418478e-05, - "loss": 1.8306, - "step": 329000 - }, - { - "epoch": 4.74, - "learning_rate": 6.680666116243205e-05, - "loss": 1.8353, - "step": 329500 - }, - { - "epoch": 4.75, - "learning_rate": 6.67547344206793e-05, - "loss": 1.8262, - "step": 330000 - }, - { - "epoch": 4.76, - "learning_rate": 6.670291153241009e-05, - "loss": 1.8285, - "step": 330500 - }, - { - "epoch": 4.76, - "learning_rate": 6.665098479065734e-05, - "loss": 1.8275, - "step": 331000 - }, - { - "epoch": 4.77, - "learning_rate": 6.659905804890461e-05, - "loss": 1.8286, - "step": 331500 - }, - { - "epoch": 4.78, - "learning_rate": 6.654713130715188e-05, - "loss": 1.8289, - "step": 332000 - }, - { - "epoch": 4.79, - "learning_rate": 6.649530841888264e-05, - "loss": 1.8296, - "step": 332500 - }, - { - "epoch": 4.79, - "learning_rate": 6.644338167712991e-05, - "loss": 1.8276, - "step": 333000 - }, - { - "epoch": 4.8, - "learning_rate": 6.639145493537718e-05, - "loss": 1.8309, - "step": 333500 - }, - { - "epoch": 4.81, - "learning_rate": 6.633952819362443e-05, - "loss": 1.8291, - "step": 334000 - }, - { - "epoch": 4.81, - "learning_rate": 6.628770530535521e-05, - "loss": 1.8289, - "step": 334500 - }, - { - "epoch": 4.82, - "learning_rate": 6.623577856360247e-05, - "loss": 1.8281, - "step": 335000 - }, - { - "epoch": 4.83, - "learning_rate": 6.618385182184974e-05, - "loss": 1.8289, - "step": 335500 - }, - { - "epoch": 4.84, - "learning_rate": 6.6131925080097e-05, - "loss": 1.8285, - "step": 336000 - }, - { - "epoch": 4.84, - "learning_rate": 6.608010219182777e-05, - "loss": 1.8256, - "step": 336500 - }, - { - "epoch": 4.85, - "learning_rate": 6.602817545007504e-05, - "loss": 1.8247, - "step": 337000 - }, - { - "epoch": 4.86, - "learning_rate": 6.59762487083223e-05, - "loss": 1.8225, - "step": 337500 - }, - { - "epoch": 4.87, - "learning_rate": 6.592432196656956e-05, - "loss": 1.8277, - "step": 338000 - }, - { - "epoch": 4.87, - "learning_rate": 6.587249907830033e-05, - "loss": 1.8311, - "step": 338500 - }, - { - "epoch": 4.88, - "learning_rate": 6.58205723365476e-05, - "loss": 1.827, - "step": 339000 - }, - { - "epoch": 4.89, - "learning_rate": 6.576864559479487e-05, - "loss": 1.8207, - "step": 339500 - }, - { - "epoch": 4.89, - "learning_rate": 6.571671885304213e-05, - "loss": 1.8263, - "step": 340000 - }, - { - "epoch": 4.9, - "learning_rate": 6.56648959647729e-05, - "loss": 1.8259, - "step": 340500 - }, - { - "epoch": 4.91, - "learning_rate": 6.561296922302017e-05, - "loss": 1.8265, - "step": 341000 - }, - { - "epoch": 4.92, - "learning_rate": 6.556104248126744e-05, - "loss": 1.8268, - "step": 341500 - }, - { - "epoch": 4.92, - "learning_rate": 6.550911573951469e-05, - "loss": 1.8251, - "step": 342000 - }, - { - "epoch": 4.93, - "learning_rate": 6.545729285124547e-05, - "loss": 1.8209, - "step": 342500 - }, - { - "epoch": 4.94, - "learning_rate": 6.540536610949273e-05, - "loss": 1.8248, - "step": 343000 - }, - { - "epoch": 4.94, - "learning_rate": 6.535343936774e-05, - "loss": 1.8261, - "step": 343500 - }, - { - "epoch": 4.95, - "learning_rate": 6.530151262598726e-05, - "loss": 1.8247, - "step": 344000 - }, - { - "epoch": 4.96, - "learning_rate": 6.524968973771803e-05, - "loss": 1.8293, - "step": 344500 - }, - { - "epoch": 4.97, - "learning_rate": 6.51977629959653e-05, - "loss": 1.8268, - "step": 345000 - }, - { - "epoch": 4.97, - "learning_rate": 6.514583625421255e-05, - "loss": 1.8235, - "step": 345500 - }, - { - "epoch": 4.98, - "learning_rate": 6.509390951245983e-05, - "loss": 1.8214, - "step": 346000 - }, - { - "epoch": 4.99, - "learning_rate": 6.504208662419059e-05, - "loss": 1.8261, - "step": 346500 - }, - { - "epoch": 4.99, - "learning_rate": 6.499015988243787e-05, - "loss": 1.8202, - "step": 347000 - }, - { - "epoch": 5.0, - "eval_accuracy": 0.6490041136483795, - "eval_loss": 1.6904325485229492, - "eval_runtime": 1266.0603, - "eval_samples_per_second": 425.68, - "eval_steps_per_second": 26.605, - "step": 347365 - }, - { - "epoch": 5.0, - "learning_rate": 6.493823314068512e-05, - "loss": 1.8242, - "step": 347500 - }, - { - "epoch": 5.01, - "learning_rate": 6.488630639893238e-05, - "loss": 1.817, - "step": 348000 - }, - { - "epoch": 5.02, - "learning_rate": 6.483448351066316e-05, - "loss": 1.8215, - "step": 348500 - }, - { - "epoch": 5.02, - "learning_rate": 6.478255676891043e-05, - "loss": 1.8152, - "step": 349000 - }, - { - "epoch": 5.03, - "learning_rate": 6.47306300271577e-05, - "loss": 1.8193, - "step": 349500 - }, - { - "epoch": 5.04, - "learning_rate": 6.467870328540495e-05, - "loss": 1.8207, - "step": 350000 - }, - { - "epoch": 5.05, - "learning_rate": 6.462688039713573e-05, - "loss": 1.8199, - "step": 350500 - }, - { - "epoch": 5.05, - "learning_rate": 6.457495365538298e-05, - "loss": 1.8181, - "step": 351000 - }, - { - "epoch": 5.06, - "learning_rate": 6.452302691363025e-05, - "loss": 1.8214, - "step": 351500 - }, - { - "epoch": 5.07, - "learning_rate": 6.447110017187752e-05, - "loss": 1.8212, - "step": 352000 - }, - { - "epoch": 5.07, - "learning_rate": 6.441927728360829e-05, - "loss": 1.8196, - "step": 352500 - }, - { - "epoch": 5.08, - "learning_rate": 6.436735054185555e-05, - "loss": 1.8239, - "step": 353000 - }, - { - "epoch": 5.09, - "learning_rate": 6.431542380010281e-05, - "loss": 1.82, - "step": 353500 - }, - { - "epoch": 5.1, - "learning_rate": 6.426349705835009e-05, - "loss": 1.82, - "step": 354000 - }, - { - "epoch": 5.1, - "learning_rate": 6.421167417008084e-05, - "loss": 1.8192, - "step": 354500 - }, - { - "epoch": 5.11, - "learning_rate": 6.415974742832813e-05, - "loss": 1.8149, - "step": 355000 - }, - { - "epoch": 5.12, - "learning_rate": 6.410782068657538e-05, - "loss": 1.8183, - "step": 355500 - }, - { - "epoch": 5.12, - "learning_rate": 6.405589394482265e-05, - "loss": 1.8165, - "step": 356000 - }, - { - "epoch": 5.13, - "learning_rate": 6.400407105655342e-05, - "loss": 1.816, - "step": 356500 - }, - { - "epoch": 5.14, - "learning_rate": 6.395214431480068e-05, - "loss": 1.8177, - "step": 357000 - }, - { - "epoch": 5.15, - "learning_rate": 6.390021757304795e-05, - "loss": 1.817, - "step": 357500 - }, - { - "epoch": 5.15, - "learning_rate": 6.38482908312952e-05, - "loss": 1.8175, - "step": 358000 - }, - { - "epoch": 5.16, - "learning_rate": 6.379646794302599e-05, - "loss": 1.8133, - "step": 358500 - }, - { - "epoch": 5.17, - "learning_rate": 6.374454120127324e-05, - "loss": 1.8163, - "step": 359000 - }, - { - "epoch": 5.17, - "learning_rate": 6.369261445952052e-05, - "loss": 1.8156, - "step": 359500 - }, - { - "epoch": 5.18, - "learning_rate": 6.364068771776778e-05, - "loss": 1.8158, - "step": 360000 - }, - { - "epoch": 5.19, - "learning_rate": 6.358886482949856e-05, - "loss": 1.8141, - "step": 360500 - }, - { - "epoch": 5.2, - "learning_rate": 6.353693808774581e-05, - "loss": 1.8159, - "step": 361000 - }, - { - "epoch": 5.2, - "learning_rate": 6.348501134599307e-05, - "loss": 1.82, - "step": 361500 - }, - { - "epoch": 5.21, - "learning_rate": 6.343308460424035e-05, - "loss": 1.8148, - "step": 362000 - }, - { - "epoch": 5.22, - "learning_rate": 6.33812617159711e-05, - "loss": 1.8171, - "step": 362500 - }, - { - "epoch": 5.23, - "learning_rate": 6.332933497421838e-05, - "loss": 1.8167, - "step": 363000 - }, - { - "epoch": 5.23, - "learning_rate": 6.327740823246564e-05, - "loss": 1.8164, - "step": 363500 - }, - { - "epoch": 5.24, - "learning_rate": 6.32254814907129e-05, - "loss": 1.8144, - "step": 364000 - }, - { - "epoch": 5.25, - "learning_rate": 6.317365860244367e-05, - "loss": 1.8186, - "step": 364500 - }, - { - "epoch": 5.25, - "learning_rate": 6.312173186069094e-05, - "loss": 1.8184, - "step": 365000 - }, - { - "epoch": 5.26, - "learning_rate": 6.306980511893821e-05, - "loss": 1.8155, - "step": 365500 - }, - { - "epoch": 5.27, - "learning_rate": 6.301787837718546e-05, - "loss": 1.8184, - "step": 366000 - }, - { - "epoch": 5.28, - "learning_rate": 6.296605548891624e-05, - "loss": 1.8176, - "step": 366500 - }, - { - "epoch": 5.28, - "learning_rate": 6.29141287471635e-05, - "loss": 1.8143, - "step": 367000 - }, - { - "epoch": 5.29, - "learning_rate": 6.286220200541077e-05, - "loss": 1.8174, - "step": 367500 - }, - { - "epoch": 5.3, - "learning_rate": 6.281027526365803e-05, - "loss": 1.8124, - "step": 368000 - }, - { - "epoch": 5.3, - "learning_rate": 6.275845237538881e-05, - "loss": 1.8168, - "step": 368500 - }, - { - "epoch": 5.31, - "learning_rate": 6.270652563363607e-05, - "loss": 1.8124, - "step": 369000 - }, - { - "epoch": 5.32, - "learning_rate": 6.265459889188334e-05, - "loss": 1.8129, - "step": 369500 - }, - { - "epoch": 5.33, - "learning_rate": 6.26026721501306e-05, - "loss": 1.815, - "step": 370000 - }, - { - "epoch": 5.33, - "learning_rate": 6.255084926186137e-05, - "loss": 1.8112, - "step": 370500 - }, - { - "epoch": 5.34, - "learning_rate": 6.249892252010864e-05, - "loss": 1.8129, - "step": 371000 - }, - { - "epoch": 5.35, - "learning_rate": 6.24469957783559e-05, - "loss": 1.8123, - "step": 371500 - }, - { - "epoch": 5.35, - "learning_rate": 6.239506903660316e-05, - "loss": 1.8152, - "step": 372000 - }, - { - "epoch": 5.36, - "learning_rate": 6.234324614833393e-05, - "loss": 1.8154, - "step": 372500 - }, - { - "epoch": 5.37, - "learning_rate": 6.22913194065812e-05, - "loss": 1.8118, - "step": 373000 - }, - { - "epoch": 5.38, - "learning_rate": 6.223939266482847e-05, - "loss": 1.8102, - "step": 373500 - }, - { - "epoch": 5.38, - "learning_rate": 6.218746592307572e-05, - "loss": 1.8123, - "step": 374000 - }, - { - "epoch": 5.39, - "learning_rate": 6.21356430348065e-05, - "loss": 1.8139, - "step": 374500 - }, - { - "epoch": 5.4, - "learning_rate": 6.208371629305376e-05, - "loss": 1.8072, - "step": 375000 - }, - { - "epoch": 5.4, - "learning_rate": 6.203178955130102e-05, - "loss": 1.8103, - "step": 375500 - }, - { - "epoch": 5.41, - "learning_rate": 6.197986280954829e-05, - "loss": 1.8124, - "step": 376000 - }, - { - "epoch": 5.42, - "learning_rate": 6.192803992127906e-05, - "loss": 1.8093, - "step": 376500 - }, - { - "epoch": 5.43, - "learning_rate": 6.187611317952633e-05, - "loss": 1.8109, - "step": 377000 - }, - { - "epoch": 5.43, - "learning_rate": 6.18241864377736e-05, - "loss": 1.8066, - "step": 377500 - }, - { - "epoch": 5.44, - "learning_rate": 6.177225969602086e-05, - "loss": 1.8095, - "step": 378000 - }, - { - "epoch": 5.45, - "learning_rate": 6.172043680775163e-05, - "loss": 1.8108, - "step": 378500 - }, - { - "epoch": 5.46, - "learning_rate": 6.16685100659989e-05, - "loss": 1.8083, - "step": 379000 - }, - { - "epoch": 5.46, - "learning_rate": 6.161658332424615e-05, - "loss": 1.8126, - "step": 379500 - }, - { - "epoch": 5.47, - "learning_rate": 6.156465658249342e-05, - "loss": 1.8116, - "step": 380000 - }, - { - "epoch": 5.48, - "learning_rate": 6.151283369422419e-05, - "loss": 1.8093, - "step": 380500 - }, - { - "epoch": 5.48, - "learning_rate": 6.146090695247146e-05, - "loss": 1.8125, - "step": 381000 - }, - { - "epoch": 5.49, - "learning_rate": 6.140898021071872e-05, - "loss": 1.8105, - "step": 381500 - }, - { - "epoch": 5.5, - "learning_rate": 6.135705346896599e-05, - "loss": 1.8114, - "step": 382000 - }, - { - "epoch": 5.51, - "learning_rate": 6.130523058069676e-05, - "loss": 1.8111, - "step": 382500 - }, - { - "epoch": 5.51, - "learning_rate": 6.125330383894403e-05, - "loss": 1.8092, - "step": 383000 - }, - { - "epoch": 5.52, - "learning_rate": 6.120137709719128e-05, - "loss": 1.8081, - "step": 383500 - }, - { - "epoch": 5.53, - "learning_rate": 6.114945035543855e-05, - "loss": 1.8051, - "step": 384000 - }, - { - "epoch": 5.53, - "learning_rate": 6.109762746716932e-05, - "loss": 1.8085, - "step": 384500 - }, - { - "epoch": 5.54, - "learning_rate": 6.104570072541658e-05, - "loss": 1.8095, - "step": 385000 - }, - { - "epoch": 5.55, - "learning_rate": 6.0993773983663846e-05, - "loss": 1.8033, - "step": 385500 - }, - { - "epoch": 5.56, - "learning_rate": 6.094184724191111e-05, - "loss": 1.8116, - "step": 386000 - }, - { - "epoch": 5.56, - "learning_rate": 6.089002435364188e-05, - "loss": 1.8123, - "step": 386500 - }, - { - "epoch": 5.57, - "learning_rate": 6.083809761188914e-05, - "loss": 1.806, - "step": 387000 - }, - { - "epoch": 5.58, - "learning_rate": 6.078617087013642e-05, - "loss": 1.8117, - "step": 387500 - }, - { - "epoch": 5.58, - "learning_rate": 6.073424412838368e-05, - "loss": 1.8058, - "step": 388000 - }, - { - "epoch": 5.59, - "learning_rate": 6.068242124011445e-05, - "loss": 1.8067, - "step": 388500 - }, - { - "epoch": 5.6, - "learning_rate": 6.063049449836171e-05, - "loss": 1.8075, - "step": 389000 - }, - { - "epoch": 5.61, - "learning_rate": 6.057856775660898e-05, - "loss": 1.8102, - "step": 389500 - }, - { - "epoch": 5.61, - "learning_rate": 6.052664101485624e-05, - "loss": 1.8078, - "step": 390000 - }, - { - "epoch": 5.62, - "learning_rate": 6.0474714273103504e-05, - "loss": 1.8082, - "step": 390500 - }, - { - "epoch": 5.63, - "learning_rate": 6.042289138483428e-05, - "loss": 1.807, - "step": 391000 - }, - { - "epoch": 5.64, - "learning_rate": 6.037096464308154e-05, - "loss": 1.8013, - "step": 391500 - }, - { - "epoch": 5.64, - "learning_rate": 6.0319037901328814e-05, - "loss": 1.8072, - "step": 392000 - }, - { - "epoch": 5.65, - "learning_rate": 6.0267111159576075e-05, - "loss": 1.8062, - "step": 392500 - }, - { - "epoch": 5.66, - "learning_rate": 6.021528827130685e-05, - "loss": 1.8062, - "step": 393000 - }, - { - "epoch": 5.66, - "learning_rate": 6.016336152955411e-05, - "loss": 1.8034, - "step": 393500 - }, - { - "epoch": 5.67, - "learning_rate": 6.011143478780137e-05, - "loss": 1.808, - "step": 394000 - }, - { - "epoch": 5.68, - "learning_rate": 6.005950804604864e-05, - "loss": 1.8079, - "step": 394500 - }, - { - "epoch": 5.69, - "learning_rate": 6.0007685157779406e-05, - "loss": 1.8035, - "step": 395000 - }, - { - "epoch": 5.69, - "learning_rate": 5.9955758416026674e-05, - "loss": 1.8072, - "step": 395500 - }, - { - "epoch": 5.7, - "learning_rate": 5.9903831674273936e-05, - "loss": 1.8055, - "step": 396000 - }, - { - "epoch": 5.71, - "learning_rate": 5.98519049325212e-05, - "loss": 1.8083, - "step": 396500 - }, - { - "epoch": 5.71, - "learning_rate": 5.980008204425197e-05, - "loss": 1.8048, - "step": 397000 - }, - { - "epoch": 5.72, - "learning_rate": 5.9748155302499245e-05, - "loss": 1.8053, - "step": 397500 - }, - { - "epoch": 5.73, - "learning_rate": 5.96962285607465e-05, - "loss": 1.8088, - "step": 398000 - }, - { - "epoch": 5.74, - "learning_rate": 5.964430181899376e-05, - "loss": 1.8074, - "step": 398500 - }, - { - "epoch": 5.74, - "learning_rate": 5.9592478930724535e-05, - "loss": 1.8055, - "step": 399000 - }, - { - "epoch": 5.75, - "learning_rate": 5.9540552188971796e-05, - "loss": 1.8081, - "step": 399500 - }, - { - "epoch": 5.76, - "learning_rate": 5.948862544721907e-05, - "loss": 1.8091, - "step": 400000 - }, - { - "epoch": 5.76, - "learning_rate": 5.943669870546633e-05, - "loss": 1.8033, - "step": 400500 - }, - { - "epoch": 5.77, - "learning_rate": 5.9384875817197106e-05, - "loss": 1.8031, - "step": 401000 - }, - { - "epoch": 5.78, - "learning_rate": 5.933294907544437e-05, - "loss": 1.8068, - "step": 401500 - }, - { - "epoch": 5.79, - "learning_rate": 5.928102233369163e-05, - "loss": 1.8025, - "step": 402000 - }, - { - "epoch": 5.79, - "learning_rate": 5.92290955919389e-05, - "loss": 1.8037, - "step": 402500 - }, - { - "epoch": 5.8, - "learning_rate": 5.917727270366966e-05, - "loss": 1.8097, - "step": 403000 - }, - { - "epoch": 5.81, - "learning_rate": 5.912534596191693e-05, - "loss": 1.8004, - "step": 403500 - }, - { - "epoch": 5.82, - "learning_rate": 5.907341922016419e-05, - "loss": 1.8012, - "step": 404000 - }, - { - "epoch": 5.82, - "learning_rate": 5.9021492478411455e-05, - "loss": 1.8084, - "step": 404500 - }, - { - "epoch": 5.83, - "learning_rate": 5.896966959014223e-05, - "loss": 1.8011, - "step": 405000 - }, - { - "epoch": 5.84, - "learning_rate": 5.891774284838949e-05, - "loss": 1.8042, - "step": 405500 - }, - { - "epoch": 5.84, - "learning_rate": 5.8865816106636764e-05, - "loss": 1.8036, - "step": 406000 - }, - { - "epoch": 5.85, - "learning_rate": 5.8813889364884026e-05, - "loss": 1.8038, - "step": 406500 - }, - { - "epoch": 5.86, - "learning_rate": 5.87620664766148e-05, - "loss": 1.8015, - "step": 407000 - }, - { - "epoch": 5.87, - "learning_rate": 5.871013973486206e-05, - "loss": 1.8043, - "step": 407500 - }, - { - "epoch": 5.87, - "learning_rate": 5.865821299310933e-05, - "loss": 1.8004, - "step": 408000 - }, - { - "epoch": 5.88, - "learning_rate": 5.860628625135659e-05, - "loss": 1.8037, - "step": 408500 - }, - { - "epoch": 5.89, - "learning_rate": 5.855446336308736e-05, - "loss": 1.8021, - "step": 409000 - }, - { - "epoch": 5.89, - "learning_rate": 5.8502536621334625e-05, - "loss": 1.8039, - "step": 409500 - }, - { - "epoch": 5.9, - "learning_rate": 5.8450609879581886e-05, - "loss": 1.7982, - "step": 410000 - }, - { - "epoch": 5.91, - "learning_rate": 5.8398683137829154e-05, - "loss": 1.8009, - "step": 410500 - }, - { - "epoch": 5.92, - "learning_rate": 5.834686024955992e-05, - "loss": 1.7999, - "step": 411000 - }, - { - "epoch": 5.92, - "learning_rate": 5.8294933507807196e-05, - "loss": 1.8029, - "step": 411500 - }, - { - "epoch": 5.93, - "learning_rate": 5.824300676605445e-05, - "loss": 1.7964, - "step": 412000 - }, - { - "epoch": 5.94, - "learning_rate": 5.819108002430171e-05, - "loss": 1.801, - "step": 412500 - }, - { - "epoch": 5.94, - "learning_rate": 5.8139257136032485e-05, - "loss": 1.8013, - "step": 413000 - }, - { - "epoch": 5.95, - "learning_rate": 5.8087330394279746e-05, - "loss": 1.8027, - "step": 413500 - }, - { - "epoch": 5.96, - "learning_rate": 5.803540365252702e-05, - "loss": 1.7981, - "step": 414000 - }, - { - "epoch": 5.97, - "learning_rate": 5.798347691077428e-05, - "loss": 1.7972, - "step": 414500 - }, - { - "epoch": 5.97, - "learning_rate": 5.7931654022505056e-05, - "loss": 1.8025, - "step": 415000 - }, - { - "epoch": 5.98, - "learning_rate": 5.787972728075232e-05, - "loss": 1.8021, - "step": 415500 - }, - { - "epoch": 5.99, - "learning_rate": 5.782780053899958e-05, - "loss": 1.7997, - "step": 416000 - }, - { - "epoch": 6.0, - "learning_rate": 5.777587379724685e-05, - "loss": 1.7996, - "step": 416500 - }, - { - "epoch": 6.0, - "eval_accuracy": 0.6524074511202295, - "eval_loss": 1.670515775680542, - "eval_runtime": 1271.0271, - "eval_samples_per_second": 424.017, - "eval_steps_per_second": 26.501, - "step": 416838 - }, - { - "epoch": 6.0, - "learning_rate": 5.772405090897762e-05, - "loss": 1.7959, - "step": 417000 - }, - { - "epoch": 6.01, - "learning_rate": 5.767212416722488e-05, - "loss": 1.7976, - "step": 417500 - }, - { - "epoch": 6.02, - "learning_rate": 5.7620197425472143e-05, - "loss": 1.7948, - "step": 418000 - }, - { - "epoch": 6.02, - "learning_rate": 5.756827068371942e-05, - "loss": 1.7999, - "step": 418500 - }, - { - "epoch": 6.03, - "learning_rate": 5.751644779545018e-05, - "loss": 1.7928, - "step": 419000 - }, - { - "epoch": 6.04, - "learning_rate": 5.746452105369745e-05, - "loss": 1.7988, - "step": 419500 - }, - { - "epoch": 6.05, - "learning_rate": 5.7412594311944715e-05, - "loss": 1.7969, - "step": 420000 - }, - { - "epoch": 6.05, - "learning_rate": 5.7360667570191976e-05, - "loss": 1.797, - "step": 420500 - }, - { - "epoch": 6.06, - "learning_rate": 5.730884468192275e-05, - "loss": 1.7943, - "step": 421000 - }, - { - "epoch": 6.07, - "learning_rate": 5.725691794017001e-05, - "loss": 1.7949, - "step": 421500 - }, - { - "epoch": 6.07, - "learning_rate": 5.720499119841728e-05, - "loss": 1.8002, - "step": 422000 - }, - { - "epoch": 6.08, - "learning_rate": 5.715306445666454e-05, - "loss": 1.7974, - "step": 422500 - }, - { - "epoch": 6.09, - "learning_rate": 5.7101241568395313e-05, - "loss": 1.7974, - "step": 423000 - }, - { - "epoch": 6.1, - "learning_rate": 5.7049314826642575e-05, - "loss": 1.7962, - "step": 423500 - }, - { - "epoch": 6.1, - "learning_rate": 5.6997388084889836e-05, - "loss": 1.7945, - "step": 424000 - }, - { - "epoch": 6.11, - "learning_rate": 5.6945461343137105e-05, - "loss": 1.7977, - "step": 424500 - }, - { - "epoch": 6.12, - "learning_rate": 5.689363845486787e-05, - "loss": 1.7925, - "step": 425000 - }, - { - "epoch": 6.12, - "learning_rate": 5.684171171311514e-05, - "loss": 1.7992, - "step": 425500 - }, - { - "epoch": 6.13, - "learning_rate": 5.67897849713624e-05, - "loss": 1.7964, - "step": 426000 - }, - { - "epoch": 6.14, - "learning_rate": 5.673785822960966e-05, - "loss": 1.7952, - "step": 426500 - }, - { - "epoch": 6.15, - "learning_rate": 5.6686035341340435e-05, - "loss": 1.7971, - "step": 427000 - }, - { - "epoch": 6.15, - "learning_rate": 5.663410859958771e-05, - "loss": 1.7936, - "step": 427500 - }, - { - "epoch": 6.16, - "learning_rate": 5.658218185783497e-05, - "loss": 1.7953, - "step": 428000 - }, - { - "epoch": 6.17, - "learning_rate": 5.6530255116082233e-05, - "loss": 1.7987, - "step": 428500 - }, - { - "epoch": 6.18, - "learning_rate": 5.6478432227813006e-05, - "loss": 1.7965, - "step": 429000 - }, - { - "epoch": 6.18, - "learning_rate": 5.642650548606027e-05, - "loss": 1.7943, - "step": 429500 - }, - { - "epoch": 6.19, - "learning_rate": 5.6374578744307536e-05, - "loss": 1.7949, - "step": 430000 - }, - { - "epoch": 6.2, - "learning_rate": 5.63226520025548e-05, - "loss": 1.7938, - "step": 430500 - }, - { - "epoch": 6.2, - "learning_rate": 5.627082911428557e-05, - "loss": 1.7929, - "step": 431000 - }, - { - "epoch": 6.21, - "learning_rate": 5.621890237253283e-05, - "loss": 1.7953, - "step": 431500 - }, - { - "epoch": 6.22, - "learning_rate": 5.6166975630780094e-05, - "loss": 1.796, - "step": 432000 - }, - { - "epoch": 6.23, - "learning_rate": 5.611504888902737e-05, - "loss": 1.7938, - "step": 432500 - }, - { - "epoch": 6.23, - "learning_rate": 5.606322600075813e-05, - "loss": 1.7925, - "step": 433000 - }, - { - "epoch": 6.24, - "learning_rate": 5.6011299259005403e-05, - "loss": 1.7946, - "step": 433500 - }, - { - "epoch": 6.25, - "learning_rate": 5.5959372517252665e-05, - "loss": 1.7945, - "step": 434000 - }, - { - "epoch": 6.25, - "learning_rate": 5.590744577549992e-05, - "loss": 1.7896, - "step": 434500 - }, - { - "epoch": 6.26, - "learning_rate": 5.58556228872307e-05, - "loss": 1.7962, - "step": 435000 - }, - { - "epoch": 6.27, - "learning_rate": 5.5803696145477954e-05, - "loss": 1.7921, - "step": 435500 - }, - { - "epoch": 6.28, - "learning_rate": 5.5751873257208734e-05, - "loss": 1.7922, - "step": 436000 - }, - { - "epoch": 6.28, - "learning_rate": 5.569994651545599e-05, - "loss": 1.7935, - "step": 436500 - }, - { - "epoch": 6.29, - "learning_rate": 5.5648019773703264e-05, - "loss": 1.7973, - "step": 437000 - }, - { - "epoch": 6.3, - "learning_rate": 5.5596093031950525e-05, - "loss": 1.795, - "step": 437500 - }, - { - "epoch": 6.3, - "learning_rate": 5.5544166290197794e-05, - "loss": 1.7883, - "step": 438000 - }, - { - "epoch": 6.31, - "learning_rate": 5.5492239548445055e-05, - "loss": 1.7932, - "step": 438500 - }, - { - "epoch": 6.32, - "learning_rate": 5.544031280669232e-05, - "loss": 1.7926, - "step": 439000 - }, - { - "epoch": 6.33, - "learning_rate": 5.538838606493959e-05, - "loss": 1.7896, - "step": 439500 - }, - { - "epoch": 6.33, - "learning_rate": 5.533656317667035e-05, - "loss": 1.7884, - "step": 440000 - }, - { - "epoch": 6.34, - "learning_rate": 5.5284636434917626e-05, - "loss": 1.7907, - "step": 440500 - }, - { - "epoch": 6.35, - "learning_rate": 5.523270969316489e-05, - "loss": 1.7919, - "step": 441000 - }, - { - "epoch": 6.35, - "learning_rate": 5.518078295141215e-05, - "loss": 1.7958, - "step": 441500 - }, - { - "epoch": 6.36, - "learning_rate": 5.512896006314292e-05, - "loss": 1.7903, - "step": 442000 - }, - { - "epoch": 6.37, - "learning_rate": 5.5077033321390184e-05, - "loss": 1.7971, - "step": 442500 - }, - { - "epoch": 6.38, - "learning_rate": 5.502510657963745e-05, - "loss": 1.7891, - "step": 443000 - }, - { - "epoch": 6.38, - "learning_rate": 5.4973179837884714e-05, - "loss": 1.791, - "step": 443500 - }, - { - "epoch": 6.39, - "learning_rate": 5.492135694961549e-05, - "loss": 1.7917, - "step": 444000 - }, - { - "epoch": 6.4, - "learning_rate": 5.486943020786275e-05, - "loss": 1.7915, - "step": 444500 - }, - { - "epoch": 6.41, - "learning_rate": 5.481750346611001e-05, - "loss": 1.7892, - "step": 445000 - }, - { - "epoch": 6.41, - "learning_rate": 5.4765576724357285e-05, - "loss": 1.7902, - "step": 445500 - }, - { - "epoch": 6.42, - "learning_rate": 5.4713753836088044e-05, - "loss": 1.7873, - "step": 446000 - }, - { - "epoch": 6.43, - "learning_rate": 5.466182709433532e-05, - "loss": 1.7919, - "step": 446500 - }, - { - "epoch": 6.43, - "learning_rate": 5.4609900352582574e-05, - "loss": 1.7903, - "step": 447000 - }, - { - "epoch": 6.44, - "learning_rate": 5.455797361082985e-05, - "loss": 1.7929, - "step": 447500 - }, - { - "epoch": 6.45, - "learning_rate": 5.450615072256061e-05, - "loss": 1.7872, - "step": 448000 - }, - { - "epoch": 6.46, - "learning_rate": 5.4454223980807884e-05, - "loss": 1.7884, - "step": 448500 - }, - { - "epoch": 6.46, - "learning_rate": 5.4402297239055145e-05, - "loss": 1.791, - "step": 449000 - }, - { - "epoch": 6.47, - "learning_rate": 5.435037049730241e-05, - "loss": 1.7905, - "step": 449500 - }, - { - "epoch": 6.48, - "learning_rate": 5.429854760903318e-05, - "loss": 1.7938, - "step": 450000 - }, - { - "epoch": 6.48, - "learning_rate": 5.424662086728044e-05, - "loss": 1.7886, - "step": 450500 - }, - { - "epoch": 6.49, - "learning_rate": 5.419469412552771e-05, - "loss": 1.7929, - "step": 451000 - }, - { - "epoch": 6.5, - "learning_rate": 5.414276738377497e-05, - "loss": 1.7898, - "step": 451500 - }, - { - "epoch": 6.51, - "learning_rate": 5.4090944495505744e-05, - "loss": 1.7883, - "step": 452000 - }, - { - "epoch": 6.51, - "learning_rate": 5.4039017753753005e-05, - "loss": 1.7875, - "step": 452500 - }, - { - "epoch": 6.52, - "learning_rate": 5.398709101200027e-05, - "loss": 1.7869, - "step": 453000 - }, - { - "epoch": 6.53, - "learning_rate": 5.393516427024754e-05, - "loss": 1.7875, - "step": 453500 - }, - { - "epoch": 6.53, - "learning_rate": 5.38833413819783e-05, - "loss": 1.7891, - "step": 454000 - }, - { - "epoch": 6.54, - "learning_rate": 5.383141464022558e-05, - "loss": 1.7887, - "step": 454500 - }, - { - "epoch": 6.55, - "learning_rate": 5.377948789847284e-05, - "loss": 1.7904, - "step": 455000 - }, - { - "epoch": 6.56, - "learning_rate": 5.37275611567201e-05, - "loss": 1.7871, - "step": 455500 - }, - { - "epoch": 6.56, - "learning_rate": 5.367573826845087e-05, - "loss": 1.7887, - "step": 456000 - }, - { - "epoch": 6.57, - "learning_rate": 5.362381152669814e-05, - "loss": 1.7875, - "step": 456500 - }, - { - "epoch": 6.58, - "learning_rate": 5.35718847849454e-05, - "loss": 1.7883, - "step": 457000 - }, - { - "epoch": 6.59, - "learning_rate": 5.3519958043192664e-05, - "loss": 1.7863, - "step": 457500 - }, - { - "epoch": 6.59, - "learning_rate": 5.346813515492344e-05, - "loss": 1.7863, - "step": 458000 - }, - { - "epoch": 6.6, - "learning_rate": 5.34162084131707e-05, - "loss": 1.7886, - "step": 458500 - }, - { - "epoch": 6.61, - "learning_rate": 5.3364281671417974e-05, - "loss": 1.785, - "step": 459000 - }, - { - "epoch": 6.61, - "learning_rate": 5.331235492966523e-05, - "loss": 1.7838, - "step": 459500 - }, - { - "epoch": 6.62, - "learning_rate": 5.326053204139601e-05, - "loss": 1.7836, - "step": 460000 - }, - { - "epoch": 6.63, - "learning_rate": 5.320860529964326e-05, - "loss": 1.7848, - "step": 460500 - }, - { - "epoch": 6.64, - "learning_rate": 5.3156678557890524e-05, - "loss": 1.7871, - "step": 461000 - }, - { - "epoch": 6.64, - "learning_rate": 5.31047518161378e-05, - "loss": 1.7878, - "step": 461500 - }, - { - "epoch": 6.65, - "learning_rate": 5.305292892786856e-05, - "loss": 1.7856, - "step": 462000 - }, - { - "epoch": 6.66, - "learning_rate": 5.3001002186115834e-05, - "loss": 1.788, - "step": 462500 - }, - { - "epoch": 6.66, - "learning_rate": 5.2949075444363095e-05, - "loss": 1.7862, - "step": 463000 - }, - { - "epoch": 6.67, - "learning_rate": 5.289714870261036e-05, - "loss": 1.7813, - "step": 463500 - }, - { - "epoch": 6.68, - "learning_rate": 5.284532581434113e-05, - "loss": 1.7879, - "step": 464000 - }, - { - "epoch": 6.69, - "learning_rate": 5.279339907258839e-05, - "loss": 1.7848, - "step": 464500 - }, - { - "epoch": 6.69, - "learning_rate": 5.274147233083566e-05, - "loss": 1.7879, - "step": 465000 - }, - { - "epoch": 6.7, - "learning_rate": 5.268954558908292e-05, - "loss": 1.7831, - "step": 465500 - }, - { - "epoch": 6.71, - "learning_rate": 5.2637722700813694e-05, - "loss": 1.7868, - "step": 466000 - }, - { - "epoch": 6.71, - "learning_rate": 5.2585795959060956e-05, - "loss": 1.7855, - "step": 466500 - }, - { - "epoch": 6.72, - "learning_rate": 5.253386921730823e-05, - "loss": 1.7821, - "step": 467000 - }, - { - "epoch": 6.73, - "learning_rate": 5.248204632903899e-05, - "loss": 1.787, - "step": 467500 - }, - { - "epoch": 6.74, - "learning_rate": 5.2430119587286265e-05, - "loss": 1.7819, - "step": 468000 - }, - { - "epoch": 6.74, - "learning_rate": 5.237819284553353e-05, - "loss": 1.7921, - "step": 468500 - }, - { - "epoch": 6.75, - "learning_rate": 5.232626610378079e-05, - "loss": 1.7887, - "step": 469000 - }, - { - "epoch": 6.76, - "learning_rate": 5.227433936202806e-05, - "loss": 1.7878, - "step": 469500 - }, - { - "epoch": 6.77, - "learning_rate": 5.222241262027532e-05, - "loss": 1.7862, - "step": 470000 - }, - { - "epoch": 6.77, - "learning_rate": 5.217048587852258e-05, - "loss": 1.7885, - "step": 470500 - }, - { - "epoch": 6.78, - "learning_rate": 5.211855913676985e-05, - "loss": 1.782, - "step": 471000 - }, - { - "epoch": 6.79, - "learning_rate": 5.2066736248500614e-05, - "loss": 1.7865, - "step": 471500 - }, - { - "epoch": 6.79, - "learning_rate": 5.201480950674788e-05, - "loss": 1.7851, - "step": 472000 - }, - { - "epoch": 6.8, - "learning_rate": 5.1962882764995144e-05, - "loss": 1.7843, - "step": 472500 - }, - { - "epoch": 6.81, - "learning_rate": 5.1910956023242406e-05, - "loss": 1.7836, - "step": 473000 - }, - { - "epoch": 6.82, - "learning_rate": 5.185913313497318e-05, - "loss": 1.7843, - "step": 473500 - }, - { - "epoch": 6.82, - "learning_rate": 5.180720639322044e-05, - "loss": 1.7803, - "step": 474000 - }, - { - "epoch": 6.83, - "learning_rate": 5.1755279651467715e-05, - "loss": 1.783, - "step": 474500 - }, - { - "epoch": 6.84, - "learning_rate": 5.170335290971498e-05, - "loss": 1.7814, - "step": 475000 - }, - { - "epoch": 6.84, - "learning_rate": 5.165153002144575e-05, - "loss": 1.7837, - "step": 475500 - }, - { - "epoch": 6.85, - "learning_rate": 5.159960327969301e-05, - "loss": 1.7838, - "step": 476000 - }, - { - "epoch": 6.86, - "learning_rate": 5.154767653794028e-05, - "loss": 1.7855, - "step": 476500 - }, - { - "epoch": 6.87, - "learning_rate": 5.149574979618754e-05, - "loss": 1.7831, - "step": 477000 - }, - { - "epoch": 6.87, - "learning_rate": 5.1443926907918314e-05, - "loss": 1.7825, - "step": 477500 - }, - { - "epoch": 6.88, - "learning_rate": 5.1392000166165576e-05, - "loss": 1.7831, - "step": 478000 - }, - { - "epoch": 6.89, - "learning_rate": 5.134007342441284e-05, - "loss": 1.7813, - "step": 478500 - }, - { - "epoch": 6.89, - "learning_rate": 5.128814668266011e-05, - "loss": 1.7834, - "step": 479000 - }, - { - "epoch": 6.9, - "learning_rate": 5.123632379439087e-05, - "loss": 1.7812, - "step": 479500 - }, - { - "epoch": 6.91, - "learning_rate": 5.118439705263815e-05, - "loss": 1.7805, - "step": 480000 - }, - { - "epoch": 6.92, - "learning_rate": 5.113247031088541e-05, - "loss": 1.7826, - "step": 480500 - }, - { - "epoch": 6.92, - "learning_rate": 5.108054356913266e-05, - "loss": 1.7811, - "step": 481000 - }, - { - "epoch": 6.93, - "learning_rate": 5.102872068086344e-05, - "loss": 1.7838, - "step": 481500 - }, - { - "epoch": 6.94, - "learning_rate": 5.09767939391107e-05, - "loss": 1.7842, - "step": 482000 - }, - { - "epoch": 6.95, - "learning_rate": 5.092486719735797e-05, - "loss": 1.7795, - "step": 482500 - }, - { - "epoch": 6.95, - "learning_rate": 5.0872940455605234e-05, - "loss": 1.7821, - "step": 483000 - }, - { - "epoch": 6.96, - "learning_rate": 5.082111756733601e-05, - "loss": 1.7795, - "step": 483500 - }, - { - "epoch": 6.97, - "learning_rate": 5.076919082558327e-05, - "loss": 1.7815, - "step": 484000 - }, - { - "epoch": 6.97, - "learning_rate": 5.071726408383053e-05, - "loss": 1.7804, - "step": 484500 - }, - { - "epoch": 6.98, - "learning_rate": 5.06653373420778e-05, - "loss": 1.7786, - "step": 485000 - }, - { - "epoch": 6.99, - "learning_rate": 5.061351445380857e-05, - "loss": 1.7784, - "step": 485500 - }, - { - "epoch": 7.0, - "learning_rate": 5.056158771205583e-05, - "loss": 1.7767, - "step": 486000 - }, - { - "epoch": 7.0, - "eval_accuracy": 0.655838567605194, - "eval_loss": 1.647852897644043, - "eval_runtime": 1268.2736, - "eval_samples_per_second": 424.938, - "eval_steps_per_second": 26.559, - "step": 486311 - }, - { - "epoch": 7.0, - "learning_rate": 5.0509660970303095e-05, - "loss": 1.7796, - "step": 486500 - }, - { - "epoch": 7.01, - "learning_rate": 5.045773422855037e-05, - "loss": 1.7809, - "step": 487000 - }, - { - "epoch": 7.02, - "learning_rate": 5.040591134028113e-05, - "loss": 1.778, - "step": 487500 - }, - { - "epoch": 7.02, - "learning_rate": 5.0353984598528404e-05, - "loss": 1.7798, - "step": 488000 - }, - { - "epoch": 7.03, - "learning_rate": 5.0302057856775666e-05, - "loss": 1.7795, - "step": 488500 - }, - { - "epoch": 7.04, - "learning_rate": 5.025013111502293e-05, - "loss": 1.7764, - "step": 489000 - }, - { - "epoch": 7.05, - "learning_rate": 5.01983082267537e-05, - "loss": 1.7809, - "step": 489500 - }, - { - "epoch": 7.05, - "learning_rate": 5.014638148500096e-05, - "loss": 1.7751, - "step": 490000 - }, - { - "epoch": 7.06, - "learning_rate": 5.009445474324823e-05, - "loss": 1.7768, - "step": 490500 - }, - { - "epoch": 7.07, - "learning_rate": 5.004252800149549e-05, - "loss": 1.778, - "step": 491000 - }, - { - "epoch": 7.07, - "learning_rate": 4.999070511322626e-05, - "loss": 1.7785, - "step": 491500 - }, - { - "epoch": 7.08, - "learning_rate": 4.9938778371473526e-05, - "loss": 1.7711, - "step": 492000 - }, - { - "epoch": 7.09, - "learning_rate": 4.9886851629720794e-05, - "loss": 1.7772, - "step": 492500 - }, - { - "epoch": 7.1, - "learning_rate": 4.983492488796806e-05, - "loss": 1.7781, - "step": 493000 - }, - { - "epoch": 7.1, - "learning_rate": 4.978310199969883e-05, - "loss": 1.7742, - "step": 493500 - }, - { - "epoch": 7.11, - "learning_rate": 4.97311752579461e-05, - "loss": 1.7752, - "step": 494000 - }, - { - "epoch": 7.12, - "learning_rate": 4.967924851619335e-05, - "loss": 1.7816, - "step": 494500 - }, - { - "epoch": 7.13, - "learning_rate": 4.962732177444062e-05, - "loss": 1.7716, - "step": 495000 - }, - { - "epoch": 7.13, - "learning_rate": 4.9575498886171386e-05, - "loss": 1.7801, - "step": 495500 - }, - { - "epoch": 7.14, - "learning_rate": 4.9523572144418655e-05, - "loss": 1.7721, - "step": 496000 - }, - { - "epoch": 7.15, - "learning_rate": 4.947164540266592e-05, - "loss": 1.778, - "step": 496500 - }, - { - "epoch": 7.15, - "learning_rate": 4.9419718660913185e-05, - "loss": 1.7751, - "step": 497000 - }, - { - "epoch": 7.16, - "learning_rate": 4.936789577264396e-05, - "loss": 1.776, - "step": 497500 - }, - { - "epoch": 7.17, - "learning_rate": 4.9315969030891226e-05, - "loss": 1.774, - "step": 498000 - }, - { - "epoch": 7.18, - "learning_rate": 4.926404228913849e-05, - "loss": 1.7743, - "step": 498500 - }, - { - "epoch": 7.18, - "learning_rate": 4.921211554738575e-05, - "loss": 1.7742, - "step": 499000 - }, - { - "epoch": 7.19, - "learning_rate": 4.916029265911652e-05, - "loss": 1.7744, - "step": 499500 - }, - { - "epoch": 7.2, - "learning_rate": 4.9108365917363783e-05, - "loss": 1.7771, - "step": 500000 - }, - { - "epoch": 7.2, - "learning_rate": 4.905643917561105e-05, - "loss": 1.7769, - "step": 500500 - }, - { - "epoch": 7.21, - "learning_rate": 4.900451243385831e-05, - "loss": 1.775, - "step": 501000 - }, - { - "epoch": 7.22, - "learning_rate": 4.8952689545589086e-05, - "loss": 1.7775, - "step": 501500 - }, - { - "epoch": 7.23, - "learning_rate": 4.890076280383635e-05, - "loss": 1.7769, - "step": 502000 - }, - { - "epoch": 7.23, - "learning_rate": 4.8848836062083616e-05, - "loss": 1.7737, - "step": 502500 - }, - { - "epoch": 7.24, - "learning_rate": 4.879690932033088e-05, - "loss": 1.7752, - "step": 503000 - }, - { - "epoch": 7.25, - "learning_rate": 4.8744982578578146e-05, - "loss": 1.7762, - "step": 503500 - }, - { - "epoch": 7.25, - "learning_rate": 4.869315969030891e-05, - "loss": 1.7731, - "step": 504000 - }, - { - "epoch": 7.26, - "learning_rate": 4.864123294855618e-05, - "loss": 1.7755, - "step": 504500 - }, - { - "epoch": 7.27, - "learning_rate": 4.858930620680344e-05, - "loss": 1.7746, - "step": 505000 - }, - { - "epoch": 7.28, - "learning_rate": 4.8537483318534215e-05, - "loss": 1.7742, - "step": 505500 - }, - { - "epoch": 7.28, - "learning_rate": 4.8485556576781476e-05, - "loss": 1.7715, - "step": 506000 - }, - { - "epoch": 7.29, - "learning_rate": 4.8433629835028745e-05, - "loss": 1.777, - "step": 506500 - }, - { - "epoch": 7.3, - "learning_rate": 4.8381703093276006e-05, - "loss": 1.7754, - "step": 507000 - }, - { - "epoch": 7.3, - "learning_rate": 4.8329776351523275e-05, - "loss": 1.7773, - "step": 507500 - }, - { - "epoch": 7.31, - "learning_rate": 4.8277849609770536e-05, - "loss": 1.7759, - "step": 508000 - }, - { - "epoch": 7.32, - "learning_rate": 4.8225922868017804e-05, - "loss": 1.7767, - "step": 508500 - }, - { - "epoch": 7.33, - "learning_rate": 4.817399612626507e-05, - "loss": 1.7725, - "step": 509000 - }, - { - "epoch": 7.33, - "learning_rate": 4.812217323799584e-05, - "loss": 1.7718, - "step": 509500 - }, - { - "epoch": 7.34, - "learning_rate": 4.807024649624311e-05, - "loss": 1.7748, - "step": 510000 - }, - { - "epoch": 7.35, - "learning_rate": 4.801831975449036e-05, - "loss": 1.7744, - "step": 510500 - }, - { - "epoch": 7.36, - "learning_rate": 4.796639301273763e-05, - "loss": 1.7763, - "step": 511000 - }, - { - "epoch": 7.36, - "learning_rate": 4.79144662709849e-05, - "loss": 1.7711, - "step": 511500 - }, - { - "epoch": 7.37, - "learning_rate": 4.7862643382715665e-05, - "loss": 1.7741, - "step": 512000 - }, - { - "epoch": 7.38, - "learning_rate": 4.781071664096293e-05, - "loss": 1.7705, - "step": 512500 - }, - { - "epoch": 7.38, - "learning_rate": 4.77587898992102e-05, - "loss": 1.7703, - "step": 513000 - }, - { - "epoch": 7.39, - "learning_rate": 4.770686315745746e-05, - "loss": 1.7754, - "step": 513500 - }, - { - "epoch": 7.4, - "learning_rate": 4.7655040269188236e-05, - "loss": 1.7721, - "step": 514000 - }, - { - "epoch": 7.41, - "learning_rate": 4.76031135274355e-05, - "loss": 1.7728, - "step": 514500 - }, - { - "epoch": 7.41, - "learning_rate": 4.755118678568276e-05, - "loss": 1.7734, - "step": 515000 - }, - { - "epoch": 7.42, - "learning_rate": 4.749926004393003e-05, - "loss": 1.7709, - "step": 515500 - }, - { - "epoch": 7.43, - "learning_rate": 4.7447437155660793e-05, - "loss": 1.7707, - "step": 516000 - }, - { - "epoch": 7.43, - "learning_rate": 4.739551041390806e-05, - "loss": 1.7686, - "step": 516500 - }, - { - "epoch": 7.44, - "learning_rate": 4.734358367215533e-05, - "loss": 1.7732, - "step": 517000 - }, - { - "epoch": 7.45, - "learning_rate": 4.729165693040259e-05, - "loss": 1.7755, - "step": 517500 - }, - { - "epoch": 7.46, - "learning_rate": 4.7239834042133365e-05, - "loss": 1.7707, - "step": 518000 - }, - { - "epoch": 7.46, - "learning_rate": 4.7187907300380626e-05, - "loss": 1.77, - "step": 518500 - }, - { - "epoch": 7.47, - "learning_rate": 4.713598055862789e-05, - "loss": 1.7733, - "step": 519000 - }, - { - "epoch": 7.48, - "learning_rate": 4.7084053816875156e-05, - "loss": 1.7701, - "step": 519500 - }, - { - "epoch": 7.48, - "learning_rate": 4.703223092860592e-05, - "loss": 1.7704, - "step": 520000 - }, - { - "epoch": 7.49, - "learning_rate": 4.698030418685319e-05, - "loss": 1.7733, - "step": 520500 - }, - { - "epoch": 7.5, - "learning_rate": 4.692837744510045e-05, - "loss": 1.7719, - "step": 521000 - }, - { - "epoch": 7.51, - "learning_rate": 4.687645070334772e-05, - "loss": 1.7713, - "step": 521500 - }, - { - "epoch": 7.51, - "learning_rate": 4.682462781507849e-05, - "loss": 1.7694, - "step": 522000 - }, - { - "epoch": 7.52, - "learning_rate": 4.6772701073325755e-05, - "loss": 1.7745, - "step": 522500 - }, - { - "epoch": 7.53, - "learning_rate": 4.6720774331573016e-05, - "loss": 1.7699, - "step": 523000 - }, - { - "epoch": 7.54, - "learning_rate": 4.6668847589820285e-05, - "loss": 1.7734, - "step": 523500 - }, - { - "epoch": 7.54, - "learning_rate": 4.661702470155106e-05, - "loss": 1.7732, - "step": 524000 - }, - { - "epoch": 7.55, - "learning_rate": 4.656509795979832e-05, - "loss": 1.7699, - "step": 524500 - }, - { - "epoch": 7.56, - "learning_rate": 4.651317121804558e-05, - "loss": 1.7735, - "step": 525000 - }, - { - "epoch": 7.56, - "learning_rate": 4.646124447629285e-05, - "loss": 1.7715, - "step": 525500 - }, - { - "epoch": 7.57, - "learning_rate": 4.6409421588023615e-05, - "loss": 1.7697, - "step": 526000 - }, - { - "epoch": 7.58, - "learning_rate": 4.6357494846270883e-05, - "loss": 1.7693, - "step": 526500 - }, - { - "epoch": 7.59, - "learning_rate": 4.630556810451815e-05, - "loss": 1.769, - "step": 527000 - }, - { - "epoch": 7.59, - "learning_rate": 4.625364136276541e-05, - "loss": 1.769, - "step": 527500 - }, - { - "epoch": 7.6, - "learning_rate": 4.6201818474496186e-05, - "loss": 1.7745, - "step": 528000 - }, - { - "epoch": 7.61, - "learning_rate": 4.614989173274345e-05, - "loss": 1.7707, - "step": 528500 - }, - { - "epoch": 7.61, - "learning_rate": 4.609796499099071e-05, - "loss": 1.7688, - "step": 529000 - }, - { - "epoch": 7.62, - "learning_rate": 4.604614210272148e-05, - "loss": 1.7706, - "step": 529500 - }, - { - "epoch": 7.63, - "learning_rate": 4.5994215360968744e-05, - "loss": 1.773, - "step": 530000 - }, - { - "epoch": 7.64, - "learning_rate": 4.594228861921601e-05, - "loss": 1.7672, - "step": 530500 - }, - { - "epoch": 7.64, - "learning_rate": 4.589036187746328e-05, - "loss": 1.7676, - "step": 531000 - }, - { - "epoch": 7.65, - "learning_rate": 4.583843513571054e-05, - "loss": 1.7676, - "step": 531500 - }, - { - "epoch": 7.66, - "learning_rate": 4.5786508393957803e-05, - "loss": 1.7707, - "step": 532000 - }, - { - "epoch": 7.66, - "learning_rate": 4.573458165220507e-05, - "loss": 1.7719, - "step": 532500 - }, - { - "epoch": 7.67, - "learning_rate": 4.568265491045234e-05, - "loss": 1.7694, - "step": 533000 - }, - { - "epoch": 7.68, - "learning_rate": 4.5630832022183106e-05, - "loss": 1.7724, - "step": 533500 - }, - { - "epoch": 7.69, - "learning_rate": 4.5578905280430375e-05, - "loss": 1.7717, - "step": 534000 - }, - { - "epoch": 7.69, - "learning_rate": 4.5526978538677636e-05, - "loss": 1.7678, - "step": 534500 - }, - { - "epoch": 7.7, - "learning_rate": 4.54750517969249e-05, - "loss": 1.7696, - "step": 535000 - }, - { - "epoch": 7.71, - "learning_rate": 4.542322890865567e-05, - "loss": 1.7684, - "step": 535500 - }, - { - "epoch": 7.72, - "learning_rate": 4.5371406020386444e-05, - "loss": 1.7627, - "step": 536000 - }, - { - "epoch": 7.72, - "learning_rate": 4.5319479278633705e-05, - "loss": 1.7686, - "step": 536500 - }, - { - "epoch": 7.73, - "learning_rate": 4.526755253688097e-05, - "loss": 1.7653, - "step": 537000 - }, - { - "epoch": 7.74, - "learning_rate": 4.5215625795128235e-05, - "loss": 1.766, - "step": 537500 - }, - { - "epoch": 7.74, - "learning_rate": 4.51636990533755e-05, - "loss": 1.7649, - "step": 538000 - }, - { - "epoch": 7.75, - "learning_rate": 4.5111772311622765e-05, - "loss": 1.7658, - "step": 538500 - }, - { - "epoch": 7.76, - "learning_rate": 4.5059845569870026e-05, - "loss": 1.7638, - "step": 539000 - }, - { - "epoch": 7.77, - "learning_rate": 4.5007918828117295e-05, - "loss": 1.7658, - "step": 539500 - }, - { - "epoch": 7.77, - "learning_rate": 4.495609593984806e-05, - "loss": 1.7654, - "step": 540000 - }, - { - "epoch": 7.78, - "learning_rate": 4.490416919809533e-05, - "loss": 1.7675, - "step": 540500 - }, - { - "epoch": 7.79, - "learning_rate": 4.485224245634259e-05, - "loss": 1.767, - "step": 541000 - }, - { - "epoch": 7.79, - "learning_rate": 4.480031571458986e-05, - "loss": 1.7705, - "step": 541500 - }, - { - "epoch": 7.8, - "learning_rate": 4.474849282632063e-05, - "loss": 1.7689, - "step": 542000 - }, - { - "epoch": 7.81, - "learning_rate": 4.4696566084567893e-05, - "loss": 1.7689, - "step": 542500 - }, - { - "epoch": 7.82, - "learning_rate": 4.464463934281516e-05, - "loss": 1.7701, - "step": 543000 - }, - { - "epoch": 7.82, - "learning_rate": 4.459271260106242e-05, - "loss": 1.7675, - "step": 543500 - }, - { - "epoch": 7.83, - "learning_rate": 4.4540889712793196e-05, - "loss": 1.7674, - "step": 544000 - }, - { - "epoch": 7.84, - "learning_rate": 4.448896297104046e-05, - "loss": 1.7654, - "step": 544500 - }, - { - "epoch": 7.84, - "learning_rate": 4.443703622928772e-05, - "loss": 1.7649, - "step": 545000 - }, - { - "epoch": 7.85, - "learning_rate": 4.438510948753499e-05, - "loss": 1.7676, - "step": 545500 - }, - { - "epoch": 7.86, - "learning_rate": 4.4333286599265754e-05, - "loss": 1.7621, - "step": 546000 - }, - { - "epoch": 7.87, - "learning_rate": 4.428135985751302e-05, - "loss": 1.7628, - "step": 546500 - }, - { - "epoch": 7.87, - "learning_rate": 4.422943311576029e-05, - "loss": 1.7692, - "step": 547000 - }, - { - "epoch": 7.88, - "learning_rate": 4.417750637400755e-05, - "loss": 1.7658, - "step": 547500 - }, - { - "epoch": 7.89, - "learning_rate": 4.4125683485738325e-05, - "loss": 1.7659, - "step": 548000 - }, - { - "epoch": 7.9, - "learning_rate": 4.4073756743985586e-05, - "loss": 1.7643, - "step": 548500 - }, - { - "epoch": 7.9, - "learning_rate": 4.402183000223285e-05, - "loss": 1.7654, - "step": 549000 - }, - { - "epoch": 7.91, - "learning_rate": 4.3969903260480116e-05, - "loss": 1.7654, - "step": 549500 - }, - { - "epoch": 7.92, - "learning_rate": 4.391808037221088e-05, - "loss": 1.7648, - "step": 550000 - }, - { - "epoch": 7.92, - "learning_rate": 4.386615363045815e-05, - "loss": 1.764, - "step": 550500 - }, - { - "epoch": 7.93, - "learning_rate": 4.381422688870542e-05, - "loss": 1.7654, - "step": 551000 - }, - { - "epoch": 7.94, - "learning_rate": 4.376230014695268e-05, - "loss": 1.7628, - "step": 551500 - }, - { - "epoch": 7.95, - "learning_rate": 4.3710477258683454e-05, - "loss": 1.7696, - "step": 552000 - }, - { - "epoch": 7.95, - "learning_rate": 4.365855051693072e-05, - "loss": 1.7657, - "step": 552500 - }, - { - "epoch": 7.96, - "learning_rate": 4.360662377517798e-05, - "loss": 1.7621, - "step": 553000 - }, - { - "epoch": 7.97, - "learning_rate": 4.3554697033425245e-05, - "loss": 1.7617, - "step": 553500 - }, - { - "epoch": 7.97, - "learning_rate": 4.350287414515601e-05, - "loss": 1.7636, - "step": 554000 - }, - { - "epoch": 7.98, - "learning_rate": 4.345094740340328e-05, - "loss": 1.7631, - "step": 554500 - }, - { - "epoch": 7.99, - "learning_rate": 4.339902066165055e-05, - "loss": 1.7652, - "step": 555000 - }, - { - "epoch": 8.0, - "learning_rate": 4.334709391989781e-05, - "loss": 1.7663, - "step": 555500 - }, - { - "epoch": 8.0, - "eval_accuracy": 0.6577083685785928, - "eval_loss": 1.633870005607605, - "eval_runtime": 1283.7415, - "eval_samples_per_second": 419.817, - "eval_steps_per_second": 26.239, - "step": 555784 - }, - { - "epoch": 8.0, - "learning_rate": 4.329527103162858e-05, - "loss": 1.7639, - "step": 556000 - }, - { - "epoch": 8.01, - "learning_rate": 4.324334428987585e-05, - "loss": 1.757, - "step": 556500 - }, - { - "epoch": 8.02, - "learning_rate": 4.319141754812311e-05, - "loss": 1.7587, - "step": 557000 - }, - { - "epoch": 8.02, - "learning_rate": 4.3139490806370374e-05, - "loss": 1.761, - "step": 557500 - }, - { - "epoch": 8.03, - "learning_rate": 4.3087667918101147e-05, - "loss": 1.7578, - "step": 558000 - }, - { - "epoch": 8.04, - "learning_rate": 4.303574117634841e-05, - "loss": 1.7618, - "step": 558500 - }, - { - "epoch": 8.05, - "learning_rate": 4.2983814434595676e-05, - "loss": 1.7611, - "step": 559000 - }, - { - "epoch": 8.05, - "learning_rate": 4.293188769284294e-05, - "loss": 1.7625, - "step": 559500 - }, - { - "epoch": 8.06, - "learning_rate": 4.288006480457371e-05, - "loss": 1.7631, - "step": 560000 - }, - { - "epoch": 8.07, - "learning_rate": 4.282813806282097e-05, - "loss": 1.7608, - "step": 560500 - }, - { - "epoch": 8.08, - "learning_rate": 4.277621132106824e-05, - "loss": 1.7602, - "step": 561000 - }, - { - "epoch": 8.08, - "learning_rate": 4.27242845793155e-05, - "loss": 1.7622, - "step": 561500 - }, - { - "epoch": 8.09, - "learning_rate": 4.2672461691046275e-05, - "loss": 1.7612, - "step": 562000 - }, - { - "epoch": 8.1, - "learning_rate": 4.262053494929354e-05, - "loss": 1.7629, - "step": 562500 - }, - { - "epoch": 8.1, - "learning_rate": 4.2568608207540805e-05, - "loss": 1.76, - "step": 563000 - }, - { - "epoch": 8.11, - "learning_rate": 4.251678531927157e-05, - "loss": 1.757, - "step": 563500 - }, - { - "epoch": 8.12, - "learning_rate": 4.246485857751884e-05, - "loss": 1.76, - "step": 564000 - }, - { - "epoch": 8.13, - "learning_rate": 4.24129318357661e-05, - "loss": 1.7598, - "step": 564500 - }, - { - "epoch": 8.13, - "learning_rate": 4.236100509401337e-05, - "loss": 1.7594, - "step": 565000 - }, - { - "epoch": 8.14, - "learning_rate": 4.230907835226063e-05, - "loss": 1.7601, - "step": 565500 - }, - { - "epoch": 8.15, - "learning_rate": 4.22571516105079e-05, - "loss": 1.7602, - "step": 566000 - }, - { - "epoch": 8.15, - "learning_rate": 4.220522486875516e-05, - "loss": 1.7642, - "step": 566500 - }, - { - "epoch": 8.16, - "learning_rate": 4.215329812700243e-05, - "loss": 1.7609, - "step": 567000 - }, - { - "epoch": 8.17, - "learning_rate": 4.2101475238733195e-05, - "loss": 1.7595, - "step": 567500 - }, - { - "epoch": 8.18, - "learning_rate": 4.2049548496980464e-05, - "loss": 1.762, - "step": 568000 - }, - { - "epoch": 8.18, - "learning_rate": 4.1997621755227725e-05, - "loss": 1.758, - "step": 568500 - }, - { - "epoch": 8.19, - "learning_rate": 4.1945695013474987e-05, - "loss": 1.7594, - "step": 569000 - }, - { - "epoch": 8.2, - "learning_rate": 4.1893872125205766e-05, - "loss": 1.7586, - "step": 569500 - }, - { - "epoch": 8.2, - "learning_rate": 4.184194538345302e-05, - "loss": 1.7621, - "step": 570000 - }, - { - "epoch": 8.21, - "learning_rate": 4.179001864170029e-05, - "loss": 1.7619, - "step": 570500 - }, - { - "epoch": 8.22, - "learning_rate": 4.173809189994756e-05, - "loss": 1.7591, - "step": 571000 - }, - { - "epoch": 8.23, - "learning_rate": 4.1686269011678324e-05, - "loss": 1.7582, - "step": 571500 - }, - { - "epoch": 8.23, - "learning_rate": 4.163434226992559e-05, - "loss": 1.7603, - "step": 572000 - }, - { - "epoch": 8.24, - "learning_rate": 4.158251938165636e-05, - "loss": 1.7545, - "step": 572500 - }, - { - "epoch": 8.25, - "learning_rate": 4.153059263990363e-05, - "loss": 1.7649, - "step": 573000 - }, - { - "epoch": 8.26, - "learning_rate": 4.1478665898150895e-05, - "loss": 1.756, - "step": 573500 - }, - { - "epoch": 8.26, - "learning_rate": 4.1426739156398157e-05, - "loss": 1.7582, - "step": 574000 - }, - { - "epoch": 8.27, - "learning_rate": 4.137481241464542e-05, - "loss": 1.7578, - "step": 574500 - }, - { - "epoch": 8.28, - "learning_rate": 4.1322885672892686e-05, - "loss": 1.763, - "step": 575000 - }, - { - "epoch": 8.28, - "learning_rate": 4.1270958931139955e-05, - "loss": 1.7579, - "step": 575500 - }, - { - "epoch": 8.29, - "learning_rate": 4.1219032189387216e-05, - "loss": 1.7617, - "step": 576000 - }, - { - "epoch": 8.3, - "learning_rate": 4.116720930111799e-05, - "loss": 1.7528, - "step": 576500 - }, - { - "epoch": 8.31, - "learning_rate": 4.111528255936525e-05, - "loss": 1.7591, - "step": 577000 - }, - { - "epoch": 8.31, - "learning_rate": 4.106335581761251e-05, - "loss": 1.7559, - "step": 577500 - }, - { - "epoch": 8.32, - "learning_rate": 4.101142907585978e-05, - "loss": 1.7591, - "step": 578000 - }, - { - "epoch": 8.33, - "learning_rate": 4.095960618759055e-05, - "loss": 1.7552, - "step": 578500 - }, - { - "epoch": 8.33, - "learning_rate": 4.0907679445837815e-05, - "loss": 1.7548, - "step": 579000 - }, - { - "epoch": 8.34, - "learning_rate": 4.0855752704085077e-05, - "loss": 1.7547, - "step": 579500 - }, - { - "epoch": 8.35, - "learning_rate": 4.0803825962332345e-05, - "loss": 1.7593, - "step": 580000 - }, - { - "epoch": 8.36, - "learning_rate": 4.075200307406312e-05, - "loss": 1.7594, - "step": 580500 - }, - { - "epoch": 8.36, - "learning_rate": 4.070007633231038e-05, - "loss": 1.7571, - "step": 581000 - }, - { - "epoch": 8.37, - "learning_rate": 4.064814959055764e-05, - "loss": 1.7594, - "step": 581500 - }, - { - "epoch": 8.38, - "learning_rate": 4.059622284880491e-05, - "loss": 1.7547, - "step": 582000 - }, - { - "epoch": 8.38, - "learning_rate": 4.0544399960535675e-05, - "loss": 1.7556, - "step": 582500 - }, - { - "epoch": 8.39, - "learning_rate": 4.0492473218782944e-05, - "loss": 1.7571, - "step": 583000 - }, - { - "epoch": 8.4, - "learning_rate": 4.0440546477030205e-05, - "loss": 1.7569, - "step": 583500 - }, - { - "epoch": 8.41, - "learning_rate": 4.0388619735277474e-05, - "loss": 1.7576, - "step": 584000 - }, - { - "epoch": 8.41, - "learning_rate": 4.033679684700824e-05, - "loss": 1.7582, - "step": 584500 - }, - { - "epoch": 8.42, - "learning_rate": 4.028487010525551e-05, - "loss": 1.7555, - "step": 585000 - }, - { - "epoch": 8.43, - "learning_rate": 4.0232943363502776e-05, - "loss": 1.7571, - "step": 585500 - }, - { - "epoch": 8.43, - "learning_rate": 4.018101662175004e-05, - "loss": 1.7567, - "step": 586000 - }, - { - "epoch": 8.44, - "learning_rate": 4.012919373348081e-05, - "loss": 1.753, - "step": 586500 - }, - { - "epoch": 8.45, - "learning_rate": 4.007726699172807e-05, - "loss": 1.7573, - "step": 587000 - }, - { - "epoch": 8.46, - "learning_rate": 4.0025340249975334e-05, - "loss": 1.7568, - "step": 587500 - }, - { - "epoch": 8.46, - "learning_rate": 3.99734135082226e-05, - "loss": 1.7521, - "step": 588000 - }, - { - "epoch": 8.47, - "learning_rate": 3.992159061995337e-05, - "loss": 1.7587, - "step": 588500 - }, - { - "epoch": 8.48, - "learning_rate": 3.986966387820064e-05, - "loss": 1.7561, - "step": 589000 - }, - { - "epoch": 8.49, - "learning_rate": 3.9817737136447905e-05, - "loss": 1.7552, - "step": 589500 - }, - { - "epoch": 8.49, - "learning_rate": 3.9765810394695167e-05, - "loss": 1.7615, - "step": 590000 - }, - { - "epoch": 8.5, - "learning_rate": 3.971398750642594e-05, - "loss": 1.7554, - "step": 590500 - }, - { - "epoch": 8.51, - "learning_rate": 3.96620607646732e-05, - "loss": 1.7579, - "step": 591000 - }, - { - "epoch": 8.51, - "learning_rate": 3.961013402292046e-05, - "loss": 1.7531, - "step": 591500 - }, - { - "epoch": 8.52, - "learning_rate": 3.955820728116773e-05, - "loss": 1.7594, - "step": 592000 - }, - { - "epoch": 8.53, - "learning_rate": 3.95063843928985e-05, - "loss": 1.7561, - "step": 592500 - }, - { - "epoch": 8.54, - "learning_rate": 3.9454457651145765e-05, - "loss": 1.758, - "step": 593000 - }, - { - "epoch": 8.54, - "learning_rate": 3.9402530909393034e-05, - "loss": 1.7567, - "step": 593500 - }, - { - "epoch": 8.55, - "learning_rate": 3.9350604167640295e-05, - "loss": 1.7568, - "step": 594000 - }, - { - "epoch": 8.56, - "learning_rate": 3.929878127937107e-05, - "loss": 1.7541, - "step": 594500 - }, - { - "epoch": 8.56, - "learning_rate": 3.924685453761833e-05, - "loss": 1.7548, - "step": 595000 - }, - { - "epoch": 8.57, - "learning_rate": 3.919492779586559e-05, - "loss": 1.7566, - "step": 595500 - }, - { - "epoch": 8.58, - "learning_rate": 3.914300105411286e-05, - "loss": 1.7522, - "step": 596000 - }, - { - "epoch": 8.59, - "learning_rate": 3.9091178165843626e-05, - "loss": 1.7527, - "step": 596500 - }, - { - "epoch": 8.59, - "learning_rate": 3.9039251424090894e-05, - "loss": 1.7552, - "step": 597000 - }, - { - "epoch": 8.6, - "learning_rate": 3.898732468233816e-05, - "loss": 1.7583, - "step": 597500 - }, - { - "epoch": 8.61, - "learning_rate": 3.893550179406893e-05, - "loss": 1.7533, - "step": 598000 - }, - { - "epoch": 8.61, - "learning_rate": 3.88835750523162e-05, - "loss": 1.7533, - "step": 598500 - }, - { - "epoch": 8.62, - "learning_rate": 3.883164831056346e-05, - "loss": 1.7521, - "step": 599000 - }, - { - "epoch": 8.63, - "learning_rate": 3.877972156881072e-05, - "loss": 1.7587, - "step": 599500 - }, - { - "epoch": 8.64, - "learning_rate": 3.872779482705799e-05, - "loss": 1.7544, - "step": 600000 - }, - { - "epoch": 8.64, - "learning_rate": 3.8675868085305257e-05, - "loss": 1.7521, - "step": 600500 - }, - { - "epoch": 8.65, - "learning_rate": 3.862394134355252e-05, - "loss": 1.7533, - "step": 601000 - }, - { - "epoch": 8.66, - "learning_rate": 3.857201460179978e-05, - "loss": 1.755, - "step": 601500 - }, - { - "epoch": 8.67, - "learning_rate": 3.852019171353055e-05, - "loss": 1.7522, - "step": 602000 - }, - { - "epoch": 8.67, - "learning_rate": 3.846826497177782e-05, - "loss": 1.7543, - "step": 602500 - }, - { - "epoch": 8.68, - "learning_rate": 3.841633823002508e-05, - "loss": 1.7516, - "step": 603000 - }, - { - "epoch": 8.69, - "learning_rate": 3.8364411488272344e-05, - "loss": 1.7528, - "step": 603500 - }, - { - "epoch": 8.69, - "learning_rate": 3.831258860000312e-05, - "loss": 1.752, - "step": 604000 - }, - { - "epoch": 8.7, - "learning_rate": 3.826066185825038e-05, - "loss": 1.7555, - "step": 604500 - }, - { - "epoch": 8.71, - "learning_rate": 3.820873511649765e-05, - "loss": 1.753, - "step": 605000 - }, - { - "epoch": 8.72, - "learning_rate": 3.8156808374744915e-05, - "loss": 1.7504, - "step": 605500 - }, - { - "epoch": 8.72, - "learning_rate": 3.810498548647568e-05, - "loss": 1.7575, - "step": 606000 - }, - { - "epoch": 8.73, - "learning_rate": 3.8053162598206454e-05, - "loss": 1.7528, - "step": 606500 - }, - { - "epoch": 8.74, - "learning_rate": 3.8001235856453716e-05, - "loss": 1.7548, - "step": 607000 - }, - { - "epoch": 8.74, - "learning_rate": 3.7949309114700984e-05, - "loss": 1.7483, - "step": 607500 - }, - { - "epoch": 8.75, - "learning_rate": 3.7897382372948246e-05, - "loss": 1.7564, - "step": 608000 - }, - { - "epoch": 8.76, - "learning_rate": 3.784545563119551e-05, - "loss": 1.7495, - "step": 608500 - }, - { - "epoch": 8.77, - "learning_rate": 3.7793528889442775e-05, - "loss": 1.7505, - "step": 609000 - }, - { - "epoch": 8.77, - "learning_rate": 3.7741602147690044e-05, - "loss": 1.7513, - "step": 609500 - }, - { - "epoch": 8.78, - "learning_rate": 3.7689675405937305e-05, - "loss": 1.7502, - "step": 610000 - }, - { - "epoch": 8.79, - "learning_rate": 3.763785251766808e-05, - "loss": 1.7521, - "step": 610500 - }, - { - "epoch": 8.79, - "learning_rate": 3.758592577591534e-05, - "loss": 1.7514, - "step": 611000 - }, - { - "epoch": 8.8, - "learning_rate": 3.75339990341626e-05, - "loss": 1.7505, - "step": 611500 - }, - { - "epoch": 8.81, - "learning_rate": 3.748207229240987e-05, - "loss": 1.7482, - "step": 612000 - }, - { - "epoch": 8.82, - "learning_rate": 3.743035325762415e-05, - "loss": 1.7525, - "step": 612500 - }, - { - "epoch": 8.82, - "learning_rate": 3.737842651587141e-05, - "loss": 1.7561, - "step": 613000 - }, - { - "epoch": 8.83, - "learning_rate": 3.732649977411867e-05, - "loss": 1.7582, - "step": 613500 - }, - { - "epoch": 8.84, - "learning_rate": 3.727457303236594e-05, - "loss": 1.7514, - "step": 614000 - }, - { - "epoch": 8.85, - "learning_rate": 3.722264629061321e-05, - "loss": 1.7517, - "step": 614500 - }, - { - "epoch": 8.85, - "learning_rate": 3.7170719548860475e-05, - "loss": 1.7498, - "step": 615000 - }, - { - "epoch": 8.86, - "learning_rate": 3.711879280710773e-05, - "loss": 1.7558, - "step": 615500 - }, - { - "epoch": 8.87, - "learning_rate": 3.7066866065355e-05, - "loss": 1.7502, - "step": 616000 - }, - { - "epoch": 8.87, - "learning_rate": 3.7015043177085765e-05, - "loss": 1.753, - "step": 616500 - }, - { - "epoch": 8.88, - "learning_rate": 3.696311643533303e-05, - "loss": 1.7497, - "step": 617000 - }, - { - "epoch": 8.89, - "learning_rate": 3.69111896935803e-05, - "loss": 1.7474, - "step": 617500 - }, - { - "epoch": 8.9, - "learning_rate": 3.685936680531107e-05, - "loss": 1.7486, - "step": 618000 - }, - { - "epoch": 8.9, - "learning_rate": 3.6807440063558336e-05, - "loss": 1.7488, - "step": 618500 - }, - { - "epoch": 8.91, - "learning_rate": 3.67555133218056e-05, - "loss": 1.7491, - "step": 619000 - }, - { - "epoch": 8.92, - "learning_rate": 3.6703586580052865e-05, - "loss": 1.7489, - "step": 619500 - }, - { - "epoch": 8.92, - "learning_rate": 3.665165983830013e-05, - "loss": 1.7524, - "step": 620000 - }, - { - "epoch": 8.93, - "learning_rate": 3.6599733096547395e-05, - "loss": 1.7538, - "step": 620500 - }, - { - "epoch": 8.94, - "learning_rate": 3.654780635479466e-05, - "loss": 1.7516, - "step": 621000 - }, - { - "epoch": 8.95, - "learning_rate": 3.6495879613041925e-05, - "loss": 1.749, - "step": 621500 - }, - { - "epoch": 8.95, - "learning_rate": 3.644405672477269e-05, - "loss": 1.7493, - "step": 622000 - }, - { - "epoch": 8.96, - "learning_rate": 3.639212998301996e-05, - "loss": 1.7492, - "step": 622500 - }, - { - "epoch": 8.97, - "learning_rate": 3.634020324126722e-05, - "loss": 1.7473, - "step": 623000 - }, - { - "epoch": 8.97, - "learning_rate": 3.628827649951448e-05, - "loss": 1.7485, - "step": 623500 - }, - { - "epoch": 8.98, - "learning_rate": 3.6236453611245256e-05, - "loss": 1.7497, - "step": 624000 - }, - { - "epoch": 8.99, - "learning_rate": 3.6184526869492524e-05, - "loss": 1.7524, - "step": 624500 - }, - { - "epoch": 9.0, - "learning_rate": 3.613270398122329e-05, - "loss": 1.7524, - "step": 625000 - }, - { - "epoch": 9.0, - "eval_accuracy": 0.6610600568746429, - "eval_loss": 1.6158946752548218, - "eval_runtime": 1285.3183, - "eval_samples_per_second": 419.302, - "eval_steps_per_second": 26.207, - "step": 625257 - }, - { - "epoch": 9.0, - "learning_rate": 3.608077723947056e-05, - "loss": 1.7455, - "step": 625500 - }, - { - "epoch": 9.01, - "learning_rate": 3.602885049771782e-05, - "loss": 1.7504, - "step": 626000 - }, - { - "epoch": 9.02, - "learning_rate": 3.597692375596509e-05, - "loss": 1.7482, - "step": 626500 - }, - { - "epoch": 9.03, - "learning_rate": 3.592499701421235e-05, - "loss": 1.748, - "step": 627000 - }, - { - "epoch": 9.03, - "learning_rate": 3.587307027245961e-05, - "loss": 1.7454, - "step": 627500 - }, - { - "epoch": 9.04, - "learning_rate": 3.582114353070688e-05, - "loss": 1.7465, - "step": 628000 - }, - { - "epoch": 9.05, - "learning_rate": 3.576921678895415e-05, - "loss": 1.7527, - "step": 628500 - }, - { - "epoch": 9.05, - "learning_rate": 3.5717393900684914e-05, - "loss": 1.7445, - "step": 629000 - }, - { - "epoch": 9.06, - "learning_rate": 3.566546715893218e-05, - "loss": 1.7462, - "step": 629500 - }, - { - "epoch": 9.07, - "learning_rate": 3.561354041717945e-05, - "loss": 1.745, - "step": 630000 - }, - { - "epoch": 9.08, - "learning_rate": 3.5561613675426705e-05, - "loss": 1.7473, - "step": 630500 - }, - { - "epoch": 9.08, - "learning_rate": 3.5509686933673974e-05, - "loss": 1.7444, - "step": 631000 - }, - { - "epoch": 9.09, - "learning_rate": 3.545786404540474e-05, - "loss": 1.7444, - "step": 631500 - }, - { - "epoch": 9.1, - "learning_rate": 3.540593730365201e-05, - "loss": 1.7499, - "step": 632000 - }, - { - "epoch": 9.1, - "learning_rate": 3.5354010561899277e-05, - "loss": 1.7466, - "step": 632500 - }, - { - "epoch": 9.11, - "learning_rate": 3.530208382014654e-05, - "loss": 1.7498, - "step": 633000 - }, - { - "epoch": 9.12, - "learning_rate": 3.52501570783938e-05, - "loss": 1.7483, - "step": 633500 - }, - { - "epoch": 9.13, - "learning_rate": 3.519833419012458e-05, - "loss": 1.7468, - "step": 634000 - }, - { - "epoch": 9.13, - "learning_rate": 3.5146407448371834e-05, - "loss": 1.7432, - "step": 634500 - }, - { - "epoch": 9.14, - "learning_rate": 3.50944807066191e-05, - "loss": 1.7428, - "step": 635000 - }, - { - "epoch": 9.15, - "learning_rate": 3.504255396486637e-05, - "loss": 1.749, - "step": 635500 - }, - { - "epoch": 9.15, - "learning_rate": 3.499062722311363e-05, - "loss": 1.7494, - "step": 636000 - }, - { - "epoch": 9.16, - "learning_rate": 3.49387004813609e-05, - "loss": 1.7472, - "step": 636500 - }, - { - "epoch": 9.17, - "learning_rate": 3.488677373960816e-05, - "loss": 1.7467, - "step": 637000 - }, - { - "epoch": 9.18, - "learning_rate": 3.4834846997855424e-05, - "loss": 1.7455, - "step": 637500 - }, - { - "epoch": 9.18, - "learning_rate": 3.4783024109586197e-05, - "loss": 1.7445, - "step": 638000 - }, - { - "epoch": 9.19, - "learning_rate": 3.473109736783346e-05, - "loss": 1.7454, - "step": 638500 - }, - { - "epoch": 9.2, - "learning_rate": 3.4679170626080726e-05, - "loss": 1.7441, - "step": 639000 - }, - { - "epoch": 9.21, - "learning_rate": 3.46273477378115e-05, - "loss": 1.7443, - "step": 639500 - }, - { - "epoch": 9.21, - "learning_rate": 3.457542099605876e-05, - "loss": 1.7463, - "step": 640000 - }, - { - "epoch": 9.22, - "learning_rate": 3.452349425430603e-05, - "loss": 1.7419, - "step": 640500 - }, - { - "epoch": 9.23, - "learning_rate": 3.447156751255329e-05, - "loss": 1.7453, - "step": 641000 - }, - { - "epoch": 9.23, - "learning_rate": 3.441964077080055e-05, - "loss": 1.7438, - "step": 641500 - }, - { - "epoch": 9.24, - "learning_rate": 3.436771402904782e-05, - "loss": 1.7424, - "step": 642000 - }, - { - "epoch": 9.25, - "learning_rate": 3.431578728729509e-05, - "loss": 1.745, - "step": 642500 - }, - { - "epoch": 9.26, - "learning_rate": 3.426386054554235e-05, - "loss": 1.7461, - "step": 643000 - }, - { - "epoch": 9.26, - "learning_rate": 3.421203765727312e-05, - "loss": 1.7383, - "step": 643500 - }, - { - "epoch": 9.27, - "learning_rate": 3.4160110915520385e-05, - "loss": 1.7443, - "step": 644000 - }, - { - "epoch": 9.28, - "learning_rate": 3.4108184173767646e-05, - "loss": 1.7418, - "step": 644500 - }, - { - "epoch": 9.28, - "learning_rate": 3.4056257432014915e-05, - "loss": 1.7446, - "step": 645000 - }, - { - "epoch": 9.29, - "learning_rate": 3.400443454374568e-05, - "loss": 1.743, - "step": 645500 - }, - { - "epoch": 9.3, - "learning_rate": 3.395250780199295e-05, - "loss": 1.7434, - "step": 646000 - }, - { - "epoch": 9.31, - "learning_rate": 3.390058106024022e-05, - "loss": 1.7457, - "step": 646500 - }, - { - "epoch": 9.31, - "learning_rate": 3.384865431848748e-05, - "loss": 1.7425, - "step": 647000 - }, - { - "epoch": 9.32, - "learning_rate": 3.379683143021825e-05, - "loss": 1.7444, - "step": 647500 - }, - { - "epoch": 9.33, - "learning_rate": 3.3744904688465514e-05, - "loss": 1.747, - "step": 648000 - }, - { - "epoch": 9.33, - "learning_rate": 3.3692977946712775e-05, - "loss": 1.7448, - "step": 648500 - }, - { - "epoch": 9.34, - "learning_rate": 3.3641155058443555e-05, - "loss": 1.744, - "step": 649000 - }, - { - "epoch": 9.35, - "learning_rate": 3.358922831669081e-05, - "loss": 1.7399, - "step": 649500 - }, - { - "epoch": 9.36, - "learning_rate": 3.353730157493808e-05, - "loss": 1.7487, - "step": 650000 - }, - { - "epoch": 9.36, - "learning_rate": 3.3485374833185346e-05, - "loss": 1.7477, - "step": 650500 - }, - { - "epoch": 9.37, - "learning_rate": 3.343344809143261e-05, - "loss": 1.7445, - "step": 651000 - }, - { - "epoch": 9.38, - "learning_rate": 3.3381521349679876e-05, - "loss": 1.7417, - "step": 651500 - }, - { - "epoch": 9.38, - "learning_rate": 3.332969846141064e-05, - "loss": 1.7456, - "step": 652000 - }, - { - "epoch": 9.39, - "learning_rate": 3.327777171965791e-05, - "loss": 1.7447, - "step": 652500 - }, - { - "epoch": 9.4, - "learning_rate": 3.322584497790517e-05, - "loss": 1.7372, - "step": 653000 - }, - { - "epoch": 9.41, - "learning_rate": 3.317391823615244e-05, - "loss": 1.7448, - "step": 653500 - }, - { - "epoch": 9.41, - "learning_rate": 3.31219914943997e-05, - "loss": 1.7469, - "step": 654000 - }, - { - "epoch": 9.42, - "learning_rate": 3.307006475264697e-05, - "loss": 1.7433, - "step": 654500 - }, - { - "epoch": 9.43, - "learning_rate": 3.3018241864377736e-05, - "loss": 1.7427, - "step": 655000 - }, - { - "epoch": 9.44, - "learning_rate": 3.2966315122625005e-05, - "loss": 1.7441, - "step": 655500 - }, - { - "epoch": 9.44, - "learning_rate": 3.2914388380872266e-05, - "loss": 1.7456, - "step": 656000 - }, - { - "epoch": 9.45, - "learning_rate": 3.286246163911953e-05, - "loss": 1.7416, - "step": 656500 - }, - { - "epoch": 9.46, - "learning_rate": 3.2810534897366796e-05, - "loss": 1.7468, - "step": 657000 - }, - { - "epoch": 9.46, - "learning_rate": 3.2758608155614064e-05, - "loss": 1.7435, - "step": 657500 - }, - { - "epoch": 9.47, - "learning_rate": 3.2706681413861326e-05, - "loss": 1.744, - "step": 658000 - }, - { - "epoch": 9.48, - "learning_rate": 3.265475467210859e-05, - "loss": 1.7436, - "step": 658500 - }, - { - "epoch": 9.49, - "learning_rate": 3.260293178383936e-05, - "loss": 1.7418, - "step": 659000 - }, - { - "epoch": 9.49, - "learning_rate": 3.255100504208662e-05, - "loss": 1.7398, - "step": 659500 - }, - { - "epoch": 9.5, - "learning_rate": 3.249907830033389e-05, - "loss": 1.7423, - "step": 660000 - }, - { - "epoch": 9.51, - "learning_rate": 3.244715155858116e-05, - "loss": 1.7419, - "step": 660500 - }, - { - "epoch": 9.51, - "learning_rate": 3.2395328670311925e-05, - "loss": 1.745, - "step": 661000 - }, - { - "epoch": 9.52, - "learning_rate": 3.234340192855919e-05, - "loss": 1.7421, - "step": 661500 - }, - { - "epoch": 9.53, - "learning_rate": 3.2291475186806454e-05, - "loss": 1.7403, - "step": 662000 - }, - { - "epoch": 9.54, - "learning_rate": 3.2239548445053716e-05, - "loss": 1.7404, - "step": 662500 - }, - { - "epoch": 9.54, - "learning_rate": 3.2187725556784496e-05, - "loss": 1.7439, - "step": 663000 - }, - { - "epoch": 9.55, - "learning_rate": 3.213579881503175e-05, - "loss": 1.7412, - "step": 663500 - }, - { - "epoch": 9.56, - "learning_rate": 3.208387207327902e-05, - "loss": 1.7418, - "step": 664000 - }, - { - "epoch": 9.56, - "learning_rate": 3.203194533152629e-05, - "loss": 1.7413, - "step": 664500 - }, - { - "epoch": 9.57, - "learning_rate": 3.198012244325705e-05, - "loss": 1.7445, - "step": 665000 - }, - { - "epoch": 9.58, - "learning_rate": 3.192819570150432e-05, - "loss": 1.7415, - "step": 665500 - }, - { - "epoch": 9.59, - "learning_rate": 3.187626895975158e-05, - "loss": 1.7374, - "step": 666000 - }, - { - "epoch": 9.59, - "learning_rate": 3.182434221799885e-05, - "loss": 1.7452, - "step": 666500 - }, - { - "epoch": 9.6, - "learning_rate": 3.177251932972962e-05, - "loss": 1.7432, - "step": 667000 - }, - { - "epoch": 9.61, - "learning_rate": 3.1720592587976886e-05, - "loss": 1.7409, - "step": 667500 - }, - { - "epoch": 9.62, - "learning_rate": 3.166866584622415e-05, - "loss": 1.7428, - "step": 668000 - }, - { - "epoch": 9.62, - "learning_rate": 3.1616739104471416e-05, - "loss": 1.7441, - "step": 668500 - }, - { - "epoch": 9.63, - "learning_rate": 3.156491621620218e-05, - "loss": 1.7421, - "step": 669000 - }, - { - "epoch": 9.64, - "learning_rate": 3.151298947444945e-05, - "loss": 1.738, - "step": 669500 - }, - { - "epoch": 9.64, - "learning_rate": 3.1461166586180217e-05, - "loss": 1.7426, - "step": 670000 - }, - { - "epoch": 9.65, - "learning_rate": 3.1409239844427485e-05, - "loss": 1.7417, - "step": 670500 - }, - { - "epoch": 9.66, - "learning_rate": 3.1357313102674746e-05, - "loss": 1.7429, - "step": 671000 - }, - { - "epoch": 9.67, - "learning_rate": 3.1305386360922015e-05, - "loss": 1.7447, - "step": 671500 - }, - { - "epoch": 9.67, - "learning_rate": 3.1253459619169276e-05, - "loss": 1.7419, - "step": 672000 - }, - { - "epoch": 9.68, - "learning_rate": 3.1201532877416544e-05, - "loss": 1.7406, - "step": 672500 - }, - { - "epoch": 9.69, - "learning_rate": 3.1149606135663806e-05, - "loss": 1.7409, - "step": 673000 - }, - { - "epoch": 9.69, - "learning_rate": 3.1097679393911074e-05, - "loss": 1.7345, - "step": 673500 - }, - { - "epoch": 9.7, - "learning_rate": 3.104585650564184e-05, - "loss": 1.7444, - "step": 674000 - }, - { - "epoch": 9.71, - "learning_rate": 3.099392976388911e-05, - "loss": 1.7404, - "step": 674500 - }, - { - "epoch": 9.72, - "learning_rate": 3.094200302213637e-05, - "loss": 1.7388, - "step": 675000 - }, - { - "epoch": 9.72, - "learning_rate": 3.089007628038363e-05, - "loss": 1.7414, - "step": 675500 - }, - { - "epoch": 9.73, - "learning_rate": 3.08381495386309e-05, - "loss": 1.7428, - "step": 676000 - }, - { - "epoch": 9.74, - "learning_rate": 3.078622279687817e-05, - "loss": 1.7414, - "step": 676500 - }, - { - "epoch": 9.74, - "learning_rate": 3.073429605512543e-05, - "loss": 1.7389, - "step": 677000 - }, - { - "epoch": 9.75, - "learning_rate": 3.068236931337269e-05, - "loss": 1.7396, - "step": 677500 - }, - { - "epoch": 9.76, - "learning_rate": 3.063054642510347e-05, - "loss": 1.7383, - "step": 678000 - }, - { - "epoch": 9.77, - "learning_rate": 3.057872353683424e-05, - "loss": 1.7364, - "step": 678500 - }, - { - "epoch": 9.77, - "learning_rate": 3.0526796795081506e-05, - "loss": 1.7393, - "step": 679000 - }, - { - "epoch": 9.78, - "learning_rate": 3.0474870053328764e-05, - "loss": 1.7383, - "step": 679500 - }, - { - "epoch": 9.79, - "learning_rate": 3.042294331157603e-05, - "loss": 1.7368, - "step": 680000 - }, - { - "epoch": 9.8, - "learning_rate": 3.0371016569823297e-05, - "loss": 1.7395, - "step": 680500 - }, - { - "epoch": 9.8, - "learning_rate": 3.031908982807056e-05, - "loss": 1.7427, - "step": 681000 - }, - { - "epoch": 9.81, - "learning_rate": 3.026726693980133e-05, - "loss": 1.7401, - "step": 681500 - }, - { - "epoch": 9.82, - "learning_rate": 3.0215340198048593e-05, - "loss": 1.7372, - "step": 682000 - }, - { - "epoch": 9.82, - "learning_rate": 3.0163413456295858e-05, - "loss": 1.7387, - "step": 682500 - }, - { - "epoch": 9.83, - "learning_rate": 3.0111486714543123e-05, - "loss": 1.741, - "step": 683000 - }, - { - "epoch": 9.84, - "learning_rate": 3.005955997279039e-05, - "loss": 1.7371, - "step": 683500 - }, - { - "epoch": 9.85, - "learning_rate": 3.000773708452116e-05, - "loss": 1.739, - "step": 684000 - }, - { - "epoch": 9.85, - "learning_rate": 2.9955810342768426e-05, - "loss": 1.737, - "step": 684500 - }, - { - "epoch": 9.86, - "learning_rate": 2.9903883601015687e-05, - "loss": 1.7383, - "step": 685000 - }, - { - "epoch": 9.87, - "learning_rate": 2.9851956859262952e-05, - "loss": 1.7391, - "step": 685500 - }, - { - "epoch": 9.87, - "learning_rate": 2.980003011751022e-05, - "loss": 1.7381, - "step": 686000 - }, - { - "epoch": 9.88, - "learning_rate": 2.974810337575748e-05, - "loss": 1.7364, - "step": 686500 - }, - { - "epoch": 9.89, - "learning_rate": 2.9696176634004747e-05, - "loss": 1.7382, - "step": 687000 - }, - { - "epoch": 9.9, - "learning_rate": 2.9644249892252012e-05, - "loss": 1.7347, - "step": 687500 - }, - { - "epoch": 9.9, - "learning_rate": 2.959242700398278e-05, - "loss": 1.7368, - "step": 688000 - }, - { - "epoch": 9.91, - "learning_rate": 2.9540500262230046e-05, - "loss": 1.7391, - "step": 688500 - }, - { - "epoch": 9.92, - "learning_rate": 2.9488677373960816e-05, - "loss": 1.7401, - "step": 689000 - }, - { - "epoch": 9.92, - "learning_rate": 2.943675063220808e-05, - "loss": 1.7389, - "step": 689500 - }, - { - "epoch": 9.93, - "learning_rate": 2.938482389045535e-05, - "loss": 1.7362, - "step": 690000 - }, - { - "epoch": 9.94, - "learning_rate": 2.933289714870261e-05, - "loss": 1.7357, - "step": 690500 - }, - { - "epoch": 9.95, - "learning_rate": 2.9280970406949876e-05, - "loss": 1.7325, - "step": 691000 - }, - { - "epoch": 9.95, - "learning_rate": 2.9229043665197144e-05, - "loss": 1.7402, - "step": 691500 - }, - { - "epoch": 9.96, - "learning_rate": 2.917722077692791e-05, - "loss": 1.7362, - "step": 692000 - }, - { - "epoch": 9.97, - "learning_rate": 2.912529403517518e-05, - "loss": 1.7369, - "step": 692500 - }, - { - "epoch": 9.98, - "learning_rate": 2.9073367293422443e-05, - "loss": 1.7375, - "step": 693000 - }, - { - "epoch": 9.98, - "learning_rate": 2.9021440551669705e-05, - "loss": 1.7335, - "step": 693500 - }, - { - "epoch": 9.99, - "learning_rate": 2.896951380991697e-05, - "loss": 1.7384, - "step": 694000 - }, - { - "epoch": 10.0, - "learning_rate": 2.8917587068164238e-05, - "loss": 1.7398, - "step": 694500 - }, - { - "epoch": 10.0, - "eval_accuracy": 0.6627300200297518, - "eval_loss": 1.6019505262374878, - "eval_runtime": 1291.9437, - "eval_samples_per_second": 417.152, - "eval_steps_per_second": 26.072, - "step": 694730 - }, - { - "epoch": 10.0, - "learning_rate": 2.8865660326411496e-05, - "loss": 1.7321, - "step": 695000 - }, - { - "epoch": 10.01, - "learning_rate": 2.8813733584658764e-05, - "loss": 1.7308, - "step": 695500 - }, - { - "epoch": 10.02, - "learning_rate": 2.8761910696389534e-05, - "loss": 1.7356, - "step": 696000 - }, - { - "epoch": 10.03, - "learning_rate": 2.8710087808120307e-05, - "loss": 1.735, - "step": 696500 - }, - { - "epoch": 10.03, - "learning_rate": 2.8658161066367572e-05, - "loss": 1.7381, - "step": 697000 - }, - { - "epoch": 10.04, - "learning_rate": 2.8606234324614833e-05, - "loss": 1.7334, - "step": 697500 - }, - { - "epoch": 10.05, - "learning_rate": 2.85543075828621e-05, - "loss": 1.7388, - "step": 698000 - }, - { - "epoch": 10.05, - "learning_rate": 2.8502380841109367e-05, - "loss": 1.7346, - "step": 698500 - }, - { - "epoch": 10.06, - "learning_rate": 2.8450454099356628e-05, - "loss": 1.7342, - "step": 699000 - }, - { - "epoch": 10.07, - "learning_rate": 2.8398527357603893e-05, - "loss": 1.7311, - "step": 699500 - }, - { - "epoch": 10.08, - "learning_rate": 2.834660061585116e-05, - "loss": 1.7361, - "step": 700000 - }, - { - "epoch": 10.08, - "learning_rate": 2.8294777727581928e-05, - "loss": 1.7332, - "step": 700500 - }, - { - "epoch": 10.09, - "learning_rate": 2.8242850985829196e-05, - "loss": 1.7365, - "step": 701000 - }, - { - "epoch": 10.1, - "learning_rate": 2.819092424407646e-05, - "loss": 1.7311, - "step": 701500 - }, - { - "epoch": 10.1, - "learning_rate": 2.8138997502323722e-05, - "loss": 1.7365, - "step": 702000 - }, - { - "epoch": 10.11, - "learning_rate": 2.8087174614054495e-05, - "loss": 1.7301, - "step": 702500 - }, - { - "epoch": 10.12, - "learning_rate": 2.8035247872301757e-05, - "loss": 1.7319, - "step": 703000 - }, - { - "epoch": 10.13, - "learning_rate": 2.7983321130549022e-05, - "loss": 1.7305, - "step": 703500 - }, - { - "epoch": 10.13, - "learning_rate": 2.793139438879629e-05, - "loss": 1.7322, - "step": 704000 - }, - { - "epoch": 10.14, - "learning_rate": 2.7879571500527056e-05, - "loss": 1.7325, - "step": 704500 - }, - { - "epoch": 10.15, - "learning_rate": 2.7827644758774325e-05, - "loss": 1.7343, - "step": 705000 - }, - { - "epoch": 10.16, - "learning_rate": 2.777582187050509e-05, - "loss": 1.7342, - "step": 705500 - }, - { - "epoch": 10.16, - "learning_rate": 2.772389512875236e-05, - "loss": 1.7341, - "step": 706000 - }, - { - "epoch": 10.17, - "learning_rate": 2.7671968386999624e-05, - "loss": 1.7324, - "step": 706500 - }, - { - "epoch": 10.18, - "learning_rate": 2.7620041645246886e-05, - "loss": 1.7331, - "step": 707000 - }, - { - "epoch": 10.18, - "learning_rate": 2.756821875697766e-05, - "loss": 1.7344, - "step": 707500 - }, - { - "epoch": 10.19, - "learning_rate": 2.751629201522492e-05, - "loss": 1.7285, - "step": 708000 - }, - { - "epoch": 10.2, - "learning_rate": 2.746436527347219e-05, - "loss": 1.7308, - "step": 708500 - }, - { - "epoch": 10.21, - "learning_rate": 2.7412438531719453e-05, - "loss": 1.7294, - "step": 709000 - }, - { - "epoch": 10.21, - "learning_rate": 2.7360511789966715e-05, - "loss": 1.7349, - "step": 709500 - }, - { - "epoch": 10.22, - "learning_rate": 2.730858504821398e-05, - "loss": 1.7308, - "step": 710000 - }, - { - "epoch": 10.23, - "learning_rate": 2.7256658306461248e-05, - "loss": 1.7317, - "step": 710500 - }, - { - "epoch": 10.23, - "learning_rate": 2.7204731564708513e-05, - "loss": 1.7291, - "step": 711000 - }, - { - "epoch": 10.24, - "learning_rate": 2.7152908676439283e-05, - "loss": 1.7321, - "step": 711500 - }, - { - "epoch": 10.25, - "learning_rate": 2.7100981934686547e-05, - "loss": 1.73, - "step": 712000 - }, - { - "epoch": 10.26, - "learning_rate": 2.704905519293381e-05, - "loss": 1.7363, - "step": 712500 - }, - { - "epoch": 10.26, - "learning_rate": 2.6997128451181074e-05, - "loss": 1.7345, - "step": 713000 - }, - { - "epoch": 10.27, - "learning_rate": 2.6945201709428342e-05, - "loss": 1.7328, - "step": 713500 - }, - { - "epoch": 10.28, - "learning_rate": 2.6893274967675604e-05, - "loss": 1.7307, - "step": 714000 - }, - { - "epoch": 10.28, - "learning_rate": 2.684134822592287e-05, - "loss": 1.7372, - "step": 714500 - }, - { - "epoch": 10.29, - "learning_rate": 2.6789421484170137e-05, - "loss": 1.7352, - "step": 715000 - }, - { - "epoch": 10.3, - "learning_rate": 2.6737598595900903e-05, - "loss": 1.7272, - "step": 715500 - }, - { - "epoch": 10.31, - "learning_rate": 2.668567185414817e-05, - "loss": 1.7286, - "step": 716000 - }, - { - "epoch": 10.31, - "learning_rate": 2.6633745112395436e-05, - "loss": 1.7269, - "step": 716500 - }, - { - "epoch": 10.32, - "learning_rate": 2.6581818370642698e-05, - "loss": 1.7301, - "step": 717000 - }, - { - "epoch": 10.33, - "learning_rate": 2.652999548237347e-05, - "loss": 1.7297, - "step": 717500 - }, - { - "epoch": 10.33, - "learning_rate": 2.647817259410424e-05, - "loss": 1.7337, - "step": 718000 - }, - { - "epoch": 10.34, - "learning_rate": 2.6426245852351505e-05, - "loss": 1.7335, - "step": 718500 - }, - { - "epoch": 10.35, - "learning_rate": 2.6374319110598767e-05, - "loss": 1.7327, - "step": 719000 - }, - { - "epoch": 10.36, - "learning_rate": 2.6322392368846032e-05, - "loss": 1.7291, - "step": 719500 - }, - { - "epoch": 10.36, - "learning_rate": 2.62704656270933e-05, - "loss": 1.7325, - "step": 720000 - }, - { - "epoch": 10.37, - "learning_rate": 2.621853888534056e-05, - "loss": 1.7332, - "step": 720500 - }, - { - "epoch": 10.38, - "learning_rate": 2.6166612143587826e-05, - "loss": 1.7302, - "step": 721000 - }, - { - "epoch": 10.39, - "learning_rate": 2.61147892553186e-05, - "loss": 1.731, - "step": 721500 - }, - { - "epoch": 10.39, - "learning_rate": 2.606286251356586e-05, - "loss": 1.734, - "step": 722000 - }, - { - "epoch": 10.4, - "learning_rate": 2.6010935771813126e-05, - "loss": 1.7329, - "step": 722500 - }, - { - "epoch": 10.41, - "learning_rate": 2.5959009030060394e-05, - "loss": 1.7322, - "step": 723000 - }, - { - "epoch": 10.41, - "learning_rate": 2.590718614179116e-05, - "loss": 1.732, - "step": 723500 - }, - { - "epoch": 10.42, - "learning_rate": 2.585525940003843e-05, - "loss": 1.7299, - "step": 724000 - }, - { - "epoch": 10.43, - "learning_rate": 2.580333265828569e-05, - "loss": 1.7322, - "step": 724500 - }, - { - "epoch": 10.44, - "learning_rate": 2.5751405916532955e-05, - "loss": 1.7266, - "step": 725000 - }, - { - "epoch": 10.44, - "learning_rate": 2.5699479174780223e-05, - "loss": 1.7318, - "step": 725500 - }, - { - "epoch": 10.45, - "learning_rate": 2.564755243302749e-05, - "loss": 1.7294, - "step": 726000 - }, - { - "epoch": 10.46, - "learning_rate": 2.5595729544758258e-05, - "loss": 1.7306, - "step": 726500 - }, - { - "epoch": 10.46, - "learning_rate": 2.5543802803005523e-05, - "loss": 1.7302, - "step": 727000 - }, - { - "epoch": 10.47, - "learning_rate": 2.5491876061252784e-05, - "loss": 1.7315, - "step": 727500 - }, - { - "epoch": 10.48, - "learning_rate": 2.543994931950005e-05, - "loss": 1.7284, - "step": 728000 - }, - { - "epoch": 10.49, - "learning_rate": 2.5388022577747318e-05, - "loss": 1.7283, - "step": 728500 - }, - { - "epoch": 10.49, - "learning_rate": 2.533609583599458e-05, - "loss": 1.7341, - "step": 729000 - }, - { - "epoch": 10.5, - "learning_rate": 2.5284272947725352e-05, - "loss": 1.733, - "step": 729500 - }, - { - "epoch": 10.51, - "learning_rate": 2.5232346205972614e-05, - "loss": 1.7285, - "step": 730000 - }, - { - "epoch": 10.51, - "learning_rate": 2.518041946421988e-05, - "loss": 1.7285, - "step": 730500 - }, - { - "epoch": 10.52, - "learning_rate": 2.5128492722467143e-05, - "loss": 1.7273, - "step": 731000 - }, - { - "epoch": 10.53, - "learning_rate": 2.5076565980714412e-05, - "loss": 1.7291, - "step": 731500 - }, - { - "epoch": 10.54, - "learning_rate": 2.5024639238961673e-05, - "loss": 1.7309, - "step": 732000 - }, - { - "epoch": 10.54, - "learning_rate": 2.4972712497208938e-05, - "loss": 1.7282, - "step": 732500 - }, - { - "epoch": 10.55, - "learning_rate": 2.4920785755456203e-05, - "loss": 1.7303, - "step": 733000 - }, - { - "epoch": 10.56, - "learning_rate": 2.4868962867186973e-05, - "loss": 1.729, - "step": 733500 - }, - { - "epoch": 10.57, - "learning_rate": 2.481703612543424e-05, - "loss": 1.7329, - "step": 734000 - }, - { - "epoch": 10.57, - "learning_rate": 2.4765109383681502e-05, - "loss": 1.7288, - "step": 734500 - }, - { - "epoch": 10.58, - "learning_rate": 2.4713182641928767e-05, - "loss": 1.7302, - "step": 735000 - }, - { - "epoch": 10.59, - "learning_rate": 2.4661255900176032e-05, - "loss": 1.7321, - "step": 735500 - }, - { - "epoch": 10.59, - "learning_rate": 2.4609433011906802e-05, - "loss": 1.7292, - "step": 736000 - }, - { - "epoch": 10.6, - "learning_rate": 2.4557506270154067e-05, - "loss": 1.7265, - "step": 736500 - }, - { - "epoch": 10.61, - "learning_rate": 2.4505579528401332e-05, - "loss": 1.7326, - "step": 737000 - }, - { - "epoch": 10.62, - "learning_rate": 2.4453652786648597e-05, - "loss": 1.7307, - "step": 737500 - }, - { - "epoch": 10.62, - "learning_rate": 2.440172604489586e-05, - "loss": 1.7275, - "step": 738000 - }, - { - "epoch": 10.63, - "learning_rate": 2.434979930314313e-05, - "loss": 1.7292, - "step": 738500 - }, - { - "epoch": 10.64, - "learning_rate": 2.4297976414873896e-05, - "loss": 1.7261, - "step": 739000 - }, - { - "epoch": 10.64, - "learning_rate": 2.4246049673121164e-05, - "loss": 1.7256, - "step": 739500 - }, - { - "epoch": 10.65, - "learning_rate": 2.4194122931368426e-05, - "loss": 1.7286, - "step": 740000 - }, - { - "epoch": 10.66, - "learning_rate": 2.414219618961569e-05, - "loss": 1.7312, - "step": 740500 - }, - { - "epoch": 10.67, - "learning_rate": 2.4090269447862956e-05, - "loss": 1.7292, - "step": 741000 - }, - { - "epoch": 10.67, - "learning_rate": 2.403844655959373e-05, - "loss": 1.725, - "step": 741500 - }, - { - "epoch": 10.68, - "learning_rate": 2.398651981784099e-05, - "loss": 1.7271, - "step": 742000 - }, - { - "epoch": 10.69, - "learning_rate": 2.3934593076088255e-05, - "loss": 1.7308, - "step": 742500 - }, - { - "epoch": 10.69, - "learning_rate": 2.388266633433552e-05, - "loss": 1.7253, - "step": 743000 - }, - { - "epoch": 10.7, - "learning_rate": 2.3830843446066293e-05, - "loss": 1.7264, - "step": 743500 - }, - { - "epoch": 10.71, - "learning_rate": 2.3778916704313555e-05, - "loss": 1.7239, - "step": 744000 - }, - { - "epoch": 10.72, - "learning_rate": 2.372698996256082e-05, - "loss": 1.7291, - "step": 744500 - }, - { - "epoch": 10.72, - "learning_rate": 2.3675063220808084e-05, - "loss": 1.7281, - "step": 745000 - }, - { - "epoch": 10.73, - "learning_rate": 2.362313647905535e-05, - "loss": 1.7274, - "step": 745500 - }, - { - "epoch": 10.74, - "learning_rate": 2.357131359078612e-05, - "loss": 1.7242, - "step": 746000 - }, - { - "epoch": 10.75, - "learning_rate": 2.3519386849033384e-05, - "loss": 1.7232, - "step": 746500 - }, - { - "epoch": 10.75, - "learning_rate": 2.3467460107280652e-05, - "loss": 1.7272, - "step": 747000 - }, - { - "epoch": 10.76, - "learning_rate": 2.3415533365527914e-05, - "loss": 1.7296, - "step": 747500 - }, - { - "epoch": 10.77, - "learning_rate": 2.336360662377518e-05, - "loss": 1.7294, - "step": 748000 - }, - { - "epoch": 10.77, - "learning_rate": 2.3311679882022443e-05, - "loss": 1.7216, - "step": 748500 - }, - { - "epoch": 10.78, - "learning_rate": 2.325975314026971e-05, - "loss": 1.723, - "step": 749000 - }, - { - "epoch": 10.79, - "learning_rate": 2.3207930252000478e-05, - "loss": 1.7262, - "step": 749500 - }, - { - "epoch": 10.8, - "learning_rate": 2.3156003510247743e-05, - "loss": 1.7261, - "step": 750000 - }, - { - "epoch": 10.8, - "learning_rate": 2.3104076768495008e-05, - "loss": 1.7268, - "step": 750500 - }, - { - "epoch": 10.81, - "learning_rate": 2.3052150026742273e-05, - "loss": 1.7271, - "step": 751000 - }, - { - "epoch": 10.82, - "learning_rate": 2.3000223284989538e-05, - "loss": 1.7255, - "step": 751500 - }, - { - "epoch": 10.82, - "learning_rate": 2.2948400396720307e-05, - "loss": 1.7262, - "step": 752000 - }, - { - "epoch": 10.83, - "learning_rate": 2.2896473654967572e-05, - "loss": 1.7263, - "step": 752500 - }, - { - "epoch": 10.84, - "learning_rate": 2.2844546913214837e-05, - "loss": 1.7283, - "step": 753000 - }, - { - "epoch": 10.85, - "learning_rate": 2.2792620171462102e-05, - "loss": 1.7261, - "step": 753500 - }, - { - "epoch": 10.85, - "learning_rate": 2.2740693429709367e-05, - "loss": 1.7245, - "step": 754000 - }, - { - "epoch": 10.86, - "learning_rate": 2.2688870541440136e-05, - "loss": 1.7284, - "step": 754500 - }, - { - "epoch": 10.87, - "learning_rate": 2.26369437996874e-05, - "loss": 1.7266, - "step": 755000 - }, - { - "epoch": 10.87, - "learning_rate": 2.258501705793467e-05, - "loss": 1.7276, - "step": 755500 - }, - { - "epoch": 10.88, - "learning_rate": 2.253309031618193e-05, - "loss": 1.7257, - "step": 756000 - }, - { - "epoch": 10.89, - "learning_rate": 2.2481267427912704e-05, - "loss": 1.7238, - "step": 756500 - }, - { - "epoch": 10.9, - "learning_rate": 2.2429340686159966e-05, - "loss": 1.7261, - "step": 757000 - }, - { - "epoch": 10.9, - "learning_rate": 2.237741394440723e-05, - "loss": 1.7229, - "step": 757500 - }, - { - "epoch": 10.91, - "learning_rate": 2.2325487202654495e-05, - "loss": 1.724, - "step": 758000 - }, - { - "epoch": 10.92, - "learning_rate": 2.227356046090176e-05, - "loss": 1.7255, - "step": 758500 - }, - { - "epoch": 10.93, - "learning_rate": 2.2221633719149025e-05, - "loss": 1.7255, - "step": 759000 - }, - { - "epoch": 10.93, - "learning_rate": 2.216970697739629e-05, - "loss": 1.7289, - "step": 759500 - }, - { - "epoch": 10.94, - "learning_rate": 2.211788408912706e-05, - "loss": 1.7237, - "step": 760000 - }, - { - "epoch": 10.95, - "learning_rate": 2.2065957347374325e-05, - "loss": 1.7219, - "step": 760500 - }, - { - "epoch": 10.95, - "learning_rate": 2.201403060562159e-05, - "loss": 1.7243, - "step": 761000 - }, - { - "epoch": 10.96, - "learning_rate": 2.1962103863868855e-05, - "loss": 1.728, - "step": 761500 - }, - { - "epoch": 10.97, - "learning_rate": 2.191017712211612e-05, - "loss": 1.7213, - "step": 762000 - }, - { - "epoch": 10.98, - "learning_rate": 2.1858250380363384e-05, - "loss": 1.7196, - "step": 762500 - }, - { - "epoch": 10.98, - "learning_rate": 2.1806427492094157e-05, - "loss": 1.7258, - "step": 763000 - }, - { - "epoch": 10.99, - "learning_rate": 2.175450075034142e-05, - "loss": 1.7255, - "step": 763500 - }, - { - "epoch": 11.0, - "learning_rate": 2.1702574008588684e-05, - "loss": 1.7229, - "step": 764000 - }, - { - "epoch": 11.0, - "eval_accuracy": 0.6645491401062097, - "eval_loss": 1.5919610261917114, - "eval_runtime": 1300.0829, - "eval_samples_per_second": 414.54, - "eval_steps_per_second": 25.909, - "step": 764203 - }, - { - "epoch": 11.0, - "learning_rate": 2.165064726683595e-05, - "loss": 1.7248, - "step": 764500 - }, - { - "epoch": 11.01, - "learning_rate": 2.1598824378566722e-05, - "loss": 1.7201, - "step": 765000 - }, - { - "epoch": 11.02, - "learning_rate": 2.1546897636813983e-05, - "loss": 1.7237, - "step": 765500 - }, - { - "epoch": 11.03, - "learning_rate": 2.1494970895061248e-05, - "loss": 1.724, - "step": 766000 - }, - { - "epoch": 11.03, - "learning_rate": 2.1443044153308513e-05, - "loss": 1.7209, - "step": 766500 - }, - { - "epoch": 11.04, - "learning_rate": 2.1391117411555778e-05, - "loss": 1.7219, - "step": 767000 - }, - { - "epoch": 11.05, - "learning_rate": 2.1339190669803043e-05, - "loss": 1.7215, - "step": 767500 - }, - { - "epoch": 11.05, - "learning_rate": 2.1287263928050308e-05, - "loss": 1.7206, - "step": 768000 - }, - { - "epoch": 11.06, - "learning_rate": 2.1235441039781077e-05, - "loss": 1.7198, - "step": 768500 - }, - { - "epoch": 11.07, - "learning_rate": 2.1183514298028342e-05, - "loss": 1.7244, - "step": 769000 - }, - { - "epoch": 11.08, - "learning_rate": 2.1131587556275607e-05, - "loss": 1.7213, - "step": 769500 - }, - { - "epoch": 11.08, - "learning_rate": 2.1079660814522872e-05, - "loss": 1.7236, - "step": 770000 - }, - { - "epoch": 11.09, - "learning_rate": 2.1027837926253645e-05, - "loss": 1.721, - "step": 770500 - }, - { - "epoch": 11.1, - "learning_rate": 2.0975911184500907e-05, - "loss": 1.7223, - "step": 771000 - }, - { - "epoch": 11.11, - "learning_rate": 2.092398444274817e-05, - "loss": 1.7238, - "step": 771500 - }, - { - "epoch": 11.11, - "learning_rate": 2.0872057700995436e-05, - "loss": 1.7226, - "step": 772000 - }, - { - "epoch": 11.12, - "learning_rate": 2.08201309592427e-05, - "loss": 1.7211, - "step": 772500 - }, - { - "epoch": 11.13, - "learning_rate": 2.0768204217489966e-05, - "loss": 1.7223, - "step": 773000 - }, - { - "epoch": 11.13, - "learning_rate": 2.0716381329220736e-05, - "loss": 1.7184, - "step": 773500 - }, - { - "epoch": 11.14, - "learning_rate": 2.0664454587468e-05, - "loss": 1.723, - "step": 774000 - }, - { - "epoch": 11.15, - "learning_rate": 2.0612527845715266e-05, - "loss": 1.7148, - "step": 774500 - }, - { - "epoch": 11.16, - "learning_rate": 2.056060110396253e-05, - "loss": 1.7249, - "step": 775000 - }, - { - "epoch": 11.16, - "learning_rate": 2.05087782156933e-05, - "loss": 1.7156, - "step": 775500 - }, - { - "epoch": 11.17, - "learning_rate": 2.0456851473940565e-05, - "loss": 1.7209, - "step": 776000 - }, - { - "epoch": 11.18, - "learning_rate": 2.040492473218783e-05, - "loss": 1.7263, - "step": 776500 - }, - { - "epoch": 11.18, - "learning_rate": 2.0352997990435095e-05, - "loss": 1.7217, - "step": 777000 - }, - { - "epoch": 11.19, - "learning_rate": 2.0301175102165865e-05, - "loss": 1.7213, - "step": 777500 - }, - { - "epoch": 11.2, - "learning_rate": 2.024924836041313e-05, - "loss": 1.7238, - "step": 778000 - }, - { - "epoch": 11.21, - "learning_rate": 2.0197321618660394e-05, - "loss": 1.7207, - "step": 778500 - }, - { - "epoch": 11.21, - "learning_rate": 2.014539487690766e-05, - "loss": 1.7236, - "step": 779000 - }, - { - "epoch": 11.22, - "learning_rate": 2.0093468135154924e-05, - "loss": 1.7216, - "step": 779500 - }, - { - "epoch": 11.23, - "learning_rate": 2.0041645246885697e-05, - "loss": 1.7229, - "step": 780000 - }, - { - "epoch": 11.23, - "learning_rate": 1.998971850513296e-05, - "loss": 1.7196, - "step": 780500 - }, - { - "epoch": 11.24, - "learning_rate": 1.9937791763380224e-05, - "loss": 1.7199, - "step": 781000 - }, - { - "epoch": 11.25, - "learning_rate": 1.988586502162749e-05, - "loss": 1.7186, - "step": 781500 - }, - { - "epoch": 11.26, - "learning_rate": 1.9833938279874753e-05, - "loss": 1.7208, - "step": 782000 - }, - { - "epoch": 11.26, - "learning_rate": 1.9782011538122018e-05, - "loss": 1.7177, - "step": 782500 - }, - { - "epoch": 11.27, - "learning_rate": 1.9730084796369283e-05, - "loss": 1.7212, - "step": 783000 - }, - { - "epoch": 11.28, - "learning_rate": 1.9678158054616548e-05, - "loss": 1.7219, - "step": 783500 - }, - { - "epoch": 11.28, - "learning_rate": 1.9626335166347318e-05, - "loss": 1.7191, - "step": 784000 - }, - { - "epoch": 11.29, - "learning_rate": 1.9574512278078087e-05, - "loss": 1.7213, - "step": 784500 - }, - { - "epoch": 11.3, - "learning_rate": 1.9522585536325352e-05, - "loss": 1.7227, - "step": 785000 - }, - { - "epoch": 11.31, - "learning_rate": 1.9470658794572617e-05, - "loss": 1.7204, - "step": 785500 - }, - { - "epoch": 11.31, - "learning_rate": 1.9418732052819882e-05, - "loss": 1.7223, - "step": 786000 - }, - { - "epoch": 11.32, - "learning_rate": 1.9366805311067147e-05, - "loss": 1.7194, - "step": 786500 - }, - { - "epoch": 11.33, - "learning_rate": 1.9314878569314412e-05, - "loss": 1.7205, - "step": 787000 - }, - { - "epoch": 11.34, - "learning_rate": 1.9262951827561677e-05, - "loss": 1.7195, - "step": 787500 - }, - { - "epoch": 11.34, - "learning_rate": 1.9211128939292446e-05, - "loss": 1.7208, - "step": 788000 - }, - { - "epoch": 11.35, - "learning_rate": 1.915920219753971e-05, - "loss": 1.7161, - "step": 788500 - }, - { - "epoch": 11.36, - "learning_rate": 1.9107275455786976e-05, - "loss": 1.7181, - "step": 789000 - }, - { - "epoch": 11.36, - "learning_rate": 1.905534871403424e-05, - "loss": 1.7177, - "step": 789500 - }, - { - "epoch": 11.37, - "learning_rate": 1.9003421972281506e-05, - "loss": 1.7162, - "step": 790000 - }, - { - "epoch": 11.38, - "learning_rate": 1.8951599084012276e-05, - "loss": 1.717, - "step": 790500 - }, - { - "epoch": 11.39, - "learning_rate": 1.889967234225954e-05, - "loss": 1.7229, - "step": 791000 - }, - { - "epoch": 11.39, - "learning_rate": 1.8847745600506805e-05, - "loss": 1.7197, - "step": 791500 - }, - { - "epoch": 11.4, - "learning_rate": 1.879581885875407e-05, - "loss": 1.7194, - "step": 792000 - }, - { - "epoch": 11.41, - "learning_rate": 1.8743892117001335e-05, - "loss": 1.7226, - "step": 792500 - }, - { - "epoch": 11.41, - "learning_rate": 1.8692069228732105e-05, - "loss": 1.7229, - "step": 793000 - }, - { - "epoch": 11.42, - "learning_rate": 1.864014248697937e-05, - "loss": 1.7167, - "step": 793500 - }, - { - "epoch": 11.43, - "learning_rate": 1.8588215745226638e-05, - "loss": 1.7203, - "step": 794000 - }, - { - "epoch": 11.44, - "learning_rate": 1.85362890034739e-05, - "loss": 1.723, - "step": 794500 - }, - { - "epoch": 11.44, - "learning_rate": 1.8484362261721164e-05, - "loss": 1.7216, - "step": 795000 - }, - { - "epoch": 11.45, - "learning_rate": 1.8432539373451934e-05, - "loss": 1.7197, - "step": 795500 - }, - { - "epoch": 11.46, - "learning_rate": 1.83806126316992e-05, - "loss": 1.7171, - "step": 796000 - }, - { - "epoch": 11.46, - "learning_rate": 1.8328685889946464e-05, - "loss": 1.7186, - "step": 796500 - }, - { - "epoch": 11.47, - "learning_rate": 1.827675914819373e-05, - "loss": 1.7188, - "step": 797000 - }, - { - "epoch": 11.48, - "learning_rate": 1.82249362599245e-05, - "loss": 1.7182, - "step": 797500 - }, - { - "epoch": 11.49, - "learning_rate": 1.8173009518171763e-05, - "loss": 1.7204, - "step": 798000 - }, - { - "epoch": 11.49, - "learning_rate": 1.8121082776419028e-05, - "loss": 1.7228, - "step": 798500 - }, - { - "epoch": 11.5, - "learning_rate": 1.8069156034666293e-05, - "loss": 1.7191, - "step": 799000 - }, - { - "epoch": 11.51, - "learning_rate": 1.8017229292913558e-05, - "loss": 1.7188, - "step": 799500 - }, - { - "epoch": 11.52, - "learning_rate": 1.7965406404644328e-05, - "loss": 1.7207, - "step": 800000 - }, - { - "epoch": 11.52, - "learning_rate": 1.7913479662891593e-05, - "loss": 1.7174, - "step": 800500 - }, - { - "epoch": 11.53, - "learning_rate": 1.7861552921138858e-05, - "loss": 1.7187, - "step": 801000 - }, - { - "epoch": 11.54, - "learning_rate": 1.7809626179386122e-05, - "loss": 1.7177, - "step": 801500 - }, - { - "epoch": 11.54, - "learning_rate": 1.7757699437633387e-05, - "loss": 1.7169, - "step": 802000 - }, - { - "epoch": 11.55, - "learning_rate": 1.770587654936416e-05, - "loss": 1.7187, - "step": 802500 - }, - { - "epoch": 11.56, - "learning_rate": 1.7653949807611422e-05, - "loss": 1.7142, - "step": 803000 - }, - { - "epoch": 11.57, - "learning_rate": 1.760202306585869e-05, - "loss": 1.7166, - "step": 803500 - }, - { - "epoch": 11.57, - "learning_rate": 1.755009632410595e-05, - "loss": 1.7169, - "step": 804000 - }, - { - "epoch": 11.58, - "learning_rate": 1.7498273435836725e-05, - "loss": 1.7156, - "step": 804500 - }, - { - "epoch": 11.59, - "learning_rate": 1.7446346694083986e-05, - "loss": 1.7156, - "step": 805000 - }, - { - "epoch": 11.59, - "learning_rate": 1.739441995233125e-05, - "loss": 1.7159, - "step": 805500 - }, - { - "epoch": 11.6, - "learning_rate": 1.7342493210578516e-05, - "loss": 1.717, - "step": 806000 - }, - { - "epoch": 11.61, - "learning_rate": 1.729056646882578e-05, - "loss": 1.716, - "step": 806500 - }, - { - "epoch": 11.62, - "learning_rate": 1.723874358055655e-05, - "loss": 1.7217, - "step": 807000 - }, - { - "epoch": 11.62, - "learning_rate": 1.7186816838803815e-05, - "loss": 1.7168, - "step": 807500 - }, - { - "epoch": 11.63, - "learning_rate": 1.713489009705108e-05, - "loss": 1.7147, - "step": 808000 - }, - { - "epoch": 11.64, - "learning_rate": 1.7082963355298345e-05, - "loss": 1.7156, - "step": 808500 - }, - { - "epoch": 11.64, - "learning_rate": 1.7031140467029115e-05, - "loss": 1.7187, - "step": 809000 - }, - { - "epoch": 11.65, - "learning_rate": 1.697921372527638e-05, - "loss": 1.7168, - "step": 809500 - }, - { - "epoch": 11.66, - "learning_rate": 1.6927286983523648e-05, - "loss": 1.7184, - "step": 810000 - }, - { - "epoch": 11.67, - "learning_rate": 1.687536024177091e-05, - "loss": 1.7142, - "step": 810500 - }, - { - "epoch": 11.67, - "learning_rate": 1.6823433500018178e-05, - "loss": 1.7171, - "step": 811000 - }, - { - "epoch": 11.68, - "learning_rate": 1.6771610611748944e-05, - "loss": 1.7163, - "step": 811500 - }, - { - "epoch": 11.69, - "learning_rate": 1.6719683869996212e-05, - "loss": 1.7208, - "step": 812000 - }, - { - "epoch": 11.7, - "learning_rate": 1.6667757128243474e-05, - "loss": 1.714, - "step": 812500 - }, - { - "epoch": 11.7, - "learning_rate": 1.661583038649074e-05, - "loss": 1.7158, - "step": 813000 - }, - { - "epoch": 11.71, - "learning_rate": 1.656400749822151e-05, - "loss": 1.7167, - "step": 813500 - }, - { - "epoch": 11.72, - "learning_rate": 1.6512080756468777e-05, - "loss": 1.7153, - "step": 814000 - }, - { - "epoch": 11.72, - "learning_rate": 1.6460154014716038e-05, - "loss": 1.7164, - "step": 814500 - }, - { - "epoch": 11.73, - "learning_rate": 1.6408227272963303e-05, - "loss": 1.7186, - "step": 815000 - }, - { - "epoch": 11.74, - "learning_rate": 1.6356404384694073e-05, - "loss": 1.7179, - "step": 815500 - }, - { - "epoch": 11.75, - "learning_rate": 1.630447764294134e-05, - "loss": 1.7159, - "step": 816000 - }, - { - "epoch": 11.75, - "learning_rate": 1.6252550901188603e-05, - "loss": 1.7159, - "step": 816500 - }, - { - "epoch": 11.76, - "learning_rate": 1.6200624159435868e-05, - "loss": 1.7179, - "step": 817000 - }, - { - "epoch": 11.77, - "learning_rate": 1.6148801271166637e-05, - "loss": 1.7141, - "step": 817500 - }, - { - "epoch": 11.77, - "learning_rate": 1.6096874529413902e-05, - "loss": 1.7203, - "step": 818000 - }, - { - "epoch": 11.78, - "learning_rate": 1.604494778766117e-05, - "loss": 1.7158, - "step": 818500 - }, - { - "epoch": 11.79, - "learning_rate": 1.5993021045908432e-05, - "loss": 1.7126, - "step": 819000 - }, - { - "epoch": 11.8, - "learning_rate": 1.59410943041557e-05, - "loss": 1.7146, - "step": 819500 - }, - { - "epoch": 11.8, - "learning_rate": 1.5889271415886466e-05, - "loss": 1.713, - "step": 820000 - }, - { - "epoch": 11.81, - "learning_rate": 1.5837344674133735e-05, - "loss": 1.715, - "step": 820500 - }, - { - "epoch": 11.82, - "learning_rate": 1.5785417932380996e-05, - "loss": 1.7166, - "step": 821000 - }, - { - "epoch": 11.82, - "learning_rate": 1.5733491190628264e-05, - "loss": 1.7166, - "step": 821500 - }, - { - "epoch": 11.83, - "learning_rate": 1.5681564448875526e-05, - "loss": 1.7136, - "step": 822000 - }, - { - "epoch": 11.84, - "learning_rate": 1.562963770712279e-05, - "loss": 1.7152, - "step": 822500 - }, - { - "epoch": 11.85, - "learning_rate": 1.557781481885356e-05, - "loss": 1.7161, - "step": 823000 - }, - { - "epoch": 11.85, - "learning_rate": 1.552588807710083e-05, - "loss": 1.7157, - "step": 823500 - }, - { - "epoch": 11.86, - "learning_rate": 1.547396133534809e-05, - "loss": 1.7162, - "step": 824000 - }, - { - "epoch": 11.87, - "learning_rate": 1.5422034593595355e-05, - "loss": 1.717, - "step": 824500 - }, - { - "epoch": 11.88, - "learning_rate": 1.5370211705326125e-05, - "loss": 1.7182, - "step": 825000 - }, - { - "epoch": 11.88, - "learning_rate": 1.5318284963573393e-05, - "loss": 1.7174, - "step": 825500 - }, - { - "epoch": 11.89, - "learning_rate": 1.5266358221820655e-05, - "loss": 1.7126, - "step": 826000 - }, - { - "epoch": 11.9, - "learning_rate": 1.521443148006792e-05, - "loss": 1.7112, - "step": 826500 - }, - { - "epoch": 11.9, - "learning_rate": 1.5162608591798691e-05, - "loss": 1.7143, - "step": 827000 - }, - { - "epoch": 11.91, - "learning_rate": 1.5110681850045954e-05, - "loss": 1.7127, - "step": 827500 - }, - { - "epoch": 11.92, - "learning_rate": 1.505875510829322e-05, - "loss": 1.72, - "step": 828000 - }, - { - "epoch": 11.93, - "learning_rate": 1.5006828366540484e-05, - "loss": 1.7146, - "step": 828500 - }, - { - "epoch": 11.93, - "learning_rate": 1.4955005478271255e-05, - "loss": 1.7156, - "step": 829000 - }, - { - "epoch": 11.94, - "learning_rate": 1.4903078736518518e-05, - "loss": 1.7144, - "step": 829500 - }, - { - "epoch": 11.95, - "learning_rate": 1.4851151994765785e-05, - "loss": 1.7136, - "step": 830000 - }, - { - "epoch": 11.95, - "learning_rate": 1.479922525301305e-05, - "loss": 1.713, - "step": 830500 - }, - { - "epoch": 11.96, - "learning_rate": 1.474740236474382e-05, - "loss": 1.7165, - "step": 831000 - }, - { - "epoch": 11.97, - "learning_rate": 1.4695475622991084e-05, - "loss": 1.7137, - "step": 831500 - }, - { - "epoch": 11.98, - "learning_rate": 1.4643548881238351e-05, - "loss": 1.7115, - "step": 832000 - }, - { - "epoch": 11.98, - "learning_rate": 1.4591622139485614e-05, - "loss": 1.7183, - "step": 832500 - }, - { - "epoch": 11.99, - "learning_rate": 1.4539695397732881e-05, - "loss": 1.7142, - "step": 833000 - }, - { - "epoch": 12.0, - "learning_rate": 1.4487872509463649e-05, - "loss": 1.7127, - "step": 833500 - }, - { - "epoch": 12.0, - "eval_accuracy": 0.6658040615937678, - "eval_loss": 1.5836162567138672, - "eval_runtime": 1303.2983, - "eval_samples_per_second": 413.518, - "eval_steps_per_second": 25.845, - "step": 833676 - }, - { - "epoch": 12.0, - "learning_rate": 1.4435945767710915e-05, - "loss": 1.7129, - "step": 834000 - }, - { - "epoch": 12.01, - "learning_rate": 1.4384019025958179e-05, - "loss": 1.7078, - "step": 834500 - }, - { - "epoch": 12.02, - "learning_rate": 1.4332092284205445e-05, - "loss": 1.7095, - "step": 835000 - }, - { - "epoch": 12.03, - "learning_rate": 1.4280165542452708e-05, - "loss": 1.7136, - "step": 835500 - }, - { - "epoch": 12.03, - "learning_rate": 1.422834265418348e-05, - "loss": 1.7119, - "step": 836000 - }, - { - "epoch": 12.04, - "learning_rate": 1.4176415912430743e-05, - "loss": 1.7085, - "step": 836500 - }, - { - "epoch": 12.05, - "learning_rate": 1.4124489170678006e-05, - "loss": 1.7114, - "step": 837000 - }, - { - "epoch": 12.06, - "learning_rate": 1.4072562428925273e-05, - "loss": 1.7107, - "step": 837500 - }, - { - "epoch": 12.06, - "learning_rate": 1.4020739540656044e-05, - "loss": 1.7106, - "step": 838000 - }, - { - "epoch": 12.07, - "learning_rate": 1.3968812798903307e-05, - "loss": 1.7099, - "step": 838500 - }, - { - "epoch": 12.08, - "learning_rate": 1.3916886057150572e-05, - "loss": 1.7152, - "step": 839000 - }, - { - "epoch": 12.08, - "learning_rate": 1.3864959315397839e-05, - "loss": 1.7166, - "step": 839500 - }, - { - "epoch": 12.09, - "learning_rate": 1.3813136427128608e-05, - "loss": 1.7123, - "step": 840000 - }, - { - "epoch": 12.1, - "learning_rate": 1.3761209685375873e-05, - "loss": 1.7127, - "step": 840500 - }, - { - "epoch": 12.11, - "learning_rate": 1.3709282943623137e-05, - "loss": 1.7144, - "step": 841000 - }, - { - "epoch": 12.11, - "learning_rate": 1.3657356201870403e-05, - "loss": 1.7121, - "step": 841500 - }, - { - "epoch": 12.12, - "learning_rate": 1.3605429460117666e-05, - "loss": 1.714, - "step": 842000 - }, - { - "epoch": 12.13, - "learning_rate": 1.3553502718364933e-05, - "loss": 1.7112, - "step": 842500 - }, - { - "epoch": 12.13, - "learning_rate": 1.3501679830095701e-05, - "loss": 1.7121, - "step": 843000 - }, - { - "epoch": 12.14, - "learning_rate": 1.3449753088342967e-05, - "loss": 1.706, - "step": 843500 - }, - { - "epoch": 12.15, - "learning_rate": 1.339782634659023e-05, - "loss": 1.7121, - "step": 844000 - }, - { - "epoch": 12.16, - "learning_rate": 1.3345899604837494e-05, - "loss": 1.7075, - "step": 844500 - }, - { - "epoch": 12.16, - "learning_rate": 1.3294076716568265e-05, - "loss": 1.7105, - "step": 845000 - }, - { - "epoch": 12.17, - "learning_rate": 1.3242149974815532e-05, - "loss": 1.7086, - "step": 845500 - }, - { - "epoch": 12.18, - "learning_rate": 1.3190223233062795e-05, - "loss": 1.7103, - "step": 846000 - }, - { - "epoch": 12.18, - "learning_rate": 1.313829649131006e-05, - "loss": 1.7082, - "step": 846500 - }, - { - "epoch": 12.19, - "learning_rate": 1.3086369749557325e-05, - "loss": 1.7101, - "step": 847000 - }, - { - "epoch": 12.2, - "learning_rate": 1.3034546861288096e-05, - "loss": 1.7145, - "step": 847500 - }, - { - "epoch": 12.21, - "learning_rate": 1.2982620119535361e-05, - "loss": 1.7118, - "step": 848000 - }, - { - "epoch": 12.21, - "learning_rate": 1.2930693377782624e-05, - "loss": 1.7077, - "step": 848500 - }, - { - "epoch": 12.22, - "learning_rate": 1.2878766636029891e-05, - "loss": 1.7097, - "step": 849000 - }, - { - "epoch": 12.23, - "learning_rate": 1.2826943747760659e-05, - "loss": 1.7084, - "step": 849500 - }, - { - "epoch": 12.23, - "learning_rate": 1.2775017006007925e-05, - "loss": 1.7086, - "step": 850000 - }, - { - "epoch": 12.24, - "learning_rate": 1.2723090264255189e-05, - "loss": 1.7108, - "step": 850500 - }, - { - "epoch": 12.25, - "learning_rate": 1.2671163522502455e-05, - "loss": 1.7097, - "step": 851000 - }, - { - "epoch": 12.26, - "learning_rate": 1.2619340634233223e-05, - "loss": 1.7124, - "step": 851500 - }, - { - "epoch": 12.26, - "learning_rate": 1.256741389248049e-05, - "loss": 1.7127, - "step": 852000 - }, - { - "epoch": 12.27, - "learning_rate": 1.2515487150727753e-05, - "loss": 1.7104, - "step": 852500 - }, - { - "epoch": 12.28, - "learning_rate": 1.2463560408975018e-05, - "loss": 1.7132, - "step": 853000 - }, - { - "epoch": 12.29, - "learning_rate": 1.2411633667222283e-05, - "loss": 1.7116, - "step": 853500 - }, - { - "epoch": 12.29, - "learning_rate": 1.2359810778953054e-05, - "loss": 1.7118, - "step": 854000 - }, - { - "epoch": 12.3, - "learning_rate": 1.2307884037200317e-05, - "loss": 1.7152, - "step": 854500 - }, - { - "epoch": 12.31, - "learning_rate": 1.2255957295447582e-05, - "loss": 1.7078, - "step": 855000 - }, - { - "epoch": 12.31, - "learning_rate": 1.2204030553694847e-05, - "loss": 1.7121, - "step": 855500 - }, - { - "epoch": 12.32, - "learning_rate": 1.2152207665425617e-05, - "loss": 1.7093, - "step": 856000 - }, - { - "epoch": 12.33, - "learning_rate": 1.2100280923672883e-05, - "loss": 1.708, - "step": 856500 - }, - { - "epoch": 12.34, - "learning_rate": 1.2048354181920148e-05, - "loss": 1.7122, - "step": 857000 - }, - { - "epoch": 12.34, - "learning_rate": 1.1996427440167413e-05, - "loss": 1.7071, - "step": 857500 - }, - { - "epoch": 12.35, - "learning_rate": 1.1944604551898183e-05, - "loss": 1.7148, - "step": 858000 - }, - { - "epoch": 12.36, - "learning_rate": 1.1892677810145448e-05, - "loss": 1.7069, - "step": 858500 - }, - { - "epoch": 12.36, - "learning_rate": 1.1840751068392713e-05, - "loss": 1.7117, - "step": 859000 - }, - { - "epoch": 12.37, - "learning_rate": 1.1788824326639977e-05, - "loss": 1.7114, - "step": 859500 - }, - { - "epoch": 12.38, - "learning_rate": 1.1737001438370747e-05, - "loss": 1.7095, - "step": 860000 - }, - { - "epoch": 12.39, - "learning_rate": 1.1685074696618012e-05, - "loss": 1.7103, - "step": 860500 - }, - { - "epoch": 12.39, - "learning_rate": 1.1633147954865277e-05, - "loss": 1.7089, - "step": 861000 - }, - { - "epoch": 12.4, - "learning_rate": 1.1581221213112542e-05, - "loss": 1.709, - "step": 861500 - }, - { - "epoch": 12.41, - "learning_rate": 1.1529294471359805e-05, - "loss": 1.7089, - "step": 862000 - }, - { - "epoch": 12.41, - "learning_rate": 1.147736772960707e-05, - "loss": 1.7028, - "step": 862500 - }, - { - "epoch": 12.42, - "learning_rate": 1.1425544841337841e-05, - "loss": 1.7084, - "step": 863000 - }, - { - "epoch": 12.43, - "learning_rate": 1.1373618099585104e-05, - "loss": 1.7113, - "step": 863500 - }, - { - "epoch": 12.44, - "learning_rate": 1.1321691357832371e-05, - "loss": 1.7077, - "step": 864000 - }, - { - "epoch": 12.44, - "learning_rate": 1.1269764616079636e-05, - "loss": 1.7084, - "step": 864500 - }, - { - "epoch": 12.45, - "learning_rate": 1.1217837874326901e-05, - "loss": 1.7084, - "step": 865000 - }, - { - "epoch": 12.46, - "learning_rate": 1.116601498605767e-05, - "loss": 1.7087, - "step": 865500 - }, - { - "epoch": 12.47, - "learning_rate": 1.1114088244304935e-05, - "loss": 1.7086, - "step": 866000 - }, - { - "epoch": 12.47, - "learning_rate": 1.10621615025522e-05, - "loss": 1.7121, - "step": 866500 - }, - { - "epoch": 12.48, - "learning_rate": 1.1010234760799465e-05, - "loss": 1.7092, - "step": 867000 - }, - { - "epoch": 12.49, - "learning_rate": 1.0958411872530235e-05, - "loss": 1.7075, - "step": 867500 - }, - { - "epoch": 12.49, - "learning_rate": 1.09064851307775e-05, - "loss": 1.7053, - "step": 868000 - }, - { - "epoch": 12.5, - "learning_rate": 1.0854558389024765e-05, - "loss": 1.7118, - "step": 868500 - }, - { - "epoch": 12.51, - "learning_rate": 1.080263164727203e-05, - "loss": 1.7089, - "step": 869000 - }, - { - "epoch": 12.52, - "learning_rate": 1.07508087590028e-05, - "loss": 1.7085, - "step": 869500 - }, - { - "epoch": 12.52, - "learning_rate": 1.0698882017250064e-05, - "loss": 1.7098, - "step": 870000 - }, - { - "epoch": 12.53, - "learning_rate": 1.0646955275497329e-05, - "loss": 1.7075, - "step": 870500 - }, - { - "epoch": 12.54, - "learning_rate": 1.0595028533744594e-05, - "loss": 1.7067, - "step": 871000 - }, - { - "epoch": 12.54, - "learning_rate": 1.0543101791991857e-05, - "loss": 1.7055, - "step": 871500 - }, - { - "epoch": 12.55, - "learning_rate": 1.0491175050239124e-05, - "loss": 1.7062, - "step": 872000 - }, - { - "epoch": 12.56, - "learning_rate": 1.0439352161969893e-05, - "loss": 1.708, - "step": 872500 - }, - { - "epoch": 12.57, - "learning_rate": 1.0387425420217158e-05, - "loss": 1.7085, - "step": 873000 - }, - { - "epoch": 12.57, - "learning_rate": 1.0335498678464423e-05, - "loss": 1.7083, - "step": 873500 - }, - { - "epoch": 12.58, - "learning_rate": 1.0283571936711688e-05, - "loss": 1.7091, - "step": 874000 - }, - { - "epoch": 12.59, - "learning_rate": 1.0231645194958953e-05, - "loss": 1.707, - "step": 874500 - }, - { - "epoch": 12.59, - "learning_rate": 1.0179822306689723e-05, - "loss": 1.7085, - "step": 875000 - }, - { - "epoch": 12.6, - "learning_rate": 1.0127895564936987e-05, - "loss": 1.7052, - "step": 875500 - }, - { - "epoch": 12.61, - "learning_rate": 1.0075968823184252e-05, - "loss": 1.7092, - "step": 876000 - }, - { - "epoch": 12.62, - "learning_rate": 1.0024042081431517e-05, - "loss": 1.705, - "step": 876500 - }, - { - "epoch": 12.62, - "learning_rate": 9.972219193162287e-06, - "loss": 1.7111, - "step": 877000 - }, - { - "epoch": 12.63, - "learning_rate": 9.920292451409552e-06, - "loss": 1.712, - "step": 877500 - }, - { - "epoch": 12.64, - "learning_rate": 9.868365709656817e-06, - "loss": 1.7061, - "step": 878000 - }, - { - "epoch": 12.65, - "learning_rate": 9.816438967904082e-06, - "loss": 1.7059, - "step": 878500 - }, - { - "epoch": 12.65, - "learning_rate": 9.764616079634851e-06, - "loss": 1.7086, - "step": 879000 - }, - { - "epoch": 12.66, - "learning_rate": 9.712689337882116e-06, - "loss": 1.7065, - "step": 879500 - }, - { - "epoch": 12.67, - "learning_rate": 9.660762596129381e-06, - "loss": 1.703, - "step": 880000 - }, - { - "epoch": 12.67, - "learning_rate": 9.608835854376646e-06, - "loss": 1.7063, - "step": 880500 - }, - { - "epoch": 12.68, - "learning_rate": 9.556909112623911e-06, - "loss": 1.7087, - "step": 881000 - }, - { - "epoch": 12.69, - "learning_rate": 9.504982370871176e-06, - "loss": 1.7095, - "step": 881500 - }, - { - "epoch": 12.7, - "learning_rate": 9.45305562911844e-06, - "loss": 1.7099, - "step": 882000 - }, - { - "epoch": 12.7, - "learning_rate": 9.40123274084921e-06, - "loss": 1.709, - "step": 882500 - }, - { - "epoch": 12.71, - "learning_rate": 9.349305999096475e-06, - "loss": 1.7032, - "step": 883000 - }, - { - "epoch": 12.72, - "learning_rate": 9.29737925734374e-06, - "loss": 1.7104, - "step": 883500 - }, - { - "epoch": 12.72, - "learning_rate": 9.245452515591005e-06, - "loss": 1.706, - "step": 884000 - }, - { - "epoch": 12.73, - "learning_rate": 9.19352577383827e-06, - "loss": 1.7081, - "step": 884500 - }, - { - "epoch": 12.74, - "learning_rate": 9.14170288556904e-06, - "loss": 1.7088, - "step": 885000 - }, - { - "epoch": 12.75, - "learning_rate": 9.089776143816304e-06, - "loss": 1.7035, - "step": 885500 - }, - { - "epoch": 12.75, - "learning_rate": 9.03784940206357e-06, - "loss": 1.7031, - "step": 886000 - }, - { - "epoch": 12.76, - "learning_rate": 8.985922660310834e-06, - "loss": 1.7079, - "step": 886500 - }, - { - "epoch": 12.77, - "learning_rate": 8.933995918558097e-06, - "loss": 1.7058, - "step": 887000 - }, - { - "epoch": 12.77, - "learning_rate": 8.882173030288869e-06, - "loss": 1.7032, - "step": 887500 - }, - { - "epoch": 12.78, - "learning_rate": 8.830246288536134e-06, - "loss": 1.7037, - "step": 888000 - }, - { - "epoch": 12.79, - "learning_rate": 8.778319546783399e-06, - "loss": 1.703, - "step": 888500 - }, - { - "epoch": 12.8, - "learning_rate": 8.726392805030663e-06, - "loss": 1.7074, - "step": 889000 - }, - { - "epoch": 12.8, - "learning_rate": 8.674466063277928e-06, - "loss": 1.7061, - "step": 889500 - }, - { - "epoch": 12.81, - "learning_rate": 8.622643175008698e-06, - "loss": 1.7065, - "step": 890000 - }, - { - "epoch": 12.82, - "learning_rate": 8.570716433255963e-06, - "loss": 1.7058, - "step": 890500 - }, - { - "epoch": 12.83, - "learning_rate": 8.518789691503228e-06, - "loss": 1.7054, - "step": 891000 - }, - { - "epoch": 12.83, - "learning_rate": 8.466862949750493e-06, - "loss": 1.7071, - "step": 891500 - }, - { - "epoch": 12.84, - "learning_rate": 8.415040061481262e-06, - "loss": 1.703, - "step": 892000 - }, - { - "epoch": 12.85, - "learning_rate": 8.363113319728527e-06, - "loss": 1.7054, - "step": 892500 - }, - { - "epoch": 12.85, - "learning_rate": 8.311186577975792e-06, - "loss": 1.7059, - "step": 893000 - }, - { - "epoch": 12.86, - "learning_rate": 8.259259836223057e-06, - "loss": 1.7043, - "step": 893500 - }, - { - "epoch": 12.87, - "learning_rate": 8.207436947953827e-06, - "loss": 1.6995, - "step": 894000 - }, - { - "epoch": 12.88, - "learning_rate": 8.155510206201092e-06, - "loss": 1.7058, - "step": 894500 - }, - { - "epoch": 12.88, - "learning_rate": 8.103583464448357e-06, - "loss": 1.7018, - "step": 895000 - }, - { - "epoch": 12.89, - "learning_rate": 8.051656722695621e-06, - "loss": 1.7065, - "step": 895500 - }, - { - "epoch": 12.9, - "learning_rate": 7.999729980942886e-06, - "loss": 1.7049, - "step": 896000 - }, - { - "epoch": 12.9, - "learning_rate": 7.947907092673656e-06, - "loss": 1.7068, - "step": 896500 - }, - { - "epoch": 12.91, - "learning_rate": 7.895980350920921e-06, - "loss": 1.7001, - "step": 897000 - }, - { - "epoch": 12.92, - "learning_rate": 7.844053609168186e-06, - "loss": 1.7098, - "step": 897500 - }, - { - "epoch": 12.93, - "learning_rate": 7.79212686741545e-06, - "loss": 1.7014, - "step": 898000 - }, - { - "epoch": 12.93, - "learning_rate": 7.740303979146222e-06, - "loss": 1.7061, - "step": 898500 - }, - { - "epoch": 12.94, - "learning_rate": 7.688377237393487e-06, - "loss": 1.705, - "step": 899000 - }, - { - "epoch": 12.95, - "learning_rate": 7.63645049564075e-06, - "loss": 1.7037, - "step": 899500 - }, - { - "epoch": 12.95, - "learning_rate": 7.584523753888015e-06, - "loss": 1.7083, - "step": 900000 - }, - { - "epoch": 12.96, - "learning_rate": 7.53259701213528e-06, - "loss": 1.7029, - "step": 900500 - }, - { - "epoch": 12.97, - "learning_rate": 7.4807741238660495e-06, - "loss": 1.7047, - "step": 901000 - }, - { - "epoch": 12.98, - "learning_rate": 7.4288473821133144e-06, - "loss": 1.7049, - "step": 901500 - }, - { - "epoch": 12.98, - "learning_rate": 7.376920640360579e-06, - "loss": 1.7073, - "step": 902000 - }, - { - "epoch": 12.99, - "learning_rate": 7.324993898607844e-06, - "loss": 1.7036, - "step": 902500 - }, - { - "epoch": 13.0, - "learning_rate": 7.273171010338614e-06, - "loss": 1.7011, - "step": 903000 - }, - { - "epoch": 13.0, - "eval_accuracy": 0.6677293660710026, - "eval_loss": 1.5736616849899292, - "eval_runtime": 1292.8595, - "eval_samples_per_second": 416.857, - "eval_steps_per_second": 26.054, - "step": 903149 - }, - { - "epoch": 13.01, - "learning_rate": 7.221244268585879e-06, - "loss": 1.7015, - "step": 903500 - }, - { - "epoch": 13.01, - "learning_rate": 7.1693175268331445e-06, - "loss": 1.7008, - "step": 904000 - }, - { - "epoch": 13.02, - "learning_rate": 7.1173907850804094e-06, - "loss": 1.7008, - "step": 904500 - }, - { - "epoch": 13.03, - "learning_rate": 7.065567896811179e-06, - "loss": 1.7008, - "step": 905000 - }, - { - "epoch": 13.03, - "learning_rate": 7.013641155058444e-06, - "loss": 1.7018, - "step": 905500 - }, - { - "epoch": 13.04, - "learning_rate": 6.961714413305709e-06, - "loss": 1.7048, - "step": 906000 - }, - { - "epoch": 13.05, - "learning_rate": 6.909787671552974e-06, - "loss": 1.7036, - "step": 906500 - }, - { - "epoch": 13.06, - "learning_rate": 6.857860929800238e-06, - "loss": 1.7012, - "step": 907000 - }, - { - "epoch": 13.06, - "learning_rate": 6.805934188047503e-06, - "loss": 1.6996, - "step": 907500 - }, - { - "epoch": 13.07, - "learning_rate": 6.754111299778273e-06, - "loss": 1.701, - "step": 908000 - }, - { - "epoch": 13.08, - "learning_rate": 6.702184558025538e-06, - "loss": 1.7037, - "step": 908500 - }, - { - "epoch": 13.08, - "learning_rate": 6.650257816272802e-06, - "loss": 1.7042, - "step": 909000 - }, - { - "epoch": 13.09, - "learning_rate": 6.598331074520067e-06, - "loss": 1.704, - "step": 909500 - }, - { - "epoch": 13.1, - "learning_rate": 6.546508186250838e-06, - "loss": 1.7035, - "step": 910000 - }, - { - "epoch": 13.11, - "learning_rate": 6.494581444498102e-06, - "loss": 1.7007, - "step": 910500 - }, - { - "epoch": 13.11, - "learning_rate": 6.4426547027453665e-06, - "loss": 1.703, - "step": 911000 - }, - { - "epoch": 13.12, - "learning_rate": 6.390727960992631e-06, - "loss": 1.7008, - "step": 911500 - }, - { - "epoch": 13.13, - "learning_rate": 6.338905072723401e-06, - "loss": 1.7057, - "step": 912000 - }, - { - "epoch": 13.13, - "learning_rate": 6.286978330970667e-06, - "loss": 1.7022, - "step": 912500 - }, - { - "epoch": 13.14, - "learning_rate": 6.235051589217932e-06, - "loss": 1.7011, - "step": 913000 - }, - { - "epoch": 13.15, - "learning_rate": 6.183124847465197e-06, - "loss": 1.7006, - "step": 913500 - }, - { - "epoch": 13.16, - "learning_rate": 6.131198105712461e-06, - "loss": 1.6992, - "step": 914000 - }, - { - "epoch": 13.16, - "learning_rate": 6.079375217443231e-06, - "loss": 1.702, - "step": 914500 - }, - { - "epoch": 13.17, - "learning_rate": 6.027448475690496e-06, - "loss": 1.6989, - "step": 915000 - }, - { - "epoch": 13.18, - "learning_rate": 5.975521733937761e-06, - "loss": 1.7008, - "step": 915500 - }, - { - "epoch": 13.18, - "learning_rate": 5.923594992185026e-06, - "loss": 1.7023, - "step": 916000 - }, - { - "epoch": 13.19, - "learning_rate": 5.871668250432291e-06, - "loss": 1.7007, - "step": 916500 - }, - { - "epoch": 13.2, - "learning_rate": 5.819845362163061e-06, - "loss": 1.6995, - "step": 917000 - }, - { - "epoch": 13.21, - "learning_rate": 5.767918620410325e-06, - "loss": 1.7002, - "step": 917500 - }, - { - "epoch": 13.21, - "learning_rate": 5.71599187865759e-06, - "loss": 1.7031, - "step": 918000 - }, - { - "epoch": 13.22, - "learning_rate": 5.664065136904855e-06, - "loss": 1.6987, - "step": 918500 - }, - { - "epoch": 13.23, - "learning_rate": 5.612138395152119e-06, - "loss": 1.7016, - "step": 919000 - }, - { - "epoch": 13.24, - "learning_rate": 5.56031550688289e-06, - "loss": 1.6996, - "step": 919500 - }, - { - "epoch": 13.24, - "learning_rate": 5.5083887651301545e-06, - "loss": 1.7036, - "step": 920000 - }, - { - "epoch": 13.25, - "learning_rate": 5.456462023377419e-06, - "loss": 1.7015, - "step": 920500 - }, - { - "epoch": 13.26, - "learning_rate": 5.404535281624684e-06, - "loss": 1.7019, - "step": 921000 - }, - { - "epoch": 13.26, - "learning_rate": 5.352712393355455e-06, - "loss": 1.7019, - "step": 921500 - }, - { - "epoch": 13.27, - "learning_rate": 5.30078565160272e-06, - "loss": 1.7024, - "step": 922000 - }, - { - "epoch": 13.28, - "learning_rate": 5.248858909849984e-06, - "loss": 1.7003, - "step": 922500 - }, - { - "epoch": 13.29, - "learning_rate": 5.196932168097249e-06, - "loss": 1.6995, - "step": 923000 - }, - { - "epoch": 13.29, - "learning_rate": 5.145109279828019e-06, - "loss": 1.6998, - "step": 923500 - }, - { - "epoch": 13.3, - "learning_rate": 5.093182538075283e-06, - "loss": 1.6999, - "step": 924000 - }, - { - "epoch": 13.31, - "learning_rate": 5.041255796322548e-06, - "loss": 1.6995, - "step": 924500 - }, - { - "epoch": 13.31, - "learning_rate": 4.989329054569814e-06, - "loss": 1.6965, - "step": 925000 - }, - { - "epoch": 13.32, - "learning_rate": 4.9375061663005835e-06, - "loss": 1.7033, - "step": 925500 - }, - { - "epoch": 13.33, - "learning_rate": 4.885579424547848e-06, - "loss": 1.6986, - "step": 926000 - }, - { - "epoch": 13.34, - "learning_rate": 4.833652682795113e-06, - "loss": 1.6984, - "step": 926500 - }, - { - "epoch": 13.34, - "learning_rate": 4.781725941042377e-06, - "loss": 1.7015, - "step": 927000 - }, - { - "epoch": 13.35, - "learning_rate": 4.729799199289642e-06, - "loss": 1.6982, - "step": 927500 - }, - { - "epoch": 13.36, - "learning_rate": 4.677976311020413e-06, - "loss": 1.7004, - "step": 928000 - }, - { - "epoch": 13.36, - "learning_rate": 4.626049569267677e-06, - "loss": 1.702, - "step": 928500 - }, - { - "epoch": 13.37, - "learning_rate": 4.5741228275149425e-06, - "loss": 1.7002, - "step": 929000 - }, - { - "epoch": 13.38, - "learning_rate": 4.5221960857622074e-06, - "loss": 1.7009, - "step": 929500 - }, - { - "epoch": 13.39, - "learning_rate": 4.4702693440094715e-06, - "loss": 1.6987, - "step": 930000 - }, - { - "epoch": 13.39, - "learning_rate": 4.418446455740242e-06, - "loss": 1.7007, - "step": 930500 - }, - { - "epoch": 13.4, - "learning_rate": 4.366519713987507e-06, - "loss": 1.6964, - "step": 931000 - }, - { - "epoch": 13.41, - "learning_rate": 4.314592972234771e-06, - "loss": 1.6999, - "step": 931500 - }, - { - "epoch": 13.42, - "learning_rate": 4.262666230482036e-06, - "loss": 1.7002, - "step": 932000 - }, - { - "epoch": 13.42, - "learning_rate": 4.210843342212806e-06, - "loss": 1.6982, - "step": 932500 - }, - { - "epoch": 13.43, - "learning_rate": 4.158916600460071e-06, - "loss": 1.697, - "step": 933000 - }, - { - "epoch": 13.44, - "learning_rate": 4.106989858707336e-06, - "loss": 1.7032, - "step": 933500 - }, - { - "epoch": 13.44, - "learning_rate": 4.055063116954601e-06, - "loss": 1.7021, - "step": 934000 - }, - { - "epoch": 13.45, - "learning_rate": 4.003136375201865e-06, - "loss": 1.7011, - "step": 934500 - }, - { - "epoch": 13.46, - "learning_rate": 3.9513134869326356e-06, - "loss": 1.7007, - "step": 935000 - }, - { - "epoch": 13.47, - "learning_rate": 3.8993867451799005e-06, - "loss": 1.6997, - "step": 935500 - }, - { - "epoch": 13.47, - "learning_rate": 3.847460003427165e-06, - "loss": 1.7, - "step": 936000 - }, - { - "epoch": 13.48, - "learning_rate": 3.79553326167443e-06, - "loss": 1.7009, - "step": 936500 - }, - { - "epoch": 13.49, - "learning_rate": 3.7436065199216947e-06, - "loss": 1.698, - "step": 937000 - }, - { - "epoch": 13.49, - "learning_rate": 3.6917836316524652e-06, - "loss": 1.703, - "step": 937500 - }, - { - "epoch": 13.5, - "learning_rate": 3.6398568898997293e-06, - "loss": 1.6989, - "step": 938000 - }, - { - "epoch": 13.51, - "learning_rate": 3.5879301481469946e-06, - "loss": 1.6989, - "step": 938500 - }, - { - "epoch": 13.52, - "learning_rate": 3.5360034063942595e-06, - "loss": 1.6994, - "step": 939000 - }, - { - "epoch": 13.52, - "learning_rate": 3.484180518125029e-06, - "loss": 1.7032, - "step": 939500 - }, - { - "epoch": 13.53, - "learning_rate": 3.432253776372294e-06, - "loss": 1.7012, - "step": 940000 - }, - { - "epoch": 13.54, - "learning_rate": 3.380327034619559e-06, - "loss": 1.6949, - "step": 940500 - }, - { - "epoch": 13.54, - "learning_rate": 3.3284002928668234e-06, - "loss": 1.6991, - "step": 941000 - }, - { - "epoch": 13.55, - "learning_rate": 3.2764735511140883e-06, - "loss": 1.6962, - "step": 941500 - }, - { - "epoch": 13.56, - "learning_rate": 3.224650662844859e-06, - "loss": 1.6984, - "step": 942000 - }, - { - "epoch": 13.57, - "learning_rate": 3.1727239210921233e-06, - "loss": 1.6974, - "step": 942500 - }, - { - "epoch": 13.57, - "learning_rate": 3.120797179339388e-06, - "loss": 1.6989, - "step": 943000 - }, - { - "epoch": 13.58, - "learning_rate": 3.0688704375866527e-06, - "loss": 1.6986, - "step": 943500 - }, - { - "epoch": 13.59, - "learning_rate": 3.017047549317423e-06, - "loss": 1.6972, - "step": 944000 - }, - { - "epoch": 13.6, - "learning_rate": 2.9651208075646876e-06, - "loss": 1.6985, - "step": 944500 - }, - { - "epoch": 13.6, - "learning_rate": 2.913194065811953e-06, - "loss": 1.6983, - "step": 945000 - }, - { - "epoch": 13.61, - "learning_rate": 2.8612673240592174e-06, - "loss": 1.7019, - "step": 945500 - }, - { - "epoch": 13.62, - "learning_rate": 2.809444435789988e-06, - "loss": 1.6978, - "step": 946000 - }, - { - "epoch": 13.62, - "learning_rate": 2.7575176940372524e-06, - "loss": 1.6995, - "step": 946500 - }, - { - "epoch": 13.63, - "learning_rate": 2.705590952284517e-06, - "loss": 1.6951, - "step": 947000 - }, - { - "epoch": 13.64, - "learning_rate": 2.6536642105317818e-06, - "loss": 1.7, - "step": 947500 - }, - { - "epoch": 13.65, - "learning_rate": 2.6017374687790467e-06, - "loss": 1.6994, - "step": 948000 - }, - { - "epoch": 13.65, - "learning_rate": 2.5499145805098167e-06, - "loss": 1.6966, - "step": 948500 - }, - { - "epoch": 13.66, - "learning_rate": 2.4979878387570816e-06, - "loss": 1.6966, - "step": 949000 - }, - { - "epoch": 13.67, - "learning_rate": 2.4460610970043465e-06, - "loss": 1.697, - "step": 949500 - }, - { - "epoch": 13.67, - "learning_rate": 2.394134355251611e-06, - "loss": 1.6988, - "step": 950000 - }, - { - "epoch": 13.68, - "learning_rate": 2.342207613498876e-06, - "loss": 1.6992, - "step": 950500 - }, - { - "epoch": 13.69, - "learning_rate": 2.290280871746141e-06, - "loss": 1.6961, - "step": 951000 - }, - { - "epoch": 13.7, - "learning_rate": 2.238457983476911e-06, - "loss": 1.7023, - "step": 951500 - }, - { - "epoch": 13.7, - "learning_rate": 2.1865312417241758e-06, - "loss": 1.7008, - "step": 952000 - }, - { - "epoch": 13.71, - "learning_rate": 2.1346044999714403e-06, - "loss": 1.7013, - "step": 952500 - }, - { - "epoch": 13.72, - "learning_rate": 2.082677758218705e-06, - "loss": 1.6989, - "step": 953000 - }, - { - "epoch": 13.72, - "learning_rate": 2.0307510164659696e-06, - "loss": 1.6959, - "step": 953500 - }, - { - "epoch": 13.73, - "learning_rate": 1.97892812819674e-06, - "loss": 1.6983, - "step": 954000 - }, - { - "epoch": 13.74, - "learning_rate": 1.9270013864440046e-06, - "loss": 1.7009, - "step": 954500 - }, - { - "epoch": 13.75, - "learning_rate": 1.8750746446912697e-06, - "loss": 1.6965, - "step": 955000 - }, - { - "epoch": 13.75, - "learning_rate": 1.8231479029385344e-06, - "loss": 1.697, - "step": 955500 - }, - { - "epoch": 13.76, - "learning_rate": 1.7713250146693047e-06, - "loss": 1.6969, - "step": 956000 - }, - { - "epoch": 13.77, - "learning_rate": 1.7193982729165694e-06, - "loss": 1.6936, - "step": 956500 - }, - { - "epoch": 13.78, - "learning_rate": 1.667471531163834e-06, - "loss": 1.7003, - "step": 957000 - }, - { - "epoch": 13.78, - "learning_rate": 1.6155447894110987e-06, - "loss": 1.6998, - "step": 957500 - }, - { - "epoch": 13.79, - "learning_rate": 1.563721901141869e-06, - "loss": 1.6969, - "step": 958000 - }, - { - "epoch": 13.8, - "learning_rate": 1.511795159389134e-06, - "loss": 1.6965, - "step": 958500 - }, - { - "epoch": 13.8, - "learning_rate": 1.4598684176363986e-06, - "loss": 1.6967, - "step": 959000 - }, - { - "epoch": 13.81, - "learning_rate": 1.4079416758836633e-06, - "loss": 1.6951, - "step": 959500 - }, - { - "epoch": 13.82, - "learning_rate": 1.3561187876144336e-06, - "loss": 1.6965, - "step": 960000 - }, - { - "epoch": 13.83, - "learning_rate": 1.3041920458616985e-06, - "loss": 1.696, - "step": 960500 - }, - { - "epoch": 13.83, - "learning_rate": 1.2522653041089632e-06, - "loss": 1.6984, - "step": 961000 - }, - { - "epoch": 13.84, - "learning_rate": 1.2003385623562278e-06, - "loss": 1.6992, - "step": 961500 - }, - { - "epoch": 13.85, - "learning_rate": 1.1484118206034927e-06, - "loss": 1.6976, - "step": 962000 - }, - { - "epoch": 13.85, - "learning_rate": 1.0965889323342628e-06, - "loss": 1.6984, - "step": 962500 - }, - { - "epoch": 13.86, - "learning_rate": 1.0446621905815277e-06, - "loss": 1.6995, - "step": 963000 - }, - { - "epoch": 13.87, - "learning_rate": 9.927354488287924e-07, - "loss": 1.6993, - "step": 963500 - }, - { - "epoch": 13.88, - "learning_rate": 9.408087070760572e-07, - "loss": 1.6974, - "step": 964000 - }, - { - "epoch": 13.88, - "learning_rate": 8.889858188068274e-07, - "loss": 1.6952, - "step": 964500 - }, - { - "epoch": 13.89, - "learning_rate": 8.370590770540923e-07, - "loss": 1.6941, - "step": 965000 - }, - { - "epoch": 13.9, - "learning_rate": 7.851323353013568e-07, - "loss": 1.6948, - "step": 965500 - }, - { - "epoch": 13.9, - "learning_rate": 7.332055935486216e-07, - "loss": 1.6992, - "step": 966000 - }, - { - "epoch": 13.91, - "learning_rate": 6.812788517958863e-07, - "loss": 1.7009, - "step": 966500 - }, - { - "epoch": 13.92, - "learning_rate": 6.294559635266566e-07, - "loss": 1.6947, - "step": 967000 - }, - { - "epoch": 13.93, - "learning_rate": 5.775292217739214e-07, - "loss": 1.7006, - "step": 967500 - }, - { - "epoch": 13.93, - "learning_rate": 5.256024800211861e-07, - "loss": 1.7017, - "step": 968000 - }, - { - "epoch": 13.94, - "learning_rate": 4.736757382684509e-07, - "loss": 1.6951, - "step": 968500 - }, - { - "epoch": 13.95, - "learning_rate": 4.218528499992211e-07, - "loss": 1.6955, - "step": 969000 - }, - { - "epoch": 13.96, - "learning_rate": 3.6992610824648584e-07, - "loss": 1.6968, - "step": 969500 - }, - { - "epoch": 13.96, - "learning_rate": 3.1799936649375064e-07, - "loss": 1.6982, - "step": 970000 - }, - { - "epoch": 13.97, - "learning_rate": 2.660726247410154e-07, - "loss": 1.6923, - "step": 970500 - }, - { - "epoch": 13.98, - "learning_rate": 2.1414588298828015e-07, - "loss": 1.6925, - "step": 971000 - }, - { - "epoch": 13.98, - "learning_rate": 1.6232299471905037e-07, - "loss": 1.695, - "step": 971500 - }, - { - "epoch": 13.99, - "learning_rate": 1.1039625296631512e-07, - "loss": 1.6977, - "step": 972000 - }, - { - "epoch": 14.0, - "learning_rate": 5.846951121357988e-08, - "loss": 1.7, - "step": 972500 - }, - { - "epoch": 14.0, - "eval_accuracy": 0.6690181349948218, - "eval_loss": 1.5665007829666138, - "eval_runtime": 1297.3439, - "eval_samples_per_second": 415.416, - "eval_steps_per_second": 25.964, - "step": 972622 - } - ], - "max_steps": 972622, - "num_train_epochs": 14, - "total_flos": 7.363280880004055e+18, - "trial_name": null, - "trial_params": null -}