diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" deleted file mode 100644--- "a/last-checkpoint/trainer_state.json" +++ /dev/null @@ -1,11812 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 14.0, - "global_step": 972622, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 6.763399204600845e-05, - "loss": 6.9832, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 7.520427275451058e-05, - "loss": 5.5271, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 7.961254711978457e-05, - "loss": 4.6913, - "step": 1500 - }, - { - "epoch": 0.03, - "learning_rate": 8.275270773909965e-05, - "loss": 4.2907, - "step": 2000 - }, - { - "epoch": 0.04, - "learning_rate": 8.517839539271491e-05, - "loss": 4.0302, - "step": 2500 - }, - { - "epoch": 0.04, - "learning_rate": 8.716825914421595e-05, - "loss": 3.8379, - "step": 3000 - }, - { - "epoch": 0.05, - "learning_rate": 8.884385574179297e-05, - "loss": 3.6906, - "step": 3500 - }, - { - "epoch": 0.06, - "learning_rate": 9.030114272368873e-05, - "loss": 3.5642, - "step": 4000 - }, - { - "epoch": 0.06, - "learning_rate": 9.158138540964539e-05, - "loss": 3.4704, - "step": 4500 - }, - { - "epoch": 0.07, - "learning_rate": 9.273119601765861e-05, - "loss": 3.3821, - "step": 5000 - }, - { - "epoch": 0.08, - "learning_rate": 9.376714826181023e-05, - "loss": 3.3064, - "step": 5500 - }, - { - "epoch": 0.09, - "learning_rate": 9.471669412880503e-05, - "loss": 3.2476, - "step": 6000 - }, - { - "epoch": 0.09, - "learning_rate": 9.558668751136338e-05, - "loss": 3.1892, - "step": 6500 - }, - { - "epoch": 0.1, - "learning_rate": 9.639540886232853e-05, - "loss": 3.1299, - "step": 7000 - }, - { - "epoch": 0.11, - "learning_rate": 9.714529240377096e-05, - "loss": 3.0869, - "step": 7500 - }, - { - "epoch": 0.12, - "learning_rate": 9.784957770827781e-05, - "loss": 3.0405, - "step": 8000 - }, - { - "epoch": 0.12, - "learning_rate": 9.850850144881638e-05, - "loss": 2.9989, - "step": 8500 - }, - { - "epoch": 0.13, - "learning_rate": 9.91322455339213e-05, - "loss": 2.9583, - "step": 9000 - }, - { - "epoch": 0.14, - "learning_rate": 9.97198937413398e-05, - "loss": 2.923, - "step": 9500 - }, - { - "epoch": 0.14, - "learning_rate": 9.997382892215664e-05, - "loss": 2.8859, - "step": 10000 - }, - { - "epoch": 0.15, - "learning_rate": 9.99221098873709e-05, - "loss": 2.8536, - "step": 10500 - }, - { - "epoch": 0.16, - "learning_rate": 9.987018314561817e-05, - "loss": 2.8222, - "step": 11000 - }, - { - "epoch": 0.17, - "learning_rate": 9.981846411083245e-05, - "loss": 2.7972, - "step": 11500 - }, - { - "epoch": 0.17, - "learning_rate": 9.97665373690797e-05, - "loss": 2.7657, - "step": 12000 - }, - { - "epoch": 0.18, - "learning_rate": 9.971481833429398e-05, - "loss": 2.7461, - "step": 12500 - }, - { - "epoch": 0.19, - "learning_rate": 9.966289159254125e-05, - "loss": 2.721, - "step": 13000 - }, - { - "epoch": 0.19, - "learning_rate": 9.961117255775553e-05, - "loss": 2.7012, - "step": 13500 - }, - { - "epoch": 0.2, - "learning_rate": 9.95592458160028e-05, - "loss": 2.6758, - "step": 14000 - }, - { - "epoch": 0.21, - "learning_rate": 9.950752678121706e-05, - "loss": 2.666, - "step": 14500 - }, - { - "epoch": 0.22, - "learning_rate": 9.945560003946433e-05, - "loss": 2.6412, - "step": 15000 - }, - { - "epoch": 0.22, - "learning_rate": 9.94038810046786e-05, - "loss": 2.6264, - "step": 15500 - }, - { - "epoch": 0.23, - "learning_rate": 9.935195426292587e-05, - "loss": 2.6089, - "step": 16000 - }, - { - "epoch": 0.24, - "learning_rate": 9.930023522814015e-05, - "loss": 2.5915, - "step": 16500 - }, - { - "epoch": 0.24, - "learning_rate": 9.92483084863874e-05, - "loss": 2.5783, - "step": 17000 - }, - { - "epoch": 0.25, - "learning_rate": 9.919658945160168e-05, - "loss": 2.558, - "step": 17500 - }, - { - "epoch": 0.26, - "learning_rate": 9.914466270984895e-05, - "loss": 2.5481, - "step": 18000 - }, - { - "epoch": 0.27, - "learning_rate": 9.909294367506321e-05, - "loss": 2.5376, - "step": 18500 - }, - { - "epoch": 0.27, - "learning_rate": 9.90410169333105e-05, - "loss": 2.5236, - "step": 19000 - }, - { - "epoch": 0.28, - "learning_rate": 9.898929789852477e-05, - "loss": 2.5129, - "step": 19500 - }, - { - "epoch": 0.29, - "learning_rate": 9.893737115677203e-05, - "loss": 2.5024, - "step": 20000 - }, - { - "epoch": 0.3, - "learning_rate": 9.88856521219863e-05, - "loss": 2.4841, - "step": 20500 - }, - { - "epoch": 0.3, - "learning_rate": 9.883372538023356e-05, - "loss": 2.4802, - "step": 21000 - }, - { - "epoch": 0.31, - "learning_rate": 9.878200634544784e-05, - "loss": 2.4725, - "step": 21500 - }, - { - "epoch": 0.32, - "learning_rate": 9.873007960369511e-05, - "loss": 2.4581, - "step": 22000 - }, - { - "epoch": 0.32, - "learning_rate": 9.867836056890939e-05, - "loss": 2.4493, - "step": 22500 - }, - { - "epoch": 0.33, - "learning_rate": 9.862643382715665e-05, - "loss": 2.4399, - "step": 23000 - }, - { - "epoch": 0.34, - "learning_rate": 9.857471479237093e-05, - "loss": 2.4297, - "step": 23500 - }, - { - "epoch": 0.35, - "learning_rate": 9.85227880506182e-05, - "loss": 2.4259, - "step": 24000 - }, - { - "epoch": 0.35, - "learning_rate": 9.847106901583248e-05, - "loss": 2.4199, - "step": 24500 - }, - { - "epoch": 0.36, - "learning_rate": 9.841914227407973e-05, - "loss": 2.4068, - "step": 25000 - }, - { - "epoch": 0.37, - "learning_rate": 9.836742323929401e-05, - "loss": 2.4023, - "step": 25500 - }, - { - "epoch": 0.37, - "learning_rate": 9.831549649754126e-05, - "loss": 2.3903, - "step": 26000 - }, - { - "epoch": 0.38, - "learning_rate": 9.826377746275554e-05, - "loss": 2.3849, - "step": 26500 - }, - { - "epoch": 0.39, - "learning_rate": 9.821185072100282e-05, - "loss": 2.3787, - "step": 27000 - }, - { - "epoch": 0.4, - "learning_rate": 9.816013168621709e-05, - "loss": 2.3722, - "step": 27500 - }, - { - "epoch": 0.4, - "learning_rate": 9.810820494446436e-05, - "loss": 2.3646, - "step": 28000 - }, - { - "epoch": 0.41, - "learning_rate": 9.805648590967863e-05, - "loss": 2.3537, - "step": 28500 - }, - { - "epoch": 0.42, - "learning_rate": 9.800455916792589e-05, - "loss": 2.353, - "step": 29000 - }, - { - "epoch": 0.42, - "learning_rate": 9.795284013314017e-05, - "loss": 2.3468, - "step": 29500 - }, - { - "epoch": 0.43, - "learning_rate": 9.790091339138744e-05, - "loss": 2.3371, - "step": 30000 - }, - { - "epoch": 0.44, - "learning_rate": 9.784919435660171e-05, - "loss": 2.3346, - "step": 30500 - }, - { - "epoch": 0.45, - "learning_rate": 9.779726761484898e-05, - "loss": 2.3244, - "step": 31000 - }, - { - "epoch": 0.45, - "learning_rate": 9.774554858006325e-05, - "loss": 2.3233, - "step": 31500 - }, - { - "epoch": 0.46, - "learning_rate": 9.769362183831051e-05, - "loss": 2.3214, - "step": 32000 - }, - { - "epoch": 0.47, - "learning_rate": 9.764190280352479e-05, - "loss": 2.312, - "step": 32500 - }, - { - "epoch": 0.48, - "learning_rate": 9.758997606177206e-05, - "loss": 2.3099, - "step": 33000 - }, - { - "epoch": 0.48, - "learning_rate": 9.753825702698634e-05, - "loss": 2.3025, - "step": 33500 - }, - { - "epoch": 0.49, - "learning_rate": 9.748633028523359e-05, - "loss": 2.2935, - "step": 34000 - }, - { - "epoch": 0.5, - "learning_rate": 9.743461125044787e-05, - "loss": 2.2924, - "step": 34500 - }, - { - "epoch": 0.5, - "learning_rate": 9.738268450869514e-05, - "loss": 2.2895, - "step": 35000 - }, - { - "epoch": 0.51, - "learning_rate": 9.73309654739094e-05, - "loss": 2.2806, - "step": 35500 - }, - { - "epoch": 0.52, - "learning_rate": 9.727903873215668e-05, - "loss": 2.2815, - "step": 36000 - }, - { - "epoch": 0.53, - "learning_rate": 9.722731969737096e-05, - "loss": 2.2735, - "step": 36500 - }, - { - "epoch": 0.53, - "learning_rate": 9.717539295561822e-05, - "loss": 2.2697, - "step": 37000 - }, - { - "epoch": 0.54, - "learning_rate": 9.71236739208325e-05, - "loss": 2.2686, - "step": 37500 - }, - { - "epoch": 0.55, - "learning_rate": 9.707174717907976e-05, - "loss": 2.2617, - "step": 38000 - }, - { - "epoch": 0.55, - "learning_rate": 9.702002814429404e-05, - "loss": 2.2561, - "step": 38500 - }, - { - "epoch": 0.56, - "learning_rate": 9.69681014025413e-05, - "loss": 2.2533, - "step": 39000 - }, - { - "epoch": 0.57, - "learning_rate": 9.691638236775557e-05, - "loss": 2.2484, - "step": 39500 - }, - { - "epoch": 0.58, - "learning_rate": 9.686445562600284e-05, - "loss": 2.2471, - "step": 40000 - }, - { - "epoch": 0.58, - "learning_rate": 9.681273659121712e-05, - "loss": 2.2443, - "step": 40500 - }, - { - "epoch": 0.59, - "learning_rate": 9.676080984946439e-05, - "loss": 2.2331, - "step": 41000 - }, - { - "epoch": 0.6, - "learning_rate": 9.670909081467867e-05, - "loss": 2.2349, - "step": 41500 - }, - { - "epoch": 0.6, - "learning_rate": 9.665716407292592e-05, - "loss": 2.2296, - "step": 42000 - }, - { - "epoch": 0.61, - "learning_rate": 9.66054450381402e-05, - "loss": 2.2296, - "step": 42500 - }, - { - "epoch": 0.62, - "learning_rate": 9.655351829638745e-05, - "loss": 2.2238, - "step": 43000 - }, - { - "epoch": 0.63, - "learning_rate": 9.650179926160173e-05, - "loss": 2.2193, - "step": 43500 - }, - { - "epoch": 0.63, - "learning_rate": 9.6449872519849e-05, - "loss": 2.2185, - "step": 44000 - }, - { - "epoch": 0.64, - "learning_rate": 9.639815348506328e-05, - "loss": 2.2157, - "step": 44500 - }, - { - "epoch": 0.65, - "learning_rate": 9.634622674331054e-05, - "loss": 2.2092, - "step": 45000 - }, - { - "epoch": 0.65, - "learning_rate": 9.629450770852482e-05, - "loss": 2.2096, - "step": 45500 - }, - { - "epoch": 0.66, - "learning_rate": 9.624258096677208e-05, - "loss": 2.2026, - "step": 46000 - }, - { - "epoch": 0.67, - "learning_rate": 9.619086193198635e-05, - "loss": 2.1971, - "step": 46500 - }, - { - "epoch": 0.68, - "learning_rate": 9.613893519023362e-05, - "loss": 2.1962, - "step": 47000 - }, - { - "epoch": 0.68, - "learning_rate": 9.60872161554479e-05, - "loss": 2.1918, - "step": 47500 - }, - { - "epoch": 0.69, - "learning_rate": 9.603528941369516e-05, - "loss": 2.1932, - "step": 48000 - }, - { - "epoch": 0.7, - "learning_rate": 9.598357037890943e-05, - "loss": 2.1844, - "step": 48500 - }, - { - "epoch": 0.71, - "learning_rate": 9.59316436371567e-05, - "loss": 2.1872, - "step": 49000 - }, - { - "epoch": 0.71, - "learning_rate": 9.587992460237098e-05, - "loss": 2.1781, - "step": 49500 - }, - { - "epoch": 0.72, - "learning_rate": 9.582799786061825e-05, - "loss": 2.1797, - "step": 50000 - }, - { - "epoch": 0.73, - "learning_rate": 9.577627882583253e-05, - "loss": 2.1793, - "step": 50500 - }, - { - "epoch": 0.73, - "learning_rate": 9.572435208407978e-05, - "loss": 2.1736, - "step": 51000 - }, - { - "epoch": 0.74, - "learning_rate": 9.567263304929406e-05, - "loss": 2.1723, - "step": 51500 - }, - { - "epoch": 0.75, - "learning_rate": 9.562070630754133e-05, - "loss": 2.1732, - "step": 52000 - }, - { - "epoch": 0.76, - "learning_rate": 9.55689872727556e-05, - "loss": 2.1654, - "step": 52500 - }, - { - "epoch": 0.76, - "learning_rate": 9.551706053100287e-05, - "loss": 2.1678, - "step": 53000 - }, - { - "epoch": 0.77, - "learning_rate": 9.546534149621714e-05, - "loss": 2.1632, - "step": 53500 - }, - { - "epoch": 0.78, - "learning_rate": 9.54134147544644e-05, - "loss": 2.158, - "step": 54000 - }, - { - "epoch": 0.78, - "learning_rate": 9.536169571967868e-05, - "loss": 2.1551, - "step": 54500 - }, - { - "epoch": 0.79, - "learning_rate": 9.530976897792595e-05, - "loss": 2.1553, - "step": 55000 - }, - { - "epoch": 0.8, - "learning_rate": 9.525804994314023e-05, - "loss": 2.1469, - "step": 55500 - }, - { - "epoch": 0.81, - "learning_rate": 9.520612320138748e-05, - "loss": 2.1517, - "step": 56000 - }, - { - "epoch": 0.81, - "learning_rate": 9.515440416660176e-05, - "loss": 2.1499, - "step": 56500 - }, - { - "epoch": 0.82, - "learning_rate": 9.510247742484903e-05, - "loss": 2.144, - "step": 57000 - }, - { - "epoch": 0.83, - "learning_rate": 9.50507583900633e-05, - "loss": 2.1478, - "step": 57500 - }, - { - "epoch": 0.83, - "learning_rate": 9.499883164831058e-05, - "loss": 2.1369, - "step": 58000 - }, - { - "epoch": 0.84, - "learning_rate": 9.494711261352485e-05, - "loss": 2.1394, - "step": 58500 - }, - { - "epoch": 0.85, - "learning_rate": 9.489518587177211e-05, - "loss": 2.1391, - "step": 59000 - }, - { - "epoch": 0.86, - "learning_rate": 9.484346683698639e-05, - "loss": 2.1344, - "step": 59500 - }, - { - "epoch": 0.86, - "learning_rate": 9.479154009523364e-05, - "loss": 2.1371, - "step": 60000 - }, - { - "epoch": 0.87, - "learning_rate": 9.473982106044792e-05, - "loss": 2.1289, - "step": 60500 - }, - { - "epoch": 0.88, - "learning_rate": 9.468789431869519e-05, - "loss": 2.1271, - "step": 61000 - }, - { - "epoch": 0.89, - "learning_rate": 9.463617528390946e-05, - "loss": 2.1276, - "step": 61500 - }, - { - "epoch": 0.89, - "learning_rate": 9.458424854215673e-05, - "loss": 2.1255, - "step": 62000 - }, - { - "epoch": 0.9, - "learning_rate": 9.453252950737101e-05, - "loss": 2.1192, - "step": 62500 - }, - { - "epoch": 0.91, - "learning_rate": 9.448060276561828e-05, - "loss": 2.1166, - "step": 63000 - }, - { - "epoch": 0.91, - "learning_rate": 9.442888373083256e-05, - "loss": 2.1171, - "step": 63500 - }, - { - "epoch": 0.92, - "learning_rate": 9.437695698907981e-05, - "loss": 2.1127, - "step": 64000 - }, - { - "epoch": 0.93, - "learning_rate": 9.432523795429409e-05, - "loss": 2.1116, - "step": 64500 - }, - { - "epoch": 0.94, - "learning_rate": 9.427331121254134e-05, - "loss": 2.1121, - "step": 65000 - }, - { - "epoch": 0.94, - "learning_rate": 9.422159217775562e-05, - "loss": 2.1093, - "step": 65500 - }, - { - "epoch": 0.95, - "learning_rate": 9.41696654360029e-05, - "loss": 2.1103, - "step": 66000 - }, - { - "epoch": 0.96, - "learning_rate": 9.411794640121717e-05, - "loss": 2.1073, - "step": 66500 - }, - { - "epoch": 0.96, - "learning_rate": 9.406601965946444e-05, - "loss": 2.1057, - "step": 67000 - }, - { - "epoch": 0.97, - "learning_rate": 9.401430062467871e-05, - "loss": 2.1012, - "step": 67500 - }, - { - "epoch": 0.98, - "learning_rate": 9.396237388292597e-05, - "loss": 2.1012, - "step": 68000 - }, - { - "epoch": 0.99, - "learning_rate": 9.391065484814025e-05, - "loss": 2.1041, - "step": 68500 - }, - { - "epoch": 0.99, - "learning_rate": 9.385872810638751e-05, - "loss": 2.0974, - "step": 69000 - }, - { - "epoch": 1.0, - "eval_accuracy": 0.6071486253641302, - "eval_loss": 1.974609375, - "eval_runtime": 652.8749, - "eval_samples_per_second": 825.483, - "eval_steps_per_second": 34.396, - "step": 69473 - }, - { - "epoch": 1.0, - "learning_rate": 9.380700907160179e-05, - "loss": 2.0964, - "step": 69500 - }, - { - "epoch": 1.01, - "learning_rate": 9.375508232984905e-05, - "loss": 2.094, - "step": 70000 - }, - { - "epoch": 1.01, - "learning_rate": 9.370336329506332e-05, - "loss": 2.09, - "step": 70500 - }, - { - "epoch": 1.02, - "learning_rate": 9.365143655331059e-05, - "loss": 2.0926, - "step": 71000 - }, - { - "epoch": 1.03, - "learning_rate": 9.359971751852487e-05, - "loss": 2.0925, - "step": 71500 - }, - { - "epoch": 1.04, - "learning_rate": 9.354779077677214e-05, - "loss": 2.0854, - "step": 72000 - }, - { - "epoch": 1.04, - "learning_rate": 9.349607174198642e-05, - "loss": 2.086, - "step": 72500 - }, - { - "epoch": 1.05, - "learning_rate": 9.344414500023367e-05, - "loss": 2.0858, - "step": 73000 - }, - { - "epoch": 1.06, - "learning_rate": 9.339242596544795e-05, - "loss": 2.0805, - "step": 73500 - }, - { - "epoch": 1.07, - "learning_rate": 9.33404992236952e-05, - "loss": 2.0817, - "step": 74000 - }, - { - "epoch": 1.07, - "learning_rate": 9.328878018890948e-05, - "loss": 2.0814, - "step": 74500 - }, - { - "epoch": 1.08, - "learning_rate": 9.323685344715676e-05, - "loss": 2.0786, - "step": 75000 - }, - { - "epoch": 1.09, - "learning_rate": 9.318513441237103e-05, - "loss": 2.0745, - "step": 75500 - }, - { - "epoch": 1.09, - "learning_rate": 9.31332076706183e-05, - "loss": 2.0713, - "step": 76000 - }, - { - "epoch": 1.1, - "learning_rate": 9.308148863583257e-05, - "loss": 2.0749, - "step": 76500 - }, - { - "epoch": 1.11, - "learning_rate": 9.302956189407984e-05, - "loss": 2.0731, - "step": 77000 - }, - { - "epoch": 1.12, - "learning_rate": 9.297784285929412e-05, - "loss": 2.0711, - "step": 77500 - }, - { - "epoch": 1.12, - "learning_rate": 9.292591611754137e-05, - "loss": 2.0667, - "step": 78000 - }, - { - "epoch": 1.13, - "learning_rate": 9.287419708275565e-05, - "loss": 2.068, - "step": 78500 - }, - { - "epoch": 1.14, - "learning_rate": 9.282227034100292e-05, - "loss": 2.0654, - "step": 79000 - }, - { - "epoch": 1.14, - "learning_rate": 9.277055130621718e-05, - "loss": 2.0655, - "step": 79500 - }, - { - "epoch": 1.15, - "learning_rate": 9.271862456446447e-05, - "loss": 2.062, - "step": 80000 - }, - { - "epoch": 1.16, - "learning_rate": 9.266690552967874e-05, - "loss": 2.0595, - "step": 80500 - }, - { - "epoch": 1.17, - "learning_rate": 9.2614978787926e-05, - "loss": 2.0599, - "step": 81000 - }, - { - "epoch": 1.17, - "learning_rate": 9.256325975314028e-05, - "loss": 2.0633, - "step": 81500 - }, - { - "epoch": 1.18, - "learning_rate": 9.251133301138753e-05, - "loss": 2.0601, - "step": 82000 - }, - { - "epoch": 1.19, - "learning_rate": 9.245961397660181e-05, - "loss": 2.0581, - "step": 82500 - }, - { - "epoch": 1.19, - "learning_rate": 9.240768723484908e-05, - "loss": 2.0552, - "step": 83000 - }, - { - "epoch": 1.2, - "learning_rate": 9.235596820006336e-05, - "loss": 2.0571, - "step": 83500 - }, - { - "epoch": 1.21, - "learning_rate": 9.230404145831062e-05, - "loss": 2.0519, - "step": 84000 - }, - { - "epoch": 1.22, - "learning_rate": 9.22523224235249e-05, - "loss": 2.0507, - "step": 84500 - }, - { - "epoch": 1.22, - "learning_rate": 9.220039568177216e-05, - "loss": 2.0483, - "step": 85000 - }, - { - "epoch": 1.23, - "learning_rate": 9.214867664698643e-05, - "loss": 2.046, - "step": 85500 - }, - { - "epoch": 1.24, - "learning_rate": 9.20967499052337e-05, - "loss": 2.0438, - "step": 86000 - }, - { - "epoch": 1.25, - "learning_rate": 9.204503087044798e-05, - "loss": 2.0519, - "step": 86500 - }, - { - "epoch": 1.25, - "learning_rate": 9.199310412869523e-05, - "loss": 2.0494, - "step": 87000 - }, - { - "epoch": 1.26, - "learning_rate": 9.194138509390951e-05, - "loss": 2.0425, - "step": 87500 - }, - { - "epoch": 1.27, - "learning_rate": 9.188945835215678e-05, - "loss": 2.0423, - "step": 88000 - }, - { - "epoch": 1.27, - "learning_rate": 9.183773931737106e-05, - "loss": 2.0402, - "step": 88500 - }, - { - "epoch": 1.28, - "learning_rate": 9.178581257561833e-05, - "loss": 2.0439, - "step": 89000 - }, - { - "epoch": 1.29, - "learning_rate": 9.17340935408326e-05, - "loss": 2.0391, - "step": 89500 - }, - { - "epoch": 1.3, - "learning_rate": 9.168216679907986e-05, - "loss": 2.0378, - "step": 90000 - }, - { - "epoch": 1.3, - "learning_rate": 9.163044776429414e-05, - "loss": 2.0397, - "step": 90500 - }, - { - "epoch": 1.31, - "learning_rate": 9.15785210225414e-05, - "loss": 2.0361, - "step": 91000 - }, - { - "epoch": 1.32, - "learning_rate": 9.152680198775568e-05, - "loss": 2.0351, - "step": 91500 - }, - { - "epoch": 1.32, - "learning_rate": 9.147487524600295e-05, - "loss": 2.0312, - "step": 92000 - }, - { - "epoch": 1.33, - "learning_rate": 9.142315621121722e-05, - "loss": 2.0313, - "step": 92500 - }, - { - "epoch": 1.34, - "learning_rate": 9.137122946946448e-05, - "loss": 2.0284, - "step": 93000 - }, - { - "epoch": 1.35, - "learning_rate": 9.131951043467876e-05, - "loss": 2.028, - "step": 93500 - }, - { - "epoch": 1.35, - "learning_rate": 9.126758369292603e-05, - "loss": 2.0306, - "step": 94000 - }, - { - "epoch": 1.36, - "learning_rate": 9.121586465814031e-05, - "loss": 2.0318, - "step": 94500 - }, - { - "epoch": 1.37, - "learning_rate": 9.116393791638756e-05, - "loss": 2.0303, - "step": 95000 - }, - { - "epoch": 1.37, - "learning_rate": 9.111221888160184e-05, - "loss": 2.0246, - "step": 95500 - }, - { - "epoch": 1.38, - "learning_rate": 9.106029213984911e-05, - "loss": 2.0279, - "step": 96000 - }, - { - "epoch": 1.39, - "learning_rate": 9.100857310506337e-05, - "loss": 2.0274, - "step": 96500 - }, - { - "epoch": 1.4, - "learning_rate": 9.095664636331065e-05, - "loss": 2.0175, - "step": 97000 - }, - { - "epoch": 1.4, - "learning_rate": 9.090492732852492e-05, - "loss": 2.0204, - "step": 97500 - }, - { - "epoch": 1.41, - "learning_rate": 9.085300058677219e-05, - "loss": 2.0237, - "step": 98000 - }, - { - "epoch": 1.42, - "learning_rate": 9.080128155198646e-05, - "loss": 2.0192, - "step": 98500 - }, - { - "epoch": 1.43, - "learning_rate": 9.074935481023372e-05, - "loss": 2.0134, - "step": 99000 - }, - { - "epoch": 1.43, - "learning_rate": 9.0697635775448e-05, - "loss": 2.0191, - "step": 99500 - }, - { - "epoch": 1.44, - "learning_rate": 9.064570903369527e-05, - "loss": 2.0198, - "step": 100000 - }, - { - "epoch": 1.45, - "learning_rate": 9.059398999890954e-05, - "loss": 2.0122, - "step": 100500 - }, - { - "epoch": 1.45, - "learning_rate": 9.054206325715681e-05, - "loss": 2.014, - "step": 101000 - }, - { - "epoch": 1.46, - "learning_rate": 9.049034422237108e-05, - "loss": 2.0123, - "step": 101500 - }, - { - "epoch": 1.47, - "learning_rate": 9.043841748061834e-05, - "loss": 2.0093, - "step": 102000 - }, - { - "epoch": 1.48, - "learning_rate": 9.038669844583262e-05, - "loss": 2.0113, - "step": 102500 - }, - { - "epoch": 1.48, - "learning_rate": 9.033477170407989e-05, - "loss": 2.0137, - "step": 103000 - }, - { - "epoch": 1.49, - "learning_rate": 9.028305266929417e-05, - "loss": 2.0062, - "step": 103500 - }, - { - "epoch": 1.5, - "learning_rate": 9.023112592754142e-05, - "loss": 2.0074, - "step": 104000 - }, - { - "epoch": 1.5, - "learning_rate": 9.01794068927557e-05, - "loss": 2.0021, - "step": 104500 - }, - { - "epoch": 1.51, - "learning_rate": 9.012748015100297e-05, - "loss": 2.0069, - "step": 105000 - }, - { - "epoch": 1.52, - "learning_rate": 9.007576111621725e-05, - "loss": 2.0073, - "step": 105500 - }, - { - "epoch": 1.53, - "learning_rate": 9.002383437446451e-05, - "loss": 2.0059, - "step": 106000 - }, - { - "epoch": 1.53, - "learning_rate": 8.997211533967879e-05, - "loss": 2.0013, - "step": 106500 - }, - { - "epoch": 1.54, - "learning_rate": 8.992018859792605e-05, - "loss": 2.0013, - "step": 107000 - }, - { - "epoch": 1.55, - "learning_rate": 8.986846956314032e-05, - "loss": 2.0043, - "step": 107500 - }, - { - "epoch": 1.55, - "learning_rate": 8.981654282138759e-05, - "loss": 2.0009, - "step": 108000 - }, - { - "epoch": 1.56, - "learning_rate": 8.976482378660187e-05, - "loss": 2.0021, - "step": 108500 - }, - { - "epoch": 1.57, - "learning_rate": 8.971289704484913e-05, - "loss": 2.0004, - "step": 109000 - }, - { - "epoch": 1.58, - "learning_rate": 8.96611780100634e-05, - "loss": 1.9988, - "step": 109500 - }, - { - "epoch": 1.58, - "learning_rate": 8.960925126831067e-05, - "loss": 2.0009, - "step": 110000 - }, - { - "epoch": 1.59, - "learning_rate": 8.955753223352495e-05, - "loss": 1.9925, - "step": 110500 - }, - { - "epoch": 1.6, - "learning_rate": 8.950560549177222e-05, - "loss": 1.9919, - "step": 111000 - }, - { - "epoch": 1.6, - "learning_rate": 8.94538864569865e-05, - "loss": 1.9969, - "step": 111500 - }, - { - "epoch": 1.61, - "learning_rate": 8.940195971523375e-05, - "loss": 1.9928, - "step": 112000 - }, - { - "epoch": 1.62, - "learning_rate": 8.935024068044803e-05, - "loss": 1.996, - "step": 112500 - }, - { - "epoch": 1.63, - "learning_rate": 8.929831393869528e-05, - "loss": 1.9906, - "step": 113000 - }, - { - "epoch": 1.63, - "learning_rate": 8.924659490390956e-05, - "loss": 1.9976, - "step": 113500 - }, - { - "epoch": 1.64, - "learning_rate": 8.919466816215684e-05, - "loss": 1.9927, - "step": 114000 - }, - { - "epoch": 1.65, - "learning_rate": 8.91429491273711e-05, - "loss": 1.9937, - "step": 114500 - }, - { - "epoch": 1.66, - "learning_rate": 8.909102238561837e-05, - "loss": 1.9887, - "step": 115000 - }, - { - "epoch": 1.66, - "learning_rate": 8.903930335083265e-05, - "loss": 1.9878, - "step": 115500 - }, - { - "epoch": 1.67, - "learning_rate": 8.898737660907991e-05, - "loss": 1.992, - "step": 116000 - }, - { - "epoch": 1.68, - "learning_rate": 8.89356575742942e-05, - "loss": 1.9836, - "step": 116500 - }, - { - "epoch": 1.68, - "learning_rate": 8.888373083254145e-05, - "loss": 1.987, - "step": 117000 - }, - { - "epoch": 1.69, - "learning_rate": 8.883201179775573e-05, - "loss": 1.9801, - "step": 117500 - }, - { - "epoch": 1.7, - "learning_rate": 8.8780085056003e-05, - "loss": 1.9816, - "step": 118000 - }, - { - "epoch": 1.71, - "learning_rate": 8.872836602121726e-05, - "loss": 1.9911, - "step": 118500 - }, - { - "epoch": 1.71, - "learning_rate": 8.867643927946455e-05, - "loss": 1.9876, - "step": 119000 - }, - { - "epoch": 1.72, - "learning_rate": 8.862472024467881e-05, - "loss": 1.9835, - "step": 119500 - }, - { - "epoch": 1.73, - "learning_rate": 8.857279350292608e-05, - "loss": 1.9814, - "step": 120000 - }, - { - "epoch": 1.73, - "learning_rate": 8.852107446814036e-05, - "loss": 1.9809, - "step": 120500 - }, - { - "epoch": 1.74, - "learning_rate": 8.846914772638761e-05, - "loss": 1.9794, - "step": 121000 - }, - { - "epoch": 1.75, - "learning_rate": 8.841742869160189e-05, - "loss": 1.9818, - "step": 121500 - }, - { - "epoch": 1.76, - "learning_rate": 8.836550194984916e-05, - "loss": 1.977, - "step": 122000 - }, - { - "epoch": 1.76, - "learning_rate": 8.831378291506343e-05, - "loss": 1.9787, - "step": 122500 - }, - { - "epoch": 1.77, - "learning_rate": 8.82618561733107e-05, - "loss": 1.9769, - "step": 123000 - }, - { - "epoch": 1.78, - "learning_rate": 8.821013713852497e-05, - "loss": 1.9777, - "step": 123500 - }, - { - "epoch": 1.78, - "learning_rate": 8.815821039677223e-05, - "loss": 1.9745, - "step": 124000 - }, - { - "epoch": 1.79, - "learning_rate": 8.810649136198651e-05, - "loss": 1.974, - "step": 124500 - }, - { - "epoch": 1.8, - "learning_rate": 8.805456462023378e-05, - "loss": 1.9743, - "step": 125000 - }, - { - "epoch": 1.81, - "learning_rate": 8.800284558544806e-05, - "loss": 1.9734, - "step": 125500 - }, - { - "epoch": 1.81, - "learning_rate": 8.795091884369531e-05, - "loss": 1.9779, - "step": 126000 - }, - { - "epoch": 1.82, - "learning_rate": 8.789919980890959e-05, - "loss": 1.9774, - "step": 126500 - }, - { - "epoch": 1.83, - "learning_rate": 8.784727306715686e-05, - "loss": 1.9762, - "step": 127000 - }, - { - "epoch": 1.84, - "learning_rate": 8.779555403237112e-05, - "loss": 1.971, - "step": 127500 - }, - { - "epoch": 1.84, - "learning_rate": 8.77436272906184e-05, - "loss": 1.9664, - "step": 128000 - }, - { - "epoch": 1.85, - "learning_rate": 8.769190825583268e-05, - "loss": 1.969, - "step": 128500 - }, - { - "epoch": 1.86, - "learning_rate": 8.763998151407994e-05, - "loss": 1.9673, - "step": 129000 - }, - { - "epoch": 1.86, - "learning_rate": 8.758826247929422e-05, - "loss": 1.9667, - "step": 129500 - }, - { - "epoch": 1.87, - "learning_rate": 8.753633573754148e-05, - "loss": 1.9699, - "step": 130000 - }, - { - "epoch": 1.88, - "learning_rate": 8.748461670275576e-05, - "loss": 1.9691, - "step": 130500 - }, - { - "epoch": 1.89, - "learning_rate": 8.743268996100302e-05, - "loss": 1.9673, - "step": 131000 - }, - { - "epoch": 1.89, - "learning_rate": 8.73809709262173e-05, - "loss": 1.9623, - "step": 131500 - }, - { - "epoch": 1.9, - "learning_rate": 8.732904418446456e-05, - "loss": 1.963, - "step": 132000 - }, - { - "epoch": 1.91, - "learning_rate": 8.727732514967884e-05, - "loss": 1.9629, - "step": 132500 - }, - { - "epoch": 1.91, - "learning_rate": 8.722539840792611e-05, - "loss": 1.967, - "step": 133000 - }, - { - "epoch": 1.92, - "learning_rate": 8.717367937314039e-05, - "loss": 1.9657, - "step": 133500 - }, - { - "epoch": 1.93, - "learning_rate": 8.712175263138764e-05, - "loss": 1.9658, - "step": 134000 - }, - { - "epoch": 1.94, - "learning_rate": 8.707003359660192e-05, - "loss": 1.9617, - "step": 134500 - }, - { - "epoch": 1.94, - "learning_rate": 8.701810685484917e-05, - "loss": 1.9596, - "step": 135000 - }, - { - "epoch": 1.95, - "learning_rate": 8.696638782006345e-05, - "loss": 1.9639, - "step": 135500 - }, - { - "epoch": 1.96, - "learning_rate": 8.691446107831073e-05, - "loss": 1.9594, - "step": 136000 - }, - { - "epoch": 1.96, - "learning_rate": 8.6862742043525e-05, - "loss": 1.9596, - "step": 136500 - }, - { - "epoch": 1.97, - "learning_rate": 8.681081530177227e-05, - "loss": 1.9588, - "step": 137000 - }, - { - "epoch": 1.98, - "learning_rate": 8.675909626698654e-05, - "loss": 1.9524, - "step": 137500 - }, - { - "epoch": 1.99, - "learning_rate": 8.67071695252338e-05, - "loss": 1.9547, - "step": 138000 - }, - { - "epoch": 1.99, - "learning_rate": 8.665545049044808e-05, - "loss": 1.9586, - "step": 138500 - }, - { - "epoch": 2.0, - "eval_accuracy": 0.6284000055251463, - "eval_loss": 1.830078125, - "eval_runtime": 654.5639, - "eval_samples_per_second": 823.353, - "eval_steps_per_second": 34.307, - "step": 138946 - }, - { - "epoch": 2.0, - "learning_rate": 8.660352374869534e-05, - "loss": 1.9514, - "step": 139000 - }, - { - "epoch": 2.01, - "learning_rate": 8.655180471390962e-05, - "loss": 1.9524, - "step": 139500 - }, - { - "epoch": 2.02, - "learning_rate": 8.649987797215689e-05, - "loss": 1.9539, - "step": 140000 - }, - { - "epoch": 2.02, - "learning_rate": 8.644815893737115e-05, - "loss": 1.9484, - "step": 140500 - }, - { - "epoch": 2.03, - "learning_rate": 8.639623219561842e-05, - "loss": 1.9489, - "step": 141000 - }, - { - "epoch": 2.04, - "learning_rate": 8.63445131608327e-05, - "loss": 1.954, - "step": 141500 - }, - { - "epoch": 2.04, - "learning_rate": 8.629258641907997e-05, - "loss": 1.9527, - "step": 142000 - }, - { - "epoch": 2.05, - "learning_rate": 8.624086738429425e-05, - "loss": 1.9509, - "step": 142500 - }, - { - "epoch": 2.06, - "learning_rate": 8.61889406425415e-05, - "loss": 1.9507, - "step": 143000 - }, - { - "epoch": 2.07, - "learning_rate": 8.613722160775578e-05, - "loss": 1.9465, - "step": 143500 - }, - { - "epoch": 2.07, - "learning_rate": 8.608529486600305e-05, - "loss": 1.9438, - "step": 144000 - }, - { - "epoch": 2.08, - "learning_rate": 8.603357583121732e-05, - "loss": 1.9451, - "step": 144500 - }, - { - "epoch": 2.09, - "learning_rate": 8.59816490894646e-05, - "loss": 1.9469, - "step": 145000 - }, - { - "epoch": 2.09, - "learning_rate": 8.592993005467887e-05, - "loss": 1.9459, - "step": 145500 - }, - { - "epoch": 2.1, - "learning_rate": 8.587800331292613e-05, - "loss": 1.9404, - "step": 146000 - }, - { - "epoch": 2.11, - "learning_rate": 8.58262842781404e-05, - "loss": 1.9465, - "step": 146500 - }, - { - "epoch": 2.12, - "learning_rate": 8.577435753638767e-05, - "loss": 1.9439, - "step": 147000 - }, - { - "epoch": 2.12, - "learning_rate": 8.572263850160195e-05, - "loss": 1.9431, - "step": 147500 - }, - { - "epoch": 2.13, - "learning_rate": 8.56707117598492e-05, - "loss": 1.9412, - "step": 148000 - }, - { - "epoch": 2.14, - "learning_rate": 8.561899272506348e-05, - "loss": 1.9458, - "step": 148500 - }, - { - "epoch": 2.14, - "learning_rate": 8.556706598331075e-05, - "loss": 1.9452, - "step": 149000 - }, - { - "epoch": 2.15, - "learning_rate": 8.551534694852503e-05, - "loss": 1.9436, - "step": 149500 - }, - { - "epoch": 2.16, - "learning_rate": 8.54634202067723e-05, - "loss": 1.9423, - "step": 150000 - }, - { - "epoch": 2.17, - "learning_rate": 8.541170117198657e-05, - "loss": 1.941, - "step": 150500 - }, - { - "epoch": 2.17, - "learning_rate": 8.535977443023383e-05, - "loss": 1.939, - "step": 151000 - }, - { - "epoch": 2.18, - "learning_rate": 8.53080553954481e-05, - "loss": 1.9381, - "step": 151500 - }, - { - "epoch": 2.19, - "learning_rate": 8.525612865369536e-05, - "loss": 1.9416, - "step": 152000 - }, - { - "epoch": 2.2, - "learning_rate": 8.520440961890964e-05, - "loss": 1.9382, - "step": 152500 - }, - { - "epoch": 2.2, - "learning_rate": 8.515248287715691e-05, - "loss": 1.9404, - "step": 153000 - }, - { - "epoch": 2.21, - "learning_rate": 8.510076384237119e-05, - "loss": 1.9399, - "step": 153500 - }, - { - "epoch": 2.22, - "learning_rate": 8.504883710061845e-05, - "loss": 1.9358, - "step": 154000 - }, - { - "epoch": 2.22, - "learning_rate": 8.499711806583273e-05, - "loss": 1.9338, - "step": 154500 - }, - { - "epoch": 2.23, - "learning_rate": 8.494519132407999e-05, - "loss": 1.9366, - "step": 155000 - }, - { - "epoch": 2.24, - "learning_rate": 8.489347228929426e-05, - "loss": 1.9342, - "step": 155500 - }, - { - "epoch": 2.25, - "learning_rate": 8.484154554754153e-05, - "loss": 1.9324, - "step": 156000 - }, - { - "epoch": 2.25, - "learning_rate": 8.478982651275581e-05, - "loss": 1.9324, - "step": 156500 - }, - { - "epoch": 2.26, - "learning_rate": 8.473789977100306e-05, - "loss": 1.9328, - "step": 157000 - }, - { - "epoch": 2.27, - "learning_rate": 8.468618073621734e-05, - "loss": 1.9345, - "step": 157500 - }, - { - "epoch": 2.27, - "learning_rate": 8.463425399446462e-05, - "loss": 1.9389, - "step": 158000 - }, - { - "epoch": 2.28, - "learning_rate": 8.458253495967889e-05, - "loss": 1.9304, - "step": 158500 - }, - { - "epoch": 2.29, - "learning_rate": 8.453060821792616e-05, - "loss": 1.9303, - "step": 159000 - }, - { - "epoch": 2.3, - "learning_rate": 8.447888918314043e-05, - "loss": 1.9285, - "step": 159500 - }, - { - "epoch": 2.3, - "learning_rate": 8.442696244138769e-05, - "loss": 1.9328, - "step": 160000 - }, - { - "epoch": 2.31, - "learning_rate": 8.437524340660197e-05, - "loss": 1.9321, - "step": 160500 - }, - { - "epoch": 2.32, - "learning_rate": 8.432331666484924e-05, - "loss": 1.9304, - "step": 161000 - }, - { - "epoch": 2.32, - "learning_rate": 8.427159763006351e-05, - "loss": 1.9284, - "step": 161500 - }, - { - "epoch": 2.33, - "learning_rate": 8.421967088831078e-05, - "loss": 1.9279, - "step": 162000 - }, - { - "epoch": 2.34, - "learning_rate": 8.416795185352505e-05, - "loss": 1.9262, - "step": 162500 - }, - { - "epoch": 2.35, - "learning_rate": 8.411602511177231e-05, - "loss": 1.9275, - "step": 163000 - }, - { - "epoch": 2.35, - "learning_rate": 8.406430607698659e-05, - "loss": 1.9244, - "step": 163500 - }, - { - "epoch": 2.36, - "learning_rate": 8.401237933523386e-05, - "loss": 1.9273, - "step": 164000 - }, - { - "epoch": 2.37, - "learning_rate": 8.396066030044814e-05, - "loss": 1.9264, - "step": 164500 - }, - { - "epoch": 2.38, - "learning_rate": 8.390873355869539e-05, - "loss": 1.9267, - "step": 165000 - }, - { - "epoch": 2.38, - "learning_rate": 8.385701452390967e-05, - "loss": 1.9224, - "step": 165500 - }, - { - "epoch": 2.39, - "learning_rate": 8.380508778215694e-05, - "loss": 1.9232, - "step": 166000 - }, - { - "epoch": 2.4, - "learning_rate": 8.37533687473712e-05, - "loss": 1.9209, - "step": 166500 - }, - { - "epoch": 2.4, - "learning_rate": 8.370144200561848e-05, - "loss": 1.9213, - "step": 167000 - }, - { - "epoch": 2.41, - "learning_rate": 8.364972297083276e-05, - "loss": 1.9191, - "step": 167500 - }, - { - "epoch": 2.42, - "learning_rate": 8.359779622908002e-05, - "loss": 1.9213, - "step": 168000 - }, - { - "epoch": 2.43, - "learning_rate": 8.35460771942943e-05, - "loss": 1.9191, - "step": 168500 - }, - { - "epoch": 2.43, - "learning_rate": 8.349415045254155e-05, - "loss": 1.9165, - "step": 169000 - }, - { - "epoch": 2.44, - "learning_rate": 8.344243141775583e-05, - "loss": 1.921, - "step": 169500 - }, - { - "epoch": 2.45, - "learning_rate": 8.33905046760031e-05, - "loss": 1.9179, - "step": 170000 - }, - { - "epoch": 2.45, - "learning_rate": 8.333878564121737e-05, - "loss": 1.9175, - "step": 170500 - }, - { - "epoch": 2.46, - "learning_rate": 8.328685889946464e-05, - "loss": 1.9255, - "step": 171000 - }, - { - "epoch": 2.47, - "learning_rate": 8.323513986467892e-05, - "loss": 1.9195, - "step": 171500 - }, - { - "epoch": 2.48, - "learning_rate": 8.318321312292619e-05, - "loss": 1.9166, - "step": 172000 - }, - { - "epoch": 2.48, - "learning_rate": 8.313149408814047e-05, - "loss": 1.9215, - "step": 172500 - }, - { - "epoch": 2.49, - "learning_rate": 8.307956734638772e-05, - "loss": 1.9168, - "step": 173000 - }, - { - "epoch": 2.5, - "learning_rate": 8.3027848311602e-05, - "loss": 1.9176, - "step": 173500 - }, - { - "epoch": 2.5, - "learning_rate": 8.297592156984925e-05, - "loss": 1.9157, - "step": 174000 - }, - { - "epoch": 2.51, - "learning_rate": 8.292420253506353e-05, - "loss": 1.9197, - "step": 174500 - }, - { - "epoch": 2.52, - "learning_rate": 8.287227579331081e-05, - "loss": 1.9129, - "step": 175000 - }, - { - "epoch": 2.53, - "learning_rate": 8.282055675852508e-05, - "loss": 1.9155, - "step": 175500 - }, - { - "epoch": 2.53, - "learning_rate": 8.276863001677234e-05, - "loss": 1.9158, - "step": 176000 - }, - { - "epoch": 2.54, - "learning_rate": 8.271691098198662e-05, - "loss": 1.9164, - "step": 176500 - }, - { - "epoch": 2.55, - "learning_rate": 8.266498424023388e-05, - "loss": 1.918, - "step": 177000 - }, - { - "epoch": 2.55, - "learning_rate": 8.261326520544815e-05, - "loss": 1.9084, - "step": 177500 - }, - { - "epoch": 2.56, - "learning_rate": 8.256133846369542e-05, - "loss": 1.9101, - "step": 178000 - }, - { - "epoch": 2.57, - "learning_rate": 8.25096194289097e-05, - "loss": 1.9149, - "step": 178500 - }, - { - "epoch": 2.58, - "learning_rate": 8.245769268715696e-05, - "loss": 1.914, - "step": 179000 - }, - { - "epoch": 2.58, - "learning_rate": 8.240597365237123e-05, - "loss": 1.9118, - "step": 179500 - }, - { - "epoch": 2.59, - "learning_rate": 8.23540469106185e-05, - "loss": 1.909, - "step": 180000 - }, - { - "epoch": 2.6, - "learning_rate": 8.230232787583278e-05, - "loss": 1.9102, - "step": 180500 - }, - { - "epoch": 2.61, - "learning_rate": 8.225040113408005e-05, - "loss": 1.9091, - "step": 181000 - }, - { - "epoch": 2.61, - "learning_rate": 8.219868209929433e-05, - "loss": 1.9135, - "step": 181500 - }, - { - "epoch": 2.62, - "learning_rate": 8.214675535754158e-05, - "loss": 1.9071, - "step": 182000 - }, - { - "epoch": 2.63, - "learning_rate": 8.209503632275586e-05, - "loss": 1.9131, - "step": 182500 - }, - { - "epoch": 2.63, - "learning_rate": 8.204310958100313e-05, - "loss": 1.9074, - "step": 183000 - }, - { - "epoch": 2.64, - "learning_rate": 8.19913905462174e-05, - "loss": 1.908, - "step": 183500 - }, - { - "epoch": 2.65, - "learning_rate": 8.193946380446467e-05, - "loss": 1.9074, - "step": 184000 - }, - { - "epoch": 2.66, - "learning_rate": 8.188774476967894e-05, - "loss": 1.9076, - "step": 184500 - }, - { - "epoch": 2.66, - "learning_rate": 8.18358180279262e-05, - "loss": 1.9133, - "step": 185000 - }, - { - "epoch": 2.67, - "learning_rate": 8.178409899314048e-05, - "loss": 1.9086, - "step": 185500 - }, - { - "epoch": 2.68, - "learning_rate": 8.173217225138775e-05, - "loss": 1.9083, - "step": 186000 - }, - { - "epoch": 2.68, - "learning_rate": 8.168045321660203e-05, - "loss": 1.9109, - "step": 186500 - }, - { - "epoch": 2.69, - "learning_rate": 8.162852647484928e-05, - "loss": 1.9034, - "step": 187000 - }, - { - "epoch": 2.7, - "learning_rate": 8.157680744006356e-05, - "loss": 1.9057, - "step": 187500 - }, - { - "epoch": 2.71, - "learning_rate": 8.152488069831083e-05, - "loss": 1.9068, - "step": 188000 - }, - { - "epoch": 2.71, - "learning_rate": 8.14731616635251e-05, - "loss": 1.9033, - "step": 188500 - }, - { - "epoch": 2.72, - "learning_rate": 8.142123492177238e-05, - "loss": 1.9017, - "step": 189000 - }, - { - "epoch": 2.73, - "learning_rate": 8.136951588698665e-05, - "loss": 1.9021, - "step": 189500 - }, - { - "epoch": 2.73, - "learning_rate": 8.131758914523391e-05, - "loss": 1.9068, - "step": 190000 - }, - { - "epoch": 2.74, - "learning_rate": 8.126587011044819e-05, - "loss": 1.9016, - "step": 190500 - }, - { - "epoch": 2.75, - "learning_rate": 8.121394336869544e-05, - "loss": 1.9028, - "step": 191000 - }, - { - "epoch": 2.76, - "learning_rate": 8.116222433390972e-05, - "loss": 1.8994, - "step": 191500 - }, - { - "epoch": 2.76, - "learning_rate": 8.111029759215699e-05, - "loss": 1.9, - "step": 192000 - }, - { - "epoch": 2.77, - "learning_rate": 8.105857855737126e-05, - "loss": 1.9026, - "step": 192500 - }, - { - "epoch": 2.78, - "learning_rate": 8.100665181561853e-05, - "loss": 1.9022, - "step": 193000 - }, - { - "epoch": 2.79, - "learning_rate": 8.095493278083281e-05, - "loss": 1.902, - "step": 193500 - }, - { - "epoch": 2.79, - "learning_rate": 8.090300603908006e-05, - "loss": 1.9006, - "step": 194000 - }, - { - "epoch": 2.8, - "learning_rate": 8.085128700429434e-05, - "loss": 1.9015, - "step": 194500 - }, - { - "epoch": 2.81, - "learning_rate": 8.079936026254161e-05, - "loss": 1.9018, - "step": 195000 - }, - { - "epoch": 2.81, - "learning_rate": 8.074764122775589e-05, - "loss": 1.9006, - "step": 195500 - }, - { - "epoch": 2.82, - "learning_rate": 8.069571448600314e-05, - "loss": 1.9021, - "step": 196000 - }, - { - "epoch": 2.83, - "learning_rate": 8.064399545121742e-05, - "loss": 1.8977, - "step": 196500 - }, - { - "epoch": 2.84, - "learning_rate": 8.05920687094647e-05, - "loss": 1.8992, - "step": 197000 - }, - { - "epoch": 2.84, - "learning_rate": 8.054034967467897e-05, - "loss": 1.8952, - "step": 197500 - }, - { - "epoch": 2.85, - "learning_rate": 8.048842293292624e-05, - "loss": 1.8974, - "step": 198000 - }, - { - "epoch": 2.86, - "learning_rate": 8.043670389814051e-05, - "loss": 1.8979, - "step": 198500 - }, - { - "epoch": 2.86, - "learning_rate": 8.038477715638777e-05, - "loss": 1.9013, - "step": 199000 - }, - { - "epoch": 2.87, - "learning_rate": 8.033305812160205e-05, - "loss": 1.8929, - "step": 199500 - }, - { - "epoch": 2.88, - "learning_rate": 8.028113137984931e-05, - "loss": 1.8936, - "step": 200000 - }, - { - "epoch": 2.89, - "learning_rate": 8.022941234506359e-05, - "loss": 1.8984, - "step": 200500 - }, - { - "epoch": 2.89, - "learning_rate": 8.017748560331086e-05, - "loss": 1.895, - "step": 201000 - }, - { - "epoch": 2.9, - "learning_rate": 8.012576656852512e-05, - "loss": 1.8959, - "step": 201500 - }, - { - "epoch": 2.91, - "learning_rate": 8.007383982677239e-05, - "loss": 1.8935, - "step": 202000 - }, - { - "epoch": 2.91, - "learning_rate": 8.002212079198667e-05, - "loss": 1.893, - "step": 202500 - }, - { - "epoch": 2.92, - "learning_rate": 7.997019405023394e-05, - "loss": 1.8923, - "step": 203000 - }, - { - "epoch": 2.93, - "learning_rate": 7.991847501544822e-05, - "loss": 1.8926, - "step": 203500 - }, - { - "epoch": 2.94, - "learning_rate": 7.986654827369547e-05, - "loss": 1.8934, - "step": 204000 - }, - { - "epoch": 2.94, - "learning_rate": 7.981482923890975e-05, - "loss": 1.8973, - "step": 204500 - }, - { - "epoch": 2.95, - "learning_rate": 7.976290249715702e-05, - "loss": 1.8885, - "step": 205000 - }, - { - "epoch": 2.96, - "learning_rate": 7.971118346237128e-05, - "loss": 1.8895, - "step": 205500 - }, - { - "epoch": 2.97, - "learning_rate": 7.965925672061856e-05, - "loss": 1.8869, - "step": 206000 - }, - { - "epoch": 2.97, - "learning_rate": 7.960753768583283e-05, - "loss": 1.8889, - "step": 206500 - }, - { - "epoch": 2.98, - "learning_rate": 7.95556109440801e-05, - "loss": 1.8932, - "step": 207000 - }, - { - "epoch": 2.99, - "learning_rate": 7.950389190929437e-05, - "loss": 1.8897, - "step": 207500 - }, - { - "epoch": 2.99, - "learning_rate": 7.945196516754163e-05, - "loss": 1.889, - "step": 208000 - }, - { - "epoch": 3.0, - "eval_accuracy": 0.6382595753723315, - "eval_loss": 1.7626953125, - "eval_runtime": 652.4368, - "eval_samples_per_second": 826.037, - "eval_steps_per_second": 34.419, - "step": 208419 - }, - { - "epoch": 3.0, - "learning_rate": 7.94002461327559e-05, - "loss": 1.8923, - "step": 208500 - }, - { - "epoch": 3.01, - "learning_rate": 7.934831939100317e-05, - "loss": 1.8823, - "step": 209000 - }, - { - "epoch": 3.02, - "learning_rate": 7.929660035621745e-05, - "loss": 1.8852, - "step": 209500 - }, - { - "epoch": 3.02, - "learning_rate": 7.924467361446472e-05, - "loss": 1.8847, - "step": 210000 - }, - { - "epoch": 3.03, - "learning_rate": 7.919295457967898e-05, - "loss": 1.8836, - "step": 210500 - }, - { - "epoch": 3.04, - "learning_rate": 7.914102783792627e-05, - "loss": 1.8852, - "step": 211000 - }, - { - "epoch": 3.04, - "learning_rate": 7.908930880314054e-05, - "loss": 1.8807, - "step": 211500 - }, - { - "epoch": 3.05, - "learning_rate": 7.90373820613878e-05, - "loss": 1.8877, - "step": 212000 - }, - { - "epoch": 3.06, - "learning_rate": 7.898566302660208e-05, - "loss": 1.88, - "step": 212500 - }, - { - "epoch": 3.07, - "learning_rate": 7.893373628484933e-05, - "loss": 1.8825, - "step": 213000 - }, - { - "epoch": 3.07, - "learning_rate": 7.888201725006361e-05, - "loss": 1.8838, - "step": 213500 - }, - { - "epoch": 3.08, - "learning_rate": 7.883009050831088e-05, - "loss": 1.8845, - "step": 214000 - }, - { - "epoch": 3.09, - "learning_rate": 7.877837147352515e-05, - "loss": 1.8829, - "step": 214500 - }, - { - "epoch": 3.09, - "learning_rate": 7.872644473177242e-05, - "loss": 1.8885, - "step": 215000 - }, - { - "epoch": 3.1, - "learning_rate": 7.86747256969867e-05, - "loss": 1.882, - "step": 215500 - }, - { - "epoch": 3.11, - "learning_rate": 7.862279895523396e-05, - "loss": 1.8821, - "step": 216000 - }, - { - "epoch": 3.12, - "learning_rate": 7.857107992044823e-05, - "loss": 1.8825, - "step": 216500 - }, - { - "epoch": 3.12, - "learning_rate": 7.85191531786955e-05, - "loss": 1.8818, - "step": 217000 - }, - { - "epoch": 3.13, - "learning_rate": 7.846743414390978e-05, - "loss": 1.8837, - "step": 217500 - }, - { - "epoch": 3.14, - "learning_rate": 7.841550740215703e-05, - "loss": 1.877, - "step": 218000 - }, - { - "epoch": 3.15, - "learning_rate": 7.836378836737131e-05, - "loss": 1.8835, - "step": 218500 - }, - { - "epoch": 3.15, - "learning_rate": 7.831186162561858e-05, - "loss": 1.8835, - "step": 219000 - }, - { - "epoch": 3.16, - "learning_rate": 7.826014259083286e-05, - "loss": 1.8789, - "step": 219500 - }, - { - "epoch": 3.17, - "learning_rate": 7.820821584908013e-05, - "loss": 1.8814, - "step": 220000 - }, - { - "epoch": 3.17, - "learning_rate": 7.81564968142944e-05, - "loss": 1.8783, - "step": 220500 - }, - { - "epoch": 3.18, - "learning_rate": 7.810457007254166e-05, - "loss": 1.8846, - "step": 221000 - }, - { - "epoch": 3.19, - "learning_rate": 7.805285103775594e-05, - "loss": 1.8769, - "step": 221500 - }, - { - "epoch": 3.2, - "learning_rate": 7.800092429600319e-05, - "loss": 1.8762, - "step": 222000 - }, - { - "epoch": 3.2, - "learning_rate": 7.794920526121747e-05, - "loss": 1.8787, - "step": 222500 - }, - { - "epoch": 3.21, - "learning_rate": 7.789727851946475e-05, - "loss": 1.8793, - "step": 223000 - }, - { - "epoch": 3.22, - "learning_rate": 7.784555948467902e-05, - "loss": 1.8754, - "step": 223500 - }, - { - "epoch": 3.22, - "learning_rate": 7.779363274292628e-05, - "loss": 1.8788, - "step": 224000 - }, - { - "epoch": 3.23, - "learning_rate": 7.774191370814056e-05, - "loss": 1.8753, - "step": 224500 - }, - { - "epoch": 3.24, - "learning_rate": 7.768998696638783e-05, - "loss": 1.8792, - "step": 225000 - }, - { - "epoch": 3.25, - "learning_rate": 7.763826793160211e-05, - "loss": 1.8741, - "step": 225500 - }, - { - "epoch": 3.25, - "learning_rate": 7.758634118984936e-05, - "loss": 1.8782, - "step": 226000 - }, - { - "epoch": 3.26, - "learning_rate": 7.753462215506364e-05, - "loss": 1.8763, - "step": 226500 - }, - { - "epoch": 3.27, - "learning_rate": 7.748269541331091e-05, - "loss": 1.8718, - "step": 227000 - }, - { - "epoch": 3.27, - "learning_rate": 7.743097637852517e-05, - "loss": 1.8757, - "step": 227500 - }, - { - "epoch": 3.28, - "learning_rate": 7.737904963677245e-05, - "loss": 1.876, - "step": 228000 - }, - { - "epoch": 3.29, - "learning_rate": 7.732733060198672e-05, - "loss": 1.8746, - "step": 228500 - }, - { - "epoch": 3.3, - "learning_rate": 7.727540386023399e-05, - "loss": 1.8725, - "step": 229000 - }, - { - "epoch": 3.3, - "learning_rate": 7.722368482544826e-05, - "loss": 1.8764, - "step": 229500 - }, - { - "epoch": 3.31, - "learning_rate": 7.717175808369552e-05, - "loss": 1.8748, - "step": 230000 - }, - { - "epoch": 3.32, - "learning_rate": 7.71200390489098e-05, - "loss": 1.8738, - "step": 230500 - }, - { - "epoch": 3.33, - "learning_rate": 7.706811230715707e-05, - "loss": 1.8709, - "step": 231000 - }, - { - "epoch": 3.33, - "learning_rate": 7.701639327237134e-05, - "loss": 1.8728, - "step": 231500 - }, - { - "epoch": 3.34, - "learning_rate": 7.696446653061861e-05, - "loss": 1.8718, - "step": 232000 - }, - { - "epoch": 3.35, - "learning_rate": 7.691274749583288e-05, - "loss": 1.876, - "step": 232500 - }, - { - "epoch": 3.35, - "learning_rate": 7.686082075408014e-05, - "loss": 1.8677, - "step": 233000 - }, - { - "epoch": 3.36, - "learning_rate": 7.680910171929442e-05, - "loss": 1.8703, - "step": 233500 - }, - { - "epoch": 3.37, - "learning_rate": 7.675717497754169e-05, - "loss": 1.8715, - "step": 234000 - }, - { - "epoch": 3.38, - "learning_rate": 7.670545594275597e-05, - "loss": 1.871, - "step": 234500 - }, - { - "epoch": 3.38, - "learning_rate": 7.665352920100322e-05, - "loss": 1.8684, - "step": 235000 - }, - { - "epoch": 3.39, - "learning_rate": 7.66018101662175e-05, - "loss": 1.8688, - "step": 235500 - }, - { - "epoch": 3.4, - "learning_rate": 7.654988342446477e-05, - "loss": 1.8785, - "step": 236000 - }, - { - "epoch": 3.4, - "learning_rate": 7.649816438967903e-05, - "loss": 1.8702, - "step": 236500 - }, - { - "epoch": 3.41, - "learning_rate": 7.644623764792631e-05, - "loss": 1.8683, - "step": 237000 - }, - { - "epoch": 3.42, - "learning_rate": 7.639451861314059e-05, - "loss": 1.8698, - "step": 237500 - }, - { - "epoch": 3.43, - "learning_rate": 7.634259187138785e-05, - "loss": 1.8672, - "step": 238000 - }, - { - "epoch": 3.43, - "learning_rate": 7.629087283660212e-05, - "loss": 1.8718, - "step": 238500 - }, - { - "epoch": 3.44, - "learning_rate": 7.623894609484939e-05, - "loss": 1.8644, - "step": 239000 - }, - { - "epoch": 3.45, - "learning_rate": 7.618722706006367e-05, - "loss": 1.8734, - "step": 239500 - }, - { - "epoch": 3.45, - "learning_rate": 7.613530031831093e-05, - "loss": 1.8671, - "step": 240000 - }, - { - "epoch": 3.46, - "learning_rate": 7.60835812835252e-05, - "loss": 1.8703, - "step": 240500 - }, - { - "epoch": 3.47, - "learning_rate": 7.603165454177247e-05, - "loss": 1.8675, - "step": 241000 - }, - { - "epoch": 3.48, - "learning_rate": 7.597993550698675e-05, - "loss": 1.8679, - "step": 241500 - }, - { - "epoch": 3.48, - "learning_rate": 7.592800876523402e-05, - "loss": 1.8715, - "step": 242000 - }, - { - "epoch": 3.49, - "learning_rate": 7.58762897304483e-05, - "loss": 1.8662, - "step": 242500 - }, - { - "epoch": 3.5, - "learning_rate": 7.582436298869555e-05, - "loss": 1.8639, - "step": 243000 - }, - { - "epoch": 3.5, - "learning_rate": 7.577264395390983e-05, - "loss": 1.8661, - "step": 243500 - }, - { - "epoch": 3.51, - "learning_rate": 7.572071721215708e-05, - "loss": 1.8613, - "step": 244000 - }, - { - "epoch": 3.52, - "learning_rate": 7.566899817737136e-05, - "loss": 1.8641, - "step": 244500 - }, - { - "epoch": 3.53, - "learning_rate": 7.561707143561864e-05, - "loss": 1.8643, - "step": 245000 - }, - { - "epoch": 3.53, - "learning_rate": 7.55653524008329e-05, - "loss": 1.8642, - "step": 245500 - }, - { - "epoch": 3.54, - "learning_rate": 7.551342565908017e-05, - "loss": 1.8654, - "step": 246000 - }, - { - "epoch": 3.55, - "learning_rate": 7.546170662429445e-05, - "loss": 1.8618, - "step": 246500 - }, - { - "epoch": 3.56, - "learning_rate": 7.540977988254171e-05, - "loss": 1.8703, - "step": 247000 - }, - { - "epoch": 3.56, - "learning_rate": 7.535806084775598e-05, - "loss": 1.8616, - "step": 247500 - }, - { - "epoch": 3.57, - "learning_rate": 7.530613410600325e-05, - "loss": 1.8625, - "step": 248000 - }, - { - "epoch": 3.58, - "learning_rate": 7.525441507121753e-05, - "loss": 1.8626, - "step": 248500 - }, - { - "epoch": 3.58, - "learning_rate": 7.52024883294648e-05, - "loss": 1.8662, - "step": 249000 - }, - { - "epoch": 3.59, - "learning_rate": 7.515076929467906e-05, - "loss": 1.8635, - "step": 249500 - }, - { - "epoch": 3.6, - "learning_rate": 7.509884255292635e-05, - "loss": 1.8634, - "step": 250000 - }, - { - "epoch": 3.61, - "learning_rate": 7.50469158111736e-05, - "loss": 1.8644, - "step": 250500 - }, - { - "epoch": 3.61, - "learning_rate": 7.499519677638788e-05, - "loss": 1.8601, - "step": 251000 - }, - { - "epoch": 3.62, - "learning_rate": 7.494327003463513e-05, - "loss": 1.8632, - "step": 251500 - }, - { - "epoch": 3.63, - "learning_rate": 7.489155099984941e-05, - "loss": 1.8648, - "step": 252000 - }, - { - "epoch": 3.63, - "learning_rate": 7.483962425809669e-05, - "loss": 1.8629, - "step": 252500 - }, - { - "epoch": 3.64, - "learning_rate": 7.478790522331096e-05, - "loss": 1.8611, - "step": 253000 - }, - { - "epoch": 3.65, - "learning_rate": 7.473597848155822e-05, - "loss": 1.8574, - "step": 253500 - }, - { - "epoch": 3.66, - "learning_rate": 7.46842594467725e-05, - "loss": 1.8636, - "step": 254000 - }, - { - "epoch": 3.66, - "learning_rate": 7.463233270501976e-05, - "loss": 1.8605, - "step": 254500 - }, - { - "epoch": 3.67, - "learning_rate": 7.458061367023403e-05, - "loss": 1.8583, - "step": 255000 - }, - { - "epoch": 3.68, - "learning_rate": 7.45286869284813e-05, - "loss": 1.8616, - "step": 255500 - }, - { - "epoch": 3.68, - "learning_rate": 7.447696789369558e-05, - "loss": 1.8571, - "step": 256000 - }, - { - "epoch": 3.69, - "learning_rate": 7.442504115194285e-05, - "loss": 1.8589, - "step": 256500 - }, - { - "epoch": 3.7, - "learning_rate": 7.437332211715711e-05, - "loss": 1.8585, - "step": 257000 - }, - { - "epoch": 3.71, - "learning_rate": 7.432139537540438e-05, - "loss": 1.8582, - "step": 257500 - }, - { - "epoch": 3.71, - "learning_rate": 7.426967634061866e-05, - "loss": 1.8597, - "step": 258000 - }, - { - "epoch": 3.72, - "learning_rate": 7.421774959886593e-05, - "loss": 1.8592, - "step": 258500 - }, - { - "epoch": 3.73, - "learning_rate": 7.41660305640802e-05, - "loss": 1.8589, - "step": 259000 - }, - { - "epoch": 3.74, - "learning_rate": 7.411410382232746e-05, - "loss": 1.8527, - "step": 259500 - }, - { - "epoch": 3.74, - "learning_rate": 7.406238478754174e-05, - "loss": 1.8583, - "step": 260000 - }, - { - "epoch": 3.75, - "learning_rate": 7.4010458045789e-05, - "loss": 1.8536, - "step": 260500 - }, - { - "epoch": 3.76, - "learning_rate": 7.395873901100327e-05, - "loss": 1.8548, - "step": 261000 - }, - { - "epoch": 3.76, - "learning_rate": 7.390681226925055e-05, - "loss": 1.8536, - "step": 261500 - }, - { - "epoch": 3.77, - "learning_rate": 7.385509323446482e-05, - "loss": 1.852, - "step": 262000 - }, - { - "epoch": 3.78, - "learning_rate": 7.380316649271208e-05, - "loss": 1.8566, - "step": 262500 - }, - { - "epoch": 3.79, - "learning_rate": 7.375144745792636e-05, - "loss": 1.8541, - "step": 263000 - }, - { - "epoch": 3.79, - "learning_rate": 7.369952071617363e-05, - "loss": 1.8562, - "step": 263500 - }, - { - "epoch": 3.8, - "learning_rate": 7.364780168138791e-05, - "loss": 1.8576, - "step": 264000 - }, - { - "epoch": 3.81, - "learning_rate": 7.359587493963516e-05, - "loss": 1.8577, - "step": 264500 - }, - { - "epoch": 3.81, - "learning_rate": 7.354415590484944e-05, - "loss": 1.8544, - "step": 265000 - }, - { - "epoch": 3.82, - "learning_rate": 7.349222916309671e-05, - "loss": 1.8567, - "step": 265500 - }, - { - "epoch": 3.83, - "learning_rate": 7.344051012831097e-05, - "loss": 1.8526, - "step": 266000 - }, - { - "epoch": 3.84, - "learning_rate": 7.338858338655826e-05, - "loss": 1.8545, - "step": 266500 - }, - { - "epoch": 3.84, - "learning_rate": 7.333686435177253e-05, - "loss": 1.8532, - "step": 267000 - }, - { - "epoch": 3.85, - "learning_rate": 7.328493761001979e-05, - "loss": 1.8525, - "step": 267500 - }, - { - "epoch": 3.86, - "learning_rate": 7.323321857523407e-05, - "loss": 1.8527, - "step": 268000 - }, - { - "epoch": 3.86, - "learning_rate": 7.318129183348132e-05, - "loss": 1.8557, - "step": 268500 - }, - { - "epoch": 3.87, - "learning_rate": 7.31295727986956e-05, - "loss": 1.8522, - "step": 269000 - }, - { - "epoch": 3.88, - "learning_rate": 7.307764605694287e-05, - "loss": 1.857, - "step": 269500 - }, - { - "epoch": 3.89, - "learning_rate": 7.302592702215714e-05, - "loss": 1.8594, - "step": 270000 - }, - { - "epoch": 3.89, - "learning_rate": 7.297400028040441e-05, - "loss": 1.8492, - "step": 270500 - }, - { - "epoch": 3.9, - "learning_rate": 7.292228124561869e-05, - "loss": 1.8502, - "step": 271000 - }, - { - "epoch": 3.91, - "learning_rate": 7.287035450386594e-05, - "loss": 1.8525, - "step": 271500 - }, - { - "epoch": 3.92, - "learning_rate": 7.281863546908022e-05, - "loss": 1.8482, - "step": 272000 - }, - { - "epoch": 3.92, - "learning_rate": 7.276670872732749e-05, - "loss": 1.8502, - "step": 272500 - }, - { - "epoch": 3.93, - "learning_rate": 7.271498969254177e-05, - "loss": 1.8505, - "step": 273000 - }, - { - "epoch": 3.94, - "learning_rate": 7.266306295078902e-05, - "loss": 1.8493, - "step": 273500 - }, - { - "epoch": 3.94, - "learning_rate": 7.26113439160033e-05, - "loss": 1.8499, - "step": 274000 - }, - { - "epoch": 3.95, - "learning_rate": 7.255941717425057e-05, - "loss": 1.8448, - "step": 274500 - }, - { - "epoch": 3.96, - "learning_rate": 7.250769813946485e-05, - "loss": 1.8507, - "step": 275000 - }, - { - "epoch": 3.97, - "learning_rate": 7.245577139771212e-05, - "loss": 1.8514, - "step": 275500 - }, - { - "epoch": 3.97, - "learning_rate": 7.240405236292639e-05, - "loss": 1.8507, - "step": 276000 - }, - { - "epoch": 3.98, - "learning_rate": 7.235212562117365e-05, - "loss": 1.8475, - "step": 276500 - }, - { - "epoch": 3.99, - "learning_rate": 7.230040658638793e-05, - "loss": 1.8495, - "step": 277000 - }, - { - "epoch": 3.99, - "learning_rate": 7.22484798446352e-05, - "loss": 1.8496, - "step": 277500 - }, - { - "epoch": 4.0, - "eval_accuracy": 0.6442238837933792, - "eval_loss": 1.7236328125, - "eval_runtime": 653.8369, - "eval_samples_per_second": 824.268, - "eval_steps_per_second": 34.345, - "step": 277892 - }, - { - "epoch": 4.0, - "learning_rate": 7.219676080984947e-05, - "loss": 1.8473, - "step": 278000 - }, - { - "epoch": 4.01, - "learning_rate": 7.214483406809674e-05, - "loss": 1.8419, - "step": 278500 - }, - { - "epoch": 4.02, - "learning_rate": 7.2093115033311e-05, - "loss": 1.8469, - "step": 279000 - }, - { - "epoch": 4.02, - "learning_rate": 7.204118829155827e-05, - "loss": 1.8423, - "step": 279500 - }, - { - "epoch": 4.03, - "learning_rate": 7.198946925677255e-05, - "loss": 1.8486, - "step": 280000 - }, - { - "epoch": 4.04, - "learning_rate": 7.193754251501982e-05, - "loss": 1.8417, - "step": 280500 - }, - { - "epoch": 4.04, - "learning_rate": 7.18858234802341e-05, - "loss": 1.8455, - "step": 281000 - }, - { - "epoch": 4.05, - "learning_rate": 7.183389673848135e-05, - "loss": 1.8395, - "step": 281500 - }, - { - "epoch": 4.06, - "learning_rate": 7.178217770369563e-05, - "loss": 1.8428, - "step": 282000 - }, - { - "epoch": 4.07, - "learning_rate": 7.17302509619429e-05, - "loss": 1.8469, - "step": 282500 - }, - { - "epoch": 4.07, - "learning_rate": 7.167853192715716e-05, - "loss": 1.8421, - "step": 283000 - }, - { - "epoch": 4.08, - "learning_rate": 7.162660518540444e-05, - "loss": 1.8466, - "step": 283500 - }, - { - "epoch": 4.09, - "learning_rate": 7.157488615061871e-05, - "loss": 1.8436, - "step": 284000 - }, - { - "epoch": 4.1, - "learning_rate": 7.152295940886598e-05, - "loss": 1.8429, - "step": 284500 - }, - { - "epoch": 4.1, - "learning_rate": 7.147124037408025e-05, - "loss": 1.844, - "step": 285000 - }, - { - "epoch": 4.11, - "learning_rate": 7.141931363232751e-05, - "loss": 1.8403, - "step": 285500 - }, - { - "epoch": 4.12, - "learning_rate": 7.136759459754179e-05, - "loss": 1.8411, - "step": 286000 - }, - { - "epoch": 4.12, - "learning_rate": 7.131566785578905e-05, - "loss": 1.8395, - "step": 286500 - }, - { - "epoch": 4.13, - "learning_rate": 7.126394882100333e-05, - "loss": 1.8433, - "step": 287000 - }, - { - "epoch": 4.14, - "learning_rate": 7.12120220792506e-05, - "loss": 1.8413, - "step": 287500 - }, - { - "epoch": 4.15, - "learning_rate": 7.116030304446486e-05, - "loss": 1.8417, - "step": 288000 - }, - { - "epoch": 4.15, - "learning_rate": 7.110837630271213e-05, - "loss": 1.8398, - "step": 288500 - }, - { - "epoch": 4.16, - "learning_rate": 7.105665726792641e-05, - "loss": 1.8434, - "step": 289000 - }, - { - "epoch": 4.17, - "learning_rate": 7.100473052617368e-05, - "loss": 1.8411, - "step": 289500 - }, - { - "epoch": 4.17, - "learning_rate": 7.095301149138796e-05, - "loss": 1.838, - "step": 290000 - }, - { - "epoch": 4.18, - "learning_rate": 7.090108474963521e-05, - "loss": 1.8426, - "step": 290500 - }, - { - "epoch": 4.19, - "learning_rate": 7.084936571484949e-05, - "loss": 1.8393, - "step": 291000 - }, - { - "epoch": 4.2, - "learning_rate": 7.079743897309676e-05, - "loss": 1.8392, - "step": 291500 - }, - { - "epoch": 4.2, - "learning_rate": 7.074571993831103e-05, - "loss": 1.8387, - "step": 292000 - }, - { - "epoch": 4.21, - "learning_rate": 7.06937931965583e-05, - "loss": 1.8382, - "step": 292500 - }, - { - "epoch": 4.22, - "learning_rate": 7.064207416177258e-05, - "loss": 1.8399, - "step": 293000 - }, - { - "epoch": 4.22, - "learning_rate": 7.059014742001984e-05, - "loss": 1.8352, - "step": 293500 - }, - { - "epoch": 4.23, - "learning_rate": 7.053842838523411e-05, - "loss": 1.8379, - "step": 294000 - }, - { - "epoch": 4.24, - "learning_rate": 7.048650164348138e-05, - "loss": 1.837, - "step": 294500 - }, - { - "epoch": 4.25, - "learning_rate": 7.043478260869566e-05, - "loss": 1.8381, - "step": 295000 - }, - { - "epoch": 4.25, - "learning_rate": 7.038285586694291e-05, - "loss": 1.8319, - "step": 295500 - }, - { - "epoch": 4.26, - "learning_rate": 7.033113683215719e-05, - "loss": 1.8366, - "step": 296000 - }, - { - "epoch": 4.27, - "learning_rate": 7.027921009040446e-05, - "loss": 1.8369, - "step": 296500 - }, - { - "epoch": 4.28, - "learning_rate": 7.022749105561874e-05, - "loss": 1.8374, - "step": 297000 - }, - { - "epoch": 4.28, - "learning_rate": 7.0175564313866e-05, - "loss": 1.8394, - "step": 297500 - }, - { - "epoch": 4.29, - "learning_rate": 7.012384527908028e-05, - "loss": 1.8352, - "step": 298000 - }, - { - "epoch": 4.3, - "learning_rate": 7.007191853732754e-05, - "loss": 1.8362, - "step": 298500 - }, - { - "epoch": 4.3, - "learning_rate": 7.002019950254182e-05, - "loss": 1.8384, - "step": 299000 - }, - { - "epoch": 4.31, - "learning_rate": 6.996827276078907e-05, - "loss": 1.8377, - "step": 299500 - }, - { - "epoch": 4.32, - "learning_rate": 6.991655372600335e-05, - "loss": 1.8357, - "step": 300000 - }, - { - "epoch": 4.33, - "learning_rate": 6.986462698425063e-05, - "loss": 1.8343, - "step": 300500 - }, - { - "epoch": 4.33, - "learning_rate": 6.98129079494649e-05, - "loss": 1.838, - "step": 301000 - }, - { - "epoch": 4.34, - "learning_rate": 6.976098120771216e-05, - "loss": 1.8332, - "step": 301500 - }, - { - "epoch": 4.35, - "learning_rate": 6.970926217292644e-05, - "loss": 1.8372, - "step": 302000 - }, - { - "epoch": 4.35, - "learning_rate": 6.96573354311737e-05, - "loss": 1.8377, - "step": 302500 - }, - { - "epoch": 4.36, - "learning_rate": 6.960561639638797e-05, - "loss": 1.8372, - "step": 303000 - }, - { - "epoch": 4.37, - "learning_rate": 6.955368965463524e-05, - "loss": 1.8373, - "step": 303500 - }, - { - "epoch": 4.38, - "learning_rate": 6.950197061984952e-05, - "loss": 1.8351, - "step": 304000 - }, - { - "epoch": 4.38, - "learning_rate": 6.945004387809679e-05, - "loss": 1.8344, - "step": 304500 - }, - { - "epoch": 4.39, - "learning_rate": 6.939832484331105e-05, - "loss": 1.8312, - "step": 305000 - }, - { - "epoch": 4.4, - "learning_rate": 6.934639810155833e-05, - "loss": 1.8339, - "step": 305500 - }, - { - "epoch": 4.4, - "learning_rate": 6.929467906677261e-05, - "loss": 1.8334, - "step": 306000 - }, - { - "epoch": 4.41, - "learning_rate": 6.924275232501987e-05, - "loss": 1.8357, - "step": 306500 - }, - { - "epoch": 4.42, - "learning_rate": 6.919103329023414e-05, - "loss": 1.8298, - "step": 307000 - }, - { - "epoch": 4.43, - "learning_rate": 6.91391065484814e-05, - "loss": 1.832, - "step": 307500 - }, - { - "epoch": 4.43, - "learning_rate": 6.908738751369568e-05, - "loss": 1.8347, - "step": 308000 - }, - { - "epoch": 4.44, - "learning_rate": 6.903546077194295e-05, - "loss": 1.8348, - "step": 308500 - }, - { - "epoch": 4.45, - "learning_rate": 6.898374173715722e-05, - "loss": 1.8337, - "step": 309000 - }, - { - "epoch": 4.45, - "learning_rate": 6.893181499540449e-05, - "loss": 1.8286, - "step": 309500 - }, - { - "epoch": 4.46, - "learning_rate": 6.888009596061877e-05, - "loss": 1.8294, - "step": 310000 - }, - { - "epoch": 4.47, - "learning_rate": 6.882816921886602e-05, - "loss": 1.8312, - "step": 310500 - }, - { - "epoch": 4.48, - "learning_rate": 6.87764501840803e-05, - "loss": 1.8297, - "step": 311000 - }, - { - "epoch": 4.48, - "learning_rate": 6.872452344232757e-05, - "loss": 1.8321, - "step": 311500 - }, - { - "epoch": 4.49, - "learning_rate": 6.867280440754185e-05, - "loss": 1.8331, - "step": 312000 - }, - { - "epoch": 4.5, - "learning_rate": 6.86208776657891e-05, - "loss": 1.8319, - "step": 312500 - }, - { - "epoch": 4.51, - "learning_rate": 6.856915863100338e-05, - "loss": 1.83, - "step": 313000 - }, - { - "epoch": 4.51, - "learning_rate": 6.851723188925065e-05, - "loss": 1.8297, - "step": 313500 - }, - { - "epoch": 4.52, - "learning_rate": 6.846551285446493e-05, - "loss": 1.8318, - "step": 314000 - }, - { - "epoch": 4.53, - "learning_rate": 6.84135861127122e-05, - "loss": 1.8292, - "step": 314500 - }, - { - "epoch": 4.53, - "learning_rate": 6.836186707792647e-05, - "loss": 1.8294, - "step": 315000 - }, - { - "epoch": 4.54, - "learning_rate": 6.830994033617373e-05, - "loss": 1.8341, - "step": 315500 - }, - { - "epoch": 4.55, - "learning_rate": 6.8258221301388e-05, - "loss": 1.8306, - "step": 316000 - }, - { - "epoch": 4.56, - "learning_rate": 6.820629455963527e-05, - "loss": 1.8316, - "step": 316500 - }, - { - "epoch": 4.56, - "learning_rate": 6.815457552484955e-05, - "loss": 1.8292, - "step": 317000 - }, - { - "epoch": 4.57, - "learning_rate": 6.81026487830968e-05, - "loss": 1.832, - "step": 317500 - }, - { - "epoch": 4.58, - "learning_rate": 6.805092974831108e-05, - "loss": 1.8291, - "step": 318000 - }, - { - "epoch": 4.58, - "learning_rate": 6.799900300655835e-05, - "loss": 1.8291, - "step": 318500 - }, - { - "epoch": 4.59, - "learning_rate": 6.794728397177263e-05, - "loss": 1.8318, - "step": 319000 - }, - { - "epoch": 4.6, - "learning_rate": 6.78953572300199e-05, - "loss": 1.8302, - "step": 319500 - }, - { - "epoch": 4.61, - "learning_rate": 6.784363819523418e-05, - "loss": 1.8249, - "step": 320000 - }, - { - "epoch": 4.61, - "learning_rate": 6.779171145348143e-05, - "loss": 1.8325, - "step": 320500 - }, - { - "epoch": 4.62, - "learning_rate": 6.773999241869571e-05, - "loss": 1.8276, - "step": 321000 - }, - { - "epoch": 4.63, - "learning_rate": 6.768806567694296e-05, - "loss": 1.8306, - "step": 321500 - }, - { - "epoch": 4.63, - "learning_rate": 6.763634664215724e-05, - "loss": 1.8286, - "step": 322000 - }, - { - "epoch": 4.64, - "learning_rate": 6.758441990040452e-05, - "loss": 1.8288, - "step": 322500 - }, - { - "epoch": 4.65, - "learning_rate": 6.753270086561879e-05, - "loss": 1.8283, - "step": 323000 - }, - { - "epoch": 4.66, - "learning_rate": 6.748077412386605e-05, - "loss": 1.8309, - "step": 323500 - }, - { - "epoch": 4.66, - "learning_rate": 6.742905508908033e-05, - "loss": 1.8287, - "step": 324000 - }, - { - "epoch": 4.67, - "learning_rate": 6.737712834732759e-05, - "loss": 1.8266, - "step": 324500 - }, - { - "epoch": 4.68, - "learning_rate": 6.732540931254186e-05, - "loss": 1.8256, - "step": 325000 - }, - { - "epoch": 4.69, - "learning_rate": 6.727348257078913e-05, - "loss": 1.8299, - "step": 325500 - }, - { - "epoch": 4.69, - "learning_rate": 6.722176353600341e-05, - "loss": 1.8279, - "step": 326000 - }, - { - "epoch": 4.7, - "learning_rate": 6.716983679425068e-05, - "loss": 1.8276, - "step": 326500 - }, - { - "epoch": 4.71, - "learning_rate": 6.711811775946494e-05, - "loss": 1.8256, - "step": 327000 - }, - { - "epoch": 4.71, - "learning_rate": 6.706619101771221e-05, - "loss": 1.8254, - "step": 327500 - }, - { - "epoch": 4.72, - "learning_rate": 6.701447198292649e-05, - "loss": 1.827, - "step": 328000 - }, - { - "epoch": 4.73, - "learning_rate": 6.696254524117376e-05, - "loss": 1.8264, - "step": 328500 - }, - { - "epoch": 4.74, - "learning_rate": 6.691082620638804e-05, - "loss": 1.8267, - "step": 329000 - }, - { - "epoch": 4.74, - "learning_rate": 6.685889946463529e-05, - "loss": 1.827, - "step": 329500 - }, - { - "epoch": 4.75, - "learning_rate": 6.680718042984957e-05, - "loss": 1.8263, - "step": 330000 - }, - { - "epoch": 4.76, - "learning_rate": 6.675525368809684e-05, - "loss": 1.8223, - "step": 330500 - }, - { - "epoch": 4.76, - "learning_rate": 6.670353465331111e-05, - "loss": 1.8231, - "step": 331000 - }, - { - "epoch": 4.77, - "learning_rate": 6.665160791155838e-05, - "loss": 1.8201, - "step": 331500 - }, - { - "epoch": 4.78, - "learning_rate": 6.659988887677266e-05, - "loss": 1.826, - "step": 332000 - }, - { - "epoch": 4.79, - "learning_rate": 6.654796213501991e-05, - "loss": 1.8219, - "step": 332500 - }, - { - "epoch": 4.79, - "learning_rate": 6.649624310023419e-05, - "loss": 1.8242, - "step": 333000 - }, - { - "epoch": 4.8, - "learning_rate": 6.644431635848146e-05, - "loss": 1.8225, - "step": 333500 - }, - { - "epoch": 4.81, - "learning_rate": 6.639259732369574e-05, - "loss": 1.8243, - "step": 334000 - }, - { - "epoch": 4.81, - "learning_rate": 6.634067058194299e-05, - "loss": 1.8222, - "step": 334500 - }, - { - "epoch": 4.82, - "learning_rate": 6.628895154715727e-05, - "loss": 1.8258, - "step": 335000 - }, - { - "epoch": 4.83, - "learning_rate": 6.623702480540454e-05, - "loss": 1.8233, - "step": 335500 - }, - { - "epoch": 4.84, - "learning_rate": 6.618530577061882e-05, - "loss": 1.8246, - "step": 336000 - }, - { - "epoch": 4.84, - "learning_rate": 6.613337902886609e-05, - "loss": 1.8245, - "step": 336500 - }, - { - "epoch": 4.85, - "learning_rate": 6.608165999408036e-05, - "loss": 1.8255, - "step": 337000 - }, - { - "epoch": 4.86, - "learning_rate": 6.602973325232762e-05, - "loss": 1.8231, - "step": 337500 - }, - { - "epoch": 4.87, - "learning_rate": 6.59780142175419e-05, - "loss": 1.8195, - "step": 338000 - }, - { - "epoch": 4.87, - "learning_rate": 6.592608747578915e-05, - "loss": 1.8231, - "step": 338500 - }, - { - "epoch": 4.88, - "learning_rate": 6.587436844100343e-05, - "loss": 1.8218, - "step": 339000 - }, - { - "epoch": 4.89, - "learning_rate": 6.582244169925071e-05, - "loss": 1.8182, - "step": 339500 - }, - { - "epoch": 4.89, - "learning_rate": 6.577072266446497e-05, - "loss": 1.8183, - "step": 340000 - }, - { - "epoch": 4.9, - "learning_rate": 6.571879592271224e-05, - "loss": 1.8206, - "step": 340500 - }, - { - "epoch": 4.91, - "learning_rate": 6.566707688792652e-05, - "loss": 1.82, - "step": 341000 - }, - { - "epoch": 4.92, - "learning_rate": 6.561515014617377e-05, - "loss": 1.8225, - "step": 341500 - }, - { - "epoch": 4.92, - "learning_rate": 6.556343111138805e-05, - "loss": 1.8218, - "step": 342000 - }, - { - "epoch": 4.93, - "learning_rate": 6.551150436963532e-05, - "loss": 1.815, - "step": 342500 - }, - { - "epoch": 4.94, - "learning_rate": 6.54597853348496e-05, - "loss": 1.82, - "step": 343000 - }, - { - "epoch": 4.94, - "learning_rate": 6.540785859309687e-05, - "loss": 1.8212, - "step": 343500 - }, - { - "epoch": 4.95, - "learning_rate": 6.535613955831113e-05, - "loss": 1.8202, - "step": 344000 - }, - { - "epoch": 4.96, - "learning_rate": 6.530421281655841e-05, - "loss": 1.8165, - "step": 344500 - }, - { - "epoch": 4.97, - "learning_rate": 6.525249378177268e-05, - "loss": 1.8195, - "step": 345000 - }, - { - "epoch": 4.97, - "learning_rate": 6.520056704001995e-05, - "loss": 1.8137, - "step": 345500 - }, - { - "epoch": 4.98, - "learning_rate": 6.514884800523422e-05, - "loss": 1.8152, - "step": 346000 - }, - { - "epoch": 4.99, - "learning_rate": 6.509692126348148e-05, - "loss": 1.8201, - "step": 346500 - }, - { - "epoch": 4.99, - "learning_rate": 6.504520222869576e-05, - "loss": 1.8188, - "step": 347000 - }, - { - "epoch": 5.0, - "eval_accuracy": 0.6490117128316449, - "eval_loss": 1.6923828125, - "eval_runtime": 653.4402, - "eval_samples_per_second": 824.769, - "eval_steps_per_second": 34.366, - "step": 347365 - }, - { - "epoch": 5.0, - "learning_rate": 6.499327548694302e-05, - "loss": 1.8205, - "step": 347500 - }, - { - "epoch": 5.01, - "learning_rate": 6.49415564521573e-05, - "loss": 1.8156, - "step": 348000 - }, - { - "epoch": 5.02, - "learning_rate": 6.488962971040457e-05, - "loss": 1.8186, - "step": 348500 - }, - { - "epoch": 5.02, - "learning_rate": 6.483791067561883e-05, - "loss": 1.809, - "step": 349000 - }, - { - "epoch": 5.03, - "learning_rate": 6.47859839338661e-05, - "loss": 1.8136, - "step": 349500 - }, - { - "epoch": 5.04, - "learning_rate": 6.473426489908038e-05, - "loss": 1.8182, - "step": 350000 - }, - { - "epoch": 5.05, - "learning_rate": 6.468233815732765e-05, - "loss": 1.8141, - "step": 350500 - }, - { - "epoch": 5.05, - "learning_rate": 6.463061912254193e-05, - "loss": 1.8174, - "step": 351000 - }, - { - "epoch": 5.06, - "learning_rate": 6.457869238078918e-05, - "loss": 1.8149, - "step": 351500 - }, - { - "epoch": 5.07, - "learning_rate": 6.452697334600346e-05, - "loss": 1.8188, - "step": 352000 - }, - { - "epoch": 5.07, - "learning_rate": 6.447504660425073e-05, - "loss": 1.8127, - "step": 352500 - }, - { - "epoch": 5.08, - "learning_rate": 6.442332756946499e-05, - "loss": 1.8144, - "step": 353000 - }, - { - "epoch": 5.09, - "learning_rate": 6.437140082771227e-05, - "loss": 1.8183, - "step": 353500 - }, - { - "epoch": 5.1, - "learning_rate": 6.431968179292655e-05, - "loss": 1.8149, - "step": 354000 - }, - { - "epoch": 5.1, - "learning_rate": 6.42677550511738e-05, - "loss": 1.8138, - "step": 354500 - }, - { - "epoch": 5.11, - "learning_rate": 6.421603601638808e-05, - "loss": 1.8104, - "step": 355000 - }, - { - "epoch": 5.12, - "learning_rate": 6.416410927463534e-05, - "loss": 1.814, - "step": 355500 - }, - { - "epoch": 5.12, - "learning_rate": 6.411239023984962e-05, - "loss": 1.811, - "step": 356000 - }, - { - "epoch": 5.13, - "learning_rate": 6.406046349809688e-05, - "loss": 1.8139, - "step": 356500 - }, - { - "epoch": 5.14, - "learning_rate": 6.400874446331116e-05, - "loss": 1.8135, - "step": 357000 - }, - { - "epoch": 5.15, - "learning_rate": 6.395681772155843e-05, - "loss": 1.8111, - "step": 357500 - }, - { - "epoch": 5.15, - "learning_rate": 6.390509868677271e-05, - "loss": 1.8143, - "step": 358000 - }, - { - "epoch": 5.16, - "learning_rate": 6.385317194501998e-05, - "loss": 1.8134, - "step": 358500 - }, - { - "epoch": 5.17, - "learning_rate": 6.380145291023425e-05, - "loss": 1.8142, - "step": 359000 - }, - { - "epoch": 5.17, - "learning_rate": 6.374952616848151e-05, - "loss": 1.8122, - "step": 359500 - }, - { - "epoch": 5.18, - "learning_rate": 6.369780713369579e-05, - "loss": 1.8108, - "step": 360000 - }, - { - "epoch": 5.19, - "learning_rate": 6.364588039194304e-05, - "loss": 1.8123, - "step": 360500 - }, - { - "epoch": 5.2, - "learning_rate": 6.359416135715732e-05, - "loss": 1.8136, - "step": 361000 - }, - { - "epoch": 5.2, - "learning_rate": 6.35422346154046e-05, - "loss": 1.8121, - "step": 361500 - }, - { - "epoch": 5.21, - "learning_rate": 6.349051558061887e-05, - "loss": 1.811, - "step": 362000 - }, - { - "epoch": 5.22, - "learning_rate": 6.343858883886613e-05, - "loss": 1.8123, - "step": 362500 - }, - { - "epoch": 5.23, - "learning_rate": 6.338686980408041e-05, - "loss": 1.8083, - "step": 363000 - }, - { - "epoch": 5.23, - "learning_rate": 6.333494306232767e-05, - "loss": 1.8145, - "step": 363500 - }, - { - "epoch": 5.24, - "learning_rate": 6.328322402754194e-05, - "loss": 1.8137, - "step": 364000 - }, - { - "epoch": 5.25, - "learning_rate": 6.323129728578921e-05, - "loss": 1.8127, - "step": 364500 - }, - { - "epoch": 5.25, - "learning_rate": 6.317957825100349e-05, - "loss": 1.8109, - "step": 365000 - }, - { - "epoch": 5.26, - "learning_rate": 6.312765150925076e-05, - "loss": 1.8084, - "step": 365500 - }, - { - "epoch": 5.27, - "learning_rate": 6.307593247446502e-05, - "loss": 1.8125, - "step": 366000 - }, - { - "epoch": 5.28, - "learning_rate": 6.302400573271229e-05, - "loss": 1.8099, - "step": 366500 - }, - { - "epoch": 5.28, - "learning_rate": 6.297228669792657e-05, - "loss": 1.8108, - "step": 367000 - }, - { - "epoch": 5.29, - "learning_rate": 6.292035995617384e-05, - "loss": 1.8158, - "step": 367500 - }, - { - "epoch": 5.3, - "learning_rate": 6.286864092138811e-05, - "loss": 1.8079, - "step": 368000 - }, - { - "epoch": 5.3, - "learning_rate": 6.281671417963537e-05, - "loss": 1.8104, - "step": 368500 - }, - { - "epoch": 5.31, - "learning_rate": 6.276499514484965e-05, - "loss": 1.8068, - "step": 369000 - }, - { - "epoch": 5.32, - "learning_rate": 6.271306840309691e-05, - "loss": 1.8106, - "step": 369500 - }, - { - "epoch": 5.33, - "learning_rate": 6.266134936831118e-05, - "loss": 1.8083, - "step": 370000 - }, - { - "epoch": 5.33, - "learning_rate": 6.260942262655846e-05, - "loss": 1.8108, - "step": 370500 - }, - { - "epoch": 5.34, - "learning_rate": 6.255770359177273e-05, - "loss": 1.8119, - "step": 371000 - }, - { - "epoch": 5.35, - "learning_rate": 6.250577685002e-05, - "loss": 1.8102, - "step": 371500 - }, - { - "epoch": 5.35, - "learning_rate": 6.245405781523427e-05, - "loss": 1.8077, - "step": 372000 - }, - { - "epoch": 5.36, - "learning_rate": 6.240213107348154e-05, - "loss": 1.8087, - "step": 372500 - }, - { - "epoch": 5.37, - "learning_rate": 6.235041203869582e-05, - "loss": 1.8107, - "step": 373000 - }, - { - "epoch": 5.38, - "learning_rate": 6.229848529694307e-05, - "loss": 1.8095, - "step": 373500 - }, - { - "epoch": 5.38, - "learning_rate": 6.224676626215735e-05, - "loss": 1.8033, - "step": 374000 - }, - { - "epoch": 5.39, - "learning_rate": 6.219483952040462e-05, - "loss": 1.8088, - "step": 374500 - }, - { - "epoch": 5.4, - "learning_rate": 6.214312048561888e-05, - "loss": 1.8079, - "step": 375000 - }, - { - "epoch": 5.4, - "learning_rate": 6.209119374386616e-05, - "loss": 1.8119, - "step": 375500 - }, - { - "epoch": 5.41, - "learning_rate": 6.203947470908044e-05, - "loss": 1.8074, - "step": 376000 - }, - { - "epoch": 5.42, - "learning_rate": 6.19875479673277e-05, - "loss": 1.8048, - "step": 376500 - }, - { - "epoch": 5.43, - "learning_rate": 6.193582893254197e-05, - "loss": 1.8044, - "step": 377000 - }, - { - "epoch": 5.43, - "learning_rate": 6.188390219078923e-05, - "loss": 1.8127, - "step": 377500 - }, - { - "epoch": 5.44, - "learning_rate": 6.183218315600351e-05, - "loss": 1.8062, - "step": 378000 - }, - { - "epoch": 5.45, - "learning_rate": 6.178025641425078e-05, - "loss": 1.8083, - "step": 378500 - }, - { - "epoch": 5.46, - "learning_rate": 6.172853737946505e-05, - "loss": 1.8032, - "step": 379000 - }, - { - "epoch": 5.46, - "learning_rate": 6.167661063771232e-05, - "loss": 1.8118, - "step": 379500 - }, - { - "epoch": 5.47, - "learning_rate": 6.16248916029266e-05, - "loss": 1.8037, - "step": 380000 - }, - { - "epoch": 5.48, - "learning_rate": 6.157296486117385e-05, - "loss": 1.8066, - "step": 380500 - }, - { - "epoch": 5.48, - "learning_rate": 6.152124582638813e-05, - "loss": 1.8047, - "step": 381000 - }, - { - "epoch": 5.49, - "learning_rate": 6.14693190846354e-05, - "loss": 1.8061, - "step": 381500 - }, - { - "epoch": 5.5, - "learning_rate": 6.141760004984968e-05, - "loss": 1.8078, - "step": 382000 - }, - { - "epoch": 5.51, - "learning_rate": 6.136567330809693e-05, - "loss": 1.8105, - "step": 382500 - }, - { - "epoch": 5.51, - "learning_rate": 6.131395427331121e-05, - "loss": 1.801, - "step": 383000 - }, - { - "epoch": 5.52, - "learning_rate": 6.126202753155849e-05, - "loss": 1.8049, - "step": 383500 - }, - { - "epoch": 5.53, - "learning_rate": 6.121030849677276e-05, - "loss": 1.8085, - "step": 384000 - }, - { - "epoch": 5.53, - "learning_rate": 6.115838175502002e-05, - "loss": 1.8047, - "step": 384500 - }, - { - "epoch": 5.54, - "learning_rate": 6.11066627202343e-05, - "loss": 1.8083, - "step": 385000 - }, - { - "epoch": 5.55, - "learning_rate": 6.105473597848156e-05, - "loss": 1.8053, - "step": 385500 - }, - { - "epoch": 5.56, - "learning_rate": 6.1003016943695835e-05, - "loss": 1.8079, - "step": 386000 - }, - { - "epoch": 5.56, - "learning_rate": 6.095109020194311e-05, - "loss": 1.8046, - "step": 386500 - }, - { - "epoch": 5.57, - "learning_rate": 6.089937116715738e-05, - "loss": 1.8037, - "step": 387000 - }, - { - "epoch": 5.58, - "learning_rate": 6.084744442540464e-05, - "loss": 1.8011, - "step": 387500 - }, - { - "epoch": 5.58, - "learning_rate": 6.079572539061892e-05, - "loss": 1.8042, - "step": 388000 - }, - { - "epoch": 5.59, - "learning_rate": 6.074379864886618e-05, - "loss": 1.8056, - "step": 388500 - }, - { - "epoch": 5.6, - "learning_rate": 6.069207961408045e-05, - "loss": 1.803, - "step": 389000 - }, - { - "epoch": 5.61, - "learning_rate": 6.064015287232773e-05, - "loss": 1.7992, - "step": 389500 - }, - { - "epoch": 5.61, - "learning_rate": 6.0588433837542005e-05, - "loss": 1.8068, - "step": 390000 - }, - { - "epoch": 5.62, - "learning_rate": 6.053650709578926e-05, - "loss": 1.8008, - "step": 390500 - }, - { - "epoch": 5.63, - "learning_rate": 6.048478806100354e-05, - "loss": 1.8023, - "step": 391000 - }, - { - "epoch": 5.64, - "learning_rate": 6.04328613192508e-05, - "loss": 1.8037, - "step": 391500 - }, - { - "epoch": 5.64, - "learning_rate": 6.038114228446508e-05, - "loss": 1.8034, - "step": 392000 - }, - { - "epoch": 5.65, - "learning_rate": 6.0329215542712345e-05, - "loss": 1.7997, - "step": 392500 - }, - { - "epoch": 5.66, - "learning_rate": 6.027749650792662e-05, - "loss": 1.8028, - "step": 393000 - }, - { - "epoch": 5.66, - "learning_rate": 6.0225569766173884e-05, - "loss": 1.7945, - "step": 393500 - }, - { - "epoch": 5.67, - "learning_rate": 6.017385073138816e-05, - "loss": 1.8, - "step": 394000 - }, - { - "epoch": 5.68, - "learning_rate": 6.012192398963542e-05, - "loss": 1.7997, - "step": 394500 - }, - { - "epoch": 5.69, - "learning_rate": 6.0070204954849695e-05, - "loss": 1.8051, - "step": 395000 - }, - { - "epoch": 5.69, - "learning_rate": 6.001827821309697e-05, - "loss": 1.8016, - "step": 395500 - }, - { - "epoch": 5.7, - "learning_rate": 5.996655917831124e-05, - "loss": 1.8013, - "step": 396000 - }, - { - "epoch": 5.71, - "learning_rate": 5.99146324365585e-05, - "loss": 1.7962, - "step": 396500 - }, - { - "epoch": 5.71, - "learning_rate": 5.986291340177278e-05, - "loss": 1.802, - "step": 397000 - }, - { - "epoch": 5.72, - "learning_rate": 5.9810986660020055e-05, - "loss": 1.8034, - "step": 397500 - }, - { - "epoch": 5.73, - "learning_rate": 5.9759267625234326e-05, - "loss": 1.7997, - "step": 398000 - }, - { - "epoch": 5.74, - "learning_rate": 5.970734088348159e-05, - "loss": 1.8004, - "step": 398500 - }, - { - "epoch": 5.74, - "learning_rate": 5.9655621848695865e-05, - "loss": 1.7989, - "step": 399000 - }, - { - "epoch": 5.75, - "learning_rate": 5.960369510694313e-05, - "loss": 1.798, - "step": 399500 - }, - { - "epoch": 5.76, - "learning_rate": 5.95519760721574e-05, - "loss": 1.8007, - "step": 400000 - }, - { - "epoch": 5.76, - "learning_rate": 5.950004933040467e-05, - "loss": 1.803, - "step": 400500 - }, - { - "epoch": 5.77, - "learning_rate": 5.944833029561895e-05, - "loss": 1.8009, - "step": 401000 - }, - { - "epoch": 5.78, - "learning_rate": 5.939640355386621e-05, - "loss": 1.8006, - "step": 401500 - }, - { - "epoch": 5.79, - "learning_rate": 5.934468451908048e-05, - "loss": 1.7991, - "step": 402000 - }, - { - "epoch": 5.79, - "learning_rate": 5.9292757777327745e-05, - "loss": 1.8012, - "step": 402500 - }, - { - "epoch": 5.8, - "learning_rate": 5.924103874254202e-05, - "loss": 1.7991, - "step": 403000 - }, - { - "epoch": 5.81, - "learning_rate": 5.918911200078929e-05, - "loss": 1.7986, - "step": 403500 - }, - { - "epoch": 5.82, - "learning_rate": 5.913739296600357e-05, - "loss": 1.7957, - "step": 404000 - }, - { - "epoch": 5.82, - "learning_rate": 5.908546622425083e-05, - "loss": 1.8021, - "step": 404500 - }, - { - "epoch": 5.83, - "learning_rate": 5.903374718946511e-05, - "loss": 1.7995, - "step": 405000 - }, - { - "epoch": 5.84, - "learning_rate": 5.898182044771237e-05, - "loss": 1.7988, - "step": 405500 - }, - { - "epoch": 5.84, - "learning_rate": 5.893010141292664e-05, - "loss": 1.7979, - "step": 406000 - }, - { - "epoch": 5.85, - "learning_rate": 5.8878174671173915e-05, - "loss": 1.8014, - "step": 406500 - }, - { - "epoch": 5.86, - "learning_rate": 5.8826455636388186e-05, - "loss": 1.7983, - "step": 407000 - }, - { - "epoch": 5.87, - "learning_rate": 5.877452889463545e-05, - "loss": 1.7942, - "step": 407500 - }, - { - "epoch": 5.87, - "learning_rate": 5.8722809859849726e-05, - "loss": 1.8022, - "step": 408000 - }, - { - "epoch": 5.88, - "learning_rate": 5.867088311809699e-05, - "loss": 1.8001, - "step": 408500 - }, - { - "epoch": 5.89, - "learning_rate": 5.8619164083311265e-05, - "loss": 1.7983, - "step": 409000 - }, - { - "epoch": 5.89, - "learning_rate": 5.856723734155853e-05, - "loss": 1.7952, - "step": 409500 - }, - { - "epoch": 5.9, - "learning_rate": 5.851551830677281e-05, - "loss": 1.7954, - "step": 410000 - }, - { - "epoch": 5.91, - "learning_rate": 5.846359156502007e-05, - "loss": 1.7962, - "step": 410500 - }, - { - "epoch": 5.92, - "learning_rate": 5.841187253023434e-05, - "loss": 1.7982, - "step": 411000 - }, - { - "epoch": 5.92, - "learning_rate": 5.835994578848162e-05, - "loss": 1.7967, - "step": 411500 - }, - { - "epoch": 5.93, - "learning_rate": 5.8308226753695896e-05, - "loss": 1.7942, - "step": 412000 - }, - { - "epoch": 5.94, - "learning_rate": 5.825630001194316e-05, - "loss": 1.7974, - "step": 412500 - }, - { - "epoch": 5.94, - "learning_rate": 5.820458097715743e-05, - "loss": 1.7998, - "step": 413000 - }, - { - "epoch": 5.95, - "learning_rate": 5.815265423540469e-05, - "loss": 1.8, - "step": 413500 - }, - { - "epoch": 5.96, - "learning_rate": 5.810093520061897e-05, - "loss": 1.7984, - "step": 414000 - }, - { - "epoch": 5.97, - "learning_rate": 5.8049008458866236e-05, - "loss": 1.7952, - "step": 414500 - }, - { - "epoch": 5.97, - "learning_rate": 5.7997289424080514e-05, - "loss": 1.7919, - "step": 415000 - }, - { - "epoch": 5.98, - "learning_rate": 5.7945362682327775e-05, - "loss": 1.8007, - "step": 415500 - }, - { - "epoch": 5.99, - "learning_rate": 5.789364364754205e-05, - "loss": 1.7937, - "step": 416000 - }, - { - "epoch": 6.0, - "learning_rate": 5.7841716905789315e-05, - "loss": 1.7983, - "step": 416500 - }, - { - "epoch": 6.0, - "eval_accuracy": 0.653541434859015, - "eval_loss": 1.6650390625, - "eval_runtime": 654.5009, - "eval_samples_per_second": 823.432, - "eval_steps_per_second": 34.31, - "step": 416838 - }, - { - "epoch": 6.0, - "learning_rate": 5.7789997871003586e-05, - "loss": 1.7937, - "step": 417000 - }, - { - "epoch": 6.01, - "learning_rate": 5.773807112925086e-05, - "loss": 1.7956, - "step": 417500 - }, - { - "epoch": 6.02, - "learning_rate": 5.768635209446513e-05, - "loss": 1.7956, - "step": 418000 - }, - { - "epoch": 6.02, - "learning_rate": 5.763442535271239e-05, - "loss": 1.7945, - "step": 418500 - }, - { - "epoch": 6.03, - "learning_rate": 5.758270631792667e-05, - "loss": 1.7949, - "step": 419000 - }, - { - "epoch": 6.04, - "learning_rate": 5.753077957617393e-05, - "loss": 1.7931, - "step": 419500 - }, - { - "epoch": 6.05, - "learning_rate": 5.747906054138821e-05, - "loss": 1.7941, - "step": 420000 - }, - { - "epoch": 6.05, - "learning_rate": 5.742713379963548e-05, - "loss": 1.7906, - "step": 420500 - }, - { - "epoch": 6.06, - "learning_rate": 5.7375414764849756e-05, - "loss": 1.7891, - "step": 421000 - }, - { - "epoch": 6.07, - "learning_rate": 5.732348802309702e-05, - "loss": 1.7921, - "step": 421500 - }, - { - "epoch": 6.07, - "learning_rate": 5.727176898831129e-05, - "loss": 1.7909, - "step": 422000 - }, - { - "epoch": 6.08, - "learning_rate": 5.721984224655855e-05, - "loss": 1.7931, - "step": 422500 - }, - { - "epoch": 6.09, - "learning_rate": 5.716812321177283e-05, - "loss": 1.7914, - "step": 423000 - }, - { - "epoch": 6.1, - "learning_rate": 5.71161964700201e-05, - "loss": 1.7881, - "step": 423500 - }, - { - "epoch": 6.1, - "learning_rate": 5.7064477435234374e-05, - "loss": 1.7966, - "step": 424000 - }, - { - "epoch": 6.11, - "learning_rate": 5.7012550693481636e-05, - "loss": 1.7895, - "step": 424500 - }, - { - "epoch": 6.12, - "learning_rate": 5.6960831658695913e-05, - "loss": 1.7907, - "step": 425000 - }, - { - "epoch": 6.12, - "learning_rate": 5.690890491694318e-05, - "loss": 1.7931, - "step": 425500 - }, - { - "epoch": 6.13, - "learning_rate": 5.685718588215746e-05, - "loss": 1.795, - "step": 426000 - }, - { - "epoch": 6.14, - "learning_rate": 5.680525914040472e-05, - "loss": 1.7916, - "step": 426500 - }, - { - "epoch": 6.15, - "learning_rate": 5.6753540105619e-05, - "loss": 1.7892, - "step": 427000 - }, - { - "epoch": 6.15, - "learning_rate": 5.670161336386626e-05, - "loss": 1.7971, - "step": 427500 - }, - { - "epoch": 6.16, - "learning_rate": 5.664989432908053e-05, - "loss": 1.787, - "step": 428000 - }, - { - "epoch": 6.17, - "learning_rate": 5.6597967587327806e-05, - "loss": 1.7915, - "step": 428500 - }, - { - "epoch": 6.18, - "learning_rate": 5.654624855254208e-05, - "loss": 1.7912, - "step": 429000 - }, - { - "epoch": 6.18, - "learning_rate": 5.649432181078934e-05, - "loss": 1.7941, - "step": 429500 - }, - { - "epoch": 6.19, - "learning_rate": 5.6442602776003617e-05, - "loss": 1.7921, - "step": 430000 - }, - { - "epoch": 6.2, - "learning_rate": 5.639067603425088e-05, - "loss": 1.7908, - "step": 430500 - }, - { - "epoch": 6.2, - "learning_rate": 5.6338956999465156e-05, - "loss": 1.7863, - "step": 431000 - }, - { - "epoch": 6.21, - "learning_rate": 5.6287030257712424e-05, - "loss": 1.791, - "step": 431500 - }, - { - "epoch": 6.22, - "learning_rate": 5.62353112229267e-05, - "loss": 1.7888, - "step": 432000 - }, - { - "epoch": 6.23, - "learning_rate": 5.618338448117396e-05, - "loss": 1.7913, - "step": 432500 - }, - { - "epoch": 6.23, - "learning_rate": 5.6131665446388234e-05, - "loss": 1.7945, - "step": 433000 - }, - { - "epoch": 6.24, - "learning_rate": 5.6079738704635496e-05, - "loss": 1.7891, - "step": 433500 - }, - { - "epoch": 6.25, - "learning_rate": 5.6028019669849774e-05, - "loss": 1.7894, - "step": 434000 - }, - { - "epoch": 6.25, - "learning_rate": 5.597609292809705e-05, - "loss": 1.7893, - "step": 434500 - }, - { - "epoch": 6.26, - "learning_rate": 5.592437389331132e-05, - "loss": 1.787, - "step": 435000 - }, - { - "epoch": 6.27, - "learning_rate": 5.587244715155858e-05, - "loss": 1.7925, - "step": 435500 - }, - { - "epoch": 6.28, - "learning_rate": 5.582072811677286e-05, - "loss": 1.7886, - "step": 436000 - }, - { - "epoch": 6.28, - "learning_rate": 5.576880137502012e-05, - "loss": 1.7886, - "step": 436500 - }, - { - "epoch": 6.29, - "learning_rate": 5.571708234023439e-05, - "loss": 1.7894, - "step": 437000 - }, - { - "epoch": 6.3, - "learning_rate": 5.5665155598481666e-05, - "loss": 1.791, - "step": 437500 - }, - { - "epoch": 6.3, - "learning_rate": 5.5613436563695944e-05, - "loss": 1.7893, - "step": 438000 - }, - { - "epoch": 6.31, - "learning_rate": 5.5561509821943206e-05, - "loss": 1.7938, - "step": 438500 - }, - { - "epoch": 6.32, - "learning_rate": 5.550979078715748e-05, - "loss": 1.7898, - "step": 439000 - }, - { - "epoch": 6.33, - "learning_rate": 5.545786404540475e-05, - "loss": 1.789, - "step": 439500 - }, - { - "epoch": 6.33, - "learning_rate": 5.540614501061902e-05, - "loss": 1.787, - "step": 440000 - }, - { - "epoch": 6.34, - "learning_rate": 5.5354218268866284e-05, - "loss": 1.7902, - "step": 440500 - }, - { - "epoch": 6.35, - "learning_rate": 5.530249923408056e-05, - "loss": 1.7909, - "step": 441000 - }, - { - "epoch": 6.35, - "learning_rate": 5.5250572492327824e-05, - "loss": 1.7877, - "step": 441500 - }, - { - "epoch": 6.36, - "learning_rate": 5.51988534575421e-05, - "loss": 1.7861, - "step": 442000 - }, - { - "epoch": 6.37, - "learning_rate": 5.514692671578937e-05, - "loss": 1.7923, - "step": 442500 - }, - { - "epoch": 6.38, - "learning_rate": 5.509520768100365e-05, - "loss": 1.7884, - "step": 443000 - }, - { - "epoch": 6.38, - "learning_rate": 5.504328093925091e-05, - "loss": 1.7857, - "step": 443500 - }, - { - "epoch": 6.39, - "learning_rate": 5.499156190446518e-05, - "loss": 1.7871, - "step": 444000 - }, - { - "epoch": 6.4, - "learning_rate": 5.493963516271244e-05, - "loss": 1.7854, - "step": 444500 - }, - { - "epoch": 6.41, - "learning_rate": 5.488791612792672e-05, - "loss": 1.7808, - "step": 445000 - }, - { - "epoch": 6.41, - "learning_rate": 5.4835989386173994e-05, - "loss": 1.7894, - "step": 445500 - }, - { - "epoch": 6.42, - "learning_rate": 5.4784270351388265e-05, - "loss": 1.787, - "step": 446000 - }, - { - "epoch": 6.43, - "learning_rate": 5.473234360963553e-05, - "loss": 1.785, - "step": 446500 - }, - { - "epoch": 6.43, - "learning_rate": 5.4680624574849804e-05, - "loss": 1.7849, - "step": 447000 - }, - { - "epoch": 6.44, - "learning_rate": 5.4628697833097066e-05, - "loss": 1.7826, - "step": 447500 - }, - { - "epoch": 6.45, - "learning_rate": 5.457697879831134e-05, - "loss": 1.7885, - "step": 448000 - }, - { - "epoch": 6.46, - "learning_rate": 5.452505205655861e-05, - "loss": 1.7873, - "step": 448500 - }, - { - "epoch": 6.46, - "learning_rate": 5.447333302177289e-05, - "loss": 1.7839, - "step": 449000 - }, - { - "epoch": 6.47, - "learning_rate": 5.442140628002015e-05, - "loss": 1.7839, - "step": 449500 - }, - { - "epoch": 6.48, - "learning_rate": 5.436968724523442e-05, - "loss": 1.785, - "step": 450000 - }, - { - "epoch": 6.48, - "learning_rate": 5.43177605034817e-05, - "loss": 1.7891, - "step": 450500 - }, - { - "epoch": 6.49, - "learning_rate": 5.4266041468695975e-05, - "loss": 1.7841, - "step": 451000 - }, - { - "epoch": 6.5, - "learning_rate": 5.421411472694323e-05, - "loss": 1.787, - "step": 451500 - }, - { - "epoch": 6.51, - "learning_rate": 5.416239569215751e-05, - "loss": 1.7834, - "step": 452000 - }, - { - "epoch": 6.51, - "learning_rate": 5.411046895040477e-05, - "loss": 1.7867, - "step": 452500 - }, - { - "epoch": 6.52, - "learning_rate": 5.405874991561905e-05, - "loss": 1.7861, - "step": 453000 - }, - { - "epoch": 6.53, - "learning_rate": 5.4006823173866315e-05, - "loss": 1.7801, - "step": 453500 - }, - { - "epoch": 6.53, - "learning_rate": 5.395510413908059e-05, - "loss": 1.7849, - "step": 454000 - }, - { - "epoch": 6.54, - "learning_rate": 5.3903177397327854e-05, - "loss": 1.786, - "step": 454500 - }, - { - "epoch": 6.55, - "learning_rate": 5.385145836254213e-05, - "loss": 1.7866, - "step": 455000 - }, - { - "epoch": 6.56, - "learning_rate": 5.379953162078939e-05, - "loss": 1.7842, - "step": 455500 - }, - { - "epoch": 6.56, - "learning_rate": 5.3747812586003665e-05, - "loss": 1.7848, - "step": 456000 - }, - { - "epoch": 6.57, - "learning_rate": 5.369588584425094e-05, - "loss": 1.7831, - "step": 456500 - }, - { - "epoch": 6.58, - "learning_rate": 5.364416680946521e-05, - "loss": 1.784, - "step": 457000 - }, - { - "epoch": 6.59, - "learning_rate": 5.359224006771247e-05, - "loss": 1.7814, - "step": 457500 - }, - { - "epoch": 6.59, - "learning_rate": 5.354052103292675e-05, - "loss": 1.7813, - "step": 458000 - }, - { - "epoch": 6.6, - "learning_rate": 5.348859429117401e-05, - "loss": 1.7835, - "step": 458500 - }, - { - "epoch": 6.61, - "learning_rate": 5.343687525638828e-05, - "loss": 1.7839, - "step": 459000 - }, - { - "epoch": 6.61, - "learning_rate": 5.338494851463556e-05, - "loss": 1.7875, - "step": 459500 - }, - { - "epoch": 6.62, - "learning_rate": 5.3333229479849835e-05, - "loss": 1.7841, - "step": 460000 - }, - { - "epoch": 6.63, - "learning_rate": 5.32813027380971e-05, - "loss": 1.7816, - "step": 460500 - }, - { - "epoch": 6.64, - "learning_rate": 5.322958370331137e-05, - "loss": 1.7807, - "step": 461000 - }, - { - "epoch": 6.64, - "learning_rate": 5.317765696155863e-05, - "loss": 1.7856, - "step": 461500 - }, - { - "epoch": 6.65, - "learning_rate": 5.312593792677291e-05, - "loss": 1.7811, - "step": 462000 - }, - { - "epoch": 6.66, - "learning_rate": 5.3074011185020175e-05, - "loss": 1.7778, - "step": 462500 - }, - { - "epoch": 6.66, - "learning_rate": 5.302229215023445e-05, - "loss": 1.7846, - "step": 463000 - }, - { - "epoch": 6.67, - "learning_rate": 5.2970365408481715e-05, - "loss": 1.7823, - "step": 463500 - }, - { - "epoch": 6.68, - "learning_rate": 5.291864637369599e-05, - "loss": 1.7802, - "step": 464000 - }, - { - "epoch": 6.69, - "learning_rate": 5.286671963194326e-05, - "loss": 1.783, - "step": 464500 - }, - { - "epoch": 6.69, - "learning_rate": 5.281500059715754e-05, - "loss": 1.7805, - "step": 465000 - }, - { - "epoch": 6.7, - "learning_rate": 5.27630738554048e-05, - "loss": 1.779, - "step": 465500 - }, - { - "epoch": 6.71, - "learning_rate": 5.271135482061908e-05, - "loss": 1.7814, - "step": 466000 - }, - { - "epoch": 6.71, - "learning_rate": 5.265942807886633e-05, - "loss": 1.7842, - "step": 466500 - }, - { - "epoch": 6.72, - "learning_rate": 5.260770904408061e-05, - "loss": 1.7831, - "step": 467000 - }, - { - "epoch": 6.73, - "learning_rate": 5.2555782302327885e-05, - "loss": 1.7806, - "step": 467500 - }, - { - "epoch": 6.74, - "learning_rate": 5.2504063267542156e-05, - "loss": 1.7771, - "step": 468000 - }, - { - "epoch": 6.74, - "learning_rate": 5.245213652578942e-05, - "loss": 1.782, - "step": 468500 - }, - { - "epoch": 6.75, - "learning_rate": 5.2400417491003695e-05, - "loss": 1.7842, - "step": 469000 - }, - { - "epoch": 6.76, - "learning_rate": 5.234849074925096e-05, - "loss": 1.7814, - "step": 469500 - }, - { - "epoch": 6.77, - "learning_rate": 5.2296771714465235e-05, - "loss": 1.781, - "step": 470000 - }, - { - "epoch": 6.77, - "learning_rate": 5.22448449727125e-05, - "loss": 1.7829, - "step": 470500 - }, - { - "epoch": 6.78, - "learning_rate": 5.219312593792678e-05, - "loss": 1.7795, - "step": 471000 - }, - { - "epoch": 6.79, - "learning_rate": 5.214119919617404e-05, - "loss": 1.7825, - "step": 471500 - }, - { - "epoch": 6.79, - "learning_rate": 5.208948016138831e-05, - "loss": 1.7825, - "step": 472000 - }, - { - "epoch": 6.8, - "learning_rate": 5.2037553419635575e-05, - "loss": 1.7795, - "step": 472500 - }, - { - "epoch": 6.81, - "learning_rate": 5.198583438484985e-05, - "loss": 1.7813, - "step": 473000 - }, - { - "epoch": 6.82, - "learning_rate": 5.193390764309712e-05, - "loss": 1.78, - "step": 473500 - }, - { - "epoch": 6.82, - "learning_rate": 5.18821886083114e-05, - "loss": 1.7778, - "step": 474000 - }, - { - "epoch": 6.83, - "learning_rate": 5.183026186655866e-05, - "loss": 1.7773, - "step": 474500 - }, - { - "epoch": 6.84, - "learning_rate": 5.177854283177294e-05, - "loss": 1.7746, - "step": 475000 - }, - { - "epoch": 6.84, - "learning_rate": 5.17266160900202e-05, - "loss": 1.7806, - "step": 475500 - }, - { - "epoch": 6.85, - "learning_rate": 5.167489705523447e-05, - "loss": 1.7832, - "step": 476000 - }, - { - "epoch": 6.86, - "learning_rate": 5.1622970313481745e-05, - "loss": 1.7784, - "step": 476500 - }, - { - "epoch": 6.87, - "learning_rate": 5.157125127869602e-05, - "loss": 1.7803, - "step": 477000 - }, - { - "epoch": 6.87, - "learning_rate": 5.151932453694328e-05, - "loss": 1.7763, - "step": 477500 - }, - { - "epoch": 6.88, - "learning_rate": 5.1467605502157556e-05, - "loss": 1.7796, - "step": 478000 - }, - { - "epoch": 6.89, - "learning_rate": 5.141567876040483e-05, - "loss": 1.7793, - "step": 478500 - }, - { - "epoch": 6.89, - "learning_rate": 5.13639597256191e-05, - "loss": 1.7778, - "step": 479000 - }, - { - "epoch": 6.9, - "learning_rate": 5.131203298386636e-05, - "loss": 1.7813, - "step": 479500 - }, - { - "epoch": 6.91, - "learning_rate": 5.126031394908064e-05, - "loss": 1.7826, - "step": 480000 - }, - { - "epoch": 6.92, - "learning_rate": 5.12083872073279e-05, - "loss": 1.7755, - "step": 480500 - }, - { - "epoch": 6.92, - "learning_rate": 5.115666817254218e-05, - "loss": 1.7787, - "step": 481000 - }, - { - "epoch": 6.93, - "learning_rate": 5.110474143078945e-05, - "loss": 1.7764, - "step": 481500 - }, - { - "epoch": 6.94, - "learning_rate": 5.1053022396003726e-05, - "loss": 1.7785, - "step": 482000 - }, - { - "epoch": 6.95, - "learning_rate": 5.100109565425099e-05, - "loss": 1.7748, - "step": 482500 - }, - { - "epoch": 6.95, - "learning_rate": 5.094937661946526e-05, - "loss": 1.7775, - "step": 483000 - }, - { - "epoch": 6.96, - "learning_rate": 5.089744987771252e-05, - "loss": 1.7784, - "step": 483500 - }, - { - "epoch": 6.97, - "learning_rate": 5.08457308429268e-05, - "loss": 1.7801, - "step": 484000 - }, - { - "epoch": 6.97, - "learning_rate": 5.0793804101174066e-05, - "loss": 1.7745, - "step": 484500 - }, - { - "epoch": 6.98, - "learning_rate": 5.0742085066388344e-05, - "loss": 1.7795, - "step": 485000 - }, - { - "epoch": 6.99, - "learning_rate": 5.0690158324635606e-05, - "loss": 1.7768, - "step": 485500 - }, - { - "epoch": 7.0, - "learning_rate": 5.063843928984988e-05, - "loss": 1.7788, - "step": 486000 - }, - { - "epoch": 7.0, - "eval_accuracy": 0.6557946838285342, - "eval_loss": 1.6484375, - "eval_runtime": 654.0163, - "eval_samples_per_second": 824.042, - "eval_steps_per_second": 34.336, - "step": 486311 - }, - { - "epoch": 7.0, - "learning_rate": 5.0586512548097145e-05, - "loss": 1.779, - "step": 486500 - }, - { - "epoch": 7.01, - "learning_rate": 5.0534793513311416e-05, - "loss": 1.7751, - "step": 487000 - }, - { - "epoch": 7.02, - "learning_rate": 5.048286677155869e-05, - "loss": 1.7737, - "step": 487500 - }, - { - "epoch": 7.02, - "learning_rate": 5.043114773677297e-05, - "loss": 1.7722, - "step": 488000 - }, - { - "epoch": 7.03, - "learning_rate": 5.037922099502022e-05, - "loss": 1.7749, - "step": 488500 - }, - { - "epoch": 7.04, - "learning_rate": 5.03275019602345e-05, - "loss": 1.7724, - "step": 489000 - }, - { - "epoch": 7.05, - "learning_rate": 5.027557521848176e-05, - "loss": 1.7736, - "step": 489500 - }, - { - "epoch": 7.05, - "learning_rate": 5.022385618369604e-05, - "loss": 1.7769, - "step": 490000 - }, - { - "epoch": 7.06, - "learning_rate": 5.017192944194331e-05, - "loss": 1.7723, - "step": 490500 - }, - { - "epoch": 7.07, - "learning_rate": 5.0120210407157586e-05, - "loss": 1.7733, - "step": 491000 - }, - { - "epoch": 7.07, - "learning_rate": 5.006828366540485e-05, - "loss": 1.769, - "step": 491500 - }, - { - "epoch": 7.08, - "learning_rate": 5.0016564630619126e-05, - "loss": 1.776, - "step": 492000 - }, - { - "epoch": 7.09, - "learning_rate": 4.996463788886639e-05, - "loss": 1.7717, - "step": 492500 - }, - { - "epoch": 7.1, - "learning_rate": 4.9912918854080665e-05, - "loss": 1.776, - "step": 493000 - }, - { - "epoch": 7.1, - "learning_rate": 4.986099211232793e-05, - "loss": 1.7734, - "step": 493500 - }, - { - "epoch": 7.11, - "learning_rate": 4.9809273077542204e-05, - "loss": 1.7775, - "step": 494000 - }, - { - "epoch": 7.12, - "learning_rate": 4.975734633578947e-05, - "loss": 1.7776, - "step": 494500 - }, - { - "epoch": 7.13, - "learning_rate": 4.970562730100375e-05, - "loss": 1.7721, - "step": 495000 - }, - { - "epoch": 7.13, - "learning_rate": 4.965370055925101e-05, - "loss": 1.7699, - "step": 495500 - }, - { - "epoch": 7.14, - "learning_rate": 4.960198152446528e-05, - "loss": 1.7733, - "step": 496000 - }, - { - "epoch": 7.15, - "learning_rate": 4.955005478271255e-05, - "loss": 1.7729, - "step": 496500 - }, - { - "epoch": 7.15, - "learning_rate": 4.949833574792683e-05, - "loss": 1.7708, - "step": 497000 - }, - { - "epoch": 7.16, - "learning_rate": 4.944640900617409e-05, - "loss": 1.7723, - "step": 497500 - }, - { - "epoch": 7.17, - "learning_rate": 4.939468997138837e-05, - "loss": 1.7712, - "step": 498000 - }, - { - "epoch": 7.18, - "learning_rate": 4.934276322963563e-05, - "loss": 1.7676, - "step": 498500 - }, - { - "epoch": 7.18, - "learning_rate": 4.929104419484991e-05, - "loss": 1.7722, - "step": 499000 - }, - { - "epoch": 7.19, - "learning_rate": 4.923911745309717e-05, - "loss": 1.7726, - "step": 499500 - }, - { - "epoch": 7.2, - "learning_rate": 4.9187398418311447e-05, - "loss": 1.7728, - "step": 500000 - }, - { - "epoch": 7.2, - "learning_rate": 4.9135471676558715e-05, - "loss": 1.7717, - "step": 500500 - }, - { - "epoch": 7.21, - "learning_rate": 4.908354493480598e-05, - "loss": 1.7681, - "step": 501000 - }, - { - "epoch": 7.22, - "learning_rate": 4.9031825900020254e-05, - "loss": 1.7714, - "step": 501500 - }, - { - "epoch": 7.23, - "learning_rate": 4.8979899158267516e-05, - "loss": 1.7739, - "step": 502000 - }, - { - "epoch": 7.23, - "learning_rate": 4.8928180123481793e-05, - "loss": 1.7722, - "step": 502500 - }, - { - "epoch": 7.24, - "learning_rate": 4.887625338172906e-05, - "loss": 1.774, - "step": 503000 - }, - { - "epoch": 7.25, - "learning_rate": 4.882453434694333e-05, - "loss": 1.773, - "step": 503500 - }, - { - "epoch": 7.25, - "learning_rate": 4.87726076051906e-05, - "loss": 1.7714, - "step": 504000 - }, - { - "epoch": 7.26, - "learning_rate": 4.872088857040488e-05, - "loss": 1.7707, - "step": 504500 - }, - { - "epoch": 7.27, - "learning_rate": 4.866896182865214e-05, - "loss": 1.771, - "step": 505000 - }, - { - "epoch": 7.28, - "learning_rate": 4.861724279386641e-05, - "loss": 1.7751, - "step": 505500 - }, - { - "epoch": 7.28, - "learning_rate": 4.856531605211368e-05, - "loss": 1.7725, - "step": 506000 - }, - { - "epoch": 7.29, - "learning_rate": 4.851359701732796e-05, - "loss": 1.769, - "step": 506500 - }, - { - "epoch": 7.3, - "learning_rate": 4.846167027557522e-05, - "loss": 1.7724, - "step": 507000 - }, - { - "epoch": 7.3, - "learning_rate": 4.8409951240789497e-05, - "loss": 1.7742, - "step": 507500 - }, - { - "epoch": 7.31, - "learning_rate": 4.8358024499036765e-05, - "loss": 1.7667, - "step": 508000 - }, - { - "epoch": 7.32, - "learning_rate": 4.830630546425104e-05, - "loss": 1.7694, - "step": 508500 - }, - { - "epoch": 7.33, - "learning_rate": 4.82543787224983e-05, - "loss": 1.7746, - "step": 509000 - }, - { - "epoch": 7.33, - "learning_rate": 4.8202659687712575e-05, - "loss": 1.7697, - "step": 509500 - }, - { - "epoch": 7.34, - "learning_rate": 4.815073294595984e-05, - "loss": 1.7704, - "step": 510000 - }, - { - "epoch": 7.35, - "learning_rate": 4.809901391117412e-05, - "loss": 1.7721, - "step": 510500 - }, - { - "epoch": 7.36, - "learning_rate": 4.804708716942138e-05, - "loss": 1.7674, - "step": 511000 - }, - { - "epoch": 7.36, - "learning_rate": 4.799536813463566e-05, - "loss": 1.7727, - "step": 511500 - }, - { - "epoch": 7.37, - "learning_rate": 4.794344139288292e-05, - "loss": 1.7702, - "step": 512000 - }, - { - "epoch": 7.38, - "learning_rate": 4.78917223580972e-05, - "loss": 1.768, - "step": 512500 - }, - { - "epoch": 7.38, - "learning_rate": 4.783979561634446e-05, - "loss": 1.7709, - "step": 513000 - }, - { - "epoch": 7.39, - "learning_rate": 4.778807658155874e-05, - "loss": 1.7669, - "step": 513500 - }, - { - "epoch": 7.4, - "learning_rate": 4.773614983980601e-05, - "loss": 1.7699, - "step": 514000 - }, - { - "epoch": 7.41, - "learning_rate": 4.768443080502028e-05, - "loss": 1.7722, - "step": 514500 - }, - { - "epoch": 7.41, - "learning_rate": 4.7632504063267546e-05, - "loss": 1.7668, - "step": 515000 - }, - { - "epoch": 7.42, - "learning_rate": 4.7580785028481824e-05, - "loss": 1.7692, - "step": 515500 - }, - { - "epoch": 7.43, - "learning_rate": 4.7528858286729086e-05, - "loss": 1.7711, - "step": 516000 - }, - { - "epoch": 7.43, - "learning_rate": 4.747713925194336e-05, - "loss": 1.7698, - "step": 516500 - }, - { - "epoch": 7.44, - "learning_rate": 4.7425212510190625e-05, - "loss": 1.7683, - "step": 517000 - }, - { - "epoch": 7.45, - "learning_rate": 4.73734934754049e-05, - "loss": 1.7646, - "step": 517500 - }, - { - "epoch": 7.46, - "learning_rate": 4.7321566733652164e-05, - "loss": 1.7708, - "step": 518000 - }, - { - "epoch": 7.46, - "learning_rate": 4.726984769886644e-05, - "loss": 1.769, - "step": 518500 - }, - { - "epoch": 7.47, - "learning_rate": 4.7217920957113704e-05, - "loss": 1.7661, - "step": 519000 - }, - { - "epoch": 7.48, - "learning_rate": 4.716620192232798e-05, - "loss": 1.7644, - "step": 519500 - }, - { - "epoch": 7.48, - "learning_rate": 4.711427518057524e-05, - "loss": 1.7686, - "step": 520000 - }, - { - "epoch": 7.49, - "learning_rate": 4.706255614578952e-05, - "loss": 1.7745, - "step": 520500 - }, - { - "epoch": 7.5, - "learning_rate": 4.701062940403679e-05, - "loss": 1.7687, - "step": 521000 - }, - { - "epoch": 7.51, - "learning_rate": 4.6958910369251067e-05, - "loss": 1.7672, - "step": 521500 - }, - { - "epoch": 7.51, - "learning_rate": 4.690698362749833e-05, - "loss": 1.7682, - "step": 522000 - }, - { - "epoch": 7.52, - "learning_rate": 4.6855264592712606e-05, - "loss": 1.7658, - "step": 522500 - }, - { - "epoch": 7.53, - "learning_rate": 4.680333785095987e-05, - "loss": 1.7689, - "step": 523000 - }, - { - "epoch": 7.54, - "learning_rate": 4.6751618816174145e-05, - "loss": 1.7651, - "step": 523500 - }, - { - "epoch": 7.54, - "learning_rate": 4.669969207442141e-05, - "loss": 1.7693, - "step": 524000 - }, - { - "epoch": 7.55, - "learning_rate": 4.6647973039635684e-05, - "loss": 1.7638, - "step": 524500 - }, - { - "epoch": 7.56, - "learning_rate": 4.659604629788295e-05, - "loss": 1.7658, - "step": 525000 - }, - { - "epoch": 7.56, - "learning_rate": 4.6544327263097224e-05, - "loss": 1.7689, - "step": 525500 - }, - { - "epoch": 7.57, - "learning_rate": 4.6492400521344485e-05, - "loss": 1.7721, - "step": 526000 - }, - { - "epoch": 7.58, - "learning_rate": 4.644068148655876e-05, - "loss": 1.768, - "step": 526500 - }, - { - "epoch": 7.59, - "learning_rate": 4.638875474480603e-05, - "loss": 1.7643, - "step": 527000 - }, - { - "epoch": 7.59, - "learning_rate": 4.63370357100203e-05, - "loss": 1.764, - "step": 527500 - }, - { - "epoch": 7.6, - "learning_rate": 4.628510896826757e-05, - "loss": 1.7626, - "step": 528000 - }, - { - "epoch": 7.61, - "learning_rate": 4.623338993348185e-05, - "loss": 1.7668, - "step": 528500 - }, - { - "epoch": 7.61, - "learning_rate": 4.618146319172911e-05, - "loss": 1.7643, - "step": 529000 - }, - { - "epoch": 7.62, - "learning_rate": 4.612974415694339e-05, - "loss": 1.7623, - "step": 529500 - }, - { - "epoch": 7.63, - "learning_rate": 4.607781741519065e-05, - "loss": 1.7665, - "step": 530000 - }, - { - "epoch": 7.64, - "learning_rate": 4.602609838040493e-05, - "loss": 1.7632, - "step": 530500 - }, - { - "epoch": 7.64, - "learning_rate": 4.597417163865219e-05, - "loss": 1.7667, - "step": 531000 - }, - { - "epoch": 7.65, - "learning_rate": 4.5922452603866466e-05, - "loss": 1.764, - "step": 531500 - }, - { - "epoch": 7.66, - "learning_rate": 4.5870525862113734e-05, - "loss": 1.7625, - "step": 532000 - }, - { - "epoch": 7.66, - "learning_rate": 4.581880682732801e-05, - "loss": 1.7651, - "step": 532500 - }, - { - "epoch": 7.67, - "learning_rate": 4.576688008557527e-05, - "loss": 1.7636, - "step": 533000 - }, - { - "epoch": 7.68, - "learning_rate": 4.5715161050789545e-05, - "loss": 1.7662, - "step": 533500 - }, - { - "epoch": 7.69, - "learning_rate": 4.566323430903681e-05, - "loss": 1.7648, - "step": 534000 - }, - { - "epoch": 7.69, - "learning_rate": 4.561151527425109e-05, - "loss": 1.7657, - "step": 534500 - }, - { - "epoch": 7.7, - "learning_rate": 4.555958853249835e-05, - "loss": 1.7677, - "step": 535000 - }, - { - "epoch": 7.71, - "learning_rate": 4.550786949771263e-05, - "loss": 1.7634, - "step": 535500 - }, - { - "epoch": 7.72, - "learning_rate": 4.54559427559599e-05, - "loss": 1.7663, - "step": 536000 - }, - { - "epoch": 7.72, - "learning_rate": 4.540422372117417e-05, - "loss": 1.7633, - "step": 536500 - }, - { - "epoch": 7.73, - "learning_rate": 4.535229697942143e-05, - "loss": 1.7646, - "step": 537000 - }, - { - "epoch": 7.74, - "learning_rate": 4.530057794463571e-05, - "loss": 1.7652, - "step": 537500 - }, - { - "epoch": 7.74, - "learning_rate": 4.524865120288298e-05, - "loss": 1.7615, - "step": 538000 - }, - { - "epoch": 7.75, - "learning_rate": 4.519693216809725e-05, - "loss": 1.7644, - "step": 538500 - }, - { - "epoch": 7.76, - "learning_rate": 4.5145005426344516e-05, - "loss": 1.763, - "step": 539000 - }, - { - "epoch": 7.77, - "learning_rate": 4.5093286391558794e-05, - "loss": 1.7656, - "step": 539500 - }, - { - "epoch": 7.77, - "learning_rate": 4.5041359649806055e-05, - "loss": 1.7649, - "step": 540000 - }, - { - "epoch": 7.78, - "learning_rate": 4.498964061502033e-05, - "loss": 1.7648, - "step": 540500 - }, - { - "epoch": 7.79, - "learning_rate": 4.4937713873267595e-05, - "loss": 1.763, - "step": 541000 - }, - { - "epoch": 7.79, - "learning_rate": 4.488599483848187e-05, - "loss": 1.7672, - "step": 541500 - }, - { - "epoch": 7.8, - "learning_rate": 4.4834068096729134e-05, - "loss": 1.7667, - "step": 542000 - }, - { - "epoch": 7.81, - "learning_rate": 4.478234906194341e-05, - "loss": 1.7627, - "step": 542500 - }, - { - "epoch": 7.82, - "learning_rate": 4.473042232019068e-05, - "loss": 1.7636, - "step": 543000 - }, - { - "epoch": 7.82, - "learning_rate": 4.467870328540496e-05, - "loss": 1.7627, - "step": 543500 - }, - { - "epoch": 7.83, - "learning_rate": 4.462677654365221e-05, - "loss": 1.7661, - "step": 544000 - }, - { - "epoch": 7.84, - "learning_rate": 4.457505750886649e-05, - "loss": 1.7573, - "step": 544500 - }, - { - "epoch": 7.84, - "learning_rate": 4.452313076711376e-05, - "loss": 1.7623, - "step": 545000 - }, - { - "epoch": 7.85, - "learning_rate": 4.4471411732328036e-05, - "loss": 1.7647, - "step": 545500 - }, - { - "epoch": 7.86, - "learning_rate": 4.44194849905753e-05, - "loss": 1.7603, - "step": 546000 - }, - { - "epoch": 7.87, - "learning_rate": 4.4367765955789575e-05, - "loss": 1.7626, - "step": 546500 - }, - { - "epoch": 7.87, - "learning_rate": 4.4315839214036844e-05, - "loss": 1.7634, - "step": 547000 - }, - { - "epoch": 7.88, - "learning_rate": 4.4264120179251115e-05, - "loss": 1.7607, - "step": 547500 - }, - { - "epoch": 7.89, - "learning_rate": 4.4212193437498376e-05, - "loss": 1.7629, - "step": 548000 - }, - { - "epoch": 7.9, - "learning_rate": 4.4160474402712654e-05, - "loss": 1.7633, - "step": 548500 - }, - { - "epoch": 7.9, - "learning_rate": 4.410854766095992e-05, - "loss": 1.7592, - "step": 549000 - }, - { - "epoch": 7.91, - "learning_rate": 4.405682862617419e-05, - "loss": 1.7629, - "step": 549500 - }, - { - "epoch": 7.92, - "learning_rate": 4.400490188442146e-05, - "loss": 1.7628, - "step": 550000 - }, - { - "epoch": 7.92, - "learning_rate": 4.395318284963574e-05, - "loss": 1.759, - "step": 550500 - }, - { - "epoch": 7.93, - "learning_rate": 4.3901256107883e-05, - "loss": 1.7644, - "step": 551000 - }, - { - "epoch": 7.94, - "learning_rate": 4.384953707309727e-05, - "loss": 1.7658, - "step": 551500 - }, - { - "epoch": 7.95, - "learning_rate": 4.379761033134454e-05, - "loss": 1.7626, - "step": 552000 - }, - { - "epoch": 7.95, - "learning_rate": 4.374589129655882e-05, - "loss": 1.761, - "step": 552500 - }, - { - "epoch": 7.96, - "learning_rate": 4.3693964554806086e-05, - "loss": 1.762, - "step": 553000 - }, - { - "epoch": 7.97, - "learning_rate": 4.364224552002036e-05, - "loss": 1.761, - "step": 553500 - }, - { - "epoch": 7.97, - "learning_rate": 4.3590318778267625e-05, - "loss": 1.7568, - "step": 554000 - }, - { - "epoch": 7.98, - "learning_rate": 4.35385997434819e-05, - "loss": 1.7623, - "step": 554500 - }, - { - "epoch": 7.99, - "learning_rate": 4.3486673001729165e-05, - "loss": 1.7599, - "step": 555000 - }, - { - "epoch": 8.0, - "learning_rate": 4.3434953966943436e-05, - "loss": 1.7623, - "step": 555500 - }, - { - "epoch": 8.0, - "eval_accuracy": 0.6579531596492939, - "eval_loss": 1.6328125, - "eval_runtime": 654.7599, - "eval_samples_per_second": 823.106, - "eval_steps_per_second": 34.297, - "step": 555784 - }, - { - "epoch": 8.0, - "learning_rate": 4.3383027225190704e-05, - "loss": 1.7571, - "step": 556000 - }, - { - "epoch": 8.01, - "learning_rate": 4.333130819040498e-05, - "loss": 1.7592, - "step": 556500 - }, - { - "epoch": 8.02, - "learning_rate": 4.327938144865224e-05, - "loss": 1.758, - "step": 557000 - }, - { - "epoch": 8.02, - "learning_rate": 4.322766241386652e-05, - "loss": 1.7543, - "step": 557500 - }, - { - "epoch": 8.03, - "learning_rate": 4.317573567211378e-05, - "loss": 1.7562, - "step": 558000 - }, - { - "epoch": 8.04, - "learning_rate": 4.312401663732806e-05, - "loss": 1.756, - "step": 558500 - }, - { - "epoch": 8.05, - "learning_rate": 4.307208989557532e-05, - "loss": 1.7556, - "step": 559000 - }, - { - "epoch": 8.05, - "learning_rate": 4.30203708607896e-05, - "loss": 1.7592, - "step": 559500 - }, - { - "epoch": 8.06, - "learning_rate": 4.296844411903687e-05, - "loss": 1.7575, - "step": 560000 - }, - { - "epoch": 8.07, - "learning_rate": 4.291672508425114e-05, - "loss": 1.7559, - "step": 560500 - }, - { - "epoch": 8.08, - "learning_rate": 4.286479834249841e-05, - "loss": 1.7574, - "step": 561000 - }, - { - "epoch": 8.08, - "learning_rate": 4.2813079307712685e-05, - "loss": 1.7577, - "step": 561500 - }, - { - "epoch": 8.09, - "learning_rate": 4.2761152565959946e-05, - "loss": 1.7553, - "step": 562000 - }, - { - "epoch": 8.1, - "learning_rate": 4.270943353117422e-05, - "loss": 1.758, - "step": 562500 - }, - { - "epoch": 8.1, - "learning_rate": 4.2657506789421486e-05, - "loss": 1.7571, - "step": 563000 - }, - { - "epoch": 8.11, - "learning_rate": 4.260578775463576e-05, - "loss": 1.7599, - "step": 563500 - }, - { - "epoch": 8.12, - "learning_rate": 4.255386101288303e-05, - "loss": 1.7541, - "step": 564000 - }, - { - "epoch": 8.13, - "learning_rate": 4.25021419780973e-05, - "loss": 1.7598, - "step": 564500 - }, - { - "epoch": 8.13, - "learning_rate": 4.2450215236344564e-05, - "loss": 1.7611, - "step": 565000 - }, - { - "epoch": 8.14, - "learning_rate": 4.239849620155884e-05, - "loss": 1.7534, - "step": 565500 - }, - { - "epoch": 8.15, - "learning_rate": 4.234656945980611e-05, - "loss": 1.7584, - "step": 566000 - }, - { - "epoch": 8.15, - "learning_rate": 4.229485042502038e-05, - "loss": 1.7602, - "step": 566500 - }, - { - "epoch": 8.16, - "learning_rate": 4.224292368326765e-05, - "loss": 1.7593, - "step": 567000 - }, - { - "epoch": 8.17, - "learning_rate": 4.219120464848193e-05, - "loss": 1.7546, - "step": 567500 - }, - { - "epoch": 8.18, - "learning_rate": 4.213927790672919e-05, - "loss": 1.7557, - "step": 568000 - }, - { - "epoch": 8.18, - "learning_rate": 4.2087558871943466e-05, - "loss": 1.7597, - "step": 568500 - }, - { - "epoch": 8.19, - "learning_rate": 4.203563213019073e-05, - "loss": 1.7578, - "step": 569000 - }, - { - "epoch": 8.2, - "learning_rate": 4.1983913095405006e-05, - "loss": 1.7572, - "step": 569500 - }, - { - "epoch": 8.2, - "learning_rate": 4.193198635365227e-05, - "loss": 1.7593, - "step": 570000 - }, - { - "epoch": 8.21, - "learning_rate": 4.1880267318866545e-05, - "loss": 1.7553, - "step": 570500 - }, - { - "epoch": 8.22, - "learning_rate": 4.182834057711381e-05, - "loss": 1.76, - "step": 571000 - }, - { - "epoch": 8.23, - "learning_rate": 4.1776621542328084e-05, - "loss": 1.7588, - "step": 571500 - }, - { - "epoch": 8.23, - "learning_rate": 4.1724694800575346e-05, - "loss": 1.7568, - "step": 572000 - }, - { - "epoch": 8.24, - "learning_rate": 4.1672975765789623e-05, - "loss": 1.7573, - "step": 572500 - }, - { - "epoch": 8.25, - "learning_rate": 4.162104902403689e-05, - "loss": 1.7541, - "step": 573000 - }, - { - "epoch": 8.26, - "learning_rate": 4.156932998925116e-05, - "loss": 1.7586, - "step": 573500 - }, - { - "epoch": 8.26, - "learning_rate": 4.151740324749843e-05, - "loss": 1.7587, - "step": 574000 - }, - { - "epoch": 8.27, - "learning_rate": 4.146568421271271e-05, - "loss": 1.7537, - "step": 574500 - }, - { - "epoch": 8.28, - "learning_rate": 4.141375747095998e-05, - "loss": 1.7526, - "step": 575000 - }, - { - "epoch": 8.28, - "learning_rate": 4.136203843617425e-05, - "loss": 1.7541, - "step": 575500 - }, - { - "epoch": 8.29, - "learning_rate": 4.131011169442151e-05, - "loss": 1.7572, - "step": 576000 - }, - { - "epoch": 8.3, - "learning_rate": 4.125839265963579e-05, - "loss": 1.7557, - "step": 576500 - }, - { - "epoch": 8.31, - "learning_rate": 4.1206465917883056e-05, - "loss": 1.7518, - "step": 577000 - }, - { - "epoch": 8.31, - "learning_rate": 4.1154746883097327e-05, - "loss": 1.7542, - "step": 577500 - }, - { - "epoch": 8.32, - "learning_rate": 4.1102820141344595e-05, - "loss": 1.755, - "step": 578000 - }, - { - "epoch": 8.33, - "learning_rate": 4.105110110655887e-05, - "loss": 1.7548, - "step": 578500 - }, - { - "epoch": 8.33, - "learning_rate": 4.0999174364806134e-05, - "loss": 1.7575, - "step": 579000 - }, - { - "epoch": 8.34, - "learning_rate": 4.0947455330020405e-05, - "loss": 1.755, - "step": 579500 - }, - { - "epoch": 8.35, - "learning_rate": 4.0895528588267673e-05, - "loss": 1.7511, - "step": 580000 - }, - { - "epoch": 8.36, - "learning_rate": 4.084380955348195e-05, - "loss": 1.7565, - "step": 580500 - }, - { - "epoch": 8.36, - "learning_rate": 4.079188281172921e-05, - "loss": 1.7583, - "step": 581000 - }, - { - "epoch": 8.37, - "learning_rate": 4.074016377694349e-05, - "loss": 1.7517, - "step": 581500 - }, - { - "epoch": 8.38, - "learning_rate": 4.068823703519076e-05, - "loss": 1.7596, - "step": 582000 - }, - { - "epoch": 8.38, - "learning_rate": 4.063651800040503e-05, - "loss": 1.7544, - "step": 582500 - }, - { - "epoch": 8.39, - "learning_rate": 4.058459125865229e-05, - "loss": 1.7576, - "step": 583000 - }, - { - "epoch": 8.4, - "learning_rate": 4.053287222386657e-05, - "loss": 1.7514, - "step": 583500 - }, - { - "epoch": 8.41, - "learning_rate": 4.048094548211384e-05, - "loss": 1.7555, - "step": 584000 - }, - { - "epoch": 8.41, - "learning_rate": 4.042922644732811e-05, - "loss": 1.7529, - "step": 584500 - }, - { - "epoch": 8.42, - "learning_rate": 4.0377299705575377e-05, - "loss": 1.7569, - "step": 585000 - }, - { - "epoch": 8.43, - "learning_rate": 4.0325580670789654e-05, - "loss": 1.75, - "step": 585500 - }, - { - "epoch": 8.43, - "learning_rate": 4.0273653929036916e-05, - "loss": 1.7545, - "step": 586000 - }, - { - "epoch": 8.44, - "learning_rate": 4.022193489425119e-05, - "loss": 1.755, - "step": 586500 - }, - { - "epoch": 8.45, - "learning_rate": 4.0170008152498455e-05, - "loss": 1.7537, - "step": 587000 - }, - { - "epoch": 8.46, - "learning_rate": 4.011828911771273e-05, - "loss": 1.7558, - "step": 587500 - }, - { - "epoch": 8.46, - "learning_rate": 4.006636237596e-05, - "loss": 1.7521, - "step": 588000 - }, - { - "epoch": 8.47, - "learning_rate": 4.001464334117427e-05, - "loss": 1.7522, - "step": 588500 - }, - { - "epoch": 8.48, - "learning_rate": 3.996271659942154e-05, - "loss": 1.7573, - "step": 589000 - }, - { - "epoch": 8.49, - "learning_rate": 3.991099756463582e-05, - "loss": 1.7578, - "step": 589500 - }, - { - "epoch": 8.49, - "learning_rate": 3.985907082288308e-05, - "loss": 1.7547, - "step": 590000 - }, - { - "epoch": 8.5, - "learning_rate": 3.980735178809735e-05, - "loss": 1.7534, - "step": 590500 - }, - { - "epoch": 8.51, - "learning_rate": 3.975542504634462e-05, - "loss": 1.7522, - "step": 591000 - }, - { - "epoch": 8.51, - "learning_rate": 3.97037060115589e-05, - "loss": 1.7569, - "step": 591500 - }, - { - "epoch": 8.52, - "learning_rate": 3.965177926980616e-05, - "loss": 1.755, - "step": 592000 - }, - { - "epoch": 8.53, - "learning_rate": 3.9600060235020436e-05, - "loss": 1.7506, - "step": 592500 - }, - { - "epoch": 8.54, - "learning_rate": 3.95481334932677e-05, - "loss": 1.7538, - "step": 593000 - }, - { - "epoch": 8.54, - "learning_rate": 3.9496414458481975e-05, - "loss": 1.7516, - "step": 593500 - }, - { - "epoch": 8.55, - "learning_rate": 3.944448771672924e-05, - "loss": 1.753, - "step": 594000 - }, - { - "epoch": 8.56, - "learning_rate": 3.9392768681943514e-05, - "loss": 1.7505, - "step": 594500 - }, - { - "epoch": 8.56, - "learning_rate": 3.934084194019078e-05, - "loss": 1.751, - "step": 595000 - }, - { - "epoch": 8.57, - "learning_rate": 3.9289122905405054e-05, - "loss": 1.7484, - "step": 595500 - }, - { - "epoch": 8.58, - "learning_rate": 3.923719616365232e-05, - "loss": 1.7533, - "step": 596000 - }, - { - "epoch": 8.59, - "learning_rate": 3.91854771288666e-05, - "loss": 1.7547, - "step": 596500 - }, - { - "epoch": 8.59, - "learning_rate": 3.913355038711386e-05, - "loss": 1.7507, - "step": 597000 - }, - { - "epoch": 8.6, - "learning_rate": 3.908183135232813e-05, - "loss": 1.751, - "step": 597500 - }, - { - "epoch": 8.61, - "learning_rate": 3.90299046105754e-05, - "loss": 1.7521, - "step": 598000 - }, - { - "epoch": 8.61, - "learning_rate": 3.897818557578968e-05, - "loss": 1.7497, - "step": 598500 - }, - { - "epoch": 8.62, - "learning_rate": 3.8926258834036947e-05, - "loss": 1.7492, - "step": 599000 - }, - { - "epoch": 8.63, - "learning_rate": 3.887453979925122e-05, - "loss": 1.7515, - "step": 599500 - }, - { - "epoch": 8.64, - "learning_rate": 3.882261305749848e-05, - "loss": 1.7518, - "step": 600000 - }, - { - "epoch": 8.64, - "learning_rate": 3.877089402271276e-05, - "loss": 1.7518, - "step": 600500 - }, - { - "epoch": 8.65, - "learning_rate": 3.8718967280960025e-05, - "loss": 1.7552, - "step": 601000 - }, - { - "epoch": 8.66, - "learning_rate": 3.8667248246174296e-05, - "loss": 1.7506, - "step": 601500 - }, - { - "epoch": 8.67, - "learning_rate": 3.8615321504421564e-05, - "loss": 1.752, - "step": 602000 - }, - { - "epoch": 8.67, - "learning_rate": 3.856360246963584e-05, - "loss": 1.7484, - "step": 602500 - }, - { - "epoch": 8.68, - "learning_rate": 3.8511675727883104e-05, - "loss": 1.7495, - "step": 603000 - }, - { - "epoch": 8.69, - "learning_rate": 3.845995669309738e-05, - "loss": 1.7487, - "step": 603500 - }, - { - "epoch": 8.69, - "learning_rate": 3.840802995134464e-05, - "loss": 1.7505, - "step": 604000 - }, - { - "epoch": 8.7, - "learning_rate": 3.835631091655892e-05, - "loss": 1.7505, - "step": 604500 - }, - { - "epoch": 8.71, - "learning_rate": 3.830438417480618e-05, - "loss": 1.7483, - "step": 605000 - }, - { - "epoch": 8.72, - "learning_rate": 3.825266514002046e-05, - "loss": 1.7491, - "step": 605500 - }, - { - "epoch": 8.72, - "learning_rate": 3.820073839826773e-05, - "loss": 1.7482, - "step": 606000 - }, - { - "epoch": 8.73, - "learning_rate": 3.8149019363482e-05, - "loss": 1.7522, - "step": 606500 - }, - { - "epoch": 8.74, - "learning_rate": 3.809709262172926e-05, - "loss": 1.7476, - "step": 607000 - }, - { - "epoch": 8.74, - "learning_rate": 3.8045373586943545e-05, - "loss": 1.7543, - "step": 607500 - }, - { - "epoch": 8.75, - "learning_rate": 3.799344684519081e-05, - "loss": 1.7492, - "step": 608000 - }, - { - "epoch": 8.76, - "learning_rate": 3.794172781040508e-05, - "loss": 1.7532, - "step": 608500 - }, - { - "epoch": 8.77, - "learning_rate": 3.7889801068652346e-05, - "loss": 1.7484, - "step": 609000 - }, - { - "epoch": 8.77, - "learning_rate": 3.7838082033866624e-05, - "loss": 1.7506, - "step": 609500 - }, - { - "epoch": 8.78, - "learning_rate": 3.778615529211389e-05, - "loss": 1.7525, - "step": 610000 - }, - { - "epoch": 8.79, - "learning_rate": 3.773443625732816e-05, - "loss": 1.7502, - "step": 610500 - }, - { - "epoch": 8.79, - "learning_rate": 3.7682509515575425e-05, - "loss": 1.7464, - "step": 611000 - }, - { - "epoch": 8.8, - "learning_rate": 3.76307904807897e-05, - "loss": 1.7522, - "step": 611500 - }, - { - "epoch": 8.81, - "learning_rate": 3.757886373903697e-05, - "loss": 1.7506, - "step": 612000 - }, - { - "epoch": 8.82, - "learning_rate": 3.752714470425124e-05, - "loss": 1.7496, - "step": 612500 - }, - { - "epoch": 8.82, - "learning_rate": 3.747521796249851e-05, - "loss": 1.7477, - "step": 613000 - }, - { - "epoch": 8.83, - "learning_rate": 3.742349892771279e-05, - "loss": 1.75, - "step": 613500 - }, - { - "epoch": 8.84, - "learning_rate": 3.737157218596005e-05, - "loss": 1.7512, - "step": 614000 - }, - { - "epoch": 8.85, - "learning_rate": 3.731985315117433e-05, - "loss": 1.7449, - "step": 614500 - }, - { - "epoch": 8.85, - "learning_rate": 3.726792640942159e-05, - "loss": 1.7519, - "step": 615000 - }, - { - "epoch": 8.86, - "learning_rate": 3.7216207374635866e-05, - "loss": 1.7481, - "step": 615500 - }, - { - "epoch": 8.87, - "learning_rate": 3.716428063288313e-05, - "loss": 1.7484, - "step": 616000 - }, - { - "epoch": 8.87, - "learning_rate": 3.7112561598097405e-05, - "loss": 1.7471, - "step": 616500 - }, - { - "epoch": 8.88, - "learning_rate": 3.7060634856344674e-05, - "loss": 1.7532, - "step": 617000 - }, - { - "epoch": 8.89, - "learning_rate": 3.700891582155895e-05, - "loss": 1.7484, - "step": 617500 - }, - { - "epoch": 8.9, - "learning_rate": 3.6956989079806206e-05, - "loss": 1.743, - "step": 618000 - }, - { - "epoch": 8.9, - "learning_rate": 3.6905270045020484e-05, - "loss": 1.751, - "step": 618500 - }, - { - "epoch": 8.91, - "learning_rate": 3.685334330326775e-05, - "loss": 1.7484, - "step": 619000 - }, - { - "epoch": 8.92, - "learning_rate": 3.680162426848203e-05, - "loss": 1.7469, - "step": 619500 - }, - { - "epoch": 8.92, - "learning_rate": 3.674969752672929e-05, - "loss": 1.7493, - "step": 620000 - }, - { - "epoch": 8.93, - "learning_rate": 3.669797849194357e-05, - "loss": 1.7495, - "step": 620500 - }, - { - "epoch": 8.94, - "learning_rate": 3.664605175019084e-05, - "loss": 1.7462, - "step": 621000 - }, - { - "epoch": 8.95, - "learning_rate": 3.659433271540511e-05, - "loss": 1.7477, - "step": 621500 - }, - { - "epoch": 8.95, - "learning_rate": 3.654240597365237e-05, - "loss": 1.7481, - "step": 622000 - }, - { - "epoch": 8.96, - "learning_rate": 3.649068693886665e-05, - "loss": 1.7445, - "step": 622500 - }, - { - "epoch": 8.97, - "learning_rate": 3.6438760197113916e-05, - "loss": 1.7434, - "step": 623000 - }, - { - "epoch": 8.97, - "learning_rate": 3.638704116232819e-05, - "loss": 1.7434, - "step": 623500 - }, - { - "epoch": 8.98, - "learning_rate": 3.6335114420575455e-05, - "loss": 1.7473, - "step": 624000 - }, - { - "epoch": 8.99, - "learning_rate": 3.628339538578973e-05, - "loss": 1.7411, - "step": 624500 - }, - { - "epoch": 9.0, - "learning_rate": 3.6231468644036995e-05, - "loss": 1.7497, - "step": 625000 - }, - { - "epoch": 9.0, - "eval_accuracy": 0.6605183065873255, - "eval_loss": 1.6181640625, - "eval_runtime": 652.1948, - "eval_samples_per_second": 826.344, - "eval_steps_per_second": 34.431, - "step": 625257 - }, - { - "epoch": 9.0, - "learning_rate": 3.6179749609251266e-05, - "loss": 1.7505, - "step": 625500 - }, - { - "epoch": 9.01, - "learning_rate": 3.6127822867498534e-05, - "loss": 1.7437, - "step": 626000 - }, - { - "epoch": 9.02, - "learning_rate": 3.607610383271281e-05, - "loss": 1.7475, - "step": 626500 - }, - { - "epoch": 9.03, - "learning_rate": 3.602417709096007e-05, - "loss": 1.7475, - "step": 627000 - }, - { - "epoch": 9.03, - "learning_rate": 3.597245805617435e-05, - "loss": 1.7413, - "step": 627500 - }, - { - "epoch": 9.04, - "learning_rate": 3.592053131442162e-05, - "loss": 1.7411, - "step": 628000 - }, - { - "epoch": 9.05, - "learning_rate": 3.58688122796359e-05, - "loss": 1.7475, - "step": 628500 - }, - { - "epoch": 9.05, - "learning_rate": 3.581688553788315e-05, - "loss": 1.7414, - "step": 629000 - }, - { - "epoch": 9.06, - "learning_rate": 3.576516650309743e-05, - "loss": 1.7422, - "step": 629500 - }, - { - "epoch": 9.07, - "learning_rate": 3.57132397613447e-05, - "loss": 1.7463, - "step": 630000 - }, - { - "epoch": 9.08, - "learning_rate": 3.5661520726558976e-05, - "loss": 1.7413, - "step": 630500 - }, - { - "epoch": 9.08, - "learning_rate": 3.560959398480624e-05, - "loss": 1.7422, - "step": 631000 - }, - { - "epoch": 9.09, - "learning_rate": 3.5557874950020515e-05, - "loss": 1.7433, - "step": 631500 - }, - { - "epoch": 9.1, - "learning_rate": 3.5505948208267776e-05, - "loss": 1.7418, - "step": 632000 - }, - { - "epoch": 9.1, - "learning_rate": 3.5454229173482054e-05, - "loss": 1.741, - "step": 632500 - }, - { - "epoch": 9.11, - "learning_rate": 3.5402302431729316e-05, - "loss": 1.7441, - "step": 633000 - }, - { - "epoch": 9.12, - "learning_rate": 3.535058339694359e-05, - "loss": 1.7457, - "step": 633500 - }, - { - "epoch": 9.13, - "learning_rate": 3.529865665519086e-05, - "loss": 1.7462, - "step": 634000 - }, - { - "epoch": 9.13, - "learning_rate": 3.524693762040513e-05, - "loss": 1.742, - "step": 634500 - }, - { - "epoch": 9.14, - "learning_rate": 3.51950108786524e-05, - "loss": 1.7441, - "step": 635000 - }, - { - "epoch": 9.15, - "learning_rate": 3.514329184386668e-05, - "loss": 1.7426, - "step": 635500 - }, - { - "epoch": 9.15, - "learning_rate": 3.509136510211394e-05, - "loss": 1.7427, - "step": 636000 - }, - { - "epoch": 9.16, - "learning_rate": 3.503964606732821e-05, - "loss": 1.7409, - "step": 636500 - }, - { - "epoch": 9.17, - "learning_rate": 3.498771932557548e-05, - "loss": 1.7426, - "step": 637000 - }, - { - "epoch": 9.18, - "learning_rate": 3.493600029078976e-05, - "loss": 1.7458, - "step": 637500 - }, - { - "epoch": 9.18, - "learning_rate": 3.488407354903702e-05, - "loss": 1.743, - "step": 638000 - }, - { - "epoch": 9.19, - "learning_rate": 3.4832354514251296e-05, - "loss": 1.7433, - "step": 638500 - }, - { - "epoch": 9.2, - "learning_rate": 3.478042777249856e-05, - "loss": 1.7444, - "step": 639000 - }, - { - "epoch": 9.21, - "learning_rate": 3.4728708737712836e-05, - "loss": 1.7398, - "step": 639500 - }, - { - "epoch": 9.21, - "learning_rate": 3.46767819959601e-05, - "loss": 1.7425, - "step": 640000 - }, - { - "epoch": 9.22, - "learning_rate": 3.4625062961174375e-05, - "loss": 1.7447, - "step": 640500 - }, - { - "epoch": 9.23, - "learning_rate": 3.457313621942164e-05, - "loss": 1.7396, - "step": 641000 - }, - { - "epoch": 9.23, - "learning_rate": 3.452141718463592e-05, - "loss": 1.7431, - "step": 641500 - }, - { - "epoch": 9.24, - "learning_rate": 3.446949044288318e-05, - "loss": 1.7428, - "step": 642000 - }, - { - "epoch": 9.25, - "learning_rate": 3.441777140809746e-05, - "loss": 1.7444, - "step": 642500 - }, - { - "epoch": 9.26, - "learning_rate": 3.436584466634472e-05, - "loss": 1.7447, - "step": 643000 - }, - { - "epoch": 9.26, - "learning_rate": 3.4314125631559e-05, - "loss": 1.7434, - "step": 643500 - }, - { - "epoch": 9.27, - "learning_rate": 3.426219888980626e-05, - "loss": 1.7433, - "step": 644000 - }, - { - "epoch": 9.28, - "learning_rate": 3.421047985502054e-05, - "loss": 1.7378, - "step": 644500 - }, - { - "epoch": 9.28, - "learning_rate": 3.415855311326781e-05, - "loss": 1.7408, - "step": 645000 - }, - { - "epoch": 9.29, - "learning_rate": 3.410683407848208e-05, - "loss": 1.7458, - "step": 645500 - }, - { - "epoch": 9.3, - "learning_rate": 3.405490733672934e-05, - "loss": 1.742, - "step": 646000 - }, - { - "epoch": 9.31, - "learning_rate": 3.400318830194362e-05, - "loss": 1.7375, - "step": 646500 - }, - { - "epoch": 9.31, - "learning_rate": 3.3951261560190886e-05, - "loss": 1.7436, - "step": 647000 - }, - { - "epoch": 9.32, - "learning_rate": 3.389954252540516e-05, - "loss": 1.7416, - "step": 647500 - }, - { - "epoch": 9.33, - "learning_rate": 3.3847615783652425e-05, - "loss": 1.7445, - "step": 648000 - }, - { - "epoch": 9.33, - "learning_rate": 3.37958967488667e-05, - "loss": 1.7424, - "step": 648500 - }, - { - "epoch": 9.34, - "learning_rate": 3.3743970007113964e-05, - "loss": 1.744, - "step": 649000 - }, - { - "epoch": 9.35, - "learning_rate": 3.369225097232824e-05, - "loss": 1.7392, - "step": 649500 - }, - { - "epoch": 9.36, - "learning_rate": 3.3640324230575503e-05, - "loss": 1.7414, - "step": 650000 - }, - { - "epoch": 9.36, - "learning_rate": 3.358860519578978e-05, - "loss": 1.7409, - "step": 650500 - }, - { - "epoch": 9.37, - "learning_rate": 3.353667845403704e-05, - "loss": 1.7405, - "step": 651000 - }, - { - "epoch": 9.38, - "learning_rate": 3.348495941925132e-05, - "loss": 1.7461, - "step": 651500 - }, - { - "epoch": 9.38, - "learning_rate": 3.343303267749859e-05, - "loss": 1.7405, - "step": 652000 - }, - { - "epoch": 9.39, - "learning_rate": 3.3381313642712867e-05, - "loss": 1.7434, - "step": 652500 - }, - { - "epoch": 9.4, - "learning_rate": 3.332938690096012e-05, - "loss": 1.738, - "step": 653000 - }, - { - "epoch": 9.41, - "learning_rate": 3.32776678661744e-05, - "loss": 1.7393, - "step": 653500 - }, - { - "epoch": 9.41, - "learning_rate": 3.322574112442167e-05, - "loss": 1.7426, - "step": 654000 - }, - { - "epoch": 9.42, - "learning_rate": 3.3174022089635945e-05, - "loss": 1.7413, - "step": 654500 - }, - { - "epoch": 9.43, - "learning_rate": 3.3122095347883207e-05, - "loss": 1.7395, - "step": 655000 - }, - { - "epoch": 9.44, - "learning_rate": 3.3070376313097484e-05, - "loss": 1.7428, - "step": 655500 - }, - { - "epoch": 9.44, - "learning_rate": 3.301844957134475e-05, - "loss": 1.7407, - "step": 656000 - }, - { - "epoch": 9.45, - "learning_rate": 3.2966730536559024e-05, - "loss": 1.7388, - "step": 656500 - }, - { - "epoch": 9.46, - "learning_rate": 3.2914803794806285e-05, - "loss": 1.7394, - "step": 657000 - }, - { - "epoch": 9.46, - "learning_rate": 3.286308476002056e-05, - "loss": 1.7391, - "step": 657500 - }, - { - "epoch": 9.47, - "learning_rate": 3.281115801826783e-05, - "loss": 1.7433, - "step": 658000 - }, - { - "epoch": 9.48, - "learning_rate": 3.27594389834821e-05, - "loss": 1.7433, - "step": 658500 - }, - { - "epoch": 9.49, - "learning_rate": 3.270751224172937e-05, - "loss": 1.7395, - "step": 659000 - }, - { - "epoch": 9.49, - "learning_rate": 3.265579320694365e-05, - "loss": 1.746, - "step": 659500 - }, - { - "epoch": 9.5, - "learning_rate": 3.260386646519091e-05, - "loss": 1.7359, - "step": 660000 - }, - { - "epoch": 9.51, - "learning_rate": 3.255214743040518e-05, - "loss": 1.7443, - "step": 660500 - }, - { - "epoch": 9.51, - "learning_rate": 3.250022068865245e-05, - "loss": 1.7441, - "step": 661000 - }, - { - "epoch": 9.52, - "learning_rate": 3.244850165386673e-05, - "loss": 1.733, - "step": 661500 - }, - { - "epoch": 9.53, - "learning_rate": 3.2396574912113995e-05, - "loss": 1.7359, - "step": 662000 - }, - { - "epoch": 9.54, - "learning_rate": 3.2344855877328266e-05, - "loss": 1.7396, - "step": 662500 - }, - { - "epoch": 9.54, - "learning_rate": 3.2292929135575534e-05, - "loss": 1.7425, - "step": 663000 - }, - { - "epoch": 9.55, - "learning_rate": 3.224121010078981e-05, - "loss": 1.7378, - "step": 663500 - }, - { - "epoch": 9.56, - "learning_rate": 3.2189283359037074e-05, - "loss": 1.735, - "step": 664000 - }, - { - "epoch": 9.56, - "learning_rate": 3.2137564324251344e-05, - "loss": 1.7372, - "step": 664500 - }, - { - "epoch": 9.57, - "learning_rate": 3.208563758249861e-05, - "loss": 1.742, - "step": 665000 - }, - { - "epoch": 9.58, - "learning_rate": 3.203391854771289e-05, - "loss": 1.7396, - "step": 665500 - }, - { - "epoch": 9.59, - "learning_rate": 3.198199180596015e-05, - "loss": 1.7375, - "step": 666000 - }, - { - "epoch": 9.59, - "learning_rate": 3.193027277117443e-05, - "loss": 1.7418, - "step": 666500 - }, - { - "epoch": 9.6, - "learning_rate": 3.187834602942169e-05, - "loss": 1.7416, - "step": 667000 - }, - { - "epoch": 9.61, - "learning_rate": 3.182662699463597e-05, - "loss": 1.7412, - "step": 667500 - }, - { - "epoch": 9.62, - "learning_rate": 3.177470025288323e-05, - "loss": 1.7362, - "step": 668000 - }, - { - "epoch": 9.62, - "learning_rate": 3.172298121809751e-05, - "loss": 1.739, - "step": 668500 - }, - { - "epoch": 9.63, - "learning_rate": 3.167105447634478e-05, - "loss": 1.7383, - "step": 669000 - }, - { - "epoch": 9.64, - "learning_rate": 3.161933544155905e-05, - "loss": 1.7393, - "step": 669500 - }, - { - "epoch": 9.64, - "learning_rate": 3.1567408699806316e-05, - "loss": 1.7413, - "step": 670000 - }, - { - "epoch": 9.65, - "learning_rate": 3.1515689665020594e-05, - "loss": 1.74, - "step": 670500 - }, - { - "epoch": 9.66, - "learning_rate": 3.1463762923267855e-05, - "loss": 1.7382, - "step": 671000 - }, - { - "epoch": 9.67, - "learning_rate": 3.1412043888482126e-05, - "loss": 1.7352, - "step": 671500 - }, - { - "epoch": 9.67, - "learning_rate": 3.1360117146729394e-05, - "loss": 1.7344, - "step": 672000 - }, - { - "epoch": 9.68, - "learning_rate": 3.130839811194367e-05, - "loss": 1.7389, - "step": 672500 - }, - { - "epoch": 9.69, - "learning_rate": 3.125647137019094e-05, - "loss": 1.7362, - "step": 673000 - }, - { - "epoch": 9.69, - "learning_rate": 3.120475233540521e-05, - "loss": 1.7389, - "step": 673500 - }, - { - "epoch": 9.7, - "learning_rate": 3.115282559365248e-05, - "loss": 1.7387, - "step": 674000 - }, - { - "epoch": 9.71, - "learning_rate": 3.110110655886676e-05, - "loss": 1.7359, - "step": 674500 - }, - { - "epoch": 9.72, - "learning_rate": 3.104917981711402e-05, - "loss": 1.7372, - "step": 675000 - }, - { - "epoch": 9.72, - "learning_rate": 3.099746078232829e-05, - "loss": 1.737, - "step": 675500 - }, - { - "epoch": 9.73, - "learning_rate": 3.094553404057556e-05, - "loss": 1.7345, - "step": 676000 - }, - { - "epoch": 9.74, - "learning_rate": 3.0893815005789836e-05, - "loss": 1.7387, - "step": 676500 - }, - { - "epoch": 9.74, - "learning_rate": 3.08418882640371e-05, - "loss": 1.7383, - "step": 677000 - }, - { - "epoch": 9.75, - "learning_rate": 3.0790169229251375e-05, - "loss": 1.7341, - "step": 677500 - }, - { - "epoch": 9.76, - "learning_rate": 3.073824248749864e-05, - "loss": 1.7377, - "step": 678000 - }, - { - "epoch": 9.77, - "learning_rate": 3.0686523452712915e-05, - "loss": 1.7385, - "step": 678500 - }, - { - "epoch": 9.77, - "learning_rate": 3.0634596710960176e-05, - "loss": 1.7367, - "step": 679000 - }, - { - "epoch": 9.78, - "learning_rate": 3.0582877676174454e-05, - "loss": 1.737, - "step": 679500 - }, - { - "epoch": 9.79, - "learning_rate": 3.053095093442172e-05, - "loss": 1.733, - "step": 680000 - }, - { - "epoch": 9.8, - "learning_rate": 3.0479231899635996e-05, - "loss": 1.7389, - "step": 680500 - }, - { - "epoch": 9.8, - "learning_rate": 3.042730515788326e-05, - "loss": 1.7415, - "step": 681000 - }, - { - "epoch": 9.81, - "learning_rate": 3.037558612309754e-05, - "loss": 1.7378, - "step": 681500 - }, - { - "epoch": 9.82, - "learning_rate": 3.0323659381344797e-05, - "loss": 1.7375, - "step": 682000 - }, - { - "epoch": 9.82, - "learning_rate": 3.0271940346559075e-05, - "loss": 1.7362, - "step": 682500 - }, - { - "epoch": 9.83, - "learning_rate": 3.022001360480634e-05, - "loss": 1.7365, - "step": 683000 - }, - { - "epoch": 9.84, - "learning_rate": 3.0168294570020618e-05, - "loss": 1.7351, - "step": 683500 - }, - { - "epoch": 9.85, - "learning_rate": 3.0116367828267883e-05, - "loss": 1.7348, - "step": 684000 - }, - { - "epoch": 9.85, - "learning_rate": 3.0064648793482157e-05, - "loss": 1.7356, - "step": 684500 - }, - { - "epoch": 9.86, - "learning_rate": 3.001272205172942e-05, - "loss": 1.7365, - "step": 685000 - }, - { - "epoch": 9.87, - "learning_rate": 2.9961003016943696e-05, - "loss": 1.7343, - "step": 685500 - }, - { - "epoch": 9.87, - "learning_rate": 2.990907627519096e-05, - "loss": 1.7377, - "step": 686000 - }, - { - "epoch": 9.88, - "learning_rate": 2.9857357240405235e-05, - "loss": 1.7389, - "step": 686500 - }, - { - "epoch": 9.89, - "learning_rate": 2.9805430498652504e-05, - "loss": 1.7355, - "step": 687000 - }, - { - "epoch": 9.9, - "learning_rate": 2.9753711463866778e-05, - "loss": 1.7348, - "step": 687500 - }, - { - "epoch": 9.9, - "learning_rate": 2.9701784722114046e-05, - "loss": 1.7332, - "step": 688000 - }, - { - "epoch": 9.91, - "learning_rate": 2.965006568732832e-05, - "loss": 1.733, - "step": 688500 - }, - { - "epoch": 9.92, - "learning_rate": 2.9598138945575582e-05, - "loss": 1.7346, - "step": 689000 - }, - { - "epoch": 9.92, - "learning_rate": 2.9546419910789857e-05, - "loss": 1.7339, - "step": 689500 - }, - { - "epoch": 9.93, - "learning_rate": 2.9494493169037125e-05, - "loss": 1.7314, - "step": 690000 - }, - { - "epoch": 9.94, - "learning_rate": 2.94427741342514e-05, - "loss": 1.7375, - "step": 690500 - }, - { - "epoch": 9.95, - "learning_rate": 2.9390847392498668e-05, - "loss": 1.7345, - "step": 691000 - }, - { - "epoch": 9.95, - "learning_rate": 2.9339128357712942e-05, - "loss": 1.7369, - "step": 691500 - }, - { - "epoch": 9.96, - "learning_rate": 2.9287201615960204e-05, - "loss": 1.7318, - "step": 692000 - }, - { - "epoch": 9.97, - "learning_rate": 2.9235482581174478e-05, - "loss": 1.7302, - "step": 692500 - }, - { - "epoch": 9.98, - "learning_rate": 2.9183555839421746e-05, - "loss": 1.7346, - "step": 693000 - }, - { - "epoch": 9.98, - "learning_rate": 2.913183680463602e-05, - "loss": 1.7336, - "step": 693500 - }, - { - "epoch": 9.99, - "learning_rate": 2.9079910062883285e-05, - "loss": 1.7355, - "step": 694000 - }, - { - "epoch": 10.0, - "learning_rate": 2.9028191028097563e-05, - "loss": 1.7321, - "step": 694500 - }, - { - "epoch": 10.0, - "eval_accuracy": 0.6623401667185361, - "eval_loss": 1.6064453125, - "eval_runtime": 654.7283, - "eval_samples_per_second": 823.146, - "eval_steps_per_second": 34.298, - "step": 694730 - }, - { - "epoch": 10.0, - "learning_rate": 2.8976264286344828e-05, - "loss": 1.7335, - "step": 695000 - }, - { - "epoch": 10.01, - "learning_rate": 2.8924545251559106e-05, - "loss": 1.729, - "step": 695500 - }, - { - "epoch": 10.02, - "learning_rate": 2.8872618509806364e-05, - "loss": 1.7314, - "step": 696000 - }, - { - "epoch": 10.03, - "learning_rate": 2.8820899475020642e-05, - "loss": 1.7345, - "step": 696500 - }, - { - "epoch": 10.03, - "learning_rate": 2.8768972733267907e-05, - "loss": 1.7342, - "step": 697000 - }, - { - "epoch": 10.04, - "learning_rate": 2.8717253698482184e-05, - "loss": 1.7326, - "step": 697500 - }, - { - "epoch": 10.05, - "learning_rate": 2.866532695672945e-05, - "loss": 1.7301, - "step": 698000 - }, - { - "epoch": 10.05, - "learning_rate": 2.8613607921943724e-05, - "loss": 1.7292, - "step": 698500 - }, - { - "epoch": 10.06, - "learning_rate": 2.8561681180190985e-05, - "loss": 1.7291, - "step": 699000 - }, - { - "epoch": 10.07, - "learning_rate": 2.8509962145405263e-05, - "loss": 1.7321, - "step": 699500 - }, - { - "epoch": 10.08, - "learning_rate": 2.8458035403652528e-05, - "loss": 1.7314, - "step": 700000 - }, - { - "epoch": 10.08, - "learning_rate": 2.8406316368866802e-05, - "loss": 1.7333, - "step": 700500 - }, - { - "epoch": 10.09, - "learning_rate": 2.835438962711407e-05, - "loss": 1.7329, - "step": 701000 - }, - { - "epoch": 10.1, - "learning_rate": 2.8302670592328345e-05, - "loss": 1.7337, - "step": 701500 - }, - { - "epoch": 10.1, - "learning_rate": 2.8250743850575613e-05, - "loss": 1.7346, - "step": 702000 - }, - { - "epoch": 10.11, - "learning_rate": 2.8199024815789887e-05, - "loss": 1.734, - "step": 702500 - }, - { - "epoch": 10.12, - "learning_rate": 2.814709807403715e-05, - "loss": 1.736, - "step": 703000 - }, - { - "epoch": 10.13, - "learning_rate": 2.8095379039251423e-05, - "loss": 1.7305, - "step": 703500 - }, - { - "epoch": 10.13, - "learning_rate": 2.804345229749869e-05, - "loss": 1.7273, - "step": 704000 - }, - { - "epoch": 10.14, - "learning_rate": 2.7991733262712966e-05, - "loss": 1.734, - "step": 704500 - }, - { - "epoch": 10.15, - "learning_rate": 2.793980652096023e-05, - "loss": 1.7322, - "step": 705000 - }, - { - "epoch": 10.16, - "learning_rate": 2.788808748617451e-05, - "loss": 1.7312, - "step": 705500 - }, - { - "epoch": 10.16, - "learning_rate": 2.783616074442177e-05, - "loss": 1.7309, - "step": 706000 - }, - { - "epoch": 10.17, - "learning_rate": 2.7784441709636045e-05, - "loss": 1.7292, - "step": 706500 - }, - { - "epoch": 10.18, - "learning_rate": 2.773251496788331e-05, - "loss": 1.7313, - "step": 707000 - }, - { - "epoch": 10.18, - "learning_rate": 2.7680795933097587e-05, - "loss": 1.7328, - "step": 707500 - }, - { - "epoch": 10.19, - "learning_rate": 2.7628869191344852e-05, - "loss": 1.7298, - "step": 708000 - }, - { - "epoch": 10.2, - "learning_rate": 2.757715015655913e-05, - "loss": 1.73, - "step": 708500 - }, - { - "epoch": 10.21, - "learning_rate": 2.7525223414806395e-05, - "loss": 1.7338, - "step": 709000 - }, - { - "epoch": 10.21, - "learning_rate": 2.747350438002067e-05, - "loss": 1.7327, - "step": 709500 - }, - { - "epoch": 10.22, - "learning_rate": 2.742157763826793e-05, - "loss": 1.7283, - "step": 710000 - }, - { - "epoch": 10.23, - "learning_rate": 2.736985860348221e-05, - "loss": 1.7319, - "step": 710500 - }, - { - "epoch": 10.23, - "learning_rate": 2.7317931861729473e-05, - "loss": 1.729, - "step": 711000 - }, - { - "epoch": 10.24, - "learning_rate": 2.7266212826943748e-05, - "loss": 1.7318, - "step": 711500 - }, - { - "epoch": 10.25, - "learning_rate": 2.7214286085191016e-05, - "loss": 1.7307, - "step": 712000 - }, - { - "epoch": 10.26, - "learning_rate": 2.716256705040529e-05, - "loss": 1.7321, - "step": 712500 - }, - { - "epoch": 10.26, - "learning_rate": 2.7110640308652552e-05, - "loss": 1.7313, - "step": 713000 - }, - { - "epoch": 10.27, - "learning_rate": 2.7058921273866826e-05, - "loss": 1.7283, - "step": 713500 - }, - { - "epoch": 10.28, - "learning_rate": 2.7006994532114095e-05, - "loss": 1.7248, - "step": 714000 - }, - { - "epoch": 10.28, - "learning_rate": 2.695527549732837e-05, - "loss": 1.7267, - "step": 714500 - }, - { - "epoch": 10.29, - "learning_rate": 2.6903348755575637e-05, - "loss": 1.7297, - "step": 715000 - }, - { - "epoch": 10.3, - "learning_rate": 2.685162972078991e-05, - "loss": 1.7297, - "step": 715500 - }, - { - "epoch": 10.31, - "learning_rate": 2.6799702979037176e-05, - "loss": 1.7281, - "step": 716000 - }, - { - "epoch": 10.31, - "learning_rate": 2.6747983944251454e-05, - "loss": 1.7306, - "step": 716500 - }, - { - "epoch": 10.32, - "learning_rate": 2.6696057202498716e-05, - "loss": 1.7297, - "step": 717000 - }, - { - "epoch": 10.33, - "learning_rate": 2.664433816771299e-05, - "loss": 1.7319, - "step": 717500 - }, - { - "epoch": 10.33, - "learning_rate": 2.6592411425960255e-05, - "loss": 1.7312, - "step": 718000 - }, - { - "epoch": 10.34, - "learning_rate": 2.6540692391174533e-05, - "loss": 1.7296, - "step": 718500 - }, - { - "epoch": 10.35, - "learning_rate": 2.6488765649421798e-05, - "loss": 1.7301, - "step": 719000 - }, - { - "epoch": 10.36, - "learning_rate": 2.6437046614636075e-05, - "loss": 1.7252, - "step": 719500 - }, - { - "epoch": 10.36, - "learning_rate": 2.6385119872883333e-05, - "loss": 1.7303, - "step": 720000 - }, - { - "epoch": 10.37, - "learning_rate": 2.633340083809761e-05, - "loss": 1.7276, - "step": 720500 - }, - { - "epoch": 10.38, - "learning_rate": 2.6281474096344876e-05, - "loss": 1.7308, - "step": 721000 - }, - { - "epoch": 10.39, - "learning_rate": 2.6229755061559154e-05, - "loss": 1.7275, - "step": 721500 - }, - { - "epoch": 10.39, - "learning_rate": 2.617782831980642e-05, - "loss": 1.7294, - "step": 722000 - }, - { - "epoch": 10.4, - "learning_rate": 2.6126109285020693e-05, - "loss": 1.7296, - "step": 722500 - }, - { - "epoch": 10.41, - "learning_rate": 2.607418254326796e-05, - "loss": 1.7331, - "step": 723000 - }, - { - "epoch": 10.41, - "learning_rate": 2.6022463508482236e-05, - "loss": 1.7316, - "step": 723500 - }, - { - "epoch": 10.42, - "learning_rate": 2.5970536766729497e-05, - "loss": 1.7348, - "step": 724000 - }, - { - "epoch": 10.43, - "learning_rate": 2.5918817731943772e-05, - "loss": 1.7302, - "step": 724500 - }, - { - "epoch": 10.44, - "learning_rate": 2.586689099019104e-05, - "loss": 1.7292, - "step": 725000 - }, - { - "epoch": 10.44, - "learning_rate": 2.5815171955405314e-05, - "loss": 1.7273, - "step": 725500 - }, - { - "epoch": 10.45, - "learning_rate": 2.5763245213652583e-05, - "loss": 1.7286, - "step": 726000 - }, - { - "epoch": 10.46, - "learning_rate": 2.5711526178866857e-05, - "loss": 1.7255, - "step": 726500 - }, - { - "epoch": 10.46, - "learning_rate": 2.565959943711412e-05, - "loss": 1.7281, - "step": 727000 - }, - { - "epoch": 10.47, - "learning_rate": 2.5607880402328393e-05, - "loss": 1.7314, - "step": 727500 - }, - { - "epoch": 10.48, - "learning_rate": 2.555595366057566e-05, - "loss": 1.7246, - "step": 728000 - }, - { - "epoch": 10.49, - "learning_rate": 2.5504234625789936e-05, - "loss": 1.729, - "step": 728500 - }, - { - "epoch": 10.49, - "learning_rate": 2.54523078840372e-05, - "loss": 1.7246, - "step": 729000 - }, - { - "epoch": 10.5, - "learning_rate": 2.5400588849251478e-05, - "loss": 1.73, - "step": 729500 - }, - { - "epoch": 10.51, - "learning_rate": 2.5348662107498743e-05, - "loss": 1.7267, - "step": 730000 - }, - { - "epoch": 10.51, - "learning_rate": 2.529694307271302e-05, - "loss": 1.7278, - "step": 730500 - }, - { - "epoch": 10.52, - "learning_rate": 2.524501633096028e-05, - "loss": 1.7264, - "step": 731000 - }, - { - "epoch": 10.53, - "learning_rate": 2.5193297296174557e-05, - "loss": 1.7308, - "step": 731500 - }, - { - "epoch": 10.54, - "learning_rate": 2.514137055442182e-05, - "loss": 1.7262, - "step": 732000 - }, - { - "epoch": 10.54, - "learning_rate": 2.50896515196361e-05, - "loss": 1.7284, - "step": 732500 - }, - { - "epoch": 10.55, - "learning_rate": 2.5037724777883364e-05, - "loss": 1.727, - "step": 733000 - }, - { - "epoch": 10.56, - "learning_rate": 2.498600574309764e-05, - "loss": 1.7257, - "step": 733500 - }, - { - "epoch": 10.57, - "learning_rate": 2.4934079001344904e-05, - "loss": 1.7264, - "step": 734000 - }, - { - "epoch": 10.57, - "learning_rate": 2.4882359966559178e-05, - "loss": 1.7271, - "step": 734500 - }, - { - "epoch": 10.58, - "learning_rate": 2.4830433224806443e-05, - "loss": 1.7238, - "step": 735000 - }, - { - "epoch": 10.59, - "learning_rate": 2.4778714190020717e-05, - "loss": 1.7282, - "step": 735500 - }, - { - "epoch": 10.59, - "learning_rate": 2.4726787448267985e-05, - "loss": 1.728, - "step": 736000 - }, - { - "epoch": 10.6, - "learning_rate": 2.467506841348226e-05, - "loss": 1.7236, - "step": 736500 - }, - { - "epoch": 10.61, - "learning_rate": 2.4623141671729525e-05, - "loss": 1.7285, - "step": 737000 - }, - { - "epoch": 10.62, - "learning_rate": 2.45714226369438e-05, - "loss": 1.7228, - "step": 737500 - }, - { - "epoch": 10.62, - "learning_rate": 2.4519495895191067e-05, - "loss": 1.7276, - "step": 738000 - }, - { - "epoch": 10.63, - "learning_rate": 2.4467776860405342e-05, - "loss": 1.7287, - "step": 738500 - }, - { - "epoch": 10.64, - "learning_rate": 2.4415850118652607e-05, - "loss": 1.7278, - "step": 739000 - }, - { - "epoch": 10.64, - "learning_rate": 2.436413108386688e-05, - "loss": 1.728, - "step": 739500 - }, - { - "epoch": 10.65, - "learning_rate": 2.431220434211415e-05, - "loss": 1.7278, - "step": 740000 - }, - { - "epoch": 10.66, - "learning_rate": 2.4260485307328424e-05, - "loss": 1.7244, - "step": 740500 - }, - { - "epoch": 10.67, - "learning_rate": 2.4208558565575685e-05, - "loss": 1.7253, - "step": 741000 - }, - { - "epoch": 10.67, - "learning_rate": 2.4156839530789963e-05, - "loss": 1.7291, - "step": 741500 - }, - { - "epoch": 10.68, - "learning_rate": 2.4104912789037224e-05, - "loss": 1.7234, - "step": 742000 - }, - { - "epoch": 10.69, - "learning_rate": 2.4053193754251502e-05, - "loss": 1.7239, - "step": 742500 - }, - { - "epoch": 10.69, - "learning_rate": 2.4001267012498767e-05, - "loss": 1.726, - "step": 743000 - }, - { - "epoch": 10.7, - "learning_rate": 2.3949547977713045e-05, - "loss": 1.729, - "step": 743500 - }, - { - "epoch": 10.71, - "learning_rate": 2.3897621235960306e-05, - "loss": 1.7244, - "step": 744000 - }, - { - "epoch": 10.72, - "learning_rate": 2.3845902201174584e-05, - "loss": 1.724, - "step": 744500 - }, - { - "epoch": 10.72, - "learning_rate": 2.379397545942185e-05, - "loss": 1.7243, - "step": 745000 - }, - { - "epoch": 10.73, - "learning_rate": 2.3742256424636123e-05, - "loss": 1.7262, - "step": 745500 - }, - { - "epoch": 10.74, - "learning_rate": 2.369032968288339e-05, - "loss": 1.7221, - "step": 746000 - }, - { - "epoch": 10.75, - "learning_rate": 2.3638610648097663e-05, - "loss": 1.7231, - "step": 746500 - }, - { - "epoch": 10.75, - "learning_rate": 2.358668390634493e-05, - "loss": 1.7281, - "step": 747000 - }, - { - "epoch": 10.76, - "learning_rate": 2.3534964871559205e-05, - "loss": 1.728, - "step": 747500 - }, - { - "epoch": 10.77, - "learning_rate": 2.348303812980647e-05, - "loss": 1.7238, - "step": 748000 - }, - { - "epoch": 10.77, - "learning_rate": 2.3431319095020745e-05, - "loss": 1.7245, - "step": 748500 - }, - { - "epoch": 10.78, - "learning_rate": 2.337939235326801e-05, - "loss": 1.7276, - "step": 749000 - }, - { - "epoch": 10.79, - "learning_rate": 2.3327673318482287e-05, - "loss": 1.7243, - "step": 749500 - }, - { - "epoch": 10.8, - "learning_rate": 2.3275746576729552e-05, - "loss": 1.7242, - "step": 750000 - }, - { - "epoch": 10.8, - "learning_rate": 2.3224027541943827e-05, - "loss": 1.7268, - "step": 750500 - }, - { - "epoch": 10.81, - "learning_rate": 2.317210080019109e-05, - "loss": 1.7224, - "step": 751000 - }, - { - "epoch": 10.82, - "learning_rate": 2.3120174058438356e-05, - "loss": 1.7241, - "step": 751500 - }, - { - "epoch": 10.82, - "learning_rate": 2.3068455023652634e-05, - "loss": 1.7226, - "step": 752000 - }, - { - "epoch": 10.83, - "learning_rate": 2.3016528281899896e-05, - "loss": 1.7251, - "step": 752500 - }, - { - "epoch": 10.84, - "learning_rate": 2.2964809247114173e-05, - "loss": 1.7182, - "step": 753000 - }, - { - "epoch": 10.85, - "learning_rate": 2.2912882505361438e-05, - "loss": 1.7232, - "step": 753500 - }, - { - "epoch": 10.85, - "learning_rate": 2.2861163470575713e-05, - "loss": 1.7256, - "step": 754000 - }, - { - "epoch": 10.86, - "learning_rate": 2.2809236728822978e-05, - "loss": 1.7254, - "step": 754500 - }, - { - "epoch": 10.87, - "learning_rate": 2.2757517694037252e-05, - "loss": 1.7269, - "step": 755000 - }, - { - "epoch": 10.87, - "learning_rate": 2.2705590952284517e-05, - "loss": 1.7254, - "step": 755500 - }, - { - "epoch": 10.88, - "learning_rate": 2.2653871917498795e-05, - "loss": 1.7247, - "step": 756000 - }, - { - "epoch": 10.89, - "learning_rate": 2.260194517574606e-05, - "loss": 1.7232, - "step": 756500 - }, - { - "epoch": 10.9, - "learning_rate": 2.2550226140960334e-05, - "loss": 1.7252, - "step": 757000 - }, - { - "epoch": 10.9, - "learning_rate": 2.24982993992076e-05, - "loss": 1.7227, - "step": 757500 - }, - { - "epoch": 10.91, - "learning_rate": 2.2446580364421873e-05, - "loss": 1.7207, - "step": 758000 - }, - { - "epoch": 10.92, - "learning_rate": 2.239465362266914e-05, - "loss": 1.7222, - "step": 758500 - }, - { - "epoch": 10.93, - "learning_rate": 2.2342934587883416e-05, - "loss": 1.7244, - "step": 759000 - }, - { - "epoch": 10.93, - "learning_rate": 2.229100784613068e-05, - "loss": 1.7237, - "step": 759500 - }, - { - "epoch": 10.94, - "learning_rate": 2.2239288811344955e-05, - "loss": 1.7208, - "step": 760000 - }, - { - "epoch": 10.95, - "learning_rate": 2.218736206959222e-05, - "loss": 1.7214, - "step": 760500 - }, - { - "epoch": 10.95, - "learning_rate": 2.2135643034806498e-05, - "loss": 1.7169, - "step": 761000 - }, - { - "epoch": 10.96, - "learning_rate": 2.208371629305376e-05, - "loss": 1.7241, - "step": 761500 - }, - { - "epoch": 10.97, - "learning_rate": 2.2031997258268037e-05, - "loss": 1.7228, - "step": 762000 - }, - { - "epoch": 10.98, - "learning_rate": 2.1980070516515302e-05, - "loss": 1.72, - "step": 762500 - }, - { - "epoch": 10.98, - "learning_rate": 2.192835148172958e-05, - "loss": 1.7202, - "step": 763000 - }, - { - "epoch": 10.99, - "learning_rate": 2.187642473997684e-05, - "loss": 1.7232, - "step": 763500 - }, - { - "epoch": 11.0, - "learning_rate": 2.182470570519112e-05, - "loss": 1.7225, - "step": 764000 - }, - { - "epoch": 11.0, - "eval_accuracy": 0.6647001699746609, - "eval_loss": 1.5908203125, - "eval_runtime": 653.6993, - "eval_samples_per_second": 824.442, - "eval_steps_per_second": 34.352, - "step": 764203 - }, - { - "epoch": 11.0, - "learning_rate": 2.177277896343838e-05, - "loss": 1.7241, - "step": 764500 - }, - { - "epoch": 11.01, - "learning_rate": 2.1721059928652658e-05, - "loss": 1.7231, - "step": 765000 - }, - { - "epoch": 11.02, - "learning_rate": 2.1669133186899923e-05, - "loss": 1.7191, - "step": 765500 - }, - { - "epoch": 11.03, - "learning_rate": 2.1617414152114197e-05, - "loss": 1.7213, - "step": 766000 - }, - { - "epoch": 11.03, - "learning_rate": 2.1565487410361462e-05, - "loss": 1.7183, - "step": 766500 - }, - { - "epoch": 11.04, - "learning_rate": 2.1513768375575737e-05, - "loss": 1.7218, - "step": 767000 - }, - { - "epoch": 11.05, - "learning_rate": 2.1461841633823005e-05, - "loss": 1.72, - "step": 767500 - }, - { - "epoch": 11.05, - "learning_rate": 2.141012259903728e-05, - "loss": 1.7179, - "step": 768000 - }, - { - "epoch": 11.06, - "learning_rate": 2.1358195857284544e-05, - "loss": 1.722, - "step": 768500 - }, - { - "epoch": 11.07, - "learning_rate": 2.130647682249882e-05, - "loss": 1.7249, - "step": 769000 - }, - { - "epoch": 11.08, - "learning_rate": 2.1254550080746087e-05, - "loss": 1.7184, - "step": 769500 - }, - { - "epoch": 11.08, - "learning_rate": 2.120283104596036e-05, - "loss": 1.7217, - "step": 770000 - }, - { - "epoch": 11.09, - "learning_rate": 2.1150904304207626e-05, - "loss": 1.7151, - "step": 770500 - }, - { - "epoch": 11.1, - "learning_rate": 2.10991852694219e-05, - "loss": 1.7216, - "step": 771000 - }, - { - "epoch": 11.11, - "learning_rate": 2.1047258527669165e-05, - "loss": 1.7187, - "step": 771500 - }, - { - "epoch": 11.11, - "learning_rate": 2.099553949288344e-05, - "loss": 1.7231, - "step": 772000 - }, - { - "epoch": 11.12, - "learning_rate": 2.0943612751130705e-05, - "loss": 1.7166, - "step": 772500 - }, - { - "epoch": 11.13, - "learning_rate": 2.0891893716344982e-05, - "loss": 1.721, - "step": 773000 - }, - { - "epoch": 11.13, - "learning_rate": 2.0839966974592244e-05, - "loss": 1.7188, - "step": 773500 - }, - { - "epoch": 11.14, - "learning_rate": 2.0788247939806522e-05, - "loss": 1.7186, - "step": 774000 - }, - { - "epoch": 11.15, - "learning_rate": 2.0736321198053787e-05, - "loss": 1.7172, - "step": 774500 - }, - { - "epoch": 11.16, - "learning_rate": 2.0684602163268064e-05, - "loss": 1.7187, - "step": 775000 - }, - { - "epoch": 11.16, - "learning_rate": 2.0632675421515326e-05, - "loss": 1.721, - "step": 775500 - }, - { - "epoch": 11.17, - "learning_rate": 2.0580956386729604e-05, - "loss": 1.72, - "step": 776000 - }, - { - "epoch": 11.18, - "learning_rate": 2.052902964497687e-05, - "loss": 1.7235, - "step": 776500 - }, - { - "epoch": 11.18, - "learning_rate": 2.0477310610191143e-05, - "loss": 1.7188, - "step": 777000 - }, - { - "epoch": 11.19, - "learning_rate": 2.0425383868438408e-05, - "loss": 1.7149, - "step": 777500 - }, - { - "epoch": 11.2, - "learning_rate": 2.0373664833652682e-05, - "loss": 1.7217, - "step": 778000 - }, - { - "epoch": 11.21, - "learning_rate": 2.0321738091899947e-05, - "loss": 1.7167, - "step": 778500 - }, - { - "epoch": 11.21, - "learning_rate": 2.027001905711422e-05, - "loss": 1.7174, - "step": 779000 - }, - { - "epoch": 11.22, - "learning_rate": 2.021809231536149e-05, - "loss": 1.7199, - "step": 779500 - }, - { - "epoch": 11.23, - "learning_rate": 2.0166373280575764e-05, - "loss": 1.7173, - "step": 780000 - }, - { - "epoch": 11.23, - "learning_rate": 2.011444653882303e-05, - "loss": 1.7184, - "step": 780500 - }, - { - "epoch": 11.24, - "learning_rate": 2.0062727504037303e-05, - "loss": 1.714, - "step": 781000 - }, - { - "epoch": 11.25, - "learning_rate": 2.001080076228457e-05, - "loss": 1.7196, - "step": 781500 - }, - { - "epoch": 11.26, - "learning_rate": 1.9959081727498846e-05, - "loss": 1.7174, - "step": 782000 - }, - { - "epoch": 11.26, - "learning_rate": 1.990715498574611e-05, - "loss": 1.7227, - "step": 782500 - }, - { - "epoch": 11.27, - "learning_rate": 1.9855435950960385e-05, - "loss": 1.7177, - "step": 783000 - }, - { - "epoch": 11.28, - "learning_rate": 1.980350920920765e-05, - "loss": 1.7197, - "step": 783500 - }, - { - "epoch": 11.28, - "learning_rate": 1.9751790174421928e-05, - "loss": 1.7229, - "step": 784000 - }, - { - "epoch": 11.29, - "learning_rate": 1.969986343266919e-05, - "loss": 1.7191, - "step": 784500 - }, - { - "epoch": 11.3, - "learning_rate": 1.9648144397883467e-05, - "loss": 1.7138, - "step": 785000 - }, - { - "epoch": 11.31, - "learning_rate": 1.959621765613073e-05, - "loss": 1.7207, - "step": 785500 - }, - { - "epoch": 11.31, - "learning_rate": 1.954449862134501e-05, - "loss": 1.7146, - "step": 786000 - }, - { - "epoch": 11.32, - "learning_rate": 1.949257187959227e-05, - "loss": 1.7196, - "step": 786500 - }, - { - "epoch": 11.33, - "learning_rate": 1.944085284480655e-05, - "loss": 1.7191, - "step": 787000 - }, - { - "epoch": 11.34, - "learning_rate": 1.938892610305381e-05, - "loss": 1.7159, - "step": 787500 - }, - { - "epoch": 11.34, - "learning_rate": 1.933720706826809e-05, - "loss": 1.7162, - "step": 788000 - }, - { - "epoch": 11.35, - "learning_rate": 1.9285280326515353e-05, - "loss": 1.7182, - "step": 788500 - }, - { - "epoch": 11.36, - "learning_rate": 1.9233561291729628e-05, - "loss": 1.7189, - "step": 789000 - }, - { - "epoch": 11.36, - "learning_rate": 1.9181634549976893e-05, - "loss": 1.717, - "step": 789500 - }, - { - "epoch": 11.37, - "learning_rate": 1.9129915515191167e-05, - "loss": 1.7177, - "step": 790000 - }, - { - "epoch": 11.38, - "learning_rate": 1.9077988773438435e-05, - "loss": 1.7161, - "step": 790500 - }, - { - "epoch": 11.39, - "learning_rate": 1.902626973865271e-05, - "loss": 1.7167, - "step": 791000 - }, - { - "epoch": 11.39, - "learning_rate": 1.8974342996899974e-05, - "loss": 1.7182, - "step": 791500 - }, - { - "epoch": 11.4, - "learning_rate": 1.892262396211425e-05, - "loss": 1.7168, - "step": 792000 - }, - { - "epoch": 11.41, - "learning_rate": 1.8870697220361517e-05, - "loss": 1.7151, - "step": 792500 - }, - { - "epoch": 11.41, - "learning_rate": 1.881897818557579e-05, - "loss": 1.7218, - "step": 793000 - }, - { - "epoch": 11.42, - "learning_rate": 1.8767051443823056e-05, - "loss": 1.7196, - "step": 793500 - }, - { - "epoch": 11.43, - "learning_rate": 1.871533240903733e-05, - "loss": 1.7158, - "step": 794000 - }, - { - "epoch": 11.44, - "learning_rate": 1.8663405667284596e-05, - "loss": 1.7207, - "step": 794500 - }, - { - "epoch": 11.44, - "learning_rate": 1.861168663249887e-05, - "loss": 1.7135, - "step": 795000 - }, - { - "epoch": 11.45, - "learning_rate": 1.855975989074614e-05, - "loss": 1.7185, - "step": 795500 - }, - { - "epoch": 11.46, - "learning_rate": 1.8508040855960413e-05, - "loss": 1.715, - "step": 796000 - }, - { - "epoch": 11.46, - "learning_rate": 1.8456114114207678e-05, - "loss": 1.7159, - "step": 796500 - }, - { - "epoch": 11.47, - "learning_rate": 1.8404395079421952e-05, - "loss": 1.722, - "step": 797000 - }, - { - "epoch": 11.48, - "learning_rate": 1.8352468337669217e-05, - "loss": 1.7182, - "step": 797500 - }, - { - "epoch": 11.49, - "learning_rate": 1.8300749302883495e-05, - "loss": 1.7209, - "step": 798000 - }, - { - "epoch": 11.49, - "learning_rate": 1.8248822561130756e-05, - "loss": 1.7154, - "step": 798500 - }, - { - "epoch": 11.5, - "learning_rate": 1.8197103526345034e-05, - "loss": 1.7182, - "step": 799000 - }, - { - "epoch": 11.51, - "learning_rate": 1.81451767845923e-05, - "loss": 1.7143, - "step": 799500 - }, - { - "epoch": 11.52, - "learning_rate": 1.8093457749806577e-05, - "loss": 1.7141, - "step": 800000 - }, - { - "epoch": 11.52, - "learning_rate": 1.8041531008053838e-05, - "loss": 1.7146, - "step": 800500 - }, - { - "epoch": 11.53, - "learning_rate": 1.7989811973268116e-05, - "loss": 1.7161, - "step": 801000 - }, - { - "epoch": 11.54, - "learning_rate": 1.7937885231515377e-05, - "loss": 1.7129, - "step": 801500 - }, - { - "epoch": 11.54, - "learning_rate": 1.7886166196729655e-05, - "loss": 1.7192, - "step": 802000 - }, - { - "epoch": 11.55, - "learning_rate": 1.783423945497692e-05, - "loss": 1.7209, - "step": 802500 - }, - { - "epoch": 11.56, - "learning_rate": 1.7782520420191194e-05, - "loss": 1.717, - "step": 803000 - }, - { - "epoch": 11.57, - "learning_rate": 1.773059367843846e-05, - "loss": 1.7134, - "step": 803500 - }, - { - "epoch": 11.57, - "learning_rate": 1.7678874643652734e-05, - "loss": 1.7199, - "step": 804000 - }, - { - "epoch": 11.58, - "learning_rate": 1.7626947901900002e-05, - "loss": 1.7099, - "step": 804500 - }, - { - "epoch": 11.59, - "learning_rate": 1.7575228867114276e-05, - "loss": 1.7197, - "step": 805000 - }, - { - "epoch": 11.59, - "learning_rate": 1.752330212536154e-05, - "loss": 1.7135, - "step": 805500 - }, - { - "epoch": 11.6, - "learning_rate": 1.7471583090575816e-05, - "loss": 1.7177, - "step": 806000 - }, - { - "epoch": 11.61, - "learning_rate": 1.7419656348823084e-05, - "loss": 1.7178, - "step": 806500 - }, - { - "epoch": 11.62, - "learning_rate": 1.7367937314037358e-05, - "loss": 1.7157, - "step": 807000 - }, - { - "epoch": 11.62, - "learning_rate": 1.7316010572284623e-05, - "loss": 1.7178, - "step": 807500 - }, - { - "epoch": 11.63, - "learning_rate": 1.7264291537498897e-05, - "loss": 1.7147, - "step": 808000 - }, - { - "epoch": 11.64, - "learning_rate": 1.7212364795746162e-05, - "loss": 1.7179, - "step": 808500 - }, - { - "epoch": 11.64, - "learning_rate": 1.7160645760960437e-05, - "loss": 1.7159, - "step": 809000 - }, - { - "epoch": 11.65, - "learning_rate": 1.71087190192077e-05, - "loss": 1.7157, - "step": 809500 - }, - { - "epoch": 11.66, - "learning_rate": 1.705699998442198e-05, - "loss": 1.7194, - "step": 810000 - }, - { - "epoch": 11.67, - "learning_rate": 1.700507324266924e-05, - "loss": 1.7165, - "step": 810500 - }, - { - "epoch": 11.67, - "learning_rate": 1.695335420788352e-05, - "loss": 1.717, - "step": 811000 - }, - { - "epoch": 11.68, - "learning_rate": 1.6901427466130784e-05, - "loss": 1.7156, - "step": 811500 - }, - { - "epoch": 11.69, - "learning_rate": 1.684970843134506e-05, - "loss": 1.7136, - "step": 812000 - }, - { - "epoch": 11.7, - "learning_rate": 1.6797781689592323e-05, - "loss": 1.7164, - "step": 812500 - }, - { - "epoch": 11.7, - "learning_rate": 1.67460626548066e-05, - "loss": 1.7159, - "step": 813000 - }, - { - "epoch": 11.71, - "learning_rate": 1.6694135913053865e-05, - "loss": 1.7176, - "step": 813500 - }, - { - "epoch": 11.72, - "learning_rate": 1.664241687826814e-05, - "loss": 1.7167, - "step": 814000 - }, - { - "epoch": 11.72, - "learning_rate": 1.6590490136515405e-05, - "loss": 1.7175, - "step": 814500 - }, - { - "epoch": 11.73, - "learning_rate": 1.653877110172968e-05, - "loss": 1.7126, - "step": 815000 - }, - { - "epoch": 11.74, - "learning_rate": 1.6486844359976944e-05, - "loss": 1.7118, - "step": 815500 - }, - { - "epoch": 11.75, - "learning_rate": 1.6435125325191222e-05, - "loss": 1.7122, - "step": 816000 - }, - { - "epoch": 11.75, - "learning_rate": 1.6383198583438487e-05, - "loss": 1.7169, - "step": 816500 - }, - { - "epoch": 11.76, - "learning_rate": 1.633147954865276e-05, - "loss": 1.7138, - "step": 817000 - }, - { - "epoch": 11.77, - "learning_rate": 1.6279552806900026e-05, - "loss": 1.7133, - "step": 817500 - }, - { - "epoch": 11.77, - "learning_rate": 1.62278337721143e-05, - "loss": 1.7167, - "step": 818000 - }, - { - "epoch": 11.78, - "learning_rate": 1.617590703036157e-05, - "loss": 1.7165, - "step": 818500 - }, - { - "epoch": 11.79, - "learning_rate": 1.6124187995575843e-05, - "loss": 1.7147, - "step": 819000 - }, - { - "epoch": 11.8, - "learning_rate": 1.6072261253823108e-05, - "loss": 1.7116, - "step": 819500 - }, - { - "epoch": 11.8, - "learning_rate": 1.6020542219037382e-05, - "loss": 1.7131, - "step": 820000 - }, - { - "epoch": 11.81, - "learning_rate": 1.5968615477284647e-05, - "loss": 1.7125, - "step": 820500 - }, - { - "epoch": 11.82, - "learning_rate": 1.5916896442498925e-05, - "loss": 1.7159, - "step": 821000 - }, - { - "epoch": 11.82, - "learning_rate": 1.5864969700746186e-05, - "loss": 1.7116, - "step": 821500 - }, - { - "epoch": 11.83, - "learning_rate": 1.5813250665960464e-05, - "loss": 1.7134, - "step": 822000 - }, - { - "epoch": 11.84, - "learning_rate": 1.576132392420773e-05, - "loss": 1.7176, - "step": 822500 - }, - { - "epoch": 11.85, - "learning_rate": 1.5709604889422007e-05, - "loss": 1.7149, - "step": 823000 - }, - { - "epoch": 11.85, - "learning_rate": 1.565767814766927e-05, - "loss": 1.7154, - "step": 823500 - }, - { - "epoch": 11.86, - "learning_rate": 1.5605959112883546e-05, - "loss": 1.7141, - "step": 824000 - }, - { - "epoch": 11.87, - "learning_rate": 1.5554032371130808e-05, - "loss": 1.714, - "step": 824500 - }, - { - "epoch": 11.88, - "learning_rate": 1.5502313336345085e-05, - "loss": 1.7131, - "step": 825000 - }, - { - "epoch": 11.88, - "learning_rate": 1.545038659459235e-05, - "loss": 1.7094, - "step": 825500 - }, - { - "epoch": 11.89, - "learning_rate": 1.5398667559806625e-05, - "loss": 1.713, - "step": 826000 - }, - { - "epoch": 11.9, - "learning_rate": 1.534674081805389e-05, - "loss": 1.7131, - "step": 826500 - }, - { - "epoch": 11.9, - "learning_rate": 1.5295021783268164e-05, - "loss": 1.7134, - "step": 827000 - }, - { - "epoch": 11.91, - "learning_rate": 1.5243095041515432e-05, - "loss": 1.7143, - "step": 827500 - }, - { - "epoch": 11.92, - "learning_rate": 1.5191376006729707e-05, - "loss": 1.7123, - "step": 828000 - }, - { - "epoch": 11.93, - "learning_rate": 1.5139449264976971e-05, - "loss": 1.7154, - "step": 828500 - }, - { - "epoch": 11.93, - "learning_rate": 1.5087730230191246e-05, - "loss": 1.7142, - "step": 829000 - }, - { - "epoch": 11.94, - "learning_rate": 1.5035803488438512e-05, - "loss": 1.7115, - "step": 829500 - }, - { - "epoch": 11.95, - "learning_rate": 1.4984084453652788e-05, - "loss": 1.714, - "step": 830000 - }, - { - "epoch": 11.95, - "learning_rate": 1.4932157711900052e-05, - "loss": 1.7141, - "step": 830500 - }, - { - "epoch": 11.96, - "learning_rate": 1.4880438677114328e-05, - "loss": 1.7081, - "step": 831000 - }, - { - "epoch": 11.97, - "learning_rate": 1.4828511935361591e-05, - "loss": 1.7111, - "step": 831500 - }, - { - "epoch": 11.98, - "learning_rate": 1.4776792900575867e-05, - "loss": 1.7114, - "step": 832000 - }, - { - "epoch": 11.98, - "learning_rate": 1.4724866158823134e-05, - "loss": 1.7137, - "step": 832500 - }, - { - "epoch": 11.99, - "learning_rate": 1.467314712403741e-05, - "loss": 1.7155, - "step": 833000 - }, - { - "epoch": 12.0, - "learning_rate": 1.4621220382284673e-05, - "loss": 1.707, - "step": 833500 - }, - { - "epoch": 12.0, - "eval_accuracy": 0.6659963122125594, - "eval_loss": 1.5859375, - "eval_runtime": 653.3093, - "eval_samples_per_second": 824.934, - "eval_steps_per_second": 34.373, - "step": 833676 - }, - { - "epoch": 12.0, - "learning_rate": 1.4569501347498949e-05, - "loss": 1.7114, - "step": 834000 - }, - { - "epoch": 12.01, - "learning_rate": 1.4517574605746216e-05, - "loss": 1.7119, - "step": 834500 - }, - { - "epoch": 12.02, - "learning_rate": 1.446585557096049e-05, - "loss": 1.7107, - "step": 835000 - }, - { - "epoch": 12.03, - "learning_rate": 1.4413928829207755e-05, - "loss": 1.7124, - "step": 835500 - }, - { - "epoch": 12.03, - "learning_rate": 1.4362209794422029e-05, - "loss": 1.7061, - "step": 836000 - }, - { - "epoch": 12.04, - "learning_rate": 1.4310283052669296e-05, - "loss": 1.7062, - "step": 836500 - }, - { - "epoch": 12.05, - "learning_rate": 1.4258564017883572e-05, - "loss": 1.7093, - "step": 837000 - }, - { - "epoch": 12.06, - "learning_rate": 1.4206637276130835e-05, - "loss": 1.7088, - "step": 837500 - }, - { - "epoch": 12.06, - "learning_rate": 1.4154918241345111e-05, - "loss": 1.715, - "step": 838000 - }, - { - "epoch": 12.07, - "learning_rate": 1.4102991499592374e-05, - "loss": 1.7135, - "step": 838500 - }, - { - "epoch": 12.08, - "learning_rate": 1.405127246480665e-05, - "loss": 1.7104, - "step": 839000 - }, - { - "epoch": 12.08, - "learning_rate": 1.3999345723053917e-05, - "loss": 1.7061, - "step": 839500 - }, - { - "epoch": 12.09, - "learning_rate": 1.3947626688268193e-05, - "loss": 1.7104, - "step": 840000 - }, - { - "epoch": 12.1, - "learning_rate": 1.3895699946515456e-05, - "loss": 1.7089, - "step": 840500 - }, - { - "epoch": 12.11, - "learning_rate": 1.3843980911729732e-05, - "loss": 1.709, - "step": 841000 - }, - { - "epoch": 12.11, - "learning_rate": 1.3792054169976997e-05, - "loss": 1.7148, - "step": 841500 - }, - { - "epoch": 12.12, - "learning_rate": 1.3740335135191273e-05, - "loss": 1.7093, - "step": 842000 - }, - { - "epoch": 12.13, - "learning_rate": 1.3688408393438536e-05, - "loss": 1.7113, - "step": 842500 - }, - { - "epoch": 12.13, - "learning_rate": 1.3636689358652812e-05, - "loss": 1.7068, - "step": 843000 - }, - { - "epoch": 12.14, - "learning_rate": 1.3584762616900079e-05, - "loss": 1.7078, - "step": 843500 - }, - { - "epoch": 12.15, - "learning_rate": 1.3533043582114355e-05, - "loss": 1.7096, - "step": 844000 - }, - { - "epoch": 12.16, - "learning_rate": 1.3481116840361618e-05, - "loss": 1.7111, - "step": 844500 - }, - { - "epoch": 12.16, - "learning_rate": 1.3429397805575894e-05, - "loss": 1.7084, - "step": 845000 - }, - { - "epoch": 12.17, - "learning_rate": 1.3377471063823158e-05, - "loss": 1.715, - "step": 845500 - }, - { - "epoch": 12.18, - "learning_rate": 1.3325752029037434e-05, - "loss": 1.7066, - "step": 846000 - }, - { - "epoch": 12.18, - "learning_rate": 1.32738252872847e-05, - "loss": 1.7098, - "step": 846500 - }, - { - "epoch": 12.19, - "learning_rate": 1.3222106252498975e-05, - "loss": 1.7082, - "step": 847000 - }, - { - "epoch": 12.2, - "learning_rate": 1.317017951074624e-05, - "loss": 1.7081, - "step": 847500 - }, - { - "epoch": 12.21, - "learning_rate": 1.3118460475960514e-05, - "loss": 1.7124, - "step": 848000 - }, - { - "epoch": 12.21, - "learning_rate": 1.306653373420778e-05, - "loss": 1.7086, - "step": 848500 - }, - { - "epoch": 12.22, - "learning_rate": 1.3014814699422057e-05, - "loss": 1.7109, - "step": 849000 - }, - { - "epoch": 12.23, - "learning_rate": 1.296288795766932e-05, - "loss": 1.7114, - "step": 849500 - }, - { - "epoch": 12.23, - "learning_rate": 1.2911168922883596e-05, - "loss": 1.7097, - "step": 850000 - }, - { - "epoch": 12.24, - "learning_rate": 1.2859242181130862e-05, - "loss": 1.7067, - "step": 850500 - }, - { - "epoch": 12.25, - "learning_rate": 1.2807523146345138e-05, - "loss": 1.7092, - "step": 851000 - }, - { - "epoch": 12.26, - "learning_rate": 1.2755596404592402e-05, - "loss": 1.7094, - "step": 851500 - }, - { - "epoch": 12.26, - "learning_rate": 1.2703877369806678e-05, - "loss": 1.7079, - "step": 852000 - }, - { - "epoch": 12.27, - "learning_rate": 1.2651950628053943e-05, - "loss": 1.7042, - "step": 852500 - }, - { - "epoch": 12.28, - "learning_rate": 1.2600231593268219e-05, - "loss": 1.7083, - "step": 853000 - }, - { - "epoch": 12.29, - "learning_rate": 1.2548304851515482e-05, - "loss": 1.7076, - "step": 853500 - }, - { - "epoch": 12.29, - "learning_rate": 1.2496585816729758e-05, - "loss": 1.7063, - "step": 854000 - }, - { - "epoch": 12.3, - "learning_rate": 1.2444659074977023e-05, - "loss": 1.7082, - "step": 854500 - }, - { - "epoch": 12.31, - "learning_rate": 1.2392940040191299e-05, - "loss": 1.7097, - "step": 855000 - }, - { - "epoch": 12.31, - "learning_rate": 1.2341013298438564e-05, - "loss": 1.7103, - "step": 855500 - }, - { - "epoch": 12.32, - "learning_rate": 1.228929426365284e-05, - "loss": 1.7061, - "step": 856000 - }, - { - "epoch": 12.33, - "learning_rate": 1.2237367521900103e-05, - "loss": 1.7058, - "step": 856500 - }, - { - "epoch": 12.34, - "learning_rate": 1.2185648487114381e-05, - "loss": 1.7078, - "step": 857000 - }, - { - "epoch": 12.34, - "learning_rate": 1.2133721745361644e-05, - "loss": 1.7082, - "step": 857500 - }, - { - "epoch": 12.35, - "learning_rate": 1.208200271057592e-05, - "loss": 1.7065, - "step": 858000 - }, - { - "epoch": 12.36, - "learning_rate": 1.2030075968823185e-05, - "loss": 1.7073, - "step": 858500 - }, - { - "epoch": 12.36, - "learning_rate": 1.197835693403746e-05, - "loss": 1.7073, - "step": 859000 - }, - { - "epoch": 12.37, - "learning_rate": 1.1926430192284726e-05, - "loss": 1.7079, - "step": 859500 - }, - { - "epoch": 12.38, - "learning_rate": 1.1874711157499e-05, - "loss": 1.7045, - "step": 860000 - }, - { - "epoch": 12.39, - "learning_rate": 1.1822784415746265e-05, - "loss": 1.7102, - "step": 860500 - }, - { - "epoch": 12.39, - "learning_rate": 1.1771065380960541e-05, - "loss": 1.7039, - "step": 861000 - }, - { - "epoch": 12.4, - "learning_rate": 1.1719138639207806e-05, - "loss": 1.7058, - "step": 861500 - }, - { - "epoch": 12.41, - "learning_rate": 1.1667419604422082e-05, - "loss": 1.7056, - "step": 862000 - }, - { - "epoch": 12.41, - "learning_rate": 1.1615492862669347e-05, - "loss": 1.7047, - "step": 862500 - }, - { - "epoch": 12.42, - "learning_rate": 1.1563773827883623e-05, - "loss": 1.7115, - "step": 863000 - }, - { - "epoch": 12.43, - "learning_rate": 1.1511847086130888e-05, - "loss": 1.7049, - "step": 863500 - }, - { - "epoch": 12.44, - "learning_rate": 1.1460128051345164e-05, - "loss": 1.7092, - "step": 864000 - }, - { - "epoch": 12.44, - "learning_rate": 1.1408201309592427e-05, - "loss": 1.7057, - "step": 864500 - }, - { - "epoch": 12.45, - "learning_rate": 1.1356482274806702e-05, - "loss": 1.7077, - "step": 865000 - }, - { - "epoch": 12.46, - "learning_rate": 1.1304555533053968e-05, - "loss": 1.7083, - "step": 865500 - }, - { - "epoch": 12.47, - "learning_rate": 1.1252836498268243e-05, - "loss": 1.7076, - "step": 866000 - }, - { - "epoch": 12.47, - "learning_rate": 1.1200909756515508e-05, - "loss": 1.7032, - "step": 866500 - }, - { - "epoch": 12.48, - "learning_rate": 1.1149190721729784e-05, - "loss": 1.7079, - "step": 867000 - }, - { - "epoch": 12.49, - "learning_rate": 1.1097263979977049e-05, - "loss": 1.7072, - "step": 867500 - }, - { - "epoch": 12.49, - "learning_rate": 1.1045544945191325e-05, - "loss": 1.7072, - "step": 868000 - }, - { - "epoch": 12.5, - "learning_rate": 1.099361820343859e-05, - "loss": 1.7081, - "step": 868500 - }, - { - "epoch": 12.51, - "learning_rate": 1.0941899168652866e-05, - "loss": 1.7071, - "step": 869000 - }, - { - "epoch": 12.52, - "learning_rate": 1.088997242690013e-05, - "loss": 1.7067, - "step": 869500 - }, - { - "epoch": 12.52, - "learning_rate": 1.0838253392114407e-05, - "loss": 1.7113, - "step": 870000 - }, - { - "epoch": 12.53, - "learning_rate": 1.0786326650361671e-05, - "loss": 1.7068, - "step": 870500 - }, - { - "epoch": 12.54, - "learning_rate": 1.0734607615575946e-05, - "loss": 1.7077, - "step": 871000 - }, - { - "epoch": 12.54, - "learning_rate": 1.068268087382321e-05, - "loss": 1.7061, - "step": 871500 - }, - { - "epoch": 12.55, - "learning_rate": 1.0630961839037487e-05, - "loss": 1.7066, - "step": 872000 - }, - { - "epoch": 12.56, - "learning_rate": 1.057903509728475e-05, - "loss": 1.7114, - "step": 872500 - }, - { - "epoch": 12.57, - "learning_rate": 1.0527316062499026e-05, - "loss": 1.7065, - "step": 873000 - }, - { - "epoch": 12.57, - "learning_rate": 1.0475389320746291e-05, - "loss": 1.7078, - "step": 873500 - }, - { - "epoch": 12.58, - "learning_rate": 1.0423670285960567e-05, - "loss": 1.7055, - "step": 874000 - }, - { - "epoch": 12.59, - "learning_rate": 1.0371743544207832e-05, - "loss": 1.7053, - "step": 874500 - }, - { - "epoch": 12.59, - "learning_rate": 1.0320024509422108e-05, - "loss": 1.7064, - "step": 875000 - }, - { - "epoch": 12.6, - "learning_rate": 1.0268097767669373e-05, - "loss": 1.7066, - "step": 875500 - }, - { - "epoch": 12.61, - "learning_rate": 1.0216378732883649e-05, - "loss": 1.7088, - "step": 876000 - }, - { - "epoch": 12.62, - "learning_rate": 1.0164451991130914e-05, - "loss": 1.7057, - "step": 876500 - }, - { - "epoch": 12.62, - "learning_rate": 1.0112732956345188e-05, - "loss": 1.7101, - "step": 877000 - }, - { - "epoch": 12.63, - "learning_rate": 1.0060806214592453e-05, - "loss": 1.7067, - "step": 877500 - }, - { - "epoch": 12.64, - "learning_rate": 1.000908717980673e-05, - "loss": 1.7029, - "step": 878000 - }, - { - "epoch": 12.65, - "learning_rate": 9.957160438053994e-06, - "loss": 1.7068, - "step": 878500 - }, - { - "epoch": 12.65, - "learning_rate": 9.90544140326827e-06, - "loss": 1.7016, - "step": 879000 - }, - { - "epoch": 12.66, - "learning_rate": 9.853514661515533e-06, - "loss": 1.7056, - "step": 879500 - }, - { - "epoch": 12.67, - "learning_rate": 9.80179562672981e-06, - "loss": 1.7051, - "step": 880000 - }, - { - "epoch": 12.67, - "learning_rate": 9.749868884977074e-06, - "loss": 1.7045, - "step": 880500 - }, - { - "epoch": 12.68, - "learning_rate": 9.69814985019135e-06, - "loss": 1.7064, - "step": 881000 - }, - { - "epoch": 12.69, - "learning_rate": 9.646223108438615e-06, - "loss": 1.7036, - "step": 881500 - }, - { - "epoch": 12.7, - "learning_rate": 9.594504073652891e-06, - "loss": 1.7039, - "step": 882000 - }, - { - "epoch": 12.7, - "learning_rate": 9.542577331900156e-06, - "loss": 1.7067, - "step": 882500 - }, - { - "epoch": 12.71, - "learning_rate": 9.49085829711443e-06, - "loss": 1.7064, - "step": 883000 - }, - { - "epoch": 12.72, - "learning_rate": 9.438931555361697e-06, - "loss": 1.7044, - "step": 883500 - }, - { - "epoch": 12.72, - "learning_rate": 9.387212520575972e-06, - "loss": 1.7062, - "step": 884000 - }, - { - "epoch": 12.73, - "learning_rate": 9.335285778823236e-06, - "loss": 1.7072, - "step": 884500 - }, - { - "epoch": 12.74, - "learning_rate": 9.283566744037513e-06, - "loss": 1.704, - "step": 885000 - }, - { - "epoch": 12.75, - "learning_rate": 9.231640002284777e-06, - "loss": 1.7025, - "step": 885500 - }, - { - "epoch": 12.75, - "learning_rate": 9.179920967499054e-06, - "loss": 1.7006, - "step": 886000 - }, - { - "epoch": 12.76, - "learning_rate": 9.127994225746317e-06, - "loss": 1.7019, - "step": 886500 - }, - { - "epoch": 12.77, - "learning_rate": 9.076275190960594e-06, - "loss": 1.7047, - "step": 887000 - }, - { - "epoch": 12.77, - "learning_rate": 9.024348449207858e-06, - "loss": 1.705, - "step": 887500 - }, - { - "epoch": 12.78, - "learning_rate": 8.972629414422134e-06, - "loss": 1.7044, - "step": 888000 - }, - { - "epoch": 12.79, - "learning_rate": 8.920702672669399e-06, - "loss": 1.6981, - "step": 888500 - }, - { - "epoch": 12.8, - "learning_rate": 8.868983637883673e-06, - "loss": 1.7067, - "step": 889000 - }, - { - "epoch": 12.8, - "learning_rate": 8.81705689613094e-06, - "loss": 1.7091, - "step": 889500 - }, - { - "epoch": 12.81, - "learning_rate": 8.765337861345214e-06, - "loss": 1.7041, - "step": 890000 - }, - { - "epoch": 12.82, - "learning_rate": 8.713411119592479e-06, - "loss": 1.7066, - "step": 890500 - }, - { - "epoch": 12.83, - "learning_rate": 8.661692084806755e-06, - "loss": 1.7049, - "step": 891000 - }, - { - "epoch": 12.83, - "learning_rate": 8.60976534305402e-06, - "loss": 1.7046, - "step": 891500 - }, - { - "epoch": 12.84, - "learning_rate": 8.558046308268296e-06, - "loss": 1.7052, - "step": 892000 - }, - { - "epoch": 12.85, - "learning_rate": 8.50611956651556e-06, - "loss": 1.7046, - "step": 892500 - }, - { - "epoch": 12.85, - "learning_rate": 8.454400531729837e-06, - "loss": 1.7027, - "step": 893000 - }, - { - "epoch": 12.86, - "learning_rate": 8.402473789977102e-06, - "loss": 1.7061, - "step": 893500 - }, - { - "epoch": 12.87, - "learning_rate": 8.350754755191378e-06, - "loss": 1.7058, - "step": 894000 - }, - { - "epoch": 12.88, - "learning_rate": 8.298828013438641e-06, - "loss": 1.7022, - "step": 894500 - }, - { - "epoch": 12.88, - "learning_rate": 8.247108978652917e-06, - "loss": 1.7045, - "step": 895000 - }, - { - "epoch": 12.89, - "learning_rate": 8.195182236900182e-06, - "loss": 1.7035, - "step": 895500 - }, - { - "epoch": 12.9, - "learning_rate": 8.143463202114456e-06, - "loss": 1.6989, - "step": 896000 - }, - { - "epoch": 12.9, - "learning_rate": 8.091536460361721e-06, - "loss": 1.7054, - "step": 896500 - }, - { - "epoch": 12.91, - "learning_rate": 8.039817425575997e-06, - "loss": 1.7068, - "step": 897000 - }, - { - "epoch": 12.92, - "learning_rate": 7.987890683823262e-06, - "loss": 1.7069, - "step": 897500 - }, - { - "epoch": 12.93, - "learning_rate": 7.936171649037538e-06, - "loss": 1.703, - "step": 898000 - }, - { - "epoch": 12.93, - "learning_rate": 7.884244907284803e-06, - "loss": 1.7018, - "step": 898500 - }, - { - "epoch": 12.94, - "learning_rate": 7.83252587249908e-06, - "loss": 1.7047, - "step": 899000 - }, - { - "epoch": 12.95, - "learning_rate": 7.780599130746344e-06, - "loss": 1.7039, - "step": 899500 - }, - { - "epoch": 12.95, - "learning_rate": 7.72888009596062e-06, - "loss": 1.6989, - "step": 900000 - }, - { - "epoch": 12.96, - "learning_rate": 7.676953354207885e-06, - "loss": 1.7002, - "step": 900500 - }, - { - "epoch": 12.97, - "learning_rate": 7.62523431942216e-06, - "loss": 1.7026, - "step": 901000 - }, - { - "epoch": 12.98, - "learning_rate": 7.5733075776694235e-06, - "loss": 1.7036, - "step": 901500 - }, - { - "epoch": 12.98, - "learning_rate": 7.5215885428837e-06, - "loss": 1.7003, - "step": 902000 - }, - { - "epoch": 12.99, - "learning_rate": 7.4696618011309645e-06, - "loss": 1.7021, - "step": 902500 - }, - { - "epoch": 13.0, - "learning_rate": 7.41794276634524e-06, - "loss": 1.7049, - "step": 903000 - }, - { - "epoch": 13.0, - "eval_accuracy": 0.6671741584513293, - "eval_loss": 1.5751953125, - "eval_runtime": 652.7224, - "eval_samples_per_second": 825.676, - "eval_steps_per_second": 34.404, - "step": 903149 - }, - { - "epoch": 13.01, - "learning_rate": 7.366016024592505e-06, - "loss": 1.7045, - "step": 903500 - }, - { - "epoch": 13.01, - "learning_rate": 7.314296989806781e-06, - "loss": 1.7035, - "step": 904000 - }, - { - "epoch": 13.02, - "learning_rate": 7.2623702480540456e-06, - "loss": 1.6998, - "step": 904500 - }, - { - "epoch": 13.03, - "learning_rate": 7.210651213268322e-06, - "loss": 1.7022, - "step": 905000 - }, - { - "epoch": 13.03, - "learning_rate": 7.1587244715155865e-06, - "loss": 1.7006, - "step": 905500 - }, - { - "epoch": 13.04, - "learning_rate": 7.107005436729862e-06, - "loss": 1.7002, - "step": 906000 - }, - { - "epoch": 13.05, - "learning_rate": 7.055078694977127e-06, - "loss": 1.703, - "step": 906500 - }, - { - "epoch": 13.06, - "learning_rate": 7.003359660191403e-06, - "loss": 1.7053, - "step": 907000 - }, - { - "epoch": 13.06, - "learning_rate": 6.951432918438668e-06, - "loss": 1.6992, - "step": 907500 - }, - { - "epoch": 13.07, - "learning_rate": 6.899713883652943e-06, - "loss": 1.7002, - "step": 908000 - }, - { - "epoch": 13.08, - "learning_rate": 6.8477871419002086e-06, - "loss": 1.7036, - "step": 908500 - }, - { - "epoch": 13.08, - "learning_rate": 6.796068107114484e-06, - "loss": 1.6992, - "step": 909000 - }, - { - "epoch": 13.09, - "learning_rate": 6.744141365361748e-06, - "loss": 1.7025, - "step": 909500 - }, - { - "epoch": 13.1, - "learning_rate": 6.692422330576023e-06, - "loss": 1.6985, - "step": 910000 - }, - { - "epoch": 13.11, - "learning_rate": 6.640495588823288e-06, - "loss": 1.7017, - "step": 910500 - }, - { - "epoch": 13.11, - "learning_rate": 6.588776554037564e-06, - "loss": 1.6991, - "step": 911000 - }, - { - "epoch": 13.12, - "learning_rate": 6.536849812284829e-06, - "loss": 1.7015, - "step": 911500 - }, - { - "epoch": 13.13, - "learning_rate": 6.485130777499104e-06, - "loss": 1.703, - "step": 912000 - }, - { - "epoch": 13.13, - "learning_rate": 6.433204035746369e-06, - "loss": 1.6997, - "step": 912500 - }, - { - "epoch": 13.14, - "learning_rate": 6.381485000960645e-06, - "loss": 1.7033, - "step": 913000 - }, - { - "epoch": 13.15, - "learning_rate": 6.32955825920791e-06, - "loss": 1.7016, - "step": 913500 - }, - { - "epoch": 13.16, - "learning_rate": 6.277839224422186e-06, - "loss": 1.7034, - "step": 914000 - }, - { - "epoch": 13.16, - "learning_rate": 6.22591248266945e-06, - "loss": 1.7012, - "step": 914500 - }, - { - "epoch": 13.17, - "learning_rate": 6.174193447883726e-06, - "loss": 1.7021, - "step": 915000 - }, - { - "epoch": 13.18, - "learning_rate": 6.12226670613099e-06, - "loss": 1.7031, - "step": 915500 - }, - { - "epoch": 13.18, - "learning_rate": 6.070547671345266e-06, - "loss": 1.6961, - "step": 916000 - }, - { - "epoch": 13.19, - "learning_rate": 6.018620929592531e-06, - "loss": 1.701, - "step": 916500 - }, - { - "epoch": 13.2, - "learning_rate": 5.966901894806807e-06, - "loss": 1.7007, - "step": 917000 - }, - { - "epoch": 13.21, - "learning_rate": 5.914975153054072e-06, - "loss": 1.6968, - "step": 917500 - }, - { - "epoch": 13.21, - "learning_rate": 5.863256118268347e-06, - "loss": 1.7013, - "step": 918000 - }, - { - "epoch": 13.22, - "learning_rate": 5.811329376515611e-06, - "loss": 1.7012, - "step": 918500 - }, - { - "epoch": 13.23, - "learning_rate": 5.7596103417298875e-06, - "loss": 1.706, - "step": 919000 - }, - { - "epoch": 13.24, - "learning_rate": 5.707683599977152e-06, - "loss": 1.6966, - "step": 919500 - }, - { - "epoch": 13.24, - "learning_rate": 5.655964565191428e-06, - "loss": 1.7014, - "step": 920000 - }, - { - "epoch": 13.25, - "learning_rate": 5.604037823438693e-06, - "loss": 1.6998, - "step": 920500 - }, - { - "epoch": 13.26, - "learning_rate": 5.5523187886529685e-06, - "loss": 1.7011, - "step": 921000 - }, - { - "epoch": 13.26, - "learning_rate": 5.5003920469002334e-06, - "loss": 1.6975, - "step": 921500 - }, - { - "epoch": 13.27, - "learning_rate": 5.4486730121145095e-06, - "loss": 1.7014, - "step": 922000 - }, - { - "epoch": 13.28, - "learning_rate": 5.3967462703617736e-06, - "loss": 1.6976, - "step": 922500 - }, - { - "epoch": 13.29, - "learning_rate": 5.34502723557605e-06, - "loss": 1.6994, - "step": 923000 - }, - { - "epoch": 13.29, - "learning_rate": 5.2931004938233145e-06, - "loss": 1.6963, - "step": 923500 - }, - { - "epoch": 13.3, - "learning_rate": 5.24138145903759e-06, - "loss": 1.7022, - "step": 924000 - }, - { - "epoch": 13.31, - "learning_rate": 5.189454717284855e-06, - "loss": 1.699, - "step": 924500 - }, - { - "epoch": 13.31, - "learning_rate": 5.137735682499131e-06, - "loss": 1.6969, - "step": 925000 - }, - { - "epoch": 13.32, - "learning_rate": 5.085808940746395e-06, - "loss": 1.6995, - "step": 925500 - }, - { - "epoch": 13.33, - "learning_rate": 5.034089905960672e-06, - "loss": 1.7032, - "step": 926000 - }, - { - "epoch": 13.34, - "learning_rate": 4.982163164207936e-06, - "loss": 1.7019, - "step": 926500 - }, - { - "epoch": 13.34, - "learning_rate": 4.930444129422211e-06, - "loss": 1.6999, - "step": 927000 - }, - { - "epoch": 13.35, - "learning_rate": 4.878517387669476e-06, - "loss": 1.6992, - "step": 927500 - }, - { - "epoch": 13.36, - "learning_rate": 4.826798352883752e-06, - "loss": 1.6981, - "step": 928000 - }, - { - "epoch": 13.36, - "learning_rate": 4.774871611131017e-06, - "loss": 1.6997, - "step": 928500 - }, - { - "epoch": 13.37, - "learning_rate": 4.723152576345293e-06, - "loss": 1.699, - "step": 929000 - }, - { - "epoch": 13.38, - "learning_rate": 4.671225834592557e-06, - "loss": 1.7004, - "step": 929500 - }, - { - "epoch": 13.39, - "learning_rate": 4.619506799806832e-06, - "loss": 1.6985, - "step": 930000 - }, - { - "epoch": 13.39, - "learning_rate": 4.567580058054097e-06, - "loss": 1.7005, - "step": 930500 - }, - { - "epoch": 13.4, - "learning_rate": 4.515861023268373e-06, - "loss": 1.6992, - "step": 931000 - }, - { - "epoch": 13.41, - "learning_rate": 4.463934281515638e-06, - "loss": 1.6971, - "step": 931500 - }, - { - "epoch": 13.42, - "learning_rate": 4.412215246729914e-06, - "loss": 1.6986, - "step": 932000 - }, - { - "epoch": 13.42, - "learning_rate": 4.360288504977179e-06, - "loss": 1.6984, - "step": 932500 - }, - { - "epoch": 13.43, - "learning_rate": 4.308569470191454e-06, - "loss": 1.6983, - "step": 933000 - }, - { - "epoch": 13.44, - "learning_rate": 4.256642728438719e-06, - "loss": 1.7026, - "step": 933500 - }, - { - "epoch": 13.44, - "learning_rate": 4.204923693652994e-06, - "loss": 1.696, - "step": 934000 - }, - { - "epoch": 13.45, - "learning_rate": 4.152996951900259e-06, - "loss": 1.7, - "step": 934500 - }, - { - "epoch": 13.46, - "learning_rate": 4.101277917114535e-06, - "loss": 1.6985, - "step": 935000 - }, - { - "epoch": 13.47, - "learning_rate": 4.0493511753618e-06, - "loss": 1.6992, - "step": 935500 - }, - { - "epoch": 13.47, - "learning_rate": 3.997632140576075e-06, - "loss": 1.6999, - "step": 936000 - }, - { - "epoch": 13.48, - "learning_rate": 3.94570539882334e-06, - "loss": 1.7012, - "step": 936500 - }, - { - "epoch": 13.49, - "learning_rate": 3.893986364037616e-06, - "loss": 1.6972, - "step": 937000 - }, - { - "epoch": 13.49, - "learning_rate": 3.84205962228488e-06, - "loss": 1.6928, - "step": 937500 - }, - { - "epoch": 13.5, - "learning_rate": 3.790340587499156e-06, - "loss": 1.6992, - "step": 938000 - }, - { - "epoch": 13.51, - "learning_rate": 3.7384138457464213e-06, - "loss": 1.6987, - "step": 938500 - }, - { - "epoch": 13.52, - "learning_rate": 3.686694810960697e-06, - "loss": 1.6999, - "step": 939000 - }, - { - "epoch": 13.52, - "learning_rate": 3.634768069207962e-06, - "loss": 1.701, - "step": 939500 - }, - { - "epoch": 13.53, - "learning_rate": 3.5830490344222375e-06, - "loss": 1.7007, - "step": 940000 - }, - { - "epoch": 13.54, - "learning_rate": 3.5311222926695015e-06, - "loss": 1.6933, - "step": 940500 - }, - { - "epoch": 13.54, - "learning_rate": 3.479403257883778e-06, - "loss": 1.6991, - "step": 941000 - }, - { - "epoch": 13.55, - "learning_rate": 3.4274765161310425e-06, - "loss": 1.698, - "step": 941500 - }, - { - "epoch": 13.56, - "learning_rate": 3.375757481345318e-06, - "loss": 1.697, - "step": 942000 - }, - { - "epoch": 13.57, - "learning_rate": 3.323830739592583e-06, - "loss": 1.7021, - "step": 942500 - }, - { - "epoch": 13.57, - "learning_rate": 3.2721117048068587e-06, - "loss": 1.6991, - "step": 943000 - }, - { - "epoch": 13.58, - "learning_rate": 3.2201849630541236e-06, - "loss": 1.7001, - "step": 943500 - }, - { - "epoch": 13.59, - "learning_rate": 3.1684659282683992e-06, - "loss": 1.7013, - "step": 944000 - }, - { - "epoch": 13.6, - "learning_rate": 3.116539186515664e-06, - "loss": 1.7002, - "step": 944500 - }, - { - "epoch": 13.6, - "learning_rate": 3.0648201517299398e-06, - "loss": 1.7008, - "step": 945000 - }, - { - "epoch": 13.61, - "learning_rate": 3.0128934099772042e-06, - "loss": 1.6958, - "step": 945500 - }, - { - "epoch": 13.62, - "learning_rate": 2.96117437519148e-06, - "loss": 1.6979, - "step": 946000 - }, - { - "epoch": 13.62, - "learning_rate": 2.9092476334387448e-06, - "loss": 1.6968, - "step": 946500 - }, - { - "epoch": 13.63, - "learning_rate": 2.8575285986530204e-06, - "loss": 1.6985, - "step": 947000 - }, - { - "epoch": 13.64, - "learning_rate": 2.8056018569002853e-06, - "loss": 1.6994, - "step": 947500 - }, - { - "epoch": 13.65, - "learning_rate": 2.753882822114561e-06, - "loss": 1.6951, - "step": 948000 - }, - { - "epoch": 13.65, - "learning_rate": 2.701956080361826e-06, - "loss": 1.698, - "step": 948500 - }, - { - "epoch": 13.66, - "learning_rate": 2.6502370455761015e-06, - "loss": 1.7016, - "step": 949000 - }, - { - "epoch": 13.67, - "learning_rate": 2.598310303823366e-06, - "loss": 1.6996, - "step": 949500 - }, - { - "epoch": 13.67, - "learning_rate": 2.5465912690376416e-06, - "loss": 1.7003, - "step": 950000 - }, - { - "epoch": 13.68, - "learning_rate": 2.494664527284907e-06, - "loss": 1.7003, - "step": 950500 - }, - { - "epoch": 13.69, - "learning_rate": 2.4429454924991826e-06, - "loss": 1.6981, - "step": 951000 - }, - { - "epoch": 13.7, - "learning_rate": 2.391018750746447e-06, - "loss": 1.6989, - "step": 951500 - }, - { - "epoch": 13.7, - "learning_rate": 2.3392997159607227e-06, - "loss": 1.6978, - "step": 952000 - }, - { - "epoch": 13.71, - "learning_rate": 2.287372974207987e-06, - "loss": 1.6977, - "step": 952500 - }, - { - "epoch": 13.72, - "learning_rate": 2.2356539394222632e-06, - "loss": 1.6967, - "step": 953000 - }, - { - "epoch": 13.72, - "learning_rate": 2.183727197669528e-06, - "loss": 1.6991, - "step": 953500 - }, - { - "epoch": 13.73, - "learning_rate": 2.1320081628838038e-06, - "loss": 1.6996, - "step": 954000 - }, - { - "epoch": 13.74, - "learning_rate": 2.0800814211310682e-06, - "loss": 1.6975, - "step": 954500 - }, - { - "epoch": 13.75, - "learning_rate": 2.0283623863453443e-06, - "loss": 1.6967, - "step": 955000 - }, - { - "epoch": 13.75, - "learning_rate": 1.9764356445926088e-06, - "loss": 1.6981, - "step": 955500 - }, - { - "epoch": 13.76, - "learning_rate": 1.9247166098068844e-06, - "loss": 1.6951, - "step": 956000 - }, - { - "epoch": 13.77, - "learning_rate": 1.872789868054149e-06, - "loss": 1.6957, - "step": 956500 - }, - { - "epoch": 13.78, - "learning_rate": 1.8210708332684252e-06, - "loss": 1.6952, - "step": 957000 - }, - { - "epoch": 13.78, - "learning_rate": 1.7691440915156898e-06, - "loss": 1.6982, - "step": 957500 - }, - { - "epoch": 13.79, - "learning_rate": 1.7174250567299655e-06, - "loss": 1.6967, - "step": 958000 - }, - { - "epoch": 13.8, - "learning_rate": 1.6654983149772302e-06, - "loss": 1.6964, - "step": 958500 - }, - { - "epoch": 13.8, - "learning_rate": 1.6137792801915058e-06, - "loss": 1.7014, - "step": 959000 - }, - { - "epoch": 13.81, - "learning_rate": 1.5618525384387705e-06, - "loss": 1.6958, - "step": 959500 - }, - { - "epoch": 13.82, - "learning_rate": 1.5101335036530463e-06, - "loss": 1.6981, - "step": 960000 - }, - { - "epoch": 13.83, - "learning_rate": 1.458206761900311e-06, - "loss": 1.6965, - "step": 960500 - }, - { - "epoch": 13.83, - "learning_rate": 1.4064877271145869e-06, - "loss": 1.6983, - "step": 961000 - }, - { - "epoch": 13.84, - "learning_rate": 1.3545609853618516e-06, - "loss": 1.6996, - "step": 961500 - }, - { - "epoch": 13.85, - "learning_rate": 1.3028419505761274e-06, - "loss": 1.6969, - "step": 962000 - }, - { - "epoch": 13.85, - "learning_rate": 1.250915208823392e-06, - "loss": 1.6967, - "step": 962500 - }, - { - "epoch": 13.86, - "learning_rate": 1.1991961740376677e-06, - "loss": 1.696, - "step": 963000 - }, - { - "epoch": 13.87, - "learning_rate": 1.1472694322849324e-06, - "loss": 1.6942, - "step": 963500 - }, - { - "epoch": 13.88, - "learning_rate": 1.0955503974992083e-06, - "loss": 1.6953, - "step": 964000 - }, - { - "epoch": 13.88, - "learning_rate": 1.043623655746473e-06, - "loss": 1.7009, - "step": 964500 - }, - { - "epoch": 13.89, - "learning_rate": 9.919046209607486e-07, - "loss": 1.6978, - "step": 965000 - }, - { - "epoch": 13.9, - "learning_rate": 9.399778792080134e-07, - "loss": 1.6983, - "step": 965500 - }, - { - "epoch": 13.9, - "learning_rate": 8.88258844422289e-07, - "loss": 1.7025, - "step": 966000 - }, - { - "epoch": 13.91, - "learning_rate": 8.363321026695538e-07, - "loss": 1.6945, - "step": 966500 - }, - { - "epoch": 13.92, - "learning_rate": 7.846130678838295e-07, - "loss": 1.6922, - "step": 967000 - }, - { - "epoch": 13.93, - "learning_rate": 7.326863261310943e-07, - "loss": 1.6949, - "step": 967500 - }, - { - "epoch": 13.93, - "learning_rate": 6.8096729134537e-07, - "loss": 1.6979, - "step": 968000 - }, - { - "epoch": 13.94, - "learning_rate": 6.290405495926348e-07, - "loss": 1.695, - "step": 968500 - }, - { - "epoch": 13.95, - "learning_rate": 5.773215148069104e-07, - "loss": 1.6957, - "step": 969000 - }, - { - "epoch": 13.96, - "learning_rate": 5.253947730541751e-07, - "loss": 1.6958, - "step": 969500 - }, - { - "epoch": 13.96, - "learning_rate": 4.736757382684509e-07, - "loss": 1.6954, - "step": 970000 - }, - { - "epoch": 13.97, - "learning_rate": 4.217489965157156e-07, - "loss": 1.6977, - "step": 970500 - }, - { - "epoch": 13.98, - "learning_rate": 3.700299617299913e-07, - "loss": 1.6977, - "step": 971000 - }, - { - "epoch": 13.98, - "learning_rate": 3.181032199772561e-07, - "loss": 1.6974, - "step": 971500 - }, - { - "epoch": 13.99, - "learning_rate": 2.663841851915318e-07, - "loss": 1.6946, - "step": 972000 - }, - { - "epoch": 14.0, - "learning_rate": 2.1445744343879657e-07, - "loss": 1.6982, - "step": 972500 - }, - { - "epoch": 14.0, - "eval_accuracy": 0.6685586444048556, - "eval_loss": 1.5693359375, - "eval_runtime": 652.5674, - "eval_samples_per_second": 825.872, - "eval_steps_per_second": 34.412, - "step": 972622 - } - ], - "max_steps": 972622, - "num_train_epochs": 14, - "total_flos": 6.971418159270068e+18, - "trial_name": null, - "trial_params": null -}