schnell's picture
Training in progress, epoch 13
eaf38e5
raw
history blame
226 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 13.0,
"global_step": 903149,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 6.763399204600845e-05,
"loss": 6.9832,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 7.520427275451058e-05,
"loss": 5.5271,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 7.961254711978457e-05,
"loss": 4.6913,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 8.275270773909965e-05,
"loss": 4.2907,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 8.517839539271491e-05,
"loss": 4.0302,
"step": 2500
},
{
"epoch": 0.04,
"learning_rate": 8.716825914421595e-05,
"loss": 3.8379,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 8.884385574179297e-05,
"loss": 3.6906,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 9.030114272368873e-05,
"loss": 3.5642,
"step": 4000
},
{
"epoch": 0.06,
"learning_rate": 9.158138540964539e-05,
"loss": 3.4704,
"step": 4500
},
{
"epoch": 0.07,
"learning_rate": 9.273119601765861e-05,
"loss": 3.3821,
"step": 5000
},
{
"epoch": 0.08,
"learning_rate": 9.376714826181023e-05,
"loss": 3.3064,
"step": 5500
},
{
"epoch": 0.09,
"learning_rate": 9.471669412880503e-05,
"loss": 3.2476,
"step": 6000
},
{
"epoch": 0.09,
"learning_rate": 9.558668751136338e-05,
"loss": 3.1892,
"step": 6500
},
{
"epoch": 0.1,
"learning_rate": 9.639540886232853e-05,
"loss": 3.1299,
"step": 7000
},
{
"epoch": 0.11,
"learning_rate": 9.714529240377096e-05,
"loss": 3.0869,
"step": 7500
},
{
"epoch": 0.12,
"learning_rate": 9.784957770827781e-05,
"loss": 3.0405,
"step": 8000
},
{
"epoch": 0.12,
"learning_rate": 9.850850144881638e-05,
"loss": 2.9989,
"step": 8500
},
{
"epoch": 0.13,
"learning_rate": 9.91322455339213e-05,
"loss": 2.9583,
"step": 9000
},
{
"epoch": 0.14,
"learning_rate": 9.97198937413398e-05,
"loss": 2.923,
"step": 9500
},
{
"epoch": 0.14,
"learning_rate": 9.997382892215664e-05,
"loss": 2.8859,
"step": 10000
},
{
"epoch": 0.15,
"learning_rate": 9.99221098873709e-05,
"loss": 2.8536,
"step": 10500
},
{
"epoch": 0.16,
"learning_rate": 9.987018314561817e-05,
"loss": 2.8222,
"step": 11000
},
{
"epoch": 0.17,
"learning_rate": 9.981846411083245e-05,
"loss": 2.7972,
"step": 11500
},
{
"epoch": 0.17,
"learning_rate": 9.97665373690797e-05,
"loss": 2.7657,
"step": 12000
},
{
"epoch": 0.18,
"learning_rate": 9.971481833429398e-05,
"loss": 2.7461,
"step": 12500
},
{
"epoch": 0.19,
"learning_rate": 9.966289159254125e-05,
"loss": 2.721,
"step": 13000
},
{
"epoch": 0.19,
"learning_rate": 9.961117255775553e-05,
"loss": 2.7012,
"step": 13500
},
{
"epoch": 0.2,
"learning_rate": 9.95592458160028e-05,
"loss": 2.6758,
"step": 14000
},
{
"epoch": 0.21,
"learning_rate": 9.950752678121706e-05,
"loss": 2.666,
"step": 14500
},
{
"epoch": 0.22,
"learning_rate": 9.945560003946433e-05,
"loss": 2.6412,
"step": 15000
},
{
"epoch": 0.22,
"learning_rate": 9.94038810046786e-05,
"loss": 2.6264,
"step": 15500
},
{
"epoch": 0.23,
"learning_rate": 9.935195426292587e-05,
"loss": 2.6089,
"step": 16000
},
{
"epoch": 0.24,
"learning_rate": 9.930023522814015e-05,
"loss": 2.5915,
"step": 16500
},
{
"epoch": 0.24,
"learning_rate": 9.92483084863874e-05,
"loss": 2.5783,
"step": 17000
},
{
"epoch": 0.25,
"learning_rate": 9.919658945160168e-05,
"loss": 2.558,
"step": 17500
},
{
"epoch": 0.26,
"learning_rate": 9.914466270984895e-05,
"loss": 2.5481,
"step": 18000
},
{
"epoch": 0.27,
"learning_rate": 9.909294367506321e-05,
"loss": 2.5376,
"step": 18500
},
{
"epoch": 0.27,
"learning_rate": 9.90410169333105e-05,
"loss": 2.5236,
"step": 19000
},
{
"epoch": 0.28,
"learning_rate": 9.898929789852477e-05,
"loss": 2.5129,
"step": 19500
},
{
"epoch": 0.29,
"learning_rate": 9.893737115677203e-05,
"loss": 2.5024,
"step": 20000
},
{
"epoch": 0.3,
"learning_rate": 9.88856521219863e-05,
"loss": 2.4841,
"step": 20500
},
{
"epoch": 0.3,
"learning_rate": 9.883372538023356e-05,
"loss": 2.4802,
"step": 21000
},
{
"epoch": 0.31,
"learning_rate": 9.878200634544784e-05,
"loss": 2.4725,
"step": 21500
},
{
"epoch": 0.32,
"learning_rate": 9.873007960369511e-05,
"loss": 2.4581,
"step": 22000
},
{
"epoch": 0.32,
"learning_rate": 9.867836056890939e-05,
"loss": 2.4493,
"step": 22500
},
{
"epoch": 0.33,
"learning_rate": 9.862643382715665e-05,
"loss": 2.4399,
"step": 23000
},
{
"epoch": 0.34,
"learning_rate": 9.857471479237093e-05,
"loss": 2.4297,
"step": 23500
},
{
"epoch": 0.35,
"learning_rate": 9.85227880506182e-05,
"loss": 2.4259,
"step": 24000
},
{
"epoch": 0.35,
"learning_rate": 9.847106901583248e-05,
"loss": 2.4199,
"step": 24500
},
{
"epoch": 0.36,
"learning_rate": 9.841914227407973e-05,
"loss": 2.4068,
"step": 25000
},
{
"epoch": 0.37,
"learning_rate": 9.836742323929401e-05,
"loss": 2.4023,
"step": 25500
},
{
"epoch": 0.37,
"learning_rate": 9.831549649754126e-05,
"loss": 2.3903,
"step": 26000
},
{
"epoch": 0.38,
"learning_rate": 9.826377746275554e-05,
"loss": 2.3849,
"step": 26500
},
{
"epoch": 0.39,
"learning_rate": 9.821185072100282e-05,
"loss": 2.3787,
"step": 27000
},
{
"epoch": 0.4,
"learning_rate": 9.816013168621709e-05,
"loss": 2.3722,
"step": 27500
},
{
"epoch": 0.4,
"learning_rate": 9.810820494446436e-05,
"loss": 2.3646,
"step": 28000
},
{
"epoch": 0.41,
"learning_rate": 9.805648590967863e-05,
"loss": 2.3537,
"step": 28500
},
{
"epoch": 0.42,
"learning_rate": 9.800455916792589e-05,
"loss": 2.353,
"step": 29000
},
{
"epoch": 0.42,
"learning_rate": 9.795284013314017e-05,
"loss": 2.3468,
"step": 29500
},
{
"epoch": 0.43,
"learning_rate": 9.790091339138744e-05,
"loss": 2.3371,
"step": 30000
},
{
"epoch": 0.44,
"learning_rate": 9.784919435660171e-05,
"loss": 2.3346,
"step": 30500
},
{
"epoch": 0.45,
"learning_rate": 9.779726761484898e-05,
"loss": 2.3244,
"step": 31000
},
{
"epoch": 0.45,
"learning_rate": 9.774554858006325e-05,
"loss": 2.3233,
"step": 31500
},
{
"epoch": 0.46,
"learning_rate": 9.769362183831051e-05,
"loss": 2.3214,
"step": 32000
},
{
"epoch": 0.47,
"learning_rate": 9.764190280352479e-05,
"loss": 2.312,
"step": 32500
},
{
"epoch": 0.48,
"learning_rate": 9.758997606177206e-05,
"loss": 2.3099,
"step": 33000
},
{
"epoch": 0.48,
"learning_rate": 9.753825702698634e-05,
"loss": 2.3025,
"step": 33500
},
{
"epoch": 0.49,
"learning_rate": 9.748633028523359e-05,
"loss": 2.2935,
"step": 34000
},
{
"epoch": 0.5,
"learning_rate": 9.743461125044787e-05,
"loss": 2.2924,
"step": 34500
},
{
"epoch": 0.5,
"learning_rate": 9.738268450869514e-05,
"loss": 2.2895,
"step": 35000
},
{
"epoch": 0.51,
"learning_rate": 9.73309654739094e-05,
"loss": 2.2806,
"step": 35500
},
{
"epoch": 0.52,
"learning_rate": 9.727903873215668e-05,
"loss": 2.2815,
"step": 36000
},
{
"epoch": 0.53,
"learning_rate": 9.722731969737096e-05,
"loss": 2.2735,
"step": 36500
},
{
"epoch": 0.53,
"learning_rate": 9.717539295561822e-05,
"loss": 2.2697,
"step": 37000
},
{
"epoch": 0.54,
"learning_rate": 9.71236739208325e-05,
"loss": 2.2686,
"step": 37500
},
{
"epoch": 0.55,
"learning_rate": 9.707174717907976e-05,
"loss": 2.2617,
"step": 38000
},
{
"epoch": 0.55,
"learning_rate": 9.702002814429404e-05,
"loss": 2.2561,
"step": 38500
},
{
"epoch": 0.56,
"learning_rate": 9.69681014025413e-05,
"loss": 2.2533,
"step": 39000
},
{
"epoch": 0.57,
"learning_rate": 9.691638236775557e-05,
"loss": 2.2484,
"step": 39500
},
{
"epoch": 0.58,
"learning_rate": 9.686445562600284e-05,
"loss": 2.2471,
"step": 40000
},
{
"epoch": 0.58,
"learning_rate": 9.681273659121712e-05,
"loss": 2.2443,
"step": 40500
},
{
"epoch": 0.59,
"learning_rate": 9.676080984946439e-05,
"loss": 2.2331,
"step": 41000
},
{
"epoch": 0.6,
"learning_rate": 9.670909081467867e-05,
"loss": 2.2349,
"step": 41500
},
{
"epoch": 0.6,
"learning_rate": 9.665716407292592e-05,
"loss": 2.2296,
"step": 42000
},
{
"epoch": 0.61,
"learning_rate": 9.66054450381402e-05,
"loss": 2.2296,
"step": 42500
},
{
"epoch": 0.62,
"learning_rate": 9.655351829638745e-05,
"loss": 2.2238,
"step": 43000
},
{
"epoch": 0.63,
"learning_rate": 9.650179926160173e-05,
"loss": 2.2193,
"step": 43500
},
{
"epoch": 0.63,
"learning_rate": 9.6449872519849e-05,
"loss": 2.2185,
"step": 44000
},
{
"epoch": 0.64,
"learning_rate": 9.639815348506328e-05,
"loss": 2.2157,
"step": 44500
},
{
"epoch": 0.65,
"learning_rate": 9.634622674331054e-05,
"loss": 2.2092,
"step": 45000
},
{
"epoch": 0.65,
"learning_rate": 9.629450770852482e-05,
"loss": 2.2096,
"step": 45500
},
{
"epoch": 0.66,
"learning_rate": 9.624258096677208e-05,
"loss": 2.2026,
"step": 46000
},
{
"epoch": 0.67,
"learning_rate": 9.619086193198635e-05,
"loss": 2.1971,
"step": 46500
},
{
"epoch": 0.68,
"learning_rate": 9.613893519023362e-05,
"loss": 2.1962,
"step": 47000
},
{
"epoch": 0.68,
"learning_rate": 9.60872161554479e-05,
"loss": 2.1918,
"step": 47500
},
{
"epoch": 0.69,
"learning_rate": 9.603528941369516e-05,
"loss": 2.1932,
"step": 48000
},
{
"epoch": 0.7,
"learning_rate": 9.598357037890943e-05,
"loss": 2.1844,
"step": 48500
},
{
"epoch": 0.71,
"learning_rate": 9.59316436371567e-05,
"loss": 2.1872,
"step": 49000
},
{
"epoch": 0.71,
"learning_rate": 9.587992460237098e-05,
"loss": 2.1781,
"step": 49500
},
{
"epoch": 0.72,
"learning_rate": 9.582799786061825e-05,
"loss": 2.1797,
"step": 50000
},
{
"epoch": 0.73,
"learning_rate": 9.577627882583253e-05,
"loss": 2.1793,
"step": 50500
},
{
"epoch": 0.73,
"learning_rate": 9.572435208407978e-05,
"loss": 2.1736,
"step": 51000
},
{
"epoch": 0.74,
"learning_rate": 9.567263304929406e-05,
"loss": 2.1723,
"step": 51500
},
{
"epoch": 0.75,
"learning_rate": 9.562070630754133e-05,
"loss": 2.1732,
"step": 52000
},
{
"epoch": 0.76,
"learning_rate": 9.55689872727556e-05,
"loss": 2.1654,
"step": 52500
},
{
"epoch": 0.76,
"learning_rate": 9.551706053100287e-05,
"loss": 2.1678,
"step": 53000
},
{
"epoch": 0.77,
"learning_rate": 9.546534149621714e-05,
"loss": 2.1632,
"step": 53500
},
{
"epoch": 0.78,
"learning_rate": 9.54134147544644e-05,
"loss": 2.158,
"step": 54000
},
{
"epoch": 0.78,
"learning_rate": 9.536169571967868e-05,
"loss": 2.1551,
"step": 54500
},
{
"epoch": 0.79,
"learning_rate": 9.530976897792595e-05,
"loss": 2.1553,
"step": 55000
},
{
"epoch": 0.8,
"learning_rate": 9.525804994314023e-05,
"loss": 2.1469,
"step": 55500
},
{
"epoch": 0.81,
"learning_rate": 9.520612320138748e-05,
"loss": 2.1517,
"step": 56000
},
{
"epoch": 0.81,
"learning_rate": 9.515440416660176e-05,
"loss": 2.1499,
"step": 56500
},
{
"epoch": 0.82,
"learning_rate": 9.510247742484903e-05,
"loss": 2.144,
"step": 57000
},
{
"epoch": 0.83,
"learning_rate": 9.50507583900633e-05,
"loss": 2.1478,
"step": 57500
},
{
"epoch": 0.83,
"learning_rate": 9.499883164831058e-05,
"loss": 2.1369,
"step": 58000
},
{
"epoch": 0.84,
"learning_rate": 9.494711261352485e-05,
"loss": 2.1394,
"step": 58500
},
{
"epoch": 0.85,
"learning_rate": 9.489518587177211e-05,
"loss": 2.1391,
"step": 59000
},
{
"epoch": 0.86,
"learning_rate": 9.484346683698639e-05,
"loss": 2.1344,
"step": 59500
},
{
"epoch": 0.86,
"learning_rate": 9.479154009523364e-05,
"loss": 2.1371,
"step": 60000
},
{
"epoch": 0.87,
"learning_rate": 9.473982106044792e-05,
"loss": 2.1289,
"step": 60500
},
{
"epoch": 0.88,
"learning_rate": 9.468789431869519e-05,
"loss": 2.1271,
"step": 61000
},
{
"epoch": 0.89,
"learning_rate": 9.463617528390946e-05,
"loss": 2.1276,
"step": 61500
},
{
"epoch": 0.89,
"learning_rate": 9.458424854215673e-05,
"loss": 2.1255,
"step": 62000
},
{
"epoch": 0.9,
"learning_rate": 9.453252950737101e-05,
"loss": 2.1192,
"step": 62500
},
{
"epoch": 0.91,
"learning_rate": 9.448060276561828e-05,
"loss": 2.1166,
"step": 63000
},
{
"epoch": 0.91,
"learning_rate": 9.442888373083256e-05,
"loss": 2.1171,
"step": 63500
},
{
"epoch": 0.92,
"learning_rate": 9.437695698907981e-05,
"loss": 2.1127,
"step": 64000
},
{
"epoch": 0.93,
"learning_rate": 9.432523795429409e-05,
"loss": 2.1116,
"step": 64500
},
{
"epoch": 0.94,
"learning_rate": 9.427331121254134e-05,
"loss": 2.1121,
"step": 65000
},
{
"epoch": 0.94,
"learning_rate": 9.422159217775562e-05,
"loss": 2.1093,
"step": 65500
},
{
"epoch": 0.95,
"learning_rate": 9.41696654360029e-05,
"loss": 2.1103,
"step": 66000
},
{
"epoch": 0.96,
"learning_rate": 9.411794640121717e-05,
"loss": 2.1073,
"step": 66500
},
{
"epoch": 0.96,
"learning_rate": 9.406601965946444e-05,
"loss": 2.1057,
"step": 67000
},
{
"epoch": 0.97,
"learning_rate": 9.401430062467871e-05,
"loss": 2.1012,
"step": 67500
},
{
"epoch": 0.98,
"learning_rate": 9.396237388292597e-05,
"loss": 2.1012,
"step": 68000
},
{
"epoch": 0.99,
"learning_rate": 9.391065484814025e-05,
"loss": 2.1041,
"step": 68500
},
{
"epoch": 0.99,
"learning_rate": 9.385872810638751e-05,
"loss": 2.0974,
"step": 69000
},
{
"epoch": 1.0,
"eval_accuracy": 0.6071486253641302,
"eval_loss": 1.974609375,
"eval_runtime": 652.8749,
"eval_samples_per_second": 825.483,
"eval_steps_per_second": 34.396,
"step": 69473
},
{
"epoch": 1.0,
"learning_rate": 9.380700907160179e-05,
"loss": 2.0964,
"step": 69500
},
{
"epoch": 1.01,
"learning_rate": 9.375508232984905e-05,
"loss": 2.094,
"step": 70000
},
{
"epoch": 1.01,
"learning_rate": 9.370336329506332e-05,
"loss": 2.09,
"step": 70500
},
{
"epoch": 1.02,
"learning_rate": 9.365143655331059e-05,
"loss": 2.0926,
"step": 71000
},
{
"epoch": 1.03,
"learning_rate": 9.359971751852487e-05,
"loss": 2.0925,
"step": 71500
},
{
"epoch": 1.04,
"learning_rate": 9.354779077677214e-05,
"loss": 2.0854,
"step": 72000
},
{
"epoch": 1.04,
"learning_rate": 9.349607174198642e-05,
"loss": 2.086,
"step": 72500
},
{
"epoch": 1.05,
"learning_rate": 9.344414500023367e-05,
"loss": 2.0858,
"step": 73000
},
{
"epoch": 1.06,
"learning_rate": 9.339242596544795e-05,
"loss": 2.0805,
"step": 73500
},
{
"epoch": 1.07,
"learning_rate": 9.33404992236952e-05,
"loss": 2.0817,
"step": 74000
},
{
"epoch": 1.07,
"learning_rate": 9.328878018890948e-05,
"loss": 2.0814,
"step": 74500
},
{
"epoch": 1.08,
"learning_rate": 9.323685344715676e-05,
"loss": 2.0786,
"step": 75000
},
{
"epoch": 1.09,
"learning_rate": 9.318513441237103e-05,
"loss": 2.0745,
"step": 75500
},
{
"epoch": 1.09,
"learning_rate": 9.31332076706183e-05,
"loss": 2.0713,
"step": 76000
},
{
"epoch": 1.1,
"learning_rate": 9.308148863583257e-05,
"loss": 2.0749,
"step": 76500
},
{
"epoch": 1.11,
"learning_rate": 9.302956189407984e-05,
"loss": 2.0731,
"step": 77000
},
{
"epoch": 1.12,
"learning_rate": 9.297784285929412e-05,
"loss": 2.0711,
"step": 77500
},
{
"epoch": 1.12,
"learning_rate": 9.292591611754137e-05,
"loss": 2.0667,
"step": 78000
},
{
"epoch": 1.13,
"learning_rate": 9.287419708275565e-05,
"loss": 2.068,
"step": 78500
},
{
"epoch": 1.14,
"learning_rate": 9.282227034100292e-05,
"loss": 2.0654,
"step": 79000
},
{
"epoch": 1.14,
"learning_rate": 9.277055130621718e-05,
"loss": 2.0655,
"step": 79500
},
{
"epoch": 1.15,
"learning_rate": 9.271862456446447e-05,
"loss": 2.062,
"step": 80000
},
{
"epoch": 1.16,
"learning_rate": 9.266690552967874e-05,
"loss": 2.0595,
"step": 80500
},
{
"epoch": 1.17,
"learning_rate": 9.2614978787926e-05,
"loss": 2.0599,
"step": 81000
},
{
"epoch": 1.17,
"learning_rate": 9.256325975314028e-05,
"loss": 2.0633,
"step": 81500
},
{
"epoch": 1.18,
"learning_rate": 9.251133301138753e-05,
"loss": 2.0601,
"step": 82000
},
{
"epoch": 1.19,
"learning_rate": 9.245961397660181e-05,
"loss": 2.0581,
"step": 82500
},
{
"epoch": 1.19,
"learning_rate": 9.240768723484908e-05,
"loss": 2.0552,
"step": 83000
},
{
"epoch": 1.2,
"learning_rate": 9.235596820006336e-05,
"loss": 2.0571,
"step": 83500
},
{
"epoch": 1.21,
"learning_rate": 9.230404145831062e-05,
"loss": 2.0519,
"step": 84000
},
{
"epoch": 1.22,
"learning_rate": 9.22523224235249e-05,
"loss": 2.0507,
"step": 84500
},
{
"epoch": 1.22,
"learning_rate": 9.220039568177216e-05,
"loss": 2.0483,
"step": 85000
},
{
"epoch": 1.23,
"learning_rate": 9.214867664698643e-05,
"loss": 2.046,
"step": 85500
},
{
"epoch": 1.24,
"learning_rate": 9.20967499052337e-05,
"loss": 2.0438,
"step": 86000
},
{
"epoch": 1.25,
"learning_rate": 9.204503087044798e-05,
"loss": 2.0519,
"step": 86500
},
{
"epoch": 1.25,
"learning_rate": 9.199310412869523e-05,
"loss": 2.0494,
"step": 87000
},
{
"epoch": 1.26,
"learning_rate": 9.194138509390951e-05,
"loss": 2.0425,
"step": 87500
},
{
"epoch": 1.27,
"learning_rate": 9.188945835215678e-05,
"loss": 2.0423,
"step": 88000
},
{
"epoch": 1.27,
"learning_rate": 9.183773931737106e-05,
"loss": 2.0402,
"step": 88500
},
{
"epoch": 1.28,
"learning_rate": 9.178581257561833e-05,
"loss": 2.0439,
"step": 89000
},
{
"epoch": 1.29,
"learning_rate": 9.17340935408326e-05,
"loss": 2.0391,
"step": 89500
},
{
"epoch": 1.3,
"learning_rate": 9.168216679907986e-05,
"loss": 2.0378,
"step": 90000
},
{
"epoch": 1.3,
"learning_rate": 9.163044776429414e-05,
"loss": 2.0397,
"step": 90500
},
{
"epoch": 1.31,
"learning_rate": 9.15785210225414e-05,
"loss": 2.0361,
"step": 91000
},
{
"epoch": 1.32,
"learning_rate": 9.152680198775568e-05,
"loss": 2.0351,
"step": 91500
},
{
"epoch": 1.32,
"learning_rate": 9.147487524600295e-05,
"loss": 2.0312,
"step": 92000
},
{
"epoch": 1.33,
"learning_rate": 9.142315621121722e-05,
"loss": 2.0313,
"step": 92500
},
{
"epoch": 1.34,
"learning_rate": 9.137122946946448e-05,
"loss": 2.0284,
"step": 93000
},
{
"epoch": 1.35,
"learning_rate": 9.131951043467876e-05,
"loss": 2.028,
"step": 93500
},
{
"epoch": 1.35,
"learning_rate": 9.126758369292603e-05,
"loss": 2.0306,
"step": 94000
},
{
"epoch": 1.36,
"learning_rate": 9.121586465814031e-05,
"loss": 2.0318,
"step": 94500
},
{
"epoch": 1.37,
"learning_rate": 9.116393791638756e-05,
"loss": 2.0303,
"step": 95000
},
{
"epoch": 1.37,
"learning_rate": 9.111221888160184e-05,
"loss": 2.0246,
"step": 95500
},
{
"epoch": 1.38,
"learning_rate": 9.106029213984911e-05,
"loss": 2.0279,
"step": 96000
},
{
"epoch": 1.39,
"learning_rate": 9.100857310506337e-05,
"loss": 2.0274,
"step": 96500
},
{
"epoch": 1.4,
"learning_rate": 9.095664636331065e-05,
"loss": 2.0175,
"step": 97000
},
{
"epoch": 1.4,
"learning_rate": 9.090492732852492e-05,
"loss": 2.0204,
"step": 97500
},
{
"epoch": 1.41,
"learning_rate": 9.085300058677219e-05,
"loss": 2.0237,
"step": 98000
},
{
"epoch": 1.42,
"learning_rate": 9.080128155198646e-05,
"loss": 2.0192,
"step": 98500
},
{
"epoch": 1.43,
"learning_rate": 9.074935481023372e-05,
"loss": 2.0134,
"step": 99000
},
{
"epoch": 1.43,
"learning_rate": 9.0697635775448e-05,
"loss": 2.0191,
"step": 99500
},
{
"epoch": 1.44,
"learning_rate": 9.064570903369527e-05,
"loss": 2.0198,
"step": 100000
},
{
"epoch": 1.45,
"learning_rate": 9.059398999890954e-05,
"loss": 2.0122,
"step": 100500
},
{
"epoch": 1.45,
"learning_rate": 9.054206325715681e-05,
"loss": 2.014,
"step": 101000
},
{
"epoch": 1.46,
"learning_rate": 9.049034422237108e-05,
"loss": 2.0123,
"step": 101500
},
{
"epoch": 1.47,
"learning_rate": 9.043841748061834e-05,
"loss": 2.0093,
"step": 102000
},
{
"epoch": 1.48,
"learning_rate": 9.038669844583262e-05,
"loss": 2.0113,
"step": 102500
},
{
"epoch": 1.48,
"learning_rate": 9.033477170407989e-05,
"loss": 2.0137,
"step": 103000
},
{
"epoch": 1.49,
"learning_rate": 9.028305266929417e-05,
"loss": 2.0062,
"step": 103500
},
{
"epoch": 1.5,
"learning_rate": 9.023112592754142e-05,
"loss": 2.0074,
"step": 104000
},
{
"epoch": 1.5,
"learning_rate": 9.01794068927557e-05,
"loss": 2.0021,
"step": 104500
},
{
"epoch": 1.51,
"learning_rate": 9.012748015100297e-05,
"loss": 2.0069,
"step": 105000
},
{
"epoch": 1.52,
"learning_rate": 9.007576111621725e-05,
"loss": 2.0073,
"step": 105500
},
{
"epoch": 1.53,
"learning_rate": 9.002383437446451e-05,
"loss": 2.0059,
"step": 106000
},
{
"epoch": 1.53,
"learning_rate": 8.997211533967879e-05,
"loss": 2.0013,
"step": 106500
},
{
"epoch": 1.54,
"learning_rate": 8.992018859792605e-05,
"loss": 2.0013,
"step": 107000
},
{
"epoch": 1.55,
"learning_rate": 8.986846956314032e-05,
"loss": 2.0043,
"step": 107500
},
{
"epoch": 1.55,
"learning_rate": 8.981654282138759e-05,
"loss": 2.0009,
"step": 108000
},
{
"epoch": 1.56,
"learning_rate": 8.976482378660187e-05,
"loss": 2.0021,
"step": 108500
},
{
"epoch": 1.57,
"learning_rate": 8.971289704484913e-05,
"loss": 2.0004,
"step": 109000
},
{
"epoch": 1.58,
"learning_rate": 8.96611780100634e-05,
"loss": 1.9988,
"step": 109500
},
{
"epoch": 1.58,
"learning_rate": 8.960925126831067e-05,
"loss": 2.0009,
"step": 110000
},
{
"epoch": 1.59,
"learning_rate": 8.955753223352495e-05,
"loss": 1.9925,
"step": 110500
},
{
"epoch": 1.6,
"learning_rate": 8.950560549177222e-05,
"loss": 1.9919,
"step": 111000
},
{
"epoch": 1.6,
"learning_rate": 8.94538864569865e-05,
"loss": 1.9969,
"step": 111500
},
{
"epoch": 1.61,
"learning_rate": 8.940195971523375e-05,
"loss": 1.9928,
"step": 112000
},
{
"epoch": 1.62,
"learning_rate": 8.935024068044803e-05,
"loss": 1.996,
"step": 112500
},
{
"epoch": 1.63,
"learning_rate": 8.929831393869528e-05,
"loss": 1.9906,
"step": 113000
},
{
"epoch": 1.63,
"learning_rate": 8.924659490390956e-05,
"loss": 1.9976,
"step": 113500
},
{
"epoch": 1.64,
"learning_rate": 8.919466816215684e-05,
"loss": 1.9927,
"step": 114000
},
{
"epoch": 1.65,
"learning_rate": 8.91429491273711e-05,
"loss": 1.9937,
"step": 114500
},
{
"epoch": 1.66,
"learning_rate": 8.909102238561837e-05,
"loss": 1.9887,
"step": 115000
},
{
"epoch": 1.66,
"learning_rate": 8.903930335083265e-05,
"loss": 1.9878,
"step": 115500
},
{
"epoch": 1.67,
"learning_rate": 8.898737660907991e-05,
"loss": 1.992,
"step": 116000
},
{
"epoch": 1.68,
"learning_rate": 8.89356575742942e-05,
"loss": 1.9836,
"step": 116500
},
{
"epoch": 1.68,
"learning_rate": 8.888373083254145e-05,
"loss": 1.987,
"step": 117000
},
{
"epoch": 1.69,
"learning_rate": 8.883201179775573e-05,
"loss": 1.9801,
"step": 117500
},
{
"epoch": 1.7,
"learning_rate": 8.8780085056003e-05,
"loss": 1.9816,
"step": 118000
},
{
"epoch": 1.71,
"learning_rate": 8.872836602121726e-05,
"loss": 1.9911,
"step": 118500
},
{
"epoch": 1.71,
"learning_rate": 8.867643927946455e-05,
"loss": 1.9876,
"step": 119000
},
{
"epoch": 1.72,
"learning_rate": 8.862472024467881e-05,
"loss": 1.9835,
"step": 119500
},
{
"epoch": 1.73,
"learning_rate": 8.857279350292608e-05,
"loss": 1.9814,
"step": 120000
},
{
"epoch": 1.73,
"learning_rate": 8.852107446814036e-05,
"loss": 1.9809,
"step": 120500
},
{
"epoch": 1.74,
"learning_rate": 8.846914772638761e-05,
"loss": 1.9794,
"step": 121000
},
{
"epoch": 1.75,
"learning_rate": 8.841742869160189e-05,
"loss": 1.9818,
"step": 121500
},
{
"epoch": 1.76,
"learning_rate": 8.836550194984916e-05,
"loss": 1.977,
"step": 122000
},
{
"epoch": 1.76,
"learning_rate": 8.831378291506343e-05,
"loss": 1.9787,
"step": 122500
},
{
"epoch": 1.77,
"learning_rate": 8.82618561733107e-05,
"loss": 1.9769,
"step": 123000
},
{
"epoch": 1.78,
"learning_rate": 8.821013713852497e-05,
"loss": 1.9777,
"step": 123500
},
{
"epoch": 1.78,
"learning_rate": 8.815821039677223e-05,
"loss": 1.9745,
"step": 124000
},
{
"epoch": 1.79,
"learning_rate": 8.810649136198651e-05,
"loss": 1.974,
"step": 124500
},
{
"epoch": 1.8,
"learning_rate": 8.805456462023378e-05,
"loss": 1.9743,
"step": 125000
},
{
"epoch": 1.81,
"learning_rate": 8.800284558544806e-05,
"loss": 1.9734,
"step": 125500
},
{
"epoch": 1.81,
"learning_rate": 8.795091884369531e-05,
"loss": 1.9779,
"step": 126000
},
{
"epoch": 1.82,
"learning_rate": 8.789919980890959e-05,
"loss": 1.9774,
"step": 126500
},
{
"epoch": 1.83,
"learning_rate": 8.784727306715686e-05,
"loss": 1.9762,
"step": 127000
},
{
"epoch": 1.84,
"learning_rate": 8.779555403237112e-05,
"loss": 1.971,
"step": 127500
},
{
"epoch": 1.84,
"learning_rate": 8.77436272906184e-05,
"loss": 1.9664,
"step": 128000
},
{
"epoch": 1.85,
"learning_rate": 8.769190825583268e-05,
"loss": 1.969,
"step": 128500
},
{
"epoch": 1.86,
"learning_rate": 8.763998151407994e-05,
"loss": 1.9673,
"step": 129000
},
{
"epoch": 1.86,
"learning_rate": 8.758826247929422e-05,
"loss": 1.9667,
"step": 129500
},
{
"epoch": 1.87,
"learning_rate": 8.753633573754148e-05,
"loss": 1.9699,
"step": 130000
},
{
"epoch": 1.88,
"learning_rate": 8.748461670275576e-05,
"loss": 1.9691,
"step": 130500
},
{
"epoch": 1.89,
"learning_rate": 8.743268996100302e-05,
"loss": 1.9673,
"step": 131000
},
{
"epoch": 1.89,
"learning_rate": 8.73809709262173e-05,
"loss": 1.9623,
"step": 131500
},
{
"epoch": 1.9,
"learning_rate": 8.732904418446456e-05,
"loss": 1.963,
"step": 132000
},
{
"epoch": 1.91,
"learning_rate": 8.727732514967884e-05,
"loss": 1.9629,
"step": 132500
},
{
"epoch": 1.91,
"learning_rate": 8.722539840792611e-05,
"loss": 1.967,
"step": 133000
},
{
"epoch": 1.92,
"learning_rate": 8.717367937314039e-05,
"loss": 1.9657,
"step": 133500
},
{
"epoch": 1.93,
"learning_rate": 8.712175263138764e-05,
"loss": 1.9658,
"step": 134000
},
{
"epoch": 1.94,
"learning_rate": 8.707003359660192e-05,
"loss": 1.9617,
"step": 134500
},
{
"epoch": 1.94,
"learning_rate": 8.701810685484917e-05,
"loss": 1.9596,
"step": 135000
},
{
"epoch": 1.95,
"learning_rate": 8.696638782006345e-05,
"loss": 1.9639,
"step": 135500
},
{
"epoch": 1.96,
"learning_rate": 8.691446107831073e-05,
"loss": 1.9594,
"step": 136000
},
{
"epoch": 1.96,
"learning_rate": 8.6862742043525e-05,
"loss": 1.9596,
"step": 136500
},
{
"epoch": 1.97,
"learning_rate": 8.681081530177227e-05,
"loss": 1.9588,
"step": 137000
},
{
"epoch": 1.98,
"learning_rate": 8.675909626698654e-05,
"loss": 1.9524,
"step": 137500
},
{
"epoch": 1.99,
"learning_rate": 8.67071695252338e-05,
"loss": 1.9547,
"step": 138000
},
{
"epoch": 1.99,
"learning_rate": 8.665545049044808e-05,
"loss": 1.9586,
"step": 138500
},
{
"epoch": 2.0,
"eval_accuracy": 0.6284000055251463,
"eval_loss": 1.830078125,
"eval_runtime": 654.5639,
"eval_samples_per_second": 823.353,
"eval_steps_per_second": 34.307,
"step": 138946
},
{
"epoch": 2.0,
"learning_rate": 8.660352374869534e-05,
"loss": 1.9514,
"step": 139000
},
{
"epoch": 2.01,
"learning_rate": 8.655180471390962e-05,
"loss": 1.9524,
"step": 139500
},
{
"epoch": 2.02,
"learning_rate": 8.649987797215689e-05,
"loss": 1.9539,
"step": 140000
},
{
"epoch": 2.02,
"learning_rate": 8.644815893737115e-05,
"loss": 1.9484,
"step": 140500
},
{
"epoch": 2.03,
"learning_rate": 8.639623219561842e-05,
"loss": 1.9489,
"step": 141000
},
{
"epoch": 2.04,
"learning_rate": 8.63445131608327e-05,
"loss": 1.954,
"step": 141500
},
{
"epoch": 2.04,
"learning_rate": 8.629258641907997e-05,
"loss": 1.9527,
"step": 142000
},
{
"epoch": 2.05,
"learning_rate": 8.624086738429425e-05,
"loss": 1.9509,
"step": 142500
},
{
"epoch": 2.06,
"learning_rate": 8.61889406425415e-05,
"loss": 1.9507,
"step": 143000
},
{
"epoch": 2.07,
"learning_rate": 8.613722160775578e-05,
"loss": 1.9465,
"step": 143500
},
{
"epoch": 2.07,
"learning_rate": 8.608529486600305e-05,
"loss": 1.9438,
"step": 144000
},
{
"epoch": 2.08,
"learning_rate": 8.603357583121732e-05,
"loss": 1.9451,
"step": 144500
},
{
"epoch": 2.09,
"learning_rate": 8.59816490894646e-05,
"loss": 1.9469,
"step": 145000
},
{
"epoch": 2.09,
"learning_rate": 8.592993005467887e-05,
"loss": 1.9459,
"step": 145500
},
{
"epoch": 2.1,
"learning_rate": 8.587800331292613e-05,
"loss": 1.9404,
"step": 146000
},
{
"epoch": 2.11,
"learning_rate": 8.58262842781404e-05,
"loss": 1.9465,
"step": 146500
},
{
"epoch": 2.12,
"learning_rate": 8.577435753638767e-05,
"loss": 1.9439,
"step": 147000
},
{
"epoch": 2.12,
"learning_rate": 8.572263850160195e-05,
"loss": 1.9431,
"step": 147500
},
{
"epoch": 2.13,
"learning_rate": 8.56707117598492e-05,
"loss": 1.9412,
"step": 148000
},
{
"epoch": 2.14,
"learning_rate": 8.561899272506348e-05,
"loss": 1.9458,
"step": 148500
},
{
"epoch": 2.14,
"learning_rate": 8.556706598331075e-05,
"loss": 1.9452,
"step": 149000
},
{
"epoch": 2.15,
"learning_rate": 8.551534694852503e-05,
"loss": 1.9436,
"step": 149500
},
{
"epoch": 2.16,
"learning_rate": 8.54634202067723e-05,
"loss": 1.9423,
"step": 150000
},
{
"epoch": 2.17,
"learning_rate": 8.541170117198657e-05,
"loss": 1.941,
"step": 150500
},
{
"epoch": 2.17,
"learning_rate": 8.535977443023383e-05,
"loss": 1.939,
"step": 151000
},
{
"epoch": 2.18,
"learning_rate": 8.53080553954481e-05,
"loss": 1.9381,
"step": 151500
},
{
"epoch": 2.19,
"learning_rate": 8.525612865369536e-05,
"loss": 1.9416,
"step": 152000
},
{
"epoch": 2.2,
"learning_rate": 8.520440961890964e-05,
"loss": 1.9382,
"step": 152500
},
{
"epoch": 2.2,
"learning_rate": 8.515248287715691e-05,
"loss": 1.9404,
"step": 153000
},
{
"epoch": 2.21,
"learning_rate": 8.510076384237119e-05,
"loss": 1.9399,
"step": 153500
},
{
"epoch": 2.22,
"learning_rate": 8.504883710061845e-05,
"loss": 1.9358,
"step": 154000
},
{
"epoch": 2.22,
"learning_rate": 8.499711806583273e-05,
"loss": 1.9338,
"step": 154500
},
{
"epoch": 2.23,
"learning_rate": 8.494519132407999e-05,
"loss": 1.9366,
"step": 155000
},
{
"epoch": 2.24,
"learning_rate": 8.489347228929426e-05,
"loss": 1.9342,
"step": 155500
},
{
"epoch": 2.25,
"learning_rate": 8.484154554754153e-05,
"loss": 1.9324,
"step": 156000
},
{
"epoch": 2.25,
"learning_rate": 8.478982651275581e-05,
"loss": 1.9324,
"step": 156500
},
{
"epoch": 2.26,
"learning_rate": 8.473789977100306e-05,
"loss": 1.9328,
"step": 157000
},
{
"epoch": 2.27,
"learning_rate": 8.468618073621734e-05,
"loss": 1.9345,
"step": 157500
},
{
"epoch": 2.27,
"learning_rate": 8.463425399446462e-05,
"loss": 1.9389,
"step": 158000
},
{
"epoch": 2.28,
"learning_rate": 8.458253495967889e-05,
"loss": 1.9304,
"step": 158500
},
{
"epoch": 2.29,
"learning_rate": 8.453060821792616e-05,
"loss": 1.9303,
"step": 159000
},
{
"epoch": 2.3,
"learning_rate": 8.447888918314043e-05,
"loss": 1.9285,
"step": 159500
},
{
"epoch": 2.3,
"learning_rate": 8.442696244138769e-05,
"loss": 1.9328,
"step": 160000
},
{
"epoch": 2.31,
"learning_rate": 8.437524340660197e-05,
"loss": 1.9321,
"step": 160500
},
{
"epoch": 2.32,
"learning_rate": 8.432331666484924e-05,
"loss": 1.9304,
"step": 161000
},
{
"epoch": 2.32,
"learning_rate": 8.427159763006351e-05,
"loss": 1.9284,
"step": 161500
},
{
"epoch": 2.33,
"learning_rate": 8.421967088831078e-05,
"loss": 1.9279,
"step": 162000
},
{
"epoch": 2.34,
"learning_rate": 8.416795185352505e-05,
"loss": 1.9262,
"step": 162500
},
{
"epoch": 2.35,
"learning_rate": 8.411602511177231e-05,
"loss": 1.9275,
"step": 163000
},
{
"epoch": 2.35,
"learning_rate": 8.406430607698659e-05,
"loss": 1.9244,
"step": 163500
},
{
"epoch": 2.36,
"learning_rate": 8.401237933523386e-05,
"loss": 1.9273,
"step": 164000
},
{
"epoch": 2.37,
"learning_rate": 8.396066030044814e-05,
"loss": 1.9264,
"step": 164500
},
{
"epoch": 2.38,
"learning_rate": 8.390873355869539e-05,
"loss": 1.9267,
"step": 165000
},
{
"epoch": 2.38,
"learning_rate": 8.385701452390967e-05,
"loss": 1.9224,
"step": 165500
},
{
"epoch": 2.39,
"learning_rate": 8.380508778215694e-05,
"loss": 1.9232,
"step": 166000
},
{
"epoch": 2.4,
"learning_rate": 8.37533687473712e-05,
"loss": 1.9209,
"step": 166500
},
{
"epoch": 2.4,
"learning_rate": 8.370144200561848e-05,
"loss": 1.9213,
"step": 167000
},
{
"epoch": 2.41,
"learning_rate": 8.364972297083276e-05,
"loss": 1.9191,
"step": 167500
},
{
"epoch": 2.42,
"learning_rate": 8.359779622908002e-05,
"loss": 1.9213,
"step": 168000
},
{
"epoch": 2.43,
"learning_rate": 8.35460771942943e-05,
"loss": 1.9191,
"step": 168500
},
{
"epoch": 2.43,
"learning_rate": 8.349415045254155e-05,
"loss": 1.9165,
"step": 169000
},
{
"epoch": 2.44,
"learning_rate": 8.344243141775583e-05,
"loss": 1.921,
"step": 169500
},
{
"epoch": 2.45,
"learning_rate": 8.33905046760031e-05,
"loss": 1.9179,
"step": 170000
},
{
"epoch": 2.45,
"learning_rate": 8.333878564121737e-05,
"loss": 1.9175,
"step": 170500
},
{
"epoch": 2.46,
"learning_rate": 8.328685889946464e-05,
"loss": 1.9255,
"step": 171000
},
{
"epoch": 2.47,
"learning_rate": 8.323513986467892e-05,
"loss": 1.9195,
"step": 171500
},
{
"epoch": 2.48,
"learning_rate": 8.318321312292619e-05,
"loss": 1.9166,
"step": 172000
},
{
"epoch": 2.48,
"learning_rate": 8.313149408814047e-05,
"loss": 1.9215,
"step": 172500
},
{
"epoch": 2.49,
"learning_rate": 8.307956734638772e-05,
"loss": 1.9168,
"step": 173000
},
{
"epoch": 2.5,
"learning_rate": 8.3027848311602e-05,
"loss": 1.9176,
"step": 173500
},
{
"epoch": 2.5,
"learning_rate": 8.297592156984925e-05,
"loss": 1.9157,
"step": 174000
},
{
"epoch": 2.51,
"learning_rate": 8.292420253506353e-05,
"loss": 1.9197,
"step": 174500
},
{
"epoch": 2.52,
"learning_rate": 8.287227579331081e-05,
"loss": 1.9129,
"step": 175000
},
{
"epoch": 2.53,
"learning_rate": 8.282055675852508e-05,
"loss": 1.9155,
"step": 175500
},
{
"epoch": 2.53,
"learning_rate": 8.276863001677234e-05,
"loss": 1.9158,
"step": 176000
},
{
"epoch": 2.54,
"learning_rate": 8.271691098198662e-05,
"loss": 1.9164,
"step": 176500
},
{
"epoch": 2.55,
"learning_rate": 8.266498424023388e-05,
"loss": 1.918,
"step": 177000
},
{
"epoch": 2.55,
"learning_rate": 8.261326520544815e-05,
"loss": 1.9084,
"step": 177500
},
{
"epoch": 2.56,
"learning_rate": 8.256133846369542e-05,
"loss": 1.9101,
"step": 178000
},
{
"epoch": 2.57,
"learning_rate": 8.25096194289097e-05,
"loss": 1.9149,
"step": 178500
},
{
"epoch": 2.58,
"learning_rate": 8.245769268715696e-05,
"loss": 1.914,
"step": 179000
},
{
"epoch": 2.58,
"learning_rate": 8.240597365237123e-05,
"loss": 1.9118,
"step": 179500
},
{
"epoch": 2.59,
"learning_rate": 8.23540469106185e-05,
"loss": 1.909,
"step": 180000
},
{
"epoch": 2.6,
"learning_rate": 8.230232787583278e-05,
"loss": 1.9102,
"step": 180500
},
{
"epoch": 2.61,
"learning_rate": 8.225040113408005e-05,
"loss": 1.9091,
"step": 181000
},
{
"epoch": 2.61,
"learning_rate": 8.219868209929433e-05,
"loss": 1.9135,
"step": 181500
},
{
"epoch": 2.62,
"learning_rate": 8.214675535754158e-05,
"loss": 1.9071,
"step": 182000
},
{
"epoch": 2.63,
"learning_rate": 8.209503632275586e-05,
"loss": 1.9131,
"step": 182500
},
{
"epoch": 2.63,
"learning_rate": 8.204310958100313e-05,
"loss": 1.9074,
"step": 183000
},
{
"epoch": 2.64,
"learning_rate": 8.19913905462174e-05,
"loss": 1.908,
"step": 183500
},
{
"epoch": 2.65,
"learning_rate": 8.193946380446467e-05,
"loss": 1.9074,
"step": 184000
},
{
"epoch": 2.66,
"learning_rate": 8.188774476967894e-05,
"loss": 1.9076,
"step": 184500
},
{
"epoch": 2.66,
"learning_rate": 8.18358180279262e-05,
"loss": 1.9133,
"step": 185000
},
{
"epoch": 2.67,
"learning_rate": 8.178409899314048e-05,
"loss": 1.9086,
"step": 185500
},
{
"epoch": 2.68,
"learning_rate": 8.173217225138775e-05,
"loss": 1.9083,
"step": 186000
},
{
"epoch": 2.68,
"learning_rate": 8.168045321660203e-05,
"loss": 1.9109,
"step": 186500
},
{
"epoch": 2.69,
"learning_rate": 8.162852647484928e-05,
"loss": 1.9034,
"step": 187000
},
{
"epoch": 2.7,
"learning_rate": 8.157680744006356e-05,
"loss": 1.9057,
"step": 187500
},
{
"epoch": 2.71,
"learning_rate": 8.152488069831083e-05,
"loss": 1.9068,
"step": 188000
},
{
"epoch": 2.71,
"learning_rate": 8.14731616635251e-05,
"loss": 1.9033,
"step": 188500
},
{
"epoch": 2.72,
"learning_rate": 8.142123492177238e-05,
"loss": 1.9017,
"step": 189000
},
{
"epoch": 2.73,
"learning_rate": 8.136951588698665e-05,
"loss": 1.9021,
"step": 189500
},
{
"epoch": 2.73,
"learning_rate": 8.131758914523391e-05,
"loss": 1.9068,
"step": 190000
},
{
"epoch": 2.74,
"learning_rate": 8.126587011044819e-05,
"loss": 1.9016,
"step": 190500
},
{
"epoch": 2.75,
"learning_rate": 8.121394336869544e-05,
"loss": 1.9028,
"step": 191000
},
{
"epoch": 2.76,
"learning_rate": 8.116222433390972e-05,
"loss": 1.8994,
"step": 191500
},
{
"epoch": 2.76,
"learning_rate": 8.111029759215699e-05,
"loss": 1.9,
"step": 192000
},
{
"epoch": 2.77,
"learning_rate": 8.105857855737126e-05,
"loss": 1.9026,
"step": 192500
},
{
"epoch": 2.78,
"learning_rate": 8.100665181561853e-05,
"loss": 1.9022,
"step": 193000
},
{
"epoch": 2.79,
"learning_rate": 8.095493278083281e-05,
"loss": 1.902,
"step": 193500
},
{
"epoch": 2.79,
"learning_rate": 8.090300603908006e-05,
"loss": 1.9006,
"step": 194000
},
{
"epoch": 2.8,
"learning_rate": 8.085128700429434e-05,
"loss": 1.9015,
"step": 194500
},
{
"epoch": 2.81,
"learning_rate": 8.079936026254161e-05,
"loss": 1.9018,
"step": 195000
},
{
"epoch": 2.81,
"learning_rate": 8.074764122775589e-05,
"loss": 1.9006,
"step": 195500
},
{
"epoch": 2.82,
"learning_rate": 8.069571448600314e-05,
"loss": 1.9021,
"step": 196000
},
{
"epoch": 2.83,
"learning_rate": 8.064399545121742e-05,
"loss": 1.8977,
"step": 196500
},
{
"epoch": 2.84,
"learning_rate": 8.05920687094647e-05,
"loss": 1.8992,
"step": 197000
},
{
"epoch": 2.84,
"learning_rate": 8.054034967467897e-05,
"loss": 1.8952,
"step": 197500
},
{
"epoch": 2.85,
"learning_rate": 8.048842293292624e-05,
"loss": 1.8974,
"step": 198000
},
{
"epoch": 2.86,
"learning_rate": 8.043670389814051e-05,
"loss": 1.8979,
"step": 198500
},
{
"epoch": 2.86,
"learning_rate": 8.038477715638777e-05,
"loss": 1.9013,
"step": 199000
},
{
"epoch": 2.87,
"learning_rate": 8.033305812160205e-05,
"loss": 1.8929,
"step": 199500
},
{
"epoch": 2.88,
"learning_rate": 8.028113137984931e-05,
"loss": 1.8936,
"step": 200000
},
{
"epoch": 2.89,
"learning_rate": 8.022941234506359e-05,
"loss": 1.8984,
"step": 200500
},
{
"epoch": 2.89,
"learning_rate": 8.017748560331086e-05,
"loss": 1.895,
"step": 201000
},
{
"epoch": 2.9,
"learning_rate": 8.012576656852512e-05,
"loss": 1.8959,
"step": 201500
},
{
"epoch": 2.91,
"learning_rate": 8.007383982677239e-05,
"loss": 1.8935,
"step": 202000
},
{
"epoch": 2.91,
"learning_rate": 8.002212079198667e-05,
"loss": 1.893,
"step": 202500
},
{
"epoch": 2.92,
"learning_rate": 7.997019405023394e-05,
"loss": 1.8923,
"step": 203000
},
{
"epoch": 2.93,
"learning_rate": 7.991847501544822e-05,
"loss": 1.8926,
"step": 203500
},
{
"epoch": 2.94,
"learning_rate": 7.986654827369547e-05,
"loss": 1.8934,
"step": 204000
},
{
"epoch": 2.94,
"learning_rate": 7.981482923890975e-05,
"loss": 1.8973,
"step": 204500
},
{
"epoch": 2.95,
"learning_rate": 7.976290249715702e-05,
"loss": 1.8885,
"step": 205000
},
{
"epoch": 2.96,
"learning_rate": 7.971118346237128e-05,
"loss": 1.8895,
"step": 205500
},
{
"epoch": 2.97,
"learning_rate": 7.965925672061856e-05,
"loss": 1.8869,
"step": 206000
},
{
"epoch": 2.97,
"learning_rate": 7.960753768583283e-05,
"loss": 1.8889,
"step": 206500
},
{
"epoch": 2.98,
"learning_rate": 7.95556109440801e-05,
"loss": 1.8932,
"step": 207000
},
{
"epoch": 2.99,
"learning_rate": 7.950389190929437e-05,
"loss": 1.8897,
"step": 207500
},
{
"epoch": 2.99,
"learning_rate": 7.945196516754163e-05,
"loss": 1.889,
"step": 208000
},
{
"epoch": 3.0,
"eval_accuracy": 0.6382595753723315,
"eval_loss": 1.7626953125,
"eval_runtime": 652.4368,
"eval_samples_per_second": 826.037,
"eval_steps_per_second": 34.419,
"step": 208419
},
{
"epoch": 3.0,
"learning_rate": 7.94002461327559e-05,
"loss": 1.8923,
"step": 208500
},
{
"epoch": 3.01,
"learning_rate": 7.934831939100317e-05,
"loss": 1.8823,
"step": 209000
},
{
"epoch": 3.02,
"learning_rate": 7.929660035621745e-05,
"loss": 1.8852,
"step": 209500
},
{
"epoch": 3.02,
"learning_rate": 7.924467361446472e-05,
"loss": 1.8847,
"step": 210000
},
{
"epoch": 3.03,
"learning_rate": 7.919295457967898e-05,
"loss": 1.8836,
"step": 210500
},
{
"epoch": 3.04,
"learning_rate": 7.914102783792627e-05,
"loss": 1.8852,
"step": 211000
},
{
"epoch": 3.04,
"learning_rate": 7.908930880314054e-05,
"loss": 1.8807,
"step": 211500
},
{
"epoch": 3.05,
"learning_rate": 7.90373820613878e-05,
"loss": 1.8877,
"step": 212000
},
{
"epoch": 3.06,
"learning_rate": 7.898566302660208e-05,
"loss": 1.88,
"step": 212500
},
{
"epoch": 3.07,
"learning_rate": 7.893373628484933e-05,
"loss": 1.8825,
"step": 213000
},
{
"epoch": 3.07,
"learning_rate": 7.888201725006361e-05,
"loss": 1.8838,
"step": 213500
},
{
"epoch": 3.08,
"learning_rate": 7.883009050831088e-05,
"loss": 1.8845,
"step": 214000
},
{
"epoch": 3.09,
"learning_rate": 7.877837147352515e-05,
"loss": 1.8829,
"step": 214500
},
{
"epoch": 3.09,
"learning_rate": 7.872644473177242e-05,
"loss": 1.8885,
"step": 215000
},
{
"epoch": 3.1,
"learning_rate": 7.86747256969867e-05,
"loss": 1.882,
"step": 215500
},
{
"epoch": 3.11,
"learning_rate": 7.862279895523396e-05,
"loss": 1.8821,
"step": 216000
},
{
"epoch": 3.12,
"learning_rate": 7.857107992044823e-05,
"loss": 1.8825,
"step": 216500
},
{
"epoch": 3.12,
"learning_rate": 7.85191531786955e-05,
"loss": 1.8818,
"step": 217000
},
{
"epoch": 3.13,
"learning_rate": 7.846743414390978e-05,
"loss": 1.8837,
"step": 217500
},
{
"epoch": 3.14,
"learning_rate": 7.841550740215703e-05,
"loss": 1.877,
"step": 218000
},
{
"epoch": 3.15,
"learning_rate": 7.836378836737131e-05,
"loss": 1.8835,
"step": 218500
},
{
"epoch": 3.15,
"learning_rate": 7.831186162561858e-05,
"loss": 1.8835,
"step": 219000
},
{
"epoch": 3.16,
"learning_rate": 7.826014259083286e-05,
"loss": 1.8789,
"step": 219500
},
{
"epoch": 3.17,
"learning_rate": 7.820821584908013e-05,
"loss": 1.8814,
"step": 220000
},
{
"epoch": 3.17,
"learning_rate": 7.81564968142944e-05,
"loss": 1.8783,
"step": 220500
},
{
"epoch": 3.18,
"learning_rate": 7.810457007254166e-05,
"loss": 1.8846,
"step": 221000
},
{
"epoch": 3.19,
"learning_rate": 7.805285103775594e-05,
"loss": 1.8769,
"step": 221500
},
{
"epoch": 3.2,
"learning_rate": 7.800092429600319e-05,
"loss": 1.8762,
"step": 222000
},
{
"epoch": 3.2,
"learning_rate": 7.794920526121747e-05,
"loss": 1.8787,
"step": 222500
},
{
"epoch": 3.21,
"learning_rate": 7.789727851946475e-05,
"loss": 1.8793,
"step": 223000
},
{
"epoch": 3.22,
"learning_rate": 7.784555948467902e-05,
"loss": 1.8754,
"step": 223500
},
{
"epoch": 3.22,
"learning_rate": 7.779363274292628e-05,
"loss": 1.8788,
"step": 224000
},
{
"epoch": 3.23,
"learning_rate": 7.774191370814056e-05,
"loss": 1.8753,
"step": 224500
},
{
"epoch": 3.24,
"learning_rate": 7.768998696638783e-05,
"loss": 1.8792,
"step": 225000
},
{
"epoch": 3.25,
"learning_rate": 7.763826793160211e-05,
"loss": 1.8741,
"step": 225500
},
{
"epoch": 3.25,
"learning_rate": 7.758634118984936e-05,
"loss": 1.8782,
"step": 226000
},
{
"epoch": 3.26,
"learning_rate": 7.753462215506364e-05,
"loss": 1.8763,
"step": 226500
},
{
"epoch": 3.27,
"learning_rate": 7.748269541331091e-05,
"loss": 1.8718,
"step": 227000
},
{
"epoch": 3.27,
"learning_rate": 7.743097637852517e-05,
"loss": 1.8757,
"step": 227500
},
{
"epoch": 3.28,
"learning_rate": 7.737904963677245e-05,
"loss": 1.876,
"step": 228000
},
{
"epoch": 3.29,
"learning_rate": 7.732733060198672e-05,
"loss": 1.8746,
"step": 228500
},
{
"epoch": 3.3,
"learning_rate": 7.727540386023399e-05,
"loss": 1.8725,
"step": 229000
},
{
"epoch": 3.3,
"learning_rate": 7.722368482544826e-05,
"loss": 1.8764,
"step": 229500
},
{
"epoch": 3.31,
"learning_rate": 7.717175808369552e-05,
"loss": 1.8748,
"step": 230000
},
{
"epoch": 3.32,
"learning_rate": 7.71200390489098e-05,
"loss": 1.8738,
"step": 230500
},
{
"epoch": 3.33,
"learning_rate": 7.706811230715707e-05,
"loss": 1.8709,
"step": 231000
},
{
"epoch": 3.33,
"learning_rate": 7.701639327237134e-05,
"loss": 1.8728,
"step": 231500
},
{
"epoch": 3.34,
"learning_rate": 7.696446653061861e-05,
"loss": 1.8718,
"step": 232000
},
{
"epoch": 3.35,
"learning_rate": 7.691274749583288e-05,
"loss": 1.876,
"step": 232500
},
{
"epoch": 3.35,
"learning_rate": 7.686082075408014e-05,
"loss": 1.8677,
"step": 233000
},
{
"epoch": 3.36,
"learning_rate": 7.680910171929442e-05,
"loss": 1.8703,
"step": 233500
},
{
"epoch": 3.37,
"learning_rate": 7.675717497754169e-05,
"loss": 1.8715,
"step": 234000
},
{
"epoch": 3.38,
"learning_rate": 7.670545594275597e-05,
"loss": 1.871,
"step": 234500
},
{
"epoch": 3.38,
"learning_rate": 7.665352920100322e-05,
"loss": 1.8684,
"step": 235000
},
{
"epoch": 3.39,
"learning_rate": 7.66018101662175e-05,
"loss": 1.8688,
"step": 235500
},
{
"epoch": 3.4,
"learning_rate": 7.654988342446477e-05,
"loss": 1.8785,
"step": 236000
},
{
"epoch": 3.4,
"learning_rate": 7.649816438967903e-05,
"loss": 1.8702,
"step": 236500
},
{
"epoch": 3.41,
"learning_rate": 7.644623764792631e-05,
"loss": 1.8683,
"step": 237000
},
{
"epoch": 3.42,
"learning_rate": 7.639451861314059e-05,
"loss": 1.8698,
"step": 237500
},
{
"epoch": 3.43,
"learning_rate": 7.634259187138785e-05,
"loss": 1.8672,
"step": 238000
},
{
"epoch": 3.43,
"learning_rate": 7.629087283660212e-05,
"loss": 1.8718,
"step": 238500
},
{
"epoch": 3.44,
"learning_rate": 7.623894609484939e-05,
"loss": 1.8644,
"step": 239000
},
{
"epoch": 3.45,
"learning_rate": 7.618722706006367e-05,
"loss": 1.8734,
"step": 239500
},
{
"epoch": 3.45,
"learning_rate": 7.613530031831093e-05,
"loss": 1.8671,
"step": 240000
},
{
"epoch": 3.46,
"learning_rate": 7.60835812835252e-05,
"loss": 1.8703,
"step": 240500
},
{
"epoch": 3.47,
"learning_rate": 7.603165454177247e-05,
"loss": 1.8675,
"step": 241000
},
{
"epoch": 3.48,
"learning_rate": 7.597993550698675e-05,
"loss": 1.8679,
"step": 241500
},
{
"epoch": 3.48,
"learning_rate": 7.592800876523402e-05,
"loss": 1.8715,
"step": 242000
},
{
"epoch": 3.49,
"learning_rate": 7.58762897304483e-05,
"loss": 1.8662,
"step": 242500
},
{
"epoch": 3.5,
"learning_rate": 7.582436298869555e-05,
"loss": 1.8639,
"step": 243000
},
{
"epoch": 3.5,
"learning_rate": 7.577264395390983e-05,
"loss": 1.8661,
"step": 243500
},
{
"epoch": 3.51,
"learning_rate": 7.572071721215708e-05,
"loss": 1.8613,
"step": 244000
},
{
"epoch": 3.52,
"learning_rate": 7.566899817737136e-05,
"loss": 1.8641,
"step": 244500
},
{
"epoch": 3.53,
"learning_rate": 7.561707143561864e-05,
"loss": 1.8643,
"step": 245000
},
{
"epoch": 3.53,
"learning_rate": 7.55653524008329e-05,
"loss": 1.8642,
"step": 245500
},
{
"epoch": 3.54,
"learning_rate": 7.551342565908017e-05,
"loss": 1.8654,
"step": 246000
},
{
"epoch": 3.55,
"learning_rate": 7.546170662429445e-05,
"loss": 1.8618,
"step": 246500
},
{
"epoch": 3.56,
"learning_rate": 7.540977988254171e-05,
"loss": 1.8703,
"step": 247000
},
{
"epoch": 3.56,
"learning_rate": 7.535806084775598e-05,
"loss": 1.8616,
"step": 247500
},
{
"epoch": 3.57,
"learning_rate": 7.530613410600325e-05,
"loss": 1.8625,
"step": 248000
},
{
"epoch": 3.58,
"learning_rate": 7.525441507121753e-05,
"loss": 1.8626,
"step": 248500
},
{
"epoch": 3.58,
"learning_rate": 7.52024883294648e-05,
"loss": 1.8662,
"step": 249000
},
{
"epoch": 3.59,
"learning_rate": 7.515076929467906e-05,
"loss": 1.8635,
"step": 249500
},
{
"epoch": 3.6,
"learning_rate": 7.509884255292635e-05,
"loss": 1.8634,
"step": 250000
},
{
"epoch": 3.61,
"learning_rate": 7.50469158111736e-05,
"loss": 1.8644,
"step": 250500
},
{
"epoch": 3.61,
"learning_rate": 7.499519677638788e-05,
"loss": 1.8601,
"step": 251000
},
{
"epoch": 3.62,
"learning_rate": 7.494327003463513e-05,
"loss": 1.8632,
"step": 251500
},
{
"epoch": 3.63,
"learning_rate": 7.489155099984941e-05,
"loss": 1.8648,
"step": 252000
},
{
"epoch": 3.63,
"learning_rate": 7.483962425809669e-05,
"loss": 1.8629,
"step": 252500
},
{
"epoch": 3.64,
"learning_rate": 7.478790522331096e-05,
"loss": 1.8611,
"step": 253000
},
{
"epoch": 3.65,
"learning_rate": 7.473597848155822e-05,
"loss": 1.8574,
"step": 253500
},
{
"epoch": 3.66,
"learning_rate": 7.46842594467725e-05,
"loss": 1.8636,
"step": 254000
},
{
"epoch": 3.66,
"learning_rate": 7.463233270501976e-05,
"loss": 1.8605,
"step": 254500
},
{
"epoch": 3.67,
"learning_rate": 7.458061367023403e-05,
"loss": 1.8583,
"step": 255000
},
{
"epoch": 3.68,
"learning_rate": 7.45286869284813e-05,
"loss": 1.8616,
"step": 255500
},
{
"epoch": 3.68,
"learning_rate": 7.447696789369558e-05,
"loss": 1.8571,
"step": 256000
},
{
"epoch": 3.69,
"learning_rate": 7.442504115194285e-05,
"loss": 1.8589,
"step": 256500
},
{
"epoch": 3.7,
"learning_rate": 7.437332211715711e-05,
"loss": 1.8585,
"step": 257000
},
{
"epoch": 3.71,
"learning_rate": 7.432139537540438e-05,
"loss": 1.8582,
"step": 257500
},
{
"epoch": 3.71,
"learning_rate": 7.426967634061866e-05,
"loss": 1.8597,
"step": 258000
},
{
"epoch": 3.72,
"learning_rate": 7.421774959886593e-05,
"loss": 1.8592,
"step": 258500
},
{
"epoch": 3.73,
"learning_rate": 7.41660305640802e-05,
"loss": 1.8589,
"step": 259000
},
{
"epoch": 3.74,
"learning_rate": 7.411410382232746e-05,
"loss": 1.8527,
"step": 259500
},
{
"epoch": 3.74,
"learning_rate": 7.406238478754174e-05,
"loss": 1.8583,
"step": 260000
},
{
"epoch": 3.75,
"learning_rate": 7.4010458045789e-05,
"loss": 1.8536,
"step": 260500
},
{
"epoch": 3.76,
"learning_rate": 7.395873901100327e-05,
"loss": 1.8548,
"step": 261000
},
{
"epoch": 3.76,
"learning_rate": 7.390681226925055e-05,
"loss": 1.8536,
"step": 261500
},
{
"epoch": 3.77,
"learning_rate": 7.385509323446482e-05,
"loss": 1.852,
"step": 262000
},
{
"epoch": 3.78,
"learning_rate": 7.380316649271208e-05,
"loss": 1.8566,
"step": 262500
},
{
"epoch": 3.79,
"learning_rate": 7.375144745792636e-05,
"loss": 1.8541,
"step": 263000
},
{
"epoch": 3.79,
"learning_rate": 7.369952071617363e-05,
"loss": 1.8562,
"step": 263500
},
{
"epoch": 3.8,
"learning_rate": 7.364780168138791e-05,
"loss": 1.8576,
"step": 264000
},
{
"epoch": 3.81,
"learning_rate": 7.359587493963516e-05,
"loss": 1.8577,
"step": 264500
},
{
"epoch": 3.81,
"learning_rate": 7.354415590484944e-05,
"loss": 1.8544,
"step": 265000
},
{
"epoch": 3.82,
"learning_rate": 7.349222916309671e-05,
"loss": 1.8567,
"step": 265500
},
{
"epoch": 3.83,
"learning_rate": 7.344051012831097e-05,
"loss": 1.8526,
"step": 266000
},
{
"epoch": 3.84,
"learning_rate": 7.338858338655826e-05,
"loss": 1.8545,
"step": 266500
},
{
"epoch": 3.84,
"learning_rate": 7.333686435177253e-05,
"loss": 1.8532,
"step": 267000
},
{
"epoch": 3.85,
"learning_rate": 7.328493761001979e-05,
"loss": 1.8525,
"step": 267500
},
{
"epoch": 3.86,
"learning_rate": 7.323321857523407e-05,
"loss": 1.8527,
"step": 268000
},
{
"epoch": 3.86,
"learning_rate": 7.318129183348132e-05,
"loss": 1.8557,
"step": 268500
},
{
"epoch": 3.87,
"learning_rate": 7.31295727986956e-05,
"loss": 1.8522,
"step": 269000
},
{
"epoch": 3.88,
"learning_rate": 7.307764605694287e-05,
"loss": 1.857,
"step": 269500
},
{
"epoch": 3.89,
"learning_rate": 7.302592702215714e-05,
"loss": 1.8594,
"step": 270000
},
{
"epoch": 3.89,
"learning_rate": 7.297400028040441e-05,
"loss": 1.8492,
"step": 270500
},
{
"epoch": 3.9,
"learning_rate": 7.292228124561869e-05,
"loss": 1.8502,
"step": 271000
},
{
"epoch": 3.91,
"learning_rate": 7.287035450386594e-05,
"loss": 1.8525,
"step": 271500
},
{
"epoch": 3.92,
"learning_rate": 7.281863546908022e-05,
"loss": 1.8482,
"step": 272000
},
{
"epoch": 3.92,
"learning_rate": 7.276670872732749e-05,
"loss": 1.8502,
"step": 272500
},
{
"epoch": 3.93,
"learning_rate": 7.271498969254177e-05,
"loss": 1.8505,
"step": 273000
},
{
"epoch": 3.94,
"learning_rate": 7.266306295078902e-05,
"loss": 1.8493,
"step": 273500
},
{
"epoch": 3.94,
"learning_rate": 7.26113439160033e-05,
"loss": 1.8499,
"step": 274000
},
{
"epoch": 3.95,
"learning_rate": 7.255941717425057e-05,
"loss": 1.8448,
"step": 274500
},
{
"epoch": 3.96,
"learning_rate": 7.250769813946485e-05,
"loss": 1.8507,
"step": 275000
},
{
"epoch": 3.97,
"learning_rate": 7.245577139771212e-05,
"loss": 1.8514,
"step": 275500
},
{
"epoch": 3.97,
"learning_rate": 7.240405236292639e-05,
"loss": 1.8507,
"step": 276000
},
{
"epoch": 3.98,
"learning_rate": 7.235212562117365e-05,
"loss": 1.8475,
"step": 276500
},
{
"epoch": 3.99,
"learning_rate": 7.230040658638793e-05,
"loss": 1.8495,
"step": 277000
},
{
"epoch": 3.99,
"learning_rate": 7.22484798446352e-05,
"loss": 1.8496,
"step": 277500
},
{
"epoch": 4.0,
"eval_accuracy": 0.6442238837933792,
"eval_loss": 1.7236328125,
"eval_runtime": 653.8369,
"eval_samples_per_second": 824.268,
"eval_steps_per_second": 34.345,
"step": 277892
},
{
"epoch": 4.0,
"learning_rate": 7.219676080984947e-05,
"loss": 1.8473,
"step": 278000
},
{
"epoch": 4.01,
"learning_rate": 7.214483406809674e-05,
"loss": 1.8419,
"step": 278500
},
{
"epoch": 4.02,
"learning_rate": 7.2093115033311e-05,
"loss": 1.8469,
"step": 279000
},
{
"epoch": 4.02,
"learning_rate": 7.204118829155827e-05,
"loss": 1.8423,
"step": 279500
},
{
"epoch": 4.03,
"learning_rate": 7.198946925677255e-05,
"loss": 1.8486,
"step": 280000
},
{
"epoch": 4.04,
"learning_rate": 7.193754251501982e-05,
"loss": 1.8417,
"step": 280500
},
{
"epoch": 4.04,
"learning_rate": 7.18858234802341e-05,
"loss": 1.8455,
"step": 281000
},
{
"epoch": 4.05,
"learning_rate": 7.183389673848135e-05,
"loss": 1.8395,
"step": 281500
},
{
"epoch": 4.06,
"learning_rate": 7.178217770369563e-05,
"loss": 1.8428,
"step": 282000
},
{
"epoch": 4.07,
"learning_rate": 7.17302509619429e-05,
"loss": 1.8469,
"step": 282500
},
{
"epoch": 4.07,
"learning_rate": 7.167853192715716e-05,
"loss": 1.8421,
"step": 283000
},
{
"epoch": 4.08,
"learning_rate": 7.162660518540444e-05,
"loss": 1.8466,
"step": 283500
},
{
"epoch": 4.09,
"learning_rate": 7.157488615061871e-05,
"loss": 1.8436,
"step": 284000
},
{
"epoch": 4.1,
"learning_rate": 7.152295940886598e-05,
"loss": 1.8429,
"step": 284500
},
{
"epoch": 4.1,
"learning_rate": 7.147124037408025e-05,
"loss": 1.844,
"step": 285000
},
{
"epoch": 4.11,
"learning_rate": 7.141931363232751e-05,
"loss": 1.8403,
"step": 285500
},
{
"epoch": 4.12,
"learning_rate": 7.136759459754179e-05,
"loss": 1.8411,
"step": 286000
},
{
"epoch": 4.12,
"learning_rate": 7.131566785578905e-05,
"loss": 1.8395,
"step": 286500
},
{
"epoch": 4.13,
"learning_rate": 7.126394882100333e-05,
"loss": 1.8433,
"step": 287000
},
{
"epoch": 4.14,
"learning_rate": 7.12120220792506e-05,
"loss": 1.8413,
"step": 287500
},
{
"epoch": 4.15,
"learning_rate": 7.116030304446486e-05,
"loss": 1.8417,
"step": 288000
},
{
"epoch": 4.15,
"learning_rate": 7.110837630271213e-05,
"loss": 1.8398,
"step": 288500
},
{
"epoch": 4.16,
"learning_rate": 7.105665726792641e-05,
"loss": 1.8434,
"step": 289000
},
{
"epoch": 4.17,
"learning_rate": 7.100473052617368e-05,
"loss": 1.8411,
"step": 289500
},
{
"epoch": 4.17,
"learning_rate": 7.095301149138796e-05,
"loss": 1.838,
"step": 290000
},
{
"epoch": 4.18,
"learning_rate": 7.090108474963521e-05,
"loss": 1.8426,
"step": 290500
},
{
"epoch": 4.19,
"learning_rate": 7.084936571484949e-05,
"loss": 1.8393,
"step": 291000
},
{
"epoch": 4.2,
"learning_rate": 7.079743897309676e-05,
"loss": 1.8392,
"step": 291500
},
{
"epoch": 4.2,
"learning_rate": 7.074571993831103e-05,
"loss": 1.8387,
"step": 292000
},
{
"epoch": 4.21,
"learning_rate": 7.06937931965583e-05,
"loss": 1.8382,
"step": 292500
},
{
"epoch": 4.22,
"learning_rate": 7.064207416177258e-05,
"loss": 1.8399,
"step": 293000
},
{
"epoch": 4.22,
"learning_rate": 7.059014742001984e-05,
"loss": 1.8352,
"step": 293500
},
{
"epoch": 4.23,
"learning_rate": 7.053842838523411e-05,
"loss": 1.8379,
"step": 294000
},
{
"epoch": 4.24,
"learning_rate": 7.048650164348138e-05,
"loss": 1.837,
"step": 294500
},
{
"epoch": 4.25,
"learning_rate": 7.043478260869566e-05,
"loss": 1.8381,
"step": 295000
},
{
"epoch": 4.25,
"learning_rate": 7.038285586694291e-05,
"loss": 1.8319,
"step": 295500
},
{
"epoch": 4.26,
"learning_rate": 7.033113683215719e-05,
"loss": 1.8366,
"step": 296000
},
{
"epoch": 4.27,
"learning_rate": 7.027921009040446e-05,
"loss": 1.8369,
"step": 296500
},
{
"epoch": 4.28,
"learning_rate": 7.022749105561874e-05,
"loss": 1.8374,
"step": 297000
},
{
"epoch": 4.28,
"learning_rate": 7.0175564313866e-05,
"loss": 1.8394,
"step": 297500
},
{
"epoch": 4.29,
"learning_rate": 7.012384527908028e-05,
"loss": 1.8352,
"step": 298000
},
{
"epoch": 4.3,
"learning_rate": 7.007191853732754e-05,
"loss": 1.8362,
"step": 298500
},
{
"epoch": 4.3,
"learning_rate": 7.002019950254182e-05,
"loss": 1.8384,
"step": 299000
},
{
"epoch": 4.31,
"learning_rate": 6.996827276078907e-05,
"loss": 1.8377,
"step": 299500
},
{
"epoch": 4.32,
"learning_rate": 6.991655372600335e-05,
"loss": 1.8357,
"step": 300000
},
{
"epoch": 4.33,
"learning_rate": 6.986462698425063e-05,
"loss": 1.8343,
"step": 300500
},
{
"epoch": 4.33,
"learning_rate": 6.98129079494649e-05,
"loss": 1.838,
"step": 301000
},
{
"epoch": 4.34,
"learning_rate": 6.976098120771216e-05,
"loss": 1.8332,
"step": 301500
},
{
"epoch": 4.35,
"learning_rate": 6.970926217292644e-05,
"loss": 1.8372,
"step": 302000
},
{
"epoch": 4.35,
"learning_rate": 6.96573354311737e-05,
"loss": 1.8377,
"step": 302500
},
{
"epoch": 4.36,
"learning_rate": 6.960561639638797e-05,
"loss": 1.8372,
"step": 303000
},
{
"epoch": 4.37,
"learning_rate": 6.955368965463524e-05,
"loss": 1.8373,
"step": 303500
},
{
"epoch": 4.38,
"learning_rate": 6.950197061984952e-05,
"loss": 1.8351,
"step": 304000
},
{
"epoch": 4.38,
"learning_rate": 6.945004387809679e-05,
"loss": 1.8344,
"step": 304500
},
{
"epoch": 4.39,
"learning_rate": 6.939832484331105e-05,
"loss": 1.8312,
"step": 305000
},
{
"epoch": 4.4,
"learning_rate": 6.934639810155833e-05,
"loss": 1.8339,
"step": 305500
},
{
"epoch": 4.4,
"learning_rate": 6.929467906677261e-05,
"loss": 1.8334,
"step": 306000
},
{
"epoch": 4.41,
"learning_rate": 6.924275232501987e-05,
"loss": 1.8357,
"step": 306500
},
{
"epoch": 4.42,
"learning_rate": 6.919103329023414e-05,
"loss": 1.8298,
"step": 307000
},
{
"epoch": 4.43,
"learning_rate": 6.91391065484814e-05,
"loss": 1.832,
"step": 307500
},
{
"epoch": 4.43,
"learning_rate": 6.908738751369568e-05,
"loss": 1.8347,
"step": 308000
},
{
"epoch": 4.44,
"learning_rate": 6.903546077194295e-05,
"loss": 1.8348,
"step": 308500
},
{
"epoch": 4.45,
"learning_rate": 6.898374173715722e-05,
"loss": 1.8337,
"step": 309000
},
{
"epoch": 4.45,
"learning_rate": 6.893181499540449e-05,
"loss": 1.8286,
"step": 309500
},
{
"epoch": 4.46,
"learning_rate": 6.888009596061877e-05,
"loss": 1.8294,
"step": 310000
},
{
"epoch": 4.47,
"learning_rate": 6.882816921886602e-05,
"loss": 1.8312,
"step": 310500
},
{
"epoch": 4.48,
"learning_rate": 6.87764501840803e-05,
"loss": 1.8297,
"step": 311000
},
{
"epoch": 4.48,
"learning_rate": 6.872452344232757e-05,
"loss": 1.8321,
"step": 311500
},
{
"epoch": 4.49,
"learning_rate": 6.867280440754185e-05,
"loss": 1.8331,
"step": 312000
},
{
"epoch": 4.5,
"learning_rate": 6.86208776657891e-05,
"loss": 1.8319,
"step": 312500
},
{
"epoch": 4.51,
"learning_rate": 6.856915863100338e-05,
"loss": 1.83,
"step": 313000
},
{
"epoch": 4.51,
"learning_rate": 6.851723188925065e-05,
"loss": 1.8297,
"step": 313500
},
{
"epoch": 4.52,
"learning_rate": 6.846551285446493e-05,
"loss": 1.8318,
"step": 314000
},
{
"epoch": 4.53,
"learning_rate": 6.84135861127122e-05,
"loss": 1.8292,
"step": 314500
},
{
"epoch": 4.53,
"learning_rate": 6.836186707792647e-05,
"loss": 1.8294,
"step": 315000
},
{
"epoch": 4.54,
"learning_rate": 6.830994033617373e-05,
"loss": 1.8341,
"step": 315500
},
{
"epoch": 4.55,
"learning_rate": 6.8258221301388e-05,
"loss": 1.8306,
"step": 316000
},
{
"epoch": 4.56,
"learning_rate": 6.820629455963527e-05,
"loss": 1.8316,
"step": 316500
},
{
"epoch": 4.56,
"learning_rate": 6.815457552484955e-05,
"loss": 1.8292,
"step": 317000
},
{
"epoch": 4.57,
"learning_rate": 6.81026487830968e-05,
"loss": 1.832,
"step": 317500
},
{
"epoch": 4.58,
"learning_rate": 6.805092974831108e-05,
"loss": 1.8291,
"step": 318000
},
{
"epoch": 4.58,
"learning_rate": 6.799900300655835e-05,
"loss": 1.8291,
"step": 318500
},
{
"epoch": 4.59,
"learning_rate": 6.794728397177263e-05,
"loss": 1.8318,
"step": 319000
},
{
"epoch": 4.6,
"learning_rate": 6.78953572300199e-05,
"loss": 1.8302,
"step": 319500
},
{
"epoch": 4.61,
"learning_rate": 6.784363819523418e-05,
"loss": 1.8249,
"step": 320000
},
{
"epoch": 4.61,
"learning_rate": 6.779171145348143e-05,
"loss": 1.8325,
"step": 320500
},
{
"epoch": 4.62,
"learning_rate": 6.773999241869571e-05,
"loss": 1.8276,
"step": 321000
},
{
"epoch": 4.63,
"learning_rate": 6.768806567694296e-05,
"loss": 1.8306,
"step": 321500
},
{
"epoch": 4.63,
"learning_rate": 6.763634664215724e-05,
"loss": 1.8286,
"step": 322000
},
{
"epoch": 4.64,
"learning_rate": 6.758441990040452e-05,
"loss": 1.8288,
"step": 322500
},
{
"epoch": 4.65,
"learning_rate": 6.753270086561879e-05,
"loss": 1.8283,
"step": 323000
},
{
"epoch": 4.66,
"learning_rate": 6.748077412386605e-05,
"loss": 1.8309,
"step": 323500
},
{
"epoch": 4.66,
"learning_rate": 6.742905508908033e-05,
"loss": 1.8287,
"step": 324000
},
{
"epoch": 4.67,
"learning_rate": 6.737712834732759e-05,
"loss": 1.8266,
"step": 324500
},
{
"epoch": 4.68,
"learning_rate": 6.732540931254186e-05,
"loss": 1.8256,
"step": 325000
},
{
"epoch": 4.69,
"learning_rate": 6.727348257078913e-05,
"loss": 1.8299,
"step": 325500
},
{
"epoch": 4.69,
"learning_rate": 6.722176353600341e-05,
"loss": 1.8279,
"step": 326000
},
{
"epoch": 4.7,
"learning_rate": 6.716983679425068e-05,
"loss": 1.8276,
"step": 326500
},
{
"epoch": 4.71,
"learning_rate": 6.711811775946494e-05,
"loss": 1.8256,
"step": 327000
},
{
"epoch": 4.71,
"learning_rate": 6.706619101771221e-05,
"loss": 1.8254,
"step": 327500
},
{
"epoch": 4.72,
"learning_rate": 6.701447198292649e-05,
"loss": 1.827,
"step": 328000
},
{
"epoch": 4.73,
"learning_rate": 6.696254524117376e-05,
"loss": 1.8264,
"step": 328500
},
{
"epoch": 4.74,
"learning_rate": 6.691082620638804e-05,
"loss": 1.8267,
"step": 329000
},
{
"epoch": 4.74,
"learning_rate": 6.685889946463529e-05,
"loss": 1.827,
"step": 329500
},
{
"epoch": 4.75,
"learning_rate": 6.680718042984957e-05,
"loss": 1.8263,
"step": 330000
},
{
"epoch": 4.76,
"learning_rate": 6.675525368809684e-05,
"loss": 1.8223,
"step": 330500
},
{
"epoch": 4.76,
"learning_rate": 6.670353465331111e-05,
"loss": 1.8231,
"step": 331000
},
{
"epoch": 4.77,
"learning_rate": 6.665160791155838e-05,
"loss": 1.8201,
"step": 331500
},
{
"epoch": 4.78,
"learning_rate": 6.659988887677266e-05,
"loss": 1.826,
"step": 332000
},
{
"epoch": 4.79,
"learning_rate": 6.654796213501991e-05,
"loss": 1.8219,
"step": 332500
},
{
"epoch": 4.79,
"learning_rate": 6.649624310023419e-05,
"loss": 1.8242,
"step": 333000
},
{
"epoch": 4.8,
"learning_rate": 6.644431635848146e-05,
"loss": 1.8225,
"step": 333500
},
{
"epoch": 4.81,
"learning_rate": 6.639259732369574e-05,
"loss": 1.8243,
"step": 334000
},
{
"epoch": 4.81,
"learning_rate": 6.634067058194299e-05,
"loss": 1.8222,
"step": 334500
},
{
"epoch": 4.82,
"learning_rate": 6.628895154715727e-05,
"loss": 1.8258,
"step": 335000
},
{
"epoch": 4.83,
"learning_rate": 6.623702480540454e-05,
"loss": 1.8233,
"step": 335500
},
{
"epoch": 4.84,
"learning_rate": 6.618530577061882e-05,
"loss": 1.8246,
"step": 336000
},
{
"epoch": 4.84,
"learning_rate": 6.613337902886609e-05,
"loss": 1.8245,
"step": 336500
},
{
"epoch": 4.85,
"learning_rate": 6.608165999408036e-05,
"loss": 1.8255,
"step": 337000
},
{
"epoch": 4.86,
"learning_rate": 6.602973325232762e-05,
"loss": 1.8231,
"step": 337500
},
{
"epoch": 4.87,
"learning_rate": 6.59780142175419e-05,
"loss": 1.8195,
"step": 338000
},
{
"epoch": 4.87,
"learning_rate": 6.592608747578915e-05,
"loss": 1.8231,
"step": 338500
},
{
"epoch": 4.88,
"learning_rate": 6.587436844100343e-05,
"loss": 1.8218,
"step": 339000
},
{
"epoch": 4.89,
"learning_rate": 6.582244169925071e-05,
"loss": 1.8182,
"step": 339500
},
{
"epoch": 4.89,
"learning_rate": 6.577072266446497e-05,
"loss": 1.8183,
"step": 340000
},
{
"epoch": 4.9,
"learning_rate": 6.571879592271224e-05,
"loss": 1.8206,
"step": 340500
},
{
"epoch": 4.91,
"learning_rate": 6.566707688792652e-05,
"loss": 1.82,
"step": 341000
},
{
"epoch": 4.92,
"learning_rate": 6.561515014617377e-05,
"loss": 1.8225,
"step": 341500
},
{
"epoch": 4.92,
"learning_rate": 6.556343111138805e-05,
"loss": 1.8218,
"step": 342000
},
{
"epoch": 4.93,
"learning_rate": 6.551150436963532e-05,
"loss": 1.815,
"step": 342500
},
{
"epoch": 4.94,
"learning_rate": 6.54597853348496e-05,
"loss": 1.82,
"step": 343000
},
{
"epoch": 4.94,
"learning_rate": 6.540785859309687e-05,
"loss": 1.8212,
"step": 343500
},
{
"epoch": 4.95,
"learning_rate": 6.535613955831113e-05,
"loss": 1.8202,
"step": 344000
},
{
"epoch": 4.96,
"learning_rate": 6.530421281655841e-05,
"loss": 1.8165,
"step": 344500
},
{
"epoch": 4.97,
"learning_rate": 6.525249378177268e-05,
"loss": 1.8195,
"step": 345000
},
{
"epoch": 4.97,
"learning_rate": 6.520056704001995e-05,
"loss": 1.8137,
"step": 345500
},
{
"epoch": 4.98,
"learning_rate": 6.514884800523422e-05,
"loss": 1.8152,
"step": 346000
},
{
"epoch": 4.99,
"learning_rate": 6.509692126348148e-05,
"loss": 1.8201,
"step": 346500
},
{
"epoch": 4.99,
"learning_rate": 6.504520222869576e-05,
"loss": 1.8188,
"step": 347000
},
{
"epoch": 5.0,
"eval_accuracy": 0.6490117128316449,
"eval_loss": 1.6923828125,
"eval_runtime": 653.4402,
"eval_samples_per_second": 824.769,
"eval_steps_per_second": 34.366,
"step": 347365
},
{
"epoch": 5.0,
"learning_rate": 6.499327548694302e-05,
"loss": 1.8205,
"step": 347500
},
{
"epoch": 5.01,
"learning_rate": 6.49415564521573e-05,
"loss": 1.8156,
"step": 348000
},
{
"epoch": 5.02,
"learning_rate": 6.488962971040457e-05,
"loss": 1.8186,
"step": 348500
},
{
"epoch": 5.02,
"learning_rate": 6.483791067561883e-05,
"loss": 1.809,
"step": 349000
},
{
"epoch": 5.03,
"learning_rate": 6.47859839338661e-05,
"loss": 1.8136,
"step": 349500
},
{
"epoch": 5.04,
"learning_rate": 6.473426489908038e-05,
"loss": 1.8182,
"step": 350000
},
{
"epoch": 5.05,
"learning_rate": 6.468233815732765e-05,
"loss": 1.8141,
"step": 350500
},
{
"epoch": 5.05,
"learning_rate": 6.463061912254193e-05,
"loss": 1.8174,
"step": 351000
},
{
"epoch": 5.06,
"learning_rate": 6.457869238078918e-05,
"loss": 1.8149,
"step": 351500
},
{
"epoch": 5.07,
"learning_rate": 6.452697334600346e-05,
"loss": 1.8188,
"step": 352000
},
{
"epoch": 5.07,
"learning_rate": 6.447504660425073e-05,
"loss": 1.8127,
"step": 352500
},
{
"epoch": 5.08,
"learning_rate": 6.442332756946499e-05,
"loss": 1.8144,
"step": 353000
},
{
"epoch": 5.09,
"learning_rate": 6.437140082771227e-05,
"loss": 1.8183,
"step": 353500
},
{
"epoch": 5.1,
"learning_rate": 6.431968179292655e-05,
"loss": 1.8149,
"step": 354000
},
{
"epoch": 5.1,
"learning_rate": 6.42677550511738e-05,
"loss": 1.8138,
"step": 354500
},
{
"epoch": 5.11,
"learning_rate": 6.421603601638808e-05,
"loss": 1.8104,
"step": 355000
},
{
"epoch": 5.12,
"learning_rate": 6.416410927463534e-05,
"loss": 1.814,
"step": 355500
},
{
"epoch": 5.12,
"learning_rate": 6.411239023984962e-05,
"loss": 1.811,
"step": 356000
},
{
"epoch": 5.13,
"learning_rate": 6.406046349809688e-05,
"loss": 1.8139,
"step": 356500
},
{
"epoch": 5.14,
"learning_rate": 6.400874446331116e-05,
"loss": 1.8135,
"step": 357000
},
{
"epoch": 5.15,
"learning_rate": 6.395681772155843e-05,
"loss": 1.8111,
"step": 357500
},
{
"epoch": 5.15,
"learning_rate": 6.390509868677271e-05,
"loss": 1.8143,
"step": 358000
},
{
"epoch": 5.16,
"learning_rate": 6.385317194501998e-05,
"loss": 1.8134,
"step": 358500
},
{
"epoch": 5.17,
"learning_rate": 6.380145291023425e-05,
"loss": 1.8142,
"step": 359000
},
{
"epoch": 5.17,
"learning_rate": 6.374952616848151e-05,
"loss": 1.8122,
"step": 359500
},
{
"epoch": 5.18,
"learning_rate": 6.369780713369579e-05,
"loss": 1.8108,
"step": 360000
},
{
"epoch": 5.19,
"learning_rate": 6.364588039194304e-05,
"loss": 1.8123,
"step": 360500
},
{
"epoch": 5.2,
"learning_rate": 6.359416135715732e-05,
"loss": 1.8136,
"step": 361000
},
{
"epoch": 5.2,
"learning_rate": 6.35422346154046e-05,
"loss": 1.8121,
"step": 361500
},
{
"epoch": 5.21,
"learning_rate": 6.349051558061887e-05,
"loss": 1.811,
"step": 362000
},
{
"epoch": 5.22,
"learning_rate": 6.343858883886613e-05,
"loss": 1.8123,
"step": 362500
},
{
"epoch": 5.23,
"learning_rate": 6.338686980408041e-05,
"loss": 1.8083,
"step": 363000
},
{
"epoch": 5.23,
"learning_rate": 6.333494306232767e-05,
"loss": 1.8145,
"step": 363500
},
{
"epoch": 5.24,
"learning_rate": 6.328322402754194e-05,
"loss": 1.8137,
"step": 364000
},
{
"epoch": 5.25,
"learning_rate": 6.323129728578921e-05,
"loss": 1.8127,
"step": 364500
},
{
"epoch": 5.25,
"learning_rate": 6.317957825100349e-05,
"loss": 1.8109,
"step": 365000
},
{
"epoch": 5.26,
"learning_rate": 6.312765150925076e-05,
"loss": 1.8084,
"step": 365500
},
{
"epoch": 5.27,
"learning_rate": 6.307593247446502e-05,
"loss": 1.8125,
"step": 366000
},
{
"epoch": 5.28,
"learning_rate": 6.302400573271229e-05,
"loss": 1.8099,
"step": 366500
},
{
"epoch": 5.28,
"learning_rate": 6.297228669792657e-05,
"loss": 1.8108,
"step": 367000
},
{
"epoch": 5.29,
"learning_rate": 6.292035995617384e-05,
"loss": 1.8158,
"step": 367500
},
{
"epoch": 5.3,
"learning_rate": 6.286864092138811e-05,
"loss": 1.8079,
"step": 368000
},
{
"epoch": 5.3,
"learning_rate": 6.281671417963537e-05,
"loss": 1.8104,
"step": 368500
},
{
"epoch": 5.31,
"learning_rate": 6.276499514484965e-05,
"loss": 1.8068,
"step": 369000
},
{
"epoch": 5.32,
"learning_rate": 6.271306840309691e-05,
"loss": 1.8106,
"step": 369500
},
{
"epoch": 5.33,
"learning_rate": 6.266134936831118e-05,
"loss": 1.8083,
"step": 370000
},
{
"epoch": 5.33,
"learning_rate": 6.260942262655846e-05,
"loss": 1.8108,
"step": 370500
},
{
"epoch": 5.34,
"learning_rate": 6.255770359177273e-05,
"loss": 1.8119,
"step": 371000
},
{
"epoch": 5.35,
"learning_rate": 6.250577685002e-05,
"loss": 1.8102,
"step": 371500
},
{
"epoch": 5.35,
"learning_rate": 6.245405781523427e-05,
"loss": 1.8077,
"step": 372000
},
{
"epoch": 5.36,
"learning_rate": 6.240213107348154e-05,
"loss": 1.8087,
"step": 372500
},
{
"epoch": 5.37,
"learning_rate": 6.235041203869582e-05,
"loss": 1.8107,
"step": 373000
},
{
"epoch": 5.38,
"learning_rate": 6.229848529694307e-05,
"loss": 1.8095,
"step": 373500
},
{
"epoch": 5.38,
"learning_rate": 6.224676626215735e-05,
"loss": 1.8033,
"step": 374000
},
{
"epoch": 5.39,
"learning_rate": 6.219483952040462e-05,
"loss": 1.8088,
"step": 374500
},
{
"epoch": 5.4,
"learning_rate": 6.214312048561888e-05,
"loss": 1.8079,
"step": 375000
},
{
"epoch": 5.4,
"learning_rate": 6.209119374386616e-05,
"loss": 1.8119,
"step": 375500
},
{
"epoch": 5.41,
"learning_rate": 6.203947470908044e-05,
"loss": 1.8074,
"step": 376000
},
{
"epoch": 5.42,
"learning_rate": 6.19875479673277e-05,
"loss": 1.8048,
"step": 376500
},
{
"epoch": 5.43,
"learning_rate": 6.193582893254197e-05,
"loss": 1.8044,
"step": 377000
},
{
"epoch": 5.43,
"learning_rate": 6.188390219078923e-05,
"loss": 1.8127,
"step": 377500
},
{
"epoch": 5.44,
"learning_rate": 6.183218315600351e-05,
"loss": 1.8062,
"step": 378000
},
{
"epoch": 5.45,
"learning_rate": 6.178025641425078e-05,
"loss": 1.8083,
"step": 378500
},
{
"epoch": 5.46,
"learning_rate": 6.172853737946505e-05,
"loss": 1.8032,
"step": 379000
},
{
"epoch": 5.46,
"learning_rate": 6.167661063771232e-05,
"loss": 1.8118,
"step": 379500
},
{
"epoch": 5.47,
"learning_rate": 6.16248916029266e-05,
"loss": 1.8037,
"step": 380000
},
{
"epoch": 5.48,
"learning_rate": 6.157296486117385e-05,
"loss": 1.8066,
"step": 380500
},
{
"epoch": 5.48,
"learning_rate": 6.152124582638813e-05,
"loss": 1.8047,
"step": 381000
},
{
"epoch": 5.49,
"learning_rate": 6.14693190846354e-05,
"loss": 1.8061,
"step": 381500
},
{
"epoch": 5.5,
"learning_rate": 6.141760004984968e-05,
"loss": 1.8078,
"step": 382000
},
{
"epoch": 5.51,
"learning_rate": 6.136567330809693e-05,
"loss": 1.8105,
"step": 382500
},
{
"epoch": 5.51,
"learning_rate": 6.131395427331121e-05,
"loss": 1.801,
"step": 383000
},
{
"epoch": 5.52,
"learning_rate": 6.126202753155849e-05,
"loss": 1.8049,
"step": 383500
},
{
"epoch": 5.53,
"learning_rate": 6.121030849677276e-05,
"loss": 1.8085,
"step": 384000
},
{
"epoch": 5.53,
"learning_rate": 6.115838175502002e-05,
"loss": 1.8047,
"step": 384500
},
{
"epoch": 5.54,
"learning_rate": 6.11066627202343e-05,
"loss": 1.8083,
"step": 385000
},
{
"epoch": 5.55,
"learning_rate": 6.105473597848156e-05,
"loss": 1.8053,
"step": 385500
},
{
"epoch": 5.56,
"learning_rate": 6.1003016943695835e-05,
"loss": 1.8079,
"step": 386000
},
{
"epoch": 5.56,
"learning_rate": 6.095109020194311e-05,
"loss": 1.8046,
"step": 386500
},
{
"epoch": 5.57,
"learning_rate": 6.089937116715738e-05,
"loss": 1.8037,
"step": 387000
},
{
"epoch": 5.58,
"learning_rate": 6.084744442540464e-05,
"loss": 1.8011,
"step": 387500
},
{
"epoch": 5.58,
"learning_rate": 6.079572539061892e-05,
"loss": 1.8042,
"step": 388000
},
{
"epoch": 5.59,
"learning_rate": 6.074379864886618e-05,
"loss": 1.8056,
"step": 388500
},
{
"epoch": 5.6,
"learning_rate": 6.069207961408045e-05,
"loss": 1.803,
"step": 389000
},
{
"epoch": 5.61,
"learning_rate": 6.064015287232773e-05,
"loss": 1.7992,
"step": 389500
},
{
"epoch": 5.61,
"learning_rate": 6.0588433837542005e-05,
"loss": 1.8068,
"step": 390000
},
{
"epoch": 5.62,
"learning_rate": 6.053650709578926e-05,
"loss": 1.8008,
"step": 390500
},
{
"epoch": 5.63,
"learning_rate": 6.048478806100354e-05,
"loss": 1.8023,
"step": 391000
},
{
"epoch": 5.64,
"learning_rate": 6.04328613192508e-05,
"loss": 1.8037,
"step": 391500
},
{
"epoch": 5.64,
"learning_rate": 6.038114228446508e-05,
"loss": 1.8034,
"step": 392000
},
{
"epoch": 5.65,
"learning_rate": 6.0329215542712345e-05,
"loss": 1.7997,
"step": 392500
},
{
"epoch": 5.66,
"learning_rate": 6.027749650792662e-05,
"loss": 1.8028,
"step": 393000
},
{
"epoch": 5.66,
"learning_rate": 6.0225569766173884e-05,
"loss": 1.7945,
"step": 393500
},
{
"epoch": 5.67,
"learning_rate": 6.017385073138816e-05,
"loss": 1.8,
"step": 394000
},
{
"epoch": 5.68,
"learning_rate": 6.012192398963542e-05,
"loss": 1.7997,
"step": 394500
},
{
"epoch": 5.69,
"learning_rate": 6.0070204954849695e-05,
"loss": 1.8051,
"step": 395000
},
{
"epoch": 5.69,
"learning_rate": 6.001827821309697e-05,
"loss": 1.8016,
"step": 395500
},
{
"epoch": 5.7,
"learning_rate": 5.996655917831124e-05,
"loss": 1.8013,
"step": 396000
},
{
"epoch": 5.71,
"learning_rate": 5.99146324365585e-05,
"loss": 1.7962,
"step": 396500
},
{
"epoch": 5.71,
"learning_rate": 5.986291340177278e-05,
"loss": 1.802,
"step": 397000
},
{
"epoch": 5.72,
"learning_rate": 5.9810986660020055e-05,
"loss": 1.8034,
"step": 397500
},
{
"epoch": 5.73,
"learning_rate": 5.9759267625234326e-05,
"loss": 1.7997,
"step": 398000
},
{
"epoch": 5.74,
"learning_rate": 5.970734088348159e-05,
"loss": 1.8004,
"step": 398500
},
{
"epoch": 5.74,
"learning_rate": 5.9655621848695865e-05,
"loss": 1.7989,
"step": 399000
},
{
"epoch": 5.75,
"learning_rate": 5.960369510694313e-05,
"loss": 1.798,
"step": 399500
},
{
"epoch": 5.76,
"learning_rate": 5.95519760721574e-05,
"loss": 1.8007,
"step": 400000
},
{
"epoch": 5.76,
"learning_rate": 5.950004933040467e-05,
"loss": 1.803,
"step": 400500
},
{
"epoch": 5.77,
"learning_rate": 5.944833029561895e-05,
"loss": 1.8009,
"step": 401000
},
{
"epoch": 5.78,
"learning_rate": 5.939640355386621e-05,
"loss": 1.8006,
"step": 401500
},
{
"epoch": 5.79,
"learning_rate": 5.934468451908048e-05,
"loss": 1.7991,
"step": 402000
},
{
"epoch": 5.79,
"learning_rate": 5.9292757777327745e-05,
"loss": 1.8012,
"step": 402500
},
{
"epoch": 5.8,
"learning_rate": 5.924103874254202e-05,
"loss": 1.7991,
"step": 403000
},
{
"epoch": 5.81,
"learning_rate": 5.918911200078929e-05,
"loss": 1.7986,
"step": 403500
},
{
"epoch": 5.82,
"learning_rate": 5.913739296600357e-05,
"loss": 1.7957,
"step": 404000
},
{
"epoch": 5.82,
"learning_rate": 5.908546622425083e-05,
"loss": 1.8021,
"step": 404500
},
{
"epoch": 5.83,
"learning_rate": 5.903374718946511e-05,
"loss": 1.7995,
"step": 405000
},
{
"epoch": 5.84,
"learning_rate": 5.898182044771237e-05,
"loss": 1.7988,
"step": 405500
},
{
"epoch": 5.84,
"learning_rate": 5.893010141292664e-05,
"loss": 1.7979,
"step": 406000
},
{
"epoch": 5.85,
"learning_rate": 5.8878174671173915e-05,
"loss": 1.8014,
"step": 406500
},
{
"epoch": 5.86,
"learning_rate": 5.8826455636388186e-05,
"loss": 1.7983,
"step": 407000
},
{
"epoch": 5.87,
"learning_rate": 5.877452889463545e-05,
"loss": 1.7942,
"step": 407500
},
{
"epoch": 5.87,
"learning_rate": 5.8722809859849726e-05,
"loss": 1.8022,
"step": 408000
},
{
"epoch": 5.88,
"learning_rate": 5.867088311809699e-05,
"loss": 1.8001,
"step": 408500
},
{
"epoch": 5.89,
"learning_rate": 5.8619164083311265e-05,
"loss": 1.7983,
"step": 409000
},
{
"epoch": 5.89,
"learning_rate": 5.856723734155853e-05,
"loss": 1.7952,
"step": 409500
},
{
"epoch": 5.9,
"learning_rate": 5.851551830677281e-05,
"loss": 1.7954,
"step": 410000
},
{
"epoch": 5.91,
"learning_rate": 5.846359156502007e-05,
"loss": 1.7962,
"step": 410500
},
{
"epoch": 5.92,
"learning_rate": 5.841187253023434e-05,
"loss": 1.7982,
"step": 411000
},
{
"epoch": 5.92,
"learning_rate": 5.835994578848162e-05,
"loss": 1.7967,
"step": 411500
},
{
"epoch": 5.93,
"learning_rate": 5.8308226753695896e-05,
"loss": 1.7942,
"step": 412000
},
{
"epoch": 5.94,
"learning_rate": 5.825630001194316e-05,
"loss": 1.7974,
"step": 412500
},
{
"epoch": 5.94,
"learning_rate": 5.820458097715743e-05,
"loss": 1.7998,
"step": 413000
},
{
"epoch": 5.95,
"learning_rate": 5.815265423540469e-05,
"loss": 1.8,
"step": 413500
},
{
"epoch": 5.96,
"learning_rate": 5.810093520061897e-05,
"loss": 1.7984,
"step": 414000
},
{
"epoch": 5.97,
"learning_rate": 5.8049008458866236e-05,
"loss": 1.7952,
"step": 414500
},
{
"epoch": 5.97,
"learning_rate": 5.7997289424080514e-05,
"loss": 1.7919,
"step": 415000
},
{
"epoch": 5.98,
"learning_rate": 5.7945362682327775e-05,
"loss": 1.8007,
"step": 415500
},
{
"epoch": 5.99,
"learning_rate": 5.789364364754205e-05,
"loss": 1.7937,
"step": 416000
},
{
"epoch": 6.0,
"learning_rate": 5.7841716905789315e-05,
"loss": 1.7983,
"step": 416500
},
{
"epoch": 6.0,
"eval_accuracy": 0.653541434859015,
"eval_loss": 1.6650390625,
"eval_runtime": 654.5009,
"eval_samples_per_second": 823.432,
"eval_steps_per_second": 34.31,
"step": 416838
},
{
"epoch": 6.0,
"learning_rate": 5.7789997871003586e-05,
"loss": 1.7937,
"step": 417000
},
{
"epoch": 6.01,
"learning_rate": 5.773807112925086e-05,
"loss": 1.7956,
"step": 417500
},
{
"epoch": 6.02,
"learning_rate": 5.768635209446513e-05,
"loss": 1.7956,
"step": 418000
},
{
"epoch": 6.02,
"learning_rate": 5.763442535271239e-05,
"loss": 1.7945,
"step": 418500
},
{
"epoch": 6.03,
"learning_rate": 5.758270631792667e-05,
"loss": 1.7949,
"step": 419000
},
{
"epoch": 6.04,
"learning_rate": 5.753077957617393e-05,
"loss": 1.7931,
"step": 419500
},
{
"epoch": 6.05,
"learning_rate": 5.747906054138821e-05,
"loss": 1.7941,
"step": 420000
},
{
"epoch": 6.05,
"learning_rate": 5.742713379963548e-05,
"loss": 1.7906,
"step": 420500
},
{
"epoch": 6.06,
"learning_rate": 5.7375414764849756e-05,
"loss": 1.7891,
"step": 421000
},
{
"epoch": 6.07,
"learning_rate": 5.732348802309702e-05,
"loss": 1.7921,
"step": 421500
},
{
"epoch": 6.07,
"learning_rate": 5.727176898831129e-05,
"loss": 1.7909,
"step": 422000
},
{
"epoch": 6.08,
"learning_rate": 5.721984224655855e-05,
"loss": 1.7931,
"step": 422500
},
{
"epoch": 6.09,
"learning_rate": 5.716812321177283e-05,
"loss": 1.7914,
"step": 423000
},
{
"epoch": 6.1,
"learning_rate": 5.71161964700201e-05,
"loss": 1.7881,
"step": 423500
},
{
"epoch": 6.1,
"learning_rate": 5.7064477435234374e-05,
"loss": 1.7966,
"step": 424000
},
{
"epoch": 6.11,
"learning_rate": 5.7012550693481636e-05,
"loss": 1.7895,
"step": 424500
},
{
"epoch": 6.12,
"learning_rate": 5.6960831658695913e-05,
"loss": 1.7907,
"step": 425000
},
{
"epoch": 6.12,
"learning_rate": 5.690890491694318e-05,
"loss": 1.7931,
"step": 425500
},
{
"epoch": 6.13,
"learning_rate": 5.685718588215746e-05,
"loss": 1.795,
"step": 426000
},
{
"epoch": 6.14,
"learning_rate": 5.680525914040472e-05,
"loss": 1.7916,
"step": 426500
},
{
"epoch": 6.15,
"learning_rate": 5.6753540105619e-05,
"loss": 1.7892,
"step": 427000
},
{
"epoch": 6.15,
"learning_rate": 5.670161336386626e-05,
"loss": 1.7971,
"step": 427500
},
{
"epoch": 6.16,
"learning_rate": 5.664989432908053e-05,
"loss": 1.787,
"step": 428000
},
{
"epoch": 6.17,
"learning_rate": 5.6597967587327806e-05,
"loss": 1.7915,
"step": 428500
},
{
"epoch": 6.18,
"learning_rate": 5.654624855254208e-05,
"loss": 1.7912,
"step": 429000
},
{
"epoch": 6.18,
"learning_rate": 5.649432181078934e-05,
"loss": 1.7941,
"step": 429500
},
{
"epoch": 6.19,
"learning_rate": 5.6442602776003617e-05,
"loss": 1.7921,
"step": 430000
},
{
"epoch": 6.2,
"learning_rate": 5.639067603425088e-05,
"loss": 1.7908,
"step": 430500
},
{
"epoch": 6.2,
"learning_rate": 5.6338956999465156e-05,
"loss": 1.7863,
"step": 431000
},
{
"epoch": 6.21,
"learning_rate": 5.6287030257712424e-05,
"loss": 1.791,
"step": 431500
},
{
"epoch": 6.22,
"learning_rate": 5.62353112229267e-05,
"loss": 1.7888,
"step": 432000
},
{
"epoch": 6.23,
"learning_rate": 5.618338448117396e-05,
"loss": 1.7913,
"step": 432500
},
{
"epoch": 6.23,
"learning_rate": 5.6131665446388234e-05,
"loss": 1.7945,
"step": 433000
},
{
"epoch": 6.24,
"learning_rate": 5.6079738704635496e-05,
"loss": 1.7891,
"step": 433500
},
{
"epoch": 6.25,
"learning_rate": 5.6028019669849774e-05,
"loss": 1.7894,
"step": 434000
},
{
"epoch": 6.25,
"learning_rate": 5.597609292809705e-05,
"loss": 1.7893,
"step": 434500
},
{
"epoch": 6.26,
"learning_rate": 5.592437389331132e-05,
"loss": 1.787,
"step": 435000
},
{
"epoch": 6.27,
"learning_rate": 5.587244715155858e-05,
"loss": 1.7925,
"step": 435500
},
{
"epoch": 6.28,
"learning_rate": 5.582072811677286e-05,
"loss": 1.7886,
"step": 436000
},
{
"epoch": 6.28,
"learning_rate": 5.576880137502012e-05,
"loss": 1.7886,
"step": 436500
},
{
"epoch": 6.29,
"learning_rate": 5.571708234023439e-05,
"loss": 1.7894,
"step": 437000
},
{
"epoch": 6.3,
"learning_rate": 5.5665155598481666e-05,
"loss": 1.791,
"step": 437500
},
{
"epoch": 6.3,
"learning_rate": 5.5613436563695944e-05,
"loss": 1.7893,
"step": 438000
},
{
"epoch": 6.31,
"learning_rate": 5.5561509821943206e-05,
"loss": 1.7938,
"step": 438500
},
{
"epoch": 6.32,
"learning_rate": 5.550979078715748e-05,
"loss": 1.7898,
"step": 439000
},
{
"epoch": 6.33,
"learning_rate": 5.545786404540475e-05,
"loss": 1.789,
"step": 439500
},
{
"epoch": 6.33,
"learning_rate": 5.540614501061902e-05,
"loss": 1.787,
"step": 440000
},
{
"epoch": 6.34,
"learning_rate": 5.5354218268866284e-05,
"loss": 1.7902,
"step": 440500
},
{
"epoch": 6.35,
"learning_rate": 5.530249923408056e-05,
"loss": 1.7909,
"step": 441000
},
{
"epoch": 6.35,
"learning_rate": 5.5250572492327824e-05,
"loss": 1.7877,
"step": 441500
},
{
"epoch": 6.36,
"learning_rate": 5.51988534575421e-05,
"loss": 1.7861,
"step": 442000
},
{
"epoch": 6.37,
"learning_rate": 5.514692671578937e-05,
"loss": 1.7923,
"step": 442500
},
{
"epoch": 6.38,
"learning_rate": 5.509520768100365e-05,
"loss": 1.7884,
"step": 443000
},
{
"epoch": 6.38,
"learning_rate": 5.504328093925091e-05,
"loss": 1.7857,
"step": 443500
},
{
"epoch": 6.39,
"learning_rate": 5.499156190446518e-05,
"loss": 1.7871,
"step": 444000
},
{
"epoch": 6.4,
"learning_rate": 5.493963516271244e-05,
"loss": 1.7854,
"step": 444500
},
{
"epoch": 6.41,
"learning_rate": 5.488791612792672e-05,
"loss": 1.7808,
"step": 445000
},
{
"epoch": 6.41,
"learning_rate": 5.4835989386173994e-05,
"loss": 1.7894,
"step": 445500
},
{
"epoch": 6.42,
"learning_rate": 5.4784270351388265e-05,
"loss": 1.787,
"step": 446000
},
{
"epoch": 6.43,
"learning_rate": 5.473234360963553e-05,
"loss": 1.785,
"step": 446500
},
{
"epoch": 6.43,
"learning_rate": 5.4680624574849804e-05,
"loss": 1.7849,
"step": 447000
},
{
"epoch": 6.44,
"learning_rate": 5.4628697833097066e-05,
"loss": 1.7826,
"step": 447500
},
{
"epoch": 6.45,
"learning_rate": 5.457697879831134e-05,
"loss": 1.7885,
"step": 448000
},
{
"epoch": 6.46,
"learning_rate": 5.452505205655861e-05,
"loss": 1.7873,
"step": 448500
},
{
"epoch": 6.46,
"learning_rate": 5.447333302177289e-05,
"loss": 1.7839,
"step": 449000
},
{
"epoch": 6.47,
"learning_rate": 5.442140628002015e-05,
"loss": 1.7839,
"step": 449500
},
{
"epoch": 6.48,
"learning_rate": 5.436968724523442e-05,
"loss": 1.785,
"step": 450000
},
{
"epoch": 6.48,
"learning_rate": 5.43177605034817e-05,
"loss": 1.7891,
"step": 450500
},
{
"epoch": 6.49,
"learning_rate": 5.4266041468695975e-05,
"loss": 1.7841,
"step": 451000
},
{
"epoch": 6.5,
"learning_rate": 5.421411472694323e-05,
"loss": 1.787,
"step": 451500
},
{
"epoch": 6.51,
"learning_rate": 5.416239569215751e-05,
"loss": 1.7834,
"step": 452000
},
{
"epoch": 6.51,
"learning_rate": 5.411046895040477e-05,
"loss": 1.7867,
"step": 452500
},
{
"epoch": 6.52,
"learning_rate": 5.405874991561905e-05,
"loss": 1.7861,
"step": 453000
},
{
"epoch": 6.53,
"learning_rate": 5.4006823173866315e-05,
"loss": 1.7801,
"step": 453500
},
{
"epoch": 6.53,
"learning_rate": 5.395510413908059e-05,
"loss": 1.7849,
"step": 454000
},
{
"epoch": 6.54,
"learning_rate": 5.3903177397327854e-05,
"loss": 1.786,
"step": 454500
},
{
"epoch": 6.55,
"learning_rate": 5.385145836254213e-05,
"loss": 1.7866,
"step": 455000
},
{
"epoch": 6.56,
"learning_rate": 5.379953162078939e-05,
"loss": 1.7842,
"step": 455500
},
{
"epoch": 6.56,
"learning_rate": 5.3747812586003665e-05,
"loss": 1.7848,
"step": 456000
},
{
"epoch": 6.57,
"learning_rate": 5.369588584425094e-05,
"loss": 1.7831,
"step": 456500
},
{
"epoch": 6.58,
"learning_rate": 5.364416680946521e-05,
"loss": 1.784,
"step": 457000
},
{
"epoch": 6.59,
"learning_rate": 5.359224006771247e-05,
"loss": 1.7814,
"step": 457500
},
{
"epoch": 6.59,
"learning_rate": 5.354052103292675e-05,
"loss": 1.7813,
"step": 458000
},
{
"epoch": 6.6,
"learning_rate": 5.348859429117401e-05,
"loss": 1.7835,
"step": 458500
},
{
"epoch": 6.61,
"learning_rate": 5.343687525638828e-05,
"loss": 1.7839,
"step": 459000
},
{
"epoch": 6.61,
"learning_rate": 5.338494851463556e-05,
"loss": 1.7875,
"step": 459500
},
{
"epoch": 6.62,
"learning_rate": 5.3333229479849835e-05,
"loss": 1.7841,
"step": 460000
},
{
"epoch": 6.63,
"learning_rate": 5.32813027380971e-05,
"loss": 1.7816,
"step": 460500
},
{
"epoch": 6.64,
"learning_rate": 5.322958370331137e-05,
"loss": 1.7807,
"step": 461000
},
{
"epoch": 6.64,
"learning_rate": 5.317765696155863e-05,
"loss": 1.7856,
"step": 461500
},
{
"epoch": 6.65,
"learning_rate": 5.312593792677291e-05,
"loss": 1.7811,
"step": 462000
},
{
"epoch": 6.66,
"learning_rate": 5.3074011185020175e-05,
"loss": 1.7778,
"step": 462500
},
{
"epoch": 6.66,
"learning_rate": 5.302229215023445e-05,
"loss": 1.7846,
"step": 463000
},
{
"epoch": 6.67,
"learning_rate": 5.2970365408481715e-05,
"loss": 1.7823,
"step": 463500
},
{
"epoch": 6.68,
"learning_rate": 5.291864637369599e-05,
"loss": 1.7802,
"step": 464000
},
{
"epoch": 6.69,
"learning_rate": 5.286671963194326e-05,
"loss": 1.783,
"step": 464500
},
{
"epoch": 6.69,
"learning_rate": 5.281500059715754e-05,
"loss": 1.7805,
"step": 465000
},
{
"epoch": 6.7,
"learning_rate": 5.27630738554048e-05,
"loss": 1.779,
"step": 465500
},
{
"epoch": 6.71,
"learning_rate": 5.271135482061908e-05,
"loss": 1.7814,
"step": 466000
},
{
"epoch": 6.71,
"learning_rate": 5.265942807886633e-05,
"loss": 1.7842,
"step": 466500
},
{
"epoch": 6.72,
"learning_rate": 5.260770904408061e-05,
"loss": 1.7831,
"step": 467000
},
{
"epoch": 6.73,
"learning_rate": 5.2555782302327885e-05,
"loss": 1.7806,
"step": 467500
},
{
"epoch": 6.74,
"learning_rate": 5.2504063267542156e-05,
"loss": 1.7771,
"step": 468000
},
{
"epoch": 6.74,
"learning_rate": 5.245213652578942e-05,
"loss": 1.782,
"step": 468500
},
{
"epoch": 6.75,
"learning_rate": 5.2400417491003695e-05,
"loss": 1.7842,
"step": 469000
},
{
"epoch": 6.76,
"learning_rate": 5.234849074925096e-05,
"loss": 1.7814,
"step": 469500
},
{
"epoch": 6.77,
"learning_rate": 5.2296771714465235e-05,
"loss": 1.781,
"step": 470000
},
{
"epoch": 6.77,
"learning_rate": 5.22448449727125e-05,
"loss": 1.7829,
"step": 470500
},
{
"epoch": 6.78,
"learning_rate": 5.219312593792678e-05,
"loss": 1.7795,
"step": 471000
},
{
"epoch": 6.79,
"learning_rate": 5.214119919617404e-05,
"loss": 1.7825,
"step": 471500
},
{
"epoch": 6.79,
"learning_rate": 5.208948016138831e-05,
"loss": 1.7825,
"step": 472000
},
{
"epoch": 6.8,
"learning_rate": 5.2037553419635575e-05,
"loss": 1.7795,
"step": 472500
},
{
"epoch": 6.81,
"learning_rate": 5.198583438484985e-05,
"loss": 1.7813,
"step": 473000
},
{
"epoch": 6.82,
"learning_rate": 5.193390764309712e-05,
"loss": 1.78,
"step": 473500
},
{
"epoch": 6.82,
"learning_rate": 5.18821886083114e-05,
"loss": 1.7778,
"step": 474000
},
{
"epoch": 6.83,
"learning_rate": 5.183026186655866e-05,
"loss": 1.7773,
"step": 474500
},
{
"epoch": 6.84,
"learning_rate": 5.177854283177294e-05,
"loss": 1.7746,
"step": 475000
},
{
"epoch": 6.84,
"learning_rate": 5.17266160900202e-05,
"loss": 1.7806,
"step": 475500
},
{
"epoch": 6.85,
"learning_rate": 5.167489705523447e-05,
"loss": 1.7832,
"step": 476000
},
{
"epoch": 6.86,
"learning_rate": 5.1622970313481745e-05,
"loss": 1.7784,
"step": 476500
},
{
"epoch": 6.87,
"learning_rate": 5.157125127869602e-05,
"loss": 1.7803,
"step": 477000
},
{
"epoch": 6.87,
"learning_rate": 5.151932453694328e-05,
"loss": 1.7763,
"step": 477500
},
{
"epoch": 6.88,
"learning_rate": 5.1467605502157556e-05,
"loss": 1.7796,
"step": 478000
},
{
"epoch": 6.89,
"learning_rate": 5.141567876040483e-05,
"loss": 1.7793,
"step": 478500
},
{
"epoch": 6.89,
"learning_rate": 5.13639597256191e-05,
"loss": 1.7778,
"step": 479000
},
{
"epoch": 6.9,
"learning_rate": 5.131203298386636e-05,
"loss": 1.7813,
"step": 479500
},
{
"epoch": 6.91,
"learning_rate": 5.126031394908064e-05,
"loss": 1.7826,
"step": 480000
},
{
"epoch": 6.92,
"learning_rate": 5.12083872073279e-05,
"loss": 1.7755,
"step": 480500
},
{
"epoch": 6.92,
"learning_rate": 5.115666817254218e-05,
"loss": 1.7787,
"step": 481000
},
{
"epoch": 6.93,
"learning_rate": 5.110474143078945e-05,
"loss": 1.7764,
"step": 481500
},
{
"epoch": 6.94,
"learning_rate": 5.1053022396003726e-05,
"loss": 1.7785,
"step": 482000
},
{
"epoch": 6.95,
"learning_rate": 5.100109565425099e-05,
"loss": 1.7748,
"step": 482500
},
{
"epoch": 6.95,
"learning_rate": 5.094937661946526e-05,
"loss": 1.7775,
"step": 483000
},
{
"epoch": 6.96,
"learning_rate": 5.089744987771252e-05,
"loss": 1.7784,
"step": 483500
},
{
"epoch": 6.97,
"learning_rate": 5.08457308429268e-05,
"loss": 1.7801,
"step": 484000
},
{
"epoch": 6.97,
"learning_rate": 5.0793804101174066e-05,
"loss": 1.7745,
"step": 484500
},
{
"epoch": 6.98,
"learning_rate": 5.0742085066388344e-05,
"loss": 1.7795,
"step": 485000
},
{
"epoch": 6.99,
"learning_rate": 5.0690158324635606e-05,
"loss": 1.7768,
"step": 485500
},
{
"epoch": 7.0,
"learning_rate": 5.063843928984988e-05,
"loss": 1.7788,
"step": 486000
},
{
"epoch": 7.0,
"eval_accuracy": 0.6557946838285342,
"eval_loss": 1.6484375,
"eval_runtime": 654.0163,
"eval_samples_per_second": 824.042,
"eval_steps_per_second": 34.336,
"step": 486311
},
{
"epoch": 7.0,
"learning_rate": 5.0586512548097145e-05,
"loss": 1.779,
"step": 486500
},
{
"epoch": 7.01,
"learning_rate": 5.0534793513311416e-05,
"loss": 1.7751,
"step": 487000
},
{
"epoch": 7.02,
"learning_rate": 5.048286677155869e-05,
"loss": 1.7737,
"step": 487500
},
{
"epoch": 7.02,
"learning_rate": 5.043114773677297e-05,
"loss": 1.7722,
"step": 488000
},
{
"epoch": 7.03,
"learning_rate": 5.037922099502022e-05,
"loss": 1.7749,
"step": 488500
},
{
"epoch": 7.04,
"learning_rate": 5.03275019602345e-05,
"loss": 1.7724,
"step": 489000
},
{
"epoch": 7.05,
"learning_rate": 5.027557521848176e-05,
"loss": 1.7736,
"step": 489500
},
{
"epoch": 7.05,
"learning_rate": 5.022385618369604e-05,
"loss": 1.7769,
"step": 490000
},
{
"epoch": 7.06,
"learning_rate": 5.017192944194331e-05,
"loss": 1.7723,
"step": 490500
},
{
"epoch": 7.07,
"learning_rate": 5.0120210407157586e-05,
"loss": 1.7733,
"step": 491000
},
{
"epoch": 7.07,
"learning_rate": 5.006828366540485e-05,
"loss": 1.769,
"step": 491500
},
{
"epoch": 7.08,
"learning_rate": 5.0016564630619126e-05,
"loss": 1.776,
"step": 492000
},
{
"epoch": 7.09,
"learning_rate": 4.996463788886639e-05,
"loss": 1.7717,
"step": 492500
},
{
"epoch": 7.1,
"learning_rate": 4.9912918854080665e-05,
"loss": 1.776,
"step": 493000
},
{
"epoch": 7.1,
"learning_rate": 4.986099211232793e-05,
"loss": 1.7734,
"step": 493500
},
{
"epoch": 7.11,
"learning_rate": 4.9809273077542204e-05,
"loss": 1.7775,
"step": 494000
},
{
"epoch": 7.12,
"learning_rate": 4.975734633578947e-05,
"loss": 1.7776,
"step": 494500
},
{
"epoch": 7.13,
"learning_rate": 4.970562730100375e-05,
"loss": 1.7721,
"step": 495000
},
{
"epoch": 7.13,
"learning_rate": 4.965370055925101e-05,
"loss": 1.7699,
"step": 495500
},
{
"epoch": 7.14,
"learning_rate": 4.960198152446528e-05,
"loss": 1.7733,
"step": 496000
},
{
"epoch": 7.15,
"learning_rate": 4.955005478271255e-05,
"loss": 1.7729,
"step": 496500
},
{
"epoch": 7.15,
"learning_rate": 4.949833574792683e-05,
"loss": 1.7708,
"step": 497000
},
{
"epoch": 7.16,
"learning_rate": 4.944640900617409e-05,
"loss": 1.7723,
"step": 497500
},
{
"epoch": 7.17,
"learning_rate": 4.939468997138837e-05,
"loss": 1.7712,
"step": 498000
},
{
"epoch": 7.18,
"learning_rate": 4.934276322963563e-05,
"loss": 1.7676,
"step": 498500
},
{
"epoch": 7.18,
"learning_rate": 4.929104419484991e-05,
"loss": 1.7722,
"step": 499000
},
{
"epoch": 7.19,
"learning_rate": 4.923911745309717e-05,
"loss": 1.7726,
"step": 499500
},
{
"epoch": 7.2,
"learning_rate": 4.9187398418311447e-05,
"loss": 1.7728,
"step": 500000
},
{
"epoch": 7.2,
"learning_rate": 4.9135471676558715e-05,
"loss": 1.7717,
"step": 500500
},
{
"epoch": 7.21,
"learning_rate": 4.908354493480598e-05,
"loss": 1.7681,
"step": 501000
},
{
"epoch": 7.22,
"learning_rate": 4.9031825900020254e-05,
"loss": 1.7714,
"step": 501500
},
{
"epoch": 7.23,
"learning_rate": 4.8979899158267516e-05,
"loss": 1.7739,
"step": 502000
},
{
"epoch": 7.23,
"learning_rate": 4.8928180123481793e-05,
"loss": 1.7722,
"step": 502500
},
{
"epoch": 7.24,
"learning_rate": 4.887625338172906e-05,
"loss": 1.774,
"step": 503000
},
{
"epoch": 7.25,
"learning_rate": 4.882453434694333e-05,
"loss": 1.773,
"step": 503500
},
{
"epoch": 7.25,
"learning_rate": 4.87726076051906e-05,
"loss": 1.7714,
"step": 504000
},
{
"epoch": 7.26,
"learning_rate": 4.872088857040488e-05,
"loss": 1.7707,
"step": 504500
},
{
"epoch": 7.27,
"learning_rate": 4.866896182865214e-05,
"loss": 1.771,
"step": 505000
},
{
"epoch": 7.28,
"learning_rate": 4.861724279386641e-05,
"loss": 1.7751,
"step": 505500
},
{
"epoch": 7.28,
"learning_rate": 4.856531605211368e-05,
"loss": 1.7725,
"step": 506000
},
{
"epoch": 7.29,
"learning_rate": 4.851359701732796e-05,
"loss": 1.769,
"step": 506500
},
{
"epoch": 7.3,
"learning_rate": 4.846167027557522e-05,
"loss": 1.7724,
"step": 507000
},
{
"epoch": 7.3,
"learning_rate": 4.8409951240789497e-05,
"loss": 1.7742,
"step": 507500
},
{
"epoch": 7.31,
"learning_rate": 4.8358024499036765e-05,
"loss": 1.7667,
"step": 508000
},
{
"epoch": 7.32,
"learning_rate": 4.830630546425104e-05,
"loss": 1.7694,
"step": 508500
},
{
"epoch": 7.33,
"learning_rate": 4.82543787224983e-05,
"loss": 1.7746,
"step": 509000
},
{
"epoch": 7.33,
"learning_rate": 4.8202659687712575e-05,
"loss": 1.7697,
"step": 509500
},
{
"epoch": 7.34,
"learning_rate": 4.815073294595984e-05,
"loss": 1.7704,
"step": 510000
},
{
"epoch": 7.35,
"learning_rate": 4.809901391117412e-05,
"loss": 1.7721,
"step": 510500
},
{
"epoch": 7.36,
"learning_rate": 4.804708716942138e-05,
"loss": 1.7674,
"step": 511000
},
{
"epoch": 7.36,
"learning_rate": 4.799536813463566e-05,
"loss": 1.7727,
"step": 511500
},
{
"epoch": 7.37,
"learning_rate": 4.794344139288292e-05,
"loss": 1.7702,
"step": 512000
},
{
"epoch": 7.38,
"learning_rate": 4.78917223580972e-05,
"loss": 1.768,
"step": 512500
},
{
"epoch": 7.38,
"learning_rate": 4.783979561634446e-05,
"loss": 1.7709,
"step": 513000
},
{
"epoch": 7.39,
"learning_rate": 4.778807658155874e-05,
"loss": 1.7669,
"step": 513500
},
{
"epoch": 7.4,
"learning_rate": 4.773614983980601e-05,
"loss": 1.7699,
"step": 514000
},
{
"epoch": 7.41,
"learning_rate": 4.768443080502028e-05,
"loss": 1.7722,
"step": 514500
},
{
"epoch": 7.41,
"learning_rate": 4.7632504063267546e-05,
"loss": 1.7668,
"step": 515000
},
{
"epoch": 7.42,
"learning_rate": 4.7580785028481824e-05,
"loss": 1.7692,
"step": 515500
},
{
"epoch": 7.43,
"learning_rate": 4.7528858286729086e-05,
"loss": 1.7711,
"step": 516000
},
{
"epoch": 7.43,
"learning_rate": 4.747713925194336e-05,
"loss": 1.7698,
"step": 516500
},
{
"epoch": 7.44,
"learning_rate": 4.7425212510190625e-05,
"loss": 1.7683,
"step": 517000
},
{
"epoch": 7.45,
"learning_rate": 4.73734934754049e-05,
"loss": 1.7646,
"step": 517500
},
{
"epoch": 7.46,
"learning_rate": 4.7321566733652164e-05,
"loss": 1.7708,
"step": 518000
},
{
"epoch": 7.46,
"learning_rate": 4.726984769886644e-05,
"loss": 1.769,
"step": 518500
},
{
"epoch": 7.47,
"learning_rate": 4.7217920957113704e-05,
"loss": 1.7661,
"step": 519000
},
{
"epoch": 7.48,
"learning_rate": 4.716620192232798e-05,
"loss": 1.7644,
"step": 519500
},
{
"epoch": 7.48,
"learning_rate": 4.711427518057524e-05,
"loss": 1.7686,
"step": 520000
},
{
"epoch": 7.49,
"learning_rate": 4.706255614578952e-05,
"loss": 1.7745,
"step": 520500
},
{
"epoch": 7.5,
"learning_rate": 4.701062940403679e-05,
"loss": 1.7687,
"step": 521000
},
{
"epoch": 7.51,
"learning_rate": 4.6958910369251067e-05,
"loss": 1.7672,
"step": 521500
},
{
"epoch": 7.51,
"learning_rate": 4.690698362749833e-05,
"loss": 1.7682,
"step": 522000
},
{
"epoch": 7.52,
"learning_rate": 4.6855264592712606e-05,
"loss": 1.7658,
"step": 522500
},
{
"epoch": 7.53,
"learning_rate": 4.680333785095987e-05,
"loss": 1.7689,
"step": 523000
},
{
"epoch": 7.54,
"learning_rate": 4.6751618816174145e-05,
"loss": 1.7651,
"step": 523500
},
{
"epoch": 7.54,
"learning_rate": 4.669969207442141e-05,
"loss": 1.7693,
"step": 524000
},
{
"epoch": 7.55,
"learning_rate": 4.6647973039635684e-05,
"loss": 1.7638,
"step": 524500
},
{
"epoch": 7.56,
"learning_rate": 4.659604629788295e-05,
"loss": 1.7658,
"step": 525000
},
{
"epoch": 7.56,
"learning_rate": 4.6544327263097224e-05,
"loss": 1.7689,
"step": 525500
},
{
"epoch": 7.57,
"learning_rate": 4.6492400521344485e-05,
"loss": 1.7721,
"step": 526000
},
{
"epoch": 7.58,
"learning_rate": 4.644068148655876e-05,
"loss": 1.768,
"step": 526500
},
{
"epoch": 7.59,
"learning_rate": 4.638875474480603e-05,
"loss": 1.7643,
"step": 527000
},
{
"epoch": 7.59,
"learning_rate": 4.63370357100203e-05,
"loss": 1.764,
"step": 527500
},
{
"epoch": 7.6,
"learning_rate": 4.628510896826757e-05,
"loss": 1.7626,
"step": 528000
},
{
"epoch": 7.61,
"learning_rate": 4.623338993348185e-05,
"loss": 1.7668,
"step": 528500
},
{
"epoch": 7.61,
"learning_rate": 4.618146319172911e-05,
"loss": 1.7643,
"step": 529000
},
{
"epoch": 7.62,
"learning_rate": 4.612974415694339e-05,
"loss": 1.7623,
"step": 529500
},
{
"epoch": 7.63,
"learning_rate": 4.607781741519065e-05,
"loss": 1.7665,
"step": 530000
},
{
"epoch": 7.64,
"learning_rate": 4.602609838040493e-05,
"loss": 1.7632,
"step": 530500
},
{
"epoch": 7.64,
"learning_rate": 4.597417163865219e-05,
"loss": 1.7667,
"step": 531000
},
{
"epoch": 7.65,
"learning_rate": 4.5922452603866466e-05,
"loss": 1.764,
"step": 531500
},
{
"epoch": 7.66,
"learning_rate": 4.5870525862113734e-05,
"loss": 1.7625,
"step": 532000
},
{
"epoch": 7.66,
"learning_rate": 4.581880682732801e-05,
"loss": 1.7651,
"step": 532500
},
{
"epoch": 7.67,
"learning_rate": 4.576688008557527e-05,
"loss": 1.7636,
"step": 533000
},
{
"epoch": 7.68,
"learning_rate": 4.5715161050789545e-05,
"loss": 1.7662,
"step": 533500
},
{
"epoch": 7.69,
"learning_rate": 4.566323430903681e-05,
"loss": 1.7648,
"step": 534000
},
{
"epoch": 7.69,
"learning_rate": 4.561151527425109e-05,
"loss": 1.7657,
"step": 534500
},
{
"epoch": 7.7,
"learning_rate": 4.555958853249835e-05,
"loss": 1.7677,
"step": 535000
},
{
"epoch": 7.71,
"learning_rate": 4.550786949771263e-05,
"loss": 1.7634,
"step": 535500
},
{
"epoch": 7.72,
"learning_rate": 4.54559427559599e-05,
"loss": 1.7663,
"step": 536000
},
{
"epoch": 7.72,
"learning_rate": 4.540422372117417e-05,
"loss": 1.7633,
"step": 536500
},
{
"epoch": 7.73,
"learning_rate": 4.535229697942143e-05,
"loss": 1.7646,
"step": 537000
},
{
"epoch": 7.74,
"learning_rate": 4.530057794463571e-05,
"loss": 1.7652,
"step": 537500
},
{
"epoch": 7.74,
"learning_rate": 4.524865120288298e-05,
"loss": 1.7615,
"step": 538000
},
{
"epoch": 7.75,
"learning_rate": 4.519693216809725e-05,
"loss": 1.7644,
"step": 538500
},
{
"epoch": 7.76,
"learning_rate": 4.5145005426344516e-05,
"loss": 1.763,
"step": 539000
},
{
"epoch": 7.77,
"learning_rate": 4.5093286391558794e-05,
"loss": 1.7656,
"step": 539500
},
{
"epoch": 7.77,
"learning_rate": 4.5041359649806055e-05,
"loss": 1.7649,
"step": 540000
},
{
"epoch": 7.78,
"learning_rate": 4.498964061502033e-05,
"loss": 1.7648,
"step": 540500
},
{
"epoch": 7.79,
"learning_rate": 4.4937713873267595e-05,
"loss": 1.763,
"step": 541000
},
{
"epoch": 7.79,
"learning_rate": 4.488599483848187e-05,
"loss": 1.7672,
"step": 541500
},
{
"epoch": 7.8,
"learning_rate": 4.4834068096729134e-05,
"loss": 1.7667,
"step": 542000
},
{
"epoch": 7.81,
"learning_rate": 4.478234906194341e-05,
"loss": 1.7627,
"step": 542500
},
{
"epoch": 7.82,
"learning_rate": 4.473042232019068e-05,
"loss": 1.7636,
"step": 543000
},
{
"epoch": 7.82,
"learning_rate": 4.467870328540496e-05,
"loss": 1.7627,
"step": 543500
},
{
"epoch": 7.83,
"learning_rate": 4.462677654365221e-05,
"loss": 1.7661,
"step": 544000
},
{
"epoch": 7.84,
"learning_rate": 4.457505750886649e-05,
"loss": 1.7573,
"step": 544500
},
{
"epoch": 7.84,
"learning_rate": 4.452313076711376e-05,
"loss": 1.7623,
"step": 545000
},
{
"epoch": 7.85,
"learning_rate": 4.4471411732328036e-05,
"loss": 1.7647,
"step": 545500
},
{
"epoch": 7.86,
"learning_rate": 4.44194849905753e-05,
"loss": 1.7603,
"step": 546000
},
{
"epoch": 7.87,
"learning_rate": 4.4367765955789575e-05,
"loss": 1.7626,
"step": 546500
},
{
"epoch": 7.87,
"learning_rate": 4.4315839214036844e-05,
"loss": 1.7634,
"step": 547000
},
{
"epoch": 7.88,
"learning_rate": 4.4264120179251115e-05,
"loss": 1.7607,
"step": 547500
},
{
"epoch": 7.89,
"learning_rate": 4.4212193437498376e-05,
"loss": 1.7629,
"step": 548000
},
{
"epoch": 7.9,
"learning_rate": 4.4160474402712654e-05,
"loss": 1.7633,
"step": 548500
},
{
"epoch": 7.9,
"learning_rate": 4.410854766095992e-05,
"loss": 1.7592,
"step": 549000
},
{
"epoch": 7.91,
"learning_rate": 4.405682862617419e-05,
"loss": 1.7629,
"step": 549500
},
{
"epoch": 7.92,
"learning_rate": 4.400490188442146e-05,
"loss": 1.7628,
"step": 550000
},
{
"epoch": 7.92,
"learning_rate": 4.395318284963574e-05,
"loss": 1.759,
"step": 550500
},
{
"epoch": 7.93,
"learning_rate": 4.3901256107883e-05,
"loss": 1.7644,
"step": 551000
},
{
"epoch": 7.94,
"learning_rate": 4.384953707309727e-05,
"loss": 1.7658,
"step": 551500
},
{
"epoch": 7.95,
"learning_rate": 4.379761033134454e-05,
"loss": 1.7626,
"step": 552000
},
{
"epoch": 7.95,
"learning_rate": 4.374589129655882e-05,
"loss": 1.761,
"step": 552500
},
{
"epoch": 7.96,
"learning_rate": 4.3693964554806086e-05,
"loss": 1.762,
"step": 553000
},
{
"epoch": 7.97,
"learning_rate": 4.364224552002036e-05,
"loss": 1.761,
"step": 553500
},
{
"epoch": 7.97,
"learning_rate": 4.3590318778267625e-05,
"loss": 1.7568,
"step": 554000
},
{
"epoch": 7.98,
"learning_rate": 4.35385997434819e-05,
"loss": 1.7623,
"step": 554500
},
{
"epoch": 7.99,
"learning_rate": 4.3486673001729165e-05,
"loss": 1.7599,
"step": 555000
},
{
"epoch": 8.0,
"learning_rate": 4.3434953966943436e-05,
"loss": 1.7623,
"step": 555500
},
{
"epoch": 8.0,
"eval_accuracy": 0.6579531596492939,
"eval_loss": 1.6328125,
"eval_runtime": 654.7599,
"eval_samples_per_second": 823.106,
"eval_steps_per_second": 34.297,
"step": 555784
},
{
"epoch": 8.0,
"learning_rate": 4.3383027225190704e-05,
"loss": 1.7571,
"step": 556000
},
{
"epoch": 8.01,
"learning_rate": 4.333130819040498e-05,
"loss": 1.7592,
"step": 556500
},
{
"epoch": 8.02,
"learning_rate": 4.327938144865224e-05,
"loss": 1.758,
"step": 557000
},
{
"epoch": 8.02,
"learning_rate": 4.322766241386652e-05,
"loss": 1.7543,
"step": 557500
},
{
"epoch": 8.03,
"learning_rate": 4.317573567211378e-05,
"loss": 1.7562,
"step": 558000
},
{
"epoch": 8.04,
"learning_rate": 4.312401663732806e-05,
"loss": 1.756,
"step": 558500
},
{
"epoch": 8.05,
"learning_rate": 4.307208989557532e-05,
"loss": 1.7556,
"step": 559000
},
{
"epoch": 8.05,
"learning_rate": 4.30203708607896e-05,
"loss": 1.7592,
"step": 559500
},
{
"epoch": 8.06,
"learning_rate": 4.296844411903687e-05,
"loss": 1.7575,
"step": 560000
},
{
"epoch": 8.07,
"learning_rate": 4.291672508425114e-05,
"loss": 1.7559,
"step": 560500
},
{
"epoch": 8.08,
"learning_rate": 4.286479834249841e-05,
"loss": 1.7574,
"step": 561000
},
{
"epoch": 8.08,
"learning_rate": 4.2813079307712685e-05,
"loss": 1.7577,
"step": 561500
},
{
"epoch": 8.09,
"learning_rate": 4.2761152565959946e-05,
"loss": 1.7553,
"step": 562000
},
{
"epoch": 8.1,
"learning_rate": 4.270943353117422e-05,
"loss": 1.758,
"step": 562500
},
{
"epoch": 8.1,
"learning_rate": 4.2657506789421486e-05,
"loss": 1.7571,
"step": 563000
},
{
"epoch": 8.11,
"learning_rate": 4.260578775463576e-05,
"loss": 1.7599,
"step": 563500
},
{
"epoch": 8.12,
"learning_rate": 4.255386101288303e-05,
"loss": 1.7541,
"step": 564000
},
{
"epoch": 8.13,
"learning_rate": 4.25021419780973e-05,
"loss": 1.7598,
"step": 564500
},
{
"epoch": 8.13,
"learning_rate": 4.2450215236344564e-05,
"loss": 1.7611,
"step": 565000
},
{
"epoch": 8.14,
"learning_rate": 4.239849620155884e-05,
"loss": 1.7534,
"step": 565500
},
{
"epoch": 8.15,
"learning_rate": 4.234656945980611e-05,
"loss": 1.7584,
"step": 566000
},
{
"epoch": 8.15,
"learning_rate": 4.229485042502038e-05,
"loss": 1.7602,
"step": 566500
},
{
"epoch": 8.16,
"learning_rate": 4.224292368326765e-05,
"loss": 1.7593,
"step": 567000
},
{
"epoch": 8.17,
"learning_rate": 4.219120464848193e-05,
"loss": 1.7546,
"step": 567500
},
{
"epoch": 8.18,
"learning_rate": 4.213927790672919e-05,
"loss": 1.7557,
"step": 568000
},
{
"epoch": 8.18,
"learning_rate": 4.2087558871943466e-05,
"loss": 1.7597,
"step": 568500
},
{
"epoch": 8.19,
"learning_rate": 4.203563213019073e-05,
"loss": 1.7578,
"step": 569000
},
{
"epoch": 8.2,
"learning_rate": 4.1983913095405006e-05,
"loss": 1.7572,
"step": 569500
},
{
"epoch": 8.2,
"learning_rate": 4.193198635365227e-05,
"loss": 1.7593,
"step": 570000
},
{
"epoch": 8.21,
"learning_rate": 4.1880267318866545e-05,
"loss": 1.7553,
"step": 570500
},
{
"epoch": 8.22,
"learning_rate": 4.182834057711381e-05,
"loss": 1.76,
"step": 571000
},
{
"epoch": 8.23,
"learning_rate": 4.1776621542328084e-05,
"loss": 1.7588,
"step": 571500
},
{
"epoch": 8.23,
"learning_rate": 4.1724694800575346e-05,
"loss": 1.7568,
"step": 572000
},
{
"epoch": 8.24,
"learning_rate": 4.1672975765789623e-05,
"loss": 1.7573,
"step": 572500
},
{
"epoch": 8.25,
"learning_rate": 4.162104902403689e-05,
"loss": 1.7541,
"step": 573000
},
{
"epoch": 8.26,
"learning_rate": 4.156932998925116e-05,
"loss": 1.7586,
"step": 573500
},
{
"epoch": 8.26,
"learning_rate": 4.151740324749843e-05,
"loss": 1.7587,
"step": 574000
},
{
"epoch": 8.27,
"learning_rate": 4.146568421271271e-05,
"loss": 1.7537,
"step": 574500
},
{
"epoch": 8.28,
"learning_rate": 4.141375747095998e-05,
"loss": 1.7526,
"step": 575000
},
{
"epoch": 8.28,
"learning_rate": 4.136203843617425e-05,
"loss": 1.7541,
"step": 575500
},
{
"epoch": 8.29,
"learning_rate": 4.131011169442151e-05,
"loss": 1.7572,
"step": 576000
},
{
"epoch": 8.3,
"learning_rate": 4.125839265963579e-05,
"loss": 1.7557,
"step": 576500
},
{
"epoch": 8.31,
"learning_rate": 4.1206465917883056e-05,
"loss": 1.7518,
"step": 577000
},
{
"epoch": 8.31,
"learning_rate": 4.1154746883097327e-05,
"loss": 1.7542,
"step": 577500
},
{
"epoch": 8.32,
"learning_rate": 4.1102820141344595e-05,
"loss": 1.755,
"step": 578000
},
{
"epoch": 8.33,
"learning_rate": 4.105110110655887e-05,
"loss": 1.7548,
"step": 578500
},
{
"epoch": 8.33,
"learning_rate": 4.0999174364806134e-05,
"loss": 1.7575,
"step": 579000
},
{
"epoch": 8.34,
"learning_rate": 4.0947455330020405e-05,
"loss": 1.755,
"step": 579500
},
{
"epoch": 8.35,
"learning_rate": 4.0895528588267673e-05,
"loss": 1.7511,
"step": 580000
},
{
"epoch": 8.36,
"learning_rate": 4.084380955348195e-05,
"loss": 1.7565,
"step": 580500
},
{
"epoch": 8.36,
"learning_rate": 4.079188281172921e-05,
"loss": 1.7583,
"step": 581000
},
{
"epoch": 8.37,
"learning_rate": 4.074016377694349e-05,
"loss": 1.7517,
"step": 581500
},
{
"epoch": 8.38,
"learning_rate": 4.068823703519076e-05,
"loss": 1.7596,
"step": 582000
},
{
"epoch": 8.38,
"learning_rate": 4.063651800040503e-05,
"loss": 1.7544,
"step": 582500
},
{
"epoch": 8.39,
"learning_rate": 4.058459125865229e-05,
"loss": 1.7576,
"step": 583000
},
{
"epoch": 8.4,
"learning_rate": 4.053287222386657e-05,
"loss": 1.7514,
"step": 583500
},
{
"epoch": 8.41,
"learning_rate": 4.048094548211384e-05,
"loss": 1.7555,
"step": 584000
},
{
"epoch": 8.41,
"learning_rate": 4.042922644732811e-05,
"loss": 1.7529,
"step": 584500
},
{
"epoch": 8.42,
"learning_rate": 4.0377299705575377e-05,
"loss": 1.7569,
"step": 585000
},
{
"epoch": 8.43,
"learning_rate": 4.0325580670789654e-05,
"loss": 1.75,
"step": 585500
},
{
"epoch": 8.43,
"learning_rate": 4.0273653929036916e-05,
"loss": 1.7545,
"step": 586000
},
{
"epoch": 8.44,
"learning_rate": 4.022193489425119e-05,
"loss": 1.755,
"step": 586500
},
{
"epoch": 8.45,
"learning_rate": 4.0170008152498455e-05,
"loss": 1.7537,
"step": 587000
},
{
"epoch": 8.46,
"learning_rate": 4.011828911771273e-05,
"loss": 1.7558,
"step": 587500
},
{
"epoch": 8.46,
"learning_rate": 4.006636237596e-05,
"loss": 1.7521,
"step": 588000
},
{
"epoch": 8.47,
"learning_rate": 4.001464334117427e-05,
"loss": 1.7522,
"step": 588500
},
{
"epoch": 8.48,
"learning_rate": 3.996271659942154e-05,
"loss": 1.7573,
"step": 589000
},
{
"epoch": 8.49,
"learning_rate": 3.991099756463582e-05,
"loss": 1.7578,
"step": 589500
},
{
"epoch": 8.49,
"learning_rate": 3.985907082288308e-05,
"loss": 1.7547,
"step": 590000
},
{
"epoch": 8.5,
"learning_rate": 3.980735178809735e-05,
"loss": 1.7534,
"step": 590500
},
{
"epoch": 8.51,
"learning_rate": 3.975542504634462e-05,
"loss": 1.7522,
"step": 591000
},
{
"epoch": 8.51,
"learning_rate": 3.97037060115589e-05,
"loss": 1.7569,
"step": 591500
},
{
"epoch": 8.52,
"learning_rate": 3.965177926980616e-05,
"loss": 1.755,
"step": 592000
},
{
"epoch": 8.53,
"learning_rate": 3.9600060235020436e-05,
"loss": 1.7506,
"step": 592500
},
{
"epoch": 8.54,
"learning_rate": 3.95481334932677e-05,
"loss": 1.7538,
"step": 593000
},
{
"epoch": 8.54,
"learning_rate": 3.9496414458481975e-05,
"loss": 1.7516,
"step": 593500
},
{
"epoch": 8.55,
"learning_rate": 3.944448771672924e-05,
"loss": 1.753,
"step": 594000
},
{
"epoch": 8.56,
"learning_rate": 3.9392768681943514e-05,
"loss": 1.7505,
"step": 594500
},
{
"epoch": 8.56,
"learning_rate": 3.934084194019078e-05,
"loss": 1.751,
"step": 595000
},
{
"epoch": 8.57,
"learning_rate": 3.9289122905405054e-05,
"loss": 1.7484,
"step": 595500
},
{
"epoch": 8.58,
"learning_rate": 3.923719616365232e-05,
"loss": 1.7533,
"step": 596000
},
{
"epoch": 8.59,
"learning_rate": 3.91854771288666e-05,
"loss": 1.7547,
"step": 596500
},
{
"epoch": 8.59,
"learning_rate": 3.913355038711386e-05,
"loss": 1.7507,
"step": 597000
},
{
"epoch": 8.6,
"learning_rate": 3.908183135232813e-05,
"loss": 1.751,
"step": 597500
},
{
"epoch": 8.61,
"learning_rate": 3.90299046105754e-05,
"loss": 1.7521,
"step": 598000
},
{
"epoch": 8.61,
"learning_rate": 3.897818557578968e-05,
"loss": 1.7497,
"step": 598500
},
{
"epoch": 8.62,
"learning_rate": 3.8926258834036947e-05,
"loss": 1.7492,
"step": 599000
},
{
"epoch": 8.63,
"learning_rate": 3.887453979925122e-05,
"loss": 1.7515,
"step": 599500
},
{
"epoch": 8.64,
"learning_rate": 3.882261305749848e-05,
"loss": 1.7518,
"step": 600000
},
{
"epoch": 8.64,
"learning_rate": 3.877089402271276e-05,
"loss": 1.7518,
"step": 600500
},
{
"epoch": 8.65,
"learning_rate": 3.8718967280960025e-05,
"loss": 1.7552,
"step": 601000
},
{
"epoch": 8.66,
"learning_rate": 3.8667248246174296e-05,
"loss": 1.7506,
"step": 601500
},
{
"epoch": 8.67,
"learning_rate": 3.8615321504421564e-05,
"loss": 1.752,
"step": 602000
},
{
"epoch": 8.67,
"learning_rate": 3.856360246963584e-05,
"loss": 1.7484,
"step": 602500
},
{
"epoch": 8.68,
"learning_rate": 3.8511675727883104e-05,
"loss": 1.7495,
"step": 603000
},
{
"epoch": 8.69,
"learning_rate": 3.845995669309738e-05,
"loss": 1.7487,
"step": 603500
},
{
"epoch": 8.69,
"learning_rate": 3.840802995134464e-05,
"loss": 1.7505,
"step": 604000
},
{
"epoch": 8.7,
"learning_rate": 3.835631091655892e-05,
"loss": 1.7505,
"step": 604500
},
{
"epoch": 8.71,
"learning_rate": 3.830438417480618e-05,
"loss": 1.7483,
"step": 605000
},
{
"epoch": 8.72,
"learning_rate": 3.825266514002046e-05,
"loss": 1.7491,
"step": 605500
},
{
"epoch": 8.72,
"learning_rate": 3.820073839826773e-05,
"loss": 1.7482,
"step": 606000
},
{
"epoch": 8.73,
"learning_rate": 3.8149019363482e-05,
"loss": 1.7522,
"step": 606500
},
{
"epoch": 8.74,
"learning_rate": 3.809709262172926e-05,
"loss": 1.7476,
"step": 607000
},
{
"epoch": 8.74,
"learning_rate": 3.8045373586943545e-05,
"loss": 1.7543,
"step": 607500
},
{
"epoch": 8.75,
"learning_rate": 3.799344684519081e-05,
"loss": 1.7492,
"step": 608000
},
{
"epoch": 8.76,
"learning_rate": 3.794172781040508e-05,
"loss": 1.7532,
"step": 608500
},
{
"epoch": 8.77,
"learning_rate": 3.7889801068652346e-05,
"loss": 1.7484,
"step": 609000
},
{
"epoch": 8.77,
"learning_rate": 3.7838082033866624e-05,
"loss": 1.7506,
"step": 609500
},
{
"epoch": 8.78,
"learning_rate": 3.778615529211389e-05,
"loss": 1.7525,
"step": 610000
},
{
"epoch": 8.79,
"learning_rate": 3.773443625732816e-05,
"loss": 1.7502,
"step": 610500
},
{
"epoch": 8.79,
"learning_rate": 3.7682509515575425e-05,
"loss": 1.7464,
"step": 611000
},
{
"epoch": 8.8,
"learning_rate": 3.76307904807897e-05,
"loss": 1.7522,
"step": 611500
},
{
"epoch": 8.81,
"learning_rate": 3.757886373903697e-05,
"loss": 1.7506,
"step": 612000
},
{
"epoch": 8.82,
"learning_rate": 3.752714470425124e-05,
"loss": 1.7496,
"step": 612500
},
{
"epoch": 8.82,
"learning_rate": 3.747521796249851e-05,
"loss": 1.7477,
"step": 613000
},
{
"epoch": 8.83,
"learning_rate": 3.742349892771279e-05,
"loss": 1.75,
"step": 613500
},
{
"epoch": 8.84,
"learning_rate": 3.737157218596005e-05,
"loss": 1.7512,
"step": 614000
},
{
"epoch": 8.85,
"learning_rate": 3.731985315117433e-05,
"loss": 1.7449,
"step": 614500
},
{
"epoch": 8.85,
"learning_rate": 3.726792640942159e-05,
"loss": 1.7519,
"step": 615000
},
{
"epoch": 8.86,
"learning_rate": 3.7216207374635866e-05,
"loss": 1.7481,
"step": 615500
},
{
"epoch": 8.87,
"learning_rate": 3.716428063288313e-05,
"loss": 1.7484,
"step": 616000
},
{
"epoch": 8.87,
"learning_rate": 3.7112561598097405e-05,
"loss": 1.7471,
"step": 616500
},
{
"epoch": 8.88,
"learning_rate": 3.7060634856344674e-05,
"loss": 1.7532,
"step": 617000
},
{
"epoch": 8.89,
"learning_rate": 3.700891582155895e-05,
"loss": 1.7484,
"step": 617500
},
{
"epoch": 8.9,
"learning_rate": 3.6956989079806206e-05,
"loss": 1.743,
"step": 618000
},
{
"epoch": 8.9,
"learning_rate": 3.6905270045020484e-05,
"loss": 1.751,
"step": 618500
},
{
"epoch": 8.91,
"learning_rate": 3.685334330326775e-05,
"loss": 1.7484,
"step": 619000
},
{
"epoch": 8.92,
"learning_rate": 3.680162426848203e-05,
"loss": 1.7469,
"step": 619500
},
{
"epoch": 8.92,
"learning_rate": 3.674969752672929e-05,
"loss": 1.7493,
"step": 620000
},
{
"epoch": 8.93,
"learning_rate": 3.669797849194357e-05,
"loss": 1.7495,
"step": 620500
},
{
"epoch": 8.94,
"learning_rate": 3.664605175019084e-05,
"loss": 1.7462,
"step": 621000
},
{
"epoch": 8.95,
"learning_rate": 3.659433271540511e-05,
"loss": 1.7477,
"step": 621500
},
{
"epoch": 8.95,
"learning_rate": 3.654240597365237e-05,
"loss": 1.7481,
"step": 622000
},
{
"epoch": 8.96,
"learning_rate": 3.649068693886665e-05,
"loss": 1.7445,
"step": 622500
},
{
"epoch": 8.97,
"learning_rate": 3.6438760197113916e-05,
"loss": 1.7434,
"step": 623000
},
{
"epoch": 8.97,
"learning_rate": 3.638704116232819e-05,
"loss": 1.7434,
"step": 623500
},
{
"epoch": 8.98,
"learning_rate": 3.6335114420575455e-05,
"loss": 1.7473,
"step": 624000
},
{
"epoch": 8.99,
"learning_rate": 3.628339538578973e-05,
"loss": 1.7411,
"step": 624500
},
{
"epoch": 9.0,
"learning_rate": 3.6231468644036995e-05,
"loss": 1.7497,
"step": 625000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6605183065873255,
"eval_loss": 1.6181640625,
"eval_runtime": 652.1948,
"eval_samples_per_second": 826.344,
"eval_steps_per_second": 34.431,
"step": 625257
},
{
"epoch": 9.0,
"learning_rate": 3.6179749609251266e-05,
"loss": 1.7505,
"step": 625500
},
{
"epoch": 9.01,
"learning_rate": 3.6127822867498534e-05,
"loss": 1.7437,
"step": 626000
},
{
"epoch": 9.02,
"learning_rate": 3.607610383271281e-05,
"loss": 1.7475,
"step": 626500
},
{
"epoch": 9.03,
"learning_rate": 3.602417709096007e-05,
"loss": 1.7475,
"step": 627000
},
{
"epoch": 9.03,
"learning_rate": 3.597245805617435e-05,
"loss": 1.7413,
"step": 627500
},
{
"epoch": 9.04,
"learning_rate": 3.592053131442162e-05,
"loss": 1.7411,
"step": 628000
},
{
"epoch": 9.05,
"learning_rate": 3.58688122796359e-05,
"loss": 1.7475,
"step": 628500
},
{
"epoch": 9.05,
"learning_rate": 3.581688553788315e-05,
"loss": 1.7414,
"step": 629000
},
{
"epoch": 9.06,
"learning_rate": 3.576516650309743e-05,
"loss": 1.7422,
"step": 629500
},
{
"epoch": 9.07,
"learning_rate": 3.57132397613447e-05,
"loss": 1.7463,
"step": 630000
},
{
"epoch": 9.08,
"learning_rate": 3.5661520726558976e-05,
"loss": 1.7413,
"step": 630500
},
{
"epoch": 9.08,
"learning_rate": 3.560959398480624e-05,
"loss": 1.7422,
"step": 631000
},
{
"epoch": 9.09,
"learning_rate": 3.5557874950020515e-05,
"loss": 1.7433,
"step": 631500
},
{
"epoch": 9.1,
"learning_rate": 3.5505948208267776e-05,
"loss": 1.7418,
"step": 632000
},
{
"epoch": 9.1,
"learning_rate": 3.5454229173482054e-05,
"loss": 1.741,
"step": 632500
},
{
"epoch": 9.11,
"learning_rate": 3.5402302431729316e-05,
"loss": 1.7441,
"step": 633000
},
{
"epoch": 9.12,
"learning_rate": 3.535058339694359e-05,
"loss": 1.7457,
"step": 633500
},
{
"epoch": 9.13,
"learning_rate": 3.529865665519086e-05,
"loss": 1.7462,
"step": 634000
},
{
"epoch": 9.13,
"learning_rate": 3.524693762040513e-05,
"loss": 1.742,
"step": 634500
},
{
"epoch": 9.14,
"learning_rate": 3.51950108786524e-05,
"loss": 1.7441,
"step": 635000
},
{
"epoch": 9.15,
"learning_rate": 3.514329184386668e-05,
"loss": 1.7426,
"step": 635500
},
{
"epoch": 9.15,
"learning_rate": 3.509136510211394e-05,
"loss": 1.7427,
"step": 636000
},
{
"epoch": 9.16,
"learning_rate": 3.503964606732821e-05,
"loss": 1.7409,
"step": 636500
},
{
"epoch": 9.17,
"learning_rate": 3.498771932557548e-05,
"loss": 1.7426,
"step": 637000
},
{
"epoch": 9.18,
"learning_rate": 3.493600029078976e-05,
"loss": 1.7458,
"step": 637500
},
{
"epoch": 9.18,
"learning_rate": 3.488407354903702e-05,
"loss": 1.743,
"step": 638000
},
{
"epoch": 9.19,
"learning_rate": 3.4832354514251296e-05,
"loss": 1.7433,
"step": 638500
},
{
"epoch": 9.2,
"learning_rate": 3.478042777249856e-05,
"loss": 1.7444,
"step": 639000
},
{
"epoch": 9.21,
"learning_rate": 3.4728708737712836e-05,
"loss": 1.7398,
"step": 639500
},
{
"epoch": 9.21,
"learning_rate": 3.46767819959601e-05,
"loss": 1.7425,
"step": 640000
},
{
"epoch": 9.22,
"learning_rate": 3.4625062961174375e-05,
"loss": 1.7447,
"step": 640500
},
{
"epoch": 9.23,
"learning_rate": 3.457313621942164e-05,
"loss": 1.7396,
"step": 641000
},
{
"epoch": 9.23,
"learning_rate": 3.452141718463592e-05,
"loss": 1.7431,
"step": 641500
},
{
"epoch": 9.24,
"learning_rate": 3.446949044288318e-05,
"loss": 1.7428,
"step": 642000
},
{
"epoch": 9.25,
"learning_rate": 3.441777140809746e-05,
"loss": 1.7444,
"step": 642500
},
{
"epoch": 9.26,
"learning_rate": 3.436584466634472e-05,
"loss": 1.7447,
"step": 643000
},
{
"epoch": 9.26,
"learning_rate": 3.4314125631559e-05,
"loss": 1.7434,
"step": 643500
},
{
"epoch": 9.27,
"learning_rate": 3.426219888980626e-05,
"loss": 1.7433,
"step": 644000
},
{
"epoch": 9.28,
"learning_rate": 3.421047985502054e-05,
"loss": 1.7378,
"step": 644500
},
{
"epoch": 9.28,
"learning_rate": 3.415855311326781e-05,
"loss": 1.7408,
"step": 645000
},
{
"epoch": 9.29,
"learning_rate": 3.410683407848208e-05,
"loss": 1.7458,
"step": 645500
},
{
"epoch": 9.3,
"learning_rate": 3.405490733672934e-05,
"loss": 1.742,
"step": 646000
},
{
"epoch": 9.31,
"learning_rate": 3.400318830194362e-05,
"loss": 1.7375,
"step": 646500
},
{
"epoch": 9.31,
"learning_rate": 3.3951261560190886e-05,
"loss": 1.7436,
"step": 647000
},
{
"epoch": 9.32,
"learning_rate": 3.389954252540516e-05,
"loss": 1.7416,
"step": 647500
},
{
"epoch": 9.33,
"learning_rate": 3.3847615783652425e-05,
"loss": 1.7445,
"step": 648000
},
{
"epoch": 9.33,
"learning_rate": 3.37958967488667e-05,
"loss": 1.7424,
"step": 648500
},
{
"epoch": 9.34,
"learning_rate": 3.3743970007113964e-05,
"loss": 1.744,
"step": 649000
},
{
"epoch": 9.35,
"learning_rate": 3.369225097232824e-05,
"loss": 1.7392,
"step": 649500
},
{
"epoch": 9.36,
"learning_rate": 3.3640324230575503e-05,
"loss": 1.7414,
"step": 650000
},
{
"epoch": 9.36,
"learning_rate": 3.358860519578978e-05,
"loss": 1.7409,
"step": 650500
},
{
"epoch": 9.37,
"learning_rate": 3.353667845403704e-05,
"loss": 1.7405,
"step": 651000
},
{
"epoch": 9.38,
"learning_rate": 3.348495941925132e-05,
"loss": 1.7461,
"step": 651500
},
{
"epoch": 9.38,
"learning_rate": 3.343303267749859e-05,
"loss": 1.7405,
"step": 652000
},
{
"epoch": 9.39,
"learning_rate": 3.3381313642712867e-05,
"loss": 1.7434,
"step": 652500
},
{
"epoch": 9.4,
"learning_rate": 3.332938690096012e-05,
"loss": 1.738,
"step": 653000
},
{
"epoch": 9.41,
"learning_rate": 3.32776678661744e-05,
"loss": 1.7393,
"step": 653500
},
{
"epoch": 9.41,
"learning_rate": 3.322574112442167e-05,
"loss": 1.7426,
"step": 654000
},
{
"epoch": 9.42,
"learning_rate": 3.3174022089635945e-05,
"loss": 1.7413,
"step": 654500
},
{
"epoch": 9.43,
"learning_rate": 3.3122095347883207e-05,
"loss": 1.7395,
"step": 655000
},
{
"epoch": 9.44,
"learning_rate": 3.3070376313097484e-05,
"loss": 1.7428,
"step": 655500
},
{
"epoch": 9.44,
"learning_rate": 3.301844957134475e-05,
"loss": 1.7407,
"step": 656000
},
{
"epoch": 9.45,
"learning_rate": 3.2966730536559024e-05,
"loss": 1.7388,
"step": 656500
},
{
"epoch": 9.46,
"learning_rate": 3.2914803794806285e-05,
"loss": 1.7394,
"step": 657000
},
{
"epoch": 9.46,
"learning_rate": 3.286308476002056e-05,
"loss": 1.7391,
"step": 657500
},
{
"epoch": 9.47,
"learning_rate": 3.281115801826783e-05,
"loss": 1.7433,
"step": 658000
},
{
"epoch": 9.48,
"learning_rate": 3.27594389834821e-05,
"loss": 1.7433,
"step": 658500
},
{
"epoch": 9.49,
"learning_rate": 3.270751224172937e-05,
"loss": 1.7395,
"step": 659000
},
{
"epoch": 9.49,
"learning_rate": 3.265579320694365e-05,
"loss": 1.746,
"step": 659500
},
{
"epoch": 9.5,
"learning_rate": 3.260386646519091e-05,
"loss": 1.7359,
"step": 660000
},
{
"epoch": 9.51,
"learning_rate": 3.255214743040518e-05,
"loss": 1.7443,
"step": 660500
},
{
"epoch": 9.51,
"learning_rate": 3.250022068865245e-05,
"loss": 1.7441,
"step": 661000
},
{
"epoch": 9.52,
"learning_rate": 3.244850165386673e-05,
"loss": 1.733,
"step": 661500
},
{
"epoch": 9.53,
"learning_rate": 3.2396574912113995e-05,
"loss": 1.7359,
"step": 662000
},
{
"epoch": 9.54,
"learning_rate": 3.2344855877328266e-05,
"loss": 1.7396,
"step": 662500
},
{
"epoch": 9.54,
"learning_rate": 3.2292929135575534e-05,
"loss": 1.7425,
"step": 663000
},
{
"epoch": 9.55,
"learning_rate": 3.224121010078981e-05,
"loss": 1.7378,
"step": 663500
},
{
"epoch": 9.56,
"learning_rate": 3.2189283359037074e-05,
"loss": 1.735,
"step": 664000
},
{
"epoch": 9.56,
"learning_rate": 3.2137564324251344e-05,
"loss": 1.7372,
"step": 664500
},
{
"epoch": 9.57,
"learning_rate": 3.208563758249861e-05,
"loss": 1.742,
"step": 665000
},
{
"epoch": 9.58,
"learning_rate": 3.203391854771289e-05,
"loss": 1.7396,
"step": 665500
},
{
"epoch": 9.59,
"learning_rate": 3.198199180596015e-05,
"loss": 1.7375,
"step": 666000
},
{
"epoch": 9.59,
"learning_rate": 3.193027277117443e-05,
"loss": 1.7418,
"step": 666500
},
{
"epoch": 9.6,
"learning_rate": 3.187834602942169e-05,
"loss": 1.7416,
"step": 667000
},
{
"epoch": 9.61,
"learning_rate": 3.182662699463597e-05,
"loss": 1.7412,
"step": 667500
},
{
"epoch": 9.62,
"learning_rate": 3.177470025288323e-05,
"loss": 1.7362,
"step": 668000
},
{
"epoch": 9.62,
"learning_rate": 3.172298121809751e-05,
"loss": 1.739,
"step": 668500
},
{
"epoch": 9.63,
"learning_rate": 3.167105447634478e-05,
"loss": 1.7383,
"step": 669000
},
{
"epoch": 9.64,
"learning_rate": 3.161933544155905e-05,
"loss": 1.7393,
"step": 669500
},
{
"epoch": 9.64,
"learning_rate": 3.1567408699806316e-05,
"loss": 1.7413,
"step": 670000
},
{
"epoch": 9.65,
"learning_rate": 3.1515689665020594e-05,
"loss": 1.74,
"step": 670500
},
{
"epoch": 9.66,
"learning_rate": 3.1463762923267855e-05,
"loss": 1.7382,
"step": 671000
},
{
"epoch": 9.67,
"learning_rate": 3.1412043888482126e-05,
"loss": 1.7352,
"step": 671500
},
{
"epoch": 9.67,
"learning_rate": 3.1360117146729394e-05,
"loss": 1.7344,
"step": 672000
},
{
"epoch": 9.68,
"learning_rate": 3.130839811194367e-05,
"loss": 1.7389,
"step": 672500
},
{
"epoch": 9.69,
"learning_rate": 3.125647137019094e-05,
"loss": 1.7362,
"step": 673000
},
{
"epoch": 9.69,
"learning_rate": 3.120475233540521e-05,
"loss": 1.7389,
"step": 673500
},
{
"epoch": 9.7,
"learning_rate": 3.115282559365248e-05,
"loss": 1.7387,
"step": 674000
},
{
"epoch": 9.71,
"learning_rate": 3.110110655886676e-05,
"loss": 1.7359,
"step": 674500
},
{
"epoch": 9.72,
"learning_rate": 3.104917981711402e-05,
"loss": 1.7372,
"step": 675000
},
{
"epoch": 9.72,
"learning_rate": 3.099746078232829e-05,
"loss": 1.737,
"step": 675500
},
{
"epoch": 9.73,
"learning_rate": 3.094553404057556e-05,
"loss": 1.7345,
"step": 676000
},
{
"epoch": 9.74,
"learning_rate": 3.0893815005789836e-05,
"loss": 1.7387,
"step": 676500
},
{
"epoch": 9.74,
"learning_rate": 3.08418882640371e-05,
"loss": 1.7383,
"step": 677000
},
{
"epoch": 9.75,
"learning_rate": 3.0790169229251375e-05,
"loss": 1.7341,
"step": 677500
},
{
"epoch": 9.76,
"learning_rate": 3.073824248749864e-05,
"loss": 1.7377,
"step": 678000
},
{
"epoch": 9.77,
"learning_rate": 3.0686523452712915e-05,
"loss": 1.7385,
"step": 678500
},
{
"epoch": 9.77,
"learning_rate": 3.0634596710960176e-05,
"loss": 1.7367,
"step": 679000
},
{
"epoch": 9.78,
"learning_rate": 3.0582877676174454e-05,
"loss": 1.737,
"step": 679500
},
{
"epoch": 9.79,
"learning_rate": 3.053095093442172e-05,
"loss": 1.733,
"step": 680000
},
{
"epoch": 9.8,
"learning_rate": 3.0479231899635996e-05,
"loss": 1.7389,
"step": 680500
},
{
"epoch": 9.8,
"learning_rate": 3.042730515788326e-05,
"loss": 1.7415,
"step": 681000
},
{
"epoch": 9.81,
"learning_rate": 3.037558612309754e-05,
"loss": 1.7378,
"step": 681500
},
{
"epoch": 9.82,
"learning_rate": 3.0323659381344797e-05,
"loss": 1.7375,
"step": 682000
},
{
"epoch": 9.82,
"learning_rate": 3.0271940346559075e-05,
"loss": 1.7362,
"step": 682500
},
{
"epoch": 9.83,
"learning_rate": 3.022001360480634e-05,
"loss": 1.7365,
"step": 683000
},
{
"epoch": 9.84,
"learning_rate": 3.0168294570020618e-05,
"loss": 1.7351,
"step": 683500
},
{
"epoch": 9.85,
"learning_rate": 3.0116367828267883e-05,
"loss": 1.7348,
"step": 684000
},
{
"epoch": 9.85,
"learning_rate": 3.0064648793482157e-05,
"loss": 1.7356,
"step": 684500
},
{
"epoch": 9.86,
"learning_rate": 3.001272205172942e-05,
"loss": 1.7365,
"step": 685000
},
{
"epoch": 9.87,
"learning_rate": 2.9961003016943696e-05,
"loss": 1.7343,
"step": 685500
},
{
"epoch": 9.87,
"learning_rate": 2.990907627519096e-05,
"loss": 1.7377,
"step": 686000
},
{
"epoch": 9.88,
"learning_rate": 2.9857357240405235e-05,
"loss": 1.7389,
"step": 686500
},
{
"epoch": 9.89,
"learning_rate": 2.9805430498652504e-05,
"loss": 1.7355,
"step": 687000
},
{
"epoch": 9.9,
"learning_rate": 2.9753711463866778e-05,
"loss": 1.7348,
"step": 687500
},
{
"epoch": 9.9,
"learning_rate": 2.9701784722114046e-05,
"loss": 1.7332,
"step": 688000
},
{
"epoch": 9.91,
"learning_rate": 2.965006568732832e-05,
"loss": 1.733,
"step": 688500
},
{
"epoch": 9.92,
"learning_rate": 2.9598138945575582e-05,
"loss": 1.7346,
"step": 689000
},
{
"epoch": 9.92,
"learning_rate": 2.9546419910789857e-05,
"loss": 1.7339,
"step": 689500
},
{
"epoch": 9.93,
"learning_rate": 2.9494493169037125e-05,
"loss": 1.7314,
"step": 690000
},
{
"epoch": 9.94,
"learning_rate": 2.94427741342514e-05,
"loss": 1.7375,
"step": 690500
},
{
"epoch": 9.95,
"learning_rate": 2.9390847392498668e-05,
"loss": 1.7345,
"step": 691000
},
{
"epoch": 9.95,
"learning_rate": 2.9339128357712942e-05,
"loss": 1.7369,
"step": 691500
},
{
"epoch": 9.96,
"learning_rate": 2.9287201615960204e-05,
"loss": 1.7318,
"step": 692000
},
{
"epoch": 9.97,
"learning_rate": 2.9235482581174478e-05,
"loss": 1.7302,
"step": 692500
},
{
"epoch": 9.98,
"learning_rate": 2.9183555839421746e-05,
"loss": 1.7346,
"step": 693000
},
{
"epoch": 9.98,
"learning_rate": 2.913183680463602e-05,
"loss": 1.7336,
"step": 693500
},
{
"epoch": 9.99,
"learning_rate": 2.9079910062883285e-05,
"loss": 1.7355,
"step": 694000
},
{
"epoch": 10.0,
"learning_rate": 2.9028191028097563e-05,
"loss": 1.7321,
"step": 694500
},
{
"epoch": 10.0,
"eval_accuracy": 0.6623401667185361,
"eval_loss": 1.6064453125,
"eval_runtime": 654.7283,
"eval_samples_per_second": 823.146,
"eval_steps_per_second": 34.298,
"step": 694730
},
{
"epoch": 10.0,
"learning_rate": 2.8976264286344828e-05,
"loss": 1.7335,
"step": 695000
},
{
"epoch": 10.01,
"learning_rate": 2.8924545251559106e-05,
"loss": 1.729,
"step": 695500
},
{
"epoch": 10.02,
"learning_rate": 2.8872618509806364e-05,
"loss": 1.7314,
"step": 696000
},
{
"epoch": 10.03,
"learning_rate": 2.8820899475020642e-05,
"loss": 1.7345,
"step": 696500
},
{
"epoch": 10.03,
"learning_rate": 2.8768972733267907e-05,
"loss": 1.7342,
"step": 697000
},
{
"epoch": 10.04,
"learning_rate": 2.8717253698482184e-05,
"loss": 1.7326,
"step": 697500
},
{
"epoch": 10.05,
"learning_rate": 2.866532695672945e-05,
"loss": 1.7301,
"step": 698000
},
{
"epoch": 10.05,
"learning_rate": 2.8613607921943724e-05,
"loss": 1.7292,
"step": 698500
},
{
"epoch": 10.06,
"learning_rate": 2.8561681180190985e-05,
"loss": 1.7291,
"step": 699000
},
{
"epoch": 10.07,
"learning_rate": 2.8509962145405263e-05,
"loss": 1.7321,
"step": 699500
},
{
"epoch": 10.08,
"learning_rate": 2.8458035403652528e-05,
"loss": 1.7314,
"step": 700000
},
{
"epoch": 10.08,
"learning_rate": 2.8406316368866802e-05,
"loss": 1.7333,
"step": 700500
},
{
"epoch": 10.09,
"learning_rate": 2.835438962711407e-05,
"loss": 1.7329,
"step": 701000
},
{
"epoch": 10.1,
"learning_rate": 2.8302670592328345e-05,
"loss": 1.7337,
"step": 701500
},
{
"epoch": 10.1,
"learning_rate": 2.8250743850575613e-05,
"loss": 1.7346,
"step": 702000
},
{
"epoch": 10.11,
"learning_rate": 2.8199024815789887e-05,
"loss": 1.734,
"step": 702500
},
{
"epoch": 10.12,
"learning_rate": 2.814709807403715e-05,
"loss": 1.736,
"step": 703000
},
{
"epoch": 10.13,
"learning_rate": 2.8095379039251423e-05,
"loss": 1.7305,
"step": 703500
},
{
"epoch": 10.13,
"learning_rate": 2.804345229749869e-05,
"loss": 1.7273,
"step": 704000
},
{
"epoch": 10.14,
"learning_rate": 2.7991733262712966e-05,
"loss": 1.734,
"step": 704500
},
{
"epoch": 10.15,
"learning_rate": 2.793980652096023e-05,
"loss": 1.7322,
"step": 705000
},
{
"epoch": 10.16,
"learning_rate": 2.788808748617451e-05,
"loss": 1.7312,
"step": 705500
},
{
"epoch": 10.16,
"learning_rate": 2.783616074442177e-05,
"loss": 1.7309,
"step": 706000
},
{
"epoch": 10.17,
"learning_rate": 2.7784441709636045e-05,
"loss": 1.7292,
"step": 706500
},
{
"epoch": 10.18,
"learning_rate": 2.773251496788331e-05,
"loss": 1.7313,
"step": 707000
},
{
"epoch": 10.18,
"learning_rate": 2.7680795933097587e-05,
"loss": 1.7328,
"step": 707500
},
{
"epoch": 10.19,
"learning_rate": 2.7628869191344852e-05,
"loss": 1.7298,
"step": 708000
},
{
"epoch": 10.2,
"learning_rate": 2.757715015655913e-05,
"loss": 1.73,
"step": 708500
},
{
"epoch": 10.21,
"learning_rate": 2.7525223414806395e-05,
"loss": 1.7338,
"step": 709000
},
{
"epoch": 10.21,
"learning_rate": 2.747350438002067e-05,
"loss": 1.7327,
"step": 709500
},
{
"epoch": 10.22,
"learning_rate": 2.742157763826793e-05,
"loss": 1.7283,
"step": 710000
},
{
"epoch": 10.23,
"learning_rate": 2.736985860348221e-05,
"loss": 1.7319,
"step": 710500
},
{
"epoch": 10.23,
"learning_rate": 2.7317931861729473e-05,
"loss": 1.729,
"step": 711000
},
{
"epoch": 10.24,
"learning_rate": 2.7266212826943748e-05,
"loss": 1.7318,
"step": 711500
},
{
"epoch": 10.25,
"learning_rate": 2.7214286085191016e-05,
"loss": 1.7307,
"step": 712000
},
{
"epoch": 10.26,
"learning_rate": 2.716256705040529e-05,
"loss": 1.7321,
"step": 712500
},
{
"epoch": 10.26,
"learning_rate": 2.7110640308652552e-05,
"loss": 1.7313,
"step": 713000
},
{
"epoch": 10.27,
"learning_rate": 2.7058921273866826e-05,
"loss": 1.7283,
"step": 713500
},
{
"epoch": 10.28,
"learning_rate": 2.7006994532114095e-05,
"loss": 1.7248,
"step": 714000
},
{
"epoch": 10.28,
"learning_rate": 2.695527549732837e-05,
"loss": 1.7267,
"step": 714500
},
{
"epoch": 10.29,
"learning_rate": 2.6903348755575637e-05,
"loss": 1.7297,
"step": 715000
},
{
"epoch": 10.3,
"learning_rate": 2.685162972078991e-05,
"loss": 1.7297,
"step": 715500
},
{
"epoch": 10.31,
"learning_rate": 2.6799702979037176e-05,
"loss": 1.7281,
"step": 716000
},
{
"epoch": 10.31,
"learning_rate": 2.6747983944251454e-05,
"loss": 1.7306,
"step": 716500
},
{
"epoch": 10.32,
"learning_rate": 2.6696057202498716e-05,
"loss": 1.7297,
"step": 717000
},
{
"epoch": 10.33,
"learning_rate": 2.664433816771299e-05,
"loss": 1.7319,
"step": 717500
},
{
"epoch": 10.33,
"learning_rate": 2.6592411425960255e-05,
"loss": 1.7312,
"step": 718000
},
{
"epoch": 10.34,
"learning_rate": 2.6540692391174533e-05,
"loss": 1.7296,
"step": 718500
},
{
"epoch": 10.35,
"learning_rate": 2.6488765649421798e-05,
"loss": 1.7301,
"step": 719000
},
{
"epoch": 10.36,
"learning_rate": 2.6437046614636075e-05,
"loss": 1.7252,
"step": 719500
},
{
"epoch": 10.36,
"learning_rate": 2.6385119872883333e-05,
"loss": 1.7303,
"step": 720000
},
{
"epoch": 10.37,
"learning_rate": 2.633340083809761e-05,
"loss": 1.7276,
"step": 720500
},
{
"epoch": 10.38,
"learning_rate": 2.6281474096344876e-05,
"loss": 1.7308,
"step": 721000
},
{
"epoch": 10.39,
"learning_rate": 2.6229755061559154e-05,
"loss": 1.7275,
"step": 721500
},
{
"epoch": 10.39,
"learning_rate": 2.617782831980642e-05,
"loss": 1.7294,
"step": 722000
},
{
"epoch": 10.4,
"learning_rate": 2.6126109285020693e-05,
"loss": 1.7296,
"step": 722500
},
{
"epoch": 10.41,
"learning_rate": 2.607418254326796e-05,
"loss": 1.7331,
"step": 723000
},
{
"epoch": 10.41,
"learning_rate": 2.6022463508482236e-05,
"loss": 1.7316,
"step": 723500
},
{
"epoch": 10.42,
"learning_rate": 2.5970536766729497e-05,
"loss": 1.7348,
"step": 724000
},
{
"epoch": 10.43,
"learning_rate": 2.5918817731943772e-05,
"loss": 1.7302,
"step": 724500
},
{
"epoch": 10.44,
"learning_rate": 2.586689099019104e-05,
"loss": 1.7292,
"step": 725000
},
{
"epoch": 10.44,
"learning_rate": 2.5815171955405314e-05,
"loss": 1.7273,
"step": 725500
},
{
"epoch": 10.45,
"learning_rate": 2.5763245213652583e-05,
"loss": 1.7286,
"step": 726000
},
{
"epoch": 10.46,
"learning_rate": 2.5711526178866857e-05,
"loss": 1.7255,
"step": 726500
},
{
"epoch": 10.46,
"learning_rate": 2.565959943711412e-05,
"loss": 1.7281,
"step": 727000
},
{
"epoch": 10.47,
"learning_rate": 2.5607880402328393e-05,
"loss": 1.7314,
"step": 727500
},
{
"epoch": 10.48,
"learning_rate": 2.555595366057566e-05,
"loss": 1.7246,
"step": 728000
},
{
"epoch": 10.49,
"learning_rate": 2.5504234625789936e-05,
"loss": 1.729,
"step": 728500
},
{
"epoch": 10.49,
"learning_rate": 2.54523078840372e-05,
"loss": 1.7246,
"step": 729000
},
{
"epoch": 10.5,
"learning_rate": 2.5400588849251478e-05,
"loss": 1.73,
"step": 729500
},
{
"epoch": 10.51,
"learning_rate": 2.5348662107498743e-05,
"loss": 1.7267,
"step": 730000
},
{
"epoch": 10.51,
"learning_rate": 2.529694307271302e-05,
"loss": 1.7278,
"step": 730500
},
{
"epoch": 10.52,
"learning_rate": 2.524501633096028e-05,
"loss": 1.7264,
"step": 731000
},
{
"epoch": 10.53,
"learning_rate": 2.5193297296174557e-05,
"loss": 1.7308,
"step": 731500
},
{
"epoch": 10.54,
"learning_rate": 2.514137055442182e-05,
"loss": 1.7262,
"step": 732000
},
{
"epoch": 10.54,
"learning_rate": 2.50896515196361e-05,
"loss": 1.7284,
"step": 732500
},
{
"epoch": 10.55,
"learning_rate": 2.5037724777883364e-05,
"loss": 1.727,
"step": 733000
},
{
"epoch": 10.56,
"learning_rate": 2.498600574309764e-05,
"loss": 1.7257,
"step": 733500
},
{
"epoch": 10.57,
"learning_rate": 2.4934079001344904e-05,
"loss": 1.7264,
"step": 734000
},
{
"epoch": 10.57,
"learning_rate": 2.4882359966559178e-05,
"loss": 1.7271,
"step": 734500
},
{
"epoch": 10.58,
"learning_rate": 2.4830433224806443e-05,
"loss": 1.7238,
"step": 735000
},
{
"epoch": 10.59,
"learning_rate": 2.4778714190020717e-05,
"loss": 1.7282,
"step": 735500
},
{
"epoch": 10.59,
"learning_rate": 2.4726787448267985e-05,
"loss": 1.728,
"step": 736000
},
{
"epoch": 10.6,
"learning_rate": 2.467506841348226e-05,
"loss": 1.7236,
"step": 736500
},
{
"epoch": 10.61,
"learning_rate": 2.4623141671729525e-05,
"loss": 1.7285,
"step": 737000
},
{
"epoch": 10.62,
"learning_rate": 2.45714226369438e-05,
"loss": 1.7228,
"step": 737500
},
{
"epoch": 10.62,
"learning_rate": 2.4519495895191067e-05,
"loss": 1.7276,
"step": 738000
},
{
"epoch": 10.63,
"learning_rate": 2.4467776860405342e-05,
"loss": 1.7287,
"step": 738500
},
{
"epoch": 10.64,
"learning_rate": 2.4415850118652607e-05,
"loss": 1.7278,
"step": 739000
},
{
"epoch": 10.64,
"learning_rate": 2.436413108386688e-05,
"loss": 1.728,
"step": 739500
},
{
"epoch": 10.65,
"learning_rate": 2.431220434211415e-05,
"loss": 1.7278,
"step": 740000
},
{
"epoch": 10.66,
"learning_rate": 2.4260485307328424e-05,
"loss": 1.7244,
"step": 740500
},
{
"epoch": 10.67,
"learning_rate": 2.4208558565575685e-05,
"loss": 1.7253,
"step": 741000
},
{
"epoch": 10.67,
"learning_rate": 2.4156839530789963e-05,
"loss": 1.7291,
"step": 741500
},
{
"epoch": 10.68,
"learning_rate": 2.4104912789037224e-05,
"loss": 1.7234,
"step": 742000
},
{
"epoch": 10.69,
"learning_rate": 2.4053193754251502e-05,
"loss": 1.7239,
"step": 742500
},
{
"epoch": 10.69,
"learning_rate": 2.4001267012498767e-05,
"loss": 1.726,
"step": 743000
},
{
"epoch": 10.7,
"learning_rate": 2.3949547977713045e-05,
"loss": 1.729,
"step": 743500
},
{
"epoch": 10.71,
"learning_rate": 2.3897621235960306e-05,
"loss": 1.7244,
"step": 744000
},
{
"epoch": 10.72,
"learning_rate": 2.3845902201174584e-05,
"loss": 1.724,
"step": 744500
},
{
"epoch": 10.72,
"learning_rate": 2.379397545942185e-05,
"loss": 1.7243,
"step": 745000
},
{
"epoch": 10.73,
"learning_rate": 2.3742256424636123e-05,
"loss": 1.7262,
"step": 745500
},
{
"epoch": 10.74,
"learning_rate": 2.369032968288339e-05,
"loss": 1.7221,
"step": 746000
},
{
"epoch": 10.75,
"learning_rate": 2.3638610648097663e-05,
"loss": 1.7231,
"step": 746500
},
{
"epoch": 10.75,
"learning_rate": 2.358668390634493e-05,
"loss": 1.7281,
"step": 747000
},
{
"epoch": 10.76,
"learning_rate": 2.3534964871559205e-05,
"loss": 1.728,
"step": 747500
},
{
"epoch": 10.77,
"learning_rate": 2.348303812980647e-05,
"loss": 1.7238,
"step": 748000
},
{
"epoch": 10.77,
"learning_rate": 2.3431319095020745e-05,
"loss": 1.7245,
"step": 748500
},
{
"epoch": 10.78,
"learning_rate": 2.337939235326801e-05,
"loss": 1.7276,
"step": 749000
},
{
"epoch": 10.79,
"learning_rate": 2.3327673318482287e-05,
"loss": 1.7243,
"step": 749500
},
{
"epoch": 10.8,
"learning_rate": 2.3275746576729552e-05,
"loss": 1.7242,
"step": 750000
},
{
"epoch": 10.8,
"learning_rate": 2.3224027541943827e-05,
"loss": 1.7268,
"step": 750500
},
{
"epoch": 10.81,
"learning_rate": 2.317210080019109e-05,
"loss": 1.7224,
"step": 751000
},
{
"epoch": 10.82,
"learning_rate": 2.3120174058438356e-05,
"loss": 1.7241,
"step": 751500
},
{
"epoch": 10.82,
"learning_rate": 2.3068455023652634e-05,
"loss": 1.7226,
"step": 752000
},
{
"epoch": 10.83,
"learning_rate": 2.3016528281899896e-05,
"loss": 1.7251,
"step": 752500
},
{
"epoch": 10.84,
"learning_rate": 2.2964809247114173e-05,
"loss": 1.7182,
"step": 753000
},
{
"epoch": 10.85,
"learning_rate": 2.2912882505361438e-05,
"loss": 1.7232,
"step": 753500
},
{
"epoch": 10.85,
"learning_rate": 2.2861163470575713e-05,
"loss": 1.7256,
"step": 754000
},
{
"epoch": 10.86,
"learning_rate": 2.2809236728822978e-05,
"loss": 1.7254,
"step": 754500
},
{
"epoch": 10.87,
"learning_rate": 2.2757517694037252e-05,
"loss": 1.7269,
"step": 755000
},
{
"epoch": 10.87,
"learning_rate": 2.2705590952284517e-05,
"loss": 1.7254,
"step": 755500
},
{
"epoch": 10.88,
"learning_rate": 2.2653871917498795e-05,
"loss": 1.7247,
"step": 756000
},
{
"epoch": 10.89,
"learning_rate": 2.260194517574606e-05,
"loss": 1.7232,
"step": 756500
},
{
"epoch": 10.9,
"learning_rate": 2.2550226140960334e-05,
"loss": 1.7252,
"step": 757000
},
{
"epoch": 10.9,
"learning_rate": 2.24982993992076e-05,
"loss": 1.7227,
"step": 757500
},
{
"epoch": 10.91,
"learning_rate": 2.2446580364421873e-05,
"loss": 1.7207,
"step": 758000
},
{
"epoch": 10.92,
"learning_rate": 2.239465362266914e-05,
"loss": 1.7222,
"step": 758500
},
{
"epoch": 10.93,
"learning_rate": 2.2342934587883416e-05,
"loss": 1.7244,
"step": 759000
},
{
"epoch": 10.93,
"learning_rate": 2.229100784613068e-05,
"loss": 1.7237,
"step": 759500
},
{
"epoch": 10.94,
"learning_rate": 2.2239288811344955e-05,
"loss": 1.7208,
"step": 760000
},
{
"epoch": 10.95,
"learning_rate": 2.218736206959222e-05,
"loss": 1.7214,
"step": 760500
},
{
"epoch": 10.95,
"learning_rate": 2.2135643034806498e-05,
"loss": 1.7169,
"step": 761000
},
{
"epoch": 10.96,
"learning_rate": 2.208371629305376e-05,
"loss": 1.7241,
"step": 761500
},
{
"epoch": 10.97,
"learning_rate": 2.2031997258268037e-05,
"loss": 1.7228,
"step": 762000
},
{
"epoch": 10.98,
"learning_rate": 2.1980070516515302e-05,
"loss": 1.72,
"step": 762500
},
{
"epoch": 10.98,
"learning_rate": 2.192835148172958e-05,
"loss": 1.7202,
"step": 763000
},
{
"epoch": 10.99,
"learning_rate": 2.187642473997684e-05,
"loss": 1.7232,
"step": 763500
},
{
"epoch": 11.0,
"learning_rate": 2.182470570519112e-05,
"loss": 1.7225,
"step": 764000
},
{
"epoch": 11.0,
"eval_accuracy": 0.6647001699746609,
"eval_loss": 1.5908203125,
"eval_runtime": 653.6993,
"eval_samples_per_second": 824.442,
"eval_steps_per_second": 34.352,
"step": 764203
},
{
"epoch": 11.0,
"learning_rate": 2.177277896343838e-05,
"loss": 1.7241,
"step": 764500
},
{
"epoch": 11.01,
"learning_rate": 2.1721059928652658e-05,
"loss": 1.7231,
"step": 765000
},
{
"epoch": 11.02,
"learning_rate": 2.1669133186899923e-05,
"loss": 1.7191,
"step": 765500
},
{
"epoch": 11.03,
"learning_rate": 2.1617414152114197e-05,
"loss": 1.7213,
"step": 766000
},
{
"epoch": 11.03,
"learning_rate": 2.1565487410361462e-05,
"loss": 1.7183,
"step": 766500
},
{
"epoch": 11.04,
"learning_rate": 2.1513768375575737e-05,
"loss": 1.7218,
"step": 767000
},
{
"epoch": 11.05,
"learning_rate": 2.1461841633823005e-05,
"loss": 1.72,
"step": 767500
},
{
"epoch": 11.05,
"learning_rate": 2.141012259903728e-05,
"loss": 1.7179,
"step": 768000
},
{
"epoch": 11.06,
"learning_rate": 2.1358195857284544e-05,
"loss": 1.722,
"step": 768500
},
{
"epoch": 11.07,
"learning_rate": 2.130647682249882e-05,
"loss": 1.7249,
"step": 769000
},
{
"epoch": 11.08,
"learning_rate": 2.1254550080746087e-05,
"loss": 1.7184,
"step": 769500
},
{
"epoch": 11.08,
"learning_rate": 2.120283104596036e-05,
"loss": 1.7217,
"step": 770000
},
{
"epoch": 11.09,
"learning_rate": 2.1150904304207626e-05,
"loss": 1.7151,
"step": 770500
},
{
"epoch": 11.1,
"learning_rate": 2.10991852694219e-05,
"loss": 1.7216,
"step": 771000
},
{
"epoch": 11.11,
"learning_rate": 2.1047258527669165e-05,
"loss": 1.7187,
"step": 771500
},
{
"epoch": 11.11,
"learning_rate": 2.099553949288344e-05,
"loss": 1.7231,
"step": 772000
},
{
"epoch": 11.12,
"learning_rate": 2.0943612751130705e-05,
"loss": 1.7166,
"step": 772500
},
{
"epoch": 11.13,
"learning_rate": 2.0891893716344982e-05,
"loss": 1.721,
"step": 773000
},
{
"epoch": 11.13,
"learning_rate": 2.0839966974592244e-05,
"loss": 1.7188,
"step": 773500
},
{
"epoch": 11.14,
"learning_rate": 2.0788247939806522e-05,
"loss": 1.7186,
"step": 774000
},
{
"epoch": 11.15,
"learning_rate": 2.0736321198053787e-05,
"loss": 1.7172,
"step": 774500
},
{
"epoch": 11.16,
"learning_rate": 2.0684602163268064e-05,
"loss": 1.7187,
"step": 775000
},
{
"epoch": 11.16,
"learning_rate": 2.0632675421515326e-05,
"loss": 1.721,
"step": 775500
},
{
"epoch": 11.17,
"learning_rate": 2.0580956386729604e-05,
"loss": 1.72,
"step": 776000
},
{
"epoch": 11.18,
"learning_rate": 2.052902964497687e-05,
"loss": 1.7235,
"step": 776500
},
{
"epoch": 11.18,
"learning_rate": 2.0477310610191143e-05,
"loss": 1.7188,
"step": 777000
},
{
"epoch": 11.19,
"learning_rate": 2.0425383868438408e-05,
"loss": 1.7149,
"step": 777500
},
{
"epoch": 11.2,
"learning_rate": 2.0373664833652682e-05,
"loss": 1.7217,
"step": 778000
},
{
"epoch": 11.21,
"learning_rate": 2.0321738091899947e-05,
"loss": 1.7167,
"step": 778500
},
{
"epoch": 11.21,
"learning_rate": 2.027001905711422e-05,
"loss": 1.7174,
"step": 779000
},
{
"epoch": 11.22,
"learning_rate": 2.021809231536149e-05,
"loss": 1.7199,
"step": 779500
},
{
"epoch": 11.23,
"learning_rate": 2.0166373280575764e-05,
"loss": 1.7173,
"step": 780000
},
{
"epoch": 11.23,
"learning_rate": 2.011444653882303e-05,
"loss": 1.7184,
"step": 780500
},
{
"epoch": 11.24,
"learning_rate": 2.0062727504037303e-05,
"loss": 1.714,
"step": 781000
},
{
"epoch": 11.25,
"learning_rate": 2.001080076228457e-05,
"loss": 1.7196,
"step": 781500
},
{
"epoch": 11.26,
"learning_rate": 1.9959081727498846e-05,
"loss": 1.7174,
"step": 782000
},
{
"epoch": 11.26,
"learning_rate": 1.990715498574611e-05,
"loss": 1.7227,
"step": 782500
},
{
"epoch": 11.27,
"learning_rate": 1.9855435950960385e-05,
"loss": 1.7177,
"step": 783000
},
{
"epoch": 11.28,
"learning_rate": 1.980350920920765e-05,
"loss": 1.7197,
"step": 783500
},
{
"epoch": 11.28,
"learning_rate": 1.9751790174421928e-05,
"loss": 1.7229,
"step": 784000
},
{
"epoch": 11.29,
"learning_rate": 1.969986343266919e-05,
"loss": 1.7191,
"step": 784500
},
{
"epoch": 11.3,
"learning_rate": 1.9648144397883467e-05,
"loss": 1.7138,
"step": 785000
},
{
"epoch": 11.31,
"learning_rate": 1.959621765613073e-05,
"loss": 1.7207,
"step": 785500
},
{
"epoch": 11.31,
"learning_rate": 1.954449862134501e-05,
"loss": 1.7146,
"step": 786000
},
{
"epoch": 11.32,
"learning_rate": 1.949257187959227e-05,
"loss": 1.7196,
"step": 786500
},
{
"epoch": 11.33,
"learning_rate": 1.944085284480655e-05,
"loss": 1.7191,
"step": 787000
},
{
"epoch": 11.34,
"learning_rate": 1.938892610305381e-05,
"loss": 1.7159,
"step": 787500
},
{
"epoch": 11.34,
"learning_rate": 1.933720706826809e-05,
"loss": 1.7162,
"step": 788000
},
{
"epoch": 11.35,
"learning_rate": 1.9285280326515353e-05,
"loss": 1.7182,
"step": 788500
},
{
"epoch": 11.36,
"learning_rate": 1.9233561291729628e-05,
"loss": 1.7189,
"step": 789000
},
{
"epoch": 11.36,
"learning_rate": 1.9181634549976893e-05,
"loss": 1.717,
"step": 789500
},
{
"epoch": 11.37,
"learning_rate": 1.9129915515191167e-05,
"loss": 1.7177,
"step": 790000
},
{
"epoch": 11.38,
"learning_rate": 1.9077988773438435e-05,
"loss": 1.7161,
"step": 790500
},
{
"epoch": 11.39,
"learning_rate": 1.902626973865271e-05,
"loss": 1.7167,
"step": 791000
},
{
"epoch": 11.39,
"learning_rate": 1.8974342996899974e-05,
"loss": 1.7182,
"step": 791500
},
{
"epoch": 11.4,
"learning_rate": 1.892262396211425e-05,
"loss": 1.7168,
"step": 792000
},
{
"epoch": 11.41,
"learning_rate": 1.8870697220361517e-05,
"loss": 1.7151,
"step": 792500
},
{
"epoch": 11.41,
"learning_rate": 1.881897818557579e-05,
"loss": 1.7218,
"step": 793000
},
{
"epoch": 11.42,
"learning_rate": 1.8767051443823056e-05,
"loss": 1.7196,
"step": 793500
},
{
"epoch": 11.43,
"learning_rate": 1.871533240903733e-05,
"loss": 1.7158,
"step": 794000
},
{
"epoch": 11.44,
"learning_rate": 1.8663405667284596e-05,
"loss": 1.7207,
"step": 794500
},
{
"epoch": 11.44,
"learning_rate": 1.861168663249887e-05,
"loss": 1.7135,
"step": 795000
},
{
"epoch": 11.45,
"learning_rate": 1.855975989074614e-05,
"loss": 1.7185,
"step": 795500
},
{
"epoch": 11.46,
"learning_rate": 1.8508040855960413e-05,
"loss": 1.715,
"step": 796000
},
{
"epoch": 11.46,
"learning_rate": 1.8456114114207678e-05,
"loss": 1.7159,
"step": 796500
},
{
"epoch": 11.47,
"learning_rate": 1.8404395079421952e-05,
"loss": 1.722,
"step": 797000
},
{
"epoch": 11.48,
"learning_rate": 1.8352468337669217e-05,
"loss": 1.7182,
"step": 797500
},
{
"epoch": 11.49,
"learning_rate": 1.8300749302883495e-05,
"loss": 1.7209,
"step": 798000
},
{
"epoch": 11.49,
"learning_rate": 1.8248822561130756e-05,
"loss": 1.7154,
"step": 798500
},
{
"epoch": 11.5,
"learning_rate": 1.8197103526345034e-05,
"loss": 1.7182,
"step": 799000
},
{
"epoch": 11.51,
"learning_rate": 1.81451767845923e-05,
"loss": 1.7143,
"step": 799500
},
{
"epoch": 11.52,
"learning_rate": 1.8093457749806577e-05,
"loss": 1.7141,
"step": 800000
},
{
"epoch": 11.52,
"learning_rate": 1.8041531008053838e-05,
"loss": 1.7146,
"step": 800500
},
{
"epoch": 11.53,
"learning_rate": 1.7989811973268116e-05,
"loss": 1.7161,
"step": 801000
},
{
"epoch": 11.54,
"learning_rate": 1.7937885231515377e-05,
"loss": 1.7129,
"step": 801500
},
{
"epoch": 11.54,
"learning_rate": 1.7886166196729655e-05,
"loss": 1.7192,
"step": 802000
},
{
"epoch": 11.55,
"learning_rate": 1.783423945497692e-05,
"loss": 1.7209,
"step": 802500
},
{
"epoch": 11.56,
"learning_rate": 1.7782520420191194e-05,
"loss": 1.717,
"step": 803000
},
{
"epoch": 11.57,
"learning_rate": 1.773059367843846e-05,
"loss": 1.7134,
"step": 803500
},
{
"epoch": 11.57,
"learning_rate": 1.7678874643652734e-05,
"loss": 1.7199,
"step": 804000
},
{
"epoch": 11.58,
"learning_rate": 1.7626947901900002e-05,
"loss": 1.7099,
"step": 804500
},
{
"epoch": 11.59,
"learning_rate": 1.7575228867114276e-05,
"loss": 1.7197,
"step": 805000
},
{
"epoch": 11.59,
"learning_rate": 1.752330212536154e-05,
"loss": 1.7135,
"step": 805500
},
{
"epoch": 11.6,
"learning_rate": 1.7471583090575816e-05,
"loss": 1.7177,
"step": 806000
},
{
"epoch": 11.61,
"learning_rate": 1.7419656348823084e-05,
"loss": 1.7178,
"step": 806500
},
{
"epoch": 11.62,
"learning_rate": 1.7367937314037358e-05,
"loss": 1.7157,
"step": 807000
},
{
"epoch": 11.62,
"learning_rate": 1.7316010572284623e-05,
"loss": 1.7178,
"step": 807500
},
{
"epoch": 11.63,
"learning_rate": 1.7264291537498897e-05,
"loss": 1.7147,
"step": 808000
},
{
"epoch": 11.64,
"learning_rate": 1.7212364795746162e-05,
"loss": 1.7179,
"step": 808500
},
{
"epoch": 11.64,
"learning_rate": 1.7160645760960437e-05,
"loss": 1.7159,
"step": 809000
},
{
"epoch": 11.65,
"learning_rate": 1.71087190192077e-05,
"loss": 1.7157,
"step": 809500
},
{
"epoch": 11.66,
"learning_rate": 1.705699998442198e-05,
"loss": 1.7194,
"step": 810000
},
{
"epoch": 11.67,
"learning_rate": 1.700507324266924e-05,
"loss": 1.7165,
"step": 810500
},
{
"epoch": 11.67,
"learning_rate": 1.695335420788352e-05,
"loss": 1.717,
"step": 811000
},
{
"epoch": 11.68,
"learning_rate": 1.6901427466130784e-05,
"loss": 1.7156,
"step": 811500
},
{
"epoch": 11.69,
"learning_rate": 1.684970843134506e-05,
"loss": 1.7136,
"step": 812000
},
{
"epoch": 11.7,
"learning_rate": 1.6797781689592323e-05,
"loss": 1.7164,
"step": 812500
},
{
"epoch": 11.7,
"learning_rate": 1.67460626548066e-05,
"loss": 1.7159,
"step": 813000
},
{
"epoch": 11.71,
"learning_rate": 1.6694135913053865e-05,
"loss": 1.7176,
"step": 813500
},
{
"epoch": 11.72,
"learning_rate": 1.664241687826814e-05,
"loss": 1.7167,
"step": 814000
},
{
"epoch": 11.72,
"learning_rate": 1.6590490136515405e-05,
"loss": 1.7175,
"step": 814500
},
{
"epoch": 11.73,
"learning_rate": 1.653877110172968e-05,
"loss": 1.7126,
"step": 815000
},
{
"epoch": 11.74,
"learning_rate": 1.6486844359976944e-05,
"loss": 1.7118,
"step": 815500
},
{
"epoch": 11.75,
"learning_rate": 1.6435125325191222e-05,
"loss": 1.7122,
"step": 816000
},
{
"epoch": 11.75,
"learning_rate": 1.6383198583438487e-05,
"loss": 1.7169,
"step": 816500
},
{
"epoch": 11.76,
"learning_rate": 1.633147954865276e-05,
"loss": 1.7138,
"step": 817000
},
{
"epoch": 11.77,
"learning_rate": 1.6279552806900026e-05,
"loss": 1.7133,
"step": 817500
},
{
"epoch": 11.77,
"learning_rate": 1.62278337721143e-05,
"loss": 1.7167,
"step": 818000
},
{
"epoch": 11.78,
"learning_rate": 1.617590703036157e-05,
"loss": 1.7165,
"step": 818500
},
{
"epoch": 11.79,
"learning_rate": 1.6124187995575843e-05,
"loss": 1.7147,
"step": 819000
},
{
"epoch": 11.8,
"learning_rate": 1.6072261253823108e-05,
"loss": 1.7116,
"step": 819500
},
{
"epoch": 11.8,
"learning_rate": 1.6020542219037382e-05,
"loss": 1.7131,
"step": 820000
},
{
"epoch": 11.81,
"learning_rate": 1.5968615477284647e-05,
"loss": 1.7125,
"step": 820500
},
{
"epoch": 11.82,
"learning_rate": 1.5916896442498925e-05,
"loss": 1.7159,
"step": 821000
},
{
"epoch": 11.82,
"learning_rate": 1.5864969700746186e-05,
"loss": 1.7116,
"step": 821500
},
{
"epoch": 11.83,
"learning_rate": 1.5813250665960464e-05,
"loss": 1.7134,
"step": 822000
},
{
"epoch": 11.84,
"learning_rate": 1.576132392420773e-05,
"loss": 1.7176,
"step": 822500
},
{
"epoch": 11.85,
"learning_rate": 1.5709604889422007e-05,
"loss": 1.7149,
"step": 823000
},
{
"epoch": 11.85,
"learning_rate": 1.565767814766927e-05,
"loss": 1.7154,
"step": 823500
},
{
"epoch": 11.86,
"learning_rate": 1.5605959112883546e-05,
"loss": 1.7141,
"step": 824000
},
{
"epoch": 11.87,
"learning_rate": 1.5554032371130808e-05,
"loss": 1.714,
"step": 824500
},
{
"epoch": 11.88,
"learning_rate": 1.5502313336345085e-05,
"loss": 1.7131,
"step": 825000
},
{
"epoch": 11.88,
"learning_rate": 1.545038659459235e-05,
"loss": 1.7094,
"step": 825500
},
{
"epoch": 11.89,
"learning_rate": 1.5398667559806625e-05,
"loss": 1.713,
"step": 826000
},
{
"epoch": 11.9,
"learning_rate": 1.534674081805389e-05,
"loss": 1.7131,
"step": 826500
},
{
"epoch": 11.9,
"learning_rate": 1.5295021783268164e-05,
"loss": 1.7134,
"step": 827000
},
{
"epoch": 11.91,
"learning_rate": 1.5243095041515432e-05,
"loss": 1.7143,
"step": 827500
},
{
"epoch": 11.92,
"learning_rate": 1.5191376006729707e-05,
"loss": 1.7123,
"step": 828000
},
{
"epoch": 11.93,
"learning_rate": 1.5139449264976971e-05,
"loss": 1.7154,
"step": 828500
},
{
"epoch": 11.93,
"learning_rate": 1.5087730230191246e-05,
"loss": 1.7142,
"step": 829000
},
{
"epoch": 11.94,
"learning_rate": 1.5035803488438512e-05,
"loss": 1.7115,
"step": 829500
},
{
"epoch": 11.95,
"learning_rate": 1.4984084453652788e-05,
"loss": 1.714,
"step": 830000
},
{
"epoch": 11.95,
"learning_rate": 1.4932157711900052e-05,
"loss": 1.7141,
"step": 830500
},
{
"epoch": 11.96,
"learning_rate": 1.4880438677114328e-05,
"loss": 1.7081,
"step": 831000
},
{
"epoch": 11.97,
"learning_rate": 1.4828511935361591e-05,
"loss": 1.7111,
"step": 831500
},
{
"epoch": 11.98,
"learning_rate": 1.4776792900575867e-05,
"loss": 1.7114,
"step": 832000
},
{
"epoch": 11.98,
"learning_rate": 1.4724866158823134e-05,
"loss": 1.7137,
"step": 832500
},
{
"epoch": 11.99,
"learning_rate": 1.467314712403741e-05,
"loss": 1.7155,
"step": 833000
},
{
"epoch": 12.0,
"learning_rate": 1.4621220382284673e-05,
"loss": 1.707,
"step": 833500
},
{
"epoch": 12.0,
"eval_accuracy": 0.6659963122125594,
"eval_loss": 1.5859375,
"eval_runtime": 653.3093,
"eval_samples_per_second": 824.934,
"eval_steps_per_second": 34.373,
"step": 833676
},
{
"epoch": 12.0,
"learning_rate": 1.4569501347498949e-05,
"loss": 1.7114,
"step": 834000
},
{
"epoch": 12.01,
"learning_rate": 1.4517574605746216e-05,
"loss": 1.7119,
"step": 834500
},
{
"epoch": 12.02,
"learning_rate": 1.446585557096049e-05,
"loss": 1.7107,
"step": 835000
},
{
"epoch": 12.03,
"learning_rate": 1.4413928829207755e-05,
"loss": 1.7124,
"step": 835500
},
{
"epoch": 12.03,
"learning_rate": 1.4362209794422029e-05,
"loss": 1.7061,
"step": 836000
},
{
"epoch": 12.04,
"learning_rate": 1.4310283052669296e-05,
"loss": 1.7062,
"step": 836500
},
{
"epoch": 12.05,
"learning_rate": 1.4258564017883572e-05,
"loss": 1.7093,
"step": 837000
},
{
"epoch": 12.06,
"learning_rate": 1.4206637276130835e-05,
"loss": 1.7088,
"step": 837500
},
{
"epoch": 12.06,
"learning_rate": 1.4154918241345111e-05,
"loss": 1.715,
"step": 838000
},
{
"epoch": 12.07,
"learning_rate": 1.4102991499592374e-05,
"loss": 1.7135,
"step": 838500
},
{
"epoch": 12.08,
"learning_rate": 1.405127246480665e-05,
"loss": 1.7104,
"step": 839000
},
{
"epoch": 12.08,
"learning_rate": 1.3999345723053917e-05,
"loss": 1.7061,
"step": 839500
},
{
"epoch": 12.09,
"learning_rate": 1.3947626688268193e-05,
"loss": 1.7104,
"step": 840000
},
{
"epoch": 12.1,
"learning_rate": 1.3895699946515456e-05,
"loss": 1.7089,
"step": 840500
},
{
"epoch": 12.11,
"learning_rate": 1.3843980911729732e-05,
"loss": 1.709,
"step": 841000
},
{
"epoch": 12.11,
"learning_rate": 1.3792054169976997e-05,
"loss": 1.7148,
"step": 841500
},
{
"epoch": 12.12,
"learning_rate": 1.3740335135191273e-05,
"loss": 1.7093,
"step": 842000
},
{
"epoch": 12.13,
"learning_rate": 1.3688408393438536e-05,
"loss": 1.7113,
"step": 842500
},
{
"epoch": 12.13,
"learning_rate": 1.3636689358652812e-05,
"loss": 1.7068,
"step": 843000
},
{
"epoch": 12.14,
"learning_rate": 1.3584762616900079e-05,
"loss": 1.7078,
"step": 843500
},
{
"epoch": 12.15,
"learning_rate": 1.3533043582114355e-05,
"loss": 1.7096,
"step": 844000
},
{
"epoch": 12.16,
"learning_rate": 1.3481116840361618e-05,
"loss": 1.7111,
"step": 844500
},
{
"epoch": 12.16,
"learning_rate": 1.3429397805575894e-05,
"loss": 1.7084,
"step": 845000
},
{
"epoch": 12.17,
"learning_rate": 1.3377471063823158e-05,
"loss": 1.715,
"step": 845500
},
{
"epoch": 12.18,
"learning_rate": 1.3325752029037434e-05,
"loss": 1.7066,
"step": 846000
},
{
"epoch": 12.18,
"learning_rate": 1.32738252872847e-05,
"loss": 1.7098,
"step": 846500
},
{
"epoch": 12.19,
"learning_rate": 1.3222106252498975e-05,
"loss": 1.7082,
"step": 847000
},
{
"epoch": 12.2,
"learning_rate": 1.317017951074624e-05,
"loss": 1.7081,
"step": 847500
},
{
"epoch": 12.21,
"learning_rate": 1.3118460475960514e-05,
"loss": 1.7124,
"step": 848000
},
{
"epoch": 12.21,
"learning_rate": 1.306653373420778e-05,
"loss": 1.7086,
"step": 848500
},
{
"epoch": 12.22,
"learning_rate": 1.3014814699422057e-05,
"loss": 1.7109,
"step": 849000
},
{
"epoch": 12.23,
"learning_rate": 1.296288795766932e-05,
"loss": 1.7114,
"step": 849500
},
{
"epoch": 12.23,
"learning_rate": 1.2911168922883596e-05,
"loss": 1.7097,
"step": 850000
},
{
"epoch": 12.24,
"learning_rate": 1.2859242181130862e-05,
"loss": 1.7067,
"step": 850500
},
{
"epoch": 12.25,
"learning_rate": 1.2807523146345138e-05,
"loss": 1.7092,
"step": 851000
},
{
"epoch": 12.26,
"learning_rate": 1.2755596404592402e-05,
"loss": 1.7094,
"step": 851500
},
{
"epoch": 12.26,
"learning_rate": 1.2703877369806678e-05,
"loss": 1.7079,
"step": 852000
},
{
"epoch": 12.27,
"learning_rate": 1.2651950628053943e-05,
"loss": 1.7042,
"step": 852500
},
{
"epoch": 12.28,
"learning_rate": 1.2600231593268219e-05,
"loss": 1.7083,
"step": 853000
},
{
"epoch": 12.29,
"learning_rate": 1.2548304851515482e-05,
"loss": 1.7076,
"step": 853500
},
{
"epoch": 12.29,
"learning_rate": 1.2496585816729758e-05,
"loss": 1.7063,
"step": 854000
},
{
"epoch": 12.3,
"learning_rate": 1.2444659074977023e-05,
"loss": 1.7082,
"step": 854500
},
{
"epoch": 12.31,
"learning_rate": 1.2392940040191299e-05,
"loss": 1.7097,
"step": 855000
},
{
"epoch": 12.31,
"learning_rate": 1.2341013298438564e-05,
"loss": 1.7103,
"step": 855500
},
{
"epoch": 12.32,
"learning_rate": 1.228929426365284e-05,
"loss": 1.7061,
"step": 856000
},
{
"epoch": 12.33,
"learning_rate": 1.2237367521900103e-05,
"loss": 1.7058,
"step": 856500
},
{
"epoch": 12.34,
"learning_rate": 1.2185648487114381e-05,
"loss": 1.7078,
"step": 857000
},
{
"epoch": 12.34,
"learning_rate": 1.2133721745361644e-05,
"loss": 1.7082,
"step": 857500
},
{
"epoch": 12.35,
"learning_rate": 1.208200271057592e-05,
"loss": 1.7065,
"step": 858000
},
{
"epoch": 12.36,
"learning_rate": 1.2030075968823185e-05,
"loss": 1.7073,
"step": 858500
},
{
"epoch": 12.36,
"learning_rate": 1.197835693403746e-05,
"loss": 1.7073,
"step": 859000
},
{
"epoch": 12.37,
"learning_rate": 1.1926430192284726e-05,
"loss": 1.7079,
"step": 859500
},
{
"epoch": 12.38,
"learning_rate": 1.1874711157499e-05,
"loss": 1.7045,
"step": 860000
},
{
"epoch": 12.39,
"learning_rate": 1.1822784415746265e-05,
"loss": 1.7102,
"step": 860500
},
{
"epoch": 12.39,
"learning_rate": 1.1771065380960541e-05,
"loss": 1.7039,
"step": 861000
},
{
"epoch": 12.4,
"learning_rate": 1.1719138639207806e-05,
"loss": 1.7058,
"step": 861500
},
{
"epoch": 12.41,
"learning_rate": 1.1667419604422082e-05,
"loss": 1.7056,
"step": 862000
},
{
"epoch": 12.41,
"learning_rate": 1.1615492862669347e-05,
"loss": 1.7047,
"step": 862500
},
{
"epoch": 12.42,
"learning_rate": 1.1563773827883623e-05,
"loss": 1.7115,
"step": 863000
},
{
"epoch": 12.43,
"learning_rate": 1.1511847086130888e-05,
"loss": 1.7049,
"step": 863500
},
{
"epoch": 12.44,
"learning_rate": 1.1460128051345164e-05,
"loss": 1.7092,
"step": 864000
},
{
"epoch": 12.44,
"learning_rate": 1.1408201309592427e-05,
"loss": 1.7057,
"step": 864500
},
{
"epoch": 12.45,
"learning_rate": 1.1356482274806702e-05,
"loss": 1.7077,
"step": 865000
},
{
"epoch": 12.46,
"learning_rate": 1.1304555533053968e-05,
"loss": 1.7083,
"step": 865500
},
{
"epoch": 12.47,
"learning_rate": 1.1252836498268243e-05,
"loss": 1.7076,
"step": 866000
},
{
"epoch": 12.47,
"learning_rate": 1.1200909756515508e-05,
"loss": 1.7032,
"step": 866500
},
{
"epoch": 12.48,
"learning_rate": 1.1149190721729784e-05,
"loss": 1.7079,
"step": 867000
},
{
"epoch": 12.49,
"learning_rate": 1.1097263979977049e-05,
"loss": 1.7072,
"step": 867500
},
{
"epoch": 12.49,
"learning_rate": 1.1045544945191325e-05,
"loss": 1.7072,
"step": 868000
},
{
"epoch": 12.5,
"learning_rate": 1.099361820343859e-05,
"loss": 1.7081,
"step": 868500
},
{
"epoch": 12.51,
"learning_rate": 1.0941899168652866e-05,
"loss": 1.7071,
"step": 869000
},
{
"epoch": 12.52,
"learning_rate": 1.088997242690013e-05,
"loss": 1.7067,
"step": 869500
},
{
"epoch": 12.52,
"learning_rate": 1.0838253392114407e-05,
"loss": 1.7113,
"step": 870000
},
{
"epoch": 12.53,
"learning_rate": 1.0786326650361671e-05,
"loss": 1.7068,
"step": 870500
},
{
"epoch": 12.54,
"learning_rate": 1.0734607615575946e-05,
"loss": 1.7077,
"step": 871000
},
{
"epoch": 12.54,
"learning_rate": 1.068268087382321e-05,
"loss": 1.7061,
"step": 871500
},
{
"epoch": 12.55,
"learning_rate": 1.0630961839037487e-05,
"loss": 1.7066,
"step": 872000
},
{
"epoch": 12.56,
"learning_rate": 1.057903509728475e-05,
"loss": 1.7114,
"step": 872500
},
{
"epoch": 12.57,
"learning_rate": 1.0527316062499026e-05,
"loss": 1.7065,
"step": 873000
},
{
"epoch": 12.57,
"learning_rate": 1.0475389320746291e-05,
"loss": 1.7078,
"step": 873500
},
{
"epoch": 12.58,
"learning_rate": 1.0423670285960567e-05,
"loss": 1.7055,
"step": 874000
},
{
"epoch": 12.59,
"learning_rate": 1.0371743544207832e-05,
"loss": 1.7053,
"step": 874500
},
{
"epoch": 12.59,
"learning_rate": 1.0320024509422108e-05,
"loss": 1.7064,
"step": 875000
},
{
"epoch": 12.6,
"learning_rate": 1.0268097767669373e-05,
"loss": 1.7066,
"step": 875500
},
{
"epoch": 12.61,
"learning_rate": 1.0216378732883649e-05,
"loss": 1.7088,
"step": 876000
},
{
"epoch": 12.62,
"learning_rate": 1.0164451991130914e-05,
"loss": 1.7057,
"step": 876500
},
{
"epoch": 12.62,
"learning_rate": 1.0112732956345188e-05,
"loss": 1.7101,
"step": 877000
},
{
"epoch": 12.63,
"learning_rate": 1.0060806214592453e-05,
"loss": 1.7067,
"step": 877500
},
{
"epoch": 12.64,
"learning_rate": 1.000908717980673e-05,
"loss": 1.7029,
"step": 878000
},
{
"epoch": 12.65,
"learning_rate": 9.957160438053994e-06,
"loss": 1.7068,
"step": 878500
},
{
"epoch": 12.65,
"learning_rate": 9.90544140326827e-06,
"loss": 1.7016,
"step": 879000
},
{
"epoch": 12.66,
"learning_rate": 9.853514661515533e-06,
"loss": 1.7056,
"step": 879500
},
{
"epoch": 12.67,
"learning_rate": 9.80179562672981e-06,
"loss": 1.7051,
"step": 880000
},
{
"epoch": 12.67,
"learning_rate": 9.749868884977074e-06,
"loss": 1.7045,
"step": 880500
},
{
"epoch": 12.68,
"learning_rate": 9.69814985019135e-06,
"loss": 1.7064,
"step": 881000
},
{
"epoch": 12.69,
"learning_rate": 9.646223108438615e-06,
"loss": 1.7036,
"step": 881500
},
{
"epoch": 12.7,
"learning_rate": 9.594504073652891e-06,
"loss": 1.7039,
"step": 882000
},
{
"epoch": 12.7,
"learning_rate": 9.542577331900156e-06,
"loss": 1.7067,
"step": 882500
},
{
"epoch": 12.71,
"learning_rate": 9.49085829711443e-06,
"loss": 1.7064,
"step": 883000
},
{
"epoch": 12.72,
"learning_rate": 9.438931555361697e-06,
"loss": 1.7044,
"step": 883500
},
{
"epoch": 12.72,
"learning_rate": 9.387212520575972e-06,
"loss": 1.7062,
"step": 884000
},
{
"epoch": 12.73,
"learning_rate": 9.335285778823236e-06,
"loss": 1.7072,
"step": 884500
},
{
"epoch": 12.74,
"learning_rate": 9.283566744037513e-06,
"loss": 1.704,
"step": 885000
},
{
"epoch": 12.75,
"learning_rate": 9.231640002284777e-06,
"loss": 1.7025,
"step": 885500
},
{
"epoch": 12.75,
"learning_rate": 9.179920967499054e-06,
"loss": 1.7006,
"step": 886000
},
{
"epoch": 12.76,
"learning_rate": 9.127994225746317e-06,
"loss": 1.7019,
"step": 886500
},
{
"epoch": 12.77,
"learning_rate": 9.076275190960594e-06,
"loss": 1.7047,
"step": 887000
},
{
"epoch": 12.77,
"learning_rate": 9.024348449207858e-06,
"loss": 1.705,
"step": 887500
},
{
"epoch": 12.78,
"learning_rate": 8.972629414422134e-06,
"loss": 1.7044,
"step": 888000
},
{
"epoch": 12.79,
"learning_rate": 8.920702672669399e-06,
"loss": 1.6981,
"step": 888500
},
{
"epoch": 12.8,
"learning_rate": 8.868983637883673e-06,
"loss": 1.7067,
"step": 889000
},
{
"epoch": 12.8,
"learning_rate": 8.81705689613094e-06,
"loss": 1.7091,
"step": 889500
},
{
"epoch": 12.81,
"learning_rate": 8.765337861345214e-06,
"loss": 1.7041,
"step": 890000
},
{
"epoch": 12.82,
"learning_rate": 8.713411119592479e-06,
"loss": 1.7066,
"step": 890500
},
{
"epoch": 12.83,
"learning_rate": 8.661692084806755e-06,
"loss": 1.7049,
"step": 891000
},
{
"epoch": 12.83,
"learning_rate": 8.60976534305402e-06,
"loss": 1.7046,
"step": 891500
},
{
"epoch": 12.84,
"learning_rate": 8.558046308268296e-06,
"loss": 1.7052,
"step": 892000
},
{
"epoch": 12.85,
"learning_rate": 8.50611956651556e-06,
"loss": 1.7046,
"step": 892500
},
{
"epoch": 12.85,
"learning_rate": 8.454400531729837e-06,
"loss": 1.7027,
"step": 893000
},
{
"epoch": 12.86,
"learning_rate": 8.402473789977102e-06,
"loss": 1.7061,
"step": 893500
},
{
"epoch": 12.87,
"learning_rate": 8.350754755191378e-06,
"loss": 1.7058,
"step": 894000
},
{
"epoch": 12.88,
"learning_rate": 8.298828013438641e-06,
"loss": 1.7022,
"step": 894500
},
{
"epoch": 12.88,
"learning_rate": 8.247108978652917e-06,
"loss": 1.7045,
"step": 895000
},
{
"epoch": 12.89,
"learning_rate": 8.195182236900182e-06,
"loss": 1.7035,
"step": 895500
},
{
"epoch": 12.9,
"learning_rate": 8.143463202114456e-06,
"loss": 1.6989,
"step": 896000
},
{
"epoch": 12.9,
"learning_rate": 8.091536460361721e-06,
"loss": 1.7054,
"step": 896500
},
{
"epoch": 12.91,
"learning_rate": 8.039817425575997e-06,
"loss": 1.7068,
"step": 897000
},
{
"epoch": 12.92,
"learning_rate": 7.987890683823262e-06,
"loss": 1.7069,
"step": 897500
},
{
"epoch": 12.93,
"learning_rate": 7.936171649037538e-06,
"loss": 1.703,
"step": 898000
},
{
"epoch": 12.93,
"learning_rate": 7.884244907284803e-06,
"loss": 1.7018,
"step": 898500
},
{
"epoch": 12.94,
"learning_rate": 7.83252587249908e-06,
"loss": 1.7047,
"step": 899000
},
{
"epoch": 12.95,
"learning_rate": 7.780599130746344e-06,
"loss": 1.7039,
"step": 899500
},
{
"epoch": 12.95,
"learning_rate": 7.72888009596062e-06,
"loss": 1.6989,
"step": 900000
},
{
"epoch": 12.96,
"learning_rate": 7.676953354207885e-06,
"loss": 1.7002,
"step": 900500
},
{
"epoch": 12.97,
"learning_rate": 7.62523431942216e-06,
"loss": 1.7026,
"step": 901000
},
{
"epoch": 12.98,
"learning_rate": 7.5733075776694235e-06,
"loss": 1.7036,
"step": 901500
},
{
"epoch": 12.98,
"learning_rate": 7.5215885428837e-06,
"loss": 1.7003,
"step": 902000
},
{
"epoch": 12.99,
"learning_rate": 7.4696618011309645e-06,
"loss": 1.7021,
"step": 902500
},
{
"epoch": 13.0,
"learning_rate": 7.41794276634524e-06,
"loss": 1.7049,
"step": 903000
},
{
"epoch": 13.0,
"eval_accuracy": 0.6671741584513293,
"eval_loss": 1.5751953125,
"eval_runtime": 652.7224,
"eval_samples_per_second": 825.676,
"eval_steps_per_second": 34.404,
"step": 903149
}
],
"max_steps": 972622,
"num_train_epochs": 14,
"total_flos": 6.473347929208783e+18,
"trial_name": null,
"trial_params": null
}