|
{ |
|
"best_metric": 0.9545454545454546, |
|
"best_model_checkpoint": "ViT-base-16-224-7.5-1.5-1.5-split-lion-4\\checkpoint-943", |
|
"epoch": 143.47826086956522, |
|
"eval_steps": 500, |
|
"global_step": 1650, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.7077922077922078, |
|
"eval_loss": 0.7381948232650757, |
|
"eval_runtime": 1.6508, |
|
"eval_samples_per_second": 93.289, |
|
"eval_steps_per_second": 6.058, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.5780272002695102e-05, |
|
"loss": 0.8102, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.5202796459197998, |
|
"eval_runtime": 1.6447, |
|
"eval_samples_per_second": 93.636, |
|
"eval_steps_per_second": 6.08, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 0.4076531231403351, |
|
"eval_runtime": 1.6584, |
|
"eval_samples_per_second": 92.862, |
|
"eval_steps_per_second": 6.03, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.3309654930856552e-05, |
|
"loss": 0.4016, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.3777826428413391, |
|
"eval_runtime": 1.6734, |
|
"eval_samples_per_second": 92.03, |
|
"eval_steps_per_second": 5.976, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_accuracy": 0.8766233766233766, |
|
"eval_loss": 0.35903552174568176, |
|
"eval_runtime": 1.6866, |
|
"eval_samples_per_second": 91.308, |
|
"eval_steps_per_second": 5.929, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 9.780401480557754e-06, |
|
"loss": 0.2052, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.29355403780937195, |
|
"eval_runtime": 1.7256, |
|
"eval_samples_per_second": 89.245, |
|
"eval_steps_per_second": 5.795, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 5.943306394074249e-06, |
|
"loss": 0.0838, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.2710248529911041, |
|
"eval_runtime": 1.6959, |
|
"eval_samples_per_second": 90.805, |
|
"eval_steps_per_second": 5.896, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.2775874435901642, |
|
"eval_runtime": 1.6736, |
|
"eval_samples_per_second": 92.018, |
|
"eval_steps_per_second": 5.975, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.6146530177605546e-06, |
|
"loss": 0.0407, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.22880351543426514, |
|
"eval_runtime": 1.6916, |
|
"eval_samples_per_second": 91.039, |
|
"eval_steps_per_second": 5.912, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.2207733541727066, |
|
"eval_runtime": 1.6896, |
|
"eval_samples_per_second": 91.147, |
|
"eval_steps_per_second": 5.919, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 5.025614934507641e-07, |
|
"loss": 0.039, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.22476842999458313, |
|
"eval_runtime": 1.7237, |
|
"eval_samples_per_second": 89.342, |
|
"eval_steps_per_second": 5.801, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.3085295557975769, |
|
"eval_runtime": 1.727, |
|
"eval_samples_per_second": 89.173, |
|
"eval_steps_per_second": 5.79, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 1.6610319647849526e-05, |
|
"loss": 0.0324, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.26784980297088623, |
|
"eval_runtime": 1.7027, |
|
"eval_samples_per_second": 90.446, |
|
"eval_steps_per_second": 5.873, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 1.529573176177447e-05, |
|
"loss": 0.022, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.948051948051948, |
|
"eval_loss": 0.2529321610927582, |
|
"eval_runtime": 1.7126, |
|
"eval_samples_per_second": 89.923, |
|
"eval_steps_per_second": 5.839, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.24028430879116058, |
|
"eval_runtime": 1.7176, |
|
"eval_samples_per_second": 89.659, |
|
"eval_steps_per_second": 5.822, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.012, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.3513343632221222, |
|
"eval_runtime": 1.7476, |
|
"eval_samples_per_second": 88.12, |
|
"eval_steps_per_second": 5.722, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.3014402389526367, |
|
"eval_runtime": 1.7276, |
|
"eval_samples_per_second": 89.139, |
|
"eval_steps_per_second": 5.788, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 8.817873574253966e-06, |
|
"loss": 0.0097, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.31746622920036316, |
|
"eval_runtime": 1.9143, |
|
"eval_samples_per_second": 80.446, |
|
"eval_steps_per_second": 5.224, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.2747339904308319, |
|
"eval_runtime": 1.7322, |
|
"eval_samples_per_second": 88.904, |
|
"eval_steps_per_second": 5.773, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 5.03266861634036e-06, |
|
"loss": 0.0052, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.2932997941970825, |
|
"eval_runtime": 1.7167, |
|
"eval_samples_per_second": 89.706, |
|
"eval_steps_per_second": 5.825, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 1.9496296406751813e-06, |
|
"loss": 0.009, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.28077924251556396, |
|
"eval_runtime": 1.7005, |
|
"eval_samples_per_second": 90.563, |
|
"eval_steps_per_second": 5.881, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.29576078057289124, |
|
"eval_runtime": 1.7368, |
|
"eval_samples_per_second": 88.67, |
|
"eval_steps_per_second": 5.758, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 2.246260785014683e-07, |
|
"loss": 0.0115, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 22.96, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.2983975112438202, |
|
"eval_runtime": 1.7116, |
|
"eval_samples_per_second": 89.976, |
|
"eval_steps_per_second": 5.843, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.3521440327167511, |
|
"eval_runtime": 1.7503, |
|
"eval_samples_per_second": 87.986, |
|
"eval_steps_per_second": 5.713, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 1.64420405881652e-05, |
|
"loss": 0.0104, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.4289417862892151, |
|
"eval_runtime": 1.7466, |
|
"eval_samples_per_second": 88.173, |
|
"eval_steps_per_second": 5.726, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.6231942176818848, |
|
"eval_runtime": 1.7359, |
|
"eval_samples_per_second": 88.715, |
|
"eval_steps_per_second": 5.761, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 1.4717037025991483e-05, |
|
"loss": 0.0086, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5161650776863098, |
|
"eval_runtime": 1.7289, |
|
"eval_samples_per_second": 89.075, |
|
"eval_steps_per_second": 5.784, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"learning_rate": 1.1633998050326307e-05, |
|
"loss": 0.0205, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.6762561798095703, |
|
"eval_runtime": 1.7546, |
|
"eval_samples_per_second": 87.77, |
|
"eval_steps_per_second": 5.699, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.4664335250854492, |
|
"eval_runtime": 1.7617, |
|
"eval_samples_per_second": 87.414, |
|
"eval_steps_per_second": 5.676, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 7.848793092412702e-06, |
|
"loss": 0.012, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.3841441869735718, |
|
"eval_runtime": 1.7019, |
|
"eval_samples_per_second": 90.487, |
|
"eval_steps_per_second": 5.876, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.42462781071662903, |
|
"eval_runtime": 1.741, |
|
"eval_samples_per_second": 88.455, |
|
"eval_steps_per_second": 5.744, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 31.3, |
|
"learning_rate": 4.1666666666666686e-06, |
|
"loss": 0.0061, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.43087005615234375, |
|
"eval_runtime": 1.7385, |
|
"eval_samples_per_second": 88.582, |
|
"eval_steps_per_second": 5.752, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.45713886618614197, |
|
"eval_runtime": 1.7098, |
|
"eval_samples_per_second": 90.069, |
|
"eval_steps_per_second": 5.849, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 33.04, |
|
"learning_rate": 1.3709349048921951e-06, |
|
"loss": 0.0093, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.48606640100479126, |
|
"eval_runtime": 1.7668, |
|
"eval_samples_per_second": 87.163, |
|
"eval_steps_per_second": 5.66, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 5.634701881714148e-08, |
|
"loss": 0.0101, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 34.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.49100440740585327, |
|
"eval_runtime": 1.7267, |
|
"eval_samples_per_second": 89.186, |
|
"eval_steps_per_second": 5.791, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.5978976488113403, |
|
"eval_runtime": 1.7357, |
|
"eval_samples_per_second": 88.725, |
|
"eval_steps_per_second": 5.761, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 36.52, |
|
"learning_rate": 1.6164105173215904e-05, |
|
"loss": 0.011, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.3977736830711365, |
|
"eval_runtime": 1.7968, |
|
"eval_samples_per_second": 85.708, |
|
"eval_steps_per_second": 5.565, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.40591639280319214, |
|
"eval_runtime": 1.7136, |
|
"eval_samples_per_second": 89.87, |
|
"eval_steps_per_second": 5.836, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 38.26, |
|
"learning_rate": 1.4052013648906114e-05, |
|
"loss": 0.0226, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 38.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.4942101240158081, |
|
"eval_runtime": 1.7437, |
|
"eval_samples_per_second": 88.318, |
|
"eval_steps_per_second": 5.735, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1.0723360272592418e-05, |
|
"loss": 0.0118, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.7233626842498779, |
|
"eval_runtime": 1.7317, |
|
"eval_samples_per_second": 88.928, |
|
"eval_steps_per_second": 5.775, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 40.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5826935172080994, |
|
"eval_runtime": 1.7439, |
|
"eval_samples_per_second": 88.31, |
|
"eval_steps_per_second": 5.734, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 41.74, |
|
"learning_rate": 6.886265186108914e-06, |
|
"loss": 0.011, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.662550687789917, |
|
"eval_runtime": 1.7627, |
|
"eval_samples_per_second": 87.366, |
|
"eval_steps_per_second": 5.673, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 42.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5871102213859558, |
|
"eval_runtime": 1.7486, |
|
"eval_samples_per_second": 88.072, |
|
"eval_steps_per_second": 5.719, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"learning_rate": 3.3570117358101172e-06, |
|
"loss": 0.0003, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5640321969985962, |
|
"eval_runtime": 1.6975, |
|
"eval_samples_per_second": 90.721, |
|
"eval_steps_per_second": 5.891, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5655084848403931, |
|
"eval_runtime": 1.7184, |
|
"eval_samples_per_second": 89.619, |
|
"eval_steps_per_second": 5.819, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 45.22, |
|
"learning_rate": 8.863946639715635e-07, |
|
"loss": 0.0005, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5844298601150513, |
|
"eval_runtime": 1.7344, |
|
"eval_samples_per_second": 88.793, |
|
"eval_steps_per_second": 5.766, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0064, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5887525677680969, |
|
"eval_runtime": 1.7094, |
|
"eval_samples_per_second": 90.091, |
|
"eval_steps_per_second": 5.85, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.6656709313392639, |
|
"eval_runtime": 1.7414, |
|
"eval_samples_per_second": 88.435, |
|
"eval_steps_per_second": 5.743, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"learning_rate": 1.5780272002695102e-05, |
|
"loss": 0.0084, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 48.96, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.7156269550323486, |
|
"eval_runtime": 1.7014, |
|
"eval_samples_per_second": 90.514, |
|
"eval_steps_per_second": 5.878, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8766233766233766, |
|
"eval_loss": 0.9346238970756531, |
|
"eval_runtime": 1.7164, |
|
"eval_samples_per_second": 89.724, |
|
"eval_steps_per_second": 5.826, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 50.43, |
|
"learning_rate": 1.3309654930856552e-05, |
|
"loss": 0.0318, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 50.96, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.8030693531036377, |
|
"eval_runtime": 1.6994, |
|
"eval_samples_per_second": 90.621, |
|
"eval_steps_per_second": 5.884, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.5700052976608276, |
|
"eval_runtime": 1.7294, |
|
"eval_samples_per_second": 89.049, |
|
"eval_steps_per_second": 5.782, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"learning_rate": 9.780401480557754e-06, |
|
"loss": 0.0338, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 52.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.40834710001945496, |
|
"eval_runtime": 1.6964, |
|
"eval_samples_per_second": 90.781, |
|
"eval_steps_per_second": 5.895, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 53.91, |
|
"learning_rate": 5.943306394074249e-06, |
|
"loss": 0.0147, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.41295498609542847, |
|
"eval_runtime": 1.7144, |
|
"eval_samples_per_second": 89.828, |
|
"eval_steps_per_second": 5.833, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 54.96, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.40372058749198914, |
|
"eval_runtime": 1.7344, |
|
"eval_samples_per_second": 88.792, |
|
"eval_steps_per_second": 5.766, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 55.65, |
|
"learning_rate": 2.6146530177605546e-06, |
|
"loss": 0.0011, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.36978378891944885, |
|
"eval_runtime": 1.7234, |
|
"eval_samples_per_second": 89.359, |
|
"eval_steps_per_second": 5.803, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.38696253299713135, |
|
"eval_runtime": 1.7014, |
|
"eval_samples_per_second": 90.515, |
|
"eval_steps_per_second": 5.878, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 57.39, |
|
"learning_rate": 5.025614934507641e-07, |
|
"loss": 0.0021, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.39924710988998413, |
|
"eval_runtime": 1.6854, |
|
"eval_samples_per_second": 91.374, |
|
"eval_steps_per_second": 5.933, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 58.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.44311344623565674, |
|
"eval_runtime": 1.6854, |
|
"eval_samples_per_second": 91.374, |
|
"eval_steps_per_second": 5.933, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 59.13, |
|
"learning_rate": 1.6610319647849526e-05, |
|
"loss": 0.0002, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.44143199920654297, |
|
"eval_runtime": 1.7504, |
|
"eval_samples_per_second": 87.98, |
|
"eval_steps_per_second": 5.713, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 60.87, |
|
"learning_rate": 1.529573176177447e-05, |
|
"loss": 0.0088, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 60.96, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.4255146086215973, |
|
"eval_runtime": 1.7144, |
|
"eval_samples_per_second": 89.828, |
|
"eval_steps_per_second": 5.833, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.4168331027030945, |
|
"eval_runtime": 1.7434, |
|
"eval_samples_per_second": 88.334, |
|
"eval_steps_per_second": 5.736, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 62.61, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0061, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 62.96, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.49312305450439453, |
|
"eval_runtime": 1.7334, |
|
"eval_samples_per_second": 88.843, |
|
"eval_steps_per_second": 5.769, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.948051948051948, |
|
"eval_loss": 0.28522124886512756, |
|
"eval_runtime": 1.7184, |
|
"eval_samples_per_second": 89.619, |
|
"eval_steps_per_second": 5.819, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 64.35, |
|
"learning_rate": 8.817873574253966e-06, |
|
"loss": 0.0179, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 64.96, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.44120827317237854, |
|
"eval_runtime": 1.7364, |
|
"eval_samples_per_second": 88.69, |
|
"eval_steps_per_second": 5.759, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.331409215927124, |
|
"eval_runtime": 1.7124, |
|
"eval_samples_per_second": 89.933, |
|
"eval_steps_per_second": 5.84, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 66.09, |
|
"learning_rate": 5.03266861634036e-06, |
|
"loss": 0.0014, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.2971489727497101, |
|
"eval_runtime": 1.7494, |
|
"eval_samples_per_second": 88.031, |
|
"eval_steps_per_second": 5.716, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 67.83, |
|
"learning_rate": 1.9496296406751813e-06, |
|
"loss": 0.0199, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.3261590301990509, |
|
"eval_runtime": 1.7504, |
|
"eval_samples_per_second": 87.98, |
|
"eval_steps_per_second": 5.713, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.3835467994213104, |
|
"eval_runtime": 1.7824, |
|
"eval_samples_per_second": 86.401, |
|
"eval_steps_per_second": 5.61, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 69.57, |
|
"learning_rate": 2.246260785014683e-07, |
|
"loss": 0.0091, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.3903743326663971, |
|
"eval_runtime": 1.8425, |
|
"eval_samples_per_second": 83.582, |
|
"eval_steps_per_second": 5.427, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 70.96, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.47730717062950134, |
|
"eval_runtime": 1.8005, |
|
"eval_samples_per_second": 85.531, |
|
"eval_steps_per_second": 5.554, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 71.3, |
|
"learning_rate": 1.64420405881652e-05, |
|
"loss": 0.0029, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.4937627613544464, |
|
"eval_runtime": 1.7875, |
|
"eval_samples_per_second": 86.152, |
|
"eval_steps_per_second": 5.594, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 72.96, |
|
"eval_accuracy": 0.948051948051948, |
|
"eval_loss": 0.456775426864624, |
|
"eval_runtime": 1.7718, |
|
"eval_samples_per_second": 86.918, |
|
"eval_steps_per_second": 5.644, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 73.04, |
|
"learning_rate": 1.4717037025991483e-05, |
|
"loss": 0.0224, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.4465040862560272, |
|
"eval_runtime": 1.7516, |
|
"eval_samples_per_second": 87.92, |
|
"eval_steps_per_second": 5.709, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 74.78, |
|
"learning_rate": 1.1633998050326307e-05, |
|
"loss": 0.0045, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 74.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.6093705296516418, |
|
"eval_runtime": 1.8028, |
|
"eval_samples_per_second": 85.421, |
|
"eval_steps_per_second": 5.547, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.6924065947532654, |
|
"eval_runtime": 1.7779, |
|
"eval_samples_per_second": 86.617, |
|
"eval_steps_per_second": 5.624, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 76.52, |
|
"learning_rate": 7.848793092412702e-06, |
|
"loss": 0.0088, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.48120445013046265, |
|
"eval_runtime": 1.7635, |
|
"eval_samples_per_second": 87.327, |
|
"eval_steps_per_second": 5.671, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.6367415189743042, |
|
"eval_runtime": 1.7799, |
|
"eval_samples_per_second": 86.522, |
|
"eval_steps_per_second": 5.618, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 78.26, |
|
"learning_rate": 4.1666666666666686e-06, |
|
"loss": 0.0033, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 78.96, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.49070408940315247, |
|
"eval_runtime": 1.7676, |
|
"eval_samples_per_second": 87.125, |
|
"eval_steps_per_second": 5.657, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.3709349048921951e-06, |
|
"loss": 0.0076, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.9415584415584416, |
|
"eval_loss": 0.31148040294647217, |
|
"eval_runtime": 1.7496, |
|
"eval_samples_per_second": 88.021, |
|
"eval_steps_per_second": 5.716, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 80.96, |
|
"eval_accuracy": 0.948051948051948, |
|
"eval_loss": 0.2700955271720886, |
|
"eval_runtime": 1.7827, |
|
"eval_samples_per_second": 86.387, |
|
"eval_steps_per_second": 5.61, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 81.74, |
|
"learning_rate": 5.634701881714148e-08, |
|
"loss": 0.0002, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.9545454545454546, |
|
"eval_loss": 0.2613329589366913, |
|
"eval_runtime": 1.7597, |
|
"eval_samples_per_second": 87.515, |
|
"eval_steps_per_second": 5.683, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 82.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.40443289279937744, |
|
"eval_runtime": 1.7637, |
|
"eval_samples_per_second": 87.314, |
|
"eval_steps_per_second": 5.67, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 83.48, |
|
"learning_rate": 1.6164105173215904e-05, |
|
"loss": 0.0193, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.9613493084907532, |
|
"eval_runtime": 1.7854, |
|
"eval_samples_per_second": 86.255, |
|
"eval_steps_per_second": 5.601, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 84.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.6934040784835815, |
|
"eval_runtime": 1.7617, |
|
"eval_samples_per_second": 87.414, |
|
"eval_steps_per_second": 5.676, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 85.22, |
|
"learning_rate": 1.4052013648906114e-05, |
|
"loss": 0.0238, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.9348794221878052, |
|
"eval_runtime": 1.7915, |
|
"eval_samples_per_second": 85.962, |
|
"eval_steps_per_second": 5.582, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"learning_rate": 1.0723360272592418e-05, |
|
"loss": 0.011, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.8836289644241333, |
|
"eval_runtime": 1.8098, |
|
"eval_samples_per_second": 85.095, |
|
"eval_steps_per_second": 5.526, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.7403988838195801, |
|
"eval_runtime": 1.7608, |
|
"eval_samples_per_second": 87.459, |
|
"eval_steps_per_second": 5.679, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 88.7, |
|
"learning_rate": 6.886265186108914e-06, |
|
"loss": 0.018, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 88.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5259799957275391, |
|
"eval_runtime": 1.7749, |
|
"eval_samples_per_second": 86.764, |
|
"eval_steps_per_second": 5.634, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5202356576919556, |
|
"eval_runtime": 1.7868, |
|
"eval_samples_per_second": 86.187, |
|
"eval_steps_per_second": 5.597, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 90.43, |
|
"learning_rate": 3.3570117358101172e-06, |
|
"loss": 0.0041, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 90.96, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.48472902178764343, |
|
"eval_runtime": 1.7845, |
|
"eval_samples_per_second": 86.297, |
|
"eval_steps_per_second": 5.604, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.48678579926490784, |
|
"eval_runtime": 1.7317, |
|
"eval_samples_per_second": 88.93, |
|
"eval_steps_per_second": 5.775, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 92.17, |
|
"learning_rate": 8.863946639715635e-07, |
|
"loss": 0.001, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 92.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.4975196421146393, |
|
"eval_runtime": 1.7866, |
|
"eval_samples_per_second": 86.198, |
|
"eval_steps_per_second": 5.597, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 93.91, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0014, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.6255179643630981, |
|
"eval_runtime": 1.7777, |
|
"eval_samples_per_second": 86.631, |
|
"eval_steps_per_second": 5.625, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"eval_accuracy": 0.8766233766233766, |
|
"eval_loss": 0.9968315362930298, |
|
"eval_runtime": 1.7619, |
|
"eval_samples_per_second": 87.408, |
|
"eval_steps_per_second": 5.676, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 95.65, |
|
"learning_rate": 1.5780272002695102e-05, |
|
"loss": 0.0165, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.6173205971717834, |
|
"eval_runtime": 1.8105, |
|
"eval_samples_per_second": 85.06, |
|
"eval_steps_per_second": 5.523, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 96.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.7537987232208252, |
|
"eval_runtime": 1.7727, |
|
"eval_samples_per_second": 86.873, |
|
"eval_steps_per_second": 5.641, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 97.39, |
|
"learning_rate": 1.3309654930856552e-05, |
|
"loss": 0.013, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8766233766233766, |
|
"eval_loss": 0.7560279965400696, |
|
"eval_runtime": 1.7715, |
|
"eval_samples_per_second": 86.931, |
|
"eval_steps_per_second": 5.645, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 98.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5807818174362183, |
|
"eval_runtime": 1.79, |
|
"eval_samples_per_second": 86.032, |
|
"eval_steps_per_second": 5.586, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 99.13, |
|
"learning_rate": 9.780401480557754e-06, |
|
"loss": 0.0237, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5147651433944702, |
|
"eval_runtime": 1.7527, |
|
"eval_samples_per_second": 87.864, |
|
"eval_steps_per_second": 5.705, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 100.87, |
|
"learning_rate": 5.943306394074249e-06, |
|
"loss": 0.0061, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 100.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5672047734260559, |
|
"eval_runtime": 1.7885, |
|
"eval_samples_per_second": 86.106, |
|
"eval_steps_per_second": 5.591, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.4343276619911194, |
|
"eval_runtime": 1.7759, |
|
"eval_samples_per_second": 86.714, |
|
"eval_steps_per_second": 5.631, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 102.61, |
|
"learning_rate": 2.6146530177605546e-06, |
|
"loss": 0.002, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 102.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.32392024993896484, |
|
"eval_runtime": 1.8027, |
|
"eval_samples_per_second": 85.425, |
|
"eval_steps_per_second": 5.547, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.2951604127883911, |
|
"eval_runtime": 1.8006, |
|
"eval_samples_per_second": 85.525, |
|
"eval_steps_per_second": 5.554, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 104.35, |
|
"learning_rate": 5.025614934507641e-07, |
|
"loss": 0.0005, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 104.96, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.2926579713821411, |
|
"eval_runtime": 1.7666, |
|
"eval_samples_per_second": 87.174, |
|
"eval_steps_per_second": 5.661, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.3511568605899811, |
|
"eval_runtime": 1.7794, |
|
"eval_samples_per_second": 86.546, |
|
"eval_steps_per_second": 5.62, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 106.09, |
|
"learning_rate": 1.6610319647849526e-05, |
|
"loss": 0.0003, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 106.96, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.4030219316482544, |
|
"eval_runtime": 1.7918, |
|
"eval_samples_per_second": 85.945, |
|
"eval_steps_per_second": 5.581, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 107.83, |
|
"learning_rate": 1.529573176177447e-05, |
|
"loss": 0.0023, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.3911021053791046, |
|
"eval_runtime": 1.7635, |
|
"eval_samples_per_second": 87.328, |
|
"eval_steps_per_second": 5.671, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 108.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5156851410865784, |
|
"eval_runtime": 1.7917, |
|
"eval_samples_per_second": 85.953, |
|
"eval_steps_per_second": 5.581, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 109.57, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0114, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5531629323959351, |
|
"eval_runtime": 1.7244, |
|
"eval_samples_per_second": 89.307, |
|
"eval_steps_per_second": 5.799, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 110.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.6447522640228271, |
|
"eval_runtime": 1.7304, |
|
"eval_samples_per_second": 88.997, |
|
"eval_steps_per_second": 5.779, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 111.3, |
|
"learning_rate": 8.817873574253966e-06, |
|
"loss": 0.0003, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.6866676807403564, |
|
"eval_runtime": 1.7494, |
|
"eval_samples_per_second": 88.031, |
|
"eval_steps_per_second": 5.716, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 112.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.7411206960678101, |
|
"eval_runtime": 1.7444, |
|
"eval_samples_per_second": 88.283, |
|
"eval_steps_per_second": 5.733, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 113.04, |
|
"learning_rate": 5.03266861634036e-06, |
|
"loss": 0.0153, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.7488161325454712, |
|
"eval_runtime": 1.7644, |
|
"eval_samples_per_second": 87.282, |
|
"eval_steps_per_second": 5.668, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 114.78, |
|
"learning_rate": 1.9496296406751813e-06, |
|
"loss": 0.0039, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 114.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.6945769786834717, |
|
"eval_runtime": 1.7344, |
|
"eval_samples_per_second": 88.792, |
|
"eval_steps_per_second": 5.766, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.7218338847160339, |
|
"eval_runtime": 1.7644, |
|
"eval_samples_per_second": 87.281, |
|
"eval_steps_per_second": 5.668, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 116.52, |
|
"learning_rate": 2.246260785014683e-07, |
|
"loss": 0.0002, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 116.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.7305352091789246, |
|
"eval_runtime": 1.7634, |
|
"eval_samples_per_second": 87.332, |
|
"eval_steps_per_second": 5.671, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 1.0061231851577759, |
|
"eval_runtime": 1.7854, |
|
"eval_samples_per_second": 86.255, |
|
"eval_steps_per_second": 5.601, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 118.26, |
|
"learning_rate": 1.64420405881652e-05, |
|
"loss": 0.0066, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 118.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.5966177582740784, |
|
"eval_runtime": 1.7834, |
|
"eval_samples_per_second": 86.352, |
|
"eval_steps_per_second": 5.607, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 1.4717037025991483e-05, |
|
"loss": 0.0083, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 1.1087840795516968, |
|
"eval_runtime": 1.7834, |
|
"eval_samples_per_second": 86.352, |
|
"eval_steps_per_second": 5.607, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 120.96, |
|
"eval_accuracy": 0.8831168831168831, |
|
"eval_loss": 0.821342945098877, |
|
"eval_runtime": 1.7534, |
|
"eval_samples_per_second": 87.83, |
|
"eval_steps_per_second": 5.703, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 121.74, |
|
"learning_rate": 1.1633998050326307e-05, |
|
"loss": 0.0202, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5226480960845947, |
|
"eval_runtime": 1.7954, |
|
"eval_samples_per_second": 85.775, |
|
"eval_steps_per_second": 5.57, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 122.96, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.44853323698043823, |
|
"eval_runtime": 1.7774, |
|
"eval_samples_per_second": 86.643, |
|
"eval_steps_per_second": 5.626, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 123.48, |
|
"learning_rate": 7.848793092412702e-06, |
|
"loss": 0.0033, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.7281427979469299, |
|
"eval_runtime": 1.8004, |
|
"eval_samples_per_second": 85.537, |
|
"eval_steps_per_second": 5.554, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 124.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.6655176281929016, |
|
"eval_runtime": 1.7664, |
|
"eval_samples_per_second": 87.183, |
|
"eval_steps_per_second": 5.661, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 125.22, |
|
"learning_rate": 4.1666666666666686e-06, |
|
"loss": 0.0185, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.5161400437355042, |
|
"eval_runtime": 1.7704, |
|
"eval_samples_per_second": 86.986, |
|
"eval_steps_per_second": 5.648, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 126.96, |
|
"learning_rate": 1.3709349048921951e-06, |
|
"loss": 0.0001, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 126.96, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.4859886169433594, |
|
"eval_runtime": 1.7874, |
|
"eval_samples_per_second": 86.159, |
|
"eval_steps_per_second": 5.595, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.4834165573120117, |
|
"eval_runtime": 1.7944, |
|
"eval_samples_per_second": 85.822, |
|
"eval_steps_per_second": 5.573, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 128.7, |
|
"learning_rate": 5.634701881714148e-08, |
|
"loss": 0.0047, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 128.96, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.48360273241996765, |
|
"eval_runtime": 1.7574, |
|
"eval_samples_per_second": 87.63, |
|
"eval_steps_per_second": 5.69, |
|
"step": 1483 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.9155844155844156, |
|
"eval_loss": 0.6164301037788391, |
|
"eval_runtime": 1.7394, |
|
"eval_samples_per_second": 88.537, |
|
"eval_steps_per_second": 5.749, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"learning_rate": 1.6164105173215904e-05, |
|
"loss": 0.011, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 130.96, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.7818012237548828, |
|
"eval_runtime": 1.7684, |
|
"eval_samples_per_second": 87.085, |
|
"eval_steps_per_second": 5.655, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.8022345900535583, |
|
"eval_runtime": 1.7544, |
|
"eval_samples_per_second": 87.779, |
|
"eval_steps_per_second": 5.7, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 132.17, |
|
"learning_rate": 1.4052013648906114e-05, |
|
"loss": 0.0023, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 132.96, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.8653693199157715, |
|
"eval_runtime": 1.7604, |
|
"eval_samples_per_second": 87.48, |
|
"eval_steps_per_second": 5.681, |
|
"step": 1529 |
|
}, |
|
{ |
|
"epoch": 133.91, |
|
"learning_rate": 1.0723360272592418e-05, |
|
"loss": 0.0222, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.8896103896103896, |
|
"eval_loss": 0.6707515716552734, |
|
"eval_runtime": 1.7434, |
|
"eval_samples_per_second": 88.334, |
|
"eval_steps_per_second": 5.736, |
|
"step": 1541 |
|
}, |
|
{ |
|
"epoch": 134.96, |
|
"eval_accuracy": 0.935064935064935, |
|
"eval_loss": 0.4996984004974365, |
|
"eval_runtime": 1.7534, |
|
"eval_samples_per_second": 87.83, |
|
"eval_steps_per_second": 5.703, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 135.65, |
|
"learning_rate": 6.886265186108914e-06, |
|
"loss": 0.0126, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.5560286641120911, |
|
"eval_runtime": 1.7314, |
|
"eval_samples_per_second": 88.946, |
|
"eval_steps_per_second": 5.776, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 136.96, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.6162758469581604, |
|
"eval_runtime": 1.7204, |
|
"eval_samples_per_second": 89.515, |
|
"eval_steps_per_second": 5.813, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 137.39, |
|
"learning_rate": 3.3570117358101172e-06, |
|
"loss": 0.014, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.6488694548606873, |
|
"eval_runtime": 1.7194, |
|
"eval_samples_per_second": 89.567, |
|
"eval_steps_per_second": 5.816, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 138.96, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 0.684516191482544, |
|
"eval_runtime": 1.7304, |
|
"eval_samples_per_second": 88.997, |
|
"eval_steps_per_second": 5.779, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 139.13, |
|
"learning_rate": 8.863946639715635e-07, |
|
"loss": 0.0088, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.8766233766233766, |
|
"eval_loss": 0.7022619247436523, |
|
"eval_runtime": 1.7374, |
|
"eval_samples_per_second": 88.639, |
|
"eval_steps_per_second": 5.756, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 140.87, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0022, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 140.96, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 0.762221097946167, |
|
"eval_runtime": 1.6984, |
|
"eval_samples_per_second": 90.675, |
|
"eval_steps_per_second": 5.888, |
|
"step": 1621 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.8961038961038961, |
|
"eval_loss": 0.6736029982566833, |
|
"eval_runtime": 1.7594, |
|
"eval_samples_per_second": 87.53, |
|
"eval_steps_per_second": 5.684, |
|
"step": 1633 |
|
}, |
|
{ |
|
"epoch": 142.61, |
|
"learning_rate": 1.5780272002695102e-05, |
|
"loss": 0.0017, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 142.96, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.5298991799354553, |
|
"eval_runtime": 1.6954, |
|
"eval_samples_per_second": 90.835, |
|
"eval_steps_per_second": 5.898, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"eval_accuracy": 0.9025974025974026, |
|
"eval_loss": 0.5584802031517029, |
|
"eval_runtime": 1.6964, |
|
"eval_samples_per_second": 90.782, |
|
"eval_steps_per_second": 5.895, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"step": 1650, |
|
"total_flos": 8.017005638819359e+18, |
|
"train_loss": 0.028458750352940775, |
|
"train_runtime": 2286.6106, |
|
"train_samples_per_second": 47.297, |
|
"train_steps_per_second": 0.722 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1650, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"total_flos": 8.017005638819359e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |