{ "best_metric": 0.9545454545454546, "best_model_checkpoint": "ViT-base-16-224-7.5-1.5-1.5-split-lion-4\\checkpoint-943", "epoch": 143.47826086956522, "eval_steps": 500, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.96, "eval_accuracy": 0.7077922077922078, "eval_loss": 0.7381948232650757, "eval_runtime": 1.6508, "eval_samples_per_second": 93.289, "eval_steps_per_second": 6.058, "step": 11 }, { "epoch": 1.74, "learning_rate": 1.5780272002695102e-05, "loss": 0.8102, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.8181818181818182, "eval_loss": 0.5202796459197998, "eval_runtime": 1.6447, "eval_samples_per_second": 93.636, "eval_steps_per_second": 6.08, "step": 23 }, { "epoch": 2.96, "eval_accuracy": 0.8701298701298701, "eval_loss": 0.4076531231403351, "eval_runtime": 1.6584, "eval_samples_per_second": 92.862, "eval_steps_per_second": 6.03, "step": 34 }, { "epoch": 3.48, "learning_rate": 1.3309654930856552e-05, "loss": 0.4016, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.3777826428413391, "eval_runtime": 1.6734, "eval_samples_per_second": 92.03, "eval_steps_per_second": 5.976, "step": 46 }, { "epoch": 4.96, "eval_accuracy": 0.8766233766233766, "eval_loss": 0.35903552174568176, "eval_runtime": 1.6866, "eval_samples_per_second": 91.308, "eval_steps_per_second": 5.929, "step": 57 }, { "epoch": 5.22, "learning_rate": 9.780401480557754e-06, "loss": 0.2052, "step": 60 }, { "epoch": 6.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.29355403780937195, "eval_runtime": 1.7256, "eval_samples_per_second": 89.245, "eval_steps_per_second": 5.795, "step": 69 }, { "epoch": 6.96, "learning_rate": 5.943306394074249e-06, "loss": 0.0838, "step": 80 }, { "epoch": 6.96, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.2710248529911041, "eval_runtime": 1.6959, "eval_samples_per_second": 90.805, "eval_steps_per_second": 5.896, "step": 80 }, { "epoch": 8.0, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.2775874435901642, "eval_runtime": 1.6736, "eval_samples_per_second": 92.018, "eval_steps_per_second": 5.975, "step": 92 }, { "epoch": 8.7, "learning_rate": 2.6146530177605546e-06, "loss": 0.0407, "step": 100 }, { "epoch": 8.96, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.22880351543426514, "eval_runtime": 1.6916, "eval_samples_per_second": 91.039, "eval_steps_per_second": 5.912, "step": 103 }, { "epoch": 10.0, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.2207733541727066, "eval_runtime": 1.6896, "eval_samples_per_second": 91.147, "eval_steps_per_second": 5.919, "step": 115 }, { "epoch": 10.43, "learning_rate": 5.025614934507641e-07, "loss": 0.039, "step": 120 }, { "epoch": 10.96, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.22476842999458313, "eval_runtime": 1.7237, "eval_samples_per_second": 89.342, "eval_steps_per_second": 5.801, "step": 126 }, { "epoch": 12.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.3085295557975769, "eval_runtime": 1.727, "eval_samples_per_second": 89.173, "eval_steps_per_second": 5.79, "step": 138 }, { "epoch": 12.17, "learning_rate": 1.6610319647849526e-05, "loss": 0.0324, "step": 140 }, { "epoch": 12.96, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.26784980297088623, "eval_runtime": 1.7027, "eval_samples_per_second": 90.446, "eval_steps_per_second": 5.873, "step": 149 }, { "epoch": 13.91, "learning_rate": 1.529573176177447e-05, "loss": 0.022, "step": 160 }, { "epoch": 14.0, "eval_accuracy": 0.948051948051948, "eval_loss": 0.2529321610927582, "eval_runtime": 1.7126, "eval_samples_per_second": 89.923, "eval_steps_per_second": 5.839, "step": 161 }, { "epoch": 14.96, "eval_accuracy": 0.922077922077922, "eval_loss": 0.24028430879116058, "eval_runtime": 1.7176, "eval_samples_per_second": 89.659, "eval_steps_per_second": 5.822, "step": 172 }, { "epoch": 15.65, "learning_rate": 1.25e-05, "loss": 0.012, "step": 180 }, { "epoch": 16.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.3513343632221222, "eval_runtime": 1.7476, "eval_samples_per_second": 88.12, "eval_steps_per_second": 5.722, "step": 184 }, { "epoch": 16.96, "eval_accuracy": 0.935064935064935, "eval_loss": 0.3014402389526367, "eval_runtime": 1.7276, "eval_samples_per_second": 89.139, "eval_steps_per_second": 5.788, "step": 195 }, { "epoch": 17.39, "learning_rate": 8.817873574253966e-06, "loss": 0.0097, "step": 200 }, { "epoch": 18.0, "eval_accuracy": 0.935064935064935, "eval_loss": 0.31746622920036316, "eval_runtime": 1.9143, "eval_samples_per_second": 80.446, "eval_steps_per_second": 5.224, "step": 207 }, { "epoch": 18.96, "eval_accuracy": 0.935064935064935, "eval_loss": 0.2747339904308319, "eval_runtime": 1.7322, "eval_samples_per_second": 88.904, "eval_steps_per_second": 5.773, "step": 218 }, { "epoch": 19.13, "learning_rate": 5.03266861634036e-06, "loss": 0.0052, "step": 220 }, { "epoch": 20.0, "eval_accuracy": 0.935064935064935, "eval_loss": 0.2932997941970825, "eval_runtime": 1.7167, "eval_samples_per_second": 89.706, "eval_steps_per_second": 5.825, "step": 230 }, { "epoch": 20.87, "learning_rate": 1.9496296406751813e-06, "loss": 0.009, "step": 240 }, { "epoch": 20.96, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.28077924251556396, "eval_runtime": 1.7005, "eval_samples_per_second": 90.563, "eval_steps_per_second": 5.881, "step": 241 }, { "epoch": 22.0, "eval_accuracy": 0.935064935064935, "eval_loss": 0.29576078057289124, "eval_runtime": 1.7368, "eval_samples_per_second": 88.67, "eval_steps_per_second": 5.758, "step": 253 }, { "epoch": 22.61, "learning_rate": 2.246260785014683e-07, "loss": 0.0115, "step": 260 }, { "epoch": 22.96, "eval_accuracy": 0.935064935064935, "eval_loss": 0.2983975112438202, "eval_runtime": 1.7116, "eval_samples_per_second": 89.976, "eval_steps_per_second": 5.843, "step": 264 }, { "epoch": 24.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.3521440327167511, "eval_runtime": 1.7503, "eval_samples_per_second": 87.986, "eval_steps_per_second": 5.713, "step": 276 }, { "epoch": 24.35, "learning_rate": 1.64420405881652e-05, "loss": 0.0104, "step": 280 }, { "epoch": 24.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.4289417862892151, "eval_runtime": 1.7466, "eval_samples_per_second": 88.173, "eval_steps_per_second": 5.726, "step": 287 }, { "epoch": 26.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.6231942176818848, "eval_runtime": 1.7359, "eval_samples_per_second": 88.715, "eval_steps_per_second": 5.761, "step": 299 }, { "epoch": 26.09, "learning_rate": 1.4717037025991483e-05, "loss": 0.0086, "step": 300 }, { "epoch": 26.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5161650776863098, "eval_runtime": 1.7289, "eval_samples_per_second": 89.075, "eval_steps_per_second": 5.784, "step": 310 }, { "epoch": 27.83, "learning_rate": 1.1633998050326307e-05, "loss": 0.0205, "step": 320 }, { "epoch": 28.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.6762561798095703, "eval_runtime": 1.7546, "eval_samples_per_second": 87.77, "eval_steps_per_second": 5.699, "step": 322 }, { "epoch": 28.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.4664335250854492, "eval_runtime": 1.7617, "eval_samples_per_second": 87.414, "eval_steps_per_second": 5.676, "step": 333 }, { "epoch": 29.57, "learning_rate": 7.848793092412702e-06, "loss": 0.012, "step": 340 }, { "epoch": 30.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.3841441869735718, "eval_runtime": 1.7019, "eval_samples_per_second": 90.487, "eval_steps_per_second": 5.876, "step": 345 }, { "epoch": 30.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.42462781071662903, "eval_runtime": 1.741, "eval_samples_per_second": 88.455, "eval_steps_per_second": 5.744, "step": 356 }, { "epoch": 31.3, "learning_rate": 4.1666666666666686e-06, "loss": 0.0061, "step": 360 }, { "epoch": 32.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.43087005615234375, "eval_runtime": 1.7385, "eval_samples_per_second": 88.582, "eval_steps_per_second": 5.752, "step": 368 }, { "epoch": 32.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.45713886618614197, "eval_runtime": 1.7098, "eval_samples_per_second": 90.069, "eval_steps_per_second": 5.849, "step": 379 }, { "epoch": 33.04, "learning_rate": 1.3709349048921951e-06, "loss": 0.0093, "step": 380 }, { "epoch": 34.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.48606640100479126, "eval_runtime": 1.7668, "eval_samples_per_second": 87.163, "eval_steps_per_second": 5.66, "step": 391 }, { "epoch": 34.78, "learning_rate": 5.634701881714148e-08, "loss": 0.0101, "step": 400 }, { "epoch": 34.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.49100440740585327, "eval_runtime": 1.7267, "eval_samples_per_second": 89.186, "eval_steps_per_second": 5.791, "step": 402 }, { "epoch": 36.0, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.5978976488113403, "eval_runtime": 1.7357, "eval_samples_per_second": 88.725, "eval_steps_per_second": 5.761, "step": 414 }, { "epoch": 36.52, "learning_rate": 1.6164105173215904e-05, "loss": 0.011, "step": 420 }, { "epoch": 36.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.3977736830711365, "eval_runtime": 1.7968, "eval_samples_per_second": 85.708, "eval_steps_per_second": 5.565, "step": 425 }, { "epoch": 38.0, "eval_accuracy": 0.935064935064935, "eval_loss": 0.40591639280319214, "eval_runtime": 1.7136, "eval_samples_per_second": 89.87, "eval_steps_per_second": 5.836, "step": 437 }, { "epoch": 38.26, "learning_rate": 1.4052013648906114e-05, "loss": 0.0226, "step": 440 }, { "epoch": 38.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.4942101240158081, "eval_runtime": 1.7437, "eval_samples_per_second": 88.318, "eval_steps_per_second": 5.735, "step": 448 }, { "epoch": 40.0, "learning_rate": 1.0723360272592418e-05, "loss": 0.0118, "step": 460 }, { "epoch": 40.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.7233626842498779, "eval_runtime": 1.7317, "eval_samples_per_second": 88.928, "eval_steps_per_second": 5.775, "step": 460 }, { "epoch": 40.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5826935172080994, "eval_runtime": 1.7439, "eval_samples_per_second": 88.31, "eval_steps_per_second": 5.734, "step": 471 }, { "epoch": 41.74, "learning_rate": 6.886265186108914e-06, "loss": 0.011, "step": 480 }, { "epoch": 42.0, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.662550687789917, "eval_runtime": 1.7627, "eval_samples_per_second": 87.366, "eval_steps_per_second": 5.673, "step": 483 }, { "epoch": 42.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5871102213859558, "eval_runtime": 1.7486, "eval_samples_per_second": 88.072, "eval_steps_per_second": 5.719, "step": 494 }, { "epoch": 43.48, "learning_rate": 3.3570117358101172e-06, "loss": 0.0003, "step": 500 }, { "epoch": 44.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5640321969985962, "eval_runtime": 1.6975, "eval_samples_per_second": 90.721, "eval_steps_per_second": 5.891, "step": 506 }, { "epoch": 44.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5655084848403931, "eval_runtime": 1.7184, "eval_samples_per_second": 89.619, "eval_steps_per_second": 5.819, "step": 517 }, { "epoch": 45.22, "learning_rate": 8.863946639715635e-07, "loss": 0.0005, "step": 520 }, { "epoch": 46.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5844298601150513, "eval_runtime": 1.7344, "eval_samples_per_second": 88.793, "eval_steps_per_second": 5.766, "step": 529 }, { "epoch": 46.96, "learning_rate": 1.6666666666666667e-05, "loss": 0.0064, "step": 540 }, { "epoch": 46.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5887525677680969, "eval_runtime": 1.7094, "eval_samples_per_second": 90.091, "eval_steps_per_second": 5.85, "step": 540 }, { "epoch": 48.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.6656709313392639, "eval_runtime": 1.7414, "eval_samples_per_second": 88.435, "eval_steps_per_second": 5.743, "step": 552 }, { "epoch": 48.7, "learning_rate": 1.5780272002695102e-05, "loss": 0.0084, "step": 560 }, { "epoch": 48.96, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.7156269550323486, "eval_runtime": 1.7014, "eval_samples_per_second": 90.514, "eval_steps_per_second": 5.878, "step": 563 }, { "epoch": 50.0, "eval_accuracy": 0.8766233766233766, "eval_loss": 0.9346238970756531, "eval_runtime": 1.7164, "eval_samples_per_second": 89.724, "eval_steps_per_second": 5.826, "step": 575 }, { "epoch": 50.43, "learning_rate": 1.3309654930856552e-05, "loss": 0.0318, "step": 580 }, { "epoch": 50.96, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.8030693531036377, "eval_runtime": 1.6994, "eval_samples_per_second": 90.621, "eval_steps_per_second": 5.884, "step": 586 }, { "epoch": 52.0, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.5700052976608276, "eval_runtime": 1.7294, "eval_samples_per_second": 89.049, "eval_steps_per_second": 5.782, "step": 598 }, { "epoch": 52.17, "learning_rate": 9.780401480557754e-06, "loss": 0.0338, "step": 600 }, { "epoch": 52.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.40834710001945496, "eval_runtime": 1.6964, "eval_samples_per_second": 90.781, "eval_steps_per_second": 5.895, "step": 609 }, { "epoch": 53.91, "learning_rate": 5.943306394074249e-06, "loss": 0.0147, "step": 620 }, { "epoch": 54.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.41295498609542847, "eval_runtime": 1.7144, "eval_samples_per_second": 89.828, "eval_steps_per_second": 5.833, "step": 621 }, { "epoch": 54.96, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.40372058749198914, "eval_runtime": 1.7344, "eval_samples_per_second": 88.792, "eval_steps_per_second": 5.766, "step": 632 }, { "epoch": 55.65, "learning_rate": 2.6146530177605546e-06, "loss": 0.0011, "step": 640 }, { "epoch": 56.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.36978378891944885, "eval_runtime": 1.7234, "eval_samples_per_second": 89.359, "eval_steps_per_second": 5.803, "step": 644 }, { "epoch": 56.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.38696253299713135, "eval_runtime": 1.7014, "eval_samples_per_second": 90.515, "eval_steps_per_second": 5.878, "step": 655 }, { "epoch": 57.39, "learning_rate": 5.025614934507641e-07, "loss": 0.0021, "step": 660 }, { "epoch": 58.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.39924710988998413, "eval_runtime": 1.6854, "eval_samples_per_second": 91.374, "eval_steps_per_second": 5.933, "step": 667 }, { "epoch": 58.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.44311344623565674, "eval_runtime": 1.6854, "eval_samples_per_second": 91.374, "eval_steps_per_second": 5.933, "step": 678 }, { "epoch": 59.13, "learning_rate": 1.6610319647849526e-05, "loss": 0.0002, "step": 680 }, { "epoch": 60.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.44143199920654297, "eval_runtime": 1.7504, "eval_samples_per_second": 87.98, "eval_steps_per_second": 5.713, "step": 690 }, { "epoch": 60.87, "learning_rate": 1.529573176177447e-05, "loss": 0.0088, "step": 700 }, { "epoch": 60.96, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.4255146086215973, "eval_runtime": 1.7144, "eval_samples_per_second": 89.828, "eval_steps_per_second": 5.833, "step": 701 }, { "epoch": 62.0, "eval_accuracy": 0.922077922077922, "eval_loss": 0.4168331027030945, "eval_runtime": 1.7434, "eval_samples_per_second": 88.334, "eval_steps_per_second": 5.736, "step": 713 }, { "epoch": 62.61, "learning_rate": 1.25e-05, "loss": 0.0061, "step": 720 }, { "epoch": 62.96, "eval_accuracy": 0.922077922077922, "eval_loss": 0.49312305450439453, "eval_runtime": 1.7334, "eval_samples_per_second": 88.843, "eval_steps_per_second": 5.769, "step": 724 }, { "epoch": 64.0, "eval_accuracy": 0.948051948051948, "eval_loss": 0.28522124886512756, "eval_runtime": 1.7184, "eval_samples_per_second": 89.619, "eval_steps_per_second": 5.819, "step": 736 }, { "epoch": 64.35, "learning_rate": 8.817873574253966e-06, "loss": 0.0179, "step": 740 }, { "epoch": 64.96, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.44120827317237854, "eval_runtime": 1.7364, "eval_samples_per_second": 88.69, "eval_steps_per_second": 5.759, "step": 747 }, { "epoch": 66.0, "eval_accuracy": 0.935064935064935, "eval_loss": 0.331409215927124, "eval_runtime": 1.7124, "eval_samples_per_second": 89.933, "eval_steps_per_second": 5.84, "step": 759 }, { "epoch": 66.09, "learning_rate": 5.03266861634036e-06, "loss": 0.0014, "step": 760 }, { "epoch": 66.96, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.2971489727497101, "eval_runtime": 1.7494, "eval_samples_per_second": 88.031, "eval_steps_per_second": 5.716, "step": 770 }, { "epoch": 67.83, "learning_rate": 1.9496296406751813e-06, "loss": 0.0199, "step": 780 }, { "epoch": 68.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.3261590301990509, "eval_runtime": 1.7504, "eval_samples_per_second": 87.98, "eval_steps_per_second": 5.713, "step": 782 }, { "epoch": 68.96, "eval_accuracy": 0.935064935064935, "eval_loss": 0.3835467994213104, "eval_runtime": 1.7824, "eval_samples_per_second": 86.401, "eval_steps_per_second": 5.61, "step": 793 }, { "epoch": 69.57, "learning_rate": 2.246260785014683e-07, "loss": 0.0091, "step": 800 }, { "epoch": 70.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.3903743326663971, "eval_runtime": 1.8425, "eval_samples_per_second": 83.582, "eval_steps_per_second": 5.427, "step": 805 }, { "epoch": 70.96, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.47730717062950134, "eval_runtime": 1.8005, "eval_samples_per_second": 85.531, "eval_steps_per_second": 5.554, "step": 816 }, { "epoch": 71.3, "learning_rate": 1.64420405881652e-05, "loss": 0.0029, "step": 820 }, { "epoch": 72.0, "eval_accuracy": 0.922077922077922, "eval_loss": 0.4937627613544464, "eval_runtime": 1.7875, "eval_samples_per_second": 86.152, "eval_steps_per_second": 5.594, "step": 828 }, { "epoch": 72.96, "eval_accuracy": 0.948051948051948, "eval_loss": 0.456775426864624, "eval_runtime": 1.7718, "eval_samples_per_second": 86.918, "eval_steps_per_second": 5.644, "step": 839 }, { "epoch": 73.04, "learning_rate": 1.4717037025991483e-05, "loss": 0.0224, "step": 840 }, { "epoch": 74.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.4465040862560272, "eval_runtime": 1.7516, "eval_samples_per_second": 87.92, "eval_steps_per_second": 5.709, "step": 851 }, { "epoch": 74.78, "learning_rate": 1.1633998050326307e-05, "loss": 0.0045, "step": 860 }, { "epoch": 74.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.6093705296516418, "eval_runtime": 1.8028, "eval_samples_per_second": 85.421, "eval_steps_per_second": 5.547, "step": 862 }, { "epoch": 76.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.6924065947532654, "eval_runtime": 1.7779, "eval_samples_per_second": 86.617, "eval_steps_per_second": 5.624, "step": 874 }, { "epoch": 76.52, "learning_rate": 7.848793092412702e-06, "loss": 0.0088, "step": 880 }, { "epoch": 76.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.48120445013046265, "eval_runtime": 1.7635, "eval_samples_per_second": 87.327, "eval_steps_per_second": 5.671, "step": 885 }, { "epoch": 78.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.6367415189743042, "eval_runtime": 1.7799, "eval_samples_per_second": 86.522, "eval_steps_per_second": 5.618, "step": 897 }, { "epoch": 78.26, "learning_rate": 4.1666666666666686e-06, "loss": 0.0033, "step": 900 }, { "epoch": 78.96, "eval_accuracy": 0.922077922077922, "eval_loss": 0.49070408940315247, "eval_runtime": 1.7676, "eval_samples_per_second": 87.125, "eval_steps_per_second": 5.657, "step": 908 }, { "epoch": 80.0, "learning_rate": 1.3709349048921951e-06, "loss": 0.0076, "step": 920 }, { "epoch": 80.0, "eval_accuracy": 0.9415584415584416, "eval_loss": 0.31148040294647217, "eval_runtime": 1.7496, "eval_samples_per_second": 88.021, "eval_steps_per_second": 5.716, "step": 920 }, { "epoch": 80.96, "eval_accuracy": 0.948051948051948, "eval_loss": 0.2700955271720886, "eval_runtime": 1.7827, "eval_samples_per_second": 86.387, "eval_steps_per_second": 5.61, "step": 931 }, { "epoch": 81.74, "learning_rate": 5.634701881714148e-08, "loss": 0.0002, "step": 940 }, { "epoch": 82.0, "eval_accuracy": 0.9545454545454546, "eval_loss": 0.2613329589366913, "eval_runtime": 1.7597, "eval_samples_per_second": 87.515, "eval_steps_per_second": 5.683, "step": 943 }, { "epoch": 82.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.40443289279937744, "eval_runtime": 1.7637, "eval_samples_per_second": 87.314, "eval_steps_per_second": 5.67, "step": 954 }, { "epoch": 83.48, "learning_rate": 1.6164105173215904e-05, "loss": 0.0193, "step": 960 }, { "epoch": 84.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.9613493084907532, "eval_runtime": 1.7854, "eval_samples_per_second": 86.255, "eval_steps_per_second": 5.601, "step": 966 }, { "epoch": 84.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.6934040784835815, "eval_runtime": 1.7617, "eval_samples_per_second": 87.414, "eval_steps_per_second": 5.676, "step": 977 }, { "epoch": 85.22, "learning_rate": 1.4052013648906114e-05, "loss": 0.0238, "step": 980 }, { "epoch": 86.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.9348794221878052, "eval_runtime": 1.7915, "eval_samples_per_second": 85.962, "eval_steps_per_second": 5.582, "step": 989 }, { "epoch": 86.96, "learning_rate": 1.0723360272592418e-05, "loss": 0.011, "step": 1000 }, { "epoch": 86.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.8836289644241333, "eval_runtime": 1.8098, "eval_samples_per_second": 85.095, "eval_steps_per_second": 5.526, "step": 1000 }, { "epoch": 88.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.7403988838195801, "eval_runtime": 1.7608, "eval_samples_per_second": 87.459, "eval_steps_per_second": 5.679, "step": 1012 }, { "epoch": 88.7, "learning_rate": 6.886265186108914e-06, "loss": 0.018, "step": 1020 }, { "epoch": 88.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5259799957275391, "eval_runtime": 1.7749, "eval_samples_per_second": 86.764, "eval_steps_per_second": 5.634, "step": 1023 }, { "epoch": 90.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5202356576919556, "eval_runtime": 1.7868, "eval_samples_per_second": 86.187, "eval_steps_per_second": 5.597, "step": 1035 }, { "epoch": 90.43, "learning_rate": 3.3570117358101172e-06, "loss": 0.0041, "step": 1040 }, { "epoch": 90.96, "eval_accuracy": 0.922077922077922, "eval_loss": 0.48472902178764343, "eval_runtime": 1.7845, "eval_samples_per_second": 86.297, "eval_steps_per_second": 5.604, "step": 1046 }, { "epoch": 92.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.48678579926490784, "eval_runtime": 1.7317, "eval_samples_per_second": 88.93, "eval_steps_per_second": 5.775, "step": 1058 }, { "epoch": 92.17, "learning_rate": 8.863946639715635e-07, "loss": 0.001, "step": 1060 }, { "epoch": 92.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.4975196421146393, "eval_runtime": 1.7866, "eval_samples_per_second": 86.198, "eval_steps_per_second": 5.597, "step": 1069 }, { "epoch": 93.91, "learning_rate": 1.6666666666666667e-05, "loss": 0.0014, "step": 1080 }, { "epoch": 94.0, "eval_accuracy": 0.922077922077922, "eval_loss": 0.6255179643630981, "eval_runtime": 1.7777, "eval_samples_per_second": 86.631, "eval_steps_per_second": 5.625, "step": 1081 }, { "epoch": 94.96, "eval_accuracy": 0.8766233766233766, "eval_loss": 0.9968315362930298, "eval_runtime": 1.7619, "eval_samples_per_second": 87.408, "eval_steps_per_second": 5.676, "step": 1092 }, { "epoch": 95.65, "learning_rate": 1.5780272002695102e-05, "loss": 0.0165, "step": 1100 }, { "epoch": 96.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.6173205971717834, "eval_runtime": 1.8105, "eval_samples_per_second": 85.06, "eval_steps_per_second": 5.523, "step": 1104 }, { "epoch": 96.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.7537987232208252, "eval_runtime": 1.7727, "eval_samples_per_second": 86.873, "eval_steps_per_second": 5.641, "step": 1115 }, { "epoch": 97.39, "learning_rate": 1.3309654930856552e-05, "loss": 0.013, "step": 1120 }, { "epoch": 98.0, "eval_accuracy": 0.8766233766233766, "eval_loss": 0.7560279965400696, "eval_runtime": 1.7715, "eval_samples_per_second": 86.931, "eval_steps_per_second": 5.645, "step": 1127 }, { "epoch": 98.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5807818174362183, "eval_runtime": 1.79, "eval_samples_per_second": 86.032, "eval_steps_per_second": 5.586, "step": 1138 }, { "epoch": 99.13, "learning_rate": 9.780401480557754e-06, "loss": 0.0237, "step": 1140 }, { "epoch": 100.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5147651433944702, "eval_runtime": 1.7527, "eval_samples_per_second": 87.864, "eval_steps_per_second": 5.705, "step": 1150 }, { "epoch": 100.87, "learning_rate": 5.943306394074249e-06, "loss": 0.0061, "step": 1160 }, { "epoch": 100.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5672047734260559, "eval_runtime": 1.7885, "eval_samples_per_second": 86.106, "eval_steps_per_second": 5.591, "step": 1161 }, { "epoch": 102.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.4343276619911194, "eval_runtime": 1.7759, "eval_samples_per_second": 86.714, "eval_steps_per_second": 5.631, "step": 1173 }, { "epoch": 102.61, "learning_rate": 2.6146530177605546e-06, "loss": 0.002, "step": 1180 }, { "epoch": 102.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.32392024993896484, "eval_runtime": 1.8027, "eval_samples_per_second": 85.425, "eval_steps_per_second": 5.547, "step": 1184 }, { "epoch": 104.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.2951604127883911, "eval_runtime": 1.8006, "eval_samples_per_second": 85.525, "eval_steps_per_second": 5.554, "step": 1196 }, { "epoch": 104.35, "learning_rate": 5.025614934507641e-07, "loss": 0.0005, "step": 1200 }, { "epoch": 104.96, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.2926579713821411, "eval_runtime": 1.7666, "eval_samples_per_second": 87.174, "eval_steps_per_second": 5.661, "step": 1207 }, { "epoch": 106.0, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.3511568605899811, "eval_runtime": 1.7794, "eval_samples_per_second": 86.546, "eval_steps_per_second": 5.62, "step": 1219 }, { "epoch": 106.09, "learning_rate": 1.6610319647849526e-05, "loss": 0.0003, "step": 1220 }, { "epoch": 106.96, "eval_accuracy": 0.922077922077922, "eval_loss": 0.4030219316482544, "eval_runtime": 1.7918, "eval_samples_per_second": 85.945, "eval_steps_per_second": 5.581, "step": 1230 }, { "epoch": 107.83, "learning_rate": 1.529573176177447e-05, "loss": 0.0023, "step": 1240 }, { "epoch": 108.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.3911021053791046, "eval_runtime": 1.7635, "eval_samples_per_second": 87.328, "eval_steps_per_second": 5.671, "step": 1242 }, { "epoch": 108.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5156851410865784, "eval_runtime": 1.7917, "eval_samples_per_second": 85.953, "eval_steps_per_second": 5.581, "step": 1253 }, { "epoch": 109.57, "learning_rate": 1.25e-05, "loss": 0.0114, "step": 1260 }, { "epoch": 110.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5531629323959351, "eval_runtime": 1.7244, "eval_samples_per_second": 89.307, "eval_steps_per_second": 5.799, "step": 1265 }, { "epoch": 110.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.6447522640228271, "eval_runtime": 1.7304, "eval_samples_per_second": 88.997, "eval_steps_per_second": 5.779, "step": 1276 }, { "epoch": 111.3, "learning_rate": 8.817873574253966e-06, "loss": 0.0003, "step": 1280 }, { "epoch": 112.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.6866676807403564, "eval_runtime": 1.7494, "eval_samples_per_second": 88.031, "eval_steps_per_second": 5.716, "step": 1288 }, { "epoch": 112.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.7411206960678101, "eval_runtime": 1.7444, "eval_samples_per_second": 88.283, "eval_steps_per_second": 5.733, "step": 1299 }, { "epoch": 113.04, "learning_rate": 5.03266861634036e-06, "loss": 0.0153, "step": 1300 }, { "epoch": 114.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.7488161325454712, "eval_runtime": 1.7644, "eval_samples_per_second": 87.282, "eval_steps_per_second": 5.668, "step": 1311 }, { "epoch": 114.78, "learning_rate": 1.9496296406751813e-06, "loss": 0.0039, "step": 1320 }, { "epoch": 114.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.6945769786834717, "eval_runtime": 1.7344, "eval_samples_per_second": 88.792, "eval_steps_per_second": 5.766, "step": 1322 }, { "epoch": 116.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.7218338847160339, "eval_runtime": 1.7644, "eval_samples_per_second": 87.281, "eval_steps_per_second": 5.668, "step": 1334 }, { "epoch": 116.52, "learning_rate": 2.246260785014683e-07, "loss": 0.0002, "step": 1340 }, { "epoch": 116.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.7305352091789246, "eval_runtime": 1.7634, "eval_samples_per_second": 87.332, "eval_steps_per_second": 5.671, "step": 1345 }, { "epoch": 118.0, "eval_accuracy": 0.8701298701298701, "eval_loss": 1.0061231851577759, "eval_runtime": 1.7854, "eval_samples_per_second": 86.255, "eval_steps_per_second": 5.601, "step": 1357 }, { "epoch": 118.26, "learning_rate": 1.64420405881652e-05, "loss": 0.0066, "step": 1360 }, { "epoch": 118.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.5966177582740784, "eval_runtime": 1.7834, "eval_samples_per_second": 86.352, "eval_steps_per_second": 5.607, "step": 1368 }, { "epoch": 120.0, "learning_rate": 1.4717037025991483e-05, "loss": 0.0083, "step": 1380 }, { "epoch": 120.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 1.1087840795516968, "eval_runtime": 1.7834, "eval_samples_per_second": 86.352, "eval_steps_per_second": 5.607, "step": 1380 }, { "epoch": 120.96, "eval_accuracy": 0.8831168831168831, "eval_loss": 0.821342945098877, "eval_runtime": 1.7534, "eval_samples_per_second": 87.83, "eval_steps_per_second": 5.703, "step": 1391 }, { "epoch": 121.74, "learning_rate": 1.1633998050326307e-05, "loss": 0.0202, "step": 1400 }, { "epoch": 122.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5226480960845947, "eval_runtime": 1.7954, "eval_samples_per_second": 85.775, "eval_steps_per_second": 5.57, "step": 1403 }, { "epoch": 122.96, "eval_accuracy": 0.922077922077922, "eval_loss": 0.44853323698043823, "eval_runtime": 1.7774, "eval_samples_per_second": 86.643, "eval_steps_per_second": 5.626, "step": 1414 }, { "epoch": 123.48, "learning_rate": 7.848793092412702e-06, "loss": 0.0033, "step": 1420 }, { "epoch": 124.0, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.7281427979469299, "eval_runtime": 1.8004, "eval_samples_per_second": 85.537, "eval_steps_per_second": 5.554, "step": 1426 }, { "epoch": 124.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.6655176281929016, "eval_runtime": 1.7664, "eval_samples_per_second": 87.183, "eval_steps_per_second": 5.661, "step": 1437 }, { "epoch": 125.22, "learning_rate": 4.1666666666666686e-06, "loss": 0.0185, "step": 1440 }, { "epoch": 126.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.5161400437355042, "eval_runtime": 1.7704, "eval_samples_per_second": 86.986, "eval_steps_per_second": 5.648, "step": 1449 }, { "epoch": 126.96, "learning_rate": 1.3709349048921951e-06, "loss": 0.0001, "step": 1460 }, { "epoch": 126.96, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.4859886169433594, "eval_runtime": 1.7874, "eval_samples_per_second": 86.159, "eval_steps_per_second": 5.595, "step": 1460 }, { "epoch": 128.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.4834165573120117, "eval_runtime": 1.7944, "eval_samples_per_second": 85.822, "eval_steps_per_second": 5.573, "step": 1472 }, { "epoch": 128.7, "learning_rate": 5.634701881714148e-08, "loss": 0.0047, "step": 1480 }, { "epoch": 128.96, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.48360273241996765, "eval_runtime": 1.7574, "eval_samples_per_second": 87.63, "eval_steps_per_second": 5.69, "step": 1483 }, { "epoch": 130.0, "eval_accuracy": 0.9155844155844156, "eval_loss": 0.6164301037788391, "eval_runtime": 1.7394, "eval_samples_per_second": 88.537, "eval_steps_per_second": 5.749, "step": 1495 }, { "epoch": 130.43, "learning_rate": 1.6164105173215904e-05, "loss": 0.011, "step": 1500 }, { "epoch": 130.96, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.7818012237548828, "eval_runtime": 1.7684, "eval_samples_per_second": 87.085, "eval_steps_per_second": 5.655, "step": 1506 }, { "epoch": 132.0, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.8022345900535583, "eval_runtime": 1.7544, "eval_samples_per_second": 87.779, "eval_steps_per_second": 5.7, "step": 1518 }, { "epoch": 132.17, "learning_rate": 1.4052013648906114e-05, "loss": 0.0023, "step": 1520 }, { "epoch": 132.96, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.8653693199157715, "eval_runtime": 1.7604, "eval_samples_per_second": 87.48, "eval_steps_per_second": 5.681, "step": 1529 }, { "epoch": 133.91, "learning_rate": 1.0723360272592418e-05, "loss": 0.0222, "step": 1540 }, { "epoch": 134.0, "eval_accuracy": 0.8896103896103896, "eval_loss": 0.6707515716552734, "eval_runtime": 1.7434, "eval_samples_per_second": 88.334, "eval_steps_per_second": 5.736, "step": 1541 }, { "epoch": 134.96, "eval_accuracy": 0.935064935064935, "eval_loss": 0.4996984004974365, "eval_runtime": 1.7534, "eval_samples_per_second": 87.83, "eval_steps_per_second": 5.703, "step": 1552 }, { "epoch": 135.65, "learning_rate": 6.886265186108914e-06, "loss": 0.0126, "step": 1560 }, { "epoch": 136.0, "eval_accuracy": 0.922077922077922, "eval_loss": 0.5560286641120911, "eval_runtime": 1.7314, "eval_samples_per_second": 88.946, "eval_steps_per_second": 5.776, "step": 1564 }, { "epoch": 136.96, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.6162758469581604, "eval_runtime": 1.7204, "eval_samples_per_second": 89.515, "eval_steps_per_second": 5.813, "step": 1575 }, { "epoch": 137.39, "learning_rate": 3.3570117358101172e-06, "loss": 0.014, "step": 1580 }, { "epoch": 138.0, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.6488694548606873, "eval_runtime": 1.7194, "eval_samples_per_second": 89.567, "eval_steps_per_second": 5.816, "step": 1587 }, { "epoch": 138.96, "eval_accuracy": 0.8701298701298701, "eval_loss": 0.684516191482544, "eval_runtime": 1.7304, "eval_samples_per_second": 88.997, "eval_steps_per_second": 5.779, "step": 1598 }, { "epoch": 139.13, "learning_rate": 8.863946639715635e-07, "loss": 0.0088, "step": 1600 }, { "epoch": 140.0, "eval_accuracy": 0.8766233766233766, "eval_loss": 0.7022619247436523, "eval_runtime": 1.7374, "eval_samples_per_second": 88.639, "eval_steps_per_second": 5.756, "step": 1610 }, { "epoch": 140.87, "learning_rate": 1.6666666666666667e-05, "loss": 0.0022, "step": 1620 }, { "epoch": 140.96, "eval_accuracy": 0.8701298701298701, "eval_loss": 0.762221097946167, "eval_runtime": 1.6984, "eval_samples_per_second": 90.675, "eval_steps_per_second": 5.888, "step": 1621 }, { "epoch": 142.0, "eval_accuracy": 0.8961038961038961, "eval_loss": 0.6736029982566833, "eval_runtime": 1.7594, "eval_samples_per_second": 87.53, "eval_steps_per_second": 5.684, "step": 1633 }, { "epoch": 142.61, "learning_rate": 1.5780272002695102e-05, "loss": 0.0017, "step": 1640 }, { "epoch": 142.96, "eval_accuracy": 0.9090909090909091, "eval_loss": 0.5298991799354553, "eval_runtime": 1.6954, "eval_samples_per_second": 90.835, "eval_steps_per_second": 5.898, "step": 1644 }, { "epoch": 143.48, "eval_accuracy": 0.9025974025974026, "eval_loss": 0.5584802031517029, "eval_runtime": 1.6964, "eval_samples_per_second": 90.782, "eval_steps_per_second": 5.895, "step": 1650 }, { "epoch": 143.48, "step": 1650, "total_flos": 8.017005638819359e+18, "train_loss": 0.028458750352940775, "train_runtime": 2286.6106, "train_samples_per_second": 47.297, "train_steps_per_second": 0.722 } ], "logging_steps": 20, "max_steps": 1650, "num_train_epochs": 150, "save_steps": 500, "total_flos": 8.017005638819359e+18, "trial_name": null, "trial_params": null }