{ "best_metric": 0.75, "best_model_checkpoint": "Swin-DMAE-H-DA-REVAL-80\\checkpoint-546", "epoch": 77.36263736263736, "eval_steps": 500, "global_step": 1760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "learning_rate": 2.840909090909091e-06, "loss": 1.6093, "step": 10 }, { "epoch": 0.88, "learning_rate": 5.681818181818182e-06, "loss": 1.608, "step": 20 }, { "epoch": 0.97, "eval_accuracy": 0.25, "eval_loss": 1.6090906858444214, "eval_runtime": 0.9532, "eval_samples_per_second": 54.552, "eval_steps_per_second": 4.196, "step": 22 }, { "epoch": 1.32, "learning_rate": 8.522727272727273e-06, "loss": 1.6038, "step": 30 }, { "epoch": 1.76, "learning_rate": 1.1363636363636365e-05, "loss": 1.5899, "step": 40 }, { "epoch": 1.98, "eval_accuracy": 0.19230769230769232, "eval_loss": 1.5960006713867188, "eval_runtime": 0.6772, "eval_samples_per_second": 76.791, "eval_steps_per_second": 5.907, "step": 45 }, { "epoch": 2.2, "learning_rate": 1.4204545454545456e-05, "loss": 1.5458, "step": 50 }, { "epoch": 2.64, "learning_rate": 1.7045454545454546e-05, "loss": 1.4759, "step": 60 }, { "epoch": 2.99, "eval_accuracy": 0.34615384615384615, "eval_loss": 1.4429875612258911, "eval_runtime": 0.6507, "eval_samples_per_second": 79.919, "eval_steps_per_second": 6.148, "step": 68 }, { "epoch": 3.08, "learning_rate": 1.9886363636363638e-05, "loss": 1.3432, "step": 70 }, { "epoch": 3.52, "learning_rate": 2.272727272727273e-05, "loss": 1.2227, "step": 80 }, { "epoch": 3.96, "learning_rate": 2.5568181818181817e-05, "loss": 1.1012, "step": 90 }, { "epoch": 4.0, "eval_accuracy": 0.5192307692307693, "eval_loss": 1.3212602138519287, "eval_runtime": 0.6366, "eval_samples_per_second": 81.678, "eval_steps_per_second": 6.283, "step": 91 }, { "epoch": 4.4, "learning_rate": 2.8409090909090912e-05, "loss": 0.9932, "step": 100 }, { "epoch": 4.84, "learning_rate": 3.125e-05, "loss": 0.8965, "step": 110 }, { "epoch": 4.97, "eval_accuracy": 0.4230769230769231, "eval_loss": 1.1938039064407349, "eval_runtime": 0.7082, "eval_samples_per_second": 73.43, "eval_steps_per_second": 5.648, "step": 113 }, { "epoch": 5.27, "learning_rate": 3.409090909090909e-05, "loss": 0.8385, "step": 120 }, { "epoch": 5.71, "learning_rate": 3.6931818181818184e-05, "loss": 0.7214, "step": 130 }, { "epoch": 5.98, "eval_accuracy": 0.46153846153846156, "eval_loss": 1.1870416402816772, "eval_runtime": 0.6792, "eval_samples_per_second": 76.566, "eval_steps_per_second": 5.89, "step": 136 }, { "epoch": 6.15, "learning_rate": 3.9772727272727275e-05, "loss": 0.6341, "step": 140 }, { "epoch": 6.59, "learning_rate": 4.261363636363637e-05, "loss": 0.6757, "step": 150 }, { "epoch": 6.99, "eval_accuracy": 0.5, "eval_loss": 1.2117116451263428, "eval_runtime": 0.6842, "eval_samples_per_second": 76.005, "eval_steps_per_second": 5.847, "step": 159 }, { "epoch": 7.03, "learning_rate": 4.545454545454546e-05, "loss": 0.5533, "step": 160 }, { "epoch": 7.47, "learning_rate": 4.829545454545455e-05, "loss": 0.5691, "step": 170 }, { "epoch": 7.91, "learning_rate": 4.9873737373737375e-05, "loss": 0.5529, "step": 180 }, { "epoch": 8.0, "eval_accuracy": 0.46153846153846156, "eval_loss": 1.1975771188735962, "eval_runtime": 0.6381, "eval_samples_per_second": 81.486, "eval_steps_per_second": 6.268, "step": 182 }, { "epoch": 8.35, "learning_rate": 4.955808080808081e-05, "loss": 0.4971, "step": 190 }, { "epoch": 8.79, "learning_rate": 4.9242424242424245e-05, "loss": 0.5279, "step": 200 }, { "epoch": 8.97, "eval_accuracy": 0.5192307692307693, "eval_loss": 1.1249598264694214, "eval_runtime": 0.6211, "eval_samples_per_second": 83.717, "eval_steps_per_second": 6.44, "step": 204 }, { "epoch": 9.23, "learning_rate": 4.892676767676767e-05, "loss": 0.5037, "step": 210 }, { "epoch": 9.67, "learning_rate": 4.8611111111111115e-05, "loss": 0.4701, "step": 220 }, { "epoch": 9.98, "eval_accuracy": 0.5576923076923077, "eval_loss": 1.099881887435913, "eval_runtime": 0.6342, "eval_samples_per_second": 81.999, "eval_steps_per_second": 6.308, "step": 227 }, { "epoch": 10.11, "learning_rate": 4.829545454545455e-05, "loss": 0.3906, "step": 230 }, { "epoch": 10.55, "learning_rate": 4.797979797979798e-05, "loss": 0.3812, "step": 240 }, { "epoch": 10.99, "learning_rate": 4.7664141414141413e-05, "loss": 0.3721, "step": 250 }, { "epoch": 10.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.7842198014259338, "eval_runtime": 0.6467, "eval_samples_per_second": 80.414, "eval_steps_per_second": 6.186, "step": 250 }, { "epoch": 11.43, "learning_rate": 4.7348484848484855e-05, "loss": 0.327, "step": 260 }, { "epoch": 11.87, "learning_rate": 4.7032828282828283e-05, "loss": 0.3631, "step": 270 }, { "epoch": 12.0, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.1728084087371826, "eval_runtime": 0.7257, "eval_samples_per_second": 71.658, "eval_steps_per_second": 5.512, "step": 273 }, { "epoch": 12.31, "learning_rate": 4.671717171717172e-05, "loss": 0.335, "step": 280 }, { "epoch": 12.75, "learning_rate": 4.6401515151515154e-05, "loss": 0.3384, "step": 290 }, { "epoch": 12.97, "eval_accuracy": 0.5769230769230769, "eval_loss": 1.241263508796692, "eval_runtime": 0.7252, "eval_samples_per_second": 71.706, "eval_steps_per_second": 5.516, "step": 295 }, { "epoch": 13.19, "learning_rate": 4.608585858585859e-05, "loss": 0.2861, "step": 300 }, { "epoch": 13.63, "learning_rate": 4.5770202020202024e-05, "loss": 0.2531, "step": 310 }, { "epoch": 13.98, "eval_accuracy": 0.6346153846153846, "eval_loss": 0.9144014716148376, "eval_runtime": 0.6357, "eval_samples_per_second": 81.806, "eval_steps_per_second": 6.293, "step": 318 }, { "epoch": 14.07, "learning_rate": 4.545454545454546e-05, "loss": 0.2761, "step": 320 }, { "epoch": 14.51, "learning_rate": 4.5138888888888894e-05, "loss": 0.2721, "step": 330 }, { "epoch": 14.95, "learning_rate": 4.482323232323233e-05, "loss": 0.2753, "step": 340 }, { "epoch": 14.99, "eval_accuracy": 0.6923076923076923, "eval_loss": 0.8958693146705627, "eval_runtime": 0.6421, "eval_samples_per_second": 80.979, "eval_steps_per_second": 6.229, "step": 341 }, { "epoch": 15.38, "learning_rate": 4.450757575757576e-05, "loss": 0.2275, "step": 350 }, { "epoch": 15.82, "learning_rate": 4.41919191919192e-05, "loss": 0.2611, "step": 360 }, { "epoch": 16.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.139877438545227, "eval_runtime": 0.7657, "eval_samples_per_second": 67.913, "eval_steps_per_second": 5.224, "step": 364 }, { "epoch": 16.26, "learning_rate": 4.387626262626263e-05, "loss": 0.2614, "step": 370 }, { "epoch": 16.7, "learning_rate": 4.356060606060606e-05, "loss": 0.2072, "step": 380 }, { "epoch": 16.97, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0731658935546875, "eval_runtime": 0.6391, "eval_samples_per_second": 81.359, "eval_steps_per_second": 6.258, "step": 386 }, { "epoch": 17.14, "learning_rate": 4.32449494949495e-05, "loss": 0.1875, "step": 390 }, { "epoch": 17.58, "learning_rate": 4.292929292929293e-05, "loss": 0.2532, "step": 400 }, { "epoch": 17.98, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.1921563148498535, "eval_runtime": 0.7727, "eval_samples_per_second": 67.298, "eval_steps_per_second": 5.177, "step": 409 }, { "epoch": 18.02, "learning_rate": 4.261363636363637e-05, "loss": 0.1789, "step": 410 }, { "epoch": 18.46, "learning_rate": 4.2297979797979795e-05, "loss": 0.1653, "step": 420 }, { "epoch": 18.9, "learning_rate": 4.198232323232324e-05, "loss": 0.1633, "step": 430 }, { "epoch": 18.99, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.059952974319458, "eval_runtime": 0.6427, "eval_samples_per_second": 80.914, "eval_steps_per_second": 6.224, "step": 432 }, { "epoch": 19.34, "learning_rate": 4.166666666666667e-05, "loss": 0.1899, "step": 440 }, { "epoch": 19.78, "learning_rate": 4.13510101010101e-05, "loss": 0.1946, "step": 450 }, { "epoch": 20.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.228935718536377, "eval_runtime": 0.7167, "eval_samples_per_second": 72.558, "eval_steps_per_second": 5.581, "step": 455 }, { "epoch": 20.22, "learning_rate": 4.1035353535353535e-05, "loss": 0.1413, "step": 460 }, { "epoch": 20.66, "learning_rate": 4.071969696969698e-05, "loss": 0.2214, "step": 470 }, { "epoch": 20.97, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.3590692281723022, "eval_runtime": 0.6267, "eval_samples_per_second": 82.98, "eval_steps_per_second": 6.383, "step": 477 }, { "epoch": 21.1, "learning_rate": 4.0404040404040405e-05, "loss": 0.1925, "step": 480 }, { "epoch": 21.54, "learning_rate": 4.008838383838384e-05, "loss": 0.1842, "step": 490 }, { "epoch": 21.98, "learning_rate": 3.9772727272727275e-05, "loss": 0.1666, "step": 500 }, { "epoch": 21.98, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0735695362091064, "eval_runtime": 0.6487, "eval_samples_per_second": 80.166, "eval_steps_per_second": 6.167, "step": 500 }, { "epoch": 22.42, "learning_rate": 3.945707070707071e-05, "loss": 0.1609, "step": 510 }, { "epoch": 22.86, "learning_rate": 3.9141414141414145e-05, "loss": 0.141, "step": 520 }, { "epoch": 22.99, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.031468391418457, "eval_runtime": 0.7467, "eval_samples_per_second": 69.643, "eval_steps_per_second": 5.357, "step": 523 }, { "epoch": 23.3, "learning_rate": 3.8825757575757574e-05, "loss": 0.1474, "step": 530 }, { "epoch": 23.74, "learning_rate": 3.8510101010101015e-05, "loss": 0.1275, "step": 540 }, { "epoch": 24.0, "eval_accuracy": 0.75, "eval_loss": 1.0766026973724365, "eval_runtime": 0.6561, "eval_samples_per_second": 79.25, "eval_steps_per_second": 6.096, "step": 546 }, { "epoch": 24.18, "learning_rate": 3.8194444444444444e-05, "loss": 0.1708, "step": 550 }, { "epoch": 24.62, "learning_rate": 3.787878787878788e-05, "loss": 0.136, "step": 560 }, { "epoch": 24.97, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.1796035766601562, "eval_runtime": 0.6882, "eval_samples_per_second": 75.564, "eval_steps_per_second": 5.813, "step": 568 }, { "epoch": 25.05, "learning_rate": 3.7563131313131314e-05, "loss": 0.1539, "step": 570 }, { "epoch": 25.49, "learning_rate": 3.724747474747475e-05, "loss": 0.129, "step": 580 }, { "epoch": 25.93, "learning_rate": 3.6931818181818184e-05, "loss": 0.1402, "step": 590 }, { "epoch": 25.98, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0338743925094604, "eval_runtime": 0.6301, "eval_samples_per_second": 82.521, "eval_steps_per_second": 6.348, "step": 591 }, { "epoch": 26.37, "learning_rate": 3.661616161616162e-05, "loss": 0.1492, "step": 600 }, { "epoch": 26.81, "learning_rate": 3.6300505050505054e-05, "loss": 0.1336, "step": 610 }, { "epoch": 26.99, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.3445886373519897, "eval_runtime": 0.6437, "eval_samples_per_second": 80.789, "eval_steps_per_second": 6.215, "step": 614 }, { "epoch": 27.25, "learning_rate": 3.598484848484849e-05, "loss": 0.0881, "step": 620 }, { "epoch": 27.69, "learning_rate": 3.566919191919192e-05, "loss": 0.1218, "step": 630 }, { "epoch": 28.0, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.2967442274093628, "eval_runtime": 0.6371, "eval_samples_per_second": 81.614, "eval_steps_per_second": 6.278, "step": 637 }, { "epoch": 28.13, "learning_rate": 3.535353535353535e-05, "loss": 0.1095, "step": 640 }, { "epoch": 28.57, "learning_rate": 3.5037878787878794e-05, "loss": 0.1034, "step": 650 }, { "epoch": 28.97, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.5955091714859009, "eval_runtime": 0.6367, "eval_samples_per_second": 81.675, "eval_steps_per_second": 6.283, "step": 659 }, { "epoch": 29.01, "learning_rate": 3.472222222222222e-05, "loss": 0.1467, "step": 660 }, { "epoch": 29.45, "learning_rate": 3.440656565656566e-05, "loss": 0.1292, "step": 670 }, { "epoch": 29.89, "learning_rate": 3.409090909090909e-05, "loss": 0.1196, "step": 680 }, { "epoch": 29.98, "eval_accuracy": 0.5769230769230769, "eval_loss": 1.5720566511154175, "eval_runtime": 0.6382, "eval_samples_per_second": 81.484, "eval_steps_per_second": 6.268, "step": 682 }, { "epoch": 30.33, "learning_rate": 3.377525252525253e-05, "loss": 0.0938, "step": 690 }, { "epoch": 30.77, "learning_rate": 3.345959595959596e-05, "loss": 0.1368, "step": 700 }, { "epoch": 30.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.8208184242248535, "eval_runtime": 0.6327, "eval_samples_per_second": 82.194, "eval_steps_per_second": 6.323, "step": 705 }, { "epoch": 31.21, "learning_rate": 3.314393939393939e-05, "loss": 0.1194, "step": 710 }, { "epoch": 31.65, "learning_rate": 3.282828282828283e-05, "loss": 0.1477, "step": 720 }, { "epoch": 32.0, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.4237351417541504, "eval_runtime": 0.6551, "eval_samples_per_second": 79.371, "eval_steps_per_second": 6.105, "step": 728 }, { "epoch": 32.09, "learning_rate": 3.251262626262627e-05, "loss": 0.1274, "step": 730 }, { "epoch": 32.53, "learning_rate": 3.2196969696969696e-05, "loss": 0.1198, "step": 740 }, { "epoch": 32.97, "learning_rate": 3.188131313131314e-05, "loss": 0.1299, "step": 750 }, { "epoch": 32.97, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.4061400890350342, "eval_runtime": 0.6447, "eval_samples_per_second": 80.663, "eval_steps_per_second": 6.205, "step": 750 }, { "epoch": 33.41, "learning_rate": 3.1565656565656566e-05, "loss": 0.0806, "step": 760 }, { "epoch": 33.85, "learning_rate": 3.125e-05, "loss": 0.1111, "step": 770 }, { "epoch": 33.98, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.666426420211792, "eval_runtime": 0.6892, "eval_samples_per_second": 75.455, "eval_steps_per_second": 5.804, "step": 773 }, { "epoch": 34.29, "learning_rate": 3.0934343434343436e-05, "loss": 0.0773, "step": 780 }, { "epoch": 34.73, "learning_rate": 3.061868686868687e-05, "loss": 0.068, "step": 790 }, { "epoch": 34.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.7432496547698975, "eval_runtime": 0.7027, "eval_samples_per_second": 74.004, "eval_steps_per_second": 5.693, "step": 796 }, { "epoch": 35.16, "learning_rate": 3.0303030303030306e-05, "loss": 0.1432, "step": 800 }, { "epoch": 35.6, "learning_rate": 2.9987373737373737e-05, "loss": 0.1142, "step": 810 }, { "epoch": 36.0, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.4517791271209717, "eval_runtime": 0.6291, "eval_samples_per_second": 82.652, "eval_steps_per_second": 6.358, "step": 819 }, { "epoch": 36.04, "learning_rate": 2.9671717171717172e-05, "loss": 0.1228, "step": 820 }, { "epoch": 36.48, "learning_rate": 2.935606060606061e-05, "loss": 0.0968, "step": 830 }, { "epoch": 36.92, "learning_rate": 2.904040404040404e-05, "loss": 0.1258, "step": 840 }, { "epoch": 36.97, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.7216651439666748, "eval_runtime": 0.7027, "eval_samples_per_second": 74.003, "eval_steps_per_second": 5.693, "step": 841 }, { "epoch": 37.36, "learning_rate": 2.8724747474747477e-05, "loss": 0.0971, "step": 850 }, { "epoch": 37.8, "learning_rate": 2.8409090909090912e-05, "loss": 0.1055, "step": 860 }, { "epoch": 37.98, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.6348490715026855, "eval_runtime": 0.6862, "eval_samples_per_second": 75.785, "eval_steps_per_second": 5.83, "step": 864 }, { "epoch": 38.24, "learning_rate": 2.8093434343434344e-05, "loss": 0.1109, "step": 870 }, { "epoch": 38.68, "learning_rate": 2.777777777777778e-05, "loss": 0.1049, "step": 880 }, { "epoch": 38.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.837777853012085, "eval_runtime": 0.7382, "eval_samples_per_second": 70.445, "eval_steps_per_second": 5.419, "step": 887 }, { "epoch": 39.12, "learning_rate": 2.746212121212121e-05, "loss": 0.0726, "step": 890 }, { "epoch": 39.56, "learning_rate": 2.714646464646465e-05, "loss": 0.0672, "step": 900 }, { "epoch": 40.0, "learning_rate": 2.6830808080808084e-05, "loss": 0.0822, "step": 910 }, { "epoch": 40.0, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.6760356426239014, "eval_runtime": 0.6697, "eval_samples_per_second": 77.651, "eval_steps_per_second": 5.973, "step": 910 }, { "epoch": 40.44, "learning_rate": 2.6515151515151516e-05, "loss": 0.0798, "step": 920 }, { "epoch": 40.88, "learning_rate": 2.619949494949495e-05, "loss": 0.1114, "step": 930 }, { "epoch": 40.97, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.7310110330581665, "eval_runtime": 0.7447, "eval_samples_per_second": 69.829, "eval_steps_per_second": 5.371, "step": 932 }, { "epoch": 41.32, "learning_rate": 2.5883838383838382e-05, "loss": 0.0942, "step": 940 }, { "epoch": 41.76, "learning_rate": 2.5568181818181817e-05, "loss": 0.0704, "step": 950 }, { "epoch": 41.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.7105393409729004, "eval_runtime": 0.6411, "eval_samples_per_second": 81.105, "eval_steps_per_second": 6.239, "step": 955 }, { "epoch": 42.2, "learning_rate": 2.5252525252525256e-05, "loss": 0.115, "step": 960 }, { "epoch": 42.64, "learning_rate": 2.4936868686868688e-05, "loss": 0.0983, "step": 970 }, { "epoch": 42.99, "eval_accuracy": 0.5961538461538461, "eval_loss": 1.8320040702819824, "eval_runtime": 0.6571, "eval_samples_per_second": 79.13, "eval_steps_per_second": 6.087, "step": 978 }, { "epoch": 43.08, "learning_rate": 2.4621212121212123e-05, "loss": 0.0707, "step": 980 }, { "epoch": 43.52, "learning_rate": 2.4305555555555558e-05, "loss": 0.0665, "step": 990 }, { "epoch": 43.96, "learning_rate": 2.398989898989899e-05, "loss": 0.0909, "step": 1000 }, { "epoch": 44.0, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.563212275505066, "eval_runtime": 0.7357, "eval_samples_per_second": 70.683, "eval_steps_per_second": 5.437, "step": 1001 }, { "epoch": 44.4, "learning_rate": 2.3674242424242428e-05, "loss": 0.1091, "step": 1010 }, { "epoch": 44.84, "learning_rate": 2.335858585858586e-05, "loss": 0.0991, "step": 1020 }, { "epoch": 44.97, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.7606291770935059, "eval_runtime": 0.6397, "eval_samples_per_second": 81.294, "eval_steps_per_second": 6.253, "step": 1023 }, { "epoch": 45.27, "learning_rate": 2.3042929292929294e-05, "loss": 0.0821, "step": 1030 }, { "epoch": 45.71, "learning_rate": 2.272727272727273e-05, "loss": 0.0658, "step": 1040 }, { "epoch": 45.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.592665433883667, "eval_runtime": 0.6456, "eval_samples_per_second": 80.539, "eval_steps_per_second": 6.195, "step": 1046 }, { "epoch": 46.15, "learning_rate": 2.2411616161616164e-05, "loss": 0.0869, "step": 1050 }, { "epoch": 46.59, "learning_rate": 2.20959595959596e-05, "loss": 0.0412, "step": 1060 }, { "epoch": 46.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.4660203456878662, "eval_runtime": 0.6371, "eval_samples_per_second": 81.614, "eval_steps_per_second": 6.278, "step": 1069 }, { "epoch": 47.03, "learning_rate": 2.178030303030303e-05, "loss": 0.084, "step": 1070 }, { "epoch": 47.47, "learning_rate": 2.1464646464646466e-05, "loss": 0.0618, "step": 1080 }, { "epoch": 47.91, "learning_rate": 2.1148989898989898e-05, "loss": 0.0919, "step": 1090 }, { "epoch": 48.0, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.3294285535812378, "eval_runtime": 0.6842, "eval_samples_per_second": 76.006, "eval_steps_per_second": 5.847, "step": 1092 }, { "epoch": 48.35, "learning_rate": 2.0833333333333336e-05, "loss": 0.0733, "step": 1100 }, { "epoch": 48.79, "learning_rate": 2.0517676767676768e-05, "loss": 0.0726, "step": 1110 }, { "epoch": 48.97, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.5551464557647705, "eval_runtime": 0.6647, "eval_samples_per_second": 78.236, "eval_steps_per_second": 6.018, "step": 1114 }, { "epoch": 49.23, "learning_rate": 2.0202020202020203e-05, "loss": 0.0662, "step": 1120 }, { "epoch": 49.67, "learning_rate": 1.9886363636363638e-05, "loss": 0.0554, "step": 1130 }, { "epoch": 49.98, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.7157161235809326, "eval_runtime": 0.6597, "eval_samples_per_second": 78.829, "eval_steps_per_second": 6.064, "step": 1137 }, { "epoch": 50.11, "learning_rate": 1.9570707070707073e-05, "loss": 0.0697, "step": 1140 }, { "epoch": 50.55, "learning_rate": 1.9255050505050508e-05, "loss": 0.1054, "step": 1150 }, { "epoch": 50.99, "learning_rate": 1.893939393939394e-05, "loss": 0.0585, "step": 1160 }, { "epoch": 50.99, "eval_accuracy": 0.5961538461538461, "eval_loss": 1.8279716968536377, "eval_runtime": 0.6462, "eval_samples_per_second": 80.475, "eval_steps_per_second": 6.19, "step": 1160 }, { "epoch": 51.43, "learning_rate": 1.8623737373737374e-05, "loss": 0.0821, "step": 1170 }, { "epoch": 51.87, "learning_rate": 1.830808080808081e-05, "loss": 0.0607, "step": 1180 }, { "epoch": 52.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.6141858100891113, "eval_runtime": 0.6587, "eval_samples_per_second": 78.949, "eval_steps_per_second": 6.073, "step": 1183 }, { "epoch": 52.31, "learning_rate": 1.7992424242424244e-05, "loss": 0.0444, "step": 1190 }, { "epoch": 52.75, "learning_rate": 1.7676767676767676e-05, "loss": 0.0719, "step": 1200 }, { "epoch": 52.97, "eval_accuracy": 0.5961538461538461, "eval_loss": 1.992350459098816, "eval_runtime": 0.6432, "eval_samples_per_second": 80.851, "eval_steps_per_second": 6.219, "step": 1205 }, { "epoch": 53.19, "learning_rate": 1.736111111111111e-05, "loss": 0.1031, "step": 1210 }, { "epoch": 53.63, "learning_rate": 1.7045454545454546e-05, "loss": 0.0877, "step": 1220 }, { "epoch": 53.98, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.7806010246276855, "eval_runtime": 0.6742, "eval_samples_per_second": 77.134, "eval_steps_per_second": 5.933, "step": 1228 }, { "epoch": 54.07, "learning_rate": 1.672979797979798e-05, "loss": 0.0783, "step": 1230 }, { "epoch": 54.51, "learning_rate": 1.6414141414141416e-05, "loss": 0.0664, "step": 1240 }, { "epoch": 54.95, "learning_rate": 1.6098484848484848e-05, "loss": 0.0743, "step": 1250 }, { "epoch": 54.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.9819928407669067, "eval_runtime": 0.7032, "eval_samples_per_second": 73.95, "eval_steps_per_second": 5.688, "step": 1251 }, { "epoch": 55.38, "learning_rate": 1.5782828282828283e-05, "loss": 0.0652, "step": 1260 }, { "epoch": 55.82, "learning_rate": 1.5467171717171718e-05, "loss": 0.0464, "step": 1270 }, { "epoch": 56.0, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.944918155670166, "eval_runtime": 0.6432, "eval_samples_per_second": 80.851, "eval_steps_per_second": 6.219, "step": 1274 }, { "epoch": 56.26, "learning_rate": 1.5151515151515153e-05, "loss": 0.0522, "step": 1280 }, { "epoch": 56.7, "learning_rate": 1.4835858585858586e-05, "loss": 0.077, "step": 1290 }, { "epoch": 56.97, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.6825530529022217, "eval_runtime": 0.6687, "eval_samples_per_second": 77.768, "eval_steps_per_second": 5.982, "step": 1296 }, { "epoch": 57.14, "learning_rate": 1.452020202020202e-05, "loss": 0.0688, "step": 1300 }, { "epoch": 57.58, "learning_rate": 1.4204545454545456e-05, "loss": 0.073, "step": 1310 }, { "epoch": 57.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.7594443559646606, "eval_runtime": 0.6671, "eval_samples_per_second": 77.944, "eval_steps_per_second": 5.996, "step": 1319 }, { "epoch": 58.02, "learning_rate": 1.388888888888889e-05, "loss": 0.0488, "step": 1320 }, { "epoch": 58.46, "learning_rate": 1.3573232323232325e-05, "loss": 0.0568, "step": 1330 }, { "epoch": 58.9, "learning_rate": 1.3257575757575758e-05, "loss": 0.0623, "step": 1340 }, { "epoch": 58.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.8303287029266357, "eval_runtime": 0.6563, "eval_samples_per_second": 79.235, "eval_steps_per_second": 6.095, "step": 1342 }, { "epoch": 59.34, "learning_rate": 1.2941919191919191e-05, "loss": 0.0818, "step": 1350 }, { "epoch": 59.78, "learning_rate": 1.2626262626262628e-05, "loss": 0.0383, "step": 1360 }, { "epoch": 60.0, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.8124154806137085, "eval_runtime": 0.6437, "eval_samples_per_second": 80.789, "eval_steps_per_second": 6.215, "step": 1365 }, { "epoch": 60.22, "learning_rate": 1.2310606060606061e-05, "loss": 0.0486, "step": 1370 }, { "epoch": 60.66, "learning_rate": 1.1994949494949495e-05, "loss": 0.0526, "step": 1380 }, { "epoch": 60.97, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.8164315223693848, "eval_runtime": 0.6712, "eval_samples_per_second": 77.479, "eval_steps_per_second": 5.96, "step": 1387 }, { "epoch": 61.1, "learning_rate": 1.167929292929293e-05, "loss": 0.0676, "step": 1390 }, { "epoch": 61.54, "learning_rate": 1.1363636363636365e-05, "loss": 0.0482, "step": 1400 }, { "epoch": 61.98, "learning_rate": 1.10479797979798e-05, "loss": 0.0679, "step": 1410 }, { "epoch": 61.98, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.8585782051086426, "eval_runtime": 0.6812, "eval_samples_per_second": 76.341, "eval_steps_per_second": 5.872, "step": 1410 }, { "epoch": 62.42, "learning_rate": 1.0732323232323233e-05, "loss": 0.0791, "step": 1420 }, { "epoch": 62.86, "learning_rate": 1.0416666666666668e-05, "loss": 0.0625, "step": 1430 }, { "epoch": 62.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.9150481224060059, "eval_runtime": 0.7422, "eval_samples_per_second": 70.065, "eval_steps_per_second": 5.39, "step": 1433 }, { "epoch": 63.3, "learning_rate": 1.0101010101010101e-05, "loss": 0.0363, "step": 1440 }, { "epoch": 63.74, "learning_rate": 9.785353535353536e-06, "loss": 0.0482, "step": 1450 }, { "epoch": 64.0, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.9622400999069214, "eval_runtime": 0.6337, "eval_samples_per_second": 82.064, "eval_steps_per_second": 6.313, "step": 1456 }, { "epoch": 64.18, "learning_rate": 9.46969696969697e-06, "loss": 0.071, "step": 1460 }, { "epoch": 64.62, "learning_rate": 9.154040404040405e-06, "loss": 0.0646, "step": 1470 }, { "epoch": 64.97, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.947584629058838, "eval_runtime": 1.1818, "eval_samples_per_second": 44.001, "eval_steps_per_second": 3.385, "step": 1478 }, { "epoch": 65.05, "learning_rate": 8.838383838383838e-06, "loss": 0.0348, "step": 1480 }, { "epoch": 65.49, "learning_rate": 8.522727272727273e-06, "loss": 0.0363, "step": 1490 }, { "epoch": 65.93, "learning_rate": 8.207070707070708e-06, "loss": 0.0594, "step": 1500 }, { "epoch": 65.98, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.5957955121994019, "eval_runtime": 0.6717, "eval_samples_per_second": 77.42, "eval_steps_per_second": 5.955, "step": 1501 }, { "epoch": 66.37, "learning_rate": 7.891414141414141e-06, "loss": 0.0375, "step": 1510 }, { "epoch": 66.81, "learning_rate": 7.5757575757575764e-06, "loss": 0.0568, "step": 1520 }, { "epoch": 66.99, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.8275247812271118, "eval_runtime": 0.7647, "eval_samples_per_second": 68.002, "eval_steps_per_second": 5.231, "step": 1524 }, { "epoch": 67.25, "learning_rate": 7.26010101010101e-06, "loss": 0.0443, "step": 1530 }, { "epoch": 67.69, "learning_rate": 6.944444444444445e-06, "loss": 0.0662, "step": 1540 }, { "epoch": 68.0, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.757631778717041, "eval_runtime": 0.6531, "eval_samples_per_second": 79.615, "eval_steps_per_second": 6.124, "step": 1547 }, { "epoch": 68.13, "learning_rate": 6.628787878787879e-06, "loss": 0.0352, "step": 1550 }, { "epoch": 68.57, "learning_rate": 6.313131313131314e-06, "loss": 0.0428, "step": 1560 }, { "epoch": 68.97, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.9324886798858643, "eval_runtime": 0.7317, "eval_samples_per_second": 71.07, "eval_steps_per_second": 5.467, "step": 1569 }, { "epoch": 69.01, "learning_rate": 5.997474747474747e-06, "loss": 0.0549, "step": 1570 }, { "epoch": 69.45, "learning_rate": 5.681818181818182e-06, "loss": 0.0571, "step": 1580 }, { "epoch": 69.89, "learning_rate": 5.3661616161616165e-06, "loss": 0.0433, "step": 1590 }, { "epoch": 69.98, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.8206470012664795, "eval_runtime": 0.6331, "eval_samples_per_second": 82.13, "eval_steps_per_second": 6.318, "step": 1592 }, { "epoch": 70.33, "learning_rate": 5.050505050505051e-06, "loss": 0.0477, "step": 1600 }, { "epoch": 70.77, "learning_rate": 4.734848484848485e-06, "loss": 0.0511, "step": 1610 }, { "epoch": 70.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.9029183387756348, "eval_runtime": 0.6361, "eval_samples_per_second": 81.743, "eval_steps_per_second": 6.288, "step": 1615 }, { "epoch": 71.21, "learning_rate": 4.419191919191919e-06, "loss": 0.058, "step": 1620 }, { "epoch": 71.65, "learning_rate": 4.103535353535354e-06, "loss": 0.0502, "step": 1630 }, { "epoch": 72.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.8820760250091553, "eval_runtime": 0.6281, "eval_samples_per_second": 82.784, "eval_steps_per_second": 6.368, "step": 1638 }, { "epoch": 72.09, "learning_rate": 3.7878787878787882e-06, "loss": 0.0507, "step": 1640 }, { "epoch": 72.53, "learning_rate": 3.4722222222222224e-06, "loss": 0.0497, "step": 1650 }, { "epoch": 72.97, "learning_rate": 3.156565656565657e-06, "loss": 0.0544, "step": 1660 }, { "epoch": 72.97, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.9534863233566284, "eval_runtime": 0.6302, "eval_samples_per_second": 82.519, "eval_steps_per_second": 6.348, "step": 1660 }, { "epoch": 73.41, "learning_rate": 2.840909090909091e-06, "loss": 0.0406, "step": 1670 }, { "epoch": 73.85, "learning_rate": 2.5252525252525253e-06, "loss": 0.0399, "step": 1680 }, { "epoch": 73.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.8454902172088623, "eval_runtime": 0.6792, "eval_samples_per_second": 76.566, "eval_steps_per_second": 5.89, "step": 1683 }, { "epoch": 74.29, "learning_rate": 2.2095959595959595e-06, "loss": 0.0517, "step": 1690 }, { "epoch": 74.73, "learning_rate": 1.8939393939393941e-06, "loss": 0.0561, "step": 1700 }, { "epoch": 74.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.8289686441421509, "eval_runtime": 0.6441, "eval_samples_per_second": 80.727, "eval_steps_per_second": 6.21, "step": 1706 }, { "epoch": 75.16, "learning_rate": 1.5782828282828285e-06, "loss": 0.0487, "step": 1710 }, { "epoch": 75.6, "learning_rate": 1.2626262626262627e-06, "loss": 0.041, "step": 1720 }, { "epoch": 76.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.8427259922027588, "eval_runtime": 0.6382, "eval_samples_per_second": 81.484, "eval_steps_per_second": 6.268, "step": 1729 }, { "epoch": 76.04, "learning_rate": 9.469696969696971e-07, "loss": 0.0387, "step": 1730 }, { "epoch": 76.48, "learning_rate": 6.313131313131313e-07, "loss": 0.0612, "step": 1740 }, { "epoch": 76.92, "learning_rate": 3.1565656565656567e-07, "loss": 0.0582, "step": 1750 }, { "epoch": 76.97, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.8591234683990479, "eval_runtime": 0.6637, "eval_samples_per_second": 78.354, "eval_steps_per_second": 6.027, "step": 1751 }, { "epoch": 77.36, "learning_rate": 0.0, "loss": 0.0315, "step": 1760 }, { "epoch": 77.36, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.8611557483673096, "eval_runtime": 1.1148, "eval_samples_per_second": 46.647, "eval_steps_per_second": 3.588, "step": 1760 }, { "epoch": 77.36, "step": 1760, "total_flos": 3.637414425770459e+18, "train_loss": 0.22083052936941386, "train_runtime": 1958.4327, "train_samples_per_second": 59.027, "train_steps_per_second": 0.899 } ], "logging_steps": 10, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 80, "save_steps": 500, "total_flos": 3.637414425770459e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }