{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 1150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 9.91304347826087e-05, "loss": 0.1114, "step": 10 }, { "epoch": 0.17, "learning_rate": 9.82608695652174e-05, "loss": 0.0247, "step": 20 }, { "epoch": 0.26, "learning_rate": 9.739130434782609e-05, "loss": 0.0153, "step": 30 }, { "epoch": 0.35, "learning_rate": 9.652173913043479e-05, "loss": 0.0139, "step": 40 }, { "epoch": 0.43, "learning_rate": 9.565217391304348e-05, "loss": 0.0117, "step": 50 }, { "epoch": 0.52, "learning_rate": 9.478260869565218e-05, "loss": 0.0029, "step": 60 }, { "epoch": 0.61, "learning_rate": 9.391304347826087e-05, "loss": 0.0031, "step": 70 }, { "epoch": 0.7, "learning_rate": 9.304347826086957e-05, "loss": 0.0175, "step": 80 }, { "epoch": 0.78, "learning_rate": 9.217391304347827e-05, "loss": 0.0141, "step": 90 }, { "epoch": 0.87, "learning_rate": 9.130434782608696e-05, "loss": 0.0073, "step": 100 }, { "epoch": 0.96, "learning_rate": 9.043478260869566e-05, "loss": 0.0067, "step": 110 }, { "epoch": 1.0, "eval_loss": 0.005401695612818003, "eval_mse": 0.005401696544140577, "eval_runtime": 184.1257, "eval_samples_per_second": 1.249, "eval_steps_per_second": 0.158, "step": 115 }, { "epoch": 1.04, "learning_rate": 8.956521739130435e-05, "loss": 0.0041, "step": 120 }, { "epoch": 1.13, "learning_rate": 8.869565217391305e-05, "loss": 0.009, "step": 130 }, { "epoch": 1.22, "learning_rate": 8.782608695652174e-05, "loss": 0.0031, "step": 140 }, { "epoch": 1.3, "learning_rate": 8.695652173913044e-05, "loss": 0.0133, "step": 150 }, { "epoch": 1.39, "learning_rate": 8.608695652173914e-05, "loss": 0.0022, "step": 160 }, { "epoch": 1.48, "learning_rate": 8.521739130434783e-05, "loss": 0.0019, "step": 170 }, { "epoch": 1.57, "learning_rate": 8.434782608695653e-05, "loss": 0.0023, "step": 180 }, { "epoch": 1.65, "learning_rate": 8.347826086956521e-05, "loss": 0.0018, "step": 190 }, { "epoch": 1.74, "learning_rate": 8.260869565217392e-05, "loss": 0.0018, "step": 200 }, { "epoch": 1.83, "learning_rate": 8.173913043478262e-05, "loss": 0.0036, "step": 210 }, { "epoch": 1.91, "learning_rate": 8.086956521739131e-05, "loss": 0.0067, "step": 220 }, { "epoch": 2.0, "learning_rate": 8e-05, "loss": 0.0079, "step": 230 }, { "epoch": 2.0, "eval_loss": 0.006907076574862003, "eval_mse": 0.006907076574862003, "eval_runtime": 2268.8659, "eval_samples_per_second": 0.101, "eval_steps_per_second": 0.013, "step": 230 }, { "epoch": 2.09, "learning_rate": 7.91304347826087e-05, "loss": 0.0031, "step": 240 }, { "epoch": 2.17, "learning_rate": 7.82608695652174e-05, "loss": 0.0066, "step": 250 }, { "epoch": 2.26, "learning_rate": 7.73913043478261e-05, "loss": 0.0032, "step": 260 }, { "epoch": 2.35, "learning_rate": 7.652173913043479e-05, "loss": 0.0028, "step": 270 }, { "epoch": 2.43, "learning_rate": 7.565217391304347e-05, "loss": 0.0067, "step": 280 }, { "epoch": 2.52, "learning_rate": 7.478260869565218e-05, "loss": 0.0029, "step": 290 }, { "epoch": 2.61, "learning_rate": 7.391304347826086e-05, "loss": 0.0017, "step": 300 }, { "epoch": 2.7, "learning_rate": 7.304347826086957e-05, "loss": 0.0019, "step": 310 }, { "epoch": 2.78, "learning_rate": 7.217391304347827e-05, "loss": 0.013, "step": 320 }, { "epoch": 2.87, "learning_rate": 7.130434782608696e-05, "loss": 0.0048, "step": 330 }, { "epoch": 2.96, "learning_rate": 7.043478260869566e-05, "loss": 0.0033, "step": 340 }, { "epoch": 3.0, "eval_loss": 0.005840361583977938, "eval_mse": 0.005840362515300512, "eval_runtime": 179.4443, "eval_samples_per_second": 1.282, "eval_steps_per_second": 0.162, "step": 345 }, { "epoch": 3.04, "learning_rate": 6.956521739130436e-05, "loss": 0.0062, "step": 350 }, { "epoch": 3.13, "learning_rate": 6.869565217391305e-05, "loss": 0.0019, "step": 360 }, { "epoch": 3.22, "learning_rate": 6.782608695652173e-05, "loss": 0.008, "step": 370 }, { "epoch": 3.3, "learning_rate": 6.695652173913044e-05, "loss": 0.0033, "step": 380 }, { "epoch": 3.39, "learning_rate": 6.608695652173912e-05, "loss": 0.0034, "step": 390 }, { "epoch": 3.48, "learning_rate": 6.521739130434783e-05, "loss": 0.002, "step": 400 }, { "epoch": 3.57, "learning_rate": 6.434782608695652e-05, "loss": 0.0012, "step": 410 }, { "epoch": 3.65, "learning_rate": 6.347826086956523e-05, "loss": 0.0166, "step": 420 }, { "epoch": 3.74, "learning_rate": 6.260869565217392e-05, "loss": 0.0039, "step": 430 }, { "epoch": 3.83, "learning_rate": 6.173913043478262e-05, "loss": 0.0016, "step": 440 }, { "epoch": 3.91, "learning_rate": 6.086956521739131e-05, "loss": 0.0016, "step": 450 }, { "epoch": 4.0, "learning_rate": 6e-05, "loss": 0.0011, "step": 460 }, { "epoch": 4.0, "eval_loss": 0.005455708596855402, "eval_mse": 0.005455708596855402, "eval_runtime": 69.7545, "eval_samples_per_second": 3.297, "eval_steps_per_second": 0.416, "step": 460 }, { "epoch": 4.09, "learning_rate": 5.9130434782608704e-05, "loss": 0.003, "step": 470 }, { "epoch": 4.17, "learning_rate": 5.826086956521739e-05, "loss": 0.001, "step": 480 }, { "epoch": 4.26, "learning_rate": 5.739130434782609e-05, "loss": 0.0047, "step": 490 }, { "epoch": 4.35, "learning_rate": 5.652173913043478e-05, "loss": 0.0019, "step": 500 }, { "epoch": 4.43, "learning_rate": 5.565217391304348e-05, "loss": 0.0019, "step": 510 }, { "epoch": 4.52, "learning_rate": 5.478260869565217e-05, "loss": 0.0116, "step": 520 }, { "epoch": 4.61, "learning_rate": 5.391304347826087e-05, "loss": 0.0073, "step": 530 }, { "epoch": 4.7, "learning_rate": 5.3043478260869574e-05, "loss": 0.0022, "step": 540 }, { "epoch": 4.78, "learning_rate": 5.217391304347826e-05, "loss": 0.0012, "step": 550 }, { "epoch": 4.87, "learning_rate": 5.1304347826086966e-05, "loss": 0.0082, "step": 560 }, { "epoch": 4.96, "learning_rate": 5.0434782608695655e-05, "loss": 0.003, "step": 570 }, { "epoch": 5.0, "eval_loss": 0.008183675818145275, "eval_mse": 0.008183675818145275, "eval_runtime": 76.479, "eval_samples_per_second": 3.007, "eval_steps_per_second": 0.379, "step": 575 }, { "epoch": 5.04, "learning_rate": 4.956521739130435e-05, "loss": 0.0178, "step": 580 }, { "epoch": 5.13, "learning_rate": 4.8695652173913046e-05, "loss": 0.0035, "step": 590 }, { "epoch": 5.22, "learning_rate": 4.782608695652174e-05, "loss": 0.0048, "step": 600 }, { "epoch": 5.3, "learning_rate": 4.695652173913044e-05, "loss": 0.0013, "step": 610 }, { "epoch": 5.39, "learning_rate": 4.608695652173913e-05, "loss": 0.0058, "step": 620 }, { "epoch": 5.48, "learning_rate": 4.521739130434783e-05, "loss": 0.006, "step": 630 }, { "epoch": 5.57, "learning_rate": 4.4347826086956525e-05, "loss": 0.0053, "step": 640 }, { "epoch": 5.65, "learning_rate": 4.347826086956522e-05, "loss": 0.0011, "step": 650 }, { "epoch": 5.74, "learning_rate": 4.2608695652173916e-05, "loss": 0.0012, "step": 660 }, { "epoch": 5.83, "learning_rate": 4.1739130434782605e-05, "loss": 0.0011, "step": 670 }, { "epoch": 5.91, "learning_rate": 4.086956521739131e-05, "loss": 0.0017, "step": 680 }, { "epoch": 6.0, "learning_rate": 4e-05, "loss": 0.0012, "step": 690 }, { "epoch": 6.0, "eval_loss": 0.00548972561955452, "eval_mse": 0.00548972561955452, "eval_runtime": 68.4873, "eval_samples_per_second": 3.358, "eval_steps_per_second": 0.423, "step": 690 }, { "epoch": 6.09, "learning_rate": 3.91304347826087e-05, "loss": 0.0025, "step": 700 }, { "epoch": 6.17, "learning_rate": 3.8260869565217395e-05, "loss": 0.0024, "step": 710 }, { "epoch": 6.26, "learning_rate": 3.739130434782609e-05, "loss": 0.0016, "step": 720 }, { "epoch": 6.35, "learning_rate": 3.6521739130434786e-05, "loss": 0.0051, "step": 730 }, { "epoch": 6.43, "learning_rate": 3.565217391304348e-05, "loss": 0.0046, "step": 740 }, { "epoch": 6.52, "learning_rate": 3.478260869565218e-05, "loss": 0.0031, "step": 750 }, { "epoch": 6.61, "learning_rate": 3.3913043478260867e-05, "loss": 0.0012, "step": 760 }, { "epoch": 6.7, "learning_rate": 3.304347826086956e-05, "loss": 0.0078, "step": 770 }, { "epoch": 6.78, "learning_rate": 3.217391304347826e-05, "loss": 0.0045, "step": 780 }, { "epoch": 6.87, "learning_rate": 3.130434782608696e-05, "loss": 0.0014, "step": 790 }, { "epoch": 6.96, "learning_rate": 3.0434782608695656e-05, "loss": 0.0015, "step": 800 }, { "epoch": 7.0, "eval_loss": 0.005614197812974453, "eval_mse": 0.005614197812974453, "eval_runtime": 59.7135, "eval_samples_per_second": 3.852, "eval_steps_per_second": 0.486, "step": 805 }, { "epoch": 7.04, "learning_rate": 2.9565217391304352e-05, "loss": 0.0055, "step": 810 }, { "epoch": 7.13, "learning_rate": 2.8695652173913044e-05, "loss": 0.0027, "step": 820 }, { "epoch": 7.22, "learning_rate": 2.782608695652174e-05, "loss": 0.001, "step": 830 }, { "epoch": 7.3, "learning_rate": 2.6956521739130436e-05, "loss": 0.0033, "step": 840 }, { "epoch": 7.39, "learning_rate": 2.608695652173913e-05, "loss": 0.0018, "step": 850 }, { "epoch": 7.48, "learning_rate": 2.5217391304347827e-05, "loss": 0.0013, "step": 860 }, { "epoch": 7.57, "learning_rate": 2.4347826086956523e-05, "loss": 0.001, "step": 870 }, { "epoch": 7.65, "learning_rate": 2.347826086956522e-05, "loss": 0.0032, "step": 880 }, { "epoch": 7.74, "learning_rate": 2.2608695652173914e-05, "loss": 0.0011, "step": 890 }, { "epoch": 7.83, "learning_rate": 2.173913043478261e-05, "loss": 0.0007, "step": 900 }, { "epoch": 7.91, "learning_rate": 2.0869565217391303e-05, "loss": 0.0012, "step": 910 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 0.0008, "step": 920 }, { "epoch": 8.0, "eval_loss": 0.005982376169413328, "eval_mse": 0.005982376169413328, "eval_runtime": 60.492, "eval_samples_per_second": 3.802, "eval_steps_per_second": 0.479, "step": 920 }, { "epoch": 8.09, "learning_rate": 1.9130434782608697e-05, "loss": 0.0011, "step": 930 }, { "epoch": 8.17, "learning_rate": 1.8260869565217393e-05, "loss": 0.0009, "step": 940 }, { "epoch": 8.26, "learning_rate": 1.739130434782609e-05, "loss": 0.0027, "step": 950 }, { "epoch": 8.35, "learning_rate": 1.652173913043478e-05, "loss": 0.0028, "step": 960 }, { "epoch": 8.43, "learning_rate": 1.565217391304348e-05, "loss": 0.0012, "step": 970 }, { "epoch": 8.52, "learning_rate": 1.4782608695652176e-05, "loss": 0.0008, "step": 980 }, { "epoch": 8.61, "learning_rate": 1.391304347826087e-05, "loss": 0.0047, "step": 990 }, { "epoch": 8.7, "learning_rate": 1.3043478260869566e-05, "loss": 0.0013, "step": 1000 }, { "epoch": 8.78, "learning_rate": 1.2173913043478261e-05, "loss": 0.0009, "step": 1010 }, { "epoch": 8.87, "learning_rate": 1.1304347826086957e-05, "loss": 0.0009, "step": 1020 }, { "epoch": 8.96, "learning_rate": 1.0434782608695651e-05, "loss": 0.0092, "step": 1030 }, { "epoch": 9.0, "eval_loss": 0.005765838548541069, "eval_mse": 0.005765838548541069, "eval_runtime": 61.2576, "eval_samples_per_second": 3.755, "eval_steps_per_second": 0.473, "step": 1035 }, { "epoch": 9.04, "learning_rate": 9.565217391304349e-06, "loss": 0.0008, "step": 1040 }, { "epoch": 9.13, "learning_rate": 8.695652173913044e-06, "loss": 0.0023, "step": 1050 }, { "epoch": 9.22, "learning_rate": 7.82608695652174e-06, "loss": 0.0011, "step": 1060 }, { "epoch": 9.3, "learning_rate": 6.956521739130435e-06, "loss": 0.0011, "step": 1070 }, { "epoch": 9.39, "learning_rate": 6.086956521739131e-06, "loss": 0.0066, "step": 1080 }, { "epoch": 9.48, "learning_rate": 5.217391304347826e-06, "loss": 0.0006, "step": 1090 }, { "epoch": 9.57, "learning_rate": 4.347826086956522e-06, "loss": 0.001, "step": 1100 }, { "epoch": 9.65, "learning_rate": 3.4782608695652175e-06, "loss": 0.0026, "step": 1110 }, { "epoch": 9.74, "learning_rate": 2.608695652173913e-06, "loss": 0.0048, "step": 1120 }, { "epoch": 9.83, "learning_rate": 1.7391304347826088e-06, "loss": 0.0009, "step": 1130 }, { "epoch": 9.91, "learning_rate": 8.695652173913044e-07, "loss": 0.0011, "step": 1140 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.0012, "step": 1150 } ], "max_steps": 1150, "num_train_epochs": 10, "total_flos": 0.0, "trial_name": null, "trial_params": null }